Merge "Merge branch 'upstream/master'"

commit: a3fc3e7c47aa1d8163cee7c0bd1a5c95f0c2fc2d [log] [tgz]
author: Dale Sather <dalesat@google.com> Mon Nov 05 22:01:10 2018 +0000
committer: Dale Sather <dalesat@google.com> Mon Nov 05 22:01:10 2018 +0000
tree: 136807b8c91ae1f3650036d6f2b1acef28fa0686
parent: 635bdae185e8df082e60c9e230f5087f8f89929a [diff]
parent: 79bb5d6abc7181bc33e96a3da6089bb41d0bf557 [diff]
diff --git a/.gitignore b/.gitignore
index dabb517..0e57cb0 100644
--- a/.gitignore
+++ b/.gitignore

@@ -29,7 +29,6 @@
 /ffmpeg
 /ffplay
 /ffprobe
-/ffserver
 /config.asm
 /config.h
 /coverage.info

diff --git a/.travis.yml b/.travis.yml
index 40f01f9..63f2051 100644
--- a/.travis.yml
+++ b/.travis.yml

@@ -11,6 +11,10 @@
 compiler:
   - clang
   - gcc
+matrix:
+    exclude:
+        - os: osx
+          compiler: gcc
 cache:
   directories:
     - ffmpeg-samples

diff --git a/Changelog b/Changelog
index 5db7b0a..4a22ab4 100644
--- a/Changelog
+++ b/Changelog

@@ -1,143 +1,96 @@
 Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
-version 3.4.2:
-- avcodec/vp3: Error out on invalid num_coeffs in unpack_vlcs()
-- avcodec/mpeg4videodec: Ignore multiple VOL headers
-- avcodec/vp3: Check eob_run
-- avcodec/pafvideo: Check allocated frame size
-- avcodec/scpr: Fix reading a pixel before the first
-- avcodec/mpeg2dec: Fix field selection for skipped macroblocks
-- avcodec/huffyuvdec: Check input buffer size
-- avcodec/utvideodec: Fix bytes left check in decode_frame()
-- avcodec/wavpack: Fix integer overflow in FFABS
-- avcodec/aacsbr_fixed: Fix overflows in rounding in sbr_hf_assemble()
-- avcodec/exr: Fix memleaks in decode_header()
-- avcodec/mediacodecdec: use ff_hevc_ps_uninit()
-- avcodec/hevc_parser: use ff_hevc_uninit_parameter_sets()
-- avcodec/hevcdec: use ff_hevc_uninit_parameter_sets()
-- avcodec/hevc_ps: add a function to uninitialize parameter set buffers
-- avcodec/dirac_dwt: Fix several integer overflows
-- avcodec/indeo5: Do not leave frame_type set to an invalid value
-- avcodec/hevc_ps: Check log2_sao_offset_scale_*
-- avcodec/mpeg4videodec: Avoid possibly aliasing violating casts
-- avcodec/get_bits: Document the return code of get_vlc2()
-- avcodec/mpeg4videodec: Check mb_num also against 0
-- avfilter/vf_transpose: Fix used plane count.
-- avcodec/hevc_cabac: Check prefix so as to avoid invalid shifts in coeff_abs_level_remaining_decode()
-- avcodec/mjpegdec: Fix integer overflow in DC dequantization
-- avcodec/dxtory: Fix bits left checks
-- avcodec/hevc_cabac: Move prefix check in coeff_abs_level_remaining_decode() down
-- avcodec/truemotion2: Fix integer overflow in TM2_RECALC_BLOCK()
-- avcodec/snowdec: Fix integer overflow before htaps check
-- avcodec/ulti: Check number of blocks at init
-- avcodec/wavpack: Fix integer overflows in wv_unpack_stereo / mono
-- avcodec/jpeg2000: Check sum of sizes of band->prec before allocating
-- avcodec/ac3dec_fixed: Fix integer overflow in scale_coefs()
-- avformat/lrcdec: Fix memory leak in lrc_read_header()
-- avformat/matroskadec: Fix float-cast-overflow undefined behavior in matroska_parse_tracks()
-- lavfi/deinterlace_vaapi: fix can't show full option information.
-- configure:version 3.4.1: bump year
-- avcodec/utils: Avoid hardcoding duplicated types in sizeof()
-- avcodec/arm/sbrdsp_neon: Use a free register instead of putting 2 things in one
-- avcodec/h264addpx_template: Fixes integer overflows
-- avcodec/dirac_dwt: Fix overflows in COMPOSE_HAARiH0/COMPOSE_HAARiL0
-- avcodec/diracdec: Fix integer overflow with quant
-- avcodec/opus_parser: Check payload_len in parse_opus_ts_header()
-- avcodec/jpeg2000dsp: Fix integer overflows in ict_int()
-- avcodec/h264_slice: Do not attempt to render into frames already output
-- avcodec/dnxhddec: Check dc vlc
-- avcodec/exr: Check buf_size more completely
-- avcodec/flacdec: Fix overflow in multiplication in decode_subframe_fixed()
-- avcodec/hevcdsp_template: Fix Invalid shifts in put_hevc_qpel_bi_w_h() and put_hevc_qpel_bi_w_w()
-- avcodec/flacdec: avoid undefined shift
-- avcodec/hevcdsp_template.c: Fix undefined shift in FUNC(dequant)
-- avcodec/dirac_dwt: Fix integer overflow in COMPOSE_DD97iH0() and COMPOSE_DD137iL0()
-- avcodec/hevc_cabac: Fix integer overflow in ff_hevc_cu_qp_delta_abs()
-- tests/audiomatch: Add missing return code at the end of main()
-- avcodec/hevc_sei: Fix integer overflows in decode_nal_sei_message()
-- avcodec/hevcdsp_template: Fix undefined shift in put_hevc_qpel_bi_w_hv()
-- avcodec/h264_parse: Treat escaped and unescaped decoding error equal in decode_extradata_ps_mp4()
-- avcodec/vp9: mark frame as finished on decode_tiles() failure
-- libavfilter/af_dcshift.c: Fixed repeated spelling error
-- avfilter/formats: fix wrong function name in error message
+version <next>:
+- deblock filter
+- tmix filter
+- amplify filter
+- fftdnoiz filter
+- aderivative and aintegral audio filters
+- pal75bars and pal100bars video filter sources
+- support mbedTLS based TLS
+- adeclick filter
+- adeclip filter
+- libtensorflow backend for DNN based filters like srcnn
+- vc1 decoder is now bit-exact
+- ATRAC9 decoder
+- lensfun wrapper filter
+- colorconstancy filter
+- AVS2 video decoder via libdavs2
+- IMM4 video decoder
+- Brooktree ProSumer video decoder
+- MatchWare Screen Capture Codec decoder
+- WinCam Motion Video decoder
+- 1D LUT filter (lut1d)
+- RemotelyAnywhere Screen Capture decoder
+- cue and acue filters
+- support for AV1 in MP4
+- transpose_npp filter
+- AVS2 video encoder via libxavs2
+- amultiply filter
+- Block-Matching 3d (bm3d) denoising filter
+- acrossover filter
+- ilbc decoder
+- audio denoiser as afftdn filter
+- AV1 parser
+- SER demuxer
 
-version 3.4.1:
-- avcodec/vp9_superframe_split_bsf: Fix integer overflow in frame_size/total_size checks
-- avcodec/amrwbdec: Fix division by 0 in voice_factor()
-- avformat/utils: Fix warning: ISO C90 forbids mixed declarations and code
-- avcodec/decode: reset codec on receiving packet after EOF in compat_decode
-- avcodec/diracdsp: Fix integer overflow in PUT_SIGNED_RECT_CLAMPED()
-- avcodec/dirac_dwt: Fix integer overflows in COMPOSE_DAUB97*
-- avcodec/extract_extradata_bsf: Fix leak discovered via fuzzing
-- avcodec/vorbis: Fix another 1 << 31 > int32_t::max() with 1u.
-- avcodec/vorbis: 1 << 31 > int32_t::max(), so use 1u << 31 instead.
-- avformat/utils: Prevent undefined shift with wrap_bits > 64.
-- avcodec/j2kenc: Fix out of array access in encode_cblk()
-- avcodec/hevcdsp_template: Fix undefined shift in put_hevc_epel_bi_w_h()
-- lavf/mov: fix huge alloc in mov_read_ctts
-- avcodec/mlpdsp: Fix signed integer overflow, 2nd try
-- avcodec/h264idct_template: Fix integer overflow in ff_h264_idct8_add
-- avcodec/kgv1dec: Check that there is enough input for maximum RLE compression
-- avformat/aacdec: Fix leak in adts_aac_read_packet()
-- avcodec/dirac_dwt: Fix integer overflow in COMPOSE_FIDELITYi*
-- avcodec/sbrdsp_fixed: Fix integer overflow
-- avcodec/mpeg4videodec: Check also for negative versions in the validity check
-- Close ogg stream upon error when using AV_EF_EXPLODE.
-- Fix undefined shift on assumed 8-bit input.
-- Use ff_thread_once for fixed, float table init.
-- Fix leak of frame_duration_buffer in mov_fix_index().
-- avformat/mov: Propagate errors in mov_switch_root.
-- avcodec/hevcdsp_template: Fix invalid shift in put_hevc_epel_bi_w_v()
-- avcodec/mlpdsp: Fix undefined shift ff_mlp_pack_output()
-- avcodec/zmbv: Check that the buffer is large enough for mvec
-- avcodec/dirac_dwt: Fix integer overflow in COMPOSE_DD137iL0()
-- avcodec/wmv2dec: Check end of bitstream in parse_mb_skip() and ff_wmv2_decode_mb()
-- avcodec/snowdec: Check for remaining bitstream in decode_blocks()
-- avcodec/snowdec: Check intra block dc differences.
-- avformat/mov: Check size of STSC allocation
-- avcodec/vc2enc: Clear coef_buf on allocation
-- avcodec/h264dec: Fix potential array overread
-- avcodec/x86/mpegvideodsp: Fix signedness bug in need_emu
-- avcodec/aacpsdsp_template: Fix integer overflows in ps_decorrelate_c()
-- avcodec/aacdec_fixed: Fix undefined shift
-- avcodec/mdct_*: Fix integer overflow in addition in RESCALE()
-- avcodec/snowdec: Fix integer overflow in header parsing
-- avcodec/cngdec: Fix integer clipping
-- avcodec/sbrdsp_fixed: Fix integer overflow in shift in sbr_hf_g_filt_c()
-- avcodec/aacsbr_fixed: Fix division by zero in sbr_gain_calc()
-- avutil/softfloat: Add FLOAT_MIN
-- avcodec/h264idct_template: Fix integer overflows in ff_h264_idct8_add()
-- avcodec/xan: Check for bitstream end in xan_huffman_decode()
-- avcodec/exr: fix undefined shift in pxr24_uncompress()
-- avformat: Free the internal codec context at the end
-- avcodec/h264idct_template: Fix integer overflows in ff_h264_idct8_add()
-- avcodec/xan: Improve overlapping check
-- avcodec/aacdec_fixed: Fix integer overflow in apply_dependent_coupling_fixed()
-- avcodec/aacdec_fixed: Fix integer overflow in predict()
-- avcodec/jpeglsdec: Check for end of bitstream in ls_decode_line()
-- avcodec/jpeglsdec: Check ilv for being a supported value
-- tests/ffserver.regression.ref: update checksums to what ffserver currently produces
-- ffserver: Fix off by 1 error in path
-- avcodec/proresdec: align dequantization matrix buffers
-- avformat/matroskaenc: add missing allocation failure checks for stream durations
-- avformat/matroskaenc: actually enforce the stream limit
-- configure: Fix dependencies of aac_at decoder.
-- Don't manipulate duration when it's AV_NOPTS_VALUE.
-- lavfi/af_pan: fix sign handling in channel coefficient parser
-- avformat/hlsenc: write fmp4 init header after first AV frame
-- avformat/hlsenc: allocate space for terminating null
-- avformat/hlsenc: reindent hlsenc code
-- avformat/hlsenc: check hls segment mode for ignore the init filename
-- avformat/hlsenc: reindent hlsenc code
-- avformat/hlsenc: fix missing first segment bug in fmp4 mode
-- avformat/hlsenc: fix base_output_dirname is null when basename_size is 0 bug
-- ffplay: use SDL2 audio API
-- ffplay: only use hardware accelerated SDL texture formats
-- ffplay: create the window and the renderer before starting playback
-- ffmpeg: always init output stream before reaping filters
-- vc2enc_dwt: pad the temporary buffer by the slice size
-- lavu/arm: Check for have_vfp_vm instead of !have_vfpv3 for float_dsp_vfp
+
+version 4.0:
+- Bitstream filters for editing metadata in H.264, HEVC and MPEG-2 streams
+- Dropped support for OpenJPEG versions 2.0 and below. Using OpenJPEG now
+  requires 2.1 (or later) and pkg-config.
+- VDA dropped (use VideoToolbox instead)
+- MagicYUV encoder
+- Raw AMR-NB and AMR-WB demuxers
+- TiVo ty/ty+ demuxer
+- Intel QSV-accelerated MJPEG encoding
+- PCE support for extended channel layouts in the AAC encoder
+- native aptX and aptX HD encoder and decoder
+- Raw aptX and aptX HD muxer and demuxer
+- NVIDIA NVDEC-accelerated H.264, HEVC, MJPEG, MPEG-1/2/4, VC1, VP8/9 hwaccel decoding
+- Intel QSV-accelerated overlay filter
+- mcompand audio filter
+- acontrast audio filter
+- OpenCL overlay filter
+- video mix filter
+- video normalize filter
+- audio lv2 wrapper filter
+- VAAPI MJPEG and VP8 decoding
+- AMD AMF H.264 and HEVC encoders
+- video fillborders filter
+- video setrange filter
+- nsp demuxer
+- support LibreSSL (via libtls)
+- AVX-512/ZMM support added
+- Dropped support for building for Windows XP. The minimum supported Windows
+  version is Windows Vista.
+- deconvolve video filter
+- entropy video filter
+- hilbert audio filter source
+- aiir audio filter
+- aiff: add support for CD-ROM XA ADPCM
+- Removed the ffserver program
+- Removed the ffmenc and ffmdec muxer and demuxer
+- VideoToolbox HEVC encoder and hwaccel
+- VAAPI-accelerated ProcAmp (color balance), denoise and sharpness filters
+- Add android_camera indev
+- codec2 en/decoding via libcodec2
+- muxer/demuxer for raw codec2 files and .c2 files
+- Moved nvidia codec headers into an external repository.
+  They can be found at http://git.videolan.org/?p=ffmpeg/nv-codec-headers.git
+- native SBC encoder and decoder
+- drmeter audio filter
+- hapqa_extract bitstream filter
+- filter_units bitstream filter
+- AV1 Support through libaom
+- E-AC-3 dependent frames support
+- bitstream filter for extracting E-AC-3 core
+- Haivision SRT protocol via libsrt
+- segafilm muxer
+- vfrdet filter
+- SRCNN filter
+
 
 version 3.4:
 - deflicker video filter

diff --git a/MAINTAINERS b/MAINTAINERS
index 9027ed5..3dd26e3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -29,9 +29,6 @@
 ffprobe:
   ffprobe.c                             Stefano Sabatini
 
-ffserver:
-  ffserver.c                            Reynaldo H. Verdejo Pinochet
-
 Commandline utility code:
   cmdutils.c, cmdutils.h                Michael Niedermayer
 
@@ -42,7 +39,7 @@
 Miscellaneous Areas
 ===================
 
-documentation                           Stefano Sabatini, Mike Melanson, Timothy Gu, Lou Logan
+documentation                           Stefano Sabatini, Mike Melanson, Timothy Gu, Lou Logan, Gyan Doshi
 project server                          Árpád Gereöffy, Michael Niedermayer, Reimar Doeffinger, Alexander Strasser, Nikolay Aleksandrov
 presets                                 Robert Swain
 metadata subsystem                      Aurelien Jacobs
@@ -124,7 +121,6 @@
     motion*                             Michael Niedermayer
   rate control:
     ratecontrol.c                       Michael Niedermayer
-    libxvid_rc.c                        Michael Niedermayer
   simple IDCT:
     simple_idct.c, simple_idct.h        Michael Niedermayer
   postprocessing:
@@ -142,6 +138,7 @@
   aacenc*, aaccoder.c                   Rostislav Pehlivanov
   alacenc.c                             Jaikrishnan Menon
   alsdec.c                              Thilo Borgmann, Umair Khan
+  aptx.c                                Aurelien Jacobs
   ass*                                  Aurelien Jacobs
   asv*                                  Michael Niedermayer
   atrac3plus*                           Maxim Poliakovski
@@ -158,7 +155,7 @@
   cpia.c                                Stephan Hilb
   crystalhd.c                           Philip Langdale
   cscd.c                                Reimar Doeffinger
-  cuvid.c                               Timo Rothenpieler
+  cuviddec.c                            Timo Rothenpieler
   dca*                                  foo86
   dirac*                                Rostislav Pehlivanov
   dnxhd*                                Baptiste Coudurier
@@ -170,6 +167,7 @@
   eacmv*, eaidct*, eat*                 Peter Ross
   evrc*                                 Paul B Mahol
   exif.c, exif.h                        Thilo Borgmann
+  exr.c                                 Martin Vignali
   ffv1*                                 Michael Niedermayer
   ffwavesynth.c                         Nicolas George
   fifo.c                                Jan Sebechlebsky
@@ -189,6 +187,7 @@
   jvdec.c                               Peter Ross
   lcl*.c                                Roberto Togni, Reimar Doeffinger
   libcelt_dec.c                         Nicolas George
+  libcodec2.c                           Tomas Härdin
   libdirac*                             David Conrad
   libgsm.c                              Michel Bardiaux
   libkvazaar.c                          Arttu Ylä-Outinen
@@ -212,7 +211,7 @@
   msrle.c                               Mike Melanson
   msvideo1.c                            Mike Melanson
   nuv.c                                 Reimar Doeffinger
-  nvenc*                                Timo Rothenpieler
+  nvdec*, nvenc*                        Timo Rothenpieler
   opus*                                 Rostislav Pehlivanov
   paf.*                                 Paul B Mahol
   pcx.c                                 Ivo van Poorten
@@ -220,7 +219,7 @@
   ptx.c                                 Ivo van Poorten
   qcelp*                                Reynaldo H. Verdejo Pinochet
   qdm2.c, qdm2data.h                    Roberto Togni
-  qsv*                                  Mark Thompson
+  qsv*                                  Mark Thompson, Zhong Li
   qtrle.c                               Mike Melanson
   ra144.c, ra144.h, ra288.c, ra288.h    Roberto Togni
   resample2.c                           Michael Niedermayer
@@ -242,10 +241,10 @@
   tta.c                                 Alex Beregszaszi, Jaikrishnan Menon
   ttaenc.c                              Paul B Mahol
   txd.c                                 Ivo van Poorten
+  v4l2_*                                Jorge Ramirez-Ortiz
   vc2*                                  Rostislav Pehlivanov
   vcr1.c                                Michael Niedermayer
-  vda_h264_dec.c                        Xidorn Quan
-  videotoolboxenc.c                     Rick Kern
+  videotoolboxenc.c                     Rick Kern, Aman Gupta
   vima.c                                Paul B Mahol
   vorbisdec.c                           Denes Balatoni, David Conrad
   vorbisenc.c                           Oded Shimon
@@ -268,11 +267,11 @@
   crystalhd.c                           Philip Langdale
   dxva2*                                Hendrik Leppkes, Laurent Aimar, Steve Lhomme
   d3d11va*                              Steve Lhomme
-  mediacodec*                           Matthieu Bouron
+  mediacodec*                           Matthieu Bouron, Aman Gupta
   vaapi*                                Gwenole Beauchesne
   vaapi_encode*                         Mark Thompson
   vdpau*                                Philip Langdale, Carl Eugen Hoyos
-  videotoolbox*                         Rick Kern
+  videotoolbox*                         Rick Kern, Aman Gupta
 
 
 libavdevice
@@ -282,6 +281,7 @@
 
 
   avfoundation.m                        Thilo Borgmann
+  android_camera.c                      Felix Matouschek
   decklink*                             Marton Balint
   dshow.c                               Roger Pack (CC rogerdpack@gmail.com)
   fbdev_enc.c                           Lukasz Marek
@@ -332,6 +332,7 @@
   vf_bwdif                              Thomas Mundt (CC <thomas.mundt@hr.de>)
   vf_chromakey.c                        Timo Rothenpieler
   vf_colorchannelmixer.c                Paul B Mahol
+  vf_colorconstancy.c                   Mina Sami    (CC <minas.gorgy@gmail.com>)
   vf_colorbalance.c                     Paul B Mahol
   vf_colorkey.c                         Timo Rothenpieler
   vf_colorlevels.c                      Paul B Mahol
@@ -395,8 +396,10 @@
   brstm.c                               Paul B Mahol
   caf*                                  Peter Ross
   cdxl.c                                Paul B Mahol
+  codec2.c                              Tomas Härdin
   crc.c                                 Michael Niedermayer
   dashdec.c                             Steven Liu
+  dashenc.c                             Karthick Jeyapal
   daud.c                                Reimar Doeffinger
   dss.c                                 Oleksij Rempel
   dtsdec.c                              foo86
@@ -521,7 +524,7 @@
 =====================================
 
 Alpha                                   Falk Hueffner
-MIPS                                    Manojkumar Bhosale
+MIPS                                    Manojkumar Bhosale, Shiyou Yin
 Mac OS X / PowerPC                      Romain Dolbeau, Guillaume Poirier
 Amiga / PowerPC                         Colin Ward
 Windows MinGW                           Alex Beregszaszi, Ramiro Polla
@@ -549,6 +552,7 @@
 James Darnley
 Jan Ekström
 Joakim Plate
+Jun Zhao
 Kieran Kunhya
 Kirill Gavrilov
 Martin Storsjö
@@ -571,8 +575,11 @@
 If you want to maintain an older release, please contact us
 
 
-GnuPG Fingerprints of maintainers and contributors
-==================================================
+GnuPG Fingerprints and IRC nicknames of maintainers and contributors
+====================================================================
+
+IRC nicknames are in parentheses. These apply
+to the IRC channels listed on the website.
 
 Alexander Strasser            1C96 78B7 83CB 8AA7 9AF5 D1EB A7D8 A57B A876 E58F
 Anssi Hannula                 1A92 FF42 2DD9 8D2E 8AF7 65A9 4278 C520 513D F3CB
@@ -587,9 +594,10 @@
 Ganesh Ajjanagadde            C96A 848E 97C3 CEA2 AB72 5CE4 45F9 6A2D 3C36 FB1B
 Gwenole Beauchesne            2E63 B3A6 3E44 37E2 017D 2704 53C7 6266 B153 99C4
 Jaikrishnan Menon             61A1 F09F 01C9 2D45 78E1 C862 25DC 8831 AF70 D368
+James Almer                   7751 2E8C FD94 A169 57E6 9A7A 1463 01AD 7376 59E0
 Jean Delvare                  7CA6 9F44 60F1 BDC4 1FD2 C858 A552 6B9B B3CD 4E6A
 Loren Merritt                 ABD9 08F4 C920 3F65 D8BE 35D7 1540 DAA7 060F 56DE
-Lou Logan                     7D68 DC73 CBEF EABB 671A B6CF 621C 2E28 82F8 DC3A
+Lou Logan (llogan)            7D68 DC73 CBEF EABB 671A B6CF 621C 2E28 82F8 DC3A
 Michael Niedermayer           9FF2 128B 147E F673 0BAD F133 611E C787 040B 0FAB
 Nicolas George                24CE 01CE 9ACC 5CEB 74D8 8D9D B063 D997 36E5 4C93
 Nikolay Aleksandrov           8978 1D8C FB71 588E 4B27 EAA8 C4F0 B5FC E011 13B1
@@ -606,5 +614,5 @@
 Stephan Hilb                  4F38 0B3A 5F39 B99B F505 E562 8D5C 5554 4E17 8863
 Tiancheng "Timothy" Gu        9456 AFC0 814A 8139 E994 8351 7FE6 B095 B582 B0D4
 Tim Nicholson                 38CF DB09 3ED0 F607 8B67 6CED 0C0B FC44 8B0B FC83
-Tomas Härdin                  A79D 4E3D F38F 763F 91F5 8B33 A01E 8AE0 41BB 2551
+Tomas Härdin (thardin)        A79D 4E3D F38F 763F 91F5 8B33 A01E 8AE0 41BB 2551
 Wei Gao                       4269 7741 857A 0E60 9EC5 08D2 4744 4EFA 62C1 87B9

diff --git a/Makefile b/Makefile
index 642651d..4bf1dfe 100644
--- a/Makefile
+++ b/Makefile

@@ -45,12 +45,11 @@
 FF_STATIC_DEP_LIBS := $(STATIC_DEP_LIBS)
 
 $(TOOLS): %$(EXESUF): %.o
-	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $^ $(ELIBS)
+	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $^ $(EXTRALIBS-$(*F)) $(EXTRALIBS) $(ELIBS)
 
 target_dec_%_fuzzer$(EXESUF): target_dec_%_fuzzer.o $(FF_DEP_LIBS)
 	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $^ $(ELIBS) $(FF_EXTRALIBS) $(LIBFUZZER_PATH)
 
-tools/cws2fws$(EXESUF): ELIBS = $(ZLIB)
 tools/sofa2wavs$(EXESUF): ELIBS = $(FF_EXTRALIBS)
 tools/uncoded_frame$(EXESUF): $(FF_DEP_LIBS)
 tools/uncoded_frame$(EXESUF): ELIBS = $(FF_EXTRALIBS)
@@ -59,6 +58,7 @@
 CONFIGURABLE_COMPONENTS =                                           \
     $(wildcard $(FFLIBS:%=$(SRC_PATH)/lib%/all*.c))                 \
     $(SRC_PATH)/libavcodec/bitstream_filters.c                      \
+    $(SRC_PATH)/libavcodec/parsers.c                                \
     $(SRC_PATH)/libavformat/protocols.c                             \
 
 config.h: ffbuild/.config
@@ -128,25 +128,24 @@
 	$(Q)mkdir -p "$(DATADIR)"
 	$(INSTALL) -m 644 $(DATA_FILES) "$(DATADIR)"
 
-uninstall: uninstall-libs uninstall-headers uninstall-data
+uninstall: uninstall-data uninstall-headers uninstall-libs uninstall-pkgconfig
 
 uninstall-data:
 	$(RM) -r "$(DATADIR)"
 
 clean::
 	$(RM) $(CLEANSUFFIXES)
-	$(RM) $(CLEANSUFFIXES:%=compat/msvcrt/%)
-	$(RM) $(CLEANSUFFIXES:%=compat/atomics/pthread/%)
-	$(RM) $(CLEANSUFFIXES:%=compat/%)
+	$(RM) $(addprefix compat/,$(CLEANSUFFIXES)) $(addprefix compat/*/,$(CLEANSUFFIXES))
 	$(RM) -r coverage-html
 	$(RM) -rf coverage.info coverage.info.in lcov
 
-distclean::
-	$(RM) $(DISTCLEANSUFFIXES)
+distclean:: clean
 	$(RM) .version avversion.h config.asm config.h mapfile  \
 		ffbuild/.config ffbuild/config.* libavutil/avconfig.h \
 		version.h libavutil/ffversion.h libavcodec/codec_names.h \
-		libavcodec/bsf_list.c libavformat/protocol_list.c
+		libavcodec/bsf_list.c libavformat/protocol_list.c \
+		libavcodec/codec_list.c libavcodec/parser_list.c \
+		libavformat/muxer_list.c libavformat/demuxer_list.c
 ifeq ($(SRC_LINK),src)
 	$(RM) src
 endif
@@ -155,6 +154,7 @@
 config:
 	$(SRC_PATH)/configure $(value FFMPEG_CONFIGURATION)
 
+build: all alltools examples testprogs
 check: all alltools examples testprogs fate
 
 include $(SRC_PATH)/tests/Makefile
@@ -170,4 +170,5 @@
 # so this saves some time on slow systems.
 .SUFFIXES:
 
-.PHONY: all all-yes alltools check *clean config install* testprogs uninstall*
+.PHONY: all all-yes alltools build check config testprogs
+.PHONY: *clean install* uninstall*

diff --git a/README.fuchsia b/README.fuchsia
index 0b2e771..416e866 100644
--- a/README.fuchsia
+++ b/README.fuchsia

@@ -71,14 +71,25 @@
 Performing An Upstream Merge
 ============================
 
-  # Update upstream/master to the desired upstream commit
-  TODO(dalesat): details
+  # Note: upstream/master automatically tracks upstream.
+
+  # Make sure your path is set up to use the right yasm and clang. yasm is
+  # built as part of fuchsia, and clang is in buildtools.
+
+  # Make sure the two sysroots you will need are available, namely
+  # ../../buildtools/linux-x64/sysroot and
+  # ../../buildtools/linux-arm64/sysroot
+  # The x64 version will be present on x64 linux hosts. The arm64 version
+  # can be obtained using cipd install.
 
   # Create a new branch based off of master.
   git checkout origin/master -b my_new_branch
 
-  # Rebase the Fuchsia changes on top of upstream/master in my_new_branch
-  TODO(dalesat): details
+  # Merge with master, taking theirs in case of conflicts.
+  git merge -X theirs upstream/master
+
+  # Remove removed files.
+  git diff --name-only --diff-filter=U | xargs git rm
 
   # Build the ffmpeg for all profiles and architectures
   ./fuchsia/scripts/build_ffmpeg.py x64
@@ -87,7 +98,6 @@
   # Update Fuchsia stuff based on the results of the builds
   ./fuchsia/scripts/copy_config.sh
   ./fuchsia/scripts/generate_gn.py
-  ./fuchsia/scripts/remove_unneeded_files.sh
 
   # Use 'git add' to add new files as necessary (not the build.* dirs)
   # Commit changes

diff --git a/README.md b/README.md
index 7d5a7ef..447347c 100644
--- a/README.md
+++ b/README.md

@@ -21,8 +21,6 @@
 * [ffplay](https://ffmpeg.org/ffplay.html) is a minimalistic multimedia player.
 * [ffprobe](https://ffmpeg.org/ffprobe.html) is a simple analysis tool to inspect
   multimedia content.
-* [ffserver](https://ffmpeg.org/ffserver.html) is a multimedia streaming server
-  for live broadcasts.
 * Additional small tools such as `aviocat`, `ismindex` and `qt-faststart`.
 
 ## Documentation

diff --git a/RELEASE b/RELEASE
index 4d9d11c..ff2c9d1 100644
--- a/RELEASE
+++ b/RELEASE

@@ -1 +1 @@
-3.4.2
+4.0.git

diff --git a/compat/cuda/dynlink_cuda.h b/compat/cuda/dynlink_cuda.h
deleted file mode 100644
index 3a13611..0000000
--- a/compat/cuda/dynlink_cuda.h
+++ /dev/null

@@ -1,98 +0,0 @@
-/*
- * This copyright notice applies to this header file only:
- *
- * Copyright (c) 2016
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the software, and to permit persons to whom the
- * software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#if !defined(AV_COMPAT_DYNLINK_CUDA_H) && !defined(CUDA_VERSION)
-#define AV_COMPAT_DYNLINK_CUDA_H
-
-#include <stddef.h>
-
-#define CUDA_VERSION 7050
-
-#if defined(_WIN32) || defined(__CYGWIN__)
-#define CUDAAPI __stdcall
-#else
-#define CUDAAPI
-#endif
-
-#define CU_CTX_SCHED_BLOCKING_SYNC 4
-
-typedef int CUdevice;
-typedef void* CUarray;
-typedef void* CUcontext;
-typedef void* CUstream;
-#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
-typedef unsigned long long CUdeviceptr;
-#else
-typedef unsigned int CUdeviceptr;
-#endif
-
-typedef enum cudaError_enum {
-    CUDA_SUCCESS = 0
-} CUresult;
-
-typedef enum CUmemorytype_enum {
-    CU_MEMORYTYPE_HOST = 1,
-    CU_MEMORYTYPE_DEVICE = 2
-} CUmemorytype;
-
-typedef struct CUDA_MEMCPY2D_st {
-    size_t srcXInBytes;
-    size_t srcY;
-    CUmemorytype srcMemoryType;
-    const void *srcHost;
-    CUdeviceptr srcDevice;
-    CUarray srcArray;
-    size_t srcPitch;
-
-    size_t dstXInBytes;
-    size_t dstY;
-    CUmemorytype dstMemoryType;
-    void *dstHost;
-    CUdeviceptr dstDevice;
-    CUarray dstArray;
-    size_t dstPitch;
-
-    size_t WidthInBytes;
-    size_t Height;
-} CUDA_MEMCPY2D;
-
-typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
-typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
-typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
-typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
-typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
-typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
-typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext *pctx);
-typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx);
-typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
-typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
-typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
-typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
-typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
-typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr);
-
-#endif

diff --git a/compat/cuda/dynlink_cuviddec.h b/compat/cuda/dynlink_cuviddec.h
deleted file mode 100644
index 4af78a1..0000000
--- a/compat/cuda/dynlink_cuviddec.h
+++ /dev/null

@@ -1,886 +0,0 @@
-/*
- * This copyright notice applies to this header file only:
- *
- * Copyright (c) 2010-2017 NVIDIA Corporation
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the software, and to permit persons to whom the
- * software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*****************************************************************************************************/
-//! \file cuviddec.h
-//! NVDECODE API provides video decoding interface to NVIDIA GPU devices.
-//! \date 2015-2017
-//! This file contains constants, structure definitions and function prototypes used for decoding.
-/*****************************************************************************************************/
-
-#if !defined(__CUDA_VIDEO_H__)
-#define __CUDA_VIDEO_H__
-
-#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
-#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020))
-#define __CUVID_DEVPTR64
-#endif
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif /* __cplusplus */
-
-#if defined(__CYGWIN__)
-typedef unsigned int tcu_ulong;
-#else
-typedef unsigned long tcu_ulong;
-#endif
-
-typedef void *CUvideodecoder;
-typedef struct _CUcontextlock_st *CUvideoctxlock;
-
-/*********************************************************************************/
-//! \enum cudaVideoCodec
-//! Video codec enums
-//! These enums are used in CUVIDDECODECREATEINFO and CUVIDDECODECAPS structures
-/*********************************************************************************/
-typedef enum cudaVideoCodec_enum {
-    cudaVideoCodec_MPEG1=0,                                         /**<  MPEG1             */
-    cudaVideoCodec_MPEG2,                                           /**<  MPEG2             */
-    cudaVideoCodec_MPEG4,                                           /**<  MPEG4             */
-    cudaVideoCodec_VC1,                                             /**<  VC1               */
-    cudaVideoCodec_H264,                                            /**<  H264              */
-    cudaVideoCodec_JPEG,                                            /**<  JPEG              */
-    cudaVideoCodec_H264_SVC,                                        /**<  H264-SVC          */
-    cudaVideoCodec_H264_MVC,                                        /**<  H264-MVC          */
-    cudaVideoCodec_HEVC,                                            /**<  HEVC              */
-    cudaVideoCodec_VP8,                                             /**<  VP8               */
-    cudaVideoCodec_VP9,                                             /**<  VP9               */
-    cudaVideoCodec_NumCodecs,                                       /**<  Max codecs        */
-    // Uncompressed YUV
-    cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')),   /**< Y,U,V (4:2:0)      */
-    cudaVideoCodec_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,V,U (4:2:0)      */
-    cudaVideoCodec_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2')),   /**< Y,UV  (4:2:0)      */
-    cudaVideoCodec_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')),   /**< YUYV/YUY2 (4:2:2)  */
-    cudaVideoCodec_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y'))    /**< UYVY (4:2:2)       */
-} cudaVideoCodec;
-
-/*********************************************************************************/
-//! \enum cudaVideoSurfaceFormat
-//! Video surface format enums used for output format of decoded output
-//! These enums are used in CUVIDDECODECREATEINFO structure
-/*********************************************************************************/
-typedef enum cudaVideoSurfaceFormat_enum {
-    cudaVideoSurfaceFormat_NV12=0,       /**< NV12 format          */
-    cudaVideoSurfaceFormat_P016=1        /**< 16 bit semiplaner format. Can be used for 10 bit(6LSB bits 0),
-                                              12 bit (4LSB bits 0) */
-} cudaVideoSurfaceFormat;
-
-/******************************************************************************************************************/
-//! \enum cudaVideoDeinterlaceMode
-//! Deinterlacing mode enums
-//! These enums are used in CUVIDDECODECREATEINFO structure
-//! Use cudaVideoDeinterlaceMode_Weave for progressive content and for content that doesn't need deinterlacing
-//! cudaVideoDeinterlaceMode_Adaptive needs more video memory than other DImodes
-/******************************************************************************************************************/
-typedef enum cudaVideoDeinterlaceMode_enum {
-    cudaVideoDeinterlaceMode_Weave=0,   /**< Weave both fields (no deinterlacing) */
-    cudaVideoDeinterlaceMode_Bob,       /**< Drop one field                       */
-    cudaVideoDeinterlaceMode_Adaptive   /**< Adaptive deinterlacing               */
-} cudaVideoDeinterlaceMode;
-
-/**************************************************************************************************************/
-//! \enum cudaVideoChromaFormat
-//! Chroma format enums
-//! These enums are used in CUVIDDECODECREATEINFO and CUVIDDECODECAPS structures
-//! JPEG supports Monochrome, YUV 4:2:0, YUV 4:2:2 and YUV 4:4:4 chroma formats.
-//! H264, HEVC, VP9, VP8, VC1, MPEG1, MPEG2 and MPEG4 support YUV 4:2:0 chroma format only.
-/**************************************************************************************************************/
-typedef enum cudaVideoChromaFormat_enum {
-    cudaVideoChromaFormat_Monochrome=0,  /**< MonoChrome */
-    cudaVideoChromaFormat_420,           /**< YUV 4:2:0  */
-    cudaVideoChromaFormat_422,           /**< YUV 4:2:2  */
-    cudaVideoChromaFormat_444            /**< YUV 4:4:4  */
-} cudaVideoChromaFormat;
-
-/*************************************************************************************************************/
-//! \enum cudaVideoCreateFlags
-//! Decoder flag enums to select preferred decode path
-//! cudaVideoCreate_Default and cudaVideoCreate_PreferCUVID are most optimized, use these whenever possible
-/*************************************************************************************************************/
-typedef enum cudaVideoCreateFlags_enum {
-    cudaVideoCreate_Default     = 0x00,     /**< Default operation mode: use dedicated video engines                        */
-    cudaVideoCreate_PreferCUDA  = 0x01,     /**< Use CUDA-based decoder (requires valid vidLock object for multi-threading) */
-    cudaVideoCreate_PreferDXVA  = 0x02,     /**< Go through DXVA internally if possible (requires D3D9 interop)             */
-    cudaVideoCreate_PreferCUVID = 0x04      /**< Use dedicated video engines directly                                       */
-} cudaVideoCreateFlags;
-
-
-/**************************************************************************************************************/
-//! \struct CUVIDDECODECAPS;
-//! This structure is used in cuvidGetDecoderCaps API
-/**************************************************************************************************************/
-typedef struct _CUVIDDECODECAPS
-{
-    cudaVideoCodec          eCodecType;                 /**< IN: cudaVideoCodec_XXX                                 */
-    cudaVideoChromaFormat   eChromaFormat;              /**< IN: cudaVideoChromaFormat_XXX                          */
-    unsigned int            nBitDepthMinus8;            /**< IN: The Value "BitDepth minus 8"                       */
-    unsigned int            reserved1[3];               /**< Reserved for future use - set to zero                  */
-
-    unsigned char           bIsSupported;               /**< OUT: 1 if codec supported, 0 if not supported          */
-    unsigned char           reserved2[3];               /**< Reserved for future use - set to zero                  */
-    unsigned int            nMaxWidth;                  /**< OUT: Max supported coded width in pixels               */
-    unsigned int            nMaxHeight;                 /**< OUT: Max supported coded height in pixels              */
-    unsigned int            nMaxMBCount;                /**< OUT: Max supported macroblock count
-                                                                  CodedWidth*CodedHeight/256 must be <= nMaxMBCount */
-    unsigned short          nMinWidth;                  /**< OUT: Min supported coded width in pixels               */
-    unsigned short          nMinHeight;                 /**< OUT: Min supported coded height in pixels              */
-    unsigned int            reserved3[11];              /**< Reserved for future use - set to zero                  */
-} CUVIDDECODECAPS;
-
-/**************************************************************************************************************/
-//! \struct CUVIDDECODECREATEINFO
-//! This structure is used in cuvidCreateDecoder API
-/**************************************************************************************************************/
-typedef struct _CUVIDDECODECREATEINFO
-{
-    tcu_ulong ulWidth;                  /**< IN: Coded sequence width in pixels                                             */
-    tcu_ulong ulHeight;                 /**< IN: Coded sequence height in pixels                                            */
-    tcu_ulong ulNumDecodeSurfaces;      /**< IN: Maximum number of internal decode surfaces                                 */
-    cudaVideoCodec CodecType;           /**< IN: cudaVideoCodec_XXX                                                         */
-    cudaVideoChromaFormat ChromaFormat; /**< IN: cudaVideoChromaFormat_XXX                                                  */
-    tcu_ulong ulCreationFlags;          /**< IN: Decoder creation flags (cudaVideoCreateFlags_XXX)                          */
-    tcu_ulong bitDepthMinus8;           /**< IN: The value "BitDepth minus 8"                                               */
-    tcu_ulong ulIntraDecodeOnly;        /**< IN: Set 1 only if video has all intra frames (default value is 0). This will
-                                             optimize video memory for Intra frames only decoding. The support is limited
-                                             to specific codecs(H264 rightnow), the flag will be ignored for codecs which
-                                             are not supported. However decoding might fail if the flag is enabled in case
-                                             of supported codecs for regular bit streams having P and/or B frames.          */
-    tcu_ulong Reserved1[3];             /**< Reserved for future use - set to zero                                          */
-    /**
-    * IN: area of the frame that should be displayed
-    */
-    struct {
-        short left;
-        short top;
-        short right;
-        short bottom;
-    } display_area;
-
-    cudaVideoSurfaceFormat OutputFormat;       /**< IN: cudaVideoSurfaceFormat_XXX                                     */
-    cudaVideoDeinterlaceMode DeinterlaceMode;  /**< IN: cudaVideoDeinterlaceMode_XXX                                   */
-    tcu_ulong ulTargetWidth;                   /**< IN: Post-processed output width (Should be aligned to 2)           */
-    tcu_ulong ulTargetHeight;                  /**< IN: Post-processed output height (Should be aligbed to 2)          */
-    tcu_ulong ulNumOutputSurfaces;             /**< IN: Maximum number of output surfaces simultaneously mapped        */
-    CUvideoctxlock vidLock;                    /**< IN: If non-NULL, context lock used for synchronizing ownership of
-                                                    the cuda context. Needed for cudaVideoCreate_PreferCUDA decode     */
-    /**
-    * IN: target rectangle in the output frame (for aspect ratio conversion)
-    * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used
-    */
-    struct {
-        short left;
-        short top;
-        short right;
-        short bottom;
-    } target_rect;
-    tcu_ulong Reserved2[5];                    /**< Reserved for future use - set to zero */
-} CUVIDDECODECREATEINFO;
-
-/*********************************************************/
-//! \struct CUVIDH264DPBENTRY
-//! H.264 DPB entry
-//! This structure is used in CUVIDH264PICPARAMS structure
-/*********************************************************/
-typedef struct _CUVIDH264DPBENTRY
-{
-    int PicIdx;                 /**< picture index of reference frame                                        */
-    int FrameIdx;               /**< frame_num(short-term) or LongTermFrameIdx(long-term)                    */
-    int is_long_term;           /**< 0=short term reference, 1=long term reference                           */
-    int not_existing;           /**< non-existing reference frame (corresponding PicIdx should be set to -1) */
-    int used_for_reference;     /**< 0=unused, 1=top_field, 2=bottom_field, 3=both_fields                    */
-    int FieldOrderCnt[2];       /**< field order count of top and bottom fields                              */
-} CUVIDH264DPBENTRY;
-
-/************************************************************/
-//! \struct CUVIDH264MVCEXT
-//! H.264 MVC picture parameters ext
-//! This structure is used in CUVIDH264PICPARAMS structure
-/************************************************************/
-typedef struct _CUVIDH264MVCEXT
-{
-    int num_views_minus1;                  /**< Max number of coded views minus 1 in video : Range - 0 to 1023              */
-    int view_id;                           /**< view identifier                                                             */
-    unsigned char inter_view_flag;         /**< 1 if used for inter-view prediction, 0 if not                               */
-    unsigned char num_inter_view_refs_l0;  /**< number of inter-view ref pics in RefPicList0                                */
-    unsigned char num_inter_view_refs_l1;  /**< number of inter-view ref pics in RefPicList1                                */
-    unsigned char MVCReserved8Bits;        /**< Reserved bits                                                               */
-    int InterViewRefsL0[16];               /**< view id of the i-th view component for inter-view prediction in RefPicList0 */
-    int InterViewRefsL1[16];               /**< view id of the i-th view component for inter-view prediction in RefPicList1 */
-} CUVIDH264MVCEXT;
-
-/*********************************************************/
-//! \struct CUVIDH264SVCEXT
-//! H.264 SVC picture parameters ext
-//! This structure is used in CUVIDH264PICPARAMS structure
-/*********************************************************/
-typedef struct _CUVIDH264SVCEXT
-{
-    unsigned char profile_idc;
-    unsigned char level_idc;
-    unsigned char DQId;
-    unsigned char DQIdMax;
-    unsigned char disable_inter_layer_deblocking_filter_idc;
-    unsigned char ref_layer_chroma_phase_y_plus1;
-    signed char   inter_layer_slice_alpha_c0_offset_div2;
-    signed char   inter_layer_slice_beta_offset_div2;
-
-    unsigned short DPBEntryValidFlag;
-    unsigned char inter_layer_deblocking_filter_control_present_flag;
-    unsigned char extended_spatial_scalability_idc;
-    unsigned char adaptive_tcoeff_level_prediction_flag;
-    unsigned char slice_header_restriction_flag;
-    unsigned char chroma_phase_x_plus1_flag;
-    unsigned char chroma_phase_y_plus1;
-
-    unsigned char tcoeff_level_prediction_flag;
-    unsigned char constrained_intra_resampling_flag;
-    unsigned char ref_layer_chroma_phase_x_plus1_flag;
-    unsigned char store_ref_base_pic_flag;
-    unsigned char Reserved8BitsA;
-    unsigned char Reserved8BitsB;
-
-    short scaled_ref_layer_left_offset;
-    short scaled_ref_layer_top_offset;
-    short scaled_ref_layer_right_offset;
-    short scaled_ref_layer_bottom_offset;
-    unsigned short Reserved16Bits;
-    struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded.
-                                             Linked list ends at the target layer. */
-    int bRefBaseLayer;                  /**< whether to store ref base pic */
-} CUVIDH264SVCEXT;
-
-/******************************************************/
-//! \struct CUVIDH264PICPARAMS
-//! H.264 picture parameters
-//! This structure is used in CUVIDPICPARAMS structure
-/******************************************************/
-typedef struct _CUVIDH264PICPARAMS
-{
-    // SPS
-    int log2_max_frame_num_minus4;
-    int pic_order_cnt_type;
-    int log2_max_pic_order_cnt_lsb_minus4;
-    int delta_pic_order_always_zero_flag;
-    int frame_mbs_only_flag;
-    int direct_8x8_inference_flag;
-    int num_ref_frames;             // NOTE: shall meet level 4.1 restrictions
-    unsigned char residual_colour_transform_flag;
-    unsigned char bit_depth_luma_minus8;    // Must be 0 (only 8-bit supported)
-    unsigned char bit_depth_chroma_minus8;  // Must be 0 (only 8-bit supported)
-    unsigned char qpprime_y_zero_transform_bypass_flag;
-    // PPS
-    int entropy_coding_mode_flag;
-    int pic_order_present_flag;
-    int num_ref_idx_l0_active_minus1;
-    int num_ref_idx_l1_active_minus1;
-    int weighted_pred_flag;
-    int weighted_bipred_idc;
-    int pic_init_qp_minus26;
-    int deblocking_filter_control_present_flag;
-    int redundant_pic_cnt_present_flag;
-    int transform_8x8_mode_flag;
-    int MbaffFrameFlag;
-    int constrained_intra_pred_flag;
-    int chroma_qp_index_offset;
-    int second_chroma_qp_index_offset;
-    int ref_pic_flag;
-    int frame_num;
-    int CurrFieldOrderCnt[2];
-    // DPB
-    CUVIDH264DPBENTRY dpb[16];          // List of reference frames within the DPB
-    // Quantization Matrices (raster-order)
-    unsigned char WeightScale4x4[6][16];
-    unsigned char WeightScale8x8[2][64];
-    // FMO/ASO
-    unsigned char fmo_aso_enable;
-    unsigned char num_slice_groups_minus1;
-    unsigned char slice_group_map_type;
-    signed char pic_init_qs_minus26;
-    unsigned int slice_group_change_rate_minus1;
-    union
-    {
-        unsigned long long slice_group_map_addr;
-        const unsigned char *pMb2SliceGroupMap;
-    } fmo;
-    unsigned int Reserved[12];
-    // SVC/MVC
-    union
-    {
-        CUVIDH264MVCEXT mvcext;
-        CUVIDH264SVCEXT svcext;
-    };
-} CUVIDH264PICPARAMS;
-
-
-/********************************************************/
-//! \struct CUVIDMPEG2PICPARAMS
-//! MPEG-2 picture parameters
-//! This structure is used in CUVIDPICPARAMS structure
-/********************************************************/
-typedef struct _CUVIDMPEG2PICPARAMS
-{
-    int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
-    int BackwardRefIdx;         // Picture index of backward reference (B-frames)
-    int picture_coding_type;
-    int full_pel_forward_vector;
-    int full_pel_backward_vector;
-    int f_code[2][2];
-    int intra_dc_precision;
-    int frame_pred_frame_dct;
-    int concealment_motion_vectors;
-    int q_scale_type;
-    int intra_vlc_format;
-    int alternate_scan;
-    int top_field_first;
-    // Quantization matrices (raster order)
-    unsigned char QuantMatrixIntra[64];
-    unsigned char QuantMatrixInter[64];
-} CUVIDMPEG2PICPARAMS;
-
-// MPEG-4 has VOP types instead of Picture types
-#define I_VOP 0
-#define P_VOP 1
-#define B_VOP 2
-#define S_VOP 3
-
-/*******************************************************/
-//! \struct CUVIDMPEG4PICPARAMS
-//! MPEG-4 picture parameters
-//! This structure is used in CUVIDPICPARAMS structure
-/*******************************************************/
-typedef struct _CUVIDMPEG4PICPARAMS
-{
-    int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
-    int BackwardRefIdx;         // Picture index of backward reference (B-frames)
-    // VOL
-    int video_object_layer_width;
-    int video_object_layer_height;
-    int vop_time_increment_bitcount;
-    int top_field_first;
-    int resync_marker_disable;
-    int quant_type;
-    int quarter_sample;
-    int short_video_header;
-    int divx_flags;
-    // VOP
-    int vop_coding_type;
-    int vop_coded;
-    int vop_rounding_type;
-    int alternate_vertical_scan_flag;
-    int interlaced;
-    int vop_fcode_forward;
-    int vop_fcode_backward;
-    int trd[2];
-    int trb[2];
-    // Quantization matrices (raster order)
-    unsigned char QuantMatrixIntra[64];
-    unsigned char QuantMatrixInter[64];
-    int gmc_enabled;
-} CUVIDMPEG4PICPARAMS;
-
-/********************************************************/
-//! \struct CUVIDVC1PICPARAMS
-//! VC1 picture parameters
-//! This structure is used in CUVIDPICPARAMS structure
-/********************************************************/
-typedef struct _CUVIDVC1PICPARAMS
-{
-    int ForwardRefIdx;      /**< Picture index of forward reference (P/B-frames) */
-    int BackwardRefIdx;     /**< Picture index of backward reference (B-frames)  */
-    int FrameWidth;         /**< Actual frame width                              */
-    int FrameHeight;        /**< Actual frame height                             */
-    // PICTURE
-    int intra_pic_flag;     /**< Set to 1 for I,BI frames */
-    int ref_pic_flag;       /**< Set to 1 for I,P frames  */
-    int progressive_fcm;    /**< Progressive frame        */
-    // SEQUENCE
-    int profile;
-    int postprocflag;
-    int pulldown;
-    int interlace;
-    int tfcntrflag;
-    int finterpflag;
-    int psf;
-    int multires;
-    int syncmarker;
-    int rangered;
-    int maxbframes;
-    // ENTRYPOINT
-    int panscan_flag;
-    int refdist_flag;
-    int extended_mv;
-    int dquant;
-    int vstransform;
-    int loopfilter;
-    int fastuvmc;
-    int overlap;
-    int quantizer;
-    int extended_dmv;
-    int range_mapy_flag;
-    int range_mapy;
-    int range_mapuv_flag;
-    int range_mapuv;
-    int rangeredfrm;    // range reduction state
-} CUVIDVC1PICPARAMS;
-
-/***********************************************************/
-//! \struct CUVIDJPEGPICPARAMS
-//! JPEG picture parameters
-//! This structure is used in CUVIDPICPARAMS structure
-/***********************************************************/
-typedef struct _CUVIDJPEGPICPARAMS
-{
-    int Reserved;
-} CUVIDJPEGPICPARAMS;
-
-
-/*******************************************************/
-//! \struct CUVIDHEVCPICPARAMS
-//! HEVC picture parameters
-//! This structure is used in CUVIDPICPARAMS structure
-/*******************************************************/
-typedef struct _CUVIDHEVCPICPARAMS
-{
-    // sps
-    int pic_width_in_luma_samples;
-    int pic_height_in_luma_samples;
-    unsigned char log2_min_luma_coding_block_size_minus3;
-    unsigned char log2_diff_max_min_luma_coding_block_size;
-    unsigned char log2_min_transform_block_size_minus2;
-    unsigned char log2_diff_max_min_transform_block_size;
-    unsigned char pcm_enabled_flag;
-    unsigned char log2_min_pcm_luma_coding_block_size_minus3;
-    unsigned char log2_diff_max_min_pcm_luma_coding_block_size;
-    unsigned char pcm_sample_bit_depth_luma_minus1;
-
-    unsigned char pcm_sample_bit_depth_chroma_minus1;
-    unsigned char pcm_loop_filter_disabled_flag;
-    unsigned char strong_intra_smoothing_enabled_flag;
-    unsigned char max_transform_hierarchy_depth_intra;
-    unsigned char max_transform_hierarchy_depth_inter;
-    unsigned char amp_enabled_flag;
-    unsigned char separate_colour_plane_flag;
-    unsigned char log2_max_pic_order_cnt_lsb_minus4;
-
-    unsigned char num_short_term_ref_pic_sets;
-    unsigned char long_term_ref_pics_present_flag;
-    unsigned char num_long_term_ref_pics_sps;
-    unsigned char sps_temporal_mvp_enabled_flag;
-    unsigned char sample_adaptive_offset_enabled_flag;
-    unsigned char scaling_list_enable_flag;
-    unsigned char IrapPicFlag;
-    unsigned char IdrPicFlag;
-
-    unsigned char bit_depth_luma_minus8;
-    unsigned char bit_depth_chroma_minus8;
-    unsigned char reserved1[14];
-
-    // pps
-    unsigned char dependent_slice_segments_enabled_flag;
-    unsigned char slice_segment_header_extension_present_flag;
-    unsigned char sign_data_hiding_enabled_flag;
-    unsigned char cu_qp_delta_enabled_flag;
-    unsigned char diff_cu_qp_delta_depth;
-    signed char init_qp_minus26;
-    signed char pps_cb_qp_offset;
-    signed char pps_cr_qp_offset;
-
-    unsigned char constrained_intra_pred_flag;
-    unsigned char weighted_pred_flag;
-    unsigned char weighted_bipred_flag;
-    unsigned char transform_skip_enabled_flag;
-    unsigned char transquant_bypass_enabled_flag;
-    unsigned char entropy_coding_sync_enabled_flag;
-    unsigned char log2_parallel_merge_level_minus2;
-    unsigned char num_extra_slice_header_bits;
-
-    unsigned char loop_filter_across_tiles_enabled_flag;
-    unsigned char loop_filter_across_slices_enabled_flag;
-    unsigned char output_flag_present_flag;
-    unsigned char num_ref_idx_l0_default_active_minus1;
-    unsigned char num_ref_idx_l1_default_active_minus1;
-    unsigned char lists_modification_present_flag;
-    unsigned char cabac_init_present_flag;
-    unsigned char pps_slice_chroma_qp_offsets_present_flag;
-
-    unsigned char deblocking_filter_override_enabled_flag;
-    unsigned char pps_deblocking_filter_disabled_flag;
-    signed char   pps_beta_offset_div2;
-    signed char   pps_tc_offset_div2;
-    unsigned char tiles_enabled_flag;
-    unsigned char uniform_spacing_flag;
-    unsigned char num_tile_columns_minus1;
-    unsigned char num_tile_rows_minus1;
-
-    unsigned short column_width_minus1[21];
-    unsigned short row_height_minus1[21];
-    unsigned int   reserved3[15];
-
-    // RefPicSets
-    int NumBitsForShortTermRPSInSlice;
-    int NumDeltaPocsOfRefRpsIdx;
-    int NumPocTotalCurr;
-    int NumPocStCurrBefore;
-    int NumPocStCurrAfter;
-    int NumPocLtCurr;
-    int CurrPicOrderCntVal;
-    int RefPicIdx[16];                      // [refpic] Indices of valid reference pictures (-1 if unused for reference)
-    int PicOrderCntVal[16];                 // [refpic]
-    unsigned char IsLongTerm[16];           // [refpic] 0=not a long-term reference, 1=long-term reference
-    unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15)
-    unsigned char RefPicSetStCurrAfter[8];  // [0..NumPocStCurrAfter-1] -> refpic (0..15)
-    unsigned char RefPicSetLtCurr[8];       // [0..NumPocLtCurr-1] -> refpic (0..15)
-    unsigned char RefPicSetInterLayer0[8];
-    unsigned char RefPicSetInterLayer1[8];
-    unsigned int  reserved4[12];
-
-    // scaling lists (diag order)
-    unsigned char ScalingList4x4[6][16];       // [matrixId][i]
-    unsigned char ScalingList8x8[6][64];       // [matrixId][i]
-    unsigned char ScalingList16x16[6][64];     // [matrixId][i]
-    unsigned char ScalingList32x32[2][64];     // [matrixId][i]
-    unsigned char ScalingListDCCoeff16x16[6];  // [matrixId]
-    unsigned char ScalingListDCCoeff32x32[2];  // [matrixId]
-} CUVIDHEVCPICPARAMS;
-
-
-/***********************************************************/
-//! \struct CUVIDVP8PICPARAMS
-//! VP8 picture parameters
-//! This structure is used in CUVIDPICPARAMS structure
-/***********************************************************/
-typedef struct _CUVIDVP8PICPARAMS
-{
-    int width;
-    int height;
-    unsigned int first_partition_size;
-    //Frame Indexes
-    unsigned char LastRefIdx;
-    unsigned char GoldenRefIdx;
-    unsigned char AltRefIdx;
-    union {
-        struct {
-            unsigned char frame_type : 1;    /**< 0 = KEYFRAME, 1 = INTERFRAME  */
-            unsigned char version : 3;
-            unsigned char show_frame : 1;
-            unsigned char update_mb_segmentation_data : 1;    /**< Must be 0 if segmentation is not enabled */
-            unsigned char Reserved2Bits : 2;
-        };
-        unsigned char wFrameTagFlags;
-    };
-    unsigned char Reserved1[4];
-    unsigned int  Reserved2[3];
-} CUVIDVP8PICPARAMS;
-
-/***********************************************************/
-//! \struct CUVIDVP9PICPARAMS
-//! VP9 picture parameters
-//! This structure is used in CUVIDPICPARAMS structure
-/***********************************************************/
-typedef struct _CUVIDVP9PICPARAMS
-{
-    unsigned int width;
-    unsigned int height;
-
-    //Frame Indices
-    unsigned char LastRefIdx;
-    unsigned char GoldenRefIdx;
-    unsigned char AltRefIdx;
-    unsigned char colorSpace;
-
-    unsigned short profile : 3;
-    unsigned short frameContextIdx : 2;
-    unsigned short frameType : 1;
-    unsigned short showFrame : 1;
-    unsigned short errorResilient : 1;
-    unsigned short frameParallelDecoding : 1;
-    unsigned short subSamplingX : 1;
-    unsigned short subSamplingY : 1;
-    unsigned short intraOnly : 1;
-    unsigned short allow_high_precision_mv : 1;
-    unsigned short refreshEntropyProbs : 1;
-    unsigned short reserved2Bits : 2;
-
-    unsigned short reserved16Bits;
-
-    unsigned char  refFrameSignBias[4];
-
-    unsigned char bitDepthMinus8Luma;
-    unsigned char bitDepthMinus8Chroma;
-    unsigned char loopFilterLevel;
-    unsigned char loopFilterSharpness;
-
-    unsigned char modeRefLfEnabled;
-    unsigned char log2_tile_columns;
-    unsigned char log2_tile_rows;
-
-    unsigned char segmentEnabled : 1;
-    unsigned char segmentMapUpdate : 1;
-    unsigned char segmentMapTemporalUpdate : 1;
-    unsigned char segmentFeatureMode : 1;
-    unsigned char reserved4Bits : 4;
-
-
-    unsigned char segmentFeatureEnable[8][4];
-    short         segmentFeatureData[8][4];
-    unsigned char mb_segment_tree_probs[7];
-    unsigned char segment_pred_probs[3];
-    unsigned char reservedSegment16Bits[2];
-
-    int qpYAc;
-    int qpYDc;
-    int qpChDc;
-    int qpChAc;
-
-    unsigned int activeRefIdx[3];
-    unsigned int resetFrameContext;
-    unsigned int mcomp_filter_type;
-    unsigned int mbRefLfDelta[4];
-    unsigned int mbModeLfDelta[2];
-    unsigned int frameTagSize;
-    unsigned int offsetToDctParts;
-    unsigned int reserved128Bits[4];
-
-} CUVIDVP9PICPARAMS;
-
-
-/******************************************************************************************/
-//! \struct CUVIDPICPARAMS
-//! Picture parameters for decoding
-//! This structure is used in cuvidDecodePicture API
-//! IN  for cuvidDecodePicture
-/******************************************************************************************/
-typedef struct _CUVIDPICPARAMS
-{
-    int PicWidthInMbs;                     /**< IN: Coded frame size in macroblocks                           */
-    int FrameHeightInMbs;                  /**< IN: Coded frame height in macroblocks                         */
-    int CurrPicIdx;                        /**< IN: Output index of the current picture                       */
-    int field_pic_flag;                    /**< IN: 0=frame picture, 1=field picture                          */
-    int bottom_field_flag;                 /**< IN: 0=top field, 1=bottom field (ignored if field_pic_flag=0) */
-    int second_field;                      /**< IN: Second field of a complementary field pair                */
-    // Bitstream data
-    unsigned int nBitstreamDataLen;        /**< IN: Number of bytes in bitstream data buffer                  */
-    const unsigned char *pBitstreamData;   /**< IN: Ptr to bitstream data for this picture (slice-layer)      */
-    unsigned int nNumSlices;               /**< IN: Number of slices in this picture                          */
-    const unsigned int *pSliceDataOffsets; /**< IN: nNumSlices entries, contains offset of each slice within
-                                                        the bitstream data buffer                             */
-    int ref_pic_flag;                      /**< IN: This picture is a reference picture                       */
-    int intra_pic_flag;                    /**< IN: This picture is entirely intra coded                      */
-    unsigned int Reserved[30];             /**< Reserved for future use                                       */
-    // IN: Codec-specific data
-    union {
-        CUVIDMPEG2PICPARAMS mpeg2;         /**< Also used for MPEG-1 */
-        CUVIDH264PICPARAMS  h264;
-        CUVIDVC1PICPARAMS   vc1;
-        CUVIDMPEG4PICPARAMS mpeg4;
-        CUVIDJPEGPICPARAMS  jpeg;
-        CUVIDHEVCPICPARAMS  hevc;
-        CUVIDVP8PICPARAMS   vp8;
-        CUVIDVP9PICPARAMS   vp9;
-        unsigned int CodecReserved[1024];
-    } CodecSpecific;
-} CUVIDPICPARAMS;
-
-
-/******************************************************/
-//! \struct CUVIDPROCPARAMS
-//! Picture parameters for postprocessing
-//! This structure is used in cuvidMapVideoFrame API
-/******************************************************/
-typedef struct _CUVIDPROCPARAMS
-{
-    int progressive_frame;              /**< IN: Input is progressive (deinterlace_mode will be ignored)                */
-    int second_field;                   /**< IN: Output the second field (ignored if deinterlace mode is Weave)         */
-    int top_field_first;                /**< IN: Input frame is top field first (1st field is top, 2nd field is bottom) */
-    int unpaired_field;                 /**< IN: Input only contains one field (2nd field is invalid)                   */
-    // The fields below are used for raw YUV input
-    unsigned int reserved_flags;        /**< Reserved for future use (set to zero)                                      */
-    unsigned int reserved_zero;         /**< Reserved (set to zero)                                                     */
-    unsigned long long raw_input_dptr;  /**< IN: Input CUdeviceptr for raw YUV extensions                               */
-    unsigned int raw_input_pitch;       /**< IN: pitch in bytes of raw YUV input (should be aligned appropriately)      */
-    unsigned int raw_input_format;      /**< IN: Input YUV format (cudaVideoCodec_enum)                                 */
-    unsigned long long raw_output_dptr; /**< IN: Output CUdeviceptr for raw YUV extensions                              */
-    unsigned int raw_output_pitch;      /**< IN: pitch in bytes of raw YUV output (should be aligned appropriately)     */
-    unsigned int Reserved1;             /**< Reserved for future use (set to zero)                                      */
-    CUstream output_stream;             /**< IN: stream object used by cuvidMapVideoFrame                               */
-    unsigned int Reserved[46];          /**< Reserved for future use (set to zero)                                      */
-    void *Reserved2[2];                 /**< Reserved for future use (set to zero)                                      */
-} CUVIDPROCPARAMS;
-
-
-/***********************************************************************************************************/
-//! VIDEO_DECODER
-//!
-//! In order to minimize decode latencies, there should be always at least 2 pictures in the decode
-//! queue at any time, in order to make sure that all decode engines are always busy.
-//!
-//! Overall data flow:
-//!  - cuvidGetDecoderCaps(...)
-//!  - cuvidCreateDecoder(...)
-//!  - For each picture:
-//!    + cuvidDecodePicture(N)
-//!    + cuvidMapVideoFrame(N-4)
-//!    + do some processing in cuda
-//!    + cuvidUnmapVideoFrame(N-4)
-//!    + cuvidDecodePicture(N+1)
-//!    + cuvidMapVideoFrame(N-3)
-//!    + ...
-//!  - cuvidDestroyDecoder(...)
-//!
-//! NOTE:
-//! - When the cuda context is created from a D3D device, the D3D device must also be created
-//!   with the D3DCREATE_MULTITHREADED flag.
-//! - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces)
-//! - cuvidDecodePicture may block the calling thread if there are too many pictures pending
-//!   in the decode queue
-/***********************************************************************************************************/
-
-
-/**********************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS *pdc)
-//! Queries decode capabilities of NVDEC-HW based on CodecType, ChromaFormat and BitDepthMinus8 parameters.
-//! 1. Application fills IN parameters CodecType, ChromaFormat and BitDepthMinus8 of CUVIDDECODECAPS structure
-//! 2. On calling cuvidGetDecoderCaps, driver fills OUT parameters if the IN parameters are supported
-//!    If IN parameters passed to the driver are not supported by NVDEC-HW, then all OUT params are set to 0.
-//! E.g. on Geforce GTX 960:
-//!   App fills - eCodecType = cudaVideoCodec_H264; eChromaFormat = cudaVideoChromaFormat_420; nBitDepthMinus8 = 0;
-//!   Given IN parameters are supported, hence driver fills: bIsSupported = 1; nMinWidth   = 48; nMinHeight  = 16;
-//!   nMaxWidth = 4096; nMaxHeight = 4096; nMaxMBCount = 65536;
-//! CodedWidth*CodedHeight/256 must be less than or equal to nMaxMBCount
-/**********************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidGetDecoderCaps(CUVIDDECODECAPS *pdc);
-
-/********************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci)
-//! Create the decoder object based on pdci. A handle to the created decoder is returned
-/********************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci);
-/********************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder)
-//! Destroy the decoder object.
-/********************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder hDecoder);
-
-/********************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams)
-//! Decode a single picture (field or frame)
-//! Kicks off HW decoding
-/********************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams);
-
-
-#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL)
-/************************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr,
-//!                                         unsigned int  *pPitch, CUVIDPROCPARAMS *pVPP);
-//! Post-process and map video frame corresponding to nPicIdx for use in cuda. Returns cuda device pointer and associated
-//! pitch of the video frame
-/************************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
-                                           unsigned int *pDevPtr, unsigned int *pPitch,
-                                           CUVIDPROCPARAMS *pVPP);
-
-/********************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr)
-//! Unmap a previously mapped video frame
-/********************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
-#endif
-
-#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
-/************************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
-//!                                           unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
-//! Post-process and map video frame corresponding to nPicIdx for use in cuda. Returns cuda device pointer and associated
-//! pitch of the video frame
-/************************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
-                                             unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
-
-/********************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
-//! Unmap a previously mapped video frame
-/********************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
-
-#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL)
-#define tcuvidMapVideoFrame      tcuvidMapVideoFrame64
-#define tcuvidUnmapVideoFrame    tcuvidUnmapVideoFrame64
-#endif
-#endif
-
-
-
-/********************************************************************************************************************/
-//!
-//! Context-locking: to facilitate multi-threaded implementations, the following 4 functions
-//! provide a simple mutex-style host synchronization. If a non-NULL context is specified
-//! in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given
-//! context before making any cuda calls.
-//! A multi-threaded application could create a lock associated with a context handle so that
-//! multiple threads can safely share the same cuda context:
-//!  - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context
-//!    that can be passed to cuvidCtxLockCreate.
-//!  - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section.
-//!
-//! NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video
-//! decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls).
-/********************************************************************************************************************/
-
-/********************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx)
-//! This API is used to create CtxLock object
-/********************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx);
-
-/********************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck)
-//! This API is used to free CtxLock object
-/********************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidCtxLockDestroy(CUvideoctxlock lck);
-
-/********************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags)
-//! This API is used to acquire ctxlock
-/********************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags);
-
-/********************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags)
-//! This API is used to release ctxlock
-/********************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags);
-
-/**********************************************************************************************/
-
-#if defined(__cplusplus)
-}
-#endif /* __cplusplus */
-
-#endif // __CUDA_VIDEO_H__

diff --git a/compat/cuda/dynlink_loader.h b/compat/cuda/dynlink_loader.h
index 7d2c874..9f93465 100644
--- a/compat/cuda/dynlink_loader.h
+++ b/compat/cuda/dynlink_loader.h

@@ -1,268 +1,33 @@
 /*
- * This copyright notice applies to this header file only:
+ * This file is part of FFmpeg.
  *
- * Copyright (c) 2016
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the software, and to permit persons to whom the
- * software is furnished to do so, subject to the following
- * conditions:
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef AV_COMPAT_CUDA_DYNLINK_LOADER_H
 #define AV_COMPAT_CUDA_DYNLINK_LOADER_H
 
-#include "compat/cuda/dynlink_cuda.h"
-#include "compat/cuda/dynlink_nvcuvid.h"
-#include "compat/nvenc/nvEncodeAPI.h"
+#include "libavutil/log.h"
 #include "compat/w32dlfcn.h"
 
-#include "libavutil/log.h"
-#include "libavutil/error.h"
+#define FFNV_LOAD_FUNC(path) dlopen((path), RTLD_LAZY)
+#define FFNV_SYM_FUNC(lib, sym) dlsym((lib), (sym))
+#define FFNV_FREE_FUNC(lib) dlclose(lib)
+#define FFNV_LOG_FUNC(logctx, msg, ...) av_log(logctx, AV_LOG_ERROR, msg,  __VA_ARGS__)
+#define FFNV_DEBUG_LOG_FUNC(logctx, msg, ...) av_log(logctx, AV_LOG_DEBUG, msg,  __VA_ARGS__)
 
-#if defined(_WIN32)
-# define LIB_HANDLE HMODULE
-#else
-# define LIB_HANDLE void*
-#endif
-
-#if defined(_WIN32) || defined(__CYGWIN__)
-# define CUDA_LIBNAME "nvcuda.dll"
-# define NVCUVID_LIBNAME "nvcuvid.dll"
-# if ARCH_X86_64
-#  define NVENC_LIBNAME "nvEncodeAPI64.dll"
-# else
-#  define NVENC_LIBNAME "nvEncodeAPI.dll"
-# endif
-#else
-# define CUDA_LIBNAME "libcuda.so.1"
-# define NVCUVID_LIBNAME "libnvcuvid.so.1"
-# define NVENC_LIBNAME "libnvidia-encode.so.1"
-#endif
-
-#define LOAD_LIBRARY(l, path)                                     \
-    do {                                                          \
-        if (!((l) = dlopen(path, RTLD_LAZY))) {                   \
-            av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", path); \
-            ret = AVERROR_UNKNOWN;                                \
-            goto error;                                           \
-        }                                                         \
-        av_log(NULL, AV_LOG_TRACE, "Loaded lib: %s\n", path);     \
-    } while (0)
-
-#define LOAD_SYMBOL(fun, tp, symbol)                                \
-    do {                                                            \
-        if (!((f->fun) = (tp*)dlsym(f->lib, symbol))) {             \
-            av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", symbol); \
-            ret = AVERROR_UNKNOWN;                                  \
-            goto error;                                             \
-        }                                                           \
-        av_log(NULL, AV_LOG_TRACE, "Loaded sym: %s\n", symbol);     \
-    } while (0)
-
-#define LOAD_SYMBOL_OPT(fun, tp, symbol)                                     \
-    do {                                                                     \
-        if (!((f->fun) = (tp*)dlsym(f->lib, symbol))) {                      \
-            av_log(NULL, AV_LOG_DEBUG, "Cannot load optional %s\n", symbol); \
-        } else {                                                             \
-            av_log(NULL, AV_LOG_TRACE, "Loaded sym: %s\n", symbol);          \
-        }                                                                    \
-    } while (0)
-
-#define GENERIC_LOAD_FUNC_PREAMBLE(T, n, N)  \
-    T *f;                                    \
-    int ret;                                 \
-                                             \
-    n##_free_functions(functions);           \
-                                             \
-    f = *functions = av_mallocz(sizeof(*f)); \
-    if (!f)                                  \
-        return AVERROR(ENOMEM);              \
-                                             \
-    LOAD_LIBRARY(f->lib, N);
-
-#define GENERIC_LOAD_FUNC_FINALE(n) \
-    return 0;                       \
-error:                              \
-    n##_free_functions(functions);  \
-    return ret;
-
-#define GENERIC_FREE_FUNC()              \
-    if (!functions)                      \
-        return;                          \
-    if (*functions && (*functions)->lib) \
-        dlclose((*functions)->lib);      \
-    av_freep(functions);
-
-#ifdef AV_COMPAT_DYNLINK_CUDA_H
-typedef struct CudaFunctions {
-    tcuInit *cuInit;
-    tcuDeviceGetCount *cuDeviceGetCount;
-    tcuDeviceGet *cuDeviceGet;
-    tcuDeviceGetName *cuDeviceGetName;
-    tcuDeviceComputeCapability *cuDeviceComputeCapability;
-    tcuCtxCreate_v2 *cuCtxCreate;
-    tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
-    tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
-    tcuCtxDestroy_v2 *cuCtxDestroy;
-    tcuMemAlloc_v2 *cuMemAlloc;
-    tcuMemFree_v2 *cuMemFree;
-    tcuMemcpy2D_v2 *cuMemcpy2D;
-    tcuGetErrorName *cuGetErrorName;
-    tcuGetErrorString *cuGetErrorString;
-
-    LIB_HANDLE lib;
-} CudaFunctions;
-#else
-typedef struct CudaFunctions CudaFunctions;
-#endif
-
-typedef struct CuvidFunctions {
-    tcuvidGetDecoderCaps *cuvidGetDecoderCaps;
-    tcuvidCreateDecoder *cuvidCreateDecoder;
-    tcuvidDestroyDecoder *cuvidDestroyDecoder;
-    tcuvidDecodePicture *cuvidDecodePicture;
-    tcuvidMapVideoFrame *cuvidMapVideoFrame;
-    tcuvidUnmapVideoFrame *cuvidUnmapVideoFrame;
-    tcuvidCtxLockCreate *cuvidCtxLockCreate;
-    tcuvidCtxLockDestroy *cuvidCtxLockDestroy;
-    tcuvidCtxLock *cuvidCtxLock;
-    tcuvidCtxUnlock *cuvidCtxUnlock;
-
-    tcuvidCreateVideoSource *cuvidCreateVideoSource;
-    tcuvidCreateVideoSourceW *cuvidCreateVideoSourceW;
-    tcuvidDestroyVideoSource *cuvidDestroyVideoSource;
-    tcuvidSetVideoSourceState *cuvidSetVideoSourceState;
-    tcuvidGetVideoSourceState *cuvidGetVideoSourceState;
-    tcuvidGetSourceVideoFormat *cuvidGetSourceVideoFormat;
-    tcuvidGetSourceAudioFormat *cuvidGetSourceAudioFormat;
-    tcuvidCreateVideoParser *cuvidCreateVideoParser;
-    tcuvidParseVideoData *cuvidParseVideoData;
-    tcuvidDestroyVideoParser *cuvidDestroyVideoParser;
-
-    LIB_HANDLE lib;
-} CuvidFunctions;
-
-typedef NVENCSTATUS NVENCAPI tNvEncodeAPICreateInstance(NV_ENCODE_API_FUNCTION_LIST *functionList);
-typedef NVENCSTATUS NVENCAPI tNvEncodeAPIGetMaxSupportedVersion(uint32_t* version);
-
-typedef struct NvencFunctions {
-    tNvEncodeAPICreateInstance *NvEncodeAPICreateInstance;
-    tNvEncodeAPIGetMaxSupportedVersion *NvEncodeAPIGetMaxSupportedVersion;
-
-    LIB_HANDLE lib;
-} NvencFunctions;
-
-#ifdef AV_COMPAT_DYNLINK_CUDA_H
-static inline void cuda_free_functions(CudaFunctions **functions)
-{
-    GENERIC_FREE_FUNC();
-}
-#endif
-
-static inline void cuvid_free_functions(CuvidFunctions **functions)
-{
-    GENERIC_FREE_FUNC();
-}
-
-static inline void nvenc_free_functions(NvencFunctions **functions)
-{
-    GENERIC_FREE_FUNC();
-}
-
-#ifdef AV_COMPAT_DYNLINK_CUDA_H
-static inline int cuda_load_functions(CudaFunctions **functions)
-{
-    GENERIC_LOAD_FUNC_PREAMBLE(CudaFunctions, cuda, CUDA_LIBNAME);
-
-    LOAD_SYMBOL(cuInit, tcuInit, "cuInit");
-    LOAD_SYMBOL(cuDeviceGetCount, tcuDeviceGetCount, "cuDeviceGetCount");
-    LOAD_SYMBOL(cuDeviceGet, tcuDeviceGet, "cuDeviceGet");
-    LOAD_SYMBOL(cuDeviceGetName, tcuDeviceGetName, "cuDeviceGetName");
-    LOAD_SYMBOL(cuDeviceComputeCapability, tcuDeviceComputeCapability, "cuDeviceComputeCapability");
-    LOAD_SYMBOL(cuCtxCreate, tcuCtxCreate_v2, "cuCtxCreate_v2");
-    LOAD_SYMBOL(cuCtxPushCurrent, tcuCtxPushCurrent_v2, "cuCtxPushCurrent_v2");
-    LOAD_SYMBOL(cuCtxPopCurrent, tcuCtxPopCurrent_v2, "cuCtxPopCurrent_v2");
-    LOAD_SYMBOL(cuCtxDestroy, tcuCtxDestroy_v2, "cuCtxDestroy_v2");
-    LOAD_SYMBOL(cuMemAlloc, tcuMemAlloc_v2, "cuMemAlloc_v2");
-    LOAD_SYMBOL(cuMemFree, tcuMemFree_v2, "cuMemFree_v2");
-    LOAD_SYMBOL(cuMemcpy2D, tcuMemcpy2D_v2, "cuMemcpy2D_v2");
-    LOAD_SYMBOL(cuGetErrorName, tcuGetErrorName, "cuGetErrorName");
-    LOAD_SYMBOL(cuGetErrorString, tcuGetErrorString, "cuGetErrorString");
-
-    GENERIC_LOAD_FUNC_FINALE(cuda);
-}
-#endif
-
-static inline int cuvid_load_functions(CuvidFunctions **functions)
-{
-    GENERIC_LOAD_FUNC_PREAMBLE(CuvidFunctions, cuvid, NVCUVID_LIBNAME);
-
-    LOAD_SYMBOL_OPT(cuvidGetDecoderCaps, tcuvidGetDecoderCaps, "cuvidGetDecoderCaps");
-    LOAD_SYMBOL(cuvidCreateDecoder, tcuvidCreateDecoder, "cuvidCreateDecoder");
-    LOAD_SYMBOL(cuvidDestroyDecoder, tcuvidDestroyDecoder, "cuvidDestroyDecoder");
-    LOAD_SYMBOL(cuvidDecodePicture, tcuvidDecodePicture, "cuvidDecodePicture");
-#ifdef __CUVID_DEVPTR64
-    LOAD_SYMBOL(cuvidMapVideoFrame, tcuvidMapVideoFrame, "cuvidMapVideoFrame64");
-    LOAD_SYMBOL(cuvidUnmapVideoFrame, tcuvidUnmapVideoFrame, "cuvidUnmapVideoFrame64");
-#else
-    LOAD_SYMBOL(cuvidMapVideoFrame, tcuvidMapVideoFrame, "cuvidMapVideoFrame");
-    LOAD_SYMBOL(cuvidUnmapVideoFrame, tcuvidUnmapVideoFrame, "cuvidUnmapVideoFrame");
-#endif
-    LOAD_SYMBOL(cuvidCtxLockCreate, tcuvidCtxLockCreate, "cuvidCtxLockCreate");
-    LOAD_SYMBOL(cuvidCtxLockDestroy, tcuvidCtxLockDestroy, "cuvidCtxLockDestroy");
-    LOAD_SYMBOL(cuvidCtxLock, tcuvidCtxLock, "cuvidCtxLock");
-    LOAD_SYMBOL(cuvidCtxUnlock, tcuvidCtxUnlock, "cuvidCtxUnlock");
-
-    LOAD_SYMBOL(cuvidCreateVideoSource, tcuvidCreateVideoSource, "cuvidCreateVideoSource");
-    LOAD_SYMBOL(cuvidCreateVideoSourceW, tcuvidCreateVideoSourceW, "cuvidCreateVideoSourceW");
-    LOAD_SYMBOL(cuvidDestroyVideoSource, tcuvidDestroyVideoSource, "cuvidDestroyVideoSource");
-    LOAD_SYMBOL(cuvidSetVideoSourceState, tcuvidSetVideoSourceState, "cuvidSetVideoSourceState");
-    LOAD_SYMBOL(cuvidGetVideoSourceState, tcuvidGetVideoSourceState, "cuvidGetVideoSourceState");
-    LOAD_SYMBOL(cuvidGetSourceVideoFormat, tcuvidGetSourceVideoFormat, "cuvidGetSourceVideoFormat");
-    LOAD_SYMBOL(cuvidGetSourceAudioFormat, tcuvidGetSourceAudioFormat, "cuvidGetSourceAudioFormat");
-    LOAD_SYMBOL(cuvidCreateVideoParser, tcuvidCreateVideoParser, "cuvidCreateVideoParser");
-    LOAD_SYMBOL(cuvidParseVideoData, tcuvidParseVideoData, "cuvidParseVideoData");
-    LOAD_SYMBOL(cuvidDestroyVideoParser, tcuvidDestroyVideoParser, "cuvidDestroyVideoParser");
-
-    GENERIC_LOAD_FUNC_FINALE(cuvid);
-}
-
-static inline int nvenc_load_functions(NvencFunctions **functions)
-{
-    GENERIC_LOAD_FUNC_PREAMBLE(NvencFunctions, nvenc, NVENC_LIBNAME);
-
-    LOAD_SYMBOL(NvEncodeAPICreateInstance, tNvEncodeAPICreateInstance, "NvEncodeAPICreateInstance");
-    LOAD_SYMBOL(NvEncodeAPIGetMaxSupportedVersion, tNvEncodeAPIGetMaxSupportedVersion, "NvEncodeAPIGetMaxSupportedVersion");
-
-    GENERIC_LOAD_FUNC_FINALE(nvenc);
-}
-
-#undef GENERIC_LOAD_FUNC_PREAMBLE
-#undef LOAD_LIBRARY
-#undef LOAD_SYMBOL
-#undef GENERIC_LOAD_FUNC_FINALE
-#undef GENERIC_FREE_FUNC
-#undef CUDA_LIBNAME
-#undef NVCUVID_LIBNAME
-#undef NVENC_LIBNAME
-#undef LIB_HANDLE
+#include <ffnvcodec/dynlink_loader.h>
 
 #endif
-

diff --git a/compat/cuda/dynlink_nvcuvid.h b/compat/cuda/dynlink_nvcuvid.h
deleted file mode 100644
index 8729424..0000000
--- a/compat/cuda/dynlink_nvcuvid.h
+++ /dev/null

@@ -1,356 +0,0 @@
-/*
- * This copyright notice applies to this header file only:
- *
- * Copyright (c) 2010-2017 NVIDIA Corporation
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the software, and to permit persons to whom the
- * software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/********************************************************************************************************************/
-//! \file nvcuvid.h
-//!   NVDECODE API provides video decoding interface to NVIDIA GPU devices.
-//! \date 2015-2017
-//!  This file contains the interface constants, structure definitions and function prototypes.
-/********************************************************************************************************************/
-
-#if !defined(__NVCUVID_H__)
-#define __NVCUVID_H__
-
-#include "compat/cuda/dynlink_cuviddec.h"
-
-#if defined(__cplusplus)
-extern "C" {
-#endif /* __cplusplus */
-
-/*********************************
-** Initialization
-*********************************/
-CUresult  CUDAAPI cuvidInit(unsigned int Flags);
-
-/***********************************************/
-//!
-//! High-level helper APIs for video sources
-//!
-/***********************************************/
-
-typedef void *CUvideosource;
-typedef void *CUvideoparser;
-typedef long long CUvideotimestamp;
-
-
-/************************************************************************/
-//! \enum cudaVideoState
-//! Video source state enums
-//! Used in cuvidSetVideoSourceState and cuvidGetVideoSourceState APIs
-/************************************************************************/
-typedef enum {
-    cudaVideoState_Error   = -1,    /**< Error state (invalid source)                  */
-    cudaVideoState_Stopped = 0,     /**< Source is stopped (or reached end-of-stream)  */
-    cudaVideoState_Started = 1      /**< Source is running and delivering data         */
-} cudaVideoState;
-
-/************************************************************************/
-//! \enum cudaAudioCodec
-//! Audio compression enums
-//! Used in CUAUDIOFORMAT structure
-/************************************************************************/
-typedef enum {
-    cudaAudioCodec_MPEG1=0,         /**< MPEG-1 Audio               */
-    cudaAudioCodec_MPEG2,           /**< MPEG-2 Audio               */
-    cudaAudioCodec_MP3,             /**< MPEG-1 Layer III Audio     */
-    cudaAudioCodec_AC3,             /**< Dolby Digital (AC3) Audio  */
-    cudaAudioCodec_LPCM,            /**< PCM Audio                  */
-    cudaAudioCodec_AAC,             /**< AAC Audio                  */
-} cudaAudioCodec;
-
-/************************************************************************************************/
-//! \ingroup STRUCTS
-//! \struct CUVIDEOFORMAT
-//! Video format
-//! Used in cuvidGetSourceVideoFormat API
-/************************************************************************************************/
-typedef struct
-{
-    cudaVideoCodec codec;                   /**< OUT: Compression format          */
-   /**
-    * OUT: frame rate = numerator / denominator (for example: 30000/1001)
-    */
-    struct {
-        /**< OUT: frame rate numerator   (0 = unspecified or variable frame rate) */
-        unsigned int numerator;
-        /**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */
-        unsigned int denominator;
-    } frame_rate;
-    unsigned char progressive_sequence;     /**< OUT: 0=interlaced, 1=progressive                                      */
-    unsigned char bit_depth_luma_minus8;    /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth   */
-    unsigned char bit_depth_chroma_minus8;  /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */
-    unsigned char reserved1;                /**< Reserved for future use                                               */
-    unsigned int coded_width;               /**< OUT: coded frame width in pixels                                      */
-    unsigned int coded_height;              /**< OUT: coded frame height in pixels                                     */
-   /**
-    * area of the frame that should be displayed
-    * typical example:
-    * coded_width = 1920, coded_height = 1088
-    * display_area = { 0,0,1920,1080 }
-    */
-    struct {
-        int left;                           /**< OUT: left position of display rect    */
-        int top;                            /**< OUT: top position of display rect     */
-        int right;                          /**< OUT: right position of display rect   */
-        int bottom;                         /**< OUT: bottom position of display rect  */
-    } display_area;
-    cudaVideoChromaFormat chroma_format;    /**< OUT:  Chroma format                   */
-    unsigned int bitrate;                   /**< OUT: video bitrate (bps, 0=unknown)   */
-   /**
-    * OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc)
-    */
-    struct {
-        int x;
-        int y;
-    } display_aspect_ratio;
-    /**
-    * Video Signal Description
-    * Refer section E.2.1 (VUI parameters semantics) of H264 spec file
-    */
-    struct {
-        unsigned char video_format          : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, 5-Unspecified     */
-        unsigned char video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma range           */
-        unsigned char reserved_zero_bits    : 4; /**< Reserved bits                                                      */
-        unsigned char color_primaries;           /**< OUT: chromaticity coordinates of source primaries                  */
-        unsigned char transfer_characteristics;  /**< OUT: opto-electronic transfer characteristic of the source picture */
-        unsigned char matrix_coefficients;       /**< OUT: used in deriving luma and chroma signals from RGB primaries   */
-    } video_signal_description;
-    unsigned int seqhdr_data_length;             /**< OUT: Additional bytes following (CUVIDEOFORMATEX)                  */
-} CUVIDEOFORMAT;
-
-/****************************************************************/
-//! \ingroup STRUCTS
-//! \struct CUVIDEOFORMATEX
-//! Video format including raw sequence header information
-//! Used in cuvidGetSourceVideoFormat API
-/****************************************************************/
-typedef struct
-{
-    CUVIDEOFORMAT format;                 /**< OUT: CUVIDEOFORMAT structure */
-    unsigned char raw_seqhdr_data[1024];  /**< OUT: Sequence header data    */
-} CUVIDEOFORMATEX;
-
-/****************************************************************/
-//! \ingroup STRUCTS
-//! \struct CUAUDIOFORMAT
-//! Audio formats
-//! Used in cuvidGetSourceAudioFormat API
-/****************************************************************/
-typedef struct
-{
-    cudaAudioCodec codec;       /**< OUT: Compression format                                              */
-    unsigned int channels;      /**< OUT: number of audio channels                                        */
-    unsigned int samplespersec; /**< OUT: sampling frequency                                              */
-    unsigned int bitrate;       /**< OUT: For uncompressed, can also be used to determine bits per sample */
-    unsigned int reserved1;     /**< Reserved for future use                                              */
-    unsigned int reserved2;     /**< Reserved for future use                                              */
-} CUAUDIOFORMAT;
-
-
-/***************************************************************/
-//! \enum CUvideopacketflags
-//! Data packet flags
-//! Used in CUVIDSOURCEDATAPACKET structure
-/***************************************************************/
-typedef enum {
-    CUVID_PKT_ENDOFSTREAM   = 0x01,   /**< Set when this is the last packet for this stream  */
-    CUVID_PKT_TIMESTAMP     = 0x02,   /**< Timestamp is valid                                */
-    CUVID_PKT_DISCONTINUITY = 0x04,   /**< Set when a discontinuity has to be signalled      */
-    CUVID_PKT_ENDOFPICTURE  = 0x08,   /**< Set when the packet contains exactly one frame    */
-} CUvideopacketflags;
-
-/*****************************************************************************/
-//! \ingroup STRUCTS
-//! \struct CUVIDSOURCEDATAPACKET
-//! Data Packet
-//! Used in cuvidParseVideoData API
-//! IN for cuvidParseVideoData
-/*****************************************************************************/
-typedef struct _CUVIDSOURCEDATAPACKET
-{
-    tcu_ulong flags;                /**< IN: Combination of CUVID_PKT_XXX flags                              */
-    tcu_ulong payload_size;         /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */
-    const unsigned char *payload;   /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */
-    CUvideotimestamp timestamp;     /**< IN: Presentation time stamp (10MHz clock), only valid if
-                                             CUVID_PKT_TIMESTAMP flag is set                                 */
-} CUVIDSOURCEDATAPACKET;
-
-// Callback for packet delivery
-typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
-
-/**************************************************************************************************************************/
-//! \ingroup STRUCTS
-//! \struct CUVIDSOURCEPARAMS
-//! Describes parameters needed in cuvidCreateVideoSource API
-//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported
-//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
-/**************************************************************************************************************************/
-typedef struct _CUVIDSOURCEPARAMS
-{
-    unsigned int ulClockRate;                   /**< IN: Time stamp units in Hz (0=default=10000000Hz)      */
-    unsigned int uReserved1[7];                 /**< Reserved for future use - set to zero                  */
-    void *pUserData;                            /**< IN: User private data passed in to the data handlers   */
-    PFNVIDSOURCECALLBACK pfnVideoDataHandler;   /**< IN: Called to deliver video packets                    */
-    PFNVIDSOURCECALLBACK pfnAudioDataHandler;   /**< IN: Called to deliver audio packets.                   */
-    void *pvReserved2[8];                       /**< Reserved for future use - set to NULL                  */
-} CUVIDSOURCEPARAMS;
-
-
-/**********************************************/
-//! \ingroup ENUMS
-//! \enum CUvideosourceformat_flags
-//! CUvideosourceformat_flags
-//! Used in cuvidGetSourceVideoFormat API
-/**********************************************/
-typedef enum {
-    CUVID_FMT_EXTFORMATINFO = 0x100             /**< Return extended format structure (CUVIDEOFORMATEX) */
-} CUvideosourceformat_flags;
-
-#if !defined(__APPLE__)
-/**************************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
-//! Create CUvideosource object. CUvideosource spawns demultiplexer thread that provides two callbacks:
-//! pfnVideoDataHandler() and pfnAudioDataHandler()
-//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported
-//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
-/**************************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);
-
-/****************************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
-//! Create video source object and initialize
-/****************************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);
-
-/*********************************************************************/
-//! \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
-//! Destroy video source
-/*********************************************************************/
-typedef CUresult CUDAAPI tcuvidDestroyVideoSource(CUvideosource obj);
-
-/******************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
-//! Set video source state
-/******************************************************************************************/
-typedef CUresult CUDAAPI tcuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);
-
-/******************************************************************************************/
-//! \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
-//! Get video source state
-/******************************************************************************************/
-typedef cudaVideoState CUDAAPI tcuvidGetVideoSourceState(CUvideosource obj);
-
-/****************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
-//! Gets details of video stream in pvidfmt
-/****************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);
-
-/****************************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
-//! Get audio source format
-//! NVDECODE API is intended for HW accelarated video decoding so CUvideosource doesn't have audio demuxer for all suppported
-//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
-/****************************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);
-
-#endif
-/**********************************************************************************/
-//! \ingroup STRUCTS
-//! \struct CUVIDPARSERDISPINFO
-//! Used in cuvidParseVideoData API with PFNVIDDISPLAYCALLBACK pfnDisplayPicture
-/**********************************************************************************/
-typedef struct _CUVIDPARSERDISPINFO
-{
-    int picture_index;          /**< OUT: Index of the current picture                                                         */
-    int progressive_frame;      /**< OUT: 1 if progressive frame; 0 otherwise                                                  */
-    int top_field_first;        /**< OUT: 1 if top field is displayed first; 0 otherwise                                       */
-    int repeat_first_field;     /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling,
-                                     -1=unpaired field)                                                                        */
-    CUvideotimestamp timestamp; /**< OUT: Presentation time stamp                                                              */
-} CUVIDPARSERDISPINFO;
-
-/***********************************************************************************************************************/
-//! Parser callbacks
-//! The parser will call these synchronously from within cuvidParseVideoData(), whenever a picture is ready to
-//! be decoded and/or displayed. First argument in functions is "void *pUserData" member of structure CUVIDSOURCEPARAMS
-/***********************************************************************************************************************/
-typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
-typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
-typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
-
-/**************************************/
-//! \ingroup STRUCTS
-//! \struct CUVIDPARSERPARAMS
-//! Used in cuvidCreateVideoParser API
-/**************************************/
-typedef struct _CUVIDPARSERPARAMS
-{
-    cudaVideoCodec CodecType;                   /**< IN: cudaVideoCodec_XXX                                                  */
-    unsigned int ulMaxNumDecodeSurfaces;        /**< IN: Max # of decode surfaces (parser will cycle through these)          */
-    unsigned int ulClockRate;                   /**< IN: Timestamp units in Hz (0=default=10000000Hz)                        */
-    unsigned int ulErrorThreshold;              /**< IN: % Error threshold (0-100) for calling pfnDecodePicture (100=always
-                                                     IN: call pfnDecodePicture even if picture bitstream is fully corrupted) */
-    unsigned int ulMaxDisplayDelay;             /**< IN: Max display queue delay (improves pipelining of decode with display)
-                                                         0=no delay (recommended values: 2..4)                               */
-    unsigned int uReserved1[5];                 /**< IN: Reserved for future use - set to 0                                  */
-    void *pUserData;                            /**< IN: User data for callbacks                                             */
-    PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< IN: Called before decoding frames and/or whenever there is a fmt change */
-    PFNVIDDECODECALLBACK pfnDecodePicture;      /**< IN: Called when a picture is ready to be decoded (decode order)         */
-    PFNVIDDISPLAYCALLBACK pfnDisplayPicture;    /**< IN: Called whenever a picture is ready to be displayed (display order)  */
-    void *pvReserved2[7];                       /**< Reserved for future use - set to NULL                                   */
-    CUVIDEOFORMATEX *pExtVideoInfo;             /**< IN: [Optional] sequence header data from system layer                   */
-} CUVIDPARSERPARAMS;
-
-/************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
-//! Create video parser object and initialize
-/************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
-
-/************************************************************************************************/
-//! \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
-//! Parse the video data from source data packet in pPacket
-//! Extracts parameter sets like SPS, PPS, bitstream etc. from pPacket and
-//! calls back pfnDecodePicture with CUVIDPICPARAMS data for kicking of HW decoding
-/************************************************************************************************/
-typedef CUresult CUDAAPI tcuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
-
-/*******************************************************************/
-//! \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
-/*******************************************************************/
-typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser obj);
-
-/**********************************************************************************************/
-
-#if defined(__cplusplus)
-}
-#endif /* __cplusplus */
-
-#endif // __NVCUVID_H__
-
-

diff --git a/compat/nvenc/nvEncodeAPI.h b/compat/nvenc/nvEncodeAPI.h
deleted file mode 100644
index c3a8294..0000000
--- a/compat/nvenc/nvEncodeAPI.h
+++ /dev/null

@@ -1,3324 +0,0 @@
-/*
- * This copyright notice applies to this header file only:
- *
- * Copyright (c) 2010-2017 NVIDIA Corporation
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the software, and to permit persons to whom the
- * software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file nvEncodeAPI.h
- *   NVIDIA GPUs - beginning with the Kepler generation - contain a hardware-based encoder
- *   (referred to as NVENC) which provides fully-accelerated hardware-based video encoding.
- *   NvEncodeAPI provides the interface for NVIDIA video encoder (NVENC).
- * \date 2011-2017
- *  This file contains the interface constants, structure definitions and function prototypes.
- */
-
-#ifndef _NV_ENCODEAPI_H_
-#define _NV_ENCODEAPI_H_
-
-#include <stdlib.h>
-
-#ifdef _WIN32
-#include <windows.h>
-#endif
-
-#ifdef _MSC_VER
-#ifndef _STDINT
-typedef __int32 int32_t;
-typedef unsigned __int32 uint32_t;
-typedef __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-typedef signed char int8_t;
-typedef unsigned char uint8_t;
-typedef short int16_t;
-typedef unsigned short uint16_t;
-#endif
-#else
-#include <stdint.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * \addtogroup ENCODER_STRUCTURE NvEncodeAPI Data structures
- * @{
- */
-
-#if defined(_WIN32) || defined(__CYGWIN__)
-#define NVENCAPI __stdcall
-#else
-#define NVENCAPI
-#endif
-
-#ifdef _WIN32
-typedef RECT NVENC_RECT;
-#else
-// =========================================================================================
-#ifndef GUID
-/*!
- * \struct GUID
- * Abstracts the GUID structure for non-windows platforms.
- */
-// =========================================================================================
-typedef struct
-{
-    uint32_t Data1;                                      /**< [in]: Specifies the first 8 hexadecimal digits of the GUID.                                */
-    uint16_t Data2;                                      /**< [in]: Specifies the first group of 4 hexadecimal digits.                                   */
-    uint16_t Data3;                                      /**< [in]: Specifies the second group of 4 hexadecimal digits.                                  */
-    uint8_t  Data4[8];                                   /**< [in]: Array of 8 bytes. The first 2 bytes contain the third group of 4 hexadecimal digits.
-                                                                    The remaining 6 bytes contain the final 12 hexadecimal digits.                       */
-} GUID;
-#endif // GUID
-
-/**
- * \struct _NVENC_RECT
- * Defines a Rectangle. Used in ::NV_ENC_PREPROCESS_FRAME.
- */
-typedef struct _NVENC_RECT
-{
-    uint32_t left;                                        /**< [in]: X coordinate of the upper left corner of rectangular area to be specified.       */
-    uint32_t top;                                         /**< [in]: Y coordinate of the upper left corner of the rectangular area to be specified.   */
-    uint32_t right;                                       /**< [in]: X coordinate of the bottom right corner of the rectangular area to be specified. */
-    uint32_t bottom;                                      /**< [in]: Y coordinate of the bottom right corner of the rectangular area to be specified. */
-} NVENC_RECT;
-
-#endif // _WIN32
-
-/** @} */ /* End of GUID and NVENC_RECT structure grouping*/
-
-typedef void* NV_ENC_INPUT_PTR;             /**< NVENCODE API input buffer                              */
-typedef void* NV_ENC_OUTPUT_PTR;            /**< NVENCODE API output buffer*/
-typedef void* NV_ENC_REGISTERED_PTR;        /**< A Resource that has been registered with NVENCODE API*/
-
-#define NVENCAPI_MAJOR_VERSION 8
-#define NVENCAPI_MINOR_VERSION 0
-
-#define NVENCAPI_VERSION (NVENCAPI_MAJOR_VERSION | (NVENCAPI_MINOR_VERSION << 24))
-
-/**
- * Macro to generate per-structure version for use with API.
- */
-#define NVENCAPI_STRUCT_VERSION(ver) ((uint32_t)NVENCAPI_VERSION | ((ver)<<16) | (0x7 << 28))
-
-
-#define NVENC_INFINITE_GOPLENGTH  0xffffffff
-
-#define NV_MAX_SEQ_HDR_LEN  (512)
-
-// =========================================================================================
-// Encode Codec GUIDS supported by the NvEncodeAPI interface.
-// =========================================================================================
-
-// {6BC82762-4E63-4ca4-AA85-1E50F321F6BF}
-static const GUID NV_ENC_CODEC_H264_GUID =
-{ 0x6bc82762, 0x4e63, 0x4ca4, { 0xaa, 0x85, 0x1e, 0x50, 0xf3, 0x21, 0xf6, 0xbf } };
-
-// {790CDC88-4522-4d7b-9425-BDA9975F7603}
-static const GUID NV_ENC_CODEC_HEVC_GUID =
-{ 0x790cdc88, 0x4522, 0x4d7b, { 0x94, 0x25, 0xbd, 0xa9, 0x97, 0x5f, 0x76, 0x3 } };
-
-
-
-// =========================================================================================
-// *   Encode Profile GUIDS supported by the NvEncodeAPI interface.
-// =========================================================================================
-
-// {BFD6F8E7-233C-4341-8B3E-4818523803F4}
-static const GUID NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID =
-{ 0xbfd6f8e7, 0x233c, 0x4341, { 0x8b, 0x3e, 0x48, 0x18, 0x52, 0x38, 0x3, 0xf4 } };
-
-// {0727BCAA-78C4-4c83-8C2F-EF3DFF267C6A}
-static const GUID  NV_ENC_H264_PROFILE_BASELINE_GUID =
-{ 0x727bcaa, 0x78c4, 0x4c83, { 0x8c, 0x2f, 0xef, 0x3d, 0xff, 0x26, 0x7c, 0x6a } };
-
-// {60B5C1D4-67FE-4790-94D5-C4726D7B6E6D}
-static const GUID  NV_ENC_H264_PROFILE_MAIN_GUID =
-{ 0x60b5c1d4, 0x67fe, 0x4790, { 0x94, 0xd5, 0xc4, 0x72, 0x6d, 0x7b, 0x6e, 0x6d } };
-
-// {E7CBC309-4F7A-4b89-AF2A-D537C92BE310}
-static const GUID NV_ENC_H264_PROFILE_HIGH_GUID =
-{ 0xe7cbc309, 0x4f7a, 0x4b89, { 0xaf, 0x2a, 0xd5, 0x37, 0xc9, 0x2b, 0xe3, 0x10 } };
-
-// {7AC663CB-A598-4960-B844-339B261A7D52}
-static const GUID  NV_ENC_H264_PROFILE_HIGH_444_GUID =
-{ 0x7ac663cb, 0xa598, 0x4960, { 0xb8, 0x44, 0x33, 0x9b, 0x26, 0x1a, 0x7d, 0x52 } };
-
-// {40847BF5-33F7-4601-9084-E8FE3C1DB8B7}
-static const GUID NV_ENC_H264_PROFILE_STEREO_GUID =
-{ 0x40847bf5, 0x33f7, 0x4601, { 0x90, 0x84, 0xe8, 0xfe, 0x3c, 0x1d, 0xb8, 0xb7 } };
-
-// {CE788D20-AAA9-4318-92BB-AC7E858C8D36}
-static const GUID NV_ENC_H264_PROFILE_SVC_TEMPORAL_SCALABILTY =
-{ 0xce788d20, 0xaaa9, 0x4318, { 0x92, 0xbb, 0xac, 0x7e, 0x85, 0x8c, 0x8d, 0x36 } };
-
-// {B405AFAC-F32B-417B-89C4-9ABEED3E5978}
-static const GUID NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID =
-{ 0xb405afac, 0xf32b, 0x417b, { 0x89, 0xc4, 0x9a, 0xbe, 0xed, 0x3e, 0x59, 0x78 } };
-
-// {AEC1BD87-E85B-48f2-84C3-98BCA6285072}
-static const GUID NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID =
-{ 0xaec1bd87, 0xe85b, 0x48f2, { 0x84, 0xc3, 0x98, 0xbc, 0xa6, 0x28, 0x50, 0x72 } };
-
-// {B514C39A-B55B-40fa-878F-F1253B4DFDEC}
-static const GUID NV_ENC_HEVC_PROFILE_MAIN_GUID =
-{ 0xb514c39a, 0xb55b, 0x40fa, { 0x87, 0x8f, 0xf1, 0x25, 0x3b, 0x4d, 0xfd, 0xec } };
-
-// {fa4d2b6c-3a5b-411a-8018-0a3f5e3c9be5}
-static const GUID NV_ENC_HEVC_PROFILE_MAIN10_GUID =
-{ 0xfa4d2b6c, 0x3a5b, 0x411a, { 0x80, 0x18, 0x0a, 0x3f, 0x5e, 0x3c, 0x9b, 0xe5 } };
-
-// For HEVC Main 444 8 bit and HEVC Main 444 10 bit profiles only
-// {51ec32b5-1b4c-453c-9cbd-b616bd621341}
-static const GUID NV_ENC_HEVC_PROFILE_FREXT_GUID =
-{ 0x51ec32b5, 0x1b4c, 0x453c, { 0x9c, 0xbd, 0xb6, 0x16, 0xbd, 0x62, 0x13, 0x41 } };
-
-// =========================================================================================
-// *   Preset GUIDS supported by the NvEncodeAPI interface.
-// =========================================================================================
-// {B2DFB705-4EBD-4C49-9B5F-24A777D3E587}
-static const GUID NV_ENC_PRESET_DEFAULT_GUID =
-{ 0xb2dfb705, 0x4ebd, 0x4c49, { 0x9b, 0x5f, 0x24, 0xa7, 0x77, 0xd3, 0xe5, 0x87 } };
-
-// {60E4C59F-E846-4484-A56D-CD45BE9FDDF6}
-static const GUID NV_ENC_PRESET_HP_GUID =
-{ 0x60e4c59f, 0xe846, 0x4484, { 0xa5, 0x6d, 0xcd, 0x45, 0xbe, 0x9f, 0xdd, 0xf6 } };
-
-// {34DBA71D-A77B-4B8F-9C3E-B6D5DA24C012}
-static const GUID NV_ENC_PRESET_HQ_GUID =
-{ 0x34dba71d, 0xa77b, 0x4b8f, { 0x9c, 0x3e, 0xb6, 0xd5, 0xda, 0x24, 0xc0, 0x12 } };
-
-// {82E3E450-BDBB-4e40-989C-82A90DF9EF32}
-static const GUID NV_ENC_PRESET_BD_GUID  =
-{ 0x82e3e450, 0xbdbb, 0x4e40, { 0x98, 0x9c, 0x82, 0xa9, 0xd, 0xf9, 0xef, 0x32 } };
-
-// {49DF21C5-6DFA-4feb-9787-6ACC9EFFB726}
-static const GUID NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID  =
-{ 0x49df21c5, 0x6dfa, 0x4feb, { 0x97, 0x87, 0x6a, 0xcc, 0x9e, 0xff, 0xb7, 0x26 } };
-
-// {C5F733B9-EA97-4cf9-BEC2-BF78A74FD105}
-static const GUID NV_ENC_PRESET_LOW_LATENCY_HQ_GUID  =
-{ 0xc5f733b9, 0xea97, 0x4cf9, { 0xbe, 0xc2, 0xbf, 0x78, 0xa7, 0x4f, 0xd1, 0x5 } };
-
-// {67082A44-4BAD-48FA-98EA-93056D150A58}
-static const GUID NV_ENC_PRESET_LOW_LATENCY_HP_GUID =
-{ 0x67082a44, 0x4bad, 0x48fa, { 0x98, 0xea, 0x93, 0x5, 0x6d, 0x15, 0xa, 0x58 } };
-
-// {D5BFB716-C604-44e7-9BB8-DEA5510FC3AC}
-static const GUID NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID =
-{ 0xd5bfb716, 0xc604, 0x44e7, { 0x9b, 0xb8, 0xde, 0xa5, 0x51, 0xf, 0xc3, 0xac } };
-
-// {149998E7-2364-411d-82EF-179888093409}
-static const GUID NV_ENC_PRESET_LOSSLESS_HP_GUID =
-{ 0x149998e7, 0x2364, 0x411d, { 0x82, 0xef, 0x17, 0x98, 0x88, 0x9, 0x34, 0x9 } };
-
-/**
- * \addtogroup ENCODER_STRUCTURE NvEncodeAPI Data structures
- * @{
- */
-
-/**
- * Input frame encode modes
- */
-typedef enum _NV_ENC_PARAMS_FRAME_FIELD_MODE
-{
-    NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME = 0x01,  /**< Frame mode */
-    NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD = 0x02,  /**< Field mode */
-    NV_ENC_PARAMS_FRAME_FIELD_MODE_MBAFF = 0x03   /**< MB adaptive frame/field */
-} NV_ENC_PARAMS_FRAME_FIELD_MODE;
-
-/**
- * Rate Control Modes
- */
-typedef enum _NV_ENC_PARAMS_RC_MODE
-{
-    NV_ENC_PARAMS_RC_CONSTQP                = 0x0,       /**< Constant QP mode */
-    NV_ENC_PARAMS_RC_VBR                    = 0x1,       /**< Variable bitrate mode */
-    NV_ENC_PARAMS_RC_CBR                    = 0x2,       /**< Constant bitrate mode */
-    NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ        = 0x8,       /**< low-delay CBR, high quality */
-    NV_ENC_PARAMS_RC_CBR_HQ                 = 0x10,      /**< CBR, high quality (slower) */
-    NV_ENC_PARAMS_RC_VBR_HQ                 = 0x20       /**< VBR, high quality (slower) */
-} NV_ENC_PARAMS_RC_MODE;
-
-#define NV_ENC_PARAMS_RC_VBR_MINQP              (NV_ENC_PARAMS_RC_MODE)0x4          /**< Deprecated */
-#define NV_ENC_PARAMS_RC_2_PASS_QUALITY         NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ    /**< Deprecated */
-#define NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP   NV_ENC_PARAMS_RC_CBR_HQ             /**< Deprecated */
-#define NV_ENC_PARAMS_RC_2_PASS_VBR             NV_ENC_PARAMS_RC_VBR_HQ             /**< Deprecated */
-#define NV_ENC_PARAMS_RC_CBR2                   NV_ENC_PARAMS_RC_CBR                /**< Deprecated */
-
-/**
- * Input picture structure
- */
-typedef enum _NV_ENC_PIC_STRUCT
-{
-    NV_ENC_PIC_STRUCT_FRAME             = 0x01,                 /**< Progressive frame */
-    NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM  = 0x02,                 /**< Field encoding top field first */
-    NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP  = 0x03                  /**< Field encoding bottom field first */
-} NV_ENC_PIC_STRUCT;
-
-/**
- * Input picture type
- */
-typedef enum _NV_ENC_PIC_TYPE
-{
-    NV_ENC_PIC_TYPE_P               = 0x0,     /**< Forward predicted */
-    NV_ENC_PIC_TYPE_B               = 0x01,    /**< Bi-directionally predicted picture */
-    NV_ENC_PIC_TYPE_I               = 0x02,    /**< Intra predicted picture */
-    NV_ENC_PIC_TYPE_IDR             = 0x03,    /**< IDR picture */
-    NV_ENC_PIC_TYPE_BI              = 0x04,    /**< Bi-directionally predicted with only Intra MBs */
-    NV_ENC_PIC_TYPE_SKIPPED         = 0x05,    /**< Picture is skipped */
-    NV_ENC_PIC_TYPE_INTRA_REFRESH   = 0x06,    /**< First picture in intra refresh cycle */
-    NV_ENC_PIC_TYPE_UNKNOWN         = 0xFF     /**< Picture type unknown */
-} NV_ENC_PIC_TYPE;
-
-/**
- * Motion vector precisions
- */
-typedef enum _NV_ENC_MV_PRECISION
-{
-    NV_ENC_MV_PRECISION_DEFAULT     = 0x0,       /**<Driver selects QuarterPel motion vector precision by default*/
-    NV_ENC_MV_PRECISION_FULL_PEL    = 0x01,    /**< FullPel  motion vector precision */
-    NV_ENC_MV_PRECISION_HALF_PEL    = 0x02,    /**< HalfPel motion vector precision */
-    NV_ENC_MV_PRECISION_QUARTER_PEL = 0x03     /**< QuarterPel motion vector precision */
-} NV_ENC_MV_PRECISION;
-
-
-/**
- * Input buffer formats
- */
-typedef enum _NV_ENC_BUFFER_FORMAT
-{
-    NV_ENC_BUFFER_FORMAT_UNDEFINED                       = 0x00000000,  /**< Undefined buffer format */
-
-    NV_ENC_BUFFER_FORMAT_NV12                            = 0x00000001,  /**< Semi-Planar YUV [Y plane followed by interleaved UV plane] */
-    NV_ENC_BUFFER_FORMAT_YV12                            = 0x00000010,  /**< Planar YUV [Y plane followed by V and U planes] */
-    NV_ENC_BUFFER_FORMAT_IYUV                            = 0x00000100,  /**< Planar YUV [Y plane followed by U and V planes] */
-    NV_ENC_BUFFER_FORMAT_YUV444                          = 0x00001000,  /**< Planar YUV [Y plane followed by U and V planes] */
-    NV_ENC_BUFFER_FORMAT_YUV420_10BIT                    = 0x00010000,  /**< 10 bit Semi-Planar YUV [Y plane followed by interleaved UV plane]. Each pixel of size 2 bytes. Most Significant 10 bits contain pixel data. */
-    NV_ENC_BUFFER_FORMAT_YUV444_10BIT                    = 0x00100000,  /**< 10 bit Planar YUV444 [Y plane followed by U and V planes]. Each pixel of size 2 bytes. Most Significant 10 bits contain pixel data.  */
-    NV_ENC_BUFFER_FORMAT_ARGB                            = 0x01000000,  /**< 8 bit Packed A8R8G8B8. This is a word-ordered format
-                                                                             where a pixel is represented by a 32-bit word with B
-                                                                             in the lowest 8 bits, G in the next 8 bits, R in the
-                                                                             8 bits after that and A in the highest 8 bits. */
-    NV_ENC_BUFFER_FORMAT_ARGB10                          = 0x02000000,  /**< 10 bit Packed A2R10G10B10. This is a word-ordered format
-                                                                             where a pixel is represented by a 32-bit word with B
-                                                                             in the lowest 10 bits, G in the next 10 bits, R in the
-                                                                             10 bits after that and A in the highest 2 bits. */
-    NV_ENC_BUFFER_FORMAT_AYUV                            = 0x04000000,  /**< 8 bit Packed A8Y8U8V8. This is a word-ordered format
-                                                                             where a pixel is represented by a 32-bit word with V
-                                                                             in the lowest 8 bits, U in the next 8 bits, Y in the
-                                                                             8 bits after that and A in the highest 8 bits. */
-    NV_ENC_BUFFER_FORMAT_ABGR                            = 0x10000000,  /**< 8 bit Packed A8B8G8R8. This is a word-ordered format
-                                                                             where a pixel is represented by a 32-bit word with R
-                                                                             in the lowest 8 bits, G in the next 8 bits, B in the
-                                                                             8 bits after that and A in the highest 8 bits. */
-    NV_ENC_BUFFER_FORMAT_ABGR10                          = 0x20000000,  /**< 10 bit Packed A2B10G10R10. This is a word-ordered format
-                                                                             where a pixel is represented by a 32-bit word with R
-                                                                             in the lowest 10 bits, G in the next 10 bits, B in the
-                                                                             10 bits after that and A in the highest 2 bits. */
-} NV_ENC_BUFFER_FORMAT;
-
-#define NV_ENC_BUFFER_FORMAT_NV12_PL NV_ENC_BUFFER_FORMAT_NV12
-#define NV_ENC_BUFFER_FORMAT_YV12_PL NV_ENC_BUFFER_FORMAT_YV12
-#define NV_ENC_BUFFER_FORMAT_IYUV_PL NV_ENC_BUFFER_FORMAT_IYUV
-#define NV_ENC_BUFFER_FORMAT_YUV444_PL NV_ENC_BUFFER_FORMAT_YUV444
-
-/**
- * Encoding levels
- */
-typedef enum _NV_ENC_LEVEL
-{
-    NV_ENC_LEVEL_AUTOSELECT         = 0,
-
-    NV_ENC_LEVEL_H264_1             = 10,
-    NV_ENC_LEVEL_H264_1b            = 9,
-    NV_ENC_LEVEL_H264_11            = 11,
-    NV_ENC_LEVEL_H264_12            = 12,
-    NV_ENC_LEVEL_H264_13            = 13,
-    NV_ENC_LEVEL_H264_2             = 20,
-    NV_ENC_LEVEL_H264_21            = 21,
-    NV_ENC_LEVEL_H264_22            = 22,
-    NV_ENC_LEVEL_H264_3             = 30,
-    NV_ENC_LEVEL_H264_31            = 31,
-    NV_ENC_LEVEL_H264_32            = 32,
-    NV_ENC_LEVEL_H264_4             = 40,
-    NV_ENC_LEVEL_H264_41            = 41,
-    NV_ENC_LEVEL_H264_42            = 42,
-    NV_ENC_LEVEL_H264_5             = 50,
-    NV_ENC_LEVEL_H264_51            = 51,
-    NV_ENC_LEVEL_H264_52            = 52,
-
-
-    NV_ENC_LEVEL_HEVC_1             = 30,
-    NV_ENC_LEVEL_HEVC_2             = 60,
-    NV_ENC_LEVEL_HEVC_21            = 63,
-    NV_ENC_LEVEL_HEVC_3             = 90,
-    NV_ENC_LEVEL_HEVC_31            = 93,
-    NV_ENC_LEVEL_HEVC_4             = 120,
-    NV_ENC_LEVEL_HEVC_41            = 123,
-    NV_ENC_LEVEL_HEVC_5             = 150,
-    NV_ENC_LEVEL_HEVC_51            = 153,
-    NV_ENC_LEVEL_HEVC_52            = 156,
-    NV_ENC_LEVEL_HEVC_6             = 180,
-    NV_ENC_LEVEL_HEVC_61            = 183,
-    NV_ENC_LEVEL_HEVC_62            = 186,
-
-    NV_ENC_TIER_HEVC_MAIN           = 0,
-    NV_ENC_TIER_HEVC_HIGH           = 1
-} NV_ENC_LEVEL;
-
-/**
- * Error Codes
- */
-typedef enum _NVENCSTATUS
-{
-    /**
-     * This indicates that API call returned with no errors.
-     */
-    NV_ENC_SUCCESS,
-
-    /**
-     * This indicates that no encode capable devices were detected.
-     */
-    NV_ENC_ERR_NO_ENCODE_DEVICE,
-
-    /**
-     * This indicates that devices pass by the client is not supported.
-     */
-    NV_ENC_ERR_UNSUPPORTED_DEVICE,
-
-    /**
-     * This indicates that the encoder device supplied by the client is not
-     * valid.
-     */
-    NV_ENC_ERR_INVALID_ENCODERDEVICE,
-
-    /**
-     * This indicates that device passed to the API call is invalid.
-     */
-    NV_ENC_ERR_INVALID_DEVICE,
-
-    /**
-     * This indicates that device passed to the API call is no longer available and
-     * needs to be reinitialized. The clients need to destroy the current encoder
-     * session by freeing the allocated input output buffers and destroying the device
-     * and create a new encoding session.
-     */
-    NV_ENC_ERR_DEVICE_NOT_EXIST,
-
-    /**
-     * This indicates that one or more of the pointers passed to the API call
-     * is invalid.
-     */
-    NV_ENC_ERR_INVALID_PTR,
-
-    /**
-     * This indicates that completion event passed in ::NvEncEncodePicture() call
-     * is invalid.
-     */
-    NV_ENC_ERR_INVALID_EVENT,
-
-    /**
-     * This indicates that one or more of the parameter passed to the API call
-     * is invalid.
-     */
-    NV_ENC_ERR_INVALID_PARAM,
-
-    /**
-     * This indicates that an API call was made in wrong sequence/order.
-     */
-    NV_ENC_ERR_INVALID_CALL,
-
-    /**
-     * This indicates that the API call failed because it was unable to allocate
-     * enough memory to perform the requested operation.
-     */
-    NV_ENC_ERR_OUT_OF_MEMORY,
-
-    /**
-     * This indicates that the encoder has not been initialized with
-     * ::NvEncInitializeEncoder() or that initialization has failed.
-     * The client cannot allocate input or output buffers or do any encoding
-     * related operation before successfully initializing the encoder.
-     */
-    NV_ENC_ERR_ENCODER_NOT_INITIALIZED,
-
-    /**
-     * This indicates that an unsupported parameter was passed by the client.
-     */
-    NV_ENC_ERR_UNSUPPORTED_PARAM,
-
-    /**
-     * This indicates that the ::NvEncLockBitstream() failed to lock the output
-     * buffer. This happens when the client makes a non blocking lock call to
-     * access the output bitstream by passing NV_ENC_LOCK_BITSTREAM::doNotWait flag.
-     * This is not a fatal error and client should retry the same operation after
-     * few milliseconds.
-     */
-    NV_ENC_ERR_LOCK_BUSY,
-
-    /**
-     * This indicates that the size of the user buffer passed by the client is
-     * insufficient for the requested operation.
-     */
-    NV_ENC_ERR_NOT_ENOUGH_BUFFER,
-
-    /**
-     * This indicates that an invalid struct version was used by the client.
-     */
-    NV_ENC_ERR_INVALID_VERSION,
-
-    /**
-     * This indicates that ::NvEncMapInputResource() API failed to map the client
-     * provided input resource.
-     */
-    NV_ENC_ERR_MAP_FAILED,
-
-    /**
-     * This indicates encode driver requires more input buffers to produce an output
-     * bitstream. If this error is returned from ::NvEncEncodePicture() API, this
-     * is not a fatal error. If the client is encoding with B frames then,
-     * ::NvEncEncodePicture() API might be buffering the input frame for re-ordering.
-     *
-     * A client operating in synchronous mode cannot call ::NvEncLockBitstream()
-     * API on the output bitstream buffer if ::NvEncEncodePicture() returned the
-     * ::NV_ENC_ERR_NEED_MORE_INPUT error code.
-     * The client must continue providing input frames until encode driver returns
-     * ::NV_ENC_SUCCESS. After receiving ::NV_ENC_SUCCESS status the client can call
-     * ::NvEncLockBitstream() API on the output buffers in the same order in which
-     * it has called ::NvEncEncodePicture().
-     */
-    NV_ENC_ERR_NEED_MORE_INPUT,
-
-    /**
-     * This indicates that the HW encoder is busy encoding and is unable to encode
-     * the input. The client should call ::NvEncEncodePicture() again after few
-     * milliseconds.
-     */
-    NV_ENC_ERR_ENCODER_BUSY,
-
-    /**
-     * This indicates that the completion event passed in ::NvEncEncodePicture()
-     * API has not been registered with encoder driver using ::NvEncRegisterAsyncEvent().
-     */
-    NV_ENC_ERR_EVENT_NOT_REGISTERD,
-
-    /**
-     * This indicates that an unknown internal error has occurred.
-     */
-    NV_ENC_ERR_GENERIC,
-
-    /**
-     * This indicates that the client is attempting to use a feature
-     * that is not available for the license type for the current system.
-     */
-    NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY,
-
-    /**
-     * This indicates that the client is attempting to use a feature
-     * that is not implemented for the current version.
-     */
-    NV_ENC_ERR_UNIMPLEMENTED,
-
-    /**
-     * This indicates that the ::NvEncRegisterResource API failed to register the resource.
-     */
-    NV_ENC_ERR_RESOURCE_REGISTER_FAILED,
-
-    /**
-     * This indicates that the client is attempting to unregister a resource
-     * that has not been successfully registered.
-     */
-    NV_ENC_ERR_RESOURCE_NOT_REGISTERED,
-
-    /**
-     * This indicates that the client is attempting to unmap a resource
-     * that has not been successfully mapped.
-     */
-    NV_ENC_ERR_RESOURCE_NOT_MAPPED,
-
-} NVENCSTATUS;
-
-/**
- * Encode Picture encode flags.
- */
-typedef enum _NV_ENC_PIC_FLAGS
-{
-    NV_ENC_PIC_FLAG_FORCEINTRA         = 0x1,   /**< Encode the current picture as an Intra picture */
-    NV_ENC_PIC_FLAG_FORCEIDR           = 0x2,   /**< Encode the current picture as an IDR picture.
-                                                     This flag is only valid when Picture type decision is taken by the Encoder
-                                                     [_NV_ENC_INITIALIZE_PARAMS::enablePTD == 1]. */
-    NV_ENC_PIC_FLAG_OUTPUT_SPSPPS      = 0x4,   /**< Write the sequence and picture header in encoded bitstream of the current picture */
-    NV_ENC_PIC_FLAG_EOS                = 0x8,   /**< Indicates end of the input stream */
-} NV_ENC_PIC_FLAGS;
-
-/**
- * Memory heap to allocate input and output buffers.
- */
-typedef enum _NV_ENC_MEMORY_HEAP
-{
-    NV_ENC_MEMORY_HEAP_AUTOSELECT      = 0, /**< Memory heap to be decided by the encoder driver based on the usage */
-    NV_ENC_MEMORY_HEAP_VID             = 1, /**< Memory heap is in local video memory */
-    NV_ENC_MEMORY_HEAP_SYSMEM_CACHED   = 2, /**< Memory heap is in cached system memory */
-    NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED = 3  /**< Memory heap is in uncached system memory */
-} NV_ENC_MEMORY_HEAP;
-
-
-/**
- * H.264 entropy coding modes.
- */
-typedef enum _NV_ENC_H264_ENTROPY_CODING_MODE
-{
-    NV_ENC_H264_ENTROPY_CODING_MODE_AUTOSELECT = 0x0,   /**< Entropy coding mode is auto selected by the encoder driver */
-    NV_ENC_H264_ENTROPY_CODING_MODE_CABAC      = 0x1,   /**< Entropy coding mode is CABAC */
-    NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC      = 0x2    /**< Entropy coding mode is CAVLC */
-} NV_ENC_H264_ENTROPY_CODING_MODE;
-
-/**
- * H.264 specific Bdirect modes
- */
-typedef enum _NV_ENC_H264_BDIRECT_MODE
-{
-    NV_ENC_H264_BDIRECT_MODE_AUTOSELECT = 0x0,          /**< BDirect mode is auto selected by the encoder driver */
-    NV_ENC_H264_BDIRECT_MODE_DISABLE    = 0x1,          /**< Disable BDirect mode */
-    NV_ENC_H264_BDIRECT_MODE_TEMPORAL   = 0x2,          /**< Temporal BDirect mode */
-    NV_ENC_H264_BDIRECT_MODE_SPATIAL    = 0x3           /**< Spatial BDirect mode */
-} NV_ENC_H264_BDIRECT_MODE;
-
-/**
- * H.264 specific FMO usage
- */
-typedef enum _NV_ENC_H264_FMO_MODE
-{
-    NV_ENC_H264_FMO_AUTOSELECT          = 0x0,          /**< FMO usage is auto selected by the encoder driver */
-    NV_ENC_H264_FMO_ENABLE              = 0x1,          /**< Enable FMO */
-    NV_ENC_H264_FMO_DISABLE             = 0x2,          /**< Disble FMO */
-} NV_ENC_H264_FMO_MODE;
-
-/**
- * H.264 specific Adaptive Transform modes
- */
-typedef enum _NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE
-{
-    NV_ENC_H264_ADAPTIVE_TRANSFORM_AUTOSELECT = 0x0,   /**< Adaptive Transform 8x8 mode is auto selected by the encoder driver*/
-    NV_ENC_H264_ADAPTIVE_TRANSFORM_DISABLE    = 0x1,   /**< Adaptive Transform 8x8 mode disabled */
-    NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE     = 0x2,   /**< Adaptive Transform 8x8 mode should be used */
-} NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE;
-
-/**
- * Stereo frame packing modes.
- */
-typedef enum _NV_ENC_STEREO_PACKING_MODE
-{
-    NV_ENC_STEREO_PACKING_MODE_NONE             = 0x0,  /**< No Stereo packing required */
-    NV_ENC_STEREO_PACKING_MODE_CHECKERBOARD     = 0x1,  /**< Checkerboard mode for packing stereo frames */
-    NV_ENC_STEREO_PACKING_MODE_COLINTERLEAVE    = 0x2,  /**< Column Interleave mode for packing stereo frames */
-    NV_ENC_STEREO_PACKING_MODE_ROWINTERLEAVE    = 0x3,  /**< Row Interleave mode for packing stereo frames */
-    NV_ENC_STEREO_PACKING_MODE_SIDEBYSIDE       = 0x4,  /**< Side-by-side mode for packing stereo frames */
-    NV_ENC_STEREO_PACKING_MODE_TOPBOTTOM        = 0x5,  /**< Top-Bottom mode for packing stereo frames */
-    NV_ENC_STEREO_PACKING_MODE_FRAMESEQ         = 0x6   /**< Frame Sequential mode for packing stereo frames */
-} NV_ENC_STEREO_PACKING_MODE;
-
-/**
- *  Input Resource type
- */
-typedef enum _NV_ENC_INPUT_RESOURCE_TYPE
-{
-    NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX          = 0x0,   /**< input resource type is a directx9 surface*/
-    NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR    = 0x1,   /**< input resource type is a cuda device pointer surface*/
-    NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY        = 0x2,   /**< input resource type is a cuda array surface */
-    NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX       = 0x3    /**< input resource type is an OpenGL texture */
-} NV_ENC_INPUT_RESOURCE_TYPE;
-
-/**
- *  Encoder Device type
- */
-typedef enum _NV_ENC_DEVICE_TYPE
-{
-    NV_ENC_DEVICE_TYPE_DIRECTX          = 0x0,   /**< encode device type is a directx9 device */
-    NV_ENC_DEVICE_TYPE_CUDA             = 0x1,   /**< encode device type is a cuda device */
-    NV_ENC_DEVICE_TYPE_OPENGL           = 0x2    /**< encode device type is an OpenGL device.
-                                                      Use of this device type is supported only on Linux */
-} NV_ENC_DEVICE_TYPE;
-
-/**
- * Encoder capabilities enumeration.
- */
-typedef enum _NV_ENC_CAPS
-{
-    /**
-     * Maximum number of B-Frames supported.
-     */
-    NV_ENC_CAPS_NUM_MAX_BFRAMES,
-
-    /**
-     * Rate control modes supported.
-     * \n The API return value is a bitmask of the values in NV_ENC_PARAMS_RC_MODE.
-     */
-    NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES,
-
-    /**
-     * Indicates HW support for field mode encoding.
-     * \n 0 : Interlaced mode encoding is not supported.
-     * \n 1 : Interlaced field mode encoding is supported.
-     * \n 2 : Interlaced frame encoding and field mode encoding are both supported.
-     */
-     NV_ENC_CAPS_SUPPORT_FIELD_ENCODING,
-
-    /**
-     * Indicates HW support for monochrome mode encoding.
-     * \n 0 : Monochrome mode not supported.
-     * \n 1 : Monochrome mode supported.
-     */
-    NV_ENC_CAPS_SUPPORT_MONOCHROME,
-
-    /**
-     * Indicates HW support for FMO.
-     * \n 0 : FMO not supported.
-     * \n 1 : FMO supported.
-     */
-    NV_ENC_CAPS_SUPPORT_FMO,
-
-    /**
-     * Indicates HW capability for Quarter pel motion estimation.
-     * \n 0 : QuarterPel Motion Estimation not supported.
-     * \n 1 : QuarterPel Motion Estimation supported.
-     */
-    NV_ENC_CAPS_SUPPORT_QPELMV,
-
-    /**
-     * H.264 specific. Indicates HW support for BDirect modes.
-     * \n 0 : BDirect mode encoding not supported.
-     * \n 1 : BDirect mode encoding supported.
-     */
-    NV_ENC_CAPS_SUPPORT_BDIRECT_MODE,
-
-    /**
-     * H264 specific. Indicates HW support for CABAC entropy coding mode.
-     * \n 0 : CABAC entropy coding not supported.
-     * \n 1 : CABAC entropy coding supported.
-     */
-    NV_ENC_CAPS_SUPPORT_CABAC,
-
-    /**
-     * Indicates HW support for Adaptive Transform.
-     * \n 0 : Adaptive Transform not supported.
-     * \n 1 : Adaptive Transform supported.
-     */
-    NV_ENC_CAPS_SUPPORT_ADAPTIVE_TRANSFORM,
-
-    /**
-     * Reserved enum field.
-     */
-    NV_ENC_CAPS_SUPPORT_RESERVED,
-
-    /**
-     * Indicates HW support for encoding Temporal layers.
-     * \n 0 : Encoding Temporal layers not supported.
-     * \n 1 : Encoding Temporal layers supported.
-     */
-    NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS,
-
-    /**
-     * Indicates HW support for Hierarchical P frames.
-     * \n 0 : Hierarchical P frames not supported.
-     * \n 1 : Hierarchical P frames supported.
-     */
-    NV_ENC_CAPS_SUPPORT_HIERARCHICAL_PFRAMES,
-
-    /**
-     * Indicates HW support for Hierarchical B frames.
-     * \n 0 : Hierarchical B frames not supported.
-     * \n 1 : Hierarchical B frames supported.
-     */
-    NV_ENC_CAPS_SUPPORT_HIERARCHICAL_BFRAMES,
-
-    /**
-     * Maximum Encoding level supported (See ::NV_ENC_LEVEL for details).
-     */
-    NV_ENC_CAPS_LEVEL_MAX,
-
-    /**
-     * Minimum Encoding level supported (See ::NV_ENC_LEVEL for details).
-     */
-    NV_ENC_CAPS_LEVEL_MIN,
-
-    /**
-     * Indicates HW support for separate colour plane encoding.
-     * \n 0 : Separate colour plane encoding not supported.
-     * \n 1 : Separate colour plane encoding supported.
-     */
-    NV_ENC_CAPS_SEPARATE_COLOUR_PLANE,
-
-    /**
-     * Maximum output width supported.
-     */
-    NV_ENC_CAPS_WIDTH_MAX,
-
-    /**
-     * Maximum output height supported.
-     */
-    NV_ENC_CAPS_HEIGHT_MAX,
-
-    /**
-     * Indicates Temporal Scalability Support.
-     * \n 0 : Temporal SVC encoding not supported.
-     * \n 1 : Temporal SVC encoding supported.
-     */
-    NV_ENC_CAPS_SUPPORT_TEMPORAL_SVC,
-
-    /**
-     * Indicates Dynamic Encode Resolution Change Support.
-     * Support added from NvEncodeAPI version 2.0.
-     * \n 0 : Dynamic Encode Resolution Change not supported.
-     * \n 1 : Dynamic Encode Resolution Change supported.
-     */
-    NV_ENC_CAPS_SUPPORT_DYN_RES_CHANGE,
-
-    /**
-     * Indicates Dynamic Encode Bitrate Change Support.
-     * Support added from NvEncodeAPI version 2.0.
-     * \n 0 : Dynamic Encode bitrate change not supported.
-     * \n 1 : Dynamic Encode bitrate change supported.
-     */
-    NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE,
-
-    /**
-     * Indicates Forcing Constant QP On The Fly Support.
-     * Support added from NvEncodeAPI version 2.0.
-     * \n 0 : Forcing constant QP on the fly not supported.
-     * \n 1 : Forcing constant QP on the fly supported.
-     */
-    NV_ENC_CAPS_SUPPORT_DYN_FORCE_CONSTQP,
-
-    /**
-     * Indicates Dynamic rate control mode Change Support.
-     * \n 0 : Dynamic rate control mode change not supported.
-     * \n 1 : Dynamic rate control mode change supported.
-     */
-    NV_ENC_CAPS_SUPPORT_DYN_RCMODE_CHANGE,
-
-    /**
-     * Indicates Subframe readback support for slice-based encoding.
-     * \n 0 : Subframe readback not supported.
-     * \n 1 : Subframe readback supported.
-     */
-    NV_ENC_CAPS_SUPPORT_SUBFRAME_READBACK,
-
-    /**
-     * Indicates Constrained Encoding mode support.
-     * Support added from NvEncodeAPI version 2.0.
-     * \n 0 : Constrained encoding mode not supported.
-     * \n 1 : Constarined encoding mode supported.
-     * If this mode is supported client can enable this during initialisation.
-     * Client can then force a picture to be coded as constrained picture where
-     * each slice in a constrained picture will have constrained_intra_pred_flag set to 1
-     * and disable_deblocking_filter_idc will be set to 2 and prediction vectors for inter
-     * macroblocks in each slice will be restricted to the slice region.
-     */
-    NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING,
-
-    /**
-     * Indicates Intra Refresh Mode Support.
-     * Support added from NvEncodeAPI version 2.0.
-     * \n 0 : Intra Refresh Mode not supported.
-     * \n 1 : Intra Refresh Mode supported.
-     */
-    NV_ENC_CAPS_SUPPORT_INTRA_REFRESH,
-
-    /**
-     * Indicates Custom VBV Bufer Size support. It can be used for capping frame size.
-     * Support added from NvEncodeAPI version 2.0.
-     * \n 0 : Custom VBV buffer size specification from client, not supported.
-     * \n 1 : Custom VBV buffer size specification from client, supported.
-     */
-    NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE,
-
-    /**
-     * Indicates Dynamic Slice Mode Support.
-     * Support added from NvEncodeAPI version 2.0.
-     * \n 0 : Dynamic Slice Mode not supported.
-     * \n 1 : Dynamic Slice Mode supported.
-     */
-    NV_ENC_CAPS_SUPPORT_DYNAMIC_SLICE_MODE,
-
-    /**
-     * Indicates Reference Picture Invalidation Support.
-     * Support added from NvEncodeAPI version 2.0.
-     * \n 0 : Reference Picture Invalidation not supported.
-     * \n 1 : Reference Picture Invalidation supported.
-     */
-    NV_ENC_CAPS_SUPPORT_REF_PIC_INVALIDATION,
-
-    /**
-     * Indicates support for PreProcessing.
-     * The API return value is a bitmask of the values defined in ::NV_ENC_PREPROC_FLAGS
-     */
-    NV_ENC_CAPS_PREPROC_SUPPORT,
-
-    /**
-    * Indicates support Async mode.
-    * \n 0 : Async Encode mode not supported.
-    * \n 1 : Async Encode mode supported.
-    */
-    NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT,
-
-    /**
-     * Maximum MBs per frame supported.
-     */
-    NV_ENC_CAPS_MB_NUM_MAX,
-
-    /**
-     * Maximum aggregate throughput in MBs per sec.
-     */
-    NV_ENC_CAPS_MB_PER_SEC_MAX,
-
-    /**
-     * Indicates HW support for YUV444 mode encoding.
-     * \n 0 : YUV444 mode encoding not supported.
-     * \n 1 : YUV444 mode encoding supported.
-     */
-    NV_ENC_CAPS_SUPPORT_YUV444_ENCODE,
-
-    /**
-     * Indicates HW support for lossless encoding.
-     * \n 0 : lossless encoding not supported.
-     * \n 1 : lossless encoding supported.
-     */
-    NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE,
-
-     /**
-     * Indicates HW support for Sample Adaptive Offset.
-     * \n 0 : SAO not supported.
-     * \n 1 : SAO encoding supported.
-     */
-    NV_ENC_CAPS_SUPPORT_SAO,
-
-    /**
-     * Indicates HW support for MEOnly Mode.
-     * \n 0 : MEOnly Mode not supported.
-     * \n 1 : MEOnly Mode supported for I and P frames.
-     * \n 2 : MEOnly Mode supported for I, P and B frames.
-     */
-    NV_ENC_CAPS_SUPPORT_MEONLY_MODE,
-
-    /**
-     * Indicates HW support for lookahead encoding (enableLookahead=1).
-     * \n 0 : Lookahead not supported.
-     * \n 1 : Lookahead supported.
-     */
-    NV_ENC_CAPS_SUPPORT_LOOKAHEAD,
-
-    /**
-     * Indicates HW support for temporal AQ encoding (enableTemporalAQ=1).
-     * \n 0 : Temporal AQ not supported.
-     * \n 1 : Temporal AQ supported.
-     */
-    NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ,
-    /**
-     * Indicates HW support for 10 bit encoding.
-     * \n 0 : 10 bit encoding not supported.
-     * \n 1 : 10 bit encoding supported.
-     */
-    NV_ENC_CAPS_SUPPORT_10BIT_ENCODE,
-    /**
-     * Maximum number of Long Term Reference frames supported
-     */
-    NV_ENC_CAPS_NUM_MAX_LTR_FRAMES,
-
-    /**
-     * Indicates HW support for Weighted Predicition.
-     * \n 0 : Weighted Predicition not supported.
-     * \n 1 : Weighted Predicition supported.
-     */
-    NV_ENC_CAPS_SUPPORT_WEIGHTED_PREDICTION,
-
-    /**
-     * Reserved - Not to be used by clients.
-     */
-    NV_ENC_CAPS_EXPOSED_COUNT
-} NV_ENC_CAPS;
-
-/**
- *  HEVC CU SIZE
- */
-typedef enum _NV_ENC_HEVC_CUSIZE
-{
-    NV_ENC_HEVC_CUSIZE_AUTOSELECT = 0,
-    NV_ENC_HEVC_CUSIZE_8x8        = 1,
-    NV_ENC_HEVC_CUSIZE_16x16      = 2,
-    NV_ENC_HEVC_CUSIZE_32x32      = 3,
-    NV_ENC_HEVC_CUSIZE_64x64      = 4,
-}NV_ENC_HEVC_CUSIZE;
-
-/**
- * Input struct for querying Encoding capabilities.
- */
-typedef struct _NV_ENC_CAPS_PARAM
-{
-    uint32_t version;                                  /**< [in]: Struct version. Must be set to ::NV_ENC_CAPS_PARAM_VER */
-    NV_ENC_CAPS  capsToQuery;                          /**< [in]: Specifies the encode capability to be queried. Client should pass a member for ::NV_ENC_CAPS enum. */
-    uint32_t reserved[62];                             /**< [in]: Reserved and must be set to 0 */
-} NV_ENC_CAPS_PARAM;
-
-/** NV_ENC_CAPS_PARAM struct version. */
-#define NV_ENC_CAPS_PARAM_VER NVENCAPI_STRUCT_VERSION(1)
-
-
-/**
- * Creation parameters for input buffer.
- */
-typedef struct _NV_ENC_CREATE_INPUT_BUFFER
-{
-    uint32_t                  version;                 /**< [in]: Struct version. Must be set to ::NV_ENC_CREATE_INPUT_BUFFER_VER */
-    uint32_t                  width;                   /**< [in]: Input buffer width */
-    uint32_t                  height;                  /**< [in]: Input buffer width */
-    NV_ENC_MEMORY_HEAP        memoryHeap;              /**< [in]: Deprecated. Do not use */
-    NV_ENC_BUFFER_FORMAT      bufferFmt;               /**< [in]: Input buffer format */
-    uint32_t                  reserved;                /**< [in]: Reserved and must be set to 0 */
-    NV_ENC_INPUT_PTR          inputBuffer;             /**< [out]: Pointer to input buffer */
-    void*                     pSysMemBuffer;           /**< [in]: Pointer to existing sysmem buffer */
-    uint32_t                  reserved1[57];           /**< [in]: Reserved and must be set to 0 */
-    void*                     reserved2[63];           /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_CREATE_INPUT_BUFFER;
-
-/** NV_ENC_CREATE_INPUT_BUFFER struct version. */
-#define NV_ENC_CREATE_INPUT_BUFFER_VER NVENCAPI_STRUCT_VERSION(1)
-
-/**
- * Creation parameters for output bitstream buffer.
- */
-typedef struct _NV_ENC_CREATE_BITSTREAM_BUFFER
-{
-    uint32_t              version;                     /**< [in]: Struct version. Must be set to ::NV_ENC_CREATE_BITSTREAM_BUFFER_VER */
-    uint32_t              size;                        /**< [in]: Deprecated. Do not use */
-    NV_ENC_MEMORY_HEAP    memoryHeap;                  /**< [in]: Deprecated. Do not use */
-    uint32_t              reserved;                    /**< [in]: Reserved and must be set to 0 */
-    NV_ENC_OUTPUT_PTR     bitstreamBuffer;             /**< [out]: Pointer to the output bitstream buffer */
-    void*                 bitstreamBufferPtr;          /**< [out]: Reserved and should not be used */
-    uint32_t              reserved1[58];               /**< [in]: Reserved and should be set to 0 */
-    void*                 reserved2[64];               /**< [in]: Reserved and should be set to NULL */
-} NV_ENC_CREATE_BITSTREAM_BUFFER;
-
-/** NV_ENC_CREATE_BITSTREAM_BUFFER struct version. */
-#define NV_ENC_CREATE_BITSTREAM_BUFFER_VER NVENCAPI_STRUCT_VERSION(1)
-
-/**
- * Structs needed for ME only mode.
- */
-typedef struct _NV_ENC_MVECTOR
-{
-    int16_t             mvx;               /**< the x component of MV in qpel units */
-    int16_t             mvy;               /**< the y component of MV in qpel units */
-} NV_ENC_MVECTOR;
-
-/**
- * Motion vector structure per macroblock for H264 motion estimation.
- */
-typedef struct _NV_ENC_H264_MV_DATA
-{
-    NV_ENC_MVECTOR      mv[4];             /**< up to 4 vectors for 8x8 partition */
-    uint8_t             mbType;            /**< 0 (I), 1 (P), 2 (IPCM), 3 (B) */
-    uint8_t             partitionType;     /**< Specifies the block partition type. 0:16x16, 1:8x8, 2:16x8, 3:8x16 */
-    uint16_t            reserved;          /**< reserved padding for alignment */
-    uint32_t            mbCost;
-} NV_ENC_H264_MV_DATA;
-
-/**
- * Motion vector structure per CU for HEVC motion estimation.
- */
-typedef struct _NV_ENC_HEVC_MV_DATA
-{
-    NV_ENC_MVECTOR    mv[4];               /**< up to 4 vectors within a CU */
-    uint8_t           cuType;              /**< 0 (I), 1(P), 2 (Skip) */
-    uint8_t           cuSize;              /**< 0: 8x8, 1: 16x16, 2: 32x32, 3: 64x64 */
-    uint8_t           partitionMode;       /**< The CU partition mode
-                                                0 (2Nx2N), 1 (2NxN), 2(Nx2N), 3 (NxN),
-                                                4 (2NxnU), 5 (2NxnD), 6(nLx2N), 7 (nRx2N) */
-    uint8_t           lastCUInCTB;         /**< Marker to separate CUs in the current CTB from CUs in the next CTB */
-} NV_ENC_HEVC_MV_DATA;
-
-/**
- * Creation parameters for output motion vector buffer for ME only mode.
- */
-typedef struct _NV_ENC_CREATE_MV_BUFFER
-{
-    uint32_t            version;           /**< [in]: Struct version. Must be set to NV_ENC_CREATE_MV_BUFFER_VER */
-    NV_ENC_OUTPUT_PTR   mvBuffer;          /**< [out]: Pointer to the output motion vector buffer */
-    uint32_t            reserved1[255];    /**< [in]: Reserved and should be set to 0 */
-    void*               reserved2[63];     /**< [in]: Reserved and should be set to NULL */
-} NV_ENC_CREATE_MV_BUFFER;
-
-/** NV_ENC_CREATE_MV_BUFFER struct version*/
-#define NV_ENC_CREATE_MV_BUFFER_VER NVENCAPI_STRUCT_VERSION(1)
-
-/**
- * QP value for frames
- */
-typedef struct _NV_ENC_QP
-{
-    uint32_t        qpInterP;
-    uint32_t        qpInterB;
-    uint32_t        qpIntra;
-} NV_ENC_QP;
-
-/**
- * Rate Control Configuration Paramters
- */
- typedef struct _NV_ENC_RC_PARAMS
- {
-    uint32_t                        version;
-    NV_ENC_PARAMS_RC_MODE           rateControlMode;                             /**< [in]: Specifies the rate control mode. Check support for various rate control modes using ::NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES caps. */
-    NV_ENC_QP                       constQP;                                     /**< [in]: Specifies the initial QP to be used for encoding, these values would be used for all frames if in Constant QP mode. */
-    uint32_t                        averageBitRate;                              /**< [in]: Specifies the average bitrate(in bits/sec) used for encoding. */
-    uint32_t                        maxBitRate;                                  /**< [in]: Specifies the maximum bitrate for the encoded output. This is used for VBR and ignored for CBR mode. */
-    uint32_t                        vbvBufferSize;                               /**< [in]: Specifies the VBV(HRD) buffer size. in bits. Set 0 to use the default VBV  buffer size. */
-    uint32_t                        vbvInitialDelay;                             /**< [in]: Specifies the VBV(HRD) initial delay in bits. Set 0 to use the default VBV  initial delay .*/
-    uint32_t                        enableMinQP          :1;                     /**< [in]: Set this to 1 if minimum QP used for rate control. */
-    uint32_t                        enableMaxQP          :1;                     /**< [in]: Set this to 1 if maximum QP used for rate control. */
-    uint32_t                        enableInitialRCQP    :1;                     /**< [in]: Set this to 1 if user suppplied initial QP is used for rate control. */
-    uint32_t                        enableAQ             :1;                     /**< [in]: Set this to 1 to enable adaptive quantization (Spatial). */
-    uint32_t                        enableExtQPDeltaMap  :1;                     /**< [in]: Set this to 1 to enable additional QP modifier for each MB supplied by client though signed byte array pointed to by NV_ENC_PIC_PARAMS::qpDeltaMap (Not Supported when AQ(Spatial/Temporal) is enabled) */
-    uint32_t                        enableLookahead      :1;                     /**< [in]: Set this to 1 to enable lookahead with depth <lookaheadDepth> (if lookahead is enabled, input frames must remain available to the encoder until encode completion) */
-    uint32_t                        disableIadapt        :1;                     /**< [in]: Set this to 1 to disable adaptive I-frame insertion at scene cuts (only has an effect when lookahead is enabled) */
-    uint32_t                        disableBadapt        :1;                     /**< [in]: Set this to 1 to disable adaptive B-frame decision (only has an effect when lookahead is enabled) */
-    uint32_t                        enableTemporalAQ     :1;                     /**< [in]: Set this to 1 to enable temporal AQ for H.264 */
-    uint32_t                        zeroReorderDelay     :1;                     /**< [in]: Set this to 1 to indicate zero latency operation (no reordering delay, num_reorder_frames=0) */
-    uint32_t                        enableNonRefP        :1;                     /**< [in]: Set this to 1 to enable automatic insertion of non-reference P-frames (no effect if enablePTD=0) */
-    uint32_t                        strictGOPTarget      :1;                     /**< [in]: Set this to 1 to minimize GOP-to-GOP rate fluctuations */
-    uint32_t                        aqStrength           :4;                     /**< [in]: When AQ (Spatial) is enabled (i.e. NV_ENC_RC_PARAMS::enableAQ is set), this field is used to specify AQ strength. AQ strength scale is from 1 (low) - 15 (aggressive). If not set, strength is autoselected by driver. */
-    uint32_t                        reservedBitFields    :16;                    /**< [in]: Reserved bitfields and must be set to 0 */
-    NV_ENC_QP                       minQP;                                       /**< [in]: Specifies the minimum QP used for rate control. Client must set NV_ENC_CONFIG::enableMinQP to 1. */
-    NV_ENC_QP                       maxQP;                                       /**< [in]: Specifies the maximum QP used for rate control. Client must set NV_ENC_CONFIG::enableMaxQP to 1. */
-    NV_ENC_QP                       initialRCQP;                                 /**< [in]: Specifies the initial QP used for rate control. Client must set NV_ENC_CONFIG::enableInitialRCQP to 1. */
-    uint32_t                        temporallayerIdxMask;                        /**< [in]: Specifies the temporal layers (as a bitmask) whose QPs have changed. Valid max bitmask is [2^NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS - 1] */
-    uint8_t                         temporalLayerQP[8];                          /**< [in]: Specifies the temporal layer QPs used for rate control. Temporal layer index is used as as the array index */
-    uint8_t                         targetQuality;                               /**< [in]: Target CQ (Constant Quality) level for VBR mode (range 0-51 with 0-automatic)  */
-    uint8_t                         targetQualityLSB;                            /**< [in]: Fractional part of target quality (as 8.8 fixed point format) */
-    uint16_t                        lookaheadDepth;                              /**< [in]: Maximum depth of lookahead with range 0-32 (only used if enableLookahead=1) */
-    uint32_t                        reserved[9];
- } NV_ENC_RC_PARAMS;
-
-/** macro for constructing the version field of ::_NV_ENC_RC_PARAMS */
-#define NV_ENC_RC_PARAMS_VER NVENCAPI_STRUCT_VERSION(1)
-
-
-
-/**
- * \struct _NV_ENC_CONFIG_H264_VUI_PARAMETERS
- * H264 Video Usability Info parameters
- */
-typedef struct _NV_ENC_CONFIG_H264_VUI_PARAMETERS
-{
-    uint32_t    overscanInfoPresentFlag;              /**< [in]: if set to 1 , it specifies that the overscanInfo is present */
-    uint32_t    overscanInfo;                         /**< [in]: Specifies the overscan info(as defined in Annex E of the ITU-T Specification). */
-    uint32_t    videoSignalTypePresentFlag;           /**< [in]: If set to 1, it specifies  that the videoFormat, videoFullRangeFlag and colourDescriptionPresentFlag are present. */
-    uint32_t    videoFormat;                          /**< [in]: Specifies the source video format(as defined in Annex E of the ITU-T Specification).*/
-    uint32_t    videoFullRangeFlag;                   /**< [in]: Specifies the output range of the luma and chroma samples(as defined in Annex E of the ITU-T Specification). */
-    uint32_t    colourDescriptionPresentFlag;         /**< [in]: If set to 1, it specifies that the colourPrimaries, transferCharacteristics and colourMatrix are present. */
-    uint32_t    colourPrimaries;                      /**< [in]: Specifies color primaries for converting to RGB(as defined in Annex E of the ITU-T Specification) */
-    uint32_t    transferCharacteristics;              /**< [in]: Specifies the opto-electronic transfer characteristics to use (as defined in Annex E of the ITU-T Specification) */
-    uint32_t    colourMatrix;                         /**< [in]: Specifies the matrix coefficients used in deriving the luma and chroma from the RGB primaries (as defined in Annex E of the ITU-T Specification). */
-    uint32_t    chromaSampleLocationFlag;             /**< [in]: if set to 1 , it specifies that the chromaSampleLocationTop and chromaSampleLocationBot are present.*/
-    uint32_t    chromaSampleLocationTop;              /**< [in]: Specifies the chroma sample location for top field(as defined in Annex E of the ITU-T Specification) */
-    uint32_t    chromaSampleLocationBot;              /**< [in]: Specifies the chroma sample location for bottom field(as defined in Annex E of the ITU-T Specification) */
-    uint32_t    bitstreamRestrictionFlag;             /**< [in]: if set to 1, it specifies the bitstream restriction parameters are present in the bitstream.*/
-    uint32_t    reserved[15];
-}NV_ENC_CONFIG_H264_VUI_PARAMETERS;
-
-typedef NV_ENC_CONFIG_H264_VUI_PARAMETERS NV_ENC_CONFIG_HEVC_VUI_PARAMETERS;
-
-/**
- * \struct _NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE
- * External motion vector hint counts per block type.
- * H264 supports multiple hint while HEVC supports one hint for each valid candidate.
- */
-typedef struct _NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE
-{
-    uint32_t   numCandsPerBlk16x16                   : 4;   /**< [in]: Supported for H264,HEVC.It Specifies the number of candidates per 16x16 block. */
-    uint32_t   numCandsPerBlk16x8                    : 4;   /**< [in]: Supported for H264 only.Specifies the number of candidates per 16x8 block. */
-    uint32_t   numCandsPerBlk8x16                    : 4;   /**< [in]: Supported for H264 only.Specifies the number of candidates per 8x16 block. */
-    uint32_t   numCandsPerBlk8x8                     : 4;   /**< [in]: Supported for H264,HEVC.Specifies the number of candidates per 8x8 block. */
-    uint32_t   reserved                              : 16;  /**< [in]: Reserved for padding. */
-    uint32_t   reserved1[3];                                /**< [in]: Reserved for future use. */
-} NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE;
-
-
-/**
- * \struct _NVENC_EXTERNAL_ME_HINT
- * External Motion Vector hint structure.
- */
-typedef struct _NVENC_EXTERNAL_ME_HINT
-{
-    int32_t    mvx         : 12;                        /**< [in]: Specifies the x component of integer pixel MV (relative to current MB) S12.0. */
-    int32_t    mvy         : 10;                        /**< [in]: Specifies the y component of integer pixel MV (relative to current MB) S10.0 .*/
-    int32_t    refidx      : 5;                         /**< [in]: Specifies the reference index (31=invalid). Current we support only 1 reference frame per direction for external hints, so \p refidx must be 0. */
-    int32_t    dir         : 1;                         /**< [in]: Specifies the direction of motion estimation . 0=L0 1=L1.*/
-    int32_t    partType    : 2;                         /**< [in]: Specifies the block partition type.0=16x16 1=16x8 2=8x16 3=8x8 (blocks in partition must be consecutive).*/
-    int32_t    lastofPart  : 1;                         /**< [in]: Set to 1 for the last MV of (sub) partition  */
-    int32_t    lastOfMB    : 1;                         /**< [in]: Set to 1 for the last MV of macroblock. */
-} NVENC_EXTERNAL_ME_HINT;
-
-
-/**
- * \struct _NV_ENC_CONFIG_H264
- * H264 encoder configuration parameters
- */
-typedef struct _NV_ENC_CONFIG_H264
-{
-    uint32_t enableTemporalSVC         :1;                          /**< [in]: Set to 1 to enable SVC temporal*/
-    uint32_t enableStereoMVC           :1;                          /**< [in]: Set to 1 to enable stereo MVC*/
-    uint32_t hierarchicalPFrames       :1;                          /**< [in]: Set to 1 to enable hierarchical PFrames */
-    uint32_t hierarchicalBFrames       :1;                          /**< [in]: Set to 1 to enable hierarchical BFrames */
-    uint32_t outputBufferingPeriodSEI  :1;                          /**< [in]: Set to 1 to write SEI buffering period syntax in the bitstream */
-    uint32_t outputPictureTimingSEI    :1;                          /**< [in]: Set to 1 to write SEI picture timing syntax in the bitstream.  When set for following rateControlMode : NV_ENC_PARAMS_RC_CBR, NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ,
-                                                                               NV_ENC_PARAMS_RC_CBR_HQ, filler data is inserted if needed to achieve hrd bitrate */
-    uint32_t outputAUD                 :1;                          /**< [in]: Set to 1 to write access unit delimiter syntax in bitstream */
-    uint32_t disableSPSPPS             :1;                          /**< [in]: Set to 1 to disable writing of Sequence and Picture parameter info in bitstream */
-    uint32_t outputFramePackingSEI     :1;                          /**< [in]: Set to 1 to enable writing of frame packing arrangement SEI messages to bitstream */
-    uint32_t outputRecoveryPointSEI    :1;                          /**< [in]: Set to 1 to enable writing of recovery point SEI message */
-    uint32_t enableIntraRefresh        :1;                          /**< [in]: Set to 1 to enable gradual decoder refresh or intra refresh. If the GOP structure uses B frames this will be ignored */
-    uint32_t enableConstrainedEncoding :1;                          /**< [in]: Set this to 1 to enable constrainedFrame encoding where each slice in the constarined picture is independent of other slices
-                                                                               Check support for constrained encoding using ::NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING caps. */
-    uint32_t repeatSPSPPS              :1;                          /**< [in]: Set to 1 to enable writing of Sequence and Picture parameter for every IDR frame */
-    uint32_t enableVFR                 :1;                          /**< [in]: Set to 1 to enable variable frame rate. */
-    uint32_t enableLTR                 :1;                          /**< [in]: Set to 1 to enable LTR (Long Term Reference) frame support. LTR can be used in two modes: "LTR Trust" mode and "LTR Per Picture" mode.
-                                                                               LTR Trust mode: In this mode, ltrNumFrames pictures after IDR are automatically marked as LTR. This mode is enabled by setting ltrTrustMode = 1.
-                                                                                               Use of LTR Trust mode is strongly discouraged as this mode may be deprecated in future.
-                                                                               LTR Per Picture mode: In this mode, client can control whether the current picture should be marked as LTR. Enable this mode by setting
-                                                                                                     ltrTrustMode = 0 and ltrMarkFrame = 1 for the picture to be marked as LTR. This is the preferred mode
-                                                                                                     for using LTR.
-                                                                               Note that LTRs are not supported if encoding session is configured with B-frames */
-    uint32_t qpPrimeYZeroTransformBypassFlag :1;                    /**< [in]: To enable lossless encode set this to 1, set QP to 0 and RC_mode to NV_ENC_PARAMS_RC_CONSTQP and profile to HIGH_444_PREDICTIVE_PROFILE.
-                                                                               Check support for lossless encoding using ::NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE caps.  */
-    uint32_t useConstrainedIntraPred   :1;                          /**< [in]: Set 1 to enable constrained intra prediction. */
-    uint32_t reservedBitFields         :15;                         /**< [in]: Reserved bitfields and must be set to 0 */
-    uint32_t level;                                                 /**< [in]: Specifies the encoding level. Client is recommended to set this to NV_ENC_LEVEL_AUTOSELECT in order to enable the NvEncodeAPI interface to select the correct level. */
-    uint32_t idrPeriod;                                             /**< [in]: Specifies the IDR interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG.Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically. */
-    uint32_t separateColourPlaneFlag;                               /**< [in]: Set to 1 to enable 4:4:4 separate colour planes */
-    uint32_t disableDeblockingFilterIDC;                            /**< [in]: Specifies the deblocking filter mode. Permissible value range: [0,2] */
-    uint32_t numTemporalLayers;                                     /**< [in]: Specifies max temporal layers to be used for hierarchical coding. Valid value range is [1,::NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS] */
-    uint32_t spsId;                                                 /**< [in]: Specifies the SPS id of the sequence header */
-    uint32_t ppsId;                                                 /**< [in]: Specifies the PPS id of the picture header */
-    NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE adaptiveTransformMode;      /**< [in]: Specifies the AdaptiveTransform Mode. Check support for AdaptiveTransform mode using ::NV_ENC_CAPS_SUPPORT_ADAPTIVE_TRANSFORM caps. */
-    NV_ENC_H264_FMO_MODE                fmoMode;                    /**< [in]: Specified the FMO Mode. Check support for FMO using ::NV_ENC_CAPS_SUPPORT_FMO caps. */
-    NV_ENC_H264_BDIRECT_MODE            bdirectMode;                /**< [in]: Specifies the BDirect mode. Check support for BDirect mode using ::NV_ENC_CAPS_SUPPORT_BDIRECT_MODE caps.*/
-    NV_ENC_H264_ENTROPY_CODING_MODE     entropyCodingMode;          /**< [in]: Specifies the entropy coding mode. Check support for CABAC mode using ::NV_ENC_CAPS_SUPPORT_CABAC caps. */
-    NV_ENC_STEREO_PACKING_MODE          stereoMode;                 /**< [in]: Specifies the stereo frame packing mode which is to be signalled in frame packing arrangement SEI */
-    uint32_t                            intraRefreshPeriod;         /**< [in]: Specifies the interval between successive intra refresh if enableIntrarefresh is set. Requires enableIntraRefresh to be set.
-                                                                               Will be disabled if NV_ENC_CONFIG::gopLength is not set to NVENC_INFINITE_GOPLENGTH. */
-    uint32_t                            intraRefreshCnt;            /**< [in]: Specifies the length of intra refresh in number of frames for periodic intra refresh. This value should be smaller than intraRefreshPeriod */
-    uint32_t                            maxNumRefFrames;            /**< [in]: Specifies the DPB size used for encoding. Setting it to 0 will let driver use the default dpb size.
-                                                                               The low latency application which wants to invalidate reference frame as an error resilience tool
-                                                                               is recommended to use a large DPB size so that the encoder can keep old reference frames which can be used if recent
-                                                                               frames are invalidated. */
-    uint32_t                            sliceMode;                  /**< [in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices
-                                                                               sliceMode = 0 MB based slices, sliceMode = 1 Byte based slices, sliceMode = 2 MB row based slices, sliceMode = 3, numSlices in Picture
-                                                                               When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting
-                                                                               When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice */
-    uint32_t                            sliceModeData;              /**< [in]: Specifies the parameter needed for sliceMode. For:
-                                                                               sliceMode = 0, sliceModeData specifies # of MBs in each slice (except last slice)
-                                                                               sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice)
-                                                                               sliceMode = 2, sliceModeData specifies # of MB rows in each slice (except last slice)
-                                                                               sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally */
-    NV_ENC_CONFIG_H264_VUI_PARAMETERS   h264VUIParameters;          /**< [in]: Specifies the H264 video usability info pamameters */
-    uint32_t                            ltrNumFrames;               /**< [in]: Specifies the number of LTR frames. This parameter has different meaning in two LTR modes.
-                                                                               In "LTR Trust" mode (ltrTrustMode = 1), encoder will mark the first ltrNumFrames base layer reference frames within each IDR interval as LTR.
-                                                                               In "LTR Per Picture" mode (ltrTrustMode = 0 and ltrMarkFrame = 1), ltrNumFrames specifies maximum number of LTR frames in DPB. */
-    uint32_t                            ltrTrustMode;               /**< [in]: Specifies the LTR operating mode. See comments near NV_ENC_CONFIG_H264::enableLTR for description of the two modes.
-                                                                               Set to 1 to use "LTR Trust" mode of LTR operation. Clients are discouraged to use "LTR Trust" mode as this mode may
-                                                                               be deprecated in future releases.
-                                                                               Set to 0 when using "LTR Per Picture" mode of LTR operation. */
-    uint32_t                            chromaFormatIDC;            /**< [in]: Specifies the chroma format. Should be set to 1 for yuv420 input, 3 for yuv444 input.
-                                                                               Check support for YUV444 encoding using ::NV_ENC_CAPS_SUPPORT_YUV444_ENCODE caps.*/
-    uint32_t                            maxTemporalLayers;          /**< [in]: Specifies the max temporal layer used for hierarchical coding. */
-    uint32_t                            reserved1[270];             /**< [in]: Reserved and must be set to 0 */
-    void*                               reserved2[64];              /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_CONFIG_H264;
-
-
-/**
- * \struct _NV_ENC_CONFIG_HEVC
- * HEVC encoder configuration parameters to be set during initialization.
- */
-typedef struct _NV_ENC_CONFIG_HEVC
-{
-    uint32_t level;                                                 /**< [in]: Specifies the level of the encoded bitstream.*/
-    uint32_t tier;                                                  /**< [in]: Specifies the level tier of the encoded bitstream.*/
-    NV_ENC_HEVC_CUSIZE minCUSize;                                   /**< [in]: Specifies the minimum size of luma coding unit.*/
-    NV_ENC_HEVC_CUSIZE maxCUSize;                                   /**< [in]: Specifies the maximum size of luma coding unit. Currently NVENC SDK only supports maxCUSize equal to NV_ENC_HEVC_CUSIZE_32x32.*/
-    uint32_t useConstrainedIntraPred               :1;              /**< [in]: Set 1 to enable constrained intra prediction. */
-    uint32_t disableDeblockAcrossSliceBoundary     :1;              /**< [in]: Set 1 to disable in loop filtering across slice boundary.*/
-    uint32_t outputBufferingPeriodSEI              :1;              /**< [in]: Set 1 to write SEI buffering period syntax in the bitstream */
-    uint32_t outputPictureTimingSEI                :1;              /**< [in]: Set 1 to write SEI picture timing syntax in the bitstream */
-    uint32_t outputAUD                             :1;              /**< [in]: Set 1 to write Access Unit Delimiter syntax. */
-    uint32_t enableLTR                             :1;              /**< [in]: Set to 1 to enable LTR (Long Term Reference) frame support. LTR can be used in two modes: "LTR Trust" mode and "LTR Per Picture" mode.
-                                                                               LTR Trust mode: In this mode, ltrNumFrames pictures after IDR are automatically marked as LTR. This mode is enabled by setting ltrTrustMode = 1.
-                                                                                               Use of LTR Trust mode is strongly discouraged as this mode may be deprecated in future releases.
-                                                                               LTR Per Picture mode: In this mode, client can control whether the current picture should be marked as LTR. Enable this mode by setting
-                                                                                                     ltrTrustMode = 0 and ltrMarkFrame = 1 for the picture to be marked as LTR. This is the preferred mode
-                                                                                                     for using LTR.
-                                                                               Note that LTRs are not supported if encoding session is configured with B-frames */
-    uint32_t disableSPSPPS                         :1;              /**< [in]: Set 1 to disable VPS,SPS and PPS signalling in the bitstream. */
-    uint32_t repeatSPSPPS                          :1;              /**< [in]: Set 1 to output VPS,SPS and PPS for every IDR frame.*/
-    uint32_t enableIntraRefresh                    :1;              /**< [in]: Set 1 to enable gradual decoder refresh or intra refresh. If the GOP structure uses B frames this will be ignored */
-    uint32_t chromaFormatIDC                       :2;              /**< [in]: Specifies the chroma format. Should be set to 1 for yuv420 input, 3 for yuv444 input.*/
-    uint32_t pixelBitDepthMinus8                   :3;              /**< [in]: Specifies pixel bit depth minus 8. Should be set to 0 for 8 bit input, 2 for 10 bit input.*/
-    uint32_t reserved                              :18;             /**< [in]: Reserved bitfields.*/
-    uint32_t idrPeriod;                                             /**< [in]: Specifies the IDR interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG.Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically. */
-    uint32_t intraRefreshPeriod;                                    /**< [in]: Specifies the interval between successive intra refresh if enableIntrarefresh is set. Requires enableIntraRefresh to be set.
-                                                                    Will be disabled if NV_ENC_CONFIG::gopLength is not set to NVENC_INFINITE_GOPLENGTH. */
-    uint32_t intraRefreshCnt;                                       /**< [in]: Specifies the length of intra refresh in number of frames for periodic intra refresh. This value should be smaller than intraRefreshPeriod */
-    uint32_t maxNumRefFramesInDPB;                                  /**< [in]: Specifies the maximum number of references frames in the DPB.*/
-    uint32_t ltrNumFrames;                                          /**< [in]: This parameter has different meaning in two LTR modes.
-                                                                               In "LTR Trust" mode (ltrTrustMode = 1), encoder will mark the first ltrNumFrames base layer reference frames within each IDR interval as LTR.
-                                                                               In "LTR Per Picture" mode (ltrTrustMode = 0 and ltrMarkFrame = 1), ltrNumFrames specifies maximum number of LTR frames in DPB. */
-    uint32_t vpsId;                                                 /**< [in]: Specifies the VPS id of the video parameter set */
-    uint32_t spsId;                                                 /**< [in]: Specifies the SPS id of the sequence header */
-    uint32_t ppsId;                                                 /**< [in]: Specifies the PPS id of the picture header */
-    uint32_t sliceMode;                                             /**< [in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices
-                                                                                sliceMode = 0 CTU based slices, sliceMode = 1 Byte based slices, sliceMode = 2 CTU row based slices, sliceMode = 3, numSlices in Picture
-                                                                                When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice */
-    uint32_t sliceModeData;                                         /**< [in]: Specifies the parameter needed for sliceMode. For:
-                                                                                sliceMode = 0, sliceModeData specifies # of CTUs in each slice (except last slice)
-                                                                                sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice)
-                                                                                sliceMode = 2, sliceModeData specifies # of CTU rows in each slice (except last slice)
-                                                                                sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally */
-    uint32_t maxTemporalLayersMinus1;                               /**< [in]: Specifies the max temporal layer used for hierarchical coding. */
-    NV_ENC_CONFIG_HEVC_VUI_PARAMETERS   hevcVUIParameters;          /**< [in]: Specifies the HEVC video usability info pamameters */
-    uint32_t ltrTrustMode;                                          /**< [in]: Specifies the LTR operating mode. See comments near NV_ENC_CONFIG_HEVC::enableLTR for description of the two modes.
-                                                                               Set to 1 to use "LTR Trust" mode of LTR operation. Clients are discouraged to use "LTR Trust" mode as this mode may
-                                                                               be deprecated in future releases.
-                                                                               Set to 0 when using "LTR Per Picture" mode of LTR operation. */
-    uint32_t reserved1[217];                                        /**< [in]: Reserved and must be set to 0.*/
-    void*    reserved2[64];                                         /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_CONFIG_HEVC;
-
-/**
- * \struct _NV_ENC_CONFIG_H264_MEONLY
- * H264 encoder configuration parameters for ME only Mode
- *
- */
-typedef struct _NV_ENC_CONFIG_H264_MEONLY
-{
-    uint32_t disablePartition16x16 :1;                          /**< [in]: Disable MotionEstimation on 16x16 blocks*/
-    uint32_t disablePartition8x16  :1;                          /**< [in]: Disable MotionEstimation on 8x16 blocks*/
-    uint32_t disablePartition16x8  :1;                          /**< [in]: Disable MotionEstimation on 16x8 blocks*/
-    uint32_t disablePartition8x8   :1;                          /**< [in]: Disable MotionEstimation on 8x8 blocks*/
-    uint32_t disableIntraSearch    :1;                          /**< [in]: Disable Intra search during MotionEstimation*/
-    uint32_t bStereoEnable         :1;                          /**< [in]: Enable Stereo Mode for Motion Estimation where each view is independently executed*/
-    uint32_t reserved              :26;                         /**< [in]: Reserved and must be set to 0 */
-    uint32_t reserved1 [255];                                   /**< [in]: Reserved and must be set to 0 */
-    void*    reserved2[64];                                     /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_CONFIG_H264_MEONLY;
-
-
-/**
- * \struct _NV_ENC_CONFIG_HEVC_MEONLY
- * HEVC encoder configuration parameters for ME only Mode
- *
- */
-typedef struct _NV_ENC_CONFIG_HEVC_MEONLY
-{
-    uint32_t reserved [256];                                   /**< [in]: Reserved and must be set to 0 */
-    void*    reserved1[64];                                     /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_CONFIG_HEVC_MEONLY;
-
-/**
- * \struct _NV_ENC_CODEC_CONFIG
- * Codec-specific encoder configuration parameters to be set during initialization.
- */
-typedef union _NV_ENC_CODEC_CONFIG
-{
-    NV_ENC_CONFIG_H264        h264Config;                /**< [in]: Specifies the H.264-specific encoder configuration. */
-    NV_ENC_CONFIG_HEVC        hevcConfig;                /**< [in]: Specifies the HEVC-specific encoder configuration. */
-    NV_ENC_CONFIG_H264_MEONLY h264MeOnlyConfig;          /**< [in]: Specifies the H.264-specific ME only encoder configuration. */
-    NV_ENC_CONFIG_HEVC_MEONLY hevcMeOnlyConfig;          /**< [in]: Specifies the HEVC-specific ME only encoder configuration. */
-    uint32_t                reserved[320];               /**< [in]: Reserved and must be set to 0 */
-} NV_ENC_CODEC_CONFIG;
-
-
-/**
- * \struct _NV_ENC_CONFIG
- * Encoder configuration parameters to be set during initialization.
- */
-typedef struct _NV_ENC_CONFIG
-{
-    uint32_t                        version;                                     /**< [in]: Struct version. Must be set to ::NV_ENC_CONFIG_VER. */
-    GUID                            profileGUID;                                 /**< [in]: Specifies the codec profile guid. If client specifies \p NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID the NvEncodeAPI interface will select the appropriate codec profile. */
-    uint32_t                        gopLength;                                   /**< [in]: Specifies the number of pictures in one GOP. Low latency application client can set goplength to NVENC_INFINITE_GOPLENGTH so that keyframes are not inserted automatically. */
-    int32_t                         frameIntervalP;                              /**< [in]: Specifies the GOP pattern as follows: \p frameIntervalP = 0: I, 1: IPP, 2: IBP, 3: IBBP  If goplength is set to NVENC_INFINITE_GOPLENGTH \p frameIntervalP should be set to 1. */
-    uint32_t                        monoChromeEncoding;                          /**< [in]: Set this to 1 to enable monochrome encoding for this session. */
-    NV_ENC_PARAMS_FRAME_FIELD_MODE  frameFieldMode;                              /**< [in]: Specifies the frame/field mode.
-                                                                                            Check support for field encoding using ::NV_ENC_CAPS_SUPPORT_FIELD_ENCODING caps.
-                                                                                            Using a frameFieldMode other than NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME for RGB input is not supported. */
-    NV_ENC_MV_PRECISION             mvPrecision;                                 /**< [in]: Specifies the desired motion vector prediction precision. */
-    NV_ENC_RC_PARAMS                rcParams;                                    /**< [in]: Specifies the rate control parameters for the current encoding session. */
-    NV_ENC_CODEC_CONFIG             encodeCodecConfig;                           /**< [in]: Specifies the codec specific config parameters through this union. */
-    uint32_t                        reserved [278];                              /**< [in]: Reserved and must be set to 0 */
-    void*                           reserved2[64];                               /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_CONFIG;
-
-/** macro for constructing the version field of ::_NV_ENC_CONFIG */
-#define NV_ENC_CONFIG_VER (NVENCAPI_STRUCT_VERSION(6) | ( 1<<31 ))
-
-
-/**
- * \struct _NV_ENC_INITIALIZE_PARAMS
- * Encode Session Initialization parameters.
- */
-typedef struct _NV_ENC_INITIALIZE_PARAMS
-{
-    uint32_t                                   version;                         /**< [in]: Struct version. Must be set to ::NV_ENC_INITIALIZE_PARAMS_VER. */
-    GUID                                       encodeGUID;                      /**< [in]: Specifies the Encode GUID for which the encoder is being created. ::NvEncInitializeEncoder() API will fail if this is not set, or set to unsupported value. */
-    GUID                                       presetGUID;                      /**< [in]: Specifies the preset for encoding. If the preset GUID is set then , the preset configuration will be applied before any other parameter. */
-    uint32_t                                   encodeWidth;                     /**< [in]: Specifies the encode width. If not set ::NvEncInitializeEncoder() API will fail. */
-    uint32_t                                   encodeHeight;                    /**< [in]: Specifies the encode height. If not set ::NvEncInitializeEncoder() API will fail. */
-    uint32_t                                   darWidth;                        /**< [in]: Specifies the display aspect ratio Width. */
-    uint32_t                                   darHeight;                       /**< [in]: Specifies the display aspect ratio height. */
-    uint32_t                                   frameRateNum;                    /**< [in]: Specifies the numerator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen ). */
-    uint32_t                                   frameRateDen;                    /**< [in]: Specifies the denominator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen ). */
-    uint32_t                                   enableEncodeAsync;               /**< [in]: Set this to 1 to enable asynchronous mode and is expected to use events to get picture completion notification. */
-    uint32_t                                   enablePTD;                       /**< [in]: Set this to 1 to enable the Picture Type Decision is be taken by the NvEncodeAPI interface. */
-    uint32_t                                   reportSliceOffsets        :1;    /**< [in]: Set this to 1 to enable reporting slice offsets in ::_NV_ENC_LOCK_BITSTREAM. NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync must be set to 0 to use this feature. Client must set this to 0 if NV_ENC_CONFIG_H264::sliceMode is 1 on Kepler GPUs */
-    uint32_t                                   enableSubFrameWrite       :1;    /**< [in]: Set this to 1 to write out available bitstream to memory at subframe intervals */
-    uint32_t                                   enableExternalMEHints     :1;    /**< [in]: Set to 1 to enable external ME hints for the current frame. For NV_ENC_INITIALIZE_PARAMS::enablePTD=1 with B frames, programming L1 hints is optional for B frames since Client doesn't know internal GOP structure.
-                                                                                           NV_ENC_PIC_PARAMS::meHintRefPicDist should preferably be set with enablePTD=1. */
-    uint32_t                                   enableMEOnlyMode          :1;    /**< [in]: Set to 1 to enable ME Only Mode .*/
-    uint32_t                                   enableWeightedPrediction  :1;    /**< [in]: Set this to 1 to enable weighted prediction. Not supported if encode session is configured for B-Frames( 'frameIntervalP' in NV_ENC_CONFIG is greater than 1).*/
-    uint32_t                                   reservedBitFields         :27;   /**< [in]: Reserved bitfields and must be set to 0 */
-    uint32_t                                   privDataSize;                    /**< [in]: Reserved private data buffer size and must be set to 0 */
-    void*                                      privData;                        /**< [in]: Reserved private data buffer and must be set to NULL */
-    NV_ENC_CONFIG*                             encodeConfig;                    /**< [in]: Specifies the advanced codec specific structure. If client has sent a valid codec config structure, it will override parameters set by the NV_ENC_INITIALIZE_PARAMS::presetGUID parameter. If set to NULL the NvEncodeAPI interface will use the NV_ENC_INITIALIZE_PARAMS::presetGUID to set the codec specific parameters.
-                                                                                           Client can also optionally query the NvEncodeAPI interface to get codec specific parameters for a presetGUID using ::NvEncGetEncodePresetConfig() API. It can then modify (if required) some of the codec config parameters and send down a custom config structure as part of ::_NV_ENC_INITIALIZE_PARAMS.
-                                                                                           Even in this case client is recommended to pass the same preset guid it has used in ::NvEncGetEncodePresetConfig() API to query the config structure; as NV_ENC_INITIALIZE_PARAMS::presetGUID. This will not override the custom config structure but will be used to determine other Encoder HW specific parameters not exposed in the API. */
-    uint32_t                                   maxEncodeWidth;                  /**< [in]: Maximum encode width to be used for current Encode session.
-                                                                                           Client should allocate output buffers according to this dimension for dynamic resolution change. If set to 0, Encoder will not allow dynamic resolution change. */
-    uint32_t                                   maxEncodeHeight;                 /**< [in]: Maximum encode height to be allowed for current Encode session.
-                                                                                           Client should allocate output buffers according to this dimension for dynamic resolution change. If set to 0, Encode will not allow dynamic resolution change. */
-    NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE maxMEHintCountsPerBlock[2];      /**< [in]: If Client wants to pass external motion vectors in NV_ENC_PIC_PARAMS::meExternalHints buffer it must specify the maximum number of hint candidates per block per direction for the encode session.
-                                                                                           The NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[0] is for L0 predictors and NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[1] is for L1 predictors.
-                                                                                           This client must also set NV_ENC_INITIALIZE_PARAMS::enableExternalMEHints to 1. */
-    uint32_t                                   reserved [289];                  /**< [in]: Reserved and must be set to 0 */
-    void*                                      reserved2[64];                   /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_INITIALIZE_PARAMS;
-
-/** macro for constructing the version field of ::_NV_ENC_INITIALIZE_PARAMS */
-#define NV_ENC_INITIALIZE_PARAMS_VER (NVENCAPI_STRUCT_VERSION(5) | ( 1<<31 ))
-
-
-/**
- * \struct _NV_ENC_RECONFIGURE_PARAMS
- * Encode Session Reconfigured parameters.
- */
-typedef struct _NV_ENC_RECONFIGURE_PARAMS
-{
-    uint32_t                                    version;                        /**< [in]: Struct version. Must be set to ::NV_ENC_RECONFIGURE_PARAMS_VER. */
-    NV_ENC_INITIALIZE_PARAMS                    reInitEncodeParams;             /**< [in]: Encoder session re-initialization parameters. */
-    uint32_t                                    resetEncoder            :1;     /**< [in]: This resets the rate control states and other internal encoder states. This should be used only with an IDR frame.
-                                                                                           If NV_ENC_INITIALIZE_PARAMS::enablePTD is set to 1, encoder will force the frame type to IDR */
-    uint32_t                                    forceIDR                :1;     /**< [in]: Encode the current picture as an IDR picture. This flag is only valid when Picture type decision is taken by the Encoder
-                                                                                           [_NV_ENC_INITIALIZE_PARAMS::enablePTD == 1]. */
-    uint32_t                                    reserved                :30;
-
-}NV_ENC_RECONFIGURE_PARAMS;
-
-/** macro for constructing the version field of ::_NV_ENC_RECONFIGURE_PARAMS */
-#define NV_ENC_RECONFIGURE_PARAMS_VER (NVENCAPI_STRUCT_VERSION(1) | ( 1<<31 ))
-
-/**
- * \struct _NV_ENC_PRESET_CONFIG
- * Encoder preset config
- */
-typedef struct _NV_ENC_PRESET_CONFIG
-{
-    uint32_t      version;                               /**< [in]:  Struct version. Must be set to ::NV_ENC_PRESET_CONFIG_VER. */
-    NV_ENC_CONFIG presetCfg;                             /**< [out]: preset config returned by the Nvidia Video Encoder interface. */
-    uint32_t      reserved1[255];                        /**< [in]: Reserved and must be set to 0 */
-    void*         reserved2[64];                         /**< [in]: Reserved and must be set to NULL */
-}NV_ENC_PRESET_CONFIG;
-
-/** macro for constructing the version field of ::_NV_ENC_PRESET_CONFIG */
-#define NV_ENC_PRESET_CONFIG_VER (NVENCAPI_STRUCT_VERSION(4) | ( 1<<31 ))
-
-
-/**
- * \struct _NV_ENC_SEI_PAYLOAD
- *  User SEI message
- */
-typedef struct _NV_ENC_SEI_PAYLOAD
-{
-    uint32_t payloadSize;            /**< [in] SEI payload size in bytes. SEI payload must be byte aligned, as described in Annex D */
-    uint32_t payloadType;            /**< [in] SEI payload types and syntax can be found in Annex D of the H.264 Specification. */
-    uint8_t *payload;                /**< [in] pointer to user data */
-} NV_ENC_SEI_PAYLOAD;
-
-#define NV_ENC_H264_SEI_PAYLOAD NV_ENC_SEI_PAYLOAD
-
-/**
- * \struct _NV_ENC_PIC_PARAMS_H264
- * H264 specific enc pic params. sent on a per frame basis.
- */
-typedef struct _NV_ENC_PIC_PARAMS_H264
-{
-    uint32_t displayPOCSyntax;                           /**< [in]: Specifies the display POC syntax This is required to be set if client is handling the picture type decision. */
-    uint32_t reserved3;                                  /**< [in]: Reserved and must be set to 0 */
-    uint32_t refPicFlag;                                 /**< [in]: Set to 1 for a reference picture. This is ignored if NV_ENC_INITIALIZE_PARAMS::enablePTD is set to 1. */
-    uint32_t colourPlaneId;                              /**< [in]: Specifies the colour plane ID associated with the current input. */
-    uint32_t forceIntraRefreshWithFrameCnt;              /**< [in]: Forces an intra refresh with duration equal to intraRefreshFrameCnt.
-                                                                    When outputRecoveryPointSEI is set this is value is used for recovery_frame_cnt in recovery point SEI message
-                                                                    forceIntraRefreshWithFrameCnt cannot be used if B frames are used in the GOP structure specified */
-    uint32_t constrainedFrame           :1;              /**< [in]: Set to 1 if client wants to encode this frame with each slice completely independent of other slices in the frame.
-                                                                    NV_ENC_INITIALIZE_PARAMS::enableConstrainedEncoding should be set to 1 */
-    uint32_t sliceModeDataUpdate        :1;              /**< [in]: Set to 1 if client wants to change the sliceModeData field to specify new sliceSize Parameter
-                                                                    When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting */
-    uint32_t ltrMarkFrame               :1;              /**< [in]: Set to 1 if client wants to mark this frame as LTR */
-    uint32_t ltrUseFrames               :1;              /**< [in]: Set to 1 if client allows encoding this frame using the LTR frames specified in ltrFrameBitmap */
-    uint32_t reservedBitFields          :28;             /**< [in]: Reserved bit fields and must be set to 0 */
-    uint8_t* sliceTypeData;                              /**< [in]: Deprecated. */
-    uint32_t sliceTypeArrayCnt;                          /**< [in]: Deprecated. */
-    uint32_t seiPayloadArrayCnt;                         /**< [in]: Specifies the number of elements allocated in  seiPayloadArray array. */
-    NV_ENC_SEI_PAYLOAD* seiPayloadArray;                 /**< [in]: Array of SEI payloads which will be inserted for this frame. */
-    uint32_t sliceMode;                                  /**< [in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices
-                                                                    sliceMode = 0 MB based slices, sliceMode = 1 Byte based slices, sliceMode = 2 MB row based slices, sliceMode = 3, numSlices in Picture
-                                                                    When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting
-                                                                    When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice */
-    uint32_t sliceModeData;                              /**< [in]: Specifies the parameter needed for sliceMode. For:
-                                                                    sliceMode = 0, sliceModeData specifies # of MBs in each slice (except last slice)
-                                                                    sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice)
-                                                                    sliceMode = 2, sliceModeData specifies # of MB rows in each slice (except last slice)
-                                                                    sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally */
-    uint32_t ltrMarkFrameIdx;                            /**< [in]: Specifies the long term referenceframe index to use for marking this frame as LTR.*/
-    uint32_t ltrUseFrameBitmap;                          /**< [in]: Specifies the the associated bitmap of LTR frame indices to use when encoding this frame. */
-    uint32_t ltrUsageMode;                               /**< [in]: Not supported. Reserved for future use and must be set to 0. */
-    uint32_t reserved [243];                             /**< [in]: Reserved and must be set to 0. */
-    void*    reserved2[62];                              /**< [in]: Reserved and must be set to NULL. */
-} NV_ENC_PIC_PARAMS_H264;
-
-/**
- * \struct _NV_ENC_PIC_PARAMS_HEVC
- * HEVC specific enc pic params. sent on a per frame basis.
- */
-typedef struct _NV_ENC_PIC_PARAMS_HEVC
-{
-    uint32_t displayPOCSyntax;                           /**< [in]: Specifies the display POC syntax This is required to be set if client is handling the picture type decision. */
-    uint32_t refPicFlag;                                 /**< [in]: Set to 1 for a reference picture. This is ignored if NV_ENC_INITIALIZE_PARAMS::enablePTD is set to 1. */
-    uint32_t temporalId;                                 /**< [in]: Specifies the temporal id of the picture */
-    uint32_t forceIntraRefreshWithFrameCnt;              /**< [in]: Forces an intra refresh with duration equal to intraRefreshFrameCnt.
-                                                                    When outputRecoveryPointSEI is set this is value is used for recovery_frame_cnt in recovery point SEI message
-                                                                    forceIntraRefreshWithFrameCnt cannot be used if B frames are used in the GOP structure specified */
-    uint32_t constrainedFrame           :1;              /**< [in]: Set to 1 if client wants to encode this frame with each slice completely independent of other slices in the frame.
-                                                                    NV_ENC_INITIALIZE_PARAMS::enableConstrainedEncoding should be set to 1 */
-    uint32_t sliceModeDataUpdate        :1;              /**< [in]: Set to 1 if client wants to change the sliceModeData field to specify new sliceSize Parameter
-                                                                    When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting */
-    uint32_t ltrMarkFrame               :1;              /**< [in]: Set to 1 if client wants to mark this frame as LTR */
-    uint32_t ltrUseFrames               :1;              /**< [in]: Set to 1 if client allows encoding this frame using the LTR frames specified in ltrFrameBitmap */
-    uint32_t reservedBitFields          :28;             /**< [in]: Reserved bit fields and must be set to 0 */
-    uint8_t* sliceTypeData;                              /**< [in]: Array which specifies the slice type used to force intra slice for a particular slice. Currently supported only for NV_ENC_CONFIG_H264::sliceMode == 3.
-                                                                    Client should allocate array of size sliceModeData where sliceModeData is specified in field of ::_NV_ENC_CONFIG_H264
-                                                                    Array element with index n corresponds to nth slice. To force a particular slice to intra client should set corresponding array element to NV_ENC_SLICE_TYPE_I
-                                                                    all other array elements should be set to NV_ENC_SLICE_TYPE_DEFAULT */
-    uint32_t sliceTypeArrayCnt;                          /**< [in]: Client should set this to the number of elements allocated in sliceTypeData array. If sliceTypeData is NULL then this should be set to 0 */
-    uint32_t sliceMode;                                  /**< [in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices
-                                                                    sliceMode = 0 CTU based slices, sliceMode = 1 Byte based slices, sliceMode = 2 CTU row based slices, sliceMode = 3, numSlices in Picture
-                                                                    When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting
-                                                                    When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice */
-    uint32_t sliceModeData;                              /**< [in]: Specifies the parameter needed for sliceMode. For:
-                                                                    sliceMode = 0, sliceModeData specifies # of CTUs in each slice (except last slice)
-                                                                    sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice)
-                                                                    sliceMode = 2, sliceModeData specifies # of CTU rows in each slice (except last slice)
-                                                                    sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally */
-    uint32_t ltrMarkFrameIdx;                            /**< [in]: Specifies the long term reference frame index to use for marking this frame as LTR.*/
-    uint32_t ltrUseFrameBitmap;                          /**< [in]: Specifies the associated bitmap of LTR frame indices to use when encoding this frame. */
-    uint32_t ltrUsageMode;                               /**< [in]: Not supported. Reserved for future use and must be set to 0. */
-    uint32_t seiPayloadArrayCnt;                         /**< [in]: Specifies the number of elements allocated in  seiPayloadArray array. */
-    uint32_t reserved;                                   /**< [in]: Reserved and must be set to 0. */
-    NV_ENC_SEI_PAYLOAD* seiPayloadArray;                 /**< [in]: Array of SEI payloads which will be inserted for this frame. */
-    uint32_t reserved2 [244];                             /**< [in]: Reserved and must be set to 0. */
-    void*    reserved3[61];                              /**< [in]: Reserved and must be set to NULL. */
-} NV_ENC_PIC_PARAMS_HEVC;
-
-
-/**
- * Codec specific per-picture encoding parameters.
- */
-typedef union _NV_ENC_CODEC_PIC_PARAMS
-{
-    NV_ENC_PIC_PARAMS_H264 h264PicParams;                /**< [in]: H264 encode picture params. */
-    NV_ENC_PIC_PARAMS_HEVC hevcPicParams;                /**< [in]: HEVC encode picture params. */
-    uint32_t               reserved[256];                /**< [in]: Reserved and must be set to 0. */
-} NV_ENC_CODEC_PIC_PARAMS;
-
-/**
- * \struct _NV_ENC_PIC_PARAMS
- * Encoding parameters that need to be sent on a per frame basis.
- */
-typedef struct _NV_ENC_PIC_PARAMS
-{
-    uint32_t                                    version;                        /**< [in]: Struct version. Must be set to ::NV_ENC_PIC_PARAMS_VER. */
-    uint32_t                                    inputWidth;                     /**< [in]: Specifies the input buffer width */
-    uint32_t                                    inputHeight;                    /**< [in]: Specifies the input buffer height */
-    uint32_t                                    inputPitch;                     /**< [in]: Specifies the input buffer pitch. If pitch value is not known, set this to inputWidth. */
-    uint32_t                                    encodePicFlags;                 /**< [in]: Specifies bit-wise OR`ed encode pic flags. See ::NV_ENC_PIC_FLAGS enum. */
-    uint32_t                                    frameIdx;                       /**< [in]: Specifies the frame index associated with the input frame [optional]. */
-    uint64_t                                    inputTimeStamp;                 /**< [in]: Specifies presentation timestamp associated with the input picture. */
-    uint64_t                                    inputDuration;                  /**< [in]: Specifies duration of the input picture */
-    NV_ENC_INPUT_PTR                            inputBuffer;                    /**< [in]: Specifies the input buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs.*/
-    NV_ENC_OUTPUT_PTR                           outputBitstream;                /**< [in]: Specifies the pointer to output buffer. Client should use a pointer obtained from ::NvEncCreateBitstreamBuffer() API. */
-    void*                                       completionEvent;                /**< [in]: Specifies an event to be signalled on completion of encoding of this Frame [only if operating in Asynchronous mode]. Each output buffer should be associated with a distinct event pointer. */
-    NV_ENC_BUFFER_FORMAT                        bufferFmt;                      /**< [in]: Specifies the input buffer format. */
-    NV_ENC_PIC_STRUCT                           pictureStruct;                  /**< [in]: Specifies structure of the input picture. */
-    NV_ENC_PIC_TYPE                             pictureType;                    /**< [in]: Specifies input picture type. Client required to be set explicitly by the client if the client has not set NV_ENC_INITALIZE_PARAMS::enablePTD to 1 while calling NvInitializeEncoder. */
-    NV_ENC_CODEC_PIC_PARAMS                     codecPicParams;                 /**< [in]: Specifies the codec specific per-picture encoding parameters. */
-    NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE meHintCountsPerBlock[2];        /**< [in]: Specifies the number of hint candidates per block per direction for the current frame. meHintCountsPerBlock[0] is for L0 predictors and meHintCountsPerBlock[1] is for L1 predictors.
-                                                                                           The candidate count in NV_ENC_PIC_PARAMS::meHintCountsPerBlock[lx] must never exceed NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[lx] provided during encoder intialization. */
-    NVENC_EXTERNAL_ME_HINT                     *meExternalHints;                /**< [in]: Specifies the pointer to ME external hints for the current frame. The size of ME hint buffer should be equal to number of macroblocks * the total number of candidates per macroblock.
-                                                                                           The total number of candidates per MB per direction = 1*meHintCountsPerBlock[Lx].numCandsPerBlk16x16 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk16x8 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk8x8
-                                                                                           + 4*meHintCountsPerBlock[Lx].numCandsPerBlk8x8. For frames using bidirectional ME , the total number of candidates for single macroblock is sum of total number of candidates per MB for each direction (L0 and L1) */
-    uint32_t                                    reserved1[6];                    /**< [in]: Reserved and must be set to 0 */
-    void*                                       reserved2[2];                    /**< [in]: Reserved and must be set to NULL */
-    int8_t                                     *qpDeltaMap;                      /**< [in]: Specifies the pointer to signed byte array containing QP delta value per MB in raster scan order in the current picture. This QP modifier is applied on top of the QP chosen by rate control. */
-    uint32_t                                    qpDeltaMapSize;                  /**< [in]: Specifies the size in bytes of qpDeltaMap surface allocated by client and pointed to by NV_ENC_PIC_PARAMS::qpDeltaMap. Surface (array) should be picWidthInMbs * picHeightInMbs */
-    uint32_t                                    reservedBitFields;               /**< [in]: Reserved bitfields and must be set to 0 */
-    uint16_t                                    meHintRefPicDist[2];             /**< [in]: Specifies temporal distance for reference picture (NVENC_EXTERNAL_ME_HINT::refidx = 0) used during external ME with NV_ENC_INITALIZE_PARAMS::enablePTD = 1 . meHintRefPicDist[0] is for L0 hints and meHintRefPicDist[1] is for L1 hints.
-                                                                                            If not set, will internally infer distance of 1. Ignored for NV_ENC_INITALIZE_PARAMS::enablePTD = 0 */
-    uint32_t                                    reserved3[286];                  /**< [in]: Reserved and must be set to 0 */
-    void*                                       reserved4[60];                   /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_PIC_PARAMS;
-
-/** Macro for constructing the version field of ::_NV_ENC_PIC_PARAMS */
-#define NV_ENC_PIC_PARAMS_VER (NVENCAPI_STRUCT_VERSION(4) | ( 1<<31 ))
-
-
-/**
- * \struct _NV_ENC_MEONLY_PARAMS
- * MEOnly parameters that need to be sent on a per motion estimation basis.
- * NV_ENC_MEONLY_PARAMS::meExternalHints is supported for H264 only.
- */
-typedef struct _NV_ENC_MEONLY_PARAMS
-{
-    uint32_t                version;                            /**< [in]: Struct version. Must be set to NV_ENC_MEONLY_PARAMS_VER.*/
-    uint32_t                inputWidth;                         /**< [in]: Specifies the input buffer width */
-    uint32_t                inputHeight;                        /**< [in]: Specifies the input buffer height */
-    NV_ENC_INPUT_PTR        inputBuffer;                        /**< [in]: Specifies the input buffer pointer. Client must use a pointer obtained from NvEncCreateInputBuffer() or NvEncMapInputResource() APIs. */
-    NV_ENC_INPUT_PTR        referenceFrame;                     /**< [in]: Specifies the reference frame pointer */
-    NV_ENC_OUTPUT_PTR       mvBuffer;                           /**< [in]: Specifies the pointer to motion vector data buffer allocated by NvEncCreateMVBuffer. Client must lock mvBuffer using ::NvEncLockBitstream() API to get the motion vector data. */
-    NV_ENC_BUFFER_FORMAT    bufferFmt;                          /**< [in]: Specifies the input buffer format. */
-    void*                   completionEvent;                    /**< [in]: Specifies an event to be signalled on completion of motion estimation
-                                                                           of this Frame [only if operating in Asynchronous mode].
-                                                                           Each output buffer should be associated with a distinct event pointer. */
-    uint32_t                viewID;                             /**< [in]: Specifies left,right viewID if NV_ENC_CONFIG_H264_MEONLY::bStereoEnable is set.
-                                                                            viewID can be 0,1 if bStereoEnable is set, 0 otherwise. */
-    NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE
-                            meHintCountsPerBlock[2];            /**< [in]: Specifies the number of hint candidates per block for the current frame. meHintCountsPerBlock[0] is for L0 predictors.
-                                                                            The candidate count in NV_ENC_PIC_PARAMS::meHintCountsPerBlock[lx] must never exceed NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[lx] provided during encoder intialization. */
-    NVENC_EXTERNAL_ME_HINT  *meExternalHints;                   /**< [in]: Specifies the pointer to ME external hints for the current frame. The size of ME hint buffer should be equal to number of macroblocks * the total number of candidates per macroblock.
-                                                                            The total number of candidates per MB per direction = 1*meHintCountsPerBlock[Lx].numCandsPerBlk16x16 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk16x8 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk8x8
-                                                                            + 4*meHintCountsPerBlock[Lx].numCandsPerBlk8x8. For frames using bidirectional ME , the total number of candidates for single macroblock is sum of total number of candidates per MB for each direction (L0 and L1) */
-    uint32_t                reserved1[243];                     /**< [in]: Reserved and must be set to 0 */
-    void*                   reserved2[59];                      /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_MEONLY_PARAMS;
-
-/** NV_ENC_MEONLY_PARAMS struct version*/
-#define NV_ENC_MEONLY_PARAMS_VER NVENCAPI_STRUCT_VERSION(3)
-
-
-/**
- * \struct _NV_ENC_LOCK_BITSTREAM
- * Bitstream buffer lock parameters.
- */
-typedef struct _NV_ENC_LOCK_BITSTREAM
-{
-    uint32_t                version;                     /**< [in]: Struct version. Must be set to ::NV_ENC_LOCK_BITSTREAM_VER. */
-    uint32_t                doNotWait         :1;        /**< [in]: If this flag is set, the NvEncodeAPI interface will return buffer pointer even if operation is not completed. If not set, the call will block until operation completes. */
-    uint32_t                ltrFrame          :1;        /**< [out]: Flag indicating this frame is marked as LTR frame */
-    uint32_t                reservedBitFields :30;       /**< [in]: Reserved bit fields and must be set to 0 */
-    void*                   outputBitstream;             /**< [in]: Pointer to the bitstream buffer being locked. */
-    uint32_t*               sliceOffsets;                /**< [in,out]: Array which receives the slice offsets. This is not supported if NV_ENC_CONFIG_H264::sliceMode is 1 on Kepler GPUs. Array size must be equal to size of frame in MBs. */
-    uint32_t                frameIdx;                    /**< [out]: Frame no. for which the bitstream is being retrieved. */
-    uint32_t                hwEncodeStatus;              /**< [out]: The NvEncodeAPI interface status for the locked picture. */
-    uint32_t                numSlices;                   /**< [out]: Number of slices in the encoded picture. Will be reported only if NV_ENC_INITIALIZE_PARAMS::reportSliceOffsets set to 1. */
-    uint32_t                bitstreamSizeInBytes;        /**< [out]: Actual number of bytes generated and copied to the memory pointed by bitstreamBufferPtr. */
-    uint64_t                outputTimeStamp;             /**< [out]: Presentation timestamp associated with the encoded output. */
-    uint64_t                outputDuration;              /**< [out]: Presentation duration associates with the encoded output. */
-    void*                   bitstreamBufferPtr;          /**< [out]: Pointer to the generated output bitstream.
-                                                                     For MEOnly mode _NV_ENC_LOCK_BITSTREAM::bitstreamBufferPtr should be typecast to
-                                                                     NV_ENC_H264_MV_DATA/NV_ENC_HEVC_MV_DATA pointer respectively for H264/HEVC  */
-    NV_ENC_PIC_TYPE         pictureType;                 /**< [out]: Picture type of the encoded picture. */
-    NV_ENC_PIC_STRUCT       pictureStruct;               /**< [out]: Structure of the generated output picture. */
-    uint32_t                frameAvgQP;                  /**< [out]: Average QP of the frame. */
-    uint32_t                frameSatd;                   /**< [out]: Total SATD cost for whole frame. */
-    uint32_t                ltrFrameIdx;                 /**< [out]: Frame index associated with this LTR frame. */
-    uint32_t                ltrFrameBitmap;              /**< [out]: Bitmap of LTR frames indices which were used for encoding this frame. Value of 0 if no LTR frames were used. */
-    uint32_t                reserved [236];              /**< [in]: Reserved and must be set to 0 */
-    void*                   reserved2[64];               /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_LOCK_BITSTREAM;
-
-/** Macro for constructing the version field of ::_NV_ENC_LOCK_BITSTREAM */
-#define NV_ENC_LOCK_BITSTREAM_VER NVENCAPI_STRUCT_VERSION(1)
-
-
-/**
- * \struct _NV_ENC_LOCK_INPUT_BUFFER
- * Uncompressed Input Buffer lock parameters.
- */
-typedef struct _NV_ENC_LOCK_INPUT_BUFFER
-{
-    uint32_t                  version;                   /**< [in]:  Struct version. Must be set to ::NV_ENC_LOCK_INPUT_BUFFER_VER. */
-    uint32_t                  doNotWait         :1;      /**< [in]:  Set to 1 to make ::NvEncLockInputBuffer() a unblocking call. If the encoding is not completed, driver will return ::NV_ENC_ERR_ENCODER_BUSY error code. */
-    uint32_t                  reservedBitFields :31;     /**< [in]:  Reserved bitfields and must be set to 0 */
-    NV_ENC_INPUT_PTR          inputBuffer;               /**< [in]:  Pointer to the input buffer to be locked, client should pass the pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource API. */
-    void*                     bufferDataPtr;             /**< [out]: Pointed to the locked input buffer data. Client can only access input buffer using the \p bufferDataPtr. */
-    uint32_t                  pitch;                     /**< [out]: Pitch of the locked input buffer. */
-    uint32_t                  reserved1[251];            /**< [in]:  Reserved and must be set to 0  */
-    void*                     reserved2[64];             /**< [in]:  Reserved and must be set to NULL  */
-} NV_ENC_LOCK_INPUT_BUFFER;
-
-/** Macro for constructing the version field of ::_NV_ENC_LOCK_INPUT_BUFFER */
-#define NV_ENC_LOCK_INPUT_BUFFER_VER NVENCAPI_STRUCT_VERSION(1)
-
-
-/**
- * \struct _NV_ENC_MAP_INPUT_RESOURCE
- * Map an input resource to a Nvidia Encoder Input Buffer
- */
-typedef struct _NV_ENC_MAP_INPUT_RESOURCE
-{
-    uint32_t                   version;                   /**< [in]:  Struct version. Must be set to ::NV_ENC_MAP_INPUT_RESOURCE_VER. */
-    uint32_t                   subResourceIndex;          /**< [in]:  Deprecated. Do not use. */
-    void*                      inputResource;             /**< [in]:  Deprecated. Do not use. */
-    NV_ENC_REGISTERED_PTR      registeredResource;        /**< [in]:  The Registered resource handle obtained by calling NvEncRegisterInputResource. */
-    NV_ENC_INPUT_PTR           mappedResource;            /**< [out]: Mapped pointer corresponding to the registeredResource. This pointer must be used in NV_ENC_PIC_PARAMS::inputBuffer parameter in ::NvEncEncodePicture() API. */
-    NV_ENC_BUFFER_FORMAT       mappedBufferFmt;           /**< [out]: Buffer format of the outputResource. This buffer format must be used in NV_ENC_PIC_PARAMS::bufferFmt if client using the above mapped resource pointer. */
-    uint32_t                   reserved1[251];            /**< [in]:  Reserved and must be set to 0. */
-    void*                      reserved2[63];             /**< [in]:  Reserved and must be set to NULL */
-} NV_ENC_MAP_INPUT_RESOURCE;
-
-/** Macro for constructing the version field of ::_NV_ENC_MAP_INPUT_RESOURCE */
-#define NV_ENC_MAP_INPUT_RESOURCE_VER NVENCAPI_STRUCT_VERSION(4)
-
-/**
- * \struct _NV_ENC_INPUT_RESOURCE_OPENGL_TEX
- * NV_ENC_REGISTER_RESOURCE::resourceToRegister must be a pointer to a variable of this type,
- * when NV_ENC_REGISTER_RESOURCE::resourceType is NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX
- */
-typedef struct _NV_ENC_INPUT_RESOURCE_OPENGL_TEX
-{
-    uint32_t texture;                                     /**< [in]: The name of the texture to be used. */
-    uint32_t target;                                      /**< [in]: Accepted values are GL_TEXTURE_RECTANGLE and GL_TEXTURE_2D. */
-} NV_ENC_INPUT_RESOURCE_OPENGL_TEX;
-
-/**
- * \struct _NV_ENC_REGISTER_RESOURCE
- * Register a resource for future use with the Nvidia Video Encoder Interface.
- */
-typedef struct _NV_ENC_REGISTER_RESOURCE
-{
-    uint32_t                    version;                        /**< [in]: Struct version. Must be set to ::NV_ENC_REGISTER_RESOURCE_VER. */
-    NV_ENC_INPUT_RESOURCE_TYPE  resourceType;                   /**< [in]: Specifies the type of resource to be registered.
-                                                                           Supported values are
-                                                                           ::NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX,
-                                                                           ::NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
-                                                                           ::NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX */
-    uint32_t                    width;                          /**< [in]: Input buffer Width. */
-    uint32_t                    height;                         /**< [in]: Input buffer Height. */
-    uint32_t                    pitch;                          /**< [in]: Input buffer Pitch.  */
-    uint32_t                    subResourceIndex;               /**< [in]: Subresource Index of the DirectX resource to be registered. Should be set to 0 for other interfaces. */
-    void*                       resourceToRegister;             /**< [in]: Handle to the resource that is being registered. */
-    NV_ENC_REGISTERED_PTR       registeredResource;             /**< [out]: Registered resource handle. This should be used in future interactions with the Nvidia Video Encoder Interface. */
-    NV_ENC_BUFFER_FORMAT        bufferFormat;                   /**< [in]: Buffer format of resource to be registered. */
-    uint32_t                    reserved1[248];                 /**< [in]: Reserved and must be set to 0. */
-    void*                       reserved2[62];                  /**< [in]: Reserved and must be set to NULL. */
-} NV_ENC_REGISTER_RESOURCE;
-
-/** Macro for constructing the version field of ::_NV_ENC_REGISTER_RESOURCE */
-#define NV_ENC_REGISTER_RESOURCE_VER NVENCAPI_STRUCT_VERSION(3)
-
-/**
- * \struct _NV_ENC_STAT
- * Encode Stats structure.
- */
-typedef struct _NV_ENC_STAT
-{
-    uint32_t            version;                         /**< [in]:  Struct version. Must be set to ::NV_ENC_STAT_VER. */
-    uint32_t            reserved;                        /**< [in]:  Reserved and must be set to 0 */
-    NV_ENC_OUTPUT_PTR   outputBitStream;                 /**< [out]: Specifies the pointer to output bitstream. */
-    uint32_t            bitStreamSize;                   /**< [out]: Size of generated bitstream in bytes. */
-    uint32_t            picType;                         /**< [out]: Picture type of encoded picture. See ::NV_ENC_PIC_TYPE. */
-    uint32_t            lastValidByteOffset;             /**< [out]: Offset of last valid bytes of completed bitstream */
-    uint32_t            sliceOffsets[16];                /**< [out]: Offsets of each slice */
-    uint32_t            picIdx;                          /**< [out]: Picture number */
-    uint32_t            reserved1[233];                  /**< [in]:  Reserved and must be set to 0 */
-    void*               reserved2[64];                   /**< [in]:  Reserved and must be set to NULL */
-} NV_ENC_STAT;
-
-/** Macro for constructing the version field of ::_NV_ENC_STAT */
-#define NV_ENC_STAT_VER NVENCAPI_STRUCT_VERSION(1)
-
-
-/**
- * \struct _NV_ENC_SEQUENCE_PARAM_PAYLOAD
- * Sequence and picture paramaters payload.
- */
-typedef struct _NV_ENC_SEQUENCE_PARAM_PAYLOAD
-{
-    uint32_t            version;                         /**< [in]:  Struct version. Must be set to ::NV_ENC_INITIALIZE_PARAMS_VER. */
-    uint32_t            inBufferSize;                    /**< [in]:  Specifies the size of the spsppsBuffer provied by the client */
-    uint32_t            spsId;                           /**< [in]:  Specifies the SPS id to be used in sequence header. Default value is 0.  */
-    uint32_t            ppsId;                           /**< [in]:  Specifies the PPS id to be used in picture header. Default value is 0.  */
-    void*               spsppsBuffer;                    /**< [in]:  Specifies bitstream header pointer of size NV_ENC_SEQUENCE_PARAM_PAYLOAD::inBufferSize. It is the client's responsibility to manage this memory. */
-    uint32_t*           outSPSPPSPayloadSize;            /**< [out]: Size of the sequence and picture header in  bytes written by the NvEncodeAPI interface to the SPSPPSBuffer. */
-    uint32_t            reserved [250];                  /**< [in]:  Reserved and must be set to 0 */
-    void*               reserved2[64];                   /**< [in]:  Reserved and must be set to NULL */
-} NV_ENC_SEQUENCE_PARAM_PAYLOAD;
-
-/** Macro for constructing the version field of ::_NV_ENC_SEQUENCE_PARAM_PAYLOAD */
-#define NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER NVENCAPI_STRUCT_VERSION(1)
-
-
-/**
- * Event registration/unregistration parameters.
- */
-typedef struct _NV_ENC_EVENT_PARAMS
-{
-    uint32_t            version;                          /**< [in]: Struct version. Must be set to ::NV_ENC_EVENT_PARAMS_VER. */
-    uint32_t            reserved;                         /**< [in]: Reserved and must be set to 0 */
-    void*               completionEvent;                  /**< [in]: Handle to event to be registered/unregistered with the NvEncodeAPI interface. */
-    uint32_t            reserved1[253];                   /**< [in]: Reserved and must be set to 0    */
-    void*               reserved2[64];                    /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_EVENT_PARAMS;
-
-/** Macro for constructing the version field of ::_NV_ENC_EVENT_PARAMS */
-#define NV_ENC_EVENT_PARAMS_VER NVENCAPI_STRUCT_VERSION(1)
-
-/**
- * Encoder Session Creation parameters
- */
-typedef struct _NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS
-{
-    uint32_t            version;                          /**< [in]: Struct version. Must be set to ::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER. */
-    NV_ENC_DEVICE_TYPE  deviceType;                       /**< [in]: Specified the device Type */
-    void*               device;                           /**< [in]: Pointer to client device. */
-    void*               reserved;                         /**< [in]: Reserved and must be set to 0. */
-    uint32_t            apiVersion;                       /**< [in]: API version. Should be set to NVENCAPI_VERSION. */
-    uint32_t            reserved1[253];                   /**< [in]: Reserved and must be set to 0    */
-    void*               reserved2[64];                    /**< [in]: Reserved and must be set to NULL */
-} NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS;
-/** Macro for constructing the version field of ::_NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS */
-#define NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER NVENCAPI_STRUCT_VERSION(1)
-
-/** @} */ /* END ENCODER_STRUCTURE */
-
-
-/**
- * \addtogroup ENCODE_FUNC NvEncodeAPI Functions
- * @{
- */
-
-// NvEncOpenEncodeSession
-/**
- * \brief Opens an encoding session.
- *
- * Deprecated.
- *
- * \return
- * ::NV_ENC_ERR_INVALID_CALL\n
- *
- */
-NVENCSTATUS NVENCAPI NvEncOpenEncodeSession                     (void* device, uint32_t deviceType, void** encoder);
-
-// NvEncGetEncodeGuidCount
-/**
- * \brief Retrieves the number of supported encode GUIDs.
- *
- * The function returns the number of codec guids supported by the NvEncodeAPI
- * interface.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [out] encodeGUIDCount
- *   Number of supported encode GUIDs.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncGetEncodeGUIDCount                    (void* encoder, uint32_t* encodeGUIDCount);
-
-
-// NvEncGetEncodeGUIDs
-/**
- * \brief Retrieves an array of supported encoder codec GUIDs.
- *
- * The function returns an array of codec guids supported by the NvEncodeAPI interface.
- * The client must allocate an array where the NvEncodeAPI interface can
- * fill the supported guids and pass the pointer in \p *GUIDs parameter.
- * The size of the array can be determined by using ::NvEncGetEncodeGUIDCount() API.
- * The Nvidia Encoding interface returns the number of codec guids it has actually
- * filled in the guid array in the \p GUIDCount parameter.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] guidArraySize
- *   Number of GUIDs to retrieved. Should be set to the number retrieved using
- *   ::NvEncGetEncodeGUIDCount.
- * \param [out] GUIDs
- *   Array of supported Encode GUIDs.
- * \param [out] GUIDCount
- *   Number of supported Encode GUIDs.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncGetEncodeGUIDs                        (void* encoder, GUID* GUIDs, uint32_t guidArraySize, uint32_t* GUIDCount);
-
-
-// NvEncGetEncodeProfileGuidCount
-/**
- * \brief Retrieves the number of supported profile GUIDs.
- *
- * The function returns the number of profile GUIDs supported for a given codec.
- * The client must first enumerate the codec guids supported by the NvEncodeAPI
- * interface. After determining the codec guid, it can query the NvEncodeAPI
- * interface to determine the number of profile guids supported for a particular
- * codec guid.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] encodeGUID
- *   The codec guid for which the profile guids are being enumerated.
- * \param [out] encodeProfileGUIDCount
- *   Number of encode profiles supported for the given encodeGUID.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncGetEncodeProfileGUIDCount                    (void* encoder, GUID encodeGUID, uint32_t* encodeProfileGUIDCount);
-
-
-// NvEncGetEncodeProfileGUIDs
-/**
- * \brief Retrieves an array of supported encode profile GUIDs.
- *
- * The function returns an array of supported profile guids for a particular
- * codec guid. The client must allocate an array where the NvEncodeAPI interface
- * can populate the profile guids. The client can determine the array size using
- * ::NvEncGetEncodeProfileGUIDCount() API. The client must also validiate that the
- * NvEncodeAPI interface supports the GUID the client wants to pass as \p encodeGUID
- * parameter.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] encodeGUID
- *   The encode guid whose profile guids are being enumerated.
- * \param [in] guidArraySize
- *   Number of GUIDs to be retrieved. Should be set to the number retrieved using
- *   ::NvEncGetEncodeProfileGUIDCount.
- * \param [out] profileGUIDs
- *   Array of supported Encode Profile GUIDs
- * \param [out] GUIDCount
- *   Number of valid encode profile GUIDs in \p profileGUIDs array.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncGetEncodeProfileGUIDs                               (void* encoder, GUID encodeGUID, GUID* profileGUIDs, uint32_t guidArraySize, uint32_t* GUIDCount);
-
-// NvEncGetInputFormatCount
-/**
- * \brief Retrieve the number of supported Input formats.
- *
- * The function returns the number of supported input formats. The client must
- * query the NvEncodeAPI interface to determine the supported input formats
- * before creating the input surfaces.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] encodeGUID
- *   Encode GUID, corresponding to which the number of supported input formats
- *   is to be retrieved.
- * \param [out] inputFmtCount
- *   Number of input formats supported for specified Encode GUID.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- */
-NVENCSTATUS NVENCAPI NvEncGetInputFormatCount                   (void* encoder, GUID encodeGUID, uint32_t* inputFmtCount);
-
-
-// NvEncGetInputFormats
-/**
- * \brief Retrieves an array of supported Input formats
- *
- * Returns an array of supported input formats  The client must use the input
- * format to create input surface using ::NvEncCreateInputBuffer() API.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] encodeGUID
- *   Encode GUID, corresponding to which the number of supported input formats
- *   is to be retrieved.
- *\param [in] inputFmtArraySize
- *   Size input format count array passed in \p inputFmts.
- *\param [out] inputFmts
- *   Array of input formats supported for this Encode GUID.
- *\param [out] inputFmtCount
- *   The number of valid input format types returned by the NvEncodeAPI
- *   interface in \p inputFmts array.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncGetInputFormats                       (void* encoder, GUID encodeGUID, NV_ENC_BUFFER_FORMAT* inputFmts, uint32_t inputFmtArraySize, uint32_t* inputFmtCount);
-
-
-// NvEncGetEncodeCaps
-/**
- * \brief Retrieves the capability value for a specified encoder attribute.
- *
- * The function returns the capability value for a given encoder attribute. The
- * client must validate the encodeGUID using ::NvEncGetEncodeGUIDs() API before
- * calling this function. The encoder attribute being queried are enumerated in
- * ::NV_ENC_CAPS_PARAM enum.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] encodeGUID
- *   Encode GUID, corresponding to which the capability attribute is to be retrieved.
- * \param [in] capsParam
- *   Used to specify attribute being queried. Refer ::NV_ENC_CAPS_PARAM for  more
- * details.
- * \param [out] capsVal
- *   The value corresponding to the capability attribute being queried.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- */
-NVENCSTATUS NVENCAPI NvEncGetEncodeCaps                     (void* encoder, GUID encodeGUID, NV_ENC_CAPS_PARAM* capsParam, int* capsVal);
-
-
-// NvEncGetEncodePresetCount
-/**
- * \brief Retrieves the number of supported preset GUIDs.
- *
- * The function returns the number of preset GUIDs available for a given codec.
- * The client must validate the codec guid using ::NvEncGetEncodeGUIDs() API
- * before calling this function.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] encodeGUID
- *   Encode GUID, corresponding to which the number of supported presets is to
- *   be retrieved.
- * \param [out] encodePresetGUIDCount
- *   Receives the number of supported preset GUIDs.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncGetEncodePresetCount              (void* encoder, GUID encodeGUID, uint32_t* encodePresetGUIDCount);
-
-
-// NvEncGetEncodePresetGUIDs
-/**
- * \brief Receives an array of supported encoder preset GUIDs.
- *
- * The function returns an array of encode preset guids available for a given codec.
- * The client can directly use one of the preset guids based upon the use case
- * or target device. The preset guid chosen can be directly used in
- * NV_ENC_INITIALIZE_PARAMS::presetGUID parameter to ::NvEncEncodePicture() API.
- * Alternately client can  also use the preset guid to retrieve the encoding config
- * parameters being used by NvEncodeAPI interface for that given preset, using
- * ::NvEncGetEncodePresetConfig() API. It can then modify preset config parameters
- * as per its use case and send it to NvEncodeAPI interface as part of
- * NV_ENC_INITIALIZE_PARAMS::encodeConfig parameter for NvEncInitializeEncoder()
- * API.
- *
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] encodeGUID
- *   Encode GUID, corresponding to which the list of supported presets is to be
- *   retrieved.
- * \param [in] guidArraySize
- *   Size of array of preset guids passed in \p preset GUIDs
- * \param [out] presetGUIDs
- *   Array of supported Encode preset GUIDs from the NvEncodeAPI interface
- *   to client.
- * \param [out] encodePresetGUIDCount
- *   Receives the number of preset GUIDs returned by the NvEncodeAPI
- *   interface.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncGetEncodePresetGUIDs                  (void* encoder, GUID encodeGUID, GUID* presetGUIDs, uint32_t guidArraySize, uint32_t* encodePresetGUIDCount);
-
-
-// NvEncGetEncodePresetConfig
-/**
- * \brief Returns a preset config structure supported for given preset GUID.
- *
- * The function returns a preset config structure for a given preset guid. Before
- * using this function the client must enumerate the preset guids available for
- * a given codec. The preset config structure can be modified by the client depending
- * upon its use case and can be then used to initialize the encoder using
- * ::NvEncInitializeEncoder() API. The client can use this function only if it
- * wants to modify the NvEncodeAPI preset configuration, otherwise it can
- * directly use the preset guid.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] encodeGUID
- *   Encode GUID, corresponding to which the list of supported presets is to be
- *   retrieved.
- * \param [in] presetGUID
- *   Preset GUID, corresponding to which the Encoding configurations is to be
- *   retrieved.
- * \param [out] presetConfig
- *   The requested Preset Encoder Attribute set. Refer ::_NV_ENC_CONFIG for
-*    more details.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncGetEncodePresetConfig               (void* encoder, GUID encodeGUID, GUID  presetGUID, NV_ENC_PRESET_CONFIG* presetConfig);
-
-// NvEncInitializeEncoder
-/**
- * \brief Initialize the encoder.
- *
- * This API must be used to initialize the encoder. The initialization parameter
- * is passed using \p *createEncodeParams  The client must send the following
- * fields of the _NV_ENC_INITIALIZE_PARAMS structure with a valid value.
- * - NV_ENC_INITIALIZE_PARAMS::encodeGUID
- * - NV_ENC_INITIALIZE_PARAMS::encodeWidth
- * - NV_ENC_INITIALIZE_PARAMS::encodeHeight
- *
- * The client can pass a preset guid directly to the NvEncodeAPI interface using
- * NV_ENC_INITIALIZE_PARAMS::presetGUID field. If the client doesn't pass
- * NV_ENC_INITIALIZE_PARAMS::encodeConfig structure, the codec specific parameters
- * will be selected based on the preset guid. The preset guid must have been
- * validated by the client using ::NvEncGetEncodePresetGUIDs() API.
- * If the client passes a custom ::_NV_ENC_CONFIG structure through
- * NV_ENC_INITIALIZE_PARAMS::encodeConfig , it will override the codec specific parameters
- * based on the preset guid. It is recommended that even if the client passes a custom config,
- * it should also send a preset guid. In this case, the preset guid passed by the client
- * will not override any of the custom config parameters programmed by the client,
- * it is only used as a hint by the NvEncodeAPI interface to determine certain encoder parameters
- * which are not exposed to the client.
- *
- * There are two modes of operation for the encoder namely:
- * - Asynchronous mode
- * - Synchronous mode
- *
- * The client can select asynchronous or synchronous mode by setting the \p
- * enableEncodeAsync field in ::_NV_ENC_INITIALIZE_PARAMS to 1 or 0 respectively.
- *\par Asynchronous mode of operation:
- * The Asynchronous mode can be enabled by setting NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 1.
- * The client operating in asynchronous mode must allocate completion event object
- * for each output buffer and pass the completion event object in the
- * ::NvEncEncodePicture() API. The client can create another thread and wait on
- * the event object to be signalled by NvEncodeAPI interface on completion of the
- * encoding process for the output frame. This should unblock the main thread from
- * submitting work to the encoder. When the event is signalled the client can call
- * NvEncodeAPI interfaces to copy the bitstream data using ::NvEncLockBitstream()
- * API. This is the preferred mode of operation.
- *
- * NOTE: Asynchronous mode is not supported on Linux.
- *
- *\par Synchronous mode of operation:
- * The client can select synchronous mode by setting NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 0.
- * The client working in synchronous mode can work in a single threaded or multi
- * threaded mode. The client need not allocate any event objects. The client can
- * only lock the bitstream data after NvEncodeAPI interface has returned
- * ::NV_ENC_SUCCESS from encode picture. The NvEncodeAPI interface can return
- * ::NV_ENC_ERR_NEED_MORE_INPUT error code from ::NvEncEncodePicture() API. The
- * client must not lock the output buffer in such case but should send the next
- * frame for encoding. The client must keep on calling ::NvEncEncodePicture() API
- * until it returns ::NV_ENC_SUCCESS. \n
- * The client must always lock the bitstream data in order in which it has submitted.
- * This is true for both asynchronous and synchronous mode.
- *
- *\par Picture type decision:
- * If the client is taking the picture type decision and it must disable the picture
- * type decision module in NvEncodeAPI by setting NV_ENC_INITIALIZE_PARAMS::enablePTD
- * to 0. In this case the client is  required to send the picture in encoding
- * order to NvEncodeAPI by doing the re-ordering for B frames. \n
- * If the client doesn't want to take the picture type decision it can enable
- * picture type decision module in the NvEncodeAPI interface by setting
- * NV_ENC_INITIALIZE_PARAMS::enablePTD to 1 and send the input pictures in display
- * order.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] createEncodeParams
- *   Refer ::_NV_ENC_INITIALIZE_PARAMS for details.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncInitializeEncoder                     (void* encoder, NV_ENC_INITIALIZE_PARAMS* createEncodeParams);
-
-
-// NvEncCreateInputBuffer
-/**
- * \brief Allocates Input buffer.
- *
- * This function is used to allocate an input buffer. The client must enumerate
- * the input buffer format before allocating the input buffer resources. The
- * NV_ENC_INPUT_PTR returned by the NvEncodeAPI interface in the
- * NV_ENC_CREATE_INPUT_BUFFER::inputBuffer field can be directly used in
- * ::NvEncEncodePicture() API. The number of input buffers to be allocated by the
- * client must be at least 4 more than the number of B frames being used for encoding.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in,out] createInputBufferParams
- *  Pointer to the ::NV_ENC_CREATE_INPUT_BUFFER structure.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncCreateInputBuffer                     (void* encoder, NV_ENC_CREATE_INPUT_BUFFER* createInputBufferParams);
-
-
-// NvEncDestroyInputBuffer
-/**
- * \brief Release an input buffers.
- *
- * This function is used to free an input buffer. If the client has allocated
- * any input buffer using ::NvEncCreateInputBuffer() API, it must free those
- * input buffers by calling this function. The client must release the input
- * buffers before destroying the encoder using ::NvEncDestroyEncoder() API.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] inputBuffer
- *   Pointer to the input buffer to be released.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncDestroyInputBuffer                    (void* encoder, NV_ENC_INPUT_PTR inputBuffer);
-
-
-// NvEncCreateBitstreamBuffer
-/**
- * \brief Allocates an output bitstream buffer
- *
- * This function is used to allocate an output bitstream buffer and returns a
- * NV_ENC_OUTPUT_PTR to bitstream  buffer to the client in the
- * NV_ENC_CREATE_BITSTREAM_BUFFER::bitstreamBuffer field.
- * The client can only call this function after the encoder session has been
- * initialized using ::NvEncInitializeEncoder() API. The minimum number of output
- * buffers allocated by the client must be at least 4 more than the number of B
- * B frames being used for encoding. The client can only access the output
- * bitsteam data by locking the \p bitstreamBuffer using the ::NvEncLockBitstream()
- * function.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in,out] createBitstreamBufferParams
- *   Pointer ::NV_ENC_CREATE_BITSTREAM_BUFFER for details.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncCreateBitstreamBuffer                 (void* encoder, NV_ENC_CREATE_BITSTREAM_BUFFER* createBitstreamBufferParams);
-
-
-// NvEncDestroyBitstreamBuffer
-/**
- * \brief Release a bitstream buffer.
- *
- * This function is used to release the output bitstream buffer allocated using
- * the ::NvEncCreateBitstreamBuffer() function. The client must release the output
- * bitstreamBuffer using this function before destroying the encoder session.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] bitstreamBuffer
- *   Pointer to the bitstream buffer being released.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncDestroyBitstreamBuffer                (void* encoder, NV_ENC_OUTPUT_PTR bitstreamBuffer);
-
-// NvEncEncodePicture
-/**
- * \brief Submit an input picture for encoding.
- *
- * This function is used to submit an input picture buffer for encoding. The
- * encoding parameters are passed using \p *encodePicParams which is a pointer
- * to the ::_NV_ENC_PIC_PARAMS structure.
- *
- * If the client has set NV_ENC_INITIALIZE_PARAMS::enablePTD to 0, then it must
- * send a valid value for the following fields.
- * - NV_ENC_PIC_PARAMS::pictureType
- * - NV_ENC_PIC_PARAMS_H264::displayPOCSyntax (H264 only)
- * - NV_ENC_PIC_PARAMS_H264::frameNumSyntax(H264 only)
- * - NV_ENC_PIC_PARAMS_H264::refPicFlag(H264 only)
- *
- *
- *\par Asynchronous Encoding
- * If the client has enabled asynchronous mode of encoding by setting
- * NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 1 in the ::NvEncInitializeEncoder()
- * API ,then the client must send a valid NV_ENC_PIC_PARAMS::completionEvent.
- * Incase of asynchronous mode of operation, client can queue the ::NvEncEncodePicture()
- * API commands from the main thread and then queue output buffers to be processed
- * to a secondary worker thread. Before the locking the output buffers in the
- * secondary thread , the client must wait on NV_ENC_PIC_PARAMS::completionEvent
- * it has queued in ::NvEncEncodePicture() API call. The client must always process
- * completion event and the output buffer in the same order in which they have been
- * submitted for encoding. The NvEncodeAPI interface is responsible for any
- * re-ordering required for B frames and will always ensure that encoded bitstream
- * data is written in the same order in which output buffer is submitted.
- *\code
-  The below example shows how  asynchronous encoding in case of 1 B frames
-  ------------------------------------------------------------------------
-  Suppose the client allocated 4 input buffers(I1,I2..), 4 output buffers(O1,O2..)
-  and 4 completion events(E1, E2, ...). The NvEncodeAPI interface will need to
-  keep a copy of the input buffers for re-ordering and it allocates following
-  internal buffers (NvI1, NvI2...). These internal buffers are managed by NvEncodeAPI
-  and the client is not responsible for the allocating or freeing the memory of
-  the internal buffers.
-
-  a) The client main thread will queue the following encode frame calls.
-  Note the picture type is unknown to the client, the decision is being taken by
-  NvEncodeAPI interface. The client should pass ::_NV_ENC_PIC_PARAMS parameter
-  consisting of allocated input buffer, output buffer and output events in successive
-  ::NvEncEncodePicture() API calls along with other required encode picture params.
-  For example:
-  1st EncodePicture parameters - (I1, O1, E1)
-  2nd EncodePicture parameters - (I2, O2, E2)
-  3rd EncodePicture parameters - (I3, O3, E3)
-
-  b) NvEncodeAPI SW will receive the following encode Commands from the client.
-  The left side shows input from client in the form (Input buffer, Output Buffer,
-  Output Event). The right hand side shows a possible picture type decision take by
-  the NvEncodeAPI interface.
-  (I1, O1, E1)    ---P1 Frame
-  (I2, O2, E2)    ---B2 Frame
-  (I3, O3, E3)    ---P3 Frame
-
-  c) NvEncodeAPI interface will make a copy of the input buffers to its internal
-   buffersfor re-ordering. These copies are done as part of nvEncEncodePicture
-   function call from the client and NvEncodeAPI interface is responsible for
-   synchronization of copy operation with the actual encoding operation.
-   I1 --> NvI1
-   I2 --> NvI2
-   I3 --> NvI3
-
-  d) After returning from ::NvEncEncodePicture() call , the client must queue the output
-   bitstream  processing work to the secondary thread. The output bitstream processing
-   for asynchronous mode consist of first waiting on completion event(E1, E2..)
-   and then locking the output bitstream buffer(O1, O2..) for reading the encoded
-   data. The work queued to the secondary thread by the client is in the following order
-   (I1, O1, E1)
-   (I2, O2, E2)
-   (I3, O3, E3)
-   Note they are in the same order in which client calls ::NvEncEncodePicture() API
-   in \p step a).
-
-  e) NvEncodeAPI interface  will do the re-ordering such that Encoder HW will receive
-  the following encode commands:
-  (NvI1, O1, E1)   ---P1 Frame
-  (NvI3, O2, E2)   ---P3 Frame
-  (NvI2, O3, E3)   ---B2 frame
-
-  f) After the encoding operations are completed, the events will be signalled
-  by NvEncodeAPI interface in the following order :
-  (O1, E1) ---P1 Frame ,output bitstream copied to O1 and event E1 signalled.
-  (O2, E2) ---P3 Frame ,output bitstream copied to O2 and event E2 signalled.
-  (O3, E3) ---B2 Frame ,output bitstream copied to O3 and event E3 signalled.
-
-  g) The client must lock the bitstream data using ::NvEncLockBitstream() API in
-   the order O1,O2,O3  to read the encoded data, after waiting for the events
-   to be signalled in the same order i.e E1, E2 and E3.The output processing is
-   done in the secondary thread in the following order:
-   Waits on E1, copies encoded bitstream from O1
-   Waits on E2, copies encoded bitstream from O2
-   Waits on E3, copies encoded bitstream from O3
-
-  -Note the client will receive the events signalling and output buffer in the
-   same order in which they have submitted for encoding.
-  -Note the LockBitstream will have picture type field which will notify the
-   output picture type to the clients.
-  -Note the input, output buffer and the output completion event are free to be
-   reused once NvEncodeAPI interfaced has signalled the event and the client has
-   copied the data from the output buffer.
-
- * \endcode
- *
- *\par Synchronous Encoding
- * The client can enable synchronous mode of encoding by setting
- * NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 0 in ::NvEncInitializeEncoder() API.
- * The NvEncodeAPI interface may return ::NV_ENC_ERR_NEED_MORE_INPUT error code for
- * some ::NvEncEncodePicture() API calls when NV_ENC_INITIALIZE_PARAMS::enablePTD
- * is set to 1, but the client must not treat it as a fatal error. The NvEncodeAPI
- * interface might not be able to submit an input picture buffer for encoding
- * immediately due to re-ordering for B frames. The NvEncodeAPI interface cannot
- * submit the input picture which is decided to be encoded as B frame as it waits
- * for backward reference from  temporally subsequent frames. This input picture
- * is buffered internally and waits for more input picture to arrive. The client
- * must not call ::NvEncLockBitstream() API on the output buffers whose
- * ::NvEncEncodePicture() API returns ::NV_ENC_ERR_NEED_MORE_INPUT. The client must
- * wait for the NvEncodeAPI interface to return ::NV_ENC_SUCCESS before locking the
- * output bitstreams to read the encoded bitstream data. The following example
- * explains the scenario with synchronous encoding with 2 B frames.
- *\code
- The below example shows how  synchronous encoding works in case of 1 B frames
- -----------------------------------------------------------------------------
- Suppose the client allocated 4 input buffers(I1,I2..), 4 output buffers(O1,O2..)
- and 4 completion events(E1, E2, ...). The NvEncodeAPI interface will need to
- keep a copy of the input buffers for re-ordering and it allocates following
- internal buffers (NvI1, NvI2...). These internal buffers are managed by NvEncodeAPI
- and the client is not responsible for the allocating or freeing the memory of
- the internal buffers.
-
- The client calls ::NvEncEncodePicture() API with input buffer I1 and output buffer O1.
- The NvEncodeAPI decides to encode I1 as P frame and submits it to encoder
- HW and returns ::NV_ENC_SUCCESS.
- The client can now read the encoded data by locking the output O1 by calling
- NvEncLockBitstream API.
-
- The client calls ::NvEncEncodePicture() API with input buffer I2 and output buffer O2.
- The NvEncodeAPI decides to encode I2 as B frame and buffers I2 by copying it
- to internal buffer and returns ::NV_ENC_ERR_NEED_MORE_INPUT.
- The error is not fatal and it notifies client that it cannot read the encoded
- data by locking the output O2 by calling ::NvEncLockBitstream() API without submitting
- more work to the NvEncodeAPI interface.
-
- The client calls ::NvEncEncodePicture() with input buffer I3 and output buffer O3.
- The NvEncodeAPI decides to encode I3 as P frame and it first submits I3 for
- encoding which will be used as backward reference frame for I2.
- The NvEncodeAPI then submits I2 for encoding and returns ::NV_ENC_SUCESS. Both
- the submission are part of the same ::NvEncEncodePicture() function call.
- The client can now read the encoded data for both the frames by locking the output
- O2 followed by  O3 ,by calling ::NvEncLockBitstream() API.
-
- The client must always lock the output in the same order in which it has submitted
- to receive the encoded bitstream in correct encoding order.
-
- * \endcode
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in,out] encodePicParams
- *   Pointer to the ::_NV_ENC_PIC_PARAMS structure.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_ENCODER_BUSY \n
- * ::NV_ENC_ERR_NEED_MORE_INPUT \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncEncodePicture                         (void* encoder, NV_ENC_PIC_PARAMS* encodePicParams);
-
-
-// NvEncLockBitstream
-/**
- * \brief Lock output bitstream buffer
- *
- * This function is used to lock the bitstream buffer to read the encoded data.
- * The client can only access the encoded data by calling this function.
- * The pointer to client accessible encoded data is returned in the
- * NV_ENC_LOCK_BITSTREAM::bitstreamBufferPtr field. The size of the encoded data
- * in the output buffer is returned in the NV_ENC_LOCK_BITSTREAM::bitstreamSizeInBytes
- * The NvEncodeAPI interface also returns the output picture type and picture structure
- * of the encoded frame in NV_ENC_LOCK_BITSTREAM::pictureType and
- * NV_ENC_LOCK_BITSTREAM::pictureStruct fields respectively. If the client has
- * set NV_ENC_LOCK_BITSTREAM::doNotWait to 1, the function might return
- * ::NV_ENC_ERR_LOCK_BUSY if client is operating in synchronous mode. This is not
- * a fatal failure if NV_ENC_LOCK_BITSTREAM::doNotWait is set to 1. In the above case the client can
- * retry the function after few milliseconds.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in,out] lockBitstreamBufferParams
- *   Pointer to the ::_NV_ENC_LOCK_BITSTREAM structure.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_LOCK_BUSY \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncLockBitstream                         (void* encoder, NV_ENC_LOCK_BITSTREAM* lockBitstreamBufferParams);
-
-
-// NvEncUnlockBitstream
-/**
- * \brief Unlock the output bitstream buffer
- *
- * This function is used to unlock the output bitstream buffer after the client
- * has read the encoded data from output buffer. The client must call this function
- * to unlock the output buffer which it has previously locked using ::NvEncLockBitstream()
- * function. Using a locked bitstream buffer in ::NvEncEncodePicture() API will cause
- * the function to fail.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in,out] bitstreamBuffer
- *   bitstream buffer pointer being unlocked
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncUnlockBitstream                       (void* encoder, NV_ENC_OUTPUT_PTR bitstreamBuffer);
-
-
-// NvLockInputBuffer
-/**
- * \brief Locks an input buffer
- *
- * This function is used to lock the input buffer to load the uncompressed YUV
- * pixel data into input buffer memory. The client must pass the NV_ENC_INPUT_PTR
- * it had previously allocated using ::NvEncCreateInputBuffer()in the
- * NV_ENC_LOCK_INPUT_BUFFER::inputBuffer field.
- * The NvEncodeAPI interface returns pointer to client accessible input buffer
- * memory in NV_ENC_LOCK_INPUT_BUFFER::bufferDataPtr field.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in,out] lockInputBufferParams
- *   Pointer to the ::_NV_ENC_LOCK_INPUT_BUFFER structure
- *
- * \return
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_LOCK_BUSY \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncLockInputBuffer                      (void* encoder, NV_ENC_LOCK_INPUT_BUFFER* lockInputBufferParams);
-
-
-// NvUnlockInputBuffer
-/**
- * \brief Unlocks the input buffer
- *
- * This function is used to unlock the input buffer memory previously locked for
- * uploading YUV pixel data. The input buffer must be unlocked before being used
- * again for encoding, otherwise NvEncodeAPI will fail the ::NvEncEncodePicture()
- *
-  * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] inputBuffer
- *   Pointer to the input buffer that is being unlocked.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- *
- */
-NVENCSTATUS NVENCAPI NvEncUnlockInputBuffer                     (void* encoder, NV_ENC_INPUT_PTR inputBuffer);
-
-
-// NvEncGetEncodeStats
-/**
- * \brief Get encoding statistics.
- *
- * This function is used to retrieve the encoding statistics.
- * This API is not supported when encode device type is CUDA.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in,out] encodeStats
- *   Pointer to the ::_NV_ENC_STAT structure.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncGetEncodeStats                        (void* encoder, NV_ENC_STAT* encodeStats);
-
-
-// NvEncGetSequenceParams
-/**
- * \brief Get encoded sequence and picture header.
- *
- * This function can be used to retrieve the sequence and picture header out of
- * band. The client must call this function only after the encoder has been
- * initialized using ::NvEncInitializeEncoder() function. The client must
- * allocate the memory where the NvEncodeAPI interface can copy the bitstream
- * header and pass the pointer to the memory in NV_ENC_SEQUENCE_PARAM_PAYLOAD::spsppsBuffer.
- * The size of buffer is passed in the field  NV_ENC_SEQUENCE_PARAM_PAYLOAD::inBufferSize.
- * The NvEncodeAPI interface will copy the bitstream header payload and returns
- * the actual size of the bitstream header in the field
- * NV_ENC_SEQUENCE_PARAM_PAYLOAD::outSPSPPSPayloadSize.
- * The client must call  ::NvEncGetSequenceParams() function from the same thread which is
- * being used to call ::NvEncEncodePicture() function.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in,out] sequenceParamPayload
- *   Pointer to the ::_NV_ENC_SEQUENCE_PARAM_PAYLOAD structure.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncGetSequenceParams                     (void* encoder, NV_ENC_SEQUENCE_PARAM_PAYLOAD* sequenceParamPayload);
-
-
-// NvEncRegisterAsyncEvent
-/**
- * \brief Register event for notification to encoding completion.
- *
- * This function is used to register the completion event with NvEncodeAPI
- * interface. The event is required when the client has configured the encoder to
- * work in asynchronous mode. In this mode the client needs to send a completion
- * event with every output buffer. The NvEncodeAPI interface will signal the
- * completion of the encoding process using this event. Only after the event is
- * signalled the client can get the encoded data using ::NvEncLockBitstream() function.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] eventParams
- *   Pointer to the ::_NV_ENC_EVENT_PARAMS structure.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncRegisterAsyncEvent                    (void* encoder, NV_ENC_EVENT_PARAMS* eventParams);
-
-
-// NvEncUnregisterAsyncEvent
-/**
- * \brief Unregister completion event.
- *
- * This function is used to unregister completion event which has been previously
- * registered using ::NvEncRegisterAsyncEvent() function. The client must unregister
- * all events before destroying the encoder using ::NvEncDestroyEncoder() function.
- *
-  * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] eventParams
- *   Pointer to the ::_NV_ENC_EVENT_PARAMS structure.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncUnregisterAsyncEvent                  (void* encoder, NV_ENC_EVENT_PARAMS* eventParams);
-
-
-// NvEncMapInputResource
-/**
- * \brief Map an externally created input resource pointer for encoding.
- *
- * Maps an externally allocated input resource [using and returns a NV_ENC_INPUT_PTR
- * which can be used for encoding in the ::NvEncEncodePicture() function. The
- * mapped resource is returned in the field NV_ENC_MAP_INPUT_RESOURCE::outputResourcePtr.
- * The NvEncodeAPI interface also returns the buffer format of the mapped resource
- * in the field NV_ENC_MAP_INPUT_RESOURCE::outbufferFmt.
- * This function provides synchronization guarantee that any graphics or compute
- * work submitted on the input buffer is completed before the buffer is used for encoding.
- * The client should not access any input buffer while they are mapped by the encoder.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in,out] mapInputResParams
- *   Pointer to the ::_NV_ENC_MAP_INPUT_RESOURCE structure.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_RESOURCE_NOT_REGISTERED \n
- * ::NV_ENC_ERR_MAP_FAILED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncMapInputResource                         (void* encoder, NV_ENC_MAP_INPUT_RESOURCE* mapInputResParams);
-
-
-// NvEncUnmapInputResource
-/**
- * \brief  UnMaps a NV_ENC_INPUT_PTR  which was mapped for encoding
- *
- *
- * UnMaps an input buffer which was previously mapped using ::NvEncMapInputResource()
- * API. The mapping created using ::NvEncMapInputResource() should be invalidated
- * using this API before the external resource is destroyed by the client. The client
- * must unmap the buffer after ::NvEncLockBitstream() API returns succuessfully for encode
- * work submitted using the mapped input buffer.
- *
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] mappedInputBuffer
- *   Pointer to the NV_ENC_INPUT_PTR
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_RESOURCE_NOT_REGISTERED \n
- * ::NV_ENC_ERR_RESOURCE_NOT_MAPPED \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncUnmapInputResource                         (void* encoder, NV_ENC_INPUT_PTR mappedInputBuffer);
-
-// NvEncDestroyEncoder
-/**
- * \brief Destroy Encoding Session
- *
- * Destroys the encoder session previously created using ::NvEncOpenEncodeSession()
- * function. The client must flush the encoder before freeing any resources. In order
- * to flush the encoder the client must pass a NULL encode picture packet and either
- * wait for the ::NvEncEncodePicture() function to return in synchronous mode or wait
- * for the flush event to be signaled by the encoder in asynchronous mode.
- * The client must free all the input and output resources created using the
- * NvEncodeAPI interface before destroying the encoder. If the client is operating
- * in asynchronous mode, it must also unregister the completion events previously
- * registered.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncDestroyEncoder                        (void* encoder);
-
-// NvEncInvalidateRefFrames
-/**
- * \brief Invalidate reference frames
- *
- * Invalidates reference frame based on the time stamp provided by the client.
- * The encoder marks any reference frames or any frames which have been reconstructed
- * using the corrupt frame as invalid for motion estimation and uses older reference
- * frames for motion estimation. The encoded forces the current frame to be encoded
- * as an intra frame if no reference frames are left after invalidation process.
- * This is useful for low latency application for error resiliency. The client
- * is recommended to set NV_ENC_CONFIG_H264::maxNumRefFrames to a large value so
- * that encoder can keep a backup of older reference frames in the DPB and can use them
- * for motion estimation when the newer reference frames have been invalidated.
- * This API can be called multiple times.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] invalidRefFrameTimeStamp
- *   Timestamp of the invalid reference frames which needs to be invalidated.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncInvalidateRefFrames(void* encoder, uint64_t invalidRefFrameTimeStamp);
-
-// NvEncOpenEncodeSessionEx
-/**
- * \brief Opens an encoding session.
- *
- * Opens an encoding session and returns a pointer to the encoder interface in
- * the \p **encoder parameter. The client should start encoding process by calling
- * this API first.
- * The client must pass a pointer to IDirect3DDevice9 device or CUDA context in the \p *device parameter.
- * For the OpenGL interface, \p device must be NULL. An OpenGL context must be current when
- * calling all NvEncodeAPI functions.
- * If the creation of encoder session fails, the client must call ::NvEncDestroyEncoder API
- * before exiting.
- *
- * \param [in] openSessionExParams
- *    Pointer to a ::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS structure.
- * \param [out] encoder
- *    Encode Session pointer to the NvEncodeAPI interface.
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_NO_ENCODE_DEVICE \n
- * ::NV_ENC_ERR_UNSUPPORTED_DEVICE \n
- * ::NV_ENC_ERR_INVALID_DEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncOpenEncodeSessionEx                   (NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS *openSessionExParams, void** encoder);
-
-// NvEncRegisterResource
-/**
- * \brief Registers a resource with the Nvidia Video Encoder Interface.
- *
- * Registers a resource with the Nvidia Video Encoder Interface for book keeping.
- * The client is expected to pass the registered resource handle as well, while calling ::NvEncMapInputResource API.
- *
- * \param [in] encoder
- *   Pointer to the NVEncodeAPI interface.
- *
- * \param [in] registerResParams
- *   Pointer to a ::_NV_ENC_REGISTER_RESOURCE structure
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_RESOURCE_REGISTER_FAILED \n
- * ::NV_ENC_ERR_GENERIC \n
- * ::NV_ENC_ERR_UNIMPLEMENTED \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncRegisterResource                      (void* encoder, NV_ENC_REGISTER_RESOURCE* registerResParams);
-
-// NvEncUnregisterResource
-/**
- * \brief Unregisters a resource previously registered with the Nvidia Video Encoder Interface.
- *
- * Unregisters a resource previously registered with the Nvidia Video Encoder Interface.
- * The client is expected to unregister any resource that it has registered with the
- * Nvidia Video Encoder Interface before destroying the resource.
- *
- * \param [in] encoder
- *   Pointer to the NVEncodeAPI interface.
- *
- * \param [in] registeredResource
- *   The registered resource pointer that was returned in ::NvEncRegisterResource.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_RESOURCE_NOT_REGISTERED \n
- * ::NV_ENC_ERR_GENERIC \n
- * ::NV_ENC_ERR_UNIMPLEMENTED \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncUnregisterResource                    (void* encoder, NV_ENC_REGISTERED_PTR registeredResource);
-
-// NvEncReconfigureEncoder
-/**
- * \brief Reconfigure an existing encoding session.
- *
- * Reconfigure an existing encoding session.
- * The client should call this API to change/reconfigure the parameter passed during
- * NvEncInitializeEncoder API call.
- * Currently Reconfiguration of following are not supported.
- * Change in GOP structure.
- * Change in sync-Async mode.
- * Change in MaxWidth & MaxHeight.
- * Change in PTDmode.
- *
- * Resolution change is possible only if maxEncodeWidth & maxEncodeHeight of NV_ENC_INITIALIZE_PARAMS
- * is set while creating encoder session.
- *
- * \param [in] encoder
- *   Pointer to the NVEncodeAPI interface.
- *
- * \param [in] reInitEncodeParams
- *    Pointer to a ::NV_ENC_RECONFIGURE_PARAMS structure.
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_NO_ENCODE_DEVICE \n
- * ::NV_ENC_ERR_UNSUPPORTED_DEVICE \n
- * ::NV_ENC_ERR_INVALID_DEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_GENERIC \n
- *
- */
-NVENCSTATUS NVENCAPI NvEncReconfigureEncoder                   (void *encoder, NV_ENC_RECONFIGURE_PARAMS* reInitEncodeParams);
-
-
-
-// NvEncCreateMVBuffer
-/**
- * \brief Allocates output MV buffer for ME only mode.
- *
- * This function is used to allocate an output MV buffer. The size of the mvBuffer is
- * dependent on the frame height and width of the last ::NvEncCreateInputBuffer() call.
- * The NV_ENC_OUTPUT_PTR returned by the NvEncodeAPI interface in the
- * ::NV_ENC_CREATE_MV_BUFFER::mvBuffer field should be used in
- * ::NvEncRunMotionEstimationOnly() API.
- * Client must lock ::NV_ENC_CREATE_MV_BUFFER::mvBuffer using ::NvEncLockBitstream() API to get the motion vector data.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in,out] createMVBufferParams
- *  Pointer to the ::NV_ENC_CREATE_MV_BUFFER structure.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_GENERIC \n
- */
-NVENCSTATUS NVENCAPI NvEncCreateMVBuffer                        (void* encoder, NV_ENC_CREATE_MV_BUFFER* createMVBufferParams);
-
-
-// NvEncDestroyMVBuffer
-/**
- * \brief Release an output MV buffer for ME only mode.
- *
- * This function is used to release the output MV buffer allocated using
- * the ::NvEncCreateMVBuffer() function. The client must release the output
- * mvBuffer using this function before destroying the encoder session.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] mvBuffer
- *   Pointer to the mvBuffer being released.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- */
-NVENCSTATUS NVENCAPI NvEncDestroyMVBuffer                       (void* encoder, NV_ENC_OUTPUT_PTR mvBuffer);
-
-
-// NvEncRunMotionEstimationOnly
-/**
- * \brief Submit an input picture and reference frame for motion estimation in ME only mode.
- *
- * This function is used to submit the input frame and reference frame for motion
- * estimation. The ME parameters are passed using *meOnlyParams which is a pointer
- * to ::_NV_ENC_MEONLY_PARAMS structure.
- * Client must lock ::NV_ENC_CREATE_MV_BUFFER::mvBuffer using ::NvEncLockBitstream() API to get the motion vector data.
- * to get motion vector data.
- *
- * \param [in] encoder
- *   Pointer to the NvEncodeAPI interface.
- * \param [in] meOnlyParams
- *   Pointer to the ::_NV_ENC_MEONLY_PARAMS structure.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
- * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
- * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
- * ::NV_ENC_ERR_OUT_OF_MEMORY \n
- * ::NV_ENC_ERR_INVALID_PARAM \n
- * ::NV_ENC_ERR_INVALID_VERSION \n
- * ::NV_ENC_ERR_NEED_MORE_INPUT \n
- * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
- * ::NV_ENC_ERR_GENERIC \n
- */
-NVENCSTATUS NVENCAPI NvEncRunMotionEstimationOnly               (void* encoder, NV_ENC_MEONLY_PARAMS* meOnlyParams);
-
-// NvEncodeAPIGetMaxSupportedVersion
-/**
- * \brief Get the largest NvEncodeAPI version supported by the driver.
- *
- * This function can be used by clients to determine if the driver supports
- * the NvEncodeAPI header the application was compiled with.
- *
- * \param [out] version
- *   Pointer to the requested value. The 4 least significant bits in the returned
- *   indicate the minor version and the rest of the bits indicate the major
- *   version of the largest supported version.
- *
- * \return
- * ::NV_ENC_SUCCESS \n
- * ::NV_ENC_ERR_INVALID_PTR \n
- */
-NVENCSTATUS NVENCAPI NvEncodeAPIGetMaxSupportedVersion          (uint32_t* version);
-
-
-/// \cond API PFN
-/*
- *  Defines API function pointers
- */
-typedef NVENCSTATUS (NVENCAPI* PNVENCOPENENCODESESSION)         (void* device, uint32_t deviceType, void** encoder);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEGUIDCOUNT)        (void* encoder, uint32_t* encodeGUIDCount);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEGUIDS)            (void* encoder, GUID* GUIDs, uint32_t guidArraySize, uint32_t* GUIDCount);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEPROFILEGUIDCOUNT) (void* encoder, GUID encodeGUID, uint32_t* encodeProfileGUIDCount);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEPROFILEGUIDS)     (void* encoder, GUID encodeGUID, GUID* profileGUIDs, uint32_t guidArraySize, uint32_t* GUIDCount);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETINPUTFORMATCOUNT)       (void* encoder, GUID encodeGUID, uint32_t* inputFmtCount);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETINPUTFORMATS)           (void* encoder, GUID encodeGUID, NV_ENC_BUFFER_FORMAT* inputFmts, uint32_t inputFmtArraySize, uint32_t* inputFmtCount);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODECAPS)             (void* encoder, GUID encodeGUID, NV_ENC_CAPS_PARAM* capsParam, int* capsVal);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEPRESETCOUNT)      (void* encoder, GUID encodeGUID, uint32_t* encodePresetGUIDCount);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEPRESETGUIDS)      (void* encoder, GUID encodeGUID, GUID* presetGUIDs, uint32_t guidArraySize, uint32_t* encodePresetGUIDCount);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEPRESETCONFIG)     (void* encoder, GUID encodeGUID, GUID  presetGUID, NV_ENC_PRESET_CONFIG* presetConfig);
-typedef NVENCSTATUS (NVENCAPI* PNVENCINITIALIZEENCODER)         (void* encoder, NV_ENC_INITIALIZE_PARAMS* createEncodeParams);
-typedef NVENCSTATUS (NVENCAPI* PNVENCCREATEINPUTBUFFER)         (void* encoder, NV_ENC_CREATE_INPUT_BUFFER* createInputBufferParams);
-typedef NVENCSTATUS (NVENCAPI* PNVENCDESTROYINPUTBUFFER)        (void* encoder, NV_ENC_INPUT_PTR inputBuffer);
-typedef NVENCSTATUS (NVENCAPI* PNVENCCREATEBITSTREAMBUFFER)     (void* encoder, NV_ENC_CREATE_BITSTREAM_BUFFER* createBitstreamBufferParams);
-typedef NVENCSTATUS (NVENCAPI* PNVENCDESTROYBITSTREAMBUFFER)    (void* encoder, NV_ENC_OUTPUT_PTR bitstreamBuffer);
-typedef NVENCSTATUS (NVENCAPI* PNVENCENCODEPICTURE)             (void* encoder, NV_ENC_PIC_PARAMS* encodePicParams);
-typedef NVENCSTATUS (NVENCAPI* PNVENCLOCKBITSTREAM)             (void* encoder, NV_ENC_LOCK_BITSTREAM* lockBitstreamBufferParams);
-typedef NVENCSTATUS (NVENCAPI* PNVENCUNLOCKBITSTREAM)           (void* encoder, NV_ENC_OUTPUT_PTR bitstreamBuffer);
-typedef NVENCSTATUS (NVENCAPI* PNVENCLOCKINPUTBUFFER)           (void* encoder, NV_ENC_LOCK_INPUT_BUFFER* lockInputBufferParams);
-typedef NVENCSTATUS (NVENCAPI* PNVENCUNLOCKINPUTBUFFER)         (void* encoder, NV_ENC_INPUT_PTR inputBuffer);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODESTATS)            (void* encoder, NV_ENC_STAT* encodeStats);
-typedef NVENCSTATUS (NVENCAPI* PNVENCGETSEQUENCEPARAMS)         (void* encoder, NV_ENC_SEQUENCE_PARAM_PAYLOAD* sequenceParamPayload);
-typedef NVENCSTATUS (NVENCAPI* PNVENCREGISTERASYNCEVENT)        (void* encoder, NV_ENC_EVENT_PARAMS* eventParams);
-typedef NVENCSTATUS (NVENCAPI* PNVENCUNREGISTERASYNCEVENT)      (void* encoder, NV_ENC_EVENT_PARAMS* eventParams);
-typedef NVENCSTATUS (NVENCAPI* PNVENCMAPINPUTRESOURCE)          (void* encoder, NV_ENC_MAP_INPUT_RESOURCE* mapInputResParams);
-typedef NVENCSTATUS (NVENCAPI* PNVENCUNMAPINPUTRESOURCE)        (void* encoder, NV_ENC_INPUT_PTR mappedInputBuffer);
-typedef NVENCSTATUS (NVENCAPI* PNVENCDESTROYENCODER)            (void* encoder);
-typedef NVENCSTATUS (NVENCAPI* PNVENCINVALIDATEREFFRAMES)       (void* encoder, uint64_t invalidRefFrameTimeStamp);
-typedef NVENCSTATUS (NVENCAPI* PNVENCOPENENCODESESSIONEX)       (NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS *openSessionExParams, void** encoder);
-typedef NVENCSTATUS (NVENCAPI* PNVENCREGISTERRESOURCE)          (void* encoder, NV_ENC_REGISTER_RESOURCE* registerResParams);
-typedef NVENCSTATUS (NVENCAPI* PNVENCUNREGISTERRESOURCE)        (void* encoder, NV_ENC_REGISTERED_PTR registeredRes);
-typedef NVENCSTATUS (NVENCAPI* PNVENCRECONFIGUREENCODER)        (void* encoder, NV_ENC_RECONFIGURE_PARAMS* reInitEncodeParams);
-
-typedef NVENCSTATUS (NVENCAPI* PNVENCCREATEMVBUFFER)            (void* encoder, NV_ENC_CREATE_MV_BUFFER* createMVBufferParams);
-typedef NVENCSTATUS (NVENCAPI* PNVENCDESTROYMVBUFFER)           (void* encoder, NV_ENC_OUTPUT_PTR mvBuffer);
-typedef NVENCSTATUS (NVENCAPI* PNVENCRUNMOTIONESTIMATIONONLY)   (void* encoder, NV_ENC_MEONLY_PARAMS* meOnlyParams);
-
-
-/// \endcond
-
-
-/** @} */ /* END ENCODE_FUNC */
-
-/**
- * \ingroup ENCODER_STRUCTURE
- * NV_ENCODE_API_FUNCTION_LIST
- */
-typedef struct _NV_ENCODE_API_FUNCTION_LIST
-{
-    uint32_t                        version;                           /**< [in]: Client should pass NV_ENCODE_API_FUNCTION_LIST_VER.                               */
-    uint32_t                        reserved;                          /**< [in]: Reserved and should be set to 0.                                                  */
-    PNVENCOPENENCODESESSION         nvEncOpenEncodeSession;            /**< [out]: Client should access ::NvEncOpenEncodeSession() API through this pointer.        */
-    PNVENCGETENCODEGUIDCOUNT        nvEncGetEncodeGUIDCount;           /**< [out]: Client should access ::NvEncGetEncodeGUIDCount() API through this pointer.       */
-    PNVENCGETENCODEPRESETCOUNT      nvEncGetEncodeProfileGUIDCount;    /**< [out]: Client should access ::NvEncGetEncodeProfileGUIDCount() API through this pointer.*/
-    PNVENCGETENCODEPRESETGUIDS      nvEncGetEncodeProfileGUIDs;        /**< [out]: Client should access ::NvEncGetEncodeProfileGUIDs() API through this pointer.    */
-    PNVENCGETENCODEGUIDS            nvEncGetEncodeGUIDs;               /**< [out]: Client should access ::NvEncGetEncodeGUIDs() API through this pointer.           */
-    PNVENCGETINPUTFORMATCOUNT       nvEncGetInputFormatCount;          /**< [out]: Client should access ::NvEncGetInputFormatCount() API through this pointer.      */
-    PNVENCGETINPUTFORMATS           nvEncGetInputFormats;              /**< [out]: Client should access ::NvEncGetInputFormats() API through this pointer.          */
-    PNVENCGETENCODECAPS             nvEncGetEncodeCaps;                /**< [out]: Client should access ::NvEncGetEncodeCaps() API through this pointer.            */
-    PNVENCGETENCODEPRESETCOUNT      nvEncGetEncodePresetCount;         /**< [out]: Client should access ::NvEncGetEncodePresetCount() API through this pointer.     */
-    PNVENCGETENCODEPRESETGUIDS      nvEncGetEncodePresetGUIDs;         /**< [out]: Client should access ::NvEncGetEncodePresetGUIDs() API through this pointer.     */
-    PNVENCGETENCODEPRESETCONFIG     nvEncGetEncodePresetConfig;        /**< [out]: Client should access ::NvEncGetEncodePresetConfig() API through this pointer.    */
-    PNVENCINITIALIZEENCODER         nvEncInitializeEncoder;            /**< [out]: Client should access ::NvEncInitializeEncoder() API through this pointer.        */
-    PNVENCCREATEINPUTBUFFER         nvEncCreateInputBuffer;            /**< [out]: Client should access ::NvEncCreateInputBuffer() API through this pointer.        */
-    PNVENCDESTROYINPUTBUFFER        nvEncDestroyInputBuffer;           /**< [out]: Client should access ::NvEncDestroyInputBuffer() API through this pointer.       */
-    PNVENCCREATEBITSTREAMBUFFER     nvEncCreateBitstreamBuffer;        /**< [out]: Client should access ::NvEncCreateBitstreamBuffer() API through this pointer.    */
-    PNVENCDESTROYBITSTREAMBUFFER    nvEncDestroyBitstreamBuffer;       /**< [out]: Client should access ::NvEncDestroyBitstreamBuffer() API through this pointer.   */
-    PNVENCENCODEPICTURE             nvEncEncodePicture;                /**< [out]: Client should access ::NvEncEncodePicture() API through this pointer.            */
-    PNVENCLOCKBITSTREAM             nvEncLockBitstream;                /**< [out]: Client should access ::NvEncLockBitstream() API through this pointer.            */
-    PNVENCUNLOCKBITSTREAM           nvEncUnlockBitstream;              /**< [out]: Client should access ::NvEncUnlockBitstream() API through this pointer.          */
-    PNVENCLOCKINPUTBUFFER           nvEncLockInputBuffer;              /**< [out]: Client should access ::NvEncLockInputBuffer() API through this pointer.          */
-    PNVENCUNLOCKINPUTBUFFER         nvEncUnlockInputBuffer;            /**< [out]: Client should access ::NvEncUnlockInputBuffer() API through this pointer.        */
-    PNVENCGETENCODESTATS            nvEncGetEncodeStats;               /**< [out]: Client should access ::NvEncGetEncodeStats() API through this pointer.           */
-    PNVENCGETSEQUENCEPARAMS         nvEncGetSequenceParams;            /**< [out]: Client should access ::NvEncGetSequenceParams() API through this pointer.        */
-    PNVENCREGISTERASYNCEVENT        nvEncRegisterAsyncEvent;           /**< [out]: Client should access ::NvEncRegisterAsyncEvent() API through this pointer.       */
-    PNVENCUNREGISTERASYNCEVENT      nvEncUnregisterAsyncEvent;         /**< [out]: Client should access ::NvEncUnregisterAsyncEvent() API through this pointer.     */
-    PNVENCMAPINPUTRESOURCE          nvEncMapInputResource;             /**< [out]: Client should access ::NvEncMapInputResource() API through this pointer.         */
-    PNVENCUNMAPINPUTRESOURCE        nvEncUnmapInputResource;           /**< [out]: Client should access ::NvEncUnmapInputResource() API through this pointer.       */
-    PNVENCDESTROYENCODER            nvEncDestroyEncoder;               /**< [out]: Client should access ::NvEncDestroyEncoder() API through this pointer.           */
-    PNVENCINVALIDATEREFFRAMES       nvEncInvalidateRefFrames;          /**< [out]: Client should access ::NvEncInvalidateRefFrames() API through this pointer.      */
-    PNVENCOPENENCODESESSIONEX       nvEncOpenEncodeSessionEx;          /**< [out]: Client should access ::NvEncOpenEncodeSession() API through this pointer.        */
-    PNVENCREGISTERRESOURCE          nvEncRegisterResource;             /**< [out]: Client should access ::NvEncRegisterResource() API through this pointer.         */
-    PNVENCUNREGISTERRESOURCE        nvEncUnregisterResource;           /**< [out]: Client should access ::NvEncUnregisterResource() API through this pointer.       */
-    PNVENCRECONFIGUREENCODER        nvEncReconfigureEncoder;           /**< [out]: Client should access ::NvEncReconfigureEncoder() API through this pointer.       */
-    void*                           reserved1;
-    PNVENCCREATEMVBUFFER            nvEncCreateMVBuffer;               /**< [out]: Client should access ::NvEncCreateMVBuffer API through this pointer.             */
-    PNVENCDESTROYMVBUFFER           nvEncDestroyMVBuffer;              /**< [out]: Client should access ::NvEncDestroyMVBuffer API through this pointer.            */
-    PNVENCRUNMOTIONESTIMATIONONLY   nvEncRunMotionEstimationOnly;      /**< [out]: Client should access ::NvEncRunMotionEstimationOnly API through this pointer.    */
-    void*                           reserved2[281];                    /**< [in]:  Reserved and must be set to NULL                                                 */
-} NV_ENCODE_API_FUNCTION_LIST;
-
-/** Macro for constructing the version field of ::_NV_ENCODEAPI_FUNCTION_LIST. */
-#define NV_ENCODE_API_FUNCTION_LIST_VER NVENCAPI_STRUCT_VERSION(2)
-
-// NvEncodeAPICreateInstance
-/**
- * \ingroup ENCODE_FUNC
- * Entry Point to the NvEncodeAPI interface.
- *
- * Creates an instance of the NvEncodeAPI interface, and populates the
- * pFunctionList with function pointers to the API routines implemented by the
- * NvEncodeAPI interface.
- *
- * \param [out] functionList
- *
- * \return
- * ::NV_ENC_SUCCESS
- * ::NV_ENC_ERR_INVALID_PTR
- */
-NVENCSTATUS NVENCAPI NvEncodeAPICreateInstance(NV_ENCODE_API_FUNCTION_LIST *functionList);
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif
-

diff --git a/compat/os2threads.h b/compat/os2threads.h
index 40a119f..2177a03 100644
--- a/compat/os2threads.h
+++ b/compat/os2threads.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 KO Myung-Hun <komh@chollian.net>
+ * Copyright (c) 2011-2017 KO Myung-Hun <komh@chollian.net>
  *
  * This file is part of FFmpeg.
  *
@@ -46,9 +46,11 @@
 
 typedef void pthread_attr_t;
 
-typedef HMTX pthread_mutex_t;
+typedef _fmutex pthread_mutex_t;
 typedef void pthread_mutexattr_t;
 
+#define PTHREAD_MUTEX_INITIALIZER _FMUTEX_INITIALIZER
+
 typedef struct {
     HEV event_sem;
     HEV ack_sem;
@@ -98,28 +100,28 @@
 static av_always_inline int pthread_mutex_init(pthread_mutex_t *mutex,
                                                const pthread_mutexattr_t *attr)
 {
-    DosCreateMutexSem(NULL, (PHMTX)mutex, 0, FALSE);
+    _fmutex_create(mutex, 0);
 
     return 0;
 }
 
 static av_always_inline int pthread_mutex_destroy(pthread_mutex_t *mutex)
 {
-    DosCloseMutexSem(*(PHMTX)mutex);
+    _fmutex_close(mutex);
 
     return 0;
 }
 
 static av_always_inline int pthread_mutex_lock(pthread_mutex_t *mutex)
 {
-    DosRequestMutexSem(*(PHMTX)mutex, SEM_INDEFINITE_WAIT);
+    _fmutex_request(mutex, 0);
 
     return 0;
 }
 
 static av_always_inline int pthread_mutex_unlock(pthread_mutex_t *mutex)
 {
-    DosReleaseMutexSem(*(PHMTX)mutex);
+    _fmutex_release(mutex);
 
     return 0;
 }

diff --git a/compat/tms470/math.h b/compat/tms470/math.h
deleted file mode 100644
index 0a42743..0000000
--- a/compat/tms470/math.h
+++ /dev/null

@@ -1,30 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef COMPAT_TMS470_MATH_H
-#define COMPAT_TMS470_MATH_H
-
-#include_next <math.h>
-
-#undef INFINITY
-#undef NAN
-
-#define INFINITY (*(const float*)((const unsigned []){ 0x7f800000 }))
-#define NAN      (*(const float*)((const unsigned []){ 0x7fc00000 }))
-
-#endif /* COMPAT_TMS470_MATH_H */

diff --git a/compat/w32dlfcn.h b/compat/w32dlfcn.h
index 78cc8f4..c83bdc9 100644
--- a/compat/w32dlfcn.h
+++ b/compat/w32dlfcn.h

@@ -21,6 +21,7 @@
 
 #ifdef _WIN32
 #include <windows.h>
+#include "config.h"
 #if (_WIN32_WINNT < 0x0602) || HAVE_WINRT
 #include "libavutil/wchar_filename.h"
 #endif

diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h
index eeead60..21acfd2 100644
--- a/compat/w32pthreads.h
+++ b/compat/w32pthreads.h

@@ -39,11 +39,6 @@
 #include <windows.h>
 #include <process.h>
 
-#if _WIN32_WINNT < 0x0600 && defined(__MINGW32__)
-#undef MemoryBarrier
-#define MemoryBarrier __sync_synchronize
-#endif
-
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"
 #include "libavutil/internal.h"
@@ -56,24 +51,15 @@
     void *ret;
 } pthread_t;
 
-/* the conditional variable api for windows 6.0+ uses critical sections and
- * not mutexes */
-typedef CRITICAL_SECTION pthread_mutex_t;
-
-/* This is the CONDITION_VARIABLE typedef for using Windows' native
- * conditional variables on kernels 6.0+. */
-#if HAVE_CONDITION_VARIABLE_PTR
+/* use light weight mutex/condition variable API for Windows Vista and later */
+typedef SRWLOCK pthread_mutex_t;
 typedef CONDITION_VARIABLE pthread_cond_t;
-#else
-typedef struct pthread_cond_t {
-    void *Ptr;
-} pthread_cond_t;
-#endif
 
-#if _WIN32_WINNT >= 0x0600
+#define PTHREAD_MUTEX_INITIALIZER SRWLOCK_INIT
+#define PTHREAD_COND_INITIALIZER CONDITION_VARIABLE_INIT
+
 #define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0)
 #define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
-#endif
 
 static av_unused unsigned __stdcall attribute_align_arg win32thread_worker(void *arg)
 {
@@ -114,26 +100,25 @@
 
 static inline int pthread_mutex_init(pthread_mutex_t *m, void* attr)
 {
-    InitializeCriticalSection(m);
+    InitializeSRWLock(m);
     return 0;
 }
 static inline int pthread_mutex_destroy(pthread_mutex_t *m)
 {
-    DeleteCriticalSection(m);
+    /* Unlocked SWR locks use no resources */
     return 0;
 }
 static inline int pthread_mutex_lock(pthread_mutex_t *m)
 {
-    EnterCriticalSection(m);
+    AcquireSRWLockExclusive(m);
     return 0;
 }
 static inline int pthread_mutex_unlock(pthread_mutex_t *m)
 {
-    LeaveCriticalSection(m);
+    ReleaseSRWLockExclusive(m);
     return 0;
 }
 
-#if _WIN32_WINNT >= 0x0600
 typedef INIT_ONCE pthread_once_t;
 #define PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT
 
@@ -167,7 +152,7 @@
 
 static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
 {
-    SleepConditionVariableCS(cond, mutex, INFINITE);
+    SleepConditionVariableSRW(cond, mutex, INFINITE, 0);
     return 0;
 }
 
@@ -177,242 +162,4 @@
     return 0;
 }
 
-#else // _WIN32_WINNT < 0x0600
-
-/* atomic init state of dynamically loaded functions */
-static LONG w32thread_init_state = 0;
-static av_unused void w32thread_init(void);
-
-/* for pre-Windows 6.0 platforms, define INIT_ONCE struct,
- * compatible to the one used in the native API */
-
-typedef union pthread_once_t  {
-    void * Ptr;    ///< For the Windows 6.0+ native functions
-    LONG state;    ///< For the pre-Windows 6.0 compat code
-} pthread_once_t;
-
-#define PTHREAD_ONCE_INIT {0}
-
-/* function pointers to init once API on windows 6.0+ kernels */
-static BOOL (WINAPI *initonce_begin)(pthread_once_t *lpInitOnce, DWORD dwFlags, BOOL *fPending, void **lpContext);
-static BOOL (WINAPI *initonce_complete)(pthread_once_t *lpInitOnce, DWORD dwFlags, void *lpContext);
-
-/* pre-Windows 6.0 compat using a spin-lock */
-static inline void w32thread_once_fallback(LONG volatile *state, void (*init_routine)(void))
-{
-    switch (InterlockedCompareExchange(state, 1, 0)) {
-    /* Initial run */
-    case 0:
-        init_routine();
-        InterlockedExchange(state, 2);
-        break;
-    /* Another thread is running init */
-    case 1:
-        while (1) {
-            MemoryBarrier();
-            if (*state == 2)
-                break;
-            Sleep(0);
-        }
-        break;
-    /* Initialization complete */
-    case 2:
-        break;
-    }
-}
-
-static av_unused int pthread_once(pthread_once_t *once_control, void (*init_routine)(void))
-{
-    w32thread_once_fallback(&w32thread_init_state, w32thread_init);
-
-    /* Use native functions on Windows 6.0+ */
-    if (initonce_begin && initonce_complete) {
-        BOOL pending = FALSE;
-        initonce_begin(once_control, 0, &pending, NULL);
-        if (pending)
-            init_routine();
-        initonce_complete(once_control, 0, NULL);
-        return 0;
-    }
-
-    w32thread_once_fallback(&once_control->state, init_routine);
-    return 0;
-}
-
-/* for pre-Windows 6.0 platforms we need to define and use our own condition
- * variable and api */
-
-typedef struct  win32_cond_t {
-    pthread_mutex_t mtx_broadcast;
-    pthread_mutex_t mtx_waiter_count;
-    volatile int waiter_count;
-    HANDLE semaphore;
-    HANDLE waiters_done;
-    volatile int is_broadcast;
-} win32_cond_t;
-
-/* function pointers to conditional variable API on windows 6.0+ kernels */
-static void (WINAPI *cond_broadcast)(pthread_cond_t *cond);
-static void (WINAPI *cond_init)(pthread_cond_t *cond);
-static void (WINAPI *cond_signal)(pthread_cond_t *cond);
-static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex,
-                                DWORD milliseconds);
-
-static av_unused int pthread_cond_init(pthread_cond_t *cond, const void *unused_attr)
-{
-    win32_cond_t *win32_cond = NULL;
-
-    w32thread_once_fallback(&w32thread_init_state, w32thread_init);
-
-    if (cond_init) {
-        cond_init(cond);
-        return 0;
-    }
-
-    /* non native condition variables */
-    win32_cond = (win32_cond_t*)av_mallocz(sizeof(win32_cond_t));
-    if (!win32_cond)
-        return ENOMEM;
-    cond->Ptr = win32_cond;
-    win32_cond->semaphore = CreateSemaphore(NULL, 0, 0x7fffffff, NULL);
-    if (!win32_cond->semaphore)
-        return ENOMEM;
-    win32_cond->waiters_done = CreateEvent(NULL, TRUE, FALSE, NULL);
-    if (!win32_cond->waiters_done)
-        return ENOMEM;
-
-    pthread_mutex_init(&win32_cond->mtx_waiter_count, NULL);
-    pthread_mutex_init(&win32_cond->mtx_broadcast, NULL);
-    return 0;
-}
-
-static av_unused int pthread_cond_destroy(pthread_cond_t *cond)
-{
-    win32_cond_t *win32_cond = (win32_cond_t*)cond->Ptr;
-    /* native condition variables do not destroy */
-    if (cond_init)
-        return 0;
-
-    /* non native condition variables */
-    CloseHandle(win32_cond->semaphore);
-    CloseHandle(win32_cond->waiters_done);
-    pthread_mutex_destroy(&win32_cond->mtx_waiter_count);
-    pthread_mutex_destroy(&win32_cond->mtx_broadcast);
-    av_freep(&win32_cond);
-    cond->Ptr = NULL;
-    return 0;
-}
-
-static av_unused int pthread_cond_broadcast(pthread_cond_t *cond)
-{
-    win32_cond_t *win32_cond = (win32_cond_t*)cond->Ptr;
-    int have_waiter;
-
-    if (cond_broadcast) {
-        cond_broadcast(cond);
-        return 0;
-    }
-
-    /* non native condition variables */
-    pthread_mutex_lock(&win32_cond->mtx_broadcast);
-    pthread_mutex_lock(&win32_cond->mtx_waiter_count);
-    have_waiter = 0;
-
-    if (win32_cond->waiter_count) {
-        win32_cond->is_broadcast = 1;
-        have_waiter = 1;
-    }
-
-    if (have_waiter) {
-        ReleaseSemaphore(win32_cond->semaphore, win32_cond->waiter_count, NULL);
-        pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
-        WaitForSingleObject(win32_cond->waiters_done, INFINITE);
-        ResetEvent(win32_cond->waiters_done);
-        win32_cond->is_broadcast = 0;
-    } else
-        pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
-    pthread_mutex_unlock(&win32_cond->mtx_broadcast);
-    return 0;
-}
-
-static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
-{
-    win32_cond_t *win32_cond = (win32_cond_t*)cond->Ptr;
-    int last_waiter;
-    if (cond_wait) {
-        cond_wait(cond, mutex, INFINITE);
-        return 0;
-    }
-
-    /* non native condition variables */
-    pthread_mutex_lock(&win32_cond->mtx_broadcast);
-    pthread_mutex_lock(&win32_cond->mtx_waiter_count);
-    win32_cond->waiter_count++;
-    pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
-    pthread_mutex_unlock(&win32_cond->mtx_broadcast);
-
-    // unlock the external mutex
-    pthread_mutex_unlock(mutex);
-    WaitForSingleObject(win32_cond->semaphore, INFINITE);
-
-    pthread_mutex_lock(&win32_cond->mtx_waiter_count);
-    win32_cond->waiter_count--;
-    last_waiter = !win32_cond->waiter_count || !win32_cond->is_broadcast;
-    pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
-
-    if (last_waiter)
-        SetEvent(win32_cond->waiters_done);
-
-    // lock the external mutex
-    return pthread_mutex_lock(mutex);
-}
-
-static av_unused int pthread_cond_signal(pthread_cond_t *cond)
-{
-    win32_cond_t *win32_cond = (win32_cond_t*)cond->Ptr;
-    int have_waiter;
-    if (cond_signal) {
-        cond_signal(cond);
-        return 0;
-    }
-
-    pthread_mutex_lock(&win32_cond->mtx_broadcast);
-
-    /* non-native condition variables */
-    pthread_mutex_lock(&win32_cond->mtx_waiter_count);
-    have_waiter = win32_cond->waiter_count;
-    pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
-
-    if (have_waiter) {
-        ReleaseSemaphore(win32_cond->semaphore, 1, NULL);
-        WaitForSingleObject(win32_cond->waiters_done, INFINITE);
-        ResetEvent(win32_cond->waiters_done);
-    }
-
-    pthread_mutex_unlock(&win32_cond->mtx_broadcast);
-    return 0;
-}
-#endif
-
-static av_unused void w32thread_init(void)
-{
-#if _WIN32_WINNT < 0x0600
-    HMODULE kernel_dll = GetModuleHandle(TEXT("kernel32.dll"));
-    /* if one is available, then they should all be available */
-    cond_init      = (void (WINAPI*)(pthread_cond_t *))
-        GetProcAddress(kernel_dll, "InitializeConditionVariable");
-    cond_broadcast = (void (WINAPI*)(pthread_cond_t *))
-        GetProcAddress(kernel_dll, "WakeAllConditionVariable");
-    cond_signal    = (void (WINAPI*)(pthread_cond_t *))
-        GetProcAddress(kernel_dll, "WakeConditionVariable");
-    cond_wait      = (BOOL (WINAPI*)(pthread_cond_t *, pthread_mutex_t *, DWORD))
-        GetProcAddress(kernel_dll, "SleepConditionVariableCS");
-    initonce_begin = (BOOL (WINAPI*)(pthread_once_t *, DWORD, BOOL *, void **))
-        GetProcAddress(kernel_dll, "InitOnceBeginInitialize");
-    initonce_complete = (BOOL (WINAPI*)(pthread_once_t *, DWORD, void *))
-        GetProcAddress(kernel_dll, "InitOnceComplete");
-#endif
-
-}
-
 #endif /* COMPAT_W32PTHREADS_H */

diff --git a/compat/windows/makedef b/compat/windows/makedef
index fcaf108..7258b94 100755
--- a/compat/windows/makedef
+++ b/compat/windows/makedef

@@ -45,7 +45,11 @@
 
 trap 'rm -f -- $libname' EXIT
 
-lib -out:${libname} $@ >/dev/null
+if [ -n "$AR" ]; then
+    $AR rcs ${libname} $@ >/dev/null
+else
+    lib -out:${libname} $@ >/dev/null
+fi
 if [ $? != 0 ]; then
     echo "Could not create temporary library." >&2
     exit 1
@@ -54,23 +58,7 @@
 IFS='
 '
 
-# Determine if we're building for x86 or x86_64 and
-# set the symbol prefix accordingly.
-prefix=""
-arch=$(dumpbin -headers ${libname} |
-       tr '\t' ' ' |
-       grep '^ \+.\+machine \+(.\+)' |
-       head -1 |
-       sed -e 's/^ \{1,\}.\{1,\} \{1,\}machine \{1,\}(\(...\)).*/\1/')
-
-if [ "${arch}" = "x86" ]; then
-    prefix="_"
-else
-    if [ "${arch}" != "ARM" ] && [ "${arch}" != "x64" ]; then
-        echo "Unknown machine type." >&2
-        exit 1
-    fi
-fi
+prefix="$EXTERN_PREFIX"
 
 started=0
 regex="none"
@@ -112,7 +100,19 @@
 '
 done
 
-dump=$(dumpbin -linkermember:1 ${libname})
+if [ -n "$NM" ]; then
+    # Use eval, since NM="nm -g"
+    dump=$(eval "$NM --defined-only -g ${libname}" |
+              grep -v : |
+              grep -v ^$ |
+              cut -d' ' -f3 |
+              sed -e "s/^${prefix}//")
+else
+    dump=$(dumpbin -linkermember:1 ${libname} |
+              sed -e '/public symbols/,$!d' -e '/^ \{1,\}Summary/,$d' -e "s/ \{1,\}${prefix}/ /" -e 's/ \{1,\}/ /g' |
+              tail -n +2 |
+              cut -d' ' -f3)
+fi
 
 rm ${libname}
 
@@ -121,9 +121,6 @@
 for exp in ${regex}; do
     list="${list}"'
 '$(echo "${dump}" |
-          sed -e '/public symbols/,$!d' -e '/^ \{1,\}Summary/,$d' -e "s/ \{1,\}${prefix}/ /" -e 's/ \{1,\}/ /g' |
-          tail -n +2 |
-          cut -d' ' -f3 |
           grep "^${exp}" |
           sed -e 's/^/    /')
 done

diff --git a/configure b/configure
index 231c6c3..85d5dd5 100755
--- a/configure
+++ b/configure

@@ -116,7 +116,6 @@
   --disable-ffmpeg         disable ffmpeg build
   --disable-ffplay         disable ffplay build
   --disable-ffprobe        disable ffprobe build
-  --disable-ffserver       disable ffserver build
 
 Documentation options:
   --disable-doc            do not build documentation
@@ -133,7 +132,7 @@
   --disable-swscale        disable libswscale build
   --disable-postproc       disable libpostproc build
   --disable-avfilter       disable libavfilter build
-  --enable-avresample      enable libavresample build [no]
+  --enable-avresample      enable libavresample build (deprecated) [no]
   --disable-pthreads       disable pthreads [autodetect]
   --disable-w32threads     disable Win32 threads [autodetect]
   --disable-os2threads     disable OS/2 threads [autodetect]
@@ -185,7 +184,6 @@
   --enable-filter=NAME     enable filter NAME
   --disable-filter=NAME    disable filter NAME
   --disable-filters        disable all filters
-  --disable-v4l2_m2m       disable V4L2 mem2mem code [autodetect]
 
 External library support:
 
@@ -215,11 +213,11 @@
   --enable-gmp             enable gmp, needed for rtmp(t)e support
                            if openssl or librtmp is not used [no]
   --enable-gnutls          enable gnutls, needed for https support
-                           if openssl is not used [no]
+                           if openssl, libtls or mbedtls is not used [no]
   --disable-iconv          disable iconv [autodetect]
-  --disable-jack           disable libjack support [autodetect]
   --enable-jni             enable JNI support [no]
   --enable-ladspa          enable LADSPA audio filtering [no]
+  --enable-libaom          enable AV1 video encoding/decoding via libaom [no]
   --enable-libass          enable libass subtitles rendering,
                            needed for subtitles and ass filter [no]
   --enable-libbluray       enable BluRay reading using libbluray [no]
@@ -227,6 +225,8 @@
   --enable-libcaca         enable textual display using libcaca [no]
   --enable-libcelt         enable CELT decoding via libcelt [no]
   --enable-libcdio         enable audio CD grabbing with libcdio [no]
+  --enable-libcodec2       enable codec2 en/decoding using libcodec2 [no]
+  --enable-libdavs2        enable AVS2 decoding via libdavs2 [no]
   --enable-libdc1394       enable IIDC-1394 grabbing using libdc1394
                            and libraw1394 [no]
   --enable-libfdk-aac      enable AAC de/encoding via libfdk-aac [no]
@@ -238,7 +238,10 @@
   --enable-libgsm          enable GSM de/encoding via libgsm [no]
   --enable-libiec61883     enable iec61883 via libiec61883 [no]
   --enable-libilbc         enable iLBC de/encoding via libilbc [no]
+  --enable-libjack         enable JACK audio sound server [no]
+  --enable-libklvanc       enable Kernel Labs VANC processing [no]
   --enable-libkvazaar      enable HEVC encoding via libkvazaar [no]
+  --enable-liblensfun      enable lensfun lens correction [no]
   --enable-libmodplug      enable ModPlug via libmodplug [no]
   --enable-libmp3lame      enable MP3 encoding via libmp3lame [no]
   --enable-libopencore-amrnb enable AMR-NB de/encoding via libopencore-amrnb [no]
@@ -257,9 +260,14 @@
   --enable-libsnappy       enable Snappy compression, needed for hap encoding [no]
   --enable-libsoxr         enable Include libsoxr resampling [no]
   --enable-libspeex        enable Speex de/encoding via libspeex [no]
+  --enable-libsrt          enable Haivision SRT protocol via libsrt [no]
   --enable-libssh          enable SFTP protocol via libssh [no]
+  --enable-libtensorflow   enable TensorFlow as a DNN module backend
+                           for DNN based filters like sr [no]
   --enable-libtesseract    enable Tesseract, needed for ocr filter [no]
   --enable-libtheora       enable Theora encoding via libtheora [no]
+  --enable-libtls          enable LibreSSL (via libtls), needed for https support
+                           if openssl, gnutls or mbedtls is not used [no]
   --enable-libtwolame      enable MP2 encoding via libtwolame [no]
   --enable-libv4l2         enable libv4l2/v4l-utils [no]
   --enable-libvidstab      enable video stabilization using vid.stab [no]
@@ -273,52 +281,60 @@
   --enable-libx264         enable H.264 encoding via x264 [no]
   --enable-libx265         enable HEVC encoding via x265 [no]
   --enable-libxavs         enable AVS encoding via xavs [no]
+  --enable-libxavs2        enable AVS2 encoding via xavs2 [no]
   --enable-libxcb          enable X11 grabbing using XCB [autodetect]
   --enable-libxcb-shm      enable X11 grabbing shm communication [autodetect]
   --enable-libxcb-xfixes   enable X11 grabbing mouse rendering [autodetect]
   --enable-libxcb-shape    enable X11 grabbing shape rendering [autodetect]
   --enable-libxvid         enable Xvid encoding via xvidcore,
                            native MPEG-4/Xvid encoder exists [no]
-  --enable-libxml2         enable XML parsing using the C library libxml2 [no]
+  --enable-libxml2         enable XML parsing using the C library libxml2, needed
+                           for dash demuxing support [no]
   --enable-libzimg         enable z.lib, needed for zscale filter [no]
   --enable-libzmq          enable message passing via libzmq [no]
   --enable-libzvbi         enable teletext support via libzvbi [no]
+  --enable-lv2             enable LV2 audio filtering [no]
   --disable-lzma           disable lzma [autodetect]
   --enable-decklink        enable Blackmagic DeckLink I/O support [no]
   --enable-libndi_newtek   enable Newteck NDI I/O support [no]
+  --enable-mbedtls         enable mbedTLS, needed for https support
+                           if openssl, gnutls or libtls is not used [no]
   --enable-mediacodec      enable Android MediaCodec support [no]
   --enable-libmysofa       enable libmysofa, needed for sofalizer filter [no]
   --enable-openal          enable OpenAL 1.1 capture support [no]
-  --enable-opencl          enable OpenCL code
+  --enable-opencl          enable OpenCL processing [no]
   --enable-opengl          enable OpenGL rendering [no]
   --enable-openssl         enable openssl, needed for https support
-                           if gnutls is not used [no]
+                           if gnutls, libtls or mbedtls is not used [no]
   --disable-sndio          disable sndio support [autodetect]
   --disable-schannel       disable SChannel SSP, needed for TLS support on
                            Windows if openssl and gnutls are not used [autodetect]
   --disable-sdl2           disable sdl2 [autodetect]
   --disable-securetransport disable Secure Transport, needed for TLS support
                            on OSX if openssl and gnutls are not used [autodetect]
+  --enable-vapoursynth     enable VapourSynth demuxer [no]
   --disable-xlib           disable xlib [autodetect]
   --disable-zlib           disable zlib [autodetect]
 
   The following libraries provide various hardware acceleration features:
+  --disable-amf            disable AMF video encoding code [autodetect]
   --disable-audiotoolbox   disable Apple AudioToolbox code [autodetect]
-  --disable-cuda           disable dynamically linked Nvidia CUDA code [autodetect]
   --enable-cuda-sdk        enable CUDA features that require the CUDA SDK [no]
   --disable-cuvid          disable Nvidia CUVID support [autodetect]
   --disable-d3d11va        disable Microsoft Direct3D 11 video acceleration code [autodetect]
   --disable-dxva2          disable Microsoft DirectX 9 video acceleration code [autodetect]
+  --disable-ffnvcodec      disable dynamically linked Nvidia code [autodetect]
   --enable-libdrm          enable DRM code (Linux) [no]
   --enable-libmfx          enable Intel MediaSDK (AKA Quick Sync Video) code via libmfx [no]
   --enable-libnpp          enable Nvidia Performance Primitives-based code [no]
   --enable-mmal            enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no]
+  --disable-nvdec          disable Nvidia video decoding acceleration (via hwaccel) [autodetect]
   --disable-nvenc          disable Nvidia video encoding code [autodetect]
   --enable-omx             enable OpenMAX IL code [no]
   --enable-omx-rpi         enable OpenMAX IL code for Raspberry Pi [no]
   --enable-rkmpp           enable Rockchip Media Process Platform code [no]
+  --disable-v4l2-m2m       disable V4L2 mem2mem code [autodetect]
   --disable-vaapi          disable Video Acceleration API (mainly Unix/Intel) code [autodetect]
-  --disable-vda            disable Apple Video Decode Acceleration code [autodetect]
   --disable-vdpau          disable Nvidia Video Decode and Presentation API for Unix code [autodetect]
   --disable-videotoolbox   disable VideoToolbox code [autodetect]
 
@@ -337,6 +353,10 @@
   --target-samples=DIR     path to samples directory on target
   --tempprefix=PATH        force fixed dir/prefix instead of mktemp for checks
   --toolchain=NAME         set tool defaults according to NAME
+                           (gcc-asan, clang-asan, gcc-msan, clang-msan,
+                           gcc-tsan, clang-tsan, gcc-usan, clang-usan,
+                           valgrind-massif, valgrind-memcheck,
+                           msvc, icl, gcov, llvm-cov, hardened)
   --nm=NM                  use nm tool NM [$nm_default]
   --ar=AR                  use archive tool AR [$ar_default]
   --as=AS                  use assembler AS [$as_default]
@@ -366,7 +386,7 @@
   --extra-objcflags=FLAGS  add FLAGS to OBJCFLAGS [$CFLAGS]
   --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS [$LDFLAGS]
   --extra-ldexeflags=ELDFLAGS add ELDFLAGS to LDEXEFLAGS [$LDEXEFLAGS]
-  --extra-ldlibflags=ELDFLAGS add ELDFLAGS to LDLIBFLAGS [$LDLIBFLAGS]
+  --extra-ldsoflags=ELDFLAGS add ELDFLAGS to LDSOFLAGS [$LDSOFLAGS]
   --extra-libs=ELIBS       add ELIBS [$ELIBS]
   --extra-version=STRING   version string suffix []
   --optflags=OPTFLAGS      override optimization-related compiler flags
@@ -407,6 +427,7 @@
   --disable-fma3           disable FMA3 optimizations
   --disable-fma4           disable FMA4 optimizations
   --disable-avx2           disable AVX2 optimizations
+  --disable-avx512         disable AVX-512 optimizations
   --disable-aesni          disable AESNI optimizations
   --disable-armv5te        disable armv5te optimizations
   --disable-armv6          disable armv6 optimizations
@@ -462,7 +483,6 @@
   exit 0
 }
 
-quotes='""'
 if test -t 1 && which tput >/dev/null 2>&1; then
     ncolors=$(tput colors)
     if test -n "$ncolors" && test $ncolors -ge 8; then
@@ -541,7 +561,7 @@
     pat=$1
     shift
     for v; do
-        eval "case $v in $pat) printf '%s ' $v ;; esac"
+        eval "case '$v' in $pat) printf '%s ' '$v' ;; esac"
     done
 }
 
@@ -549,7 +569,7 @@
     pat=$1
     shift
     for v; do
-        eval "case $v in $pat) ;; *) printf '%s ' $v ;; esac"
+        eval "case '$v' in $pat) ;; *) printf '%s ' '$v' ;; esac"
     done
 }
 
@@ -565,6 +585,12 @@
     for v; do echo ${v}${suffix}; done
 }
 
+remove_suffix(){
+    suffix=$1
+    shift
+    for v; do echo ${v%$suffix}; done
+}
+
 set_all(){
     value=$1
     shift
@@ -585,13 +611,13 @@
     echo $@ | sed 's/[^A-Za-z0-9_]/_/g'
 }
 
-set_safe(){
+set_sanitized(){
     var=$1
     shift
     eval $(sanitize_var_name "$var")='$*'
 }
 
-get_safe(){
+get_sanitized(){
     eval echo \$$(sanitize_var_name "$1")
 }
 
@@ -637,28 +663,25 @@
     set_weak no $*
 }
 
-enable_safe(){
+enable_sanitized(){
     for var; do
-        enable $(echo "$var" | sed 's/[^A-Za-z0-9_]/_/g')
+        enable $(sanitize_var_name $var)
     done
 }
 
-disable_safe(){
+disable_sanitized(){
     for var; do
-        disable $(echo "$var" | sed 's/[^A-Za-z0-9_]/_/g')
+        disable $(sanitize_var_name $var)
     done
 }
 
 do_enable_deep(){
     for var; do
         enabled $var && continue
-        eval sel="\$${var}_select"
-        eval sgs="\$${var}_suggest"
-        pushvar var sgs
-        enable_deep $sel
-        popvar sgs
-        enable_deep_weak $sgs
-        popvar var
+        set -- $var
+        eval enable_deep \$${var}_select
+        var=$1
+        eval enable_deep_weak \$${var}_suggest
     done
 }
 
@@ -670,25 +693,25 @@
 enable_deep_weak(){
     for var; do
         disabled $var && continue
-        pushvar var
+        set -- $var
         do_enable_deep $var
-        popvar var
+        var=$1
         enable_weak $var
     done
 }
 
 requested(){
-    test "${1#!}" = "$1" && op='=' || op=!=
+    test "${1#!}" = "$1" && op="=" || op="!="
     eval test "x\$${1#!}_requested" $op "xyes"
 }
 
 enabled(){
-    test "${1#!}" = "$1" && op='=' || op=!=
+    test "${1#!}" = "$1" && op="=" || op="!="
     eval test "x\$${1#!}" $op "xyes"
 }
 
 disabled(){
-    test "${1#!}" = "$1" && op='=' || op=!=
+    test "${1#!}" = "$1" && op="=" || op="!="
     eval test "x\$${1#!}" $op "xno"
 }
 
@@ -732,50 +755,49 @@
     return 1
 }
 
-do_check_deps(){
+# The cfg loop is very hot (several thousands iterations), and in bash also
+# potentialy quite slow. Try to abort the iterations early, preferably without
+# calling functions. 70%+ of the time cfg is already done or without deps.
+check_deps(){
     for cfg; do
-        enabled ${cfg}_checking && die "Circular dependency for $cfg."
-        disabled ${cfg}_checking && continue
-        enable ${cfg}_checking
-        append allopts $cfg
+        eval [ x\$${cfg}_checking = xdone ] && continue
+        eval [ x\$${cfg}_checking = xinprogress ] && die "Circular dependency for $cfg."
 
-        eval dep_all="\$${cfg}_deps"
-        eval dep_any="\$${cfg}_deps_any"
-        eval dep_con="\$${cfg}_conflict"
-        eval dep_sel="\$${cfg}_select"
-        eval dep_sgs="\$${cfg}_suggest"
-        eval dep_ifa="\$${cfg}_if"
-        eval dep_ifn="\$${cfg}_if_any"
+        eval "
+        dep_all=\$${cfg}_deps
+        dep_any=\$${cfg}_deps_any
+        dep_con=\$${cfg}_conflict
+        dep_sel=\$${cfg}_select
+        dep_sgs=\$${cfg}_suggest
+        dep_ifa=\$${cfg}_if
+        dep_ifn=\$${cfg}_if_any
+        "
 
-        pushvar cfg dep_all dep_any dep_con dep_sel dep_sgs dep_ifa dep_ifn
-        do_check_deps $dep_all $dep_any $dep_con $dep_sel $dep_sgs $dep_ifa $dep_ifn
-        popvar cfg dep_all dep_any dep_con dep_sel dep_sgs dep_ifa dep_ifn
+        # most of the time here $cfg has no deps - avoid costly no-op work
+        if [ "$dep_all$dep_any$dep_con$dep_sel$dep_sgs$dep_ifa$dep_ifn" ]; then
+            eval ${cfg}_checking=inprogress
 
-        [ -n "$dep_ifa" ] && { enabled_all $dep_ifa && enable_weak $cfg; }
-        [ -n "$dep_ifn" ] && { enabled_any $dep_ifn && enable_weak $cfg; }
-        enabled_all  $dep_all || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but not all dependencies are satisfied: $dep_all"; }
-        enabled_any  $dep_any || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but not any dependency is satisfied: $dep_any"; }
-        disabled_all $dep_con || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but some conflicting dependencies are unsatisfied: $dep_con"; }
-        disabled_any $dep_sel && { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but some selected dependency is unsatisfied: $dep_sel"; }
+            set -- $cfg "$dep_all" "$dep_any" "$dep_con" "$dep_sel" "$dep_sgs" "$dep_ifa" "$dep_ifn"
+            check_deps $dep_all $dep_any $dep_con $dep_sel $dep_sgs $dep_ifa $dep_ifn
+            cfg=$1; dep_all=$2; dep_any=$3; dep_con=$4; dep_sel=$5 dep_sgs=$6; dep_ifa=$7; dep_ifn=$8
 
-        if enabled $cfg; then
-            enable_deep $dep_sel
-            enable_deep_weak $dep_sgs
+            [ -n "$dep_ifa" ] && { enabled_all $dep_ifa && enable_weak $cfg; }
+            [ -n "$dep_ifn" ] && { enabled_any $dep_ifn && enable_weak $cfg; }
+            enabled_all  $dep_all || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but not all dependencies are satisfied: $dep_all"; }
+            enabled_any  $dep_any || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but not any dependency is satisfied: $dep_any"; }
+            disabled_all $dep_con || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but some conflicting dependencies are unsatisfied: $dep_con"; }
+            disabled_any $dep_sel && { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but some selected dependency is unsatisfied: $dep_sel"; }
+
+            enabled $cfg && enable_deep_weak $dep_sel $dep_sgs
+
+            for dep in $dep_all $dep_any $dep_sel $dep_sgs; do
+                # filter out library deps, these do not belong in extralibs
+                is_in $dep $LIBRARY_LIST && continue
+                enabled $dep && eval append ${cfg}_extralibs ${dep}_extralibs
+            done
         fi
 
-        disable ${cfg}_checking
-    done
-}
-
-check_deps(){
-    unset allopts
-
-    do_check_deps "$@"
-
-    for cfg in $allopts; do
-        enabled $cfg || continue
-        eval dep_extralibs="\$${cfg}_extralibs"
-        test -n "$dep_extralibs" && add_extralibs $dep_extralibs
+        eval ${cfg}_checking=done
     done
 }
 
@@ -830,13 +852,37 @@
     eval "$var=\"$* \$$var\""
 }
 
+reverse () {
+    eval '
+        reverse_out=
+        for v in $'$1'; do
+            reverse_out="$v $reverse_out"
+        done
+        '$1'=$reverse_out
+    '
+}
+
+# keeps the last occurence of each non-unique item
 unique(){
-    var=$1
-    uniq_list=""
-    for tok in $(eval echo \$$var); do
-        uniq_list="$(filter_out $tok $uniq_list) $tok"
+    unique_out=
+    eval unique_in=\$$1
+    reverse unique_in
+    for v in $unique_in; do
+        # " $unique_out" +space such that every item is surrounded with spaces
+        case " $unique_out" in *" $v "*) continue; esac  # already in list
+        unique_out="$unique_out$v "
     done
-    eval "$var=\"${uniq_list}\""
+    reverse unique_out
+    eval $1=\$unique_out
+}
+
+resolve(){
+    resolve_out=
+    eval resolve_in=\$$1
+    for v in $resolve_in; do
+        eval 'resolve_out="$resolve_out$'$v' "'
+    done
+    eval $1=\$resolve_out
 }
 
 add_cppflags(){
@@ -855,14 +901,14 @@
     append CXXFLAGS $($cflags_filter "$@")
 }
 
-add_asflags(){
-    append ASFLAGS $($asflags_filter "$@")
-}
-
 add_objcflags(){
     append OBJCFLAGS $($objcflags_filter "$@")
 }
 
+add_asflags(){
+    append ASFLAGS $($asflags_filter "$@")
+}
+
 add_ldflags(){
     append LDFLAGS $($ldflags_filter "$@")
 }
@@ -871,18 +917,18 @@
     append LDEXEFLAGS $($ldflags_filter "$@")
 }
 
-add_ldlibflags(){
-    append LDLIBFLAGS $($ldflags_filter "$@")
-}
-
-add_stripflags(){
-    append ASMSTRIPFLAGS "$@"
+add_ldsoflags(){
+    append LDSOFLAGS $($ldflags_filter "$@")
 }
 
 add_extralibs(){
     prepend extralibs $($ldflags_filter "$@")
 }
 
+add_stripflags(){
+    append ASMSTRIPFLAGS "$@"
+}
+
 add_host_cppflags(){
     append host_cppflags "$@"
 }
@@ -901,61 +947,104 @@
     map 'add_cppflags -D$v' "$@"
 }
 
-check_cmd(){
+test_cmd(){
     log "$@"
     "$@" >> $logfile 2>&1
 }
 
-check_stat(){
-    log check_stat "$@"
+test_stat(){
+    log test_stat "$@"
     stat "$1" >> $logfile 2>&1
 }
 
-cc_o(){
-    eval printf '%s\\n' $CC_O
-}
-
 cc_e(){
     eval printf '%s\\n' $CC_E
 }
 
-check_cc(){
-    log check_cc "$@"
-    cat > $TMPC
-    log_file $TMPC
-    check_cmd $cc $CPPFLAGS $CFLAGS "$@" $CC_C $(cc_o $TMPO) $TMPC
-}
-
-check_cxx(){
-    log check_cxx "$@"
-    cat > $TMPCPP
-    log_file $TMPCPP
-    check_cmd $cxx $CPPFLAGS $CFLAGS $CXXFLAGS "$@" $CXX_C -o $TMPO $TMPCPP
-}
-
-check_objcc(){
-    log check_objcc "$@"
-    cat > $TMPM
-    log_file $TMPM
-    check_cmd $objcc -Werror=missing-prototypes $CPPFLAGS $CFLAGS $OBJCFLAGS "$@" $OBJCC_C $(cc_o $TMPO) $TMPM
-}
-
-check_cpp(){
-    log check_cpp "$@"
-    cat > $TMPC
-    log_file $TMPC
-    check_cmd $cc $CPPFLAGS $CFLAGS "$@" $(cc_e $TMPO) $TMPC
+cc_o(){
+    eval printf '%s\\n' $CC_O
 }
 
 as_o(){
     eval printf '%s\\n' $AS_O
 }
 
-check_as(){
-    log check_as "$@"
+x86asm_o(){
+    eval printf '%s\\n' $X86ASM_O
+}
+
+ld_o(){
+    eval printf '%s\\n' $LD_O
+}
+
+hostcc_e(){
+    eval printf '%s\\n' $HOSTCC_E
+}
+
+hostcc_o(){
+    eval printf '%s\\n' $HOSTCC_O
+}
+
+test_cc(){
+    log test_cc "$@"
+    cat > $TMPC
+    log_file $TMPC
+    test_cmd $cc $CPPFLAGS $CFLAGS "$@" $CC_C $(cc_o $TMPO) $TMPC
+}
+
+test_cxx(){
+    log test_cxx "$@"
+    cat > $TMPCPP
+    log_file $TMPCPP
+    test_cmd $cxx $CPPFLAGS $CFLAGS $CXXFLAGS "$@" $CXX_C -o $TMPO $TMPCPP
+}
+
+test_objcc(){
+    log test_objcc "$@"
+    cat > $TMPM
+    log_file $TMPM
+    test_cmd $objcc -Werror=missing-prototypes $CPPFLAGS $CFLAGS $OBJCFLAGS "$@" $OBJCC_C $(cc_o $TMPO) $TMPM
+}
+
+test_cpp(){
+    log test_cpp "$@"
+    cat > $TMPC
+    log_file $TMPC
+    test_cmd $cc $CPPFLAGS $CFLAGS "$@" $(cc_e $TMPO) $TMPC
+}
+
+test_as(){
+    log test_as "$@"
     cat > $TMPS
     log_file $TMPS
-    check_cmd $as $CPPFLAGS $ASFLAGS "$@" $AS_C $(as_o $TMPO) $TMPS
+    test_cmd $as $CPPFLAGS $ASFLAGS "$@" $AS_C $(as_o $TMPO) $TMPS
+}
+
+test_x86asm(){
+    log test_x86asm "$@"
+    echo "$1" > $TMPASM
+    log_file $TMPASM
+    shift
+    test_cmd $x86asmexe $X86ASMFLAGS -Werror "$@" $(x86asm_o $TMPO) $TMPASM
+}
+
+check_cmd(){
+    log check_cmd "$@"
+    cmd=$1
+    disabled $cmd && return
+    disable $cmd
+    test_cmd $@ && enable $cmd
+}
+
+check_as(){
+    log check_as "$@"
+    name=$1
+    code=$2
+    shift 2
+    disable $name
+    test_as $@ <<EOF && enable $name
+$code
+EOF
 }
 
 check_inline_asm(){
@@ -964,7 +1053,7 @@
     code="$2"
     shift 2
     disable $name
-    check_cc "$@" <<EOF && enable $name
+    test_cc "$@" <<EOF && enable $name
 void foo(void){ __asm__ volatile($code); }
 EOF
 }
@@ -984,38 +1073,43 @@
 void foo(void){ __asm__ volatile($code); }
 EOF
     log_file $TMPC
-    check_cmd $cc $CPPFLAGS $CFLAGS $flags "$@" $CC_C $(cc_o $TMPO) $TMPC &&
+    test_cmd $cc $CPPFLAGS $CFLAGS $flags "$@" $CC_C $(cc_o $TMPO) $TMPC &&
     enable $name && add_cflags $flags && add_asflags $flags && add_ldflags $flags
 }
 
 check_insn(){
     log check_insn "$@"
     check_inline_asm ${1}_inline "\"$2\""
-    echo "$2" | check_as && enable ${1}_external || disable ${1}_external
+    check_as ${1}_external "$2"
 }
 
 check_x86asm(){
     log check_x86asm "$@"
-    echo "$1" > $TMPS
-    log_file $TMPS
-    shift 1
-    check_cmd $x86asmexe $X86ASMFLAGS -Werror "$@" -o $TMPO $TMPS
+    name=$1
+    shift
+    disable $name
+    test_x86asm "$@" && enable $name
 }
 
-ld_o(){
-    eval printf '%s\\n' $LD_O
+test_ld(){
+    log test_ld "$@"
+    type=$1
+    shift 1
+    flags=$(filter_out '-l*|*.so' $@)
+    libs=$(filter '-l*|*.so' $@)
+    test_$type $($cflags_filter $flags) || return
+    flags=$($ldflags_filter $flags)
+    libs=$($ldflags_filter $libs)
+    test_cmd $ld $LDFLAGS $LDEXEFLAGS $flags $(ld_o $TMPE) $TMPO $libs $extralibs
 }
 
 check_ld(){
     log check_ld "$@"
     type=$1
-    shift 1
-    flags=$(filter_out '-l*|*.so' $@)
-    libs=$(filter '-l*|*.so' $@)
-    check_$type $($cflags_filter $flags) || return
-    flags=$($ldflags_filter $flags)
-    libs=$($ldflags_filter $libs)
-    check_cmd $ld $LDFLAGS $LDEXEFLAGS $flags $(ld_o $TMPE) $TMPO $libs $extralibs
+    name=$2
+    shift 2
+    disable $name
+    test_ld $type $@ && enable $name
 }
 
 print_include(){
@@ -1025,8 +1119,8 @@
         echo "#include <$hdr>"
 }
 
-check_code(){
-    log check_code "$@"
+test_code(){
+    log test_code "$@"
     check=$1
     headers=$2
     code=$3
@@ -1036,12 +1130,12 @@
             print_include $hdr
         done
         echo "int main(void) { $code; return 0; }"
-    } | check_$check "$@"
+    } | test_$check "$@"
 }
 
 check_cppflags(){
     log check_cppflags "$@"
-    check_cpp "$@" <<EOF && append CPPFLAGS "$@"
+    test_cpp "$@" <<EOF && append CPPFLAGS "$@"
 #include <stdlib.h>
 EOF
 }
@@ -1049,7 +1143,7 @@
 test_cflags(){
     log test_cflags "$@"
     set -- $($cflags_filter "$@")
-    check_cc "$@" <<EOF
+    test_cc "$@" <<EOF
 int x;
 EOF
 }
@@ -1062,7 +1156,7 @@
 check_cxxflags(){
     log check_cxxflags "$@"
     set -- $($cflags_filter "$@")
-    check_cxx "$@" <<EOF && append CXXFLAGS "$@"
+    test_cxx "$@" <<EOF && append CXXFLAGS "$@"
 int x;
 EOF
 }
@@ -1070,7 +1164,7 @@
 test_objcflags(){
     log test_objcflags "$@"
     set -- $($objcflags_filter "$@")
-    check_objcc "$@" <<EOF
+    test_objcc "$@" <<EOF
 int x;
 EOF
 }
@@ -1082,7 +1176,8 @@
 
 test_ldflags(){
     log test_ldflags "$@"
-    check_ld "cc" "$@" <<EOF
+    set -- $($ldflags_filter "$@")
+    test_ld "cc" "$@" <<EOF
 int main(void){ return 0; }
 EOF
 }
@@ -1094,11 +1189,11 @@
 
 test_stripflags(){
     log test_stripflags "$@"
-    # call check_cc to get a fresh TMPO
-    check_cc <<EOF
+    # call test_cc to get a fresh TMPO
+    test_cc <<EOF
 int main(void) { return 0; }
 EOF
-    check_cmd $strip $ASMSTRIPFLAGS "$@" $TMPO
+    test_cmd $strip $ASMSTRIPFLAGS "$@" $TMPO
 }
 
 check_stripflags(){
@@ -1106,15 +1201,17 @@
     test_stripflags "$@" && add_stripflags "$@"
 }
 
-check_header(){
-    log check_header "$@"
-    header=$1
+check_headers(){
+    log check_headers "$@"
+    headers=$1
     shift
-    disable_safe $header
-    check_cpp "$@" <<EOF && enable_safe $header
-#include <$header>
-int x;
-EOF
+    disable_sanitized $headers
+    {
+        for hdr in $headers; do
+            print_include $hdr
+        done
+        echo "int x;"
+    } | test_cpp "$@" && enable_sanitized $headers
 }
 
 check_header_objcc(){
@@ -1122,11 +1219,11 @@
     rm -f -- "$TMPO"
     header=$1
     shift
-    disable_safe $header
+    disable_sanitized $header
     {
        echo "#include <$header>"
        echo "int main(void) { return 0; }"
-    } | check_objcc && check_stat "$TMPO" && enable_safe $header
+    } | test_objcc && test_stat "$TMPO" && enable_sanitized $header
 }
 
 check_apple_framework(){
@@ -1135,7 +1232,8 @@
     name="$(tolower $framework)"
     header="${framework}/${framework}.h"
     disable $name
-    check_header_objcc $header && enable $name && add_extralibs "-framework $framework"
+    check_header_objcc $header &&
+        enable $name && eval ${name}_extralibs='"-framework $framework"'
 }
 
 check_func(){
@@ -1143,7 +1241,7 @@
     func=$1
     shift
     disable $func
-    check_ld "cc" "$@" <<EOF && enable $func
+    test_ld "cc" "$@" <<EOF && enable $func
 extern int $func();
 int main(void){ $func(); }
 EOF
@@ -1156,7 +1254,7 @@
     shift 2
     test $narg = 2 && args="f, g" || args="f * I"
     disable $func
-    check_ld "cc" "$@" <<EOF && enable $func
+    test_ld "cc" "$@" <<EOF && enable $func
 #include <complex.h>
 #include <math.h>
 float foo(complex float f, complex float g) { return $func($args); }
@@ -1171,7 +1269,7 @@
     shift 2
     test $narg = 2 && args="f, g" || args="f"
     disable $func
-    check_ld "cc" "$@" <<EOF && enable $func
+    test_ld "cc" "$@" <<EOF && enable $func
 #include <math.h>
 float foo(float f, float g) { return $func($args); }
 int main(void){ return (int) foo; }
@@ -1197,7 +1295,7 @@
             echo " ret |= ((intptr_t)check_$func) & 0xFFFF;"
         done
         echo "return ret; }"
-    } | check_ld "cc" "$@" && enable $funcs && enable_safe $headers
+    } | test_ld "cc" "$@" && enable $funcs && enable_sanitized $headers
 }
 
 check_class_headers_cpp(){
@@ -1216,15 +1314,15 @@
             i=$(expr $i + 1)
         done
         echo "return 0; }"
-    } | check_ld "cxx" "$@" && enable $funcs && enable_safe $headers
+    } | test_ld "cxx" "$@" && enable $funcs && enable_sanitized $headers
 }
 
-check_cpp_condition(){
-    log check_cpp_condition "$@"
+test_cpp_condition(){
+    log test_cpp_condition "$@"
     header=$1
     condition=$2
     shift 2
-    check_cpp "$@" <<EOF
+    test_cpp "$@" <<EOF
 #include <$header>
 #if !($condition)
 #error "unsatisfied condition: $condition"
@@ -1232,6 +1330,14 @@
 EOF
 }
 
+check_cpp_condition(){
+    log check_cpp_condition "$@"
+    name=$1
+    shift 1
+    disable $name
+    test_cpp_condition "$@" && enable $name
+}
+
 test_cflags_cc(){
     log test_cflags_cc "$@"
     flags=$1
@@ -1239,7 +1345,7 @@
     condition=$3
     shift 3
     set -- $($cflags_filter "$flags")
-    check_cc "$@" <<EOF
+    test_cc "$@" <<EOF
 #include <$header>
 #if !($condition)
 #error "unsatisfied condition: $condition"
@@ -1255,19 +1361,22 @@
     shift 3
     disable $name
     check_func_headers "$headers" "$funcs" "$@" &&
-        enable $name && add_extralibs "$@"
+        enable $name && eval ${name}_extralibs="\$@"
 }
 
 check_lib_cpp(){
     log check_lib_cpp "$@"
-    headers="$1"
-    classes="$2"
-    shift 2
-    check_class_headers_cpp "$headers" "$classes" "$@" && add_extralibs "$@"
+    name="$1"
+    headers="$2"
+    classes="$3"
+    shift 3
+    disable $name
+    check_class_headers_cpp "$headers" "$classes" "$@" &&
+        enable $name && eval ${name}_extralibs="\$@"
 }
 
-check_pkg_config(){
-    log check_pkg_config "$@"
+test_pkg_config(){
+    log test_pkg_config "$@"
     name="$1"
     pkg_version="$2"
     pkg="${2%% *}"
@@ -1275,17 +1384,24 @@
     funcs="$4"
     shift 4
     disable $name
-    check_cmd $pkg_config --exists --print-errors $pkg_version || return
+    test_cmd $pkg_config --exists --print-errors $pkg_version || return
     pkg_cflags=$($pkg_config --cflags $pkg_config_flags $pkg)
     pkg_libs=$($pkg_config --libs $pkg_config_flags $pkg)
     check_func_headers "$headers" "$funcs" $pkg_cflags $pkg_libs "$@" &&
         enable $name &&
-        set_safe "${pkg}_cflags"    $pkg_cflags &&
-        set_safe "${pkg}_extralibs" $pkg_libs
+        set_sanitized "${name}_cflags"    $pkg_cflags &&
+        set_sanitized "${name}_extralibs" $pkg_libs
 }
 
-check_exec(){
-    check_ld "cc" "$@" && { enabled cross_compile || $TMPE >> $logfile 2>&1; }
+check_pkg_config(){
+    log check_pkg_config "$@"
+    name="$1"
+    test_pkg_config "$@" &&
+        eval add_cflags \$${name}_cflags
+}
+
+test_exec(){
+    test_ld "cc" "$@" && { enabled cross_compile || $TMPE >> $logfile 2>&1; }
 }
 
 check_exec_crash(){
@@ -1298,7 +1414,7 @@
     # can redirect the "Terminated" message from the shell.  SIGBUS
     # is not defined by standard C so it is used conditionally.
 
-    (check_exec "$@") >> $logfile 2>&1 <<EOF
+    (test_exec "$@") >> $logfile 2>&1 <<EOF
 #include <signal.h>
 static void sighandler(int sig){
     raise(SIGTERM);
@@ -1324,8 +1440,8 @@
     headers=$1
     type=$2
     shift 2
-    disable_safe "$type"
-    check_code cc "$headers" "$type v" "$@" && enable_safe "$type"
+    disable_sanitized "$type"
+    test_code cc "$headers" "$type v" "$@" && enable_sanitized "$type"
 }
 
 check_struct(){
@@ -1334,9 +1450,9 @@
     struct=$2
     member=$3
     shift 3
-    disable_safe "${struct}_${member}"
-    check_code cc "$headers" "const void *p = &(($struct *)0)->$member" "$@" &&
-        enable_safe "${struct}_${member}"
+    disable_sanitized "${struct}_${member}"
+    test_code cc "$headers" "const void *p = &(($struct *)0)->$member" "$@" &&
+        enable_sanitized "${struct}_${member}"
 }
 
 check_builtin(){
@@ -1346,7 +1462,7 @@
     builtin=$3
     shift 3
     disable "$name"
-    check_code ld "$headers" "$builtin" "cc" "$@" && enable "$name"
+    test_code ld "$headers" "$builtin" "cc" "$@" && enable "$name"
 }
 
 check_compile_assert(){
@@ -1356,17 +1472,29 @@
     condition=$3
     shift 3
     disable "$name"
-    check_code cc "$headers" "char c[2 * !!($condition) - 1]" "$@" && enable "$name"
+    test_code cc "$headers" "char c[2 * !!($condition) - 1]" "$@" && enable "$name"
+}
+
+check_cc(){
+    log check_cc "$@"
+    name=$1
+    shift
+    disable "$name"
+    test_code cc "$@" && enable "$name"
 }
 
 require(){
     log require "$@"
     name_version="$1"
     name="${1%% *}"
-    headers="$2"
-    func="$3"
-    shift 3
-    check_lib $name "$headers" $func "$@" || die "ERROR: $name_version not found"
+    shift
+    check_lib $name "$@" || die "ERROR: $name_version not found"
+}
+
+require_cc(){
+    log require_cc "$@"
+    name="$1"
+    check_cc "$@" || die "ERROR: $name failed"
 }
 
 require_cpp(){
@@ -1377,60 +1505,41 @@
     check_lib_cpp "$headers" "$classes" "$@" || die "ERROR: $name not found"
 }
 
-require_header(){
-    log require "$@"
-    header="$1"
-    shift
-    check_header "$header" "$@" || die "ERROR: $header header not found"
+require_headers(){
+    log require_headers "$@"
+    headers="$1"
+    check_headers "$@" || die "ERROR: $headers not found"
 }
 
 require_cpp_condition(){
-    log require "$@"
-    header="$1"
-    condition="$2"
-    shift 2
-    check_cpp_condition "$header" "$condition" "$@" || die "ERROR: $condition not satisfied"
-}
-
-use_pkg_config(){
-    log use_pkg_config "$@"
-    pkg="${2%% *}"
-    check_pkg_config "$@" || return 1
-    add_cflags    $(get_safe "${pkg}_cflags")
-    add_extralibs $(get_safe "${pkg}_extralibs")
+    log require_cpp_condition "$@"
+    condition="$3"
+    check_cpp_condition "$@" || die "ERROR: $condition not satisfied"
 }
 
 require_pkg_config(){
     log require_pkg_config "$@"
     pkg_version="$2"
-    use_pkg_config "$@" || die "ERROR: $pkg_version not found using pkg-config$pkg_config_fail_message"
+    check_pkg_config "$@" || die "ERROR: $pkg_version not found using pkg-config$pkg_config_fail_message"
 }
 
-hostcc_e(){
-    eval printf '%s\\n' $HOSTCC_E
-}
-
-hostcc_o(){
-    eval printf '%s\\n' $HOSTCC_O
-}
-
-check_host_cc(){
-    log check_host_cc "$@"
+test_host_cc(){
+    log test_host_cc "$@"
     cat > $TMPC
     log_file $TMPC
-    check_cmd $host_cc $host_cflags "$@" $HOSTCC_C $(hostcc_o $TMPO) $TMPC
+    test_cmd $host_cc $host_cflags "$@" $HOSTCC_C $(hostcc_o $TMPO) $TMPC
 }
 
-check_host_cpp(){
-    log check_host_cpp "$@"
+test_host_cpp(){
+    log test_host_cpp "$@"
     cat > $TMPC
     log_file $TMPC
-    check_cmd $host_cc $host_cppflags $host_cflags "$@" $(hostcc_e $TMPO) $TMPC
+    test_cmd $host_cc $host_cppflags $host_cflags "$@" $(hostcc_e $TMPO) $TMPC
 }
 
 check_host_cppflags(){
     log check_host_cppflags "$@"
-    check_host_cpp "$@" <<EOF && append host_cppflags "$@"
+    test_host_cpp "$@" <<EOF && append host_cppflags "$@"
 #include <stdlib.h>
 EOF
 }
@@ -1438,17 +1547,17 @@
 check_host_cflags(){
     log check_host_cflags "$@"
     set -- $($host_cflags_filter "$@")
-    check_host_cc "$@" <<EOF && append host_cflags "$@"
+    test_host_cc "$@" <<EOF && append host_cflags "$@"
 int x;
 EOF
 }
 
-check_host_cpp_condition(){
-    log check_host_cpp_condition "$@"
+test_host_cpp_condition(){
+    log test_host_cpp_condition "$@"
     header=$1
     condition=$2
     shift 2
-    check_host_cpp "$@" <<EOF
+    test_host_cpp "$@" <<EOF
 #include <$header>
 #if !($condition)
 #error "unsatisfied condition: $condition"
@@ -1456,6 +1565,14 @@
 EOF
 }
 
+check_host_cpp_condition(){
+    log check_host_cpp_condition "$@"
+    name=$1
+    shift 1
+    disable $name
+    test_host_cpp_condition "$@" && enable $name
+}
+
 cp_if_changed(){
     cmp -s "$1" "$2" && { test "$quiet" != "yes" && echo "$2 is unchanged"; } && return
     mkdir -p "$(dirname $2)"
@@ -1488,17 +1605,11 @@
     protocols
 "
 
-AVRESAMPLE_COMPONENTS=""
-
-AVUTIL_COMPONENTS=""
-
 COMPONENT_LIST="
     $AVCODEC_COMPONENTS
     $AVDEVICE_COMPONENTS
     $AVFILTER_COMPONENTS
     $AVFORMAT_COMPONENTS
-    $AVRESAMPLE_COMPONENTS
-    $AVUTIL_COMPONENTS
 "
 
 EXAMPLE_LIST="
@@ -1523,6 +1634,8 @@
     scaling_video_example
     transcode_aac_example
     transcoding_example
+    vaapi_encode_example
+    vaapi_transcode_example
 "
 
 EXTERNAL_AUTODETECT_LIBRARY_LIST="
@@ -1532,7 +1645,6 @@
     bzlib
     coreimage
     iconv
-    jack
     libxcb
     libxcb_shm
     libxcb_shape
@@ -1550,11 +1662,13 @@
     avisynth
     frei0r
     libcdio
+    libdavs2
     librubberband
     libvidstab
     libx264
     libx265
     libxavs
+    libxavs2
     libxvid
 "
 
@@ -1563,13 +1677,17 @@
     libndi_newtek
     libfdk_aac
     openssl
+    libtls
 "
 
 EXTERNAL_LIBRARY_VERSION3_LIST="
     gmp
+    liblensfun
     libopencore_amrnb
     libopencore_amrwb
+    libvmaf
     libvo_amrwbenc
+    mbedtls
     rkmpp
 "
 
@@ -1578,7 +1696,6 @@
 "
 
 EXTERNAL_LIBRARY_LIST="
-    $EXTERNAL_AUTODETECT_LIBRARY_LIST
     $EXTERNAL_LIBRARY_GPL_LIST
     $EXTERNAL_LIBRARY_NONFREE_LIST
     $EXTERNAL_LIBRARY_VERSION3_LIST
@@ -1588,11 +1705,13 @@
     gnutls
     jni
     ladspa
+    libaom
     libass
     libbluray
     libbs2b
     libcaca
     libcelt
+    libcodec2
     libdc1394
     libdrm
     libflite
@@ -1603,6 +1722,8 @@
     libgsm
     libiec61883
     libilbc
+    libjack
+    libklvanc
     libkvazaar
     libmodplug
     libmp3lame
@@ -1620,12 +1741,13 @@
     libsnappy
     libsoxr
     libspeex
+    libsrt
     libssh
+    libtensorflow
     libtesseract
     libtheora
     libtwolame
     libv4l2
-    libvmaf
     libvorbis
     libvpx
     libwavpack
@@ -1634,39 +1756,48 @@
     libzimg
     libzmq
     libzvbi
+    lv2
     mediacodec
     openal
-    opencl
     opengl
+    vapoursynth
 "
 
 HWACCEL_AUTODETECT_LIBRARY_LIST="
+    amf
     audiotoolbox
     crystalhd
     cuda
     cuvid
     d3d11va
     dxva2
+    ffnvcodec
+    nvdec
     nvenc
     vaapi
-    vda
     vdpau
     videotoolbox
     v4l2_m2m
     xvmc
 "
 
+# catchall list of things that require external libs to link
+EXTRALIBS_LIST="
+    cpu_init
+    cws2fws
+"
+
 HWACCEL_LIBRARY_NONFREE_LIST="
     cuda_sdk
     libnpp
 "
 
 HWACCEL_LIBRARY_LIST="
-    $HWACCEL_AUTODETECT_LIBRARY_LIST
     $HWACCEL_LIBRARY_NONFREE_LIST
     libmfx
     mmal
     omx
+    opencl
 "
 
 DOCUMENT_LIST="
@@ -1690,16 +1821,17 @@
     swscale_alpha
 "
 
+# this list should be kept in linking order
 LIBRARY_LIST="
-    avcodec
     avdevice
     avfilter
+    swscale
+    postproc
     avformat
+    avcodec
+    swresample
     avresample
     avutil
-    postproc
-    swresample
-    swscale
 "
 
 LICENSE_LIST="
@@ -1711,7 +1843,6 @@
 PROGRAM_LIST="
     ffplay
     ffprobe
-    ffserver
     ffmpeg
 "
 
@@ -1735,7 +1866,9 @@
     $DOCUMENT_LIST
     $EXAMPLE_LIST
     $EXTERNAL_LIBRARY_LIST
+    $EXTERNAL_AUTODETECT_LIBRARY_LIST
     $HWACCEL_LIBRARY_LIST
+    $HWACCEL_AUTODETECT_LIBRARY_LIST
     $FEATURE_LIST
     $LICENSE_LIST
     $LIBRARY_LIST
@@ -1834,6 +1967,7 @@
     amd3dnowext
     avx
     avx2
+    avx512
     fma3
     fma4
     mmx
@@ -1875,11 +2009,10 @@
     fast_64bit
     fast_clz
     fast_cmov
-    local_aligned_8
-    local_aligned_16
-    local_aligned_32
+    local_aligned
     simd_align_16
     simd_align_32
+    simd_align_64
 "
 
 BUILTIN_LIST="
@@ -1888,7 +2021,6 @@
     MemoryBarrier
     mm_empty
     rdtsc
-    sarestart
     sem_timedwait
     sync_val_compare_and_swap
 "
@@ -1904,13 +2036,11 @@
 "
 
 HEADERS_LIST="
-    altivec_h
     arpa_inet_h
     asm_types_h
     cdio_paranoia_h
     cdio_paranoia_paranoia_h
     cuda_h
-    d3d11_h
     dispatch_dispatch_h
     dev_bktr_ioctl_bt848_h
     dev_bktr_ioctl_meteor_h
@@ -1919,27 +2049,18 @@
     dev_video_meteor_ioctl_meteor_h
     direct_h
     dirent_h
-    dlfcn_h
     dxgidebug_h
     dxva_h
     ES2_gl_h
     gsm_h
     io_h
-    mach_mach_time_h
+    linux_perf_event_h
     machine_ioctl_bt848_h
     machine_ioctl_meteor_h
     malloc_h
     opencv2_core_core_c_h
-    openjpeg_2_3_openjpeg_h
-    openjpeg_2_2_openjpeg_h
-    openjpeg_2_1_openjpeg_h
-    openjpeg_2_0_openjpeg_h
-    openjpeg_1_5_openjpeg_h
     OpenGL_gl3_h
     poll_h
-    soundcard_h
-    stdatomic_h
-    sys_mman_h
     sys_param_h
     sys_resource_h
     sys_select_h
@@ -1996,6 +2117,16 @@
     truncf
 "
 
+SYSTEM_FEATURES="
+    dos_paths
+    libc_msvcrt
+    MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS
+    section_data_rel_ro
+    threads
+    uwp
+    winrt
+"
+
 SYSTEM_FUNCS="
     access
     aligned_malloc
@@ -2003,11 +2134,7 @@
     clock_gettime
     closesocket
     CommandLineToArgvW
-    CoTaskMemFree
-    CryptGenRandom
     fcntl
-    flt_lim
-    fork
     getaddrinfo
     gethrtime
     getopt
@@ -2022,9 +2149,7 @@
     gmtime_r
     inet_aton
     isatty
-    jack_port_get_latency_range
     kbhit
-    LoadLibrary
     localtime_r
     lstat
     lzo1x_999_compress
@@ -2039,6 +2164,7 @@
     posix_memalign
     pthread_cancel
     sched_getaffinity
+    SecItemImport
     SetConsoleTextAttribute
     SetConsoleCtrlHandler
     setmode
@@ -2053,14 +2179,20 @@
     wglGetProcAddress
 "
 
+SYSTEM_LIBRARIES="
+    bcrypt
+    vaapi_drm
+    vaapi_x11
+    vdpau_x11
+"
+
 TOOLCHAIN_FEATURES="
+    as_arch_directive
     as_dn_directive
     as_fpu_directive
     as_func
     as_object_arch
     asm_mod_q
-    attribute_may_alias
-    attribute_packed
     blocks_extension
     ebp_available
     ebx_available
@@ -2080,7 +2212,6 @@
 "
 
 TYPES_LIST="
-    CONDITION_VARIABLE_Ptr
     kCMVideoCodecType_HEVC
     socklen_t
     struct_addrinfo
@@ -2103,7 +2234,6 @@
     $(add_suffix _external $ARCH_EXT_LIST)
     $(add_suffix _inline   $ARCH_EXT_LIST)
     $ARCH_FEATURES
-    $ATOMICS_LIST
     $BUILTIN_LIST
     $COMPLEX_FUNCS
     $HAVE_LIST_CMDLINE
@@ -2111,38 +2241,44 @@
     $HEADERS_LIST
     $INTRINSICS_LIST
     $MATH_FUNCS
+    $SYSTEM_FEATURES
     $SYSTEM_FUNCS
+    $SYSTEM_LIBRARIES
     $THREADS_LIST
     $TOOLCHAIN_FEATURES
     $TYPES_LIST
-    atomics_native
-    dos_paths
-    libc_msvcrt
     makeinfo
     makeinfo_html
-    MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS
+    opencl_d3d11
+    opencl_drm_arm
+    opencl_drm_beignet
+    opencl_dxva2
+    opencl_vaapi_beignet
+    opencl_vaapi_intel_media
     perl
     pod2man
-    section_data_rel_ro
     texi2html
-    threads
-    uwp
-    vaapi_drm
-    vaapi_x11
-    vdpau_x11
-    winrt
 "
 
 # options emitted with CONFIG_ prefix but not available on the command line
 CONFIG_EXTRA="
     aandcttables
     ac3dsp
+    adts_header
     audio_frame_queue
     audiodsp
     blockdsp
     bswapdsp
     cabac
+    cbs
+    cbs_av1
+    cbs_h264
+    cbs_h265
+    cbs_jpeg
+    cbs_mpeg2
+    cbs_vp9
     dirac_parse
+    dnn
     dvprofile
     exif
     faandct
@@ -2192,6 +2328,7 @@
     qsv
     qsvdec
     qsvenc
+    qsvvpp
     rangecoder
     riffdec
     riffenc
@@ -2277,6 +2414,7 @@
     malloc_prefix
     nm
     optflags
+    nvcc
     nvccflags
     pkg_config
     pkg_config_flags
@@ -2321,19 +2459,6 @@
 
 map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
 
-loongson2_deps="mips"
-loongson3_deps="mips"
-mipsfpu_deps="mips"
-mipsdsp_deps="mips"
-mipsdspr2_deps="mips"
-mips32r2_deps="mips"
-mips32r5_deps="mips"
-mips32r6_deps="mips"
-mips64r2_deps="mips"
-mips64r6_deps="mips"
-msa_deps="mipsfpu"
-mmi_deps="mips"
-
 altivec_deps="ppc"
 dcbzl_deps="ppc"
 ldbrx_deps="ppc"
@@ -2341,6 +2466,19 @@
 vsx_deps="altivec"
 power8_deps="vsx"
 
+loongson2_deps="mips"
+loongson3_deps="mips"
+mips32r2_deps="mips"
+mips32r5_deps="mips"
+mips32r6_deps="mips"
+mips64r2_deps="mips"
+mips64r6_deps="mips"
+mipsfpu_deps="mips"
+mipsdsp_deps="mips"
+mipsdspr2_deps="mips"
+mmi_deps="mips"
+msa_deps="mipsfpu"
+
 cpunop_deps="i686"
 x86_64_select="i686"
 x86_64_suggest="fast_cmov"
@@ -2362,9 +2500,10 @@
 fma3_deps="avx"
 fma4_deps="avx"
 avx2_deps="avx"
+avx512_deps="avx2"
 
 mmx_external_deps="x86asm"
-mmx_inline_deps="inline_asm"
+mmx_inline_deps="inline_asm x86"
 mmx_suggest="mmx_external mmx_inline"
 
 for ext in $(filter_out mmx $ARCH_EXT_LIST_X86_SIMD); do
@@ -2380,8 +2519,10 @@
 fast_unaligned_if_any="aarch64 ppc x86"
 simd_align_16_if_any="altivec neon sse"
 simd_align_32_if_any="avx"
+simd_align_64_if_any="avx512"
 
 # system capabilities
+linux_perf_deps="linux_perf_event_h"
 symver_if_any="symver_asm_label symver_gnu_asm"
 valgrind_backtrace_conflict="optimizations"
 valgrind_backtrace_deps="valgrind_valgrind_h"
@@ -2395,11 +2536,20 @@
 threads_if_any="$THREADS_LIST"
 
 # subsystems
+cbs_av1_select="cbs"
+cbs_h264_select="cbs golomb"
+cbs_h265_select="cbs golomb"
+cbs_jpeg_select="cbs"
+cbs_mpeg2_select="cbs"
+cbs_vp9_select="cbs"
 dct_select="rdft"
 dirac_parse_select="golomb"
+dnn_suggest="libtensorflow"
 error_resilience_select="me_cmp"
-faandct_deps="faan fdctdsp"
-faanidct_deps="faan idctdsp"
+faandct_deps="faan"
+faandct_select="fdctdsp"
+faanidct_deps="faan"
+faanidct_select="idctdsp"
 h264dsp_select="startcode"
 hevcparse_select="golomb"
 frame_thread_encoder_deps="encoders threads"
@@ -2411,13 +2561,13 @@
 mpegaudio_select="mpegaudiodsp mpegaudioheader"
 mpegaudiodsp_select="dct"
 mpegvideo_select="blockdsp h264chroma hpeldsp idctdsp me_cmp mpeg_er videodsp"
-mpegvideoenc_select="me_cmp mpegvideo pixblockdsp qpeldsp"
+mpegvideoenc_select="aandcttables me_cmp mpegvideo pixblockdsp qpeldsp"
 vc1dsp_select="h264chroma qpeldsp startcode"
 rdft_select="fft"
 
 # decoders / encoders
-aac_decoder_select="mdct15 mdct sinewin"
-aac_fixed_decoder_select="mdct sinewin"
+aac_decoder_select="adts_header mdct15 mdct sinewin"
+aac_fixed_decoder_select="adts_header mdct sinewin"
 aac_encoder_select="audio_frame_queue iirfilter lpc mdct sinewin"
 aac_latm_decoder_select="aac_decoder aac_latm_parser"
 ac3_decoder_select="ac3_parser ac3dsp bswapdsp fmtconvert mdct"
@@ -2432,17 +2582,23 @@
 amrnb_decoder_select="lsp"
 amrwb_decoder_select="lsp"
 amv_decoder_select="sp5x_decoder exif"
-amv_encoder_select="aandcttables jpegtables mpegvideoenc"
+amv_encoder_select="jpegtables mpegvideoenc"
 ape_decoder_select="bswapdsp llauddsp"
-apng_decoder_select="zlib"
-apng_encoder_select="llvidencdsp zlib"
+apng_decoder_deps="zlib"
+apng_encoder_deps="zlib"
+apng_encoder_select="llvidencdsp"
+aptx_decoder_select="audio_frame_queue"
+aptx_encoder_select="audio_frame_queue"
+aptx_hd_decoder_select="audio_frame_queue"
+aptx_hd_encoder_select="audio_frame_queue"
 asv1_decoder_select="blockdsp bswapdsp idctdsp"
-asv1_encoder_select="bswapdsp fdctdsp pixblockdsp"
+asv1_encoder_select="aandcttables bswapdsp fdctdsp pixblockdsp"
 asv2_decoder_select="blockdsp bswapdsp idctdsp"
-asv2_encoder_select="bswapdsp fdctdsp pixblockdsp"
+asv2_encoder_select="aandcttables bswapdsp fdctdsp pixblockdsp"
 atrac1_decoder_select="mdct sinewin"
 atrac3_decoder_select="mdct"
 atrac3p_decoder_select="mdct sinewin"
+atrac9_decoder_select="mdct"
 avrn_decoder_select="exif jpegtables"
 bink_decoder_select="blockdsp hpeldsp"
 binkaudio_dct_decoder_select="mdct rdft dct sinewin wma_freqs"
@@ -2458,18 +2614,18 @@
 dds_decoder_select="texturedsp"
 dirac_decoder_select="dirac_parse dwt golomb videodsp mpegvideoenc"
 dnxhd_decoder_select="blockdsp idctdsp"
-dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp"
+dnxhd_encoder_select="blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp"
 dolby_e_decoder_select="mdct"
 dvvideo_decoder_select="dvprofile idctdsp"
 dvvideo_encoder_select="dvprofile fdctdsp me_cmp pixblockdsp"
-dxa_decoder_select="zlib"
+dxa_decoder_deps="zlib"
 dxv_decoder_select="lzf texturedsp"
 eac3_decoder_select="ac3_decoder"
 eac3_encoder_select="ac3_encoder"
 eamad_decoder_select="aandcttables blockdsp bswapdsp idctdsp mpegvideo"
 eatgq_decoder_select="aandcttables"
 eatqi_decoder_select="aandcttables blockdsp bswapdsp idctdsp"
-exr_decoder_select="zlib"
+exr_decoder_deps="zlib"
 ffv1_decoder_select="rangecoder"
 ffv1_encoder_select="rangecoder"
 ffvhuff_decoder_select="huffyuv_decoder"
@@ -2477,20 +2633,21 @@
 fic_decoder_select="golomb"
 flac_decoder_select="flacdsp"
 flac_encoder_select="bswapdsp flacdsp lpc"
-flashsv2_decoder_select="zlib"
-flashsv2_encoder_select="zlib"
-flashsv_decoder_select="zlib"
-flashsv_encoder_select="zlib"
+flashsv2_decoder_deps="zlib"
+flashsv2_encoder_deps="zlib"
+flashsv_decoder_deps="zlib"
+flashsv_encoder_deps="zlib"
 flv_decoder_select="h263_decoder"
 flv_encoder_select="h263_encoder"
 fourxm_decoder_select="blockdsp bswapdsp"
 fraps_decoder_select="bswapdsp huffman"
-g2m_decoder_select="blockdsp idctdsp jpegtables zlib"
+g2m_decoder_deps="zlib"
+g2m_decoder_select="blockdsp idctdsp jpegtables"
 g729_decoder_select="audiodsp"
 h261_decoder_select="mpegvideo"
-h261_encoder_select="aandcttables mpegvideoenc"
+h261_encoder_select="mpegvideoenc"
 h263_decoder_select="h263_parser h263dsp mpegvideo qpeldsp"
-h263_encoder_select="aandcttables h263dsp mpegvideoenc"
+h263_encoder_select="h263dsp mpegvideoenc"
 h263i_decoder_select="h263_decoder"
 h263p_decoder_select="h263_decoder"
 h263p_encoder_select="h263_encoder"
@@ -2511,13 +2668,14 @@
 jpegls_decoder_select="mjpeg_decoder"
 jv_decoder_select="blockdsp"
 lagarith_decoder_select="llviddsp"
-ljpeg_encoder_select="aandcttables idctdsp jpegtables mpegvideoenc"
+ljpeg_encoder_select="idctdsp jpegtables mpegvideoenc"
 magicyuv_decoder_select="llviddsp"
+magicyuv_encoder_select="llvidencdsp"
 mdec_decoder_select="blockdsp idctdsp mpegvideo"
 metasound_decoder_select="lsp mdct sinewin"
 mimic_decoder_select="blockdsp bswapdsp hpeldsp idctdsp"
 mjpeg_decoder_select="blockdsp hpeldsp exif idctdsp jpegtables"
-mjpeg_encoder_select="aandcttables jpegtables mpegvideoenc"
+mjpeg_encoder_select="jpegtables mpegvideoenc"
 mjpegb_decoder_select="mjpeg_decoder"
 mlp_decoder_select="mlp_parser"
 mlp_encoder_select="lpc"
@@ -2534,17 +2692,15 @@
 mp3on4float_decoder_select="mpegaudio"
 mpc7_decoder_select="bswapdsp mpegaudiodsp"
 mpc8_decoder_select="mpegaudiodsp"
-mpeg_xvmc_decoder_deps="X11_extensions_XvMClib_h"
-mpeg_xvmc_decoder_select="mpeg2video_decoder"
 mpegvideo_decoder_select="mpegvideo"
 mpeg1video_decoder_select="mpegvideo"
-mpeg1video_encoder_select="aandcttables mpegvideoenc h263dsp"
+mpeg1video_encoder_select="mpegvideoenc h263dsp"
 mpeg2video_decoder_select="mpegvideo"
-mpeg2video_encoder_select="aandcttables mpegvideoenc h263dsp"
+mpeg2video_encoder_select="mpegvideoenc h263dsp"
 mpeg4_decoder_select="h263_decoder mpeg4video_parser"
 mpeg4_encoder_select="h263_encoder"
 msa1_decoder_select="mss34dsp"
-mscc_decoder_select="zlib"
+mscc_decoder_deps="zlib"
 msmpeg4v1_decoder_select="h263_decoder"
 msmpeg4v2_decoder_select="h263_decoder"
 msmpeg4v2_encoder_select="h263_encoder"
@@ -2552,6 +2708,7 @@
 msmpeg4v3_encoder_select="h263_encoder"
 mss2_decoder_select="mpegvideo qpeldsp vc1_decoder"
 mts2_decoder_select="mss34dsp"
+mwsc_decoder_deps="zlib"
 mxpeg_decoder_select="mjpeg_decoder"
 nellymoser_decoder_select="mdct sinewin"
 nellymoser_encoder_select="audio_frame_queue mdct sinewin"
@@ -2560,8 +2717,9 @@
 opus_decoder_deps="swresample"
 opus_decoder_select="mdct15"
 opus_encoder_select="audio_frame_queue mdct15"
-png_decoder_select="zlib"
-png_encoder_select="llvidencdsp zlib"
+png_decoder_deps="zlib"
+png_encoder_deps="zlib"
+png_encoder_select="llvidencdsp"
 prores_decoder_select="blockdsp idctdsp"
 prores_encoder_select="fdctdsp"
 qcelp_decoder_select="lsp"
@@ -2569,8 +2727,9 @@
 ra_144_decoder_select="audiodsp"
 ra_144_encoder_select="audio_frame_queue lpc audiodsp"
 ralf_decoder_select="golomb"
+rasc_decoder_deps="zlib"
 rawvideo_decoder_select="bswapdsp"
-rscc_decoder_select="zlib"
+rscc_decoder_deps="zlib"
 rtjpeg_decoder_select="me_cmp"
 rv10_decoder_select="h263_decoder"
 rv10_encoder_select="h263_encoder"
@@ -2578,22 +2737,24 @@
 rv20_encoder_select="h263_encoder"
 rv30_decoder_select="golomb h264pred h264qpel mpegvideo rv34dsp"
 rv40_decoder_select="golomb h264pred h264qpel mpegvideo rv34dsp"
-screenpresso_decoder_select="zlib"
+screenpresso_decoder_deps="zlib"
 shorten_decoder_select="bswapdsp"
 sipr_decoder_select="lsp"
 snow_decoder_select="dwt h264qpel hpeldsp me_cmp rangecoder videodsp"
-snow_encoder_select="aandcttables dwt h264qpel hpeldsp me_cmp mpegvideoenc rangecoder"
+snow_encoder_select="dwt h264qpel hpeldsp me_cmp mpegvideoenc rangecoder"
 sonic_decoder_select="golomb rangecoder"
 sonic_encoder_select="golomb rangecoder"
 sonic_ls_encoder_select="golomb rangecoder"
 sp5x_decoder_select="mjpeg_decoder"
-srgc_decoder_select="zlib"
+speedhq_decoder_select="mpegvideo"
+srgc_decoder_deps="zlib"
 svq1_decoder_select="hpeldsp"
-svq1_encoder_select="aandcttables hpeldsp me_cmp mpegvideoenc"
+svq1_encoder_select="hpeldsp me_cmp mpegvideoenc"
 svq3_decoder_select="golomb h264dsp h264parse h264pred hpeldsp tpeldsp videodsp"
 svq3_decoder_suggest="zlib"
 tak_decoder_select="audiodsp"
-tdsc_decoder_select="zlib mjpeg_decoder"
+tdsc_decoder_deps="zlib"
+tdsc_decoder_select="mjpeg_decoder"
 theora_decoder_select="vp3_decoder"
 thp_decoder_select="mjpeg_decoder"
 tiff_decoder_suggest="zlib lzma"
@@ -2602,18 +2763,16 @@
 truehd_encoder_select="lpc"
 truemotion2_decoder_select="bswapdsp"
 truespeech_decoder_select="bswapdsp"
-tscc_decoder_select="zlib"
+tscc_decoder_deps="zlib"
 twinvq_decoder_select="mdct lsp sinewin"
 txd_decoder_select="texturedsp"
 utvideo_decoder_select="bswapdsp llviddsp"
 utvideo_encoder_select="bswapdsp huffman llvidencdsp"
 vble_decoder_select="llviddsp"
 vc1_decoder_select="blockdsp h263_decoder h264qpel intrax8 mpegvideo vc1dsp"
-vc1_qsv_decoder_deps="libmfx"
-vc1_qsv_decoder_select="qsvdec vc1_qsv_hwaccel vc1_parser"
 vc1image_decoder_select="vc1_decoder"
 vorbis_decoder_select="mdct"
-vorbis_encoder_select="mdct"
+vorbis_encoder_select="audio_frame_queue mdct"
 vp3_decoder_select="hpeldsp vp3dsp videodsp"
 vp5_decoder_select="h264chroma hpeldsp videodsp vp3dsp vp56dsp"
 vp6_decoder_select="h264chroma hpeldsp huffman videodsp vp3dsp vp56dsp"
@@ -2621,7 +2780,8 @@
 vp6f_decoder_select="vp6_decoder"
 vp7_decoder_select="h264pred videodsp vp8dsp"
 vp8_decoder_select="h264pred videodsp vp8dsp"
-vp9_decoder_select="videodsp vp9_parser"
+vp9_decoder_select="videodsp vp9_parser vp9_superframe_split_bsf"
+wcmv_decoder_deps="zlib"
 webp_decoder_select="vp8_decoder exif"
 wmalossless_decoder_select="llauddsp"
 wmapro_decoder_select="mdct sinewin wma_freqs"
@@ -2638,23 +2798,20 @@
 wmv3image_decoder_select="wmv3_decoder"
 xma1_decoder_select="wmapro_decoder"
 xma2_decoder_select="wmapro_decoder"
-zerocodec_decoder_select="zlib"
-zlib_decoder_select="zlib"
-zlib_encoder_select="zlib"
-zmbv_decoder_select="zlib"
-zmbv_encoder_select="zlib"
+zerocodec_decoder_deps="zlib"
+zlib_decoder_deps="zlib"
+zlib_encoder_deps="zlib"
+zmbv_decoder_deps="zlib"
+zmbv_encoder_deps="zlib"
 
 # hardware accelerators
 crystalhd_deps="libcrystalhd_libcrystalhd_if_h"
-cuda_deps_any="libdl LoadLibrary"
-cuvid_deps="cuda"
-d3d11va_deps="d3d11_h dxva_h ID3D11VideoDecoder ID3D11VideoContext"
-dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32"
-dxva2_extralibs="-luser32"
-vda_framework_deps="VideoDecodeAcceleration_VDADecoder_h blocks_extension"
-vda_framework_extralibs="-framework VideoDecodeAcceleration"
-vda_deps="vda_framework pthreads"
-vda_extralibs="-framework CoreFoundation -framework QuartzCore"
+cuda_deps="ffnvcodec"
+cuvid_deps="ffnvcodec"
+d3d11va_deps="dxva_h ID3D11VideoDecoder ID3D11VideoContext"
+dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32"
+ffnvcodec_deps_any="libdl LoadLibrary"
+nvdec_deps="ffnvcodec"
 videotoolbox_hwaccel_deps="videotoolbox pthreads"
 videotoolbox_hwaccel_extralibs="-framework QuartzCore"
 xvmc_deps="X11_extensions_XvMClib_h"
@@ -2663,67 +2820,56 @@
 h263_vaapi_hwaccel_select="h263_decoder"
 h263_videotoolbox_hwaccel_deps="videotoolbox"
 h263_videotoolbox_hwaccel_select="h263_decoder"
-h264_cuvid_hwaccel_deps="cuda cuvid"
-h264_cuvid_hwaccel_select="h264_cuvid_decoder"
 h264_d3d11va_hwaccel_deps="d3d11va"
 h264_d3d11va_hwaccel_select="h264_decoder"
 h264_d3d11va2_hwaccel_deps="d3d11va"
 h264_d3d11va2_hwaccel_select="h264_decoder"
 h264_dxva2_hwaccel_deps="dxva2"
 h264_dxva2_hwaccel_select="h264_decoder"
-h264_mediacodec_hwaccel_deps="mediacodec"
-h264_mmal_hwaccel_deps="mmal"
-h264_qsv_hwaccel_deps="libmfx"
+h264_nvdec_hwaccel_deps="nvdec"
+h264_nvdec_hwaccel_select="h264_decoder"
 h264_vaapi_hwaccel_deps="vaapi"
 h264_vaapi_hwaccel_select="h264_decoder"
-h264_vda_hwaccel_deps="vda"
-h264_vda_hwaccel_select="h264_decoder"
-h264_vda_old_hwaccel_deps="vda"
-h264_vda_old_hwaccel_select="h264_decoder"
 h264_vdpau_hwaccel_deps="vdpau"
 h264_vdpau_hwaccel_select="h264_decoder"
 h264_videotoolbox_hwaccel_deps="videotoolbox"
 h264_videotoolbox_hwaccel_select="h264_decoder"
-hevc_cuvid_hwaccel_deps="cuda cuvid"
-hevc_cuvid_hwaccel_select="hevc_cuvid_decoder"
 hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
 hevc_d3d11va_hwaccel_select="hevc_decoder"
-hevc_mediacodec_hwaccel_deps="mediacodec"
 hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
 hevc_d3d11va2_hwaccel_select="hevc_decoder"
 hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC"
 hevc_dxva2_hwaccel_select="hevc_decoder"
-hevc_qsv_hwaccel_deps="libmfx"
+hevc_nvdec_hwaccel_deps="nvdec"
+hevc_nvdec_hwaccel_select="hevc_decoder"
 hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC"
 hevc_vaapi_hwaccel_select="hevc_decoder"
 hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC"
 hevc_vdpau_hwaccel_select="hevc_decoder"
 hevc_videotoolbox_hwaccel_deps="videotoolbox"
 hevc_videotoolbox_hwaccel_select="hevc_decoder"
-mjpeg_cuvid_hwaccel_deps="cuda cuvid"
-mjpeg_cuvid_hwaccel_select="mjpeg_cuvid_decoder"
+mjpeg_nvdec_hwaccel_deps="nvdec"
+mjpeg_nvdec_hwaccel_select="mjpeg_decoder"
+mjpeg_vaapi_hwaccel_deps="vaapi"
+mjpeg_vaapi_hwaccel_select="mjpeg_decoder"
 mpeg_xvmc_hwaccel_deps="xvmc"
 mpeg_xvmc_hwaccel_select="mpeg2video_decoder"
-mpeg1_cuvid_hwaccel_deps="cuda cuvid"
-mpeg1_cuvid_hwaccel_select="mpeg1_cuvid_decoder"
+mpeg1_nvdec_hwaccel_deps="nvdec"
+mpeg1_nvdec_hwaccel_select="mpeg1video_decoder"
 mpeg1_vdpau_hwaccel_deps="vdpau"
 mpeg1_vdpau_hwaccel_select="mpeg1video_decoder"
 mpeg1_videotoolbox_hwaccel_deps="videotoolbox"
 mpeg1_videotoolbox_hwaccel_select="mpeg1video_decoder"
 mpeg1_xvmc_hwaccel_deps="xvmc"
 mpeg1_xvmc_hwaccel_select="mpeg1video_decoder"
-mpeg2_cuvid_hwaccel_deps="cuda cuvid"
-mpeg2_cuvid_hwaccel_select="mpeg2_cuvid_decoder"
 mpeg2_d3d11va_hwaccel_deps="d3d11va"
 mpeg2_d3d11va_hwaccel_select="mpeg2video_decoder"
 mpeg2_d3d11va2_hwaccel_deps="d3d11va"
 mpeg2_d3d11va2_hwaccel_select="mpeg2video_decoder"
 mpeg2_dxva2_hwaccel_deps="dxva2"
 mpeg2_dxva2_hwaccel_select="mpeg2video_decoder"
-mpeg2_mediacodec_hwaccel_deps="mediacodec"
-mpeg2_mmal_hwaccel_deps="mmal"
-mpeg2_qsv_hwaccel_deps="libmfx"
-mpeg2_qsv_hwaccel_select="qsvdec_mpeg2"
+mpeg2_nvdec_hwaccel_deps="nvdec"
+mpeg2_nvdec_hwaccel_select="mpeg2video_decoder"
 mpeg2_vaapi_hwaccel_deps="vaapi"
 mpeg2_vaapi_hwaccel_select="mpeg2video_decoder"
 mpeg2_vdpau_hwaccel_deps="vdpau"
@@ -2732,165 +2878,149 @@
 mpeg2_videotoolbox_hwaccel_select="mpeg2video_decoder"
 mpeg2_xvmc_hwaccel_deps="xvmc"
 mpeg2_xvmc_hwaccel_select="mpeg2video_decoder"
-mpeg4_cuvid_hwaccel_deps="cuda cuvid"
-mpeg4_cuvid_hwaccel_select="mpeg4_cuvid_decoder"
-mpeg4_mediacodec_hwaccel_deps="mediacodec"
-mpeg4_mmal_hwaccel_deps="mmal"
+mpeg4_nvdec_hwaccel_deps="nvdec"
+mpeg4_nvdec_hwaccel_select="mpeg4_decoder"
 mpeg4_vaapi_hwaccel_deps="vaapi"
 mpeg4_vaapi_hwaccel_select="mpeg4_decoder"
 mpeg4_vdpau_hwaccel_deps="vdpau"
 mpeg4_vdpau_hwaccel_select="mpeg4_decoder"
 mpeg4_videotoolbox_hwaccel_deps="videotoolbox"
 mpeg4_videotoolbox_hwaccel_select="mpeg4_decoder"
-vc1_cuvid_hwaccel_deps="cuda cuvid"
-vc1_cuvid_hwaccel_select="vc1_cuvid_decoder"
 vc1_d3d11va_hwaccel_deps="d3d11va"
 vc1_d3d11va_hwaccel_select="vc1_decoder"
 vc1_d3d11va2_hwaccel_deps="d3d11va"
 vc1_d3d11va2_hwaccel_select="vc1_decoder"
 vc1_dxva2_hwaccel_deps="dxva2"
 vc1_dxva2_hwaccel_select="vc1_decoder"
-vc1_mmal_hwaccel_deps="mmal"
-vc1_qsv_hwaccel_deps="libmfx"
-vc1_qsv_hwaccel_select="qsvdec_vc1"
+vc1_nvdec_hwaccel_deps="nvdec"
+vc1_nvdec_hwaccel_select="vc1_decoder"
 vc1_vaapi_hwaccel_deps="vaapi"
 vc1_vaapi_hwaccel_select="vc1_decoder"
 vc1_vdpau_hwaccel_deps="vdpau"
 vc1_vdpau_hwaccel_select="vc1_decoder"
-vp8_cuvid_hwaccel_deps="cuda cuvid"
-vp8_cuvid_hwaccel_select="vp8_cuvid_decoder"
-vp9_cuvid_hwaccel_deps="cuda cuvid"
-vp9_cuvid_hwaccel_select="vp9_cuvid_decoder"
-vp8_mediacodec_hwaccel_deps="mediacodec"
-vp8_qsv_hwaccel_deps="libmfx"
+vp8_nvdec_hwaccel_deps="nvdec"
+vp8_nvdec_hwaccel_select="vp8_decoder"
+vp8_vaapi_hwaccel_deps="vaapi"
+vp8_vaapi_hwaccel_select="vp8_decoder"
 vp9_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_VP9"
 vp9_d3d11va_hwaccel_select="vp9_decoder"
 vp9_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_VP9"
 vp9_d3d11va2_hwaccel_select="vp9_decoder"
 vp9_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_VP9"
 vp9_dxva2_hwaccel_select="vp9_decoder"
-vp9_mediacodec_hwaccel_deps="mediacodec"
+vp9_nvdec_hwaccel_deps="nvdec"
+vp9_nvdec_hwaccel_select="vp9_decoder"
 vp9_vaapi_hwaccel_deps="vaapi VADecPictureParameterBufferVP9_bit_depth"
 vp9_vaapi_hwaccel_select="vp9_decoder"
 wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
 wmv3_d3d11va2_hwaccel_select="vc1_d3d11va2_hwaccel"
 wmv3_dxva2_hwaccel_select="vc1_dxva2_hwaccel"
+wmv3_nvdec_hwaccel_select="vc1_nvdec_hwaccel"
 wmv3_vaapi_hwaccel_select="vc1_vaapi_hwaccel"
 wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
 
 # hardware-accelerated codecs
 omx_deps="libdl pthreads"
 omx_rpi_select="omx"
+qsv_deps="libmfx"
 qsvdec_select="qsv"
 qsvenc_select="qsv"
+qsvvpp_select="qsv"
 vaapi_encode_deps="vaapi"
-v4l2_m2m_deps_any="linux_videodev2_h"
+v4l2_m2m_deps="linux_videodev2_h sem_timedwait"
 
-hwupload_cuda_filter_deps="cuda"
-scale_npp_filter_deps="cuda libnpp"
+hwupload_cuda_filter_deps="ffnvcodec"
+scale_npp_filter_deps="ffnvcodec libnpp"
 scale_cuda_filter_deps="cuda_sdk"
 thumbnail_cuda_filter_deps="cuda_sdk"
+transpose_npp_filter_deps="ffnvcodec libnpp"
 
-nvenc_deps="cuda"
+amf_deps_any="libdl LoadLibrary"
+nvenc_deps="ffnvcodec"
 nvenc_deps_any="libdl LoadLibrary"
 nvenc_encoder_deps="nvenc"
 
 h263_v4l2m2m_decoder_deps="v4l2_m2m h263_v4l2_m2m"
 h263_v4l2m2m_encoder_deps="v4l2_m2m h263_v4l2_m2m"
+h264_amf_encoder_deps="amf"
 h264_crystalhd_decoder_select="crystalhd h264_mp4toannexb_bsf h264_parser"
-h264_cuvid_decoder_deps="cuda cuvid"
+h264_cuvid_decoder_deps="cuvid"
 h264_cuvid_decoder_select="h264_mp4toannexb_bsf"
 h264_mediacodec_decoder_deps="mediacodec"
 h264_mediacodec_decoder_select="h264_mp4toannexb_bsf h264_parser"
 h264_mmal_decoder_deps="mmal"
 h264_nvenc_encoder_deps="nvenc"
 h264_omx_encoder_deps="omx"
-h264_qsv_decoder_deps="libmfx"
-h264_qsv_decoder_select="h264_mp4toannexb_bsf h264_parser qsvdec h264_qsv_hwaccel"
-h264_qsv_encoder_deps="libmfx"
+h264_qsv_decoder_select="h264_mp4toannexb_bsf h264_parser qsvdec"
 h264_qsv_encoder_select="qsvenc"
 h264_rkmpp_decoder_deps="rkmpp"
 h264_rkmpp_decoder_select="h264_mp4toannexb_bsf"
-h264_vaapi_encoder_deps="VAEncPictureParameterBufferH264"
-h264_vaapi_encoder_select="vaapi_encode golomb"
-h264_vda_decoder_deps="vda"
-h264_vda_decoder_select="h264_decoder"
-h264_vdpau_decoder_deps="vdpau"
-h264_vdpau_decoder_select="h264_decoder"
+h264_vaapi_encoder_select="cbs_h264 vaapi_encode"
 h264_v4l2m2m_decoder_deps="v4l2_m2m h264_v4l2_m2m"
 h264_v4l2m2m_encoder_deps="v4l2_m2m h264_v4l2_m2m"
-hevc_cuvid_decoder_deps="cuda cuvid"
+hevc_amf_encoder_deps="amf"
+hevc_cuvid_decoder_deps="cuvid"
 hevc_cuvid_decoder_select="hevc_mp4toannexb_bsf"
 hevc_mediacodec_decoder_deps="mediacodec"
 hevc_mediacodec_decoder_select="hevc_mp4toannexb_bsf hevc_parser"
 hevc_nvenc_encoder_deps="nvenc"
-hevc_qsv_decoder_deps="libmfx"
-hevc_qsv_decoder_select="hevc_mp4toannexb_bsf hevc_parser qsvdec hevc_qsv_hwaccel"
-hevc_qsv_encoder_deps="libmfx"
+hevc_qsv_decoder_select="hevc_mp4toannexb_bsf hevc_parser qsvdec"
 hevc_qsv_encoder_select="hevcparse qsvenc"
 hevc_rkmpp_decoder_deps="rkmpp"
 hevc_rkmpp_decoder_select="hevc_mp4toannexb_bsf"
 hevc_vaapi_encoder_deps="VAEncPictureParameterBufferHEVC"
-hevc_vaapi_encoder_select="vaapi_encode golomb"
+hevc_vaapi_encoder_select="cbs_h265 vaapi_encode"
 hevc_v4l2m2m_decoder_deps="v4l2_m2m hevc_v4l2_m2m"
 hevc_v4l2m2m_encoder_deps="v4l2_m2m hevc_v4l2_m2m"
-mjpeg_cuvid_decoder_deps="cuda cuvid"
+mjpeg_cuvid_decoder_deps="cuvid"
+mjpeg_qsv_encoder_deps="libmfx"
+mjpeg_qsv_encoder_select="qsvenc"
 mjpeg_vaapi_encoder_deps="VAEncPictureParameterBufferJPEG"
-mjpeg_vaapi_encoder_select="vaapi_encode jpegtables"
-mpeg1_cuvid_decoder_deps="cuda cuvid"
-mpeg1_vdpau_decoder_deps="vdpau"
-mpeg1_vdpau_decoder_select="mpeg1video_decoder"
+mjpeg_vaapi_encoder_select="cbs_jpeg jpegtables vaapi_encode"
+mpeg1_cuvid_decoder_deps="cuvid"
 mpeg1_v4l2m2m_decoder_deps="v4l2_m2m mpeg1_v4l2_m2m"
 mpeg2_crystalhd_decoder_select="crystalhd"
-mpeg2_cuvid_decoder_deps="cuda cuvid"
+mpeg2_cuvid_decoder_deps="cuvid"
 mpeg2_mmal_decoder_deps="mmal"
 mpeg2_mediacodec_decoder_deps="mediacodec"
-mpeg2_qsv_decoder_deps="libmfx"
-mpeg2_qsv_decoder_select="qsvdec mpeg2_qsv_hwaccel"
-mpeg2_qsv_encoder_deps="libmfx"
+mpeg2_qsv_decoder_select="qsvdec mpegvideo_parser"
 mpeg2_qsv_encoder_select="qsvenc"
-mpeg2_vaapi_encoder_deps="VAEncPictureParameterBufferMPEG2"
-mpeg2_vaapi_encoder_select="vaapi_encode"
+mpeg2_vaapi_encoder_select="cbs_mpeg2 vaapi_encode"
 mpeg2_v4l2m2m_decoder_deps="v4l2_m2m mpeg2_v4l2_m2m"
 mpeg4_crystalhd_decoder_select="crystalhd"
-mpeg4_cuvid_decoder_deps="cuda cuvid"
+mpeg4_cuvid_decoder_deps="cuvid"
 mpeg4_mediacodec_decoder_deps="mediacodec"
 mpeg4_mmal_decoder_deps="mmal"
 mpeg4_omx_encoder_deps="omx"
-mpeg4_vdpau_decoder_deps="vdpau"
-mpeg4_vdpau_decoder_select="mpeg4_decoder"
 mpeg4_v4l2m2m_decoder_deps="v4l2_m2m mpeg4_v4l2_m2m"
 mpeg4_v4l2m2m_encoder_deps="v4l2_m2m mpeg4_v4l2_m2m"
-mpeg_vdpau_decoder_deps="vdpau"
-mpeg_vdpau_decoder_select="mpeg2video_decoder"
 msmpeg4_crystalhd_decoder_select="crystalhd"
 nvenc_h264_encoder_select="h264_nvenc_encoder"
 nvenc_hevc_encoder_select="hevc_nvenc_encoder"
 vc1_crystalhd_decoder_select="crystalhd"
-vc1_cuvid_decoder_deps="cuda cuvid"
+vc1_cuvid_decoder_deps="cuvid"
 vc1_mmal_decoder_deps="mmal"
-vc1_vdpau_decoder_deps="vdpau"
-vc1_vdpau_decoder_select="vc1_decoder"
+vc1_qsv_decoder_select="qsvdec vc1_parser"
 vc1_v4l2m2m_decoder_deps="v4l2_m2m vc1_v4l2_m2m"
-vp8_cuvid_decoder_deps="cuda cuvid"
+vp8_cuvid_decoder_deps="cuvid"
 vp8_mediacodec_decoder_deps="mediacodec"
-vp8_qsv_decoder_deps="libmfx"
-vp8_qsv_decoder_select="qsvdec vp8_qsv_hwaccel vp8_parser"
+vp8_qsv_decoder_select="qsvdec vp8_parser"
 vp8_rkmpp_decoder_deps="rkmpp"
 vp8_vaapi_encoder_deps="VAEncPictureParameterBufferVP8"
 vp8_vaapi_encoder_select="vaapi_encode"
 vp8_v4l2m2m_decoder_deps="v4l2_m2m vp8_v4l2_m2m"
 vp8_v4l2m2m_encoder_deps="v4l2_m2m vp8_v4l2_m2m"
-vp9_cuvid_decoder_deps="cuda cuvid"
+vp9_cuvid_decoder_deps="cuvid"
 vp9_mediacodec_decoder_deps="mediacodec"
 vp9_rkmpp_decoder_deps="rkmpp"
 vp9_vaapi_encoder_deps="VAEncPictureParameterBufferVP9"
 vp9_vaapi_encoder_select="vaapi_encode"
 vp9_v4l2m2m_decoder_deps="v4l2_m2m vp9_v4l2_m2m"
 wmv3_crystalhd_decoder_select="crystalhd"
-wmv3_vdpau_decoder_select="vc1_vdpau_decoder"
 
 # parsers
+aac_parser_select="adts_header"
+av1_parser_select="cbs_av1"
 h264_parser_select="golomb h264dsp h264parse"
 hevc_parser_select="hevcparse"
 mpegaudio_parser_select="mpegaudioheader"
@@ -2899,7 +3029,18 @@
 vc1_parser_select="vc1dsp"
 
 # bitstream_filters
+aac_adtstoasc_bsf_select="adts_header"
+av1_metadata_bsf_select="cbs_av1"
+eac3_core_bsf_select="ac3_parser"
+filter_units_bsf_select="cbs"
+h264_metadata_bsf_deps="const_nan"
+h264_metadata_bsf_select="cbs_h264"
+h264_redundant_pps_bsf_select="cbs_h264"
+hevc_metadata_bsf_select="cbs_h265"
 mjpeg2jpeg_bsf_select="jpegtables"
+mpeg2_metadata_bsf_select="cbs_mpeg2"
+trace_headers_bsf_select="cbs"
+vp9_metadata_bsf_select="cbs_vp9"
 
 # external libraries
 aac_at_decoder_deps="audiotoolbox"
@@ -2939,7 +3080,15 @@
 chromaprint_muxer_deps="chromaprint"
 h264_videotoolbox_encoder_deps="pthreads"
 h264_videotoolbox_encoder_select="videotoolbox_encoder"
+hevc_videotoolbox_encoder_deps="pthreads"
+hevc_videotoolbox_encoder_select="videotoolbox_encoder"
+libaom_av1_decoder_deps="libaom"
+libaom_av1_encoder_deps="libaom"
+libaom_av1_encoder_select="extract_extradata_bsf"
 libcelt_decoder_deps="libcelt"
+libcodec2_decoder_deps="libcodec2"
+libcodec2_encoder_deps="libcodec2"
+libdavs2_decoder_deps="libdavs2"
 libfdk_aac_decoder_deps="libfdk_aac"
 libfdk_aac_encoder_deps="libfdk_aac"
 libfdk_aac_encoder_select="audio_frame_queue"
@@ -2977,7 +3126,7 @@
 libtwolame_encoder_deps="libtwolame"
 libvo_amrwbenc_encoder_deps="libvo_amrwbenc"
 libvorbis_decoder_deps="libvorbis"
-libvorbis_encoder_deps="libvorbis"
+libvorbis_encoder_deps="libvorbis libvorbisenc"
 libvorbis_encoder_select="audio_frame_queue"
 libvpx_vp8_decoder_deps="libvpx"
 libvpx_vp8_encoder_deps="libvpx"
@@ -2993,11 +3142,13 @@
 libx264rgb_encoder_select="libx264_encoder"
 libx265_encoder_deps="libx265"
 libxavs_encoder_deps="libxavs"
+libxavs2_encoder_deps="libxavs2"
 libxvid_encoder_deps="libxvid"
 libzvbi_teletext_decoder_deps="libzvbi"
-videotoolbox_extralibs="-framework CoreFoundation -framework VideoToolbox -framework CoreMedia -framework CoreVideo"
+vapoursynth_demuxer_deps="vapoursynth"
+videotoolbox_suggest="coreservices"
+videotoolbox_deps="corefoundation coremedia corevideo"
 videotoolbox_encoder_deps="videotoolbox VTCompressionSessionPrepareToEncodeFrames"
-videotoolbox_encoder_suggest="vda_framework"
 
 # demuxers / muxers
 ac3_demuxer_select="ac3_parser"
@@ -3024,6 +3175,7 @@
 flac_demuxer_select="flac_parser"
 hds_muxer_select="flv_muxer"
 hls_muxer_select="mpegts_muxer"
+hls_muxer_suggest="gcrypt openssl"
 image2_alias_pix_demuxer_select="image2_demuxer"
 image2_brender_pix_demuxer_select="image2_demuxer"
 ipod_muxer_select="mov_muxer"
@@ -3060,7 +3212,8 @@
 sap_muxer_select="rtp_muxer rtp_protocol rtpenc_chain"
 sdp_demuxer_select="rtpdec"
 smoothstreaming_muxer_select="ismv_muxer"
-spdif_muxer_select="aac_parser"
+spdif_demuxer_select="adts_header"
+spdif_muxer_select="adts_header"
 spx_muxer_select="ogg_muxer"
 swf_demuxer_suggest="zlib"
 tak_demuxer_select="tak_parser"
@@ -3079,16 +3232,20 @@
 xwma_demuxer_select="riffdec"
 
 # indevs / outdevs
+android_camera_indev_deps="android camera2ndk mediandk pthreads"
+android_camera_indev_extralibs="-landroid -lcamera2ndk -lmediandk"
 alsa_indev_deps="alsa"
 alsa_outdev_deps="alsa"
-avfoundation_indev_deps="avfoundation pthreads"
-avfoundation_indev_extralibs="-framework Foundation -framework CoreVideo -framework CoreMedia"
+avfoundation_indev_deps="avfoundation corevideo coremedia pthreads"
+avfoundation_indev_suggest="coregraphics applicationservices"
+avfoundation_indev_extralibs="-framework Foundation"
 bktr_indev_deps_any="dev_bktr_ioctl_bt848_h machine_ioctl_bt848_h dev_video_bktr_ioctl_bt848_h dev_ic_bt8xx_h"
 caca_outdev_deps="libcaca"
 decklink_deps_any="libdl LoadLibrary"
 decklink_indev_deps="decklink threads"
 decklink_indev_extralibs="-lstdc++"
 decklink_outdev_deps="decklink threads"
+decklink_outdev_suggest="libklvanc"
 decklink_outdev_extralibs="-lstdc++"
 libndi_newtek_indev_deps="libndi_newtek"
 libndi_newtek_indev_extralibs="-lndi"
@@ -3102,82 +3259,92 @@
 gdigrab_indev_extralibs="-lgdi32"
 gdigrab_indev_select="bmp_decoder"
 iec61883_indev_deps="libiec61883"
-jack_indev_deps="jack"
+jack_indev_deps="libjack"
 jack_indev_deps_any="sem_timedwait dispatch_dispatch_h"
 kmsgrab_indev_deps="libdrm"
 lavfi_indev_deps="avfilter"
 libcdio_indev_deps="libcdio"
 libdc1394_indev_deps="libdc1394"
-libv4l2_indev_deps="libv4l2"
 openal_indev_deps="openal"
 opengl_outdev_deps="opengl"
-oss_indev_deps_any="soundcard_h sys_soundcard_h"
-oss_outdev_deps_any="soundcard_h sys_soundcard_h"
+oss_indev_deps_any="sys_soundcard_h"
+oss_outdev_deps_any="sys_soundcard_h"
 pulse_indev_deps="libpulse"
 pulse_outdev_deps="libpulse"
 sdl2_outdev_deps="sdl2"
 sndio_indev_deps="sndio"
 sndio_outdev_deps="sndio"
 v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h"
+v4l2_indev_suggest="libv4l2"
 v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h"
+v4l2_outdev_suggest="libv4l2"
 vfwcap_indev_deps="vfw32 vfwcap_defines"
 xcbgrab_indev_deps="libxcb"
-xv_outdev_deps="X11_extensions_Xvlib_h XvGetPortAttribute"
-xv_outdev_extralibs="-lXv -lX11 -lXext"
+xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes"
+xv_outdev_deps="xlib"
 
 # protocols
 async_protocol_deps="threads"
 bluray_protocol_deps="libbluray"
 ffrtmpcrypt_protocol_conflict="librtmp_protocol"
-ffrtmpcrypt_protocol_deps_any="gcrypt gmp openssl"
+ffrtmpcrypt_protocol_deps_any="gcrypt gmp openssl mbedtls"
 ffrtmpcrypt_protocol_select="tcp_protocol"
 ffrtmphttp_protocol_conflict="librtmp_protocol"
 ffrtmphttp_protocol_select="http_protocol"
 ftp_protocol_select="tcp_protocol"
 gopher_protocol_select="network"
 http_protocol_select="tcp_protocol"
+http_protocol_suggest="zlib"
 httpproxy_protocol_select="tcp_protocol"
+httpproxy_protocol_suggest="zlib"
 https_protocol_select="tls_protocol"
+https_protocol_suggest="zlib"
 icecast_protocol_select="http_protocol"
+mmsh_protocol_select="http_protocol"
+mmst_protocol_select="network"
+rtmp_protocol_conflict="librtmp_protocol"
+rtmp_protocol_select="tcp_protocol"
+rtmp_protocol_suggest="zlib"
+rtmpe_protocol_select="ffrtmpcrypt_protocol"
+rtmpe_protocol_suggest="zlib"
+rtmps_protocol_conflict="librtmp_protocol"
+rtmps_protocol_select="tls_protocol"
+rtmps_protocol_suggest="zlib"
+rtmpt_protocol_select="ffrtmphttp_protocol"
+rtmpt_protocol_suggest="zlib"
+rtmpte_protocol_select="ffrtmpcrypt_protocol ffrtmphttp_protocol"
+rtmpte_protocol_suggest="zlib"
+rtmpts_protocol_select="ffrtmphttp_protocol https_protocol"
+rtmpts_protocol_suggest="zlib"
+rtp_protocol_select="udp_protocol"
+schannel_conflict="openssl gnutls libtls mbedtls"
+sctp_protocol_deps="struct_sctp_event_subscribe struct_msghdr_msg_flags"
+sctp_protocol_select="network"
+securetransport_conflict="openssl gnutls libtls mbedtls"
+srtp_protocol_select="rtp_protocol srtp"
+tcp_protocol_select="network"
+tls_protocol_deps_any="gnutls openssl schannel securetransport libtls mbedtls"
+tls_protocol_select="tcp_protocol"
+udp_protocol_select="network"
+udplite_protocol_select="network"
+unix_protocol_deps="sys_un_h"
+unix_protocol_select="network"
+
+# external library protocols
 librtmp_protocol_deps="librtmp"
 librtmpe_protocol_deps="librtmp"
 librtmps_protocol_deps="librtmp"
 librtmpt_protocol_deps="librtmp"
 librtmpte_protocol_deps="librtmp"
 libsmbclient_protocol_deps="libsmbclient gplv3"
+libsrt_protocol_deps="libsrt"
+libsrt_protocol_select="network"
 libssh_protocol_deps="libssh"
-mmsh_protocol_select="http_protocol"
-mmst_protocol_select="network"
-rtmp_protocol_conflict="librtmp_protocol"
-rtmp_protocol_select="tcp_protocol"
-rtmpe_protocol_select="ffrtmpcrypt_protocol"
-rtmps_protocol_conflict="librtmp_protocol"
-rtmps_protocol_select="tls_protocol"
-rtmpt_protocol_select="ffrtmphttp_protocol"
-rtmpte_protocol_select="ffrtmpcrypt_protocol ffrtmphttp_protocol"
-rtmpts_protocol_select="ffrtmphttp_protocol https_protocol"
-rtp_protocol_select="udp_protocol"
-sctp_protocol_deps="struct_sctp_event_subscribe struct_msghdr_msg_flags"
-sctp_protocol_select="network"
-srtp_protocol_select="rtp_protocol srtp"
-tcp_protocol_select="network"
-tls_gnutls_protocol_conflict="tls_schannel_protocol tls_securetransport_protocol"
-tls_gnutls_protocol_deps="gnutls"
-tls_gnutls_protocol_select="tcp_protocol"
-tls_openssl_protocol_conflict="tls_schannel_protocol tls_securetransport_protocol tls_gnutls_protocol"
-tls_openssl_protocol_deps="openssl"
-tls_openssl_protocol_select="tcp_protocol"
-tls_schannel_protocol_deps="schannel"
-tls_schannel_protocol_select="tcp_protocol"
-tls_securetransport_protocol_deps="securetransport"
-tls_securetransport_protocol_select="tcp_protocol"
-tls_protocol_deps_any="tls_schannel_protocol tls_securetransport_protocol tls_gnutls_protocol tls_openssl_protocol"
-udp_protocol_select="network"
-udplite_protocol_select="network"
-unix_protocol_deps="sys_un_h"
-unix_protocol_select="network"
+libtls_conflict="openssl gnutls mbedtls"
 
 # filters
+afftdn_filter_deps="avcodec"
+afftdn_filter_select="fft"
 afftfilt_filter_deps="avcodec"
 afftfilt_filter_select="fft"
 afir_filter_deps="avcodec"
@@ -3187,26 +3354,41 @@
 ass_filter_deps="libass"
 atempo_filter_deps="avcodec"
 atempo_filter_select="rdft"
+avgblur_opencl_filter_deps="opencl"
 azmq_filter_deps="libzmq"
 blackframe_filter_deps="gpl"
+bm3d_filter_deps="avcodec"
+bm3d_filter_select="dct"
 boxblur_filter_deps="gpl"
+boxblur_opencl_filter_deps="opencl gpl"
 bs2b_filter_deps="libbs2b"
 colormatrix_filter_deps="gpl"
+convolution_opencl_filter_deps="opencl"
+convolve_filter_deps="avcodec"
+convolve_filter_select="fft"
 coreimage_filter_deps="coreimage appkit"
 coreimage_filter_extralibs="-framework OpenGL"
 coreimagesrc_filter_deps="coreimage appkit"
 coreimagesrc_filter_extralibs="-framework OpenGL"
 cover_rect_filter_deps="avcodec avformat gpl"
 cropdetect_filter_deps="gpl"
+deconvolve_filter_deps="avcodec"
+deconvolve_filter_select="fft"
 deinterlace_qsv_filter_deps="libmfx"
 deinterlace_vaapi_filter_deps="vaapi"
 delogo_filter_deps="gpl"
+denoise_vaapi_filter_deps="vaapi"
 deshake_filter_select="pixelutils"
+dilation_opencl_filter_deps="opencl"
 drawtext_filter_deps="libfreetype"
+drawtext_filter_suggest="libfontconfig libfribidi"
 elbg_filter_deps="avcodec"
 eq_filter_deps="gpl"
+erosion_opencl_filter_deps="opencl"
 fftfilt_filter_deps="avcodec"
 fftfilt_filter_select="rdft"
+fftdnoiz_filter_deps="avcodec"
+fftdnoiz_filter_select="fft"
 find_rect_filter_deps="avcodec avformat gpl"
 firequalizer_filter_deps="avcodec"
 firequalizer_filter_select="rdft"
@@ -3221,6 +3403,8 @@
 interlace_filter_deps="gpl"
 kerndeint_filter_deps="gpl"
 ladspa_filter_deps="ladspa libdl"
+lensfun_filter_deps="liblensfun version3"
+lv2_filter_deps="lv2"
 mcdeint_filter_deps="avcodec gpl"
 movie_filter_deps="avcodec avformat"
 mpdecimate_filter_deps="gpl"
@@ -3230,23 +3414,33 @@
 nnedi_filter_deps="gpl"
 ocr_filter_deps="libtesseract"
 ocv_filter_deps="libopencv"
+openclsrc_filter_deps="opencl"
+overlay_opencl_filter_deps="opencl"
+overlay_qsv_filter_deps="libmfx"
+overlay_qsv_filter_select="qsvvpp"
 owdenoise_filter_deps="gpl"
 pan_filter_deps="swresample"
 perspective_filter_deps="gpl"
 phase_filter_deps="gpl"
 pp7_filter_deps="gpl"
 pp_filter_deps="gpl postproc"
+prewitt_opencl_filter_deps="opencl"
+procamp_vaapi_filter_deps="vaapi"
+program_opencl_filter_deps="opencl"
 pullup_filter_deps="gpl"
 removelogo_filter_deps="avcodec avformat swscale"
 repeatfields_filter_deps="gpl"
 resample_filter_deps="avresample"
+roberts_opencl_filter_deps="opencl"
 rubberband_filter_deps="librubberband"
 sab_filter_deps="gpl swscale"
 scale2ref_filter_deps="swscale"
 scale_filter_deps="swscale"
 scale_qsv_filter_deps="libmfx"
 select_filter_select="pixelutils"
+sharpness_vaapi_filter_deps="vaapi"
 showcqt_filter_deps="avcodec avformat swscale"
+showcqt_filter_suggest="libfontconfig libfreetype"
 showcqt_filter_select="fft"
 showfreqs_filter_deps="avcodec"
 showfreqs_filter_select="fft"
@@ -3256,12 +3450,15 @@
 showspectrumpic_filter_select="fft"
 signature_filter_deps="gpl avcodec avformat"
 smartblur_filter_deps="gpl swscale"
+sobel_opencl_filter_deps="opencl"
 sofalizer_filter_deps="libmysofa avcodec"
 sofalizer_filter_select="fft"
 spectrumsynth_filter_deps="avcodec"
 spectrumsynth_filter_select="fft"
 spp_filter_deps="gpl avcodec"
 spp_filter_select="fft idctdsp fdctdsp me_cmp pixblockdsp"
+sr_filter_deps="avformat swscale"
+sr_filter_select="dnn"
 stereo3d_filter_deps="gpl"
 subtitles_filter_deps="avformat avcodec libass"
 super2xsai_filter_deps="gpl"
@@ -3270,15 +3467,19 @@
 tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
+tonemap_opencl_filter_deps="opencl const_nan"
+unsharp_opencl_filter_deps="opencl"
 uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
 vidstabdetect_filter_deps="libvidstab"
 vidstabtransform_filter_deps="libvidstab"
-libvmaf_filter_deps="libvmaf"
+libvmaf_filter_deps="libvmaf pthreads"
 zmq_filter_deps="libzmq"
 zoompan_filter_deps="swscale"
 zscale_filter_deps="libzimg const_nan"
-scale_vaapi_filter_deps="vaapi VAProcPipelineParameterBuffer"
+scale_vaapi_filter_deps="vaapi"
+vpp_qsv_filter_deps="libmfx"
+vpp_qsv_filter_select="qsvvpp"
 
 # examples
 avio_dir_cmd_deps="avformat avutil"
@@ -3302,30 +3503,48 @@
 scaling_video_example_deps="avutil swscale"
 transcode_aac_example_deps="avcodec avformat swresample"
 transcoding_example_deps="avfilter avcodec avformat avutil"
+vaapi_encode_example_deps="avcodec avutil h264_vaapi_encoder"
+vaapi_transcode_example_deps="avcodec avformat avutil h264_vaapi_encoder"
 
-# libraries, in linking order
+# EXTRALIBS_LIST
+cpu_init_extralibs="pthreads_extralibs"
+cws2fws_extralibs="zlib_extralibs"
+
+# libraries, in any order
 avcodec_deps="avutil"
+avcodec_suggest="libm"
 avcodec_select="null_bsf"
 avdevice_deps="avformat avcodec avutil"
+avdevice_suggest="libm"
 avfilter_deps="avutil"
+avfilter_suggest="libm"
 avformat_deps="avcodec avutil"
-avformat_suggest="network"
+avformat_suggest="libm network zlib"
 avresample_deps="avutil"
+avresample_suggest="libm"
+avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
 postproc_deps="avutil gpl"
+postproc_suggest="libm"
 swresample_deps="avutil"
+swresample_suggest="libm libsoxr"
 swscale_deps="avutil"
+swscale_suggest="libm"
+
+avcodec_extralibs="pthreads_extralibs iconv_extralibs"
+avfilter_extralibs="pthreads_extralibs"
+avutil_extralibs="d3d11va_extralibs nanosleep_extralibs pthreads_extralibs vaapi_drm_extralibs vaapi_x11_extralibs vdpau_x11_extralibs"
 
 # programs
-ffmpeg_deps="avcodec avfilter avformat swresample"
+ffmpeg_deps="avcodec avfilter avformat"
 ffmpeg_select="aformat_filter anull_filter atrim_filter format_filter
                null_filter
                trim_filter"
+ffmpeg_suggest="ole32 psapi shell32"
 ffplay_deps="avcodec avformat swscale swresample sdl2"
-ffplay_extralibs='$sdl2_extralibs'
 ffplay_select="rdft crop_filter transpose_filter hflip_filter vflip_filter rotate_filter"
+ffplay_suggest="shell32"
 ffprobe_deps="avcodec avformat"
-ffserver_deps="avformat fork sarestart"
-ffserver_select="ffm_muxer rtp_protocol rtsp_demuxer"
+ffprobe_suggest="shell32"
 
 # documentation
 podpages_deps="perl"
@@ -3361,6 +3580,7 @@
 ranlib_default="ranlib"
 strip_default="strip"
 version_script='--version-script'
+objformat="elf32"
 x86asmexe_default="nasm"
 windres_default="windres"
 nvcc_default="nvcc"
@@ -3402,6 +3622,12 @@
 sws_max_filter_size_default=256
 set_default sws_max_filter_size
 
+# internal components are enabled by default
+enable $EXTRALIBS_LIST
+
+# Avoid external, non-system, libraries getting enabled by dependency resolution
+disable $EXTERNAL_LIBRARY_LIST $HWACCEL_LIBRARY_LIST
+
 # build settings
 SHFLAGS='-shared -Wl,-soname,$$(@F)'
 LIBPREF="lib"
@@ -3475,47 +3701,66 @@
     FFMPEG_CONFIGURATION="${FFMPEG_CONFIGURATION# } ${l}${r}"
 done
 
-find_things(){
-    thing=$1
-    pattern=$2
-    file=$source_path/$3
-    sed -n "s/^[^#]*$pattern.*([^,]*, *\([^,]*\)\(,.*\)*).*/\1_$thing/p" "$file"
-}
-
-ENCODER_LIST=$(find_things  encoder  ENC      libavcodec/allcodecs.c)
-DECODER_LIST=$(find_things  decoder  DEC      libavcodec/allcodecs.c)
-HWACCEL_LIST=$(find_things  hwaccel  HWACCEL  libavcodec/allcodecs.c)
-PARSER_LIST=$(find_things   parser   PARSER   libavcodec/allcodecs.c)
-MUXER_LIST=$(find_things    muxer    _MUX     libavformat/allformats.c)
-DEMUXER_LIST=$(find_things  demuxer  DEMUX    libavformat/allformats.c)
-OUTDEV_LIST=$(find_things   outdev   OUTDEV   libavdevice/alldevices.c)
-INDEV_LIST=$(find_things    indev    _IN      libavdevice/alldevices.c)
-FILTER_LIST=$(find_things   filter   FILTER   libavfilter/allfilters.c)
-
 find_things_extern(){
     thing=$1
     pattern=$2
     file=$source_path/$3
-    sed -n "s/^[^#]*extern.*$pattern *ff_\([^ ]*\)_$thing;/\1_$thing/p" "$file"
+    out=${4:-$thing}
+    sed -n "s/^[^#]*extern.*$pattern *ff_\([^ ]*\)_$thing;/\1_$out/p" "$file"
 }
 
+find_filters_extern(){
+    file=$source_path/$1
+    #sed -n "s/^extern AVFilter ff_\([avfsinkrc]\{2,5\}\)_\(\w\+\);/\2_filter/p" $file
+    sed -E -n "s/^extern AVFilter ff_([avfsinkrc]{2,5})_([a-zA-Z0-9_]+);/\2_filter/p" $file
+}
+
+FILTER_LIST=$(find_filters_extern libavfilter/allfilters.c)
+OUTDEV_LIST=$(find_things_extern muxer AVOutputFormat libavdevice/alldevices.c outdev)
+INDEV_LIST=$(find_things_extern demuxer AVInputFormat libavdevice/alldevices.c indev)
+MUXER_LIST=$(find_things_extern muxer AVOutputFormat libavformat/allformats.c)
+DEMUXER_LIST=$(find_things_extern demuxer AVInputFormat libavformat/allformats.c)
+ENCODER_LIST=$(find_things_extern encoder AVCodec libavcodec/allcodecs.c)
+DECODER_LIST=$(find_things_extern decoder AVCodec libavcodec/allcodecs.c)
+CODEC_LIST="
+    $ENCODER_LIST
+    $DECODER_LIST
+"
+PARSER_LIST=$(find_things_extern parser AVCodecParser libavcodec/parsers.c)
 BSF_LIST=$(find_things_extern bsf AVBitStreamFilter libavcodec/bitstream_filters.c)
+HWACCEL_LIST=$(find_things_extern hwaccel AVHWAccel libavcodec/hwaccels.h)
 PROTOCOL_LIST=$(find_things_extern protocol URLProtocol libavformat/protocols.c)
 
-ALL_COMPONENTS="
+AVCODEC_COMPONENTS_LIST="
     $BSF_LIST
     $DECODER_LIST
-    $DEMUXER_LIST
     $ENCODER_LIST
-    $FILTER_LIST
     $HWACCEL_LIST
-    $INDEV_LIST
-    $MUXER_LIST
-    $OUTDEV_LIST
     $PARSER_LIST
+"
+
+AVDEVICE_COMPONENTS_LIST="
+    $INDEV_LIST
+    $OUTDEV_LIST
+"
+
+AVFILTER_COMPONENTS_LIST="
+    $FILTER_LIST
+"
+
+AVFORMAT_COMPONENTS_LIST="
+    $DEMUXER_LIST
+    $MUXER_LIST
     $PROTOCOL_LIST
 "
 
+ALL_COMPONENTS="
+    $AVCODEC_COMPONENTS_LIST
+    $AVDEVICE_COMPONENTS_LIST
+    $AVFILTER_COMPONENTS_LIST
+    $AVFORMAT_COMPONENTS_LIST
+"
+
 for n in $COMPONENT_LIST; do
     v=$(toupper ${n%s})_LIST
     eval enable \$$v
@@ -3572,8 +3817,13 @@
         --extra-ldexeflags=*)
             add_ldexeflags $optval
         ;;
+        --extra-ldsoflags=*)
+            add_ldsoflags $optval
+        ;;
         --extra-ldlibflags=*)
-            add_ldlibflags $optval
+            warn "The --extra-ldlibflags option is only provided for compatibility and will be\n"\
+                 "removed in the future. Use --extra-ldsoflags instead."
+            add_ldsoflags $optval
         ;;
         --extra-libs=*)
             add_extralibs $optval
@@ -3689,6 +3939,19 @@
 
 disabled logging && logfile=/dev/null
 
+# command line configuration sanity checks
+
+# we need to build at least one lib type
+if ! enabled_any static shared; then
+    cat <<EOF
+At least one library type must be built.
+Specify --enable-static to build the static libraries or --enable-shared to
+build the shared libraries as well. To only build the shared libraries specify
+--disable-static in addition to --enable-shared.
+EOF
+    exit 1
+fi
+
 die_license_disabled() {
     enabled $1 || { enabled $v && die "$v is $1 and --enable-$1 is not specified."; }
 }
@@ -3705,6 +3968,27 @@
 
 enabled version3 && { enabled gpl && enable gplv3 || enable lgplv3; }
 
+if enabled nonfree; then
+    license="nonfree and unredistributable"
+elif enabled gplv3; then
+    license="GPL version 3 or later"
+elif enabled lgplv3; then
+    license="LGPL version 3 or later"
+elif enabled gpl; then
+    license="GPL version 2 or later"
+else
+    license="LGPL version 2.1 or later"
+fi
+
+enabled_all gnutls openssl &&
+    die "GnuTLS and OpenSSL must not be enabled at the same time."
+
+enabled_all gnutls mbedtls &&
+    die "GnuTLS and mbedTLS must not be enabled at the same time."
+
+enabled_all openssl mbedtls &&
+    die "OpenSSL and mbedTLS must not be enabled at the same time."
+
 # Disable all the library-specific components if the library itself
 # is disabled, see AVCODEC_LIST and following _LIST variables.
 
@@ -3722,7 +4006,7 @@
 
 test -n "$valgrind" && toolchain="valgrind-memcheck"
 
-enabled ossfuzz && {
+enabled ossfuzz && ! echo $CFLAGS | grep -q -- "-fsanitize="  && ! echo $CFLAGS | grep -q -- "-fcoverage-mapping" &&{
     add_cflags  -fsanitize=address,undefined -fsanitize-coverage=trace-pc-guard,trace-cmp -fno-omit-frame-pointer
     add_ldflags -fsanitize=address,undefined -fsanitize-coverage=trace-pc-guard,trace-cmp
 }
@@ -3740,8 +4024,8 @@
     ;;
     *-tsan)
         cc_default="${toolchain%-tsan}"
-        add_cflags  -fsanitize=thread -fPIE
-        add_ldflags -fsanitize=thread -pie
+        add_cflags  -fsanitize=thread
+        add_ldflags -fsanitize=thread
         case "$toolchain" in
             gcc-tsan)
                 add_cflags  -fPIC
@@ -3777,13 +4061,15 @@
             cc_default="cl"
             cxx_default="cl"
         else
-            cc_default="c99wrap cl"
-            cxx_default="c99wrap cl"
+            die "Unsupported MSVC version (2013 or newer required)"
         fi
         ld_default="$source_path/compat/windows/mslink"
         nm_default="dumpbin -symbols"
         ar_default="lib"
         case "$arch" in
+        aarch64|arm64)
+            as_default="armasm64"
+            ;;
         arm*)
             as_default="armasm"
             ;;
@@ -3823,11 +4109,10 @@
     ;;
 esac
 
-test -n "$cross_prefix" && enable cross_compile
-
-if enabled cross_compile; then
+if test -n "$cross_prefix"; then
     test -n "$arch" && test -n "$target_os" ||
         die "Must specify target arch (--arch) and OS (--target-os) when cross-compiling"
+    enable cross_compile
 fi
 
 ar_default="${cross_prefix}${ar_default}"
@@ -3884,7 +4169,7 @@
         echo "$tmpname"
         mkdir "$tmpname"
     }
-elif ! check_cmd mktemp -u XXXXXX; then
+elif ! test_cmd mktemp -u XXXXXX; then
     # simple replacement for missing mktemp
     # NOT SAFE FOR GENERAL USE
     mktemp(){
@@ -3946,24 +4231,6 @@
    done
 }
 
-ccc_flags(){
-    for flag; do
-        case $flag in
-            -std=c99)           echo -c99                       ;;
-            -mcpu=*)            echo -arch ${flag#*=}           ;;
-            -mieee)             echo -ieee                      ;;
-            -O*|-fast)          echo $flag                      ;;
-            -fno-math-errno)    echo -assume nomath_errno       ;;
-            -g)                 echo -g3                        ;;
-            -Wall)              echo -msg_enable level2         ;;
-            -Wno-pointer-sign)  echo -msg_disable ptrmismatch1  ;;
-            -Wl,*)              echo $flag                      ;;
-            -f*|-W*)                                            ;;
-            *)                  echo $flag                      ;;
-        esac
-   done
-}
-
 cparser_flags(){
     for flag; do
         case $flag in
@@ -4042,17 +4309,6 @@
     done
 }
 
-pgi_flags(){
-    for flag; do
-        case $flag in
-            -flto)                echo -Mipa=fast,libopt,libinline,vestigial ;;
-            -fomit-frame-pointer) echo -Mnoframe ;;
-            -g)                   echo -gopt ;;
-            *)                    echo $flag ;;
-        esac
-    done
-}
-
 suncc_flags(){
     for flag; do
         case $flag in
@@ -4097,35 +4353,6 @@
     done
 }
 
-tms470_flags(){
-    for flag; do
-        case $flag in
-            -march=*|-mcpu=*)
-                case "${flag#*=}" in
-                    armv7-a|cortex-a*)      echo -mv=7a8 ;;
-                    armv7-r|cortex-r*)      echo -mv=7r4 ;;
-                    armv7-m|cortex-m*)      echo -mv=7m3 ;;
-                    armv6*|arm11*)          echo -mv=6   ;;
-                    armv5*e|arm[79]*e*|arm9[24]6*|arm96*|arm102[26])
-                                            echo -mv=5e  ;;
-                    armv4*|arm7*|arm9[24]*) echo -mv=4   ;;
-                esac
-                ;;
-            -mfpu=neon)     echo --float_support=vfpv3 --neon ;;
-            -mfpu=vfp)      echo --float_support=vfpv2        ;;
-            -mfpu=vfpv3)    echo --float_support=vfpv3        ;;
-            -mfpu=vfpv3-d16) echo --float_support=vfpv3d16    ;;
-            -msoft-float)   echo --float_support=vfplib       ;;
-            -O[0-3]|-mf=*)  echo $flag                        ;;
-            -g)             echo -g -mn                       ;;
-            -pds=*)         echo $flag                        ;;
-            -D*|-I*)        echo $flag                        ;;
-            --gcc|--abi=*)  echo $flag                        ;;
-            -me)            echo $flag                        ;;
-        esac
-    done
-}
-
 probe_cc(){
     pfx=$1
     _cc=$2
@@ -4178,13 +4405,6 @@
         _ident=$($_cc -qversion 2>/dev/null | head -n1)
         _cflags_speed='-O5'
         _cflags_size='-O5 -qcompact'
-    elif $_cc -V 2>/dev/null | grep -q Compaq; then
-        _type=ccc
-        _ident=$($_cc -V | head -n1 | cut -d' ' -f1-3)
-        _DEPFLAGS='-M'
-        _cflags_speed='-fast'
-        _cflags_size='-O1'
-        _flags_filter=ccc_flags
     elif $_cc --vsn 2>/dev/null | grep -Eq "ARM (C/C\+\+ )?Compiler"; then
         test -d "$sysroot" || die "No valid sysroot specified."
         _type=armcc
@@ -4201,17 +4421,7 @@
         _depflags='-MMD'
         _cflags_speed='-O3'
         _cflags_size='-Os'
-    elif $_cc -version 2>/dev/null | grep -Eq 'TMS470|TI ARM'; then
-        _type=tms470
-        _ident=$($_cc -version | head -n1 | tr -s ' ')
-        _flags='--gcc --abi=eabi -me'
-        _cc_e='-ppl -fe=$@'
-        _cc_o='-fe=$@'
-        _depflags='-ppa -ppd=$(@:.o=.d)'
-        _cflags_speed='-O3 -mf=5'
-        _cflags_size='-O3 -mf=2'
-        _flags_filter=tms470_flags
-    elif $_cc -v 2>&1 | grep -q clang; then
+    elif $_cc -v 2>&1 | grep -q clang && ! $_cc -? > /dev/null 2>&1; then
         _type=clang
         _ident=$($_cc --version 2>/dev/null | head -n1)
         _depflags='-MMD -MF $(@:.o=.d) -MT $@'
@@ -4240,14 +4450,6 @@
         _cflags_speed='-O2'
         _cflags_size='-Os'
         _flags_filter='filter_out -Wdisabled-optimization|-Wtype-limits|-fno-signed-zeros'
-    elif $_cc -V 2>&1 | grep -q Portland; then
-        _type=pgi
-        _ident="PGI $($_cc -V 2>&1 | awk '/^pgcc/ { print $2; exit }')"
-        opt_common='-alias=ansi -Mdse -Mlre -Mpre'
-        _cflags_speed="-O3 -Mautoinline -Munroll=c:4 $opt_common"
-        _cflags_size="-O2 -Munroll=c:1 $opt_common"
-        _cflags_noopt="-O"
-        _flags_filter=pgi_flags
     elif $_cc 2>&1 | grep -q 'Microsoft.*ARM.*Assembler'; then
         _type=armasm
         _ident=$($_cc | head -n1)
@@ -4290,9 +4492,9 @@
         _flags_filter=msvc_flags
         _ld_lib='lib%.a'
         _ld_path='-libpath:'
-    elif $_cc -nologo- 2>&1 | grep -q Microsoft; then
+    elif $_cc -nologo- 2>&1 | grep -q Microsoft || { $_cc -v 2>&1 | grep -q clang && $_cc -? > /dev/null 2>&1; }; then
         _type=msvc
-        _ident=$($_cc 2>&1 | head -n1)
+        _ident=$($_cc 2>&1 | head -n1 | tr -d '\r')
         _DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< 2>&1 | awk '\''/including/ { sub(/^.*file: */, ""); gsub(/\\/, "/"); if (!match($$0, / /)) print "$@:", $$0 }'\'' > $(@:.o=.d)'
         _DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -showIncludes -Zs'
         _cflags_speed="-O2"
@@ -4396,12 +4598,6 @@
 if $ar 2>&1 | grep -q Microsoft; then
     arflags="-nologo"
     ar_o='-out:$@'
-elif $ar 2>&1 | grep -q 'Texas Instruments'; then
-    arflags="rq"
-    ar_o='$@'
-elif $ar 2>&1 | grep -q 'Usage: ar.*-X.*any'; then
-    arflags='-Xany -r -c'
-    ar_o='$@'
 elif $ar 2>&1 | grep -q "\[D\] "; then
     arflags="rcD"
     ar_o='$@'
@@ -4420,13 +4616,6 @@
         gcc|llvm_gcc|clang)
             add_cppflags --sysroot="$sysroot"
             add_ldflags --sysroot="$sysroot"
-# On Darwin --sysroot may be ignored, -isysroot always affects headers and linking
-            add_cppflags -isysroot "$sysroot"
-            add_ldflags -isysroot "$sysroot"
-        ;;
-        tms470)
-            add_cppflags -I"$sysinclude"
-            add_ldflags  --sysroot="$sysroot"
         ;;
     esac
 fi
@@ -4530,7 +4719,7 @@
 elif enabled arm; then
 
     check_arm_arch() {
-        check_cpp_condition stddef.h \
+        test_cpp_condition stddef.h \
             "defined __ARM_ARCH_${1}__ || defined __TARGET_ARCH_${2:-$1}" \
             $cpuflags
     }
@@ -4547,6 +4736,7 @@
         elif check_arm_arch 6J;       then echo armv6j
         elif check_arm_arch 6K;       then echo armv6k
         elif check_arm_arch 6Z;       then echo armv6z
+        elif check_arm_arch 6KZ;      then echo armv6zk
         elif check_arm_arch 6ZK;      then echo armv6zk
         elif check_arm_arch 6T2;      then echo armv6t2
         elif check_arm_arch 7;        then echo armv7
@@ -4638,7 +4828,7 @@
             loongson*)
                 enable loongson2
                 enable loongson3
-                enable local_aligned_8 local_aligned_16 local_aligned_32
+                enable local_aligned
                 enable simd_align_16
                 enable fast_64bit
                 enable fast_clz
@@ -4648,15 +4838,24 @@
                 disable mipsfpu
                 disable mipsdsp
                 disable mipsdspr2
+                # When gcc version less than 5.3.0, add -fno-expensive-optimizations flag.
+                if [ $cc == gcc ]; then
+                    gcc_version=$(gcc -dumpversion)
+                    if [ "$(echo "$gcc_version 5.3.0" | tr " " "\n" | sort -rV | head -n 1)" == "$gcc_version" ]; then
+                        expensive_optimization_flag=""
+                    else
+                        expensive_optimization_flag="-fno-expensive-optimizations"
+                    fi
+                fi
                 case $cpu in
                     loongson3*)
-                        cpuflags="-march=loongson3a -mhard-float -fno-expensive-optimizations"
+                        cpuflags="-march=loongson3a -mhard-float $expensive_optimization_flag"
                     ;;
                     loongson2e)
-                        cpuflags="-march=loongson2e -mhard-float -fno-expensive-optimizations"
+                        cpuflags="-march=loongson2e -mhard-float $expensive_optimization_flag"
                     ;;
                     loongson2f)
-                        cpuflags="-march=loongson2f -mhard-float -fno-expensive-optimizations"
+                        cpuflags="-march=loongson2f -mhard-float $expensive_optimization_flag"
                     ;;
                 esac
             ;;
@@ -4819,7 +5018,7 @@
 fi
 
 # compiler sanity check
-check_exec <<EOF
+test_exec <<EOF
 int main(void){ return 0; }
 EOF
 if test "$?" != 0; then
@@ -4852,66 +5051,62 @@
 check_64bit(){
     arch32=$1
     arch64=$2
-    expr=$3
-    check_code cc "" "int test[2*($expr) - 1]" &&
+    expr=${3:-'sizeof(void *) > 4'}
+    test_code cc "" "int test[2*($expr) - 1]" &&
         subarch=$arch64 || subarch=$arch32
+    enable $subarch
 }
 
 case "$arch" in
     aarch64|alpha|ia64)
-        spic=$shared
+        enabled shared && enable_weak pic
     ;;
     mips)
         check_64bit mips mips64 '_MIPS_SIM > 1'
-        spic=$shared
+        enabled shared && enable_weak pic
     ;;
     parisc)
-        check_64bit parisc parisc64 'sizeof(void *) > 4'
-        spic=$shared
+        check_64bit parisc parisc64
+        enabled shared && enable_weak pic
     ;;
     ppc)
-        check_64bit ppc ppc64 'sizeof(void *) > 4'
-        spic=$shared
+        check_64bit ppc ppc64
+        enabled shared && enable_weak pic
     ;;
     s390)
-        check_64bit s390 s390x 'sizeof(void *) > 4'
-        spic=$shared
+        check_64bit s390 s390x
+        enabled shared && enable_weak pic
     ;;
     sparc)
-        check_64bit sparc sparc64 'sizeof(void *) > 4'
-        spic=$shared
+        check_64bit sparc sparc64
+        enabled shared && enable_weak pic
     ;;
     x86)
-        check_64bit x86_32 x86_64 'sizeof(void *) > 4'
-        # Treat x32 as x64 for now. Note it also needs spic=$shared
-        test "$subarch" = "x86_32" && check_cpp_condition stddef.h 'defined(__x86_64__)' &&
-            subarch=x86_64
-        if test "$subarch" = "x86_64"; then
-            spic=$shared
+        check_64bit x86_32 x86_64
+        # Treat x32 as x64 for now. Note it also needs pic if shared
+        test "$subarch" = "x86_32" && test_cpp_condition stddef.h 'defined(__x86_64__)' &&
+            subarch=x86_64 && enable x86_64 && disable x86_32
+        if enabled x86_64; then
+            enabled shared && enable_weak pic
+            objformat=elf64
         fi
     ;;
-    ppc)
-        check_cc <<EOF && subarch="ppc64"
-        int test[(int)sizeof(char*) - 7];
-EOF
-    ;;
 esac
 
-enable $subarch
-enabled spic && enable_weak pic
-
-enabled x86_64 && objformat=elf64 || objformat="elf32"
-
 # OS specific
 case $target_os in
     aix)
         SHFLAGS=-shared
         add_cppflags '-I\$(SRC_PATH)/compat/aix'
         enabled shared && add_ldflags -Wl,-brtl
+        arflags='-Xany -r -c'
+        striptype=""
         ;;
     android)
         disable symver
         enable section_data_rel_ro
+        add_cflags -fPIE
+        add_ldexeflags -fPIE -pie
         SLIB_INSTALL_NAME='$(SLIBNAME)'
         SLIB_INSTALL_LINKS=
         SHFLAGS='-shared -Wl,-soname,$(SLIBNAME)'
@@ -4923,7 +5118,7 @@
         ;;
     sunos)
         SHFLAGS='-shared -Wl,-h,$$(@F)'
-        enabled x86 && SHFLAGS="-mimpure-text $SHFLAGS"
+        enabled x86 && append SHFLAGS -mimpure-text
         network_extralibs="-lsocket -lnsl"
         add_cppflags -D__EXTENSIONS__
         # When using suncc to build, the Solaris linker will mark
@@ -4946,6 +5141,7 @@
         ;;
     openbsd|bitrig)
         disable symver
+        striptype=""
         SHFLAGS='-shared'
         SLIB_INSTALL_NAME='$(SLIBNAME).$(LIBMAJOR).$(LIBMINOR)'
         SLIB_INSTALL_LINKS=
@@ -4968,15 +5164,19 @@
         enabled x86_32 && append SHFLAGS -Wl,-read_only_relocs,suppress
         strip="${strip} -x"
         add_ldflags -Wl,-dynamic,-search_paths_first
+        check_cflags -Werror=partial-availability
         SLIBSUF=".dylib"
         SLIBNAME_WITH_VERSION='$(SLIBPREF)$(FULLNAME).$(LIBVERSION)$(SLIBSUF)'
         SLIBNAME_WITH_MAJOR='$(SLIBPREF)$(FULLNAME).$(LIBMAJOR)$(SLIBSUF)'
-        objformat="macho"
         enabled x86_64 && objformat="macho64" || objformat="macho32"
         enabled_any pic shared x86_64 ||
             { check_cflags -mdynamic-no-pic && add_asflags -mdynamic-no-pic; }
-        check_header dispatch/dispatch.h &&
+        check_headers dispatch/dispatch.h &&
             add_cppflags '-I\$(SRC_PATH)/compat/dispatch_semaphore'
+        if test -n "$sysroot"; then
+            is_in -isysroot $cc $CPPFLAGS $CFLAGS || check_cppflags -isysroot $sysroot
+            is_in -isysroot $ld $LDFLAGS          || check_ldflags  -isysroot $sysroot
+        fi
         version_script='-exported_symbols_list'
         VERSION_SCRIPT_POSTPROCESS_CMD='tr " " "\n" | sed -n /global:/,/local:/p | grep ";" | tr ";" "\n" | sed -E "s/(.+)/_\1/g" | sed -E "s/(.+[^*])$$$$/\1*/"'
         ;;
@@ -4984,39 +5184,42 @@
         die "Native MSYS builds are discouraged, please use the MINGW environment."
         ;;
     mingw32*|mingw64*)
-        if test $target_os = "mingw32ce"; then
-            disable network
-        else
-            target_os=mingw32
-        fi
+        target_os=mingw32
         LIBTARGET=i386
         if enabled x86_64; then
             LIBTARGET="i386:x86-64"
         elif enabled arm; then
-            LIBTARGET=arm-wince
+            LIBTARGET="arm"
+        elif enabled aarch64; then
+            LIBTARGET="arm64"
         fi
-        enabled shared && ! enabled small && check_cmd $windres --version && enable gnu_windres
+        if enabled shared; then
+            # Cannot build both shared and static libs when using dllimport.
+            disable static
+        fi
+        enabled shared && ! enabled small && test_cmd $windres --version && enable gnu_windres
         enabled x86_32 && check_ldflags -Wl,--large-address-aware
         shlibdir_default="$bindir_default"
         SLIBPREF=""
         SLIBSUF=".dll"
         SLIBNAME_WITH_VERSION='$(SLIBPREF)$(FULLNAME)-$(LIBVERSION)$(SLIBSUF)'
         SLIBNAME_WITH_MAJOR='$(SLIBPREF)$(FULLNAME)-$(LIBMAJOR)$(SLIBSUF)'
-        dlltool="${cross_prefix}dlltool"
-        if check_cmd lib.exe -list; then
-            SLIB_EXTRA_CMD=-'sed -e "s/ @[^ ]*//" $$(@:$(SLIBSUF)=.orig.def) > $$(@:$(SLIBSUF)=.def); lib.exe -nologo -machine:$(LIBTARGET) -def:$$(@:$(SLIBSUF)=.def) -out:$(SUBDIR)$(SLIBNAME:$(SLIBSUF)=.lib)'
+        if test_cmd lib.exe -list; then
+            SLIB_EXTRA_CMD=-'lib.exe -nologo -machine:$(LIBTARGET) -def:$$(@:$(SLIBSUF)=.def) -out:$(SUBDIR)$(SLIBNAME:$(SLIBSUF)=.lib)'
             if enabled x86_64; then
                 LIBTARGET=x64
             fi
-        elif check_cmd $dlltool --version; then
-            SLIB_EXTRA_CMD=-'sed -e "s/ @[^ ]*//" $$(@:$(SLIBSUF)=.orig.def) > $$(@:$(SLIBSUF)=.def); $(DLLTOOL) -m $(LIBTARGET) -d $$(@:$(SLIBSUF)=.def) -l $(SUBDIR)$(SLIBNAME:$(SLIBSUF)=.lib) -D $(SLIBNAME_WITH_MAJOR)'
+        else
+            SLIB_EXTRA_CMD=-'$(DLLTOOL) -m $(LIBTARGET) -d $$(@:$(SLIBSUF)=.def) -l $(SUBDIR)$(SLIBNAME:$(SLIBSUF)=.lib) -D $(SLIBNAME_WITH_MAJOR)'
         fi
         SLIB_INSTALL_NAME='$(SLIBNAME_WITH_MAJOR)'
         SLIB_INSTALL_LINKS=
         SLIB_INSTALL_EXTRA_SHLIB='$(SLIBNAME:$(SLIBSUF)=.lib)'
         SLIB_INSTALL_EXTRA_LIB='lib$(SLIBNAME:$(SLIBSUF)=.dll.a) $(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.def)'
-        SHFLAGS='-shared -Wl,--output-def,$$(@:$(SLIBSUF)=.orig.def) -Wl,--out-implib,$(SUBDIR)lib$(SLIBNAME:$(SLIBSUF)=.dll.a) -Wl,--enable-runtime-pseudo-reloc -Wl,--disable-auto-image-base'
+        SLIB_CREATE_DEF_CMD='EXTERN_PREFIX="$(EXTERN_PREFIX)" AR="$(AR_CMD)" NM="$(NM_CMD)" $(SRC_PATH)/compat/windows/makedef $(SUBDIR)lib$(NAME).ver $(OBJS) > $$(@:$(SLIBSUF)=.def)'
+        SHFLAGS='-shared -Wl,--out-implib,$(SUBDIR)lib$(SLIBNAME:$(SLIBSUF)=.dll.a) -Wl,--disable-auto-image-base $$(@:$(SLIBSUF)=.def)'
         enabled x86_64 && objformat="win64" || objformat="win32"
+        dlltool="${cross_prefix}dlltool"
         ranlib=:
         enable dos_paths
         check_ldflags -Wl,--nxcompat,--dynamicbase
@@ -5050,7 +5253,7 @@
         SLIBSUF=".dll"
         SLIBNAME_WITH_VERSION='$(SLIBPREF)$(FULLNAME)-$(LIBVERSION)$(SLIBSUF)'
         SLIBNAME_WITH_MAJOR='$(SLIBPREF)$(FULLNAME)-$(LIBMAJOR)$(SLIBSUF)'
-        SLIB_CREATE_DEF_CMD='$(SRC_PATH)/compat/windows/makedef $(SUBDIR)lib$(NAME).ver $(OBJS) > $$(@:$(SLIBSUF)=.def)'
+        SLIB_CREATE_DEF_CMD='EXTERN_PREFIX="$(EXTERN_PREFIX)" $(SRC_PATH)/compat/windows/makedef $(SUBDIR)lib$(NAME).ver $(OBJS) > $$(@:$(SLIBSUF)=.def)'
         SLIB_INSTALL_NAME='$(SLIBNAME_WITH_MAJOR)'
         SLIB_INSTALL_LINKS=
         SLIB_INSTALL_EXTRA_SHLIB='$(SLIBNAME:$(SLIBSUF)=.lib)'
@@ -5073,7 +5276,7 @@
         SHFLAGS='-shared -Wl,--out-implib,$(SUBDIR)lib$(FULLNAME).dll.a'
         enabled x86_64 && objformat="win64" || objformat="win32"
         enable dos_paths
-        enabled shared && ! enabled small && check_cmd $windres --version && enable gnu_windres
+        enabled shared && ! enabled small && test_cmd $windres --version && enable gnu_windres
         add_cppflags -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600
         ;;
     *-dos|freedos|opendos)
@@ -5136,9 +5339,6 @@
                       -l:drtaeabi.dso -l:scppnwdl.dso -lsupc++ -lgcc \
                       -l:libc.dso -l:libm.dso -l:euser.dso -l:libcrt0.lib
         ;;
-    osf1)
-        add_cppflags -D_OSF_SOURCE -D_POSIX_PII -D_REENTRANT
-        ;;
     minix)
         ;;
     none)
@@ -5174,42 +5374,44 @@
     pfx=$1
     pfx_no_=${pfx%_}
     # uclibc defines __GLIBC__, so it needs to be checked before glibc.
-    if check_${pfx}cpp_condition features.h "defined __UCLIBC__"; then
+    if test_${pfx}cpp_condition features.h "defined __UCLIBC__"; then
         eval ${pfx}libc_type=uclibc
         add_${pfx}cppflags -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600
-    elif check_${pfx}cpp_condition features.h "defined __GLIBC__"; then
+    elif test_${pfx}cpp_condition features.h "defined __GLIBC__"; then
         eval ${pfx}libc_type=glibc
         add_${pfx}cppflags -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600
     # MinGW headers can be installed on Cygwin, so check for newlib first.
-    elif check_${pfx}cpp_condition newlib.h "defined _NEWLIB_VERSION"; then
+    elif test_${pfx}cpp_condition newlib.h "defined _NEWLIB_VERSION"; then
         eval ${pfx}libc_type=newlib
         add_${pfx}cppflags -U__STRICT_ANSI__ -D_XOPEN_SOURCE=600
     # MinGW64 is backwards compatible with MinGW32, so check for it first.
-    elif check_${pfx}cpp_condition _mingw.h "defined __MINGW64_VERSION_MAJOR"; then
+    elif test_${pfx}cpp_condition _mingw.h "defined __MINGW64_VERSION_MAJOR"; then
         eval ${pfx}libc_type=mingw64
-        if check_${pfx}cpp_condition _mingw.h "__MINGW64_VERSION_MAJOR < 3"; then
+        if test_${pfx}cpp_condition _mingw.h "__MINGW64_VERSION_MAJOR < 3"; then
             add_compat msvcrt/snprintf.o
             add_cflags "-include $source_path/compat/msvcrt/snprintf.h"
         fi
         add_${pfx}cppflags -U__STRICT_ANSI__ -D__USE_MINGW_ANSI_STDIO=1
         eval test \$${pfx_no_}cc_type = "gcc" &&
             add_${pfx}cppflags -D__printf__=__gnu_printf__
-    elif check_${pfx}cpp_condition _mingw.h "defined __MINGW_VERSION"  ||
-         check_${pfx}cpp_condition _mingw.h "defined __MINGW32_VERSION"; then
+        test_${pfx}cpp_condition windows.h "!defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0600" &&
+            add_${pfx}cppflags -D_WIN32_WINNT=0x0600
+    elif test_${pfx}cpp_condition _mingw.h "defined __MINGW_VERSION"  ||
+         test_${pfx}cpp_condition _mingw.h "defined __MINGW32_VERSION"; then
         eval ${pfx}libc_type=mingw32
-        check_${pfx}cpp_condition _mingw.h "__MINGW32_MAJOR_VERSION > 3 || \
+        test_${pfx}cpp_condition _mingw.h "__MINGW32_MAJOR_VERSION > 3 || \
             (__MINGW32_MAJOR_VERSION == 3 && __MINGW32_MINOR_VERSION >= 15)" ||
             die "ERROR: MinGW32 runtime version must be >= 3.15."
         add_${pfx}cppflags -U__STRICT_ANSI__ -D__USE_MINGW_ANSI_STDIO=1
-        check_${pfx}cpp_condition _mingw.h "defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0502" ||
-            add_${pfx}cppflags -D_WIN32_WINNT=0x0502
-        check_${pfx}cpp_condition _mingw.h "__MSVCRT_VERSION__ < 0x0700" &&
+        test_${pfx}cpp_condition _mingw.h "__MSVCRT_VERSION__ < 0x0700" &&
             add_${pfx}cppflags -D__MSVCRT_VERSION__=0x0700
+        test_${pfx}cpp_condition windows.h "!defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0600" &&
+            add_${pfx}cppflags -D_WIN32_WINNT=0x0600
         eval test \$${pfx_no_}cc_type = "gcc" &&
             add_${pfx}cppflags -D__printf__=__gnu_printf__
-    elif check_${pfx}cpp_condition crtversion.h "defined _VC_CRT_MAJOR_VERSION"; then
+    elif test_${pfx}cpp_condition crtversion.h "defined _VC_CRT_MAJOR_VERSION"; then
         eval ${pfx}libc_type=msvcrt
-        if check_${pfx}cpp_condition crtversion.h "_VC_CRT_MAJOR_VERSION < 14"; then
+        if test_${pfx}cpp_condition crtversion.h "_VC_CRT_MAJOR_VERSION < 14"; then
             if [ "$pfx" = host_ ]; then
                 add_host_cppflags -Dsnprintf=_snprintf
             else
@@ -5224,14 +5426,14 @@
         # 0x601 by default unless something else is set by the user.
         # This can easily lead to us detecting functions only present
         # in such new versions and producing binaries requiring windows 7.0.
-        # Therefore explicitly set the default to XP unless the user has
+        # Therefore explicitly set the default to Vista unless the user has
         # set something else on the command line.
         # Don't do this if WINAPI_FAMILY is set and is set to a non-desktop
         # family. For these cases, configure is free to use any functions
         # found in the SDK headers by default. (Alternatively, we could force
         # _WIN32_WINNT to 0x0602 in that case.)
-        check_${pfx}cpp_condition stdlib.h "defined(_WIN32_WINNT)" ||
-            { check_${pfx}cpp <<EOF && add_${pfx}cppflags -D_WIN32_WINNT=0x0502; }
+        test_${pfx}cpp_condition stdlib.h "defined(_WIN32_WINNT)" ||
+            { test_${pfx}cpp <<EOF && add_${pfx}cppflags -D_WIN32_WINNT=0x0600; }
 #ifdef WINAPI_FAMILY
 #include <winapifamily.h>
 #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
@@ -5243,29 +5445,28 @@
             check_func strtoll || add_cflags -Dstrtoll=_strtoi64
             check_func strtoull || add_cflags -Dstrtoull=_strtoui64
         fi
-    elif check_${pfx}cpp_condition stddef.h "defined __KLIBC__"; then
+    elif test_${pfx}cpp_condition stddef.h "defined __KLIBC__"; then
         eval ${pfx}libc_type=klibc
-    elif check_${pfx}cpp_condition sys/cdefs.h "defined __BIONIC__"; then
+    elif test_${pfx}cpp_condition sys/cdefs.h "defined __BIONIC__"; then
         eval ${pfx}libc_type=bionic
-    elif check_${pfx}cpp_condition sys/brand.h "defined LABELED_BRAND_NAME"; then
+    elif test_${pfx}cpp_condition sys/brand.h "defined LABELED_BRAND_NAME"; then
         eval ${pfx}libc_type=solaris
         add_${pfx}cppflags -D__EXTENSIONS__ -D_XOPEN_SOURCE=600
     fi
-    check_${pfx}cc <<EOF
+    test_${pfx}cc <<EOF
 #include <time.h>
 void *v = localtime_r;
 EOF
-test "$?" != 0 && check_${pfx}cc -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600 <<EOF && add_${pfx}cppflags -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600
+test "$?" != 0 && test_${pfx}cc -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600 <<EOF && add_${pfx}cppflags -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600
 #include <time.h>
 void *v = localtime_r;
 EOF
 
+    eval test -n "\${${pfx}libc_type}" && enable ${pfx}libc_${libc_type}
 }
 
 probe_libc
-test -n "$libc_type" && enable libc_$libc_type
 probe_libc host_
-test -n "$host_libc_type" && enable host_libc_$host_libc_type
 
 # hacks for compiler/libc/os combinations
 
@@ -5273,29 +5474,12 @@
     bionic)
         add_compat strtod.o strtod=avpriv_strtod
         ;;
-    glibc)
-        if enabled tms470; then
-            CPPFLAGS="-I${source_path}/compat/tms470 ${CPPFLAGS}"
-            add_cppflags -D__USER_LABEL_PREFIX__=
-            add_cppflags -D__builtin_memset=memset
-            add_cppflags -D__gnuc_va_list=va_list -D_VA_LIST_DEFINED
-            add_cflags   -pds=48    # incompatible redefinition of macro
-        elif enabled ccc; then
-            add_ldflags -Wl,-z,now  # calls to libots crash without this
-        fi
-        ;;
 esac
 
 check_compile_assert flt_lim "float.h limits.h" "DBL_MAX == (double)DBL_MAX" ||
     add_cppflags '-I\$(SRC_PATH)/compat/float'
 
-esc(){
-    echo "$*" | sed 's/%/%25/g;s/:/%3a/g'
-}
-
-echo "config:$arch:$subarch:$cpu:$target_os:$(esc $cc_ident):$(esc $FFMPEG_CONFIGURATION)" > ffbuild/config.fate
-
-check_cpp_condition stdlib.h "defined(__PIC__) || defined(__pic__) || defined(PIC)" && enable_weak pic
+test_cpp_condition stdlib.h "defined(__PIC__) || defined(__pic__) || defined(PIC)" && enable_weak pic
 
 set_default libdir
 : ${shlibdir_default:="$libdir"}
@@ -5304,17 +5488,6 @@
 set_default $PATHS_LIST
 set_default nm
 
-# we need to build at least one lib type
-if ! enabled_any static shared; then
-    cat <<EOF
-At least one library type must be built.
-Specify --enable-static to build the static libraries or --enable-shared to
-build the shared libraries as well. To only build the shared libraries specify
---disable-static in addition to --enable-shared.
-EOF
-    exit 1
-fi
-
 disabled optimizations || enabled ossfuzz || check_cflags -fomit-frame-pointer
 
 enable_weak_pic() {
@@ -5322,53 +5495,38 @@
     enable pic
     add_cppflags -DPIC
     case "$target_os" in
-    mingw*|cygwin*)
+    mingw*|cygwin*|win*)
         ;;
     *)
         add_cflags -fPIC
+        add_asflags -fPIC
         ;;
     esac
-    add_asflags  -fPIC
 }
 
 enabled pic && enable_weak_pic
 
-check_cc <<EOF || die "Symbol mangling check failed."
+test_cc <<EOF || die "Symbol mangling check failed."
 int ff_extern;
 EOF
 sym=$($nm $TMPO | awk '/ff_extern/{ print substr($0, match($0, /[^ \t]*ff_extern/)) }')
 extern_prefix=${sym%%ff_extern*}
 
-check_inline_asm inline_asm '"" ::'
+! disabled inline_asm && check_inline_asm inline_asm '"" ::'
 
-_restrict=
-for restrict_keyword in restrict __restrict__ __restrict; do
-    check_cc <<EOF && _restrict=$restrict_keyword && break
-void foo(char * $restrict_keyword p);
-EOF
+for restrict_keyword in restrict __restrict__ __restrict ""; do
+    test_code cc "" "char * $restrict_keyword p" && break
 done
 
-check_cc <<EOF && enable pragma_deprecated
-void foo(void) { _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") }
-EOF
+check_cc pragma_deprecated "" '_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")'
 
-check_cc <<EOF && enable attribute_packed
-struct { int x; } __attribute__((packed)) x;
-EOF
-
-check_cc <<EOF && enable attribute_may_alias
-union { int x; } __attribute__((may_alias)) x;
-EOF
-
-check_cc <<EOF || die "endian test failed"
+# The global variable ensures the bits appear unchanged in the object file.
+test_cc <<EOF || die "endian test failed"
 unsigned int endian = 'B' << 24 | 'I' << 16 | 'G' << 8 | 'E';
 EOF
 od -t x1 $TMPO | grep -q '42 *49 *47 *45' && enable bigendian
 
-check_cc <<EOF && enable const_nan
-#include <math.h>
-void foo(void) { struct { double d; } static const bar[] = { { NAN } }; }
-EOF
+check_cc const_nan math.h "struct { double d; } static const bar[] = { { NAN } }"
 
 if ! enabled ppc64 || enabled bigendian; then
     disable vsx
@@ -5377,18 +5535,13 @@
 check_gas() {
     log "check_gas using '$as' as AS"
     # :vararg is used on aarch64, arm and ppc altivec
-    check_as <<EOF || return 1
+    check_as vararg "
 .macro m n, y:vararg=0
 \n: .int \y
 .endm
-m x
-EOF
+m x" || return 1
     # .altmacro is only used in arm asm
-    ! enabled arm || check_as <<EOF || return 1
-.altmacro
-EOF
-    enable gnu_as
-    return 0
+    ! enabled arm || check_as gnu_as ".altmacro"
 }
 
 if enabled_any arm aarch64 || enabled_all ppc altivec && enabled asm; then
@@ -5406,7 +5559,7 @@
     [ $target_os = "darwin" ] && gaspp_as_type="apple-$gaspp_as_type"
 
     test "${as#*gas-preprocessor.pl}" != "$as" ||
-    check_cmd gas-preprocessor.pl -arch $arch -as-type $gaspp_as_type -- ${as:=$cc} $as_noop &&
+    test_cmd gas-preprocessor.pl -arch $arch -as-type $gaspp_as_type -- ${as:=$cc} $as_noop &&
         gas="${gas:=gas-preprocessor.pl} -arch $arch -as-type $gaspp_as_type -- ${as:=$cc}"
 
     if ! check_gas ; then
@@ -5415,10 +5568,8 @@
             $nogas "GNU assembler not found, install/update gas-preprocessor"
     fi
 
-    check_as <<EOF && enable as_func
-.func test
-.endfunc
-EOF
+    check_as as_func ".func test
+                      .endfunc"
 fi
 
 check_inline_asm inline_asm_labels '"1:\n"'
@@ -5439,22 +5590,20 @@
 
 elif enabled arm; then
 
-    enabled msvc && check_cpp_condition stddef.h "defined _M_ARMT" && enable thumb
-
-    check_cpp_condition stddef.h "defined __thumb__" && check_cc <<EOF && enable_weak thumb
+    enabled msvc && check_cpp_condition thumb stddef.h "defined _M_ARMT"
+    test_cpp_condition stddef.h "defined __thumb__" && test_cc <<EOF && enable_weak thumb
 float func(float a, float b){ return a+b; }
 EOF
-
     enabled thumb && check_cflags -mthumb || check_cflags -marm
 
-    if     check_cpp_condition stddef.h "defined __ARM_PCS_VFP"; then
-        enable vfp_args
-    elif check_cpp_condition stddef.h "defined _M_ARM_FP && _M_ARM_FP >= 30"; then
-        enable vfp_args
-    elif ! check_cpp_condition stddef.h "defined __ARM_PCS || defined __SOFTFP__" && [ $target_os != darwin ]; then
+    if check_cpp_condition vfp_args stddef.h "defined __ARM_PCS_VFP"; then
+        :
+    elif check_cpp_condition vfp_args stddef.h "defined _M_ARM_FP && _M_ARM_FP >= 30"; then
+        :
+    elif ! test_cpp_condition stddef.h "defined __ARM_PCS || defined __SOFTFP__" && [ $target_os != darwin ]; then
         case "${cross_prefix:-$cc}" in
-            *hardfloat*)         enable vfp_args;   fpabi=vfp ;;
-            *) check_ld "cc" <<EOF && enable vfp_args && fpabi=vfp || fpabi=soft ;;
+            *hardfloat*) enable vfp_args; fpabi=vfp ;;
+            *) check_ld "cc" vfp_args <<EOF && fpabi=vfp || fpabi=soft ;;
 __asm__ (".eabi_attribute 28, 1");
 int main(void) { return 0; }
 EOF
@@ -5476,20 +5625,15 @@
 
     check_inline_asm asm_mod_q '"add r0, %Q0, %R0" :: "r"((long long)0)'
 
-    check_as <<EOF && enable as_dn_directive
-ra .dn d0.i16
-.unreq ra
-EOF
-    check_as <<EOF && enable as_fpu_directive
-.fpu neon
-EOF
+    check_as as_arch_directive ".arch armv7-a"
+    check_as as_dn_directive   "ra .dn d0.i16"
+    check_as as_fpu_directive  ".fpu neon"
 
     # llvm's integrated assembler supports .object_arch from llvm 3.5
     [ "$objformat" = elf32 ] || [ "$objformat" = elf64 ] &&
-        check_as <<EOF && enable as_object_arch
-.object_arch armv4
-EOF
+        check_as as_object_arch ".object_arch armv4"
 
+    # MS armasm fails to assemble our PIC constructs
     [ $target_os != win32 ] && enabled_all armv6t2 shared !pic && enable_weak_pic
 
 elif enabled mips; then
@@ -5512,7 +5656,7 @@
 
     enabled mipsfpu && check_inline_asm_flags mipsfpu '"cvt.d.l $f0, $f2"' '-mhard-float'
     enabled mipsfpu && (enabled mips32r5 || enabled mips32r6 || enabled mips64r6) && check_inline_asm_flags mipsfpu '"cvt.d.l $f0, $f1"' '-mfp64'
-    enabled mipsfpu && enabled msa && check_inline_asm_flags msa '"addvi.b $w0, $w1, 1"' '-mmsa' && check_header msa.h || disable msa
+    enabled mipsfpu && enabled msa && check_inline_asm_flags msa '"addvi.b $w0, $w1, 1"' '-mmsa' && check_headers msa.h || disable msa
     enabled mipsdsp && check_inline_asm_flags mipsdsp '"addu.qb $t0, $t1, $t2"' '-mdsp'
     enabled mipsdspr2 && check_inline_asm_flags mipsdspr2 '"absq_s.qb $t0, $t1"' '-mdspr2'
 
@@ -5530,40 +5674,32 @@
 
 elif enabled ppc; then
 
-    enable local_aligned_8 local_aligned_16 local_aligned_32
+    enable local_aligned
 
     check_inline_asm dcbzl     '"dcbzl 0, %0" :: "r"(0)'
     check_inline_asm ibm_asm   '"add 0, 0, 0"'
     check_inline_asm ppc4xx    '"maclhw r10, r11, r12"'
     check_inline_asm xform_asm '"lwzx %1, %y0" :: "Z"(*(int*)0), "r"(0)'
 
-    # AltiVec flags: The FSF version of GCC differs from the Apple version
     if enabled altivec; then
-        check_cflags -maltivec -mabi=altivec &&
-        { check_header altivec.h && inc_altivec_h="#include <altivec.h>" ; } ||
-        check_cflags -faltivec
+        check_cflags -maltivec -mabi=altivec
 
         # check if our compiler supports Motorola AltiVec C API
-        check_cc <<EOF || disable altivec
-$inc_altivec_h
-int main(void) {
-    vector signed int v1 = (vector signed int) { 0 };
-    vector signed int v2 = (vector signed int) { 1 };
-    v1 = vec_add(v1, v2);
-    return 0;
-}
-EOF
+        check_cc altivec altivec.h "vector signed int v1 = (vector signed int) { 0 };
+                                    vector signed int v2 = (vector signed int) { 1 };
+                                    v1 = vec_add(v1, v2);"
 
         enabled altivec || warn "Altivec disabled, possibly missing --cpu flag"
     fi
 
     if enabled vsx; then
         check_cflags -mvsx &&
-        check_builtin vec_vsx_ld "altivec.h" "__builtin_vec_vsx_ld" || disable vsx
+        check_cc vsx altivec.h "int v[4] = { 0 };
+                                vector signed int v1 = vec_vsx_ld(0, v);"
     fi
 
     if enabled power8; then
-        check_cpp_condition "altivec.h" "defined(_ARCH_PWR8)" || disable power8
+        check_cpp_condition power8 "altivec.h" "defined(_ARCH_PWR8)"
     fi
 
 elif enabled x86; then
@@ -5571,7 +5707,7 @@
     check_builtin rdtsc    intrin.h   "__rdtsc()"
     check_builtin mm_empty mmintrin.h "_mm_empty()"
 
-    enable local_aligned_8 local_aligned_16 local_aligned_32
+    enable local_aligned
 
     # check whether EBP is available on x86
     # As 'i' is stored on the stack, this program will crash
@@ -5599,16 +5735,20 @@
 
     probe_x86asm(){
         x86asmexe_probe=$1
-        if check_cmd $x86asmexe_probe -v; then
+        if test_cmd $x86asmexe_probe -v; then
             x86asmexe=$x86asmexe_probe
             x86asm_type=nasm
             x86asm_debug="-g -F dwarf"
-        elif check_cmd $x86asmexe_probe --version; then
+            X86ASMDEP=
+            X86ASM_DEPFLAGS='-MD $(@:.o=.d)'
+        elif test_cmd $x86asmexe_probe --version; then
             x86asmexe=$x86asmexe_probe
             x86asm_type=yasm
             x86asm_debug="-g dwarf2"
+            X86ASMDEP='$(DEPX86ASM) $(X86ASMFLAGS) -M $(X86ASM_O) $< > $(@:.o=.d)'
+            X86ASM_DEPFLAGS=
         fi
-        check_x86asm "movbe ecx, [5]" && enable x86asm
+        check_x86asm x86asm "movbe ecx, [5]"
     }
 
     if ! disabled_any asm mmx x86asm; then
@@ -5617,8 +5757,6 @@
             probe_x86asm $program && break
         done
         disabled x86asm && die "nasm/yasm not found or too old. Use --disable-x86asm for a crippled build."
-        test $x86asm_type = 'nasm' && X86ASM_DEPFLAGS='-MD $(@:.o=.d)'
-        test $x86asm_type = 'yasm' && X86ASMDEP='$(DEPX86ASM) $(X86ASMFLAGS) -M $(X86ASM_O) $< > $(@:.o=.d)'
         X86ASMFLAGS="-f $objformat"
         enabled pic               && append X86ASMFLAGS "-DPIC"
         test -n "$extern_prefix"  && append X86ASMFLAGS "-DPREFIX"
@@ -5626,10 +5764,11 @@
             elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
         esac
 
-        check_x86asm "vextracti128 xmm0, ymm0, 0"      || disable avx2_external
-        check_x86asm "vpmacsdd xmm0, xmm1, xmm2, xmm3" || disable xop_external
-        check_x86asm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4_external
-        check_x86asm "CPU amdnop" || disable cpunop
+        check_x86asm avx512_external "vmovdqa32 [eax]{k1}{z}, zmm0"
+        check_x86asm avx2_external   "vextracti128 xmm0, ymm0, 0"
+        check_x86asm xop_external    "vpmacsdd xmm0, xmm1, xmm2, xmm3"
+        check_x86asm fma4_external   "vfmaddps ymm0, ymm1, ymm2, ymm3"
+        check_x86asm cpunop          "CPU amdnop"
     fi
 
     case "$cpu" in
@@ -5640,7 +5779,7 @@
 
 fi
 
-check_code cc arm_neon.h "int16x8_t test = vdupq_n_s16(0)" && enable intrinsics_neon
+check_cc intrinsics_neon arm_neon.h "int16x8_t test = vdupq_n_s16(0)"
 
 check_ldflags -Wl,--as-needed
 check_ldflags -Wl,-z,noexecstack
@@ -5662,9 +5801,9 @@
     check_type "sys/types.h sys/socket.h" socklen_t
 
     # Prefer arpa/inet.h over winsock2
-    if check_header arpa/inet.h ; then
+    if check_headers arpa/inet.h ; then
         check_func closesocket
-    elif check_header winsock2.h ; then
+    elif check_headers winsock2.h ; then
         check_func_headers winsock2.h closesocket -lws2 &&
             network_extralibs="-lws2" ||
         { check_func_headers winsock2.h closesocket -lws2_32 &&
@@ -5688,8 +5827,6 @@
 check_builtin atomic_cas_ptr atomic.h "void **ptr; void *oldval, *newval; atomic_cas_ptr(ptr, oldval, newval)"
 check_builtin machine_rw_barrier mbarrier.h "__machine_rw_barrier()"
 check_builtin MemoryBarrier windows.h "MemoryBarrier()"
-check_builtin sarestart signal.h "SA_RESTART"
-check_builtin sem_timedwait semaphore.h "sem_t *s; sem_init(s,0,0); sem_timedwait(s,0); sem_destroy(s)" -lpthread
 check_builtin sync_val_compare_and_swap "" "int *ptr; int oldval, newval; __sync_val_compare_and_swap(ptr, oldval, newval)"
 check_builtin gmtime_r time.h "time_t *time; struct tm *tm; gmtime_r(time, tm)"
 check_builtin localtime_r time.h "time_t *time; struct tm *tm; localtime_r(time, tm)"
@@ -5712,8 +5849,7 @@
 
 check_func  access
 check_func_headers stdlib.h arc4random
-check_func_headers time.h clock_gettime ||
-    { check_lib clock_gettime time.h clock_gettime -lrt && LIBRT="-lrt"; }
+check_lib   clock_gettime time.h clock_gettime || check_lib clock_gettime time.h clock_gettime -lrt
 check_func  fcntl
 check_func  fork
 check_func  gethrtime
@@ -5721,13 +5857,11 @@
 check_func  getrusage
 check_func  gettimeofday
 check_func  isatty
-check_func  mach_absolute_time
 check_func  mkstemp
 check_func  mmap
 check_func  mprotect
 # Solaris has nanosleep in -lrt, OpenSolaris no longer needs that
-check_func_headers time.h nanosleep ||
-    { check_lib nanosleep time.h nanosleep -lrt && LIBRT="-lrt"; }
+check_func_headers time.h nanosleep || check_lib nanosleep time.h nanosleep -lrt
 check_func  sched_getaffinity
 check_func  setrlimit
 check_struct "sys/stat.h" "struct stat" st_mtim.tv_nsec -D_BSD_SOURCE
@@ -5739,6 +5873,7 @@
 check_func_headers conio.h kbhit
 check_func_headers io.h setmode
 check_func_headers lzo/lzo1x.h lzo1x_999_compress
+check_func_headers mach/mach_time.h mach_absolute_time
 check_func_headers stdlib.h getenv
 check_func_headers sys/stat.h lstat
 
@@ -5752,58 +5887,64 @@
 check_func_headers windows.h SetConsoleCtrlHandler
 check_func_headers windows.h Sleep
 check_func_headers windows.h VirtualAlloc
-check_struct windows.h "CONDITION_VARIABLE" Ptr
 check_func_headers glob.h glob
 enabled xlib &&
-    check_func_headers "X11/Xlib.h X11/extensions/Xvlib.h" XvGetPortAttribute -lXv -lX11 -lXext
+    check_lib xlib "X11/Xlib.h X11/extensions/Xvlib.h" XvGetPortAttribute -lXv -lX11 -lXext
 
-check_header d3d11.h
-check_header direct.h
-check_header dirent.h
-check_header dlfcn.h
-check_header dxgidebug.h
-check_header dxva.h
-check_header dxva2api.h -D_WIN32_WINNT=0x0600
-check_header io.h
-check_header libcrystalhd/libcrystalhd_if.h
-check_header mach/mach_time.h
-check_header malloc.h
-check_header net/udplite.h
-check_header poll.h
-check_header sys/mman.h
-check_header sys/param.h
-check_header sys/resource.h
-check_header sys/select.h
-check_header sys/time.h
-check_header sys/un.h
-check_header termios.h
-check_header unistd.h
-check_header valgrind/valgrind.h
-check_header VideoDecodeAcceleration/VDADecoder.h
+check_headers direct.h
+check_headers dirent.h
+check_headers dxgidebug.h
+check_headers dxva.h
+check_headers dxva2api.h -D_WIN32_WINNT=0x0600
+check_headers io.h
+check_headers linux/perf_event.h
+check_headers libcrystalhd/libcrystalhd_if.h
+check_headers malloc.h
+check_headers net/udplite.h
+check_headers poll.h
+check_headers sys/param.h
+check_headers sys/resource.h
+check_headers sys/select.h
+check_headers sys/time.h
+check_headers sys/un.h
+check_headers termios.h
+check_headers unistd.h
+check_headers valgrind/valgrind.h
 check_func_headers VideoToolbox/VTCompressionSession.h VTCompressionSessionPrepareToEncodeFrames -framework VideoToolbox
-check_header windows.h
-check_header X11/extensions/XvMClib.h
-check_header asm/types.h
+check_headers windows.h
+check_headers X11/extensions/XvMClib.h
+check_headers asm/types.h
 
 # it seems there are versions of clang in some distros that try to use the
 # gcc headers, which explodes for stdatomic
 # so we also check that atomics actually work here
-check_builtin stdatomic_h stdatomic.h "atomic_int foo, bar = ATOMIC_VAR_INIT(-1); atomic_store(&foo, 0)"
+check_builtin stdatomic stdatomic.h "atomic_int foo, bar = ATOMIC_VAR_INIT(-1); atomic_store(&foo, 0); foo += bar"
 
+check_lib advapi32 "windows.h"            RegCloseKey          -ladvapi32
+check_lib bcrypt   "windows.h bcrypt.h"   BCryptGenRandom      -lbcrypt &&
+    check_cpp_condition bcrypt bcrypt.h "defined BCRYPT_RNG_ALGORITHM"
 check_lib ole32    "windows.h"            CoTaskMemFree        -lole32
 check_lib shell32  "windows.h shellapi.h" CommandLineToArgvW   -lshell32
-check_lib wincrypt "windows.h wincrypt.h" CryptGenRandom       -ladvapi32
 check_lib psapi    "windows.h psapi.h"    GetProcessMemoryInfo -lpsapi
 
+check_lib android android/native_window.h ANativeWindow_acquire -landroid
+check_lib mediandk "stdint.h media/NdkImage.h" AImage_delete -lmediandk
+check_lib camera2ndk "stdbool.h stdint.h camera/NdkCameraManager.h" ACameraManager_create -lcamera2ndk
+
 enabled appkit       && check_apple_framework AppKit
 enabled audiotoolbox && check_apple_framework AudioToolbox
 enabled avfoundation && check_apple_framework AVFoundation
 enabled coreimage    && check_apple_framework CoreImage
 enabled videotoolbox && check_apple_framework VideoToolbox
 
+check_apple_framework CoreFoundation
+check_apple_framework CoreMedia
+check_apple_framework CoreVideo
+
 enabled avfoundation && {
-    check_lib avfoundation CoreGraphics/CoreGraphics.h               CGGetActiveDisplayList "-framework CoreGraphics" ||
-    check_lib avfoundation ApplicationServices/ApplicationServices.h CGGetActiveDisplayList "-framework ApplicationServices"; }
+    disable coregraphics applicationservices
+    check_lib coregraphics        CoreGraphics/CoreGraphics.h               CGGetActiveDisplayList "-framework CoreGraphics" ||
+    check_lib applicationservices ApplicationServices/ApplicationServices.h CGGetActiveDisplayList "-framework ApplicationServices"; }
 
 enabled videotoolbox && {
     check_lib coreservices CoreServices/CoreServices.h UTGetOSTypeFromString "-framework CoreServices"
@@ -5820,20 +5961,26 @@
 
 check_type "va/va.h va/va_dec_hevc.h" "VAPictureParameterBufferHEVC"
 check_struct "va/va.h" "VADecPictureParameterBufferVP9" bit_depth
-check_type "va/va.h va/va_vpp.h" "VAProcPipelineParameterBuffer"
-check_type "va/va.h va/va_enc_h264.h" "VAEncPictureParameterBufferH264"
 check_type "va/va.h va/va_enc_hevc.h" "VAEncPictureParameterBufferHEVC"
 check_type "va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG"
-check_type "va/va.h va/va_enc_mpeg2.h" "VAEncPictureParameterBufferMPEG2"
 check_type "va/va.h va/va_enc_vp8.h"  "VAEncPictureParameterBufferVP8"
 check_type "va/va.h va/va_enc_vp9.h"  "VAEncPictureParameterBufferVP9"
 
 check_type "vdpau/vdpau.h" "VdpPictureInfoHEVC"
 
-check_cpp_condition windows.h "!WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)" && enable winrt || disable winrt
+if ! disabled ffnvcodec; then
+    check_pkg_config ffnvcodec "ffnvcodec >= 8.1.24.2" \
+          "ffnvcodec/nvEncodeAPI.h ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h ffnvcodec/dynlink_nvcuvid.h" "" || \
+      check_pkg_config ffnvcodec "ffnvcodec >= 8.0.14.2 ffnvcodec < 8.1" \
+          "ffnvcodec/nvEncodeAPI.h ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h ffnvcodec/dynlink_nvcuvid.h" ""
+fi
+
+check_cpp_condition winrt windows.h "!WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)"
 
 if ! disabled w32threads && ! enabled pthreads; then
     check_func_headers "windows.h process.h" _beginthreadex &&
+        check_type "windows.h" CONDITION_VARIABLE &&
+        check_type "windows.h" INIT_ONCE &&
         enable w32threads || disable w32threads
     if ! enabled w32threads && enabled winrt; then
         check_func_headers "windows.h" CreateThread &&
@@ -5862,25 +6009,23 @@
     elif check_func pthread_join && check_func pthread_create; then
         enable pthreads
     fi
-    check_code cc "pthread.h" "static pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER" || disable pthreads
+    check_cc pthreads "pthread.h" "static pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER"
+
+    if enabled pthreads; then
+        check_builtin sem_timedwait semaphore.h "sem_t *s; sem_init(s,0,0); sem_timedwait(s,0); sem_destroy(s)" $pthreads_extralibs
+        check_func pthread_cancel $pthreads_extralibs
+    fi
 fi
 
-
-if enabled pthreads; then
-  check_func pthread_cancel
-fi
-
-enabled pthreads &&
-    check_builtin sem_timedwait semaphore.h "sem_t *s; sem_init(s,0,0); sem_timedwait(s,0); sem_destroy(s)"
-
-enabled  zlib && check_lib zlib   zlib.h      zlibVersion    -lz
+enabled  zlib && { check_pkg_config zlib zlib "zlib.h" zlibVersion ||
+                   check_lib zlib   zlib.h      zlibVersion    -lz; }
 enabled bzlib && check_lib bzlib bzlib.h BZ2_bzlibVersion    -lbz2
 enabled  lzma && check_lib lzma   lzma.h lzma_version_number -llzma
 
 # On some systems dynamic loading requires no extra linker flags
 check_lib libdl dlfcn.h "dlopen dlsym" || check_lib libdl dlfcn.h "dlopen dlsym" -ldl
 
-check_lib libm math.h sin -lm && LIBM="-lm"
+check_lib libm math.h sin -lm
 
 atan2f_args=2
 copysign_args=2
@@ -5889,7 +6034,7 @@
 powf_args=2
 
 for func in $MATH_FUNCS; do
-    eval check_mathfunc $func \${${func}_args:-1} $LIBM
+    eval check_mathfunc $func \${${func}_args:-1} $libm_extralibs
 done
 
 for func in $COMPLEX_FUNCS; do
@@ -5898,17 +6043,17 @@
 
 # these are off by default, so fail if requested and not available
 enabled cuda_sdk          && require cuda_sdk cuda.h cuCtxCreate -lcuda
-enabled cuvid             && { enabled cuda ||
-                               die "ERROR: CUVID requires CUDA"; }
 enabled chromaprint       && require chromaprint chromaprint.h chromaprint_get_version -lchromaprint
-enabled decklink          && { require_header DeckLinkAPI.h &&
-                               { check_cpp_condition DeckLinkAPIVersion.h "BLACKMAGIC_DECKLINK_API_VERSION >= 0x0a060100" || die "ERROR: Decklink API version must be >= 10.6.1."; } }
-enabled libndi_newtek     && require_header Processing.NDI.Lib.h
-enabled frei0r            && require_header frei0r.h
+enabled decklink          && { require_headers DeckLinkAPI.h &&
+                               { test_cpp_condition DeckLinkAPIVersion.h "BLACKMAGIC_DECKLINK_API_VERSION >= 0x0a090500" || die "ERROR: Decklink API version must be >= 10.9.5."; } }
+enabled libndi_newtek     && require_headers Processing.NDI.Lib.h
+enabled frei0r            && require_headers frei0r.h
 enabled gmp               && require gmp gmp.h mpz_export -lgmp
 enabled gnutls            && require_pkg_config gnutls gnutls gnutls/gnutls.h gnutls_global_init
-enabled jni               && { [ $target_os = "android" ] && check_header jni.h && enabled pthreads || die "ERROR: jni not found"; }
-enabled ladspa            && require_header ladspa.h
+enabled jni               && { [ $target_os = "android" ] && check_headers jni.h && enabled pthreads || die "ERROR: jni not found"; }
+enabled ladspa            && require_headers ladspa.h
+enabled libaom            && require_pkg_config libaom "aom >= 1.0.0" aom/aom_codec.h aom_codec_version
+enabled lv2               && require_pkg_config lv2 lilv-0 "lilv/lilv.h" lilv_world_new
 enabled libiec61883       && require libiec61883 libiec61883/iec61883.h iec61883_cmp_connect -lraw1394 -lavc1394 -lrom1394 -liec61883
 enabled libass            && require_pkg_config libass libass ass/ass.h ass_library_init
 enabled libbluray         && require_pkg_config libbluray libbluray libbluray/bluray.h bd_open
@@ -5917,56 +6062,53 @@
                              { check_lib libcelt celt/celt.h celt_decoder_create_custom -lcelt0 ||
                                die "ERROR: libcelt must be installed and version must be >= 0.11.0."; }
 enabled libcaca           && require_pkg_config libcaca caca caca.h caca_create_canvas
+enabled libcodec2         && require libcodec2 codec2/codec2.h codec2_create -lcodec2
+enabled libdavs2          && require_pkg_config libdavs2 "davs2 >= 1.5.115" davs2.h davs2_decoder_open
 enabled libdc1394         && require_pkg_config libdc1394 libdc1394-2 dc1394/dc1394.h dc1394_new
 enabled libdrm            && require_pkg_config libdrm libdrm xf86drm.h drmGetVersion
-enabled libfdk_aac        && { use_pkg_config libfdk_aac fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen ||
+enabled libfdk_aac        && { check_pkg_config libfdk_aac fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen ||
                                { require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac &&
                                  warn "using libfdk without pkg-config"; } }
 flite_extralibs="-lflite_cmu_time_awb -lflite_cmu_us_awb -lflite_cmu_us_kal -lflite_cmu_us_kal16 -lflite_cmu_us_rms -lflite_cmu_us_slt -lflite_usenglish -lflite_cmulex -lflite"
 enabled libflite          && require libflite "flite/flite.h" flite_init $flite_extralibs
 enabled fontconfig        && enable libfontconfig
 enabled libfontconfig     && require_pkg_config libfontconfig fontconfig "fontconfig/fontconfig.h" FcInit
-enabled libfreetype       && require_pkg_config libfreetype2 freetype2 "ft2build.h FT_FREETYPE_H" FT_Init_FreeType
+enabled libfreetype       && require_pkg_config libfreetype freetype2 "ft2build.h FT_FREETYPE_H" FT_Init_FreeType
 enabled libfribidi        && require_pkg_config libfribidi fribidi fribidi.h fribidi_version_info
-enabled libgme            && { use_pkg_config libgme libgme gme/gme.h gme_new_emu ||
+enabled libgme            && { check_pkg_config libgme libgme gme/gme.h gme_new_emu ||
                                require libgme gme/gme.h gme_new_emu -lgme -lstdc++; }
 enabled libgsm            && { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do
                                    check_lib libgsm "${gsm_hdr}" gsm_create -lgsm && break;
                                done || die "ERROR: libgsm not found"; }
-enabled libilbc           && require libilbc ilbc.h WebRtcIlbcfix_InitDecode -lilbc
+enabled libilbc           && require libilbc ilbc.h WebRtcIlbcfix_InitDecode -lilbc $pthreads_extralibs
+enabled libklvanc         && require libklvanc libklvanc/vanc.h klvanc_context_create -lklvanc
 enabled libkvazaar        && require_pkg_config libkvazaar "kvazaar >= 0.8.1" kvazaar.h kvz_api_get
+enabled liblensfun        && require_pkg_config liblensfun lensfun lensfun.h lf_db_new
 # While it may appear that require is being used as a pkg-config
 # fallback for libmfx, it is actually being used to detect a different
 # installation route altogether.  If libmfx is installed via the Intel
 # Media SDK or Intel Media Server Studio, these don't come with
 # pkg-config support.  Instead, users should make sure that the build
 # can find the libraries and headers through other means.
-enabled libmfx            && { use_pkg_config libmfx libmfx "mfx/mfxvideo.h" MFXInit ||
-                               { require libmfx "mfx/mfxvideo.h" MFXInit -llibmfx && warn "using libmfx without pkg-config"; } }
+enabled libmfx            && { check_pkg_config libmfx libmfx "mfx/mfxvideo.h" MFXInit ||
+                               { require libmfx "mfx/mfxvideo.h" MFXInit "-llibmfx $advapi32_extralibs" && warn "using libmfx without pkg-config"; } }
 enabled libmodplug        && require_pkg_config libmodplug libmodplug libmodplug/modplug.h ModPlug_Load
-enabled libmp3lame        && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_quality -lmp3lame
-enabled libmysofa         && require libmysofa "mysofa.h" mysofa_load -lmysofa
-enabled libnpp            && { check_lib libnpp npp.h nppGetLibVersion -lnppig -lnppicc -lnppc ||
-                               check_lib libnpp npp.h nppGetLibVersion -lnppi -lnppc ||
+enabled libmp3lame        && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_quality -lmp3lame $libm_extralibs
+enabled libmysofa         && { check_pkg_config libmysofa libmysofa mysofa.h mysofa_load ||
+                               require libmysofa mysofa.h mysofa_load -lmysofa $zlib_extralibs; }
+enabled libnpp            && { check_lib libnpp npp.h nppGetLibVersion -lnppig -lnppicc -lnppc -lnppidei ||
+                               check_lib libnpp npp.h nppGetLibVersion -lnppi -lnppc -lnppidei ||
                                die "ERROR: libnpp not found"; }
 enabled libopencore_amrnb && require libopencore_amrnb opencore-amrnb/interf_dec.h Decoder_Interface_init -lopencore-amrnb
 enabled libopencore_amrwb && require libopencore_amrwb opencore-amrwb/dec_if.h D_IF_init -lopencore-amrwb
-enabled libopencv         && { check_header opencv2/core/core_c.h &&
-                               { use_pkg_config libopencv opencv opencv2/core/core_c.h cvCreateImageHeader ||
-                                 require opencv opencv2/core/core_c.h cvCreateImageHeader -lopencv_core -lopencv_imgproc; } ||
+enabled libopencv         && { check_headers opencv2/core/core_c.h &&
+                               { check_pkg_config libopencv opencv opencv2/core/core_c.h cvCreateImageHeader ||
+                                 require libopencv opencv2/core/core_c.h cvCreateImageHeader -lopencv_core -lopencv_imgproc; } ||
                                require_pkg_config libopencv opencv opencv/cxcore.h cvCreateImageHeader; }
 enabled libopenh264       && require_pkg_config libopenh264 openh264 wels/codec_api.h WelsGetCodecVersion
-enabled libopenjpeg       && { { check_lib libopenjpeg openjpeg-2.3/openjpeg.h opj_version -lopenjp2 -DOPJ_STATIC && add_cppflags -DOPJ_STATIC; } ||
-                               check_lib libopenjpeg openjpeg-2.3/openjpeg.h opj_version -lopenjp2 ||
-                               { check_lib libopenjpeg openjpeg-2.2/openjpeg.h opj_version -lopenjp2 -DOPJ_STATIC && add_cppflags -DOPJ_STATIC; } ||
-                               check_lib libopenjpeg openjpeg-2.2/openjpeg.h opj_version -lopenjp2 ||
-                               { check_lib libopenjpeg openjpeg-2.1/openjpeg.h opj_version -lopenjp2 -DOPJ_STATIC && add_cppflags -DOPJ_STATIC; } ||
-                               check_lib libopenjpeg openjpeg-2.1/openjpeg.h opj_version -lopenjp2 ||
-                               { check_lib libopenjpeg openjpeg-2.0/openjpeg.h opj_version -lopenjp2 -DOPJ_STATIC && add_cppflags -DOPJ_STATIC; } ||
-                               { check_lib libopenjpeg openjpeg-1.5/openjpeg.h opj_version -lopenjpeg -DOPJ_STATIC && add_cppflags -DOPJ_STATIC; } ||
-                               { check_lib libopenjpeg openjpeg.h opj_version -lopenjpeg -DOPJ_STATIC && add_cppflags -DOPJ_STATIC; } ||
-                               die "ERROR: libopenjpeg not found"; }
-enabled libopenmpt        && require_pkg_config libopenmpt "libopenmpt >= 0.2.6557" libopenmpt/libopenmpt.h openmpt_module_create
+enabled libopenjpeg       && { check_pkg_config libopenjpeg "libopenjp2 >= 2.1.0" openjpeg.h opj_version ||
+                               { require_pkg_config libopenjpeg "libopenjp2 >= 2.1.0" openjpeg.h opj_version -DOPJ_STATIC && add_cppflags -DOPJ_STATIC; } }
+enabled libopenmpt        && require_pkg_config libopenmpt "libopenmpt >= 0.2.6557" libopenmpt/libopenmpt.h openmpt_module_create -lstdc++ && append libopenmpt_extralibs "-lstdc++"
 enabled libopus           && {
     enabled libopus_decoder && {
         require_pkg_config libopus opus opus_multistream.h opus_multistream_decoder_create
@@ -5978,44 +6120,47 @@
 enabled libpulse          && require_pkg_config libpulse libpulse pulse/pulseaudio.h pa_context_new
 enabled librsvg           && require_pkg_config librsvg librsvg-2.0 librsvg-2.0/librsvg/rsvg.h rsvg_handle_render_cairo
 enabled librtmp           && require_pkg_config librtmp librtmp librtmp/rtmp.h RTMP_Socket
-enabled librubberband     && require_pkg_config librubberband "rubberband >= 1.8.1" rubberband/rubberband-c.h rubberband_new
+enabled librubberband     && require_pkg_config librubberband "rubberband >= 1.8.1" rubberband/rubberband-c.h rubberband_new -lstdc++ && append librubberband_extralibs "-lstdc++"
 enabled libshine          && require_pkg_config libshine shine shine/layer3.h shine_encode_buffer
-enabled libsmbclient      && { use_pkg_config libsmbclient smbclient libsmbclient.h smbc_init ||
-                               require smbclient libsmbclient.h smbc_init -lsmbclient; }
-enabled libsnappy         && require libsnappy snappy-c.h snappy_compress -lsnappy
-enabled libsoxr           && require libsoxr soxr.h soxr_create -lsoxr && LIBSOXR="-lsoxr"
+enabled libsmbclient      && { check_pkg_config libsmbclient smbclient libsmbclient.h smbc_init ||
+                               require libsmbclient libsmbclient.h smbc_init -lsmbclient; }
+enabled libsnappy         && require libsnappy snappy-c.h snappy_compress -lsnappy -lstdc++
+enabled libsoxr           && require libsoxr soxr.h soxr_create -lsoxr
 enabled libssh            && require_pkg_config libssh libssh libssh/sftp.h sftp_init
-enabled libspeex          && require_pkg_config libspeex speex speex/speex.h speex_decoder_init -lspeex
+enabled libspeex          && require_pkg_config libspeex speex speex/speex.h speex_decoder_init
+enabled libsrt            && require_pkg_config libsrt "srt >= 1.3.0" srt/srt.h srt_socket
+enabled libtensorflow     && require libtensorflow tensorflow/c/c_api.h TF_Version -ltensorflow
 enabled libtesseract      && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate
 enabled libtheora         && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg
+enabled libtls            && require_pkg_config libtls libtls tls.h tls_configure
 enabled libtwolame        && require libtwolame twolame.h twolame_init -ltwolame &&
                              { check_lib libtwolame twolame.h twolame_encode_buffer_float32_interleaved -ltwolame ||
                                die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
 enabled libv4l2           && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl
 enabled libvidstab        && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
-enabled libvmaf           && require_pkg_config libvmaf libvmaf libvmaf.h compute_vmaf
+enabled libvmaf           && require_pkg_config libvmaf "libvmaf >= 1.3.9" libvmaf.h compute_vmaf
 enabled libvo_amrwbenc    && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
 enabled libvorbis         && require_pkg_config libvorbis vorbis vorbis/codec.h vorbis_info_init &&
                              require_pkg_config libvorbisenc vorbisenc vorbis/vorbisenc.h vorbis_encode_init
 
 enabled libvpx            && {
     enabled libvpx_vp8_decoder && {
-        use_pkg_config libvpx_vp8_decoder "vpx >= 0.9.1" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp8_dx ||
-            check_lib libvpx_vp8_decoder "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_dec_init_ver -lvpx ||
-                die "ERROR: libvpx decoder version must be >=0.9.1";
+        check_pkg_config libvpx_vp8_decoder "vpx >= 1.4.0" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp8_dx ||
+            check_lib libvpx_vp8_decoder "vpx/vpx_decoder.h vpx/vp8dx.h" "vpx_codec_dec_init_ver VPX_IMG_FMT_HIGHBITDEPTH" -lvpx ||
+                die "ERROR: libvpx decoder version must be >=1.4.0";
     }
     enabled libvpx_vp8_encoder && {
-        use_pkg_config libvpx_vp8_encoder "vpx >= 0.9.7" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp8_cx ||
-            check_lib libvpx_vp8_encoder "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_enc_init_ver VP8E_SET_MAX_INTRA_BITRATE_PCT" -lvpx ||
-                die "ERROR: libvpx encoder version must be >=0.9.7";
+        check_pkg_config libvpx_vp8_encoder "vpx >= 1.4.0" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp8_cx ||
+            check_lib libvpx_vp8_encoder "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_enc_init_ver VPX_IMG_FMT_HIGHBITDEPTH" -lvpx ||
+                die "ERROR: libvpx encoder version must be >=1.4.0";
     }
     enabled libvpx_vp9_decoder && {
-        use_pkg_config libvpx_vp9_decoder "vpx >= 1.3.0" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp9_dx ||
-            check_lib libvpx_vp9_decoder "vpx/vpx_decoder.h vpx/vp8dx.h" "vpx_codec_vp9_dx" -lvpx
+        check_pkg_config libvpx_vp9_decoder "vpx >= 1.4.0" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp9_dx ||
+            check_lib libvpx_vp9_decoder "vpx/vpx_decoder.h vpx/vp8dx.h" "vpx_codec_vp9_dx VPX_IMG_FMT_HIGHBITDEPTH" "-lvpx $libm_extralibs"
     }
     enabled libvpx_vp9_encoder && {
-        use_pkg_config libvpx_vp9_encoder "vpx >= 1.3.0" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp9_cx ||
-            check_lib libvpx_vp9_encoder "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_vp9_cx VP9E_SET_AQ_MODE" -lvpx
+        check_pkg_config libvpx_vp9_encoder "vpx >= 1.4.0" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp9_cx ||
+            check_lib libvpx_vp9_encoder "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_vp9_cx VPX_IMG_FMT_HIGHBITDEPTH" "-lvpx $libm_extralibs"
     }
     if disabled_all libvpx_vp8_decoder libvpx_vp9_decoder libvpx_vp8_encoder libvpx_vp9_encoder; then
         die "libvpx enabled but no supported decoders found"
@@ -6025,23 +6170,27 @@
 enabled libwavpack        && require libwavpack wavpack/wavpack.h WavpackOpenFileOutput  -lwavpack
 enabled libwebp           && {
     enabled libwebp_encoder      && require_pkg_config libwebp "libwebp >= 0.2.0" webp/encode.h WebPGetEncoderVersion
-    enabled libwebp_anim_encoder && use_pkg_config libwebp_anim_encoder "libwebpmux >= 0.4.0" webp/mux.h WebPAnimEncoderOptionsInit; }
-enabled libx264           && { use_pkg_config libx264 x264 "stdint.h x264.h" x264_encoder_encode ||
-                               { require libx264 "stdint.h x264.h" x264_encoder_encode -lx264 &&
+    enabled libwebp_anim_encoder && check_pkg_config libwebp_anim_encoder "libwebpmux >= 0.4.0" webp/mux.h WebPAnimEncoderOptionsInit; }
+enabled libx264           && { check_pkg_config libx264 x264 "stdint.h x264.h" x264_encoder_encode ||
+                               { require libx264 "stdint.h x264.h" x264_encoder_encode "-lx264 $pthreads_extralibs $libm_extralibs" &&
                                  warn "using libx264 without pkg-config"; } } &&
-                             require_cpp_condition x264.h "X264_BUILD >= 118" &&
-                             { check_cpp_condition x264.h "X264_MPEG2" &&
-                               enable libx262; }
+                             require_cpp_condition libx264 x264.h "X264_BUILD >= 118" &&
+                             check_cpp_condition libx262 x264.h "X264_MPEG2"
 enabled libx265           && require_pkg_config libx265 x265 x265.h x265_api_get &&
-                             require_cpp_condition x265.h "X265_BUILD >= 68"
-enabled libxavs           && require libxavs "stdint.h xavs.h" xavs_encoder_encode -lxavs
+                             require_cpp_condition libx265 x265.h "X265_BUILD >= 68"
+enabled libxavs           && require libxavs "stdint.h xavs.h" xavs_encoder_encode "-lxavs $pthreads_extralibs $libm_extralibs"
+enabled libxavs2          && require_pkg_config libxavs2 "xavs2 >= 1.2.77" "stdint.h xavs2.h" xavs2_api_get
 enabled libxvid           && require libxvid xvid.h xvid_global -lxvidcore
-enabled libzimg           && require_pkg_config libzimg "zimg >= 2.3.0" zimg.h zimg_get_api_version
+enabled libzimg           && require_pkg_config libzimg "zimg >= 2.7.0" zimg.h zimg_get_api_version
 enabled libzmq            && require_pkg_config libzmq libzmq zmq.h zmq_ctx_new
-enabled libzvbi           && require libzvbi libzvbi.h vbi_decoder_new -lzvbi &&
-                             { check_cpp_condition libzvbi.h "VBI_VERSION_MAJOR > 0 || VBI_VERSION_MINOR > 2 || VBI_VERSION_MINOR == 2 && VBI_VERSION_MICRO >= 28" ||
+enabled libzvbi           && require_pkg_config libzvbi zvbi-0.2 libzvbi.h vbi_decoder_new &&
+                             { test_cpp_condition libzvbi.h "VBI_VERSION_MAJOR > 0 || VBI_VERSION_MINOR > 2 || VBI_VERSION_MINOR == 2 && VBI_VERSION_MICRO >= 28" ||
                                enabled gpl || die "ERROR: libzvbi requires version 0.2.28 or --enable-gpl."; }
 enabled libxml2           && require_pkg_config libxml2 libxml-2.0 libxml2/libxml/xmlversion.h xmlCheckVersion
+enabled mbedtls           && { check_pkg_config mbedtls mbedtls mbedtls/x509_crt.h mbedtls_x509_crt_init ||
+                               check_pkg_config mbedtls mbedtls mbedtls/ssl.h mbedtls_ssl_init ||
+                               check_lib mbedtls mbedtls/ssl.h mbedtls_ssl_init -lmbedtls -lmbedx509 -lmbedcrypto ||
+                               die "ERROR: mbedTLS not found"; }
 enabled mediacodec        && { enabled jni || die "ERROR: mediacodec requires --enable-jni"; }
 enabled mmal              && { check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host ||
                                { ! enabled cross_compile &&
@@ -6053,13 +6202,14 @@
 enabled openal            && { { for al_extralibs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do
                                check_lib openal 'AL/al.h' alGetError "${al_extralibs}" && break; done } ||
                                die "ERROR: openal not found"; } &&
-                             { check_cpp_condition "AL/al.h" "defined(AL_VERSION_1_1)" ||
+                             { test_cpp_condition "AL/al.h" "defined(AL_VERSION_1_1)" ||
                                die "ERROR: openal must be installed and version must be 1.1 or compatible"; }
-enabled opencl            && { check_lib opencl OpenCL/cl.h clEnqueueNDRangeKernel -Wl,-framework,OpenCL ||
+enabled opencl            && { check_pkg_config opencl OpenCL CL/cl.h clEnqueueNDRangeKernel ||
+                               check_lib opencl OpenCL/cl.h clEnqueueNDRangeKernel -Wl,-framework,OpenCL ||
                                check_lib opencl CL/cl.h clEnqueueNDRangeKernel -lOpenCL ||
                                die "ERROR: opencl not found"; } &&
-                             { check_cpp_condition "OpenCL/cl.h" "defined(CL_VERSION_1_2)" ||
-                               check_cpp_condition "CL/cl.h" "defined(CL_VERSION_1_2)" ||
+                             { test_cpp_condition "OpenCL/cl.h" "defined(CL_VERSION_1_2)" ||
+                               test_cpp_condition "CL/cl.h" "defined(CL_VERSION_1_2)" ||
                                die "ERROR: opencl must be installed and version must be 1.2 or compatible"; }
 enabled opengl            && { check_lib opengl GL/glx.h glXGetProcAddress "-lGL" ||
                                check_lib opengl windows.h wglGetProcAddress "-lopengl32 -lgdi32" ||
@@ -6067,23 +6217,23 @@
                                check_lib opengl ES2/gl.h glGetError "-isysroot=${sysroot} -Wl,-framework,OpenGLES" ||
                                die "ERROR: opengl not found."
                              }
-enabled omx_rpi           && { check_header OMX_Core.h ||
-                               { ! enabled cross_compile && add_cflags -isystem/opt/vc/include/IL && check_header OMX_Core.h ; } ||
-                               die "ERROR: OpenMAX IL headers not found"; }
-enabled omx               && require_header OMX_Core.h
-enabled openssl           && { use_pkg_config openssl openssl openssl/ssl.h OPENSSL_init_ssl ||
-                               use_pkg_config openssl openssl openssl/ssl.h SSL_library_init ||
+enabled omx               && require_headers OMX_Core.h
+enabled omx_rpi           && { check_headers OMX_Core.h ||
+                               { ! enabled cross_compile && add_cflags -isystem/opt/vc/include/IL && check_headers OMX_Core.h ; } ||
+                               die "ERROR: OpenMAX IL headers not found"; } && enable omx
+enabled openssl           && { check_pkg_config openssl openssl openssl/ssl.h OPENSSL_init_ssl ||
+                               check_pkg_config openssl openssl openssl/ssl.h SSL_library_init ||
                                check_lib openssl openssl/ssl.h SSL_library_init -lssl -lcrypto ||
                                check_lib openssl openssl/ssl.h SSL_library_init -lssl32 -leay32 ||
                                check_lib openssl openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 ||
                                die "ERROR: openssl not found"; }
-enabled rkmpp             && { { require_pkg_config rockchip_mpp rockchip_mpp rockchip/rk_mpi.h mpp_create ||
-                                 die "ERROR : Rockchip MPP was not found."; } &&
-                               { check_func_headers rockchip/rk_mpi_cmd.h "MPP_DEC_GET_FREE_PACKET_SLOT_COUNT" ||
-                                 die "ERROR: Rockchip MPP is outdated, please get a more recent one."; } &&
+enabled rkmpp             && { require_pkg_config rkmpp rockchip_mpp  rockchip/rk_mpi.h mpp_create &&
+                               require_pkg_config rockchip_mpp "rockchip_mpp >= 1.3.7" rockchip/rk_mpi.h mpp_create &&
                                { enabled libdrm ||
                                  die "ERROR: rkmpp requires --enable-libdrm"; }
                              }
+enabled vapoursynth       && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init
+
 
 if enabled gcrypt; then
     GCRYPT_CONFIG="${cross_prefix}libgcrypt-config"
@@ -6092,7 +6242,7 @@
         gcrypt_extralibs=$("${GCRYPT_CONFIG}" --libs)
         check_func_headers gcrypt.h gcry_mpi_new $gcrypt_cflags $gcrypt_extralibs ||
             die "ERROR: gcrypt not found"
-        add_cflags $gcrypt_cflags && add_extralibs $gcrypt_extralibs
+        add_cflags $gcrypt_cflags
     else
         require gcrypt gcrypt.h gcry_mpi_new -lgcrypt
     fi
@@ -6100,22 +6250,19 @@
 
 if enabled sdl2; then
     SDL2_CONFIG="${cross_prefix}sdl2-config"
-    if check_pkg_config sdl2 "sdl2 >= 2.0.1 sdl2 < 2.1.0" SDL_events.h SDL_PollEvent; then
-        check_func SDL_Init $sdl2_extralibs $sdl2_cflags ||
-            disable sdl2
-    elif "${SDL2_CONFIG}" --version > /dev/null 2>&1; then
+    test_pkg_config sdl2 "sdl2 >= 2.0.1 sdl2 < 2.1.0" SDL_events.h SDL_PollEvent
+    if disabled sdl2 && "${SDL2_CONFIG}" --version > /dev/null 2>&1; then
         sdl2_cflags=$("${SDL2_CONFIG}" --cflags)
         sdl2_extralibs=$("${SDL2_CONFIG}" --libs)
-        check_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) >= 0x020001" $sdl2_cflags &&
-        check_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) < 0x020100" $sdl2_cflags &&
-        check_func SDL_Init $sdl2_extralibs $sdl2_cflags &&
+        test_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) >= 0x020001" $sdl2_cflags &&
+        test_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) < 0x020100" $sdl2_cflags &&
+        check_func_headers SDL_events.h SDL_PollEvent $sdl2_extralibs $sdl2_cflags &&
             enable sdl2
     fi
     if test $target_os = "mingw32"; then
         sdl2_extralibs=$(filter_out '-mwindows' $sdl2_extralibs)
     fi
 fi
-enabled sdl2 && add_cflags $(filter_out '-Dmain=SDL_main' $sdl2_cflags) && add_extralibs $sdl2_extralibs
 
 if enabled decklink; then
     case $target_os in
@@ -6128,13 +6275,16 @@
 
 enabled securetransport &&
     check_func SecIdentityCreate "-Wl,-framework,CoreFoundation -Wl,-framework,Security" &&
-    check_lib securetransport "Security/SecureTransport.h Security/Security.h" "SSLCreateContext SecItemImport" "-Wl,-framework,CoreFoundation -Wl,-framework,Security" ||
+    check_lib securetransport "Security/SecureTransport.h Security/Security.h" "SSLCreateContext" "-Wl,-framework,CoreFoundation -Wl,-framework,Security" ||
         disable securetransport
 
+enabled securetransport &&
+    check_func SecItemImport "-Wl,-framework,CoreFoundation -Wl,-framework,Security"
+
 enabled schannel &&
     check_func_headers "windows.h security.h" InitializeSecurityContext -DSECURITY_WIN32 -lsecur32 &&
-    check_cpp_condition winerror.h "defined(SEC_I_CONTEXT_EXPIRED)" &&
-    add_extralibs -lsecur32 ||
+    test_cpp_condition winerror.h "defined(SEC_I_CONTEXT_EXPIRED)" &&
+    schannel_extralibs="-lsecur32" ||
         disable schannel
 
 makeinfo --version > /dev/null 2>&1 && enable makeinfo  || disable makeinfo
@@ -6147,58 +6297,57 @@
 rsync --help 2> /dev/null | grep -q 'contimeout' && enable rsync_contimeout || disable rsync_contimeout
 
 # check V4L2 codecs available in the API
-check_header linux/fb.h
-check_header linux/videodev2.h
-check_code cc linux/videodev2.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_safe struct_v4l2_frmivalenum_discrete
-check_code cc linux/videodev2.h "int i = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M | V4L2_BUF_FLAG_LAST;" || disable v4l2_m2m
-check_code cc linux/videodev2.h "int i = V4L2_PIX_FMT_VC1_ANNEX_G;" && enable vc1_v4l2_m2m
-check_code cc linux/videodev2.h "int i = V4L2_PIX_FMT_MPEG1;" && enable mpeg1_v4l2_m2m
-check_code cc linux/videodev2.h "int i = V4L2_PIX_FMT_MPEG2;" && enable mpeg2_v4l2_m2m
-check_code cc linux/videodev2.h "int i = V4L2_PIX_FMT_MPEG4;" && enable mpeg4_v4l2_m2m
-check_code cc linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC;" && enable hevc_v4l2_m2m
-check_code cc linux/videodev2.h "int i = V4L2_PIX_FMT_H263;" && enable h263_v4l2_m2m
-check_code cc linux/videodev2.h "int i = V4L2_PIX_FMT_H264;" && enable h264_v4l2_m2m
-check_code cc linux/videodev2.h "int i = V4L2_PIX_FMT_VP8;" && enable vp8_v4l2_m2m
-check_code cc linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;" && enable vp9_v4l2_m2m
+check_headers linux/fb.h
+check_headers linux/videodev2.h
+test_code cc linux/videodev2.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
+check_cc v4l2_m2m linux/videodev2.h "int i = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M | V4L2_BUF_FLAG_LAST;"
+check_cc vc1_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VC1_ANNEX_G;"
+check_cc mpeg1_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_MPEG1;"
+check_cc mpeg2_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_MPEG2;"
+check_cc mpeg4_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_MPEG4;"
+check_cc hevc_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC;"
+check_cc h263_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_H263;"
+check_cc h264_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_H264;"
+check_cc vp8_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP8;"
+check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;"
 
-check_header sys/videoio.h
-check_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_safe struct_v4l2_frmivalenum_discrete
+check_headers sys/videoio.h
+test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
 
 check_lib user32 "windows.h winuser.h" GetShellWindow -luser32
 check_lib vfw32 "windows.h vfw.h" capCreateCaptureWindow -lvfw32
 # check that WM_CAP_DRIVER_CONNECT is defined to the proper value
 # w32api 3.12 had it defined wrong
-check_cpp_condition vfw.h "WM_CAP_DRIVER_CONNECT > WM_USER" && enable vfwcap_defines
+check_cpp_condition vfwcap_defines vfw.h "WM_CAP_DRIVER_CONNECT > WM_USER"
 
 check_type "dshow.h" IBaseFilter
 
 # check for ioctl_meteor.h, ioctl_bt848.h and alternatives
-{ check_header dev/bktr/ioctl_meteor.h &&
-  check_header dev/bktr/ioctl_bt848.h; } ||
-{ check_header machine/ioctl_meteor.h &&
-  check_header machine/ioctl_bt848.h; } ||
-{ check_header dev/video/meteor/ioctl_meteor.h &&
-  check_header dev/video/bktr/ioctl_bt848.h; } ||
-check_header dev/ic/bt8xx.h
+check_headers "dev/bktr/ioctl_meteor.h dev/bktr/ioctl_bt848.h"                   ||
+    check_headers "machine/ioctl_meteor.h machine/ioctl_bt848.h"                 ||
+    check_headers "dev/video/meteor/ioctl_meteor.h dev/video/bktr/ioctl_bt848.h" ||
+    check_headers "dev/ic/bt8xx.h"
 
 if check_struct sys/soundcard.h audio_buf_info bytes; then
-    enable_safe sys/soundcard.h
+    enable_sanitized sys/soundcard.h
 else
-    check_cc -D__BSD_VISIBLE -D__XSI_VISIBLE <<EOF && add_cppflags -D__BSD_VISIBLE -D__XSI_VISIBLE && enable_safe sys/soundcard.h
+    test_cc -D__BSD_VISIBLE -D__XSI_VISIBLE <<EOF && add_cppflags -D__BSD_VISIBLE -D__XSI_VISIBLE && enable_sanitized sys/soundcard.h
     #include <sys/soundcard.h>
     audio_buf_info abc;
 EOF
 fi
-check_header soundcard.h
 
-enabled alsa && check_lib alsa alsa/asoundlib.h snd_pcm_htimestamp -lasound
+enabled alsa && check_pkg_config alsa alsa "alsa/asoundlib.h" snd_pcm_htimestamp ||
+    check_lib alsa alsa/asoundlib.h snd_pcm_htimestamp -lasound
 
-enabled jack && check_lib jack jack/jack.h jack_client_open -ljack &&
-    check_func jack_port_get_latency_range -ljack
+enabled libjack &&
+    require_pkg_config libjack jack jack/jack.h jack_port_get_latency_range
 
 enabled sndio && check_lib sndio sndio.h sio_open -lsndio
 
 if enabled libcdio; then
+    check_pkg_config libcdio libcdio_paranoia "cdio/cdda.h cdio/paranoia.h" cdio_cddap_open ||
+    check_pkg_config libcdio libcdio_paranoia "cdio/paranoia/cdda.h cdio/paranoia/paranoia.h" cdio_cddap_open ||
     check_lib libcdio "cdio/cdda.h cdio/paranoia.h" cdio_cddap_open -lcdio_paranoia -lcdio_cdda -lcdio ||
     check_lib libcdio "cdio/paranoia/cdda.h cdio/paranoia/paranoia.h" cdio_cddap_open -lcdio_paranoia -lcdio_cdda -lcdio ||
     die "ERROR: No usable libcdio/cdparanoia found"
@@ -6211,16 +6360,13 @@
     enabled libxcb_shm    && check_pkg_config libxcb_shm    xcb-shm    xcb/shm.h    xcb_shm_attach
     enabled libxcb_shape  && check_pkg_config libxcb_shape  xcb-shape  xcb/shape.h  xcb_shape_get_rectangles
     enabled libxcb_xfixes && check_pkg_config libxcb_xfixes xcb-xfixes xcb/xfixes.h xcb_xfixes_get_cursor_image
-
-    add_cflags $xcb_cflags $xcb_shm_cflags $xcb_xfixes_cflags $xcb_shape_cflags
-    add_extralibs $xcb_extralibs $xcb_shm_extralibs $xcb_xfixes_extralibs $xcb_shape_extralibs
 fi
 
 check_func_headers "windows.h" CreateDIBSection "$gdigrab_indev_extralibs"
 
 # d3d11va requires linking directly to dxgi and d3d11 if not building for
 # the desktop api partition
-check_cpp <<EOF && enable uwp && d3d11va_extralibs="-ldxgi -ld3d11"
+test_cpp <<EOF && enable uwp && d3d11va_extralibs="-ldxgi -ld3d11"
 #ifdef WINAPI_FAMILY
 #include <winapifamily.h>
 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
@@ -6234,25 +6380,43 @@
 EOF
 
 enabled vaapi &&
-    check_lib vaapi va/va.h vaInitialize -lva
+    check_pkg_config vaapi "libva >= 0.35.0" "va/va.h" vaInitialize
 
-enabled vaapi &&
-    check_code cc "va/va.h" "vaCreateSurfaces(0, 0, 0, 0, 0, 0, 0, 0)" ||
-    disable vaapi
+if enabled vaapi; then
+    check_pkg_config vaapi_drm "libva-drm" "va/va_drm.h" vaGetDisplayDRM
 
-enabled vaapi &&
-    check_lib vaapi_drm "va/va.h va/va_drm.h" vaGetDisplayDRM -lva -lva-drm
+    if enabled xlib; then
+        check_pkg_config vaapi_x11 "libva-x11" "va/va_x11.h" vaGetDisplay
+    fi
 
-enabled vaapi &&
-    check_lib vaapi_x11 "va/va.h va/va_x11.h" vaGetDisplay -lva -lva-x11 -lX11
+    check_cpp_condition vaapi_1 "va/va.h" "VA_CHECK_VERSION(1, 0, 0)"
+fi
 
-enabled vaapi &&
-    check_cpp_condition "va/va.h" "VA_CHECK_VERSION(1, 0, 0)" &&
-    enable vaapi_1
+if enabled_all opencl libdrm ; then
+    check_type "CL/cl_intel.h" "clCreateImageFromFdINTEL_fn" &&
+        enable opencl_drm_beignet
+    check_func_headers "CL/cl_ext.h" clImportMemoryARM &&
+        enable opencl_drm_arm
+fi
+
+if enabled_all opencl vaapi ; then
+    enabled opencl_drm_beignet && enable opencl_vaapi_beignet
+    check_type "CL/cl.h CL/va_ext.h" "clCreateFromVA_APIMediaSurfaceINTEL_fn" &&
+        enable opencl_vaapi_intel_media
+fi
+
+if enabled_all opencl dxva2 ; then
+    check_type "CL/cl_dx9_media_sharing.h" cl_dx9_surface_info_khr &&
+        enable opencl_dxva2
+fi
+
+if enabled_all opencl d3d11va ; then
+    check_type "CL/cl_d3d11.h" clGetDeviceIDsFromD3D11KHR_fn &&
+        enable opencl_d3d11
+fi
 
 enabled vdpau &&
-    check_cpp_condition vdpau/vdpau.h "defined VDP_DECODER_PROFILE_MPEG4_PART2_ASP" ||
-    disable vdpau
+    check_cpp_condition vdpau vdpau/vdpau.h "defined VDP_DECODER_PROFILE_MPEG4_PART2_ASP"
 
 enabled vdpau &&
     check_lib vdpau_x11 "vdpau/vdpau.h vdpau/vdpau_x11.h" vdp_device_create_x11 -lvdpau -lX11
@@ -6264,21 +6428,27 @@
         mingw32*|mingw64*|win32|win64|linux|cygwin*)
             ;;
         *)
-            disable cuda cuvid nvenc
+            disable ffnvcodec cuvid nvdec nvenc
             ;;
     esac
 else
-    disable cuda cuvid nvenc
+    disable ffnvcodec cuvid nvdec nvenc
 fi
 
+enabled ffnvcodec && enable cuda
+
 enabled nvenc &&
-    check_cc -I$source_path <<EOF || disable nvenc
-#include "compat/nvenc/nvEncodeAPI.h"
+    test_cc -I$source_path <<EOF || disable nvenc
+#include <ffnvcodec/nvEncodeAPI.h>
 NV_ENCODE_API_FUNCTION_LIST flist;
 void f(void) { struct { const GUID guid; } s[] = { { NV_ENC_PRESET_HQ_GUID } }; }
 int main(void) { return 0; }
 EOF
 
+enabled amf &&
+    check_cpp_condition amf "AMF/core/Version.h" \
+        "(AMF_VERSION_MAJOR << 48 | AMF_VERSION_MINOR << 32 | AMF_VERSION_RELEASE << 16 | AMF_VERSION_BUILD_NUM) >= 0x0001000400040001"
+
 # Funny iconv installations are not unusual, so check it after all flags have been set
 if enabled libc_iconv; then
     check_func_headers iconv.h iconv
@@ -6333,15 +6503,14 @@
 check_disable_warning_headers -Wno-deprecated-declarations
 check_disable_warning_headers -Wno-unused-variable
 
-check_cc <<EOF && enable blocks_extension
+test_cc <<EOF && enable blocks_extension
 void (^block)(void);
 EOF
 
 # add some linker flags
 check_ldflags -Wl,--warn-common
 check_ldflags -Wl,-rpath-link=libpostproc:libswresample:libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil:libavresample
-enabled rpath && add_ldexeflags -Wl,-rpath,$libdir
-enabled rpath && add_ldlibflags -Wl,-rpath,$libdir
+enabled rpath && add_ldexeflags -Wl,-rpath,$libdir && add_ldsoflags -Wl,-rpath,$libdir
 test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic
 
 # add some strip flags
@@ -6380,7 +6549,7 @@
                   -Wl,--wrap,sws_scale ||
     disable xmm_clobber_test
 
-check_ld "cc" <<EOF && enable proper_dce
+check_ld "cc" proper_dce <<EOF
 extern const int array[512];
 static inline int func(void) { return array[0]; }
 int main(void) { return 0; }
@@ -6390,11 +6559,12 @@
     echo "X { local: *; };" > $TMPV
     if test_ldflags -Wl,${version_script},$TMPV; then
         append SHFLAGS '-Wl,${version_script},\$(SUBDIR)lib\$(NAME).ver'
-        check_cc <<EOF && enable symver_asm_label
+        quotes='""'
+        test_cc <<EOF && enable symver_asm_label
 void ff_foo(void) __asm__ ("av_foo@VERSION");
 void ff_foo(void) { ${inline_asm+__asm__($quotes);} }
 EOF
-        check_cc <<EOF && enable symver_gnu_asm
+        test_cc <<EOF && enable symver_gnu_asm
 __asm__(".symver ff_foo,av_foo@VERSION");
 void ff_foo(void) {}
 EOF
@@ -6439,7 +6609,7 @@
 
 enabled ftrapv && check_cflags -ftrapv
 
-check_cc -mno-red-zone <<EOF && noredzone_flags="-mno-red-zone"
+test_cc -mno-red-zone <<EOF && noredzone_flags="-mno-red-zone"
 int x;
 EOF
 
@@ -6471,12 +6641,6 @@
             check_cflags -falign-stack=maintain-16-byte ||
             disable aligned_stack
     fi
-elif enabled ccc; then
-    # disable some annoying warnings
-    add_cflags -msg_disable bitnotint
-    add_cflags -msg_disable mixfuncvoid
-    add_cflags -msg_disable nonstandcast
-    add_cflags -msg_disable unsupieee
 elif enabled gcc; then
     check_optflags -fno-tree-vectorize
     check_cflags -Werror=format-security
@@ -6487,11 +6651,39 @@
     check_cflags -Wformat
     check_cflags -fdiagnostics-color=auto
     enabled extra_warnings || check_disable_warning -Wno-maybe-uninitialized
+    if enabled x86_32; then
+        case $target_os in
+        *bsd*)
+            # BSDs don't guarantee a 16 byte aligned stack, but we can
+            # request GCC to try to maintain 16 byte alignment throughout
+            # function calls. Library entry points that might call assembly
+            # functions align the stack. (The parameter means 2^4 bytes.)
+            check_cflags -mpreferred-stack-boundary=4
+            ;;
+        esac
+    fi
 elif enabled llvm_gcc; then
     check_cflags -mllvm -stack-alignment=16
 elif enabled clang; then
-    check_cflags -mllvm -stack-alignment=16
-    check_cflags -mstack-alignment=16
+    if enabled x86_32; then
+        # Clang doesn't support maintaining alignment without assuming the
+        # same alignment in every function. If 16 byte alignment would be
+        # enabled, one would also have to either add attribute_align_arg on
+        # every single entry point into the libraries or enable -mstackrealign
+        # (doing stack realignment in every single function).
+        case $target_os in
+        mingw32|win32|*bsd*)
+            disable aligned_stack
+            ;;
+        *)
+            check_cflags -mllvm -stack-alignment=16
+            check_cflags -mstack-alignment=16
+            ;;
+        esac
+    else
+        check_cflags -mllvm -stack-alignment=16
+        check_cflags -mstack-alignment=16
+    fi
     check_cflags -Qunused-arguments
     check_cflags -Werror=implicit-function-declaration
     check_cflags -Werror=missing-prototypes
@@ -6509,11 +6701,9 @@
     add_cflags -W${armcc_opt},--diag_suppress=3343 # hardfp compat
     add_cflags -W${armcc_opt},--diag_suppress=167  # pointer sign
     add_cflags -W${armcc_opt},--diag_suppress=513  # pointer sign
-elif enabled tms470; then
-    add_cflags -pds=824 -pds=837
-    disable inline_asm
 elif enabled pathscale; then
     add_cflags -fstrict-overflow -OPT:wrap_around_unsafe_opt=OFF
+    disable inline_asm
 elif enabled_any msvc icl; then
     enabled x86_32 && disable aligned_stack
     enabled_all x86_32 debug && add_cflags -Oy-
@@ -6522,7 +6712,7 @@
     if enabled icl; then
         # -Qansi-alias is basically -fstrict-aliasing, but does not work
         # (correctly) on icl 13.x.
-        check_cpp_condition "windows.h" "__ICL < 1300 || __ICL >= 1400" &&
+        test_cpp_condition "windows.h" "__ICL < 1300 || __ICL >= 1400" &&
             add_cflags -Qansi-alias
         # Some inline asm is not compilable in debug
         if enabled debug; then
@@ -6531,24 +6721,24 @@
         fi
     fi
     # msvcrt10 x64 incorrectly enables log2, only msvcrt12 (MSVC 2013) onwards actually has log2.
-    check_cpp_condition crtversion.h "_VC_CRT_MAJOR_VERSION >= 12" || disable log2
+    check_cpp_condition log2 crtversion.h "_VC_CRT_MAJOR_VERSION >= 12"
     # The CRT headers contain __declspec(restrict) in a few places, but if redefining
     # restrict, this might break. MSVC 2010 and 2012 fail with __declspec(__restrict)
     # (as it ends up if the restrict redefine is done before including stdlib.h), while
     # MSVC 2013 and newer can handle it fine.
     # If this declspec fails, force including stdlib.h before the restrict redefinition
     # happens in config.h.
-    if [ $_restrict != restrict ]; then
-        check_cc <<EOF || add_cflags -FIstdlib.h
-__declspec($_restrict) void* foo(int);
+    if [ $restrict_keyword != restrict ]; then
+        test_cc <<EOF || add_cflags -FIstdlib.h
+__declspec($restrict_keyword) void *foo(int);
 EOF
     fi
     # the new SSA optimzer in VS2015 U3 is mis-optimizing some parts of the code
     # Issue has been fixed in MSVC v19.00.24218.
-    check_cpp_condition windows.h "_MSC_FULL_VER >= 190024218" ||
+    test_cpp_condition windows.h "_MSC_FULL_VER >= 190024218" ||
         check_cflags -d2SSAOptimizer-
     # enable utf-8 source processing on VS2015 U2 and newer
-    check_cpp_condition windows.h "_MSC_FULL_VER >= 190023918" &&
+    test_cpp_condition windows.h "_MSC_FULL_VER >= 190023918" &&
         add_cflags -utf-8
 fi
 
@@ -6556,7 +6746,7 @@
     varname=${pfx%_}cc_type
     eval "type=\$$varname"
     if [ "$type" = "msvc" ]; then
-        check_${pfx}cc <<EOF || add_${pfx}cflags -Dinline=__inline
+        test_${pfx}cc <<EOF || add_${pfx}cflags -Dinline=__inline
 static inline int foo(int a) { return a; }
 EOF
     fi
@@ -6574,12 +6764,6 @@
     ;;
 esac
 
-case $target_os in
-    osf1)
-        enabled ccc && add_ldflags '-Wl,-expect_unresolved,*'
-    ;;
-esac
-
 enable frame_thread_encoder
 
 enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
@@ -6590,39 +6774,60 @@
            $ALL_COMPONENTS    \
 
 enabled threads && ! enabled pthreads && ! enabled atomics_native && die "non pthread threading without atomics not supported, try adding --enable-pthreads or --cpu=i486 or higher if you are on x86"
-
+enabled avresample && warn "Building with deprecated library libavresample"
 
 if test $target_os = "haiku"; then
     disable memalign
     disable posix_memalign
 fi
 
-# add_dep lib dep
-# -> enable ${lib}_deps_${dep}
-# -> add $dep to ${lib}_deps only once
-add_dep() {
-    lib=$1
-    dep=$2
-    enabled "${lib}_deps_${dep}" && return 0
-    enable  "${lib}_deps_${dep}"
-    prepend "${lib}_deps" $dep
-}
-
-# merge deps lib components
-# merge all ${component}_deps into ${lib}_deps and ${lib}_deps_*
-merge_deps() {
-    lib=$1
-    shift
-    for comp in $*; do
-        enabled $comp || continue
-        eval "dep=\"\$${comp}_deps\""
-        for d in $dep; do
-            add_dep $lib $d
+flatten_extralibs(){
+    nested_entries=
+    list_name=$1
+    eval list=\$${1}
+    for entry in $list; do
+        entry_copy=$entry
+        resolve entry_copy
+        flat_entries=
+        for e in $entry_copy; do
+            case $e in
+                *_extralibs) nested_entries="$nested_entries$e ";;
+                          *) flat_entries="$flat_entries$e ";;
+            esac
         done
+        eval $entry="\$flat_entries"
     done
+    append $list_name "$nested_entries"
+
+    resolve nested_entries
+    if test -n "$(filter '*_extralibs' $nested_entries)"; then
+        flatten_extralibs $list_name
+    fi
 }
 
-merge_deps libavfilter $FILTER_LIST
+flatten_extralibs_wrapper(){
+    list_name=$1
+    flatten_extralibs $list_name
+    unique $list_name
+    resolve $list_name
+    eval $list_name=\$\(\$ldflags_filter \$$list_name\)
+    eval printf \''%s'\' \""\$$list_name"\"
+}
+
+for linkunit in $LIBRARY_LIST; do
+    unset current_extralibs
+    eval components=\$$(toupper ${linkunit})_COMPONENTS_LIST
+    for comp in ${components}; do
+        enabled $comp || continue
+        comp_extralibs="${comp}_extralibs"
+        append current_extralibs $comp_extralibs
+    done
+    eval prepend ${linkunit}_extralibs $current_extralibs
+done
+
+for linkunit in $LIBRARY_LIST $PROGRAM_LIST $EXTRALIBS_LIST; do
+    eval ${linkunit}_extralibs=\$\(flatten_extralibs_wrapper ${linkunit}_extralibs\)
+done
 
 map 'enabled $v && intrinsics=${v#intrinsics_}' $INTRINSICS_LIST
 
@@ -6634,7 +6839,7 @@
     fi
 done
 
-if disabled stdatomic_h; then
+if disabled stdatomic; then
     if enabled atomics_gcc; then
         add_cppflags '-I\$(SRC_PATH)/compat/atomics/gcc'
     elif enabled atomics_win32; then
@@ -6657,13 +6862,17 @@
 
 enabled zlib && add_cppflags -DZLIB_CONST
 
-# conditional library dependencies, in linking order
+# conditional library dependencies, in any order
+enabled afftdn_filter       && prepend avfilter_deps "avcodec"
 enabled afftfilt_filter     && prepend avfilter_deps "avcodec"
 enabled afir_filter         && prepend avfilter_deps "avcodec"
 enabled amovie_filter       && prepend avfilter_deps "avformat avcodec"
 enabled aresample_filter    && prepend avfilter_deps "swresample"
 enabled atempo_filter       && prepend avfilter_deps "avcodec"
+enabled bm3d_filter         && prepend avfilter_deps "avcodec"
 enabled cover_rect_filter   && prepend avfilter_deps "avformat avcodec"
+enabled convolve_filter     && prepend avfilter_deps "avcodec"
+enabled deconvolve_filter   && prepend avfilter_deps "avcodec"
 enabled ebur128_filter && enabled swresample && prepend avfilter_deps "swresample"
 enabled elbg_filter         && prepend avfilter_deps "avcodec"
 enabled fftfilt_filter      && prepend avfilter_deps "avcodec"
@@ -6686,19 +6895,48 @@
 enabled smartblur_filter    && prepend avfilter_deps "swscale"
 enabled spectrumsynth_filter && prepend avfilter_deps "avcodec"
 enabled spp_filter          && prepend avfilter_deps "avcodec"
+enabled sr_filter           && prepend avfilter_deps "avformat swscale"
 enabled subtitles_filter    && prepend avfilter_deps "avformat avcodec"
 enabled uspp_filter         && prepend avfilter_deps "avcodec"
 enabled zoompan_filter      && prepend avfilter_deps "swscale"
 
 enabled lavfi_indev         && prepend avdevice_deps "avfilter"
 
+#FIXME
+enabled sdl2_outdev     && add_cflags $(filter_out '-Dmain=SDL_main' $sdl2_cflags)
+
 enabled opus_decoder    && prepend avcodec_deps "swresample"
 
+# reorder the items at var $1 to align with the items order at var $2 .
+# die if an item at $1 is not at $2 .
+reorder_by(){
+    eval rb_in=\$$1
+    eval rb_ordered=\$$2
+
+    for rb in $rb_in; do
+        is_in $rb $rb_ordered || die "$rb at \$$1 is not at \$$2"
+    done
+
+    rb_out=
+    for rb in $rb_ordered; do
+        is_in $rb $rb_in && rb_out="$rb_out$rb "
+    done
+    eval $1=\$rb_out
+}
+
+# deps-expand fflib $1:  N x {append all expanded deps; unique}
+# within a set of N items, N expansions are enough to expose a cycle.
 expand_deps(){
-    lib_deps=${1}_deps
-    eval "deps=\$$lib_deps"
-    append $lib_deps $(map 'eval echo \$${v}_deps' $deps)
-    unique $lib_deps
+    unique ${1}_deps  # required for the early break test.
+    for dummy in $LIBRARY_LIST; do  # N iteratios
+        eval deps=\$${1}_deps
+        append ${1}_deps $(map 'eval echo \$${v}_deps' $deps)
+        unique ${1}_deps
+        eval '[ ${#deps} = ${#'${1}_deps'} ]' && break  # doesn't expand anymore
+    done
+
+    eval is_in $1 \$${1}_deps && die "Dependency cycle at ${1}_deps"
+    reorder_by ${1}_deps LIBRARY_LIST  # linking order is expected later
 }
 
 #we have to remove gpl from the deps here as some code assumes all lib deps are libs
@@ -6706,17 +6944,6 @@
 
 map 'expand_deps $v' $LIBRARY_LIST
 
-license="LGPL version 2.1 or later"
-if enabled nonfree; then
-    license="nonfree and unredistributable"
-elif enabled gplv3; then
-    license="GPL version 3 or later"
-elif enabled lgplv3; then
-    license="LGPL version 3 or later"
-elif enabled gpl; then
-    license="GPL version 2 or later"
-fi
-
 if test "$quiet" != "yes"; then
 
 echo "install prefix            $prefix"
@@ -6751,6 +6978,7 @@
     echo "AESNI enabled             ${aesni-no}"
     echo "AVX enabled               ${avx-no}"
     echo "AVX2 enabled              ${avx2-no}"
+    echo "AVX-512 enabled           ${avx512-no}"
     echo "XOP enabled               ${xop-no}"
     echo "FMA3 enabled              ${fma3-no}"
     echo "FMA4 enabled              ${fma4-no}"
@@ -6805,11 +7033,11 @@
 echo
 
 echo "External libraries:"
-print_enabled '' $EXTERNAL_LIBRARY_LIST | print_in_columns
+print_enabled '' $EXTERNAL_LIBRARY_LIST $EXTERNAL_AUTODETECT_LIBRARY_LIST | print_in_columns
 echo
 
 echo "External libraries providing hardware acceleration:"
-print_enabled '' $HWACCEL_LIBRARY_LIST | print_in_columns
+print_enabled '' $HWACCEL_LIBRARY_LIST $HWACCEL_AUTODETECT_LIBRARY_LIST | print_in_columns
 echo
 
 echo "Libraries:"
@@ -6836,12 +7064,21 @@
 
 echo "License: $license"
 
-echo "Creating configuration files ..."
-
 fi # test "$quiet" != "yes"
 
+if test -n "$WARNINGS"; then
+    printf "\n%s%s$WARNINGS%s" "$warn_color" "$bold_color" "$reset_color"
+    enabled fatal_warnings && exit 1
+fi
+
 test -e Makefile || echo "include $source_path/Makefile" > Makefile
 
+esc(){
+    echo "$*" | sed 's/%/%25/g;s/:/%3a/g'
+}
+
+echo "config:$arch:$subarch:$cpu:$target_os:$(esc $cc_ident):$(esc $FFMPEG_CONFIGURATION)" > ffbuild/config.fate
+
 enabled stripping || strip="echo skipping strip"
 enabled stripping || striptype=""
 
@@ -6870,6 +7107,7 @@
 CC_IDENT=$cc_ident
 ARCH=$arch
 INTRINSICS=$intrinsics
+EXTERN_PREFIX=$extern_prefix
 CC=$cc
 CXX=$cxx
 AS=$as
@@ -6885,6 +7123,8 @@
 AR=$ar
 ARFLAGS=$arflags
 AR_O=$ar_o
+AR_CMD=$ar
+NM_CMD=$nm
 RANLIB=$ranlib
 STRIP=$strip
 STRIPTYPE=$striptype
@@ -6919,7 +7159,7 @@
 DOXYGEN=$doxygen
 LDFLAGS=$LDFLAGS
 LDEXEFLAGS=$LDEXEFLAGS
-LDLIBFLAGS=$LDLIBFLAGS
+LDSOFLAGS=$LDSOFLAGS
 SHFLAGS=$(echo $($ldflags_filter $SHFLAGS))
 ASMSTRIPFLAGS=$ASMSTRIPFLAGS
 X86ASMFLAGS=$X86ASMFLAGS
@@ -6963,7 +7203,6 @@
 TARGET_SAMPLES=${target_samples:-\$(SAMPLES)}
 CFLAGS-ffplay=${sdl2_cflags}
 CFLAGS_HEADERS=$CFLAGS_HEADERS
-ZLIB=$($ldflags_filter -lz)
 LIB_INSTALL_EXTRA_CMD=$LIB_INSTALL_EXTRA_CMD
 EXTRALIBS=$extralibs
 COMPAT_OBJS=$compat_objs
@@ -6987,12 +7226,9 @@
 
 map 'eval echo "${v}_FFLIBS=\$${v}_deps" >> ffbuild/config.mak' $LIBRARY_LIST
 
-print_program_extralibs(){
-    eval "program_extralibs=\$${1}_extralibs"
-    eval echo "EXTRALIBS-${1}=${program_extralibs}" >> ffbuild/config.mak
-}
-
-map 'print_program_extralibs $v' $PROGRAM_LIST
+for entry in $LIBRARY_LIST $PROGRAM_LIST $EXTRALIBS_LIST; do
+    eval echo "EXTRALIBS-${entry}=\$${entry}_extralibs" >> ffbuild/config.mak
+done
 
 cat > $TMPH <<EOF
 /* Automatically generated by configure - do not modify! */
@@ -7004,7 +7240,7 @@
 #define FFMPEG_DATADIR "$(eval c_escape $datadir)"
 #define AVCONV_DATADIR "$(eval c_escape $datadir)"
 #define CC_IDENT "$(c_escape ${cc_ident:-Unknown compiler})"
-#define av_restrict $_restrict
+#define av_restrict $restrict_keyword
 #define EXTERN_PREFIX "${extern_prefix}"
 #define EXTERN_ASM ${extern_prefix}
 #define BUILDSUF "$build_suffix"
@@ -7021,7 +7257,9 @@
 
 if enabled x86asm; then
     append config_files $TMPASM
-    printf '' >$TMPASM
+    cat > $TMPASM <<EOF
+; Automatically generated by configure - do not modify!
+EOF
 fi
 
 enabled getenv || echo "#define getenv(x) NULL" >> $TMPH
@@ -7048,7 +7286,7 @@
 enabled x86asm && cp_if_changed $TMPASM config.asm
 
 cat > $TMPH <<EOF
-/* Generated by ffconf */
+/* Generated by ffmpeg configure */
 #ifndef AVUTIL_AVCONFIG_H
 #define AVUTIL_AVCONFIG_H
 EOF
@@ -7059,6 +7297,11 @@
 
 cp_if_changed $TMPH libavutil/avconfig.h
 
+# full_filter_name_foo=vf_foo
+# full_filter_name_bar=asrc_bar
+# ...
+eval "$(sed -n "s/^extern AVFilter ff_\([avfsinkrc]\{2,5\}\)_\(.*\);/full_filter_name_\2=\1_\2/p" $source_path/libavfilter/allfilters.c)"
+
 # generate the lists of enabled components
 print_enabled_components(){
     file=$1
@@ -7067,23 +7310,43 @@
     shift 3
     echo "static const $struct_name * const $name[] = {" > $TMPH
     for c in $*; do
-        enabled $c && printf "    &ff_%s,\n" $c >> $TMPH
+        if enabled $c; then
+            case $name in
+                filter_list)
+                    eval c=\$full_filter_name_${c%_filter}
+                ;;
+                indev_list)
+                    c=${c%_indev}_demuxer
+                ;;
+                outdev_list)
+                    c=${c%_outdev}_muxer
+                ;;
+            esac
+            printf "    &ff_%s,\n" $c >> $TMPH
+        fi
     done
+    if [ "$name" = "filter_list" ]; then
+        for c in asrc_abuffer vsrc_buffer asink_abuffer vsink_buffer; do
+            printf "    &ff_%s,\n" $c >> $TMPH
+        done
+    fi
     echo "    NULL };" >> $TMPH
     cp_if_changed $TMPH $file
 }
 
+print_enabled_components libavfilter/filter_list.c AVFilter filter_list $FILTER_LIST
+print_enabled_components libavcodec/codec_list.c AVCodec codec_list $CODEC_LIST
+print_enabled_components libavcodec/parser_list.c AVCodecParser parser_list $PARSER_LIST
 print_enabled_components libavcodec/bsf_list.c AVBitStreamFilter bitstream_filters $BSF_LIST
+print_enabled_components libavformat/demuxer_list.c AVInputFormat demuxer_list $DEMUXER_LIST
+print_enabled_components libavformat/muxer_list.c AVOutputFormat muxer_list $MUXER_LIST
+print_enabled_components libavdevice/indev_list.c AVInputFormat indev_list $INDEV_LIST
+print_enabled_components libavdevice/outdev_list.c AVOutputFormat outdev_list $OUTDEV_LIST
 print_enabled_components libavformat/protocol_list.c URLProtocol url_protocols $PROTOCOL_LIST
 
-if test -n "$WARNINGS"; then
-    printf "\n%s%s$WARNINGS%s" "$warn_color" "$bold_color" "$reset_color"
-    enabled fatal_warnings && exit 1
-fi
-
 # Settings for pkg-config files
 
-cat > ffbuild/config.sh <<EOF
+cat > $TMPH <<EOF
 # Automatically generated by configure - do not modify!
 shared=$shared
 build_suffix=$build_suffix
@@ -7094,19 +7357,20 @@
 source_path=${source_path}
 LIBPREF=${LIBPREF}
 LIBSUF=${LIBSUF}
-
-extralibs_avutil="$LIBRT $LIBM"
-extralibs_avcodec="$extralibs"
-extralibs_avformat="$extralibs"
-extralibs_avdevice="$extralibs"
-extralibs_avfilter="$extralibs"
-extralibs_avresample="$LIBM"
-extralibs_postproc=""
-extralibs_swscale="$LIBM"
-extralibs_swresample="$LIBM $LIBSOXR"
+extralibs_avutil="$avutil_extralibs"
+extralibs_avcodec="$avcodec_extralibs"
+extralibs_avformat="$avformat_extralibs"
+extralibs_avdevice="$avdevice_extralibs"
+extralibs_avfilter="$avfilter_extralibs"
+extralibs_avresample="$avresample_extralibs"
+extralibs_postproc="$postproc_extralibs"
+extralibs_swscale="$swscale_extralibs"
+extralibs_swresample="$swresample_extralibs"
 EOF
 
 for lib in $LIBRARY_LIST; do
     lib_deps="$(eval echo \$${lib}_deps)"
-    echo ${lib}_deps=\"$lib_deps\" >> ffbuild/config.sh
+    echo ${lib}_deps=\"$lib_deps\" >> $TMPH
 done
+
+cp_if_changed $TMPH ffbuild/config.sh

diff --git a/doc/APIchanges b/doc/APIchanges
index 6803eaa..9e93555 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges

@@ -2,19 +2,193 @@
 since the last major version increase or the API was added.
 
 The last version increases were:
-libavcodec:    2015-08-28
-libavdevice:   2015-08-28
-libavfilter:   2015-08-28
-libavformat:   2015-08-28
-libavresample: 2015-08-28
-libpostproc:   2015-08-28
-libswresample: 2015-08-28
-libswscale:    2015-08-28
-libavutil:     2015-08-28
+libavcodec:    2017-10-21
+libavdevice:   2017-10-21
+libavfilter:   2017-10-21
+libavformat:   2017-10-21
+libavresample: 2017-10-21
+libpostproc:   2017-10-21
+libswresample: 2017-10-21
+libswscale:    2017-10-21
+libavutil:     2017-10-21
 
 
 API changes, most recent first:
 
+2018-10-11 - xxxxxxxxxx - lavc 58.33.100 - mediacodec.h
+  Add av_mediacodec_render_buffer_at_time().
+
+2018-09-09 - xxxxxxxxxx - lavc 58.29.100 - avcodec.h
+  Add AV_PKT_DATA_AFD
+
+2018-08-16 - xxxxxxxxxx - lavc 58.23.100 - avcodec.h
+  Add av_bsf_flush().
+
+2018-05-xx - xxxxxxxxxx - lavf 58.15.100 - avformat.h
+  Add pmt_version field to AVProgram
+
+2018-05-xx - xxxxxxxxxx - lavf 58.14.100 - avformat.h
+  Add AV_DISPOSITION_STILL_IMAGE
+
+2018-05-xx - xxxxxxxxxx - lavu 56.18.101 - hwcontext_cuda.h
+  Add AVCUDADeviceContext.stream.
+
+2018-04-xx - xxxxxxxxxx - lavu 56.18.100 - pixdesc.h
+  Add AV_PIX_FMT_FLAG_ALPHA to AV_PIX_FMT_PAL8.
+
+2018-04-xx - xxxxxxxxxx - lavu 56.17.100 - opt.h
+  Add AV_OPT_FLAG_DEPRECATED.
+
+2018-04-xx - xxxxxxxxxx - lavu 56.16.100 - threadmessage.h
+  Add av_thread_message_queue_nb_elems().
+
+-------- 8< --------- FFmpeg 4.0 was cut here -------- 8< ---------
+
+2018-04-03 - d6fc031caf - lavu 56.13.100 - pixdesc.h
+  Deprecate AV_PIX_FMT_FLAG_PSEUDOPAL and make allocating a pseudo palette
+  optional for API users (see AV_PIX_FMT_FLAG_PSEUDOPAL doxygen for details).
+
+2018-04-01 - 860086ee16 - lavc 58.17.100 - avcodec.h
+  Add av_packet_make_refcounted().
+
+2018-04-01 - f1805d160d - lavfi 7.14.100 - avfilter.h
+  Deprecate use of avfilter_register(), avfilter_register_all(),
+  avfilter_next(). Add av_filter_iterate().
+
+2018-03-25 - b7d0d912ef - lavc 58.16.100 - avcodec.h
+  Add FF_SUB_CHARENC_MODE_IGNORE.
+
+2018-03-23 - db2a7c947e - lavu 56.12.100 - encryption_info.h
+  Add AVEncryptionInitInfo and AVEncryptionInfo structures to hold new side-data
+  for encryption info.
+
+2018-03-21 - f14ca60001 - lavc 58.15.100 - avcodec.h
+  Add av_packet_make_writable().
+
+2018-03-18 - 4b86ac27a0 - lavu 56.11.100 - frame.h
+  Add AV_FRAME_DATA_QP_TABLE_PROPERTIES and AV_FRAME_DATA_QP_TABLE_DATA.
+
+2018-03-15 - e0e72539cf - lavu 56.10.100 - opt.h
+  Add AV_OPT_FLAG_BSF_PARAM
+
+2018-03-07 - 950170bd3b - lavu 56.9.100 - crc.h
+  Add AV_CRC_8_EBU crc variant.
+
+2018-03-07 - 2a0eb86857 - lavc 58.14.100 - mediacodec.h
+  Change the default behavior of avcodec_flush() on mediacodec
+  video decoders. To restore the previous behavior, use the new
+  delay_flush=1 option.
+
+2018-03-01 - 6731f60598 - lavu 56.8.100 - frame.h
+  Add av_frame_new_side_data_from_buf().
+
+2018-02-15 - 8a8d0b319a
+  Change av_ripemd_update(), av_murmur3_update() and av_hash_update() length
+  parameter type to size_t at next major bump.
+
+2018-02-12 - bcab11a1a2 - lavfi 7.12.100 - avfilter.h
+  Add AVFilterContext.extra_hw_frames.
+
+2018-02-12 - d23fff0d8a - lavc 58.11.100 - avcodec.h
+  Add AVCodecContext.extra_hw_frames.
+
+2018-02-06 - 0694d87024 - lavf 58.9.100 - avformat.h
+  Deprecate use of av_register_input_format(), av_register_output_format(),
+  av_register_all(), av_iformat_next(), av_oformat_next().
+  Add av_demuxer_iterate(), and av_muxer_iterate().
+
+2018-02-06 - 36c85d6e77 - lavc 58.10.100 - avcodec.h
+  Deprecate use of avcodec_register(), avcodec_register_all(),
+  av_codec_next(), av_register_codec_parser(), and av_parser_next().
+  Add av_codec_iterate() and av_parser_iterate().
+
+2018-02-04 - ff46124b0d - lavf 58.8.100 - avformat.h
+  Deprecate the current names of the RTSP "timeout", "stimeout", "user-agent"
+  options. Introduce "listen_timeout" as replacement for the current "timeout"
+  option, and "user_agent" as replacement for "user-agent". Once the deprecation
+  is over, the old "timeout" option will be removed, and "stimeout" will be
+  renamed to "stimeout" (the "timeout" option will essentially change semantics).
+
+2018-01-28 - ea3672b7d6 - lavf 58.7.100 - avformat.h
+  Deprecate AVFormatContext filename field which had limited length, use the
+  new dynamically allocated url field instead.
+
+2018-01-28 - ea3672b7d6 - lavf 58.7.100 - avformat.h
+  Add url field to AVFormatContext and add ff_format_set_url helper function.
+
+2018-01-27 - 6194d7e564 - lavf 58.6.100 - avformat.h
+  Add AVFMTCTX_UNSEEKABLE (for HLS demuxer).
+
+2018-01-23 - 9f07cf7c00 - lavu 56.9.100 - aes_ctr.h
+  Add method to set the 16-byte IV.
+
+2018-01-16 - 631c56a8e4 - lavf 58.5.100 - avformat.h
+  Explicitly make avformat_network_init() and avformat_network_deinit() optional.
+  If these are not called, network initialization and deinitialization is
+  automatic, and unlike in older versions, fully supported, unless libavformat
+  is linked to ancient GnuTLS and OpenSSL.
+
+2018-01-16 - 6512ff72f9 - lavf 58.4.100 - avformat.h
+  Deprecate AVStream.recommended_encoder_configuration. It was useful only for
+  FFserver, which has been removed.
+
+2018-01-05 - 798dcf2432 - lavfi 7.11.101 - avfilter.h
+  Deprecate avfilter_link_get_channels(). Use av_buffersink_get_channels().
+
+2017-01-04 - c29038f304 - lavr 4.0.0 - avresample.h
+  Deprecate the entire library. Merged years ago to provide compatibility
+  with Libav, it remained unmaintained by the FFmpeg project and duplicated
+  functionality provided by libswresample.
+
+  In order to improve consistency and reduce attack surface, it has been deprecated.
+  Users of this library are asked to migrate to libswresample, which, as well as
+  providing more functionality, is faster and has higher accuracy.
+
+2017-12-26 - a04c2c707d - lavc 58.9.100 - avcodec.h
+  Deprecate av_lockmgr_register(). You need to build FFmpeg with threading
+  support enabled to get basic thread-safety (which is the default build
+  configuration).
+
+2017-12-24 - 8b81eabe57 - lavu 56.7.100 - cpu.h
+  AVX-512 flags added.
+
+2017-12-16 - 8bf4e6d3ce - lavc 58.8.100 - avcodec.h
+  The MediaCodec decoders now support AVCodecContext.hw_device_ctx.
+
+2017-12-16 - e4d9f05ca7 - lavu 56.6.100 - hwcontext.h hwcontext_mediacodec.h
+  Add AV_HWDEVICE_TYPE_MEDIACODEC and a new installed header with
+  MediaCodec-specific hwcontext definitions.
+
+2017-12-14 - b945fed629 - lavc 58.7.100 - avcodec.h
+  Add AV_CODEC_CAP_HARDWARE, AV_CODEC_CAP_HYBRID, and AVCodec.wrapper_name,
+  and mark all AVCodecs accordingly.
+
+2017-11-29 - d268094f88 - lavu 56.4.100 / 56.7.0 - stereo3d.h
+  Add view field to AVStereo3D structure and AVStereo3DView enum.
+
+2017-11-26 - 3a71bcc213 - lavc 58.6.100 - avcodec.h
+  Add const to AVCodecContext.hwaccel.
+
+2017-11-26 - 3536a3efb9 - lavc 58.5.100 - avcodec.h
+  Deprecate user visibility of the AVHWAccel structure and the functions
+  av_register_hwaccel() and av_hwaccel_next().
+
+2017-11-26 - 24cc0a53e9 - lavc 58.4.100 - avcodec.h
+  Add AVCodecHWConfig and avcodec_get_hw_config().
+
+2017-11-22 - 3650cb2dfa - lavu 56.3.100 - opencl.h
+  Remove experimental OpenCL API (av_opencl_*).
+
+2017-11-22 - b25d8ef0a7 - lavu 56.2.100 - hwcontext.h hwcontext_opencl.h
+  Add AV_HWDEVICE_TYPE_OPENCL and a new installed header with
+  OpenCL-specific hwcontext definitions.
+
+2017-11-22 - a050f56c09 - lavu 56.1.100 - pixfmt.h
+  Add AV_PIX_FMT_OPENCL.
+
+2017-11-11 - 48e4eda11d - lavc 58.3.100 - avcodec.h
+  Add avcodec_get_hw_frames_parameters().
+
 -------- 8< --------- FFmpeg 3.4 was cut here -------- 8< ---------
 
 2017-09-28 - b6cf66ae1c - lavc 57.106.104 - avcodec.h
@@ -742,7 +916,7 @@
   Add av_opt_get_dict_val/set_dict_val with AV_OPT_TYPE_DICT to support
   dictionary types being set as options.
 
-2014-08-13 - afbd4b8 - lavf 56.01.0 - avformat.h
+2014-08-13 - afbd4b7e09 - lavf 56.01.0 - avformat.h
   Add AVFormatContext.event_flags and AVStream.event_flags for signaling to
   the user when events happen in the file/stream.
 
@@ -759,7 +933,7 @@
 2014-08-08 - 5c3c671 - lavf 55.53.100 - avio.h
   Add avio_feof() and deprecate url_feof().
 
-2014-08-07 - bb78903 - lsws 2.1.3 - swscale.h
+2014-08-07 - bb789016d4 - lsws 2.1.3 - swscale.h
   sws_getContext is not going to be removed in the future.
 
 2014-08-07 - a561662 / ad1ee5f - lavc 55.73.101 / 55.57.3 - avcodec.h

diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
index 2dffe02..d948c6d 100644
--- a/doc/bitstream_filters.texi
+++ b/doc/bitstream_filters.texi

@@ -50,21 +50,22 @@
 
 Add extradata to the beginning of the filtered packets.
 
+@table @option
+@item freq
 The additional argument specifies which packets should be filtered.
 It accepts the values:
 @table @samp
-@item a
-add extradata to all key packets, but only if @var{local_header} is
-set in the @option{flags2} codec context field
-
 @item k
+@item keyframe
 add extradata to all key packets
 
 @item e
+@item all
 add extradata to all packets
 @end table
+@end table
 
-If not specified it is assumed @samp{k}.
+If not specified it is assumed @samp{e}.
 
 For example the following @command{ffmpeg} command forces a global
 header (thus disabling individual packet headers) in the H.264 packets
@@ -74,6 +75,10 @@
 ffmpeg -i INPUT -map 0 -flags:v +global_header -c:v libx264 -bsf:v dump_extra out.ts
 @end example
 
+@section eac3_core
+
+Extract the core from a E-AC-3 stream, dropping extra channels.
+
 @section extract_extradata
 
 Extract the in-band extradata.
@@ -92,6 +97,135 @@
 bitstream after extraction.
 @end table
 
+@section filter_units
+
+Remove units with types in or not in a given set from the stream.
+
+@table @option
+@item pass_types
+List of unit types or ranges of unit types to pass through while removing
+all others.  This is specified as a '|'-separated list of unit type values
+or ranges of values with '-'.
+
+@item remove_types
+Identical to @option{pass_types}, except the units in the given set
+removed and all others passed through.
+@end table
+
+Extradata is unchanged by this transformation, but note that if the stream
+contains inline parameter sets then the output may be unusable if they are
+removed.
+
+For example, to remove all non-VCL NAL units from an H.264 stream:
+@example
+ffmpeg -i INPUT -c:v copy -bsf:v 'filter_units=pass_types=1-5' OUTPUT
+@end example
+
+To remove all AUDs, SEI and filler from an H.265 stream:
+@example
+ffmpeg -i INPUT -c:v copy -bsf:v 'filter_units=remove_types=35|38-40' OUTPUT
+@end example
+
+@section hapqa_extract
+
+Extract Rgb or Alpha part of an HAPQA file, without recompression, in order to create an HAPQ or an HAPAlphaOnly file.
+
+@table @option
+@item texture
+Specifies the texture to keep.
+
+@table @option
+@item color
+@item alpha
+@end table
+
+@end table
+
+Convert HAPQA to HAPQ
+@example
+ffmpeg -i hapqa_inputfile.mov -c copy -bsf:v hapqa_extract=texture=color -tag:v HapY -metadata:s:v:0 encoder="HAPQ" hapq_file.mov
+@end example
+
+Convert HAPQA to HAPAlphaOnly
+@example
+ffmpeg -i hapqa_inputfile.mov -c copy -bsf:v hapqa_extract=texture=alpha -tag:v HapA -metadata:s:v:0 encoder="HAPAlpha Only" hapalphaonly_file.mov
+@end example
+
+@section h264_metadata
+
+Modify metadata embedded in an H.264 stream.
+
+@table @option
+@item aud
+Insert or remove AUD NAL units in all access units of the stream.
+
+@table @samp
+@item insert
+@item remove
+@end table
+
+@item sample_aspect_ratio
+Set the sample aspect ratio of the stream in the VUI parameters.
+
+@item video_format
+@item video_full_range_flag
+Set the video format in the stream (see H.264 section E.2.1 and
+table E-2).
+
+@item colour_primaries
+@item transfer_characteristics
+@item matrix_coefficients
+Set the colour description in the stream (see H.264 section E.2.1
+and tables E-3, E-4 and E-5).
+
+@item chroma_sample_loc_type
+Set the chroma sample location in the stream (see H.264 section
+E.2.1 and figure E-1).
+
+@item tick_rate
+Set the tick rate (num_units_in_tick / time_scale) in the VUI
+parameters.  This is the smallest time unit representable in the
+stream, and in many cases represents the field rate of the stream
+(double the frame rate).
+@item fixed_frame_rate_flag
+Set whether the stream has fixed framerate - typically this indicates
+that the framerate is exactly half the tick rate, but the exact
+meaning is dependent on interlacing and the picture structure (see
+H.264 section E.2.1 and table E-6).
+
+@item crop_left
+@item crop_right
+@item crop_top
+@item crop_bottom
+Set the frame cropping offsets in the SPS.  These values will replace
+the current ones if the stream is already cropped.
+
+These fields are set in pixels.  Note that some sizes may not be
+representable if the chroma is subsampled or the stream is interlaced
+(see H.264 section 7.4.2.1.1).
+
+@item sei_user_data
+Insert a string as SEI unregistered user data.  The argument must
+be of the form @emph{UUID+string}, where the UUID is as hex digits
+possibly separated by hyphens, and the string can be anything.
+
+For example, @samp{086f3693-b7b3-4f2c-9653-21492feee5b8+hello} will
+insert the string ``hello'' associated with the given UUID.
+
+@item delete_filler
+Deletes both filler NAL units and filler SEI messages.
+
+@item level
+Set the level in the SPS.  Refer to H.264 section A.3 and tables A-1
+to A-5.
+
+The argument must be the name of a level (for example, @samp{4.2}), a
+level_idc value (for example, @samp{42}), or the special name @samp{auto}
+indicating that the filter should attempt to guess the level from the
+input stream properties.
+
+@end table
+
 @section h264_mp4toannexb
 
 Convert an H.264 bitstream from length prefixed mode to start code
@@ -111,6 +245,69 @@
 Please note that this filter is auto-inserted for MPEG-TS (muxer
 @code{mpegts}) and raw H.264 (muxer @code{h264}) output formats.
 
+@section h264_redundant_pps
+
+This applies a specific fixup to some Blu-ray streams which contain
+redundant PPSs modifying irrelevant parameters of the stream which
+confuse other transformations which require correct extradata.
+
+A new single global PPS is created, and all of the redundant PPSs
+within the stream are removed.
+
+@section hevc_metadata
+
+Modify metadata embedded in an HEVC stream.
+
+@table @option
+@item aud
+Insert or remove AUD NAL units in all access units of the stream.
+
+@table @samp
+@item insert
+@item remove
+@end table
+
+@item sample_aspect_ratio
+Set the sample aspect ratio in the stream in the VUI parameters.
+
+@item video_format
+@item video_full_range_flag
+Set the video format in the stream (see H.265 section E.3.1 and
+table E.2).
+
+@item colour_primaries
+@item transfer_characteristics
+@item matrix_coefficients
+Set the colour description in the stream (see H.265 section E.3.1
+and tables E.3, E.4 and E.5).
+
+@item chroma_sample_loc_type
+Set the chroma sample location in the stream (see H.265 section
+E.3.1 and figure E.1).
+
+@item tick_rate
+Set the tick rate in the VPS and VUI parameters (num_units_in_tick /
+time_scale).  Combined with @option{num_ticks_poc_diff_one}, this can
+set a constant framerate in the stream.  Note that it is likely to be
+overridden by container parameters when the stream is in a container.
+
+@item num_ticks_poc_diff_one
+Set poc_proportional_to_timing_flag in VPS and VUI and use this value
+to set num_ticks_poc_diff_one_minus1 (see H.265 sections 7.4.3.1 and
+E.3.1).  Ignored if @option{tick_rate} is not also set.
+
+@item crop_left
+@item crop_right
+@item crop_top
+@item crop_bottom
+Set the conformance window cropping offsets in the SPS.  These values
+will replace the current ones if the stream is already cropped.
+
+These fields are set in pixels.  Note that some sizes may not be
+representable if the chroma is subsampled (H.265 section 7.4.3.2.1).
+
+@end table
+
 @section hevc_mp4toannexb
 
 Convert an HEVC/H.265 bitstream from length prefixed mode to start code
@@ -198,6 +395,42 @@
 
 Decompress non-standard compressed MP3 audio headers.
 
+@section mpeg2_metadata
+
+Modify metadata embedded in an MPEG-2 stream.
+
+@table @option
+@item display_aspect_ratio
+Set the display aspect ratio in the stream.
+
+The following fixed values are supported:
+@table @option
+@item 4/3
+@item 16/9
+@item 221/100
+@end table
+Any other value will result in square pixels being signalled instead
+(see H.262 section 6.3.3 and table 6-3).
+
+@item frame_rate
+Set the frame rate in the stream.  This is constructed from a table
+of known values combined with a small multiplier and divisor - if
+the supplied value is not exactly representable, the nearest
+representable value will be used instead (see H.262 section 6.3.3
+and table 6-4).
+
+@item video_format
+Set the video format in the stream (see H.262 section 6.3.6 and
+table 6-6).
+
+@item colour_primaries
+@item transfer_characteristics
+@item matrix_coefficients
+Set the colour description in the stream (see H.262 section 6.3.6
+and tables 6-7, 6-8 and 6-9).
+
+@end table
+
 @section mpeg4_unpack_bframes
 
 Unpack DivX-style packed B-frames.
@@ -275,6 +508,40 @@
 
 See also the @ref{mov2textsub} filter.
 
+@section trace_headers
+
+Log trace output containing all syntax elements in the coded stream
+headers (everything above the level of individual coded blocks).
+This can be useful for debugging low-level stream issues.
+
+Supports H.264, H.265, MPEG-2 and VP9.
+
+@section vp9_metadata
+
+Modify metadata embedded in a VP9 stream.
+
+@table @option
+@item color_space
+Set the color space value in the frame header.
+@table @samp
+@item unknown
+@item bt601
+@item bt709
+@item smpte170
+@item smpte240
+@item bt2020
+@item rgb
+@end table
+
+@item color_range
+Set the color range value in the frame header.  Note that this cannot
+be set in RGB streams.
+@table @samp
+@item tv
+@item pc
+@end table
+@end table
+
 @section vp9_superframe
 
 Merge VP9 invisible (alt-ref) frames back into VP9 superframes. This

diff --git a/doc/codecs.texi b/doc/codecs.texi
index 40f64fe..3770f4f 100644
--- a/doc/codecs.texi
+++ b/doc/codecs.texi

@@ -44,12 +44,6 @@
 Use loop filter.
 @item qscale
 Use fixed qscale.
-@item gmc
-Use gmc.
-@item mv0
-Always try a mb with mv=<0,0>.
-@item input_preserved
-
 @item pass1
 Use internal 2pass ratecontrol in first pass mode.
 @item pass2
@@ -62,8 +56,6 @@
 Set error[?] variables during encoding.
 @item truncated
 
-@item naq
-Normalize adaptive quantization.
 @item ildct
 Use interlaced DCT.
 @item low_delay
@@ -475,8 +467,6 @@
 macroblock (MB) type
 @item qp
 per-block quantization parameter (QP)
-@item mv
-motion vector
 @item dct_coeff
 
 @item green_metadata
@@ -486,18 +476,12 @@
 
 @item startcode
 
-@item pts
-
 @item er
 error recognition
 @item mmco
 memory management control operations (H.264)
 @item bugs
 
-@item vis_qp
-visualize quantization parameter (QP), lower QP are tinted greener
-@item vis_mb_type
-visualize block types
 @item buffers
 picture buffer allocations
 @item thread_ops
@@ -506,21 +490,6 @@
 skip motion compensation
 @end table
 
-@item vismv @var{integer} (@emph{decoding,video})
-Visualize motion vectors (MVs).
-
-This option is deprecated, see the codecview filter instead.
-
-Possible values:
-@table @samp
-@item pf
-forward predicted MVs of P-frames
-@item bf
-forward predicted MVs of B-frames
-@item bb
-backward predicted MVs of B-frames
-@end table
-
 @item cmp @var{integer} (@emph{encoding,video})
 Set full pel me compare function.
 
@@ -757,8 +726,6 @@
 
 @item slice_flags @var{integer}
 
-@item xvmc_acceleration @var{integer}
-
 @item mbd @var{integer} (@emph{encoding,video})
 Set macroblock decision algorithm (high quality mode).
 
@@ -1019,10 +986,6 @@
 @item trellis @var{integer} (@emph{encoding,audio,video})
 Set rate-distortion optimal quantization.
 
-@item sc_factor @var{integer} (@emph{encoding,video})
-Set value multiplied by qscale for each frame and added to
-scene_change_score.
-
 @item mv0_threshold @var{integer} (@emph{encoding,video})
 @item b_sensitivity @var{integer} (@emph{encoding,video})
 Adjust sensitivity of b_frame_strategy 1.

diff --git a/doc/decoders.texi b/doc/decoders.texi
index d149d2b..25187e3 100644
--- a/doc/decoders.texi
+++ b/doc/decoders.texi

@@ -25,13 +25,6 @@
 A description of some of the currently available video decoders
 follows.
 
-@section hevc
-
-HEVC / H.265 decoder.
-
-Note: the @option{skip_loop_filter} option has effect only at level
-@code{all}.
-
 @section rawvideo
 
 Raw video decoder.
@@ -54,6 +47,12 @@
 
 @end table
 
+@section libdavs2
+
+AVS2-P2/IEEE1857.4 video decoder wrapper.
+
+This decoder allows libavcodec to decode AVS2 streams with davs2 library.
+
 @c man end VIDEO DECODERS
 
 @chapter Audio Decoders
@@ -255,18 +254,25 @@
 
 @table @option
 @item txt_page
-List of teletext page numbers to decode. You may use the special * string to
-match all pages. Pages that do not match the specified list are dropped.
+List of teletext page numbers to decode. Pages that do not match the specified
+list are dropped. You may use the special @code{*} string to match all pages,
+or @code{subtitle} to match all subtitle pages.
 Default value is *.
 @item txt_chop_top
 Discards the top teletext line. Default value is 1.
 @item txt_format
-Specifies the format of the decoded subtitles. The teletext decoder is capable
-of decoding the teletext pages to bitmaps or to simple text, you should use
-"bitmap" for teletext pages, because certain graphics and colors cannot be
-expressed in simple text. You might use "text" for teletext based subtitles if
-your application can handle simple text based subtitles. Default value is
-bitmap.
+Specifies the format of the decoded subtitles.
+@table @option
+@item bitmap
+The default format, you should use this for teletext pages, because certain
+graphics and colors cannot be expressed in simple text or even ASS.
+@item text
+Simple text based output without formatting.
+@item ass
+Formatted ASS output, subtitle pages and teletext pages are returned in
+different styles, subtitle pages are stripped down to text, but an effort is
+made to keep the text alignment and the formatting.
+@end table
 @item txt_left
 X offset of generated bitmaps, default is 0.
 @item txt_top
@@ -279,7 +285,8 @@
 Default value is 1.
 @item txt_duration
 Sets the display duration of the decoded teletext pages or subtitles in
-milliseconds. Default value is 30000 which is 30 seconds.
+milliseconds. Default value is -1 which means infinity or until the next
+subtitle event comes.
 @item txt_transparent
 Force transparent background of the generated teletext bitmaps. Default value
 is 0 which means an opaque background.

diff --git a/doc/demuxers.texi b/doc/demuxers.texi
index 73dc0fe..aad94eb 100644
--- a/doc/demuxers.texi
+++ b/doc/demuxers.texi

@@ -244,6 +244,16 @@
 @end example
 @end itemize
 
+@section dash
+
+Dynamic Adaptive Streaming over HTTP demuxer.
+
+This demuxer presents all AVStreams found in the manifest.
+By setting the discard flags on AVStreams the caller can decide
+which streams to actually receive.
+Each stream mirrors the @code{id} and @code{bandwidth} properties from the
+@code{<Representation>} as metadata keys named "id" and "variant_bitrate" respectively.
+
 @section flv, live_flv
 
 Adobe Flash Video Format demuxer.
@@ -259,6 +269,12 @@
 @table @option
 @item -flv_metadata @var{bool}
 Allocate the streams according to the onMetaData array content.
+
+@item -flv_ignore_prevtag @var{bool}
+Ignore the size of previous tag value.
+
+@item -flv_full_metadata @var{bool}
+Output all context of the onMetadata.
 @end table
 
 @section gif
@@ -316,6 +332,14 @@
 @item max_reload
 Maximum number of times a insufficient list is attempted to be reloaded.
 Default value is 1000.
+
+@item http_persistent
+Use persistent HTTP connections. Applicable only for HTTP streams.
+Enabled by default.
+
+@item http_multiple
+Use multiple HTTP connections for downloading HTTP segments.
+Enabled by default for HTTP/1.1 servers.
 @end table
 
 @section image2
@@ -520,6 +544,9 @@
 Set size limit for looking up a new synchronization. Default value is
 65536.
 
+@item skip_unknown_pmt
+Skip PMTs for programs not defined in the PAT. Default value is 0.
+
 @item fix_teletext_pts
 Override teletext packet PTS and DTS values with the timestamps calculated
 from the PCR of the first program which the teletext stream is part of and is
@@ -534,6 +561,10 @@
 Scan and combine all PMTs. The value is an integer with value from -1
 to 1 (-1 means automatic setting, 1 means enabled, 0 means
 disabled). Default value is -1.
+
+@item merge_pmt_versions
+Re-use existing streams when a PMT's version is updated and elementary
+streams move to different PIDs. Default value is 0.
 @end table
 
 @section mpjpeg

diff --git a/doc/developer.texi b/doc/developer.texi
index 98540c8..5c342c9 100644
--- a/doc/developer.texi
+++ b/doc/developer.texi

@@ -10,9 +10,7 @@
 
 @contents
 
-@chapter Developers Guide
-
-@section Notes for external developers
+@chapter Notes for external developers
 
 This document is mostly useful for internal FFmpeg developers.
 External developers who need to use the API in their application should
@@ -30,15 +28,13 @@
 external programs read the @file{LICENSE} file in the source tree and
 consult @url{https://ffmpeg.org/legal.html}.
 
-@section Contributing
+@chapter Contributing
 
-There are 3 ways by which code gets into FFmpeg.
+There are 2 ways by which code gets into FFmpeg:
 @itemize @bullet
-@item Submitting patches to the main developer mailing list.
+@item Submitting patches to the ffmpeg-devel mailing list.
       See @ref{Submitting patches} for details.
 @item Directly committing changes to the main tree.
-@item Committing changes to a git clone, for example on github.com or
-      gitorious.org. And asking us to merge these changes.
 @end itemize
 
 Whichever way, changes should be reviewed by the maintainer of the code
@@ -47,9 +43,9 @@
 and should try to fix issues their commit causes.
 
 @anchor{Coding Rules}
-@section Coding Rules
+@chapter Coding Rules
 
-@subsection Code formatting conventions
+@section Code formatting conventions
 
 There are the following guidelines regarding the indentation in files:
 
@@ -74,7 +70,7 @@
 The main priority in FFmpeg is simplicity and small code size in order to
 minimize the bug count.
 
-@subsection Comments
+@section Comments
 Use the JavaDoc/Doxygen  format (see examples below) so that code documentation
 can be generated automatically. All nontrivial functions should have a comment
 above them explaining what the function does, even if it is just one sentence.
@@ -114,7 +110,7 @@
 ...
 @end example
 
-@subsection C language features
+@section C language features
 
 FFmpeg is programmed in the ISO C90 language with a few additional
 features from ISO C99, namely:
@@ -133,6 +129,9 @@
 compound literals (@samp{x = (struct s) @{ 17, 23 @};}).
 
 @item
+for loops with variable definition (@samp{for (int i = 0; i < 8; i++)});
+
+@item
 Implementation defined behavior for signed integers is assumed to match the
 expected behavior for two's complement. Non representable values in integer
 casts are binary truncated. Shift right of signed values uses sign extension.
@@ -160,7 +159,7 @@
 GCC statement expressions (@samp{(x = (@{ int y = 4; y; @})}).
 @end itemize
 
-@subsection Naming conventions
+@section Naming conventions
 All names should be composed with underscores (_), not CamelCase. For example,
 @samp{avfilter_get_video_buffer} is an acceptable function name and
 @samp{AVFilterGetVideo} is not. The exception from this are type names, like
@@ -184,7 +183,7 @@
 @item
 For variables and functions visible outside of file scope, used internally
 across multiple libraries, use @code{avpriv_} as prefix, for example,
-@samp{avpriv_aac_parse_header}.
+@samp{avpriv_report_missing_feature}.
 
 @item
 Each library has its own prefix for public symbols, in addition to the
@@ -204,7 +203,7 @@
 are reserved at the file level and may not be used for externally visible
 symbols. If in doubt, just avoid names starting with @code{_} altogether.
 
-@subsection Miscellaneous conventions
+@section Miscellaneous conventions
 
 @itemize @bullet
 @item
@@ -216,7 +215,7 @@
 should also be avoided if they don't make the code easier to understand.
 @end itemize
 
-@subsection Editor configuration
+@section Editor configuration
 In order to configure Vim to follow FFmpeg formatting conventions, paste
 the following snippet into your @file{.vimrc}:
 @example
@@ -249,9 +248,9 @@
 (setq c-default-style "ffmpeg")
 @end lisp
 
-@section Development Policy
+@chapter Development Policy
 
-@subsection Patches/Committing
+@section Patches/Committing
 @subheading Licenses for patches must be compatible with FFmpeg.
 Contributions should be licensed under the
 @uref{http://www.gnu.org/licenses/lgpl-2.1.html, LGPL 2.1},
@@ -350,7 +349,7 @@
 1 week for big patches) then commit your patch if you think it is OK.
 Also note, the maintainer can simply ask for more time to review!
 
-@subsection Code
+@section Code
 @subheading API/ABI changes should be discussed before they are made.
 Do not change behavior of the programs (renaming options etc) or public
 API or ABI without first discussing it on the ffmpeg-devel mailing list.
@@ -381,12 +380,29 @@
 always check values read from some untrusted source before using them
 as array index or other risky things.
 
-@subsection Documentation/Other
+@section Documentation/Other
+@subheading Subscribe to the ffmpeg-devel mailing list.
+It is important to be subscribed to the
+@uref{https://lists.ffmpeg.org/mailman/listinfo/ffmpeg-devel, ffmpeg-devel}
+mailing list. Almost any non-trivial patch is to be sent there for review.
+Other developers may have comments about your contribution. We expect you see
+those comments, and to improve it if requested. (N.B. Experienced committers
+have other channels, and may sometimes skip review for trivial fixes.) Also,
+discussion here about bug fixes and FFmpeg improvements by other developers may
+be helpful information for you. Finally, by being a list subscriber, your
+contribution will be posted immediately to the list, without the moderation
+hold which messages from non-subscribers experience.
+
+However, it is more important to the project that we receive your patch than
+that you be subscribed to the ffmpeg-devel list. If you have a patch, and don't
+want to subscribe and discuss the patch, then please do send it to the list
+anyway.
+
 @subheading Subscribe to the ffmpeg-cvslog mailing list.
-It is important to do this as the diffs of all commits are sent there and
-reviewed by all the other developers. Bugs and possible improvements or
-general questions regarding commits are discussed there. We expect you to
-react if problems with your code are uncovered.
+Diffs of all commits are sent to the
+@uref{https://lists.ffmpeg.org/mailman/listinfo/ffmpeg-cvslog, ffmpeg-cvslog}
+mailing list. Some developers read this list to review all code base changes
+from all sources. Subscribing to this list is not mandatory.
 
 @subheading Keep the documentation up to date.
 Update the documentation if you change behavior or add features. If you are
@@ -406,7 +422,7 @@
 
 We think our rules are not too hard. If you have comments, contact us.
 
-@section Code of conduct
+@chapter Code of conduct
 
 Be friendly and respectful towards others and third parties.
 Treat others the way you yourself want to be treated.
@@ -436,7 +452,7 @@
 "Be excellent to each other."
 
 @anchor{Submitting patches}
-@section Submitting patches
+@chapter Submitting patches
 
 First, read the @ref{Coding Rules} above if you did not yet, in particular
 the rules regarding patch submission.
@@ -485,7 +501,7 @@
 send a reminder by email. Your patch should eventually be dealt with.
 
 
-@section New codecs or formats checklist
+@chapter New codecs or formats checklist
 
 @enumerate
 @item
@@ -537,7 +553,7 @@
 @end enumerate
 
 
-@section patch submission checklist
+@chapter Patch submission checklist
 
 @enumerate
 @item
@@ -547,9 +563,9 @@
 Was the patch generated with git format-patch or send-email?
 
 @item
-Did you sign off your patch? (git commit -s)
-See @url{http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob_plain;f=Documentation/SubmittingPatches} for the meaning
-of sign off.
+Did you sign-off your patch? (@code{git commit -s})
+See @uref{https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/process/submitting-patches.rst, Sign your work} for the meaning
+of @dfn{sign-off}.
 
 @item
 Did you provide a clear git commit log message?
@@ -650,7 +666,7 @@
 of leaks, out of array accesses, etc.
 @end enumerate
 
-@section Patch review process
+@chapter Patch review process
 
 All patches posted to ffmpeg-devel will be reviewed, unless they contain a
 clear note that the patch is not for the git master branch.
@@ -681,7 +697,7 @@
 way to get everyone's patches reviewed sooner.
 
 @anchor{Regression tests}
-@section Regression tests
+@chapter Regression tests
 
 Before submitting a patch (or committing to the repository), you should at least
 test that you did not break anything.
@@ -692,7 +708,7 @@
 this case, the reference results of the regression tests shall be modified
 accordingly].
 
-@subsection Adding files to the fate-suite dataset
+@section Adding files to the fate-suite dataset
 
 When there is no muxer or encoder available to generate test media for a
 specific test then the media has to be included in the fate-suite.
@@ -703,7 +719,7 @@
 message or introductory message for the patch series that you post to
 the ffmpeg-devel mailing list, a direct link to download the sample media.
 
-@subsection Visualizing Test Coverage
+@section Visualizing Test Coverage
 
 The FFmpeg build system allows visualizing the test coverage in an easy
 manner with the coverage tools @code{gcov}/@code{lcov}.  This involves
@@ -730,7 +746,7 @@
 measurements. You will need to rerun @code{make lcov} after running a
 new test.
 
-@subsection Using Valgrind
+@section Using Valgrind
 
 The configure script provides a shortcut for using valgrind to spot bugs
 related to memory handling. Just add the option
@@ -744,7 +760,7 @@
 your configure line instead.
 
 @anchor{Release process}
-@section Release process
+@chapter Release process
 
 FFmpeg maintains a set of @strong{release branches}, which are the
 recommended deliverable for system integrators and distributors (such as
@@ -776,7 +792,7 @@
 on the @strong{ffmpeg-devel} mailing list in time to allow forward planning.
 
 @anchor{Criteria for Point Releases}
-@subsection Criteria for Point Releases
+@section Criteria for Point Releases
 
 Changes that match the following criteria are valid candidates for
 inclusion into a point release:
@@ -800,7 +816,7 @@
 The order for checking the rules is (1 OR 2 OR 3) AND 4.
 
 
-@subsection Release Checklist
+@section Release Checklist
 
 The release process involves the following steps:
 

diff --git a/doc/encoders.texi b/doc/encoders.texi
index 431777c..8d184f7 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi

@@ -64,7 +64,6 @@
 all quantizers and adjusting some individual quantizer a little.  Will tune
 itself based on whether @option{aac_is}, @option{aac_ms} and @option{aac_pns}
 are enabled.
-This is the default choice for a coder.
 
 @item anmr
 Average noise to mask ratio (ANMR) trellis-based solution.
@@ -77,10 +76,10 @@
 @item fast
 Constant quantizer method.
 
-This method sets a constant quantizer for all bands. This is the fastest of all
-the methods and has no rate control or support for @option{aac_is} or
-@option{aac_pns}.
-Not recommended.
+Uses a cheaper version of twoloop algorithm that doesn't try to do as many
+clever adjustments. Worse with low bitrates (less than 64kbps), but is better
+and much faster at higher bitrates.
+This is the default choice for a coder
 
 @end table
 
@@ -982,6 +981,11 @@
 and LFE bandwidth optimizations, and 255 for independent streams with an
 unspecified channel layout.
 
+@item apply_phase_inv (N.A.) (requires libopus >= 1.2)
+If set to 0, disables the use of phase inversion for intensity stereo,
+improving the quality of mono downmixes, but slightly reducing normal stereo
+quality. The default is 1 (phase inversion enabled).
+
 @end table
 
 @anchor{libshine}
@@ -1681,6 +1685,13 @@
 @end table
 @item row-mt @var{boolean}
 Enable row based multi-threading.
+@item tune-content
+Set content type: default (0), screen (1), film (2).
+@item corpus-complexity
+Corpus VBR mode is a variant of standard VBR where the complexity distribution
+midpoint is passed in rather than calculated for a specific clip or chunk.
+
+The valid range is [0, 10000]. 0 (default) uses standard VBR.
 @end table
 
 @end table
@@ -2148,6 +2159,12 @@
 @item tune
 Set the x265 tune parameter.
 
+@item profile
+Set profile restrictions.
+
+@item crf
+Set the quality for constant quality mode.
+
 @item forced-idr
 Normally, when forcing a I-frame type, the encoder can select any type
 of I-frame. This option forces it to choose an IDR-frame.
@@ -2348,6 +2365,11 @@
 @itemx always
 Always write it.
 @end table
+@item video_format @var{integer}
+Specifies the video_format written into the sequence display extension
+indicating the source of the video pictures. The default is @samp{unspecified},
+can be @samp{component}, @samp{pal}, @samp{ntsc}, @samp{secam} or @samp{mac}.
+For maximum compatibility, use @samp{component}.
 @end table
 
 @section png
@@ -2543,6 +2565,9 @@
 @option{bf} / @option{max_b_frames}
 @item
 @option{profile}
+
+If not set, this will be determined automatically from the format of the input
+frames and the profiles supported by the driver.
 @item
 @option{level}
 @item
@@ -2563,7 +2588,8 @@
 Size / quality tradeoff: higher values are smaller / worse quality.
 @item
 @option{qmin}
-(only: @option{qmax} is not supported)
+@item
+@option{qmax}
 @item
 @option{i_qfactor} / @option{i_quant_factor}
 @item
@@ -2574,6 +2600,18 @@
 @option{b_qoffset} / @option{b_quant_offset}
 @end itemize
 
+All encoders support the following options:
+@itemize
+@item
+@option{low_power}
+
+Some drivers/platforms offer a second encoder for some codecs intended to use
+less power than the default encoder; setting this option will attempt to use
+that encoder.  Note that it may support a reduced feature set, so some other
+options may not be available in this mode.
+@end itemize
+
+Each encoder also has its own specific options:
 @table @option
 
 @item h264_vaapi
@@ -2581,8 +2619,6 @@
 @option{level} sets the value of @emph{level_idc}.
 
 @table @option
-@item low_power
-Use low-power encoding mode.
 @item coder
 Set entropy encoder (default is @emph{cabac}).  Possible values:
 
@@ -2595,21 +2631,70 @@
 @item cavlc
 Use CAVLC.
 @end table
+
+@item aud
+Include access unit delimiters in the stream (not included by default).
+
+@item sei
+Set SEI message types to include.
+Some combination of the following values:
+@table @samp
+@item identifier
+Include a @emph{user_data_unregistered} message containing information about
+the encoder.
+@item timing
+Include picture timing parameters (@emph{buffering_period} and
+@emph{pic_timing} messages).
+@item recovery_point
+Include recovery points where appropriate (@emph{recovery_point} messages).
+@end table
+
 @end table
 
 @item hevc_vaapi
 @option{profile} and @option{level} set the values of
 @emph{general_profile_idc} and @emph{general_level_idc} respectively.
 
+@table @option
+@item aud
+Include access unit delimiters in the stream (not included by default).
+
+@item tier
+Set @emph{general_tier_flag}.  This may affect the level chosen for the stream
+if it is not explicitly specified.
+
+@item sei
+Set SEI message types to include.
+Some combination of the following values:
+@table @samp
+@item hdr
+Include HDR metadata if the input frames have it
+(@emph{mastering_display_colour_volume} and @emph{content_light_level}
+messages).
+@end table
+
+@end table
+
 @item mjpeg_vaapi
-Always encodes using the standard quantisation and huffman tables -
-@option{global_quality} scales the standard quantisation table (range 1-100).
+Only baseline DCT encoding is supported.  The encoder always uses the standard
+quantisation and huffman tables - @option{global_quality} scales the standard
+quantisation table (range 1-100).
+
+For YUV, 4:2:0, 4:2:2 and 4:4:4 subsampling modes are supported.  RGB is also
+supported, and will create an RGB JPEG.
+
+@table @option
+@item jfif
+Include JFIF header in each frame (not included by default).
+@item huffman
+Include standard huffman tables (on by default).  Turning this off will save
+a few hundred bytes in each output frame, but may lose compatibility with some
+JPEG decoders which don't fully handle MJPEG.
+@end table
 
 @item mpeg2_vaapi
 @option{profile} and @option{level} set the value of @emph{profile_and_level_indication}.
 
-No rate control is supported.
-
 @item vp8_vaapi
 B-frames are not supported.
 
@@ -2704,6 +2789,52 @@
 
 @end table
 
+@section libxavs2
+
+xavs2 AVS2-P2/IEEE1857.4 encoder wrapper.
+
+This encoder requires the presence of the libxavs2 headers and library
+during configuration. You need to explicitly configure the build with
+@option{--enable-libxavs2}.
+
+@subsection Options
+
+@table @option
+@item lcu_row_threads
+Set the number of parallel threads for rows from 1 to 8 (default 5).
+
+@item initial_qp
+Set the xavs2 quantization parameter from 1 to 63 (default 34). This is
+used to set the initial qp for the first frame.
+
+@item qp
+Set the xavs2 quantization parameter from 1 to 63 (default 34). This is
+used to set the qp value under constant-QP mode.
+
+@item max_qp
+Set the max qp for rate control from 1 to 63 (default 55).
+
+@item min_qp
+Set the min qp for rate control from 1 to 63 (default 20).
+
+@item speed_level
+Set the Speed level from 0 to 9 (default 0). Higher is better but slower.
+
+@item log_level
+Set the log level from -1 to 3 (default 0). -1: none, 0: error,
+1: warning, 2: info, 3: debug.
+
+@item xavs2-params
+Set xavs2 options using a list of @var{key}=@var{value} couples separated
+by ":".
+
+For example to specify libxavs2 encoding options with @option{-xavs2-params}:
+
+@example
+ffmpeg -i input -c:v libxavs2 -xavs2-params preset_level=5 output.avs2
+@end example
+@end table
+
 @c man end VIDEO ENCODERS
 
 @chapter Subtitles Encoders

diff --git a/doc/examples/.gitignore b/doc/examples/.gitignore
index 154c841..75152cb 100644
--- a/doc/examples/.gitignore
+++ b/doc/examples/.gitignore

@@ -20,3 +20,5 @@
 /scaling_video
 /transcode_aac
 /transcoding
+/vaapi_encode
+/vaapi_transcode

diff --git a/doc/examples/Makefile b/doc/examples/Makefile
index 58afd71..928ff30 100644
--- a/doc/examples/Makefile
+++ b/doc/examples/Makefile

@@ -19,6 +19,8 @@
 EXAMPLES-$(CONFIG_SCALING_VIDEO_EXAMPLE)     += scaling_video
 EXAMPLES-$(CONFIG_TRANSCODE_AAC_EXAMPLE)     += transcode_aac
 EXAMPLES-$(CONFIG_TRANSCODING_EXAMPLE)       += transcoding
+EXAMPLES-$(CONFIG_VAAPI_ENCODE_EXAMPLE)      += vaapi_encode
+EXAMPLES-$(CONFIG_VAAPI_TRANSCODE_EXAMPLE)   += vaapi_transcode
 
 EXAMPLES       := $(EXAMPLES-yes:%=doc/examples/%$(PROGSSUF)$(EXESUF))
 EXAMPLES_G     := $(EXAMPLES-yes:%=doc/examples/%$(PROGSSUF)_g$(EXESUF))

diff --git a/doc/examples/avio_dir_cmd.c b/doc/examples/avio_dir_cmd.c
index 50c435c..0722bd9 100644
--- a/doc/examples/avio_dir_cmd.c
+++ b/doc/examples/avio_dir_cmd.c

@@ -143,8 +143,6 @@
         return 1;
     }
 
-    /* register codecs and formats and other lavf/lavc components*/
-    av_register_all();
     avformat_network_init();
 
     op = argv[1];

diff --git a/doc/examples/avio_reading.c b/doc/examples/avio_reading.c
index 02474e9..cbfeb17 100644
--- a/doc/examples/avio_reading.c
+++ b/doc/examples/avio_reading.c

@@ -44,6 +44,8 @@
     struct buffer_data *bd = (struct buffer_data *)opaque;
     buf_size = FFMIN(buf_size, bd->size);
 
+    if (!buf_size)
+        return AVERROR_EOF;
     printf("ptr:%p size:%zu\n", bd->ptr, bd->size);
 
     /* copy internal buffer data to buf */
@@ -72,9 +74,6 @@
     }
     input_filename = argv[1];
 
-    /* register codecs and formats and other lavf/lavc components*/
-    av_register_all();
-
     /* slurp file content into buffer */
     ret = av_file_map(input_filename, &buffer, &buffer_size, 0, NULL);
     if (ret < 0)

diff --git a/doc/examples/decode_audio.c b/doc/examples/decode_audio.c
index fb9a9af..19dcafd 100644
--- a/doc/examples/decode_audio.c
+++ b/doc/examples/decode_audio.c

@@ -94,9 +94,6 @@
     filename    = argv[1];
     outfilename = argv[2];
 
-    /* register all the codecs */
-    avcodec_register_all();
-
     pkt = av_packet_alloc();
 
     /* find the MPEG audio decoder */

diff --git a/doc/examples/decode_video.c b/doc/examples/decode_video.c
index 4377fd4..5a9d43f 100644
--- a/doc/examples/decode_video.c
+++ b/doc/examples/decode_video.c

@@ -101,8 +101,6 @@
     filename    = argv[1];
     outfilename = argv[2];
 
-    avcodec_register_all();
-
     pkt = av_packet_alloc();
     if (!pkt)
         exit(1);

diff --git a/doc/examples/demuxing_decoding.c b/doc/examples/demuxing_decoding.c
index b1a216a..69a31a8 100644
--- a/doc/examples/demuxing_decoding.c
+++ b/doc/examples/demuxing_decoding.c

@@ -252,9 +252,6 @@
     video_dst_filename = argv[2];
     audio_dst_filename = argv[3];
 
-    /* register all formats and codecs */
-    av_register_all();
-
     /* open input file, and allocate format context */
     if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0) {
         fprintf(stderr, "Could not open source file %s\n", src_filename);

diff --git a/doc/examples/encode_audio.c b/doc/examples/encode_audio.c
index d1ef105..ab3586b 100644
--- a/doc/examples/encode_audio.c
+++ b/doc/examples/encode_audio.c

@@ -138,9 +138,6 @@
     }
     filename = argv[1];
 
-    /* register all the codecs */
-    avcodec_register_all();
-
     /* find the MP2 encoder */
     codec = avcodec_find_encoder(AV_CODEC_ID_MP2);
     if (!codec) {

diff --git a/doc/examples/encode_video.c b/doc/examples/encode_video.c
index 8cd1321..6731b2a 100644
--- a/doc/examples/encode_video.c
+++ b/doc/examples/encode_video.c

@@ -84,8 +84,6 @@
     filename = argv[1];
     codec_name = argv[2];
 
-    avcodec_register_all();
-
     /* find the mpeg1video encoder */
     codec = avcodec_find_encoder_by_name(codec_name);
     if (!codec) {

diff --git a/doc/examples/extract_mvs.c b/doc/examples/extract_mvs.c
index 7ae934e..de31ccd 100644
--- a/doc/examples/extract_mvs.c
+++ b/doc/examples/extract_mvs.c

@@ -129,8 +129,6 @@
     }
     src_filename = argv[1];
 
-    av_register_all();
-
     if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0) {
         fprintf(stderr, "Could not open source file %s\n", src_filename);
         exit(1);

diff --git a/doc/examples/filter_audio.c b/doc/examples/filter_audio.c
index 01761dc..1611e3d 100644
--- a/doc/examples/filter_audio.c
+++ b/doc/examples/filter_audio.c

@@ -64,13 +64,13 @@
 {
     AVFilterGraph *filter_graph;
     AVFilterContext *abuffer_ctx;
-    AVFilter        *abuffer;
+    const AVFilter  *abuffer;
     AVFilterContext *volume_ctx;
-    AVFilter        *volume;
+    const AVFilter  *volume;
     AVFilterContext *aformat_ctx;
-    AVFilter        *aformat;
+    const AVFilter  *aformat;
     AVFilterContext *abuffersink_ctx;
-    AVFilter        *abuffersink;
+    const AVFilter  *abuffersink;
 
     AVDictionary *options_dict = NULL;
     uint8_t options_str[1024];
@@ -289,8 +289,6 @@
         return 1;
     }
 
-    avfilter_register_all();
-
     /* Allocate the frame we will be using to store the data. */
     frame  = av_frame_alloc();
     if (!frame) {

diff --git a/doc/examples/filtering_audio.c b/doc/examples/filtering_audio.c
index 9fc4f1c..834b137 100644
--- a/doc/examples/filtering_audio.c
+++ b/doc/examples/filtering_audio.c

@@ -32,7 +32,6 @@
 
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
-#include <libavfilter/avfiltergraph.h>
 #include <libavfilter/buffersink.h>
 #include <libavfilter/buffersrc.h>
 #include <libavutil/opt.h>
@@ -75,7 +74,6 @@
     if (!dec_ctx)
         return AVERROR(ENOMEM);
     avcodec_parameters_to_context(dec_ctx, fmt_ctx->streams[audio_stream_index]->codecpar);
-    av_opt_set_int(dec_ctx, "refcounted_frames", 1, 0);
 
     /* init the audio decoder */
     if ((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
@@ -90,8 +88,8 @@
 {
     char args[512];
     int ret = 0;
-    AVFilter *abuffersrc  = avfilter_get_by_name("abuffer");
-    AVFilter *abuffersink = avfilter_get_by_name("abuffersink");
+    const AVFilter *abuffersrc  = avfilter_get_by_name("abuffer");
+    const AVFilter *abuffersink = avfilter_get_by_name("abuffersink");
     AVFilterInOut *outputs = avfilter_inout_alloc();
     AVFilterInOut *inputs  = avfilter_inout_alloc();
     static const enum AVSampleFormat out_sample_fmts[] = { AV_SAMPLE_FMT_S16, -1 };
@@ -229,9 +227,6 @@
         exit(1);
     }
 
-    av_register_all();
-    avfilter_register_all();
-
     if ((ret = open_input_file(argv[1])) < 0)
         goto end;
     if ((ret = init_filters(filter_descr)) < 0)

diff --git a/doc/examples/filtering_video.c b/doc/examples/filtering_video.c
index 4e09c6f..105a200 100644
--- a/doc/examples/filtering_video.c
+++ b/doc/examples/filtering_video.c

@@ -29,10 +29,11 @@
 
 #define _XOPEN_SOURCE 600 /* for usleep */
 #include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
 
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
-#include <libavfilter/avfiltergraph.h>
 #include <libavfilter/buffersink.h>
 #include <libavfilter/buffersrc.h>
 #include <libavutil/opt.h>
@@ -78,7 +79,6 @@
     if (!dec_ctx)
         return AVERROR(ENOMEM);
     avcodec_parameters_to_context(dec_ctx, fmt_ctx->streams[video_stream_index]->codecpar);
-    av_opt_set_int(dec_ctx, "refcounted_frames", 1, 0);
 
     /* init the video decoder */
     if ((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
@@ -93,8 +93,8 @@
 {
     char args[512];
     int ret = 0;
-    AVFilter *buffersrc  = avfilter_get_by_name("buffer");
-    AVFilter *buffersink = avfilter_get_by_name("buffersink");
+    const AVFilter *buffersrc  = avfilter_get_by_name("buffer");
+    const AVFilter *buffersink = avfilter_get_by_name("buffersink");
     AVFilterInOut *outputs = avfilter_inout_alloc();
     AVFilterInOut *inputs  = avfilter_inout_alloc();
     AVRational time_base = fmt_ctx->streams[video_stream_index]->time_base;
@@ -211,20 +211,20 @@
 {
     int ret;
     AVPacket packet;
-    AVFrame *frame = av_frame_alloc();
-    AVFrame *filt_frame = av_frame_alloc();
+    AVFrame *frame;
+    AVFrame *filt_frame;
 
-    if (!frame || !filt_frame) {
-        perror("Could not allocate frame");
-        exit(1);
-    }
     if (argc != 2) {
         fprintf(stderr, "Usage: %s file\n", argv[0]);
         exit(1);
     }
 
-    av_register_all();
-    avfilter_register_all();
+    frame = av_frame_alloc();
+    filt_frame = av_frame_alloc();
+    if (!frame || !filt_frame) {
+        perror("Could not allocate frame");
+        exit(1);
+    }
 
     if ((ret = open_input_file(argv[1])) < 0)
         goto end;
@@ -252,27 +252,25 @@
                     goto end;
                 }
 
-                if (ret >= 0) {
-                    frame->pts = frame->best_effort_timestamp;
+                frame->pts = frame->best_effort_timestamp;
 
-                    /* push the decoded frame into the filtergraph */
-                    if (av_buffersrc_add_frame_flags(buffersrc_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
-                        av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
-                        break;
-                    }
-
-                    /* pull filtered frames from the filtergraph */
-                    while (1) {
-                        ret = av_buffersink_get_frame(buffersink_ctx, filt_frame);
-                        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
-                            break;
-                        if (ret < 0)
-                            goto end;
-                        display_frame(filt_frame, buffersink_ctx->inputs[0]->time_base);
-                        av_frame_unref(filt_frame);
-                    }
-                    av_frame_unref(frame);
+                /* push the decoded frame into the filtergraph */
+                if (av_buffersrc_add_frame_flags(buffersrc_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
+                    av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
+                    break;
                 }
+
+                /* pull filtered frames from the filtergraph */
+                while (1) {
+                    ret = av_buffersink_get_frame(buffersink_ctx, filt_frame);
+                    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
+                        break;
+                    if (ret < 0)
+                        goto end;
+                    display_frame(filt_frame, buffersink_ctx->inputs[0]->time_base);
+                    av_frame_unref(filt_frame);
+                }
+                av_frame_unref(frame);
             }
         }
         av_packet_unref(&packet);

diff --git a/doc/examples/http_multiclient.c b/doc/examples/http_multiclient.c
index e2c2201..831e89c 100644
--- a/doc/examples/http_multiclient.c
+++ b/doc/examples/http_multiclient.c

@@ -114,7 +114,6 @@
     in_uri = argv[1];
     out_uri = argv[2];
 
-    av_register_all();
     avformat_network_init();
 
     if ((ret = av_dict_set(&options, "listen", "2", 0)) < 0) {

diff --git a/doc/examples/hw_decode.c b/doc/examples/hw_decode.c
index 9c7adbf..f3286f4 100644
--- a/doc/examples/hw_decode.c
+++ b/doc/examples/hw_decode.c

@@ -4,21 +4,23 @@
  *
  * HW Acceleration API (video decoding) decode sample
  *
- * This file is part of FFmpeg.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
  *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
  *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
  */
 
 /**
@@ -44,34 +46,6 @@
 static enum AVPixelFormat hw_pix_fmt;
 static FILE *output_file = NULL;
 
-static enum AVPixelFormat find_fmt_by_hw_type(const enum AVHWDeviceType type)
-{
-    enum AVPixelFormat fmt;
-
-    switch (type) {
-    case AV_HWDEVICE_TYPE_VAAPI:
-        fmt = AV_PIX_FMT_VAAPI;
-        break;
-    case AV_HWDEVICE_TYPE_DXVA2:
-        fmt = AV_PIX_FMT_DXVA2_VLD;
-        break;
-    case AV_HWDEVICE_TYPE_D3D11VA:
-        fmt = AV_PIX_FMT_D3D11;
-        break;
-    case AV_HWDEVICE_TYPE_VDPAU:
-        fmt = AV_PIX_FMT_VDPAU;
-        break;
-    case AV_HWDEVICE_TYPE_VIDEOTOOLBOX:
-        fmt = AV_PIX_FMT_VIDEOTOOLBOX;
-        break;
-    default:
-        fmt = AV_PIX_FMT_NONE;
-        break;
-    }
-
-    return fmt;
-}
-
 static int hw_decoder_init(AVCodecContext *ctx, const enum AVHWDeviceType type)
 {
     int err = 0;
@@ -114,7 +88,7 @@
         return ret;
     }
 
-    while (ret >= 0) {
+    while (1) {
         if (!(frame = av_frame_alloc()) || !(sw_frame = av_frame_alloc())) {
             fprintf(stderr, "Can not alloc frame\n");
             ret = AVERROR(ENOMEM);
@@ -166,13 +140,10 @@
     fail:
         av_frame_free(&frame);
         av_frame_free(&sw_frame);
-        if (buffer)
-            av_freep(&buffer);
+        av_freep(&buffer);
         if (ret < 0)
             return ret;
     }
-
-    return 0;
 }
 
 int main(int argc, char *argv[])
@@ -184,18 +155,20 @@
     AVCodec *decoder = NULL;
     AVPacket packet;
     enum AVHWDeviceType type;
+    int i;
 
     if (argc < 4) {
-        fprintf(stderr, "Usage: %s <vaapi|vdpau|dxva2|d3d11va> <input file> <output file>\n", argv[0]);
+        fprintf(stderr, "Usage: %s <device type> <input file> <output file>\n", argv[0]);
         return -1;
     }
 
-    av_register_all();
-
     type = av_hwdevice_find_type_by_name(argv[1]);
-    hw_pix_fmt = find_fmt_by_hw_type(type);
-    if (hw_pix_fmt == -1) {
-        fprintf(stderr, "Cannot support '%s' in this example.\n", argv[1]);
+    if (type == AV_HWDEVICE_TYPE_NONE) {
+        fprintf(stderr, "Device type %s is not supported.\n", argv[1]);
+        fprintf(stderr, "Available device types:");
+        while((type = av_hwdevice_iterate_types(type)) != AV_HWDEVICE_TYPE_NONE)
+            fprintf(stderr, " %s", av_hwdevice_get_type_name(type));
+        fprintf(stderr, "\n");
         return -1;
     }
 
@@ -218,6 +191,20 @@
     }
     video_stream = ret;
 
+    for (i = 0;; i++) {
+        const AVCodecHWConfig *config = avcodec_get_hw_config(decoder, i);
+        if (!config) {
+            fprintf(stderr, "Decoder %s does not support device type %s.\n",
+                    decoder->name, av_hwdevice_get_type_name(type));
+            return -1;
+        }
+        if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX &&
+            config->device_type == type) {
+            hw_pix_fmt = config->pix_fmt;
+            break;
+        }
+    }
+
     if (!(decoder_ctx = avcodec_alloc_context3(decoder)))
         return AVERROR(ENOMEM);
 
@@ -226,7 +213,6 @@
         return -1;
 
     decoder_ctx->get_format  = get_hw_format;
-    av_opt_set_int(decoder_ctx, "refcounted_frames", 1, 0);
 
     if (hw_decoder_init(decoder_ctx, type) < 0)
         return -1;

diff --git a/doc/examples/metadata.c b/doc/examples/metadata.c
index f73c267..e330d07 100644
--- a/doc/examples/metadata.c
+++ b/doc/examples/metadata.c

@@ -44,7 +44,6 @@
         return 1;
     }
 
-    av_register_all();
     if ((ret = avformat_open_input(&fmt_ctx, argv[1], NULL, NULL)))
         return ret;
 

diff --git a/doc/examples/muxing.c b/doc/examples/muxing.c
index e1a4770..08da98e 100644
--- a/doc/examples/muxing.c
+++ b/doc/examples/muxing.c

@@ -488,9 +488,9 @@
             }
         }
         fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
-        sws_scale(ost->sws_ctx,
-                  (const uint8_t * const *)ost->tmp_frame->data, ost->tmp_frame->linesize,
-                  0, c->height, ost->frame->data, ost->frame->linesize);
+        sws_scale(ost->sws_ctx, (const uint8_t * const *) ost->tmp_frame->data,
+                  ost->tmp_frame->linesize, 0, c->height, ost->frame->data,
+                  ost->frame->linesize);
     } else {
         fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
     }
@@ -564,9 +564,6 @@
     AVDictionary *opt = NULL;
     int i;
 
-    /* Initialize libavcodec, and register all codecs and formats. */
-    av_register_all();
-
     if (argc < 2) {
         printf("usage: %s output_file\n"
                "API example program to output a media file with libavformat.\n"

diff --git a/doc/examples/qsvdec.c b/doc/examples/qsvdec.c
index 46e6ddc..7415eef 100644
--- a/doc/examples/qsvdec.c
+++ b/doc/examples/qsvdec.c

@@ -150,8 +150,6 @@
 
     int ret, i;
 
-    av_register_all();
-
     if (argc < 3) {
         fprintf(stderr, "Usage: %s <input file> <output file>\n", argv[0]);
         return 1;
@@ -210,7 +208,6 @@
                video_st->codecpar->extradata_size);
         decoder_ctx->extradata_size = video_st->codecpar->extradata_size;
     }
-    decoder_ctx->refcounted_frames = 1;
 
     decoder_ctx->opaque      = &decode;
     decoder_ctx->get_format  = get_format;

diff --git a/doc/examples/remuxing.c b/doc/examples/remuxing.c
index 5959418..9e4d103 100644
--- a/doc/examples/remuxing.c
+++ b/doc/examples/remuxing.c

@@ -65,8 +65,6 @@
     in_filename  = argv[1];
     out_filename = argv[2];
 
-    av_register_all();
-
     if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
         fprintf(stderr, "Could not open input file '%s'", in_filename);
         goto end;

diff --git a/doc/examples/transcode_aac.c b/doc/examples/transcode_aac.c
index 6c2f4fb..e0c76f5 100644
--- a/doc/examples/transcode_aac.c
+++ b/doc/examples/transcode_aac.c

@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) 2013-2018 Andreas Unterweger
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -8,7 +10,7 @@
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
@@ -18,10 +20,11 @@
 
 /**
  * @file
- * simple audio converter
+ * Simple audio converter
  *
  * @example transcode_aac.c
  * Convert an input audio file to AAC in an MP4 container using FFmpeg.
+ * Formats other than MP4 are supported based on the output file extension.
  * @author Andreas Unterweger (dustsigns@gmail.com)
  */
 
@@ -40,12 +43,18 @@
 
 #include "libswresample/swresample.h"
 
-/** The output bit rate in kbit/s */
+/* The output bit rate in bit/s */
 #define OUTPUT_BIT_RATE 96000
-/** The number of output channels */
+/* The number of output channels */
 #define OUTPUT_CHANNELS 2
 
-/** Open an input file and the required decoder. */
+/**
+ * Open an input file and the required decoder.
+ * @param      filename             File to be opened
+ * @param[out] input_format_context Format context of opened file
+ * @param[out] input_codec_context  Codec context of opened file
+ * @return Error code (0 if successful)
+ */
 static int open_input_file(const char *filename,
                            AVFormatContext **input_format_context,
                            AVCodecContext **input_codec_context)
@@ -54,7 +63,7 @@
     AVCodec *input_codec;
     int error;
 
-    /** Open the input file to read from it. */
+    /* Open the input file to read from it. */
     if ((error = avformat_open_input(input_format_context, filename, NULL,
                                      NULL)) < 0) {
         fprintf(stderr, "Could not open input file '%s' (error '%s')\n",
@@ -63,7 +72,7 @@
         return error;
     }
 
-    /** Get information on the input file (number of streams etc.). */
+    /* Get information on the input file (number of streams etc.). */
     if ((error = avformat_find_stream_info(*input_format_context, NULL)) < 0) {
         fprintf(stderr, "Could not open find stream info (error '%s')\n",
                 av_err2str(error));
@@ -71,7 +80,7 @@
         return error;
     }
 
-    /** Make sure that there is only one stream in the input file. */
+    /* Make sure that there is only one stream in the input file. */
     if ((*input_format_context)->nb_streams != 1) {
         fprintf(stderr, "Expected one audio input stream, but found %d\n",
                 (*input_format_context)->nb_streams);
@@ -79,14 +88,14 @@
         return AVERROR_EXIT;
     }
 
-    /** Find a decoder for the audio stream. */
+    /* Find a decoder for the audio stream. */
     if (!(input_codec = avcodec_find_decoder((*input_format_context)->streams[0]->codecpar->codec_id))) {
         fprintf(stderr, "Could not find input codec\n");
         avformat_close_input(input_format_context);
         return AVERROR_EXIT;
     }
 
-    /** allocate a new decoding context */
+    /* Allocate a new decoding context. */
     avctx = avcodec_alloc_context3(input_codec);
     if (!avctx) {
         fprintf(stderr, "Could not allocate a decoding context\n");
@@ -94,7 +103,7 @@
         return AVERROR(ENOMEM);
     }
 
-    /** initialize the stream parameters with demuxer information */
+    /* Initialize the stream parameters with demuxer information. */
     error = avcodec_parameters_to_context(avctx, (*input_format_context)->streams[0]->codecpar);
     if (error < 0) {
         avformat_close_input(input_format_context);
@@ -102,7 +111,7 @@
         return error;
     }
 
-    /** Open the decoder for the audio stream to use it later. */
+    /* Open the decoder for the audio stream to use it later. */
     if ((error = avcodec_open2(avctx, input_codec, NULL)) < 0) {
         fprintf(stderr, "Could not open input codec (error '%s')\n",
                 av_err2str(error));
@@ -111,7 +120,7 @@
         return error;
     }
 
-    /** Save the decoder context for easier access later. */
+    /* Save the decoder context for easier access later. */
     *input_codec_context = avctx;
 
     return 0;
@@ -121,6 +130,11 @@
  * Open an output file and the required encoder.
  * Also set some basic encoder parameters.
  * Some of these parameters are based on the input file's parameters.
+ * @param      filename              File to be opened
+ * @param      input_codec_context   Codec context of input file
+ * @param[out] output_format_context Format context of output file
+ * @param[out] output_codec_context  Codec context of output file
+ * @return Error code (0 if successful)
  */
 static int open_output_file(const char *filename,
                             AVCodecContext *input_codec_context,
@@ -133,7 +147,7 @@
     AVCodec *output_codec          = NULL;
     int error;
 
-    /** Open the output file to write to it. */
+    /* Open the output file to write to it. */
     if ((error = avio_open(&output_io_context, filename,
                            AVIO_FLAG_WRITE)) < 0) {
         fprintf(stderr, "Could not open output file '%s' (error '%s')\n",
@@ -141,32 +155,35 @@
         return error;
     }
 
-    /** Create a new format context for the output container format. */
+    /* Create a new format context for the output container format. */
     if (!(*output_format_context = avformat_alloc_context())) {
         fprintf(stderr, "Could not allocate output format context\n");
         return AVERROR(ENOMEM);
     }
 
-    /** Associate the output file (pointer) with the container format context. */
+    /* Associate the output file (pointer) with the container format context. */
     (*output_format_context)->pb = output_io_context;
 
-    /** Guess the desired container format based on the file extension. */
+    /* Guess the desired container format based on the file extension. */
     if (!((*output_format_context)->oformat = av_guess_format(NULL, filename,
                                                               NULL))) {
         fprintf(stderr, "Could not find output file format\n");
         goto cleanup;
     }
 
-    av_strlcpy((*output_format_context)->filename, filename,
-               sizeof((*output_format_context)->filename));
+    if (!((*output_format_context)->url = av_strdup(filename))) {
+        fprintf(stderr, "Could not allocate url.\n");
+        error = AVERROR(ENOMEM);
+        goto cleanup;
+    }
 
-    /** Find the encoder to be used by its name. */
+    /* Find the encoder to be used by its name. */
     if (!(output_codec = avcodec_find_encoder(AV_CODEC_ID_AAC))) {
         fprintf(stderr, "Could not find an AAC encoder.\n");
         goto cleanup;
     }
 
-    /** Create a new audio stream in the output file container. */
+    /* Create a new audio stream in the output file container. */
     if (!(stream = avformat_new_stream(*output_format_context, NULL))) {
         fprintf(stderr, "Could not create new stream\n");
         error = AVERROR(ENOMEM);
@@ -180,31 +197,27 @@
         goto cleanup;
     }
 
-    /**
-     * Set the basic encoder parameters.
-     * The input file's sample rate is used to avoid a sample rate conversion.
-     */
+    /* Set the basic encoder parameters.
+     * The input file's sample rate is used to avoid a sample rate conversion. */
     avctx->channels       = OUTPUT_CHANNELS;
     avctx->channel_layout = av_get_default_channel_layout(OUTPUT_CHANNELS);
     avctx->sample_rate    = input_codec_context->sample_rate;
     avctx->sample_fmt     = output_codec->sample_fmts[0];
     avctx->bit_rate       = OUTPUT_BIT_RATE;
 
-    /** Allow the use of the experimental AAC encoder */
+    /* Allow the use of the experimental AAC encoder. */
     avctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
 
-    /** Set the sample rate for the container. */
+    /* Set the sample rate for the container. */
     stream->time_base.den = input_codec_context->sample_rate;
     stream->time_base.num = 1;
 
-    /**
-     * Some container formats (like MP4) require global headers to be present
-     * Mark the encoder so that it behaves accordingly.
-     */
+    /* Some container formats (like MP4) require global headers to be present.
+     * Mark the encoder so that it behaves accordingly. */
     if ((*output_format_context)->oformat->flags & AVFMT_GLOBALHEADER)
         avctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 
-    /** Open the encoder for the audio stream to use it later. */
+    /* Open the encoder for the audio stream to use it later. */
     if ((error = avcodec_open2(avctx, output_codec, NULL)) < 0) {
         fprintf(stderr, "Could not open output codec (error '%s')\n",
                 av_err2str(error));
@@ -217,7 +230,7 @@
         goto cleanup;
     }
 
-    /** Save the encoder context for easier access later. */
+    /* Save the encoder context for easier access later. */
     *output_codec_context = avctx;
 
     return 0;
@@ -230,16 +243,23 @@
     return error < 0 ? error : AVERROR_EXIT;
 }
 
-/** Initialize one data packet for reading or writing. */
+/**
+ * Initialize one data packet for reading or writing.
+ * @param packet Packet to be initialized
+ */
 static void init_packet(AVPacket *packet)
 {
     av_init_packet(packet);
-    /** Set the packet data and size so that it is recognized as being empty. */
+    /* Set the packet data and size so that it is recognized as being empty. */
     packet->data = NULL;
     packet->size = 0;
 }
 
-/** Initialize one audio frame for reading from the input file */
+/**
+ * Initialize one audio frame for reading from the input file.
+ * @param[out] frame Frame to be initialized
+ * @return Error code (0 if successful)
+ */
 static int init_input_frame(AVFrame **frame)
 {
     if (!(*frame = av_frame_alloc())) {
@@ -253,6 +273,10 @@
  * Initialize the audio resampler based on the input and output codec settings.
  * If the input and output sample formats differ, a conversion is required
  * libswresample takes care of this, but requires initialization.
+ * @param      input_codec_context  Codec context of the input file
+ * @param      output_codec_context Codec context of the output file
+ * @param[out] resample_context     Resample context for the required conversion
+ * @return Error code (0 if successful)
  */
 static int init_resampler(AVCodecContext *input_codec_context,
                           AVCodecContext *output_codec_context,
@@ -260,7 +284,7 @@
 {
         int error;
 
-        /**
+        /*
          * Create a resampler context for the conversion.
          * Set the conversion parameters.
          * Default channel layouts based on the number of channels
@@ -279,14 +303,14 @@
             fprintf(stderr, "Could not allocate resample context\n");
             return AVERROR(ENOMEM);
         }
-        /**
+        /*
         * Perform a sanity check so that the number of converted samples is
         * not greater than the number of samples to be converted.
         * If the sample rates differ, this case has to be handled differently
         */
         av_assert0(output_codec_context->sample_rate == input_codec_context->sample_rate);
 
-        /** Open the resampler with the specified parameters. */
+        /* Open the resampler with the specified parameters. */
         if ((error = swr_init(*resample_context)) < 0) {
             fprintf(stderr, "Could not open resample context\n");
             swr_free(resample_context);
@@ -295,10 +319,15 @@
     return 0;
 }
 
-/** Initialize a FIFO buffer for the audio samples to be encoded. */
+/**
+ * Initialize a FIFO buffer for the audio samples to be encoded.
+ * @param[out] fifo                 Sample buffer
+ * @param      output_codec_context Codec context of the output file
+ * @return Error code (0 if successful)
+ */
 static int init_fifo(AVAudioFifo **fifo, AVCodecContext *output_codec_context)
 {
-    /** Create the FIFO buffer based on the specified output sample format. */
+    /* Create the FIFO buffer based on the specified output sample format. */
     if (!(*fifo = av_audio_fifo_alloc(output_codec_context->sample_fmt,
                                       output_codec_context->channels, 1))) {
         fprintf(stderr, "Could not allocate FIFO\n");
@@ -307,7 +336,11 @@
     return 0;
 }
 
-/** Write the header of the output file container. */
+/**
+ * Write the header of the output file container.
+ * @param output_format_context Format context of the output file
+ * @return Error code (0 if successful)
+ */
 static int write_output_file_header(AVFormatContext *output_format_context)
 {
     int error;
@@ -319,20 +352,32 @@
     return 0;
 }
 
-/** Decode one audio frame from the input file. */
+/**
+ * Decode one audio frame from the input file.
+ * @param      frame                Audio frame to be decoded
+ * @param      input_format_context Format context of the input file
+ * @param      input_codec_context  Codec context of the input file
+ * @param[out] data_present         Indicates whether data has been decoded
+ * @param[out] finished             Indicates whether the end of file has
+ *                                  been reached and all data has been
+ *                                  decoded. If this flag is false, there
+ *                                  is more data to be decoded, i.e., this
+ *                                  function has to be called again.
+ * @return Error code (0 if successful)
+ */
 static int decode_audio_frame(AVFrame *frame,
                               AVFormatContext *input_format_context,
                               AVCodecContext *input_codec_context,
                               int *data_present, int *finished)
 {
-    /** Packet used for temporary storage. */
+    /* Packet used for temporary storage. */
     AVPacket input_packet;
     int error;
     init_packet(&input_packet);
 
-    /** Read one audio frame from the input file into a temporary packet. */
+    /* Read one audio frame from the input file into a temporary packet. */
     if ((error = av_read_frame(input_format_context, &input_packet)) < 0) {
-        /** If we are at the end of the file, flush the decoder below. */
+        /* If we are at the end of the file, flush the decoder below. */
         if (error == AVERROR_EOF)
             *finished = 1;
         else {
@@ -342,34 +387,52 @@
         }
     }
 
-    /**
-     * Decode the audio frame stored in the temporary packet.
-     * The input audio stream decoder is used to do this.
-     * If we are at the end of the file, pass an empty packet to the decoder
-     * to flush it.
-     */
-    if ((error = avcodec_decode_audio4(input_codec_context, frame,
-                                       data_present, &input_packet)) < 0) {
-        fprintf(stderr, "Could not decode frame (error '%s')\n",
+    /* Send the audio frame stored in the temporary packet to the decoder.
+     * The input audio stream decoder is used to do this. */
+    if ((error = avcodec_send_packet(input_codec_context, &input_packet)) < 0) {
+        fprintf(stderr, "Could not send packet for decoding (error '%s')\n",
                 av_err2str(error));
-        av_packet_unref(&input_packet);
         return error;
     }
 
-    /**
-     * If the decoder has not been flushed completely, we are not finished,
-     * so that this function has to be called again.
-     */
-    if (*finished && *data_present)
-        *finished = 0;
+    /* Receive one frame from the decoder. */
+    error = avcodec_receive_frame(input_codec_context, frame);
+    /* If the decoder asks for more data to be able to decode a frame,
+     * return indicating that no data is present. */
+    if (error == AVERROR(EAGAIN)) {
+        error = 0;
+        goto cleanup;
+    /* If the end of the input file is reached, stop decoding. */
+    } else if (error == AVERROR_EOF) {
+        *finished = 1;
+        error = 0;
+        goto cleanup;
+    } else if (error < 0) {
+        fprintf(stderr, "Could not decode frame (error '%s')\n",
+                av_err2str(error));
+        goto cleanup;
+    /* Default case: Return decoded data. */
+    } else {
+        *data_present = 1;
+        goto cleanup;
+    }
+
+cleanup:
     av_packet_unref(&input_packet);
-    return 0;
+    return error;
 }
 
 /**
  * Initialize a temporary storage for the specified number of audio samples.
  * The conversion requires temporary storage due to the different format.
  * The number of audio samples to be allocated is specified in frame_size.
+ * @param[out] converted_input_samples Array of converted samples. The
+ *                                     dimensions are reference, channel
+ *                                     (for multi-channel audio), sample.
+ * @param      output_codec_context    Codec context of the output file
+ * @param      frame_size              Number of samples to be converted in
+ *                                     each round
+ * @return Error code (0 if successful)
  */
 static int init_converted_samples(uint8_t ***converted_input_samples,
                                   AVCodecContext *output_codec_context,
@@ -377,8 +440,7 @@
 {
     int error;
 
-    /**
-     * Allocate as many pointers as there are audio channels.
+    /* Allocate as many pointers as there are audio channels.
      * Each pointer will later point to the audio samples of the corresponding
      * channels (although it may be NULL for interleaved formats).
      */
@@ -388,10 +450,8 @@
         return AVERROR(ENOMEM);
     }
 
-    /**
-     * Allocate memory for the samples of all channels in one consecutive
-     * block for convenience.
-     */
+    /* Allocate memory for the samples of all channels in one consecutive
+     * block for convenience. */
     if ((error = av_samples_alloc(*converted_input_samples, NULL,
                                   output_codec_context->channels,
                                   frame_size,
@@ -408,8 +468,15 @@
 
 /**
  * Convert the input audio samples into the output sample format.
- * The conversion happens on a per-frame basis, the size of which is specified
- * by frame_size.
+ * The conversion happens on a per-frame basis, the size of which is
+ * specified by frame_size.
+ * @param      input_data       Samples to be decoded. The dimensions are
+ *                              channel (for multi-channel audio), sample.
+ * @param[out] converted_data   Converted samples. The dimensions are channel
+ *                              (for multi-channel audio), sample.
+ * @param      frame_size       Number of samples to be converted
+ * @param      resample_context Resample context for the conversion
+ * @return Error code (0 if successful)
  */
 static int convert_samples(const uint8_t **input_data,
                            uint8_t **converted_data, const int frame_size,
@@ -417,7 +484,7 @@
 {
     int error;
 
-    /** Convert the samples using the resampler. */
+    /* Convert the samples using the resampler. */
     if ((error = swr_convert(resample_context,
                              converted_data, frame_size,
                              input_data    , frame_size)) < 0) {
@@ -429,23 +496,28 @@
     return 0;
 }
 
-/** Add converted input audio samples to the FIFO buffer for later processing. */
+/**
+ * Add converted input audio samples to the FIFO buffer for later processing.
+ * @param fifo                    Buffer to add the samples to
+ * @param converted_input_samples Samples to be added. The dimensions are channel
+ *                                (for multi-channel audio), sample.
+ * @param frame_size              Number of samples to be converted
+ * @return Error code (0 if successful)
+ */
 static int add_samples_to_fifo(AVAudioFifo *fifo,
                                uint8_t **converted_input_samples,
                                const int frame_size)
 {
     int error;
 
-    /**
-     * Make the FIFO as large as it needs to be to hold both,
-     * the old and the new samples.
-     */
+    /* Make the FIFO as large as it needs to be to hold both,
+     * the old and the new samples. */
     if ((error = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + frame_size)) < 0) {
         fprintf(stderr, "Could not reallocate FIFO\n");
         return error;
     }
 
-    /** Store the new samples in the FIFO buffer. */
+    /* Store the new samples in the FIFO buffer. */
     if (av_audio_fifo_write(fifo, (void **)converted_input_samples,
                             frame_size) < frame_size) {
         fprintf(stderr, "Could not write data to FIFO\n");
@@ -455,8 +527,20 @@
 }
 
 /**
- * Read one audio frame from the input file, decodes, converts and stores
+ * Read one audio frame from the input file, decode, convert and store
  * it in the FIFO buffer.
+ * @param      fifo                 Buffer used for temporary storage
+ * @param      input_format_context Format context of the input file
+ * @param      input_codec_context  Codec context of the input file
+ * @param      output_codec_context Codec context of the output file
+ * @param      resampler_context    Resample context for the conversion
+ * @param[out] finished             Indicates whether the end of file has
+ *                                  been reached and all data has been
+ *                                  decoded. If this flag is false,
+ *                                  there is more data to be decoded,
+ *                                  i.e., this function has to be called
+ *                                  again.
+ * @return Error code (0 if successful)
  */
 static int read_decode_convert_and_store(AVAudioFifo *fifo,
                                          AVFormatContext *input_format_context,
@@ -465,45 +549,41 @@
                                          SwrContext *resampler_context,
                                          int *finished)
 {
-    /** Temporary storage of the input samples of the frame read from the file. */
+    /* Temporary storage of the input samples of the frame read from the file. */
     AVFrame *input_frame = NULL;
-    /** Temporary storage for the converted input samples. */
+    /* Temporary storage for the converted input samples. */
     uint8_t **converted_input_samples = NULL;
-    int data_present;
+    int data_present = 0;
     int ret = AVERROR_EXIT;
 
-    /** Initialize temporary storage for one input frame. */
+    /* Initialize temporary storage for one input frame. */
     if (init_input_frame(&input_frame))
         goto cleanup;
-    /** Decode one frame worth of audio samples. */
+    /* Decode one frame worth of audio samples. */
     if (decode_audio_frame(input_frame, input_format_context,
                            input_codec_context, &data_present, finished))
         goto cleanup;
-    /**
-     * If we are at the end of the file and there are no more samples
+    /* If we are at the end of the file and there are no more samples
      * in the decoder which are delayed, we are actually finished.
-     * This must not be treated as an error.
-     */
-    if (*finished && !data_present) {
+     * This must not be treated as an error. */
+    if (*finished) {
         ret = 0;
         goto cleanup;
     }
-    /** If there is decoded data, convert and store it */
+    /* If there is decoded data, convert and store it. */
     if (data_present) {
-        /** Initialize the temporary storage for the converted input samples. */
+        /* Initialize the temporary storage for the converted input samples. */
         if (init_converted_samples(&converted_input_samples, output_codec_context,
                                    input_frame->nb_samples))
             goto cleanup;
 
-        /**
-         * Convert the input samples to the desired output sample format.
-         * This requires a temporary storage provided by converted_input_samples.
-         */
+        /* Convert the input samples to the desired output sample format.
+         * This requires a temporary storage provided by converted_input_samples. */
         if (convert_samples((const uint8_t**)input_frame->extended_data, converted_input_samples,
                             input_frame->nb_samples, resampler_context))
             goto cleanup;
 
-        /** Add the converted input samples to the FIFO buffer for later processing. */
+        /* Add the converted input samples to the FIFO buffer for later processing. */
         if (add_samples_to_fifo(fifo, converted_input_samples,
                                 input_frame->nb_samples))
             goto cleanup;
@@ -524,6 +604,10 @@
 /**
  * Initialize one input frame for writing to the output file.
  * The frame will be exactly frame_size samples large.
+ * @param[out] frame                Frame to be initialized
+ * @param      output_codec_context Codec context of the output file
+ * @param      frame_size           Size of the frame
+ * @return Error code (0 if successful)
  */
 static int init_output_frame(AVFrame **frame,
                              AVCodecContext *output_codec_context,
@@ -531,28 +615,24 @@
 {
     int error;
 
-    /** Create a new frame to store the audio samples. */
+    /* Create a new frame to store the audio samples. */
     if (!(*frame = av_frame_alloc())) {
         fprintf(stderr, "Could not allocate output frame\n");
         return AVERROR_EXIT;
     }
 
-    /**
-     * Set the frame's parameters, especially its size and format.
+    /* Set the frame's parameters, especially its size and format.
      * av_frame_get_buffer needs this to allocate memory for the
      * audio samples of the frame.
      * Default channel layouts based on the number of channels
-     * are assumed for simplicity.
-     */
+     * are assumed for simplicity. */
     (*frame)->nb_samples     = frame_size;
     (*frame)->channel_layout = output_codec_context->channel_layout;
     (*frame)->format         = output_codec_context->sample_fmt;
     (*frame)->sample_rate    = output_codec_context->sample_rate;
 
-    /**
-     * Allocate the samples of the created frame. This call will make
-     * sure that the audio frame can hold as many samples as specified.
-     */
+    /* Allocate the samples of the created frame. This call will make
+     * sure that the audio frame can hold as many samples as specified. */
     if ((error = av_frame_get_buffer(*frame, 0)) < 0) {
         fprintf(stderr, "Could not allocate output frame samples (error '%s')\n",
                 av_err2str(error));
@@ -563,87 +643,114 @@
     return 0;
 }
 
-/** Global timestamp for the audio frames */
+/* Global timestamp for the audio frames. */
 static int64_t pts = 0;
 
-/** Encode one frame worth of audio to the output file. */
+/**
+ * Encode one frame worth of audio to the output file.
+ * @param      frame                 Samples to be encoded
+ * @param      output_format_context Format context of the output file
+ * @param      output_codec_context  Codec context of the output file
+ * @param[out] data_present          Indicates whether data has been
+ *                                   encoded
+ * @return Error code (0 if successful)
+ */
 static int encode_audio_frame(AVFrame *frame,
                               AVFormatContext *output_format_context,
                               AVCodecContext *output_codec_context,
                               int *data_present)
 {
-    /** Packet used for temporary storage. */
+    /* Packet used for temporary storage. */
     AVPacket output_packet;
     int error;
     init_packet(&output_packet);
 
-    /** Set a timestamp based on the sample rate for the container. */
+    /* Set a timestamp based on the sample rate for the container. */
     if (frame) {
         frame->pts = pts;
         pts += frame->nb_samples;
     }
 
-    /**
-     * Encode the audio frame and store it in the temporary packet.
-     * The output audio stream encoder is used to do this.
-     */
-    if ((error = avcodec_encode_audio2(output_codec_context, &output_packet,
-                                       frame, data_present)) < 0) {
-        fprintf(stderr, "Could not encode frame (error '%s')\n",
+    /* Send the audio frame stored in the temporary packet to the encoder.
+     * The output audio stream encoder is used to do this. */
+    error = avcodec_send_frame(output_codec_context, frame);
+    /* The encoder signals that it has nothing more to encode. */
+    if (error == AVERROR_EOF) {
+        error = 0;
+        goto cleanup;
+    } else if (error < 0) {
+        fprintf(stderr, "Could not send packet for encoding (error '%s')\n",
                 av_err2str(error));
-        av_packet_unref(&output_packet);
         return error;
     }
 
-    /** Write one audio frame from the temporary packet to the output file. */
-    if (*data_present) {
-        if ((error = av_write_frame(output_format_context, &output_packet)) < 0) {
-            fprintf(stderr, "Could not write frame (error '%s')\n",
-                    av_err2str(error));
-            av_packet_unref(&output_packet);
-            return error;
-        }
-
-        av_packet_unref(&output_packet);
+    /* Receive one encoded frame from the encoder. */
+    error = avcodec_receive_packet(output_codec_context, &output_packet);
+    /* If the encoder asks for more data to be able to provide an
+     * encoded frame, return indicating that no data is present. */
+    if (error == AVERROR(EAGAIN)) {
+        error = 0;
+        goto cleanup;
+    /* If the last frame has been encoded, stop encoding. */
+    } else if (error == AVERROR_EOF) {
+        error = 0;
+        goto cleanup;
+    } else if (error < 0) {
+        fprintf(stderr, "Could not encode frame (error '%s')\n",
+                av_err2str(error));
+        goto cleanup;
+    /* Default case: Return encoded data. */
+    } else {
+        *data_present = 1;
     }
 
-    return 0;
+    /* Write one audio frame from the temporary packet to the output file. */
+    if (*data_present &&
+        (error = av_write_frame(output_format_context, &output_packet)) < 0) {
+        fprintf(stderr, "Could not write frame (error '%s')\n",
+                av_err2str(error));
+        goto cleanup;
+    }
+
+cleanup:
+    av_packet_unref(&output_packet);
+    return error;
 }
 
 /**
  * Load one audio frame from the FIFO buffer, encode and write it to the
  * output file.
+ * @param fifo                  Buffer used for temporary storage
+ * @param output_format_context Format context of the output file
+ * @param output_codec_context  Codec context of the output file
+ * @return Error code (0 if successful)
  */
 static int load_encode_and_write(AVAudioFifo *fifo,
                                  AVFormatContext *output_format_context,
                                  AVCodecContext *output_codec_context)
 {
-    /** Temporary storage of the output samples of the frame written to the file. */
+    /* Temporary storage of the output samples of the frame written to the file. */
     AVFrame *output_frame;
-    /**
-     * Use the maximum number of possible samples per frame.
+    /* Use the maximum number of possible samples per frame.
      * If there is less than the maximum possible frame size in the FIFO
-     * buffer use this number. Otherwise, use the maximum possible frame size
-     */
+     * buffer use this number. Otherwise, use the maximum possible frame size. */
     const int frame_size = FFMIN(av_audio_fifo_size(fifo),
                                  output_codec_context->frame_size);
     int data_written;
 
-    /** Initialize temporary storage for one output frame. */
+    /* Initialize temporary storage for one output frame. */
     if (init_output_frame(&output_frame, output_codec_context, frame_size))
         return AVERROR_EXIT;
 
-    /**
-     * Read as many samples from the FIFO buffer as required to fill the frame.
-     * The samples are stored in the frame temporarily.
-     */
+    /* Read as many samples from the FIFO buffer as required to fill the frame.
+     * The samples are stored in the frame temporarily. */
     if (av_audio_fifo_read(fifo, (void **)output_frame->data, frame_size) < frame_size) {
         fprintf(stderr, "Could not read data from FIFO\n");
         av_frame_free(&output_frame);
         return AVERROR_EXIT;
     }
 
-    /** Encode one frame worth of audio samples. */
+    /* Encode one frame worth of audio samples. */
     if (encode_audio_frame(output_frame, output_format_context,
                            output_codec_context, &data_written)) {
         av_frame_free(&output_frame);
@@ -653,7 +760,11 @@
     return 0;
 }
 
-/** Write the trailer of the output file container. */
+/**
+ * Write the trailer of the output file container.
+ * @param output_format_context Format context of the output file
+ * @return Error code (0 if successful)
+ */
 static int write_output_file_trailer(AVFormatContext *output_format_context)
 {
     int error;
@@ -665,7 +776,6 @@
     return 0;
 }
 
-/** Convert an audio file to an AAC file in an MP4 container. */
 int main(int argc, char **argv)
 {
     AVFormatContext *input_format_context = NULL, *output_format_context = NULL;
@@ -674,90 +784,75 @@
     AVAudioFifo *fifo = NULL;
     int ret = AVERROR_EXIT;
 
-    if (argc < 3) {
+    if (argc != 3) {
         fprintf(stderr, "Usage: %s <input file> <output file>\n", argv[0]);
         exit(1);
     }
 
-    /** Register all codecs and formats so that they can be used. */
-    av_register_all();
-    /** Open the input file for reading. */
+    /* Open the input file for reading. */
     if (open_input_file(argv[1], &input_format_context,
                         &input_codec_context))
         goto cleanup;
-    /** Open the output file for writing. */
+    /* Open the output file for writing. */
     if (open_output_file(argv[2], input_codec_context,
                          &output_format_context, &output_codec_context))
         goto cleanup;
-    /** Initialize the resampler to be able to convert audio sample formats. */
+    /* Initialize the resampler to be able to convert audio sample formats. */
     if (init_resampler(input_codec_context, output_codec_context,
                        &resample_context))
         goto cleanup;
-    /** Initialize the FIFO buffer to store audio samples to be encoded. */
+    /* Initialize the FIFO buffer to store audio samples to be encoded. */
     if (init_fifo(&fifo, output_codec_context))
         goto cleanup;
-    /** Write the header of the output file container. */
+    /* Write the header of the output file container. */
     if (write_output_file_header(output_format_context))
         goto cleanup;
 
-    /**
-     * Loop as long as we have input samples to read or output samples
-     * to write; abort as soon as we have neither.
-     */
+    /* Loop as long as we have input samples to read or output samples
+     * to write; abort as soon as we have neither. */
     while (1) {
-        /** Use the encoder's desired frame size for processing. */
+        /* Use the encoder's desired frame size for processing. */
         const int output_frame_size = output_codec_context->frame_size;
         int finished                = 0;
 
-        /**
-         * Make sure that there is one frame worth of samples in the FIFO
+        /* Make sure that there is one frame worth of samples in the FIFO
          * buffer so that the encoder can do its work.
          * Since the decoder's and the encoder's frame size may differ, we
          * need to FIFO buffer to store as many frames worth of input samples
-         * that they make up at least one frame worth of output samples.
-         */
+         * that they make up at least one frame worth of output samples. */
         while (av_audio_fifo_size(fifo) < output_frame_size) {
-            /**
-             * Decode one frame worth of audio samples, convert it to the
-             * output sample format and put it into the FIFO buffer.
-             */
+            /* Decode one frame worth of audio samples, convert it to the
+             * output sample format and put it into the FIFO buffer. */
             if (read_decode_convert_and_store(fifo, input_format_context,
                                               input_codec_context,
                                               output_codec_context,
                                               resample_context, &finished))
                 goto cleanup;
 
-            /**
-             * If we are at the end of the input file, we continue
-             * encoding the remaining audio samples to the output file.
-             */
+            /* If we are at the end of the input file, we continue
+             * encoding the remaining audio samples to the output file. */
             if (finished)
                 break;
         }
 
-        /**
-         * If we have enough samples for the encoder, we encode them.
+        /* If we have enough samples for the encoder, we encode them.
          * At the end of the file, we pass the remaining samples to
-         * the encoder.
-         */
+         * the encoder. */
         while (av_audio_fifo_size(fifo) >= output_frame_size ||
                (finished && av_audio_fifo_size(fifo) > 0))
-            /**
-             * Take one frame worth of audio samples from the FIFO buffer,
-             * encode it and write it to the output file.
-             */
+            /* Take one frame worth of audio samples from the FIFO buffer,
+             * encode it and write it to the output file. */
             if (load_encode_and_write(fifo, output_format_context,
                                       output_codec_context))
                 goto cleanup;
 
-        /**
-         * If we are at the end of the input file and have encoded
-         * all remaining samples, we can exit this loop and finish.
-         */
+        /* If we are at the end of the input file and have encoded
+         * all remaining samples, we can exit this loop and finish. */
         if (finished) {
             int data_written;
-            /** Flush the encoder as it may have delayed frames. */
+            /* Flush the encoder as it may have delayed frames. */
             do {
+                data_written = 0;
                 if (encode_audio_frame(NULL, output_format_context,
                                        output_codec_context, &data_written))
                     goto cleanup;
@@ -766,7 +861,7 @@
         }
     }
 
-    /** Write the trailer of the output file container. */
+    /* Write the trailer of the output file container. */
     if (write_output_file_trailer(output_format_context))
         goto cleanup;
     ret = 0;

diff --git a/doc/examples/transcoding.c b/doc/examples/transcoding.c
index fb15a21..e48837c 100644
--- a/doc/examples/transcoding.c
+++ b/doc/examples/transcoding.c

@@ -30,7 +30,6 @@
 
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
-#include <libavfilter/avfiltergraph.h>
 #include <libavfilter/buffersink.h>
 #include <libavfilter/buffersrc.h>
 #include <libavutil/opt.h>
@@ -173,6 +172,9 @@
                 enc_ctx->time_base = (AVRational){1, enc_ctx->sample_rate};
             }
 
+            if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
+                enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+
             /* Third parameter can be used to pass settings to encoder */
             ret = avcodec_open2(enc_ctx, encoder, NULL);
             if (ret < 0) {
@@ -184,8 +186,6 @@
                 av_log(NULL, AV_LOG_ERROR, "Failed to copy encoder parameters to output stream #%u\n", i);
                 return ret;
             }
-            if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
-                enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 
             out_stream->time_base = enc_ctx->time_base;
             stream_ctx[i].enc_ctx = enc_ctx;
@@ -228,8 +228,8 @@
 {
     char args[512];
     int ret = 0;
-    AVFilter *buffersrc = NULL;
-    AVFilter *buffersink = NULL;
+    const AVFilter *buffersrc = NULL;
+    const AVFilter *buffersink = NULL;
     AVFilterContext *buffersrc_ctx = NULL;
     AVFilterContext *buffersink_ctx = NULL;
     AVFilterInOut *outputs = avfilter_inout_alloc();
@@ -518,9 +518,6 @@
         return 1;
     }
 
-    av_register_all();
-    avfilter_register_all();
-
     if ((ret = open_input_file(argv[1])) < 0)
         goto end;
     if ((ret = open_output_file(argv[2])) < 0)

diff --git a/doc/examples/vaapi_encode.c b/doc/examples/vaapi_encode.c
new file mode 100644
index 0000000..98fd5d3
--- /dev/null
+++ b/doc/examples/vaapi_encode.c

@@ -0,0 +1,224 @@
+/*
+ * Video Acceleration API (video encoding) encode sample
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * @file
+ * Intel VAAPI-accelerated encoding example.
+ *
+ * @example vaapi_encode.c
+ * This example shows how to do VAAPI-accelerated encoding. now only support NV12
+ * raw file, usage like: vaapi_encode 1920 1080 input.yuv output.h264
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include <libavcodec/avcodec.h>
+#include <libavutil/pixdesc.h>
+#include <libavutil/hwcontext.h>
+
+static int width, height;
+static AVBufferRef *hw_device_ctx = NULL;
+
+static int set_hwframe_ctx(AVCodecContext *ctx, AVBufferRef *hw_device_ctx)
+{
+    AVBufferRef *hw_frames_ref;
+    AVHWFramesContext *frames_ctx = NULL;
+    int err = 0;
+
+    if (!(hw_frames_ref = av_hwframe_ctx_alloc(hw_device_ctx))) {
+        fprintf(stderr, "Failed to create VAAPI frame context.\n");
+        return -1;
+    }
+    frames_ctx = (AVHWFramesContext *)(hw_frames_ref->data);
+    frames_ctx->format    = AV_PIX_FMT_VAAPI;
+    frames_ctx->sw_format = AV_PIX_FMT_NV12;
+    frames_ctx->width     = width;
+    frames_ctx->height    = height;
+    frames_ctx->initial_pool_size = 20;
+    if ((err = av_hwframe_ctx_init(hw_frames_ref)) < 0) {
+        fprintf(stderr, "Failed to initialize VAAPI frame context."
+                "Error code: %s\n",av_err2str(err));
+        av_buffer_unref(&hw_frames_ref);
+        return err;
+    }
+    ctx->hw_frames_ctx = av_buffer_ref(hw_frames_ref);
+    if (!ctx->hw_frames_ctx)
+        err = AVERROR(ENOMEM);
+
+    av_buffer_unref(&hw_frames_ref);
+    return err;
+}
+
+static int encode_write(AVCodecContext *avctx, AVFrame *frame, FILE *fout)
+{
+    int ret = 0;
+    AVPacket enc_pkt;
+
+    av_init_packet(&enc_pkt);
+    enc_pkt.data = NULL;
+    enc_pkt.size = 0;
+
+    if ((ret = avcodec_send_frame(avctx, frame)) < 0) {
+        fprintf(stderr, "Error code: %s\n", av_err2str(ret));
+        goto end;
+    }
+    while (1) {
+        ret = avcodec_receive_packet(avctx, &enc_pkt);
+        if (ret)
+            break;
+
+        enc_pkt.stream_index = 0;
+        ret = fwrite(enc_pkt.data, enc_pkt.size, 1, fout);
+        av_packet_unref(&enc_pkt);
+    }
+
+end:
+    ret = ((ret == AVERROR(EAGAIN)) ? 0 : -1);
+    return ret;
+}
+
+int main(int argc, char *argv[])
+{
+    int size, err;
+    FILE *fin = NULL, *fout = NULL;
+    AVFrame *sw_frame = NULL, *hw_frame = NULL;
+    AVCodecContext *avctx = NULL;
+    AVCodec *codec = NULL;
+    const char *enc_name = "h264_vaapi";
+
+    if (argc < 5) {
+        fprintf(stderr, "Usage: %s <width> <height> <input file> <output file>\n", argv[0]);
+        return -1;
+    }
+
+    width  = atoi(argv[1]);
+    height = atoi(argv[2]);
+    size   = width * height;
+
+    if (!(fin = fopen(argv[3], "r"))) {
+        fprintf(stderr, "Fail to open input file : %s\n", strerror(errno));
+        return -1;
+    }
+    if (!(fout = fopen(argv[4], "w+b"))) {
+        fprintf(stderr, "Fail to open output file : %s\n", strerror(errno));
+        err = -1;
+        goto close;
+    }
+
+    err = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_VAAPI,
+                                 NULL, NULL, 0);
+    if (err < 0) {
+        fprintf(stderr, "Failed to create a VAAPI device. Error code: %s\n", av_err2str(err));
+        goto close;
+    }
+
+    if (!(codec = avcodec_find_encoder_by_name(enc_name))) {
+        fprintf(stderr, "Could not find encoder.\n");
+        err = -1;
+        goto close;
+    }
+
+    if (!(avctx = avcodec_alloc_context3(codec))) {
+        err = AVERROR(ENOMEM);
+        goto close;
+    }
+
+    avctx->width     = width;
+    avctx->height    = height;
+    avctx->time_base = (AVRational){1, 25};
+    avctx->framerate = (AVRational){25, 1};
+    avctx->sample_aspect_ratio = (AVRational){1, 1};
+    avctx->pix_fmt   = AV_PIX_FMT_VAAPI;
+
+    /* set hw_frames_ctx for encoder's AVCodecContext */
+    if ((err = set_hwframe_ctx(avctx, hw_device_ctx)) < 0) {
+        fprintf(stderr, "Failed to set hwframe context.\n");
+        goto close;
+    }
+
+    if ((err = avcodec_open2(avctx, codec, NULL)) < 0) {
+        fprintf(stderr, "Cannot open video encoder codec. Error code: %s\n", av_err2str(err));
+        goto close;
+    }
+
+    while (1) {
+        if (!(sw_frame = av_frame_alloc())) {
+            err = AVERROR(ENOMEM);
+            goto close;
+        }
+        /* read data into software frame, and transfer them into hw frame */
+        sw_frame->width  = width;
+        sw_frame->height = height;
+        sw_frame->format = AV_PIX_FMT_NV12;
+        if ((err = av_frame_get_buffer(sw_frame, 32)) < 0)
+            goto close;
+        if ((err = fread((uint8_t*)(sw_frame->data[0]), size, 1, fin)) <= 0)
+            break;
+        if ((err = fread((uint8_t*)(sw_frame->data[1]), size/2, 1, fin)) <= 0)
+            break;
+
+        if (!(hw_frame = av_frame_alloc())) {
+            err = AVERROR(ENOMEM);
+            goto close;
+        }
+        if ((err = av_hwframe_get_buffer(avctx->hw_frames_ctx, hw_frame, 0)) < 0) {
+            fprintf(stderr, "Error code: %s.\n", av_err2str(err));
+            goto close;
+        }
+        if (!hw_frame->hw_frames_ctx) {
+            err = AVERROR(ENOMEM);
+            goto close;
+        }
+        if ((err = av_hwframe_transfer_data(hw_frame, sw_frame, 0)) < 0) {
+            fprintf(stderr, "Error while transferring frame data to surface."
+                    "Error code: %s.\n", av_err2str(err));
+            goto close;
+        }
+
+        if ((err = (encode_write(avctx, hw_frame, fout))) < 0) {
+            fprintf(stderr, "Failed to encode.\n");
+            goto close;
+        }
+        av_frame_free(&hw_frame);
+        av_frame_free(&sw_frame);
+    }
+
+    /* flush encoder */
+    err = encode_write(avctx, NULL, fout);
+    if (err == AVERROR_EOF)
+        err = 0;
+
+close:
+    if (fin)
+        fclose(fin);
+    if (fout)
+        fclose(fout);
+    av_frame_free(&sw_frame);
+    av_frame_free(&hw_frame);
+    avcodec_free_context(&avctx);
+    av_buffer_unref(&hw_device_ctx);
+
+    return err;
+}

diff --git a/doc/examples/vaapi_transcode.c b/doc/examples/vaapi_transcode.c
new file mode 100644
index 0000000..279d20f
--- /dev/null
+++ b/doc/examples/vaapi_transcode.c

@@ -0,0 +1,306 @@
+/*
+ * Video Acceleration API (video transcoding) transcode sample
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * @file
+ * Intel VAAPI-accelerated transcoding example.
+ *
+ * @example vaapi_transcode.c
+ * This example shows how to do VAAPI-accelerated transcoding.
+ * Usage: vaapi_transcode input_stream codec output_stream
+ * e.g: - vaapi_transcode input.mp4 h264_vaapi output_h264.mp4
+ *      - vaapi_transcode input.mp4 vp9_vaapi output_vp9.ivf
+ */
+
+#include <stdio.h>
+#include <errno.h>
+
+#include <libavutil/hwcontext.h>
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+
+static AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
+static AVBufferRef *hw_device_ctx = NULL;
+static AVCodecContext *decoder_ctx = NULL, *encoder_ctx = NULL;
+static int video_stream = -1;
+static AVStream *ost;
+static int initialized = 0;
+
+static enum AVPixelFormat get_vaapi_format(AVCodecContext *ctx,
+                                           const enum AVPixelFormat *pix_fmts)
+{
+    const enum AVPixelFormat *p;
+
+    for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
+        if (*p == AV_PIX_FMT_VAAPI)
+            return *p;
+    }
+
+    fprintf(stderr, "Unable to decode this file using VA-API.\n");
+    return AV_PIX_FMT_NONE;
+}
+
+static int open_input_file(const char *filename)
+{
+    int ret;
+    AVCodec *decoder = NULL;
+    AVStream *video = NULL;
+
+    if ((ret = avformat_open_input(&ifmt_ctx, filename, NULL, NULL)) < 0) {
+        fprintf(stderr, "Cannot open input file '%s', Error code: %s\n",
+                filename, av_err2str(ret));
+        return ret;
+    }
+
+    if ((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0) {
+        fprintf(stderr, "Cannot find input stream information. Error code: %s\n",
+                av_err2str(ret));
+        return ret;
+    }
+
+    ret = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0);
+    if (ret < 0) {
+        fprintf(stderr, "Cannot find a video stream in the input file. "
+                "Error code: %s\n", av_err2str(ret));
+        return ret;
+    }
+    video_stream = ret;
+
+    if (!(decoder_ctx = avcodec_alloc_context3(decoder)))
+        return AVERROR(ENOMEM);
+
+    video = ifmt_ctx->streams[video_stream];
+    if ((ret = avcodec_parameters_to_context(decoder_ctx, video->codecpar)) < 0) {
+        fprintf(stderr, "avcodec_parameters_to_context error. Error code: %s\n",
+                av_err2str(ret));
+        return ret;
+    }
+
+    decoder_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
+    if (!decoder_ctx->hw_device_ctx) {
+        fprintf(stderr, "A hardware device reference create failed.\n");
+        return AVERROR(ENOMEM);
+    }
+    decoder_ctx->get_format    = get_vaapi_format;
+
+    if ((ret = avcodec_open2(decoder_ctx, decoder, NULL)) < 0)
+        fprintf(stderr, "Failed to open codec for decoding. Error code: %s\n",
+                av_err2str(ret));
+
+    return ret;
+}
+
+static int encode_write(AVFrame *frame)
+{
+    int ret = 0;
+    AVPacket enc_pkt;
+
+    av_init_packet(&enc_pkt);
+    enc_pkt.data = NULL;
+    enc_pkt.size = 0;
+
+    if ((ret = avcodec_send_frame(encoder_ctx, frame)) < 0) {
+        fprintf(stderr, "Error during encoding. Error code: %s\n", av_err2str(ret));
+        goto end;
+    }
+    while (1) {
+        ret = avcodec_receive_packet(encoder_ctx, &enc_pkt);
+        if (ret)
+            break;
+
+        enc_pkt.stream_index = 0;
+        av_packet_rescale_ts(&enc_pkt, ifmt_ctx->streams[video_stream]->time_base,
+                             ofmt_ctx->streams[0]->time_base);
+        ret = av_interleaved_write_frame(ofmt_ctx, &enc_pkt);
+        if (ret < 0) {
+            fprintf(stderr, "Error during writing data to output file. "
+                    "Error code: %s\n", av_err2str(ret));
+            return -1;
+        }
+    }
+
+end:
+    if (ret == AVERROR_EOF)
+        return 0;
+    ret = ((ret == AVERROR(EAGAIN)) ? 0:-1);
+    return ret;
+}
+
+static int dec_enc(AVPacket *pkt, AVCodec *enc_codec)
+{
+    AVFrame *frame;
+    int ret = 0;
+
+    ret = avcodec_send_packet(decoder_ctx, pkt);
+    if (ret < 0) {
+        fprintf(stderr, "Error during decoding. Error code: %s\n", av_err2str(ret));
+        return ret;
+    }
+
+    while (ret >= 0) {
+        if (!(frame = av_frame_alloc()))
+            return AVERROR(ENOMEM);
+
+        ret = avcodec_receive_frame(decoder_ctx, frame);
+        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
+            av_frame_free(&frame);
+            return 0;
+        } else if (ret < 0) {
+            fprintf(stderr, "Error while decoding. Error code: %s\n", av_err2str(ret));
+            goto fail;
+        }
+
+        if (!initialized) {
+            /* we need to ref hw_frames_ctx of decoder to initialize encoder's codec.
+               Only after we get a decoded frame, can we obtain its hw_frames_ctx */
+            encoder_ctx->hw_frames_ctx = av_buffer_ref(decoder_ctx->hw_frames_ctx);
+            if (!encoder_ctx->hw_frames_ctx) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+            /* set AVCodecContext Parameters for encoder, here we keep them stay
+             * the same as decoder.
+             * xxx: now the sample can't handle resolution change case.
+             */
+            encoder_ctx->time_base = av_inv_q(decoder_ctx->framerate);
+            encoder_ctx->pix_fmt   = AV_PIX_FMT_VAAPI;
+            encoder_ctx->width     = decoder_ctx->width;
+            encoder_ctx->height    = decoder_ctx->height;
+
+            if ((ret = avcodec_open2(encoder_ctx, enc_codec, NULL)) < 0) {
+                fprintf(stderr, "Failed to open encode codec. Error code: %s\n",
+                        av_err2str(ret));
+                goto fail;
+            }
+
+            if (!(ost = avformat_new_stream(ofmt_ctx, enc_codec))) {
+                fprintf(stderr, "Failed to allocate stream for output format.\n");
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            ost->time_base = encoder_ctx->time_base;
+            ret = avcodec_parameters_from_context(ost->codecpar, encoder_ctx);
+            if (ret < 0) {
+                fprintf(stderr, "Failed to copy the stream parameters. "
+                        "Error code: %s\n", av_err2str(ret));
+                goto fail;
+            }
+
+            /* write the stream header */
+            if ((ret = avformat_write_header(ofmt_ctx, NULL)) < 0) {
+                fprintf(stderr, "Error while writing stream header. "
+                        "Error code: %s\n", av_err2str(ret));
+                goto fail;
+            }
+
+            initialized = 1;
+        }
+
+        if ((ret = encode_write(frame)) < 0)
+            fprintf(stderr, "Error during encoding and writing.\n");
+
+fail:
+        av_frame_free(&frame);
+        if (ret < 0)
+            return ret;
+    }
+    return 0;
+}
+
+int main(int argc, char **argv)
+{
+    int ret = 0;
+    AVPacket dec_pkt;
+    AVCodec *enc_codec;
+
+    if (argc != 4) {
+        fprintf(stderr, "Usage: %s <input file> <encode codec> <output file>\n"
+                "The output format is guessed according to the file extension.\n"
+                "\n", argv[0]);
+        return -1;
+    }
+
+    ret = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0);
+    if (ret < 0) {
+        fprintf(stderr, "Failed to create a VAAPI device. Error code: %s\n", av_err2str(ret));
+        return -1;
+    }
+
+    if ((ret = open_input_file(argv[1])) < 0)
+        goto end;
+
+    if (!(enc_codec = avcodec_find_encoder_by_name(argv[2]))) {
+        fprintf(stderr, "Could not find encoder '%s'\n", argv[2]);
+        ret = -1;
+        goto end;
+    }
+
+    if ((ret = (avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, argv[3]))) < 0) {
+        fprintf(stderr, "Failed to deduce output format from file extension. Error code: "
+                "%s\n", av_err2str(ret));
+        goto end;
+    }
+
+    if (!(encoder_ctx = avcodec_alloc_context3(enc_codec))) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    ret = avio_open(&ofmt_ctx->pb, argv[3], AVIO_FLAG_WRITE);
+    if (ret < 0) {
+        fprintf(stderr, "Cannot open output file. "
+                "Error code: %s\n", av_err2str(ret));
+        goto end;
+    }
+
+    /* read all packets and only transcoding video */
+    while (ret >= 0) {
+        if ((ret = av_read_frame(ifmt_ctx, &dec_pkt)) < 0)
+            break;
+
+        if (video_stream == dec_pkt.stream_index)
+            ret = dec_enc(&dec_pkt, enc_codec);
+
+        av_packet_unref(&dec_pkt);
+    }
+
+    /* flush decoder */
+    dec_pkt.data = NULL;
+    dec_pkt.size = 0;
+    ret = dec_enc(&dec_pkt, enc_codec);
+    av_packet_unref(&dec_pkt);
+
+    /* flush encoder */
+    ret = encode_write(NULL);
+
+    /* write the trailer for output stream */
+    av_write_trailer(ofmt_ctx);
+
+end:
+    avformat_close_input(&ifmt_ctx);
+    avformat_close_input(&ofmt_ctx);
+    avcodec_free_context(&decoder_ctx);
+    avcodec_free_context(&encoder_ctx);
+    av_buffer_unref(&hw_device_ctx);
+    return ret;
+}

diff --git a/doc/faq.texi b/doc/faq.texi
index dcaf89c..73624c6 100644
--- a/doc/faq.texi
+++ b/doc/faq.texi

@@ -501,6 +501,71 @@
 ffmpeg -i ega_screen.nut -aspect 4/3 -c copy ega_screen_overridden.nut
 @end example
 
+@anchor{background task}
+@section How do I run ffmpeg as a background task?
+
+ffmpeg normally checks the console input, for entries like "q" to stop
+and "?" to give help, while performing operations. ffmpeg does not have a way of
+detecting when it is running as a background task.
+When it checks the console input, that can cause the process running ffmpeg
+in the background to suspend.
+
+To prevent those input checks, allowing ffmpeg to run as a background task,
+use the @url{ffmpeg.html#stdin-option, @code{-nostdin} option}
+in the ffmpeg invocation. This is effective whether you run ffmpeg in a shell
+or invoke ffmpeg in its own process via an operating system API.
+
+As an alternative, when you are running ffmpeg in a shell, you can redirect
+standard input to @code{/dev/null} (on Linux and Mac OS)
+or @code{NUL} (on Windows). You can do this redirect either
+on the ffmpeg invocation, or from a shell script which calls ffmpeg.
+
+For example:
+
+@example
+ffmpeg -nostdin -i INPUT OUTPUT
+@end example
+
+or (on Linux, Mac OS, and other UNIX-like shells):
+
+@example
+ffmpeg -i INPUT OUTPUT </dev/null
+@end example
+
+or (on Windows):
+
+@example
+ffmpeg -i INPUT OUTPUT <NUL
+@end example
+
+@section How do I prevent ffmpeg from suspending with a message like @emph{suspended (tty output)}?
+
+If you run ffmpeg in the background, you may find that its process suspends.
+There may be a message like @emph{suspended (tty output)}. The question is how
+to prevent the process from being suspended.
+
+For example:
+
+@example
+% ffmpeg -i INPUT OUTPUT &> ~/tmp/log.txt &
+[1] 93352
+%
+[1]  + suspended (tty output)  ffmpeg -i INPUT OUTPUT &>
+@end example
+
+The message "tty output" notwithstanding, the problem here is that
+ffmpeg normally checks the console input when it runs. The operating system
+detects this, and suspends the process until you can bring it to the
+foreground and attend to it.
+
+The solution is to use the right techniques to tell ffmpeg not to consult
+console input. You can use the
+@url{ffmpeg.html#stdin-option, @code{-nostdin} option},
+or redirect standard input with @code{< /dev/null}.
+See FAQ
+@ref{background task, @emph{How do I run ffmpeg as a background task?}}
+for details.
+
 @chapter Development
 
 @section Are there examples illustrating how to use the FFmpeg libraries, particularly libavcodec and libavformat?

diff --git a/doc/fate.texi b/doc/fate.texi
index 7a96c25..a352994 100644
--- a/doc/fate.texi
+++ b/doc/fate.texi

@@ -147,6 +147,26 @@
 The only thing left is to automate the execution of the fate.sh script and
 the synchronisation of the samples directory.
 
+@chapter Uploading new samples to the fate suite
+
+This is for developers who have an account on the fate suite server.
+If you upload new samples, please make sure they are as small as possible,
+space on each client, network bandwidth and so on benefit from smaller test cases.
+Also keep in mind older checkouts use existing sample files, that means in
+practice generally do not replace, remove or overwrite files as it likely would
+break older checkouts or releases.
+
+@example
+#First update your local samples copy:
+rsync -vauL --chmod=Dg+s,Duo+x,ug+rw,o+r,o-w,+X fate-suite.ffmpeg.org:/home/samples/fate-suite/ ~/fate-suite
+
+#Then do a dry run checking what would be uploaded:
+rsync -vanL --no-g --chmod=Dg+s,Duo+x,ug+rw,o+r,o-w,+X ~/fate-suite/ fate-suite.ffmpeg.org:/home/samples/fate-suite
+
+#Upload the files:
+rsync -vaL  --no-g --chmod=Dg+s,Duo+x,ug+rw,o+r,o-w,+X ~/fate-suite/ fate-suite.ffmpeg.org:/home/samples/fate-suite
+@end example
+
 
 @chapter FATE makefile targets and variables
 
@@ -202,6 +222,11 @@
 Specify which hardware acceleration to use while running regression tests,
 by default @samp{none} is used.
 
+@item KEEP
+Set to @samp{1} to keep temp files generated by fate test(s) when test is successful.
+Default is @samp{0}, which removes these files. Files are always kept when a test
+fails.
+
 @end table
 
 @section Examples

diff --git a/doc/ffmpeg-bitstream-filters.texi b/doc/ffmpeg-bitstream-filters.texi
index bbde257..63c0b33 100644
--- a/doc/ffmpeg-bitstream-filters.texi
+++ b/doc/ffmpeg-bitstream-filters.texi

@@ -26,12 +26,12 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{libavcodec.html,libavcodec}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1), libavcodec(3)
+ffmpeg(1), ffplay(1), ffprobe(1), libavcodec(3)
 @end ifnothtml
 
 @include authors.texi

diff --git a/doc/ffmpeg-codecs.texi b/doc/ffmpeg-codecs.texi
index 7df4391..e46cd3a 100644
--- a/doc/ffmpeg-codecs.texi
+++ b/doc/ffmpeg-codecs.texi

@@ -23,12 +23,12 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{libavcodec.html,libavcodec}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1), libavcodec(3)
+ffmpeg(1), ffplay(1), ffprobe(1), libavcodec(3)
 @end ifnothtml
 
 @include authors.texi

diff --git a/doc/ffmpeg-devices.texi b/doc/ffmpeg-devices.texi
index 721c0df..a51de00 100644
--- a/doc/ffmpeg-devices.texi
+++ b/doc/ffmpeg-devices.texi

@@ -23,12 +23,12 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{libavdevice.html,libavdevice}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1), libavdevice(3)
+ffmpeg(1), ffplay(1), ffprobe(1), libavdevice(3)
 @end ifnothtml
 
 @include authors.texi

diff --git a/doc/ffmpeg-filters.texi b/doc/ffmpeg-filters.texi
index b643f2c..a8ababa 100644
--- a/doc/ffmpeg-filters.texi
+++ b/doc/ffmpeg-filters.texi

@@ -23,12 +23,12 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{libavfilter.html,libavfilter}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1), libavfilter(3)
+ffmpeg(1), ffplay(1), ffprobe(1), libavfilter(3)
 @end ifnothtml
 
 @include authors.texi

diff --git a/doc/ffmpeg-formats.texi b/doc/ffmpeg-formats.texi
index d916ee8..a1917a7 100644
--- a/doc/ffmpeg-formats.texi
+++ b/doc/ffmpeg-formats.texi

@@ -23,12 +23,12 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{libavformat.html,libavformat}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1), libavformat(3)
+ffmpeg(1), ffplay(1), ffprobe(1), libavformat(3)
 @end ifnothtml
 
 @include authors.texi

diff --git a/doc/ffmpeg-protocols.texi b/doc/ffmpeg-protocols.texi
index f3a09f6..7f0e62a 100644
--- a/doc/ffmpeg-protocols.texi
+++ b/doc/ffmpeg-protocols.texi

@@ -23,12 +23,12 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{libavformat.html,libavformat}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1), libavformat(3)
+ffmpeg(1), ffplay(1), ffprobe(1), libavformat(3)
 @end ifnothtml
 
 @include authors.texi

diff --git a/doc/ffmpeg-resampler.texi b/doc/ffmpeg-resampler.texi
index be3784f..0e9a3dd 100644
--- a/doc/ffmpeg-resampler.texi
+++ b/doc/ffmpeg-resampler.texi

@@ -25,12 +25,12 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{libswresample.html,libswresample}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1), libswresample(3)
+ffmpeg(1), ffplay(1), ffprobe(1), libswresample(3)
 @end ifnothtml
 
 @include authors.texi

diff --git a/doc/ffmpeg-scaler.texi b/doc/ffmpeg-scaler.texi
index 9ab12a1..d259da9 100644
--- a/doc/ffmpeg-scaler.texi
+++ b/doc/ffmpeg-scaler.texi

@@ -24,12 +24,12 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{libswscale.html,libswscale}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1), libswscale(3)
+ffmpeg(1), ffplay(1), ffprobe(1), libswscale(3)
 @end ifnothtml
 
 @include authors.texi

diff --git a/doc/ffmpeg-utils.texi b/doc/ffmpeg-utils.texi
index e39cfa8..d8e0884 100644
--- a/doc/ffmpeg-utils.texi
+++ b/doc/ffmpeg-utils.texi

@@ -23,12 +23,12 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{libavutil.html,libavutil}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1), libavutil(3)
+ffmpeg(1), ffplay(1), ffprobe(1), libavutil(3)
 @end ifnothtml
 
 @include authors.texi

diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 0405d00..3717f22 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi

@@ -216,16 +216,208 @@
 @chapter Stream selection
 @c man begin STREAM SELECTION
 
-By default, @command{ffmpeg} includes only one stream of each type (video, audio, subtitle)
-present in the input files and adds them to each output file.  It picks the
-"best" of each based upon the following criteria: for video, it is the stream
-with the highest resolution, for audio, it is the stream with the most channels, for
-subtitles, it is the first subtitle stream. In the case where several streams of
-the same type rate equally, the stream with the lowest index is chosen.
+@command{ffmpeg} provides the @code{-map} option for manual control of stream selection in each
+output file. Users can skip @code{-map} and let ffmpeg perform automatic stream selection as
+described below. The @code{-vn / -an / -sn / -dn} options can be used to skip inclusion of
+video, audio, subtitle and data streams respectively, whether manually mapped or automatically
+selected, except for those streams which are outputs of complex filtergraphs.
 
-You can disable some of those defaults by using the @code{-vn/-an/-sn/-dn} options. For
-full manual control, use the @code{-map} option, which disables the defaults just
-described.
+@section Description
+The sub-sections that follow describe the various rules that are involved in stream selection.
+The examples that follow next show how these rules are applied in practice.
+
+While every effort is made to accurately reflect the behavior of the program, FFmpeg is under
+continuous development and the code may have changed since the time of this writing.
+
+@subsection Automatic stream selection
+
+In the absence of any map options for a particular output file, ffmpeg inspects the output
+format to check which type of streams can be included in it, viz. video, audio and/or
+subtitles. For each acceptable stream type, ffmpeg will pick one stream, when available,
+from among all the inputs.
+
+It will select that stream based upon the following criteria:
+@itemize
+@item
+for video, it is the stream with the highest resolution,
+@item
+for audio, it is the stream with the most channels,
+@item
+for subtitles, it is the first subtitle stream found but there's a caveat.
+The output format's default subtitle encoder can be either text-based or image-based,
+and only a subtitle stream of the same type will be chosen.
+@end itemize
+
+In the case where several streams of the same type rate equally, the stream with the lowest
+index is chosen.
+
+Data or attachment streams are not automatically selected and can only be included
+using @code{-map}.
+@subsection Manual stream selection
+
+When @code{-map} is used, only user-mapped streams are included in that output file,
+with one possible exception for filtergraph outputs described below.
+
+@subsection Complex filtergraphs
+
+If there are any complex filtergraph output streams with unlabeled pads, they will be added
+to the first output file. This will lead to a fatal error if the stream type is not supported
+by the output format. In the absence of the map option, the inclusion of these streams leads
+to the automatic stream selection of their types being skipped. If map options are present,
+these filtergraph streams are included in addition to the mapped streams.
+
+Complex filtergraph output streams with labeled pads must be mapped once and exactly once.
+
+@subsection Stream handling
+
+Stream handling is independent of stream selection, with an exception for subtitles described
+below. Stream handling is set via the @code{-codec} option addressed to streams within a
+specific @emph{output} file. In particular, codec options are applied by ffmpeg after the
+stream selection process and thus do not influence the latter. If no @code{-codec} option is
+specified for a stream type, ffmpeg will select the default encoder registered by the output
+file muxer.
+
+An exception exists for subtitles. If a subtitle encoder is specified for an output file, the
+first subtitle stream found of any type, text or image, will be included. ffmpeg does not validate
+if the specified encoder can convert the selected stream or if the converted stream is acceptable
+within the output format. This applies generally as well: when the user sets an encoder manually,
+the stream selection process cannot check if the encoded stream can be muxed into the output file.
+If it cannot, ffmpeg will abort and @emph{all} output files will fail to be processed.
+
+@section Examples
+
+The following examples illustrate the behavior, quirks and limitations of ffmpeg's stream
+selection methods.
+
+They assume the following three input files.
+
+@verbatim
+
+input file 'A.avi'
+      stream 0: video 640x360
+      stream 1: audio 2 channels
+
+input file 'B.mp4'
+      stream 0: video 1920x1080
+      stream 1: audio 2 channels
+      stream 2: subtitles (text)
+      stream 3: audio 5.1 channels
+      stream 4: subtitles (text)
+
+input file 'C.mkv'
+      stream 0: video 1280x720
+      stream 1: audio 2 channels
+      stream 2: subtitles (image)
+@end verbatim
+
+@subsubheading Example: automatic stream selection
+@example
+ffmpeg -i A.avi -i B.mp4 out1.mkv out2.wav -map 1:a -c:a copy out3.mov
+@end example
+There are three output files specified, and for the first two, no @code{-map} options
+are set, so ffmpeg will select streams for these two files automatically.
+
+@file{out1.mkv} is a Matroska container file and accepts video, audio and subtitle streams,
+so ffmpeg will try to select one of each type.@*
+For video, it will select @code{stream 0} from @file{B.mp4}, which has the highest
+resolution among all the input video streams.@*
+For audio, it will select @code{stream 3} from @file{B.mp4}, since it has the greatest
+number of channels.@*
+For subtitles, it will select @code{stream 2} from @file{B.mp4}, which is the first subtitle
+stream from among @file{A.avi} and @file{B.mp4}.
+
+@file{out2.wav} accepts only audio streams, so only @code{stream 3} from @file{B.mp4} is
+selected.
+
+For @file{out3.mov}, since a @code{-map} option is set, no automatic stream selection will
+occur. The @code{-map 1:a} option will select all audio streams from the second input
+@file{B.mp4}. No other streams will be included in this output file.
+
+For the first two outputs, all included streams will be transcoded. The encoders chosen will
+be the default ones registered by each output format, which may not match the codec of the
+selected input streams.
+
+For the third output, codec option for audio streams has been set
+to @code{copy}, so no decoding-filtering-encoding operations will occur, or @emph{can} occur.
+Packets of selected streams shall be conveyed from the input file and muxed within the output
+file.
+
+@subsubheading Example: automatic subtitles selection
+@example
+ffmpeg -i C.mkv out1.mkv -c:s dvdsub -an out2.mkv
+@end example
+Although @file{out1.mkv} is a Matroska container file which accepts subtitle streams, only a
+video and audio stream shall be selected. The subtitle stream of @file{C.mkv} is image-based
+and the default subtitle encoder of the Matroska muxer is text-based, so a transcode operation
+for the subtitles is expected to fail and hence the stream isn't selected. However, in
+@file{out2.mkv}, a subtitle encoder is specified in the command and so, the subtitle stream is
+selected, in addition to the video stream. The presence of @code{-an} disables audio stream
+selection for @file{out2.mkv}.
+
+@subsubheading Example: unlabeled filtergraph outputs
+@example
+ffmpeg -i A.avi -i C.mkv -i B.mp4 -filter_complex "overlay" out1.mp4 out2.srt
+@end example
+A filtergraph is setup here using the @code{-filter_complex} option and consists of a single
+video filter. The @code{overlay} filter requires exactly two video inputs, but none are
+specified, so the first two available video streams are used, those of @file{A.avi} and
+@file{C.mkv}. The output pad of the filter has no label and so is sent to the first output file
+@file{out1.mp4}. Due to this, automatic selection of the video stream is skipped, which would
+have selected the stream in @file{B.mp4}. The audio stream with most channels viz. @code{stream 3}
+in @file{B.mp4}, is chosen automatically. No subtitle stream is chosen however, since the MP4
+format has no default subtitle encoder registered, and the user hasn't specified a subtitle encoder.
+
+The 2nd output file, @file{out2.srt}, only accepts text-based subtitle streams. So, even though
+the first subtitle stream available belongs to @file{C.mkv}, it is image-based and hence skipped.
+The selected stream, @code{stream 2} in @file{B.mp4}, is the first text-based subtitle stream.
+
+@subsubheading Example: labeled filtergraph outputs
+@example
+ffmpeg -i A.avi -i B.mp4 -i C.mkv -filter_complex "[1:v]hue=s=0[outv];overlay;aresample" \
+       -map '[outv]' -an        out1.mp4 \
+                                out2.mkv \
+       -map '[outv]' -map 1:a:0 out3.mkv
+@end example
+
+The above command will fail, as the output pad labelled @code{[outv]} has been mapped twice.
+None of the output files shall be processed.
+
+@example
+ffmpeg -i A.avi -i B.mp4 -i C.mkv -filter_complex "[1:v]hue=s=0[outv];overlay;aresample" \
+       -an        out1.mp4 \
+                  out2.mkv \
+       -map 1:a:0 out3.mkv
+@end example
+
+This command above will also fail as the hue filter output has a label, @code{[outv]},
+and hasn't been mapped anywhere.
+
+The command should be modified as follows,
+@example
+ffmpeg -i A.avi -i B.mp4 -i C.mkv -filter_complex "[1:v]hue=s=0,split=2[outv1][outv2];overlay;aresample" \
+        -map '[outv1]' -an        out1.mp4 \
+                                  out2.mkv \
+        -map '[outv2]' -map 1:a:0 out3.mkv
+@end example
+The video stream from @file{B.mp4} is sent to the hue filter, whose output is cloned once using
+the split filter, and both outputs labelled. Then a copy each is mapped to the first and third
+output files.
+
+The overlay filter, requiring two video inputs, uses the first two unused video streams. Those
+are the streams from @file{A.avi} and @file{C.mkv}. The overlay output isn't labelled, so it is
+sent to the first output file @file{out1.mp4}, regardless of the presence of the @code{-map} option.
+
+The aresample filter is sent the first unused audio stream, that of @file{A.avi}. Since this filter
+output is also unlabelled, it too is mapped to the first output file. The presence of @code{-an}
+only suppresses automatic or manual stream selection of audio streams, not outputs sent from
+filtergraphs. Both these mapped streams shall be ordered before the mapped stream in @file{out1.mp4}.
+
+The video, audio and subtitle streams mapped to @code{out2.mkv} are entirely determined by
+automatic stream selection.
+
+@file{out3.mkv} consists of the cloned video output from the hue filter and the first audio
+stream from @file{B.mp4}.
+@*
 
 @c man end STREAM SELECTION
 
@@ -289,8 +481,8 @@
 
 -to and -t are mutually exclusive and -t has priority.
 
-@item -to @var{position} (@emph{output})
-Stop writing the output at @var{position}.
+@item -to @var{position} (@emph{input/output})
+Stop writing the output or reading the input at @var{position}.
 @var{position} must be a time duration specification,
 see @ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}.
 
@@ -316,7 +508,7 @@
 @var{position} must be a time duration specification,
 see @ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}.
 
-@item -sseof @var{position} (@emph{input/output})
+@item -sseof @var{position} (@emph{input})
 
 Like the @code{-ss} option but relative to the "end of file". That is negative
 values are earlier in the file, 0 is at EOF.
@@ -375,22 +567,31 @@
 @item hearing_impaired
 @item visual_impaired
 @item clean_effects
+@item attached_pic
 @item captions
 @item descriptions
+@item dependent
 @item metadata
 @end table
 
 For example, to make the second audio stream the default stream:
 @example
-ffmpeg -i in.mkv -disposition:a:1 default out.mkv
+ffmpeg -i in.mkv -c copy -disposition:a:1 default out.mkv
 @end example
 
 To make the second subtitle stream the default stream and remove the default
 disposition from the first subtitle stream:
 @example
-ffmpeg -i INPUT -disposition:s:0 0 -disposition:s:1 default OUTPUT
+ffmpeg -i in.mkv -c copy -disposition:s:0 0 -disposition:s:1 default out.mkv
 @end example
 
+To add an embedded cover/thumbnail:
+@example
+ffmpeg -i in.mp4 -i IMAGE -map 0 -map 1 -c copy -c:v:1 png -disposition:v:1 attached_pic out.mp4
+@end example
+
+Not all muxers support embedded thumbnails, and those who do, only support a few formats, like JPEG or PNG.
+
 @item -program [title=@var{title}:][program_num=@var{program_num}:]st=@var{stream}[:st=@var{stream}...] (@emph{output})
 
 Creates a program with the specified @var{title}, @var{program_num} and adds the specified
@@ -413,6 +614,10 @@
 ffmpeg -i myfile.avi -target vcd -bf 2 /tmp/vcd.mpg
 @end example
 
+@item -dn (@emph{output})
+Disable data recording. For full manual control see the @code{-map}
+option.
+
 @item -dframes @var{number} (@emph{output})
 Set the number of data frames to output. This is an obsolete alias for
 @code{-frames:d}, which you should use instead.
@@ -470,6 +675,7 @@
 consists of only alphanumeric characters. The last key of a sequence of
 progress information is always "progress".
 
+@anchor{stdin option}
 @item -stdin
 Enable interaction on standard input. On by default unless standard input is
 used as an input. To explicitly disable interaction you need to specify
@@ -570,7 +776,8 @@
 frames, if it exists.
 
 @item -vn (@emph{output})
-Disable video recording.
+Disable video recording. For full manual control see the @code{-map}
+option.
 
 @item -vcodec @var{codec} (@emph{output})
 Set the video codec. This is an alias for @code{-codec:v}.
@@ -617,8 +824,6 @@
 
 @item -sws_flags @var{flags} (@emph{input/output})
 Set SwScaler flags.
-@item -vdt @var{n}
-Discard threshold.
 
 @item -rc_override[:@var{stream_specifier}] @var{override} (@emph{output,per-stream})
 Rate control override for specific intervals, formatted as "int,int,int"
@@ -756,6 +961,43 @@
 platform-appropriate subdevice (@samp{dxva2} or @samp{vaapi}) and then deriving a
 QSV device from that.)
 
+@item opencl
+@var{device} selects the platform and device as @emph{platform_index.device_index}.
+
+The set of devices can also be filtered using the key-value pairs to find only
+devices matching particular platform or device strings.
+
+The strings usable as filters are:
+@table @option
+@item platform_profile
+@item platform_version
+@item platform_name
+@item platform_vendor
+@item platform_extensions
+@item device_name
+@item device_vendor
+@item driver_version
+@item device_version
+@item device_profile
+@item device_extensions
+@item device_type
+@end table
+
+The indices and filters must together uniquely select a device.
+
+Examples:
+@table @emph
+@item -init_hw_device opencl:0.1
+Choose the second device on the first platform.
+
+@item -init_hw_device opencl:,device_name=Foo9000
+Choose the device with a name containing the string @emph{Foo9000}.
+
+@item -init_hw_device opencl:1,device_type=gpu,device_extensions=cl_khr_fp16
+Choose the GPU device on the second platform supporting the @emph{cl_khr_fp16}
+extension.
+@end table
+
 @end table
 
 @item -init_hw_device @var{type}[=@var{name}]@@@var{source}
@@ -786,9 +1028,6 @@
 @item auto
 Automatically select the hardware acceleration method.
 
-@item vda
-Use Apple VDA hardware acceleration.
-
 @item vdpau
 Use VDPAU (Video Decode and Presentation API for Unix) hardware acceleration.
 
@@ -851,7 +1090,8 @@
 this option only makes sense for audio grabbing devices and raw demuxers
 and is mapped to the corresponding demuxer options.
 @item -an (@emph{output})
-Disable audio recording.
+Disable audio recording. For full manual control see the @code{-map}
+option.
 @item -acodec @var{codec} (@emph{input/output})
 Set the audio codec. This is an alias for @code{-codec:a}.
 @item -sample_fmt[:@var{stream_specifier}] @var{sample_fmt} (@emph{output,per-stream})
@@ -886,7 +1126,8 @@
 @item -scodec @var{codec} (@emph{input/output})
 Set the subtitle codec. This is an alias for @code{-codec:s}.
 @item -sn (@emph{output})
-Disable subtitle recording.
+Disable subtitle recording. For full manual control see the @code{-map}
+option.
 @item -sbsf @var{bitstream_filter}
 Deprecated, see -bsf
 @end table
@@ -1112,12 +1353,12 @@
 
 @item -benchmark (@emph{global})
 Show benchmarking information at the end of an encode.
-Shows CPU time used and maximum memory consumption.
+Shows real, system and user time used and maximum memory consumption.
 Maximum memory consumption is not supported on all systems,
 it will usually display as 0 if not supported.
 @item -benchmark_all (@emph{global})
 Show benchmarking information during the encode.
-Shows CPU time used in various steps (audio/video encode/decode).
+Shows real, system and user time used in various steps (audio/video encode/decode).
 @item -timelimit @var{duration} (@emph{global})
 Exit after ffmpeg has been running for @var{duration} seconds.
 @item -dump (@emph{global})
@@ -1132,10 +1373,6 @@
 By default @command{ffmpeg} attempts to read the input(s) as fast as possible.
 This option will slow down the reading of the input(s) to the native frame rate
 of the input(s). It is useful for real-time output (e.g. live streaming).
-@item -loop_input
-Loop over the input stream. Currently it works only for image
-streams. This option is used for automatic FFserver testing.
-This option is deprecated, use -loop 1.
 @item -loop_output @var{number_of_times}
 Repeatedly loop output for formats that support looping such as animated GIF
 (0 will loop the output infinitely).
@@ -1253,6 +1490,8 @@
 
 Default value is 0.
 
+@item -bitexact (@emph{input/output})
+Enable bitexact mode for (de)muxer and (de/en)coder
 @item -shortest (@emph{output})
 Finish encoding when the shortest input stream ends.
 @item -dts_delta_threshold
@@ -1375,16 +1614,6 @@
 discarded if they are not read in a timely manner; raising this value can
 avoid it.
 
-@item -override_ffserver (@emph{global})
-Overrides the input specifications from @command{ffserver}. Using this
-option you can map any input stream to @command{ffserver} and control
-many aspects of the encoding from @command{ffmpeg}. Without this
-option @command{ffmpeg} will transmit to @command{ffserver} what is
-requested by @command{ffserver}.
-
-The option is intended for cases where features are needed that cannot be
-specified to @command{ffserver} but can be to @command{ffmpeg}.
-
 @item -sdp_file @var{file} (@emph{global})
 Print sdp information for an output stream to @var{file}.
 This allows dumping sdp information when at least one output isn't an
@@ -1739,7 +1968,7 @@
 @ifset config-not-all
 @url{ffmpeg-all.html,ffmpeg-all},
 @end ifset
-@url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{ffmpeg-utils.html,ffmpeg-utils},
 @url{ffmpeg-scaler.html,ffmpeg-scaler},
 @url{ffmpeg-resampler.html,ffmpeg-resampler},
@@ -1758,7 +1987,7 @@
 @ifset config-not-all
 ffmpeg-all(1),
 @end ifset
-ffplay(1), ffprobe(1), ffserver(1),
+ffplay(1), ffprobe(1),
 ffmpeg-utils(1), ffmpeg-scaler(1), ffmpeg-resampler(1),
 ffmpeg-codecs(1), ffmpeg-bitstream-filters(1), ffmpeg-formats(1),
 ffmpeg-devices(1), ffmpeg-protocols(1), ffmpeg-filters(1)

diff --git a/doc/ffplay.texi b/doc/ffplay.texi
index a76bed4..99e1d74 100644
--- a/doc/ffplay.texi
+++ b/doc/ffplay.texi

@@ -60,6 +60,8 @@
 see @ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}.
 @item -bytes
 Seek by bytes.
+@item -seek_interval
+Set custom interval, in seconds, for seeking using left/right keys. Default is 10 seconds.
 @item -nodisp
 Disable graphical display.
 @item -noborder
@@ -72,6 +74,10 @@
 Force format.
 @item -window_title @var{title}
 Set window title (default is the input filename).
+@item -left @var{title}
+Set the x position for the left of the window (default is a centered window).
+@item -top @var{title}
+Set the y position for the top of the window (default is a centered window).
 @item -loop @var{number}
 Loops movie playback <number> times. 0 means forever.
 @item -showmode @var{mode}
@@ -291,7 +297,7 @@
 @ifset config-not-all
 @url{ffplay-all.html,ffmpeg-all},
 @end ifset
-@url{ffmpeg.html,ffmpeg}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffprobe.html,ffprobe},
 @url{ffmpeg-utils.html,ffmpeg-utils},
 @url{ffmpeg-scaler.html,ffmpeg-scaler},
 @url{ffmpeg-resampler.html,ffmpeg-resampler},
@@ -310,7 +316,7 @@
 @ifset config-not-all
 ffplay-all(1),
 @end ifset
-ffmpeg(1), ffprobe(1), ffserver(1),
+ffmpeg(1), ffprobe(1),
 ffmpeg-utils(1), ffmpeg-scaler(1), ffmpeg-resampler(1),
 ffmpeg-codecs(1), ffmpeg-bitstream-filters(1), ffmpeg-formats(1),
 ffmpeg-devices(1), ffmpeg-protocols(1), ffmpeg-filters(1)

diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi
index e3c34ba..be0539f 100644
--- a/doc/ffprobe.texi
+++ b/doc/ffprobe.texi

@@ -584,7 +584,7 @@
 This is required for generating an XML file which can be validated
 through an XSD file.
 
-@item xsd_compliant, x
+@item xsd_strict, x
 If set to 1 perform more checks for ensuring that the output is XSD
 compliant. Default value is 0.
 This option automatically sets @option{fully_qualified} to 1.
@@ -653,7 +653,7 @@
 @ifset config-not-all
 @url{ffprobe-all.html,ffprobe-all},
 @end ifset
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay},
 @url{ffmpeg-utils.html,ffmpeg-utils},
 @url{ffmpeg-scaler.html,ffmpeg-scaler},
 @url{ffmpeg-resampler.html,ffmpeg-resampler},
@@ -672,7 +672,7 @@
 @ifset config-not-all
 ffprobe-all(1),
 @end ifset
-ffmpeg(1), ffplay(1), ffserver(1),
+ffmpeg(1), ffplay(1),
 ffmpeg-utils(1), ffmpeg-scaler(1), ffmpeg-resampler(1),
 ffmpeg-codecs(1), ffmpeg-bitstream-filters(1), ffmpeg-formats(1),
 ffmpeg-devices(1), ffmpeg-protocols(1), ffmpeg-filters(1)

diff --git a/doc/ffserver.conf b/doc/ffserver.conf
deleted file mode 100644
index e3f99bb..0000000
--- a/doc/ffserver.conf
+++ /dev/null

@@ -1,372 +0,0 @@
-# Port on which the server is listening. You must select a different
-# port from your standard HTTP web server if it is running on the same
-# computer.
-HTTPPort 8090
-
-# Address on which the server is bound. Only useful if you have
-# several network interfaces.
-HTTPBindAddress 0.0.0.0
-
-# Number of simultaneous HTTP connections that can be handled. It has
-# to be defined *before* the MaxClients parameter, since it defines the
-# MaxClients maximum limit.
-MaxHTTPConnections 2000
-
-# Number of simultaneous requests that can be handled. Since FFServer
-# is very fast, it is more likely that you will want to leave this high
-# and use MaxBandwidth, below.
-MaxClients 1000
-
-# This the maximum amount of kbit/sec that you are prepared to
-# consume when streaming to clients.
-MaxBandwidth 1000
-
-# Access log file (uses standard Apache log file format)
-# '-' is the standard output.
-CustomLog -
-
-##################################################################
-# Definition of the live feeds. Each live feed contains one video
-# and/or audio sequence coming from an ffmpeg encoder or another
-# ffserver. This sequence may be encoded simultaneously with several
-# codecs at several resolutions.
-
-<Feed feed1.ffm>
-
-# You must use 'ffmpeg' to send a live feed to ffserver. In this
-# example, you can type:
-#
-# ffmpeg http://localhost:8090/feed1.ffm
-
-# ffserver can also do time shifting. It means that it can stream any
-# previously recorded live stream. The request should contain:
-# "http://xxxx?date=[YYYY-MM-DDT][[HH:]MM:]SS[.m...]".You must specify
-# a path where the feed is stored on disk. You also specify the
-# maximum size of the feed, where zero means unlimited. Default:
-# File=/tmp/feed_name.ffm FileMaxSize=5M
-File /tmp/feed1.ffm
-FileMaxSize 200K
-
-# You could specify
-# ReadOnlyFile /saved/specialvideo.ffm
-# This marks the file as readonly and it will not be deleted or updated.
-
-# Specify launch in order to start ffmpeg automatically.
-# First ffmpeg must be defined with an appropriate path if needed,
-# after that options can follow, but avoid adding the http:// field
-#Launch ffmpeg
-
-# Only allow connections from localhost to the feed.
-ACL allow 127.0.0.1
-
-</Feed>
-
-
-##################################################################
-# Now you can define each stream which will be generated from the
-# original audio and video stream. Each format has a filename (here
-# 'test1.mpg'). FFServer will send this stream when answering a
-# request containing this filename.
-
-<Stream test1.mpg>
-
-# coming from live feed 'feed1'
-Feed feed1.ffm
-
-# Format of the stream : you can choose among:
-# mpeg       : MPEG-1 multiplexed video and audio
-# mpegvideo  : only MPEG-1 video
-# mp2        : MPEG-2 audio (use AudioCodec to select layer 2 and 3 codec)
-# ogg        : Ogg format (Vorbis audio codec)
-# rm         : RealNetworks-compatible stream. Multiplexed audio and video.
-# ra         : RealNetworks-compatible stream. Audio only.
-# mpjpeg     : Multipart JPEG (works with Netscape without any plugin)
-# jpeg       : Generate a single JPEG image.
-# mjpeg      : Generate a M-JPEG stream.
-# asf        : ASF compatible streaming (Windows Media Player format).
-# swf        : Macromedia Flash compatible stream
-# avi        : AVI format (MPEG-4 video, MPEG audio sound)
-Format mpeg
-
-# Bitrate for the audio stream. Codecs usually support only a few
-# different bitrates.
-AudioBitRate 32
-
-# Number of audio channels: 1 = mono, 2 = stereo
-AudioChannels 1
-
-# Sampling frequency for audio. When using low bitrates, you should
-# lower this frequency to 22050 or 11025. The supported frequencies
-# depend on the selected audio codec.
-AudioSampleRate 44100
-
-# Bitrate for the video stream
-VideoBitRate 64
-
-# Ratecontrol buffer size
-VideoBufferSize 40
-
-# Number of frames per second
-VideoFrameRate 3
-
-# Size of the video frame: WxH (default: 160x128)
-# The following abbreviations are defined: sqcif, qcif, cif, 4cif, qqvga,
-# qvga, vga, svga, xga, uxga, qxga, sxga, qsxga, hsxga, wvga, wxga, wsxga,
-# wuxga, woxga, wqsxga, wquxga, whsxga, whuxga, cga, ega, hd480, hd720,
-# hd1080
-VideoSize 160x128
-
-# Transmit only intra frames (useful for low bitrates, but kills frame rate).
-#VideoIntraOnly
-
-# If non-intra only, an intra frame is transmitted every VideoGopSize
-# frames. Video synchronization can only begin at an intra frame.
-VideoGopSize 12
-
-# More MPEG-4 parameters
-# VideoHighQuality
-# Video4MotionVector
-
-# Choose your codecs:
-#AudioCodec mp2
-#VideoCodec mpeg1video
-
-# Suppress audio
-#NoAudio
-
-# Suppress video
-#NoVideo
-
-#VideoQMin 3
-#VideoQMax 31
-
-# Set this to the number of seconds backwards in time to start. Note that
-# most players will buffer 5-10 seconds of video, and also you need to allow
-# for a keyframe to appear in the data stream.
-#Preroll 15
-
-# ACL:
-
-# You can allow ranges of addresses (or single addresses)
-#ACL ALLOW <first address> <last address>
-
-# You can deny ranges of addresses (or single addresses)
-#ACL DENY <first address> <last address>
-
-# You can repeat the ACL allow/deny as often as you like. It is on a per
-# stream basis. The first match defines the action. If there are no matches,
-# then the default is the inverse of the last ACL statement.
-#
-# Thus 'ACL allow localhost' only allows access from localhost.
-# 'ACL deny 1.0.0.0 1.255.255.255' would deny the whole of network 1 and
-# allow everybody else.
-
-</Stream>
-
-
-##################################################################
-# Example streams
-
-
-# Multipart JPEG
-
-#<Stream test.mjpg>
-#Feed feed1.ffm
-#Format mpjpeg
-#VideoFrameRate 2
-#VideoIntraOnly
-#NoAudio
-#Strict -1
-#</Stream>
-
-
-# Single JPEG
-
-#<Stream test.jpg>
-#Feed feed1.ffm
-#Format jpeg
-#VideoFrameRate 2
-#VideoIntraOnly
-##VideoSize 352x240
-#NoAudio
-#Strict -1
-#</Stream>
-
-
-# Flash
-
-#<Stream test.swf>
-#Feed feed1.ffm
-#Format swf
-#VideoFrameRate 2
-#VideoIntraOnly
-#NoAudio
-#</Stream>
-
-
-# ASF compatible
-
-<Stream test.asf>
-Feed feed1.ffm
-Format asf
-VideoFrameRate 15
-VideoSize 352x240
-VideoBitRate 256
-VideoBufferSize 40
-VideoGopSize 30
-AudioBitRate 64
-StartSendOnKey
-</Stream>
-
-
-# MP3 audio
-
-#<Stream test.mp3>
-#Feed feed1.ffm
-#Format mp2
-#AudioCodec mp3
-#AudioBitRate 64
-#AudioChannels 1
-#AudioSampleRate 44100
-#NoVideo
-#</Stream>
-
-
-# Ogg Vorbis audio
-
-#<Stream test.ogg>
-#Feed feed1.ffm
-#Metadata title "Stream title"
-#AudioBitRate 64
-#AudioChannels 2
-#AudioSampleRate 44100
-#NoVideo
-#</Stream>
-
-
-# Real with audio only at 32 kbits
-
-#<Stream test.ra>
-#Feed feed1.ffm
-#Format rm
-#AudioBitRate 32
-#NoVideo
-#NoAudio
-#</Stream>
-
-
-# Real with audio and video at 64 kbits
-
-#<Stream test.rm>
-#Feed feed1.ffm
-#Format rm
-#AudioBitRate 32
-#VideoBitRate 128
-#VideoFrameRate 25
-#VideoGopSize 25
-#NoAudio
-#</Stream>
-
-
-##################################################################
-# A stream coming from a file: you only need to set the input
-# filename and optionally a new format. Supported conversions:
-#    AVI -> ASF
-
-#<Stream file.rm>
-#File "/usr/local/httpd/htdocs/tlive.rm"
-#NoAudio
-#</Stream>
-
-#<Stream file.asf>
-#File "/usr/local/httpd/htdocs/test.asf"
-#NoAudio
-#Metadata author "Me"
-#Metadata copyright "Super MegaCorp"
-#Metadata title "Test stream from disk"
-#Metadata comment "Test comment"
-#</Stream>
-
-
-##################################################################
-# RTSP examples
-#
-# You can access this stream with the RTSP URL:
-#   rtsp://localhost:5454/test1-rtsp.mpg
-#
-# A non-standard RTSP redirector is also created. Its URL is:
-#   http://localhost:8090/test1-rtsp.rtsp
-
-#<Stream test1-rtsp.mpg>
-#Format rtp
-#File "/usr/local/httpd/htdocs/test1.mpg"
-#</Stream>
-
-
-# Transcode an incoming live feed to another live feed,
-# using libx264 and video presets
-
-#<Stream live.h264>
-#Format rtp
-#Feed feed1.ffm
-#VideoCodec libx264
-#VideoFrameRate 24
-#VideoBitRate 100
-#VideoSize 480x272
-#AVPresetVideo default
-#AVPresetVideo baseline
-#AVOptionVideo flags +global_header
-#
-#AudioCodec aac
-#AudioBitRate 32
-#AudioChannels 2
-#AudioSampleRate 22050
-#AVOptionAudio flags +global_header
-#</Stream>
-
-##################################################################
-# SDP/multicast examples
-#
-# If you want to send your stream in multicast, you must set the
-# multicast address with MulticastAddress. The port and the TTL can
-# also be set.
-#
-# An SDP file is automatically generated by ffserver by adding the
-# 'sdp' extension to the stream name (here
-# http://localhost:8090/test1-sdp.sdp). You should usually give this
-# file to your player to play the stream.
-#
-# The 'NoLoop' option can be used to avoid looping when the stream is
-# terminated.
-
-#<Stream test1-sdp.mpg>
-#Format rtp
-#File "/usr/local/httpd/htdocs/test1.mpg"
-#MulticastAddress 224.124.0.1
-#MulticastPort 5000
-#MulticastTTL 16
-#NoLoop
-#</Stream>
-
-
-##################################################################
-# Special streams
-
-# Server status
-
-<Stream stat.html>
-Format status
-
-# Only allow local people to get the status
-ACL allow localhost
-ACL allow 192.168.0.0 192.168.255.255
-
-#FaviconURL http://pond1.gladstonefamily.net:8080/favicon.ico
-</Stream>
-
-
-# Redirect index.html to the appropriate site
-
-<Redirect index.html>
-URL http://www.ffmpeg.org/
-</Redirect>

diff --git a/doc/ffserver.texi b/doc/ffserver.texi
deleted file mode 100644
index b3e1f1d..0000000
--- a/doc/ffserver.texi
+++ /dev/null

@@ -1,923 +0,0 @@
-\input texinfo @c -*- texinfo -*-
-@documentencoding UTF-8
-
-@settitle ffserver Documentation
-@titlepage
-@center @titlefont{ffserver Documentation}
-@end titlepage
-
-@top
-
-@contents
-
-@chapter Synopsis
-
-ffserver [@var{options}]
-
-@chapter Description
-@c man begin DESCRIPTION
-
-@command{ffserver} is a streaming server for both audio and video.
-It supports several live feeds, streaming from files and time shifting
-on live feeds. You can seek to positions in the past on each live
-feed, provided you specify a big enough feed storage.
-
-@command{ffserver} is configured through a configuration file, which
-is read at startup. If not explicitly specified, it will read from
-@file{/etc/ffserver.conf}.
-
-@command{ffserver} receives prerecorded files or FFM streams from some
-@command{ffmpeg} instance as input, then streams them over
-RTP/RTSP/HTTP.
-
-An @command{ffserver} instance will listen on some port as specified
-in the configuration file. You can launch one or more instances of
-@command{ffmpeg} and send one or more FFM streams to the port where
-ffserver is expecting to receive them. Alternately, you can make
-@command{ffserver} launch such @command{ffmpeg} instances at startup.
-
-Input streams are called feeds, and each one is specified by a
-@code{<Feed>} section in the configuration file.
-
-For each feed you can have different output streams in various
-formats, each one specified by a @code{<Stream>} section in the
-configuration file.
-
-@chapter Detailed description
-
-@command{ffserver} works by forwarding streams encoded by
-@command{ffmpeg}, or pre-recorded streams which are read from disk.
-
-Precisely, @command{ffserver} acts as an HTTP server, accepting POST
-requests from @command{ffmpeg} to acquire the stream to publish, and
-serving RTSP clients or HTTP clients GET requests with the stream
-media content.
-
-A feed is an @ref{FFM} stream created by @command{ffmpeg}, and sent to
-a port where @command{ffserver} is listening.
-
-Each feed is identified by a unique name, corresponding to the name
-of the resource published on @command{ffserver}, and is configured by
-a dedicated @code{Feed} section in the configuration file.
-
-The feed publish URL is given by:
-@example
-http://@var{ffserver_ip_address}:@var{http_port}/@var{feed_name}
-@end example
-
-where @var{ffserver_ip_address} is the IP address of the machine where
-@command{ffserver} is installed, @var{http_port} is the port number of
-the HTTP server (configured through the @option{HTTPPort} option), and
-@var{feed_name} is the name of the corresponding feed defined in the
-configuration file.
-
-Each feed is associated to a file which is stored on disk. This stored
-file is used to send pre-recorded data to a player as fast as
-possible when new content is added in real-time to the stream.
-
-A "live-stream" or "stream" is a resource published by
-@command{ffserver}, and made accessible through the HTTP protocol to
-clients.
-
-A stream can be connected to a feed, or to a file. In the first case,
-the published stream is forwarded from the corresponding feed
-generated by a running instance of @command{ffmpeg}, in the second
-case the stream is read from a pre-recorded file.
-
-Each stream is identified by a unique name, corresponding to the name
-of the resource served by @command{ffserver}, and is configured by
-a dedicated @code{Stream} section in the configuration file.
-
-The stream access HTTP URL is given by:
-@example
-http://@var{ffserver_ip_address}:@var{http_port}/@var{stream_name}[@var{options}]
-@end example
-
-The stream access RTSP URL is given by:
-@example
-http://@var{ffserver_ip_address}:@var{rtsp_port}/@var{stream_name}[@var{options}]
-@end example
-
-@var{stream_name} is the name of the corresponding stream defined in
-the configuration file. @var{options} is a list of options specified
-after the URL which affects how the stream is served by
-@command{ffserver}. @var{http_port} and @var{rtsp_port} are the HTTP
-and RTSP ports configured with the options @var{HTTPPort} and
-@var{RTSPPort} respectively.
-
-In case the stream is associated to a feed, the encoding parameters
-must be configured in the stream configuration. They are sent to
-@command{ffmpeg} when setting up the encoding. This allows
-@command{ffserver} to define the encoding parameters used by
-the @command{ffmpeg} encoders.
-
-The @command{ffmpeg} @option{override_ffserver} commandline option
-allows one to override the encoding parameters set by the server.
-
-Multiple streams can be connected to the same feed.
-
-For example, you can have a situation described by the following
-graph:
-
-@verbatim
-               _________       __________
-              |         |     |          |
-ffmpeg 1 -----| feed 1  |-----| stream 1 |
-    \         |_________|\    |__________|
-     \                    \
-      \                    \   __________
-       \                    \ |          |
-        \                    \| stream 2 |
-         \                    |__________|
-          \
-           \   _________       __________
-            \ |         |     |          |
-             \| feed 2  |-----| stream 3 |
-              |_________|     |__________|
-
-               _________       __________
-              |         |     |          |
-ffmpeg 2 -----| feed 3  |-----| stream 4 |
-              |_________|     |__________|
-
-               _________       __________
-              |         |     |          |
-              | file 1  |-----| stream 5 |
-              |_________|     |__________|
-
-@end verbatim
-
-@anchor{FFM}
-@section FFM, FFM2 formats
-
-FFM and FFM2 are formats used by ffserver. They allow storing a wide variety of
-video and audio streams and encoding options, and can store a moving time segment
-of an infinite movie or a whole movie.
-
-FFM is version specific, and there is limited compatibility of FFM files
-generated by one version of ffmpeg/ffserver and another version of
-ffmpeg/ffserver. It may work but it is not guaranteed to work.
-
-FFM2 is extensible while maintaining compatibility and should work between
-differing versions of tools. FFM2 is the default.
-
-@section Status stream
-
-@command{ffserver} supports an HTTP interface which exposes the
-current status of the server.
-
-Simply point your browser to the address of the special status stream
-specified in the configuration file.
-
-For example if you have:
-@example
-<Stream status.html>
-Format status
-
-# Only allow local people to get the status
-ACL allow localhost
-ACL allow 192.168.0.0 192.168.255.255
-</Stream>
-@end example
-
-then the server will post a page with the status information when
-the special stream @file{status.html} is requested.
-
-@section How do I make it work?
-
-As a simple test, just run the following two command lines where INPUTFILE
-is some file which you can decode with ffmpeg:
-
-@example
-ffserver -f doc/ffserver.conf &
-ffmpeg -i INPUTFILE http://localhost:8090/feed1.ffm
-@end example
-
-At this point you should be able to go to your Windows machine and fire up
-Windows Media Player (WMP). Go to Open URL and enter
-
-@example
-    http://<linuxbox>:8090/test.asf
-@end example
-
-You should (after a short delay) see video and hear audio.
-
-WARNING: trying to stream test1.mpg doesn't work with WMP as it tries to
-transfer the entire file before starting to play.
-The same is true of AVI files.
-
-You should edit the @file{ffserver.conf} file to suit your needs (in
-terms of frame rates etc). Then install @command{ffserver} and
-@command{ffmpeg}, write a script to start them up, and off you go.
-
-@section What else can it do?
-
-You can replay video from .ffm files that was recorded earlier.
-However, there are a number of caveats, including the fact that the
-ffserver parameters must match the original parameters used to record the
-file. If they do not, then ffserver deletes the file before recording into it.
-(Now that I write this, it seems broken).
-
-You can fiddle with many of the codec choices and encoding parameters, and
-there are a bunch more parameters that you cannot control. Post a message
-to the mailing list if there are some 'must have' parameters. Look in
-ffserver.conf for a list of the currently available controls.
-
-It will automatically generate the ASX or RAM files that are often used
-in browsers. These files are actually redirections to the underlying ASF
-or RM file. The reason for this is that the browser often fetches the
-entire file before starting up the external viewer. The redirection files
-are very small and can be transferred quickly. [The stream itself is
-often 'infinite' and thus the browser tries to download it and never
-finishes.]
-
-@section Tips
-
-* When you connect to a live stream, most players (WMP, RA, etc) want to
-buffer a certain number of seconds of material so that they can display the
-signal continuously. However, ffserver (by default) starts sending data
-in realtime. This means that there is a pause of a few seconds while the
-buffering is being done by the player. The good news is that this can be
-cured by adding a '?buffer=5' to the end of the URL. This means that the
-stream should start 5 seconds in the past -- and so the first 5 seconds
-of the stream are sent as fast as the network will allow. It will then
-slow down to real time. This noticeably improves the startup experience.
-
-You can also add a 'Preroll 15' statement into the ffserver.conf that will
-add the 15 second prebuffering on all requests that do not otherwise
-specify a time. In addition, ffserver will skip frames until a key_frame
-is found. This further reduces the startup delay by not transferring data
-that will be discarded.
-
-@section Why does the ?buffer / Preroll stop working after a time?
-
-It turns out that (on my machine at least) the number of frames successfully
-grabbed is marginally less than the number that ought to be grabbed. This
-means that the timestamp in the encoded data stream gets behind realtime.
-This means that if you say 'Preroll 10', then when the stream gets 10
-or more seconds behind, there is no Preroll left.
-
-Fixing this requires a change in the internals of how timestamps are
-handled.
-
-@section Does the @code{?date=} stuff work.
-
-Yes (subject to the limitation outlined above). Also note that whenever you
-start ffserver, it deletes the ffm file (if any parameters have changed),
-thus wiping out what you had recorded before.
-
-The format of the @code{?date=xxxxxx} is fairly flexible. You should use one
-of the following formats (the 'T' is literal):
-
-@example
-* YYYY-MM-DDTHH:MM:SS     (localtime)
-* YYYY-MM-DDTHH:MM:SSZ    (UTC)
-@end example
-
-You can omit the YYYY-MM-DD, and then it refers to the current day. However
-note that @samp{?date=16:00:00} refers to 16:00 on the current day -- this
-may be in the future and so is unlikely to be useful.
-
-You use this by adding the ?date= to the end of the URL for the stream.
-For example:   @samp{http://localhost:8080/test.asf?date=2002-07-26T23:05:00}.
-@c man end
-
-@chapter Options
-@c man begin OPTIONS
-
-@include fftools-common-opts.texi
-
-@section Main options
-
-@table @option
-@item -f @var{configfile}
-Read configuration file @file{configfile}. If not specified it will
-read by default from @file{/etc/ffserver.conf}.
-
-@item -n
-Enable no-launch mode. This option disables all the @code{Launch}
-directives within the various @code{<Feed>} sections. Since
-@command{ffserver} will not launch any @command{ffmpeg} instances, you
-will have to launch them manually.
-
-@item -d
-Enable debug mode. This option increases log verbosity, and directs
-log messages to stdout. When specified, the @option{CustomLog} option
-is ignored.
-@end table
-
-@chapter Configuration file syntax
-
-@command{ffserver} reads a configuration file containing global
-options and settings for each stream and feed.
-
-The configuration file consists of global options and dedicated
-sections, which must be introduced by "<@var{SECTION_NAME}
-@var{ARGS}>" on a separate line and must be terminated by a line in
-the form "</@var{SECTION_NAME}>". @var{ARGS} is optional.
-
-Currently the following sections are recognized: @samp{Feed},
-@samp{Stream}, @samp{Redirect}.
-
-A line starting with @code{#} is ignored and treated as a comment.
-
-Name of options and sections are case-insensitive.
-
-@section ACL syntax
-An ACL (Access Control List) specifies the address which are allowed
-to access a given stream, or to write a given feed.
-
-It accepts the following forms
-@itemize
-@item
-Allow/deny access to @var{address}.
-@example
-ACL ALLOW <address>
-ACL DENY <address>
-@end example
-
-@item
-Allow/deny access to ranges of addresses from @var{first_address} to
-@var{last_address}.
-@example
-ACL ALLOW <first_address> <last_address>
-ACL DENY <first_address> <last_address>
-@end example
-@end itemize
-
-You can repeat the ACL allow/deny as often as you like. It is on a per
-stream basis. The first match defines the action. If there are no matches,
-then the default is the inverse of the last ACL statement.
-
-Thus 'ACL allow localhost' only allows access from localhost.
-'ACL deny 1.0.0.0 1.255.255.255' would deny the whole of network 1 and
-allow everybody else.
-
-@section Global options
-@table @option
-@item HTTPPort @var{port_number}
-@item Port @var{port_number}
-@item RTSPPort @var{port_number}
-
-@var{HTTPPort} sets the HTTP server listening TCP port number,
-@var{RTSPPort} sets the RTSP server listening TCP port number.
-
-@var{Port} is the equivalent of @var{HTTPPort} and is deprecated.
-
-You must select a different port from your standard HTTP web server if
-it is running on the same computer.
-
-If not specified, no corresponding server will be created.
-
-@item HTTPBindAddress @var{ip_address}
-@item BindAddress @var{ip_address}
-@item RTSPBindAddress @var{ip_address}
-Set address on which the HTTP/RTSP server is bound. Only useful if you
-have several network interfaces.
-
-@var{BindAddress} is the equivalent of @var{HTTPBindAddress} and is
-deprecated.
-
-@item MaxHTTPConnections @var{n}
-Set number of simultaneous HTTP connections that can be handled. It
-has to be defined @emph{before} the @option{MaxClients} parameter,
-since it defines the @option{MaxClients} maximum limit.
-
-Default value is 2000.
-
-@item MaxClients @var{n}
-Set number of simultaneous requests that can be handled. Since
-@command{ffserver} is very fast, it is more likely that you will want
-to leave this high and use @option{MaxBandwidth}.
-
-Default value is 5.
-
-@item MaxBandwidth @var{kbps}
-Set the maximum amount of kbit/sec that you are prepared to consume
-when streaming to clients.
-
-Default value is 1000.
-
-@item CustomLog @var{filename}
-Set access log file (uses standard Apache log file format). '-' is the
-standard output.
-
-If not specified @command{ffserver} will produce no log.
-
-In case the commandline option @option{-d} is specified this option is
-ignored, and the log is written to standard output.
-
-@item NoDaemon
-Set no-daemon mode. This option is currently ignored since now
-@command{ffserver} will always work in no-daemon mode, and is
-deprecated.
-
-@item UseDefaults
-@item NoDefaults
-Control whether default codec options are used for the all streams or not.
-Each stream may overwrite this setting for its own. Default is @var{UseDefaults}.
-The last occurrence overrides the previous if multiple definitions exist.
-@end table
-
-@section Feed section
-
-A Feed section defines a feed provided to @command{ffserver}.
-
-Each live feed contains one video and/or audio sequence coming from an
-@command{ffmpeg} encoder or another @command{ffserver}. This sequence
-may be encoded simultaneously with several codecs at several
-resolutions.
-
-A feed instance specification is introduced by a line in the form:
-@example
-<Feed FEED_FILENAME>
-@end example
-
-where @var{FEED_FILENAME} specifies the unique name of the FFM stream.
-
-The following options are recognized within a Feed section.
-
-@table @option
-@item File @var{filename}
-@item ReadOnlyFile @var{filename}
-Set the path where the feed file is stored on disk.
-
-If not specified, the @file{/tmp/FEED.ffm} is assumed, where
-@var{FEED} is the feed name.
-
-If @option{ReadOnlyFile} is used the file is marked as read-only and
-it will not be deleted or updated.
-
-@item Truncate
-Truncate the feed file, rather than appending to it. By default
-@command{ffserver} will append data to the file, until the maximum
-file size value is reached (see @option{FileMaxSize} option).
-
-@item FileMaxSize @var{size}
-Set maximum size of the feed file in bytes. 0 means unlimited. The
-postfixes @code{K} (2^10), @code{M} (2^20), and @code{G} (2^30) are
-recognized.
-
-Default value is 5M.
-
-@item Launch @var{args}
-Launch an @command{ffmpeg} command when creating @command{ffserver}.
-
-@var{args} must be a sequence of arguments to be provided to an
-@command{ffmpeg} instance. The first provided argument is ignored, and
-it is replaced by a path with the same dirname of the @command{ffserver}
-instance, followed by the remaining argument and terminated with a
-path corresponding to the feed.
-
-When the launched process exits, @command{ffserver} will launch
-another program instance.
-
-In case you need a more complex @command{ffmpeg} configuration,
-e.g. if you need to generate multiple FFM feeds with a single
-@command{ffmpeg} instance, you should launch @command{ffmpeg} by hand.
-
-This option is ignored in case the commandline option @option{-n} is
-specified.
-
-@item ACL @var{spec}
-Specify the list of IP address which are allowed or denied to write
-the feed. Multiple ACL options can be specified.
-@end table
-
-@section Stream section
-
-A Stream section defines a stream provided by @command{ffserver}, and
-identified by a single name.
-
-The stream is sent when answering a request containing the stream
-name.
-
-A stream section must be introduced by the line:
-@example
-<Stream STREAM_NAME>
-@end example
-
-where @var{STREAM_NAME} specifies the unique name of the stream.
-
-The following options are recognized within a Stream section.
-
-Encoding options are marked with the @emph{encoding} tag, and they are
-used to set the encoding parameters, and are mapped to libavcodec
-encoding options. Not all encoding options are supported, in
-particular it is not possible to set encoder private options. In order
-to override the encoding options specified by @command{ffserver}, you
-can use the @command{ffmpeg} @option{override_ffserver} commandline
-option.
-
-Only one of the @option{Feed} and @option{File} options should be set.
-
-@table @option
-@item Feed @var{feed_name}
-Set the input feed. @var{feed_name} must correspond to an existing
-feed defined in a @code{Feed} section.
-
-When this option is set, encoding options are used to setup the
-encoding operated by the remote @command{ffmpeg} process.
-
-@item File @var{filename}
-Set the filename of the pre-recorded input file to stream.
-
-When this option is set, encoding options are ignored and the input
-file content is re-streamed as is.
-
-@item Format @var{format_name}
-Set the format of the output stream.
-
-Must be the name of a format recognized by FFmpeg. If set to
-@samp{status}, it is treated as a status stream.
-
-@item InputFormat @var{format_name}
-Set input format. If not specified, it is automatically guessed.
-
-@item Preroll @var{n}
-Set this to the number of seconds backwards in time to start. Note that
-most players will buffer 5-10 seconds of video, and also you need to allow
-for a keyframe to appear in the data stream.
-
-Default value is 0.
-
-@item StartSendOnKey
-Do not send stream until it gets the first key frame. By default
-@command{ffserver} will send data immediately.
-
-@item MaxTime @var{n}
-Set the number of seconds to run. This value set the maximum duration
-of the stream a client will be able to receive.
-
-A value of 0 means that no limit is set on the stream duration.
-
-@item ACL @var{spec}
-Set ACL for the stream.
-
-@item DynamicACL @var{spec}
-
-@item RTSPOption @var{option}
-
-@item MulticastAddress @var{address}
-
-@item MulticastPort @var{port}
-
-@item MulticastTTL @var{integer}
-
-@item NoLoop
-
-@item FaviconURL @var{url}
-Set favicon (favourite icon) for the server status page. It is ignored
-for regular streams.
-
-@item Author @var{value}
-@item Comment @var{value}
-@item Copyright @var{value}
-@item Title @var{value}
-Set metadata corresponding to the option. All these options are
-deprecated in favor of @option{Metadata}.
-
-@item Metadata @var{key} @var{value}
-Set metadata value on the output stream.
-
-@item UseDefaults
-@item NoDefaults
-Control whether default codec options are used for the stream or not.
-Default is @var{UseDefaults} unless disabled globally.
-
-@item NoAudio
-@item NoVideo
-Suppress audio/video.
-
-@item AudioCodec @var{codec_name} (@emph{encoding,audio})
-Set audio codec.
-
-@item AudioBitRate @var{rate} (@emph{encoding,audio})
-Set bitrate for the audio stream in kbits per second.
-
-@item AudioChannels @var{n} (@emph{encoding,audio})
-Set number of audio channels.
-
-@item AudioSampleRate @var{n} (@emph{encoding,audio})
-Set sampling frequency for audio. When using low bitrates, you should
-lower this frequency to 22050 or 11025. The supported frequencies
-depend on the selected audio codec.
-
-@item AVOptionAudio [@var{codec}:]@var{option} @var{value} (@emph{encoding,audio})
-Set generic or private option for audio stream.
-Private option must be prefixed with codec name or codec must be defined before.
-
-@item AVPresetAudio @var{preset} (@emph{encoding,audio})
-Set preset for audio stream.
-
-@item VideoCodec @var{codec_name} (@emph{encoding,video})
-Set video codec.
-
-@item VideoBitRate @var{n} (@emph{encoding,video})
-Set bitrate for the video stream in kbits per second.
-
-@item VideoBitRateRange @var{range} (@emph{encoding,video})
-Set video bitrate range.
-
-A range must be specified in the form @var{minrate}-@var{maxrate}, and
-specifies the @option{minrate} and @option{maxrate} encoding options
-expressed in kbits per second.
-
-@item VideoBitRateRangeTolerance @var{n} (@emph{encoding,video})
-Set video bitrate tolerance in kbits per second.
-
-@item PixelFormat @var{pixel_format} (@emph{encoding,video})
-Set video pixel format.
-
-@item Debug @var{integer} (@emph{encoding,video})
-Set video @option{debug} encoding option.
-
-@item Strict @var{integer} (@emph{encoding,video})
-Set video @option{strict} encoding option.
-
-@item VideoBufferSize @var{n} (@emph{encoding,video})
-Set ratecontrol buffer size, expressed in KB.
-
-@item VideoFrameRate @var{n} (@emph{encoding,video})
-Set number of video frames per second.
-
-@item VideoSize (@emph{encoding,video})
-Set size of the video frame, must be an abbreviation or in the form
-@var{W}x@var{H}.  See @ref{video size syntax,,the Video size section
-in the ffmpeg-utils(1) manual,ffmpeg-utils}.
-
-Default value is @code{160x128}.
-
-@item VideoIntraOnly (@emph{encoding,video})
-Transmit only intra frames (useful for low bitrates, but kills frame rate).
-
-@item VideoGopSize @var{n} (@emph{encoding,video})
-If non-intra only, an intra frame is transmitted every VideoGopSize
-frames. Video synchronization can only begin at an intra frame.
-
-@item VideoTag @var{tag} (@emph{encoding,video})
-Set video tag.
-
-@item VideoHighQuality (@emph{encoding,video})
-@item Video4MotionVector (@emph{encoding,video})
-
-@item BitExact (@emph{encoding,video})
-Set bitexact encoding flag.
-
-@item IdctSimple (@emph{encoding,video})
-Set simple IDCT algorithm.
-
-@item Qscale @var{n} (@emph{encoding,video})
-Enable constant quality encoding, and set video qscale (quantization
-scale) value, expressed in @var{n} QP units.
-
-@item VideoQMin @var{n} (@emph{encoding,video})
-@item VideoQMax @var{n} (@emph{encoding,video})
-Set video qmin/qmax.
-
-@item VideoQDiff @var{integer} (@emph{encoding,video})
-Set video @option{qdiff} encoding option.
-
-@item LumiMask @var{float} (@emph{encoding,video})
-@item DarkMask @var{float} (@emph{encoding,video})
-Set @option{lumi_mask}/@option{dark_mask} encoding options.
-
-@item AVOptionVideo [@var{codec}:]@var{option} @var{value} (@emph{encoding,video})
-Set generic or private option for video stream.
-Private option must be prefixed with codec name or codec must be defined before.
-
-@item AVPresetVideo @var{preset} (@emph{encoding,video})
-Set preset for video stream.
-
-@var{preset} must be the path of a preset file.
-@end table
-
-@subsection Server status stream
-
-A server status stream is a special stream which is used to show
-statistics about the @command{ffserver} operations.
-
-It must be specified setting the option @option{Format} to
-@samp{status}.
-
-@section Redirect section
-
-A redirect section specifies where to redirect the requested URL to
-another page.
-
-A redirect section must be introduced by the line:
-@example
-<Redirect NAME>
-@end example
-
-where @var{NAME} is the name of the page which should be redirected.
-
-It only accepts the option @option{URL}, which specify the redirection
-URL.
-
-@chapter Stream examples
-
-@itemize
-@item
-Multipart JPEG
-@example
-<Stream test.mjpg>
-Feed feed1.ffm
-Format mpjpeg
-VideoFrameRate 2
-VideoIntraOnly
-NoAudio
-Strict -1
-</Stream>
-@end example
-
-@item
-Single JPEG
-@example
-<Stream test.jpg>
-Feed feed1.ffm
-Format jpeg
-VideoFrameRate 2
-VideoIntraOnly
-VideoSize 352x240
-NoAudio
-Strict -1
-</Stream>
-@end example
-
-@item
-Flash
-@example
-<Stream test.swf>
-Feed feed1.ffm
-Format swf
-VideoFrameRate 2
-VideoIntraOnly
-NoAudio
-</Stream>
-@end example
-
-@item
-ASF compatible
-@example
-<Stream test.asf>
-Feed feed1.ffm
-Format asf
-VideoFrameRate 15
-VideoSize 352x240
-VideoBitRate 256
-VideoBufferSize 40
-VideoGopSize 30
-AudioBitRate 64
-StartSendOnKey
-</Stream>
-@end example
-
-@item
-MP3 audio
-@example
-<Stream test.mp3>
-Feed feed1.ffm
-Format mp2
-AudioCodec mp3
-AudioBitRate 64
-AudioChannels 1
-AudioSampleRate 44100
-NoVideo
-</Stream>
-@end example
-
-@item
-Ogg Vorbis audio
-@example
-<Stream test.ogg>
-Feed feed1.ffm
-Metadata title "Stream title"
-AudioBitRate 64
-AudioChannels 2
-AudioSampleRate 44100
-NoVideo
-</Stream>
-@end example
-
-@item
-Real with audio only at 32 kbits
-@example
-<Stream test.ra>
-Feed feed1.ffm
-Format rm
-AudioBitRate 32
-NoVideo
-</Stream>
-@end example
-
-@item
-Real with audio and video at 64 kbits
-@example
-<Stream test.rm>
-Feed feed1.ffm
-Format rm
-AudioBitRate 32
-VideoBitRate 128
-VideoFrameRate 25
-VideoGopSize 25
-</Stream>
-@end example
-
-@item
-For stream coming from a file: you only need to set the input filename
-and optionally a new format.
-
-@example
-<Stream file.rm>
-File "/usr/local/httpd/htdocs/tlive.rm"
-NoAudio
-</Stream>
-@end example
-
-@example
-<Stream file.asf>
-File "/usr/local/httpd/htdocs/test.asf"
-NoAudio
-Metadata author "Me"
-Metadata copyright "Super MegaCorp"
-Metadata title "Test stream from disk"
-Metadata comment "Test comment"
-</Stream>
-@end example
-@end itemize
-
-@c man end
-
-@include config.texi
-@ifset config-all
-@ifset config-avutil
-@include utils.texi
-@end ifset
-@ifset config-avcodec
-@include codecs.texi
-@include bitstream_filters.texi
-@end ifset
-@ifset config-avformat
-@include formats.texi
-@include protocols.texi
-@end ifset
-@ifset config-avdevice
-@include devices.texi
-@end ifset
-@ifset config-swresample
-@include resampler.texi
-@end ifset
-@ifset config-swscale
-@include scaler.texi
-@end ifset
-@ifset config-avfilter
-@include filters.texi
-@end ifset
-@end ifset
-
-@chapter See Also
-
-@ifhtml
-@ifset config-all
-@url{ffserver.html,ffserver},
-@end ifset
-@ifset config-not-all
-@url{ffserver-all.html,ffserver-all},
-@end ifset
-the @file{doc/ffserver.conf} example,
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
-@url{ffmpeg-utils.html,ffmpeg-utils},
-@url{ffmpeg-scaler.html,ffmpeg-scaler},
-@url{ffmpeg-resampler.html,ffmpeg-resampler},
-@url{ffmpeg-codecs.html,ffmpeg-codecs},
-@url{ffmpeg-bitstream-filters.html,ffmpeg-bitstream-filters},
-@url{ffmpeg-formats.html,ffmpeg-formats},
-@url{ffmpeg-devices.html,ffmpeg-devices},
-@url{ffmpeg-protocols.html,ffmpeg-protocols},
-@url{ffmpeg-filters.html,ffmpeg-filters}
-@end ifhtml
-
-@ifnothtml
-@ifset config-all
-ffserver(1),
-@end ifset
-@ifset config-not-all
-ffserver-all(1),
-@end ifset
-the @file{doc/ffserver.conf} example, ffmpeg(1), ffplay(1), ffprobe(1),
-ffmpeg-utils(1), ffmpeg-scaler(1), ffmpeg-resampler(1),
-ffmpeg-codecs(1), ffmpeg-bitstream-filters(1), ffmpeg-formats(1),
-ffmpeg-devices(1), ffmpeg-protocols(1), ffmpeg-filters(1)
-@end ifnothtml
-
-@include authors.texi
-
-@ignore
-
-@setfilename ffserver
-@settitle ffserver video server
-
-@end ignore
-
-@bye

diff --git a/doc/fftools-common-opts.texi b/doc/fftools-common-opts.texi
index 2eff33a..84705c0 100644
--- a/doc/fftools-common-opts.texi
+++ b/doc/fftools-common-opts.texi

@@ -42,10 +42,20 @@
 thumbnails or cover arts.  If @var{stream_index} is given, then it matches
 stream number @var{stream_index} of this type. Otherwise, it matches all
 streams of this type.
-@item p:@var{program_id}[:@var{stream_index}]
-If @var{stream_index} is given, then it matches the stream with number @var{stream_index}
+@item p:@var{program_id}[:@var{stream_index}] or p:@var{program_id}[:@var{stream_type}[:@var{stream_index}]] or
+p:@var{program_id}:m:@var{key}[:@var{value}]
+In first version, if @var{stream_index} is given, then it matches the stream with number @var{stream_index}
 in the program with the id @var{program_id}. Otherwise, it matches all streams in the
-program.
+program. In the second version, @var{stream_type} is one of following: 'v' for video, 'a' for audio, 's'
+for subtitle, 'd' for data. If @var{stream_index} is also given, then it matches
+stream number @var{stream_index} of this type in the program with the id @var{program_id}.
+Otherwise, if only @var{stream_type} is given, it matches all
+streams of this type in the program with the id @var{program_id}.
+In the third version matches streams in the program with the id @var{program_id} with the metadata
+tag @var{key} having the specified value. If
+@var{value} is not given, matches streams that contain the given tag with any
+value.
+
 @item #@var{stream_id} or i:@var{stream_id}
 Match the stream by stream id (e.g. PID in MPEG-TS container).
 @item m:@var{key}[:@var{value}]
@@ -168,14 +178,24 @@
 ffmpeg -sinks pulse,server=192.168.0.4
 @end example
 
-@item -loglevel [repeat+]@var{loglevel} | -v [repeat+]@var{loglevel}
-Set the logging level used by the library.
-Adding "repeat+" indicates that repeated log output should not be compressed
-to the first line and the "Last message repeated n times" line will be
-omitted. "repeat" can also be used alone.
-If "repeat" is used alone, and with no prior loglevel set, the default
-loglevel will be used. If multiple loglevel parameters are given, using
-'repeat' will not change the loglevel.
+@item -loglevel [@var{flags}+]@var{loglevel} | -v [@var{flags}+]@var{loglevel}
+Set logging level and flags used by the library.
+
+The optional @var{flags} prefix can consist of the following values:
+@table @samp
+@item repeat
+Indicates that repeated log output should not be compressed to the first line
+and the "Last message repeated n times" line will be omitted.
+@item level
+Indicates that log output should add a @code{[level]} prefix to each message
+line. This can be used as an alternative to log coloring, e.g. when dumping the
+log to file.
+@end table
+Flags can also be used alone by adding a '+'/'-' prefix to set/reset a single
+flag without affecting other @var{flags} or changing @var{loglevel}. When
+setting both @var{flags} and @var{loglevel}, a '+' separator is expected
+between the last @var{flags} value and before @var{loglevel}.
+
 @var{loglevel} is a string or a number containing one of the following values:
 @table @samp
 @item quiet, -8
@@ -201,6 +221,17 @@
 @item trace, 56
 @end table
 
+For example to enable repeated log output, add the @code{level} prefix, and set
+@var{loglevel} to @code{verbose}:
+@example
+ffmpeg -loglevel repeat+level+verbose -i input output
+@end example
+Another example that enables repeated log output without affecting current
+state of @code{level} prefix flag or @var{loglevel}:
+@example
+ffmpeg [...] -loglevel +repeat
+@end example
+
 By default the program logs to stderr. If coloring is supported by the
 terminal, colors are used to mark errors and warnings. Log coloring
 can be disabled setting the environment variable
@@ -315,51 +346,6 @@
 @item k8
 @end table
 @end table
-
-@item -opencl_bench
-This option is used to benchmark all available OpenCL devices and print the
-results. This option is only available when FFmpeg has been compiled with
-@code{--enable-opencl}.
-
-When FFmpeg is configured with @code{--enable-opencl}, the options for the
-global OpenCL context are set via @option{-opencl_options}. See the
-"OpenCL Options" section in the ffmpeg-utils manual for the complete list of
-supported options. Amongst others, these options include the ability to select
-a specific platform and device to run the OpenCL code on. By default, FFmpeg
-will run on the first device of the first platform. While the options for the
-global OpenCL context provide flexibility to the user in selecting the OpenCL
-device of their choice, most users would probably want to select the fastest
-OpenCL device for their system.
-
-This option assists the selection of the most efficient configuration by
-identifying the appropriate device for the user's system. The built-in
-benchmark is run on all the OpenCL devices and the performance is measured for
-each device. The devices in the results list are sorted based on their
-performance with the fastest device listed first. The user can subsequently
-invoke @command{ffmpeg} using the device deemed most appropriate via
-@option{-opencl_options} to obtain the best performance for the OpenCL
-accelerated code.
-
-Typical usage to use the fastest OpenCL device involve the following steps.
-
-Run the command:
-@example
-ffmpeg -opencl_bench
-@end example
-Note down the platform ID (@var{pidx}) and device ID (@var{didx}) of the first
-i.e. fastest device in the list.
-Select the platform and device using the command:
-@example
-ffmpeg -opencl_options platform_idx=@var{pidx}:device_idx=@var{didx} ...
-@end example
-
-@item -opencl_options options (@emph{global})
-Set OpenCL environment options. This option is only available when
-FFmpeg has been compiled with @code{--enable-opencl}.
-
-@var{options} must be a list of @var{key}=@var{value} option pairs
-separated by ':'. See the ``OpenCL Options'' section in the
-ffmpeg-utils manual for the list of supported options.
 @end table
 
 @section AVOptions

diff --git a/doc/filters.texi b/doc/filters.texi
index e26dde4..cadf78c 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi

@@ -221,6 +221,7 @@
 @var{FILTERGRAPH}      ::= [sws_flags=@var{flags};] @var{FILTERCHAIN} [;@var{FILTERGRAPH}]
 @end example
 
+@anchor{filtergraph escaping}
 @section Notes on filtergraph escaping
 
 Filtergraph description composition entails several levels of
@@ -429,6 +430,16 @@
 Range is between 0 and 1.
 @end table
 
+@section acontrast
+Simple audio dynamic range commpression/expansion filter.
+
+The filter accepts the following options:
+
+@table @option
+@item contrast
+Set contrast. Default is 33. Allowed range is between 0 and 100.
+@end table
+
 @section acopy
 
 Copy the input audio source unchanged to the output. This is mainly useful for
@@ -482,6 +493,23 @@
 @end example
 @end itemize
 
+@section acrossover
+Split audio stream into several bands.
+
+This filter splits audio stream into two or more frequency ranges.
+Summing all streams back will give flat output.
+
+The filter accepts the following options:
+
+@table @option
+@item split
+Set split frequencies. Those must be positive and increasing.
+
+@item order
+Set filter order, can be @var{2nd}, @var{4th} or @var{8th}.
+Default is @var{4th}.
+@end table
+
 @section acrusher
 
 Reduce audio bit resolution.
@@ -499,7 +527,7 @@
 Another feature of this filter is the logarithmic mode.
 This setting switches from linear distances between bits to logarithmic ones.
 The result is a much more "natural" sounding crusher which doesn't gate low
-signals for example. The human ear has a logarithmic perception, too
+signals for example. The human ear has a logarithmic perception,
 so this kind of crushing is much more pleasant.
 Logarithmic crushing is also able to get anti-aliased.
 
@@ -540,6 +568,107 @@
 Set LFO rate.
 @end table
 
+@section acue
+
+Delay audio filtering until a given wallclock timestamp. See the @ref{cue}
+filter.
+
+@section adeclick
+Remove impulsive noise from input audio.
+
+Samples detected as impulsive noise are replaced by interpolated samples using
+autoregressive modelling.
+
+@table @option
+@item w
+Set window size, in milliseconds. Allowed range is from @code{10} to
+@code{100}. Default value is @code{55} milliseconds.
+This sets size of window which will be processed at once.
+
+@item o
+Set window overlap, in percentage of window size. Allowed range is from
+@code{50} to @code{95}. Default value is @code{75} percent.
+Setting this to a very high value increases impulsive noise removal but makes
+whole process much slower.
+
+@item a
+Set autoregression order, in percentage of window size. Allowed range is from
+@code{0} to @code{25}. Default value is @code{2} percent. This option also
+controls quality of interpolated samples using neighbour good samples.
+
+@item t
+Set threshold value. Allowed range is from @code{1} to @code{100}.
+Default value is @code{2}.
+This controls the strength of impulsive noise which is going to be removed.
+The lower value, the more samples will be detected as impulsive noise.
+
+@item b
+Set burst fusion, in percentage of window size. Allowed range is @code{0} to
+@code{10}. Default value is @code{2}.
+If any two samples deteced as noise are spaced less than this value then any
+sample inbetween those two samples will be also detected as noise.
+
+@item m
+Set overlap method.
+
+It accepts the following values:
+@table @option
+@item a
+Select overlap-add method. Even not interpolated samples are slightly
+changed with this method.
+
+@item s
+Select overlap-save method. Not interpolated samples remain unchanged.
+@end table
+
+Default value is @code{a}.
+@end table
+
+@section adeclip
+Remove clipped samples from input audio.
+
+Samples detected as clipped are replaced by interpolated samples using
+autoregressive modelling.
+
+@table @option
+@item w
+Set window size, in milliseconds. Allowed range is from @code{10} to @code{100}.
+Default value is @code{55} milliseconds.
+This sets size of window which will be processed at once.
+
+@item o
+Set window overlap, in percentage of window size. Allowed range is from @code{50}
+to @code{95}. Default value is @code{75} percent.
+
+@item a
+Set autoregression order, in percentage of window size. Allowed range is from
+@code{0} to @code{25}. Default value is @code{8} percent. This option also controls
+quality of interpolated samples using neighbour good samples.
+
+@item t
+Set threshold value. Allowed range is from @code{1} to @code{100}.
+Default value is @code{10}. Higher values make clip detection less aggressive.
+
+@item n
+Set size of histogram used to detect clips. Allowed range is from @code{100} to @code{9999}.
+Default value is @code{1000}. Higher values make clip detection less aggressive.
+
+@item m
+Set overlap method.
+
+It accepts the following values:
+@table @option
+@item a
+Select overlap-add method. Even not interpolated samples are slightly changed
+with this method.
+
+@item s
+Select overlap-save method. Not interpolated samples remain unchanged.
+@end table
+
+Default value is @code{a}.
+@end table
+
 @section adelay
 
 Delay one or more audio channels.
@@ -574,6 +703,12 @@
 @end example
 @end itemize
 
+@section aderivative, aintegral
+
+Compute derivative/integral of audio stream.
+
+Applying both filters one after another produces original audio.
+
 @section aecho
 
 Apply echoing to the input audio.
@@ -820,6 +955,8 @@
 select double-exponential seat
 @item desi
 select double-exponential sigmoid
+@item losi
+select logistic sigmoid
 @end table
 @end table
 
@@ -839,6 +976,96 @@
 @end example
 @end itemize
 
+@section afftdn
+Denoise audio samples with FFT.
+
+A description of the accepted parameters follows.
+
+@table @option
+@item nr
+Set the noise reduction in dB, allowed range is 0.01 to 97.
+Default value is 12 dB.
+
+@item nf
+Set the noise floor in dB, allowed range is -80 to -20.
+Default value is -50 dB.
+
+@item nt
+Set the noise type.
+
+It accepts the following values:
+@table @option
+@item w
+Select white noise.
+
+@item v
+Select vinyl noise.
+
+@item s
+Select shellac noise.
+
+@item c
+Select custom noise, defined in @code{bn} option.
+
+Default value is white noise.
+@end table
+
+@item bn
+Set custom band noise for every one of 15 bands.
+Bands are separated by ' ' or '|'.
+
+@item rf
+Set the residual floor in dB, allowed range is -80 to -20.
+Default value is -38 dB.
+
+@item tn
+Enable noise tracking. By default is disabled.
+With this enabled, noise floor is automatically adjusted.
+
+@item tr
+Enable residual tracking. By default is disabled.
+
+@item om
+Set the output mode.
+
+It accepts the following values:
+@table @option
+@item i
+Pass input unchanged.
+
+@item o
+Pass noise filtered out.
+
+@item n
+Pass only noise.
+
+Default value is @var{o}.
+@end table
+@end table
+
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item sample_noise, sn
+Start or stop measuring noise profile.
+Syntax for the command is : "start" or "stop" string.
+After measuring noise profile is stopped it will be
+automatically applied in filtering.
+
+@item noise_reduction, nr
+Change noise reduction. Argument is single float number.
+Syntax for the command is : "@var{noise_reduction}"
+
+@item noise_floor, nf
+Change noise floor. Argument is single float number.
+Syntax for the command is : "@var{noise_floor}"
+
+@item output_mode, om
+Change output mode operation.
+Syntax for the command is : "i", "o" or "n" string.
+@end table
+
 @section afftfilt
 Apply arbitrary expressions to samples in frequency domain.
 
@@ -916,12 +1143,13 @@
 @end example
 @end itemize
 
+@anchor{afir}
 @section afir
 
 Apply an arbitrary Frequency Impulse Response filter.
 
 This filter is designed for applying long FIR filters,
-up to 30 seconds long.
+up to 60 seconds long.
 
 It can be used as component for digital crossover filters,
 room equalization, cross talk cancellation, wavefield synthesis,
@@ -945,8 +1173,47 @@
 @item length
 Set Impulse Response filter length. Default is 1, which means whole IR is processed.
 
-@item again
+@item gtype
 Enable applying gain measured from power of IR.
+
+Set which approach to use for auto gain measurement.
+
+@table @option
+@item none
+Do not apply any gain.
+
+@item peak
+select peak gain, very conservative approach. This is default value.
+
+@item dc
+select DC gain, limited application.
+
+@item gn
+select gain to noise approach, this is most popular one.
+@end table
+
+@item irgain
+Set gain to be applied to IR coefficients before filtering.
+Allowed range is 0 to 1. This gain is applied after any gain applied with @var{gtype} option.
+
+@item irfmt
+Set format of IR stream. Can be @code{mono} or @code{input}.
+Default is @code{input}.
+
+@item maxir
+Set max allowed Impulse Response filter duration in seconds. Default is 30 seconds.
+Allowed range is 0.1 to 60 seconds.
+
+@item response
+Show IR frequency reponse, magnitude and phase in additional video stream.
+By default it is disabled.
+
+@item channel
+Set for which IR channel to display frequency response. By default is first channel
+displayed. This option is used only when @var{response} is enabled.
+
+@item size
+Set video stream size. This option is used only when @var{response} is enabled.
 @end table
 
 @subsection Examples
@@ -1049,6 +1316,99 @@
 Default is @code{average}. Can be @code{average} or @code{maximum}.
 @end table
 
+@section aiir
+
+Apply an arbitrary Infinite Impulse Response filter.
+
+It accepts the following parameters:
+
+@table @option
+@item z
+Set numerator/zeros coefficients.
+
+@item p
+Set denominator/poles coefficients.
+
+@item k
+Set channels gains.
+
+@item dry_gain
+Set input gain.
+
+@item wet_gain
+Set output gain.
+
+@item f
+Set coefficients format.
+
+@table @samp
+@item tf
+transfer function
+@item zp
+Z-plane zeros/poles, cartesian (default)
+@item pr
+Z-plane zeros/poles, polar radians
+@item pd
+Z-plane zeros/poles, polar degrees
+@end table
+
+@item r
+Set kind of processing.
+Can be @code{d} - direct or @code{s} - serial cascading. Defauls is @code{s}.
+
+@item e
+Set filtering precision.
+
+@table @samp
+@item dbl
+double-precision floating-point (default)
+@item flt
+single-precision floating-point
+@item i32
+32-bit integers
+@item i16
+16-bit integers
+@end table
+
+@item response
+Show IR frequency reponse, magnitude and phase in additional video stream.
+By default it is disabled.
+
+@item channel
+Set for which IR channel to display frequency response. By default is first channel
+displayed. This option is used only when @var{response} is enabled.
+
+@item size
+Set video stream size. This option is used only when @var{response} is enabled.
+@end table
+
+Coefficients in @code{tf} format are separated by spaces and are in ascending
+order.
+
+Coefficients in @code{zp} format are separated by spaces and order of coefficients
+doesn't matter. Coefficients in @code{zp} format are complex numbers with @var{i}
+imaginary unit.
+
+Different coefficients and gains can be provided for every channel, in such case
+use '|' to separate coefficients or gains. Last provided coefficients will be
+used for all remaining channels.
+
+@subsection Examples
+
+@itemize
+@item
+Apply 2 pole elliptic notch at arround 5000Hz for 48000 Hz sample rate:
+@example
+aiir=k=1:z=7.957584807809675810E-1 -2.575128568908332300 3.674839853930788710 -2.57512875289799137 7.957586296317130880E-1:p=1 -2.86950072432325953 3.63022088054647218 -2.28075678147272232 6.361362326477423500E-1:f=tf:r=d
+@end example
+
+@item
+Same as above but in @code{zp} format:
+@example
+aiir=k=0.79575848078096756:z=0.80918701+0.58773007i 0.80918701-0.58773007i 0.80884700+0.58784055i 0.80884700-0.58784055i:p=0.63892345+0.59951235i 0.63892345-0.59951235i 0.79582691+0.44198673i 0.79582691-0.44198673i:f=zp:r=s
+@end example
+@end itemize
+
 @section alimiter
 
 The limiter prevents an input signal from rising over a desired threshold.
@@ -1117,6 +1477,8 @@
 octave
 @item s
 slope
+@item k
+kHz
 @end table
 
 @item width, w
@@ -1126,6 +1488,23 @@
 Specify which channels to filter, by default all available are filtered.
 @end table
 
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item frequency, f
+Change allpass frequency.
+Syntax for the command is : "@var{frequency}"
+
+@item width_type, t
+Change allpass width_type.
+Syntax for the command is : "@var{width_type}"
+
+@item width, w
+Change allpass width.
+Syntax for the command is : "@var{width}"
+@end table
+
 @section aloop
 
 Loop audio samples.
@@ -1134,13 +1513,14 @@
 
 @table @option
 @item loop
-Set the number of loops.
+Set the number of loops. Setting this value to -1 will result in infinite loops.
+Default is 0.
 
 @item size
-Set maximal number of samples.
+Set maximal number of samples. Default is 0.
 
 @item start
-Set first sample of loop.
+Set first sample of loop. Default is 0.
 @end table
 
 @anchor{amerge}
@@ -1236,8 +1616,20 @@
 The transition time, in seconds, for volume renormalization when an input
 stream ends. The default value is 2 seconds.
 
+@item weights
+Specify weight of each input audio stream as sequence.
+Each weight is separated by space. By default all inputs have same weight.
 @end table
 
+@section amultiply
+
+Multiply first audio stream with second audio stream and store result
+in output audio stream. Multiplication is done by multiplying each
+sample from first stream with sample at same position from second stream.
+
+With this element-wise multiplication one can create amplitude fades and
+amplitude modulations.
+
 @section anequalizer
 
 High-order parametric multiband equalizer for each channel.
@@ -1497,7 +1889,7 @@
 [@var{sample_rate}:]@var{resampler_options}, where @var{sample_rate}
 expresses a sample rate and @var{resampler_options} is a list of
 @var{key}=@var{value} pairs, separated by ":". See the
-@ref{Resampler Options,,the "Resampler Options" section in the
+@ref{Resampler Options,,"Resampler Options" section in the
 ffmpeg-resampler(1) manual,ffmpeg-resampler}
 for the complete list of supported options.
 
@@ -1632,7 +2024,7 @@
 @table @option
 @item length
 Short window length in seconds, used for peak and trough RMS measurement.
-Default is @code{0.05} (50 milliseconds). Allowed range is @code{[0.1 - 10]}.
+Default is @code{0.05} (50 milliseconds). Allowed range is @code{[0.01 - 10]}.
 
 @item metadata
 
@@ -1656,6 +2048,8 @@
 Peak_count
 Bit_depth
 Dynamic_range
+Zero_crossings
+Zero_crossings_rate
 
 and for Overall:
 DC_offset
@@ -1733,6 +2127,12 @@
 
 @item Dynamic range
 Measured dynamic range of audio in dB.
+
+@item Zero crossings
+Number of points where the waveform crosses the zero level axis.
+
+@item Zero crossings rate
+Rate of Zero crossings and number of audio samples.
 @end table
 
 @section atempo
@@ -1741,7 +2141,12 @@
 
 The filter accepts exactly one parameter, the audio tempo. If not
 specified then the filter will assume nominal 1.0 tempo. Tempo must
-be in the [0.5, 2.0] range.
+be in the [0.5, 100.0] range.
+
+Note that tempo greater than 2 will skip some samples rather than
+blend them in.  If for any reason this is a concern it is always
+possible to daisy-chain several instances of atempo to achieve the
+desired product tempo.
 
 @subsection Examples
 
@@ -1753,9 +2158,15 @@
 @end example
 
 @item
-To speed up audio to 125% tempo:
+To speed up audio to 300% tempo:
 @example
-atempo=1.25
+atempo=3
+@end example
+
+@item
+To speed up audio to 300% tempo by daisy-chaining two atempo instances:
+@example
+atempo=sqrt(3),atempo=sqrt(3)
 @end example
 @end itemize
 
@@ -1856,6 +2267,8 @@
 octave
 @item s
 slope
+@item k
+kHz
 @end table
 
 @item width, w
@@ -1865,6 +2278,23 @@
 Specify which channels to filter, by default all available are filtered.
 @end table
 
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item frequency, f
+Change bandpass frequency.
+Syntax for the command is : "@var{frequency}"
+
+@item width_type, t
+Change bandpass width_type.
+Syntax for the command is : "@var{width_type}"
+
+@item width, w
+Change bandpass width.
+Syntax for the command is : "@var{width}"
+@end table
+
 @section bandreject
 
 Apply a two-pole Butterworth band-reject filter with central
@@ -1888,6 +2318,8 @@
 octave
 @item s
 slope
+@item k
+kHz
 @end table
 
 @item width, w
@@ -1897,7 +2329,24 @@
 Specify which channels to filter, by default all available are filtered.
 @end table
 
-@section bass
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item frequency, f
+Change bandreject frequency.
+Syntax for the command is : "@var{frequency}"
+
+@item width_type, t
+Change bandreject width_type.
+Syntax for the command is : "@var{width_type}"
+
+@item width, w
+Change bandreject width.
+Syntax for the command is : "@var{width}"
+@end table
+
+@section bass, lowshelf
 
 Boost or cut the bass (lower) frequencies of the audio using a two-pole
 shelving filter with a response similar to that of a standard
@@ -1927,6 +2376,8 @@
 octave
 @item s
 slope
+@item k
+kHz
 @end table
 
 @item width, w
@@ -1936,6 +2387,27 @@
 Specify which channels to filter, by default all available are filtered.
 @end table
 
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item frequency, f
+Change bass frequency.
+Syntax for the command is : "@var{frequency}"
+
+@item width_type, t
+Change bass width_type.
+Syntax for the command is : "@var{width_type}"
+
+@item width, w
+Change bass width.
+Syntax for the command is : "@var{width}"
+
+@item gain, g
+Change bass gain.
+Syntax for the command is : "@var{gain}"
+@end table
+
 @section biquad
 
 Apply a biquad IIR filter with the given coefficients.
@@ -1944,6 +2416,20 @@
 and @var{channels}, @var{c} specify which channels to filter, by default all
 available are filtered.
 
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item a0
+@item a1
+@item a2
+@item b0
+@item b1
+@item b2
+Change biquad parameter.
+Syntax for the command is : "@var{value}"
+@end table
+
 @section bs2b
 Bauer stereo to binaural transformation, which improves headphone listening of
 stereo audio records.
@@ -1999,6 +2485,10 @@
 If no mapping is present, the filter will implicitly map input channels to
 output channels, preserving indices.
 
+@subsection Examples
+
+@itemize
+@item
 For example, assuming a 5.1+downmix input MOV file,
 @example
 ffmpeg -i in.mov -filter 'channelmap=map=DL-FL|DR-FR' out.wav
@@ -2006,10 +2496,12 @@
 will create an output WAV file tagged as stereo from the downmix channels of
 the input.
 
+@item
 To fix a 5.1 WAV improperly encoded in AAC's native channel order
 @example
 ffmpeg -i in.wav -filter 'channelmap=1|2|0|5|3|4:5.1' out.wav
 @end example
+@end itemize
 
 @section channelsplit
 
@@ -2019,8 +2511,17 @@
 @table @option
 @item channel_layout
 The channel layout of the input stream. The default is "stereo".
+@item channels
+A channel layout describing the channels to be extracted as separate output streams
+or "all" to extract each input channel as a separate stream. The default is "all".
+
+Choosing channels not present in channel layout in the input will result in an error.
 @end table
 
+@subsection Examples
+
+@itemize
+@item
 For example, assuming a stereo input MP3 file,
 @example
 ffmpeg -i in.mp3 -filter_complex channelsplit out.mkv
@@ -2028,6 +2529,7 @@
 will create an output Matroska file with two audio streams, one containing only
 the left channel and the other the right channel.
 
+@item
 Split a 5.1 WAV file into per-channel files:
 @example
 ffmpeg -i in.wav -filter_complex
@@ -2037,6 +2539,14 @@
 side_right.wav
 @end example
 
+@item
+Extract only LFE from a 5.1 WAV file:
+@example
+ffmpeg -i in.wav -filter_complex 'channelsplit=channel_layout=5.1:channels=LFE[LFE]'
+-map '[LFE]' lfe.wav
+@end example
+@end itemize
+
 @section chorus
 Add a chorus effect to the audio.
 
@@ -2349,6 +2859,21 @@
 used to prevent clipping.
 @end table
 
+@section drmeter
+Measure audio dynamic range.
+
+DR values of 14 and higher is found in very dynamic material. DR of 8 to 13
+is found in transition material. And anything less that 8 have very poor dynamics
+and is very compressed.
+
+The filter accepts the following options:
+
+@table @option
+@item length
+Set window length in seconds used to split audio into segments of equal length.
+Default is 3 seconds.
+@end table
+
 @section dynaudnorm
 Dynamic Audio Normalizer.
 
@@ -2545,6 +3070,8 @@
 octave
 @item s
 slope
+@item k
+kHz
 @end table
 
 @item width, w
@@ -2573,6 +3100,27 @@
 @end example
 @end itemize
 
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item frequency, f
+Change equalizer frequency.
+Syntax for the command is : "@var{frequency}"
+
+@item width_type, t
+Change equalizer width_type.
+Syntax for the command is : "@var{width_type}"
+
+@item width, w
+Change equalizer width.
+Syntax for the command is : "@var{width}"
+
+@item gain, g
+Change equalizer gain.
+Syntax for the command is : "@var{gain}"
+@end table
+
 @section extrastereo
 
 Linearly increases the difference between left and right channels which
@@ -2920,6 +3468,21 @@
 
 @item lfe
 Set custom gain for LFE channels. Value is in dB. Default is 0.
+
+@item size
+Set size of frame in number of samples which will be processed at once.
+Default value is @var{1024}. Allowed range is from 1024 to 96000.
+
+@item hrir
+Set format of hrir stream.
+Default value is @var{stereo}. Alternative value is @var{multich}.
+If value is set to @var{stereo}, number of additional streams should
+be greater or equal to number of input channels in first input stream.
+Also each additional stream should have stereo number of channels.
+If value is set to @var{multich}, number of additional streams should
+be exactly one. Also number of input channels of additional stream
+should be equal or greater than twice number of channels of first input
+stream.
 @end table
 
 @subsection Examples
@@ -2933,6 +3496,14 @@
 ffmpeg -i input.wav -lavfi-complex "amovie=azi_270_ele_0_DFC.wav[sr],amovie=azi_90_ele_0_DFC.wav[sl],amovie=azi_225_ele_0_DFC.wav[br],amovie=azi_135_ele_0_DFC.wav[bl],amovie=azi_0_ele_0_DFC.wav,asplit[fc][lfe],amovie=azi_35_ele_0_DFC.wav[fl],amovie=azi_325_ele_0_DFC.wav[fr],[a:0][fl][fr][fc][lfe][bl][br][sl][sr]headphone=FL|FR|FC|LFE|BL|BR|SL|SR"
 output.wav
 @end example
+
+@item
+Full example using wav files as coefficients with amovie filters for 7.1 downmix,
+but now in @var{multich} @var{hrir} format.
+@example
+ffmpeg -i input.wav -lavfi-complex "amovie=minp.wav[hrirs],[a:0][hrirs]headphone=map=FL|FR|FC|LFE|BL|BR|SL|SR:hrir=multich"
+output.wav
+@end example
 @end itemize
 
 @section highpass
@@ -2961,6 +3532,8 @@
 octave
 @item s
 slope
+@item k
+kHz
 @end table
 
 @item width, w
@@ -2972,6 +3545,23 @@
 Specify which channels to filter, by default all available are filtered.
 @end table
 
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item frequency, f
+Change highpass frequency.
+Syntax for the command is : "@var{frequency}"
+
+@item width_type, t
+Change highpass width_type.
+Syntax for the command is : "@var{width_type}"
+
+@item width, w
+Change highpass width.
+Syntax for the command is : "@var{width}"
+@end table
+
 @section join
 
 Join multiple input streams into one multi-channel stream.
@@ -3250,6 +3840,8 @@
 octave
 @item s
 slope
+@item k
+kHz
 @end table
 
 @item width, w
@@ -3270,6 +3862,98 @@
 @end example
 @end itemize
 
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item frequency, f
+Change lowpass frequency.
+Syntax for the command is : "@var{frequency}"
+
+@item width_type, t
+Change lowpass width_type.
+Syntax for the command is : "@var{width_type}"
+
+@item width, w
+Change lowpass width.
+Syntax for the command is : "@var{width}"
+@end table
+
+@section lv2
+
+Load a LV2 (LADSPA Version 2) plugin.
+
+To enable compilation of this filter you need to configure FFmpeg with
+@code{--enable-lv2}.
+
+@table @option
+@item plugin, p
+Specifies the plugin URI. You may need to escape ':'.
+
+@item controls, c
+Set the '|' separated list of controls which are zero or more floating point
+values that determine the behavior of the loaded plugin (for example delay,
+threshold or gain).
+If @option{controls} is set to @code{help}, all available controls and
+their valid ranges are printed.
+
+@item sample_rate, s
+Specify the sample rate, default to 44100. Only used if plugin have
+zero inputs.
+
+@item nb_samples, n
+Set the number of samples per channel per each output frame, default
+is 1024. Only used if plugin have zero inputs.
+
+@item duration, d
+Set the minimum duration of the sourced audio. See
+@ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}
+for the accepted syntax.
+Note that the resulting duration may be greater than the specified duration,
+as the generated audio is always cut at the end of a complete frame.
+If not specified, or the expressed duration is negative, the audio is
+supposed to be generated forever.
+Only used if plugin have zero inputs.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Apply bass enhancer plugin from Calf:
+@example
+lv2=p=http\\\\://calf.sourceforge.net/plugins/BassEnhancer:c=amount=2
+@end example
+
+@item
+Apply vinyl plugin from Calf:
+@example
+lv2=p=http\\\\://calf.sourceforge.net/plugins/Vinyl:c=drone=0.2|aging=0.5
+@end example
+
+@item
+Apply bit crusher plugin from ArtyFX:
+@example
+lv2=p=http\\\\://www.openavproductions.com/artyfx#bitta:c=crush=0.3
+@end example
+@end itemize
+
+@section mcompand
+Multiband Compress or expand the audio's dynamic range.
+
+The input audio is divided into bands using 4th order Linkwitz-Riley IIRs.
+This is akin to the crossover of a loudspeaker, and results in flat frequency
+response when absent compander action.
+
+It accepts the following parameters:
+
+@table @option
+@item args
+This option syntax is:
+attack,decay,[attack,decay..] soft-knee points crossover_frequency [delay [initial_volume [gain]]] | attack,decay ...
+For explanation of each item refer to compand filter documentation.
+@end table
+
 @anchor{pan}
 @section pan
 
@@ -3375,6 +4059,9 @@
 @section rubberband
 Apply time-stretching and pitch-shifting with librubberband.
 
+To enable compilation of this filter, you need to configure FFmpeg with
+@code{--enable-librubberband}.
+
 The filter accepts the following options:
 
 @table @option
@@ -3600,12 +4287,15 @@
 The filter accepts the following options:
 
 @table @option
-@item duration, d
-Set silence duration until notification (default is 2 seconds).
-
 @item noise, n
 Set noise tolerance. Can be specified in dB (in case "dB" is appended to the
 specified value) or amplitude ratio. Default is -60dB, or 0.001.
+
+@item duration, d
+Set silence duration until notification (default is 2 seconds).
+
+@item mono, m
+Process each channel separately, instead of combined. By default is disabled.
 @end table
 
 @subsection Examples
@@ -3653,6 +4343,19 @@
 Can be specified in dB (in case "dB" is appended to the specified value)
 or amplitude ratio. Default value is @code{0}.
 
+@item start_silence
+Specify max duration of silence at beginning that will be kept after
+trimming. Default is 0, which is equal to trimming all samples detected
+as silence.
+
+@item start_mode
+Specify mode of detection of silence end in start of multi-channel audio.
+Can be @var{any} or @var{all}. Default is @var{any}.
+With @var{any}, any sample that is detected as non-silence will cause
+stopped trimming of silence.
+With @var{all}, only if all channels are detected as non-silence will cause
+stopped trimming of silence.
+
 @item stop_periods
 Set the count for trimming silence from the end of audio.
 To remove silence from the middle of a file, specify a @var{stop_periods}
@@ -3674,11 +4377,18 @@
 Can be specified in dB (in case "dB" is appended to the specified value)
 or amplitude ratio. Default value is @code{0}.
 
-@item leave_silence
-This indicates that @var{stop_duration} length of audio should be left intact
-at the beginning of each period of silence.
-For example, if you want to remove long pauses between words but do not want
-to remove the pauses completely. Default value is @code{0}.
+@item stop_silence
+Specify max duration of silence at end that will be kept after
+trimming. Default is 0, which is equal to trimming all samples detected
+as silence.
+
+@item stop_mode
+Specify mode of detection of silence start in end of multi-channel audio.
+Can be @var{any} or @var{all}. Default is @var{any}.
+With @var{any}, any sample that is detected as non-silence will cause
+stopped trimming of silence.
+With @var{all}, only if all channels are detected as non-silence will cause
+stopped trimming of silence.
 
 @item detection
 Set how is silence detected. Can be @code{rms} or @code{peak}. Second is faster
@@ -3686,7 +4396,8 @@
 Default value is @code{rms}.
 
 @item window
-Set ratio used to calculate size of window for detecting silence.
+Set duration in number of seconds used to calculate size of window in number
+of samples for detecting silence.
 Default value is @code{0.02}. Allowed range is from @code{0} to @code{10}.
 @end table
 
@@ -3698,14 +4409,14 @@
 that does not contain the delay at the start which usually occurs between
 pressing the record button and the start of the performance:
 @example
-silenceremove=1:5:0.02
+silenceremove=start_periods=1:start_duration=5:start_threshold=0.02
 @end example
 
 @item
 Trim all silence encountered from beginning to end where there is more than 1
 second of silence in audio:
 @example
-silenceremove=0:0:0:-1:1:-90dB
+silenceremove=stop_periods=-1:stop_duration=1:stop_threshold=-90dB
 @end example
 @end itemize
 
@@ -4037,7 +4748,7 @@
 Set LFE output volume. By default, this is @var{1}.
 @end table
 
-@section treble
+@section treble, highshelf
 
 Boost or cut treble (upper) frequencies of the audio using a two-pole
 shelving filter with a response similar to that of a standard
@@ -4067,6 +4778,8 @@
 octave
 @item s
 slope
+@item k
+kHz
 @end table
 
 @item width, w
@@ -4076,6 +4789,27 @@
 Specify which channels to filter, by default all available are filtered.
 @end table
 
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item frequency, f
+Change treble frequency.
+Syntax for the command is : "@var{frequency}"
+
+@item width_type, t
+Change treble width_type.
+Syntax for the command is : "@var{width_type}"
+
+@item width, w
+Change treble width.
+Syntax for the command is : "@var{width}"
+
+@item gain, g
+Change treble gain.
+Syntax for the command is : "@var{gain}"
+@end table
+
 @section tremolo
 
 Sinusoidal amplitude modulation.
@@ -4512,7 +5246,7 @@
 To enable compilation of this filter you need to configure FFmpeg with
 @code{--enable-libflite}.
 
-Note that the flite library is not thread-safe.
+Note that versions of the flite library prior to 2.0 are not thread-safe.
 
 The filter accepts the following options:
 
@@ -4567,7 +5301,7 @@
 @end itemize
 
 For more information about libflite, check:
-@url{http://www.speech.cs.cmu.edu/flite/}
+@url{http://www.festvox.org/flite/}
 
 @section anoisesrc
 
@@ -4609,6 +5343,33 @@
 @end example
 @end itemize
 
+@section hilbert
+
+Generate odd-tap Hilbert transform FIR coefficients.
+
+The resulting stream can be used with @ref{afir} filter for phase-shifting
+the signal by 90 degrees.
+
+This is used in many matrix coding schemes and for analytic signal generation.
+The process is often written as a multiplication by i (or j), the imaginary unit.
+
+The filter accepts the following options:
+
+@table @option
+
+@item sample_rate, s
+Set sample rate, default is 44100.
+
+@item taps, t
+Set length of FIR filter, default is 22051.
+
+@item nb_samples, n
+Set number of samples per each frame.
+
+@item win_func, w
+Set window function to be used when generating FIR coefficients.
+@end table
+
 @section sine
 
 Generate an audio signal made of a sine wave with amplitude 1/8.
@@ -4742,6 +5503,38 @@
 pipeline drops frames. If you're trying to apply an image as an
 overlay to a video stream, consider the @var{overlay} filter instead.
 
+@section amplify
+
+Amplify differences between current pixel and pixels of adjacent frames in
+same pixel location.
+
+This filter accepts the following options:
+
+@table @option
+@item radius
+Set frame radius. Default is 2. Allowed range is from 1 to 63.
+For example radius of 3 will instruct filter to calculate average of 7 frames.
+
+@item factor
+Set factor to amplify difference. Default is 2. Allowed range is from 0 to 65535.
+
+@item threshold
+Set threshold for difference amplification. Any differrence greater or equal to
+this value will not alter source pixel. Default is 10.
+Allowed range is from 0 to 65535.
+
+@item low
+Set lower limit for changing source pixel. Default is 65535. Allowed range is from 0 to 65535.
+This option controls maximum possible value that will decrease source pixel value.
+
+@item high
+Set high limit for changing source pixel. Default is 65535. Allowed range is from 0 to 65535.
+This option controls maximum possible value that will increase source pixel value.
+
+@item planes
+Set which planes to filter. Default is all. Allowed range is from 0 to 15.
+@end table
+
 @section ass
 
 Same as the @ref{subtitles} filter, except that it doesn't require libavcodec
@@ -4802,7 +5595,7 @@
 threshold B is designed to react on continuous changes in the input signal.
 
 @item s
-Set number of frames filter will use for averaging. Default is 33. Must be odd
+Set number of frames filter will use for averaging. Default is 9. Must be odd
 number in range [5, 129].
 
 @item p
@@ -4817,13 +5610,13 @@
 
 @table @option
 @item sizeX
-Set horizontal kernel size.
+Set horizontal radius size.
 
 @item planes
 Set which planes to filter. By default all planes are filtered.
 
 @item sizeY
-Set vertical kernel size, if zero it will be same as @code{sizeX}.
+Set vertical radius size, if zero it will be same as @code{sizeX}.
 Default is @code{0}.
 @end table
 
@@ -5031,10 +5824,10 @@
 
 @item SW
 @item SH
-Width and height scale depending on the currently filtered plane. It is the
-ratio between the corresponding luma plane number of pixels and the current
-plane ones. E.g. for YUV4:2:0 the values are @code{1,1} for the luma plane, and
-@code{0.5,0.5} for chroma planes.
+Width and height scale for the plane being filtered. It is the
+ratio between the dimensions of the current plane to the luma plane,
+e.g. for a @code{yuv420p} frame, the values are @code{1,1} for
+the luma plane and @code{0.5,0.5} for the chroma planes.
 
 @item T
 Time of the current frame, expressed in seconds.
@@ -5101,6 +5894,91 @@
 @end example
 @end itemize
 
+@section bm3d
+
+Denoise frames using Block-Matching 3D algorithm.
+
+The filter accepts the following options.
+
+@table @option
+@item sigma
+Set denoising strength. Default value is 1.
+Allowed range is from 0 to 999.9.
+The denoising algorith is very sensitive to sigma, so adjust it
+according to the source.
+
+@item block
+Set local patch size. This sets dimensions in 2D.
+
+@item bstep
+Set sliding step for processing blocks. Default value is 4.
+Allowed range is from 1 to 64.
+Smaller values allows processing more reference blocks and is slower.
+
+@item group
+Set maximal number of similar blocks for 3rd dimension. Default value is 1.
+When set to 1, no block matching is done. Larger values allows more blocks
+in single group.
+Allowed range is from 1 to 256.
+
+@item range
+Set radius for search block matching. Default is 9.
+Allowed range is from 1 to INT32_MAX.
+
+@item mstep
+Set step between two search locations for block matching. Default is 1.
+Allowed range is from 1 to 64. Smaller is slower.
+
+@item thmse
+Set threshold of mean square error for block matching. Valid range is 0 to
+INT32_MAX.
+
+@item hdthr
+Set thresholding parameter for hard thresholding in 3D transformed domain.
+Larger values results in stronger hard-thresholding filtering in frequency
+domain.
+
+@item estim
+Set filtering estimation mode. Can be @code{basic} or @code{final}.
+Default is @code{basic}.
+
+@item ref
+If enabled, filter will use 2nd stream for block matching.
+Default is disabled for @code{basic} value of @var{estim} option,
+and always enabled if value of @var{estim} is @code{final}.
+
+@item planes
+Set planes to filter. Default is all available except alpha.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Basic filtering with bm3d:
+@example
+bm3d=sigma=3:block=4:bstep=2:group=1:estim=basic
+@end example
+
+@item
+Same as above, but filtering only luma:
+@example
+bm3d=sigma=3:block=4:bstep=2:group=1:estim=basic:planes=1
+@end example
+
+@item
+Same as above, but with both estimation modes:
+@example
+split[a][b],[a]bm3d=sigma=3:block=4:bstep=2:group=1:estim=basic[a],[b][a]bm3d=sigma=3:block=4:bstep=2:group=16:estim=final:ref=1
+@end example
+
+@item
+Same as above, but prefilter with @ref{nlmeans} filter instead:
+@example
+split[a][b],[a]nlmeans=s=3:r=7:p=3[a],[b][a]bm3d=sigma=3:block=4:bstep=2:group=16:estim=final:ref=1
+@end example
+@end itemize
+
 @section boxblur
 
 Apply a boxblur algorithm to the input video.
@@ -5939,7 +6817,7 @@
 
 @section convolution
 
-Apply convolution 3x3 or 5x5 filter.
+Apply convolution of 3x3, 5x5, 7x7 or horizontal/vertical up to 49 elements.
 
 The filter accepts the following options:
 
@@ -5949,13 +6827,15 @@
 @item 2m
 @item 3m
 Set matrix for each plane.
-Matrix is sequence of 9 or 25 signed integers.
+Matrix is sequence of 9, 25 or 49 signed integers in @var{square} mode,
+and from 1 to 49 odd number of signed integers in @var{row} mode.
 
 @item 0rdiv
 @item 1rdiv
 @item 2rdiv
 @item 3rdiv
 Set multiplier for calculated value for each plane.
+If unset or 0, it will be sum of all matrix elements.
 
 @item 0bias
 @item 1bias
@@ -5963,6 +6843,13 @@
 @item 3bias
 Set bias for each plane. This value is added to the result of the multiplication.
 Useful for making the overall image brighter or darker. Default is 0.0.
+
+@item 0mode
+@item 1mode
+@item 2mode
+@item 3mode
+Set matrix mode for each plane. Can be @var{square}, @var{row} or @var{column}.
+Default is @var{square}.
 @end table
 
 @subsection Examples
@@ -6364,6 +7251,37 @@
 playback.
 @end table
 
+@anchor{cue}
+@section cue
+
+Delay video filtering until a given wallclock timestamp. The filter first
+passes on @option{preroll} amount of frames, then it buffers at most
+@option{buffer} amount of frames and waits for the cue. After reaching the cue
+it forwards the buffered frames and also any subsequent frames coming in its
+input.
+
+The filter can be used synchronize the output of multiple ffmpeg processes for
+realtime output devices like decklink. By putting the delay in the filtering
+chain and pre-buffering frames the process can pass on data to output almost
+immediately after the target wallclock timestamp is reached.
+
+Perfect frame accuracy cannot be guaranteed, but the result is good enough for
+some use cases.
+
+@table @option
+
+@item cue
+The cue timestamp expressed in a UNIX timestamp in microseconds. Default is 0.
+
+@item preroll
+The duration of content to pass on as preroll expressed in seconds. Default is 0.
+
+@item buffer
+The maximum duration of content to buffer before waiting for the cue expressed
+in seconds. Default is 0.
+
+@end table
+
 @anchor{curves}
 @section curves
 
@@ -6633,6 +7551,65 @@
 The default is disabled.
 @end table
 
+@section deblock
+
+Remove blocking artifacts from input video.
+
+The filter accepts the following options:
+
+@table @option
+@item filter
+Set filter type, can be @var{weak} or @var{strong}. Default is @var{strong}.
+This controls what kind of deblocking is applied.
+
+@item block
+Set size of block, allowed range is from 4 to 512. Default is @var{8}.
+
+@item alpha
+@item beta
+@item gamma
+@item delta
+Set blocking detection thresholds. Allowed range is 0 to 1.
+Defaults are: @var{0.098} for @var{alpha} and @var{0.05} for the rest.
+Using higher threshold gives more deblocking strength.
+Setting @var{alpha} controls threshold detection at exact edge of block.
+Remaining options controls threshold detection near the edge. Each one for
+below/above or left/right. Setting any of those to @var{0} disables
+deblocking.
+
+@item planes
+Set planes to filter. Default is to filter all available planes.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Deblock using weak filter and block size of 4 pixels.
+@example
+deblock=filter=weak:block=4
+@end example
+
+@item
+Deblock using strong filter, block size of 4 pixels and custom thresholds for
+deblocking more edges.
+@example
+deblock=filter=strong:block=4:alpha=0.12:beta=0.07:gamma=0.06:delta=0.05
+@end example
+
+@item
+Similar as above, but filter only first plane.
+@example
+deblock=filter=strong:block=4:alpha=0.12:beta=0.07:gamma=0.06:delta=0.05:planes=1
+@end example
+
+@item
+Similar as above, but filter only second and third plane.
+@example
+deblock=filter=strong:block=4:alpha=0.12:beta=0.07:gamma=0.06:delta=0.05:planes=6
+@end example
+@end itemize
+
 @anchor{decimate}
 @section decimate
 
@@ -6673,6 +7650,29 @@
 @code{1}.
 @end table
 
+@section deconvolve
+
+Apply 2D deconvolution of video stream in frequency domain using second stream
+as impulse.
+
+The filter accepts the following options:
+
+@table @option
+@item planes
+Set which planes to process.
+
+@item impulse
+Set which impulse video frames will be processed, can be @var{first}
+or @var{all}. Default is @var{all}.
+
+@item noise
+Set noise when doing divisions. Default is @var{0.0000001}. Useful when width
+and height are not same and not power of 2 or if stream prior to convolving
+had noise.
+@end table
+
+The @code{deconvolve} filter also supports the @ref{framesync} options.
+
 @section deflate
 
 Apply deflate effect to the video.
@@ -6887,10 +7887,6 @@
 If set then a detailed log of the motion search is written to the
 specified file.
 
-@item opencl
-If set to 1, specify using OpenCL capabilities, only available if
-FFmpeg was configured with @code{--enable-opencl}. Default value is 0.
-
 @end table
 
 @section despill
@@ -7055,14 +8051,20 @@
 
 @item color, c
 Specify the color of the box to write. For the general syntax of this option,
-check the "Color" section in the ffmpeg-utils manual. If the special
+check the @ref{color syntax,,"Color" section in the ffmpeg-utils manual,ffmpeg-utils}. If the special
 value @code{invert} is used, the box edge color is the same as the
 video with inverted luma.
 
 @item thickness, t
-The expression which sets the thickness of the box edge. Default value is @code{3}.
+The expression which sets the thickness of the box edge.
+A value of @code{fill} will create a filled box. Default value is @code{3}.
 
 See below for the list of accepted constants.
+
+@item replace
+Applicable if the input has alpha. With value @code{1}, the pixels of the painted box
+will overwrite the video's color and alpha pixels.
+Default is @code{0}, which composites the box onto the input, leaving the video's alpha intact.
 @end table
 
 The parameters for @var{x}, @var{y}, @var{w} and @var{h} and @var{t} are expressions containing the
@@ -7123,7 +8125,7 @@
 @item
 Fill the box with pink color:
 @example
-drawbox=x=10:y=10:w=100:h=100:color=pink@@0.5:t=max
+drawbox=x=10:y=10:w=100:h=100:color=pink@@0.5:t=fill
 @end example
 
 @item
@@ -7152,7 +8154,7 @@
 
 @item color, c
 Specify the color of the grid. For the general syntax of this option,
-check the "Color" section in the ffmpeg-utils manual. If the special
+check the @ref{color syntax,,"Color" section in the ffmpeg-utils manual,ffmpeg-utils}. If the special
 value @code{invert} is used, the grid color is the same as the
 video with inverted luma.
 
@@ -7160,6 +8162,11 @@
 The expression which sets the thickness of the grid line. Default value is @code{1}.
 
 See below for the list of accepted constants.
+
+@item replace
+Applicable if the input has alpha. With @code{1} the pixels of the painted grid
+will overwrite the video's color and alpha pixels.
+Default is @code{0}, which composites the grid onto the input, leaving the video's alpha intact.
 @end table
 
 The parameters for @var{x}, @var{y}, @var{w} and @var{h} and @var{t} are expressions containing the
@@ -7243,7 +8250,7 @@
 
 @item boxcolor
 The color to be used for drawing box around text. For the syntax of this
-option, check the "Color" section in the ffmpeg-utils manual.
+option, check the @ref{color syntax,,"Color" section in the ffmpeg-utils manual,ffmpeg-utils}.
 
 The default value of @var{boxcolor} is "white".
 
@@ -7257,7 +8264,7 @@
 
 @item bordercolor
 Set the color to be used for drawing border around text. For the syntax of this
-option, check the "Color" section in the ffmpeg-utils manual.
+option, check the @ref{color syntax,,"Color" section in the ffmpeg-utils manual,ffmpeg-utils}.
 
 The default value of @var{bordercolor} is "black".
 
@@ -7278,7 +8285,7 @@
 
 @item fontcolor
 The color to be used for drawing fonts. For the syntax of this option, check
-the "Color" section in the ffmpeg-utils manual.
+the @ref{color syntax,,"Color" section in the ffmpeg-utils manual,ffmpeg-utils}.
 
 The default value of @var{fontcolor} is "black".
 
@@ -7341,7 +8348,8 @@
 
 @item shadowcolor
 The color to be used for drawing a shadow behind the drawn text. For the
-syntax of this option, check the "Color" section in the ffmpeg-utils manual.
+syntax of this option, check the @ref{color syntax,,"Color" section in the
+ffmpeg-utils manual,ffmpeg-utils}.
 
 The default value of @var{shadowcolor} is "black".
 
@@ -7365,7 +8373,9 @@
 option must be specified.
 
 @item timecode_rate, rate, r
-Set the timecode frame rate (timecode only).
+Set the timecode frame rate (timecode only). Value will be rounded to nearest
+integer. Minimum value is "1".
+Drop-frame timecode is supported for frame rates 30 & 60.
 
 @item tc24hmax
 If set to 1, the output of the timecode option will wrap around at 24 hours.
@@ -7556,6 +8566,10 @@
 
 The second argument is an offset added to the timestamp.
 
+If the format is set to @code{hms}, a third argument @code{24HH} may be
+supplied to present the hour part of the formatted timestamp in 24h format
+(00-23).
+
 If the format is set to @code{localtime} or @code{gmtime},
 a third argument may be supplied: a strftime() format string.
 By default, @var{YYYY-MM-DD HH:MM:SS} format will be used.
@@ -7698,9 +8712,14 @@
 
 @item colormix
 Mix the colors to create a paint/cartoon effect.
-@end table
 
+@item canny
+Apply Canny edge detector on all selected planes.
+@end table
 Default value is @var{wires}.
+
+@item planes
+Select planes for filtering. By default all available planes are filtered.
 @end table
 
 @subsection Examples
@@ -7924,6 +8943,20 @@
 length greater than 256.
 @end table
 
+@section entropy
+
+Measure graylevel entropy in histogram of color channels of video frames.
+
+It accepts the following parameters:
+
+@table @option
+@item mode
+Can be either @var{normal} or @var{diff}. Default is @var{normal}.
+
+@var{diff} mode measures entropy of histogram delta values, absolute differences
+between neighbour histogram values.
+@end table
+
 @section fade
 
 Apply a fade-in/out effect to the input video.
@@ -8099,6 +9132,40 @@
 
 @end itemize
 
+@section fftdnoiz
+Denoise frames using 3D FFT (frequency domain filtering).
+
+The filter accepts the following options:
+
+@table @option
+@item sigma
+Set the noise sigma constant. This sets denoising strength.
+Default value is 1. Allowed range is from 0 to 30.
+Using very high sigma with low overlap may give blocking artifacts.
+
+@item amount
+Set amount of denoising. By default all detected noise is reduced.
+Default value is 1. Allowed range is from 0 to 1.
+
+@item block
+Set size of block, Default is 4, can be 3, 4, 5 or 6.
+Actual size of block in pixels is 2 to power of @var{block}, so by default
+block size in pixels is 2^4 which is 16.
+
+@item overlap
+Set block overlap. Default is 0.5. Allowed range is from 0.2 to 0.8.
+
+@item prev
+Set number of previous frames to use for denoising. By default is set to 0.
+
+@item next
+Set number of next frames to to use for denoising. By default is set to 0.
+
+@item planes
+Set planes which will be filtered, by default are all available filtered
+except alpha.
+@end table
+
 @section field
 
 Extract a single field from an interlaced image using stride
@@ -8525,6 +9592,48 @@
 
 It does not take parameters.
 
+@section fillborders
+
+Fill borders of the input video, without changing video stream dimensions.
+Sometimes video can have garbage at the four edges and you may not want to
+crop video input to keep size multiple of some number.
+
+This filter accepts the following options:
+
+@table @option
+@item left
+Number of pixels to fill from left border.
+
+@item right
+Number of pixels to fill from right border.
+
+@item top
+Number of pixels to fill from top border.
+
+@item bottom
+Number of pixels to fill from bottom border.
+
+@item mode
+Set fill mode.
+
+It accepts the following values:
+@table @samp
+@item smear
+fill pixels using outermost pixels
+
+@item mirror
+fill pixels using mirroring
+
+@item fixed
+fill pixels with constant value
+@end table
+
+Default is @var{smear}.
+
+@item color
+Set color for pixels in fixed mode. Default is @var{black}.
+@end table
+
 @section find_rect
 
 Find a rectangular object
@@ -8806,7 +9915,7 @@
 0 and 100 to indicate a new scene; a low value reflects a low
 probability for the current frame to introduce a new scene, while a higher
 value means the current frame is more likely to be one.
-The default is @code{7}.
+The default is @code{8.2}.
 
 @item flags
 Specify flags influencing the filter process.
@@ -8859,8 +9968,9 @@
 A frei0r effect parameter can be a boolean (its value is either
 "y" or "n"), a double, a color (specified as
 @var{R}/@var{G}/@var{B}, where @var{R}, @var{G}, and @var{B} are floating point
-numbers between 0.0 and 1.0, inclusive) or by a color description specified in the "Color"
-section in the ffmpeg-utils manual), a position (specified as @var{X}/@var{Y}, where
+numbers between 0.0 and 1.0, inclusive) or a color description as specified in the
+@ref{color syntax,,"Color" section in the ffmpeg-utils manual,ffmpeg-utils},
+a position (specified as @var{X}/@var{Y}, where
 @var{X} and @var{Y} are floating point numbers) and/or a string.
 
 The number and types of parameters depend on the loaded effect. If an
@@ -9125,6 +10235,47 @@
 
 @end itemize
 
+@section greyedge
+A color constancy variation filter which estimates scene illumination via grey edge algorithm
+and corrects the scene colors accordingly.
+
+See: @url{https://staff.science.uva.nl/th.gevers/pub/GeversTIP07.pdf}
+
+The filter accepts the following options:
+
+@table @option
+@item difford
+The order of differentiation to be applied on the scene. Must be chosen in the range
+[0,2] and default value is 1.
+
+@item minknorm
+The Minkowski parameter to be used for calculating the Minkowski distance. Must
+be chosen in the range [0,20] and default value is 1. Set to 0 for getting
+max value instead of calculating Minkowski distance.
+
+@item sigma
+The standard deviation of Gaussian blur to be applied on the scene. Must be
+chosen in the range [0,1024.0] and default value = 1. floor( @var{sigma} * break_off_sigma(3) )
+can't be euqal to 0 if @var{difford} is greater than 0.
+@end table
+
+@subsection Examples
+@itemize
+
+@item
+Grey Edge:
+@example
+greyedge=difford=1:minknorm=5:sigma=2
+@end example
+
+@item
+Max Edge:
+@example
+greyedge=difford=1:minknorm=0:sigma=2
+@end example
+
+@end itemize
+
 @anchor{haldclut}
 @section haldclut
 
@@ -9865,15 +11016,17 @@
 @item cx
 Relative x-coordinate of the focal point of the image, and thereby the center of the
 distortion. This value has a range [0,1] and is expressed as fractions of the image
-width.
+width. Default is 0.5.
 @item cy
 Relative y-coordinate of the focal point of the image, and thereby the center of the
 distortion. This value has a range [0,1] and is expressed as fractions of the image
-height.
+height. Default is 0.5.
 @item k1
-Coefficient of the quadratic correction term. 0.5 means no correction.
+Coefficient of the quadratic correction term. This value has a range [-1,1]. 0 means
+no correction. Default is 0.
 @item k2
-Coefficient of the double quadratic correction term. 0.5 means no correction.
+Coefficient of the double quadratic correction term. This value has a range [-1,1].
+0 means no correction. Default is 0.
 @end table
 
 The formula that generates the correction is:
@@ -9883,27 +11036,136 @@
 where @var{r_0} is halve of the image diagonal and @var{r_src} and @var{r_tgt} are the
 distances from the focal point in the source and target images, respectively.
 
+@section lensfun
+
+Apply lens correction via the lensfun library (@url{http://lensfun.sourceforge.net/}).
+
+The @code{lensfun} filter requires the camera make, camera model, and lens model
+to apply the lens correction. The filter will load the lensfun database and
+query it to find the corresponding camera and lens entries in the database. As
+long as these entries can be found with the given options, the filter can
+perform corrections on frames. Note that incomplete strings will result in the
+filter choosing the best match with the given options, and the filter will
+output the chosen camera and lens models (logged with level "info"). You must
+provide the make, camera model, and lens model as they are required.
+
+The filter accepts the following options:
+
+@table @option
+@item make
+The make of the camera (for example, "Canon"). This option is required.
+
+@item model
+The model of the camera (for example, "Canon EOS 100D"). This option is
+required.
+
+@item lens_model
+The model of the lens (for example, "Canon EF-S 18-55mm f/3.5-5.6 IS STM"). This
+option is required.
+
+@item mode
+The type of correction to apply. The following values are valid options:
+
+@table @samp
+@item vignetting
+Enables fixing lens vignetting.
+
+@item geometry
+Enables fixing lens geometry. This is the default.
+
+@item subpixel
+Enables fixing chromatic aberrations.
+
+@item vig_geo
+Enables fixing lens vignetting and lens geometry.
+
+@item vig_subpixel
+Enables fixing lens vignetting and chromatic aberrations.
+
+@item distortion
+Enables fixing both lens geometry and chromatic aberrations.
+
+@item all
+Enables all possible corrections.
+
+@end table
+@item focal_length
+The focal length of the image/video (zoom; expected constant for video). For
+example, a 18--55mm lens has focal length range of [18--55], so a value in that
+range should be chosen when using that lens. Default 18.
+
+@item aperture
+The aperture of the image/video (expected constant for video). Note that
+aperture is only used for vignetting correction. Default 3.5.
+
+@item focus_distance
+The focus distance of the image/video (expected constant for video). Note that
+focus distance is only used for vignetting and only slightly affects the
+vignetting correction process. If unknown, leave it at the default value (which
+is 1000).
+
+@item target_geometry
+The target geometry of the output image/video. The following values are valid
+options:
+
+@table @samp
+@item rectilinear (default)
+@item fisheye
+@item panoramic
+@item equirectangular
+@item fisheye_orthographic
+@item fisheye_stereographic
+@item fisheye_equisolid
+@item fisheye_thoby
+@end table
+@item reverse
+Apply the reverse of image correction (instead of correcting distortion, apply
+it).
+
+@item interpolation
+The type of interpolation used when correcting distortion. The following values
+are valid options:
+
+@table @samp
+@item nearest
+@item linear (default)
+@item lanczos
+@end table
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Apply lens correction with make "Canon", camera model "Canon EOS 100D", and lens
+model "Canon EF-S 18-55mm f/3.5-5.6 IS STM" with focal length of "18" and
+aperture of "8.0".
+
+@example
+ffmpeg -i input.mov -vf lensfun=make=Canon:model="Canon EOS 100D":lens_model="Canon EF-S 18-55mm f/3.5-5.6 IS STM":focal_length=18:aperture=8 -c:v h264 -b:v 8000k output.mov
+@end example
+
+@item
+Apply the same as before, but only for the first 5 seconds of video.
+
+@example
+ffmpeg -i input.mov -vf lensfun=make=Canon:model="Canon EOS 100D":lens_model="Canon EF-S 18-55mm f/3.5-5.6 IS STM":focal_length=18:aperture=8:enable='lte(t\,5)' -c:v h264 -b:v 8000k output.mov
+@end example
+
+@end itemize
+
 @section libvmaf
 
-Obtain the average VMAF (Video Multi-Method Assessment Fusion)
+Obtain the VMAF (Video Multi-Method Assessment Fusion)
 score between two input videos.
 
-This filter takes two input videos.
-
-Both video inputs must have the same resolution and pixel format for
-this filter to work correctly. Also it assumes that both inputs
-have the same number of frames, which are compared one by one.
-
-The obtained average VMAF score is printed through the logging system.
+The obtained VMAF score is printed through the logging system.
 
 It requires Netflix's vmaf library (libvmaf) as a pre-requisite.
 After installing the library it can be enabled using:
-@code{./configure --enable-libvmaf}.
+@code{./configure --enable-libvmaf --enable-version3}.
 If no model path is specified it uses the default model: @code{vmaf_v0.6.1.pkl}.
 
-On the below examples the input file @file{main.mpg} being processed is
-compared with the reference file @file{ref.mpg}.
-
 The filter has following options:
 
 @table @option
@@ -9934,12 +11196,23 @@
 Enables computing ms_ssim along with vmaf.
 
 @item pool
-Set the pool method to be used for computing vmaf.
+Set the pool method (mean, min or harmonic mean) to be used for computing vmaf.
+
+@item n_threads
+Set number of threads to be used when computing vmaf.
+
+@item n_subsample
+Set interval for frame subsampling used when computing vmaf.
+
+@item enable_conf_interval
+Enables confidence interval.
 @end table
 
 This filter also supports the @ref{framesync} options.
 
-For example:
+On the below examples the input file @file{main.mpg} being processed is
+compared with the reference file @file{ref.mpg}.
+
 @example
 ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf -f null -
 @end example
@@ -9974,13 +11247,45 @@
 
 @table @option
 @item loop
-Set the number of loops.
+Set the number of loops. Setting this value to -1 will result in infinite loops.
+Default is 0.
 
 @item size
-Set maximal size in number of frames.
+Set maximal size in number of frames. Default is 0.
 
 @item start
-Set first frame of loop.
+Set first frame of loop. Default is 0.
+@end table
+
+@section lut1d
+
+Apply a 1D LUT to an input video.
+
+The filter accepts the following options:
+
+@table @option
+@item file
+Set the 1D LUT file name.
+
+Currently supported formats:
+@table @samp
+@item cube
+Iridas
+@end table
+
+@item interp
+Select interpolation mode.
+
+Available values are:
+
+@table @samp
+@item nearest
+Use values from the nearest defined point.
+@item linear
+Interpolate values using the linear interpolation.
+@item cubic
+Interpolate values using the cubic interpolation.
+@end table
 @end table
 
 @anchor{lut3d}
@@ -10550,6 +11855,41 @@
 Scene change detection threshold. Default is @code{5.0}.
 @end table
 
+@section mix
+
+Mix several video input streams into one video stream.
+
+A description of the accepted options follows.
+
+@table @option
+@item nb_inputs
+The number of inputs. If unspecified, it defaults to 2.
+
+@item weights
+Specify weight of each input video stream as sequence.
+Each weight is separated by space. If number of weights
+is smaller than number of @var{frames} last specified
+weight will be used for all remaining unset weights.
+
+@item scale
+Specify scale, if it is set it will be multiplied with sum
+of each weight multiplied with pixel values to give final destination
+pixel value. By default @var{scale} is auto scaled to sum of weights.
+
+@item duration
+Specify how end of stream is determined.
+@table @samp
+@item longest
+The duration of the longest input. (default)
+
+@item shortest
+The duration of the shortest input.
+
+@item first
+The duration of the first input.
+@end table
+@end table
+
 @section mpdecimate
 
 Drop frames that do not differ greatly from the previous frame in
@@ -10591,11 +11931,17 @@
 
 @section negate
 
-Negate input video.
+Negate (invert) the input video.
 
-It accepts an integer in input; if non-zero it negates the
-alpha component (if available). The default value in input is 0.
+It accepts the following option:
 
+@table @option
+
+@item negate_alpha
+With value 1, it negates the alpha component, if present. Default value is 0.
+@end table
+
+@anchor{nlmeans}
 @section nlmeans
 
 Denoise frames using Non-Local Means algorithm.
@@ -10820,6 +12166,86 @@
 noise=alls=20:allf=t+u
 @end example
 
+@section normalize
+
+Normalize RGB video (aka histogram stretching, contrast stretching).
+See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
+
+For each channel of each frame, the filter computes the input range and maps
+it linearly to the user-specified output range. The output range defaults
+to the full dynamic range from pure black to pure white.
+
+Temporal smoothing can be used on the input range to reduce flickering (rapid
+changes in brightness) caused when small dark or bright objects enter or leave
+the scene. This is similar to the auto-exposure (automatic gain control) on a
+video camera, and, like a video camera, it may cause a period of over- or
+under-exposure of the video.
+
+The R,G,B channels can be normalized independently, which may cause some
+color shifting, or linked together as a single channel, which prevents
+color shifting. Linked normalization preserves hue. Independent normalization
+does not, so it can be used to remove some color casts. Independent and linked
+normalization can be combined in any ratio.
+
+The normalize filter accepts the following options:
+
+@table @option
+@item blackpt
+@item whitept
+Colors which define the output range. The minimum input value is mapped to
+the @var{blackpt}. The maximum input value is mapped to the @var{whitept}.
+The defaults are black and white respectively. Specifying white for
+@var{blackpt} and black for @var{whitept} will give color-inverted,
+normalized video. Shades of grey can be used to reduce the dynamic range
+(contrast). Specifying saturated colors here can create some interesting
+effects.
+
+@item smoothing
+The number of previous frames to use for temporal smoothing. The input range
+of each channel is smoothed using a rolling average over the current frame
+and the @var{smoothing} previous frames. The default is 0 (no temporal
+smoothing).
+
+@item independence
+Controls the ratio of independent (color shifting) channel normalization to
+linked (color preserving) normalization. 0.0 is fully linked, 1.0 is fully
+independent. Defaults to 1.0 (fully independent).
+
+@item strength
+Overall strength of the filter. 1.0 is full strength. 0.0 is a rather
+expensive no-op. Defaults to 1.0 (full strength).
+
+@end table
+
+@subsection Examples
+
+Stretch video contrast to use the full dynamic range, with no temporal
+smoothing; may flicker depending on the source content:
+@example
+normalize=blackpt=black:whitept=white:smoothing=0
+@end example
+
+As above, but with 50 frames of temporal smoothing; flicker should be
+reduced, depending on the source content:
+@example
+normalize=blackpt=black:whitept=white:smoothing=50
+@end example
+
+As above, but with hue-preserving linked channel normalization:
+@example
+normalize=blackpt=black:whitept=white:smoothing=50:independence=0
+@end example
+
+As above, but with half strength:
+@example
+normalize=blackpt=black:whitept=white:smoothing=50:independence=0:strength=0.5
+@end example
+
+Map the darkest input color to red, the brightest input color to cyan:
+@example
+normalize=blackpt=red:whitept=cyan
+@end example
+
 @section null
 
 Pass the video source unchanged to the output.
@@ -10827,7 +12253,9 @@
 @section ocr
 Optical Character Recognition
 
-This filter uses Tesseract for optical character recognition.
+This filter uses Tesseract for optical character recognition. To enable
+compilation of this filter, you need to configure FFmpeg with
+@code{--enable-libtesseract}.
 
 It accepts the following options:
 
@@ -11099,6 +12527,10 @@
 
 @item repeatlast
 See @ref{framesync}.
+
+@item alpha
+Set format of alpha of the overlaid video, it can be @var{straight} or
+@var{premultiplied}. Default is @var{straight}.
 @end table
 
 The @option{x}, and @option{y} expressions can contain the following
@@ -11312,7 +12744,8 @@
 
 @item color
 Specify the color of the padded area. For the syntax of this option,
-check the "Color" section in the ffmpeg-utils manual.
+check the @ref{color syntax,,"Color" section in the ffmpeg-utils
+manual,ffmpeg-utils}.
 
 The default value of @var{color} is "black".
 
@@ -11458,6 +12891,9 @@
 to disable this option for a standalone image.
 Set by default.
 
+@item transparency_color
+Set the color that will be used as background for transparency.
+
 @item stats_mode
 Set statistics mode.
 
@@ -11545,6 +12981,13 @@
 
 @item new
 Take new palette for each output frame.
+
+@item alpha_threshold
+Sets the alpha threshold for transparency. Alpha values above this threshold
+will be treated as completely opaque, and values below this threshold will be
+treated as completely transparent.
+
+The option must be an integer value in the range [0,255]. Default is @var{128}.
 @end table
 
 @subsection Examples
@@ -11974,6 +13417,136 @@
 Set value which will be added to filtered result.
 @end table
 
+@anchor{program_opencl}
+@section program_opencl
+
+Filter video using an OpenCL program.
+
+@table @option
+
+@item source
+OpenCL program source file.
+
+@item kernel
+Kernel name in program.
+
+@item inputs
+Number of inputs to the filter.  Defaults to 1.
+
+@item size, s
+Size of output frames.  Defaults to the same as the first input.
+
+@end table
+
+The program source file must contain a kernel function with the given name,
+which will be run once for each plane of the output.  Each run on a plane
+gets enqueued as a separate 2D global NDRange with one work-item for each
+pixel to be generated.  The global ID offset for each work-item is therefore
+the coordinates of a pixel in the destination image.
+
+The kernel function needs to take the following arguments:
+@itemize
+@item
+Destination image, @var{__write_only image2d_t}.
+
+This image will become the output; the kernel should write all of it.
+@item
+Frame index, @var{unsigned int}.
+
+This is a counter starting from zero and increasing by one for each frame.
+@item
+Source images, @var{__read_only image2d_t}.
+
+These are the most recent images on each input.  The kernel may read from
+them to generate the output, but they can't be written to.
+@end itemize
+
+Example programs:
+
+@itemize
+@item
+Copy the input to the output (output must be the same size as the input).
+@verbatim
+__kernel void copy(__write_only image2d_t destination,
+                   unsigned int index,
+                   __read_only  image2d_t source)
+{
+    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE;
+
+    int2 location = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 value = read_imagef(source, sampler, location);
+
+    write_imagef(destination, location, value);
+}
+@end verbatim
+
+@item
+Apply a simple transformation, rotating the input by an amount increasing
+with the index counter.  Pixel values are linearly interpolated by the
+sampler, and the output need not have the same dimensions as the input.
+@verbatim
+__kernel void rotate_image(__write_only image2d_t dst,
+                           unsigned int index,
+                           __read_only  image2d_t src)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_LINEAR);
+
+    float angle = (float)index / 100.0f;
+
+    float2 dst_dim = convert_float2(get_image_dim(dst));
+    float2 src_dim = convert_float2(get_image_dim(src));
+
+    float2 dst_cen = dst_dim / 2.0f;
+    float2 src_cen = src_dim / 2.0f;
+
+    int2   dst_loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float2 dst_pos = convert_float2(dst_loc) - dst_cen;
+    float2 src_pos = {
+        cos(angle) * dst_pos.x - sin(angle) * dst_pos.y,
+        sin(angle) * dst_pos.x + cos(angle) * dst_pos.y
+    };
+    src_pos = src_pos * src_dim / dst_dim;
+
+    float2 src_loc = src_pos + src_cen;
+
+    if (src_loc.x < 0.0f      || src_loc.y < 0.0f ||
+        src_loc.x > src_dim.x || src_loc.y > src_dim.y)
+        write_imagef(dst, dst_loc, 0.5f);
+    else
+        write_imagef(dst, dst_loc, read_imagef(src, sampler, src_loc));
+}
+@end verbatim
+
+@item
+Blend two inputs together, with the amount of each input used varying
+with the index counter.
+@verbatim
+__kernel void blend_images(__write_only image2d_t dst,
+                           unsigned int index,
+                           __read_only  image2d_t src1,
+                           __read_only  image2d_t src2)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_LINEAR);
+
+    float blend = (cos((float)index / 50.0f) + 1.0f) / 2.0f;
+
+    int2  dst_loc = (int2)(get_global_id(0), get_global_id(1));
+    int2 src1_loc = dst_loc * get_image_dim(src1) / get_image_dim(dst);
+    int2 src2_loc = dst_loc * get_image_dim(src2) / get_image_dim(dst);
+
+    float4 val1 = read_imagef(src1, sampler, src1_loc);
+    float4 val2 = read_imagef(src2, sampler, src2_loc);
+
+    write_imagef(dst, dst_loc, val1 * blend + val2 * (1.0f - blend));
+}
+@end verbatim
+
+@end itemize
+
 @section pseudocolor
 
 Alter frame colors in video with pseudocolors.
@@ -12105,7 +13678,7 @@
 Mean Square Error pixel-by-pixel average difference of the compared
 frames, averaged over all the image components.
 
-@item mse_y, mse_u, mse_v, mse_r, mse_g, mse_g, mse_a
+@item mse_y, mse_u, mse_v, mse_r, mse_g, mse_b, mse_a
 Mean Square Error pixel-by-pixel average difference of the compared
 frames for the component specified by the suffix.
 
@@ -12564,8 +14137,9 @@
 
 @item fillcolor, c
 Set the color used to fill the output area not covered by the rotated
-image. For the general syntax of this option, check the "Color" section in the
-ffmpeg-utils manual. If the special value "none" is selected then no
+image. For the general syntax of this option, check the
+@ref{color syntax,,"Color" section in the ffmpeg-utils manual,ffmpeg-utils}.
+If the special value "none" is selected then no
 background is printed (useful for example if the background is never shown).
 
 Default value is "black".
@@ -12847,13 +14421,13 @@
 range depends on the pixel format. Possible values:
 
 @table @samp
-@item auto
+@item auto/unknown
 Choose automatically.
 
 @item jpeg/full/pc
 Set full range (0-255 in case of 8-bit luma).
 
-@item mpeg/tv
+@item mpeg/limited/tv
 Set "MPEG" range (16-235 in case of 8-bit luma).
 @end table
 
@@ -13012,6 +14586,19 @@
 @example
 scale=w='min(500\, iw*3/2):h=-1'
 @end example
+
+@item
+Make pixels square by combining scale and setsar:
+@example
+scale='trunc(ih*dar):ih',setsar=1/1
+@end example
+
+@item
+Make pixels square by combining scale and setsar,
+making sure the resulting resolution is even (required by some codecs):
+@example
+scale='trunc(ih*dar/2)*2:trunc(ih/2)*2',setsar=1/1
+@end example
 @end itemize
 
 @subsection Commands
@@ -14143,6 +15730,56 @@
 @code{0} (not enabled).
 @end table
 
+@section sr
+
+Scale the input by applying one of the super-resolution methods based on
+convolutional neural networks. Supported models:
+
+@itemize
+@item
+Super-Resolution Convolutional Neural Network model (SRCNN).
+See @url{https://arxiv.org/abs/1501.00092}.
+
+@item
+Efficient Sub-Pixel Convolutional Neural Network model (ESPCN).
+See @url{https://arxiv.org/abs/1609.05158}.
+@end itemize
+
+Training scripts as well as scripts for model generation are provided in
+the repository at @url{https://github.com/HighVoltageRocknRoll/sr.git}.
+
+The filter accepts the following options:
+
+@table @option
+@item dnn_backend
+Specify which DNN backend to use for model loading and execution. This option accepts
+the following values:
+
+@table @samp
+@item native
+Native implementation of DNN loading and execution.
+
+@item tensorflow
+TensorFlow backend. To enable this backend you
+need to install the TensorFlow for C library (see
+@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with
+@code{--enable-libtensorflow}
+@end table
+
+Default value is @samp{native}.
+
+@item model
+Set path to model file specifying network architecture and its parameters.
+Note that different backends use different file formats. TensorFlow backend
+can load files for both formats, while native backend can load files for only
+its format.
+
+@item scale_factor
+Set scale factor for SRCNN model. Allowed values are @code{2}, @code{3} and @code{4}.
+Default value is @code{2}. Scale factor is necessary for SRCNN model, because it accepts
+input upscaled using bicubic upscaling with proper scale factor.
+@end table
+
 @anchor{subtitles}
 @section subtitles
 
@@ -14209,10 +15846,10 @@
 subtitles=video.mkv:si=1
 @end example
 
-To make the subtitles stream from @file{sub.srt} appear in transparent green
+To make the subtitles stream from @file{sub.srt} appear in 80% transparent blue
 @code{DejaVu Serif}, use:
 @example
-subtitles=sub.srt:force_style='FontName=DejaVu Serif,PrimaryColour=&HAA00FF00'
+subtitles=sub.srt:force_style='FontName=DejaVu Serif,PrimaryColour=&HCCFF0000'
 @end example
 
 @section super2xsai
@@ -14434,8 +16071,17 @@
 
 @item color
 Specify the color of the unused area. For the syntax of this option, check the
-"Color" section in the ffmpeg-utils manual. The default value of @var{color}
-is "black".
+@ref{color syntax,,"Color" section in the ffmpeg-utils manual,ffmpeg-utils}.
+The default value of @var{color} is "black".
+
+@item overlap
+Set the number of frames to overlap when tiling several successive frames together.
+The value must be between @code{0} and @var{nb_frames - 1}.
+
+@item init_padding
+Set the number of frames to initially be empty before displaying first output frame.
+This controls how soon will one get first output frame.
+The value must be between @code{0} and @var{nb_frames - 1}.
 @end table
 
 @subsection Examples
@@ -14695,6 +16341,50 @@
 
 @end table
 
+@section tmix
+
+Mix successive video frames.
+
+A description of the accepted options follows.
+
+@table @option
+@item frames
+The number of successive frames to mix. If unspecified, it defaults to 3.
+
+@item weights
+Specify weight of each input video frame.
+Each weight is separated by space. If number of weights is smaller than
+number of @var{frames} last specified weight will be used for all remaining
+unset weights.
+
+@item scale
+Specify scale, if it is set it will be multiplied with sum
+of each weight multiplied with pixel values to give final destination
+pixel value. By default @var{scale} is auto scaled to sum of weights.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Average 7 successive frames:
+@example
+tmix=frames=7:weights="1 1 1 1 1 1 1"
+@end example
+
+@item
+Apply simple temporal convolution:
+@example
+tmix=frames=3:weights="-1 3 -1"
+@end example
+
+@item
+Similar as above but only showing temporal differences:
+@example
+tmix=frames=3:weights="-1 2 -1":scale=1
+@end example
+@end itemize
+
 @section tonemap
 Tone map colors from different dynamic ranges.
 
@@ -14802,6 +16492,7 @@
 mapping from a lower range to a higher range.
 @end table
 
+@anchor{transpose}
 @section transpose
 
 Transpose rows with columns in the input video and optionally flip it.
@@ -14881,6 +16572,47 @@
 transpose=1:portrait
 @end example
 
+@section transpose_npp
+
+Transpose rows with columns in the input video and optionally flip it.
+For more in depth examples see the @ref{transpose} video filter, which shares mostly the same options.
+
+It accepts the following parameters:
+
+@table @option
+
+@item dir
+Specify the transposition direction.
+
+Can assume the following values:
+@table @samp
+@item cclock_flip
+Rotate by 90 degrees counterclockwise and vertically flip. (default)
+
+@item clock
+Rotate by 90 degrees clockwise.
+
+@item cclock
+Rotate by 90 degrees counterclockwise.
+
+@item clock_flip
+Rotate by 90 degrees clockwise and vertically flip.
+@end table
+
+@item passthrough
+Do not apply the transposition if the input geometry matches the one
+specified by the specified value. It accepts the following values:
+@table @samp
+@item none
+Always apply transposition. (default)
+@item portrait
+Preserve portrait geometry (when @var{height} >= @var{width}).
+@item landscape
+Preserve landscape geometry (when @var{width} >= @var{height}).
+@end table
+
+@end table
+
 @section trim
 Trim the input so that the output contains one continuous subpart of the input.
 
@@ -15013,10 +16745,6 @@
 
 Default value is 0.0.
 
-@item opencl
-If set to 1, specify using OpenCL capabilities, only available if
-FFmpeg was configured with @code{--enable-opencl}. Default value is 0.
-
 @end table
 
 All parameters are optional and default to the equivalent of the
@@ -15472,6 +17200,17 @@
 ffmpeg -i in.avi -vf "vflip" out.avi
 @end example
 
+@section vfrdet
+
+Detect variable frame rate video.
+
+This filter tries to detect if the input is variable or constant frame rate.
+
+At end it will output number of frames detected as having variable delta pts,
+and ones with constant delta pts.
+If there was frames with variable delta, than it will also show min and max delta
+encountered.
+
 @anchor{vignette}
 @section vignette
 
@@ -15745,6 +17484,9 @@
 @item aflat
 Similar as above, but shows difference between blue and red chroma.
 
+@item xflat
+Similar as above, but use different colors.
+
 @item chroma
 Displays only chroma.
 
@@ -15764,6 +17506,9 @@
 
 @item green
 Display green graticule showing legal broadcast ranges.
+
+@item orange
+Display orange graticule showing legal broadcast ranges.
 @end table
 
 @item opacity, o
@@ -15831,7 +17576,7 @@
 @section xbr
 Apply the xBR high-quality magnification filter which is designed for pixel
 art. It follows a set of edge-detection rules, see
-@url{http://www.libretro.com/forums/viewtopic.php?f=6&t=134}.
+@url{https://forums.libretro.com/t/xbr-algorithm-tutorial/123}.
 
 It accepts the following option:
 
@@ -16006,7 +17751,8 @@
 @anchor{zscale}
 @section zscale
 Scale (resize) the input video, using the z.lib library:
-https://github.com/sekrit-twc/zimg.
+@url{https://github.com/sekrit-twc/zimg}. To enable compilation of this
+filter, you need to configure FFmpeg with @code{--enable-libzimg}.
 
 The zscale filter forces the output display aspect ratio to be the same
 as the input, by changing the output sample aspect ratio.
@@ -16571,8 +18317,8 @@
 value is "25".
 
 @item size, s
-Set frame size. For the syntax of this option, check the "Video
-size" section in the ffmpeg-utils manual. Default value is "640x480".
+Set frame size. For the syntax of this option, check the @ref{video size syntax,,"Video
+size" section in the ffmpeg-utils manual,ffmpeg-utils}. Default value is "640x480".
 
 @item start_scale
 Set the initial scale value. Default value is 3.0.
@@ -16771,8 +18517,8 @@
 @item mold_color
 Set mold color, for definitely dead and moldy cells.
 
-For the syntax of these 3 color options, check the "Color" section in the
-ffmpeg-utils manual.
+For the syntax of these 3 color options, check the @ref{color syntax,,"Color" section in the
+ffmpeg-utils manual,ffmpeg-utils}.
 @end table
 
 @subsection Examples
@@ -16809,13 +18555,15 @@
 @anchor{color}
 @anchor{haldclutsrc}
 @anchor{nullsrc}
+@anchor{pal75bars}
+@anchor{pal100bars}
 @anchor{rgbtestsrc}
 @anchor{smptebars}
 @anchor{smptehdbars}
 @anchor{testsrc}
 @anchor{testsrc2}
 @anchor{yuvtestsrc}
-@section allrgb, allyuv, color, haldclutsrc, nullsrc, rgbtestsrc, smptebars, smptehdbars, testsrc, testsrc2, yuvtestsrc
+@section allrgb, allyuv, color, haldclutsrc, nullsrc, pal75bars, pal100bars, rgbtestsrc, smptebars, smptehdbars, testsrc, testsrc2, yuvtestsrc
 
 The @code{allrgb} source returns frames of size 4096x4096 of all rgb colors.
 
@@ -16830,6 +18578,12 @@
 mainly useful to be employed in analysis / debugging tools, or as the
 source for filters which ignore the input data.
 
+The @code{pal75bars} source generates a color bars pattern, based on
+EBU PAL recommendations with 75% color levels.
+
+The @code{pal100bars} source generates a color bars pattern, based on
+EBU PAL recommendations with 100% color levels.
+
 The @code{rgbtestsrc} source generates an RGB test pattern useful for
 detecting RGB vs BGR issues. You should see a red, green and blue
 stripe from top to bottom.
@@ -16855,28 +18609,24 @@
 
 @table @option
 
-@item alpha
-Specify the alpha (opacity) of the background, only available in the
-@code{testsrc2} source. The value must be between 0 (fully transparent) and
-255 (fully opaque, the default).
-
-@item color, c
-Specify the color of the source, only available in the @code{color}
-source. For the syntax of this option, check the "Color" section in the
-ffmpeg-utils manual.
-
 @item level
 Specify the level of the Hald CLUT, only available in the @code{haldclutsrc}
 source. A level of @code{N} generates a picture of @code{N*N*N} by @code{N*N*N}
 pixels to be used as identity matrix for 3D lookup tables. Each component is
 coded on a @code{1/(N*N)} scale.
 
+@item color, c
+Specify the color of the source, only available in the @code{color}
+source. For the syntax of this option, check the
+@ref{color syntax,,"Color" section in the ffmpeg-utils manual,ffmpeg-utils}.
+
 @item size, s
 Specify the size of the sourced video. For the syntax of this option, check the
 @ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
 The default value is @code{320x240}.
 
-This option is not available with the @code{haldclutsrc} filter.
+This option is not available with the @code{allrgb}, @code{allyuv}, and
+@code{haldclutsrc} filters.
 
 @item rate, r
 Specify the frame rate of the sourced video, as the number of frames
@@ -16885,9 +18635,6 @@
 number or a valid video frame rate abbreviation. The default value is
 "25".
 
-@item sar
-Set the sample aspect ratio of the sourced video.
-
 @item duration, d
 Set the duration of the sourced video. See
 @ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}
@@ -16896,6 +18643,14 @@
 If not specified, or the expressed duration is negative, the video is
 supposed to be generated forever.
 
+@item sar
+Set the sample aspect ratio of the sourced video.
+
+@item alpha
+Specify the alpha (opacity) of the background, only available in the
+@code{testsrc2} source. The value must be between 0 (fully transparent) and
+255 (fully opaque, the default).
+
 @item decimals, n
 Set the number of decimals to show in the timestamp, only available in the
 @code{testsrc} source.
@@ -16905,27 +18660,32 @@
 value. Default value is 0.
 @end table
 
-For example the following:
+@subsection Examples
+
+@itemize
+@item
+Generate a video with a duration of 5.3 seconds, with size
+176x144 and a frame rate of 10 frames per second:
 @example
 testsrc=duration=5.3:size=qcif:rate=10
 @end example
 
-will generate a video with a duration of 5.3 seconds, with size
-176x144 and a frame rate of 10 frames per second.
-
+@item
 The following graph description will generate a red source
 with an opacity of 0.2, with size "qcif" and a frame rate of 10
-frames per second.
+frames per second:
 @example
 color=c=red@@0.2:s=qcif:r=10
 @end example
 
+@item
 If the input content is to be ignored, @code{nullsrc} can be used. The
 following command generates noise in the luminance plane by employing
 the @code{geq} filter:
 @example
 nullsrc=s=256x256, geq=random(1)*255:128:128
 @end example
+@end itemize
 
 @subsection Commands
 
@@ -16937,6 +18697,78 @@
 corresponding @option{color} option.
 @end table
 
+@section openclsrc
+
+Generate video using an OpenCL program.
+
+@table @option
+
+@item source
+OpenCL program source file.
+
+@item kernel
+Kernel name in program.
+
+@item size, s
+Size of frames to generate.  This must be set.
+
+@item format
+Pixel format to use for the generated frames.  This must be set.
+
+@item rate, r
+Number of frames generated every second.  Default value is '25'.
+
+@end table
+
+For details of how the program loading works, see the @ref{program_opencl}
+filter.
+
+Example programs:
+
+@itemize
+@item
+Generate a colour ramp by setting pixel values from the position of the pixel
+in the output image.  (Note that this will work with all pixel formats, but
+the generated output will not be the same.)
+@verbatim
+__kernel void ramp(__write_only image2d_t dst,
+                   unsigned int index)
+{
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 val;
+    val.xy = val.zw = convert_float2(loc) / convert_float2(get_image_dim(dst));
+
+    write_imagef(dst, loc, val);
+}
+@end verbatim
+
+@item
+Generate a Sierpinski carpet pattern, panning by a single pixel each frame.
+@verbatim
+__kernel void sierpinski_carpet(__write_only image2d_t dst,
+                                unsigned int index)
+{
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 value = 0.0f;
+    int x = loc.x + index;
+    int y = loc.y + index;
+    while (x > 0 || y > 0) {
+        if (x % 3 == 1 && y % 3 == 1) {
+            value = 1.0f;
+            break;
+        }
+        x /= 3;
+        y /= 3;
+    }
+
+    write_imagef(dst, loc, value);
+}
+@end verbatim
+
+@end itemize
+
 @c man end VIDEO SOURCES
 
 @chapter Video Sinks
@@ -17072,9 +18904,15 @@
 
 @section aphasemeter
 
-Convert input audio to a video output, displaying the audio phase.
+Measures phase of input audio, which is exported as metadata @code{lavfi.aphasemeter.phase},
+representing mean phase of current audio frame. A video output can also be produced and is
+enabled by default. The audio is passed through as first output.
 
-The filter accepts the following options:
+Audio will be rematrixed to stereo if it has a different channel layout. Phase value is in
+range @code{[-1, 1]} where @code{-1} means left and right channels are completely out of phase
+and @code{1} means channels are in phase.
+
+The filter accepts the following options, all related to its video output:
 
 @table @option
 @item rate, r
@@ -17100,11 +18938,6 @@
 Enable video output. Default is enabled.
 @end table
 
-The filter also exports the frame metadata @code{lavfi.aphasemeter.phase} which
-represents mean phase of current audio frame. Value is in range @code{[-1, 1]}.
-The @code{-1} means left and right channels are completely out of phase and
-@code{1} means channels are in phase.
-
 @section avectorscope
 
 Convert input audio to a video output, representing the audio vector
@@ -17197,6 +19030,26 @@
 Logarithmic.
 @end table
 
+@item swap
+Swap left channel axis with right channel axis.
+
+@item mirror
+Mirror axis.
+
+@table @samp
+@item none
+No mirror.
+
+@item x
+Mirror only x axis.
+
+@item y
+Mirror only y axis.
+
+@item xy
+Mirror both axis.
+@end table
+
 @end table
 
 @subsection Examples
@@ -17324,6 +19177,14 @@
 
 @end itemize
 
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item next
+Close the current segment and step to the next one
+@end table
+
 @section drawgraph, adrawgraph
 
 Draw a graph using input video or audio metadata.
@@ -17442,7 +19303,11 @@
 message mentioned above, so it is not printed anymore when this option is set,
 unless the verbose logging is set. The main graphing area contains the
 short-term loudness (3 seconds of analysis), and the gauge on the right is for
-the momentary loudness (400 milliseconds).
+the momentary loudness (400 milliseconds), but can optionally be configured
+to instead display short-term loudness (see @var{gauge}).
+
+The green area marks a  +/- 1LU target range around the target loudness
+(-23LUFS by default, unless modified through @var{target}).
 
 More information about the Loudness Recommendation EBU R128 on
 @url{http://tech.ebu.ch/loudness}.
@@ -17519,6 +19384,23 @@
 @item panlaw
 Set a specific pan law to be used for the measurement of dual mono files.
 This parameter is optional, and has a default value of -3.01dB.
+
+@item target
+Set a specific target level (in LUFS) used as relative zero in the visualization.
+This parameter is optional and has a default value of -23LUFS as specified
+by EBU R128. However, material published online may prefer a level of -16LUFS
+(e.g. for use with podcasts or video platforms).
+
+@item gauge
+Set the value displayed by the gauge. Valid values are @code{momentary} and s
+@code{shortterm}. By default the momentary value will be used, but in certain
+scenarios it may be more useful to observe the short term value instead (e.g.
+live mixing).
+
+@item scale
+Sets the display scale for the loudness. Valid parameters are @code{absolute}
+(in LUFS) or @code{relative} (LU) relative to the target. This only affects the
+video output, not the summary or continuous log output.
 @end table
 
 @subsection Examples
@@ -17679,7 +19561,7 @@
 
 @itemize
 @item
-Print all metadata values for frames with key @code{lavfi.singnalstats.YDIF} with values
+Print all metadata values for frames with key @code{lavfi.signalstats.YDIF} with values
 between 0 and 1.
 @example
 signalstats,metadata=print:key=lavfi.signalstats.YDIF:value=0:function=expr:expr='between(VALUE1,0,1)'
@@ -17807,7 +19689,7 @@
 The PTS of the last previously filtered video frame. It's NAN if undefined.
 
 @item prev_selected_t
-The PTS of the last previously selected video frame. It's NAN if undefined.
+The PTS of the last previously selected video frame, expressed in seconds. It's NAN if undefined.
 
 @item start_pts
 The PTS of the first video frame in the video. It's NAN if undefined.
@@ -18119,7 +20001,7 @@
 constants:
 
 @table @option
-@item FRAME_RATE
+@item FRAME_RATE, FR
 frame rate, only defined for constant frame-rate video
 
 @item PTS
@@ -18232,6 +20114,37 @@
 
 @end itemize
 
+@section setrange
+
+Force color range for the output video frame.
+
+The @code{setrange} filter marks the color range property for the
+output frames. It does not change the input frame, but only sets the
+corresponding property, which affects how the frame is treated by
+following filters.
+
+The filter accepts the following options:
+
+@table @option
+
+@item range
+Available values are:
+
+@table @samp
+@item auto
+Keep the same color range property.
+
+@item unspecified, unknown
+Set the color range as unspecified.
+
+@item limited, tv, mpeg
+Set the color range as limited.
+
+@item full, pc, jpeg
+Set the color range as full.
+@end table
+@end table
+
 @section settb, asettb
 
 Set the timebase to use for the output frames timestamps.
@@ -18770,6 +20683,10 @@
 each channel is displayed using the fruit color scheme
 @item cool
 each channel is displayed using the cool color scheme
+@item magma
+each channel is displayed using the magma color scheme
+@item green
+each channel is displayed using the green color scheme
 @end table
 
 Default value is @samp{channel}.
@@ -18849,6 +20766,18 @@
 @item rotation
 Set color rotation, must be in [-1.0, 1.0] range.
 Default value is @code{0}.
+
+@item start
+Set start frequency from which to display spectrogram. Default is @code{0}.
+
+@item stop
+Set stop frequency to which to display spectrogram. Default is @code{0}.
+
+@item fps
+Set upper frame rate limit. Default is @code{auto}, unlimited.
+
+@item legend
+Draw time and frequency axes and legends. Default is disabled.
 @end table
 
 The usage is very similar to the showwaves filter; see the examples in that
@@ -18919,6 +20848,10 @@
 each channel is displayed using the fruit color scheme
 @item cool
 each channel is displayed using the cool color scheme
+@item magma
+each channel is displayed using the magma color scheme
+@item green
+each channel is displayed using the green color scheme
 @end table
 Default value is @samp{intensity}.
 
@@ -18990,6 +20923,12 @@
 @item rotation
 Set color rotation, must be in [-1.0, 1.0] range.
 Default value is @code{0}.
+
+@item start
+Set start frequency from which to display spectrogram. Default is @code{0}.
+
+@item stop
+Set stop frequency to which to display spectrogram. Default is @code{0}.
 @end table
 
 @subsection Examples
@@ -19023,7 +20962,7 @@
 Set channel height, allowed range is [1, 900]. Default is 20.
 
 @item f
-Set fade, allowed range is [0.001, 1]. Default is 0.95.
+Set fade, allowed range is [0, 1]. Default is 0.95.
 
 @item c
 Set volume color expression.
@@ -19048,12 +20987,33 @@
 If set, displays volume values. Default is enabled.
 
 @item o
-Set orientation, can be @code{horizontal} or @code{vertical},
-default is @code{horizontal}.
+Set orientation, can be horizontal: @code{h} or vertical: @code{v},
+default is @code{h}.
 
 @item s
-Set step size, allowed range s [0, 5]. Default is 0, which means
+Set step size, allowed range is [0, 5]. Default is 0, which means
 step is disabled.
+
+@item p
+Set background opacity, allowed range is [0, 1]. Default is 0.
+
+@item m
+Set metering mode, can be peak: @code{p} or rms: @code{r},
+default is @code{p}.
+
+@item ds
+Set display scale, can be linear: @code{lin} or log: @code{log},
+default is @code{lin}.
+
+@item dm
+In second.
+If set to > 0., display a line for the max level
+in the previous seconds.
+default is disabled: @code{0.}
+
+@item dmc
+The color of the max line. Use when @code{dm} option is set to > 0.
+default is: @code{orange}
 @end table
 
 @section showwaves
@@ -19123,6 +21083,20 @@
 @end table
 
 Default is linear.
+
+@item draw
+Set the draw mode. This is mostly useful to set for high @var{n}.
+
+Available values are:
+@table @samp
+@item scale
+Scale pixel values for each drawn sample.
+
+@item full
+Draw every sample directly.
+@end table
+
+Default value is @code{scale}.
 @end table
 
 @subsection Examples
@@ -19333,7 +21307,7 @@
 
 @code{zmq} and @code{azmq} work as a pass-through filters. @code{zmq}
 must be inserted between two video filters, @code{azmq} between two
-audio filters.
+audio filters. Both are capable to send messages to any filter type.
 
 To enable these filters you need to install the libzmq library and
 headers and configure FFmpeg with @code{--enable-libzmq}.
@@ -19343,7 +21317,10 @@
 
 The @code{zmq} and @code{azmq} filters work as a libzmq server, which
 receives messages sent through a network interface defined by the
-@option{bind_address} option.
+@option{bind_address} (or the abbreviation "@option{b}") option.
+Default value of this option is @file{tcp://localhost:5555}. You may
+want to alter this value to your needs, but do not forget to escape any
+':' signs (see @ref{filtergraph escaping}).
 
 The received message must be in the form:
 @example
@@ -19351,7 +21328,10 @@
 @end example
 
 @var{TARGET} specifies the target of the command, usually the name of
-the filter class or a specific filter instance name.
+the filter class or a specific filter instance name. The default
+filter instance name uses the pattern @samp{Parsed_<filter_name>_<index>},
+but you can override this by using the @samp{filter_name@@id} syntax
+(see @ref{Filtergraph syntax}).
 
 @var{COMMAND} specifies the name of the command for the target filter.
 
@@ -19373,14 +21353,17 @@
 Look at @file{tools/zmqsend} for an example of a zmq client which can
 be used to send commands processed by these filters.
 
-Consider the following filtergraph generated by @command{ffplay}
+Consider the following filtergraph generated by @command{ffplay}.
+In this example the last overlay filter has an instance name. All other
+filters will have default instance names.
+
 @example
 ffplay -dumpgraph 1 -f lavfi "
 color=s=100x100:c=red  [l];
 color=s=100x100:c=blue [r];
 nullsrc=s=200x100, zmq [bg];
-[bg][l]   overlay      [bg+l];
-[bg+l][r] overlay=x=100 "
+[bg][l]   overlay     [bg+l];
+[bg+l][r] overlay@@my=x=100 "
 @end example
 
 To change the color of the left side of the video, the following
@@ -19394,6 +21377,12 @@
 echo Parsed_color_1 c pink | tools/zmqsend
 @end example
 
+To change the position of the right side:
+@example
+echo overlay@@my x 150 | tools/zmqsend
+@end example
+
+
 @c man end MULTIMEDIA FILTERS
 
 @chapter Multimedia Sources
@@ -19432,8 +21421,8 @@
 @item streams, s
 Specifies the streams to read. Several streams can be specified,
 separated by "+". The source will then have as many outputs, in the
-same order. The syntax is explained in the ``Stream specifiers''
-section in the ffmpeg manual. Two special names, "dv" and "da" specify
+same order. The syntax is explained in the @ref{Stream specifiers,,"Stream specifiers"
+section in the ffmpeg manual,ffmpeg}. Two special names, "dv" and "da" specify
 respectively the default (best suited) video and audio stream. Default
 is "dv", or "da" if the filter is called as "amovie".
 

diff --git a/doc/formats.texi b/doc/formats.texi
index ddd7743..4f334e0 100644
--- a/doc/formats.texi
+++ b/doc/formats.texi

@@ -30,37 +30,43 @@
 @item packetsize @var{integer} (@emph{output})
 Set packet size.
 
-@item fflags @var{flags} (@emph{input/output})
-Set format flags.
+@item fflags @var{flags}
+Set format flags. Some are implemented for a limited number of formats.
 
-Possible values:
+Possible values for input files:
 @table @samp
-@item ignidx
-Ignore index.
+@item discardcorrupt
+Discard corrupted packets.
 @item fastseek
 Enable fast, but inaccurate seeks for some formats.
 @item genpts
-Generate PTS.
+Generate missing PTS if DTS is present.
+@item igndts
+Ignore DTS if PTS is set. Inert when nofillin is set.
+@item ignidx
+Ignore index.
+@item keepside (@emph{deprecated},@emph{inert})
+@item nobuffer
+Reduce the latency introduced by buffering during initial input streams analysis.
 @item nofillin
-Do not fill in missing values that can be exactly calculated.
+Do not fill in missing values in packet fields that can be exactly calculated.
 @item noparse
 Disable AVParsers, this needs @code{+nofillin} too.
-@item igndts
-Ignore DTS.
-@item discardcorrupt
-Discard corrupted frames.
 @item sortdts
-Try to interleave output packets by DTS.
-@item keepside
-Do not merge side data.
-@item latm
-Enable RTP MP4A-LATM payload.
-@item nobuffer
-Reduce the latency introduced by optional buffering
+Try to interleave output packets by DTS. At present, available only for AVIs with an index.
+@end table
+
+Possible values for output files:
+@table @samp
+@item autobsf
+Automatically apply bitstream filters as required by the output format. Enabled by default.
 @item bitexact
 Only write platform-, build- and time-independent data.
 This ensures that file and data checksums are reproducible and match between
 platforms. Its primary use is for regression testing.
+@item flush_packets
+Write out packets immediately.
+@item latm (@emph{deprecated},@emph{inert})
 @item shortest
 Stop muxing at the end of the shortest stream.
 It may be needed to increase max_interleave_delta to avoid flushing the longer
@@ -214,6 +220,10 @@
 @item max_streams @var{integer} (@emph{input})
 Specifies the maximum number of streams. This can be used to reject files that
 would require too many resources due to a large number of streams.
+
+@item skip_estimate_duration_from_pts @var{bool} (@emph{input})
+Skip estimation of input duration when calculated using PTS.
+At present, applicable for MPEG-PS and MPEG-TS.
 @end table
 
 @c man end FORMAT OPTIONS

diff --git a/doc/general.texi b/doc/general.texi
index a404006..4983134 100644
--- a/doc/general.texi
+++ b/doc/general.texi

@@ -17,6 +17,42 @@
 explicitly requested by passing the appropriate flags to
 @command{./configure}.
 
+@section libxavs2
+
+FFmpeg can make use of the xavs2 library for AVS2-P2/IEEE1857.4 video encoding.
+
+Go to @url{https://github.com/pkuvcl/xavs2} and follow the instructions for
+installing the library. Then pass @code{--enable-libxavs2} to configure to
+enable it.
+
+@float NOTE
+libxavs2 is under the GNU Public License Version 2 or later
+(see @url{http://www.gnu.org/licenses/old-licenses/gpl-2.0.html} for
+details), you must upgrade FFmpeg's license to GPL in order to use it.
+@end float
+
+@section libdavs2
+
+FFmpeg can make use of the davs2 library for AVS2-P2/IEEE1857.4 video decoding.
+
+Go to @url{https://github.com/pkuvcl/davs2} and follow the instructions for
+installing the library. Then pass @code{--enable-libdavs2} to configure to
+enable it.
+
+@float NOTE
+libdavs2 is under the GNU Public License Version 2 or later
+(see @url{http://www.gnu.org/licenses/old-licenses/gpl-2.0.html} for
+details), you must upgrade FFmpeg's license to GPL in order to use it.
+@end float
+
+@section Alliance for Open Media libaom
+
+FFmpeg can make use of the libaom library for AV1 decoding.
+
+Go to @url{http://aomedia.org/} and follow the instructions for
+installing the library. Then pass @code{--enable-libaom} to configure to
+enable it.
+
 @section OpenJPEG
 
 FFmpeg can use the OpenJPEG libraries for encoding/decoding J2K videos.  Go to
@@ -38,9 +74,10 @@
 GPL components, GPL version 3) by passing @code{--enable-version3} to configure in
 order to use it.
 
-The Fraunhofer AAC library is licensed under a license incompatible to the GPL
-and is not known to be compatible to the LGPL. Therefore, you have to pass
-@code{--enable-nonfree} to configure to use it.
+The license of the Fraunhofer AAC library is incompatible with the GPL.
+Therefore, for GPL builds, you have to pass @code{--enable-nonfree} to
+configure in order to use it. To the best of our knowledge, it is
+compatible with the LGPL.
 @end float
 
 @subsection OpenCORE AMR
@@ -63,7 +100,7 @@
 
 @subsection Fraunhofer AAC library
 
-FFmpeg can make use of the Fraunhofer AAC library for AAC encoding.
+FFmpeg can make use of the Fraunhofer AAC library for AAC decoding & encoding.
 
 Go to @url{http://sourceforge.net/projects/opencore-amr/} and follow the
 instructions for installing the library.
@@ -85,6 +122,24 @@
 instructions for installing the library.
 Then pass @code{--enable-libtwolame} to configure to enable it.
 
+@section libcodec2 / codec2 general
+
+FFmpeg can make use of libcodec2 for codec2 encoding and decoding.
+There is currently no native decoder, so libcodec2 must be used for decoding.
+
+Go to @url{http://freedv.org/}, download "Codec 2 source archive".
+Build and install using CMake. Debian users can install the libcodec2-dev package instead.
+Once libcodec2 is installed you can pass @code{--enable-libcodec2} to configure to enable it.
+
+The easiest way to use codec2 is with .c2 files, since they contain the mode information required for decoding.
+To encode such a file, use a .c2 file extension and give the libcodec2 encoder the -mode option:
+@code{ffmpeg -i input.wav -mode 700C output.c2}.
+Playback is as simple as @code{ffplay output.c2}.
+For a list of supported modes, run @code{ffmpeg -h encoder=libcodec2}.
+Raw codec2 files are also supported.
+To make sense of them the mode in use needs to be specified as a format option:
+@code{ffmpeg -f codec2raw -mode 1300 -i input.raw output.wav}.
+
 @section libvpx
 
 FFmpeg can make use of the libvpx library for VP8/VP9 encoding.
@@ -225,6 +280,18 @@
 with the @code{--enable-libmfx} option and @code{pkg-config} needs to be able to
 locate the dispatcher's @code{.pc} files.
 
+@section AMD VCE
+
+FFmpeg can use the AMD Advanced Media Framework library for accelerated H.264
+and HEVC encoding on VCE enabled hardware under Windows.
+
+To enable support you must obtain the AMF framework header files from
+@url{https://github.com/GPUOpen-LibrariesAndSDKs/AMF.git}.
+
+Create an @code{AMF/} directory in the system include path.
+Copy the contents of @code{AMF/amf/public/include/} into that directory.
+Then configure FFmpeg with @code{--enable-amf}.
+
 
 @chapter Supported File Formats, Codecs or Features
 
@@ -290,6 +357,10 @@
 @item BRSTM                     @tab   @tab X
     @tab Audio format used on the Nintendo Wii.
 @item BWF                       @tab X @tab X
+@item codec2 (raw)              @tab X @tab X
+    @tab Must be given -mode format option to decode correctly.
+@item codec2 (.c2 files)        @tab X @tab X
+    @tab Contains header with version and mode info, simplifying playback.
 @item CRI ADX                   @tab X @tab X
     @tab Audio-only format used in console video games.
 @item Discworld II BMV          @tab   @tab X
@@ -365,6 +436,7 @@
 @item Interplay MVE             @tab   @tab X
     @tab Format used in various Interplay computer games.
 @item Iterated Systems ClearVideo @tab     @tab  X
+    @tab I-frames only
 @item IV8                       @tab   @tab X
     @tab A format generated by IndigoVision 8000 video server.
 @item IVF (On2)                 @tab X @tab X
@@ -424,6 +496,7 @@
 @item NC camera feed            @tab   @tab X
     @tab NC (AVIP NC4600) camera streams
 @item NIST SPeech HEader REsources @tab   @tab X
+@item Computerized Speech Lab NSP @tab   @tab X
 @item NTT TwinVQ (VQF)          @tab   @tab X
     @tab Nippon Telegraph and Telephone Corporation TwinVQ.
 @item Nullsoft Streaming Video  @tab   @tab X
@@ -438,6 +511,10 @@
 @item QCP                       @tab   @tab X
 @item raw ADTS (AAC)            @tab X @tab X
 @item raw AC-3                  @tab X @tab X
+@item raw AMR-NB                @tab   @tab X
+@item raw AMR-WB                @tab   @tab X
+@item raw aptX                  @tab X @tab X
+@item raw aptX HD               @tab X @tab X
 @item raw Chinese AVS video     @tab X @tab X
 @item raw CRI ADX               @tab X @tab X
 @item raw Dirac                 @tab X @tab X
@@ -461,6 +538,7 @@
 @item raw NULL                  @tab X @tab
 @item raw video                 @tab X @tab X
 @item raw id RoQ                @tab X @tab
+@item raw SBC                   @tab X @tab X
 @item raw Shorten               @tab   @tab X
 @item raw TAK                   @tab   @tab X
 @item raw TrueHD                @tab X @tab X
@@ -510,7 +588,8 @@
 @item SAP                       @tab X @tab X
 @item SBG                       @tab   @tab X
 @item SDP                       @tab   @tab X
-@item Sega FILM/CPK             @tab   @tab X
+@item SER                       @tab   @tab X
+@item Sega FILM/CPK             @tab X @tab X
     @tab Used in many Sega Saturn console games.
 @item Silicon Graphics Movie    @tab   @tab X
 @item Sierra SOL                @tab   @tab X
@@ -678,6 +757,8 @@
 @item Autodesk Animator Flic video  @tab     @tab  X
 @item Autodesk RLE           @tab     @tab  X
     @tab fourcc: AASC
+@item AV1                    @tab     @tab  E
+    @tab Supported through external library libaom
 @item Avid 1:1 10-bit RGB Packer  @tab  X  @tab  X
     @tab fourcc: AVrp
 @item AVS (Audio Video Standard) video  @tab     @tab  X
@@ -751,6 +832,7 @@
     @tab fourcc: G2M2, G2M3
 @item Go2Webinar             @tab     @tab  X
     @tab fourcc: G2M4
+@item Gremlin Digital Video  @tab     @tab  X
 @item H.261                  @tab  X  @tab  X
 @item H.263 / H.263-1996     @tab  X  @tab  X
 @item H.263+ / H.263-1998 / H.263 version 2  @tab  X  @tab  X
@@ -771,6 +853,7 @@
     @tab IFF interleaved bitmap
 @item IFF ByteRun1           @tab     @tab  X
     @tab IFF run length encoded bitmap
+@item Infinity IMM4          @tab     @tab  X
 @item Intel H.263            @tab     @tab  X
 @item Intel Indeo 2          @tab     @tab  X
 @item Intel Indeo 3          @tab     @tab  X
@@ -792,7 +875,7 @@
 @item LucasArts SANM/Smush   @tab     @tab  X
     @tab Used in LucasArts games / SMUSH animations.
 @item lossless MJPEG         @tab  X  @tab  X
-@item MagicYUV Video         @tab     @tab  X
+@item MagicYUV Video         @tab  X  @tab  X
 @item Mandsoft Screen Capture Codec  @tab     @tab  X
 @item Microsoft ATC Screen   @tab     @tab  X
     @tab Also known as Microsoft Screen 3.
@@ -990,13 +1073,20 @@
 @item Amazing Studio PAF Audio @tab     @tab  X
 @item Apple lossless audio   @tab  X  @tab  X
     @tab QuickTime fourcc 'alac'
+@item aptX                   @tab  X  @tab  X
+    @tab Used in Bluetooth A2DP
+@item aptX HD                @tab  X  @tab  X
+    @tab Used in Bluetooth A2DP
 @item ATRAC1                 @tab     @tab  X
 @item ATRAC3                 @tab     @tab  X
 @item ATRAC3+                @tab     @tab  X
+@item ATRAC9                 @tab     @tab  X
 @item Bink Audio             @tab     @tab  X
     @tab Used in Bink and Smacker files in many games.
 @item CELT                   @tab     @tab  E
     @tab decoding supported through external library libcelt
+@item codec2                 @tab  E  @tab  E
+    @tab en/decoding supported through external library libcodec2
 @item Delphine Software International CIN audio  @tab     @tab  X
     @tab Codec used in Delphine Software International games.
 @item Digital Speech Standard - Standard Play mode (DSS SP) @tab     @tab  X
@@ -1095,6 +1185,8 @@
     @tab Real low bitrate AC-3 codec
 @item RealAudio Lossless     @tab     @tab  X
 @item RealAudio SIPR / ACELP.NET @tab     @tab  X
+@item SBC (low-complexity subband codec) @tab  X  @tab  X
+    @tab Used in Bluetooth A2DP
 @item Shorten                @tab     @tab  X
 @item Sierra VMD audio       @tab     @tab  X
     @tab Used in Sierra VMD files.

diff --git a/doc/indevs.texi b/doc/indevs.texi
index 55a4084..9a9cb69 100644
--- a/doc/indevs.texi
+++ b/doc/indevs.texi

@@ -63,6 +63,46 @@
 
 @end table
 
+@section android_camera
+
+Android camera input device.
+
+This input devices uses the Android Camera2 NDK API which is
+available on devices with API level 24+. The availability of
+android_camera is autodetected during configuration.
+
+This device allows capturing from all cameras on an Android device,
+which are integrated into the Camera2 NDK API.
+
+The available cameras are enumerated internally and can be selected
+with the @var{camera_index} parameter. The input file string is
+discarded.
+
+Generally the back facing camera has index 0 while the front facing
+camera has index 1.
+
+@subsection Options
+
+@table @option
+
+@item video_size
+Set the video size given as a string such as 640x480 or hd720.
+Falls back to the first available configuration reported by
+Android if requested video size is not available or by default.
+
+@item framerate
+Set the video framerate.
+Falls back to the first available configuration reported by
+Android if requested framerate is not available or by default (-1).
+
+@item camera_index
+Set the index of the camera to use. Default is 0.
+
+@item input_queue_size
+Set the maximum number of frames to buffer. Default is 5.
+
+@end table
+
 @section avfoundation
 
 AVFoundation input device.
@@ -227,7 +267,8 @@
 
 @item list_devices
 If set to @option{true}, print a list of devices and exit.
-Defaults to @option{false}.
+Defaults to @option{false}. Alternatively you can use the @code{-sources}
+option of ffmpeg to list the available input devices.
 
 @item list_formats
 If set to @option{true}, print a list of supported formats and exit.
@@ -238,6 +279,8 @@
 the supported values of your device(s) use @option{list_formats}.
 Note that there is a FourCC @option{'pal '} that can also be used
 as @option{pal} (3 letters).
+Default behavior is autodetection of the input video format, if the hardware
+supports it.
 
 @item bm_v210
 This is a deprecated option, you can use @option{raw_format} instead.
@@ -284,6 +327,12 @@
 Sets the decklink device duplex mode. Must be @samp{unset}, @samp{half} or @samp{full}.
 Defaults to @samp{unset}.
 
+@item timecode_format
+Timecode type to include in the frame and video stream metadata. Must be
+@samp{none}, @samp{rp188vitc}, @samp{rp188vitc2}, @samp{rp188ltc},
+@samp{rp188any}, @samp{vitc}, @samp{vitc2}, or @samp{serial}. Defaults to
+@samp{none} (not included).
+
 @item video_input
 Sets the video input source. Must be @samp{unset}, @samp{sdi}, @samp{hdmi},
 @samp{optical_sdi}, @samp{component}, @samp{composite} or @samp{s_video}.
@@ -296,11 +345,13 @@
 
 @item video_pts
 Sets the video packet timestamp source. Must be @samp{video}, @samp{audio},
-@samp{reference} or @samp{wallclock}. Defaults to @samp{video}.
+@samp{reference}, @samp{wallclock} or @samp{abs_wallclock}.
+Defaults to @samp{video}.
 
 @item audio_pts
 Sets the audio packet timestamp source. Must be @samp{video}, @samp{audio},
-@samp{reference} or @samp{wallclock}. Defaults to @samp{audio}.
+@samp{reference}, @samp{wallclock} or @samp{abs_wallclock}.
+Defaults to @samp{audio}.
 
 @item draw_bars
 If set to @samp{true}, color bars are drawn in the event of a signal loss.
@@ -311,6 +362,29 @@
 incoming frames will be dropped.
 Defaults to @samp{1073741824}.
 
+@item audio_depth
+Sets the audio sample bit depth. Must be @samp{16} or @samp{32}.
+Defaults to @samp{16}.
+
+@item decklink_copyts
+If set to @option{true}, timestamps are forwarded as they are without removing
+the initial offset.
+Defaults to @option{false}.
+
+@item timestamp_align
+Capture start time alignment in seconds. If set to nonzero, input frames are
+dropped till the system timestamp aligns with configured value.
+Alignment difference of upto one frame duration is tolerated.
+This is useful for maintaining input synchronization across N different
+hardware devices deployed for 'N-way' redundancy. The system time of different
+hardware devices should be synchronized with protocols such as NTP or PTP,
+before using this option.
+Note that this method is not foolproof. In some border cases input
+synchronization may not happen due to thread scheduling jitters in the OS.
+Either sync could go wrong by 1 frame or in a rarer case
+@option{timestamp_align} seconds.
+Defaults to @samp{0}.
+
 @end table
 
 @subsection Examples
@@ -349,116 +423,6 @@
 
 @end itemize
 
-@section kmsgrab
-
-KMS video input device.
-
-Captures the KMS scanout framebuffer associated with a specified CRTC or plane as a
-DRM object that can be passed to other hardware functions.
-
-Requires either DRM master or CAP_SYS_ADMIN to run.
-
-If you don't understand what all of that means, you probably don't want this.  Look at
-@option{x11grab} instead.
-
-@subsection Options
-
-@table @option
-
-@item device
-DRM device to capture on.  Defaults to @option{/dev/dri/card0}.
-
-@item format
-Pixel format of the framebuffer.  Defaults to @option{bgr0}.
-
-@item format_modifier
-Format modifier to signal on output frames.  This is necessary to import correctly into
-some APIs, but can't be autodetected.  See the libdrm documentation for possible values.
-
-@item crtc_id
-KMS CRTC ID to define the capture source.  The first active plane on the given CRTC
-will be used.
-
-@item plane_id
-KMS plane ID to define the capture source.  Defaults to the first active plane found if
-neither @option{crtc_id} nor @option{plane_id} are specified.
-
-@item framerate
-Framerate to capture at.  This is not synchronised to any page flipping or framebuffer
-changes - it just defines the interval at which the framebuffer is sampled.  Sampling
-faster than the framebuffer update rate will generate independent frames with the same
-content.  Defaults to @code{30}.
-
-@end table
-
-@subsection Examples
-
-@itemize
-
-@item
-Capture from the first active plane, download the result to normal frames and encode.
-This will only work if the framebuffer is both linear and mappable - if not, the result
-may be scrambled or fail to download.
-@example
-ffmpeg -f kmsgrab -i - -vf 'hwdownload,format=bgr0' output.mp4
-@end example
-
-@item
-Capture from CRTC ID 42 at 60fps, map the result to VAAPI, convert to NV12 and encode as H.264.
-@example
-ffmpeg -crtc_id 42 -framerate 60 -f kmsgrab -i - -vf 'hwmap=derive_device=vaapi,scale_vaapi=w=1920:h=1080:format=nv12' -c:v h264_vaapi output.mp4
-@end example
-
-@end itemize
-
-@section libndi_newtek
-
-The libndi_newtek input device provides capture capabilities for using NDI (Network
-Device Interface, standard created by NewTek).
-
-Input filename is a NDI source name that could be found by sending -find_sources 1
-to command line - it has no specific syntax but human-readable formatted.
-
-To enable this input device, you need the NDI SDK and you
-need to configure with the appropriate @code{--extra-cflags}
-and @code{--extra-ldflags}.
-
-@subsection Options
-
-@table @option
-
-@item find_sources
-If set to @option{true}, print a list of found/available NDI sources and exit.
-Defaults to @option{false}.
-
-@item wait_sources
-Override time to wait until the number of online sources have changed.
-Defaults to @option{0.5}.
-
-@item allow_video_fields
-When this flag is @option{false}, all video that you receive will be progressive.
-Defaults to @option{true}.
-
-@end table
-
-@subsection Examples
-
-@itemize
-
-@item
-List input devices:
-@example
-ffmpeg -f libndi_newtek -find_sources 1 -i dummy
-@end example
-
-@item
-Restream to NDI:
-@example
-ffmpeg -f libndi_newtek -i "DEV-5.INTERNAL.M1STEREO.TV (NDI_SOURCE_NAME_1)" -f libndi_newtek -y NDI_SOURCE_NAME_2
-@end example
-
-@end itemize
-
 @section dshow
 
 Windows DirectShow input device.
@@ -886,6 +850,68 @@
 
 @end table
 
+@section kmsgrab
+
+KMS video input device.
+
+Captures the KMS scanout framebuffer associated with a specified CRTC or plane as a
+DRM object that can be passed to other hardware functions.
+
+Requires either DRM master or CAP_SYS_ADMIN to run.
+
+If you don't understand what all of that means, you probably don't want this.  Look at
+@option{x11grab} instead.
+
+@subsection Options
+
+@table @option
+
+@item device
+DRM device to capture on.  Defaults to @option{/dev/dri/card0}.
+
+@item format
+Pixel format of the framebuffer.  Defaults to @option{bgr0}.
+
+@item format_modifier
+Format modifier to signal on output frames.  This is necessary to import correctly into
+some APIs, but can't be autodetected.  See the libdrm documentation for possible values.
+
+@item crtc_id
+KMS CRTC ID to define the capture source.  The first active plane on the given CRTC
+will be used.
+
+@item plane_id
+KMS plane ID to define the capture source.  Defaults to the first active plane found if
+neither @option{crtc_id} nor @option{plane_id} are specified.
+
+@item framerate
+Framerate to capture at.  This is not synchronised to any page flipping or framebuffer
+changes - it just defines the interval at which the framebuffer is sampled.  Sampling
+faster than the framebuffer update rate will generate independent frames with the same
+content.  Defaults to @code{30}.
+
+@end table
+
+@subsection Examples
+
+@itemize
+
+@item
+Capture from the first active plane, download the result to normal frames and encode.
+This will only work if the framebuffer is both linear and mappable - if not, the result
+may be scrambled or fail to download.
+@example
+ffmpeg -f kmsgrab -i - -vf 'hwdownload,format=bgr0' output.mp4
+@end example
+
+@item
+Capture from CRTC ID 42 at 60fps, map the result to VAAPI, convert to NV12 and encode as H.264.
+@example
+ffmpeg -crtc_id 42 -framerate 60 -f kmsgrab -i - -vf 'hwmap=derive_device=vaapi,scale_vaapi=w=1920:h=1080:format=nv12' -c:v h264_vaapi output.mp4
+@end example
+
+@end itemize
+
 @section lavfi
 
 Libavfilter input virtual device.
@@ -1024,6 +1050,54 @@
 
 Requires the configure option @code{--enable-libdc1394}.
 
+@section libndi_newtek
+
+The libndi_newtek input device provides capture capabilities for using NDI (Network
+Device Interface, standard created by NewTek).
+
+Input filename is a NDI source name that could be found by sending -find_sources 1
+to command line - it has no specific syntax but human-readable formatted.
+
+To enable this input device, you need the NDI SDK and you
+need to configure with the appropriate @code{--extra-cflags}
+and @code{--extra-ldflags}.
+
+@subsection Options
+
+@table @option
+
+@item find_sources
+If set to @option{true}, print a list of found/available NDI sources and exit.
+Defaults to @option{false}.
+
+@item wait_sources
+Override time to wait until the number of online sources have changed.
+Defaults to @option{0.5}.
+
+@item allow_video_fields
+When this flag is @option{false}, all video that you receive will be progressive.
+Defaults to @option{true}.
+
+@end table
+
+@subsection Examples
+
+@itemize
+
+@item
+List input devices:
+@example
+ffmpeg -f libndi_newtek -find_sources 1 -i dummy
+@end example
+
+@item
+Restream to NDI:
+@example
+ffmpeg -f libndi_newtek -i "DEV-5.INTERNAL.M1STEREO.TV (NDI_SOURCE_NAME_1)" -f libndi_newtek -y NDI_SOURCE_NAME_2
+@end example
+
+@end itemize
+
 @section openal
 
 The OpenAL input device provides audio capture on all systems with a
@@ -1142,7 +1216,6 @@
 
 @end table
 
-
 @section pulse
 
 PulseAudio input device.

diff --git a/doc/issue_tracker.txt b/doc/issue_tracker.txt
index e8e8530..5d9805a 100644
--- a/doc/issue_tracker.txt
+++ b/doc/issue_tracker.txt

@@ -193,9 +193,6 @@
 ffprobe
     issues in or related to ffprobe.c
 
-ffserver
-    issues in or related to ffserver.c
-
 postproc
     issues in libpostproc/*
 

diff --git a/doc/libav-merge.txt b/doc/libav-merge.txt
index 4a46bfc..d5e671c 100644
--- a/doc/libav-merge.txt
+++ b/doc/libav-merge.txt

@@ -94,18 +94,16 @@
   - a853388d2 hevc: change the stride of the MC buffer to be in bytes instead of elements
   - 0cef06df0 checkasm: add HEVC MC tests
   - e7078e842 hevcdsp: add x86 SIMD for MC
-- VAAPI VP8 decode hwaccel (currently under review: http://ffmpeg.org/pipermail/ffmpeg-devel/2017-February/thread.html#207348)
-- Removal of the custom atomic API (5cc0057f49, see http://ffmpeg.org/pipermail/ffmpeg-devel/2017-March/209003.html)
-- new bitstream reader (see http://ffmpeg.org/pipermail/ffmpeg-devel/2017-April/209609.html)
-- use of the bsf instead of our parser for vp9 superframes (see fa1749dd34)
+  - 7993ec19a hevc: Add hevc_get_pixel_4/8/12/16/24/32/48/64
 - use av_cpu_max_align() instead of hardcoding alignment requirements (see https://ffmpeg.org/pipermail/ffmpeg-devel/2017-September/215834.html)
   - f44ec22e0 lavc: use av_cpu_max_align() instead of hardcoding alignment requirements
   - 4de220d2e frame: allow align=0 (meaning automatic) for av_frame_get_buffer()
+- Support recovery from an already present HLS playlist (see 16cb06bb30)
+- Remove all output devices (see 8e7e042d41, 8d3db95f20, 6ce13070bd, d46cd24986 and https://ffmpeg.org/pipermail/ffmpeg-devel/2017-September/216904.html)
 
 Collateral damage that needs work locally:
 ------------------------------------------
 
-- Merge proresdec2.c and proresdec_lgpl.c
 - Merge proresenc_anatoliy.c and proresenc_kostya.c
 - Fix MIPS AC3 downmix
 

diff --git a/doc/libavcodec.texi b/doc/libavcodec.texi
index 87b90db..b22c47a 100644
--- a/doc/libavcodec.texi
+++ b/doc/libavcodec.texi

@@ -26,13 +26,13 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{ffmpeg-codecs.html,ffmpeg-codecs}, @url{ffmpeg-bitstream-filters.html,bitstream-filters},
 @url{libavutil.html,libavutil}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1),
+ffmpeg(1), ffplay(1), ffprobe(1),
 ffmpeg-codecs(1), ffmpeg-bitstream-filters(1),
 libavutil(3)
 @end ifnothtml

diff --git a/doc/libavdevice.texi b/doc/libavdevice.texi
index 9b10282..0abdaaf 100644
--- a/doc/libavdevice.texi
+++ b/doc/libavdevice.texi

@@ -23,13 +23,13 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{ffmpeg-devices.html,ffmpeg-devices},
 @url{libavutil.html,libavutil}, @url{libavcodec.html,libavcodec}, @url{libavformat.html,libavformat}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1),
+ffmpeg(1), ffplay(1), ffprobe(1),
 ffmpeg-devices(1),
 libavutil(3), libavcodec(3), libavformat(3)
 @end ifnothtml

diff --git a/doc/libavfilter.texi b/doc/libavfilter.texi
index 52e0753..d9472eb 100644
--- a/doc/libavfilter.texi
+++ b/doc/libavfilter.texi

@@ -21,14 +21,14 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{ffmpeg-filters.html,ffmpeg-filters},
 @url{libavutil.html,libavutil}, @url{libswscale.html,libswscale}, @url{libswresample.html,libswresample},
 @url{libavcodec.html,libavcodec}, @url{libavformat.html,libavformat}, @url{libavdevice.html,libavdevice}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1),
+ffmpeg(1), ffplay(1), ffprobe(1),
 ffmpeg-filters(1),
 libavutil(3), libswscale(3), libswresample(3), libavcodec(3), libavformat(3), libavdevice(3)
 @end ifnothtml

diff --git a/doc/libavformat.texi b/doc/libavformat.texi
index d505d64..7cf41fd 100644
--- a/doc/libavformat.texi
+++ b/doc/libavformat.texi

@@ -26,13 +26,13 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{ffmpeg-formats.html,ffmpeg-formats}, @url{ffmpeg-protocols.html,ffmpeg-protocols},
 @url{libavutil.html,libavutil}, @url{libavcodec.html,libavcodec}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1),
+ffmpeg(1), ffplay(1), ffprobe(1),
 ffmpeg-formats(1), ffmpeg-protocols(1),
 libavutil(3), libavcodec(3)
 @end ifnothtml

diff --git a/doc/libavutil.texi b/doc/libavutil.texi
index 7a1c332..ee50362 100644
--- a/doc/libavutil.texi
+++ b/doc/libavutil.texi

@@ -42,12 +42,12 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{ffmpeg-utils.html,ffmpeg-utils}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1),
+ffmpeg(1), ffplay(1), ffprobe(1),
 ffmpeg-utils(1)
 @end ifnothtml
 

diff --git a/doc/libswresample.texi b/doc/libswresample.texi
index bb57278..3108cb1 100644
--- a/doc/libswresample.texi
+++ b/doc/libswresample.texi

@@ -48,13 +48,13 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{ffmpeg-resampler.html,ffmpeg-resampler},
 @url{libavutil.html,libavutil}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1),
+ffmpeg(1), ffplay(1), ffprobe(1),
 ffmpeg-resampler(1),
 libavutil(3)
 @end ifnothtml

diff --git a/doc/libswscale.texi b/doc/libswscale.texi
index 757fd24..e137c24 100644
--- a/doc/libswscale.texi
+++ b/doc/libswscale.texi

@@ -41,13 +41,13 @@
 @chapter See Also
 
 @ifhtml
-@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe}, @url{ffserver.html,ffserver},
+@url{ffmpeg.html,ffmpeg}, @url{ffplay.html,ffplay}, @url{ffprobe.html,ffprobe},
 @url{ffmpeg-scaler.html,ffmpeg-scaler},
 @url{libavutil.html,libavutil}
 @end ifhtml
 
 @ifnothtml
-ffmpeg(1), ffplay(1), ffprobe(1), ffserver(1),
+ffmpeg(1), ffplay(1), ffprobe(1),
 ffmpeg-scaler(1),
 libavutil(3)
 @end ifnothtml

diff --git a/doc/mailing-list-faq.texi b/doc/mailing-list-faq.texi
index fe2171e..3ab89d6 100644
--- a/doc/mailing-list-faq.texi
+++ b/doc/mailing-list-faq.texi

@@ -27,8 +27,7 @@
 @item
 @url{https://lists.ffmpeg.org/mailman/listinfo/ffmpeg-user/, ffmpeg-user}:
 For questions involving unscripted usage or compilation of the FFmpeg
-command-line tools (@command{ffmpeg}, @command{ffprobe}, @command{ffplay},
-@command{ffserver}).
+command-line tools (@command{ffmpeg}, @command{ffprobe}, @command{ffplay}).
 
 @item
 @url{https://lists.ffmpeg.org/mailman/listinfo/libav-user/, libav-user}:
@@ -48,7 +47,8 @@
 @anchor{How do I ask a question or send a message to a mailing list?}
 @section How do I ask a question or send a message to a mailing list?
 
-All you have to do is send an email:
+First you must @ref{How do I subscribe?, subscribe}. Then all you have to do is
+send an email:
 
 @itemize
 @item
@@ -58,49 +58,18 @@
 @item
 Email @email{libav-user@@ffmpeg.org} to send a message to the
 libav-user mailing list.
+
+@item
+Email @email{ffmpeg-devel@@ffmpeg.org} to send a message to the
+ffmpeg-devel mailing list.
 @end itemize
 
-If you are not subscribed to the mailing list then your question must be
-manually approved. Approval may take several days, but the wait is
-usually less. If you want the message to be sent with no delay then you
-must subscribe first. See @ref{How do I subscribe?}
-
-Please do not send a message, subscribe, and re-send the message: this
-results in duplicates, causes more work for the admins, and may lower
-your chance at getting an answer. However, you may do so if you first
-@ref{How do I delete my message in the moderation queue?, delete your original message from the moderation queue}.
+Note that the ffmpeg-devel mailing list does not require you to subscribe
+to send a message or patch, but ffmpeg-user and libav-user do require
+subscription.
 
 @chapter Subscribing / Unsubscribing
 
-@section What does subscribing do?
-
-Subscribing allows two things:
-
-@itemize
-@item
-Your messages will show up in the mailing list without waiting in the
-moderation queue and needing to be manually approved by a mailing list
-admin.
-
-@item
-You will receive all messages to the mailing list including replies to
-your messages. Non-subscribed users do not receive any messages.
-@end itemize
-
-@section Do I need to subscribe?
-
-No. You can still send a message to the mailing list without
-subscribing. See @ref{How do I ask a question or send a message to a mailing list?}
-
-However, your message will need to be manually approved by a mailing
-list admin, and you will not receive any mailing list messages or
-replies.
-
-You can ask to be CCd in your message, but replying users will
-sometimes forget to do so.
-
-You may also view and reply to messages via the @ref{Where are the archives?, archives}.
-
 @anchor{How do I subscribe?}
 @section How do I subscribe?
 
@@ -135,8 +104,6 @@
 These are:
 
 @itemize
-@item
-Messages from users who are @strong{not} subscribed.
 
 @item
 Messages that exceed the @ref{What is the message size limit?, message size limit}.
@@ -149,13 +116,12 @@
 
 @section How long does it take for my message in the moderation queue to be approved?
 
-The queue is usually checked once or twice a day, but on occasion
-several days may pass before someone checks the queue.
+The queue is usually checked daily to several times a week.
 
 @anchor{How do I delete my message in the moderation queue?}
 @section How do I delete my message in the moderation queue?
 
-You should have received an email with the subject @emph{Your message to ffmpeg-user awaits moderator approval}.
+You should have received an email with the subject @emph{Your message to <mailing list name> awaits moderator approval}.
 A link is in the message that will allow you to delete your message
 unless a mailing list admin already approved or rejected it.
 
@@ -176,6 +142,9 @@
 title. The link will provide the proper headers to keep the message
 within the thread.
 
+Note that you must be subscribed to send a message to the ffmpeg-user or
+libav-user mailing lists.
+
 @section How do I search the archives?
 
 Perform a site search using your favorite search engine. Example:
@@ -204,9 +173,8 @@
 @anchor{What is the message size limit?}
 @section What is the message size limit?
 
-The message size limit is 500 kilobytes for the user lists and 1000
-kilobytes for ffmpeg-devel. Please provide links to larger files instead
-of attaching them.
+The message size limit is 1000 kilobytes. Please provide links to larger files
+instead of attaching them.
 
 @section Where can I upload sample files?
 

diff --git a/doc/muxers.texi b/doc/muxers.texi
index 91bbe67..f18543e 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi

@@ -226,7 +226,12 @@
 
 @table @option
 @item -min_seg_duration @var{microseconds}
-Set the segment length in microseconds.
+This is a deprecated option to set the segment length in microseconds, use @var{seg_duration} instead.
+@item -seg_duration @var{duration}
+Set the segment length in seconds (fractional value can be set). The value is
+treated as average segment duration when @var{use_template} is enabled and
+@var{use_timeline} is disabled and as minimum segment duration for all the other
+use cases.
 @item -window_size @var{size}
 Set the maximum number of segments kept in the manifest.
 @item -extra_window_size @var{size}
@@ -247,6 +252,18 @@
 DASH-templated name to used for the media segments. Default is "chunk-stream$RepresentationID$-$Number%05d$.m4s"
 @item -utc_timing_url @var{utc_url}
 URL of the page that will return the UTC timestamp in ISO format. Example: "https://time.akamai.com/?iso"
+@item method @var{method}
+Use the given HTTP method to create output files. Generally set to PUT or POST.
+@item -http_user_agent @var{user_agent}
+Override User-Agent field in HTTP header. Applicable only for HTTP output.
+@item -http_persistent @var{http_persistent}
+Use persistent HTTP connections. Applicable only for HTTP output.
+@item -hls_playlist @var{hls_playlist}
+Generate HLS playlist files as well. The master playlist is generated with the filename master.m3u8.
+One media playlist file is generated for each stream with filenames media_0.m3u8, media_1.m3u8, etc.
+@item -streaming @var{streaming}
+Enable (1) or disable (0) chunk streaming mode of output. In chunk streaming
+mode, each frame will be a moof fragment which forms a chunk.
 @item -adaptation_sets @var{adaptation_sets}
 Assign streams to AdaptationSets. Syntax is "id=x,streams=a,b,c id=y,streams=d,e" with x and y being the IDs
 of the adaptation sets and a,b,c,d and e are the indices of the mapped streams.
@@ -254,6 +271,32 @@
 To map all video (or audio) streams to an AdaptationSet, "v" (or "a") can be used as stream identifier instead of IDs.
 
 When no assignment is defined, this defaults to an AdaptationSet for each stream.
+@item -timeout @var{timeout}
+Set timeout for socket I/O operations. Applicable only for HTTP output.
+@item -index_correction @var{index_correction}
+Enable (1) or Disable (0) segment index correction logic. Applicable only when
+@var{use_template} is enabled and @var{use_timeline} is disabled.
+
+When enabled, the logic monitors the flow of segment indexes. If a streams's
+segment index value is not at the expected real time position, then the logic
+corrects that index value.
+
+Typically this logic is needed in live streaming use cases. The network bandwidth
+fluctuations are common during long run streaming. Each fluctuation can cause
+the segment indexes fall behind the expected real time position.
+@item -format_options @var{options_list}
+Set container format (mp4/webm) options using a @code{:} separated list of
+key=value parameters. Values containing @code{:} special characters must be
+escaped.
+
+@item dash_segment_type @var{dash_segment_type}
+Possible values:
+@item mp4
+If this flag is set, the dash segment files will be in in ISOBMFF format. This is the default format.
+
+@item webm
+If this flag is set, the dash segment files will be in in WebM format.
+
 @end table
 
 @anchor{framecrc}
@@ -468,9 +511,12 @@
 have the same name as the playlist, followed by a sequential number and a
 .ts extension.
 
+Make sure to require a closed GOP when encoding and to set the GOP
+size to fit your segment time constraint.
+
 For example, to convert an input file with @command{ffmpeg}:
 @example
-ffmpeg -i in.nut out.m3u8
+ffmpeg -i in.mkv -c:v h264 -flags +cgop -g 30 -hls_time 1 out.m3u8
 @end example
 This example will produce the playlist, @file{out.m3u8}, and segment files:
 @file{out0.ts}, @file{out1.ts}, @file{out2.ts}, etc.
@@ -498,6 +544,12 @@
 Set the maximum number of playlist entries. If set to 0 the list file
 will contain all the segments. Default value is 5.
 
+@item hls_delete_threshold @var{size}
+Set the number of unreferenced segments to keep on disk before @code{hls_flags delete_segments}
+deletes them. Increase this to allow continue clients to download segments which
+were recently referenced in the playlist. Default value is 1, meaning segments older than
+@code{hls_list_size+1} will be deleted.
+
 @item hls_ts_options @var{options_list}
 Set output format options using a :-separated list of key=value
 parameters. Values containing @code{:} special characters must be
@@ -565,38 +617,69 @@
 but only the file name part without any path info will be contained in the m3u8 segment list.
 Should a relative path be specified, the path of the created segment
 files will be relative to the current working directory.
-When use_localtime_mkdir is set, the whole expanded value of @var{filename} will be written into the m3u8 segment list.
+When strftime_mkdir is set, the whole expanded value of @var{filename} will be written into the m3u8 segment list.
 
+When @code{var_stream_map} is set with two or more variant streams, the
+@var{filename} pattern must contain the string "%v", this string specifies
+the position of variant stream index in the generated segment file names.
+@example
+ffmpeg -i in.ts -b:v:0 1000k -b:v:1 256k -b:a:0 64k -b:a:1 32k \
+  -map 0:v -map 0:a -map 0:v -map 0:a -f hls -var_stream_map "v:0,a:0 v:1,a:1" \
+  -hls_segment_filename 'file_%v_%03d.ts' out_%v.m3u8
+@end example
+This example will produce the playlists segment file sets:
+@file{file_0_000.ts}, @file{file_0_001.ts}, @file{file_0_002.ts}, etc. and
+@file{file_1_000.ts}, @file{file_1_001.ts}, @file{file_1_002.ts}, etc.
+
+The string "%v" may be present in the filename or in the last directory name
+containing the file. If the string is present in the directory name, then
+sub-directories are created after expanding the directory name pattern. This
+enables creation of segments corresponding to different variant streams in
+subdirectories.
+@example
+ffmpeg -i in.ts -b:v:0 1000k -b:v:1 256k -b:a:0 64k -b:a:1 32k \
+  -map 0:v -map 0:a -map 0:v -map 0:a -f hls -var_stream_map "v:0,a:0 v:1,a:1" \
+  -hls_segment_filename 'vs%v/file_%03d.ts' vs%v/out.m3u8
+@end example
+This example will produce the playlists segment file sets:
+@file{vs0/file_000.ts}, @file{vs0/file_001.ts}, @file{vs0/file_002.ts}, etc. and
+@file{vs1/file_000.ts}, @file{vs1/file_001.ts}, @file{vs1/file_002.ts}, etc.
 
 @item use_localtime
+Same as strftime option, will be deprecated.
+
+@item strftime
 Use strftime() on @var{filename} to expand the segment filename with localtime.
 The segment number is also available in this mode, but to use it, you need to specify second_level_segment_index
 hls_flag and %%d will be the specifier.
 @example
-ffmpeg -i in.nut -use_localtime 1 -hls_segment_filename 'file-%Y%m%d-%s.ts' out.m3u8
+ffmpeg -i in.nut -strftime 1 -hls_segment_filename 'file-%Y%m%d-%s.ts' out.m3u8
 @end example
 This example will produce the playlist, @file{out.m3u8}, and segment files:
 @file{file-20160215-1455569023.ts}, @file{file-20160215-1455569024.ts}, etc.
 Note: On some systems/environments, the @code{%s} specifier is not available. See
   @code{strftime()} documentation.
 @example
-ffmpeg -i in.nut -use_localtime 1 -hls_flags second_level_segment_index -hls_segment_filename 'file-%Y%m%d-%%04d.ts' out.m3u8
+ffmpeg -i in.nut -strftime 1 -hls_flags second_level_segment_index -hls_segment_filename 'file-%Y%m%d-%%04d.ts' out.m3u8
 @end example
 This example will produce the playlist, @file{out.m3u8}, and segment files:
 @file{file-20160215-0001.ts}, @file{file-20160215-0002.ts}, etc.
 
 @item use_localtime_mkdir
-Used together with -use_localtime, it will create all subdirectories which
+Same as strftime_mkdir option, will be deprecated .
+
+@item strftime_mkdir
+Used together with -strftime_mkdir, it will create all subdirectories which
 is expanded in @var{filename}.
 @example
-ffmpeg -i in.nut -use_localtime 1 -use_localtime_mkdir 1 -hls_segment_filename '%Y%m%d/file-%Y%m%d-%s.ts' out.m3u8
+ffmpeg -i in.nut -strftime 1 -strftime_mkdir 1 -hls_segment_filename '%Y%m%d/file-%Y%m%d-%s.ts' out.m3u8
 @end example
 This example will create a directory 201560215 (if it does not exist), and then
 produce the playlist, @file{out.m3u8}, and segment files:
 @file{20160215/file-20160215-1455569023.ts}, @file{20160215/file-20160215-1455569024.ts}, etc.
 
 @example
-ffmpeg -i in.nut -use_localtime 1 -use_localtime_mkdir 1 -hls_segment_filename '%Y/%m/%d/file-%Y%m%d-%s.ts' out.m3u8
+ffmpeg -i in.nut -strftime 1 -strftime_mkdir 1 -hls_segment_filename '%Y/%m/%d/file-%Y%m%d-%s.ts' out.m3u8
 @end example
 This example will create a directory hierarchy 2016/02/15 (if any of them do not exist), and then
 produce the playlist, @file{out.m3u8}, and segment files:
@@ -693,6 +776,15 @@
 @item hls_fmp4_init_filename @var{filename}
 set filename to the fragment files header file, default filename is @file{init.mp4}.
 
+When @code{var_stream_map} is set with two or more variant streams, the
+@var{filename} pattern must contain the string "%v", this string specifies
+the position of variant stream index in the generated init file names.
+The string "%v" may be present in the filename or in the last directory name
+containing the file. If the string is present in the directory name, then
+sub-directories are created after expanding the directory name pattern. This
+enables creation of init files corresponding to different variant streams in
+subdirectories.
+
 @item hls_flags @var{flags}
 Possible values:
 
@@ -732,6 +824,10 @@
 detect updates to the encryption info. Be sure to replace this file atomically,
 including the file containing the AES encryption key.
 
+@item independent_segments
+Add the @code{#EXT-X-INDEPENDENT-SEGMENTS} to playlists that has video segments
+and when all the segments of that playlist are guaranteed to start with a Key frame.
+
 @item split_by_time
 Allow segments to start on frames other than keyframes. This improves
 behavior on some players when the time between keyframes is inconsistent,
@@ -743,24 +839,24 @@
 
 @item second_level_segment_index
 Makes it possible to use segment indexes as %%d in hls_segment_filename expression
-besides date/time values when use_localtime is on.
+besides date/time values when strftime is on.
 To get fixed width numbers with trailing zeroes, %%0xd format is available where x is the required width.
 
 @item second_level_segment_size
 Makes it possible to use segment sizes (counted in bytes) as %%s in hls_segment_filename
-expression besides date/time values when use_localtime is on.
+expression besides date/time values when strftime is on.
 To get fixed width numbers with trailing zeroes, %%0xs format is available where x is the required width.
 
 @item second_level_segment_duration
 Makes it possible to use segment duration (calculated  in microseconds) as %%t in hls_segment_filename
-expression besides date/time values when use_localtime is on.
+expression besides date/time values when strftime is on.
 To get fixed width numbers with trailing zeroes, %%0xt format is available where x is the required width.
 
 @example
 ffmpeg -i sample.mpeg \
    -f hls -hls_time 3 -hls_list_size 5 \
    -hls_flags second_level_segment_index+second_level_segment_size+second_level_segment_duration \
-   -use_localtime 1 -use_localtime_mkdir 1 -hls_segment_filename "segment_%Y%m%d%H%M%S_%%04d_%%08s_%%013t.ts" stream.m3u8
+   -strftime 1 -strftime_mkdir 1 -hls_segment_filename "segment_%Y%m%d%H%M%S_%%04d_%%08s_%%013t.ts" stream.m3u8
 @end example
 This will produce segments like this:
 @file{segment_20170102194334_0003_00122200_0000003000000.ts}, @file{segment_20170102194334_0004_00120072_0000003000000.ts} etc.
@@ -794,6 +890,128 @@
 @item http_user_agent
 Override User-Agent field in HTTP header. Applicable only for HTTP output.
 
+@item var_stream_map
+Map string which specifies how to group the audio, video and subtitle streams
+into different variant streams. The variant stream groups are separated
+by space.
+Expected string format is like this "a:0,v:0 a:1,v:1 ....". Here a:, v:, s: are
+the keys to specify audio, video and subtitle streams respectively.
+Allowed values are 0 to 9 (limited just based on practical usage).
+
+When there are two or more variant streams, the output filename pattern must
+contain the string "%v", this string specifies the position of variant stream
+index in the output media playlist filenames. The string "%v" may be present in
+the filename or in the last directory name containing the file. If the string is
+present in the directory name, then sub-directories are created after expanding
+the directory name pattern. This enables creation of variant streams in
+subdirectories.
+
+@example
+ffmpeg -re -i in.ts -b:v:0 1000k -b:v:1 256k -b:a:0 64k -b:a:1 32k \
+  -map 0:v -map 0:a -map 0:v -map 0:a -f hls -var_stream_map "v:0,a:0 v:1,a:1" \
+  http://example.com/live/out_%v.m3u8
+@end example
+This example creates two hls variant streams. The first variant stream will
+contain video stream of bitrate 1000k and audio stream of bitrate 64k and the
+second variant stream will contain video stream of bitrate 256k and audio
+stream of bitrate 32k. Here, two media playlist with file names out_0.m3u8 and
+out_1.m3u8 will be created.
+@example
+ffmpeg -re -i in.ts -b:v:0 1000k -b:v:1 256k -b:a:0 64k \
+  -map 0:v -map 0:a -map 0:v -f hls -var_stream_map "v:0 a:0 v:1" \
+  http://example.com/live/out_%v.m3u8
+@end example
+This example creates three hls variant streams. The first variant stream will
+be a video only stream with video bitrate 1000k, the second variant stream will
+be an audio only stream with bitrate 64k and the third variant stream will be a
+video only stream with bitrate 256k. Here, three media playlist with file names
+out_0.m3u8, out_1.m3u8 and out_2.m3u8 will be created.
+@example
+ffmpeg -re -i in.ts -b:v:0 1000k -b:v:1 256k -b:a:0 64k -b:a:1 32k \
+  -map 0:v -map 0:a -map 0:v -map 0:a -f hls -var_stream_map "v:0,a:0 v:1,a:1" \
+  http://example.com/live/vs_%v/out.m3u8
+@end example
+This example creates the variant streams in subdirectories. Here, the first
+media playlist is created at @file{http://example.com/live/vs_0/out.m3u8} and
+the second one at @file{http://example.com/live/vs_1/out.m3u8}.
+@example
+ffmpeg -re -i in.ts -b:a:0 32k -b:a:1 64k -b:v:0 1000k -b:v:1 3000k  \
+  -map 0:a -map 0:a -map 0:v -map 0:v -f hls \
+  -var_stream_map "a:0,agroup:aud_low a:1,agroup:aud_high v:0,agroup:aud_low v:1,agroup:aud_high" \
+  -master_pl_name master.m3u8 \
+  http://example.com/live/out_%v.m3u8
+@end example
+This example creates two audio only and two video only variant streams. In
+addition to the #EXT-X-STREAM-INF tag for each variant stream in the master
+playlist, #EXT-X-MEDIA tag is also added for the two audio only variant streams
+and they are mapped to the two video only variant streams with audio group names
+'aud_low' and 'aud_high'.
+
+By default, a single hls variant containing all the encoded streams is created.
+
+@item cc_stream_map
+Map string which specifies different closed captions groups and their
+attributes. The closed captions stream groups are separated by space.
+Expected string format is like this
+"ccgroup:<group name>,instreamid:<INSTREAM-ID>,language:<language code> ....".
+'ccgroup' and 'instreamid' are mandatory attributes. 'language' is an optional
+attribute.
+The closed captions groups configured using this option are mapped to different
+variant streams by providing the same 'ccgroup' name in the
+@code{var_stream_map} string. If @code{var_stream_map} is not set, then the
+first available ccgroup in @code{cc_stream_map} is mapped to the output variant
+stream. The examples for these two use cases are given below.
+
+@example
+ffmpeg -re -i in.ts -b:v 1000k -b:a 64k -a53cc 1 -f hls \
+  -cc_stream_map "ccgroup:cc,instreamid:CC1,language:en" \
+  -master_pl_name master.m3u8 \
+  http://example.com/live/out.m3u8
+@end example
+This example adds @code{#EXT-X-MEDIA} tag with @code{TYPE=CLOSED-CAPTIONS} in
+the master playlist with group name 'cc', langauge 'en' (english) and
+INSTREAM-ID 'CC1'. Also, it adds @code{CLOSED-CAPTIONS} attribute with group
+name 'cc' for the output variant stream.
+@example
+ffmpeg -re -i in.ts -b:v:0 1000k -b:v:1 256k -b:a:0 64k -b:a:1 32k \
+  -a53cc:0 1 -a53cc:1 1\
+  -map 0:v -map 0:a -map 0:v -map 0:a -f hls \
+  -cc_stream_map "ccgroup:cc,instreamid:CC1,language:en ccgroup:cc,instreamid:CC2,language:sp" \
+  -var_stream_map "v:0,a:0,ccgroup:cc v:1,a:1,ccgroup:cc" \
+  -master_pl_name master.m3u8 \
+  http://example.com/live/out_%v.m3u8
+@end example
+This example adds two @code{#EXT-X-MEDIA} tags with @code{TYPE=CLOSED-CAPTIONS} in
+the master playlist for the INSTREAM-IDs 'CC1' and 'CC2'. Also, it adds
+@code{CLOSED-CAPTIONS} attribute with group name 'cc' for the two output variant
+streams.
+
+@item master_pl_name
+Create HLS master playlist with the given name.
+
+@example
+ffmpeg -re -i in.ts -f hls -master_pl_name master.m3u8 http://example.com/live/out.m3u8
+@end example
+This example creates HLS master playlist with name master.m3u8 and it is
+published at http://example.com/live/
+
+@item master_pl_publish_rate
+Publish master play list repeatedly every after specified number of segment intervals.
+
+@example
+ffmpeg -re -i in.ts -f hls -master_pl_name master.m3u8 \
+-hls_time 2 -master_pl_publish_rate 30 http://example.com/live/out.m3u8
+@end example
+
+This example creates HLS master playlist with name master.m3u8 and keep
+publishing it repeatedly every after 30 segments i.e. every after 60s.
+
+@item http_persistent
+Use persistent HTTP connections. Applicable only for HTTP output.
+
+@item timeout
+Set timeout for socket I/O operations. Applicable only for HTTP output.
+
 @end table
 
 @anchor{ico}
@@ -894,9 +1112,18 @@
 ffmpeg -f v4l2 -r 1 -i /dev/video0 -f image2 -strftime 1 "%Y-%m-%d_%H-%M-%S.jpg"
 @end example
 
+You can set the file name with current frame's PTS:
+@example
+ffmpeg -f v4l2 -r 1 -i /dev/video0 -copyts -f image2 -frame_pts true %d.jpg"
+@end example
+
 @subsection Options
 
 @table @option
+@item frame_pts
+If set to 1, expand the filename with pts from pkt->pts.
+Default value is 0.
+
 @item start_number
 Start the sequence from the specified number. Default value is 1.
 
@@ -1123,6 +1350,18 @@
 reduces the need for edit lists for some cases such as video tracks with
 B-frames. Additionally, eases conformance with the DASH-IF interoperability
 guidelines.
+
+This option is implicitly set when writing ismv (Smooth Streaming) files.
+@item -write_prft
+Write producer time reference box (PRFT) with a specified time source for the
+NTP field in the PRFT box. Set value as @samp{wallclock} to specify timesource
+as wallclock time and @samp{pts} to specify timesource as input packets' PTS
+values.
+
+Setting value to @samp{pts} is applicable only for a live encoding use case,
+where PTS values are set as as wallclock time at the source. For example, an
+encoding use case with decklink capture source where @option{video_pts} and
+@option{audio_pts} are set to @samp{abs_wallclock}.
 @end table
 
 @subsection Example
@@ -1649,6 +1888,9 @@
 packet written. Defaults to @code{0}.
 @end table
 
+Make sure to require a closed GOP when encoding and to set the GOP
+size to fit your segment time constraint.
+
 @subsection Examples
 
 @itemize
@@ -1657,7 +1899,7 @@
 @file{out-000.nut}, @file{out-001.nut}, etc., and write the list of
 generated segments to @file{out.list}:
 @example
-ffmpeg -i in.mkv -codec copy -map 0 -f segment -segment_list out.list out%03d.nut
+ffmpeg -i in.mkv -codec hevc -flags +cgop -g 60 -map 0 -f segment -segment_list out.list out%03d.nut
 @end example
 
 @item
@@ -1828,20 +2070,35 @@
 @anchor{tee}
 @section tee
 
-The tee muxer can be used to write the same data to several files or any
-other kind of muxer. It can be used, for example, to both stream a video to
-the network and save it to disk at the same time.
+The tee muxer can be used to write the same data to several outputs, such as files or streams.
+It can be used, for example, to stream a video over a network and save it to disk at the same time.
 
 It is different from specifying several outputs to the @command{ffmpeg}
-command-line tool because the audio and video data will be encoded only once
-with the tee muxer; encoding can be a very expensive process. It is not
-useful when using the libavformat API directly because it is then possible
-to feed the same packets to several muxers directly.
+command-line tool. With the tee muxer, the audio and video data will be encoded only once.
+With conventional multiple outputs, multiple encoding operations in parallel are initiated,
+which can be a very expensive process. The tee muxer is not useful when using the libavformat API
+directly because it is then possible to feed the same packets to several muxers directly.
+
+Since the tee muxer does not represent any particular output format, ffmpeg cannot auto-select
+output streams. So all streams intended for output must be specified using @code{-map}. See
+the examples below.
+
+Some encoders may need different options depending on the output format;
+the auto-detection of this can not work with the tee muxer, so they need to be explicitly specified.
+The main example is the @option{global_header} flag.
+
+The slave outputs are specified in the file name given to the muxer,
+separated by '|'. If any of the slave name contains the '|' separator,
+leading or trailing spaces or any special character, those must be
+escaped (see @ref{quoting_and_escaping,,the "Quoting and escaping"
+section in the ffmpeg-utils(1) manual,ffmpeg-utils}).
+
+@subsection Options
 
 @table @option
 
 @item use_fifo @var{bool}
-If set to 1, slave outputs will be processed in separate thread using @ref{fifo}
+If set to 1, slave outputs will be processed in separate threads using the @ref{fifo}
 muxer. This allows to compensate for different speed/latency/reliability of
 outputs and setup transparent recovery. By default this feature is turned off.
 
@@ -1850,12 +2107,6 @@
 
 @end table
 
-The slave outputs are specified in the file name given to the muxer,
-separated by '|'. If any of the slave name contains the '|' separator,
-leading or trailing spaces or any special character, it must be
-escaped (see @ref{quoting_and_escaping,,the "Quoting and escaping"
-section in the ffmpeg-utils(1) manual,ffmpeg-utils}).
-
 Muxer options can be specified for each slave by prepending them as a list of
 @var{key}=@var{value} pairs separated by ':', between square brackets. If
 the options values contain a special character or the ':' separator, they
@@ -1864,13 +2115,27 @@
 The following special options are also recognized:
 @table @option
 @item f
-Specify the format name. Useful if it cannot be guessed from the
-output name suffix.
+Specify the format name. Required if it cannot be guessed from the
+output URL.
 
 @item bsfs[/@var{spec}]
 Specify a list of bitstream filters to apply to the specified
 output.
 
+It is possible to specify to which streams a given bitstream filter
+applies, by appending a stream specifier to the option separated by
+@code{/}. @var{spec} must be a stream specifier (see @ref{Format
+stream specifiers}).
+
+If the stream specifier is not specified, the bitstream filters will be
+applied to all streams in the output. This will cause that output operation
+to fail if the output contains streams to which the bitstream filter cannot
+be applied e.g. @code{h264_mp4toannexb} being applied to an output containing an audio stream.
+
+Options for a bitstream filter must be specified in the form of @code{opt=value}.
+
+Several bitstream filters can be specified, separated by ",".
+
 @item use_fifo @var{bool}
 This allows to override tee muxer use_fifo option for individual slave muxer.
 
@@ -1878,19 +2143,13 @@
 This allows to override tee muxer fifo_options for individual slave muxer.
 See @ref{fifo}.
 
-It is possible to specify to which streams a given bitstream filter
-applies, by appending a stream specifier to the option separated by
-@code{/}. @var{spec} must be a stream specifier (see @ref{Format
-stream specifiers}).  If the stream specifier is not specified, the
-bitstream filters will be applied to all streams in the output.
-
-Several bitstream filters can be specified, separated by ",".
-
 @item select
 Select the streams that should be mapped to the slave output,
 specified by a stream specifier. If not specified, this defaults to
-all the input streams. You may use multiple stream specifiers
-separated by commas (@code{,}) e.g.: @code{a:0,v}
+all the mapped streams. This will cause that output operation to fail
+if the output format does not accept all mapped streams.
+
+You may use multiple stream specifiers separated by commas (@code{,}) e.g.: @code{a:0,v}
 
 @item onfail
 Specify behaviour on output failure. This can be set to either @code{abort} (which is
@@ -1904,7 +2163,7 @@
 @itemize
 @item
 Encode something and both archive it in a WebM file and stream it
-as MPEG-TS over UDP (the streams need to be explicitly mapped):
+as MPEG-TS over UDP:
 @example
 ffmpeg -i ... -c:v libx264 -c:a mp2 -f tee -map 0:v -map 0:a
   "archive-20121107.mkv|[f=mpegts]udp://10.0.1.255:1234/"
@@ -1927,23 +2186,19 @@
 audio packets.
 @example
 ffmpeg -i ... -map 0 -flags +global_header -c:v libx264 -c:a aac
-       -f tee "[bsfs/v=dump_extra]out.ts|[movflags=+faststart]out.mp4|[select=a]out.aac"
+       -f tee "[bsfs/v=dump_extra=freq=keyframe]out.ts|[movflags=+faststart]out.mp4|[select=a]out.aac"
 @end example
 
 @item
-As below, but select only stream @code{a:1} for the audio output. Note
+As above, but select only stream @code{a:1} for the audio output. Note
 that a second level escaping must be performed, as ":" is a special
 character used to separate options.
 @example
 ffmpeg -i ... -map 0 -flags +global_header -c:v libx264 -c:a aac
-       -f tee "[bsfs/v=dump_extra]out.ts|[movflags=+faststart]out.mp4|[select=\'a:1\']out.aac"
+       -f tee "[bsfs/v=dump_extra=freq=keyframe]out.ts|[movflags=+faststart]out.mp4|[select=\'a:1\']out.aac"
 @end example
 @end itemize
 
-Note: some codecs may need different options depending on the output format;
-the auto-detection of this can not work with the tee muxer. The main example
-is the @option{global_header} flag.
-
 @section webm_dash_manifest
 
 WebM DASH Manifest muxer.

diff --git a/doc/outdevs.texi b/doc/outdevs.texi
index daf7b1a..2518f9b 100644
--- a/doc/outdevs.texi
+++ b/doc/outdevs.texi

@@ -140,7 +140,8 @@
 
 @item list_devices
 If set to @option{true}, print a list of devices and exit.
-Defaults to @option{false}.
+Defaults to @option{false}. Alternatively you can use the @code{-sinks}
+option of ffmpeg to list the available output devices.
 
 @item list_formats
 If set to @option{true}, print a list of supported formats and exit.
@@ -150,6 +151,10 @@
 Amount of time to preroll video in seconds.
 Defaults to @option{0.5}.
 
+@item duplex_mode
+Sets the decklink device duplex mode. Must be @samp{unset}, @samp{half} or @samp{full}.
+Defaults to @samp{unset}.
+
 @end table
 
 @subsection Examples
@@ -182,6 +187,35 @@
 
 @end itemize
 
+@section fbdev
+
+Linux framebuffer output device.
+
+The Linux framebuffer is a graphic hardware-independent abstraction
+layer to show graphics on a computer monitor, typically on the
+console. It is accessed through a file device node, usually
+@file{/dev/fb0}.
+
+For more detailed information read the file
+@file{Documentation/fb/framebuffer.txt} included in the Linux source tree.
+
+@subsection Options
+@table @option
+
+@item xoffset
+@item yoffset
+Set x/y coordinate of top left corner. Default is 0.
+@end table
+
+@subsection Examples
+Play a file on framebuffer device @file{/dev/fb0}.
+Required pixel format depends on current framebuffer settings.
+@example
+ffmpeg -re -i INPUT -c:v rawvideo -pix_fmt bgra -f fbdev /dev/fb0
+@end example
+
+See also @url{http://linux-fbdev.sourceforge.net/}, and fbset(1).
+
 @section libndi_newtek
 
 The libndi_newtek output device provides playback capabilities for using NDI (Network
@@ -227,35 +261,6 @@
 
 @end itemize
 
-@section fbdev
-
-Linux framebuffer output device.
-
-The Linux framebuffer is a graphic hardware-independent abstraction
-layer to show graphics on a computer monitor, typically on the
-console. It is accessed through a file device node, usually
-@file{/dev/fb0}.
-
-For more detailed information read the file
-@file{Documentation/fb/framebuffer.txt} included in the Linux source tree.
-
-@subsection Options
-@table @option
-
-@item xoffset
-@item yoffset
-Set x/y coordinate of top left corner. Default is 0.
-@end table
-
-@subsection Examples
-Play a file on framebuffer device @file{/dev/fb0}.
-Required pixel format depends on current framebuffer settings.
-@example
-ffmpeg -re -i INPUT -c:v rawvideo -pix_fmt bgra -f fbdev /dev/fb0
-@end example
-
-See also @url{http://linux-fbdev.sourceforge.net/}, and fbset(1).
-
 @section opengl
 OpenGL output device.
 
@@ -396,6 +401,11 @@
 @item window_fullscreen
 Set fullscreen mode when non-zero value is provided.
 Default value is zero.
+
+@item window_enable_quit
+Enable quit action (using window button or keyboard key)
+when non-zero value is provided.
+Default value is 1 (enable quit action)
 @end table
 
 @subsection Interactive commands
@@ -420,6 +430,10 @@
 
 sndio audio output device.
 
+@section v4l2
+
+Video4Linux2 output device.
+
 @section xv
 
 XV (XVideo) output device.

diff --git a/doc/platform.texi b/doc/platform.texi
index bdfb6fd..4090b85 100644
--- a/doc/platform.texi
+++ b/doc/platform.texi

@@ -148,16 +148,11 @@
 
 @section Microsoft Visual C++ or Intel C++ Compiler for Windows
 
-FFmpeg can be built with MSVC 2012 or earlier using a C99-to-C89 conversion utility
-and wrapper, or with MSVC 2013 and ICL natively.
+FFmpeg can be built with MSVC 2013 or later.
 
 You will need the following prerequisites:
 
 @itemize
-@item @uref{https://github.com/libav/c99-to-c89/, C99-to-C89 Converter & Wrapper}
-(if using MSVC 2012 or earlier)
-@item @uref{http://code.google.com/p/msinttypes/, msinttypes}
-(if using MSVC 2012 or earlier)
 @item @uref{http://msys2.github.io/, MSYS2}
 @item @uref{http://www.nasm.us/, NASM}
 (Also available via MSYS2's package manager.)
@@ -166,16 +161,13 @@
 To set up a proper environment in MSYS2, you need to run @code{msys_shell.bat} from
 the Visual Studio or Intel Compiler command prompt.
 
-Place @code{yasm.exe} somewhere in your @code{PATH}. If using MSVC 2012 or
-earlier, place @code{c99wrap.exe} and @code{c99conv.exe} somewhere in your
-@code{PATH} as well.
+Place @code{yasm.exe} somewhere in your @code{PATH}.
 
 Next, make sure any other headers and libs you want to use, such as zlib, are
 located in a spot that the compiler can see. Do so by modifying the @code{LIB}
 and @code{INCLUDE} environment variables to include the @strong{Windows-style}
 paths to these directories. Alternatively, you can try to use the
-@code{--extra-cflags}/@code{--extra-ldflags} configure options. If using MSVC
-2012 or earlier, place @code{inttypes.h} somewhere the compiler can see too.
+@code{--extra-cflags}/@code{--extra-ldflags} configure options.
 
 Finally, run:
 
@@ -217,8 +209,6 @@
 
 @item FFmpeg has been tested with the following on i686 and x86_64:
 @itemize
-@item Visual Studio 2010 Pro and Express
-@item Visual Studio 2012 Pro and Express
 @item Visual Studio 2013 Pro and Express
 @item Intel Composer XE 2013
 @item Intel Composer XE 2013 SP1

diff --git a/doc/protocols.texi b/doc/protocols.texi
index a7968ff..b34f29e 100644
--- a/doc/protocols.texi
+++ b/doc/protocols.texi

@@ -296,6 +296,9 @@
 @item post_data
 Set custom HTTP post data.
 
+@item referer
+Set the Referer header. Include 'Referer: URL' header in HTTP request.
+
 @item user_agent
 Override the User-Agent header. If not specified the protocol will use a
 string describing the libavformat build. ("Lavf/<version>")
@@ -321,6 +324,9 @@
 @item mime_type
 Export the MIME type.
 
+@item http_version
+Exports the HTTP response version number. Usually "1.0" or "1.1".
+
 @item icy
 If set to 1 request ICY (SHOUTcast) metadata from the server. If the server
 supports this, the metadata has to be retrieved by the application by reading
@@ -363,7 +369,7 @@
 used as an output option, or read data from a client with HTTP POST when used as
 an input option.
 If set to 2 enables experimental multi-client HTTP server. This is not yet implemented
-in ffmpeg.c or ffserver.c and thus must not be used as a command line option.
+in ffmpeg.c and thus must not be used as a command line option.
 @example
 # Server side (sending):
 ffmpeg -i somefile.ogg -c copy -listen 1 -f ogg http://@var{server}:@var{port}
@@ -1149,6 +1155,184 @@
 Set the maximum number of streams. By default no limit is set.
 @end table
 
+@section srt
+
+Haivision Secure Reliable Transport Protocol via libsrt.
+
+The supported syntax for a SRT URL is:
+@example
+srt://@var{hostname}:@var{port}[?@var{options}]
+@end example
+
+@var{options} contains a list of &-separated options of the form
+@var{key}=@var{val}.
+
+or
+
+@example
+@var{options} srt://@var{hostname}:@var{port}
+@end example
+
+@var{options} contains a list of '-@var{key} @var{val}'
+options.
+
+This protocol accepts the following options.
+
+@table @option
+@item connect_timeout
+Connection timeout; SRT cannot connect for RTT > 1500 msec
+(2 handshake exchanges) with the default connect timeout of
+3 seconds. This option applies to the caller and rendezvous
+connection modes. The connect timeout is 10 times the value
+set for the rendezvous mode (which can be used as a
+workaround for this connection problem with earlier versions).
+
+@item ffs=@var{bytes}
+Flight Flag Size (Window Size), in bytes. FFS is actually an
+internal parameter and you should set it to not less than
+@option{recv_buffer_size} and @option{mss}. The default value
+is relatively large, therefore unless you set a very large receiver buffer,
+you do not need to change this option. Default value is 25600.
+
+@item inputbw=@var{bytes/seconds}
+Sender nominal input rate, in bytes per seconds. Used along with
+@option{oheadbw}, when @option{maxbw} is set to relative (0), to
+calculate maximum sending rate when recovery packets are sent
+along with the main media stream:
+@option{inputbw} * (100 + @option{oheadbw}) / 100
+if @option{inputbw} is not set while @option{maxbw} is set to
+relative (0), the actual input rate is evaluated inside
+the library. Default value is 0.
+
+@item iptos=@var{tos}
+IP Type of Service. Applies to sender only. Default value is 0xB8.
+
+@item ipttl=@var{ttl}
+IP Time To Live. Applies to sender only. Default value is 64.
+
+@item latency
+Timestamp-based Packet Delivery Delay.
+Used to absorb bursts of missed packet retransmissions.
+This flag sets both @option{rcvlatency} and @option{peerlatency}
+to the same value. Note that prior to version 1.3.0
+this is the only flag to set the latency, however
+this is effectively equivalent to setting @option{peerlatency},
+when side is sender and @option{rcvlatency}
+when side is receiver, and the bidirectional stream
+sending is not supported.
+
+@item listen_timeout
+Set socket listen timeout.
+
+@item maxbw=@var{bytes/seconds}
+Maximum sending bandwidth, in bytes per seconds.
+-1 infinite (CSRTCC limit is 30mbps)
+0 relative to input rate (see @option{inputbw})
+>0 absolute limit value
+Default value is 0 (relative)
+
+@item mode=@var{caller|listener|rendezvous}
+Connection mode.
+@option{caller} opens client connection.
+@option{listener} starts server to listen for incoming connections.
+@option{rendezvous} use Rendez-Vous connection mode.
+Default value is caller.
+
+@item mss=@var{bytes}
+Maximum Segment Size, in bytes. Used for buffer allocation
+and rate calculation using a packet counter assuming fully
+filled packets. The smallest MSS between the peers is
+used. This is 1500 by default in the overall internet.
+This is the maximum size of the UDP packet and can be
+only decreased, unless you have some unusual dedicated
+network settings. Default value is 1500.
+
+@item nakreport=@var{1|0}
+If set to 1, Receiver will send `UMSG_LOSSREPORT` messages
+periodically until a lost packet is retransmitted or
+intentionally dropped. Default value is 1.
+
+@item oheadbw=@var{percents}
+Recovery bandwidth overhead above input rate, in percents.
+See @option{inputbw}. Default value is 25%.
+
+@item passphrase=@var{string}
+HaiCrypt Encryption/Decryption Passphrase string, length
+from 10 to 79 characters. The passphrase is the shared
+secret between the sender and the receiver. It is used
+to generate the Key Encrypting Key using PBKDF2
+(Password-Based Key Derivation Function). It is used
+only if @option{pbkeylen} is non-zero. It is used on
+the receiver only if the received data is encrypted.
+The configured passphrase cannot be recovered (write-only).
+
+@item payload_size=@var{bytes}
+Sets the maximum declared size of a packet transferred
+during the single call to the sending function in Live
+mode. Use 0 if this value isn't used (which is default in
+file mode).
+Default is -1 (automatic), which typically means MPEG-TS;
+if you are going to use SRT
+to send any different kind of payload, such as, for example,
+wrapping a live stream in very small frames, then you can
+use a bigger maximum frame size, though not greater than
+1456 bytes.
+
+@item pkt_size=@var{bytes}
+Alias for @samp{payload_size}.
+
+@item peerlatency
+The latency value (as described in @option{rcvlatency}) that is
+set by the sender side as a minimum value for the receiver.
+
+@item pbkeylen=@var{bytes}
+Sender encryption key length, in bytes.
+Only can be set to 0, 16, 24 and 32.
+Enable sender encryption if not 0.
+Not required on receiver (set to 0),
+key size obtained from sender in HaiCrypt handshake.
+Default value is 0.
+
+@item rcvlatency
+The time that should elapse since the moment when the
+packet was sent and the moment when it's delivered to
+the receiver application in the receiving function.
+This time should be a buffer time large enough to cover
+the time spent for sending, unexpectedly extended RTT
+time, and the time needed to retransmit the lost UDP
+packet. The effective latency value will be the maximum
+of this options' value and the value of @option{peerlatency}
+set by the peer side. Before version 1.3.0 this option
+is only available as @option{latency}.
+
+@item recv_buffer_size=@var{bytes}
+Set receive buffer size, expressed in bytes.
+
+@item send_buffer_size=@var{bytes}
+Set send buffer size, expressed in bytes.
+
+@item rw_timeout
+Set raise error timeout for read/write optations.
+
+This option is only relevant in read mode:
+if no data arrived in more than this time
+interval, raise error.
+
+@item tlpktdrop=@var{1|0}
+Too-late Packet Drop. When enabled on receiver, it skips
+missing packets that have not been delivered in time and
+delivers the following packets to the application when
+their time-to-play has come. It also sends a fake ACK to
+the sender. When enabled on sender and enabled on the
+receiving peer, the sender drops the older packets that
+have no chance of being delivered in time. It was
+automatically enabled in the sender if the receiver
+supports it.
+
+@end table
+
+For more information see: @url{https://github.com/Haivision/srt}.
+
 @section srtp
 
 Secure Real-time Transport Protocol.
@@ -1186,6 +1370,7 @@
 Start offset of the extracted segment, in bytes.
 @item end
 End offset of the extracted segment, in bytes.
+If set to 0, extract till end of file.
 @end table
 
 Examples:
@@ -1201,6 +1386,11 @@
 subfile,,start,183241728,end,366490624,,:archive.tar
 @end example
 
+Play a MPEG-TS file from start offset till end:
+@example
+subfile,,start,32815239,end,0,,:video.ts
+@end example
+
 @section tee
 
 Writes the output to multiple protocols. The individual outputs are separated
@@ -1242,6 +1432,12 @@
 
 @item send_buffer_size=@var{bytes}
 Set send buffer size, expressed bytes.
+
+@item tcp_nodelay=@var{1|0}
+Set TCP_NODELAY to disable Nagle's algorithm. Default value is 0.
+
+@item tcp_mss=@var{bytes}
+Set maximum segment size for outgoing TCP packets, expressed in bytes.
 @end table
 
 The following example shows how to setup a listening TCP connection
@@ -1277,7 +1473,7 @@
 Note, if using OpenSSL, this currently only makes sure that the
 peer certificate is signed by one of the root certificates in the CA
 database, but it does not validate that the certificate actually
-matches the host name we are trying to connect to. (With GnuTLS,
+matches the host name we are trying to connect to. (With other backends,
 the host name is validated as well.)
 
 This is disabled by default since it requires a CA database to be
@@ -1348,9 +1544,8 @@
 Override the local UDP port to bind with.
 
 @item localaddr=@var{addr}
-Choose the local IP address. This is useful e.g. if sending multicast
-and the host has multiple interfaces, where the user can choose
-which interface to send on by specifying the IP address of that interface.
+Local IP address of a network interface used for sending packets or joining
+multicast groups.
 
 @item pkt_size=@var{size}
 Set the size in bytes of UDP packets.
@@ -1373,12 +1568,12 @@
 the specified peer address/port.
 
 @item sources=@var{address}[,@var{address}]
-Only receive packets sent to the multicast group from one of the
-specified sender IP addresses.
+Only receive packets sent from the specified addresses. In case of multicast,
+also subscribe to multicast traffic coming from these addresses only.
 
 @item block=@var{address}[,@var{address}]
-Ignore packets sent to the multicast group from the specified
-sender IP addresses.
+Ignore packets sent from the specified addresses. In case of multicast, also
+exclude the source addresses in the multicast subscription.
 
 @item fifo_size=@var{units}
 Set the UDP receiving circular buffer size, expressed as a number of

diff --git a/doc/utils.texi b/doc/utils.texi
index e635118..d55dd31 100644
--- a/doc/utils.texi
+++ b/doc/utils.texi

@@ -1057,33 +1057,3 @@
 @end table
 
 @c man end EXPRESSION EVALUATION
-
-@chapter OpenCL Options
-@c man begin OPENCL OPTIONS
-
-When FFmpeg is configured with @code{--enable-opencl}, it is possible
-to set the options for the global OpenCL context.
-
-The list of supported options follows:
-
-@table @option
-@item build_options
-Set build options used to compile the registered kernels.
-
-See reference "OpenCL Specification Version: 1.2 chapter 5.6.4".
-
-@item platform_idx
-Select the index of the platform to run OpenCL code.
-
-The specified index must be one of the indexes in the device list
-which can be obtained with @code{ffmpeg -opencl_bench} or @code{av_opencl_get_device_list()}.
-
-@item device_idx
-Select the index of the device used to run OpenCL code.
-
-The specified index must be one of the indexes in the device list which
-can be obtained with @code{ffmpeg -opencl_bench} or @code{av_opencl_get_device_list()}.
-
-@end table
-
-@c man end OPENCL OPTIONS

diff --git a/doc/writing_filters.txt b/doc/writing_filters.txt
index 5cd4ecd..98b9c6f 100644
--- a/doc/writing_filters.txt
+++ b/doc/writing_filters.txt

@@ -31,10 +31,8 @@
 That's it, your new playground is ready.
 
 Some little details about what's going on:
-libavfilter/allfilters.c:avfilter_register_all() is called at runtime to create
-a list of the available filters, but it's important to know that this file is
-also parsed by the configure script, which in turn will define variables for
-the build system and the C:
+libavfilter/allfilters.c:this file is parsed by the configure script, which in turn
+will define variables for the build system and the C:
 
     --- after running configure ---
 

diff --git a/ffbuild/common.mak b/ffbuild/common.mak
index e168fb2..eb41b05 100644
--- a/ffbuild/common.mak
+++ b/ffbuild/common.mak

@@ -119,7 +119,7 @@
 TESTPROGS += $(TESTPROGS-yes)
 
 LDLIBS       = $(FFLIBS:%=%$(BUILDSUF))
-FFEXTRALIBS := $(LDLIBS:%=$(LD_LIB)) $(EXTRALIBS)
+FFEXTRALIBS := $(LDLIBS:%=$(LD_LIB)) $(foreach lib,EXTRALIBS-$(NAME) $(FFLIBS:%=EXTRALIBS-%),$($(lib))) $(EXTRALIBS)
 
 OBJS      := $(sort $(OBJS:%=$(SUBDIR)%))
 SLIBOBJS  := $(sort $(SLIBOBJS:%=$(SUBDIR)%))
@@ -163,8 +163,7 @@
 
 OBJDIRS := $(OBJDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) $(TESTOBJS))
 
-CLEANSUFFIXES     = *.d *.o *~ *.h.c *.gcda *.gcno *.map *.ver *.version *.ho *$(DEFAULT_X86ASMD).asm *.ptx *.ptx.c
-DISTCLEANSUFFIXES = *.pc
+CLEANSUFFIXES     = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.pc *.ptx *.ptx.c *.ver *.version *$(DEFAULT_X86ASMD).asm *~
 LIBSUFFIXES       = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
 
 define RULES

diff --git a/ffbuild/library.mak b/ffbuild/library.mak
index 4191edc..612bacb 100644
--- a/ffbuild/library.mak
+++ b/ffbuild/library.mak

@@ -1,6 +1,6 @@
 include $(SRC_PATH)/ffbuild/common.mak
 
-ifeq (,$(filter %clean,$(MAKECMDGOALS)))
+ifeq (,$(filter %clean config,$(MAKECMDGOALS)))
 -include $(SUBDIR)lib$(NAME).version
 endif
 
@@ -31,13 +31,15 @@
 $(TOOLS):     THISLIB = $(FULLNAME:%=$(LD_LIB))
 $(TESTPROGS): THISLIB = $(SUBDIR)$(LIBNAME)
 
+$(LIBOBJS): CPPFLAGS += -DBUILDING_$(NAME)
+
 $(TESTPROGS) $(TOOLS): %$(EXESUF): %.o
-	$$(LD) $(LDFLAGS) $(LDEXEFLAGS) $$(LD_O) $$(filter %.o,$$^) $$(THISLIB) $(FFEXTRALIBS) $$(ELIBS)
+	$$(LD) $(LDFLAGS) $(LDEXEFLAGS) $$(LD_O) $$(filter %.o,$$^) $$(THISLIB) $(FFEXTRALIBS) $$(EXTRALIBS-$$(*F)) $$(ELIBS)
 
 $(SUBDIR)lib$(NAME).version: $(SUBDIR)version.h | $(SUBDIR)
 	$$(M) $$(SRC_PATH)/ffbuild/libversion.sh $(NAME) $$< > $$@
 
-$(SUBDIR)lib$(FULLNAME).pc: $(SUBDIR)version.h | $(SUBDIR)
+$(SUBDIR)lib$(FULLNAME).pc: $(SUBDIR)version.h ffbuild/config.sh | $(SUBDIR)
 	$$(M) $$(SRC_PATH)/ffbuild/pkgconfig_generate.sh $(NAME) "$(DESC)"
 
 $(SUBDIR)lib$(NAME).ver: $(SUBDIR)lib$(NAME).v $(OBJS)
@@ -48,7 +50,7 @@
 
 $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SLIBOBJS) $(SUBDIR)lib$(NAME).ver
 	$(SLIB_CREATE_DEF_CMD)
-	$$(LD) $(SHFLAGS) $(LDFLAGS) $(LDLIBFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS)
+	$$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS)
 	$(SLIB_EXTRA_CMD)
 
 ifdef SUBDIR
@@ -59,10 +61,6 @@
 	$(RM) $(addprefix $(SUBDIR),$(CLEANFILES) $(CLEANSUFFIXES) $(LIBSUFFIXES)) \
 	    $(CLEANSUFFIXES:%=$(SUBDIR)$(ARCH)/%) $(CLEANSUFFIXES:%=$(SUBDIR)tests/%)
 
-distclean:: clean
-	$(RM) $(DISTCLEANSUFFIXES:%=$(SUBDIR)%) $(DISTCLEANSUFFIXES:%=$(SUBDIR)$(ARCH)/%) \
-            $(DISTCLEANSUFFIXES:%=$(SUBDIR)tests/%)
-
 install-lib$(NAME)-shared: $(SUBDIR)$(SLIBNAME)
 	$(Q)mkdir -p "$(SHLIBDIR)"
 	$$(INSTALL) -m 755 $$< "$(SHLIBDIR)/$(SLIB_INSTALL_NAME)"
@@ -95,8 +93,10 @@
 
 uninstall-headers::
 	$(RM) $(addprefix "$(INCINSTDIR)/",$(HEADERS) $(BUILT_HEADERS))
-	$(RM) "$(PKGCONFIGDIR)/lib$(FULLNAME).pc"
 	-rmdir "$(INCINSTDIR)"
+
+uninstall-pkgconfig::
+	$(RM) "$(PKGCONFIGDIR)/lib$(FULLNAME).pc"
 endef
 
 $(eval $(RULES))

diff --git a/ffmpeg_generated.gni b/ffmpeg_generated.gni
index 72c5a6e..f427f51 100644
--- a/ffmpeg_generated.gni
+++ b/ffmpeg_generated.gni

@@ -13,7 +13,12 @@
 ffmpeg_yasm_sources = []
 
 ffmpeg_c_sources += [
+  "libavcodec/ac3_parser.c",
+  "libavcodec/ac3tab.c",
+  "libavcodec/adts_parser.c",
   "libavcodec/allcodecs.c",
+  "libavcodec/aptx.c",
+  "libavcodec/audio_frame_queue.c",
   "libavcodec/avdct.c",
   "libavcodec/avfft.c",
   "libavcodec/avpacket.c",
@@ -24,6 +29,9 @@
   "libavcodec/bsf.c",
   "libavcodec/codec_desc.c",
   "libavcodec/d3d11va.c",
+  "libavcodec/dct.c",
+  "libavcodec/dct32_fixed.c",
+  "libavcodec/dct32_float.c",
   "libavcodec/decode.c",
   "libavcodec/dirac.c",
   "libavcodec/dv_profile.c",
@@ -50,12 +58,23 @@
   "libavcodec/mjpegenc_huffman.c",
   "libavcodec/mpeg12framerate.c",
   "libavcodec/mpeg4audio.c",
+  "libavcodec/mpegaudio.c",
+  "libavcodec/mpegaudio_parser.c",
   "libavcodec/mpegaudiodata.c",
+  "libavcodec/mpegaudiodec_fixed.c",
+  "libavcodec/mpegaudiodecheader.c",
+  "libavcodec/mpegaudiodsp.c",
+  "libavcodec/mpegaudiodsp_data.c",
+  "libavcodec/mpegaudiodsp_fixed.c",
+  "libavcodec/mpegaudiodsp_float.c",
   "libavcodec/null_bsf.c",
   "libavcodec/options.c",
   "libavcodec/opus.c",
   "libavcodec/opus_parser.c",
+  "libavcodec/opus_rc.c",
+  "libavcodec/opustab.c",
   "libavcodec/parser.c",
+  "libavcodec/parsers.c",
   "libavcodec/pcm.c",
   "libavcodec/profiles.c",
   "libavcodec/pthread.c",
@@ -64,6 +83,9 @@
   "libavcodec/qsv_api.c",
   "libavcodec/raw.c",
   "libavcodec/rdft.c",
+  "libavcodec/sbc.c",
+  "libavcodec/sbcdec.c",
+  "libavcodec/sbcdec_data.c",
   "libavcodec/utils.c",
   "libavcodec/videodsp.c",
   "libavcodec/vorbis.c",
@@ -93,6 +115,10 @@
   "libavformat/matroska.c",
   "libavformat/matroskadec.c",
   "libavformat/metadata.c",
+  "libavformat/mov.c",
+  "libavformat/mov_chan.c",
+  "libavformat/mov_esds.c",
+  "libavformat/mp3dec.c",
   "libavformat/mux.c",
   "libavformat/oggdec.c",
   "libavformat/oggparsecelt.c",
@@ -134,6 +160,7 @@
   "libavutil/dict.c",
   "libavutil/display.c",
   "libavutil/downmix_info.c",
+  "libavutil/encryption_info.c",
   "libavutil/error.c",
   "libavutil/eval.c",
   "libavutil/fifo.c",
@@ -177,6 +204,7 @@
     "libavcodec/aarch64/fft_init_aarch64.c",
     "libavcodec/aarch64/h264pred_init.c",
     "libavcodec/aarch64/hpeldsp_init_aarch64.c",
+    "libavcodec/aarch64/mpegaudiodsp_init.c",
     "libavcodec/aarch64/videodsp_init.c",
     "libavcodec/aarch64/vorbisdsp_init.c",
     "libavutil/aarch64/cpu.c",
@@ -187,6 +215,7 @@
     "libavcodec/aarch64/h264pred_neon.S",
     "libavcodec/aarch64/hpeldsp_neon.S",
     "libavcodec/aarch64/mdct_neon.S",
+    "libavcodec/aarch64/mpegaudiodsp_neon.S",
     "libavcodec/aarch64/videodsp.S",
     "libavcodec/aarch64/vorbisdsp_neon.S",
     "libavutil/aarch64/float_dsp_neon.S",
@@ -197,16 +226,15 @@
   ffmpeg_c_sources += [
     "libavcodec/aac_ac3_parser.c",
     "libavcodec/aac_parser.c",
-    "libavcodec/aacadtsdec.c",
     "libavcodec/aacdec.c",
     "libavcodec/aacps_float.c",
     "libavcodec/aacpsdsp_float.c",
     "libavcodec/aacsbr.c",
     "libavcodec/aactab.c",
-    "libavcodec/ac3tab.c",
     "libavcodec/acelp_filters.c",
     "libavcodec/acelp_pitch_delay.c",
     "libavcodec/acelp_vectors.c",
+    "libavcodec/adts_header.c",
     "libavcodec/amrnbdec.c",
     "libavcodec/amrwbdec.c",
     "libavcodec/blockdsp.c",
@@ -214,9 +242,6 @@
     "libavcodec/cbrt_data.c",
     "libavcodec/celp_filters.c",
     "libavcodec/celp_math.c",
-    "libavcodec/dct.c",
-    "libavcodec/dct32_fixed.c",
-    "libavcodec/dct32_float.c",
     "libavcodec/error_resilience.c",
     "libavcodec/exif.c",
     "libavcodec/fdctdsp.c",
@@ -262,14 +287,6 @@
     "libavcodec/mpeg4video_parser.c",
     "libavcodec/mpeg4videodec.c",
     "libavcodec/mpeg_er.c",
-    "libavcodec/mpegaudio.c",
-    "libavcodec/mpegaudio_parser.c",
-    "libavcodec/mpegaudiodec_fixed.c",
-    "libavcodec/mpegaudiodecheader.c",
-    "libavcodec/mpegaudiodsp.c",
-    "libavcodec/mpegaudiodsp_data.c",
-    "libavcodec/mpegaudiodsp_fixed.c",
-    "libavcodec/mpegaudiodsp_float.c",
     "libavcodec/mpegpicture.c",
     "libavcodec/mpegutils.c",
     "libavcodec/mpegvideo.c",
@@ -293,9 +310,6 @@
     "libavformat/avidec.c",
     "libavformat/gsmdec.c",
     "libavformat/img2.c",
-    "libavformat/mov.c",
-    "libavformat/mov_chan.c",
-    "libavformat/mp3dec.c",
   ]
 }
 
@@ -303,6 +317,7 @@
   ffmpeg_c_sources += [
     "libavcodec/vp9.c",
     "libavcodec/vp9_parser.c",
+    "libavcodec/vp9_superframe_split_bsf.c",
     "libavcodec/vp9block.c",
     "libavcodec/vp9data.c",
     "libavcodec/vp9dsp.c",
@@ -314,11 +329,13 @@
     "libavcodec/vp9prob.c",
     "libavcodec/vp9recon.c",
     "libavcodec/x86/constants.c",
+    "libavcodec/x86/dct_init.c",
     "libavcodec/x86/fft_init.c",
     "libavcodec/x86/flacdsp_init.c",
     "libavcodec/x86/h264_intrapred_init.c",
     "libavcodec/x86/hpeldsp_init.c",
     "libavcodec/x86/hpeldsp_vp3_init.c",
+    "libavcodec/x86/mpegaudiodsp.c",
     "libavcodec/x86/videodsp_init.c",
     "libavcodec/x86/vorbisdsp_init.c",
     "libavcodec/x86/vp3dsp_init.c",
@@ -334,6 +351,7 @@
     "libavutil/x86/lls_init.c",
   ]
   ffmpeg_yasm_sources += [
+    "libavcodec/x86/dct32.asm",
     "libavcodec/x86/fft.asm",
     "libavcodec/x86/flacdsp.asm",
     "libavcodec/x86/fpel.asm",
@@ -341,6 +359,7 @@
     "libavcodec/x86/h264_intrapred_10bit.asm",
     "libavcodec/x86/hpeldsp.asm",
     "libavcodec/x86/hpeldsp_vp3.asm",
+    "libavcodec/x86/imdct36.asm",
     "libavcodec/x86/videodsp.asm",
     "libavcodec/x86/vorbisdsp.asm",
     "libavcodec/x86/vp3dsp.asm",
@@ -369,7 +388,6 @@
     "libavcodec/aarch64/h264dsp_init_aarch64.c",
     "libavcodec/aarch64/h264qpel_init_aarch64.c",
     "libavcodec/aarch64/idctdsp_init_aarch64.c",
-    "libavcodec/aarch64/mpegaudiodsp_init.c",
     "libavcodec/aarch64/sbrdsp_init_aarch64.c",
     "libavcodec/neon/mpegvideo.c",
   ]
@@ -379,7 +397,6 @@
     "libavcodec/aarch64/h264dsp_neon.S",
     "libavcodec/aarch64/h264idct_neon.S",
     "libavcodec/aarch64/h264qpel_neon.S",
-    "libavcodec/aarch64/mpegaudiodsp_neon.S",
     "libavcodec/aarch64/sbrdsp_neon.S",
     "libavcodec/aarch64/simple_idct_neon.S",
   ]
@@ -389,7 +406,6 @@
   ffmpeg_c_sources += [
     "libavcodec/x86/aacpsdsp_init.c",
     "libavcodec/x86/blockdsp_init.c",
-    "libavcodec/x86/dct_init.c",
     "libavcodec/x86/fdct.c",
     "libavcodec/x86/fdctdsp_init.c",
     "libavcodec/x86/h263dsp_init.c",
@@ -399,7 +415,6 @@
     "libavcodec/x86/idctdsp_init.c",
     "libavcodec/x86/mdct15_init.c",
     "libavcodec/x86/me_cmp_init.c",
-    "libavcodec/x86/mpegaudiodsp.c",
     "libavcodec/x86/mpegvideo.c",
     "libavcodec/x86/mpegvideodsp.c",
     "libavcodec/x86/pixblockdsp_init.c",
@@ -410,7 +425,6 @@
   ffmpeg_yasm_sources += [
     "libavcodec/x86/aacpsdsp.asm",
     "libavcodec/x86/blockdsp.asm",
-    "libavcodec/x86/dct32.asm",
     "libavcodec/x86/h263_loopfilter.asm",
     "libavcodec/x86/h264_chromamc.asm",
     "libavcodec/x86/h264_chromamc_10bit.asm",
@@ -423,7 +437,6 @@
     "libavcodec/x86/h264_weight.asm",
     "libavcodec/x86/h264_weight_10bit.asm",
     "libavcodec/x86/idctdsp.asm",
-    "libavcodec/x86/imdct36.asm",
     "libavcodec/x86/mdct15.asm",
     "libavcodec/x86/me_cmp.asm",
     "libavcodec/x86/pixblockdsp.asm",

diff --git a/fftools/Makefile b/fftools/Makefile
index c867814..c3a0ff3 100644
--- a/fftools/Makefile
+++ b/fftools/Makefile

@@ -1,12 +1,11 @@
 AVPROGS-$(CONFIG_FFMPEG)   += ffmpeg
 AVPROGS-$(CONFIG_FFPLAY)   += ffplay
 AVPROGS-$(CONFIG_FFPROBE)  += ffprobe
-AVPROGS-$(CONFIG_FFSERVER) += ffserver
 
 AVPROGS     := $(AVPROGS-yes:%=%$(PROGSSUF)$(EXESUF))
 PROGS       += $(AVPROGS)
 
-AVBASENAMES  = ffmpeg ffplay ffprobe ffserver
+AVBASENAMES  = ffmpeg ffplay ffprobe
 ALLAVPROGS   = $(AVBASENAMES:%=%$(PROGSSUF)$(EXESUF))
 ALLAVPROGS_G = $(AVBASENAMES:%=%$(PROGSSUF)_g$(EXESUF))
 
@@ -17,7 +16,6 @@
 OBJS-ffmpeg-$(CONFIG_VDA)          += fftools/ffmpeg_videotoolbox.o
 endif
 OBJS-ffmpeg-$(CONFIG_VIDEOTOOLBOX) += fftools/ffmpeg_videotoolbox.o
-OBJS-ffserver                      += fftools/ffserver_config.o
 
 define DOFFTOOL
 OBJS-$(1) += fftools/cmdutils.o fftools/$(1).o $(OBJS-$(1)-yes)
@@ -29,7 +27,6 @@
 -include $$(OBJS-$(1):.o=.d)
 endef
 
-$(foreach P,$(AVPROGS-yes),$(eval OBJS-$(P)-$(CONFIG_OPENCL) += fftools/cmdutils_opencl.o))
 $(foreach P,$(AVPROGS-yes),$(eval $(call DOFFTOOL,$(P))))
 
 all: $(AVPROGS)

diff --git a/fftools/cmdutils.h b/fftools/cmdutils.h
index 2997ee3..6e2e0a2 100644
--- a/fftools/cmdutils.h
+++ b/fftools/cmdutils.h

@@ -105,12 +105,6 @@
 
 int opt_codec_debug(void *optctx, const char *opt, const char *arg);
 
-#if CONFIG_OPENCL
-int opt_opencl(void *optctx, const char *opt, const char *arg);
-
-int opt_opencl_bench(void *optctx, const char *opt, const char *arg);
-#endif
-
 /**
  * Limit the execution time.
  */
@@ -155,6 +149,7 @@
         uint8_t *str;
         int        i;
         int64_t  i64;
+        uint64_t ui64;
         float      f;
         double   dbl;
     } u;
@@ -206,17 +201,6 @@
 void show_help_options(const OptionDef *options, const char *msg, int req_flags,
                        int rej_flags, int alt_flags);
 
-#if CONFIG_OPENCL
-#define CMDUTILS_COMMON_OPTIONS_OPENCL                                                                                  \
-    { "opencl_bench", OPT_EXIT, {.func_arg = opt_opencl_bench},                                                         \
-       "run benchmark on all OpenCL devices and show results" },                                                        \
-    { "opencl_options", HAS_ARG, {.func_arg = opt_opencl},                                                              \
-       "set OpenCL environment options" },                                                                              \
-
-#else
-#define CMDUTILS_COMMON_OPTIONS_OPENCL
-#endif
-
 #if CONFIG_AVDEVICE
 #define CMDUTILS_COMMON_OPTIONS_AVDEVICE                                                                                \
     { "sources"    , OPT_EXIT | HAS_ARG, { .func_arg = show_sources },                                                  \
@@ -256,7 +240,6 @@
     { "max_alloc",   HAS_ARG,              { .func_arg = opt_max_alloc },    "set maximum size of a single allocated block", "bytes" }, \
     { "cpuflags",    HAS_ARG | OPT_EXPERT, { .func_arg = opt_cpuflags },     "force specific cpu flags", "flags" },     \
     { "hide_banner", OPT_BOOL | OPT_EXPERT, {&hide_banner},     "do not show program banner", "hide_banner" },          \
-    CMDUTILS_COMMON_OPTIONS_OPENCL                                                                                      \
     CMDUTILS_COMMON_OPTIONS_AVDEVICE                                                                                    \
 
 /**
@@ -642,6 +625,9 @@
 #define GET_PIX_FMT_NAME(pix_fmt)\
     const char *name = av_get_pix_fmt_name(pix_fmt);
 
+#define GET_CODEC_NAME(id)\
+    const char *name = avcodec_descriptor_get(id)->name;
+
 #define GET_SAMPLE_FMT_NAME(sample_fmt)\
     const char *name = av_get_sample_fmt_name(sample_fmt)
 

diff --git a/fftools/cmdutils_opencl.c b/fftools/cmdutils_opencl.c
deleted file mode 100644
index 906aef4..0000000
--- a/fftools/cmdutils_opencl.c
+++ /dev/null

@@ -1,283 +0,0 @@
-/*
- * Copyright (C) 2013 Lenny Wang
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/opt.h"
-#include "libavutil/time.h"
-#include "libavutil/log.h"
-#include "libavutil/opencl.h"
-#include "libavutil/avstring.h"
-#include "cmdutils.h"
-
-typedef struct {
-    int platform_idx;
-    int device_idx;
-    char device_name[64];
-    int64_t runtime;
-} OpenCLDeviceBenchmark;
-
-const char *ocl_bench_source = AV_OPENCL_KERNEL(
-inline unsigned char clip_uint8(int a)
-{
-    if (a & (~0xFF))
-        return (-a)>>31;
-    else
-        return a;
-}
-
-kernel void unsharp_bench(
-                    global unsigned char *src,
-                    global unsigned char *dst,
-                    global int *mask,
-                    int width,
-                    int height)
-{
-    int i, j, local_idx, lc_idx, sum = 0;
-    int2 thread_idx, block_idx, global_idx, lm_idx;
-    thread_idx.x = get_local_id(0);
-    thread_idx.y = get_local_id(1);
-    block_idx.x = get_group_id(0);
-    block_idx.y = get_group_id(1);
-    global_idx.x = get_global_id(0);
-    global_idx.y = get_global_id(1);
-    local uchar data[32][32];
-    local int lc[128];
-
-    for (i = 0; i <= 1; i++) {
-        lm_idx.y = -8 + (block_idx.y + i) * 16 + thread_idx.y;
-        lm_idx.y = lm_idx.y < 0 ? 0 : lm_idx.y;
-        lm_idx.y = lm_idx.y >= height ? height - 1: lm_idx.y;
-        for (j = 0; j <= 1; j++) {
-            lm_idx.x = -8 + (block_idx.x + j) * 16 + thread_idx.x;
-            lm_idx.x = lm_idx.x < 0 ? 0 : lm_idx.x;
-            lm_idx.x = lm_idx.x >= width ? width - 1: lm_idx.x;
-            data[i*16 + thread_idx.y][j*16 + thread_idx.x] = src[lm_idx.y*width + lm_idx.x];
-        }
-    }
-    local_idx = thread_idx.y*16 + thread_idx.x;
-    if (local_idx < 128)
-        lc[local_idx] = mask[local_idx];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    \n#pragma unroll\n
-    for (i = -4; i <= 4; i++) {
-        lm_idx.y = 8 + i + thread_idx.y;
-        \n#pragma unroll\n
-        for (j = -4; j <= 4; j++) {
-            lm_idx.x = 8 + j + thread_idx.x;
-            lc_idx = (i + 4)*8 + j + 4;
-            sum += (int)data[lm_idx.y][lm_idx.x] * lc[lc_idx];
-        }
-    }
-    int temp = (int)data[thread_idx.y + 8][thread_idx.x + 8];
-    int res = temp + (((temp - (int)((sum + 1<<15) >> 16))) >> 16);
-    if (global_idx.x < width && global_idx.y < height)
-        dst[global_idx.x + global_idx.y*width] = clip_uint8(res);
-}
-);
-
-#define OCLCHECK(method, ... )                                                 \
-do {                                                                           \
-    status = method(__VA_ARGS__);                                              \
-    if (status != CL_SUCCESS) {                                                \
-        av_log(NULL, AV_LOG_ERROR, # method " error '%s'\n",                   \
-               av_opencl_errstr(status));                                      \
-        ret = AVERROR_EXTERNAL;                                                \
-        goto end;                                                              \
-    }                                                                          \
-} while (0)
-
-#define CREATEBUF(out, flags, size)                                            \
-do {                                                                           \
-    out = clCreateBuffer(ext_opencl_env->context, flags, size, NULL, &status); \
-    if (status != CL_SUCCESS) {                                                \
-        av_log(NULL, AV_LOG_ERROR, "Could not create OpenCL buffer\n");        \
-        ret = AVERROR_EXTERNAL;                                                \
-        goto end;                                                              \
-    }                                                                          \
-} while (0)
-
-static void fill_rand_int(int *data, int n)
-{
-    int i;
-    srand(av_gettime());
-    for (i = 0; i < n; i++)
-        data[i] = rand();
-}
-
-#define OPENCL_NB_ITER 5
-static int64_t run_opencl_bench(AVOpenCLExternalEnv *ext_opencl_env)
-{
-    int i, arg = 0, width = 1920, height = 1088;
-    int64_t start, ret = 0;
-    cl_int status;
-    size_t kernel_len;
-    char *inbuf;
-    int *mask = NULL;
-    int buf_size = width * height * sizeof(char);
-    int mask_size = sizeof(uint32_t) * 128;
-
-    cl_mem cl_mask = NULL, cl_inbuf = NULL, cl_outbuf = NULL;
-    cl_kernel kernel = NULL;
-    cl_program program = NULL;
-    size_t local_work_size_2d[2] = {16, 16};
-    size_t global_work_size_2d[2] = {(size_t)width, (size_t)height};
-
-    if (!(inbuf = av_malloc(buf_size)) || !(mask = av_malloc(mask_size))) {
-        av_log(NULL, AV_LOG_ERROR, "Out of memory\n");
-        ret = AVERROR(ENOMEM);
-        goto end;
-    }
-    fill_rand_int((int*)inbuf, buf_size/4);
-    fill_rand_int(mask, mask_size/4);
-
-    CREATEBUF(cl_mask, CL_MEM_READ_ONLY, mask_size);
-    CREATEBUF(cl_inbuf, CL_MEM_READ_ONLY, buf_size);
-    CREATEBUF(cl_outbuf, CL_MEM_READ_WRITE, buf_size);
-
-    kernel_len = strlen(ocl_bench_source);
-    program = clCreateProgramWithSource(ext_opencl_env->context, 1, &ocl_bench_source,
-                                        &kernel_len, &status);
-    if (status != CL_SUCCESS || !program) {
-        av_log(NULL, AV_LOG_ERROR, "OpenCL unable to create benchmark program\n");
-        ret = AVERROR_EXTERNAL;
-        goto end;
-    }
-    status = clBuildProgram(program, 1, &(ext_opencl_env->device_id), NULL, NULL, NULL);
-    if (status != CL_SUCCESS) {
-        av_log(NULL, AV_LOG_ERROR, "OpenCL unable to build benchmark program\n");
-        ret = AVERROR_EXTERNAL;
-        goto end;
-    }
-    kernel = clCreateKernel(program, "unsharp_bench", &status);
-    if (status != CL_SUCCESS) {
-        av_log(NULL, AV_LOG_ERROR, "OpenCL unable to create benchmark kernel\n");
-        ret = AVERROR_EXTERNAL;
-        goto end;
-    }
-
-    OCLCHECK(clEnqueueWriteBuffer, ext_opencl_env->command_queue, cl_inbuf, CL_TRUE, 0,
-             buf_size, inbuf, 0, NULL, NULL);
-    OCLCHECK(clEnqueueWriteBuffer, ext_opencl_env->command_queue, cl_mask, CL_TRUE, 0,
-             mask_size, mask, 0, NULL, NULL);
-    OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_mem), &cl_inbuf);
-    OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_mem), &cl_outbuf);
-    OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_mem), &cl_mask);
-    OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_int), &width);
-    OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_int), &height);
-
-    start = av_gettime_relative();
-    for (i = 0; i < OPENCL_NB_ITER; i++)
-        OCLCHECK(clEnqueueNDRangeKernel, ext_opencl_env->command_queue, kernel, 2, NULL,
-                 global_work_size_2d, local_work_size_2d, 0, NULL, NULL);
-    clFinish(ext_opencl_env->command_queue);
-    ret = (av_gettime_relative() - start)/OPENCL_NB_ITER;
-end:
-    if (kernel)
-        clReleaseKernel(kernel);
-    if (program)
-        clReleaseProgram(program);
-    if (cl_inbuf)
-        clReleaseMemObject(cl_inbuf);
-    if (cl_outbuf)
-        clReleaseMemObject(cl_outbuf);
-    if (cl_mask)
-        clReleaseMemObject(cl_mask);
-    av_free(inbuf);
-    av_free(mask);
-    return ret;
-}
-
-static int compare_ocl_device_desc(const void *a, const void *b)
-{
-    const OpenCLDeviceBenchmark* va = (const OpenCLDeviceBenchmark*)a;
-    const OpenCLDeviceBenchmark* vb = (const OpenCLDeviceBenchmark*)b;
-    return FFDIFFSIGN(va->runtime , vb->runtime);
-}
-
-int opt_opencl_bench(void *optctx, const char *opt, const char *arg)
-{
-    int i, j, nb_devices = 0, count = 0, ret = 0;
-    int64_t score = 0;
-    AVOpenCLDeviceList *device_list;
-    AVOpenCLDeviceNode *device_node = NULL;
-    OpenCLDeviceBenchmark *devices = NULL;
-    cl_platform_id platform;
-
-    ret = av_opencl_get_device_list(&device_list);
-    if (ret < 0) {
-        return ret;
-    }
-    for (i = 0; i < device_list->platform_num; i++)
-        nb_devices += device_list->platform_node[i]->device_num;
-    if (!nb_devices) {
-        av_log(NULL, AV_LOG_ERROR, "No OpenCL device detected!\n");
-        av_opencl_free_device_list(&device_list);
-        return AVERROR(EINVAL);
-    }
-    if (!(devices = av_malloc_array(nb_devices, sizeof(OpenCLDeviceBenchmark)))) {
-        av_log(NULL, AV_LOG_ERROR, "Could not allocate buffer\n");
-        av_opencl_free_device_list(&device_list);
-        return AVERROR(ENOMEM);
-    }
-
-    for (i = 0; i < device_list->platform_num; i++) {
-        for (j = 0; j < device_list->platform_node[i]->device_num; j++) {
-            device_node = device_list->platform_node[i]->device_node[j];
-            platform = device_list->platform_node[i]->platform_id;
-            score = av_opencl_benchmark(device_node, platform, run_opencl_bench);
-            if (score > 0) {
-                devices[count].platform_idx = i;
-                devices[count].device_idx = j;
-                devices[count].runtime = score;
-                av_strlcpy(devices[count].device_name, device_node->device_name,
-                           sizeof(devices[count].device_name));
-                count++;
-            }
-        }
-    }
-    qsort(devices, count, sizeof(OpenCLDeviceBenchmark), compare_ocl_device_desc);
-    fprintf(stderr, "platform_idx\tdevice_idx\tdevice_name\truntime\n");
-    for (i = 0; i < count; i++)
-        fprintf(stdout, "%d\t%d\t%s\t%"PRId64"\n",
-                devices[i].platform_idx, devices[i].device_idx,
-                devices[i].device_name, devices[i].runtime);
-
-    av_opencl_free_device_list(&device_list);
-    av_free(devices);
-    return 0;
-}
-
-int opt_opencl(void *optctx, const char *opt, const char *arg)
-{
-    char *key, *value;
-    const char *opts = arg;
-    int ret = 0;
-    while (*opts) {
-        ret = av_opt_get_key_value(&opts, "=", ":", 0, &key, &value);
-        if (ret < 0)
-            return ret;
-        ret = av_opencl_set_option(key, value);
-        if (ret < 0)
-            return ret;
-        if (*opts)
-            opts++;
-    }
-    return ret;
-}

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index d581b40..da4259a 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c

@@ -61,6 +61,7 @@
 #include "libavutil/timestamp.h"
 #include "libavutil/bprint.h"
 #include "libavutil/time.h"
+#include "libavutil/thread.h"
 #include "libavutil/threadmessage.h"
 #include "libavcodec/mathops.h"
 #include "libavformat/os_support.h"
@@ -98,10 +99,6 @@
 #include <conio.h>
 #endif
 
-#if HAVE_PTHREADS
-#include <pthread.h>
-#endif
-
 #include <time.h>
 
 #include "ffmpeg.h"
@@ -123,8 +120,14 @@
     NULL
 };
 
+typedef struct BenchmarkTimeStamps {
+    int64_t real_usec;
+    int64_t user_usec;
+    int64_t sys_usec;
+} BenchmarkTimeStamps;
+
 static void do_video_stats(OutputStream *ost, int frame_size);
-static int64_t getutime(void);
+static BenchmarkTimeStamps get_benchmark_time_stamps(void);
 static int64_t getmaxrss(void);
 static int ifilter_has_all_input_formats(FilterGraph *fg);
 
@@ -136,7 +139,7 @@
 
 static int want_sdp = 1;
 
-static int current_time;
+static BenchmarkTimeStamps current_time;
 AVIOContext *progress_avio = NULL;
 
 static uint8_t *subtitle_out;
@@ -161,7 +164,7 @@
 static int restore_tty;
 #endif
 
-#if HAVE_PTHREADS
+#if HAVE_THREADS
 static void free_input_threads(void);
 #endif
 
@@ -220,13 +223,18 @@
 {
     AVFrame *frame = ist->sub2video.frame;
     int i;
+    int ret;
 
     av_assert1(frame->data[0]);
     ist->sub2video.last_pts = frame->pts = pts;
-    for (i = 0; i < ist->nb_filters; i++)
-        av_buffersrc_add_frame_flags(ist->filters[i]->filter, frame,
-                                     AV_BUFFERSRC_FLAG_KEEP_REF |
-                                     AV_BUFFERSRC_FLAG_PUSH);
+    for (i = 0; i < ist->nb_filters; i++) {
+        ret = av_buffersrc_add_frame_flags(ist->filters[i]->filter, frame,
+                                           AV_BUFFERSRC_FLAG_KEEP_REF |
+                                           AV_BUFFERSRC_FLAG_PUSH);
+        if (ret != AVERROR_EOF && ret < 0)
+            av_log(NULL, AV_LOG_WARNING, "Error while add the frame to buffer source(%s).\n",
+                   av_err2str(ret));
+    }
 }
 
 void sub2video_update(InputStream *ist, AVSubtitle *sub)
@@ -283,7 +291,8 @@
         /* do not send the heartbeat frame if the subtitle is already ahead */
         if (pts2 <= ist2->sub2video.last_pts)
             continue;
-        if (pts2 >= ist2->sub2video.end_pts || !ist2->sub2video.frame->data[0])
+        if (pts2 >= ist2->sub2video.end_pts ||
+            (!ist2->sub2video.frame->data[0] && ist2->sub2video.end_pts < INT64_MAX))
             sub2video_update(ist2, NULL);
         for (j = 0, nb_reqs = 0; j < ist2->nb_filters; j++)
             nb_reqs += av_buffersrc_get_nb_failed_requests(ist2->filters[j]->filter);
@@ -295,11 +304,15 @@
 static void sub2video_flush(InputStream *ist)
 {
     int i;
+    int ret;
 
     if (ist->sub2video.end_pts < INT64_MAX)
         sub2video_update(ist, NULL);
-    for (i = 0; i < ist->nb_filters; i++)
-        av_buffersrc_add_frame(ist->filters[i]->filter, NULL);
+    for (i = 0; i < ist->nb_filters; i++) {
+        ret = av_buffersrc_add_frame(ist->filters[i]->filter, NULL);
+        if (ret != AVERROR_EOF && ret < 0)
+            av_log(NULL, AV_LOG_WARNING, "Flush the frame error.\n");
+    }
 }
 
 /* end of sub2video hack */
@@ -327,13 +340,14 @@
 static void
 sigterm_handler(int sig)
 {
+    int ret;
     received_sigterm = sig;
     received_nb_signals++;
     term_exit_sigsafe();
     if(received_nb_signals > 3) {
-        write(2/*STDERR_FILENO*/, "Received > 3 system signals, hard exiting\n",
-                           strlen("Received > 3 system signals, hard exiting\n"));
-
+        ret = write(2/*STDERR_FILENO*/, "Received > 3 system signals, hard exiting\n",
+                    strlen("Received > 3 system signals, hard exiting\n"));
+        if (ret < 0) { /* Do nothing */ };
         exit(123);
     }
 }
@@ -399,6 +413,9 @@
 #ifdef SIGXCPU
     signal(SIGXCPU, sigterm_handler);
 #endif
+#ifdef SIGPIPE
+    signal(SIGPIPE, SIG_IGN); /* Broken pipe (POSIX). */
+#endif
 #if HAVE_SETCONSOLECTRLHANDLER
     SetConsoleCtrlHandler((PHANDLER_ROUTINE) CtrlHandler, TRUE);
 #endif
@@ -541,9 +558,6 @@
         av_frame_free(&ost->last_frame);
         av_dict_free(&ost->encoder_opts);
 
-        av_parser_close(ost->parser);
-        avcodec_free_context(&ost->parser_avctx);
-
         av_freep(&ost->forced_keyframes);
         av_expr_free(ost->forced_keyframes_pexpr);
         av_freep(&ost->avfilter);
@@ -568,7 +582,7 @@
 
         av_freep(&output_streams[i]);
     }
-#if HAVE_PTHREADS
+#if HAVE_THREADS
     free_input_threads();
 #endif
     for (i = 0; i < nb_input_files; i++) {
@@ -645,7 +659,7 @@
 static void update_benchmark(const char *fmt, ...)
 {
     if (do_benchmark_all) {
-        int64_t t = getutime();
+        BenchmarkTimeStamps t = get_benchmark_time_stamps();
         va_list va;
         char buf[1024];
 
@@ -653,7 +667,11 @@
             va_start(va, fmt);
             vsnprintf(buf, sizeof(buf), fmt, va);
             va_end(va);
-            av_log(NULL, AV_LOG_INFO, "bench: %8"PRIu64" %s \n", t - current_time, buf);
+            av_log(NULL, AV_LOG_INFO,
+                   "bench: %8" PRIu64 " user %8" PRIu64 " sys %8" PRIu64 " real %s \n",
+                   t.user_usec - current_time.user_usec,
+                   t.sys_usec - current_time.sys_usec,
+                   t.real_usec - current_time.real_usec, buf);
         }
         current_time = t;
     }
@@ -706,11 +724,11 @@
             if (ret < 0)
                 exit_program(1);
         }
-        ret = av_packet_ref(&tmp_pkt, pkt);
+        ret = av_packet_make_refcounted(pkt);
         if (ret < 0)
             exit_program(1);
+        av_packet_move_ref(&tmp_pkt, pkt);
         av_fifo_generic_write(ost->muxing_queue, &tmp_pkt, sizeof(tmp_pkt), NULL);
-        av_packet_unref(pkt);
         return;
     }
 
@@ -754,7 +772,7 @@
                      - FFMIN3(pkt->pts, pkt->dts, ost->last_mux_dts + 1)
                      - FFMAX3(pkt->pts, pkt->dts, ost->last_mux_dts + 1);
         }
-        if ((st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO || st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) &&
+        if ((st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO || st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO || st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) &&
             pkt->dts != AV_NOPTS_VALUE &&
             !(st->codecpar->codec_id == AV_CODEC_ID_VP9 && ost->stream_copy) &&
             ost->last_mux_dts != AV_NOPTS_VALUE) {
@@ -1103,7 +1121,7 @@
             format_video_sync != VSYNC_PASSTHROUGH &&
             format_video_sync != VSYNC_DROP) {
             if (delta0 < -0.6) {
-                av_log(NULL, AV_LOG_WARNING, "Past duration %f too large\n", -delta0);
+                av_log(NULL, AV_LOG_VERBOSE, "Past duration %f too large\n", -delta0);
             } else
                 av_log(NULL, AV_LOG_DEBUG, "Clipping frame in rate conversion by %f\n", -delta0);
             sync_ipts = ost->sync_opts;
@@ -1199,24 +1217,6 @@
 #endif
         return;
 
-#if FF_API_LAVF_FMT_RAWPICTURE
-    if (of->ctx->oformat->flags & AVFMT_RAWPICTURE &&
-        enc->codec->id == AV_CODEC_ID_RAWVIDEO) {
-        /* raw pictures are written as AVPicture structure to
-           avoid any copies. We support temporarily the older
-           method. */
-        if (in_picture->interlaced_frame)
-            mux_par->field_order = in_picture->top_field_first ? AV_FIELD_TB:AV_FIELD_BT;
-        else
-            mux_par->field_order = AV_FIELD_PROGRESSIVE;
-        pkt.data   = (uint8_t *)in_picture;
-        pkt.size   =  sizeof(AVPicture);
-        pkt.pts    = av_rescale_q(in_picture->pts, enc->time_base, ost->mux_timebase);
-        pkt.flags |= AV_PKT_FLAG_KEY;
-
-        output_packet(of, &pkt, ost, 0);
-    } else
-#endif
     {
         int forced_keyframe = 0;
         double pts_time;
@@ -1236,8 +1236,12 @@
         in_picture->quality = enc->global_quality;
         in_picture->pict_type = 0;
 
+        if (ost->forced_kf_ref_pts == AV_NOPTS_VALUE &&
+            in_picture->pts != AV_NOPTS_VALUE)
+            ost->forced_kf_ref_pts = in_picture->pts;
+
         pts_time = in_picture->pts != AV_NOPTS_VALUE ?
-            in_picture->pts * av_q2d(enc->time_base) : NAN;
+            (in_picture->pts - ost->forced_kf_ref_pts) * av_q2d(enc->time_base) : NAN;
         if (ost->forced_kf_index < ost->forced_kf_count &&
             in_picture->pts >= ost->forced_kf_pts[ost->forced_kf_index]) {
             ost->forced_kf_index++;
@@ -1571,7 +1575,7 @@
         uint64_t total_packets = 0, total_size = 0;
 
         av_log(NULL, AV_LOG_VERBOSE, "Input file #%d (%s):\n",
-               i, f->ctx->filename);
+               i, f->ctx->url);
 
         for (j = 0; j < f->nb_streams; j++) {
             InputStream *ist = input_streams[f->ist_index + j];
@@ -1605,7 +1609,7 @@
         uint64_t total_packets = 0, total_size = 0;
 
         av_log(NULL, AV_LOG_VERBOSE, "Output file #%d (%s):\n",
-               i, of->ctx->filename);
+               i, of->ctx->url);
 
         for (j = 0; j < of->ctx->nb_streams; j++) {
             OutputStream *ost = output_streams[of->ost_index + j];
@@ -1645,8 +1649,7 @@
 
 static void print_report(int is_last_report, int64_t timer_start, int64_t cur_time)
 {
-    char buf[1024];
-    AVBPrint buf_script;
+    AVBPrint buf, buf_script;
     OutputStream *ost;
     AVFormatContext *oc;
     int64_t total_size;
@@ -1658,6 +1661,7 @@
     static int64_t last_time = -1;
     static int qp_histogram[52];
     int hours, mins, secs, us;
+    const char *hours_sign;
     int ret;
     float t;
 
@@ -1683,9 +1687,9 @@
     if (total_size <= 0) // FIXME improve avio_size() so it works with non seekable output too
         total_size = avio_tell(oc->pb);
 
-    buf[0] = '\0';
     vid = 0;
-    av_bprint_init(&buf_script, 0, 1);
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_AUTOMATIC);
+    av_bprint_init(&buf_script, 0, AV_BPRINT_SIZE_AUTOMATIC);
     for (i = 0; i < nb_output_streams; i++) {
         float q = -1;
         ost = output_streams[i];
@@ -1694,7 +1698,7 @@
             q = ost->quality / (float) FF_QP2LAMBDA;
 
         if (vid && enc->codec_type == AVMEDIA_TYPE_VIDEO) {
-            snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "q=%2.1f ", q);
+            av_bprintf(&buf, "q=%2.1f ", q);
             av_bprintf(&buf_script, "stream_%d_%d_q=%.1f\n",
                        ost->file_index, ost->index, q);
         }
@@ -1703,21 +1707,21 @@
 
             frame_number = ost->frame_number;
             fps = t > 1 ? frame_number / t : 0;
-            snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "frame=%5d fps=%3.*f q=%3.1f ",
+            av_bprintf(&buf, "frame=%5d fps=%3.*f q=%3.1f ",
                      frame_number, fps < 9.95, fps, q);
             av_bprintf(&buf_script, "frame=%d\n", frame_number);
-            av_bprintf(&buf_script, "fps=%.1f\n", fps);
+            av_bprintf(&buf_script, "fps=%.2f\n", fps);
             av_bprintf(&buf_script, "stream_%d_%d_q=%.1f\n",
                        ost->file_index, ost->index, q);
             if (is_last_report)
-                snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "L");
+                av_bprintf(&buf, "L");
             if (qp_hist) {
                 int j;
                 int qp = lrintf(q);
                 if (qp >= 0 && qp < FF_ARRAY_ELEMS(qp_histogram))
                     qp_histogram[qp]++;
                 for (j = 0; j < 32; j++)
-                    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", av_log2(qp_histogram[j] + 1));
+                    av_bprintf(&buf, "%X", av_log2(qp_histogram[j] + 1));
             }
 
             if ((enc->flags & AV_CODEC_FLAG_PSNR) && (ost->pict_type != AV_PICTURE_TYPE_NONE || is_last_report)) {
@@ -1726,7 +1730,7 @@
                 double scale, scale_sum = 0;
                 double p;
                 char type[3] = { 'Y','U','V' };
-                snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "PSNR=");
+                av_bprintf(&buf, "PSNR=");
                 for (j = 0; j < 3; j++) {
                     if (is_last_report) {
                         error = enc->error[j];
@@ -1740,12 +1744,12 @@
                     error_sum += error;
                     scale_sum += scale;
                     p = psnr(error / scale);
-                    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%c:%2.2f ", type[j], p);
+                    av_bprintf(&buf, "%c:%2.2f ", type[j], p);
                     av_bprintf(&buf_script, "stream_%d_%d_psnr_%c=%2.2f\n",
                                ost->file_index, ost->index, type[j] | 32, p);
                 }
                 p = psnr(error_sum / scale_sum);
-                snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "*:%2.2f ", psnr(error_sum / scale_sum));
+                av_bprintf(&buf, "*:%2.2f ", psnr(error_sum / scale_sum));
                 av_bprintf(&buf_script, "stream_%d_%d_psnr_all=%2.2f\n",
                            ost->file_index, ost->index, p);
             }
@@ -1765,57 +1769,64 @@
     secs %= 60;
     hours = mins / 60;
     mins %= 60;
+    hours_sign = (pts < 0) ? "-" : "";
 
     bitrate = pts && total_size >= 0 ? total_size * 8 / (pts / 1000.0) : -1;
     speed = t != 0.0 ? (double)pts / AV_TIME_BASE / t : -1;
 
-    if (total_size < 0) snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
-                                 "size=N/A time=");
-    else                snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
-                                 "size=%8.0fkB time=", total_size / 1024.0);
-    if (pts < 0)
-        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "-");
-    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
-             "%02d:%02d:%02d.%02d ", hours, mins, secs,
-             (100 * us) / AV_TIME_BASE);
+    if (total_size < 0) av_bprintf(&buf, "size=N/A time=");
+    else                av_bprintf(&buf, "size=%8.0fkB time=", total_size / 1024.0);
+    if (pts == AV_NOPTS_VALUE) {
+        av_bprintf(&buf, "N/A ");
+    } else {
+        av_bprintf(&buf, "%s%02d:%02d:%02d.%02d ",
+                   hours_sign, hours, mins, secs, (100 * us) / AV_TIME_BASE);
+    }
 
     if (bitrate < 0) {
-        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),"bitrate=N/A");
+        av_bprintf(&buf, "bitrate=N/A");
         av_bprintf(&buf_script, "bitrate=N/A\n");
     }else{
-        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),"bitrate=%6.1fkbits/s", bitrate);
+        av_bprintf(&buf, "bitrate=%6.1fkbits/s", bitrate);
         av_bprintf(&buf_script, "bitrate=%6.1fkbits/s\n", bitrate);
     }
 
     if (total_size < 0) av_bprintf(&buf_script, "total_size=N/A\n");
     else                av_bprintf(&buf_script, "total_size=%"PRId64"\n", total_size);
-    av_bprintf(&buf_script, "out_time_ms=%"PRId64"\n", pts);
-    av_bprintf(&buf_script, "out_time=%02d:%02d:%02d.%06d\n",
-               hours, mins, secs, us);
+    if (pts == AV_NOPTS_VALUE) {
+        av_bprintf(&buf_script, "out_time_us=N/A\n");
+        av_bprintf(&buf_script, "out_time_ms=N/A\n");
+        av_bprintf(&buf_script, "out_time=N/A\n");
+    } else {
+        av_bprintf(&buf_script, "out_time_us=%"PRId64"\n", pts);
+        av_bprintf(&buf_script, "out_time_ms=%"PRId64"\n", pts);
+        av_bprintf(&buf_script, "out_time=%s%02d:%02d:%02d.%06d\n",
+                   hours_sign, hours, mins, secs, us);
+    }
 
     if (nb_frames_dup || nb_frames_drop)
-        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d",
-                nb_frames_dup, nb_frames_drop);
+        av_bprintf(&buf, " dup=%d drop=%d", nb_frames_dup, nb_frames_drop);
     av_bprintf(&buf_script, "dup_frames=%d\n", nb_frames_dup);
     av_bprintf(&buf_script, "drop_frames=%d\n", nb_frames_drop);
 
     if (speed < 0) {
-        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf)," speed=N/A");
+        av_bprintf(&buf, " speed=N/A");
         av_bprintf(&buf_script, "speed=N/A\n");
     } else {
-        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf)," speed=%4.3gx", speed);
+        av_bprintf(&buf, " speed=%4.3gx", speed);
         av_bprintf(&buf_script, "speed=%4.3gx\n", speed);
     }
 
     if (print_stats || is_last_report) {
         const char end = is_last_report ? '\n' : '\r';
         if (print_stats==1 && AV_LOG_INFO > av_log_get_level()) {
-            fprintf(stderr, "%s    %c", buf, end);
+            fprintf(stderr, "%s    %c", buf.str, end);
         } else
-            av_log(NULL, AV_LOG_INFO, "%s    %c", buf, end);
+            av_log(NULL, AV_LOG_INFO, "%s    %c", buf.str, end);
 
     fflush(stderr);
     }
+    av_bprint_finalize(&buf, NULL);
 
     if (progress_avio) {
         av_bprintf(&buf_script, "progress=%s\n",
@@ -1835,6 +1846,19 @@
         print_final_stats(total_size);
 }
 
+static void ifilter_parameters_from_codecpar(InputFilter *ifilter, AVCodecParameters *par)
+{
+    // We never got any input. Set a fake format, which will
+    // come from libavformat.
+    ifilter->format                 = par->format;
+    ifilter->sample_rate            = par->sample_rate;
+    ifilter->channels               = par->channels;
+    ifilter->channel_layout         = par->channel_layout;
+    ifilter->width                  = par->width;
+    ifilter->height                 = par->height;
+    ifilter->sample_aspect_ratio    = par->sample_aspect_ratio;
+}
+
 static void flush_encoders(void)
 {
     int i, ret;
@@ -1861,18 +1885,8 @@
                 int x;
                 for (x = 0; x < fg->nb_inputs; x++) {
                     InputFilter *ifilter = fg->inputs[x];
-                    if (ifilter->format < 0) {
-                        AVCodecParameters *par = ifilter->ist->st->codecpar;
-                        // We never got any input. Set a fake format, which will
-                        // come from libavformat.
-                        ifilter->format                 = par->format;
-                        ifilter->sample_rate            = par->sample_rate;
-                        ifilter->channels               = par->channels;
-                        ifilter->channel_layout         = par->channel_layout;
-                        ifilter->width                  = par->width;
-                        ifilter->height                 = par->height;
-                        ifilter->sample_aspect_ratio    = par->sample_aspect_ratio;
-                    }
+                    if (ifilter->format < 0)
+                        ifilter_parameters_from_codecpar(ifilter, ifilter->ist->st->codecpar);
                 }
 
                 if (!ifilter_has_all_input_formats(fg))
@@ -1897,10 +1911,6 @@
 
         if (enc->codec_type == AVMEDIA_TYPE_AUDIO && enc->frame_size <= 1)
             continue;
-#if FF_API_LAVF_FMT_RAWPICTURE
-        if (enc->codec_type == AVMEDIA_TYPE_VIDEO && (of->ctx->oformat->flags & AVFMT_RAWPICTURE) && enc->codec->id == AV_CODEC_ID_RAWVIDEO)
-            continue;
-#endif
 
         if (enc->codec_type != AVMEDIA_TYPE_VIDEO && enc->codec_type != AVMEDIA_TYPE_AUDIO)
             continue;
@@ -1991,11 +2001,16 @@
     InputFile   *f = input_files [ist->file_index];
     int64_t start_time = (of->start_time == AV_NOPTS_VALUE) ? 0 : of->start_time;
     int64_t ost_tb_start_time = av_rescale_q(start_time, AV_TIME_BASE_Q, ost->mux_timebase);
-    AVPicture pict;
-    AVPacket opkt;
+    AVPacket opkt = { 0 };
 
     av_init_packet(&opkt);
 
+    // EOF: flush output bitstream filters.
+    if (!pkt) {
+        output_packet(of, &opkt, ost, 1);
+        return;
+    }
+
     if ((!ost->frame_number && !(pkt->flags & AV_PKT_FLAG_KEY)) &&
         !ost->copy_initial_nonkeyframes)
         return;
@@ -2053,48 +2068,16 @@
     opkt.duration = av_rescale_q(pkt->duration, ist->st->time_base, ost->mux_timebase);
 
     opkt.flags    = pkt->flags;
-    // FIXME remove the following 2 lines they shall be replaced by the bitstream filters
-    if (  ost->st->codecpar->codec_id != AV_CODEC_ID_H264
-       && ost->st->codecpar->codec_id != AV_CODEC_ID_MPEG1VIDEO
-       && ost->st->codecpar->codec_id != AV_CODEC_ID_MPEG2VIDEO
-       && ost->st->codecpar->codec_id != AV_CODEC_ID_VC1
-       ) {
-        int ret = av_parser_change(ost->parser, ost->parser_avctx,
-                             &opkt.data, &opkt.size,
-                             pkt->data, pkt->size,
-                             pkt->flags & AV_PKT_FLAG_KEY);
-        if (ret < 0) {
-            av_log(NULL, AV_LOG_FATAL, "av_parser_change failed: %s\n",
-                   av_err2str(ret));
-            exit_program(1);
-        }
-        if (ret) {
-            opkt.buf = av_buffer_create(opkt.data, opkt.size, av_buffer_default_free, NULL, 0);
-            if (!opkt.buf)
-                exit_program(1);
-        }
-    } else {
-        opkt.data = pkt->data;
-        opkt.size = pkt->size;
-    }
-    av_copy_packet_side_data(&opkt, pkt);
 
-#if FF_API_LAVF_FMT_RAWPICTURE
-    if (ost->st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
-        ost->st->codecpar->codec_id == AV_CODEC_ID_RAWVIDEO &&
-        (of->ctx->oformat->flags & AVFMT_RAWPICTURE)) {
-        /* store AVPicture in AVPacket, as expected by the output format */
-        int ret = avpicture_fill(&pict, opkt.data, ost->st->codecpar->format, ost->st->codecpar->width, ost->st->codecpar->height);
-        if (ret < 0) {
-            av_log(NULL, AV_LOG_FATAL, "avpicture_fill failed: %s\n",
-                   av_err2str(ret));
+    if (pkt->buf) {
+        opkt.buf = av_buffer_ref(pkt->buf);
+        if (!opkt.buf)
             exit_program(1);
-        }
-        opkt.data = (uint8_t *)&pict;
-        opkt.size = sizeof(AVPicture);
-        opkt.flags |= AV_PKT_FLAG_KEY;
     }
-#endif
+    opkt.data = pkt->data;
+    opkt.size = pkt->size;
+
+    av_copy_packet_side_data(&opkt, pkt);
 
     output_packet(of, &opkt, ost, 0);
 }
@@ -2127,10 +2110,12 @@
     if (ret < 0 && exit_on_error)
         exit_program(1);
 
-    if (exit_on_error && *got_output && ist) {
+    if (*got_output && ist) {
         if (ist->decoded_frame->decode_error_flags || (ist->decoded_frame->flags & AV_FRAME_FLAG_CORRUPT)) {
-            av_log(NULL, AV_LOG_FATAL, "%s: corrupt decoded frame in stream %d\n", input_files[ist->file_index]->ctx->filename, ist->st->index);
-            exit_program(1);
+            av_log(NULL, exit_on_error ? AV_LOG_FATAL : AV_LOG_WARNING,
+                   "%s: corrupt decoded frame in stream %d\n", input_files[ist->file_index]->ctx->url, ist->st->index);
+            if (exit_on_error)
+                exit_program(1);
         }
     }
 }
@@ -2199,10 +2184,7 @@
 
         ret = reap_filters(1);
         if (ret < 0 && ret != AVERROR_EOF) {
-            char errbuf[128];
-            av_strerror(ret, errbuf, sizeof(errbuf));
-
-            av_log(NULL, AV_LOG_ERROR, "Error while filtering: %s\n", errbuf);
+            av_log(NULL, AV_LOG_ERROR, "Error while filtering: %s\n", av_err2str(ret));
             return ret;
         }
 
@@ -2225,7 +2207,7 @@
 
 static int ifilter_send_eof(InputFilter *ifilter, int64_t pts)
 {
-    int i, j, ret;
+    int ret;
 
     ifilter->eof = 1;
 
@@ -2235,16 +2217,11 @@
             return ret;
     } else {
         // the filtergraph was never configured
-        FilterGraph *fg = ifilter->graph;
-        for (i = 0; i < fg->nb_inputs; i++)
-            if (!fg->inputs[i]->eof)
-                break;
-        if (i == fg->nb_inputs) {
-            // All the input streams have finished without the filtergraph
-            // ever being configured.
-            // Mark the output streams as finished.
-            for (j = 0; j < fg->nb_outputs; j++)
-                finish_output_stream(fg->outputs[j]->ost);
+        if (ifilter->format < 0)
+            ifilter_parameters_from_codecpar(ifilter, ifilter->ist->st->codecpar);
+        if (ifilter->format < 0 && (ifilter->type == AVMEDIA_TYPE_AUDIO || ifilter->type == AVMEDIA_TYPE_VIDEO)) {
+            av_log(NULL, AV_LOG_ERROR, "Cannot determine format of input stream %d:%d after EOF\n", ifilter->ist->file_index, ifilter->ist->st->index);
+            return AVERROR_INVALIDDATA;
         }
     }
 
@@ -2733,12 +2710,17 @@
     }
 
     /* handle stream copy */
-    if (!ist->decoding_needed) {
+    if (!ist->decoding_needed && pkt) {
         ist->dts = ist->next_dts;
         switch (ist->dec_ctx->codec_type) {
         case AVMEDIA_TYPE_AUDIO:
-            ist->next_dts += ((int64_t)AV_TIME_BASE * ist->dec_ctx->frame_size) /
-                             ist->dec_ctx->sample_rate;
+            av_assert1(pkt->duration >= 0);
+            if (ist->dec_ctx->sample_rate) {
+                ist->next_dts += ((int64_t)AV_TIME_BASE * ist->dec_ctx->frame_size) /
+                                  ist->dec_ctx->sample_rate;
+            } else {
+                ist->next_dts += av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
+            }
             break;
         case AVMEDIA_TYPE_VIDEO:
             if (ist->framerate.num) {
@@ -2759,7 +2741,7 @@
         ist->pts = ist->dts;
         ist->next_pts = ist->next_dts;
     }
-    for (i = 0; pkt && i < nb_output_streams; i++) {
+    for (i = 0; i < nb_output_streams; i++) {
         OutputStream *ost = output_streams[i];
 
         if (!check_output_constraints(ist, ost) || ost->encoding_needed)
@@ -2816,44 +2798,77 @@
     av_freep(&avc);
 }
 
-static const HWAccel *get_hwaccel(enum AVPixelFormat pix_fmt)
-{
-    int i;
-    for (i = 0; hwaccels[i].name; i++)
-        if (hwaccels[i].pix_fmt == pix_fmt)
-            return &hwaccels[i];
-    return NULL;
-}
-
 static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat *pix_fmts)
 {
     InputStream *ist = s->opaque;
     const enum AVPixelFormat *p;
     int ret;
 
-    for (p = pix_fmts; *p != -1; p++) {
+    for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(*p);
-        const HWAccel *hwaccel;
+        const AVCodecHWConfig  *config = NULL;
+        int i;
 
         if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL))
             break;
 
-        hwaccel = get_hwaccel(*p);
-        if (!hwaccel ||
-            (ist->active_hwaccel_id && ist->active_hwaccel_id != hwaccel->id) ||
-            (ist->hwaccel_id != HWACCEL_AUTO && ist->hwaccel_id != hwaccel->id))
-            continue;
+        if (ist->hwaccel_id == HWACCEL_GENERIC ||
+            ist->hwaccel_id == HWACCEL_AUTO) {
+            for (i = 0;; i++) {
+                config = avcodec_get_hw_config(s->codec, i);
+                if (!config)
+                    break;
+                if (!(config->methods &
+                      AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX))
+                    continue;
+                if (config->pix_fmt == *p)
+                    break;
+            }
+        }
+        if (config) {
+            if (config->device_type != ist->hwaccel_device_type) {
+                // Different hwaccel offered, ignore.
+                continue;
+            }
 
-        ret = hwaccel->init(s);
-        if (ret < 0) {
-            if (ist->hwaccel_id == hwaccel->id) {
+            ret = hwaccel_decode_init(s);
+            if (ret < 0) {
+                if (ist->hwaccel_id == HWACCEL_GENERIC) {
+                    av_log(NULL, AV_LOG_FATAL,
+                           "%s hwaccel requested for input stream #%d:%d, "
+                           "but cannot be initialized.\n",
+                           av_hwdevice_get_type_name(config->device_type),
+                           ist->file_index, ist->st->index);
+                    return AV_PIX_FMT_NONE;
+                }
+                continue;
+            }
+        } else {
+            const HWAccel *hwaccel = NULL;
+            int i;
+            for (i = 0; hwaccels[i].name; i++) {
+                if (hwaccels[i].pix_fmt == *p) {
+                    hwaccel = &hwaccels[i];
+                    break;
+                }
+            }
+            if (!hwaccel) {
+                // No hwaccel supporting this pixfmt.
+                continue;
+            }
+            if (hwaccel->id != ist->hwaccel_id) {
+                // Does not match requested hwaccel.
+                continue;
+            }
+
+            ret = hwaccel->init(s);
+            if (ret < 0) {
                 av_log(NULL, AV_LOG_FATAL,
                        "%s hwaccel requested for input stream #%d:%d, "
                        "but cannot be initialized.\n", hwaccel->name,
                        ist->file_index, ist->st->index);
                 return AV_PIX_FMT_NONE;
             }
-            continue;
         }
 
         if (ist->hw_frames_ctx) {
@@ -2862,8 +2877,7 @@
                 return AV_PIX_FMT_NONE;
         }
 
-        ist->active_hwaccel_id = hwaccel->id;
-        ist->hwaccel_pix_fmt   = *p;
+        ist->hwaccel_pix_fmt = *p;
         break;
     }
 
@@ -2910,7 +2924,7 @@
 
         /* Useful for subtitles retiming by lavf (FIXME), skipping samples in
          * audio, and video decoders such as cuvid or mediacodec */
-        av_codec_set_pkt_timebase(ist->dec_ctx, ist->st->time_base);
+        ist->dec_ctx->pkt_timebase = ist->st->time_base;
 
         if (!av_dict_get(ist->decoder_opts, "threads", NULL, 0))
             av_dict_set(&ist->decoder_opts, "threads", "auto", 0);
@@ -2981,7 +2995,7 @@
     //assert_avoptions(of->opts);
     of->header_written = 1;
 
-    av_dump_format(of->ctx, file_index, of->ctx->filename, 1);
+    av_dump_format(of->ctx, file_index, of->ctx->url, 1);
 
     if (sdp_filename || want_sdp)
         print_sdp();
@@ -3060,7 +3074,13 @@
                "Error setting up codec context options.\n");
         return ret;
     }
-    avcodec_parameters_from_context(par_src, ost->enc_ctx);
+
+    ret = avcodec_parameters_from_context(par_src, ost->enc_ctx);
+    if (ret < 0) {
+        av_log(NULL, AV_LOG_FATAL,
+               "Error getting reference codec parameters.\n");
+        return ret;
+    }
 
     if (!codec_tag) {
         unsigned int codec_tag_tmp;
@@ -3114,11 +3134,6 @@
             av_display_rotation_set((int32_t *)sd, -ost->rotate_override_value);
     }
 
-    ost->parser = av_parser_init(par_dst->codec_id);
-    ost->parser_avctx = avcodec_alloc_context3(NULL);
-    if (!ost->parser_avctx)
-        return AVERROR(ENOMEM);
-
     switch (par_dst->codec_type) {
     case AVMEDIA_TYPE_AUDIO:
         if (audio_volume != 256) {
@@ -3160,7 +3175,7 @@
     uint8_t *encoder_string;
     int encoder_string_len;
     int format_flags = 0;
-    int codec_flags = 0;
+    int codec_flags = ost->enc_ctx->flags;
 
     if (av_dict_get(ost->st->metadata, "encoder",  NULL, 0))
         return;
@@ -3393,6 +3408,12 @@
             enc_ctx->bits_per_raw_sample = frame_bits_per_raw_sample;
         }
 
+        if (ost->top_field_first == 0) {
+            enc_ctx->field_order = AV_FIELD_BB;
+        } else if (ost->top_field_first == 1) {
+            enc_ctx->field_order = AV_FIELD_TT;
+        }
+
         if (ost->forced_keyframes) {
             if (!strncmp(ost->forced_keyframes, "expr:", 5)) {
                 ret = av_expr_parse(&ost->forced_keyframes_pexpr, ost->forced_keyframes+5,
@@ -3479,6 +3500,23 @@
                 return ret;
             }
         }
+        if (ist && ist->dec->type == AVMEDIA_TYPE_SUBTITLE && ost->enc->type == AVMEDIA_TYPE_SUBTITLE) {
+            int input_props = 0, output_props = 0;
+            AVCodecDescriptor const *input_descriptor =
+                avcodec_descriptor_get(dec->codec_id);
+            AVCodecDescriptor const *output_descriptor =
+                avcodec_descriptor_get(ost->enc_ctx->codec_id);
+            if (input_descriptor)
+                input_props = input_descriptor->props & (AV_CODEC_PROP_TEXT_SUB | AV_CODEC_PROP_BITMAP_SUB);
+            if (output_descriptor)
+                output_props = output_descriptor->props & (AV_CODEC_PROP_TEXT_SUB | AV_CODEC_PROP_BITMAP_SUB);
+            if (input_props && output_props && input_props != output_props) {
+                snprintf(error, error_len,
+                         "Subtitle encoding currently only possible from text to text "
+                         "or bitmap to bitmap");
+                return AVERROR_INVALIDDATA;
+            }
+        }
 
         if ((ret = avcodec_open2(ost->enc_ctx, codec, &ost->encoder_opts)) < 0) {
             if (ret == AVERROR_EXPERIMENTAL)
@@ -3494,7 +3532,8 @@
             av_buffersink_set_frame_size(ost->filter->filter,
                                             ost->enc_ctx->frame_size);
         assert_avoptions(ost->encoder_opts);
-        if (ost->enc_ctx->bit_rate && ost->enc_ctx->bit_rate < 1000)
+        if (ost->enc_ctx->bit_rate && ost->enc_ctx->bit_rate < 1000 &&
+            ost->enc_ctx->codec_id != AV_CODEC_ID_CODEC2 /* don't complain about 700 bit/s modes */)
             av_log(NULL, AV_LOG_WARNING, "The bitrate parameter is set too low."
                                          " It takes bits/s as argument, not kbits/s\n");
 
@@ -3558,14 +3597,6 @@
         ret = init_output_stream_streamcopy(ost);
         if (ret < 0)
             return ret;
-
-        /*
-         * FIXME: will the codec context used by the parser during streamcopy
-         * This should go away with the new parser API.
-         */
-        ret = avcodec_parameters_to_context(ost->parser_avctx, ost->st->codecpar);
-        if (ret < 0)
-            return ret;
     }
 
     // parse user provided disposition, and update stream values
@@ -3582,8 +3613,10 @@
             { "hearing_impaired"    , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_HEARING_IMPAIRED  },    .unit = "flags" },
             { "visual_impaired"     , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_VISUAL_IMPAIRED   },    .unit = "flags" },
             { "clean_effects"       , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_CLEAN_EFFECTS     },    .unit = "flags" },
+            { "attached_pic"        , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_ATTACHED_PIC      },    .unit = "flags" },
             { "captions"            , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_CAPTIONS          },    .unit = "flags" },
             { "descriptions"        , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_DESCRIPTIONS      },    .unit = "flags" },
+            { "dependent"           , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_DEPENDENT         },    .unit = "flags" },
             { "metadata"            , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_METADATA          },    .unit = "flags" },
             { NULL },
         };
@@ -3988,7 +4021,7 @@
     return 0;
 }
 
-#if HAVE_PTHREADS
+#if HAVE_THREADS
 static void *input_thread(void *arg)
 {
     InputFile *f = arg;
@@ -4030,49 +4063,63 @@
     return NULL;
 }
 
+static void free_input_thread(int i)
+{
+    InputFile *f = input_files[i];
+    AVPacket pkt;
+
+    if (!f || !f->in_thread_queue)
+        return;
+    av_thread_message_queue_set_err_send(f->in_thread_queue, AVERROR_EOF);
+    while (av_thread_message_queue_recv(f->in_thread_queue, &pkt, 0) >= 0)
+        av_packet_unref(&pkt);
+
+    pthread_join(f->thread, NULL);
+    f->joined = 1;
+    av_thread_message_queue_free(&f->in_thread_queue);
+}
+
 static void free_input_threads(void)
 {
     int i;
 
-    for (i = 0; i < nb_input_files; i++) {
-        InputFile *f = input_files[i];
-        AVPacket pkt;
+    for (i = 0; i < nb_input_files; i++)
+        free_input_thread(i);
+}
 
-        if (!f || !f->in_thread_queue)
-            continue;
-        av_thread_message_queue_set_err_send(f->in_thread_queue, AVERROR_EOF);
-        while (av_thread_message_queue_recv(f->in_thread_queue, &pkt, 0) >= 0)
-            av_packet_unref(&pkt);
+static int init_input_thread(int i)
+{
+    int ret;
+    InputFile *f = input_files[i];
 
-        pthread_join(f->thread, NULL);
-        f->joined = 1;
+    if (nb_input_files == 1)
+        return 0;
+
+    if (f->ctx->pb ? !f->ctx->pb->seekable :
+        strcmp(f->ctx->iformat->name, "lavfi"))
+        f->non_blocking = 1;
+    ret = av_thread_message_queue_alloc(&f->in_thread_queue,
+                                        f->thread_queue_size, sizeof(AVPacket));
+    if (ret < 0)
+        return ret;
+
+    if ((ret = pthread_create(&f->thread, NULL, input_thread, f))) {
+        av_log(NULL, AV_LOG_ERROR, "pthread_create failed: %s. Try to increase `ulimit -v` or decrease `ulimit -s`.\n", strerror(ret));
         av_thread_message_queue_free(&f->in_thread_queue);
+        return AVERROR(ret);
     }
+
+    return 0;
 }
 
 static int init_input_threads(void)
 {
     int i, ret;
 
-    if (nb_input_files == 1)
-        return 0;
-
     for (i = 0; i < nb_input_files; i++) {
-        InputFile *f = input_files[i];
-
-        if (f->ctx->pb ? !f->ctx->pb->seekable :
-            strcmp(f->ctx->iformat->name, "lavfi"))
-            f->non_blocking = 1;
-        ret = av_thread_message_queue_alloc(&f->in_thread_queue,
-                                            f->thread_queue_size, sizeof(AVPacket));
+        ret = init_input_thread(i);
         if (ret < 0)
             return ret;
-
-        if ((ret = pthread_create(&f->thread, NULL, input_thread, f))) {
-            av_log(NULL, AV_LOG_ERROR, "pthread_create failed: %s. Try to increase `ulimit -v` or decrease `ulimit -s`.\n", strerror(ret));
-            av_thread_message_queue_free(&f->in_thread_queue);
-            return AVERROR(ret);
-        }
     }
     return 0;
 }
@@ -4098,7 +4145,7 @@
         }
     }
 
-#if HAVE_PTHREADS
+#if HAVE_THREADS
     if (nb_input_files > 1)
         return get_input_packet_mt(f, pkt);
 #endif
@@ -4158,12 +4205,6 @@
         ist   = input_streams[ifile->ist_index + i];
         avctx = ist->dec_ctx;
 
-        // flush decoders
-        if (ist->decoding_needed) {
-            process_input_packet(ist, NULL, 1);
-            avcodec_flush_buffers(avctx);
-        }
-
         /* duration is the length of the last frame in a stream
          * when audio stream is present we don't care about
          * last video frame length because it's not defined exactly */
@@ -4180,14 +4221,17 @@
                 AVRational sample_rate = {1, avctx->sample_rate};
 
                 duration = av_rescale_q(ist->nb_samples, sample_rate, ist->st->time_base);
-            } else
+            } else {
                 continue;
+            }
         } else {
             if (ist->framerate.num) {
-                duration = av_rescale_q(1, ist->framerate, ist->st->time_base);
+                duration = av_rescale_q(1, av_inv_q(ist->framerate), ist->st->time_base);
             } else if (ist->st->avg_frame_rate.num) {
-                duration = av_rescale_q(1, ist->st->avg_frame_rate, ist->st->time_base);
-            } else duration = 1;
+                duration = av_rescale_q(1, av_inv_q(ist->st->avg_frame_rate), ist->st->time_base);
+            } else {
+                duration = 1;
+            }
         }
         if (!ifile->duration)
             ifile->time_base = ist->st->time_base;
@@ -4217,7 +4261,7 @@
     AVFormatContext *is;
     InputStream *ist;
     AVPacket pkt;
-    int ret, i, j;
+    int ret, thread_ret, i, j;
     int64_t duration;
     int64_t pkt_dts;
 
@@ -4229,9 +4273,30 @@
         return ret;
     }
     if (ret < 0 && ifile->loop) {
-        if ((ret = seek_to_start(ifile, is)) < 0)
-            return ret;
-        ret = get_input_packet(ifile, &pkt);
+        AVCodecContext *avctx;
+        for (i = 0; i < ifile->nb_streams; i++) {
+            ist = input_streams[ifile->ist_index + i];
+            avctx = ist->dec_ctx;
+            if (ist->decoding_needed) {
+                ret = process_input_packet(ist, NULL, 1);
+                if (ret>0)
+                    return 0;
+                avcodec_flush_buffers(avctx);
+            }
+        }
+#if HAVE_THREADS
+        free_input_thread(file_index);
+#endif
+        ret = seek_to_start(ifile, is);
+#if HAVE_THREADS
+        thread_ret = init_input_thread(file_index);
+        if (thread_ret < 0)
+            return thread_ret;
+#endif
+        if (ret < 0)
+            av_log(NULL, AV_LOG_WARNING, "Seek to start failed.\n");
+        else
+            ret = get_input_packet(ifile, &pkt);
         if (ret == AVERROR(EAGAIN)) {
             ifile->eagain = 1;
             return ret;
@@ -4239,7 +4304,7 @@
     }
     if (ret < 0) {
         if (ret != AVERROR_EOF) {
-            print_error(is->filename, ret);
+            print_error(is->url, ret);
             if (exit_on_error)
                 exit_program(1);
         }
@@ -4287,9 +4352,11 @@
     if (ist->discard)
         goto discard_packet;
 
-    if (exit_on_error && (pkt.flags & AV_PKT_FLAG_CORRUPT)) {
-        av_log(NULL, AV_LOG_FATAL, "%s: corrupt input packet in stream %d\n", is->filename, pkt.stream_index);
-        exit_program(1);
+    if (pkt.flags & AV_PKT_FLAG_CORRUPT) {
+        av_log(NULL, exit_on_error ? AV_LOG_FATAL : AV_LOG_WARNING,
+               "%s: corrupt input packet in stream %d\n", is->url, pkt.stream_index);
+        if (exit_on_error)
+            exit_program(1);
     }
 
     if (debug_ts) {
@@ -4602,7 +4669,7 @@
 
     timer_start = av_gettime_relative();
 
-#if HAVE_PTHREADS
+#if HAVE_THREADS
     if ((ret = init_input_threads()) < 0)
         goto fail;
 #endif
@@ -4623,24 +4690,21 @@
 
         ret = transcode_step();
         if (ret < 0 && ret != AVERROR_EOF) {
-            char errbuf[128];
-            av_strerror(ret, errbuf, sizeof(errbuf));
-
-            av_log(NULL, AV_LOG_ERROR, "Error while filtering: %s\n", errbuf);
+            av_log(NULL, AV_LOG_ERROR, "Error while filtering: %s\n", av_err2str(ret));
             break;
         }
 
         /* dump report by using the output first video and audio streams */
         print_report(0, timer_start, cur_time);
     }
-#if HAVE_PTHREADS
+#if HAVE_THREADS
     free_input_threads();
 #endif
 
     /* at the end of stream, we must flush the decoder buffers */
     for (i = 0; i < nb_input_streams; i++) {
         ist = input_streams[i];
-        if (!input_files[ist->file_index]->eof_reached && ist->decoding_needed) {
+        if (!input_files[ist->file_index]->eof_reached) {
             process_input_packet(ist, NULL, 0);
         }
     }
@@ -4655,11 +4719,11 @@
             av_log(NULL, AV_LOG_ERROR,
                    "Nothing was written into output file %d (%s), because "
                    "at least one of its streams received no packets.\n",
-                   i, os->filename);
+                   i, os->url);
             continue;
         }
         if ((ret = av_write_trailer(os)) < 0) {
-            av_log(NULL, AV_LOG_ERROR, "Error writing trailer of %s: %s\n", os->filename, av_err2str(ret));
+            av_log(NULL, AV_LOG_ERROR, "Error writing trailer of %s: %s\n", os->url, av_err2str(ret));
             if (exit_on_error)
                 exit_program(1);
         }
@@ -4699,7 +4763,7 @@
     ret = 0;
 
  fail:
-#if HAVE_PTHREADS
+#if HAVE_THREADS
     free_input_threads();
 #endif
 
@@ -4727,23 +4791,30 @@
     return ret;
 }
 
-
-static int64_t getutime(void)
+static BenchmarkTimeStamps get_benchmark_time_stamps(void)
 {
+    BenchmarkTimeStamps time_stamps = { av_gettime_relative() };
 #if HAVE_GETRUSAGE
     struct rusage rusage;
 
     getrusage(RUSAGE_SELF, &rusage);
-    return (rusage.ru_utime.tv_sec * 1000000LL) + rusage.ru_utime.tv_usec;
+    time_stamps.user_usec =
+        (rusage.ru_utime.tv_sec * 1000000LL) + rusage.ru_utime.tv_usec;
+    time_stamps.sys_usec =
+        (rusage.ru_stime.tv_sec * 1000000LL) + rusage.ru_stime.tv_usec;
 #elif HAVE_GETPROCESSTIMES
     HANDLE proc;
     FILETIME c, e, k, u;
     proc = GetCurrentProcess();
     GetProcessTimes(proc, &c, &e, &k, &u);
-    return ((int64_t) u.dwHighDateTime << 32 | u.dwLowDateTime) / 10;
+    time_stamps.user_usec =
+        ((int64_t)u.dwHighDateTime << 32 | u.dwLowDateTime) / 10;
+    time_stamps.sys_usec =
+        ((int64_t)k.dwHighDateTime << 32 | k.dwLowDateTime) / 10;
 #else
-    return av_gettime_relative();
+    time_stamps.user_usec = time_stamps.sys_usec = 0;
 #endif
+    return time_stamps;
 }
 
 static int64_t getmaxrss(void)
@@ -4771,7 +4842,7 @@
 int main(int argc, char **argv)
 {
     int i, ret;
-    int64_t ti;
+    BenchmarkTimeStamps ti;
 
     init_dynload();
 
@@ -4789,12 +4860,9 @@
         argv++;
     }
 
-    avcodec_register_all();
 #if CONFIG_AVDEVICE
     avdevice_register_all();
 #endif
-    avfilter_register_all();
-    av_register_all();
     avformat_network_init();
 
     show_banner(argc, argv, options);
@@ -4826,12 +4894,18 @@
             want_sdp = 0;
     }
 
-    current_time = ti = getutime();
+    current_time = ti = get_benchmark_time_stamps();
     if (transcode() < 0)
         exit_program(1);
-    ti = getutime() - ti;
     if (do_benchmark) {
-        av_log(NULL, AV_LOG_INFO, "bench: utime=%0.3fs\n", ti / 1000000.0);
+        int64_t utime, stime, rtime;
+        current_time = get_benchmark_time_stamps();
+        utime = current_time.user_usec - ti.user_usec;
+        stime = current_time.sys_usec  - ti.sys_usec;
+        rtime = current_time.real_usec - ti.real_usec;
+        av_log(NULL, AV_LOG_INFO,
+               "bench: utime=%0.3fs stime=%0.3fs rtime=%0.3fs\n",
+               utime / 1000000.0, stime / 1000000.0, rtime / 1000000.0);
     }
     av_log(NULL, AV_LOG_DEBUG, "%"PRIu64" frames successfully decoded, %"PRIu64" decoding errors\n",
            decode_error_stat[0], decode_error_stat[1]);

diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index f6c76bc..eb1eaf6 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h

@@ -25,10 +25,6 @@
 #include <stdio.h>
 #include <signal.h>
 
-#if HAVE_PTHREADS
-#include <pthread.h>
-#endif
-
 #include "cmdutils.h"
 
 #include "libavformat/avformat.h"
@@ -45,6 +41,7 @@
 #include "libavutil/hwcontext.h"
 #include "libavutil/pixfmt.h"
 #include "libavutil/rational.h"
+#include "libavutil/thread.h"
 #include "libavutil/threadmessage.h"
 
 #include "libswresample/swresample.h"
@@ -61,14 +58,10 @@
 enum HWAccelID {
     HWACCEL_NONE = 0,
     HWACCEL_AUTO,
-    HWACCEL_VDPAU,
-    HWACCEL_DXVA2,
-    HWACCEL_VDA,
+    HWACCEL_GENERIC,
     HWACCEL_VIDEOTOOLBOX,
     HWACCEL_QSV,
-    HWACCEL_VAAPI,
     HWACCEL_CUVID,
-    HWACCEL_D3D11VA,
 };
 
 typedef struct HWAccel {
@@ -76,7 +69,6 @@
     int (*init)(AVCodecContext *s);
     enum HWAccelID id;
     enum AVPixelFormat pix_fmt;
-    enum AVHWDeviceType device_type;
 } HWAccel;
 
 typedef struct HWDevice {
@@ -161,6 +153,7 @@
     float mux_preload;
     float mux_max_delay;
     int shortest;
+    int bitexact;
 
     int video_disable;
     int audio_disable;
@@ -369,11 +362,11 @@
 
     /* hwaccel options */
     enum HWAccelID hwaccel_id;
+    enum AVHWDeviceType hwaccel_device_type;
     char  *hwaccel_device;
     enum AVPixelFormat hwaccel_output_format;
 
     /* hwaccel context */
-    enum HWAccelID active_hwaccel_id;
     void  *hwaccel_ctx;
     void (*hwaccel_uninit)(AVCodecContext *s);
     int  (*hwaccel_get_buffer)(AVCodecContext *s, AVFrame *frame, int flags);
@@ -419,7 +412,7 @@
     int rate_emu;
     int accurate_seek;
 
-#if HAVE_PTHREADS
+#if HAVE_THREADS
     AVThreadMessageQueue *in_thread_queue;
     pthread_t thread;           /* thread reading from this file */
     int non_blocking;           /* reading packets from the thread should not block */
@@ -491,6 +484,7 @@
     AVRational frame_aspect_ratio;
 
     /* forced key frames */
+    int64_t forced_kf_ref_pts;
     int64_t *forced_kf_pts;
     int forced_kf_count;
     int forced_kf_index;
@@ -533,9 +527,6 @@
 
     int keep_pix_fmt;
 
-    AVCodecParserContext *parser;
-    AVCodecContext       *parser_avctx;
-
     /* stats */
     // combined size of all the packets written
     uint64_t data_size;
@@ -624,7 +615,6 @@
 
 extern const OptionDef options[];
 extern const HWAccel hwaccels[];
-extern int hwaccel_lax_profile_check;
 extern AVBufferRef *hw_device_ctx;
 #if CONFIG_QSV
 extern char *qsv_device;
@@ -662,7 +652,6 @@
 
 int ffmpeg_parse_options(int argc, char **argv);
 
-int vda_init(AVCodecContext *s);
 int videotoolbox_init(AVCodecContext *s);
 int qsv_init(AVCodecContext *s);
 int cuvid_init(AVCodecContext *s);

diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
index aacc185..6518d50 100644
--- a/fftools/ffmpeg_filter.c
+++ b/fftools/ffmpeg_filter.c

@@ -65,6 +65,7 @@
     if (codec && codec->pix_fmts) {
         const enum AVPixelFormat *p = codec->pix_fmts;
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(target);
+        //FIXME: This should check for AV_PIX_FMT_FLAG_ALPHA after PAL8 pixel format without alpha is implemented
         int has_alpha = desc ? desc->nb_components % 2 == 0 : 0;
         enum AVPixelFormat best= AV_PIX_FMT_NONE;
 
@@ -340,6 +341,7 @@
     graph = avfilter_graph_alloc();
     if (!graph)
         return AVERROR(ENOMEM);
+    graph->nb_threads = 1;
 
     ret = avfilter_graph_parse2(graph, fg->graph_desc, &inputs, &outputs);
     if (ret < 0)
@@ -773,7 +775,7 @@
     sar = ifilter->sample_aspect_ratio;
     if(!sar.den)
         sar = (AVRational){0,1};
-    av_bprint_init(&args, 0, 1);
+    av_bprint_init(&args, 0, AV_BPRINT_SIZE_AUTOMATIC);
     av_bprintf(&args,
              "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:"
              "pixel_aspect=%d/%d:sws_param=flags=%d",

diff --git a/fftools/ffmpeg_hw.c b/fftools/ffmpeg_hw.c
index a4d1cad..2ec1813 100644
--- a/fftools/ffmpeg_hw.c
+++ b/fftools/ffmpeg_hw.c

@@ -64,6 +64,31 @@
     return hw_devices[nb_hw_devices++];
 }
 
+static char *hw_device_default_name(enum AVHWDeviceType type)
+{
+    // Make an automatic name of the form "type%d".  We arbitrarily
+    // limit at 1000 anonymous devices of the same type - there is
+    // probably something else very wrong if you get to this limit.
+    const char *type_name = av_hwdevice_get_type_name(type);
+    char *name;
+    size_t index_pos;
+    int index, index_limit = 1000;
+    index_pos = strlen(type_name);
+    name = av_malloc(index_pos + 4);
+    if (!name)
+        return NULL;
+    for (index = 0; index < index_limit; index++) {
+        snprintf(name, index_pos + 4, "%s%d", type_name, index);
+        if (!hw_device_get_by_name(name))
+            break;
+    }
+    if (index >= index_limit) {
+        av_freep(&name);
+        return NULL;
+    }
+    return name;
+}
+
 int hw_device_init_from_string(const char *arg, HWDevice **dev_out)
 {
     // "type=name:device,key=value,key2=value2"
@@ -111,27 +136,11 @@
 
         p += 1 + k;
     } else {
-        // Give the device an automatic name of the form "type%d".
-        // We arbitrarily limit at 1000 anonymous devices of the same
-        // type - there is probably something else very wrong if you
-        // get to this limit.
-        size_t index_pos;
-        int index, index_limit = 1000;
-        index_pos = strlen(type_name);
-        name = av_malloc(index_pos + 4);
+        name = hw_device_default_name(type);
         if (!name) {
             err = AVERROR(ENOMEM);
             goto fail;
         }
-        for (index = 0; index < index_limit; index++) {
-            snprintf(name, index_pos + 4, "%s%d", type_name, index);
-            if (!hw_device_get_by_name(name))
-                break;
-        }
-        if (index >= index_limit) {
-            errmsg = "too many devices";
-            goto invalid;
-        }
     }
 
     if (!*p) {
@@ -214,6 +223,49 @@
     goto done;
 }
 
+static int hw_device_init_from_type(enum AVHWDeviceType type,
+                                    const char *device,
+                                    HWDevice **dev_out)
+{
+    AVBufferRef *device_ref = NULL;
+    HWDevice *dev;
+    char *name;
+    int err;
+
+    name = hw_device_default_name(type);
+    if (!name) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    err = av_hwdevice_ctx_create(&device_ref, type, device, NULL, 0);
+    if (err < 0) {
+        av_log(NULL, AV_LOG_ERROR,
+               "Device creation failed: %d.\n", err);
+        goto fail;
+    }
+
+    dev = hw_device_add();
+    if (!dev) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    dev->name = name;
+    dev->type = type;
+    dev->device_ref = device_ref;
+
+    if (dev_out)
+        *dev_out = dev;
+
+    return 0;
+
+fail:
+    av_freep(&name);
+    av_buffer_unref(&device_ref);
+    return err;
+}
+
 void hw_device_free_all(void)
 {
     int i;
@@ -226,80 +278,130 @@
     nb_hw_devices = 0;
 }
 
-static enum AVHWDeviceType hw_device_match_type_by_hwaccel(enum HWAccelID hwaccel_id)
+static HWDevice *hw_device_match_by_codec(const AVCodec *codec)
 {
+    const AVCodecHWConfig *config;
+    HWDevice *dev;
     int i;
-    if (hwaccel_id == HWACCEL_NONE)
-        return AV_HWDEVICE_TYPE_NONE;
-    for (i = 0; hwaccels[i].name; i++) {
-        if (hwaccels[i].id == hwaccel_id)
-            return hwaccels[i].device_type;
+    for (i = 0;; i++) {
+        config = avcodec_get_hw_config(codec, i);
+        if (!config)
+            return NULL;
+        if (!(config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX))
+            continue;
+        dev = hw_device_get_by_type(config->device_type);
+        if (dev)
+            return dev;
     }
-    return AV_HWDEVICE_TYPE_NONE;
-}
-
-static enum AVHWDeviceType hw_device_match_type_in_name(const char *codec_name)
-{
-    const char *type_name;
-    enum AVHWDeviceType type;
-    for (type = av_hwdevice_iterate_types(AV_HWDEVICE_TYPE_NONE);
-         type != AV_HWDEVICE_TYPE_NONE;
-         type = av_hwdevice_iterate_types(type)) {
-        type_name = av_hwdevice_get_type_name(type);
-        if (strstr(codec_name, type_name))
-            return type;
-    }
-    return AV_HWDEVICE_TYPE_NONE;
 }
 
 int hw_device_setup_for_decode(InputStream *ist)
 {
+    const AVCodecHWConfig *config;
     enum AVHWDeviceType type;
-    HWDevice *dev;
-    int err;
+    HWDevice *dev = NULL;
+    int err, auto_device = 0;
 
     if (ist->hwaccel_device) {
         dev = hw_device_get_by_name(ist->hwaccel_device);
         if (!dev) {
-            char *tmp;
-            type = hw_device_match_type_by_hwaccel(ist->hwaccel_id);
-            if (type == AV_HWDEVICE_TYPE_NONE) {
-                // No match - this isn't necessarily invalid, though,
-                // because an explicit device might not be needed or
-                // the hwaccel setup could be handled elsewhere.
+            if (ist->hwaccel_id == HWACCEL_AUTO) {
+                auto_device = 1;
+            } else if (ist->hwaccel_id == HWACCEL_GENERIC) {
+                type = ist->hwaccel_device_type;
+                err = hw_device_init_from_type(type, ist->hwaccel_device,
+                                               &dev);
+            } else {
+                // This will be dealt with by API-specific initialisation
+                // (using hwaccel_device), so nothing further needed here.
                 return 0;
             }
-            tmp = av_asprintf("%s:%s", av_hwdevice_get_type_name(type),
-                              ist->hwaccel_device);
-            if (!tmp)
-                return AVERROR(ENOMEM);
-            err = hw_device_init_from_string(tmp, &dev);
-            av_free(tmp);
-            if (err < 0)
-                return err;
+        } else {
+            if (ist->hwaccel_id == HWACCEL_AUTO) {
+                ist->hwaccel_device_type = dev->type;
+            } else if (ist->hwaccel_device_type != dev->type) {
+                av_log(ist->dec_ctx, AV_LOG_ERROR, "Invalid hwaccel device "
+                       "specified for decoder: device %s of type %s is not "
+                       "usable with hwaccel %s.\n", dev->name,
+                       av_hwdevice_get_type_name(dev->type),
+                       av_hwdevice_get_type_name(ist->hwaccel_device_type));
+                return AVERROR(EINVAL);
+            }
         }
     } else {
-        if (ist->hwaccel_id != HWACCEL_NONE)
-            type = hw_device_match_type_by_hwaccel(ist->hwaccel_id);
-        else
-            type = hw_device_match_type_in_name(ist->dec->name);
-        if (type != AV_HWDEVICE_TYPE_NONE) {
+        if (ist->hwaccel_id == HWACCEL_AUTO) {
+            auto_device = 1;
+        } else if (ist->hwaccel_id == HWACCEL_GENERIC) {
+            type = ist->hwaccel_device_type;
             dev = hw_device_get_by_type(type);
-            if (!dev) {
-                hw_device_init_from_string(av_hwdevice_get_type_name(type),
-                                           &dev);
-            }
+            if (!dev)
+                err = hw_device_init_from_type(type, NULL, &dev);
         } else {
-            // No device required.
+            dev = hw_device_match_by_codec(ist->dec);
+            if (!dev) {
+                // No device for this codec, but not using generic hwaccel
+                // and therefore may well not need one - ignore.
+                return 0;
+            }
+        }
+    }
+
+    if (auto_device) {
+        int i;
+        if (!avcodec_get_hw_config(ist->dec, 0)) {
+            // Decoder does not support any hardware devices.
+            return 0;
+        }
+        for (i = 0; !dev; i++) {
+            config = avcodec_get_hw_config(ist->dec, i);
+            if (!config)
+                break;
+            type = config->device_type;
+            dev = hw_device_get_by_type(type);
+            if (dev) {
+                av_log(ist->dec_ctx, AV_LOG_INFO, "Using auto "
+                       "hwaccel type %s with existing device %s.\n",
+                       av_hwdevice_get_type_name(type), dev->name);
+            }
+        }
+        for (i = 0; !dev; i++) {
+            config = avcodec_get_hw_config(ist->dec, i);
+            if (!config)
+                break;
+            type = config->device_type;
+            // Try to make a new device of this type.
+            err = hw_device_init_from_type(type, ist->hwaccel_device,
+                                           &dev);
+            if (err < 0) {
+                // Can't make a device of this type.
+                continue;
+            }
+            if (ist->hwaccel_device) {
+                av_log(ist->dec_ctx, AV_LOG_INFO, "Using auto "
+                       "hwaccel type %s with new device created "
+                       "from %s.\n", av_hwdevice_get_type_name(type),
+                       ist->hwaccel_device);
+            } else {
+                av_log(ist->dec_ctx, AV_LOG_INFO, "Using auto "
+                       "hwaccel type %s with new default device.\n",
+                       av_hwdevice_get_type_name(type));
+            }
+        }
+        if (dev) {
+            ist->hwaccel_device_type = type;
+        } else {
+            av_log(ist->dec_ctx, AV_LOG_INFO, "Auto hwaccel "
+                   "disabled: no device found.\n");
+            ist->hwaccel_id = HWACCEL_NONE;
             return 0;
         }
     }
 
     if (!dev) {
-        av_log(ist->dec_ctx, AV_LOG_WARNING, "No device available "
-               "for decoder (device type %s for codec %s).\n",
+        av_log(ist->dec_ctx, AV_LOG_ERROR, "No device available "
+               "for decoder: device type %s needed for codec %s.\n",
                av_hwdevice_get_type_name(type), ist->dec->name);
-        return 0;
+        return err;
     }
 
     ist->dec_ctx->hw_device_ctx = av_buffer_ref(dev->device_ref);
@@ -311,24 +413,16 @@
 
 int hw_device_setup_for_encode(OutputStream *ost)
 {
-    enum AVHWDeviceType type;
     HWDevice *dev;
 
-    type = hw_device_match_type_in_name(ost->enc->name);
-    if (type != AV_HWDEVICE_TYPE_NONE) {
-        dev = hw_device_get_by_type(type);
-        if (!dev) {
-            av_log(ost->enc_ctx, AV_LOG_WARNING, "No device available "
-                   "for encoder (device type %s for codec %s).\n",
-                   av_hwdevice_get_type_name(type), ost->enc->name);
-            return 0;
-        }
+    dev = hw_device_match_by_codec(ost->enc);
+    if (dev) {
         ost->enc_ctx->hw_device_ctx = av_buffer_ref(dev->device_ref);
         if (!ost->enc_ctx->hw_device_ctx)
             return AVERROR(ENOMEM);
         return 0;
     } else {
-        // No device required.
+        // No device required, or no device available.
         return 0;
     }
 }

diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index 100fa76..d4851a2 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c

@@ -66,41 +66,17 @@
 }
 
 const HWAccel hwaccels[] = {
-#if HAVE_VDPAU_X11
-    { "vdpau", hwaccel_decode_init, HWACCEL_VDPAU, AV_PIX_FMT_VDPAU,
-      AV_HWDEVICE_TYPE_VDPAU },
-#endif
-#if CONFIG_D3D11VA
-    { "d3d11va", hwaccel_decode_init, HWACCEL_D3D11VA, AV_PIX_FMT_D3D11,
-      AV_HWDEVICE_TYPE_D3D11VA },
-#endif
-#if CONFIG_DXVA2
-    { "dxva2", hwaccel_decode_init, HWACCEL_DXVA2, AV_PIX_FMT_DXVA2_VLD,
-      AV_HWDEVICE_TYPE_DXVA2 },
-#endif
-#if CONFIG_VDA
-    { "vda",   videotoolbox_init,   HWACCEL_VDA,   AV_PIX_FMT_VDA,
-      AV_HWDEVICE_TYPE_NONE },
-#endif
 #if CONFIG_VIDEOTOOLBOX
-    { "videotoolbox",   videotoolbox_init,   HWACCEL_VIDEOTOOLBOX,   AV_PIX_FMT_VIDEOTOOLBOX,
-      AV_HWDEVICE_TYPE_NONE },
+    { "videotoolbox", videotoolbox_init, HWACCEL_VIDEOTOOLBOX, AV_PIX_FMT_VIDEOTOOLBOX },
 #endif
 #if CONFIG_LIBMFX
-    { "qsv",   qsv_init,   HWACCEL_QSV,   AV_PIX_FMT_QSV,
-      AV_HWDEVICE_TYPE_NONE },
-#endif
-#if CONFIG_VAAPI
-    { "vaapi", hwaccel_decode_init, HWACCEL_VAAPI, AV_PIX_FMT_VAAPI,
-      AV_HWDEVICE_TYPE_VAAPI },
+    { "qsv",   qsv_init,   HWACCEL_QSV,   AV_PIX_FMT_QSV },
 #endif
 #if CONFIG_CUVID
-    { "cuvid", cuvid_init, HWACCEL_CUVID, AV_PIX_FMT_CUDA,
-      AV_HWDEVICE_TYPE_NONE },
+    { "cuvid", cuvid_init, HWACCEL_CUVID, AV_PIX_FMT_CUDA },
 #endif
     { 0 },
 };
-int hwaccel_lax_profile_check = 0;
 AVBufferRef *hw_device_ctx;
 HWDevice *filter_hw_device;
 
@@ -141,7 +117,6 @@
 static int no_file_overwrite  = 0;
 static int do_psnr            = 0;
 static int input_sync;
-static int override_ffserver  = 0;
 static int input_stream_potentially_available = 0;
 static int ignore_unknown_streams = 0;
 static int copy_unknown_streams = 0;
@@ -195,12 +170,15 @@
 
 static int show_hwaccels(void *optctx, const char *opt, const char *arg)
 {
+    enum AVHWDeviceType type = AV_HWDEVICE_TYPE_NONE;
     int i;
 
     printf("Hardware acceleration methods:\n");
-    for (i = 0; hwaccels[i].name; i++) {
+    while ((type = av_hwdevice_iterate_types(type)) !=
+           AV_HWDEVICE_TYPE_NONE)
+        printf("%s\n", av_hwdevice_get_type_name(type));
+    for (i = 0; hwaccels[i].name; i++)
         printf("%s\n", hwaccels[i].name);
-    }
     printf("\n");
     return 0;
 }
@@ -723,7 +701,8 @@
         AVStream *st = ic->streams[i];
         AVCodecParameters *par = st->codecpar;
         InputStream *ist = av_mallocz(sizeof(*ist));
-        char *framerate = NULL, *hwaccel = NULL, *hwaccel_device = NULL;
+        char *framerate = NULL, *hwaccel_device = NULL;
+        const char *hwaccel = NULL;
         char *hwaccel_output_format = NULL;
         char *codec_tag = NULL;
         char *next;
@@ -787,20 +766,20 @@
             exit_program(1);
         }
 
+        if (o->bitexact)
+            ist->dec_ctx->flags |= AV_CODEC_FLAG_BITEXACT;
+
         switch (par->codec_type) {
         case AVMEDIA_TYPE_VIDEO:
             if(!ist->dec)
                 ist->dec = avcodec_find_decoder(par->codec_id);
 #if FF_API_LOWRES
-            if (av_codec_get_lowres(st->codec)) {
-                av_codec_set_lowres(ist->dec_ctx, av_codec_get_lowres(st->codec));
+            if (st->codec->lowres) {
+                ist->dec_ctx->lowres = st->codec->lowres;
                 ist->dec_ctx->width  = st->codec->width;
                 ist->dec_ctx->height = st->codec->height;
                 ist->dec_ctx->coded_width  = st->codec->coded_width;
                 ist->dec_ctx->coded_height = st->codec->coded_height;
-#if FF_API_EMU_EDGE
-                ist->dec_ctx->flags |= CODEC_FLAG_EMU_EDGE;
-#endif
             }
 #endif
 
@@ -820,11 +799,16 @@
 
             MATCH_PER_STREAM_OPT(hwaccels, str, hwaccel, ic, st);
             if (hwaccel) {
+                // The NVDEC hwaccels use a CUDA device, so remap the name here.
+                if (!strcmp(hwaccel, "nvdec"))
+                    hwaccel = "cuda";
+
                 if (!strcmp(hwaccel, "none"))
                     ist->hwaccel_id = HWACCEL_NONE;
                 else if (!strcmp(hwaccel, "auto"))
                     ist->hwaccel_id = HWACCEL_AUTO;
                 else {
+                    enum AVHWDeviceType type;
                     int i;
                     for (i = 0; hwaccels[i].name; i++) {
                         if (!strcmp(hwaccels[i].name, hwaccel)) {
@@ -834,9 +818,22 @@
                     }
 
                     if (!ist->hwaccel_id) {
+                        type = av_hwdevice_find_type_by_name(hwaccel);
+                        if (type != AV_HWDEVICE_TYPE_NONE) {
+                            ist->hwaccel_id = HWACCEL_GENERIC;
+                            ist->hwaccel_device_type = type;
+                        }
+                    }
+
+                    if (!ist->hwaccel_id) {
                         av_log(NULL, AV_LOG_FATAL, "Unrecognized hwaccel: %s.\n",
                                hwaccel);
                         av_log(NULL, AV_LOG_FATAL, "Supported hwaccels: ");
+                        type = AV_HWDEVICE_TYPE_NONE;
+                        while ((type = av_hwdevice_iterate_types(type)) !=
+                               AV_HWDEVICE_TYPE_NONE)
+                            av_log(NULL, AV_LOG_FATAL, "%s ",
+                                   av_hwdevice_get_type_name(type));
                         for (i = 0; hwaccels[i].name; i++)
                             av_log(NULL, AV_LOG_FATAL, "%s ", hwaccels[i].name);
                         av_log(NULL, AV_LOG_FATAL, "\n");
@@ -903,13 +900,14 @@
 
 static void assert_file_overwrite(const char *filename)
 {
+    const char *proto_name = avio_find_protocol_name(filename);
+
     if (file_overwrite && no_file_overwrite) {
         fprintf(stderr, "Error, both -y and -n supplied. Exiting.\n");
         exit_program(1);
     }
 
     if (!file_overwrite) {
-        const char *proto_name = avio_find_protocol_name(filename);
         if (proto_name && !strcmp(proto_name, "file") && avio_check(filename, 0) == 0) {
             if (stdin_interaction && !no_file_overwrite) {
                 fprintf(stderr,"File '%s' already exists. Overwrite ? [y/N] ", filename);
@@ -928,6 +926,19 @@
             }
         }
     }
+
+    if (proto_name && !strcmp(proto_name, "file")) {
+        for (int i = 0; i < nb_input_files; i++) {
+             InputFile *file = input_files[i];
+             if (file->ctx->iformat->flags & AVFMT_NOFILE)
+                 continue;
+             if (!strcmp(filename, file->ctx->url)) {
+                 av_log(NULL, AV_LOG_FATAL, "Output %s same as Input #%d - exiting\n", filename, i);
+                 av_log(NULL, AV_LOG_WARNING, "FFmpeg cannot edit existing files in-place.\n");
+                 exit_program(1);
+             }
+        }
+    }
 }
 
 static void dump_attachment(AVStream *st, const char *filename)
@@ -977,6 +988,21 @@
     char *    data_codec_name = NULL;
     int scan_all_pmts_set = 0;
 
+    if (o->stop_time != INT64_MAX && o->recording_time != INT64_MAX) {
+        o->stop_time = INT64_MAX;
+        av_log(NULL, AV_LOG_WARNING, "-t and -to cannot be used together; using -t.\n");
+    }
+
+    if (o->stop_time != INT64_MAX && o->recording_time == INT64_MAX) {
+        int64_t start_time = o->start_time == AV_NOPTS_VALUE ? 0 : o->start_time;
+        if (o->stop_time <= start_time) {
+            av_log(NULL, AV_LOG_ERROR, "-to value smaller than -ss; aborting.\n");
+            exit_program(1);
+        } else {
+            o->recording_time = o->stop_time - start_time;
+        }
+    }
+
     if (o->format) {
         if (!(file_iformat = av_find_input_format(o->format))) {
             av_log(NULL, AV_LOG_FATAL, "Unknown input format: '%s'\n", o->format);
@@ -996,7 +1022,6 @@
         print_error(filename, AVERROR(ENOMEM));
         exit_program(1);
     }
-    ic->flags |= AVFMT_FLAG_KEEP_SIDE_DATA;
     if (o->nb_audio_sample_rate) {
         av_dict_set_int(&o->g->format_opts, "sample_rate", o->audio_sample_rate[o->nb_audio_sample_rate - 1].u.i, 0);
     }
@@ -1031,25 +1056,23 @@
     MATCH_PER_TYPE_OPT(codec_names, str, subtitle_codec_name, ic, "s");
     MATCH_PER_TYPE_OPT(codec_names, str,     data_codec_name, ic, "d");
 
-    ic->video_codec_id   = video_codec_name ?
-        find_codec_or_die(video_codec_name   , AVMEDIA_TYPE_VIDEO   , 0)->id : AV_CODEC_ID_NONE;
-    ic->audio_codec_id   = audio_codec_name ?
-        find_codec_or_die(audio_codec_name   , AVMEDIA_TYPE_AUDIO   , 0)->id : AV_CODEC_ID_NONE;
-    ic->subtitle_codec_id= subtitle_codec_name ?
-        find_codec_or_die(subtitle_codec_name, AVMEDIA_TYPE_SUBTITLE, 0)->id : AV_CODEC_ID_NONE;
-    ic->data_codec_id    = data_codec_name ?
-        find_codec_or_die(data_codec_name, AVMEDIA_TYPE_DATA, 0)->id : AV_CODEC_ID_NONE;
-
     if (video_codec_name)
-        av_format_set_video_codec   (ic, find_codec_or_die(video_codec_name   , AVMEDIA_TYPE_VIDEO   , 0));
+        ic->video_codec    = find_codec_or_die(video_codec_name   , AVMEDIA_TYPE_VIDEO   , 0);
     if (audio_codec_name)
-        av_format_set_audio_codec   (ic, find_codec_or_die(audio_codec_name   , AVMEDIA_TYPE_AUDIO   , 0));
+        ic->audio_codec    = find_codec_or_die(audio_codec_name   , AVMEDIA_TYPE_AUDIO   , 0);
     if (subtitle_codec_name)
-        av_format_set_subtitle_codec(ic, find_codec_or_die(subtitle_codec_name, AVMEDIA_TYPE_SUBTITLE, 0));
+        ic->subtitle_codec = find_codec_or_die(subtitle_codec_name, AVMEDIA_TYPE_SUBTITLE, 0);
     if (data_codec_name)
-        av_format_set_data_codec(ic, find_codec_or_die(data_codec_name, AVMEDIA_TYPE_DATA, 0));
+        ic->data_codec     = find_codec_or_die(data_codec_name    , AVMEDIA_TYPE_DATA    , 0);
+
+    ic->video_codec_id     = video_codec_name    ? ic->video_codec->id    : AV_CODEC_ID_NONE;
+    ic->audio_codec_id     = audio_codec_name    ? ic->audio_codec->id    : AV_CODEC_ID_NONE;
+    ic->subtitle_codec_id  = subtitle_codec_name ? ic->subtitle_codec->id : AV_CODEC_ID_NONE;
+    ic->data_codec_id      = data_codec_name     ? ic->data_codec->id     : AV_CODEC_ID_NONE;
 
     ic->flags |= AVFMT_FLAG_NONBLOCK;
+    if (o->bitexact)
+        ic->flags |= AVFMT_FLAG_BITEXACT;
     ic->interrupt_callback = int_cb;
 
     if (!av_dict_get(o->g->format_opts, "scan_all_pmts", NULL, AV_DICT_MATCH_CASE)) {
@@ -1094,9 +1117,22 @@
         }
     }
 
+    if (o->start_time != AV_NOPTS_VALUE && o->start_time_eof != AV_NOPTS_VALUE) {
+        av_log(NULL, AV_LOG_WARNING, "Cannot use -ss and -sseof both, using -ss for %s\n", filename);
+        o->start_time_eof = AV_NOPTS_VALUE;
+    }
+
     if (o->start_time_eof != AV_NOPTS_VALUE) {
-        if (ic->duration>0) {
+        if (o->start_time_eof >= 0) {
+            av_log(NULL, AV_LOG_ERROR, "-sseof value must be negative; aborting\n");
+            exit_program(1);
+        }
+        if (ic->duration > 0) {
             o->start_time = o->start_time_eof + ic->duration;
+            if (o->start_time < 0) {
+                av_log(NULL, AV_LOG_WARNING, "-sseof value seeks to before start of file %s; ignored\n", filename);
+                o->start_time = AV_NOPTS_VALUE;
+            }
         } else
             av_log(NULL, AV_LOG_WARNING, "Cannot use -sseof, duration of %s not known\n", filename);
     }
@@ -1113,8 +1149,10 @@
             int dts_heuristic = 0;
             for (i=0; i<ic->nb_streams; i++) {
                 const AVCodecParameters *par = ic->streams[i]->codecpar;
-                if (par->video_delay)
+                if (par->video_delay) {
                     dts_heuristic = 1;
+                    break;
+                }
             }
             if (dts_heuristic) {
                 seek_timestamp -= 3*AV_TIME_BASE / 23;
@@ -1151,7 +1189,7 @@
     f->loop = o->loop;
     f->duration = 0;
     f->time_base = (AVRational){ 1, 1 };
-#if HAVE_PTHREADS
+#if HAVE_THREADS
     f->thread_queue_size = o->thread_queue_size > 0 ? o->thread_queue_size : 8;
 #endif
 
@@ -1262,7 +1300,7 @@
     if (type == AVMEDIA_TYPE_VIDEO || type == AVMEDIA_TYPE_AUDIO || type == AVMEDIA_TYPE_SUBTITLE) {
         MATCH_PER_STREAM_OPT(codec_names, str, codec_name, s, ost->st);
         if (!codec_name) {
-            ost->st->codecpar->codec_id = av_guess_codec(s->oformat, NULL, s->filename,
+            ost->st->codecpar->codec_id = av_guess_codec(s->oformat, NULL, s->url,
                                                          NULL, ost->st->codecpar->codec_type);
             ost->enc = avcodec_find_encoder(ost->st->codecpar->codec_id);
             if (!ost->enc) {
@@ -1315,6 +1353,7 @@
     ost->file_index = nb_output_files - 1;
     ost->index      = idx;
     ost->st         = st;
+    ost->forced_kf_ref_pts = AV_NOPTS_VALUE;
     st->codecpar->codec_type = type;
 
     ret = choose_encoder(o, oc, ost);
@@ -1371,6 +1410,10 @@
         ost->encoder_opts = filter_codec_opts(o->g->codec_opts, AV_CODEC_ID_NONE, oc, st, NULL);
     }
 
+
+    if (o->bitexact)
+        ost->enc_ctx->flags |= AV_CODEC_FLAG_BITEXACT;
+
     MATCH_PER_STREAM_OPT(time_bases, str, time_base, oc, st);
     if (time_base) {
         AVRational q;
@@ -1663,7 +1706,7 @@
                 av_log(NULL, AV_LOG_FATAL, "Could not allocate memory for intra matrix.\n");
                 exit_program(1);
             }
-            av_codec_set_chroma_intra_matrix(video_enc, p);
+            video_enc->chroma_intra_matrix = p;
             parse_matrix_coeffs(p, chroma_intra_matrix);
         }
         MATCH_PER_STREAM_OPT(inter_matrices, str, inter_matrix, oc, st);
@@ -1983,57 +2026,6 @@
     return 0;
 }
 
-static int read_ffserver_streams(OptionsContext *o, AVFormatContext *s, const char *filename)
-{
-    int i, err;
-    AVFormatContext *ic = avformat_alloc_context();
-
-    ic->flags |= AVFMT_FLAG_KEEP_SIDE_DATA;
-    ic->interrupt_callback = int_cb;
-    err = avformat_open_input(&ic, filename, NULL, NULL);
-    if (err < 0)
-        return err;
-    /* copy stream format */
-    for(i=0;i<ic->nb_streams;i++) {
-        AVStream *st;
-        OutputStream *ost;
-        AVCodec *codec;
-        const char *enc_config;
-
-        codec = avcodec_find_encoder(ic->streams[i]->codecpar->codec_id);
-        if (!codec) {
-            av_log(s, AV_LOG_ERROR, "no encoder found for codec id %i\n", ic->streams[i]->codecpar->codec_id);
-            return AVERROR(EINVAL);
-        }
-        if (codec->type == AVMEDIA_TYPE_AUDIO)
-            opt_audio_codec(o, "c:a", codec->name);
-        else if (codec->type == AVMEDIA_TYPE_VIDEO)
-            opt_video_codec(o, "c:v", codec->name);
-        ost   = new_output_stream(o, s, codec->type, -1);
-        st    = ost->st;
-
-        avcodec_get_context_defaults3(st->codec, codec);
-        enc_config = av_stream_get_recommended_encoder_configuration(ic->streams[i]);
-        if (enc_config) {
-            AVDictionary *opts = NULL;
-            av_dict_parse_string(&opts, enc_config, "=", ",", 0);
-            av_opt_set_dict2(st->codec, &opts, AV_OPT_SEARCH_CHILDREN);
-            av_dict_free(&opts);
-        }
-
-        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && !ost->stream_copy)
-            choose_sample_fmt(st, codec);
-        else if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && !ost->stream_copy)
-            choose_pixel_fmt(st, st->codec, codec, st->codecpar->format);
-        avcodec_copy_context(ost->enc_ctx, st->codec);
-        if (enc_config)
-            av_dict_parse_string(&ost->encoder_opts, enc_config, "=", ",", 0);
-    }
-
-    avformat_close_input(&ic);
-    return err;
-}
-
 static void init_output_filter(OutputFilter *ofilter, OptionsContext *o,
                                AVFormatContext *oc)
 {
@@ -2092,7 +2084,6 @@
 {
     AVFormatContext *oc;
     int i, j, err;
-    AVOutputFormat *file_oformat;
     OutputFile *of;
     OutputStream *ost;
     InputStream  *ist;
@@ -2141,7 +2132,6 @@
     if (o->recording_time != INT64_MAX)
         oc->duration = o->recording_time;
 
-    file_oformat= oc->oformat;
     oc->interrupt_callback = int_cb;
 
     e = av_dict_get(o->g->format_opts, "fflags", NULL, 0);
@@ -2149,6 +2139,10 @@
         const AVOption *o = av_opt_find(oc, "fflags", NULL, 0, 0);
         av_opt_eval_flags(oc, o, e->value, &format_flags);
     }
+    if (o->bitexact) {
+        format_flags |= AVFMT_FLAG_BITEXACT;
+        oc->flags    |= AVFMT_FLAG_BITEXACT;
+    }
 
     /* create streams for all unlabeled output pads */
     for (i = 0; i < nb_filtergraphs; i++) {
@@ -2168,47 +2162,7 @@
         }
     }
 
-    /* ffserver seeking with date=... needs a date reference */
-    if (!strcmp(file_oformat->name, "ffm") &&
-        !(format_flags & AVFMT_FLAG_BITEXACT) &&
-        av_strstart(filename, "http:", NULL)) {
-        int err = parse_option(o, "metadata", "creation_time=now", options);
-        if (err < 0) {
-            print_error(filename, err);
-            exit_program(1);
-        }
-    }
-
-    if (!strcmp(file_oformat->name, "ffm") && !override_ffserver &&
-        av_strstart(filename, "http:", NULL)) {
-        int j;
-        /* special case for files sent to ffserver: we get the stream
-           parameters from ffserver */
-        int err = read_ffserver_streams(o, oc, filename);
-        if (err < 0) {
-            print_error(filename, err);
-            exit_program(1);
-        }
-        for(j = nb_output_streams - oc->nb_streams; j < nb_output_streams; j++) {
-            ost = output_streams[j];
-            for (i = 0; i < nb_input_streams; i++) {
-                ist = input_streams[i];
-                if(ist->st->codecpar->codec_type == ost->st->codecpar->codec_type){
-                    ost->sync_ist= ist;
-                    ost->source_index= i;
-                    if(ost->st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) ost->avfilter = av_strdup("anull");
-                    if(ost->st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) ost->avfilter = av_strdup("null");
-                    ist->discard = 0;
-                    ist->st->discard = ist->user_set_discard;
-                    break;
-                }
-            }
-            if(!ost->sync_ist){
-                av_log(NULL, AV_LOG_FATAL, "Missing %s stream which is required by this ffm\n", av_get_media_type_string(ost->st->codecpar->codec_type));
-                exit_program(1);
-            }
-        }
-    } else if (!o->nb_stream_maps) {
+    if (!o->nb_stream_maps) {
         char *subtitle_codec_name = NULL;
         /* pick the "best" stream of each type */
 
@@ -2410,7 +2364,7 @@
 #endif
 
     if (!oc->nb_streams && !(oc->oformat->flags & AVFMT_NOSTREAMS)) {
-        av_dump_format(oc, nb_output_files - 1, oc->filename, 1);
+        av_dump_format(oc, nb_output_files - 1, oc->url, 1);
         av_log(NULL, AV_LOG_ERROR, "Output file #%d does not contain any stream\n", nb_output_files - 1);
         exit_program(1);
     }
@@ -2542,8 +2496,8 @@
 
     /* check filename in case of an image number is expected */
     if (oc->oformat->flags & AVFMT_NEEDNUMBER) {
-        if (!av_filename_number_test(oc->filename)) {
-            print_error(oc->filename, AVERROR(EINVAL));
+        if (!av_filename_number_test(oc->url)) {
+            print_error(oc->url, AVERROR(EINVAL));
             exit_program(1);
         }
     }
@@ -3190,7 +3144,7 @@
            "    -h      -- print basic options\n"
            "    -h long -- print more options\n"
            "    -h full -- print all options (including all format and codec specific options, very long)\n"
-           "    -h type=name -- print all options for the named decoder/encoder/demuxer/muxer/filter\n"
+           "    -h type=name -- print all options for the named decoder/encoder/demuxer/muxer/filter/bsf\n"
            "    See man %s for detailed description of the options.\n"
            "\n", program_name);
 
@@ -3233,8 +3187,11 @@
 #if CONFIG_SWSCALE
         show_help_children(sws_get_class(), flags);
 #endif
+#if CONFIG_SWRESAMPLE
         show_help_children(swr_get_class(), AV_OPT_FLAG_AUDIO_PARAM);
+#endif
         show_help_children(avfilter_get_class(), AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM);
+        show_help_children(av_bsf_get_class(), AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_BSF_PARAM);
     }
 }
 
@@ -3404,7 +3361,7 @@
                         OPT_INPUT | OPT_OUTPUT,                      { .off = OFFSET(recording_time) },
         "record or transcode \"duration\" seconds of audio/video",
         "duration" },
-    { "to",             HAS_ARG | OPT_TIME | OPT_OFFSET | OPT_OUTPUT,  { .off = OFFSET(stop_time) },
+    { "to",             HAS_ARG | OPT_TIME | OPT_OFFSET | OPT_INPUT | OPT_OUTPUT,  { .off = OFFSET(stop_time) },
         "record or transcode stop time", "time_stop" },
     { "fs",             HAS_ARG | OPT_INT64 | OPT_OFFSET | OPT_OUTPUT, { .off = OFFSET(limit_filesize) },
         "set the limit file size in bytes", "limit_size" },
@@ -3412,7 +3369,7 @@
                         OPT_INPUT | OPT_OUTPUT,                      { .off = OFFSET(start_time) },
         "set the start time offset", "time_off" },
     { "sseof",          HAS_ARG | OPT_TIME | OPT_OFFSET |
-                        OPT_INPUT | OPT_OUTPUT,                      { .off = OFFSET(start_time_eof) },
+                        OPT_INPUT,                                   { .off = OFFSET(start_time_eof) },
         "set the start time offset relative to EOF", "time_off" },
     { "seek_timestamp", HAS_ARG | OPT_INT | OPT_OFFSET |
                         OPT_INPUT,                                   { .off = OFFSET(seek_timestamp) },
@@ -3472,6 +3429,9 @@
     { "shortest",       OPT_BOOL | OPT_EXPERT | OPT_OFFSET |
                         OPT_OUTPUT,                                  { .off = OFFSET(shortest) },
         "finish encoding within shortest input" },
+    { "bitexact",       OPT_BOOL | OPT_EXPERT | OPT_OFFSET |
+                        OPT_OUTPUT | OPT_INPUT,                      { .off = OFFSET(bitexact) },
+        "bitexact mode" },
     { "apad",           OPT_STRING | HAS_ARG | OPT_SPEC |
                         OPT_OUTPUT,                                  { .off = OFFSET(apad) },
         "audio pad", "" },
@@ -3530,7 +3490,7 @@
     { "debug_ts",       OPT_BOOL | OPT_EXPERT,                       { &debug_ts },
         "print timestamp debugging info" },
     { "max_error_rate",  HAS_ARG | OPT_FLOAT,                        { &max_error_rate },
-        "maximum error rate", "ratio of errors (0.0: no errors, 1.0: 100% errors) above which ffmpeg returns an error instead of success." },
+        "ratio of errors (0.0: no errors, 1.0: 100% errors) above which ffmpeg returns an error instead of success.", "maximum error rate" },
     { "discard",        OPT_STRING | HAS_ARG | OPT_SPEC |
                         OPT_INPUT,                                   { .off = OFFSET(discard) },
         "discard", "" },
@@ -3632,7 +3592,7 @@
     { "hwaccel_output_format", OPT_VIDEO | OPT_STRING | HAS_ARG | OPT_EXPERT |
                           OPT_SPEC | OPT_INPUT,                                  { .off = OFFSET(hwaccel_output_formats) },
         "select output format used with HW accelerated decoding", "format" },
-#if CONFIG_VDA || CONFIG_VIDEOTOOLBOX
+#if CONFIG_VIDEOTOOLBOX
     { "videotoolbox_pixfmt", HAS_ARG | OPT_STRING | OPT_EXPERT, { &videotoolbox_pixfmt}, "" },
 #endif
     { "hwaccels",         OPT_EXIT,                                              { .func_arg = show_hwaccels },
@@ -3640,8 +3600,6 @@
     { "autorotate",       HAS_ARG | OPT_BOOL | OPT_SPEC |
                           OPT_EXPERT | OPT_INPUT,                                { .off = OFFSET(autorotate) },
         "automatically insert correct rotate filters" },
-    { "hwaccel_lax_profile_check", OPT_BOOL | OPT_EXPERT,                        { &hwaccel_lax_profile_check},
-        "attempt to decode anyway if HW accelerated decoder's supported profiles do not exactly match the stream" },
 
     /* audio options */
     { "aframes",        OPT_AUDIO | HAS_ARG  | OPT_PERFILE | OPT_OUTPUT,           { .func_arg = opt_audio_frames },
@@ -3699,8 +3657,6 @@
         "set the maximum demux-decode delay", "seconds" },
     { "muxpreload", OPT_FLOAT | HAS_ARG | OPT_EXPERT | OPT_OFFSET | OPT_OUTPUT, { .off = OFFSET(mux_preload) },
         "set the initial demux-decode delay", "seconds" },
-    { "override_ffserver", OPT_BOOL | OPT_EXPERT | OPT_OUTPUT, { &override_ffserver },
-        "override the options from ffserver", "" },
     { "sdp_file", HAS_ARG | OPT_EXPERT | OPT_OUTPUT, { .func_arg = opt_sdp_file },
         "specify a file in which to print sdp information", "file" },
 

diff --git a/fftools/ffmpeg_qsv.c b/fftools/ffmpeg_qsv.c
index 7442750..9c4285b 100644
--- a/fftools/ffmpeg_qsv.c
+++ b/fftools/ffmpeg_qsv.c

@@ -93,7 +93,7 @@
     frames_ctx->height            = FFALIGN(s->coded_height, 32);
     frames_ctx->format            = AV_PIX_FMT_QSV;
     frames_ctx->sw_format         = s->sw_pix_fmt;
-    frames_ctx->initial_pool_size = 64;
+    frames_ctx->initial_pool_size = 64 + s->extra_hw_frames;
     frames_hwctx->frame_type      = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET;
 
     ret = av_hwframe_ctx_init(ist->hw_frames_ctx);

diff --git a/fftools/ffmpeg_videotoolbox.c b/fftools/ffmpeg_videotoolbox.c
index e903965..b820aec 100644
--- a/fftools/ffmpeg_videotoolbox.c
+++ b/fftools/ffmpeg_videotoolbox.c

@@ -23,12 +23,7 @@
 #endif
 
 #include "libavcodec/avcodec.h"
-#if CONFIG_VDA
-#  include "libavcodec/vda.h"
-#endif
-#if CONFIG_VIDEOTOOLBOX
-#  include "libavcodec/videotoolbox.h"
-#endif
+#include "libavcodec/videotoolbox.h"
 #include "libavutil/imgutils.h"
 #include "ffmpeg.h"
 
@@ -114,15 +109,7 @@
 
     av_frame_free(&vt->tmp_frame);
 
-    if (ist->hwaccel_id == HWACCEL_VIDEOTOOLBOX) {
-#if CONFIG_VIDEOTOOLBOX
-        av_videotoolbox_default_free(s);
-#endif
-    } else {
-#if CONFIG_VDA
-        av_vda_default_free(s);
-#endif
-    }
+    av_videotoolbox_default_free(s);
     av_freep(&ist->hwaccel_ctx);
 }
 
@@ -147,8 +134,7 @@
         goto fail;
     }
 
-    if (ist->hwaccel_id == HWACCEL_VIDEOTOOLBOX) {
-#if CONFIG_VIDEOTOOLBOX
+    // TODO: reindent
         if (!videotoolbox_pixfmt) {
             ret = av_videotoolbox_default_init(s);
         } else {
@@ -166,31 +152,8 @@
             ret = av_videotoolbox_default_init2(s, vtctx);
             CFRelease(pixfmt_str);
         }
-#endif
-    } else {
-#if CONFIG_VDA
-        if (!videotoolbox_pixfmt) {
-            ret = av_vda_default_init(s);
-        } else {
-            AVVDAContext *vdactx = av_vda_alloc_context();
-            CFStringRef pixfmt_str = CFStringCreateWithCString(kCFAllocatorDefault,
-                                                               videotoolbox_pixfmt,
-                                                               kCFStringEncodingUTF8);
-#if HAVE_UTGETOSTYPEFROMSTRING
-            vdactx->cv_pix_fmt_type = UTGetOSTypeFromString(pixfmt_str);
-#else
-            av_log(s, loglevel, "UTGetOSTypeFromString() is not available "
-                   "on this platform, %s pixel format can not be honored from "
-                   "the command line\n", videotoolbox_pixfmt);
-#endif
-            ret = av_vda_default_init2(s, vdactx);
-            CFRelease(pixfmt_str);
-        }
-#endif
-    }
     if (ret < 0) {
-        av_log(NULL, loglevel,
-               "Error creating %s decoder.\n", ist->hwaccel_id == HWACCEL_VIDEOTOOLBOX ? "Videotoolbox" : "VDA");
+        av_log(NULL, loglevel, "Error creating Videotoolbox decoder.\n");
         goto fail;
     }
 

diff --git a/fftools/ffplay.c b/fftools/ffplay.c
index 571dce3..ab1f9fa 100644
--- a/fftools/ffplay.c
+++ b/fftools/ffplay.c

@@ -314,11 +314,14 @@
 static int default_height = 480;
 static int screen_width  = 0;
 static int screen_height = 0;
+static int screen_left = SDL_WINDOWPOS_CENTERED;
+static int screen_top = SDL_WINDOWPOS_CENTERED;
 static int audio_disable;
 static int video_disable;
 static int subtitle_disable;
 static const char* wanted_stream_spec[AVMEDIA_TYPE_NB] = {0};
 static int seek_by_bytes = -1;
+static float seek_interval = 10;
 static int display_disable;
 static int borderless;
 static int startup_volume = 100;
@@ -609,7 +612,7 @@
                         if (ret >= 0) {
                             AVRational tb = (AVRational){1, frame->sample_rate};
                             if (frame->pts != AV_NOPTS_VALUE)
-                                frame->pts = av_rescale_q(frame->pts, av_codec_get_pkt_timebase(d->avctx), tb);
+                                frame->pts = av_rescale_q(frame->pts, d->avctx->pkt_timebase, tb);
                             else if (d->next_pts != AV_NOPTS_VALUE)
                                 frame->pts = av_rescale_q(d->next_pts, d->next_pts_tb, tb);
                             if (frame->pts != AV_NOPTS_VALUE) {
@@ -834,10 +837,11 @@
 {
     Uint32 format;
     int access, w, h;
-    if (SDL_QueryTexture(*texture, &format, &access, &w, &h) < 0 || new_width != w || new_height != h || new_format != format) {
+    if (!*texture || SDL_QueryTexture(*texture, &format, &access, &w, &h) < 0 || new_width != w || new_height != h || new_format != format) {
         void *pixels;
         int pitch;
-        SDL_DestroyTexture(*texture);
+        if (*texture)
+            SDL_DestroyTexture(*texture);
         if (!(*texture = SDL_CreateTexture(renderer, new_format, SDL_TEXTUREACCESS_STREAMING, new_width, new_height)))
             return -1;
         if (SDL_SetTextureBlendMode(*texture, blendmode) < 0)
@@ -953,6 +957,22 @@
     return ret;
 }
 
+static void set_sdl_yuv_conversion_mode(AVFrame *frame)
+{
+#if SDL_VERSION_ATLEAST(2,0,8)
+    SDL_YUV_CONVERSION_MODE mode = SDL_YUV_CONVERSION_AUTOMATIC;
+    if (frame && (frame->format == AV_PIX_FMT_YUV420P || frame->format == AV_PIX_FMT_YUYV422 || frame->format == AV_PIX_FMT_UYVY422)) {
+        if (frame->color_range == AVCOL_RANGE_JPEG)
+            mode = SDL_YUV_CONVERSION_JPEG;
+        else if (frame->colorspace == AVCOL_SPC_BT709)
+            mode = SDL_YUV_CONVERSION_BT709;
+        else if (frame->colorspace == AVCOL_SPC_BT470BG || frame->colorspace == AVCOL_SPC_SMPTE170M || frame->colorspace == AVCOL_SPC_SMPTE240M)
+            mode = SDL_YUV_CONVERSION_BT601;
+    }
+    SDL_SetYUVConversionMode(mode);
+#endif
+}
+
 static void video_image_display(VideoState *is)
 {
     Frame *vp;
@@ -1014,7 +1034,9 @@
         vp->flip_v = vp->frame->linesize[0] < 0;
     }
 
+    set_sdl_yuv_conversion_mode(vp->frame);
     SDL_RenderCopyEx(renderer, is->vid_texture, NULL, &rect, 0, NULL, vp->flip_v ? SDL_FLIP_VERTICAL : 0);
+    set_sdl_yuv_conversion_mode(NULL);
     if (sp) {
 #if USE_ONEPASS_SUBTITLE_RENDER
         SDL_RenderCopy(renderer, is->sub_texture, NULL, &rect);
@@ -1284,7 +1306,6 @@
         SDL_DestroyRenderer(renderer);
     if (window)
         SDL_DestroyWindow(window);
-    av_lockmgr_register(NULL);
     uninit_opts();
 #if CONFIG_AVFILTER
     av_freep(&vfilters_list);
@@ -1327,7 +1348,7 @@
     SDL_SetWindowTitle(window, window_title);
 
     SDL_SetWindowSize(window, w, h);
-    SDL_SetWindowPosition(window, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED);
+    SDL_SetWindowPosition(window, screen_left, screen_top);
     if (is_full_screen)
         SDL_SetWindowFullscreen(window, SDL_WINDOW_FULLSCREEN_DESKTOP);
     SDL_ShowWindow(window);
@@ -2193,6 +2214,8 @@
             ret = queue_picture(is, frame, pts, duration, frame->pkt_pos, is->viddec.pkt_serial);
             av_frame_unref(frame);
 #if CONFIG_AVFILTER
+            if (is->videoq.serial != is->viddec.pkt_serial)
+                break;
         }
 #endif
 
@@ -2563,7 +2586,7 @@
     ret = avcodec_parameters_to_context(avctx, ic->streams[stream_index]->codecpar);
     if (ret < 0)
         goto fail;
-    av_codec_set_pkt_timebase(avctx, ic->streams[stream_index]->time_base);
+    avctx->pkt_timebase = ic->streams[stream_index]->time_base;
 
     codec = avcodec_find_decoder(avctx->codec_id);
 
@@ -2578,28 +2601,21 @@
         if (forced_codec_name) av_log(NULL, AV_LOG_WARNING,
                                       "No codec could be found with name '%s'\n", forced_codec_name);
         else                   av_log(NULL, AV_LOG_WARNING,
-                                      "No codec could be found with id %d\n", avctx->codec_id);
+                                      "No decoder could be found for codec %s\n", avcodec_get_name(avctx->codec_id));
         ret = AVERROR(EINVAL);
         goto fail;
     }
 
     avctx->codec_id = codec->id;
-    if(stream_lowres > av_codec_get_max_lowres(codec)){
+    if (stream_lowres > codec->max_lowres) {
         av_log(avctx, AV_LOG_WARNING, "The maximum value for lowres supported by the decoder is %d\n",
-                av_codec_get_max_lowres(codec));
-        stream_lowres = av_codec_get_max_lowres(codec);
+                codec->max_lowres);
+        stream_lowres = codec->max_lowres;
     }
-    av_codec_set_lowres(avctx, stream_lowres);
+    avctx->lowres = stream_lowres;
 
-#if FF_API_EMU_EDGE
-    if(stream_lowres) avctx->flags |= CODEC_FLAG_EMU_EDGE;
-#endif
     if (fast)
         avctx->flags2 |= AV_CODEC_FLAG2_FAST;
-#if FF_API_EMU_EDGE
-    if(codec->capabilities & AV_CODEC_CAP_DR1)
-        avctx->flags |= CODEC_FLAG_EMU_EDGE;
-#endif
 
     opts = filter_codec_opts(codec_opts, avctx->codec_id, ic, ic->streams[stream_index], codec);
     if (!av_dict_get(opts, "threads", NULL, 0))
@@ -2720,8 +2736,8 @@
     )
         return 1;
 
-    if(s->pb && (   !strncmp(s->filename, "rtp:", 4)
-                 || !strncmp(s->filename, "udp:", 4)
+    if(s->pb && (   !strncmp(s->url, "rtp:", 4)
+                 || !strncmp(s->url, "udp:", 4)
                 )
     )
         return 1;
@@ -2936,7 +2952,7 @@
             ret = avformat_seek_file(is->ic, -1, seek_min, seek_target, seek_max, is->seek_flags);
             if (ret < 0) {
                 av_log(NULL, AV_LOG_ERROR,
-                       "%s: error while seeking\n", is->ic->filename);
+                       "%s: error while seeking\n", is->ic->url);
             } else {
                 if (is->audio_stream >= 0) {
                     packet_queue_flush(&is->audioq);
@@ -3258,15 +3274,14 @@
         refresh_loop_wait_event(cur_stream, &event);
         switch (event.type) {
         case SDL_KEYDOWN:
-            if (exit_on_keydown) {
+            if (exit_on_keydown || event.key.keysym.sym == SDLK_ESCAPE || event.key.keysym.sym == SDLK_q) {
                 do_exit(cur_stream);
                 break;
             }
+            // If we don't yet have a window, skip all key events, because read_thread might still be initializing...
+            if (!cur_stream->width)
+                continue;
             switch (event.key.keysym.sym) {
-            case SDLK_ESCAPE:
-            case SDLK_q:
-                do_exit(cur_stream);
-                break;
             case SDLK_f:
                 toggle_full_screen(cur_stream);
                 cur_stream->force_refresh = 1;
@@ -3331,10 +3346,10 @@
                 seek_chapter(cur_stream, -1);
                 break;
             case SDLK_LEFT:
-                incr = -10.0;
+                incr = seek_interval ? -seek_interval : -10.0;
                 goto do_seek;
             case SDLK_RIGHT:
-                incr = 10.0;
+                incr = seek_interval ? seek_interval : 10.0;
                 goto do_seek;
             case SDLK_UP:
                 incr = 60.0;
@@ -3570,6 +3585,7 @@
     { "ss", HAS_ARG, { .func_arg = opt_seek }, "seek to a given position in seconds", "pos" },
     { "t", HAS_ARG, { .func_arg = opt_duration }, "play  \"duration\" seconds of audio/video", "duration" },
     { "bytes", OPT_INT | HAS_ARG, { &seek_by_bytes }, "seek by bytes 0=off 1=on -1=auto", "val" },
+    { "seek_interval", OPT_FLOAT | HAS_ARG, { &seek_interval }, "set seek interval for left/right keys, in seconds", "seconds" },
     { "nodisp", OPT_BOOL, { &display_disable }, "disable graphical display" },
     { "noborder", OPT_BOOL, { &borderless }, "borderless window" },
     { "volume", OPT_INT | HAS_ARG, { &startup_volume}, "set startup volume 0=min 100=max", "volume" },
@@ -3588,6 +3604,8 @@
     { "framedrop", OPT_BOOL | OPT_EXPERT, { &framedrop }, "drop frames when cpu is too slow", "" },
     { "infbuf", OPT_BOOL | OPT_EXPERT, { &infinite_buffer }, "don't limit the input buffer size (useful with realtime streams)", "" },
     { "window_title", OPT_STRING | HAS_ARG, { &window_title }, "set window title", "window title" },
+    { "left", OPT_INT | HAS_ARG | OPT_EXPERT, { &screen_left }, "set the x position for the left of the window", "x pos" },
+    { "top", OPT_INT | HAS_ARG | OPT_EXPERT, { &screen_top }, "set the y position for the top of the window", "y pos" },
 #if CONFIG_AVFILTER
     { "vf", OPT_EXPERT | HAS_ARG, { .func_arg = opt_add_vfilter }, "set video filters", "filter_graph" },
     { "af", OPT_STRING | HAS_ARG, { &afilters }, "set audio filters", "filter_graph" },
@@ -3640,7 +3658,7 @@
            "c                   cycle program\n"
            "w                   cycle video filters or show modes\n"
            "s                   activate frame-step mode\n"
-           "left/right          seek backward/forward 10 seconds\n"
+           "left/right          seek backward/forward 10 seconds or to custom interval if -seek_interval is set\n"
            "down/up             seek backward/forward 1 minute\n"
            "page down/page up   seek backward/forward 10 minutes\n"
            "right mouse click   seek to percentage in file corresponding to fraction of width\n"
@@ -3648,27 +3666,6 @@
            );
 }
 
-static int lockmgr(void **mtx, enum AVLockOp op)
-{
-   switch(op) {
-      case AV_LOCK_CREATE:
-          *mtx = SDL_CreateMutex();
-          if(!*mtx) {
-              av_log(NULL, AV_LOG_FATAL, "SDL_CreateMutex(): %s\n", SDL_GetError());
-              return 1;
-          }
-          return 0;
-      case AV_LOCK_OBTAIN:
-          return !!SDL_LockMutex(*mtx);
-      case AV_LOCK_RELEASE:
-          return !!SDL_UnlockMutex(*mtx);
-      case AV_LOCK_DESTROY:
-          SDL_DestroyMutex(*mtx);
-          return 0;
-   }
-   return 1;
-}
-
 /* Called from the main */
 int main(int argc, char **argv)
 {
@@ -3684,10 +3681,6 @@
 #if CONFIG_AVDEVICE
     avdevice_register_all();
 #endif
-#if CONFIG_AVFILTER
-    avfilter_register_all();
-#endif
-    av_register_all();
     avformat_network_init();
 
     init_opts();
@@ -3730,11 +3723,6 @@
     SDL_EventState(SDL_SYSWMEVENT, SDL_IGNORE);
     SDL_EventState(SDL_USEREVENT, SDL_IGNORE);
 
-    if (av_lockmgr_register(lockmgr)) {
-        av_log(NULL, AV_LOG_FATAL, "Could not initialize lock manager!\n");
-        do_exit(NULL);
-    }
-
     av_init_packet(&flush_pkt);
     flush_pkt.data = (uint8_t *)&flush_pkt;
 

diff --git a/fftools/ffprobe.c b/fftools/ffprobe.c
index b2e8949..544786e 100644
--- a/fftools/ffprobe.c
+++ b/fftools/ffprobe.c

@@ -2275,7 +2275,8 @@
             break;
 
         case AVMEDIA_TYPE_SUBTITLE:
-            ret = avcodec_decode_subtitle2(dec_ctx, &sub, &got_frame, pkt);
+            if (*packet_new)
+                ret = avcodec_decode_subtitle2(dec_ctx, &sub, &got_frame, pkt);
             *packet_new = 0;
             break;
         default:
@@ -2370,11 +2371,11 @@
         goto end;
     }
     while (!av_read_frame(fmt_ctx, &pkt)) {
-        if (ifile->nb_streams > nb_streams) {
+        if (fmt_ctx->nb_streams > nb_streams) {
             REALLOCZ_ARRAY_STREAM(nb_streams_frames,  nb_streams, fmt_ctx->nb_streams);
             REALLOCZ_ARRAY_STREAM(nb_streams_packets, nb_streams, fmt_ctx->nb_streams);
             REALLOCZ_ARRAY_STREAM(selected_streams,   nb_streams, fmt_ctx->nb_streams);
-            nb_streams = ifile->nb_streams;
+            nb_streams = fmt_ctx->nb_streams;
         }
         if (selected_streams[pkt.stream_index]) {
             AVRational tb = ifile->streams[pkt.stream_index].st->time_base;
@@ -2512,13 +2513,15 @@
     case AVMEDIA_TYPE_VIDEO:
         print_int("width",        par->width);
         print_int("height",       par->height);
+#if FF_API_LAVF_AVCTX
         if (dec_ctx) {
             print_int("coded_width",  dec_ctx->coded_width);
             print_int("coded_height", dec_ctx->coded_height);
         }
+#endif
         print_int("has_b_frames", par->video_delay);
         sar = av_guess_sample_aspect_ratio(fmt_ctx, stream, NULL);
-        if (sar.den) {
+        if (sar.num) {
             print_q("sample_aspect_ratio", sar, ':');
             av_reduce(&dar.num, &dar.den,
                       par->width  * sar.num,
@@ -2778,7 +2781,7 @@
     int ret = 0;
 
     writer_print_section_header(w, SECTION_ID_FORMAT);
-    print_str_validate("filename", fmt_ctx->filename);
+    print_str_validate("filename", fmt_ctx->url);
     print_int("nb_streams",       fmt_ctx->nb_streams);
     print_int("nb_programs",      fmt_ctx->nb_programs);
     print_str("format_name",      fmt_ctx->iformat->name);
@@ -2792,7 +2795,7 @@
     else           print_str_opt("size", "N/A");
     if (fmt_ctx->bit_rate > 0) print_val    ("bit_rate", fmt_ctx->bit_rate, unit_bit_per_second_str);
     else                       print_str_opt("bit_rate", "N/A");
-    print_int("probe_score", av_format_get_probe_score(fmt_ctx));
+    print_int("probe_score", fmt_ctx->probe_score);
     if (do_show_format_tags)
         ret = show_tags(w, fmt_ctx->metadata, SECTION_ID_FORMAT_TAGS);
 
@@ -2828,8 +2831,6 @@
         exit_program(1);
     }
 
-    fmt_ctx->flags |= AVFMT_FLAG_KEEP_SIDE_DATA;
-
     if (!av_dict_get(format_opts, "scan_all_pmts", NULL, AV_DICT_MATCH_CASE)) {
         av_dict_set(&format_opts, "scan_all_pmts", "1", AV_DICT_DONT_OVERWRITE);
         scan_all_pmts_set = 1;
@@ -2912,8 +2913,12 @@
                 av_dict_set(&codec_opts, "threads", "1", 0);
             }
 
-            av_codec_set_pkt_timebase(ist->dec_ctx, stream->time_base);
+            ist->dec_ctx->pkt_timebase = stream->time_base;
             ist->dec_ctx->framerate = stream->avg_frame_rate;
+#if FF_API_LAVF_AVCTX
+            ist->dec_ctx->coded_width = stream->codec->coded_width;
+            ist->dec_ctx->coded_height = stream->codec->coded_height;
+#endif
 
             if (avcodec_open2(ist->dec_ctx, codec, &opts) < 0) {
                 av_log(NULL, AV_LOG_WARNING, "Could not open codec for input stream %d\n",
@@ -3113,7 +3118,9 @@
             PRINT_PIX_FMT_FLAG(HWACCEL,   "hwaccel");
             PRINT_PIX_FMT_FLAG(PLANAR,    "planar");
             PRINT_PIX_FMT_FLAG(RGB,       "rgb");
+#if FF_API_PSEUDOPAL
             PRINT_PIX_FMT_FLAG(PSEUDOPAL, "pseudopal");
+#endif
             PRINT_PIX_FMT_FLAG(ALPHA,     "alpha");
             writer_print_section_footer(w);
         }
@@ -3562,7 +3569,6 @@
 
     options = real_options;
     parse_loglevel(argc, argv, options);
-    av_register_all();
     avformat_network_init();
     init_opts();
 #if CONFIG_AVDEVICE

diff --git a/fftools/ffserver.c b/fftools/ffserver.c
deleted file mode 100644
index 7f8b238..0000000
--- a/fftools/ffserver.c
+++ /dev/null

@@ -1,4022 +0,0 @@
-/*
- * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * multiple format streaming server based on the FFmpeg libraries
- */
-
-#include "config.h"
-#if !HAVE_CLOSESOCKET
-#define closesocket close
-#endif
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "libavformat/avformat.h"
-/* FIXME: those are internal headers, ffserver _really_ shouldn't use them */
-#include "libavformat/rtpproto.h"
-#include "libavformat/rtsp.h"
-#include "libavformat/avio_internal.h"
-#include "libavformat/internal.h"
-
-#include "libavutil/avassert.h"
-#include "libavutil/avstring.h"
-#include "libavutil/lfg.h"
-#include "libavutil/dict.h"
-#include "libavutil/intreadwrite.h"
-#include "libavutil/mathematics.h"
-#include "libavutil/random_seed.h"
-#include "libavutil/rational.h"
-#include "libavutil/parseutils.h"
-#include "libavutil/opt.h"
-#include "libavutil/time.h"
-
-#include <stdarg.h>
-#if HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#if HAVE_POLL_H
-#include <poll.h>
-#endif
-#include <errno.h>
-#include <time.h>
-#include <sys/wait.h>
-#include <signal.h>
-
-#include "cmdutils.h"
-#include "ffserver_config.h"
-
-#define PATH_LENGTH 1024
-
-const char program_name[] = "ffserver";
-const int program_birth_year = 2000;
-
-static const OptionDef options[];
-
-enum HTTPState {
-    HTTPSTATE_WAIT_REQUEST,
-    HTTPSTATE_SEND_HEADER,
-    HTTPSTATE_SEND_DATA_HEADER,
-    HTTPSTATE_SEND_DATA,          /* sending TCP or UDP data */
-    HTTPSTATE_SEND_DATA_TRAILER,
-    HTTPSTATE_RECEIVE_DATA,
-    HTTPSTATE_WAIT_FEED,          /* wait for data from the feed */
-    HTTPSTATE_READY,
-
-    RTSPSTATE_WAIT_REQUEST,
-    RTSPSTATE_SEND_REPLY,
-    RTSPSTATE_SEND_PACKET,
-};
-
-static const char * const http_state[] = {
-    "HTTP_WAIT_REQUEST",
-    "HTTP_SEND_HEADER",
-
-    "SEND_DATA_HEADER",
-    "SEND_DATA",
-    "SEND_DATA_TRAILER",
-    "RECEIVE_DATA",
-    "WAIT_FEED",
-    "READY",
-
-    "RTSP_WAIT_REQUEST",
-    "RTSP_SEND_REPLY",
-    "RTSP_SEND_PACKET",
-};
-
-#define IOBUFFER_INIT_SIZE 8192
-
-/* timeouts are in ms */
-#define HTTP_REQUEST_TIMEOUT (15 * 1000)
-#define RTSP_REQUEST_TIMEOUT (3600 * 24 * 1000)
-
-#define SYNC_TIMEOUT (10 * 1000)
-
-typedef struct RTSPActionServerSetup {
-    uint32_t ipaddr;
-    char transport_option[512];
-} RTSPActionServerSetup;
-
-typedef struct {
-    int64_t count1, count2;
-    int64_t time1, time2;
-} DataRateData;
-
-/* context associated with one connection */
-typedef struct HTTPContext {
-    enum HTTPState state;
-    int fd; /* socket file descriptor */
-    struct sockaddr_in from_addr; /* origin */
-    struct pollfd *poll_entry; /* used when polling */
-    int64_t timeout;
-    uint8_t *buffer_ptr, *buffer_end;
-    int http_error;
-    int post;
-    int chunked_encoding;
-    int chunk_size;               /* 0 if it needs to be read */
-    struct HTTPContext *next;
-    int got_key_frame; /* stream 0 => 1, stream 1 => 2, stream 2=> 4 */
-    int64_t data_count;
-    /* feed input */
-    int feed_fd;
-    /* input format handling */
-    AVFormatContext *fmt_in;
-    int64_t start_time;            /* In milliseconds - this wraps fairly often */
-    int64_t first_pts;            /* initial pts value */
-    int64_t cur_pts;             /* current pts value from the stream in us */
-    int64_t cur_frame_duration;  /* duration of the current frame in us */
-    int cur_frame_bytes;       /* output frame size, needed to compute
-                                  the time at which we send each
-                                  packet */
-    int pts_stream_index;        /* stream we choose as clock reference */
-    int64_t cur_clock;           /* current clock reference value in us */
-    /* output format handling */
-    struct FFServerStream *stream;
-    /* -1 is invalid stream */
-    int feed_streams[FFSERVER_MAX_STREAMS]; /* index of streams in the feed */
-    int switch_feed_streams[FFSERVER_MAX_STREAMS]; /* index of streams in the feed */
-    int switch_pending;
-    AVFormatContext *pfmt_ctx; /* instance of FFServerStream for one user */
-    int last_packet_sent; /* true if last data packet was sent */
-    int suppress_log;
-    DataRateData datarate;
-    int wmp_client_id;
-    char protocol[16];
-    char method[16];
-    char url[128];
-    char clean_url[128*7];
-    int buffer_size;
-    uint8_t *buffer;
-    int is_packetized; /* if true, the stream is packetized */
-    int packet_stream_index; /* current stream for output in state machine */
-
-    /* RTSP state specific */
-    uint8_t *pb_buffer; /* XXX: use that in all the code */
-    AVIOContext *pb;
-    int seq; /* RTSP sequence number */
-
-    /* RTP state specific */
-    enum RTSPLowerTransport rtp_protocol;
-    char session_id[32]; /* session id */
-    AVFormatContext *rtp_ctx[FFSERVER_MAX_STREAMS];
-
-    /* RTP/UDP specific */
-    URLContext *rtp_handles[FFSERVER_MAX_STREAMS];
-
-    /* RTP/TCP specific */
-    struct HTTPContext *rtsp_c;
-    uint8_t *packet_buffer, *packet_buffer_ptr, *packet_buffer_end;
-} HTTPContext;
-
-static HTTPContext *first_http_ctx;
-
-static FFServerConfig config = {
-    .nb_max_http_connections = 2000,
-    .nb_max_connections = 5,
-    .max_bandwidth = 1000,
-    .use_defaults = 1,
-};
-
-static void new_connection(int server_fd, int is_rtsp);
-static void close_connection(HTTPContext *c);
-
-/* HTTP handling */
-static int handle_connection(HTTPContext *c);
-static inline void print_stream_params(AVIOContext *pb, FFServerStream *stream);
-static void compute_status(HTTPContext *c);
-static int open_input_stream(HTTPContext *c, const char *info);
-static int http_parse_request(HTTPContext *c);
-static int http_send_data(HTTPContext *c);
-static int http_start_receive_data(HTTPContext *c);
-static int http_receive_data(HTTPContext *c);
-
-/* RTSP handling */
-static int rtsp_parse_request(HTTPContext *c);
-static void rtsp_cmd_describe(HTTPContext *c, const char *url);
-static void rtsp_cmd_options(HTTPContext *c, const char *url);
-static void rtsp_cmd_setup(HTTPContext *c, const char *url,
-                           RTSPMessageHeader *h);
-static void rtsp_cmd_play(HTTPContext *c, const char *url,
-                          RTSPMessageHeader *h);
-static void rtsp_cmd_interrupt(HTTPContext *c, const char *url,
-                               RTSPMessageHeader *h, int pause_only);
-
-/* SDP handling */
-static int prepare_sdp_description(FFServerStream *stream, uint8_t **pbuffer,
-                                   struct in_addr my_ip);
-
-/* RTP handling */
-static HTTPContext *rtp_new_connection(struct sockaddr_in *from_addr,
-                                       FFServerStream *stream,
-                                       const char *session_id,
-                                       enum RTSPLowerTransport rtp_protocol);
-static int rtp_new_av_stream(HTTPContext *c,
-                             int stream_index, struct sockaddr_in *dest_addr,
-                             HTTPContext *rtsp_c);
-/* utils */
-static size_t htmlencode (const char *src, char **dest);
-static inline void cp_html_entity (char *buffer, const char *entity);
-static inline int check_codec_match(LayeredAVStream *ccf, AVStream *ccs, int stream);
-
-static const char *my_program_name;
-
-static int no_launch;
-static int need_to_start_children;
-
-/* maximum number of simultaneous HTTP connections */
-static unsigned int nb_connections;
-
-static uint64_t current_bandwidth;
-
-/* Making this global saves on passing it around everywhere */
-static int64_t cur_time;
-
-static AVLFG random_state;
-
-static FILE *logfile = NULL;
-
-static void unlayer_stream(AVStream *st, LayeredAVStream *lst)
-{
-    avcodec_free_context(&st->codec);
-    avcodec_parameters_free(&st->codecpar);
-#define COPY(a) st->a = lst->a;
-    COPY(index)
-    COPY(id)
-    COPY(codec)
-    COPY(codecpar)
-    COPY(time_base)
-    COPY(pts_wrap_bits)
-    COPY(sample_aspect_ratio)
-    COPY(recommended_encoder_configuration)
-}
-
-static inline void cp_html_entity (char *buffer, const char *entity) {
-    if (!buffer || !entity)
-        return;
-    while (*entity)
-        *buffer++ = *entity++;
-}
-
-/**
- * Substitutes known conflicting chars on a text string with
- * their corresponding HTML entities.
- *
- * Returns the number of bytes in the 'encoded' representation
- * not including the terminating NUL.
- */
-static size_t htmlencode (const char *src, char **dest) {
-    const char *amp = "&amp;";
-    const char *lt  = "&lt;";
-    const char *gt  = "&gt;";
-    const char *start;
-    char *tmp;
-    size_t final_size = 0;
-
-    if (!src)
-        return 0;
-
-    start = src;
-
-    /* Compute needed dest size */
-    while (*src != '\0') {
-        switch(*src) {
-            case 38: /* & */
-                final_size += 5;
-                break;
-            case 60: /* < */
-            case 62: /* > */
-                final_size += 4;
-                break;
-            default:
-                final_size++;
-        }
-        src++;
-    }
-
-    src = start;
-    *dest = av_mallocz(final_size + 1);
-    if (!*dest)
-        return 0;
-
-    /* Build dest */
-    tmp = *dest;
-    while (*src != '\0') {
-        switch(*src) {
-            case 38: /* & */
-                cp_html_entity (tmp, amp);
-                tmp += 5;
-                break;
-            case 60: /* < */
-                cp_html_entity (tmp, lt);
-                tmp += 4;
-                break;
-            case 62: /* > */
-                cp_html_entity (tmp, gt);
-                tmp += 4;
-                break;
-            default:
-                *tmp = *src;
-                tmp += 1;
-        }
-        src++;
-    }
-    *tmp = '\0';
-
-    return final_size;
-}
-
-static int64_t ffm_read_write_index(int fd)
-{
-    uint8_t buf[8];
-
-    if (lseek(fd, 8, SEEK_SET) < 0)
-        return AVERROR(EIO);
-    if (read(fd, buf, 8) != 8)
-        return AVERROR(EIO);
-    return AV_RB64(buf);
-}
-
-static int ffm_write_write_index(int fd, int64_t pos)
-{
-    uint8_t buf[8];
-    int i;
-
-    for(i=0;i<8;i++)
-        buf[i] = (pos >> (56 - i * 8)) & 0xff;
-    if (lseek(fd, 8, SEEK_SET) < 0)
-        goto bail_eio;
-    if (write(fd, buf, 8) != 8)
-        goto bail_eio;
-
-    return 8;
-
-bail_eio:
-    return AVERROR(EIO);
-}
-
-static void ffm_set_write_index(AVFormatContext *s, int64_t pos,
-                                int64_t file_size)
-{
-    av_opt_set_int(s, "server_attached", 1, AV_OPT_SEARCH_CHILDREN);
-    av_opt_set_int(s, "ffm_write_index", pos, AV_OPT_SEARCH_CHILDREN);
-    av_opt_set_int(s, "ffm_file_size", file_size, AV_OPT_SEARCH_CHILDREN);
-}
-
-static char *ctime1(char *buf2, size_t buf_size)
-{
-    time_t ti;
-    char *p;
-
-    ti = time(NULL);
-    p = ctime(&ti);
-    if (!p || !*p) {
-        *buf2 = '\0';
-        return buf2;
-    }
-    av_strlcpy(buf2, p, buf_size);
-    p = buf2 + strlen(buf2) - 1;
-    if (*p == '\n')
-        *p = '\0';
-    return buf2;
-}
-
-static void http_vlog(const char *fmt, va_list vargs)
-{
-    static int print_prefix = 1;
-    char buf[32];
-
-    if (!logfile)
-        return;
-
-    if (print_prefix) {
-        ctime1(buf, sizeof(buf));
-        fprintf(logfile, "%s ", buf);
-    }
-    print_prefix = strstr(fmt, "\n") != NULL;
-    vfprintf(logfile, fmt, vargs);
-    fflush(logfile);
-}
-
-#ifdef __GNUC__
-__attribute__ ((format (printf, 1, 2)))
-#endif
-static void http_log(const char *fmt, ...)
-{
-    va_list vargs;
-    va_start(vargs, fmt);
-    http_vlog(fmt, vargs);
-    va_end(vargs);
-}
-
-static void http_av_log(void *ptr, int level, const char *fmt, va_list vargs)
-{
-    static int print_prefix = 1;
-    AVClass *avc = ptr ? *(AVClass**)ptr : NULL;
-    if (level > av_log_get_level())
-        return;
-    if (print_prefix && avc)
-        http_log("[%s @ %p]", avc->item_name(ptr), ptr);
-    print_prefix = strstr(fmt, "\n") != NULL;
-    http_vlog(fmt, vargs);
-}
-
-static void log_connection(HTTPContext *c)
-{
-    if (c->suppress_log)
-        return;
-
-    http_log("%s - - [%s] \"%s %s\" %d %"PRId64"\n",
-             inet_ntoa(c->from_addr.sin_addr), c->method, c->url,
-             c->protocol, (c->http_error ? c->http_error : 200), c->data_count);
-}
-
-static void update_datarate(DataRateData *drd, int64_t count)
-{
-    if (!drd->time1 && !drd->count1) {
-        drd->time1 = drd->time2 = cur_time;
-        drd->count1 = drd->count2 = count;
-    } else if (cur_time - drd->time2 > 5000) {
-        drd->time1 = drd->time2;
-        drd->count1 = drd->count2;
-        drd->time2 = cur_time;
-        drd->count2 = count;
-    }
-}
-
-/* In bytes per second */
-static int compute_datarate(DataRateData *drd, int64_t count)
-{
-    if (cur_time == drd->time1)
-        return 0;
-
-    return ((count - drd->count1) * 1000) / (cur_time - drd->time1);
-}
-
-
-static void start_children(FFServerStream *feed)
-{
-    char *pathname;
-    char *dirname, *prog;
-    int i;
-    size_t cmd_length;
-
-    if (no_launch)
-        return;
-
-    cmd_length = strlen(my_program_name);
-
-   /**
-    * FIXME: WIP Safeguard. Remove after clearing all harcoded
-    * '1024' path lengths
-    */
-    if (cmd_length > PATH_LENGTH - 1) {
-        http_log("Could not start children. Command line: '%s' exceeds "
-                    "path length limit (%d)\n", my_program_name, PATH_LENGTH);
-        return;
-    }
-
-   /* use "ffmpeg" in the path of current program. Ignore user provided path */
-    prog = av_strdup(my_program_name);
-    if (prog) {
-        dirname = av_dirname(prog);
-        pathname = *dirname ? av_asprintf("%s/%s", dirname, "ffmpeg")
-                            : av_asprintf("ffmpeg");
-        av_free(prog);
-    }
-    if (!prog || !pathname) {
-        http_log("Could not allocate memory for children cmd line\n");
-        return;
-    }
-
-    for (; feed; feed = feed->next) {
-
-        if (!feed->child_argv || feed->pid)
-            continue;
-
-        feed->pid_start = time(0);
-
-        feed->pid = fork();
-        if (feed->pid < 0) {
-            http_log("Unable to create children: %s\n", strerror(errno));
-            av_free (pathname);
-            exit(EXIT_FAILURE);
-        }
-
-        if (feed->pid)
-            continue;
-
-        /* In child */
-
-        http_log("Launch command line: ");
-        http_log("%s ", pathname);
-
-        for (i = 1; feed->child_argv[i] && feed->child_argv[i][0]; i++)
-            http_log("%s ", feed->child_argv[i]);
-        http_log("\n");
-
-        for (i = 3; i < 256; i++)
-            close(i);
-
-        if (!config.debug) {
-            if (!freopen("/dev/null", "r", stdin))
-                http_log("failed to redirect STDIN to /dev/null\n;");
-            if (!freopen("/dev/null", "w", stdout))
-                http_log("failed to redirect STDOUT to /dev/null\n;");
-            if (!freopen("/dev/null", "w", stderr))
-                http_log("failed to redirect STDERR to /dev/null\n;");
-        }
-
-        signal(SIGPIPE, SIG_DFL);
-        execvp(pathname, feed->child_argv);
-        av_free (pathname);
-        _exit(1);
-    }
-    av_free (pathname);
-}
-
-/* open a listening socket */
-static int socket_open_listen(struct sockaddr_in *my_addr)
-{
-    int server_fd, tmp;
-
-    server_fd = socket(AF_INET,SOCK_STREAM,0);
-    if (server_fd < 0) {
-        perror ("socket");
-        return -1;
-    }
-
-    tmp = 1;
-    if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof(tmp)))
-        av_log(NULL, AV_LOG_WARNING, "setsockopt SO_REUSEADDR failed\n");
-
-    my_addr->sin_family = AF_INET;
-    if (bind (server_fd, (struct sockaddr *) my_addr, sizeof (*my_addr)) < 0) {
-        char bindmsg[32];
-        snprintf(bindmsg, sizeof(bindmsg), "bind(port %d)",
-                 ntohs(my_addr->sin_port));
-        perror (bindmsg);
-        goto fail;
-    }
-
-    if (listen (server_fd, 5) < 0) {
-        perror ("listen");
-        goto fail;
-    }
-
-    if (ff_socket_nonblock(server_fd, 1) < 0)
-        av_log(NULL, AV_LOG_WARNING, "ff_socket_nonblock failed\n");
-
-    return server_fd;
-
-fail:
-    closesocket(server_fd);
-    return -1;
-}
-
-/* start all multicast streams */
-static void start_multicast(void)
-{
-    FFServerStream *stream;
-    char session_id[32];
-    HTTPContext *rtp_c;
-    struct sockaddr_in dest_addr = {0};
-    int default_port, stream_index;
-    unsigned int random0, random1;
-
-    default_port = 6000;
-    for(stream = config.first_stream; stream; stream = stream->next) {
-
-        if (!stream->is_multicast)
-            continue;
-
-        random0 = av_lfg_get(&random_state);
-        random1 = av_lfg_get(&random_state);
-
-        /* open the RTP connection */
-        snprintf(session_id, sizeof(session_id), "%08x%08x", random0, random1);
-
-        /* choose a port if none given */
-        if (stream->multicast_port == 0) {
-            stream->multicast_port = default_port;
-            default_port += 100;
-        }
-
-        dest_addr.sin_family = AF_INET;
-        dest_addr.sin_addr = stream->multicast_ip;
-        dest_addr.sin_port = htons(stream->multicast_port);
-
-        rtp_c = rtp_new_connection(&dest_addr, stream, session_id,
-                                   RTSP_LOWER_TRANSPORT_UDP_MULTICAST);
-        if (!rtp_c)
-            continue;
-
-        if (open_input_stream(rtp_c, "") < 0) {
-            http_log("Could not open input stream for stream '%s'\n",
-                     stream->filename);
-            continue;
-        }
-
-        /* open each RTP stream */
-        for(stream_index = 0; stream_index < stream->nb_streams;
-            stream_index++) {
-            dest_addr.sin_port = htons(stream->multicast_port +
-                                       2 * stream_index);
-            if (rtp_new_av_stream(rtp_c, stream_index, &dest_addr, NULL) >= 0)
-                continue;
-
-            http_log("Could not open output stream '%s/streamid=%d'\n",
-                     stream->filename, stream_index);
-            exit(1);
-        }
-
-        rtp_c->state = HTTPSTATE_SEND_DATA;
-    }
-}
-
-/* main loop of the HTTP server */
-static int http_server(void)
-{
-    int server_fd = 0, rtsp_server_fd = 0;
-    int ret, delay;
-    struct pollfd *poll_table, *poll_entry;
-    HTTPContext *c, *c_next;
-
-    poll_table = av_mallocz_array(config.nb_max_http_connections + 2,
-                                  sizeof(*poll_table));
-    if(!poll_table) {
-        http_log("Impossible to allocate a poll table handling %d "
-                 "connections.\n", config.nb_max_http_connections);
-        return -1;
-    }
-
-    if (config.http_addr.sin_port) {
-        server_fd = socket_open_listen(&config.http_addr);
-        if (server_fd < 0)
-            goto quit;
-    }
-
-    if (config.rtsp_addr.sin_port) {
-        rtsp_server_fd = socket_open_listen(&config.rtsp_addr);
-        if (rtsp_server_fd < 0) {
-            closesocket(server_fd);
-            goto quit;
-        }
-    }
-
-    if (!rtsp_server_fd && !server_fd) {
-        http_log("HTTP and RTSP disabled.\n");
-        goto quit;
-    }
-
-    http_log("FFserver started.\n");
-
-    start_children(config.first_feed);
-
-    start_multicast();
-
-    for(;;) {
-        poll_entry = poll_table;
-        if (server_fd) {
-            poll_entry->fd = server_fd;
-            poll_entry->events = POLLIN;
-            poll_entry++;
-        }
-        if (rtsp_server_fd) {
-            poll_entry->fd = rtsp_server_fd;
-            poll_entry->events = POLLIN;
-            poll_entry++;
-        }
-
-        /* wait for events on each HTTP handle */
-        c = first_http_ctx;
-        delay = 1000;
-        while (c) {
-            int fd;
-            fd = c->fd;
-            switch(c->state) {
-            case HTTPSTATE_SEND_HEADER:
-            case RTSPSTATE_SEND_REPLY:
-            case RTSPSTATE_SEND_PACKET:
-                c->poll_entry = poll_entry;
-                poll_entry->fd = fd;
-                poll_entry->events = POLLOUT;
-                poll_entry++;
-                break;
-            case HTTPSTATE_SEND_DATA_HEADER:
-            case HTTPSTATE_SEND_DATA:
-            case HTTPSTATE_SEND_DATA_TRAILER:
-                if (!c->is_packetized) {
-                    /* for TCP, we output as much as we can
-                     * (may need to put a limit) */
-                    c->poll_entry = poll_entry;
-                    poll_entry->fd = fd;
-                    poll_entry->events = POLLOUT;
-                    poll_entry++;
-                } else {
-                    /* when ffserver is doing the timing, we work by
-                     * looking at which packet needs to be sent every
-                     * 10 ms (one tick wait XXX: 10 ms assumed) */
-                    if (delay > 10)
-                        delay = 10;
-                }
-                break;
-            case HTTPSTATE_WAIT_REQUEST:
-            case HTTPSTATE_RECEIVE_DATA:
-            case HTTPSTATE_WAIT_FEED:
-            case RTSPSTATE_WAIT_REQUEST:
-                /* need to catch errors */
-                c->poll_entry = poll_entry;
-                poll_entry->fd = fd;
-                poll_entry->events = POLLIN;/* Maybe this will work */
-                poll_entry++;
-                break;
-            default:
-                c->poll_entry = NULL;
-                break;
-            }
-            c = c->next;
-        }
-
-        /* wait for an event on one connection. We poll at least every
-         * second to handle timeouts */
-        do {
-            ret = poll(poll_table, poll_entry - poll_table, delay);
-            if (ret < 0 && ff_neterrno() != AVERROR(EAGAIN) &&
-                ff_neterrno() != AVERROR(EINTR)) {
-                goto quit;
-            }
-        } while (ret < 0);
-
-        cur_time = av_gettime() / 1000;
-
-        if (need_to_start_children) {
-            need_to_start_children = 0;
-            start_children(config.first_feed);
-        }
-
-        /* now handle the events */
-        for(c = first_http_ctx; c; c = c_next) {
-            c_next = c->next;
-            if (handle_connection(c) < 0) {
-                log_connection(c);
-                /* close and free the connection */
-                close_connection(c);
-            }
-        }
-
-        poll_entry = poll_table;
-        if (server_fd) {
-            /* new HTTP connection request ? */
-            if (poll_entry->revents & POLLIN)
-                new_connection(server_fd, 0);
-            poll_entry++;
-        }
-        if (rtsp_server_fd) {
-            /* new RTSP connection request ? */
-            if (poll_entry->revents & POLLIN)
-                new_connection(rtsp_server_fd, 1);
-        }
-    }
-
-quit:
-    av_free(poll_table);
-    return -1;
-}
-
-/* start waiting for a new HTTP/RTSP request */
-static void start_wait_request(HTTPContext *c, int is_rtsp)
-{
-    c->buffer_ptr = c->buffer;
-    c->buffer_end = c->buffer + c->buffer_size - 1; /* leave room for '\0' */
-
-    c->state = is_rtsp ? RTSPSTATE_WAIT_REQUEST : HTTPSTATE_WAIT_REQUEST;
-    c->timeout = cur_time +
-                 (is_rtsp ? RTSP_REQUEST_TIMEOUT : HTTP_REQUEST_TIMEOUT);
-}
-
-static void http_send_too_busy_reply(int fd)
-{
-    char buffer[400];
-    int len = snprintf(buffer, sizeof(buffer),
-                       "HTTP/1.0 503 Server too busy\r\n"
-                       "Content-type: text/html\r\n"
-                       "\r\n"
-                       "<!DOCTYPE html>\n"
-                       "<html><head><title>Too busy</title></head><body>\r\n"
-                       "<p>The server is too busy to serve your request at "
-                       "this time.</p>\r\n"
-                       "<p>The number of current connections is %u, and this "
-                       "exceeds the limit of %u.</p>\r\n"
-                       "</body></html>\r\n",
-                       nb_connections, config.nb_max_connections);
-    av_assert0(len < sizeof(buffer));
-    if (send(fd, buffer, len, 0) < len)
-        av_log(NULL, AV_LOG_WARNING,
-               "Could not send too-busy reply, send() failed\n");
-}
-
-
-static void new_connection(int server_fd, int is_rtsp)
-{
-    struct sockaddr_in from_addr;
-    socklen_t len;
-    int fd;
-    HTTPContext *c = NULL;
-
-    len = sizeof(from_addr);
-    fd = accept(server_fd, (struct sockaddr *)&from_addr,
-                &len);
-    if (fd < 0) {
-        http_log("error during accept %s\n", strerror(errno));
-        return;
-    }
-    if (ff_socket_nonblock(fd, 1) < 0)
-        av_log(NULL, AV_LOG_WARNING, "ff_socket_nonblock failed\n");
-
-    if (nb_connections >= config.nb_max_connections) {
-        http_send_too_busy_reply(fd);
-        goto fail;
-    }
-
-    /* add a new connection */
-    c = av_mallocz(sizeof(HTTPContext));
-    if (!c)
-        goto fail;
-
-    c->fd = fd;
-    c->poll_entry = NULL;
-    c->from_addr = from_addr;
-    c->buffer_size = IOBUFFER_INIT_SIZE;
-    c->buffer = av_malloc(c->buffer_size);
-    if (!c->buffer)
-        goto fail;
-
-    c->next = first_http_ctx;
-    first_http_ctx = c;
-    nb_connections++;
-
-    start_wait_request(c, is_rtsp);
-
-    return;
-
- fail:
-    if (c) {
-        av_freep(&c->buffer);
-        av_free(c);
-    }
-    closesocket(fd);
-}
-
-static void close_connection(HTTPContext *c)
-{
-    HTTPContext **cp, *c1;
-    int i, nb_streams;
-    AVFormatContext *ctx;
-    AVStream *st;
-
-    /* remove connection from list */
-    cp = &first_http_ctx;
-    while (*cp) {
-        c1 = *cp;
-        if (c1 == c)
-            *cp = c->next;
-        else
-            cp = &c1->next;
-    }
-
-    /* remove references, if any (XXX: do it faster) */
-    for(c1 = first_http_ctx; c1; c1 = c1->next) {
-        if (c1->rtsp_c == c)
-            c1->rtsp_c = NULL;
-    }
-
-    /* remove connection associated resources */
-    if (c->fd >= 0)
-        closesocket(c->fd);
-    if (c->fmt_in) {
-        /* close each frame parser */
-        for(i=0;i<c->fmt_in->nb_streams;i++) {
-            st = c->fmt_in->streams[i];
-            if (st->codec->codec)
-                avcodec_close(st->codec);
-        }
-        avformat_close_input(&c->fmt_in);
-    }
-
-    /* free RTP output streams if any */
-    nb_streams = 0;
-    if (c->stream)
-        nb_streams = c->stream->nb_streams;
-
-    for(i=0;i<nb_streams;i++) {
-        ctx = c->rtp_ctx[i];
-        if (ctx) {
-            av_write_trailer(ctx);
-            av_dict_free(&ctx->metadata);
-            av_freep(&ctx->streams[0]);
-            av_freep(&ctx);
-        }
-        ffurl_close(c->rtp_handles[i]);
-    }
-
-    ctx = c->pfmt_ctx;
-
-    if (ctx) {
-        if (!c->last_packet_sent && c->state == HTTPSTATE_SEND_DATA_TRAILER) {
-            /* prepare header */
-            if (ctx->oformat && avio_open_dyn_buf(&ctx->pb) >= 0) {
-                av_write_trailer(ctx);
-                av_freep(&c->pb_buffer);
-                avio_close_dyn_buf(ctx->pb, &c->pb_buffer);
-            }
-        }
-        for(i=0; i<ctx->nb_streams; i++)
-            av_freep(&ctx->streams[i]);
-        av_freep(&ctx->streams);
-        av_freep(&ctx->priv_data);
-        }
-
-    if (c->stream && !c->post && c->stream->stream_type == STREAM_TYPE_LIVE)
-        current_bandwidth -= c->stream->bandwidth;
-
-    /* signal that there is no feed if we are the feeder socket */
-    if (c->state == HTTPSTATE_RECEIVE_DATA && c->stream) {
-        c->stream->feed_opened = 0;
-        close(c->feed_fd);
-    }
-
-    av_freep(&c->pb_buffer);
-    av_freep(&c->packet_buffer);
-    av_freep(&c->buffer);
-    av_free(c);
-    nb_connections--;
-}
-
-static int handle_connection(HTTPContext *c)
-{
-    int len, ret;
-    uint8_t *ptr;
-
-    switch(c->state) {
-    case HTTPSTATE_WAIT_REQUEST:
-    case RTSPSTATE_WAIT_REQUEST:
-        /* timeout ? */
-        if ((c->timeout - cur_time) < 0)
-            return -1;
-        if (c->poll_entry->revents & (POLLERR | POLLHUP))
-            return -1;
-
-        /* no need to read if no events */
-        if (!(c->poll_entry->revents & POLLIN))
-            return 0;
-        /* read the data */
-    read_loop:
-        if (!(len = recv(c->fd, c->buffer_ptr, 1, 0)))
-            return -1;
-
-        if (len < 0) {
-            if (ff_neterrno() != AVERROR(EAGAIN) &&
-                ff_neterrno() != AVERROR(EINTR))
-                return -1;
-            break;
-        }
-        /* search for end of request. */
-        c->buffer_ptr += len;
-        ptr = c->buffer_ptr;
-        if ((ptr >= c->buffer + 2 && !memcmp(ptr-2, "\n\n", 2)) ||
-            (ptr >= c->buffer + 4 && !memcmp(ptr-4, "\r\n\r\n", 4))) {
-            /* request found : parse it and reply */
-            if (c->state == HTTPSTATE_WAIT_REQUEST)
-                ret = http_parse_request(c);
-            else
-                ret = rtsp_parse_request(c);
-
-            if (ret < 0)
-                return -1;
-        } else if (ptr >= c->buffer_end) {
-            /* request too long: cannot do anything */
-            return -1;
-        } else goto read_loop;
-
-        break;
-
-    case HTTPSTATE_SEND_HEADER:
-        if (c->poll_entry->revents & (POLLERR | POLLHUP))
-            return -1;
-
-        /* no need to write if no events */
-        if (!(c->poll_entry->revents & POLLOUT))
-            return 0;
-        len = send(c->fd, c->buffer_ptr, c->buffer_end - c->buffer_ptr, 0);
-        if (len < 0) {
-            if (ff_neterrno() != AVERROR(EAGAIN) &&
-                ff_neterrno() != AVERROR(EINTR)) {
-                goto close_connection;
-            }
-            break;
-        }
-        c->buffer_ptr += len;
-        if (c->stream)
-            c->stream->bytes_served += len;
-        c->data_count += len;
-        if (c->buffer_ptr >= c->buffer_end) {
-            av_freep(&c->pb_buffer);
-            /* if error, exit */
-            if (c->http_error)
-                return -1;
-            /* all the buffer was sent : synchronize to the incoming
-             * stream */
-            c->state = HTTPSTATE_SEND_DATA_HEADER;
-            c->buffer_ptr = c->buffer_end = c->buffer;
-        }
-        break;
-
-    case HTTPSTATE_SEND_DATA:
-    case HTTPSTATE_SEND_DATA_HEADER:
-    case HTTPSTATE_SEND_DATA_TRAILER:
-        /* for packetized output, we consider we can always write (the
-         * input streams set the speed). It may be better to verify
-         * that we do not rely too much on the kernel queues */
-        if (!c->is_packetized) {
-            if (c->poll_entry->revents & (POLLERR | POLLHUP))
-                return -1;
-
-            /* no need to read if no events */
-            if (!(c->poll_entry->revents & POLLOUT))
-                return 0;
-        }
-        if (http_send_data(c) < 0)
-            return -1;
-        /* close connection if trailer sent */
-        if (c->state == HTTPSTATE_SEND_DATA_TRAILER)
-            return -1;
-        /* Check if it is a single jpeg frame 123 */
-        if (c->stream->single_frame && c->data_count > c->cur_frame_bytes && c->cur_frame_bytes > 0) {
-            close_connection(c);
-        }
-        break;
-    case HTTPSTATE_RECEIVE_DATA:
-        /* no need to read if no events */
-        if (c->poll_entry->revents & (POLLERR | POLLHUP))
-            return -1;
-        if (!(c->poll_entry->revents & POLLIN))
-            return 0;
-        if (http_receive_data(c) < 0)
-            return -1;
-        break;
-    case HTTPSTATE_WAIT_FEED:
-        /* no need to read if no events */
-        if (c->poll_entry->revents & (POLLIN | POLLERR | POLLHUP))
-            return -1;
-
-        /* nothing to do, we'll be waken up by incoming feed packets */
-        break;
-
-    case RTSPSTATE_SEND_REPLY:
-        if (c->poll_entry->revents & (POLLERR | POLLHUP))
-            goto close_connection;
-        /* no need to write if no events */
-        if (!(c->poll_entry->revents & POLLOUT))
-            return 0;
-        len = send(c->fd, c->buffer_ptr, c->buffer_end - c->buffer_ptr, 0);
-        if (len < 0) {
-            if (ff_neterrno() != AVERROR(EAGAIN) &&
-                ff_neterrno() != AVERROR(EINTR)) {
-                goto close_connection;
-            }
-            break;
-        }
-        c->buffer_ptr += len;
-        c->data_count += len;
-        if (c->buffer_ptr >= c->buffer_end) {
-            /* all the buffer was sent : wait for a new request */
-            av_freep(&c->pb_buffer);
-            start_wait_request(c, 1);
-        }
-        break;
-    case RTSPSTATE_SEND_PACKET:
-        if (c->poll_entry->revents & (POLLERR | POLLHUP)) {
-            av_freep(&c->packet_buffer);
-            return -1;
-        }
-        /* no need to write if no events */
-        if (!(c->poll_entry->revents & POLLOUT))
-            return 0;
-        len = send(c->fd, c->packet_buffer_ptr,
-                    c->packet_buffer_end - c->packet_buffer_ptr, 0);
-        if (len < 0) {
-            if (ff_neterrno() != AVERROR(EAGAIN) &&
-                ff_neterrno() != AVERROR(EINTR)) {
-                /* error : close connection */
-                av_freep(&c->packet_buffer);
-                return -1;
-            }
-            break;
-        }
-        c->packet_buffer_ptr += len;
-        if (c->packet_buffer_ptr >= c->packet_buffer_end) {
-            /* all the buffer was sent : wait for a new request */
-            av_freep(&c->packet_buffer);
-            c->state = RTSPSTATE_WAIT_REQUEST;
-        }
-        break;
-    case HTTPSTATE_READY:
-        /* nothing to do */
-        break;
-    default:
-        return -1;
-    }
-    return 0;
-
-close_connection:
-    av_freep(&c->pb_buffer);
-    return -1;
-}
-
-static int extract_rates(char *rates, int ratelen, const char *request)
-{
-    const char *p;
-
-    for (p = request; *p && *p != '\r' && *p != '\n'; ) {
-        if (av_strncasecmp(p, "Pragma:", 7) == 0) {
-            const char *q = p + 7;
-
-            while (*q && *q != '\n' && av_isspace(*q))
-                q++;
-
-            if (av_strncasecmp(q, "stream-switch-entry=", 20) == 0) {
-                int stream_no;
-                int rate_no;
-
-                q += 20;
-
-                memset(rates, 0xff, ratelen);
-
-                while (1) {
-                    while (*q && *q != '\n' && *q != ':')
-                        q++;
-
-                    if (sscanf(q, ":%d:%d", &stream_no, &rate_no) != 2)
-                        break;
-
-                    stream_no--;
-                    if (stream_no < ratelen && stream_no >= 0)
-                        rates[stream_no] = rate_no;
-
-                    while (*q && *q != '\n' && !av_isspace(*q))
-                        q++;
-                }
-
-                return 1;
-            }
-        }
-        p = strchr(p, '\n');
-        if (!p)
-            break;
-
-        p++;
-    }
-
-    return 0;
-}
-
-static int find_stream_in_feed(FFServerStream *feed, AVCodecParameters *codec,
-                               int bit_rate)
-{
-    int i;
-    int best_bitrate = 100000000;
-    int best = -1;
-
-    for (i = 0; i < feed->nb_streams; i++) {
-        AVCodecParameters *feed_codec = feed->streams[i]->codecpar;
-
-        if (feed_codec->codec_id != codec->codec_id ||
-            feed_codec->sample_rate != codec->sample_rate ||
-            feed_codec->width != codec->width ||
-            feed_codec->height != codec->height)
-            continue;
-
-        /* Potential stream */
-
-        /* We want the fastest stream less than bit_rate, or the slowest
-         * faster than bit_rate
-         */
-
-        if (feed_codec->bit_rate <= bit_rate) {
-            if (best_bitrate > bit_rate ||
-                feed_codec->bit_rate > best_bitrate) {
-                best_bitrate = feed_codec->bit_rate;
-                best = i;
-            }
-            continue;
-        }
-        if (feed_codec->bit_rate < best_bitrate) {
-            best_bitrate = feed_codec->bit_rate;
-            best = i;
-        }
-    }
-    return best;
-}
-
-static int modify_current_stream(HTTPContext *c, char *rates)
-{
-    int i;
-    FFServerStream *req = c->stream;
-    int action_required = 0;
-
-    /* Not much we can do for a feed */
-    if (!req->feed)
-        return 0;
-
-    for (i = 0; i < req->nb_streams; i++) {
-        AVCodecParameters *codec = req->streams[i]->codecpar;
-
-        switch(rates[i]) {
-            case 0:
-                c->switch_feed_streams[i] = req->feed_streams[i];
-                break;
-            case 1:
-                c->switch_feed_streams[i] = find_stream_in_feed(req->feed, codec, codec->bit_rate / 2);
-                break;
-            case 2:
-                /* Wants off or slow */
-                c->switch_feed_streams[i] = find_stream_in_feed(req->feed, codec, codec->bit_rate / 4);
-#ifdef WANTS_OFF
-                /* This doesn't work well when it turns off the only stream! */
-                c->switch_feed_streams[i] = -2;
-                c->feed_streams[i] = -2;
-#endif
-                break;
-        }
-
-        if (c->switch_feed_streams[i] >= 0 &&
-            c->switch_feed_streams[i] != c->feed_streams[i]) {
-            action_required = 1;
-        }
-    }
-
-    return action_required;
-}
-
-static void get_word(char *buf, int buf_size, const char **pp)
-{
-    const char *p;
-    char *q;
-
-#define SPACE_CHARS " \t\r\n"
-
-    p = *pp;
-    p += strspn(p, SPACE_CHARS);
-    q = buf;
-    while (!av_isspace(*p) && *p != '\0') {
-        if ((q - buf) < buf_size - 1)
-            *q++ = *p;
-        p++;
-    }
-    if (buf_size > 0)
-        *q = '\0';
-    *pp = p;
-}
-
-static FFServerIPAddressACL* parse_dynamic_acl(FFServerStream *stream,
-                                               HTTPContext *c)
-{
-    FILE* f;
-    char line[1024];
-    char  cmd[1024];
-    FFServerIPAddressACL *acl = NULL;
-    int line_num = 0;
-    const char *p;
-
-    f = fopen(stream->dynamic_acl, "r");
-    if (!f) {
-        perror(stream->dynamic_acl);
-        return NULL;
-    }
-
-    acl = av_mallocz(sizeof(FFServerIPAddressACL));
-    if (!acl) {
-        fclose(f);
-        return NULL;
-    }
-
-    /* Build ACL */
-    while (fgets(line, sizeof(line), f)) {
-        line_num++;
-        p = line;
-        while (av_isspace(*p))
-            p++;
-        if (*p == '\0' || *p == '#')
-            continue;
-        ffserver_get_arg(cmd, sizeof(cmd), &p);
-
-        if (!av_strcasecmp(cmd, "ACL"))
-            ffserver_parse_acl_row(NULL, NULL, acl, p, stream->dynamic_acl,
-                                   line_num);
-    }
-    fclose(f);
-    return acl;
-}
-
-
-static void free_acl_list(FFServerIPAddressACL *in_acl)
-{
-    FFServerIPAddressACL *pacl, *pacl2;
-
-    pacl = in_acl;
-    while(pacl) {
-        pacl2 = pacl;
-        pacl = pacl->next;
-        av_freep(pacl2);
-    }
-}
-
-static int validate_acl_list(FFServerIPAddressACL *in_acl, HTTPContext *c)
-{
-    enum FFServerIPAddressAction last_action = IP_DENY;
-    FFServerIPAddressACL *acl;
-    struct in_addr *src = &c->from_addr.sin_addr;
-    unsigned long src_addr = src->s_addr;
-
-    for (acl = in_acl; acl; acl = acl->next) {
-        if (src_addr >= acl->first.s_addr && src_addr <= acl->last.s_addr)
-            return (acl->action == IP_ALLOW) ? 1 : 0;
-        last_action = acl->action;
-    }
-
-    /* Nothing matched, so return not the last action */
-    return (last_action == IP_DENY) ? 1 : 0;
-}
-
-static int validate_acl(FFServerStream *stream, HTTPContext *c)
-{
-    int ret = 0;
-    FFServerIPAddressACL *acl;
-
-    /* if stream->acl is null validate_acl_list will return 1 */
-    ret = validate_acl_list(stream->acl, c);
-
-    if (stream->dynamic_acl[0]) {
-        acl = parse_dynamic_acl(stream, c);
-        ret = validate_acl_list(acl, c);
-        free_acl_list(acl);
-    }
-
-    return ret;
-}
-
-/**
- * compute the real filename of a file by matching it without its
- * extensions to all the stream's filenames
- */
-static void compute_real_filename(char *filename, int max_size)
-{
-    char file1[1024];
-    char file2[1024];
-    char *p;
-    FFServerStream *stream;
-
-    av_strlcpy(file1, filename, sizeof(file1));
-    p = strrchr(file1, '.');
-    if (p)
-        *p = '\0';
-    for(stream = config.first_stream; stream; stream = stream->next) {
-        av_strlcpy(file2, stream->filename, sizeof(file2));
-        p = strrchr(file2, '.');
-        if (p)
-            *p = '\0';
-        if (!strcmp(file1, file2)) {
-            av_strlcpy(filename, stream->filename, max_size);
-            break;
-        }
-    }
-}
-
-enum RedirType {
-    REDIR_NONE,
-    REDIR_ASX,
-    REDIR_RAM,
-    REDIR_ASF,
-    REDIR_RTSP,
-    REDIR_SDP,
-};
-
-/* parse HTTP request and prepare header */
-static int http_parse_request(HTTPContext *c)
-{
-    const char *p;
-    char *p1;
-    enum RedirType redir_type;
-    char cmd[32];
-    char info[1024], filename[1024];
-    char url[1024], *q;
-    char protocol[32];
-    char msg[1024];
-    char *encoded_msg = NULL;
-    const char *mime_type;
-    FFServerStream *stream;
-    int i;
-    char ratebuf[32];
-    const char *useragent = 0;
-
-    p = c->buffer;
-    get_word(cmd, sizeof(cmd), &p);
-    av_strlcpy(c->method, cmd, sizeof(c->method));
-
-    if (!strcmp(cmd, "GET"))
-        c->post = 0;
-    else if (!strcmp(cmd, "POST"))
-        c->post = 1;
-    else
-        return -1;
-
-    get_word(url, sizeof(url), &p);
-    av_strlcpy(c->url, url, sizeof(c->url));
-
-    get_word(protocol, sizeof(protocol), (const char **)&p);
-    if (strcmp(protocol, "HTTP/1.0") && strcmp(protocol, "HTTP/1.1"))
-        return -1;
-
-    av_strlcpy(c->protocol, protocol, sizeof(c->protocol));
-
-    if (config.debug)
-        http_log("%s - - New connection: %s %s\n",
-                 inet_ntoa(c->from_addr.sin_addr), cmd, url);
-
-    /* find the filename and the optional info string in the request */
-    p1 = strchr(url, '?');
-    if (p1) {
-        av_strlcpy(info, p1, sizeof(info));
-        *p1 = '\0';
-    } else
-        info[0] = '\0';
-
-    av_strlcpy(filename, url + ((*url == '/') ? 1 : 0), sizeof(filename)-1);
-
-    for (p = c->buffer; *p && *p != '\r' && *p != '\n'; ) {
-        if (av_strncasecmp(p, "User-Agent:", 11) == 0) {
-            useragent = p + 11;
-            if (*useragent && *useragent != '\n' && av_isspace(*useragent))
-                useragent++;
-            break;
-        }
-        p = strchr(p, '\n');
-        if (!p)
-            break;
-
-        p++;
-    }
-
-    redir_type = REDIR_NONE;
-    if (av_match_ext(filename, "asx")) {
-        redir_type = REDIR_ASX;
-        filename[strlen(filename)-1] = 'f';
-    } else if (av_match_ext(filename, "asf") &&
-        (!useragent || av_strncasecmp(useragent, "NSPlayer", 8))) {
-        /* if this isn't WMP or lookalike, return the redirector file */
-        redir_type = REDIR_ASF;
-    } else if (av_match_ext(filename, "rpm,ram")) {
-        redir_type = REDIR_RAM;
-        strcpy(filename + strlen(filename)-2, "m");
-    } else if (av_match_ext(filename, "rtsp")) {
-        redir_type = REDIR_RTSP;
-        compute_real_filename(filename, sizeof(filename) - 1);
-    } else if (av_match_ext(filename, "sdp")) {
-        redir_type = REDIR_SDP;
-        compute_real_filename(filename, sizeof(filename) - 1);
-    }
-
-    /* "redirect" request to index.html */
-    if (!strlen(filename))
-        av_strlcpy(filename, "index.html", sizeof(filename) - 1);
-
-    stream = config.first_stream;
-    while (stream) {
-        if (!strcmp(stream->filename, filename) && validate_acl(stream, c))
-            break;
-        stream = stream->next;
-    }
-    if (!stream) {
-        snprintf(msg, sizeof(msg), "File '%s' not found", url);
-        http_log("File '%s' not found\n", url);
-        goto send_error;
-    }
-
-    c->stream = stream;
-    memcpy(c->feed_streams, stream->feed_streams, sizeof(c->feed_streams));
-    memset(c->switch_feed_streams, -1, sizeof(c->switch_feed_streams));
-
-    if (stream->stream_type == STREAM_TYPE_REDIRECT) {
-        c->http_error = 301;
-        q = c->buffer;
-        snprintf(q, c->buffer_size,
-                      "HTTP/1.0 301 Moved\r\n"
-                      "Location: %s\r\n"
-                      "Content-type: text/html\r\n"
-                      "\r\n"
-                      "<!DOCTYPE html>\n"
-                      "<html><head><title>Moved</title></head><body>\r\n"
-                      "You should be <a href=\"%s\">redirected</a>.\r\n"
-                      "</body></html>\r\n",
-                 stream->feed_filename, stream->feed_filename);
-        q += strlen(q);
-        /* prepare output buffer */
-        c->buffer_ptr = c->buffer;
-        c->buffer_end = q;
-        c->state = HTTPSTATE_SEND_HEADER;
-        return 0;
-    }
-
-    /* If this is WMP, get the rate information */
-    if (extract_rates(ratebuf, sizeof(ratebuf), c->buffer)) {
-        if (modify_current_stream(c, ratebuf)) {
-            for (i = 0; i < FF_ARRAY_ELEMS(c->feed_streams); i++) {
-                if (c->switch_feed_streams[i] >= 0)
-                    c->switch_feed_streams[i] = -1;
-            }
-        }
-    }
-
-    if (c->post == 0 && stream->stream_type == STREAM_TYPE_LIVE)
-        current_bandwidth += stream->bandwidth;
-
-    /* If already streaming this feed, do not let another feeder start */
-    if (stream->feed_opened) {
-        snprintf(msg, sizeof(msg), "This feed is already being received.");
-        http_log("Feed '%s' already being received\n", stream->feed_filename);
-        goto send_error;
-    }
-
-    if (c->post == 0 && config.max_bandwidth < current_bandwidth) {
-        c->http_error = 503;
-        q = c->buffer;
-        snprintf(q, c->buffer_size,
-                      "HTTP/1.0 503 Server too busy\r\n"
-                      "Content-type: text/html\r\n"
-                      "\r\n"
-                      "<!DOCTYPE html>\n"
-                      "<html><head><title>Too busy</title></head><body>\r\n"
-                      "<p>The server is too busy to serve your request at "
-                      "this time.</p>\r\n"
-                      "<p>The bandwidth being served (including your stream) "
-                      "is %"PRIu64"kbit/s, and this exceeds the limit of "
-                      "%"PRIu64"kbit/s.</p>\r\n"
-                      "</body></html>\r\n",
-                 current_bandwidth, config.max_bandwidth);
-        q += strlen(q);
-        /* prepare output buffer */
-        c->buffer_ptr = c->buffer;
-        c->buffer_end = q;
-        c->state = HTTPSTATE_SEND_HEADER;
-        return 0;
-    }
-
-    if (redir_type != REDIR_NONE) {
-        const char *hostinfo = 0;
-
-        for (p = c->buffer; *p && *p != '\r' && *p != '\n'; ) {
-            if (av_strncasecmp(p, "Host:", 5) == 0) {
-                hostinfo = p + 5;
-                break;
-            }
-            p = strchr(p, '\n');
-            if (!p)
-                break;
-
-            p++;
-        }
-
-        if (hostinfo) {
-            char *eoh;
-            char hostbuf[260];
-
-            while (av_isspace(*hostinfo))
-                hostinfo++;
-
-            eoh = strchr(hostinfo, '\n');
-            if (eoh) {
-                if (eoh[-1] == '\r')
-                    eoh--;
-
-                if (eoh - hostinfo < sizeof(hostbuf) - 1) {
-                    memcpy(hostbuf, hostinfo, eoh - hostinfo);
-                    hostbuf[eoh - hostinfo] = 0;
-
-                    c->http_error = 200;
-                    q = c->buffer;
-                    switch(redir_type) {
-                    case REDIR_ASX:
-                        snprintf(q, c->buffer_size,
-                                      "HTTP/1.0 200 ASX Follows\r\n"
-                                      "Content-type: video/x-ms-asf\r\n"
-                                      "\r\n"
-                                      "<ASX Version=\"3\">\r\n"
-                                      //"<!-- Autogenerated by ffserver -->\r\n"
-                                      "<ENTRY><REF HREF=\"http://%s/%s%s\"/></ENTRY>\r\n"
-                                      "</ASX>\r\n", hostbuf, filename, info);
-                        q += strlen(q);
-                        break;
-                    case REDIR_RAM:
-                        snprintf(q, c->buffer_size,
-                                      "HTTP/1.0 200 RAM Follows\r\n"
-                                      "Content-type: audio/x-pn-realaudio\r\n"
-                                      "\r\n"
-                                      "# Autogenerated by ffserver\r\n"
-                                      "http://%s/%s%s\r\n", hostbuf, filename, info);
-                        q += strlen(q);
-                        break;
-                    case REDIR_ASF:
-                        snprintf(q, c->buffer_size,
-                                      "HTTP/1.0 200 ASF Redirect follows\r\n"
-                                      "Content-type: video/x-ms-asf\r\n"
-                                      "\r\n"
-                                      "[Reference]\r\n"
-                                      "Ref1=http://%s/%s%s\r\n", hostbuf, filename, info);
-                        q += strlen(q);
-                        break;
-                    case REDIR_RTSP:
-                        {
-                            char hostname[256], *p;
-                            /* extract only hostname */
-                            av_strlcpy(hostname, hostbuf, sizeof(hostname));
-                            p = strrchr(hostname, ':');
-                            if (p)
-                                *p = '\0';
-                            snprintf(q, c->buffer_size,
-                                          "HTTP/1.0 200 RTSP Redirect follows\r\n"
-                                          /* XXX: incorrect MIME type ? */
-                                          "Content-type: application/x-rtsp\r\n"
-                                          "\r\n"
-                                          "rtsp://%s:%d/%s\r\n", hostname, ntohs(config.rtsp_addr.sin_port), filename);
-                            q += strlen(q);
-                        }
-                        break;
-                    case REDIR_SDP:
-                        {
-                            uint8_t *sdp_data;
-                            int sdp_data_size;
-                            socklen_t len;
-                            struct sockaddr_in my_addr;
-
-                            snprintf(q, c->buffer_size,
-                                          "HTTP/1.0 200 OK\r\n"
-                                          "Content-type: application/sdp\r\n"
-                                          "\r\n");
-                            q += strlen(q);
-
-                            len = sizeof(my_addr);
-
-                            /* XXX: Should probably fail? */
-                            if (getsockname(c->fd, (struct sockaddr *)&my_addr, &len))
-                                http_log("getsockname() failed\n");
-
-                            /* XXX: should use a dynamic buffer */
-                            sdp_data_size = prepare_sdp_description(stream,
-                                                                    &sdp_data,
-                                                                    my_addr.sin_addr);
-                            if (sdp_data_size > 0) {
-                                memcpy(q, sdp_data, sdp_data_size);
-                                q += sdp_data_size;
-                                *q = '\0';
-                                av_freep(&sdp_data);
-                            }
-                        }
-                        break;
-                    default:
-                        abort();
-                        break;
-                    }
-
-                    /* prepare output buffer */
-                    c->buffer_ptr = c->buffer;
-                    c->buffer_end = q;
-                    c->state = HTTPSTATE_SEND_HEADER;
-                    return 0;
-                }
-            }
-        }
-
-        snprintf(msg, sizeof(msg), "ASX/RAM file not handled");
-        goto send_error;
-    }
-
-    stream->conns_served++;
-
-    /* XXX: add there authenticate and IP match */
-
-    if (c->post) {
-        /* if post, it means a feed is being sent */
-        if (!stream->is_feed) {
-            /* However it might be a status report from WMP! Let us log the
-             * data as it might come handy one day. */
-            const char *logline = 0;
-            int client_id = 0;
-
-            for (p = c->buffer; *p && *p != '\r' && *p != '\n'; ) {
-                if (av_strncasecmp(p, "Pragma: log-line=", 17) == 0) {
-                    logline = p;
-                    break;
-                }
-                if (av_strncasecmp(p, "Pragma: client-id=", 18) == 0)
-                    client_id = strtol(p + 18, 0, 10);
-                p = strchr(p, '\n');
-                if (!p)
-                    break;
-
-                p++;
-            }
-
-            if (logline) {
-                char *eol = strchr(logline, '\n');
-
-                logline += 17;
-
-                if (eol) {
-                    if (eol[-1] == '\r')
-                        eol--;
-                    http_log("%.*s\n", (int) (eol - logline), logline);
-                    c->suppress_log = 1;
-                }
-            }
-
-#ifdef DEBUG
-            http_log("\nGot request:\n%s\n", c->buffer);
-#endif
-
-            if (client_id && extract_rates(ratebuf, sizeof(ratebuf), c->buffer)) {
-                HTTPContext *wmpc;
-
-                /* Now we have to find the client_id */
-                for (wmpc = first_http_ctx; wmpc; wmpc = wmpc->next) {
-                    if (wmpc->wmp_client_id == client_id)
-                        break;
-                }
-
-                if (wmpc && modify_current_stream(wmpc, ratebuf))
-                    wmpc->switch_pending = 1;
-            }
-
-            snprintf(msg, sizeof(msg), "POST command not handled");
-            c->stream = 0;
-            goto send_error;
-        }
-        if (http_start_receive_data(c) < 0) {
-            snprintf(msg, sizeof(msg), "could not open feed");
-            goto send_error;
-        }
-        c->http_error = 0;
-        c->state = HTTPSTATE_RECEIVE_DATA;
-        return 0;
-    }
-
-#ifdef DEBUG
-    if (strcmp(stream->filename + strlen(stream->filename) - 4, ".asf") == 0)
-        http_log("\nGot request:\n%s\n", c->buffer);
-#endif
-
-    if (c->stream->stream_type == STREAM_TYPE_STATUS)
-        goto send_status;
-
-    /* open input stream */
-    if (open_input_stream(c, info) < 0) {
-        snprintf(msg, sizeof(msg), "Input stream corresponding to '%s' not found", url);
-        goto send_error;
-    }
-
-    /* prepare HTTP header */
-    c->buffer[0] = 0;
-    av_strlcatf(c->buffer, c->buffer_size, "HTTP/1.0 200 OK\r\n");
-    mime_type = c->stream->fmt->mime_type;
-    if (!mime_type)
-        mime_type = "application/x-octet-stream";
-    av_strlcatf(c->buffer, c->buffer_size, "Pragma: no-cache\r\n");
-
-    /* for asf, we need extra headers */
-    if (!strcmp(c->stream->fmt->name,"asf_stream")) {
-        /* Need to allocate a client id */
-
-        c->wmp_client_id = av_lfg_get(&random_state);
-
-        av_strlcatf(c->buffer, c->buffer_size, "Server: Cougar 4.1.0.3923\r\nCache-Control: no-cache\r\nPragma: client-id=%d\r\nPragma: features=\"broadcast\"\r\n", c->wmp_client_id);
-    }
-    av_strlcatf(c->buffer, c->buffer_size, "Content-Type: %s\r\n", mime_type);
-    av_strlcatf(c->buffer, c->buffer_size, "\r\n");
-    q = c->buffer + strlen(c->buffer);
-
-    /* prepare output buffer */
-    c->http_error = 0;
-    c->buffer_ptr = c->buffer;
-    c->buffer_end = q;
-    c->state = HTTPSTATE_SEND_HEADER;
-    return 0;
- send_error:
-    c->http_error = 404;
-    q = c->buffer;
-    if (!htmlencode(msg, &encoded_msg)) {
-        http_log("Could not encode filename '%s' as HTML\n", msg);
-    }
-    snprintf(q, c->buffer_size,
-                  "HTTP/1.0 404 Not Found\r\n"
-                  "Content-type: text/html\r\n"
-                  "\r\n"
-                  "<!DOCTYPE html>\n"
-                  "<html>\n"
-                  "<head>\n"
-                  "<meta charset=\"UTF-8\">\n"
-                  "<title>404 Not Found</title>\n"
-                  "</head>\n"
-                  "<body>%s</body>\n"
-                  "</html>\n", encoded_msg? encoded_msg : "File not found");
-    q += strlen(q);
-    /* prepare output buffer */
-    c->buffer_ptr = c->buffer;
-    c->buffer_end = q;
-    c->state = HTTPSTATE_SEND_HEADER;
-    av_freep(&encoded_msg);
-    return 0;
- send_status:
-    compute_status(c);
-    /* horrible: we use this value to avoid
-     * going to the send data state */
-    c->http_error = 200;
-    c->state = HTTPSTATE_SEND_HEADER;
-    return 0;
-}
-
-static void fmt_bytecount(AVIOContext *pb, int64_t count)
-{
-    static const char suffix[] = " kMGTP";
-    const char *s;
-
-    for (s = suffix; count >= 100000 && s[1]; count /= 1000, s++);
-
-    avio_printf(pb, "%"PRId64"%c", count, *s);
-}
-
-static inline void print_stream_params(AVIOContext *pb, FFServerStream *stream)
-{
-    int i, stream_no;
-    const char *type = "unknown";
-    char parameters[64];
-    LayeredAVStream *st;
-    AVCodec *codec;
-
-    stream_no = stream->nb_streams;
-
-    avio_printf(pb, "<table><tr><th>Stream<th>"
-                    "type<th>kbit/s<th>codec<th>"
-                    "Parameters\n");
-
-    for (i = 0; i < stream_no; i++) {
-        st = stream->streams[i];
-        codec = avcodec_find_encoder(st->codecpar->codec_id);
-
-        parameters[0] = 0;
-
-        switch(st->codecpar->codec_type) {
-        case AVMEDIA_TYPE_AUDIO:
-            type = "audio";
-            snprintf(parameters, sizeof(parameters), "%d channel(s), %d Hz",
-                     st->codecpar->channels, st->codecpar->sample_rate);
-            break;
-        case AVMEDIA_TYPE_VIDEO:
-            type = "video";
-            snprintf(parameters, sizeof(parameters),
-                     "%dx%d, q=%d-%d, fps=%d", st->codecpar->width,
-                     st->codecpar->height, st->codec->qmin, st->codec->qmax,
-                     st->time_base.den / st->time_base.num);
-            break;
-        default:
-            abort();
-        }
-
-        avio_printf(pb, "<tr><td>%d<td>%s<td>%"PRId64
-                        "<td>%s<td>%s\n",
-                    i, type, st->codecpar->bit_rate/1000,
-                    codec ? codec->name : "", parameters);
-     }
-
-     avio_printf(pb, "</table>\n");
-}
-
-static void clean_html(char *clean, int clean_len, char *dirty)
-{
-    int i, o;
-
-    for (o = i = 0; o+10 < clean_len && dirty[i];) {
-        int len = strspn(dirty+i, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$-_.+!*(),?/ :;%");
-        if (len) {
-            if (o + len >= clean_len)
-                break;
-            memcpy(clean + o, dirty + i, len);
-            i += len;
-            o += len;
-        } else {
-            int c = dirty[i++];
-            switch (c) {
-            case  '&': av_strlcat(clean+o, "&amp;"  , clean_len - o); break;
-            case  '<': av_strlcat(clean+o, "&lt;"   , clean_len - o); break;
-            case  '>': av_strlcat(clean+o, "&gt;"   , clean_len - o); break;
-            case '\'': av_strlcat(clean+o, "&apos;" , clean_len - o); break;
-            case '\"': av_strlcat(clean+o, "&quot;" , clean_len - o); break;
-            default:   av_strlcat(clean+o, "&#9785;", clean_len - o); break;
-            }
-            o += strlen(clean+o);
-        }
-    }
-    clean[o] = 0;
-}
-
-static void compute_status(HTTPContext *c)
-{
-    HTTPContext *c1;
-    FFServerStream *stream;
-    char *p;
-    time_t ti;
-    int i, len;
-    AVIOContext *pb;
-
-    if (avio_open_dyn_buf(&pb) < 0) {
-        /* XXX: return an error ? */
-        c->buffer_ptr = c->buffer;
-        c->buffer_end = c->buffer;
-        return;
-    }
-
-    avio_printf(pb, "HTTP/1.0 200 OK\r\n");
-    avio_printf(pb, "Content-type: text/html\r\n");
-    avio_printf(pb, "Pragma: no-cache\r\n");
-    avio_printf(pb, "\r\n");
-
-    avio_printf(pb, "<!DOCTYPE html>\n");
-    avio_printf(pb, "<html><head><title>%s Status</title>\n", program_name);
-    if (c->stream->feed_filename[0])
-        avio_printf(pb, "<link rel=\"shortcut icon\" href=\"%s\">\n",
-                    c->stream->feed_filename);
-    avio_printf(pb, "</head>\n<body>");
-    avio_printf(pb, "<h1>%s Status</h1>\n", program_name);
-    /* format status */
-    avio_printf(pb, "<h2>Available Streams</h2>\n");
-    avio_printf(pb, "<table>\n");
-    avio_printf(pb, "<tr><th>Path<th>Served<br>Conns<th><br>bytes<th>Format<th>Bit rate<br>kbit/s<th>Video<br>kbit/s<th><br>Codec<th>Audio<br>kbit/s<th><br>Codec<th>Feed\n");
-    stream = config.first_stream;
-    while (stream) {
-        char sfilename[1024];
-        char *eosf;
-
-        if (stream->feed == stream) {
-            stream = stream->next;
-            continue;
-        }
-
-        av_strlcpy(sfilename, stream->filename, sizeof(sfilename) - 10);
-        eosf = sfilename + strlen(sfilename);
-        if (eosf - sfilename >= 4) {
-            if (strcmp(eosf - 4, ".asf") == 0)
-                strcpy(eosf - 4, ".asx");
-            else if (strcmp(eosf - 3, ".rm") == 0)
-                strcpy(eosf - 3, ".ram");
-            else if (stream->fmt && !strcmp(stream->fmt->name, "rtp")) {
-                /* generate a sample RTSP director if
-                 * unicast. Generate an SDP redirector if
-                 * multicast */
-                eosf = strrchr(sfilename, '.');
-                if (!eosf)
-                    eosf = sfilename + strlen(sfilename);
-                if (stream->is_multicast)
-                    strcpy(eosf, ".sdp");
-                else
-                    strcpy(eosf, ".rtsp");
-            }
-        }
-
-        avio_printf(pb, "<tr><td><a href=\"/%s\">%s</a> ",
-                    sfilename, stream->filename);
-        avio_printf(pb, "<td> %d <td> ",
-                    stream->conns_served);
-        // TODO: Investigate if we can make http bitexact so it always produces the same count of bytes
-        if (!config.bitexact)
-            fmt_bytecount(pb, stream->bytes_served);
-
-        switch(stream->stream_type) {
-        case STREAM_TYPE_LIVE: {
-            int audio_bit_rate = 0;
-            int video_bit_rate = 0;
-            const char *audio_codec_name = "";
-            const char *video_codec_name = "";
-            const char *audio_codec_name_extra = "";
-            const char *video_codec_name_extra = "";
-
-            for(i=0;i<stream->nb_streams;i++) {
-                LayeredAVStream *st = stream->streams[i];
-                AVCodec *codec = avcodec_find_encoder(st->codecpar->codec_id);
-
-                switch(st->codecpar->codec_type) {
-                case AVMEDIA_TYPE_AUDIO:
-                    audio_bit_rate += st->codecpar->bit_rate;
-                    if (codec) {
-                        if (*audio_codec_name)
-                            audio_codec_name_extra = "...";
-                        audio_codec_name = codec->name;
-                    }
-                    break;
-                case AVMEDIA_TYPE_VIDEO:
-                    video_bit_rate += st->codecpar->bit_rate;
-                    if (codec) {
-                        if (*video_codec_name)
-                            video_codec_name_extra = "...";
-                        video_codec_name = codec->name;
-                    }
-                    break;
-                case AVMEDIA_TYPE_DATA:
-                    video_bit_rate += st->codecpar->bit_rate;
-                    break;
-                default:
-                    abort();
-                }
-            }
-
-            avio_printf(pb, "<td> %s <td> %d <td> %d <td> %s %s <td> "
-                            "%d <td> %s %s",
-                        stream->fmt->name, stream->bandwidth,
-                        video_bit_rate / 1000, video_codec_name,
-                        video_codec_name_extra, audio_bit_rate / 1000,
-                        audio_codec_name, audio_codec_name_extra);
-
-            if (stream->feed)
-                avio_printf(pb, "<td>%s", stream->feed->filename);
-            else
-                avio_printf(pb, "<td>%s", stream->feed_filename);
-            avio_printf(pb, "\n");
-        }
-            break;
-        default:
-            avio_printf(pb, "<td> - <td> - "
-                            "<td> - <td><td> - <td>\n");
-            break;
-        }
-        stream = stream->next;
-    }
-    avio_printf(pb, "</table>\n");
-
-    stream = config.first_stream;
-    while (stream) {
-
-        if (stream->feed != stream) {
-            stream = stream->next;
-            continue;
-        }
-
-        avio_printf(pb, "<h2>Feed %s</h2>", stream->filename);
-        if (stream->pid) {
-            avio_printf(pb, "Running as pid %"PRId64".\n", (int64_t) stream->pid);
-
-#if defined(linux)
-            {
-                FILE *pid_stat;
-                char ps_cmd[64];
-
-                /* This is somewhat linux specific I guess */
-                snprintf(ps_cmd, sizeof(ps_cmd),
-                         "ps -o \"%%cpu,cputime\" --no-headers %"PRId64"",
-                         (int64_t) stream->pid);
-
-                 pid_stat = popen(ps_cmd, "r");
-                 if (pid_stat) {
-                     char cpuperc[10];
-                     char cpuused[64];
-
-                     if (fscanf(pid_stat, "%9s %63s", cpuperc, cpuused) == 2) {
-                         avio_printf(pb, "Currently using %s%% of the cpu. "
-                                         "Total time used %s.\n",
-                                     cpuperc, cpuused);
-                     }
-                     fclose(pid_stat);
-                 }
-            }
-#endif
-
-            avio_printf(pb, "<p>");
-        }
-
-        print_stream_params(pb, stream);
-        stream = stream->next;
-    }
-
-    /* connection status */
-    avio_printf(pb, "<h2>Connection Status</h2>\n");
-
-    avio_printf(pb, "Number of connections: %d / %d<br>\n",
-                nb_connections, config.nb_max_connections);
-
-    avio_printf(pb, "Bandwidth in use: %"PRIu64"k / %"PRIu64"k<br>\n",
-                current_bandwidth, config.max_bandwidth);
-
-    avio_printf(pb, "<table>\n");
-    avio_printf(pb, "<tr><th>#<th>File<th>IP<th>URL<th>Proto<th>State<th>Target "
-                    "bit/s<th>Actual bit/s<th>Bytes transferred\n");
-    c1 = first_http_ctx;
-    i = 0;
-    while (c1) {
-        int bitrate;
-        int j;
-
-        bitrate = 0;
-        if (c1->stream) {
-            for (j = 0; j < c1->stream->nb_streams; j++) {
-                if (!c1->stream->feed)
-                    bitrate += c1->stream->streams[j]->codecpar->bit_rate;
-                else if (c1->feed_streams[j] >= 0)
-                    bitrate += c1->stream->feed->streams[c1->feed_streams[j]]->codecpar->bit_rate;
-            }
-        }
-
-        i++;
-        p = inet_ntoa(c1->from_addr.sin_addr);
-        clean_html(c1->clean_url, sizeof(c1->clean_url), c1->url);
-        avio_printf(pb, "<tr><td><b>%d</b><td>%s%s<td>%s<td>%s<td>%s<td>%s"
-                        "<td>",
-                    i, c1->stream ? c1->stream->filename : "",
-                    c1->state == HTTPSTATE_RECEIVE_DATA ? "(input)" : "",
-                    p,
-                    c1->clean_url,
-                    c1->protocol, http_state[c1->state]);
-        fmt_bytecount(pb, bitrate);
-        avio_printf(pb, "<td>");
-        fmt_bytecount(pb, compute_datarate(&c1->datarate, c1->data_count) * 8);
-        avio_printf(pb, "<td>");
-        fmt_bytecount(pb, c1->data_count);
-        avio_printf(pb, "\n");
-        c1 = c1->next;
-    }
-    avio_printf(pb, "</table>\n");
-
-    if (!config.bitexact) {
-        /* date */
-        ti = time(NULL);
-        p = ctime(&ti);
-        avio_printf(pb, "<hr>Generated at %s", p);
-    }
-    avio_printf(pb, "</body>\n</html>\n");
-
-    len = avio_close_dyn_buf(pb, &c->pb_buffer);
-    c->buffer_ptr = c->pb_buffer;
-    c->buffer_end = c->pb_buffer + len;
-}
-
-static int open_input_stream(HTTPContext *c, const char *info)
-{
-    char buf[128];
-    char input_filename[1024];
-    AVFormatContext *s = NULL;
-    int buf_size, i, ret;
-    int64_t stream_pos;
-
-    /* find file name */
-    if (c->stream->feed) {
-        strcpy(input_filename, c->stream->feed->feed_filename);
-        buf_size = FFM_PACKET_SIZE;
-        /* compute position (absolute time) */
-        if (av_find_info_tag(buf, sizeof(buf), "date", info)) {
-            if ((ret = av_parse_time(&stream_pos, buf, 0)) < 0) {
-                http_log("Invalid date specification '%s' for stream\n", buf);
-                return ret;
-            }
-        } else if (av_find_info_tag(buf, sizeof(buf), "buffer", info)) {
-            int prebuffer = strtol(buf, 0, 10);
-            stream_pos = av_gettime() - prebuffer * (int64_t)1000000;
-        } else
-            stream_pos = av_gettime() - c->stream->prebuffer * (int64_t)1000;
-    } else {
-        strcpy(input_filename, c->stream->feed_filename);
-        buf_size = 0;
-        /* compute position (relative time) */
-        if (av_find_info_tag(buf, sizeof(buf), "date", info)) {
-            if ((ret = av_parse_time(&stream_pos, buf, 1)) < 0) {
-                http_log("Invalid date specification '%s' for stream\n", buf);
-                return ret;
-            }
-        } else
-            stream_pos = 0;
-    }
-    if (!input_filename[0]) {
-        http_log("No filename was specified for stream\n");
-        return AVERROR(EINVAL);
-    }
-
-    /* open stream */
-    ret = avformat_open_input(&s, input_filename, c->stream->ifmt,
-                              &c->stream->in_opts);
-    if (ret < 0) {
-        http_log("Could not open input '%s': %s\n",
-                 input_filename, av_err2str(ret));
-        return ret;
-    }
-
-    /* set buffer size */
-    if (buf_size > 0) {
-        ret = ffio_set_buf_size(s->pb, buf_size);
-        if (ret < 0) {
-            http_log("Failed to set buffer size\n");
-            return ret;
-        }
-    }
-
-    s->flags |= AVFMT_FLAG_GENPTS;
-    c->fmt_in = s;
-    if (strcmp(s->iformat->name, "ffm") &&
-        (ret = avformat_find_stream_info(c->fmt_in, NULL)) < 0) {
-        http_log("Could not find stream info for input '%s'\n", input_filename);
-        avformat_close_input(&s);
-        return ret;
-    }
-
-    /* choose stream as clock source (we favor the video stream if
-     * present) for packet sending */
-    c->pts_stream_index = 0;
-    for(i=0;i<c->stream->nb_streams;i++) {
-        if (c->pts_stream_index == 0 &&
-            c->stream->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
-            c->pts_stream_index = i;
-        }
-    }
-
-    if (c->fmt_in->iformat->read_seek)
-        av_seek_frame(c->fmt_in, -1, stream_pos, 0);
-    /* set the start time (needed for maxtime and RTP packet timing) */
-    c->start_time = cur_time;
-    c->first_pts = AV_NOPTS_VALUE;
-    return 0;
-}
-
-/* return the server clock (in us) */
-static int64_t get_server_clock(HTTPContext *c)
-{
-    /* compute current pts value from system time */
-    return (cur_time - c->start_time) * 1000;
-}
-
-/* return the estimated time (in us) at which the current packet must be sent */
-static int64_t get_packet_send_clock(HTTPContext *c)
-{
-    int bytes_left, bytes_sent, frame_bytes;
-
-    frame_bytes = c->cur_frame_bytes;
-    if (frame_bytes <= 0)
-        return c->cur_pts;
-
-    bytes_left = c->buffer_end - c->buffer_ptr;
-    bytes_sent = frame_bytes - bytes_left;
-    return c->cur_pts + (c->cur_frame_duration * bytes_sent) / frame_bytes;
-}
-
-
-static int http_prepare_data(HTTPContext *c)
-{
-    int i, len, ret;
-    AVFormatContext *ctx;
-
-    av_freep(&c->pb_buffer);
-    switch(c->state) {
-    case HTTPSTATE_SEND_DATA_HEADER:
-        ctx = avformat_alloc_context();
-        if (!ctx)
-            return AVERROR(ENOMEM);
-        c->pfmt_ctx = ctx;
-        av_dict_copy(&(c->pfmt_ctx->metadata), c->stream->metadata, 0);
-
-        for(i=0;i<c->stream->nb_streams;i++) {
-            LayeredAVStream *src;
-            AVStream *st = avformat_new_stream(c->pfmt_ctx, NULL);
-            if (!st)
-                return AVERROR(ENOMEM);
-
-            /* if file or feed, then just take streams from FFServerStream
-             * struct */
-            if (!c->stream->feed ||
-                c->stream->feed == c->stream)
-                src = c->stream->streams[i];
-            else
-                src = c->stream->feed->streams[c->stream->feed_streams[i]];
-
-            unlayer_stream(c->pfmt_ctx->streams[i], src); //TODO we no longer copy st->internal, does this matter?
-            av_assert0(!c->pfmt_ctx->streams[i]->priv_data);
-
-            if (src->codec->flags & AV_CODEC_FLAG_BITEXACT)
-                c->pfmt_ctx->flags |= AVFMT_FLAG_BITEXACT;
-        }
-        /* set output format parameters */
-        c->pfmt_ctx->oformat = c->stream->fmt;
-        av_assert0(c->pfmt_ctx->nb_streams == c->stream->nb_streams);
-
-        c->got_key_frame = 0;
-
-        /* prepare header and save header data in a stream */
-        if (avio_open_dyn_buf(&c->pfmt_ctx->pb) < 0) {
-            /* XXX: potential leak */
-            return -1;
-        }
-        c->pfmt_ctx->pb->seekable = 0;
-
-        /*
-         * HACK to avoid MPEG-PS muxer to spit many underflow errors
-         * Default value from FFmpeg
-         * Try to set it using configuration option
-         */
-        c->pfmt_ctx->max_delay = (int)(0.7*AV_TIME_BASE);
-
-        if ((ret = avformat_write_header(c->pfmt_ctx, NULL)) < 0) {
-            http_log("Error writing output header for stream '%s': %s\n",
-                     c->stream->filename, av_err2str(ret));
-            return ret;
-        }
-        av_dict_free(&c->pfmt_ctx->metadata);
-
-        len = avio_close_dyn_buf(c->pfmt_ctx->pb, &c->pb_buffer);
-        c->buffer_ptr = c->pb_buffer;
-        c->buffer_end = c->pb_buffer + len;
-
-        c->state = HTTPSTATE_SEND_DATA;
-        c->last_packet_sent = 0;
-        break;
-    case HTTPSTATE_SEND_DATA:
-        /* find a new packet */
-        /* read a packet from the input stream */
-        if (c->stream->feed)
-            ffm_set_write_index(c->fmt_in,
-                                c->stream->feed->feed_write_index,
-                                c->stream->feed->feed_size);
-
-        if (c->stream->max_time &&
-            c->stream->max_time + c->start_time - cur_time < 0)
-            /* We have timed out */
-            c->state = HTTPSTATE_SEND_DATA_TRAILER;
-        else {
-            AVPacket pkt;
-        redo:
-            ret = av_read_frame(c->fmt_in, &pkt);
-            if (ret < 0) {
-                if (c->stream->feed) {
-                    /* if coming from feed, it means we reached the end of the
-                     * ffm file, so must wait for more data */
-                    c->state = HTTPSTATE_WAIT_FEED;
-                    return 1; /* state changed */
-                }
-                if (ret == AVERROR(EAGAIN)) {
-                    /* input not ready, come back later */
-                    return 0;
-                }
-                if (c->stream->loop) {
-                    avformat_close_input(&c->fmt_in);
-                    if (open_input_stream(c, "") < 0)
-                        goto no_loop;
-                    goto redo;
-                } else {
-                    no_loop:
-                        /* must send trailer now because EOF or error */
-                        c->state = HTTPSTATE_SEND_DATA_TRAILER;
-                }
-            } else {
-                int source_index = pkt.stream_index;
-                /* update first pts if needed */
-                if (c->first_pts == AV_NOPTS_VALUE && pkt.dts != AV_NOPTS_VALUE) {
-                    c->first_pts = av_rescale_q(pkt.dts, c->fmt_in->streams[pkt.stream_index]->time_base, AV_TIME_BASE_Q);
-                    c->start_time = cur_time;
-                }
-                /* send it to the appropriate stream */
-                if (c->stream->feed) {
-                    /* if coming from a feed, select the right stream */
-                    if (c->switch_pending) {
-                        c->switch_pending = 0;
-                        for(i=0;i<c->stream->nb_streams;i++) {
-                            if (c->switch_feed_streams[i] == pkt.stream_index)
-                                if (pkt.flags & AV_PKT_FLAG_KEY)
-                                    c->switch_feed_streams[i] = -1;
-                            if (c->switch_feed_streams[i] >= 0)
-                                c->switch_pending = 1;
-                        }
-                    }
-                    for(i=0;i<c->stream->nb_streams;i++) {
-                        if (c->stream->feed_streams[i] == pkt.stream_index) {
-                            AVStream *st = c->fmt_in->streams[source_index];
-                            pkt.stream_index = i;
-                            if (pkt.flags & AV_PKT_FLAG_KEY &&
-                                (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
-                                 c->stream->nb_streams == 1))
-                                c->got_key_frame = 1;
-                            if (!c->stream->send_on_key || c->got_key_frame)
-                                goto send_it;
-                        }
-                    }
-                } else {
-                    AVStream *ist, *ost;
-                send_it:
-                    ist = c->fmt_in->streams[source_index];
-                    /* specific handling for RTP: we use several
-                     * output streams (one for each RTP connection).
-                     * XXX: need more abstract handling */
-                    if (c->is_packetized) {
-                        /* compute send time and duration */
-                        if (pkt.dts != AV_NOPTS_VALUE) {
-                            c->cur_pts = av_rescale_q(pkt.dts, ist->time_base, AV_TIME_BASE_Q);
-                            c->cur_pts -= c->first_pts;
-                        }
-                        c->cur_frame_duration = av_rescale_q(pkt.duration, ist->time_base, AV_TIME_BASE_Q);
-                        /* find RTP context */
-                        c->packet_stream_index = pkt.stream_index;
-                        ctx = c->rtp_ctx[c->packet_stream_index];
-                        if(!ctx) {
-                            av_packet_unref(&pkt);
-                            break;
-                        }
-                        /* only one stream per RTP connection */
-                        pkt.stream_index = 0;
-                    } else {
-                        ctx = c->pfmt_ctx;
-                        /* Fudge here */
-                    }
-
-                    if (c->is_packetized) {
-                        int max_packet_size;
-                        if (c->rtp_protocol == RTSP_LOWER_TRANSPORT_TCP)
-                            max_packet_size = RTSP_TCP_MAX_PACKET_SIZE;
-                        else
-                            max_packet_size = c->rtp_handles[c->packet_stream_index]->max_packet_size;
-                        ret = ffio_open_dyn_packet_buf(&ctx->pb,
-                                                       max_packet_size);
-                    } else
-                        ret = avio_open_dyn_buf(&ctx->pb);
-
-                    if (ret < 0) {
-                        /* XXX: potential leak */
-                        return -1;
-                    }
-                    ost = ctx->streams[pkt.stream_index];
-
-                    ctx->pb->seekable = 0;
-                    if (pkt.dts != AV_NOPTS_VALUE)
-                        pkt.dts = av_rescale_q(pkt.dts, ist->time_base,
-                                               ost->time_base);
-                    if (pkt.pts != AV_NOPTS_VALUE)
-                        pkt.pts = av_rescale_q(pkt.pts, ist->time_base,
-                                               ost->time_base);
-                    pkt.duration = av_rescale_q(pkt.duration, ist->time_base,
-                                                ost->time_base);
-                    if ((ret = av_write_frame(ctx, &pkt)) < 0) {
-                        http_log("Error writing frame to output for stream '%s': %s\n",
-                                 c->stream->filename, av_err2str(ret));
-                        c->state = HTTPSTATE_SEND_DATA_TRAILER;
-                    }
-
-                    av_freep(&c->pb_buffer);
-                    len = avio_close_dyn_buf(ctx->pb, &c->pb_buffer);
-                    ctx->pb = NULL;
-                    c->cur_frame_bytes = len;
-                    c->buffer_ptr = c->pb_buffer;
-                    c->buffer_end = c->pb_buffer + len;
-
-                    if (len == 0) {
-                        av_packet_unref(&pkt);
-                        goto redo;
-                    }
-                }
-                av_packet_unref(&pkt);
-            }
-        }
-        break;
-    default:
-    case HTTPSTATE_SEND_DATA_TRAILER:
-        /* last packet test ? */
-        if (c->last_packet_sent || c->is_packetized)
-            return -1;
-        ctx = c->pfmt_ctx;
-        /* prepare header */
-        if (avio_open_dyn_buf(&ctx->pb) < 0) {
-            /* XXX: potential leak */
-            return -1;
-        }
-        c->pfmt_ctx->pb->seekable = 0;
-        av_write_trailer(ctx);
-        len = avio_close_dyn_buf(ctx->pb, &c->pb_buffer);
-        c->buffer_ptr = c->pb_buffer;
-        c->buffer_end = c->pb_buffer + len;
-
-        c->last_packet_sent = 1;
-        break;
-    }
-    return 0;
-}
-
-/* should convert the format at the same time */
-/* send data starting at c->buffer_ptr to the output connection
- * (either UDP or TCP)
- */
-static int http_send_data(HTTPContext *c)
-{
-    int len, ret;
-
-    for(;;) {
-        if (c->buffer_ptr >= c->buffer_end) {
-            ret = http_prepare_data(c);
-            if (ret < 0)
-                return -1;
-            else if (ret)
-                /* state change requested */
-                break;
-        } else {
-            if (c->is_packetized) {
-                /* RTP data output */
-                len = c->buffer_end - c->buffer_ptr;
-                if (len < 4) {
-                    /* fail safe - should never happen */
-                fail1:
-                    c->buffer_ptr = c->buffer_end;
-                    return 0;
-                }
-                len = (c->buffer_ptr[0] << 24) |
-                    (c->buffer_ptr[1] << 16) |
-                    (c->buffer_ptr[2] << 8) |
-                    (c->buffer_ptr[3]);
-                if (len > (c->buffer_end - c->buffer_ptr))
-                    goto fail1;
-                if ((get_packet_send_clock(c) - get_server_clock(c)) > 0) {
-                    /* nothing to send yet: we can wait */
-                    return 0;
-                }
-
-                c->data_count += len;
-                update_datarate(&c->datarate, c->data_count);
-                if (c->stream)
-                    c->stream->bytes_served += len;
-
-                if (c->rtp_protocol == RTSP_LOWER_TRANSPORT_TCP) {
-                    /* RTP packets are sent inside the RTSP TCP connection */
-                    AVIOContext *pb;
-                    int interleaved_index, size;
-                    uint8_t header[4];
-                    HTTPContext *rtsp_c;
-
-                    rtsp_c = c->rtsp_c;
-                    /* if no RTSP connection left, error */
-                    if (!rtsp_c)
-                        return -1;
-                    /* if already sending something, then wait. */
-                    if (rtsp_c->state != RTSPSTATE_WAIT_REQUEST)
-                        break;
-                    if (avio_open_dyn_buf(&pb) < 0)
-                        goto fail1;
-                    interleaved_index = c->packet_stream_index * 2;
-                    /* RTCP packets are sent at odd indexes */
-                    if (c->buffer_ptr[1] == 200)
-                        interleaved_index++;
-                    /* write RTSP TCP header */
-                    header[0] = '$';
-                    header[1] = interleaved_index;
-                    header[2] = len >> 8;
-                    header[3] = len;
-                    avio_write(pb, header, 4);
-                    /* write RTP packet data */
-                    c->buffer_ptr += 4;
-                    avio_write(pb, c->buffer_ptr, len);
-                    size = avio_close_dyn_buf(pb, &c->packet_buffer);
-                    /* prepare asynchronous TCP sending */
-                    rtsp_c->packet_buffer_ptr = c->packet_buffer;
-                    rtsp_c->packet_buffer_end = c->packet_buffer + size;
-                    c->buffer_ptr += len;
-
-                    /* send everything we can NOW */
-                    len = send(rtsp_c->fd, rtsp_c->packet_buffer_ptr,
-                               rtsp_c->packet_buffer_end - rtsp_c->packet_buffer_ptr, 0);
-                    if (len > 0)
-                        rtsp_c->packet_buffer_ptr += len;
-                    if (rtsp_c->packet_buffer_ptr < rtsp_c->packet_buffer_end) {
-                        /* if we could not send all the data, we will
-                         * send it later, so a new state is needed to
-                         * "lock" the RTSP TCP connection */
-                        rtsp_c->state = RTSPSTATE_SEND_PACKET;
-                        break;
-                    } else
-                        /* all data has been sent */
-                        av_freep(&c->packet_buffer);
-                } else {
-                    /* send RTP packet directly in UDP */
-                    c->buffer_ptr += 4;
-                    ffurl_write(c->rtp_handles[c->packet_stream_index],
-                                c->buffer_ptr, len);
-                    c->buffer_ptr += len;
-                    /* here we continue as we can send several packets
-                     * per 10 ms slot */
-                }
-            } else {
-                /* TCP data output */
-                len = send(c->fd, c->buffer_ptr,
-                           c->buffer_end - c->buffer_ptr, 0);
-                if (len < 0) {
-                    if (ff_neterrno() != AVERROR(EAGAIN) &&
-                        ff_neterrno() != AVERROR(EINTR))
-                        /* error : close connection */
-                        return -1;
-                    else
-                        return 0;
-                }
-                c->buffer_ptr += len;
-
-                c->data_count += len;
-                update_datarate(&c->datarate, c->data_count);
-                if (c->stream)
-                    c->stream->bytes_served += len;
-                break;
-            }
-        }
-    } /* for(;;) */
-    return 0;
-}
-
-static int http_start_receive_data(HTTPContext *c)
-{
-    int fd;
-    int ret;
-    int64_t ret64;
-
-    if (c->stream->feed_opened) {
-        http_log("Stream feed '%s' was not opened\n",
-                 c->stream->feed_filename);
-        return AVERROR(EINVAL);
-    }
-
-    /* Don't permit writing to this one */
-    if (c->stream->readonly) {
-        http_log("Cannot write to read-only file '%s'\n",
-                 c->stream->feed_filename);
-        return AVERROR(EINVAL);
-    }
-
-    /* open feed */
-    fd = open(c->stream->feed_filename, O_RDWR);
-    if (fd < 0) {
-        ret = AVERROR(errno);
-        http_log("Could not open feed file '%s': %s\n",
-                 c->stream->feed_filename, strerror(errno));
-        return ret;
-    }
-    c->feed_fd = fd;
-
-    if (c->stream->truncate) {
-        /* truncate feed file */
-        ffm_write_write_index(c->feed_fd, FFM_PACKET_SIZE);
-        http_log("Truncating feed file '%s'\n", c->stream->feed_filename);
-        if (ftruncate(c->feed_fd, FFM_PACKET_SIZE) < 0) {
-            ret = AVERROR(errno);
-            http_log("Error truncating feed file '%s': %s\n",
-                     c->stream->feed_filename, strerror(errno));
-            return ret;
-        }
-    } else {
-        ret64 = ffm_read_write_index(fd);
-        if (ret64 < 0) {
-            http_log("Error reading write index from feed file '%s': %s\n",
-                     c->stream->feed_filename, strerror(errno));
-            return ret64;
-        }
-        c->stream->feed_write_index = ret64;
-    }
-
-    c->stream->feed_write_index = FFMAX(ffm_read_write_index(fd),
-                                        FFM_PACKET_SIZE);
-    c->stream->feed_size = lseek(fd, 0, SEEK_END);
-    lseek(fd, 0, SEEK_SET);
-
-    /* init buffer input */
-    c->buffer_ptr = c->buffer;
-    c->buffer_end = c->buffer + FFM_PACKET_SIZE;
-    c->stream->feed_opened = 1;
-    c->chunked_encoding = !!av_stristr(c->buffer, "Transfer-Encoding: chunked");
-    return 0;
-}
-
-static int http_receive_data(HTTPContext *c)
-{
-    HTTPContext *c1;
-    int len, loop_run = 0;
-
-    while (c->chunked_encoding && !c->chunk_size &&
-           c->buffer_end > c->buffer_ptr) {
-        /* read chunk header, if present */
-        len = recv(c->fd, c->buffer_ptr, 1, 0);
-
-        if (len < 0) {
-            if (ff_neterrno() != AVERROR(EAGAIN) &&
-                ff_neterrno() != AVERROR(EINTR))
-                /* error : close connection */
-                goto fail;
-            return 0;
-        } else if (len == 0) {
-            /* end of connection : close it */
-            goto fail;
-        } else if (c->buffer_ptr - c->buffer >= 2 &&
-                   !memcmp(c->buffer_ptr - 1, "\r\n", 2)) {
-            c->chunk_size = strtol(c->buffer, 0, 16);
-            if (c->chunk_size <= 0) { // end of stream or invalid chunk size
-                c->chunk_size = 0;
-                goto fail;
-            }
-            c->buffer_ptr = c->buffer;
-            break;
-        } else if (++loop_run > 10)
-            /* no chunk header, abort */
-            goto fail;
-        else
-            c->buffer_ptr++;
-    }
-
-    if (c->buffer_end > c->buffer_ptr) {
-        len = recv(c->fd, c->buffer_ptr,
-                   FFMIN(c->chunk_size, c->buffer_end - c->buffer_ptr), 0);
-        if (len < 0) {
-            if (ff_neterrno() != AVERROR(EAGAIN) &&
-                ff_neterrno() != AVERROR(EINTR))
-                /* error : close connection */
-                goto fail;
-        } else if (len == 0)
-            /* end of connection : close it */
-            goto fail;
-        else {
-            av_assert0(len <= c->chunk_size);
-            c->chunk_size -= len;
-            c->buffer_ptr += len;
-            c->data_count += len;
-            update_datarate(&c->datarate, c->data_count);
-        }
-    }
-
-    if (c->buffer_ptr - c->buffer >= 2 && c->data_count > FFM_PACKET_SIZE) {
-        if (c->buffer[0] != 'f' ||
-            c->buffer[1] != 'm') {
-            http_log("Feed stream has become desynchronized -- disconnecting\n");
-            goto fail;
-        }
-    }
-
-    if (c->buffer_ptr >= c->buffer_end) {
-        FFServerStream *feed = c->stream;
-        /* a packet has been received : write it in the store, except
-         * if header */
-        if (c->data_count > FFM_PACKET_SIZE) {
-            /* XXX: use llseek or url_seek
-             * XXX: Should probably fail? */
-            if (lseek(c->feed_fd, feed->feed_write_index, SEEK_SET) == -1)
-                http_log("Seek to %"PRId64" failed\n", feed->feed_write_index);
-
-            if (write(c->feed_fd, c->buffer, FFM_PACKET_SIZE) < 0) {
-                http_log("Error writing to feed file: %s\n", strerror(errno));
-                goto fail;
-            }
-
-            feed->feed_write_index += FFM_PACKET_SIZE;
-            /* update file size */
-            if (feed->feed_write_index > c->stream->feed_size)
-                feed->feed_size = feed->feed_write_index;
-
-            /* handle wrap around if max file size reached */
-            if (c->stream->feed_max_size &&
-                feed->feed_write_index >= c->stream->feed_max_size)
-                feed->feed_write_index = FFM_PACKET_SIZE;
-
-            /* write index */
-            if (ffm_write_write_index(c->feed_fd, feed->feed_write_index) < 0) {
-                http_log("Error writing index to feed file: %s\n",
-                         strerror(errno));
-                goto fail;
-            }
-
-            /* wake up any waiting connections */
-            for(c1 = first_http_ctx; c1; c1 = c1->next) {
-                if (c1->state == HTTPSTATE_WAIT_FEED &&
-                    c1->stream->feed == c->stream->feed)
-                    c1->state = HTTPSTATE_SEND_DATA;
-            }
-        } else {
-            /* We have a header in our hands that contains useful data */
-            AVFormatContext *s = avformat_alloc_context();
-            AVIOContext *pb;
-            AVInputFormat *fmt_in;
-            int i;
-
-            if (!s)
-                goto fail;
-
-            /* use feed output format name to find corresponding input format */
-            fmt_in = av_find_input_format(feed->fmt->name);
-            if (!fmt_in)
-                goto fail;
-
-            pb = avio_alloc_context(c->buffer, c->buffer_end - c->buffer,
-                                    0, NULL, NULL, NULL, NULL);
-            if (!pb)
-                goto fail;
-
-            pb->seekable = 0;
-
-            s->pb = pb;
-            if (avformat_open_input(&s, c->stream->feed_filename, fmt_in, NULL) < 0) {
-                av_freep(&pb);
-                goto fail;
-            }
-
-            /* Now we have the actual streams */
-            if (s->nb_streams != feed->nb_streams) {
-                avformat_close_input(&s);
-                av_freep(&pb);
-                http_log("Feed '%s' stream number does not match registered feed\n",
-                         c->stream->feed_filename);
-                goto fail;
-            }
-
-            for (i = 0; i < s->nb_streams; i++) {
-                LayeredAVStream *fst = feed->streams[i];
-                AVStream *st = s->streams[i];
-                avcodec_parameters_to_context(fst->codec, st->codecpar);
-                avcodec_parameters_from_context(fst->codecpar, fst->codec);
-            }
-
-            avformat_close_input(&s);
-            av_freep(&pb);
-        }
-        c->buffer_ptr = c->buffer;
-    }
-
-    return 0;
- fail:
-    c->stream->feed_opened = 0;
-    close(c->feed_fd);
-    /* wake up any waiting connections to stop waiting for feed */
-    for(c1 = first_http_ctx; c1; c1 = c1->next) {
-        if (c1->state == HTTPSTATE_WAIT_FEED &&
-            c1->stream->feed == c->stream->feed)
-            c1->state = HTTPSTATE_SEND_DATA_TRAILER;
-    }
-    return -1;
-}
-
-/********************************************************************/
-/* RTSP handling */
-
-static void rtsp_reply_header(HTTPContext *c, enum RTSPStatusCode error_number)
-{
-    const char *str;
-    time_t ti;
-    struct tm *tm;
-    char buf2[32];
-
-    str = RTSP_STATUS_CODE2STRING(error_number);
-    if (!str)
-        str = "Unknown Error";
-
-    avio_printf(c->pb, "RTSP/1.0 %d %s\r\n", error_number, str);
-    avio_printf(c->pb, "CSeq: %d\r\n", c->seq);
-
-    /* output GMT time */
-    ti = time(NULL);
-    tm = gmtime(&ti);
-    strftime(buf2, sizeof(buf2), "%a, %d %b %Y %H:%M:%S", tm);
-    avio_printf(c->pb, "Date: %s GMT\r\n", buf2);
-}
-
-static void rtsp_reply_error(HTTPContext *c, enum RTSPStatusCode error_number)
-{
-    rtsp_reply_header(c, error_number);
-    avio_printf(c->pb, "\r\n");
-}
-
-static int rtsp_parse_request(HTTPContext *c)
-{
-    const char *p, *p1, *p2;
-    char cmd[32];
-    char url[1024];
-    char protocol[32];
-    char line[1024];
-    int len;
-    RTSPMessageHeader header1 = { 0 }, *header = &header1;
-
-    c->buffer_ptr[0] = '\0';
-    p = c->buffer;
-
-    get_word(cmd, sizeof(cmd), &p);
-    get_word(url, sizeof(url), &p);
-    get_word(protocol, sizeof(protocol), &p);
-
-    av_strlcpy(c->method, cmd, sizeof(c->method));
-    av_strlcpy(c->url, url, sizeof(c->url));
-    av_strlcpy(c->protocol, protocol, sizeof(c->protocol));
-
-    if (avio_open_dyn_buf(&c->pb) < 0) {
-        /* XXX: cannot do more */
-        c->pb = NULL; /* safety */
-        return -1;
-    }
-
-    /* check version name */
-    if (strcmp(protocol, "RTSP/1.0")) {
-        rtsp_reply_error(c, RTSP_STATUS_VERSION);
-        goto the_end;
-    }
-
-    /* parse each header line */
-    /* skip to next line */
-    while (*p != '\n' && *p != '\0')
-        p++;
-    if (*p == '\n')
-        p++;
-    while (*p != '\0') {
-        p1 = memchr(p, '\n', (char *)c->buffer_ptr - p);
-        if (!p1)
-            break;
-        p2 = p1;
-        if (p2 > p && p2[-1] == '\r')
-            p2--;
-        /* skip empty line */
-        if (p2 == p)
-            break;
-        len = p2 - p;
-        if (len > sizeof(line) - 1)
-            len = sizeof(line) - 1;
-        memcpy(line, p, len);
-        line[len] = '\0';
-        ff_rtsp_parse_line(NULL, header, line, NULL, NULL);
-        p = p1 + 1;
-    }
-
-    /* handle sequence number */
-    c->seq = header->seq;
-
-    if (!strcmp(cmd, "DESCRIBE"))
-        rtsp_cmd_describe(c, url);
-    else if (!strcmp(cmd, "OPTIONS"))
-        rtsp_cmd_options(c, url);
-    else if (!strcmp(cmd, "SETUP"))
-        rtsp_cmd_setup(c, url, header);
-    else if (!strcmp(cmd, "PLAY"))
-        rtsp_cmd_play(c, url, header);
-    else if (!strcmp(cmd, "PAUSE"))
-        rtsp_cmd_interrupt(c, url, header, 1);
-    else if (!strcmp(cmd, "TEARDOWN"))
-        rtsp_cmd_interrupt(c, url, header, 0);
-    else
-        rtsp_reply_error(c, RTSP_STATUS_METHOD);
-
- the_end:
-    len = avio_close_dyn_buf(c->pb, &c->pb_buffer);
-    c->pb = NULL; /* safety */
-    if (len < 0)
-        /* XXX: cannot do more */
-        return -1;
-
-    c->buffer_ptr = c->pb_buffer;
-    c->buffer_end = c->pb_buffer + len;
-    c->state = RTSPSTATE_SEND_REPLY;
-    return 0;
-}
-
-static int prepare_sdp_description(FFServerStream *stream, uint8_t **pbuffer,
-                                   struct in_addr my_ip)
-{
-    AVFormatContext *avc;
-    AVOutputFormat *rtp_format = av_guess_format("rtp", NULL, NULL);
-    AVDictionaryEntry *entry = av_dict_get(stream->metadata, "title", NULL, 0);
-    int i;
-
-    *pbuffer = NULL;
-
-    avc =  avformat_alloc_context();
-    if (!avc || !rtp_format)
-        return -1;
-
-    avc->oformat = rtp_format;
-    av_dict_set(&avc->metadata, "title",
-                entry ? entry->value : "No Title", 0);
-    if (stream->is_multicast) {
-        snprintf(avc->filename, 1024, "rtp://%s:%d?multicast=1?ttl=%d",
-                 inet_ntoa(stream->multicast_ip),
-                 stream->multicast_port, stream->multicast_ttl);
-    } else
-        snprintf(avc->filename, 1024, "rtp://0.0.0.0");
-
-    for(i = 0; i < stream->nb_streams; i++) {
-        AVStream *st = avformat_new_stream(avc, NULL);
-        if (!st)
-            goto sdp_done;
-        avcodec_parameters_from_context(stream->streams[i]->codecpar, stream->streams[i]->codec);
-        unlayer_stream(st, stream->streams[i]);
-    }
-#define PBUFFER_SIZE 2048
-    *pbuffer = av_mallocz(PBUFFER_SIZE);
-    if (!*pbuffer)
-        goto sdp_done;
-    av_sdp_create(&avc, 1, *pbuffer, PBUFFER_SIZE);
-
- sdp_done:
-    av_freep(&avc->streams);
-    av_dict_free(&avc->metadata);
-    av_free(avc);
-
-    return *pbuffer ? strlen(*pbuffer) : AVERROR(ENOMEM);
-}
-
-static void rtsp_cmd_options(HTTPContext *c, const char *url)
-{
-    /* rtsp_reply_header(c, RTSP_STATUS_OK); */
-    avio_printf(c->pb, "RTSP/1.0 %d %s\r\n", RTSP_STATUS_OK, "OK");
-    avio_printf(c->pb, "CSeq: %d\r\n", c->seq);
-    avio_printf(c->pb, "Public: %s\r\n",
-                "OPTIONS, DESCRIBE, SETUP, TEARDOWN, PLAY, PAUSE");
-    avio_printf(c->pb, "\r\n");
-}
-
-static void rtsp_cmd_describe(HTTPContext *c, const char *url)
-{
-    FFServerStream *stream;
-    char path1[1024];
-    const char *path;
-    uint8_t *content;
-    int content_length;
-    socklen_t len;
-    struct sockaddr_in my_addr;
-
-    /* find which URL is asked */
-    av_url_split(NULL, 0, NULL, 0, NULL, 0, NULL, path1, sizeof(path1), url);
-    path = path1;
-    if (*path == '/')
-        path++;
-
-    for(stream = config.first_stream; stream; stream = stream->next) {
-        if (!stream->is_feed &&
-            stream->fmt && !strcmp(stream->fmt->name, "rtp") &&
-            !strcmp(path, stream->filename)) {
-            goto found;
-        }
-    }
-    /* no stream found */
-    rtsp_reply_error(c, RTSP_STATUS_NOT_FOUND);
-    return;
-
- found:
-    /* prepare the media description in SDP format */
-
-    /* get the host IP */
-    len = sizeof(my_addr);
-    getsockname(c->fd, (struct sockaddr *)&my_addr, &len);
-    content_length = prepare_sdp_description(stream, &content,
-                                             my_addr.sin_addr);
-    if (content_length < 0) {
-        rtsp_reply_error(c, RTSP_STATUS_INTERNAL);
-        return;
-    }
-    rtsp_reply_header(c, RTSP_STATUS_OK);
-    avio_printf(c->pb, "Content-Base: %s/\r\n", url);
-    avio_printf(c->pb, "Content-Type: application/sdp\r\n");
-    avio_printf(c->pb, "Content-Length: %d\r\n", content_length);
-    avio_printf(c->pb, "\r\n");
-    avio_write(c->pb, content, content_length);
-    av_free(content);
-}
-
-static HTTPContext *find_rtp_session(const char *session_id)
-{
-    HTTPContext *c;
-
-    if (session_id[0] == '\0')
-        return NULL;
-
-    for(c = first_http_ctx; c; c = c->next) {
-        if (!strcmp(c->session_id, session_id))
-            return c;
-    }
-    return NULL;
-}
-
-static RTSPTransportField *find_transport(RTSPMessageHeader *h, enum RTSPLowerTransport lower_transport)
-{
-    RTSPTransportField *th;
-    int i;
-
-    for(i=0;i<h->nb_transports;i++) {
-        th = &h->transports[i];
-        if (th->lower_transport == lower_transport)
-            return th;
-    }
-    return NULL;
-}
-
-static void rtsp_cmd_setup(HTTPContext *c, const char *url,
-                           RTSPMessageHeader *h)
-{
-    FFServerStream *stream;
-    int stream_index, rtp_port, rtcp_port;
-    char buf[1024];
-    char path1[1024];
-    const char *path;
-    HTTPContext *rtp_c;
-    RTSPTransportField *th;
-    struct sockaddr_in dest_addr;
-    RTSPActionServerSetup setup;
-
-    /* find which URL is asked */
-    av_url_split(NULL, 0, NULL, 0, NULL, 0, NULL, path1, sizeof(path1), url);
-    path = path1;
-    if (*path == '/')
-        path++;
-
-    /* now check each stream */
-    for(stream = config.first_stream; stream; stream = stream->next) {
-        if (stream->is_feed || !stream->fmt ||
-            strcmp(stream->fmt->name, "rtp")) {
-            continue;
-        }
-        /* accept aggregate filenames only if single stream */
-        if (!strcmp(path, stream->filename)) {
-            if (stream->nb_streams != 1) {
-                rtsp_reply_error(c, RTSP_STATUS_AGGREGATE);
-                return;
-            }
-            stream_index = 0;
-            goto found;
-        }
-
-        for(stream_index = 0; stream_index < stream->nb_streams;
-            stream_index++) {
-            snprintf(buf, sizeof(buf), "%s/streamid=%d",
-                     stream->filename, stream_index);
-            if (!strcmp(path, buf))
-                goto found;
-        }
-    }
-    /* no stream found */
-    rtsp_reply_error(c, RTSP_STATUS_SERVICE); /* XXX: right error ? */
-    return;
- found:
-
-    /* generate session id if needed */
-    if (h->session_id[0] == '\0') {
-        unsigned random0 = av_lfg_get(&random_state);
-        unsigned random1 = av_lfg_get(&random_state);
-        snprintf(h->session_id, sizeof(h->session_id), "%08x%08x",
-                 random0, random1);
-    }
-
-    /* find RTP session, and create it if none found */
-    rtp_c = find_rtp_session(h->session_id);
-    if (!rtp_c) {
-        /* always prefer UDP */
-        th = find_transport(h, RTSP_LOWER_TRANSPORT_UDP);
-        if (!th) {
-            th = find_transport(h, RTSP_LOWER_TRANSPORT_TCP);
-            if (!th) {
-                rtsp_reply_error(c, RTSP_STATUS_TRANSPORT);
-                return;
-            }
-        }
-
-        rtp_c = rtp_new_connection(&c->from_addr, stream, h->session_id,
-                                   th->lower_transport);
-        if (!rtp_c) {
-            rtsp_reply_error(c, RTSP_STATUS_BANDWIDTH);
-            return;
-        }
-
-        /* open input stream */
-        if (open_input_stream(rtp_c, "") < 0) {
-            rtsp_reply_error(c, RTSP_STATUS_INTERNAL);
-            return;
-        }
-    }
-
-    /* test if stream is OK (test needed because several SETUP needs
-     * to be done for a given file) */
-    if (rtp_c->stream != stream) {
-        rtsp_reply_error(c, RTSP_STATUS_SERVICE);
-        return;
-    }
-
-    /* test if stream is already set up */
-    if (rtp_c->rtp_ctx[stream_index]) {
-        rtsp_reply_error(c, RTSP_STATUS_STATE);
-        return;
-    }
-
-    /* check transport */
-    th = find_transport(h, rtp_c->rtp_protocol);
-    if (!th || (th->lower_transport == RTSP_LOWER_TRANSPORT_UDP &&
-                th->client_port_min <= 0)) {
-        rtsp_reply_error(c, RTSP_STATUS_TRANSPORT);
-        return;
-    }
-
-    /* setup default options */
-    setup.transport_option[0] = '\0';
-    dest_addr = rtp_c->from_addr;
-    dest_addr.sin_port = htons(th->client_port_min);
-
-    /* setup stream */
-    if (rtp_new_av_stream(rtp_c, stream_index, &dest_addr, c) < 0) {
-        rtsp_reply_error(c, RTSP_STATUS_TRANSPORT);
-        return;
-    }
-
-    /* now everything is OK, so we can send the connection parameters */
-    rtsp_reply_header(c, RTSP_STATUS_OK);
-    /* session ID */
-    avio_printf(c->pb, "Session: %s\r\n", rtp_c->session_id);
-
-    switch(rtp_c->rtp_protocol) {
-    case RTSP_LOWER_TRANSPORT_UDP:
-        rtp_port = ff_rtp_get_local_rtp_port(rtp_c->rtp_handles[stream_index]);
-        rtcp_port = ff_rtp_get_local_rtcp_port(rtp_c->rtp_handles[stream_index]);
-        avio_printf(c->pb, "Transport: RTP/AVP/UDP;unicast;"
-                    "client_port=%d-%d;server_port=%d-%d",
-                    th->client_port_min, th->client_port_max,
-                    rtp_port, rtcp_port);
-        break;
-    case RTSP_LOWER_TRANSPORT_TCP:
-        avio_printf(c->pb, "Transport: RTP/AVP/TCP;interleaved=%d-%d",
-                    stream_index * 2, stream_index * 2 + 1);
-        break;
-    default:
-        break;
-    }
-    if (setup.transport_option[0] != '\0')
-        avio_printf(c->pb, ";%s", setup.transport_option);
-    avio_printf(c->pb, "\r\n");
-
-
-    avio_printf(c->pb, "\r\n");
-}
-
-
-/**
- * find an RTP connection by using the session ID. Check consistency
- * with filename
- */
-static HTTPContext *find_rtp_session_with_url(const char *url,
-                                              const char *session_id)
-{
-    HTTPContext *rtp_c;
-    char path1[1024];
-    const char *path;
-    char buf[1024];
-    int s, len;
-
-    rtp_c = find_rtp_session(session_id);
-    if (!rtp_c)
-        return NULL;
-
-    /* find which URL is asked */
-    av_url_split(NULL, 0, NULL, 0, NULL, 0, NULL, path1, sizeof(path1), url);
-    path = path1;
-    if (*path == '/')
-        path++;
-    if(!strcmp(path, rtp_c->stream->filename)) return rtp_c;
-    for(s=0; s<rtp_c->stream->nb_streams; ++s) {
-      snprintf(buf, sizeof(buf), "%s/streamid=%d",
-        rtp_c->stream->filename, s);
-      if(!strncmp(path, buf, sizeof(buf)))
-        /* XXX: Should we reply with RTSP_STATUS_ONLY_AGGREGATE
-         * if nb_streams>1? */
-        return rtp_c;
-    }
-    len = strlen(path);
-    if (len > 0 && path[len - 1] == '/' &&
-        !strncmp(path, rtp_c->stream->filename, len - 1))
-        return rtp_c;
-    return NULL;
-}
-
-static void rtsp_cmd_play(HTTPContext *c, const char *url, RTSPMessageHeader *h)
-{
-    HTTPContext *rtp_c;
-
-    rtp_c = find_rtp_session_with_url(url, h->session_id);
-    if (!rtp_c) {
-        rtsp_reply_error(c, RTSP_STATUS_SESSION);
-        return;
-    }
-
-    if (rtp_c->state != HTTPSTATE_SEND_DATA &&
-        rtp_c->state != HTTPSTATE_WAIT_FEED &&
-        rtp_c->state != HTTPSTATE_READY) {
-        rtsp_reply_error(c, RTSP_STATUS_STATE);
-        return;
-    }
-
-    rtp_c->state = HTTPSTATE_SEND_DATA;
-
-    /* now everything is OK, so we can send the connection parameters */
-    rtsp_reply_header(c, RTSP_STATUS_OK);
-    /* session ID */
-    avio_printf(c->pb, "Session: %s\r\n", rtp_c->session_id);
-    avio_printf(c->pb, "\r\n");
-}
-
-static void rtsp_cmd_interrupt(HTTPContext *c, const char *url,
-                               RTSPMessageHeader *h, int pause_only)
-{
-    HTTPContext *rtp_c;
-
-    rtp_c = find_rtp_session_with_url(url, h->session_id);
-    if (!rtp_c) {
-        rtsp_reply_error(c, RTSP_STATUS_SESSION);
-        return;
-    }
-
-    if (pause_only) {
-        if (rtp_c->state != HTTPSTATE_SEND_DATA &&
-            rtp_c->state != HTTPSTATE_WAIT_FEED) {
-            rtsp_reply_error(c, RTSP_STATUS_STATE);
-            return;
-        }
-        rtp_c->state = HTTPSTATE_READY;
-        rtp_c->first_pts = AV_NOPTS_VALUE;
-    }
-
-    /* now everything is OK, so we can send the connection parameters */
-    rtsp_reply_header(c, RTSP_STATUS_OK);
-    /* session ID */
-    avio_printf(c->pb, "Session: %s\r\n", rtp_c->session_id);
-    avio_printf(c->pb, "\r\n");
-
-    if (!pause_only)
-        close_connection(rtp_c);
-}
-
-/********************************************************************/
-/* RTP handling */
-
-static HTTPContext *rtp_new_connection(struct sockaddr_in *from_addr,
-                                       FFServerStream *stream,
-                                       const char *session_id,
-                                       enum RTSPLowerTransport rtp_protocol)
-{
-    HTTPContext *c = NULL;
-    const char *proto_str;
-
-    /* XXX: should output a warning page when coming
-     * close to the connection limit */
-    if (nb_connections >= config.nb_max_connections)
-        goto fail;
-
-    /* add a new connection */
-    c = av_mallocz(sizeof(HTTPContext));
-    if (!c)
-        goto fail;
-
-    c->fd = -1;
-    c->poll_entry = NULL;
-    c->from_addr = *from_addr;
-    c->buffer_size = IOBUFFER_INIT_SIZE;
-    c->buffer = av_malloc(c->buffer_size);
-    if (!c->buffer)
-        goto fail;
-    nb_connections++;
-    c->stream = stream;
-    av_strlcpy(c->session_id, session_id, sizeof(c->session_id));
-    c->state = HTTPSTATE_READY;
-    c->is_packetized = 1;
-    c->rtp_protocol = rtp_protocol;
-
-    /* protocol is shown in statistics */
-    switch(c->rtp_protocol) {
-    case RTSP_LOWER_TRANSPORT_UDP_MULTICAST:
-        proto_str = "MCAST";
-        break;
-    case RTSP_LOWER_TRANSPORT_UDP:
-        proto_str = "UDP";
-        break;
-    case RTSP_LOWER_TRANSPORT_TCP:
-        proto_str = "TCP";
-        break;
-    default:
-        proto_str = "???";
-        break;
-    }
-    av_strlcpy(c->protocol, "RTP/", sizeof(c->protocol));
-    av_strlcat(c->protocol, proto_str, sizeof(c->protocol));
-
-    current_bandwidth += stream->bandwidth;
-
-    c->next = first_http_ctx;
-    first_http_ctx = c;
-    return c;
-
- fail:
-    if (c) {
-        av_freep(&c->buffer);
-        av_free(c);
-    }
-    return NULL;
-}
-
-/**
- * add a new RTP stream in an RTP connection (used in RTSP SETUP
- * command). If RTP/TCP protocol is used, TCP connection 'rtsp_c' is
- * used.
- */
-static int rtp_new_av_stream(HTTPContext *c,
-                             int stream_index, struct sockaddr_in *dest_addr,
-                             HTTPContext *rtsp_c)
-{
-    AVFormatContext *ctx;
-    AVStream *st;
-    char *ipaddr;
-    URLContext *h = NULL;
-    uint8_t *dummy_buf;
-    int max_packet_size;
-    void *st_internal;
-
-    /* now we can open the relevant output stream */
-    ctx = avformat_alloc_context();
-    if (!ctx)
-        return -1;
-    ctx->oformat = av_guess_format("rtp", NULL, NULL);
-
-    st = avformat_new_stream(ctx, NULL);
-    if (!st)
-        goto fail;
-
-    st_internal = st->internal;
-
-    if (!c->stream->feed ||
-        c->stream->feed == c->stream)
-        unlayer_stream(st, c->stream->streams[stream_index]);
-    else
-        unlayer_stream(st,
-               c->stream->feed->streams[c->stream->feed_streams[stream_index]]);
-    av_assert0(st->priv_data == NULL);
-    av_assert0(st->internal == st_internal);
-
-    /* build destination RTP address */
-    ipaddr = inet_ntoa(dest_addr->sin_addr);
-
-    switch(c->rtp_protocol) {
-    case RTSP_LOWER_TRANSPORT_UDP:
-    case RTSP_LOWER_TRANSPORT_UDP_MULTICAST:
-        /* RTP/UDP case */
-
-        /* XXX: also pass as parameter to function ? */
-        if (c->stream->is_multicast) {
-            int ttl;
-            ttl = c->stream->multicast_ttl;
-            if (!ttl)
-                ttl = 16;
-            snprintf(ctx->filename, sizeof(ctx->filename),
-                     "rtp://%s:%d?multicast=1&ttl=%d",
-                     ipaddr, ntohs(dest_addr->sin_port), ttl);
-        } else {
-            snprintf(ctx->filename, sizeof(ctx->filename),
-                     "rtp://%s:%d", ipaddr, ntohs(dest_addr->sin_port));
-        }
-
-        if (ffurl_open(&h, ctx->filename, AVIO_FLAG_WRITE, NULL, NULL) < 0)
-            goto fail;
-        c->rtp_handles[stream_index] = h;
-        max_packet_size = h->max_packet_size;
-        break;
-    case RTSP_LOWER_TRANSPORT_TCP:
-        /* RTP/TCP case */
-        c->rtsp_c = rtsp_c;
-        max_packet_size = RTSP_TCP_MAX_PACKET_SIZE;
-        break;
-    default:
-        goto fail;
-    }
-
-    http_log("%s:%d - - \"PLAY %s/streamid=%d %s\"\n",
-             ipaddr, ntohs(dest_addr->sin_port),
-             c->stream->filename, stream_index, c->protocol);
-
-    /* normally, no packets should be output here, but the packet size may
-     * be checked */
-    if (ffio_open_dyn_packet_buf(&ctx->pb, max_packet_size) < 0)
-        /* XXX: close stream */
-        goto fail;
-
-    if (avformat_write_header(ctx, NULL) < 0) {
-    fail:
-        if (h)
-            ffurl_close(h);
-        av_free(st);
-        av_free(ctx);
-        return -1;
-    }
-    avio_close_dyn_buf(ctx->pb, &dummy_buf);
-    ctx->pb = NULL;
-    av_free(dummy_buf);
-
-    c->rtp_ctx[stream_index] = ctx;
-    return 0;
-}
-
-/********************************************************************/
-/* ffserver initialization */
-
-/* FIXME: This code should use avformat_new_stream() */
-static LayeredAVStream *add_av_stream1(FFServerStream *stream,
-                                AVCodecContext *codec, int copy)
-{
-    LayeredAVStream *fst;
-
-    if(stream->nb_streams >= FF_ARRAY_ELEMS(stream->streams))
-        return NULL;
-
-    fst = av_mallocz(sizeof(*fst));
-    if (!fst)
-        return NULL;
-    if (copy) {
-        fst->codec = avcodec_alloc_context3(codec->codec);
-        if (!fst->codec) {
-            av_free(fst);
-            return NULL;
-        }
-        avcodec_copy_context(fst->codec, codec);
-    } else
-        /* live streams must use the actual feed's codec since it may be
-         * updated later to carry extradata needed by them.
-         */
-        fst->codec = codec;
-
-    //NOTE we previously allocated internal & internal->avctx, these seemed uneeded though
-    fst->codecpar = avcodec_parameters_alloc();
-    fst->index = stream->nb_streams;
-    fst->time_base = codec->time_base;
-    fst->pts_wrap_bits = 33;
-    fst->sample_aspect_ratio = codec->sample_aspect_ratio;
-    stream->streams[stream->nb_streams++] = fst;
-    return fst;
-}
-
-/* return the stream number in the feed */
-static int add_av_stream(FFServerStream *feed, LayeredAVStream *st)
-{
-    LayeredAVStream *fst;
-    AVCodecContext *av, *av1;
-    int i;
-
-    av = st->codec;
-    for(i=0;i<feed->nb_streams;i++) {
-        av1 = feed->streams[i]->codec;
-        if (av1->codec_id == av->codec_id &&
-            av1->codec_type == av->codec_type &&
-            av1->bit_rate == av->bit_rate) {
-
-            switch(av->codec_type) {
-            case AVMEDIA_TYPE_AUDIO:
-                if (av1->channels == av->channels &&
-                    av1->sample_rate == av->sample_rate)
-                    return i;
-                break;
-            case AVMEDIA_TYPE_VIDEO:
-                if (av1->width == av->width &&
-                    av1->height == av->height &&
-                    av1->time_base.den == av->time_base.den &&
-                    av1->time_base.num == av->time_base.num &&
-                    av1->gop_size == av->gop_size)
-                    return i;
-                break;
-            default:
-                abort();
-            }
-        }
-    }
-
-    fst = add_av_stream1(feed, av, 0);
-    if (!fst)
-        return -1;
-    if (st->recommended_encoder_configuration)
-        fst->recommended_encoder_configuration =
-            av_strdup(st->recommended_encoder_configuration);
-    return feed->nb_streams - 1;
-}
-
-static void remove_stream(FFServerStream *stream)
-{
-    FFServerStream **ps;
-    ps = &config.first_stream;
-    while (*ps) {
-        if (*ps == stream)
-            *ps = (*ps)->next;
-        else
-            ps = &(*ps)->next;
-    }
-}
-
-/* compute the needed AVStream for each file */
-static void build_file_streams(void)
-{
-    FFServerStream *stream;
-    AVFormatContext *infile;
-    int i, ret;
-
-    /* gather all streams */
-    for(stream = config.first_stream; stream; stream = stream->next) {
-        infile = NULL;
-
-        if (stream->stream_type != STREAM_TYPE_LIVE || stream->feed)
-            continue;
-
-        /* the stream comes from a file */
-        /* try to open the file */
-        /* open stream */
-
-
-        /* specific case: if transport stream output to RTP,
-         * we use a raw transport stream reader */
-        if (stream->fmt && !strcmp(stream->fmt->name, "rtp"))
-            av_dict_set(&stream->in_opts, "mpeg2ts_compute_pcr", "1", 0);
-
-        if (!stream->feed_filename[0]) {
-            http_log("Unspecified feed file for stream '%s'\n",
-                     stream->filename);
-            goto fail;
-        }
-
-        http_log("Opening feed file '%s' for stream '%s'\n",
-                 stream->feed_filename, stream->filename);
-
-        ret = avformat_open_input(&infile, stream->feed_filename,
-                                  stream->ifmt, &stream->in_opts);
-        if (ret < 0) {
-            http_log("Could not open '%s': %s\n", stream->feed_filename,
-                     av_err2str(ret));
-            /* remove stream (no need to spend more time on it) */
-        fail:
-            remove_stream(stream);
-        } else {
-            /* find all the AVStreams inside and reference them in
-             * 'stream' */
-            if (avformat_find_stream_info(infile, NULL) < 0) {
-                http_log("Could not find codec parameters from '%s'\n",
-                         stream->feed_filename);
-                avformat_close_input(&infile);
-                goto fail;
-            }
-
-            for(i=0;i<infile->nb_streams;i++)
-                add_av_stream1(stream, infile->streams[i]->codec, 1);
-
-            avformat_close_input(&infile);
-        }
-    }
-}
-
-static inline
-int check_codec_match(LayeredAVStream *ccf, AVStream *ccs, int stream)
-{
-    int matches = 1;
-
-/* FIXME: Missed check on AVCodecContext.flags */
-#define CHECK_CODEC(x)  (ccf->codecpar->x != ccs->codecpar->x)
-    if (CHECK_CODEC(codec_id) || CHECK_CODEC(codec_type)) {
-        http_log("Codecs do not match for stream %d\n", stream);
-        matches = 0;
-    } else if (CHECK_CODEC(bit_rate)) {
-        http_log("Codec bitrates do not match for stream %d\n", stream);
-        matches = 0;
-    } else if (ccf->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
-        if (av_cmp_q(ccf->time_base, ccs->time_base) ||
-            CHECK_CODEC(width) || CHECK_CODEC(height)) {
-            http_log("Codec width, height or framerate do not match for stream %d\n", stream);
-            matches = 0;
-        }
-    } else if (ccf->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-        if (CHECK_CODEC(sample_rate) ||
-            CHECK_CODEC(channels) ||
-            CHECK_CODEC(frame_size)) {
-            http_log("Codec sample_rate, channels, frame_size do not match for stream %d\n", stream);
-            matches = 0;
-        }
-    } else {
-        http_log("Unknown codec type for stream %d\n", stream);
-        matches = 0;
-    }
-
-    return matches;
-}
-
-/* compute the needed AVStream for each feed */
-static int build_feed_streams(void)
-{
-    FFServerStream *stream, *feed;
-    int i, fd;
-
-    /* gather all streams */
-    for(stream = config.first_stream; stream; stream = stream->next) {
-        feed = stream->feed;
-        if (!feed)
-            continue;
-
-        if (stream->is_feed) {
-            for(i=0;i<stream->nb_streams;i++)
-                stream->feed_streams[i] = i;
-            continue;
-        }
-        /* we handle a stream coming from a feed */
-        for(i=0;i<stream->nb_streams;i++)
-            stream->feed_streams[i] = add_av_stream(feed, stream->streams[i]);
-    }
-
-    /* create feed files if needed */
-    for(feed = config.first_feed; feed; feed = feed->next_feed) {
-
-        if (avio_check(feed->feed_filename, AVIO_FLAG_READ) > 0) {
-            AVFormatContext *s = NULL;
-            int matches = 0;
-
-            /* See if it matches */
-
-            if (avformat_open_input(&s, feed->feed_filename, NULL, NULL) < 0) {
-                http_log("Deleting feed file '%s' as it appears "
-                            "to be corrupt\n",
-                         feed->feed_filename);
-                goto drop;
-            }
-
-            /* set buffer size */
-            if (ffio_set_buf_size(s->pb, FFM_PACKET_SIZE) < 0) {
-                http_log("Failed to set buffer size\n");
-                avformat_close_input(&s);
-                goto bail;
-            }
-
-            /* Now see if it matches */
-            if (s->nb_streams != feed->nb_streams) {
-                http_log("Deleting feed file '%s' as stream counts "
-                            "differ (%d != %d)\n",
-                         feed->feed_filename, s->nb_streams, feed->nb_streams);
-                goto drop;
-            }
-
-            matches = 1;
-            for(i=0;i<s->nb_streams;i++) {
-                AVStream *ss;
-                LayeredAVStream *sf;
-
-                sf = feed->streams[i];
-                ss = s->streams[i];
-
-                if (sf->index != ss->index || sf->id != ss->id) {
-                    http_log("Index & Id do not match for stream %d (%s)\n",
-                             i, feed->feed_filename);
-                    matches = 0;
-                    break;
-                }
-
-                matches = check_codec_match (sf, ss, i);
-                if (!matches)
-                    break;
-            }
-
-drop:
-            if (s)
-                avformat_close_input(&s);
-
-            if (!matches) {
-                if (feed->readonly) {
-                    http_log("Unable to delete read-only feed file '%s'\n",
-                             feed->feed_filename);
-                    goto bail;
-                }
-                unlink(feed->feed_filename);
-            }
-        }
-
-        if (avio_check(feed->feed_filename, AVIO_FLAG_WRITE) <= 0) {
-            AVFormatContext *s = avformat_alloc_context();
-
-            if (!s) {
-                http_log("Failed to allocate context\n");
-                goto bail;
-            }
-
-            if (feed->readonly) {
-                http_log("Unable to create feed file '%s' as it is "
-                            "marked readonly\n",
-                         feed->feed_filename);
-                avformat_free_context(s);
-                goto bail;
-            }
-
-            /* only write the header of the ffm file */
-            if (avio_open(&s->pb, feed->feed_filename, AVIO_FLAG_WRITE) < 0) {
-                http_log("Could not open output feed file '%s'\n",
-                         feed->feed_filename);
-                avformat_free_context(s);
-                goto bail;
-            }
-            s->oformat = feed->fmt;
-            for (i = 0; i<feed->nb_streams; i++) {
-                AVStream *st = avformat_new_stream(s, NULL); // FIXME free this
-                if (!st) {
-                    http_log("Failed to allocate stream\n");
-                    goto bail;
-                }
-                unlayer_stream(st, feed->streams[i]);
-            }
-            if (avformat_write_header(s, NULL) < 0) {
-                http_log("Container doesn't support the required parameters\n");
-                avio_closep(&s->pb);
-                s->streams = NULL;
-                s->nb_streams = 0;
-                avformat_free_context(s);
-                goto bail;
-            }
-            /* XXX: need better API */
-            av_freep(&s->priv_data);
-            avio_closep(&s->pb);
-            s->streams = NULL;
-            s->nb_streams = 0;
-            avformat_free_context(s);
-        }
-
-        /* get feed size and write index */
-        fd = open(feed->feed_filename, O_RDONLY);
-        if (fd < 0) {
-            http_log("Could not open output feed file '%s'\n",
-                    feed->feed_filename);
-            goto bail;
-        }
-
-        feed->feed_write_index = FFMAX(ffm_read_write_index(fd),
-                                       FFM_PACKET_SIZE);
-        feed->feed_size = lseek(fd, 0, SEEK_END);
-        /* ensure that we do not wrap before the end of file */
-        if (feed->feed_max_size && feed->feed_max_size < feed->feed_size)
-            feed->feed_max_size = feed->feed_size;
-
-        close(fd);
-    }
-    return 0;
-
-bail:
-    return -1;
-}
-
-/* compute the bandwidth used by each stream */
-static void compute_bandwidth(void)
-{
-    unsigned bandwidth;
-    int i;
-    FFServerStream *stream;
-
-    for(stream = config.first_stream; stream; stream = stream->next) {
-        bandwidth = 0;
-        for(i=0;i<stream->nb_streams;i++) {
-            LayeredAVStream *st = stream->streams[i];
-            switch(st->codec->codec_type) {
-            case AVMEDIA_TYPE_AUDIO:
-            case AVMEDIA_TYPE_VIDEO:
-                bandwidth += st->codec->bit_rate;
-                break;
-            default:
-                break;
-            }
-        }
-        stream->bandwidth = (bandwidth + 999) / 1000;
-    }
-}
-
-static void handle_child_exit(int sig)
-{
-    pid_t pid;
-    int status;
-    time_t uptime;
-
-    while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
-        FFServerStream *feed;
-
-        for (feed = config.first_feed; feed; feed = feed->next) {
-            if (feed->pid != pid)
-                continue;
-
-            uptime = time(0) - feed->pid_start;
-            feed->pid = 0;
-            fprintf(stderr,
-                    "%s: Pid %"PRId64" exited with status %d after %"PRId64" "
-                        "seconds\n",
-                    feed->filename, (int64_t) pid, status, (int64_t)uptime);
-
-            if (uptime < 30)
-                /* Turn off any more restarts */
-                ffserver_free_child_args(&feed->child_argv);
-        }
-    }
-
-    need_to_start_children = 1;
-}
-
-static void opt_debug(void)
-{
-    config.debug = 1;
-    snprintf(config.logfilename, sizeof(config.logfilename), "-");
-}
-
-void show_help_default(const char *opt, const char *arg)
-{
-    printf("usage: ffserver [options]\n"
-           "Hyper fast multi format Audio/Video streaming server\n");
-    printf("\n");
-    show_help_options(options, "Main options:", 0, 0, 0);
-}
-
-static const OptionDef options[] = {
-    CMDUTILS_COMMON_OPTIONS
-    { "n", OPT_BOOL, {(void *)&no_launch }, "enable no-launch mode" },
-    { "d", 0, {(void*)opt_debug}, "enable debug mode" },
-    { "f", HAS_ARG | OPT_STRING, {(void*)&config.filename }, "use configfile instead of /etc/ffserver.conf", "configfile" },
-    { NULL },
-};
-
-int main(int argc, char **argv)
-{
-    struct sigaction sigact = { { 0 } };
-    int cfg_parsed;
-    int ret = EXIT_FAILURE;
-
-    init_dynload();
-
-    config.filename = av_strdup("/etc/ffserver.conf");
-
-    parse_loglevel(argc, argv, options);
-    av_register_all();
-    avformat_network_init();
-
-    show_banner(argc, argv, options);
-
-    my_program_name = argv[0];
-
-    parse_options(NULL, argc, argv, options, NULL);
-
-    unsetenv("http_proxy");             /* Kill the http_proxy */
-
-    av_lfg_init(&random_state, av_get_random_seed());
-
-    sigact.sa_handler = handle_child_exit;
-    sigact.sa_flags = SA_NOCLDSTOP | SA_RESTART;
-    sigaction(SIGCHLD, &sigact, 0);
-
-    if ((cfg_parsed = ffserver_parse_ffconfig(config.filename, &config)) < 0) {
-        fprintf(stderr, "Error reading configuration file '%s': %s\n",
-                config.filename, av_err2str(cfg_parsed));
-        goto bail;
-    }
-
-    /* open log file if needed */
-    if (config.logfilename[0] != '\0') {
-        if (!strcmp(config.logfilename, "-"))
-            logfile = stdout;
-        else
-            logfile = fopen(config.logfilename, "a");
-        av_log_set_callback(http_av_log);
-    }
-
-    build_file_streams();
-
-    if (build_feed_streams() < 0) {
-        http_log("Could not setup feed streams\n");
-        goto bail;
-    }
-
-    compute_bandwidth();
-
-    /* signal init */
-    signal(SIGPIPE, SIG_IGN);
-
-    if (http_server() < 0) {
-        http_log("Could not start server\n");
-        goto bail;
-    }
-
-    ret=EXIT_SUCCESS;
-
-bail:
-    av_freep (&config.filename);
-    avformat_network_deinit();
-    return ret;
-}

diff --git a/fftools/ffserver_config.c b/fftools/ffserver_config.c
deleted file mode 100644
index 54135be..0000000
--- a/fftools/ffserver_config.c
+++ /dev/null

@@ -1,1325 +0,0 @@
-/*
- * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <float.h>
-#include "libavutil/opt.h"
-#include "libavutil/parseutils.h"
-#include "libavutil/avstring.h"
-#include "libavutil/pixdesc.h"
-#include "libavutil/avassert.h"
-
-#include "cmdutils.h"
-#include "ffserver_config.h"
-
-#define MAX_CHILD_ARGS 64
-
-static int ffserver_save_avoption(const char *opt, const char *arg, int type,
-                                  FFServerConfig *config);
-static void vreport_config_error(const char *filename, int line_num,
-                                 int log_level, int *errors, const char *fmt,
-                                 va_list vl);
-static void report_config_error(const char *filename, int line_num,
-                                int log_level, int *errors, const char *fmt,
-                                ...);
-
-#define ERROR(...) report_config_error(config->filename, config->line_num,\
-                                       AV_LOG_ERROR, &config->errors, __VA_ARGS__)
-#define WARNING(...) report_config_error(config->filename, config->line_num,\
-                                         AV_LOG_WARNING, &config->warnings, __VA_ARGS__)
-
-/* FIXME: make ffserver work with IPv6 */
-/* resolve host with also IP address parsing */
-static int resolve_host(struct in_addr *sin_addr, const char *hostname)
-{
-
-    if (!ff_inet_aton(hostname, sin_addr)) {
-#if HAVE_GETADDRINFO
-        struct addrinfo *ai, *cur;
-        struct addrinfo hints = { 0 };
-        hints.ai_family = AF_INET;
-        if (getaddrinfo(hostname, NULL, &hints, &ai))
-            return -1;
-        /* getaddrinfo returns a linked list of addrinfo structs.
-         * Even if we set ai_family = AF_INET above, make sure
-         * that the returned one actually is of the correct type. */
-        for (cur = ai; cur; cur = cur->ai_next) {
-            if (cur->ai_family == AF_INET) {
-                *sin_addr = ((struct sockaddr_in *)cur->ai_addr)->sin_addr;
-                freeaddrinfo(ai);
-                return 0;
-            }
-        }
-        freeaddrinfo(ai);
-        return -1;
-#else
-        struct hostent *hp;
-        hp = gethostbyname(hostname);
-        if (!hp)
-            return -1;
-        memcpy(sin_addr, hp->h_addr_list[0], sizeof(struct in_addr));
-#endif
-    }
-    return 0;
-}
-
-void ffserver_get_arg(char *buf, int buf_size, const char **pp)
-{
-    const char *p;
-    char *q;
-    int quote = 0;
-
-    p = *pp;
-    q = buf;
-
-    while (av_isspace(*p)) p++;
-
-    if (*p == '\"' || *p == '\'')
-        quote = *p++;
-
-    while (*p != '\0') {
-        if (quote && *p == quote || !quote && av_isspace(*p))
-            break;
-        if ((q - buf) < buf_size - 1)
-            *q++ = *p;
-        p++;
-    }
-
-    *q = '\0';
-    if (quote && *p == quote)
-        p++;
-    *pp = p;
-}
-
-void ffserver_parse_acl_row(FFServerStream *stream, FFServerStream* feed,
-                            FFServerIPAddressACL *ext_acl,
-                            const char *p, const char *filename, int line_num)
-{
-    char arg[1024];
-    FFServerIPAddressACL acl;
-    FFServerIPAddressACL *nacl;
-    FFServerIPAddressACL **naclp;
-
-    ffserver_get_arg(arg, sizeof(arg), &p);
-    if (av_strcasecmp(arg, "allow") == 0)
-        acl.action = IP_ALLOW;
-    else if (av_strcasecmp(arg, "deny") == 0)
-        acl.action = IP_DENY;
-    else {
-        fprintf(stderr, "%s:%d: ACL action '%s' should be ALLOW or DENY.\n",
-                filename, line_num, arg);
-        goto bail;
-    }
-
-    ffserver_get_arg(arg, sizeof(arg), &p);
-
-    if (resolve_host(&acl.first, arg)) {
-        fprintf(stderr,
-                "%s:%d: ACL refers to invalid host or IP address '%s'\n",
-                filename, line_num, arg);
-        goto bail;
-    }
-
-    acl.last = acl.first;
-
-    ffserver_get_arg(arg, sizeof(arg), &p);
-
-    if (arg[0]) {
-        if (resolve_host(&acl.last, arg)) {
-            fprintf(stderr,
-                    "%s:%d: ACL refers to invalid host or IP address '%s'\n",
-                    filename, line_num, arg);
-            goto bail;
-        }
-    }
-
-    nacl = av_mallocz(sizeof(*nacl));
-    if (!nacl) {
-        fprintf(stderr, "Failed to allocate FFServerIPAddressACL\n");
-        goto bail;
-    }
-
-    naclp = 0;
-
-    acl.next = 0;
-    *nacl = acl;
-
-    if (stream)
-        naclp = &stream->acl;
-    else if (feed)
-        naclp = &feed->acl;
-    else if (ext_acl)
-        naclp = &ext_acl;
-    else
-        fprintf(stderr, "%s:%d: ACL found not in <Stream> or <Feed>\n",
-                filename, line_num);
-
-    if (naclp) {
-        while (*naclp)
-            naclp = &(*naclp)->next;
-
-        *naclp = nacl;
-    } else
-        av_free(nacl);
-
-bail:
-  return;
-
-}
-
-/* add a codec and set the default parameters */
-static void add_codec(FFServerStream *stream, AVCodecContext *av,
-                      FFServerConfig *config)
-{
-    LayeredAVStream *st;
-    AVDictionary **opts, *recommended = NULL;
-    char *enc_config;
-
-    if(stream->nb_streams >= FF_ARRAY_ELEMS(stream->streams))
-        return;
-
-    opts = av->codec_type == AVMEDIA_TYPE_AUDIO ?
-           &config->audio_opts : &config->video_opts;
-    av_dict_copy(&recommended, *opts, 0);
-    av_opt_set_dict2(av->priv_data, opts, AV_OPT_SEARCH_CHILDREN);
-    av_opt_set_dict2(av, opts, AV_OPT_SEARCH_CHILDREN);
-
-    if (av_dict_count(*opts))
-        av_log(NULL, AV_LOG_WARNING,
-               "Something is wrong, %d options are not set!\n",
-               av_dict_count(*opts));
-
-    if (!config->stream_use_defaults) {
-        switch(av->codec_type) {
-        case AVMEDIA_TYPE_AUDIO:
-            if (av->bit_rate == 0)
-                report_config_error(config->filename, config->line_num,
-                                    AV_LOG_ERROR, &config->errors,
-                                    "audio bit rate is not set\n");
-            if (av->sample_rate == 0)
-                report_config_error(config->filename, config->line_num,
-                                    AV_LOG_ERROR, &config->errors,
-                                    "audio sample rate is not set\n");
-            break;
-        case AVMEDIA_TYPE_VIDEO:
-            if (av->width == 0 || av->height == 0)
-                report_config_error(config->filename, config->line_num,
-                                    AV_LOG_ERROR, &config->errors,
-                                    "video size is not set\n");
-            break;
-        default:
-            av_assert0(0);
-        }
-        goto done;
-    }
-
-    /* stream_use_defaults = true */
-
-    /* compute default parameters */
-    switch(av->codec_type) {
-    case AVMEDIA_TYPE_AUDIO:
-        if (!av_dict_get(recommended, "b", NULL, 0)) {
-            av->bit_rate = 64000;
-            av_dict_set_int(&recommended, "b", av->bit_rate, 0);
-            WARNING("Setting default value for audio bit rate = %d. "
-                    "Use NoDefaults to disable it.\n",
-                    av->bit_rate);
-        }
-        if (!av_dict_get(recommended, "ar", NULL, 0)) {
-            av->sample_rate = 22050;
-            av_dict_set_int(&recommended, "ar", av->sample_rate, 0);
-            WARNING("Setting default value for audio sample rate = %d. "
-                    "Use NoDefaults to disable it.\n",
-                    av->sample_rate);
-        }
-        if (!av_dict_get(recommended, "ac", NULL, 0)) {
-            av->channels = 1;
-            av_dict_set_int(&recommended, "ac", av->channels, 0);
-            WARNING("Setting default value for audio channel count = %d. "
-                    "Use NoDefaults to disable it.\n",
-                    av->channels);
-        }
-        break;
-    case AVMEDIA_TYPE_VIDEO:
-        if (!av_dict_get(recommended, "b", NULL, 0)) {
-            av->bit_rate = 64000;
-            av_dict_set_int(&recommended, "b", av->bit_rate, 0);
-            WARNING("Setting default value for video bit rate = %d. "
-                    "Use NoDefaults to disable it.\n",
-                    av->bit_rate);
-        }
-        if (!av_dict_get(recommended, "time_base", NULL, 0)){
-            av->time_base.den = 5;
-            av->time_base.num = 1;
-            av_dict_set(&recommended, "time_base", "1/5", 0);
-            WARNING("Setting default value for video frame rate = %d. "
-                    "Use NoDefaults to disable it.\n",
-                    av->time_base.den);
-        }
-        if (!av_dict_get(recommended, "video_size", NULL, 0)) {
-            av->width = 160;
-            av->height = 128;
-            av_dict_set(&recommended, "video_size", "160x128", 0);
-            WARNING("Setting default value for video size = %dx%d. "
-                    "Use NoDefaults to disable it.\n",
-                    av->width, av->height);
-        }
-        /* Bitrate tolerance is less for streaming */
-        if (!av_dict_get(recommended, "bt", NULL, 0)) {
-            av->bit_rate_tolerance = FFMAX(av->bit_rate / 4,
-                      (int64_t)av->bit_rate*av->time_base.num/av->time_base.den);
-            av_dict_set_int(&recommended, "bt", av->bit_rate_tolerance, 0);
-            WARNING("Setting default value for video bit rate tolerance = %d. "
-                    "Use NoDefaults to disable it.\n",
-                    av->bit_rate_tolerance);
-        }
-
-        if (!av_dict_get(recommended, "rc_eq", NULL, 0)) {
-            av->rc_eq = av_strdup("tex^qComp");
-            av_dict_set(&recommended, "rc_eq", "tex^qComp", 0);
-            WARNING("Setting default value for video rate control equation = "
-                    "%s. Use NoDefaults to disable it.\n",
-                    av->rc_eq);
-        }
-        if (!av_dict_get(recommended, "maxrate", NULL, 0)) {
-            av->rc_max_rate = av->bit_rate * 2;
-            av_dict_set_int(&recommended, "maxrate", av->rc_max_rate, 0);
-            WARNING("Setting default value for video max rate = %d. "
-                    "Use NoDefaults to disable it.\n",
-                    av->rc_max_rate);
-        }
-
-        if (av->rc_max_rate && !av_dict_get(recommended, "bufsize", NULL, 0)) {
-            av->rc_buffer_size = av->rc_max_rate;
-            av_dict_set_int(&recommended, "bufsize", av->rc_buffer_size, 0);
-            WARNING("Setting default value for video buffer size = %d. "
-                    "Use NoDefaults to disable it.\n",
-                    av->rc_buffer_size);
-        }
-        break;
-    default:
-        abort();
-    }
-
-done:
-    st = av_mallocz(sizeof(*st));
-    if (!st)
-        return;
-    av_dict_get_string(recommended, &enc_config, '=', ',');
-    av_dict_free(&recommended);
-    st->recommended_encoder_configuration = enc_config;
-    st->codec = av;
-    st->codecpar = avcodec_parameters_alloc();
-    avcodec_parameters_from_context(st->codecpar, av);
-    stream->streams[stream->nb_streams++] = st;
-}
-
-static int ffserver_set_codec(AVCodecContext *ctx, const char *codec_name,
-                              FFServerConfig *config)
-{
-    int ret;
-    AVCodec *codec = avcodec_find_encoder_by_name(codec_name);
-    if (!codec || codec->type != ctx->codec_type) {
-        report_config_error(config->filename, config->line_num, AV_LOG_ERROR,
-                            &config->errors,
-                            "Invalid codec name: '%s'\n", codec_name);
-        return 0;
-    }
-    if (ctx->codec_id == AV_CODEC_ID_NONE && !ctx->priv_data) {
-        if ((ret = avcodec_get_context_defaults3(ctx, codec)) < 0)
-            return ret;
-        ctx->codec = codec;
-    }
-    if (ctx->codec_id != codec->id)
-        report_config_error(config->filename, config->line_num, AV_LOG_ERROR,
-                            &config->errors,
-                            "Inconsistent configuration: trying to set '%s' "
-                            "codec option, but '%s' codec is used previously\n",
-                            codec_name, avcodec_get_name(ctx->codec_id));
-    return 0;
-}
-
-static int ffserver_opt_preset(const char *arg, int type, FFServerConfig *config)
-{
-    FILE *f=NULL;
-    char filename[1000], tmp[1000], tmp2[1000], line[1000];
-    int ret = 0;
-    AVCodecContext *avctx;
-    const AVCodec *codec;
-
-    switch(type) {
-    case AV_OPT_FLAG_AUDIO_PARAM:
-        avctx = config->dummy_actx;
-        break;
-    case AV_OPT_FLAG_VIDEO_PARAM:
-        avctx = config->dummy_vctx;
-        break;
-    default:
-        av_assert0(0);
-    }
-    codec = avcodec_find_encoder(avctx->codec_id);
-
-    if (!(f = get_preset_file(filename, sizeof(filename), arg, 0,
-                              codec ? codec->name : NULL))) {
-        av_log(NULL, AV_LOG_ERROR, "File for preset '%s' not found\n", arg);
-        return AVERROR(EINVAL);
-    }
-
-    while(!feof(f)){
-        int e= fscanf(f, "%999[^\n]\n", line) - 1;
-        if(line[0] == '#' && !e)
-            continue;
-        e|= sscanf(line, "%999[^=]=%999[^\n]\n", tmp, tmp2) - 2;
-        if(e){
-            av_log(NULL, AV_LOG_ERROR, "%s: Invalid syntax: '%s'\n", filename,
-                   line);
-            ret = AVERROR(EINVAL);
-            break;
-        }
-        if (!strcmp(tmp, "acodec") && avctx->codec_type == AVMEDIA_TYPE_AUDIO ||
-            !strcmp(tmp, "vcodec") && avctx->codec_type == AVMEDIA_TYPE_VIDEO)
-        {
-            if (ffserver_set_codec(avctx, tmp2, config) < 0)
-                break;
-        } else if (!strcmp(tmp, "scodec")) {
-            av_log(NULL, AV_LOG_ERROR, "Subtitles preset found.\n");
-            ret = AVERROR(EINVAL);
-            break;
-        } else if (ffserver_save_avoption(tmp, tmp2, type, config) < 0)
-            break;
-    }
-
-    fclose(f);
-
-    return ret;
-}
-
-static AVOutputFormat *ffserver_guess_format(const char *short_name,
-                                             const char *filename,
-                                             const char *mime_type)
-{
-    AVOutputFormat *fmt = av_guess_format(short_name, filename, mime_type);
-
-    if (fmt) {
-        AVOutputFormat *stream_fmt;
-        char stream_format_name[64];
-
-        snprintf(stream_format_name, sizeof(stream_format_name), "%s_stream",
-                fmt->name);
-        stream_fmt = av_guess_format(stream_format_name, NULL, NULL);
-
-        if (stream_fmt)
-            fmt = stream_fmt;
-    }
-
-    return fmt;
-}
-
-static void vreport_config_error(const char *filename, int line_num,
-                                 int log_level, int *errors, const char *fmt,
-                                 va_list vl)
-{
-    av_log(NULL, log_level, "%s:%d: ", filename, line_num);
-    av_vlog(NULL, log_level, fmt, vl);
-    if (errors)
-        (*errors)++;
-}
-
-static void report_config_error(const char *filename, int line_num,
-                                int log_level, int *errors,
-                                const char *fmt, ...)
-{
-    va_list vl;
-    va_start(vl, fmt);
-    vreport_config_error(filename, line_num, log_level, errors, fmt, vl);
-    va_end(vl);
-}
-
-static int ffserver_set_int_param(int *dest, const char *value, int factor,
-                                  int min, int max, FFServerConfig *config,
-                                  const char *error_msg, ...)
-{
-    int tmp;
-    char *tailp;
-    if (!value || !value[0])
-        goto error;
-    errno = 0;
-    tmp = strtol(value, &tailp, 0);
-    if (tmp < min || tmp > max)
-        goto error;
-    if (factor) {
-        if (tmp == INT_MIN || FFABS(tmp) > INT_MAX / FFABS(factor))
-            goto error;
-        tmp *= factor;
-    }
-    if (tailp[0] || errno)
-        goto error;
-    if (dest)
-        *dest = tmp;
-    return 0;
-  error:
-    if (config) {
-        va_list vl;
-        va_start(vl, error_msg);
-        vreport_config_error(config->filename, config->line_num, AV_LOG_ERROR,
-                &config->errors, error_msg, vl);
-        va_end(vl);
-    }
-    return AVERROR(EINVAL);
-}
-
-static int ffserver_set_float_param(float *dest, const char *value,
-                                    float factor, float min, float max,
-                                    FFServerConfig *config,
-                                    const char *error_msg, ...)
-{
-    double tmp;
-    char *tailp;
-    if (!value || !value[0])
-        goto error;
-    errno = 0;
-    tmp = strtod(value, &tailp);
-    if (tmp < min || tmp > max)
-        goto error;
-    if (factor)
-        tmp *= factor;
-    if (tailp[0] || errno)
-        goto error;
-    if (dest)
-        *dest = tmp;
-    return 0;
-  error:
-    if (config) {
-        va_list vl;
-        va_start(vl, error_msg);
-        vreport_config_error(config->filename, config->line_num, AV_LOG_ERROR,
-                &config->errors, error_msg, vl);
-        va_end(vl);
-    }
-    return AVERROR(EINVAL);
-}
-
-static int ffserver_save_avoption(const char *opt, const char *arg, int type,
-                                  FFServerConfig *config)
-{
-    static int hinted = 0;
-    int ret = 0;
-    AVDictionaryEntry *e;
-    const AVOption *o = NULL;
-    const char *option = NULL;
-    const char *codec_name = NULL;
-    char buff[1024];
-    AVCodecContext *ctx;
-    AVDictionary **dict;
-    enum AVCodecID guessed_codec_id;
-
-    switch (type) {
-    case AV_OPT_FLAG_VIDEO_PARAM:
-        ctx = config->dummy_vctx;
-        dict = &config->video_opts;
-        guessed_codec_id = config->guessed_video_codec_id != AV_CODEC_ID_NONE ?
-                           config->guessed_video_codec_id : AV_CODEC_ID_H264;
-        break;
-    case AV_OPT_FLAG_AUDIO_PARAM:
-        ctx = config->dummy_actx;
-        dict = &config->audio_opts;
-        guessed_codec_id = config->guessed_audio_codec_id != AV_CODEC_ID_NONE ?
-                           config->guessed_audio_codec_id : AV_CODEC_ID_AAC;
-        break;
-    default:
-        av_assert0(0);
-    }
-
-    if (strchr(opt, ':')) {
-        //explicit private option
-        snprintf(buff, sizeof(buff), "%s", opt);
-        codec_name = buff;
-        if(!(option = strchr(buff, ':'))){
-            report_config_error(config->filename, config->line_num,
-                                AV_LOG_ERROR, &config->errors,
-                                "Syntax error. Unmatched ':'\n");
-            return -1;
-
-        }
-        buff[option - buff] = '\0';
-        option++;
-        if ((ret = ffserver_set_codec(ctx, codec_name, config)) < 0)
-            return ret;
-        if (!ctx->codec || !ctx->priv_data)
-            return -1;
-    } else {
-        option = opt;
-    }
-
-    o = av_opt_find(ctx, option, NULL, type | AV_OPT_FLAG_ENCODING_PARAM,
-                    AV_OPT_SEARCH_CHILDREN);
-    if (!o &&
-        (!strcmp(option, "time_base")  || !strcmp(option, "pixel_format") ||
-         !strcmp(option, "video_size") || !strcmp(option, "codec_tag")))
-        o = av_opt_find(ctx, option, NULL, 0, 0);
-    if (!o) {
-        report_config_error(config->filename, config->line_num, AV_LOG_ERROR,
-                            &config->errors, "Option not found: '%s'\n", opt);
-        if (!hinted && ctx->codec_id == AV_CODEC_ID_NONE) {
-            hinted = 1;
-            report_config_error(config->filename, config->line_num,
-                                AV_LOG_ERROR, NULL, "If '%s' is a codec private"
-                                "option, then prefix it with codec name, for "
-                                "example '%s:%s %s' or define codec earlier.\n",
-                                opt, avcodec_get_name(guessed_codec_id) ,opt,
-                                arg);
-        }
-    } else if ((ret = av_opt_set(ctx, option, arg, AV_OPT_SEARCH_CHILDREN)) < 0) {
-        report_config_error(config->filename, config->line_num, AV_LOG_ERROR,
-                &config->errors, "Invalid value for option %s (%s): %s\n", opt,
-                arg, av_err2str(ret));
-    } else if ((e = av_dict_get(*dict, option, NULL, 0))) {
-        if ((o->type == AV_OPT_TYPE_FLAGS) && arg &&
-            (arg[0] == '+' || arg[0] == '-'))
-            return av_dict_set(dict, option, arg, AV_DICT_APPEND);
-        report_config_error(config->filename, config->line_num, AV_LOG_ERROR,
-                            &config->errors, "Redeclaring value of option '%s'."
-                            "Previous value was: '%s'.\n", opt, e->value);
-    } else if (av_dict_set(dict, option, arg, 0) < 0) {
-        return AVERROR(ENOMEM);
-    }
-    return 0;
-}
-
-static int ffserver_save_avoption_int(const char *opt, int64_t arg,
-                                      int type, FFServerConfig *config)
-{
-    char buf[22];
-    snprintf(buf, sizeof(buf), "%"PRId64, arg);
-    return ffserver_save_avoption(opt, buf, type, config);
-}
-
-static int ffserver_parse_config_global(FFServerConfig *config, const char *cmd,
-                                        const char **p)
-{
-    int val;
-    char arg[1024];
-    if (!av_strcasecmp(cmd, "Port") || !av_strcasecmp(cmd, "HTTPPort")) {
-        if (!av_strcasecmp(cmd, "Port"))
-            WARNING("Port option is deprecated. Use HTTPPort instead.\n");
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_int_param(&val, arg, 0, 1, 65535, config,
-                "Invalid port: %s\n", arg);
-        if (val < 1024)
-            WARNING("Trying to use IETF assigned system port: '%d'\n", val);
-        config->http_addr.sin_port = htons(val);
-    } else if (!av_strcasecmp(cmd, "HTTPBindAddress") ||
-               !av_strcasecmp(cmd, "BindAddress")) {
-        if (!av_strcasecmp(cmd, "BindAddress"))
-            WARNING("BindAddress option is deprecated. Use HTTPBindAddress "
-                    "instead.\n");
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (resolve_host(&config->http_addr.sin_addr, arg))
-            ERROR("Invalid host/IP address: '%s'\n", arg);
-    } else if (!av_strcasecmp(cmd, "NoDaemon")) {
-        WARNING("NoDaemon option has no effect. You should remove it.\n");
-    } else if (!av_strcasecmp(cmd, "RTSPPort")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_int_param(&val, arg, 0, 1, 65535, config,
-                "Invalid port: %s\n", arg);
-        config->rtsp_addr.sin_port = htons(val);
-    } else if (!av_strcasecmp(cmd, "RTSPBindAddress")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (resolve_host(&config->rtsp_addr.sin_addr, arg))
-            ERROR("Invalid host/IP address: %s\n", arg);
-    } else if (!av_strcasecmp(cmd, "MaxHTTPConnections")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_int_param(&val, arg, 0, 1, 65535, config,
-                "Invalid MaxHTTPConnections: %s\n", arg);
-        config->nb_max_http_connections = val;
-        if (config->nb_max_connections > config->nb_max_http_connections) {
-            ERROR("Inconsistent configuration: MaxClients(%d) > "
-                  "MaxHTTPConnections(%d)\n", config->nb_max_connections,
-                  config->nb_max_http_connections);
-        }
-    } else if (!av_strcasecmp(cmd, "MaxClients")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_int_param(&val, arg, 0, 1, 65535, config,
-                "Invalid MaxClients: '%s'\n", arg);
-        config->nb_max_connections = val;
-        if (config->nb_max_connections > config->nb_max_http_connections) {
-            ERROR("Inconsistent configuration: MaxClients(%d) > "
-                  "MaxHTTPConnections(%d)\n", config->nb_max_connections,
-                  config->nb_max_http_connections);
-        }
-    } else if (!av_strcasecmp(cmd, "MaxBandwidth")) {
-        int64_t llval;
-        char *tailp;
-        ffserver_get_arg(arg, sizeof(arg), p);
-        errno = 0;
-        llval = strtoll(arg, &tailp, 10);
-        if (llval < 10 || llval > 10000000 || tailp[0] || errno)
-            ERROR("Invalid MaxBandwidth: '%s'\n", arg);
-        else
-            config->max_bandwidth = llval;
-    } else if (!av_strcasecmp(cmd, "CustomLog")) {
-        if (!config->debug) {
-            ffserver_get_arg(config->logfilename, sizeof(config->logfilename),
-                             p);
-        }
-    } else if (!av_strcasecmp(cmd, "LoadModule")) {
-        ERROR("Loadable modules are no longer supported\n");
-    } else if (!av_strcasecmp(cmd, "NoDefaults")) {
-        config->use_defaults = 0;
-    } else if (!av_strcasecmp(cmd, "UseDefaults")) {
-        config->use_defaults = 1;
-    } else
-        ERROR("Incorrect keyword: '%s'\n", cmd);
-    return 0;
-}
-
-static int ffserver_parse_config_feed(FFServerConfig *config, const char *cmd,
-                                      const char **p, FFServerStream **pfeed)
-{
-    FFServerStream *feed;
-    char arg[1024];
-    av_assert0(pfeed);
-    feed = *pfeed;
-    if (!av_strcasecmp(cmd, "<Feed")) {
-        char *q;
-        FFServerStream *s;
-        feed = av_mallocz(sizeof(FFServerStream));
-        if (!feed)
-            return AVERROR(ENOMEM);
-        ffserver_get_arg(feed->filename, sizeof(feed->filename), p);
-        q = strrchr(feed->filename, '>');
-        if (*q)
-            *q = '\0';
-
-        for (s = config->first_feed; s; s = s->next) {
-            if (!strcmp(feed->filename, s->filename))
-                ERROR("Feed '%s' already registered\n", s->filename);
-        }
-
-        feed->fmt = av_guess_format("ffm", NULL, NULL);
-        /* default feed file */
-        snprintf(feed->feed_filename, sizeof(feed->feed_filename),
-                 "/tmp/%s.ffm", feed->filename);
-        feed->feed_max_size = 5 * 1024 * 1024;
-        feed->is_feed = 1;
-        feed->feed = feed; /* self feeding :-) */
-        *pfeed = feed;
-        return 0;
-    }
-    av_assert0(feed);
-    if (!av_strcasecmp(cmd, "Launch")) {
-        int i;
-
-        feed->child_argv = av_mallocz_array(MAX_CHILD_ARGS, sizeof(char *));
-        if (!feed->child_argv)
-            return AVERROR(ENOMEM);
-        for (i = 0; i < MAX_CHILD_ARGS - 2; i++) {
-            ffserver_get_arg(arg, sizeof(arg), p);
-            if (!arg[0])
-                break;
-
-            feed->child_argv[i] = av_strdup(arg);
-            if (!feed->child_argv[i])
-                return AVERROR(ENOMEM);
-        }
-
-        feed->child_argv[i] =
-            av_asprintf("http://%s:%d/%s",
-                        (config->http_addr.sin_addr.s_addr == INADDR_ANY) ?
-                        "127.0.0.1" : inet_ntoa(config->http_addr.sin_addr),
-                        ntohs(config->http_addr.sin_port), feed->filename);
-        if (!feed->child_argv[i])
-            return AVERROR(ENOMEM);
-    } else if (!av_strcasecmp(cmd, "ACL")) {
-        ffserver_parse_acl_row(NULL, feed, NULL, *p, config->filename,
-                config->line_num);
-    } else if (!av_strcasecmp(cmd, "File") ||
-               !av_strcasecmp(cmd, "ReadOnlyFile")) {
-        ffserver_get_arg(feed->feed_filename, sizeof(feed->feed_filename), p);
-        feed->readonly = !av_strcasecmp(cmd, "ReadOnlyFile");
-    } else if (!av_strcasecmp(cmd, "Truncate")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        /* assume Truncate is true in case no argument is specified */
-        if (!arg[0]) {
-            feed->truncate = 1;
-        } else {
-            WARNING("Truncate N syntax in configuration file is deprecated. "
-                    "Use Truncate alone with no arguments.\n");
-            feed->truncate = strtod(arg, NULL);
-        }
-    } else if (!av_strcasecmp(cmd, "FileMaxSize")) {
-        char *p1;
-        double fsize;
-
-        ffserver_get_arg(arg, sizeof(arg), p);
-        p1 = arg;
-        fsize = strtod(p1, &p1);
-        switch(av_toupper(*p1)) {
-        case 'K':
-            fsize *= 1024;
-            break;
-        case 'M':
-            fsize *= 1024 * 1024;
-            break;
-        case 'G':
-            fsize *= 1024 * 1024 * 1024;
-            break;
-        default:
-            ERROR("Invalid file size: '%s'\n", arg);
-            break;
-        }
-        feed->feed_max_size = (int64_t)fsize;
-        if (feed->feed_max_size < FFM_PACKET_SIZE*4) {
-            ERROR("Feed max file size is too small. Must be at least %d.\n",
-                  FFM_PACKET_SIZE*4);
-        }
-    } else if (!av_strcasecmp(cmd, "</Feed>")) {
-        *pfeed = NULL;
-    } else {
-        ERROR("Invalid entry '%s' inside <Feed></Feed>\n", cmd);
-    }
-    return 0;
-}
-
-static int ffserver_parse_config_stream(FFServerConfig *config, const char *cmd,
-                                        const char **p,
-                                        FFServerStream **pstream)
-{
-    char arg[1024], arg2[1024];
-    FFServerStream *stream;
-    int val;
-
-    av_assert0(pstream);
-    stream = *pstream;
-
-    if (!av_strcasecmp(cmd, "<Stream")) {
-        char *q;
-        FFServerStream *s;
-        stream = av_mallocz(sizeof(FFServerStream));
-        if (!stream)
-            return AVERROR(ENOMEM);
-        config->dummy_actx = avcodec_alloc_context3(NULL);
-        config->dummy_vctx = avcodec_alloc_context3(NULL);
-        if (!config->dummy_vctx || !config->dummy_actx) {
-            av_free(stream);
-            avcodec_free_context(&config->dummy_vctx);
-            avcodec_free_context(&config->dummy_actx);
-            return AVERROR(ENOMEM);
-        }
-        config->dummy_actx->codec_type = AVMEDIA_TYPE_AUDIO;
-        config->dummy_vctx->codec_type = AVMEDIA_TYPE_VIDEO;
-        ffserver_get_arg(stream->filename, sizeof(stream->filename), p);
-        q = strrchr(stream->filename, '>');
-        if (q)
-            *q = '\0';
-
-        for (s = config->first_stream; s; s = s->next) {
-            if (!strcmp(stream->filename, s->filename))
-                ERROR("Stream '%s' already registered\n", s->filename);
-        }
-
-        stream->fmt = ffserver_guess_format(NULL, stream->filename, NULL);
-        if (stream->fmt) {
-            config->guessed_audio_codec_id = stream->fmt->audio_codec;
-            config->guessed_video_codec_id = stream->fmt->video_codec;
-        } else {
-            config->guessed_audio_codec_id = AV_CODEC_ID_NONE;
-            config->guessed_video_codec_id = AV_CODEC_ID_NONE;
-        }
-        config->stream_use_defaults = config->use_defaults;
-        *pstream = stream;
-        return 0;
-    }
-    av_assert0(stream);
-    if (!av_strcasecmp(cmd, "Feed")) {
-        FFServerStream *sfeed;
-        ffserver_get_arg(arg, sizeof(arg), p);
-        sfeed = config->first_feed;
-        while (sfeed) {
-            if (!strcmp(sfeed->filename, arg))
-                break;
-            sfeed = sfeed->next_feed;
-        }
-        if (!sfeed)
-            ERROR("Feed with name '%s' for stream '%s' is not defined\n", arg,
-                    stream->filename);
-        else
-            stream->feed = sfeed;
-    } else if (!av_strcasecmp(cmd, "Format")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (!strcmp(arg, "status")) {
-            stream->stream_type = STREAM_TYPE_STATUS;
-            stream->fmt = NULL;
-        } else {
-            stream->stream_type = STREAM_TYPE_LIVE;
-            /* JPEG cannot be used here, so use single frame MJPEG */
-            if (!strcmp(arg, "jpeg")) {
-                strcpy(arg, "singlejpeg");
-                stream->single_frame=1;
-            }
-            stream->fmt = ffserver_guess_format(arg, NULL, NULL);
-            if (!stream->fmt)
-                ERROR("Unknown Format: '%s'\n", arg);
-        }
-        if (stream->fmt) {
-            config->guessed_audio_codec_id = stream->fmt->audio_codec;
-            config->guessed_video_codec_id = stream->fmt->video_codec;
-        }
-    } else if (!av_strcasecmp(cmd, "InputFormat")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        stream->ifmt = av_find_input_format(arg);
-        if (!stream->ifmt)
-            ERROR("Unknown input format: '%s'\n", arg);
-    } else if (!av_strcasecmp(cmd, "FaviconURL")) {
-        if (stream->stream_type == STREAM_TYPE_STATUS)
-            ffserver_get_arg(stream->feed_filename,
-                    sizeof(stream->feed_filename), p);
-        else
-            ERROR("FaviconURL only permitted for status streams\n");
-    } else if (!av_strcasecmp(cmd, "Author")    ||
-               !av_strcasecmp(cmd, "Comment")   ||
-               !av_strcasecmp(cmd, "Copyright") ||
-               !av_strcasecmp(cmd, "Title")) {
-        char key[32];
-        int i;
-        ffserver_get_arg(arg, sizeof(arg), p);
-        for (i = 0; i < strlen(cmd); i++)
-            key[i] = av_tolower(cmd[i]);
-        key[i] = 0;
-        WARNING("Deprecated '%s' option in configuration file. Use "
-                "'Metadata %s VALUE' instead.\n", cmd, key);
-        if (av_dict_set(&stream->metadata, key, arg, 0) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "Metadata")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_get_arg(arg2, sizeof(arg2), p);
-        if (av_dict_set(&stream->metadata, arg, arg2, 0) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "Preroll")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        stream->prebuffer = atof(arg) * 1000;
-    } else if (!av_strcasecmp(cmd, "StartSendOnKey")) {
-        stream->send_on_key = 1;
-    } else if (!av_strcasecmp(cmd, "AudioCodec")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_codec(config->dummy_actx, arg, config);
-    } else if (!av_strcasecmp(cmd, "VideoCodec")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_codec(config->dummy_vctx, arg, config);
-    } else if (!av_strcasecmp(cmd, "MaxTime")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        stream->max_time = atof(arg) * 1000;
-    } else if (!av_strcasecmp(cmd, "AudioBitRate")) {
-        float f;
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_float_param(&f, arg, 1000, -FLT_MAX, FLT_MAX, config,
-                "Invalid %s: '%s'\n", cmd, arg);
-        if (ffserver_save_avoption_int("b", (int64_t)lrintf(f),
-                                       AV_OPT_FLAG_AUDIO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "AudioChannels")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (ffserver_save_avoption("ac", arg, AV_OPT_FLAG_AUDIO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "AudioSampleRate")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (ffserver_save_avoption("ar", arg, AV_OPT_FLAG_AUDIO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "VideoBitRateRange")) {
-        int minrate, maxrate;
-        char *dash;
-        ffserver_get_arg(arg, sizeof(arg), p);
-        dash = strchr(arg, '-');
-        if (dash) {
-            *dash = '\0';
-            dash++;
-            if (ffserver_set_int_param(&minrate, arg,  1000, 0, INT_MAX, config, "Invalid %s: '%s'", cmd, arg) >= 0 &&
-                ffserver_set_int_param(&maxrate, dash, 1000, 0, INT_MAX, config, "Invalid %s: '%s'", cmd, arg) >= 0) {
-                if (ffserver_save_avoption_int("minrate", minrate, AV_OPT_FLAG_VIDEO_PARAM, config) < 0 ||
-                    ffserver_save_avoption_int("maxrate", maxrate, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-                goto nomem;
-            }
-        } else
-            ERROR("Incorrect format for VideoBitRateRange. It should be "
-                  "<min>-<max>: '%s'.\n", arg);
-    } else if (!av_strcasecmp(cmd, "Debug")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (ffserver_save_avoption("debug", arg, AV_OPT_FLAG_AUDIO_PARAM, config) < 0 ||
-            ffserver_save_avoption("debug", arg, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "Strict")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (ffserver_save_avoption("strict", arg, AV_OPT_FLAG_AUDIO_PARAM, config) < 0 ||
-            ffserver_save_avoption("strict", arg, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "VideoBufferSize")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_int_param(&val, arg, 8*1024, 0, INT_MAX, config,
-                "Invalid %s: '%s'", cmd, arg);
-        if (ffserver_save_avoption_int("bufsize", val, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "VideoBitRateTolerance")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_int_param(&val, arg, 1000, INT_MIN, INT_MAX, config,
-                               "Invalid %s: '%s'", cmd, arg);
-        if (ffserver_save_avoption_int("bt", val, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "VideoBitRate")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_int_param(&val, arg, 1000, INT_MIN, INT_MAX, config,
-                               "Invalid %s: '%s'", cmd, arg);
-        if (ffserver_save_avoption_int("b", val, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-           goto nomem;
-    } else if (!av_strcasecmp(cmd, "VideoSize")) {
-        int ret, w, h;
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ret = av_parse_video_size(&w, &h, arg);
-        if (ret < 0)
-            ERROR("Invalid video size '%s'\n", arg);
-        else {
-            if (w % 2 || h % 2)
-                WARNING("Image size is not a multiple of 2\n");
-            if (ffserver_save_avoption("video_size", arg, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-                goto nomem;
-        }
-    } else if (!av_strcasecmp(cmd, "VideoFrameRate")) {
-        ffserver_get_arg(&arg[2], sizeof(arg) - 2, p);
-        arg[0] = '1'; arg[1] = '/';
-        if (ffserver_save_avoption("time_base", arg, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "PixelFormat")) {
-        enum AVPixelFormat pix_fmt;
-        ffserver_get_arg(arg, sizeof(arg), p);
-        pix_fmt = av_get_pix_fmt(arg);
-        if (pix_fmt == AV_PIX_FMT_NONE)
-            ERROR("Unknown pixel format: '%s'\n", arg);
-        else if (ffserver_save_avoption("pixel_format", arg, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "VideoGopSize")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (ffserver_save_avoption("g", arg, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "VideoIntraOnly")) {
-        if (ffserver_save_avoption("g", "1", AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "VideoHighQuality")) {
-        if (ffserver_save_avoption("mbd", "+bits", AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "Video4MotionVector")) {
-        if (ffserver_save_avoption("mbd", "+bits",  AV_OPT_FLAG_VIDEO_PARAM, config) < 0 || //FIXME remove
-            ffserver_save_avoption("flags", "+mv4", AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "AVOptionVideo") ||
-               !av_strcasecmp(cmd, "AVOptionAudio")) {
-        int ret;
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_get_arg(arg2, sizeof(arg2), p);
-        if (!av_strcasecmp(cmd, "AVOptionVideo"))
-            ret = ffserver_save_avoption(arg, arg2, AV_OPT_FLAG_VIDEO_PARAM,
-                                         config);
-        else
-            ret = ffserver_save_avoption(arg, arg2, AV_OPT_FLAG_AUDIO_PARAM,
-                                         config);
-        if (ret < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "AVPresetVideo") ||
-               !av_strcasecmp(cmd, "AVPresetAudio")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (!av_strcasecmp(cmd, "AVPresetVideo"))
-            ffserver_opt_preset(arg, AV_OPT_FLAG_VIDEO_PARAM, config);
-        else
-            ffserver_opt_preset(arg, AV_OPT_FLAG_AUDIO_PARAM, config);
-    } else if (!av_strcasecmp(cmd, "VideoTag")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (strlen(arg) == 4 &&
-            ffserver_save_avoption_int("codec_tag",
-                                       MKTAG(arg[0], arg[1], arg[2], arg[3]),
-                                       AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "BitExact")) {
-        config->bitexact = 1;
-        if (ffserver_save_avoption("flags", "+bitexact", AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "DctFastint")) {
-        if (ffserver_save_avoption("dct", "fastint", AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "IdctSimple")) {
-        if (ffserver_save_avoption("idct", "simple", AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "Qscale")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_int_param(&val, arg, 0, INT_MIN, INT_MAX, config,
-                               "Invalid Qscale: '%s'\n", arg);
-        if (ffserver_save_avoption("flags", "+qscale", AV_OPT_FLAG_VIDEO_PARAM, config) < 0 ||
-            ffserver_save_avoption_int("global_quality", FF_QP2LAMBDA * val,
-                                       AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "VideoQDiff")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (ffserver_save_avoption("qdiff", arg, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "VideoQMax")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (ffserver_save_avoption("qmax", arg, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "VideoQMin")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (ffserver_save_avoption("qmin", arg, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "LumiMask")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (ffserver_save_avoption("lumi_mask", arg, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "DarkMask")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (ffserver_save_avoption("dark_mask", arg, AV_OPT_FLAG_VIDEO_PARAM, config) < 0)
-            goto nomem;
-    } else if (!av_strcasecmp(cmd, "NoVideo")) {
-        config->no_video = 1;
-    } else if (!av_strcasecmp(cmd, "NoAudio")) {
-        config->no_audio = 1;
-    } else if (!av_strcasecmp(cmd, "ACL")) {
-        ffserver_parse_acl_row(stream, NULL, NULL, *p, config->filename,
-                config->line_num);
-    } else if (!av_strcasecmp(cmd, "DynamicACL")) {
-        ffserver_get_arg(stream->dynamic_acl, sizeof(stream->dynamic_acl), p);
-    } else if (!av_strcasecmp(cmd, "RTSPOption")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        av_freep(&stream->rtsp_option);
-        stream->rtsp_option = av_strdup(arg);
-    } else if (!av_strcasecmp(cmd, "MulticastAddress")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        if (resolve_host(&stream->multicast_ip, arg))
-            ERROR("Invalid host/IP address: '%s'\n", arg);
-        stream->is_multicast = 1;
-        stream->loop = 1; /* default is looping */
-    } else if (!av_strcasecmp(cmd, "MulticastPort")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_int_param(&val, arg, 0, 1, 65535, config,
-                "Invalid MulticastPort: '%s'\n", arg);
-        stream->multicast_port = val;
-    } else if (!av_strcasecmp(cmd, "MulticastTTL")) {
-        ffserver_get_arg(arg, sizeof(arg), p);
-        ffserver_set_int_param(&val, arg, 0, INT_MIN, INT_MAX, config,
-                "Invalid MulticastTTL: '%s'\n", arg);
-        stream->multicast_ttl = val;
-    } else if (!av_strcasecmp(cmd, "NoLoop")) {
-        stream->loop = 0;
-    } else if (!av_strcasecmp(cmd, "</Stream>")) {
-        config->stream_use_defaults &= 1;
-        if (stream->feed && stream->fmt && strcmp(stream->fmt->name, "ffm")) {
-            if (config->dummy_actx->codec_id == AV_CODEC_ID_NONE)
-                config->dummy_actx->codec_id = config->guessed_audio_codec_id;
-            if (!config->no_audio &&
-                config->dummy_actx->codec_id != AV_CODEC_ID_NONE) {
-                AVCodecContext *audio_enc = avcodec_alloc_context3(avcodec_find_encoder(config->dummy_actx->codec_id));
-                add_codec(stream, audio_enc, config);
-            }
-            if (config->dummy_vctx->codec_id == AV_CODEC_ID_NONE)
-                config->dummy_vctx->codec_id = config->guessed_video_codec_id;
-            if (!config->no_video &&
-                config->dummy_vctx->codec_id != AV_CODEC_ID_NONE) {
-                AVCodecContext *video_enc = avcodec_alloc_context3(avcodec_find_encoder(config->dummy_vctx->codec_id));
-                add_codec(stream, video_enc, config);
-            }
-        }
-        av_dict_free(&config->video_opts);
-        av_dict_free(&config->audio_opts);
-        avcodec_free_context(&config->dummy_vctx);
-        avcodec_free_context(&config->dummy_actx);
-        config->no_video = 0;
-        config->no_audio = 0;
-        *pstream = NULL;
-    } else if (!av_strcasecmp(cmd, "File") ||
-               !av_strcasecmp(cmd, "ReadOnlyFile")) {
-        ffserver_get_arg(stream->feed_filename, sizeof(stream->feed_filename),
-                p);
-    } else if (!av_strcasecmp(cmd, "UseDefaults")) {
-        if (config->stream_use_defaults > 1)
-            WARNING("Multiple UseDefaults/NoDefaults entries.\n");
-        config->stream_use_defaults = 3;
-    } else if (!av_strcasecmp(cmd, "NoDefaults")) {
-        if (config->stream_use_defaults > 1)
-            WARNING("Multiple UseDefaults/NoDefaults entries.\n");
-        config->stream_use_defaults = 2;
-    } else {
-        ERROR("Invalid entry '%s' inside <Stream></Stream>\n", cmd);
-    }
-    return 0;
-  nomem:
-    av_log(NULL, AV_LOG_ERROR, "Out of memory. Aborting.\n");
-    av_dict_free(&config->video_opts);
-    av_dict_free(&config->audio_opts);
-    avcodec_free_context(&config->dummy_vctx);
-    avcodec_free_context(&config->dummy_actx);
-    return AVERROR(ENOMEM);
-}
-
-static int ffserver_parse_config_redirect(FFServerConfig *config,
-                                          const char *cmd, const char **p,
-                                          FFServerStream **predirect)
-{
-    FFServerStream *redirect;
-    av_assert0(predirect);
-    redirect = *predirect;
-
-    if (!av_strcasecmp(cmd, "<Redirect")) {
-        char *q;
-        redirect = av_mallocz(sizeof(FFServerStream));
-        if (!redirect)
-            return AVERROR(ENOMEM);
-
-        ffserver_get_arg(redirect->filename, sizeof(redirect->filename), p);
-        q = strrchr(redirect->filename, '>');
-        if (*q)
-            *q = '\0';
-        redirect->stream_type = STREAM_TYPE_REDIRECT;
-        *predirect = redirect;
-        return 0;
-    }
-    av_assert0(redirect);
-    if (!av_strcasecmp(cmd, "URL")) {
-        ffserver_get_arg(redirect->feed_filename,
-                sizeof(redirect->feed_filename), p);
-    } else if (!av_strcasecmp(cmd, "</Redirect>")) {
-        if (!redirect->feed_filename[0])
-            ERROR("No URL found for <Redirect>\n");
-        *predirect = NULL;
-    } else {
-        ERROR("Invalid entry '%s' inside <Redirect></Redirect>\n", cmd);
-    }
-    return 0;
-}
-
-int ffserver_parse_ffconfig(const char *filename, FFServerConfig *config)
-{
-    FILE *f;
-    char line[1024];
-    char cmd[64];
-    const char *p;
-    FFServerStream **last_stream, *stream = NULL, *redirect = NULL;
-    FFServerStream **last_feed, *feed = NULL;
-    int ret = 0;
-
-    av_assert0(config);
-
-    f = fopen(filename, "r");
-    if (!f) {
-        ret = AVERROR(errno);
-        av_log(NULL, AV_LOG_ERROR,
-                "Could not open the configuration file '%s'\n", filename);
-        return ret;
-    }
-
-    config->first_stream = NULL;
-    config->first_feed = NULL;
-    config->errors = config->warnings = 0;
-
-    last_stream = &config->first_stream;
-    last_feed = &config->first_feed;
-
-    config->line_num = 0;
-    while (fgets(line, sizeof(line), f) != NULL) {
-        config->line_num++;
-        p = line;
-        while (av_isspace(*p))
-            p++;
-        if (*p == '\0' || *p == '#')
-            continue;
-
-        ffserver_get_arg(cmd, sizeof(cmd), &p);
-
-        if (feed || !av_strcasecmp(cmd, "<Feed")) {
-            int opening = !av_strcasecmp(cmd, "<Feed");
-            if (opening && (stream || feed || redirect)) {
-                ERROR("Already in a tag\n");
-            } else {
-                ret = ffserver_parse_config_feed(config, cmd, &p, &feed);
-                if (ret < 0)
-                    break;
-                if (opening) {
-                    /* add in stream & feed list */
-                    *last_stream = feed;
-                    *last_feed = feed;
-                    last_stream = &feed->next;
-                    last_feed = &feed->next_feed;
-                }
-            }
-        } else if (stream || !av_strcasecmp(cmd, "<Stream")) {
-            int opening = !av_strcasecmp(cmd, "<Stream");
-            if (opening && (stream || feed || redirect)) {
-                ERROR("Already in a tag\n");
-            } else {
-                ret = ffserver_parse_config_stream(config, cmd, &p, &stream);
-                if (ret < 0)
-                    break;
-                if (opening) {
-                    /* add in stream list */
-                    *last_stream = stream;
-                    last_stream = &stream->next;
-                }
-            }
-        } else if (redirect || !av_strcasecmp(cmd, "<Redirect")) {
-            int opening = !av_strcasecmp(cmd, "<Redirect");
-            if (opening && (stream || feed || redirect))
-                ERROR("Already in a tag\n");
-            else {
-                ret = ffserver_parse_config_redirect(config, cmd, &p,
-                                                     &redirect);
-                if (ret < 0)
-                    break;
-                if (opening) {
-                    /* add in stream list */
-                    *last_stream = redirect;
-                    last_stream = &redirect->next;
-                }
-            }
-        } else {
-            ffserver_parse_config_global(config, cmd, &p);
-        }
-    }
-    if (stream || feed || redirect)
-        ERROR("Missing closing </%s> tag\n",
-              stream ? "Stream" : (feed ? "Feed" : "Redirect"));
-
-    fclose(f);
-    if (ret < 0)
-        return ret;
-    if (config->errors)
-        return AVERROR(EINVAL);
-    else
-        return 0;
-}
-
-#undef ERROR
-#undef WARNING
-
-void ffserver_free_child_args(void *argsp)
-{
-    int i;
-    char **args;
-    if (!argsp)
-        return;
-    args = *(char ***)argsp;
-    if (!args)
-        return;
-    for (i = 0; i < MAX_CHILD_ARGS; i++)
-        av_free(args[i]);
-    av_freep(argsp);
-}

diff --git a/fftools/ffserver_config.h b/fftools/ffserver_config.h
deleted file mode 100644
index 089b848..0000000
--- a/fftools/ffserver_config.h
+++ /dev/null

@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef FFTOOLS_FFSERVER_CONFIG_H
-#define FFTOOLS_FFSERVER_CONFIG_H
-
-#define FFM_PACKET_SIZE 4096
-
-#include "libavutil/dict.h"
-#include "libavformat/avformat.h"
-#include "libavformat/network.h"
-
-#define FFSERVER_MAX_STREAMS 20
-
-/* each generated stream is described here */
-enum FFServerStreamType {
-    STREAM_TYPE_LIVE,
-    STREAM_TYPE_STATUS,
-    STREAM_TYPE_REDIRECT,
-};
-
-enum FFServerIPAddressAction {
-    IP_ALLOW = 1,
-    IP_DENY,
-};
-
-typedef struct FFServerIPAddressACL {
-    struct FFServerIPAddressACL *next;
-    enum FFServerIPAddressAction action;
-    /* These are in host order */
-    struct in_addr first;
-    struct in_addr last;
-} FFServerIPAddressACL;
-
-/**
- * This holds the stream parameters for an AVStream, it cannot be a AVStream
- * because AVStreams cannot be instanciated without a AVFormatContext, especially
- * not outside libavformat.
- *
- * The fields of this struct have the same semantics as the fields of an AVStream.
- */
-typedef struct LayeredAVStream {
-    int index;
-    int id;
-    AVCodecParameters *codecpar;
-    AVCodecContext *codec;
-    AVRational time_base;
-    int pts_wrap_bits;
-    AVRational sample_aspect_ratio;
-    char *recommended_encoder_configuration;
-} LayeredAVStream;
-
-/* description of each stream of the ffserver.conf file */
-typedef struct FFServerStream {
-    enum FFServerStreamType stream_type;
-    char filename[1024];          /* stream filename */
-    struct FFServerStream *feed;  /* feed we are using (can be null if coming from file) */
-    AVDictionary *in_opts;        /* input parameters */
-    AVDictionary *metadata;       /* metadata to set on the stream */
-    AVInputFormat *ifmt;          /* if non NULL, force input format */
-    AVOutputFormat *fmt;
-    FFServerIPAddressACL *acl;
-    char dynamic_acl[1024];
-    int nb_streams;
-    int prebuffer;                /* Number of milliseconds early to start */
-    int64_t max_time;             /* Number of milliseconds to run */
-    int send_on_key;
-    LayeredAVStream *streams[FFSERVER_MAX_STREAMS];
-    int feed_streams[FFSERVER_MAX_STREAMS]; /* index of streams in the feed */
-    char feed_filename[1024];     /* file name of the feed storage, or
-                                     input file name for a stream */
-    pid_t pid;                    /* Of ffmpeg process */
-    time_t pid_start;             /* Of ffmpeg process */
-    char **child_argv;
-    struct FFServerStream *next;
-    unsigned bandwidth;           /* bandwidth, in kbits/s */
-    /* RTSP options */
-    char *rtsp_option;
-    /* multicast specific */
-    int is_multicast;
-    struct in_addr multicast_ip;
-    int multicast_port;           /* first port used for multicast */
-    int multicast_ttl;
-    int loop;                     /* if true, send the stream in loops (only meaningful if file) */
-    char single_frame;            /* only single frame */
-
-    /* feed specific */
-    int feed_opened;              /* true if someone is writing to the feed */
-    int is_feed;                  /* true if it is a feed */
-    int readonly;                 /* True if writing is prohibited to the file */
-    int truncate;                 /* True if feeder connection truncate the feed file */
-    int conns_served;
-    int64_t bytes_served;
-    int64_t feed_max_size;        /* maximum storage size, zero means unlimited */
-    int64_t feed_write_index;     /* current write position in feed (it wraps around) */
-    int64_t feed_size;            /* current size of feed */
-    struct FFServerStream *next_feed;
-} FFServerStream;
-
-typedef struct FFServerConfig {
-    char *filename;
-    FFServerStream *first_feed;   /* contains only feeds */
-    FFServerStream *first_stream; /* contains all streams, including feeds */
-    unsigned int nb_max_http_connections;
-    unsigned int nb_max_connections;
-    uint64_t max_bandwidth;
-    int debug;
-    int bitexact;
-    char logfilename[1024];
-    struct sockaddr_in http_addr;
-    struct sockaddr_in rtsp_addr;
-    int errors;
-    int warnings;
-    int use_defaults;
-    // Following variables MUST NOT be used outside configuration parsing code.
-    enum AVCodecID guessed_audio_codec_id;
-    enum AVCodecID guessed_video_codec_id;
-    AVDictionary *video_opts;     /* AVOptions for video encoder */
-    AVDictionary *audio_opts;     /* AVOptions for audio encoder */
-    AVCodecContext *dummy_actx;   /* Used internally to test audio AVOptions. */
-    AVCodecContext *dummy_vctx;   /* Used internally to test video AVOptions. */
-    int no_audio;
-    int no_video;
-    int line_num;
-    int stream_use_defaults;
-} FFServerConfig;
-
-void ffserver_get_arg(char *buf, int buf_size, const char **pp);
-
-void ffserver_parse_acl_row(FFServerStream *stream, FFServerStream* feed,
-                            FFServerIPAddressACL *ext_acl,
-                            const char *p, const char *filename, int line_num);
-
-int ffserver_parse_ffconfig(const char *filename, FFServerConfig *config);
-
-void ffserver_free_child_args(void *argsp);
-
-#endif /* FFTOOLS_FFSERVER_CONFIG_H */

diff --git a/fuchsia/config/default/arm64/config.h b/fuchsia/config/default/arm64/config.h
index 297f47b..b26df9d 100644
--- a/fuchsia/config/default/arm64/config.h
+++ b/fuchsia/config/default/arm64/config.h

@@ -1,12 +1,12 @@
 /* Automatically generated by configure - do not modify! */
 #ifndef FFMPEG_CONFIG_H
 #define FFMPEG_CONFIG_H
-#define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --disable-bzlib --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vda --disable-vdpau --disable-videotoolbox --disable-nvenc --disable-cuda --disable-cuvid --disable-v4l2_m2m --enable-decoder='vorbis,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-decoder='theora,vp8' --enable-demuxer='ogg,matroska,wav,flac' --enable-parser='opus,vorbis,flac' --enable-parser='vp3,vp8' --optflags='\"-O2\"' --enable-pic --enable-pic --enable-lto --cc=clang --cxx=clang++ --ld=clang --extra-ldflags='-fuse-ld=lld' --enable-cross-compile --cross-prefix=/usr/bin/aarch64-linux-gnu- --target-os=linux --arch=aarch64 --enable-armv8 --extra-cflags='-march=armv8-a' --sysroot=/usr/local/google/home/phosek/fuchsia/third_party/ffmpeg/../../buildtools/linux-arm64/sysroot --extra-cflags='--target=aarch64-linux-gnu' --extra-ldflags='--target=aarch64-linux-gnu' --disable-linux-perf --disable-error-resilience"
+#define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --disable-debug --disable-bzlib --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-autodetect --enable-decoder='vorbis,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le,mp3' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-decoder='theora,vp8,sbc,aptx' --enable-demuxer='ogg,matroska,wav,flac,mp3,mov' --enable-parser='opus,vorbis,flac,mpegaudio' --enable-parser='vp3,vp8' --optflags='\"-O2\"' --enable-pic --x86asmexe=yasm --enable-pic --enable-lto --cc=clang --cxx=clang++ --ld=clang --enable-cross-compile --cross-prefix=/usr/bin/aarch64-linux-gnu- --target-os=linux --arch=aarch64 --enable-armv8 --extra-cflags='-march=armv8-a' --sysroot=/usr/local/google/home/dalesat/fuchsia/third_party/ffmpeg/../../buildtools/linux-arm64/sysroot --extra-cflags='--target=aarch64-linux-gnu' --extra-ldflags='--target=aarch64-linux-gnu' --disable-linux-perf --disable-error-resilience"
 #define FFMPEG_LICENSE "LGPL version 2.1 or later"
 #define CONFIG_THIS_YEAR 2018
 #define FFMPEG_DATADIR "/usr/local/share/ffmpeg"
 #define AVCONV_DATADIR "/usr/local/share/ffmpeg"
-#define CC_IDENT "Fuchsia clang version 7.0.0 (https://fuchsia.googlesource.com/a/third_party/clang 5034f5fddab316b12887b39b129ebbca999500e2) (https://fuchsia.googlesource.com/a/third_party/llvm 197b6c81959a17be37035d4fe71b382023bff2f0) (based on LLVM 7.0.0svn)"
+#define CC_IDENT "Fuchsia clang version 8.0.0 (https://fuchsia.googlesource.com/a/third_party/clang 0a217961416a0cbf1ac29bcb26577d41ca0e0e8d) (https://fuchsia.googlesource.com/a/third_party/llvm aff6cf491087ba32e338c9af076c9b7739c978a0) (based on LLVM 8.0.0svn)"
 #define av_restrict restrict
 #define EXTERN_PREFIX ""
 #define EXTERN_ASM 
@@ -57,6 +57,7 @@
 #define HAVE_AMD3DNOWEXT 0
 #define HAVE_AVX 0
 #define HAVE_AVX2 0
+#define HAVE_AVX512 0
 #define HAVE_FMA3 0
 #define HAVE_FMA4 0
 #define HAVE_MMX 0
@@ -101,6 +102,7 @@
 #define HAVE_AMD3DNOWEXT_EXTERNAL 0
 #define HAVE_AVX_EXTERNAL 0
 #define HAVE_AVX2_EXTERNAL 0
+#define HAVE_AVX512_EXTERNAL 0
 #define HAVE_FMA3_EXTERNAL 0
 #define HAVE_FMA4_EXTERNAL 0
 #define HAVE_MMX_EXTERNAL 0
@@ -145,6 +147,7 @@
 #define HAVE_AMD3DNOWEXT_INLINE 0
 #define HAVE_AVX_INLINE 0
 #define HAVE_AVX2_INLINE 0
+#define HAVE_AVX512_INLINE 0
 #define HAVE_FMA3_INLINE 0
 #define HAVE_FMA4_INLINE 0
 #define HAVE_MMX_INLINE 0
@@ -174,36 +177,29 @@
 #define HAVE_FAST_64BIT 1
 #define HAVE_FAST_CLZ 1
 #define HAVE_FAST_CMOV 0
-#define HAVE_LOCAL_ALIGNED_8 0
-#define HAVE_LOCAL_ALIGNED_16 0
-#define HAVE_LOCAL_ALIGNED_32 0
+#define HAVE_LOCAL_ALIGNED 0
 #define HAVE_SIMD_ALIGN_16 1
 #define HAVE_SIMD_ALIGN_32 0
-#define HAVE_ATOMICS_GCC 1
-#define HAVE_ATOMICS_SUNCC 0
-#define HAVE_ATOMICS_WIN32 0
+#define HAVE_SIMD_ALIGN_64 0
 #define HAVE_ATOMIC_CAS_PTR 0
 #define HAVE_MACHINE_RW_BARRIER 0
 #define HAVE_MEMORYBARRIER 0
 #define HAVE_MM_EMPTY 0
 #define HAVE_RDTSC 0
-#define HAVE_SARESTART 1
 #define HAVE_SEM_TIMEDWAIT 1
 #define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1
-#define HAVE_CABS 1
-#define HAVE_CEXP 1
+#define HAVE_CABS 0
+#define HAVE_CEXP 0
 #define HAVE_INLINE_ASM 1
 #define HAVE_SYMVER 0
 #define HAVE_X86ASM 0
 #define HAVE_BIGENDIAN 0
 #define HAVE_FAST_UNALIGNED 1
-#define HAVE_ALTIVEC_H 0
 #define HAVE_ARPA_INET_H 0
 #define HAVE_ASM_TYPES_H 1
 #define HAVE_CDIO_PARANOIA_H 0
 #define HAVE_CDIO_PARANOIA_PARANOIA_H 0
 #define HAVE_CUDA_H 0
-#define HAVE_D3D11_H 0
 #define HAVE_DISPATCH_DISPATCH_H 0
 #define HAVE_DEV_BKTR_IOCTL_BT848_H 0
 #define HAVE_DEV_BKTR_IOCTL_METEOR_H 0
@@ -212,27 +208,18 @@
 #define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0
 #define HAVE_DIRECT_H 0
 #define HAVE_DIRENT_H 1
-#define HAVE_DLFCN_H 1
 #define HAVE_DXGIDEBUG_H 0
 #define HAVE_DXVA_H 0
 #define HAVE_ES2_GL_H 0
 #define HAVE_GSM_H 0
 #define HAVE_IO_H 0
-#define HAVE_MACH_MACH_TIME_H 0
+#define HAVE_LINUX_PERF_EVENT_H 1
 #define HAVE_MACHINE_IOCTL_BT848_H 0
 #define HAVE_MACHINE_IOCTL_METEOR_H 0
 #define HAVE_MALLOC_H 1
 #define HAVE_OPENCV2_CORE_CORE_C_H 0
-#define HAVE_OPENJPEG_2_3_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_2_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_1_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_0_OPENJPEG_H 0
-#define HAVE_OPENJPEG_1_5_OPENJPEG_H 0
 #define HAVE_OPENGL_GL3_H 0
 #define HAVE_POLL_H 1
-#define HAVE_SOUNDCARD_H 0
-#define HAVE_STDATOMIC_H 1
-#define HAVE_SYS_MMAN_H 1
 #define HAVE_SYS_PARAM_H 1
 #define HAVE_SYS_RESOURCE_H 1
 #define HAVE_SYS_SELECT_H 1
@@ -276,17 +263,20 @@
 #define HAVE_SINF 1
 #define HAVE_TRUNC 1
 #define HAVE_TRUNCF 1
+#define HAVE_DOS_PATHS 0
+#define HAVE_LIBC_MSVCRT 0
+#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+#define HAVE_SECTION_DATA_REL_RO 1
+#define HAVE_THREADS 1
+#define HAVE_UWP 0
+#define HAVE_WINRT 0
 #define HAVE_ACCESS 1
 #define HAVE_ALIGNED_MALLOC 0
 #define HAVE_ARC4RANDOM 0
 #define HAVE_CLOCK_GETTIME 1
 #define HAVE_CLOSESOCKET 0
 #define HAVE_COMMANDLINETOARGVW 0
-#define HAVE_COTASKMEMFREE 0
-#define HAVE_CRYPTGENRANDOM 0
 #define HAVE_FCNTL 1
-#define HAVE_FLT_LIM 1
-#define HAVE_FORK 1
 #define HAVE_GETADDRINFO 0
 #define HAVE_GETHRTIME 0
 #define HAVE_GETOPT 1
@@ -301,9 +291,7 @@
 #define HAVE_GMTIME_R 1
 #define HAVE_INET_ATON 0
 #define HAVE_ISATTY 1
-#define HAVE_JACK_PORT_GET_LATENCY_RANGE 0
 #define HAVE_KBHIT 0
-#define HAVE_LOADLIBRARY 0
 #define HAVE_LOCALTIME_R 1
 #define HAVE_LSTAT 1
 #define HAVE_LZO1X_999_COMPRESS 0
@@ -318,6 +306,7 @@
 #define HAVE_POSIX_MEMALIGN 1
 #define HAVE_PTHREAD_CANCEL 1
 #define HAVE_SCHED_GETAFFINITY 1
+#define HAVE_SECITEMIMPORT 0
 #define HAVE_SETCONSOLETEXTATTRIBUTE 0
 #define HAVE_SETCONSOLECTRLHANDLER 0
 #define HAVE_SETMODE 0
@@ -330,20 +319,23 @@
 #define HAVE_UTGETOSTYPEFROMSTRING 0
 #define HAVE_VIRTUALALLOC 0
 #define HAVE_WGLGETPROCADDRESS 0
+#define HAVE_BCRYPT 0
+#define HAVE_VAAPI_DRM 0
+#define HAVE_VAAPI_X11 0
+#define HAVE_VDPAU_X11 0
 #define HAVE_PTHREADS 1
 #define HAVE_OS2THREADS 0
 #define HAVE_W32THREADS 0
+#define HAVE_AS_ARCH_DIRECTIVE 0
 #define HAVE_AS_DN_DIRECTIVE 0
 #define HAVE_AS_FPU_DIRECTIVE 0
 #define HAVE_AS_FUNC 0
 #define HAVE_AS_OBJECT_ARCH 0
 #define HAVE_ASM_MOD_Q 0
-#define HAVE_ATTRIBUTE_MAY_ALIAS 1
-#define HAVE_ATTRIBUTE_PACKED 1
 #define HAVE_BLOCKS_EXTENSION 0
 #define HAVE_EBP_AVAILABLE 0
 #define HAVE_EBX_AVAILABLE 0
-#define HAVE_GNU_AS 1
+#define HAVE_GNU_AS 0
 #define HAVE_GNU_WINDRES 0
 #define HAVE_IBM_ASM 0
 #define HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS 0
@@ -356,7 +348,6 @@
 /* #define HAVE_VFP_ARGS 0 -- softfp/hardfp selection is done by the fuchsia build */
 #define HAVE_XFORM_ASM 0
 #define HAVE_XMM_CLOBBERS 0
-#define HAVE_CONDITION_VARIABLE_PTR 0
 #define HAVE_KCMVIDEOCODECTYPE_HEVC 0
 #define HAVE_SOCKLEN_T 0
 #define HAVE_STRUCT_ADDRINFO 0
@@ -372,22 +363,17 @@
 #define HAVE_STRUCT_SOCKADDR_STORAGE 0
 #define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1
 #define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 1
-#define HAVE_ATOMICS_NATIVE 1
-#define HAVE_DOS_PATHS 0
-#define HAVE_LIBC_MSVCRT 0
 #define HAVE_MAKEINFO 1
 #define HAVE_MAKEINFO_HTML 1
-#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+#define HAVE_OPENCL_D3D11 0
+#define HAVE_OPENCL_DRM_ARM 0
+#define HAVE_OPENCL_DRM_BEIGNET 0
+#define HAVE_OPENCL_DXVA2 0
+#define HAVE_OPENCL_VAAPI_BEIGNET 0
+#define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0
 #define HAVE_PERL 1
 #define HAVE_POD2MAN 1
-#define HAVE_SECTION_DATA_REL_RO 1
 #define HAVE_TEXI2HTML 0
-#define HAVE_THREADS 1
-#define HAVE_UWP 0
-#define HAVE_VAAPI_DRM 0
-#define HAVE_VAAPI_X11 0
-#define HAVE_VDPAU_X11 0
-#define HAVE_WINRT 0
 #define CONFIG_DOC 0
 #define CONFIG_HTMLPAGES 0
 #define CONFIG_MANPAGES 0
@@ -414,41 +400,31 @@
 #define CONFIG_SCALING_VIDEO_EXAMPLE 0
 #define CONFIG_TRANSCODE_AAC_EXAMPLE 0
 #define CONFIG_TRANSCODING_EXAMPLE 0
-#define CONFIG_ALSA 0
-#define CONFIG_APPKIT 0
-#define CONFIG_AVFOUNDATION 0
-#define CONFIG_BZLIB 0
-#define CONFIG_COREIMAGE 0
-#define CONFIG_ICONV 0
-#define CONFIG_JACK 0
-#define CONFIG_LIBXCB 0
-#define CONFIG_LIBXCB_SHM 0
-#define CONFIG_LIBXCB_SHAPE 0
-#define CONFIG_LIBXCB_XFIXES 0
-#define CONFIG_LZMA 1
-#define CONFIG_SCHANNEL 0
-#define CONFIG_SDL2 0
-#define CONFIG_SECURETRANSPORT 0
-#define CONFIG_SNDIO 0
-#define CONFIG_XLIB 0
-#define CONFIG_ZLIB 0
+#define CONFIG_VAAPI_ENCODE_EXAMPLE 0
+#define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0
 #define CONFIG_AVISYNTH 0
 #define CONFIG_FREI0R 0
 #define CONFIG_LIBCDIO 0
+#define CONFIG_LIBDAVS2 0
 #define CONFIG_LIBRUBBERBAND 0
 #define CONFIG_LIBVIDSTAB 0
 #define CONFIG_LIBX264 0
 #define CONFIG_LIBX265 0
 #define CONFIG_LIBXAVS 0
+#define CONFIG_LIBXAVS2 0
 #define CONFIG_LIBXVID 0
 #define CONFIG_DECKLINK 0
 #define CONFIG_LIBNDI_NEWTEK 0
 #define CONFIG_LIBFDK_AAC 0
 #define CONFIG_OPENSSL 0
+#define CONFIG_LIBTLS 0
 #define CONFIG_GMP 0
+#define CONFIG_LIBLENSFUN 0
 #define CONFIG_LIBOPENCORE_AMRNB 0
 #define CONFIG_LIBOPENCORE_AMRWB 0
+#define CONFIG_LIBVMAF 0
 #define CONFIG_LIBVO_AMRWBENC 0
+#define CONFIG_MBEDTLS 0
 #define CONFIG_RKMPP 0
 #define CONFIG_LIBSMBCLIENT 0
 #define CONFIG_CHROMAPRINT 0
@@ -456,11 +432,13 @@
 #define CONFIG_GNUTLS 0
 #define CONFIG_JNI 0
 #define CONFIG_LADSPA 0
+#define CONFIG_LIBAOM 0
 #define CONFIG_LIBASS 0
 #define CONFIG_LIBBLURAY 0
 #define CONFIG_LIBBS2B 0
 #define CONFIG_LIBCACA 0
 #define CONFIG_LIBCELT 0
+#define CONFIG_LIBCODEC2 0
 #define CONFIG_LIBDC1394 0
 #define CONFIG_LIBDRM 0
 #define CONFIG_LIBFLITE 0
@@ -471,6 +449,8 @@
 #define CONFIG_LIBGSM 0
 #define CONFIG_LIBIEC61883 0
 #define CONFIG_LIBILBC 0
+#define CONFIG_LIBJACK 0
+#define CONFIG_LIBKLVANC 0
 #define CONFIG_LIBKVAZAAR 0
 #define CONFIG_LIBMODPLUG 0
 #define CONFIG_LIBMP3LAME 0
@@ -488,12 +468,13 @@
 #define CONFIG_LIBSNAPPY 0
 #define CONFIG_LIBSOXR 0
 #define CONFIG_LIBSPEEX 0
+#define CONFIG_LIBSRT 0
 #define CONFIG_LIBSSH 0
+#define CONFIG_LIBTENSORFLOW 0
 #define CONFIG_LIBTESSERACT 0
 #define CONFIG_LIBTHEORA 0
 #define CONFIG_LIBTWOLAME 0
 #define CONFIG_LIBV4L2 0
-#define CONFIG_LIBVMAF 0
 #define CONFIG_LIBVORBIS 0
 #define CONFIG_LIBVPX 0
 #define CONFIG_LIBWAVPACK 0
@@ -502,28 +483,49 @@
 #define CONFIG_LIBZIMG 0
 #define CONFIG_LIBZMQ 0
 #define CONFIG_LIBZVBI 0
+#define CONFIG_LV2 0
 #define CONFIG_MEDIACODEC 0
 #define CONFIG_OPENAL 0
-#define CONFIG_OPENCL 0
 #define CONFIG_OPENGL 0
+#define CONFIG_VAPOURSYNTH 0
+#define CONFIG_ALSA 1
+#define CONFIG_APPKIT 0
+#define CONFIG_AVFOUNDATION 0
+#define CONFIG_BZLIB 0
+#define CONFIG_COREIMAGE 0
+#define CONFIG_ICONV 0
+#define CONFIG_LIBXCB 0
+#define CONFIG_LIBXCB_SHM 0
+#define CONFIG_LIBXCB_SHAPE 0
+#define CONFIG_LIBXCB_XFIXES 0
+#define CONFIG_LZMA 0
+#define CONFIG_SCHANNEL 0
+#define CONFIG_SDL2 0
+#define CONFIG_SECURETRANSPORT 0
+#define CONFIG_SNDIO 0
+#define CONFIG_XLIB 0
+#define CONFIG_ZLIB 0
+#define CONFIG_CUDA_SDK 0
+#define CONFIG_LIBNPP 0
+#define CONFIG_LIBMFX 0
+#define CONFIG_MMAL 0
+#define CONFIG_OMX 0
+#define CONFIG_OPENCL 0
+#define CONFIG_AMF 0
 #define CONFIG_AUDIOTOOLBOX 0
 #define CONFIG_CRYSTALHD 0
 #define CONFIG_CUDA 0
 #define CONFIG_CUVID 0
 #define CONFIG_D3D11VA 0
 #define CONFIG_DXVA2 0
+#define CONFIG_FFNVCODEC 0
+#define CONFIG_NVDEC 0
 #define CONFIG_NVENC 0
 #define CONFIG_VAAPI 0
-#define CONFIG_VDA 0
 #define CONFIG_VDPAU 0
 #define CONFIG_VIDEOTOOLBOX 0
 #define CONFIG_V4L2_M2M 0
 #define CONFIG_XVMC 0
-#define CONFIG_CUDA_SDK 0
-#define CONFIG_LIBNPP 0
-#define CONFIG_LIBMFX 0
-#define CONFIG_MMAL 0
-#define CONFIG_OMX 0
 #define CONFIG_FTRAPV 0
 #define CONFIG_GRAY 0
 #define CONFIG_HARDCODED_TABLES 0
@@ -537,20 +539,19 @@
 #define CONFIG_GPL 0
 #define CONFIG_NONFREE 0
 #define CONFIG_VERSION3 0
-#define CONFIG_AVCODEC 1
 #define CONFIG_AVDEVICE 0
 #define CONFIG_AVFILTER 0
+#define CONFIG_SWSCALE 0
+#define CONFIG_POSTPROC 0
 #define CONFIG_AVFORMAT 1
+#define CONFIG_AVCODEC 1
+#define CONFIG_SWRESAMPLE 0
 #define CONFIG_AVRESAMPLE 0
 #define CONFIG_AVUTIL 1
-#define CONFIG_POSTPROC 0
-#define CONFIG_SWRESAMPLE 0
-#define CONFIG_SWSCALE 0
 #define CONFIG_FFPLAY 0
 #define CONFIG_FFPROBE 0
-#define CONFIG_FFSERVER 0
 #define CONFIG_FFMPEG 0
-#define CONFIG_DCT 0
+#define CONFIG_DCT 1
 #define CONFIG_DWT 0
 #define CONFIG_ERROR_RESILIENCE 0
 #define CONFIG_FAAN 0
@@ -585,12 +586,21 @@
 #define CONFIG_PROTOCOLS 0
 #define CONFIG_AANDCTTABLES 0
 #define CONFIG_AC3DSP 0
-#define CONFIG_AUDIO_FRAME_QUEUE 0
+#define CONFIG_ADTS_HEADER 0
+#define CONFIG_AUDIO_FRAME_QUEUE 1
 #define CONFIG_AUDIODSP 0
 #define CONFIG_BLOCKDSP 0
 #define CONFIG_BSWAPDSP 0
 #define CONFIG_CABAC 0
+#define CONFIG_CBS 0
+#define CONFIG_CBS_AV1 0
+#define CONFIG_CBS_H264 0
+#define CONFIG_CBS_H265 0
+#define CONFIG_CBS_JPEG 0
+#define CONFIG_CBS_MPEG2 0
+#define CONFIG_CBS_VP9 0
 #define CONFIG_DIRAC_PARSE 1
+#define CONFIG_DNN 0
 #define CONFIG_DVPROFILE 0
 #define CONFIG_EXIF 0
 #define CONFIG_FAANDCT 0
@@ -629,9 +639,9 @@
 #define CONFIG_LZF 0
 #define CONFIG_ME_CMP 0
 #define CONFIG_MPEG_ER 0
-#define CONFIG_MPEGAUDIO 0
-#define CONFIG_MPEGAUDIODSP 0
-#define CONFIG_MPEGAUDIOHEADER 0
+#define CONFIG_MPEGAUDIO 1
+#define CONFIG_MPEGAUDIODSP 1
+#define CONFIG_MPEGAUDIOHEADER 1
 #define CONFIG_MPEGVIDEO 0
 #define CONFIG_MPEGVIDEOENC 0
 #define CONFIG_MSS34DSP 0
@@ -640,6 +650,7 @@
 #define CONFIG_QSV 0
 #define CONFIG_QSVDEC 0
 #define CONFIG_QSVENC 0
+#define CONFIG_QSVVPP 0
 #define CONFIG_RANGECODER 0
 #define CONFIG_RIFFDEC 1
 #define CONFIG_RIFFENC 0
@@ -663,22 +674,32 @@
 #define CONFIG_WMA_FREQS 0
 #define CONFIG_WMV2DSP 0
 #define CONFIG_AAC_ADTSTOASC_BSF 0
+#define CONFIG_AV1_METADATA_BSF 0
 #define CONFIG_CHOMP_BSF 0
 #define CONFIG_DUMP_EXTRADATA_BSF 0
 #define CONFIG_DCA_CORE_BSF 0
+#define CONFIG_EAC3_CORE_BSF 0
 #define CONFIG_EXTRACT_EXTRADATA_BSF 0
+#define CONFIG_FILTER_UNITS_BSF 0
+#define CONFIG_H264_METADATA_BSF 0
 #define CONFIG_H264_MP4TOANNEXB_BSF 0
+#define CONFIG_H264_REDUNDANT_PPS_BSF 0
+#define CONFIG_HAPQA_EXTRACT_BSF 0
+#define CONFIG_HEVC_METADATA_BSF 0
 #define CONFIG_HEVC_MP4TOANNEXB_BSF 0
 #define CONFIG_IMX_DUMP_HEADER_BSF 0
 #define CONFIG_MJPEG2JPEG_BSF 0
 #define CONFIG_MJPEGA_DUMP_HEADER_BSF 0
 #define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0
+#define CONFIG_MPEG2_METADATA_BSF 0
 #define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0
 #define CONFIG_MOV2TEXTSUB_BSF 0
 #define CONFIG_NOISE_BSF 0
 #define CONFIG_NULL_BSF 1
 #define CONFIG_REMOVE_EXTRADATA_BSF 0
 #define CONFIG_TEXT2MOVSUB_BSF 0
+#define CONFIG_TRACE_HEADERS_BSF 0
+#define CONFIG_VP9_METADATA_BSF 0
 #define CONFIG_VP9_RAW_REORDER_BSF 0
 #define CONFIG_VP9_SUPERFRAME_BSF 0
 #define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0
@@ -701,6 +722,7 @@
 #define CONFIG_BETHSOFTVID_DECODER 0
 #define CONFIG_BFI_DECODER 0
 #define CONFIG_BINK_DECODER 0
+#define CONFIG_BITPACKED_DECODER 0
 #define CONFIG_BMP_DECODER 0
 #define CONFIG_BMV_VIDEO_DECODER 0
 #define CONFIG_BRENDER_PIX_DECODER 0
@@ -766,8 +788,6 @@
 #define CONFIG_H264_MMAL_DECODER 0
 #define CONFIG_H264_QSV_DECODER 0
 #define CONFIG_H264_RKMPP_DECODER 0
-#define CONFIG_H264_VDA_DECODER 0
-#define CONFIG_H264_VDPAU_DECODER 0
 #define CONFIG_HAP_DECODER 0
 #define CONFIG_HEVC_DECODER 0
 #define CONFIG_HEVC_QSV_DECODER 0
@@ -779,6 +799,7 @@
 #define CONFIG_HUFFYUV_DECODER 0
 #define CONFIG_IDCIN_DECODER 0
 #define CONFIG_IFF_ILBM_DECODER 0
+#define CONFIG_IMM4_DECODER 0
 #define CONFIG_INDEO2_DECODER 0
 #define CONFIG_INDEO3_DECODER 0
 #define CONFIG_INDEO4_DECODER 0
@@ -799,17 +820,13 @@
 #define CONFIG_MJPEGB_DECODER 0
 #define CONFIG_MMVIDEO_DECODER 0
 #define CONFIG_MOTIONPIXELS_DECODER 0
-#define CONFIG_MPEG_XVMC_DECODER 0
 #define CONFIG_MPEG1VIDEO_DECODER 0
 #define CONFIG_MPEG2VIDEO_DECODER 0
 #define CONFIG_MPEG4_DECODER 0
 #define CONFIG_MPEG4_CRYSTALHD_DECODER 0
 #define CONFIG_MPEG4_V4L2M2M_DECODER 0
 #define CONFIG_MPEG4_MMAL_DECODER 0
-#define CONFIG_MPEG4_VDPAU_DECODER 0
 #define CONFIG_MPEGVIDEO_DECODER 0
-#define CONFIG_MPEG_VDPAU_DECODER 0
-#define CONFIG_MPEG1_VDPAU_DECODER 0
 #define CONFIG_MPEG1_V4L2M2M_DECODER 0
 #define CONFIG_MPEG2_MMAL_DECODER 0
 #define CONFIG_MPEG2_CRYSTALHD_DECODER 0
@@ -830,6 +847,7 @@
 #define CONFIG_MTS2_DECODER 0
 #define CONFIG_MVC1_DECODER 0
 #define CONFIG_MVC2_DECODER 0
+#define CONFIG_MWSC_DECODER 0
 #define CONFIG_MXPEG_DECODER 0
 #define CONFIG_NUV_DECODER 0
 #define CONFIG_PAF_VIDEO_DECODER 0
@@ -843,7 +861,7 @@
 #define CONFIG_PNG_DECODER 0
 #define CONFIG_PPM_DECODER 0
 #define CONFIG_PRORES_DECODER 0
-#define CONFIG_PRORES_LGPL_DECODER 0
+#define CONFIG_PROSUMER_DECODER 0
 #define CONFIG_PSD_DECODER 0
 #define CONFIG_PTX_DECODER 0
 #define CONFIG_QDRAW_DECODER 0
@@ -851,6 +869,7 @@
 #define CONFIG_QTRLE_DECODER 0
 #define CONFIG_R10K_DECODER 0
 #define CONFIG_R210_DECODER 0
+#define CONFIG_RASC_DECODER 0
 #define CONFIG_RAWVIDEO_DECODER 0
 #define CONFIG_RL2_DECODER 0
 #define CONFIG_ROQ_DECODER 0
@@ -903,7 +922,6 @@
 #define CONFIG_VBLE_DECODER 0
 #define CONFIG_VC1_DECODER 0
 #define CONFIG_VC1_CRYSTALHD_DECODER 0
-#define CONFIG_VC1_VDPAU_DECODER 0
 #define CONFIG_VC1IMAGE_DECODER 0
 #define CONFIG_VC1_MMAL_DECODER 0
 #define CONFIG_VC1_QSV_DECODER 0
@@ -924,14 +942,13 @@
 #define CONFIG_VP9_RKMPP_DECODER 0
 #define CONFIG_VP9_V4L2M2M_DECODER 0
 #define CONFIG_VQA_DECODER 0
-#define CONFIG_BITPACKED_DECODER 0
 #define CONFIG_WEBP_DECODER 0
+#define CONFIG_WCMV_DECODER 0
 #define CONFIG_WRAPPED_AVFRAME_DECODER 0
 #define CONFIG_WMV1_DECODER 0
 #define CONFIG_WMV2_DECODER 0
 #define CONFIG_WMV3_DECODER 0
 #define CONFIG_WMV3_CRYSTALHD_DECODER 0
-#define CONFIG_WMV3_VDPAU_DECODER 0
 #define CONFIG_WMV3IMAGE_DECODER 0
 #define CONFIG_WNV1_DECODER 0
 #define CONFIG_XAN_WC3_DECODER 0
@@ -959,11 +976,14 @@
 #define CONFIG_AMRNB_DECODER 0
 #define CONFIG_AMRWB_DECODER 0
 #define CONFIG_APE_DECODER 0
+#define CONFIG_APTX_DECODER 1
+#define CONFIG_APTX_HD_DECODER 0
 #define CONFIG_ATRAC1_DECODER 0
 #define CONFIG_ATRAC3_DECODER 0
 #define CONFIG_ATRAC3AL_DECODER 0
 #define CONFIG_ATRAC3P_DECODER 0
 #define CONFIG_ATRAC3PAL_DECODER 0
+#define CONFIG_ATRAC9_DECODER 0
 #define CONFIG_BINKAUDIO_DCT_DECODER 0
 #define CONFIG_BINKAUDIO_RDFT_DECODER 0
 #define CONFIG_BMV_AUDIO_DECODER 0
@@ -986,6 +1006,7 @@
 #define CONFIG_GSM_DECODER 0
 #define CONFIG_GSM_MS_DECODER 0
 #define CONFIG_IAC_DECODER 0
+#define CONFIG_ILBC_DECODER 0
 #define CONFIG_IMC_DECODER 0
 #define CONFIG_INTERPLAY_ACM_DECODER 0
 #define CONFIG_MACE3_DECODER 0
@@ -996,12 +1017,12 @@
 #define CONFIG_MP1FLOAT_DECODER 0
 #define CONFIG_MP2_DECODER 0
 #define CONFIG_MP2FLOAT_DECODER 0
-#define CONFIG_MP3_DECODER 0
 #define CONFIG_MP3FLOAT_DECODER 0
-#define CONFIG_MP3ADU_DECODER 0
+#define CONFIG_MP3_DECODER 1
 #define CONFIG_MP3ADUFLOAT_DECODER 0
-#define CONFIG_MP3ON4_DECODER 0
+#define CONFIG_MP3ADU_DECODER 0
 #define CONFIG_MP3ON4FLOAT_DECODER 0
+#define CONFIG_MP3ON4_DECODER 0
 #define CONFIG_MPC7_DECODER 0
 #define CONFIG_MPC8_DECODER 0
 #define CONFIG_NELLYMOSER_DECODER 0
@@ -1014,6 +1035,7 @@
 #define CONFIG_RA_144_DECODER 0
 #define CONFIG_RA_288_DECODER 0
 #define CONFIG_RALF_DECODER 0
+#define CONFIG_SBC_DECODER 1
 #define CONFIG_SHORTEN_DECODER 0
 #define CONFIG_SIPR_DECODER 0
 #define CONFIG_SMACKAUD_DECODER 0
@@ -1151,7 +1173,10 @@
 #define CONFIG_PCM_MULAW_AT_DECODER 0
 #define CONFIG_QDMC_AT_DECODER 0
 #define CONFIG_QDM2_AT_DECODER 0
+#define CONFIG_LIBAOM_AV1_DECODER 0
 #define CONFIG_LIBCELT_DECODER 0
+#define CONFIG_LIBCODEC2_DECODER 0
+#define CONFIG_LIBDAVS2_DECODER 0
 #define CONFIG_LIBFDK_AAC_DECODER 0
 #define CONFIG_LIBGSM_DECODER 0
 #define CONFIG_LIBGSM_MS_DECODER 0
@@ -1184,288 +1209,6 @@
 #define CONFIG_VP8_QSV_DECODER 0
 #define CONFIG_VP9_CUVID_DECODER 0
 #define CONFIG_VP9_MEDIACODEC_DECODER 0
-#define CONFIG_AA_DEMUXER 0
-#define CONFIG_AAC_DEMUXER 0
-#define CONFIG_AC3_DEMUXER 0
-#define CONFIG_ACM_DEMUXER 0
-#define CONFIG_ACT_DEMUXER 0
-#define CONFIG_ADF_DEMUXER 0
-#define CONFIG_ADP_DEMUXER 0
-#define CONFIG_ADS_DEMUXER 0
-#define CONFIG_ADX_DEMUXER 0
-#define CONFIG_AEA_DEMUXER 0
-#define CONFIG_AFC_DEMUXER 0
-#define CONFIG_AIFF_DEMUXER 0
-#define CONFIG_AIX_DEMUXER 0
-#define CONFIG_AMR_DEMUXER 0
-#define CONFIG_ANM_DEMUXER 0
-#define CONFIG_APC_DEMUXER 0
-#define CONFIG_APE_DEMUXER 0
-#define CONFIG_APNG_DEMUXER 0
-#define CONFIG_AQTITLE_DEMUXER 0
-#define CONFIG_ASF_DEMUXER 0
-#define CONFIG_ASF_O_DEMUXER 0
-#define CONFIG_ASS_DEMUXER 0
-#define CONFIG_AST_DEMUXER 0
-#define CONFIG_AU_DEMUXER 0
-#define CONFIG_AVI_DEMUXER 0
-#define CONFIG_AVISYNTH_DEMUXER 0
-#define CONFIG_AVR_DEMUXER 0
-#define CONFIG_AVS_DEMUXER 0
-#define CONFIG_BETHSOFTVID_DEMUXER 0
-#define CONFIG_BFI_DEMUXER 0
-#define CONFIG_BINTEXT_DEMUXER 0
-#define CONFIG_BINK_DEMUXER 0
-#define CONFIG_BIT_DEMUXER 0
-#define CONFIG_BMV_DEMUXER 0
-#define CONFIG_BFSTM_DEMUXER 0
-#define CONFIG_BRSTM_DEMUXER 0
-#define CONFIG_BOA_DEMUXER 0
-#define CONFIG_C93_DEMUXER 0
-#define CONFIG_CAF_DEMUXER 0
-#define CONFIG_CAVSVIDEO_DEMUXER 0
-#define CONFIG_CDG_DEMUXER 0
-#define CONFIG_CDXL_DEMUXER 0
-#define CONFIG_CINE_DEMUXER 0
-#define CONFIG_CONCAT_DEMUXER 0
-#define CONFIG_DASH_DEMUXER 0
-#define CONFIG_DATA_DEMUXER 0
-#define CONFIG_DAUD_DEMUXER 0
-#define CONFIG_DCSTR_DEMUXER 0
-#define CONFIG_DFA_DEMUXER 0
-#define CONFIG_DIRAC_DEMUXER 0
-#define CONFIG_DNXHD_DEMUXER 0
-#define CONFIG_DSF_DEMUXER 0
-#define CONFIG_DSICIN_DEMUXER 0
-#define CONFIG_DSS_DEMUXER 0
-#define CONFIG_DTS_DEMUXER 0
-#define CONFIG_DTSHD_DEMUXER 0
-#define CONFIG_DV_DEMUXER 0
-#define CONFIG_DVBSUB_DEMUXER 0
-#define CONFIG_DVBTXT_DEMUXER 0
-#define CONFIG_DXA_DEMUXER 0
-#define CONFIG_EA_DEMUXER 0
-#define CONFIG_EA_CDATA_DEMUXER 0
-#define CONFIG_EAC3_DEMUXER 0
-#define CONFIG_EPAF_DEMUXER 0
-#define CONFIG_FFM_DEMUXER 0
-#define CONFIG_FFMETADATA_DEMUXER 0
-#define CONFIG_FILMSTRIP_DEMUXER 0
-#define CONFIG_FITS_DEMUXER 0
-#define CONFIG_FLAC_DEMUXER 1
-#define CONFIG_FLIC_DEMUXER 0
-#define CONFIG_FLV_DEMUXER 0
-#define CONFIG_LIVE_FLV_DEMUXER 0
-#define CONFIG_FOURXM_DEMUXER 0
-#define CONFIG_FRM_DEMUXER 0
-#define CONFIG_FSB_DEMUXER 0
-#define CONFIG_G722_DEMUXER 0
-#define CONFIG_G723_1_DEMUXER 0
-#define CONFIG_G726_DEMUXER 0
-#define CONFIG_G726LE_DEMUXER 0
-#define CONFIG_G729_DEMUXER 0
-#define CONFIG_GDV_DEMUXER 0
-#define CONFIG_GENH_DEMUXER 0
-#define CONFIG_GIF_DEMUXER 0
-#define CONFIG_GSM_DEMUXER 0
-#define CONFIG_GXF_DEMUXER 0
-#define CONFIG_H261_DEMUXER 0
-#define CONFIG_H263_DEMUXER 0
-#define CONFIG_H264_DEMUXER 0
-#define CONFIG_HEVC_DEMUXER 0
-#define CONFIG_HLS_DEMUXER 0
-#define CONFIG_HNM_DEMUXER 0
-#define CONFIG_ICO_DEMUXER 0
-#define CONFIG_IDCIN_DEMUXER 0
-#define CONFIG_IDF_DEMUXER 0
-#define CONFIG_IFF_DEMUXER 0
-#define CONFIG_ILBC_DEMUXER 0
-#define CONFIG_IMAGE2_DEMUXER 0
-#define CONFIG_IMAGE2PIPE_DEMUXER 0
-#define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 0
-#define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0
-#define CONFIG_INGENIENT_DEMUXER 0
-#define CONFIG_IPMOVIE_DEMUXER 0
-#define CONFIG_IRCAM_DEMUXER 0
-#define CONFIG_ISS_DEMUXER 0
-#define CONFIG_IV8_DEMUXER 0
-#define CONFIG_IVF_DEMUXER 0
-#define CONFIG_IVR_DEMUXER 0
-#define CONFIG_JACOSUB_DEMUXER 0
-#define CONFIG_JV_DEMUXER 0
-#define CONFIG_LMLM4_DEMUXER 0
-#define CONFIG_LOAS_DEMUXER 0
-#define CONFIG_LRC_DEMUXER 0
-#define CONFIG_LVF_DEMUXER 0
-#define CONFIG_LXF_DEMUXER 0
-#define CONFIG_M4V_DEMUXER 0
-#define CONFIG_MATROSKA_DEMUXER 1
-#define CONFIG_MGSTS_DEMUXER 0
-#define CONFIG_MICRODVD_DEMUXER 0
-#define CONFIG_MJPEG_DEMUXER 0
-#define CONFIG_MJPEG_2000_DEMUXER 0
-#define CONFIG_MLP_DEMUXER 0
-#define CONFIG_MLV_DEMUXER 0
-#define CONFIG_MM_DEMUXER 0
-#define CONFIG_MMF_DEMUXER 0
-#define CONFIG_MOV_DEMUXER 0
-#define CONFIG_MP3_DEMUXER 0
-#define CONFIG_MPC_DEMUXER 0
-#define CONFIG_MPC8_DEMUXER 0
-#define CONFIG_MPEGPS_DEMUXER 0
-#define CONFIG_MPEGTS_DEMUXER 0
-#define CONFIG_MPEGTSRAW_DEMUXER 0
-#define CONFIG_MPEGVIDEO_DEMUXER 0
-#define CONFIG_MPJPEG_DEMUXER 0
-#define CONFIG_MPL2_DEMUXER 0
-#define CONFIG_MPSUB_DEMUXER 0
-#define CONFIG_MSF_DEMUXER 0
-#define CONFIG_MSNWC_TCP_DEMUXER 0
-#define CONFIG_MTAF_DEMUXER 0
-#define CONFIG_MTV_DEMUXER 0
-#define CONFIG_MUSX_DEMUXER 0
-#define CONFIG_MV_DEMUXER 0
-#define CONFIG_MVI_DEMUXER 0
-#define CONFIG_MXF_DEMUXER 0
-#define CONFIG_MXG_DEMUXER 0
-#define CONFIG_NC_DEMUXER 0
-#define CONFIG_NISTSPHERE_DEMUXER 0
-#define CONFIG_NSV_DEMUXER 0
-#define CONFIG_NUT_DEMUXER 0
-#define CONFIG_NUV_DEMUXER 0
-#define CONFIG_OGG_DEMUXER 1
-#define CONFIG_OMA_DEMUXER 0
-#define CONFIG_PAF_DEMUXER 0
-#define CONFIG_PCM_ALAW_DEMUXER 0
-#define CONFIG_PCM_MULAW_DEMUXER 0
-#define CONFIG_PCM_F64BE_DEMUXER 0
-#define CONFIG_PCM_F64LE_DEMUXER 0
-#define CONFIG_PCM_F32BE_DEMUXER 0
-#define CONFIG_PCM_F32LE_DEMUXER 0
-#define CONFIG_PCM_S32BE_DEMUXER 0
-#define CONFIG_PCM_S32LE_DEMUXER 0
-#define CONFIG_PCM_S24BE_DEMUXER 0
-#define CONFIG_PCM_S24LE_DEMUXER 0
-#define CONFIG_PCM_S16BE_DEMUXER 0
-#define CONFIG_PCM_S16LE_DEMUXER 0
-#define CONFIG_PCM_S8_DEMUXER 0
-#define CONFIG_PCM_U32BE_DEMUXER 0
-#define CONFIG_PCM_U32LE_DEMUXER 0
-#define CONFIG_PCM_U24BE_DEMUXER 0
-#define CONFIG_PCM_U24LE_DEMUXER 0
-#define CONFIG_PCM_U16BE_DEMUXER 0
-#define CONFIG_PCM_U16LE_DEMUXER 0
-#define CONFIG_PCM_U8_DEMUXER 0
-#define CONFIG_PJS_DEMUXER 0
-#define CONFIG_PMP_DEMUXER 0
-#define CONFIG_PVA_DEMUXER 0
-#define CONFIG_PVF_DEMUXER 0
-#define CONFIG_QCP_DEMUXER 0
-#define CONFIG_R3D_DEMUXER 0
-#define CONFIG_RAWVIDEO_DEMUXER 0
-#define CONFIG_REALTEXT_DEMUXER 0
-#define CONFIG_REDSPARK_DEMUXER 0
-#define CONFIG_RL2_DEMUXER 0
-#define CONFIG_RM_DEMUXER 0
-#define CONFIG_ROQ_DEMUXER 0
-#define CONFIG_RPL_DEMUXER 0
-#define CONFIG_RSD_DEMUXER 0
-#define CONFIG_RSO_DEMUXER 0
-#define CONFIG_RTP_DEMUXER 0
-#define CONFIG_RTSP_DEMUXER 0
-#define CONFIG_S337M_DEMUXER 0
-#define CONFIG_SAMI_DEMUXER 0
-#define CONFIG_SAP_DEMUXER 0
-#define CONFIG_SBG_DEMUXER 0
-#define CONFIG_SCC_DEMUXER 0
-#define CONFIG_SDP_DEMUXER 0
-#define CONFIG_SDR2_DEMUXER 0
-#define CONFIG_SDS_DEMUXER 0
-#define CONFIG_SDX_DEMUXER 0
-#define CONFIG_SEGAFILM_DEMUXER 0
-#define CONFIG_SHORTEN_DEMUXER 0
-#define CONFIG_SIFF_DEMUXER 0
-#define CONFIG_SLN_DEMUXER 0
-#define CONFIG_SMACKER_DEMUXER 0
-#define CONFIG_SMJPEG_DEMUXER 0
-#define CONFIG_SMUSH_DEMUXER 0
-#define CONFIG_SOL_DEMUXER 0
-#define CONFIG_SOX_DEMUXER 0
-#define CONFIG_SPDIF_DEMUXER 0
-#define CONFIG_SRT_DEMUXER 0
-#define CONFIG_STR_DEMUXER 0
-#define CONFIG_STL_DEMUXER 0
-#define CONFIG_SUBVIEWER1_DEMUXER 0
-#define CONFIG_SUBVIEWER_DEMUXER 0
-#define CONFIG_SUP_DEMUXER 0
-#define CONFIG_SVAG_DEMUXER 0
-#define CONFIG_SWF_DEMUXER 0
-#define CONFIG_TAK_DEMUXER 0
-#define CONFIG_TEDCAPTIONS_DEMUXER 0
-#define CONFIG_THP_DEMUXER 0
-#define CONFIG_THREEDOSTR_DEMUXER 0
-#define CONFIG_TIERTEXSEQ_DEMUXER 0
-#define CONFIG_TMV_DEMUXER 0
-#define CONFIG_TRUEHD_DEMUXER 0
-#define CONFIG_TTA_DEMUXER 0
-#define CONFIG_TXD_DEMUXER 0
-#define CONFIG_TTY_DEMUXER 0
-#define CONFIG_V210_DEMUXER 0
-#define CONFIG_V210X_DEMUXER 0
-#define CONFIG_VAG_DEMUXER 0
-#define CONFIG_VC1_DEMUXER 0
-#define CONFIG_VC1T_DEMUXER 0
-#define CONFIG_VIVO_DEMUXER 0
-#define CONFIG_VMD_DEMUXER 0
-#define CONFIG_VOBSUB_DEMUXER 0
-#define CONFIG_VOC_DEMUXER 0
-#define CONFIG_VPK_DEMUXER 0
-#define CONFIG_VPLAYER_DEMUXER 0
-#define CONFIG_VQF_DEMUXER 0
-#define CONFIG_W64_DEMUXER 0
-#define CONFIG_WAV_DEMUXER 1
-#define CONFIG_WC3_DEMUXER 0
-#define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0
-#define CONFIG_WEBVTT_DEMUXER 0
-#define CONFIG_WSAUD_DEMUXER 0
-#define CONFIG_WSD_DEMUXER 0
-#define CONFIG_WSVQA_DEMUXER 0
-#define CONFIG_WTV_DEMUXER 0
-#define CONFIG_WVE_DEMUXER 0
-#define CONFIG_WV_DEMUXER 0
-#define CONFIG_XA_DEMUXER 0
-#define CONFIG_XBIN_DEMUXER 0
-#define CONFIG_XMV_DEMUXER 0
-#define CONFIG_XVAG_DEMUXER 0
-#define CONFIG_XWMA_DEMUXER 0
-#define CONFIG_YOP_DEMUXER 0
-#define CONFIG_YUV4MPEGPIPE_DEMUXER 0
-#define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0
-#define CONFIG_LIBGME_DEMUXER 0
-#define CONFIG_LIBMODPLUG_DEMUXER 0
-#define CONFIG_LIBOPENMPT_DEMUXER 0
 #define CONFIG_A64MULTI_ENCODER 0
 #define CONFIG_A64MULTI5_ENCODER 0
 #define CONFIG_ALIAS_PIX_ENCODER 0
@@ -1498,6 +1241,7 @@
 #define CONFIG_JPEG2000_ENCODER 0
 #define CONFIG_JPEGLS_ENCODER 0
 #define CONFIG_LJPEG_ENCODER 0
+#define CONFIG_MAGICYUV_ENCODER 0
 #define CONFIG_MJPEG_ENCODER 0
 #define CONFIG_MPEG1VIDEO_ENCODER 0
 #define CONFIG_MPEG2VIDEO_ENCODER 0
@@ -1549,6 +1293,8 @@
 #define CONFIG_AC3_ENCODER 0
 #define CONFIG_AC3_FIXED_ENCODER 0
 #define CONFIG_ALAC_ENCODER 0
+#define CONFIG_APTX_ENCODER 0
+#define CONFIG_APTX_HD_ENCODER 0
 #define CONFIG_DCA_ENCODER 0
 #define CONFIG_EAC3_ENCODER 0
 #define CONFIG_FLAC_ENCODER 0
@@ -1559,6 +1305,7 @@
 #define CONFIG_NELLYMOSER_ENCODER 0
 #define CONFIG_OPUS_ENCODER 0
 #define CONFIG_RA_144_ENCODER 0
+#define CONFIG_SBC_ENCODER 0
 #define CONFIG_SONIC_ENCODER 0
 #define CONFIG_SONIC_LS_ENCODER 0
 #define CONFIG_TRUEHD_ENCODER 0
@@ -1620,6 +1367,8 @@
 #define CONFIG_ILBC_AT_ENCODER 0
 #define CONFIG_PCM_ALAW_AT_ENCODER 0
 #define CONFIG_PCM_MULAW_AT_ENCODER 0
+#define CONFIG_LIBAOM_AV1_ENCODER 0
+#define CONFIG_LIBCODEC2_ENCODER 0
 #define CONFIG_LIBFDK_AAC_ENCODER 0
 #define CONFIG_LIBGSM_ENCODER 0
 #define CONFIG_LIBGSM_MS_ENCODER 0
@@ -1644,9 +1393,11 @@
 #define CONFIG_LIBX264RGB_ENCODER 0
 #define CONFIG_LIBX265_ENCODER 0
 #define CONFIG_LIBXAVS_ENCODER 0
+#define CONFIG_LIBXAVS2_ENCODER 0
 #define CONFIG_LIBXVID_ENCODER 0
 #define CONFIG_H263_V4L2M2M_ENCODER 0
 #define CONFIG_LIBOPENH264_ENCODER 0
+#define CONFIG_H264_AMF_ENCODER 0
 #define CONFIG_H264_NVENC_ENCODER 0
 #define CONFIG_H264_OMX_ENCODER 0
 #define CONFIG_H264_QSV_ENCODER 0
@@ -1656,11 +1407,14 @@
 #define CONFIG_NVENC_ENCODER 0
 #define CONFIG_NVENC_H264_ENCODER 0
 #define CONFIG_NVENC_HEVC_ENCODER 0
+#define CONFIG_HEVC_AMF_ENCODER 0
 #define CONFIG_HEVC_NVENC_ENCODER 0
 #define CONFIG_HEVC_QSV_ENCODER 0
 #define CONFIG_HEVC_V4L2M2M_ENCODER 0
 #define CONFIG_HEVC_VAAPI_ENCODER 0
+#define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0
 #define CONFIG_LIBKVAZAAR_ENCODER 0
+#define CONFIG_MJPEG_QSV_ENCODER 0
 #define CONFIG_MJPEG_VAAPI_ENCODER 0
 #define CONFIG_MPEG2_QSV_ENCODER 0
 #define CONFIG_MPEG2_VAAPI_ENCODER 0
@@ -1668,20 +1422,159 @@
 #define CONFIG_VP8_V4L2M2M_ENCODER 0
 #define CONFIG_VP8_VAAPI_ENCODER 0
 #define CONFIG_VP9_VAAPI_ENCODER 0
+#define CONFIG_H263_VAAPI_HWACCEL 0
+#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_H264_D3D11VA_HWACCEL 0
+#define CONFIG_H264_D3D11VA2_HWACCEL 0
+#define CONFIG_H264_DXVA2_HWACCEL 0
+#define CONFIG_H264_NVDEC_HWACCEL 0
+#define CONFIG_H264_VAAPI_HWACCEL 0
+#define CONFIG_H264_VDPAU_HWACCEL 0
+#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_HEVC_D3D11VA_HWACCEL 0
+#define CONFIG_HEVC_D3D11VA2_HWACCEL 0
+#define CONFIG_HEVC_DXVA2_HWACCEL 0
+#define CONFIG_HEVC_NVDEC_HWACCEL 0
+#define CONFIG_HEVC_VAAPI_HWACCEL 0
+#define CONFIG_HEVC_VDPAU_HWACCEL 0
+#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MJPEG_NVDEC_HWACCEL 0
+#define CONFIG_MJPEG_VAAPI_HWACCEL 0
+#define CONFIG_MPEG1_NVDEC_HWACCEL 0
+#define CONFIG_MPEG1_VDPAU_HWACCEL 0
+#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MPEG1_XVMC_HWACCEL 0
+#define CONFIG_MPEG2_D3D11VA_HWACCEL 0
+#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
+#define CONFIG_MPEG2_NVDEC_HWACCEL 0
+#define CONFIG_MPEG2_DXVA2_HWACCEL 0
+#define CONFIG_MPEG2_VAAPI_HWACCEL 0
+#define CONFIG_MPEG2_VDPAU_HWACCEL 0
+#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MPEG2_XVMC_HWACCEL 0
+#define CONFIG_MPEG4_NVDEC_HWACCEL 0
+#define CONFIG_MPEG4_VAAPI_HWACCEL 0
+#define CONFIG_MPEG4_VDPAU_HWACCEL 0
+#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_VC1_D3D11VA_HWACCEL 0
+#define CONFIG_VC1_D3D11VA2_HWACCEL 0
+#define CONFIG_VC1_DXVA2_HWACCEL 0
+#define CONFIG_VC1_NVDEC_HWACCEL 0
+#define CONFIG_VC1_VAAPI_HWACCEL 0
+#define CONFIG_VC1_VDPAU_HWACCEL 0
+#define CONFIG_VP8_NVDEC_HWACCEL 0
+#define CONFIG_VP8_VAAPI_HWACCEL 0
+#define CONFIG_VP9_D3D11VA_HWACCEL 0
+#define CONFIG_VP9_D3D11VA2_HWACCEL 0
+#define CONFIG_VP9_DXVA2_HWACCEL 0
+#define CONFIG_VP9_NVDEC_HWACCEL 0
+#define CONFIG_VP9_VAAPI_HWACCEL 0
+#define CONFIG_WMV3_D3D11VA_HWACCEL 0
+#define CONFIG_WMV3_D3D11VA2_HWACCEL 0
+#define CONFIG_WMV3_DXVA2_HWACCEL 0
+#define CONFIG_WMV3_NVDEC_HWACCEL 0
+#define CONFIG_WMV3_VAAPI_HWACCEL 0
+#define CONFIG_WMV3_VDPAU_HWACCEL 0
+#define CONFIG_AAC_PARSER 0
+#define CONFIG_AAC_LATM_PARSER 0
+#define CONFIG_AC3_PARSER 0
+#define CONFIG_ADX_PARSER 0
+#define CONFIG_AV1_PARSER 0
+#define CONFIG_AVS2_PARSER 0
+#define CONFIG_BMP_PARSER 0
+#define CONFIG_CAVSVIDEO_PARSER 0
+#define CONFIG_COOK_PARSER 0
+#define CONFIG_DCA_PARSER 0
+#define CONFIG_DIRAC_PARSER 0
+#define CONFIG_DNXHD_PARSER 0
+#define CONFIG_DPX_PARSER 0
+#define CONFIG_DVAUDIO_PARSER 0
+#define CONFIG_DVBSUB_PARSER 0
+#define CONFIG_DVDSUB_PARSER 0
+#define CONFIG_DVD_NAV_PARSER 0
+#define CONFIG_FLAC_PARSER 1
+#define CONFIG_G729_PARSER 0
+#define CONFIG_GSM_PARSER 0
+#define CONFIG_H261_PARSER 0
+#define CONFIG_H263_PARSER 0
+#define CONFIG_H264_PARSER 0
+#define CONFIG_HEVC_PARSER 0
+#define CONFIG_MJPEG_PARSER 0
+#define CONFIG_MLP_PARSER 0
+#define CONFIG_MPEG4VIDEO_PARSER 0
+#define CONFIG_MPEGAUDIO_PARSER 1
+#define CONFIG_MPEGVIDEO_PARSER 0
+#define CONFIG_OPUS_PARSER 1
+#define CONFIG_PNG_PARSER 0
+#define CONFIG_PNM_PARSER 0
+#define CONFIG_RV30_PARSER 0
+#define CONFIG_RV40_PARSER 0
+#define CONFIG_SBC_PARSER 0
+#define CONFIG_SIPR_PARSER 0
+#define CONFIG_TAK_PARSER 0
+#define CONFIG_VC1_PARSER 0
+#define CONFIG_VORBIS_PARSER 1
+#define CONFIG_VP3_PARSER 1
+#define CONFIG_VP8_PARSER 1
+#define CONFIG_VP9_PARSER 0
+#define CONFIG_XMA_PARSER 0
+#define CONFIG_ALSA_INDEV 0
+#define CONFIG_ANDROID_CAMERA_INDEV 0
+#define CONFIG_AVFOUNDATION_INDEV 0
+#define CONFIG_BKTR_INDEV 0
+#define CONFIG_DECKLINK_INDEV 0
+#define CONFIG_LIBNDI_NEWTEK_INDEV 0
+#define CONFIG_DSHOW_INDEV 0
+#define CONFIG_FBDEV_INDEV 0
+#define CONFIG_GDIGRAB_INDEV 0
+#define CONFIG_IEC61883_INDEV 0
+#define CONFIG_JACK_INDEV 0
+#define CONFIG_KMSGRAB_INDEV 0
+#define CONFIG_LAVFI_INDEV 0
+#define CONFIG_OPENAL_INDEV 0
+#define CONFIG_OSS_INDEV 0
+#define CONFIG_PULSE_INDEV 0
+#define CONFIG_SNDIO_INDEV 0
+#define CONFIG_V4L2_INDEV 0
+#define CONFIG_VFWCAP_INDEV 0
+#define CONFIG_XCBGRAB_INDEV 0
+#define CONFIG_LIBCDIO_INDEV 0
+#define CONFIG_LIBDC1394_INDEV 0
+#define CONFIG_ALSA_OUTDEV 0
+#define CONFIG_CACA_OUTDEV 0
+#define CONFIG_DECKLINK_OUTDEV 0
+#define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
+#define CONFIG_FBDEV_OUTDEV 0
+#define CONFIG_OPENGL_OUTDEV 0
+#define CONFIG_OSS_OUTDEV 0
+#define CONFIG_PULSE_OUTDEV 0
+#define CONFIG_SDL2_OUTDEV 0
+#define CONFIG_SNDIO_OUTDEV 0
+#define CONFIG_V4L2_OUTDEV 0
+#define CONFIG_XV_OUTDEV 0
 #define CONFIG_ABENCH_FILTER 0
 #define CONFIG_ACOMPRESSOR_FILTER 0
+#define CONFIG_ACONTRAST_FILTER 0
 #define CONFIG_ACOPY_FILTER 0
+#define CONFIG_ACUE_FILTER 0
 #define CONFIG_ACROSSFADE_FILTER 0
+#define CONFIG_ACROSSOVER_FILTER 0
 #define CONFIG_ACRUSHER_FILTER 0
+#define CONFIG_ADECLICK_FILTER 0
+#define CONFIG_ADECLIP_FILTER 0
 #define CONFIG_ADELAY_FILTER 0
+#define CONFIG_ADERIVATIVE_FILTER 0
 #define CONFIG_AECHO_FILTER 0
 #define CONFIG_AEMPHASIS_FILTER 0
 #define CONFIG_AEVAL_FILTER 0
 #define CONFIG_AFADE_FILTER 0
+#define CONFIG_AFFTDN_FILTER 0
 #define CONFIG_AFFTFILT_FILTER 0
 #define CONFIG_AFIR_FILTER 0
 #define CONFIG_AFORMAT_FILTER 0
 #define CONFIG_AGATE_FILTER 0
+#define CONFIG_AIIR_FILTER 0
+#define CONFIG_AINTEGRAL_FILTER 0
 #define CONFIG_AINTERLEAVE_FILTER 0
 #define CONFIG_ALIMITER_FILTER 0
 #define CONFIG_ALLPASS_FILTER 0
@@ -1689,6 +1582,7 @@
 #define CONFIG_AMERGE_FILTER 0
 #define CONFIG_AMETADATA_FILTER 0
 #define CONFIG_AMIX_FILTER 0
+#define CONFIG_AMULTIPLY_FILTER 0
 #define CONFIG_ANEQUALIZER_FILTER 0
 #define CONFIG_ANULL_FILTER 0
 #define CONFIG_APAD_FILTER 0
@@ -1725,6 +1619,7 @@
 #define CONFIG_CROSSFEED_FILTER 0
 #define CONFIG_CRYSTALIZER_FILTER 0
 #define CONFIG_DCSHIFT_FILTER 0
+#define CONFIG_DRMETER_FILTER 0
 #define CONFIG_DYNAUDNORM_FILTER 0
 #define CONFIG_EARWAX_FILTER 0
 #define CONFIG_EBUR128_FILTER 0
@@ -1736,10 +1631,14 @@
 #define CONFIG_HDCD_FILTER 0
 #define CONFIG_HEADPHONE_FILTER 0
 #define CONFIG_HIGHPASS_FILTER 0
+#define CONFIG_HIGHSHELF_FILTER 0
 #define CONFIG_JOIN_FILTER 0
 #define CONFIG_LADSPA_FILTER 0
 #define CONFIG_LOUDNORM_FILTER 0
 #define CONFIG_LOWPASS_FILTER 0
+#define CONFIG_LOWSHELF_FILTER 0
+#define CONFIG_LV2_FILTER 0
+#define CONFIG_MCOMPAND_FILTER 0
 #define CONFIG_PAN_FILTER 0
 #define CONFIG_REPLAYGAIN_FILTER 0
 #define CONFIG_RESAMPLE_FILTER 0
@@ -1762,20 +1661,25 @@
 #define CONFIG_ANOISESRC_FILTER 0
 #define CONFIG_ANULLSRC_FILTER 0
 #define CONFIG_FLITE_FILTER 0
+#define CONFIG_HILBERT_FILTER 0
 #define CONFIG_SINE_FILTER 0
 #define CONFIG_ANULLSINK_FILTER 0
 #define CONFIG_ALPHAEXTRACT_FILTER 0
 #define CONFIG_ALPHAMERGE_FILTER 0
+#define CONFIG_AMPLIFY_FILTER 0
 #define CONFIG_ASS_FILTER 0
 #define CONFIG_ATADENOISE_FILTER 0
 #define CONFIG_AVGBLUR_FILTER 0
+#define CONFIG_AVGBLUR_OPENCL_FILTER 0
 #define CONFIG_BBOX_FILTER 0
 #define CONFIG_BENCH_FILTER 0
 #define CONFIG_BITPLANENOISE_FILTER 0
 #define CONFIG_BLACKDETECT_FILTER 0
 #define CONFIG_BLACKFRAME_FILTER 0
 #define CONFIG_BLEND_FILTER 0
+#define CONFIG_BM3D_FILTER 0
 #define CONFIG_BOXBLUR_FILTER 0
+#define CONFIG_BOXBLUR_OPENCL_FILTER 0
 #define CONFIG_BWDIF_FILTER 0
 #define CONFIG_CHROMAKEY_FILTER 0
 #define CONFIG_CIESCOPE_FILTER 0
@@ -1787,27 +1691,33 @@
 #define CONFIG_COLORMATRIX_FILTER 0
 #define CONFIG_COLORSPACE_FILTER 0
 #define CONFIG_CONVOLUTION_FILTER 0
+#define CONFIG_CONVOLUTION_OPENCL_FILTER 0
 #define CONFIG_CONVOLVE_FILTER 0
 #define CONFIG_COPY_FILTER 0
 #define CONFIG_COREIMAGE_FILTER 0
 #define CONFIG_COVER_RECT_FILTER 0
 #define CONFIG_CROP_FILTER 0
 #define CONFIG_CROPDETECT_FILTER 0
+#define CONFIG_CUE_FILTER 0
 #define CONFIG_CURVES_FILTER 0
 #define CONFIG_DATASCOPE_FILTER 0
 #define CONFIG_DCTDNOIZ_FILTER 0
 #define CONFIG_DEBAND_FILTER 0
+#define CONFIG_DEBLOCK_FILTER 0
 #define CONFIG_DECIMATE_FILTER 0
+#define CONFIG_DECONVOLVE_FILTER 0
 #define CONFIG_DEFLATE_FILTER 0
 #define CONFIG_DEFLICKER_FILTER 0
 #define CONFIG_DEINTERLACE_QSV_FILTER 0
 #define CONFIG_DEINTERLACE_VAAPI_FILTER 0
 #define CONFIG_DEJUDDER_FILTER 0
 #define CONFIG_DELOGO_FILTER 0
+#define CONFIG_DENOISE_VAAPI_FILTER 0
 #define CONFIG_DESHAKE_FILTER 0
 #define CONFIG_DESPILL_FILTER 0
 #define CONFIG_DETELECINE_FILTER 0
 #define CONFIG_DILATION_FILTER 0
+#define CONFIG_DILATION_OPENCL_FILTER 0
 #define CONFIG_DISPLACE_FILTER 0
 #define CONFIG_DOUBLEWEAVE_FILTER 0
 #define CONFIG_DRAWBOX_FILTER 0
@@ -1816,15 +1726,19 @@
 #define CONFIG_DRAWTEXT_FILTER 0
 #define CONFIG_EDGEDETECT_FILTER 0
 #define CONFIG_ELBG_FILTER 0
+#define CONFIG_ENTROPY_FILTER 0
 #define CONFIG_EQ_FILTER 0
 #define CONFIG_EROSION_FILTER 0
+#define CONFIG_EROSION_OPENCL_FILTER 0
 #define CONFIG_EXTRACTPLANES_FILTER 0
 #define CONFIG_FADE_FILTER 0
+#define CONFIG_FFTDNOIZ_FILTER 0
 #define CONFIG_FFTFILT_FILTER 0
 #define CONFIG_FIELD_FILTER 0
 #define CONFIG_FIELDHINT_FILTER 0
 #define CONFIG_FIELDMATCH_FILTER 0
 #define CONFIG_FIELDORDER_FILTER 0
+#define CONFIG_FILLBORDERS_FILTER 0
 #define CONFIG_FIND_RECT_FILTER 0
 #define CONFIG_FLOODFILL_FILTER 0
 #define CONFIG_FORMAT_FILTER 0
@@ -1837,6 +1751,7 @@
 #define CONFIG_GBLUR_FILTER 0
 #define CONFIG_GEQ_FILTER 0
 #define CONFIG_GRADFUN_FILTER 0
+#define CONFIG_GREYEDGE_FILTER 0
 #define CONFIG_HALDCLUT_FILTER 0
 #define CONFIG_HFLIP_FILTER 0
 #define CONFIG_HISTEQ_FILTER 0
@@ -1857,11 +1772,13 @@
 #define CONFIG_INTERLEAVE_FILTER 0
 #define CONFIG_KERNDEINT_FILTER 0
 #define CONFIG_LENSCORRECTION_FILTER 0
+#define CONFIG_LENSFUN_FILTER 0
 #define CONFIG_LIBVMAF_FILTER 0
 #define CONFIG_LIMITER_FILTER 0
 #define CONFIG_LOOP_FILTER 0
 #define CONFIG_LUMAKEY_FILTER 0
 #define CONFIG_LUT_FILTER 0
+#define CONFIG_LUT1D_FILTER 0
 #define CONFIG_LUT2_FILTER 0
 #define CONFIG_LUT3D_FILTER 0
 #define CONFIG_LUTRGB_FILTER 0
@@ -1874,17 +1791,21 @@
 #define CONFIG_METADATA_FILTER 0
 #define CONFIG_MIDEQUALIZER_FILTER 0
 #define CONFIG_MINTERPOLATE_FILTER 0
+#define CONFIG_MIX_FILTER 0
 #define CONFIG_MPDECIMATE_FILTER 0
 #define CONFIG_NEGATE_FILTER 0
 #define CONFIG_NLMEANS_FILTER 0
 #define CONFIG_NNEDI_FILTER 0
 #define CONFIG_NOFORMAT_FILTER 0
 #define CONFIG_NOISE_FILTER 0
+#define CONFIG_NORMALIZE_FILTER 0
 #define CONFIG_NULL_FILTER 0
 #define CONFIG_OCR_FILTER 0
 #define CONFIG_OCV_FILTER 0
 #define CONFIG_OSCILLOSCOPE_FILTER 0
 #define CONFIG_OVERLAY_FILTER 0
+#define CONFIG_OVERLAY_OPENCL_FILTER 0
+#define CONFIG_OVERLAY_QSV_FILTER 0
 #define CONFIG_OWDENOISE_FILTER 0
 #define CONFIG_PAD_FILTER 0
 #define CONFIG_PALETTEGEN_FILTER 0
@@ -1898,6 +1819,9 @@
 #define CONFIG_PP7_FILTER 0
 #define CONFIG_PREMULTIPLY_FILTER 0
 #define CONFIG_PREWITT_FILTER 0
+#define CONFIG_PREWITT_OPENCL_FILTER 0
+#define CONFIG_PROCAMP_VAAPI_FILTER 0
+#define CONFIG_PROGRAM_OPENCL_FILTER 0
 #define CONFIG_PSEUDOCOLOR_FILTER 0
 #define CONFIG_PSNR_FILTER 0
 #define CONFIG_PULLUP_FILTER 0
@@ -1912,6 +1836,7 @@
 #define CONFIG_REPEATFIELDS_FILTER 0
 #define CONFIG_REVERSE_FILTER 0
 #define CONFIG_ROBERTS_FILTER 0
+#define CONFIG_ROBERTS_OPENCL_FILTER 0
 #define CONFIG_ROTATE_FILTER 0
 #define CONFIG_SAB_FILTER 0
 #define CONFIG_SCALE_FILTER 0
@@ -1927,8 +1852,10 @@
 #define CONFIG_SETDAR_FILTER 0
 #define CONFIG_SETFIELD_FILTER 0
 #define CONFIG_SETPTS_FILTER 0
+#define CONFIG_SETRANGE_FILTER 0
 #define CONFIG_SETSAR_FILTER 0
 #define CONFIG_SETTB_FILTER 0
+#define CONFIG_SHARPNESS_VAAPI_FILTER 0
 #define CONFIG_SHOWINFO_FILTER 0
 #define CONFIG_SHOWPALETTE_FILTER 0
 #define CONFIG_SHUFFLEFRAMES_FILTER 0
@@ -1938,8 +1865,10 @@
 #define CONFIG_SIGNATURE_FILTER 0
 #define CONFIG_SMARTBLUR_FILTER 0
 #define CONFIG_SOBEL_FILTER 0
+#define CONFIG_SOBEL_OPENCL_FILTER 0
 #define CONFIG_SPLIT_FILTER 0
 #define CONFIG_SPP_FILTER 0
+#define CONFIG_SR_FILTER 0
 #define CONFIG_SSIM_FILTER 0
 #define CONFIG_STEREO3D_FILTER 0
 #define CONFIG_STREAMSELECT_FILTER 0
@@ -1955,19 +1884,25 @@
 #define CONFIG_TILE_FILTER 0
 #define CONFIG_TINTERLACE_FILTER 0
 #define CONFIG_TLUT2_FILTER 0
+#define CONFIG_TMIX_FILTER 0
 #define CONFIG_TONEMAP_FILTER 0
+#define CONFIG_TONEMAP_OPENCL_FILTER 0
 #define CONFIG_TRANSPOSE_FILTER 0
+#define CONFIG_TRANSPOSE_NPP_FILTER 0
 #define CONFIG_TRIM_FILTER 0
 #define CONFIG_UNPREMULTIPLY_FILTER 0
 #define CONFIG_UNSHARP_FILTER 0
+#define CONFIG_UNSHARP_OPENCL_FILTER 0
 #define CONFIG_USPP_FILTER 0
 #define CONFIG_VAGUEDENOISER_FILTER 0
 #define CONFIG_VECTORSCOPE_FILTER 0
 #define CONFIG_VFLIP_FILTER 0
+#define CONFIG_VFRDET_FILTER 0
 #define CONFIG_VIDSTABDETECT_FILTER 0
 #define CONFIG_VIDSTABTRANSFORM_FILTER 0
 #define CONFIG_VIGNETTE_FILTER 0
 #define CONFIG_VMAFMOTION_FILTER 0
+#define CONFIG_VPP_QSV_FILTER 0
 #define CONFIG_VSTACK_FILTER 0
 #define CONFIG_W3FDIF_FILTER 0
 #define CONFIG_WAVEFORM_FILTER 0
@@ -1988,6 +1923,9 @@
 #define CONFIG_MANDELBROT_FILTER 0
 #define CONFIG_MPTESTSRC_FILTER 0
 #define CONFIG_NULLSRC_FILTER 0
+#define CONFIG_OPENCLSRC_FILTER 0
+#define CONFIG_PAL75BARS_FILTER 0
+#define CONFIG_PAL100BARS_FILTER 0
 #define CONFIG_RGBTESTSRC_FILTER 0
 #define CONFIG_SMPTEBARS_FILTER 0
 #define CONFIG_SMPTEHDBARS_FILTER 0
@@ -2011,94 +1949,302 @@
 #define CONFIG_SPECTRUMSYNTH_FILTER 0
 #define CONFIG_AMOVIE_FILTER 0
 #define CONFIG_MOVIE_FILTER 0
-#define CONFIG_H263_VAAPI_HWACCEL 0
-#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_H264_CUVID_HWACCEL 0
-#define CONFIG_H264_D3D11VA_HWACCEL 0
-#define CONFIG_H264_D3D11VA2_HWACCEL 0
-#define CONFIG_H264_DXVA2_HWACCEL 0
-#define CONFIG_H264_MEDIACODEC_HWACCEL 0
-#define CONFIG_H264_MMAL_HWACCEL 0
-#define CONFIG_H264_QSV_HWACCEL 0
-#define CONFIG_H264_VAAPI_HWACCEL 0
-#define CONFIG_H264_VDA_HWACCEL 0
-#define CONFIG_H264_VDA_OLD_HWACCEL 0
-#define CONFIG_H264_VDPAU_HWACCEL 0
-#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_HEVC_CUVID_HWACCEL 0
-#define CONFIG_HEVC_D3D11VA_HWACCEL 0
-#define CONFIG_HEVC_D3D11VA2_HWACCEL 0
-#define CONFIG_HEVC_DXVA2_HWACCEL 0
-#define CONFIG_HEVC_MEDIACODEC_HWACCEL 0
-#define CONFIG_HEVC_QSV_HWACCEL 0
-#define CONFIG_HEVC_VAAPI_HWACCEL 0
-#define CONFIG_HEVC_VDPAU_HWACCEL 0
-#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MJPEG_CUVID_HWACCEL 0
-#define CONFIG_MPEG1_CUVID_HWACCEL 0
-#define CONFIG_MPEG1_XVMC_HWACCEL 0
-#define CONFIG_MPEG1_VDPAU_HWACCEL 0
-#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MPEG2_CUVID_HWACCEL 0
-#define CONFIG_MPEG2_XVMC_HWACCEL 0
-#define CONFIG_MPEG2_D3D11VA_HWACCEL 0
-#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
-#define CONFIG_MPEG2_DXVA2_HWACCEL 0
-#define CONFIG_MPEG2_MMAL_HWACCEL 0
-#define CONFIG_MPEG2_QSV_HWACCEL 0
-#define CONFIG_MPEG2_VAAPI_HWACCEL 0
-#define CONFIG_MPEG2_VDPAU_HWACCEL 0
-#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MPEG2_MEDIACODEC_HWACCEL 0
-#define CONFIG_MPEG4_CUVID_HWACCEL 0
-#define CONFIG_MPEG4_MEDIACODEC_HWACCEL 0
-#define CONFIG_MPEG4_MMAL_HWACCEL 0
-#define CONFIG_MPEG4_VAAPI_HWACCEL 0
-#define CONFIG_MPEG4_VDPAU_HWACCEL 0
-#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_VC1_CUVID_HWACCEL 0
-#define CONFIG_VC1_D3D11VA_HWACCEL 0
-#define CONFIG_VC1_D3D11VA2_HWACCEL 0
-#define CONFIG_VC1_DXVA2_HWACCEL 0
-#define CONFIG_VC1_VAAPI_HWACCEL 0
-#define CONFIG_VC1_VDPAU_HWACCEL 0
-#define CONFIG_VC1_MMAL_HWACCEL 0
-#define CONFIG_VC1_QSV_HWACCEL 0
-#define CONFIG_VP8_CUVID_HWACCEL 0
-#define CONFIG_VP8_MEDIACODEC_HWACCEL 0
-#define CONFIG_VP8_QSV_HWACCEL 0
-#define CONFIG_VP9_CUVID_HWACCEL 0
-#define CONFIG_VP9_D3D11VA_HWACCEL 0
-#define CONFIG_VP9_D3D11VA2_HWACCEL 0
-#define CONFIG_VP9_DXVA2_HWACCEL 0
-#define CONFIG_VP9_MEDIACODEC_HWACCEL 0
-#define CONFIG_VP9_VAAPI_HWACCEL 0
-#define CONFIG_WMV3_D3D11VA_HWACCEL 0
-#define CONFIG_WMV3_D3D11VA2_HWACCEL 0
-#define CONFIG_WMV3_DXVA2_HWACCEL 0
-#define CONFIG_WMV3_VAAPI_HWACCEL 0
-#define CONFIG_WMV3_VDPAU_HWACCEL 0
-#define CONFIG_ALSA_INDEV 0
-#define CONFIG_AVFOUNDATION_INDEV 0
-#define CONFIG_BKTR_INDEV 0
-#define CONFIG_DECKLINK_INDEV 0
-#define CONFIG_LIBNDI_NEWTEK_INDEV 0
-#define CONFIG_DSHOW_INDEV 0
-#define CONFIG_FBDEV_INDEV 0
-#define CONFIG_GDIGRAB_INDEV 0
-#define CONFIG_IEC61883_INDEV 0
-#define CONFIG_JACK_INDEV 0
-#define CONFIG_KMSGRAB_INDEV 0
-#define CONFIG_LAVFI_INDEV 0
-#define CONFIG_OPENAL_INDEV 0
-#define CONFIG_OSS_INDEV 0
-#define CONFIG_PULSE_INDEV 0
-#define CONFIG_SNDIO_INDEV 0
-#define CONFIG_V4L2_INDEV 0
-#define CONFIG_VFWCAP_INDEV 0
-#define CONFIG_XCBGRAB_INDEV 0
-#define CONFIG_LIBCDIO_INDEV 0
-#define CONFIG_LIBDC1394_INDEV 0
+#define CONFIG_AFIFO_FILTER 0
+#define CONFIG_FIFO_FILTER 0
+#define CONFIG_AA_DEMUXER 0
+#define CONFIG_AAC_DEMUXER 0
+#define CONFIG_AC3_DEMUXER 0
+#define CONFIG_ACM_DEMUXER 0
+#define CONFIG_ACT_DEMUXER 0
+#define CONFIG_ADF_DEMUXER 0
+#define CONFIG_ADP_DEMUXER 0
+#define CONFIG_ADS_DEMUXER 0
+#define CONFIG_ADX_DEMUXER 0
+#define CONFIG_AEA_DEMUXER 0
+#define CONFIG_AFC_DEMUXER 0
+#define CONFIG_AIFF_DEMUXER 0
+#define CONFIG_AIX_DEMUXER 0
+#define CONFIG_AMR_DEMUXER 0
+#define CONFIG_AMRNB_DEMUXER 0
+#define CONFIG_AMRWB_DEMUXER 0
+#define CONFIG_ANM_DEMUXER 0
+#define CONFIG_APC_DEMUXER 0
+#define CONFIG_APE_DEMUXER 0
+#define CONFIG_APNG_DEMUXER 0
+#define CONFIG_APTX_DEMUXER 0
+#define CONFIG_APTX_HD_DEMUXER 0
+#define CONFIG_AQTITLE_DEMUXER 0
+#define CONFIG_ASF_DEMUXER 0
+#define CONFIG_ASF_O_DEMUXER 0
+#define CONFIG_ASS_DEMUXER 0
+#define CONFIG_AST_DEMUXER 0
+#define CONFIG_AU_DEMUXER 0
+#define CONFIG_AVI_DEMUXER 0
+#define CONFIG_AVISYNTH_DEMUXER 0
+#define CONFIG_AVR_DEMUXER 0
+#define CONFIG_AVS_DEMUXER 0
+#define CONFIG_AVS2_DEMUXER 0
+#define CONFIG_BETHSOFTVID_DEMUXER 0
+#define CONFIG_BFI_DEMUXER 0
+#define CONFIG_BINTEXT_DEMUXER 0
+#define CONFIG_BINK_DEMUXER 0
+#define CONFIG_BIT_DEMUXER 0
+#define CONFIG_BMV_DEMUXER 0
+#define CONFIG_BFSTM_DEMUXER 0
+#define CONFIG_BRSTM_DEMUXER 0
+#define CONFIG_BOA_DEMUXER 0
+#define CONFIG_C93_DEMUXER 0
+#define CONFIG_CAF_DEMUXER 0
+#define CONFIG_CAVSVIDEO_DEMUXER 0
+#define CONFIG_CDG_DEMUXER 0
+#define CONFIG_CDXL_DEMUXER 0
+#define CONFIG_CINE_DEMUXER 0
+#define CONFIG_CODEC2_DEMUXER 0
+#define CONFIG_CODEC2RAW_DEMUXER 0
+#define CONFIG_CONCAT_DEMUXER 0
+#define CONFIG_DASH_DEMUXER 0
+#define CONFIG_DATA_DEMUXER 0
+#define CONFIG_DAUD_DEMUXER 0
+#define CONFIG_DCSTR_DEMUXER 0
+#define CONFIG_DFA_DEMUXER 0
+#define CONFIG_DIRAC_DEMUXER 0
+#define CONFIG_DNXHD_DEMUXER 0
+#define CONFIG_DSF_DEMUXER 0
+#define CONFIG_DSICIN_DEMUXER 0
+#define CONFIG_DSS_DEMUXER 0
+#define CONFIG_DTS_DEMUXER 0
+#define CONFIG_DTSHD_DEMUXER 0
+#define CONFIG_DV_DEMUXER 0
+#define CONFIG_DVBSUB_DEMUXER 0
+#define CONFIG_DVBTXT_DEMUXER 0
+#define CONFIG_DXA_DEMUXER 0
+#define CONFIG_EA_DEMUXER 0
+#define CONFIG_EA_CDATA_DEMUXER 0
+#define CONFIG_EAC3_DEMUXER 0
+#define CONFIG_EPAF_DEMUXER 0
+#define CONFIG_FFMETADATA_DEMUXER 0
+#define CONFIG_FILMSTRIP_DEMUXER 0
+#define CONFIG_FITS_DEMUXER 0
+#define CONFIG_FLAC_DEMUXER 1
+#define CONFIG_FLIC_DEMUXER 0
+#define CONFIG_FLV_DEMUXER 0
+#define CONFIG_LIVE_FLV_DEMUXER 0
+#define CONFIG_FOURXM_DEMUXER 0
+#define CONFIG_FRM_DEMUXER 0
+#define CONFIG_FSB_DEMUXER 0
+#define CONFIG_G722_DEMUXER 0
+#define CONFIG_G723_1_DEMUXER 0
+#define CONFIG_G726_DEMUXER 0
+#define CONFIG_G726LE_DEMUXER 0
+#define CONFIG_G729_DEMUXER 0
+#define CONFIG_GDV_DEMUXER 0
+#define CONFIG_GENH_DEMUXER 0
+#define CONFIG_GIF_DEMUXER 0
+#define CONFIG_GSM_DEMUXER 0
+#define CONFIG_GXF_DEMUXER 0
+#define CONFIG_H261_DEMUXER 0
+#define CONFIG_H263_DEMUXER 0
+#define CONFIG_H264_DEMUXER 0
+#define CONFIG_HEVC_DEMUXER 0
+#define CONFIG_HLS_DEMUXER 0
+#define CONFIG_HNM_DEMUXER 0
+#define CONFIG_ICO_DEMUXER 0
+#define CONFIG_IDCIN_DEMUXER 0
+#define CONFIG_IDF_DEMUXER 0
+#define CONFIG_IFF_DEMUXER 0
+#define CONFIG_ILBC_DEMUXER 0
+#define CONFIG_IMAGE2_DEMUXER 0
+#define CONFIG_IMAGE2PIPE_DEMUXER 0
+#define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 0
+#define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0
+#define CONFIG_INGENIENT_DEMUXER 0
+#define CONFIG_IPMOVIE_DEMUXER 0
+#define CONFIG_IRCAM_DEMUXER 0
+#define CONFIG_ISS_DEMUXER 0
+#define CONFIG_IV8_DEMUXER 0
+#define CONFIG_IVF_DEMUXER 0
+#define CONFIG_IVR_DEMUXER 0
+#define CONFIG_JACOSUB_DEMUXER 0
+#define CONFIG_JV_DEMUXER 0
+#define CONFIG_LMLM4_DEMUXER 0
+#define CONFIG_LOAS_DEMUXER 0
+#define CONFIG_LRC_DEMUXER 0
+#define CONFIG_LVF_DEMUXER 0
+#define CONFIG_LXF_DEMUXER 0
+#define CONFIG_M4V_DEMUXER 0
+#define CONFIG_MATROSKA_DEMUXER 1
+#define CONFIG_MGSTS_DEMUXER 0
+#define CONFIG_MICRODVD_DEMUXER 0
+#define CONFIG_MJPEG_DEMUXER 0
+#define CONFIG_MJPEG_2000_DEMUXER 0
+#define CONFIG_MLP_DEMUXER 0
+#define CONFIG_MLV_DEMUXER 0
+#define CONFIG_MM_DEMUXER 0
+#define CONFIG_MMF_DEMUXER 0
+#define CONFIG_MOV_DEMUXER 1
+#define CONFIG_MP3_DEMUXER 1
+#define CONFIG_MPC_DEMUXER 0
+#define CONFIG_MPC8_DEMUXER 0
+#define CONFIG_MPEGPS_DEMUXER 0
+#define CONFIG_MPEGTS_DEMUXER 0
+#define CONFIG_MPEGTSRAW_DEMUXER 0
+#define CONFIG_MPEGVIDEO_DEMUXER 0
+#define CONFIG_MPJPEG_DEMUXER 0
+#define CONFIG_MPL2_DEMUXER 0
+#define CONFIG_MPSUB_DEMUXER 0
+#define CONFIG_MSF_DEMUXER 0
+#define CONFIG_MSNWC_TCP_DEMUXER 0
+#define CONFIG_MTAF_DEMUXER 0
+#define CONFIG_MTV_DEMUXER 0
+#define CONFIG_MUSX_DEMUXER 0
+#define CONFIG_MV_DEMUXER 0
+#define CONFIG_MVI_DEMUXER 0
+#define CONFIG_MXF_DEMUXER 0
+#define CONFIG_MXG_DEMUXER 0
+#define CONFIG_NC_DEMUXER 0
+#define CONFIG_NISTSPHERE_DEMUXER 0
+#define CONFIG_NSP_DEMUXER 0
+#define CONFIG_NSV_DEMUXER 0
+#define CONFIG_NUT_DEMUXER 0
+#define CONFIG_NUV_DEMUXER 0
+#define CONFIG_OGG_DEMUXER 1
+#define CONFIG_OMA_DEMUXER 0
+#define CONFIG_PAF_DEMUXER 0
+#define CONFIG_PCM_ALAW_DEMUXER 0
+#define CONFIG_PCM_MULAW_DEMUXER 0
+#define CONFIG_PCM_F64BE_DEMUXER 0
+#define CONFIG_PCM_F64LE_DEMUXER 0
+#define CONFIG_PCM_F32BE_DEMUXER 0
+#define CONFIG_PCM_F32LE_DEMUXER 0
+#define CONFIG_PCM_S32BE_DEMUXER 0
+#define CONFIG_PCM_S32LE_DEMUXER 0
+#define CONFIG_PCM_S24BE_DEMUXER 0
+#define CONFIG_PCM_S24LE_DEMUXER 0
+#define CONFIG_PCM_S16BE_DEMUXER 0
+#define CONFIG_PCM_S16LE_DEMUXER 0
+#define CONFIG_PCM_S8_DEMUXER 0
+#define CONFIG_PCM_U32BE_DEMUXER 0
+#define CONFIG_PCM_U32LE_DEMUXER 0
+#define CONFIG_PCM_U24BE_DEMUXER 0
+#define CONFIG_PCM_U24LE_DEMUXER 0
+#define CONFIG_PCM_U16BE_DEMUXER 0
+#define CONFIG_PCM_U16LE_DEMUXER 0
+#define CONFIG_PCM_U8_DEMUXER 0
+#define CONFIG_PJS_DEMUXER 0
+#define CONFIG_PMP_DEMUXER 0
+#define CONFIG_PVA_DEMUXER 0
+#define CONFIG_PVF_DEMUXER 0
+#define CONFIG_QCP_DEMUXER 0
+#define CONFIG_R3D_DEMUXER 0
+#define CONFIG_RAWVIDEO_DEMUXER 0
+#define CONFIG_REALTEXT_DEMUXER 0
+#define CONFIG_REDSPARK_DEMUXER 0
+#define CONFIG_RL2_DEMUXER 0
+#define CONFIG_RM_DEMUXER 0
+#define CONFIG_ROQ_DEMUXER 0
+#define CONFIG_RPL_DEMUXER 0
+#define CONFIG_RSD_DEMUXER 0
+#define CONFIG_RSO_DEMUXER 0
+#define CONFIG_RTP_DEMUXER 0
+#define CONFIG_RTSP_DEMUXER 0
+#define CONFIG_S337M_DEMUXER 0
+#define CONFIG_SAMI_DEMUXER 0
+#define CONFIG_SAP_DEMUXER 0
+#define CONFIG_SBC_DEMUXER 0
+#define CONFIG_SBG_DEMUXER 0
+#define CONFIG_SCC_DEMUXER 0
+#define CONFIG_SDP_DEMUXER 0
+#define CONFIG_SDR2_DEMUXER 0
+#define CONFIG_SDS_DEMUXER 0
+#define CONFIG_SDX_DEMUXER 0
+#define CONFIG_SEGAFILM_DEMUXER 0
+#define CONFIG_SER_DEMUXER 0
+#define CONFIG_SHORTEN_DEMUXER 0
+#define CONFIG_SIFF_DEMUXER 0
+#define CONFIG_SLN_DEMUXER 0
+#define CONFIG_SMACKER_DEMUXER 0
+#define CONFIG_SMJPEG_DEMUXER 0
+#define CONFIG_SMUSH_DEMUXER 0
+#define CONFIG_SOL_DEMUXER 0
+#define CONFIG_SOX_DEMUXER 0
+#define CONFIG_SPDIF_DEMUXER 0
+#define CONFIG_SRT_DEMUXER 0
+#define CONFIG_STR_DEMUXER 0
+#define CONFIG_STL_DEMUXER 0
+#define CONFIG_SUBVIEWER1_DEMUXER 0
+#define CONFIG_SUBVIEWER_DEMUXER 0
+#define CONFIG_SUP_DEMUXER 0
+#define CONFIG_SVAG_DEMUXER 0
+#define CONFIG_SWF_DEMUXER 0
+#define CONFIG_TAK_DEMUXER 0
+#define CONFIG_TEDCAPTIONS_DEMUXER 0
+#define CONFIG_THP_DEMUXER 0
+#define CONFIG_THREEDOSTR_DEMUXER 0
+#define CONFIG_TIERTEXSEQ_DEMUXER 0
+#define CONFIG_TMV_DEMUXER 0
+#define CONFIG_TRUEHD_DEMUXER 0
+#define CONFIG_TTA_DEMUXER 0
+#define CONFIG_TXD_DEMUXER 0
+#define CONFIG_TTY_DEMUXER 0
+#define CONFIG_TY_DEMUXER 0
+#define CONFIG_V210_DEMUXER 0
+#define CONFIG_V210X_DEMUXER 0
+#define CONFIG_VAG_DEMUXER 0
+#define CONFIG_VC1_DEMUXER 0
+#define CONFIG_VC1T_DEMUXER 0
+#define CONFIG_VIVO_DEMUXER 0
+#define CONFIG_VMD_DEMUXER 0
+#define CONFIG_VOBSUB_DEMUXER 0
+#define CONFIG_VOC_DEMUXER 0
+#define CONFIG_VPK_DEMUXER 0
+#define CONFIG_VPLAYER_DEMUXER 0
+#define CONFIG_VQF_DEMUXER 0
+#define CONFIG_W64_DEMUXER 0
+#define CONFIG_WAV_DEMUXER 1
+#define CONFIG_WC3_DEMUXER 0
+#define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0
+#define CONFIG_WEBVTT_DEMUXER 0
+#define CONFIG_WSAUD_DEMUXER 0
+#define CONFIG_WSD_DEMUXER 0
+#define CONFIG_WSVQA_DEMUXER 0
+#define CONFIG_WTV_DEMUXER 0
+#define CONFIG_WVE_DEMUXER 0
+#define CONFIG_WV_DEMUXER 0
+#define CONFIG_XA_DEMUXER 0
+#define CONFIG_XBIN_DEMUXER 0
+#define CONFIG_XMV_DEMUXER 0
+#define CONFIG_XVAG_DEMUXER 0
+#define CONFIG_XWMA_DEMUXER 0
+#define CONFIG_YOP_DEMUXER 0
+#define CONFIG_YUV4MPEGPIPE_DEMUXER 0
+#define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0
+#define CONFIG_LIBGME_DEMUXER 0
+#define CONFIG_LIBMODPLUG_DEMUXER 0
+#define CONFIG_LIBOPENMPT_DEMUXER 0
+#define CONFIG_VAPOURSYNTH_DEMUXER 0
 #define CONFIG_A64_MUXER 0
 #define CONFIG_AC3_MUXER 0
 #define CONFIG_ADTS_MUXER 0
@@ -2106,6 +2252,8 @@
 #define CONFIG_AIFF_MUXER 0
 #define CONFIG_AMR_MUXER 0
 #define CONFIG_APNG_MUXER 0
+#define CONFIG_APTX_MUXER 0
+#define CONFIG_APTX_HD_MUXER 0
 #define CONFIG_ASF_MUXER 0
 #define CONFIG_ASS_MUXER 0
 #define CONFIG_AST_MUXER 0
@@ -2113,9 +2261,12 @@
 #define CONFIG_AU_MUXER 0
 #define CONFIG_AVI_MUXER 0
 #define CONFIG_AVM2_MUXER 0
+#define CONFIG_AVS2_MUXER 0
 #define CONFIG_BIT_MUXER 0
 #define CONFIG_CAF_MUXER 0
 #define CONFIG_CAVSVIDEO_MUXER 0
+#define CONFIG_CODEC2_MUXER 0
+#define CONFIG_CODEC2RAW_MUXER 0
 #define CONFIG_CRC_MUXER 0
 #define CONFIG_DASH_MUXER 0
 #define CONFIG_DATA_MUXER 0
@@ -2126,9 +2277,9 @@
 #define CONFIG_DV_MUXER 0
 #define CONFIG_EAC3_MUXER 0
 #define CONFIG_F4V_MUXER 0
-#define CONFIG_FFM_MUXER 0
 #define CONFIG_FFMETADATA_MUXER 0
 #define CONFIG_FIFO_MUXER 0
+#define CONFIG_FIFO_TEST_MUXER 0
 #define CONFIG_FILMSTRIP_MUXER 0
 #define CONFIG_FITS_MUXER 0
 #define CONFIG_FLAC_MUXER 0
@@ -2221,7 +2372,9 @@
 #define CONFIG_RTP_MPEGTS_MUXER 0
 #define CONFIG_RTSP_MUXER 0
 #define CONFIG_SAP_MUXER 0
+#define CONFIG_SBC_MUXER 0
 #define CONFIG_SCC_MUXER 0
+#define CONFIG_SEGAFILM_MUXER 0
 #define CONFIG_SEGMENT_MUXER 0
 #define CONFIG_STREAM_SEGMENT_MUXER 0
 #define CONFIG_SINGLEJPEG_MUXER 0
@@ -2254,58 +2407,6 @@
 #define CONFIG_WV_MUXER 0
 #define CONFIG_YUV4MPEGPIPE_MUXER 0
 #define CONFIG_CHROMAPRINT_MUXER 0
-#define CONFIG_ALSA_OUTDEV 0
-#define CONFIG_CACA_OUTDEV 0
-#define CONFIG_DECKLINK_OUTDEV 0
-#define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
-#define CONFIG_FBDEV_OUTDEV 0
-#define CONFIG_OPENGL_OUTDEV 0
-#define CONFIG_OSS_OUTDEV 0
-#define CONFIG_PULSE_OUTDEV 0
-#define CONFIG_SDL2_OUTDEV 0
-#define CONFIG_SNDIO_OUTDEV 0
-#define CONFIG_V4L2_OUTDEV 0
-#define CONFIG_XV_OUTDEV 0
-#define CONFIG_AAC_PARSER 0
-#define CONFIG_AAC_LATM_PARSER 0
-#define CONFIG_AC3_PARSER 0
-#define CONFIG_ADX_PARSER 0
-#define CONFIG_BMP_PARSER 0
-#define CONFIG_CAVSVIDEO_PARSER 0
-#define CONFIG_COOK_PARSER 0
-#define CONFIG_DCA_PARSER 0
-#define CONFIG_DIRAC_PARSER 0
-#define CONFIG_DNXHD_PARSER 0
-#define CONFIG_DPX_PARSER 0
-#define CONFIG_DVAUDIO_PARSER 0
-#define CONFIG_DVBSUB_PARSER 0
-#define CONFIG_DVDSUB_PARSER 0
-#define CONFIG_DVD_NAV_PARSER 0
-#define CONFIG_FLAC_PARSER 1
-#define CONFIG_G729_PARSER 0
-#define CONFIG_GSM_PARSER 0
-#define CONFIG_H261_PARSER 0
-#define CONFIG_H263_PARSER 0
-#define CONFIG_H264_PARSER 0
-#define CONFIG_HEVC_PARSER 0
-#define CONFIG_MJPEG_PARSER 0
-#define CONFIG_MLP_PARSER 0
-#define CONFIG_MPEG4VIDEO_PARSER 0
-#define CONFIG_MPEGAUDIO_PARSER 0
-#define CONFIG_MPEGVIDEO_PARSER 0
-#define CONFIG_OPUS_PARSER 1
-#define CONFIG_PNG_PARSER 0
-#define CONFIG_PNM_PARSER 0
-#define CONFIG_RV30_PARSER 0
-#define CONFIG_RV40_PARSER 0
-#define CONFIG_SIPR_PARSER 0
-#define CONFIG_TAK_PARSER 0
-#define CONFIG_VC1_PARSER 0
-#define CONFIG_VORBIS_PARSER 1
-#define CONFIG_VP3_PARSER 1
-#define CONFIG_VP8_PARSER 1
-#define CONFIG_VP9_PARSER 0
-#define CONFIG_XMA_PARSER 0
 #define CONFIG_ASYNC_PROTOCOL 0
 #define CONFIG_BLURAY_PROTOCOL 0
 #define CONFIG_CACHE_PROTOCOL 0
@@ -2339,10 +2440,7 @@
 #define CONFIG_SUBFILE_PROTOCOL 0
 #define CONFIG_TEE_PROTOCOL 0
 #define CONFIG_TCP_PROTOCOL 0
-#define CONFIG_TLS_GNUTLS_PROTOCOL 0
-#define CONFIG_TLS_SCHANNEL_PROTOCOL 0
-#define CONFIG_TLS_SECURETRANSPORT_PROTOCOL 0
-#define CONFIG_TLS_OPENSSL_PROTOCOL 0
+#define CONFIG_TLS_PROTOCOL 0
 #define CONFIG_UDP_PROTOCOL 0
 #define CONFIG_UDPLITE_PROTOCOL 0
 #define CONFIG_UNIX_PROTOCOL 0
@@ -2351,6 +2449,7 @@
 #define CONFIG_LIBRTMPS_PROTOCOL 0
 #define CONFIG_LIBRTMPT_PROTOCOL 0
 #define CONFIG_LIBRTMPTE_PROTOCOL 0
+#define CONFIG_LIBSRT_PROTOCOL 0
 #define CONFIG_LIBSSH_PROTOCOL 0
 #define CONFIG_LIBSMBCLIENT_PROTOCOL 0
 #endif /* FFMPEG_CONFIG_H */

diff --git a/fuchsia/config/default/arm64/libavutil/avconfig.h b/fuchsia/config/default/arm64/libavutil/avconfig.h
index f10aa61..c289fbb 100644
--- a/fuchsia/config/default/arm64/libavutil/avconfig.h
+++ b/fuchsia/config/default/arm64/libavutil/avconfig.h

@@ -1,4 +1,4 @@
-/* Generated by ffconf */
+/* Generated by ffmpeg configure */
 #ifndef AVUTIL_AVCONFIG_H
 #define AVUTIL_AVCONFIG_H
 #define AV_HAVE_BIGENDIAN 0

diff --git a/fuchsia/config/default/arm64/libavutil/ffversion.h b/fuchsia/config/default/arm64/libavutil/ffversion.h
index 0342cbb..b97c110 100644
--- a/fuchsia/config/default/arm64/libavutil/ffversion.h
+++ b/fuchsia/config/default/arm64/libavutil/ffversion.h

@@ -1,5 +1,5 @@
 /* Automatically generated by version.sh, do not manually edit! */
 #ifndef AVUTIL_FFVERSION_H
 #define AVUTIL_FFVERSION_H
-#define FFMPEG_VERSION "n3.4.2-1-g67e0ba6f22"
+#define FFMPEG_VERSION "N-92356-g750018e43a"
 #endif /* AVUTIL_FFVERSION_H */

diff --git a/fuchsia/config/default/x64/config.asm b/fuchsia/config/default/x64/config.asm
index 89f2d90..5a46da1 100644
--- a/fuchsia/config/default/x64/config.asm
+++ b/fuchsia/config/default/x64/config.asm

@@ -1,3 +1,4 @@
+; Automatically generated by configure - do not modify!
 %define ARCH_AARCH64 0
 %define ARCH_ALPHA 0
 %define ARCH_ARM 0
@@ -41,6 +42,7 @@
 %define HAVE_AMD3DNOWEXT 1
 %define HAVE_AVX 1
 %define HAVE_AVX2 1
+%define HAVE_AVX512 1
 %define HAVE_FMA3 1
 %define HAVE_FMA4 1
 %define HAVE_MMX 1
@@ -85,6 +87,7 @@
 %define HAVE_AMD3DNOWEXT_EXTERNAL 1
 %define HAVE_AVX_EXTERNAL 1
 %define HAVE_AVX2_EXTERNAL 1
+%define HAVE_AVX512_EXTERNAL 0
 %define HAVE_FMA3_EXTERNAL 1
 %define HAVE_FMA4_EXTERNAL 1
 %define HAVE_MMX_EXTERNAL 1
@@ -129,6 +132,7 @@
 %define HAVE_AMD3DNOWEXT_INLINE 1
 %define HAVE_AVX_INLINE 1
 %define HAVE_AVX2_INLINE 1
+%define HAVE_AVX512_INLINE 1
 %define HAVE_FMA3_INLINE 1
 %define HAVE_FMA4_INLINE 1
 %define HAVE_MMX_INLINE 1
@@ -158,36 +162,29 @@
 %define HAVE_FAST_64BIT 1
 %define HAVE_FAST_CLZ 1
 %define HAVE_FAST_CMOV 1
-%define HAVE_LOCAL_ALIGNED_8 1
-%define HAVE_LOCAL_ALIGNED_16 1
-%define HAVE_LOCAL_ALIGNED_32 1
+%define HAVE_LOCAL_ALIGNED 1
 %define HAVE_SIMD_ALIGN_16 1
 %define HAVE_SIMD_ALIGN_32 1
-%define HAVE_ATOMICS_GCC 1
-%define HAVE_ATOMICS_SUNCC 0
-%define HAVE_ATOMICS_WIN32 0
+%define HAVE_SIMD_ALIGN_64 1
 %define HAVE_ATOMIC_CAS_PTR 0
 %define HAVE_MACHINE_RW_BARRIER 0
 %define HAVE_MEMORYBARRIER 0
 %define HAVE_MM_EMPTY 1
 %define HAVE_RDTSC 0
-%define HAVE_SARESTART 1
 %define HAVE_SEM_TIMEDWAIT 1
 %define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1
-%define HAVE_CABS 1
-%define HAVE_CEXP 1
+%define HAVE_CABS 0
+%define HAVE_CEXP 0
 %define HAVE_INLINE_ASM 1
 %define HAVE_SYMVER 0
 %define HAVE_X86ASM 1
 %define HAVE_BIGENDIAN 0
 %define HAVE_FAST_UNALIGNED 1
-%define HAVE_ALTIVEC_H 0
 %define HAVE_ARPA_INET_H 0
 %define HAVE_ASM_TYPES_H 1
 %define HAVE_CDIO_PARANOIA_H 0
 %define HAVE_CDIO_PARANOIA_PARANOIA_H 0
 %define HAVE_CUDA_H 0
-%define HAVE_D3D11_H 0
 %define HAVE_DISPATCH_DISPATCH_H 0
 %define HAVE_DEV_BKTR_IOCTL_BT848_H 0
 %define HAVE_DEV_BKTR_IOCTL_METEOR_H 0
@@ -196,27 +193,18 @@
 %define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0
 %define HAVE_DIRECT_H 0
 %define HAVE_DIRENT_H 1
-%define HAVE_DLFCN_H 1
 %define HAVE_DXGIDEBUG_H 0
 %define HAVE_DXVA_H 0
 %define HAVE_ES2_GL_H 0
 %define HAVE_GSM_H 0
 %define HAVE_IO_H 0
-%define HAVE_MACH_MACH_TIME_H 0
+%define HAVE_LINUX_PERF_EVENT_H 1
 %define HAVE_MACHINE_IOCTL_BT848_H 0
 %define HAVE_MACHINE_IOCTL_METEOR_H 0
 %define HAVE_MALLOC_H 1
 %define HAVE_OPENCV2_CORE_CORE_C_H 0
-%define HAVE_OPENJPEG_2_3_OPENJPEG_H 0
-%define HAVE_OPENJPEG_2_2_OPENJPEG_H 0
-%define HAVE_OPENJPEG_2_1_OPENJPEG_H 0
-%define HAVE_OPENJPEG_2_0_OPENJPEG_H 0
-%define HAVE_OPENJPEG_1_5_OPENJPEG_H 0
 %define HAVE_OPENGL_GL3_H 0
 %define HAVE_POLL_H 1
-%define HAVE_SOUNDCARD_H 0
-%define HAVE_STDATOMIC_H 1
-%define HAVE_SYS_MMAN_H 1
 %define HAVE_SYS_PARAM_H 1
 %define HAVE_SYS_RESOURCE_H 1
 %define HAVE_SYS_SELECT_H 1
@@ -260,17 +248,20 @@
 %define HAVE_SINF 1
 %define HAVE_TRUNC 1
 %define HAVE_TRUNCF 1
+%define HAVE_DOS_PATHS 0
+%define HAVE_LIBC_MSVCRT 0
+%define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+%define HAVE_SECTION_DATA_REL_RO 1
+%define HAVE_THREADS 1
+%define HAVE_UWP 0
+%define HAVE_WINRT 0
 %define HAVE_ACCESS 1
 %define HAVE_ALIGNED_MALLOC 0
 %define HAVE_ARC4RANDOM 0
 %define HAVE_CLOCK_GETTIME 1
 %define HAVE_CLOSESOCKET 0
 %define HAVE_COMMANDLINETOARGVW 0
-%define HAVE_COTASKMEMFREE 0
-%define HAVE_CRYPTGENRANDOM 0
 %define HAVE_FCNTL 1
-%define HAVE_FLT_LIM 1
-%define HAVE_FORK 1
 %define HAVE_GETADDRINFO 0
 %define HAVE_GETHRTIME 0
 %define HAVE_GETOPT 1
@@ -285,9 +276,7 @@
 %define HAVE_GMTIME_R 1
 %define HAVE_INET_ATON 0
 %define HAVE_ISATTY 1
-%define HAVE_JACK_PORT_GET_LATENCY_RANGE 0
 %define HAVE_KBHIT 0
-%define HAVE_LOADLIBRARY 0
 %define HAVE_LOCALTIME_R 1
 %define HAVE_LSTAT 1
 %define HAVE_LZO1X_999_COMPRESS 0
@@ -302,6 +291,7 @@
 %define HAVE_POSIX_MEMALIGN 1
 %define HAVE_PTHREAD_CANCEL 1
 %define HAVE_SCHED_GETAFFINITY 1
+%define HAVE_SECITEMIMPORT 0
 %define HAVE_SETCONSOLETEXTATTRIBUTE 0
 %define HAVE_SETCONSOLECTRLHANDLER 0
 %define HAVE_SETMODE 0
@@ -314,16 +304,19 @@
 %define HAVE_UTGETOSTYPEFROMSTRING 0
 %define HAVE_VIRTUALALLOC 0
 %define HAVE_WGLGETPROCADDRESS 0
+%define HAVE_BCRYPT 0
+%define HAVE_VAAPI_DRM 0
+%define HAVE_VAAPI_X11 0
+%define HAVE_VDPAU_X11 0
 %define HAVE_PTHREADS 1
 %define HAVE_OS2THREADS 0
 %define HAVE_W32THREADS 0
+%define HAVE_AS_ARCH_DIRECTIVE 0
 %define HAVE_AS_DN_DIRECTIVE 0
 %define HAVE_AS_FPU_DIRECTIVE 0
 %define HAVE_AS_FUNC 0
 %define HAVE_AS_OBJECT_ARCH 0
 %define HAVE_ASM_MOD_Q 0
-%define HAVE_ATTRIBUTE_MAY_ALIAS 1
-%define HAVE_ATTRIBUTE_PACKED 1
 %define HAVE_BLOCKS_EXTENSION 0
 %define HAVE_EBP_AVAILABLE 1
 %define HAVE_EBX_AVAILABLE 1
@@ -340,7 +333,6 @@
 %define HAVE_VFP_ARGS 0
 %define HAVE_XFORM_ASM 0
 %define HAVE_XMM_CLOBBERS 1
-%define HAVE_CONDITION_VARIABLE_PTR 0
 %define HAVE_KCMVIDEOCODECTYPE_HEVC 0
 %define HAVE_SOCKLEN_T 0
 %define HAVE_STRUCT_ADDRINFO 0
@@ -356,22 +348,17 @@
 %define HAVE_STRUCT_SOCKADDR_STORAGE 0
 %define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1
 %define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 1
-%define HAVE_ATOMICS_NATIVE 1
-%define HAVE_DOS_PATHS 0
-%define HAVE_LIBC_MSVCRT 0
 %define HAVE_MAKEINFO 1
 %define HAVE_MAKEINFO_HTML 1
-%define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+%define HAVE_OPENCL_D3D11 0
+%define HAVE_OPENCL_DRM_ARM 0
+%define HAVE_OPENCL_DRM_BEIGNET 0
+%define HAVE_OPENCL_DXVA2 0
+%define HAVE_OPENCL_VAAPI_BEIGNET 0
+%define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0
 %define HAVE_PERL 1
 %define HAVE_POD2MAN 1
-%define HAVE_SECTION_DATA_REL_RO 1
 %define HAVE_TEXI2HTML 0
-%define HAVE_THREADS 1
-%define HAVE_UWP 0
-%define HAVE_VAAPI_DRM 0
-%define HAVE_VAAPI_X11 0
-%define HAVE_VDPAU_X11 0
-%define HAVE_WINRT 0
 %define CONFIG_DOC 0
 %define CONFIG_HTMLPAGES 0
 %define CONFIG_MANPAGES 0
@@ -398,41 +385,31 @@
 %define CONFIG_SCALING_VIDEO_EXAMPLE 0
 %define CONFIG_TRANSCODE_AAC_EXAMPLE 0
 %define CONFIG_TRANSCODING_EXAMPLE 0
-%define CONFIG_ALSA 0
-%define CONFIG_APPKIT 0
-%define CONFIG_AVFOUNDATION 0
-%define CONFIG_BZLIB 0
-%define CONFIG_COREIMAGE 0
-%define CONFIG_ICONV 0
-%define CONFIG_JACK 0
-%define CONFIG_LIBXCB 1
-%define CONFIG_LIBXCB_SHM 1
-%define CONFIG_LIBXCB_SHAPE 0
-%define CONFIG_LIBXCB_XFIXES 0
-%define CONFIG_LZMA 1
-%define CONFIG_SCHANNEL 0
-%define CONFIG_SDL2 0
-%define CONFIG_SECURETRANSPORT 0
-%define CONFIG_SNDIO 0
-%define CONFIG_XLIB 0
-%define CONFIG_ZLIB 0
+%define CONFIG_VAAPI_ENCODE_EXAMPLE 0
+%define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0
 %define CONFIG_AVISYNTH 0
 %define CONFIG_FREI0R 0
 %define CONFIG_LIBCDIO 0
+%define CONFIG_LIBDAVS2 0
 %define CONFIG_LIBRUBBERBAND 0
 %define CONFIG_LIBVIDSTAB 0
 %define CONFIG_LIBX264 0
 %define CONFIG_LIBX265 0
 %define CONFIG_LIBXAVS 0
+%define CONFIG_LIBXAVS2 0
 %define CONFIG_LIBXVID 0
 %define CONFIG_DECKLINK 0
 %define CONFIG_LIBNDI_NEWTEK 0
 %define CONFIG_LIBFDK_AAC 0
 %define CONFIG_OPENSSL 0
+%define CONFIG_LIBTLS 0
 %define CONFIG_GMP 0
+%define CONFIG_LIBLENSFUN 0
 %define CONFIG_LIBOPENCORE_AMRNB 0
 %define CONFIG_LIBOPENCORE_AMRWB 0
+%define CONFIG_LIBVMAF 0
 %define CONFIG_LIBVO_AMRWBENC 0
+%define CONFIG_MBEDTLS 0
 %define CONFIG_RKMPP 0
 %define CONFIG_LIBSMBCLIENT 0
 %define CONFIG_CHROMAPRINT 0
@@ -440,11 +417,13 @@
 %define CONFIG_GNUTLS 0
 %define CONFIG_JNI 0
 %define CONFIG_LADSPA 0
+%define CONFIG_LIBAOM 0
 %define CONFIG_LIBASS 0
 %define CONFIG_LIBBLURAY 0
 %define CONFIG_LIBBS2B 0
 %define CONFIG_LIBCACA 0
 %define CONFIG_LIBCELT 0
+%define CONFIG_LIBCODEC2 0
 %define CONFIG_LIBDC1394 0
 %define CONFIG_LIBDRM 0
 %define CONFIG_LIBFLITE 0
@@ -455,6 +434,8 @@
 %define CONFIG_LIBGSM 0
 %define CONFIG_LIBIEC61883 0
 %define CONFIG_LIBILBC 0
+%define CONFIG_LIBJACK 0
+%define CONFIG_LIBKLVANC 0
 %define CONFIG_LIBKVAZAAR 0
 %define CONFIG_LIBMODPLUG 0
 %define CONFIG_LIBMP3LAME 0
@@ -472,12 +453,13 @@
 %define CONFIG_LIBSNAPPY 0
 %define CONFIG_LIBSOXR 0
 %define CONFIG_LIBSPEEX 0
+%define CONFIG_LIBSRT 0
 %define CONFIG_LIBSSH 0
+%define CONFIG_LIBTENSORFLOW 0
 %define CONFIG_LIBTESSERACT 0
 %define CONFIG_LIBTHEORA 0
 %define CONFIG_LIBTWOLAME 0
 %define CONFIG_LIBV4L2 0
-%define CONFIG_LIBVMAF 0
 %define CONFIG_LIBVORBIS 0
 %define CONFIG_LIBVPX 0
 %define CONFIG_LIBWAVPACK 0
@@ -486,28 +468,49 @@
 %define CONFIG_LIBZIMG 0
 %define CONFIG_LIBZMQ 0
 %define CONFIG_LIBZVBI 0
+%define CONFIG_LV2 0
 %define CONFIG_MEDIACODEC 0
 %define CONFIG_OPENAL 0
-%define CONFIG_OPENCL 0
 %define CONFIG_OPENGL 0
+%define CONFIG_VAPOURSYNTH 0
+%define CONFIG_ALSA 1
+%define CONFIG_APPKIT 0
+%define CONFIG_AVFOUNDATION 0
+%define CONFIG_BZLIB 0
+%define CONFIG_COREIMAGE 0
+%define CONFIG_ICONV 0
+%define CONFIG_LIBXCB 0
+%define CONFIG_LIBXCB_SHM 0
+%define CONFIG_LIBXCB_SHAPE 0
+%define CONFIG_LIBXCB_XFIXES 0
+%define CONFIG_LZMA 0
+%define CONFIG_SCHANNEL 0
+%define CONFIG_SDL2 0
+%define CONFIG_SECURETRANSPORT 0
+%define CONFIG_SNDIO 0
+%define CONFIG_XLIB 0
+%define CONFIG_ZLIB 0
+%define CONFIG_CUDA_SDK 0
+%define CONFIG_LIBNPP 0
+%define CONFIG_LIBMFX 0
+%define CONFIG_MMAL 0
+%define CONFIG_OMX 0
+%define CONFIG_OPENCL 0
+%define CONFIG_AMF 0
 %define CONFIG_AUDIOTOOLBOX 0
 %define CONFIG_CRYSTALHD 0
 %define CONFIG_CUDA 0
 %define CONFIG_CUVID 0
 %define CONFIG_D3D11VA 0
 %define CONFIG_DXVA2 0
+%define CONFIG_FFNVCODEC 0
+%define CONFIG_NVDEC 0
 %define CONFIG_NVENC 0
 %define CONFIG_VAAPI 0
-%define CONFIG_VDA 0
 %define CONFIG_VDPAU 0
 %define CONFIG_VIDEOTOOLBOX 0
 %define CONFIG_V4L2_M2M 0
 %define CONFIG_XVMC 0
-%define CONFIG_CUDA_SDK 0
-%define CONFIG_LIBNPP 0
-%define CONFIG_LIBMFX 0
-%define CONFIG_MMAL 0
-%define CONFIG_OMX 0
 %define CONFIG_FTRAPV 0
 %define CONFIG_GRAY 0
 %define CONFIG_HARDCODED_TABLES 0
@@ -521,20 +524,19 @@
 %define CONFIG_GPL 0
 %define CONFIG_NONFREE 0
 %define CONFIG_VERSION3 0
-%define CONFIG_AVCODEC 1
 %define CONFIG_AVDEVICE 0
 %define CONFIG_AVFILTER 0
+%define CONFIG_SWSCALE 0
+%define CONFIG_POSTPROC 0
 %define CONFIG_AVFORMAT 1
+%define CONFIG_AVCODEC 1
+%define CONFIG_SWRESAMPLE 0
 %define CONFIG_AVRESAMPLE 0
 %define CONFIG_AVUTIL 1
-%define CONFIG_POSTPROC 0
-%define CONFIG_SWRESAMPLE 0
-%define CONFIG_SWSCALE 0
 %define CONFIG_FFPLAY 0
 %define CONFIG_FFPROBE 0
-%define CONFIG_FFSERVER 0
 %define CONFIG_FFMPEG 0
-%define CONFIG_DCT 0
+%define CONFIG_DCT 1
 %define CONFIG_DWT 0
 %define CONFIG_ERROR_RESILIENCE 0
 %define CONFIG_FAAN 0
@@ -569,12 +571,21 @@
 %define CONFIG_PROTOCOLS 0
 %define CONFIG_AANDCTTABLES 0
 %define CONFIG_AC3DSP 0
-%define CONFIG_AUDIO_FRAME_QUEUE 0
+%define CONFIG_ADTS_HEADER 0
+%define CONFIG_AUDIO_FRAME_QUEUE 1
 %define CONFIG_AUDIODSP 0
 %define CONFIG_BLOCKDSP 0
 %define CONFIG_BSWAPDSP 0
 %define CONFIG_CABAC 0
+%define CONFIG_CBS 0
+%define CONFIG_CBS_AV1 0
+%define CONFIG_CBS_H264 0
+%define CONFIG_CBS_H265 0
+%define CONFIG_CBS_JPEG 0
+%define CONFIG_CBS_MPEG2 0
+%define CONFIG_CBS_VP9 0
 %define CONFIG_DIRAC_PARSE 1
+%define CONFIG_DNN 0
 %define CONFIG_DVPROFILE 0
 %define CONFIG_EXIF 0
 %define CONFIG_FAANDCT 0
@@ -613,9 +624,9 @@
 %define CONFIG_LZF 0
 %define CONFIG_ME_CMP 0
 %define CONFIG_MPEG_ER 0
-%define CONFIG_MPEGAUDIO 0
-%define CONFIG_MPEGAUDIODSP 0
-%define CONFIG_MPEGAUDIOHEADER 0
+%define CONFIG_MPEGAUDIO 1
+%define CONFIG_MPEGAUDIODSP 1
+%define CONFIG_MPEGAUDIOHEADER 1
 %define CONFIG_MPEGVIDEO 0
 %define CONFIG_MPEGVIDEOENC 0
 %define CONFIG_MSS34DSP 0
@@ -624,6 +635,7 @@
 %define CONFIG_QSV 0
 %define CONFIG_QSVDEC 0
 %define CONFIG_QSVENC 0
+%define CONFIG_QSVVPP 0
 %define CONFIG_RANGECODER 0
 %define CONFIG_RIFFDEC 1
 %define CONFIG_RIFFENC 0
@@ -647,25 +659,35 @@
 %define CONFIG_WMA_FREQS 0
 %define CONFIG_WMV2DSP 0
 %define CONFIG_AAC_ADTSTOASC_BSF 0
+%define CONFIG_AV1_METADATA_BSF 0
 %define CONFIG_CHOMP_BSF 0
 %define CONFIG_DUMP_EXTRADATA_BSF 0
 %define CONFIG_DCA_CORE_BSF 0
+%define CONFIG_EAC3_CORE_BSF 0
 %define CONFIG_EXTRACT_EXTRADATA_BSF 0
+%define CONFIG_FILTER_UNITS_BSF 0
+%define CONFIG_H264_METADATA_BSF 0
 %define CONFIG_H264_MP4TOANNEXB_BSF 0
+%define CONFIG_H264_REDUNDANT_PPS_BSF 0
+%define CONFIG_HAPQA_EXTRACT_BSF 0
+%define CONFIG_HEVC_METADATA_BSF 0
 %define CONFIG_HEVC_MP4TOANNEXB_BSF 0
 %define CONFIG_IMX_DUMP_HEADER_BSF 0
 %define CONFIG_MJPEG2JPEG_BSF 0
 %define CONFIG_MJPEGA_DUMP_HEADER_BSF 0
 %define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0
+%define CONFIG_MPEG2_METADATA_BSF 0
 %define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0
 %define CONFIG_MOV2TEXTSUB_BSF 0
 %define CONFIG_NOISE_BSF 0
 %define CONFIG_NULL_BSF 1
 %define CONFIG_REMOVE_EXTRADATA_BSF 0
 %define CONFIG_TEXT2MOVSUB_BSF 0
+%define CONFIG_TRACE_HEADERS_BSF 0
+%define CONFIG_VP9_METADATA_BSF 0
 %define CONFIG_VP9_RAW_REORDER_BSF 0
 %define CONFIG_VP9_SUPERFRAME_BSF 0
-%define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0
+%define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 1
 %define CONFIG_AASC_DECODER 0
 %define CONFIG_AIC_DECODER 0
 %define CONFIG_ALIAS_PIX_DECODER 0
@@ -685,6 +707,7 @@
 %define CONFIG_BETHSOFTVID_DECODER 0
 %define CONFIG_BFI_DECODER 0
 %define CONFIG_BINK_DECODER 0
+%define CONFIG_BITPACKED_DECODER 0
 %define CONFIG_BMP_DECODER 0
 %define CONFIG_BMV_VIDEO_DECODER 0
 %define CONFIG_BRENDER_PIX_DECODER 0
@@ -750,8 +773,6 @@
 %define CONFIG_H264_MMAL_DECODER 0
 %define CONFIG_H264_QSV_DECODER 0
 %define CONFIG_H264_RKMPP_DECODER 0
-%define CONFIG_H264_VDA_DECODER 0
-%define CONFIG_H264_VDPAU_DECODER 0
 %define CONFIG_HAP_DECODER 0
 %define CONFIG_HEVC_DECODER 0
 %define CONFIG_HEVC_QSV_DECODER 0
@@ -763,6 +784,7 @@
 %define CONFIG_HUFFYUV_DECODER 0
 %define CONFIG_IDCIN_DECODER 0
 %define CONFIG_IFF_ILBM_DECODER 0
+%define CONFIG_IMM4_DECODER 0
 %define CONFIG_INDEO2_DECODER 0
 %define CONFIG_INDEO3_DECODER 0
 %define CONFIG_INDEO4_DECODER 0
@@ -783,17 +805,13 @@
 %define CONFIG_MJPEGB_DECODER 0
 %define CONFIG_MMVIDEO_DECODER 0
 %define CONFIG_MOTIONPIXELS_DECODER 0
-%define CONFIG_MPEG_XVMC_DECODER 0
 %define CONFIG_MPEG1VIDEO_DECODER 0
 %define CONFIG_MPEG2VIDEO_DECODER 0
 %define CONFIG_MPEG4_DECODER 0
 %define CONFIG_MPEG4_CRYSTALHD_DECODER 0
 %define CONFIG_MPEG4_V4L2M2M_DECODER 0
 %define CONFIG_MPEG4_MMAL_DECODER 0
-%define CONFIG_MPEG4_VDPAU_DECODER 0
 %define CONFIG_MPEGVIDEO_DECODER 0
-%define CONFIG_MPEG_VDPAU_DECODER 0
-%define CONFIG_MPEG1_VDPAU_DECODER 0
 %define CONFIG_MPEG1_V4L2M2M_DECODER 0
 %define CONFIG_MPEG2_MMAL_DECODER 0
 %define CONFIG_MPEG2_CRYSTALHD_DECODER 0
@@ -814,6 +832,7 @@
 %define CONFIG_MTS2_DECODER 0
 %define CONFIG_MVC1_DECODER 0
 %define CONFIG_MVC2_DECODER 0
+%define CONFIG_MWSC_DECODER 0
 %define CONFIG_MXPEG_DECODER 0
 %define CONFIG_NUV_DECODER 0
 %define CONFIG_PAF_VIDEO_DECODER 0
@@ -827,7 +846,7 @@
 %define CONFIG_PNG_DECODER 0
 %define CONFIG_PPM_DECODER 0
 %define CONFIG_PRORES_DECODER 0
-%define CONFIG_PRORES_LGPL_DECODER 0
+%define CONFIG_PROSUMER_DECODER 0
 %define CONFIG_PSD_DECODER 0
 %define CONFIG_PTX_DECODER 0
 %define CONFIG_QDRAW_DECODER 0
@@ -835,6 +854,7 @@
 %define CONFIG_QTRLE_DECODER 0
 %define CONFIG_R10K_DECODER 0
 %define CONFIG_R210_DECODER 0
+%define CONFIG_RASC_DECODER 0
 %define CONFIG_RAWVIDEO_DECODER 0
 %define CONFIG_RL2_DECODER 0
 %define CONFIG_ROQ_DECODER 0
@@ -887,7 +907,6 @@
 %define CONFIG_VBLE_DECODER 0
 %define CONFIG_VC1_DECODER 0
 %define CONFIG_VC1_CRYSTALHD_DECODER 0
-%define CONFIG_VC1_VDPAU_DECODER 0
 %define CONFIG_VC1IMAGE_DECODER 0
 %define CONFIG_VC1_MMAL_DECODER 0
 %define CONFIG_VC1_QSV_DECODER 0
@@ -908,14 +927,13 @@
 %define CONFIG_VP9_RKMPP_DECODER 0
 %define CONFIG_VP9_V4L2M2M_DECODER 0
 %define CONFIG_VQA_DECODER 0
-%define CONFIG_BITPACKED_DECODER 0
 %define CONFIG_WEBP_DECODER 0
+%define CONFIG_WCMV_DECODER 0
 %define CONFIG_WRAPPED_AVFRAME_DECODER 0
 %define CONFIG_WMV1_DECODER 0
 %define CONFIG_WMV2_DECODER 0
 %define CONFIG_WMV3_DECODER 0
 %define CONFIG_WMV3_CRYSTALHD_DECODER 0
-%define CONFIG_WMV3_VDPAU_DECODER 0
 %define CONFIG_WMV3IMAGE_DECODER 0
 %define CONFIG_WNV1_DECODER 0
 %define CONFIG_XAN_WC3_DECODER 0
@@ -943,11 +961,14 @@
 %define CONFIG_AMRNB_DECODER 0
 %define CONFIG_AMRWB_DECODER 0
 %define CONFIG_APE_DECODER 0
+%define CONFIG_APTX_DECODER 1
+%define CONFIG_APTX_HD_DECODER 0
 %define CONFIG_ATRAC1_DECODER 0
 %define CONFIG_ATRAC3_DECODER 0
 %define CONFIG_ATRAC3AL_DECODER 0
 %define CONFIG_ATRAC3P_DECODER 0
 %define CONFIG_ATRAC3PAL_DECODER 0
+%define CONFIG_ATRAC9_DECODER 0
 %define CONFIG_BINKAUDIO_DCT_DECODER 0
 %define CONFIG_BINKAUDIO_RDFT_DECODER 0
 %define CONFIG_BMV_AUDIO_DECODER 0
@@ -970,6 +991,7 @@
 %define CONFIG_GSM_DECODER 0
 %define CONFIG_GSM_MS_DECODER 0
 %define CONFIG_IAC_DECODER 0
+%define CONFIG_ILBC_DECODER 0
 %define CONFIG_IMC_DECODER 0
 %define CONFIG_INTERPLAY_ACM_DECODER 0
 %define CONFIG_MACE3_DECODER 0
@@ -980,12 +1002,12 @@
 %define CONFIG_MP1FLOAT_DECODER 0
 %define CONFIG_MP2_DECODER 0
 %define CONFIG_MP2FLOAT_DECODER 0
-%define CONFIG_MP3_DECODER 0
 %define CONFIG_MP3FLOAT_DECODER 0
-%define CONFIG_MP3ADU_DECODER 0
+%define CONFIG_MP3_DECODER 1
 %define CONFIG_MP3ADUFLOAT_DECODER 0
-%define CONFIG_MP3ON4_DECODER 0
+%define CONFIG_MP3ADU_DECODER 0
 %define CONFIG_MP3ON4FLOAT_DECODER 0
+%define CONFIG_MP3ON4_DECODER 0
 %define CONFIG_MPC7_DECODER 0
 %define CONFIG_MPC8_DECODER 0
 %define CONFIG_NELLYMOSER_DECODER 0
@@ -998,6 +1020,7 @@
 %define CONFIG_RA_144_DECODER 0
 %define CONFIG_RA_288_DECODER 0
 %define CONFIG_RALF_DECODER 0
+%define CONFIG_SBC_DECODER 1
 %define CONFIG_SHORTEN_DECODER 0
 %define CONFIG_SIPR_DECODER 0
 %define CONFIG_SMACKAUD_DECODER 0
@@ -1135,7 +1158,10 @@
 %define CONFIG_PCM_MULAW_AT_DECODER 0
 %define CONFIG_QDMC_AT_DECODER 0
 %define CONFIG_QDM2_AT_DECODER 0
+%define CONFIG_LIBAOM_AV1_DECODER 0
 %define CONFIG_LIBCELT_DECODER 0
+%define CONFIG_LIBCODEC2_DECODER 0
+%define CONFIG_LIBDAVS2_DECODER 0
 %define CONFIG_LIBFDK_AAC_DECODER 0
 %define CONFIG_LIBGSM_DECODER 0
 %define CONFIG_LIBGSM_MS_DECODER 0
@@ -1168,288 +1194,6 @@
 %define CONFIG_VP8_QSV_DECODER 0
 %define CONFIG_VP9_CUVID_DECODER 0
 %define CONFIG_VP9_MEDIACODEC_DECODER 0
-%define CONFIG_AA_DEMUXER 0
-%define CONFIG_AAC_DEMUXER 0
-%define CONFIG_AC3_DEMUXER 0
-%define CONFIG_ACM_DEMUXER 0
-%define CONFIG_ACT_DEMUXER 0
-%define CONFIG_ADF_DEMUXER 0
-%define CONFIG_ADP_DEMUXER 0
-%define CONFIG_ADS_DEMUXER 0
-%define CONFIG_ADX_DEMUXER 0
-%define CONFIG_AEA_DEMUXER 0
-%define CONFIG_AFC_DEMUXER 0
-%define CONFIG_AIFF_DEMUXER 0
-%define CONFIG_AIX_DEMUXER 0
-%define CONFIG_AMR_DEMUXER 0
-%define CONFIG_ANM_DEMUXER 0
-%define CONFIG_APC_DEMUXER 0
-%define CONFIG_APE_DEMUXER 0
-%define CONFIG_APNG_DEMUXER 0
-%define CONFIG_AQTITLE_DEMUXER 0
-%define CONFIG_ASF_DEMUXER 0
-%define CONFIG_ASF_O_DEMUXER 0
-%define CONFIG_ASS_DEMUXER 0
-%define CONFIG_AST_DEMUXER 0
-%define CONFIG_AU_DEMUXER 0
-%define CONFIG_AVI_DEMUXER 0
-%define CONFIG_AVISYNTH_DEMUXER 0
-%define CONFIG_AVR_DEMUXER 0
-%define CONFIG_AVS_DEMUXER 0
-%define CONFIG_BETHSOFTVID_DEMUXER 0
-%define CONFIG_BFI_DEMUXER 0
-%define CONFIG_BINTEXT_DEMUXER 0
-%define CONFIG_BINK_DEMUXER 0
-%define CONFIG_BIT_DEMUXER 0
-%define CONFIG_BMV_DEMUXER 0
-%define CONFIG_BFSTM_DEMUXER 0
-%define CONFIG_BRSTM_DEMUXER 0
-%define CONFIG_BOA_DEMUXER 0
-%define CONFIG_C93_DEMUXER 0
-%define CONFIG_CAF_DEMUXER 0
-%define CONFIG_CAVSVIDEO_DEMUXER 0
-%define CONFIG_CDG_DEMUXER 0
-%define CONFIG_CDXL_DEMUXER 0
-%define CONFIG_CINE_DEMUXER 0
-%define CONFIG_CONCAT_DEMUXER 0
-%define CONFIG_DASH_DEMUXER 0
-%define CONFIG_DATA_DEMUXER 0
-%define CONFIG_DAUD_DEMUXER 0
-%define CONFIG_DCSTR_DEMUXER 0
-%define CONFIG_DFA_DEMUXER 0
-%define CONFIG_DIRAC_DEMUXER 0
-%define CONFIG_DNXHD_DEMUXER 0
-%define CONFIG_DSF_DEMUXER 0
-%define CONFIG_DSICIN_DEMUXER 0
-%define CONFIG_DSS_DEMUXER 0
-%define CONFIG_DTS_DEMUXER 0
-%define CONFIG_DTSHD_DEMUXER 0
-%define CONFIG_DV_DEMUXER 0
-%define CONFIG_DVBSUB_DEMUXER 0
-%define CONFIG_DVBTXT_DEMUXER 0
-%define CONFIG_DXA_DEMUXER 0
-%define CONFIG_EA_DEMUXER 0
-%define CONFIG_EA_CDATA_DEMUXER 0
-%define CONFIG_EAC3_DEMUXER 0
-%define CONFIG_EPAF_DEMUXER 0
-%define CONFIG_FFM_DEMUXER 0
-%define CONFIG_FFMETADATA_DEMUXER 0
-%define CONFIG_FILMSTRIP_DEMUXER 0
-%define CONFIG_FITS_DEMUXER 0
-%define CONFIG_FLAC_DEMUXER 1
-%define CONFIG_FLIC_DEMUXER 0
-%define CONFIG_FLV_DEMUXER 0
-%define CONFIG_LIVE_FLV_DEMUXER 0
-%define CONFIG_FOURXM_DEMUXER 0
-%define CONFIG_FRM_DEMUXER 0
-%define CONFIG_FSB_DEMUXER 0
-%define CONFIG_G722_DEMUXER 0
-%define CONFIG_G723_1_DEMUXER 0
-%define CONFIG_G726_DEMUXER 0
-%define CONFIG_G726LE_DEMUXER 0
-%define CONFIG_G729_DEMUXER 0
-%define CONFIG_GDV_DEMUXER 0
-%define CONFIG_GENH_DEMUXER 0
-%define CONFIG_GIF_DEMUXER 0
-%define CONFIG_GSM_DEMUXER 0
-%define CONFIG_GXF_DEMUXER 0
-%define CONFIG_H261_DEMUXER 0
-%define CONFIG_H263_DEMUXER 0
-%define CONFIG_H264_DEMUXER 0
-%define CONFIG_HEVC_DEMUXER 0
-%define CONFIG_HLS_DEMUXER 0
-%define CONFIG_HNM_DEMUXER 0
-%define CONFIG_ICO_DEMUXER 0
-%define CONFIG_IDCIN_DEMUXER 0
-%define CONFIG_IDF_DEMUXER 0
-%define CONFIG_IFF_DEMUXER 0
-%define CONFIG_ILBC_DEMUXER 0
-%define CONFIG_IMAGE2_DEMUXER 0
-%define CONFIG_IMAGE2PIPE_DEMUXER 0
-%define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 0
-%define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0
-%define CONFIG_INGENIENT_DEMUXER 0
-%define CONFIG_IPMOVIE_DEMUXER 0
-%define CONFIG_IRCAM_DEMUXER 0
-%define CONFIG_ISS_DEMUXER 0
-%define CONFIG_IV8_DEMUXER 0
-%define CONFIG_IVF_DEMUXER 0
-%define CONFIG_IVR_DEMUXER 0
-%define CONFIG_JACOSUB_DEMUXER 0
-%define CONFIG_JV_DEMUXER 0
-%define CONFIG_LMLM4_DEMUXER 0
-%define CONFIG_LOAS_DEMUXER 0
-%define CONFIG_LRC_DEMUXER 0
-%define CONFIG_LVF_DEMUXER 0
-%define CONFIG_LXF_DEMUXER 0
-%define CONFIG_M4V_DEMUXER 0
-%define CONFIG_MATROSKA_DEMUXER 1
-%define CONFIG_MGSTS_DEMUXER 0
-%define CONFIG_MICRODVD_DEMUXER 0
-%define CONFIG_MJPEG_DEMUXER 0
-%define CONFIG_MJPEG_2000_DEMUXER 0
-%define CONFIG_MLP_DEMUXER 0
-%define CONFIG_MLV_DEMUXER 0
-%define CONFIG_MM_DEMUXER 0
-%define CONFIG_MMF_DEMUXER 0
-%define CONFIG_MOV_DEMUXER 0
-%define CONFIG_MP3_DEMUXER 0
-%define CONFIG_MPC_DEMUXER 0
-%define CONFIG_MPC8_DEMUXER 0
-%define CONFIG_MPEGPS_DEMUXER 0
-%define CONFIG_MPEGTS_DEMUXER 0
-%define CONFIG_MPEGTSRAW_DEMUXER 0
-%define CONFIG_MPEGVIDEO_DEMUXER 0
-%define CONFIG_MPJPEG_DEMUXER 0
-%define CONFIG_MPL2_DEMUXER 0
-%define CONFIG_MPSUB_DEMUXER 0
-%define CONFIG_MSF_DEMUXER 0
-%define CONFIG_MSNWC_TCP_DEMUXER 0
-%define CONFIG_MTAF_DEMUXER 0
-%define CONFIG_MTV_DEMUXER 0
-%define CONFIG_MUSX_DEMUXER 0
-%define CONFIG_MV_DEMUXER 0
-%define CONFIG_MVI_DEMUXER 0
-%define CONFIG_MXF_DEMUXER 0
-%define CONFIG_MXG_DEMUXER 0
-%define CONFIG_NC_DEMUXER 0
-%define CONFIG_NISTSPHERE_DEMUXER 0
-%define CONFIG_NSV_DEMUXER 0
-%define CONFIG_NUT_DEMUXER 0
-%define CONFIG_NUV_DEMUXER 0
-%define CONFIG_OGG_DEMUXER 1
-%define CONFIG_OMA_DEMUXER 0
-%define CONFIG_PAF_DEMUXER 0
-%define CONFIG_PCM_ALAW_DEMUXER 0
-%define CONFIG_PCM_MULAW_DEMUXER 0
-%define CONFIG_PCM_F64BE_DEMUXER 0
-%define CONFIG_PCM_F64LE_DEMUXER 0
-%define CONFIG_PCM_F32BE_DEMUXER 0
-%define CONFIG_PCM_F32LE_DEMUXER 0
-%define CONFIG_PCM_S32BE_DEMUXER 0
-%define CONFIG_PCM_S32LE_DEMUXER 0
-%define CONFIG_PCM_S24BE_DEMUXER 0
-%define CONFIG_PCM_S24LE_DEMUXER 0
-%define CONFIG_PCM_S16BE_DEMUXER 0
-%define CONFIG_PCM_S16LE_DEMUXER 0
-%define CONFIG_PCM_S8_DEMUXER 0
-%define CONFIG_PCM_U32BE_DEMUXER 0
-%define CONFIG_PCM_U32LE_DEMUXER 0
-%define CONFIG_PCM_U24BE_DEMUXER 0
-%define CONFIG_PCM_U24LE_DEMUXER 0
-%define CONFIG_PCM_U16BE_DEMUXER 0
-%define CONFIG_PCM_U16LE_DEMUXER 0
-%define CONFIG_PCM_U8_DEMUXER 0
-%define CONFIG_PJS_DEMUXER 0
-%define CONFIG_PMP_DEMUXER 0
-%define CONFIG_PVA_DEMUXER 0
-%define CONFIG_PVF_DEMUXER 0
-%define CONFIG_QCP_DEMUXER 0
-%define CONFIG_R3D_DEMUXER 0
-%define CONFIG_RAWVIDEO_DEMUXER 0
-%define CONFIG_REALTEXT_DEMUXER 0
-%define CONFIG_REDSPARK_DEMUXER 0
-%define CONFIG_RL2_DEMUXER 0
-%define CONFIG_RM_DEMUXER 0
-%define CONFIG_ROQ_DEMUXER 0
-%define CONFIG_RPL_DEMUXER 0
-%define CONFIG_RSD_DEMUXER 0
-%define CONFIG_RSO_DEMUXER 0
-%define CONFIG_RTP_DEMUXER 0
-%define CONFIG_RTSP_DEMUXER 0
-%define CONFIG_S337M_DEMUXER 0
-%define CONFIG_SAMI_DEMUXER 0
-%define CONFIG_SAP_DEMUXER 0
-%define CONFIG_SBG_DEMUXER 0
-%define CONFIG_SCC_DEMUXER 0
-%define CONFIG_SDP_DEMUXER 0
-%define CONFIG_SDR2_DEMUXER 0
-%define CONFIG_SDS_DEMUXER 0
-%define CONFIG_SDX_DEMUXER 0
-%define CONFIG_SEGAFILM_DEMUXER 0
-%define CONFIG_SHORTEN_DEMUXER 0
-%define CONFIG_SIFF_DEMUXER 0
-%define CONFIG_SLN_DEMUXER 0
-%define CONFIG_SMACKER_DEMUXER 0
-%define CONFIG_SMJPEG_DEMUXER 0
-%define CONFIG_SMUSH_DEMUXER 0
-%define CONFIG_SOL_DEMUXER 0
-%define CONFIG_SOX_DEMUXER 0
-%define CONFIG_SPDIF_DEMUXER 0
-%define CONFIG_SRT_DEMUXER 0
-%define CONFIG_STR_DEMUXER 0
-%define CONFIG_STL_DEMUXER 0
-%define CONFIG_SUBVIEWER1_DEMUXER 0
-%define CONFIG_SUBVIEWER_DEMUXER 0
-%define CONFIG_SUP_DEMUXER 0
-%define CONFIG_SVAG_DEMUXER 0
-%define CONFIG_SWF_DEMUXER 0
-%define CONFIG_TAK_DEMUXER 0
-%define CONFIG_TEDCAPTIONS_DEMUXER 0
-%define CONFIG_THP_DEMUXER 0
-%define CONFIG_THREEDOSTR_DEMUXER 0
-%define CONFIG_TIERTEXSEQ_DEMUXER 0
-%define CONFIG_TMV_DEMUXER 0
-%define CONFIG_TRUEHD_DEMUXER 0
-%define CONFIG_TTA_DEMUXER 0
-%define CONFIG_TXD_DEMUXER 0
-%define CONFIG_TTY_DEMUXER 0
-%define CONFIG_V210_DEMUXER 0
-%define CONFIG_V210X_DEMUXER 0
-%define CONFIG_VAG_DEMUXER 0
-%define CONFIG_VC1_DEMUXER 0
-%define CONFIG_VC1T_DEMUXER 0
-%define CONFIG_VIVO_DEMUXER 0
-%define CONFIG_VMD_DEMUXER 0
-%define CONFIG_VOBSUB_DEMUXER 0
-%define CONFIG_VOC_DEMUXER 0
-%define CONFIG_VPK_DEMUXER 0
-%define CONFIG_VPLAYER_DEMUXER 0
-%define CONFIG_VQF_DEMUXER 0
-%define CONFIG_W64_DEMUXER 0
-%define CONFIG_WAV_DEMUXER 1
-%define CONFIG_WC3_DEMUXER 0
-%define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0
-%define CONFIG_WEBVTT_DEMUXER 0
-%define CONFIG_WSAUD_DEMUXER 0
-%define CONFIG_WSD_DEMUXER 0
-%define CONFIG_WSVQA_DEMUXER 0
-%define CONFIG_WTV_DEMUXER 0
-%define CONFIG_WVE_DEMUXER 0
-%define CONFIG_WV_DEMUXER 0
-%define CONFIG_XA_DEMUXER 0
-%define CONFIG_XBIN_DEMUXER 0
-%define CONFIG_XMV_DEMUXER 0
-%define CONFIG_XVAG_DEMUXER 0
-%define CONFIG_XWMA_DEMUXER 0
-%define CONFIG_YOP_DEMUXER 0
-%define CONFIG_YUV4MPEGPIPE_DEMUXER 0
-%define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0
-%define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0
-%define CONFIG_LIBGME_DEMUXER 0
-%define CONFIG_LIBMODPLUG_DEMUXER 0
-%define CONFIG_LIBOPENMPT_DEMUXER 0
 %define CONFIG_A64MULTI_ENCODER 0
 %define CONFIG_A64MULTI5_ENCODER 0
 %define CONFIG_ALIAS_PIX_ENCODER 0
@@ -1482,6 +1226,7 @@
 %define CONFIG_JPEG2000_ENCODER 0
 %define CONFIG_JPEGLS_ENCODER 0
 %define CONFIG_LJPEG_ENCODER 0
+%define CONFIG_MAGICYUV_ENCODER 0
 %define CONFIG_MJPEG_ENCODER 0
 %define CONFIG_MPEG1VIDEO_ENCODER 0
 %define CONFIG_MPEG2VIDEO_ENCODER 0
@@ -1533,6 +1278,8 @@
 %define CONFIG_AC3_ENCODER 0
 %define CONFIG_AC3_FIXED_ENCODER 0
 %define CONFIG_ALAC_ENCODER 0
+%define CONFIG_APTX_ENCODER 0
+%define CONFIG_APTX_HD_ENCODER 0
 %define CONFIG_DCA_ENCODER 0
 %define CONFIG_EAC3_ENCODER 0
 %define CONFIG_FLAC_ENCODER 0
@@ -1543,6 +1290,7 @@
 %define CONFIG_NELLYMOSER_ENCODER 0
 %define CONFIG_OPUS_ENCODER 0
 %define CONFIG_RA_144_ENCODER 0
+%define CONFIG_SBC_ENCODER 0
 %define CONFIG_SONIC_ENCODER 0
 %define CONFIG_SONIC_LS_ENCODER 0
 %define CONFIG_TRUEHD_ENCODER 0
@@ -1604,6 +1352,8 @@
 %define CONFIG_ILBC_AT_ENCODER 0
 %define CONFIG_PCM_ALAW_AT_ENCODER 0
 %define CONFIG_PCM_MULAW_AT_ENCODER 0
+%define CONFIG_LIBAOM_AV1_ENCODER 0
+%define CONFIG_LIBCODEC2_ENCODER 0
 %define CONFIG_LIBFDK_AAC_ENCODER 0
 %define CONFIG_LIBGSM_ENCODER 0
 %define CONFIG_LIBGSM_MS_ENCODER 0
@@ -1628,9 +1378,11 @@
 %define CONFIG_LIBX264RGB_ENCODER 0
 %define CONFIG_LIBX265_ENCODER 0
 %define CONFIG_LIBXAVS_ENCODER 0
+%define CONFIG_LIBXAVS2_ENCODER 0
 %define CONFIG_LIBXVID_ENCODER 0
 %define CONFIG_H263_V4L2M2M_ENCODER 0
 %define CONFIG_LIBOPENH264_ENCODER 0
+%define CONFIG_H264_AMF_ENCODER 0
 %define CONFIG_H264_NVENC_ENCODER 0
 %define CONFIG_H264_OMX_ENCODER 0
 %define CONFIG_H264_QSV_ENCODER 0
@@ -1640,11 +1392,14 @@
 %define CONFIG_NVENC_ENCODER 0
 %define CONFIG_NVENC_H264_ENCODER 0
 %define CONFIG_NVENC_HEVC_ENCODER 0
+%define CONFIG_HEVC_AMF_ENCODER 0
 %define CONFIG_HEVC_NVENC_ENCODER 0
 %define CONFIG_HEVC_QSV_ENCODER 0
 %define CONFIG_HEVC_V4L2M2M_ENCODER 0
 %define CONFIG_HEVC_VAAPI_ENCODER 0
+%define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0
 %define CONFIG_LIBKVAZAAR_ENCODER 0
+%define CONFIG_MJPEG_QSV_ENCODER 0
 %define CONFIG_MJPEG_VAAPI_ENCODER 0
 %define CONFIG_MPEG2_QSV_ENCODER 0
 %define CONFIG_MPEG2_VAAPI_ENCODER 0
@@ -1652,20 +1407,159 @@
 %define CONFIG_VP8_V4L2M2M_ENCODER 0
 %define CONFIG_VP8_VAAPI_ENCODER 0
 %define CONFIG_VP9_VAAPI_ENCODER 0
+%define CONFIG_H263_VAAPI_HWACCEL 0
+%define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_H264_D3D11VA_HWACCEL 0
+%define CONFIG_H264_D3D11VA2_HWACCEL 0
+%define CONFIG_H264_DXVA2_HWACCEL 0
+%define CONFIG_H264_NVDEC_HWACCEL 0
+%define CONFIG_H264_VAAPI_HWACCEL 0
+%define CONFIG_H264_VDPAU_HWACCEL 0
+%define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_HEVC_D3D11VA_HWACCEL 0
+%define CONFIG_HEVC_D3D11VA2_HWACCEL 0
+%define CONFIG_HEVC_DXVA2_HWACCEL 0
+%define CONFIG_HEVC_NVDEC_HWACCEL 0
+%define CONFIG_HEVC_VAAPI_HWACCEL 0
+%define CONFIG_HEVC_VDPAU_HWACCEL 0
+%define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_MJPEG_NVDEC_HWACCEL 0
+%define CONFIG_MJPEG_VAAPI_HWACCEL 0
+%define CONFIG_MPEG1_NVDEC_HWACCEL 0
+%define CONFIG_MPEG1_VDPAU_HWACCEL 0
+%define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_MPEG1_XVMC_HWACCEL 0
+%define CONFIG_MPEG2_D3D11VA_HWACCEL 0
+%define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
+%define CONFIG_MPEG2_NVDEC_HWACCEL 0
+%define CONFIG_MPEG2_DXVA2_HWACCEL 0
+%define CONFIG_MPEG2_VAAPI_HWACCEL 0
+%define CONFIG_MPEG2_VDPAU_HWACCEL 0
+%define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_MPEG2_XVMC_HWACCEL 0
+%define CONFIG_MPEG4_NVDEC_HWACCEL 0
+%define CONFIG_MPEG4_VAAPI_HWACCEL 0
+%define CONFIG_MPEG4_VDPAU_HWACCEL 0
+%define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_VC1_D3D11VA_HWACCEL 0
+%define CONFIG_VC1_D3D11VA2_HWACCEL 0
+%define CONFIG_VC1_DXVA2_HWACCEL 0
+%define CONFIG_VC1_NVDEC_HWACCEL 0
+%define CONFIG_VC1_VAAPI_HWACCEL 0
+%define CONFIG_VC1_VDPAU_HWACCEL 0
+%define CONFIG_VP8_NVDEC_HWACCEL 0
+%define CONFIG_VP8_VAAPI_HWACCEL 0
+%define CONFIG_VP9_D3D11VA_HWACCEL 0
+%define CONFIG_VP9_D3D11VA2_HWACCEL 0
+%define CONFIG_VP9_DXVA2_HWACCEL 0
+%define CONFIG_VP9_NVDEC_HWACCEL 0
+%define CONFIG_VP9_VAAPI_HWACCEL 0
+%define CONFIG_WMV3_D3D11VA_HWACCEL 0
+%define CONFIG_WMV3_D3D11VA2_HWACCEL 0
+%define CONFIG_WMV3_DXVA2_HWACCEL 0
+%define CONFIG_WMV3_NVDEC_HWACCEL 0
+%define CONFIG_WMV3_VAAPI_HWACCEL 0
+%define CONFIG_WMV3_VDPAU_HWACCEL 0
+%define CONFIG_AAC_PARSER 0
+%define CONFIG_AAC_LATM_PARSER 0
+%define CONFIG_AC3_PARSER 0
+%define CONFIG_ADX_PARSER 0
+%define CONFIG_AV1_PARSER 0
+%define CONFIG_AVS2_PARSER 0
+%define CONFIG_BMP_PARSER 0
+%define CONFIG_CAVSVIDEO_PARSER 0
+%define CONFIG_COOK_PARSER 0
+%define CONFIG_DCA_PARSER 0
+%define CONFIG_DIRAC_PARSER 0
+%define CONFIG_DNXHD_PARSER 0
+%define CONFIG_DPX_PARSER 0
+%define CONFIG_DVAUDIO_PARSER 0
+%define CONFIG_DVBSUB_PARSER 0
+%define CONFIG_DVDSUB_PARSER 0
+%define CONFIG_DVD_NAV_PARSER 0
+%define CONFIG_FLAC_PARSER 1
+%define CONFIG_G729_PARSER 0
+%define CONFIG_GSM_PARSER 0
+%define CONFIG_H261_PARSER 0
+%define CONFIG_H263_PARSER 0
+%define CONFIG_H264_PARSER 0
+%define CONFIG_HEVC_PARSER 0
+%define CONFIG_MJPEG_PARSER 0
+%define CONFIG_MLP_PARSER 0
+%define CONFIG_MPEG4VIDEO_PARSER 0
+%define CONFIG_MPEGAUDIO_PARSER 1
+%define CONFIG_MPEGVIDEO_PARSER 0
+%define CONFIG_OPUS_PARSER 1
+%define CONFIG_PNG_PARSER 0
+%define CONFIG_PNM_PARSER 0
+%define CONFIG_RV30_PARSER 0
+%define CONFIG_RV40_PARSER 0
+%define CONFIG_SBC_PARSER 0
+%define CONFIG_SIPR_PARSER 0
+%define CONFIG_TAK_PARSER 0
+%define CONFIG_VC1_PARSER 0
+%define CONFIG_VORBIS_PARSER 1
+%define CONFIG_VP3_PARSER 1
+%define CONFIG_VP8_PARSER 1
+%define CONFIG_VP9_PARSER 1
+%define CONFIG_XMA_PARSER 0
+%define CONFIG_ALSA_INDEV 0
+%define CONFIG_ANDROID_CAMERA_INDEV 0
+%define CONFIG_AVFOUNDATION_INDEV 0
+%define CONFIG_BKTR_INDEV 0
+%define CONFIG_DECKLINK_INDEV 0
+%define CONFIG_LIBNDI_NEWTEK_INDEV 0
+%define CONFIG_DSHOW_INDEV 0
+%define CONFIG_FBDEV_INDEV 0
+%define CONFIG_GDIGRAB_INDEV 0
+%define CONFIG_IEC61883_INDEV 0
+%define CONFIG_JACK_INDEV 0
+%define CONFIG_KMSGRAB_INDEV 0
+%define CONFIG_LAVFI_INDEV 0
+%define CONFIG_OPENAL_INDEV 0
+%define CONFIG_OSS_INDEV 0
+%define CONFIG_PULSE_INDEV 0
+%define CONFIG_SNDIO_INDEV 0
+%define CONFIG_V4L2_INDEV 0
+%define CONFIG_VFWCAP_INDEV 0
+%define CONFIG_XCBGRAB_INDEV 0
+%define CONFIG_LIBCDIO_INDEV 0
+%define CONFIG_LIBDC1394_INDEV 0
+%define CONFIG_ALSA_OUTDEV 0
+%define CONFIG_CACA_OUTDEV 0
+%define CONFIG_DECKLINK_OUTDEV 0
+%define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
+%define CONFIG_FBDEV_OUTDEV 0
+%define CONFIG_OPENGL_OUTDEV 0
+%define CONFIG_OSS_OUTDEV 0
+%define CONFIG_PULSE_OUTDEV 0
+%define CONFIG_SDL2_OUTDEV 0
+%define CONFIG_SNDIO_OUTDEV 0
+%define CONFIG_V4L2_OUTDEV 0
+%define CONFIG_XV_OUTDEV 0
 %define CONFIG_ABENCH_FILTER 0
 %define CONFIG_ACOMPRESSOR_FILTER 0
+%define CONFIG_ACONTRAST_FILTER 0
 %define CONFIG_ACOPY_FILTER 0
+%define CONFIG_ACUE_FILTER 0
 %define CONFIG_ACROSSFADE_FILTER 0
+%define CONFIG_ACROSSOVER_FILTER 0
 %define CONFIG_ACRUSHER_FILTER 0
+%define CONFIG_ADECLICK_FILTER 0
+%define CONFIG_ADECLIP_FILTER 0
 %define CONFIG_ADELAY_FILTER 0
+%define CONFIG_ADERIVATIVE_FILTER 0
 %define CONFIG_AECHO_FILTER 0
 %define CONFIG_AEMPHASIS_FILTER 0
 %define CONFIG_AEVAL_FILTER 0
 %define CONFIG_AFADE_FILTER 0
+%define CONFIG_AFFTDN_FILTER 0
 %define CONFIG_AFFTFILT_FILTER 0
 %define CONFIG_AFIR_FILTER 0
 %define CONFIG_AFORMAT_FILTER 0
 %define CONFIG_AGATE_FILTER 0
+%define CONFIG_AIIR_FILTER 0
+%define CONFIG_AINTEGRAL_FILTER 0
 %define CONFIG_AINTERLEAVE_FILTER 0
 %define CONFIG_ALIMITER_FILTER 0
 %define CONFIG_ALLPASS_FILTER 0
@@ -1673,6 +1567,7 @@
 %define CONFIG_AMERGE_FILTER 0
 %define CONFIG_AMETADATA_FILTER 0
 %define CONFIG_AMIX_FILTER 0
+%define CONFIG_AMULTIPLY_FILTER 0
 %define CONFIG_ANEQUALIZER_FILTER 0
 %define CONFIG_ANULL_FILTER 0
 %define CONFIG_APAD_FILTER 0
@@ -1709,6 +1604,7 @@
 %define CONFIG_CROSSFEED_FILTER 0
 %define CONFIG_CRYSTALIZER_FILTER 0
 %define CONFIG_DCSHIFT_FILTER 0
+%define CONFIG_DRMETER_FILTER 0
 %define CONFIG_DYNAUDNORM_FILTER 0
 %define CONFIG_EARWAX_FILTER 0
 %define CONFIG_EBUR128_FILTER 0
@@ -1720,10 +1616,14 @@
 %define CONFIG_HDCD_FILTER 0
 %define CONFIG_HEADPHONE_FILTER 0
 %define CONFIG_HIGHPASS_FILTER 0
+%define CONFIG_HIGHSHELF_FILTER 0
 %define CONFIG_JOIN_FILTER 0
 %define CONFIG_LADSPA_FILTER 0
 %define CONFIG_LOUDNORM_FILTER 0
 %define CONFIG_LOWPASS_FILTER 0
+%define CONFIG_LOWSHELF_FILTER 0
+%define CONFIG_LV2_FILTER 0
+%define CONFIG_MCOMPAND_FILTER 0
 %define CONFIG_PAN_FILTER 0
 %define CONFIG_REPLAYGAIN_FILTER 0
 %define CONFIG_RESAMPLE_FILTER 0
@@ -1746,20 +1646,25 @@
 %define CONFIG_ANOISESRC_FILTER 0
 %define CONFIG_ANULLSRC_FILTER 0
 %define CONFIG_FLITE_FILTER 0
+%define CONFIG_HILBERT_FILTER 0
 %define CONFIG_SINE_FILTER 0
 %define CONFIG_ANULLSINK_FILTER 0
 %define CONFIG_ALPHAEXTRACT_FILTER 0
 %define CONFIG_ALPHAMERGE_FILTER 0
+%define CONFIG_AMPLIFY_FILTER 0
 %define CONFIG_ASS_FILTER 0
 %define CONFIG_ATADENOISE_FILTER 0
 %define CONFIG_AVGBLUR_FILTER 0
+%define CONFIG_AVGBLUR_OPENCL_FILTER 0
 %define CONFIG_BBOX_FILTER 0
 %define CONFIG_BENCH_FILTER 0
 %define CONFIG_BITPLANENOISE_FILTER 0
 %define CONFIG_BLACKDETECT_FILTER 0
 %define CONFIG_BLACKFRAME_FILTER 0
 %define CONFIG_BLEND_FILTER 0
+%define CONFIG_BM3D_FILTER 0
 %define CONFIG_BOXBLUR_FILTER 0
+%define CONFIG_BOXBLUR_OPENCL_FILTER 0
 %define CONFIG_BWDIF_FILTER 0
 %define CONFIG_CHROMAKEY_FILTER 0
 %define CONFIG_CIESCOPE_FILTER 0
@@ -1771,27 +1676,33 @@
 %define CONFIG_COLORMATRIX_FILTER 0
 %define CONFIG_COLORSPACE_FILTER 0
 %define CONFIG_CONVOLUTION_FILTER 0
+%define CONFIG_CONVOLUTION_OPENCL_FILTER 0
 %define CONFIG_CONVOLVE_FILTER 0
 %define CONFIG_COPY_FILTER 0
 %define CONFIG_COREIMAGE_FILTER 0
 %define CONFIG_COVER_RECT_FILTER 0
 %define CONFIG_CROP_FILTER 0
 %define CONFIG_CROPDETECT_FILTER 0
+%define CONFIG_CUE_FILTER 0
 %define CONFIG_CURVES_FILTER 0
 %define CONFIG_DATASCOPE_FILTER 0
 %define CONFIG_DCTDNOIZ_FILTER 0
 %define CONFIG_DEBAND_FILTER 0
+%define CONFIG_DEBLOCK_FILTER 0
 %define CONFIG_DECIMATE_FILTER 0
+%define CONFIG_DECONVOLVE_FILTER 0
 %define CONFIG_DEFLATE_FILTER 0
 %define CONFIG_DEFLICKER_FILTER 0
 %define CONFIG_DEINTERLACE_QSV_FILTER 0
 %define CONFIG_DEINTERLACE_VAAPI_FILTER 0
 %define CONFIG_DEJUDDER_FILTER 0
 %define CONFIG_DELOGO_FILTER 0
+%define CONFIG_DENOISE_VAAPI_FILTER 0
 %define CONFIG_DESHAKE_FILTER 0
 %define CONFIG_DESPILL_FILTER 0
 %define CONFIG_DETELECINE_FILTER 0
 %define CONFIG_DILATION_FILTER 0
+%define CONFIG_DILATION_OPENCL_FILTER 0
 %define CONFIG_DISPLACE_FILTER 0
 %define CONFIG_DOUBLEWEAVE_FILTER 0
 %define CONFIG_DRAWBOX_FILTER 0
@@ -1800,15 +1711,19 @@
 %define CONFIG_DRAWTEXT_FILTER 0
 %define CONFIG_EDGEDETECT_FILTER 0
 %define CONFIG_ELBG_FILTER 0
+%define CONFIG_ENTROPY_FILTER 0
 %define CONFIG_EQ_FILTER 0
 %define CONFIG_EROSION_FILTER 0
+%define CONFIG_EROSION_OPENCL_FILTER 0
 %define CONFIG_EXTRACTPLANES_FILTER 0
 %define CONFIG_FADE_FILTER 0
+%define CONFIG_FFTDNOIZ_FILTER 0
 %define CONFIG_FFTFILT_FILTER 0
 %define CONFIG_FIELD_FILTER 0
 %define CONFIG_FIELDHINT_FILTER 0
 %define CONFIG_FIELDMATCH_FILTER 0
 %define CONFIG_FIELDORDER_FILTER 0
+%define CONFIG_FILLBORDERS_FILTER 0
 %define CONFIG_FIND_RECT_FILTER 0
 %define CONFIG_FLOODFILL_FILTER 0
 %define CONFIG_FORMAT_FILTER 0
@@ -1821,6 +1736,7 @@
 %define CONFIG_GBLUR_FILTER 0
 %define CONFIG_GEQ_FILTER 0
 %define CONFIG_GRADFUN_FILTER 0
+%define CONFIG_GREYEDGE_FILTER 0
 %define CONFIG_HALDCLUT_FILTER 0
 %define CONFIG_HFLIP_FILTER 0
 %define CONFIG_HISTEQ_FILTER 0
@@ -1841,11 +1757,13 @@
 %define CONFIG_INTERLEAVE_FILTER 0
 %define CONFIG_KERNDEINT_FILTER 0
 %define CONFIG_LENSCORRECTION_FILTER 0
+%define CONFIG_LENSFUN_FILTER 0
 %define CONFIG_LIBVMAF_FILTER 0
 %define CONFIG_LIMITER_FILTER 0
 %define CONFIG_LOOP_FILTER 0
 %define CONFIG_LUMAKEY_FILTER 0
 %define CONFIG_LUT_FILTER 0
+%define CONFIG_LUT1D_FILTER 0
 %define CONFIG_LUT2_FILTER 0
 %define CONFIG_LUT3D_FILTER 0
 %define CONFIG_LUTRGB_FILTER 0
@@ -1858,17 +1776,21 @@
 %define CONFIG_METADATA_FILTER 0
 %define CONFIG_MIDEQUALIZER_FILTER 0
 %define CONFIG_MINTERPOLATE_FILTER 0
+%define CONFIG_MIX_FILTER 0
 %define CONFIG_MPDECIMATE_FILTER 0
 %define CONFIG_NEGATE_FILTER 0
 %define CONFIG_NLMEANS_FILTER 0
 %define CONFIG_NNEDI_FILTER 0
 %define CONFIG_NOFORMAT_FILTER 0
 %define CONFIG_NOISE_FILTER 0
+%define CONFIG_NORMALIZE_FILTER 0
 %define CONFIG_NULL_FILTER 0
 %define CONFIG_OCR_FILTER 0
 %define CONFIG_OCV_FILTER 0
 %define CONFIG_OSCILLOSCOPE_FILTER 0
 %define CONFIG_OVERLAY_FILTER 0
+%define CONFIG_OVERLAY_OPENCL_FILTER 0
+%define CONFIG_OVERLAY_QSV_FILTER 0
 %define CONFIG_OWDENOISE_FILTER 0
 %define CONFIG_PAD_FILTER 0
 %define CONFIG_PALETTEGEN_FILTER 0
@@ -1882,6 +1804,9 @@
 %define CONFIG_PP7_FILTER 0
 %define CONFIG_PREMULTIPLY_FILTER 0
 %define CONFIG_PREWITT_FILTER 0
+%define CONFIG_PREWITT_OPENCL_FILTER 0
+%define CONFIG_PROCAMP_VAAPI_FILTER 0
+%define CONFIG_PROGRAM_OPENCL_FILTER 0
 %define CONFIG_PSEUDOCOLOR_FILTER 0
 %define CONFIG_PSNR_FILTER 0
 %define CONFIG_PULLUP_FILTER 0
@@ -1896,6 +1821,7 @@
 %define CONFIG_REPEATFIELDS_FILTER 0
 %define CONFIG_REVERSE_FILTER 0
 %define CONFIG_ROBERTS_FILTER 0
+%define CONFIG_ROBERTS_OPENCL_FILTER 0
 %define CONFIG_ROTATE_FILTER 0
 %define CONFIG_SAB_FILTER 0
 %define CONFIG_SCALE_FILTER 0
@@ -1911,8 +1837,10 @@
 %define CONFIG_SETDAR_FILTER 0
 %define CONFIG_SETFIELD_FILTER 0
 %define CONFIG_SETPTS_FILTER 0
+%define CONFIG_SETRANGE_FILTER 0
 %define CONFIG_SETSAR_FILTER 0
 %define CONFIG_SETTB_FILTER 0
+%define CONFIG_SHARPNESS_VAAPI_FILTER 0
 %define CONFIG_SHOWINFO_FILTER 0
 %define CONFIG_SHOWPALETTE_FILTER 0
 %define CONFIG_SHUFFLEFRAMES_FILTER 0
@@ -1922,8 +1850,10 @@
 %define CONFIG_SIGNATURE_FILTER 0
 %define CONFIG_SMARTBLUR_FILTER 0
 %define CONFIG_SOBEL_FILTER 0
+%define CONFIG_SOBEL_OPENCL_FILTER 0
 %define CONFIG_SPLIT_FILTER 0
 %define CONFIG_SPP_FILTER 0
+%define CONFIG_SR_FILTER 0
 %define CONFIG_SSIM_FILTER 0
 %define CONFIG_STEREO3D_FILTER 0
 %define CONFIG_STREAMSELECT_FILTER 0
@@ -1939,19 +1869,25 @@
 %define CONFIG_TILE_FILTER 0
 %define CONFIG_TINTERLACE_FILTER 0
 %define CONFIG_TLUT2_FILTER 0
+%define CONFIG_TMIX_FILTER 0
 %define CONFIG_TONEMAP_FILTER 0
+%define CONFIG_TONEMAP_OPENCL_FILTER 0
 %define CONFIG_TRANSPOSE_FILTER 0
+%define CONFIG_TRANSPOSE_NPP_FILTER 0
 %define CONFIG_TRIM_FILTER 0
 %define CONFIG_UNPREMULTIPLY_FILTER 0
 %define CONFIG_UNSHARP_FILTER 0
+%define CONFIG_UNSHARP_OPENCL_FILTER 0
 %define CONFIG_USPP_FILTER 0
 %define CONFIG_VAGUEDENOISER_FILTER 0
 %define CONFIG_VECTORSCOPE_FILTER 0
 %define CONFIG_VFLIP_FILTER 0
+%define CONFIG_VFRDET_FILTER 0
 %define CONFIG_VIDSTABDETECT_FILTER 0
 %define CONFIG_VIDSTABTRANSFORM_FILTER 0
 %define CONFIG_VIGNETTE_FILTER 0
 %define CONFIG_VMAFMOTION_FILTER 0
+%define CONFIG_VPP_QSV_FILTER 0
 %define CONFIG_VSTACK_FILTER 0
 %define CONFIG_W3FDIF_FILTER 0
 %define CONFIG_WAVEFORM_FILTER 0
@@ -1972,6 +1908,9 @@
 %define CONFIG_MANDELBROT_FILTER 0
 %define CONFIG_MPTESTSRC_FILTER 0
 %define CONFIG_NULLSRC_FILTER 0
+%define CONFIG_OPENCLSRC_FILTER 0
+%define CONFIG_PAL75BARS_FILTER 0
+%define CONFIG_PAL100BARS_FILTER 0
 %define CONFIG_RGBTESTSRC_FILTER 0
 %define CONFIG_SMPTEBARS_FILTER 0
 %define CONFIG_SMPTEHDBARS_FILTER 0
@@ -1995,94 +1934,302 @@
 %define CONFIG_SPECTRUMSYNTH_FILTER 0
 %define CONFIG_AMOVIE_FILTER 0
 %define CONFIG_MOVIE_FILTER 0
-%define CONFIG_H263_VAAPI_HWACCEL 0
-%define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_H264_CUVID_HWACCEL 0
-%define CONFIG_H264_D3D11VA_HWACCEL 0
-%define CONFIG_H264_D3D11VA2_HWACCEL 0
-%define CONFIG_H264_DXVA2_HWACCEL 0
-%define CONFIG_H264_MEDIACODEC_HWACCEL 0
-%define CONFIG_H264_MMAL_HWACCEL 0
-%define CONFIG_H264_QSV_HWACCEL 0
-%define CONFIG_H264_VAAPI_HWACCEL 0
-%define CONFIG_H264_VDA_HWACCEL 0
-%define CONFIG_H264_VDA_OLD_HWACCEL 0
-%define CONFIG_H264_VDPAU_HWACCEL 0
-%define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_HEVC_CUVID_HWACCEL 0
-%define CONFIG_HEVC_D3D11VA_HWACCEL 0
-%define CONFIG_HEVC_D3D11VA2_HWACCEL 0
-%define CONFIG_HEVC_DXVA2_HWACCEL 0
-%define CONFIG_HEVC_MEDIACODEC_HWACCEL 0
-%define CONFIG_HEVC_QSV_HWACCEL 0
-%define CONFIG_HEVC_VAAPI_HWACCEL 0
-%define CONFIG_HEVC_VDPAU_HWACCEL 0
-%define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_MJPEG_CUVID_HWACCEL 0
-%define CONFIG_MPEG1_CUVID_HWACCEL 0
-%define CONFIG_MPEG1_XVMC_HWACCEL 0
-%define CONFIG_MPEG1_VDPAU_HWACCEL 0
-%define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_MPEG2_CUVID_HWACCEL 0
-%define CONFIG_MPEG2_XVMC_HWACCEL 0
-%define CONFIG_MPEG2_D3D11VA_HWACCEL 0
-%define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
-%define CONFIG_MPEG2_DXVA2_HWACCEL 0
-%define CONFIG_MPEG2_MMAL_HWACCEL 0
-%define CONFIG_MPEG2_QSV_HWACCEL 0
-%define CONFIG_MPEG2_VAAPI_HWACCEL 0
-%define CONFIG_MPEG2_VDPAU_HWACCEL 0
-%define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_MPEG2_MEDIACODEC_HWACCEL 0
-%define CONFIG_MPEG4_CUVID_HWACCEL 0
-%define CONFIG_MPEG4_MEDIACODEC_HWACCEL 0
-%define CONFIG_MPEG4_MMAL_HWACCEL 0
-%define CONFIG_MPEG4_VAAPI_HWACCEL 0
-%define CONFIG_MPEG4_VDPAU_HWACCEL 0
-%define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_VC1_CUVID_HWACCEL 0
-%define CONFIG_VC1_D3D11VA_HWACCEL 0
-%define CONFIG_VC1_D3D11VA2_HWACCEL 0
-%define CONFIG_VC1_DXVA2_HWACCEL 0
-%define CONFIG_VC1_VAAPI_HWACCEL 0
-%define CONFIG_VC1_VDPAU_HWACCEL 0
-%define CONFIG_VC1_MMAL_HWACCEL 0
-%define CONFIG_VC1_QSV_HWACCEL 0
-%define CONFIG_VP8_CUVID_HWACCEL 0
-%define CONFIG_VP8_MEDIACODEC_HWACCEL 0
-%define CONFIG_VP8_QSV_HWACCEL 0
-%define CONFIG_VP9_CUVID_HWACCEL 0
-%define CONFIG_VP9_D3D11VA_HWACCEL 0
-%define CONFIG_VP9_D3D11VA2_HWACCEL 0
-%define CONFIG_VP9_DXVA2_HWACCEL 0
-%define CONFIG_VP9_MEDIACODEC_HWACCEL 0
-%define CONFIG_VP9_VAAPI_HWACCEL 0
-%define CONFIG_WMV3_D3D11VA_HWACCEL 0
-%define CONFIG_WMV3_D3D11VA2_HWACCEL 0
-%define CONFIG_WMV3_DXVA2_HWACCEL 0
-%define CONFIG_WMV3_VAAPI_HWACCEL 0
-%define CONFIG_WMV3_VDPAU_HWACCEL 0
-%define CONFIG_ALSA_INDEV 0
-%define CONFIG_AVFOUNDATION_INDEV 0
-%define CONFIG_BKTR_INDEV 0
-%define CONFIG_DECKLINK_INDEV 0
-%define CONFIG_LIBNDI_NEWTEK_INDEV 0
-%define CONFIG_DSHOW_INDEV 0
-%define CONFIG_FBDEV_INDEV 0
-%define CONFIG_GDIGRAB_INDEV 0
-%define CONFIG_IEC61883_INDEV 0
-%define CONFIG_JACK_INDEV 0
-%define CONFIG_KMSGRAB_INDEV 0
-%define CONFIG_LAVFI_INDEV 0
-%define CONFIG_OPENAL_INDEV 0
-%define CONFIG_OSS_INDEV 0
-%define CONFIG_PULSE_INDEV 0
-%define CONFIG_SNDIO_INDEV 0
-%define CONFIG_V4L2_INDEV 0
-%define CONFIG_VFWCAP_INDEV 0
-%define CONFIG_XCBGRAB_INDEV 0
-%define CONFIG_LIBCDIO_INDEV 0
-%define CONFIG_LIBDC1394_INDEV 0
+%define CONFIG_AFIFO_FILTER 0
+%define CONFIG_FIFO_FILTER 0
+%define CONFIG_AA_DEMUXER 0
+%define CONFIG_AAC_DEMUXER 0
+%define CONFIG_AC3_DEMUXER 0
+%define CONFIG_ACM_DEMUXER 0
+%define CONFIG_ACT_DEMUXER 0
+%define CONFIG_ADF_DEMUXER 0
+%define CONFIG_ADP_DEMUXER 0
+%define CONFIG_ADS_DEMUXER 0
+%define CONFIG_ADX_DEMUXER 0
+%define CONFIG_AEA_DEMUXER 0
+%define CONFIG_AFC_DEMUXER 0
+%define CONFIG_AIFF_DEMUXER 0
+%define CONFIG_AIX_DEMUXER 0
+%define CONFIG_AMR_DEMUXER 0
+%define CONFIG_AMRNB_DEMUXER 0
+%define CONFIG_AMRWB_DEMUXER 0
+%define CONFIG_ANM_DEMUXER 0
+%define CONFIG_APC_DEMUXER 0
+%define CONFIG_APE_DEMUXER 0
+%define CONFIG_APNG_DEMUXER 0
+%define CONFIG_APTX_DEMUXER 0
+%define CONFIG_APTX_HD_DEMUXER 0
+%define CONFIG_AQTITLE_DEMUXER 0
+%define CONFIG_ASF_DEMUXER 0
+%define CONFIG_ASF_O_DEMUXER 0
+%define CONFIG_ASS_DEMUXER 0
+%define CONFIG_AST_DEMUXER 0
+%define CONFIG_AU_DEMUXER 0
+%define CONFIG_AVI_DEMUXER 0
+%define CONFIG_AVISYNTH_DEMUXER 0
+%define CONFIG_AVR_DEMUXER 0
+%define CONFIG_AVS_DEMUXER 0
+%define CONFIG_AVS2_DEMUXER 0
+%define CONFIG_BETHSOFTVID_DEMUXER 0
+%define CONFIG_BFI_DEMUXER 0
+%define CONFIG_BINTEXT_DEMUXER 0
+%define CONFIG_BINK_DEMUXER 0
+%define CONFIG_BIT_DEMUXER 0
+%define CONFIG_BMV_DEMUXER 0
+%define CONFIG_BFSTM_DEMUXER 0
+%define CONFIG_BRSTM_DEMUXER 0
+%define CONFIG_BOA_DEMUXER 0
+%define CONFIG_C93_DEMUXER 0
+%define CONFIG_CAF_DEMUXER 0
+%define CONFIG_CAVSVIDEO_DEMUXER 0
+%define CONFIG_CDG_DEMUXER 0
+%define CONFIG_CDXL_DEMUXER 0
+%define CONFIG_CINE_DEMUXER 0
+%define CONFIG_CODEC2_DEMUXER 0
+%define CONFIG_CODEC2RAW_DEMUXER 0
+%define CONFIG_CONCAT_DEMUXER 0
+%define CONFIG_DASH_DEMUXER 0
+%define CONFIG_DATA_DEMUXER 0
+%define CONFIG_DAUD_DEMUXER 0
+%define CONFIG_DCSTR_DEMUXER 0
+%define CONFIG_DFA_DEMUXER 0
+%define CONFIG_DIRAC_DEMUXER 0
+%define CONFIG_DNXHD_DEMUXER 0
+%define CONFIG_DSF_DEMUXER 0
+%define CONFIG_DSICIN_DEMUXER 0
+%define CONFIG_DSS_DEMUXER 0
+%define CONFIG_DTS_DEMUXER 0
+%define CONFIG_DTSHD_DEMUXER 0
+%define CONFIG_DV_DEMUXER 0
+%define CONFIG_DVBSUB_DEMUXER 0
+%define CONFIG_DVBTXT_DEMUXER 0
+%define CONFIG_DXA_DEMUXER 0
+%define CONFIG_EA_DEMUXER 0
+%define CONFIG_EA_CDATA_DEMUXER 0
+%define CONFIG_EAC3_DEMUXER 0
+%define CONFIG_EPAF_DEMUXER 0
+%define CONFIG_FFMETADATA_DEMUXER 0
+%define CONFIG_FILMSTRIP_DEMUXER 0
+%define CONFIG_FITS_DEMUXER 0
+%define CONFIG_FLAC_DEMUXER 1
+%define CONFIG_FLIC_DEMUXER 0
+%define CONFIG_FLV_DEMUXER 0
+%define CONFIG_LIVE_FLV_DEMUXER 0
+%define CONFIG_FOURXM_DEMUXER 0
+%define CONFIG_FRM_DEMUXER 0
+%define CONFIG_FSB_DEMUXER 0
+%define CONFIG_G722_DEMUXER 0
+%define CONFIG_G723_1_DEMUXER 0
+%define CONFIG_G726_DEMUXER 0
+%define CONFIG_G726LE_DEMUXER 0
+%define CONFIG_G729_DEMUXER 0
+%define CONFIG_GDV_DEMUXER 0
+%define CONFIG_GENH_DEMUXER 0
+%define CONFIG_GIF_DEMUXER 0
+%define CONFIG_GSM_DEMUXER 0
+%define CONFIG_GXF_DEMUXER 0
+%define CONFIG_H261_DEMUXER 0
+%define CONFIG_H263_DEMUXER 0
+%define CONFIG_H264_DEMUXER 0
+%define CONFIG_HEVC_DEMUXER 0
+%define CONFIG_HLS_DEMUXER 0
+%define CONFIG_HNM_DEMUXER 0
+%define CONFIG_ICO_DEMUXER 0
+%define CONFIG_IDCIN_DEMUXER 0
+%define CONFIG_IDF_DEMUXER 0
+%define CONFIG_IFF_DEMUXER 0
+%define CONFIG_ILBC_DEMUXER 0
+%define CONFIG_IMAGE2_DEMUXER 0
+%define CONFIG_IMAGE2PIPE_DEMUXER 0
+%define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 0
+%define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0
+%define CONFIG_INGENIENT_DEMUXER 0
+%define CONFIG_IPMOVIE_DEMUXER 0
+%define CONFIG_IRCAM_DEMUXER 0
+%define CONFIG_ISS_DEMUXER 0
+%define CONFIG_IV8_DEMUXER 0
+%define CONFIG_IVF_DEMUXER 0
+%define CONFIG_IVR_DEMUXER 0
+%define CONFIG_JACOSUB_DEMUXER 0
+%define CONFIG_JV_DEMUXER 0
+%define CONFIG_LMLM4_DEMUXER 0
+%define CONFIG_LOAS_DEMUXER 0
+%define CONFIG_LRC_DEMUXER 0
+%define CONFIG_LVF_DEMUXER 0
+%define CONFIG_LXF_DEMUXER 0
+%define CONFIG_M4V_DEMUXER 0
+%define CONFIG_MATROSKA_DEMUXER 1
+%define CONFIG_MGSTS_DEMUXER 0
+%define CONFIG_MICRODVD_DEMUXER 0
+%define CONFIG_MJPEG_DEMUXER 0
+%define CONFIG_MJPEG_2000_DEMUXER 0
+%define CONFIG_MLP_DEMUXER 0
+%define CONFIG_MLV_DEMUXER 0
+%define CONFIG_MM_DEMUXER 0
+%define CONFIG_MMF_DEMUXER 0
+%define CONFIG_MOV_DEMUXER 1
+%define CONFIG_MP3_DEMUXER 1
+%define CONFIG_MPC_DEMUXER 0
+%define CONFIG_MPC8_DEMUXER 0
+%define CONFIG_MPEGPS_DEMUXER 0
+%define CONFIG_MPEGTS_DEMUXER 0
+%define CONFIG_MPEGTSRAW_DEMUXER 0
+%define CONFIG_MPEGVIDEO_DEMUXER 0
+%define CONFIG_MPJPEG_DEMUXER 0
+%define CONFIG_MPL2_DEMUXER 0
+%define CONFIG_MPSUB_DEMUXER 0
+%define CONFIG_MSF_DEMUXER 0
+%define CONFIG_MSNWC_TCP_DEMUXER 0
+%define CONFIG_MTAF_DEMUXER 0
+%define CONFIG_MTV_DEMUXER 0
+%define CONFIG_MUSX_DEMUXER 0
+%define CONFIG_MV_DEMUXER 0
+%define CONFIG_MVI_DEMUXER 0
+%define CONFIG_MXF_DEMUXER 0
+%define CONFIG_MXG_DEMUXER 0
+%define CONFIG_NC_DEMUXER 0
+%define CONFIG_NISTSPHERE_DEMUXER 0
+%define CONFIG_NSP_DEMUXER 0
+%define CONFIG_NSV_DEMUXER 0
+%define CONFIG_NUT_DEMUXER 0
+%define CONFIG_NUV_DEMUXER 0
+%define CONFIG_OGG_DEMUXER 1
+%define CONFIG_OMA_DEMUXER 0
+%define CONFIG_PAF_DEMUXER 0
+%define CONFIG_PCM_ALAW_DEMUXER 0
+%define CONFIG_PCM_MULAW_DEMUXER 0
+%define CONFIG_PCM_F64BE_DEMUXER 0
+%define CONFIG_PCM_F64LE_DEMUXER 0
+%define CONFIG_PCM_F32BE_DEMUXER 0
+%define CONFIG_PCM_F32LE_DEMUXER 0
+%define CONFIG_PCM_S32BE_DEMUXER 0
+%define CONFIG_PCM_S32LE_DEMUXER 0
+%define CONFIG_PCM_S24BE_DEMUXER 0
+%define CONFIG_PCM_S24LE_DEMUXER 0
+%define CONFIG_PCM_S16BE_DEMUXER 0
+%define CONFIG_PCM_S16LE_DEMUXER 0
+%define CONFIG_PCM_S8_DEMUXER 0
+%define CONFIG_PCM_U32BE_DEMUXER 0
+%define CONFIG_PCM_U32LE_DEMUXER 0
+%define CONFIG_PCM_U24BE_DEMUXER 0
+%define CONFIG_PCM_U24LE_DEMUXER 0
+%define CONFIG_PCM_U16BE_DEMUXER 0
+%define CONFIG_PCM_U16LE_DEMUXER 0
+%define CONFIG_PCM_U8_DEMUXER 0
+%define CONFIG_PJS_DEMUXER 0
+%define CONFIG_PMP_DEMUXER 0
+%define CONFIG_PVA_DEMUXER 0
+%define CONFIG_PVF_DEMUXER 0
+%define CONFIG_QCP_DEMUXER 0
+%define CONFIG_R3D_DEMUXER 0
+%define CONFIG_RAWVIDEO_DEMUXER 0
+%define CONFIG_REALTEXT_DEMUXER 0
+%define CONFIG_REDSPARK_DEMUXER 0
+%define CONFIG_RL2_DEMUXER 0
+%define CONFIG_RM_DEMUXER 0
+%define CONFIG_ROQ_DEMUXER 0
+%define CONFIG_RPL_DEMUXER 0
+%define CONFIG_RSD_DEMUXER 0
+%define CONFIG_RSO_DEMUXER 0
+%define CONFIG_RTP_DEMUXER 0
+%define CONFIG_RTSP_DEMUXER 0
+%define CONFIG_S337M_DEMUXER 0
+%define CONFIG_SAMI_DEMUXER 0
+%define CONFIG_SAP_DEMUXER 0
+%define CONFIG_SBC_DEMUXER 0
+%define CONFIG_SBG_DEMUXER 0
+%define CONFIG_SCC_DEMUXER 0
+%define CONFIG_SDP_DEMUXER 0
+%define CONFIG_SDR2_DEMUXER 0
+%define CONFIG_SDS_DEMUXER 0
+%define CONFIG_SDX_DEMUXER 0
+%define CONFIG_SEGAFILM_DEMUXER 0
+%define CONFIG_SER_DEMUXER 0
+%define CONFIG_SHORTEN_DEMUXER 0
+%define CONFIG_SIFF_DEMUXER 0
+%define CONFIG_SLN_DEMUXER 0
+%define CONFIG_SMACKER_DEMUXER 0
+%define CONFIG_SMJPEG_DEMUXER 0
+%define CONFIG_SMUSH_DEMUXER 0
+%define CONFIG_SOL_DEMUXER 0
+%define CONFIG_SOX_DEMUXER 0
+%define CONFIG_SPDIF_DEMUXER 0
+%define CONFIG_SRT_DEMUXER 0
+%define CONFIG_STR_DEMUXER 0
+%define CONFIG_STL_DEMUXER 0
+%define CONFIG_SUBVIEWER1_DEMUXER 0
+%define CONFIG_SUBVIEWER_DEMUXER 0
+%define CONFIG_SUP_DEMUXER 0
+%define CONFIG_SVAG_DEMUXER 0
+%define CONFIG_SWF_DEMUXER 0
+%define CONFIG_TAK_DEMUXER 0
+%define CONFIG_TEDCAPTIONS_DEMUXER 0
+%define CONFIG_THP_DEMUXER 0
+%define CONFIG_THREEDOSTR_DEMUXER 0
+%define CONFIG_TIERTEXSEQ_DEMUXER 0
+%define CONFIG_TMV_DEMUXER 0
+%define CONFIG_TRUEHD_DEMUXER 0
+%define CONFIG_TTA_DEMUXER 0
+%define CONFIG_TXD_DEMUXER 0
+%define CONFIG_TTY_DEMUXER 0
+%define CONFIG_TY_DEMUXER 0
+%define CONFIG_V210_DEMUXER 0
+%define CONFIG_V210X_DEMUXER 0
+%define CONFIG_VAG_DEMUXER 0
+%define CONFIG_VC1_DEMUXER 0
+%define CONFIG_VC1T_DEMUXER 0
+%define CONFIG_VIVO_DEMUXER 0
+%define CONFIG_VMD_DEMUXER 0
+%define CONFIG_VOBSUB_DEMUXER 0
+%define CONFIG_VOC_DEMUXER 0
+%define CONFIG_VPK_DEMUXER 0
+%define CONFIG_VPLAYER_DEMUXER 0
+%define CONFIG_VQF_DEMUXER 0
+%define CONFIG_W64_DEMUXER 0
+%define CONFIG_WAV_DEMUXER 1
+%define CONFIG_WC3_DEMUXER 0
+%define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0
+%define CONFIG_WEBVTT_DEMUXER 0
+%define CONFIG_WSAUD_DEMUXER 0
+%define CONFIG_WSD_DEMUXER 0
+%define CONFIG_WSVQA_DEMUXER 0
+%define CONFIG_WTV_DEMUXER 0
+%define CONFIG_WVE_DEMUXER 0
+%define CONFIG_WV_DEMUXER 0
+%define CONFIG_XA_DEMUXER 0
+%define CONFIG_XBIN_DEMUXER 0
+%define CONFIG_XMV_DEMUXER 0
+%define CONFIG_XVAG_DEMUXER 0
+%define CONFIG_XWMA_DEMUXER 0
+%define CONFIG_YOP_DEMUXER 0
+%define CONFIG_YUV4MPEGPIPE_DEMUXER 0
+%define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0
+%define CONFIG_LIBGME_DEMUXER 0
+%define CONFIG_LIBMODPLUG_DEMUXER 0
+%define CONFIG_LIBOPENMPT_DEMUXER 0
+%define CONFIG_VAPOURSYNTH_DEMUXER 0
 %define CONFIG_A64_MUXER 0
 %define CONFIG_AC3_MUXER 0
 %define CONFIG_ADTS_MUXER 0
@@ -2090,6 +2237,8 @@
 %define CONFIG_AIFF_MUXER 0
 %define CONFIG_AMR_MUXER 0
 %define CONFIG_APNG_MUXER 0
+%define CONFIG_APTX_MUXER 0
+%define CONFIG_APTX_HD_MUXER 0
 %define CONFIG_ASF_MUXER 0
 %define CONFIG_ASS_MUXER 0
 %define CONFIG_AST_MUXER 0
@@ -2097,9 +2246,12 @@
 %define CONFIG_AU_MUXER 0
 %define CONFIG_AVI_MUXER 0
 %define CONFIG_AVM2_MUXER 0
+%define CONFIG_AVS2_MUXER 0
 %define CONFIG_BIT_MUXER 0
 %define CONFIG_CAF_MUXER 0
 %define CONFIG_CAVSVIDEO_MUXER 0
+%define CONFIG_CODEC2_MUXER 0
+%define CONFIG_CODEC2RAW_MUXER 0
 %define CONFIG_CRC_MUXER 0
 %define CONFIG_DASH_MUXER 0
 %define CONFIG_DATA_MUXER 0
@@ -2110,9 +2262,9 @@
 %define CONFIG_DV_MUXER 0
 %define CONFIG_EAC3_MUXER 0
 %define CONFIG_F4V_MUXER 0
-%define CONFIG_FFM_MUXER 0
 %define CONFIG_FFMETADATA_MUXER 0
 %define CONFIG_FIFO_MUXER 0
+%define CONFIG_FIFO_TEST_MUXER 0
 %define CONFIG_FILMSTRIP_MUXER 0
 %define CONFIG_FITS_MUXER 0
 %define CONFIG_FLAC_MUXER 0
@@ -2205,7 +2357,9 @@
 %define CONFIG_RTP_MPEGTS_MUXER 0
 %define CONFIG_RTSP_MUXER 0
 %define CONFIG_SAP_MUXER 0
+%define CONFIG_SBC_MUXER 0
 %define CONFIG_SCC_MUXER 0
+%define CONFIG_SEGAFILM_MUXER 0
 %define CONFIG_SEGMENT_MUXER 0
 %define CONFIG_STREAM_SEGMENT_MUXER 0
 %define CONFIG_SINGLEJPEG_MUXER 0
@@ -2238,58 +2392,6 @@
 %define CONFIG_WV_MUXER 0
 %define CONFIG_YUV4MPEGPIPE_MUXER 0
 %define CONFIG_CHROMAPRINT_MUXER 0
-%define CONFIG_ALSA_OUTDEV 0
-%define CONFIG_CACA_OUTDEV 0
-%define CONFIG_DECKLINK_OUTDEV 0
-%define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
-%define CONFIG_FBDEV_OUTDEV 0
-%define CONFIG_OPENGL_OUTDEV 0
-%define CONFIG_OSS_OUTDEV 0
-%define CONFIG_PULSE_OUTDEV 0
-%define CONFIG_SDL2_OUTDEV 0
-%define CONFIG_SNDIO_OUTDEV 0
-%define CONFIG_V4L2_OUTDEV 0
-%define CONFIG_XV_OUTDEV 0
-%define CONFIG_AAC_PARSER 0
-%define CONFIG_AAC_LATM_PARSER 0
-%define CONFIG_AC3_PARSER 0
-%define CONFIG_ADX_PARSER 0
-%define CONFIG_BMP_PARSER 0
-%define CONFIG_CAVSVIDEO_PARSER 0
-%define CONFIG_COOK_PARSER 0
-%define CONFIG_DCA_PARSER 0
-%define CONFIG_DIRAC_PARSER 0
-%define CONFIG_DNXHD_PARSER 0
-%define CONFIG_DPX_PARSER 0
-%define CONFIG_DVAUDIO_PARSER 0
-%define CONFIG_DVBSUB_PARSER 0
-%define CONFIG_DVDSUB_PARSER 0
-%define CONFIG_DVD_NAV_PARSER 0
-%define CONFIG_FLAC_PARSER 1
-%define CONFIG_G729_PARSER 0
-%define CONFIG_GSM_PARSER 0
-%define CONFIG_H261_PARSER 0
-%define CONFIG_H263_PARSER 0
-%define CONFIG_H264_PARSER 0
-%define CONFIG_HEVC_PARSER 0
-%define CONFIG_MJPEG_PARSER 0
-%define CONFIG_MLP_PARSER 0
-%define CONFIG_MPEG4VIDEO_PARSER 0
-%define CONFIG_MPEGAUDIO_PARSER 0
-%define CONFIG_MPEGVIDEO_PARSER 0
-%define CONFIG_OPUS_PARSER 1
-%define CONFIG_PNG_PARSER 0
-%define CONFIG_PNM_PARSER 0
-%define CONFIG_RV30_PARSER 0
-%define CONFIG_RV40_PARSER 0
-%define CONFIG_SIPR_PARSER 0
-%define CONFIG_TAK_PARSER 0
-%define CONFIG_VC1_PARSER 0
-%define CONFIG_VORBIS_PARSER 1
-%define CONFIG_VP3_PARSER 1
-%define CONFIG_VP8_PARSER 1
-%define CONFIG_VP9_PARSER 1
-%define CONFIG_XMA_PARSER 0
 %define CONFIG_ASYNC_PROTOCOL 0
 %define CONFIG_BLURAY_PROTOCOL 0
 %define CONFIG_CACHE_PROTOCOL 0
@@ -2323,10 +2425,7 @@
 %define CONFIG_SUBFILE_PROTOCOL 0
 %define CONFIG_TEE_PROTOCOL 0
 %define CONFIG_TCP_PROTOCOL 0
-%define CONFIG_TLS_GNUTLS_PROTOCOL 0
-%define CONFIG_TLS_SCHANNEL_PROTOCOL 0
-%define CONFIG_TLS_SECURETRANSPORT_PROTOCOL 0
-%define CONFIG_TLS_OPENSSL_PROTOCOL 0
+%define CONFIG_TLS_PROTOCOL 0
 %define CONFIG_UDP_PROTOCOL 0
 %define CONFIG_UDPLITE_PROTOCOL 0
 %define CONFIG_UNIX_PROTOCOL 0
@@ -2335,5 +2434,6 @@
 %define CONFIG_LIBRTMPS_PROTOCOL 0
 %define CONFIG_LIBRTMPT_PROTOCOL 0
 %define CONFIG_LIBRTMPTE_PROTOCOL 0
+%define CONFIG_LIBSRT_PROTOCOL 0
 %define CONFIG_LIBSSH_PROTOCOL 0
 %define CONFIG_LIBSMBCLIENT_PROTOCOL 0

diff --git a/fuchsia/config/default/x64/config.h b/fuchsia/config/default/x64/config.h
index 415b035..aa9528f 100644
--- a/fuchsia/config/default/x64/config.h
+++ b/fuchsia/config/default/x64/config.h

@@ -1,12 +1,12 @@
 /* Automatically generated by configure - do not modify! */
 #ifndef FFMPEG_CONFIG_H
 #define FFMPEG_CONFIG_H
-#define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --disable-bzlib --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vda --disable-vdpau --disable-videotoolbox --disable-nvenc --disable-cuda --disable-cuvid --disable-v4l2_m2m --enable-decoder='vorbis,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-decoder='theora,vp8' --enable-demuxer='ogg,matroska,wav,flac' --enable-parser='opus,vorbis,flac' --enable-parser='vp3,vp8' --optflags='\"-O2\"' --enable-pic --enable-pic --enable-lto --cc=clang --cxx=clang++ --ld=clang --extra-ldflags='-fuse-ld=lld' --enable-decoder=vp9 --enable-parser=vp9 --sysroot=/usr/local/google/home/phosek/fuchsia/third_party/ffmpeg/../../buildtools/linux-x64/sysroot --disable-error-resilience"
+#define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --disable-debug --disable-bzlib --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-autodetect --enable-decoder='vorbis,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le,mp3' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-decoder='theora,vp8,sbc,aptx' --enable-demuxer='ogg,matroska,wav,flac,mp3,mov' --enable-parser='opus,vorbis,flac,mpegaudio' --enable-parser='vp3,vp8' --optflags='\"-O2\"' --enable-pic --x86asmexe=yasm --enable-pic --enable-lto --cc=clang --cxx=clang++ --ld=clang --enable-decoder=vp9 --enable-parser=vp9 --sysroot=/usr/local/google/home/dalesat/fuchsia/third_party/ffmpeg/../../buildtools/linux-x64/sysroot --extra-ldflags='-fuse-ld=lld' --disable-error-resilience"
 #define FFMPEG_LICENSE "LGPL version 2.1 or later"
 #define CONFIG_THIS_YEAR 2018
 #define FFMPEG_DATADIR "/usr/local/share/ffmpeg"
 #define AVCONV_DATADIR "/usr/local/share/ffmpeg"
-#define CC_IDENT "Fuchsia clang version 7.0.0 (https://fuchsia.googlesource.com/a/third_party/clang 5034f5fddab316b12887b39b129ebbca999500e2) (https://fuchsia.googlesource.com/a/third_party/llvm 197b6c81959a17be37035d4fe71b382023bff2f0) (based on LLVM 7.0.0svn)"
+#define CC_IDENT "Fuchsia clang version 8.0.0 (https://fuchsia.googlesource.com/a/third_party/clang 0a217961416a0cbf1ac29bcb26577d41ca0e0e8d) (https://fuchsia.googlesource.com/a/third_party/llvm aff6cf491087ba32e338c9af076c9b7739c978a0) (based on LLVM 8.0.0svn)"
 #define av_restrict restrict
 #define EXTERN_PREFIX ""
 #define EXTERN_ASM 
@@ -57,6 +57,7 @@
 #define HAVE_AMD3DNOWEXT 1
 #define HAVE_AVX 1
 #define HAVE_AVX2 1
+#define HAVE_AVX512 1
 #define HAVE_FMA3 1
 #define HAVE_FMA4 1
 #define HAVE_MMX 1
@@ -101,6 +102,7 @@
 #define HAVE_AMD3DNOWEXT_EXTERNAL 1
 #define HAVE_AVX_EXTERNAL 1
 #define HAVE_AVX2_EXTERNAL 1
+#define HAVE_AVX512_EXTERNAL 0
 #define HAVE_FMA3_EXTERNAL 1
 #define HAVE_FMA4_EXTERNAL 1
 #define HAVE_MMX_EXTERNAL 1
@@ -145,6 +147,7 @@
 #define HAVE_AMD3DNOWEXT_INLINE 1
 #define HAVE_AVX_INLINE 1
 #define HAVE_AVX2_INLINE 1
+#define HAVE_AVX512_INLINE 1
 #define HAVE_FMA3_INLINE 1
 #define HAVE_FMA4_INLINE 1
 #define HAVE_MMX_INLINE 1
@@ -174,36 +177,29 @@
 #define HAVE_FAST_64BIT 1
 #define HAVE_FAST_CLZ 1
 #define HAVE_FAST_CMOV 1
-#define HAVE_LOCAL_ALIGNED_8 1
-#define HAVE_LOCAL_ALIGNED_16 1
-#define HAVE_LOCAL_ALIGNED_32 1
+#define HAVE_LOCAL_ALIGNED 1
 #define HAVE_SIMD_ALIGN_16 1
 #define HAVE_SIMD_ALIGN_32 1
-#define HAVE_ATOMICS_GCC 1
-#define HAVE_ATOMICS_SUNCC 0
-#define HAVE_ATOMICS_WIN32 0
+#define HAVE_SIMD_ALIGN_64 1
 #define HAVE_ATOMIC_CAS_PTR 0
 #define HAVE_MACHINE_RW_BARRIER 0
 #define HAVE_MEMORYBARRIER 0
 #define HAVE_MM_EMPTY 1
 #define HAVE_RDTSC 0
-#define HAVE_SARESTART 1
 #define HAVE_SEM_TIMEDWAIT 1
 #define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1
-#define HAVE_CABS 1
-#define HAVE_CEXP 1
+#define HAVE_CABS 0
+#define HAVE_CEXP 0
 #define HAVE_INLINE_ASM 1
 #define HAVE_SYMVER 0
 #define HAVE_X86ASM 1
 #define HAVE_BIGENDIAN 0
 #define HAVE_FAST_UNALIGNED 1
-#define HAVE_ALTIVEC_H 0
 #define HAVE_ARPA_INET_H 0
 #define HAVE_ASM_TYPES_H 1
 #define HAVE_CDIO_PARANOIA_H 0
 #define HAVE_CDIO_PARANOIA_PARANOIA_H 0
 #define HAVE_CUDA_H 0
-#define HAVE_D3D11_H 0
 #define HAVE_DISPATCH_DISPATCH_H 0
 #define HAVE_DEV_BKTR_IOCTL_BT848_H 0
 #define HAVE_DEV_BKTR_IOCTL_METEOR_H 0
@@ -212,27 +208,18 @@
 #define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0
 #define HAVE_DIRECT_H 0
 #define HAVE_DIRENT_H 1
-#define HAVE_DLFCN_H 1
 #define HAVE_DXGIDEBUG_H 0
 #define HAVE_DXVA_H 0
 #define HAVE_ES2_GL_H 0
 #define HAVE_GSM_H 0
 #define HAVE_IO_H 0
-#define HAVE_MACH_MACH_TIME_H 0
+#define HAVE_LINUX_PERF_EVENT_H 1
 #define HAVE_MACHINE_IOCTL_BT848_H 0
 #define HAVE_MACHINE_IOCTL_METEOR_H 0
 #define HAVE_MALLOC_H 1
 #define HAVE_OPENCV2_CORE_CORE_C_H 0
-#define HAVE_OPENJPEG_2_3_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_2_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_1_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_0_OPENJPEG_H 0
-#define HAVE_OPENJPEG_1_5_OPENJPEG_H 0
 #define HAVE_OPENGL_GL3_H 0
 #define HAVE_POLL_H 1
-#define HAVE_SOUNDCARD_H 0
-#define HAVE_STDATOMIC_H 1
-#define HAVE_SYS_MMAN_H 1
 #define HAVE_SYS_PARAM_H 1
 #define HAVE_SYS_RESOURCE_H 1
 #define HAVE_SYS_SELECT_H 1
@@ -276,17 +263,20 @@
 #define HAVE_SINF 1
 #define HAVE_TRUNC 1
 #define HAVE_TRUNCF 1
+#define HAVE_DOS_PATHS 0
+#define HAVE_LIBC_MSVCRT 0
+#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+#define HAVE_SECTION_DATA_REL_RO 1
+#define HAVE_THREADS 1
+#define HAVE_UWP 0
+#define HAVE_WINRT 0
 #define HAVE_ACCESS 1
 #define HAVE_ALIGNED_MALLOC 0
 #define HAVE_ARC4RANDOM 0
 #define HAVE_CLOCK_GETTIME 1
 #define HAVE_CLOSESOCKET 0
 #define HAVE_COMMANDLINETOARGVW 0
-#define HAVE_COTASKMEMFREE 0
-#define HAVE_CRYPTGENRANDOM 0
 #define HAVE_FCNTL 1
-#define HAVE_FLT_LIM 1
-#define HAVE_FORK 1
 #define HAVE_GETADDRINFO 0
 #define HAVE_GETHRTIME 0
 #define HAVE_GETOPT 1
@@ -301,9 +291,7 @@
 #define HAVE_GMTIME_R 1
 #define HAVE_INET_ATON 0
 #define HAVE_ISATTY 1
-#define HAVE_JACK_PORT_GET_LATENCY_RANGE 0
 #define HAVE_KBHIT 0
-#define HAVE_LOADLIBRARY 0
 #define HAVE_LOCALTIME_R 1
 #define HAVE_LSTAT 1
 #define HAVE_LZO1X_999_COMPRESS 0
@@ -318,6 +306,7 @@
 #define HAVE_POSIX_MEMALIGN 1
 #define HAVE_PTHREAD_CANCEL 1
 #define HAVE_SCHED_GETAFFINITY 1
+#define HAVE_SECITEMIMPORT 0
 #define HAVE_SETCONSOLETEXTATTRIBUTE 0
 #define HAVE_SETCONSOLECTRLHANDLER 0
 #define HAVE_SETMODE 0
@@ -330,16 +319,19 @@
 #define HAVE_UTGETOSTYPEFROMSTRING 0
 #define HAVE_VIRTUALALLOC 0
 #define HAVE_WGLGETPROCADDRESS 0
+#define HAVE_BCRYPT 0
+#define HAVE_VAAPI_DRM 0
+#define HAVE_VAAPI_X11 0
+#define HAVE_VDPAU_X11 0
 #define HAVE_PTHREADS 1
 #define HAVE_OS2THREADS 0
 #define HAVE_W32THREADS 0
+#define HAVE_AS_ARCH_DIRECTIVE 0
 #define HAVE_AS_DN_DIRECTIVE 0
 #define HAVE_AS_FPU_DIRECTIVE 0
 #define HAVE_AS_FUNC 0
 #define HAVE_AS_OBJECT_ARCH 0
 #define HAVE_ASM_MOD_Q 0
-#define HAVE_ATTRIBUTE_MAY_ALIAS 1
-#define HAVE_ATTRIBUTE_PACKED 1
 #define HAVE_BLOCKS_EXTENSION 0
 #define HAVE_EBP_AVAILABLE 1
 #define HAVE_EBX_AVAILABLE 1
@@ -356,7 +348,6 @@
 #define HAVE_VFP_ARGS 0
 #define HAVE_XFORM_ASM 0
 #define HAVE_XMM_CLOBBERS 1
-#define HAVE_CONDITION_VARIABLE_PTR 0
 #define HAVE_KCMVIDEOCODECTYPE_HEVC 0
 #define HAVE_SOCKLEN_T 0
 #define HAVE_STRUCT_ADDRINFO 0
@@ -372,22 +363,17 @@
 #define HAVE_STRUCT_SOCKADDR_STORAGE 0
 #define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1
 #define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 1
-#define HAVE_ATOMICS_NATIVE 1
-#define HAVE_DOS_PATHS 0
-#define HAVE_LIBC_MSVCRT 0
 #define HAVE_MAKEINFO 1
 #define HAVE_MAKEINFO_HTML 1
-#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+#define HAVE_OPENCL_D3D11 0
+#define HAVE_OPENCL_DRM_ARM 0
+#define HAVE_OPENCL_DRM_BEIGNET 0
+#define HAVE_OPENCL_DXVA2 0
+#define HAVE_OPENCL_VAAPI_BEIGNET 0
+#define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0
 #define HAVE_PERL 1
 #define HAVE_POD2MAN 1
-#define HAVE_SECTION_DATA_REL_RO 1
 #define HAVE_TEXI2HTML 0
-#define HAVE_THREADS 1
-#define HAVE_UWP 0
-#define HAVE_VAAPI_DRM 0
-#define HAVE_VAAPI_X11 0
-#define HAVE_VDPAU_X11 0
-#define HAVE_WINRT 0
 #define CONFIG_DOC 0
 #define CONFIG_HTMLPAGES 0
 #define CONFIG_MANPAGES 0
@@ -414,41 +400,31 @@
 #define CONFIG_SCALING_VIDEO_EXAMPLE 0
 #define CONFIG_TRANSCODE_AAC_EXAMPLE 0
 #define CONFIG_TRANSCODING_EXAMPLE 0
-#define CONFIG_ALSA 0
-#define CONFIG_APPKIT 0
-#define CONFIG_AVFOUNDATION 0
-#define CONFIG_BZLIB 0
-#define CONFIG_COREIMAGE 0
-#define CONFIG_ICONV 0
-#define CONFIG_JACK 0
-#define CONFIG_LIBXCB 1
-#define CONFIG_LIBXCB_SHM 1
-#define CONFIG_LIBXCB_SHAPE 0
-#define CONFIG_LIBXCB_XFIXES 0
-#define CONFIG_LZMA 1
-#define CONFIG_SCHANNEL 0
-#define CONFIG_SDL2 0
-#define CONFIG_SECURETRANSPORT 0
-#define CONFIG_SNDIO 0
-#define CONFIG_XLIB 0
-#define CONFIG_ZLIB 0
+#define CONFIG_VAAPI_ENCODE_EXAMPLE 0
+#define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0
 #define CONFIG_AVISYNTH 0
 #define CONFIG_FREI0R 0
 #define CONFIG_LIBCDIO 0
+#define CONFIG_LIBDAVS2 0
 #define CONFIG_LIBRUBBERBAND 0
 #define CONFIG_LIBVIDSTAB 0
 #define CONFIG_LIBX264 0
 #define CONFIG_LIBX265 0
 #define CONFIG_LIBXAVS 0
+#define CONFIG_LIBXAVS2 0
 #define CONFIG_LIBXVID 0
 #define CONFIG_DECKLINK 0
 #define CONFIG_LIBNDI_NEWTEK 0
 #define CONFIG_LIBFDK_AAC 0
 #define CONFIG_OPENSSL 0
+#define CONFIG_LIBTLS 0
 #define CONFIG_GMP 0
+#define CONFIG_LIBLENSFUN 0
 #define CONFIG_LIBOPENCORE_AMRNB 0
 #define CONFIG_LIBOPENCORE_AMRWB 0
+#define CONFIG_LIBVMAF 0
 #define CONFIG_LIBVO_AMRWBENC 0
+#define CONFIG_MBEDTLS 0
 #define CONFIG_RKMPP 0
 #define CONFIG_LIBSMBCLIENT 0
 #define CONFIG_CHROMAPRINT 0
@@ -456,11 +432,13 @@
 #define CONFIG_GNUTLS 0
 #define CONFIG_JNI 0
 #define CONFIG_LADSPA 0
+#define CONFIG_LIBAOM 0
 #define CONFIG_LIBASS 0
 #define CONFIG_LIBBLURAY 0
 #define CONFIG_LIBBS2B 0
 #define CONFIG_LIBCACA 0
 #define CONFIG_LIBCELT 0
+#define CONFIG_LIBCODEC2 0
 #define CONFIG_LIBDC1394 0
 #define CONFIG_LIBDRM 0
 #define CONFIG_LIBFLITE 0
@@ -471,6 +449,8 @@
 #define CONFIG_LIBGSM 0
 #define CONFIG_LIBIEC61883 0
 #define CONFIG_LIBILBC 0
+#define CONFIG_LIBJACK 0
+#define CONFIG_LIBKLVANC 0
 #define CONFIG_LIBKVAZAAR 0
 #define CONFIG_LIBMODPLUG 0
 #define CONFIG_LIBMP3LAME 0
@@ -488,12 +468,13 @@
 #define CONFIG_LIBSNAPPY 0
 #define CONFIG_LIBSOXR 0
 #define CONFIG_LIBSPEEX 0
+#define CONFIG_LIBSRT 0
 #define CONFIG_LIBSSH 0
+#define CONFIG_LIBTENSORFLOW 0
 #define CONFIG_LIBTESSERACT 0
 #define CONFIG_LIBTHEORA 0
 #define CONFIG_LIBTWOLAME 0
 #define CONFIG_LIBV4L2 0
-#define CONFIG_LIBVMAF 0
 #define CONFIG_LIBVORBIS 0
 #define CONFIG_LIBVPX 0
 #define CONFIG_LIBWAVPACK 0
@@ -502,28 +483,49 @@
 #define CONFIG_LIBZIMG 0
 #define CONFIG_LIBZMQ 0
 #define CONFIG_LIBZVBI 0
+#define CONFIG_LV2 0
 #define CONFIG_MEDIACODEC 0
 #define CONFIG_OPENAL 0
-#define CONFIG_OPENCL 0
 #define CONFIG_OPENGL 0
+#define CONFIG_VAPOURSYNTH 0
+#define CONFIG_ALSA 1
+#define CONFIG_APPKIT 0
+#define CONFIG_AVFOUNDATION 0
+#define CONFIG_BZLIB 0
+#define CONFIG_COREIMAGE 0
+#define CONFIG_ICONV 0
+#define CONFIG_LIBXCB 0
+#define CONFIG_LIBXCB_SHM 0
+#define CONFIG_LIBXCB_SHAPE 0
+#define CONFIG_LIBXCB_XFIXES 0
+#define CONFIG_LZMA 0
+#define CONFIG_SCHANNEL 0
+#define CONFIG_SDL2 0
+#define CONFIG_SECURETRANSPORT 0
+#define CONFIG_SNDIO 0
+#define CONFIG_XLIB 0
+#define CONFIG_ZLIB 0
+#define CONFIG_CUDA_SDK 0
+#define CONFIG_LIBNPP 0
+#define CONFIG_LIBMFX 0
+#define CONFIG_MMAL 0
+#define CONFIG_OMX 0
+#define CONFIG_OPENCL 0
+#define CONFIG_AMF 0
 #define CONFIG_AUDIOTOOLBOX 0
 #define CONFIG_CRYSTALHD 0
 #define CONFIG_CUDA 0
 #define CONFIG_CUVID 0
 #define CONFIG_D3D11VA 0
 #define CONFIG_DXVA2 0
+#define CONFIG_FFNVCODEC 0
+#define CONFIG_NVDEC 0
 #define CONFIG_NVENC 0
 #define CONFIG_VAAPI 0
-#define CONFIG_VDA 0
 #define CONFIG_VDPAU 0
 #define CONFIG_VIDEOTOOLBOX 0
 #define CONFIG_V4L2_M2M 0
 #define CONFIG_XVMC 0
-#define CONFIG_CUDA_SDK 0
-#define CONFIG_LIBNPP 0
-#define CONFIG_LIBMFX 0
-#define CONFIG_MMAL 0
-#define CONFIG_OMX 0
 #define CONFIG_FTRAPV 0
 #define CONFIG_GRAY 0
 #define CONFIG_HARDCODED_TABLES 0
@@ -537,20 +539,19 @@
 #define CONFIG_GPL 0
 #define CONFIG_NONFREE 0
 #define CONFIG_VERSION3 0
-#define CONFIG_AVCODEC 1
 #define CONFIG_AVDEVICE 0
 #define CONFIG_AVFILTER 0
+#define CONFIG_SWSCALE 0
+#define CONFIG_POSTPROC 0
 #define CONFIG_AVFORMAT 1
+#define CONFIG_AVCODEC 1
+#define CONFIG_SWRESAMPLE 0
 #define CONFIG_AVRESAMPLE 0
 #define CONFIG_AVUTIL 1
-#define CONFIG_POSTPROC 0
-#define CONFIG_SWRESAMPLE 0
-#define CONFIG_SWSCALE 0
 #define CONFIG_FFPLAY 0
 #define CONFIG_FFPROBE 0
-#define CONFIG_FFSERVER 0
 #define CONFIG_FFMPEG 0
-#define CONFIG_DCT 0
+#define CONFIG_DCT 1
 #define CONFIG_DWT 0
 #define CONFIG_ERROR_RESILIENCE 0
 #define CONFIG_FAAN 0
@@ -585,12 +586,21 @@
 #define CONFIG_PROTOCOLS 0
 #define CONFIG_AANDCTTABLES 0
 #define CONFIG_AC3DSP 0
-#define CONFIG_AUDIO_FRAME_QUEUE 0
+#define CONFIG_ADTS_HEADER 0
+#define CONFIG_AUDIO_FRAME_QUEUE 1
 #define CONFIG_AUDIODSP 0
 #define CONFIG_BLOCKDSP 0
 #define CONFIG_BSWAPDSP 0
 #define CONFIG_CABAC 0
+#define CONFIG_CBS 0
+#define CONFIG_CBS_AV1 0
+#define CONFIG_CBS_H264 0
+#define CONFIG_CBS_H265 0
+#define CONFIG_CBS_JPEG 0
+#define CONFIG_CBS_MPEG2 0
+#define CONFIG_CBS_VP9 0
 #define CONFIG_DIRAC_PARSE 1
+#define CONFIG_DNN 0
 #define CONFIG_DVPROFILE 0
 #define CONFIG_EXIF 0
 #define CONFIG_FAANDCT 0
@@ -629,9 +639,9 @@
 #define CONFIG_LZF 0
 #define CONFIG_ME_CMP 0
 #define CONFIG_MPEG_ER 0
-#define CONFIG_MPEGAUDIO 0
-#define CONFIG_MPEGAUDIODSP 0
-#define CONFIG_MPEGAUDIOHEADER 0
+#define CONFIG_MPEGAUDIO 1
+#define CONFIG_MPEGAUDIODSP 1
+#define CONFIG_MPEGAUDIOHEADER 1
 #define CONFIG_MPEGVIDEO 0
 #define CONFIG_MPEGVIDEOENC 0
 #define CONFIG_MSS34DSP 0
@@ -640,6 +650,7 @@
 #define CONFIG_QSV 0
 #define CONFIG_QSVDEC 0
 #define CONFIG_QSVENC 0
+#define CONFIG_QSVVPP 0
 #define CONFIG_RANGECODER 0
 #define CONFIG_RIFFDEC 1
 #define CONFIG_RIFFENC 0
@@ -663,25 +674,35 @@
 #define CONFIG_WMA_FREQS 0
 #define CONFIG_WMV2DSP 0
 #define CONFIG_AAC_ADTSTOASC_BSF 0
+#define CONFIG_AV1_METADATA_BSF 0
 #define CONFIG_CHOMP_BSF 0
 #define CONFIG_DUMP_EXTRADATA_BSF 0
 #define CONFIG_DCA_CORE_BSF 0
+#define CONFIG_EAC3_CORE_BSF 0
 #define CONFIG_EXTRACT_EXTRADATA_BSF 0
+#define CONFIG_FILTER_UNITS_BSF 0
+#define CONFIG_H264_METADATA_BSF 0
 #define CONFIG_H264_MP4TOANNEXB_BSF 0
+#define CONFIG_H264_REDUNDANT_PPS_BSF 0
+#define CONFIG_HAPQA_EXTRACT_BSF 0
+#define CONFIG_HEVC_METADATA_BSF 0
 #define CONFIG_HEVC_MP4TOANNEXB_BSF 0
 #define CONFIG_IMX_DUMP_HEADER_BSF 0
 #define CONFIG_MJPEG2JPEG_BSF 0
 #define CONFIG_MJPEGA_DUMP_HEADER_BSF 0
 #define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0
+#define CONFIG_MPEG2_METADATA_BSF 0
 #define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0
 #define CONFIG_MOV2TEXTSUB_BSF 0
 #define CONFIG_NOISE_BSF 0
 #define CONFIG_NULL_BSF 1
 #define CONFIG_REMOVE_EXTRADATA_BSF 0
 #define CONFIG_TEXT2MOVSUB_BSF 0
+#define CONFIG_TRACE_HEADERS_BSF 0
+#define CONFIG_VP9_METADATA_BSF 0
 #define CONFIG_VP9_RAW_REORDER_BSF 0
 #define CONFIG_VP9_SUPERFRAME_BSF 0
-#define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0
+#define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 1
 #define CONFIG_AASC_DECODER 0
 #define CONFIG_AIC_DECODER 0
 #define CONFIG_ALIAS_PIX_DECODER 0
@@ -701,6 +722,7 @@
 #define CONFIG_BETHSOFTVID_DECODER 0
 #define CONFIG_BFI_DECODER 0
 #define CONFIG_BINK_DECODER 0
+#define CONFIG_BITPACKED_DECODER 0
 #define CONFIG_BMP_DECODER 0
 #define CONFIG_BMV_VIDEO_DECODER 0
 #define CONFIG_BRENDER_PIX_DECODER 0
@@ -766,8 +788,6 @@
 #define CONFIG_H264_MMAL_DECODER 0
 #define CONFIG_H264_QSV_DECODER 0
 #define CONFIG_H264_RKMPP_DECODER 0
-#define CONFIG_H264_VDA_DECODER 0
-#define CONFIG_H264_VDPAU_DECODER 0
 #define CONFIG_HAP_DECODER 0
 #define CONFIG_HEVC_DECODER 0
 #define CONFIG_HEVC_QSV_DECODER 0
@@ -779,6 +799,7 @@
 #define CONFIG_HUFFYUV_DECODER 0
 #define CONFIG_IDCIN_DECODER 0
 #define CONFIG_IFF_ILBM_DECODER 0
+#define CONFIG_IMM4_DECODER 0
 #define CONFIG_INDEO2_DECODER 0
 #define CONFIG_INDEO3_DECODER 0
 #define CONFIG_INDEO4_DECODER 0
@@ -799,17 +820,13 @@
 #define CONFIG_MJPEGB_DECODER 0
 #define CONFIG_MMVIDEO_DECODER 0
 #define CONFIG_MOTIONPIXELS_DECODER 0
-#define CONFIG_MPEG_XVMC_DECODER 0
 #define CONFIG_MPEG1VIDEO_DECODER 0
 #define CONFIG_MPEG2VIDEO_DECODER 0
 #define CONFIG_MPEG4_DECODER 0
 #define CONFIG_MPEG4_CRYSTALHD_DECODER 0
 #define CONFIG_MPEG4_V4L2M2M_DECODER 0
 #define CONFIG_MPEG4_MMAL_DECODER 0
-#define CONFIG_MPEG4_VDPAU_DECODER 0
 #define CONFIG_MPEGVIDEO_DECODER 0
-#define CONFIG_MPEG_VDPAU_DECODER 0
-#define CONFIG_MPEG1_VDPAU_DECODER 0
 #define CONFIG_MPEG1_V4L2M2M_DECODER 0
 #define CONFIG_MPEG2_MMAL_DECODER 0
 #define CONFIG_MPEG2_CRYSTALHD_DECODER 0
@@ -830,6 +847,7 @@
 #define CONFIG_MTS2_DECODER 0
 #define CONFIG_MVC1_DECODER 0
 #define CONFIG_MVC2_DECODER 0
+#define CONFIG_MWSC_DECODER 0
 #define CONFIG_MXPEG_DECODER 0
 #define CONFIG_NUV_DECODER 0
 #define CONFIG_PAF_VIDEO_DECODER 0
@@ -843,7 +861,7 @@
 #define CONFIG_PNG_DECODER 0
 #define CONFIG_PPM_DECODER 0
 #define CONFIG_PRORES_DECODER 0
-#define CONFIG_PRORES_LGPL_DECODER 0
+#define CONFIG_PROSUMER_DECODER 0
 #define CONFIG_PSD_DECODER 0
 #define CONFIG_PTX_DECODER 0
 #define CONFIG_QDRAW_DECODER 0
@@ -851,6 +869,7 @@
 #define CONFIG_QTRLE_DECODER 0
 #define CONFIG_R10K_DECODER 0
 #define CONFIG_R210_DECODER 0
+#define CONFIG_RASC_DECODER 0
 #define CONFIG_RAWVIDEO_DECODER 0
 #define CONFIG_RL2_DECODER 0
 #define CONFIG_ROQ_DECODER 0
@@ -903,7 +922,6 @@
 #define CONFIG_VBLE_DECODER 0
 #define CONFIG_VC1_DECODER 0
 #define CONFIG_VC1_CRYSTALHD_DECODER 0
-#define CONFIG_VC1_VDPAU_DECODER 0
 #define CONFIG_VC1IMAGE_DECODER 0
 #define CONFIG_VC1_MMAL_DECODER 0
 #define CONFIG_VC1_QSV_DECODER 0
@@ -924,14 +942,13 @@
 #define CONFIG_VP9_RKMPP_DECODER 0
 #define CONFIG_VP9_V4L2M2M_DECODER 0
 #define CONFIG_VQA_DECODER 0
-#define CONFIG_BITPACKED_DECODER 0
 #define CONFIG_WEBP_DECODER 0
+#define CONFIG_WCMV_DECODER 0
 #define CONFIG_WRAPPED_AVFRAME_DECODER 0
 #define CONFIG_WMV1_DECODER 0
 #define CONFIG_WMV2_DECODER 0
 #define CONFIG_WMV3_DECODER 0
 #define CONFIG_WMV3_CRYSTALHD_DECODER 0
-#define CONFIG_WMV3_VDPAU_DECODER 0
 #define CONFIG_WMV3IMAGE_DECODER 0
 #define CONFIG_WNV1_DECODER 0
 #define CONFIG_XAN_WC3_DECODER 0
@@ -959,11 +976,14 @@
 #define CONFIG_AMRNB_DECODER 0
 #define CONFIG_AMRWB_DECODER 0
 #define CONFIG_APE_DECODER 0
+#define CONFIG_APTX_DECODER 1
+#define CONFIG_APTX_HD_DECODER 0
 #define CONFIG_ATRAC1_DECODER 0
 #define CONFIG_ATRAC3_DECODER 0
 #define CONFIG_ATRAC3AL_DECODER 0
 #define CONFIG_ATRAC3P_DECODER 0
 #define CONFIG_ATRAC3PAL_DECODER 0
+#define CONFIG_ATRAC9_DECODER 0
 #define CONFIG_BINKAUDIO_DCT_DECODER 0
 #define CONFIG_BINKAUDIO_RDFT_DECODER 0
 #define CONFIG_BMV_AUDIO_DECODER 0
@@ -986,6 +1006,7 @@
 #define CONFIG_GSM_DECODER 0
 #define CONFIG_GSM_MS_DECODER 0
 #define CONFIG_IAC_DECODER 0
+#define CONFIG_ILBC_DECODER 0
 #define CONFIG_IMC_DECODER 0
 #define CONFIG_INTERPLAY_ACM_DECODER 0
 #define CONFIG_MACE3_DECODER 0
@@ -996,12 +1017,12 @@
 #define CONFIG_MP1FLOAT_DECODER 0
 #define CONFIG_MP2_DECODER 0
 #define CONFIG_MP2FLOAT_DECODER 0
-#define CONFIG_MP3_DECODER 0
 #define CONFIG_MP3FLOAT_DECODER 0
-#define CONFIG_MP3ADU_DECODER 0
+#define CONFIG_MP3_DECODER 1
 #define CONFIG_MP3ADUFLOAT_DECODER 0
-#define CONFIG_MP3ON4_DECODER 0
+#define CONFIG_MP3ADU_DECODER 0
 #define CONFIG_MP3ON4FLOAT_DECODER 0
+#define CONFIG_MP3ON4_DECODER 0
 #define CONFIG_MPC7_DECODER 0
 #define CONFIG_MPC8_DECODER 0
 #define CONFIG_NELLYMOSER_DECODER 0
@@ -1014,6 +1035,7 @@
 #define CONFIG_RA_144_DECODER 0
 #define CONFIG_RA_288_DECODER 0
 #define CONFIG_RALF_DECODER 0
+#define CONFIG_SBC_DECODER 1
 #define CONFIG_SHORTEN_DECODER 0
 #define CONFIG_SIPR_DECODER 0
 #define CONFIG_SMACKAUD_DECODER 0
@@ -1151,7 +1173,10 @@
 #define CONFIG_PCM_MULAW_AT_DECODER 0
 #define CONFIG_QDMC_AT_DECODER 0
 #define CONFIG_QDM2_AT_DECODER 0
+#define CONFIG_LIBAOM_AV1_DECODER 0
 #define CONFIG_LIBCELT_DECODER 0
+#define CONFIG_LIBCODEC2_DECODER 0
+#define CONFIG_LIBDAVS2_DECODER 0
 #define CONFIG_LIBFDK_AAC_DECODER 0
 #define CONFIG_LIBGSM_DECODER 0
 #define CONFIG_LIBGSM_MS_DECODER 0
@@ -1184,288 +1209,6 @@
 #define CONFIG_VP8_QSV_DECODER 0
 #define CONFIG_VP9_CUVID_DECODER 0
 #define CONFIG_VP9_MEDIACODEC_DECODER 0
-#define CONFIG_AA_DEMUXER 0
-#define CONFIG_AAC_DEMUXER 0
-#define CONFIG_AC3_DEMUXER 0
-#define CONFIG_ACM_DEMUXER 0
-#define CONFIG_ACT_DEMUXER 0
-#define CONFIG_ADF_DEMUXER 0
-#define CONFIG_ADP_DEMUXER 0
-#define CONFIG_ADS_DEMUXER 0
-#define CONFIG_ADX_DEMUXER 0
-#define CONFIG_AEA_DEMUXER 0
-#define CONFIG_AFC_DEMUXER 0
-#define CONFIG_AIFF_DEMUXER 0
-#define CONFIG_AIX_DEMUXER 0
-#define CONFIG_AMR_DEMUXER 0
-#define CONFIG_ANM_DEMUXER 0
-#define CONFIG_APC_DEMUXER 0
-#define CONFIG_APE_DEMUXER 0
-#define CONFIG_APNG_DEMUXER 0
-#define CONFIG_AQTITLE_DEMUXER 0
-#define CONFIG_ASF_DEMUXER 0
-#define CONFIG_ASF_O_DEMUXER 0
-#define CONFIG_ASS_DEMUXER 0
-#define CONFIG_AST_DEMUXER 0
-#define CONFIG_AU_DEMUXER 0
-#define CONFIG_AVI_DEMUXER 0
-#define CONFIG_AVISYNTH_DEMUXER 0
-#define CONFIG_AVR_DEMUXER 0
-#define CONFIG_AVS_DEMUXER 0
-#define CONFIG_BETHSOFTVID_DEMUXER 0
-#define CONFIG_BFI_DEMUXER 0
-#define CONFIG_BINTEXT_DEMUXER 0
-#define CONFIG_BINK_DEMUXER 0
-#define CONFIG_BIT_DEMUXER 0
-#define CONFIG_BMV_DEMUXER 0
-#define CONFIG_BFSTM_DEMUXER 0
-#define CONFIG_BRSTM_DEMUXER 0
-#define CONFIG_BOA_DEMUXER 0
-#define CONFIG_C93_DEMUXER 0
-#define CONFIG_CAF_DEMUXER 0
-#define CONFIG_CAVSVIDEO_DEMUXER 0
-#define CONFIG_CDG_DEMUXER 0
-#define CONFIG_CDXL_DEMUXER 0
-#define CONFIG_CINE_DEMUXER 0
-#define CONFIG_CONCAT_DEMUXER 0
-#define CONFIG_DASH_DEMUXER 0
-#define CONFIG_DATA_DEMUXER 0
-#define CONFIG_DAUD_DEMUXER 0
-#define CONFIG_DCSTR_DEMUXER 0
-#define CONFIG_DFA_DEMUXER 0
-#define CONFIG_DIRAC_DEMUXER 0
-#define CONFIG_DNXHD_DEMUXER 0
-#define CONFIG_DSF_DEMUXER 0
-#define CONFIG_DSICIN_DEMUXER 0
-#define CONFIG_DSS_DEMUXER 0
-#define CONFIG_DTS_DEMUXER 0
-#define CONFIG_DTSHD_DEMUXER 0
-#define CONFIG_DV_DEMUXER 0
-#define CONFIG_DVBSUB_DEMUXER 0
-#define CONFIG_DVBTXT_DEMUXER 0
-#define CONFIG_DXA_DEMUXER 0
-#define CONFIG_EA_DEMUXER 0
-#define CONFIG_EA_CDATA_DEMUXER 0
-#define CONFIG_EAC3_DEMUXER 0
-#define CONFIG_EPAF_DEMUXER 0
-#define CONFIG_FFM_DEMUXER 0
-#define CONFIG_FFMETADATA_DEMUXER 0
-#define CONFIG_FILMSTRIP_DEMUXER 0
-#define CONFIG_FITS_DEMUXER 0
-#define CONFIG_FLAC_DEMUXER 1
-#define CONFIG_FLIC_DEMUXER 0
-#define CONFIG_FLV_DEMUXER 0
-#define CONFIG_LIVE_FLV_DEMUXER 0
-#define CONFIG_FOURXM_DEMUXER 0
-#define CONFIG_FRM_DEMUXER 0
-#define CONFIG_FSB_DEMUXER 0
-#define CONFIG_G722_DEMUXER 0
-#define CONFIG_G723_1_DEMUXER 0
-#define CONFIG_G726_DEMUXER 0
-#define CONFIG_G726LE_DEMUXER 0
-#define CONFIG_G729_DEMUXER 0
-#define CONFIG_GDV_DEMUXER 0
-#define CONFIG_GENH_DEMUXER 0
-#define CONFIG_GIF_DEMUXER 0
-#define CONFIG_GSM_DEMUXER 0
-#define CONFIG_GXF_DEMUXER 0
-#define CONFIG_H261_DEMUXER 0
-#define CONFIG_H263_DEMUXER 0
-#define CONFIG_H264_DEMUXER 0
-#define CONFIG_HEVC_DEMUXER 0
-#define CONFIG_HLS_DEMUXER 0
-#define CONFIG_HNM_DEMUXER 0
-#define CONFIG_ICO_DEMUXER 0
-#define CONFIG_IDCIN_DEMUXER 0
-#define CONFIG_IDF_DEMUXER 0
-#define CONFIG_IFF_DEMUXER 0
-#define CONFIG_ILBC_DEMUXER 0
-#define CONFIG_IMAGE2_DEMUXER 0
-#define CONFIG_IMAGE2PIPE_DEMUXER 0
-#define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 0
-#define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0
-#define CONFIG_INGENIENT_DEMUXER 0
-#define CONFIG_IPMOVIE_DEMUXER 0
-#define CONFIG_IRCAM_DEMUXER 0
-#define CONFIG_ISS_DEMUXER 0
-#define CONFIG_IV8_DEMUXER 0
-#define CONFIG_IVF_DEMUXER 0
-#define CONFIG_IVR_DEMUXER 0
-#define CONFIG_JACOSUB_DEMUXER 0
-#define CONFIG_JV_DEMUXER 0
-#define CONFIG_LMLM4_DEMUXER 0
-#define CONFIG_LOAS_DEMUXER 0
-#define CONFIG_LRC_DEMUXER 0
-#define CONFIG_LVF_DEMUXER 0
-#define CONFIG_LXF_DEMUXER 0
-#define CONFIG_M4V_DEMUXER 0
-#define CONFIG_MATROSKA_DEMUXER 1
-#define CONFIG_MGSTS_DEMUXER 0
-#define CONFIG_MICRODVD_DEMUXER 0
-#define CONFIG_MJPEG_DEMUXER 0
-#define CONFIG_MJPEG_2000_DEMUXER 0
-#define CONFIG_MLP_DEMUXER 0
-#define CONFIG_MLV_DEMUXER 0
-#define CONFIG_MM_DEMUXER 0
-#define CONFIG_MMF_DEMUXER 0
-#define CONFIG_MOV_DEMUXER 0
-#define CONFIG_MP3_DEMUXER 0
-#define CONFIG_MPC_DEMUXER 0
-#define CONFIG_MPC8_DEMUXER 0
-#define CONFIG_MPEGPS_DEMUXER 0
-#define CONFIG_MPEGTS_DEMUXER 0
-#define CONFIG_MPEGTSRAW_DEMUXER 0
-#define CONFIG_MPEGVIDEO_DEMUXER 0
-#define CONFIG_MPJPEG_DEMUXER 0
-#define CONFIG_MPL2_DEMUXER 0
-#define CONFIG_MPSUB_DEMUXER 0
-#define CONFIG_MSF_DEMUXER 0
-#define CONFIG_MSNWC_TCP_DEMUXER 0
-#define CONFIG_MTAF_DEMUXER 0
-#define CONFIG_MTV_DEMUXER 0
-#define CONFIG_MUSX_DEMUXER 0
-#define CONFIG_MV_DEMUXER 0
-#define CONFIG_MVI_DEMUXER 0
-#define CONFIG_MXF_DEMUXER 0
-#define CONFIG_MXG_DEMUXER 0
-#define CONFIG_NC_DEMUXER 0
-#define CONFIG_NISTSPHERE_DEMUXER 0
-#define CONFIG_NSV_DEMUXER 0
-#define CONFIG_NUT_DEMUXER 0
-#define CONFIG_NUV_DEMUXER 0
-#define CONFIG_OGG_DEMUXER 1
-#define CONFIG_OMA_DEMUXER 0
-#define CONFIG_PAF_DEMUXER 0
-#define CONFIG_PCM_ALAW_DEMUXER 0
-#define CONFIG_PCM_MULAW_DEMUXER 0
-#define CONFIG_PCM_F64BE_DEMUXER 0
-#define CONFIG_PCM_F64LE_DEMUXER 0
-#define CONFIG_PCM_F32BE_DEMUXER 0
-#define CONFIG_PCM_F32LE_DEMUXER 0
-#define CONFIG_PCM_S32BE_DEMUXER 0
-#define CONFIG_PCM_S32LE_DEMUXER 0
-#define CONFIG_PCM_S24BE_DEMUXER 0
-#define CONFIG_PCM_S24LE_DEMUXER 0
-#define CONFIG_PCM_S16BE_DEMUXER 0
-#define CONFIG_PCM_S16LE_DEMUXER 0
-#define CONFIG_PCM_S8_DEMUXER 0
-#define CONFIG_PCM_U32BE_DEMUXER 0
-#define CONFIG_PCM_U32LE_DEMUXER 0
-#define CONFIG_PCM_U24BE_DEMUXER 0
-#define CONFIG_PCM_U24LE_DEMUXER 0
-#define CONFIG_PCM_U16BE_DEMUXER 0
-#define CONFIG_PCM_U16LE_DEMUXER 0
-#define CONFIG_PCM_U8_DEMUXER 0
-#define CONFIG_PJS_DEMUXER 0
-#define CONFIG_PMP_DEMUXER 0
-#define CONFIG_PVA_DEMUXER 0
-#define CONFIG_PVF_DEMUXER 0
-#define CONFIG_QCP_DEMUXER 0
-#define CONFIG_R3D_DEMUXER 0
-#define CONFIG_RAWVIDEO_DEMUXER 0
-#define CONFIG_REALTEXT_DEMUXER 0
-#define CONFIG_REDSPARK_DEMUXER 0
-#define CONFIG_RL2_DEMUXER 0
-#define CONFIG_RM_DEMUXER 0
-#define CONFIG_ROQ_DEMUXER 0
-#define CONFIG_RPL_DEMUXER 0
-#define CONFIG_RSD_DEMUXER 0
-#define CONFIG_RSO_DEMUXER 0
-#define CONFIG_RTP_DEMUXER 0
-#define CONFIG_RTSP_DEMUXER 0
-#define CONFIG_S337M_DEMUXER 0
-#define CONFIG_SAMI_DEMUXER 0
-#define CONFIG_SAP_DEMUXER 0
-#define CONFIG_SBG_DEMUXER 0
-#define CONFIG_SCC_DEMUXER 0
-#define CONFIG_SDP_DEMUXER 0
-#define CONFIG_SDR2_DEMUXER 0
-#define CONFIG_SDS_DEMUXER 0
-#define CONFIG_SDX_DEMUXER 0
-#define CONFIG_SEGAFILM_DEMUXER 0
-#define CONFIG_SHORTEN_DEMUXER 0
-#define CONFIG_SIFF_DEMUXER 0
-#define CONFIG_SLN_DEMUXER 0
-#define CONFIG_SMACKER_DEMUXER 0
-#define CONFIG_SMJPEG_DEMUXER 0
-#define CONFIG_SMUSH_DEMUXER 0
-#define CONFIG_SOL_DEMUXER 0
-#define CONFIG_SOX_DEMUXER 0
-#define CONFIG_SPDIF_DEMUXER 0
-#define CONFIG_SRT_DEMUXER 0
-#define CONFIG_STR_DEMUXER 0
-#define CONFIG_STL_DEMUXER 0
-#define CONFIG_SUBVIEWER1_DEMUXER 0
-#define CONFIG_SUBVIEWER_DEMUXER 0
-#define CONFIG_SUP_DEMUXER 0
-#define CONFIG_SVAG_DEMUXER 0
-#define CONFIG_SWF_DEMUXER 0
-#define CONFIG_TAK_DEMUXER 0
-#define CONFIG_TEDCAPTIONS_DEMUXER 0
-#define CONFIG_THP_DEMUXER 0
-#define CONFIG_THREEDOSTR_DEMUXER 0
-#define CONFIG_TIERTEXSEQ_DEMUXER 0
-#define CONFIG_TMV_DEMUXER 0
-#define CONFIG_TRUEHD_DEMUXER 0
-#define CONFIG_TTA_DEMUXER 0
-#define CONFIG_TXD_DEMUXER 0
-#define CONFIG_TTY_DEMUXER 0
-#define CONFIG_V210_DEMUXER 0
-#define CONFIG_V210X_DEMUXER 0
-#define CONFIG_VAG_DEMUXER 0
-#define CONFIG_VC1_DEMUXER 0
-#define CONFIG_VC1T_DEMUXER 0
-#define CONFIG_VIVO_DEMUXER 0
-#define CONFIG_VMD_DEMUXER 0
-#define CONFIG_VOBSUB_DEMUXER 0
-#define CONFIG_VOC_DEMUXER 0
-#define CONFIG_VPK_DEMUXER 0
-#define CONFIG_VPLAYER_DEMUXER 0
-#define CONFIG_VQF_DEMUXER 0
-#define CONFIG_W64_DEMUXER 0
-#define CONFIG_WAV_DEMUXER 1
-#define CONFIG_WC3_DEMUXER 0
-#define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0
-#define CONFIG_WEBVTT_DEMUXER 0
-#define CONFIG_WSAUD_DEMUXER 0
-#define CONFIG_WSD_DEMUXER 0
-#define CONFIG_WSVQA_DEMUXER 0
-#define CONFIG_WTV_DEMUXER 0
-#define CONFIG_WVE_DEMUXER 0
-#define CONFIG_WV_DEMUXER 0
-#define CONFIG_XA_DEMUXER 0
-#define CONFIG_XBIN_DEMUXER 0
-#define CONFIG_XMV_DEMUXER 0
-#define CONFIG_XVAG_DEMUXER 0
-#define CONFIG_XWMA_DEMUXER 0
-#define CONFIG_YOP_DEMUXER 0
-#define CONFIG_YUV4MPEGPIPE_DEMUXER 0
-#define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0
-#define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0
-#define CONFIG_LIBGME_DEMUXER 0
-#define CONFIG_LIBMODPLUG_DEMUXER 0
-#define CONFIG_LIBOPENMPT_DEMUXER 0
 #define CONFIG_A64MULTI_ENCODER 0
 #define CONFIG_A64MULTI5_ENCODER 0
 #define CONFIG_ALIAS_PIX_ENCODER 0
@@ -1498,6 +1241,7 @@
 #define CONFIG_JPEG2000_ENCODER 0
 #define CONFIG_JPEGLS_ENCODER 0
 #define CONFIG_LJPEG_ENCODER 0
+#define CONFIG_MAGICYUV_ENCODER 0
 #define CONFIG_MJPEG_ENCODER 0
 #define CONFIG_MPEG1VIDEO_ENCODER 0
 #define CONFIG_MPEG2VIDEO_ENCODER 0
@@ -1549,6 +1293,8 @@
 #define CONFIG_AC3_ENCODER 0
 #define CONFIG_AC3_FIXED_ENCODER 0
 #define CONFIG_ALAC_ENCODER 0
+#define CONFIG_APTX_ENCODER 0
+#define CONFIG_APTX_HD_ENCODER 0
 #define CONFIG_DCA_ENCODER 0
 #define CONFIG_EAC3_ENCODER 0
 #define CONFIG_FLAC_ENCODER 0
@@ -1559,6 +1305,7 @@
 #define CONFIG_NELLYMOSER_ENCODER 0
 #define CONFIG_OPUS_ENCODER 0
 #define CONFIG_RA_144_ENCODER 0
+#define CONFIG_SBC_ENCODER 0
 #define CONFIG_SONIC_ENCODER 0
 #define CONFIG_SONIC_LS_ENCODER 0
 #define CONFIG_TRUEHD_ENCODER 0
@@ -1620,6 +1367,8 @@
 #define CONFIG_ILBC_AT_ENCODER 0
 #define CONFIG_PCM_ALAW_AT_ENCODER 0
 #define CONFIG_PCM_MULAW_AT_ENCODER 0
+#define CONFIG_LIBAOM_AV1_ENCODER 0
+#define CONFIG_LIBCODEC2_ENCODER 0
 #define CONFIG_LIBFDK_AAC_ENCODER 0
 #define CONFIG_LIBGSM_ENCODER 0
 #define CONFIG_LIBGSM_MS_ENCODER 0
@@ -1644,9 +1393,11 @@
 #define CONFIG_LIBX264RGB_ENCODER 0
 #define CONFIG_LIBX265_ENCODER 0
 #define CONFIG_LIBXAVS_ENCODER 0
+#define CONFIG_LIBXAVS2_ENCODER 0
 #define CONFIG_LIBXVID_ENCODER 0
 #define CONFIG_H263_V4L2M2M_ENCODER 0
 #define CONFIG_LIBOPENH264_ENCODER 0
+#define CONFIG_H264_AMF_ENCODER 0
 #define CONFIG_H264_NVENC_ENCODER 0
 #define CONFIG_H264_OMX_ENCODER 0
 #define CONFIG_H264_QSV_ENCODER 0
@@ -1656,11 +1407,14 @@
 #define CONFIG_NVENC_ENCODER 0
 #define CONFIG_NVENC_H264_ENCODER 0
 #define CONFIG_NVENC_HEVC_ENCODER 0
+#define CONFIG_HEVC_AMF_ENCODER 0
 #define CONFIG_HEVC_NVENC_ENCODER 0
 #define CONFIG_HEVC_QSV_ENCODER 0
 #define CONFIG_HEVC_V4L2M2M_ENCODER 0
 #define CONFIG_HEVC_VAAPI_ENCODER 0
+#define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0
 #define CONFIG_LIBKVAZAAR_ENCODER 0
+#define CONFIG_MJPEG_QSV_ENCODER 0
 #define CONFIG_MJPEG_VAAPI_ENCODER 0
 #define CONFIG_MPEG2_QSV_ENCODER 0
 #define CONFIG_MPEG2_VAAPI_ENCODER 0
@@ -1668,20 +1422,159 @@
 #define CONFIG_VP8_V4L2M2M_ENCODER 0
 #define CONFIG_VP8_VAAPI_ENCODER 0
 #define CONFIG_VP9_VAAPI_ENCODER 0
+#define CONFIG_H263_VAAPI_HWACCEL 0
+#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_H264_D3D11VA_HWACCEL 0
+#define CONFIG_H264_D3D11VA2_HWACCEL 0
+#define CONFIG_H264_DXVA2_HWACCEL 0
+#define CONFIG_H264_NVDEC_HWACCEL 0
+#define CONFIG_H264_VAAPI_HWACCEL 0
+#define CONFIG_H264_VDPAU_HWACCEL 0
+#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_HEVC_D3D11VA_HWACCEL 0
+#define CONFIG_HEVC_D3D11VA2_HWACCEL 0
+#define CONFIG_HEVC_DXVA2_HWACCEL 0
+#define CONFIG_HEVC_NVDEC_HWACCEL 0
+#define CONFIG_HEVC_VAAPI_HWACCEL 0
+#define CONFIG_HEVC_VDPAU_HWACCEL 0
+#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MJPEG_NVDEC_HWACCEL 0
+#define CONFIG_MJPEG_VAAPI_HWACCEL 0
+#define CONFIG_MPEG1_NVDEC_HWACCEL 0
+#define CONFIG_MPEG1_VDPAU_HWACCEL 0
+#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MPEG1_XVMC_HWACCEL 0
+#define CONFIG_MPEG2_D3D11VA_HWACCEL 0
+#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
+#define CONFIG_MPEG2_NVDEC_HWACCEL 0
+#define CONFIG_MPEG2_DXVA2_HWACCEL 0
+#define CONFIG_MPEG2_VAAPI_HWACCEL 0
+#define CONFIG_MPEG2_VDPAU_HWACCEL 0
+#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MPEG2_XVMC_HWACCEL 0
+#define CONFIG_MPEG4_NVDEC_HWACCEL 0
+#define CONFIG_MPEG4_VAAPI_HWACCEL 0
+#define CONFIG_MPEG4_VDPAU_HWACCEL 0
+#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_VC1_D3D11VA_HWACCEL 0
+#define CONFIG_VC1_D3D11VA2_HWACCEL 0
+#define CONFIG_VC1_DXVA2_HWACCEL 0
+#define CONFIG_VC1_NVDEC_HWACCEL 0
+#define CONFIG_VC1_VAAPI_HWACCEL 0
+#define CONFIG_VC1_VDPAU_HWACCEL 0
+#define CONFIG_VP8_NVDEC_HWACCEL 0
+#define CONFIG_VP8_VAAPI_HWACCEL 0
+#define CONFIG_VP9_D3D11VA_HWACCEL 0
+#define CONFIG_VP9_D3D11VA2_HWACCEL 0
+#define CONFIG_VP9_DXVA2_HWACCEL 0
+#define CONFIG_VP9_NVDEC_HWACCEL 0
+#define CONFIG_VP9_VAAPI_HWACCEL 0
+#define CONFIG_WMV3_D3D11VA_HWACCEL 0
+#define CONFIG_WMV3_D3D11VA2_HWACCEL 0
+#define CONFIG_WMV3_DXVA2_HWACCEL 0
+#define CONFIG_WMV3_NVDEC_HWACCEL 0
+#define CONFIG_WMV3_VAAPI_HWACCEL 0
+#define CONFIG_WMV3_VDPAU_HWACCEL 0
+#define CONFIG_AAC_PARSER 0
+#define CONFIG_AAC_LATM_PARSER 0
+#define CONFIG_AC3_PARSER 0
+#define CONFIG_ADX_PARSER 0
+#define CONFIG_AV1_PARSER 0
+#define CONFIG_AVS2_PARSER 0
+#define CONFIG_BMP_PARSER 0
+#define CONFIG_CAVSVIDEO_PARSER 0
+#define CONFIG_COOK_PARSER 0
+#define CONFIG_DCA_PARSER 0
+#define CONFIG_DIRAC_PARSER 0
+#define CONFIG_DNXHD_PARSER 0
+#define CONFIG_DPX_PARSER 0
+#define CONFIG_DVAUDIO_PARSER 0
+#define CONFIG_DVBSUB_PARSER 0
+#define CONFIG_DVDSUB_PARSER 0
+#define CONFIG_DVD_NAV_PARSER 0
+#define CONFIG_FLAC_PARSER 1
+#define CONFIG_G729_PARSER 0
+#define CONFIG_GSM_PARSER 0
+#define CONFIG_H261_PARSER 0
+#define CONFIG_H263_PARSER 0
+#define CONFIG_H264_PARSER 0
+#define CONFIG_HEVC_PARSER 0
+#define CONFIG_MJPEG_PARSER 0
+#define CONFIG_MLP_PARSER 0
+#define CONFIG_MPEG4VIDEO_PARSER 0
+#define CONFIG_MPEGAUDIO_PARSER 1
+#define CONFIG_MPEGVIDEO_PARSER 0
+#define CONFIG_OPUS_PARSER 1
+#define CONFIG_PNG_PARSER 0
+#define CONFIG_PNM_PARSER 0
+#define CONFIG_RV30_PARSER 0
+#define CONFIG_RV40_PARSER 0
+#define CONFIG_SBC_PARSER 0
+#define CONFIG_SIPR_PARSER 0
+#define CONFIG_TAK_PARSER 0
+#define CONFIG_VC1_PARSER 0
+#define CONFIG_VORBIS_PARSER 1
+#define CONFIG_VP3_PARSER 1
+#define CONFIG_VP8_PARSER 1
+#define CONFIG_VP9_PARSER 1
+#define CONFIG_XMA_PARSER 0
+#define CONFIG_ALSA_INDEV 0
+#define CONFIG_ANDROID_CAMERA_INDEV 0
+#define CONFIG_AVFOUNDATION_INDEV 0
+#define CONFIG_BKTR_INDEV 0
+#define CONFIG_DECKLINK_INDEV 0
+#define CONFIG_LIBNDI_NEWTEK_INDEV 0
+#define CONFIG_DSHOW_INDEV 0
+#define CONFIG_FBDEV_INDEV 0
+#define CONFIG_GDIGRAB_INDEV 0
+#define CONFIG_IEC61883_INDEV 0
+#define CONFIG_JACK_INDEV 0
+#define CONFIG_KMSGRAB_INDEV 0
+#define CONFIG_LAVFI_INDEV 0
+#define CONFIG_OPENAL_INDEV 0
+#define CONFIG_OSS_INDEV 0
+#define CONFIG_PULSE_INDEV 0
+#define CONFIG_SNDIO_INDEV 0
+#define CONFIG_V4L2_INDEV 0
+#define CONFIG_VFWCAP_INDEV 0
+#define CONFIG_XCBGRAB_INDEV 0
+#define CONFIG_LIBCDIO_INDEV 0
+#define CONFIG_LIBDC1394_INDEV 0
+#define CONFIG_ALSA_OUTDEV 0
+#define CONFIG_CACA_OUTDEV 0
+#define CONFIG_DECKLINK_OUTDEV 0
+#define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
+#define CONFIG_FBDEV_OUTDEV 0
+#define CONFIG_OPENGL_OUTDEV 0
+#define CONFIG_OSS_OUTDEV 0
+#define CONFIG_PULSE_OUTDEV 0
+#define CONFIG_SDL2_OUTDEV 0
+#define CONFIG_SNDIO_OUTDEV 0
+#define CONFIG_V4L2_OUTDEV 0
+#define CONFIG_XV_OUTDEV 0
 #define CONFIG_ABENCH_FILTER 0
 #define CONFIG_ACOMPRESSOR_FILTER 0
+#define CONFIG_ACONTRAST_FILTER 0
 #define CONFIG_ACOPY_FILTER 0
+#define CONFIG_ACUE_FILTER 0
 #define CONFIG_ACROSSFADE_FILTER 0
+#define CONFIG_ACROSSOVER_FILTER 0
 #define CONFIG_ACRUSHER_FILTER 0
+#define CONFIG_ADECLICK_FILTER 0
+#define CONFIG_ADECLIP_FILTER 0
 #define CONFIG_ADELAY_FILTER 0
+#define CONFIG_ADERIVATIVE_FILTER 0
 #define CONFIG_AECHO_FILTER 0
 #define CONFIG_AEMPHASIS_FILTER 0
 #define CONFIG_AEVAL_FILTER 0
 #define CONFIG_AFADE_FILTER 0
+#define CONFIG_AFFTDN_FILTER 0
 #define CONFIG_AFFTFILT_FILTER 0
 #define CONFIG_AFIR_FILTER 0
 #define CONFIG_AFORMAT_FILTER 0
 #define CONFIG_AGATE_FILTER 0
+#define CONFIG_AIIR_FILTER 0
+#define CONFIG_AINTEGRAL_FILTER 0
 #define CONFIG_AINTERLEAVE_FILTER 0
 #define CONFIG_ALIMITER_FILTER 0
 #define CONFIG_ALLPASS_FILTER 0
@@ -1689,6 +1582,7 @@
 #define CONFIG_AMERGE_FILTER 0
 #define CONFIG_AMETADATA_FILTER 0
 #define CONFIG_AMIX_FILTER 0
+#define CONFIG_AMULTIPLY_FILTER 0
 #define CONFIG_ANEQUALIZER_FILTER 0
 #define CONFIG_ANULL_FILTER 0
 #define CONFIG_APAD_FILTER 0
@@ -1725,6 +1619,7 @@
 #define CONFIG_CROSSFEED_FILTER 0
 #define CONFIG_CRYSTALIZER_FILTER 0
 #define CONFIG_DCSHIFT_FILTER 0
+#define CONFIG_DRMETER_FILTER 0
 #define CONFIG_DYNAUDNORM_FILTER 0
 #define CONFIG_EARWAX_FILTER 0
 #define CONFIG_EBUR128_FILTER 0
@@ -1736,10 +1631,14 @@
 #define CONFIG_HDCD_FILTER 0
 #define CONFIG_HEADPHONE_FILTER 0
 #define CONFIG_HIGHPASS_FILTER 0
+#define CONFIG_HIGHSHELF_FILTER 0
 #define CONFIG_JOIN_FILTER 0
 #define CONFIG_LADSPA_FILTER 0
 #define CONFIG_LOUDNORM_FILTER 0
 #define CONFIG_LOWPASS_FILTER 0
+#define CONFIG_LOWSHELF_FILTER 0
+#define CONFIG_LV2_FILTER 0
+#define CONFIG_MCOMPAND_FILTER 0
 #define CONFIG_PAN_FILTER 0
 #define CONFIG_REPLAYGAIN_FILTER 0
 #define CONFIG_RESAMPLE_FILTER 0
@@ -1762,20 +1661,25 @@
 #define CONFIG_ANOISESRC_FILTER 0
 #define CONFIG_ANULLSRC_FILTER 0
 #define CONFIG_FLITE_FILTER 0
+#define CONFIG_HILBERT_FILTER 0
 #define CONFIG_SINE_FILTER 0
 #define CONFIG_ANULLSINK_FILTER 0
 #define CONFIG_ALPHAEXTRACT_FILTER 0
 #define CONFIG_ALPHAMERGE_FILTER 0
+#define CONFIG_AMPLIFY_FILTER 0
 #define CONFIG_ASS_FILTER 0
 #define CONFIG_ATADENOISE_FILTER 0
 #define CONFIG_AVGBLUR_FILTER 0
+#define CONFIG_AVGBLUR_OPENCL_FILTER 0
 #define CONFIG_BBOX_FILTER 0
 #define CONFIG_BENCH_FILTER 0
 #define CONFIG_BITPLANENOISE_FILTER 0
 #define CONFIG_BLACKDETECT_FILTER 0
 #define CONFIG_BLACKFRAME_FILTER 0
 #define CONFIG_BLEND_FILTER 0
+#define CONFIG_BM3D_FILTER 0
 #define CONFIG_BOXBLUR_FILTER 0
+#define CONFIG_BOXBLUR_OPENCL_FILTER 0
 #define CONFIG_BWDIF_FILTER 0
 #define CONFIG_CHROMAKEY_FILTER 0
 #define CONFIG_CIESCOPE_FILTER 0
@@ -1787,27 +1691,33 @@
 #define CONFIG_COLORMATRIX_FILTER 0
 #define CONFIG_COLORSPACE_FILTER 0
 #define CONFIG_CONVOLUTION_FILTER 0
+#define CONFIG_CONVOLUTION_OPENCL_FILTER 0
 #define CONFIG_CONVOLVE_FILTER 0
 #define CONFIG_COPY_FILTER 0
 #define CONFIG_COREIMAGE_FILTER 0
 #define CONFIG_COVER_RECT_FILTER 0
 #define CONFIG_CROP_FILTER 0
 #define CONFIG_CROPDETECT_FILTER 0
+#define CONFIG_CUE_FILTER 0
 #define CONFIG_CURVES_FILTER 0
 #define CONFIG_DATASCOPE_FILTER 0
 #define CONFIG_DCTDNOIZ_FILTER 0
 #define CONFIG_DEBAND_FILTER 0
+#define CONFIG_DEBLOCK_FILTER 0
 #define CONFIG_DECIMATE_FILTER 0
+#define CONFIG_DECONVOLVE_FILTER 0
 #define CONFIG_DEFLATE_FILTER 0
 #define CONFIG_DEFLICKER_FILTER 0
 #define CONFIG_DEINTERLACE_QSV_FILTER 0
 #define CONFIG_DEINTERLACE_VAAPI_FILTER 0
 #define CONFIG_DEJUDDER_FILTER 0
 #define CONFIG_DELOGO_FILTER 0
+#define CONFIG_DENOISE_VAAPI_FILTER 0
 #define CONFIG_DESHAKE_FILTER 0
 #define CONFIG_DESPILL_FILTER 0
 #define CONFIG_DETELECINE_FILTER 0
 #define CONFIG_DILATION_FILTER 0
+#define CONFIG_DILATION_OPENCL_FILTER 0
 #define CONFIG_DISPLACE_FILTER 0
 #define CONFIG_DOUBLEWEAVE_FILTER 0
 #define CONFIG_DRAWBOX_FILTER 0
@@ -1816,15 +1726,19 @@
 #define CONFIG_DRAWTEXT_FILTER 0
 #define CONFIG_EDGEDETECT_FILTER 0
 #define CONFIG_ELBG_FILTER 0
+#define CONFIG_ENTROPY_FILTER 0
 #define CONFIG_EQ_FILTER 0
 #define CONFIG_EROSION_FILTER 0
+#define CONFIG_EROSION_OPENCL_FILTER 0
 #define CONFIG_EXTRACTPLANES_FILTER 0
 #define CONFIG_FADE_FILTER 0
+#define CONFIG_FFTDNOIZ_FILTER 0
 #define CONFIG_FFTFILT_FILTER 0
 #define CONFIG_FIELD_FILTER 0
 #define CONFIG_FIELDHINT_FILTER 0
 #define CONFIG_FIELDMATCH_FILTER 0
 #define CONFIG_FIELDORDER_FILTER 0
+#define CONFIG_FILLBORDERS_FILTER 0
 #define CONFIG_FIND_RECT_FILTER 0
 #define CONFIG_FLOODFILL_FILTER 0
 #define CONFIG_FORMAT_FILTER 0
@@ -1837,6 +1751,7 @@
 #define CONFIG_GBLUR_FILTER 0
 #define CONFIG_GEQ_FILTER 0
 #define CONFIG_GRADFUN_FILTER 0
+#define CONFIG_GREYEDGE_FILTER 0
 #define CONFIG_HALDCLUT_FILTER 0
 #define CONFIG_HFLIP_FILTER 0
 #define CONFIG_HISTEQ_FILTER 0
@@ -1857,11 +1772,13 @@
 #define CONFIG_INTERLEAVE_FILTER 0
 #define CONFIG_KERNDEINT_FILTER 0
 #define CONFIG_LENSCORRECTION_FILTER 0
+#define CONFIG_LENSFUN_FILTER 0
 #define CONFIG_LIBVMAF_FILTER 0
 #define CONFIG_LIMITER_FILTER 0
 #define CONFIG_LOOP_FILTER 0
 #define CONFIG_LUMAKEY_FILTER 0
 #define CONFIG_LUT_FILTER 0
+#define CONFIG_LUT1D_FILTER 0
 #define CONFIG_LUT2_FILTER 0
 #define CONFIG_LUT3D_FILTER 0
 #define CONFIG_LUTRGB_FILTER 0
@@ -1874,17 +1791,21 @@
 #define CONFIG_METADATA_FILTER 0
 #define CONFIG_MIDEQUALIZER_FILTER 0
 #define CONFIG_MINTERPOLATE_FILTER 0
+#define CONFIG_MIX_FILTER 0
 #define CONFIG_MPDECIMATE_FILTER 0
 #define CONFIG_NEGATE_FILTER 0
 #define CONFIG_NLMEANS_FILTER 0
 #define CONFIG_NNEDI_FILTER 0
 #define CONFIG_NOFORMAT_FILTER 0
 #define CONFIG_NOISE_FILTER 0
+#define CONFIG_NORMALIZE_FILTER 0
 #define CONFIG_NULL_FILTER 0
 #define CONFIG_OCR_FILTER 0
 #define CONFIG_OCV_FILTER 0
 #define CONFIG_OSCILLOSCOPE_FILTER 0
 #define CONFIG_OVERLAY_FILTER 0
+#define CONFIG_OVERLAY_OPENCL_FILTER 0
+#define CONFIG_OVERLAY_QSV_FILTER 0
 #define CONFIG_OWDENOISE_FILTER 0
 #define CONFIG_PAD_FILTER 0
 #define CONFIG_PALETTEGEN_FILTER 0
@@ -1898,6 +1819,9 @@
 #define CONFIG_PP7_FILTER 0
 #define CONFIG_PREMULTIPLY_FILTER 0
 #define CONFIG_PREWITT_FILTER 0
+#define CONFIG_PREWITT_OPENCL_FILTER 0
+#define CONFIG_PROCAMP_VAAPI_FILTER 0
+#define CONFIG_PROGRAM_OPENCL_FILTER 0
 #define CONFIG_PSEUDOCOLOR_FILTER 0
 #define CONFIG_PSNR_FILTER 0
 #define CONFIG_PULLUP_FILTER 0
@@ -1912,6 +1836,7 @@
 #define CONFIG_REPEATFIELDS_FILTER 0
 #define CONFIG_REVERSE_FILTER 0
 #define CONFIG_ROBERTS_FILTER 0
+#define CONFIG_ROBERTS_OPENCL_FILTER 0
 #define CONFIG_ROTATE_FILTER 0
 #define CONFIG_SAB_FILTER 0
 #define CONFIG_SCALE_FILTER 0
@@ -1927,8 +1852,10 @@
 #define CONFIG_SETDAR_FILTER 0
 #define CONFIG_SETFIELD_FILTER 0
 #define CONFIG_SETPTS_FILTER 0
+#define CONFIG_SETRANGE_FILTER 0
 #define CONFIG_SETSAR_FILTER 0
 #define CONFIG_SETTB_FILTER 0
+#define CONFIG_SHARPNESS_VAAPI_FILTER 0
 #define CONFIG_SHOWINFO_FILTER 0
 #define CONFIG_SHOWPALETTE_FILTER 0
 #define CONFIG_SHUFFLEFRAMES_FILTER 0
@@ -1938,8 +1865,10 @@
 #define CONFIG_SIGNATURE_FILTER 0
 #define CONFIG_SMARTBLUR_FILTER 0
 #define CONFIG_SOBEL_FILTER 0
+#define CONFIG_SOBEL_OPENCL_FILTER 0
 #define CONFIG_SPLIT_FILTER 0
 #define CONFIG_SPP_FILTER 0
+#define CONFIG_SR_FILTER 0
 #define CONFIG_SSIM_FILTER 0
 #define CONFIG_STEREO3D_FILTER 0
 #define CONFIG_STREAMSELECT_FILTER 0
@@ -1955,19 +1884,25 @@
 #define CONFIG_TILE_FILTER 0
 #define CONFIG_TINTERLACE_FILTER 0
 #define CONFIG_TLUT2_FILTER 0
+#define CONFIG_TMIX_FILTER 0
 #define CONFIG_TONEMAP_FILTER 0
+#define CONFIG_TONEMAP_OPENCL_FILTER 0
 #define CONFIG_TRANSPOSE_FILTER 0
+#define CONFIG_TRANSPOSE_NPP_FILTER 0
 #define CONFIG_TRIM_FILTER 0
 #define CONFIG_UNPREMULTIPLY_FILTER 0
 #define CONFIG_UNSHARP_FILTER 0
+#define CONFIG_UNSHARP_OPENCL_FILTER 0
 #define CONFIG_USPP_FILTER 0
 #define CONFIG_VAGUEDENOISER_FILTER 0
 #define CONFIG_VECTORSCOPE_FILTER 0
 #define CONFIG_VFLIP_FILTER 0
+#define CONFIG_VFRDET_FILTER 0
 #define CONFIG_VIDSTABDETECT_FILTER 0
 #define CONFIG_VIDSTABTRANSFORM_FILTER 0
 #define CONFIG_VIGNETTE_FILTER 0
 #define CONFIG_VMAFMOTION_FILTER 0
+#define CONFIG_VPP_QSV_FILTER 0
 #define CONFIG_VSTACK_FILTER 0
 #define CONFIG_W3FDIF_FILTER 0
 #define CONFIG_WAVEFORM_FILTER 0
@@ -1988,6 +1923,9 @@
 #define CONFIG_MANDELBROT_FILTER 0
 #define CONFIG_MPTESTSRC_FILTER 0
 #define CONFIG_NULLSRC_FILTER 0
+#define CONFIG_OPENCLSRC_FILTER 0
+#define CONFIG_PAL75BARS_FILTER 0
+#define CONFIG_PAL100BARS_FILTER 0
 #define CONFIG_RGBTESTSRC_FILTER 0
 #define CONFIG_SMPTEBARS_FILTER 0
 #define CONFIG_SMPTEHDBARS_FILTER 0
@@ -2011,94 +1949,302 @@
 #define CONFIG_SPECTRUMSYNTH_FILTER 0
 #define CONFIG_AMOVIE_FILTER 0
 #define CONFIG_MOVIE_FILTER 0
-#define CONFIG_H263_VAAPI_HWACCEL 0
-#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_H264_CUVID_HWACCEL 0
-#define CONFIG_H264_D3D11VA_HWACCEL 0
-#define CONFIG_H264_D3D11VA2_HWACCEL 0
-#define CONFIG_H264_DXVA2_HWACCEL 0
-#define CONFIG_H264_MEDIACODEC_HWACCEL 0
-#define CONFIG_H264_MMAL_HWACCEL 0
-#define CONFIG_H264_QSV_HWACCEL 0
-#define CONFIG_H264_VAAPI_HWACCEL 0
-#define CONFIG_H264_VDA_HWACCEL 0
-#define CONFIG_H264_VDA_OLD_HWACCEL 0
-#define CONFIG_H264_VDPAU_HWACCEL 0
-#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_HEVC_CUVID_HWACCEL 0
-#define CONFIG_HEVC_D3D11VA_HWACCEL 0
-#define CONFIG_HEVC_D3D11VA2_HWACCEL 0
-#define CONFIG_HEVC_DXVA2_HWACCEL 0
-#define CONFIG_HEVC_MEDIACODEC_HWACCEL 0
-#define CONFIG_HEVC_QSV_HWACCEL 0
-#define CONFIG_HEVC_VAAPI_HWACCEL 0
-#define CONFIG_HEVC_VDPAU_HWACCEL 0
-#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MJPEG_CUVID_HWACCEL 0
-#define CONFIG_MPEG1_CUVID_HWACCEL 0
-#define CONFIG_MPEG1_XVMC_HWACCEL 0
-#define CONFIG_MPEG1_VDPAU_HWACCEL 0
-#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MPEG2_CUVID_HWACCEL 0
-#define CONFIG_MPEG2_XVMC_HWACCEL 0
-#define CONFIG_MPEG2_D3D11VA_HWACCEL 0
-#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
-#define CONFIG_MPEG2_DXVA2_HWACCEL 0
-#define CONFIG_MPEG2_MMAL_HWACCEL 0
-#define CONFIG_MPEG2_QSV_HWACCEL 0
-#define CONFIG_MPEG2_VAAPI_HWACCEL 0
-#define CONFIG_MPEG2_VDPAU_HWACCEL 0
-#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MPEG2_MEDIACODEC_HWACCEL 0
-#define CONFIG_MPEG4_CUVID_HWACCEL 0
-#define CONFIG_MPEG4_MEDIACODEC_HWACCEL 0
-#define CONFIG_MPEG4_MMAL_HWACCEL 0
-#define CONFIG_MPEG4_VAAPI_HWACCEL 0
-#define CONFIG_MPEG4_VDPAU_HWACCEL 0
-#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_VC1_CUVID_HWACCEL 0
-#define CONFIG_VC1_D3D11VA_HWACCEL 0
-#define CONFIG_VC1_D3D11VA2_HWACCEL 0
-#define CONFIG_VC1_DXVA2_HWACCEL 0
-#define CONFIG_VC1_VAAPI_HWACCEL 0
-#define CONFIG_VC1_VDPAU_HWACCEL 0
-#define CONFIG_VC1_MMAL_HWACCEL 0
-#define CONFIG_VC1_QSV_HWACCEL 0
-#define CONFIG_VP8_CUVID_HWACCEL 0
-#define CONFIG_VP8_MEDIACODEC_HWACCEL 0
-#define CONFIG_VP8_QSV_HWACCEL 0
-#define CONFIG_VP9_CUVID_HWACCEL 0
-#define CONFIG_VP9_D3D11VA_HWACCEL 0
-#define CONFIG_VP9_D3D11VA2_HWACCEL 0
-#define CONFIG_VP9_DXVA2_HWACCEL 0
-#define CONFIG_VP9_MEDIACODEC_HWACCEL 0
-#define CONFIG_VP9_VAAPI_HWACCEL 0
-#define CONFIG_WMV3_D3D11VA_HWACCEL 0
-#define CONFIG_WMV3_D3D11VA2_HWACCEL 0
-#define CONFIG_WMV3_DXVA2_HWACCEL 0
-#define CONFIG_WMV3_VAAPI_HWACCEL 0
-#define CONFIG_WMV3_VDPAU_HWACCEL 0
-#define CONFIG_ALSA_INDEV 0
-#define CONFIG_AVFOUNDATION_INDEV 0
-#define CONFIG_BKTR_INDEV 0
-#define CONFIG_DECKLINK_INDEV 0
-#define CONFIG_LIBNDI_NEWTEK_INDEV 0
-#define CONFIG_DSHOW_INDEV 0
-#define CONFIG_FBDEV_INDEV 0
-#define CONFIG_GDIGRAB_INDEV 0
-#define CONFIG_IEC61883_INDEV 0
-#define CONFIG_JACK_INDEV 0
-#define CONFIG_KMSGRAB_INDEV 0
-#define CONFIG_LAVFI_INDEV 0
-#define CONFIG_OPENAL_INDEV 0
-#define CONFIG_OSS_INDEV 0
-#define CONFIG_PULSE_INDEV 0
-#define CONFIG_SNDIO_INDEV 0
-#define CONFIG_V4L2_INDEV 0
-#define CONFIG_VFWCAP_INDEV 0
-#define CONFIG_XCBGRAB_INDEV 0
-#define CONFIG_LIBCDIO_INDEV 0
-#define CONFIG_LIBDC1394_INDEV 0
+#define CONFIG_AFIFO_FILTER 0
+#define CONFIG_FIFO_FILTER 0
+#define CONFIG_AA_DEMUXER 0
+#define CONFIG_AAC_DEMUXER 0
+#define CONFIG_AC3_DEMUXER 0
+#define CONFIG_ACM_DEMUXER 0
+#define CONFIG_ACT_DEMUXER 0
+#define CONFIG_ADF_DEMUXER 0
+#define CONFIG_ADP_DEMUXER 0
+#define CONFIG_ADS_DEMUXER 0
+#define CONFIG_ADX_DEMUXER 0
+#define CONFIG_AEA_DEMUXER 0
+#define CONFIG_AFC_DEMUXER 0
+#define CONFIG_AIFF_DEMUXER 0
+#define CONFIG_AIX_DEMUXER 0
+#define CONFIG_AMR_DEMUXER 0
+#define CONFIG_AMRNB_DEMUXER 0
+#define CONFIG_AMRWB_DEMUXER 0
+#define CONFIG_ANM_DEMUXER 0
+#define CONFIG_APC_DEMUXER 0
+#define CONFIG_APE_DEMUXER 0
+#define CONFIG_APNG_DEMUXER 0
+#define CONFIG_APTX_DEMUXER 0
+#define CONFIG_APTX_HD_DEMUXER 0
+#define CONFIG_AQTITLE_DEMUXER 0
+#define CONFIG_ASF_DEMUXER 0
+#define CONFIG_ASF_O_DEMUXER 0
+#define CONFIG_ASS_DEMUXER 0
+#define CONFIG_AST_DEMUXER 0
+#define CONFIG_AU_DEMUXER 0
+#define CONFIG_AVI_DEMUXER 0
+#define CONFIG_AVISYNTH_DEMUXER 0
+#define CONFIG_AVR_DEMUXER 0
+#define CONFIG_AVS_DEMUXER 0
+#define CONFIG_AVS2_DEMUXER 0
+#define CONFIG_BETHSOFTVID_DEMUXER 0
+#define CONFIG_BFI_DEMUXER 0
+#define CONFIG_BINTEXT_DEMUXER 0
+#define CONFIG_BINK_DEMUXER 0
+#define CONFIG_BIT_DEMUXER 0
+#define CONFIG_BMV_DEMUXER 0
+#define CONFIG_BFSTM_DEMUXER 0
+#define CONFIG_BRSTM_DEMUXER 0
+#define CONFIG_BOA_DEMUXER 0
+#define CONFIG_C93_DEMUXER 0
+#define CONFIG_CAF_DEMUXER 0
+#define CONFIG_CAVSVIDEO_DEMUXER 0
+#define CONFIG_CDG_DEMUXER 0
+#define CONFIG_CDXL_DEMUXER 0
+#define CONFIG_CINE_DEMUXER 0
+#define CONFIG_CODEC2_DEMUXER 0
+#define CONFIG_CODEC2RAW_DEMUXER 0
+#define CONFIG_CONCAT_DEMUXER 0
+#define CONFIG_DASH_DEMUXER 0
+#define CONFIG_DATA_DEMUXER 0
+#define CONFIG_DAUD_DEMUXER 0
+#define CONFIG_DCSTR_DEMUXER 0
+#define CONFIG_DFA_DEMUXER 0
+#define CONFIG_DIRAC_DEMUXER 0
+#define CONFIG_DNXHD_DEMUXER 0
+#define CONFIG_DSF_DEMUXER 0
+#define CONFIG_DSICIN_DEMUXER 0
+#define CONFIG_DSS_DEMUXER 0
+#define CONFIG_DTS_DEMUXER 0
+#define CONFIG_DTSHD_DEMUXER 0
+#define CONFIG_DV_DEMUXER 0
+#define CONFIG_DVBSUB_DEMUXER 0
+#define CONFIG_DVBTXT_DEMUXER 0
+#define CONFIG_DXA_DEMUXER 0
+#define CONFIG_EA_DEMUXER 0
+#define CONFIG_EA_CDATA_DEMUXER 0
+#define CONFIG_EAC3_DEMUXER 0
+#define CONFIG_EPAF_DEMUXER 0
+#define CONFIG_FFMETADATA_DEMUXER 0
+#define CONFIG_FILMSTRIP_DEMUXER 0
+#define CONFIG_FITS_DEMUXER 0
+#define CONFIG_FLAC_DEMUXER 1
+#define CONFIG_FLIC_DEMUXER 0
+#define CONFIG_FLV_DEMUXER 0
+#define CONFIG_LIVE_FLV_DEMUXER 0
+#define CONFIG_FOURXM_DEMUXER 0
+#define CONFIG_FRM_DEMUXER 0
+#define CONFIG_FSB_DEMUXER 0
+#define CONFIG_G722_DEMUXER 0
+#define CONFIG_G723_1_DEMUXER 0
+#define CONFIG_G726_DEMUXER 0
+#define CONFIG_G726LE_DEMUXER 0
+#define CONFIG_G729_DEMUXER 0
+#define CONFIG_GDV_DEMUXER 0
+#define CONFIG_GENH_DEMUXER 0
+#define CONFIG_GIF_DEMUXER 0
+#define CONFIG_GSM_DEMUXER 0
+#define CONFIG_GXF_DEMUXER 0
+#define CONFIG_H261_DEMUXER 0
+#define CONFIG_H263_DEMUXER 0
+#define CONFIG_H264_DEMUXER 0
+#define CONFIG_HEVC_DEMUXER 0
+#define CONFIG_HLS_DEMUXER 0
+#define CONFIG_HNM_DEMUXER 0
+#define CONFIG_ICO_DEMUXER 0
+#define CONFIG_IDCIN_DEMUXER 0
+#define CONFIG_IDF_DEMUXER 0
+#define CONFIG_IFF_DEMUXER 0
+#define CONFIG_ILBC_DEMUXER 0
+#define CONFIG_IMAGE2_DEMUXER 0
+#define CONFIG_IMAGE2PIPE_DEMUXER 0
+#define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 0
+#define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0
+#define CONFIG_INGENIENT_DEMUXER 0
+#define CONFIG_IPMOVIE_DEMUXER 0
+#define CONFIG_IRCAM_DEMUXER 0
+#define CONFIG_ISS_DEMUXER 0
+#define CONFIG_IV8_DEMUXER 0
+#define CONFIG_IVF_DEMUXER 0
+#define CONFIG_IVR_DEMUXER 0
+#define CONFIG_JACOSUB_DEMUXER 0
+#define CONFIG_JV_DEMUXER 0
+#define CONFIG_LMLM4_DEMUXER 0
+#define CONFIG_LOAS_DEMUXER 0
+#define CONFIG_LRC_DEMUXER 0
+#define CONFIG_LVF_DEMUXER 0
+#define CONFIG_LXF_DEMUXER 0
+#define CONFIG_M4V_DEMUXER 0
+#define CONFIG_MATROSKA_DEMUXER 1
+#define CONFIG_MGSTS_DEMUXER 0
+#define CONFIG_MICRODVD_DEMUXER 0
+#define CONFIG_MJPEG_DEMUXER 0
+#define CONFIG_MJPEG_2000_DEMUXER 0
+#define CONFIG_MLP_DEMUXER 0
+#define CONFIG_MLV_DEMUXER 0
+#define CONFIG_MM_DEMUXER 0
+#define CONFIG_MMF_DEMUXER 0
+#define CONFIG_MOV_DEMUXER 1
+#define CONFIG_MP3_DEMUXER 1
+#define CONFIG_MPC_DEMUXER 0
+#define CONFIG_MPC8_DEMUXER 0
+#define CONFIG_MPEGPS_DEMUXER 0
+#define CONFIG_MPEGTS_DEMUXER 0
+#define CONFIG_MPEGTSRAW_DEMUXER 0
+#define CONFIG_MPEGVIDEO_DEMUXER 0
+#define CONFIG_MPJPEG_DEMUXER 0
+#define CONFIG_MPL2_DEMUXER 0
+#define CONFIG_MPSUB_DEMUXER 0
+#define CONFIG_MSF_DEMUXER 0
+#define CONFIG_MSNWC_TCP_DEMUXER 0
+#define CONFIG_MTAF_DEMUXER 0
+#define CONFIG_MTV_DEMUXER 0
+#define CONFIG_MUSX_DEMUXER 0
+#define CONFIG_MV_DEMUXER 0
+#define CONFIG_MVI_DEMUXER 0
+#define CONFIG_MXF_DEMUXER 0
+#define CONFIG_MXG_DEMUXER 0
+#define CONFIG_NC_DEMUXER 0
+#define CONFIG_NISTSPHERE_DEMUXER 0
+#define CONFIG_NSP_DEMUXER 0
+#define CONFIG_NSV_DEMUXER 0
+#define CONFIG_NUT_DEMUXER 0
+#define CONFIG_NUV_DEMUXER 0
+#define CONFIG_OGG_DEMUXER 1
+#define CONFIG_OMA_DEMUXER 0
+#define CONFIG_PAF_DEMUXER 0
+#define CONFIG_PCM_ALAW_DEMUXER 0
+#define CONFIG_PCM_MULAW_DEMUXER 0
+#define CONFIG_PCM_F64BE_DEMUXER 0
+#define CONFIG_PCM_F64LE_DEMUXER 0
+#define CONFIG_PCM_F32BE_DEMUXER 0
+#define CONFIG_PCM_F32LE_DEMUXER 0
+#define CONFIG_PCM_S32BE_DEMUXER 0
+#define CONFIG_PCM_S32LE_DEMUXER 0
+#define CONFIG_PCM_S24BE_DEMUXER 0
+#define CONFIG_PCM_S24LE_DEMUXER 0
+#define CONFIG_PCM_S16BE_DEMUXER 0
+#define CONFIG_PCM_S16LE_DEMUXER 0
+#define CONFIG_PCM_S8_DEMUXER 0
+#define CONFIG_PCM_U32BE_DEMUXER 0
+#define CONFIG_PCM_U32LE_DEMUXER 0
+#define CONFIG_PCM_U24BE_DEMUXER 0
+#define CONFIG_PCM_U24LE_DEMUXER 0
+#define CONFIG_PCM_U16BE_DEMUXER 0
+#define CONFIG_PCM_U16LE_DEMUXER 0
+#define CONFIG_PCM_U8_DEMUXER 0
+#define CONFIG_PJS_DEMUXER 0
+#define CONFIG_PMP_DEMUXER 0
+#define CONFIG_PVA_DEMUXER 0
+#define CONFIG_PVF_DEMUXER 0
+#define CONFIG_QCP_DEMUXER 0
+#define CONFIG_R3D_DEMUXER 0
+#define CONFIG_RAWVIDEO_DEMUXER 0
+#define CONFIG_REALTEXT_DEMUXER 0
+#define CONFIG_REDSPARK_DEMUXER 0
+#define CONFIG_RL2_DEMUXER 0
+#define CONFIG_RM_DEMUXER 0
+#define CONFIG_ROQ_DEMUXER 0
+#define CONFIG_RPL_DEMUXER 0
+#define CONFIG_RSD_DEMUXER 0
+#define CONFIG_RSO_DEMUXER 0
+#define CONFIG_RTP_DEMUXER 0
+#define CONFIG_RTSP_DEMUXER 0
+#define CONFIG_S337M_DEMUXER 0
+#define CONFIG_SAMI_DEMUXER 0
+#define CONFIG_SAP_DEMUXER 0
+#define CONFIG_SBC_DEMUXER 0
+#define CONFIG_SBG_DEMUXER 0
+#define CONFIG_SCC_DEMUXER 0
+#define CONFIG_SDP_DEMUXER 0
+#define CONFIG_SDR2_DEMUXER 0
+#define CONFIG_SDS_DEMUXER 0
+#define CONFIG_SDX_DEMUXER 0
+#define CONFIG_SEGAFILM_DEMUXER 0
+#define CONFIG_SER_DEMUXER 0
+#define CONFIG_SHORTEN_DEMUXER 0
+#define CONFIG_SIFF_DEMUXER 0
+#define CONFIG_SLN_DEMUXER 0
+#define CONFIG_SMACKER_DEMUXER 0
+#define CONFIG_SMJPEG_DEMUXER 0
+#define CONFIG_SMUSH_DEMUXER 0
+#define CONFIG_SOL_DEMUXER 0
+#define CONFIG_SOX_DEMUXER 0
+#define CONFIG_SPDIF_DEMUXER 0
+#define CONFIG_SRT_DEMUXER 0
+#define CONFIG_STR_DEMUXER 0
+#define CONFIG_STL_DEMUXER 0
+#define CONFIG_SUBVIEWER1_DEMUXER 0
+#define CONFIG_SUBVIEWER_DEMUXER 0
+#define CONFIG_SUP_DEMUXER 0
+#define CONFIG_SVAG_DEMUXER 0
+#define CONFIG_SWF_DEMUXER 0
+#define CONFIG_TAK_DEMUXER 0
+#define CONFIG_TEDCAPTIONS_DEMUXER 0
+#define CONFIG_THP_DEMUXER 0
+#define CONFIG_THREEDOSTR_DEMUXER 0
+#define CONFIG_TIERTEXSEQ_DEMUXER 0
+#define CONFIG_TMV_DEMUXER 0
+#define CONFIG_TRUEHD_DEMUXER 0
+#define CONFIG_TTA_DEMUXER 0
+#define CONFIG_TXD_DEMUXER 0
+#define CONFIG_TTY_DEMUXER 0
+#define CONFIG_TY_DEMUXER 0
+#define CONFIG_V210_DEMUXER 0
+#define CONFIG_V210X_DEMUXER 0
+#define CONFIG_VAG_DEMUXER 0
+#define CONFIG_VC1_DEMUXER 0
+#define CONFIG_VC1T_DEMUXER 0
+#define CONFIG_VIVO_DEMUXER 0
+#define CONFIG_VMD_DEMUXER 0
+#define CONFIG_VOBSUB_DEMUXER 0
+#define CONFIG_VOC_DEMUXER 0
+#define CONFIG_VPK_DEMUXER 0
+#define CONFIG_VPLAYER_DEMUXER 0
+#define CONFIG_VQF_DEMUXER 0
+#define CONFIG_W64_DEMUXER 0
+#define CONFIG_WAV_DEMUXER 1
+#define CONFIG_WC3_DEMUXER 0
+#define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0
+#define CONFIG_WEBVTT_DEMUXER 0
+#define CONFIG_WSAUD_DEMUXER 0
+#define CONFIG_WSD_DEMUXER 0
+#define CONFIG_WSVQA_DEMUXER 0
+#define CONFIG_WTV_DEMUXER 0
+#define CONFIG_WVE_DEMUXER 0
+#define CONFIG_WV_DEMUXER 0
+#define CONFIG_XA_DEMUXER 0
+#define CONFIG_XBIN_DEMUXER 0
+#define CONFIG_XMV_DEMUXER 0
+#define CONFIG_XVAG_DEMUXER 0
+#define CONFIG_XWMA_DEMUXER 0
+#define CONFIG_YOP_DEMUXER 0
+#define CONFIG_YUV4MPEGPIPE_DEMUXER 0
+#define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0
+#define CONFIG_LIBGME_DEMUXER 0
+#define CONFIG_LIBMODPLUG_DEMUXER 0
+#define CONFIG_LIBOPENMPT_DEMUXER 0
+#define CONFIG_VAPOURSYNTH_DEMUXER 0
 #define CONFIG_A64_MUXER 0
 #define CONFIG_AC3_MUXER 0
 #define CONFIG_ADTS_MUXER 0
@@ -2106,6 +2252,8 @@
 #define CONFIG_AIFF_MUXER 0
 #define CONFIG_AMR_MUXER 0
 #define CONFIG_APNG_MUXER 0
+#define CONFIG_APTX_MUXER 0
+#define CONFIG_APTX_HD_MUXER 0
 #define CONFIG_ASF_MUXER 0
 #define CONFIG_ASS_MUXER 0
 #define CONFIG_AST_MUXER 0
@@ -2113,9 +2261,12 @@
 #define CONFIG_AU_MUXER 0
 #define CONFIG_AVI_MUXER 0
 #define CONFIG_AVM2_MUXER 0
+#define CONFIG_AVS2_MUXER 0
 #define CONFIG_BIT_MUXER 0
 #define CONFIG_CAF_MUXER 0
 #define CONFIG_CAVSVIDEO_MUXER 0
+#define CONFIG_CODEC2_MUXER 0
+#define CONFIG_CODEC2RAW_MUXER 0
 #define CONFIG_CRC_MUXER 0
 #define CONFIG_DASH_MUXER 0
 #define CONFIG_DATA_MUXER 0
@@ -2126,9 +2277,9 @@
 #define CONFIG_DV_MUXER 0
 #define CONFIG_EAC3_MUXER 0
 #define CONFIG_F4V_MUXER 0
-#define CONFIG_FFM_MUXER 0
 #define CONFIG_FFMETADATA_MUXER 0
 #define CONFIG_FIFO_MUXER 0
+#define CONFIG_FIFO_TEST_MUXER 0
 #define CONFIG_FILMSTRIP_MUXER 0
 #define CONFIG_FITS_MUXER 0
 #define CONFIG_FLAC_MUXER 0
@@ -2221,7 +2372,9 @@
 #define CONFIG_RTP_MPEGTS_MUXER 0
 #define CONFIG_RTSP_MUXER 0
 #define CONFIG_SAP_MUXER 0
+#define CONFIG_SBC_MUXER 0
 #define CONFIG_SCC_MUXER 0
+#define CONFIG_SEGAFILM_MUXER 0
 #define CONFIG_SEGMENT_MUXER 0
 #define CONFIG_STREAM_SEGMENT_MUXER 0
 #define CONFIG_SINGLEJPEG_MUXER 0
@@ -2254,58 +2407,6 @@
 #define CONFIG_WV_MUXER 0
 #define CONFIG_YUV4MPEGPIPE_MUXER 0
 #define CONFIG_CHROMAPRINT_MUXER 0
-#define CONFIG_ALSA_OUTDEV 0
-#define CONFIG_CACA_OUTDEV 0
-#define CONFIG_DECKLINK_OUTDEV 0
-#define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
-#define CONFIG_FBDEV_OUTDEV 0
-#define CONFIG_OPENGL_OUTDEV 0
-#define CONFIG_OSS_OUTDEV 0
-#define CONFIG_PULSE_OUTDEV 0
-#define CONFIG_SDL2_OUTDEV 0
-#define CONFIG_SNDIO_OUTDEV 0
-#define CONFIG_V4L2_OUTDEV 0
-#define CONFIG_XV_OUTDEV 0
-#define CONFIG_AAC_PARSER 0
-#define CONFIG_AAC_LATM_PARSER 0
-#define CONFIG_AC3_PARSER 0
-#define CONFIG_ADX_PARSER 0
-#define CONFIG_BMP_PARSER 0
-#define CONFIG_CAVSVIDEO_PARSER 0
-#define CONFIG_COOK_PARSER 0
-#define CONFIG_DCA_PARSER 0
-#define CONFIG_DIRAC_PARSER 0
-#define CONFIG_DNXHD_PARSER 0
-#define CONFIG_DPX_PARSER 0
-#define CONFIG_DVAUDIO_PARSER 0
-#define CONFIG_DVBSUB_PARSER 0
-#define CONFIG_DVDSUB_PARSER 0
-#define CONFIG_DVD_NAV_PARSER 0
-#define CONFIG_FLAC_PARSER 1
-#define CONFIG_G729_PARSER 0
-#define CONFIG_GSM_PARSER 0
-#define CONFIG_H261_PARSER 0
-#define CONFIG_H263_PARSER 0
-#define CONFIG_H264_PARSER 0
-#define CONFIG_HEVC_PARSER 0
-#define CONFIG_MJPEG_PARSER 0
-#define CONFIG_MLP_PARSER 0
-#define CONFIG_MPEG4VIDEO_PARSER 0
-#define CONFIG_MPEGAUDIO_PARSER 0
-#define CONFIG_MPEGVIDEO_PARSER 0
-#define CONFIG_OPUS_PARSER 1
-#define CONFIG_PNG_PARSER 0
-#define CONFIG_PNM_PARSER 0
-#define CONFIG_RV30_PARSER 0
-#define CONFIG_RV40_PARSER 0
-#define CONFIG_SIPR_PARSER 0
-#define CONFIG_TAK_PARSER 0
-#define CONFIG_VC1_PARSER 0
-#define CONFIG_VORBIS_PARSER 1
-#define CONFIG_VP3_PARSER 1
-#define CONFIG_VP8_PARSER 1
-#define CONFIG_VP9_PARSER 1
-#define CONFIG_XMA_PARSER 0
 #define CONFIG_ASYNC_PROTOCOL 0
 #define CONFIG_BLURAY_PROTOCOL 0
 #define CONFIG_CACHE_PROTOCOL 0
@@ -2339,10 +2440,7 @@
 #define CONFIG_SUBFILE_PROTOCOL 0
 #define CONFIG_TEE_PROTOCOL 0
 #define CONFIG_TCP_PROTOCOL 0
-#define CONFIG_TLS_GNUTLS_PROTOCOL 0
-#define CONFIG_TLS_SCHANNEL_PROTOCOL 0
-#define CONFIG_TLS_SECURETRANSPORT_PROTOCOL 0
-#define CONFIG_TLS_OPENSSL_PROTOCOL 0
+#define CONFIG_TLS_PROTOCOL 0
 #define CONFIG_UDP_PROTOCOL 0
 #define CONFIG_UDPLITE_PROTOCOL 0
 #define CONFIG_UNIX_PROTOCOL 0
@@ -2351,6 +2449,7 @@
 #define CONFIG_LIBRTMPS_PROTOCOL 0
 #define CONFIG_LIBRTMPT_PROTOCOL 0
 #define CONFIG_LIBRTMPTE_PROTOCOL 0
+#define CONFIG_LIBSRT_PROTOCOL 0
 #define CONFIG_LIBSSH_PROTOCOL 0
 #define CONFIG_LIBSMBCLIENT_PROTOCOL 0
 #endif /* FFMPEG_CONFIG_H */

diff --git a/fuchsia/config/default/x64/libavcodec/bsf_list.c b/fuchsia/config/default/x64/libavcodec/bsf_list.c
index d31ece9..ee5ac8a 100644
--- a/fuchsia/config/default/x64/libavcodec/bsf_list.c
+++ b/fuchsia/config/default/x64/libavcodec/bsf_list.c

@@ -1,3 +1,4 @@
 static const AVBitStreamFilter * const bitstream_filters[] = {
     &ff_null_bsf,
+    &ff_vp9_superframe_split_bsf,
     NULL };

diff --git a/fuchsia/config/default/x64/libavutil/avconfig.h b/fuchsia/config/default/x64/libavutil/avconfig.h
index f10aa61..c289fbb 100644
--- a/fuchsia/config/default/x64/libavutil/avconfig.h
+++ b/fuchsia/config/default/x64/libavutil/avconfig.h

@@ -1,4 +1,4 @@
-/* Generated by ffconf */
+/* Generated by ffmpeg configure */
 #ifndef AVUTIL_AVCONFIG_H
 #define AVUTIL_AVCONFIG_H
 #define AV_HAVE_BIGENDIAN 0

diff --git a/fuchsia/config/default/x64/libavutil/ffversion.h b/fuchsia/config/default/x64/libavutil/ffversion.h
index 0342cbb..b97c110 100644
--- a/fuchsia/config/default/x64/libavutil/ffversion.h
+++ b/fuchsia/config/default/x64/libavutil/ffversion.h

@@ -1,5 +1,5 @@
 /* Automatically generated by version.sh, do not manually edit! */
 #ifndef AVUTIL_FFVERSION_H
 #define AVUTIL_FFVERSION_H
-#define FFMPEG_VERSION "n3.4.2-1-g67e0ba6f22"
+#define FFMPEG_VERSION "N-92356-g750018e43a"
 #endif /* AVUTIL_FFVERSION_H */

diff --git a/fuchsia/config/max/arm64/config.h b/fuchsia/config/max/arm64/config.h
index 02fd756..49b23e0 100644
--- a/fuchsia/config/max/arm64/config.h
+++ b/fuchsia/config/max/arm64/config.h

@@ -1,12 +1,12 @@
 /* Automatically generated by configure - do not modify! */
 #ifndef FFMPEG_CONFIG_H
 #define FFMPEG_CONFIG_H
-#define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --disable-bzlib --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vda --disable-vdpau --disable-videotoolbox --disable-nvenc --disable-cuda --disable-cuvid --disable-v4l2_m2m --enable-decoder='vorbis,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-decoder='theora,vp8' --enable-demuxer='ogg,matroska,wav,flac' --enable-parser='opus,vorbis,flac' --enable-parser='vp3,vp8' --optflags='\"-O2\"' --enable-pic --enable-pic --enable-lto --cc=clang --cxx=clang++ --ld=clang --extra-ldflags='-fuse-ld=lld' --enable-cross-compile --cross-prefix=/usr/bin/aarch64-linux-gnu- --target-os=linux --arch=aarch64 --enable-armv8 --extra-cflags='-march=armv8-a' --sysroot=/usr/local/google/home/phosek/fuchsia/third_party/ffmpeg/../../buildtools/linux-arm64/sysroot --extra-cflags='--target=aarch64-linux-gnu' --extra-ldflags='--target=aarch64-linux-gnu' --disable-linux-perf --enable-decoder='aac,h264,mp3' --enable-demuxer='aac,mp3,mov' --enable-parser='aac,h264,mpegaudio' --enable-decoder=mpeg4 --enable-parser='h263,mpeg4video' --enable-demuxer=avi --enable-demuxer=amr --enable-decoder='amrnb,amrwb' --enable-decoder=gsm_ms --enable-demuxer=gsm --enable-parser=gsm"
+#define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --disable-debug --disable-bzlib --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-autodetect --enable-decoder='vorbis,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le,mp3' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-decoder='theora,vp8,sbc,aptx' --enable-demuxer='ogg,matroska,wav,flac,mp3,mov' --enable-parser='opus,vorbis,flac,mpegaudio' --enable-parser='vp3,vp8' --optflags='\"-O2\"' --enable-pic --x86asmexe=yasm --enable-pic --enable-lto --cc=clang --cxx=clang++ --ld=clang --enable-cross-compile --cross-prefix=/usr/bin/aarch64-linux-gnu- --target-os=linux --arch=aarch64 --enable-armv8 --extra-cflags='-march=armv8-a' --sysroot=/usr/local/google/home/dalesat/fuchsia/third_party/ffmpeg/../../buildtools/linux-arm64/sysroot --extra-cflags='--target=aarch64-linux-gnu' --extra-ldflags='--target=aarch64-linux-gnu' --disable-linux-perf --enable-decoder='aac,h264,mp3' --enable-demuxer='aac,mp3,mov' --enable-parser='aac,h264,mpegaudio' --enable-decoder=mpeg4 --enable-parser='h263,mpeg4video' --enable-demuxer=avi --enable-demuxer=amr --enable-decoder='amrnb,amrwb' --enable-decoder=gsm_ms --enable-demuxer=gsm --enable-parser=gsm"
 #define FFMPEG_LICENSE "LGPL version 2.1 or later"
 #define CONFIG_THIS_YEAR 2018
 #define FFMPEG_DATADIR "/usr/local/share/ffmpeg"
 #define AVCONV_DATADIR "/usr/local/share/ffmpeg"
-#define CC_IDENT "Fuchsia clang version 7.0.0 (https://fuchsia.googlesource.com/a/third_party/clang 5034f5fddab316b12887b39b129ebbca999500e2) (https://fuchsia.googlesource.com/a/third_party/llvm 197b6c81959a17be37035d4fe71b382023bff2f0) (based on LLVM 7.0.0svn)"
+#define CC_IDENT "Fuchsia clang version 8.0.0 (https://fuchsia.googlesource.com/a/third_party/clang 0a217961416a0cbf1ac29bcb26577d41ca0e0e8d) (https://fuchsia.googlesource.com/a/third_party/llvm aff6cf491087ba32e338c9af076c9b7739c978a0) (based on LLVM 8.0.0svn)"
 #define av_restrict restrict
 #define EXTERN_PREFIX ""
 #define EXTERN_ASM 
@@ -57,6 +57,7 @@
 #define HAVE_AMD3DNOWEXT 0
 #define HAVE_AVX 0
 #define HAVE_AVX2 0
+#define HAVE_AVX512 0
 #define HAVE_FMA3 0
 #define HAVE_FMA4 0
 #define HAVE_MMX 0
@@ -101,6 +102,7 @@
 #define HAVE_AMD3DNOWEXT_EXTERNAL 0
 #define HAVE_AVX_EXTERNAL 0
 #define HAVE_AVX2_EXTERNAL 0
+#define HAVE_AVX512_EXTERNAL 0
 #define HAVE_FMA3_EXTERNAL 0
 #define HAVE_FMA4_EXTERNAL 0
 #define HAVE_MMX_EXTERNAL 0
@@ -145,6 +147,7 @@
 #define HAVE_AMD3DNOWEXT_INLINE 0
 #define HAVE_AVX_INLINE 0
 #define HAVE_AVX2_INLINE 0
+#define HAVE_AVX512_INLINE 0
 #define HAVE_FMA3_INLINE 0
 #define HAVE_FMA4_INLINE 0
 #define HAVE_MMX_INLINE 0
@@ -174,36 +177,29 @@
 #define HAVE_FAST_64BIT 1
 #define HAVE_FAST_CLZ 1
 #define HAVE_FAST_CMOV 0
-#define HAVE_LOCAL_ALIGNED_8 0
-#define HAVE_LOCAL_ALIGNED_16 0
-#define HAVE_LOCAL_ALIGNED_32 0
+#define HAVE_LOCAL_ALIGNED 0
 #define HAVE_SIMD_ALIGN_16 1
 #define HAVE_SIMD_ALIGN_32 0
-#define HAVE_ATOMICS_GCC 1
-#define HAVE_ATOMICS_SUNCC 0
-#define HAVE_ATOMICS_WIN32 0
+#define HAVE_SIMD_ALIGN_64 0
 #define HAVE_ATOMIC_CAS_PTR 0
 #define HAVE_MACHINE_RW_BARRIER 0
 #define HAVE_MEMORYBARRIER 0
 #define HAVE_MM_EMPTY 0
 #define HAVE_RDTSC 0
-#define HAVE_SARESTART 1
 #define HAVE_SEM_TIMEDWAIT 1
 #define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1
-#define HAVE_CABS 1
-#define HAVE_CEXP 1
+#define HAVE_CABS 0
+#define HAVE_CEXP 0
 #define HAVE_INLINE_ASM 1
 #define HAVE_SYMVER 0
 #define HAVE_X86ASM 0
 #define HAVE_BIGENDIAN 0
 #define HAVE_FAST_UNALIGNED 1
-#define HAVE_ALTIVEC_H 0
 #define HAVE_ARPA_INET_H 0
 #define HAVE_ASM_TYPES_H 1
 #define HAVE_CDIO_PARANOIA_H 0
 #define HAVE_CDIO_PARANOIA_PARANOIA_H 0
 #define HAVE_CUDA_H 0
-#define HAVE_D3D11_H 0
 #define HAVE_DISPATCH_DISPATCH_H 0
 #define HAVE_DEV_BKTR_IOCTL_BT848_H 0
 #define HAVE_DEV_BKTR_IOCTL_METEOR_H 0
@@ -212,27 +208,18 @@
 #define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0
 #define HAVE_DIRECT_H 0
 #define HAVE_DIRENT_H 1
-#define HAVE_DLFCN_H 1
 #define HAVE_DXGIDEBUG_H 0
 #define HAVE_DXVA_H 0
 #define HAVE_ES2_GL_H 0
 #define HAVE_GSM_H 0
 #define HAVE_IO_H 0
-#define HAVE_MACH_MACH_TIME_H 0
+#define HAVE_LINUX_PERF_EVENT_H 1
 #define HAVE_MACHINE_IOCTL_BT848_H 0
 #define HAVE_MACHINE_IOCTL_METEOR_H 0
 #define HAVE_MALLOC_H 1
 #define HAVE_OPENCV2_CORE_CORE_C_H 0
-#define HAVE_OPENJPEG_2_3_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_2_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_1_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_0_OPENJPEG_H 0
-#define HAVE_OPENJPEG_1_5_OPENJPEG_H 0
 #define HAVE_OPENGL_GL3_H 0
 #define HAVE_POLL_H 1
-#define HAVE_SOUNDCARD_H 0
-#define HAVE_STDATOMIC_H 1
-#define HAVE_SYS_MMAN_H 1
 #define HAVE_SYS_PARAM_H 1
 #define HAVE_SYS_RESOURCE_H 1
 #define HAVE_SYS_SELECT_H 1
@@ -276,17 +263,20 @@
 #define HAVE_SINF 1
 #define HAVE_TRUNC 1
 #define HAVE_TRUNCF 1
+#define HAVE_DOS_PATHS 0
+#define HAVE_LIBC_MSVCRT 0
+#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+#define HAVE_SECTION_DATA_REL_RO 1
+#define HAVE_THREADS 1
+#define HAVE_UWP 0
+#define HAVE_WINRT 0
 #define HAVE_ACCESS 1
 #define HAVE_ALIGNED_MALLOC 0
 #define HAVE_ARC4RANDOM 0
 #define HAVE_CLOCK_GETTIME 1
 #define HAVE_CLOSESOCKET 0
 #define HAVE_COMMANDLINETOARGVW 0
-#define HAVE_COTASKMEMFREE 0
-#define HAVE_CRYPTGENRANDOM 0
 #define HAVE_FCNTL 1
-#define HAVE_FLT_LIM 1
-#define HAVE_FORK 1
 #define HAVE_GETADDRINFO 0
 #define HAVE_GETHRTIME 0
 #define HAVE_GETOPT 1
@@ -301,9 +291,7 @@
 #define HAVE_GMTIME_R 1
 #define HAVE_INET_ATON 0
 #define HAVE_ISATTY 1
-#define HAVE_JACK_PORT_GET_LATENCY_RANGE 0
 #define HAVE_KBHIT 0
-#define HAVE_LOADLIBRARY 0
 #define HAVE_LOCALTIME_R 1
 #define HAVE_LSTAT 1
 #define HAVE_LZO1X_999_COMPRESS 0
@@ -318,6 +306,7 @@
 #define HAVE_POSIX_MEMALIGN 1
 #define HAVE_PTHREAD_CANCEL 1
 #define HAVE_SCHED_GETAFFINITY 1
+#define HAVE_SECITEMIMPORT 0
 #define HAVE_SETCONSOLETEXTATTRIBUTE 0
 #define HAVE_SETCONSOLECTRLHANDLER 0
 #define HAVE_SETMODE 0
@@ -330,20 +319,23 @@
 #define HAVE_UTGETOSTYPEFROMSTRING 0
 #define HAVE_VIRTUALALLOC 0
 #define HAVE_WGLGETPROCADDRESS 0
+#define HAVE_BCRYPT 0
+#define HAVE_VAAPI_DRM 0
+#define HAVE_VAAPI_X11 0
+#define HAVE_VDPAU_X11 0
 #define HAVE_PTHREADS 1
 #define HAVE_OS2THREADS 0
 #define HAVE_W32THREADS 0
+#define HAVE_AS_ARCH_DIRECTIVE 0
 #define HAVE_AS_DN_DIRECTIVE 0
 #define HAVE_AS_FPU_DIRECTIVE 0
 #define HAVE_AS_FUNC 0
 #define HAVE_AS_OBJECT_ARCH 0
 #define HAVE_ASM_MOD_Q 0
-#define HAVE_ATTRIBUTE_MAY_ALIAS 1
-#define HAVE_ATTRIBUTE_PACKED 1
 #define HAVE_BLOCKS_EXTENSION 0
 #define HAVE_EBP_AVAILABLE 0
 #define HAVE_EBX_AVAILABLE 0
-#define HAVE_GNU_AS 1
+#define HAVE_GNU_AS 0
 #define HAVE_GNU_WINDRES 0
 #define HAVE_IBM_ASM 0
 #define HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS 0
@@ -356,7 +348,6 @@
 /* #define HAVE_VFP_ARGS 0 -- softfp/hardfp selection is done by the fuchsia build */
 #define HAVE_XFORM_ASM 0
 #define HAVE_XMM_CLOBBERS 0
-#define HAVE_CONDITION_VARIABLE_PTR 0
 #define HAVE_KCMVIDEOCODECTYPE_HEVC 0
 #define HAVE_SOCKLEN_T 0
 #define HAVE_STRUCT_ADDRINFO 0
@@ -372,22 +363,17 @@
 #define HAVE_STRUCT_SOCKADDR_STORAGE 0
 #define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1
 #define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 1
-#define HAVE_ATOMICS_NATIVE 1
-#define HAVE_DOS_PATHS 0
-#define HAVE_LIBC_MSVCRT 0
 #define HAVE_MAKEINFO 1
 #define HAVE_MAKEINFO_HTML 1
-#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+#define HAVE_OPENCL_D3D11 0
+#define HAVE_OPENCL_DRM_ARM 0
+#define HAVE_OPENCL_DRM_BEIGNET 0
+#define HAVE_OPENCL_DXVA2 0
+#define HAVE_OPENCL_VAAPI_BEIGNET 0
+#define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0
 #define HAVE_PERL 1
 #define HAVE_POD2MAN 1
-#define HAVE_SECTION_DATA_REL_RO 1
 #define HAVE_TEXI2HTML 0
-#define HAVE_THREADS 1
-#define HAVE_UWP 0
-#define HAVE_VAAPI_DRM 0
-#define HAVE_VAAPI_X11 0
-#define HAVE_VDPAU_X11 0
-#define HAVE_WINRT 0
 #define CONFIG_DOC 0
 #define CONFIG_HTMLPAGES 0
 #define CONFIG_MANPAGES 0
@@ -414,41 +400,31 @@
 #define CONFIG_SCALING_VIDEO_EXAMPLE 0
 #define CONFIG_TRANSCODE_AAC_EXAMPLE 0
 #define CONFIG_TRANSCODING_EXAMPLE 0
-#define CONFIG_ALSA 0
-#define CONFIG_APPKIT 0
-#define CONFIG_AVFOUNDATION 0
-#define CONFIG_BZLIB 0
-#define CONFIG_COREIMAGE 0
-#define CONFIG_ICONV 0
-#define CONFIG_JACK 0
-#define CONFIG_LIBXCB 0
-#define CONFIG_LIBXCB_SHM 0
-#define CONFIG_LIBXCB_SHAPE 0
-#define CONFIG_LIBXCB_XFIXES 0
-#define CONFIG_LZMA 1
-#define CONFIG_SCHANNEL 0
-#define CONFIG_SDL2 0
-#define CONFIG_SECURETRANSPORT 0
-#define CONFIG_SNDIO 0
-#define CONFIG_XLIB 0
-#define CONFIG_ZLIB 0
+#define CONFIG_VAAPI_ENCODE_EXAMPLE 0
+#define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0
 #define CONFIG_AVISYNTH 0
 #define CONFIG_FREI0R 0
 #define CONFIG_LIBCDIO 0
+#define CONFIG_LIBDAVS2 0
 #define CONFIG_LIBRUBBERBAND 0
 #define CONFIG_LIBVIDSTAB 0
 #define CONFIG_LIBX264 0
 #define CONFIG_LIBX265 0
 #define CONFIG_LIBXAVS 0
+#define CONFIG_LIBXAVS2 0
 #define CONFIG_LIBXVID 0
 #define CONFIG_DECKLINK 0
 #define CONFIG_LIBNDI_NEWTEK 0
 #define CONFIG_LIBFDK_AAC 0
 #define CONFIG_OPENSSL 0
+#define CONFIG_LIBTLS 0
 #define CONFIG_GMP 0
+#define CONFIG_LIBLENSFUN 0
 #define CONFIG_LIBOPENCORE_AMRNB 0
 #define CONFIG_LIBOPENCORE_AMRWB 0
+#define CONFIG_LIBVMAF 0
 #define CONFIG_LIBVO_AMRWBENC 0
+#define CONFIG_MBEDTLS 0
 #define CONFIG_RKMPP 0
 #define CONFIG_LIBSMBCLIENT 0
 #define CONFIG_CHROMAPRINT 0
@@ -456,11 +432,13 @@
 #define CONFIG_GNUTLS 0
 #define CONFIG_JNI 0
 #define CONFIG_LADSPA 0
+#define CONFIG_LIBAOM 0
 #define CONFIG_LIBASS 0
 #define CONFIG_LIBBLURAY 0
 #define CONFIG_LIBBS2B 0
 #define CONFIG_LIBCACA 0
 #define CONFIG_LIBCELT 0
+#define CONFIG_LIBCODEC2 0
 #define CONFIG_LIBDC1394 0
 #define CONFIG_LIBDRM 0
 #define CONFIG_LIBFLITE 0
@@ -471,6 +449,8 @@
 #define CONFIG_LIBGSM 0
 #define CONFIG_LIBIEC61883 0
 #define CONFIG_LIBILBC 0
+#define CONFIG_LIBJACK 0
+#define CONFIG_LIBKLVANC 0
 #define CONFIG_LIBKVAZAAR 0
 #define CONFIG_LIBMODPLUG 0
 #define CONFIG_LIBMP3LAME 0
@@ -488,12 +468,13 @@
 #define CONFIG_LIBSNAPPY 0
 #define CONFIG_LIBSOXR 0
 #define CONFIG_LIBSPEEX 0
+#define CONFIG_LIBSRT 0
 #define CONFIG_LIBSSH 0
+#define CONFIG_LIBTENSORFLOW 0
 #define CONFIG_LIBTESSERACT 0
 #define CONFIG_LIBTHEORA 0
 #define CONFIG_LIBTWOLAME 0
 #define CONFIG_LIBV4L2 0
-#define CONFIG_LIBVMAF 0
 #define CONFIG_LIBVORBIS 0
 #define CONFIG_LIBVPX 0
 #define CONFIG_LIBWAVPACK 0
@@ -502,28 +483,49 @@
 #define CONFIG_LIBZIMG 0
 #define CONFIG_LIBZMQ 0
 #define CONFIG_LIBZVBI 0
+#define CONFIG_LV2 0
 #define CONFIG_MEDIACODEC 0
 #define CONFIG_OPENAL 0
-#define CONFIG_OPENCL 0
 #define CONFIG_OPENGL 0
+#define CONFIG_VAPOURSYNTH 0
+#define CONFIG_ALSA 1
+#define CONFIG_APPKIT 0
+#define CONFIG_AVFOUNDATION 0
+#define CONFIG_BZLIB 0
+#define CONFIG_COREIMAGE 0
+#define CONFIG_ICONV 0
+#define CONFIG_LIBXCB 0
+#define CONFIG_LIBXCB_SHM 0
+#define CONFIG_LIBXCB_SHAPE 0
+#define CONFIG_LIBXCB_XFIXES 0
+#define CONFIG_LZMA 0
+#define CONFIG_SCHANNEL 0
+#define CONFIG_SDL2 0
+#define CONFIG_SECURETRANSPORT 0
+#define CONFIG_SNDIO 0
+#define CONFIG_XLIB 0
+#define CONFIG_ZLIB 0
+#define CONFIG_CUDA_SDK 0
+#define CONFIG_LIBNPP 0
+#define CONFIG_LIBMFX 0
+#define CONFIG_MMAL 0
+#define CONFIG_OMX 0
+#define CONFIG_OPENCL 0
+#define CONFIG_AMF 0
 #define CONFIG_AUDIOTOOLBOX 0
 #define CONFIG_CRYSTALHD 0
 #define CONFIG_CUDA 0
 #define CONFIG_CUVID 0
 #define CONFIG_D3D11VA 0
 #define CONFIG_DXVA2 0
+#define CONFIG_FFNVCODEC 0
+#define CONFIG_NVDEC 0
 #define CONFIG_NVENC 0
 #define CONFIG_VAAPI 0
-#define CONFIG_VDA 0
 #define CONFIG_VDPAU 0
 #define CONFIG_VIDEOTOOLBOX 0
 #define CONFIG_V4L2_M2M 0
 #define CONFIG_XVMC 0
-#define CONFIG_CUDA_SDK 0
-#define CONFIG_LIBNPP 0
-#define CONFIG_LIBMFX 0
-#define CONFIG_MMAL 0
-#define CONFIG_OMX 0
 #define CONFIG_FTRAPV 0
 #define CONFIG_GRAY 0
 #define CONFIG_HARDCODED_TABLES 0
@@ -537,18 +539,17 @@
 #define CONFIG_GPL 0
 #define CONFIG_NONFREE 0
 #define CONFIG_VERSION3 0
-#define CONFIG_AVCODEC 1
 #define CONFIG_AVDEVICE 0
 #define CONFIG_AVFILTER 0
+#define CONFIG_SWSCALE 0
+#define CONFIG_POSTPROC 0
 #define CONFIG_AVFORMAT 1
+#define CONFIG_AVCODEC 1
+#define CONFIG_SWRESAMPLE 0
 #define CONFIG_AVRESAMPLE 0
 #define CONFIG_AVUTIL 1
-#define CONFIG_POSTPROC 0
-#define CONFIG_SWRESAMPLE 0
-#define CONFIG_SWSCALE 0
 #define CONFIG_FFPLAY 0
 #define CONFIG_FFPROBE 0
-#define CONFIG_FFSERVER 0
 #define CONFIG_FFMPEG 0
 #define CONFIG_DCT 1
 #define CONFIG_DWT 0
@@ -585,12 +586,21 @@
 #define CONFIG_PROTOCOLS 0
 #define CONFIG_AANDCTTABLES 0
 #define CONFIG_AC3DSP 0
-#define CONFIG_AUDIO_FRAME_QUEUE 0
+#define CONFIG_ADTS_HEADER 1
+#define CONFIG_AUDIO_FRAME_QUEUE 1
 #define CONFIG_AUDIODSP 0
 #define CONFIG_BLOCKDSP 1
 #define CONFIG_BSWAPDSP 0
 #define CONFIG_CABAC 1
+#define CONFIG_CBS 0
+#define CONFIG_CBS_AV1 0
+#define CONFIG_CBS_H264 0
+#define CONFIG_CBS_H265 0
+#define CONFIG_CBS_JPEG 0
+#define CONFIG_CBS_MPEG2 0
+#define CONFIG_CBS_VP9 0
 #define CONFIG_DIRAC_PARSE 1
+#define CONFIG_DNN 0
 #define CONFIG_DVPROFILE 0
 #define CONFIG_EXIF 1
 #define CONFIG_FAANDCT 0
@@ -640,6 +650,7 @@
 #define CONFIG_QSV 0
 #define CONFIG_QSVDEC 0
 #define CONFIG_QSVENC 0
+#define CONFIG_QSVVPP 0
 #define CONFIG_RANGECODER 0
 #define CONFIG_RIFFDEC 1
 #define CONFIG_RIFFENC 0
@@ -663,22 +674,32 @@
 #define CONFIG_WMA_FREQS 0
 #define CONFIG_WMV2DSP 0
 #define CONFIG_AAC_ADTSTOASC_BSF 0
+#define CONFIG_AV1_METADATA_BSF 0
 #define CONFIG_CHOMP_BSF 0
 #define CONFIG_DUMP_EXTRADATA_BSF 0
 #define CONFIG_DCA_CORE_BSF 0
+#define CONFIG_EAC3_CORE_BSF 0
 #define CONFIG_EXTRACT_EXTRADATA_BSF 0
+#define CONFIG_FILTER_UNITS_BSF 0
+#define CONFIG_H264_METADATA_BSF 0
 #define CONFIG_H264_MP4TOANNEXB_BSF 0
+#define CONFIG_H264_REDUNDANT_PPS_BSF 0
+#define CONFIG_HAPQA_EXTRACT_BSF 0
+#define CONFIG_HEVC_METADATA_BSF 0
 #define CONFIG_HEVC_MP4TOANNEXB_BSF 0
 #define CONFIG_IMX_DUMP_HEADER_BSF 0
 #define CONFIG_MJPEG2JPEG_BSF 0
 #define CONFIG_MJPEGA_DUMP_HEADER_BSF 0
 #define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0
+#define CONFIG_MPEG2_METADATA_BSF 0
 #define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0
 #define CONFIG_MOV2TEXTSUB_BSF 0
 #define CONFIG_NOISE_BSF 0
 #define CONFIG_NULL_BSF 1
 #define CONFIG_REMOVE_EXTRADATA_BSF 0
 #define CONFIG_TEXT2MOVSUB_BSF 0
+#define CONFIG_TRACE_HEADERS_BSF 0
+#define CONFIG_VP9_METADATA_BSF 0
 #define CONFIG_VP9_RAW_REORDER_BSF 0
 #define CONFIG_VP9_SUPERFRAME_BSF 0
 #define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0
@@ -701,6 +722,7 @@
 #define CONFIG_BETHSOFTVID_DECODER 0
 #define CONFIG_BFI_DECODER 0
 #define CONFIG_BINK_DECODER 0
+#define CONFIG_BITPACKED_DECODER 0
 #define CONFIG_BMP_DECODER 0
 #define CONFIG_BMV_VIDEO_DECODER 0
 #define CONFIG_BRENDER_PIX_DECODER 0
@@ -766,8 +788,6 @@
 #define CONFIG_H264_MMAL_DECODER 0
 #define CONFIG_H264_QSV_DECODER 0
 #define CONFIG_H264_RKMPP_DECODER 0
-#define CONFIG_H264_VDA_DECODER 0
-#define CONFIG_H264_VDPAU_DECODER 0
 #define CONFIG_HAP_DECODER 0
 #define CONFIG_HEVC_DECODER 0
 #define CONFIG_HEVC_QSV_DECODER 0
@@ -779,6 +799,7 @@
 #define CONFIG_HUFFYUV_DECODER 0
 #define CONFIG_IDCIN_DECODER 0
 #define CONFIG_IFF_ILBM_DECODER 0
+#define CONFIG_IMM4_DECODER 0
 #define CONFIG_INDEO2_DECODER 0
 #define CONFIG_INDEO3_DECODER 0
 #define CONFIG_INDEO4_DECODER 0
@@ -799,17 +820,13 @@
 #define CONFIG_MJPEGB_DECODER 0
 #define CONFIG_MMVIDEO_DECODER 0
 #define CONFIG_MOTIONPIXELS_DECODER 0
-#define CONFIG_MPEG_XVMC_DECODER 0
 #define CONFIG_MPEG1VIDEO_DECODER 0
 #define CONFIG_MPEG2VIDEO_DECODER 0
 #define CONFIG_MPEG4_DECODER 1
 #define CONFIG_MPEG4_CRYSTALHD_DECODER 0
 #define CONFIG_MPEG4_V4L2M2M_DECODER 0
 #define CONFIG_MPEG4_MMAL_DECODER 0
-#define CONFIG_MPEG4_VDPAU_DECODER 0
 #define CONFIG_MPEGVIDEO_DECODER 0
-#define CONFIG_MPEG_VDPAU_DECODER 0
-#define CONFIG_MPEG1_VDPAU_DECODER 0
 #define CONFIG_MPEG1_V4L2M2M_DECODER 0
 #define CONFIG_MPEG2_MMAL_DECODER 0
 #define CONFIG_MPEG2_CRYSTALHD_DECODER 0
@@ -830,6 +847,7 @@
 #define CONFIG_MTS2_DECODER 0
 #define CONFIG_MVC1_DECODER 0
 #define CONFIG_MVC2_DECODER 0
+#define CONFIG_MWSC_DECODER 0
 #define CONFIG_MXPEG_DECODER 0
 #define CONFIG_NUV_DECODER 0
 #define CONFIG_PAF_VIDEO_DECODER 0
@@ -843,7 +861,7 @@
 #define CONFIG_PNG_DECODER 0
 #define CONFIG_PPM_DECODER 0
 #define CONFIG_PRORES_DECODER 0
-#define CONFIG_PRORES_LGPL_DECODER 0
+#define CONFIG_PROSUMER_DECODER 0
 #define CONFIG_PSD_DECODER 0
 #define CONFIG_PTX_DECODER 0
 #define CONFIG_QDRAW_DECODER 0
@@ -851,6 +869,7 @@
 #define CONFIG_QTRLE_DECODER 0
 #define CONFIG_R10K_DECODER 0
 #define CONFIG_R210_DECODER 0
+#define CONFIG_RASC_DECODER 0
 #define CONFIG_RAWVIDEO_DECODER 0
 #define CONFIG_RL2_DECODER 0
 #define CONFIG_ROQ_DECODER 0
@@ -903,7 +922,6 @@
 #define CONFIG_VBLE_DECODER 0
 #define CONFIG_VC1_DECODER 0
 #define CONFIG_VC1_CRYSTALHD_DECODER 0
-#define CONFIG_VC1_VDPAU_DECODER 0
 #define CONFIG_VC1IMAGE_DECODER 0
 #define CONFIG_VC1_MMAL_DECODER 0
 #define CONFIG_VC1_QSV_DECODER 0
@@ -924,14 +942,13 @@
 #define CONFIG_VP9_RKMPP_DECODER 0
 #define CONFIG_VP9_V4L2M2M_DECODER 0
 #define CONFIG_VQA_DECODER 0
-#define CONFIG_BITPACKED_DECODER 0
 #define CONFIG_WEBP_DECODER 0
+#define CONFIG_WCMV_DECODER 0
 #define CONFIG_WRAPPED_AVFRAME_DECODER 0
 #define CONFIG_WMV1_DECODER 0
 #define CONFIG_WMV2_DECODER 0
 #define CONFIG_WMV3_DECODER 0
 #define CONFIG_WMV3_CRYSTALHD_DECODER 0
-#define CONFIG_WMV3_VDPAU_DECODER 0
 #define CONFIG_WMV3IMAGE_DECODER 0
 #define CONFIG_WNV1_DECODER 0
 #define CONFIG_XAN_WC3_DECODER 0
@@ -959,11 +976,14 @@
 #define CONFIG_AMRNB_DECODER 1
 #define CONFIG_AMRWB_DECODER 1
 #define CONFIG_APE_DECODER 0
+#define CONFIG_APTX_DECODER 1
+#define CONFIG_APTX_HD_DECODER 0
 #define CONFIG_ATRAC1_DECODER 0
 #define CONFIG_ATRAC3_DECODER 0
 #define CONFIG_ATRAC3AL_DECODER 0
 #define CONFIG_ATRAC3P_DECODER 0
 #define CONFIG_ATRAC3PAL_DECODER 0
+#define CONFIG_ATRAC9_DECODER 0
 #define CONFIG_BINKAUDIO_DCT_DECODER 0
 #define CONFIG_BINKAUDIO_RDFT_DECODER 0
 #define CONFIG_BMV_AUDIO_DECODER 0
@@ -986,6 +1006,7 @@
 #define CONFIG_GSM_DECODER 0
 #define CONFIG_GSM_MS_DECODER 1
 #define CONFIG_IAC_DECODER 0
+#define CONFIG_ILBC_DECODER 0
 #define CONFIG_IMC_DECODER 0
 #define CONFIG_INTERPLAY_ACM_DECODER 0
 #define CONFIG_MACE3_DECODER 0
@@ -996,12 +1017,12 @@
 #define CONFIG_MP1FLOAT_DECODER 0
 #define CONFIG_MP2_DECODER 0
 #define CONFIG_MP2FLOAT_DECODER 0
-#define CONFIG_MP3_DECODER 1
 #define CONFIG_MP3FLOAT_DECODER 0
-#define CONFIG_MP3ADU_DECODER 0
+#define CONFIG_MP3_DECODER 1
 #define CONFIG_MP3ADUFLOAT_DECODER 0
-#define CONFIG_MP3ON4_DECODER 0
+#define CONFIG_MP3ADU_DECODER 0
 #define CONFIG_MP3ON4FLOAT_DECODER 0
+#define CONFIG_MP3ON4_DECODER 0
 #define CONFIG_MPC7_DECODER 0
 #define CONFIG_MPC8_DECODER 0
 #define CONFIG_NELLYMOSER_DECODER 0
@@ -1014,6 +1035,7 @@
 #define CONFIG_RA_144_DECODER 0
 #define CONFIG_RA_288_DECODER 0
 #define CONFIG_RALF_DECODER 0
+#define CONFIG_SBC_DECODER 1
 #define CONFIG_SHORTEN_DECODER 0
 #define CONFIG_SIPR_DECODER 0
 #define CONFIG_SMACKAUD_DECODER 0
@@ -1151,7 +1173,10 @@
 #define CONFIG_PCM_MULAW_AT_DECODER 0
 #define CONFIG_QDMC_AT_DECODER 0
 #define CONFIG_QDM2_AT_DECODER 0
+#define CONFIG_LIBAOM_AV1_DECODER 0
 #define CONFIG_LIBCELT_DECODER 0
+#define CONFIG_LIBCODEC2_DECODER 0
+#define CONFIG_LIBDAVS2_DECODER 0
 #define CONFIG_LIBFDK_AAC_DECODER 0
 #define CONFIG_LIBGSM_DECODER 0
 #define CONFIG_LIBGSM_MS_DECODER 0
@@ -1184,6 +1209,748 @@
 #define CONFIG_VP8_QSV_DECODER 0
 #define CONFIG_VP9_CUVID_DECODER 0
 #define CONFIG_VP9_MEDIACODEC_DECODER 0
+#define CONFIG_A64MULTI_ENCODER 0
+#define CONFIG_A64MULTI5_ENCODER 0
+#define CONFIG_ALIAS_PIX_ENCODER 0
+#define CONFIG_AMV_ENCODER 0
+#define CONFIG_APNG_ENCODER 0
+#define CONFIG_ASV1_ENCODER 0
+#define CONFIG_ASV2_ENCODER 0
+#define CONFIG_AVRP_ENCODER 0
+#define CONFIG_AVUI_ENCODER 0
+#define CONFIG_AYUV_ENCODER 0
+#define CONFIG_BMP_ENCODER 0
+#define CONFIG_CINEPAK_ENCODER 0
+#define CONFIG_CLJR_ENCODER 0
+#define CONFIG_COMFORTNOISE_ENCODER 0
+#define CONFIG_DNXHD_ENCODER 0
+#define CONFIG_DPX_ENCODER 0
+#define CONFIG_DVVIDEO_ENCODER 0
+#define CONFIG_FFV1_ENCODER 0
+#define CONFIG_FFVHUFF_ENCODER 0
+#define CONFIG_FITS_ENCODER 0
+#define CONFIG_FLASHSV_ENCODER 0
+#define CONFIG_FLASHSV2_ENCODER 0
+#define CONFIG_FLV_ENCODER 0
+#define CONFIG_GIF_ENCODER 0
+#define CONFIG_H261_ENCODER 0
+#define CONFIG_H263_ENCODER 0
+#define CONFIG_H263P_ENCODER 0
+#define CONFIG_HAP_ENCODER 0
+#define CONFIG_HUFFYUV_ENCODER 0
+#define CONFIG_JPEG2000_ENCODER 0
+#define CONFIG_JPEGLS_ENCODER 0
+#define CONFIG_LJPEG_ENCODER 0
+#define CONFIG_MAGICYUV_ENCODER 0
+#define CONFIG_MJPEG_ENCODER 0
+#define CONFIG_MPEG1VIDEO_ENCODER 0
+#define CONFIG_MPEG2VIDEO_ENCODER 0
+#define CONFIG_MPEG4_ENCODER 0
+#define CONFIG_MSMPEG4V2_ENCODER 0
+#define CONFIG_MSMPEG4V3_ENCODER 0
+#define CONFIG_MSVIDEO1_ENCODER 0
+#define CONFIG_PAM_ENCODER 0
+#define CONFIG_PBM_ENCODER 0
+#define CONFIG_PCX_ENCODER 0
+#define CONFIG_PGM_ENCODER 0
+#define CONFIG_PGMYUV_ENCODER 0
+#define CONFIG_PNG_ENCODER 0
+#define CONFIG_PPM_ENCODER 0
+#define CONFIG_PRORES_ENCODER 0
+#define CONFIG_PRORES_AW_ENCODER 0
+#define CONFIG_PRORES_KS_ENCODER 0
+#define CONFIG_QTRLE_ENCODER 0
+#define CONFIG_R10K_ENCODER 0
+#define CONFIG_R210_ENCODER 0
+#define CONFIG_RAWVIDEO_ENCODER 0
+#define CONFIG_ROQ_ENCODER 0
+#define CONFIG_RV10_ENCODER 0
+#define CONFIG_RV20_ENCODER 0
+#define CONFIG_S302M_ENCODER 0
+#define CONFIG_SGI_ENCODER 0
+#define CONFIG_SNOW_ENCODER 0
+#define CONFIG_SUNRAST_ENCODER 0
+#define CONFIG_SVQ1_ENCODER 0
+#define CONFIG_TARGA_ENCODER 0
+#define CONFIG_TIFF_ENCODER 0
+#define CONFIG_UTVIDEO_ENCODER 0
+#define CONFIG_V210_ENCODER 0
+#define CONFIG_V308_ENCODER 0
+#define CONFIG_V408_ENCODER 0
+#define CONFIG_V410_ENCODER 0
+#define CONFIG_VC2_ENCODER 0
+#define CONFIG_WRAPPED_AVFRAME_ENCODER 0
+#define CONFIG_WMV1_ENCODER 0
+#define CONFIG_WMV2_ENCODER 0
+#define CONFIG_XBM_ENCODER 0
+#define CONFIG_XFACE_ENCODER 0
+#define CONFIG_XWD_ENCODER 0
+#define CONFIG_Y41P_ENCODER 0
+#define CONFIG_YUV4_ENCODER 0
+#define CONFIG_ZLIB_ENCODER 0
+#define CONFIG_ZMBV_ENCODER 0
+#define CONFIG_AAC_ENCODER 0
+#define CONFIG_AC3_ENCODER 0
+#define CONFIG_AC3_FIXED_ENCODER 0
+#define CONFIG_ALAC_ENCODER 0
+#define CONFIG_APTX_ENCODER 0
+#define CONFIG_APTX_HD_ENCODER 0
+#define CONFIG_DCA_ENCODER 0
+#define CONFIG_EAC3_ENCODER 0
+#define CONFIG_FLAC_ENCODER 0
+#define CONFIG_G723_1_ENCODER 0
+#define CONFIG_MLP_ENCODER 0
+#define CONFIG_MP2_ENCODER 0
+#define CONFIG_MP2FIXED_ENCODER 0
+#define CONFIG_NELLYMOSER_ENCODER 0
+#define CONFIG_OPUS_ENCODER 0
+#define CONFIG_RA_144_ENCODER 0
+#define CONFIG_SBC_ENCODER 0
+#define CONFIG_SONIC_ENCODER 0
+#define CONFIG_SONIC_LS_ENCODER 0
+#define CONFIG_TRUEHD_ENCODER 0
+#define CONFIG_TTA_ENCODER 0
+#define CONFIG_VORBIS_ENCODER 0
+#define CONFIG_WAVPACK_ENCODER 0
+#define CONFIG_WMAV1_ENCODER 0
+#define CONFIG_WMAV2_ENCODER 0
+#define CONFIG_PCM_ALAW_ENCODER 0
+#define CONFIG_PCM_F32BE_ENCODER 0
+#define CONFIG_PCM_F32LE_ENCODER 0
+#define CONFIG_PCM_F64BE_ENCODER 0
+#define CONFIG_PCM_F64LE_ENCODER 0
+#define CONFIG_PCM_MULAW_ENCODER 0
+#define CONFIG_PCM_S8_ENCODER 0
+#define CONFIG_PCM_S8_PLANAR_ENCODER 0
+#define CONFIG_PCM_S16BE_ENCODER 0
+#define CONFIG_PCM_S16BE_PLANAR_ENCODER 0
+#define CONFIG_PCM_S16LE_ENCODER 0
+#define CONFIG_PCM_S16LE_PLANAR_ENCODER 0
+#define CONFIG_PCM_S24BE_ENCODER 0
+#define CONFIG_PCM_S24DAUD_ENCODER 0
+#define CONFIG_PCM_S24LE_ENCODER 0
+#define CONFIG_PCM_S24LE_PLANAR_ENCODER 0
+#define CONFIG_PCM_S32BE_ENCODER 0
+#define CONFIG_PCM_S32LE_ENCODER 0
+#define CONFIG_PCM_S32LE_PLANAR_ENCODER 0
+#define CONFIG_PCM_S64BE_ENCODER 0
+#define CONFIG_PCM_S64LE_ENCODER 0
+#define CONFIG_PCM_U8_ENCODER 0
+#define CONFIG_PCM_U16BE_ENCODER 0
+#define CONFIG_PCM_U16LE_ENCODER 0
+#define CONFIG_PCM_U24BE_ENCODER 0
+#define CONFIG_PCM_U24LE_ENCODER 0
+#define CONFIG_PCM_U32BE_ENCODER 0
+#define CONFIG_PCM_U32LE_ENCODER 0
+#define CONFIG_ROQ_DPCM_ENCODER 0
+#define CONFIG_ADPCM_ADX_ENCODER 0
+#define CONFIG_ADPCM_G722_ENCODER 0
+#define CONFIG_ADPCM_G726_ENCODER 0
+#define CONFIG_ADPCM_G726LE_ENCODER 0
+#define CONFIG_ADPCM_IMA_QT_ENCODER 0
+#define CONFIG_ADPCM_IMA_WAV_ENCODER 0
+#define CONFIG_ADPCM_MS_ENCODER 0
+#define CONFIG_ADPCM_SWF_ENCODER 0
+#define CONFIG_ADPCM_YAMAHA_ENCODER 0
+#define CONFIG_SSA_ENCODER 0
+#define CONFIG_ASS_ENCODER 0
+#define CONFIG_DVBSUB_ENCODER 0
+#define CONFIG_DVDSUB_ENCODER 0
+#define CONFIG_MOVTEXT_ENCODER 0
+#define CONFIG_SRT_ENCODER 0
+#define CONFIG_SUBRIP_ENCODER 0
+#define CONFIG_TEXT_ENCODER 0
+#define CONFIG_WEBVTT_ENCODER 0
+#define CONFIG_XSUB_ENCODER 0
+#define CONFIG_AAC_AT_ENCODER 0
+#define CONFIG_ALAC_AT_ENCODER 0
+#define CONFIG_ILBC_AT_ENCODER 0
+#define CONFIG_PCM_ALAW_AT_ENCODER 0
+#define CONFIG_PCM_MULAW_AT_ENCODER 0
+#define CONFIG_LIBAOM_AV1_ENCODER 0
+#define CONFIG_LIBCODEC2_ENCODER 0
+#define CONFIG_LIBFDK_AAC_ENCODER 0
+#define CONFIG_LIBGSM_ENCODER 0
+#define CONFIG_LIBGSM_MS_ENCODER 0
+#define CONFIG_LIBILBC_ENCODER 0
+#define CONFIG_LIBMP3LAME_ENCODER 0
+#define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0
+#define CONFIG_LIBOPENJPEG_ENCODER 0
+#define CONFIG_LIBOPUS_ENCODER 0
+#define CONFIG_LIBSHINE_ENCODER 0
+#define CONFIG_LIBSPEEX_ENCODER 0
+#define CONFIG_LIBTHEORA_ENCODER 0
+#define CONFIG_LIBTWOLAME_ENCODER 0
+#define CONFIG_LIBVO_AMRWBENC_ENCODER 0
+#define CONFIG_LIBVORBIS_ENCODER 0
+#define CONFIG_LIBVPX_VP8_ENCODER 0
+#define CONFIG_LIBVPX_VP9_ENCODER 0
+#define CONFIG_LIBWAVPACK_ENCODER 0
+#define CONFIG_LIBWEBP_ANIM_ENCODER 0
+#define CONFIG_LIBWEBP_ENCODER 0
+#define CONFIG_LIBX262_ENCODER 0
+#define CONFIG_LIBX264_ENCODER 0
+#define CONFIG_LIBX264RGB_ENCODER 0
+#define CONFIG_LIBX265_ENCODER 0
+#define CONFIG_LIBXAVS_ENCODER 0
+#define CONFIG_LIBXAVS2_ENCODER 0
+#define CONFIG_LIBXVID_ENCODER 0
+#define CONFIG_H263_V4L2M2M_ENCODER 0
+#define CONFIG_LIBOPENH264_ENCODER 0
+#define CONFIG_H264_AMF_ENCODER 0
+#define CONFIG_H264_NVENC_ENCODER 0
+#define CONFIG_H264_OMX_ENCODER 0
+#define CONFIG_H264_QSV_ENCODER 0
+#define CONFIG_H264_V4L2M2M_ENCODER 0
+#define CONFIG_H264_VAAPI_ENCODER 0
+#define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0
+#define CONFIG_NVENC_ENCODER 0
+#define CONFIG_NVENC_H264_ENCODER 0
+#define CONFIG_NVENC_HEVC_ENCODER 0
+#define CONFIG_HEVC_AMF_ENCODER 0
+#define CONFIG_HEVC_NVENC_ENCODER 0
+#define CONFIG_HEVC_QSV_ENCODER 0
+#define CONFIG_HEVC_V4L2M2M_ENCODER 0
+#define CONFIG_HEVC_VAAPI_ENCODER 0
+#define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0
+#define CONFIG_LIBKVAZAAR_ENCODER 0
+#define CONFIG_MJPEG_QSV_ENCODER 0
+#define CONFIG_MJPEG_VAAPI_ENCODER 0
+#define CONFIG_MPEG2_QSV_ENCODER 0
+#define CONFIG_MPEG2_VAAPI_ENCODER 0
+#define CONFIG_MPEG4_V4L2M2M_ENCODER 0
+#define CONFIG_VP8_V4L2M2M_ENCODER 0
+#define CONFIG_VP8_VAAPI_ENCODER 0
+#define CONFIG_VP9_VAAPI_ENCODER 0
+#define CONFIG_H263_VAAPI_HWACCEL 0
+#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_H264_D3D11VA_HWACCEL 0
+#define CONFIG_H264_D3D11VA2_HWACCEL 0
+#define CONFIG_H264_DXVA2_HWACCEL 0
+#define CONFIG_H264_NVDEC_HWACCEL 0
+#define CONFIG_H264_VAAPI_HWACCEL 0
+#define CONFIG_H264_VDPAU_HWACCEL 0
+#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_HEVC_D3D11VA_HWACCEL 0
+#define CONFIG_HEVC_D3D11VA2_HWACCEL 0
+#define CONFIG_HEVC_DXVA2_HWACCEL 0
+#define CONFIG_HEVC_NVDEC_HWACCEL 0
+#define CONFIG_HEVC_VAAPI_HWACCEL 0
+#define CONFIG_HEVC_VDPAU_HWACCEL 0
+#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MJPEG_NVDEC_HWACCEL 0
+#define CONFIG_MJPEG_VAAPI_HWACCEL 0
+#define CONFIG_MPEG1_NVDEC_HWACCEL 0
+#define CONFIG_MPEG1_VDPAU_HWACCEL 0
+#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MPEG1_XVMC_HWACCEL 0
+#define CONFIG_MPEG2_D3D11VA_HWACCEL 0
+#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
+#define CONFIG_MPEG2_NVDEC_HWACCEL 0
+#define CONFIG_MPEG2_DXVA2_HWACCEL 0
+#define CONFIG_MPEG2_VAAPI_HWACCEL 0
+#define CONFIG_MPEG2_VDPAU_HWACCEL 0
+#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MPEG2_XVMC_HWACCEL 0
+#define CONFIG_MPEG4_NVDEC_HWACCEL 0
+#define CONFIG_MPEG4_VAAPI_HWACCEL 0
+#define CONFIG_MPEG4_VDPAU_HWACCEL 0
+#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_VC1_D3D11VA_HWACCEL 0
+#define CONFIG_VC1_D3D11VA2_HWACCEL 0
+#define CONFIG_VC1_DXVA2_HWACCEL 0
+#define CONFIG_VC1_NVDEC_HWACCEL 0
+#define CONFIG_VC1_VAAPI_HWACCEL 0
+#define CONFIG_VC1_VDPAU_HWACCEL 0
+#define CONFIG_VP8_NVDEC_HWACCEL 0
+#define CONFIG_VP8_VAAPI_HWACCEL 0
+#define CONFIG_VP9_D3D11VA_HWACCEL 0
+#define CONFIG_VP9_D3D11VA2_HWACCEL 0
+#define CONFIG_VP9_DXVA2_HWACCEL 0
+#define CONFIG_VP9_NVDEC_HWACCEL 0
+#define CONFIG_VP9_VAAPI_HWACCEL 0
+#define CONFIG_WMV3_D3D11VA_HWACCEL 0
+#define CONFIG_WMV3_D3D11VA2_HWACCEL 0
+#define CONFIG_WMV3_DXVA2_HWACCEL 0
+#define CONFIG_WMV3_NVDEC_HWACCEL 0
+#define CONFIG_WMV3_VAAPI_HWACCEL 0
+#define CONFIG_WMV3_VDPAU_HWACCEL 0
+#define CONFIG_AAC_PARSER 1
+#define CONFIG_AAC_LATM_PARSER 0
+#define CONFIG_AC3_PARSER 0
+#define CONFIG_ADX_PARSER 0
+#define CONFIG_AV1_PARSER 0
+#define CONFIG_AVS2_PARSER 0
+#define CONFIG_BMP_PARSER 0
+#define CONFIG_CAVSVIDEO_PARSER 0
+#define CONFIG_COOK_PARSER 0
+#define CONFIG_DCA_PARSER 0
+#define CONFIG_DIRAC_PARSER 0
+#define CONFIG_DNXHD_PARSER 0
+#define CONFIG_DPX_PARSER 0
+#define CONFIG_DVAUDIO_PARSER 0
+#define CONFIG_DVBSUB_PARSER 0
+#define CONFIG_DVDSUB_PARSER 0
+#define CONFIG_DVD_NAV_PARSER 0
+#define CONFIG_FLAC_PARSER 1
+#define CONFIG_G729_PARSER 0
+#define CONFIG_GSM_PARSER 1
+#define CONFIG_H261_PARSER 0
+#define CONFIG_H263_PARSER 1
+#define CONFIG_H264_PARSER 1
+#define CONFIG_HEVC_PARSER 0
+#define CONFIG_MJPEG_PARSER 0
+#define CONFIG_MLP_PARSER 0
+#define CONFIG_MPEG4VIDEO_PARSER 1
+#define CONFIG_MPEGAUDIO_PARSER 1
+#define CONFIG_MPEGVIDEO_PARSER 0
+#define CONFIG_OPUS_PARSER 1
+#define CONFIG_PNG_PARSER 0
+#define CONFIG_PNM_PARSER 0
+#define CONFIG_RV30_PARSER 0
+#define CONFIG_RV40_PARSER 0
+#define CONFIG_SBC_PARSER 0
+#define CONFIG_SIPR_PARSER 0
+#define CONFIG_TAK_PARSER 0
+#define CONFIG_VC1_PARSER 0
+#define CONFIG_VORBIS_PARSER 1
+#define CONFIG_VP3_PARSER 1
+#define CONFIG_VP8_PARSER 1
+#define CONFIG_VP9_PARSER 0
+#define CONFIG_XMA_PARSER 0
+#define CONFIG_ALSA_INDEV 0
+#define CONFIG_ANDROID_CAMERA_INDEV 0
+#define CONFIG_AVFOUNDATION_INDEV 0
+#define CONFIG_BKTR_INDEV 0
+#define CONFIG_DECKLINK_INDEV 0
+#define CONFIG_LIBNDI_NEWTEK_INDEV 0
+#define CONFIG_DSHOW_INDEV 0
+#define CONFIG_FBDEV_INDEV 0
+#define CONFIG_GDIGRAB_INDEV 0
+#define CONFIG_IEC61883_INDEV 0
+#define CONFIG_JACK_INDEV 0
+#define CONFIG_KMSGRAB_INDEV 0
+#define CONFIG_LAVFI_INDEV 0
+#define CONFIG_OPENAL_INDEV 0
+#define CONFIG_OSS_INDEV 0
+#define CONFIG_PULSE_INDEV 0
+#define CONFIG_SNDIO_INDEV 0
+#define CONFIG_V4L2_INDEV 0
+#define CONFIG_VFWCAP_INDEV 0
+#define CONFIG_XCBGRAB_INDEV 0
+#define CONFIG_LIBCDIO_INDEV 0
+#define CONFIG_LIBDC1394_INDEV 0
+#define CONFIG_ALSA_OUTDEV 0
+#define CONFIG_CACA_OUTDEV 0
+#define CONFIG_DECKLINK_OUTDEV 0
+#define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
+#define CONFIG_FBDEV_OUTDEV 0
+#define CONFIG_OPENGL_OUTDEV 0
+#define CONFIG_OSS_OUTDEV 0
+#define CONFIG_PULSE_OUTDEV 0
+#define CONFIG_SDL2_OUTDEV 0
+#define CONFIG_SNDIO_OUTDEV 0
+#define CONFIG_V4L2_OUTDEV 0
+#define CONFIG_XV_OUTDEV 0
+#define CONFIG_ABENCH_FILTER 0
+#define CONFIG_ACOMPRESSOR_FILTER 0
+#define CONFIG_ACONTRAST_FILTER 0
+#define CONFIG_ACOPY_FILTER 0
+#define CONFIG_ACUE_FILTER 0
+#define CONFIG_ACROSSFADE_FILTER 0
+#define CONFIG_ACROSSOVER_FILTER 0
+#define CONFIG_ACRUSHER_FILTER 0
+#define CONFIG_ADECLICK_FILTER 0
+#define CONFIG_ADECLIP_FILTER 0
+#define CONFIG_ADELAY_FILTER 0
+#define CONFIG_ADERIVATIVE_FILTER 0
+#define CONFIG_AECHO_FILTER 0
+#define CONFIG_AEMPHASIS_FILTER 0
+#define CONFIG_AEVAL_FILTER 0
+#define CONFIG_AFADE_FILTER 0
+#define CONFIG_AFFTDN_FILTER 0
+#define CONFIG_AFFTFILT_FILTER 0
+#define CONFIG_AFIR_FILTER 0
+#define CONFIG_AFORMAT_FILTER 0
+#define CONFIG_AGATE_FILTER 0
+#define CONFIG_AIIR_FILTER 0
+#define CONFIG_AINTEGRAL_FILTER 0
+#define CONFIG_AINTERLEAVE_FILTER 0
+#define CONFIG_ALIMITER_FILTER 0
+#define CONFIG_ALLPASS_FILTER 0
+#define CONFIG_ALOOP_FILTER 0
+#define CONFIG_AMERGE_FILTER 0
+#define CONFIG_AMETADATA_FILTER 0
+#define CONFIG_AMIX_FILTER 0
+#define CONFIG_AMULTIPLY_FILTER 0
+#define CONFIG_ANEQUALIZER_FILTER 0
+#define CONFIG_ANULL_FILTER 0
+#define CONFIG_APAD_FILTER 0
+#define CONFIG_APERMS_FILTER 0
+#define CONFIG_APHASER_FILTER 0
+#define CONFIG_APULSATOR_FILTER 0
+#define CONFIG_AREALTIME_FILTER 0
+#define CONFIG_ARESAMPLE_FILTER 0
+#define CONFIG_AREVERSE_FILTER 0
+#define CONFIG_ASELECT_FILTER 0
+#define CONFIG_ASENDCMD_FILTER 0
+#define CONFIG_ASETNSAMPLES_FILTER 0
+#define CONFIG_ASETPTS_FILTER 0
+#define CONFIG_ASETRATE_FILTER 0
+#define CONFIG_ASETTB_FILTER 0
+#define CONFIG_ASHOWINFO_FILTER 0
+#define CONFIG_ASIDEDATA_FILTER 0
+#define CONFIG_ASPLIT_FILTER 0
+#define CONFIG_ASTATS_FILTER 0
+#define CONFIG_ASTREAMSELECT_FILTER 0
+#define CONFIG_ATEMPO_FILTER 0
+#define CONFIG_ATRIM_FILTER 0
+#define CONFIG_AZMQ_FILTER 0
+#define CONFIG_BANDPASS_FILTER 0
+#define CONFIG_BANDREJECT_FILTER 0
+#define CONFIG_BASS_FILTER 0
+#define CONFIG_BIQUAD_FILTER 0
+#define CONFIG_BS2B_FILTER 0
+#define CONFIG_CHANNELMAP_FILTER 0
+#define CONFIG_CHANNELSPLIT_FILTER 0
+#define CONFIG_CHORUS_FILTER 0
+#define CONFIG_COMPAND_FILTER 0
+#define CONFIG_COMPENSATIONDELAY_FILTER 0
+#define CONFIG_CROSSFEED_FILTER 0
+#define CONFIG_CRYSTALIZER_FILTER 0
+#define CONFIG_DCSHIFT_FILTER 0
+#define CONFIG_DRMETER_FILTER 0
+#define CONFIG_DYNAUDNORM_FILTER 0
+#define CONFIG_EARWAX_FILTER 0
+#define CONFIG_EBUR128_FILTER 0
+#define CONFIG_EQUALIZER_FILTER 0
+#define CONFIG_EXTRASTEREO_FILTER 0
+#define CONFIG_FIREQUALIZER_FILTER 0
+#define CONFIG_FLANGER_FILTER 0
+#define CONFIG_HAAS_FILTER 0
+#define CONFIG_HDCD_FILTER 0
+#define CONFIG_HEADPHONE_FILTER 0
+#define CONFIG_HIGHPASS_FILTER 0
+#define CONFIG_HIGHSHELF_FILTER 0
+#define CONFIG_JOIN_FILTER 0
+#define CONFIG_LADSPA_FILTER 0
+#define CONFIG_LOUDNORM_FILTER 0
+#define CONFIG_LOWPASS_FILTER 0
+#define CONFIG_LOWSHELF_FILTER 0
+#define CONFIG_LV2_FILTER 0
+#define CONFIG_MCOMPAND_FILTER 0
+#define CONFIG_PAN_FILTER 0
+#define CONFIG_REPLAYGAIN_FILTER 0
+#define CONFIG_RESAMPLE_FILTER 0
+#define CONFIG_RUBBERBAND_FILTER 0
+#define CONFIG_SIDECHAINCOMPRESS_FILTER 0
+#define CONFIG_SIDECHAINGATE_FILTER 0
+#define CONFIG_SILENCEDETECT_FILTER 0
+#define CONFIG_SILENCEREMOVE_FILTER 0
+#define CONFIG_SOFALIZER_FILTER 0
+#define CONFIG_STEREOTOOLS_FILTER 0
+#define CONFIG_STEREOWIDEN_FILTER 0
+#define CONFIG_SUPEREQUALIZER_FILTER 0
+#define CONFIG_SURROUND_FILTER 0
+#define CONFIG_TREBLE_FILTER 0
+#define CONFIG_TREMOLO_FILTER 0
+#define CONFIG_VIBRATO_FILTER 0
+#define CONFIG_VOLUME_FILTER 0
+#define CONFIG_VOLUMEDETECT_FILTER 0
+#define CONFIG_AEVALSRC_FILTER 0
+#define CONFIG_ANOISESRC_FILTER 0
+#define CONFIG_ANULLSRC_FILTER 0
+#define CONFIG_FLITE_FILTER 0
+#define CONFIG_HILBERT_FILTER 0
+#define CONFIG_SINE_FILTER 0
+#define CONFIG_ANULLSINK_FILTER 0
+#define CONFIG_ALPHAEXTRACT_FILTER 0
+#define CONFIG_ALPHAMERGE_FILTER 0
+#define CONFIG_AMPLIFY_FILTER 0
+#define CONFIG_ASS_FILTER 0
+#define CONFIG_ATADENOISE_FILTER 0
+#define CONFIG_AVGBLUR_FILTER 0
+#define CONFIG_AVGBLUR_OPENCL_FILTER 0
+#define CONFIG_BBOX_FILTER 0
+#define CONFIG_BENCH_FILTER 0
+#define CONFIG_BITPLANENOISE_FILTER 0
+#define CONFIG_BLACKDETECT_FILTER 0
+#define CONFIG_BLACKFRAME_FILTER 0
+#define CONFIG_BLEND_FILTER 0
+#define CONFIG_BM3D_FILTER 0
+#define CONFIG_BOXBLUR_FILTER 0
+#define CONFIG_BOXBLUR_OPENCL_FILTER 0
+#define CONFIG_BWDIF_FILTER 0
+#define CONFIG_CHROMAKEY_FILTER 0
+#define CONFIG_CIESCOPE_FILTER 0
+#define CONFIG_CODECVIEW_FILTER 0
+#define CONFIG_COLORBALANCE_FILTER 0
+#define CONFIG_COLORCHANNELMIXER_FILTER 0
+#define CONFIG_COLORKEY_FILTER 0
+#define CONFIG_COLORLEVELS_FILTER 0
+#define CONFIG_COLORMATRIX_FILTER 0
+#define CONFIG_COLORSPACE_FILTER 0
+#define CONFIG_CONVOLUTION_FILTER 0
+#define CONFIG_CONVOLUTION_OPENCL_FILTER 0
+#define CONFIG_CONVOLVE_FILTER 0
+#define CONFIG_COPY_FILTER 0
+#define CONFIG_COREIMAGE_FILTER 0
+#define CONFIG_COVER_RECT_FILTER 0
+#define CONFIG_CROP_FILTER 0
+#define CONFIG_CROPDETECT_FILTER 0
+#define CONFIG_CUE_FILTER 0
+#define CONFIG_CURVES_FILTER 0
+#define CONFIG_DATASCOPE_FILTER 0
+#define CONFIG_DCTDNOIZ_FILTER 0
+#define CONFIG_DEBAND_FILTER 0
+#define CONFIG_DEBLOCK_FILTER 0
+#define CONFIG_DECIMATE_FILTER 0
+#define CONFIG_DECONVOLVE_FILTER 0
+#define CONFIG_DEFLATE_FILTER 0
+#define CONFIG_DEFLICKER_FILTER 0
+#define CONFIG_DEINTERLACE_QSV_FILTER 0
+#define CONFIG_DEINTERLACE_VAAPI_FILTER 0
+#define CONFIG_DEJUDDER_FILTER 0
+#define CONFIG_DELOGO_FILTER 0
+#define CONFIG_DENOISE_VAAPI_FILTER 0
+#define CONFIG_DESHAKE_FILTER 0
+#define CONFIG_DESPILL_FILTER 0
+#define CONFIG_DETELECINE_FILTER 0
+#define CONFIG_DILATION_FILTER 0
+#define CONFIG_DILATION_OPENCL_FILTER 0
+#define CONFIG_DISPLACE_FILTER 0
+#define CONFIG_DOUBLEWEAVE_FILTER 0
+#define CONFIG_DRAWBOX_FILTER 0
+#define CONFIG_DRAWGRAPH_FILTER 0
+#define CONFIG_DRAWGRID_FILTER 0
+#define CONFIG_DRAWTEXT_FILTER 0
+#define CONFIG_EDGEDETECT_FILTER 0
+#define CONFIG_ELBG_FILTER 0
+#define CONFIG_ENTROPY_FILTER 0
+#define CONFIG_EQ_FILTER 0
+#define CONFIG_EROSION_FILTER 0
+#define CONFIG_EROSION_OPENCL_FILTER 0
+#define CONFIG_EXTRACTPLANES_FILTER 0
+#define CONFIG_FADE_FILTER 0
+#define CONFIG_FFTDNOIZ_FILTER 0
+#define CONFIG_FFTFILT_FILTER 0
+#define CONFIG_FIELD_FILTER 0
+#define CONFIG_FIELDHINT_FILTER 0
+#define CONFIG_FIELDMATCH_FILTER 0
+#define CONFIG_FIELDORDER_FILTER 0
+#define CONFIG_FILLBORDERS_FILTER 0
+#define CONFIG_FIND_RECT_FILTER 0
+#define CONFIG_FLOODFILL_FILTER 0
+#define CONFIG_FORMAT_FILTER 0
+#define CONFIG_FPS_FILTER 0
+#define CONFIG_FRAMEPACK_FILTER 0
+#define CONFIG_FRAMERATE_FILTER 0
+#define CONFIG_FRAMESTEP_FILTER 0
+#define CONFIG_FREI0R_FILTER 0
+#define CONFIG_FSPP_FILTER 0
+#define CONFIG_GBLUR_FILTER 0
+#define CONFIG_GEQ_FILTER 0
+#define CONFIG_GRADFUN_FILTER 0
+#define CONFIG_GREYEDGE_FILTER 0
+#define CONFIG_HALDCLUT_FILTER 0
+#define CONFIG_HFLIP_FILTER 0
+#define CONFIG_HISTEQ_FILTER 0
+#define CONFIG_HISTOGRAM_FILTER 0
+#define CONFIG_HQDN3D_FILTER 0
+#define CONFIG_HQX_FILTER 0
+#define CONFIG_HSTACK_FILTER 0
+#define CONFIG_HUE_FILTER 0
+#define CONFIG_HWDOWNLOAD_FILTER 0
+#define CONFIG_HWMAP_FILTER 0
+#define CONFIG_HWUPLOAD_FILTER 0
+#define CONFIG_HWUPLOAD_CUDA_FILTER 0
+#define CONFIG_HYSTERESIS_FILTER 0
+#define CONFIG_IDET_FILTER 0
+#define CONFIG_IL_FILTER 0
+#define CONFIG_INFLATE_FILTER 0
+#define CONFIG_INTERLACE_FILTER 0
+#define CONFIG_INTERLEAVE_FILTER 0
+#define CONFIG_KERNDEINT_FILTER 0
+#define CONFIG_LENSCORRECTION_FILTER 0
+#define CONFIG_LENSFUN_FILTER 0
+#define CONFIG_LIBVMAF_FILTER 0
+#define CONFIG_LIMITER_FILTER 0
+#define CONFIG_LOOP_FILTER 0
+#define CONFIG_LUMAKEY_FILTER 0
+#define CONFIG_LUT_FILTER 0
+#define CONFIG_LUT1D_FILTER 0
+#define CONFIG_LUT2_FILTER 0
+#define CONFIG_LUT3D_FILTER 0
+#define CONFIG_LUTRGB_FILTER 0
+#define CONFIG_LUTYUV_FILTER 0
+#define CONFIG_MASKEDCLAMP_FILTER 0
+#define CONFIG_MASKEDMERGE_FILTER 0
+#define CONFIG_MCDEINT_FILTER 0
+#define CONFIG_MERGEPLANES_FILTER 0
+#define CONFIG_MESTIMATE_FILTER 0
+#define CONFIG_METADATA_FILTER 0
+#define CONFIG_MIDEQUALIZER_FILTER 0
+#define CONFIG_MINTERPOLATE_FILTER 0
+#define CONFIG_MIX_FILTER 0
+#define CONFIG_MPDECIMATE_FILTER 0
+#define CONFIG_NEGATE_FILTER 0
+#define CONFIG_NLMEANS_FILTER 0
+#define CONFIG_NNEDI_FILTER 0
+#define CONFIG_NOFORMAT_FILTER 0
+#define CONFIG_NOISE_FILTER 0
+#define CONFIG_NORMALIZE_FILTER 0
+#define CONFIG_NULL_FILTER 0
+#define CONFIG_OCR_FILTER 0
+#define CONFIG_OCV_FILTER 0
+#define CONFIG_OSCILLOSCOPE_FILTER 0
+#define CONFIG_OVERLAY_FILTER 0
+#define CONFIG_OVERLAY_OPENCL_FILTER 0
+#define CONFIG_OVERLAY_QSV_FILTER 0
+#define CONFIG_OWDENOISE_FILTER 0
+#define CONFIG_PAD_FILTER 0
+#define CONFIG_PALETTEGEN_FILTER 0
+#define CONFIG_PALETTEUSE_FILTER 0
+#define CONFIG_PERMS_FILTER 0
+#define CONFIG_PERSPECTIVE_FILTER 0
+#define CONFIG_PHASE_FILTER 0
+#define CONFIG_PIXDESCTEST_FILTER 0
+#define CONFIG_PIXSCOPE_FILTER 0
+#define CONFIG_PP_FILTER 0
+#define CONFIG_PP7_FILTER 0
+#define CONFIG_PREMULTIPLY_FILTER 0
+#define CONFIG_PREWITT_FILTER 0
+#define CONFIG_PREWITT_OPENCL_FILTER 0
+#define CONFIG_PROCAMP_VAAPI_FILTER 0
+#define CONFIG_PROGRAM_OPENCL_FILTER 0
+#define CONFIG_PSEUDOCOLOR_FILTER 0
+#define CONFIG_PSNR_FILTER 0
+#define CONFIG_PULLUP_FILTER 0
+#define CONFIG_QP_FILTER 0
+#define CONFIG_RANDOM_FILTER 0
+#define CONFIG_READEIA608_FILTER 0
+#define CONFIG_READVITC_FILTER 0
+#define CONFIG_REALTIME_FILTER 0
+#define CONFIG_REMAP_FILTER 0
+#define CONFIG_REMOVEGRAIN_FILTER 0
+#define CONFIG_REMOVELOGO_FILTER 0
+#define CONFIG_REPEATFIELDS_FILTER 0
+#define CONFIG_REVERSE_FILTER 0
+#define CONFIG_ROBERTS_FILTER 0
+#define CONFIG_ROBERTS_OPENCL_FILTER 0
+#define CONFIG_ROTATE_FILTER 0
+#define CONFIG_SAB_FILTER 0
+#define CONFIG_SCALE_FILTER 0
+#define CONFIG_SCALE_CUDA_FILTER 0
+#define CONFIG_SCALE_NPP_FILTER 0
+#define CONFIG_SCALE_QSV_FILTER 0
+#define CONFIG_SCALE_VAAPI_FILTER 0
+#define CONFIG_SCALE2REF_FILTER 0
+#define CONFIG_SELECT_FILTER 0
+#define CONFIG_SELECTIVECOLOR_FILTER 0
+#define CONFIG_SENDCMD_FILTER 0
+#define CONFIG_SEPARATEFIELDS_FILTER 0
+#define CONFIG_SETDAR_FILTER 0
+#define CONFIG_SETFIELD_FILTER 0
+#define CONFIG_SETPTS_FILTER 0
+#define CONFIG_SETRANGE_FILTER 0
+#define CONFIG_SETSAR_FILTER 0
+#define CONFIG_SETTB_FILTER 0
+#define CONFIG_SHARPNESS_VAAPI_FILTER 0
+#define CONFIG_SHOWINFO_FILTER 0
+#define CONFIG_SHOWPALETTE_FILTER 0
+#define CONFIG_SHUFFLEFRAMES_FILTER 0
+#define CONFIG_SHUFFLEPLANES_FILTER 0
+#define CONFIG_SIDEDATA_FILTER 0
+#define CONFIG_SIGNALSTATS_FILTER 0
+#define CONFIG_SIGNATURE_FILTER 0
+#define CONFIG_SMARTBLUR_FILTER 0
+#define CONFIG_SOBEL_FILTER 0
+#define CONFIG_SOBEL_OPENCL_FILTER 0
+#define CONFIG_SPLIT_FILTER 0
+#define CONFIG_SPP_FILTER 0
+#define CONFIG_SR_FILTER 0
+#define CONFIG_SSIM_FILTER 0
+#define CONFIG_STEREO3D_FILTER 0
+#define CONFIG_STREAMSELECT_FILTER 0
+#define CONFIG_SUBTITLES_FILTER 0
+#define CONFIG_SUPER2XSAI_FILTER 0
+#define CONFIG_SWAPRECT_FILTER 0
+#define CONFIG_SWAPUV_FILTER 0
+#define CONFIG_TBLEND_FILTER 0
+#define CONFIG_TELECINE_FILTER 0
+#define CONFIG_THRESHOLD_FILTER 0
+#define CONFIG_THUMBNAIL_FILTER 0
+#define CONFIG_THUMBNAIL_CUDA_FILTER 0
+#define CONFIG_TILE_FILTER 0
+#define CONFIG_TINTERLACE_FILTER 0
+#define CONFIG_TLUT2_FILTER 0
+#define CONFIG_TMIX_FILTER 0
+#define CONFIG_TONEMAP_FILTER 0
+#define CONFIG_TONEMAP_OPENCL_FILTER 0
+#define CONFIG_TRANSPOSE_FILTER 0
+#define CONFIG_TRANSPOSE_NPP_FILTER 0
+#define CONFIG_TRIM_FILTER 0
+#define CONFIG_UNPREMULTIPLY_FILTER 0
+#define CONFIG_UNSHARP_FILTER 0
+#define CONFIG_UNSHARP_OPENCL_FILTER 0
+#define CONFIG_USPP_FILTER 0
+#define CONFIG_VAGUEDENOISER_FILTER 0
+#define CONFIG_VECTORSCOPE_FILTER 0
+#define CONFIG_VFLIP_FILTER 0
+#define CONFIG_VFRDET_FILTER 0
+#define CONFIG_VIDSTABDETECT_FILTER 0
+#define CONFIG_VIDSTABTRANSFORM_FILTER 0
+#define CONFIG_VIGNETTE_FILTER 0
+#define CONFIG_VMAFMOTION_FILTER 0
+#define CONFIG_VPP_QSV_FILTER 0
+#define CONFIG_VSTACK_FILTER 0
+#define CONFIG_W3FDIF_FILTER 0
+#define CONFIG_WAVEFORM_FILTER 0
+#define CONFIG_WEAVE_FILTER 0
+#define CONFIG_XBR_FILTER 0
+#define CONFIG_YADIF_FILTER 0
+#define CONFIG_ZMQ_FILTER 0
+#define CONFIG_ZOOMPAN_FILTER 0
+#define CONFIG_ZSCALE_FILTER 0
+#define CONFIG_ALLRGB_FILTER 0
+#define CONFIG_ALLYUV_FILTER 0
+#define CONFIG_CELLAUTO_FILTER 0
+#define CONFIG_COLOR_FILTER 0
+#define CONFIG_COREIMAGESRC_FILTER 0
+#define CONFIG_FREI0R_SRC_FILTER 0
+#define CONFIG_HALDCLUTSRC_FILTER 0
+#define CONFIG_LIFE_FILTER 0
+#define CONFIG_MANDELBROT_FILTER 0
+#define CONFIG_MPTESTSRC_FILTER 0
+#define CONFIG_NULLSRC_FILTER 0
+#define CONFIG_OPENCLSRC_FILTER 0
+#define CONFIG_PAL75BARS_FILTER 0
+#define CONFIG_PAL100BARS_FILTER 0
+#define CONFIG_RGBTESTSRC_FILTER 0
+#define CONFIG_SMPTEBARS_FILTER 0
+#define CONFIG_SMPTEHDBARS_FILTER 0
+#define CONFIG_TESTSRC_FILTER 0
+#define CONFIG_TESTSRC2_FILTER 0
+#define CONFIG_YUVTESTSRC_FILTER 0
+#define CONFIG_NULLSINK_FILTER 0
+#define CONFIG_ABITSCOPE_FILTER 0
+#define CONFIG_ADRAWGRAPH_FILTER 0
+#define CONFIG_AHISTOGRAM_FILTER 0
+#define CONFIG_APHASEMETER_FILTER 0
+#define CONFIG_AVECTORSCOPE_FILTER 0
+#define CONFIG_CONCAT_FILTER 0
+#define CONFIG_SHOWCQT_FILTER 0
+#define CONFIG_SHOWFREQS_FILTER 0
+#define CONFIG_SHOWSPECTRUM_FILTER 0
+#define CONFIG_SHOWSPECTRUMPIC_FILTER 0
+#define CONFIG_SHOWVOLUME_FILTER 0
+#define CONFIG_SHOWWAVES_FILTER 0
+#define CONFIG_SHOWWAVESPIC_FILTER 0
+#define CONFIG_SPECTRUMSYNTH_FILTER 0
+#define CONFIG_AMOVIE_FILTER 0
+#define CONFIG_MOVIE_FILTER 0
+#define CONFIG_AFIFO_FILTER 0
+#define CONFIG_FIFO_FILTER 0
 #define CONFIG_AA_DEMUXER 0
 #define CONFIG_AAC_DEMUXER 1
 #define CONFIG_AC3_DEMUXER 0
@@ -1198,10 +1965,14 @@
 #define CONFIG_AIFF_DEMUXER 0
 #define CONFIG_AIX_DEMUXER 0
 #define CONFIG_AMR_DEMUXER 1
+#define CONFIG_AMRNB_DEMUXER 0
+#define CONFIG_AMRWB_DEMUXER 0
 #define CONFIG_ANM_DEMUXER 0
 #define CONFIG_APC_DEMUXER 0
 #define CONFIG_APE_DEMUXER 0
 #define CONFIG_APNG_DEMUXER 0
+#define CONFIG_APTX_DEMUXER 0
+#define CONFIG_APTX_HD_DEMUXER 0
 #define CONFIG_AQTITLE_DEMUXER 0
 #define CONFIG_ASF_DEMUXER 0
 #define CONFIG_ASF_O_DEMUXER 0
@@ -1212,6 +1983,7 @@
 #define CONFIG_AVISYNTH_DEMUXER 0
 #define CONFIG_AVR_DEMUXER 0
 #define CONFIG_AVS_DEMUXER 0
+#define CONFIG_AVS2_DEMUXER 0
 #define CONFIG_BETHSOFTVID_DEMUXER 0
 #define CONFIG_BFI_DEMUXER 0
 #define CONFIG_BINTEXT_DEMUXER 0
@@ -1227,6 +1999,8 @@
 #define CONFIG_CDG_DEMUXER 0
 #define CONFIG_CDXL_DEMUXER 0
 #define CONFIG_CINE_DEMUXER 0
+#define CONFIG_CODEC2_DEMUXER 0
+#define CONFIG_CODEC2RAW_DEMUXER 0
 #define CONFIG_CONCAT_DEMUXER 0
 #define CONFIG_DASH_DEMUXER 0
 #define CONFIG_DATA_DEMUXER 0
@@ -1248,7 +2022,6 @@
 #define CONFIG_EA_CDATA_DEMUXER 0
 #define CONFIG_EAC3_DEMUXER 0
 #define CONFIG_EPAF_DEMUXER 0
-#define CONFIG_FFM_DEMUXER 0
 #define CONFIG_FFMETADATA_DEMUXER 0
 #define CONFIG_FILMSTRIP_DEMUXER 0
 #define CONFIG_FITS_DEMUXER 0
@@ -1330,6 +2103,7 @@
 #define CONFIG_MXG_DEMUXER 0
 #define CONFIG_NC_DEMUXER 0
 #define CONFIG_NISTSPHERE_DEMUXER 0
+#define CONFIG_NSP_DEMUXER 0
 #define CONFIG_NSV_DEMUXER 0
 #define CONFIG_NUT_DEMUXER 0
 #define CONFIG_NUV_DEMUXER 0
@@ -1376,6 +2150,7 @@
 #define CONFIG_S337M_DEMUXER 0
 #define CONFIG_SAMI_DEMUXER 0
 #define CONFIG_SAP_DEMUXER 0
+#define CONFIG_SBC_DEMUXER 0
 #define CONFIG_SBG_DEMUXER 0
 #define CONFIG_SCC_DEMUXER 0
 #define CONFIG_SDP_DEMUXER 0
@@ -1383,6 +2158,7 @@
 #define CONFIG_SDS_DEMUXER 0
 #define CONFIG_SDX_DEMUXER 0
 #define CONFIG_SEGAFILM_DEMUXER 0
+#define CONFIG_SER_DEMUXER 0
 #define CONFIG_SHORTEN_DEMUXER 0
 #define CONFIG_SIFF_DEMUXER 0
 #define CONFIG_SLN_DEMUXER 0
@@ -1410,6 +2186,7 @@
 #define CONFIG_TTA_DEMUXER 0
 #define CONFIG_TXD_DEMUXER 0
 #define CONFIG_TTY_DEMUXER 0
+#define CONFIG_TY_DEMUXER 0
 #define CONFIG_V210_DEMUXER 0
 #define CONFIG_V210X_DEMUXER 0
 #define CONFIG_VAG_DEMUXER 0
@@ -1463,642 +2240,11 @@
 #define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0
 #define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0
 #define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0
 #define CONFIG_LIBGME_DEMUXER 0
 #define CONFIG_LIBMODPLUG_DEMUXER 0
 #define CONFIG_LIBOPENMPT_DEMUXER 0
-#define CONFIG_A64MULTI_ENCODER 0
-#define CONFIG_A64MULTI5_ENCODER 0
-#define CONFIG_ALIAS_PIX_ENCODER 0
-#define CONFIG_AMV_ENCODER 0
-#define CONFIG_APNG_ENCODER 0
-#define CONFIG_ASV1_ENCODER 0
-#define CONFIG_ASV2_ENCODER 0
-#define CONFIG_AVRP_ENCODER 0
-#define CONFIG_AVUI_ENCODER 0
-#define CONFIG_AYUV_ENCODER 0
-#define CONFIG_BMP_ENCODER 0
-#define CONFIG_CINEPAK_ENCODER 0
-#define CONFIG_CLJR_ENCODER 0
-#define CONFIG_COMFORTNOISE_ENCODER 0
-#define CONFIG_DNXHD_ENCODER 0
-#define CONFIG_DPX_ENCODER 0
-#define CONFIG_DVVIDEO_ENCODER 0
-#define CONFIG_FFV1_ENCODER 0
-#define CONFIG_FFVHUFF_ENCODER 0
-#define CONFIG_FITS_ENCODER 0
-#define CONFIG_FLASHSV_ENCODER 0
-#define CONFIG_FLASHSV2_ENCODER 0
-#define CONFIG_FLV_ENCODER 0
-#define CONFIG_GIF_ENCODER 0
-#define CONFIG_H261_ENCODER 0
-#define CONFIG_H263_ENCODER 0
-#define CONFIG_H263P_ENCODER 0
-#define CONFIG_HAP_ENCODER 0
-#define CONFIG_HUFFYUV_ENCODER 0
-#define CONFIG_JPEG2000_ENCODER 0
-#define CONFIG_JPEGLS_ENCODER 0
-#define CONFIG_LJPEG_ENCODER 0
-#define CONFIG_MJPEG_ENCODER 0
-#define CONFIG_MPEG1VIDEO_ENCODER 0
-#define CONFIG_MPEG2VIDEO_ENCODER 0
-#define CONFIG_MPEG4_ENCODER 0
-#define CONFIG_MSMPEG4V2_ENCODER 0
-#define CONFIG_MSMPEG4V3_ENCODER 0
-#define CONFIG_MSVIDEO1_ENCODER 0
-#define CONFIG_PAM_ENCODER 0
-#define CONFIG_PBM_ENCODER 0
-#define CONFIG_PCX_ENCODER 0
-#define CONFIG_PGM_ENCODER 0
-#define CONFIG_PGMYUV_ENCODER 0
-#define CONFIG_PNG_ENCODER 0
-#define CONFIG_PPM_ENCODER 0
-#define CONFIG_PRORES_ENCODER 0
-#define CONFIG_PRORES_AW_ENCODER 0
-#define CONFIG_PRORES_KS_ENCODER 0
-#define CONFIG_QTRLE_ENCODER 0
-#define CONFIG_R10K_ENCODER 0
-#define CONFIG_R210_ENCODER 0
-#define CONFIG_RAWVIDEO_ENCODER 0
-#define CONFIG_ROQ_ENCODER 0
-#define CONFIG_RV10_ENCODER 0
-#define CONFIG_RV20_ENCODER 0
-#define CONFIG_S302M_ENCODER 0
-#define CONFIG_SGI_ENCODER 0
-#define CONFIG_SNOW_ENCODER 0
-#define CONFIG_SUNRAST_ENCODER 0
-#define CONFIG_SVQ1_ENCODER 0
-#define CONFIG_TARGA_ENCODER 0
-#define CONFIG_TIFF_ENCODER 0
-#define CONFIG_UTVIDEO_ENCODER 0
-#define CONFIG_V210_ENCODER 0
-#define CONFIG_V308_ENCODER 0
-#define CONFIG_V408_ENCODER 0
-#define CONFIG_V410_ENCODER 0
-#define CONFIG_VC2_ENCODER 0
-#define CONFIG_WRAPPED_AVFRAME_ENCODER 0
-#define CONFIG_WMV1_ENCODER 0
-#define CONFIG_WMV2_ENCODER 0
-#define CONFIG_XBM_ENCODER 0
-#define CONFIG_XFACE_ENCODER 0
-#define CONFIG_XWD_ENCODER 0
-#define CONFIG_Y41P_ENCODER 0
-#define CONFIG_YUV4_ENCODER 0
-#define CONFIG_ZLIB_ENCODER 0
-#define CONFIG_ZMBV_ENCODER 0
-#define CONFIG_AAC_ENCODER 0
-#define CONFIG_AC3_ENCODER 0
-#define CONFIG_AC3_FIXED_ENCODER 0
-#define CONFIG_ALAC_ENCODER 0
-#define CONFIG_DCA_ENCODER 0
-#define CONFIG_EAC3_ENCODER 0
-#define CONFIG_FLAC_ENCODER 0
-#define CONFIG_G723_1_ENCODER 0
-#define CONFIG_MLP_ENCODER 0
-#define CONFIG_MP2_ENCODER 0
-#define CONFIG_MP2FIXED_ENCODER 0
-#define CONFIG_NELLYMOSER_ENCODER 0
-#define CONFIG_OPUS_ENCODER 0
-#define CONFIG_RA_144_ENCODER 0
-#define CONFIG_SONIC_ENCODER 0
-#define CONFIG_SONIC_LS_ENCODER 0
-#define CONFIG_TRUEHD_ENCODER 0
-#define CONFIG_TTA_ENCODER 0
-#define CONFIG_VORBIS_ENCODER 0
-#define CONFIG_WAVPACK_ENCODER 0
-#define CONFIG_WMAV1_ENCODER 0
-#define CONFIG_WMAV2_ENCODER 0
-#define CONFIG_PCM_ALAW_ENCODER 0
-#define CONFIG_PCM_F32BE_ENCODER 0
-#define CONFIG_PCM_F32LE_ENCODER 0
-#define CONFIG_PCM_F64BE_ENCODER 0
-#define CONFIG_PCM_F64LE_ENCODER 0
-#define CONFIG_PCM_MULAW_ENCODER 0
-#define CONFIG_PCM_S8_ENCODER 0
-#define CONFIG_PCM_S8_PLANAR_ENCODER 0
-#define CONFIG_PCM_S16BE_ENCODER 0
-#define CONFIG_PCM_S16BE_PLANAR_ENCODER 0
-#define CONFIG_PCM_S16LE_ENCODER 0
-#define CONFIG_PCM_S16LE_PLANAR_ENCODER 0
-#define CONFIG_PCM_S24BE_ENCODER 0
-#define CONFIG_PCM_S24DAUD_ENCODER 0
-#define CONFIG_PCM_S24LE_ENCODER 0
-#define CONFIG_PCM_S24LE_PLANAR_ENCODER 0
-#define CONFIG_PCM_S32BE_ENCODER 0
-#define CONFIG_PCM_S32LE_ENCODER 0
-#define CONFIG_PCM_S32LE_PLANAR_ENCODER 0
-#define CONFIG_PCM_S64BE_ENCODER 0
-#define CONFIG_PCM_S64LE_ENCODER 0
-#define CONFIG_PCM_U8_ENCODER 0
-#define CONFIG_PCM_U16BE_ENCODER 0
-#define CONFIG_PCM_U16LE_ENCODER 0
-#define CONFIG_PCM_U24BE_ENCODER 0
-#define CONFIG_PCM_U24LE_ENCODER 0
-#define CONFIG_PCM_U32BE_ENCODER 0
-#define CONFIG_PCM_U32LE_ENCODER 0
-#define CONFIG_ROQ_DPCM_ENCODER 0
-#define CONFIG_ADPCM_ADX_ENCODER 0
-#define CONFIG_ADPCM_G722_ENCODER 0
-#define CONFIG_ADPCM_G726_ENCODER 0
-#define CONFIG_ADPCM_G726LE_ENCODER 0
-#define CONFIG_ADPCM_IMA_QT_ENCODER 0
-#define CONFIG_ADPCM_IMA_WAV_ENCODER 0
-#define CONFIG_ADPCM_MS_ENCODER 0
-#define CONFIG_ADPCM_SWF_ENCODER 0
-#define CONFIG_ADPCM_YAMAHA_ENCODER 0
-#define CONFIG_SSA_ENCODER 0
-#define CONFIG_ASS_ENCODER 0
-#define CONFIG_DVBSUB_ENCODER 0
-#define CONFIG_DVDSUB_ENCODER 0
-#define CONFIG_MOVTEXT_ENCODER 0
-#define CONFIG_SRT_ENCODER 0
-#define CONFIG_SUBRIP_ENCODER 0
-#define CONFIG_TEXT_ENCODER 0
-#define CONFIG_WEBVTT_ENCODER 0
-#define CONFIG_XSUB_ENCODER 0
-#define CONFIG_AAC_AT_ENCODER 0
-#define CONFIG_ALAC_AT_ENCODER 0
-#define CONFIG_ILBC_AT_ENCODER 0
-#define CONFIG_PCM_ALAW_AT_ENCODER 0
-#define CONFIG_PCM_MULAW_AT_ENCODER 0
-#define CONFIG_LIBFDK_AAC_ENCODER 0
-#define CONFIG_LIBGSM_ENCODER 0
-#define CONFIG_LIBGSM_MS_ENCODER 0
-#define CONFIG_LIBILBC_ENCODER 0
-#define CONFIG_LIBMP3LAME_ENCODER 0
-#define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0
-#define CONFIG_LIBOPENJPEG_ENCODER 0
-#define CONFIG_LIBOPUS_ENCODER 0
-#define CONFIG_LIBSHINE_ENCODER 0
-#define CONFIG_LIBSPEEX_ENCODER 0
-#define CONFIG_LIBTHEORA_ENCODER 0
-#define CONFIG_LIBTWOLAME_ENCODER 0
-#define CONFIG_LIBVO_AMRWBENC_ENCODER 0
-#define CONFIG_LIBVORBIS_ENCODER 0
-#define CONFIG_LIBVPX_VP8_ENCODER 0
-#define CONFIG_LIBVPX_VP9_ENCODER 0
-#define CONFIG_LIBWAVPACK_ENCODER 0
-#define CONFIG_LIBWEBP_ANIM_ENCODER 0
-#define CONFIG_LIBWEBP_ENCODER 0
-#define CONFIG_LIBX262_ENCODER 0
-#define CONFIG_LIBX264_ENCODER 0
-#define CONFIG_LIBX264RGB_ENCODER 0
-#define CONFIG_LIBX265_ENCODER 0
-#define CONFIG_LIBXAVS_ENCODER 0
-#define CONFIG_LIBXVID_ENCODER 0
-#define CONFIG_H263_V4L2M2M_ENCODER 0
-#define CONFIG_LIBOPENH264_ENCODER 0
-#define CONFIG_H264_NVENC_ENCODER 0
-#define CONFIG_H264_OMX_ENCODER 0
-#define CONFIG_H264_QSV_ENCODER 0
-#define CONFIG_H264_V4L2M2M_ENCODER 0
-#define CONFIG_H264_VAAPI_ENCODER 0
-#define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0
-#define CONFIG_NVENC_ENCODER 0
-#define CONFIG_NVENC_H264_ENCODER 0
-#define CONFIG_NVENC_HEVC_ENCODER 0
-#define CONFIG_HEVC_NVENC_ENCODER 0
-#define CONFIG_HEVC_QSV_ENCODER 0
-#define CONFIG_HEVC_V4L2M2M_ENCODER 0
-#define CONFIG_HEVC_VAAPI_ENCODER 0
-#define CONFIG_LIBKVAZAAR_ENCODER 0
-#define CONFIG_MJPEG_VAAPI_ENCODER 0
-#define CONFIG_MPEG2_QSV_ENCODER 0
-#define CONFIG_MPEG2_VAAPI_ENCODER 0
-#define CONFIG_MPEG4_V4L2M2M_ENCODER 0
-#define CONFIG_VP8_V4L2M2M_ENCODER 0
-#define CONFIG_VP8_VAAPI_ENCODER 0
-#define CONFIG_VP9_VAAPI_ENCODER 0
-#define CONFIG_ABENCH_FILTER 0
-#define CONFIG_ACOMPRESSOR_FILTER 0
-#define CONFIG_ACOPY_FILTER 0
-#define CONFIG_ACROSSFADE_FILTER 0
-#define CONFIG_ACRUSHER_FILTER 0
-#define CONFIG_ADELAY_FILTER 0
-#define CONFIG_AECHO_FILTER 0
-#define CONFIG_AEMPHASIS_FILTER 0
-#define CONFIG_AEVAL_FILTER 0
-#define CONFIG_AFADE_FILTER 0
-#define CONFIG_AFFTFILT_FILTER 0
-#define CONFIG_AFIR_FILTER 0
-#define CONFIG_AFORMAT_FILTER 0
-#define CONFIG_AGATE_FILTER 0
-#define CONFIG_AINTERLEAVE_FILTER 0
-#define CONFIG_ALIMITER_FILTER 0
-#define CONFIG_ALLPASS_FILTER 0
-#define CONFIG_ALOOP_FILTER 0
-#define CONFIG_AMERGE_FILTER 0
-#define CONFIG_AMETADATA_FILTER 0
-#define CONFIG_AMIX_FILTER 0
-#define CONFIG_ANEQUALIZER_FILTER 0
-#define CONFIG_ANULL_FILTER 0
-#define CONFIG_APAD_FILTER 0
-#define CONFIG_APERMS_FILTER 0
-#define CONFIG_APHASER_FILTER 0
-#define CONFIG_APULSATOR_FILTER 0
-#define CONFIG_AREALTIME_FILTER 0
-#define CONFIG_ARESAMPLE_FILTER 0
-#define CONFIG_AREVERSE_FILTER 0
-#define CONFIG_ASELECT_FILTER 0
-#define CONFIG_ASENDCMD_FILTER 0
-#define CONFIG_ASETNSAMPLES_FILTER 0
-#define CONFIG_ASETPTS_FILTER 0
-#define CONFIG_ASETRATE_FILTER 0
-#define CONFIG_ASETTB_FILTER 0
-#define CONFIG_ASHOWINFO_FILTER 0
-#define CONFIG_ASIDEDATA_FILTER 0
-#define CONFIG_ASPLIT_FILTER 0
-#define CONFIG_ASTATS_FILTER 0
-#define CONFIG_ASTREAMSELECT_FILTER 0
-#define CONFIG_ATEMPO_FILTER 0
-#define CONFIG_ATRIM_FILTER 0
-#define CONFIG_AZMQ_FILTER 0
-#define CONFIG_BANDPASS_FILTER 0
-#define CONFIG_BANDREJECT_FILTER 0
-#define CONFIG_BASS_FILTER 0
-#define CONFIG_BIQUAD_FILTER 0
-#define CONFIG_BS2B_FILTER 0
-#define CONFIG_CHANNELMAP_FILTER 0
-#define CONFIG_CHANNELSPLIT_FILTER 0
-#define CONFIG_CHORUS_FILTER 0
-#define CONFIG_COMPAND_FILTER 0
-#define CONFIG_COMPENSATIONDELAY_FILTER 0
-#define CONFIG_CROSSFEED_FILTER 0
-#define CONFIG_CRYSTALIZER_FILTER 0
-#define CONFIG_DCSHIFT_FILTER 0
-#define CONFIG_DYNAUDNORM_FILTER 0
-#define CONFIG_EARWAX_FILTER 0
-#define CONFIG_EBUR128_FILTER 0
-#define CONFIG_EQUALIZER_FILTER 0
-#define CONFIG_EXTRASTEREO_FILTER 0
-#define CONFIG_FIREQUALIZER_FILTER 0
-#define CONFIG_FLANGER_FILTER 0
-#define CONFIG_HAAS_FILTER 0
-#define CONFIG_HDCD_FILTER 0
-#define CONFIG_HEADPHONE_FILTER 0
-#define CONFIG_HIGHPASS_FILTER 0
-#define CONFIG_JOIN_FILTER 0
-#define CONFIG_LADSPA_FILTER 0
-#define CONFIG_LOUDNORM_FILTER 0
-#define CONFIG_LOWPASS_FILTER 0
-#define CONFIG_PAN_FILTER 0
-#define CONFIG_REPLAYGAIN_FILTER 0
-#define CONFIG_RESAMPLE_FILTER 0
-#define CONFIG_RUBBERBAND_FILTER 0
-#define CONFIG_SIDECHAINCOMPRESS_FILTER 0
-#define CONFIG_SIDECHAINGATE_FILTER 0
-#define CONFIG_SILENCEDETECT_FILTER 0
-#define CONFIG_SILENCEREMOVE_FILTER 0
-#define CONFIG_SOFALIZER_FILTER 0
-#define CONFIG_STEREOTOOLS_FILTER 0
-#define CONFIG_STEREOWIDEN_FILTER 0
-#define CONFIG_SUPEREQUALIZER_FILTER 0
-#define CONFIG_SURROUND_FILTER 0
-#define CONFIG_TREBLE_FILTER 0
-#define CONFIG_TREMOLO_FILTER 0
-#define CONFIG_VIBRATO_FILTER 0
-#define CONFIG_VOLUME_FILTER 0
-#define CONFIG_VOLUMEDETECT_FILTER 0
-#define CONFIG_AEVALSRC_FILTER 0
-#define CONFIG_ANOISESRC_FILTER 0
-#define CONFIG_ANULLSRC_FILTER 0
-#define CONFIG_FLITE_FILTER 0
-#define CONFIG_SINE_FILTER 0
-#define CONFIG_ANULLSINK_FILTER 0
-#define CONFIG_ALPHAEXTRACT_FILTER 0
-#define CONFIG_ALPHAMERGE_FILTER 0
-#define CONFIG_ASS_FILTER 0
-#define CONFIG_ATADENOISE_FILTER 0
-#define CONFIG_AVGBLUR_FILTER 0
-#define CONFIG_BBOX_FILTER 0
-#define CONFIG_BENCH_FILTER 0
-#define CONFIG_BITPLANENOISE_FILTER 0
-#define CONFIG_BLACKDETECT_FILTER 0
-#define CONFIG_BLACKFRAME_FILTER 0
-#define CONFIG_BLEND_FILTER 0
-#define CONFIG_BOXBLUR_FILTER 0
-#define CONFIG_BWDIF_FILTER 0
-#define CONFIG_CHROMAKEY_FILTER 0
-#define CONFIG_CIESCOPE_FILTER 0
-#define CONFIG_CODECVIEW_FILTER 0
-#define CONFIG_COLORBALANCE_FILTER 0
-#define CONFIG_COLORCHANNELMIXER_FILTER 0
-#define CONFIG_COLORKEY_FILTER 0
-#define CONFIG_COLORLEVELS_FILTER 0
-#define CONFIG_COLORMATRIX_FILTER 0
-#define CONFIG_COLORSPACE_FILTER 0
-#define CONFIG_CONVOLUTION_FILTER 0
-#define CONFIG_CONVOLVE_FILTER 0
-#define CONFIG_COPY_FILTER 0
-#define CONFIG_COREIMAGE_FILTER 0
-#define CONFIG_COVER_RECT_FILTER 0
-#define CONFIG_CROP_FILTER 0
-#define CONFIG_CROPDETECT_FILTER 0
-#define CONFIG_CURVES_FILTER 0
-#define CONFIG_DATASCOPE_FILTER 0
-#define CONFIG_DCTDNOIZ_FILTER 0
-#define CONFIG_DEBAND_FILTER 0
-#define CONFIG_DECIMATE_FILTER 0
-#define CONFIG_DEFLATE_FILTER 0
-#define CONFIG_DEFLICKER_FILTER 0
-#define CONFIG_DEINTERLACE_QSV_FILTER 0
-#define CONFIG_DEINTERLACE_VAAPI_FILTER 0
-#define CONFIG_DEJUDDER_FILTER 0
-#define CONFIG_DELOGO_FILTER 0
-#define CONFIG_DESHAKE_FILTER 0
-#define CONFIG_DESPILL_FILTER 0
-#define CONFIG_DETELECINE_FILTER 0
-#define CONFIG_DILATION_FILTER 0
-#define CONFIG_DISPLACE_FILTER 0
-#define CONFIG_DOUBLEWEAVE_FILTER 0
-#define CONFIG_DRAWBOX_FILTER 0
-#define CONFIG_DRAWGRAPH_FILTER 0
-#define CONFIG_DRAWGRID_FILTER 0
-#define CONFIG_DRAWTEXT_FILTER 0
-#define CONFIG_EDGEDETECT_FILTER 0
-#define CONFIG_ELBG_FILTER 0
-#define CONFIG_EQ_FILTER 0
-#define CONFIG_EROSION_FILTER 0
-#define CONFIG_EXTRACTPLANES_FILTER 0
-#define CONFIG_FADE_FILTER 0
-#define CONFIG_FFTFILT_FILTER 0
-#define CONFIG_FIELD_FILTER 0
-#define CONFIG_FIELDHINT_FILTER 0
-#define CONFIG_FIELDMATCH_FILTER 0
-#define CONFIG_FIELDORDER_FILTER 0
-#define CONFIG_FIND_RECT_FILTER 0
-#define CONFIG_FLOODFILL_FILTER 0
-#define CONFIG_FORMAT_FILTER 0
-#define CONFIG_FPS_FILTER 0
-#define CONFIG_FRAMEPACK_FILTER 0
-#define CONFIG_FRAMERATE_FILTER 0
-#define CONFIG_FRAMESTEP_FILTER 0
-#define CONFIG_FREI0R_FILTER 0
-#define CONFIG_FSPP_FILTER 0
-#define CONFIG_GBLUR_FILTER 0
-#define CONFIG_GEQ_FILTER 0
-#define CONFIG_GRADFUN_FILTER 0
-#define CONFIG_HALDCLUT_FILTER 0
-#define CONFIG_HFLIP_FILTER 0
-#define CONFIG_HISTEQ_FILTER 0
-#define CONFIG_HISTOGRAM_FILTER 0
-#define CONFIG_HQDN3D_FILTER 0
-#define CONFIG_HQX_FILTER 0
-#define CONFIG_HSTACK_FILTER 0
-#define CONFIG_HUE_FILTER 0
-#define CONFIG_HWDOWNLOAD_FILTER 0
-#define CONFIG_HWMAP_FILTER 0
-#define CONFIG_HWUPLOAD_FILTER 0
-#define CONFIG_HWUPLOAD_CUDA_FILTER 0
-#define CONFIG_HYSTERESIS_FILTER 0
-#define CONFIG_IDET_FILTER 0
-#define CONFIG_IL_FILTER 0
-#define CONFIG_INFLATE_FILTER 0
-#define CONFIG_INTERLACE_FILTER 0
-#define CONFIG_INTERLEAVE_FILTER 0
-#define CONFIG_KERNDEINT_FILTER 0
-#define CONFIG_LENSCORRECTION_FILTER 0
-#define CONFIG_LIBVMAF_FILTER 0
-#define CONFIG_LIMITER_FILTER 0
-#define CONFIG_LOOP_FILTER 0
-#define CONFIG_LUMAKEY_FILTER 0
-#define CONFIG_LUT_FILTER 0
-#define CONFIG_LUT2_FILTER 0
-#define CONFIG_LUT3D_FILTER 0
-#define CONFIG_LUTRGB_FILTER 0
-#define CONFIG_LUTYUV_FILTER 0
-#define CONFIG_MASKEDCLAMP_FILTER 0
-#define CONFIG_MASKEDMERGE_FILTER 0
-#define CONFIG_MCDEINT_FILTER 0
-#define CONFIG_MERGEPLANES_FILTER 0
-#define CONFIG_MESTIMATE_FILTER 0
-#define CONFIG_METADATA_FILTER 0
-#define CONFIG_MIDEQUALIZER_FILTER 0
-#define CONFIG_MINTERPOLATE_FILTER 0
-#define CONFIG_MPDECIMATE_FILTER 0
-#define CONFIG_NEGATE_FILTER 0
-#define CONFIG_NLMEANS_FILTER 0
-#define CONFIG_NNEDI_FILTER 0
-#define CONFIG_NOFORMAT_FILTER 0
-#define CONFIG_NOISE_FILTER 0
-#define CONFIG_NULL_FILTER 0
-#define CONFIG_OCR_FILTER 0
-#define CONFIG_OCV_FILTER 0
-#define CONFIG_OSCILLOSCOPE_FILTER 0
-#define CONFIG_OVERLAY_FILTER 0
-#define CONFIG_OWDENOISE_FILTER 0
-#define CONFIG_PAD_FILTER 0
-#define CONFIG_PALETTEGEN_FILTER 0
-#define CONFIG_PALETTEUSE_FILTER 0
-#define CONFIG_PERMS_FILTER 0
-#define CONFIG_PERSPECTIVE_FILTER 0
-#define CONFIG_PHASE_FILTER 0
-#define CONFIG_PIXDESCTEST_FILTER 0
-#define CONFIG_PIXSCOPE_FILTER 0
-#define CONFIG_PP_FILTER 0
-#define CONFIG_PP7_FILTER 0
-#define CONFIG_PREMULTIPLY_FILTER 0
-#define CONFIG_PREWITT_FILTER 0
-#define CONFIG_PSEUDOCOLOR_FILTER 0
-#define CONFIG_PSNR_FILTER 0
-#define CONFIG_PULLUP_FILTER 0
-#define CONFIG_QP_FILTER 0
-#define CONFIG_RANDOM_FILTER 0
-#define CONFIG_READEIA608_FILTER 0
-#define CONFIG_READVITC_FILTER 0
-#define CONFIG_REALTIME_FILTER 0
-#define CONFIG_REMAP_FILTER 0
-#define CONFIG_REMOVEGRAIN_FILTER 0
-#define CONFIG_REMOVELOGO_FILTER 0
-#define CONFIG_REPEATFIELDS_FILTER 0
-#define CONFIG_REVERSE_FILTER 0
-#define CONFIG_ROBERTS_FILTER 0
-#define CONFIG_ROTATE_FILTER 0
-#define CONFIG_SAB_FILTER 0
-#define CONFIG_SCALE_FILTER 0
-#define CONFIG_SCALE_CUDA_FILTER 0
-#define CONFIG_SCALE_NPP_FILTER 0
-#define CONFIG_SCALE_QSV_FILTER 0
-#define CONFIG_SCALE_VAAPI_FILTER 0
-#define CONFIG_SCALE2REF_FILTER 0
-#define CONFIG_SELECT_FILTER 0
-#define CONFIG_SELECTIVECOLOR_FILTER 0
-#define CONFIG_SENDCMD_FILTER 0
-#define CONFIG_SEPARATEFIELDS_FILTER 0
-#define CONFIG_SETDAR_FILTER 0
-#define CONFIG_SETFIELD_FILTER 0
-#define CONFIG_SETPTS_FILTER 0
-#define CONFIG_SETSAR_FILTER 0
-#define CONFIG_SETTB_FILTER 0
-#define CONFIG_SHOWINFO_FILTER 0
-#define CONFIG_SHOWPALETTE_FILTER 0
-#define CONFIG_SHUFFLEFRAMES_FILTER 0
-#define CONFIG_SHUFFLEPLANES_FILTER 0
-#define CONFIG_SIDEDATA_FILTER 0
-#define CONFIG_SIGNALSTATS_FILTER 0
-#define CONFIG_SIGNATURE_FILTER 0
-#define CONFIG_SMARTBLUR_FILTER 0
-#define CONFIG_SOBEL_FILTER 0
-#define CONFIG_SPLIT_FILTER 0
-#define CONFIG_SPP_FILTER 0
-#define CONFIG_SSIM_FILTER 0
-#define CONFIG_STEREO3D_FILTER 0
-#define CONFIG_STREAMSELECT_FILTER 0
-#define CONFIG_SUBTITLES_FILTER 0
-#define CONFIG_SUPER2XSAI_FILTER 0
-#define CONFIG_SWAPRECT_FILTER 0
-#define CONFIG_SWAPUV_FILTER 0
-#define CONFIG_TBLEND_FILTER 0
-#define CONFIG_TELECINE_FILTER 0
-#define CONFIG_THRESHOLD_FILTER 0
-#define CONFIG_THUMBNAIL_FILTER 0
-#define CONFIG_THUMBNAIL_CUDA_FILTER 0
-#define CONFIG_TILE_FILTER 0
-#define CONFIG_TINTERLACE_FILTER 0
-#define CONFIG_TLUT2_FILTER 0
-#define CONFIG_TONEMAP_FILTER 0
-#define CONFIG_TRANSPOSE_FILTER 0
-#define CONFIG_TRIM_FILTER 0
-#define CONFIG_UNPREMULTIPLY_FILTER 0
-#define CONFIG_UNSHARP_FILTER 0
-#define CONFIG_USPP_FILTER 0
-#define CONFIG_VAGUEDENOISER_FILTER 0
-#define CONFIG_VECTORSCOPE_FILTER 0
-#define CONFIG_VFLIP_FILTER 0
-#define CONFIG_VIDSTABDETECT_FILTER 0
-#define CONFIG_VIDSTABTRANSFORM_FILTER 0
-#define CONFIG_VIGNETTE_FILTER 0
-#define CONFIG_VMAFMOTION_FILTER 0
-#define CONFIG_VSTACK_FILTER 0
-#define CONFIG_W3FDIF_FILTER 0
-#define CONFIG_WAVEFORM_FILTER 0
-#define CONFIG_WEAVE_FILTER 0
-#define CONFIG_XBR_FILTER 0
-#define CONFIG_YADIF_FILTER 0
-#define CONFIG_ZMQ_FILTER 0
-#define CONFIG_ZOOMPAN_FILTER 0
-#define CONFIG_ZSCALE_FILTER 0
-#define CONFIG_ALLRGB_FILTER 0
-#define CONFIG_ALLYUV_FILTER 0
-#define CONFIG_CELLAUTO_FILTER 0
-#define CONFIG_COLOR_FILTER 0
-#define CONFIG_COREIMAGESRC_FILTER 0
-#define CONFIG_FREI0R_SRC_FILTER 0
-#define CONFIG_HALDCLUTSRC_FILTER 0
-#define CONFIG_LIFE_FILTER 0
-#define CONFIG_MANDELBROT_FILTER 0
-#define CONFIG_MPTESTSRC_FILTER 0
-#define CONFIG_NULLSRC_FILTER 0
-#define CONFIG_RGBTESTSRC_FILTER 0
-#define CONFIG_SMPTEBARS_FILTER 0
-#define CONFIG_SMPTEHDBARS_FILTER 0
-#define CONFIG_TESTSRC_FILTER 0
-#define CONFIG_TESTSRC2_FILTER 0
-#define CONFIG_YUVTESTSRC_FILTER 0
-#define CONFIG_NULLSINK_FILTER 0
-#define CONFIG_ABITSCOPE_FILTER 0
-#define CONFIG_ADRAWGRAPH_FILTER 0
-#define CONFIG_AHISTOGRAM_FILTER 0
-#define CONFIG_APHASEMETER_FILTER 0
-#define CONFIG_AVECTORSCOPE_FILTER 0
-#define CONFIG_CONCAT_FILTER 0
-#define CONFIG_SHOWCQT_FILTER 0
-#define CONFIG_SHOWFREQS_FILTER 0
-#define CONFIG_SHOWSPECTRUM_FILTER 0
-#define CONFIG_SHOWSPECTRUMPIC_FILTER 0
-#define CONFIG_SHOWVOLUME_FILTER 0
-#define CONFIG_SHOWWAVES_FILTER 0
-#define CONFIG_SHOWWAVESPIC_FILTER 0
-#define CONFIG_SPECTRUMSYNTH_FILTER 0
-#define CONFIG_AMOVIE_FILTER 0
-#define CONFIG_MOVIE_FILTER 0
-#define CONFIG_H263_VAAPI_HWACCEL 0
-#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_H264_CUVID_HWACCEL 0
-#define CONFIG_H264_D3D11VA_HWACCEL 0
-#define CONFIG_H264_D3D11VA2_HWACCEL 0
-#define CONFIG_H264_DXVA2_HWACCEL 0
-#define CONFIG_H264_MEDIACODEC_HWACCEL 0
-#define CONFIG_H264_MMAL_HWACCEL 0
-#define CONFIG_H264_QSV_HWACCEL 0
-#define CONFIG_H264_VAAPI_HWACCEL 0
-#define CONFIG_H264_VDA_HWACCEL 0
-#define CONFIG_H264_VDA_OLD_HWACCEL 0
-#define CONFIG_H264_VDPAU_HWACCEL 0
-#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_HEVC_CUVID_HWACCEL 0
-#define CONFIG_HEVC_D3D11VA_HWACCEL 0
-#define CONFIG_HEVC_D3D11VA2_HWACCEL 0
-#define CONFIG_HEVC_DXVA2_HWACCEL 0
-#define CONFIG_HEVC_MEDIACODEC_HWACCEL 0
-#define CONFIG_HEVC_QSV_HWACCEL 0
-#define CONFIG_HEVC_VAAPI_HWACCEL 0
-#define CONFIG_HEVC_VDPAU_HWACCEL 0
-#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MJPEG_CUVID_HWACCEL 0
-#define CONFIG_MPEG1_CUVID_HWACCEL 0
-#define CONFIG_MPEG1_XVMC_HWACCEL 0
-#define CONFIG_MPEG1_VDPAU_HWACCEL 0
-#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MPEG2_CUVID_HWACCEL 0
-#define CONFIG_MPEG2_XVMC_HWACCEL 0
-#define CONFIG_MPEG2_D3D11VA_HWACCEL 0
-#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
-#define CONFIG_MPEG2_DXVA2_HWACCEL 0
-#define CONFIG_MPEG2_MMAL_HWACCEL 0
-#define CONFIG_MPEG2_QSV_HWACCEL 0
-#define CONFIG_MPEG2_VAAPI_HWACCEL 0
-#define CONFIG_MPEG2_VDPAU_HWACCEL 0
-#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MPEG2_MEDIACODEC_HWACCEL 0
-#define CONFIG_MPEG4_CUVID_HWACCEL 0
-#define CONFIG_MPEG4_MEDIACODEC_HWACCEL 0
-#define CONFIG_MPEG4_MMAL_HWACCEL 0
-#define CONFIG_MPEG4_VAAPI_HWACCEL 0
-#define CONFIG_MPEG4_VDPAU_HWACCEL 0
-#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_VC1_CUVID_HWACCEL 0
-#define CONFIG_VC1_D3D11VA_HWACCEL 0
-#define CONFIG_VC1_D3D11VA2_HWACCEL 0
-#define CONFIG_VC1_DXVA2_HWACCEL 0
-#define CONFIG_VC1_VAAPI_HWACCEL 0
-#define CONFIG_VC1_VDPAU_HWACCEL 0
-#define CONFIG_VC1_MMAL_HWACCEL 0
-#define CONFIG_VC1_QSV_HWACCEL 0
-#define CONFIG_VP8_CUVID_HWACCEL 0
-#define CONFIG_VP8_MEDIACODEC_HWACCEL 0
-#define CONFIG_VP8_QSV_HWACCEL 0
-#define CONFIG_VP9_CUVID_HWACCEL 0
-#define CONFIG_VP9_D3D11VA_HWACCEL 0
-#define CONFIG_VP9_D3D11VA2_HWACCEL 0
-#define CONFIG_VP9_DXVA2_HWACCEL 0
-#define CONFIG_VP9_MEDIACODEC_HWACCEL 0
-#define CONFIG_VP9_VAAPI_HWACCEL 0
-#define CONFIG_WMV3_D3D11VA_HWACCEL 0
-#define CONFIG_WMV3_D3D11VA2_HWACCEL 0
-#define CONFIG_WMV3_DXVA2_HWACCEL 0
-#define CONFIG_WMV3_VAAPI_HWACCEL 0
-#define CONFIG_WMV3_VDPAU_HWACCEL 0
-#define CONFIG_ALSA_INDEV 0
-#define CONFIG_AVFOUNDATION_INDEV 0
-#define CONFIG_BKTR_INDEV 0
-#define CONFIG_DECKLINK_INDEV 0
-#define CONFIG_LIBNDI_NEWTEK_INDEV 0
-#define CONFIG_DSHOW_INDEV 0
-#define CONFIG_FBDEV_INDEV 0
-#define CONFIG_GDIGRAB_INDEV 0
-#define CONFIG_IEC61883_INDEV 0
-#define CONFIG_JACK_INDEV 0
-#define CONFIG_KMSGRAB_INDEV 0
-#define CONFIG_LAVFI_INDEV 0
-#define CONFIG_OPENAL_INDEV 0
-#define CONFIG_OSS_INDEV 0
-#define CONFIG_PULSE_INDEV 0
-#define CONFIG_SNDIO_INDEV 0
-#define CONFIG_V4L2_INDEV 0
-#define CONFIG_VFWCAP_INDEV 0
-#define CONFIG_XCBGRAB_INDEV 0
-#define CONFIG_LIBCDIO_INDEV 0
-#define CONFIG_LIBDC1394_INDEV 0
+#define CONFIG_VAPOURSYNTH_DEMUXER 0
 #define CONFIG_A64_MUXER 0
 #define CONFIG_AC3_MUXER 0
 #define CONFIG_ADTS_MUXER 0
@@ -2106,6 +2252,8 @@
 #define CONFIG_AIFF_MUXER 0
 #define CONFIG_AMR_MUXER 0
 #define CONFIG_APNG_MUXER 0
+#define CONFIG_APTX_MUXER 0
+#define CONFIG_APTX_HD_MUXER 0
 #define CONFIG_ASF_MUXER 0
 #define CONFIG_ASS_MUXER 0
 #define CONFIG_AST_MUXER 0
@@ -2113,9 +2261,12 @@
 #define CONFIG_AU_MUXER 0
 #define CONFIG_AVI_MUXER 0
 #define CONFIG_AVM2_MUXER 0
+#define CONFIG_AVS2_MUXER 0
 #define CONFIG_BIT_MUXER 0
 #define CONFIG_CAF_MUXER 0
 #define CONFIG_CAVSVIDEO_MUXER 0
+#define CONFIG_CODEC2_MUXER 0
+#define CONFIG_CODEC2RAW_MUXER 0
 #define CONFIG_CRC_MUXER 0
 #define CONFIG_DASH_MUXER 0
 #define CONFIG_DATA_MUXER 0
@@ -2126,9 +2277,9 @@
 #define CONFIG_DV_MUXER 0
 #define CONFIG_EAC3_MUXER 0
 #define CONFIG_F4V_MUXER 0
-#define CONFIG_FFM_MUXER 0
 #define CONFIG_FFMETADATA_MUXER 0
 #define CONFIG_FIFO_MUXER 0
+#define CONFIG_FIFO_TEST_MUXER 0
 #define CONFIG_FILMSTRIP_MUXER 0
 #define CONFIG_FITS_MUXER 0
 #define CONFIG_FLAC_MUXER 0
@@ -2221,7 +2372,9 @@
 #define CONFIG_RTP_MPEGTS_MUXER 0
 #define CONFIG_RTSP_MUXER 0
 #define CONFIG_SAP_MUXER 0
+#define CONFIG_SBC_MUXER 0
 #define CONFIG_SCC_MUXER 0
+#define CONFIG_SEGAFILM_MUXER 0
 #define CONFIG_SEGMENT_MUXER 0
 #define CONFIG_STREAM_SEGMENT_MUXER 0
 #define CONFIG_SINGLEJPEG_MUXER 0
@@ -2254,58 +2407,6 @@
 #define CONFIG_WV_MUXER 0
 #define CONFIG_YUV4MPEGPIPE_MUXER 0
 #define CONFIG_CHROMAPRINT_MUXER 0
-#define CONFIG_ALSA_OUTDEV 0
-#define CONFIG_CACA_OUTDEV 0
-#define CONFIG_DECKLINK_OUTDEV 0
-#define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
-#define CONFIG_FBDEV_OUTDEV 0
-#define CONFIG_OPENGL_OUTDEV 0
-#define CONFIG_OSS_OUTDEV 0
-#define CONFIG_PULSE_OUTDEV 0
-#define CONFIG_SDL2_OUTDEV 0
-#define CONFIG_SNDIO_OUTDEV 0
-#define CONFIG_V4L2_OUTDEV 0
-#define CONFIG_XV_OUTDEV 0
-#define CONFIG_AAC_PARSER 1
-#define CONFIG_AAC_LATM_PARSER 0
-#define CONFIG_AC3_PARSER 0
-#define CONFIG_ADX_PARSER 0
-#define CONFIG_BMP_PARSER 0
-#define CONFIG_CAVSVIDEO_PARSER 0
-#define CONFIG_COOK_PARSER 0
-#define CONFIG_DCA_PARSER 0
-#define CONFIG_DIRAC_PARSER 0
-#define CONFIG_DNXHD_PARSER 0
-#define CONFIG_DPX_PARSER 0
-#define CONFIG_DVAUDIO_PARSER 0
-#define CONFIG_DVBSUB_PARSER 0
-#define CONFIG_DVDSUB_PARSER 0
-#define CONFIG_DVD_NAV_PARSER 0
-#define CONFIG_FLAC_PARSER 1
-#define CONFIG_G729_PARSER 0
-#define CONFIG_GSM_PARSER 1
-#define CONFIG_H261_PARSER 0
-#define CONFIG_H263_PARSER 1
-#define CONFIG_H264_PARSER 1
-#define CONFIG_HEVC_PARSER 0
-#define CONFIG_MJPEG_PARSER 0
-#define CONFIG_MLP_PARSER 0
-#define CONFIG_MPEG4VIDEO_PARSER 1
-#define CONFIG_MPEGAUDIO_PARSER 1
-#define CONFIG_MPEGVIDEO_PARSER 0
-#define CONFIG_OPUS_PARSER 1
-#define CONFIG_PNG_PARSER 0
-#define CONFIG_PNM_PARSER 0
-#define CONFIG_RV30_PARSER 0
-#define CONFIG_RV40_PARSER 0
-#define CONFIG_SIPR_PARSER 0
-#define CONFIG_TAK_PARSER 0
-#define CONFIG_VC1_PARSER 0
-#define CONFIG_VORBIS_PARSER 1
-#define CONFIG_VP3_PARSER 1
-#define CONFIG_VP8_PARSER 1
-#define CONFIG_VP9_PARSER 0
-#define CONFIG_XMA_PARSER 0
 #define CONFIG_ASYNC_PROTOCOL 0
 #define CONFIG_BLURAY_PROTOCOL 0
 #define CONFIG_CACHE_PROTOCOL 0
@@ -2339,10 +2440,7 @@
 #define CONFIG_SUBFILE_PROTOCOL 0
 #define CONFIG_TEE_PROTOCOL 0
 #define CONFIG_TCP_PROTOCOL 0
-#define CONFIG_TLS_GNUTLS_PROTOCOL 0
-#define CONFIG_TLS_SCHANNEL_PROTOCOL 0
-#define CONFIG_TLS_SECURETRANSPORT_PROTOCOL 0
-#define CONFIG_TLS_OPENSSL_PROTOCOL 0
+#define CONFIG_TLS_PROTOCOL 0
 #define CONFIG_UDP_PROTOCOL 0
 #define CONFIG_UDPLITE_PROTOCOL 0
 #define CONFIG_UNIX_PROTOCOL 0
@@ -2351,6 +2449,7 @@
 #define CONFIG_LIBRTMPS_PROTOCOL 0
 #define CONFIG_LIBRTMPT_PROTOCOL 0
 #define CONFIG_LIBRTMPTE_PROTOCOL 0
+#define CONFIG_LIBSRT_PROTOCOL 0
 #define CONFIG_LIBSSH_PROTOCOL 0
 #define CONFIG_LIBSMBCLIENT_PROTOCOL 0
 #endif /* FFMPEG_CONFIG_H */

diff --git a/fuchsia/config/max/arm64/libavutil/avconfig.h b/fuchsia/config/max/arm64/libavutil/avconfig.h
index f10aa61..c289fbb 100644
--- a/fuchsia/config/max/arm64/libavutil/avconfig.h
+++ b/fuchsia/config/max/arm64/libavutil/avconfig.h

@@ -1,4 +1,4 @@
-/* Generated by ffconf */
+/* Generated by ffmpeg configure */
 #ifndef AVUTIL_AVCONFIG_H
 #define AVUTIL_AVCONFIG_H
 #define AV_HAVE_BIGENDIAN 0

diff --git a/fuchsia/config/max/arm64/libavutil/ffversion.h b/fuchsia/config/max/arm64/libavutil/ffversion.h
index 0342cbb..b97c110 100644
--- a/fuchsia/config/max/arm64/libavutil/ffversion.h
+++ b/fuchsia/config/max/arm64/libavutil/ffversion.h

@@ -1,5 +1,5 @@
 /* Automatically generated by version.sh, do not manually edit! */
 #ifndef AVUTIL_FFVERSION_H
 #define AVUTIL_FFVERSION_H
-#define FFMPEG_VERSION "n3.4.2-1-g67e0ba6f22"
+#define FFMPEG_VERSION "N-92356-g750018e43a"
 #endif /* AVUTIL_FFVERSION_H */

diff --git a/fuchsia/config/max/x64/config.asm b/fuchsia/config/max/x64/config.asm
index 13457e0..8b69748 100644
--- a/fuchsia/config/max/x64/config.asm
+++ b/fuchsia/config/max/x64/config.asm

@@ -1,3 +1,4 @@
+; Automatically generated by configure - do not modify!
 %define ARCH_AARCH64 0
 %define ARCH_ALPHA 0
 %define ARCH_ARM 0
@@ -41,6 +42,7 @@
 %define HAVE_AMD3DNOWEXT 1
 %define HAVE_AVX 1
 %define HAVE_AVX2 1
+%define HAVE_AVX512 1
 %define HAVE_FMA3 1
 %define HAVE_FMA4 1
 %define HAVE_MMX 1
@@ -85,6 +87,7 @@
 %define HAVE_AMD3DNOWEXT_EXTERNAL 1
 %define HAVE_AVX_EXTERNAL 1
 %define HAVE_AVX2_EXTERNAL 1
+%define HAVE_AVX512_EXTERNAL 0
 %define HAVE_FMA3_EXTERNAL 1
 %define HAVE_FMA4_EXTERNAL 1
 %define HAVE_MMX_EXTERNAL 1
@@ -129,6 +132,7 @@
 %define HAVE_AMD3DNOWEXT_INLINE 1
 %define HAVE_AVX_INLINE 1
 %define HAVE_AVX2_INLINE 1
+%define HAVE_AVX512_INLINE 1
 %define HAVE_FMA3_INLINE 1
 %define HAVE_FMA4_INLINE 1
 %define HAVE_MMX_INLINE 1
@@ -158,36 +162,29 @@
 %define HAVE_FAST_64BIT 1
 %define HAVE_FAST_CLZ 1
 %define HAVE_FAST_CMOV 1
-%define HAVE_LOCAL_ALIGNED_8 1
-%define HAVE_LOCAL_ALIGNED_16 1
-%define HAVE_LOCAL_ALIGNED_32 1
+%define HAVE_LOCAL_ALIGNED 1
 %define HAVE_SIMD_ALIGN_16 1
 %define HAVE_SIMD_ALIGN_32 1
-%define HAVE_ATOMICS_GCC 1
-%define HAVE_ATOMICS_SUNCC 0
-%define HAVE_ATOMICS_WIN32 0
+%define HAVE_SIMD_ALIGN_64 1
 %define HAVE_ATOMIC_CAS_PTR 0
 %define HAVE_MACHINE_RW_BARRIER 0
 %define HAVE_MEMORYBARRIER 0
 %define HAVE_MM_EMPTY 1
 %define HAVE_RDTSC 0
-%define HAVE_SARESTART 1
 %define HAVE_SEM_TIMEDWAIT 1
 %define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1
-%define HAVE_CABS 1
-%define HAVE_CEXP 1
+%define HAVE_CABS 0
+%define HAVE_CEXP 0
 %define HAVE_INLINE_ASM 1
 %define HAVE_SYMVER 0
 %define HAVE_X86ASM 1
 %define HAVE_BIGENDIAN 0
 %define HAVE_FAST_UNALIGNED 1
-%define HAVE_ALTIVEC_H 0
 %define HAVE_ARPA_INET_H 0
 %define HAVE_ASM_TYPES_H 1
 %define HAVE_CDIO_PARANOIA_H 0
 %define HAVE_CDIO_PARANOIA_PARANOIA_H 0
 %define HAVE_CUDA_H 0
-%define HAVE_D3D11_H 0
 %define HAVE_DISPATCH_DISPATCH_H 0
 %define HAVE_DEV_BKTR_IOCTL_BT848_H 0
 %define HAVE_DEV_BKTR_IOCTL_METEOR_H 0
@@ -196,27 +193,18 @@
 %define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0
 %define HAVE_DIRECT_H 0
 %define HAVE_DIRENT_H 1
-%define HAVE_DLFCN_H 1
 %define HAVE_DXGIDEBUG_H 0
 %define HAVE_DXVA_H 0
 %define HAVE_ES2_GL_H 0
 %define HAVE_GSM_H 0
 %define HAVE_IO_H 0
-%define HAVE_MACH_MACH_TIME_H 0
+%define HAVE_LINUX_PERF_EVENT_H 1
 %define HAVE_MACHINE_IOCTL_BT848_H 0
 %define HAVE_MACHINE_IOCTL_METEOR_H 0
 %define HAVE_MALLOC_H 1
 %define HAVE_OPENCV2_CORE_CORE_C_H 0
-%define HAVE_OPENJPEG_2_3_OPENJPEG_H 0
-%define HAVE_OPENJPEG_2_2_OPENJPEG_H 0
-%define HAVE_OPENJPEG_2_1_OPENJPEG_H 0
-%define HAVE_OPENJPEG_2_0_OPENJPEG_H 0
-%define HAVE_OPENJPEG_1_5_OPENJPEG_H 0
 %define HAVE_OPENGL_GL3_H 0
 %define HAVE_POLL_H 1
-%define HAVE_SOUNDCARD_H 0
-%define HAVE_STDATOMIC_H 1
-%define HAVE_SYS_MMAN_H 1
 %define HAVE_SYS_PARAM_H 1
 %define HAVE_SYS_RESOURCE_H 1
 %define HAVE_SYS_SELECT_H 1
@@ -260,17 +248,20 @@
 %define HAVE_SINF 1
 %define HAVE_TRUNC 1
 %define HAVE_TRUNCF 1
+%define HAVE_DOS_PATHS 0
+%define HAVE_LIBC_MSVCRT 0
+%define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+%define HAVE_SECTION_DATA_REL_RO 1
+%define HAVE_THREADS 1
+%define HAVE_UWP 0
+%define HAVE_WINRT 0
 %define HAVE_ACCESS 1
 %define HAVE_ALIGNED_MALLOC 0
 %define HAVE_ARC4RANDOM 0
 %define HAVE_CLOCK_GETTIME 1
 %define HAVE_CLOSESOCKET 0
 %define HAVE_COMMANDLINETOARGVW 0
-%define HAVE_COTASKMEMFREE 0
-%define HAVE_CRYPTGENRANDOM 0
 %define HAVE_FCNTL 1
-%define HAVE_FLT_LIM 1
-%define HAVE_FORK 1
 %define HAVE_GETADDRINFO 0
 %define HAVE_GETHRTIME 0
 %define HAVE_GETOPT 1
@@ -285,9 +276,7 @@
 %define HAVE_GMTIME_R 1
 %define HAVE_INET_ATON 0
 %define HAVE_ISATTY 1
-%define HAVE_JACK_PORT_GET_LATENCY_RANGE 0
 %define HAVE_KBHIT 0
-%define HAVE_LOADLIBRARY 0
 %define HAVE_LOCALTIME_R 1
 %define HAVE_LSTAT 1
 %define HAVE_LZO1X_999_COMPRESS 0
@@ -302,6 +291,7 @@
 %define HAVE_POSIX_MEMALIGN 1
 %define HAVE_PTHREAD_CANCEL 1
 %define HAVE_SCHED_GETAFFINITY 1
+%define HAVE_SECITEMIMPORT 0
 %define HAVE_SETCONSOLETEXTATTRIBUTE 0
 %define HAVE_SETCONSOLECTRLHANDLER 0
 %define HAVE_SETMODE 0
@@ -314,16 +304,19 @@
 %define HAVE_UTGETOSTYPEFROMSTRING 0
 %define HAVE_VIRTUALALLOC 0
 %define HAVE_WGLGETPROCADDRESS 0
+%define HAVE_BCRYPT 0
+%define HAVE_VAAPI_DRM 0
+%define HAVE_VAAPI_X11 0
+%define HAVE_VDPAU_X11 0
 %define HAVE_PTHREADS 1
 %define HAVE_OS2THREADS 0
 %define HAVE_W32THREADS 0
+%define HAVE_AS_ARCH_DIRECTIVE 0
 %define HAVE_AS_DN_DIRECTIVE 0
 %define HAVE_AS_FPU_DIRECTIVE 0
 %define HAVE_AS_FUNC 0
 %define HAVE_AS_OBJECT_ARCH 0
 %define HAVE_ASM_MOD_Q 0
-%define HAVE_ATTRIBUTE_MAY_ALIAS 1
-%define HAVE_ATTRIBUTE_PACKED 1
 %define HAVE_BLOCKS_EXTENSION 0
 %define HAVE_EBP_AVAILABLE 1
 %define HAVE_EBX_AVAILABLE 1
@@ -340,7 +333,6 @@
 %define HAVE_VFP_ARGS 0
 %define HAVE_XFORM_ASM 0
 %define HAVE_XMM_CLOBBERS 1
-%define HAVE_CONDITION_VARIABLE_PTR 0
 %define HAVE_KCMVIDEOCODECTYPE_HEVC 0
 %define HAVE_SOCKLEN_T 0
 %define HAVE_STRUCT_ADDRINFO 0
@@ -356,22 +348,17 @@
 %define HAVE_STRUCT_SOCKADDR_STORAGE 0
 %define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1
 %define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 1
-%define HAVE_ATOMICS_NATIVE 1
-%define HAVE_DOS_PATHS 0
-%define HAVE_LIBC_MSVCRT 0
 %define HAVE_MAKEINFO 1
 %define HAVE_MAKEINFO_HTML 1
-%define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+%define HAVE_OPENCL_D3D11 0
+%define HAVE_OPENCL_DRM_ARM 0
+%define HAVE_OPENCL_DRM_BEIGNET 0
+%define HAVE_OPENCL_DXVA2 0
+%define HAVE_OPENCL_VAAPI_BEIGNET 0
+%define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0
 %define HAVE_PERL 1
 %define HAVE_POD2MAN 1
-%define HAVE_SECTION_DATA_REL_RO 1
 %define HAVE_TEXI2HTML 0
-%define HAVE_THREADS 1
-%define HAVE_UWP 0
-%define HAVE_VAAPI_DRM 0
-%define HAVE_VAAPI_X11 0
-%define HAVE_VDPAU_X11 0
-%define HAVE_WINRT 0
 %define CONFIG_DOC 0
 %define CONFIG_HTMLPAGES 0
 %define CONFIG_MANPAGES 0
@@ -398,41 +385,31 @@
 %define CONFIG_SCALING_VIDEO_EXAMPLE 0
 %define CONFIG_TRANSCODE_AAC_EXAMPLE 0
 %define CONFIG_TRANSCODING_EXAMPLE 0
-%define CONFIG_ALSA 0
-%define CONFIG_APPKIT 0
-%define CONFIG_AVFOUNDATION 0
-%define CONFIG_BZLIB 0
-%define CONFIG_COREIMAGE 0
-%define CONFIG_ICONV 0
-%define CONFIG_JACK 0
-%define CONFIG_LIBXCB 1
-%define CONFIG_LIBXCB_SHM 1
-%define CONFIG_LIBXCB_SHAPE 0
-%define CONFIG_LIBXCB_XFIXES 0
-%define CONFIG_LZMA 1
-%define CONFIG_SCHANNEL 0
-%define CONFIG_SDL2 0
-%define CONFIG_SECURETRANSPORT 0
-%define CONFIG_SNDIO 0
-%define CONFIG_XLIB 0
-%define CONFIG_ZLIB 0
+%define CONFIG_VAAPI_ENCODE_EXAMPLE 0
+%define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0
 %define CONFIG_AVISYNTH 0
 %define CONFIG_FREI0R 0
 %define CONFIG_LIBCDIO 0
+%define CONFIG_LIBDAVS2 0
 %define CONFIG_LIBRUBBERBAND 0
 %define CONFIG_LIBVIDSTAB 0
 %define CONFIG_LIBX264 0
 %define CONFIG_LIBX265 0
 %define CONFIG_LIBXAVS 0
+%define CONFIG_LIBXAVS2 0
 %define CONFIG_LIBXVID 0
 %define CONFIG_DECKLINK 0
 %define CONFIG_LIBNDI_NEWTEK 0
 %define CONFIG_LIBFDK_AAC 0
 %define CONFIG_OPENSSL 0
+%define CONFIG_LIBTLS 0
 %define CONFIG_GMP 0
+%define CONFIG_LIBLENSFUN 0
 %define CONFIG_LIBOPENCORE_AMRNB 0
 %define CONFIG_LIBOPENCORE_AMRWB 0
+%define CONFIG_LIBVMAF 0
 %define CONFIG_LIBVO_AMRWBENC 0
+%define CONFIG_MBEDTLS 0
 %define CONFIG_RKMPP 0
 %define CONFIG_LIBSMBCLIENT 0
 %define CONFIG_CHROMAPRINT 0
@@ -440,11 +417,13 @@
 %define CONFIG_GNUTLS 0
 %define CONFIG_JNI 0
 %define CONFIG_LADSPA 0
+%define CONFIG_LIBAOM 0
 %define CONFIG_LIBASS 0
 %define CONFIG_LIBBLURAY 0
 %define CONFIG_LIBBS2B 0
 %define CONFIG_LIBCACA 0
 %define CONFIG_LIBCELT 0
+%define CONFIG_LIBCODEC2 0
 %define CONFIG_LIBDC1394 0
 %define CONFIG_LIBDRM 0
 %define CONFIG_LIBFLITE 0
@@ -455,6 +434,8 @@
 %define CONFIG_LIBGSM 0
 %define CONFIG_LIBIEC61883 0
 %define CONFIG_LIBILBC 0
+%define CONFIG_LIBJACK 0
+%define CONFIG_LIBKLVANC 0
 %define CONFIG_LIBKVAZAAR 0
 %define CONFIG_LIBMODPLUG 0
 %define CONFIG_LIBMP3LAME 0
@@ -472,12 +453,13 @@
 %define CONFIG_LIBSNAPPY 0
 %define CONFIG_LIBSOXR 0
 %define CONFIG_LIBSPEEX 0
+%define CONFIG_LIBSRT 0
 %define CONFIG_LIBSSH 0
+%define CONFIG_LIBTENSORFLOW 0
 %define CONFIG_LIBTESSERACT 0
 %define CONFIG_LIBTHEORA 0
 %define CONFIG_LIBTWOLAME 0
 %define CONFIG_LIBV4L2 0
-%define CONFIG_LIBVMAF 0
 %define CONFIG_LIBVORBIS 0
 %define CONFIG_LIBVPX 0
 %define CONFIG_LIBWAVPACK 0
@@ -486,28 +468,49 @@
 %define CONFIG_LIBZIMG 0
 %define CONFIG_LIBZMQ 0
 %define CONFIG_LIBZVBI 0
+%define CONFIG_LV2 0
 %define CONFIG_MEDIACODEC 0
 %define CONFIG_OPENAL 0
-%define CONFIG_OPENCL 0
 %define CONFIG_OPENGL 0
+%define CONFIG_VAPOURSYNTH 0
+%define CONFIG_ALSA 1
+%define CONFIG_APPKIT 0
+%define CONFIG_AVFOUNDATION 0
+%define CONFIG_BZLIB 0
+%define CONFIG_COREIMAGE 0
+%define CONFIG_ICONV 0
+%define CONFIG_LIBXCB 0
+%define CONFIG_LIBXCB_SHM 0
+%define CONFIG_LIBXCB_SHAPE 0
+%define CONFIG_LIBXCB_XFIXES 0
+%define CONFIG_LZMA 0
+%define CONFIG_SCHANNEL 0
+%define CONFIG_SDL2 0
+%define CONFIG_SECURETRANSPORT 0
+%define CONFIG_SNDIO 0
+%define CONFIG_XLIB 0
+%define CONFIG_ZLIB 0
+%define CONFIG_CUDA_SDK 0
+%define CONFIG_LIBNPP 0
+%define CONFIG_LIBMFX 0
+%define CONFIG_MMAL 0
+%define CONFIG_OMX 0
+%define CONFIG_OPENCL 0
+%define CONFIG_AMF 0
 %define CONFIG_AUDIOTOOLBOX 0
 %define CONFIG_CRYSTALHD 0
 %define CONFIG_CUDA 0
 %define CONFIG_CUVID 0
 %define CONFIG_D3D11VA 0
 %define CONFIG_DXVA2 0
+%define CONFIG_FFNVCODEC 0
+%define CONFIG_NVDEC 0
 %define CONFIG_NVENC 0
 %define CONFIG_VAAPI 0
-%define CONFIG_VDA 0
 %define CONFIG_VDPAU 0
 %define CONFIG_VIDEOTOOLBOX 0
 %define CONFIG_V4L2_M2M 0
 %define CONFIG_XVMC 0
-%define CONFIG_CUDA_SDK 0
-%define CONFIG_LIBNPP 0
-%define CONFIG_LIBMFX 0
-%define CONFIG_MMAL 0
-%define CONFIG_OMX 0
 %define CONFIG_FTRAPV 0
 %define CONFIG_GRAY 0
 %define CONFIG_HARDCODED_TABLES 0
@@ -521,18 +524,17 @@
 %define CONFIG_GPL 0
 %define CONFIG_NONFREE 0
 %define CONFIG_VERSION3 0
-%define CONFIG_AVCODEC 1
 %define CONFIG_AVDEVICE 0
 %define CONFIG_AVFILTER 0
+%define CONFIG_SWSCALE 0
+%define CONFIG_POSTPROC 0
 %define CONFIG_AVFORMAT 1
+%define CONFIG_AVCODEC 1
+%define CONFIG_SWRESAMPLE 0
 %define CONFIG_AVRESAMPLE 0
 %define CONFIG_AVUTIL 1
-%define CONFIG_POSTPROC 0
-%define CONFIG_SWRESAMPLE 0
-%define CONFIG_SWSCALE 0
 %define CONFIG_FFPLAY 0
 %define CONFIG_FFPROBE 0
-%define CONFIG_FFSERVER 0
 %define CONFIG_FFMPEG 0
 %define CONFIG_DCT 1
 %define CONFIG_DWT 0
@@ -569,12 +571,21 @@
 %define CONFIG_PROTOCOLS 0
 %define CONFIG_AANDCTTABLES 0
 %define CONFIG_AC3DSP 0
-%define CONFIG_AUDIO_FRAME_QUEUE 0
+%define CONFIG_ADTS_HEADER 1
+%define CONFIG_AUDIO_FRAME_QUEUE 1
 %define CONFIG_AUDIODSP 0
 %define CONFIG_BLOCKDSP 1
 %define CONFIG_BSWAPDSP 0
 %define CONFIG_CABAC 1
+%define CONFIG_CBS 0
+%define CONFIG_CBS_AV1 0
+%define CONFIG_CBS_H264 0
+%define CONFIG_CBS_H265 0
+%define CONFIG_CBS_JPEG 0
+%define CONFIG_CBS_MPEG2 0
+%define CONFIG_CBS_VP9 0
 %define CONFIG_DIRAC_PARSE 1
+%define CONFIG_DNN 0
 %define CONFIG_DVPROFILE 0
 %define CONFIG_EXIF 1
 %define CONFIG_FAANDCT 0
@@ -624,6 +635,7 @@
 %define CONFIG_QSV 0
 %define CONFIG_QSVDEC 0
 %define CONFIG_QSVENC 0
+%define CONFIG_QSVVPP 0
 %define CONFIG_RANGECODER 0
 %define CONFIG_RIFFDEC 1
 %define CONFIG_RIFFENC 0
@@ -647,25 +659,35 @@
 %define CONFIG_WMA_FREQS 0
 %define CONFIG_WMV2DSP 0
 %define CONFIG_AAC_ADTSTOASC_BSF 0
+%define CONFIG_AV1_METADATA_BSF 0
 %define CONFIG_CHOMP_BSF 0
 %define CONFIG_DUMP_EXTRADATA_BSF 0
 %define CONFIG_DCA_CORE_BSF 0
+%define CONFIG_EAC3_CORE_BSF 0
 %define CONFIG_EXTRACT_EXTRADATA_BSF 0
+%define CONFIG_FILTER_UNITS_BSF 0
+%define CONFIG_H264_METADATA_BSF 0
 %define CONFIG_H264_MP4TOANNEXB_BSF 0
+%define CONFIG_H264_REDUNDANT_PPS_BSF 0
+%define CONFIG_HAPQA_EXTRACT_BSF 0
+%define CONFIG_HEVC_METADATA_BSF 0
 %define CONFIG_HEVC_MP4TOANNEXB_BSF 0
 %define CONFIG_IMX_DUMP_HEADER_BSF 0
 %define CONFIG_MJPEG2JPEG_BSF 0
 %define CONFIG_MJPEGA_DUMP_HEADER_BSF 0
 %define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0
+%define CONFIG_MPEG2_METADATA_BSF 0
 %define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0
 %define CONFIG_MOV2TEXTSUB_BSF 0
 %define CONFIG_NOISE_BSF 0
 %define CONFIG_NULL_BSF 1
 %define CONFIG_REMOVE_EXTRADATA_BSF 0
 %define CONFIG_TEXT2MOVSUB_BSF 0
+%define CONFIG_TRACE_HEADERS_BSF 0
+%define CONFIG_VP9_METADATA_BSF 0
 %define CONFIG_VP9_RAW_REORDER_BSF 0
 %define CONFIG_VP9_SUPERFRAME_BSF 0
-%define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0
+%define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 1
 %define CONFIG_AASC_DECODER 0
 %define CONFIG_AIC_DECODER 0
 %define CONFIG_ALIAS_PIX_DECODER 0
@@ -685,6 +707,7 @@
 %define CONFIG_BETHSOFTVID_DECODER 0
 %define CONFIG_BFI_DECODER 0
 %define CONFIG_BINK_DECODER 0
+%define CONFIG_BITPACKED_DECODER 0
 %define CONFIG_BMP_DECODER 0
 %define CONFIG_BMV_VIDEO_DECODER 0
 %define CONFIG_BRENDER_PIX_DECODER 0
@@ -750,8 +773,6 @@
 %define CONFIG_H264_MMAL_DECODER 0
 %define CONFIG_H264_QSV_DECODER 0
 %define CONFIG_H264_RKMPP_DECODER 0
-%define CONFIG_H264_VDA_DECODER 0
-%define CONFIG_H264_VDPAU_DECODER 0
 %define CONFIG_HAP_DECODER 0
 %define CONFIG_HEVC_DECODER 0
 %define CONFIG_HEVC_QSV_DECODER 0
@@ -763,6 +784,7 @@
 %define CONFIG_HUFFYUV_DECODER 0
 %define CONFIG_IDCIN_DECODER 0
 %define CONFIG_IFF_ILBM_DECODER 0
+%define CONFIG_IMM4_DECODER 0
 %define CONFIG_INDEO2_DECODER 0
 %define CONFIG_INDEO3_DECODER 0
 %define CONFIG_INDEO4_DECODER 0
@@ -783,17 +805,13 @@
 %define CONFIG_MJPEGB_DECODER 0
 %define CONFIG_MMVIDEO_DECODER 0
 %define CONFIG_MOTIONPIXELS_DECODER 0
-%define CONFIG_MPEG_XVMC_DECODER 0
 %define CONFIG_MPEG1VIDEO_DECODER 0
 %define CONFIG_MPEG2VIDEO_DECODER 0
 %define CONFIG_MPEG4_DECODER 1
 %define CONFIG_MPEG4_CRYSTALHD_DECODER 0
 %define CONFIG_MPEG4_V4L2M2M_DECODER 0
 %define CONFIG_MPEG4_MMAL_DECODER 0
-%define CONFIG_MPEG4_VDPAU_DECODER 0
 %define CONFIG_MPEGVIDEO_DECODER 0
-%define CONFIG_MPEG_VDPAU_DECODER 0
-%define CONFIG_MPEG1_VDPAU_DECODER 0
 %define CONFIG_MPEG1_V4L2M2M_DECODER 0
 %define CONFIG_MPEG2_MMAL_DECODER 0
 %define CONFIG_MPEG2_CRYSTALHD_DECODER 0
@@ -814,6 +832,7 @@
 %define CONFIG_MTS2_DECODER 0
 %define CONFIG_MVC1_DECODER 0
 %define CONFIG_MVC2_DECODER 0
+%define CONFIG_MWSC_DECODER 0
 %define CONFIG_MXPEG_DECODER 0
 %define CONFIG_NUV_DECODER 0
 %define CONFIG_PAF_VIDEO_DECODER 0
@@ -827,7 +846,7 @@
 %define CONFIG_PNG_DECODER 0
 %define CONFIG_PPM_DECODER 0
 %define CONFIG_PRORES_DECODER 0
-%define CONFIG_PRORES_LGPL_DECODER 0
+%define CONFIG_PROSUMER_DECODER 0
 %define CONFIG_PSD_DECODER 0
 %define CONFIG_PTX_DECODER 0
 %define CONFIG_QDRAW_DECODER 0
@@ -835,6 +854,7 @@
 %define CONFIG_QTRLE_DECODER 0
 %define CONFIG_R10K_DECODER 0
 %define CONFIG_R210_DECODER 0
+%define CONFIG_RASC_DECODER 0
 %define CONFIG_RAWVIDEO_DECODER 0
 %define CONFIG_RL2_DECODER 0
 %define CONFIG_ROQ_DECODER 0
@@ -887,7 +907,6 @@
 %define CONFIG_VBLE_DECODER 0
 %define CONFIG_VC1_DECODER 0
 %define CONFIG_VC1_CRYSTALHD_DECODER 0
-%define CONFIG_VC1_VDPAU_DECODER 0
 %define CONFIG_VC1IMAGE_DECODER 0
 %define CONFIG_VC1_MMAL_DECODER 0
 %define CONFIG_VC1_QSV_DECODER 0
@@ -908,14 +927,13 @@
 %define CONFIG_VP9_RKMPP_DECODER 0
 %define CONFIG_VP9_V4L2M2M_DECODER 0
 %define CONFIG_VQA_DECODER 0
-%define CONFIG_BITPACKED_DECODER 0
 %define CONFIG_WEBP_DECODER 0
+%define CONFIG_WCMV_DECODER 0
 %define CONFIG_WRAPPED_AVFRAME_DECODER 0
 %define CONFIG_WMV1_DECODER 0
 %define CONFIG_WMV2_DECODER 0
 %define CONFIG_WMV3_DECODER 0
 %define CONFIG_WMV3_CRYSTALHD_DECODER 0
-%define CONFIG_WMV3_VDPAU_DECODER 0
 %define CONFIG_WMV3IMAGE_DECODER 0
 %define CONFIG_WNV1_DECODER 0
 %define CONFIG_XAN_WC3_DECODER 0
@@ -943,11 +961,14 @@
 %define CONFIG_AMRNB_DECODER 1
 %define CONFIG_AMRWB_DECODER 1
 %define CONFIG_APE_DECODER 0
+%define CONFIG_APTX_DECODER 1
+%define CONFIG_APTX_HD_DECODER 0
 %define CONFIG_ATRAC1_DECODER 0
 %define CONFIG_ATRAC3_DECODER 0
 %define CONFIG_ATRAC3AL_DECODER 0
 %define CONFIG_ATRAC3P_DECODER 0
 %define CONFIG_ATRAC3PAL_DECODER 0
+%define CONFIG_ATRAC9_DECODER 0
 %define CONFIG_BINKAUDIO_DCT_DECODER 0
 %define CONFIG_BINKAUDIO_RDFT_DECODER 0
 %define CONFIG_BMV_AUDIO_DECODER 0
@@ -970,6 +991,7 @@
 %define CONFIG_GSM_DECODER 0
 %define CONFIG_GSM_MS_DECODER 1
 %define CONFIG_IAC_DECODER 0
+%define CONFIG_ILBC_DECODER 0
 %define CONFIG_IMC_DECODER 0
 %define CONFIG_INTERPLAY_ACM_DECODER 0
 %define CONFIG_MACE3_DECODER 0
@@ -980,12 +1002,12 @@
 %define CONFIG_MP1FLOAT_DECODER 0
 %define CONFIG_MP2_DECODER 0
 %define CONFIG_MP2FLOAT_DECODER 0
-%define CONFIG_MP3_DECODER 1
 %define CONFIG_MP3FLOAT_DECODER 0
-%define CONFIG_MP3ADU_DECODER 0
+%define CONFIG_MP3_DECODER 1
 %define CONFIG_MP3ADUFLOAT_DECODER 0
-%define CONFIG_MP3ON4_DECODER 0
+%define CONFIG_MP3ADU_DECODER 0
 %define CONFIG_MP3ON4FLOAT_DECODER 0
+%define CONFIG_MP3ON4_DECODER 0
 %define CONFIG_MPC7_DECODER 0
 %define CONFIG_MPC8_DECODER 0
 %define CONFIG_NELLYMOSER_DECODER 0
@@ -998,6 +1020,7 @@
 %define CONFIG_RA_144_DECODER 0
 %define CONFIG_RA_288_DECODER 0
 %define CONFIG_RALF_DECODER 0
+%define CONFIG_SBC_DECODER 1
 %define CONFIG_SHORTEN_DECODER 0
 %define CONFIG_SIPR_DECODER 0
 %define CONFIG_SMACKAUD_DECODER 0
@@ -1135,7 +1158,10 @@
 %define CONFIG_PCM_MULAW_AT_DECODER 0
 %define CONFIG_QDMC_AT_DECODER 0
 %define CONFIG_QDM2_AT_DECODER 0
+%define CONFIG_LIBAOM_AV1_DECODER 0
 %define CONFIG_LIBCELT_DECODER 0
+%define CONFIG_LIBCODEC2_DECODER 0
+%define CONFIG_LIBDAVS2_DECODER 0
 %define CONFIG_LIBFDK_AAC_DECODER 0
 %define CONFIG_LIBGSM_DECODER 0
 %define CONFIG_LIBGSM_MS_DECODER 0
@@ -1168,6 +1194,748 @@
 %define CONFIG_VP8_QSV_DECODER 0
 %define CONFIG_VP9_CUVID_DECODER 0
 %define CONFIG_VP9_MEDIACODEC_DECODER 0
+%define CONFIG_A64MULTI_ENCODER 0
+%define CONFIG_A64MULTI5_ENCODER 0
+%define CONFIG_ALIAS_PIX_ENCODER 0
+%define CONFIG_AMV_ENCODER 0
+%define CONFIG_APNG_ENCODER 0
+%define CONFIG_ASV1_ENCODER 0
+%define CONFIG_ASV2_ENCODER 0
+%define CONFIG_AVRP_ENCODER 0
+%define CONFIG_AVUI_ENCODER 0
+%define CONFIG_AYUV_ENCODER 0
+%define CONFIG_BMP_ENCODER 0
+%define CONFIG_CINEPAK_ENCODER 0
+%define CONFIG_CLJR_ENCODER 0
+%define CONFIG_COMFORTNOISE_ENCODER 0
+%define CONFIG_DNXHD_ENCODER 0
+%define CONFIG_DPX_ENCODER 0
+%define CONFIG_DVVIDEO_ENCODER 0
+%define CONFIG_FFV1_ENCODER 0
+%define CONFIG_FFVHUFF_ENCODER 0
+%define CONFIG_FITS_ENCODER 0
+%define CONFIG_FLASHSV_ENCODER 0
+%define CONFIG_FLASHSV2_ENCODER 0
+%define CONFIG_FLV_ENCODER 0
+%define CONFIG_GIF_ENCODER 0
+%define CONFIG_H261_ENCODER 0
+%define CONFIG_H263_ENCODER 0
+%define CONFIG_H263P_ENCODER 0
+%define CONFIG_HAP_ENCODER 0
+%define CONFIG_HUFFYUV_ENCODER 0
+%define CONFIG_JPEG2000_ENCODER 0
+%define CONFIG_JPEGLS_ENCODER 0
+%define CONFIG_LJPEG_ENCODER 0
+%define CONFIG_MAGICYUV_ENCODER 0
+%define CONFIG_MJPEG_ENCODER 0
+%define CONFIG_MPEG1VIDEO_ENCODER 0
+%define CONFIG_MPEG2VIDEO_ENCODER 0
+%define CONFIG_MPEG4_ENCODER 0
+%define CONFIG_MSMPEG4V2_ENCODER 0
+%define CONFIG_MSMPEG4V3_ENCODER 0
+%define CONFIG_MSVIDEO1_ENCODER 0
+%define CONFIG_PAM_ENCODER 0
+%define CONFIG_PBM_ENCODER 0
+%define CONFIG_PCX_ENCODER 0
+%define CONFIG_PGM_ENCODER 0
+%define CONFIG_PGMYUV_ENCODER 0
+%define CONFIG_PNG_ENCODER 0
+%define CONFIG_PPM_ENCODER 0
+%define CONFIG_PRORES_ENCODER 0
+%define CONFIG_PRORES_AW_ENCODER 0
+%define CONFIG_PRORES_KS_ENCODER 0
+%define CONFIG_QTRLE_ENCODER 0
+%define CONFIG_R10K_ENCODER 0
+%define CONFIG_R210_ENCODER 0
+%define CONFIG_RAWVIDEO_ENCODER 0
+%define CONFIG_ROQ_ENCODER 0
+%define CONFIG_RV10_ENCODER 0
+%define CONFIG_RV20_ENCODER 0
+%define CONFIG_S302M_ENCODER 0
+%define CONFIG_SGI_ENCODER 0
+%define CONFIG_SNOW_ENCODER 0
+%define CONFIG_SUNRAST_ENCODER 0
+%define CONFIG_SVQ1_ENCODER 0
+%define CONFIG_TARGA_ENCODER 0
+%define CONFIG_TIFF_ENCODER 0
+%define CONFIG_UTVIDEO_ENCODER 0
+%define CONFIG_V210_ENCODER 0
+%define CONFIG_V308_ENCODER 0
+%define CONFIG_V408_ENCODER 0
+%define CONFIG_V410_ENCODER 0
+%define CONFIG_VC2_ENCODER 0
+%define CONFIG_WRAPPED_AVFRAME_ENCODER 0
+%define CONFIG_WMV1_ENCODER 0
+%define CONFIG_WMV2_ENCODER 0
+%define CONFIG_XBM_ENCODER 0
+%define CONFIG_XFACE_ENCODER 0
+%define CONFIG_XWD_ENCODER 0
+%define CONFIG_Y41P_ENCODER 0
+%define CONFIG_YUV4_ENCODER 0
+%define CONFIG_ZLIB_ENCODER 0
+%define CONFIG_ZMBV_ENCODER 0
+%define CONFIG_AAC_ENCODER 0
+%define CONFIG_AC3_ENCODER 0
+%define CONFIG_AC3_FIXED_ENCODER 0
+%define CONFIG_ALAC_ENCODER 0
+%define CONFIG_APTX_ENCODER 0
+%define CONFIG_APTX_HD_ENCODER 0
+%define CONFIG_DCA_ENCODER 0
+%define CONFIG_EAC3_ENCODER 0
+%define CONFIG_FLAC_ENCODER 0
+%define CONFIG_G723_1_ENCODER 0
+%define CONFIG_MLP_ENCODER 0
+%define CONFIG_MP2_ENCODER 0
+%define CONFIG_MP2FIXED_ENCODER 0
+%define CONFIG_NELLYMOSER_ENCODER 0
+%define CONFIG_OPUS_ENCODER 0
+%define CONFIG_RA_144_ENCODER 0
+%define CONFIG_SBC_ENCODER 0
+%define CONFIG_SONIC_ENCODER 0
+%define CONFIG_SONIC_LS_ENCODER 0
+%define CONFIG_TRUEHD_ENCODER 0
+%define CONFIG_TTA_ENCODER 0
+%define CONFIG_VORBIS_ENCODER 0
+%define CONFIG_WAVPACK_ENCODER 0
+%define CONFIG_WMAV1_ENCODER 0
+%define CONFIG_WMAV2_ENCODER 0
+%define CONFIG_PCM_ALAW_ENCODER 0
+%define CONFIG_PCM_F32BE_ENCODER 0
+%define CONFIG_PCM_F32LE_ENCODER 0
+%define CONFIG_PCM_F64BE_ENCODER 0
+%define CONFIG_PCM_F64LE_ENCODER 0
+%define CONFIG_PCM_MULAW_ENCODER 0
+%define CONFIG_PCM_S8_ENCODER 0
+%define CONFIG_PCM_S8_PLANAR_ENCODER 0
+%define CONFIG_PCM_S16BE_ENCODER 0
+%define CONFIG_PCM_S16BE_PLANAR_ENCODER 0
+%define CONFIG_PCM_S16LE_ENCODER 0
+%define CONFIG_PCM_S16LE_PLANAR_ENCODER 0
+%define CONFIG_PCM_S24BE_ENCODER 0
+%define CONFIG_PCM_S24DAUD_ENCODER 0
+%define CONFIG_PCM_S24LE_ENCODER 0
+%define CONFIG_PCM_S24LE_PLANAR_ENCODER 0
+%define CONFIG_PCM_S32BE_ENCODER 0
+%define CONFIG_PCM_S32LE_ENCODER 0
+%define CONFIG_PCM_S32LE_PLANAR_ENCODER 0
+%define CONFIG_PCM_S64BE_ENCODER 0
+%define CONFIG_PCM_S64LE_ENCODER 0
+%define CONFIG_PCM_U8_ENCODER 0
+%define CONFIG_PCM_U16BE_ENCODER 0
+%define CONFIG_PCM_U16LE_ENCODER 0
+%define CONFIG_PCM_U24BE_ENCODER 0
+%define CONFIG_PCM_U24LE_ENCODER 0
+%define CONFIG_PCM_U32BE_ENCODER 0
+%define CONFIG_PCM_U32LE_ENCODER 0
+%define CONFIG_ROQ_DPCM_ENCODER 0
+%define CONFIG_ADPCM_ADX_ENCODER 0
+%define CONFIG_ADPCM_G722_ENCODER 0
+%define CONFIG_ADPCM_G726_ENCODER 0
+%define CONFIG_ADPCM_G726LE_ENCODER 0
+%define CONFIG_ADPCM_IMA_QT_ENCODER 0
+%define CONFIG_ADPCM_IMA_WAV_ENCODER 0
+%define CONFIG_ADPCM_MS_ENCODER 0
+%define CONFIG_ADPCM_SWF_ENCODER 0
+%define CONFIG_ADPCM_YAMAHA_ENCODER 0
+%define CONFIG_SSA_ENCODER 0
+%define CONFIG_ASS_ENCODER 0
+%define CONFIG_DVBSUB_ENCODER 0
+%define CONFIG_DVDSUB_ENCODER 0
+%define CONFIG_MOVTEXT_ENCODER 0
+%define CONFIG_SRT_ENCODER 0
+%define CONFIG_SUBRIP_ENCODER 0
+%define CONFIG_TEXT_ENCODER 0
+%define CONFIG_WEBVTT_ENCODER 0
+%define CONFIG_XSUB_ENCODER 0
+%define CONFIG_AAC_AT_ENCODER 0
+%define CONFIG_ALAC_AT_ENCODER 0
+%define CONFIG_ILBC_AT_ENCODER 0
+%define CONFIG_PCM_ALAW_AT_ENCODER 0
+%define CONFIG_PCM_MULAW_AT_ENCODER 0
+%define CONFIG_LIBAOM_AV1_ENCODER 0
+%define CONFIG_LIBCODEC2_ENCODER 0
+%define CONFIG_LIBFDK_AAC_ENCODER 0
+%define CONFIG_LIBGSM_ENCODER 0
+%define CONFIG_LIBGSM_MS_ENCODER 0
+%define CONFIG_LIBILBC_ENCODER 0
+%define CONFIG_LIBMP3LAME_ENCODER 0
+%define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0
+%define CONFIG_LIBOPENJPEG_ENCODER 0
+%define CONFIG_LIBOPUS_ENCODER 0
+%define CONFIG_LIBSHINE_ENCODER 0
+%define CONFIG_LIBSPEEX_ENCODER 0
+%define CONFIG_LIBTHEORA_ENCODER 0
+%define CONFIG_LIBTWOLAME_ENCODER 0
+%define CONFIG_LIBVO_AMRWBENC_ENCODER 0
+%define CONFIG_LIBVORBIS_ENCODER 0
+%define CONFIG_LIBVPX_VP8_ENCODER 0
+%define CONFIG_LIBVPX_VP9_ENCODER 0
+%define CONFIG_LIBWAVPACK_ENCODER 0
+%define CONFIG_LIBWEBP_ANIM_ENCODER 0
+%define CONFIG_LIBWEBP_ENCODER 0
+%define CONFIG_LIBX262_ENCODER 0
+%define CONFIG_LIBX264_ENCODER 0
+%define CONFIG_LIBX264RGB_ENCODER 0
+%define CONFIG_LIBX265_ENCODER 0
+%define CONFIG_LIBXAVS_ENCODER 0
+%define CONFIG_LIBXAVS2_ENCODER 0
+%define CONFIG_LIBXVID_ENCODER 0
+%define CONFIG_H263_V4L2M2M_ENCODER 0
+%define CONFIG_LIBOPENH264_ENCODER 0
+%define CONFIG_H264_AMF_ENCODER 0
+%define CONFIG_H264_NVENC_ENCODER 0
+%define CONFIG_H264_OMX_ENCODER 0
+%define CONFIG_H264_QSV_ENCODER 0
+%define CONFIG_H264_V4L2M2M_ENCODER 0
+%define CONFIG_H264_VAAPI_ENCODER 0
+%define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0
+%define CONFIG_NVENC_ENCODER 0
+%define CONFIG_NVENC_H264_ENCODER 0
+%define CONFIG_NVENC_HEVC_ENCODER 0
+%define CONFIG_HEVC_AMF_ENCODER 0
+%define CONFIG_HEVC_NVENC_ENCODER 0
+%define CONFIG_HEVC_QSV_ENCODER 0
+%define CONFIG_HEVC_V4L2M2M_ENCODER 0
+%define CONFIG_HEVC_VAAPI_ENCODER 0
+%define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0
+%define CONFIG_LIBKVAZAAR_ENCODER 0
+%define CONFIG_MJPEG_QSV_ENCODER 0
+%define CONFIG_MJPEG_VAAPI_ENCODER 0
+%define CONFIG_MPEG2_QSV_ENCODER 0
+%define CONFIG_MPEG2_VAAPI_ENCODER 0
+%define CONFIG_MPEG4_V4L2M2M_ENCODER 0
+%define CONFIG_VP8_V4L2M2M_ENCODER 0
+%define CONFIG_VP8_VAAPI_ENCODER 0
+%define CONFIG_VP9_VAAPI_ENCODER 0
+%define CONFIG_H263_VAAPI_HWACCEL 0
+%define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_H264_D3D11VA_HWACCEL 0
+%define CONFIG_H264_D3D11VA2_HWACCEL 0
+%define CONFIG_H264_DXVA2_HWACCEL 0
+%define CONFIG_H264_NVDEC_HWACCEL 0
+%define CONFIG_H264_VAAPI_HWACCEL 0
+%define CONFIG_H264_VDPAU_HWACCEL 0
+%define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_HEVC_D3D11VA_HWACCEL 0
+%define CONFIG_HEVC_D3D11VA2_HWACCEL 0
+%define CONFIG_HEVC_DXVA2_HWACCEL 0
+%define CONFIG_HEVC_NVDEC_HWACCEL 0
+%define CONFIG_HEVC_VAAPI_HWACCEL 0
+%define CONFIG_HEVC_VDPAU_HWACCEL 0
+%define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_MJPEG_NVDEC_HWACCEL 0
+%define CONFIG_MJPEG_VAAPI_HWACCEL 0
+%define CONFIG_MPEG1_NVDEC_HWACCEL 0
+%define CONFIG_MPEG1_VDPAU_HWACCEL 0
+%define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_MPEG1_XVMC_HWACCEL 0
+%define CONFIG_MPEG2_D3D11VA_HWACCEL 0
+%define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
+%define CONFIG_MPEG2_NVDEC_HWACCEL 0
+%define CONFIG_MPEG2_DXVA2_HWACCEL 0
+%define CONFIG_MPEG2_VAAPI_HWACCEL 0
+%define CONFIG_MPEG2_VDPAU_HWACCEL 0
+%define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_MPEG2_XVMC_HWACCEL 0
+%define CONFIG_MPEG4_NVDEC_HWACCEL 0
+%define CONFIG_MPEG4_VAAPI_HWACCEL 0
+%define CONFIG_MPEG4_VDPAU_HWACCEL 0
+%define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
+%define CONFIG_VC1_D3D11VA_HWACCEL 0
+%define CONFIG_VC1_D3D11VA2_HWACCEL 0
+%define CONFIG_VC1_DXVA2_HWACCEL 0
+%define CONFIG_VC1_NVDEC_HWACCEL 0
+%define CONFIG_VC1_VAAPI_HWACCEL 0
+%define CONFIG_VC1_VDPAU_HWACCEL 0
+%define CONFIG_VP8_NVDEC_HWACCEL 0
+%define CONFIG_VP8_VAAPI_HWACCEL 0
+%define CONFIG_VP9_D3D11VA_HWACCEL 0
+%define CONFIG_VP9_D3D11VA2_HWACCEL 0
+%define CONFIG_VP9_DXVA2_HWACCEL 0
+%define CONFIG_VP9_NVDEC_HWACCEL 0
+%define CONFIG_VP9_VAAPI_HWACCEL 0
+%define CONFIG_WMV3_D3D11VA_HWACCEL 0
+%define CONFIG_WMV3_D3D11VA2_HWACCEL 0
+%define CONFIG_WMV3_DXVA2_HWACCEL 0
+%define CONFIG_WMV3_NVDEC_HWACCEL 0
+%define CONFIG_WMV3_VAAPI_HWACCEL 0
+%define CONFIG_WMV3_VDPAU_HWACCEL 0
+%define CONFIG_AAC_PARSER 1
+%define CONFIG_AAC_LATM_PARSER 0
+%define CONFIG_AC3_PARSER 0
+%define CONFIG_ADX_PARSER 0
+%define CONFIG_AV1_PARSER 0
+%define CONFIG_AVS2_PARSER 0
+%define CONFIG_BMP_PARSER 0
+%define CONFIG_CAVSVIDEO_PARSER 0
+%define CONFIG_COOK_PARSER 0
+%define CONFIG_DCA_PARSER 0
+%define CONFIG_DIRAC_PARSER 0
+%define CONFIG_DNXHD_PARSER 0
+%define CONFIG_DPX_PARSER 0
+%define CONFIG_DVAUDIO_PARSER 0
+%define CONFIG_DVBSUB_PARSER 0
+%define CONFIG_DVDSUB_PARSER 0
+%define CONFIG_DVD_NAV_PARSER 0
+%define CONFIG_FLAC_PARSER 1
+%define CONFIG_G729_PARSER 0
+%define CONFIG_GSM_PARSER 1
+%define CONFIG_H261_PARSER 0
+%define CONFIG_H263_PARSER 1
+%define CONFIG_H264_PARSER 1
+%define CONFIG_HEVC_PARSER 0
+%define CONFIG_MJPEG_PARSER 0
+%define CONFIG_MLP_PARSER 0
+%define CONFIG_MPEG4VIDEO_PARSER 1
+%define CONFIG_MPEGAUDIO_PARSER 1
+%define CONFIG_MPEGVIDEO_PARSER 0
+%define CONFIG_OPUS_PARSER 1
+%define CONFIG_PNG_PARSER 0
+%define CONFIG_PNM_PARSER 0
+%define CONFIG_RV30_PARSER 0
+%define CONFIG_RV40_PARSER 0
+%define CONFIG_SBC_PARSER 0
+%define CONFIG_SIPR_PARSER 0
+%define CONFIG_TAK_PARSER 0
+%define CONFIG_VC1_PARSER 0
+%define CONFIG_VORBIS_PARSER 1
+%define CONFIG_VP3_PARSER 1
+%define CONFIG_VP8_PARSER 1
+%define CONFIG_VP9_PARSER 1
+%define CONFIG_XMA_PARSER 0
+%define CONFIG_ALSA_INDEV 0
+%define CONFIG_ANDROID_CAMERA_INDEV 0
+%define CONFIG_AVFOUNDATION_INDEV 0
+%define CONFIG_BKTR_INDEV 0
+%define CONFIG_DECKLINK_INDEV 0
+%define CONFIG_LIBNDI_NEWTEK_INDEV 0
+%define CONFIG_DSHOW_INDEV 0
+%define CONFIG_FBDEV_INDEV 0
+%define CONFIG_GDIGRAB_INDEV 0
+%define CONFIG_IEC61883_INDEV 0
+%define CONFIG_JACK_INDEV 0
+%define CONFIG_KMSGRAB_INDEV 0
+%define CONFIG_LAVFI_INDEV 0
+%define CONFIG_OPENAL_INDEV 0
+%define CONFIG_OSS_INDEV 0
+%define CONFIG_PULSE_INDEV 0
+%define CONFIG_SNDIO_INDEV 0
+%define CONFIG_V4L2_INDEV 0
+%define CONFIG_VFWCAP_INDEV 0
+%define CONFIG_XCBGRAB_INDEV 0
+%define CONFIG_LIBCDIO_INDEV 0
+%define CONFIG_LIBDC1394_INDEV 0
+%define CONFIG_ALSA_OUTDEV 0
+%define CONFIG_CACA_OUTDEV 0
+%define CONFIG_DECKLINK_OUTDEV 0
+%define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
+%define CONFIG_FBDEV_OUTDEV 0
+%define CONFIG_OPENGL_OUTDEV 0
+%define CONFIG_OSS_OUTDEV 0
+%define CONFIG_PULSE_OUTDEV 0
+%define CONFIG_SDL2_OUTDEV 0
+%define CONFIG_SNDIO_OUTDEV 0
+%define CONFIG_V4L2_OUTDEV 0
+%define CONFIG_XV_OUTDEV 0
+%define CONFIG_ABENCH_FILTER 0
+%define CONFIG_ACOMPRESSOR_FILTER 0
+%define CONFIG_ACONTRAST_FILTER 0
+%define CONFIG_ACOPY_FILTER 0
+%define CONFIG_ACUE_FILTER 0
+%define CONFIG_ACROSSFADE_FILTER 0
+%define CONFIG_ACROSSOVER_FILTER 0
+%define CONFIG_ACRUSHER_FILTER 0
+%define CONFIG_ADECLICK_FILTER 0
+%define CONFIG_ADECLIP_FILTER 0
+%define CONFIG_ADELAY_FILTER 0
+%define CONFIG_ADERIVATIVE_FILTER 0
+%define CONFIG_AECHO_FILTER 0
+%define CONFIG_AEMPHASIS_FILTER 0
+%define CONFIG_AEVAL_FILTER 0
+%define CONFIG_AFADE_FILTER 0
+%define CONFIG_AFFTDN_FILTER 0
+%define CONFIG_AFFTFILT_FILTER 0
+%define CONFIG_AFIR_FILTER 0
+%define CONFIG_AFORMAT_FILTER 0
+%define CONFIG_AGATE_FILTER 0
+%define CONFIG_AIIR_FILTER 0
+%define CONFIG_AINTEGRAL_FILTER 0
+%define CONFIG_AINTERLEAVE_FILTER 0
+%define CONFIG_ALIMITER_FILTER 0
+%define CONFIG_ALLPASS_FILTER 0
+%define CONFIG_ALOOP_FILTER 0
+%define CONFIG_AMERGE_FILTER 0
+%define CONFIG_AMETADATA_FILTER 0
+%define CONFIG_AMIX_FILTER 0
+%define CONFIG_AMULTIPLY_FILTER 0
+%define CONFIG_ANEQUALIZER_FILTER 0
+%define CONFIG_ANULL_FILTER 0
+%define CONFIG_APAD_FILTER 0
+%define CONFIG_APERMS_FILTER 0
+%define CONFIG_APHASER_FILTER 0
+%define CONFIG_APULSATOR_FILTER 0
+%define CONFIG_AREALTIME_FILTER 0
+%define CONFIG_ARESAMPLE_FILTER 0
+%define CONFIG_AREVERSE_FILTER 0
+%define CONFIG_ASELECT_FILTER 0
+%define CONFIG_ASENDCMD_FILTER 0
+%define CONFIG_ASETNSAMPLES_FILTER 0
+%define CONFIG_ASETPTS_FILTER 0
+%define CONFIG_ASETRATE_FILTER 0
+%define CONFIG_ASETTB_FILTER 0
+%define CONFIG_ASHOWINFO_FILTER 0
+%define CONFIG_ASIDEDATA_FILTER 0
+%define CONFIG_ASPLIT_FILTER 0
+%define CONFIG_ASTATS_FILTER 0
+%define CONFIG_ASTREAMSELECT_FILTER 0
+%define CONFIG_ATEMPO_FILTER 0
+%define CONFIG_ATRIM_FILTER 0
+%define CONFIG_AZMQ_FILTER 0
+%define CONFIG_BANDPASS_FILTER 0
+%define CONFIG_BANDREJECT_FILTER 0
+%define CONFIG_BASS_FILTER 0
+%define CONFIG_BIQUAD_FILTER 0
+%define CONFIG_BS2B_FILTER 0
+%define CONFIG_CHANNELMAP_FILTER 0
+%define CONFIG_CHANNELSPLIT_FILTER 0
+%define CONFIG_CHORUS_FILTER 0
+%define CONFIG_COMPAND_FILTER 0
+%define CONFIG_COMPENSATIONDELAY_FILTER 0
+%define CONFIG_CROSSFEED_FILTER 0
+%define CONFIG_CRYSTALIZER_FILTER 0
+%define CONFIG_DCSHIFT_FILTER 0
+%define CONFIG_DRMETER_FILTER 0
+%define CONFIG_DYNAUDNORM_FILTER 0
+%define CONFIG_EARWAX_FILTER 0
+%define CONFIG_EBUR128_FILTER 0
+%define CONFIG_EQUALIZER_FILTER 0
+%define CONFIG_EXTRASTEREO_FILTER 0
+%define CONFIG_FIREQUALIZER_FILTER 0
+%define CONFIG_FLANGER_FILTER 0
+%define CONFIG_HAAS_FILTER 0
+%define CONFIG_HDCD_FILTER 0
+%define CONFIG_HEADPHONE_FILTER 0
+%define CONFIG_HIGHPASS_FILTER 0
+%define CONFIG_HIGHSHELF_FILTER 0
+%define CONFIG_JOIN_FILTER 0
+%define CONFIG_LADSPA_FILTER 0
+%define CONFIG_LOUDNORM_FILTER 0
+%define CONFIG_LOWPASS_FILTER 0
+%define CONFIG_LOWSHELF_FILTER 0
+%define CONFIG_LV2_FILTER 0
+%define CONFIG_MCOMPAND_FILTER 0
+%define CONFIG_PAN_FILTER 0
+%define CONFIG_REPLAYGAIN_FILTER 0
+%define CONFIG_RESAMPLE_FILTER 0
+%define CONFIG_RUBBERBAND_FILTER 0
+%define CONFIG_SIDECHAINCOMPRESS_FILTER 0
+%define CONFIG_SIDECHAINGATE_FILTER 0
+%define CONFIG_SILENCEDETECT_FILTER 0
+%define CONFIG_SILENCEREMOVE_FILTER 0
+%define CONFIG_SOFALIZER_FILTER 0
+%define CONFIG_STEREOTOOLS_FILTER 0
+%define CONFIG_STEREOWIDEN_FILTER 0
+%define CONFIG_SUPEREQUALIZER_FILTER 0
+%define CONFIG_SURROUND_FILTER 0
+%define CONFIG_TREBLE_FILTER 0
+%define CONFIG_TREMOLO_FILTER 0
+%define CONFIG_VIBRATO_FILTER 0
+%define CONFIG_VOLUME_FILTER 0
+%define CONFIG_VOLUMEDETECT_FILTER 0
+%define CONFIG_AEVALSRC_FILTER 0
+%define CONFIG_ANOISESRC_FILTER 0
+%define CONFIG_ANULLSRC_FILTER 0
+%define CONFIG_FLITE_FILTER 0
+%define CONFIG_HILBERT_FILTER 0
+%define CONFIG_SINE_FILTER 0
+%define CONFIG_ANULLSINK_FILTER 0
+%define CONFIG_ALPHAEXTRACT_FILTER 0
+%define CONFIG_ALPHAMERGE_FILTER 0
+%define CONFIG_AMPLIFY_FILTER 0
+%define CONFIG_ASS_FILTER 0
+%define CONFIG_ATADENOISE_FILTER 0
+%define CONFIG_AVGBLUR_FILTER 0
+%define CONFIG_AVGBLUR_OPENCL_FILTER 0
+%define CONFIG_BBOX_FILTER 0
+%define CONFIG_BENCH_FILTER 0
+%define CONFIG_BITPLANENOISE_FILTER 0
+%define CONFIG_BLACKDETECT_FILTER 0
+%define CONFIG_BLACKFRAME_FILTER 0
+%define CONFIG_BLEND_FILTER 0
+%define CONFIG_BM3D_FILTER 0
+%define CONFIG_BOXBLUR_FILTER 0
+%define CONFIG_BOXBLUR_OPENCL_FILTER 0
+%define CONFIG_BWDIF_FILTER 0
+%define CONFIG_CHROMAKEY_FILTER 0
+%define CONFIG_CIESCOPE_FILTER 0
+%define CONFIG_CODECVIEW_FILTER 0
+%define CONFIG_COLORBALANCE_FILTER 0
+%define CONFIG_COLORCHANNELMIXER_FILTER 0
+%define CONFIG_COLORKEY_FILTER 0
+%define CONFIG_COLORLEVELS_FILTER 0
+%define CONFIG_COLORMATRIX_FILTER 0
+%define CONFIG_COLORSPACE_FILTER 0
+%define CONFIG_CONVOLUTION_FILTER 0
+%define CONFIG_CONVOLUTION_OPENCL_FILTER 0
+%define CONFIG_CONVOLVE_FILTER 0
+%define CONFIG_COPY_FILTER 0
+%define CONFIG_COREIMAGE_FILTER 0
+%define CONFIG_COVER_RECT_FILTER 0
+%define CONFIG_CROP_FILTER 0
+%define CONFIG_CROPDETECT_FILTER 0
+%define CONFIG_CUE_FILTER 0
+%define CONFIG_CURVES_FILTER 0
+%define CONFIG_DATASCOPE_FILTER 0
+%define CONFIG_DCTDNOIZ_FILTER 0
+%define CONFIG_DEBAND_FILTER 0
+%define CONFIG_DEBLOCK_FILTER 0
+%define CONFIG_DECIMATE_FILTER 0
+%define CONFIG_DECONVOLVE_FILTER 0
+%define CONFIG_DEFLATE_FILTER 0
+%define CONFIG_DEFLICKER_FILTER 0
+%define CONFIG_DEINTERLACE_QSV_FILTER 0
+%define CONFIG_DEINTERLACE_VAAPI_FILTER 0
+%define CONFIG_DEJUDDER_FILTER 0
+%define CONFIG_DELOGO_FILTER 0
+%define CONFIG_DENOISE_VAAPI_FILTER 0
+%define CONFIG_DESHAKE_FILTER 0
+%define CONFIG_DESPILL_FILTER 0
+%define CONFIG_DETELECINE_FILTER 0
+%define CONFIG_DILATION_FILTER 0
+%define CONFIG_DILATION_OPENCL_FILTER 0
+%define CONFIG_DISPLACE_FILTER 0
+%define CONFIG_DOUBLEWEAVE_FILTER 0
+%define CONFIG_DRAWBOX_FILTER 0
+%define CONFIG_DRAWGRAPH_FILTER 0
+%define CONFIG_DRAWGRID_FILTER 0
+%define CONFIG_DRAWTEXT_FILTER 0
+%define CONFIG_EDGEDETECT_FILTER 0
+%define CONFIG_ELBG_FILTER 0
+%define CONFIG_ENTROPY_FILTER 0
+%define CONFIG_EQ_FILTER 0
+%define CONFIG_EROSION_FILTER 0
+%define CONFIG_EROSION_OPENCL_FILTER 0
+%define CONFIG_EXTRACTPLANES_FILTER 0
+%define CONFIG_FADE_FILTER 0
+%define CONFIG_FFTDNOIZ_FILTER 0
+%define CONFIG_FFTFILT_FILTER 0
+%define CONFIG_FIELD_FILTER 0
+%define CONFIG_FIELDHINT_FILTER 0
+%define CONFIG_FIELDMATCH_FILTER 0
+%define CONFIG_FIELDORDER_FILTER 0
+%define CONFIG_FILLBORDERS_FILTER 0
+%define CONFIG_FIND_RECT_FILTER 0
+%define CONFIG_FLOODFILL_FILTER 0
+%define CONFIG_FORMAT_FILTER 0
+%define CONFIG_FPS_FILTER 0
+%define CONFIG_FRAMEPACK_FILTER 0
+%define CONFIG_FRAMERATE_FILTER 0
+%define CONFIG_FRAMESTEP_FILTER 0
+%define CONFIG_FREI0R_FILTER 0
+%define CONFIG_FSPP_FILTER 0
+%define CONFIG_GBLUR_FILTER 0
+%define CONFIG_GEQ_FILTER 0
+%define CONFIG_GRADFUN_FILTER 0
+%define CONFIG_GREYEDGE_FILTER 0
+%define CONFIG_HALDCLUT_FILTER 0
+%define CONFIG_HFLIP_FILTER 0
+%define CONFIG_HISTEQ_FILTER 0
+%define CONFIG_HISTOGRAM_FILTER 0
+%define CONFIG_HQDN3D_FILTER 0
+%define CONFIG_HQX_FILTER 0
+%define CONFIG_HSTACK_FILTER 0
+%define CONFIG_HUE_FILTER 0
+%define CONFIG_HWDOWNLOAD_FILTER 0
+%define CONFIG_HWMAP_FILTER 0
+%define CONFIG_HWUPLOAD_FILTER 0
+%define CONFIG_HWUPLOAD_CUDA_FILTER 0
+%define CONFIG_HYSTERESIS_FILTER 0
+%define CONFIG_IDET_FILTER 0
+%define CONFIG_IL_FILTER 0
+%define CONFIG_INFLATE_FILTER 0
+%define CONFIG_INTERLACE_FILTER 0
+%define CONFIG_INTERLEAVE_FILTER 0
+%define CONFIG_KERNDEINT_FILTER 0
+%define CONFIG_LENSCORRECTION_FILTER 0
+%define CONFIG_LENSFUN_FILTER 0
+%define CONFIG_LIBVMAF_FILTER 0
+%define CONFIG_LIMITER_FILTER 0
+%define CONFIG_LOOP_FILTER 0
+%define CONFIG_LUMAKEY_FILTER 0
+%define CONFIG_LUT_FILTER 0
+%define CONFIG_LUT1D_FILTER 0
+%define CONFIG_LUT2_FILTER 0
+%define CONFIG_LUT3D_FILTER 0
+%define CONFIG_LUTRGB_FILTER 0
+%define CONFIG_LUTYUV_FILTER 0
+%define CONFIG_MASKEDCLAMP_FILTER 0
+%define CONFIG_MASKEDMERGE_FILTER 0
+%define CONFIG_MCDEINT_FILTER 0
+%define CONFIG_MERGEPLANES_FILTER 0
+%define CONFIG_MESTIMATE_FILTER 0
+%define CONFIG_METADATA_FILTER 0
+%define CONFIG_MIDEQUALIZER_FILTER 0
+%define CONFIG_MINTERPOLATE_FILTER 0
+%define CONFIG_MIX_FILTER 0
+%define CONFIG_MPDECIMATE_FILTER 0
+%define CONFIG_NEGATE_FILTER 0
+%define CONFIG_NLMEANS_FILTER 0
+%define CONFIG_NNEDI_FILTER 0
+%define CONFIG_NOFORMAT_FILTER 0
+%define CONFIG_NOISE_FILTER 0
+%define CONFIG_NORMALIZE_FILTER 0
+%define CONFIG_NULL_FILTER 0
+%define CONFIG_OCR_FILTER 0
+%define CONFIG_OCV_FILTER 0
+%define CONFIG_OSCILLOSCOPE_FILTER 0
+%define CONFIG_OVERLAY_FILTER 0
+%define CONFIG_OVERLAY_OPENCL_FILTER 0
+%define CONFIG_OVERLAY_QSV_FILTER 0
+%define CONFIG_OWDENOISE_FILTER 0
+%define CONFIG_PAD_FILTER 0
+%define CONFIG_PALETTEGEN_FILTER 0
+%define CONFIG_PALETTEUSE_FILTER 0
+%define CONFIG_PERMS_FILTER 0
+%define CONFIG_PERSPECTIVE_FILTER 0
+%define CONFIG_PHASE_FILTER 0
+%define CONFIG_PIXDESCTEST_FILTER 0
+%define CONFIG_PIXSCOPE_FILTER 0
+%define CONFIG_PP_FILTER 0
+%define CONFIG_PP7_FILTER 0
+%define CONFIG_PREMULTIPLY_FILTER 0
+%define CONFIG_PREWITT_FILTER 0
+%define CONFIG_PREWITT_OPENCL_FILTER 0
+%define CONFIG_PROCAMP_VAAPI_FILTER 0
+%define CONFIG_PROGRAM_OPENCL_FILTER 0
+%define CONFIG_PSEUDOCOLOR_FILTER 0
+%define CONFIG_PSNR_FILTER 0
+%define CONFIG_PULLUP_FILTER 0
+%define CONFIG_QP_FILTER 0
+%define CONFIG_RANDOM_FILTER 0
+%define CONFIG_READEIA608_FILTER 0
+%define CONFIG_READVITC_FILTER 0
+%define CONFIG_REALTIME_FILTER 0
+%define CONFIG_REMAP_FILTER 0
+%define CONFIG_REMOVEGRAIN_FILTER 0
+%define CONFIG_REMOVELOGO_FILTER 0
+%define CONFIG_REPEATFIELDS_FILTER 0
+%define CONFIG_REVERSE_FILTER 0
+%define CONFIG_ROBERTS_FILTER 0
+%define CONFIG_ROBERTS_OPENCL_FILTER 0
+%define CONFIG_ROTATE_FILTER 0
+%define CONFIG_SAB_FILTER 0
+%define CONFIG_SCALE_FILTER 0
+%define CONFIG_SCALE_CUDA_FILTER 0
+%define CONFIG_SCALE_NPP_FILTER 0
+%define CONFIG_SCALE_QSV_FILTER 0
+%define CONFIG_SCALE_VAAPI_FILTER 0
+%define CONFIG_SCALE2REF_FILTER 0
+%define CONFIG_SELECT_FILTER 0
+%define CONFIG_SELECTIVECOLOR_FILTER 0
+%define CONFIG_SENDCMD_FILTER 0
+%define CONFIG_SEPARATEFIELDS_FILTER 0
+%define CONFIG_SETDAR_FILTER 0
+%define CONFIG_SETFIELD_FILTER 0
+%define CONFIG_SETPTS_FILTER 0
+%define CONFIG_SETRANGE_FILTER 0
+%define CONFIG_SETSAR_FILTER 0
+%define CONFIG_SETTB_FILTER 0
+%define CONFIG_SHARPNESS_VAAPI_FILTER 0
+%define CONFIG_SHOWINFO_FILTER 0
+%define CONFIG_SHOWPALETTE_FILTER 0
+%define CONFIG_SHUFFLEFRAMES_FILTER 0
+%define CONFIG_SHUFFLEPLANES_FILTER 0
+%define CONFIG_SIDEDATA_FILTER 0
+%define CONFIG_SIGNALSTATS_FILTER 0
+%define CONFIG_SIGNATURE_FILTER 0
+%define CONFIG_SMARTBLUR_FILTER 0
+%define CONFIG_SOBEL_FILTER 0
+%define CONFIG_SOBEL_OPENCL_FILTER 0
+%define CONFIG_SPLIT_FILTER 0
+%define CONFIG_SPP_FILTER 0
+%define CONFIG_SR_FILTER 0
+%define CONFIG_SSIM_FILTER 0
+%define CONFIG_STEREO3D_FILTER 0
+%define CONFIG_STREAMSELECT_FILTER 0
+%define CONFIG_SUBTITLES_FILTER 0
+%define CONFIG_SUPER2XSAI_FILTER 0
+%define CONFIG_SWAPRECT_FILTER 0
+%define CONFIG_SWAPUV_FILTER 0
+%define CONFIG_TBLEND_FILTER 0
+%define CONFIG_TELECINE_FILTER 0
+%define CONFIG_THRESHOLD_FILTER 0
+%define CONFIG_THUMBNAIL_FILTER 0
+%define CONFIG_THUMBNAIL_CUDA_FILTER 0
+%define CONFIG_TILE_FILTER 0
+%define CONFIG_TINTERLACE_FILTER 0
+%define CONFIG_TLUT2_FILTER 0
+%define CONFIG_TMIX_FILTER 0
+%define CONFIG_TONEMAP_FILTER 0
+%define CONFIG_TONEMAP_OPENCL_FILTER 0
+%define CONFIG_TRANSPOSE_FILTER 0
+%define CONFIG_TRANSPOSE_NPP_FILTER 0
+%define CONFIG_TRIM_FILTER 0
+%define CONFIG_UNPREMULTIPLY_FILTER 0
+%define CONFIG_UNSHARP_FILTER 0
+%define CONFIG_UNSHARP_OPENCL_FILTER 0
+%define CONFIG_USPP_FILTER 0
+%define CONFIG_VAGUEDENOISER_FILTER 0
+%define CONFIG_VECTORSCOPE_FILTER 0
+%define CONFIG_VFLIP_FILTER 0
+%define CONFIG_VFRDET_FILTER 0
+%define CONFIG_VIDSTABDETECT_FILTER 0
+%define CONFIG_VIDSTABTRANSFORM_FILTER 0
+%define CONFIG_VIGNETTE_FILTER 0
+%define CONFIG_VMAFMOTION_FILTER 0
+%define CONFIG_VPP_QSV_FILTER 0
+%define CONFIG_VSTACK_FILTER 0
+%define CONFIG_W3FDIF_FILTER 0
+%define CONFIG_WAVEFORM_FILTER 0
+%define CONFIG_WEAVE_FILTER 0
+%define CONFIG_XBR_FILTER 0
+%define CONFIG_YADIF_FILTER 0
+%define CONFIG_ZMQ_FILTER 0
+%define CONFIG_ZOOMPAN_FILTER 0
+%define CONFIG_ZSCALE_FILTER 0
+%define CONFIG_ALLRGB_FILTER 0
+%define CONFIG_ALLYUV_FILTER 0
+%define CONFIG_CELLAUTO_FILTER 0
+%define CONFIG_COLOR_FILTER 0
+%define CONFIG_COREIMAGESRC_FILTER 0
+%define CONFIG_FREI0R_SRC_FILTER 0
+%define CONFIG_HALDCLUTSRC_FILTER 0
+%define CONFIG_LIFE_FILTER 0
+%define CONFIG_MANDELBROT_FILTER 0
+%define CONFIG_MPTESTSRC_FILTER 0
+%define CONFIG_NULLSRC_FILTER 0
+%define CONFIG_OPENCLSRC_FILTER 0
+%define CONFIG_PAL75BARS_FILTER 0
+%define CONFIG_PAL100BARS_FILTER 0
+%define CONFIG_RGBTESTSRC_FILTER 0
+%define CONFIG_SMPTEBARS_FILTER 0
+%define CONFIG_SMPTEHDBARS_FILTER 0
+%define CONFIG_TESTSRC_FILTER 0
+%define CONFIG_TESTSRC2_FILTER 0
+%define CONFIG_YUVTESTSRC_FILTER 0
+%define CONFIG_NULLSINK_FILTER 0
+%define CONFIG_ABITSCOPE_FILTER 0
+%define CONFIG_ADRAWGRAPH_FILTER 0
+%define CONFIG_AHISTOGRAM_FILTER 0
+%define CONFIG_APHASEMETER_FILTER 0
+%define CONFIG_AVECTORSCOPE_FILTER 0
+%define CONFIG_CONCAT_FILTER 0
+%define CONFIG_SHOWCQT_FILTER 0
+%define CONFIG_SHOWFREQS_FILTER 0
+%define CONFIG_SHOWSPECTRUM_FILTER 0
+%define CONFIG_SHOWSPECTRUMPIC_FILTER 0
+%define CONFIG_SHOWVOLUME_FILTER 0
+%define CONFIG_SHOWWAVES_FILTER 0
+%define CONFIG_SHOWWAVESPIC_FILTER 0
+%define CONFIG_SPECTRUMSYNTH_FILTER 0
+%define CONFIG_AMOVIE_FILTER 0
+%define CONFIG_MOVIE_FILTER 0
+%define CONFIG_AFIFO_FILTER 0
+%define CONFIG_FIFO_FILTER 0
 %define CONFIG_AA_DEMUXER 0
 %define CONFIG_AAC_DEMUXER 1
 %define CONFIG_AC3_DEMUXER 0
@@ -1182,10 +1950,14 @@
 %define CONFIG_AIFF_DEMUXER 0
 %define CONFIG_AIX_DEMUXER 0
 %define CONFIG_AMR_DEMUXER 1
+%define CONFIG_AMRNB_DEMUXER 0
+%define CONFIG_AMRWB_DEMUXER 0
 %define CONFIG_ANM_DEMUXER 0
 %define CONFIG_APC_DEMUXER 0
 %define CONFIG_APE_DEMUXER 0
 %define CONFIG_APNG_DEMUXER 0
+%define CONFIG_APTX_DEMUXER 0
+%define CONFIG_APTX_HD_DEMUXER 0
 %define CONFIG_AQTITLE_DEMUXER 0
 %define CONFIG_ASF_DEMUXER 0
 %define CONFIG_ASF_O_DEMUXER 0
@@ -1196,6 +1968,7 @@
 %define CONFIG_AVISYNTH_DEMUXER 0
 %define CONFIG_AVR_DEMUXER 0
 %define CONFIG_AVS_DEMUXER 0
+%define CONFIG_AVS2_DEMUXER 0
 %define CONFIG_BETHSOFTVID_DEMUXER 0
 %define CONFIG_BFI_DEMUXER 0
 %define CONFIG_BINTEXT_DEMUXER 0
@@ -1211,6 +1984,8 @@
 %define CONFIG_CDG_DEMUXER 0
 %define CONFIG_CDXL_DEMUXER 0
 %define CONFIG_CINE_DEMUXER 0
+%define CONFIG_CODEC2_DEMUXER 0
+%define CONFIG_CODEC2RAW_DEMUXER 0
 %define CONFIG_CONCAT_DEMUXER 0
 %define CONFIG_DASH_DEMUXER 0
 %define CONFIG_DATA_DEMUXER 0
@@ -1232,7 +2007,6 @@
 %define CONFIG_EA_CDATA_DEMUXER 0
 %define CONFIG_EAC3_DEMUXER 0
 %define CONFIG_EPAF_DEMUXER 0
-%define CONFIG_FFM_DEMUXER 0
 %define CONFIG_FFMETADATA_DEMUXER 0
 %define CONFIG_FILMSTRIP_DEMUXER 0
 %define CONFIG_FITS_DEMUXER 0
@@ -1314,6 +2088,7 @@
 %define CONFIG_MXG_DEMUXER 0
 %define CONFIG_NC_DEMUXER 0
 %define CONFIG_NISTSPHERE_DEMUXER 0
+%define CONFIG_NSP_DEMUXER 0
 %define CONFIG_NSV_DEMUXER 0
 %define CONFIG_NUT_DEMUXER 0
 %define CONFIG_NUV_DEMUXER 0
@@ -1360,6 +2135,7 @@
 %define CONFIG_S337M_DEMUXER 0
 %define CONFIG_SAMI_DEMUXER 0
 %define CONFIG_SAP_DEMUXER 0
+%define CONFIG_SBC_DEMUXER 0
 %define CONFIG_SBG_DEMUXER 0
 %define CONFIG_SCC_DEMUXER 0
 %define CONFIG_SDP_DEMUXER 0
@@ -1367,6 +2143,7 @@
 %define CONFIG_SDS_DEMUXER 0
 %define CONFIG_SDX_DEMUXER 0
 %define CONFIG_SEGAFILM_DEMUXER 0
+%define CONFIG_SER_DEMUXER 0
 %define CONFIG_SHORTEN_DEMUXER 0
 %define CONFIG_SIFF_DEMUXER 0
 %define CONFIG_SLN_DEMUXER 0
@@ -1394,6 +2171,7 @@
 %define CONFIG_TTA_DEMUXER 0
 %define CONFIG_TXD_DEMUXER 0
 %define CONFIG_TTY_DEMUXER 0
+%define CONFIG_TY_DEMUXER 0
 %define CONFIG_V210_DEMUXER 0
 %define CONFIG_V210X_DEMUXER 0
 %define CONFIG_VAG_DEMUXER 0
@@ -1447,642 +2225,11 @@
 %define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0
 %define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0
 %define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0
+%define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0
 %define CONFIG_LIBGME_DEMUXER 0
 %define CONFIG_LIBMODPLUG_DEMUXER 0
 %define CONFIG_LIBOPENMPT_DEMUXER 0
-%define CONFIG_A64MULTI_ENCODER 0
-%define CONFIG_A64MULTI5_ENCODER 0
-%define CONFIG_ALIAS_PIX_ENCODER 0
-%define CONFIG_AMV_ENCODER 0
-%define CONFIG_APNG_ENCODER 0
-%define CONFIG_ASV1_ENCODER 0
-%define CONFIG_ASV2_ENCODER 0
-%define CONFIG_AVRP_ENCODER 0
-%define CONFIG_AVUI_ENCODER 0
-%define CONFIG_AYUV_ENCODER 0
-%define CONFIG_BMP_ENCODER 0
-%define CONFIG_CINEPAK_ENCODER 0
-%define CONFIG_CLJR_ENCODER 0
-%define CONFIG_COMFORTNOISE_ENCODER 0
-%define CONFIG_DNXHD_ENCODER 0
-%define CONFIG_DPX_ENCODER 0
-%define CONFIG_DVVIDEO_ENCODER 0
-%define CONFIG_FFV1_ENCODER 0
-%define CONFIG_FFVHUFF_ENCODER 0
-%define CONFIG_FITS_ENCODER 0
-%define CONFIG_FLASHSV_ENCODER 0
-%define CONFIG_FLASHSV2_ENCODER 0
-%define CONFIG_FLV_ENCODER 0
-%define CONFIG_GIF_ENCODER 0
-%define CONFIG_H261_ENCODER 0
-%define CONFIG_H263_ENCODER 0
-%define CONFIG_H263P_ENCODER 0
-%define CONFIG_HAP_ENCODER 0
-%define CONFIG_HUFFYUV_ENCODER 0
-%define CONFIG_JPEG2000_ENCODER 0
-%define CONFIG_JPEGLS_ENCODER 0
-%define CONFIG_LJPEG_ENCODER 0
-%define CONFIG_MJPEG_ENCODER 0
-%define CONFIG_MPEG1VIDEO_ENCODER 0
-%define CONFIG_MPEG2VIDEO_ENCODER 0
-%define CONFIG_MPEG4_ENCODER 0
-%define CONFIG_MSMPEG4V2_ENCODER 0
-%define CONFIG_MSMPEG4V3_ENCODER 0
-%define CONFIG_MSVIDEO1_ENCODER 0
-%define CONFIG_PAM_ENCODER 0
-%define CONFIG_PBM_ENCODER 0
-%define CONFIG_PCX_ENCODER 0
-%define CONFIG_PGM_ENCODER 0
-%define CONFIG_PGMYUV_ENCODER 0
-%define CONFIG_PNG_ENCODER 0
-%define CONFIG_PPM_ENCODER 0
-%define CONFIG_PRORES_ENCODER 0
-%define CONFIG_PRORES_AW_ENCODER 0
-%define CONFIG_PRORES_KS_ENCODER 0
-%define CONFIG_QTRLE_ENCODER 0
-%define CONFIG_R10K_ENCODER 0
-%define CONFIG_R210_ENCODER 0
-%define CONFIG_RAWVIDEO_ENCODER 0
-%define CONFIG_ROQ_ENCODER 0
-%define CONFIG_RV10_ENCODER 0
-%define CONFIG_RV20_ENCODER 0
-%define CONFIG_S302M_ENCODER 0
-%define CONFIG_SGI_ENCODER 0
-%define CONFIG_SNOW_ENCODER 0
-%define CONFIG_SUNRAST_ENCODER 0
-%define CONFIG_SVQ1_ENCODER 0
-%define CONFIG_TARGA_ENCODER 0
-%define CONFIG_TIFF_ENCODER 0
-%define CONFIG_UTVIDEO_ENCODER 0
-%define CONFIG_V210_ENCODER 0
-%define CONFIG_V308_ENCODER 0
-%define CONFIG_V408_ENCODER 0
-%define CONFIG_V410_ENCODER 0
-%define CONFIG_VC2_ENCODER 0
-%define CONFIG_WRAPPED_AVFRAME_ENCODER 0
-%define CONFIG_WMV1_ENCODER 0
-%define CONFIG_WMV2_ENCODER 0
-%define CONFIG_XBM_ENCODER 0
-%define CONFIG_XFACE_ENCODER 0
-%define CONFIG_XWD_ENCODER 0
-%define CONFIG_Y41P_ENCODER 0
-%define CONFIG_YUV4_ENCODER 0
-%define CONFIG_ZLIB_ENCODER 0
-%define CONFIG_ZMBV_ENCODER 0
-%define CONFIG_AAC_ENCODER 0
-%define CONFIG_AC3_ENCODER 0
-%define CONFIG_AC3_FIXED_ENCODER 0
-%define CONFIG_ALAC_ENCODER 0
-%define CONFIG_DCA_ENCODER 0
-%define CONFIG_EAC3_ENCODER 0
-%define CONFIG_FLAC_ENCODER 0
-%define CONFIG_G723_1_ENCODER 0
-%define CONFIG_MLP_ENCODER 0
-%define CONFIG_MP2_ENCODER 0
-%define CONFIG_MP2FIXED_ENCODER 0
-%define CONFIG_NELLYMOSER_ENCODER 0
-%define CONFIG_OPUS_ENCODER 0
-%define CONFIG_RA_144_ENCODER 0
-%define CONFIG_SONIC_ENCODER 0
-%define CONFIG_SONIC_LS_ENCODER 0
-%define CONFIG_TRUEHD_ENCODER 0
-%define CONFIG_TTA_ENCODER 0
-%define CONFIG_VORBIS_ENCODER 0
-%define CONFIG_WAVPACK_ENCODER 0
-%define CONFIG_WMAV1_ENCODER 0
-%define CONFIG_WMAV2_ENCODER 0
-%define CONFIG_PCM_ALAW_ENCODER 0
-%define CONFIG_PCM_F32BE_ENCODER 0
-%define CONFIG_PCM_F32LE_ENCODER 0
-%define CONFIG_PCM_F64BE_ENCODER 0
-%define CONFIG_PCM_F64LE_ENCODER 0
-%define CONFIG_PCM_MULAW_ENCODER 0
-%define CONFIG_PCM_S8_ENCODER 0
-%define CONFIG_PCM_S8_PLANAR_ENCODER 0
-%define CONFIG_PCM_S16BE_ENCODER 0
-%define CONFIG_PCM_S16BE_PLANAR_ENCODER 0
-%define CONFIG_PCM_S16LE_ENCODER 0
-%define CONFIG_PCM_S16LE_PLANAR_ENCODER 0
-%define CONFIG_PCM_S24BE_ENCODER 0
-%define CONFIG_PCM_S24DAUD_ENCODER 0
-%define CONFIG_PCM_S24LE_ENCODER 0
-%define CONFIG_PCM_S24LE_PLANAR_ENCODER 0
-%define CONFIG_PCM_S32BE_ENCODER 0
-%define CONFIG_PCM_S32LE_ENCODER 0
-%define CONFIG_PCM_S32LE_PLANAR_ENCODER 0
-%define CONFIG_PCM_S64BE_ENCODER 0
-%define CONFIG_PCM_S64LE_ENCODER 0
-%define CONFIG_PCM_U8_ENCODER 0
-%define CONFIG_PCM_U16BE_ENCODER 0
-%define CONFIG_PCM_U16LE_ENCODER 0
-%define CONFIG_PCM_U24BE_ENCODER 0
-%define CONFIG_PCM_U24LE_ENCODER 0
-%define CONFIG_PCM_U32BE_ENCODER 0
-%define CONFIG_PCM_U32LE_ENCODER 0
-%define CONFIG_ROQ_DPCM_ENCODER 0
-%define CONFIG_ADPCM_ADX_ENCODER 0
-%define CONFIG_ADPCM_G722_ENCODER 0
-%define CONFIG_ADPCM_G726_ENCODER 0
-%define CONFIG_ADPCM_G726LE_ENCODER 0
-%define CONFIG_ADPCM_IMA_QT_ENCODER 0
-%define CONFIG_ADPCM_IMA_WAV_ENCODER 0
-%define CONFIG_ADPCM_MS_ENCODER 0
-%define CONFIG_ADPCM_SWF_ENCODER 0
-%define CONFIG_ADPCM_YAMAHA_ENCODER 0
-%define CONFIG_SSA_ENCODER 0
-%define CONFIG_ASS_ENCODER 0
-%define CONFIG_DVBSUB_ENCODER 0
-%define CONFIG_DVDSUB_ENCODER 0
-%define CONFIG_MOVTEXT_ENCODER 0
-%define CONFIG_SRT_ENCODER 0
-%define CONFIG_SUBRIP_ENCODER 0
-%define CONFIG_TEXT_ENCODER 0
-%define CONFIG_WEBVTT_ENCODER 0
-%define CONFIG_XSUB_ENCODER 0
-%define CONFIG_AAC_AT_ENCODER 0
-%define CONFIG_ALAC_AT_ENCODER 0
-%define CONFIG_ILBC_AT_ENCODER 0
-%define CONFIG_PCM_ALAW_AT_ENCODER 0
-%define CONFIG_PCM_MULAW_AT_ENCODER 0
-%define CONFIG_LIBFDK_AAC_ENCODER 0
-%define CONFIG_LIBGSM_ENCODER 0
-%define CONFIG_LIBGSM_MS_ENCODER 0
-%define CONFIG_LIBILBC_ENCODER 0
-%define CONFIG_LIBMP3LAME_ENCODER 0
-%define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0
-%define CONFIG_LIBOPENJPEG_ENCODER 0
-%define CONFIG_LIBOPUS_ENCODER 0
-%define CONFIG_LIBSHINE_ENCODER 0
-%define CONFIG_LIBSPEEX_ENCODER 0
-%define CONFIG_LIBTHEORA_ENCODER 0
-%define CONFIG_LIBTWOLAME_ENCODER 0
-%define CONFIG_LIBVO_AMRWBENC_ENCODER 0
-%define CONFIG_LIBVORBIS_ENCODER 0
-%define CONFIG_LIBVPX_VP8_ENCODER 0
-%define CONFIG_LIBVPX_VP9_ENCODER 0
-%define CONFIG_LIBWAVPACK_ENCODER 0
-%define CONFIG_LIBWEBP_ANIM_ENCODER 0
-%define CONFIG_LIBWEBP_ENCODER 0
-%define CONFIG_LIBX262_ENCODER 0
-%define CONFIG_LIBX264_ENCODER 0
-%define CONFIG_LIBX264RGB_ENCODER 0
-%define CONFIG_LIBX265_ENCODER 0
-%define CONFIG_LIBXAVS_ENCODER 0
-%define CONFIG_LIBXVID_ENCODER 0
-%define CONFIG_H263_V4L2M2M_ENCODER 0
-%define CONFIG_LIBOPENH264_ENCODER 0
-%define CONFIG_H264_NVENC_ENCODER 0
-%define CONFIG_H264_OMX_ENCODER 0
-%define CONFIG_H264_QSV_ENCODER 0
-%define CONFIG_H264_V4L2M2M_ENCODER 0
-%define CONFIG_H264_VAAPI_ENCODER 0
-%define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0
-%define CONFIG_NVENC_ENCODER 0
-%define CONFIG_NVENC_H264_ENCODER 0
-%define CONFIG_NVENC_HEVC_ENCODER 0
-%define CONFIG_HEVC_NVENC_ENCODER 0
-%define CONFIG_HEVC_QSV_ENCODER 0
-%define CONFIG_HEVC_V4L2M2M_ENCODER 0
-%define CONFIG_HEVC_VAAPI_ENCODER 0
-%define CONFIG_LIBKVAZAAR_ENCODER 0
-%define CONFIG_MJPEG_VAAPI_ENCODER 0
-%define CONFIG_MPEG2_QSV_ENCODER 0
-%define CONFIG_MPEG2_VAAPI_ENCODER 0
-%define CONFIG_MPEG4_V4L2M2M_ENCODER 0
-%define CONFIG_VP8_V4L2M2M_ENCODER 0
-%define CONFIG_VP8_VAAPI_ENCODER 0
-%define CONFIG_VP9_VAAPI_ENCODER 0
-%define CONFIG_ABENCH_FILTER 0
-%define CONFIG_ACOMPRESSOR_FILTER 0
-%define CONFIG_ACOPY_FILTER 0
-%define CONFIG_ACROSSFADE_FILTER 0
-%define CONFIG_ACRUSHER_FILTER 0
-%define CONFIG_ADELAY_FILTER 0
-%define CONFIG_AECHO_FILTER 0
-%define CONFIG_AEMPHASIS_FILTER 0
-%define CONFIG_AEVAL_FILTER 0
-%define CONFIG_AFADE_FILTER 0
-%define CONFIG_AFFTFILT_FILTER 0
-%define CONFIG_AFIR_FILTER 0
-%define CONFIG_AFORMAT_FILTER 0
-%define CONFIG_AGATE_FILTER 0
-%define CONFIG_AINTERLEAVE_FILTER 0
-%define CONFIG_ALIMITER_FILTER 0
-%define CONFIG_ALLPASS_FILTER 0
-%define CONFIG_ALOOP_FILTER 0
-%define CONFIG_AMERGE_FILTER 0
-%define CONFIG_AMETADATA_FILTER 0
-%define CONFIG_AMIX_FILTER 0
-%define CONFIG_ANEQUALIZER_FILTER 0
-%define CONFIG_ANULL_FILTER 0
-%define CONFIG_APAD_FILTER 0
-%define CONFIG_APERMS_FILTER 0
-%define CONFIG_APHASER_FILTER 0
-%define CONFIG_APULSATOR_FILTER 0
-%define CONFIG_AREALTIME_FILTER 0
-%define CONFIG_ARESAMPLE_FILTER 0
-%define CONFIG_AREVERSE_FILTER 0
-%define CONFIG_ASELECT_FILTER 0
-%define CONFIG_ASENDCMD_FILTER 0
-%define CONFIG_ASETNSAMPLES_FILTER 0
-%define CONFIG_ASETPTS_FILTER 0
-%define CONFIG_ASETRATE_FILTER 0
-%define CONFIG_ASETTB_FILTER 0
-%define CONFIG_ASHOWINFO_FILTER 0
-%define CONFIG_ASIDEDATA_FILTER 0
-%define CONFIG_ASPLIT_FILTER 0
-%define CONFIG_ASTATS_FILTER 0
-%define CONFIG_ASTREAMSELECT_FILTER 0
-%define CONFIG_ATEMPO_FILTER 0
-%define CONFIG_ATRIM_FILTER 0
-%define CONFIG_AZMQ_FILTER 0
-%define CONFIG_BANDPASS_FILTER 0
-%define CONFIG_BANDREJECT_FILTER 0
-%define CONFIG_BASS_FILTER 0
-%define CONFIG_BIQUAD_FILTER 0
-%define CONFIG_BS2B_FILTER 0
-%define CONFIG_CHANNELMAP_FILTER 0
-%define CONFIG_CHANNELSPLIT_FILTER 0
-%define CONFIG_CHORUS_FILTER 0
-%define CONFIG_COMPAND_FILTER 0
-%define CONFIG_COMPENSATIONDELAY_FILTER 0
-%define CONFIG_CROSSFEED_FILTER 0
-%define CONFIG_CRYSTALIZER_FILTER 0
-%define CONFIG_DCSHIFT_FILTER 0
-%define CONFIG_DYNAUDNORM_FILTER 0
-%define CONFIG_EARWAX_FILTER 0
-%define CONFIG_EBUR128_FILTER 0
-%define CONFIG_EQUALIZER_FILTER 0
-%define CONFIG_EXTRASTEREO_FILTER 0
-%define CONFIG_FIREQUALIZER_FILTER 0
-%define CONFIG_FLANGER_FILTER 0
-%define CONFIG_HAAS_FILTER 0
-%define CONFIG_HDCD_FILTER 0
-%define CONFIG_HEADPHONE_FILTER 0
-%define CONFIG_HIGHPASS_FILTER 0
-%define CONFIG_JOIN_FILTER 0
-%define CONFIG_LADSPA_FILTER 0
-%define CONFIG_LOUDNORM_FILTER 0
-%define CONFIG_LOWPASS_FILTER 0
-%define CONFIG_PAN_FILTER 0
-%define CONFIG_REPLAYGAIN_FILTER 0
-%define CONFIG_RESAMPLE_FILTER 0
-%define CONFIG_RUBBERBAND_FILTER 0
-%define CONFIG_SIDECHAINCOMPRESS_FILTER 0
-%define CONFIG_SIDECHAINGATE_FILTER 0
-%define CONFIG_SILENCEDETECT_FILTER 0
-%define CONFIG_SILENCEREMOVE_FILTER 0
-%define CONFIG_SOFALIZER_FILTER 0
-%define CONFIG_STEREOTOOLS_FILTER 0
-%define CONFIG_STEREOWIDEN_FILTER 0
-%define CONFIG_SUPEREQUALIZER_FILTER 0
-%define CONFIG_SURROUND_FILTER 0
-%define CONFIG_TREBLE_FILTER 0
-%define CONFIG_TREMOLO_FILTER 0
-%define CONFIG_VIBRATO_FILTER 0
-%define CONFIG_VOLUME_FILTER 0
-%define CONFIG_VOLUMEDETECT_FILTER 0
-%define CONFIG_AEVALSRC_FILTER 0
-%define CONFIG_ANOISESRC_FILTER 0
-%define CONFIG_ANULLSRC_FILTER 0
-%define CONFIG_FLITE_FILTER 0
-%define CONFIG_SINE_FILTER 0
-%define CONFIG_ANULLSINK_FILTER 0
-%define CONFIG_ALPHAEXTRACT_FILTER 0
-%define CONFIG_ALPHAMERGE_FILTER 0
-%define CONFIG_ASS_FILTER 0
-%define CONFIG_ATADENOISE_FILTER 0
-%define CONFIG_AVGBLUR_FILTER 0
-%define CONFIG_BBOX_FILTER 0
-%define CONFIG_BENCH_FILTER 0
-%define CONFIG_BITPLANENOISE_FILTER 0
-%define CONFIG_BLACKDETECT_FILTER 0
-%define CONFIG_BLACKFRAME_FILTER 0
-%define CONFIG_BLEND_FILTER 0
-%define CONFIG_BOXBLUR_FILTER 0
-%define CONFIG_BWDIF_FILTER 0
-%define CONFIG_CHROMAKEY_FILTER 0
-%define CONFIG_CIESCOPE_FILTER 0
-%define CONFIG_CODECVIEW_FILTER 0
-%define CONFIG_COLORBALANCE_FILTER 0
-%define CONFIG_COLORCHANNELMIXER_FILTER 0
-%define CONFIG_COLORKEY_FILTER 0
-%define CONFIG_COLORLEVELS_FILTER 0
-%define CONFIG_COLORMATRIX_FILTER 0
-%define CONFIG_COLORSPACE_FILTER 0
-%define CONFIG_CONVOLUTION_FILTER 0
-%define CONFIG_CONVOLVE_FILTER 0
-%define CONFIG_COPY_FILTER 0
-%define CONFIG_COREIMAGE_FILTER 0
-%define CONFIG_COVER_RECT_FILTER 0
-%define CONFIG_CROP_FILTER 0
-%define CONFIG_CROPDETECT_FILTER 0
-%define CONFIG_CURVES_FILTER 0
-%define CONFIG_DATASCOPE_FILTER 0
-%define CONFIG_DCTDNOIZ_FILTER 0
-%define CONFIG_DEBAND_FILTER 0
-%define CONFIG_DECIMATE_FILTER 0
-%define CONFIG_DEFLATE_FILTER 0
-%define CONFIG_DEFLICKER_FILTER 0
-%define CONFIG_DEINTERLACE_QSV_FILTER 0
-%define CONFIG_DEINTERLACE_VAAPI_FILTER 0
-%define CONFIG_DEJUDDER_FILTER 0
-%define CONFIG_DELOGO_FILTER 0
-%define CONFIG_DESHAKE_FILTER 0
-%define CONFIG_DESPILL_FILTER 0
-%define CONFIG_DETELECINE_FILTER 0
-%define CONFIG_DILATION_FILTER 0
-%define CONFIG_DISPLACE_FILTER 0
-%define CONFIG_DOUBLEWEAVE_FILTER 0
-%define CONFIG_DRAWBOX_FILTER 0
-%define CONFIG_DRAWGRAPH_FILTER 0
-%define CONFIG_DRAWGRID_FILTER 0
-%define CONFIG_DRAWTEXT_FILTER 0
-%define CONFIG_EDGEDETECT_FILTER 0
-%define CONFIG_ELBG_FILTER 0
-%define CONFIG_EQ_FILTER 0
-%define CONFIG_EROSION_FILTER 0
-%define CONFIG_EXTRACTPLANES_FILTER 0
-%define CONFIG_FADE_FILTER 0
-%define CONFIG_FFTFILT_FILTER 0
-%define CONFIG_FIELD_FILTER 0
-%define CONFIG_FIELDHINT_FILTER 0
-%define CONFIG_FIELDMATCH_FILTER 0
-%define CONFIG_FIELDORDER_FILTER 0
-%define CONFIG_FIND_RECT_FILTER 0
-%define CONFIG_FLOODFILL_FILTER 0
-%define CONFIG_FORMAT_FILTER 0
-%define CONFIG_FPS_FILTER 0
-%define CONFIG_FRAMEPACK_FILTER 0
-%define CONFIG_FRAMERATE_FILTER 0
-%define CONFIG_FRAMESTEP_FILTER 0
-%define CONFIG_FREI0R_FILTER 0
-%define CONFIG_FSPP_FILTER 0
-%define CONFIG_GBLUR_FILTER 0
-%define CONFIG_GEQ_FILTER 0
-%define CONFIG_GRADFUN_FILTER 0
-%define CONFIG_HALDCLUT_FILTER 0
-%define CONFIG_HFLIP_FILTER 0
-%define CONFIG_HISTEQ_FILTER 0
-%define CONFIG_HISTOGRAM_FILTER 0
-%define CONFIG_HQDN3D_FILTER 0
-%define CONFIG_HQX_FILTER 0
-%define CONFIG_HSTACK_FILTER 0
-%define CONFIG_HUE_FILTER 0
-%define CONFIG_HWDOWNLOAD_FILTER 0
-%define CONFIG_HWMAP_FILTER 0
-%define CONFIG_HWUPLOAD_FILTER 0
-%define CONFIG_HWUPLOAD_CUDA_FILTER 0
-%define CONFIG_HYSTERESIS_FILTER 0
-%define CONFIG_IDET_FILTER 0
-%define CONFIG_IL_FILTER 0
-%define CONFIG_INFLATE_FILTER 0
-%define CONFIG_INTERLACE_FILTER 0
-%define CONFIG_INTERLEAVE_FILTER 0
-%define CONFIG_KERNDEINT_FILTER 0
-%define CONFIG_LENSCORRECTION_FILTER 0
-%define CONFIG_LIBVMAF_FILTER 0
-%define CONFIG_LIMITER_FILTER 0
-%define CONFIG_LOOP_FILTER 0
-%define CONFIG_LUMAKEY_FILTER 0
-%define CONFIG_LUT_FILTER 0
-%define CONFIG_LUT2_FILTER 0
-%define CONFIG_LUT3D_FILTER 0
-%define CONFIG_LUTRGB_FILTER 0
-%define CONFIG_LUTYUV_FILTER 0
-%define CONFIG_MASKEDCLAMP_FILTER 0
-%define CONFIG_MASKEDMERGE_FILTER 0
-%define CONFIG_MCDEINT_FILTER 0
-%define CONFIG_MERGEPLANES_FILTER 0
-%define CONFIG_MESTIMATE_FILTER 0
-%define CONFIG_METADATA_FILTER 0
-%define CONFIG_MIDEQUALIZER_FILTER 0
-%define CONFIG_MINTERPOLATE_FILTER 0
-%define CONFIG_MPDECIMATE_FILTER 0
-%define CONFIG_NEGATE_FILTER 0
-%define CONFIG_NLMEANS_FILTER 0
-%define CONFIG_NNEDI_FILTER 0
-%define CONFIG_NOFORMAT_FILTER 0
-%define CONFIG_NOISE_FILTER 0
-%define CONFIG_NULL_FILTER 0
-%define CONFIG_OCR_FILTER 0
-%define CONFIG_OCV_FILTER 0
-%define CONFIG_OSCILLOSCOPE_FILTER 0
-%define CONFIG_OVERLAY_FILTER 0
-%define CONFIG_OWDENOISE_FILTER 0
-%define CONFIG_PAD_FILTER 0
-%define CONFIG_PALETTEGEN_FILTER 0
-%define CONFIG_PALETTEUSE_FILTER 0
-%define CONFIG_PERMS_FILTER 0
-%define CONFIG_PERSPECTIVE_FILTER 0
-%define CONFIG_PHASE_FILTER 0
-%define CONFIG_PIXDESCTEST_FILTER 0
-%define CONFIG_PIXSCOPE_FILTER 0
-%define CONFIG_PP_FILTER 0
-%define CONFIG_PP7_FILTER 0
-%define CONFIG_PREMULTIPLY_FILTER 0
-%define CONFIG_PREWITT_FILTER 0
-%define CONFIG_PSEUDOCOLOR_FILTER 0
-%define CONFIG_PSNR_FILTER 0
-%define CONFIG_PULLUP_FILTER 0
-%define CONFIG_QP_FILTER 0
-%define CONFIG_RANDOM_FILTER 0
-%define CONFIG_READEIA608_FILTER 0
-%define CONFIG_READVITC_FILTER 0
-%define CONFIG_REALTIME_FILTER 0
-%define CONFIG_REMAP_FILTER 0
-%define CONFIG_REMOVEGRAIN_FILTER 0
-%define CONFIG_REMOVELOGO_FILTER 0
-%define CONFIG_REPEATFIELDS_FILTER 0
-%define CONFIG_REVERSE_FILTER 0
-%define CONFIG_ROBERTS_FILTER 0
-%define CONFIG_ROTATE_FILTER 0
-%define CONFIG_SAB_FILTER 0
-%define CONFIG_SCALE_FILTER 0
-%define CONFIG_SCALE_CUDA_FILTER 0
-%define CONFIG_SCALE_NPP_FILTER 0
-%define CONFIG_SCALE_QSV_FILTER 0
-%define CONFIG_SCALE_VAAPI_FILTER 0
-%define CONFIG_SCALE2REF_FILTER 0
-%define CONFIG_SELECT_FILTER 0
-%define CONFIG_SELECTIVECOLOR_FILTER 0
-%define CONFIG_SENDCMD_FILTER 0
-%define CONFIG_SEPARATEFIELDS_FILTER 0
-%define CONFIG_SETDAR_FILTER 0
-%define CONFIG_SETFIELD_FILTER 0
-%define CONFIG_SETPTS_FILTER 0
-%define CONFIG_SETSAR_FILTER 0
-%define CONFIG_SETTB_FILTER 0
-%define CONFIG_SHOWINFO_FILTER 0
-%define CONFIG_SHOWPALETTE_FILTER 0
-%define CONFIG_SHUFFLEFRAMES_FILTER 0
-%define CONFIG_SHUFFLEPLANES_FILTER 0
-%define CONFIG_SIDEDATA_FILTER 0
-%define CONFIG_SIGNALSTATS_FILTER 0
-%define CONFIG_SIGNATURE_FILTER 0
-%define CONFIG_SMARTBLUR_FILTER 0
-%define CONFIG_SOBEL_FILTER 0
-%define CONFIG_SPLIT_FILTER 0
-%define CONFIG_SPP_FILTER 0
-%define CONFIG_SSIM_FILTER 0
-%define CONFIG_STEREO3D_FILTER 0
-%define CONFIG_STREAMSELECT_FILTER 0
-%define CONFIG_SUBTITLES_FILTER 0
-%define CONFIG_SUPER2XSAI_FILTER 0
-%define CONFIG_SWAPRECT_FILTER 0
-%define CONFIG_SWAPUV_FILTER 0
-%define CONFIG_TBLEND_FILTER 0
-%define CONFIG_TELECINE_FILTER 0
-%define CONFIG_THRESHOLD_FILTER 0
-%define CONFIG_THUMBNAIL_FILTER 0
-%define CONFIG_THUMBNAIL_CUDA_FILTER 0
-%define CONFIG_TILE_FILTER 0
-%define CONFIG_TINTERLACE_FILTER 0
-%define CONFIG_TLUT2_FILTER 0
-%define CONFIG_TONEMAP_FILTER 0
-%define CONFIG_TRANSPOSE_FILTER 0
-%define CONFIG_TRIM_FILTER 0
-%define CONFIG_UNPREMULTIPLY_FILTER 0
-%define CONFIG_UNSHARP_FILTER 0
-%define CONFIG_USPP_FILTER 0
-%define CONFIG_VAGUEDENOISER_FILTER 0
-%define CONFIG_VECTORSCOPE_FILTER 0
-%define CONFIG_VFLIP_FILTER 0
-%define CONFIG_VIDSTABDETECT_FILTER 0
-%define CONFIG_VIDSTABTRANSFORM_FILTER 0
-%define CONFIG_VIGNETTE_FILTER 0
-%define CONFIG_VMAFMOTION_FILTER 0
-%define CONFIG_VSTACK_FILTER 0
-%define CONFIG_W3FDIF_FILTER 0
-%define CONFIG_WAVEFORM_FILTER 0
-%define CONFIG_WEAVE_FILTER 0
-%define CONFIG_XBR_FILTER 0
-%define CONFIG_YADIF_FILTER 0
-%define CONFIG_ZMQ_FILTER 0
-%define CONFIG_ZOOMPAN_FILTER 0
-%define CONFIG_ZSCALE_FILTER 0
-%define CONFIG_ALLRGB_FILTER 0
-%define CONFIG_ALLYUV_FILTER 0
-%define CONFIG_CELLAUTO_FILTER 0
-%define CONFIG_COLOR_FILTER 0
-%define CONFIG_COREIMAGESRC_FILTER 0
-%define CONFIG_FREI0R_SRC_FILTER 0
-%define CONFIG_HALDCLUTSRC_FILTER 0
-%define CONFIG_LIFE_FILTER 0
-%define CONFIG_MANDELBROT_FILTER 0
-%define CONFIG_MPTESTSRC_FILTER 0
-%define CONFIG_NULLSRC_FILTER 0
-%define CONFIG_RGBTESTSRC_FILTER 0
-%define CONFIG_SMPTEBARS_FILTER 0
-%define CONFIG_SMPTEHDBARS_FILTER 0
-%define CONFIG_TESTSRC_FILTER 0
-%define CONFIG_TESTSRC2_FILTER 0
-%define CONFIG_YUVTESTSRC_FILTER 0
-%define CONFIG_NULLSINK_FILTER 0
-%define CONFIG_ABITSCOPE_FILTER 0
-%define CONFIG_ADRAWGRAPH_FILTER 0
-%define CONFIG_AHISTOGRAM_FILTER 0
-%define CONFIG_APHASEMETER_FILTER 0
-%define CONFIG_AVECTORSCOPE_FILTER 0
-%define CONFIG_CONCAT_FILTER 0
-%define CONFIG_SHOWCQT_FILTER 0
-%define CONFIG_SHOWFREQS_FILTER 0
-%define CONFIG_SHOWSPECTRUM_FILTER 0
-%define CONFIG_SHOWSPECTRUMPIC_FILTER 0
-%define CONFIG_SHOWVOLUME_FILTER 0
-%define CONFIG_SHOWWAVES_FILTER 0
-%define CONFIG_SHOWWAVESPIC_FILTER 0
-%define CONFIG_SPECTRUMSYNTH_FILTER 0
-%define CONFIG_AMOVIE_FILTER 0
-%define CONFIG_MOVIE_FILTER 0
-%define CONFIG_H263_VAAPI_HWACCEL 0
-%define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_H264_CUVID_HWACCEL 0
-%define CONFIG_H264_D3D11VA_HWACCEL 0
-%define CONFIG_H264_D3D11VA2_HWACCEL 0
-%define CONFIG_H264_DXVA2_HWACCEL 0
-%define CONFIG_H264_MEDIACODEC_HWACCEL 0
-%define CONFIG_H264_MMAL_HWACCEL 0
-%define CONFIG_H264_QSV_HWACCEL 0
-%define CONFIG_H264_VAAPI_HWACCEL 0
-%define CONFIG_H264_VDA_HWACCEL 0
-%define CONFIG_H264_VDA_OLD_HWACCEL 0
-%define CONFIG_H264_VDPAU_HWACCEL 0
-%define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_HEVC_CUVID_HWACCEL 0
-%define CONFIG_HEVC_D3D11VA_HWACCEL 0
-%define CONFIG_HEVC_D3D11VA2_HWACCEL 0
-%define CONFIG_HEVC_DXVA2_HWACCEL 0
-%define CONFIG_HEVC_MEDIACODEC_HWACCEL 0
-%define CONFIG_HEVC_QSV_HWACCEL 0
-%define CONFIG_HEVC_VAAPI_HWACCEL 0
-%define CONFIG_HEVC_VDPAU_HWACCEL 0
-%define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_MJPEG_CUVID_HWACCEL 0
-%define CONFIG_MPEG1_CUVID_HWACCEL 0
-%define CONFIG_MPEG1_XVMC_HWACCEL 0
-%define CONFIG_MPEG1_VDPAU_HWACCEL 0
-%define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_MPEG2_CUVID_HWACCEL 0
-%define CONFIG_MPEG2_XVMC_HWACCEL 0
-%define CONFIG_MPEG2_D3D11VA_HWACCEL 0
-%define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
-%define CONFIG_MPEG2_DXVA2_HWACCEL 0
-%define CONFIG_MPEG2_MMAL_HWACCEL 0
-%define CONFIG_MPEG2_QSV_HWACCEL 0
-%define CONFIG_MPEG2_VAAPI_HWACCEL 0
-%define CONFIG_MPEG2_VDPAU_HWACCEL 0
-%define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_MPEG2_MEDIACODEC_HWACCEL 0
-%define CONFIG_MPEG4_CUVID_HWACCEL 0
-%define CONFIG_MPEG4_MEDIACODEC_HWACCEL 0
-%define CONFIG_MPEG4_MMAL_HWACCEL 0
-%define CONFIG_MPEG4_VAAPI_HWACCEL 0
-%define CONFIG_MPEG4_VDPAU_HWACCEL 0
-%define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
-%define CONFIG_VC1_CUVID_HWACCEL 0
-%define CONFIG_VC1_D3D11VA_HWACCEL 0
-%define CONFIG_VC1_D3D11VA2_HWACCEL 0
-%define CONFIG_VC1_DXVA2_HWACCEL 0
-%define CONFIG_VC1_VAAPI_HWACCEL 0
-%define CONFIG_VC1_VDPAU_HWACCEL 0
-%define CONFIG_VC1_MMAL_HWACCEL 0
-%define CONFIG_VC1_QSV_HWACCEL 0
-%define CONFIG_VP8_CUVID_HWACCEL 0
-%define CONFIG_VP8_MEDIACODEC_HWACCEL 0
-%define CONFIG_VP8_QSV_HWACCEL 0
-%define CONFIG_VP9_CUVID_HWACCEL 0
-%define CONFIG_VP9_D3D11VA_HWACCEL 0
-%define CONFIG_VP9_D3D11VA2_HWACCEL 0
-%define CONFIG_VP9_DXVA2_HWACCEL 0
-%define CONFIG_VP9_MEDIACODEC_HWACCEL 0
-%define CONFIG_VP9_VAAPI_HWACCEL 0
-%define CONFIG_WMV3_D3D11VA_HWACCEL 0
-%define CONFIG_WMV3_D3D11VA2_HWACCEL 0
-%define CONFIG_WMV3_DXVA2_HWACCEL 0
-%define CONFIG_WMV3_VAAPI_HWACCEL 0
-%define CONFIG_WMV3_VDPAU_HWACCEL 0
-%define CONFIG_ALSA_INDEV 0
-%define CONFIG_AVFOUNDATION_INDEV 0
-%define CONFIG_BKTR_INDEV 0
-%define CONFIG_DECKLINK_INDEV 0
-%define CONFIG_LIBNDI_NEWTEK_INDEV 0
-%define CONFIG_DSHOW_INDEV 0
-%define CONFIG_FBDEV_INDEV 0
-%define CONFIG_GDIGRAB_INDEV 0
-%define CONFIG_IEC61883_INDEV 0
-%define CONFIG_JACK_INDEV 0
-%define CONFIG_KMSGRAB_INDEV 0
-%define CONFIG_LAVFI_INDEV 0
-%define CONFIG_OPENAL_INDEV 0
-%define CONFIG_OSS_INDEV 0
-%define CONFIG_PULSE_INDEV 0
-%define CONFIG_SNDIO_INDEV 0
-%define CONFIG_V4L2_INDEV 0
-%define CONFIG_VFWCAP_INDEV 0
-%define CONFIG_XCBGRAB_INDEV 0
-%define CONFIG_LIBCDIO_INDEV 0
-%define CONFIG_LIBDC1394_INDEV 0
+%define CONFIG_VAPOURSYNTH_DEMUXER 0
 %define CONFIG_A64_MUXER 0
 %define CONFIG_AC3_MUXER 0
 %define CONFIG_ADTS_MUXER 0
@@ -2090,6 +2237,8 @@
 %define CONFIG_AIFF_MUXER 0
 %define CONFIG_AMR_MUXER 0
 %define CONFIG_APNG_MUXER 0
+%define CONFIG_APTX_MUXER 0
+%define CONFIG_APTX_HD_MUXER 0
 %define CONFIG_ASF_MUXER 0
 %define CONFIG_ASS_MUXER 0
 %define CONFIG_AST_MUXER 0
@@ -2097,9 +2246,12 @@
 %define CONFIG_AU_MUXER 0
 %define CONFIG_AVI_MUXER 0
 %define CONFIG_AVM2_MUXER 0
+%define CONFIG_AVS2_MUXER 0
 %define CONFIG_BIT_MUXER 0
 %define CONFIG_CAF_MUXER 0
 %define CONFIG_CAVSVIDEO_MUXER 0
+%define CONFIG_CODEC2_MUXER 0
+%define CONFIG_CODEC2RAW_MUXER 0
 %define CONFIG_CRC_MUXER 0
 %define CONFIG_DASH_MUXER 0
 %define CONFIG_DATA_MUXER 0
@@ -2110,9 +2262,9 @@
 %define CONFIG_DV_MUXER 0
 %define CONFIG_EAC3_MUXER 0
 %define CONFIG_F4V_MUXER 0
-%define CONFIG_FFM_MUXER 0
 %define CONFIG_FFMETADATA_MUXER 0
 %define CONFIG_FIFO_MUXER 0
+%define CONFIG_FIFO_TEST_MUXER 0
 %define CONFIG_FILMSTRIP_MUXER 0
 %define CONFIG_FITS_MUXER 0
 %define CONFIG_FLAC_MUXER 0
@@ -2205,7 +2357,9 @@
 %define CONFIG_RTP_MPEGTS_MUXER 0
 %define CONFIG_RTSP_MUXER 0
 %define CONFIG_SAP_MUXER 0
+%define CONFIG_SBC_MUXER 0
 %define CONFIG_SCC_MUXER 0
+%define CONFIG_SEGAFILM_MUXER 0
 %define CONFIG_SEGMENT_MUXER 0
 %define CONFIG_STREAM_SEGMENT_MUXER 0
 %define CONFIG_SINGLEJPEG_MUXER 0
@@ -2238,58 +2392,6 @@
 %define CONFIG_WV_MUXER 0
 %define CONFIG_YUV4MPEGPIPE_MUXER 0
 %define CONFIG_CHROMAPRINT_MUXER 0
-%define CONFIG_ALSA_OUTDEV 0
-%define CONFIG_CACA_OUTDEV 0
-%define CONFIG_DECKLINK_OUTDEV 0
-%define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
-%define CONFIG_FBDEV_OUTDEV 0
-%define CONFIG_OPENGL_OUTDEV 0
-%define CONFIG_OSS_OUTDEV 0
-%define CONFIG_PULSE_OUTDEV 0
-%define CONFIG_SDL2_OUTDEV 0
-%define CONFIG_SNDIO_OUTDEV 0
-%define CONFIG_V4L2_OUTDEV 0
-%define CONFIG_XV_OUTDEV 0
-%define CONFIG_AAC_PARSER 1
-%define CONFIG_AAC_LATM_PARSER 0
-%define CONFIG_AC3_PARSER 0
-%define CONFIG_ADX_PARSER 0
-%define CONFIG_BMP_PARSER 0
-%define CONFIG_CAVSVIDEO_PARSER 0
-%define CONFIG_COOK_PARSER 0
-%define CONFIG_DCA_PARSER 0
-%define CONFIG_DIRAC_PARSER 0
-%define CONFIG_DNXHD_PARSER 0
-%define CONFIG_DPX_PARSER 0
-%define CONFIG_DVAUDIO_PARSER 0
-%define CONFIG_DVBSUB_PARSER 0
-%define CONFIG_DVDSUB_PARSER 0
-%define CONFIG_DVD_NAV_PARSER 0
-%define CONFIG_FLAC_PARSER 1
-%define CONFIG_G729_PARSER 0
-%define CONFIG_GSM_PARSER 1
-%define CONFIG_H261_PARSER 0
-%define CONFIG_H263_PARSER 1
-%define CONFIG_H264_PARSER 1
-%define CONFIG_HEVC_PARSER 0
-%define CONFIG_MJPEG_PARSER 0
-%define CONFIG_MLP_PARSER 0
-%define CONFIG_MPEG4VIDEO_PARSER 1
-%define CONFIG_MPEGAUDIO_PARSER 1
-%define CONFIG_MPEGVIDEO_PARSER 0
-%define CONFIG_OPUS_PARSER 1
-%define CONFIG_PNG_PARSER 0
-%define CONFIG_PNM_PARSER 0
-%define CONFIG_RV30_PARSER 0
-%define CONFIG_RV40_PARSER 0
-%define CONFIG_SIPR_PARSER 0
-%define CONFIG_TAK_PARSER 0
-%define CONFIG_VC1_PARSER 0
-%define CONFIG_VORBIS_PARSER 1
-%define CONFIG_VP3_PARSER 1
-%define CONFIG_VP8_PARSER 1
-%define CONFIG_VP9_PARSER 1
-%define CONFIG_XMA_PARSER 0
 %define CONFIG_ASYNC_PROTOCOL 0
 %define CONFIG_BLURAY_PROTOCOL 0
 %define CONFIG_CACHE_PROTOCOL 0
@@ -2323,10 +2425,7 @@
 %define CONFIG_SUBFILE_PROTOCOL 0
 %define CONFIG_TEE_PROTOCOL 0
 %define CONFIG_TCP_PROTOCOL 0
-%define CONFIG_TLS_GNUTLS_PROTOCOL 0
-%define CONFIG_TLS_SCHANNEL_PROTOCOL 0
-%define CONFIG_TLS_SECURETRANSPORT_PROTOCOL 0
-%define CONFIG_TLS_OPENSSL_PROTOCOL 0
+%define CONFIG_TLS_PROTOCOL 0
 %define CONFIG_UDP_PROTOCOL 0
 %define CONFIG_UDPLITE_PROTOCOL 0
 %define CONFIG_UNIX_PROTOCOL 0
@@ -2335,5 +2434,6 @@
 %define CONFIG_LIBRTMPS_PROTOCOL 0
 %define CONFIG_LIBRTMPT_PROTOCOL 0
 %define CONFIG_LIBRTMPTE_PROTOCOL 0
+%define CONFIG_LIBSRT_PROTOCOL 0
 %define CONFIG_LIBSSH_PROTOCOL 0
 %define CONFIG_LIBSMBCLIENT_PROTOCOL 0

diff --git a/fuchsia/config/max/x64/config.h b/fuchsia/config/max/x64/config.h
index 927e4b9..36efc42 100644
--- a/fuchsia/config/max/x64/config.h
+++ b/fuchsia/config/max/x64/config.h

@@ -1,12 +1,12 @@
 /* Automatically generated by configure - do not modify! */
 #ifndef FFMPEG_CONFIG_H
 #define FFMPEG_CONFIG_H
-#define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --disable-bzlib --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vda --disable-vdpau --disable-videotoolbox --disable-nvenc --disable-cuda --disable-cuvid --disable-v4l2_m2m --enable-decoder='vorbis,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-decoder='theora,vp8' --enable-demuxer='ogg,matroska,wav,flac' --enable-parser='opus,vorbis,flac' --enable-parser='vp3,vp8' --optflags='\"-O2\"' --enable-pic --enable-pic --enable-lto --cc=clang --cxx=clang++ --ld=clang --extra-ldflags='-fuse-ld=lld' --enable-decoder=vp9 --enable-parser=vp9 --sysroot=/usr/local/google/home/phosek/fuchsia/third_party/ffmpeg/../../buildtools/linux-x64/sysroot --enable-decoder='aac,h264,mp3' --enable-demuxer='aac,mp3,mov' --enable-parser='aac,h264,mpegaudio' --enable-decoder=mpeg4 --enable-parser='h263,mpeg4video' --enable-demuxer=avi --enable-demuxer=amr --enable-decoder='amrnb,amrwb' --enable-decoder=gsm_ms --enable-demuxer=gsm --enable-parser=gsm"
+#define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --disable-debug --disable-bzlib --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-autodetect --enable-decoder='vorbis,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le,mp3' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-decoder='theora,vp8,sbc,aptx' --enable-demuxer='ogg,matroska,wav,flac,mp3,mov' --enable-parser='opus,vorbis,flac,mpegaudio' --enable-parser='vp3,vp8' --optflags='\"-O2\"' --enable-pic --x86asmexe=yasm --enable-pic --enable-lto --cc=clang --cxx=clang++ --ld=clang --enable-decoder=vp9 --enable-parser=vp9 --sysroot=/usr/local/google/home/dalesat/fuchsia/third_party/ffmpeg/../../buildtools/linux-x64/sysroot --extra-ldflags='-fuse-ld=lld' --enable-decoder='aac,h264,mp3' --enable-demuxer='aac,mp3,mov' --enable-parser='aac,h264,mpegaudio' --enable-decoder=mpeg4 --enable-parser='h263,mpeg4video' --enable-demuxer=avi --enable-demuxer=amr --enable-decoder='amrnb,amrwb' --enable-decoder=gsm_ms --enable-demuxer=gsm --enable-parser=gsm"
 #define FFMPEG_LICENSE "LGPL version 2.1 or later"
 #define CONFIG_THIS_YEAR 2018
 #define FFMPEG_DATADIR "/usr/local/share/ffmpeg"
 #define AVCONV_DATADIR "/usr/local/share/ffmpeg"
-#define CC_IDENT "Fuchsia clang version 7.0.0 (https://fuchsia.googlesource.com/a/third_party/clang 5034f5fddab316b12887b39b129ebbca999500e2) (https://fuchsia.googlesource.com/a/third_party/llvm 197b6c81959a17be37035d4fe71b382023bff2f0) (based on LLVM 7.0.0svn)"
+#define CC_IDENT "Fuchsia clang version 8.0.0 (https://fuchsia.googlesource.com/a/third_party/clang 0a217961416a0cbf1ac29bcb26577d41ca0e0e8d) (https://fuchsia.googlesource.com/a/third_party/llvm aff6cf491087ba32e338c9af076c9b7739c978a0) (based on LLVM 8.0.0svn)"
 #define av_restrict restrict
 #define EXTERN_PREFIX ""
 #define EXTERN_ASM 
@@ -57,6 +57,7 @@
 #define HAVE_AMD3DNOWEXT 1
 #define HAVE_AVX 1
 #define HAVE_AVX2 1
+#define HAVE_AVX512 1
 #define HAVE_FMA3 1
 #define HAVE_FMA4 1
 #define HAVE_MMX 1
@@ -101,6 +102,7 @@
 #define HAVE_AMD3DNOWEXT_EXTERNAL 1
 #define HAVE_AVX_EXTERNAL 1
 #define HAVE_AVX2_EXTERNAL 1
+#define HAVE_AVX512_EXTERNAL 0
 #define HAVE_FMA3_EXTERNAL 1
 #define HAVE_FMA4_EXTERNAL 1
 #define HAVE_MMX_EXTERNAL 1
@@ -145,6 +147,7 @@
 #define HAVE_AMD3DNOWEXT_INLINE 1
 #define HAVE_AVX_INLINE 1
 #define HAVE_AVX2_INLINE 1
+#define HAVE_AVX512_INLINE 1
 #define HAVE_FMA3_INLINE 1
 #define HAVE_FMA4_INLINE 1
 #define HAVE_MMX_INLINE 1
@@ -174,36 +177,29 @@
 #define HAVE_FAST_64BIT 1
 #define HAVE_FAST_CLZ 1
 #define HAVE_FAST_CMOV 1
-#define HAVE_LOCAL_ALIGNED_8 1
-#define HAVE_LOCAL_ALIGNED_16 1
-#define HAVE_LOCAL_ALIGNED_32 1
+#define HAVE_LOCAL_ALIGNED 1
 #define HAVE_SIMD_ALIGN_16 1
 #define HAVE_SIMD_ALIGN_32 1
-#define HAVE_ATOMICS_GCC 1
-#define HAVE_ATOMICS_SUNCC 0
-#define HAVE_ATOMICS_WIN32 0
+#define HAVE_SIMD_ALIGN_64 1
 #define HAVE_ATOMIC_CAS_PTR 0
 #define HAVE_MACHINE_RW_BARRIER 0
 #define HAVE_MEMORYBARRIER 0
 #define HAVE_MM_EMPTY 1
 #define HAVE_RDTSC 0
-#define HAVE_SARESTART 1
 #define HAVE_SEM_TIMEDWAIT 1
 #define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1
-#define HAVE_CABS 1
-#define HAVE_CEXP 1
+#define HAVE_CABS 0
+#define HAVE_CEXP 0
 #define HAVE_INLINE_ASM 1
 #define HAVE_SYMVER 0
 #define HAVE_X86ASM 1
 #define HAVE_BIGENDIAN 0
 #define HAVE_FAST_UNALIGNED 1
-#define HAVE_ALTIVEC_H 0
 #define HAVE_ARPA_INET_H 0
 #define HAVE_ASM_TYPES_H 1
 #define HAVE_CDIO_PARANOIA_H 0
 #define HAVE_CDIO_PARANOIA_PARANOIA_H 0
 #define HAVE_CUDA_H 0
-#define HAVE_D3D11_H 0
 #define HAVE_DISPATCH_DISPATCH_H 0
 #define HAVE_DEV_BKTR_IOCTL_BT848_H 0
 #define HAVE_DEV_BKTR_IOCTL_METEOR_H 0
@@ -212,27 +208,18 @@
 #define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0
 #define HAVE_DIRECT_H 0
 #define HAVE_DIRENT_H 1
-#define HAVE_DLFCN_H 1
 #define HAVE_DXGIDEBUG_H 0
 #define HAVE_DXVA_H 0
 #define HAVE_ES2_GL_H 0
 #define HAVE_GSM_H 0
 #define HAVE_IO_H 0
-#define HAVE_MACH_MACH_TIME_H 0
+#define HAVE_LINUX_PERF_EVENT_H 1
 #define HAVE_MACHINE_IOCTL_BT848_H 0
 #define HAVE_MACHINE_IOCTL_METEOR_H 0
 #define HAVE_MALLOC_H 1
 #define HAVE_OPENCV2_CORE_CORE_C_H 0
-#define HAVE_OPENJPEG_2_3_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_2_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_1_OPENJPEG_H 0
-#define HAVE_OPENJPEG_2_0_OPENJPEG_H 0
-#define HAVE_OPENJPEG_1_5_OPENJPEG_H 0
 #define HAVE_OPENGL_GL3_H 0
 #define HAVE_POLL_H 1
-#define HAVE_SOUNDCARD_H 0
-#define HAVE_STDATOMIC_H 1
-#define HAVE_SYS_MMAN_H 1
 #define HAVE_SYS_PARAM_H 1
 #define HAVE_SYS_RESOURCE_H 1
 #define HAVE_SYS_SELECT_H 1
@@ -276,17 +263,20 @@
 #define HAVE_SINF 1
 #define HAVE_TRUNC 1
 #define HAVE_TRUNCF 1
+#define HAVE_DOS_PATHS 0
+#define HAVE_LIBC_MSVCRT 0
+#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+#define HAVE_SECTION_DATA_REL_RO 1
+#define HAVE_THREADS 1
+#define HAVE_UWP 0
+#define HAVE_WINRT 0
 #define HAVE_ACCESS 1
 #define HAVE_ALIGNED_MALLOC 0
 #define HAVE_ARC4RANDOM 0
 #define HAVE_CLOCK_GETTIME 1
 #define HAVE_CLOSESOCKET 0
 #define HAVE_COMMANDLINETOARGVW 0
-#define HAVE_COTASKMEMFREE 0
-#define HAVE_CRYPTGENRANDOM 0
 #define HAVE_FCNTL 1
-#define HAVE_FLT_LIM 1
-#define HAVE_FORK 1
 #define HAVE_GETADDRINFO 0
 #define HAVE_GETHRTIME 0
 #define HAVE_GETOPT 1
@@ -301,9 +291,7 @@
 #define HAVE_GMTIME_R 1
 #define HAVE_INET_ATON 0
 #define HAVE_ISATTY 1
-#define HAVE_JACK_PORT_GET_LATENCY_RANGE 0
 #define HAVE_KBHIT 0
-#define HAVE_LOADLIBRARY 0
 #define HAVE_LOCALTIME_R 1
 #define HAVE_LSTAT 1
 #define HAVE_LZO1X_999_COMPRESS 0
@@ -318,6 +306,7 @@
 #define HAVE_POSIX_MEMALIGN 1
 #define HAVE_PTHREAD_CANCEL 1
 #define HAVE_SCHED_GETAFFINITY 1
+#define HAVE_SECITEMIMPORT 0
 #define HAVE_SETCONSOLETEXTATTRIBUTE 0
 #define HAVE_SETCONSOLECTRLHANDLER 0
 #define HAVE_SETMODE 0
@@ -330,16 +319,19 @@
 #define HAVE_UTGETOSTYPEFROMSTRING 0
 #define HAVE_VIRTUALALLOC 0
 #define HAVE_WGLGETPROCADDRESS 0
+#define HAVE_BCRYPT 0
+#define HAVE_VAAPI_DRM 0
+#define HAVE_VAAPI_X11 0
+#define HAVE_VDPAU_X11 0
 #define HAVE_PTHREADS 1
 #define HAVE_OS2THREADS 0
 #define HAVE_W32THREADS 0
+#define HAVE_AS_ARCH_DIRECTIVE 0
 #define HAVE_AS_DN_DIRECTIVE 0
 #define HAVE_AS_FPU_DIRECTIVE 0
 #define HAVE_AS_FUNC 0
 #define HAVE_AS_OBJECT_ARCH 0
 #define HAVE_ASM_MOD_Q 0
-#define HAVE_ATTRIBUTE_MAY_ALIAS 1
-#define HAVE_ATTRIBUTE_PACKED 1
 #define HAVE_BLOCKS_EXTENSION 0
 #define HAVE_EBP_AVAILABLE 1
 #define HAVE_EBX_AVAILABLE 1
@@ -356,7 +348,6 @@
 #define HAVE_VFP_ARGS 0
 #define HAVE_XFORM_ASM 0
 #define HAVE_XMM_CLOBBERS 1
-#define HAVE_CONDITION_VARIABLE_PTR 0
 #define HAVE_KCMVIDEOCODECTYPE_HEVC 0
 #define HAVE_SOCKLEN_T 0
 #define HAVE_STRUCT_ADDRINFO 0
@@ -372,22 +363,17 @@
 #define HAVE_STRUCT_SOCKADDR_STORAGE 0
 #define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1
 #define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 1
-#define HAVE_ATOMICS_NATIVE 1
-#define HAVE_DOS_PATHS 0
-#define HAVE_LIBC_MSVCRT 0
 #define HAVE_MAKEINFO 1
 #define HAVE_MAKEINFO_HTML 1
-#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
+#define HAVE_OPENCL_D3D11 0
+#define HAVE_OPENCL_DRM_ARM 0
+#define HAVE_OPENCL_DRM_BEIGNET 0
+#define HAVE_OPENCL_DXVA2 0
+#define HAVE_OPENCL_VAAPI_BEIGNET 0
+#define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0
 #define HAVE_PERL 1
 #define HAVE_POD2MAN 1
-#define HAVE_SECTION_DATA_REL_RO 1
 #define HAVE_TEXI2HTML 0
-#define HAVE_THREADS 1
-#define HAVE_UWP 0
-#define HAVE_VAAPI_DRM 0
-#define HAVE_VAAPI_X11 0
-#define HAVE_VDPAU_X11 0
-#define HAVE_WINRT 0
 #define CONFIG_DOC 0
 #define CONFIG_HTMLPAGES 0
 #define CONFIG_MANPAGES 0
@@ -414,41 +400,31 @@
 #define CONFIG_SCALING_VIDEO_EXAMPLE 0
 #define CONFIG_TRANSCODE_AAC_EXAMPLE 0
 #define CONFIG_TRANSCODING_EXAMPLE 0
-#define CONFIG_ALSA 0
-#define CONFIG_APPKIT 0
-#define CONFIG_AVFOUNDATION 0
-#define CONFIG_BZLIB 0
-#define CONFIG_COREIMAGE 0
-#define CONFIG_ICONV 0
-#define CONFIG_JACK 0
-#define CONFIG_LIBXCB 1
-#define CONFIG_LIBXCB_SHM 1
-#define CONFIG_LIBXCB_SHAPE 0
-#define CONFIG_LIBXCB_XFIXES 0
-#define CONFIG_LZMA 1
-#define CONFIG_SCHANNEL 0
-#define CONFIG_SDL2 0
-#define CONFIG_SECURETRANSPORT 0
-#define CONFIG_SNDIO 0
-#define CONFIG_XLIB 0
-#define CONFIG_ZLIB 0
+#define CONFIG_VAAPI_ENCODE_EXAMPLE 0
+#define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0
 #define CONFIG_AVISYNTH 0
 #define CONFIG_FREI0R 0
 #define CONFIG_LIBCDIO 0
+#define CONFIG_LIBDAVS2 0
 #define CONFIG_LIBRUBBERBAND 0
 #define CONFIG_LIBVIDSTAB 0
 #define CONFIG_LIBX264 0
 #define CONFIG_LIBX265 0
 #define CONFIG_LIBXAVS 0
+#define CONFIG_LIBXAVS2 0
 #define CONFIG_LIBXVID 0
 #define CONFIG_DECKLINK 0
 #define CONFIG_LIBNDI_NEWTEK 0
 #define CONFIG_LIBFDK_AAC 0
 #define CONFIG_OPENSSL 0
+#define CONFIG_LIBTLS 0
 #define CONFIG_GMP 0
+#define CONFIG_LIBLENSFUN 0
 #define CONFIG_LIBOPENCORE_AMRNB 0
 #define CONFIG_LIBOPENCORE_AMRWB 0
+#define CONFIG_LIBVMAF 0
 #define CONFIG_LIBVO_AMRWBENC 0
+#define CONFIG_MBEDTLS 0
 #define CONFIG_RKMPP 0
 #define CONFIG_LIBSMBCLIENT 0
 #define CONFIG_CHROMAPRINT 0
@@ -456,11 +432,13 @@
 #define CONFIG_GNUTLS 0
 #define CONFIG_JNI 0
 #define CONFIG_LADSPA 0
+#define CONFIG_LIBAOM 0
 #define CONFIG_LIBASS 0
 #define CONFIG_LIBBLURAY 0
 #define CONFIG_LIBBS2B 0
 #define CONFIG_LIBCACA 0
 #define CONFIG_LIBCELT 0
+#define CONFIG_LIBCODEC2 0
 #define CONFIG_LIBDC1394 0
 #define CONFIG_LIBDRM 0
 #define CONFIG_LIBFLITE 0
@@ -471,6 +449,8 @@
 #define CONFIG_LIBGSM 0
 #define CONFIG_LIBIEC61883 0
 #define CONFIG_LIBILBC 0
+#define CONFIG_LIBJACK 0
+#define CONFIG_LIBKLVANC 0
 #define CONFIG_LIBKVAZAAR 0
 #define CONFIG_LIBMODPLUG 0
 #define CONFIG_LIBMP3LAME 0
@@ -488,12 +468,13 @@
 #define CONFIG_LIBSNAPPY 0
 #define CONFIG_LIBSOXR 0
 #define CONFIG_LIBSPEEX 0
+#define CONFIG_LIBSRT 0
 #define CONFIG_LIBSSH 0
+#define CONFIG_LIBTENSORFLOW 0
 #define CONFIG_LIBTESSERACT 0
 #define CONFIG_LIBTHEORA 0
 #define CONFIG_LIBTWOLAME 0
 #define CONFIG_LIBV4L2 0
-#define CONFIG_LIBVMAF 0
 #define CONFIG_LIBVORBIS 0
 #define CONFIG_LIBVPX 0
 #define CONFIG_LIBWAVPACK 0
@@ -502,28 +483,49 @@
 #define CONFIG_LIBZIMG 0
 #define CONFIG_LIBZMQ 0
 #define CONFIG_LIBZVBI 0
+#define CONFIG_LV2 0
 #define CONFIG_MEDIACODEC 0
 #define CONFIG_OPENAL 0
-#define CONFIG_OPENCL 0
 #define CONFIG_OPENGL 0
+#define CONFIG_VAPOURSYNTH 0
+#define CONFIG_ALSA 1
+#define CONFIG_APPKIT 0
+#define CONFIG_AVFOUNDATION 0
+#define CONFIG_BZLIB 0
+#define CONFIG_COREIMAGE 0
+#define CONFIG_ICONV 0
+#define CONFIG_LIBXCB 0
+#define CONFIG_LIBXCB_SHM 0
+#define CONFIG_LIBXCB_SHAPE 0
+#define CONFIG_LIBXCB_XFIXES 0
+#define CONFIG_LZMA 0
+#define CONFIG_SCHANNEL 0
+#define CONFIG_SDL2 0
+#define CONFIG_SECURETRANSPORT 0
+#define CONFIG_SNDIO 0
+#define CONFIG_XLIB 0
+#define CONFIG_ZLIB 0
+#define CONFIG_CUDA_SDK 0
+#define CONFIG_LIBNPP 0
+#define CONFIG_LIBMFX 0
+#define CONFIG_MMAL 0
+#define CONFIG_OMX 0
+#define CONFIG_OPENCL 0
+#define CONFIG_AMF 0
 #define CONFIG_AUDIOTOOLBOX 0
 #define CONFIG_CRYSTALHD 0
 #define CONFIG_CUDA 0
 #define CONFIG_CUVID 0
 #define CONFIG_D3D11VA 0
 #define CONFIG_DXVA2 0
+#define CONFIG_FFNVCODEC 0
+#define CONFIG_NVDEC 0
 #define CONFIG_NVENC 0
 #define CONFIG_VAAPI 0
-#define CONFIG_VDA 0
 #define CONFIG_VDPAU 0
 #define CONFIG_VIDEOTOOLBOX 0
 #define CONFIG_V4L2_M2M 0
 #define CONFIG_XVMC 0
-#define CONFIG_CUDA_SDK 0
-#define CONFIG_LIBNPP 0
-#define CONFIG_LIBMFX 0
-#define CONFIG_MMAL 0
-#define CONFIG_OMX 0
 #define CONFIG_FTRAPV 0
 #define CONFIG_GRAY 0
 #define CONFIG_HARDCODED_TABLES 0
@@ -537,18 +539,17 @@
 #define CONFIG_GPL 0
 #define CONFIG_NONFREE 0
 #define CONFIG_VERSION3 0
-#define CONFIG_AVCODEC 1
 #define CONFIG_AVDEVICE 0
 #define CONFIG_AVFILTER 0
+#define CONFIG_SWSCALE 0
+#define CONFIG_POSTPROC 0
 #define CONFIG_AVFORMAT 1
+#define CONFIG_AVCODEC 1
+#define CONFIG_SWRESAMPLE 0
 #define CONFIG_AVRESAMPLE 0
 #define CONFIG_AVUTIL 1
-#define CONFIG_POSTPROC 0
-#define CONFIG_SWRESAMPLE 0
-#define CONFIG_SWSCALE 0
 #define CONFIG_FFPLAY 0
 #define CONFIG_FFPROBE 0
-#define CONFIG_FFSERVER 0
 #define CONFIG_FFMPEG 0
 #define CONFIG_DCT 1
 #define CONFIG_DWT 0
@@ -585,12 +586,21 @@
 #define CONFIG_PROTOCOLS 0
 #define CONFIG_AANDCTTABLES 0
 #define CONFIG_AC3DSP 0
-#define CONFIG_AUDIO_FRAME_QUEUE 0
+#define CONFIG_ADTS_HEADER 1
+#define CONFIG_AUDIO_FRAME_QUEUE 1
 #define CONFIG_AUDIODSP 0
 #define CONFIG_BLOCKDSP 1
 #define CONFIG_BSWAPDSP 0
 #define CONFIG_CABAC 1
+#define CONFIG_CBS 0
+#define CONFIG_CBS_AV1 0
+#define CONFIG_CBS_H264 0
+#define CONFIG_CBS_H265 0
+#define CONFIG_CBS_JPEG 0
+#define CONFIG_CBS_MPEG2 0
+#define CONFIG_CBS_VP9 0
 #define CONFIG_DIRAC_PARSE 1
+#define CONFIG_DNN 0
 #define CONFIG_DVPROFILE 0
 #define CONFIG_EXIF 1
 #define CONFIG_FAANDCT 0
@@ -640,6 +650,7 @@
 #define CONFIG_QSV 0
 #define CONFIG_QSVDEC 0
 #define CONFIG_QSVENC 0
+#define CONFIG_QSVVPP 0
 #define CONFIG_RANGECODER 0
 #define CONFIG_RIFFDEC 1
 #define CONFIG_RIFFENC 0
@@ -663,25 +674,35 @@
 #define CONFIG_WMA_FREQS 0
 #define CONFIG_WMV2DSP 0
 #define CONFIG_AAC_ADTSTOASC_BSF 0
+#define CONFIG_AV1_METADATA_BSF 0
 #define CONFIG_CHOMP_BSF 0
 #define CONFIG_DUMP_EXTRADATA_BSF 0
 #define CONFIG_DCA_CORE_BSF 0
+#define CONFIG_EAC3_CORE_BSF 0
 #define CONFIG_EXTRACT_EXTRADATA_BSF 0
+#define CONFIG_FILTER_UNITS_BSF 0
+#define CONFIG_H264_METADATA_BSF 0
 #define CONFIG_H264_MP4TOANNEXB_BSF 0
+#define CONFIG_H264_REDUNDANT_PPS_BSF 0
+#define CONFIG_HAPQA_EXTRACT_BSF 0
+#define CONFIG_HEVC_METADATA_BSF 0
 #define CONFIG_HEVC_MP4TOANNEXB_BSF 0
 #define CONFIG_IMX_DUMP_HEADER_BSF 0
 #define CONFIG_MJPEG2JPEG_BSF 0
 #define CONFIG_MJPEGA_DUMP_HEADER_BSF 0
 #define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0
+#define CONFIG_MPEG2_METADATA_BSF 0
 #define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0
 #define CONFIG_MOV2TEXTSUB_BSF 0
 #define CONFIG_NOISE_BSF 0
 #define CONFIG_NULL_BSF 1
 #define CONFIG_REMOVE_EXTRADATA_BSF 0
 #define CONFIG_TEXT2MOVSUB_BSF 0
+#define CONFIG_TRACE_HEADERS_BSF 0
+#define CONFIG_VP9_METADATA_BSF 0
 #define CONFIG_VP9_RAW_REORDER_BSF 0
 #define CONFIG_VP9_SUPERFRAME_BSF 0
-#define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0
+#define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 1
 #define CONFIG_AASC_DECODER 0
 #define CONFIG_AIC_DECODER 0
 #define CONFIG_ALIAS_PIX_DECODER 0
@@ -701,6 +722,7 @@
 #define CONFIG_BETHSOFTVID_DECODER 0
 #define CONFIG_BFI_DECODER 0
 #define CONFIG_BINK_DECODER 0
+#define CONFIG_BITPACKED_DECODER 0
 #define CONFIG_BMP_DECODER 0
 #define CONFIG_BMV_VIDEO_DECODER 0
 #define CONFIG_BRENDER_PIX_DECODER 0
@@ -766,8 +788,6 @@
 #define CONFIG_H264_MMAL_DECODER 0
 #define CONFIG_H264_QSV_DECODER 0
 #define CONFIG_H264_RKMPP_DECODER 0
-#define CONFIG_H264_VDA_DECODER 0
-#define CONFIG_H264_VDPAU_DECODER 0
 #define CONFIG_HAP_DECODER 0
 #define CONFIG_HEVC_DECODER 0
 #define CONFIG_HEVC_QSV_DECODER 0
@@ -779,6 +799,7 @@
 #define CONFIG_HUFFYUV_DECODER 0
 #define CONFIG_IDCIN_DECODER 0
 #define CONFIG_IFF_ILBM_DECODER 0
+#define CONFIG_IMM4_DECODER 0
 #define CONFIG_INDEO2_DECODER 0
 #define CONFIG_INDEO3_DECODER 0
 #define CONFIG_INDEO4_DECODER 0
@@ -799,17 +820,13 @@
 #define CONFIG_MJPEGB_DECODER 0
 #define CONFIG_MMVIDEO_DECODER 0
 #define CONFIG_MOTIONPIXELS_DECODER 0
-#define CONFIG_MPEG_XVMC_DECODER 0
 #define CONFIG_MPEG1VIDEO_DECODER 0
 #define CONFIG_MPEG2VIDEO_DECODER 0
 #define CONFIG_MPEG4_DECODER 1
 #define CONFIG_MPEG4_CRYSTALHD_DECODER 0
 #define CONFIG_MPEG4_V4L2M2M_DECODER 0
 #define CONFIG_MPEG4_MMAL_DECODER 0
-#define CONFIG_MPEG4_VDPAU_DECODER 0
 #define CONFIG_MPEGVIDEO_DECODER 0
-#define CONFIG_MPEG_VDPAU_DECODER 0
-#define CONFIG_MPEG1_VDPAU_DECODER 0
 #define CONFIG_MPEG1_V4L2M2M_DECODER 0
 #define CONFIG_MPEG2_MMAL_DECODER 0
 #define CONFIG_MPEG2_CRYSTALHD_DECODER 0
@@ -830,6 +847,7 @@
 #define CONFIG_MTS2_DECODER 0
 #define CONFIG_MVC1_DECODER 0
 #define CONFIG_MVC2_DECODER 0
+#define CONFIG_MWSC_DECODER 0
 #define CONFIG_MXPEG_DECODER 0
 #define CONFIG_NUV_DECODER 0
 #define CONFIG_PAF_VIDEO_DECODER 0
@@ -843,7 +861,7 @@
 #define CONFIG_PNG_DECODER 0
 #define CONFIG_PPM_DECODER 0
 #define CONFIG_PRORES_DECODER 0
-#define CONFIG_PRORES_LGPL_DECODER 0
+#define CONFIG_PROSUMER_DECODER 0
 #define CONFIG_PSD_DECODER 0
 #define CONFIG_PTX_DECODER 0
 #define CONFIG_QDRAW_DECODER 0
@@ -851,6 +869,7 @@
 #define CONFIG_QTRLE_DECODER 0
 #define CONFIG_R10K_DECODER 0
 #define CONFIG_R210_DECODER 0
+#define CONFIG_RASC_DECODER 0
 #define CONFIG_RAWVIDEO_DECODER 0
 #define CONFIG_RL2_DECODER 0
 #define CONFIG_ROQ_DECODER 0
@@ -903,7 +922,6 @@
 #define CONFIG_VBLE_DECODER 0
 #define CONFIG_VC1_DECODER 0
 #define CONFIG_VC1_CRYSTALHD_DECODER 0
-#define CONFIG_VC1_VDPAU_DECODER 0
 #define CONFIG_VC1IMAGE_DECODER 0
 #define CONFIG_VC1_MMAL_DECODER 0
 #define CONFIG_VC1_QSV_DECODER 0
@@ -924,14 +942,13 @@
 #define CONFIG_VP9_RKMPP_DECODER 0
 #define CONFIG_VP9_V4L2M2M_DECODER 0
 #define CONFIG_VQA_DECODER 0
-#define CONFIG_BITPACKED_DECODER 0
 #define CONFIG_WEBP_DECODER 0
+#define CONFIG_WCMV_DECODER 0
 #define CONFIG_WRAPPED_AVFRAME_DECODER 0
 #define CONFIG_WMV1_DECODER 0
 #define CONFIG_WMV2_DECODER 0
 #define CONFIG_WMV3_DECODER 0
 #define CONFIG_WMV3_CRYSTALHD_DECODER 0
-#define CONFIG_WMV3_VDPAU_DECODER 0
 #define CONFIG_WMV3IMAGE_DECODER 0
 #define CONFIG_WNV1_DECODER 0
 #define CONFIG_XAN_WC3_DECODER 0
@@ -959,11 +976,14 @@
 #define CONFIG_AMRNB_DECODER 1
 #define CONFIG_AMRWB_DECODER 1
 #define CONFIG_APE_DECODER 0
+#define CONFIG_APTX_DECODER 1
+#define CONFIG_APTX_HD_DECODER 0
 #define CONFIG_ATRAC1_DECODER 0
 #define CONFIG_ATRAC3_DECODER 0
 #define CONFIG_ATRAC3AL_DECODER 0
 #define CONFIG_ATRAC3P_DECODER 0
 #define CONFIG_ATRAC3PAL_DECODER 0
+#define CONFIG_ATRAC9_DECODER 0
 #define CONFIG_BINKAUDIO_DCT_DECODER 0
 #define CONFIG_BINKAUDIO_RDFT_DECODER 0
 #define CONFIG_BMV_AUDIO_DECODER 0
@@ -986,6 +1006,7 @@
 #define CONFIG_GSM_DECODER 0
 #define CONFIG_GSM_MS_DECODER 1
 #define CONFIG_IAC_DECODER 0
+#define CONFIG_ILBC_DECODER 0
 #define CONFIG_IMC_DECODER 0
 #define CONFIG_INTERPLAY_ACM_DECODER 0
 #define CONFIG_MACE3_DECODER 0
@@ -996,12 +1017,12 @@
 #define CONFIG_MP1FLOAT_DECODER 0
 #define CONFIG_MP2_DECODER 0
 #define CONFIG_MP2FLOAT_DECODER 0
-#define CONFIG_MP3_DECODER 1
 #define CONFIG_MP3FLOAT_DECODER 0
-#define CONFIG_MP3ADU_DECODER 0
+#define CONFIG_MP3_DECODER 1
 #define CONFIG_MP3ADUFLOAT_DECODER 0
-#define CONFIG_MP3ON4_DECODER 0
+#define CONFIG_MP3ADU_DECODER 0
 #define CONFIG_MP3ON4FLOAT_DECODER 0
+#define CONFIG_MP3ON4_DECODER 0
 #define CONFIG_MPC7_DECODER 0
 #define CONFIG_MPC8_DECODER 0
 #define CONFIG_NELLYMOSER_DECODER 0
@@ -1014,6 +1035,7 @@
 #define CONFIG_RA_144_DECODER 0
 #define CONFIG_RA_288_DECODER 0
 #define CONFIG_RALF_DECODER 0
+#define CONFIG_SBC_DECODER 1
 #define CONFIG_SHORTEN_DECODER 0
 #define CONFIG_SIPR_DECODER 0
 #define CONFIG_SMACKAUD_DECODER 0
@@ -1151,7 +1173,10 @@
 #define CONFIG_PCM_MULAW_AT_DECODER 0
 #define CONFIG_QDMC_AT_DECODER 0
 #define CONFIG_QDM2_AT_DECODER 0
+#define CONFIG_LIBAOM_AV1_DECODER 0
 #define CONFIG_LIBCELT_DECODER 0
+#define CONFIG_LIBCODEC2_DECODER 0
+#define CONFIG_LIBDAVS2_DECODER 0
 #define CONFIG_LIBFDK_AAC_DECODER 0
 #define CONFIG_LIBGSM_DECODER 0
 #define CONFIG_LIBGSM_MS_DECODER 0
@@ -1184,6 +1209,748 @@
 #define CONFIG_VP8_QSV_DECODER 0
 #define CONFIG_VP9_CUVID_DECODER 0
 #define CONFIG_VP9_MEDIACODEC_DECODER 0
+#define CONFIG_A64MULTI_ENCODER 0
+#define CONFIG_A64MULTI5_ENCODER 0
+#define CONFIG_ALIAS_PIX_ENCODER 0
+#define CONFIG_AMV_ENCODER 0
+#define CONFIG_APNG_ENCODER 0
+#define CONFIG_ASV1_ENCODER 0
+#define CONFIG_ASV2_ENCODER 0
+#define CONFIG_AVRP_ENCODER 0
+#define CONFIG_AVUI_ENCODER 0
+#define CONFIG_AYUV_ENCODER 0
+#define CONFIG_BMP_ENCODER 0
+#define CONFIG_CINEPAK_ENCODER 0
+#define CONFIG_CLJR_ENCODER 0
+#define CONFIG_COMFORTNOISE_ENCODER 0
+#define CONFIG_DNXHD_ENCODER 0
+#define CONFIG_DPX_ENCODER 0
+#define CONFIG_DVVIDEO_ENCODER 0
+#define CONFIG_FFV1_ENCODER 0
+#define CONFIG_FFVHUFF_ENCODER 0
+#define CONFIG_FITS_ENCODER 0
+#define CONFIG_FLASHSV_ENCODER 0
+#define CONFIG_FLASHSV2_ENCODER 0
+#define CONFIG_FLV_ENCODER 0
+#define CONFIG_GIF_ENCODER 0
+#define CONFIG_H261_ENCODER 0
+#define CONFIG_H263_ENCODER 0
+#define CONFIG_H263P_ENCODER 0
+#define CONFIG_HAP_ENCODER 0
+#define CONFIG_HUFFYUV_ENCODER 0
+#define CONFIG_JPEG2000_ENCODER 0
+#define CONFIG_JPEGLS_ENCODER 0
+#define CONFIG_LJPEG_ENCODER 0
+#define CONFIG_MAGICYUV_ENCODER 0
+#define CONFIG_MJPEG_ENCODER 0
+#define CONFIG_MPEG1VIDEO_ENCODER 0
+#define CONFIG_MPEG2VIDEO_ENCODER 0
+#define CONFIG_MPEG4_ENCODER 0
+#define CONFIG_MSMPEG4V2_ENCODER 0
+#define CONFIG_MSMPEG4V3_ENCODER 0
+#define CONFIG_MSVIDEO1_ENCODER 0
+#define CONFIG_PAM_ENCODER 0
+#define CONFIG_PBM_ENCODER 0
+#define CONFIG_PCX_ENCODER 0
+#define CONFIG_PGM_ENCODER 0
+#define CONFIG_PGMYUV_ENCODER 0
+#define CONFIG_PNG_ENCODER 0
+#define CONFIG_PPM_ENCODER 0
+#define CONFIG_PRORES_ENCODER 0
+#define CONFIG_PRORES_AW_ENCODER 0
+#define CONFIG_PRORES_KS_ENCODER 0
+#define CONFIG_QTRLE_ENCODER 0
+#define CONFIG_R10K_ENCODER 0
+#define CONFIG_R210_ENCODER 0
+#define CONFIG_RAWVIDEO_ENCODER 0
+#define CONFIG_ROQ_ENCODER 0
+#define CONFIG_RV10_ENCODER 0
+#define CONFIG_RV20_ENCODER 0
+#define CONFIG_S302M_ENCODER 0
+#define CONFIG_SGI_ENCODER 0
+#define CONFIG_SNOW_ENCODER 0
+#define CONFIG_SUNRAST_ENCODER 0
+#define CONFIG_SVQ1_ENCODER 0
+#define CONFIG_TARGA_ENCODER 0
+#define CONFIG_TIFF_ENCODER 0
+#define CONFIG_UTVIDEO_ENCODER 0
+#define CONFIG_V210_ENCODER 0
+#define CONFIG_V308_ENCODER 0
+#define CONFIG_V408_ENCODER 0
+#define CONFIG_V410_ENCODER 0
+#define CONFIG_VC2_ENCODER 0
+#define CONFIG_WRAPPED_AVFRAME_ENCODER 0
+#define CONFIG_WMV1_ENCODER 0
+#define CONFIG_WMV2_ENCODER 0
+#define CONFIG_XBM_ENCODER 0
+#define CONFIG_XFACE_ENCODER 0
+#define CONFIG_XWD_ENCODER 0
+#define CONFIG_Y41P_ENCODER 0
+#define CONFIG_YUV4_ENCODER 0
+#define CONFIG_ZLIB_ENCODER 0
+#define CONFIG_ZMBV_ENCODER 0
+#define CONFIG_AAC_ENCODER 0
+#define CONFIG_AC3_ENCODER 0
+#define CONFIG_AC3_FIXED_ENCODER 0
+#define CONFIG_ALAC_ENCODER 0
+#define CONFIG_APTX_ENCODER 0
+#define CONFIG_APTX_HD_ENCODER 0
+#define CONFIG_DCA_ENCODER 0
+#define CONFIG_EAC3_ENCODER 0
+#define CONFIG_FLAC_ENCODER 0
+#define CONFIG_G723_1_ENCODER 0
+#define CONFIG_MLP_ENCODER 0
+#define CONFIG_MP2_ENCODER 0
+#define CONFIG_MP2FIXED_ENCODER 0
+#define CONFIG_NELLYMOSER_ENCODER 0
+#define CONFIG_OPUS_ENCODER 0
+#define CONFIG_RA_144_ENCODER 0
+#define CONFIG_SBC_ENCODER 0
+#define CONFIG_SONIC_ENCODER 0
+#define CONFIG_SONIC_LS_ENCODER 0
+#define CONFIG_TRUEHD_ENCODER 0
+#define CONFIG_TTA_ENCODER 0
+#define CONFIG_VORBIS_ENCODER 0
+#define CONFIG_WAVPACK_ENCODER 0
+#define CONFIG_WMAV1_ENCODER 0
+#define CONFIG_WMAV2_ENCODER 0
+#define CONFIG_PCM_ALAW_ENCODER 0
+#define CONFIG_PCM_F32BE_ENCODER 0
+#define CONFIG_PCM_F32LE_ENCODER 0
+#define CONFIG_PCM_F64BE_ENCODER 0
+#define CONFIG_PCM_F64LE_ENCODER 0
+#define CONFIG_PCM_MULAW_ENCODER 0
+#define CONFIG_PCM_S8_ENCODER 0
+#define CONFIG_PCM_S8_PLANAR_ENCODER 0
+#define CONFIG_PCM_S16BE_ENCODER 0
+#define CONFIG_PCM_S16BE_PLANAR_ENCODER 0
+#define CONFIG_PCM_S16LE_ENCODER 0
+#define CONFIG_PCM_S16LE_PLANAR_ENCODER 0
+#define CONFIG_PCM_S24BE_ENCODER 0
+#define CONFIG_PCM_S24DAUD_ENCODER 0
+#define CONFIG_PCM_S24LE_ENCODER 0
+#define CONFIG_PCM_S24LE_PLANAR_ENCODER 0
+#define CONFIG_PCM_S32BE_ENCODER 0
+#define CONFIG_PCM_S32LE_ENCODER 0
+#define CONFIG_PCM_S32LE_PLANAR_ENCODER 0
+#define CONFIG_PCM_S64BE_ENCODER 0
+#define CONFIG_PCM_S64LE_ENCODER 0
+#define CONFIG_PCM_U8_ENCODER 0
+#define CONFIG_PCM_U16BE_ENCODER 0
+#define CONFIG_PCM_U16LE_ENCODER 0
+#define CONFIG_PCM_U24BE_ENCODER 0
+#define CONFIG_PCM_U24LE_ENCODER 0
+#define CONFIG_PCM_U32BE_ENCODER 0
+#define CONFIG_PCM_U32LE_ENCODER 0
+#define CONFIG_ROQ_DPCM_ENCODER 0
+#define CONFIG_ADPCM_ADX_ENCODER 0
+#define CONFIG_ADPCM_G722_ENCODER 0
+#define CONFIG_ADPCM_G726_ENCODER 0
+#define CONFIG_ADPCM_G726LE_ENCODER 0
+#define CONFIG_ADPCM_IMA_QT_ENCODER 0
+#define CONFIG_ADPCM_IMA_WAV_ENCODER 0
+#define CONFIG_ADPCM_MS_ENCODER 0
+#define CONFIG_ADPCM_SWF_ENCODER 0
+#define CONFIG_ADPCM_YAMAHA_ENCODER 0
+#define CONFIG_SSA_ENCODER 0
+#define CONFIG_ASS_ENCODER 0
+#define CONFIG_DVBSUB_ENCODER 0
+#define CONFIG_DVDSUB_ENCODER 0
+#define CONFIG_MOVTEXT_ENCODER 0
+#define CONFIG_SRT_ENCODER 0
+#define CONFIG_SUBRIP_ENCODER 0
+#define CONFIG_TEXT_ENCODER 0
+#define CONFIG_WEBVTT_ENCODER 0
+#define CONFIG_XSUB_ENCODER 0
+#define CONFIG_AAC_AT_ENCODER 0
+#define CONFIG_ALAC_AT_ENCODER 0
+#define CONFIG_ILBC_AT_ENCODER 0
+#define CONFIG_PCM_ALAW_AT_ENCODER 0
+#define CONFIG_PCM_MULAW_AT_ENCODER 0
+#define CONFIG_LIBAOM_AV1_ENCODER 0
+#define CONFIG_LIBCODEC2_ENCODER 0
+#define CONFIG_LIBFDK_AAC_ENCODER 0
+#define CONFIG_LIBGSM_ENCODER 0
+#define CONFIG_LIBGSM_MS_ENCODER 0
+#define CONFIG_LIBILBC_ENCODER 0
+#define CONFIG_LIBMP3LAME_ENCODER 0
+#define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0
+#define CONFIG_LIBOPENJPEG_ENCODER 0
+#define CONFIG_LIBOPUS_ENCODER 0
+#define CONFIG_LIBSHINE_ENCODER 0
+#define CONFIG_LIBSPEEX_ENCODER 0
+#define CONFIG_LIBTHEORA_ENCODER 0
+#define CONFIG_LIBTWOLAME_ENCODER 0
+#define CONFIG_LIBVO_AMRWBENC_ENCODER 0
+#define CONFIG_LIBVORBIS_ENCODER 0
+#define CONFIG_LIBVPX_VP8_ENCODER 0
+#define CONFIG_LIBVPX_VP9_ENCODER 0
+#define CONFIG_LIBWAVPACK_ENCODER 0
+#define CONFIG_LIBWEBP_ANIM_ENCODER 0
+#define CONFIG_LIBWEBP_ENCODER 0
+#define CONFIG_LIBX262_ENCODER 0
+#define CONFIG_LIBX264_ENCODER 0
+#define CONFIG_LIBX264RGB_ENCODER 0
+#define CONFIG_LIBX265_ENCODER 0
+#define CONFIG_LIBXAVS_ENCODER 0
+#define CONFIG_LIBXAVS2_ENCODER 0
+#define CONFIG_LIBXVID_ENCODER 0
+#define CONFIG_H263_V4L2M2M_ENCODER 0
+#define CONFIG_LIBOPENH264_ENCODER 0
+#define CONFIG_H264_AMF_ENCODER 0
+#define CONFIG_H264_NVENC_ENCODER 0
+#define CONFIG_H264_OMX_ENCODER 0
+#define CONFIG_H264_QSV_ENCODER 0
+#define CONFIG_H264_V4L2M2M_ENCODER 0
+#define CONFIG_H264_VAAPI_ENCODER 0
+#define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0
+#define CONFIG_NVENC_ENCODER 0
+#define CONFIG_NVENC_H264_ENCODER 0
+#define CONFIG_NVENC_HEVC_ENCODER 0
+#define CONFIG_HEVC_AMF_ENCODER 0
+#define CONFIG_HEVC_NVENC_ENCODER 0
+#define CONFIG_HEVC_QSV_ENCODER 0
+#define CONFIG_HEVC_V4L2M2M_ENCODER 0
+#define CONFIG_HEVC_VAAPI_ENCODER 0
+#define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0
+#define CONFIG_LIBKVAZAAR_ENCODER 0
+#define CONFIG_MJPEG_QSV_ENCODER 0
+#define CONFIG_MJPEG_VAAPI_ENCODER 0
+#define CONFIG_MPEG2_QSV_ENCODER 0
+#define CONFIG_MPEG2_VAAPI_ENCODER 0
+#define CONFIG_MPEG4_V4L2M2M_ENCODER 0
+#define CONFIG_VP8_V4L2M2M_ENCODER 0
+#define CONFIG_VP8_VAAPI_ENCODER 0
+#define CONFIG_VP9_VAAPI_ENCODER 0
+#define CONFIG_H263_VAAPI_HWACCEL 0
+#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_H264_D3D11VA_HWACCEL 0
+#define CONFIG_H264_D3D11VA2_HWACCEL 0
+#define CONFIG_H264_DXVA2_HWACCEL 0
+#define CONFIG_H264_NVDEC_HWACCEL 0
+#define CONFIG_H264_VAAPI_HWACCEL 0
+#define CONFIG_H264_VDPAU_HWACCEL 0
+#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_HEVC_D3D11VA_HWACCEL 0
+#define CONFIG_HEVC_D3D11VA2_HWACCEL 0
+#define CONFIG_HEVC_DXVA2_HWACCEL 0
+#define CONFIG_HEVC_NVDEC_HWACCEL 0
+#define CONFIG_HEVC_VAAPI_HWACCEL 0
+#define CONFIG_HEVC_VDPAU_HWACCEL 0
+#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MJPEG_NVDEC_HWACCEL 0
+#define CONFIG_MJPEG_VAAPI_HWACCEL 0
+#define CONFIG_MPEG1_NVDEC_HWACCEL 0
+#define CONFIG_MPEG1_VDPAU_HWACCEL 0
+#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MPEG1_XVMC_HWACCEL 0
+#define CONFIG_MPEG2_D3D11VA_HWACCEL 0
+#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
+#define CONFIG_MPEG2_NVDEC_HWACCEL 0
+#define CONFIG_MPEG2_DXVA2_HWACCEL 0
+#define CONFIG_MPEG2_VAAPI_HWACCEL 0
+#define CONFIG_MPEG2_VDPAU_HWACCEL 0
+#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_MPEG2_XVMC_HWACCEL 0
+#define CONFIG_MPEG4_NVDEC_HWACCEL 0
+#define CONFIG_MPEG4_VAAPI_HWACCEL 0
+#define CONFIG_MPEG4_VDPAU_HWACCEL 0
+#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
+#define CONFIG_VC1_D3D11VA_HWACCEL 0
+#define CONFIG_VC1_D3D11VA2_HWACCEL 0
+#define CONFIG_VC1_DXVA2_HWACCEL 0
+#define CONFIG_VC1_NVDEC_HWACCEL 0
+#define CONFIG_VC1_VAAPI_HWACCEL 0
+#define CONFIG_VC1_VDPAU_HWACCEL 0
+#define CONFIG_VP8_NVDEC_HWACCEL 0
+#define CONFIG_VP8_VAAPI_HWACCEL 0
+#define CONFIG_VP9_D3D11VA_HWACCEL 0
+#define CONFIG_VP9_D3D11VA2_HWACCEL 0
+#define CONFIG_VP9_DXVA2_HWACCEL 0
+#define CONFIG_VP9_NVDEC_HWACCEL 0
+#define CONFIG_VP9_VAAPI_HWACCEL 0
+#define CONFIG_WMV3_D3D11VA_HWACCEL 0
+#define CONFIG_WMV3_D3D11VA2_HWACCEL 0
+#define CONFIG_WMV3_DXVA2_HWACCEL 0
+#define CONFIG_WMV3_NVDEC_HWACCEL 0
+#define CONFIG_WMV3_VAAPI_HWACCEL 0
+#define CONFIG_WMV3_VDPAU_HWACCEL 0
+#define CONFIG_AAC_PARSER 1
+#define CONFIG_AAC_LATM_PARSER 0
+#define CONFIG_AC3_PARSER 0
+#define CONFIG_ADX_PARSER 0
+#define CONFIG_AV1_PARSER 0
+#define CONFIG_AVS2_PARSER 0
+#define CONFIG_BMP_PARSER 0
+#define CONFIG_CAVSVIDEO_PARSER 0
+#define CONFIG_COOK_PARSER 0
+#define CONFIG_DCA_PARSER 0
+#define CONFIG_DIRAC_PARSER 0
+#define CONFIG_DNXHD_PARSER 0
+#define CONFIG_DPX_PARSER 0
+#define CONFIG_DVAUDIO_PARSER 0
+#define CONFIG_DVBSUB_PARSER 0
+#define CONFIG_DVDSUB_PARSER 0
+#define CONFIG_DVD_NAV_PARSER 0
+#define CONFIG_FLAC_PARSER 1
+#define CONFIG_G729_PARSER 0
+#define CONFIG_GSM_PARSER 1
+#define CONFIG_H261_PARSER 0
+#define CONFIG_H263_PARSER 1
+#define CONFIG_H264_PARSER 1
+#define CONFIG_HEVC_PARSER 0
+#define CONFIG_MJPEG_PARSER 0
+#define CONFIG_MLP_PARSER 0
+#define CONFIG_MPEG4VIDEO_PARSER 1
+#define CONFIG_MPEGAUDIO_PARSER 1
+#define CONFIG_MPEGVIDEO_PARSER 0
+#define CONFIG_OPUS_PARSER 1
+#define CONFIG_PNG_PARSER 0
+#define CONFIG_PNM_PARSER 0
+#define CONFIG_RV30_PARSER 0
+#define CONFIG_RV40_PARSER 0
+#define CONFIG_SBC_PARSER 0
+#define CONFIG_SIPR_PARSER 0
+#define CONFIG_TAK_PARSER 0
+#define CONFIG_VC1_PARSER 0
+#define CONFIG_VORBIS_PARSER 1
+#define CONFIG_VP3_PARSER 1
+#define CONFIG_VP8_PARSER 1
+#define CONFIG_VP9_PARSER 1
+#define CONFIG_XMA_PARSER 0
+#define CONFIG_ALSA_INDEV 0
+#define CONFIG_ANDROID_CAMERA_INDEV 0
+#define CONFIG_AVFOUNDATION_INDEV 0
+#define CONFIG_BKTR_INDEV 0
+#define CONFIG_DECKLINK_INDEV 0
+#define CONFIG_LIBNDI_NEWTEK_INDEV 0
+#define CONFIG_DSHOW_INDEV 0
+#define CONFIG_FBDEV_INDEV 0
+#define CONFIG_GDIGRAB_INDEV 0
+#define CONFIG_IEC61883_INDEV 0
+#define CONFIG_JACK_INDEV 0
+#define CONFIG_KMSGRAB_INDEV 0
+#define CONFIG_LAVFI_INDEV 0
+#define CONFIG_OPENAL_INDEV 0
+#define CONFIG_OSS_INDEV 0
+#define CONFIG_PULSE_INDEV 0
+#define CONFIG_SNDIO_INDEV 0
+#define CONFIG_V4L2_INDEV 0
+#define CONFIG_VFWCAP_INDEV 0
+#define CONFIG_XCBGRAB_INDEV 0
+#define CONFIG_LIBCDIO_INDEV 0
+#define CONFIG_LIBDC1394_INDEV 0
+#define CONFIG_ALSA_OUTDEV 0
+#define CONFIG_CACA_OUTDEV 0
+#define CONFIG_DECKLINK_OUTDEV 0
+#define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
+#define CONFIG_FBDEV_OUTDEV 0
+#define CONFIG_OPENGL_OUTDEV 0
+#define CONFIG_OSS_OUTDEV 0
+#define CONFIG_PULSE_OUTDEV 0
+#define CONFIG_SDL2_OUTDEV 0
+#define CONFIG_SNDIO_OUTDEV 0
+#define CONFIG_V4L2_OUTDEV 0
+#define CONFIG_XV_OUTDEV 0
+#define CONFIG_ABENCH_FILTER 0
+#define CONFIG_ACOMPRESSOR_FILTER 0
+#define CONFIG_ACONTRAST_FILTER 0
+#define CONFIG_ACOPY_FILTER 0
+#define CONFIG_ACUE_FILTER 0
+#define CONFIG_ACROSSFADE_FILTER 0
+#define CONFIG_ACROSSOVER_FILTER 0
+#define CONFIG_ACRUSHER_FILTER 0
+#define CONFIG_ADECLICK_FILTER 0
+#define CONFIG_ADECLIP_FILTER 0
+#define CONFIG_ADELAY_FILTER 0
+#define CONFIG_ADERIVATIVE_FILTER 0
+#define CONFIG_AECHO_FILTER 0
+#define CONFIG_AEMPHASIS_FILTER 0
+#define CONFIG_AEVAL_FILTER 0
+#define CONFIG_AFADE_FILTER 0
+#define CONFIG_AFFTDN_FILTER 0
+#define CONFIG_AFFTFILT_FILTER 0
+#define CONFIG_AFIR_FILTER 0
+#define CONFIG_AFORMAT_FILTER 0
+#define CONFIG_AGATE_FILTER 0
+#define CONFIG_AIIR_FILTER 0
+#define CONFIG_AINTEGRAL_FILTER 0
+#define CONFIG_AINTERLEAVE_FILTER 0
+#define CONFIG_ALIMITER_FILTER 0
+#define CONFIG_ALLPASS_FILTER 0
+#define CONFIG_ALOOP_FILTER 0
+#define CONFIG_AMERGE_FILTER 0
+#define CONFIG_AMETADATA_FILTER 0
+#define CONFIG_AMIX_FILTER 0
+#define CONFIG_AMULTIPLY_FILTER 0
+#define CONFIG_ANEQUALIZER_FILTER 0
+#define CONFIG_ANULL_FILTER 0
+#define CONFIG_APAD_FILTER 0
+#define CONFIG_APERMS_FILTER 0
+#define CONFIG_APHASER_FILTER 0
+#define CONFIG_APULSATOR_FILTER 0
+#define CONFIG_AREALTIME_FILTER 0
+#define CONFIG_ARESAMPLE_FILTER 0
+#define CONFIG_AREVERSE_FILTER 0
+#define CONFIG_ASELECT_FILTER 0
+#define CONFIG_ASENDCMD_FILTER 0
+#define CONFIG_ASETNSAMPLES_FILTER 0
+#define CONFIG_ASETPTS_FILTER 0
+#define CONFIG_ASETRATE_FILTER 0
+#define CONFIG_ASETTB_FILTER 0
+#define CONFIG_ASHOWINFO_FILTER 0
+#define CONFIG_ASIDEDATA_FILTER 0
+#define CONFIG_ASPLIT_FILTER 0
+#define CONFIG_ASTATS_FILTER 0
+#define CONFIG_ASTREAMSELECT_FILTER 0
+#define CONFIG_ATEMPO_FILTER 0
+#define CONFIG_ATRIM_FILTER 0
+#define CONFIG_AZMQ_FILTER 0
+#define CONFIG_BANDPASS_FILTER 0
+#define CONFIG_BANDREJECT_FILTER 0
+#define CONFIG_BASS_FILTER 0
+#define CONFIG_BIQUAD_FILTER 0
+#define CONFIG_BS2B_FILTER 0
+#define CONFIG_CHANNELMAP_FILTER 0
+#define CONFIG_CHANNELSPLIT_FILTER 0
+#define CONFIG_CHORUS_FILTER 0
+#define CONFIG_COMPAND_FILTER 0
+#define CONFIG_COMPENSATIONDELAY_FILTER 0
+#define CONFIG_CROSSFEED_FILTER 0
+#define CONFIG_CRYSTALIZER_FILTER 0
+#define CONFIG_DCSHIFT_FILTER 0
+#define CONFIG_DRMETER_FILTER 0
+#define CONFIG_DYNAUDNORM_FILTER 0
+#define CONFIG_EARWAX_FILTER 0
+#define CONFIG_EBUR128_FILTER 0
+#define CONFIG_EQUALIZER_FILTER 0
+#define CONFIG_EXTRASTEREO_FILTER 0
+#define CONFIG_FIREQUALIZER_FILTER 0
+#define CONFIG_FLANGER_FILTER 0
+#define CONFIG_HAAS_FILTER 0
+#define CONFIG_HDCD_FILTER 0
+#define CONFIG_HEADPHONE_FILTER 0
+#define CONFIG_HIGHPASS_FILTER 0
+#define CONFIG_HIGHSHELF_FILTER 0
+#define CONFIG_JOIN_FILTER 0
+#define CONFIG_LADSPA_FILTER 0
+#define CONFIG_LOUDNORM_FILTER 0
+#define CONFIG_LOWPASS_FILTER 0
+#define CONFIG_LOWSHELF_FILTER 0
+#define CONFIG_LV2_FILTER 0
+#define CONFIG_MCOMPAND_FILTER 0
+#define CONFIG_PAN_FILTER 0
+#define CONFIG_REPLAYGAIN_FILTER 0
+#define CONFIG_RESAMPLE_FILTER 0
+#define CONFIG_RUBBERBAND_FILTER 0
+#define CONFIG_SIDECHAINCOMPRESS_FILTER 0
+#define CONFIG_SIDECHAINGATE_FILTER 0
+#define CONFIG_SILENCEDETECT_FILTER 0
+#define CONFIG_SILENCEREMOVE_FILTER 0
+#define CONFIG_SOFALIZER_FILTER 0
+#define CONFIG_STEREOTOOLS_FILTER 0
+#define CONFIG_STEREOWIDEN_FILTER 0
+#define CONFIG_SUPEREQUALIZER_FILTER 0
+#define CONFIG_SURROUND_FILTER 0
+#define CONFIG_TREBLE_FILTER 0
+#define CONFIG_TREMOLO_FILTER 0
+#define CONFIG_VIBRATO_FILTER 0
+#define CONFIG_VOLUME_FILTER 0
+#define CONFIG_VOLUMEDETECT_FILTER 0
+#define CONFIG_AEVALSRC_FILTER 0
+#define CONFIG_ANOISESRC_FILTER 0
+#define CONFIG_ANULLSRC_FILTER 0
+#define CONFIG_FLITE_FILTER 0
+#define CONFIG_HILBERT_FILTER 0
+#define CONFIG_SINE_FILTER 0
+#define CONFIG_ANULLSINK_FILTER 0
+#define CONFIG_ALPHAEXTRACT_FILTER 0
+#define CONFIG_ALPHAMERGE_FILTER 0
+#define CONFIG_AMPLIFY_FILTER 0
+#define CONFIG_ASS_FILTER 0
+#define CONFIG_ATADENOISE_FILTER 0
+#define CONFIG_AVGBLUR_FILTER 0
+#define CONFIG_AVGBLUR_OPENCL_FILTER 0
+#define CONFIG_BBOX_FILTER 0
+#define CONFIG_BENCH_FILTER 0
+#define CONFIG_BITPLANENOISE_FILTER 0
+#define CONFIG_BLACKDETECT_FILTER 0
+#define CONFIG_BLACKFRAME_FILTER 0
+#define CONFIG_BLEND_FILTER 0
+#define CONFIG_BM3D_FILTER 0
+#define CONFIG_BOXBLUR_FILTER 0
+#define CONFIG_BOXBLUR_OPENCL_FILTER 0
+#define CONFIG_BWDIF_FILTER 0
+#define CONFIG_CHROMAKEY_FILTER 0
+#define CONFIG_CIESCOPE_FILTER 0
+#define CONFIG_CODECVIEW_FILTER 0
+#define CONFIG_COLORBALANCE_FILTER 0
+#define CONFIG_COLORCHANNELMIXER_FILTER 0
+#define CONFIG_COLORKEY_FILTER 0
+#define CONFIG_COLORLEVELS_FILTER 0
+#define CONFIG_COLORMATRIX_FILTER 0
+#define CONFIG_COLORSPACE_FILTER 0
+#define CONFIG_CONVOLUTION_FILTER 0
+#define CONFIG_CONVOLUTION_OPENCL_FILTER 0
+#define CONFIG_CONVOLVE_FILTER 0
+#define CONFIG_COPY_FILTER 0
+#define CONFIG_COREIMAGE_FILTER 0
+#define CONFIG_COVER_RECT_FILTER 0
+#define CONFIG_CROP_FILTER 0
+#define CONFIG_CROPDETECT_FILTER 0
+#define CONFIG_CUE_FILTER 0
+#define CONFIG_CURVES_FILTER 0
+#define CONFIG_DATASCOPE_FILTER 0
+#define CONFIG_DCTDNOIZ_FILTER 0
+#define CONFIG_DEBAND_FILTER 0
+#define CONFIG_DEBLOCK_FILTER 0
+#define CONFIG_DECIMATE_FILTER 0
+#define CONFIG_DECONVOLVE_FILTER 0
+#define CONFIG_DEFLATE_FILTER 0
+#define CONFIG_DEFLICKER_FILTER 0
+#define CONFIG_DEINTERLACE_QSV_FILTER 0
+#define CONFIG_DEINTERLACE_VAAPI_FILTER 0
+#define CONFIG_DEJUDDER_FILTER 0
+#define CONFIG_DELOGO_FILTER 0
+#define CONFIG_DENOISE_VAAPI_FILTER 0
+#define CONFIG_DESHAKE_FILTER 0
+#define CONFIG_DESPILL_FILTER 0
+#define CONFIG_DETELECINE_FILTER 0
+#define CONFIG_DILATION_FILTER 0
+#define CONFIG_DILATION_OPENCL_FILTER 0
+#define CONFIG_DISPLACE_FILTER 0
+#define CONFIG_DOUBLEWEAVE_FILTER 0
+#define CONFIG_DRAWBOX_FILTER 0
+#define CONFIG_DRAWGRAPH_FILTER 0
+#define CONFIG_DRAWGRID_FILTER 0
+#define CONFIG_DRAWTEXT_FILTER 0
+#define CONFIG_EDGEDETECT_FILTER 0
+#define CONFIG_ELBG_FILTER 0
+#define CONFIG_ENTROPY_FILTER 0
+#define CONFIG_EQ_FILTER 0
+#define CONFIG_EROSION_FILTER 0
+#define CONFIG_EROSION_OPENCL_FILTER 0
+#define CONFIG_EXTRACTPLANES_FILTER 0
+#define CONFIG_FADE_FILTER 0
+#define CONFIG_FFTDNOIZ_FILTER 0
+#define CONFIG_FFTFILT_FILTER 0
+#define CONFIG_FIELD_FILTER 0
+#define CONFIG_FIELDHINT_FILTER 0
+#define CONFIG_FIELDMATCH_FILTER 0
+#define CONFIG_FIELDORDER_FILTER 0
+#define CONFIG_FILLBORDERS_FILTER 0
+#define CONFIG_FIND_RECT_FILTER 0
+#define CONFIG_FLOODFILL_FILTER 0
+#define CONFIG_FORMAT_FILTER 0
+#define CONFIG_FPS_FILTER 0
+#define CONFIG_FRAMEPACK_FILTER 0
+#define CONFIG_FRAMERATE_FILTER 0
+#define CONFIG_FRAMESTEP_FILTER 0
+#define CONFIG_FREI0R_FILTER 0
+#define CONFIG_FSPP_FILTER 0
+#define CONFIG_GBLUR_FILTER 0
+#define CONFIG_GEQ_FILTER 0
+#define CONFIG_GRADFUN_FILTER 0
+#define CONFIG_GREYEDGE_FILTER 0
+#define CONFIG_HALDCLUT_FILTER 0
+#define CONFIG_HFLIP_FILTER 0
+#define CONFIG_HISTEQ_FILTER 0
+#define CONFIG_HISTOGRAM_FILTER 0
+#define CONFIG_HQDN3D_FILTER 0
+#define CONFIG_HQX_FILTER 0
+#define CONFIG_HSTACK_FILTER 0
+#define CONFIG_HUE_FILTER 0
+#define CONFIG_HWDOWNLOAD_FILTER 0
+#define CONFIG_HWMAP_FILTER 0
+#define CONFIG_HWUPLOAD_FILTER 0
+#define CONFIG_HWUPLOAD_CUDA_FILTER 0
+#define CONFIG_HYSTERESIS_FILTER 0
+#define CONFIG_IDET_FILTER 0
+#define CONFIG_IL_FILTER 0
+#define CONFIG_INFLATE_FILTER 0
+#define CONFIG_INTERLACE_FILTER 0
+#define CONFIG_INTERLEAVE_FILTER 0
+#define CONFIG_KERNDEINT_FILTER 0
+#define CONFIG_LENSCORRECTION_FILTER 0
+#define CONFIG_LENSFUN_FILTER 0
+#define CONFIG_LIBVMAF_FILTER 0
+#define CONFIG_LIMITER_FILTER 0
+#define CONFIG_LOOP_FILTER 0
+#define CONFIG_LUMAKEY_FILTER 0
+#define CONFIG_LUT_FILTER 0
+#define CONFIG_LUT1D_FILTER 0
+#define CONFIG_LUT2_FILTER 0
+#define CONFIG_LUT3D_FILTER 0
+#define CONFIG_LUTRGB_FILTER 0
+#define CONFIG_LUTYUV_FILTER 0
+#define CONFIG_MASKEDCLAMP_FILTER 0
+#define CONFIG_MASKEDMERGE_FILTER 0
+#define CONFIG_MCDEINT_FILTER 0
+#define CONFIG_MERGEPLANES_FILTER 0
+#define CONFIG_MESTIMATE_FILTER 0
+#define CONFIG_METADATA_FILTER 0
+#define CONFIG_MIDEQUALIZER_FILTER 0
+#define CONFIG_MINTERPOLATE_FILTER 0
+#define CONFIG_MIX_FILTER 0
+#define CONFIG_MPDECIMATE_FILTER 0
+#define CONFIG_NEGATE_FILTER 0
+#define CONFIG_NLMEANS_FILTER 0
+#define CONFIG_NNEDI_FILTER 0
+#define CONFIG_NOFORMAT_FILTER 0
+#define CONFIG_NOISE_FILTER 0
+#define CONFIG_NORMALIZE_FILTER 0
+#define CONFIG_NULL_FILTER 0
+#define CONFIG_OCR_FILTER 0
+#define CONFIG_OCV_FILTER 0
+#define CONFIG_OSCILLOSCOPE_FILTER 0
+#define CONFIG_OVERLAY_FILTER 0
+#define CONFIG_OVERLAY_OPENCL_FILTER 0
+#define CONFIG_OVERLAY_QSV_FILTER 0
+#define CONFIG_OWDENOISE_FILTER 0
+#define CONFIG_PAD_FILTER 0
+#define CONFIG_PALETTEGEN_FILTER 0
+#define CONFIG_PALETTEUSE_FILTER 0
+#define CONFIG_PERMS_FILTER 0
+#define CONFIG_PERSPECTIVE_FILTER 0
+#define CONFIG_PHASE_FILTER 0
+#define CONFIG_PIXDESCTEST_FILTER 0
+#define CONFIG_PIXSCOPE_FILTER 0
+#define CONFIG_PP_FILTER 0
+#define CONFIG_PP7_FILTER 0
+#define CONFIG_PREMULTIPLY_FILTER 0
+#define CONFIG_PREWITT_FILTER 0
+#define CONFIG_PREWITT_OPENCL_FILTER 0
+#define CONFIG_PROCAMP_VAAPI_FILTER 0
+#define CONFIG_PROGRAM_OPENCL_FILTER 0
+#define CONFIG_PSEUDOCOLOR_FILTER 0
+#define CONFIG_PSNR_FILTER 0
+#define CONFIG_PULLUP_FILTER 0
+#define CONFIG_QP_FILTER 0
+#define CONFIG_RANDOM_FILTER 0
+#define CONFIG_READEIA608_FILTER 0
+#define CONFIG_READVITC_FILTER 0
+#define CONFIG_REALTIME_FILTER 0
+#define CONFIG_REMAP_FILTER 0
+#define CONFIG_REMOVEGRAIN_FILTER 0
+#define CONFIG_REMOVELOGO_FILTER 0
+#define CONFIG_REPEATFIELDS_FILTER 0
+#define CONFIG_REVERSE_FILTER 0
+#define CONFIG_ROBERTS_FILTER 0
+#define CONFIG_ROBERTS_OPENCL_FILTER 0
+#define CONFIG_ROTATE_FILTER 0
+#define CONFIG_SAB_FILTER 0
+#define CONFIG_SCALE_FILTER 0
+#define CONFIG_SCALE_CUDA_FILTER 0
+#define CONFIG_SCALE_NPP_FILTER 0
+#define CONFIG_SCALE_QSV_FILTER 0
+#define CONFIG_SCALE_VAAPI_FILTER 0
+#define CONFIG_SCALE2REF_FILTER 0
+#define CONFIG_SELECT_FILTER 0
+#define CONFIG_SELECTIVECOLOR_FILTER 0
+#define CONFIG_SENDCMD_FILTER 0
+#define CONFIG_SEPARATEFIELDS_FILTER 0
+#define CONFIG_SETDAR_FILTER 0
+#define CONFIG_SETFIELD_FILTER 0
+#define CONFIG_SETPTS_FILTER 0
+#define CONFIG_SETRANGE_FILTER 0
+#define CONFIG_SETSAR_FILTER 0
+#define CONFIG_SETTB_FILTER 0
+#define CONFIG_SHARPNESS_VAAPI_FILTER 0
+#define CONFIG_SHOWINFO_FILTER 0
+#define CONFIG_SHOWPALETTE_FILTER 0
+#define CONFIG_SHUFFLEFRAMES_FILTER 0
+#define CONFIG_SHUFFLEPLANES_FILTER 0
+#define CONFIG_SIDEDATA_FILTER 0
+#define CONFIG_SIGNALSTATS_FILTER 0
+#define CONFIG_SIGNATURE_FILTER 0
+#define CONFIG_SMARTBLUR_FILTER 0
+#define CONFIG_SOBEL_FILTER 0
+#define CONFIG_SOBEL_OPENCL_FILTER 0
+#define CONFIG_SPLIT_FILTER 0
+#define CONFIG_SPP_FILTER 0
+#define CONFIG_SR_FILTER 0
+#define CONFIG_SSIM_FILTER 0
+#define CONFIG_STEREO3D_FILTER 0
+#define CONFIG_STREAMSELECT_FILTER 0
+#define CONFIG_SUBTITLES_FILTER 0
+#define CONFIG_SUPER2XSAI_FILTER 0
+#define CONFIG_SWAPRECT_FILTER 0
+#define CONFIG_SWAPUV_FILTER 0
+#define CONFIG_TBLEND_FILTER 0
+#define CONFIG_TELECINE_FILTER 0
+#define CONFIG_THRESHOLD_FILTER 0
+#define CONFIG_THUMBNAIL_FILTER 0
+#define CONFIG_THUMBNAIL_CUDA_FILTER 0
+#define CONFIG_TILE_FILTER 0
+#define CONFIG_TINTERLACE_FILTER 0
+#define CONFIG_TLUT2_FILTER 0
+#define CONFIG_TMIX_FILTER 0
+#define CONFIG_TONEMAP_FILTER 0
+#define CONFIG_TONEMAP_OPENCL_FILTER 0
+#define CONFIG_TRANSPOSE_FILTER 0
+#define CONFIG_TRANSPOSE_NPP_FILTER 0
+#define CONFIG_TRIM_FILTER 0
+#define CONFIG_UNPREMULTIPLY_FILTER 0
+#define CONFIG_UNSHARP_FILTER 0
+#define CONFIG_UNSHARP_OPENCL_FILTER 0
+#define CONFIG_USPP_FILTER 0
+#define CONFIG_VAGUEDENOISER_FILTER 0
+#define CONFIG_VECTORSCOPE_FILTER 0
+#define CONFIG_VFLIP_FILTER 0
+#define CONFIG_VFRDET_FILTER 0
+#define CONFIG_VIDSTABDETECT_FILTER 0
+#define CONFIG_VIDSTABTRANSFORM_FILTER 0
+#define CONFIG_VIGNETTE_FILTER 0
+#define CONFIG_VMAFMOTION_FILTER 0
+#define CONFIG_VPP_QSV_FILTER 0
+#define CONFIG_VSTACK_FILTER 0
+#define CONFIG_W3FDIF_FILTER 0
+#define CONFIG_WAVEFORM_FILTER 0
+#define CONFIG_WEAVE_FILTER 0
+#define CONFIG_XBR_FILTER 0
+#define CONFIG_YADIF_FILTER 0
+#define CONFIG_ZMQ_FILTER 0
+#define CONFIG_ZOOMPAN_FILTER 0
+#define CONFIG_ZSCALE_FILTER 0
+#define CONFIG_ALLRGB_FILTER 0
+#define CONFIG_ALLYUV_FILTER 0
+#define CONFIG_CELLAUTO_FILTER 0
+#define CONFIG_COLOR_FILTER 0
+#define CONFIG_COREIMAGESRC_FILTER 0
+#define CONFIG_FREI0R_SRC_FILTER 0
+#define CONFIG_HALDCLUTSRC_FILTER 0
+#define CONFIG_LIFE_FILTER 0
+#define CONFIG_MANDELBROT_FILTER 0
+#define CONFIG_MPTESTSRC_FILTER 0
+#define CONFIG_NULLSRC_FILTER 0
+#define CONFIG_OPENCLSRC_FILTER 0
+#define CONFIG_PAL75BARS_FILTER 0
+#define CONFIG_PAL100BARS_FILTER 0
+#define CONFIG_RGBTESTSRC_FILTER 0
+#define CONFIG_SMPTEBARS_FILTER 0
+#define CONFIG_SMPTEHDBARS_FILTER 0
+#define CONFIG_TESTSRC_FILTER 0
+#define CONFIG_TESTSRC2_FILTER 0
+#define CONFIG_YUVTESTSRC_FILTER 0
+#define CONFIG_NULLSINK_FILTER 0
+#define CONFIG_ABITSCOPE_FILTER 0
+#define CONFIG_ADRAWGRAPH_FILTER 0
+#define CONFIG_AHISTOGRAM_FILTER 0
+#define CONFIG_APHASEMETER_FILTER 0
+#define CONFIG_AVECTORSCOPE_FILTER 0
+#define CONFIG_CONCAT_FILTER 0
+#define CONFIG_SHOWCQT_FILTER 0
+#define CONFIG_SHOWFREQS_FILTER 0
+#define CONFIG_SHOWSPECTRUM_FILTER 0
+#define CONFIG_SHOWSPECTRUMPIC_FILTER 0
+#define CONFIG_SHOWVOLUME_FILTER 0
+#define CONFIG_SHOWWAVES_FILTER 0
+#define CONFIG_SHOWWAVESPIC_FILTER 0
+#define CONFIG_SPECTRUMSYNTH_FILTER 0
+#define CONFIG_AMOVIE_FILTER 0
+#define CONFIG_MOVIE_FILTER 0
+#define CONFIG_AFIFO_FILTER 0
+#define CONFIG_FIFO_FILTER 0
 #define CONFIG_AA_DEMUXER 0
 #define CONFIG_AAC_DEMUXER 1
 #define CONFIG_AC3_DEMUXER 0
@@ -1198,10 +1965,14 @@
 #define CONFIG_AIFF_DEMUXER 0
 #define CONFIG_AIX_DEMUXER 0
 #define CONFIG_AMR_DEMUXER 1
+#define CONFIG_AMRNB_DEMUXER 0
+#define CONFIG_AMRWB_DEMUXER 0
 #define CONFIG_ANM_DEMUXER 0
 #define CONFIG_APC_DEMUXER 0
 #define CONFIG_APE_DEMUXER 0
 #define CONFIG_APNG_DEMUXER 0
+#define CONFIG_APTX_DEMUXER 0
+#define CONFIG_APTX_HD_DEMUXER 0
 #define CONFIG_AQTITLE_DEMUXER 0
 #define CONFIG_ASF_DEMUXER 0
 #define CONFIG_ASF_O_DEMUXER 0
@@ -1212,6 +1983,7 @@
 #define CONFIG_AVISYNTH_DEMUXER 0
 #define CONFIG_AVR_DEMUXER 0
 #define CONFIG_AVS_DEMUXER 0
+#define CONFIG_AVS2_DEMUXER 0
 #define CONFIG_BETHSOFTVID_DEMUXER 0
 #define CONFIG_BFI_DEMUXER 0
 #define CONFIG_BINTEXT_DEMUXER 0
@@ -1227,6 +1999,8 @@
 #define CONFIG_CDG_DEMUXER 0
 #define CONFIG_CDXL_DEMUXER 0
 #define CONFIG_CINE_DEMUXER 0
+#define CONFIG_CODEC2_DEMUXER 0
+#define CONFIG_CODEC2RAW_DEMUXER 0
 #define CONFIG_CONCAT_DEMUXER 0
 #define CONFIG_DASH_DEMUXER 0
 #define CONFIG_DATA_DEMUXER 0
@@ -1248,7 +2022,6 @@
 #define CONFIG_EA_CDATA_DEMUXER 0
 #define CONFIG_EAC3_DEMUXER 0
 #define CONFIG_EPAF_DEMUXER 0
-#define CONFIG_FFM_DEMUXER 0
 #define CONFIG_FFMETADATA_DEMUXER 0
 #define CONFIG_FILMSTRIP_DEMUXER 0
 #define CONFIG_FITS_DEMUXER 0
@@ -1330,6 +2103,7 @@
 #define CONFIG_MXG_DEMUXER 0
 #define CONFIG_NC_DEMUXER 0
 #define CONFIG_NISTSPHERE_DEMUXER 0
+#define CONFIG_NSP_DEMUXER 0
 #define CONFIG_NSV_DEMUXER 0
 #define CONFIG_NUT_DEMUXER 0
 #define CONFIG_NUV_DEMUXER 0
@@ -1376,6 +2150,7 @@
 #define CONFIG_S337M_DEMUXER 0
 #define CONFIG_SAMI_DEMUXER 0
 #define CONFIG_SAP_DEMUXER 0
+#define CONFIG_SBC_DEMUXER 0
 #define CONFIG_SBG_DEMUXER 0
 #define CONFIG_SCC_DEMUXER 0
 #define CONFIG_SDP_DEMUXER 0
@@ -1383,6 +2158,7 @@
 #define CONFIG_SDS_DEMUXER 0
 #define CONFIG_SDX_DEMUXER 0
 #define CONFIG_SEGAFILM_DEMUXER 0
+#define CONFIG_SER_DEMUXER 0
 #define CONFIG_SHORTEN_DEMUXER 0
 #define CONFIG_SIFF_DEMUXER 0
 #define CONFIG_SLN_DEMUXER 0
@@ -1410,6 +2186,7 @@
 #define CONFIG_TTA_DEMUXER 0
 #define CONFIG_TXD_DEMUXER 0
 #define CONFIG_TTY_DEMUXER 0
+#define CONFIG_TY_DEMUXER 0
 #define CONFIG_V210_DEMUXER 0
 #define CONFIG_V210X_DEMUXER 0
 #define CONFIG_VAG_DEMUXER 0
@@ -1463,642 +2240,11 @@
 #define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0
 #define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0
 #define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0
+#define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0
 #define CONFIG_LIBGME_DEMUXER 0
 #define CONFIG_LIBMODPLUG_DEMUXER 0
 #define CONFIG_LIBOPENMPT_DEMUXER 0
-#define CONFIG_A64MULTI_ENCODER 0
-#define CONFIG_A64MULTI5_ENCODER 0
-#define CONFIG_ALIAS_PIX_ENCODER 0
-#define CONFIG_AMV_ENCODER 0
-#define CONFIG_APNG_ENCODER 0
-#define CONFIG_ASV1_ENCODER 0
-#define CONFIG_ASV2_ENCODER 0
-#define CONFIG_AVRP_ENCODER 0
-#define CONFIG_AVUI_ENCODER 0
-#define CONFIG_AYUV_ENCODER 0
-#define CONFIG_BMP_ENCODER 0
-#define CONFIG_CINEPAK_ENCODER 0
-#define CONFIG_CLJR_ENCODER 0
-#define CONFIG_COMFORTNOISE_ENCODER 0
-#define CONFIG_DNXHD_ENCODER 0
-#define CONFIG_DPX_ENCODER 0
-#define CONFIG_DVVIDEO_ENCODER 0
-#define CONFIG_FFV1_ENCODER 0
-#define CONFIG_FFVHUFF_ENCODER 0
-#define CONFIG_FITS_ENCODER 0
-#define CONFIG_FLASHSV_ENCODER 0
-#define CONFIG_FLASHSV2_ENCODER 0
-#define CONFIG_FLV_ENCODER 0
-#define CONFIG_GIF_ENCODER 0
-#define CONFIG_H261_ENCODER 0
-#define CONFIG_H263_ENCODER 0
-#define CONFIG_H263P_ENCODER 0
-#define CONFIG_HAP_ENCODER 0
-#define CONFIG_HUFFYUV_ENCODER 0
-#define CONFIG_JPEG2000_ENCODER 0
-#define CONFIG_JPEGLS_ENCODER 0
-#define CONFIG_LJPEG_ENCODER 0
-#define CONFIG_MJPEG_ENCODER 0
-#define CONFIG_MPEG1VIDEO_ENCODER 0
-#define CONFIG_MPEG2VIDEO_ENCODER 0
-#define CONFIG_MPEG4_ENCODER 0
-#define CONFIG_MSMPEG4V2_ENCODER 0
-#define CONFIG_MSMPEG4V3_ENCODER 0
-#define CONFIG_MSVIDEO1_ENCODER 0
-#define CONFIG_PAM_ENCODER 0
-#define CONFIG_PBM_ENCODER 0
-#define CONFIG_PCX_ENCODER 0
-#define CONFIG_PGM_ENCODER 0
-#define CONFIG_PGMYUV_ENCODER 0
-#define CONFIG_PNG_ENCODER 0
-#define CONFIG_PPM_ENCODER 0
-#define CONFIG_PRORES_ENCODER 0
-#define CONFIG_PRORES_AW_ENCODER 0
-#define CONFIG_PRORES_KS_ENCODER 0
-#define CONFIG_QTRLE_ENCODER 0
-#define CONFIG_R10K_ENCODER 0
-#define CONFIG_R210_ENCODER 0
-#define CONFIG_RAWVIDEO_ENCODER 0
-#define CONFIG_ROQ_ENCODER 0
-#define CONFIG_RV10_ENCODER 0
-#define CONFIG_RV20_ENCODER 0
-#define CONFIG_S302M_ENCODER 0
-#define CONFIG_SGI_ENCODER 0
-#define CONFIG_SNOW_ENCODER 0
-#define CONFIG_SUNRAST_ENCODER 0
-#define CONFIG_SVQ1_ENCODER 0
-#define CONFIG_TARGA_ENCODER 0
-#define CONFIG_TIFF_ENCODER 0
-#define CONFIG_UTVIDEO_ENCODER 0
-#define CONFIG_V210_ENCODER 0
-#define CONFIG_V308_ENCODER 0
-#define CONFIG_V408_ENCODER 0
-#define CONFIG_V410_ENCODER 0
-#define CONFIG_VC2_ENCODER 0
-#define CONFIG_WRAPPED_AVFRAME_ENCODER 0
-#define CONFIG_WMV1_ENCODER 0
-#define CONFIG_WMV2_ENCODER 0
-#define CONFIG_XBM_ENCODER 0
-#define CONFIG_XFACE_ENCODER 0
-#define CONFIG_XWD_ENCODER 0
-#define CONFIG_Y41P_ENCODER 0
-#define CONFIG_YUV4_ENCODER 0
-#define CONFIG_ZLIB_ENCODER 0
-#define CONFIG_ZMBV_ENCODER 0
-#define CONFIG_AAC_ENCODER 0
-#define CONFIG_AC3_ENCODER 0
-#define CONFIG_AC3_FIXED_ENCODER 0
-#define CONFIG_ALAC_ENCODER 0
-#define CONFIG_DCA_ENCODER 0
-#define CONFIG_EAC3_ENCODER 0
-#define CONFIG_FLAC_ENCODER 0
-#define CONFIG_G723_1_ENCODER 0
-#define CONFIG_MLP_ENCODER 0
-#define CONFIG_MP2_ENCODER 0
-#define CONFIG_MP2FIXED_ENCODER 0
-#define CONFIG_NELLYMOSER_ENCODER 0
-#define CONFIG_OPUS_ENCODER 0
-#define CONFIG_RA_144_ENCODER 0
-#define CONFIG_SONIC_ENCODER 0
-#define CONFIG_SONIC_LS_ENCODER 0
-#define CONFIG_TRUEHD_ENCODER 0
-#define CONFIG_TTA_ENCODER 0
-#define CONFIG_VORBIS_ENCODER 0
-#define CONFIG_WAVPACK_ENCODER 0
-#define CONFIG_WMAV1_ENCODER 0
-#define CONFIG_WMAV2_ENCODER 0
-#define CONFIG_PCM_ALAW_ENCODER 0
-#define CONFIG_PCM_F32BE_ENCODER 0
-#define CONFIG_PCM_F32LE_ENCODER 0
-#define CONFIG_PCM_F64BE_ENCODER 0
-#define CONFIG_PCM_F64LE_ENCODER 0
-#define CONFIG_PCM_MULAW_ENCODER 0
-#define CONFIG_PCM_S8_ENCODER 0
-#define CONFIG_PCM_S8_PLANAR_ENCODER 0
-#define CONFIG_PCM_S16BE_ENCODER 0
-#define CONFIG_PCM_S16BE_PLANAR_ENCODER 0
-#define CONFIG_PCM_S16LE_ENCODER 0
-#define CONFIG_PCM_S16LE_PLANAR_ENCODER 0
-#define CONFIG_PCM_S24BE_ENCODER 0
-#define CONFIG_PCM_S24DAUD_ENCODER 0
-#define CONFIG_PCM_S24LE_ENCODER 0
-#define CONFIG_PCM_S24LE_PLANAR_ENCODER 0
-#define CONFIG_PCM_S32BE_ENCODER 0
-#define CONFIG_PCM_S32LE_ENCODER 0
-#define CONFIG_PCM_S32LE_PLANAR_ENCODER 0
-#define CONFIG_PCM_S64BE_ENCODER 0
-#define CONFIG_PCM_S64LE_ENCODER 0
-#define CONFIG_PCM_U8_ENCODER 0
-#define CONFIG_PCM_U16BE_ENCODER 0
-#define CONFIG_PCM_U16LE_ENCODER 0
-#define CONFIG_PCM_U24BE_ENCODER 0
-#define CONFIG_PCM_U24LE_ENCODER 0
-#define CONFIG_PCM_U32BE_ENCODER 0
-#define CONFIG_PCM_U32LE_ENCODER 0
-#define CONFIG_ROQ_DPCM_ENCODER 0
-#define CONFIG_ADPCM_ADX_ENCODER 0
-#define CONFIG_ADPCM_G722_ENCODER 0
-#define CONFIG_ADPCM_G726_ENCODER 0
-#define CONFIG_ADPCM_G726LE_ENCODER 0
-#define CONFIG_ADPCM_IMA_QT_ENCODER 0
-#define CONFIG_ADPCM_IMA_WAV_ENCODER 0
-#define CONFIG_ADPCM_MS_ENCODER 0
-#define CONFIG_ADPCM_SWF_ENCODER 0
-#define CONFIG_ADPCM_YAMAHA_ENCODER 0
-#define CONFIG_SSA_ENCODER 0
-#define CONFIG_ASS_ENCODER 0
-#define CONFIG_DVBSUB_ENCODER 0
-#define CONFIG_DVDSUB_ENCODER 0
-#define CONFIG_MOVTEXT_ENCODER 0
-#define CONFIG_SRT_ENCODER 0
-#define CONFIG_SUBRIP_ENCODER 0
-#define CONFIG_TEXT_ENCODER 0
-#define CONFIG_WEBVTT_ENCODER 0
-#define CONFIG_XSUB_ENCODER 0
-#define CONFIG_AAC_AT_ENCODER 0
-#define CONFIG_ALAC_AT_ENCODER 0
-#define CONFIG_ILBC_AT_ENCODER 0
-#define CONFIG_PCM_ALAW_AT_ENCODER 0
-#define CONFIG_PCM_MULAW_AT_ENCODER 0
-#define CONFIG_LIBFDK_AAC_ENCODER 0
-#define CONFIG_LIBGSM_ENCODER 0
-#define CONFIG_LIBGSM_MS_ENCODER 0
-#define CONFIG_LIBILBC_ENCODER 0
-#define CONFIG_LIBMP3LAME_ENCODER 0
-#define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0
-#define CONFIG_LIBOPENJPEG_ENCODER 0
-#define CONFIG_LIBOPUS_ENCODER 0
-#define CONFIG_LIBSHINE_ENCODER 0
-#define CONFIG_LIBSPEEX_ENCODER 0
-#define CONFIG_LIBTHEORA_ENCODER 0
-#define CONFIG_LIBTWOLAME_ENCODER 0
-#define CONFIG_LIBVO_AMRWBENC_ENCODER 0
-#define CONFIG_LIBVORBIS_ENCODER 0
-#define CONFIG_LIBVPX_VP8_ENCODER 0
-#define CONFIG_LIBVPX_VP9_ENCODER 0
-#define CONFIG_LIBWAVPACK_ENCODER 0
-#define CONFIG_LIBWEBP_ANIM_ENCODER 0
-#define CONFIG_LIBWEBP_ENCODER 0
-#define CONFIG_LIBX262_ENCODER 0
-#define CONFIG_LIBX264_ENCODER 0
-#define CONFIG_LIBX264RGB_ENCODER 0
-#define CONFIG_LIBX265_ENCODER 0
-#define CONFIG_LIBXAVS_ENCODER 0
-#define CONFIG_LIBXVID_ENCODER 0
-#define CONFIG_H263_V4L2M2M_ENCODER 0
-#define CONFIG_LIBOPENH264_ENCODER 0
-#define CONFIG_H264_NVENC_ENCODER 0
-#define CONFIG_H264_OMX_ENCODER 0
-#define CONFIG_H264_QSV_ENCODER 0
-#define CONFIG_H264_V4L2M2M_ENCODER 0
-#define CONFIG_H264_VAAPI_ENCODER 0
-#define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0
-#define CONFIG_NVENC_ENCODER 0
-#define CONFIG_NVENC_H264_ENCODER 0
-#define CONFIG_NVENC_HEVC_ENCODER 0
-#define CONFIG_HEVC_NVENC_ENCODER 0
-#define CONFIG_HEVC_QSV_ENCODER 0
-#define CONFIG_HEVC_V4L2M2M_ENCODER 0
-#define CONFIG_HEVC_VAAPI_ENCODER 0
-#define CONFIG_LIBKVAZAAR_ENCODER 0
-#define CONFIG_MJPEG_VAAPI_ENCODER 0
-#define CONFIG_MPEG2_QSV_ENCODER 0
-#define CONFIG_MPEG2_VAAPI_ENCODER 0
-#define CONFIG_MPEG4_V4L2M2M_ENCODER 0
-#define CONFIG_VP8_V4L2M2M_ENCODER 0
-#define CONFIG_VP8_VAAPI_ENCODER 0
-#define CONFIG_VP9_VAAPI_ENCODER 0
-#define CONFIG_ABENCH_FILTER 0
-#define CONFIG_ACOMPRESSOR_FILTER 0
-#define CONFIG_ACOPY_FILTER 0
-#define CONFIG_ACROSSFADE_FILTER 0
-#define CONFIG_ACRUSHER_FILTER 0
-#define CONFIG_ADELAY_FILTER 0
-#define CONFIG_AECHO_FILTER 0
-#define CONFIG_AEMPHASIS_FILTER 0
-#define CONFIG_AEVAL_FILTER 0
-#define CONFIG_AFADE_FILTER 0
-#define CONFIG_AFFTFILT_FILTER 0
-#define CONFIG_AFIR_FILTER 0
-#define CONFIG_AFORMAT_FILTER 0
-#define CONFIG_AGATE_FILTER 0
-#define CONFIG_AINTERLEAVE_FILTER 0
-#define CONFIG_ALIMITER_FILTER 0
-#define CONFIG_ALLPASS_FILTER 0
-#define CONFIG_ALOOP_FILTER 0
-#define CONFIG_AMERGE_FILTER 0
-#define CONFIG_AMETADATA_FILTER 0
-#define CONFIG_AMIX_FILTER 0
-#define CONFIG_ANEQUALIZER_FILTER 0
-#define CONFIG_ANULL_FILTER 0
-#define CONFIG_APAD_FILTER 0
-#define CONFIG_APERMS_FILTER 0
-#define CONFIG_APHASER_FILTER 0
-#define CONFIG_APULSATOR_FILTER 0
-#define CONFIG_AREALTIME_FILTER 0
-#define CONFIG_ARESAMPLE_FILTER 0
-#define CONFIG_AREVERSE_FILTER 0
-#define CONFIG_ASELECT_FILTER 0
-#define CONFIG_ASENDCMD_FILTER 0
-#define CONFIG_ASETNSAMPLES_FILTER 0
-#define CONFIG_ASETPTS_FILTER 0
-#define CONFIG_ASETRATE_FILTER 0
-#define CONFIG_ASETTB_FILTER 0
-#define CONFIG_ASHOWINFO_FILTER 0
-#define CONFIG_ASIDEDATA_FILTER 0
-#define CONFIG_ASPLIT_FILTER 0
-#define CONFIG_ASTATS_FILTER 0
-#define CONFIG_ASTREAMSELECT_FILTER 0
-#define CONFIG_ATEMPO_FILTER 0
-#define CONFIG_ATRIM_FILTER 0
-#define CONFIG_AZMQ_FILTER 0
-#define CONFIG_BANDPASS_FILTER 0
-#define CONFIG_BANDREJECT_FILTER 0
-#define CONFIG_BASS_FILTER 0
-#define CONFIG_BIQUAD_FILTER 0
-#define CONFIG_BS2B_FILTER 0
-#define CONFIG_CHANNELMAP_FILTER 0
-#define CONFIG_CHANNELSPLIT_FILTER 0
-#define CONFIG_CHORUS_FILTER 0
-#define CONFIG_COMPAND_FILTER 0
-#define CONFIG_COMPENSATIONDELAY_FILTER 0
-#define CONFIG_CROSSFEED_FILTER 0
-#define CONFIG_CRYSTALIZER_FILTER 0
-#define CONFIG_DCSHIFT_FILTER 0
-#define CONFIG_DYNAUDNORM_FILTER 0
-#define CONFIG_EARWAX_FILTER 0
-#define CONFIG_EBUR128_FILTER 0
-#define CONFIG_EQUALIZER_FILTER 0
-#define CONFIG_EXTRASTEREO_FILTER 0
-#define CONFIG_FIREQUALIZER_FILTER 0
-#define CONFIG_FLANGER_FILTER 0
-#define CONFIG_HAAS_FILTER 0
-#define CONFIG_HDCD_FILTER 0
-#define CONFIG_HEADPHONE_FILTER 0
-#define CONFIG_HIGHPASS_FILTER 0
-#define CONFIG_JOIN_FILTER 0
-#define CONFIG_LADSPA_FILTER 0
-#define CONFIG_LOUDNORM_FILTER 0
-#define CONFIG_LOWPASS_FILTER 0
-#define CONFIG_PAN_FILTER 0
-#define CONFIG_REPLAYGAIN_FILTER 0
-#define CONFIG_RESAMPLE_FILTER 0
-#define CONFIG_RUBBERBAND_FILTER 0
-#define CONFIG_SIDECHAINCOMPRESS_FILTER 0
-#define CONFIG_SIDECHAINGATE_FILTER 0
-#define CONFIG_SILENCEDETECT_FILTER 0
-#define CONFIG_SILENCEREMOVE_FILTER 0
-#define CONFIG_SOFALIZER_FILTER 0
-#define CONFIG_STEREOTOOLS_FILTER 0
-#define CONFIG_STEREOWIDEN_FILTER 0
-#define CONFIG_SUPEREQUALIZER_FILTER 0
-#define CONFIG_SURROUND_FILTER 0
-#define CONFIG_TREBLE_FILTER 0
-#define CONFIG_TREMOLO_FILTER 0
-#define CONFIG_VIBRATO_FILTER 0
-#define CONFIG_VOLUME_FILTER 0
-#define CONFIG_VOLUMEDETECT_FILTER 0
-#define CONFIG_AEVALSRC_FILTER 0
-#define CONFIG_ANOISESRC_FILTER 0
-#define CONFIG_ANULLSRC_FILTER 0
-#define CONFIG_FLITE_FILTER 0
-#define CONFIG_SINE_FILTER 0
-#define CONFIG_ANULLSINK_FILTER 0
-#define CONFIG_ALPHAEXTRACT_FILTER 0
-#define CONFIG_ALPHAMERGE_FILTER 0
-#define CONFIG_ASS_FILTER 0
-#define CONFIG_ATADENOISE_FILTER 0
-#define CONFIG_AVGBLUR_FILTER 0
-#define CONFIG_BBOX_FILTER 0
-#define CONFIG_BENCH_FILTER 0
-#define CONFIG_BITPLANENOISE_FILTER 0
-#define CONFIG_BLACKDETECT_FILTER 0
-#define CONFIG_BLACKFRAME_FILTER 0
-#define CONFIG_BLEND_FILTER 0
-#define CONFIG_BOXBLUR_FILTER 0
-#define CONFIG_BWDIF_FILTER 0
-#define CONFIG_CHROMAKEY_FILTER 0
-#define CONFIG_CIESCOPE_FILTER 0
-#define CONFIG_CODECVIEW_FILTER 0
-#define CONFIG_COLORBALANCE_FILTER 0
-#define CONFIG_COLORCHANNELMIXER_FILTER 0
-#define CONFIG_COLORKEY_FILTER 0
-#define CONFIG_COLORLEVELS_FILTER 0
-#define CONFIG_COLORMATRIX_FILTER 0
-#define CONFIG_COLORSPACE_FILTER 0
-#define CONFIG_CONVOLUTION_FILTER 0
-#define CONFIG_CONVOLVE_FILTER 0
-#define CONFIG_COPY_FILTER 0
-#define CONFIG_COREIMAGE_FILTER 0
-#define CONFIG_COVER_RECT_FILTER 0
-#define CONFIG_CROP_FILTER 0
-#define CONFIG_CROPDETECT_FILTER 0
-#define CONFIG_CURVES_FILTER 0
-#define CONFIG_DATASCOPE_FILTER 0
-#define CONFIG_DCTDNOIZ_FILTER 0
-#define CONFIG_DEBAND_FILTER 0
-#define CONFIG_DECIMATE_FILTER 0
-#define CONFIG_DEFLATE_FILTER 0
-#define CONFIG_DEFLICKER_FILTER 0
-#define CONFIG_DEINTERLACE_QSV_FILTER 0
-#define CONFIG_DEINTERLACE_VAAPI_FILTER 0
-#define CONFIG_DEJUDDER_FILTER 0
-#define CONFIG_DELOGO_FILTER 0
-#define CONFIG_DESHAKE_FILTER 0
-#define CONFIG_DESPILL_FILTER 0
-#define CONFIG_DETELECINE_FILTER 0
-#define CONFIG_DILATION_FILTER 0
-#define CONFIG_DISPLACE_FILTER 0
-#define CONFIG_DOUBLEWEAVE_FILTER 0
-#define CONFIG_DRAWBOX_FILTER 0
-#define CONFIG_DRAWGRAPH_FILTER 0
-#define CONFIG_DRAWGRID_FILTER 0
-#define CONFIG_DRAWTEXT_FILTER 0
-#define CONFIG_EDGEDETECT_FILTER 0
-#define CONFIG_ELBG_FILTER 0
-#define CONFIG_EQ_FILTER 0
-#define CONFIG_EROSION_FILTER 0
-#define CONFIG_EXTRACTPLANES_FILTER 0
-#define CONFIG_FADE_FILTER 0
-#define CONFIG_FFTFILT_FILTER 0
-#define CONFIG_FIELD_FILTER 0
-#define CONFIG_FIELDHINT_FILTER 0
-#define CONFIG_FIELDMATCH_FILTER 0
-#define CONFIG_FIELDORDER_FILTER 0
-#define CONFIG_FIND_RECT_FILTER 0
-#define CONFIG_FLOODFILL_FILTER 0
-#define CONFIG_FORMAT_FILTER 0
-#define CONFIG_FPS_FILTER 0
-#define CONFIG_FRAMEPACK_FILTER 0
-#define CONFIG_FRAMERATE_FILTER 0
-#define CONFIG_FRAMESTEP_FILTER 0
-#define CONFIG_FREI0R_FILTER 0
-#define CONFIG_FSPP_FILTER 0
-#define CONFIG_GBLUR_FILTER 0
-#define CONFIG_GEQ_FILTER 0
-#define CONFIG_GRADFUN_FILTER 0
-#define CONFIG_HALDCLUT_FILTER 0
-#define CONFIG_HFLIP_FILTER 0
-#define CONFIG_HISTEQ_FILTER 0
-#define CONFIG_HISTOGRAM_FILTER 0
-#define CONFIG_HQDN3D_FILTER 0
-#define CONFIG_HQX_FILTER 0
-#define CONFIG_HSTACK_FILTER 0
-#define CONFIG_HUE_FILTER 0
-#define CONFIG_HWDOWNLOAD_FILTER 0
-#define CONFIG_HWMAP_FILTER 0
-#define CONFIG_HWUPLOAD_FILTER 0
-#define CONFIG_HWUPLOAD_CUDA_FILTER 0
-#define CONFIG_HYSTERESIS_FILTER 0
-#define CONFIG_IDET_FILTER 0
-#define CONFIG_IL_FILTER 0
-#define CONFIG_INFLATE_FILTER 0
-#define CONFIG_INTERLACE_FILTER 0
-#define CONFIG_INTERLEAVE_FILTER 0
-#define CONFIG_KERNDEINT_FILTER 0
-#define CONFIG_LENSCORRECTION_FILTER 0
-#define CONFIG_LIBVMAF_FILTER 0
-#define CONFIG_LIMITER_FILTER 0
-#define CONFIG_LOOP_FILTER 0
-#define CONFIG_LUMAKEY_FILTER 0
-#define CONFIG_LUT_FILTER 0
-#define CONFIG_LUT2_FILTER 0
-#define CONFIG_LUT3D_FILTER 0
-#define CONFIG_LUTRGB_FILTER 0
-#define CONFIG_LUTYUV_FILTER 0
-#define CONFIG_MASKEDCLAMP_FILTER 0
-#define CONFIG_MASKEDMERGE_FILTER 0
-#define CONFIG_MCDEINT_FILTER 0
-#define CONFIG_MERGEPLANES_FILTER 0
-#define CONFIG_MESTIMATE_FILTER 0
-#define CONFIG_METADATA_FILTER 0
-#define CONFIG_MIDEQUALIZER_FILTER 0
-#define CONFIG_MINTERPOLATE_FILTER 0
-#define CONFIG_MPDECIMATE_FILTER 0
-#define CONFIG_NEGATE_FILTER 0
-#define CONFIG_NLMEANS_FILTER 0
-#define CONFIG_NNEDI_FILTER 0
-#define CONFIG_NOFORMAT_FILTER 0
-#define CONFIG_NOISE_FILTER 0
-#define CONFIG_NULL_FILTER 0
-#define CONFIG_OCR_FILTER 0
-#define CONFIG_OCV_FILTER 0
-#define CONFIG_OSCILLOSCOPE_FILTER 0
-#define CONFIG_OVERLAY_FILTER 0
-#define CONFIG_OWDENOISE_FILTER 0
-#define CONFIG_PAD_FILTER 0
-#define CONFIG_PALETTEGEN_FILTER 0
-#define CONFIG_PALETTEUSE_FILTER 0
-#define CONFIG_PERMS_FILTER 0
-#define CONFIG_PERSPECTIVE_FILTER 0
-#define CONFIG_PHASE_FILTER 0
-#define CONFIG_PIXDESCTEST_FILTER 0
-#define CONFIG_PIXSCOPE_FILTER 0
-#define CONFIG_PP_FILTER 0
-#define CONFIG_PP7_FILTER 0
-#define CONFIG_PREMULTIPLY_FILTER 0
-#define CONFIG_PREWITT_FILTER 0
-#define CONFIG_PSEUDOCOLOR_FILTER 0
-#define CONFIG_PSNR_FILTER 0
-#define CONFIG_PULLUP_FILTER 0
-#define CONFIG_QP_FILTER 0
-#define CONFIG_RANDOM_FILTER 0
-#define CONFIG_READEIA608_FILTER 0
-#define CONFIG_READVITC_FILTER 0
-#define CONFIG_REALTIME_FILTER 0
-#define CONFIG_REMAP_FILTER 0
-#define CONFIG_REMOVEGRAIN_FILTER 0
-#define CONFIG_REMOVELOGO_FILTER 0
-#define CONFIG_REPEATFIELDS_FILTER 0
-#define CONFIG_REVERSE_FILTER 0
-#define CONFIG_ROBERTS_FILTER 0
-#define CONFIG_ROTATE_FILTER 0
-#define CONFIG_SAB_FILTER 0
-#define CONFIG_SCALE_FILTER 0
-#define CONFIG_SCALE_CUDA_FILTER 0
-#define CONFIG_SCALE_NPP_FILTER 0
-#define CONFIG_SCALE_QSV_FILTER 0
-#define CONFIG_SCALE_VAAPI_FILTER 0
-#define CONFIG_SCALE2REF_FILTER 0
-#define CONFIG_SELECT_FILTER 0
-#define CONFIG_SELECTIVECOLOR_FILTER 0
-#define CONFIG_SENDCMD_FILTER 0
-#define CONFIG_SEPARATEFIELDS_FILTER 0
-#define CONFIG_SETDAR_FILTER 0
-#define CONFIG_SETFIELD_FILTER 0
-#define CONFIG_SETPTS_FILTER 0
-#define CONFIG_SETSAR_FILTER 0
-#define CONFIG_SETTB_FILTER 0
-#define CONFIG_SHOWINFO_FILTER 0
-#define CONFIG_SHOWPALETTE_FILTER 0
-#define CONFIG_SHUFFLEFRAMES_FILTER 0
-#define CONFIG_SHUFFLEPLANES_FILTER 0
-#define CONFIG_SIDEDATA_FILTER 0
-#define CONFIG_SIGNALSTATS_FILTER 0
-#define CONFIG_SIGNATURE_FILTER 0
-#define CONFIG_SMARTBLUR_FILTER 0
-#define CONFIG_SOBEL_FILTER 0
-#define CONFIG_SPLIT_FILTER 0
-#define CONFIG_SPP_FILTER 0
-#define CONFIG_SSIM_FILTER 0
-#define CONFIG_STEREO3D_FILTER 0
-#define CONFIG_STREAMSELECT_FILTER 0
-#define CONFIG_SUBTITLES_FILTER 0
-#define CONFIG_SUPER2XSAI_FILTER 0
-#define CONFIG_SWAPRECT_FILTER 0
-#define CONFIG_SWAPUV_FILTER 0
-#define CONFIG_TBLEND_FILTER 0
-#define CONFIG_TELECINE_FILTER 0
-#define CONFIG_THRESHOLD_FILTER 0
-#define CONFIG_THUMBNAIL_FILTER 0
-#define CONFIG_THUMBNAIL_CUDA_FILTER 0
-#define CONFIG_TILE_FILTER 0
-#define CONFIG_TINTERLACE_FILTER 0
-#define CONFIG_TLUT2_FILTER 0
-#define CONFIG_TONEMAP_FILTER 0
-#define CONFIG_TRANSPOSE_FILTER 0
-#define CONFIG_TRIM_FILTER 0
-#define CONFIG_UNPREMULTIPLY_FILTER 0
-#define CONFIG_UNSHARP_FILTER 0
-#define CONFIG_USPP_FILTER 0
-#define CONFIG_VAGUEDENOISER_FILTER 0
-#define CONFIG_VECTORSCOPE_FILTER 0
-#define CONFIG_VFLIP_FILTER 0
-#define CONFIG_VIDSTABDETECT_FILTER 0
-#define CONFIG_VIDSTABTRANSFORM_FILTER 0
-#define CONFIG_VIGNETTE_FILTER 0
-#define CONFIG_VMAFMOTION_FILTER 0
-#define CONFIG_VSTACK_FILTER 0
-#define CONFIG_W3FDIF_FILTER 0
-#define CONFIG_WAVEFORM_FILTER 0
-#define CONFIG_WEAVE_FILTER 0
-#define CONFIG_XBR_FILTER 0
-#define CONFIG_YADIF_FILTER 0
-#define CONFIG_ZMQ_FILTER 0
-#define CONFIG_ZOOMPAN_FILTER 0
-#define CONFIG_ZSCALE_FILTER 0
-#define CONFIG_ALLRGB_FILTER 0
-#define CONFIG_ALLYUV_FILTER 0
-#define CONFIG_CELLAUTO_FILTER 0
-#define CONFIG_COLOR_FILTER 0
-#define CONFIG_COREIMAGESRC_FILTER 0
-#define CONFIG_FREI0R_SRC_FILTER 0
-#define CONFIG_HALDCLUTSRC_FILTER 0
-#define CONFIG_LIFE_FILTER 0
-#define CONFIG_MANDELBROT_FILTER 0
-#define CONFIG_MPTESTSRC_FILTER 0
-#define CONFIG_NULLSRC_FILTER 0
-#define CONFIG_RGBTESTSRC_FILTER 0
-#define CONFIG_SMPTEBARS_FILTER 0
-#define CONFIG_SMPTEHDBARS_FILTER 0
-#define CONFIG_TESTSRC_FILTER 0
-#define CONFIG_TESTSRC2_FILTER 0
-#define CONFIG_YUVTESTSRC_FILTER 0
-#define CONFIG_NULLSINK_FILTER 0
-#define CONFIG_ABITSCOPE_FILTER 0
-#define CONFIG_ADRAWGRAPH_FILTER 0
-#define CONFIG_AHISTOGRAM_FILTER 0
-#define CONFIG_APHASEMETER_FILTER 0
-#define CONFIG_AVECTORSCOPE_FILTER 0
-#define CONFIG_CONCAT_FILTER 0
-#define CONFIG_SHOWCQT_FILTER 0
-#define CONFIG_SHOWFREQS_FILTER 0
-#define CONFIG_SHOWSPECTRUM_FILTER 0
-#define CONFIG_SHOWSPECTRUMPIC_FILTER 0
-#define CONFIG_SHOWVOLUME_FILTER 0
-#define CONFIG_SHOWWAVES_FILTER 0
-#define CONFIG_SHOWWAVESPIC_FILTER 0
-#define CONFIG_SPECTRUMSYNTH_FILTER 0
-#define CONFIG_AMOVIE_FILTER 0
-#define CONFIG_MOVIE_FILTER 0
-#define CONFIG_H263_VAAPI_HWACCEL 0
-#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_H264_CUVID_HWACCEL 0
-#define CONFIG_H264_D3D11VA_HWACCEL 0
-#define CONFIG_H264_D3D11VA2_HWACCEL 0
-#define CONFIG_H264_DXVA2_HWACCEL 0
-#define CONFIG_H264_MEDIACODEC_HWACCEL 0
-#define CONFIG_H264_MMAL_HWACCEL 0
-#define CONFIG_H264_QSV_HWACCEL 0
-#define CONFIG_H264_VAAPI_HWACCEL 0
-#define CONFIG_H264_VDA_HWACCEL 0
-#define CONFIG_H264_VDA_OLD_HWACCEL 0
-#define CONFIG_H264_VDPAU_HWACCEL 0
-#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_HEVC_CUVID_HWACCEL 0
-#define CONFIG_HEVC_D3D11VA_HWACCEL 0
-#define CONFIG_HEVC_D3D11VA2_HWACCEL 0
-#define CONFIG_HEVC_DXVA2_HWACCEL 0
-#define CONFIG_HEVC_MEDIACODEC_HWACCEL 0
-#define CONFIG_HEVC_QSV_HWACCEL 0
-#define CONFIG_HEVC_VAAPI_HWACCEL 0
-#define CONFIG_HEVC_VDPAU_HWACCEL 0
-#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MJPEG_CUVID_HWACCEL 0
-#define CONFIG_MPEG1_CUVID_HWACCEL 0
-#define CONFIG_MPEG1_XVMC_HWACCEL 0
-#define CONFIG_MPEG1_VDPAU_HWACCEL 0
-#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MPEG2_CUVID_HWACCEL 0
-#define CONFIG_MPEG2_XVMC_HWACCEL 0
-#define CONFIG_MPEG2_D3D11VA_HWACCEL 0
-#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0
-#define CONFIG_MPEG2_DXVA2_HWACCEL 0
-#define CONFIG_MPEG2_MMAL_HWACCEL 0
-#define CONFIG_MPEG2_QSV_HWACCEL 0
-#define CONFIG_MPEG2_VAAPI_HWACCEL 0
-#define CONFIG_MPEG2_VDPAU_HWACCEL 0
-#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_MPEG2_MEDIACODEC_HWACCEL 0
-#define CONFIG_MPEG4_CUVID_HWACCEL 0
-#define CONFIG_MPEG4_MEDIACODEC_HWACCEL 0
-#define CONFIG_MPEG4_MMAL_HWACCEL 0
-#define CONFIG_MPEG4_VAAPI_HWACCEL 0
-#define CONFIG_MPEG4_VDPAU_HWACCEL 0
-#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0
-#define CONFIG_VC1_CUVID_HWACCEL 0
-#define CONFIG_VC1_D3D11VA_HWACCEL 0
-#define CONFIG_VC1_D3D11VA2_HWACCEL 0
-#define CONFIG_VC1_DXVA2_HWACCEL 0
-#define CONFIG_VC1_VAAPI_HWACCEL 0
-#define CONFIG_VC1_VDPAU_HWACCEL 0
-#define CONFIG_VC1_MMAL_HWACCEL 0
-#define CONFIG_VC1_QSV_HWACCEL 0
-#define CONFIG_VP8_CUVID_HWACCEL 0
-#define CONFIG_VP8_MEDIACODEC_HWACCEL 0
-#define CONFIG_VP8_QSV_HWACCEL 0
-#define CONFIG_VP9_CUVID_HWACCEL 0
-#define CONFIG_VP9_D3D11VA_HWACCEL 0
-#define CONFIG_VP9_D3D11VA2_HWACCEL 0
-#define CONFIG_VP9_DXVA2_HWACCEL 0
-#define CONFIG_VP9_MEDIACODEC_HWACCEL 0
-#define CONFIG_VP9_VAAPI_HWACCEL 0
-#define CONFIG_WMV3_D3D11VA_HWACCEL 0
-#define CONFIG_WMV3_D3D11VA2_HWACCEL 0
-#define CONFIG_WMV3_DXVA2_HWACCEL 0
-#define CONFIG_WMV3_VAAPI_HWACCEL 0
-#define CONFIG_WMV3_VDPAU_HWACCEL 0
-#define CONFIG_ALSA_INDEV 0
-#define CONFIG_AVFOUNDATION_INDEV 0
-#define CONFIG_BKTR_INDEV 0
-#define CONFIG_DECKLINK_INDEV 0
-#define CONFIG_LIBNDI_NEWTEK_INDEV 0
-#define CONFIG_DSHOW_INDEV 0
-#define CONFIG_FBDEV_INDEV 0
-#define CONFIG_GDIGRAB_INDEV 0
-#define CONFIG_IEC61883_INDEV 0
-#define CONFIG_JACK_INDEV 0
-#define CONFIG_KMSGRAB_INDEV 0
-#define CONFIG_LAVFI_INDEV 0
-#define CONFIG_OPENAL_INDEV 0
-#define CONFIG_OSS_INDEV 0
-#define CONFIG_PULSE_INDEV 0
-#define CONFIG_SNDIO_INDEV 0
-#define CONFIG_V4L2_INDEV 0
-#define CONFIG_VFWCAP_INDEV 0
-#define CONFIG_XCBGRAB_INDEV 0
-#define CONFIG_LIBCDIO_INDEV 0
-#define CONFIG_LIBDC1394_INDEV 0
+#define CONFIG_VAPOURSYNTH_DEMUXER 0
 #define CONFIG_A64_MUXER 0
 #define CONFIG_AC3_MUXER 0
 #define CONFIG_ADTS_MUXER 0
@@ -2106,6 +2252,8 @@
 #define CONFIG_AIFF_MUXER 0
 #define CONFIG_AMR_MUXER 0
 #define CONFIG_APNG_MUXER 0
+#define CONFIG_APTX_MUXER 0
+#define CONFIG_APTX_HD_MUXER 0
 #define CONFIG_ASF_MUXER 0
 #define CONFIG_ASS_MUXER 0
 #define CONFIG_AST_MUXER 0
@@ -2113,9 +2261,12 @@
 #define CONFIG_AU_MUXER 0
 #define CONFIG_AVI_MUXER 0
 #define CONFIG_AVM2_MUXER 0
+#define CONFIG_AVS2_MUXER 0
 #define CONFIG_BIT_MUXER 0
 #define CONFIG_CAF_MUXER 0
 #define CONFIG_CAVSVIDEO_MUXER 0
+#define CONFIG_CODEC2_MUXER 0
+#define CONFIG_CODEC2RAW_MUXER 0
 #define CONFIG_CRC_MUXER 0
 #define CONFIG_DASH_MUXER 0
 #define CONFIG_DATA_MUXER 0
@@ -2126,9 +2277,9 @@
 #define CONFIG_DV_MUXER 0
 #define CONFIG_EAC3_MUXER 0
 #define CONFIG_F4V_MUXER 0
-#define CONFIG_FFM_MUXER 0
 #define CONFIG_FFMETADATA_MUXER 0
 #define CONFIG_FIFO_MUXER 0
+#define CONFIG_FIFO_TEST_MUXER 0
 #define CONFIG_FILMSTRIP_MUXER 0
 #define CONFIG_FITS_MUXER 0
 #define CONFIG_FLAC_MUXER 0
@@ -2221,7 +2372,9 @@
 #define CONFIG_RTP_MPEGTS_MUXER 0
 #define CONFIG_RTSP_MUXER 0
 #define CONFIG_SAP_MUXER 0
+#define CONFIG_SBC_MUXER 0
 #define CONFIG_SCC_MUXER 0
+#define CONFIG_SEGAFILM_MUXER 0
 #define CONFIG_SEGMENT_MUXER 0
 #define CONFIG_STREAM_SEGMENT_MUXER 0
 #define CONFIG_SINGLEJPEG_MUXER 0
@@ -2254,58 +2407,6 @@
 #define CONFIG_WV_MUXER 0
 #define CONFIG_YUV4MPEGPIPE_MUXER 0
 #define CONFIG_CHROMAPRINT_MUXER 0
-#define CONFIG_ALSA_OUTDEV 0
-#define CONFIG_CACA_OUTDEV 0
-#define CONFIG_DECKLINK_OUTDEV 0
-#define CONFIG_LIBNDI_NEWTEK_OUTDEV 0
-#define CONFIG_FBDEV_OUTDEV 0
-#define CONFIG_OPENGL_OUTDEV 0
-#define CONFIG_OSS_OUTDEV 0
-#define CONFIG_PULSE_OUTDEV 0
-#define CONFIG_SDL2_OUTDEV 0
-#define CONFIG_SNDIO_OUTDEV 0
-#define CONFIG_V4L2_OUTDEV 0
-#define CONFIG_XV_OUTDEV 0
-#define CONFIG_AAC_PARSER 1
-#define CONFIG_AAC_LATM_PARSER 0
-#define CONFIG_AC3_PARSER 0
-#define CONFIG_ADX_PARSER 0
-#define CONFIG_BMP_PARSER 0
-#define CONFIG_CAVSVIDEO_PARSER 0
-#define CONFIG_COOK_PARSER 0
-#define CONFIG_DCA_PARSER 0
-#define CONFIG_DIRAC_PARSER 0
-#define CONFIG_DNXHD_PARSER 0
-#define CONFIG_DPX_PARSER 0
-#define CONFIG_DVAUDIO_PARSER 0
-#define CONFIG_DVBSUB_PARSER 0
-#define CONFIG_DVDSUB_PARSER 0
-#define CONFIG_DVD_NAV_PARSER 0
-#define CONFIG_FLAC_PARSER 1
-#define CONFIG_G729_PARSER 0
-#define CONFIG_GSM_PARSER 1
-#define CONFIG_H261_PARSER 0
-#define CONFIG_H263_PARSER 1
-#define CONFIG_H264_PARSER 1
-#define CONFIG_HEVC_PARSER 0
-#define CONFIG_MJPEG_PARSER 0
-#define CONFIG_MLP_PARSER 0
-#define CONFIG_MPEG4VIDEO_PARSER 1
-#define CONFIG_MPEGAUDIO_PARSER 1
-#define CONFIG_MPEGVIDEO_PARSER 0
-#define CONFIG_OPUS_PARSER 1
-#define CONFIG_PNG_PARSER 0
-#define CONFIG_PNM_PARSER 0
-#define CONFIG_RV30_PARSER 0
-#define CONFIG_RV40_PARSER 0
-#define CONFIG_SIPR_PARSER 0
-#define CONFIG_TAK_PARSER 0
-#define CONFIG_VC1_PARSER 0
-#define CONFIG_VORBIS_PARSER 1
-#define CONFIG_VP3_PARSER 1
-#define CONFIG_VP8_PARSER 1
-#define CONFIG_VP9_PARSER 1
-#define CONFIG_XMA_PARSER 0
 #define CONFIG_ASYNC_PROTOCOL 0
 #define CONFIG_BLURAY_PROTOCOL 0
 #define CONFIG_CACHE_PROTOCOL 0
@@ -2339,10 +2440,7 @@
 #define CONFIG_SUBFILE_PROTOCOL 0
 #define CONFIG_TEE_PROTOCOL 0
 #define CONFIG_TCP_PROTOCOL 0
-#define CONFIG_TLS_GNUTLS_PROTOCOL 0
-#define CONFIG_TLS_SCHANNEL_PROTOCOL 0
-#define CONFIG_TLS_SECURETRANSPORT_PROTOCOL 0
-#define CONFIG_TLS_OPENSSL_PROTOCOL 0
+#define CONFIG_TLS_PROTOCOL 0
 #define CONFIG_UDP_PROTOCOL 0
 #define CONFIG_UDPLITE_PROTOCOL 0
 #define CONFIG_UNIX_PROTOCOL 0
@@ -2351,6 +2449,7 @@
 #define CONFIG_LIBRTMPS_PROTOCOL 0
 #define CONFIG_LIBRTMPT_PROTOCOL 0
 #define CONFIG_LIBRTMPTE_PROTOCOL 0
+#define CONFIG_LIBSRT_PROTOCOL 0
 #define CONFIG_LIBSSH_PROTOCOL 0
 #define CONFIG_LIBSMBCLIENT_PROTOCOL 0
 #endif /* FFMPEG_CONFIG_H */

diff --git a/fuchsia/config/max/x64/libavcodec/bsf_list.c b/fuchsia/config/max/x64/libavcodec/bsf_list.c
index d31ece9..ee5ac8a 100644
--- a/fuchsia/config/max/x64/libavcodec/bsf_list.c
+++ b/fuchsia/config/max/x64/libavcodec/bsf_list.c

@@ -1,3 +1,4 @@
 static const AVBitStreamFilter * const bitstream_filters[] = {
     &ff_null_bsf,
+    &ff_vp9_superframe_split_bsf,
     NULL };

diff --git a/fuchsia/config/max/x64/libavutil/avconfig.h b/fuchsia/config/max/x64/libavutil/avconfig.h
index f10aa61..c289fbb 100644
--- a/fuchsia/config/max/x64/libavutil/avconfig.h
+++ b/fuchsia/config/max/x64/libavutil/avconfig.h

@@ -1,4 +1,4 @@
-/* Generated by ffconf */
+/* Generated by ffmpeg configure */
 #ifndef AVUTIL_AVCONFIG_H
 #define AVUTIL_AVCONFIG_H
 #define AV_HAVE_BIGENDIAN 0

diff --git a/fuchsia/config/max/x64/libavutil/ffversion.h b/fuchsia/config/max/x64/libavutil/ffversion.h
index 0342cbb..b97c110 100644
--- a/fuchsia/config/max/x64/libavutil/ffversion.h
+++ b/fuchsia/config/max/x64/libavutil/ffversion.h

@@ -1,5 +1,5 @@
 /* Automatically generated by version.sh, do not manually edit! */
 #ifndef AVUTIL_FFVERSION_H
 #define AVUTIL_FFVERSION_H
-#define FFMPEG_VERSION "n3.4.2-1-g67e0ba6f22"
+#define FFMPEG_VERSION "N-92356-g750018e43a"
 #endif /* AVUTIL_FFVERSION_H */

diff --git a/fuchsia/scripts/build_ffmpeg.py b/fuchsia/scripts/build_ffmpeg.py
index 9df71b5..edf0120 100755
--- a/fuchsia/scripts/build_ffmpeg.py
+++ b/fuchsia/scripts/build_ffmpeg.py

@@ -41,7 +41,7 @@
 
 
 def PrintAndCheckCall(argv, *args, **kwargs):
-  print('Running %r' % argv)
+  print('Running %s' % '\n '.join(argv))
   subprocess.check_call(argv, *args, **kwargs)
 
 
@@ -84,9 +84,9 @@
     print('Skipping build step as requested.')
   else:
     libraries = [
-        os.path.join('libavcodec', GetDsoName('avcodec', 57)),
-        os.path.join('libavformat', GetDsoName('avformat', 57)),
-        os.path.join('libavutil', GetDsoName('avutil', 55)),
+        os.path.join('libavcodec', GetDsoName('avcodec', 58)),
+        os.path.join('libavformat', GetDsoName('avformat', 58)),
+        os.path.join('libavutil', GetDsoName('avutil', 56)),
     ]
     PrintAndCheckCall(
         ['make', '-j%d' % parallel_jobs] + libraries, cwd=config_dir)
@@ -145,6 +145,7 @@
       '--enable-static',
 
       # Disable features.
+      '--disable-debug',
       '--disable-bzlib',
       '--disable-iconv',
       '--disable-lzo',
@@ -158,30 +159,26 @@
       '--disable-faan',
       '--disable-alsa',
 
-      # Disable hardware decoding options which will sometimes turn on
-      # via autodetect.
-      '--disable-d3d11va',
-      '--disable-dxva2',
-      '--disable-vaapi',
-      '--disable-vda',
-      '--disable-vdpau',
-      '--disable-videotoolbox',
-      '--disable-nvenc',
-      '--disable-cuda',
-      '--disable-cuvid',
-      '--disable-v4l2_m2m',
+      # Disable automatically detected external libraries. This prevents
+      # automatic inclusion of things like hardware decoders. Each roll should
+      # audit new [autodetect] configure options and add any desired options to
+      # this file.
+      '--disable-autodetect',
 
       # Common codecs.
       '--enable-decoder=vorbis,flac',
-      '--enable-decoder=pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le',
+      '--enable-decoder=pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le,mp3',
       '--enable-decoder=pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw',
-      '--enable-decoder=theora,vp8',
-      '--enable-demuxer=ogg,matroska,wav,flac',
-      '--enable-parser=opus,vorbis,flac',
+      '--enable-decoder=theora,vp8,sbc,aptx',
+      '--enable-demuxer=ogg,matroska,wav,flac,mp3,mov',
+      '--enable-parser=opus,vorbis,flac,mpegaudio',
       '--enable-parser=vp3,vp8',
 
       '--optflags="-O2"',
-      '--enable-pic'
+      '--enable-pic',
+
+      # Force usage of yasm
+      '--x86asmexe=yasm',
   ])
 
   configure_flags['Common'].extend([
@@ -190,9 +187,9 @@
       '--cc=clang',
       '--cxx=clang++',
       '--ld=clang',
-      '--extra-ldflags=-fuse-ld=lld',
   ])
 
+
   # TODO(dalesat): determine if we can use --enable-lto in x64
   # TODO(dalesat): enable vp9 on arm64
 
@@ -202,6 +199,7 @@
         '--enable-parser=vp9',
         '--sysroot=' + os.path.join(
             FFMPEG_DIR, '..', '..', 'buildtools', 'linux-x64', 'sysroot'),
+        '--extra-ldflags=-fuse-ld=lld',
     ])
   elif target_arch == 'arm64':
     configure_flags['Common'].extend([

diff --git a/fuchsia/scripts/copy_config.sh b/fuchsia/scripts/copy_config.sh
index 5eef67f..f8c01d3 100755
--- a/fuchsia/scripts/copy_config.sh
+++ b/fuchsia/scripts/copy_config.sh

@@ -12,7 +12,7 @@
   for arch in x64 arm64; do
     # Don't waste time on non-existent configs, if no config.h then skip.
     [ ! -e "build.$arch/$profile/config.h" ] && continue
-    for f in config.h config.asm libavutil/avconfig.h libavutil/ffversion.h libavcodec/bsf_list.c libavformat/protocol_list.c; do
+    for f in config.h config.asm libavutil/avconfig.h libavutil/ffversion.h libavcodec/bsf_list.c libavcodec/codec_list.c libavcodec/parser_list.c libavformat/demuxer_list.c libavformat/muxer_list.c libavformat/protocol_list.c; do
       FROM="build.$arch/$profile/$f"
       TO="fuchsia/config/$profile/$arch/$f"
       if [ "$(dirname $f)" != "" ]; then mkdir -p $(dirname $TO); fi

diff --git a/fuchsia/scripts/generate_gn.py b/fuchsia/scripts/generate_gn.py
index 0cc75cd..27f474e 100755
--- a/fuchsia/scripts/generate_gn.py
+++ b/fuchsia/scripts/generate_gn.py

@@ -564,6 +564,10 @@
     # Fuchsia generated files
     'config.h',
     os.path.join('libavcodec', 'bsf_list.c'),
+    os.path.join('libavcodec', 'codec_list.c'),
+    os.path.join('libavcodec', 'parser_list.c'),
+    os.path.join('libavformat', 'demuxer_list.c'),
+    os.path.join('libavformat', 'muxer_list.c'),
     os.path.join('libavformat', 'protocol_list.c'),
     os.path.join('libavutil', 'avconfig.h'),
     os.path.join('libavutil', 'ffversion.h'),

diff --git a/fuchsia/scripts/remove_unneeded_files.sh b/fuchsia/scripts/remove_unneeded_files.sh
deleted file mode 100755
index 9669ba7..0000000
--- a/fuchsia/scripts/remove_unneeded_files.sh
+++ /dev/null

@@ -1,7 +0,0 @@
-#!/bin/bash -e
-
-# Copyright (c) 2017 The Fuchsia Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-git grep -l "GNU\ General\ Public\ License" ":(exclude)fuchsia" | grep / | xargs rm -f --

diff --git a/libavcodec/.gitignore b/libavcodec/.gitignore
index 77a2ab1..28814f7 100644
--- a/libavcodec/.gitignore
+++ b/libavcodec/.gitignore

@@ -2,3 +2,5 @@
 /*_tables.c
 /*_tables.h
 /bsf_list.c
+/codec_list.c
+/parser_list.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index c4ec09b..ce766aa 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile

@@ -1,7 +1,9 @@
 NAME = avcodec
 DESC = FFmpeg codec library
 
-HEADERS = avcodec.h                                                     \
+HEADERS = ac3_parser.h                                                  \
+          adts_parser.h                                                 \
+          avcodec.h                                                     \
           avdct.h                                                       \
           avfft.h                                                       \
           d3d11va.h                                                     \
@@ -12,15 +14,15 @@
           mediacodec.h                                                  \
           qsv.h                                                         \
           vaapi.h                                                       \
-          vda.h                                                         \
           vdpau.h                                                       \
           version.h                                                     \
           videotoolbox.h                                                \
           vorbis_parser.h                                               \
           xvmc.h                                                        \
 
-OBJS = allcodecs.o                                                      \
-       audioconvert.o                                                   \
+OBJS = ac3_parser.o                                                     \
+       adts_parser.o                                                    \
+       allcodecs.o                                                      \
        avdct.o                                                          \
        avpacket.o                                                       \
        avpicture.o                                                      \
@@ -42,11 +44,10 @@
        options.o                                                        \
        mjpegenc_huffman.o                                               \
        parser.o                                                         \
+       parsers.o                                                        \
        profiles.o                                                       \
        qsv_api.o                                                        \
        raw.o                                                            \
-       resample.o                                                       \
-       resample2.o                                                      \
        utils.o                                                          \
        vorbis_parser.o                                                  \
        xiph.o                                                           \
@@ -54,11 +55,20 @@
 # subsystems
 OBJS-$(CONFIG_AANDCTTABLES)            += aandcttab.o
 OBJS-$(CONFIG_AC3DSP)                  += ac3dsp.o ac3.o ac3tab.o
+OBJS-$(CONFIG_ADTS_HEADER)             += adts_header.o mpeg4audio.o
+OBJS-$(CONFIG_AMF)                     += amfenc.o
 OBJS-$(CONFIG_AUDIO_FRAME_QUEUE)       += audio_frame_queue.o
 OBJS-$(CONFIG_AUDIODSP)                += audiodsp.o
 OBJS-$(CONFIG_BLOCKDSP)                += blockdsp.o
 OBJS-$(CONFIG_BSWAPDSP)                += bswapdsp.o
 OBJS-$(CONFIG_CABAC)                   += cabac.o
+OBJS-$(CONFIG_CBS)                     += cbs.o
+OBJS-$(CONFIG_CBS_AV1)                 += cbs_av1.o
+OBJS-$(CONFIG_CBS_H264)                += cbs_h2645.o h2645_parse.o
+OBJS-$(CONFIG_CBS_H265)                += cbs_h2645.o h2645_parse.o
+OBJS-$(CONFIG_CBS_JPEG)                += cbs_jpeg.o
+OBJS-$(CONFIG_CBS_MPEG2)               += cbs_mpeg2.o
+OBJS-$(CONFIG_CBS_VP9)                 += cbs_vp9.o
 OBJS-$(CONFIG_CRYSTALHD)               += crystalhd.o
 OBJS-$(CONFIG_DCT)                     += dct.o dct32_fixed.o dct32_float.o
 OBJS-$(CONFIG_ERROR_RESILIENCE)        += error_resilience.o
@@ -91,7 +101,6 @@
 OBJS-$(CONFIG_IVIDSP)                  += ivi_dsp.o
 OBJS-$(CONFIG_JNI)                     += ffjni.o jni.o
 OBJS-$(CONFIG_JPEGTABLES)              += jpegtables.o
-OBJS-$(CONFIG_LIBXVID)                 += libxvid_rc.o
 OBJS-$(CONFIG_LLAUDDSP)                += lossless_audiodsp.o
 OBJS-$(CONFIG_LLVIDDSP)                += lossless_videodsp.o
 OBJS-$(CONFIG_LLVIDENCDSP)             += lossless_videoencdsp.o
@@ -146,10 +155,10 @@
 OBJS-$(CONFIG_A64MULTI_ENCODER)        += a64multienc.o elbg.o
 OBJS-$(CONFIG_A64MULTI5_ENCODER)       += a64multienc.o elbg.o
 OBJS-$(CONFIG_AAC_DECODER)             += aacdec.o aactab.o aacsbr.o aacps_float.o \
-                                          aacadtsdec.o mpeg4audio.o kbdwin.o \
+                                          mpeg4audio.o kbdwin.o \
                                           sbrdsp.o aacpsdsp_float.o cbrt_data.o
 OBJS-$(CONFIG_AAC_FIXED_DECODER)       += aacdec_fixed.o aactab.o aacsbr_fixed.o aacps_fixed.o \
-                                          aacadtsdec.o mpeg4audio.o kbdwin.o \
+                                          mpeg4audio.o kbdwin.o \
                                           sbrdsp_fixed.o aacpsdsp_fixed.o cbrt_data_fixed.o
 OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o aacenctab.o    \
                                           aacpsy.o aactab.o      \
@@ -183,6 +192,10 @@
 OBJS-$(CONFIG_ANM_DECODER)             += anm.o
 OBJS-$(CONFIG_ANSI_DECODER)            += ansi.o cga_data.o
 OBJS-$(CONFIG_APE_DECODER)             += apedec.o
+OBJS-$(CONFIG_APTX_DECODER)            += aptx.o
+OBJS-$(CONFIG_APTX_ENCODER)            += aptx.o
+OBJS-$(CONFIG_APTX_HD_DECODER)         += aptx.o
+OBJS-$(CONFIG_APTX_HD_ENCODER)         += aptx.o
 OBJS-$(CONFIG_APNG_DECODER)            += png.o pngdec.o pngdsp.o
 OBJS-$(CONFIG_APNG_ENCODER)            += png.o pngenc.o
 OBJS-$(CONFIG_SSA_DECODER)             += assdec.o ass.o
@@ -200,6 +213,7 @@
                                           atrac3plusdsp.o atrac.o
 OBJS-$(CONFIG_ATRAC3PAL_DECODER)       += atrac3plusdec.o atrac3plus.o \
                                           atrac3plusdsp.o atrac.o
+OBJS-$(CONFIG_ATRAC9_DECODER)          += atrac9dec.o
 OBJS-$(CONFIG_AURA_DECODER)            += cyuv.o
 OBJS-$(CONFIG_AURA2_DECODER)           += aura.o
 OBJS-$(CONFIG_AVRN_DECODER)            += avrndec.o mjpegdec.o
@@ -331,18 +345,18 @@
                                           h264_mb.o h264_picture.o \
                                           h264_refs.o h264_sei.o \
                                           h264_slice.o h264data.o
-OBJS-$(CONFIG_H264_CUVID_DECODER)      += cuvid.o
+OBJS-$(CONFIG_H264_AMF_ENCODER)        += amfenc_h264.o
+OBJS-$(CONFIG_H264_CUVID_DECODER)      += cuviddec.o
 OBJS-$(CONFIG_H264_MEDIACODEC_DECODER) += mediacodecdec.o
 OBJS-$(CONFIG_H264_MMAL_DECODER)       += mmaldec.o
 OBJS-$(CONFIG_H264_NVENC_ENCODER)      += nvenc_h264.o
 OBJS-$(CONFIG_NVENC_ENCODER)           += nvenc_h264.o
 OBJS-$(CONFIG_NVENC_H264_ENCODER)      += nvenc_h264.o
-OBJS-$(CONFIG_H264_VDA_DECODER)        += vda_h264_dec.o
 OBJS-$(CONFIG_H264_OMX_ENCODER)        += omx.o
 OBJS-$(CONFIG_H264_QSV_DECODER)        += qsvdec_h2645.o
 OBJS-$(CONFIG_H264_QSV_ENCODER)        += qsvenc_h264.o
 OBJS-$(CONFIG_H264_RKMPP_DECODER)      += rkmppdec.o
-OBJS-$(CONFIG_H264_VAAPI_ENCODER)      += vaapi_encode_h264.o vaapi_encode_h26x.o
+OBJS-$(CONFIG_H264_VAAPI_ENCODER)      += vaapi_encode_h264.o h264_levels.o
 OBJS-$(CONFIG_H264_VIDEOTOOLBOX_ENCODER) += videotoolboxenc.o
 OBJS-$(CONFIG_H264_V4L2M2M_DECODER)    += v4l2_m2m_dec.o
 OBJS-$(CONFIG_H264_V4L2M2M_ENCODER)    += v4l2_m2m_enc.o
@@ -351,7 +365,8 @@
 OBJS-$(CONFIG_HEVC_DECODER)            += hevcdec.o hevc_mvs.o \
                                           hevc_cabac.o hevc_refs.o hevcpred.o    \
                                           hevcdsp.o hevc_filter.o hevc_data.o
-OBJS-$(CONFIG_HEVC_CUVID_DECODER)      += cuvid.o
+OBJS-$(CONFIG_HEVC_AMF_ENCODER)        += amfenc_hevc.o
+OBJS-$(CONFIG_HEVC_CUVID_DECODER)      += cuviddec.o
 OBJS-$(CONFIG_HEVC_MEDIACODEC_DECODER) += mediacodecdec.o
 OBJS-$(CONFIG_HEVC_NVENC_ENCODER)      += nvenc_hevc.o
 OBJS-$(CONFIG_NVENC_HEVC_ENCODER)      += nvenc_hevc.o
@@ -359,7 +374,7 @@
 OBJS-$(CONFIG_HEVC_QSV_ENCODER)        += qsvenc_hevc.o hevc_ps_enc.o       \
                                           hevc_data.o
 OBJS-$(CONFIG_HEVC_RKMPP_DECODER)      += rkmppdec.o
-OBJS-$(CONFIG_HEVC_VAAPI_ENCODER)      += vaapi_encode_h265.o vaapi_encode_h26x.o
+OBJS-$(CONFIG_HEVC_VAAPI_ENCODER)      += vaapi_encode_h265.o h265_profile_level.o
 OBJS-$(CONFIG_HEVC_V4L2M2M_DECODER)    += v4l2_m2m_dec.o
 OBJS-$(CONFIG_HEVC_V4L2M2M_ENCODER)    += v4l2_m2m_enc.o
 OBJS-$(CONFIG_HNM4_VIDEO_DECODER)      += hnm4video.o
@@ -371,7 +386,9 @@
 OBJS-$(CONFIG_IDCIN_DECODER)           += idcinvideo.o
 OBJS-$(CONFIG_IDF_DECODER)             += bintext.o cga_data.o
 OBJS-$(CONFIG_IFF_ILBM_DECODER)        += iff.o
+OBJS-$(CONFIG_ILBC_DECODER)            += ilbcdec.o
 OBJS-$(CONFIG_IMC_DECODER)             += imc.o
+OBJS-$(CONFIG_IMM4_DECODER)            += imm4.o
 OBJS-$(CONFIG_INDEO2_DECODER)          += indeo2.o
 OBJS-$(CONFIG_INDEO3_DECODER)          += indeo3.o
 OBJS-$(CONFIG_INDEO4_DECODER)          += indeo4.o ivi.o
@@ -396,6 +413,7 @@
 OBJS-$(CONFIG_MACE3_DECODER)           += mace.o
 OBJS-$(CONFIG_MACE6_DECODER)           += mace.o
 OBJS-$(CONFIG_MAGICYUV_DECODER)        += magicyuv.o
+OBJS-$(CONFIG_MAGICYUV_ENCODER)        += magicyuvenc.o
 OBJS-$(CONFIG_MDEC_DECODER)            += mdec.o mpeg12.o mpeg12data.o
 OBJS-$(CONFIG_METASOUND_DECODER)       += metasound.o metasound_data.o \
                                           twinvq.o
@@ -405,6 +423,8 @@
 OBJS-$(CONFIG_MJPEG_ENCODER)           += mjpegenc.o mjpegenc_common.o \
                                           mjpegenc_huffman.o
 OBJS-$(CONFIG_MJPEGB_DECODER)          += mjpegbdec.o
+OBJS-$(CONFIG_MJPEG_CUVID_DECODER)     += cuviddec.o
+OBJS-$(CONFIG_MJPEG_QSV_ENCODER)       += qsvenc_jpeg.o
 OBJS-$(CONFIG_MJPEG_VAAPI_ENCODER)     += vaapi_encode_mjpeg.o
 OBJS-$(CONFIG_MLP_DECODER)             += mlpdec.o mlpdsp.o
 OBJS-$(CONFIG_MLP_ENCODER)             += mlpenc.o mlp.o
@@ -431,16 +451,19 @@
 OBJS-$(CONFIG_MPEGVIDEO_DECODER)       += mpeg12dec.o mpeg12.o mpeg12data.o
 OBJS-$(CONFIG_MPEG1VIDEO_DECODER)      += mpeg12dec.o mpeg12.o mpeg12data.o
 OBJS-$(CONFIG_MPEG1VIDEO_ENCODER)      += mpeg12enc.o mpeg12.o
+OBJS-$(CONFIG_MPEG1_CUVID_DECODER)     += cuviddec.o
 OBJS-$(CONFIG_MPEG1_V4L2M2M_DECODER)   += v4l2_m2m_dec.o
 OBJS-$(CONFIG_MPEG2_MMAL_DECODER)      += mmaldec.o
 OBJS-$(CONFIG_MPEG2_QSV_DECODER)       += qsvdec_other.o
 OBJS-$(CONFIG_MPEG2_QSV_ENCODER)       += qsvenc_mpeg2.o
 OBJS-$(CONFIG_MPEG2VIDEO_DECODER)      += mpeg12dec.o mpeg12.o mpeg12data.o
 OBJS-$(CONFIG_MPEG2VIDEO_ENCODER)      += mpeg12enc.o mpeg12.o
+OBJS-$(CONFIG_MPEG2_CUVID_DECODER)     += cuviddec.o
 OBJS-$(CONFIG_MPEG2_MEDIACODEC_DECODER) += mediacodecdec.o
 OBJS-$(CONFIG_MPEG2_VAAPI_ENCODER)     += vaapi_encode_mpeg2.o
 OBJS-$(CONFIG_MPEG2_V4L2M2M_DECODER)   += v4l2_m2m_dec.o
 OBJS-$(CONFIG_MPEG4_DECODER)           += xvididct.o
+OBJS-$(CONFIG_MPEG4_CUVID_DECODER)     += cuviddec.o
 OBJS-$(CONFIG_MPEG4_MEDIACODEC_DECODER) += mediacodecdec.o
 OBJS-$(CONFIG_MPEG4_OMX_ENCODER)       += omx.o
 OBJS-$(CONFIG_MPEG4_V4L2M2M_DECODER)   += v4l2_m2m_dec.o
@@ -462,6 +485,7 @@
 OBJS-$(CONFIG_MTS2_DECODER)            += mss4.o
 OBJS-$(CONFIG_MVC1_DECODER)            += mvcdec.o
 OBJS-$(CONFIG_MVC2_DECODER)            += mvcdec.o
+OBJS-$(CONFIG_MWSC_DECODER)            += mwsc.o
 OBJS-$(CONFIG_MXPEG_DECODER)           += mxpegdec.o
 OBJS-$(CONFIG_NELLYMOSER_DECODER)      += nellymoserdec.o nellymoser.o
 OBJS-$(CONFIG_NELLYMOSER_ENCODER)      += nellymoserenc.o nellymoser.o
@@ -469,7 +493,7 @@
 OBJS-$(CONFIG_ON2AVC_DECODER)          += on2avc.o on2avcdata.o
 OBJS-$(CONFIG_OPUS_DECODER)            += opusdec.o opus.o opus_celt.o opus_rc.o \
                                           opus_pvq.o opus_silk.o opustab.o vorbis_data.o
-OBJS-$(CONFIG_OPUS_ENCODER)            += opusenc.o opus_rc.o opustab.o opus_pvq.o \
+OBJS-$(CONFIG_OPUS_ENCODER)            += opusenc.o opus.o opus_rc.o opustab.o opus_pvq.o \
                                           opusenc_psy.o
 OBJS-$(CONFIG_PAF_AUDIO_DECODER)       += pafaudio.o
 OBJS-$(CONFIG_PAF_VIDEO_DECODER)       += pafvideo.o
@@ -492,10 +516,10 @@
 OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
 OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o
 OBJS-$(CONFIG_PRORES_DECODER)          += proresdec2.o proresdsp.o proresdata.o
-OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += proresdec_lgpl.o proresdsp.o proresdata.o
 OBJS-$(CONFIG_PRORES_ENCODER)          += proresenc_anatoliy.o
 OBJS-$(CONFIG_PRORES_AW_ENCODER)       += proresenc_anatoliy.o
 OBJS-$(CONFIG_PRORES_KS_ENCODER)       += proresenc_kostya.o proresdata.o
+OBJS-$(CONFIG_PROSUMER_DECODER)        += prosumer.o
 OBJS-$(CONFIG_PSD_DECODER)             += psd.o
 OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
 OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o                     \
@@ -515,6 +539,7 @@
 OBJS-$(CONFIG_RA_144_ENCODER)          += ra144enc.o ra144.o celp_filters.o
 OBJS-$(CONFIG_RA_288_DECODER)          += ra288.o celp_filters.o
 OBJS-$(CONFIG_RALF_DECODER)            += ralf.o
+OBJS-$(CONFIG_RASC_DECODER)            += rasc.o
 OBJS-$(CONFIG_RAWVIDEO_DECODER)        += rawdec.o
 OBJS-$(CONFIG_RAWVIDEO_ENCODER)        += rawenc.o
 OBJS-$(CONFIG_REALTEXT_DECODER)        += realtextdec.o ass.o
@@ -558,7 +583,7 @@
 OBJS-$(CONFIG_SONIC_DECODER)           += sonic.o
 OBJS-$(CONFIG_SONIC_ENCODER)           += sonic.o
 OBJS-$(CONFIG_SONIC_LS_ENCODER)        += sonic.o
-OBJS-$(CONFIG_SPEEDHQ_DECODER)         += speedhq.o simple_idct.o
+OBJS-$(CONFIG_SPEEDHQ_DECODER)         += speedhq.o mpeg12.o mpeg12data.o simple_idct.o
 OBJS-$(CONFIG_SP5X_DECODER)            += sp5xdec.o
 OBJS-$(CONFIG_SRGC_DECODER)            += mscc.o
 OBJS-$(CONFIG_SRT_DECODER)             += srtdec.o ass.o htmlsubtitles.o
@@ -571,6 +596,8 @@
 OBJS-$(CONFIG_SUNRAST_DECODER)         += sunrast.o
 OBJS-$(CONFIG_SUNRAST_ENCODER)         += sunrastenc.o
 OBJS-$(CONFIG_LIBRSVG_DECODER)         += librsvgdec.o
+OBJS-$(CONFIG_SBC_DECODER)             += sbcdec.o sbcdec_data.o sbc.o
+OBJS-$(CONFIG_SBC_ENCODER)             += sbcenc.o sbc.o sbcdsp.o sbcdsp_data.o
 OBJS-$(CONFIG_SVQ1_DECODER)            += svq1dec.o svq1.o svq13.o h263data.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o  h263data.o  \
                                           h263.o ituh263enc.o
@@ -616,7 +643,7 @@
                                           vc1_mc.o vc1_pred.o vc1.o vc1data.o \
                                           msmpeg4dec.o msmpeg4.o msmpeg4data.o \
                                           wmv2dsp.o wmv2data.o
-OBJS-$(CONFIG_VC1_CUVID_DECODER)       += cuvid.o
+OBJS-$(CONFIG_VC1_CUVID_DECODER)       += cuviddec.o
 OBJS-$(CONFIG_VC1_MMAL_DECODER)        += mmaldec.o
 OBJS-$(CONFIG_VC1_QSV_DECODER)         += qsvdec_other.o
 OBJS-$(CONFIG_VC1_V4L2M2M_DECODER)     += v4l2_m2m_dec.o
@@ -635,7 +662,7 @@
                                           vp6dsp.o vp56rac.o
 OBJS-$(CONFIG_VP7_DECODER)             += vp8.o vp56rac.o
 OBJS-$(CONFIG_VP8_DECODER)             += vp8.o vp56rac.o
-OBJS-$(CONFIG_VP8_CUVID_DECODER)       += cuvid.o
+OBJS-$(CONFIG_VP8_CUVID_DECODER)       += cuviddec.o
 OBJS-$(CONFIG_VP8_MEDIACODEC_DECODER)  += mediacodecdec.o
 OBJS-$(CONFIG_VP8_QSV_DECODER)         += qsvdec_other.o
 OBJS-$(CONFIG_VP8_RKMPP_DECODER)       += rkmppdec.o
@@ -645,7 +672,7 @@
 OBJS-$(CONFIG_VP9_DECODER)             += vp9.o vp9data.o vp9dsp.o vp9lpf.o vp9recon.o \
                                           vp9block.o vp9prob.o vp9mvs.o vp56rac.o \
                                           vp9dsp_8bpp.o vp9dsp_10bpp.o vp9dsp_12bpp.o
-OBJS-$(CONFIG_VP9_CUVID_DECODER)       += cuvid.o
+OBJS-$(CONFIG_VP9_CUVID_DECODER)       += cuviddec.o
 OBJS-$(CONFIG_VP9_MEDIACODEC_DECODER)  += mediacodecdec.o
 OBJS-$(CONFIG_VP9_RKMPP_DECODER)       += rkmppdec.o
 OBJS-$(CONFIG_VP9_VAAPI_ENCODER)       += vaapi_encode_vp9.o
@@ -654,6 +681,7 @@
 OBJS-$(CONFIG_VQA_DECODER)             += vqavideo.o
 OBJS-$(CONFIG_WAVPACK_DECODER)         += wavpack.o
 OBJS-$(CONFIG_WAVPACK_ENCODER)         += wavpackenc.o
+OBJS-$(CONFIG_WCMV_DECODER)            += wcmv.o
 OBJS-$(CONFIG_WEBP_DECODER)            += webp.o
 OBJS-$(CONFIG_WEBVTT_DECODER)          += webvttdec.o ass.o
 OBJS-$(CONFIG_WEBVTT_ENCODER)          += webvttenc.o ass_split.o
@@ -822,8 +850,8 @@
 # hardware accelerators
 OBJS-$(CONFIG_D3D11VA)                    += dxva2.o
 OBJS-$(CONFIG_DXVA2)                      += dxva2.o
+OBJS-$(CONFIG_NVDEC)                      += nvdec.o
 OBJS-$(CONFIG_VAAPI)                      += vaapi_decode.o
-OBJS-$(CONFIG_VDA)                        += vda.o videotoolbox.o
 OBJS-$(CONFIG_VIDEOTOOLBOX)               += videotoolbox.o
 OBJS-$(CONFIG_VDPAU)                      += vdpau.o
 
@@ -831,36 +859,46 @@
 OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
 OBJS-$(CONFIG_H264_D3D11VA_HWACCEL)       += dxva2_h264.o
 OBJS-$(CONFIG_H264_DXVA2_HWACCEL)         += dxva2_h264.o
+OBJS-$(CONFIG_H264_NVDEC_HWACCEL)         += nvdec_h264.o
 OBJS-$(CONFIG_H264_QSV_HWACCEL)           += qsvdec_h2645.o
 OBJS-$(CONFIG_H264_VAAPI_HWACCEL)         += vaapi_h264.o
-OBJS-$(CONFIG_H264_VDA_HWACCEL)           += vda_h264.o
 OBJS-$(CONFIG_H264_VDPAU_HWACCEL)         += vdpau_h264.o
 OBJS-$(CONFIG_H264_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
 OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL)       += dxva2_hevc.o
 OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL)         += dxva2_hevc.o
+OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL)         += nvdec_hevc.o
 OBJS-$(CONFIG_HEVC_QSV_HWACCEL)           += qsvdec_h2645.o
 OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL)         += vaapi_hevc.o
 OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL)         += vdpau_hevc.o
+OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL)        += nvdec_mjpeg.o
+OBJS-$(CONFIG_MJPEG_VAAPI_HWACCEL)        += vaapi_mjpeg.o
+OBJS-$(CONFIG_MPEG1_NVDEC_HWACCEL)        += nvdec_mpeg12.o
 OBJS-$(CONFIG_MPEG1_VDPAU_HWACCEL)        += vdpau_mpeg12.o
 OBJS-$(CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
 OBJS-$(CONFIG_MPEG1_XVMC_HWACCEL)         += mpegvideo_xvmc.o
 OBJS-$(CONFIG_MPEG2_D3D11VA_HWACCEL)      += dxva2_mpeg2.o
 OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL)        += dxva2_mpeg2.o
+OBJS-$(CONFIG_MPEG2_NVDEC_HWACCEL)        += nvdec_mpeg12.o
 OBJS-$(CONFIG_MPEG2_QSV_HWACCEL)          += qsvdec_other.o
 OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL)        += vaapi_mpeg2.o
 OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL)        += vdpau_mpeg12.o
 OBJS-$(CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
 OBJS-$(CONFIG_MPEG2_XVMC_HWACCEL)         += mpegvideo_xvmc.o
+OBJS-$(CONFIG_MPEG4_NVDEC_HWACCEL)        += nvdec_mpeg4.o
 OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL)        += vaapi_mpeg4.o
 OBJS-$(CONFIG_MPEG4_VDPAU_HWACCEL)        += vdpau_mpeg4.o
 OBJS-$(CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
 OBJS-$(CONFIG_VC1_D3D11VA_HWACCEL)        += dxva2_vc1.o
 OBJS-$(CONFIG_VC1_DXVA2_HWACCEL)          += dxva2_vc1.o
+OBJS-$(CONFIG_VC1_NVDEC_HWACCEL)          += nvdec_vc1.o
 OBJS-$(CONFIG_VC1_QSV_HWACCEL)            += qsvdec_other.o
 OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)          += vaapi_vc1.o
 OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)          += vdpau_vc1.o
+OBJS-$(CONFIG_VP8_NVDEC_HWACCEL)          += nvdec_vp8.o
+OBJS-$(CONFIG_VP8_VAAPI_HWACCEL)          += vaapi_vp8.o
 OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL)        += dxva2_vp9.o
 OBJS-$(CONFIG_VP9_DXVA2_HWACCEL)          += dxva2_vp9.o
+OBJS-$(CONFIG_VP9_NVDEC_HWACCEL)          += nvdec_vp9.o
 OBJS-$(CONFIG_VP9_VAAPI_HWACCEL)          += vaapi_vp9.o
 OBJS-$(CONFIG_VP8_QSV_HWACCEL)            += qsvdec_other.o
 
@@ -869,6 +907,9 @@
 
 OBJS-$(CONFIG_ADTS_MUXER)              += mpeg4audio.o
 OBJS-$(CONFIG_CAF_DEMUXER)             += ac3tab.o
+OBJS-$(CONFIG_CODEC2_DEMUXER)          += codec2utils.o
+OBJS-$(CONFIG_CODEC2_MUXER)            += codec2utils.o
+OBJS-$(CONFIG_CODEC2RAW_DEMUXER)       += codec2utils.o
 OBJS-$(CONFIG_DNXHD_DEMUXER)           += dnxhddata.o
 OBJS-$(CONFIG_FITS_DEMUXER)            += fits.o
 OBJS-$(CONFIG_FLV_DEMUXER)             += mpeg4audio.o
@@ -880,7 +921,6 @@
 OBJS-$(CONFIG_NUT_MUXER)               += mpegaudiodata.o
 OBJS-$(CONFIG_NUT_DEMUXER)             += mpegaudiodata.o mpeg4audio.o
 OBJS-$(CONFIG_RTP_MUXER)               += mpeg4audio.o
-OBJS-$(CONFIG_SPDIF_DEMUXER)           += aacadtsdec.o mpeg4audio.o
 OBJS-$(CONFIG_SPDIF_MUXER)             += dca.o
 OBJS-$(CONFIG_TAK_DEMUXER)             += tak.o
 OBJS-$(CONFIG_WEBM_MUXER)              += mpeg4audio.o
@@ -909,7 +949,12 @@
 OBJS-$(CONFIG_ILBC_AT_ENCODER)            += audiotoolboxenc.o
 OBJS-$(CONFIG_PCM_ALAW_AT_ENCODER)        += audiotoolboxenc.o
 OBJS-$(CONFIG_PCM_MULAW_AT_ENCODER)       += audiotoolboxenc.o
+OBJS-$(CONFIG_LIBAOM_AV1_DECODER)         += libaomdec.o
+OBJS-$(CONFIG_LIBAOM_AV1_ENCODER)         += libaomenc.o
 OBJS-$(CONFIG_LIBCELT_DECODER)            += libcelt_dec.o
+OBJS-$(CONFIG_LIBCODEC2_DECODER)          += libcodec2.o codec2utils.o
+OBJS-$(CONFIG_LIBCODEC2_ENCODER)          += libcodec2.o codec2utils.o
+OBJS-$(CONFIG_LIBDAVS2_DECODER)           += libdavs2.o
 OBJS-$(CONFIG_LIBFDK_AAC_DECODER)         += libfdk-aacdec.o
 OBJS-$(CONFIG_LIBFDK_AAC_ENCODER)         += libfdk-aacenc.o
 OBJS-$(CONFIG_LIBGSM_DECODER)             += libgsmdec.o
@@ -951,16 +996,18 @@
 OBJS-$(CONFIG_LIBX264_ENCODER)            += libx264.o
 OBJS-$(CONFIG_LIBX265_ENCODER)            += libx265.o
 OBJS-$(CONFIG_LIBXAVS_ENCODER)            += libxavs.o
+OBJS-$(CONFIG_LIBXAVS2_ENCODER)           += libxavs2.o
 OBJS-$(CONFIG_LIBXVID_ENCODER)            += libxvid.o
 OBJS-$(CONFIG_LIBZVBI_TELETEXT_DECODER)   += libzvbi-teletextdec.o ass.o
 
 # parsers
 OBJS-$(CONFIG_AAC_LATM_PARSER)         += latm_parser.o
 OBJS-$(CONFIG_AAC_PARSER)              += aac_parser.o aac_ac3_parser.o \
-                                          aacadtsdec.o mpeg4audio.o
-OBJS-$(CONFIG_AC3_PARSER)              += ac3_parser.o ac3tab.o \
-                                          aac_ac3_parser.o
+                                          mpeg4audio.o
+OBJS-$(CONFIG_AC3_PARSER)              += ac3tab.o aac_ac3_parser.o
 OBJS-$(CONFIG_ADX_PARSER)              += adx_parser.o adx.o
+OBJS-$(CONFIG_AV1_PARSER)              += av1_parser.o av1_parse.o
+OBJS-$(CONFIG_AVS2_PARSER)             += avs2_parser.o
 OBJS-$(CONFIG_BMP_PARSER)              += bmp_parser.o
 OBJS-$(CONFIG_CAVSVIDEO_PARSER)        += cavs_parser.o
 OBJS-$(CONFIG_COOK_PARSER)             += cook_parser.o
@@ -989,11 +1036,13 @@
 OBJS-$(CONFIG_MPEGAUDIO_PARSER)        += mpegaudio_parser.o
 OBJS-$(CONFIG_MPEGVIDEO_PARSER)        += mpegvideo_parser.o    \
                                           mpeg12.o mpeg12data.o
-OBJS-$(CONFIG_OPUS_PARSER)             += opus_parser.o opus.o vorbis_data.o
+OBJS-$(CONFIG_OPUS_PARSER)             += opus_parser.o opus.o opustab.o \
+                                          opus_rc.o vorbis_data.o
 OBJS-$(CONFIG_PNG_PARSER)              += png_parser.o
 OBJS-$(CONFIG_PNM_PARSER)              += pnm_parser.o pnm.o
 OBJS-$(CONFIG_RV30_PARSER)             += rv34_parser.o
 OBJS-$(CONFIG_RV40_PARSER)             += rv34_parser.o
+OBJS-$(CONFIG_SBC_PARSER)              += sbc_parser.o
 OBJS-$(CONFIG_SIPR_PARSER)             += sipr_parser.o
 OBJS-$(CONFIG_TAK_PARSER)              += tak_parser.o tak.o
 OBJS-$(CONFIG_VC1_PARSER)              += vc1_parser.o vc1.o vc1data.o  \
@@ -1004,14 +1053,20 @@
 OBJS-$(CONFIG_XMA_PARSER)              += xma_parser.o
 
 # bitstream filters
-OBJS-$(CONFIG_AAC_ADTSTOASC_BSF)          += aac_adtstoasc_bsf.o aacadtsdec.o \
-                                             mpeg4audio.o
+OBJS-$(CONFIG_AAC_ADTSTOASC_BSF)          += aac_adtstoasc_bsf.o mpeg4audio.o
+OBJS-$(CONFIG_AV1_METADATA_BSF)           += av1_metadata_bsf.o
 OBJS-$(CONFIG_CHOMP_BSF)                  += chomp_bsf.o
 OBJS-$(CONFIG_DUMP_EXTRADATA_BSF)         += dump_extradata_bsf.o
 OBJS-$(CONFIG_DCA_CORE_BSF)               += dca_core_bsf.o
+OBJS-$(CONFIG_EAC3_CORE_BSF)              += eac3_core_bsf.o
 OBJS-$(CONFIG_EXTRACT_EXTRADATA_BSF)      += extract_extradata_bsf.o    \
-                                             h2645_parse.o
+                                             av1_parse.o h2645_parse.o
+OBJS-$(CONFIG_FILTER_UNITS_BSF)           += filter_units_bsf.o
+OBJS-$(CONFIG_H264_METADATA_BSF)          += h264_metadata_bsf.o h264_levels.o
 OBJS-$(CONFIG_H264_MP4TOANNEXB_BSF)       += h264_mp4toannexb_bsf.o
+OBJS-$(CONFIG_H264_REDUNDANT_PPS_BSF)     += h264_redundant_pps_bsf.o
+OBJS-$(CONFIG_HAPQA_EXTRACT_BSF)          += hapqa_extract_bsf.o hap.o
+OBJS-$(CONFIG_HEVC_METADATA_BSF)          += h265_metadata_bsf.o
 OBJS-$(CONFIG_HEVC_MP4TOANNEXB_BSF)       += hevc_mp4toannexb_bsf.o
 OBJS-$(CONFIG_IMX_DUMP_HEADER_BSF)        += imx_dump_header_bsf.o
 OBJS-$(CONFIG_MJPEG2JPEG_BSF)             += mjpeg2jpeg_bsf.o
@@ -1020,10 +1075,13 @@
 OBJS-$(CONFIG_MOV2TEXTSUB_BSF)            += movsub_bsf.o
 OBJS-$(CONFIG_MP3_HEADER_DECOMPRESS_BSF)  += mp3_header_decompress_bsf.o \
                                              mpegaudiodata.o
+OBJS-$(CONFIG_MPEG2_METADATA_BSF)         += mpeg2_metadata_bsf.o
 OBJS-$(CONFIG_NOISE_BSF)                  += noise_bsf.o
 OBJS-$(CONFIG_NULL_BSF)                   += null_bsf.o
 OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF)       += remove_extradata_bsf.o
 OBJS-$(CONFIG_TEXT2MOVSUB_BSF)            += movsub_bsf.o
+OBJS-$(CONFIG_TRACE_HEADERS_BSF)          += trace_headers_bsf.o
+OBJS-$(CONFIG_VP9_METADATA_BSF)           += vp9_metadata_bsf.o
 OBJS-$(CONFIG_VP9_RAW_REORDER_BSF)        += vp9_raw_reorder_bsf.o
 OBJS-$(CONFIG_VP9_SUPERFRAME_BSF)         += vp9_superframe_bsf.o
 OBJS-$(CONFIG_VP9_SUPERFRAME_SPLIT_BSF)   += vp9_superframe_split_bsf.o
@@ -1048,25 +1106,27 @@
                                           aacenc_quantization_misc.h    \
                                           $(ARCH)/vp56_arith.h          \
 
+SKIPHEADERS-$(CONFIG_AMF)              += amfenc.h
 SKIPHEADERS-$(CONFIG_D3D11VA)          += d3d11va.h dxva2_internal.h
 SKIPHEADERS-$(CONFIG_DXVA2)            += dxva2.h dxva2_internal.h
 SKIPHEADERS-$(CONFIG_JNI)              += ffjni.h
 SKIPHEADERS-$(CONFIG_LIBVPX)           += libvpx.h
 SKIPHEADERS-$(CONFIG_LIBWEBP_ENCODER)  += libwebpenc_common.h
 SKIPHEADERS-$(CONFIG_MEDIACODEC)       += mediacodecdec_common.h mediacodec_surface.h mediacodec_wrapper.h mediacodec_sw_buffer.h
+SKIPHEADERS-$(CONFIG_NVDEC)            += nvdec.h
 SKIPHEADERS-$(CONFIG_NVENC)            += nvenc.h
 SKIPHEADERS-$(CONFIG_QSV)              += qsv.h qsv_internal.h
 SKIPHEADERS-$(CONFIG_QSVDEC)           += qsvdec.h
 SKIPHEADERS-$(CONFIG_QSVENC)           += qsvenc.h
 SKIPHEADERS-$(CONFIG_XVMC)             += xvmc.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_encode.h
-SKIPHEADERS-$(CONFIG_VDA)              += vda.h vda_vt_internal.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
-SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vda_vt_internal.h
+SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
 SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
 
 TESTPROGS = avpacket                                                    \
             celp_math                                                   \
+            codec_desc                                                  \
             htmlsubtitles                                               \
             imgconvert                                                  \
             jpeg2000dwt                                                 \
@@ -1082,6 +1142,8 @@
 TESTPROGS-$(CONFIG_IDCTDSP)               += dct
 TESTPROGS-$(CONFIG_IIRFILTER)             += iirfilter
 TESTPROGS-$(HAVE_MMX)                     += motion
+TESTPROGS-$(CONFIG_MPEGVIDEO)             += mpeg12framerate
+TESTPROGS-$(CONFIG_H264_METADATA_BSF)     += h264_levels
 TESTPROGS-$(CONFIG_RANGECODER)            += rangecoder
 TESTPROGS-$(CONFIG_SNOW_ENCODER)          += snowenc
 

diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index 4910c66..05bc953 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h

@@ -357,6 +357,8 @@
     int warned_num_aac_frames;
     int warned_960_sbr;
 
+    int warned_gain_control;
+
     /* aacdec functions pointers */
     void (*imdct_and_windowing)(AACContext *ac, SingleChannelElement *sce);
     void (*apply_ltp)(AACContext *ac, SingleChannelElement *sce);

diff --git a/libavcodec/aac_ac3_parser.c b/libavcodec/aac_ac3_parser.c
index c9ba6bf..54e4598 100644
--- a/libavcodec/aac_ac3_parser.c
+++ b/libavcodec/aac_ac3_parser.c

@@ -60,6 +60,9 @@
                     s->remaining_size += i;
                     goto get_next;
                 }
+                else if (i < 0) {
+                    s->remaining_size += i;
+                }
             }
         }
     }
@@ -86,17 +89,7 @@
            the frame). */
         if (avctx->codec_id != AV_CODEC_ID_AAC) {
             avctx->sample_rate = s->sample_rate;
-
-            /* (E-)AC-3: allow downmixing to stereo or mono */
-            if (s->channels > 1 &&
-                avctx->request_channel_layout == AV_CH_LAYOUT_MONO) {
-                avctx->channels       = 1;
-                avctx->channel_layout = AV_CH_LAYOUT_MONO;
-            } else if (s->channels > 2 &&
-                       avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
-                avctx->channels       = 2;
-                avctx->channel_layout = AV_CH_LAYOUT_STEREO;
-            } else {
+            if (avctx->codec_id != AV_CODEC_ID_EAC3) {
                 avctx->channels = s->channels;
                 avctx->channel_layout = s->channel_layout;
             }
@@ -104,7 +97,8 @@
             avctx->audio_service_type = s->service_type;
         }
 
-        avctx->bit_rate = s->bit_rate;
+        if (avctx->codec_id != AV_CODEC_ID_EAC3)
+            avctx->bit_rate = s->bit_rate;
     }
 
     return i;

diff --git a/libavcodec/aac_adtstoasc_bsf.c b/libavcodec/aac_adtstoasc_bsf.c
index 4bcf55b..6541b11 100644
--- a/libavcodec/aac_adtstoasc_bsf.c
+++ b/libavcodec/aac_adtstoasc_bsf.c

@@ -19,8 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "adts_header.h"
+#include "adts_parser.h"
 #include "avcodec.h"
-#include "aacadtsdec.h"
 #include "bsf.h"
 #include "put_bits.h"
 #include "get_bits.h"
@@ -35,29 +36,28 @@
  * This filter creates an MPEG-4 AudioSpecificConfig from an MPEG-2/4
  * ADTS header and removes the ADTS header.
  */
-static int aac_adtstoasc_filter(AVBSFContext *bsfc, AVPacket *out)
+static int aac_adtstoasc_filter(AVBSFContext *bsfc, AVPacket *pkt)
 {
     AACBSFContext *ctx = bsfc->priv_data;
 
     GetBitContext gb;
     PutBitContext pb;
     AACADTSHeaderInfo hdr;
-    AVPacket *in;
     int ret;
 
-    ret = ff_bsf_get_packet(bsfc, &in);
+    ret = ff_bsf_get_packet_ref(bsfc, pkt);
     if (ret < 0)
         return ret;
 
-    if (bsfc->par_in->extradata && in->size >= 2 && (AV_RB16(in->data) >> 4) != 0xfff)
-        goto finish;
+    if (bsfc->par_in->extradata && pkt->size >= 2 && (AV_RB16(pkt->data) >> 4) != 0xfff)
+        return 0;
 
-    if (in->size < AAC_ADTS_HEADER_SIZE)
+    if (pkt->size < AV_AAC_ADTS_HEADER_SIZE)
         goto packet_too_small;
 
-    init_get_bits(&gb, in->data, AAC_ADTS_HEADER_SIZE * 8);
+    init_get_bits(&gb, pkt->data, AV_AAC_ADTS_HEADER_SIZE * 8);
 
-    if (avpriv_aac_parse_header(&gb, &hdr) < 0) {
+    if (ff_adts_header_parse(&gb, &hdr) < 0) {
         av_log(bsfc, AV_LOG_ERROR, "Error parsing ADTS frame header!\n");
         ret = AVERROR_INVALIDDATA;
         goto fail;
@@ -70,10 +70,10 @@
         goto fail;
     }
 
-    in->size -= AAC_ADTS_HEADER_SIZE + 2 * !hdr.crc_absent;
-    if (in->size <= 0)
+    pkt->size -= AV_AAC_ADTS_HEADER_SIZE + 2 * !hdr.crc_absent;
+    if (pkt->size <= 0)
         goto packet_too_small;
-    in->data += AAC_ADTS_HEADER_SIZE + 2 * !hdr.crc_absent;
+    pkt->data += AV_AAC_ADTS_HEADER_SIZE + 2 * !hdr.crc_absent;
 
     if (!ctx->first_frame_done) {
         int            pce_size = 0;
@@ -81,7 +81,7 @@
         uint8_t       *extradata;
 
         if (!hdr.chan_config) {
-            init_get_bits(&gb, in->data, in->size * 8);
+            init_get_bits(&gb, pkt->data, pkt->size * 8);
             if (get_bits(&gb, 3) != 5) {
                 avpriv_report_missing_feature(bsfc,
                                               "PCE-based channel configuration "
@@ -91,13 +91,13 @@
                 goto fail;
             }
             init_put_bits(&pb, pce_data, MAX_PCE_SIZE);
-            pce_size = avpriv_copy_pce_data(&pb, &gb)/8;
+            pce_size = ff_copy_pce_data(&pb, &gb) / 8;
             flush_put_bits(&pb);
-            in->size -= get_bits_count(&gb)/8;
-            in->data += get_bits_count(&gb)/8;
+            pkt->size -= get_bits_count(&gb)/8;
+            pkt->data += get_bits_count(&gb)/8;
         }
 
-        extradata = av_packet_new_side_data(in, AV_PKT_DATA_NEW_EXTRADATA,
+        extradata = av_packet_new_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
                                             2 + pce_size);
         if (!extradata) {
             ret = AVERROR(ENOMEM);
@@ -119,17 +119,13 @@
         ctx->first_frame_done = 1;
     }
 
-finish:
-    av_packet_move_ref(out, in);
-    av_packet_free(&in);
-
     return 0;
 
 packet_too_small:
     av_log(bsfc, AV_LOG_ERROR, "Input packet too small\n");
     ret = AVERROR_INVALIDDATA;
 fail:
-    av_packet_free(&in);
+    av_packet_unref(pkt);
     return ret;
 }
 

diff --git a/libavcodec/aac_parser.c b/libavcodec/aac_parser.c
index 0b868ed..b869262 100644
--- a/libavcodec/aac_parser.c
+++ b/libavcodec/aac_parser.c

@@ -22,7 +22,8 @@
 
 #include "parser.h"
 #include "aac_ac3_parser.h"
-#include "aacadtsdec.h"
+#include "adts_header.h"
+#include "adts_parser.h"
 #include "get_bits.h"
 #include "mpeg4audio.h"
 
@@ -38,9 +39,10 @@
     } tmp;
 
     tmp.u64 = av_be2ne64(state);
-    init_get_bits(&bits, tmp.u8+8-AAC_ADTS_HEADER_SIZE, AAC_ADTS_HEADER_SIZE * 8);
+    init_get_bits(&bits, tmp.u8 + 8 - AV_AAC_ADTS_HEADER_SIZE,
+                  AV_AAC_ADTS_HEADER_SIZE * 8);
 
-    if ((size = avpriv_aac_parse_header(&bits, &hdr)) < 0)
+    if ((size = ff_adts_header_parse(&bits, &hdr)) < 0)
         return 0;
     *need_next_header = 0;
     *new_frame_start  = 1;
@@ -54,7 +56,7 @@
 static av_cold int aac_parse_init(AVCodecParserContext *s1)
 {
     AACAC3ParseContext *s = s1->priv_data;
-    s->header_size = AAC_ADTS_HEADER_SIZE;
+    s->header_size = AV_AAC_ADTS_HEADER_SIZE;
     s->sync = aac_sync;
     return 0;
 }

diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index fe50871..d394700 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c

@@ -50,11 +50,11 @@
 #include "aac.h"
 #include "aactab.h"
 #include "aacdectab.h"
+#include "adts_header.h"
 #include "cbrt_data.h"
 #include "sbr.h"
 #include "aacsbr.h"
 #include "mpeg4audio.h"
-#include "aacadtsdec.h"
 #include "profiles.h"
 #include "libavutil/intfloat.h"
 
@@ -318,8 +318,8 @@
         ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
         ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
 
-        if(latmctx->initialized) {
-            av_log(avctx, AV_LOG_INFO, "audio config changed\n");
+        if (latmctx->initialized) {
+            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
         } else {
             av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
         }

diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
index 32bd145..1bdb93f 100644
--- a/libavcodec/aacdec_fixed.c
+++ b/libavcodec/aacdec_fixed.c

@@ -75,11 +75,11 @@
 #include "aac.h"
 #include "aactab.h"
 #include "aacdectab.h"
+#include "adts_header.h"
 #include "cbrt_data.h"
 #include "sbr.h"
 #include "aacsbr.h"
 #include "mpeg4audio.h"
-#include "aacadtsdec.h"
 #include "profiles.h"
 #include "libavutil/intfloat.h"
 
@@ -385,7 +385,7 @@
                         for (k = offsets[i]; k < offsets[i + 1]; k++) {
                             tmp = (int)(((int64_t)src[group * 128 + k] * c + \
                                        (int64_t)0x1000000000) >> 37);
-                            dest[group * 128 + k] += (tmp + round) >> shift;
+                            dest[group * 128 + k] += (tmp + (int64_t)round) >> shift;
                         }
                     }
                 }
@@ -417,7 +417,7 @@
     int i, c, shift, round, tmp;
     const int gain = cce->coup.gain[index][0];
     const int *src = cce->ch[0].ret;
-    int *dest = target->ret;
+    unsigned int *dest = target->ret;
     const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
 
     c = cce_scale_fixed[gain & 7];
@@ -436,7 +436,7 @@
     else {
       for (i = 0; i < len; i++) {
           tmp = (int)(((int64_t)src[i] * c + (int64_t)0x1000000000) >> 37);
-          dest[i] += tmp * (1 << shift);
+          dest[i] += tmp * (1U << shift);
       }
     }
 }

diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 082cc90..dce6035 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c

@@ -997,6 +997,7 @@
     switch (m4ac->object_type) {
     case AOT_AAC_MAIN:
     case AOT_AAC_LC:
+    case AOT_AAC_SSR:
     case AOT_AAC_LTP:
     case AOT_ER_AAC_LC:
     case AOT_ER_AAC_LD:
@@ -1967,6 +1968,33 @@
         reset_all_predictors(sce->predictor_state);
 }
 
+static void decode_gain_control(SingleChannelElement * sce, GetBitContext * gb)
+{
+    // wd_num, wd_test, aloc_size
+    static const uint8_t gain_mode[4][3] = {
+        {1, 0, 5},  // ONLY_LONG_SEQUENCE = 0,
+        {2, 1, 2},  // LONG_START_SEQUENCE,
+        {8, 0, 2},  // EIGHT_SHORT_SEQUENCE,
+        {2, 1, 5},  // LONG_STOP_SEQUENCE
+    };
+
+    const int mode = sce->ics.window_sequence[0];
+    uint8_t bd, wd, ad;
+
+    // FIXME: Store the gain control data on |sce| and do something with it.
+    uint8_t max_band = get_bits(gb, 2);
+    for (bd = 0; bd < max_band; bd++) {
+        for (wd = 0; wd < gain_mode[mode][0]; wd++) {
+            uint8_t adjust_num = get_bits(gb, 3);
+            for (ad = 0; ad < adjust_num; ad++) {
+                skip_bits(gb, 4 + ((wd == 0 && gain_mode[mode][1])
+                                     ? 4
+                                     : gain_mode[mode][2]));
+            }
+        }
+    }
+}
+
 /**
  * Decode an individual_channel_stream payload; reference: table 4.44.
  *
@@ -2034,9 +2062,11 @@
                 goto fail;
         }
         if (!eld_syntax && get_bits1(gb)) {
-            avpriv_request_sample(ac->avctx, "SSR");
-            ret = AVERROR_PATCHWELCOME;
-            goto fail;
+            decode_gain_control(sce, gb);
+            if (!ac->warned_gain_control) {
+                avpriv_report_missing_feature(ac->avctx, "Gain control");
+                ac->warned_gain_control = 1;
+            }
         }
         // I see no textual basis in the spec for this occurring after SSR gain
         // control, but this is what both reference and real implmentations do
@@ -2561,7 +2591,7 @@
         for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
             if (ltp->used[sfb])
                 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
-                    sce->coeffs[i] += predFreq[i];
+                    sce->coeffs[i] += (UINTFLOAT)predFreq[i];
     }
 }
 
@@ -2955,7 +2985,7 @@
     uint8_t layout_map[MAX_ELEM_ID*4][3];
     int layout_map_tags, ret;
 
-    size = avpriv_aac_parse_header(gb, &hdr_info);
+    size = ff_adts_header_parse(gb, &hdr_info);
     if (size > 0) {
         if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) {
             // This is 2 for "VLB " audio in NSV files.
@@ -3092,6 +3122,7 @@
     int samples = 0, multiplier, audio_found = 0, pce_found = 0;
     int is_dmono, sce_count = 0;
     int payload_alignment;
+    uint8_t che_presence[4][MAX_ELEM_ID] = {{0}};
 
     ac->frame = data;
 
@@ -3129,6 +3160,17 @@
         }
 
         if (elem_type < TYPE_DSE) {
+            if (che_presence[elem_type][elem_id]) {
+                int error = che_presence[elem_type][elem_id] > 1;
+                av_log(ac->avctx, error ? AV_LOG_ERROR : AV_LOG_DEBUG, "channel element %d.%d duplicate\n",
+                       elem_type, elem_id);
+                if (error) {
+                    err = AVERROR_INVALIDDATA;
+                    goto fail;
+                }
+            }
+            che_presence[elem_type][elem_id]++;
+
             if (!(che=get_che(ac, elem_type, elem_id))) {
                 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
                        elem_type, elem_id);
@@ -3294,20 +3336,14 @@
                                        AV_PKT_DATA_JP_DUALMONO,
                                        &jp_dualmono_size);
 
-    if (new_extradata && 0) {
-        av_free(avctx->extradata);
-        avctx->extradata = av_mallocz(new_extradata_size +
-                                      AV_INPUT_BUFFER_PADDING_SIZE);
-        if (!avctx->extradata)
-            return AVERROR(ENOMEM);
-        avctx->extradata_size = new_extradata_size;
-        memcpy(avctx->extradata, new_extradata, new_extradata_size);
-        push_output_configuration(ac);
-        if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
-                                         avctx->extradata,
-                                         avctx->extradata_size*8LL, 1) < 0) {
-            pop_output_configuration(ac);
-            return AVERROR_INVALIDDATA;
+    if (new_extradata) {
+        /* discard previous configuration */
+        ac->oc[1].status = OC_NONE;
+        err = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
+                                           new_extradata,
+                                           new_extradata_size * 8LL, 1);
+        if (err < 0) {
+            return err;
         }
     }
 

diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 11da260..4d0abb1 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c

@@ -50,17 +50,59 @@
 
 static AVOnce aac_table_init = AV_ONCE_INIT;
 
+static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
+{
+    int i, j;
+    AACEncContext *s = avctx->priv_data;
+    AACPCEInfo *pce = &s->pce;
+    const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT;
+    const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT;
+
+    put_bits(pb, 4, 0);
+
+    put_bits(pb, 2, avctx->profile);
+    put_bits(pb, 4, s->samplerate_index);
+
+    put_bits(pb, 4, pce->num_ele[0]); /* Front */
+    put_bits(pb, 4, pce->num_ele[1]); /* Side */
+    put_bits(pb, 4, pce->num_ele[2]); /* Back */
+    put_bits(pb, 2, pce->num_ele[3]); /* LFE */
+    put_bits(pb, 3, 0); /* Assoc data */
+    put_bits(pb, 4, 0); /* CCs */
+
+    put_bits(pb, 1, 0); /* Stereo mixdown */
+    put_bits(pb, 1, 0); /* Mono mixdown */
+    put_bits(pb, 1, 0); /* Something else */
+
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < pce->num_ele[i]; j++) {
+            if (i < 3)
+                put_bits(pb, 1, pce->pairing[i][j]);
+            put_bits(pb, 4, pce->index[i][j]);
+        }
+    }
+
+    avpriv_align_put_bits(pb);
+    put_bits(pb, 8, strlen(aux_data));
+    avpriv_put_string(pb, aux_data, 0);
+}
+
 /**
  * Make AAC audio config object.
  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
  */
-static void put_audio_specific_config(AVCodecContext *avctx)
+static int put_audio_specific_config(AVCodecContext *avctx)
 {
     PutBitContext pb;
     AACEncContext *s = avctx->priv_data;
-    int channels = s->channels - (s->channels == 8 ? 1 : 0);
+    int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0));
+    const int max_size = 32;
 
-    init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
+    avctx->extradata = av_mallocz(max_size);
+    if (!avctx->extradata)
+        return AVERROR(ENOMEM);
+
+    init_put_bits(&pb, avctx->extradata, max_size);
     put_bits(&pb, 5, s->profile+1); //profile
     put_bits(&pb, 4, s->samplerate_index); //sample rate index
     put_bits(&pb, 4, channels);
@@ -68,12 +110,17 @@
     put_bits(&pb, 1, 0); //frame length - 1024 samples
     put_bits(&pb, 1, 0); //does not depend on core coder
     put_bits(&pb, 1, 0); //is not extension
+    if (s->needs_pce)
+        put_pce(&pb, avctx);
 
     //Explicitly Mark SBR absent
     put_bits(&pb, 11, 0x2b7); //sync extension
     put_bits(&pb, 5,  AOT_SBR);
     put_bits(&pb, 1,  0);
     flush_put_bits(&pb);
+    avctx->extradata_size = put_bits_count(&pb) >> 3;
+
+    return 0;
 }
 
 void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
@@ -488,7 +535,7 @@
 {
     int ch;
     int end = 2048 + (frame ? frame->nb_samples : 0);
-    const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
+    const uint8_t *channel_map = s->reorder_map;
 
     /* copy and remap input samples */
     for (ch = 0; ch < s->channels; ch++) {
@@ -895,7 +942,6 @@
     int ch;
     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
-    FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + AV_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
 
     for(ch = 0; ch < s->channels; ch++)
         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
@@ -920,16 +966,37 @@
 
     /* Constants */
     s->last_frame_pb_count = 0;
-    avctx->extradata_size = 5;
     avctx->frame_size = 1024;
     avctx->initial_padding = 1024;
     s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
 
     /* Channel map and unspecified bitrate guessing */
     s->channels = avctx->channels;
-    ERROR_IF(s->channels > AAC_MAX_CHANNELS || s->channels == 7,
-             "Unsupported number of channels: %d\n", s->channels);
-    s->chan_map = aac_chan_configs[s->channels-1];
+
+    s->needs_pce = 1;
+    for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) {
+        if (avctx->channel_layout == aac_normal_chan_layouts[i]) {
+            s->needs_pce = s->options.pce;
+            break;
+        }
+    }
+
+    if (s->needs_pce) {
+        char buf[64];
+        for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
+            if (avctx->channel_layout == aac_pce_configs[i].layout)
+                break;
+        av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout);
+        ERROR_IF(i == FF_ARRAY_ELEMS(aac_pce_configs), "Unsupported channel layout \"%s\"\n", buf);
+        av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout \"%s\"\n", buf);
+        s->pce = aac_pce_configs[i];
+        s->reorder_map = s->pce.reorder_map;
+        s->chan_map = s->pce.config_map;
+    } else {
+        s->reorder_map = aac_chan_maps[s->channels - 1];
+        s->chan_map = aac_chan_configs[s->channels - 1];
+    }
+
     if (!avctx->bit_rate) {
         for (i = 1; i <= s->chan_map[0]; i++) {
             avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
@@ -1015,7 +1082,8 @@
     if ((ret = alloc_buffers(avctx, s)) < 0)
         goto fail;
 
-    put_audio_specific_config(avctx);
+    if ((ret = put_audio_specific_config(avctx)))
+        goto fail;
 
     sizes[0]   = ff_aac_swb_size_1024[s->samplerate_index];
     sizes[1]   = ff_aac_swb_size_128[s->samplerate_index];
@@ -1052,24 +1120,25 @@
 
 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
 static const AVOption aacenc_options[] = {
-    {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"},
+    {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_FAST}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"},
         {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
         {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
-        {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+        {"fast",     "Default fast search",       0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
     {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS},
     {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
     {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
     {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
     {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
     {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
+    {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
     {NULL}
 };
 
 static const AVClass aacenc_class = {
-    "AAC encoder",
-    av_default_item_name,
-    aacenc_options,
-    LIBAVUTIL_VERSION_INT,
+    .class_name = "AAC encoder",
+    .item_name  = av_default_item_name,
+    .option     = aacenc_options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 static const AVCodecDefault aac_encode_defaults[] = {

diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index ea2d3b9..5a015ca 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h

@@ -45,6 +45,7 @@
     int pns;
     int tns;
     int ltp;
+    int pce;
     int pred;
     int mid_side;
     int intensity_stereo;
@@ -89,6 +90,286 @@
     uint16_t generation;
 } AACQuantizeBandCostCacheEntry;
 
+typedef struct AACPCEInfo {
+    int64_t layout;
+    int num_ele[4];                              ///< front, side, back, lfe
+    int pairing[3][8];                           ///< front, side, back
+    int index[4][8];                             ///< front, side, back, lfe
+    uint8_t config_map[16];                      ///< configs the encoder's channel specific settings
+    uint8_t reorder_map[16];                     ///< maps channels from lavc to aac order
+} AACPCEInfo;
+
+/**
+ * List of PCE (Program Configuration Element) for the channel layouts listed
+ * in channel_layout.h
+ *
+ * For those wishing in the future to add other layouts:
+ *
+ * - num_ele: number of elements in each group of front, side, back, lfe channels
+ *            (an element is of type SCE (single channel), CPE (channel pair) for
+ *            the first 3 groups; and is LFE for LFE group).
+ *
+ * - pairing: 0 for an SCE element or 1 for a CPE; does not apply to LFE group
+ *
+ * - index: there are three independent indices for SCE, CPE and LFE;
+ *     they are incremented irrespective of the group to which the element belongs;
+ *     they are not reset when going from one group to another
+ *
+ *     Example: for 7.0 channel layout,
+ *        .pairing = { { 1, 0 }, { 1 }, { 1 }, }, (3 CPE and 1 SCE in front group)
+ *        .index = { { 0, 0 }, { 1 }, { 2 }, },
+ *               (index is 0 for the single SCE but goes from 0 to 2 for the CPEs)
+ *
+ *     The index order impacts the channel ordering. But is otherwise arbitrary
+ *     (the sequence could have been 2, 0, 1 instead of 0, 1, 2).
+ *
+ *     Spec allows for discontinuous indices, e.g. if one has a total of two SCE,
+ *     SCE.0 SCE.15 is OK per spec; BUT it won't be decoded by our AAC decoder
+ *     which at this time requires that indices fully cover some range starting
+ *     from 0 (SCE.1 SCE.0 is OK but not SCE.0 SCE.15).
+ *
+ * - config_map: total number of elements and their types. Beware, the way the
+ *               types are ordered impacts the final channel ordering.
+ *
+ * - reorder_map: reorders the channels.
+ *
+ */
+static const AACPCEInfo aac_pce_configs[] = {
+    {
+        .layout = AV_CH_LAYOUT_MONO,
+        .num_ele = { 1, 0, 0, 0 },
+        .pairing = { { 0 }, },
+        .index = { { 0 }, },
+        .config_map = { 1, TYPE_SCE, },
+        .reorder_map = { 0 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_STEREO,
+        .num_ele = { 1, 0, 0, 0 },
+        .pairing = { { 1 }, },
+        .index = { { 0 }, },
+        .config_map = { 1, TYPE_CPE, },
+        .reorder_map = { 0, 1 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_2POINT1,
+        .num_ele = { 1, 0, 0, 1 },
+        .pairing = { { 1 }, },
+        .index = { { 0 },{ 0 },{ 0 },{ 0 } },
+        .config_map = { 2, TYPE_CPE, TYPE_LFE },
+        .reorder_map = { 0, 1, 2 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_2_1,
+        .num_ele = { 1, 0, 1, 0 },
+        .pairing = { { 1 },{ 0 },{ 0 } },
+        .index = { { 0 },{ 0 },{ 0 }, },
+        .config_map = { 2, TYPE_CPE, TYPE_SCE },
+        .reorder_map = { 0, 1, 2 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_SURROUND,
+        .num_ele = { 2, 0, 0, 0 },
+        .pairing = { { 1, 0 }, },
+        .index = { { 0, 0 }, },
+        .config_map = { 2, TYPE_CPE, TYPE_SCE, },
+        .reorder_map = { 0, 1, 2 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_3POINT1,
+        .num_ele = { 2, 0, 0, 1 },
+        .pairing = { { 1, 0 }, },
+        .index = { { 0, 0 }, { 0 }, { 0 }, { 0 }, },
+        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_LFE },
+        .reorder_map = { 0, 1, 2, 3 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_4POINT0,
+        .num_ele = { 2, 0, 1, 0 },
+        .pairing = { { 1, 0 }, { 0 }, { 0 }, },
+        .index = { { 0, 0 }, { 0 }, { 1 } },
+        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_SCE },
+        .reorder_map = {  0, 1, 2, 3 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_4POINT1,
+        .num_ele = { 2, 1, 1, 0 },
+        .pairing = { { 1, 0 }, { 0 }, { 0 }, },
+        .index = { { 0, 0 }, { 1 }, { 2 }, { 0 } },
+        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_SCE },
+        .reorder_map = { 0, 1, 2, 3, 4 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_2_2,
+        .num_ele = { 1, 1, 0, 0 },
+        .pairing = { { 1 }, { 1 }, },
+        .index = { { 0 }, { 1 }, },
+        .config_map = { 2, TYPE_CPE, TYPE_CPE },
+        .reorder_map = { 0, 1, 2, 3 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_QUAD,
+        .num_ele = { 1, 0, 1, 0 },
+        .pairing = { { 1 }, { 0 }, { 1 }, },
+        .index = { { 0 }, { 0 }, { 1 } },
+        .config_map = { 2, TYPE_CPE, TYPE_CPE },
+        .reorder_map = { 0, 1, 2, 3 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_5POINT0,
+        .num_ele = { 2, 1, 0, 0 },
+        .pairing = { { 1, 0 }, { 1 }, },
+        .index = { { 0, 0 }, { 1 } },
+        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_CPE },
+        .reorder_map = { 0, 1, 2, 3, 4 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_5POINT1,
+        .num_ele = { 2, 1, 1, 0 },
+        .pairing = { { 1, 0 }, { 0 }, { 1 }, },
+        .index = { { 0, 0 }, { 1 }, { 1 } },
+        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_5POINT0_BACK,
+        .num_ele = { 2, 0, 1, 0 },
+        .pairing = { { 1, 0 }, { 0 }, { 1 } },
+        .index = { { 0, 0 }, { 0 }, { 1 } },
+        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_CPE },
+        .reorder_map = { 0, 1, 2, 3, 4 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_5POINT1_BACK,
+        .num_ele = { 2, 1, 1, 0 },
+        .pairing = { { 1, 0 }, { 0 }, { 1 }, },
+        .index = { { 0, 0 }, { 1 }, { 1 } },
+        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_6POINT0,
+        .num_ele = { 2, 1, 1, 0 },
+        .pairing = { { 1, 0 }, { 1 }, { 0 }, },
+        .index = { { 0, 0 }, { 1 }, { 1 } },
+        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_6POINT0_FRONT,
+        .num_ele = { 2, 1, 0, 0 },
+        .pairing = { { 1, 1 }, { 1 } },
+        .index = { { 1, 0 }, { 2 }, },
+        .config_map = { 3, TYPE_CPE, TYPE_CPE, TYPE_CPE, },
+        .reorder_map = { 0, 1, 2, 3, 4, 5 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_HEXAGONAL,
+        .num_ele = { 2, 0, 2, 0 },
+        .pairing = { { 1, 0 },{ 0 },{ 1, 0 }, },
+        .index = { { 0, 0 },{ 0 },{ 1, 1 } },
+        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, },
+        .reorder_map = { 0, 1, 2, 3, 4, 5 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_6POINT1,
+        .num_ele = { 2, 1, 2, 0 },
+        .pairing = { { 1, 0 },{ 0 },{ 1, 0 }, },
+        .index = { { 0, 0 },{ 1 },{ 1, 2 } },
+        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_6POINT1_BACK,
+        .num_ele = { 2, 1, 2, 0 },
+        .pairing = { { 1, 0 }, { 0 }, { 1, 0 }, },
+        .index = { { 0, 0 }, { 1 }, { 1, 2 } },
+        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_6POINT1_FRONT,
+        .num_ele = { 2, 1, 2, 0 },
+        .pairing = { { 1, 0 }, { 0 }, { 1, 0 }, },
+        .index = { { 0, 0 }, { 1 }, { 1, 2 } },
+        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_7POINT0,
+        .num_ele = { 2, 1, 1, 0 },
+        .pairing = { { 1, 0 }, { 1 }, { 1 }, },
+        .index = { { 0, 0 }, { 1 }, { 2 }, },
+        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_7POINT0_FRONT,
+        .num_ele = { 2, 1, 1, 0 },
+        .pairing = { { 1, 0 }, { 1 }, { 1 }, },
+        .index = { { 0, 0 }, { 1 }, { 2 }, },
+        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_7POINT1,
+        .num_ele = { 2, 1, 2, 0 },
+        .pairing = { { 1, 0 }, { 0 }, { 1, 1 }, },
+        .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
+        .config_map = { 5, TYPE_CPE, TYPE_SCE,  TYPE_SCE, TYPE_CPE, TYPE_CPE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_7POINT1_WIDE,
+        .num_ele = { 2, 1, 2, 0 },
+        .pairing = { { 1, 0 }, { 0 },{  1, 1 }, },
+        .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
+        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_7POINT1_WIDE_BACK,
+        .num_ele = { 2, 1, 2, 0 },
+        .pairing = { { 1, 0 }, { 0 }, { 1, 1 }, },
+        .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
+        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_OCTAGONAL,
+        .num_ele = { 2, 1, 2, 0 },
+        .pairing = { { 1, 0 }, { 1 }, { 1, 0 }, },
+        .index = { { 0, 0 }, { 1 }, { 2, 1 } },
+        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_SCE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
+    },
+    {   /* Meant for order 2/mixed ambisonics */
+        .layout = AV_CH_LAYOUT_OCTAGONAL | AV_CH_TOP_CENTER,
+        .num_ele = { 2, 2, 2, 0 },
+        .pairing = { { 1, 0 }, { 1, 0 }, { 1, 0 }, },
+        .index = { { 0, 0 }, { 1, 1 }, { 2, 2 } },
+        .config_map = { 6, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8 },
+    },
+    {   /* Meant for order 2/mixed ambisonics */
+        .layout = AV_CH_LAYOUT_6POINT0_FRONT | AV_CH_BACK_CENTER |
+                  AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT | AV_CH_TOP_CENTER,
+        .num_ele = { 2, 2, 2, 0 },
+        .pairing = { { 1, 1 }, { 1, 0 }, { 1, 0 }, },
+        .index = { { 0, 1 }, { 2, 0 }, { 3, 1 } },
+        .config_map = { 6, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_HEXADECAGONAL,
+        .num_ele = { 4, 2, 4, 0 },
+        .pairing = { { 1, 0, 1, 0 }, { 1, 1 }, { 1, 0, 1, 0 }, },
+        .index = { { 0, 0, 1, 1 }, { 2, 3 }, { 4, 2, 5, 3 } },
+        .config_map = { 10, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
+        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+    },
+};
+
 /**
  * AAC encoder context
  */
@@ -99,12 +380,15 @@
     FFTContext mdct1024;                         ///< long (1024 samples) frame transform context
     FFTContext mdct128;                          ///< short (128 samples) frame transform context
     AVFloatDSPContext *fdsp;
-    float *planar_samples[8];                    ///< saved preprocessed input
+    AACPCEInfo pce;                              ///< PCE data, if needed
+    float *planar_samples[16];                   ///< saved preprocessed input
 
     int profile;                                 ///< copied from avctx
+    int needs_pce;                               ///< flag for non-standard layout
     LPCContext lpc;                              ///< used by TNS
     int samplerate_index;                        ///< MPEG-4 samplerate index
     int channels;                                ///< channel count
+    const uint8_t *reorder_map;                  ///< lavc to aac reorder map
     const uint8_t *chan_map;                     ///< channel configuration map
 
     ChannelElement *cpe;                         ///< channel elements

diff --git a/libavcodec/aacenctab.h b/libavcodec/aacenctab.h
index 5fc9411..64932d7 100644
--- a/libavcodec/aacenctab.h
+++ b/libavcodec/aacenctab.h

@@ -36,13 +36,24 @@
 /** Total number of codebooks, including special ones **/
 #define CB_TOT_ALL 15
 
-#define AAC_MAX_CHANNELS 8
+#define AAC_MAX_CHANNELS 16
 
 extern const uint8_t *ff_aac_swb_size_1024[];
 extern const int      ff_aac_swb_size_1024_len;
 extern const uint8_t *ff_aac_swb_size_128[];
 extern const int      ff_aac_swb_size_128_len;
 
+/* Supported layouts without using a PCE */
+static const int64_t aac_normal_chan_layouts[7] = {
+    AV_CH_LAYOUT_MONO,
+    AV_CH_LAYOUT_STEREO,
+    AV_CH_LAYOUT_SURROUND,
+    AV_CH_LAYOUT_4POINT0,
+    AV_CH_LAYOUT_5POINT0_BACK,
+    AV_CH_LAYOUT_5POINT1_BACK,
+    AV_CH_LAYOUT_7POINT1,
+};
+
 /** default channel configurations */
 static const uint8_t aac_chan_configs[AAC_MAX_CHANNELS][6] = {
     {1, TYPE_SCE},                                         // 1 channel  - single channel element

diff --git a/libavcodec/aacpsdsp_template.c b/libavcodec/aacpsdsp_template.c
index 19be200..5f4be01 100644
--- a/libavcodec/aacpsdsp_template.c
+++ b/libavcodec/aacpsdsp_template.c

@@ -150,10 +150,10 @@
     INTFLOAT h1 = h[0][1];
     INTFLOAT h2 = h[0][2];
     INTFLOAT h3 = h[0][3];
-    INTFLOAT hs0 = h_step[0][0];
-    INTFLOAT hs1 = h_step[0][1];
-    INTFLOAT hs2 = h_step[0][2];
-    INTFLOAT hs3 = h_step[0][3];
+    UINTFLOAT hs0 = h_step[0][0];
+    UINTFLOAT hs1 = h_step[0][1];
+    UINTFLOAT hs2 = h_step[0][2];
+    UINTFLOAT hs3 = h_step[0][3];
     int n;
 
     for (n = 0; n < len; n++) {
@@ -181,10 +181,10 @@
     INTFLOAT h01  = h[0][1],      h11  = h[1][1];
     INTFLOAT h02  = h[0][2],      h12  = h[1][2];
     INTFLOAT h03  = h[0][3],      h13  = h[1][3];
-    INTFLOAT hs00 = h_step[0][0], hs10 = h_step[1][0];
-    INTFLOAT hs01 = h_step[0][1], hs11 = h_step[1][1];
-    INTFLOAT hs02 = h_step[0][2], hs12 = h_step[1][2];
-    INTFLOAT hs03 = h_step[0][3], hs13 = h_step[1][3];
+    UINTFLOAT hs00 = h_step[0][0], hs10 = h_step[1][0];
+    UINTFLOAT hs01 = h_step[0][1], hs11 = h_step[1][1];
+    UINTFLOAT hs02 = h_step[0][2], hs12 = h_step[1][2];
+    UINTFLOAT hs03 = h_step[0][3], hs13 = h_step[1][3];
     int n;
 
     for (n = 0; n < len; n++) {

diff --git a/libavcodec/aacsbr_fixed.c b/libavcodec/aacsbr_fixed.c
index eeada0b..59cbba1 100644
--- a/libavcodec/aacsbr_fixed.c
+++ b/libavcodec/aacsbr_fixed.c

@@ -567,7 +567,7 @@
                 int idx = indexsine&1;
                 int A = (1-((indexsine+(kx & 1))&2));
                 int B = (A^(-idx)) + idx;
-                int *out = &Y1[i][kx][idx];
+                unsigned *out = &Y1[i][kx][idx];
                 int shift;
                 unsigned round;
 

diff --git a/libavcodec/aacsbr_tablegen_common.h b/libavcodec/aacsbr_tablegen_common.h
index 8c8f6ef..8e0dd9e 100644
--- a/libavcodec/aacsbr_tablegen_common.h
+++ b/libavcodec/aacsbr_tablegen_common.h

@@ -111,16 +111,4 @@
     Q31( 0.8537385600f),
 };
 
-static av_cold void aacsbr_tableinit(void)
-{
-    int n;
-    for (n = 1; n < 320; n++)
-        sbr_qmf_window_us[320 + n] = sbr_qmf_window_us[320 - n];
-    sbr_qmf_window_us[384] = -sbr_qmf_window_us[384];
-    sbr_qmf_window_us[512] = -sbr_qmf_window_us[512];
-
-    for (n = 0; n < 320; n++)
-        sbr_qmf_window_ds[n] = sbr_qmf_window_us[2*n];
-}
-
 #endif /* AVCODEC_AACSBR_TABLEGEN_COMMON_H */

diff --git a/libavcodec/aacsbr_template.c b/libavcodec/aacsbr_template.c
index 3fe78d5..821615f 100644
--- a/libavcodec/aacsbr_template.c
+++ b/libavcodec/aacsbr_template.c

@@ -34,6 +34,18 @@
 
 #include "libavutil/qsort.h"
 
+static av_cold void aacsbr_tableinit(void)
+{
+    int n;
+    for (n = 1; n < 320; n++)
+        sbr_qmf_window_us[320 + n] = sbr_qmf_window_us[320 - n];
+    sbr_qmf_window_us[384] = -sbr_qmf_window_us[384];
+    sbr_qmf_window_us[512] = -sbr_qmf_window_us[512];
+
+    for (n = 0; n < 320; n++)
+        sbr_qmf_window_ds[n] = sbr_qmf_window_us[2*n];
+}
+
 av_cold void AAC_RENAME(ff_aac_sbr_init)(void)
 {
     static const struct {

diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
index e0f378f..eb2014e 100644
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c

@@ -34,20 +34,20 @@
 void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
                                        int beta, int8_t *tc0);
 
-void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
+void ff_weight_h264_pixels_16_neon(uint8_t *dst, ptrdiff_t stride, int height,
                                    int log2_den, int weight, int offset);
-void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height,
+void ff_weight_h264_pixels_8_neon(uint8_t *dst, ptrdiff_t stride, int height,
                                   int log2_den, int weight, int offset);
-void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height,
+void ff_weight_h264_pixels_4_neon(uint8_t *dst, ptrdiff_t stride, int height,
                                   int log2_den, int weight, int offset);
 
-void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride,
+void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
                                      int height, int log2_den, int weightd,
                                      int weights, int offset);
-void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride,
+void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
                                     int height, int log2_den, int weightd,
                                     int weights, int offset);
-void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride,
+void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
                                     int height, int log2_den, int weightd,
                                     int weights, int offset);
 

diff --git a/libavcodec/aarch64/mpegaudiodsp_neon.S b/libavcodec/aarch64/mpegaudiodsp_neon.S
index cc7d9b3..b6ef131 100644
--- a/libavcodec/aarch64/mpegaudiodsp_neon.S
+++ b/libavcodec/aarch64/mpegaudiodsp_neon.S

@@ -24,7 +24,7 @@
 #define WFRAC_BITS  16   // fractional bits for window
 #define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
 
-const   tbl_rev128.s, align=4
+const   tbl_rev128_s, align=4
         .byte           12, 13, 14, 15
         .byte            8,  9, 10, 11
         .byte            4,  5,  6,  7
@@ -39,7 +39,7 @@
         ld1             {v4.4s,v5.4s,v6.4s,v7.4s},  [x7],  #64
         st1             {v0.4s,v1.4s,v2.4s,v3.4s},  [x8],  #64
         st1             {v4.4s,v5.4s,v6.4s,v7.4s},  [x8],  #64
-        movrel          x15, tbl_rev128.s
+        movrel          x15, tbl_rev128_s
         ld1             {v27.4s}, [x15]
 .ifc \type, fixed
         lsl             x4,  x4,  #1

diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h
index 5c9c377..f8f6a81 100644
--- a/libavcodec/ac3.h
+++ b/libavcodec/ac3.h

@@ -28,6 +28,7 @@
 #define AVCODEC_AC3_H
 
 #define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */
+#define EAC3_MAX_CHANNELS 16          /**< maximum number of channels in EAC3 */
 #define AC3_MAX_CHANNELS 7            /**< maximum number of channels, including coupling channel */
 #define CPL_CH 0                      /**< coupling channel index */
 

diff --git a/libavcodec/ac3_parser.c b/libavcodec/ac3_parser.c
index 83dd90f..1e203ae 100644
--- a/libavcodec/ac3_parser.c
+++ b/libavcodec/ac3_parser.c

@@ -20,15 +20,19 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "config.h"
+
 #include "libavutil/channel_layout.h"
 #include "parser.h"
 #include "ac3_parser.h"
+#include "ac3_parser_internal.h"
 #include "aac_ac3_parser.h"
 #include "get_bits.h"
 
 
 #define AC3_HEADER_SIZE 7
 
+#if CONFIG_AC3_PARSER
 
 static const uint8_t eac3_blocks[4] = {
     1, 2, 3, 6
@@ -47,16 +51,9 @@
 static const uint8_t surround_levels[4] = { 4, 6, 7, 6 };
 
 
-int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo **phdr)
+int ff_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
 {
     int frame_size_code;
-    AC3HeaderInfo *hdr;
-
-    if (!*phdr)
-        *phdr = av_mallocz(sizeof(AC3HeaderInfo));
-    if (!*phdr)
-        return AVERROR(ENOMEM);
-    hdr = *phdr;
 
     memset(hdr, 0, sizeof(*hdr));
 
@@ -151,6 +148,48 @@
     return 0;
 }
 
+// TODO: Better way to pass AC3HeaderInfo fields to mov muxer.
+int avpriv_ac3_parse_header(AC3HeaderInfo **phdr, const uint8_t *buf,
+                            size_t size)
+{
+    GetBitContext gb;
+    AC3HeaderInfo *hdr;
+    int err;
+
+    if (!*phdr)
+        *phdr = av_mallocz(sizeof(AC3HeaderInfo));
+    if (!*phdr)
+        return AVERROR(ENOMEM);
+    hdr = *phdr;
+
+    err = init_get_bits8(&gb, buf, size);
+    if (err < 0)
+        return AVERROR_INVALIDDATA;
+    err = ff_ac3_parse_header(&gb, hdr);
+    if (err < 0)
+        return AVERROR_INVALIDDATA;
+
+    return get_bits_count(&gb);
+}
+
+int av_ac3_parse_header(const uint8_t *buf, size_t size,
+                        uint8_t *bitstream_id, uint16_t *frame_size)
+{
+    GetBitContext gb;
+    AC3HeaderInfo hdr;
+    int err;
+
+    init_get_bits8(&gb, buf, size);
+    err = ff_ac3_parse_header(&gb, &hdr);
+    if (err < 0)
+        return AVERROR_INVALIDDATA;
+
+    *bitstream_id = hdr.bitstream_id;
+    *frame_size   = hdr.frame_size;
+
+    return 0;
+}
+
 static int ac3_sync(uint64_t state, AACAC3ParseContext *hdr_info,
         int *need_next_header, int *new_frame_start)
 {
@@ -159,11 +198,11 @@
         uint64_t u64;
         uint8_t  u8[8 + AV_INPUT_BUFFER_PADDING_SIZE];
     } tmp = { av_be2ne64(state) };
-    AC3HeaderInfo hdr, *phdr = &hdr;
+    AC3HeaderInfo hdr;
     GetBitContext gbc;
 
     init_get_bits(&gbc, tmp.u8+8-AC3_HEADER_SIZE, 54);
-    err = avpriv_ac3_parse_header(&gbc, &phdr);
+    err = ff_ac3_parse_header(&gbc, &hdr);
 
     if(err < 0)
         return 0;
@@ -181,8 +220,8 @@
     else if (hdr_info->codec_id == AV_CODEC_ID_NONE)
         hdr_info->codec_id = AV_CODEC_ID_AC3;
 
-    *need_next_header = (hdr.frame_type != EAC3_FRAME_TYPE_AC3_CONVERT);
     *new_frame_start  = (hdr.frame_type != EAC3_FRAME_TYPE_DEPENDENT);
+    *need_next_header = *new_frame_start || (hdr.frame_type != EAC3_FRAME_TYPE_AC3_CONVERT);
     return hdr.frame_size;
 }
 
@@ -202,3 +241,18 @@
     .parser_parse   = ff_aac_ac3_parse,
     .parser_close   = ff_parse_close,
 };
+
+#else
+
+int avpriv_ac3_parse_header(AC3HeaderInfo **phdr, const uint8_t *buf,
+                            size_t size)
+{
+    return AVERROR(ENOSYS);
+}
+
+int av_ac3_parse_header(const uint8_t *buf, size_t size,
+                        uint8_t *bitstream_id, uint16_t *frame_size)
+{
+    return AVERROR(ENOSYS);
+}
+#endif

diff --git a/libavcodec/ac3_parser.h b/libavcodec/ac3_parser.h
index dc5d035..ff8cc4c 100644
--- a/libavcodec/ac3_parser.h
+++ b/libavcodec/ac3_parser.h

@@ -23,20 +23,14 @@
 #ifndef AVCODEC_AC3_PARSER_H
 #define AVCODEC_AC3_PARSER_H
 
-#include "ac3.h"
-#include "get_bits.h"
+#include <stddef.h>
+#include <stdint.h>
 
 /**
- * Parse AC-3 frame header.
- * Parse the header up to the lfeon element, which is the first 52 or 54 bits
- * depending on the audio coding mode.
- * @param[in]  gbc BitContext containing the first 54 bits of the frame.
- * @param[out] hdr Pointer to Pointer to struct where header info is written.
- *                 will be allocated if NULL
- * @return Returns 0 on success, -1 if there is a sync word mismatch,
- * -2 if the bsid (version) element is invalid, -3 if the fscod (sample rate)
- * element is invalid, or -4 if the frmsizecod (bit rate) element is invalid.
+ * Extract the bitstream ID and the frame size from AC-3 data.
  */
-int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo **hdr);
+int av_ac3_parse_header(const uint8_t *buf, size_t size,
+                        uint8_t *bitstream_id, uint16_t *frame_size);
+
 
 #endif /* AVCODEC_AC3_PARSER_H */

diff --git a/libavcodec/ac3_parser_internal.h b/libavcodec/ac3_parser_internal.h
new file mode 100644
index 0000000..3648802
--- /dev/null
+++ b/libavcodec/ac3_parser_internal.h

@@ -0,0 +1,42 @@
+/*
+ * AC-3 parser internal code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AC3_PARSER_INTERNAL_H
+#define AVCODEC_AC3_PARSER_INTERNAL_H
+
+#include "ac3.h"
+#include "get_bits.h"
+
+/**
+ * Parse AC-3 frame header.
+ * Parse the header up to the lfeon element, which is the first 52 or 54 bits
+ * depending on the audio coding mode.
+ * @param[in]  gbc BitContext containing the first 54 bits of the frame.
+ * @param[out] hdr Pointer to struct where header info is written.
+ * @return Returns 0 on success, -1 if there is a sync word mismatch,
+ * -2 if the bsid (version) element is invalid, -3 if the fscod (sample rate)
+ * element is invalid, or -4 if the frmsizecod (bit rate) element is invalid.
+ */
+int ff_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr);
+
+int avpriv_ac3_parse_header(AC3HeaderInfo **hdr, const uint8_t *buf,
+                            size_t size);
+
+#endif /* AVCODEC_AC3_PARSER_INTERNAL_H */

diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index c393076..43b22b7 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c

@@ -36,7 +36,7 @@
 #include "bswapdsp.h"
 #include "internal.h"
 #include "aac_ac3_parser.h"
-#include "ac3_parser.h"
+#include "ac3_parser_internal.h"
 #include "ac3dec.h"
 #include "ac3dec_data.h"
 #include "kbdwin.h"
@@ -297,10 +297,10 @@
  */
 static int parse_frame_header(AC3DecodeContext *s)
 {
-    AC3HeaderInfo hdr, *phdr=&hdr;
+    AC3HeaderInfo hdr;
     int err;
 
-    err = avpriv_ac3_parse_header(&s->gbc, &phdr);
+    err = ff_ac3_parse_header(&s->gbc, &hdr);
     if (err)
         return err;
 
@@ -317,6 +317,7 @@
     s->fbw_channels                 = s->channels - s->lfe_on;
     s->lfe_ch                       = s->fbw_channels + 1;
     s->frame_size                   = hdr.frame_size;
+    s->superframe_size             += hdr.frame_size;
     s->preferred_downmix            = AC3_DMIXMOD_NOTINDICATED;
     s->center_mix_level             = hdr.center_mix_level;
     s->center_mix_level_ltrt        = 4; // -3.0dB
@@ -683,7 +684,7 @@
  * Convert frequency domain coefficients to time-domain audio samples.
  * reference: Section 7.9.4 Transformation Equations
  */
-static inline void do_imdct(AC3DecodeContext *s, int channels)
+static inline void do_imdct(AC3DecodeContext *s, int channels, int offset)
 {
     int ch;
 
@@ -695,25 +696,25 @@
                 x[i] = s->transform_coeffs[ch][2 * i];
             s->imdct_256.imdct_half(&s->imdct_256, s->tmp_output, x);
 #if USE_FIXED
-            s->fdsp->vector_fmul_window_scaled(s->outptr[ch - 1], s->delay[ch - 1],
+            s->fdsp->vector_fmul_window_scaled(s->outptr[ch - 1], s->delay[ch - 1 + offset],
                                        s->tmp_output, s->window, 128, 8);
 #else
-            s->fdsp->vector_fmul_window(s->outptr[ch - 1], s->delay[ch - 1],
+            s->fdsp->vector_fmul_window(s->outptr[ch - 1], s->delay[ch - 1 + offset],
                                        s->tmp_output, s->window, 128);
 #endif
             for (i = 0; i < 128; i++)
                 x[i] = s->transform_coeffs[ch][2 * i + 1];
-            s->imdct_256.imdct_half(&s->imdct_256, s->delay[ch - 1], x);
+            s->imdct_256.imdct_half(&s->imdct_256, s->delay[ch - 1 + offset], x);
         } else {
             s->imdct_512.imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
 #if USE_FIXED
-            s->fdsp->vector_fmul_window_scaled(s->outptr[ch - 1], s->delay[ch - 1],
+            s->fdsp->vector_fmul_window_scaled(s->outptr[ch - 1], s->delay[ch - 1 + offset],
                                        s->tmp_output, s->window, 128, 8);
 #else
-            s->fdsp->vector_fmul_window(s->outptr[ch - 1], s->delay[ch - 1],
+            s->fdsp->vector_fmul_window(s->outptr[ch - 1], s->delay[ch - 1 + offset],
                                        s->tmp_output, s->window, 128);
 #endif
-            memcpy(s->delay[ch - 1], s->tmp_output + 128, 128 * sizeof(FFTSample));
+            memcpy(s->delay[ch - 1 + offset], s->tmp_output + 128, 128 * sizeof(FFTSample));
         }
     }
 }
@@ -1063,7 +1064,7 @@
 /**
  * Decode a single audio block from the AC-3 bitstream.
  */
-static int decode_audio_block(AC3DecodeContext *s, int blk)
+static int decode_audio_block(AC3DecodeContext *s, int blk, int offset)
 {
     int fbw_channels = s->fbw_channels;
     int channel_mode = s->channel_mode;
@@ -1426,7 +1427,7 @@
             ac3_upmix_delay(s);
         }
 
-        do_imdct(s, s->channels);
+        do_imdct(s, s->channels, offset);
 
         if (downmix_output) {
 #if USE_FIXED
@@ -1449,7 +1450,7 @@
                                           s->out_channels, s->fbw_channels, 128);
         }
 
-        do_imdct(s, s->out_channels);
+        do_imdct(s, s->out_channels, offset);
     }
 
     return 0;
@@ -1463,14 +1464,19 @@
 {
     AVFrame *frame     = data;
     const uint8_t *buf = avpkt->data;
-    int buf_size = avpkt->size;
+    int buf_size, full_buf_size = avpkt->size;
     AC3DecodeContext *s = avctx->priv_data;
-    int blk, ch, err, ret;
+    int blk, ch, err, offset, ret;
+    int got_independent_frame = 0;
     const uint8_t *channel_map;
+    uint8_t extended_channel_map[EAC3_MAX_CHANNELS];
     const SHORTFLOAT *output[AC3_MAX_CHANNELS];
     enum AVMatrixEncoding matrix_encoding;
     AVDownmixInfo *downmix_info;
 
+    s->superframe_size = 0;
+
+    buf_size = full_buf_size;
     /* copy input buffer to decoder context to avoid reading past the end
        of the buffer, which can be caused by a damaged input stream. */
     if (buf_size >= 2 && AV_RB16(buf) == 0x770B) {
@@ -1488,6 +1494,7 @@
         av_lfg_init_from_data(&s->dith_state, s->input_buffer, FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE));
 
     buf = s->input_buffer;
+dependent_frame:
     /* initialize the GetBitContext with the start of valid AC-3 Frame */
     if ((ret = init_get_bits8(&s->gbc, buf, buf_size)) < 0)
         return ret;
@@ -1511,11 +1518,11 @@
             break;
         case AAC_AC3_PARSE_ERROR_FRAME_TYPE:
             /* skip frame if CRC is ok. otherwise use error concealment. */
-            /* TODO: add support for substreams and dependent frames */
-            if (s->frame_type == EAC3_FRAME_TYPE_DEPENDENT || s->substreamid) {
+            /* TODO: add support for substreams */
+            if (s->substreamid) {
                 av_log(avctx, AV_LOG_DEBUG,
-                       "unsupported frame type %d: skipping frame\n",
-                       s->frame_type);
+                       "unsupported substream %d: skipping frame\n",
+                       s->substreamid);
                 *got_frame_ptr = 0;
                 return buf_size;
             } else {
@@ -1546,10 +1553,10 @@
         }
     }
 
-    /* if frame is ok, set audio parameters */
-    if (!err) {
-        avctx->sample_rate = s->sample_rate;
-        avctx->bit_rate    = s->bit_rate;
+    if (s->frame_type == EAC3_FRAME_TYPE_DEPENDENT && !got_independent_frame) {
+        av_log(avctx, AV_LOG_WARNING, "Ignoring dependent frame without independent frame.\n");
+        *got_frame_ptr = 0;
+        return FFMIN(full_buf_size, s->frame_size);
     }
 
     /* channel config */
@@ -1594,29 +1601,25 @@
     if (s->bitstream_mode == 0x7 && s->channels > 1)
         avctx->audio_service_type = AV_AUDIO_SERVICE_TYPE_KARAOKE;
 
-    /* get output buffer */
-    frame->nb_samples = s->num_blocks * AC3_BLOCK_SIZE;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
-        return ret;
-
     /* decode the audio blocks */
     channel_map = ff_ac3_dec_channel_map[s->output_mode & ~AC3_OUTPUT_LFEON][s->lfe_on];
+    offset = s->frame_type == EAC3_FRAME_TYPE_DEPENDENT ? AC3_MAX_CHANNELS : 0;
     for (ch = 0; ch < AC3_MAX_CHANNELS; ch++) {
-        output[ch] = s->output[ch];
-        s->outptr[ch] = s->output[ch];
+        output[ch] = s->output[ch + offset];
+        s->outptr[ch] = s->output[ch + offset];
     }
     for (ch = 0; ch < s->channels; ch++) {
         if (ch < s->out_channels)
-            s->outptr[channel_map[ch]] = (SHORTFLOAT *)frame->data[ch];
+            s->outptr[channel_map[ch]] = s->output_buffer[ch + offset];
     }
     for (blk = 0; blk < s->num_blocks; blk++) {
-        if (!err && decode_audio_block(s, blk)) {
+        if (!err && decode_audio_block(s, blk, offset)) {
             av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n");
             err = 1;
         }
         if (err)
             for (ch = 0; ch < s->out_channels; ch++)
-                memcpy(((SHORTFLOAT*)frame->data[ch]) + AC3_BLOCK_SIZE*blk, output[ch], AC3_BLOCK_SIZE*sizeof(SHORTFLOAT));
+                memcpy(s->output_buffer[ch + offset] + AC3_BLOCK_SIZE*blk, output[ch], AC3_BLOCK_SIZE*sizeof(SHORTFLOAT));
         for (ch = 0; ch < s->out_channels; ch++)
             output[ch] = s->outptr[channel_map[ch]];
         for (ch = 0; ch < s->out_channels; ch++) {
@@ -1625,11 +1628,114 @@
         }
     }
 
-    frame->decode_error_flags = err ? FF_DECODE_ERROR_INVALID_BITSTREAM : 0;
-
     /* keep last block for error concealment in next frame */
     for (ch = 0; ch < s->out_channels; ch++)
-        memcpy(s->output[ch], output[ch], AC3_BLOCK_SIZE*sizeof(SHORTFLOAT));
+        memcpy(s->output[ch + offset], output[ch], AC3_BLOCK_SIZE*sizeof(SHORTFLOAT));
+
+    /* check if there is dependent frame */
+    if (buf_size > s->frame_size) {
+        AC3HeaderInfo hdr;
+        int err;
+
+        if ((ret = init_get_bits8(&s->gbc, buf + s->frame_size, buf_size - s->frame_size)) < 0)
+            return ret;
+
+        err = ff_ac3_parse_header(&s->gbc, &hdr);
+        if (err)
+            return err;
+
+        if (hdr.frame_type == EAC3_FRAME_TYPE_DEPENDENT) {
+            if (hdr.num_blocks != s->num_blocks || s->sample_rate != hdr.sample_rate) {
+                av_log(avctx, AV_LOG_WARNING, "Ignoring non-compatible dependent frame.\n");
+            } else {
+                buf += s->frame_size;
+                buf_size -= s->frame_size;
+                s->prev_output_mode = s->output_mode;
+                s->prev_bit_rate = s->bit_rate;
+                got_independent_frame = 1;
+                goto dependent_frame;
+            }
+        }
+    }
+
+    frame->decode_error_flags = err ? FF_DECODE_ERROR_INVALID_BITSTREAM : 0;
+
+    /* if frame is ok, set audio parameters */
+    if (!err) {
+        avctx->sample_rate = s->sample_rate;
+        avctx->bit_rate    = s->bit_rate + s->prev_bit_rate;
+    }
+
+    for (ch = 0; ch < EAC3_MAX_CHANNELS; ch++)
+        extended_channel_map[ch] = ch;
+
+    if (s->frame_type == EAC3_FRAME_TYPE_DEPENDENT) {
+        uint64_t ich_layout = avpriv_ac3_channel_layout_tab[s->prev_output_mode & ~AC3_OUTPUT_LFEON];
+        int channel_map_size = ff_ac3_channels_tab[s->output_mode & ~AC3_OUTPUT_LFEON] + s->lfe_on;
+        uint64_t channel_layout;
+        int extend = 0;
+
+        if (s->prev_output_mode & AC3_OUTPUT_LFEON)
+            ich_layout |= AV_CH_LOW_FREQUENCY;
+
+        channel_layout = ich_layout;
+        for (ch = 0; ch < 16; ch++) {
+            if (s->channel_map & (1 << (EAC3_MAX_CHANNELS - ch - 1))) {
+                channel_layout |= ff_eac3_custom_channel_map_locations[ch][1];
+            }
+        }
+        if (av_get_channel_layout_nb_channels(channel_layout) > EAC3_MAX_CHANNELS) {
+            av_log(avctx, AV_LOG_ERROR, "Too many channels (%d) coded\n",
+                   av_get_channel_layout_nb_channels(channel_layout));
+            return AVERROR_INVALIDDATA;
+        }
+
+        avctx->channel_layout = channel_layout;
+        avctx->channels = av_get_channel_layout_nb_channels(channel_layout);
+
+        for (ch = 0; ch < EAC3_MAX_CHANNELS; ch++) {
+            if (s->channel_map & (1 << (EAC3_MAX_CHANNELS - ch - 1))) {
+                if (ff_eac3_custom_channel_map_locations[ch][0]) {
+                    int index = av_get_channel_layout_channel_index(channel_layout,
+                                                                    ff_eac3_custom_channel_map_locations[ch][1]);
+                    if (index < 0)
+                        return AVERROR_INVALIDDATA;
+                    if (extend >= channel_map_size)
+                        return AVERROR_INVALIDDATA;
+
+                    extended_channel_map[index] = offset + channel_map[extend++];
+                } else {
+                    int i;
+
+                    for (i = 0; i < 64; i++) {
+                        if ((1ULL << i) & ff_eac3_custom_channel_map_locations[ch][1]) {
+                            int index = av_get_channel_layout_channel_index(channel_layout,
+                                                                            1ULL << i);
+                            if (index < 0)
+                                return AVERROR_INVALIDDATA;
+                            if (extend >= channel_map_size)
+                                return AVERROR_INVALIDDATA;
+
+                            extended_channel_map[index] = offset + channel_map[extend++];
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /* get output buffer */
+    frame->nb_samples = s->num_blocks * AC3_BLOCK_SIZE;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    for (ch = 0; ch < avctx->channels; ch++) {
+        int map = extended_channel_map[ch];
+        av_assert0(ch>=AV_NUM_DATA_POINTERS || frame->extended_data[ch] == frame->data[ch]);
+        memcpy((SHORTFLOAT *)frame->extended_data[ch],
+               s->output_buffer[map],
+               s->num_blocks * AC3_BLOCK_SIZE * sizeof(SHORTFLOAT));
+    }
 
     /*
      * AVMatrixEncoding
@@ -1689,7 +1795,10 @@
 
     *got_frame_ptr = 1;
 
-    return FFMIN(buf_size, s->frame_size);
+    if (!s->superframe_size)
+        return FFMIN(full_buf_size, s->frame_size);
+
+    return FFMIN(full_buf_size, s->superframe_size);
 }
 
 /**

diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h
index aa4cf04..ce1434b 100644
--- a/libavcodec/ac3dec.h
+++ b/libavcodec/ac3dec.h

@@ -76,6 +76,7 @@
 ///@{
     int frame_type;                         ///< frame type                             (strmtyp)
     int substreamid;                        ///< substream identification
+    int superframe_size;                    ///< current superframe size, in bytes
     int frame_size;                         ///< current frame size, in bytes
     int bit_rate;                           ///< stream bit rate, in bits-per-second
     int sample_rate;                        ///< sample frequency, in Hz
@@ -87,7 +88,7 @@
     int dialog_normalization[2];            ///< dialog level in dBFS                   (dialnorm)
     int compression_exists[2];              ///< compression field is valid for frame   (compre)
     int compression_gain[2];                ///< gain to apply for heavy compression    (compr)
-    int channel_map;                        ///< custom channel map
+    int channel_map;                        ///< custom channel map                     (chanmap)
     int preferred_downmix;                  ///< Preferred 2-channel downmix mode       (dmixmod)
     int center_mix_level;                   ///< Center mix level index
     int center_mix_level_ltrt;              ///< Center mix level index for Lt/Rt       (ltrtcmixlev)
@@ -164,7 +165,9 @@
     SHORTFLOAT *downmix_coeffs[2];              ///< stereo downmix coefficients
     int downmixed;                              ///< indicates if coeffs are currently downmixed
     int output_mode;                            ///< output channel configuration
+    int prev_output_mode;                       ///< output channel configuration for previous frame
     int out_channels;                           ///< number of output channels
+    int prev_bit_rate;                          ///< stream bit rate, in bits-per-second for previous frame
 ///@}
 
 ///@name Dynamic range
@@ -239,11 +242,12 @@
 ///@name Aligned arrays
     DECLARE_ALIGNED(16, int,   fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];       ///< fixed-point transform coefficients
     DECLARE_ALIGNED(32, INTFLOAT, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< transform coefficients
-    DECLARE_ALIGNED(32, INTFLOAT, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];             ///< delay - added to the next block
+    DECLARE_ALIGNED(32, INTFLOAT, delay)[EAC3_MAX_CHANNELS][AC3_BLOCK_SIZE];         ///< delay - added to the next block
     DECLARE_ALIGNED(32, INTFLOAT, window)[AC3_BLOCK_SIZE];                              ///< window coefficients
     DECLARE_ALIGNED(32, INTFLOAT, tmp_output)[AC3_BLOCK_SIZE];                          ///< temporary storage for output before windowing
-    DECLARE_ALIGNED(32, SHORTFLOAT, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];            ///< output after imdct transform and windowing
+    DECLARE_ALIGNED(32, SHORTFLOAT, output)[EAC3_MAX_CHANNELS][AC3_BLOCK_SIZE];            ///< output after imdct transform and windowing
     DECLARE_ALIGNED(32, uint8_t, input_buffer)[AC3_FRAME_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE]; ///< temp buffer to prevent overread
+    DECLARE_ALIGNED(32, SHORTFLOAT, output_buffer)[EAC3_MAX_CHANNELS][AC3_BLOCK_SIZE * 6];  ///< final output buffer
 ///@}
 } AC3DecodeContext;
 

diff --git a/libavcodec/ac3tab.c b/libavcodec/ac3tab.c
index d62d8bf..bd88f32 100644
--- a/libavcodec/ac3tab.c
+++ b/libavcodec/ac3tab.c

@@ -314,3 +314,21 @@
     AC3_CHMAP_L |               AC3_CHMAP_R | AC3_CHMAP_L_SUR |                  AC3_CHMAP_R_SUR,
     AC3_CHMAP_L | AC3_CHMAP_C | AC3_CHMAP_R | AC3_CHMAP_L_SUR |                  AC3_CHMAP_R_SUR
 };
+const uint64_t ff_eac3_custom_channel_map_locations[16][2] = {
+    { 1, AV_CH_FRONT_LEFT },
+    { 1, AV_CH_FRONT_CENTER },
+    { 1, AV_CH_FRONT_RIGHT },
+    { 1, AV_CH_SIDE_LEFT },
+    { 1, AV_CH_SIDE_RIGHT },
+    { 0, AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER },
+    { 0, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT },
+    { 0, AV_CH_BACK_CENTER },
+    { 0, AV_CH_TOP_CENTER },
+    { 0, AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT },
+    { 0, AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT },
+    { 0, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT},
+    { 0, AV_CH_TOP_FRONT_CENTER },
+    { 0, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+    { 0, AV_CH_LOW_FREQUENCY_2 },
+    { 1, AV_CH_LOW_FREQUENCY },
+};

diff --git a/libavcodec/ac3tab.h b/libavcodec/ac3tab.h
index f529fc8..aa71acb 100644
--- a/libavcodec/ac3tab.h
+++ b/libavcodec/ac3tab.h

@@ -26,10 +26,11 @@
 
 #include "libavutil/internal.h"
 #include "ac3.h"
+#include "internal.h"
 
 extern const uint16_t ff_ac3_frame_size_tab[38][3];
 extern const uint8_t  ff_ac3_channels_tab[8];
-extern av_export const uint16_t avpriv_ac3_channel_layout_tab[8];
+extern av_export_avcodec const uint16_t avpriv_ac3_channel_layout_tab[8];
 extern const uint8_t  ff_ac3_enc_channel_map[8][2][6];
 extern const uint8_t  ff_ac3_dec_channel_map[8][2][6];
 extern const uint16_t ff_ac3_sample_rate_tab[3];
@@ -49,6 +50,8 @@
 extern const uint16_t ff_eac3_default_chmap[8];
 extern const uint8_t  ff_ac3_band_start_tab[AC3_CRITICAL_BANDS+1];
 extern const uint8_t  ff_ac3_bin_to_band_tab[253];
+extern const uint64_t ff_eac3_custom_channel_map_locations[16][2];
+
 
 /** Custom channel map locations bitmask
  *  Other channels described in documentation:

diff --git a/libavcodec/adpcm.c b/libavcodec/adpcm.c
index be206c5..cd3bbd3 100644
--- a/libavcodec/adpcm.c
+++ b/libavcodec/adpcm.c

@@ -1115,6 +1115,7 @@
         int16_t *out1 = samples_p[1];
         int samples_per_block = 28 * (3 - avctx->channels) * 4;
         int sample_offset = 0;
+        int bytes_remaining;
         while (bytestream2_get_bytes_left(&gb) >= 128) {
             if ((ret = xa_decode(avctx, out0, out1, buf + bytestream2_tell(&gb),
                                  &c->status[0], &c->status[1],
@@ -1123,6 +1124,12 @@
             bytestream2_skipu(&gb, 128);
             sample_offset += samples_per_block;
         }
+        /* Less than a full block of data left, e.g. when reading from
+         * 2324 byte per sector XA; the remainder is padding */
+        bytes_remaining = bytestream2_get_bytes_left(&gb);
+        if (bytes_remaining > 0) {
+            bytestream2_skip(&gb, bytes_remaining);
+        }
         break;
     }
     case AV_CODEC_ID_ADPCM_IMA_EA_EACS:

diff --git a/libavcodec/aacadtsdec.c b/libavcodec/adts_header.c
similarity index 93%
rename from libavcodec/aacadtsdec.c
rename to libavcodec/adts_header.c
index d0814ac..0889820 100644
--- a/libavcodec/aacadtsdec.c
+++ b/libavcodec/adts_header.c

@@ -22,11 +22,12 @@
  */
 
 #include "aac_ac3_parser.h"
-#include "aacadtsdec.h"
+#include "adts_header.h"
+#include "adts_parser.h"
 #include "get_bits.h"
 #include "mpeg4audio.h"
 
-int avpriv_aac_parse_header(GetBitContext *gbc, AACADTSHeaderInfo *hdr)
+int ff_adts_header_parse(GetBitContext *gbc, AACADTSHeaderInfo *hdr)
 {
     int size, rdb, ch, sr;
     int aot, crc_abs;
@@ -51,7 +52,7 @@
     skip_bits1(gbc);             /* copyright_identification_bit */
     skip_bits1(gbc);             /* copyright_identification_start */
     size = get_bits(gbc, 13);    /* aac_frame_length */
-    if (size < AAC_ADTS_HEADER_SIZE)
+    if (size < AV_AAC_ADTS_HEADER_SIZE)
         return AAC_AC3_PARSE_ERROR_FRAME_SIZE;
 
     skip_bits(gbc, 11);          /* adts_buffer_fullness */

diff --git a/libavcodec/aacadtsdec.h b/libavcodec/adts_header.h
similarity index 86%
rename from libavcodec/aacadtsdec.h
rename to libavcodec/adts_header.h
index d0584ef..f615f6a 100644
--- a/libavcodec/aacadtsdec.h
+++ b/libavcodec/adts_header.h

@@ -20,14 +20,11 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_AACADTSDEC_H
-#define AVCODEC_AACADTSDEC_H
+#ifndef AVCODEC_ADTS_HEADER_H
+#define AVCODEC_ADTS_HEADER_H
 
-#include <stdint.h>
 #include "get_bits.h"
 
-#define AAC_ADTS_HEADER_SIZE 7
-
 typedef struct AACADTSHeaderInfo {
     uint32_t sample_rate;
     uint32_t samples;
@@ -40,7 +37,6 @@
 } AACADTSHeaderInfo;
 
 /**
- * Parse AAC frame header.
  * Parse the ADTS frame header to the end of the variable header, which is
  * the first 54 bits.
  * @param[in]  gbc BitContext containing the first 54 bits of the frame.
@@ -49,6 +45,6 @@
  * -2 if the version element is invalid, -3 if the sample rate
  * element is invalid, or -4 if the bit rate element is invalid.
  */
-int avpriv_aac_parse_header(GetBitContext *gbc, AACADTSHeaderInfo *hdr);
+int ff_adts_header_parse(GetBitContext *gbc, AACADTSHeaderInfo *hdr);
 
-#endif /* AVCODEC_AACADTSDEC_H */
+#endif /* AVCODEC_ADTS_HEADER_H */

diff --git a/libavcodec/adts_parser.c b/libavcodec/adts_parser.c
new file mode 100644
index 0000000..5c9f8ff
--- /dev/null
+++ b/libavcodec/adts_parser.c

@@ -0,0 +1,44 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "adts_header.h"
+#include "adts_parser.h"
+
+int av_adts_header_parse(const uint8_t *buf, uint32_t *samples, uint8_t *frames)
+{
+#if CONFIG_ADTS_HEADER
+    GetBitContext gb;
+    AACADTSHeaderInfo hdr;
+    int err = init_get_bits8(&gb, buf, AV_AAC_ADTS_HEADER_SIZE);
+    if (err < 0)
+        return err;
+    err = ff_adts_header_parse(&gb, &hdr);
+    if (err < 0)
+        return err;
+    *samples = hdr.samples;
+    *frames  = hdr.num_aac_frames;
+    return 0;
+#else
+    return AVERROR(ENOSYS);
+#endif
+}

diff --git a/libavcodec/adts_parser.h b/libavcodec/adts_parser.h
new file mode 100644
index 0000000..f85becd
--- /dev/null
+++ b/libavcodec/adts_parser.h

@@ -0,0 +1,37 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ADTS_PARSER_H
+#define AVCODEC_ADTS_PARSER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define AV_AAC_ADTS_HEADER_SIZE 7
+
+/**
+ * Extract the number of samples and frames from AAC data.
+ * @param[in]  buf     pointer to AAC data buffer
+ * @param[out] samples Pointer to where number of samples is written
+ * @param[out] frames  Pointer to where number of frames is written
+ * @return Returns 0 on success, error code on failure.
+ */
+int av_adts_header_parse(const uint8_t *buf, uint32_t *samples,
+                         uint8_t *frames);
+
+#endif /* AVCODEC_ADTS_PARSER_H */

diff --git a/libavcodec/aic.c b/libavcodec/aic.c
index 67d78c5..9c6f806 100644
--- a/libavcodec/aic.c
+++ b/libavcodec/aic.c

@@ -308,6 +308,8 @@
     GetBitContext gb;
     int ret, i, mb, blk;
     int slice_width = FFMIN(ctx->slice_width, ctx->mb_width - mb_x);
+    int last_row = mb_y && mb_y == ctx->mb_height - 1;
+    int y_pos, c_pos;
     uint8_t *Y, *C[2];
     uint8_t *dst;
     int16_t *base_y = ctx->data_ptr[COEFF_LUMA];
@@ -316,10 +318,18 @@
     int16_t *ext_c  = ctx->data_ptr[COEFF_CHROMA_EXT];
     const int ystride = ctx->frame->linesize[0];
 
-    Y = ctx->frame->data[0] + mb_x * 16 + mb_y * 16 * ystride;
+    if (last_row) {
+        y_pos = (ctx->avctx->height - 16);
+        c_pos = ((ctx->avctx->height+1)/2 - 8);
+    } else {
+        y_pos = mb_y * 16;
+        c_pos = mb_y * 8;
+    }
+
+    Y = ctx->frame->data[0] + mb_x * 16 + y_pos * ystride;
     for (i = 0; i < 2; i++)
         C[i] = ctx->frame->data[i + 1] + mb_x * 8
-               + mb_y * 8 * ctx->frame->linesize[i + 1];
+               + c_pos * ctx->frame->linesize[i + 1];
     init_get_bits(&gb, src, src_size * 8);
 
     memset(ctx->slice_data, 0,

diff --git a/libavcodec/alac.c b/libavcodec/alac.c
index d6bd21b..93cf198 100644
--- a/libavcodec/alac.c
+++ b/libavcodec/alac.c

@@ -524,7 +524,7 @@
 
     alac->max_samples_per_frame = bytestream2_get_be32u(&gb);
     if (!alac->max_samples_per_frame ||
-        alac->max_samples_per_frame > INT_MAX / sizeof(int32_t)) {
+        alac->max_samples_per_frame > 4096 * 4096) {
         av_log(alac->avctx, AV_LOG_ERROR,
                "max samples per frame invalid: %"PRIu32"\n",
                alac->max_samples_per_frame);

diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 4f34312..c0b4d56 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c

@@ -29,732 +29,881 @@
 #include "avcodec.h"
 #include "version.h"
 
-#define REGISTER_HWACCEL(X, x)                                          \
-    {                                                                   \
-        extern AVHWAccel ff_##x##_hwaccel;                              \
-        if (CONFIG_##X##_HWACCEL)                                       \
-            av_register_hwaccel(&ff_##x##_hwaccel);                     \
-    }
+extern AVCodec ff_a64multi_encoder;
+extern AVCodec ff_a64multi5_encoder;
+extern AVCodec ff_aasc_decoder;
+extern AVCodec ff_aic_decoder;
+extern AVCodec ff_alias_pix_encoder;
+extern AVCodec ff_alias_pix_decoder;
+extern AVCodec ff_amv_encoder;
+extern AVCodec ff_amv_decoder;
+extern AVCodec ff_anm_decoder;
+extern AVCodec ff_ansi_decoder;
+extern AVCodec ff_apng_encoder;
+extern AVCodec ff_apng_decoder;
+extern AVCodec ff_asv1_encoder;
+extern AVCodec ff_asv1_decoder;
+extern AVCodec ff_asv2_encoder;
+extern AVCodec ff_asv2_decoder;
+extern AVCodec ff_aura_decoder;
+extern AVCodec ff_aura2_decoder;
+extern AVCodec ff_avrp_encoder;
+extern AVCodec ff_avrp_decoder;
+extern AVCodec ff_avrn_decoder;
+extern AVCodec ff_avs_decoder;
+extern AVCodec ff_avui_encoder;
+extern AVCodec ff_avui_decoder;
+extern AVCodec ff_ayuv_encoder;
+extern AVCodec ff_ayuv_decoder;
+extern AVCodec ff_bethsoftvid_decoder;
+extern AVCodec ff_bfi_decoder;
+extern AVCodec ff_bink_decoder;
+extern AVCodec ff_bitpacked_decoder;
+extern AVCodec ff_bmp_encoder;
+extern AVCodec ff_bmp_decoder;
+extern AVCodec ff_bmv_video_decoder;
+extern AVCodec ff_brender_pix_decoder;
+extern AVCodec ff_c93_decoder;
+extern AVCodec ff_cavs_decoder;
+extern AVCodec ff_cdgraphics_decoder;
+extern AVCodec ff_cdxl_decoder;
+extern AVCodec ff_cfhd_decoder;
+extern AVCodec ff_cinepak_encoder;
+extern AVCodec ff_cinepak_decoder;
+extern AVCodec ff_clearvideo_decoder;
+extern AVCodec ff_cljr_encoder;
+extern AVCodec ff_cljr_decoder;
+extern AVCodec ff_cllc_decoder;
+extern AVCodec ff_comfortnoise_encoder;
+extern AVCodec ff_comfortnoise_decoder;
+extern AVCodec ff_cpia_decoder;
+extern AVCodec ff_cscd_decoder;
+extern AVCodec ff_cyuv_decoder;
+extern AVCodec ff_dds_decoder;
+extern AVCodec ff_dfa_decoder;
+extern AVCodec ff_dirac_decoder;
+extern AVCodec ff_dnxhd_encoder;
+extern AVCodec ff_dnxhd_decoder;
+extern AVCodec ff_dpx_encoder;
+extern AVCodec ff_dpx_decoder;
+extern AVCodec ff_dsicinvideo_decoder;
+extern AVCodec ff_dvaudio_decoder;
+extern AVCodec ff_dvvideo_encoder;
+extern AVCodec ff_dvvideo_decoder;
+extern AVCodec ff_dxa_decoder;
+extern AVCodec ff_dxtory_decoder;
+extern AVCodec ff_dxv_decoder;
+extern AVCodec ff_eacmv_decoder;
+extern AVCodec ff_eamad_decoder;
+extern AVCodec ff_eatgq_decoder;
+extern AVCodec ff_eatgv_decoder;
+extern AVCodec ff_eatqi_decoder;
+extern AVCodec ff_eightbps_decoder;
+extern AVCodec ff_eightsvx_exp_decoder;
+extern AVCodec ff_eightsvx_fib_decoder;
+extern AVCodec ff_escape124_decoder;
+extern AVCodec ff_escape130_decoder;
+extern AVCodec ff_exr_decoder;
+extern AVCodec ff_ffv1_encoder;
+extern AVCodec ff_ffv1_decoder;
+extern AVCodec ff_ffvhuff_encoder;
+extern AVCodec ff_ffvhuff_decoder;
+extern AVCodec ff_fic_decoder;
+extern AVCodec ff_fits_encoder;
+extern AVCodec ff_fits_decoder;
+extern AVCodec ff_flashsv_encoder;
+extern AVCodec ff_flashsv_decoder;
+extern AVCodec ff_flashsv2_encoder;
+extern AVCodec ff_flashsv2_decoder;
+extern AVCodec ff_flic_decoder;
+extern AVCodec ff_flv_encoder;
+extern AVCodec ff_flv_decoder;
+extern AVCodec ff_fmvc_decoder;
+extern AVCodec ff_fourxm_decoder;
+extern AVCodec ff_fraps_decoder;
+extern AVCodec ff_frwu_decoder;
+extern AVCodec ff_g2m_decoder;
+extern AVCodec ff_gdv_decoder;
+extern AVCodec ff_gif_encoder;
+extern AVCodec ff_gif_decoder;
+extern AVCodec ff_h261_encoder;
+extern AVCodec ff_h261_decoder;
+extern AVCodec ff_h263_encoder;
+extern AVCodec ff_h263_decoder;
+extern AVCodec ff_h263i_decoder;
+extern AVCodec ff_h263p_encoder;
+extern AVCodec ff_h263p_decoder;
+extern AVCodec ff_h263_v4l2m2m_decoder;
+extern AVCodec ff_h264_decoder;
+extern AVCodec ff_h264_crystalhd_decoder;
+extern AVCodec ff_h264_v4l2m2m_decoder;
+extern AVCodec ff_h264_mediacodec_decoder;
+extern AVCodec ff_h264_mmal_decoder;
+extern AVCodec ff_h264_qsv_decoder;
+extern AVCodec ff_h264_rkmpp_decoder;
+extern AVCodec ff_hap_encoder;
+extern AVCodec ff_hap_decoder;
+extern AVCodec ff_hevc_decoder;
+extern AVCodec ff_hevc_qsv_decoder;
+extern AVCodec ff_hevc_rkmpp_decoder;
+extern AVCodec ff_hevc_v4l2m2m_decoder;
+extern AVCodec ff_hnm4_video_decoder;
+extern AVCodec ff_hq_hqa_decoder;
+extern AVCodec ff_hqx_decoder;
+extern AVCodec ff_huffyuv_encoder;
+extern AVCodec ff_huffyuv_decoder;
+extern AVCodec ff_idcin_decoder;
+extern AVCodec ff_iff_ilbm_decoder;
+extern AVCodec ff_imm4_decoder;
+extern AVCodec ff_indeo2_decoder;
+extern AVCodec ff_indeo3_decoder;
+extern AVCodec ff_indeo4_decoder;
+extern AVCodec ff_indeo5_decoder;
+extern AVCodec ff_interplay_video_decoder;
+extern AVCodec ff_jpeg2000_encoder;
+extern AVCodec ff_jpeg2000_decoder;
+extern AVCodec ff_jpegls_encoder;
+extern AVCodec ff_jpegls_decoder;
+extern AVCodec ff_jv_decoder;
+extern AVCodec ff_kgv1_decoder;
+extern AVCodec ff_kmvc_decoder;
+extern AVCodec ff_lagarith_decoder;
+extern AVCodec ff_ljpeg_encoder;
+extern AVCodec ff_loco_decoder;
+extern AVCodec ff_m101_decoder;
+extern AVCodec ff_magicyuv_encoder;
+extern AVCodec ff_magicyuv_decoder;
+extern AVCodec ff_mdec_decoder;
+extern AVCodec ff_mimic_decoder;
+extern AVCodec ff_mjpeg_encoder;
+extern AVCodec ff_mjpeg_decoder;
+extern AVCodec ff_mjpegb_decoder;
+extern AVCodec ff_mmvideo_decoder;
+extern AVCodec ff_motionpixels_decoder;
+extern AVCodec ff_mpeg1video_encoder;
+extern AVCodec ff_mpeg1video_decoder;
+extern AVCodec ff_mpeg2video_encoder;
+extern AVCodec ff_mpeg2video_decoder;
+extern AVCodec ff_mpeg4_encoder;
+extern AVCodec ff_mpeg4_decoder;
+extern AVCodec ff_mpeg4_crystalhd_decoder;
+extern AVCodec ff_mpeg4_v4l2m2m_decoder;
+extern AVCodec ff_mpeg4_mmal_decoder;
+extern AVCodec ff_mpegvideo_decoder;
+extern AVCodec ff_mpeg1_v4l2m2m_decoder;
+extern AVCodec ff_mpeg2_mmal_decoder;
+extern AVCodec ff_mpeg2_crystalhd_decoder;
+extern AVCodec ff_mpeg2_v4l2m2m_decoder;
+extern AVCodec ff_mpeg2_qsv_decoder;
+extern AVCodec ff_mpeg2_mediacodec_decoder;
+extern AVCodec ff_msa1_decoder;
+extern AVCodec ff_mscc_decoder;
+extern AVCodec ff_msmpeg4v1_decoder;
+extern AVCodec ff_msmpeg4v2_encoder;
+extern AVCodec ff_msmpeg4v2_decoder;
+extern AVCodec ff_msmpeg4v3_encoder;
+extern AVCodec ff_msmpeg4v3_decoder;
+extern AVCodec ff_msmpeg4_crystalhd_decoder;
+extern AVCodec ff_msrle_decoder;
+extern AVCodec ff_mss1_decoder;
+extern AVCodec ff_mss2_decoder;
+extern AVCodec ff_msvideo1_encoder;
+extern AVCodec ff_msvideo1_decoder;
+extern AVCodec ff_mszh_decoder;
+extern AVCodec ff_mts2_decoder;
+extern AVCodec ff_mvc1_decoder;
+extern AVCodec ff_mvc2_decoder;
+extern AVCodec ff_mwsc_decoder;
+extern AVCodec ff_mxpeg_decoder;
+extern AVCodec ff_nuv_decoder;
+extern AVCodec ff_paf_video_decoder;
+extern AVCodec ff_pam_encoder;
+extern AVCodec ff_pam_decoder;
+extern AVCodec ff_pbm_encoder;
+extern AVCodec ff_pbm_decoder;
+extern AVCodec ff_pcx_encoder;
+extern AVCodec ff_pcx_decoder;
+extern AVCodec ff_pgm_encoder;
+extern AVCodec ff_pgm_decoder;
+extern AVCodec ff_pgmyuv_encoder;
+extern AVCodec ff_pgmyuv_decoder;
+extern AVCodec ff_pictor_decoder;
+extern AVCodec ff_pixlet_decoder;
+extern AVCodec ff_png_encoder;
+extern AVCodec ff_png_decoder;
+extern AVCodec ff_ppm_encoder;
+extern AVCodec ff_ppm_decoder;
+extern AVCodec ff_prores_encoder;
+extern AVCodec ff_prores_decoder;
+extern AVCodec ff_prores_aw_encoder;
+extern AVCodec ff_prores_ks_encoder;
+extern AVCodec ff_prosumer_decoder;
+extern AVCodec ff_psd_decoder;
+extern AVCodec ff_ptx_decoder;
+extern AVCodec ff_qdraw_decoder;
+extern AVCodec ff_qpeg_decoder;
+extern AVCodec ff_qtrle_encoder;
+extern AVCodec ff_qtrle_decoder;
+extern AVCodec ff_r10k_encoder;
+extern AVCodec ff_r10k_decoder;
+extern AVCodec ff_r210_encoder;
+extern AVCodec ff_r210_decoder;
+extern AVCodec ff_rasc_decoder;
+extern AVCodec ff_rawvideo_encoder;
+extern AVCodec ff_rawvideo_decoder;
+extern AVCodec ff_rl2_decoder;
+extern AVCodec ff_roq_encoder;
+extern AVCodec ff_roq_decoder;
+extern AVCodec ff_rpza_decoder;
+extern AVCodec ff_rscc_decoder;
+extern AVCodec ff_rv10_encoder;
+extern AVCodec ff_rv10_decoder;
+extern AVCodec ff_rv20_encoder;
+extern AVCodec ff_rv20_decoder;
+extern AVCodec ff_rv30_decoder;
+extern AVCodec ff_rv40_decoder;
+extern AVCodec ff_s302m_encoder;
+extern AVCodec ff_s302m_decoder;
+extern AVCodec ff_sanm_decoder;
+extern AVCodec ff_scpr_decoder;
+extern AVCodec ff_screenpresso_decoder;
+extern AVCodec ff_sdx2_dpcm_decoder;
+extern AVCodec ff_sgi_encoder;
+extern AVCodec ff_sgi_decoder;
+extern AVCodec ff_sgirle_decoder;
+extern AVCodec ff_sheervideo_decoder;
+extern AVCodec ff_smacker_decoder;
+extern AVCodec ff_smc_decoder;
+extern AVCodec ff_smvjpeg_decoder;
+extern AVCodec ff_snow_encoder;
+extern AVCodec ff_snow_decoder;
+extern AVCodec ff_sp5x_decoder;
+extern AVCodec ff_speedhq_decoder;
+extern AVCodec ff_srgc_decoder;
+extern AVCodec ff_sunrast_encoder;
+extern AVCodec ff_sunrast_decoder;
+extern AVCodec ff_svq1_encoder;
+extern AVCodec ff_svq1_decoder;
+extern AVCodec ff_svq3_decoder;
+extern AVCodec ff_targa_encoder;
+extern AVCodec ff_targa_decoder;
+extern AVCodec ff_targa_y216_decoder;
+extern AVCodec ff_tdsc_decoder;
+extern AVCodec ff_theora_decoder;
+extern AVCodec ff_thp_decoder;
+extern AVCodec ff_tiertexseqvideo_decoder;
+extern AVCodec ff_tiff_encoder;
+extern AVCodec ff_tiff_decoder;
+extern AVCodec ff_tmv_decoder;
+extern AVCodec ff_truemotion1_decoder;
+extern AVCodec ff_truemotion2_decoder;
+extern AVCodec ff_truemotion2rt_decoder;
+extern AVCodec ff_tscc_decoder;
+extern AVCodec ff_tscc2_decoder;
+extern AVCodec ff_txd_decoder;
+extern AVCodec ff_ulti_decoder;
+extern AVCodec ff_utvideo_encoder;
+extern AVCodec ff_utvideo_decoder;
+extern AVCodec ff_v210_encoder;
+extern AVCodec ff_v210_decoder;
+extern AVCodec ff_v210x_decoder;
+extern AVCodec ff_v308_encoder;
+extern AVCodec ff_v308_decoder;
+extern AVCodec ff_v408_encoder;
+extern AVCodec ff_v408_decoder;
+extern AVCodec ff_v410_encoder;
+extern AVCodec ff_v410_decoder;
+extern AVCodec ff_vb_decoder;
+extern AVCodec ff_vble_decoder;
+extern AVCodec ff_vc1_decoder;
+extern AVCodec ff_vc1_crystalhd_decoder;
+extern AVCodec ff_vc1image_decoder;
+extern AVCodec ff_vc1_mmal_decoder;
+extern AVCodec ff_vc1_qsv_decoder;
+extern AVCodec ff_vc1_v4l2m2m_decoder;
+extern AVCodec ff_vc2_encoder;
+extern AVCodec ff_vcr1_decoder;
+extern AVCodec ff_vmdvideo_decoder;
+extern AVCodec ff_vmnc_decoder;
+extern AVCodec ff_vp3_decoder;
+extern AVCodec ff_vp5_decoder;
+extern AVCodec ff_vp6_decoder;
+extern AVCodec ff_vp6a_decoder;
+extern AVCodec ff_vp6f_decoder;
+extern AVCodec ff_vp7_decoder;
+extern AVCodec ff_vp8_decoder;
+extern AVCodec ff_vp8_rkmpp_decoder;
+extern AVCodec ff_vp8_v4l2m2m_decoder;
+extern AVCodec ff_vp9_decoder;
+extern AVCodec ff_vp9_rkmpp_decoder;
+extern AVCodec ff_vp9_v4l2m2m_decoder;
+extern AVCodec ff_vqa_decoder;
+extern AVCodec ff_webp_decoder;
+extern AVCodec ff_wcmv_decoder;
+extern AVCodec ff_wrapped_avframe_encoder;
+extern AVCodec ff_wrapped_avframe_decoder;
+extern AVCodec ff_wmv1_encoder;
+extern AVCodec ff_wmv1_decoder;
+extern AVCodec ff_wmv2_encoder;
+extern AVCodec ff_wmv2_decoder;
+extern AVCodec ff_wmv3_decoder;
+extern AVCodec ff_wmv3_crystalhd_decoder;
+extern AVCodec ff_wmv3image_decoder;
+extern AVCodec ff_wnv1_decoder;
+extern AVCodec ff_xan_wc3_decoder;
+extern AVCodec ff_xan_wc4_decoder;
+extern AVCodec ff_xbm_encoder;
+extern AVCodec ff_xbm_decoder;
+extern AVCodec ff_xface_encoder;
+extern AVCodec ff_xface_decoder;
+extern AVCodec ff_xl_decoder;
+extern AVCodec ff_xpm_decoder;
+extern AVCodec ff_xwd_encoder;
+extern AVCodec ff_xwd_decoder;
+extern AVCodec ff_y41p_encoder;
+extern AVCodec ff_y41p_decoder;
+extern AVCodec ff_ylc_decoder;
+extern AVCodec ff_yop_decoder;
+extern AVCodec ff_yuv4_encoder;
+extern AVCodec ff_yuv4_decoder;
+extern AVCodec ff_zero12v_decoder;
+extern AVCodec ff_zerocodec_decoder;
+extern AVCodec ff_zlib_encoder;
+extern AVCodec ff_zlib_decoder;
+extern AVCodec ff_zmbv_encoder;
+extern AVCodec ff_zmbv_decoder;
 
-#define REGISTER_ENCODER(X, x)                                          \
-    {                                                                   \
-        extern AVCodec ff_##x##_encoder;                                \
-        if (CONFIG_##X##_ENCODER)                                       \
-            avcodec_register(&ff_##x##_encoder);                        \
-    }
+/* audio codecs */
+extern AVCodec ff_aac_encoder;
+extern AVCodec ff_aac_decoder;
+extern AVCodec ff_aac_fixed_decoder;
+extern AVCodec ff_aac_latm_decoder;
+extern AVCodec ff_ac3_encoder;
+extern AVCodec ff_ac3_decoder;
+extern AVCodec ff_ac3_fixed_encoder;
+extern AVCodec ff_ac3_fixed_decoder;
+extern AVCodec ff_alac_encoder;
+extern AVCodec ff_alac_decoder;
+extern AVCodec ff_als_decoder;
+extern AVCodec ff_amrnb_decoder;
+extern AVCodec ff_amrwb_decoder;
+extern AVCodec ff_ape_decoder;
+extern AVCodec ff_aptx_encoder;
+extern AVCodec ff_aptx_decoder;
+extern AVCodec ff_aptx_hd_encoder;
+extern AVCodec ff_aptx_hd_decoder;
+extern AVCodec ff_atrac1_decoder;
+extern AVCodec ff_atrac3_decoder;
+extern AVCodec ff_atrac3al_decoder;
+extern AVCodec ff_atrac3p_decoder;
+extern AVCodec ff_atrac3pal_decoder;
+extern AVCodec ff_atrac9_decoder;
+extern AVCodec ff_binkaudio_dct_decoder;
+extern AVCodec ff_binkaudio_rdft_decoder;
+extern AVCodec ff_bmv_audio_decoder;
+extern AVCodec ff_cook_decoder;
+extern AVCodec ff_dca_encoder;
+extern AVCodec ff_dca_decoder;
+extern AVCodec ff_dolby_e_decoder;
+extern AVCodec ff_dsd_lsbf_decoder;
+extern AVCodec ff_dsd_msbf_decoder;
+extern AVCodec ff_dsd_lsbf_planar_decoder;
+extern AVCodec ff_dsd_msbf_planar_decoder;
+extern AVCodec ff_dsicinaudio_decoder;
+extern AVCodec ff_dss_sp_decoder;
+extern AVCodec ff_dst_decoder;
+extern AVCodec ff_eac3_encoder;
+extern AVCodec ff_eac3_decoder;
+extern AVCodec ff_evrc_decoder;
+extern AVCodec ff_ffwavesynth_decoder;
+extern AVCodec ff_flac_encoder;
+extern AVCodec ff_flac_decoder;
+extern AVCodec ff_g723_1_encoder;
+extern AVCodec ff_g723_1_decoder;
+extern AVCodec ff_g729_decoder;
+extern AVCodec ff_gsm_decoder;
+extern AVCodec ff_gsm_ms_decoder;
+extern AVCodec ff_iac_decoder;
+extern AVCodec ff_ilbc_decoder;
+extern AVCodec ff_imc_decoder;
+extern AVCodec ff_interplay_acm_decoder;
+extern AVCodec ff_mace3_decoder;
+extern AVCodec ff_mace6_decoder;
+extern AVCodec ff_metasound_decoder;
+extern AVCodec ff_mlp_encoder;
+extern AVCodec ff_mlp_decoder;
+extern AVCodec ff_mp1_decoder;
+extern AVCodec ff_mp1float_decoder;
+extern AVCodec ff_mp2_encoder;
+extern AVCodec ff_mp2_decoder;
+extern AVCodec ff_mp2float_decoder;
+extern AVCodec ff_mp2fixed_encoder;
+extern AVCodec ff_mp3float_decoder;
+extern AVCodec ff_mp3_decoder;
+extern AVCodec ff_mp3adufloat_decoder;
+extern AVCodec ff_mp3adu_decoder;
+extern AVCodec ff_mp3on4float_decoder;
+extern AVCodec ff_mp3on4_decoder;
+extern AVCodec ff_mpc7_decoder;
+extern AVCodec ff_mpc8_decoder;
+extern AVCodec ff_nellymoser_encoder;
+extern AVCodec ff_nellymoser_decoder;
+extern AVCodec ff_on2avc_decoder;
+extern AVCodec ff_opus_encoder;
+extern AVCodec ff_opus_decoder;
+extern AVCodec ff_paf_audio_decoder;
+extern AVCodec ff_qcelp_decoder;
+extern AVCodec ff_qdm2_decoder;
+extern AVCodec ff_qdmc_decoder;
+extern AVCodec ff_ra_144_encoder;
+extern AVCodec ff_ra_144_decoder;
+extern AVCodec ff_ra_288_decoder;
+extern AVCodec ff_ralf_decoder;
+extern AVCodec ff_sbc_encoder;
+extern AVCodec ff_sbc_decoder;
+extern AVCodec ff_shorten_decoder;
+extern AVCodec ff_sipr_decoder;
+extern AVCodec ff_smackaud_decoder;
+extern AVCodec ff_sonic_encoder;
+extern AVCodec ff_sonic_decoder;
+extern AVCodec ff_sonic_ls_encoder;
+extern AVCodec ff_tak_decoder;
+extern AVCodec ff_truehd_encoder;
+extern AVCodec ff_truehd_decoder;
+extern AVCodec ff_truespeech_decoder;
+extern AVCodec ff_tta_encoder;
+extern AVCodec ff_tta_decoder;
+extern AVCodec ff_twinvq_decoder;
+extern AVCodec ff_vmdaudio_decoder;
+extern AVCodec ff_vorbis_encoder;
+extern AVCodec ff_vorbis_decoder;
+extern AVCodec ff_wavpack_encoder;
+extern AVCodec ff_wavpack_decoder;
+extern AVCodec ff_wmalossless_decoder;
+extern AVCodec ff_wmapro_decoder;
+extern AVCodec ff_wmav1_encoder;
+extern AVCodec ff_wmav1_decoder;
+extern AVCodec ff_wmav2_encoder;
+extern AVCodec ff_wmav2_decoder;
+extern AVCodec ff_wmavoice_decoder;
+extern AVCodec ff_ws_snd1_decoder;
+extern AVCodec ff_xma1_decoder;
+extern AVCodec ff_xma2_decoder;
 
-#define REGISTER_DECODER(X, x)                                          \
-    {                                                                   \
-        extern AVCodec ff_##x##_decoder;                                \
-        if (CONFIG_##X##_DECODER)                                       \
-            avcodec_register(&ff_##x##_decoder);                        \
-    }
+/* PCM codecs */
+extern AVCodec ff_pcm_alaw_encoder;
+extern AVCodec ff_pcm_alaw_decoder;
+extern AVCodec ff_pcm_bluray_decoder;
+extern AVCodec ff_pcm_dvd_decoder;
+extern AVCodec ff_pcm_f16le_decoder;
+extern AVCodec ff_pcm_f24le_decoder;
+extern AVCodec ff_pcm_f32be_encoder;
+extern AVCodec ff_pcm_f32be_decoder;
+extern AVCodec ff_pcm_f32le_encoder;
+extern AVCodec ff_pcm_f32le_decoder;
+extern AVCodec ff_pcm_f64be_encoder;
+extern AVCodec ff_pcm_f64be_decoder;
+extern AVCodec ff_pcm_f64le_encoder;
+extern AVCodec ff_pcm_f64le_decoder;
+extern AVCodec ff_pcm_lxf_decoder;
+extern AVCodec ff_pcm_mulaw_encoder;
+extern AVCodec ff_pcm_mulaw_decoder;
+extern AVCodec ff_pcm_s8_encoder;
+extern AVCodec ff_pcm_s8_decoder;
+extern AVCodec ff_pcm_s8_planar_encoder;
+extern AVCodec ff_pcm_s8_planar_decoder;
+extern AVCodec ff_pcm_s16be_encoder;
+extern AVCodec ff_pcm_s16be_decoder;
+extern AVCodec ff_pcm_s16be_planar_encoder;
+extern AVCodec ff_pcm_s16be_planar_decoder;
+extern AVCodec ff_pcm_s16le_encoder;
+extern AVCodec ff_pcm_s16le_decoder;
+extern AVCodec ff_pcm_s16le_planar_encoder;
+extern AVCodec ff_pcm_s16le_planar_decoder;
+extern AVCodec ff_pcm_s24be_encoder;
+extern AVCodec ff_pcm_s24be_decoder;
+extern AVCodec ff_pcm_s24daud_encoder;
+extern AVCodec ff_pcm_s24daud_decoder;
+extern AVCodec ff_pcm_s24le_encoder;
+extern AVCodec ff_pcm_s24le_decoder;
+extern AVCodec ff_pcm_s24le_planar_encoder;
+extern AVCodec ff_pcm_s24le_planar_decoder;
+extern AVCodec ff_pcm_s32be_encoder;
+extern AVCodec ff_pcm_s32be_decoder;
+extern AVCodec ff_pcm_s32le_encoder;
+extern AVCodec ff_pcm_s32le_decoder;
+extern AVCodec ff_pcm_s32le_planar_encoder;
+extern AVCodec ff_pcm_s32le_planar_decoder;
+extern AVCodec ff_pcm_s64be_encoder;
+extern AVCodec ff_pcm_s64be_decoder;
+extern AVCodec ff_pcm_s64le_encoder;
+extern AVCodec ff_pcm_s64le_decoder;
+extern AVCodec ff_pcm_u8_encoder;
+extern AVCodec ff_pcm_u8_decoder;
+extern AVCodec ff_pcm_u16be_encoder;
+extern AVCodec ff_pcm_u16be_decoder;
+extern AVCodec ff_pcm_u16le_encoder;
+extern AVCodec ff_pcm_u16le_decoder;
+extern AVCodec ff_pcm_u24be_encoder;
+extern AVCodec ff_pcm_u24be_decoder;
+extern AVCodec ff_pcm_u24le_encoder;
+extern AVCodec ff_pcm_u24le_decoder;
+extern AVCodec ff_pcm_u32be_encoder;
+extern AVCodec ff_pcm_u32be_decoder;
+extern AVCodec ff_pcm_u32le_encoder;
+extern AVCodec ff_pcm_u32le_decoder;
+extern AVCodec ff_pcm_zork_decoder;
 
-#define REGISTER_ENCDEC(X, x) REGISTER_ENCODER(X, x); REGISTER_DECODER(X, x)
+/* DPCM codecs */
+extern AVCodec ff_gremlin_dpcm_decoder;
+extern AVCodec ff_interplay_dpcm_decoder;
+extern AVCodec ff_roq_dpcm_encoder;
+extern AVCodec ff_roq_dpcm_decoder;
+extern AVCodec ff_sol_dpcm_decoder;
+extern AVCodec ff_xan_dpcm_decoder;
 
-#define REGISTER_PARSER(X, x)                                           \
-    {                                                                   \
-        extern AVCodecParser ff_##x##_parser;                           \
-        if (CONFIG_##X##_PARSER)                                        \
-            av_register_codec_parser(&ff_##x##_parser);                 \
-    }
+/* ADPCM codecs */
+extern AVCodec ff_adpcm_4xm_decoder;
+extern AVCodec ff_adpcm_adx_encoder;
+extern AVCodec ff_adpcm_adx_decoder;
+extern AVCodec ff_adpcm_afc_decoder;
+extern AVCodec ff_adpcm_aica_decoder;
+extern AVCodec ff_adpcm_ct_decoder;
+extern AVCodec ff_adpcm_dtk_decoder;
+extern AVCodec ff_adpcm_ea_decoder;
+extern AVCodec ff_adpcm_ea_maxis_xa_decoder;
+extern AVCodec ff_adpcm_ea_r1_decoder;
+extern AVCodec ff_adpcm_ea_r2_decoder;
+extern AVCodec ff_adpcm_ea_r3_decoder;
+extern AVCodec ff_adpcm_ea_xas_decoder;
+extern AVCodec ff_adpcm_g722_encoder;
+extern AVCodec ff_adpcm_g722_decoder;
+extern AVCodec ff_adpcm_g726_encoder;
+extern AVCodec ff_adpcm_g726_decoder;
+extern AVCodec ff_adpcm_g726le_encoder;
+extern AVCodec ff_adpcm_g726le_decoder;
+extern AVCodec ff_adpcm_ima_amv_decoder;
+extern AVCodec ff_adpcm_ima_apc_decoder;
+extern AVCodec ff_adpcm_ima_dat4_decoder;
+extern AVCodec ff_adpcm_ima_dk3_decoder;
+extern AVCodec ff_adpcm_ima_dk4_decoder;
+extern AVCodec ff_adpcm_ima_ea_eacs_decoder;
+extern AVCodec ff_adpcm_ima_ea_sead_decoder;
+extern AVCodec ff_adpcm_ima_iss_decoder;
+extern AVCodec ff_adpcm_ima_oki_decoder;
+extern AVCodec ff_adpcm_ima_qt_encoder;
+extern AVCodec ff_adpcm_ima_qt_decoder;
+extern AVCodec ff_adpcm_ima_rad_decoder;
+extern AVCodec ff_adpcm_ima_smjpeg_decoder;
+extern AVCodec ff_adpcm_ima_wav_encoder;
+extern AVCodec ff_adpcm_ima_wav_decoder;
+extern AVCodec ff_adpcm_ima_ws_decoder;
+extern AVCodec ff_adpcm_ms_encoder;
+extern AVCodec ff_adpcm_ms_decoder;
+extern AVCodec ff_adpcm_mtaf_decoder;
+extern AVCodec ff_adpcm_psx_decoder;
+extern AVCodec ff_adpcm_sbpro_2_decoder;
+extern AVCodec ff_adpcm_sbpro_3_decoder;
+extern AVCodec ff_adpcm_sbpro_4_decoder;
+extern AVCodec ff_adpcm_swf_encoder;
+extern AVCodec ff_adpcm_swf_decoder;
+extern AVCodec ff_adpcm_thp_decoder;
+extern AVCodec ff_adpcm_thp_le_decoder;
+extern AVCodec ff_adpcm_vima_decoder;
+extern AVCodec ff_adpcm_xa_decoder;
+extern AVCodec ff_adpcm_yamaha_encoder;
+extern AVCodec ff_adpcm_yamaha_decoder;
 
-static void register_all(void)
-{
-    /* hardware accelerators */
-    REGISTER_HWACCEL(H263_VAAPI,        h263_vaapi);
-    REGISTER_HWACCEL(H263_VIDEOTOOLBOX, h263_videotoolbox);
-    REGISTER_HWACCEL(H264_CUVID,        h264_cuvid);
-    REGISTER_HWACCEL(H264_D3D11VA,      h264_d3d11va);
-    REGISTER_HWACCEL(H264_D3D11VA2,     h264_d3d11va2);
-    REGISTER_HWACCEL(H264_DXVA2,        h264_dxva2);
-    REGISTER_HWACCEL(H264_MEDIACODEC,   h264_mediacodec);
-    REGISTER_HWACCEL(H264_MMAL,         h264_mmal);
-    REGISTER_HWACCEL(H264_QSV,          h264_qsv);
-    REGISTER_HWACCEL(H264_VAAPI,        h264_vaapi);
-    REGISTER_HWACCEL(H264_VDA,          h264_vda);
-    REGISTER_HWACCEL(H264_VDA_OLD,      h264_vda_old);
-    REGISTER_HWACCEL(H264_VDPAU,        h264_vdpau);
-    REGISTER_HWACCEL(H264_VIDEOTOOLBOX, h264_videotoolbox);
-    REGISTER_HWACCEL(HEVC_CUVID,        hevc_cuvid);
-    REGISTER_HWACCEL(HEVC_D3D11VA,      hevc_d3d11va);
-    REGISTER_HWACCEL(HEVC_D3D11VA2,     hevc_d3d11va2);
-    REGISTER_HWACCEL(HEVC_DXVA2,        hevc_dxva2);
-    REGISTER_HWACCEL(HEVC_MEDIACODEC,   hevc_mediacodec);
-    REGISTER_HWACCEL(HEVC_QSV,          hevc_qsv);
-    REGISTER_HWACCEL(HEVC_VAAPI,        hevc_vaapi);
-    REGISTER_HWACCEL(HEVC_VDPAU,        hevc_vdpau);
-    REGISTER_HWACCEL(HEVC_VIDEOTOOLBOX, hevc_videotoolbox);
-    REGISTER_HWACCEL(MJPEG_CUVID,       mjpeg_cuvid);
-    REGISTER_HWACCEL(MPEG1_CUVID,       mpeg1_cuvid);
-    REGISTER_HWACCEL(MPEG1_XVMC,        mpeg1_xvmc);
-    REGISTER_HWACCEL(MPEG1_VDPAU,       mpeg1_vdpau);
-    REGISTER_HWACCEL(MPEG1_VIDEOTOOLBOX, mpeg1_videotoolbox);
-    REGISTER_HWACCEL(MPEG2_CUVID,       mpeg2_cuvid);
-    REGISTER_HWACCEL(MPEG2_XVMC,        mpeg2_xvmc);
-    REGISTER_HWACCEL(MPEG2_D3D11VA,     mpeg2_d3d11va);
-    REGISTER_HWACCEL(MPEG2_D3D11VA2,    mpeg2_d3d11va2);
-    REGISTER_HWACCEL(MPEG2_DXVA2,       mpeg2_dxva2);
-    REGISTER_HWACCEL(MPEG2_MMAL,        mpeg2_mmal);
-    REGISTER_HWACCEL(MPEG2_QSV,         mpeg2_qsv);
-    REGISTER_HWACCEL(MPEG2_VAAPI,       mpeg2_vaapi);
-    REGISTER_HWACCEL(MPEG2_VDPAU,       mpeg2_vdpau);
-    REGISTER_HWACCEL(MPEG2_VIDEOTOOLBOX, mpeg2_videotoolbox);
-    REGISTER_HWACCEL(MPEG2_MEDIACODEC,  mpeg2_mediacodec);
-    REGISTER_HWACCEL(MPEG4_CUVID,       mpeg4_cuvid);
-    REGISTER_HWACCEL(MPEG4_MEDIACODEC,  mpeg4_mediacodec);
-    REGISTER_HWACCEL(MPEG4_MMAL,        mpeg4_mmal);
-    REGISTER_HWACCEL(MPEG4_VAAPI,       mpeg4_vaapi);
-    REGISTER_HWACCEL(MPEG4_VDPAU,       mpeg4_vdpau);
-    REGISTER_HWACCEL(MPEG4_VIDEOTOOLBOX, mpeg4_videotoolbox);
-    REGISTER_HWACCEL(VC1_CUVID,         vc1_cuvid);
-    REGISTER_HWACCEL(VC1_D3D11VA,       vc1_d3d11va);
-    REGISTER_HWACCEL(VC1_D3D11VA2,      vc1_d3d11va2);
-    REGISTER_HWACCEL(VC1_DXVA2,         vc1_dxva2);
-    REGISTER_HWACCEL(VC1_VAAPI,         vc1_vaapi);
-    REGISTER_HWACCEL(VC1_VDPAU,         vc1_vdpau);
-    REGISTER_HWACCEL(VC1_MMAL,          vc1_mmal);
-    REGISTER_HWACCEL(VC1_QSV,           vc1_qsv);
-    REGISTER_HWACCEL(VP8_CUVID,         vp8_cuvid);
-    REGISTER_HWACCEL(VP8_MEDIACODEC,    vp8_mediacodec);
-    REGISTER_HWACCEL(VP8_QSV,           vp8_qsv);
-    REGISTER_HWACCEL(VP9_CUVID,         vp9_cuvid);
-    REGISTER_HWACCEL(VP9_D3D11VA,       vp9_d3d11va);
-    REGISTER_HWACCEL(VP9_D3D11VA2,      vp9_d3d11va2);
-    REGISTER_HWACCEL(VP9_DXVA2,         vp9_dxva2);
-    REGISTER_HWACCEL(VP9_MEDIACODEC,    vp9_mediacodec);
-    REGISTER_HWACCEL(VP9_VAAPI,         vp9_vaapi);
-    REGISTER_HWACCEL(WMV3_D3D11VA,      wmv3_d3d11va);
-    REGISTER_HWACCEL(WMV3_D3D11VA2,     wmv3_d3d11va2);
-    REGISTER_HWACCEL(WMV3_DXVA2,        wmv3_dxva2);
-    REGISTER_HWACCEL(WMV3_VAAPI,        wmv3_vaapi);
-    REGISTER_HWACCEL(WMV3_VDPAU,        wmv3_vdpau);
+/* subtitles */
+extern AVCodec ff_ssa_encoder;
+extern AVCodec ff_ssa_decoder;
+extern AVCodec ff_ass_encoder;
+extern AVCodec ff_ass_decoder;
+extern AVCodec ff_ccaption_decoder;
+extern AVCodec ff_dvbsub_encoder;
+extern AVCodec ff_dvbsub_decoder;
+extern AVCodec ff_dvdsub_encoder;
+extern AVCodec ff_dvdsub_decoder;
+extern AVCodec ff_jacosub_decoder;
+extern AVCodec ff_microdvd_decoder;
+extern AVCodec ff_movtext_encoder;
+extern AVCodec ff_movtext_decoder;
+extern AVCodec ff_mpl2_decoder;
+extern AVCodec ff_pgssub_decoder;
+extern AVCodec ff_pjs_decoder;
+extern AVCodec ff_realtext_decoder;
+extern AVCodec ff_sami_decoder;
+extern AVCodec ff_srt_encoder;
+extern AVCodec ff_srt_decoder;
+extern AVCodec ff_stl_decoder;
+extern AVCodec ff_subrip_encoder;
+extern AVCodec ff_subrip_decoder;
+extern AVCodec ff_subviewer_decoder;
+extern AVCodec ff_subviewer1_decoder;
+extern AVCodec ff_text_encoder;
+extern AVCodec ff_text_decoder;
+extern AVCodec ff_vplayer_decoder;
+extern AVCodec ff_webvtt_encoder;
+extern AVCodec ff_webvtt_decoder;
+extern AVCodec ff_xsub_encoder;
+extern AVCodec ff_xsub_decoder;
 
-    /* video codecs */
-    REGISTER_ENCODER(A64MULTI,          a64multi);
-    REGISTER_ENCODER(A64MULTI5,         a64multi5);
-    REGISTER_DECODER(AASC,              aasc);
-    REGISTER_DECODER(AIC,               aic);
-    REGISTER_ENCDEC (ALIAS_PIX,         alias_pix);
-    REGISTER_ENCDEC (AMV,               amv);
-    REGISTER_DECODER(ANM,               anm);
-    REGISTER_DECODER(ANSI,              ansi);
-    REGISTER_ENCDEC (APNG,              apng);
-    REGISTER_ENCDEC (ASV1,              asv1);
-    REGISTER_ENCDEC (ASV2,              asv2);
-    REGISTER_DECODER(AURA,              aura);
-    REGISTER_DECODER(AURA2,             aura2);
-    REGISTER_ENCDEC (AVRP,              avrp);
-    REGISTER_DECODER(AVRN,              avrn);
-    REGISTER_DECODER(AVS,               avs);
-    REGISTER_ENCDEC (AVUI,              avui);
-    REGISTER_ENCDEC (AYUV,              ayuv);
-    REGISTER_DECODER(BETHSOFTVID,       bethsoftvid);
-    REGISTER_DECODER(BFI,               bfi);
-    REGISTER_DECODER(BINK,              bink);
-    REGISTER_ENCDEC (BMP,               bmp);
-    REGISTER_DECODER(BMV_VIDEO,         bmv_video);
-    REGISTER_DECODER(BRENDER_PIX,       brender_pix);
-    REGISTER_DECODER(C93,               c93);
-    REGISTER_DECODER(CAVS,              cavs);
-    REGISTER_DECODER(CDGRAPHICS,        cdgraphics);
-    REGISTER_DECODER(CDXL,              cdxl);
-    REGISTER_DECODER(CFHD,              cfhd);
-    REGISTER_ENCDEC (CINEPAK,           cinepak);
-    REGISTER_DECODER(CLEARVIDEO,        clearvideo);
-    REGISTER_ENCDEC (CLJR,              cljr);
-    REGISTER_DECODER(CLLC,              cllc);
-    REGISTER_ENCDEC (COMFORTNOISE,      comfortnoise);
-    REGISTER_DECODER(CPIA,              cpia);
-    REGISTER_DECODER(CSCD,              cscd);
-    REGISTER_DECODER(CYUV,              cyuv);
-    REGISTER_DECODER(DDS,               dds);
-    REGISTER_DECODER(DFA,               dfa);
-    REGISTER_DECODER(DIRAC,             dirac);
-    REGISTER_ENCDEC (DNXHD,             dnxhd);
-    REGISTER_ENCDEC (DPX,               dpx);
-    REGISTER_DECODER(DSICINVIDEO,       dsicinvideo);
-    REGISTER_DECODER(DVAUDIO,           dvaudio);
-    REGISTER_ENCDEC (DVVIDEO,           dvvideo);
-    REGISTER_DECODER(DXA,               dxa);
-    REGISTER_DECODER(DXTORY,            dxtory);
-    REGISTER_DECODER(DXV,               dxv);
-    REGISTER_DECODER(EACMV,             eacmv);
-    REGISTER_DECODER(EAMAD,             eamad);
-    REGISTER_DECODER(EATGQ,             eatgq);
-    REGISTER_DECODER(EATGV,             eatgv);
-    REGISTER_DECODER(EATQI,             eatqi);
-    REGISTER_DECODER(EIGHTBPS,          eightbps);
-    REGISTER_DECODER(EIGHTSVX_EXP,      eightsvx_exp);
-    REGISTER_DECODER(EIGHTSVX_FIB,      eightsvx_fib);
-    REGISTER_DECODER(ESCAPE124,         escape124);
-    REGISTER_DECODER(ESCAPE130,         escape130);
-    REGISTER_DECODER(EXR,               exr);
-    REGISTER_ENCDEC (FFV1,              ffv1);
-    REGISTER_ENCDEC (FFVHUFF,           ffvhuff);
-    REGISTER_DECODER(FIC,               fic);
-    REGISTER_ENCDEC (FITS,              fits);
-    REGISTER_ENCDEC (FLASHSV,           flashsv);
-    REGISTER_ENCDEC (FLASHSV2,          flashsv2);
-    REGISTER_DECODER(FLIC,              flic);
-    REGISTER_ENCDEC (FLV,               flv);
-    REGISTER_DECODER(FMVC,              fmvc);
-    REGISTER_DECODER(FOURXM,            fourxm);
-    REGISTER_DECODER(FRAPS,             fraps);
-    REGISTER_DECODER(FRWU,              frwu);
-    REGISTER_DECODER(G2M,               g2m);
-    REGISTER_DECODER(GDV,               gdv);
-    REGISTER_ENCDEC (GIF,               gif);
-    REGISTER_ENCDEC (H261,              h261);
-    REGISTER_ENCDEC (H263,              h263);
-    REGISTER_DECODER(H263I,             h263i);
-    REGISTER_ENCDEC (H263P,             h263p);
-    REGISTER_DECODER(H263_V4L2M2M,      h263_v4l2m2m);
-    REGISTER_DECODER(H264,              h264);
-    REGISTER_DECODER(H264_CRYSTALHD,    h264_crystalhd);
-    REGISTER_DECODER(H264_V4L2M2M,      h264_v4l2m2m);
-    REGISTER_DECODER(H264_MEDIACODEC,   h264_mediacodec);
-    REGISTER_DECODER(H264_MMAL,         h264_mmal);
-    REGISTER_DECODER(H264_QSV,          h264_qsv);
-    REGISTER_DECODER(H264_RKMPP,        h264_rkmpp);
-    REGISTER_DECODER(H264_VDA,          h264_vda);
-#if FF_API_VDPAU
-    REGISTER_DECODER(H264_VDPAU,        h264_vdpau);
-#endif
-    REGISTER_ENCDEC (HAP,               hap);
-    REGISTER_DECODER(HEVC,              hevc);
-    REGISTER_DECODER(HEVC_QSV,          hevc_qsv);
-    REGISTER_DECODER(HEVC_RKMPP,        hevc_rkmpp);
-    REGISTER_DECODER(HEVC_V4L2M2M,      hevc_v4l2m2m);
-    REGISTER_DECODER(HNM4_VIDEO,        hnm4_video);
-    REGISTER_DECODER(HQ_HQA,            hq_hqa);
-    REGISTER_DECODER(HQX,               hqx);
-    REGISTER_ENCDEC (HUFFYUV,           huffyuv);
-    REGISTER_DECODER(IDCIN,             idcin);
-    REGISTER_DECODER(IFF_ILBM,          iff_ilbm);
-    REGISTER_DECODER(INDEO2,            indeo2);
-    REGISTER_DECODER(INDEO3,            indeo3);
-    REGISTER_DECODER(INDEO4,            indeo4);
-    REGISTER_DECODER(INDEO5,            indeo5);
-    REGISTER_DECODER(INTERPLAY_VIDEO,   interplay_video);
-    REGISTER_ENCDEC (JPEG2000,          jpeg2000);
-    REGISTER_ENCDEC (JPEGLS,            jpegls);
-    REGISTER_DECODER(JV,                jv);
-    REGISTER_DECODER(KGV1,              kgv1);
-    REGISTER_DECODER(KMVC,              kmvc);
-    REGISTER_DECODER(LAGARITH,          lagarith);
-    REGISTER_ENCODER(LJPEG,             ljpeg);
-    REGISTER_DECODER(LOCO,              loco);
-    REGISTER_DECODER(M101,              m101);
-    REGISTER_DECODER(MAGICYUV,          magicyuv);
-    REGISTER_DECODER(MDEC,              mdec);
-    REGISTER_DECODER(MIMIC,             mimic);
-    REGISTER_ENCDEC (MJPEG,             mjpeg);
-    REGISTER_DECODER(MJPEGB,            mjpegb);
-    REGISTER_DECODER(MMVIDEO,           mmvideo);
-    REGISTER_DECODER(MOTIONPIXELS,      motionpixels);
-#if FF_API_XVMC
-    REGISTER_DECODER(MPEG_XVMC,         mpeg_xvmc);
-#endif /* FF_API_XVMC */
-    REGISTER_ENCDEC (MPEG1VIDEO,        mpeg1video);
-    REGISTER_ENCDEC (MPEG2VIDEO,        mpeg2video);
-    REGISTER_ENCDEC (MPEG4,             mpeg4);
-    REGISTER_DECODER(MPEG4_CRYSTALHD,   mpeg4_crystalhd);
-    REGISTER_DECODER(MPEG4_V4L2M2M,     mpeg4_v4l2m2m);
-    REGISTER_DECODER(MPEG4_MMAL,        mpeg4_mmal);
-#if FF_API_VDPAU
-    REGISTER_DECODER(MPEG4_VDPAU,       mpeg4_vdpau);
-#endif
-    REGISTER_DECODER(MPEGVIDEO,         mpegvideo);
-#if FF_API_VDPAU
-    REGISTER_DECODER(MPEG_VDPAU,        mpeg_vdpau);
-    REGISTER_DECODER(MPEG1_VDPAU,       mpeg1_vdpau);
-#endif
-    REGISTER_DECODER(MPEG1_V4L2M2M,     mpeg1_v4l2m2m);
-    REGISTER_DECODER(MPEG2_MMAL,        mpeg2_mmal);
-    REGISTER_DECODER(MPEG2_CRYSTALHD,   mpeg2_crystalhd);
-    REGISTER_DECODER(MPEG2_V4L2M2M,     mpeg2_v4l2m2m);
-    REGISTER_DECODER(MPEG2_QSV,         mpeg2_qsv);
-    REGISTER_DECODER(MPEG2_MEDIACODEC,  mpeg2_mediacodec);
-    REGISTER_DECODER(MSA1,              msa1);
-    REGISTER_DECODER(MSCC,              mscc);
-    REGISTER_DECODER(MSMPEG4V1,         msmpeg4v1);
-    REGISTER_ENCDEC (MSMPEG4V2,         msmpeg4v2);
-    REGISTER_ENCDEC (MSMPEG4V3,         msmpeg4v3);
-    REGISTER_DECODER(MSMPEG4_CRYSTALHD, msmpeg4_crystalhd);
-    REGISTER_DECODER(MSRLE,             msrle);
-    REGISTER_DECODER(MSS1,              mss1);
-    REGISTER_DECODER(MSS2,              mss2);
-    REGISTER_ENCDEC (MSVIDEO1,          msvideo1);
-    REGISTER_DECODER(MSZH,              mszh);
-    REGISTER_DECODER(MTS2,              mts2);
-    REGISTER_DECODER(MVC1,              mvc1);
-    REGISTER_DECODER(MVC2,              mvc2);
-    REGISTER_DECODER(MXPEG,             mxpeg);
-    REGISTER_DECODER(NUV,               nuv);
-    REGISTER_DECODER(PAF_VIDEO,         paf_video);
-    REGISTER_ENCDEC (PAM,               pam);
-    REGISTER_ENCDEC (PBM,               pbm);
-    REGISTER_ENCDEC (PCX,               pcx);
-    REGISTER_ENCDEC (PGM,               pgm);
-    REGISTER_ENCDEC (PGMYUV,            pgmyuv);
-    REGISTER_DECODER(PICTOR,            pictor);
-    REGISTER_DECODER(PIXLET,            pixlet);
-    REGISTER_ENCDEC (PNG,               png);
-    REGISTER_ENCDEC (PPM,               ppm);
-    REGISTER_ENCDEC (PRORES,            prores);
-    REGISTER_ENCODER(PRORES_AW,         prores_aw);
-    REGISTER_ENCODER(PRORES_KS,         prores_ks);
-    REGISTER_DECODER(PRORES_LGPL,       prores_lgpl);
-    REGISTER_DECODER(PSD,               psd);
-    REGISTER_DECODER(PTX,               ptx);
-    REGISTER_DECODER(QDRAW,             qdraw);
-    REGISTER_DECODER(QPEG,              qpeg);
-    REGISTER_ENCDEC (QTRLE,             qtrle);
-    REGISTER_ENCDEC (R10K,              r10k);
-    REGISTER_ENCDEC (R210,              r210);
-    REGISTER_ENCDEC (RAWVIDEO,          rawvideo);
-    REGISTER_DECODER(RL2,               rl2);
-    REGISTER_ENCDEC (ROQ,               roq);
-    REGISTER_DECODER(RPZA,              rpza);
-    REGISTER_DECODER(RSCC,              rscc);
-    REGISTER_ENCDEC (RV10,              rv10);
-    REGISTER_ENCDEC (RV20,              rv20);
-    REGISTER_DECODER(RV30,              rv30);
-    REGISTER_DECODER(RV40,              rv40);
-    REGISTER_ENCDEC (S302M,             s302m);
-    REGISTER_DECODER(SANM,              sanm);
-    REGISTER_DECODER(SCPR,              scpr);
-    REGISTER_DECODER(SCREENPRESSO,      screenpresso);
-    REGISTER_DECODER(SDX2_DPCM,         sdx2_dpcm);
-    REGISTER_ENCDEC (SGI,               sgi);
-    REGISTER_DECODER(SGIRLE,            sgirle);
-    REGISTER_DECODER(SHEERVIDEO,        sheervideo);
-    REGISTER_DECODER(SMACKER,           smacker);
-    REGISTER_DECODER(SMC,               smc);
-    REGISTER_DECODER(SMVJPEG,           smvjpeg);
-    REGISTER_ENCDEC (SNOW,              snow);
-    REGISTER_DECODER(SP5X,              sp5x);
-    REGISTER_DECODER(SPEEDHQ,           speedhq);
-    REGISTER_DECODER(SRGC,              srgc);
-    REGISTER_ENCDEC (SUNRAST,           sunrast);
-    REGISTER_ENCDEC (SVQ1,              svq1);
-    REGISTER_DECODER(SVQ3,              svq3);
-    REGISTER_ENCDEC (TARGA,             targa);
-    REGISTER_DECODER(TARGA_Y216,        targa_y216);
-    REGISTER_DECODER(TDSC,              tdsc);
-    REGISTER_DECODER(THEORA,            theora);
-    REGISTER_DECODER(THP,               thp);
-    REGISTER_DECODER(TIERTEXSEQVIDEO,   tiertexseqvideo);
-    REGISTER_ENCDEC (TIFF,              tiff);
-    REGISTER_DECODER(TMV,               tmv);
-    REGISTER_DECODER(TRUEMOTION1,       truemotion1);
-    REGISTER_DECODER(TRUEMOTION2,       truemotion2);
-    REGISTER_DECODER(TRUEMOTION2RT,     truemotion2rt);
-    REGISTER_DECODER(TSCC,              tscc);
-    REGISTER_DECODER(TSCC2,             tscc2);
-    REGISTER_DECODER(TXD,               txd);
-    REGISTER_DECODER(ULTI,              ulti);
-    REGISTER_ENCDEC (UTVIDEO,           utvideo);
-    REGISTER_ENCDEC (V210,              v210);
-    REGISTER_DECODER(V210X,             v210x);
-    REGISTER_ENCDEC (V308,              v308);
-    REGISTER_ENCDEC (V408,              v408);
-    REGISTER_ENCDEC (V410,              v410);
-    REGISTER_DECODER(VB,                vb);
-    REGISTER_DECODER(VBLE,              vble);
-    REGISTER_DECODER(VC1,               vc1);
-    REGISTER_DECODER(VC1_CRYSTALHD,     vc1_crystalhd);
-#if FF_API_VDPAU
-    REGISTER_DECODER(VC1_VDPAU,         vc1_vdpau);
-#endif
-    REGISTER_DECODER(VC1IMAGE,          vc1image);
-    REGISTER_DECODER(VC1_MMAL,          vc1_mmal);
-    REGISTER_DECODER(VC1_QSV,           vc1_qsv);
-    REGISTER_DECODER(VC1_V4L2M2M,       vc1_v4l2m2m);
-    REGISTER_ENCODER(VC2,               vc2);
-    REGISTER_DECODER(VCR1,              vcr1);
-    REGISTER_DECODER(VMDVIDEO,          vmdvideo);
-    REGISTER_DECODER(VMNC,              vmnc);
-    REGISTER_DECODER(VP3,               vp3);
-    REGISTER_DECODER(VP5,               vp5);
-    REGISTER_DECODER(VP6,               vp6);
-    REGISTER_DECODER(VP6A,              vp6a);
-    REGISTER_DECODER(VP6F,              vp6f);
-    REGISTER_DECODER(VP7,               vp7);
-    REGISTER_DECODER(VP8,               vp8);
-    REGISTER_DECODER(VP8_RKMPP,         vp8_rkmpp);
-    REGISTER_DECODER(VP8_V4L2M2M,       vp8_v4l2m2m);
-    REGISTER_DECODER(VP9,               vp9);
-    REGISTER_DECODER(VP9_RKMPP,         vp9_rkmpp);
-    REGISTER_DECODER(VP9_V4L2M2M,       vp9_v4l2m2m);
-    REGISTER_DECODER(VQA,               vqa);
-    REGISTER_DECODER(BITPACKED,         bitpacked);
-    REGISTER_DECODER(WEBP,              webp);
-    REGISTER_ENCDEC (WRAPPED_AVFRAME,   wrapped_avframe);
-    REGISTER_ENCDEC (WMV1,              wmv1);
-    REGISTER_ENCDEC (WMV2,              wmv2);
-    REGISTER_DECODER(WMV3,              wmv3);
-    REGISTER_DECODER(WMV3_CRYSTALHD,    wmv3_crystalhd);
-#if FF_API_VDPAU
-    REGISTER_DECODER(WMV3_VDPAU,        wmv3_vdpau);
-#endif
-    REGISTER_DECODER(WMV3IMAGE,         wmv3image);
-    REGISTER_DECODER(WNV1,              wnv1);
-    REGISTER_DECODER(XAN_WC3,           xan_wc3);
-    REGISTER_DECODER(XAN_WC4,           xan_wc4);
-    REGISTER_ENCDEC (XBM,               xbm);
-    REGISTER_ENCDEC (XFACE,             xface);
-    REGISTER_DECODER(XL,                xl);
-    REGISTER_DECODER(XPM,               xpm);
-    REGISTER_ENCDEC (XWD,               xwd);
-    REGISTER_ENCDEC (Y41P,              y41p);
-    REGISTER_DECODER(YLC,               ylc);
-    REGISTER_DECODER(YOP,               yop);
-    REGISTER_ENCDEC (YUV4,              yuv4);
-    REGISTER_DECODER(ZERO12V,           zero12v);
-    REGISTER_DECODER(ZEROCODEC,         zerocodec);
-    REGISTER_ENCDEC (ZLIB,              zlib);
-    REGISTER_ENCDEC (ZMBV,              zmbv);
+/* external libraries */
+extern AVCodec ff_aac_at_encoder;
+extern AVCodec ff_aac_at_decoder;
+extern AVCodec ff_ac3_at_decoder;
+extern AVCodec ff_adpcm_ima_qt_at_decoder;
+extern AVCodec ff_alac_at_encoder;
+extern AVCodec ff_alac_at_decoder;
+extern AVCodec ff_amr_nb_at_decoder;
+extern AVCodec ff_eac3_at_decoder;
+extern AVCodec ff_gsm_ms_at_decoder;
+extern AVCodec ff_ilbc_at_encoder;
+extern AVCodec ff_ilbc_at_decoder;
+extern AVCodec ff_mp1_at_decoder;
+extern AVCodec ff_mp2_at_decoder;
+extern AVCodec ff_mp3_at_decoder;
+extern AVCodec ff_pcm_alaw_at_encoder;
+extern AVCodec ff_pcm_alaw_at_decoder;
+extern AVCodec ff_pcm_mulaw_at_encoder;
+extern AVCodec ff_pcm_mulaw_at_decoder;
+extern AVCodec ff_qdmc_at_decoder;
+extern AVCodec ff_qdm2_at_decoder;
+extern AVCodec ff_libaom_av1_decoder;
+extern AVCodec ff_libaom_av1_encoder;
+extern AVCodec ff_libcelt_decoder;
+extern AVCodec ff_libcodec2_encoder;
+extern AVCodec ff_libcodec2_decoder;
+extern AVCodec ff_libdavs2_decoder;
+extern AVCodec ff_libfdk_aac_encoder;
+extern AVCodec ff_libfdk_aac_decoder;
+extern AVCodec ff_libgsm_encoder;
+extern AVCodec ff_libgsm_decoder;
+extern AVCodec ff_libgsm_ms_encoder;
+extern AVCodec ff_libgsm_ms_decoder;
+extern AVCodec ff_libilbc_encoder;
+extern AVCodec ff_libilbc_decoder;
+extern AVCodec ff_libmp3lame_encoder;
+extern AVCodec ff_libopencore_amrnb_encoder;
+extern AVCodec ff_libopencore_amrnb_decoder;
+extern AVCodec ff_libopencore_amrwb_decoder;
+extern AVCodec ff_libopenjpeg_encoder;
+extern AVCodec ff_libopenjpeg_decoder;
+extern AVCodec ff_libopus_encoder;
+extern AVCodec ff_libopus_decoder;
+extern AVCodec ff_librsvg_decoder;
+extern AVCodec ff_libshine_encoder;
+extern AVCodec ff_libspeex_encoder;
+extern AVCodec ff_libspeex_decoder;
+extern AVCodec ff_libtheora_encoder;
+extern AVCodec ff_libtwolame_encoder;
+extern AVCodec ff_libvo_amrwbenc_encoder;
+extern AVCodec ff_libvorbis_encoder;
+extern AVCodec ff_libvorbis_decoder;
+extern AVCodec ff_libvpx_vp8_encoder;
+extern AVCodec ff_libvpx_vp8_decoder;
+extern AVCodec ff_libvpx_vp9_encoder;
+extern AVCodec ff_libvpx_vp9_decoder;
+extern AVCodec ff_libwavpack_encoder;
+/* preferred over libwebp */
+extern AVCodec ff_libwebp_anim_encoder;
+extern AVCodec ff_libwebp_encoder;
+extern AVCodec ff_libx262_encoder;
+extern AVCodec ff_libx264_encoder;
+extern AVCodec ff_libx264rgb_encoder;
+extern AVCodec ff_libx265_encoder;
+extern AVCodec ff_libxavs_encoder;
+extern AVCodec ff_libxavs2_encoder;
+extern AVCodec ff_libxvid_encoder;
+extern AVCodec ff_libzvbi_teletext_decoder;
 
-    /* audio codecs */
-    REGISTER_ENCDEC (AAC,               aac);
-    REGISTER_DECODER(AAC_FIXED,         aac_fixed);
-    REGISTER_DECODER(AAC_LATM,          aac_latm);
-    REGISTER_ENCDEC (AC3,               ac3);
-    REGISTER_ENCDEC (AC3_FIXED,         ac3_fixed);
-    REGISTER_ENCDEC (ALAC,              alac);
-    REGISTER_DECODER(ALS,               als);
-    REGISTER_DECODER(AMRNB,             amrnb);
-    REGISTER_DECODER(AMRWB,             amrwb);
-    REGISTER_DECODER(APE,               ape);
-    REGISTER_DECODER(ATRAC1,            atrac1);
-    REGISTER_DECODER(ATRAC3,            atrac3);
-    REGISTER_DECODER(ATRAC3AL,          atrac3al);
-    REGISTER_DECODER(ATRAC3P,           atrac3p);
-    REGISTER_DECODER(ATRAC3PAL,         atrac3pal);
-    REGISTER_DECODER(BINKAUDIO_DCT,     binkaudio_dct);
-    REGISTER_DECODER(BINKAUDIO_RDFT,    binkaudio_rdft);
-    REGISTER_DECODER(BMV_AUDIO,         bmv_audio);
-    REGISTER_DECODER(COOK,              cook);
-    REGISTER_ENCDEC (DCA,               dca);
-    REGISTER_DECODER(DOLBY_E,           dolby_e);
-    REGISTER_DECODER(DSD_LSBF,          dsd_lsbf);
-    REGISTER_DECODER(DSD_MSBF,          dsd_msbf);
-    REGISTER_DECODER(DSD_LSBF_PLANAR,   dsd_lsbf_planar);
-    REGISTER_DECODER(DSD_MSBF_PLANAR,   dsd_msbf_planar);
-    REGISTER_DECODER(DSICINAUDIO,       dsicinaudio);
-    REGISTER_DECODER(DSS_SP,            dss_sp);
-    REGISTER_DECODER(DST,               dst);
-    REGISTER_ENCDEC (EAC3,              eac3);
-    REGISTER_DECODER(EVRC,              evrc);
-    REGISTER_DECODER(FFWAVESYNTH,       ffwavesynth);
-    REGISTER_ENCDEC (FLAC,              flac);
-    REGISTER_ENCDEC (G723_1,            g723_1);
-    REGISTER_DECODER(G729,              g729);
-    REGISTER_DECODER(GSM,               gsm);
-    REGISTER_DECODER(GSM_MS,            gsm_ms);
-    REGISTER_DECODER(IAC,               iac);
-    REGISTER_DECODER(IMC,               imc);
-    REGISTER_DECODER(INTERPLAY_ACM,     interplay_acm);
-    REGISTER_DECODER(MACE3,             mace3);
-    REGISTER_DECODER(MACE6,             mace6);
-    REGISTER_DECODER(METASOUND,         metasound);
-    REGISTER_ENCDEC (MLP,               mlp);
-    REGISTER_DECODER(MP1,               mp1);
-    REGISTER_DECODER(MP1FLOAT,          mp1float);
-    REGISTER_ENCDEC (MP2,               mp2);
-    REGISTER_DECODER(MP2FLOAT,          mp2float);
-    REGISTER_ENCODER(MP2FIXED,          mp2fixed);
-    REGISTER_DECODER(MP3,               mp3);
-    REGISTER_DECODER(MP3FLOAT,          mp3float);
-    REGISTER_DECODER(MP3ADU,            mp3adu);
-    REGISTER_DECODER(MP3ADUFLOAT,       mp3adufloat);
-    REGISTER_DECODER(MP3ON4,            mp3on4);
-    REGISTER_DECODER(MP3ON4FLOAT,       mp3on4float);
-    REGISTER_DECODER(MPC7,              mpc7);
-    REGISTER_DECODER(MPC8,              mpc8);
-    REGISTER_ENCDEC (NELLYMOSER,        nellymoser);
-    REGISTER_DECODER(ON2AVC,            on2avc);
-    REGISTER_ENCDEC (OPUS,              opus);
-    REGISTER_DECODER(PAF_AUDIO,         paf_audio);
-    REGISTER_DECODER(QCELP,             qcelp);
-    REGISTER_DECODER(QDM2,              qdm2);
-    REGISTER_DECODER(QDMC,              qdmc);
-    REGISTER_ENCDEC (RA_144,            ra_144);
-    REGISTER_DECODER(RA_288,            ra_288);
-    REGISTER_DECODER(RALF,              ralf);
-    REGISTER_DECODER(SHORTEN,           shorten);
-    REGISTER_DECODER(SIPR,              sipr);
-    REGISTER_DECODER(SMACKAUD,          smackaud);
-    REGISTER_ENCDEC (SONIC,             sonic);
-    REGISTER_ENCODER(SONIC_LS,          sonic_ls);
-    REGISTER_DECODER(TAK,               tak);
-    REGISTER_ENCDEC (TRUEHD,            truehd);
-    REGISTER_DECODER(TRUESPEECH,        truespeech);
-    REGISTER_ENCDEC (TTA,               tta);
-    REGISTER_DECODER(TWINVQ,            twinvq);
-    REGISTER_DECODER(VMDAUDIO,          vmdaudio);
-    REGISTER_ENCDEC (VORBIS,            vorbis);
-    REGISTER_ENCDEC (WAVPACK,           wavpack);
-    REGISTER_DECODER(WMALOSSLESS,       wmalossless);
-    REGISTER_DECODER(WMAPRO,            wmapro);
-    REGISTER_ENCDEC (WMAV1,             wmav1);
-    REGISTER_ENCDEC (WMAV2,             wmav2);
-    REGISTER_DECODER(WMAVOICE,          wmavoice);
-    REGISTER_DECODER(WS_SND1,           ws_snd1);
-    REGISTER_DECODER(XMA1,              xma1);
-    REGISTER_DECODER(XMA2,              xma2);
+/* text */
+extern AVCodec ff_bintext_decoder;
+extern AVCodec ff_xbin_decoder;
+extern AVCodec ff_idf_decoder;
 
-    /* PCM codecs */
-    REGISTER_ENCDEC (PCM_ALAW,          pcm_alaw);
-    REGISTER_DECODER(PCM_BLURAY,        pcm_bluray);
-    REGISTER_DECODER(PCM_DVD,           pcm_dvd);
-    REGISTER_DECODER(PCM_F16LE,         pcm_f16le);
-    REGISTER_DECODER(PCM_F24LE,         pcm_f24le);
-    REGISTER_ENCDEC (PCM_F32BE,         pcm_f32be);
-    REGISTER_ENCDEC (PCM_F32LE,         pcm_f32le);
-    REGISTER_ENCDEC (PCM_F64BE,         pcm_f64be);
-    REGISTER_ENCDEC (PCM_F64LE,         pcm_f64le);
-    REGISTER_DECODER(PCM_LXF,           pcm_lxf);
-    REGISTER_ENCDEC (PCM_MULAW,         pcm_mulaw);
-    REGISTER_ENCDEC (PCM_S8,            pcm_s8);
-    REGISTER_ENCDEC (PCM_S8_PLANAR,     pcm_s8_planar);
-    REGISTER_ENCDEC (PCM_S16BE,         pcm_s16be);
-    REGISTER_ENCDEC (PCM_S16BE_PLANAR,  pcm_s16be_planar);
-    REGISTER_ENCDEC (PCM_S16LE,         pcm_s16le);
-    REGISTER_ENCDEC (PCM_S16LE_PLANAR,  pcm_s16le_planar);
-    REGISTER_ENCDEC (PCM_S24BE,         pcm_s24be);
-    REGISTER_ENCDEC (PCM_S24DAUD,       pcm_s24daud);
-    REGISTER_ENCDEC (PCM_S24LE,         pcm_s24le);
-    REGISTER_ENCDEC (PCM_S24LE_PLANAR,  pcm_s24le_planar);
-    REGISTER_ENCDEC (PCM_S32BE,         pcm_s32be);
-    REGISTER_ENCDEC (PCM_S32LE,         pcm_s32le);
-    REGISTER_ENCDEC (PCM_S32LE_PLANAR,  pcm_s32le_planar);
-    REGISTER_ENCDEC (PCM_S64BE,         pcm_s64be);
-    REGISTER_ENCDEC (PCM_S64LE,         pcm_s64le);
-    REGISTER_ENCDEC (PCM_U8,            pcm_u8);
-    REGISTER_ENCDEC (PCM_U16BE,         pcm_u16be);
-    REGISTER_ENCDEC (PCM_U16LE,         pcm_u16le);
-    REGISTER_ENCDEC (PCM_U24BE,         pcm_u24be);
-    REGISTER_ENCDEC (PCM_U24LE,         pcm_u24le);
-    REGISTER_ENCDEC (PCM_U32BE,         pcm_u32be);
-    REGISTER_ENCDEC (PCM_U32LE,         pcm_u32le);
-    REGISTER_DECODER(PCM_ZORK,          pcm_zork);
-
-    /* DPCM codecs */
-    REGISTER_DECODER(GREMLIN_DPCM,      gremlin_dpcm);
-    REGISTER_DECODER(INTERPLAY_DPCM,    interplay_dpcm);
-    REGISTER_ENCDEC (ROQ_DPCM,          roq_dpcm);
-    REGISTER_DECODER(SOL_DPCM,          sol_dpcm);
-    REGISTER_DECODER(XAN_DPCM,          xan_dpcm);
-
-    /* ADPCM codecs */
-    REGISTER_DECODER(ADPCM_4XM,         adpcm_4xm);
-    REGISTER_ENCDEC (ADPCM_ADX,         adpcm_adx);
-    REGISTER_DECODER(ADPCM_AFC,         adpcm_afc);
-    REGISTER_DECODER(ADPCM_AICA,        adpcm_aica);
-    REGISTER_DECODER(ADPCM_CT,          adpcm_ct);
-    REGISTER_DECODER(ADPCM_DTK,         adpcm_dtk);
-    REGISTER_DECODER(ADPCM_EA,          adpcm_ea);
-    REGISTER_DECODER(ADPCM_EA_MAXIS_XA, adpcm_ea_maxis_xa);
-    REGISTER_DECODER(ADPCM_EA_R1,       adpcm_ea_r1);
-    REGISTER_DECODER(ADPCM_EA_R2,       adpcm_ea_r2);
-    REGISTER_DECODER(ADPCM_EA_R3,       adpcm_ea_r3);
-    REGISTER_DECODER(ADPCM_EA_XAS,      adpcm_ea_xas);
-    REGISTER_ENCDEC (ADPCM_G722,        adpcm_g722);
-    REGISTER_ENCDEC (ADPCM_G726,        adpcm_g726);
-    REGISTER_ENCDEC (ADPCM_G726LE,      adpcm_g726le);
-    REGISTER_DECODER(ADPCM_IMA_AMV,     adpcm_ima_amv);
-    REGISTER_DECODER(ADPCM_IMA_APC,     adpcm_ima_apc);
-    REGISTER_DECODER(ADPCM_IMA_DAT4,    adpcm_ima_dat4);
-    REGISTER_DECODER(ADPCM_IMA_DK3,     adpcm_ima_dk3);
-    REGISTER_DECODER(ADPCM_IMA_DK4,     adpcm_ima_dk4);
-    REGISTER_DECODER(ADPCM_IMA_EA_EACS, adpcm_ima_ea_eacs);
-    REGISTER_DECODER(ADPCM_IMA_EA_SEAD, adpcm_ima_ea_sead);
-    REGISTER_DECODER(ADPCM_IMA_ISS,     adpcm_ima_iss);
-    REGISTER_DECODER(ADPCM_IMA_OKI,     adpcm_ima_oki);
-    REGISTER_ENCDEC (ADPCM_IMA_QT,      adpcm_ima_qt);
-    REGISTER_DECODER(ADPCM_IMA_RAD,     adpcm_ima_rad);
-    REGISTER_DECODER(ADPCM_IMA_SMJPEG,  adpcm_ima_smjpeg);
-    REGISTER_ENCDEC (ADPCM_IMA_WAV,     adpcm_ima_wav);
-    REGISTER_DECODER(ADPCM_IMA_WS,      adpcm_ima_ws);
-    REGISTER_ENCDEC (ADPCM_MS,          adpcm_ms);
-    REGISTER_DECODER(ADPCM_MTAF,        adpcm_mtaf);
-    REGISTER_DECODER(ADPCM_PSX,         adpcm_psx);
-    REGISTER_DECODER(ADPCM_SBPRO_2,     adpcm_sbpro_2);
-    REGISTER_DECODER(ADPCM_SBPRO_3,     adpcm_sbpro_3);
-    REGISTER_DECODER(ADPCM_SBPRO_4,     adpcm_sbpro_4);
-    REGISTER_ENCDEC (ADPCM_SWF,         adpcm_swf);
-    REGISTER_DECODER(ADPCM_THP,         adpcm_thp);
-    REGISTER_DECODER(ADPCM_THP_LE,      adpcm_thp_le);
-    REGISTER_DECODER(ADPCM_VIMA,        adpcm_vima);
-    REGISTER_DECODER(ADPCM_XA,          adpcm_xa);
-    REGISTER_ENCDEC (ADPCM_YAMAHA,      adpcm_yamaha);
-
-    /* subtitles */
-    REGISTER_ENCDEC (SSA,               ssa);
-    REGISTER_ENCDEC (ASS,               ass);
-    REGISTER_DECODER(CCAPTION,          ccaption);
-    REGISTER_ENCDEC (DVBSUB,            dvbsub);
-    REGISTER_ENCDEC (DVDSUB,            dvdsub);
-    REGISTER_DECODER(JACOSUB,           jacosub);
-    REGISTER_DECODER(MICRODVD,          microdvd);
-    REGISTER_ENCDEC (MOVTEXT,           movtext);
-    REGISTER_DECODER(MPL2,              mpl2);
-    REGISTER_DECODER(PGSSUB,            pgssub);
-    REGISTER_DECODER(PJS,               pjs);
-    REGISTER_DECODER(REALTEXT,          realtext);
-    REGISTER_DECODER(SAMI,              sami);
-    REGISTER_ENCDEC (SRT,               srt);
-    REGISTER_DECODER(STL,               stl);
-    REGISTER_ENCDEC (SUBRIP,            subrip);
-    REGISTER_DECODER(SUBVIEWER,         subviewer);
-    REGISTER_DECODER(SUBVIEWER1,        subviewer1);
-    REGISTER_ENCDEC (TEXT,              text);
-    REGISTER_DECODER(VPLAYER,           vplayer);
-    REGISTER_ENCDEC (WEBVTT,            webvtt);
-    REGISTER_ENCDEC (XSUB,              xsub);
-
-    /* external libraries */
-    REGISTER_ENCDEC (AAC_AT,            aac_at);
-    REGISTER_DECODER(AC3_AT,            ac3_at);
-    REGISTER_DECODER(ADPCM_IMA_QT_AT,   adpcm_ima_qt_at);
-    REGISTER_ENCDEC (ALAC_AT,           alac_at);
-    REGISTER_DECODER(AMR_NB_AT,         amr_nb_at);
-    REGISTER_DECODER(EAC3_AT,           eac3_at);
-    REGISTER_DECODER(GSM_MS_AT,         gsm_ms_at);
-    REGISTER_ENCDEC (ILBC_AT,           ilbc_at);
-    REGISTER_DECODER(MP1_AT,            mp1_at);
-    REGISTER_DECODER(MP2_AT,            mp2_at);
-    REGISTER_DECODER(MP3_AT,            mp3_at);
-    REGISTER_ENCDEC (PCM_ALAW_AT,       pcm_alaw_at);
-    REGISTER_ENCDEC (PCM_MULAW_AT,      pcm_mulaw_at);
-    REGISTER_DECODER(QDMC_AT,           qdmc_at);
-    REGISTER_DECODER(QDM2_AT,           qdm2_at);
-    REGISTER_DECODER(LIBCELT,           libcelt);
-    REGISTER_ENCDEC (LIBFDK_AAC,        libfdk_aac);
-    REGISTER_ENCDEC (LIBGSM,            libgsm);
-    REGISTER_ENCDEC (LIBGSM_MS,         libgsm_ms);
-    REGISTER_ENCDEC (LIBILBC,           libilbc);
-    REGISTER_ENCODER(LIBMP3LAME,        libmp3lame);
-    REGISTER_ENCDEC (LIBOPENCORE_AMRNB, libopencore_amrnb);
-    REGISTER_DECODER(LIBOPENCORE_AMRWB, libopencore_amrwb);
-    REGISTER_ENCDEC (LIBOPENJPEG,       libopenjpeg);
-    REGISTER_ENCDEC (LIBOPUS,           libopus);
-    REGISTER_DECODER(LIBRSVG,           librsvg);
-    REGISTER_ENCODER(LIBSHINE,          libshine);
-    REGISTER_ENCDEC (LIBSPEEX,          libspeex);
-    REGISTER_ENCODER(LIBTHEORA,         libtheora);
-    REGISTER_ENCODER(LIBTWOLAME,        libtwolame);
-    REGISTER_ENCODER(LIBVO_AMRWBENC,    libvo_amrwbenc);
-    REGISTER_ENCDEC (LIBVORBIS,         libvorbis);
-    REGISTER_ENCDEC (LIBVPX_VP8,        libvpx_vp8);
-    REGISTER_ENCDEC (LIBVPX_VP9,        libvpx_vp9);
-    REGISTER_ENCODER(LIBWAVPACK,        libwavpack);
-    REGISTER_ENCODER(LIBWEBP_ANIM,      libwebp_anim);  /* preferred over libwebp */
-    REGISTER_ENCODER(LIBWEBP,           libwebp);
-    REGISTER_ENCODER(LIBX262,           libx262);
-    REGISTER_ENCODER(LIBX264,           libx264);
-    REGISTER_ENCODER(LIBX264RGB,        libx264rgb);
-    REGISTER_ENCODER(LIBX265,           libx265);
-    REGISTER_ENCODER(LIBXAVS,           libxavs);
-    REGISTER_ENCODER(LIBXVID,           libxvid);
-    REGISTER_DECODER(LIBZVBI_TELETEXT,  libzvbi_teletext);
-
-    /* text */
-    REGISTER_DECODER(BINTEXT,           bintext);
-    REGISTER_DECODER(XBIN,              xbin);
-    REGISTER_DECODER(IDF,               idf);
-
-    /* external libraries, that shouldn't be used by default if one of the
-     * above is available */
-    REGISTER_ENCODER(H263_V4L2M2M,      h263_v4l2m2m);
-    REGISTER_ENCDEC (LIBOPENH264,       libopenh264);
-    REGISTER_DECODER(H264_CUVID,        h264_cuvid);
-    REGISTER_ENCODER(H264_NVENC,        h264_nvenc);
-    REGISTER_ENCODER(H264_OMX,          h264_omx);
-    REGISTER_ENCODER(H264_QSV,          h264_qsv);
-    REGISTER_ENCODER(H264_V4L2M2M,      h264_v4l2m2m);
-    REGISTER_ENCODER(H264_VAAPI,        h264_vaapi);
-    REGISTER_ENCODER(H264_VIDEOTOOLBOX, h264_videotoolbox);
+/* external libraries, that shouldn't be used by default if one of the
+ * above is available */
+extern AVCodec ff_h263_v4l2m2m_encoder;
+extern AVCodec ff_libopenh264_encoder;
+extern AVCodec ff_libopenh264_decoder;
+extern AVCodec ff_h264_amf_encoder;
+extern AVCodec ff_h264_cuvid_decoder;
+extern AVCodec ff_h264_nvenc_encoder;
+extern AVCodec ff_h264_omx_encoder;
+extern AVCodec ff_h264_qsv_encoder;
+extern AVCodec ff_h264_v4l2m2m_encoder;
+extern AVCodec ff_h264_vaapi_encoder;
+extern AVCodec ff_h264_videotoolbox_encoder;
 #if FF_API_NVENC_OLD_NAME
-    REGISTER_ENCODER(NVENC,             nvenc);
-    REGISTER_ENCODER(NVENC_H264,        nvenc_h264);
-    REGISTER_ENCODER(NVENC_HEVC,        nvenc_hevc);
+extern AVCodec ff_nvenc_encoder;
+extern AVCodec ff_nvenc_h264_encoder;
+extern AVCodec ff_nvenc_hevc_encoder;
 #endif
-    REGISTER_DECODER(HEVC_CUVID,        hevc_cuvid);
-    REGISTER_DECODER(HEVC_MEDIACODEC,   hevc_mediacodec);
-    REGISTER_ENCODER(HEVC_NVENC,        hevc_nvenc);
-    REGISTER_ENCODER(HEVC_QSV,          hevc_qsv);
-    REGISTER_ENCODER(HEVC_V4L2M2M,      hevc_v4l2m2m);
-    REGISTER_ENCODER(HEVC_VAAPI,        hevc_vaapi);
-    REGISTER_ENCODER(LIBKVAZAAR,        libkvazaar);
-    REGISTER_DECODER(MJPEG_CUVID,       mjpeg_cuvid);
-    REGISTER_ENCODER(MJPEG_VAAPI,       mjpeg_vaapi);
-    REGISTER_DECODER(MPEG1_CUVID,       mpeg1_cuvid);
-    REGISTER_DECODER(MPEG2_CUVID,       mpeg2_cuvid);
-    REGISTER_ENCODER(MPEG2_QSV,         mpeg2_qsv);
-    REGISTER_ENCODER(MPEG2_VAAPI,       mpeg2_vaapi);
-    REGISTER_DECODER(MPEG4_CUVID,       mpeg4_cuvid);
-    REGISTER_DECODER(MPEG4_MEDIACODEC,  mpeg4_mediacodec);
-    REGISTER_ENCODER(MPEG4_V4L2M2M,     mpeg4_v4l2m2m);
-    REGISTER_DECODER(VC1_CUVID,         vc1_cuvid);
-    REGISTER_DECODER(VP8_CUVID,         vp8_cuvid);
-    REGISTER_DECODER(VP8_MEDIACODEC,    vp8_mediacodec);
-    REGISTER_DECODER(VP8_QSV,           vp8_qsv);
-    REGISTER_ENCODER(VP8_V4L2M2M,       vp8_v4l2m2m);
-    REGISTER_ENCODER(VP8_VAAPI,         vp8_vaapi);
-    REGISTER_DECODER(VP9_CUVID,         vp9_cuvid);
-    REGISTER_DECODER(VP9_MEDIACODEC,    vp9_mediacodec);
-    REGISTER_ENCODER(VP9_VAAPI,         vp9_vaapi);
+extern AVCodec ff_hevc_amf_encoder;
+extern AVCodec ff_hevc_cuvid_decoder;
+extern AVCodec ff_hevc_mediacodec_decoder;
+extern AVCodec ff_hevc_nvenc_encoder;
+extern AVCodec ff_hevc_qsv_encoder;
+extern AVCodec ff_hevc_v4l2m2m_encoder;
+extern AVCodec ff_hevc_vaapi_encoder;
+extern AVCodec ff_hevc_videotoolbox_encoder;
+extern AVCodec ff_libkvazaar_encoder;
+extern AVCodec ff_mjpeg_cuvid_decoder;
+extern AVCodec ff_mjpeg_qsv_encoder;
+extern AVCodec ff_mjpeg_vaapi_encoder;
+extern AVCodec ff_mpeg1_cuvid_decoder;
+extern AVCodec ff_mpeg2_cuvid_decoder;
+extern AVCodec ff_mpeg2_qsv_encoder;
+extern AVCodec ff_mpeg2_vaapi_encoder;
+extern AVCodec ff_mpeg4_cuvid_decoder;
+extern AVCodec ff_mpeg4_mediacodec_decoder;
+extern AVCodec ff_mpeg4_v4l2m2m_encoder;
+extern AVCodec ff_vc1_cuvid_decoder;
+extern AVCodec ff_vp8_cuvid_decoder;
+extern AVCodec ff_vp8_mediacodec_decoder;
+extern AVCodec ff_vp8_qsv_decoder;
+extern AVCodec ff_vp8_v4l2m2m_encoder;
+extern AVCodec ff_vp8_vaapi_encoder;
+extern AVCodec ff_vp9_cuvid_decoder;
+extern AVCodec ff_vp9_mediacodec_decoder;
+extern AVCodec ff_vp9_vaapi_encoder;
 
-    /* parsers */
-    REGISTER_PARSER(AAC,                aac);
-    REGISTER_PARSER(AAC_LATM,           aac_latm);
-    REGISTER_PARSER(AC3,                ac3);
-    REGISTER_PARSER(ADX,                adx);
-    REGISTER_PARSER(BMP,                bmp);
-    REGISTER_PARSER(CAVSVIDEO,          cavsvideo);
-    REGISTER_PARSER(COOK,               cook);
-    REGISTER_PARSER(DCA,                dca);
-    REGISTER_PARSER(DIRAC,              dirac);
-    REGISTER_PARSER(DNXHD,              dnxhd);
-    REGISTER_PARSER(DPX,                dpx);
-    REGISTER_PARSER(DVAUDIO,            dvaudio);
-    REGISTER_PARSER(DVBSUB,             dvbsub);
-    REGISTER_PARSER(DVDSUB,             dvdsub);
-    REGISTER_PARSER(DVD_NAV,            dvd_nav);
-    REGISTER_PARSER(FLAC,               flac);
-    REGISTER_PARSER(G729,               g729);
-    REGISTER_PARSER(GSM,                gsm);
-    REGISTER_PARSER(H261,               h261);
-    REGISTER_PARSER(H263,               h263);
-    REGISTER_PARSER(H264,               h264);
-    REGISTER_PARSER(HEVC,               hevc);
-    REGISTER_PARSER(MJPEG,              mjpeg);
-    REGISTER_PARSER(MLP,                mlp);
-    REGISTER_PARSER(MPEG4VIDEO,         mpeg4video);
-    REGISTER_PARSER(MPEGAUDIO,          mpegaudio);
-    REGISTER_PARSER(MPEGVIDEO,          mpegvideo);
-    REGISTER_PARSER(OPUS,               opus);
-    REGISTER_PARSER(PNG,                png);
-    REGISTER_PARSER(PNM,                pnm);
-    REGISTER_PARSER(RV30,               rv30);
-    REGISTER_PARSER(RV40,               rv40);
-    REGISTER_PARSER(SIPR,               sipr);
-    REGISTER_PARSER(TAK,                tak);
-    REGISTER_PARSER(VC1,                vc1);
-    REGISTER_PARSER(VORBIS,             vorbis);
-    REGISTER_PARSER(VP3,                vp3);
-    REGISTER_PARSER(VP8,                vp8);
-    REGISTER_PARSER(VP9,                vp9);
-    REGISTER_PARSER(XMA,                xma);
+// The iterate API is not usable with ossfuzz due to the excessive size of binaries created
+#if CONFIG_OSSFUZZ
+AVCodec * codec_list[] = {
+    NULL,
+    NULL
+};
+#else
+#include "libavcodec/codec_list.c"
+#endif
+
+static AVOnce av_codec_static_init = AV_ONCE_INIT;
+static void av_codec_init_static(void)
+{
+    for (int i = 0; codec_list[i]; i++) {
+        if (codec_list[i]->init_static_data)
+            codec_list[i]->init_static_data((AVCodec*)codec_list[i]);
+    }
+}
+
+const AVCodec *av_codec_iterate(void **opaque)
+{
+    uintptr_t i = (uintptr_t)*opaque;
+    const AVCodec *c = codec_list[i];
+
+    ff_thread_once(&av_codec_static_init, av_codec_init_static);
+
+    if (c)
+        *opaque = (void*)(i + 1);
+
+    return c;
+}
+
+#if FF_API_NEXT
+FF_DISABLE_DEPRECATION_WARNINGS
+static AVOnce av_codec_next_init = AV_ONCE_INIT;
+
+static void av_codec_init_next(void)
+{
+    AVCodec *prev = NULL, *p;
+    void *i = 0;
+    while ((p = (AVCodec*)av_codec_iterate(&i))) {
+        if (prev)
+            prev->next = p;
+        prev = p;
+    }
+}
+
+
+
+av_cold void avcodec_register(AVCodec *codec)
+{
+    ff_thread_once(&av_codec_next_init, av_codec_init_next);
+}
+
+AVCodec *av_codec_next(const AVCodec *c)
+{
+    ff_thread_once(&av_codec_next_init, av_codec_init_next);
+
+    if (c)
+        return c->next;
+    else
+        return (AVCodec*)codec_list[0];
 }
 
 void avcodec_register_all(void)
 {
-    static AVOnce control = AV_ONCE_INIT;
+    ff_thread_once(&av_codec_next_init, av_codec_init_next);
+}
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
-    ff_thread_once(&control, register_all);
+static enum AVCodecID remap_deprecated_codec_id(enum AVCodecID id)
+{
+    switch(id){
+        //This is for future deprecatec codec ids, its empty since
+        //last major bump but will fill up again over time, please don't remove it
+        default                                         : return id;
+    }
+}
+
+static AVCodec *find_codec(enum AVCodecID id, int (*x)(const AVCodec *))
+{
+    const AVCodec *p, *experimental = NULL;
+    void *i = 0;
+
+    id = remap_deprecated_codec_id(id);
+
+    while ((p = av_codec_iterate(&i))) {
+        if (!x(p))
+            continue;
+        if (p->id == id) {
+            if (p->capabilities & AV_CODEC_CAP_EXPERIMENTAL && !experimental) {
+                experimental = p;
+            } else
+                return (AVCodec*)p;
+        }
+    }
+
+    return (AVCodec*)experimental;
+}
+
+AVCodec *avcodec_find_encoder(enum AVCodecID id)
+{
+    return find_codec(id, av_codec_is_encoder);
+}
+
+AVCodec *avcodec_find_decoder(enum AVCodecID id)
+{
+    return find_codec(id, av_codec_is_decoder);
+}
+
+static AVCodec *find_codec_by_name(const char *name, int (*x)(const AVCodec *))
+{
+    void *i = 0;
+    const AVCodec *p;
+
+    if (!name)
+        return NULL;
+
+    while ((p = av_codec_iterate(&i))) {
+        if (!x(p))
+            continue;
+        if (strcmp(name, p->name) == 0)
+            return (AVCodec*)p;
+    }
+
+    return NULL;
+}
+
+AVCodec *avcodec_find_encoder_by_name(const char *name)
+{
+    return find_codec_by_name(name, av_codec_is_encoder);
+}
+
+AVCodec *avcodec_find_decoder_by_name(const char *name)
+{
+    return find_codec_by_name(name, av_codec_is_decoder);
 }

diff --git a/libavcodec/alpha/asm.h b/libavcodec/alpha/asm.h
index 827721e..6d850ce 100644
--- a/libavcodec/alpha/asm.h
+++ b/libavcodec/alpha/asm.h

@@ -146,39 +146,6 @@
 #define unpkbw(a)    ({ uint64_t __r; __asm__ (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
 #endif
 
-#elif defined(__DECC)           /* Digital/Compaq/hp "ccc" compiler */
-
-#include <c_asm.h>
-#define ldq(p) (*(const uint64_t *) (p))
-#define ldl(p) (*(const int32_t *)  (p))
-#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
-#define stl(l, p) do { *(int32_t *)  (p) = (l); } while (0)
-#define ldq_u(a)     asm ("ldq_u   %v0,0(%a0)", a)
-#define uldq(a)      (*(const __unaligned uint64_t *) (a))
-#define cmpbge(a, b) asm ("cmpbge  %a0,%a1,%v0", a, b)
-#define extql(a, b)  asm ("extql   %a0,%a1,%v0", a, b)
-#define extwl(a, b)  asm ("extwl   %a0,%a1,%v0", a, b)
-#define extqh(a, b)  asm ("extqh   %a0,%a1,%v0", a, b)
-#define zap(a, b)    asm ("zap     %a0,%a1,%v0", a, b)
-#define zapnot(a, b) asm ("zapnot  %a0,%a1,%v0", a, b)
-#define amask(a)     asm ("amask   %a0,%v0", a)
-#define implver()    asm ("implver %v0")
-#define rpcc()       asm ("rpcc           %v0")
-#define minub8(a, b) asm ("minub8  %a0,%a1,%v0", a, b)
-#define minsb8(a, b) asm ("minsb8  %a0,%a1,%v0", a, b)
-#define minuw4(a, b) asm ("minuw4  %a0,%a1,%v0", a, b)
-#define minsw4(a, b) asm ("minsw4  %a0,%a1,%v0", a, b)
-#define maxub8(a, b) asm ("maxub8  %a0,%a1,%v0", a, b)
-#define maxsb8(a, b) asm ("maxsb8  %a0,%a1,%v0", a, b)
-#define maxuw4(a, b) asm ("maxuw4  %a0,%a1,%v0", a, b)
-#define maxsw4(a, b) asm ("maxsw4  %a0,%a1,%v0", a, b)
-#define perr(a, b)   asm ("perr    %a0,%a1,%v0", a, b)
-#define pklb(a)      asm ("pklb    %a0,%v0", a)
-#define pkwb(a)      asm ("pkwb    %a0,%v0", a)
-#define unpkbl(a)    asm ("unpkbl  %a0,%v0", a)
-#define unpkbw(a)    asm ("unpkbw  %a0,%v0", a)
-#define wh64(a)      asm ("wh64    %a0", a)
-
 #else
 #error "Unknown compiler!"
 #endif

diff --git a/libavcodec/alpha/idctdsp_alpha.c b/libavcodec/alpha/idctdsp_alpha.c
index 1923ebb..bd43842 100644
--- a/libavcodec/alpha/idctdsp_alpha.c
+++ b/libavcodec/alpha/idctdsp_alpha.c

@@ -118,8 +118,7 @@
     add_pixels_clamped_axp_p = c->add_pixels_clamped;
 
     if (!high_bit_depth && !avctx->lowres &&
-        (avctx->idct_algo == FF_IDCT_AUTO ||
-         avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) {
+        (avctx->idct_algo == FF_IDCT_AUTO)) {
         c->idct_put = ff_simple_idct_put_axp;
         c->idct_add = ff_simple_idct_add_axp;
         c->idct =     ff_simple_idct_axp;

diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c
index 13bd52f..ca8701e 100644
--- a/libavcodec/alsdec.c
+++ b/libavcodec/alsdec.c

@@ -704,11 +704,6 @@
         } else {
             *bd->opt_order = sconf->max_order;
         }
-        if (*bd->opt_order > bd->block_length) {
-            *bd->opt_order = bd->block_length;
-            av_log(avctx, AV_LOG_ERROR, "Predictor order too large.\n");
-            return AVERROR_INVALIDDATA;
-        }
         opt_order = *bd->opt_order;
 
         if (opt_order) {
@@ -925,7 +920,7 @@
 
     // reconstruct all samples from residuals
     if (bd->ra_block) {
-        for (smp = 0; smp < opt_order; smp++) {
+        for (smp = 0; smp < FFMIN(opt_order, block_length); smp++) {
             y = 1 << 19;
 
             for (sb = 0; sb < smp; sb++)

diff --git a/libavcodec/amfenc.c b/libavcodec/amfenc.c
new file mode 100644
index 0000000..384d8ef
--- /dev/null
+++ b/libavcodec/amfenc.c

@@ -0,0 +1,780 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/hwcontext.h"
+#if CONFIG_D3D11VA
+#include "libavutil/hwcontext_d3d11va.h"
+#endif
+#if CONFIG_DXVA2
+#define COBJMACROS
+#include "libavutil/hwcontext_dxva2.h"
+#endif
+#include "libavutil/mem.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/time.h"
+
+#include "amfenc.h"
+#include "internal.h"
+
+#if CONFIG_D3D11VA
+#include <d3d11.h>
+#endif
+
+#ifdef _WIN32
+#include "compat/w32dlfcn.h"
+#else
+#include <dlfcn.h>
+#endif
+
+#define FFMPEG_AMF_WRITER_ID L"ffmpeg_amf"
+
+#define PTS_PROP L"PtsProp"
+
+const enum AVPixelFormat ff_amf_pix_fmts[] = {
+    AV_PIX_FMT_NV12,
+    AV_PIX_FMT_YUV420P,
+#if CONFIG_D3D11VA
+    AV_PIX_FMT_D3D11,
+#endif
+#if CONFIG_DXVA2
+    AV_PIX_FMT_DXVA2_VLD,
+#endif
+    AV_PIX_FMT_NONE
+};
+
+typedef struct FormatMap {
+    enum AVPixelFormat       av_format;
+    enum AMF_SURFACE_FORMAT  amf_format;
+} FormatMap;
+
+static const FormatMap format_map[] =
+{
+    { AV_PIX_FMT_NONE,       AMF_SURFACE_UNKNOWN },
+    { AV_PIX_FMT_NV12,       AMF_SURFACE_NV12 },
+    { AV_PIX_FMT_BGR0,       AMF_SURFACE_BGRA },
+    { AV_PIX_FMT_RGB0,       AMF_SURFACE_RGBA },
+    { AV_PIX_FMT_GRAY8,      AMF_SURFACE_GRAY8 },
+    { AV_PIX_FMT_YUV420P,    AMF_SURFACE_YUV420P },
+    { AV_PIX_FMT_YUYV422,    AMF_SURFACE_YUY2 },
+};
+
+static enum AMF_SURFACE_FORMAT amf_av_to_amf_format(enum AVPixelFormat fmt)
+{
+    int i;
+    for (i = 0; i < amf_countof(format_map); i++) {
+        if (format_map[i].av_format == fmt) {
+            return format_map[i].amf_format;
+        }
+    }
+    return AMF_SURFACE_UNKNOWN;
+}
+
+static void AMF_CDECL_CALL AMFTraceWriter_Write(AMFTraceWriter *pThis,
+    const wchar_t *scope, const wchar_t *message)
+{
+    AmfTraceWriter *tracer = (AmfTraceWriter*)pThis;
+    av_log(tracer->avctx, AV_LOG_DEBUG, "%ls: %ls", scope, message); // \n is provided from AMF
+}
+
+static void AMF_CDECL_CALL AMFTraceWriter_Flush(AMFTraceWriter *pThis)
+{
+}
+
+static AMFTraceWriterVtbl tracer_vtbl =
+{
+    .Write = AMFTraceWriter_Write,
+    .Flush = AMFTraceWriter_Flush,
+};
+
+static int amf_load_library(AVCodecContext *avctx)
+{
+    AmfContext        *ctx = avctx->priv_data;
+    AMFInit_Fn         init_fun;
+    AMFQueryVersion_Fn version_fun;
+    AMF_RESULT         res;
+
+    ctx->delayed_frame = av_frame_alloc();
+    if (!ctx->delayed_frame) {
+        return AVERROR(ENOMEM);
+    }
+    // hardcoded to current HW queue size - will realloc in timestamp_queue_enqueue() if too small
+    ctx->timestamp_list = av_fifo_alloc((avctx->max_b_frames + 16) * sizeof(int64_t));
+    if (!ctx->timestamp_list) {
+        return AVERROR(ENOMEM);
+    }
+    ctx->dts_delay = 0;
+
+
+    ctx->library = dlopen(AMF_DLL_NAMEA, RTLD_NOW | RTLD_LOCAL);
+    AMF_RETURN_IF_FALSE(ctx, ctx->library != NULL,
+        AVERROR_UNKNOWN, "DLL %s failed to open\n", AMF_DLL_NAMEA);
+
+    init_fun = (AMFInit_Fn)dlsym(ctx->library, AMF_INIT_FUNCTION_NAME);
+    AMF_RETURN_IF_FALSE(ctx, init_fun != NULL, AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_INIT_FUNCTION_NAME);
+
+    version_fun = (AMFQueryVersion_Fn)dlsym(ctx->library, AMF_QUERY_VERSION_FUNCTION_NAME);
+    AMF_RETURN_IF_FALSE(ctx, version_fun != NULL, AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_QUERY_VERSION_FUNCTION_NAME);
+
+    res = version_fun(&ctx->version);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_QUERY_VERSION_FUNCTION_NAME, res);
+    res = init_fun(AMF_FULL_VERSION, &ctx->factory);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_INIT_FUNCTION_NAME, res);
+    res = ctx->factory->pVtbl->GetTrace(ctx->factory, &ctx->trace);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetTrace() failed with error %d\n", res);
+    res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetDebug() failed with error %d\n", res);
+    return 0;
+}
+
+#if CONFIG_D3D11VA
+static int amf_init_from_d3d11_device(AVCodecContext *avctx, AVD3D11VADeviceContext *hwctx)
+{
+    AmfContext *ctx = avctx->priv_data;
+    AMF_RESULT res;
+
+    res = ctx->context->pVtbl->InitDX11(ctx->context, hwctx->device, AMF_DX11_1);
+    if (res != AMF_OK) {
+        if (res == AMF_NOT_SUPPORTED)
+            av_log(avctx, AV_LOG_ERROR, "AMF via D3D11 is not supported on the given device.\n");
+        else
+            av_log(avctx, AV_LOG_ERROR, "AMF failed to initialise on the given D3D11 device: %d.\n", res);
+        return AVERROR(ENODEV);
+    }
+
+    return 0;
+}
+#endif
+
+#if CONFIG_DXVA2
+static int amf_init_from_dxva2_device(AVCodecContext *avctx, AVDXVA2DeviceContext *hwctx)
+{
+    AmfContext *ctx = avctx->priv_data;
+    HANDLE device_handle;
+    IDirect3DDevice9 *device;
+    HRESULT hr;
+    AMF_RESULT res;
+    int ret;
+
+    hr = IDirect3DDeviceManager9_OpenDeviceHandle(hwctx->devmgr, &device_handle);
+    if (FAILED(hr)) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to open device handle for Direct3D9 device: %lx.\n", (unsigned long)hr);
+        return AVERROR_EXTERNAL;
+    }
+
+    hr = IDirect3DDeviceManager9_LockDevice(hwctx->devmgr, device_handle, &device, FALSE);
+    if (SUCCEEDED(hr)) {
+        IDirect3DDeviceManager9_UnlockDevice(hwctx->devmgr, device_handle, FALSE);
+        ret = 0;
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Failed to lock device handle for Direct3D9 device: %lx.\n", (unsigned long)hr);
+        ret = AVERROR_EXTERNAL;
+    }
+
+    IDirect3DDeviceManager9_CloseDeviceHandle(hwctx->devmgr, device_handle);
+
+    if (ret < 0)
+        return ret;
+
+    res = ctx->context->pVtbl->InitDX9(ctx->context, device);
+
+    IDirect3DDevice9_Release(device);
+
+    if (res != AMF_OK) {
+        if (res == AMF_NOT_SUPPORTED)
+            av_log(avctx, AV_LOG_ERROR, "AMF via D3D9 is not supported on the given device.\n");
+        else
+            av_log(avctx, AV_LOG_ERROR, "AMF failed to initialise on given D3D9 device: %d.\n", res);
+        return AVERROR(ENODEV);
+    }
+
+    return 0;
+}
+#endif
+
+static int amf_init_context(AVCodecContext *avctx)
+{
+    AmfContext *ctx = avctx->priv_data;
+    AMF_RESULT  res;
+    av_unused int ret;
+
+    ctx->hwsurfaces_in_queue = 0;
+    ctx->hwsurfaces_in_queue_max = 16;
+
+    // configure AMF logger
+    // the return of these functions indicates old state and do not affect behaviour
+    ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, ctx->log_to_dbg != 0 );
+    if (ctx->log_to_dbg)
+        ctx->trace->pVtbl->SetWriterLevel(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, AMF_TRACE_TRACE);
+    ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_CONSOLE, 0);
+    ctx->trace->pVtbl->SetGlobalLevel(ctx->trace, AMF_TRACE_TRACE);
+
+    // connect AMF logger to av_log
+    ctx->tracer.vtbl = &tracer_vtbl;
+    ctx->tracer.avctx = avctx;
+    ctx->trace->pVtbl->RegisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID,(AMFTraceWriter*)&ctx->tracer, 1);
+    ctx->trace->pVtbl->SetWriterLevel(ctx->trace, FFMPEG_AMF_WRITER_ID, AMF_TRACE_TRACE);
+
+    res = ctx->factory->pVtbl->CreateContext(ctx->factory, &ctx->context);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "CreateContext() failed with error %d\n", res);
+
+    // If a device was passed to the encoder, try to initialise from that.
+    if (avctx->hw_frames_ctx) {
+        AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+
+        if (amf_av_to_amf_format(frames_ctx->sw_format) == AMF_SURFACE_UNKNOWN) {
+            av_log(avctx, AV_LOG_ERROR, "Format of input frames context (%s) is not supported by AMF.\n",
+                   av_get_pix_fmt_name(frames_ctx->sw_format));
+            return AVERROR(EINVAL);
+        }
+
+        switch (frames_ctx->device_ctx->type) {
+#if CONFIG_D3D11VA
+        case AV_HWDEVICE_TYPE_D3D11VA:
+            ret = amf_init_from_d3d11_device(avctx, frames_ctx->device_ctx->hwctx);
+            if (ret < 0)
+                return ret;
+            break;
+#endif
+#if CONFIG_DXVA2
+        case AV_HWDEVICE_TYPE_DXVA2:
+            ret = amf_init_from_dxva2_device(avctx, frames_ctx->device_ctx->hwctx);
+            if (ret < 0)
+                return ret;
+            break;
+#endif
+        default:
+            av_log(avctx, AV_LOG_ERROR, "AMF initialisation from a %s frames context is not supported.\n",
+                   av_hwdevice_get_type_name(frames_ctx->device_ctx->type));
+            return AVERROR(ENOSYS);
+        }
+
+        ctx->hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx);
+        if (!ctx->hw_frames_ctx)
+            return AVERROR(ENOMEM);
+
+        if (frames_ctx->initial_pool_size > 0)
+            ctx->hwsurfaces_in_queue_max = frames_ctx->initial_pool_size - 1;
+
+    } else if (avctx->hw_device_ctx) {
+        AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)avctx->hw_device_ctx->data;
+
+        switch (device_ctx->type) {
+#if CONFIG_D3D11VA
+        case AV_HWDEVICE_TYPE_D3D11VA:
+            ret = amf_init_from_d3d11_device(avctx, device_ctx->hwctx);
+            if (ret < 0)
+                return ret;
+            break;
+#endif
+#if CONFIG_DXVA2
+        case AV_HWDEVICE_TYPE_DXVA2:
+            ret = amf_init_from_dxva2_device(avctx, device_ctx->hwctx);
+            if (ret < 0)
+                return ret;
+            break;
+#endif
+        default:
+            av_log(avctx, AV_LOG_ERROR, "AMF initialisation from a %s device is not supported.\n",
+                   av_hwdevice_get_type_name(device_ctx->type));
+            return AVERROR(ENOSYS);
+        }
+
+        ctx->hw_device_ctx = av_buffer_ref(avctx->hw_device_ctx);
+        if (!ctx->hw_device_ctx)
+            return AVERROR(ENOMEM);
+
+    } else {
+        res = ctx->context->pVtbl->InitDX11(ctx->context, NULL, AMF_DX11_1);
+        if (res == AMF_OK) {
+            av_log(avctx, AV_LOG_VERBOSE, "AMF initialisation succeeded via D3D11.\n");
+        } else {
+            res = ctx->context->pVtbl->InitDX9(ctx->context, NULL);
+            if (res == AMF_OK) {
+                av_log(avctx, AV_LOG_VERBOSE, "AMF initialisation succeeded via D3D9.\n");
+            } else {
+                av_log(avctx, AV_LOG_ERROR, "AMF initialisation failed via D3D9: error %d.\n", res);
+                return AVERROR(ENOSYS);
+            }
+        }
+    }
+    return 0;
+}
+
+static int amf_init_encoder(AVCodecContext *avctx)
+{
+    AmfContext        *ctx = avctx->priv_data;
+    const wchar_t     *codec_id = NULL;
+    AMF_RESULT         res;
+    enum AVPixelFormat pix_fmt;
+
+    switch (avctx->codec->id) {
+        case AV_CODEC_ID_H264:
+            codec_id = AMFVideoEncoderVCE_AVC;
+            break;
+        case AV_CODEC_ID_HEVC:
+            codec_id = AMFVideoEncoder_HEVC;
+            break;
+        default:
+            break;
+    }
+    AMF_RETURN_IF_FALSE(ctx, codec_id != NULL, AVERROR(EINVAL), "Codec %d is not supported\n", avctx->codec->id);
+
+    if (ctx->hw_frames_ctx)
+        pix_fmt = ((AVHWFramesContext*)ctx->hw_frames_ctx->data)->sw_format;
+    else
+        pix_fmt = avctx->pix_fmt;
+
+    ctx->format = amf_av_to_amf_format(pix_fmt);
+    AMF_RETURN_IF_FALSE(ctx, ctx->format != AMF_SURFACE_UNKNOWN, AVERROR(EINVAL),
+                        "Format %s is not supported\n", av_get_pix_fmt_name(pix_fmt));
+
+    res = ctx->factory->pVtbl->CreateComponent(ctx->factory, ctx->context, codec_id, &ctx->encoder);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_ENCODER_NOT_FOUND, "CreateComponent(%ls) failed with error %d\n", codec_id, res);
+
+    return 0;
+}
+
+int av_cold ff_amf_encode_close(AVCodecContext *avctx)
+{
+    AmfContext *ctx = avctx->priv_data;
+
+    if (ctx->delayed_surface) {
+        ctx->delayed_surface->pVtbl->Release(ctx->delayed_surface);
+        ctx->delayed_surface = NULL;
+    }
+
+    if (ctx->encoder) {
+        ctx->encoder->pVtbl->Terminate(ctx->encoder);
+        ctx->encoder->pVtbl->Release(ctx->encoder);
+        ctx->encoder = NULL;
+    }
+
+    if (ctx->context) {
+        ctx->context->pVtbl->Terminate(ctx->context);
+        ctx->context->pVtbl->Release(ctx->context);
+        ctx->context = NULL;
+    }
+    av_buffer_unref(&ctx->hw_device_ctx);
+    av_buffer_unref(&ctx->hw_frames_ctx);
+
+    if (ctx->trace) {
+        ctx->trace->pVtbl->UnregisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID);
+    }
+    if (ctx->library) {
+        dlclose(ctx->library);
+        ctx->library = NULL;
+    }
+    ctx->trace = NULL;
+    ctx->debug = NULL;
+    ctx->factory = NULL;
+    ctx->version = 0;
+    ctx->delayed_drain = 0;
+    av_frame_free(&ctx->delayed_frame);
+    av_fifo_freep(&ctx->timestamp_list);
+
+    return 0;
+}
+
+static int amf_copy_surface(AVCodecContext *avctx, const AVFrame *frame,
+    AMFSurface* surface)
+{
+    AMFPlane *plane;
+    uint8_t  *dst_data[4];
+    int       dst_linesize[4];
+    int       planes;
+    int       i;
+
+    planes = surface->pVtbl->GetPlanesCount(surface);
+    av_assert0(planes < FF_ARRAY_ELEMS(dst_data));
+
+    for (i = 0; i < planes; i++) {
+        plane = surface->pVtbl->GetPlaneAt(surface, i);
+        dst_data[i] = plane->pVtbl->GetNative(plane);
+        dst_linesize[i] = plane->pVtbl->GetHPitch(plane);
+    }
+    av_image_copy(dst_data, dst_linesize,
+        (const uint8_t**)frame->data, frame->linesize, frame->format,
+        avctx->width, avctx->height);
+
+    return 0;
+}
+
+static inline int timestamp_queue_enqueue(AVCodecContext *avctx, int64_t timestamp)
+{
+    AmfContext         *ctx = avctx->priv_data;
+    if (av_fifo_space(ctx->timestamp_list) < sizeof(timestamp)) {
+        if (av_fifo_grow(ctx->timestamp_list, sizeof(timestamp)) < 0) {
+            return AVERROR(ENOMEM);
+        }
+    }
+    av_fifo_generic_write(ctx->timestamp_list, &timestamp, sizeof(timestamp), NULL);
+    return 0;
+}
+
+static int amf_copy_buffer(AVCodecContext *avctx, AVPacket *pkt, AMFBuffer *buffer)
+{
+    AmfContext      *ctx = avctx->priv_data;
+    int              ret;
+    AMFVariantStruct var = {0};
+    int64_t          timestamp = AV_NOPTS_VALUE;
+    int64_t          size = buffer->pVtbl->GetSize(buffer);
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, size, 0)) < 0) {
+        return ret;
+    }
+    memcpy(pkt->data, buffer->pVtbl->GetNative(buffer), size);
+
+    switch (avctx->codec->id) {
+        case AV_CODEC_ID_H264:
+            buffer->pVtbl->GetProperty(buffer, AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE, &var);
+            if(var.int64Value == AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE_IDR) {
+                pkt->flags = AV_PKT_FLAG_KEY;
+            }
+            break;
+        case AV_CODEC_ID_HEVC:
+            buffer->pVtbl->GetProperty(buffer, AMF_VIDEO_ENCODER_HEVC_OUTPUT_DATA_TYPE, &var);
+            if (var.int64Value == AMF_VIDEO_ENCODER_HEVC_OUTPUT_DATA_TYPE_IDR) {
+                pkt->flags = AV_PKT_FLAG_KEY;
+            }
+            break;
+        default:
+            break;
+    }
+
+    buffer->pVtbl->GetProperty(buffer, PTS_PROP, &var);
+
+    pkt->pts = var.int64Value; // original pts
+
+
+    AMF_RETURN_IF_FALSE(ctx, av_fifo_size(ctx->timestamp_list) > 0, AVERROR_UNKNOWN, "timestamp_list is empty\n");
+
+    av_fifo_generic_read(ctx->timestamp_list, &timestamp, sizeof(timestamp), NULL);
+
+    // calc dts shift if max_b_frames > 0
+    if (avctx->max_b_frames > 0 && ctx->dts_delay == 0) {
+        int64_t timestamp_last = AV_NOPTS_VALUE;
+        AMF_RETURN_IF_FALSE(ctx, av_fifo_size(ctx->timestamp_list) > 0, AVERROR_UNKNOWN,
+            "timestamp_list is empty while max_b_frames = %d\n", avctx->max_b_frames);
+        av_fifo_generic_peek_at(
+            ctx->timestamp_list,
+            &timestamp_last,
+            (av_fifo_size(ctx->timestamp_list) / sizeof(timestamp) - 1) * sizeof(timestamp_last),
+            sizeof(timestamp_last),
+            NULL);
+        if (timestamp < 0 || timestamp_last < AV_NOPTS_VALUE) {
+            return AVERROR(ERANGE);
+        }
+        ctx->dts_delay = timestamp_last - timestamp;
+    }
+    pkt->dts = timestamp - ctx->dts_delay;
+    return 0;
+}
+
+// amfenc API implementation
+int ff_amf_encode_init(AVCodecContext *avctx)
+{
+    int ret;
+
+    if ((ret = amf_load_library(avctx)) == 0) {
+        if ((ret = amf_init_context(avctx)) == 0) {
+            if ((ret = amf_init_encoder(avctx)) == 0) {
+                return 0;
+            }
+        }
+    }
+    ff_amf_encode_close(avctx);
+    return ret;
+}
+
+static AMF_RESULT amf_set_property_buffer(AMFSurface *object, const wchar_t *name, AMFBuffer *val)
+{
+    AMF_RESULT res;
+    AMFVariantStruct var;
+    res = AMFVariantInit(&var);
+    if (res == AMF_OK) {
+        AMFGuid guid_AMFInterface = IID_AMFInterface();
+        AMFInterface *amf_interface;
+        res = val->pVtbl->QueryInterface(val, &guid_AMFInterface, (void**)&amf_interface);
+
+        if (res == AMF_OK) {
+            res = AMFVariantAssignInterface(&var, amf_interface);
+            amf_interface->pVtbl->Release(amf_interface);
+        }
+        if (res == AMF_OK) {
+            res = object->pVtbl->SetProperty(object, name, var);
+        }
+        AMFVariantClear(&var);
+    }
+    return res;
+}
+
+static AMF_RESULT amf_get_property_buffer(AMFData *object, const wchar_t *name, AMFBuffer **val)
+{
+    AMF_RESULT res;
+    AMFVariantStruct var;
+    res = AMFVariantInit(&var);
+    if (res == AMF_OK) {
+        res = object->pVtbl->GetProperty(object, name, &var);
+        if (res == AMF_OK) {
+            if (var.type == AMF_VARIANT_INTERFACE) {
+                AMFGuid guid_AMFBuffer = IID_AMFBuffer();
+                AMFInterface *amf_interface = AMFVariantInterface(&var);
+                res = amf_interface->pVtbl->QueryInterface(amf_interface, &guid_AMFBuffer, (void**)val);
+            } else {
+                res = AMF_INVALID_DATA_TYPE;
+            }
+        }
+        AMFVariantClear(&var);
+    }
+    return res;
+}
+
+static AMFBuffer *amf_create_buffer_with_frame_ref(const AVFrame *frame, AMFContext *context)
+{
+    AVFrame *frame_ref;
+    AMFBuffer *frame_ref_storage_buffer = NULL;
+    AMF_RESULT res;
+
+    res = context->pVtbl->AllocBuffer(context, AMF_MEMORY_HOST, sizeof(frame_ref), &frame_ref_storage_buffer);
+    if (res == AMF_OK) {
+        frame_ref = av_frame_clone(frame);
+        if (frame_ref) {
+            memcpy(frame_ref_storage_buffer->pVtbl->GetNative(frame_ref_storage_buffer), &frame_ref, sizeof(frame_ref));
+        } else {
+            frame_ref_storage_buffer->pVtbl->Release(frame_ref_storage_buffer);
+            frame_ref_storage_buffer = NULL;
+        }
+    }
+    return frame_ref_storage_buffer;
+}
+
+static void amf_release_buffer_with_frame_ref(AMFBuffer *frame_ref_storage_buffer)
+{
+    AVFrame *frame_ref;
+    memcpy(&frame_ref, frame_ref_storage_buffer->pVtbl->GetNative(frame_ref_storage_buffer), sizeof(frame_ref));
+    av_frame_free(&frame_ref);
+    frame_ref_storage_buffer->pVtbl->Release(frame_ref_storage_buffer);
+}
+
+int ff_amf_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+{
+    AmfContext *ctx = avctx->priv_data;
+    AMFSurface *surface;
+    AMF_RESULT  res;
+    int         ret;
+
+    if (!ctx->encoder)
+        return AVERROR(EINVAL);
+
+    if (!frame) { // submit drain
+        if (!ctx->eof) { // submit drain one time only
+            if (ctx->delayed_surface != NULL) {
+                ctx->delayed_drain = 1; // input queue is full: resubmit Drain() in ff_amf_receive_packet
+            } else if(!ctx->delayed_drain) {
+                res = ctx->encoder->pVtbl->Drain(ctx->encoder);
+                if (res == AMF_INPUT_FULL) {
+                    ctx->delayed_drain = 1; // input queue is full: resubmit Drain() in ff_amf_receive_packet
+                } else {
+                    if (res == AMF_OK) {
+                        ctx->eof = 1; // drain started
+                    }
+                    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "Drain() failed with error %d\n", res);
+                }
+            }
+        } else{
+            return AVERROR_EOF;
+        }
+    } else { // submit frame
+        int hw_surface = 0;
+
+        if (ctx->delayed_surface != NULL) {
+            return AVERROR(EAGAIN); // should not happen when called from ffmpeg, other clients may resubmit
+        }
+        // prepare surface from frame
+        switch (frame->format) {
+#if CONFIG_D3D11VA
+        case AV_PIX_FMT_D3D11:
+            {
+                static const GUID AMFTextureArrayIndexGUID = { 0x28115527, 0xe7c3, 0x4b66, { 0x99, 0xd3, 0x4f, 0x2a, 0xe6, 0xb4, 0x7f, 0xaf } };
+                ID3D11Texture2D *texture = (ID3D11Texture2D*)frame->data[0]; // actual texture
+                int index = (intptr_t)frame->data[1]; // index is a slice in texture array is - set to tell AMF which slice to use
+
+                av_assert0(frame->hw_frames_ctx       && ctx->hw_frames_ctx &&
+                           frame->hw_frames_ctx->data == ctx->hw_frames_ctx->data);
+
+                texture->lpVtbl->SetPrivateData(texture, &AMFTextureArrayIndexGUID, sizeof(index), &index);
+
+                res = ctx->context->pVtbl->CreateSurfaceFromDX11Native(ctx->context, texture, &surface, NULL); // wrap to AMF surface
+                AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX11Native() failed  with error %d\n", res);
+
+                hw_surface = 1;
+            }
+            break;
+#endif
+#if CONFIG_DXVA2
+        case AV_PIX_FMT_DXVA2_VLD:
+            {
+                IDirect3DSurface9 *texture = (IDirect3DSurface9 *)frame->data[3]; // actual texture
+
+                res = ctx->context->pVtbl->CreateSurfaceFromDX9Native(ctx->context, texture, &surface, NULL); // wrap to AMF surface
+                AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX9Native() failed  with error %d\n", res);
+
+                hw_surface = 1;
+            }
+            break;
+#endif
+        default:
+            {
+                res = ctx->context->pVtbl->AllocSurface(ctx->context, AMF_MEMORY_HOST, ctx->format, avctx->width, avctx->height, &surface);
+                AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "AllocSurface() failed  with error %d\n", res);
+                amf_copy_surface(avctx, frame, surface);
+            }
+            break;
+        }
+
+        if (hw_surface) {
+            AMFBuffer *frame_ref_storage_buffer;
+
+            // input HW surfaces can be vertically aligned by 16; tell AMF the real size
+            surface->pVtbl->SetCrop(surface, 0, 0, frame->width, frame->height);
+
+            frame_ref_storage_buffer = amf_create_buffer_with_frame_ref(frame, ctx->context);
+            AMF_RETURN_IF_FALSE(ctx, frame_ref_storage_buffer != NULL, AVERROR(ENOMEM), "create_buffer_with_frame_ref() returned NULL\n");
+
+            res = amf_set_property_buffer(surface, L"av_frame_ref", frame_ref_storage_buffer);
+            AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "SetProperty failed for \"av_frame_ref\" with error %d\n", res);
+            ctx->hwsurfaces_in_queue++;
+            frame_ref_storage_buffer->pVtbl->Release(frame_ref_storage_buffer);
+        }
+
+        surface->pVtbl->SetPts(surface, frame->pts);
+        AMF_ASSIGN_PROPERTY_INT64(res, surface, PTS_PROP, frame->pts);
+
+        switch (avctx->codec->id) {
+        case AV_CODEC_ID_H264:
+            AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_INSERT_AUD, !!ctx->aud);
+            break;
+        case AV_CODEC_ID_HEVC:
+            AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_HEVC_INSERT_AUD, !!ctx->aud);
+            break;
+        default:
+            break;
+        }
+
+
+        // submit surface
+        res = ctx->encoder->pVtbl->SubmitInput(ctx->encoder, (AMFData*)surface);
+        if (res == AMF_INPUT_FULL) { // handle full queue
+            //store surface for later submission
+            ctx->delayed_surface = surface;
+            if (surface->pVtbl->GetMemoryType(surface) == AMF_MEMORY_DX11) {
+                av_frame_ref(ctx->delayed_frame, frame);
+            }
+        } else {
+            surface->pVtbl->Release(surface);
+            AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "SubmitInput() failed with error %d\n", res);
+
+            if ((ret = timestamp_queue_enqueue(avctx, frame->pts)) < 0) {
+                return ret;
+            }
+
+        }
+    }
+    return 0;
+}
+int ff_amf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+{
+    int             ret;
+    AMF_RESULT      res;
+    AMF_RESULT      res_query;
+    AmfContext     *ctx = avctx->priv_data;
+    AMFData        *data = NULL;
+    int             block_and_wait;
+
+    if (!ctx->encoder)
+        return AVERROR(EINVAL);
+
+    do {
+        block_and_wait = 0;
+        // poll data
+        res_query = ctx->encoder->pVtbl->QueryOutput(ctx->encoder, &data);
+        if (data) {
+            // copy data to packet
+            AMFBuffer* buffer;
+            AMFGuid guid = IID_AMFBuffer();
+            data->pVtbl->QueryInterface(data, &guid, (void**)&buffer); // query for buffer interface
+            ret = amf_copy_buffer(avctx, avpkt, buffer);
+
+            buffer->pVtbl->Release(buffer);
+
+            if (data->pVtbl->HasProperty(data, L"av_frame_ref")) {
+                AMFBuffer *frame_ref_storage_buffer;
+                res = amf_get_property_buffer(data, L"av_frame_ref", &frame_ref_storage_buffer);
+                AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetProperty failed for \"av_frame_ref\" with error %d\n", res);
+                amf_release_buffer_with_frame_ref(frame_ref_storage_buffer);
+                ctx->hwsurfaces_in_queue--;
+            }
+
+            data->pVtbl->Release(data);
+
+            AMF_RETURN_IF_FALSE(ctx, ret >= 0, ret, "amf_copy_buffer() failed with error %d\n", ret);
+
+            if (ctx->delayed_surface != NULL) { // try to resubmit frame
+                res = ctx->encoder->pVtbl->SubmitInput(ctx->encoder, (AMFData*)ctx->delayed_surface);
+                if (res != AMF_INPUT_FULL) {
+                    int64_t pts = ctx->delayed_surface->pVtbl->GetPts(ctx->delayed_surface);
+                    ctx->delayed_surface->pVtbl->Release(ctx->delayed_surface);
+                    ctx->delayed_surface = NULL;
+                    av_frame_unref(ctx->delayed_frame);
+                    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "Repeated SubmitInput() failed with error %d\n", res);
+
+                    if ((ret = timestamp_queue_enqueue(avctx, pts)) < 0) {
+                        return ret;
+                    }
+                } else {
+                    av_log(avctx, AV_LOG_WARNING, "Data acquired but delayed frame submission got AMF_INPUT_FULL- should not happen\n");
+                }
+            } else if (ctx->delayed_drain) { // try to resubmit drain
+                res = ctx->encoder->pVtbl->Drain(ctx->encoder);
+                if (res != AMF_INPUT_FULL) {
+                    ctx->delayed_drain = 0;
+                    ctx->eof = 1; // drain started
+                    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "Repeated Drain() failed with error %d\n", res);
+                } else {
+                    av_log(avctx, AV_LOG_WARNING, "Data acquired but delayed drain submission got AMF_INPUT_FULL- should not happen\n");
+                }
+            }
+        } else if (ctx->delayed_surface != NULL || ctx->delayed_drain || (ctx->eof && res_query != AMF_EOF) || (ctx->hwsurfaces_in_queue >= ctx->hwsurfaces_in_queue_max)) {
+            block_and_wait = 1;
+            av_usleep(1000); // wait and poll again
+        }
+    } while (block_and_wait);
+
+    if (res_query == AMF_EOF) {
+        ret = AVERROR_EOF;
+    } else if (data == NULL) {
+        ret = AVERROR(EAGAIN);
+    } else {
+        ret = 0;
+    }
+    return ret;
+}

diff --git a/libavcodec/amfenc.h b/libavcodec/amfenc.h
new file mode 100644
index 0000000..b136184
--- /dev/null
+++ b/libavcodec/amfenc.h

@@ -0,0 +1,150 @@
+/*
+* This file is part of FFmpeg.
+*
+* FFmpeg is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* FFmpeg is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with FFmpeg; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef AVCODEC_AMFENC_H
+#define AVCODEC_AMFENC_H
+
+#include <AMF/core/Factory.h>
+
+#include <AMF/components/VideoEncoderVCE.h>
+#include <AMF/components/VideoEncoderHEVC.h>
+
+#include "libavutil/fifo.h"
+
+#include "avcodec.h"
+
+
+/**
+* AMF trace writer callback class
+* Used to capture all AMF logging
+*/
+
+typedef struct AmfTraceWriter {
+    AMFTraceWriterVtbl *vtbl;
+    AVCodecContext     *avctx;
+} AmfTraceWriter;
+
+/**
+* AMF encoder context
+*/
+
+typedef struct AmfContext {
+    AVClass            *avclass;
+    // access to AMF runtime
+    amf_handle          library; ///< handle to DLL library
+    AMFFactory         *factory; ///< pointer to AMF factory
+    AMFDebug           *debug;   ///< pointer to AMF debug interface
+    AMFTrace           *trace;   ///< pointer to AMF trace interface
+
+    amf_uint64          version; ///< version of AMF runtime
+    AmfTraceWriter      tracer;  ///< AMF writer registered with AMF
+    AMFContext         *context; ///< AMF context
+    //encoder
+    AMFComponent       *encoder; ///< AMF encoder object
+    amf_bool            eof;     ///< flag indicating EOF happened
+    AMF_SURFACE_FORMAT  format;  ///< AMF surface format
+
+    AVBufferRef        *hw_device_ctx; ///< pointer to HW accelerator (decoder)
+    AVBufferRef        *hw_frames_ctx; ///< pointer to HW accelerator (frame allocator)
+
+    int                 hwsurfaces_in_queue;
+    int                 hwsurfaces_in_queue_max;
+
+    // helpers to handle async calls
+    int                 delayed_drain;
+    AMFSurface         *delayed_surface;
+    AVFrame            *delayed_frame;
+
+    // shift dts back by max_b_frames in timing
+    AVFifoBuffer       *timestamp_list;
+    int64_t             dts_delay;
+
+    // common encoder option options
+
+    int                 log_to_dbg;
+
+    // Static options, have to be set before Init() call
+    int                 usage;
+    int                 profile;
+    int                 level;
+    int                 preanalysis;
+    int                 quality;
+    int                 b_frame_delta_qp;
+    int                 ref_b_frame_delta_qp;
+
+    // Dynamic options, can be set after Init() call
+
+    int                 rate_control_mode;
+    int                 enforce_hrd;
+    int                 filler_data;
+    int                 enable_vbaq;
+    int                 skip_frame;
+    int                 qp_i;
+    int                 qp_p;
+    int                 qp_b;
+    int                 max_au_size;
+    int                 header_spacing;
+    int                 b_frame_ref;
+    int                 intra_refresh_mb;
+    int                 coding_mode;
+    int                 me_half_pel;
+    int                 me_quarter_pel;
+    int                 aud;
+
+    // HEVC - specific options
+
+    int                 gops_per_idr;
+    int                 header_insertion_mode;
+    int                 min_qp_i;
+    int                 max_qp_i;
+    int                 min_qp_p;
+    int                 max_qp_p;
+    int                 tier;
+} AmfContext;
+
+/**
+* Common encoder initization function
+*/
+int ff_amf_encode_init(AVCodecContext *avctx);
+/**
+* Common encoder termination function
+*/
+int ff_amf_encode_close(AVCodecContext *avctx);
+
+/**
+* Ecoding one frame - common function for all AMF encoders
+*/
+
+int ff_amf_send_frame(AVCodecContext *avctx, const AVFrame *frame);
+int ff_amf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt);
+
+/**
+* Supported formats
+*/
+extern const enum AVPixelFormat ff_amf_pix_fmts[];
+
+/**
+* Error handling helper
+*/
+#define AMF_RETURN_IF_FALSE(avctx, exp, ret_value, /*message,*/ ...) \
+    if (!(exp)) { \
+        av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
+        return ret_value; \
+    }
+
+#endif //AVCODEC_AMFENC_H

diff --git a/libavcodec/amfenc_h264.c b/libavcodec/amfenc_h264.c
new file mode 100644
index 0000000..2c082e9
--- /dev/null
+++ b/libavcodec/amfenc_h264.c

@@ -0,0 +1,395 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+#include "amfenc.h"
+#include "internal.h"
+
+#define OFFSET(x) offsetof(AmfContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+
+static const AVOption options[] = {
+    // Static
+    /// Usage
+    { "usage",          "Encoder Usage",        OFFSET(usage),  AV_OPT_TYPE_INT,   { .i64 = AMF_VIDEO_ENCODER_USAGE_TRANSCONDING      }, AMF_VIDEO_ENCODER_USAGE_TRANSCONDING, AMF_VIDEO_ENCODER_USAGE_WEBCAM, VE, "usage" },
+    { "transcoding",    "Generic Transcoding",  0,              AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_TRANSCONDING      }, 0, 0, VE, "usage" },
+    { "ultralowlatency","",                     0,              AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_ULTRA_LOW_LATENCY }, 0, 0, VE, "usage" },
+    { "lowlatency",     "",                     0,              AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY       }, 0, 0, VE, "usage" },
+    { "webcam",         "Webcam",               0,              AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_WEBCAM            }, 0, 0, VE, "usage" },
+
+    /// Profile,
+    { "profile",        "Profile",              OFFSET(profile),AV_OPT_TYPE_INT,   { .i64 = AMF_VIDEO_ENCODER_PROFILE_MAIN                 }, AMF_VIDEO_ENCODER_PROFILE_BASELINE, AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH, VE, "profile" },
+    { "main",           "",                     0,              AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_MAIN                 }, 0, 0, VE, "profile" },
+    { "high",           "",                     0,              AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_HIGH                 }, 0, 0, VE, "profile" },
+    { "constrained_baseline", "",               0,              AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_BASELINE }, 0, 0, VE, "profile" },
+    { "constrained_high",     "",               0,              AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH     }, 0, 0, VE, "profile" },
+
+    /// Profile Level
+    { "level",          "Profile Level",        OFFSET(level),  AV_OPT_TYPE_INT,   { .i64 = 0  }, 0, 62, VE, "level" },
+    { "auto",           "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 0  }, 0, 0,  VE, "level" },
+    { "1.0",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 10 }, 0, 0,  VE, "level" },
+    { "1.1",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 11 }, 0, 0,  VE, "level" },
+    { "1.2",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 12 }, 0, 0,  VE, "level" },
+    { "1.3",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 13 }, 0, 0,  VE, "level" },
+    { "2.0",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 20 }, 0, 0,  VE, "level" },
+    { "2.1",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 21 }, 0, 0,  VE, "level" },
+    { "2.2",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 22 }, 0, 0,  VE, "level" },
+    { "3.0",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 30 }, 0, 0,  VE, "level" },
+    { "3.1",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 31 }, 0, 0,  VE, "level" },
+    { "3.2",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 32 }, 0, 0,  VE, "level" },
+    { "4.0",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 40 }, 0, 0,  VE, "level" },
+    { "4.1",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 41 }, 0, 0,  VE, "level" },
+    { "4.2",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 42 }, 0, 0,  VE, "level" },
+    { "5.0",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 50 }, 0, 0,  VE, "level" },
+    { "5.1",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 51 }, 0, 0,  VE, "level" },
+    { "5.2",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 52 }, 0, 0,  VE, "level" },
+    { "6.0",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 60 }, 0, 0,  VE, "level" },
+    { "6.1",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 61 }, 0, 0,  VE, "level" },
+    { "6.2",            "",                     0,              AV_OPT_TYPE_CONST, { .i64 = 62 }, 0, 0,  VE, "level" },
+
+
+    /// Quality Preset
+    { "quality",        "Quality Preference",                   OFFSET(quality),    AV_OPT_TYPE_INT,   { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_SPEED    }, AMF_VIDEO_ENCODER_QUALITY_PRESET_BALANCED, AMF_VIDEO_ENCODER_QUALITY_PRESET_QUALITY, VE, "quality" },
+    { "speed",          "Prefer Speed",                         0,                  AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_SPEED    },       0, 0, VE, "quality" },
+    { "balanced",       "Balanced",                             0,                  AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_BALANCED },    0, 0, VE, "quality" },
+    { "quality",        "Prefer Quality",                       0,                  AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_QUALITY  },     0, 0, VE, "quality" },
+
+    // Dynamic
+    /// Rate Control Method
+    { "rc",             "Rate Control Method",                  OFFSET(rate_control_mode), AV_OPT_TYPE_INT,   { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN }, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR, VE, "rc" },
+    { "cqp",            "Constant Quantization Parameter",      0,                         AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP             }, 0, 0, VE, "rc" },
+    { "cbr",            "Constant Bitrate",                     0,                         AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR                     }, 0, 0, VE, "rc" },
+    { "vbr_peak",       "Peak Contrained Variable Bitrate",     0,                         AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR    }, 0, 0, VE, "rc" },
+    { "vbr_latency",    "Latency Constrained Variable Bitrate", 0,                         AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR }, 0, 0, VE, "rc" },
+
+    /// Enforce HRD, Filler Data, VBAQ, Frame Skipping
+    { "enforce_hrd",    "Enforce HRD",                          OFFSET(enforce_hrd),        AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "filler_data",    "Filler Data Enable",                   OFFSET(filler_data),        AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "vbaq",           "Enable VBAQ",                          OFFSET(enable_vbaq),        AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "frame_skipping", "Rate Control Based Frame Skip",        OFFSET(skip_frame),         AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+
+    /// QP Values
+    { "qp_i",           "Quantization Parameter for I-Frame",   OFFSET(qp_i),               AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
+    { "qp_p",           "Quantization Parameter for P-Frame",   OFFSET(qp_p),               AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
+    { "qp_b",           "Quantization Parameter for B-Frame",   OFFSET(qp_b),               AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
+
+    /// Pre-Pass, Pre-Analysis, Two-Pass
+    { "preanalysis",    "Pre-Analysis Mode",                    OFFSET(preanalysis),        AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE, NULL },
+
+    /// Maximum Access Unit Size
+    { "max_au_size",    "Maximum Access Unit Size for rate control (in bits)",   OFFSET(max_au_size),        AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+
+    /// Header Insertion Spacing
+    { "header_spacing", "Header Insertion Spacing",             OFFSET(header_spacing),     AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE },
+
+    /// B-Frames
+    // BPicturesPattern=bf
+    { "bf_delta_qp",    "B-Picture Delta QP",                   OFFSET(b_frame_delta_qp),   AV_OPT_TYPE_INT,  { .i64 = 4 }, -10, 10, VE },
+    { "bf_ref",         "Enable Reference to B-Frames",         OFFSET(b_frame_ref),        AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
+    { "bf_ref_delta_qp","Reference B-Picture Delta QP",         OFFSET(ref_b_frame_delta_qp), AV_OPT_TYPE_INT,  { .i64 = 4 }, -10, 10, VE },
+
+    /// Intra-Refresh
+    { "intra_refresh_mb","Intra Refresh MBs Number Per Slot in Macroblocks",       OFFSET(intra_refresh_mb),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+
+    /// coder
+    { "coder",          "Coding Type",                          OFFSET(coding_mode),   AV_OPT_TYPE_INT,   { .i64 = AMF_VIDEO_ENCODER_UNDEFINED }, AMF_VIDEO_ENCODER_UNDEFINED, AMF_VIDEO_ENCODER_CALV, VE, "coder" },
+    { "auto",           "Automatic",                            0,                     AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_UNDEFINED }, 0, 0, VE, "coder" },
+    { "cavlc",          "Context Adaptive Variable-Length Coding", 0,                  AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_CALV },      0, 0, VE, "coder" },
+    { "cabac",          "Context Adaptive Binary Arithmetic Coding", 0,                AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_CABAC },     0, 0, VE, "coder" },
+
+    { "me_half_pel",    "Enable ME Half Pixel",                 OFFSET(me_half_pel),   AV_OPT_TYPE_BOOL,  { .i64 = 1 }, 0, 1, VE },
+    { "me_quarter_pel", "Enable ME Quarter Pixel",              OFFSET(me_quarter_pel),AV_OPT_TYPE_BOOL,  { .i64 = 1 }, 0, 1, VE },
+
+    { "aud",            "Inserts AU Delimiter NAL unit",        OFFSET(aud)          ,AV_OPT_TYPE_BOOL,  { .i64 = 0 }, 0, 1, VE },
+
+    { "log_to_dbg",     "Enable AMF logging to debug output",   OFFSET(log_to_dbg)    , AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+
+    { NULL }
+};
+
+static av_cold int amf_encode_init_h264(AVCodecContext *avctx)
+{
+    int                              ret = 0;
+    AMF_RESULT                       res = AMF_OK;
+    AmfContext                      *ctx = avctx->priv_data;
+    AMFVariantStruct                 var = { 0 };
+    amf_int64                        profile = 0;
+    amf_int64                        profile_level = 0;
+    AMFBuffer                       *buffer;
+    AMFGuid                          guid;
+    AMFRate                          framerate;
+    AMFSize                          framesize = AMFConstructSize(avctx->width, avctx->height);
+    int                              deblocking_filter = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
+
+    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
+        framerate = AMFConstructRate(avctx->framerate.num, avctx->framerate.den);
+    } else {
+        framerate = AMFConstructRate(avctx->time_base.den, avctx->time_base.num * avctx->ticks_per_frame);
+    }
+
+    if ((ret = ff_amf_encode_init(avctx)) != 0)
+        return ret;
+
+    // Static parameters
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_USAGE, ctx->usage);
+
+    AMF_ASSIGN_PROPERTY_SIZE(res, ctx->encoder, AMF_VIDEO_ENCODER_FRAMESIZE, framesize);
+
+    AMF_ASSIGN_PROPERTY_RATE(res, ctx->encoder, AMF_VIDEO_ENCODER_FRAMERATE, framerate);
+
+    switch (avctx->profile) {
+    case FF_PROFILE_H264_BASELINE:
+        profile = AMF_VIDEO_ENCODER_PROFILE_BASELINE;
+        break;
+    case FF_PROFILE_H264_MAIN:
+        profile = AMF_VIDEO_ENCODER_PROFILE_MAIN;
+        break;
+    case FF_PROFILE_H264_HIGH:
+        profile = AMF_VIDEO_ENCODER_PROFILE_HIGH;
+        break;
+    case FF_PROFILE_H264_CONSTRAINED_BASELINE:
+        profile = AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_BASELINE;
+        break;
+    case (FF_PROFILE_H264_HIGH | FF_PROFILE_H264_CONSTRAINED):
+        profile = AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH;
+        break;
+    }
+    if (profile == 0) {
+        profile = ctx->profile;
+    }
+
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PROFILE, profile);
+
+    profile_level = avctx->level;
+    if (profile_level == FF_LEVEL_UNKNOWN) {
+        profile_level = ctx->level;
+    }
+    if (profile_level != 0) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PROFILE_LEVEL, profile_level);
+    }
+
+    // Maximum Reference Frames
+    if (avctx->refs != -1) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_NUM_REFRAMES, avctx->refs);
+    }
+    if (avctx->sample_aspect_ratio.den && avctx->sample_aspect_ratio.num) {
+        AMFRatio ratio = AMFConstructRatio(avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
+        AMF_ASSIGN_PROPERTY_RATIO(res, ctx->encoder, AMF_VIDEO_ENCODER_ASPECT_RATIO, ratio);
+    }
+
+    /// Color Range (Partial/TV/MPEG or Full/PC/JPEG)
+    if (avctx->color_range == AVCOL_RANGE_JPEG) {
+        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_FULL_RANGE_COLOR, 1);
+    }
+
+    // autodetect rate control method
+    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN) {
+        if (ctx->qp_i != -1 || ctx->qp_p != -1 || ctx->qp_b != -1) {
+            ctx->rate_control_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP;
+            av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CQP\n");
+        } else if (avctx->rc_max_rate > 0 ) {
+            ctx->rate_control_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR;
+            av_log(ctx, AV_LOG_DEBUG, "Rate control turned to Peak VBR\n");
+        } else {
+            ctx->rate_control_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR;
+            av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CBR\n");
+        }
+    }
+
+    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_PREANALYSIS_ENABLE, AMF_VIDEO_ENCODER_PREENCODE_DISABLED);
+        if (ctx->preanalysis)
+            av_log(ctx, AV_LOG_WARNING, "Pre-Analysis is not supported by cqp Rate Control Method, automatically disabled\n");
+    } else {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_PREANALYSIS_ENABLE, ctx->preanalysis);
+    }
+
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QUALITY_PRESET, ctx->quality);
+
+    // Dynamic parmaters
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD, ctx->rate_control_mode);
+
+    /// VBV Buffer
+    if (avctx->rc_buffer_size != 0) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_VBV_BUFFER_SIZE, avctx->rc_buffer_size);
+        if (avctx->rc_initial_buffer_occupancy != 0) {
+            int amf_buffer_fullness = avctx->rc_initial_buffer_occupancy * 64 / avctx->rc_buffer_size;
+            if (amf_buffer_fullness > 64)
+                amf_buffer_fullness = 64;
+            AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_INITIAL_VBV_BUFFER_FULLNESS, amf_buffer_fullness);
+        }
+    }
+    /// Maximum Access Unit Size
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_AU_SIZE, ctx->max_au_size);
+
+    if (ctx->max_au_size)
+        ctx->enforce_hrd = 1;
+
+    // QP Minimum / Maximum
+    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, 0);
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_QP, 51);
+    } else {
+        if (avctx->qmin != -1) {
+            int qval = avctx->qmin > 51 ? 51 : avctx->qmin;
+            AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, qval);
+        }
+        if (avctx->qmax != -1) {
+            int qval = avctx->qmax > 51 ? 51 : avctx->qmax;
+            AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_QP, qval);
+        }
+    }
+    // QP Values
+    if (ctx->qp_i != -1)
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QP_I, ctx->qp_i);
+    if (ctx->qp_p != -1)
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QP_P, ctx->qp_p);
+    if (ctx->qp_b != -1)
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QP_B, ctx->qp_b);
+
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_TARGET_BITRATE, avctx->bit_rate);
+
+    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PEAK_BITRATE, avctx->bit_rate);
+    }
+    if (avctx->rc_max_rate) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PEAK_BITRATE, avctx->rc_max_rate);
+    } else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR) {
+        av_log(ctx, AV_LOG_WARNING, "rate control mode is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n");
+    }
+
+    // Initialize Encoder
+    res = ctx->encoder->pVtbl->Init(ctx->encoder, ctx->format, avctx->width, avctx->height);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "encoder->Init() failed with error %d\n", res);
+
+    // Enforce HRD, Filler Data, VBAQ, Frame Skipping, Deblocking Filter
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENFORCE_HRD, !!ctx->enforce_hrd);
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_FILLER_DATA_ENABLE, !!ctx->filler_data);
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_SKIP_FRAME_ENABLE, !!ctx->skip_frame);
+    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) {
+        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENABLE_VBAQ, 0);
+        if (ctx->enable_vbaq)
+            av_log(ctx, AV_LOG_WARNING, "VBAQ is not supported by cqp Rate Control Method, automatically disabled\n");
+    } else {
+        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENABLE_VBAQ, !!ctx->enable_vbaq);
+    }
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_DE_BLOCKING_FILTER, !!deblocking_filter);
+
+    // B-Frames
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_B_PIC_PATTERN, avctx->max_b_frames);
+    if (res != AMF_OK) {
+        res = ctx->encoder->pVtbl->GetProperty(ctx->encoder, AMF_VIDEO_ENCODER_B_PIC_PATTERN, &var);
+        av_log(ctx, AV_LOG_WARNING, "B-frames=%d is not supported by this GPU, switched to %d\n",
+            avctx->max_b_frames, (int)var.int64Value);
+        avctx->max_b_frames = (int)var.int64Value;
+    }
+    if (avctx->max_b_frames) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_B_PIC_DELTA_QP, ctx->b_frame_delta_qp);
+        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_B_REFERENCE_ENABLE, !!ctx->b_frame_ref);
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_REF_B_PIC_DELTA_QP, ctx->ref_b_frame_delta_qp);
+    }
+
+    // Keyframe Interval
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_IDR_PERIOD, avctx->gop_size);
+
+    // Header Insertion Spacing
+    if (ctx->header_spacing >= 0)
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEADER_INSERTION_SPACING, ctx->header_spacing);
+
+    // Intra-Refresh, Slicing
+    if (ctx->intra_refresh_mb > 0)
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_INTRA_REFRESH_NUM_MBS_PER_SLOT, ctx->intra_refresh_mb);
+    if (avctx->slices > 1)
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_SLICES_PER_FRAME, avctx->slices);
+
+    // Coding
+    if (ctx->coding_mode != 0)
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_CABAC_ENABLE, ctx->coding_mode);
+
+    // Motion Estimation
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_MOTION_HALF_PIXEL, !!ctx->me_half_pel);
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_MOTION_QUARTERPIXEL, !!ctx->me_quarter_pel);
+
+    // fill extradata
+    res = AMFVariantInit(&var);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "AMFVariantInit() failed with error %d\n", res);
+
+    res = ctx->encoder->pVtbl->GetProperty(ctx->encoder, AMF_VIDEO_ENCODER_EXTRADATA, &var);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "GetProperty(AMF_VIDEO_ENCODER_EXTRADATA) failed with error %d\n", res);
+    AMF_RETURN_IF_FALSE(ctx, var.pInterface != NULL, AVERROR_BUG, "GetProperty(AMF_VIDEO_ENCODER_EXTRADATA) returned NULL\n");
+
+    guid = IID_AMFBuffer();
+
+    res = var.pInterface->pVtbl->QueryInterface(var.pInterface, &guid, (void**)&buffer); // query for buffer interface
+    if (res != AMF_OK) {
+        var.pInterface->pVtbl->Release(var.pInterface);
+    }
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "QueryInterface(IID_AMFBuffer) failed with error %d\n", res);
+
+    avctx->extradata_size = (int)buffer->pVtbl->GetSize(buffer);
+    avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!avctx->extradata) {
+        buffer->pVtbl->Release(buffer);
+        var.pInterface->pVtbl->Release(var.pInterface);
+        return AVERROR(ENOMEM);
+    }
+    memcpy(avctx->extradata, buffer->pVtbl->GetNative(buffer), avctx->extradata_size);
+
+    buffer->pVtbl->Release(buffer);
+    var.pInterface->pVtbl->Release(var.pInterface);
+
+    return 0;
+}
+
+static const AVCodecDefault defaults[] = {
+    { "refs",       "-1"  },
+    { "aspect",     "0"   },
+    { "qmin",       "-1"  },
+    { "qmax",       "-1"  },
+    { "b",          "2M"  },
+    { "g",          "250" },
+    { "slices",     "1"   },
+    { NULL                },
+};
+
+static const AVClass h264_amf_class = {
+    .class_name = "h264_amf",
+    .item_name = av_default_item_name,
+    .option = options,
+    .version = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_h264_amf_encoder = {
+    .name           = "h264_amf",
+    .long_name      = NULL_IF_CONFIG_SMALL("AMD AMF H.264 Encoder"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_H264,
+    .init           = amf_encode_init_h264,
+    .send_frame     = ff_amf_send_frame,
+    .receive_packet = ff_amf_receive_packet,
+    .close          = ff_amf_encode_close,
+    .priv_data_size = sizeof(AmfContext),
+    .priv_class     = &h264_amf_class,
+    .defaults       = defaults,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+    .pix_fmts       = ff_amf_pix_fmts,
+    .wrapper_name   = "amf",
+};

diff --git a/libavcodec/amfenc_hevc.c b/libavcodec/amfenc_hevc.c
new file mode 100644
index 0000000..7c9a33a
--- /dev/null
+++ b/libavcodec/amfenc_hevc.c

@@ -0,0 +1,326 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+#include "amfenc.h"
+#include "internal.h"
+
+#define OFFSET(x) offsetof(AmfContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "usage",          "Set the encoding usage",             OFFSET(usage),          AV_OPT_TYPE_INT,   { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING }, AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING, AMF_VIDEO_ENCODER_HEVC_USAGE_WEBCAM, VE, "usage" },
+    { "transcoding",    "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING },         0, 0, VE, "usage" },
+    { "ultralowlatency","", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_ULTRA_LOW_LATENCY },    0, 0, VE, "usage" },
+    { "lowlatency",     "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY },          0, 0, VE, "usage" },
+    { "webcam",         "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_WEBCAM },               0, 0, VE, "usage" },
+
+    { "profile",        "Set the profile (default main)",           OFFSET(profile),   AV_OPT_TYPE_INT,{ .i64 = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN }, AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN, AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN, VE, "profile" },
+    { "main",           "", 0,                      AV_OPT_TYPE_CONST,{ .i64 = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN }, 0, 0, VE, "profile" },
+
+    { "profile_tier",   "Set the profile tier (default main)",      OFFSET(tier), AV_OPT_TYPE_INT,{ .i64 = AMF_VIDEO_ENCODER_HEVC_TIER_MAIN }, AMF_VIDEO_ENCODER_HEVC_TIER_MAIN, AMF_VIDEO_ENCODER_HEVC_TIER_HIGH, VE, "tier" },
+    { "main",           "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_TIER_MAIN }, 0, 0, VE, "tier" },
+    { "high",           "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_TIER_HIGH }, 0, 0, VE, "tier" },
+
+    { "level",          "Set the encoding level (default auto)",    OFFSET(level), AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, AMF_LEVEL_6_2, VE, "level" },
+    { "auto",           "", 0, AV_OPT_TYPE_CONST, { .i64 = 0             }, 0, 0, VE, "level" },
+    { "1.0",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_1   }, 0, 0, VE, "level" },
+    { "2.0",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_2   }, 0, 0, VE, "level" },
+    { "2.1",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_2_1 }, 0, 0, VE, "level" },
+    { "3.0",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_3   }, 0, 0, VE, "level" },
+    { "3.1",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_3_1 }, 0, 0, VE, "level" },
+    { "4.0",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_4   }, 0, 0, VE, "level" },
+    { "4.1",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_4_1 }, 0, 0, VE, "level" },
+    { "5.0",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_5   }, 0, 0, VE, "level" },
+    { "5.1",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_5_1 }, 0, 0, VE, "level" },
+    { "5.2",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_5_2 }, 0, 0, VE, "level" },
+    { "6.0",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_6   }, 0, 0, VE, "level" },
+    { "6.1",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_6_1 }, 0, 0, VE, "level" },
+    { "6.2",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_6_2 }, 0, 0, VE, "level" },
+
+    { "quality",        "Set the encoding quality",                 OFFSET(quality),      AV_OPT_TYPE_INT,   { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED }, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_QUALITY, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED, VE, "quality" },
+    { "balanced",       "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_BALANCED }, 0, 0, VE, "quality" },
+    { "speed",          "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED    }, 0, 0, VE, "quality" },
+    { "quality",        "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_QUALITY  }, 0, 0, VE, "quality" },
+
+    { "rc",             "Set the rate control mode",            OFFSET(rate_control_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN }, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR, VE, "rc" },
+    { "cqp",            "Constant Quantization Parameter",      0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP             }, 0, 0, VE, "rc" },
+    { "cbr",            "Constant Bitrate",                     0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR                     }, 0, 0, VE, "rc" },
+    { "vbr_peak",       "Peak Contrained Variable Bitrate",     0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR    }, 0, 0, VE, "rc" },
+    { "vbr_latency",    "Latency Constrained Variable Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR }, 0, 0, VE, "rc" },
+
+    { "header_insertion_mode",        "Set header insertion mode",  OFFSET(header_insertion_mode),      AV_OPT_TYPE_INT,{ .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE }, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_IDR_ALIGNED, VE, "hdrmode" },
+    { "none",           "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE        }, 0, 0, VE, "hdrmode" },
+    { "gop",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_GOP_ALIGNED }, 0, 0, VE, "hdrmode" },
+    { "idr",            "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_IDR_ALIGNED }, 0, 0, VE, "hdrmode" },
+
+    { "gops_per_idr",    "GOPs per IDR 0-no IDR will be inserted",  OFFSET(gops_per_idr),  AV_OPT_TYPE_INT,  { .i64 = 60 },  0, INT_MAX, VE },
+    { "preanalysis",    "Enable preanalysis",                       OFFSET(preanalysis),   AV_OPT_TYPE_BOOL, { .i64 = 0  },  0, 1, VE},
+    { "vbaq",           "Enable VBAQ",                              OFFSET(enable_vbaq),   AV_OPT_TYPE_BOOL, { .i64 = 0  },  0, 1, VE},
+    { "enforce_hrd",    "Enforce HRD",                              OFFSET(enforce_hrd),   AV_OPT_TYPE_BOOL, { .i64 = 0  },  0, 1, VE},
+    { "filler_data",    "Filler Data Enable",                       OFFSET(filler_data),   AV_OPT_TYPE_BOOL, { .i64 = 0  },  0, 1, VE},
+    { "max_au_size",    "Maximum Access Unit Size for rate control (in bits)", OFFSET(max_au_size),   AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, VE},
+    { "min_qp_i",       "min quantization parameter for I-frame",   OFFSET(min_qp_i),      AV_OPT_TYPE_INT, { .i64 = -1  }, -1, 51, VE },
+    { "max_qp_i",       "max quantization parameter for I-frame",   OFFSET(max_qp_i),      AV_OPT_TYPE_INT, { .i64 = -1  }, -1, 51, VE },
+    { "min_qp_p",       "min quantization parameter for P-frame",   OFFSET(min_qp_p),      AV_OPT_TYPE_INT, { .i64 = -1  }, -1, 51, VE },
+    { "max_qp_p",       "max quantization parameter for P-frame",   OFFSET(max_qp_p),      AV_OPT_TYPE_INT, { .i64 = -1  }, -1, 51, VE },
+    { "qp_p",           "quantization parameter for P-frame",       OFFSET(qp_p),          AV_OPT_TYPE_INT, { .i64 = -1  }, -1, 51, VE },
+    { "qp_i",           "quantization parameter for I-frame",       OFFSET(qp_i),          AV_OPT_TYPE_INT, { .i64 = -1  }, -1, 51, VE },
+    { "skip_frame",     "Rate Control Based Frame Skip",            OFFSET(skip_frame),    AV_OPT_TYPE_BOOL,{ .i64 = 0   },  0, 1, VE },
+    { "me_half_pel",    "Enable ME Half Pixel",                     OFFSET(me_half_pel),   AV_OPT_TYPE_BOOL,{ .i64 = 1   },  0, 1, VE },
+    { "me_quarter_pel", "Enable ME Quarter Pixel ",                 OFFSET(me_quarter_pel),AV_OPT_TYPE_BOOL,{ .i64 = 1   },  0, 1, VE },
+
+    { "aud",            "Inserts AU Delimiter NAL unit",            OFFSET(aud)           ,AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE },
+
+    { "log_to_dbg",     "Enable AMF logging to debug output",   OFFSET(log_to_dbg), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE },
+    { NULL }
+};
+
+static av_cold int amf_encode_init_hevc(AVCodecContext *avctx)
+{
+    int                 ret = 0;
+    AMF_RESULT          res = AMF_OK;
+    AmfContext         *ctx = avctx->priv_data;
+    AMFVariantStruct    var = {0};
+    amf_int64           profile = 0;
+    amf_int64           profile_level = 0;
+    AMFBuffer          *buffer;
+    AMFGuid             guid;
+    AMFRate             framerate;
+    AMFSize             framesize = AMFConstructSize(avctx->width, avctx->height);
+    int                 deblocking_filter = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
+
+    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
+        framerate = AMFConstructRate(avctx->framerate.num, avctx->framerate.den);
+    } else {
+        framerate = AMFConstructRate(avctx->time_base.den, avctx->time_base.num * avctx->ticks_per_frame);
+    }
+
+    if ((ret = ff_amf_encode_init(avctx)) < 0)
+        return ret;
+
+    // init static parameters
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_USAGE, ctx->usage);
+
+    AMF_ASSIGN_PROPERTY_SIZE(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_FRAMESIZE, framesize);
+
+    AMF_ASSIGN_PROPERTY_RATE(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_FRAMERATE, framerate);
+
+    switch (avctx->profile) {
+    case FF_PROFILE_HEVC_MAIN:
+        profile = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN;
+        break;
+    default:
+        break;
+    }
+    if (profile == 0) {
+        profile = ctx->profile;
+    }
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PROFILE, profile);
+
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_TIER, ctx->tier);
+
+    profile_level = avctx->level;
+    if (profile_level == 0) {
+        profile_level = ctx->level;
+    }
+    if (profile_level != 0) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PROFILE_LEVEL, profile_level);
+    }
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET, ctx->quality);
+    // Maximum Reference Frames
+    if (avctx->refs != 0) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_NUM_REFRAMES, avctx->refs);
+    }
+    // Aspect Ratio
+    if (avctx->sample_aspect_ratio.den && avctx->sample_aspect_ratio.num) {
+        AMFRatio ratio = AMFConstructRatio(avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
+        AMF_ASSIGN_PROPERTY_RATIO(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ASPECT_RATIO, ratio);
+    }
+
+    // Picture control properties
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_NUM_GOPS_PER_IDR, ctx->gops_per_idr);
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_GOP_SIZE, avctx->gop_size);
+    if (avctx->slices > 1) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_SLICES_PER_FRAME, avctx->slices);
+    }
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_DE_BLOCKING_FILTER_DISABLE, deblocking_filter);
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE, ctx->header_insertion_mode);
+
+    // Rate control
+    // autodetect rate control method
+    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN) {
+        if (ctx->min_qp_i != -1 || ctx->max_qp_i != -1 ||
+            ctx->min_qp_p != -1 || ctx->max_qp_p != -1 ||
+            ctx->qp_i !=-1 || ctx->qp_p != -1) {
+            ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP;
+            av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CQP\n");
+        } else if (avctx->rc_max_rate > 0) {
+            ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR;
+            av_log(ctx, AV_LOG_DEBUG, "Rate control turned to Peak VBR\n");
+        } else {
+            ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR;
+            av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CBR\n");
+        }
+    }
+
+
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD, ctx->rate_control_mode);
+    if (avctx->rc_buffer_size) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_VBV_BUFFER_SIZE, avctx->rc_buffer_size);
+
+        if (avctx->rc_initial_buffer_occupancy != 0) {
+            int amf_buffer_fullness = avctx->rc_initial_buffer_occupancy * 64 / avctx->rc_buffer_size;
+            if (amf_buffer_fullness > 64)
+                amf_buffer_fullness = 64;
+            AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_INITIAL_VBV_BUFFER_FULLNESS, amf_buffer_fullness);
+        }
+    }
+    // Pre-Pass, Pre-Analysis, Two-Pass
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_PREANALYSIS_ENABLE, ctx->preanalysis);
+
+    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP) {
+        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENABLE_VBAQ, false);
+        if (ctx->enable_vbaq)
+            av_log(ctx, AV_LOG_WARNING, "VBAQ is not supported by cqp Rate Control Method, automatically disabled\n");
+    } else {
+        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENABLE_VBAQ, !!ctx->enable_vbaq);
+    }
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MOTION_HALF_PIXEL, ctx->me_half_pel);
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MOTION_QUARTERPIXEL, ctx->me_quarter_pel);
+
+    // init dynamic rate control params
+    if (ctx->max_au_size)
+        ctx->enforce_hrd = 1;
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENFORCE_HRD, ctx->enforce_hrd);
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_FILLER_DATA_ENABLE, ctx->filler_data);
+
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_TARGET_BITRATE, avctx->bit_rate);
+
+    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PEAK_BITRATE, avctx->bit_rate);
+    }
+    if (avctx->rc_max_rate) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PEAK_BITRATE, avctx->rc_max_rate);
+    } else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR) {
+        av_log(ctx, AV_LOG_WARNING, "rate control mode is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n");
+    }
+
+    // init encoder
+    res = ctx->encoder->pVtbl->Init(ctx->encoder, ctx->format, avctx->width, avctx->height);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "encoder->Init() failed with error %d\n", res);
+
+    // init dynamic picture control params
+    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_AU_SIZE, ctx->max_au_size);
+
+    if (ctx->min_qp_i != -1) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, ctx->min_qp_i);
+    } else if (avctx->qmin != -1) {
+        int qval = avctx->qmin > 51 ? 51 : avctx->qmin;
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, qval);
+    }
+    if (ctx->max_qp_i != -1) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, ctx->max_qp_i);
+    } else if (avctx->qmax != -1) {
+        int qval = avctx->qmax > 51 ? 51 : avctx->qmax;
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, qval);
+    }
+    if (ctx->min_qp_p != -1) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, ctx->min_qp_p);
+    } else if (avctx->qmin != -1) {
+        int qval = avctx->qmin > 51 ? 51 : avctx->qmin;
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, qval);
+    }
+    if (ctx->max_qp_p != -1) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, ctx->max_qp_p);
+    } else if (avctx->qmax != -1) {
+        int qval = avctx->qmax > 51 ? 51 : avctx->qmax;
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, qval);
+    }
+
+    if (ctx->qp_p != -1) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_QP_I, ctx->qp_p);
+    }
+    if (ctx->qp_i != -1) {
+        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_QP_P, ctx->qp_i);
+    }
+    AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_SKIP_FRAME_ENABLE, ctx->skip_frame);
+
+
+    // fill extradata
+    res = AMFVariantInit(&var);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "AMFVariantInit() failed with error %d\n", res);
+
+    res = ctx->encoder->pVtbl->GetProperty(ctx->encoder, AMF_VIDEO_ENCODER_HEVC_EXTRADATA, &var);
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "GetProperty(AMF_VIDEO_ENCODER_EXTRADATA) failed with error %d\n", res);
+    AMF_RETURN_IF_FALSE(ctx, var.pInterface != NULL, AVERROR_BUG, "GetProperty(AMF_VIDEO_ENCODER_EXTRADATA) returned NULL\n");
+
+    guid = IID_AMFBuffer();
+
+    res = var.pInterface->pVtbl->QueryInterface(var.pInterface, &guid, (void**)&buffer); // query for buffer interface
+    if (res != AMF_OK) {
+        var.pInterface->pVtbl->Release(var.pInterface);
+    }
+    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "QueryInterface(IID_AMFBuffer) failed with error %d\n", res);
+
+    avctx->extradata_size = (int)buffer->pVtbl->GetSize(buffer);
+    avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!avctx->extradata) {
+        buffer->pVtbl->Release(buffer);
+        var.pInterface->pVtbl->Release(var.pInterface);
+        return AVERROR(ENOMEM);
+    }
+    memcpy(avctx->extradata, buffer->pVtbl->GetNative(buffer), avctx->extradata_size);
+
+    buffer->pVtbl->Release(buffer);
+    var.pInterface->pVtbl->Release(var.pInterface);
+
+    return 0;
+}
+static const AVCodecDefault defaults[] = {
+    { "refs",       "-1"  },
+    { "aspect",     "0"   },
+    { "b",          "2M"  },
+    { "g",          "250" },
+    { "slices",     "1"   },
+    { NULL                },
+};
+static const AVClass hevc_amf_class = {
+    .class_name = "hevc_amf",
+    .item_name = av_default_item_name,
+    .option = options,
+    .version = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_hevc_amf_encoder = {
+    .name           = "hevc_amf",
+    .long_name      = NULL_IF_CONFIG_SMALL("AMD AMF HEVC encoder"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_HEVC,
+    .init           = amf_encode_init_hevc,
+    .send_frame     = ff_amf_send_frame,
+    .receive_packet = ff_amf_receive_packet,
+    .close          = ff_amf_encode_close,
+    .priv_data_size = sizeof(AmfContext),
+    .priv_class     = &hevc_amf_class,
+    .defaults       = defaults,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+    .pix_fmts       = ff_amf_pix_fmts,
+    .wrapper_name   = "amf",
+};

diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 7f2874d..47fe7eb 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c

@@ -862,15 +862,20 @@
 {
     int wsp = (vad > 0);
     float tilt;
+    float tmp;
 
     if (ctx->fr_cur_mode == MODE_23k85)
         return qua_hb_gain[hb_idx] * (1.0f / (1 << 14));
 
-    tilt = ctx->celpm_ctx.dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) /
-           ctx->celpm_ctx.dot_productf(synth, synth, AMRWB_SFR_SIZE);
+    tmp = ctx->celpm_ctx.dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1);
+
+    if (tmp > 0) {
+        tilt = tmp / ctx->celpm_ctx.dot_productf(synth, synth, AMRWB_SFR_SIZE);
+    } else
+        tilt = 0;
 
     /* return gain bounded by [0.1, 1.0] */
-    return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0);
+    return av_clipf((1.0 - tilt) * (1.25 - 0.25 * wsp), 0.1, 1.0);
 }
 
 /**

diff --git a/libavcodec/anm.c b/libavcodec/anm.c
index 7268418..ab6a399 100644
--- a/libavcodec/anm.c
+++ b/libavcodec/anm.c

@@ -54,7 +54,7 @@
 
     bytestream2_skipu(&s->gb, 16 * 8);
     for (i = 0; i < 256; i++)
-        s->palette[i] = bytestream2_get_le32u(&s->gb);
+        s->palette[i] = (0xFFU << 24) | bytestream2_get_le32u(&s->gb);
 
     return 0;
 }

diff --git a/libavcodec/aptx.c b/libavcodec/aptx.c
new file mode 100644
index 0000000..8750d84
--- /dev/null
+++ b/libavcodec/aptx.c

@@ -0,0 +1,1162 @@
+/*
+ * Audio Processing Technology codec for Bluetooth (aptX)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "mathops.h"
+#include "audio_frame_queue.h"
+
+
+enum channels {
+    LEFT,
+    RIGHT,
+    NB_CHANNELS
+};
+
+enum subbands {
+    LF,  // Low Frequency (0-5.5 kHz)
+    MLF, // Medium-Low Frequency (5.5-11kHz)
+    MHF, // Medium-High Frequency (11-16.5kHz)
+    HF,  // High Frequency (16.5-22kHz)
+    NB_SUBBANDS
+};
+
+#define NB_FILTERS 2
+#define FILTER_TAPS 16
+
+typedef struct {
+    int pos;
+    int32_t buffer[2*FILTER_TAPS];
+} FilterSignal;
+
+typedef struct {
+    FilterSignal outer_filter_signal[NB_FILTERS];
+    FilterSignal inner_filter_signal[NB_FILTERS][NB_FILTERS];
+} QMFAnalysis;
+
+typedef struct {
+    int32_t quantized_sample;
+    int32_t quantized_sample_parity_change;
+    int32_t error;
+} Quantize;
+
+typedef struct {
+    int32_t quantization_factor;
+    int32_t factor_select;
+    int32_t reconstructed_difference;
+} InvertQuantize;
+
+typedef struct {
+    int32_t prev_sign[2];
+    int32_t s_weight[2];
+    int32_t d_weight[24];
+    int32_t pos;
+    int32_t reconstructed_differences[48];
+    int32_t previous_reconstructed_sample;
+    int32_t predicted_difference;
+    int32_t predicted_sample;
+} Prediction;
+
+typedef struct {
+    int32_t codeword_history;
+    int32_t dither_parity;
+    int32_t dither[NB_SUBBANDS];
+
+    QMFAnalysis qmf;
+    Quantize quantize[NB_SUBBANDS];
+    InvertQuantize invert_quantize[NB_SUBBANDS];
+    Prediction prediction[NB_SUBBANDS];
+} Channel;
+
+typedef struct {
+    int hd;
+    int block_size;
+    int32_t sync_idx;
+    Channel channels[NB_CHANNELS];
+    AudioFrameQueue afq;
+} AptXContext;
+
+
+static const int32_t quantize_intervals_LF[65] = {
+      -9948,    9948,   29860,   49808,   69822,   89926,  110144,  130502,
+     151026,  171738,  192666,  213832,  235264,  256982,  279014,  301384,
+     324118,  347244,  370790,  394782,  419250,  444226,  469742,  495832,
+     522536,  549890,  577936,  606720,  636290,  666700,  698006,  730270,
+     763562,  797958,  833538,  870398,  908640,  948376,  989740, 1032874,
+    1077948, 1125150, 1174700, 1226850, 1281900, 1340196, 1402156, 1468282,
+    1539182, 1615610, 1698514, 1789098, 1888944, 2000168, 2125700, 2269750,
+    2438670, 2642660, 2899462, 3243240, 3746078, 4535138, 5664098, 7102424,
+    8897462,
+};
+static const int32_t invert_quantize_dither_factors_LF[65] = {
+       9948,   9948,   9962,   9988,  10026,  10078,  10142,  10218,
+      10306,  10408,  10520,  10646,  10784,  10934,  11098,  11274,
+      11462,  11664,  11880,  12112,  12358,  12618,  12898,  13194,
+      13510,  13844,  14202,  14582,  14988,  15422,  15884,  16380,
+      16912,  17484,  18098,  18762,  19480,  20258,  21106,  22030,
+      23044,  24158,  25390,  26760,  28290,  30008,  31954,  34172,
+      36728,  39700,  43202,  47382,  52462,  58762,  66770,  77280,
+      91642, 112348, 144452, 199326, 303512, 485546, 643414, 794914,
+    1000124,
+};
+static const int32_t quantize_dither_factors_LF[65] = {
+        0,     4,     7,    10,    13,    16,    19,    22,
+       26,    28,    32,    35,    38,    41,    44,    47,
+       51,    54,    58,    62,    65,    70,    74,    79,
+       84,    90,    95,   102,   109,   116,   124,   133,
+      143,   154,   166,   180,   195,   212,   231,   254,
+      279,   308,   343,   383,   430,   487,   555,   639,
+      743,   876,  1045,  1270,  1575,  2002,  2628,  3591,
+     5177,  8026, 13719, 26047, 45509, 39467, 37875, 51303,
+        0,
+};
+static const int16_t quantize_factor_select_offset_LF[65] = {
+      0, -21, -19, -17, -15, -12, -10,  -8,
+     -6,  -4,  -1,   1,   3,   6,   8,  10,
+     13,  15,  18,  20,  23,  26,  29,  31,
+     34,  37,  40,  43,  47,  50,  53,  57,
+     60,  64,  68,  72,  76,  80,  85,  89,
+     94,  99, 105, 110, 116, 123, 129, 136,
+    144, 152, 161, 171, 182, 194, 207, 223,
+    241, 263, 291, 328, 382, 467, 522, 522,
+    522,
+};
+
+
+static const int32_t quantize_intervals_MLF[9] = {
+    -89806, 89806, 278502, 494338, 759442, 1113112, 1652322, 2720256, 5190186,
+};
+static const int32_t invert_quantize_dither_factors_MLF[9] = {
+    89806, 89806, 98890, 116946, 148158, 205512, 333698, 734236, 1735696,
+};
+static const int32_t quantize_dither_factors_MLF[9] = {
+    0, 2271, 4514, 7803, 14339, 32047, 100135, 250365, 0,
+};
+static const int16_t quantize_factor_select_offset_MLF[9] = {
+    0, -14, 6, 29, 58, 96, 154, 270, 521,
+};
+
+
+static const int32_t quantize_intervals_MHF[3] = {
+    -194080, 194080, 890562,
+};
+static const int32_t invert_quantize_dither_factors_MHF[3] = {
+    194080, 194080, 502402,
+};
+static const int32_t quantize_dither_factors_MHF[3] = {
+    0, 77081, 0,
+};
+static const int16_t quantize_factor_select_offset_MHF[3] = {
+    0, -33, 136,
+};
+
+
+static const int32_t quantize_intervals_HF[5] = {
+    -163006, 163006, 542708, 1120554, 2669238,
+};
+static const int32_t invert_quantize_dither_factors_HF[5] = {
+    163006, 163006, 216698, 361148, 1187538,
+};
+static const int32_t quantize_dither_factors_HF[5] = {
+    0, 13423, 36113, 206598, 0,
+};
+static const int16_t quantize_factor_select_offset_HF[5] = {
+    0, -8, 33, 95, 262,
+};
+
+
+static const int32_t hd_quantize_intervals_LF[257] = {
+      -2436,    2436,    7308,   12180,   17054,   21930,   26806,   31686,
+      36566,   41450,   46338,   51230,   56124,   61024,   65928,   70836,
+      75750,   80670,   85598,   90530,   95470,  100418,  105372,  110336,
+     115308,  120288,  125278,  130276,  135286,  140304,  145334,  150374,
+     155426,  160490,  165566,  170654,  175756,  180870,  185998,  191138,
+     196294,  201466,  206650,  211850,  217068,  222300,  227548,  232814,
+     238096,  243396,  248714,  254050,  259406,  264778,  270172,  275584,
+     281018,  286470,  291944,  297440,  302956,  308496,  314056,  319640,
+     325248,  330878,  336532,  342212,  347916,  353644,  359398,  365178,
+     370986,  376820,  382680,  388568,  394486,  400430,  406404,  412408,
+     418442,  424506,  430600,  436726,  442884,  449074,  455298,  461554,
+     467844,  474168,  480528,  486922,  493354,  499820,  506324,  512866,
+     519446,  526064,  532722,  539420,  546160,  552940,  559760,  566624,
+     573532,  580482,  587478,  594520,  601606,  608740,  615920,  623148,
+     630426,  637754,  645132,  652560,  660042,  667576,  675164,  682808,
+     690506,  698262,  706074,  713946,  721876,  729868,  737920,  746036,
+     754216,  762460,  770770,  779148,  787594,  796108,  804694,  813354,
+     822086,  830892,  839774,  848736,  857776,  866896,  876100,  885386,
+     894758,  904218,  913766,  923406,  933138,  942964,  952886,  962908,
+     973030,  983254,  993582, 1004020, 1014566, 1025224, 1035996, 1046886,
+    1057894, 1069026, 1080284, 1091670, 1103186, 1114838, 1126628, 1138558,
+    1150634, 1162858, 1175236, 1187768, 1200462, 1213320, 1226346, 1239548,
+    1252928, 1266490, 1280242, 1294188, 1308334, 1322688, 1337252, 1352034,
+    1367044, 1382284, 1397766, 1413494, 1429478, 1445728, 1462252, 1479058,
+    1496158, 1513562, 1531280, 1549326, 1567710, 1586446, 1605550, 1625034,
+    1644914, 1665208, 1685932, 1707108, 1728754, 1750890, 1773542, 1796732,
+    1820488, 1844840, 1869816, 1895452, 1921780, 1948842, 1976680, 2005338,
+    2034868, 2065322, 2096766, 2129260, 2162880, 2197708, 2233832, 2271352,
+    2310384, 2351050, 2393498, 2437886, 2484404, 2533262, 2584710, 2639036,
+    2696578, 2757738, 2822998, 2892940, 2968278, 3049896, 3138912, 3236760,
+    3345312, 3467068, 3605434, 3765154, 3952904, 4177962, 4452178, 4787134,
+    5187290, 5647128, 6159120, 6720518, 7332904, 8000032, 8726664, 9518152,
+    10380372,
+};
+static const int32_t hd_invert_quantize_dither_factors_LF[257] = {
+      2436,   2436,   2436,   2436,   2438,   2438,   2438,   2440,
+      2442,   2442,   2444,   2446,   2448,   2450,   2454,   2456,
+      2458,   2462,   2464,   2468,   2472,   2476,   2480,   2484,
+      2488,   2492,   2498,   2502,   2506,   2512,   2518,   2524,
+      2528,   2534,   2540,   2548,   2554,   2560,   2568,   2574,
+      2582,   2588,   2596,   2604,   2612,   2620,   2628,   2636,
+      2646,   2654,   2664,   2672,   2682,   2692,   2702,   2712,
+      2722,   2732,   2742,   2752,   2764,   2774,   2786,   2798,
+      2810,   2822,   2834,   2846,   2858,   2870,   2884,   2896,
+      2910,   2924,   2938,   2952,   2966,   2980,   2994,   3010,
+      3024,   3040,   3056,   3070,   3086,   3104,   3120,   3136,
+      3154,   3170,   3188,   3206,   3224,   3242,   3262,   3280,
+      3300,   3320,   3338,   3360,   3380,   3400,   3422,   3442,
+      3464,   3486,   3508,   3532,   3554,   3578,   3602,   3626,
+      3652,   3676,   3702,   3728,   3754,   3780,   3808,   3836,
+      3864,   3892,   3920,   3950,   3980,   4010,   4042,   4074,
+      4106,   4138,   4172,   4206,   4240,   4276,   4312,   4348,
+      4384,   4422,   4460,   4500,   4540,   4580,   4622,   4664,
+      4708,   4752,   4796,   4842,   4890,   4938,   4986,   5036,
+      5086,   5138,   5192,   5246,   5300,   5358,   5416,   5474,
+      5534,   5596,   5660,   5726,   5792,   5860,   5930,   6002,
+      6074,   6150,   6226,   6306,   6388,   6470,   6556,   6644,
+      6736,   6828,   6924,   7022,   7124,   7228,   7336,   7448,
+      7562,   7680,   7802,   7928,   8058,   8192,   8332,   8476,
+      8624,   8780,   8940,   9106,   9278,   9458,   9644,   9840,
+     10042,  10252,  10472,  10702,  10942,  11194,  11458,  11734,
+     12024,  12328,  12648,  12986,  13342,  13720,  14118,  14540,
+     14990,  15466,  15976,  16520,  17102,  17726,  18398,  19124,
+     19908,  20760,  21688,  22702,  23816,  25044,  26404,  27922,
+     29622,  31540,  33720,  36222,  39116,  42502,  46514,  51334,
+     57218,  64536,  73830,  85890, 101860, 123198, 151020, 183936,
+    216220, 243618, 268374, 293022, 319362, 347768, 378864, 412626, 449596,
+};
+static const int32_t hd_quantize_dither_factors_LF[256] = {
+       0,    0,    0,    1,    0,    0,    1,    1,
+       0,    1,    1,    1,    1,    1,    1,    1,
+       1,    1,    1,    1,    1,    1,    1,    1,
+       1,    2,    1,    1,    2,    2,    2,    1,
+       2,    2,    2,    2,    2,    2,    2,    2,
+       2,    2,    2,    2,    2,    2,    2,    3,
+       2,    3,    2,    3,    3,    3,    3,    3,
+       3,    3,    3,    3,    3,    3,    3,    3,
+       3,    3,    3,    3,    3,    4,    3,    4,
+       4,    4,    4,    4,    4,    4,    4,    4,
+       4,    4,    4,    4,    5,    4,    4,    5,
+       4,    5,    5,    5,    5,    5,    5,    5,
+       5,    5,    6,    5,    5,    6,    5,    6,
+       6,    6,    6,    6,    6,    6,    6,    7,
+       6,    7,    7,    7,    7,    7,    7,    7,
+       7,    7,    8,    8,    8,    8,    8,    8,
+       8,    9,    9,    9,    9,    9,    9,    9,
+      10,   10,   10,   10,   10,   11,   11,   11,
+      11,   11,   12,   12,   12,   12,   13,   13,
+      13,   14,   14,   14,   15,   15,   15,   15,
+      16,   16,   17,   17,   17,   18,   18,   18,
+      19,   19,   20,   21,   21,   22,   22,   23,
+      23,   24,   25,   26,   26,   27,   28,   29,
+      30,   31,   32,   33,   34,   35,   36,   37,
+      39,   40,   42,   43,   45,   47,   49,   51,
+      53,   55,   58,   60,   63,   66,   69,   73,
+      76,   80,   85,   89,   95,  100,  106,  113,
+     119,  128,  136,  146,  156,  168,  182,  196,
+     213,  232,  254,  279,  307,  340,  380,  425,
+     480,  545,  626,  724,  847, 1003, 1205, 1471,
+    1830, 2324, 3015, 3993, 5335, 6956, 8229, 8071,
+    6850, 6189, 6162, 6585, 7102, 7774, 8441, 9243,
+};
+static const int16_t hd_quantize_factor_select_offset_LF[257] = {
+      0, -22, -21, -21, -20, -20, -19, -19,
+    -18, -18, -17, -17, -16, -16, -15, -14,
+    -14, -13, -13, -12, -12, -11, -11, -10,
+    -10,  -9,  -9,  -8,  -7,  -7,  -6,  -6,
+     -5,  -5,  -4,  -4,  -3,  -3,  -2,  -1,
+     -1,   0,   0,   1,   1,   2,   2,   3,
+      4,   4,   5,   5,   6,   6,   7,   8,
+      8,   9,   9,  10,  11,  11,  12,  12,
+     13,  14,  14,  15,  15,  16,  17,  17,
+     18,  19,  19,  20,  20,  21,  22,  22,
+     23,  24,  24,  25,  26,  26,  27,  28,
+     28,  29,  30,  30,  31,  32,  33,  33,
+     34,  35,  35,  36,  37,  38,  38,  39,
+     40,  41,  41,  42,  43,  44,  44,  45,
+     46,  47,  48,  48,  49,  50,  51,  52,
+     52,  53,  54,  55,  56,  57,  58,  58,
+     59,  60,  61,  62,  63,  64,  65,  66,
+     67,  68,  69,  69,  70,  71,  72,  73,
+     74,  75,  77,  78,  79,  80,  81,  82,
+     83,  84,  85,  86,  87,  89,  90,  91,
+     92,  93,  94,  96,  97,  98,  99, 101,
+    102, 103, 105, 106, 107, 109, 110, 112,
+    113, 115, 116, 118, 119, 121, 122, 124,
+    125, 127, 129, 130, 132, 134, 136, 137,
+    139, 141, 143, 145, 147, 149, 151, 153,
+    155, 158, 160, 162, 164, 167, 169, 172,
+    174, 177, 180, 182, 185, 188, 191, 194,
+    197, 201, 204, 208, 211, 215, 219, 223,
+    227, 232, 236, 241, 246, 251, 257, 263,
+    269, 275, 283, 290, 298, 307, 317, 327,
+    339, 352, 367, 384, 404, 429, 458, 494,
+    522, 522, 522, 522, 522, 522, 522, 522, 522,
+};
+
+
+static const int32_t hd_quantize_intervals_MLF[33] = {
+      -21236,   21236,   63830,  106798,  150386,  194832,  240376,  287258,
+      335726,  386034,  438460,  493308,  550924,  611696,  676082,  744626,
+      817986,  896968,  982580, 1076118, 1179278, 1294344, 1424504, 1574386,
+     1751090, 1966260, 2240868, 2617662, 3196432, 4176450, 5658260, 7671068,
+    10380372,
+};
+static const int32_t hd_invert_quantize_dither_factors_MLF[33] = {
+    21236,  21236,  21360,  21608,  21978,  22468,  23076,   23806,
+    24660,  25648,  26778,  28070,  29544,  31228,  33158,   35386,
+    37974,  41008,  44606,  48934,  54226,  60840,  69320,   80564,
+    96140, 119032, 155576, 221218, 357552, 622468, 859344, 1153464, 1555840,
+};
+static const int32_t hd_quantize_dither_factors_MLF[32] = {
+       0,   31,    62,    93,   123,   152,   183,    214,
+     247,  283,   323,   369,   421,   483,   557,    647,
+     759,  900,  1082,  1323,  1654,  2120,  2811,   3894,
+    5723, 9136, 16411, 34084, 66229, 59219, 73530, 100594,
+};
+static const int16_t hd_quantize_factor_select_offset_MLF[33] = {
+      0, -21, -16, -12,  -7,  -2,   3,   8,
+     13,  19,  24,  30,  36,  43,  50,  57,
+     65,  74,  83,  93, 104, 117, 131, 147,
+    166, 189, 219, 259, 322, 427, 521, 521, 521,
+};
+
+
+static const int32_t hd_quantize_intervals_MHF[9] = {
+    -95044, 95044, 295844, 528780, 821332, 1226438, 1890540, 3344850, 6450664,
+};
+static const int32_t hd_invert_quantize_dither_factors_MHF[9] = {
+    95044, 95044, 105754, 127180, 165372, 39736, 424366, 1029946, 2075866,
+};
+static const int32_t hd_quantize_dither_factors_MHF[8] = {
+    0, 2678, 5357, 9548, -31409, 96158, 151395, 261480,
+};
+static const int16_t hd_quantize_factor_select_offset_MHF[9] = {
+    0, -17, 5, 30, 62, 105, 177, 334, 518,
+};
+
+
+static const int32_t hd_quantize_intervals_HF[17] = {
+     -45754,   45754,  138496,  234896,  337336,  448310,  570738,  708380,
+     866534, 1053262, 1281958, 1577438, 1993050, 2665984, 3900982, 5902844,
+    8897462,
+};
+static const int32_t hd_invert_quantize_dither_factors_HF[17] = {
+    45754,  45754,  46988,  49412,  53026,  57950,  64478,   73164,
+    84988, 101740, 126958, 168522, 247092, 425842, 809154, 1192708, 1801910,
+};
+static const int32_t hd_quantize_dither_factors_HF[16] = {
+       0,  309,   606,   904,  1231,  1632,  2172,   2956,
+    4188, 6305, 10391, 19643, 44688, 95828, 95889, 152301,
+};
+static const int16_t hd_quantize_factor_select_offset_HF[17] = {
+     0, -18,  -8,   2,  13,  25,  38,  53,
+    70,  90, 115, 147, 192, 264, 398, 521, 521,
+};
+
+typedef const struct {
+    const int32_t *quantize_intervals;
+    const int32_t *invert_quantize_dither_factors;
+    const int32_t *quantize_dither_factors;
+    const int16_t *quantize_factor_select_offset;
+    int tables_size;
+    int32_t factor_max;
+    int32_t prediction_order;
+} ConstTables;
+
+static ConstTables tables[2][NB_SUBBANDS] = {
+    {
+        [LF]  = { quantize_intervals_LF,
+                  invert_quantize_dither_factors_LF,
+                  quantize_dither_factors_LF,
+                  quantize_factor_select_offset_LF,
+                  FF_ARRAY_ELEMS(quantize_intervals_LF),
+                  0x11FF, 24 },
+        [MLF] = { quantize_intervals_MLF,
+                  invert_quantize_dither_factors_MLF,
+                  quantize_dither_factors_MLF,
+                  quantize_factor_select_offset_MLF,
+                  FF_ARRAY_ELEMS(quantize_intervals_MLF),
+                  0x14FF, 12 },
+        [MHF] = { quantize_intervals_MHF,
+                  invert_quantize_dither_factors_MHF,
+                  quantize_dither_factors_MHF,
+                  quantize_factor_select_offset_MHF,
+                  FF_ARRAY_ELEMS(quantize_intervals_MHF),
+                  0x16FF, 6 },
+        [HF]  = { quantize_intervals_HF,
+                  invert_quantize_dither_factors_HF,
+                  quantize_dither_factors_HF,
+                  quantize_factor_select_offset_HF,
+                  FF_ARRAY_ELEMS(quantize_intervals_HF),
+                  0x15FF, 12 },
+    },
+    {
+        [LF]  = { hd_quantize_intervals_LF,
+                  hd_invert_quantize_dither_factors_LF,
+                  hd_quantize_dither_factors_LF,
+                  hd_quantize_factor_select_offset_LF,
+                  FF_ARRAY_ELEMS(hd_quantize_intervals_LF),
+                  0x11FF, 24 },
+        [MLF] = { hd_quantize_intervals_MLF,
+                  hd_invert_quantize_dither_factors_MLF,
+                  hd_quantize_dither_factors_MLF,
+                  hd_quantize_factor_select_offset_MLF,
+                  FF_ARRAY_ELEMS(hd_quantize_intervals_MLF),
+                  0x14FF, 12 },
+        [MHF] = { hd_quantize_intervals_MHF,
+                  hd_invert_quantize_dither_factors_MHF,
+                  hd_quantize_dither_factors_MHF,
+                  hd_quantize_factor_select_offset_MHF,
+                  FF_ARRAY_ELEMS(hd_quantize_intervals_MHF),
+                  0x16FF, 6 },
+        [HF]  = { hd_quantize_intervals_HF,
+                  hd_invert_quantize_dither_factors_HF,
+                  hd_quantize_dither_factors_HF,
+                  hd_quantize_factor_select_offset_HF,
+                  FF_ARRAY_ELEMS(hd_quantize_intervals_HF),
+                  0x15FF, 12 },
+    }
+};
+
+static const int16_t quantization_factors[32] = {
+    2048, 2093, 2139, 2186, 2233, 2282, 2332, 2383,
+    2435, 2489, 2543, 2599, 2656, 2714, 2774, 2834,
+    2896, 2960, 3025, 3091, 3158, 3228, 3298, 3371,
+    3444, 3520, 3597, 3676, 3756, 3838, 3922, 4008,
+};
+
+
+/* Rounded right shift with optionnal clipping */
+#define RSHIFT_SIZE(size)                                                     \
+av_always_inline                                                              \
+static int##size##_t rshift##size(int##size##_t value, int shift)             \
+{                                                                             \
+    int##size##_t rounding = (int##size##_t)1 << (shift - 1);                 \
+    int##size##_t mask = ((int##size##_t)1 << (shift + 1)) - 1;               \
+    return ((value + rounding) >> shift) - ((value & mask) == rounding);      \
+}                                                                             \
+av_always_inline                                                              \
+static int##size##_t rshift##size##_clip24(int##size##_t value, int shift)    \
+{                                                                             \
+    return av_clip_intp2(rshift##size(value, shift), 23);                     \
+}
+RSHIFT_SIZE(32)
+RSHIFT_SIZE(64)
+
+
+av_always_inline
+static void aptx_update_codeword_history(Channel *channel)
+{
+    int32_t cw = ((channel->quantize[0].quantized_sample & 3) << 0) +
+                 ((channel->quantize[1].quantized_sample & 2) << 1) +
+                 ((channel->quantize[2].quantized_sample & 1) << 3);
+    channel->codeword_history = (cw << 8) + (channel->codeword_history << 4);
+}
+
+static void aptx_generate_dither(Channel *channel)
+{
+    int subband;
+    int64_t m;
+    int32_t d;
+
+    aptx_update_codeword_history(channel);
+
+    m = (int64_t)5184443 * (channel->codeword_history >> 7);
+    d = (m << 2) + (m >> 22);
+    for (subband = 0; subband < NB_SUBBANDS; subband++)
+        channel->dither[subband] = d << (23 - 5*subband);
+    channel->dither_parity = (d >> 25) & 1;
+}
+
+/*
+ * Convolution filter coefficients for the outer QMF of the QMF tree.
+ * The 2 sets are a mirror of each other.
+ */
+static const int32_t aptx_qmf_outer_coeffs[NB_FILTERS][FILTER_TAPS] = {
+    {
+        730, -413, -9611, 43626, -121026, 269973, -585547, 2801966,
+        697128, -160481, 27611, 8478, -10043, 3511, 688, -897,
+    },
+    {
+        -897, 688, 3511, -10043, 8478, 27611, -160481, 697128,
+        2801966, -585547, 269973, -121026, 43626, -9611, -413, 730,
+    },
+};
+
+/*
+ * Convolution filter coefficients for the inner QMF of the QMF tree.
+ * The 2 sets are a mirror of each other.
+ */
+static const int32_t aptx_qmf_inner_coeffs[NB_FILTERS][FILTER_TAPS] = {
+    {
+       1033, -584, -13592, 61697, -171156, 381799, -828088, 3962579,
+       985888, -226954, 39048, 11990, -14203, 4966, 973, -1268,
+    },
+    {
+      -1268, 973, 4966, -14203, 11990, 39048, -226954, 985888,
+      3962579, -828088, 381799, -171156, 61697, -13592, -584, 1033,
+    },
+};
+
+/*
+ * Push one sample into a circular signal buffer.
+ */
+av_always_inline
+static void aptx_qmf_filter_signal_push(FilterSignal *signal, int32_t sample)
+{
+    signal->buffer[signal->pos            ] = sample;
+    signal->buffer[signal->pos+FILTER_TAPS] = sample;
+    signal->pos = (signal->pos + 1) & (FILTER_TAPS - 1);
+}
+
+/*
+ * Compute the convolution of the signal with the coefficients, and reduce
+ * to 24 bits by applying the specified right shifting.
+ */
+av_always_inline
+static int32_t aptx_qmf_convolution(FilterSignal *signal,
+                                    const int32_t coeffs[FILTER_TAPS],
+                                    int shift)
+{
+    int32_t *sig = &signal->buffer[signal->pos];
+    int64_t e = 0;
+    int i;
+
+    for (i = 0; i < FILTER_TAPS; i++)
+        e += MUL64(sig[i], coeffs[i]);
+
+    return rshift64_clip24(e, shift);
+}
+
+/*
+ * Half-band QMF analysis filter realized with a polyphase FIR filter.
+ * Split into 2 subbands and downsample by 2.
+ * So for each pair of samples that goes in, one sample goes out,
+ * split into 2 separate subbands.
+ */
+av_always_inline
+static void aptx_qmf_polyphase_analysis(FilterSignal signal[NB_FILTERS],
+                                        const int32_t coeffs[NB_FILTERS][FILTER_TAPS],
+                                        int shift,
+                                        int32_t samples[NB_FILTERS],
+                                        int32_t *low_subband_output,
+                                        int32_t *high_subband_output)
+{
+    int32_t subbands[NB_FILTERS];
+    int i;
+
+    for (i = 0; i < NB_FILTERS; i++) {
+        aptx_qmf_filter_signal_push(&signal[i], samples[NB_FILTERS-1-i]);
+        subbands[i] = aptx_qmf_convolution(&signal[i], coeffs[i], shift);
+    }
+
+    *low_subband_output  = av_clip_intp2(subbands[0] + subbands[1], 23);
+    *high_subband_output = av_clip_intp2(subbands[0] - subbands[1], 23);
+}
+
+/*
+ * Two stage QMF analysis tree.
+ * Split 4 input samples into 4 subbands and downsample by 4.
+ * So for each group of 4 samples that goes in, one sample goes out,
+ * split into 4 separate subbands.
+ */
+static void aptx_qmf_tree_analysis(QMFAnalysis *qmf,
+                                   int32_t samples[4],
+                                   int32_t subband_samples[4])
+{
+    int32_t intermediate_samples[4];
+    int i;
+
+    /* Split 4 input samples into 2 intermediate subbands downsampled to 2 samples */
+    for (i = 0; i < 2; i++)
+        aptx_qmf_polyphase_analysis(qmf->outer_filter_signal,
+                                    aptx_qmf_outer_coeffs, 23,
+                                    &samples[2*i],
+                                    &intermediate_samples[0+i],
+                                    &intermediate_samples[2+i]);
+
+    /* Split 2 intermediate subband samples into 4 final subbands downsampled to 1 sample */
+    for (i = 0; i < 2; i++)
+        aptx_qmf_polyphase_analysis(qmf->inner_filter_signal[i],
+                                    aptx_qmf_inner_coeffs, 23,
+                                    &intermediate_samples[2*i],
+                                    &subband_samples[2*i+0],
+                                    &subband_samples[2*i+1]);
+}
+
+/*
+ * Half-band QMF synthesis filter realized with a polyphase FIR filter.
+ * Join 2 subbands and upsample by 2.
+ * So for each 2 subbands sample that goes in, a pair of samples goes out.
+ */
+av_always_inline
+static void aptx_qmf_polyphase_synthesis(FilterSignal signal[NB_FILTERS],
+                                         const int32_t coeffs[NB_FILTERS][FILTER_TAPS],
+                                         int shift,
+                                         int32_t low_subband_input,
+                                         int32_t high_subband_input,
+                                         int32_t samples[NB_FILTERS])
+{
+    int32_t subbands[NB_FILTERS];
+    int i;
+
+    subbands[0] = low_subband_input + high_subband_input;
+    subbands[1] = low_subband_input - high_subband_input;
+
+    for (i = 0; i < NB_FILTERS; i++) {
+        aptx_qmf_filter_signal_push(&signal[i], subbands[1-i]);
+        samples[i] = aptx_qmf_convolution(&signal[i], coeffs[i], shift);
+    }
+}
+
+/*
+ * Two stage QMF synthesis tree.
+ * Join 4 subbands and upsample by 4.
+ * So for each 4 subbands sample that goes in, a group of 4 samples goes out.
+ */
+static void aptx_qmf_tree_synthesis(QMFAnalysis *qmf,
+                                    int32_t subband_samples[4],
+                                    int32_t samples[4])
+{
+    int32_t intermediate_samples[4];
+    int i;
+
+    /* Join 4 subbands into 2 intermediate subbands upsampled to 2 samples. */
+    for (i = 0; i < 2; i++)
+        aptx_qmf_polyphase_synthesis(qmf->inner_filter_signal[i],
+                                     aptx_qmf_inner_coeffs, 22,
+                                     subband_samples[2*i+0],
+                                     subband_samples[2*i+1],
+                                     &intermediate_samples[2*i]);
+
+    /* Join 2 samples from intermediate subbands upsampled to 4 samples. */
+    for (i = 0; i < 2; i++)
+        aptx_qmf_polyphase_synthesis(qmf->outer_filter_signal,
+                                     aptx_qmf_outer_coeffs, 21,
+                                     intermediate_samples[0+i],
+                                     intermediate_samples[2+i],
+                                     &samples[2*i]);
+}
+
+
+av_always_inline
+static int32_t aptx_bin_search(int32_t value, int32_t factor,
+                               const int32_t *intervals, int32_t nb_intervals)
+{
+    int32_t idx = 0;
+    int i;
+
+    for (i = nb_intervals >> 1; i > 0; i >>= 1)
+        if (MUL64(factor, intervals[idx + i]) <= ((int64_t)value << 24))
+            idx += i;
+
+    return idx;
+}
+
+static void aptx_quantize_difference(Quantize *quantize,
+                                     int32_t sample_difference,
+                                     int32_t dither,
+                                     int32_t quantization_factor,
+                                     ConstTables *tables)
+{
+    const int32_t *intervals = tables->quantize_intervals;
+    int32_t quantized_sample, dithered_sample, parity_change;
+    int32_t d, mean, interval, inv, sample_difference_abs;
+    int64_t error;
+
+    sample_difference_abs = FFABS(sample_difference);
+    sample_difference_abs = FFMIN(sample_difference_abs, (1 << 23) - 1);
+
+    quantized_sample = aptx_bin_search(sample_difference_abs >> 4,
+                                       quantization_factor,
+                                       intervals, tables->tables_size);
+
+    d = rshift32_clip24(MULH(dither, dither), 7) - (1 << 23);
+    d = rshift64(MUL64(d, tables->quantize_dither_factors[quantized_sample]), 23);
+
+    intervals += quantized_sample;
+    mean = (intervals[1] + intervals[0]) / 2;
+    interval = (intervals[1] - intervals[0]) * (-(sample_difference < 0) | 1);
+
+    dithered_sample = rshift64_clip24(MUL64(dither, interval) + ((int64_t)av_clip_intp2(mean + d, 23) << 32), 32);
+    error = ((int64_t)sample_difference_abs << 20) - MUL64(dithered_sample, quantization_factor);
+    quantize->error = FFABS(rshift64(error, 23));
+
+    parity_change = quantized_sample;
+    if (error < 0)
+        quantized_sample--;
+    else
+        parity_change--;
+
+    inv = -(sample_difference < 0);
+    quantize->quantized_sample               = quantized_sample ^ inv;
+    quantize->quantized_sample_parity_change = parity_change    ^ inv;
+}
+
+static void aptx_encode_channel(Channel *channel, int32_t samples[4], int hd)
+{
+    int32_t subband_samples[4];
+    int subband;
+    aptx_qmf_tree_analysis(&channel->qmf, samples, subband_samples);
+    aptx_generate_dither(channel);
+    for (subband = 0; subband < NB_SUBBANDS; subband++) {
+        int32_t diff = av_clip_intp2(subband_samples[subband] - channel->prediction[subband].predicted_sample, 23);
+        aptx_quantize_difference(&channel->quantize[subband], diff,
+                                 channel->dither[subband],
+                                 channel->invert_quantize[subband].quantization_factor,
+                                 &tables[hd][subband]);
+    }
+}
+
+static void aptx_decode_channel(Channel *channel, int32_t samples[4])
+{
+    int32_t subband_samples[4];
+    int subband;
+    for (subband = 0; subband < NB_SUBBANDS; subband++)
+        subband_samples[subband] = channel->prediction[subband].previous_reconstructed_sample;
+    aptx_qmf_tree_synthesis(&channel->qmf, subband_samples, samples);
+}
+
+
+static void aptx_invert_quantization(InvertQuantize *invert_quantize,
+                                     int32_t quantized_sample, int32_t dither,
+                                     ConstTables *tables)
+{
+    int32_t qr, idx, shift, factor_select;
+
+    idx = (quantized_sample ^ -(quantized_sample < 0)) + 1;
+    qr = tables->quantize_intervals[idx] / 2;
+    if (quantized_sample < 0)
+        qr = -qr;
+
+    qr = rshift64_clip24(((int64_t)qr<<32) + MUL64(dither, tables->invert_quantize_dither_factors[idx]), 32);
+    invert_quantize->reconstructed_difference = MUL64(invert_quantize->quantization_factor, qr) >> 19;
+
+    /* update factor_select */
+    factor_select = 32620 * invert_quantize->factor_select;
+    factor_select = rshift32(factor_select + (tables->quantize_factor_select_offset[idx] << 15), 15);
+    invert_quantize->factor_select = av_clip(factor_select, 0, tables->factor_max);
+
+    /* update quantization factor */
+    idx = (invert_quantize->factor_select & 0xFF) >> 3;
+    shift = (tables->factor_max - invert_quantize->factor_select) >> 8;
+    invert_quantize->quantization_factor = (quantization_factors[idx] << 11) >> shift;
+}
+
+static int32_t *aptx_reconstructed_differences_update(Prediction *prediction,
+                                                      int32_t reconstructed_difference,
+                                                      int order)
+{
+    int32_t *rd1 = prediction->reconstructed_differences, *rd2 = rd1 + order;
+    int p = prediction->pos;
+
+    rd1[p] = rd2[p];
+    prediction->pos = p = (p + 1) % order;
+    rd2[p] = reconstructed_difference;
+    return &rd2[p];
+}
+
+static void aptx_prediction_filtering(Prediction *prediction,
+                                      int32_t reconstructed_difference,
+                                      int order)
+{
+    int32_t reconstructed_sample, predictor, srd0;
+    int32_t *reconstructed_differences;
+    int64_t predicted_difference = 0;
+    int i;
+
+    reconstructed_sample = av_clip_intp2(reconstructed_difference + prediction->predicted_sample, 23);
+    predictor = av_clip_intp2((MUL64(prediction->s_weight[0], prediction->previous_reconstructed_sample)
+                             + MUL64(prediction->s_weight[1], reconstructed_sample)) >> 22, 23);
+    prediction->previous_reconstructed_sample = reconstructed_sample;
+
+    reconstructed_differences = aptx_reconstructed_differences_update(prediction, reconstructed_difference, order);
+    srd0 = FFDIFFSIGN(reconstructed_difference, 0) << 23;
+    for (i = 0; i < order; i++) {
+        int32_t srd = FF_SIGNBIT(reconstructed_differences[-i-1]) | 1;
+        prediction->d_weight[i] -= rshift32(prediction->d_weight[i] - srd*srd0, 8);
+        predicted_difference += MUL64(reconstructed_differences[-i], prediction->d_weight[i]);
+    }
+
+    prediction->predicted_difference = av_clip_intp2(predicted_difference >> 22, 23);
+    prediction->predicted_sample = av_clip_intp2(predictor + prediction->predicted_difference, 23);
+}
+
+static void aptx_process_subband(InvertQuantize *invert_quantize,
+                                 Prediction *prediction,
+                                 int32_t quantized_sample, int32_t dither,
+                                 ConstTables *tables)
+{
+    int32_t sign, same_sign[2], weight[2], sw1, range;
+
+    aptx_invert_quantization(invert_quantize, quantized_sample, dither, tables);
+
+    sign = FFDIFFSIGN(invert_quantize->reconstructed_difference,
+                      -prediction->predicted_difference);
+    same_sign[0] = sign * prediction->prev_sign[0];
+    same_sign[1] = sign * prediction->prev_sign[1];
+    prediction->prev_sign[0] = prediction->prev_sign[1];
+    prediction->prev_sign[1] = sign | 1;
+
+    range = 0x100000;
+    sw1 = rshift32(-same_sign[1] * prediction->s_weight[1], 1);
+    sw1 = (av_clip(sw1, -range, range) & ~0xF) << 4;
+
+    range = 0x300000;
+    weight[0] = 254 * prediction->s_weight[0] + 0x800000*same_sign[0] + sw1;
+    prediction->s_weight[0] = av_clip(rshift32(weight[0], 8), -range, range);
+
+    range = 0x3C0000 - prediction->s_weight[0];
+    weight[1] = 255 * prediction->s_weight[1] + 0xC00000*same_sign[1];
+    prediction->s_weight[1] = av_clip(rshift32(weight[1], 8), -range, range);
+
+    aptx_prediction_filtering(prediction,
+                              invert_quantize->reconstructed_difference,
+                              tables->prediction_order);
+}
+
+static void aptx_invert_quantize_and_prediction(Channel *channel, int hd)
+{
+    int subband;
+    for (subband = 0; subband < NB_SUBBANDS; subband++)
+        aptx_process_subband(&channel->invert_quantize[subband],
+                             &channel->prediction[subband],
+                             channel->quantize[subband].quantized_sample,
+                             channel->dither[subband],
+                             &tables[hd][subband]);
+}
+
+static int32_t aptx_quantized_parity(Channel *channel)
+{
+    int32_t parity = channel->dither_parity;
+    int subband;
+
+    for (subband = 0; subband < NB_SUBBANDS; subband++)
+        parity ^= channel->quantize[subband].quantized_sample;
+
+    return parity & 1;
+}
+
+/* For each sample, ensure that the parity of all subbands of all channels
+ * is 0 except once every 8 samples where the parity is forced to 1. */
+static int aptx_check_parity(Channel channels[NB_CHANNELS], int32_t *idx)
+{
+    int32_t parity = aptx_quantized_parity(&channels[LEFT])
+                   ^ aptx_quantized_parity(&channels[RIGHT]);
+
+    int eighth = *idx == 7;
+    *idx = (*idx + 1) & 7;
+
+    return parity ^ eighth;
+}
+
+static void aptx_insert_sync(Channel channels[NB_CHANNELS], int32_t *idx)
+{
+    if (aptx_check_parity(channels, idx)) {
+        int i;
+        Channel *c;
+        static const int map[] = { 1, 2, 0, 3 };
+        Quantize *min = &channels[NB_CHANNELS-1].quantize[map[0]];
+        for (c = &channels[NB_CHANNELS-1]; c >= channels; c--)
+            for (i = 0; i < NB_SUBBANDS; i++)
+                if (c->quantize[map[i]].error < min->error)
+                    min = &c->quantize[map[i]];
+
+        /* Forcing the desired parity is done by offsetting by 1 the quantized
+         * sample from the subband featuring the smallest quantization error. */
+        min->quantized_sample = min->quantized_sample_parity_change;
+    }
+}
+
+static uint16_t aptx_pack_codeword(Channel *channel)
+{
+    int32_t parity = aptx_quantized_parity(channel);
+    return (((channel->quantize[3].quantized_sample & 0x06) | parity) << 13)
+         | (((channel->quantize[2].quantized_sample & 0x03)         ) << 11)
+         | (((channel->quantize[1].quantized_sample & 0x0F)         ) <<  7)
+         | (((channel->quantize[0].quantized_sample & 0x7F)         ) <<  0);
+}
+
+static uint32_t aptxhd_pack_codeword(Channel *channel)
+{
+    int32_t parity = aptx_quantized_parity(channel);
+    return (((channel->quantize[3].quantized_sample & 0x01E) | parity) << 19)
+         | (((channel->quantize[2].quantized_sample & 0x00F)         ) << 15)
+         | (((channel->quantize[1].quantized_sample & 0x03F)         ) <<  9)
+         | (((channel->quantize[0].quantized_sample & 0x1FF)         ) <<  0);
+}
+
+static void aptx_unpack_codeword(Channel *channel, uint16_t codeword)
+{
+    channel->quantize[0].quantized_sample = sign_extend(codeword >>  0, 7);
+    channel->quantize[1].quantized_sample = sign_extend(codeword >>  7, 4);
+    channel->quantize[2].quantized_sample = sign_extend(codeword >> 11, 2);
+    channel->quantize[3].quantized_sample = sign_extend(codeword >> 13, 3);
+    channel->quantize[3].quantized_sample = (channel->quantize[3].quantized_sample & ~1)
+                                          | aptx_quantized_parity(channel);
+}
+
+static void aptxhd_unpack_codeword(Channel *channel, uint32_t codeword)
+{
+    channel->quantize[0].quantized_sample = sign_extend(codeword >>  0, 9);
+    channel->quantize[1].quantized_sample = sign_extend(codeword >>  9, 6);
+    channel->quantize[2].quantized_sample = sign_extend(codeword >> 15, 4);
+    channel->quantize[3].quantized_sample = sign_extend(codeword >> 19, 5);
+    channel->quantize[3].quantized_sample = (channel->quantize[3].quantized_sample & ~1)
+                                          | aptx_quantized_parity(channel);
+}
+
+static void aptx_encode_samples(AptXContext *ctx,
+                                int32_t samples[NB_CHANNELS][4],
+                                uint8_t *output)
+{
+    int channel;
+    for (channel = 0; channel < NB_CHANNELS; channel++)
+        aptx_encode_channel(&ctx->channels[channel], samples[channel], ctx->hd);
+
+    aptx_insert_sync(ctx->channels, &ctx->sync_idx);
+
+    for (channel = 0; channel < NB_CHANNELS; channel++) {
+        aptx_invert_quantize_and_prediction(&ctx->channels[channel], ctx->hd);
+        if (ctx->hd)
+            AV_WB24(output + 3*channel,
+                    aptxhd_pack_codeword(&ctx->channels[channel]));
+        else
+            AV_WB16(output + 2*channel,
+                    aptx_pack_codeword(&ctx->channels[channel]));
+    }
+}
+
+static int aptx_decode_samples(AptXContext *ctx,
+                                const uint8_t *input,
+                                int32_t samples[NB_CHANNELS][4])
+{
+    int channel, ret;
+
+    for (channel = 0; channel < NB_CHANNELS; channel++) {
+        aptx_generate_dither(&ctx->channels[channel]);
+
+        if (ctx->hd)
+            aptxhd_unpack_codeword(&ctx->channels[channel],
+                                   AV_RB24(input + 3*channel));
+        else
+            aptx_unpack_codeword(&ctx->channels[channel],
+                                 AV_RB16(input + 2*channel));
+        aptx_invert_quantize_and_prediction(&ctx->channels[channel], ctx->hd);
+    }
+
+    ret = aptx_check_parity(ctx->channels, &ctx->sync_idx);
+
+    for (channel = 0; channel < NB_CHANNELS; channel++)
+        aptx_decode_channel(&ctx->channels[channel], samples[channel]);
+
+    return ret;
+}
+
+
+static av_cold int aptx_init(AVCodecContext *avctx)
+{
+    AptXContext *s = avctx->priv_data;
+    int chan, subband;
+
+    s->hd = avctx->codec->id == AV_CODEC_ID_APTX_HD;
+    s->block_size = s->hd ? 6 : 4;
+
+    if (avctx->frame_size == 0)
+        avctx->frame_size = 256 * s->block_size;
+
+    if (avctx->frame_size % s->block_size) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Frame size must be a multiple of %d samples\n", s->block_size);
+        return AVERROR(EINVAL);
+    }
+
+    for (chan = 0; chan < NB_CHANNELS; chan++) {
+        Channel *channel = &s->channels[chan];
+        for (subband = 0; subband < NB_SUBBANDS; subband++) {
+            Prediction *prediction = &channel->prediction[subband];
+            prediction->prev_sign[0] = 1;
+            prediction->prev_sign[1] = 1;
+        }
+    }
+
+    ff_af_queue_init(avctx, &s->afq);
+    return 0;
+}
+
+static int aptx_decode_frame(AVCodecContext *avctx, void *data,
+                             int *got_frame_ptr, AVPacket *avpkt)
+{
+    AptXContext *s = avctx->priv_data;
+    AVFrame *frame = data;
+    int pos, opos, channel, sample, ret;
+
+    if (avpkt->size < s->block_size) {
+        av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* get output buffer */
+    frame->channels = NB_CHANNELS;
+    frame->format = AV_SAMPLE_FMT_S32P;
+    frame->nb_samples = 4 * avpkt->size / s->block_size;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    for (pos = 0, opos = 0; opos < frame->nb_samples; pos += s->block_size, opos += 4) {
+        int32_t samples[NB_CHANNELS][4];
+
+        if (aptx_decode_samples(s, &avpkt->data[pos], samples)) {
+            av_log(avctx, AV_LOG_ERROR, "Synchronization error\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        for (channel = 0; channel < NB_CHANNELS; channel++)
+            for (sample = 0; sample < 4; sample++)
+                AV_WN32A(&frame->data[channel][4*(opos+sample)],
+                         samples[channel][sample] << 8);
+    }
+
+    *got_frame_ptr = 1;
+    return s->block_size * frame->nb_samples / 4;
+}
+
+static int aptx_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                             const AVFrame *frame, int *got_packet_ptr)
+{
+    AptXContext *s = avctx->priv_data;
+    int pos, ipos, channel, sample, output_size, ret;
+
+    if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
+        return ret;
+
+    output_size = s->block_size * frame->nb_samples/4;
+    if ((ret = ff_alloc_packet2(avctx, avpkt, output_size, 0)) < 0)
+        return ret;
+
+    for (pos = 0, ipos = 0; pos < output_size; pos += s->block_size, ipos += 4) {
+        int32_t samples[NB_CHANNELS][4];
+
+        for (channel = 0; channel < NB_CHANNELS; channel++)
+            for (sample = 0; sample < 4; sample++)
+                samples[channel][sample] = (int32_t)AV_RN32A(&frame->data[channel][4*(ipos+sample)]) >> 8;
+
+        aptx_encode_samples(s, samples, avpkt->data + pos);
+    }
+
+    ff_af_queue_remove(&s->afq, frame->nb_samples, &avpkt->pts, &avpkt->duration);
+    *got_packet_ptr = 1;
+    return 0;
+}
+
+static av_cold int aptx_close(AVCodecContext *avctx)
+{
+    AptXContext *s = avctx->priv_data;
+    ff_af_queue_close(&s->afq);
+    return 0;
+}
+
+
+#if CONFIG_APTX_DECODER
+AVCodec ff_aptx_decoder = {
+    .name                  = "aptx",
+    .long_name             = NULL_IF_CONFIG_SMALL("aptX (Audio Processing Technology for Bluetooth)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_APTX,
+    .priv_data_size        = sizeof(AptXContext),
+    .init                  = aptx_init,
+    .decode                = aptx_decode_frame,
+    .close                 = aptx_close,
+    .capabilities          = AV_CODEC_CAP_DR1,
+    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_STEREO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S32P,
+                                                             AV_SAMPLE_FMT_NONE },
+};
+#endif
+
+#if CONFIG_APTX_HD_DECODER
+AVCodec ff_aptx_hd_decoder = {
+    .name                  = "aptx_hd",
+    .long_name             = NULL_IF_CONFIG_SMALL("aptX HD (Audio Processing Technology for Bluetooth)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_APTX_HD,
+    .priv_data_size        = sizeof(AptXContext),
+    .init                  = aptx_init,
+    .decode                = aptx_decode_frame,
+    .close                 = aptx_close,
+    .capabilities          = AV_CODEC_CAP_DR1,
+    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_STEREO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S32P,
+                                                             AV_SAMPLE_FMT_NONE },
+};
+#endif
+
+#if CONFIG_APTX_ENCODER
+AVCodec ff_aptx_encoder = {
+    .name                  = "aptx",
+    .long_name             = NULL_IF_CONFIG_SMALL("aptX (Audio Processing Technology for Bluetooth)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_APTX,
+    .priv_data_size        = sizeof(AptXContext),
+    .init                  = aptx_init,
+    .encode2               = aptx_encode_frame,
+    .close                 = aptx_close,
+    .capabilities          = AV_CODEC_CAP_SMALL_LAST_FRAME,
+    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_STEREO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S32P,
+                                                             AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]) {8000, 16000, 24000, 32000, 44100, 48000, 0},
+};
+#endif
+
+#if CONFIG_APTX_HD_ENCODER
+AVCodec ff_aptx_hd_encoder = {
+    .name                  = "aptx_hd",
+    .long_name             = NULL_IF_CONFIG_SMALL("aptX HD (Audio Processing Technology for Bluetooth)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_APTX_HD,
+    .priv_data_size        = sizeof(AptXContext),
+    .init                  = aptx_init,
+    .encode2               = aptx_encode_frame,
+    .close                 = aptx_close,
+    .capabilities          = AV_CODEC_CAP_SMALL_LAST_FRAME,
+    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_STEREO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S32P,
+                                                             AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]) {8000, 16000, 24000, 32000, 44100, 48000, 0},
+};
+#endif

diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 1eeac54..e656011 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile

@@ -42,6 +42,7 @@
 OBJS-$(CONFIG_HEVC_DECODER)            += arm/hevcdsp_init_arm.o
 OBJS-$(CONFIG_MLP_DECODER)             += arm/mlpdsp_init_arm.o
 OBJS-$(CONFIG_RV40_DECODER)            += arm/rv40dsp_init_arm.o
+OBJS-$(CONFIG_SBC_ENCODER)             += arm/sbcdsp_init_arm.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += arm/vorbisdsp_init_arm.o
 OBJS-$(CONFIG_VP6_DECODER)             += arm/vp6dsp_init_arm.o
 OBJS-$(CONFIG_VP9_DECODER)             += arm/vp9dsp_init_10bpp_arm.o   \
@@ -81,6 +82,7 @@
 
 # decoders/encoders
 ARMV6-OBJS-$(CONFIG_MLP_DECODER)       += arm/mlpdsp_armv6.o
+ARMV6-OBJS-$(CONFIG_SBC_ENCODER)       += arm/sbcdsp_armv6.o
 
 
 # VFP optimizations
@@ -136,10 +138,12 @@
 NEON-OBJS-$(CONFIG_HEVC_DECODER)       += arm/hevcdsp_init_neon.o       \
                                           arm/hevcdsp_deblock_neon.o    \
                                           arm/hevcdsp_idct_neon.o       \
-                                          arm/hevcdsp_qpel_neon.o
+                                          arm/hevcdsp_qpel_neon.o       \
+                                          arm/hevcdsp_sao_neon.o
 NEON-OBJS-$(CONFIG_RV30_DECODER)       += arm/rv34dsp_neon.o
 NEON-OBJS-$(CONFIG_RV40_DECODER)       += arm/rv34dsp_neon.o            \
                                           arm/rv40dsp_neon.o
+NEON-OBJS-$(CONFIG_SBC_ENCODER)        += arm/sbcdsp_neon.o
 NEON-OBJS-$(CONFIG_VORBIS_DECODER)     += arm/vorbisdsp_neon.o
 NEON-OBJS-$(CONFIG_VP6_DECODER)        += arm/vp6dsp_neon.o
 NEON-OBJS-$(CONFIG_VP9_DECODER)        += arm/vp9itxfm_16bpp_neon.o     \

diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c
index a734dec..e88255d 100644
--- a/libavcodec/arm/fmtconvert_init_arm.c
+++ b/libavcodec/arm/fmtconvert_init_arm.c

@@ -42,10 +42,8 @@
     int cpu_flags = av_get_cpu_flags();
 
     if (have_vfp_vm(cpu_flags)) {
-        if (!have_vfpv3(cpu_flags)) {
-            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
-            c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
-        }
+        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
+        c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
     }
 
     if (have_neon(cpu_flags)) {

diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
index 4f68bdb..93859db 100644
--- a/libavcodec/arm/h264idct_neon.S
+++ b/libavcodec/arm/h264idct_neon.S

@@ -21,6 +21,7 @@
 #include "libavutil/arm/asm.S"
 
 function ff_h264_idct_add_neon, export=1
+h264_idct_add_neon_nothumb:
         vld1.64         {d0-d3},  [r1,:128]
         vmov.i16        q15, #0
 
@@ -73,6 +74,7 @@
 endfunc
 
 function ff_h264_idct_dc_add_neon, export=1
+h264_idct_dc_add_neon_nothumb:
         mov             r3,       #0
         vld1.16         {d2[],d3[]}, [r1,:16]
         strh            r3,       [r1]
@@ -113,8 +115,8 @@
         movne           lr,  #0
         cmp             lr,  #0
         ite             ne
-        adrne           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
+        adrne           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
+        adreq           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
         blx             lr
 2:      subs            ip,  ip,  #1
         add             r1,  r1,  #32
@@ -138,8 +140,8 @@
         cmp             r8,  #0
         ldrsh           r8,  [r1]
         iteet           ne
-        adrne           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+        adrne           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
+        adreq           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
         cmpeq           r8,  #0
         blxne           lr
         subs            ip,  ip,  #1
@@ -166,8 +168,8 @@
         cmp             r8,  #0
         ldrsh           r8,  [r1]
         iteet           ne
-        adrne           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+        adrne           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
+        adreq           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
         cmpeq           r8,  #0
         blxne           lr
         add             r12, r12, #1
@@ -267,6 +269,7 @@
 .endm
 
 function ff_h264_idct8_add_neon, export=1
+h264_idct8_add_neon_nothumb:
         vmov.i16        q3,       #0
         vld1.16         {q8-q9},  [r1,:128]
         vst1.16         {q3},     [r1,:128]!
@@ -328,6 +331,7 @@
 endfunc
 
 function ff_h264_idct8_dc_add_neon, export=1
+h264_idct8_dc_add_neon_nothumb:
         mov             r3,       #0
         vld1.16         {d30[],d31[]},[r1,:16]
         strh            r3,       [r1]
@@ -388,8 +392,8 @@
         movne           lr,  #0
         cmp             lr,  #0
         ite             ne
-        adrne           lr,  X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct8_add_neon)    + CONFIG_THUMB
+        adrne           lr,  h264_idct8_dc_add_neon_nothumb + CONFIG_THUMB
+        adreq           lr,  h264_idct8_add_neon_nothumb    + CONFIG_THUMB
         blx             lr
 2:      subs            r12, r12, #4
         add             r1,  r1,  #128

diff --git a/libavcodec/arm/hevcdsp_arm.h b/libavcodec/arm/hevcdsp_arm.h
index 7735df9..47cdfa5 100644
--- a/libavcodec/arm/hevcdsp_arm.h
+++ b/libavcodec/arm/hevcdsp_arm.h

@@ -21,6 +21,6 @@
 
 #include "libavcodec/hevcdsp.h"
 
-void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth);
+void ff_hevc_dsp_init_neon(HEVCDSPContext *c, const int bit_depth);
 
 #endif /* AVCODEC_ARM_HEVCDSP_ARM_H */

diff --git a/libavcodec/arm/hevcdsp_deblock_neon.S b/libavcodec/arm/hevcdsp_deblock_neon.S
index 166bddb..7cb7487 100644
--- a/libavcodec/arm/hevcdsp_deblock_neon.S
+++ b/libavcodec/arm/hevcdsp_deblock_neon.S

@@ -152,7 +152,7 @@
 
         and        r9, r8, r7
         cmp        r9, #0
-        beq        weakfilter_\@
+        beq        1f
 
         vadd.i16  q2, q11, q12
         vadd.i16  q4, q9, q8
@@ -210,11 +210,11 @@
         vbit      q13, q3, q5
         vbit      q14, q2, q5
 
-weakfilter_\@:
+1:
         mvn       r8, r8
         and       r9, r8, r7
         cmp       r9, #0
-        beq       ready_\@
+        beq       2f
 
         vdup.16    q4, r2
 
@@ -275,7 +275,7 @@
         vbit      q11, q0, q5
         vbit      q12, q4, q5
 
-ready_\@:
+2:
         vqmovun.s16 d16, q8
         vqmovun.s16 d18, q9
         vqmovun.s16 d20, q10

diff --git a/libavcodec/arm/hevcdsp_idct_neon.S b/libavcodec/arm/hevcdsp_idct_neon.S
index e39d006..75795e6 100644
--- a/libavcodec/arm/hevcdsp_idct_neon.S
+++ b/libavcodec/arm/hevcdsp_idct_neon.S

@@ -1,5 +1,7 @@
 /*
+ * ARM NEON optimised IDCT functions for HEVC decoding
  * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi>
+ * Copyright (c) 2017 Alexandra Hájková
  *
  * This file is part of FFmpeg.
  *
@@ -19,179 +21,967 @@
  */
 
 #include "libavutil/arm/asm.S"
-#include "neon.S"
 
-function ff_hevc_idct_4x4_dc_neon_8, export=1
-        ldrsh       r1, [r0]
-        ldr         r2, =0x20
-        add         r1, #1
-        asr         r1, #1
-        add         r1, r2
-        asr         r1, #6
-        vdup.16     q0, r1
-        vdup.16     q1, r1
-        vst1.16     {q0, q1}, [r0]
-        bx lr
-endfunc
+const trans, align=4
+        .short 64, 83, 64, 36
+        .short 89, 75, 50, 18
+        .short 90, 87, 80, 70
+        .short 57, 43, 25, 9
+        .short 90, 90, 88, 85
+        .short 82, 78, 73, 67
+        .short 61, 54, 46, 38
+        .short 31, 22, 13, 4
+endconst
 
-function ff_hevc_idct_8x8_dc_neon_8, export=1
-        ldrsh       r1, [r0]
-        ldr         r2, =0x20
-        add         r1, #1
-        asr         r1, #1
-        add         r1, r2
-        asr         r1, #6
-        vdup.16     q8, r1
-        vdup.16     q9, r1
-        vmov.16     q10, q8
-        vmov.16     q11, q8
-        vmov.16     q12, q8
-        vmov.16     q13, q8
-        vmov.16     q14, q8
-        vmov.16     q15, q8
-        vstm        r0, {q8-q15}
-        bx lr
-endfunc
-
-function ff_hevc_idct_16x16_dc_neon_8, export=1
-        ldrsh       r1, [r0]
-        ldr         r2, =0x20
-        add         r1, #1
-        asr         r1, #1
-        add         r1, r2
-        asr         r1, #6
-        vdup.16     q8, r1
-        vdup.16     q9, r1
-        vmov.16     q10, q8
-        vmov.16     q11, q8
-        vmov.16     q12, q8
-        vmov.16     q13, q8
-        vmov.16     q14, q8
-        vmov.16     q15, q8
-        vstm        r0!, {q8-q15}
-        vstm        r0!, {q8-q15}
-        vstm        r0!, {q8-q15}
-        vstm        r0, {q8-q15}
-        bx lr
-endfunc
-
-function ff_hevc_idct_32x32_dc_neon_8, export=1
-        ldrsh       r1, [r0]
-        ldr         r2, =0x20
-        add         r1, #1
-        asr         r1, #1
-        add         r1, r2
-        asr         r1, #6
-        mov         r3, #16
-        vdup.16     q8, r1
-        vdup.16     q9, r1
-        vmov.16     q10, q8
-        vmov.16     q11, q8
-        vmov.16     q12, q8
-        vmov.16     q13, q8
-        vmov.16     q14, q8
-        vmov.16     q15, q8
-1:      subs        r3, #1
-        vstm        r0!, {q8-q15}
-        bne         1b
-        bx lr
-endfunc
-
-function ff_hevc_add_residual_4x4_neon_8, export=1
-        vldm        r1, {q0-q1}
-        vld1.32     d4[0], [r0], r2
-        vld1.32     d4[1], [r0], r2
-        vld1.32     d5[0], [r0], r2
-        vld1.32     d5[1], [r0], r2
-        sub         r0, r0, r2, lsl #2
-        vmovl.u8    q8, d4
-        vmovl.u8    q9, d5
-        vqadd.s16   q0, q0, q8
-        vqadd.s16   q1, q1, q9
-        vqmovun.s16 d0, q0
-        vqmovun.s16 d1, q1
-        vst1.32     d0[0], [r0], r2
-        vst1.32     d0[1], [r0], r2
-        vst1.32     d1[0], [r0], r2
-        vst1.32     d1[1], [r0], r2
-        bx          lr
-endfunc
-
-function ff_hevc_add_residual_8x8_neon_8, export=1
-        mov         r3,   #8
-1:      subs        r3,   #1
-        vld1.16     {q0}, [r1]!
-        vld1.8      d16,  [r0]
-        vmovl.u8    q8,   d16
-        vqadd.s16   q0,   q8
-        vqmovun.s16 d0,   q0
-        vst1.32     d0,   [r0], r2
-        bne         1b
-        bx          lr
-endfunc
-
-function ff_hevc_add_residual_16x16_neon_8, export=1
-        mov         r3,   #16
-1:      subs        r3,   #1
-        vld1.16     {q0, q1}, [r1]!
-        vld1.8      {q8},  [r0]
-        vmovl.u8    q9,  d16
-        vmovl.u8    q10, d17
-        vqadd.s16   q0,  q9
-        vqadd.s16   q1,  q10
-        vqmovun.s16 d0,  q0
-        vqmovun.s16 d1,  q1
-        vst1.8      {q0},   [r0], r2
-        bne         1b
-        bx          lr
-endfunc
-
-function ff_hevc_add_residual_32x32_neon_8, export=1
-        mov         r3,   #32
-1:      subs        r3,   #1
-        vldm        r1!, {q0-q3}
-        vld1.8      {q8, q9},  [r0]
-        vmovl.u8    q10, d16
-        vmovl.u8    q11, d17
-        vmovl.u8    q12, d18
-        vmovl.u8    q13, d19
-        vqadd.s16   q0,  q10
-        vqadd.s16   q1,  q11
-        vqadd.s16   q2,  q12
-        vqadd.s16   q3,  q13
-        vqmovun.s16 d0,  q0
-        vqmovun.s16 d1,  q1
-        vqmovun.s16 d2,  q2
-        vqmovun.s16 d3,  q3
-        vst1.8     {q0, q1},   [r0], r2
-        bne         1b
-        bx          lr
-endfunc
-
-.macro  transpose_16b_8x8   r0, r1, r2, r3, r4, r5, r6, r7
-        vtrn.64         \r0, \r4
-        vtrn.64         \r1, \r5
-        vtrn.64         \r2, \r6
-        vtrn.64         \r3, \r7
-        vtrn.32         \r0, \r2
-        vtrn.32         \r1, \r3
-        vtrn.32         \r4, \r6
-        vtrn.32         \r5, \r7
-        vtrn.16         \r0, \r1
-        vtrn.16         \r2, \r3
-        vtrn.16         \r4, \r5
-        vtrn.16         \r6, \r7
+.macro clip10 in1, in2, c1, c2
+        vmax.s16        \in1, \in1, \c1
+        vmax.s16        \in2, \in2, \c1
+        vmin.s16        \in1, \in1, \c2
+        vmin.s16        \in2, \in2, \c2
 .endm
 
-// in 4 q regs
-// output 8 d regs
-.macro transpose_16b_4x4    r0, r1, r2, r3
-        vtrn.32         \r0, \r2
-        vtrn.32         \r1, \r3
-        vtrn.16         \r0, \r1
-        vtrn.16         \r2, \r3
+function ff_hevc_add_residual_4x4_8_neon, export=1
+        vld1.16         {q0-q1}, [r1, :128]
+        vld1.32         d4[0], [r0, :32], r2
+        vld1.32         d4[1], [r0, :32], r2
+        vld1.32         d5[0], [r0, :32], r2
+        vld1.32         d5[1], [r0, :32], r2
+        sub             r0, r0, r2, lsl #2
+        vmovl.u8        q8, d4
+        vmovl.u8        q9, d5
+        vqadd.s16       q0, q0, q8
+        vqadd.s16       q1, q1, q9
+        vqmovun.s16     d0, q0
+        vqmovun.s16     d1, q1
+        vst1.32         d0[0], [r0, :32], r2
+        vst1.32         d0[1], [r0, :32], r2
+        vst1.32         d1[0], [r0, :32], r2
+        vst1.32         d1[1], [r0, :32], r2
+        bx              lr
+endfunc
+
+function ff_hevc_add_residual_4x4_10_neon, export=1
+        mov             r12, r0
+        vld1.16         {q0-q1}, [r1, :128]
+        vld1.16         d4, [r12, :64], r2
+        vld1.16         d5, [r12, :64], r2
+        vld1.16         d6, [r12, :64], r2
+        vqadd.s16       q0, q2
+        vld1.16         d7, [r12, :64], r2
+        vmov.s16        q12, #0
+        vqadd.s16       q1, q3
+        vmvn.s16        q13, #0xFC00 @ vmov.s16 #0x3FF
+        clip10          q0, q1, q12, q13
+        vst1.16         d0, [r0, :64], r2
+        vst1.16         d1, [r0, :64], r2
+        vst1.16         d2, [r0, :64], r2
+        vst1.16         d3, [r0, :64], r2
+        bx              lr
+endfunc
+
+function ff_hevc_add_residual_8x8_8_neon, export=1
+        add             r12, r0, r2
+        add             r2,  r2, r2
+        mov             r3,   #8
+1:      subs            r3,   #2
+        vld1.8          {d16},   [r0,  :64]
+        vld1.8          {d17},   [r12, :64]
+        vmovl.u8        q9,   d16
+        vld1.16         {q0-q1}, [r1,  :128]!
+        vmovl.u8        q8,   d17
+        vqadd.s16       q0,   q9
+        vqadd.s16       q1,   q8
+        vqmovun.s16     d0,   q0
+        vqmovun.s16     d1,   q1
+        vst1.8          d0,   [r0,  :64], r2
+        vst1.8          d1,   [r12, :64], r2
+        bne             1b
+        bx              lr
+endfunc
+
+function ff_hevc_add_residual_8x8_10_neon, export=1
+        add             r12, r0, r2
+        add             r2,  r2, r2
+        mov             r3,  #8
+        vmov.s16        q12, #0
+        vmvn.s16        q13, #0xFC00 @ vmov.s16 #0x3FF
+1:      subs            r3,  #2
+        vld1.16         {q0-q1}, [r1, :128]!
+        vld1.16         {q8},    [r0, :128]
+        vqadd.s16       q0, q8
+        vld1.16         {q9},    [r12, :128]
+        vqadd.s16       q1, q9
+        clip10          q0, q1, q12, q13
+        vst1.16         {q0}, [r0, :128], r2
+        vst1.16         {q1}, [r12, :128], r2
+        bne             1b
+        bx              lr
+endfunc
+
+function ff_hevc_add_residual_16x16_8_neon, export=1
+        mov             r3,  #16
+        add             r12, r0, r2
+        add             r2,  r2, r2
+1:      subs            r3,  #2
+        vld1.8          {q8},     [r0, :128]
+        vld1.16         {q0, q1}, [r1, :128]!
+        vld1.8          {q11},    [r12, :128]
+        vld1.16         {q2, q3}, [r1, :128]!
+        vmovl.u8        q9,  d16
+        vmovl.u8        q10, d17
+        vmovl.u8        q12, d22
+        vmovl.u8        q13, d23
+        vqadd.s16       q0,  q9
+        vqadd.s16       q1,  q10
+        vqadd.s16       q2,  q12
+        vqadd.s16       q3,  q13
+        vqmovun.s16     d0,  q0
+        vqmovun.s16     d1,  q1
+        vqmovun.s16     d2,  q2
+        vqmovun.s16     d3,  q3
+        vst1.8          {q0},     [r0, :128], r2
+        vst1.8          {q1},     [r12, :128], r2
+        bne             1b
+        bx              lr
+endfunc
+
+function ff_hevc_add_residual_16x16_10_neon, export=1
+        mov             r3,  #16
+        vmov.s16        q12, #0
+        vmvn.s16        q13, #0xFC00 @ vmov.s16 #0x3FF
+        add             r12, r0, r2
+        add             r2,  r2, r2
+1:      subs            r3,  #2
+        vld1.16         {q8-q9},   [r0, :128]
+        vld1.16         {q0, q1},  [r1, :128]!
+        vqadd.s16       q0, q8
+        vld1.16         {q10-q11}, [r12, :128]
+        vqadd.s16       q1, q9
+        vld1.16         {q2, q3},  [r1, :128]!
+        vqadd.s16       q2, q10
+        vqadd.s16       q3, q11
+        clip10          q0, q1, q12, q13
+        clip10          q2, q3, q12, q13
+        vst1.16         {q0-q1},   [r0, :128], r2
+        vst1.16         {q2-q3},   [r12, :128], r2
+        bne             1b
+        bx              lr
+endfunc
+
+function ff_hevc_add_residual_32x32_8_neon, export=1
+        vpush           {q4-q7}
+        add             r12, r0, r2
+        add             r2,  r2, r2
+        mov             r3,  #32
+1:      subs            r3,  #2
+        vld1.8          {q12, q13}, [r0,  :128]
+        vmovl.u8        q8,  d24
+        vmovl.u8        q9,  d25
+        vld1.8          {q14, q15}, [r12, :128]
+        vmovl.u8        q10, d26
+        vmovl.u8        q11, d27
+        vmovl.u8        q12, d28
+        vldm            r1!, {q0-q7}
+        vmovl.u8        q13, d29
+        vmovl.u8        q14, d30
+        vmovl.u8        q15, d31
+        vqadd.s16       q0,  q8
+        vqadd.s16       q1,  q9
+        vqadd.s16       q2,  q10
+        vqadd.s16       q3,  q11
+        vqadd.s16       q4,  q12
+        vqadd.s16       q5,  q13
+        vqadd.s16       q6,  q14
+        vqadd.s16       q7,  q15
+        vqmovun.s16     d0,  q0
+        vqmovun.s16     d1,  q1
+        vqmovun.s16     d2,  q2
+        vqmovun.s16     d3,  q3
+        vqmovun.s16     d4,  q4
+        vqmovun.s16     d5,  q5
+        vst1.8          {q0, q1}, [r0, :128], r2
+        vqmovun.s16     d6,  q6
+        vqmovun.s16     d7,  q7
+        vst1.8          {q2, q3}, [r12, :128], r2
+        bne             1b
+        vpop            {q4-q7}
+        bx              lr
+endfunc
+
+function ff_hevc_add_residual_32x32_10_neon, export=1
+        mov             r3,  #32
+        add             r12, r0, #32
+        vmov.s16        q12, #0
+        vmvn.s16        q13, #0xFC00 @ vmov.s16 #0x3FF
+1:      subs            r3,  #1
+        vldm            r1!, {q0-q3}
+        vld1.16         {q8, q9},   [r0, :128]
+        vld1.16         {q10, q11}, [r12, :128]
+        vqadd.s16       q0, q8
+        vqadd.s16       q1, q9
+        vqadd.s16       q2, q10
+        vqadd.s16       q3, q11
+        clip10          q0, q1, q12, q13
+        clip10          q2, q3, q12, q13
+        vst1.16         {q0-q1},   [r0, :128], r2
+        vst1.16         {q2-q3},   [r12, :128], r2
+        bne             1b
+        bx              lr
+endfunc
+
+.macro idct_4x4_dc bitdepth
+function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
+        ldrsh           r1, [r0]
+        ldr             r2, =(1 << (13 - \bitdepth))
+        add             r1, #1
+        asr             r1, #1
+        add             r1, r2
+        asr             r1, #(14 - \bitdepth)
+        vdup.16         q0, r1
+        vdup.16         q1, r1
+        vst1.16         {q0, q1}, [r0, :128]
+        bx              lr
+endfunc
 .endm
 
+.macro idct_8x8_dc bitdepth
+function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1
+        ldrsh           r1, [r0]
+        ldr             r2, =(1 << (13 - \bitdepth))
+        add             r1, #1
+        asr             r1, #1
+        add             r1, r2
+        asr             r1, #(14 - \bitdepth)
+        vdup.16         q8, r1
+        vdup.16         q9, r1
+        vmov.16         q10, q8
+        vmov.16         q11, q8
+        vmov.16         q12, q8
+        vmov.16         q13, q8
+        vmov.16         q14, q8
+        vmov.16         q15, q8
+        vstm            r0, {q8-q15}
+        bx              lr
+endfunc
+.endm
+
+.macro idct_16x16_dc bitdepth
+function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1
+        ldrsh           r1, [r0]
+        ldr             r2, =(1 << (13 - \bitdepth))
+        add             r1, #1
+        asr             r1, #1
+        add             r1, r2
+        asr             r1, #(14 - \bitdepth)
+        vdup.16         q8, r1
+        vdup.16         q9, r1
+        vmov.16         q10, q8
+        vmov.16         q11, q8
+        vmov.16         q12, q8
+        vmov.16         q13, q8
+        vmov.16         q14, q8
+        vmov.16         q15, q8
+        vstm            r0!, {q8-q15}
+        vstm            r0!, {q8-q15}
+        vstm            r0!, {q8-q15}
+        vstm            r0, {q8-q15}
+        bx              lr
+endfunc
+.endm
+
+.macro idct_32x32_dc bitdepth
+function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1
+        ldrsh           r1, [r0]
+        ldr             r2, =(1 << (13 - \bitdepth))
+        add             r1, #1
+        asr             r1, #1
+        add             r1, r2
+        asr             r1, #(14 - \bitdepth)
+        mov             r3, #16
+        vdup.16         q8, r1
+        vdup.16         q9, r1
+        vmov.16         q10, q8
+        vmov.16         q11, q8
+        vmov.16         q12, q8
+        vmov.16         q13, q8
+        vmov.16         q14, q8
+        vmov.16         q15, q8
+1:      subs            r3, #1
+        vstm            r0!, {q8-q15}
+        bne             1b
+        bx              lr
+endfunc
+.endm
+
+.macro sum_sub out, in, c, op
+  .ifc \op, +
+        vmlal.s16       \out, \in, \c
+  .else
+        vmlsl.s16       \out, \in, \c
+  .endif
+.endm
+
+.macro tr_4x4 in0, in1, in2, in3, out0, out1, out2, out3, shift, tmp0, tmp1, tmp2, tmp3, tmp4
+         vshll.s16      \tmp0, \in0, #6
+         vmull.s16      \tmp2, \in1, d4[1]
+         vmov           \tmp1, \tmp0
+         vmull.s16      \tmp3, \in1, d4[3]
+         vmlal.s16      \tmp0, \in2, d4[0] @e0
+         vmlsl.s16      \tmp1, \in2, d4[0] @e1
+         vmlal.s16      \tmp2, \in3, d4[3] @o0
+         vmlsl.s16      \tmp3, \in3, d4[1] @o1
+
+         vadd.s32       \tmp4, \tmp0, \tmp2
+         vsub.s32       \tmp0, \tmp0, \tmp2
+         vadd.s32       \tmp2, \tmp1, \tmp3
+         vsub.s32       \tmp1, \tmp1, \tmp3
+         vqrshrn.s32    \out0, \tmp4, #\shift
+         vqrshrn.s32    \out3, \tmp0, #\shift
+         vqrshrn.s32    \out1, \tmp2, #\shift
+         vqrshrn.s32    \out2, \tmp1, #\shift
+.endm
+
+.macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, tmp0, tmp1, tmp2, tmp3
+         vshll.s16      \tmp0, \in0, #6
+         vld1.s16       {\in0}, [r1, :64]!
+         vmov           \tmp1, \tmp0
+         vmull.s16      \tmp2, \in1, \in0[1]
+         vmull.s16      \tmp3, \in1, \in0[3]
+         vmlal.s16      \tmp0, \in2, \in0[0] @e0
+         vmlsl.s16      \tmp1, \in2, \in0[0] @e1
+         vmlal.s16      \tmp2, \in3, \in0[3] @o0
+         vmlsl.s16      \tmp3, \in3, \in0[1] @o1
+
+         vld1.s16       {\in0}, [r1, :64]
+
+         vadd.s32       \out0, \tmp0, \tmp2
+         vadd.s32       \out1, \tmp1, \tmp3
+         vsub.s32       \out2, \tmp1, \tmp3
+         vsub.s32       \out3, \tmp0, \tmp2
+
+         sub            r1,  r1,  #8
+.endm
+
+@ Do a 4x4 transpose, using q registers for the subtransposes that don't
+@ need to address the indiviudal d registers.
+@ r0,r1 == rq0, r2,r3 == rq1
+.macro transpose_4x4 rq0, rq1, r0, r1, r2, r3
+        vtrn.32         \rq0, \rq1
+        vtrn.16         \r0,  \r1
+        vtrn.16         \r2,  \r3
+.endm
+
+.macro idct_4x4 bitdepth
+function ff_hevc_idct_4x4_\bitdepth\()_neon, export=1
+@r0 - coeffs
+        vld1.s16        {q0-q1}, [r0, :128]
+
+        movrel          r1, trans
+        vld1.s16        {d4}, [r1, :64]
+
+        tr_4x4          d0, d1, d2, d3, d16, d17, d18, d19, 7, q10, q11, q12, q13, q0
+        transpose_4x4   q8, q9, d16, d17, d18, d19
+
+        tr_4x4          d16, d17, d18, d19, d0, d1, d2, d3, 20 - \bitdepth, q10, q11, q12, q13, q0
+        transpose_4x4   q0, q1, d0, d1, d2, d3
+        vst1.s16        {d0-d3}, [r0, :128]
+        bx lr
+endfunc
+.endm
+
+.macro transpose8_4x4 r0, r1, r2, r3
+        vtrn.16         \r0,  \r1
+        vtrn.16         \r2,  \r3
+        vtrn.32         \r0,  \r2
+        vtrn.32         \r1,  \r3
+.endm
+
+.macro transpose_8x8 r0, r1, r2, r3, r4, r5, r6, r7, l0, l1, l2, l3, l4, l5, l6, l7
+        transpose8_4x4  \r0, \r1, \r2, \r3
+        transpose8_4x4  \r4, \r5, \r6, \r7
+
+        transpose8_4x4  \l0, \l1, \l2, \l3
+        transpose8_4x4  \l4, \l5, \l6, \l7
+.endm
+
+.macro tr_8x4 shift, in0, in1, in2, in3, in4, in5, in6, in7
+        tr_4x4_8        \in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, q14, q15
+
+        vmull.s16       q14, \in1, \in0[2]
+        vmull.s16       q12, \in1, \in0[0]
+        vmull.s16       q13, \in1, \in0[1]
+        sum_sub         q14, \in3, \in0[0], -
+        sum_sub         q12, \in3, \in0[1], +
+        sum_sub         q13, \in3, \in0[3], -
+
+        sum_sub         q14, \in5, \in0[3], +
+        sum_sub         q12, \in5, \in0[2], +
+        sum_sub         q13, \in5, \in0[0], -
+
+        sum_sub         q14, \in7, \in0[1], +
+        sum_sub         q12, \in7, \in0[3], +
+        sum_sub         q13, \in7, \in0[2], -
+
+        vadd.s32        q15, q10, q14
+        vsub.s32        q10, q10, q14
+        vqrshrn.s32     \in2, q15, \shift
+
+        vmull.s16       q15, \in1, \in0[3]
+        sum_sub         q15, \in3, \in0[2], -
+        sum_sub         q15, \in5, \in0[1], +
+        sum_sub         q15, \in7, \in0[0], -
+
+        vqrshrn.s32     \in5, q10,  \shift
+
+        vadd.s32        q10, q8, q12
+        vsub.s32        q8,  q8, q12
+        vadd.s32        q12, q9, q13
+        vsub.s32        q9,  q9, q13
+        vadd.s32        q14, q11, q15
+        vsub.s32        q11, q11, q15
+
+        vqrshrn.s32     \in0, q10, \shift
+        vqrshrn.s32     \in7, q8,  \shift
+        vqrshrn.s32     \in1, q12, \shift
+        vqrshrn.s32     \in6, q9,  \shift
+        vqrshrn.s32     \in3, q14, \shift
+        vqrshrn.s32     \in4, q11, \shift
+.endm
+
+.macro idct_8x8 bitdepth
+function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1
+@r0 - coeffs
+        vpush           {q4-q7}
+
+        mov             r1,  r0
+        mov             r2,  #64
+        add             r3,  r0,  #32
+        vld1.s16        {q0-q1}, [r1,:128], r2
+        vld1.s16        {q2-q3}, [r3,:128], r2
+        vld1.s16        {q4-q5}, [r1,:128], r2
+        vld1.s16        {q6-q7}, [r3,:128], r2
+
+        movrel          r1, trans
+
+        tr_8x4          7, d0, d2, d4, d6, d8, d10, d12, d14
+        tr_8x4          7, d1, d3, d5, d7, d9, d11, d13, d15
+
+        @ Transpose each 4x4 block, and swap how d4-d7 and d8-d11 are used.
+        @ Layout before:
+        @ d0  d1
+        @ d2  d3
+        @ d4  d5
+        @ d6  d7
+        @ d8  d9
+        @ d10 d11
+        @ d12 d13
+        @ d14 d15
+        transpose_8x8   d0, d2, d4, d6, d8, d10, d12, d14, d1, d3, d5, d7, d9, d11, d13, d15
+        @ Now the layout is:
+        @ d0  d8
+        @ d2  d10
+        @ d4  d12
+        @ d6  d14
+        @ d1  d9
+        @ d3  d11
+        @ d5  d13
+        @ d7  d15
+
+        tr_8x4          20 - \bitdepth, d0, d2, d4, d6, d1, d3, d5, d7
+        vswp            d0, d8
+        tr_8x4          20 - \bitdepth, d0, d10, d12, d14, d9, d11, d13, d15
+        vswp            d0, d8
+
+        transpose_8x8   d0, d2, d4, d6, d8, d10, d12, d14, d1, d3, d5, d7, d9, d11, d13, d15
+
+        mov             r1,  r0
+        mov             r2,  #64
+        add             r3,  r0,  #32
+        vst1.s16        {q0-q1}, [r1,:128], r2
+        vst1.s16        {q2-q3}, [r3,:128], r2
+        vst1.s16        {q4-q5}, [r1,:128], r2
+        vst1.s16        {q6-q7}, [r3,:128], r2
+
+        vpop            {q4-q7}
+        bx              lr
+endfunc
+.endm
+
+.macro butterfly e, o, tmp_p, tmp_m
+        vadd.s32        \tmp_p, \e, \o
+        vsub.s32        \tmp_m, \e, \o
+.endm
+
+.macro tr16_8x4 in0, in1, in2, in3, in4, in5, in6, in7, offset
+        tr_4x4_8        \in0, \in2, \in4, \in6, q8, q9, q10, q11, q12, q13, q14, q15
+
+        vmull.s16       q12, \in1, \in0[0]
+        vmull.s16       q13, \in1, \in0[1]
+        vmull.s16       q14, \in1, \in0[2]
+        vmull.s16       q15, \in1, \in0[3]
+        sum_sub         q12, \in3, \in0[1], +
+        sum_sub         q13, \in3, \in0[3], -
+        sum_sub         q14, \in3, \in0[0], -
+        sum_sub         q15, \in3, \in0[2], -
+
+        sum_sub         q12, \in5, \in0[2], +
+        sum_sub         q13, \in5, \in0[0], -
+        sum_sub         q14, \in5, \in0[3], +
+        sum_sub         q15, \in5, \in0[1], +
+
+        sum_sub         q12, \in7, \in0[3], +
+        sum_sub         q13, \in7, \in0[2], -
+        sum_sub         q14, \in7, \in0[1], +
+        sum_sub         q15, \in7, \in0[0], -
+
+        butterfly       q8,  q12, q0, q7
+        butterfly       q9,  q13, q1, q6
+        butterfly       q10, q14, q2, q5
+        butterfly       q11, q15, q3, q4
+        add             r4,  sp,  #\offset
+        vst1.s32        {q0-q1}, [r4, :128]!
+        vst1.s32        {q2-q3}, [r4, :128]!
+        vst1.s32        {q4-q5}, [r4, :128]!
+        vst1.s32        {q6-q7}, [r4, :128]
+.endm
+
+.macro load16 in0, in1, in2, in3, in4, in5, in6, in7
+        vld1.s16        {\in0}, [r1, :64], r2
+        vld1.s16        {\in1}, [r3, :64], r2
+        vld1.s16        {\in2}, [r1, :64], r2
+        vld1.s16        {\in3}, [r3, :64], r2
+        vld1.s16        {\in4}, [r1, :64], r2
+        vld1.s16        {\in5}, [r3, :64], r2
+        vld1.s16        {\in6}, [r1, :64], r2
+        vld1.s16        {\in7}, [r3, :64], r2
+.endm
+
+.macro add_member in, t0, t1, t2, t3, t4, t5, t6, t7, op0, op1, op2, op3, op4, op5, op6, op7
+        sum_sub q5,     \in, \t0, \op0
+        sum_sub q6,     \in, \t1, \op1
+        sum_sub q7,     \in, \t2, \op2
+        sum_sub q8,     \in, \t3, \op3
+        sum_sub q9,     \in, \t4, \op4
+        sum_sub q10,    \in, \t5, \op5
+        sum_sub q11,    \in, \t6, \op6
+        sum_sub q12,    \in, \t7, \op7
+.endm
+
+.macro butterfly16 in0, in1, in2, in3, in4, in5, in6, in7
+        vadd.s32        q4, \in0, \in1
+        vsub.s32        \in0, \in0, \in1
+        vadd.s32        \in1, \in2, \in3
+        vsub.s32        \in2, \in2, \in3
+        vadd.s32        \in3, \in4, \in5
+        vsub.s32        \in4, \in4, \in5
+        vadd.s32        \in5, \in6, \in7
+        vsub.s32        \in6, \in6, \in7
+.endm
+
+.macro store16 in0, in1, in2, in3, in4, in5, in6, in7, rx
+        vst1.s16        \in0, [r1, :64], r2
+        vst1.s16        \in1, [r3, :64], \rx
+        vst1.s16        \in2, [r1, :64], r2
+        vst1.s16        \in3, [r3, :64], \rx
+        vst1.s16        \in4, [r1, :64], r2
+        vst1.s16        \in5, [r3, :64], \rx
+        vst1.s16        \in6, [r1, :64], r2
+        vst1.s16        \in7, [r3, :64], \rx
+.endm
+
+.macro scale out0, out1, out2, out3, out4, out5, out6, out7, in0, in1, in2, in3, in4, in5, in6, in7, shift
+        vqrshrn.s32     \out0, \in0, \shift
+        vqrshrn.s32     \out1, \in1, \shift
+        vqrshrn.s32     \out2, \in2, \shift
+        vqrshrn.s32     \out3, \in3, \shift
+        vqrshrn.s32     \out4, \in4, \shift
+        vqrshrn.s32     \out5, \in5, \shift
+        vqrshrn.s32     \out6, \in6, \shift
+        vqrshrn.s32     \out7, \in7, \shift
+.endm
+
+@stores in1, in2, in4, in6 ascending from off1 and
+@stores in1, in3, in5, in7 descending from off2
+.macro store_to_stack off1, off2, in0, in2, in4, in6, in7, in5, in3, in1
+        add             r1, sp, #\off1
+        add             r3, sp, #\off2
+        mov             r2, #-16
+        vst1.s32        {\in0}, [r1, :128]!
+        vst1.s32        {\in1}, [r3, :128], r2
+        vst1.s32        {\in2}, [r1, :128]!
+        vst1.s32        {\in3}, [r3, :128], r2
+        vst1.s32        {\in4}, [r1, :128]!
+        vst1.s32        {\in5}, [r3, :128], r2
+        vst1.s32        {\in6}, [r1, :128]
+        vst1.s32        {\in7}, [r3, :128]
+.endm
+
+.macro tr_16x4 name, shift, offset, step
+function func_tr_16x4_\name
+        mov             r1,  r5
+        add             r3, r5, #(\step * 64)
+        mov             r2, #(\step * 128)
+        load16          d0, d1, d2, d3, d4, d5, d6, d7
+        movrel          r1, trans
+
+        tr16_8x4        d0, d1, d2, d3, d4, d5, d6, d7, \offset
+
+        add             r1,  r5, #(\step * 32)
+        add             r3,  r5, #(\step * 3 *32)
+        mov             r2,  #(\step * 128)
+        load16          d8, d9, d2, d3, d4, d5, d6, d7
+        movrel          r1, trans + 16
+        vld1.s16        {q0}, [r1, :128]
+        vmull.s16       q5, d8, d0[0]
+        vmull.s16       q6, d8, d0[1]
+        vmull.s16       q7, d8, d0[2]
+        vmull.s16       q8, d8, d0[3]
+        vmull.s16       q9, d8, d1[0]
+        vmull.s16       q10, d8, d1[1]
+        vmull.s16       q11, d8, d1[2]
+        vmull.s16       q12, d8, d1[3]
+
+        add_member      d9, d0[1], d1[0], d1[3], d1[1], d0[2], d0[0], d0[3], d1[2], +, +, +, -, -, -, -, -
+        add_member      d2, d0[2], d1[3], d0[3], d0[1], d1[2], d1[0], d0[0], d1[1], +, +, -, -, -, +, +, +
+        add_member      d3, d0[3], d1[1], d0[1], d1[3], d0[0], d1[2], d0[2], d1[0], +, -, -, +, +, +, -, -
+        add_member      d4, d1[0], d0[2], d1[2], d0[0], d1[3], d0[1], d1[1], d0[3], +, -, -, +, -, -, +, +
+        add_member      d5, d1[1], d0[0], d1[0], d1[2], d0[1], d0[3], d1[3], d0[2], +, -, +, +, -, +, +, -
+        add_member      d6, d1[2], d0[3], d0[0], d0[2], d1[1], d1[3], d1[0], d0[1], +, -, +, -, +, +, -, +
+        add_member      d7, d1[3], d1[2], d1[1], d1[0], d0[3], d0[2], d0[1], d0[0], +, -, +, -, +, -, +, -
+
+        add             r4, sp, #\offset
+        vld1.s32        {q0-q1}, [r4, :128]!
+        vld1.s32        {q2-q3}, [r4, :128]!
+
+        butterfly16     q0, q5, q1, q6, q2, q7, q3, q8
+    .if \shift > 0
+        scale           d26, d27, d28, d29, d30, d31, d16, d17, q4, q0, q5, q1, q6, q2, q7, q3, \shift
+        transpose8_4x4  d26, d28, d30, d16
+        transpose8_4x4  d17, d31, d29, d27
+        mov             r1, r6
+        add             r3, r6, #(24 +3*32)
+        mov             r2, #32
+        mov             r4, #-32
+        store16         d26, d27, d28, d29, d30, d31, d16, d17, r4
+    .else
+        store_to_stack  \offset, (\offset + 240), q4, q5, q6, q7, q3, q2, q1, q0
+    .endif
+
+        add             r4, sp, #(\offset + 64)
+        vld1.s32        {q0-q1}, [r4, :128]!
+        vld1.s32        {q2-q3}, [r4, :128]
+        butterfly16     q0, q9, q1, q10, q2, q11, q3, q12
+    .if \shift > 0
+        scale           d26, d27, d28, d29, d30, d31, d8, d9, q4, q0, q9, q1, q10, q2, q11, q3, \shift
+        transpose8_4x4  d26, d28, d30, d8
+        transpose8_4x4  d9, d31, d29, d27
+
+        add             r1, r6, #8
+        add             r3, r6, #(16 + 3 * 32)
+        mov             r2, #32
+        mov             r4, #-32
+        store16         d26, d27, d28, d29, d30, d31, d8, d9, r4
+    .else
+        store_to_stack (\offset + 64), (\offset + 176), q4, q9, q10, q11, q3, q2, q1, q0
+    .endif
+
+        bx              lr
+endfunc
+.endm
+
+.macro idct_16x16 bitdepth
+function ff_hevc_idct_16x16_\bitdepth\()_neon, export=1
+@r0 - coeffs
+        push            {r4-r7, lr}
+        vpush           {q4-q7}
+
+        @ Align the stack, allocate a temp buffer
+T       mov             r7,  sp
+T       and             r7,  r7,  #15
+A       and             r7,  sp,  #15
+        add             r7,  r7,  #640
+        sub             sp,  sp,  r7
+
+.irp i, 0, 1, 2, 3
+        add             r5, r0, #(8 * \i)
+        add             r6, sp, #(8 * \i * 16)
+        bl              func_tr_16x4_firstpass
+.endr
+
+.irp i, 0, 1, 2, 3
+        add             r5, sp, #(8 * \i)
+        add             r6, r0, #(8 * \i * 16)
+        bl              func_tr_16x4_secondpass_\bitdepth
+.endr
+
+        add             sp,  sp,  r7
+
+        vpop            {q4-q7}
+        pop             {r4-r7, pc}
+endfunc
+.endm
+
+.macro load32
+        add             r1,  r5, #64
+        add             r3,  r1, #128
+        mov             r2,  #256
+        vld1.s16        {d4}, [r1, :64], r2
+        vld1.s16        {d5}, [r3, :64], r2
+        vld1.s16        {d6}, [r1, :64], r2
+        vld1.s16        {d7}, [r3, :64], r2
+        vld1.s16        {d8}, [r1, :64], r2
+        vld1.s16        {d9}, [r3, :64], r2
+        vld1.s16        {d10}, [r1, :64], r2
+        vld1.s16        {d11}, [r3, :64], r2
+        vld1.s16        {d12}, [r1, :64], r2
+        vld1.s16        {d13}, [r3, :64], r2
+        vld1.s16        {d14}, [r1, :64], r2
+        vld1.s16        {d15}, [r3, :64], r2
+        vld1.s16        {d16}, [r1, :64], r2
+        vld1.s16        {d17}, [r3, :64], r2
+        vld1.s16        {d18}, [r1, :64], r2
+        vld1.s16        {d19}, [r3, :64], r2
+.endm
+
+.macro add_member32 in, t0, t1, t2, t3, op0, op1, op2, op3
+        sum_sub q10,     \in, \t0, \op0
+        sum_sub q11,     \in, \t1, \op1
+        sum_sub q12,     \in, \t2, \op2
+        sum_sub q13,     \in, \t3, \op3
+.endm
+
+.macro butterfly32 in0, in1, in2, in3
+        vadd.s32        q1, \in0, \in1
+        vsub.s32        \in0, \in0, \in1
+        vadd.s32        \in1, \in2, \in3
+        vsub.s32        \in2, \in2, \in3
+.endm
+
+.macro scale32 out0, out1, out2, out3, in0, in1, in2, in3, shift
+        vqrshrn.s32     \out0, \in0, \shift
+        vqrshrn.s32     \out1, \in1, \shift
+        vqrshrn.s32     \out2, \in2, \shift
+        vqrshrn.s32     \out3, \in3, \shift
+.endm
+
+.macro multiply in
+        vmull.s16       q10, d4, \in[0]
+        vmull.s16       q11, d4, \in[1]
+        vmull.s16       q12, d4, \in[2]
+        vmull.s16       q13, d4, \in[3]
+.endm
+
+.macro scale_store shift
+        vld1.s16        {q14-q15}, [r4, :128]!
+        butterfly32     q14, q10, q15, q11
+        scale32         d22, d23, d20, d21, q1, q14, q10, q15, \shift
+
+        vld1.s16        {q14-q15}, [r4, :128]!
+        butterfly32     q14, q12, q15, q13
+        scale32         d2, d3, d28, d29, q1, q14, q12, q15, \shift
+        transpose8_4x4  d22, d20, d2, d28
+        transpose8_4x4  d29, d3, d21, d23
+        store16         d22, d23, d20, d21, d2, d3, d28, d29, r8
+
+        @ reload multiplication coefficiens to q1
+        vld1.s16        {q1}, [r9, :128]
+.endm
+
+function tr_block1
+        multiply        d0
+        add_member32    d5,  d0[1], d1[0], d1[3], d2[2], +, +, +, +
+        add_member32    d6,  d0[2], d1[3], d3[0], d3[2], +, +, +, -
+        add_member32    d7,  d0[3], d2[2], d3[2], d1[3], +, +, -, -
+        add_member32    d8,  d1[0], d3[1], d2[1], d0[0], +, +, -, -
+        add_member32    d9,  d1[1], d3[3], d1[0], d1[2], +, -, -, -
+        add_member32    d10, d1[2], d3[0], d0[0], d3[1], +, -, -, -
+        add_member32    d11, d1[3], d2[1], d1[1], d2[3], +, -, -, +
+        add_member32    d12, d2[0], d1[2], d2[2], d1[0], +, -, -, +
+        add_member32    d13, d2[1], d0[3], d3[3], d0[2], +, -, -, +
+        add_member32    d14, d2[2], d0[1], d2[3], d2[1], +, -, +, +
+        add_member32    d15, d2[3], d0[2], d1[2], d3[3], +, -, +, -
+        add_member32    d16, d3[0], d1[1], d0[1], d2[0], +, -, +, -
+        add_member32    d17, d3[1], d2[0], d0[3], d0[1], +, -, +, -
+        add_member32    d18, d3[2], d2[3], d2[0], d1[1], +, -, +, -
+        add_member32    d19, d3[3], d3[2], d3[1], d3[0], +, -, +, -
+        bx              lr
+endfunc
+
+function tr_block2
+        multiply        d1
+        add_member32    d5,  d3[1], d3[3], d3[0], d2[1], +, -, -, -
+        add_member32    d6,  d2[1], d1[0], d0[0], d1[1], -, -, -, -
+        add_member32    d7,  d0[0], d1[2], d3[1], d2[3], -, -, -, +
+        add_member32    d8,  d2[0], d3[2], d1[1], d0[3], -, +, +, +
+        add_member32    d9,  d3[2], d0[3], d1[3], d3[1], +, +, +, -
+        add_member32    d10, d1[1], d1[3], d2[3], d0[0], +, +, -, -
+        add_member32    d11, d0[3], d3[1], d0[1], d3[3], +, -, -, +
+        add_member32    d12, d3[0], d0[2], d3[2], d0[1], +, -, -, +
+        add_member32    d13, d2[2], d2[0], d1[0], d3[2], -, -, +, +
+        add_member32    d14, d0[1], d3[0], d2[0], d0[2], -, +, +, -
+        add_member32    d15, d1[3], d0[1], d2[2], d3[0], -, +, -, -
+        add_member32    d16, d3[3], d2[1], d0[2], d1[0], +, +, -, +
+        add_member32    d17, d1[2], d2[3], d3[3], d2[2], +, -, -, +
+        add_member32    d18, d0[2], d0[1], d0[3], d1[2], +, -, +, -
+        add_member32    d19, d2[3], d2[2], d2[1], d2[0], +, -, +, -
+        bx              lr
+endfunc
+
+function tr_block3
+        multiply        d2
+        add_member32    d5,  d1[2], d0[3], d0[0], d0[2], -, -, -, -
+        add_member32    d6,  d2[2], d3[3], d2[3], d1[2], -, -, +, +
+        add_member32    d7,  d1[0], d0[2], d2[1], d3[3], +, +, +, -
+        add_member32    d8,  d3[0], d2[2], d0[1], d1[3], +, -, -, -
+        add_member32    d9,  d0[2], d2[0], d3[0], d0[0], -, -, +, +
+        add_member32    d10, d3[2], d1[0], d2[0], d2[2], -, +, +, -
+        add_member32    d11, d0[0], d3[2], d0[2], d3[0], +, +, -, -
+        add_member32    d12, d3[3], d0[1], d3[1], d0[3], -, -, +, +
+        add_member32    d13, d0[1], d2[3], d1[3], d1[1], -, +, +, -
+        add_member32    d14, d3[1], d1[3], d0[3], d3[2], +, +, -, +
+        add_member32    d15, d0[3], d1[1], d3[2], d2[0], +, -, +, +
+        add_member32    d16, d2[3], d3[1], d1[2], d0[1], -, -, +, -
+        add_member32    d17, d1[1], d0[0], d1[0], d2[1], -, +, -, +
+        add_member32    d18, d2[1], d3[0], d3[3], d3[1], +, -, +, +
+        add_member32    d19, d1[3], d1[2], d1[1], d1[0], +, -, +, -
+        bx              lr
+endfunc
+
+function tr_block4
+        multiply        d3
+        add_member32    d5,  d1[1], d2[0], d2[3], d3[2], -, -, -, -
+        add_member32    d6,  d0[0], d0[3], d2[0], d3[1], +, +, +, +
+        add_member32    d7,  d2[0], d0[0], d1[1], d3[0], -, -, -, -
+        add_member32    d8,  d3[3], d1[2], d0[2], d2[3], +, +, +, +
+        add_member32    d9,  d2[1], d2[3], d0[0], d2[2], +, -, -, -
+        add_member32    d10, d0[2], d3[3], d0[3], d2[1], -, -, +, +
+        add_member32    d11, d1[0], d2[2], d1[2], d2[0], +, +, -, -
+        add_member32    d12, d2[3], d1[1], d2[1], d1[3], -, -, +, +
+        add_member32    d13, d3[1], d0[1], d3[0], d1[2], -, +, -, -
+        add_member32    d14, d1[2], d1[0], d3[3], d1[1], +, -, +, +
+        add_member32    d15, d0[1], d2[1], d3[1], d1[0], -, +, +, -
+        add_member32    d16, d1[3], d3[2], d2[2], d0[3], +, -, -, +
+        add_member32    d17, d3[2], d3[0], d1[3], d0[2], -, -, +, -
+        add_member32    d18, d2[2], d1[3], d1[0], d0[1], -, +, -, +
+        add_member32    d19, d0[3], d0[2], d0[1], d0[0], +, -, +, -
+        bx              lr
+endfunc
+
+.macro tr_32x4 name, shift
+function func_tr_32x4_\name
+        mov             r10, lr
+        bl              func_tr_16x4_noscale
+
+        load32
+        movrel          r9, trans + 32
+        vld1.s16        {q0}, [r9, :128]!
+        vld1.s16        {q1}, [r9, :128]
+
+        bl              tr_block1
+
+        add             r4, sp, #2048
+        vld1.s16        {q14-q15}, [r4, :128]!
+        butterfly32     q14, q10, q15, q11
+        scale32         d22, d23, d20, d21, q1, q14, q10, q15, \shift
+
+        vld1.s16        {q14-q15}, [r4, :128]!
+        butterfly32     q14, q12, q15, q13
+        scale32         d2, d3, d28, d29, q1, q14, q12, q15, \shift
+
+        transpose8_4x4  d22, d20, d2, d28
+        transpose8_4x4  d29, d3, d21, d23
+        mov             r1, r11
+        mov             r2, #64
+        mov             r8, #-64
+        add             r3, r11, #(56 + 3 * 64)
+        store16         d22, d23, d20, d21, d2, d3, d28, d29, r8
+
+        @ reload multiplication coefficiens to q1
+        vld1.s16        {q1}, [r9, :128]
+
+        bl              tr_block2
+        add             r1, r11, #8
+        add             r3, r11, #(48 + 3 * 64)
+        mov             r2, #64
+        mov             r8, #-64
+        scale_store     \shift
+
+        bl              tr_block3
+        add             r1, r11, #16
+        add             r3, r11, #(40 + 3 * 64)
+        mov             r2, #64
+        mov             r8, #-64
+        scale_store     \shift
+
+        bl              tr_block4
+        add             r1, r11, #24
+        add             r3, r11, #(32 + 3 * 64)
+        mov             r2, #64
+        mov             r8, #-64
+        scale_store     \shift
+
+        bx               r10
+endfunc
+.endm
+
+.macro idct_32x32 bitdepth
+function ff_hevc_idct_32x32_\bitdepth\()_neon, export=1
+@r0 - coeffs
+        push            {r4-r11, lr}
+        vpush           {q4-q7}
+
+        @ Align the stack, allocate a temp buffer
+T       mov             r7,  sp
+T       and             r7,  r7,  #15
+A       and             r7,  sp,  #15
+        add             r7,  r7,  #2432
+        sub             sp,  sp,  r7
+
+.irp i, 0, 1, 2, 3, 4, 5, 6, 7
+        add             r5, r0, #(8 * \i)
+        add             r11, sp, #(8 * \i * 32)
+        bl              func_tr_32x4_firstpass
+.endr
+
+.irp i, 0, 1, 2, 3, 4, 5, 6, 7
+        add             r5, sp, #(8 * \i)
+        add             r11, r0, #(8 * \i * 32)
+        bl              func_tr_32x4_secondpass_\bitdepth
+.endr
+
+        add             sp,  sp,  r7
+        vpop            {q4-q7}
+        pop             {r4-r11, pc}
+endfunc
+.endm
+
+tr_16x4 firstpass, 7, 512, 1
+tr_16x4 secondpass_8, 20 - 8, 512, 1
+tr_16x4 secondpass_10, 20 - 10, 512, 1
+tr_16x4 noscale, 0, 2048, 4
+.ltorg
+tr_32x4 firstpass, 7
+tr_32x4 secondpass_8, 20 - 8
+tr_32x4 secondpass_10, 20 - 10
+.ltorg
+
+idct_4x4 8
+idct_4x4_dc 8
+idct_4x4 10
+idct_4x4_dc 10
+idct_8x8 8
+idct_8x8_dc 8
+idct_8x8 10
+idct_8x8_dc 10
+idct_16x16 8
+idct_16x16_dc 8
+idct_16x16 10
+idct_16x16_dc 10
+idct_32x32 8
+idct_32x32_dc 8
+idct_32x32 10
+idct_32x32_dc 10
+
 /* uses registers q2 - q9 for temp values */
 /* TODO: reorder */
 .macro tr4_luma_shift r0, r1, r2, r3, shift
@@ -225,67 +1015,7 @@
         vqrshrn.s32   \r3, q5, \shift
 .endm
 
-/* uses registers q2 - q6 for temp values */
-.macro tr4 r0, r1, r2, r3
-        vmull.s16  q4, \r1, d0[0]   // 83 * src1
-        vmull.s16  q6, \r1, d0[1]   // 36 * src1
-        vshll.s16  q2, \r0, #6   // 64 * src0
-        vshll.s16  q3, \r2, #6   // 64 * src2
-        vadd.s32   q5, q2, q3    // 64 * (src0 + src2)     e0
-        vsub.s32   q2, q2, q3    // 64 * (src0 - src2)     e1
-        vmlal.s16  q4, \r3, d0[1]   // 83 * src1 + 36 * src3  o0
-        vmlsl.s16  q6, \r3, d0[0]   // 36 * src1 - 83 * src3  o1
-
-        vsub.s32   q3, q5, q4    // e0 - o0
-        vadd.s32   q4, q5, q4    // e0 + o0
-        vadd.s32   q5, q2, q6    // e1 + o1
-        vsub.s32   q6, q2, q6    // e1 - o1
-.endm
-
-.macro tr4_shift r0, r1, r2, r3, shift
-        vmull.s16  q4, \r1, d0[0]   // 83 * src1
-        vmull.s16  q6, \r1, d0[1]   // 36 * src1
-        vshll.s16  q2, \r0, #6   // 64 * src0
-        vshll.s16  q3, \r2, #6   // 64 * src2
-        vadd.s32   q5, q2, q3    // 64 * (src0 + src2)     e0
-        vsub.s32   q2, q2, q3    // 64 * (src0 - src2)     e1
-        vmlal.s16  q4, \r3, d0[1]   // 83 * src1 + 36 * src3  o0
-        vmlsl.s16  q6, \r3, d0[0]   // 36 * src1 - 83 * src3  o1
-
-        vsub.s32   q3, q5, q4    // e0 - o0
-        vadd.s32   q4, q5, q4    // e0 + o0
-        vadd.s32   q5, q2, q6    // e1 + o1
-        vsub.s32   q6, q2, q6    // e1 - o1
-
-        vqrshrn.s32   \r0, q4, \shift
-        vqrshrn.s32   \r1, q5, \shift
-        vqrshrn.s32   \r2, q6, \shift
-        vqrshrn.s32   \r3, q3, \shift
-.endm
-
-function ff_hevc_transform_4x4_neon_8, export=1
-        vpush       {d8-d15}
-        vld1.16     {q14, q15}, [r0]  // coeffs
-        ldr         r3, =0x00240053 // 36 and 83
-        vmov.32     d0[0], r3
-
-        tr4_shift d28, d29, d30, d31, #7
-
-        vtrn.16     d28, d29
-        vtrn.16     d30, d31
-        vtrn.32     q14, q15
-
-        tr4_shift d28, d29, d30, d31, #12
-
-        vtrn.16     d28, d29
-        vtrn.16     d30, d31
-        vtrn.32     q14, q15
-
-        vst1.16     {q14, q15}, [r0]
-        vpop        {d8-d15}
-        bx lr
-endfunc
-
+.ltorg
 function ff_hevc_transform_luma_4x4_neon_8, export=1
         vpush       {d8-d15}
         vld1.16     {q14, q15}, [r0]  // coeffs
@@ -311,155 +1041,3 @@
         vpop        {d8-d15}
         bx lr
 endfunc
-
-.macro tr8_begin in0, in1, in2, in3
-        vmull.s16  q7, \in0, d1[1]   // 89 * src1
-        vmull.s16  q8, \in0, d1[0]   // 75 * src1
-        vmull.s16  q9, \in0, d1[3]   // 50 * src1
-        vmull.s16  q10, \in0, d1[2]  // 18 * src1
-
-        vmlal.s16  q7, \in1, d1[0]   // 75 * src3
-        vmlsl.s16  q8, \in1, d1[2]   //-18 * src3
-        vmlsl.s16  q9, \in1, d1[1]   //-89 * src3
-        vmlsl.s16  q10, \in1, d1[3]  //-50 * src3
-
-        vmlal.s16  q7, \in2, d1[3]   // 50 * src5
-        vmlsl.s16  q8, \in2, d1[1]   //-89 * src5
-        vmlal.s16  q9, \in2, d1[2]   // 18 * src5
-        vmlal.s16  q10, \in2, d1[0]  // 75 * src5
-
-        vmlal.s16  q7, \in3, d1[2]   // 18 * src7
-        vmlsl.s16  q8, \in3, d1[3]   //-50 * src7
-        vmlal.s16  q9, \in3, d1[0]   // 75 * src7
-        vmlsl.s16  q10, \in3, d1[1]  //-89 * src7
-.endm
-
-.macro tr8_end shift
-        vadd.s32   q1, q4, q7   //  e_8[0] + o_8[0], dst[0]
-        vsub.s32   q4, q4, q7   //  e_8[0] - o_8[0], dst[7]
-
-        vadd.s32   q2, q5, q8   // e_8[1] + o_8[1], dst[1]
-        vsub.s32   q5, q5, q8   // e_8[1] - o_8[1], dst[6]
-
-        vadd.s32   q11, q6, q9  // e_8[2] + o_8[2], dst[2]
-        vsub.s32    q6, q6, q9  // e_8[2] - o_8[2], dst[5]
-
-        vadd.s32   q12, q3, q10 // e_8[3] + o_8[3], dst[3]
-        vsub.s32   q3, q3, q10  // e_8[3] - o_8[3], dst[4]
-        vqrshrn.s32   d2, q1, \shift
-        vqrshrn.s32   d3, q2, \shift
-        vqrshrn.s32   d4, q11, \shift
-        vqrshrn.s32   d5, q12, \shift
-        vqrshrn.s32   d6, q3, \shift
-        vqrshrn.s32   d7, q6, \shift
-        vqrshrn.s32   d9, q4, \shift
-        vqrshrn.s32   d8, q5, \shift
-.endm
-
-function ff_hevc_transform_8x8_neon_8, export=1
-        push   {r4-r8}
-        vpush {d8-d15}
-        mov    r5, #16
-
-        adr       r3, tr4f
-        vld1.16   {d0, d1}, [r3]
-
-        // left half
-        vld1.16 {d24}, [r0], r5
-        vld1.16 {d25}, [r0], r5
-        vld1.16 {d26}, [r0], r5
-        vld1.16 {d27}, [r0], r5
-        vld1.16 {d28}, [r0], r5
-        vld1.16 {d29}, [r0], r5
-        vld1.16 {d30}, [r0], r5
-        vld1.16 {d31}, [r0], r5
-        sub      r0, #128
-        tr8_begin d25, d27, d29, d31
-        tr4       d24, d26, d28, d30
-        tr8_end   #7
-        vst1.16 {d2}, [r0], r5
-        vst1.16 {d3}, [r0], r5
-        vst1.16 {d4}, [r0], r5
-        vst1.16 {d5}, [r0], r5
-        vst1.16 {d6}, [r0], r5
-        vst1.16 {d7}, [r0], r5
-        vst1.16 {d8}, [r0], r5
-        vst1.16 {d9}, [r0], r5
-        sub      r0, #128
-        //skip right half if col_limit in r1 is less than 4
-        cmp      r1, #4
-        blt      1f
-        //right half
-        add      r0, #8
-        vld1.16 {d24}, [r0], r5
-        vld1.16 {d25}, [r0], r5
-        vld1.16 {d26}, [r0], r5
-        vld1.16 {d27}, [r0], r5
-        vld1.16 {d28}, [r0], r5
-        vld1.16 {d29}, [r0], r5
-        vld1.16 {d30}, [r0], r5
-        vld1.16 {d31}, [r0], r5
-        sub      r0, #128
-        tr8_begin d25, d27, d29, d31
-        tr4       d24, d26, d28, d30
-        tr8_end   #7
-        vst1.16 {d2}, [r0], r5
-        vst1.16 {d3}, [r0], r5
-        vst1.16 {d4}, [r0], r5
-        vst1.16 {d5}, [r0], r5
-        vst1.16 {d6}, [r0], r5
-        vst1.16 {d7}, [r0], r5
-        vst1.16 {d8}, [r0], r5
-        vst1.16 {d9}, [r0], r5
-        sub      r0, #136
-1:
-        // top half
-        vldm r0, {q12-q15} // coeffs
-        transpose_16b_4x4 d24, d26, d28, d30
-        transpose_16b_4x4 d25, d27, d29, d31
-        tr8_begin d26, d30, d27, d31
-        tr4 d24, d28, d25, d29
-        tr8_end #12
-        transpose_16b_4x4 d2, d3, d4, d5
-        transpose_16b_4x4 d6, d7, d8, d9
-        vswp     d7, d5
-        vswp     d7, d8
-        vswp     d3, d6
-        vswp     d6, d4
-        vstm r0!, {q1-q4}
-
-        // bottom half
-        vldm r0, {q12-q15} // coeffs
-        transpose_16b_4x4 d24, d26, d28, d30
-        transpose_16b_4x4 d25, d27, d29, d31
-        tr8_begin d26, d30, d27, d31
-        tr4 d24, d28, d25, d29
-        tr8_end #12
-        transpose_16b_4x4 d2, d3, d4, d5
-        transpose_16b_4x4 d6, d7, d8, d9
-        vswp     d7, d5
-        vswp     d7, d8
-        vswp     d3, d6
-        vswp     d6, d4
-        //vstm     r0, {q1-q4}
-        vst1.16 {q1-q2}, [r0]
-        add     r0, #32
-        vst1.16 {q3-q4}, [r0]
-        sub     r0, #32
-        vpop {d8-d15}
-        pop {r4-r8}
-        bx lr
-endfunc
-
-.align 4
-tr4f:
-.word 0x00240053  // 36 and d1[0] = 83
-.word 0x00000000
-tr8f:
-.word 0x0059004b  // 89, d0[0] = 75
-.word 0x00320012  // 50, d0[2] = 18
-tr16:
-.word 0x005a0057  // 90, d2[0] = 87
-.word 0x00500046  // 80, d2[2] = 70
-.word 0x0039002b  // 57, d2[0] = 43
-.word 0x00190009  // 25, d2[2] = 9

diff --git a/libavcodec/arm/hevcdsp_init_arm.c b/libavcodec/arm/hevcdsp_init_arm.c
index adcc454..e8fa1f7 100644
--- a/libavcodec/arm/hevcdsp_init_arm.c
+++ b/libavcodec/arm/hevcdsp_init_arm.c

@@ -19,14 +19,16 @@
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
 #include "libavutil/arm/cpu.h"
+
 #include "libavcodec/hevcdsp.h"
 #include "hevcdsp_arm.h"
 
-av_cold void ff_hevcdsp_init_arm(HEVCDSPContext *c, const int bit_depth)
+av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, const int bit_depth)
 {
     int cpu_flags = av_get_cpu_flags();
 
     if (have_neon(cpu_flags))
-        ff_hevcdsp_init_neon(c, bit_depth);
+        ff_hevc_dsp_init_neon(c, bit_depth);
 }

diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c
index 1a3912c..201a088 100644
--- a/libavcodec/arm/hevcdsp_init_neon.c
+++ b/libavcodec/arm/hevcdsp_init_neon.c

@@ -21,27 +21,53 @@
 #include "libavutil/attributes.h"
 #include "libavutil/arm/cpu.h"
 #include "libavcodec/hevcdsp.h"
+#include "libavcodec/avcodec.h"
 #include "hevcdsp_arm.h"
 
+void ff_hevc_sao_band_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src,
+                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
+                                  int16_t *sao_offset_val, int sao_left_class,
+                                  int width, int height);
+void ff_hevc_sao_edge_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
+                                  int eo, int width, int height);
+
 void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 void ff_hevc_v_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 void ff_hevc_h_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
-void ff_hevc_transform_4x4_neon_8(int16_t *coeffs, int col_limit);
-void ff_hevc_transform_8x8_neon_8(int16_t *coeffs, int col_limit);
-void ff_hevc_idct_4x4_dc_neon_8(int16_t *coeffs);
-void ff_hevc_idct_8x8_dc_neon_8(int16_t *coeffs);
-void ff_hevc_idct_16x16_dc_neon_8(int16_t *coeffs);
-void ff_hevc_idct_32x32_dc_neon_8(int16_t *coeffs);
+void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs,
+                                     ptrdiff_t stride);
+void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, int16_t *coeffs,
+                                      ptrdiff_t stride);
+void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, int16_t *coeffs,
+                                     ptrdiff_t stride);
+void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, int16_t *coeffs,
+                                      ptrdiff_t stride);
+void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, int16_t *coeffs,
+                                       ptrdiff_t stride);
+void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, int16_t *coeffs,
+                                        ptrdiff_t stride);
+void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, int16_t *coeffs,
+                                       ptrdiff_t stride);
+void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, int16_t *coeffs,
+                                        ptrdiff_t stride);
+void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
+void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
+void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_32x32_8_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
+void ff_hevc_idct_32x32_10_neon(int16_t *coeffs, int col_limit);
 void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs);
-void ff_hevc_add_residual_4x4_neon_8(uint8_t *_dst, int16_t *coeffs,
-                                     ptrdiff_t stride);
-void ff_hevc_add_residual_8x8_neon_8(uint8_t *_dst, int16_t *coeffs,
-                                     ptrdiff_t stride);
-void ff_hevc_add_residual_16x16_neon_8(uint8_t *_dst, int16_t *coeffs,
-                                       ptrdiff_t stride);
-void ff_hevc_add_residual_32x32_neon_8(uint8_t *_dst, int16_t *coeffs,
-                                       ptrdiff_t stride);
 
 #define PUT_PIXELS(name) \
     void name(int16_t *dst, uint8_t *src, \
@@ -124,6 +150,47 @@
 QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h3v3_neon_8);
 #undef QPEL_FUNC_UW
 
+void ff_hevc_sao_band_filter_neon_8(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int width, int height, int16_t *offset_table);
+
+void ff_hevc_sao_band_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src,
+                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
+                                  int16_t *sao_offset_val, int sao_left_class,
+                                  int width, int height) {
+    uint8_t *dst = _dst;
+    uint8_t *src = _src;
+    int16_t offset_table[32] = {0};
+    int k;
+
+    for (k = 0; k < 4; k++) {
+        offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
+    }
+
+    ff_hevc_sao_band_filter_neon_8(dst, src, stride_dst, stride_src, width, height, offset_table);
+}
+
+void ff_hevc_sao_edge_filter_neon_8(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int width, int height,
+                                    int a_stride, int b_stride, int16_t *sao_offset_val, uint8_t *edge_idx);
+
+void ff_hevc_sao_edge_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
+                                  int eo, int width, int height) {
+    static uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
+    static const int8_t pos[4][2][2] = {
+        { { -1,  0 }, {  1, 0 } }, // horizontal
+        { {  0, -1 }, {  0, 1 } }, // vertical
+        { { -1, -1 }, {  1, 1 } }, // 45 degree
+        { {  1, -1 }, { -1, 1 } }, // 135 degree
+    };
+    uint8_t *dst = _dst;
+    uint8_t *src = _src;
+    int a_stride, b_stride;
+    ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
+
+    a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
+    b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src;
+
+    ff_hevc_sao_edge_filter_neon_8(dst, src, stride_dst, stride_src, width, height, a_stride, b_stride, sao_offset_val, edge_idx);
+}
+
 void ff_hevc_put_qpel_neon_wrapper(int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
                                    int height, intptr_t mx, intptr_t my, int width) {
 
@@ -142,7 +209,7 @@
     put_hevc_qpel_uw_neon[my][mx](dst, dststride, src, srcstride, width, height, src2, MAX_PB_SIZE);
 }
 
-av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth)
+av_cold void ff_hevc_dsp_init_neon(HEVCDSPContext *c, const int bit_depth)
 {
     if (bit_depth == 8) {
         int x;
@@ -150,16 +217,28 @@
         c->hevc_h_loop_filter_luma     = ff_hevc_h_loop_filter_luma_neon;
         c->hevc_v_loop_filter_chroma   = ff_hevc_v_loop_filter_chroma_neon;
         c->hevc_h_loop_filter_chroma   = ff_hevc_h_loop_filter_chroma_neon;
-        c->idct[0]                     = ff_hevc_transform_4x4_neon_8;
-        c->idct[1]                     = ff_hevc_transform_8x8_neon_8;
-        c->idct_dc[0]                  = ff_hevc_idct_4x4_dc_neon_8;
-        c->idct_dc[1]                  = ff_hevc_idct_8x8_dc_neon_8;
-        c->idct_dc[2]                  = ff_hevc_idct_16x16_dc_neon_8;
-        c->idct_dc[3]                  = ff_hevc_idct_32x32_dc_neon_8;
-        c->add_residual[0]             = ff_hevc_add_residual_4x4_neon_8;
-        c->add_residual[1]             = ff_hevc_add_residual_8x8_neon_8;
-        c->add_residual[2]             = ff_hevc_add_residual_16x16_neon_8;
-        c->add_residual[3]             = ff_hevc_add_residual_32x32_neon_8;
+        c->sao_band_filter[0]          = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[1]          = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[2]          = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[3]          = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[4]          = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_edge_filter[0]          = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[1]          = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[2]          = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[3]          = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[4]          = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->add_residual[0]             = ff_hevc_add_residual_4x4_8_neon;
+        c->add_residual[1]             = ff_hevc_add_residual_8x8_8_neon;
+        c->add_residual[2]             = ff_hevc_add_residual_16x16_8_neon;
+        c->add_residual[3]             = ff_hevc_add_residual_32x32_8_neon;
+        c->idct_dc[0]                  = ff_hevc_idct_4x4_dc_8_neon;
+        c->idct_dc[1]                  = ff_hevc_idct_8x8_dc_8_neon;
+        c->idct_dc[2]                  = ff_hevc_idct_16x16_dc_8_neon;
+        c->idct_dc[3]                  = ff_hevc_idct_32x32_dc_8_neon;
+        c->idct[0]                     = ff_hevc_idct_4x4_8_neon;
+        c->idct[1]                     = ff_hevc_idct_8x8_8_neon;
+        c->idct[2]                     = ff_hevc_idct_16x16_8_neon;
+        c->idct[3]                     = ff_hevc_idct_32x32_8_neon;
         c->transform_4x4_luma          = ff_hevc_transform_luma_4x4_neon_8;
         put_hevc_qpel_neon[1][0]       = ff_hevc_put_qpel_v1_neon_8;
         put_hevc_qpel_neon[2][0]       = ff_hevc_put_qpel_v2_neon_8;
@@ -221,4 +300,21 @@
         c->put_hevc_qpel_uni[8][0][0]  = ff_hevc_put_qpel_uw_pixels_w48_neon_8;
         c->put_hevc_qpel_uni[9][0][0]  = ff_hevc_put_qpel_uw_pixels_w64_neon_8;
     }
+
+    if (bit_depth == 10) {
+        c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
+        c->add_residual[1] = ff_hevc_add_residual_8x8_10_neon;
+        c->add_residual[2] = ff_hevc_add_residual_16x16_10_neon;
+        c->add_residual[3] = ff_hevc_add_residual_32x32_10_neon;
+
+        c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon;
+        c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_neon;
+        c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_neon;
+        c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_neon;
+
+        c->idct[0] = ff_hevc_idct_4x4_10_neon;
+        c->idct[1] = ff_hevc_idct_8x8_10_neon;
+        c->idct[2] = ff_hevc_idct_16x16_10_neon;
+        c->idct[3] = ff_hevc_idct_32x32_10_neon;
+    }
 }

diff --git a/libavcodec/arm/hevcdsp_qpel_neon.S b/libavcodec/arm/hevcdsp_qpel_neon.S
index 86f92cf..caa6efa 100644
--- a/libavcodec/arm/hevcdsp_qpel_neon.S
+++ b/libavcodec/arm/hevcdsp_qpel_neon.S

@@ -667,76 +667,76 @@
 
 
 function ff_hevc_put_qpel_h1v1_neon_8, export=1
-        hevc_put_qpel_hXvY_neon_8 qpel_filter_1 qpel_filter_1_32b
+        hevc_put_qpel_hXvY_neon_8 qpel_filter_1, qpel_filter_1_32b
 endfunc
 
 function ff_hevc_put_qpel_h2v1_neon_8, export=1
-        hevc_put_qpel_hXvY_neon_8 qpel_filter_2 qpel_filter_1_32b
+        hevc_put_qpel_hXvY_neon_8 qpel_filter_2, qpel_filter_1_32b
 endfunc
 
 function ff_hevc_put_qpel_h3v1_neon_8, export=1
-        hevc_put_qpel_hXvY_neon_8 qpel_filter_3 qpel_filter_1_32b
+        hevc_put_qpel_hXvY_neon_8 qpel_filter_3, qpel_filter_1_32b
 endfunc
 
 function ff_hevc_put_qpel_h1v2_neon_8, export=1
-        hevc_put_qpel_hXvY_neon_8 qpel_filter_1 qpel_filter_2_32b
+        hevc_put_qpel_hXvY_neon_8 qpel_filter_1, qpel_filter_2_32b
 endfunc
 
 function ff_hevc_put_qpel_h2v2_neon_8, export=1
-        hevc_put_qpel_hXvY_neon_8 qpel_filter_2 qpel_filter_2_32b
+        hevc_put_qpel_hXvY_neon_8 qpel_filter_2, qpel_filter_2_32b
 endfunc
 
 function ff_hevc_put_qpel_h3v2_neon_8, export=1
-        hevc_put_qpel_hXvY_neon_8 qpel_filter_3 qpel_filter_2_32b
+        hevc_put_qpel_hXvY_neon_8 qpel_filter_3, qpel_filter_2_32b
 endfunc
 
 function ff_hevc_put_qpel_h1v3_neon_8, export=1
-        hevc_put_qpel_hXvY_neon_8 qpel_filter_1 qpel_filter_3_32b
+        hevc_put_qpel_hXvY_neon_8 qpel_filter_1, qpel_filter_3_32b
 endfunc
 
 function ff_hevc_put_qpel_h2v3_neon_8, export=1
-        hevc_put_qpel_hXvY_neon_8 qpel_filter_2 qpel_filter_3_32b
+        hevc_put_qpel_hXvY_neon_8 qpel_filter_2, qpel_filter_3_32b
 endfunc
 
 function ff_hevc_put_qpel_h3v3_neon_8, export=1
-        hevc_put_qpel_hXvY_neon_8 qpel_filter_3 qpel_filter_3_32b
+        hevc_put_qpel_hXvY_neon_8 qpel_filter_3, qpel_filter_3_32b
 endfunc
 
 
 function ff_hevc_put_qpel_uw_h1v1_neon_8, export=1
-        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_1 qpel_filter_1_32b
+        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_1, qpel_filter_1_32b
 endfunc
 
 function ff_hevc_put_qpel_uw_h2v1_neon_8, export=1
-        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_2 qpel_filter_1_32b
+        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_2, qpel_filter_1_32b
 endfunc
 
 function ff_hevc_put_qpel_uw_h3v1_neon_8, export=1
-        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_3 qpel_filter_1_32b
+        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_3, qpel_filter_1_32b
 endfunc
 
 function ff_hevc_put_qpel_uw_h1v2_neon_8, export=1
-        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_1 qpel_filter_2_32b
+        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_1, qpel_filter_2_32b
 endfunc
 
 function ff_hevc_put_qpel_uw_h2v2_neon_8, export=1
-        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_2 qpel_filter_2_32b
+        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_2, qpel_filter_2_32b
 endfunc
 
 function ff_hevc_put_qpel_uw_h3v2_neon_8, export=1
-        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_3 qpel_filter_2_32b
+        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_3, qpel_filter_2_32b
 endfunc
 
 function ff_hevc_put_qpel_uw_h1v3_neon_8, export=1
-        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_1 qpel_filter_3_32b
+        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_1, qpel_filter_3_32b
 endfunc
 
 function ff_hevc_put_qpel_uw_h2v3_neon_8, export=1
-        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_2 qpel_filter_3_32b
+        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_2, qpel_filter_3_32b
 endfunc
 
 function ff_hevc_put_qpel_uw_h3v3_neon_8, export=1
-        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_3 qpel_filter_3_32b
+        hevc_put_qpel_uw_hXvY_neon_8 qpel_filter_3, qpel_filter_3_32b
 endfunc
 
 .macro init_put_pixels

diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S
new file mode 100644
index 0000000..3471679
--- /dev/null
+++ b/libavcodec/arm/hevcdsp_sao_neon.S

@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2017 Meng Wang <wangmeng.kids@bytedance.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include "libavutil/arm/asm.S"
+#include "neon.S"
+
+function ff_hevc_sao_band_filter_neon_8, export=1
+        push    {r4-r10}
+        ldr     r5,  [sp, #28]   // width
+        ldr     r4,  [sp, #32]   // height
+        ldr     r8,  [sp, #36]   // offset_table
+        vpush   {d8-d15}
+        mov     r12,  r4         // r12 = height
+        mov     r6,   r0         // r6 = r0 = dst
+        mov     r7,   r1         // r7 = r1 = src
+        vldm    r8,   {q0-q3}
+        vmov.u16    q15,  #1
+        vmov.u8     q14,  #32
+0:      pld      [r1]
+        vld1.8   {d16},  [r1], r3
+        cmp      r5,    #4
+        beq      4f
+8:      subs     r4,    #1
+        vshr.u8  d17,   d16,  #3   // index = [src>>3]
+        vshll.u8 q9,    d17,  #1   // lowIndex = 2*index
+        vadd.u16 q11,   q9,   q15  // highIndex = (2*index+1) << 8
+        vshl.u16 q10,   q11,  #8   // q10: highIndex;  q9: lowIndex;
+        vadd.u16 q10,   q9         // combine high and low index;
+        // Look-up Table Round 1; index range: 0-15
+        vtbx.8   d24,   {q0-q1},   d20
+        vtbx.8   d25,   {q0-q1},   d21
+        // Look-up Table Round 2; index range: 16-31
+        vsub.u8  q10,   q14        // Look-up with 8bit
+        vtbx.8   d24,   {q2-q3},   d20
+        vtbx.8   d25,   {q2-q3},   d21
+        vaddw.u8 q13,   q12,       d16
+        vqmovun.s16      d8,         q13
+        vst1.8    d8,   [r0],      r2
+        vld1.8   {d16}, [r1],      r3
+        bne      8b
+        subs     r5,    #8
+        beq      99f
+        mov      r4,    r12
+        add r6, #8
+        mov r0, r6
+        add r7, #8
+        mov r1, r7
+        b        0b
+4:      subs     r4,    #1
+        vshr.u8  d17,   d16,  #3  // src>>3
+        vshll.u8 q9,    d17,  #1   // lowIndex = 2*index
+        vadd.u16 q11,   q9,   q15  // highIndex = (2*index+1) << 8
+        vshl.u16 q10,   q11,  #8   // q10: highIndex;  q9: lowIndex;
+        vadd.u16 q10,   q9         // combine high and low index;
+        // Look-up Table Round 1; index range: 0-15
+        vtbx.8   d24,   {q0-q1},   d20
+        vtbx.8   d25,   {q0-q1},   d21
+        // Look-up Table Round 2; index range: 16-32
+        vsub.u8  q10,   q14        // Look-up with 8bit
+        vtbx.8   d24,   {q2-q3},   d20
+        vtbx.8   d25,   {q2-q3},   d21
+        vaddw.u8 q13,   q12,       d16
+        vqmovun.s16     d14,       q13
+        vst1.32   d14[0],    [r0],     r2
+        vld1.32   {d16[0]},  [r1],     r3
+        bne      4b
+        b        99f
+99:
+        vpop {d8-d15}
+        pop  {r4-r10}
+        bx   lr
+endfunc
+
+function ff_hevc_sao_edge_filter_neon_8, export=1
+        push    {r4-r11}
+        ldr     r5,  [sp, #32]   // width
+        ldr     r4,  [sp, #36]   // height
+        ldr     r8,  [sp, #40]   // a_stride
+        ldr     r9,  [sp, #44]   // b_stride
+        ldr     r10, [sp, #48]   // sao_offset_val
+        ldr     r11, [sp, #52]   // edge_idx
+        vpush   {d8-d15}
+        mov     r12,  r4         // r12 = height
+        mov     r6,   r0         // r6 = r0 = dst
+        mov     r7,   r1         // r7 = r1 = src
+        vld1.8  {d0}, [r11]      // edge_idx tabel load in d0 5x8bit
+        vld1.16 {q1}, [r10]      // sao_offset_val table load in q1, 5x16bit
+        vmov.u8  d1,  #2
+        vmov.u16 q2,  #1
+0:      mov      r10,    r1
+        add      r10,    r8           // src[x + a_stride]
+        mov      r11,    r1
+        add      r11,    r9           // src[x + b_stride]
+        pld      [r1]
+        vld1.8   {d16},  [r1],  r3    // src[x]  8x8bit
+        vld1.8   {d17},  [r10], r3    // src[x + a_stride]
+        vld1.8   {d18},  [r11], r3    // src[x + b_stride]
+        cmp      r5,     #4
+        beq      4f
+8:      subs     r4,     #1
+        vcgt.u8  d8,     d16,   d17
+        vshr.u8  d9,     d8,    #7
+        vclt.u8  d8,     d16,   d17
+        vadd.u8  d8,     d9           // diff0
+        vcgt.u8  d10,    d16,   d18
+        vshr.u8  d11,    d10,   #7
+        vclt.u8  d10,    d16,   d18
+        vadd.u8  d10,    d11          // diff1
+        vadd.s8  d8,     d10
+        vadd.s8  d8,     d1
+        vtbx.8   d9,     {d0},  d8    // offset_val
+        vshll.u8 q6,     d9,    #1    // lowIndex
+        vadd.u16 q7,     q6,    q2
+        vshl.u16 q10,    q7,    #8    // highIndex
+        vadd.u16 q10,    q6           // combine lowIndex and highIndex, offset_val
+        vtbx.8   d22,    {q1},  d20
+        vtbx.8   d23,    {q1},  d21
+        vaddw.u8 q12,    q11,   d16
+        vqmovun.s16      d26,   q12
+        vst1.8   d26,    [r0],  r2
+        vld1.8   {d16},  [r1],  r3    // src[x]  8x8bit
+        vld1.8   {d17},  [r10], r3    // src[x + a_stride]
+        vld1.8   {d18},  [r11], r3    // src[x + b_stride]
+        bne      8b
+        subs     r5,     #8
+        beq      99f
+        mov      r4,     r12
+        add      r6,     #8
+        mov      r0,     r6
+        add      r7,     #8
+        mov      r1,     r7
+        b        0b
+4:      subs     r4,    #1
+        vcgt.u8  d8,     d16,   d17
+        vshr.u8  d9,     d8,    #7
+        vclt.u8  d8,     d16,   d17
+        vadd.u8  d8,     d9           // diff0
+        vcgt.u8  d10,    d16,   d18
+        vshr.u8  d11,    d10,   #7
+        vclt.u8  d10,    d16,   d18
+        vadd.u8  d10,    d11          // diff1
+        vadd.s8  d8,     d10
+        vadd.s8  d8,     d1
+        vtbx.8   d9,     {d0},  d8    // offset_val
+        vshll.u8 q6,     d9,    #1    // lowIndex
+        vadd.u16 q7,     q6,    q2
+        vshl.u16 q10,    q7,    #8    // highIndex
+        vadd.u16 q10,    q6           // combine lowIndex and highIndex, offset_val
+        vtbx.8   d22,    {q1},  d20
+        vtbx.8   d23,    {q1},  d21
+        vaddw.u8 q12,    q11,   d16
+        vqmovun.s16      d26,   q12
+        vst1.32  d26[0], [r0],  r2
+        vld1.32   {d16[0]},  [r1],  r3
+        vld1.32   {d17[0]},  [r10], r3    // src[x + a_stride]
+        vld1.32   {d18[0]},  [r11], r3    // src[x + b_stride]
+        bne      4b
+        b        99f
+99:
+        vpop {d8-d15}
+        pop  {r4-r11}
+        bx   lr
+endfunc

diff --git a/libavcodec/arm/sbcdsp_armv6.S b/libavcodec/arm/sbcdsp_armv6.S
new file mode 100644
index 0000000..f1ff845
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_armv6.S

@@ -0,0 +1,245 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline.
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_sbc_analyze_4_armv6, export=1
+        @ r0 = in, r1 = out, r2 = consts
+        push            {r1, r3-r7, lr}
+        push            {r8-r12, r14}
+        ldrd            r4,  r5,  [r0, #0]
+        ldrd            r6,  r7,  [r2, #0]
+        ldrd            r8,  r9,  [r0, #16]
+        ldrd            r10, r11, [r2, #16]
+        mov             r14, #0x8000
+        smlad           r3,  r4,  r6,  r14
+        smlad           r12, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #32]
+        ldrd            r6,  r7,  [r2, #32]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #48]
+        ldrd            r10, r11, [r2, #48]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #64]
+        ldrd            r6,  r7,  [r2, #64]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #8]
+        ldrd            r10, r11, [r2, #8]
+        smlad           r3,  r4,  r6,  r3        @ t1[0] is done
+        smlad           r12, r5,  r7,  r12       @ t1[1] is done
+        ldrd            r4,  r5,  [r0, #24]
+        ldrd            r6,  r7,  [r2, #24]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[0] and t1[1]
+        smlad           r12, r8,  r10, r14
+        smlad           r14, r9,  r11, r14
+        ldrd            r8,  r9,  [r0, #40]
+        ldrd            r10, r11, [r2, #40]
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #56]
+        ldrd            r6,  r7,  [r2, #56]
+        smlad           r12, r8,  r10, r12
+        smlad           r14, r9,  r11, r14
+        ldrd            r8,  r9,  [r0, #72]
+        ldrd            r10, r11, [r2, #72]
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r2, #80]      @ start loading cos table
+        smlad           r12, r8,  r10, r12       @ t1[2] is done
+        smlad           r14, r9,  r11, r14       @ t1[3] is done
+        ldrd            r6,  r7,  [r2, #88]
+        ldrd            r8,  r9,  [r2, #96]
+        ldrd            r10, r11, [r2, #104]     @ cos table fully loaded
+        pkhtb           r12, r14, r12, asr #16   @ combine t1[2] and t1[3]
+        smuad           r4,  r3,  r4
+        smuad           r5,  r3,  r5
+        smlad           r4,  r12, r8,  r4
+        smlad           r5,  r12, r9,  r5
+        smuad           r6,  r3,  r6
+        smuad           r7,  r3,  r7
+        smlad           r6,  r12, r10, r6
+        smlad           r7,  r12, r11, r7
+        pop             {r8-r12, r14}
+        stmia           r1, {r4, r5, r6, r7}
+        pop             {r1, r3-r7, pc}
+endfunc
+
+function ff_sbc_analyze_8_armv6, export=1
+        @ r0 = in, r1 = out, r2 = consts
+        push            {r1, r3-r7, lr}
+        push            {r8-r12, r14}
+        ldrd            r4,  r5,  [r0, #24]
+        ldrd            r6,  r7,  [r2, #24]
+        ldrd            r8,  r9,  [r0, #56]
+        ldrd            r10, r11, [r2, #56]
+        mov             r14, #0x8000
+        smlad           r3,  r4,  r6,  r14
+        smlad           r12, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #88]
+        ldrd            r6,  r7,  [r2, #88]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #120]
+        ldrd            r10, r11, [r2, #120]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #152]
+        ldrd            r6,  r7,  [r2, #152]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #16]
+        ldrd            r10, r11, [r2, #16]
+        smlad           r3,  r4,  r6,  r3        @ t1[6] is done
+        smlad           r12, r5,  r7,  r12       @ t1[7] is done
+        ldrd            r4,  r5,  [r0, #48]
+        ldrd            r6,  r7,  [r2, #48]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[6] and t1[7]
+        str             r3,  [sp, #-4]!          @ save to stack
+        smlad           r3,  r8,  r10, r14
+        smlad           r12, r9,  r11, r14
+        ldrd            r8,  r9,  [r0, #80]
+        ldrd            r10, r11, [r2, #80]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #112]
+        ldrd            r6,  r7,  [r2, #112]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #144]
+        ldrd            r10, r11, [r2, #144]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #0]
+        ldrd            r6,  r7,  [r2, #0]
+        smlad           r3,  r8,  r10, r3        @ t1[4] is done
+        smlad           r12, r9,  r11, r12       @ t1[5] is done
+        ldrd            r8,  r9,  [r0, #32]
+        ldrd            r10, r11, [r2, #32]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[4] and t1[5]
+        str             r3,  [sp, #-4]!          @ save to stack
+        smlad           r3,  r4,  r6,  r14
+        smlad           r12, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #64]
+        ldrd            r6,  r7,  [r2, #64]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #96]
+        ldrd            r10, r11, [r2, #96]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #128]
+        ldrd            r6,  r7,  [r2, #128]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #8]
+        ldrd            r10, r11, [r2, #8]
+        smlad           r3,  r4,  r6,  r3        @ t1[0] is done
+        smlad           r12, r5,  r7,  r12       @ t1[1] is done
+        ldrd            r4,  r5,  [r0, #40]
+        ldrd            r6,  r7,  [r2, #40]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[0] and t1[1]
+        smlad           r12, r8,  r10, r14
+        smlad           r14, r9,  r11, r14
+        ldrd            r8,  r9,  [r0, #72]
+        ldrd            r10, r11, [r2, #72]
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #104]
+        ldrd            r6,  r7,  [r2, #104]
+        smlad           r12, r8,  r10, r12
+        smlad           r14, r9,  r11, r14
+        ldrd            r8,  r9,  [r0, #136]
+        ldrd            r10, r11, [r2, #136]!
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r2, #(160 - 136 + 0)]
+        smlad           r12, r8,  r10, r12       @ t1[2] is done
+        smlad           r14, r9,  r11, r14       @ t1[3] is done
+        ldrd            r6,  r7,  [r2, #(160 - 136 + 8)]
+        smuad           r4,  r3,  r4
+        smuad           r5,  r3,  r5
+        pkhtb           r12, r14, r12, asr #16   @ combine t1[2] and t1[3]
+                                                 @ r3  = t2[0:1]
+                                                 @ r12 = t2[2:3]
+        pop             {r0, r14}                @ t2[4:5], t2[6:7]
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 32)]
+        smuad           r6,  r3,  r6
+        smuad           r7,  r3,  r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 40)]
+        smlad           r4,  r12, r8,  r4
+        smlad           r5,  r12, r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 64)]
+        smlad           r6,  r12, r10, r6
+        smlad           r7,  r12, r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 72)]
+        smlad           r4,  r0,  r8,  r4
+        smlad           r5,  r0,  r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 96)]
+        smlad           r6,  r0,  r10, r6
+        smlad           r7,  r0,  r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 104)]
+        smlad           r4,  r14, r8,  r4
+        smlad           r5,  r14, r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 0)]
+        smlad           r6,  r14, r10, r6
+        smlad           r7,  r14, r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 8)]
+        stmia           r1!, {r4, r5}
+        smuad           r4,  r3,  r8
+        smuad           r5,  r3,  r9
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 32)]
+        stmia           r1!, {r6, r7}
+        smuad           r6,  r3,  r10
+        smuad           r7,  r3,  r11
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 40)]
+        smlad           r4,  r12, r8,  r4
+        smlad           r5,  r12, r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 64)]
+        smlad           r6,  r12, r10, r6
+        smlad           r7,  r12, r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 72)]
+        smlad           r4,  r0,  r8,  r4
+        smlad           r5,  r0,  r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 96)]
+        smlad           r6,  r0,  r10, r6
+        smlad           r7,  r0,  r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 104)]
+        smlad           r4,  r14, r8,  r4
+        smlad           r5,  r14, r9,  r5
+        smlad           r6,  r14, r10, r6
+        smlad           r7,  r14, r11, r7
+        pop             {r8-r12, r14}
+        stmia           r1!, {r4, r5, r6, r7}
+        pop             {r1, r3-r7, pc}
+endfunc

diff --git a/libavcodec/arm/sbcdsp_init_arm.c b/libavcodec/arm/sbcdsp_init_arm.c
new file mode 100644
index 0000000..6bf7e72
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_init_arm.c

@@ -0,0 +1,105 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARMv6 optimization for some basic "building bricks"
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/sbcdsp.h"
+
+void ff_sbc_analyze_4_armv6(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_analyze_8_armv6(const int16_t *in, int32_t *out, const int16_t *consts);
+
+void ff_sbc_analyze_4_neon(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_analyze_8_neon(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_calc_scalefactors_neon(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int channels, int subbands);
+int ff_sbc_calc_scalefactors_j_neon(int32_t sb_sample_f[16][2][8],
+                                    uint32_t scale_factor[2][8],
+                                    int blocks, int subbands);
+int ff_sbc_enc_process_input_4s_neon(int position, const uint8_t *pcm,
+                                     int16_t X[2][SBC_X_BUFFER_SIZE],
+                                     int nsamples, int nchannels);
+int ff_sbc_enc_process_input_8s_neon(int position, const uint8_t *pcm,
+                                     int16_t X[2][SBC_X_BUFFER_SIZE],
+                                     int nsamples, int nchannels);
+
+DECLARE_ALIGNED(SBC_ALIGN, int32_t, ff_sbcdsp_joint_bits_mask)[8] = {
+    8,   4,  2,  1, 128, 64, 32, 16
+};
+
+#if HAVE_BIGENDIAN
+#define PERM(a, b, c, d) {        \
+        (a * 2) + 1, (a * 2) + 0, \
+        (b * 2) + 1, (b * 2) + 0, \
+        (c * 2) + 1, (c * 2) + 0, \
+        (d * 2) + 1, (d * 2) + 0  \
+    }
+#else
+#define PERM(a, b, c, d) {        \
+        (a * 2) + 0, (a * 2) + 1, \
+        (b * 2) + 0, (b * 2) + 1, \
+        (c * 2) + 0, (c * 2) + 1, \
+        (d * 2) + 0, (d * 2) + 1  \
+    }
+#endif
+
+DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_4)[2][8] = {
+    PERM(7, 3, 6, 4),
+    PERM(0, 2, 1, 5)
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_8)[4][8] = {
+    PERM(15, 7, 14,  8),
+    PERM(13, 9, 12, 10),
+    PERM(11, 3,  6,  0),
+    PERM( 5, 1,  4,  2)
+};
+
+av_cold void ff_sbcdsp_init_arm(SBCDSPContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_armv6(cpu_flags)) {
+        s->sbc_analyze_4 = ff_sbc_analyze_4_armv6;
+        s->sbc_analyze_8 = ff_sbc_analyze_8_armv6;
+    }
+
+    if (have_neon(cpu_flags)) {
+        s->sbc_analyze_4 = ff_sbc_analyze_4_neon;
+        s->sbc_analyze_8 = ff_sbc_analyze_8_neon;
+        s->sbc_calc_scalefactors = ff_sbc_calc_scalefactors_neon;
+        s->sbc_calc_scalefactors_j = ff_sbc_calc_scalefactors_j_neon;
+        if (s->increment != 1) {
+            s->sbc_enc_process_input_4s = ff_sbc_enc_process_input_4s_neon;
+            s->sbc_enc_process_input_8s = ff_sbc_enc_process_input_8s_neon;
+        }
+    }
+}

diff --git a/libavcodec/arm/sbcdsp_neon.S b/libavcodec/arm/sbcdsp_neon.S
new file mode 100644
index 0000000..d83d21d
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_neon.S

@@ -0,0 +1,714 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARM NEON optimizations
+ */
+
+#include "libavutil/arm/asm.S"
+#include "neon.S"
+
+#define SBC_PROTO_FIXED_SCALE 16
+
+function ff_sbc_analyze_4_neon, export=1
+        /* TODO: merge even and odd cases (or even merge all four calls to this
+         * function) in order to have only aligned reads from 'in' array
+         * and reduce number of load instructions */
+        vld1.16         {d4, d5}, [r0, :64]!
+        vld1.16         {d8, d9}, [r2, :128]!
+
+        vmull.s16       q0, d4, d8
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmull.s16       q1, d5, d9
+        vld1.16         {d10, d11}, [r2, :128]!
+
+        vmlal.s16       q0, d6, d10
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q1, d7, d11
+        vld1.16         {d8, d9}, [r2, :128]!
+
+        vmlal.s16       q0, d4, d8
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q1, d5, d9
+        vld1.16         {d10, d11}, [r2, :128]!
+
+        vmlal.s16       q0, d6, d10
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q1, d7, d11
+        vld1.16         {d8, d9}, [r2, :128]!
+
+        vmlal.s16       q0, d4, d8
+        vmlal.s16       q1, d5, d9
+
+        vpadd.s32       d0, d0, d1
+        vpadd.s32       d1, d2, d3
+
+        vrshrn.s32      d0, q0, SBC_PROTO_FIXED_SCALE
+
+        vld1.16         {d2, d3, d4, d5}, [r2, :128]!
+
+        vdup.i32        d1, d0[1]  /* TODO: can be eliminated */
+        vdup.i32        d0, d0[0]  /* TODO: can be eliminated */
+
+        vmull.s16       q3, d2, d0
+        vmull.s16       q4, d3, d0
+        vmlal.s16       q3, d4, d1
+        vmlal.s16       q4, d5, d1
+
+        vpadd.s32       d0, d6, d7 /* TODO: can be eliminated */
+        vpadd.s32       d1, d8, d9 /* TODO: can be eliminated */
+
+        vst1.32         {d0, d1}, [r1, :128]
+
+        bx              lr
+endfunc
+
+function ff_sbc_analyze_8_neon, export=1
+        /* TODO: merge even and odd cases (or even merge all four calls to this
+         * function) in order to have only aligned reads from 'in' array
+         * and reduce number of load instructions */
+        vld1.16         {d4, d5}, [r0, :64]!
+        vld1.16         {d8, d9}, [r2, :128]!
+
+        vmull.s16       q6, d4, d8
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmull.s16       q7, d5, d9
+        vld1.16         {d10, d11}, [r2, :128]!
+        vmull.s16       q8, d6, d10
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmull.s16       q9, d7, d11
+        vld1.16         {d8, d9}, [r2, :128]!
+
+        vmlal.s16       q6, d4, d8
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q7, d5, d9
+        vld1.16         {d10, d11}, [r2, :128]!
+        vmlal.s16       q8, d6, d10
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q9, d7, d11
+        vld1.16         {d8, d9}, [r2, :128]!
+
+        vmlal.s16       q6, d4, d8
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q7, d5, d9
+        vld1.16         {d10, d11}, [r2, :128]!
+        vmlal.s16       q8, d6, d10
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q9, d7, d11
+        vld1.16         {d8, d9}, [r2, :128]!
+
+        vmlal.s16       q6, d4, d8
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q7, d5, d9
+        vld1.16         {d10, d11}, [r2, :128]!
+        vmlal.s16       q8, d6, d10
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q9, d7, d11
+        vld1.16         {d8, d9}, [r2, :128]!
+
+        vmlal.s16       q6, d4, d8
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q7, d5, d9
+        vld1.16         {d10, d11}, [r2, :128]!
+
+        vmlal.s16       q8, d6, d10
+        vmlal.s16       q9, d7, d11
+
+        vpadd.s32       d0, d12, d13
+        vpadd.s32       d1, d14, d15
+        vpadd.s32       d2, d16, d17
+        vpadd.s32       d3, d18, d19
+
+        vrshr.s32       q0, q0, SBC_PROTO_FIXED_SCALE
+        vrshr.s32       q1, q1, SBC_PROTO_FIXED_SCALE
+        vmovn.s32       d0, q0
+        vmovn.s32       d1, q1
+
+        vdup.i32        d3, d1[1]  /* TODO: can be eliminated */
+        vdup.i32        d2, d1[0]  /* TODO: can be eliminated */
+        vdup.i32        d1, d0[1]  /* TODO: can be eliminated */
+        vdup.i32        d0, d0[0]  /* TODO: can be eliminated */
+
+        vld1.16         {d4, d5}, [r2, :128]!
+        vmull.s16       q6, d4, d0
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmull.s16       q7, d5, d0
+        vmull.s16       q8, d6, d0
+        vmull.s16       q9, d7, d0
+
+        vld1.16         {d4, d5}, [r2, :128]!
+        vmlal.s16       q6, d4, d1
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmlal.s16       q7, d5, d1
+        vmlal.s16       q8, d6, d1
+        vmlal.s16       q9, d7, d1
+
+        vld1.16         {d4, d5}, [r2, :128]!
+        vmlal.s16       q6, d4, d2
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmlal.s16       q7, d5, d2
+        vmlal.s16       q8, d6, d2
+        vmlal.s16       q9, d7, d2
+
+        vld1.16         {d4, d5}, [r2, :128]!
+        vmlal.s16       q6, d4, d3
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmlal.s16       q7, d5, d3
+        vmlal.s16       q8, d6, d3
+        vmlal.s16       q9, d7, d3
+
+        vpadd.s32       d0, d12, d13 /* TODO: can be eliminated */
+        vpadd.s32       d1, d14, d15 /* TODO: can be eliminated */
+        vpadd.s32       d2, d16, d17 /* TODO: can be eliminated */
+        vpadd.s32       d3, d18, d19 /* TODO: can be eliminated */
+
+        vst1.32         {d0, d1, d2, d3}, [r1, :128]
+
+        bx              lr
+endfunc
+
+function ff_sbc_calc_scalefactors_neon, export=1
+        @ parameters
+        @ r0 = sb_sample_f
+        @ r1 = scale_factor
+        @ r2 = blocks
+        @ r3 = channels
+        @ r4 = subbands
+        @ local variables
+        @ r5 = in_loop_1
+        @ r6 = in
+        @ r7 = out_loop_1
+        @ r8 = out
+        @ r9 = ch
+        @ r10 = sb
+        @ r11 = inc
+        @ r12 = blk
+
+        push            {r1-r2, r4-r12}
+        ldr             r4,  [sp, #44]
+        mov             r11, #64
+
+        mov             r9,  #0
+1:
+        add             r5,  r0,  r9, lsl#5
+        add             r7,  r1,  r9, lsl#5
+
+        mov             r10,  #0
+2:
+        add             r6,  r5,  r10, lsl#2
+        add             r8,  r7,  r10, lsl#2
+        mov             r12, r2
+
+        vmov.s32        q0,  #0
+        vmov.s32        q1,  #0x8000            @ 1 << SCALE_OUT_BITS
+        vmov.s32        q14, #1
+        vmov.s32        q15, #16                @ 31 - SCALE_OUT_BITS
+        vadd.s32        q1,  q1,  q14
+3:
+        vld1.32         {d16, d17}, [r6, :128], r11
+        vabs.s32        q8,  q8
+        vld1.32         {d18, d19}, [r6, :128], r11
+        vabs.s32        q9,  q9
+        vld1.32         {d20, d21}, [r6, :128], r11
+        vabs.s32        q10, q10
+        vld1.32         {d22, d23}, [r6, :128], r11
+        vabs.s32        q11, q11
+        vmax.s32        q0,  q0,  q8
+        vmax.s32        q1,  q1,  q9
+        vmax.s32        q0,  q0,  q10
+        vmax.s32        q1,  q1,  q11
+        subs            r12, r12, #4
+        bgt             3b
+        vmax.s32        q0,  q0,  q1
+        vsub.s32        q0,  q0,  q14
+        vclz.s32        q0,  q0
+        vsub.s32        q0,  q15, q0
+        vst1.32         {d0, d1}, [r8, :128]
+
+        add             r10, r10, #4
+        cmp             r10, r4
+        blt             2b
+
+        add             r9,  r9,  #1
+        cmp             r9,  r3
+        blt             1b
+
+        pop             {r1-r2, r4-r12}
+        bx              lr
+endfunc
+
+/*
+ * constants: q13 = (31 - SCALE_OUT_BITS)
+ *            q14 = 1
+ * input:     q0  - ((1 << SCALE_OUT_BITS) + 1)
+ *            r5  - samples for channel 0
+ *            r6  - samples for shannel 1
+ * output:    q0, q1 - scale factors without joint stereo
+ *            q2, q3 - scale factors with joint stereo
+ *            q15    - joint stereo selection mask
+ */
+.macro calc_scalefactors
+        vmov.s32        q1,  q0
+        vmov.s32        q2,  q0
+        vmov.s32        q3,  q0
+        mov             r3,  r2
+1:
+        vld1.32         {d18, d19}, [r6, :128], r11
+        vbic.s32        q11, q9,  q14
+        vld1.32         {d16, d17}, [r5, :128], r11
+        vhadd.s32       q10, q8,  q11
+        vhsub.s32       q11, q8,  q11
+        vabs.s32        q8,  q8
+        vabs.s32        q9,  q9
+        vabs.s32        q10, q10
+        vabs.s32        q11, q11
+        vmax.s32        q0,  q0,  q8
+        vmax.s32        q1,  q1,  q9
+        vmax.s32        q2,  q2,  q10
+        vmax.s32        q3,  q3,  q11
+        subs            r3,  r3,  #1
+        bgt             1b
+        vsub.s32        q0,  q0,  q14
+        vsub.s32        q1,  q1,  q14
+        vsub.s32        q2,  q2,  q14
+        vsub.s32        q3,  q3,  q14
+        vclz.s32        q0,  q0
+        vclz.s32        q1,  q1
+        vclz.s32        q2,  q2
+        vclz.s32        q3,  q3
+        vsub.s32        q0,  q13, q0
+        vsub.s32        q1,  q13, q1
+        vsub.s32        q2,  q13, q2
+        vsub.s32        q3,  q13, q3
+.endm
+
+/*
+ * constants: q14 = 1
+ * input: q15 - joint stereo selection mask
+ *        r5  - value set by calc_scalefactors macro
+ *        r6  - value set by calc_scalefactors macro
+ */
+.macro update_joint_stereo_samples
+        sub             r8,  r6,  r11
+        sub             r7,  r5,  r11
+        sub             r6,  r6,  r11, asl #1
+        sub             r5,  r5,  r11, asl #1
+        vld1.32         {d18, d19}, [r6, :128]
+        vbic.s32        q11, q9,  q14
+        vld1.32         {d16, d17}, [r5, :128]
+        vld1.32         {d2, d3}, [r8, :128]
+        vbic.s32        q3,  q1,  q14
+        vld1.32         {d0, d1}, [r7, :128]
+        vhsub.s32       q10, q8,  q11
+        vhadd.s32       q11, q8,  q11
+        vhsub.s32       q2,  q0,  q3
+        vhadd.s32       q3,  q0,  q3
+        vbif.s32        q10, q9,  q15
+        vbif.s32        d22, d16, d30
+        sub             r11, r10, r11, asl #1
+        sub             r3,  r2,  #2
+2:
+        vbif.s32        d23, d17, d31
+        vst1.32         {d20, d21}, [r6, :128], r11
+        vbif.s32        d4,  d2,  d30
+        vld1.32         {d18, d19}, [r6, :128]
+        vbif.s32        d5,  d3,  d31
+        vst1.32         {d22, d23}, [r5, :128], r11
+        vbif.s32        d6,  d0,  d30
+        vld1.32         {d16, d17}, [r5, :128]
+        vbif.s32        d7,  d1,  d31
+        vst1.32         {d4, d5}, [r8, :128], r11
+        vbic.s32        q11, q9,  q14
+        vld1.32         {d2, d3}, [r8, :128]
+        vst1.32         {d6, d7}, [r7, :128], r11
+        vbic.s32        q3,  q1,  q14
+        vld1.32         {d0, d1}, [r7, :128]
+        vhsub.s32       q10, q8,  q11
+        vhadd.s32       q11, q8,  q11
+        vhsub.s32       q2,  q0,  q3
+        vhadd.s32       q3,  q0,  q3
+        vbif.s32        q10, q9,  q15
+        vbif.s32        d22, d16, d30
+        subs            r3,  r3,  #2
+        bgt             2b
+        sub             r11, r10, r11, asr #1
+        vbif.s32        d23, d17, d31
+        vst1.32         {d20, d21}, [r6, :128]
+        vbif.s32        q2,  q1,  q15
+        vst1.32         {d22, d23}, [r5, :128]
+        vbif.s32        q3,  q0,  q15
+        vst1.32         {d4, d5}, [r8, :128]
+        vst1.32         {d6, d7}, [r7, :128]
+.endm
+
+function ff_sbc_calc_scalefactors_j_neon, export=1
+        @ parameters
+        @ r0 = in = sb_sample_f
+        @ r1 = out = scale_factor
+        @ r2 = blocks
+        @ r3 = subbands
+        @ local variables
+        @ r4 = consts = ff_sbcdsp_joint_bits_mask
+        @ r5 = in0
+        @ r6 = in1
+        @ r7 = out0
+        @ r8 = out1
+        @ r10 = zero
+        @ r11 = inc
+        @ return r0 = joint
+
+        push            {r3-r11}
+        movrelx         r4,  X(ff_sbcdsp_joint_bits_mask)
+        mov             r10, #0
+        mov             r11, #64
+
+        vmov.s32        q14, #1
+        vmov.s32        q13, #16    @ 31 - SCALE_OUT_BITS
+
+        cmp             r3, #4
+        bne             8f
+
+4:      @ 4 subbands
+        add             r5,  r0,  #0
+        add             r6,  r0,  #32
+        add             r7,  r1,  #0
+        add             r8,  r1,  #32
+        vmov.s32        q0,  #0x8000    @ 1 << SCALE_OUT_BITS
+        vadd.s32        q0,  q0,  q14
+
+        calc_scalefactors
+
+        @ check whether to use joint stereo for subbands 0, 1, 2
+        vadd.s32        q15, q0,  q1
+        vadd.s32        q9,  q2,  q3
+        vmov.s32        d31[1], r10    @ last subband -> no joint
+        vld1.32         {d16, d17}, [r4, :128]!
+        vcgt.s32        q15, q15, q9
+
+        @ calculate and save to memory 'joint' variable
+        @ update and save scale factors to memory
+        vand.s32        q8,  q8,  q15
+        vbit.s32        q0,  q2,  q15
+        vpadd.s32       d16, d16, d17
+        vbit.s32        q1,  q3,  q15
+        vpadd.s32       d16, d16, d16
+        vst1.32         {d0, d1}, [r7, :128]
+        vst1.32         {d2, d3}, [r8, :128]
+        vmov.32         r0, d16[0]
+
+        update_joint_stereo_samples
+        b               9f
+
+8:      @ 8 subbands
+        add             r5,  r0,  #16
+        add             r6,  r0,  #48
+        add             r7,  r1,  #16
+        add             r8,  r1,  #48
+        vmov.s32        q0,  #0x8000    @ 1 << SCALE_OUT_BITS
+        vadd.s32        q0,  q0,  q14
+
+        calc_scalefactors
+
+        @ check whether to use joint stereo for subbands 4, 5, 6
+        vadd.s32        q15, q0,  q1
+        vadd.s32        q9,  q2,  q3
+        vmov.s32        d31[1], r10    @ last subband -> no joint
+        vld1.32         {d16, d17}, [r4, :128]!
+        vcgt.s32        q15, q15, q9
+
+        @ calculate part of 'joint' variable and save it to d24
+        @ update and save scale factors to memory
+        vand.s32        q8,  q8,  q15
+        vbit.s32        q0,  q2,  q15
+        vpadd.s32       d16, d16, d17
+        vbit.s32        q1,  q3,  q15
+        vst1.32         {d0, d1}, [r7, :128]
+        vst1.32         {d2, d3}, [r8, :128]
+        vpadd.s32       d24, d16, d16
+
+        update_joint_stereo_samples
+
+        add             r5,  r0,  #0
+        add             r6,  r0,  #32
+        add             r7,  r1,  #0
+        add             r8,  r1,  #32
+        vmov.s32        q0,  #0x8000    @ 1 << SCALE_OUT_BITS
+        vadd.s32        q0,  q0,  q14
+
+        calc_scalefactors
+
+        @ check whether to use joint stereo for subbands 0, 1, 2, 3
+        vadd.s32        q15, q0,  q1
+        vadd.s32        q9,  q2,  q3
+        vld1.32         {d16, d17}, [r4, :128]!
+        vcgt.s32        q15, q15, q9
+
+        @ combine last part of 'joint' with d24 and save to memory
+        @ update and save scale factors to memory
+        vand.s32        q8,  q8,  q15
+        vbit.s32        q0,  q2,  q15
+        vpadd.s32       d16, d16, d17
+        vbit.s32        q1,  q3,  q15
+        vpadd.s32       d16, d16, d16
+        vst1.32         {d0, d1}, [r7, :128]
+        vadd.s32        d16, d16, d24
+        vst1.32         {d2, d3}, [r8, :128]
+        vmov.32         r0,  d16[0]
+
+        update_joint_stereo_samples
+9:
+        pop             {r3-r11}
+        bx              lr
+endfunc
+
+function ff_sbc_enc_process_input_4s_neon, export=1
+        @ parameters
+        @ r0 = positioin
+        @ r1 = pcm
+        @ r2 = X
+        @ r3 = nsamples
+        @ r4 = nchannels
+        @ local variables
+        @ r5 = ff_sbc_input_perm_4
+        @ r6 = src / x
+        @ r7 = dst / y
+
+        push            {r1, r3-r7}
+        ldr             r4,  [sp, #24]
+        movrelx         r5,  X(ff_sbc_input_perm_4)
+
+        @ handle X buffer wraparound
+        cmp             r0,  r3
+        bge             1f                     @ if (position < nsamples)
+        add             r7,  r2,  #576         @ &X[0][SBC_X_BUFFER_SIZE - 40]
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0}, [r6, :64]!
+        vst1.16         {d0}, [r7, :64]!
+        cmp             r4,  #1
+        ble             2f                     @ if (nchannels > 1)
+        add             r7,  r2,  #1232        @ &X[1][SBC_X_BUFFER_SIZE - 40]
+        add             r6,  r2,  #656
+        add             r6,  r6,  r0, lsl#1    @ &X[1][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0}, [r6, :64]!
+        vst1.16         {d0}, [r7, :64]!
+2:
+        mov             r0,  #288              @ SBC_X_BUFFER_SIZE - 40
+1:
+
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        add             r7,  r6,  #656         @ &X[1][position]
+
+        cmp             r4,  #1
+        ble             8f                     @ if (nchannels > 1)
+        tst             r1,  #1
+        beq             7f                     @ if (pcm & 1)
+        @ poor 'pcm' alignment
+        vld1.8          {d0, d1}, [r5, :128]
+1:
+        sub             r6,  r6,  #16
+        sub             r7,  r7,  #16
+        sub             r0,  r0,  #8
+        vld1.8          {d4, d5}, [r1]!
+        vuzp.16         d4,  d5
+        vld1.8          {d20, d21}, [r1]!
+        vuzp.16         d20, d21
+        vswp            d5,  d20
+        vtbl.8          d16, {d4, d5}, d0
+        vtbl.8          d17, {d4, d5}, d1
+        vtbl.8          d18, {d20, d21}, d0
+        vtbl.8          d19, {d20, d21}, d1
+        vst1.16         {d16, d17}, [r6, :128]
+        vst1.16         {d18, d19}, [r7, :128]
+        subs            r3,  r3,  #8
+        bgt             1b
+        b               9f
+7:
+        @ proper 'pcm' alignment
+        vld1.8          {d0, d1}, [r5, :128]
+1:
+        sub             r6,  r6,  #16
+        sub             r7,  r7,  #16
+        sub             r0,  r0,  #8
+        vld2.16         {d4, d5}, [r1]!
+        vld2.16         {d20, d21}, [r1]!
+        vswp            d5,  d20
+        vtbl.8          d16, {d4, d5}, d0
+        vtbl.8          d17, {d4, d5}, d1
+        vtbl.8          d18, {d20, d21}, d0
+        vtbl.8          d19, {d20, d21}, d1
+        vst1.16         {d16, d17}, [r6, :128]
+        vst1.16         {d18, d19}, [r7, :128]
+        subs            r3,  r3,  #8
+        bgt             1b
+        b               9f
+8:
+        @ mono
+        vld1.8          {d0, d1}, [r5, :128]
+1:
+        sub             r6,  r6,  #16
+        sub             r0,  r0,  #8
+        vld1.8          {d4, d5}, [r1]!
+        vtbl.8          d16, {d4, d5}, d0
+        vtbl.8          d17, {d4, d5}, d1
+        vst1.16         {d16, d17}, [r6, :128]
+        subs            r3,  r3,  #8
+        bgt             1b
+9:
+        pop             {r1, r3-r7}
+        bx              lr
+endfunc
+
+function ff_sbc_enc_process_input_8s_neon, export=1
+        @ parameters
+        @ r0 = positioin
+        @ r1 = pcm
+        @ r2 = X
+        @ r3 = nsamples
+        @ r4 = nchannels
+        @ local variables
+        @ r5 = ff_sbc_input_perm_8
+        @ r6 = src
+        @ r7 = dst
+
+        push            {r1, r3-r7}
+        ldr             r4,  [sp, #24]
+        movrelx         r5,  X(ff_sbc_input_perm_8)
+
+        @ handle X buffer wraparound
+        cmp             r0,  r3
+        bge             1f                     @ if (position < nsamples)
+        add             r7,  r2,  #512         @ &X[0][SBC_X_BUFFER_SIZE - 72]
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1}, [r6, :128]!
+        vst1.16         {d0, d1}, [r7, :128]!
+        cmp             r4,  #1
+        ble             2f                     @ if (nchannels > 1)
+        add             r7,  r2,  #1168        @ &X[1][SBC_X_BUFFER_SIZE - 72]
+        add             r6,  r2,  #656
+        add             r6,  r6,  r0, lsl#1    @ &X[1][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1}, [r6, :128]!
+        vst1.16         {d0, d1}, [r7, :128]!
+2:
+        mov             r0,  #256              @ SBC_X_BUFFER_SIZE - 72
+1:
+
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        add             r7,  r6,  #656         @ &X[1][position]
+
+        cmp             r4,  #1
+        ble             8f                     @ if (nchannels > 1)
+        tst             r1,  #1
+        beq             7f                     @ if (pcm & 1)
+        @ poor 'pcm' alignment
+        vld1.8          {d0, d1, d2, d3}, [r5, :128]
+1:
+        sub             r6,  r6,  #32
+        sub             r7,  r7,  #32
+        sub             r0,  r0,  #16
+        vld1.8          {d4, d5, d6, d7}, [r1]!
+        vuzp.16         q2,  q3
+        vld1.8          {d20, d21, d22, d23}, [r1]!
+        vuzp.16         q10, q11
+        vswp            q3,  q10
+        vtbl.8          d16, {d4, d5, d6, d7}, d0
+        vtbl.8          d17, {d4, d5, d6, d7}, d1
+        vtbl.8          d18, {d4, d5, d6, d7}, d2
+        vtbl.8          d19, {d4, d5, d6, d7}, d3
+        vst1.16         {d16, d17, d18, d19}, [r6, :128]
+        vtbl.8          d16, {d20, d21, d22, d23}, d0
+        vtbl.8          d17, {d20, d21, d22, d23}, d1
+        vtbl.8          d18, {d20, d21, d22, d23}, d2
+        vtbl.8          d19, {d20, d21, d22, d23}, d3
+        vst1.16         {d16, d17, d18, d19}, [r7, :128]
+        subs            r3,  r3,  #16
+        bgt             1b
+        b 9f
+7:
+        @ proper 'pcm' alignment
+        vld1.8          {d0, d1, d2, d3}, [r5, :128]
+1:
+        sub             r6,  r6,  #32
+        sub             r7,  r7,  #32
+        sub             r0,  r0,  #16
+        vld2.16         {d4, d5, d6, d7}, [r1]!
+        vld2.16         {d20, d21, d22, d23}, [r1]!
+        vswp            q3,  q10
+        vtbl.8          d16, {d4, d5, d6, d7}, d0
+        vtbl.8          d17, {d4, d5, d6, d7}, d1
+        vtbl.8          d18, {d4, d5, d6, d7}, d2
+        vtbl.8          d19, {d4, d5, d6, d7}, d3
+        vst1.16         {d16, d17, d18, d19}, [r6, :128]
+        vtbl.8          d16, {d20, d21, d22, d23}, d0
+        vtbl.8          d17, {d20, d21, d22, d23}, d1
+        vtbl.8          d18, {d20, d21, d22, d23}, d2
+        vtbl.8          d19, {d20, d21, d22, d23}, d3
+        vst1.16         {d16, d17, d18, d19}, [r7, :128]
+        subs            r3,  r3,  #16
+        bgt             1b
+        b               9f
+8:
+        @ mono
+        vld1.8          {d0, d1, d2, d3}, [r5, :128]
+1:
+        sub             r6,  r6,  #32
+        sub             r0,  r0,  #16
+        vld1.8          {d4, d5, d6, d7}, [r1]!
+        vtbl.8          d16, {d4, d5, d6, d7}, d0
+        vtbl.8          d17, {d4, d5, d6, d7}, d1
+        vtbl.8          d18, {d4, d5, d6, d7}, d2
+        vtbl.8          d19, {d4, d5, d6, d7}, d3
+        vst1.16         {d16, d17, d18, d19}, [r6, :128]
+        subs            r3,  r3,  #16
+        bgt             1b
+9:
+        pop             {r1, r3-r7}
+        bx              lr
+endfunc

diff --git a/libavcodec/arm/vc1dsp_init_neon.c b/libavcodec/arm/vc1dsp_init_neon.c
index 005d45c..2cca784 100644
--- a/libavcodec/arm/vc1dsp_init_neon.c
+++ b/libavcodec/arm/vc1dsp_init_neon.c

@@ -22,8 +22,6 @@
 #include "libavcodec/vc1dsp.h"
 #include "vc1dsp.h"
 
-#include "config.h"
-
 void ff_vc1_inv_trans_8x8_neon(int16_t *block);
 void ff_vc1_inv_trans_4x8_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
 void ff_vc1_inv_trans_8x4_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
@@ -95,7 +93,6 @@
     dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_neon;
 
     dsp->put_vc1_mspel_pixels_tab[1][ 0] = ff_put_pixels8x8_neon;
-    if (HAVE_AS_DN_DIRECTIVE) {
     FN_ASSIGN(1, 0);
     FN_ASSIGN(2, 0);
     FN_ASSIGN(3, 0);
@@ -114,7 +111,6 @@
     FN_ASSIGN(1, 3);
     FN_ASSIGN(2, 3);
     FN_ASSIGN(3, 3);
-    }
 
     dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_neon;
     dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_neon;

diff --git a/libavcodec/arm/vc1dsp_neon.S b/libavcodec/arm/vc1dsp_neon.S
index 611cbf2..93f043b 100644
--- a/libavcodec/arm/vc1dsp_neon.S
+++ b/libavcodec/arm/vc1dsp_neon.S

@@ -410,13 +410,13 @@
         @   src[48] q14
         @   src[56] q15
 
-        vc1_inv_trans_8x8_helper add=4 add1beforeshift=0 rshift=3
+        vc1_inv_trans_8x8_helper add=4, add1beforeshift=0, rshift=3
 
         @ Transpose result matrix of 8x8
         swap4           d17, d19, d21, d23, d24, d26, d28, d30
         transpose16_4x4 q8,  q9,  q10, q11, q12, q13, q14, q15
 
-        vc1_inv_trans_8x8_helper add=64 add1beforeshift=1 rshift=7
+        vc1_inv_trans_8x8_helper add=64, add1beforeshift=1, rshift=7
 
         vst1.64         {q8-q9},   [r0,:128]!
         vst1.64         {q10-q11}, [r0,:128]!
@@ -431,7 +431,7 @@
         vld1.64         {q0-q1}, [r2,:128]!     @ load 8 * 4 * 2 = 64 bytes / 16 bytes per quad = 4 quad registers
         vld1.64         {q2-q3}, [r2,:128]
 
-        transpose16     q0 q1 q2 q3             @ transpose rows to columns
+        transpose16     q0, q1, q2, q3          @ transpose rows to columns
 
         @ At this point:
         @   src[0]   d0
@@ -443,7 +443,7 @@
         @   src[6]   d5
         @   src[7]   d7
 
-        vc1_inv_trans_8x4_helper    add=4 add1beforeshift=0 rshift=3
+        vc1_inv_trans_8x4_helper    add=4, add1beforeshift=0, rshift=3
 
         @ Move output to more standardized registers
         vmov        d0, d16
@@ -465,7 +465,7 @@
         @   dst[6]   d5
         @   dst[7]   d7
 
-        transpose16     q0 q1 q2 q3   @ turn columns into rows
+        transpose16     q0, q1, q2, q3   @ turn columns into rows
 
         @ At this point:
         @   row[0] q0
@@ -473,7 +473,7 @@
         @   row[2] q2
         @   row[3] q3
 
-        vc1_inv_trans_4x8_helper    add=64 rshift=7
+        vc1_inv_trans_4x8_helper    add=64, rshift=7
 
         @ At this point:
         @   line[0].l   d0
@@ -523,7 +523,7 @@
         vld4.16         {d1[2], d3[2], d5[2], d7[2]}, [r2,:64], r12
         vld4.16         {d1[3], d3[3], d5[3], d7[3]}, [r2,:64]
 
-        vc1_inv_trans_4x8_helper    add=4 rshift=3
+        vc1_inv_trans_4x8_helper    add=4, rshift=3
 
         @ At this point:
         @   dst[0] = q0
@@ -531,9 +531,9 @@
         @   dst[2] = q2
         @   dst[3] = q3
 
-        transpose16     q0 q1 q2 q3     @ Transpose rows (registers) into columns
+        transpose16     q0, q1, q2, q3  @ Transpose rows (registers) into columns
 
-        vc1_inv_trans_8x4_helper    add=64 add1beforeshift=1 rshift=7
+        vc1_inv_trans_8x4_helper    add=64, add1beforeshift=1, rshift=7
 
         vld1.32         {d28[]},  [r0,:32], r1  @ read dest
         vld1.32         {d28[1]}, [r0,:32], r1
@@ -611,7 +611,7 @@
         @   src[2] = d1
         @   src[3] = d3
 
-        vc1_inv_trans_4x4_helper add=4 rshift=3      @ compute t1, t2, t3, t4 and combine them into dst[0-3]
+        vc1_inv_trans_4x4_helper add=4, rshift=3     @ compute t1, t2, t3, t4 and combine them into dst[0-3]
 
         @ At this point:
         @   dst[0] = d0
@@ -619,7 +619,7 @@
         @   dst[2] = d1
         @   dst[3] = d2
 
-        transpose16     d0 d3 d1 d2     @ Transpose rows (registers) into columns
+        transpose16     d0, d3, d1, d2  @ Transpose rows (registers) into columns
 
         @ At this point:
         @   src[0]  = d0
@@ -635,7 +635,7 @@
         @   src[16] = d1
         @   src[24] = d3
 
-        vc1_inv_trans_4x4_helper add=64 rshift=7              @ compute t1, t2, t3, t4 and combine them into dst[0-3]
+        vc1_inv_trans_4x4_helper add=64, rshift=7             @ compute t1, t2, t3, t4 and combine them into dst[0-3]
 
         @ At this point:
         @   line[0] = d0
@@ -663,48 +663,43 @@
         bx              lr
 endfunc
 
-#if HAVE_AS_DN_DIRECTIVE
 @ The absolute value of multiplication constants from vc1_mspel_filter and vc1_mspel_{ver,hor}_filter_16bits.
 @ The sign is embedded in the code below that carries out the multiplication (mspel_filter{,.16}).
-#define MSPEL_MODE_1_MUL_CONSTANTS  4 53 18 3
-#define MSPEL_MODE_2_MUL_CONSTANTS  1 9  9  1
-#define MSPEL_MODE_3_MUL_CONSTANTS  3 18 53 4
+#define MSPEL_MODE_1_MUL_CONSTANTS  4, 53, 18, 3
+#define MSPEL_MODE_2_MUL_CONSTANTS  1, 9,  9,  1
+#define MSPEL_MODE_3_MUL_CONSTANTS  3, 18, 53, 4
 
 @ These constants are from reading the source code of vc1_mspel_mc and determining the value that
 @ is added to `rnd` to result in the variable `r`, and the value of the variable `shift`.
-#define MSPEL_MODES_11_ADDSHIFT_CONSTANTS   15 5
-#define MSPEL_MODES_12_ADDSHIFT_CONSTANTS   3  3
-#define MSPEL_MODES_13_ADDSHIFT_CONSTANTS   15 5
+#define MSPEL_MODES_11_ADDSHIFT_CONSTANTS   15, 5
+#define MSPEL_MODES_12_ADDSHIFT_CONSTANTS   3,  3
+#define MSPEL_MODES_13_ADDSHIFT_CONSTANTS   15, 5
 #define MSPEL_MODES_21_ADDSHIFT_CONSTANTS   MSPEL_MODES_12_ADDSHIFT_CONSTANTS
-#define MSPEL_MODES_22_ADDSHIFT_CONSTANTS   0  1
-#define MSPEL_MODES_23_ADDSHIFT_CONSTANTS   3  3
+#define MSPEL_MODES_22_ADDSHIFT_CONSTANTS   0,  1
+#define MSPEL_MODES_23_ADDSHIFT_CONSTANTS   3,  3
 #define MSPEL_MODES_31_ADDSHIFT_CONSTANTS   MSPEL_MODES_13_ADDSHIFT_CONSTANTS
 #define MSPEL_MODES_32_ADDSHIFT_CONSTANTS   MSPEL_MODES_23_ADDSHIFT_CONSTANTS
-#define MSPEL_MODES_33_ADDSHIFT_CONSTANTS   15 5
+#define MSPEL_MODES_33_ADDSHIFT_CONSTANTS   15, 5
 
 @ The addition and shift constants from vc1_mspel_filter.
-#define MSPEL_MODE_1_ADDSHIFT_CONSTANTS     32 6
-#define MSPEL_MODE_2_ADDSHIFT_CONSTANTS     8  4
-#define MSPEL_MODE_3_ADDSHIFT_CONSTANTS     32 6
+#define MSPEL_MODE_1_ADDSHIFT_CONSTANTS     32, 6
+#define MSPEL_MODE_2_ADDSHIFT_CONSTANTS     8,  4
+#define MSPEL_MODE_3_ADDSHIFT_CONSTANTS     32, 6
 
 @ Setup constants in registers for a subsequent use of mspel_filter{,.16}.
 .macro mspel_constants typesize reg_a reg_b reg_c reg_d filter_a filter_b filter_c filter_d reg_add filter_add_register
-  @ Define double-word register aliases. Typesize should be i8 or i16.
-  ra .dn \reg_a\().\typesize
-  rb .dn \reg_b\().\typesize
-  rc .dn \reg_c\().\typesize
-  rd .dn \reg_d\().\typesize
+  @ Typesize should be i8 or i16.
 
   @ Only set the register if the value is not 1 and unique
   .if \filter_a != 1
-        vmov            ra,  #\filter_a              @ ra = filter_a
+        vmov.\typesize  \reg_a,  #\filter_a          @ reg_a = filter_a
   .endif
-        vmov            rb,  #\filter_b              @ rb = filter_b
+        vmov.\typesize  \reg_b,  #\filter_b          @ reg_b = filter_b
   .if \filter_b != \filter_c
-        vmov            rc,  #\filter_c              @ rc = filter_c
+        vmov.\typesize  \reg_c,  #\filter_c          @ reg_c = filter_c
   .endif
   .if \filter_d != 1
-        vmov            rd,  #\filter_d              @ rd = filter_d
+        vmov.\typesize  \reg_d,  #\filter_d          @ reg_d = filter_d
   .endif
   @ vdup to double the size of typesize
   .ifc \typesize,i8
@@ -712,11 +707,6 @@
   .else
         vdup.32         \reg_add,  \filter_add_register     @ reg_add = filter_add_register
   .endif
-
-  .unreq ra
-  .unreq rb
-  .unreq rc
-  .unreq rd
 .endm
 
 @ After mspel_constants has been used, do the filtering.
@@ -828,7 +818,7 @@
         sub             r1,  r1,  r2            @ r1 = &src[-stride]      @ slide back
 
         @ Do vertical filtering from src into tmp
-        mspel_constants i8 d28 d29 d30 d31 \filter_v_a \filter_v_b \filter_v_c \filter_v_d q13 r3
+        mspel_constants i8, d28, d29, d30, d31, \filter_v_a, \filter_v_b, \filter_v_c, \filter_v_d, q13, r3
 
         vld1.64         {d0,d1}, [r1], r2
         vld1.64         {d2,d3}, [r1], r2
@@ -838,23 +828,23 @@
         subs            r12,  r12,  #4
 
         vld1.64         {d6,d7}, [r1], r2
-        mspel_filter    q11 q11 d0 d2 d4 d6 \filter_v_a \filter_v_b \filter_v_c \filter_v_d d28 d29 d30 d31 q13 \filter_shift narrow=0
-        mspel_filter    q12 q12 d1 d3 d5 d7 \filter_v_a \filter_v_b \filter_v_c \filter_v_d d28 d29 d30 d31 q13 \filter_shift narrow=0
+        mspel_filter    q11, q11, d0, d2, d4, d6, \filter_v_a, \filter_v_b, \filter_v_c, \filter_v_d, d28, d29, d30, d31, q13, \filter_shift, narrow=0
+        mspel_filter    q12, q12, d1, d3, d5, d7, \filter_v_a, \filter_v_b, \filter_v_c, \filter_v_d, d28, d29, d30, d31, q13, \filter_shift, narrow=0
         vst1.64         {q11,q12}, [r4,:128]!   @ store and increment
 
         vld1.64         {d0,d1}, [r1], r2
-        mspel_filter    q11 q11 d2 d4 d6 d0 \filter_v_a \filter_v_b \filter_v_c \filter_v_d d28 d29 d30 d31 q13 \filter_shift narrow=0
-        mspel_filter    q12 q12 d3 d5 d7 d1 \filter_v_a \filter_v_b \filter_v_c \filter_v_d d28 d29 d30 d31 q13 \filter_shift narrow=0
+        mspel_filter    q11, q11, d2, d4, d6, d0, \filter_v_a, \filter_v_b, \filter_v_c, \filter_v_d, d28, d29, d30, d31, q13, \filter_shift, narrow=0
+        mspel_filter    q12, q12, d3, d5, d7, d1, \filter_v_a, \filter_v_b, \filter_v_c, \filter_v_d, d28, d29, d30, d31, q13, \filter_shift, narrow=0
         vst1.64         {q11,q12}, [r4,:128]!   @ store and increment
 
         vld1.64         {d2,d3}, [r1], r2
-        mspel_filter    q11 q11 d4 d6 d0 d2 \filter_v_a \filter_v_b \filter_v_c \filter_v_d d28 d29 d30 d31 q13 \filter_shift narrow=0
-        mspel_filter    q12 q12 d5 d7 d1 d3 \filter_v_a \filter_v_b \filter_v_c \filter_v_d d28 d29 d30 d31 q13 \filter_shift narrow=0
+        mspel_filter    q11, q11, d4, d6, d0, d2, \filter_v_a, \filter_v_b, \filter_v_c, \filter_v_d, d28, d29, d30, d31, q13, \filter_shift, narrow=0
+        mspel_filter    q12, q12, d5, d7, d1, d3, \filter_v_a, \filter_v_b, \filter_v_c, \filter_v_d, d28, d29, d30, d31, q13, \filter_shift, narrow=0
         vst1.64         {q11,q12}, [r4,:128]!   @ store and increment
 
         vld1.64         {d4,d5}, [r1], r2
-        mspel_filter    q11 q11 d6 d0 d2 d4 \filter_v_a \filter_v_b \filter_v_c \filter_v_d d28 d29 d30 d31 q13 \filter_shift narrow=0
-        mspel_filter    q12 q12 d7 d1 d3 d5 \filter_v_a \filter_v_b \filter_v_c \filter_v_d d28 d29 d30 d31 q13 \filter_shift narrow=0
+        mspel_filter    q11, q11, d6, d0, d2, d4, \filter_v_a, \filter_v_b, \filter_v_c, \filter_v_d, d28, d29, d30, d31, q13, \filter_shift, narrow=0
+        mspel_filter    q12, q12, d7, d1, d3, d5, \filter_v_a, \filter_v_b, \filter_v_c, \filter_v_d, d28, d29, d30, d31, q13, \filter_shift, narrow=0
         vst1.64         {q11,q12}, [r4,:128]!   @ store and increment
 
         bne             1b
@@ -864,7 +854,7 @@
         mov             r4,   sp                @ r4 = tmp
 
         @ Do horizontal filtering from temp to dst
-        mspel_constants i16 d28 d29 d30 d31 \filter_h_a \filter_h_b \filter_h_c \filter_h_d q13 r3
+        mspel_constants i16, d28, d29, d30, d31, \filter_h_a, \filter_h_b, \filter_h_c, \filter_h_d, q13, r3
 
 2:
         subs            r12,  r12,  #1
@@ -874,7 +864,7 @@
         vext.16         q3,   q0,   q1,  #3
         vext.16         q1,   q0,   q1,  #1     @ do last because it writes to q1 which is read by the other vext instructions
 
-        mspel_filter.16 q11 q12 d22 d23 d21 d0 d1 d2 d3 d4 d5 d6 d7 \filter_h_a \filter_h_b \filter_h_c \filter_h_d d28 d29 d30 d31 q13 7
+        mspel_filter.16 q11, q12, d22, d23, d21, d0, d1, d2, d3, d4, d5, d6, d7, \filter_h_a, \filter_h_b, \filter_h_c, \filter_h_d, d28, d29, d30, d31, q13, 7
 
         vst1.64         {d21}, [r0,:64], r2     @ store and increment dst
 
@@ -887,9 +877,9 @@
 
 @ Use C preprocessor and assembler macros to expand to functions for horizontal and vertical filtering.
 #define PUT_VC1_MSPEL_MC_HV(hmode, vmode)   \
-    put_vc1_mspel_mc_hv hmode vmode \
-        MSPEL_MODE_ ## hmode ## _MUL_CONSTANTS \
-        MSPEL_MODE_ ## vmode ## _MUL_CONSTANTS \
+    put_vc1_mspel_mc_hv hmode, vmode, \
+        MSPEL_MODE_ ## hmode ## _MUL_CONSTANTS, \
+        MSPEL_MODE_ ## vmode ## _MUL_CONSTANTS, \
         MSPEL_MODES_ ## hmode ## vmode ## _ADDSHIFT_CONSTANTS
 
 PUT_VC1_MSPEL_MC_HV(1, 1)
@@ -910,7 +900,7 @@
         mov             r12,  #8                        @ loop counter
         sub             r1,   r1,   #1                  @ slide back, using immediate
 
-        mspel_constants i8 d28 d29 d30 d31 \filter_a \filter_b \filter_c \filter_d q13 r3
+        mspel_constants i8, d28, d29, d30, d31, \filter_a, \filter_b, \filter_c, \filter_d, q13, r3
 
 1:
         subs            r12,  r12,  #1
@@ -920,7 +910,7 @@
         vext.8          d3,   d0,   d1,  #3
         vext.8          d1,   d0,   d1,  #1             @ do last because it writes to d1 which is read by the other vext instructions
 
-        mspel_filter    q11 d21 d0 d1 d2 d3 \filter_a \filter_b \filter_c \filter_d d28 d29 d30 d31 q13 \filter_shift
+        mspel_filter    q11, d21, d0, d1, d2, d3, \filter_a, \filter_b, \filter_c, \filter_d, d28, d29, d30, d31, q13, \filter_shift
 
         vst1.64         {d21}, [r0,:64], r2             @ store and increment dst
 
@@ -932,7 +922,7 @@
 
 @ Use C preprocessor and assembler macros to expand to functions for horizontal only filtering.
 #define PUT_VC1_MSPEL_MC_H_ONLY(hmode) \
-        put_vc1_mspel_mc_h_only hmode MSPEL_MODE_ ## hmode ## _MUL_CONSTANTS MSPEL_MODE_ ## hmode ## _ADDSHIFT_CONSTANTS
+        put_vc1_mspel_mc_h_only hmode, MSPEL_MODE_ ## hmode ## _MUL_CONSTANTS, MSPEL_MODE_ ## hmode ## _ADDSHIFT_CONSTANTS
 
 PUT_VC1_MSPEL_MC_H_ONLY(1)
 PUT_VC1_MSPEL_MC_H_ONLY(2)
@@ -947,7 +937,7 @@
         mov             r12,  #8                        @ loop counter
         sub             r1,   r1,   r2                  @ r1 = &src[-stride]      @ slide back
 
-        mspel_constants i8 d28 d29 d30 d31 \filter_a \filter_b \filter_c \filter_d q13 r3
+        mspel_constants i8, d28, d29, d30, d31, \filter_a, \filter_b, \filter_c, \filter_d, q13, r3
 
         vld1.64         {d0},  [r1], r2                 @ d0 = src[-stride]
         vld1.64         {d1},  [r1], r2                 @ d1 = src[0]
@@ -957,19 +947,19 @@
         subs            r12,  r12,  #4
 
         vld1.64         {d3},  [r1], r2                 @ d3 = src[stride * 2]
-        mspel_filter    q11 d21 d0 d1 d2 d3 \filter_a \filter_b \filter_c \filter_d d28 d29 d30 d31 q13 \filter_shift
+        mspel_filter    q11, d21, d0, d1, d2, d3, \filter_a, \filter_b, \filter_c, \filter_d, d28, d29, d30, d31, q13, \filter_shift
         vst1.64         {d21}, [r0,:64], r2             @ store and increment dst
 
         vld1.64         {d0},  [r1], r2                 @ d0 = next line
-        mspel_filter    q11 d21 d1 d2 d3 d0 \filter_a \filter_b \filter_c \filter_d d28 d29 d30 d31 q13 \filter_shift
+        mspel_filter    q11, d21, d1, d2, d3, d0, \filter_a, \filter_b, \filter_c, \filter_d, d28, d29, d30, d31, q13, \filter_shift
         vst1.64         {d21}, [r0,:64], r2             @ store and increment dst
 
         vld1.64         {d1},  [r1], r2                 @ d1 = next line
-        mspel_filter    q11 d21 d2 d3 d0 d1 \filter_a \filter_b \filter_c \filter_d d28 d29 d30 d31 q13 \filter_shift
+        mspel_filter    q11, d21, d2, d3, d0, d1, \filter_a, \filter_b, \filter_c, \filter_d, d28, d29, d30, d31, q13, \filter_shift
         vst1.64         {d21}, [r0,:64], r2             @ store and increment dst
 
         vld1.64         {d2},  [r1], r2                 @ d2 = next line
-        mspel_filter    q11 d21 d3 d0 d1 d2 \filter_a \filter_b \filter_c \filter_d d28 d29 d30 d31 q13 \filter_shift
+        mspel_filter    q11, d21, d3, d0, d1, d2, \filter_a, \filter_b, \filter_c, \filter_d, d28, d29, d30, d31, q13, \filter_shift
         vst1.64         {d21}, [r0,:64], r2             @ store and increment dst
 
         bne             1b
@@ -980,14 +970,13 @@
 
 @ Use C preprocessor and assembler macros to expand to functions for vertical only filtering.
 #define PUT_VC1_MSPEL_MC_V_ONLY(vmode) \
-        put_vc1_mspel_mc_v_only vmode MSPEL_MODE_ ## vmode ## _MUL_CONSTANTS MSPEL_MODE_ ## vmode ## _ADDSHIFT_CONSTANTS
+        put_vc1_mspel_mc_v_only vmode, MSPEL_MODE_ ## vmode ## _MUL_CONSTANTS, MSPEL_MODE_ ## vmode ## _ADDSHIFT_CONSTANTS
 
 PUT_VC1_MSPEL_MC_V_ONLY(1)
 PUT_VC1_MSPEL_MC_V_ONLY(2)
 PUT_VC1_MSPEL_MC_V_ONLY(3)
 
 #undef PUT_VC1_MSPEL_MC_V_ONLY
-#endif
 
 function ff_put_pixels8x8_neon, export=1
         vld1.64         {d0}, [r1], r2

diff --git a/libavcodec/atrac9dec.c b/libavcodec/atrac9dec.c
new file mode 100644
index 0000000..805d46f
--- /dev/null
+++ b/libavcodec/atrac9dec.c

@@ -0,0 +1,954 @@
+/*
+ * ATRAC9 decoder
+ * Copyright (c) 2018 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "internal.h"
+#include "get_bits.h"
+#include "fft.h"
+#include "atrac9tab.h"
+#include "libavutil/lfg.h"
+#include "libavutil/float_dsp.h"
+
+typedef struct ATRAC9ChannelData {
+    int band_ext;
+    int q_unit_cnt;
+    int band_ext_data[4];
+    int32_t scalefactors[31];
+    int32_t scalefactors_prev[31];
+
+    int precision_coarse[30];
+    int precision_fine[30];
+    int precision_mask[30];
+
+    int codebookset[30];
+
+    int32_t q_coeffs_coarse[256];
+    int32_t q_coeffs_fine[256];
+
+    DECLARE_ALIGNED(32, float, coeffs  )[256];
+    DECLARE_ALIGNED(32, float, prev_win)[128];
+} ATRAC9ChannelData;
+
+typedef struct ATRAC9BlockData {
+    ATRAC9ChannelData channel[2];
+
+    /* Base */
+    int band_count;
+    int q_unit_cnt;
+    int q_unit_cnt_prev;
+
+    /* Stereo block only */
+    int stereo_q_unit;
+
+    /* Band extension only */
+    int has_band_ext;
+    int has_band_ext_data;
+    int band_ext_q_unit;
+
+    /* Gradient */
+    int grad_mode;
+    int grad_boundary;
+    int gradient[31];
+
+    /* Stereo */
+    int cpe_base_channel;
+    int is_signs[30];
+
+} ATRAC9BlockData;
+
+typedef struct ATRAC9Context {
+    AVCodecContext *avctx;
+    AVFloatDSPContext *fdsp;
+    FFTContext imdct;
+    ATRAC9BlockData block[5];
+    AVLFG lfg;
+
+    /* Set on init */
+    int frame_log2;
+    int avg_frame_size;
+    int frame_count;
+    int samplerate_idx;
+    const ATRAC9BlockConfig *block_config;
+
+    /* Generated on init */
+    VLC sf_vlc[2][8];            /* Signed/unsigned, length */
+    VLC coeff_vlc[2][8][4];      /* Cookbook, precision, cookbook index */
+    uint8_t alloc_curve[48][48];
+    DECLARE_ALIGNED(32, float, imdct_win)[256];
+
+    DECLARE_ALIGNED(32, float, temp)[256];
+} ATRAC9Context;
+
+static inline int parse_gradient(ATRAC9Context *s, ATRAC9BlockData *b,
+                                 GetBitContext *gb)
+{
+    int grad_range[2];
+    int grad_value[2];
+    int values, sign, base;
+    uint8_t *curve;
+    float scale;
+
+    b->grad_mode = get_bits(gb, 2);
+    if (b->grad_mode) {
+        grad_range[0] = get_bits(gb, 5);
+        grad_range[1] = 31;
+        grad_value[0] = get_bits(gb, 5);
+        grad_value[1] = 31;
+    } else {
+        grad_range[0] = get_bits(gb, 6);
+        grad_range[1] = get_bits(gb, 6) + 1;
+        grad_value[0] = get_bits(gb, 5);
+        grad_value[1] = get_bits(gb, 5);
+    }
+    b->grad_boundary = get_bits(gb, 4);
+
+    if (grad_range[0] >= grad_range[1] || grad_range[1] > 47)
+        return AVERROR_INVALIDDATA;
+
+    if (grad_value[0] > 31 || grad_value[1] > 31)
+        return AVERROR_INVALIDDATA;
+
+    if (b->grad_boundary > b->q_unit_cnt)
+        return AVERROR_INVALIDDATA;
+
+    values    = grad_value[1] - grad_value[0];
+    sign      = 1 - 2*(values < 0);
+    base      = grad_value[0] + sign;
+    scale     = (FFABS(values) - 1) / 31.0f;
+    curve     = s->alloc_curve[grad_range[1] - grad_range[0] - 1];
+
+    for (int i = 0; i <= b->q_unit_cnt; i++)
+        b->gradient[i] = grad_value[i >= grad_range[0]];
+
+    for (int i = grad_range[0]; i < grad_range[1]; i++)
+        b->gradient[i] = base + sign*((int)(scale*curve[i - grad_range[0]]));
+
+    return 0;
+}
+
+static inline void calc_precision(ATRAC9Context *s, ATRAC9BlockData *b,
+                                  ATRAC9ChannelData *c)
+{
+    memset(c->precision_mask, 0, sizeof(c->precision_mask));
+    for (int i = 1; i < b->q_unit_cnt; i++) {
+        const int delta = FFABS(c->scalefactors[i] - c->scalefactors[i - 1]) - 1;
+        if (delta > 0) {
+            const int neg = c->scalefactors[i - 1] > c->scalefactors[i];
+            c->precision_mask[i - neg] += FFMIN(delta, 5);
+        }
+    }
+
+    if (b->grad_mode) {
+        for (int i = 0; i < b->q_unit_cnt; i++) {
+            c->precision_coarse[i] = c->scalefactors[i];
+            c->precision_coarse[i] += c->precision_mask[i] - b->gradient[i];
+            if (c->precision_coarse[i] < 0)
+                continue;
+            switch (b->grad_mode) {
+            case 1:
+                c->precision_coarse[i] >>= 1;
+                break;
+            case 2:
+                c->precision_coarse[i] = (3 * c->precision_coarse[i]) >> 3;
+                break;
+            case 3:
+                c->precision_coarse[i] >>= 2;
+                break;
+            }
+        }
+    } else {
+        for (int i = 0; i < b->q_unit_cnt; i++)
+            c->precision_coarse[i] = c->scalefactors[i] - b->gradient[i];
+    }
+
+
+    for (int i = 0; i < b->q_unit_cnt; i++)
+        c->precision_coarse[i] = FFMAX(c->precision_coarse[i], 1);
+
+    for (int i = 0; i < b->grad_boundary; i++)
+        c->precision_coarse[i]++;
+
+    for (int i = 0; i < b->q_unit_cnt; i++) {
+        c->precision_fine[i] = 0;
+        if (c->precision_coarse[i] > 15) {
+            c->precision_fine[i] = c->precision_coarse[i] - 15;
+            c->precision_coarse[i] = 15;
+        }
+    }
+}
+
+static inline int parse_band_ext(ATRAC9Context *s, ATRAC9BlockData *b,
+                                 GetBitContext *gb, int stereo)
+{
+    int ext_band = 0;
+
+    if (b->has_band_ext) {
+        ext_band = at9_tab_band_ext_group[b->q_unit_cnt - 13][2];
+        if (stereo) {
+            b->channel[1].band_ext = get_bits(gb, 2);
+            b->channel[1].band_ext = ext_band > 2 ? b->channel[1].band_ext : 4;
+        } else {
+            skip_bits1(gb);
+        }
+    }
+
+    b->has_band_ext_data = get_bits1(gb);
+    if (!b->has_band_ext_data)
+        return 0;
+
+    if (!b->has_band_ext) {
+        skip_bits(gb, 2);
+        skip_bits_long(gb, get_bits(gb, 5));
+        return 0;
+    }
+
+    b->channel[0].band_ext = get_bits(gb, 2);
+    b->channel[0].band_ext = ext_band > 2 ? b->channel[0].band_ext : 4;
+
+    if (!get_bits(gb, 5))
+        return 0;
+
+    for (int i = 0; i <= stereo; i++) {
+        ATRAC9ChannelData *c = &b->channel[i];
+        const int count = at9_tab_band_ext_cnt[c->band_ext][ext_band];
+        for (int j = 0; j < count; j++) {
+            int len = at9_tab_band_ext_lengths[c->band_ext][ext_band][j];
+            c->band_ext_data[j] = get_bits(gb, len);
+        }
+    }
+
+    return 0;
+}
+
+static inline int read_scalefactors(ATRAC9Context *s, ATRAC9BlockData *b,
+                                    ATRAC9ChannelData *c, GetBitContext *gb,
+                                    int channel_idx, int first_in_pkt)
+{
+    static const int mode_map[2][4] = { { 0, 1, 2, 3 }, { 0, 2, 3, 4 } };
+    const int mode = mode_map[channel_idx][get_bits(gb, 2)];
+
+    memset(c->scalefactors, 0, sizeof(c->scalefactors));
+
+    if (first_in_pkt && (mode == 4 || ((mode == 3) && !channel_idx))) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid scalefactor coding mode!\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    switch (mode) {
+    case 0: { /* VLC delta offset */
+        const uint8_t *sf_weights = at9_tab_sf_weights[get_bits(gb, 3)];
+        const int base = get_bits(gb, 5);
+        const int len = get_bits(gb, 2) + 3;
+        const VLC *tab = &s->sf_vlc[0][len];
+
+        c->scalefactors[0] = get_bits(gb, len);
+
+        for (int i = 1; i < b->band_ext_q_unit; i++) {
+            int val = c->scalefactors[i - 1] + get_vlc2(gb, tab->table, 9, 2);
+            c->scalefactors[i] = val & ((1 << len) - 1);
+        }
+
+        for (int i = 0; i < b->band_ext_q_unit; i++)
+            c->scalefactors[i] += base - sf_weights[i];
+
+        break;
+    }
+    case 1: { /* CLC offset */
+        const int len = get_bits(gb, 2) + 2;
+        const int base = len < 5 ? get_bits(gb, 5) : 0;
+        for (int i = 0; i < b->band_ext_q_unit; i++)
+            c->scalefactors[i] = base + get_bits(gb, len);
+        break;
+    }
+    case 2:
+    case 4: { /* VLC dist to baseline */
+        const int *baseline = mode == 4 ? c->scalefactors_prev :
+                              channel_idx ? b->channel[0].scalefactors :
+                              c->scalefactors_prev;
+        const int baseline_len = mode == 4 ? b->q_unit_cnt_prev :
+                                 channel_idx ? b->band_ext_q_unit :
+                                 b->q_unit_cnt_prev;
+
+        const int len = get_bits(gb, 2) + 2;
+        const int unit_cnt = FFMIN(b->band_ext_q_unit, baseline_len);
+        const VLC *tab = &s->sf_vlc[1][len];
+
+        for (int i = 0; i < unit_cnt; i++) {
+            int dist = get_vlc2(gb, tab->table, 9, 2);
+            c->scalefactors[i] = baseline[i] + dist;
+        }
+
+        for (int i = unit_cnt; i < b->band_ext_q_unit; i++)
+            c->scalefactors[i] = get_bits(gb, 5);
+
+        break;
+    }
+    case 3: { /* VLC offset with baseline */
+        const int *baseline = channel_idx ? b->channel[0].scalefactors :
+                              c->scalefactors_prev;
+        const int baseline_len = channel_idx ? b->band_ext_q_unit :
+                                 b->q_unit_cnt_prev;
+
+        const int base = get_bits(gb, 5) - (1 << (5 - 1));
+        const int len = get_bits(gb, 2) + 1;
+        const int unit_cnt = FFMIN(b->band_ext_q_unit, baseline_len);
+        const VLC *tab = &s->sf_vlc[0][len];
+
+        c->scalefactors[0] = get_bits(gb, len);
+
+        for (int i = 1; i < unit_cnt; i++) {
+            int val = c->scalefactors[i - 1] + get_vlc2(gb, tab->table, 9, 2);
+            c->scalefactors[i] = val & ((1 << len) - 1);
+        }
+
+        for (int i = 0; i < unit_cnt; i++)
+            c->scalefactors[i] += base + baseline[i];
+
+        for (int i = unit_cnt; i < b->band_ext_q_unit; i++)
+            c->scalefactors[i] = get_bits(gb, 5);
+        break;
+    }
+    }
+
+    for (int i = 0; i < b->band_ext_q_unit; i++)
+        if (c->scalefactors[i] < 0 || c->scalefactors[i] > 31)
+            return AVERROR_INVALIDDATA;
+
+    memcpy(c->scalefactors_prev, c->scalefactors, sizeof(c->scalefactors));
+
+    return 0;
+}
+
+static inline void calc_codebook_idx(ATRAC9Context *s, ATRAC9BlockData *b,
+                                     ATRAC9ChannelData *c)
+{
+    int avg = 0;
+    const int last_sf = c->scalefactors[c->q_unit_cnt];
+
+    memset(c->codebookset, 0, sizeof(c->codebookset));
+
+    if (c->q_unit_cnt <= 1)
+        return;
+    if (s->samplerate_idx > 7)
+        return;
+
+    c->scalefactors[c->q_unit_cnt] = c->scalefactors[c->q_unit_cnt - 1];
+
+    if (c->q_unit_cnt > 12) {
+        for (int i = 0; i < 12; i++)
+            avg += c->scalefactors[i];
+        avg = (avg + 6) / 12;
+    }
+
+    for (int i = 8; i < c->q_unit_cnt; i++) {
+        const int prev = c->scalefactors[i - 1];
+        const int cur  = c->scalefactors[i    ];
+        const int next = c->scalefactors[i + 1];
+        const int min  = FFMIN(prev, next);
+        if ((cur - min >= 3 || 2*cur - prev - next >= 3))
+            c->codebookset[i] = 1;
+    }
+
+
+    for (int i = 12; i < c->q_unit_cnt; i++) {
+        const int cur = c->scalefactors[i];
+        const int cnd = at9_q_unit_to_coeff_cnt[i] == 16;
+        const int min = FFMIN(c->scalefactors[i + 1], c->scalefactors[i - 1]);
+        if (c->codebookset[i])
+            continue;
+
+        c->codebookset[i] = (((cur - min) >= 2) && (cur >= (avg - cnd)));
+    }
+
+    c->scalefactors[c->q_unit_cnt] = last_sf;
+}
+
+static inline void read_coeffs_coarse(ATRAC9Context *s, ATRAC9BlockData *b,
+                                      ATRAC9ChannelData *c, GetBitContext *gb)
+{
+    const int max_prec = s->samplerate_idx > 7 ? 1 : 7;
+
+    memset(c->q_coeffs_coarse, 0, sizeof(c->q_coeffs_coarse));
+
+    for (int i = 0; i < c->q_unit_cnt; i++) {
+        int *coeffs = &c->q_coeffs_coarse[at9_q_unit_to_coeff_idx[i]];
+        const int bands = at9_q_unit_to_coeff_cnt[i];
+        const int prec = c->precision_coarse[i] + 1;
+
+        if (prec <= max_prec) {
+            const int cb = c->codebookset[i];
+            const int cbi = at9_q_unit_to_codebookidx[i];
+            const VLC *tab = &s->coeff_vlc[cb][prec][cbi];
+            const HuffmanCodebook *huff = &at9_huffman_coeffs[cb][prec][cbi];
+            const int groups = bands >> huff->value_cnt_pow;
+
+            for (int j = 0; j < groups; j++) {
+                uint16_t val = get_vlc2(gb, tab->table, 9, huff->max_bit_size);
+
+                for (int k = 0; k < huff->value_cnt; k++) {
+                    coeffs[k] = sign_extend(val, huff->value_bits);
+                    val >>= huff->value_bits;
+                }
+
+                coeffs += huff->value_cnt;
+            }
+        } else {
+            for (int j = 0; j < bands; j++)
+                coeffs[j] = sign_extend(get_bits(gb, prec), prec);
+        }
+    }
+}
+
+static inline void read_coeffs_fine(ATRAC9Context *s, ATRAC9BlockData *b,
+                                    ATRAC9ChannelData *c, GetBitContext *gb)
+{
+    memset(c->q_coeffs_fine, 0, sizeof(c->q_coeffs_fine));
+
+    for (int i = 0; i < c->q_unit_cnt; i++) {
+        const int start = at9_q_unit_to_coeff_idx[i + 0];
+        const int end   = at9_q_unit_to_coeff_idx[i + 1];
+        const int len   = c->precision_fine[i] + 1;
+
+        if (c->precision_fine[i] <= 0)
+            continue;
+
+        for (int j = start; j < end; j++)
+            c->q_coeffs_fine[j] = sign_extend(get_bits(gb, len), len);
+    }
+}
+
+static inline void dequantize(ATRAC9Context *s, ATRAC9BlockData *b,
+                              ATRAC9ChannelData *c)
+{
+    memset(c->coeffs, 0, sizeof(c->coeffs));
+
+    for (int i = 0; i < c->q_unit_cnt; i++) {
+        const int start = at9_q_unit_to_coeff_idx[i + 0];
+        const int end   = at9_q_unit_to_coeff_idx[i + 1];
+
+        const float coarse_c = at9_quant_step_coarse[c->precision_coarse[i]];
+        const float fine_c   = at9_quant_step_fine[c->precision_fine[i]];
+
+        for (int j = start; j < end; j++) {
+            const float vc = c->q_coeffs_coarse[j] * coarse_c;
+            const float vf = c->q_coeffs_fine[j]   * fine_c;
+            c->coeffs[j] = vc + vf;
+        }
+    }
+}
+
+static inline void apply_intensity_stereo(ATRAC9Context *s, ATRAC9BlockData *b,
+                                          const int stereo)
+{
+    float *src = b->channel[ b->cpe_base_channel].coeffs;
+    float *dst = b->channel[!b->cpe_base_channel].coeffs;
+
+    if (!stereo)
+        return;
+
+    if (b->q_unit_cnt <= b->stereo_q_unit)
+        return;
+
+    for (int i = b->stereo_q_unit; i < b->q_unit_cnt; i++) {
+        const int sign  = b->is_signs[i];
+        const int start = at9_q_unit_to_coeff_idx[i + 0];
+        const int end   = at9_q_unit_to_coeff_idx[i + 1];
+        for (int j = start; j < end; j++)
+            dst[j] = sign*src[j];
+    }
+}
+
+static inline void apply_scalefactors(ATRAC9Context *s, ATRAC9BlockData *b,
+                                      const int stereo)
+{
+    for (int i = 0; i <= stereo; i++) {
+        float *coeffs = b->channel[i].coeffs;
+        for (int j = 0; j < b->q_unit_cnt; j++) {
+            const int start = at9_q_unit_to_coeff_idx[j + 0];
+            const int end   = at9_q_unit_to_coeff_idx[j + 1];
+            const int scalefactor = b->channel[i].scalefactors[j];
+            const float scale = at9_scalefactor_c[scalefactor];
+            for (int k = start; k < end; k++)
+                coeffs[k] *= scale;
+        }
+    }
+}
+
+static inline void fill_with_noise(ATRAC9Context *s, ATRAC9ChannelData *c,
+                                   int start, int count)
+{
+    float maxval = 0.0f;
+    for (int i = 0; i < count; i += 2) {
+        double tmp[2];
+        av_bmg_get(&s->lfg, tmp);
+        c->coeffs[start + i + 0] = tmp[0];
+        c->coeffs[start + i + 1] = tmp[1];
+        maxval = FFMAX(FFMAX(FFABS(tmp[0]), FFABS(tmp[1])), maxval);
+    }
+    /* Normalize */
+    for (int i = 0; i < count; i++)
+        c->coeffs[start + i] /= maxval;
+}
+
+static inline void scale_band_ext_coeffs(ATRAC9ChannelData *c, float sf[6],
+                                         const int s_unit, const int e_unit)
+{
+    for (int i = s_unit; i < e_unit; i++) {
+        const int start = at9_q_unit_to_coeff_idx[i + 0];
+        const int end   = at9_q_unit_to_coeff_idx[i + 1];
+        for (int j = start; j < end; j++)
+            c->coeffs[j] *= sf[i - s_unit];
+    }
+}
+
+static inline void apply_band_extension(ATRAC9Context *s, ATRAC9BlockData *b,
+                                       const int stereo)
+{
+    const int g_units[4] = { /* A, B, C, total units */
+        b->q_unit_cnt,
+        at9_tab_band_ext_group[b->q_unit_cnt - 13][0],
+        at9_tab_band_ext_group[b->q_unit_cnt - 13][1],
+        FFMAX(g_units[2], 22),
+    };
+
+    const int g_bins[4] = { /* A, B, C, total bins */
+        at9_q_unit_to_coeff_idx[g_units[0]],
+        at9_q_unit_to_coeff_idx[g_units[1]],
+        at9_q_unit_to_coeff_idx[g_units[2]],
+        at9_q_unit_to_coeff_idx[g_units[3]],
+    };
+
+    if (!b->has_band_ext || !b->has_band_ext_data)
+        return;
+
+    for (int ch = 0; ch <= stereo; ch++) {
+        ATRAC9ChannelData *c = &b->channel[ch];
+
+        /* Mirror the spectrum */
+        for (int i = 0; i < 3; i++)
+            for (int j = 0; j < (g_bins[i + 1] - g_bins[i + 0]); j++)
+                c->coeffs[g_bins[i] + j] = c->coeffs[g_bins[i] - j - 1];
+
+        switch (c->band_ext) {
+        case 0: {
+            float sf[6] = { 0.0f };
+            const int l = g_units[3] - g_units[0] - 1;
+            const int n_start = at9_q_unit_to_coeff_idx[g_units[3] - 1];
+            const int n_cnt   = at9_q_unit_to_coeff_cnt[g_units[3] - 1];
+            switch (at9_tab_band_ext_group[b->q_unit_cnt - 13][2]) {
+            case 3:
+                sf[0] = at9_band_ext_scales_m0[0][0][c->band_ext_data[0]];
+                sf[1] = at9_band_ext_scales_m0[0][1][c->band_ext_data[0]];
+                sf[2] = at9_band_ext_scales_m0[0][2][c->band_ext_data[1]];
+                sf[3] = at9_band_ext_scales_m0[0][3][c->band_ext_data[2]];
+                sf[4] = at9_band_ext_scales_m0[0][4][c->band_ext_data[3]];
+                break;
+            case 4:
+                sf[0] = at9_band_ext_scales_m0[1][0][c->band_ext_data[0]];
+                sf[1] = at9_band_ext_scales_m0[1][1][c->band_ext_data[0]];
+                sf[2] = at9_band_ext_scales_m0[1][2][c->band_ext_data[1]];
+                sf[3] = at9_band_ext_scales_m0[1][3][c->band_ext_data[2]];
+                sf[4] = at9_band_ext_scales_m0[1][4][c->band_ext_data[3]];
+                break;
+            case 5:
+                sf[0] = at9_band_ext_scales_m0[2][0][c->band_ext_data[0]];
+                sf[1] = at9_band_ext_scales_m0[2][1][c->band_ext_data[1]];
+                sf[2] = at9_band_ext_scales_m0[2][2][c->band_ext_data[1]];
+                break;
+            }
+
+            sf[l] = at9_scalefactor_c[c->scalefactors[g_units[0]]];
+
+            fill_with_noise(s, c, n_start, n_cnt);
+            scale_band_ext_coeffs(c, sf, g_units[0], g_units[3]);
+            break;
+        }
+        case 1: {
+            float sf[6];
+            for (int i = g_units[0]; i < g_units[3]; i++)
+                sf[i - g_units[0]] = at9_scalefactor_c[c->scalefactors[i]];
+
+            fill_with_noise(s, c, g_bins[0], g_bins[3] - g_bins[0]);
+            scale_band_ext_coeffs(c, sf, g_units[0], g_units[3]);
+            break;
+        }
+        case 2: {
+            const float g_sf[2] = {
+                at9_band_ext_scales_m2[c->band_ext_data[0]],
+                at9_band_ext_scales_m2[c->band_ext_data[1]],
+            };
+
+            for (int i = 0; i < 2; i++)
+                for (int j = g_bins[i + 0]; j < g_bins[i + 1]; j++)
+                    c->coeffs[j] *= g_sf[i];
+            break;
+        }
+        case 3: {
+            float scale = at9_band_ext_scales_m3[c->band_ext_data[0]][0];
+            float rate  = at9_band_ext_scales_m3[c->band_ext_data[1]][1];
+            rate = pow(2, rate);
+            for (int i = g_bins[0]; i < g_bins[3]; i++) {
+                scale *= rate;
+                c->coeffs[i] *= scale;
+            }
+            break;
+        }
+        case 4: {
+            const float m = at9_band_ext_scales_m4[c->band_ext_data[0]];
+            const float g_sf[3] = { 0.7079468f*m, 0.5011902f*m, 0.3548279f*m };
+
+            for (int i = 0; i < 3; i++)
+                for (int j = g_bins[i + 0]; j < g_bins[i + 1]; j++)
+                    c->coeffs[j] *= g_sf[i];
+            break;
+        }
+        }
+    }
+}
+
+static int atrac9_decode_block(ATRAC9Context *s, GetBitContext *gb,
+                               ATRAC9BlockData *b, AVFrame *frame,
+                               int frame_idx, int block_idx)
+{
+    const int first_in_pkt = !get_bits1(gb);
+    const int reuse_params =  get_bits1(gb);
+    const int stereo = s->block_config->type[block_idx] == ATRAC9_BLOCK_TYPE_CPE;
+
+    if (s->block_config->type[block_idx] == ATRAC9_BLOCK_TYPE_LFE) {
+        ATRAC9ChannelData *c = &b->channel[0];
+        const int precision = reuse_params ? 8 : 4;
+        c->q_unit_cnt = b->q_unit_cnt = 2;
+
+        memset(c->scalefactors, 0, sizeof(c->scalefactors));
+        memset(c->q_coeffs_fine, 0, sizeof(c->q_coeffs_fine));
+        memset(c->q_coeffs_coarse, 0, sizeof(c->q_coeffs_coarse));
+
+        for (int i = 0; i < b->q_unit_cnt; i++) {
+            c->scalefactors[i] = get_bits(gb, 5);
+            c->precision_coarse[i] = precision;
+            c->precision_fine[i] = 0;
+        }
+
+        for (int i = 0; i < c->q_unit_cnt; i++) {
+            const int start = at9_q_unit_to_coeff_idx[i + 0];
+            const int end   = at9_q_unit_to_coeff_idx[i + 1];
+            for (int j = start; j < end; j++)
+                c->q_coeffs_coarse[j] = get_bits(gb, c->precision_coarse[i] + 1);
+        }
+
+        dequantize        (s, b, c);
+        apply_scalefactors(s, b, 0);
+
+        goto imdct;
+    }
+
+    if (first_in_pkt && reuse_params) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid block flags!\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Band parameters */
+    if (!reuse_params) {
+        int stereo_band, ext_band;
+        const int min_band_count = s->samplerate_idx > 7 ? 1 : 3;
+        b->band_count = get_bits(gb, 4) + min_band_count;
+        b->q_unit_cnt = at9_tab_band_q_unit_map[b->band_count];
+
+        b->band_ext_q_unit = b->stereo_q_unit = b->q_unit_cnt;
+
+        if (b->band_count > at9_tab_sri_max_bands[s->samplerate_idx]) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid band count %i!\n",
+                   b->band_count);
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (stereo) {
+            stereo_band = get_bits(gb, 4) + min_band_count;
+            if (stereo_band > b->band_count) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid stereo band %i!\n",
+                       stereo_band);
+                return AVERROR_INVALIDDATA;
+            }
+            b->stereo_q_unit = at9_tab_band_q_unit_map[stereo_band];
+        }
+
+        b->has_band_ext = get_bits1(gb);
+        if (b->has_band_ext) {
+            ext_band = get_bits(gb, 4) + min_band_count;
+            if (ext_band < b->band_count) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid extension band %i!\n",
+                       ext_band);
+                return AVERROR_INVALIDDATA;
+            }
+            b->band_ext_q_unit = at9_tab_band_q_unit_map[ext_band];
+        }
+    }
+
+    /* Calculate bit alloc gradient */
+    if (parse_gradient(s, b, gb))
+        return AVERROR_INVALIDDATA;
+
+    /* IS data */
+    b->cpe_base_channel = 0;
+    if (stereo) {
+        b->cpe_base_channel = get_bits1(gb);
+        if (get_bits1(gb)) {
+            for (int i = b->stereo_q_unit; i < b->q_unit_cnt; i++)
+                b->is_signs[i] = 1 - 2*get_bits1(gb);
+        } else {
+            for (int i = 0; i < FF_ARRAY_ELEMS(b->is_signs); i++)
+                b->is_signs[i] = 1;
+        }
+    }
+
+    /* Band extension */
+    if (parse_band_ext(s, b, gb, stereo))
+        return AVERROR_INVALIDDATA;
+
+    /* Scalefactors */
+    for (int i = 0; i <= stereo; i++) {
+        ATRAC9ChannelData *c = &b->channel[i];
+        c->q_unit_cnt = i == b->cpe_base_channel ? b->q_unit_cnt :
+                                                   b->stereo_q_unit;
+        if (read_scalefactors(s, b, c, gb, i, first_in_pkt))
+            return AVERROR_INVALIDDATA;
+
+        calc_precision    (s, b, c);
+        calc_codebook_idx (s, b, c);
+        read_coeffs_coarse(s, b, c, gb);
+        read_coeffs_fine  (s, b, c, gb);
+        dequantize        (s, b, c);
+    }
+
+    b->q_unit_cnt_prev = b->has_band_ext ? b->band_ext_q_unit : b->q_unit_cnt;
+
+    apply_intensity_stereo(s, b, stereo);
+    apply_scalefactors    (s, b, stereo);
+    apply_band_extension  (s, b, stereo);
+
+imdct:
+    for (int i = 0; i <= stereo; i++) {
+        ATRAC9ChannelData *c = &b->channel[i];
+        const int dst_idx = s->block_config->plane_map[block_idx][i];
+        const int wsize = 1 << s->frame_log2;
+        const ptrdiff_t offset = wsize*frame_idx*sizeof(float);
+        float *dst = (float *)(frame->extended_data[dst_idx] + offset);
+
+        s->imdct.imdct_half(&s->imdct, s->temp, c->coeffs);
+        s->fdsp->vector_fmul_window(dst, c->prev_win, s->temp,
+                                    s->imdct_win, wsize >> 1);
+        memcpy(c->prev_win, s->temp + (wsize >> 1), sizeof(float)*wsize >> 1);
+    }
+
+    return 0;
+}
+
+static int atrac9_decode_frame(AVCodecContext *avctx, void *data,
+                               int *got_frame_ptr, AVPacket *avpkt)
+{
+    int ret;
+    GetBitContext gb;
+    AVFrame *frame = data;
+    ATRAC9Context *s = avctx->priv_data;
+    const int frames = FFMIN(avpkt->size / s->avg_frame_size, s->frame_count);
+
+    frame->nb_samples = (1 << s->frame_log2) * frames;
+    ret = ff_get_buffer(avctx, frame, 0);
+    if (ret < 0)
+        return ret;
+
+    init_get_bits8(&gb, avpkt->data, avpkt->size);
+
+    for (int i = 0; i < frames; i++) {
+        for (int j = 0; j < s->block_config->count; j++) {
+            ret = atrac9_decode_block(s, &gb, &s->block[j], frame, i, j);
+            if (ret)
+                return ret;
+            align_get_bits(&gb);
+        }
+    }
+
+    *got_frame_ptr = 1;
+
+    return avctx->block_align;
+}
+
+static void atrac9_decode_flush(AVCodecContext *avctx)
+{
+    ATRAC9Context *s = avctx->priv_data;
+
+    for (int j = 0; j < s->block_config->count; j++) {
+        ATRAC9BlockData *b = &s->block[j];
+        const int stereo = s->block_config->type[j] == ATRAC9_BLOCK_TYPE_CPE;
+        for (int i = 0; i <= stereo; i++) {
+            ATRAC9ChannelData *c = &b->channel[i];
+            memset(c->prev_win, 0, sizeof(c->prev_win));
+        }
+    }
+}
+
+static av_cold int atrac9_decode_close(AVCodecContext *avctx)
+{
+    ATRAC9Context *s = avctx->priv_data;
+
+    for (int i = 1; i < 7; i++)
+        ff_free_vlc(&s->sf_vlc[0][i]);
+    for (int i = 2; i < 6; i++)
+        ff_free_vlc(&s->sf_vlc[1][i]);
+    for (int i = 0; i < 2; i++)
+        for (int j = 0; j < 8; j++)
+            for (int k = 0; k < 4; k++)
+                ff_free_vlc(&s->coeff_vlc[i][j][k]);
+
+    ff_mdct_end(&s->imdct);
+    av_free(s->fdsp);
+
+    return 0;
+}
+
+static av_cold int atrac9_decode_init(AVCodecContext *avctx)
+{
+    GetBitContext gb;
+    ATRAC9Context *s = avctx->priv_data;
+    int version, block_config_idx, superframe_idx, alloc_c_len;
+
+    s->avctx = avctx;
+
+    av_lfg_init(&s->lfg, 0xFBADF00D);
+
+    if (avctx->extradata_size != 12) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid extradata length!\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    version = AV_RL32(avctx->extradata);
+    if (version > 2) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported version (%i)!\n", version);
+        return AVERROR_INVALIDDATA;
+    }
+
+    init_get_bits8(&gb, avctx->extradata + 4, avctx->extradata_size);
+
+    if (get_bits(&gb, 8) != 0xFE) {
+        av_log(avctx, AV_LOG_ERROR, "Incorrect magic byte!\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->samplerate_idx = get_bits(&gb, 4);
+    avctx->sample_rate = at9_tab_samplerates[s->samplerate_idx];
+
+    block_config_idx = get_bits(&gb, 3);
+    if (block_config_idx > 5) {
+        av_log(avctx, AV_LOG_ERROR, "Incorrect block config!\n");
+        return AVERROR_INVALIDDATA;
+    }
+    s->block_config = &at9_block_layout[block_config_idx];
+
+    avctx->channel_layout = s->block_config->channel_layout;
+    avctx->sample_fmt     = AV_SAMPLE_FMT_FLTP;
+
+    if (get_bits1(&gb)) {
+        av_log(avctx, AV_LOG_ERROR, "Incorrect verification bit!\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Average frame size in bytes */
+    s->avg_frame_size = get_bits(&gb, 11) + 1;
+
+    superframe_idx = get_bits(&gb, 2);
+    if (superframe_idx & 1) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid superframe index!\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->frame_count = 1 << superframe_idx;
+    s->frame_log2  = at9_tab_sri_frame_log2[s->samplerate_idx];
+
+    if (ff_mdct_init(&s->imdct, s->frame_log2 + 1, 1, 1.0f / 32768.0f))
+        return AVERROR(ENOMEM);
+
+    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
+    if (!s->fdsp)
+        return AVERROR(ENOMEM);
+
+    /* iMDCT window */
+    for (int i = 0; i < (1 << s->frame_log2); i++) {
+        const int   len  = 1 << s->frame_log2;
+        const float sidx = (      i + 0.5f) / len;
+        const float eidx = (len - i - 0.5f) / len;
+        const float s_c  = sinf(sidx*M_PI - M_PI_2)*0.5f + 0.5f;
+        const float e_c  = sinf(eidx*M_PI - M_PI_2)*0.5f + 0.5f;
+        s->imdct_win[i]  = s_c / ((s_c * s_c) + (e_c * e_c));
+    }
+
+    /* Allocation curve */
+    alloc_c_len = FF_ARRAY_ELEMS(at9_tab_b_dist);
+    for (int i = 1; i <= alloc_c_len; i++)
+        for (int j = 0; j < i; j++)
+            s->alloc_curve[i - 1][j] = at9_tab_b_dist[(j * alloc_c_len) / i];
+
+    /* Unsigned scalefactor VLCs */
+    for (int i = 1; i < 7; i++) {
+        const HuffmanCodebook *hf = &at9_huffman_sf_unsigned[i];
+
+        init_vlc(&s->sf_vlc[0][i], 9, hf->size, hf->bits, 1, 1, hf->codes,
+                 2, 2, 0);
+    }
+
+    /* Signed scalefactor VLCs */
+    for (int i = 2; i < 6; i++) {
+        const HuffmanCodebook *hf = &at9_huffman_sf_signed[i];
+
+        int nums = hf->size;
+        int16_t sym[32];
+        for (int j = 0; j < nums; j++)
+            sym[j] = sign_extend(j, hf->value_bits);
+
+        ff_init_vlc_sparse(&s->sf_vlc[1][i], 9, hf->size, hf->bits, 1, 1,
+                           hf->codes, 2, 2, sym, sizeof(*sym), sizeof(*sym), 0);
+    }
+
+    /* Coefficient VLCs */
+    for (int i = 0; i < 2; i++) {
+        for (int j = 0; j < 8; j++) {
+            for (int k = 0; k < 4; k++) {
+                const HuffmanCodebook *hf = &at9_huffman_coeffs[i][j][k];
+                init_vlc(&s->coeff_vlc[i][j][k], 9, hf->size, hf->bits, 1, 1,
+                         hf->codes, 2, 2, 0);
+            }
+        }
+    }
+
+    return 0;
+}
+
+AVCodec ff_atrac9_decoder = {
+    .name           = "atrac9",
+    .long_name      = NULL_IF_CONFIG_SMALL("ATRAC9 (Adaptive TRansform Acoustic Coding 9)"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_ATRAC9,
+    .priv_data_size = sizeof(ATRAC9Context),
+    .init           = atrac9_decode_init,
+    .close          = atrac9_decode_close,
+    .decode         = atrac9_decode_frame,
+    .flush          = atrac9_decode_flush,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
+};

diff --git a/libavcodec/atrac9tab.h b/libavcodec/atrac9tab.h
new file mode 100644
index 0000000..d25c6f1
--- /dev/null
+++ b/libavcodec/atrac9tab.h

@@ -0,0 +1,1633 @@
+/*
+ * ATRAC9 decoder
+ * Copyright (c) 2018 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ATRAC9TAB_H
+#define AVCODEC_ATRAC9TAB_H
+
+#include <stdint.h>
+
+#include "libavutil/channel_layout.h"
+
+enum ATRAC9BlockType {
+    ATRAC9_BLOCK_TYPE_SCE, /* Single channel */
+    ATRAC9_BLOCK_TYPE_CPE, /* 2 coupled channels */
+    ATRAC9_BLOCK_TYPE_LFE, /* Single LFE channel */
+};
+
+typedef struct ATRAC9BlockConfig {
+    uint64_t channel_layout;
+    enum ATRAC9BlockType type[5];
+    int plane_map[5][2];
+    int count;
+} ATRAC9BlockConfig;
+
+static const ATRAC9BlockConfig at9_block_layout[] = {
+    { /* Mono */
+        AV_CH_LAYOUT_MONO,
+        {
+            ATRAC9_BLOCK_TYPE_SCE,
+        },
+        { { 0 }, },
+        1,
+    },
+    { /* Dual Mono */
+        AV_CH_LAYOUT_STEREO,
+        {
+            ATRAC9_BLOCK_TYPE_SCE,
+            ATRAC9_BLOCK_TYPE_SCE,
+        },
+        { { 0 }, { 1 }, },
+        2,
+    },
+    { /* Stereo */
+        AV_CH_LAYOUT_STEREO,
+        {
+            ATRAC9_BLOCK_TYPE_CPE,
+        },
+        { { 0, 1 }, },
+        1,
+    },
+    { /* 5.1 */
+        AV_CH_LAYOUT_5POINT1,
+        {
+            ATRAC9_BLOCK_TYPE_CPE,
+            ATRAC9_BLOCK_TYPE_SCE,
+            ATRAC9_BLOCK_TYPE_LFE,
+            ATRAC9_BLOCK_TYPE_CPE,
+        },
+        { { 0, 1 }, { 2 }, { 3 }, { 4, 5 }, },
+        4,
+    },
+    { /* 5.1 */
+        AV_CH_LAYOUT_7POINT1,
+        {
+            ATRAC9_BLOCK_TYPE_CPE,
+            ATRAC9_BLOCK_TYPE_SCE,
+            ATRAC9_BLOCK_TYPE_LFE,
+            ATRAC9_BLOCK_TYPE_CPE,
+            ATRAC9_BLOCK_TYPE_CPE,
+        },
+        { { 0, 1 }, { 2 }, { 3 }, { 4, 5 }, { 6, 7 }, },
+        5,
+    },
+    { /* Quad */
+        AV_CH_LAYOUT_QUAD,
+        {
+            ATRAC9_BLOCK_TYPE_CPE,
+            ATRAC9_BLOCK_TYPE_CPE,
+        },
+        { { 0, 1 }, { 2, 3 }, },
+        2,
+    },
+};
+
+static const uint8_t at9_tab_sri_frame_log2[] = {
+    6, 6, 7, 7, 7, 8, 8, 8, 6, 6, 7, 7, 7, 8, 8, 8,
+};
+
+static const uint8_t at9_tab_band_q_unit_map[] = {
+    0, 4, 8, 10, 12, 13, 14, 15, 16, 18, 20, 21, 22, 23, 24, 25, 26, 28, 30,
+};
+
+static const uint8_t at9_q_unit_to_coeff_cnt[] = {
+    2, 2, 2, 2, 2,  2,  2,  2,  4,  4,  4,  4,  8,  8,  8,
+    8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+};
+
+static const int at9_q_unit_to_coeff_idx[] = {
+    0, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64,
+    72, 80, 88, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256
+};
+
+const uint8_t at9_q_unit_to_codebookidx[] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2,
+    2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+};
+
+static const uint8_t at9_tab_sri_max_bands[] = {
+    8, 8, 12, 12, 12, 18, 18, 18, 8, 8, 12, 12, 12, 16, 16, 16,
+};
+
+static const int at9_tab_samplerates[] = {
+    11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 44100, 48000,
+    64000, 88200, 96000, 128000, 176400, 192000,
+};
+
+static const uint8_t at9_tab_band_ext_cnt[][6] = {
+    { 0, 0, 0, 4, 4, 2 },
+    { 0, 0, 0, 0, 0, 0 },
+    { 0, 0, 0, 2, 2, 1 },
+    { 0, 0, 0, 2, 2, 2 },
+    { 1, 1, 1, 0, 0, 0 },
+};
+
+/* B unit, C unit, Band count */
+static const uint8_t at9_tab_band_ext_group[][3] = {
+    { 16, 21, 0 },
+    { 18, 22, 1 },
+    { 20, 22, 2 },
+    { 21, 22, 3 },
+    { 21, 22, 3 },
+    { 23, 24, 4 },
+    { 23, 24, 4 },
+    { 24, 24, 5 },
+};
+
+static const uint8_t at9_tab_band_ext_lengths[][6][4] = {
+    {
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 5, 4, 3, 3 },
+        { 4, 4, 3, 4 },
+        { 4, 5, 0, 0 },
+    },
+    {
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+    },
+    {
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 6, 6, 0, 0 },
+        { 6, 6, 0, 0 },
+        { 6, 0, 0, 0 },
+    },
+    {
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 4, 4, 0, 0 },
+        { 4, 4, 0, 0 },
+        { 4, 4, 0, 0 },
+    },
+    {
+        { 3, 0, 0, 0 },
+        { 3, 0, 0, 0 },
+        { 3, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+    },
+};
+
+static const float at9_band_ext_scales_m0[][5][32] = {
+    {
+        {
+            0.000000e+0f, 1.988220e-1f, 2.514343e-1f, 2.960510e-1f,
+            3.263550e-1f, 3.771362e-1f, 3.786926e-1f, 4.540405e-1f,
+            4.877625e-1f, 5.262451e-1f, 5.447083e-1f, 5.737000e-1f,
+            6.212158e-1f, 6.222839e-1f, 6.560974e-1f, 6.896667e-1f,
+            7.555542e-1f, 7.677917e-1f, 7.918091e-1f, 7.971497e-1f,
+            8.188171e-1f, 8.446045e-1f, 9.790649e-1f, 9.822083e-1f,
+            9.846191e-1f, 9.859314e-1f, 9.863586e-1f, 9.863892e-1f,
+            9.873352e-1f, 9.881287e-1f, 9.898682e-1f, 9.913330e-1f,
+        }, {
+            0.000000e+0f, 9.982910e-1f, 7.592773e-2f, 7.179565e-1f,
+            9.851379e-1f, 5.340271e-1f, 9.013672e-1f, 6.349182e-1f,
+            7.226257e-1f, 1.948547e-1f, 7.628174e-1f, 9.873657e-1f,
+            8.112183e-1f, 2.715454e-1f, 9.734192e-1f, 1.443787e-1f,
+            4.640198e-1f, 3.249207e-1f, 3.790894e-1f, 8.276367e-2f,
+            5.954590e-1f, 2.864380e-1f, 9.806824e-1f, 7.929077e-1f,
+            6.292114e-1f, 4.887085e-1f, 2.905273e-1f, 1.301880e-1f,
+            3.140869e-1f, 5.482483e-1f, 4.210815e-1f, 1.182861e-1f,
+        }, {
+            0.000000e+0f, 3.155518e-2f, 8.581543e-2f, 1.364746e-1f,
+            1.858826e-1f, 2.368469e-1f, 2.888184e-1f, 3.432617e-1f,
+            4.012451e-1f, 4.623108e-1f, 5.271301e-1f, 5.954895e-1f,
+            6.681213e-1f, 7.448425e-1f, 8.245239e-1f, 9.097290e-1f,
+        }, {
+            0.000000e+0f, 4.418945e-2f, 1.303711e-1f, 2.273560e-1f,
+            3.395996e-1f, 4.735718e-1f, 6.267090e-1f, 8.003845e-1f,
+        }, {
+            0.000000e+0f, 2.804565e-2f, 9.683228e-2f, 1.849976e-1f,
+            3.005981e-1f, 4.470520e-1f, 6.168518e-1f, 8.007813e-1f,
+        },
+    },
+    {
+        {
+            0.000000e+0f, 2.708740e-1f, 3.479614e-1f, 3.578186e-1f,
+            5.083618e-1f, 5.299072e-1f, 5.819092e-1f, 6.381836e-1f,
+            7.276917e-1f, 7.595520e-1f, 7.878723e-1f, 9.707336e-1f,
+            9.713135e-1f, 9.736023e-1f, 9.759827e-1f, 9.832458e-1f,
+        }, {
+            0.000000e+0f, 2.330627e-1f, 5.891418e-1f, 7.170410e-1f,
+            2.036438e-1f, 1.613464e-1f, 6.668701e-1f, 9.481201e-1f,
+            9.769897e-1f, 5.111694e-1f, 3.522644e-1f, 8.209534e-1f,
+            2.933960e-1f, 9.757690e-1f, 5.289917e-1f, 4.372253e-1f,
+        }, {
+            0.000000e+0f, 4.360962e-2f, 1.056519e-1f, 1.590576e-1f,
+            2.078857e-1f, 2.572937e-1f, 3.082581e-1f, 3.616028e-1f,
+            4.191589e-1f, 4.792175e-1f, 5.438538e-1f, 6.125183e-1f,
+            6.841125e-1f, 7.589417e-1f, 8.365173e-1f, 9.148254e-1f,
+        }, {
+            0.000000e+0f, 4.074097e-2f, 1.164551e-1f, 2.077026e-1f,
+            3.184509e-1f, 4.532166e-1f, 6.124268e-1f, 7.932129e-1f,
+        }, {
+            0.000000e+0f, 8.880615e-3f, 2.932739e-2f, 5.593872e-2f,
+            8.825684e-2f, 1.259155e-1f, 1.721497e-1f, 2.270813e-1f,
+            2.901611e-1f, 3.579712e-1f, 4.334106e-1f, 5.147095e-1f,
+            6.023254e-1f, 6.956177e-1f, 7.952881e-1f, 8.977356e-1f,
+        },
+    },
+    {
+        {
+            0.000000e+0f, 7.379150e-2f, 1.806335e-1f, 2.687073e-1f,
+            3.407898e-1f, 4.047546e-1f, 4.621887e-1f, 5.168762e-1f,
+            5.703125e-1f, 6.237488e-1f, 6.763611e-1f, 7.288208e-1f,
+            7.808533e-1f, 8.337708e-1f, 8.874512e-1f, 9.418030e-1f,
+        }, {
+            0.000000e+0f, 7.980347e-2f, 1.615295e-1f, 1.665649e-1f,
+            1.822205e-1f, 2.185669e-1f, 2.292175e-1f, 2.456665e-1f,
+            2.666321e-1f, 3.306580e-1f, 3.330688e-1f, 3.765259e-1f,
+            4.085083e-1f, 4.400024e-1f, 4.407654e-1f, 4.817505e-1f,
+            4.924011e-1f, 5.320740e-1f, 5.893860e-1f, 6.131287e-1f,
+            6.212463e-1f, 6.278076e-1f, 6.308899e-1f, 7.660828e-1f,
+            7.850647e-1f, 7.910461e-1f, 7.929382e-1f, 8.038330e-1f,
+            9.834900e-1f, 9.846191e-1f, 9.852295e-1f, 9.862671e-1f,
+        }, {
+            0.000000e+0f, 6.084290e-1f, 3.672791e-1f, 3.151855e-1f,
+            1.488953e-1f, 2.571716e-1f, 5.103455e-1f, 3.311157e-1f,
+            5.426025e-2f, 4.254456e-1f, 7.998352e-1f, 7.873230e-1f,
+            5.418701e-1f, 2.925110e-1f, 8.468628e-2f, 1.410522e-1f,
+            9.819641e-1f, 9.609070e-1f, 3.530884e-2f, 9.729004e-2f,
+            5.758362e-1f, 9.941711e-1f, 7.215576e-1f, 7.183228e-1f,
+            2.028809e-1f, 9.588623e-2f, 2.032166e-1f, 1.338806e-1f,
+            5.003357e-1f, 1.874390e-1f, 9.804993e-1f, 1.107788e-1f,
+        },
+    },
+};
+
+static const float at9_band_ext_scales_m2[] = {
+    4.272461e-4f, 1.312256e-3f, 2.441406e-3f, 3.692627e-3f,
+    4.913330e-3f, 6.134033e-3f, 7.507324e-3f, 8.972168e-3f,
+    1.049805e-2f, 1.223755e-2f, 1.406860e-2f, 1.599121e-2f,
+    1.800537e-2f, 2.026367e-2f, 2.264404e-2f, 2.517700e-2f,
+    2.792358e-2f, 3.073120e-2f, 3.344727e-2f, 3.631592e-2f,
+    3.952026e-2f, 4.275513e-2f, 4.608154e-2f, 4.968262e-2f,
+    5.355835e-2f, 5.783081e-2f, 6.195068e-2f, 6.677246e-2f,
+    7.196045e-2f, 7.745361e-2f, 8.319092e-2f, 8.993530e-2f,
+    9.759521e-2f, 1.056213e-1f, 1.138916e-1f, 1.236267e-1f,
+    1.348267e-1f, 1.470337e-1f, 1.603394e-1f, 1.755676e-1f,
+    1.905823e-1f, 2.071228e-1f, 2.245178e-1f, 2.444153e-1f,
+    2.658997e-1f, 2.897644e-1f, 3.146057e-1f, 3.450012e-1f,
+    3.766174e-1f, 4.122620e-1f, 4.505615e-1f, 4.893799e-1f,
+    5.305481e-1f, 5.731201e-1f, 6.157837e-1f, 6.580811e-1f,
+    6.985168e-1f, 7.435303e-1f, 7.865906e-1f, 8.302612e-1f,
+    8.718567e-1f, 9.125671e-1f, 9.575806e-1f, 9.996643e-1f,
+};
+
+static const float at9_band_ext_scales_m3[][2] = {
+    { 3.491211e-1f, -2.913818e-1f, }, { 5.371094e-1f, -2.541504e-1f, },
+    { 6.782227e-1f, -1.664429e-1f, }, { 7.910156e-1f, -1.476440e-1f, },
+    { 9.057617e-1f, -1.342163e-1f, }, { 1.024902e+0f, -1.220703e-1f, },
+    { 1.156250e+0f, -1.117554e-1f, }, { 1.290527e+0f, -1.026611e-1f, },
+    { 1.458984e+0f, -9.436035e-2f, }, { 1.664551e+0f, -8.483887e-2f, },
+    { 1.929688e+0f, -7.476807e-2f, }, { 2.278320e+0f, -6.304932e-2f, },
+    { 2.831543e+0f, -4.492188e-2f, }, { 3.659180e+0f, -2.447510e-2f, },
+    { 5.257813e+0f, +1.831055e-4f, }, { 8.373047e+0f, +4.174805e-2f, },
+};
+
+static const float at9_band_ext_scales_m4[] = {
+    3.610229e-2f, 1.260681e-1f, 2.227478e-1f, 3.338318e-1f,
+    4.662170e-1f, 6.221313e-1f, 7.989197e-1f, 9.939575e-1f,
+};
+
+static const float at9_quant_step_coarse[] = {
+    2.0000000000000000e+0f, 6.6666666666666663e-1f, 2.8571428571428570e-1f,
+    1.3333333333333333e-1f, 6.4516129032258063e-2f, 3.1746031746031744e-2f,
+    1.5748031496062992e-2f, 7.8431372549019607e-3f, 3.9138943248532287e-3f,
+    1.9550342130987292e-3f, 9.7703957010258913e-4f, 4.8840048840048840e-4f,
+    2.4417043096081065e-4f, 1.2207776353537203e-4f, 6.1037018951994385e-5f,
+    3.0518043793392844e-5f,
+};
+
+static const float at9_quant_step_fine[] = {
+    3.0518043793392844e-05f, 1.0172681264464281e-05f, 4.3597205419132631e-06f,
+    2.0345362528928561e-06f, 9.8445302559331759e-07f, 4.8441339354591809e-07f,
+    2.4029955742829012e-07f, 1.1967860311134448e-07f, 5.9722199204291275e-08f,
+    2.9831909866464167e-08f, 1.4908668194134265e-08f, 7.4525137468602791e-09f,
+    3.7258019525568114e-09f, 1.8627872668859698e-09f, 9.3136520869755679e-10f,
+    4.6567549848772173e-10f,
+};
+
+static const float at9_scalefactor_c[] = {
+    3.0517578125e-5f, 6.1035156250e-5f, 1.2207031250e-4f, 2.4414062500e-4f,
+    4.8828125000e-4f, 9.7656250000e-4f, 1.9531250000e-3f, 3.9062500000e-3f,
+    7.8125000000e-3f, 1.5625000000e-2f, 3.1250000000e-2f, 6.2500000000e-2f,
+    1.2500000000e-1f, 2.5000000000e-1f, 5.0000000000e-1f, 1.0000000000e+0f,
+    2.0000000000e+0f, 4.0000000000e+0f, 8.0000000000e+0f, 1.6000000000e+1f,
+    3.2000000000e+1f, 6.4000000000e+1f, 1.2800000000e+2f, 2.5600000000e+2f,
+    5.1200000000e+2f, 1.0240000000e+3f, 2.0480000000e+3f, 4.0960000000e+3f,
+    8.1920000000e+3f, 1.6384000000e+4f, 3.2768000000e+4f, 6.5536000000e+4f,
+};
+
+static const uint8_t at9_tab_sf_weights[][32] = {
+    {
+        0,  0,  0,  1,  1,  2,  2,  2,  2,  2,  2,  3,  2,  3,  3,  4,  4,  4,
+        4,  4,  4,  5,  5,  6,  6,  7,  7,  8, 10, 12, 12, 12,
+    },
+    {
+        3,  2,  2,  1,  1,  1,  1,  1,  0,  1,  1,  1,  0,  0,  0,  1,  0,  1,
+        1,  1,  1,  1,  1,  2,  3,  3,  4,  5,  7, 10, 10, 10,
+    },
+    {
+        0,  2,  4,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+        6,  6,  6,  6,  6,  7,  7,  7,  7,  8,  9, 12, 12, 12,
+    },
+    {
+        0,  1,  1,  2,  2,  2,  3,  3,  3,  3,  3,  4,  4,  4,  5,  5,  5,  6,
+        6,  6,  6,  7,  8,  8, 10, 11, 11, 12, 13, 13, 13, 13,
+    },
+    {
+        0,  2,  2,  3,  3,  4,  4,  5,  4,  5,  5,  5,  5,  6,  7,  8,  8,  8,
+        8,  9,  9,  9, 10, 10, 11, 12, 12, 13, 13, 14, 14, 14,
+    },
+    {
+        1,  1,  0,  0,  0,  0,  1,  0,  0,  1,  1,  1,  1,  1,  2,  2,  2,  2,
+        2,  3,  3,  3,  4,  4,  5,  6,  7,  7,  9, 11, 11, 11,
+    },
+    {
+        0,  5,  8, 10, 11, 11, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+        13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 13, 15, 15, 15,
+    },
+    {
+        0,  2,  3,  4,  5,  6,  6,  7,  7,  8,  8,  8,  9,  9, 10, 10, 10, 11,
+        11, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 15, 15, 15,
+    },
+};
+
+static const uint8_t at9_tab_b_dist[] = {
+         1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,  5,  5,  6,  7,  8,
+         9, 10, 11, 12, 13, 15,  6, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27,
+        27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30,
+};
+
+static const uint8_t huff_sfb_a1_bits[] = {
+    1, 1,
+};
+
+static const uint16_t huff_sfb_a1_codes[] = {
+    0x00, 0x01,
+};
+
+static const uint8_t huff_sfb_a2_bits[] = {
+    1, 3, 3, 2,
+};
+
+static const uint16_t huff_sfb_a2_codes[] = {
+    0x00, 0x06, 0x07, 0x02,
+};
+
+static const uint8_t huff_sfb_a3_bits[] = {
+    2, 2, 4, 6, 6, 5, 3, 2,
+};
+
+static const uint16_t huff_sfb_a3_codes[] = {
+    0x00, 0x01, 0x0E, 0x3E, 0x3F, 0x1E, 0x06, 0x02,
+};
+
+static const uint8_t huff_sfb_a4_bits[] = {
+    2, 2, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 6, 5, 4, 2,
+};
+
+static const uint16_t huff_sfb_a4_codes[] = {
+    0x01, 0x02, 0x00, 0x06, 0x0F, 0x13, 0x23, 0x24,
+    0x25, 0x22, 0x21, 0x20, 0x0E, 0x05, 0x01, 0x03,
+};
+
+static const uint8_t huff_sfb_a5_bits[] = {
+    2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 6, 5, 5, 4, 3,
+};
+
+static const uint16_t huff_sfb_a5_codes[] = {
+    0x02, 0x01, 0x07, 0x0D, 0x0C, 0x18, 0x1B, 0x21, 0x3F, 0x6A, 0x6B, 0x68,
+    0x73, 0x79, 0x7C, 0x7D, 0x7A, 0x7B, 0x78, 0x72, 0x44, 0x45, 0x47, 0x46,
+    0x69, 0x38, 0x20, 0x1D, 0x19, 0x09, 0x05, 0x00,
+};
+
+static const uint8_t huff_sfb_a6_bits[] = {
+    3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 7, 7, 7, 6, 6, 5, 5, 5, 4, 4, 4,
+};
+
+static const uint16_t huff_sfb_a6_codes[] = {
+    0x00, 0x01, 0x04, 0x05, 0x12, 0x13, 0x2E, 0x2F, 0x30, 0x66, 0x67, 0xD6,
+    0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2,
+    0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE,
+    0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
+    0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x68, 0x69, 0x6A, 0x31, 0x32, 0x14, 0x15,
+    0x16, 0x06, 0x07, 0x08,
+};
+
+static const uint8_t huff_sfb_b2_bits[] = {
+    1, 2, 0, 2,
+};
+
+static const uint16_t huff_sfb_b2_codes[] = {
+    0x00, 0x03, 0x00, 0x02,
+};
+
+static const uint8_t huff_sfb_b3_bits[] = {
+    1, 3, 5, 6, 0, 6, 4, 2,
+};
+
+static const uint16_t huff_sfb_b3_codes[] = {
+    0x01, 0x00, 0x04, 0x0B, 0x00, 0x0A, 0x03, 0x01,
+};
+
+static const uint8_t huff_sfb_b4_bits[] = {
+    1, 3, 4, 5, 5, 7, 8, 8, 0, 8, 8, 7, 6, 6, 4, 3,
+};
+
+static const uint16_t huff_sfb_b4_codes[] = {
+    0x01, 0x01, 0x04, 0x0E, 0x0F, 0x2C, 0x5A, 0x5D, 0x00, 0x5C, 0x5B, 0x2F,
+    0x15, 0x14, 0x06, 0x00,
+};
+
+static const uint8_t huff_sfb_b5_bits[] = {
+    3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 6, 7, 7, 7, 8, 8,
+    8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 3,
+};
+
+static const uint16_t huff_sfb_b5_codes[] = {
+    0x00, 0x05, 0x07, 0x0C, 0x04, 0x02, 0x03, 0x05, 0x09, 0x10, 0x23, 0x33,
+    0x36, 0x6E, 0x60, 0x65, 0x62, 0x61, 0x63, 0x64, 0x6F, 0x6D, 0x6C, 0x6B,
+    0x6A, 0x68, 0x69, 0x45, 0x44, 0x37, 0x1A, 0x07,
+};
+
+typedef struct HuffmanCodebook {
+    const uint8_t *bits;
+    const uint16_t *codes;
+    const int size;
+    const int value_cnt;
+    const int value_cnt_pow;
+    const int value_bits;
+    const int max_bit_size;
+} HuffmanCodebook;
+
+static const HuffmanCodebook at9_huffman_sf_unsigned[] = {
+    { 0 },
+    { huff_sfb_a1_bits, huff_sfb_a1_codes,  2,  1,  0,  1,  1, },
+    { huff_sfb_a2_bits, huff_sfb_a2_codes,  4,  1,  0,  2,  3, },
+    { huff_sfb_a3_bits, huff_sfb_a3_codes,  8,  1,  0,  3,  6, },
+    { huff_sfb_a4_bits, huff_sfb_a4_codes, 16,  1,  0,  4,  8, },
+    { huff_sfb_a5_bits, huff_sfb_a5_codes, 32,  1,  0,  5,  8, },
+    { huff_sfb_a6_bits, huff_sfb_a6_codes, 64,  1,  0,  6,  8, },
+};
+
+static const HuffmanCodebook at9_huffman_sf_signed[] = {
+    { 0 },
+    { 0 },
+    { huff_sfb_b2_bits, huff_sfb_b2_codes,  4,  1,  0,  2,  2, },
+    { huff_sfb_b3_bits, huff_sfb_b3_codes,  8,  1,  0,  3,  6, },
+    { huff_sfb_b4_bits, huff_sfb_b4_codes, 16,  1,  0,  4,  8, },
+    { huff_sfb_b5_bits, huff_sfb_b5_codes, 32,  1,  0,  5,  8, },
+};
+
+static const uint8_t huff_spec_a21_bits[] = {
+    0, 3, 0, 3, 3, 3, 0, 3, 0, 0, 0, 0, 3, 3, 0, 3,
+};
+
+static const uint16_t huff_spec_a21_codes[] = {
+    0x00, 0x00, 0x00, 0x01, 0x03, 0x07, 0x00, 0x04,
+    0x00, 0x00, 0x00, 0x00, 0x02, 0x05, 0x00, 0x06,
+};
+
+static const uint8_t huff_spec_a22_bits[] = {
+    0, 4, 0, 4, 5, 6, 0, 6, 0, 0, 0, 0, 5, 6, 0, 6,
+    5, 6, 0, 6, 6, 7, 0, 7, 0, 0, 0, 0, 6, 7, 0, 7,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    5, 6, 0, 6, 6, 7, 0, 7, 0, 0, 0, 0, 6, 7, 0, 7,
+    5, 6, 0, 6, 7, 7, 0, 7, 0, 0, 0, 0, 6, 7, 0, 7,
+    6, 7, 0, 7, 7, 8, 0, 8, 0, 0, 0, 0, 7, 8, 0, 7,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    6, 7, 0, 7, 7, 8, 0, 8, 0, 0, 0, 0, 7, 7, 0, 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    5, 6, 0, 6, 6, 7, 0, 7, 0, 0, 0, 0, 7, 7, 0, 7,
+    6, 7, 0, 7, 7, 8, 0, 7, 0, 0, 0, 0, 7, 8, 0, 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    6, 7, 0, 7, 7, 7, 0, 8, 0, 0, 0, 0, 7, 8, 0, 8,
+};
+
+static const uint16_t huff_spec_a22_codes[] = {
+    0x00, 0x02, 0x00, 0x03, 0x10, 0x3C, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x11, 0x3E, 0x00, 0x3D,
+    0x0E, 0x00, 0x00, 0x39, 0x18, 0x26, 0x00, 0x75, 0x00, 0x00, 0x00, 0x00, 0x1B, 0x24, 0x00, 0x6D,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x0F, 0x38, 0x00, 0x01, 0x1A, 0x6C, 0x00, 0x25, 0x00, 0x00, 0x00, 0x00, 0x19, 0x74, 0x00, 0x27,
+    0x16, 0x14, 0x00, 0x17, 0x76, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x35, 0x64, 0x00, 0x6F,
+    0x26, 0x04, 0x00, 0x63, 0x22, 0xA2, 0x00, 0x97, 0x00, 0x00, 0x00, 0x00, 0x67, 0xA0, 0x00, 0x0D,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x2B, 0x52, 0x00, 0x0B, 0x20, 0x92, 0x00, 0x91, 0x00, 0x00, 0x00, 0x00, 0x61, 0x0E, 0x00, 0x95,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x17, 0x16, 0x00, 0x15, 0x34, 0x6E, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x77, 0x08, 0x00, 0x07,
+    0x2A, 0x0A, 0x00, 0x53, 0x60, 0x94, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x21, 0x90, 0x00, 0x93,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x27, 0x62, 0x00, 0x05, 0x66, 0x0C, 0x00, 0xA1, 0x00, 0x00, 0x00, 0x00, 0x23, 0x96, 0x00, 0xA3,
+};
+
+static const uint8_t huff_spec_a23_bits[] = {
+    3, 4, 0, 4, 5, 6, 0, 6, 0, 0, 0, 0, 5, 6, 0, 6,
+    5, 7, 0, 6, 6, 8, 0, 7, 0, 0, 0, 0, 6, 8, 0, 7,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    5, 6, 0, 7, 6, 7, 0, 8, 0, 0, 0, 0, 6, 7, 0, 8,
+    5, 6, 0, 6, 7, 8, 0, 8, 0, 0, 0, 0, 6, 7, 0, 7,
+    6, 8, 0, 7, 8, 9, 0, 9, 0, 0, 0, 0, 7, 9, 0, 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    6, 8, 0, 8, 8, 9, 0, 9, 0, 0, 0, 0, 7, 8, 0, 9,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    5, 6, 0, 6, 6, 7, 0, 7, 0, 0, 0, 0, 7, 8, 0, 8,
+    6, 8, 0, 8, 7, 9, 0, 8, 0, 0, 0, 0, 8, 9, 0, 9,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    6, 7, 0, 8, 7, 8, 0, 9, 0, 0, 0, 0, 8, 9, 0, 9,
+};
+
+static const uint16_t huff_spec_a23_codes[] = {
+    0x006, 0x002, 0x000, 0x003, 0x016, 0x01E, 0x000, 0x021, 0x000, 0x000, 0x000, 0x000,
+    0x017, 0x020, 0x000, 0x01F, 0x01C, 0x054, 0x000, 0x027, 0x010, 0x0A6, 0x000, 0x027,
+    0x000, 0x000, 0x000, 0x000, 0x015, 0x0A4, 0x000, 0x02D, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x01D, 0x026, 0x000, 0x055, 0x014, 0x02C, 0x000, 0x0A5, 0x000, 0x000, 0x000, 0x000,
+    0x011, 0x026, 0x000, 0x0A7, 0x01E, 0x000, 0x000, 0x003, 0x04A, 0x074, 0x000, 0x071,
+    0x000, 0x000, 0x000, 0x000, 0x023, 0x00A, 0x000, 0x009, 0x018, 0x072, 0x000, 0x00D,
+    0x0A2, 0x15A, 0x000, 0x123, 0x000, 0x000, 0x000, 0x000, 0x00F, 0x158, 0x000, 0x05D,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x01B, 0x0AE, 0x000, 0x077, 0x092, 0x140, 0x000, 0x121,
+    0x000, 0x000, 0x000, 0x000, 0x025, 0x05E, 0x000, 0x143, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x01F, 0x002, 0x000, 0x001, 0x022, 0x008, 0x000, 0x00B, 0x000, 0x000, 0x000, 0x000,
+    0x04B, 0x070, 0x000, 0x075, 0x01A, 0x076, 0x000, 0x0AF, 0x024, 0x142, 0x000, 0x05F,
+    0x000, 0x000, 0x000, 0x000, 0x093, 0x120, 0x000, 0x141, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x019, 0x00C, 0x000, 0x073, 0x00E, 0x05C, 0x000, 0x159, 0x000, 0x000, 0x000, 0x000,
+    0x0A3, 0x122, 0x000, 0x15B,
+};
+
+static const uint8_t huff_spec_a24_bits[] = {
+    2,  4,  0,  4,  5,  6,  0,  6,  0,  0,  0,  0,  5,  6,  0,  6,
+    5,  7,  0,  6,  6,  8,  0,  8,  0,  0,  0,  0,  6,  8,  0,  8,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    5,  6,  0,  7,  6,  8,  0,  8,  0,  0,  0,  0,  6,  8,  0,  8,
+    5,  7,  0,  7,  7,  9,  0,  9,  0,  0,  0,  0,  6,  8,  0,  8,
+    6,  9,  0,  8,  8, 10,  0, 10,  0,  0,  0,  0,  8, 10,  0,  9,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    6,  8,  0,  9,  9, 10,  0, 10,  0,  0,  0,  0,  8,  9,  0, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    5,  7,  0,  7,  6,  8,  0,  8,  0,  0,  0,  0,  7,  9,  0,  9,
+    6,  9,  0,  8,  8, 10,  0,  9,  0,  0,  0,  0,  9, 10,  0, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    6,  8,  0,  9,  8,  9,  0, 10,  0,  0,  0,  0,  8, 10,  0, 10,
+};
+
+static const uint16_t huff_spec_a24_codes[] = {
+    0x002, 0x002, 0x000, 0x003, 0x01E, 0x010, 0x000, 0x013, 0x000, 0x000, 0x000, 0x000,
+    0x01F, 0x012, 0x000, 0x011, 0x01A, 0x030, 0x000, 0x01B, 0x000, 0x064, 0x000, 0x0C1,
+    0x000, 0x000, 0x000, 0x000, 0x003, 0x052, 0x000, 0x07D, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x01B, 0x01A, 0x000, 0x031, 0x002, 0x07C, 0x000, 0x053, 0x000, 0x000, 0x000, 0x000,
+    0x001, 0x0C0, 0x000, 0x065, 0x01C, 0x062, 0x000, 0x065, 0x02A, 0x198, 0x000, 0x19B,
+    0x000, 0x000, 0x000, 0x000, 0x017, 0x078, 0x000, 0x07B, 0x004, 0x0FE, 0x000, 0x077,
+    0x050, 0x33A, 0x000, 0x1F9, 0x000, 0x000, 0x000, 0x000, 0x073, 0x338, 0x000, 0x0E1,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x007, 0x066, 0x000, 0x187, 0x19E, 0x308, 0x000, 0x30B,
+    0x000, 0x000, 0x000, 0x000, 0x075, 0x0E2, 0x000, 0x1FB, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x01D, 0x064, 0x000, 0x063, 0x016, 0x07A, 0x000, 0x079, 0x000, 0x000, 0x000, 0x000,
+    0x02B, 0x19A, 0x000, 0x199, 0x006, 0x186, 0x000, 0x067, 0x074, 0x1FA, 0x000, 0x0E3,
+    0x000, 0x000, 0x000, 0x000, 0x19F, 0x30A, 0x000, 0x309, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x005, 0x076, 0x000, 0x0FF, 0x072, 0x0E0, 0x000, 0x339, 0x000, 0x000, 0x000, 0x000,
+    0x051, 0x1F8, 0x000, 0x33B,
+};
+
+static const uint8_t huff_spec_a31_bits[] = {
+    0, 0, 4, 5, 0, 5, 4, 0, 0, 0, 5, 5, 0, 5, 5, 0,
+    5, 5, 6, 6, 0, 6, 5, 5, 5, 6, 6, 7, 0, 7, 6, 6,
+    0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 6, 7, 0, 7, 6, 6,
+    5, 5, 5, 6, 0, 6, 6, 5, 0, 0, 5, 5, 0, 5, 5, 0,
+};
+
+static const uint16_t huff_spec_a31_codes[] = {
+    0x00, 0x00, 0x02, 0x18, 0x00, 0x19, 0x03, 0x00, 0x00, 0x00, 0x12, 0x02, 0x00, 0x09, 0x15, 0x00,
+    0x1A, 0x0A, 0x3E, 0x2C, 0x00, 0x2F, 0x01, 0x0D, 0x0E, 0x38, 0x20, 0x78, 0x00, 0x7B, 0x23, 0x3B,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x3A, 0x22, 0x7A, 0x00, 0x79, 0x21, 0x39,
+    0x1B, 0x0C, 0x00, 0x2E, 0x00, 0x2D, 0x3F, 0x0B, 0x00, 0x00, 0x14, 0x08, 0x00, 0x03, 0x13, 0x00,
+};
+
+static const uint8_t huff_spec_a32_bits[] = {
+    4, 5, 5, 6, 0, 6, 5, 5, 5, 6, 5, 6, 0, 6, 5, 5,
+    5, 5, 6, 7, 0, 7, 6, 5, 6, 6, 7, 7, 0, 7, 7, 6,
+    0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 7, 7, 0, 7, 7, 6,
+    5, 5, 6, 7, 0, 7, 6, 5, 5, 5, 5, 6, 0, 6, 5, 6,
+};
+
+static const uint16_t huff_spec_a32_codes[] = {
+    0x0D, 0x18, 0x16, 0x3A, 0x00, 0x3B, 0x17, 0x19, 0x12, 0x3E, 0x08, 0x1C, 0x00, 0x1B, 0x07, 0x01,
+    0x10, 0x02, 0x28, 0x78, 0x00, 0x7B, 0x1F, 0x05, 0x2A, 0x16, 0x72, 0x2A, 0x00, 0x29, 0x71, 0x19,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x18, 0x70, 0x28, 0x00, 0x2B, 0x73, 0x17,
+    0x11, 0x04, 0x1E, 0x7A, 0x00, 0x79, 0x29, 0x03, 0x13, 0x00, 0x06, 0x1A, 0x00, 0x1D, 0x09, 0x3F,
+};
+
+static const uint8_t huff_spec_a33_bits[] = {
+    3, 4, 5, 6, 0, 6, 5, 4, 4, 5, 6, 7, 0, 7, 6, 5,
+    5, 6, 6, 7, 0, 7, 6, 6, 6, 7, 8, 8, 0, 8, 8, 7,
+    0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 8, 0, 8, 8, 7,
+    5, 6, 6, 7, 0, 7, 6, 6, 4, 5, 6, 7, 0, 7, 6, 5,
+};
+
+static const uint16_t huff_spec_a33_codes[] = {
+    0x05, 0x06, 0x10, 0x08, 0x00, 0x09, 0x11, 0x07, 0x04, 0x12, 0x3E, 0x6A, 0x00, 0x6D, 0x3D, 0x19,
+    0x06, 0x3A, 0x06, 0x02, 0x00, 0x01, 0x05, 0x39, 0x02, 0x16, 0xDC, 0x2A, 0x00, 0x29, 0xDF, 0x69,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x68, 0xDE, 0x28, 0x00, 0x2B, 0xDD, 0x17,
+    0x07, 0x38, 0x04, 0x00, 0x00, 0x03, 0x07, 0x3B, 0x05, 0x18, 0x3C, 0x6C, 0x00, 0x6B, 0x3F, 0x13,
+};
+
+static const uint8_t huff_spec_a34_bits[] = {
+    2,  4,  5,  7,  0,  7,  5,  4,  4,  5,  6,  8,  0,  8,  6,  5,
+    5,  6,  7,  8,  0,  8,  7,  6,  7,  8,  8, 10,  0, 10,  9,  8,
+    0,  0,  0,  0,  0,  0,  0,  0,  7,  8,  9, 10,  0, 10,  8,  8,
+    5,  6,  7,  8,  0,  8,  7,  6,  4,  5,  6,  8,  0,  8,  6,  5,
+};
+
+static const uint16_t huff_spec_a34_codes[] = {
+    0x000, 0x00A, 0x00A, 0x034, 0x000, 0x035, 0x00B, 0x00B, 0x008, 0x01C, 0x032, 0x0DA,
+    0x000, 0x0DD, 0x035, 0x01F, 0x008, 0x01E, 0x03A, 0x06C, 0x000, 0x063, 0x039, 0x031,
+    0x032, 0x06E, 0x060, 0x37A, 0x000, 0x379, 0x1BF, 0x0D9, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x033, 0x0D8, 0x1BE, 0x378, 0x000, 0x37B, 0x061, 0x06F,
+    0x009, 0x030, 0x038, 0x062, 0x000, 0x06D, 0x03B, 0x01F, 0x009, 0x01E, 0x034, 0x0DC,
+    0x000, 0x0DB, 0x033, 0x01D,
+};
+
+static const uint8_t huff_spec_a41_bits[] = {
+    0, 0, 0, 0, 6, 6, 7, 7, 0, 7, 7, 6, 6, 0, 0, 0,
+    0, 0, 0, 0, 7, 7, 7, 7, 0, 7, 7, 7, 6, 0, 0, 0,
+    0, 0, 0, 0, 7, 7, 7, 8, 0, 8, 7, 7, 7, 0, 0, 0,
+    0, 0, 0, 0, 7, 7, 8, 8, 0, 8, 8, 7, 7, 0, 0, 0,
+    7, 7, 7, 8, 7, 8, 8, 8, 0, 8, 8, 8, 7, 8, 7, 7,
+    7, 7, 7, 7, 8, 8, 8, 9, 0, 8, 8, 8, 8, 7, 7, 7,
+    7, 7, 8, 8, 8, 8, 9, 9, 0, 9, 8, 8, 8, 8, 8, 7,
+    8, 8, 8, 8, 8, 9, 9, 9, 0, 9, 9, 9, 8, 8, 8, 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    8, 8, 8, 8, 8, 9, 9, 9, 0, 9, 9, 9, 8, 8, 8, 8,
+    7, 7, 8, 8, 8, 8, 8, 9, 0, 9, 9, 8, 8, 8, 8, 7,
+    7, 7, 7, 7, 8, 8, 8, 8, 0, 9, 8, 8, 8, 7, 7, 7,
+    7, 7, 7, 8, 7, 8, 8, 8, 0, 8, 8, 8, 7, 8, 7, 7,
+    0, 0, 0, 0, 7, 7, 8, 8, 0, 8, 8, 7, 7, 0, 0, 0,
+    0, 0, 0, 0, 7, 7, 7, 8, 0, 8, 7, 7, 7, 0, 0, 0,
+    0, 0, 0, 0, 6, 7, 7, 7, 0, 7, 7, 7, 7, 0, 0, 0,
+};
+
+static const uint16_t huff_spec_a41_codes[] = {
+    0x000, 0x000, 0x000, 0x000, 0x018, 0x00E, 0x05E, 0x028, 0x000, 0x029, 0x05F, 0x00F,
+    0x019, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x076, 0x06E, 0x03E, 0x004,
+    0x000, 0x017, 0x045, 0x07B, 0x013, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x04A, 0x048, 0x010, 0x0CE, 0x000, 0x0E1, 0x023, 0x055, 0x053, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x008, 0x018, 0x0D6, 0x09E, 0x000, 0x09D, 0x0E5, 0x02B,
+    0x01B, 0x000, 0x000, 0x000, 0x07C, 0x05C, 0x038, 0x0FC, 0x002, 0x0D2, 0x09A, 0x05C,
+    0x000, 0x06B, 0x0A3, 0x0D9, 0x00F, 0x0FF, 0x03D, 0x061, 0x074, 0x056, 0x036, 0x000,
+    0x0CC, 0x08C, 0x058, 0x1E2, 0x000, 0x00F, 0x05F, 0x0A1, 0x0D5, 0x00D, 0x03B, 0x059,
+    0x040, 0x014, 0x0DA, 0x0B6, 0x084, 0x040, 0x1E0, 0x196, 0x000, 0x1A1, 0x00D, 0x043,
+    0x087, 0x0C7, 0x0E3, 0x00B, 0x0F2, 0x0C4, 0x08E, 0x05A, 0x024, 0x1CC, 0x194, 0x168,
+    0x000, 0x16B, 0x1A3, 0x1CF, 0x027, 0x069, 0x099, 0x0C9, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x0F3, 0x0C8, 0x098, 0x068, 0x026, 0x1CE, 0x1A2, 0x16A, 0x000, 0x169, 0x195, 0x1CD,
+    0x025, 0x05B, 0x08F, 0x0C5, 0x041, 0x00A, 0x0E2, 0x0C6, 0x086, 0x042, 0x00C, 0x1A0,
+    0x000, 0x197, 0x1E1, 0x041, 0x085, 0x0B7, 0x0DB, 0x015, 0x075, 0x058, 0x03A, 0x00C,
+    0x0D4, 0x0A0, 0x05E, 0x00E, 0x000, 0x1E3, 0x059, 0x08D, 0x0CD, 0x001, 0x037, 0x057,
+    0x07D, 0x060, 0x03C, 0x0FE, 0x00E, 0x0D8, 0x0A2, 0x06A, 0x000, 0x05D, 0x09B, 0x0D3,
+    0x003, 0x0FD, 0x039, 0x05D, 0x000, 0x000, 0x000, 0x000, 0x01A, 0x02A, 0x0E4, 0x09C,
+    0x000, 0x09F, 0x0D7, 0x019, 0x009, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x052, 0x054, 0x022, 0x0E0, 0x000, 0x0CF, 0x011, 0x049, 0x04B, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x012, 0x07A, 0x044, 0x016, 0x000, 0x005, 0x03F, 0x06F,
+    0x077, 0x000, 0x000, 0x000,
+};
+
+static const uint8_t huff_spec_a42_bits[] = {
+    5,  6,  7,  7,  7,  7,  8,  8,  0,  8,  8,  7,  7,  7,  7,  6,
+    6,  7,  7,  8,  7,  7,  8,  8,  0,  8,  8,  7,  7,  8,  7,  7,
+    7,  7,  8,  8,  7,  8,  8,  9,  0,  9,  8,  8,  7,  8,  8,  7,
+    8,  8,  8,  8,  8,  8,  8,  9,  0,  9,  8,  8,  8,  8,  8,  8,
+    7,  7,  7,  8,  8,  8,  9,  9,  0,  9,  9,  8,  8,  8,  7,  7,
+    7,  7,  8,  8,  8,  9,  9,  9,  0,  9,  9,  9,  8,  8,  8,  7,
+    8,  8,  8,  8,  9,  9,  9, 10,  0, 10,  9,  9,  9,  8,  8,  8,
+    8,  8,  9,  9,  9,  9, 10, 10,  0, 10, 10,  9,  9,  9,  9,  9,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    8,  9,  9,  9,  9,  9, 10, 10,  0, 10, 10,  9,  9,  9,  9,  8,
+    8,  8,  8,  8,  9,  9,  9, 10,  0, 10,  9,  9,  9,  8,  8,  8,
+    7,  7,  8,  8,  8,  9,  9,  9,  0,  9,  9,  9,  8,  8,  8,  7,
+    7,  7,  7,  8,  8,  8,  9,  9,  0,  9,  9,  8,  8,  8,  7,  7,
+    8,  8,  8,  8,  8,  8,  8,  9,  0,  9,  8,  8,  8,  8,  8,  8,
+    7,  7,  8,  8,  7,  8,  8,  9,  0,  9,  8,  8,  7,  8,  8,  7,
+    6,  7,  7,  8,  7,  7,  8,  8,  0,  8,  8,  7,  7,  8,  7,  7,
+};
+
+static const uint16_t huff_spec_a42_codes[] = {
+    0x003, 0x018, 0x058, 0x000, 0x066, 0x03C, 0x0D6, 0x07C, 0x000, 0x07D, 0x0D7, 0x03D,
+    0x067, 0x001, 0x059, 0x019, 0x002, 0x064, 0x036, 0x0DA, 0x04C, 0x01C, 0x0BE, 0x02C,
+    0x000, 0x037, 0x0C5, 0x029, 0x04B, 0x0E7, 0x03B, 0x069, 0x044, 0x02E, 0x0FA, 0x092,
+    0x020, 0x0F8, 0x086, 0x1FC, 0x000, 0x1E7, 0x07F, 0x0F5, 0x023, 0x0AD, 0x0FD, 0x02D,
+    0x0F6, 0x0DC, 0x09C, 0x03E, 0x0F0, 0x0B6, 0x026, 0x186, 0x000, 0x18D, 0x02F, 0x0B5,
+    0x0E1, 0x03D, 0x0AF, 0x0D9, 0x054, 0x040, 0x014, 0x0EC, 0x0BC, 0x054, 0x1C6, 0x108,
+    0x000, 0x10B, 0x1C5, 0x069, 0x0B9, 0x0DF, 0x019, 0x047, 0x026, 0x008, 0x0E4, 0x0A2,
+    0x056, 0x1DC, 0x142, 0x06A, 0x000, 0x091, 0x123, 0x1DF, 0x04B, 0x0A7, 0x0EB, 0x00B,
+    0x0C0, 0x09E, 0x06A, 0x022, 0x1AA, 0x140, 0x092, 0x3CA, 0x000, 0x3A7, 0x04B, 0x121,
+    0x18F, 0x007, 0x071, 0x0A5, 0x020, 0x004, 0x1A8, 0x174, 0x0E4, 0x068, 0x3A4, 0x2EE,
+    0x000, 0x2ED, 0x3C9, 0x049, 0x0E7, 0x185, 0x1D1, 0x1FF, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x021, 0x1FE, 0x1D0, 0x184, 0x0E6, 0x048, 0x3C8, 0x2EC, 0x000, 0x2EF, 0x3A5, 0x069,
+    0x0E5, 0x175, 0x1A9, 0x005, 0x0C1, 0x0A4, 0x070, 0x006, 0x18E, 0x120, 0x04A, 0x3A6,
+    0x000, 0x3CB, 0x093, 0x141, 0x1AB, 0x023, 0x06B, 0x09F, 0x027, 0x00A, 0x0EA, 0x0A6,
+    0x04A, 0x1DE, 0x122, 0x090, 0x000, 0x06B, 0x143, 0x1DD, 0x057, 0x0A3, 0x0E5, 0x009,
+    0x055, 0x046, 0x018, 0x0DE, 0x0B8, 0x068, 0x1C4, 0x10A, 0x000, 0x109, 0x1C7, 0x055,
+    0x0BD, 0x0ED, 0x015, 0x041, 0x0F7, 0x0D8, 0x0AE, 0x03C, 0x0E0, 0x0B4, 0x02E, 0x18C,
+    0x000, 0x187, 0x027, 0x0B7, 0x0F1, 0x03F, 0x09D, 0x0DD, 0x045, 0x02C, 0x0FC, 0x0AC,
+    0x022, 0x0F4, 0x07E, 0x1E6, 0x000, 0x1FD, 0x087, 0x0F9, 0x021, 0x093, 0x0FB, 0x02F,
+    0x003, 0x068, 0x03A, 0x0E6, 0x04A, 0x028, 0x0C4, 0x036, 0x000, 0x02D, 0x0BF, 0x01D,
+    0x04D, 0x0DB, 0x037, 0x065,
+};
+
+static const uint8_t huff_spec_a43_bits[] = {
+    4,  6,  6,  7,  7,  8,  8,  9,  0,  9,  8,  8,  7,  7,  6,  6,
+    5,  6,  7,  7,  7,  8,  8,  9,  0,  9,  8,  8,  7,  7,  7,  6,
+    6,  7,  7,  7,  8,  8,  9,  9,  0,  9,  9,  8,  8,  7,  7,  7,
+    7,  7,  7,  8,  8,  8,  9, 10,  0, 10,  9,  9,  8,  8,  7,  7,
+    7,  7,  8,  8,  8,  9, 10, 10,  0, 10, 10,  9,  8,  8,  8,  7,
+    8,  8,  8,  9,  9,  9, 10, 10,  0, 10, 10,  9,  9,  9,  8,  8,
+    8,  9,  9,  9, 10, 10, 10, 10,  0, 10, 10, 10, 10,  9,  9,  9,
+    9,  9, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10,  9,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    9,  9, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10,  9,
+    8,  9,  9,  9, 10, 10, 10, 10,  0, 10, 10, 10, 10,  9,  9,  9,
+    8,  8,  8,  9,  9,  9, 10, 10,  0, 10, 10,  9,  9,  9,  8,  8,
+    7,  7,  8,  8,  8,  9, 10, 10,  0, 10, 10,  9,  8,  8,  8,  7,
+    7,  7,  7,  8,  8,  9,  9, 10,  0, 10,  9,  8,  8,  8,  7,  7,
+    6,  7,  7,  7,  8,  8,  9,  9,  0,  9,  9,  8,  8,  7,  7,  7,
+    5,  6,  7,  7,  7,  8,  8,  9,  0,  9,  8,  8,  7,  7,  7,  6,
+};
+
+static const uint16_t huff_spec_a43_codes[] = {
+    0x002, 0x03E, 0x016, 0x060, 0x04E, 0x0DC, 0x04A, 0x130, 0x000, 0x131, 0x04B, 0x0DD,
+    0x04F, 0x061, 0x017, 0x03F, 0x002, 0x02C, 0x076, 0x042, 0x034, 0x0CE, 0x002, 0x0E8,
+    0x000, 0x0CF, 0x001, 0x0D1, 0x037, 0x045, 0x07B, 0x02F, 0x014, 0x072, 0x052, 0x01A,
+    0x0E0, 0x080, 0x198, 0x01E, 0x000, 0x01D, 0x19B, 0x083, 0x0DF, 0x019, 0x055, 0x079,
+    0x050, 0x03C, 0x004, 0x0C4, 0x096, 0x00C, 0x0EA, 0x34A, 0x000, 0x34F, 0x0ED, 0x1D7,
+    0x095, 0x0AF, 0x003, 0x03F, 0x046, 0x026, 0x0D6, 0x092, 0x046, 0x15A, 0x3A8, 0x108,
+    0x000, 0x10F, 0x3A3, 0x135, 0x039, 0x091, 0x0D9, 0x031, 0x0D4, 0x0CA, 0x072, 0x1C6,
+    0x136, 0x090, 0x2B2, 0x104, 0x000, 0x103, 0x111, 0x08B, 0x133, 0x1D3, 0x071, 0x0C9,
+    0x03E, 0x1B4, 0x18C, 0x0CC, 0x38A, 0x2B0, 0x106, 0x0F2, 0x000, 0x0EF, 0x101, 0x113,
+    0x3A1, 0x0CB, 0x18F, 0x1B7, 0x0EE, 0x092, 0x388, 0x348, 0x10A, 0x0F4, 0x0F0, 0x0EA,
+    0x000, 0x0E9, 0x0ED, 0x0F7, 0x10D, 0x34D, 0x3AB, 0x0C9, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x0EF, 0x0C8, 0x3AA, 0x34C, 0x10C, 0x0F6, 0x0EC, 0x0E8, 0x000, 0x0EB, 0x0F1, 0x0F5,
+    0x10B, 0x349, 0x389, 0x093, 0x03F, 0x1B6, 0x18E, 0x0CA, 0x3A0, 0x112, 0x100, 0x0EE,
+    0x000, 0x0F3, 0x107, 0x2B1, 0x38B, 0x0CD, 0x18D, 0x1B5, 0x0D5, 0x0C8, 0x070, 0x1D2,
+    0x132, 0x08A, 0x110, 0x102, 0x000, 0x105, 0x2B3, 0x091, 0x137, 0x1C7, 0x073, 0x0CB,
+    0x047, 0x030, 0x0D8, 0x090, 0x038, 0x134, 0x3A2, 0x10E, 0x000, 0x109, 0x3A9, 0x15B,
+    0x047, 0x093, 0x0D7, 0x027, 0x051, 0x03E, 0x002, 0x0AE, 0x094, 0x1D6, 0x0EC, 0x34E,
+    0x000, 0x34B, 0x0EB, 0x00D, 0x097, 0x0C5, 0x005, 0x03D, 0x015, 0x078, 0x054, 0x018,
+    0x0DE, 0x082, 0x19A, 0x01C, 0x000, 0x01F, 0x199, 0x081, 0x0E1, 0x01B, 0x053, 0x073,
+    0x003, 0x02E, 0x07A, 0x044, 0x036, 0x0D0, 0x000, 0x0CE, 0x000, 0x0E9, 0x003, 0x0CF,
+    0x035, 0x043, 0x077, 0x02D,
+};
+
+static const uint8_t huff_spec_a44_bits[] = {
+    4,  5,  6,  7,  7,  8,  9, 10,  0, 10,  9,  8,  7,  7,  6,  5,
+    5,  6,  6,  7,  7,  8,  9, 10,  0, 10,  9,  8,  7,  7,  6,  6,
+    6,  6,  7,  7,  8,  9, 10, 10,  0, 10, 10,  9,  8,  7,  7,  6,
+    7,  7,  7,  8,  8,  9, 10, 10,  0, 10, 10,  9,  8,  8,  7,  7,
+    7,  8,  8,  8,  9, 10, 10, 10,  0, 10, 10, 10,  9,  8,  8,  7,
+    8,  8,  9,  9, 10, 10, 10, 10,  0, 10, 10, 10, 10,  9,  9,  8,
+    9,  9, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10,  9,
+    10, 10, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    10, 10, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10,
+    9,  9, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10,  9,
+    8,  8,  9,  9, 10, 10, 10, 10,  0, 10, 10, 10, 10,  9,  9,  8,
+    7,  7,  8,  8,  9, 10, 10, 10,  0, 10, 10, 10,  9,  8,  8,  8,
+    7,  7,  7,  8,  8,  9, 10, 10,  0, 10, 10,  9,  8,  8,  7,  7,
+    6,  6,  7,  7,  8,  9, 10, 10,  0, 10, 10,  9,  8,  7,  7,  6,
+    5,  6,  6,  7,  7,  8,  9, 10,  0, 10,  9,  8,  7,  7,  6,  6,
+};
+
+static const uint16_t huff_spec_a44_codes[] = {
+    0x00A, 0x012, 0x030, 0x06E, 0x024, 0x074, 0x0EC, 0x07E, 0x000, 0x07F, 0x0ED, 0x075,
+    0x025, 0x06F, 0x031, 0x013, 0x010, 0x03C, 0x018, 0x05A, 0x002, 0x046, 0x09E, 0x07C,
+    0x000, 0x079, 0x0E5, 0x04D, 0x007, 0x065, 0x01B, 0x03F, 0x02E, 0x016, 0x072, 0x01A,
+    0x0D6, 0x1C6, 0x3B4, 0x066, 0x000, 0x06B, 0x3B7, 0x1D9, 0x0D5, 0x021, 0x075, 0x015,
+    0x06C, 0x03E, 0x01E, 0x0CC, 0x044, 0x0F2, 0x082, 0x05C, 0x000, 0x05F, 0x087, 0x0F5,
+    0x031, 0x0CF, 0x017, 0x059, 0x01C, 0x0EE, 0x0D0, 0x024, 0x1C0, 0x08E, 0x06E, 0x048,
+    0x000, 0x04D, 0x06D, 0x089, 0x0F7, 0x033, 0x0D3, 0x001, 0x070, 0x028, 0x1C2, 0x0F0,
+    0x08A, 0x074, 0x054, 0x040, 0x000, 0x043, 0x053, 0x073, 0x099, 0x0EF, 0x1C5, 0x02B,
+    0x0E6, 0x04E, 0x08C, 0x080, 0x068, 0x058, 0x046, 0x02A, 0x000, 0x029, 0x045, 0x051,
+    0x065, 0x085, 0x09B, 0x09D, 0x07A, 0x076, 0x060, 0x056, 0x04E, 0x02C, 0x024, 0x022,
+    0x000, 0x021, 0x027, 0x02F, 0x04B, 0x05B, 0x063, 0x071, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x07B, 0x070, 0x062, 0x05A, 0x04A, 0x02E, 0x026, 0x020, 0x000, 0x023, 0x025, 0x02D,
+    0x04F, 0x057, 0x061, 0x077, 0x0E7, 0x09C, 0x09A, 0x084, 0x064, 0x050, 0x044, 0x028,
+    0x000, 0x02B, 0x047, 0x059, 0x069, 0x081, 0x08D, 0x04F, 0x071, 0x02A, 0x1C4, 0x0EE,
+    0x098, 0x072, 0x052, 0x042, 0x000, 0x041, 0x055, 0x075, 0x08B, 0x0F1, 0x1C3, 0x029,
+    0x01D, 0x000, 0x0D2, 0x032, 0x0F6, 0x088, 0x06C, 0x04C, 0x000, 0x049, 0x06F, 0x08F,
+    0x1C1, 0x025, 0x0D1, 0x0EF, 0x06D, 0x058, 0x016, 0x0CE, 0x030, 0x0F4, 0x086, 0x05E,
+    0x000, 0x05D, 0x083, 0x0F3, 0x045, 0x0CD, 0x01F, 0x03F, 0x02F, 0x014, 0x074, 0x020,
+    0x0D4, 0x1D8, 0x3B6, 0x06A, 0x000, 0x067, 0x3B5, 0x1C7, 0x0D7, 0x01B, 0x073, 0x017,
+    0x011, 0x03E, 0x01A, 0x064, 0x006, 0x04C, 0x0E4, 0x078, 0x000, 0x07D, 0x09F, 0x047,
+    0x003, 0x05B, 0x019, 0x03D,
+};
+
+static const uint8_t huff_spec_a51_bits[] = {
+    5, 5, 5, 5, 5, 6, 6, 6, 4, 4, 5, 5, 5, 5, 5, 5,
+    0, 5, 5, 5, 5, 5, 5, 4, 4, 6, 6, 6, 5, 5, 5, 5,
+};
+
+static const uint16_t huff_spec_a51_codes[] = {
+    0x19, 0x16, 0x12, 0x0E, 0x06, 0x3A, 0x38, 0x30, 0x00, 0x04, 0x1E, 0x1A,
+    0x14, 0x10, 0x0C, 0x04, 0x00, 0x05, 0x0D, 0x11, 0x15, 0x1B, 0x1F, 0x05,
+    0x01, 0x31, 0x39, 0x3B, 0x07, 0x0F, 0x13, 0x17,
+};
+
+static const uint8_t huff_spec_a52_bits[] = {
+    4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,
+    0, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4,
+};
+
+static const uint16_t huff_spec_a52_codes[] = {
+    0x09, 0x04, 0x00, 0x1E, 0x1A, 0x14, 0x0C, 0x06, 0x18, 0x16, 0x0E, 0x04,
+    0x3A, 0x38, 0x22, 0x20, 0x00, 0x21, 0x23, 0x39, 0x3B, 0x05, 0x0F, 0x17,
+    0x19, 0x07, 0x0D, 0x15, 0x1B, 0x1F, 0x01, 0x05,
+};
+
+static const uint8_t huff_spec_a53_bits[] = {
+    3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 7,
+    0, 7, 7, 7, 7, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4,
+};
+
+static const uint16_t huff_spec_a53_codes[] = {
+    0x00, 0x0C, 0x08, 0x04, 0x1E, 0x16, 0x14, 0x06, 0x0C, 0x04, 0x38, 0x1E,
+    0x76, 0x74, 0x3A, 0x38, 0x00, 0x39, 0x3B, 0x75, 0x77, 0x1F, 0x39, 0x05,
+    0x0D, 0x07, 0x15, 0x17, 0x1F, 0x05, 0x09, 0x0D,
+};
+
+static const uint8_t huff_spec_a54_bits[] = {
+    3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8,
+    0, 8, 8, 7, 7, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4,
+};
+
+static const uint16_t huff_spec_a54_codes[] = {
+    0x02, 0x0E, 0x0A, 0x08, 0x02, 0x1A, 0x0E, 0x02, 0x00, 0x30, 0x18, 0x66,
+    0x36, 0x34, 0xCA, 0xC8, 0x00, 0xC9, 0xCB, 0x35, 0x37, 0x67, 0x19, 0x31,
+    0x01, 0x03, 0x0F, 0x1B, 0x03, 0x09, 0x0B, 0x0F,
+};
+
+static const uint8_t huff_spec_a61_bits[] = {
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7,
+    5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5,
+    5, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6,
+};
+
+static const uint16_t huff_spec_a61_codes[] = {
+    0x35, 0x30, 0x2A, 0x28, 0x24, 0x20, 0x18, 0x0E, 0x0C, 0x7E, 0x7C, 0x72,
+    0x70, 0x68, 0x5E, 0x5C, 0x04, 0x0E, 0x08, 0x00, 0x3C, 0x3A, 0x36, 0x32,
+    0x2C, 0x26, 0x22, 0x1A, 0x16, 0x14, 0x06, 0x04, 0x00, 0x05, 0x07, 0x15,
+    0x17, 0x1B, 0x23, 0x27, 0x2D, 0x33, 0x37, 0x3B, 0x3D, 0x01, 0x09, 0x0F,
+    0x05, 0x5D, 0x5F, 0x69, 0x71, 0x73, 0x7D, 0x7F, 0x0D, 0x0F, 0x19, 0x21,
+    0x25, 0x29, 0x2B, 0x31,
+};
+
+static const uint8_t huff_spec_a62_bits[] = {
+    5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5,
+};
+
+static const uint16_t huff_spec_a62_codes[] = {
+    0x14, 0x0E, 0x08, 0x04, 0x02, 0x3E, 0x3C, 0x38, 0x34, 0x30, 0x2A, 0x24,
+    0x1A, 0x18, 0x0E, 0x02, 0x32, 0x36, 0x2C, 0x26, 0x20, 0x16, 0x0C, 0x00,
+    0x76, 0x74, 0x5E, 0x5C, 0x46, 0x44, 0x2A, 0x28, 0x00, 0x29, 0x2B, 0x45,
+    0x47, 0x5D, 0x5F, 0x75, 0x77, 0x01, 0x0D, 0x17, 0x21, 0x27, 0x2D, 0x37,
+    0x33, 0x03, 0x0F, 0x19, 0x1B, 0x25, 0x2B, 0x31, 0x35, 0x39, 0x3D, 0x3F,
+    0x03, 0x05, 0x09, 0x0F,
+};
+
+static const uint8_t huff_spec_a63_bits[] = {
+    4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
+    6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8,
+    0, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5,
+};
+
+static const uint16_t huff_spec_a63_codes[] = {
+    0x00, 0x1C, 0x18, 0x14, 0x10, 0x0A, 0x08, 0x02, 0x3E, 0x36, 0x2E, 0x2C,
+    0x24, 0x1C, 0x0E, 0x08, 0x1E, 0x1A, 0x0C, 0x7A, 0x6A, 0x68, 0x4C, 0x32,
+    0x16, 0x14, 0xF2, 0xF0, 0x9E, 0x9C, 0x62, 0x60, 0x00, 0x61, 0x63, 0x9D,
+    0x9F, 0xF1, 0xF3, 0x15, 0x17, 0x33, 0x4D, 0x69, 0x6B, 0x7B, 0x0D, 0x1B,
+    0x1F, 0x09, 0x0F, 0x1D, 0x25, 0x2D, 0x2F, 0x37, 0x3F, 0x03, 0x09, 0x0B,
+    0x11, 0x15, 0x19, 0x1D,
+};
+
+static const uint8_t huff_spec_a64_bits[] = {
+    4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7,
+    6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9,
+    0, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7,
+    6, 7, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 4,
+};
+
+static const uint16_t huff_spec_a64_codes[] = {
+    0x006, 0x002, 0x01C, 0x01A, 0x016, 0x012, 0x00E, 0x00A, 0x002, 0x03E,
+    0x032, 0x02A, 0x022, 0x020, 0x010, 0x07A, 0x000, 0x078, 0x060, 0x050,
+    0x024, 0x006, 0x0C6, 0x0C4, 0x0A4, 0x04E, 0x00A, 0x008, 0x14E, 0x14C,
+    0x09A, 0x098, 0x000, 0x099, 0x09B, 0x14D, 0x14F, 0x009, 0x00B, 0x04F,
+    0x0A5, 0x0C5, 0x0C7, 0x007, 0x025, 0x051, 0x061, 0x079, 0x001, 0x07B,
+    0x011, 0x021, 0x023, 0x02B, 0x033, 0x03F, 0x003, 0x00B, 0x00F, 0x013,
+    0x017, 0x01B, 0x01D, 0x003,
+};
+
+static const uint8_t huff_spec_a71_bits[] = {
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6,
+    6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+};
+
+static const uint16_t huff_spec_a71_codes[] = {
+    0x6C, 0x66, 0x62, 0x5C, 0x56, 0x50, 0x52, 0x4E, 0x48, 0x3E, 0x36, 0x34, 0x2A, 0x26, 0x1E, 0x16,
+    0x0E, 0x08, 0x00, 0xF6, 0xF4, 0xEE, 0xEC, 0xE2, 0xE0, 0xDA, 0xD2, 0xD0, 0xBE, 0xBC, 0xB2, 0xB0,
+    0x0C, 0x20, 0x1C, 0x16, 0x10, 0x08, 0x02, 0x7E, 0x7C, 0x78, 0x74, 0x72, 0x6E, 0x6A, 0x64, 0x60,
+    0x5A, 0x54, 0x4C, 0x4A, 0x46, 0x44, 0x3C, 0x32, 0x30, 0x28, 0x24, 0x1C, 0x14, 0x0C, 0x0A, 0x02,
+    0x00, 0x03, 0x0B, 0x0D, 0x15, 0x1D, 0x25, 0x29, 0x31, 0x33, 0x3D, 0x45, 0x47, 0x4B, 0x4D, 0x55,
+    0x5B, 0x61, 0x65, 0x6B, 0x6F, 0x73, 0x75, 0x79, 0x7D, 0x7F, 0x03, 0x09, 0x11, 0x17, 0x1D, 0x21,
+    0x0D, 0xB1, 0xB3, 0xBD, 0xBF, 0xD1, 0xD3, 0xDB, 0xE1, 0xE3, 0xED, 0xEF, 0xF5, 0xF7, 0x01, 0x09,
+    0x0F, 0x17, 0x1F, 0x27, 0x2B, 0x35, 0x37, 0x3F, 0x49, 0x4F, 0x53, 0x51, 0x57, 0x5D, 0x63, 0x67,
+};
+
+static const uint8_t huff_spec_a72_bits[] = {
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6,
+};
+
+static const uint16_t huff_spec_a72_codes[] = {
+    0x2A, 0x24, 0x1C, 0x18, 0x12, 0x0E, 0x0A, 0x06, 0x02, 0x7E, 0x7C, 0x7A, 0x76, 0x72, 0x70, 0x6A,
+    0x68, 0x62, 0x5C, 0x5A, 0x52, 0x4E, 0x46, 0x42, 0x3C, 0x34, 0x2A, 0x28, 0x20, 0x12, 0x10, 0x08,
+    0x66, 0x74, 0x6C, 0x64, 0x5E, 0x58, 0x50, 0x44, 0x40, 0x36, 0x2C, 0x22, 0x1A, 0x0A, 0x02, 0x00,
+    0xF2, 0xF0, 0xDE, 0xDC, 0xC2, 0xC0, 0xAE, 0xAC, 0x9A, 0x98, 0x7E, 0x7C, 0x5E, 0x5C, 0x32, 0x30,
+    0x00, 0x31, 0x33, 0x5D, 0x5F, 0x7D, 0x7F, 0x99, 0x9B, 0xAD, 0xAF, 0xC1, 0xC3, 0xDD, 0xDF, 0xF1,
+    0xF3, 0x01, 0x03, 0x0B, 0x1B, 0x23, 0x2D, 0x37, 0x41, 0x45, 0x51, 0x59, 0x5F, 0x65, 0x6D, 0x75,
+    0x67, 0x09, 0x11, 0x13, 0x21, 0x29, 0x2B, 0x35, 0x3D, 0x43, 0x47, 0x4F, 0x53, 0x5B, 0x5D, 0x63,
+    0x69, 0x6B, 0x71, 0x73, 0x77, 0x7B, 0x7D, 0x7F, 0x03, 0x07, 0x0B, 0x0F, 0x13, 0x19, 0x1D, 0x25,
+};
+
+static const uint8_t huff_spec_a73_bits[] = {
+    5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+    0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+};
+
+static const uint16_t huff_spec_a73_codes[] = {
+    0x003, 0x03E, 0x038, 0x034, 0x030, 0x02C, 0x028, 0x024, 0x020, 0x01C, 0x016, 0x014,
+    0x00E, 0x00A, 0x004, 0x000, 0x07A, 0x076, 0x06E, 0x06C, 0x064, 0x05E, 0x056, 0x04E,
+    0x04C, 0x044, 0x036, 0x030, 0x022, 0x018, 0x012, 0x004, 0x03C, 0x03E, 0x032, 0x024,
+    0x020, 0x010, 0x0F2, 0x0F0, 0x0E8, 0x0CE, 0x0BA, 0x0B8, 0x0A8, 0x08C, 0x06A, 0x04E,
+    0x04C, 0x034, 0x00E, 0x00C, 0x1D6, 0x1D4, 0x19A, 0x198, 0x156, 0x154, 0x11E, 0x11C,
+    0x0D2, 0x0D0, 0x06E, 0x06C, 0x000, 0x06D, 0x06F, 0x0D1, 0x0D3, 0x11D, 0x11F, 0x155,
+    0x157, 0x199, 0x19B, 0x1D5, 0x1D7, 0x00D, 0x00F, 0x035, 0x04D, 0x04F, 0x06B, 0x08D,
+    0x0A9, 0x0B9, 0x0BB, 0x0CF, 0x0E9, 0x0F1, 0x0F3, 0x011, 0x021, 0x025, 0x033, 0x03F,
+    0x03D, 0x005, 0x013, 0x019, 0x023, 0x031, 0x037, 0x045, 0x04D, 0x04F, 0x057, 0x05F,
+    0x065, 0x06D, 0x06F, 0x077, 0x07B, 0x001, 0x005, 0x00B, 0x00F, 0x015, 0x017, 0x01D,
+    0x021, 0x025, 0x029, 0x02D, 0x031, 0x035, 0x039, 0x03F,
+};
+
+static const uint8_t huff_spec_a74_bits[] = {
+    5,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+    6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,
+    7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,
+    9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,
+    9,  9,  9,  9,  9,  9,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+    7,  8,  8,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+    6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,
+};
+
+static const uint16_t huff_spec_a74_codes[] = {
+    0x00D, 0x00A, 0x004, 0x000, 0x03A, 0x036, 0x032, 0x030, 0x02C, 0x028, 0x026, 0x022,
+    0x01E, 0x018, 0x012, 0x00E, 0x006, 0x07E, 0x07A, 0x070, 0x06A, 0x05E, 0x056, 0x054,
+    0x048, 0x040, 0x038, 0x022, 0x01A, 0x00A, 0x0F8, 0x0E6, 0x008, 0x0FA, 0x0F0, 0x0D2,
+    0x0BA, 0x0B8, 0x094, 0x084, 0x074, 0x042, 0x032, 0x1E6, 0x1CA, 0x1C8, 0x1A2, 0x12E,
+    0x10E, 0x10C, 0x0EC, 0x082, 0x062, 0x060, 0x3CA, 0x3C8, 0x342, 0x340, 0x25A, 0x258,
+    0x1DE, 0x1DC, 0x102, 0x100, 0x000, 0x101, 0x103, 0x1DD, 0x1DF, 0x259, 0x25B, 0x341,
+    0x343, 0x3C9, 0x3CB, 0x061, 0x063, 0x083, 0x0ED, 0x10D, 0x10F, 0x12F, 0x1A3, 0x1C9,
+    0x1CB, 0x1E7, 0x033, 0x043, 0x075, 0x085, 0x095, 0x0B9, 0x0BB, 0x0D3, 0x0F1, 0x0FB,
+    0x009, 0x0E7, 0x0F9, 0x00B, 0x01B, 0x023, 0x039, 0x041, 0x049, 0x055, 0x057, 0x05F,
+    0x06B, 0x071, 0x07B, 0x07F, 0x007, 0x00F, 0x013, 0x019, 0x01F, 0x023, 0x027, 0x029,
+    0x02D, 0x031, 0x033, 0x037, 0x03B, 0x001, 0x005, 0x00B,
+};
+
+static const uint8_t huff_spec_b22_bits[] = {
+    0,  4,  0,  4,  4,  5,  0,  5,  0,  0,  0,  0,  4,  5,  0,  5,
+    4,  7,  0,  6,  6,  9,  0,  7,  0,  0,  0,  0,  6,  9,  0,  7,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    4,  6,  0,  7,  6,  7,  0,  9,  0,  0,  0,  0,  6,  7,  0,  9,
+    4,  8,  0,  8,  8, 10,  0, 10,  0,  0,  0,  0,  6,  9,  0,  9,
+    5, 10,  0,  9,  9, 10,  0, 10,  0,  0,  0,  0,  7, 10,  0, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    6,  9,  0, 10,  9, 10,  0, 10,  0,  0,  0,  0,  7, 10,  0, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    4,  8,  0,  8,  6,  9,  0,  9,  0,  0,  0,  0,  8, 10,  0, 10,
+    6, 10,  0,  9,  7, 10,  0, 10,  0,  0,  0,  0,  9, 10,  0, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    5,  9,  0, 10,  7, 10,  0, 10,  0,  0,  0,  0,  9, 10,  0, 10,
+};
+
+static const uint16_t huff_spec_b22_codes[] = {
+    0x000, 0x00E, 0x000, 0x00F, 0x008, 0x006, 0x000, 0x00B, 0x000, 0x000, 0x000, 0x000,
+    0x009, 0x00A, 0x000, 0x007, 0x006, 0x00A, 0x000, 0x029, 0x006, 0x158, 0x000, 0x023,
+    0x000, 0x000, 0x000, 0x000, 0x013, 0x174, 0x000, 0x021, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x007, 0x028, 0x000, 0x00B, 0x012, 0x020, 0x000, 0x175, 0x000, 0x000, 0x000, 0x000,
+    0x007, 0x022, 0x000, 0x159, 0x00C, 0x0BC, 0x000, 0x0BF, 0x022, 0x2B8, 0x000, 0x2BB,
+    0x000, 0x000, 0x000, 0x000, 0x00B, 0x170, 0x000, 0x15B, 0x000, 0x04E, 0x000, 0x15F,
+    0x042, 0x04A, 0x000, 0x041, 0x000, 0x000, 0x000, 0x000, 0x055, 0x044, 0x000, 0x04D,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x02D, 0x172, 0x000, 0x2ED, 0x040, 0x042, 0x000, 0x047,
+    0x000, 0x000, 0x000, 0x000, 0x013, 0x2EE, 0x000, 0x049, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x00D, 0x0BE, 0x000, 0x0BD, 0x00A, 0x15A, 0x000, 0x171, 0x000, 0x000, 0x000, 0x000,
+    0x023, 0x2BA, 0x000, 0x2B9, 0x02C, 0x2EC, 0x000, 0x173, 0x012, 0x048, 0x000, 0x2EF,
+    0x000, 0x000, 0x000, 0x000, 0x041, 0x046, 0x000, 0x043, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x001, 0x15E, 0x000, 0x04F, 0x054, 0x04C, 0x000, 0x045, 0x000, 0x000, 0x000, 0x000,
+    0x043, 0x040, 0x000, 0x04B,
+};
+
+static const uint8_t huff_spec_b23_bits[] = {
+    2,  4,  0,  4,  4,  6,  0,  6,  0,  0,  0,  0,  4,  6,  0,  6,
+    4,  9,  0,  7,  7,  9,  0,  8,  0,  0,  0,  0,  7,  9,  0,  8,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    4,  7,  0,  9,  7,  8,  0,  9,  0,  0,  0,  0,  7,  8,  0,  9,
+    4,  8,  0,  8,  9, 10,  0, 10,  0,  0,  0,  0,  7, 10,  0, 10,
+    7, 10,  0, 10, 10, 10,  0, 10,  0,  0,  0,  0,  9, 10,  0, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    7, 10,  0, 10, 10, 10,  0, 10,  0,  0,  0,  0,  8, 10,  0, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    4,  8,  0,  8,  7, 10,  0, 10,  0,  0,  0,  0,  9, 10,  0, 10,
+    7, 10,  0, 10,  8, 10,  0, 10,  0,  0,  0,  0, 10, 10,  0, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    7, 10,  0, 10,  9, 10,  0, 10,  0,  0,  0,  0, 10, 10,  0, 10,
+};
+
+static const uint16_t huff_spec_b23_codes[] = {
+    0x003, 0x008, 0x000, 0x009, 0x002, 0x018, 0x000, 0x01B, 0x000, 0x000, 0x000, 0x000,
+    0x003, 0x01A, 0x000, 0x019, 0x000, 0x17C, 0x000, 0x055, 0x056, 0x0E8, 0x000, 0x07D,
+    0x000, 0x000, 0x000, 0x000, 0x059, 0x0F6, 0x000, 0x07F, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x001, 0x054, 0x000, 0x17D, 0x058, 0x07E, 0x000, 0x0F7, 0x000, 0x000, 0x000, 0x000,
+    0x057, 0x07C, 0x000, 0x0E9, 0x004, 0x0A2, 0x000, 0x0A1, 0x17A, 0x1DA, 0x000, 0x1D9,
+    0x000, 0x000, 0x000, 0x000, 0x053, 0x1E8, 0x000, 0x2F3, 0x05C, 0x1D6, 0x000, 0x1E7,
+    0x1EA, 0x1E2, 0x000, 0x1CF, 0x000, 0x000, 0x000, 0x000, 0x17F, 0x1CA, 0x000, 0x1DD,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x05B, 0x2F0, 0x000, 0x1DF, 0x1E4, 0x1CC, 0x000, 0x1D5,
+    0x000, 0x000, 0x000, 0x000, 0x071, 0x1E0, 0x000, 0x1C9, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x005, 0x0A0, 0x000, 0x0A3, 0x052, 0x2F2, 0x000, 0x1E9, 0x000, 0x000, 0x000, 0x000,
+    0x17B, 0x1D8, 0x000, 0x1DB, 0x05A, 0x1DE, 0x000, 0x2F1, 0x070, 0x1C8, 0x000, 0x1E1,
+    0x000, 0x000, 0x000, 0x000, 0x1E5, 0x1D4, 0x000, 0x1CD, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x05D, 0x1E6, 0x000, 0x1D7, 0x17E, 0x1DC, 0x000, 0x1CB, 0x000, 0x000, 0x000, 0x000,
+    0x1EB, 0x1CE, 0x000, 0x1E3,
+};
+
+static const uint8_t huff_spec_b24_bits[] = {
+    1,  4,  0,  4,  5,  7,  0,  7,  0,  0,  0,  0,  5,  7,  0,  7,
+    5,  9,  0,  7,  8, 10,  0,  9,  0,  0,  0,  0,  7, 10,  0,  9,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    5,  7,  0,  9,  7,  9,  0, 10,  0,  0,  0,  0,  8,  9,  0, 10,
+    5,  9,  0,  8,  9, 10,  0, 10,  0,  0,  0,  0,  7, 10,  0, 10,
+    7, 10,  0, 10, 10, 10,  0, 10,  0,  0,  0,  0, 10, 10,  0, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    7, 10,  0, 10, 10, 10,  0, 10,  0,  0,  0,  0, 10, 10,  0, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    5,  8,  0,  9,  7, 10,  0, 10,  0,  0,  0,  0,  9, 10,  0, 10,
+    7, 10,  0, 10, 10, 10,  0, 10,  0,  0,  0,  0, 10, 10,  0, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    7, 10,  0, 10, 10, 10,  0, 10,  0,  0,  0,  0, 10, 10,  0, 10,
+};
+
+static const uint16_t huff_spec_b24_codes[] = {
+    0x001, 0x000, 0x000, 0x001, 0x00A, 0x01C, 0x000, 0x033, 0x000, 0x000, 0x000, 0x000,
+    0x00B, 0x032, 0x000, 0x01D, 0x008, 0x0D8, 0x000, 0x031, 0x06E, 0x0FA, 0x000, 0x0D7,
+    0x000, 0x000, 0x000, 0x000, 0x011, 0x0F4, 0x000, 0x0D5, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x009, 0x030, 0x000, 0x0D9, 0x010, 0x0D4, 0x000, 0x0F5, 0x000, 0x000, 0x000, 0x000,
+    0x06F, 0x0D6, 0x000, 0x0FB, 0x00E, 0x0DA, 0x000, 0x025, 0x0D2, 0x0D4, 0x000, 0x0DB,
+    0x000, 0x000, 0x000, 0x000, 0x017, 0x0FE, 0x000, 0x0FD, 0x014, 0x0DC, 0x000, 0x0F9,
+    0x0F2, 0x0D6, 0x000, 0x09B, 0x000, 0x000, 0x000, 0x000, 0x1A3, 0x09C, 0x000, 0x0D3,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x019, 0x0F6, 0x000, 0x0D9, 0x0F0, 0x09E, 0x000, 0x0D1,
+    0x000, 0x000, 0x000, 0x000, 0x1A1, 0x0DE, 0x000, 0x099, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x00F, 0x024, 0x000, 0x0DB, 0x016, 0x0FC, 0x000, 0x0FF, 0x000, 0x000, 0x000, 0x000,
+    0x0D3, 0x0DA, 0x000, 0x0D5, 0x018, 0x0D8, 0x000, 0x0F7, 0x1A0, 0x098, 0x000, 0x0DF,
+    0x000, 0x000, 0x000, 0x000, 0x0F1, 0x0D0, 0x000, 0x09F, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x015, 0x0F8, 0x000, 0x0DD, 0x1A2, 0x0D2, 0x000, 0x09D, 0x000, 0x000, 0x000, 0x000,
+    0x0F3, 0x09A, 0x000, 0x0D7
+};
+
+static const uint8_t huff_spec_b32_bits[] = {
+    2, 4, 5, 6, 0, 6, 5, 4, 5, 6, 6, 7, 0, 6, 5, 6,
+    5, 6, 7, 7, 0, 8, 7, 6, 6, 7, 8, 9, 0, 9, 8, 7,
+    0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 0, 9, 8, 7,
+    5, 6, 7, 8, 0, 7, 7, 6, 5, 6, 5, 6, 0, 7, 6, 6,
+};
+
+static const uint16_t huff_spec_b32_codes[] = {
+    0x001, 0x002, 0x01E, 0x02A, 0x000, 0x02B, 0x01F, 0x003, 0x016, 0x020, 0x03A, 0x064,
+    0x000, 0x005, 0x001, 0x023, 0x01A, 0x026, 0x070, 0x00C, 0x000, 0x0CF, 0x073, 0x031,
+    0x024, 0x00E, 0x0CC, 0x146, 0x000, 0x145, 0x0A1, 0x053, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x025, 0x052, 0x0A0, 0x144, 0x000, 0x147, 0x0CD, 0x00F,
+    0x01B, 0x030, 0x072, 0x0CE, 0x000, 0x00D, 0x071, 0x027, 0x017, 0x022, 0x000, 0x004,
+    0x000, 0x065, 0x03B, 0x021,
+};
+
+static const uint8_t huff_spec_b33_bits[] = {
+    2,  4,  5,  7,  0,  7,  5,  4,  4,  5,  6,  8,  0,  7,  6,  5,
+    5,  6,  7,  9,  0,  8,  7,  6,  7,  8,  9, 10,  0, 10,  9,  8,
+    0,  0,  0,  0,  0,  0,  0,  0,  7,  8,  9, 10,  0, 10,  9,  8,
+    5,  6,  7,  8,  0,  9,  7,  6,  4,  5,  6,  7,  0,  8,  6,  5,
+};
+
+static const uint16_t huff_spec_b33_codes[] = {
+    0x003, 0x008, 0x014, 0x05E, 0x000, 0x05F, 0x015, 0x009, 0x004, 0x002, 0x01C, 0x0BA,
+    0x000, 0x011, 0x01F, 0x001, 0x00C, 0x00C, 0x014, 0x166, 0x000, 0x02D, 0x013, 0x00F,
+    0x05A, 0x0B0, 0x05E, 0x0B8, 0x000, 0x0BB, 0x165, 0x0B9, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x05B, 0x0B8, 0x164, 0x0BA, 0x000, 0x0B9, 0x05F, 0x0B1,
+    0x00D, 0x00E, 0x012, 0x02C, 0x000, 0x167, 0x015, 0x00D, 0x005, 0x000, 0x01E, 0x010,
+    0x000, 0x0BB, 0x01D, 0x003
+};
+
+static const uint8_t huff_spec_b34_bits[] = {
+    1,  4,  6,  8,  0,  8,  6,  4,  4,  6,  7,  9,  0,  8,  7,  6,
+    6,  7,  8, 10,  0, 10,  8,  7,  8,  9, 10, 10,  0, 10, 10,  9,
+    0,  0,  0,  0,  0,  0,  0,  0,  8,  9, 10, 10,  0, 10, 10,  9,
+    6,  7,  8, 10,  0, 10,  8,  7,  4,  6,  7,  8,  0,  9,  7,  6,
+};
+
+static const uint16_t huff_spec_b34_codes[] = {
+    0x000, 0x00A, 0x038, 0x0EE, 0x000, 0x0EF, 0x039, 0x00B, 0x008, 0x03C, 0x06E, 0x1D8,
+    0x000, 0x0C1, 0x075, 0x03F, 0x032, 0x068, 0x0C4, 0x358, 0x000, 0x30F, 0x0C7, 0x06D,
+    0x0D4, 0x1AE, 0x30C, 0x308, 0x000, 0x30B, 0x35B, 0x1DB, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x0D5, 0x1DA, 0x35A, 0x30A, 0x000, 0x309, 0x30D, 0x1AF,
+    0x033, 0x06C, 0x0C6, 0x30E, 0x000, 0x359, 0x0C5, 0x069, 0x009, 0x03E, 0x074, 0x0C0,
+    0x000, 0x1D9, 0x06F, 0x03D,
+};
+
+static const uint8_t huff_spec_b42_bits[] = {
+    4,  5,  6,  8,  6,  7,  8,  8,  0,  8,  8,  7,  6,  8,  6,  5,
+    5,  6,  7,  8,  7,  7,  8,  9,  0,  8,  8,  7,  7,  8,  7,  6,
+    7,  7,  8,  9,  7,  8,  9,  9,  0,  9,  9,  8,  7,  9,  8,  7,
+    8,  9,  9, 10,  8,  8,  9, 10,  0, 10,  9,  8,  8, 10,  9,  8,
+    6,  7,  8,  8,  9,  9, 10, 10,  0, 10, 10,  9,  9,  8,  8,  7,
+    7,  7,  8,  9,  9, 10, 10, 10,  0, 10, 10, 10,  9,  9,  8,  7,
+    8,  8,  9,  9, 10, 10, 10, 10,  0, 10, 10, 10, 10,  9,  9,  8,
+    8,  9,  9, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10,  9,  9,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    8,  9,  9, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10,  9,  9,
+    8,  8,  9,  9, 10, 10, 10, 10,  0, 10, 10, 10, 10,  9,  9,  8,
+    7,  7,  8,  9,  9, 10, 10, 10,  0, 10, 10, 10,  9,  9,  8,  7,
+    6,  7,  8,  8,  9,  9, 10, 10,  0, 10, 10,  9,  9,  8,  8,  7,
+    8,  8,  9, 10,  8,  8,  9, 10,  0, 10,  9,  8,  8, 10,  9,  9,
+    7,  7,  8,  9,  7,  8,  9,  9,  0,  9,  9,  8,  7,  9,  8,  7,
+    5,  6,  7,  8,  7,  7,  8,  8,  0,  9,  8,  7,  7,  8,  7,  6,
+};
+
+static const uint16_t huff_spec_b42_codes[] = {
+    0x00E, 0x018, 0x010, 0x0F0, 0x024, 0x05A, 0x0F6, 0x078, 0x000, 0x079, 0x0F7, 0x05B,
+    0x025, 0x0F1, 0x011, 0x019, 0x00C, 0x014, 0x01C, 0x036, 0x05C, 0x012, 0x09E, 0x1E4,
+    0x000, 0x00B, 0x0A9, 0x03B, 0x05F, 0x071, 0x019, 0x017, 0x06E, 0x000, 0x03E, 0x114,
+    0x002, 0x0B0, 0x1AA, 0x07A, 0x000, 0x099, 0x1E7, 0x0B3, 0x00B, 0x131, 0x07F, 0x00D,
+    0x0D8, 0x1FE, 0x112, 0x22E, 0x086, 0x010, 0x134, 0x35C, 0x000, 0x35F, 0x133, 0x013,
+    0x081, 0x22D, 0x119, 0x07B, 0x00A, 0x050, 0x0F8, 0x04E, 0x1B4, 0x154, 0x3EC, 0x0D2,
+    0x000, 0x0D7, 0x3D7, 0x137, 0x1FD, 0x073, 0x0FD, 0x057, 0x052, 0x010, 0x08E, 0x1E8,
+    0x11A, 0x3EE, 0x0F2, 0x03C, 0x000, 0x03F, 0x0F1, 0x3D5, 0x111, 0x1F5, 0x09D, 0x025,
+    0x0D2, 0x082, 0x1A0, 0x0F8, 0x36E, 0x0D4, 0x072, 0x03A, 0x000, 0x027, 0x071, 0x07D,
+    0x36D, 0x0FB, 0x1AD, 0x085, 0x00C, 0x1A8, 0x03C, 0x346, 0x0D0, 0x076, 0x024, 0x020,
+    0x000, 0x023, 0x039, 0x075, 0x07F, 0x345, 0x09B, 0x157, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x00D, 0x156, 0x09A, 0x344, 0x07E, 0x074, 0x038, 0x022, 0x000, 0x021, 0x025, 0x077,
+    0x0D1, 0x347, 0x03D, 0x1A9, 0x0D3, 0x084, 0x1AC, 0x0FA, 0x36C, 0x07C, 0x070, 0x026,
+    0x000, 0x03B, 0x073, 0x0D5, 0x36F, 0x0F9, 0x1A1, 0x083, 0x053, 0x024, 0x09C, 0x1F4,
+    0x110, 0x3D4, 0x0F0, 0x03E, 0x000, 0x03D, 0x0F3, 0x3EF, 0x11B, 0x1E9, 0x08F, 0x011,
+    0x00B, 0x056, 0x0FC, 0x072, 0x1FC, 0x136, 0x3D6, 0x0D6, 0x000, 0x0D3, 0x3ED, 0x155,
+    0x1B5, 0x04F, 0x0F9, 0x051, 0x0D9, 0x07A, 0x118, 0x22C, 0x080, 0x012, 0x132, 0x35E,
+    0x000, 0x35D, 0x135, 0x011, 0x087, 0x22F, 0x113, 0x1FF, 0x06F, 0x00C, 0x07E, 0x130,
+    0x00A, 0x0B2, 0x1E6, 0x098, 0x000, 0x07B, 0x1AB, 0x0B1, 0x003, 0x115, 0x03F, 0x001,
+    0x00D, 0x016, 0x018, 0x070, 0x05E, 0x03A, 0x0A8, 0x00A, 0x000, 0x1E5, 0x09F, 0x013,
+    0x05D, 0x037, 0x01D, 0x015,
+};
+
+static const uint8_t huff_spec_b43_bits[] = {
+    2,  5,  6,  7,  7,  8,  8,  9,  0,  9,  8,  8,  7,  7,  6,  5,
+    5,  6,  7,  8,  7,  8,  9, 10,  0, 10,  9,  8,  7,  8,  7,  6,
+    6,  7,  8,  9,  8,  9, 10, 10,  0, 10, 10,  9,  8,  9,  8,  7,
+    7,  8,  9, 10,  9,  9, 10, 10,  0, 10, 10, 10,  9, 10,  9,  8,
+    7,  8,  8,  9, 10, 10, 10, 10,  0, 10, 10, 10, 10,  9,  8,  7,
+    8,  8,  9, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10,  9,  8,
+    9,  9, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10,  9,
+    10, 10, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    10, 10, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10,
+    9,  9, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10,  9,
+    8,  8,  9, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10,  9,  8,
+    7,  7,  8,  9, 10, 10, 10, 10,  0, 10, 10, 10, 10,  9,  8,  8,
+    7,  8,  9, 10,  9, 10, 10, 10,  0, 10, 10,  9,  9, 10,  9,  8,
+    6,  7,  8,  9,  8,  9, 10, 10,  0, 10, 10,  9,  8,  9,  8,  7,
+    5,  6,  7,  8,  7,  8,  9, 10,  0, 10,  9,  8,  7,  8,  7,  6,
+};
+
+static const uint16_t huff_spec_b43_codes[] = {
+    0x001, 0x01E, 0x022, 0x018, 0x064, 0x0EC, 0x008, 0x100, 0x000, 0x101, 0x009, 0x0ED,
+    0x065, 0x019, 0x023, 0x01F, 0x01A, 0x030, 0x056, 0x09A, 0x00A, 0x090, 0x12C, 0x0A6,
+    0x000, 0x0A9, 0x12F, 0x093, 0x00F, 0x09F, 0x059, 0x039, 0x00E, 0x054, 0x0BC, 0x19E,
+    0x082, 0x176, 0x0AC, 0x088, 0x000, 0x08B, 0x0AF, 0x19D, 0x095, 0x1D1, 0x0BF, 0x051,
+    0x002, 0x098, 0x1D4, 0x0B8, 0x170, 0x046, 0x090, 0x060, 0x000, 0x067, 0x095, 0x0BD,
+    0x173, 0x0B5, 0x1D3, 0x09D, 0x052, 0x0EE, 0x034, 0x174, 0x0BA, 0x09C, 0x080, 0x044,
+    0x000, 0x047, 0x06D, 0x099, 0x0BF, 0x16F, 0x085, 0x001, 0x0CC, 0x036, 0x16C, 0x0B0,
+    0x09A, 0x084, 0x04E, 0x03E, 0x000, 0x037, 0x04B, 0x06B, 0x0A1, 0x0B3, 0x16B, 0x087,
+    0x1D6, 0x102, 0x0A4, 0x092, 0x068, 0x04C, 0x034, 0x030, 0x000, 0x02D, 0x03D, 0x049,
+    0x083, 0x097, 0x0AB, 0x169, 0x0B6, 0x09E, 0x06E, 0x064, 0x040, 0x038, 0x02E, 0x02A,
+    0x000, 0x029, 0x033, 0x03B, 0x043, 0x063, 0x087, 0x0A3, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x0B7, 0x0A2, 0x086, 0x062, 0x042, 0x03A, 0x032, 0x028, 0x000, 0x02B, 0x02F, 0x039,
+    0x041, 0x065, 0x06F, 0x09F, 0x1D7, 0x168, 0x0AA, 0x096, 0x082, 0x048, 0x03C, 0x02C,
+    0x000, 0x031, 0x035, 0x04D, 0x069, 0x093, 0x0A5, 0x103, 0x0CD, 0x086, 0x16A, 0x0B2,
+    0x0A0, 0x06A, 0x04A, 0x036, 0x000, 0x03F, 0x04F, 0x085, 0x09B, 0x0B1, 0x16D, 0x037,
+    0x053, 0x000, 0x084, 0x16E, 0x0BE, 0x098, 0x06C, 0x046, 0x000, 0x045, 0x081, 0x09D,
+    0x0BB, 0x175, 0x035, 0x0EF, 0x003, 0x09C, 0x1D2, 0x0B4, 0x172, 0x0BC, 0x094, 0x066,
+    0x000, 0x061, 0x091, 0x047, 0x171, 0x0B9, 0x1D5, 0x099, 0x00F, 0x050, 0x0BE, 0x1D0,
+    0x094, 0x19C, 0x0AE, 0x08A, 0x000, 0x089, 0x0AD, 0x177, 0x083, 0x19F, 0x0BD, 0x055,
+    0x01B, 0x038, 0x058, 0x09E, 0x00E, 0x092, 0x12E, 0x0A8, 0x000, 0x0A7, 0x12D, 0x091,
+    0x00B, 0x09B, 0x057, 0x031,
+};
+
+static const uint8_t huff_spec_b44_bits[] = {
+    2,  4,  6,  7,  7,  8, 10, 10,  0, 10, 10,  8,  7,  7,  6,  4,
+    5,  5,  7,  8,  8, 10, 10, 10,  0, 10, 10, 10,  8,  8,  7,  5,
+    6,  7,  8,  9,  9, 10, 10, 10,  0, 10, 10, 10, 10,  9,  8,  7,
+    8,  8,  9, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10,  8,
+    8,  8, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10,  8,
+    9, 10, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+    10, 10, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10,
+    9, 10, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10,
+    8,  8, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10,  8,
+    8,  8, 10, 10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10,  9,  8,
+    6,  7,  8,  9, 10, 10, 10, 10,  0, 10, 10, 10,  9,  9,  8,  7,
+    5,  5,  7,  8,  8, 10, 10, 10,  0, 10, 10, 10,  8,  8,  7,  5,
+};
+
+static const uint16_t huff_spec_b44_codes[] = {
+    0x002, 0x002, 0x030, 0x000, 0x002, 0x00C, 0x1D2, 0x1AE, 0x000, 0x1AF, 0x1D3, 0x00D,
+    0x003, 0x001, 0x031, 0x003, 0x01E, 0x002, 0x070, 0x0C8, 0x07E, 0x1E8, 0x1C0, 0x176,
+    0x000, 0x17F, 0x1C3, 0x1EB, 0x0CF, 0x0D3, 0x073, 0x009, 0x018, 0x06A, 0x0EC, 0x1DE,
+    0x1A2, 0x1CA, 0x1AA, 0x164, 0x000, 0x16D, 0x1AD, 0x1D1, 0x1EF, 0x1DD, 0x0EB, 0x06D,
+    0x0E8, 0x0CA, 0x1BE, 0x1CE, 0x1DA, 0x1B6, 0x170, 0x154, 0x000, 0x153, 0x173, 0x1B1,
+    0x1D7, 0x1D5, 0x343, 0x0CD, 0x0DC, 0x078, 0x340, 0x1CC, 0x1BA, 0x1A8, 0x156, 0x148,
+    0x000, 0x145, 0x15F, 0x1A1, 0x1BD, 0x1D9, 0x1ED, 0x07D, 0x1BC, 0x1DC, 0x1C4, 0x1B2,
+    0x17C, 0x15A, 0x14A, 0x03A, 0x000, 0x039, 0x147, 0x16B, 0x17B, 0x1B5, 0x1C9, 0x1DF,
+    0x1C6, 0x1B8, 0x1A2, 0x168, 0x160, 0x14C, 0x02E, 0x024, 0x000, 0x027, 0x03D, 0x151,
+    0x15D, 0x16F, 0x1A7, 0x1BF, 0x1A4, 0x174, 0x162, 0x14E, 0x140, 0x02C, 0x02A, 0x022,
+    0x000, 0x021, 0x029, 0x03F, 0x143, 0x159, 0x167, 0x179, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x1A5, 0x178, 0x166, 0x158, 0x142, 0x03E, 0x028, 0x020, 0x000, 0x023, 0x02B, 0x02D,
+    0x141, 0x14F, 0x163, 0x175, 0x1C7, 0x1BE, 0x1A6, 0x16E, 0x15C, 0x150, 0x03C, 0x026,
+    0x000, 0x025, 0x02F, 0x14D, 0x161, 0x169, 0x1A3, 0x1B9, 0x1BD, 0x1DE, 0x1C8, 0x1B4,
+    0x17A, 0x16A, 0x146, 0x038, 0x000, 0x03B, 0x14B, 0x15B, 0x17D, 0x1B3, 0x1C5, 0x1DD,
+    0x0DD, 0x07C, 0x1EC, 0x1D8, 0x1BC, 0x1A0, 0x15E, 0x144, 0x000, 0x149, 0x157, 0x1A9,
+    0x1BB, 0x1CD, 0x341, 0x079, 0x0E9, 0x0CC, 0x342, 0x1D4, 0x1D6, 0x1B0, 0x172, 0x152,
+    0x000, 0x155, 0x171, 0x1B7, 0x1DB, 0x1CF, 0x1BF, 0x0CB, 0x019, 0x06C, 0x0EA, 0x1DC,
+    0x1EE, 0x1D0, 0x1AC, 0x16C, 0x000, 0x165, 0x1AB, 0x1CB, 0x1A3, 0x1DF, 0x0ED, 0x06B,
+    0x01F, 0x008, 0x072, 0x0D2, 0x0CE, 0x1EA, 0x1C2, 0x17E, 0x000, 0x177, 0x1C1, 0x1E9,
+    0x07F, 0x0C9, 0x071, 0x003,
+};
+
+static const uint8_t huff_spec_b52_bits[] = {
+    3, 4, 4, 4, 5, 5, 6, 6, 5, 5, 5, 6, 6, 6, 7, 7,
+    0, 7, 7, 6, 6, 6, 5, 5, 5, 6, 6, 5, 5, 4, 4, 4,
+};
+
+static const uint16_t huff_spec_b52_codes[] = {
+    0x06, 0x0E, 0x06, 0x00, 0x0A, 0x04, 0x2C, 0x12, 0x14, 0x10, 0x06, 0x2E, 0x24, 0x10, 0x4E, 0x4C,
+    0x00, 0x4D, 0x4F, 0x11, 0x25, 0x2F, 0x07, 0x11, 0x15, 0x13, 0x2D, 0x05, 0x0B, 0x01, 0x07, 0x0F,
+};
+
+static const uint8_t huff_spec_b53_bits[] = {
+    2, 3, 4, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8,
+    0, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 4, 3,
+};
+
+static const uint16_t huff_spec_b53_codes[] = {
+    0x02, 0x00, 0x06, 0x1C, 0x18, 0x3E, 0x16, 0x10, 0x3C, 0x36, 0x14, 0x6A, 0x26, 0x24, 0xD2, 0xD0,
+    0x00, 0xD1, 0xD3, 0x25, 0x27, 0x6B, 0x15, 0x37, 0x3D, 0x11, 0x17, 0x3F, 0x19, 0x1D, 0x07, 0x01,
+};
+
+static const uint8_t huff_spec_b54_bits[] = {
+    2, 3, 4, 4, 5, 6, 6, 7, 6, 6, 7, 8, 8, 8, 9, 9,
+    0, 9, 9, 8, 8, 8, 7, 6, 6, 7, 6, 6, 5, 4, 4, 3,
+};
+
+static const uint16_t huff_spec_b54_codes[] = {
+    0x003, 0x002, 0x008, 0x000, 0x014, 0x02E, 0x00E, 0x05A, 0x00A, 0x008, 0x01A, 0x0B2,
+    0x032, 0x030, 0x162, 0x160, 0x000, 0x161, 0x163, 0x031, 0x033, 0x0B3, 0x01B, 0x009,
+    0x00B, 0x05B, 0x00F, 0x02F, 0x015, 0x001, 0x009, 0x003,
+};
+
+static const uint8_t huff_spec_b62_bits[] = {
+    4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
+    6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+    0, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6,
+    6, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 4,
+};
+
+static const uint16_t huff_spec_b62_codes[] = {
+    0x0D, 0x06, 0x1C, 0x14, 0x0A, 0x04, 0x3E, 0x2E, 0x22, 0x0E, 0x06, 0x00, 0x5A, 0x4E, 0x40, 0x20,
+    0x30, 0x32, 0x24, 0x12, 0x0C, 0x02, 0x78, 0x58, 0x42, 0x22, 0x0A, 0x08, 0xF6, 0xF4, 0x9A, 0x98,
+    0x00, 0x99, 0x9B, 0xF5, 0xF7, 0x09, 0x0B, 0x23, 0x43, 0x59, 0x79, 0x03, 0x0D, 0x13, 0x25, 0x33,
+    0x31, 0x21, 0x41, 0x4F, 0x5B, 0x01, 0x07, 0x0F, 0x23, 0x2F, 0x3F, 0x05, 0x0B, 0x15, 0x1D, 0x07,
+};
+
+static const uint8_t huff_spec_b63_bits[] = {
+    3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8,
+    6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,
+    0, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6,
+    6, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4,
+};
+
+static const uint16_t huff_spec_b63_codes[] = {
+    0x006, 0x00E, 0x004, 0x014, 0x010, 0x006, 0x000, 0x026, 0x01C, 0x018, 0x004, 0x05C,
+    0x04A, 0x03C, 0x016, 0x0BC, 0x006, 0x008, 0x058, 0x03E, 0x036, 0x014, 0x0B6, 0x0B4,
+    0x090, 0x068, 0x17E, 0x17C, 0x126, 0x124, 0x0D6, 0x0D4, 0x000, 0x0D5, 0x0D7, 0x125,
+    0x127, 0x17D, 0x17F, 0x069, 0x091, 0x0B5, 0x0B7, 0x015, 0x037, 0x03F, 0x059, 0x009,
+    0x007, 0x0BD, 0x017, 0x03D, 0x04B, 0x05D, 0x005, 0x019, 0x01D, 0x027, 0x001, 0x007,
+    0x011, 0x015, 0x005, 0x00F,
+};
+
+static const uint8_t huff_spec_b64_bits[] = {
+    3,  3,  4,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  7,  8,
+    7,  7,  7,  8,  8,  8,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10,
+    0, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  8,  8,  8,  7,  7,
+    7,  8,  7,  7,  7,  7,  7,  6,  6,  6,  6,  5,  5,  5,  4,  3,
+};
+
+static const uint16_t huff_spec_b64_codes[] = {
+    0x007, 0x000, 0x008, 0x01A, 0x014, 0x00C, 0x032, 0x02E, 0x01E, 0x014, 0x062, 0x05A,
+    0x03A, 0x026, 0x020, 0x0B2, 0x038, 0x02C, 0x022, 0x0C0, 0x05E, 0x04A, 0x186, 0x184,
+    0x160, 0x0BA, 0x092, 0x090, 0x2C6, 0x2C4, 0x172, 0x170, 0x000, 0x171, 0x173, 0x2C5,
+    0x2C7, 0x091, 0x093, 0x0BB, 0x161, 0x185, 0x187, 0x04B, 0x05F, 0x0C1, 0x023, 0x02D,
+    0x039, 0x0B3, 0x021, 0x027, 0x03B, 0x05B, 0x063, 0x015, 0x01F, 0x02F, 0x033, 0x00D,
+    0x015, 0x01B, 0x009, 0x001,
+};
+
+static const uint8_t huff_spec_b72_bits[] = {
+    5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+    0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5,
+};
+
+static const uint16_t huff_spec_b72_codes[] = {
+    0x01E, 0x016, 0x00C, 0x000, 0x038, 0x032, 0x028, 0x022, 0x01C, 0x012, 0x00E, 0x006,
+    0x076, 0x06C, 0x060, 0x04E, 0x03E, 0x02A, 0x022, 0x01A, 0x012, 0x00A, 0x0FC, 0x0DC,
+    0x0C6, 0x0A8, 0x094, 0x086, 0x058, 0x042, 0x040, 0x02A, 0x068, 0x07C, 0x06A, 0x056,
+    0x048, 0x040, 0x02E, 0x028, 0x016, 0x010, 0x008, 0x0EA, 0x0DE, 0x0AA, 0x09A, 0x096,
+    0x07A, 0x078, 0x05A, 0x032, 0x030, 0x028, 0x1FE, 0x1FC, 0x1D2, 0x1D0, 0x18A, 0x188,
+    0x132, 0x130, 0x10A, 0x108, 0x000, 0x109, 0x10B, 0x131, 0x133, 0x189, 0x18B, 0x1D1,
+    0x1D3, 0x1FD, 0x1FF, 0x029, 0x031, 0x033, 0x05B, 0x079, 0x07B, 0x097, 0x09B, 0x0AB,
+    0x0DF, 0x0EB, 0x009, 0x011, 0x017, 0x029, 0x02F, 0x041, 0x049, 0x057, 0x06B, 0x07D,
+    0x069, 0x02B, 0x041, 0x043, 0x059, 0x087, 0x095, 0x0A9, 0x0C7, 0x0DD, 0x0FD, 0x00B,
+    0x013, 0x01B, 0x023, 0x02B, 0x03F, 0x04F, 0x061, 0x06D, 0x077, 0x007, 0x00F, 0x013,
+    0x01D, 0x023, 0x029, 0x033, 0x039, 0x001, 0x00D, 0x017,
+};
+
+static const uint8_t huff_spec_b73_bits[] = {
+    3,  4,  5,  5,  5,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,
+    7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,
+    8,  7,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,
+    9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,
+    9,  9,  9,  9,  9,  9,  9,  8,  8,  8,  8,  8,  8,  8,  8,  7,
+    8,  9,  9,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,
+    7,  7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  5,  5,  5,  4,
+};
+
+static const uint16_t huff_spec_b73_codes[] = {
+    0x000, 0x006, 0x018, 0x010, 0x004, 0x03A, 0x034, 0x02A, 0x026, 0x014, 0x010, 0x07E,
+    0x072, 0x06E, 0x05C, 0x052, 0x04A, 0x02C, 0x024, 0x018, 0x0F4, 0x0E0, 0x0DA, 0x0B6,
+    0x0B2, 0x0A0, 0x05E, 0x04E, 0x038, 0x034, 0x1E6, 0x1B2, 0x0FA, 0x01E, 0x0F8, 0x0F0,
+    0x0BE, 0x0B4, 0x0A2, 0x090, 0x04C, 0x03A, 0x1EE, 0x1E4, 0x1C6, 0x1B0, 0x178, 0x162,
+    0x126, 0x124, 0x0B8, 0x06C, 0x3DA, 0x3D8, 0x38A, 0x388, 0x2F6, 0x2F4, 0x2C2, 0x2C0,
+    0x176, 0x174, 0x0DC, 0x0DE, 0x000, 0x0DF, 0x0DD, 0x175, 0x177, 0x2C1, 0x2C3, 0x2F5,
+    0x2F7, 0x389, 0x38B, 0x3D9, 0x3DB, 0x06D, 0x0B9, 0x125, 0x127, 0x163, 0x179, 0x1B1,
+    0x1C7, 0x1E5, 0x1EF, 0x03B, 0x04D, 0x091, 0x0A3, 0x0B5, 0x0BF, 0x0F1, 0x0F9, 0x01F,
+    0x0FB, 0x1B3, 0x1E7, 0x035, 0x039, 0x04F, 0x05F, 0x0A1, 0x0B3, 0x0B7, 0x0DB, 0x0E1,
+    0x0F5, 0x019, 0x025, 0x02D, 0x04B, 0x053, 0x05D, 0x06F, 0x073, 0x07F, 0x011, 0x015,
+    0x027, 0x02B, 0x035, 0x03B, 0x005, 0x011, 0x019, 0x007,
+};
+
+static const uint8_t huff_spec_b74_bits[] = {
+    3,  4,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,
+    7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,
+    8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,  8,
+    8,  9,  9,  9,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,
+    7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  4,
+};
+
+static const uint16_t huff_spec_b74_codes[] = {
+    0x001, 0x008, 0x01E, 0x018, 0x00C, 0x002, 0x03A, 0x034, 0x02C, 0x01E, 0x016, 0x012,
+    0x072, 0x06E, 0x05E, 0x056, 0x050, 0x038, 0x022, 0x004, 0x0E2, 0x0DA, 0x0BA, 0x0A8,
+    0x076, 0x054, 0x050, 0x002, 0x000, 0x1C0, 0x1B0, 0x156, 0x0A4, 0x0A6, 0x074, 0x052,
+    0x004, 0x1C2, 0x1B2, 0x170, 0x154, 0x0AE, 0x0AC, 0x086, 0x2E6, 0x2E4, 0x10A, 0x108,
+    0x106, 0x104, 0x102, 0x100, 0x03E, 0x03A, 0x03C, 0x038, 0x036, 0x034, 0x032, 0x030,
+    0x01E, 0x01A, 0x01C, 0x018, 0x000, 0x019, 0x01D, 0x01B, 0x01F, 0x031, 0x033, 0x035,
+    0x037, 0x039, 0x03D, 0x03B, 0x03F, 0x101, 0x103, 0x105, 0x107, 0x109, 0x10B, 0x2E5,
+    0x2E7, 0x087, 0x0AD, 0x0AF, 0x155, 0x171, 0x1B3, 0x1C3, 0x005, 0x053, 0x075, 0x0A7,
+    0x0A5, 0x157, 0x1B1, 0x1C1, 0x001, 0x003, 0x051, 0x055, 0x077, 0x0A9, 0x0BB, 0x0DB,
+    0x0E3, 0x005, 0x023, 0x039, 0x051, 0x057, 0x05F, 0x06F, 0x073, 0x013, 0x017, 0x01F,
+    0x02D, 0x035, 0x03B, 0x003, 0x00D, 0x019, 0x01F, 0x009,
+};
+
+static const HuffmanCodebook at9_huffman_coeffs[][8][4] = {
+    {
+        { { 0 } },
+        { { 0 } },
+        {
+            { huff_spec_a21_bits, huff_spec_a21_codes,  16,   2,   1,   2,   3, },
+            { huff_spec_a22_bits, huff_spec_a22_codes, 256,   4,   2,   2,   8, },
+            { huff_spec_a23_bits, huff_spec_a23_codes, 256,   4,   2,   2,   9, },
+            { huff_spec_a24_bits, huff_spec_a24_codes, 256,   4,   2,   2,  10, },
+        },
+        {
+            { huff_spec_a31_bits, huff_spec_a31_codes,  64,   2,   1,   3,   7, },
+            { huff_spec_a32_bits, huff_spec_a32_codes,  64,   2,   1,   3,   7, },
+            { huff_spec_a33_bits, huff_spec_a33_codes,  64,   2,   1,   3,   8, },
+            { huff_spec_a34_bits, huff_spec_a34_codes,  64,   2,   1,   3,  10, },
+        },
+        {
+            { huff_spec_a41_bits, huff_spec_a41_codes, 256,   2,   1,   4,   9, },
+            { huff_spec_a42_bits, huff_spec_a42_codes, 256,   2,   1,   4,  10, },
+            { huff_spec_a43_bits, huff_spec_a43_codes, 256,   2,   1,   4,  10, },
+            { huff_spec_a44_bits, huff_spec_a44_codes, 256,   2,   1,   4,  10, },
+        },
+        {
+            { huff_spec_a51_bits, huff_spec_a51_codes,  32,   1,   0,   5,   6, },
+            { huff_spec_a52_bits, huff_spec_a52_codes,  32,   1,   0,   5,   6, },
+            { huff_spec_a53_bits, huff_spec_a53_codes,  32,   1,   0,   5,   7, },
+            { huff_spec_a54_bits, huff_spec_a54_codes,  32,   1,   0,   5,   8, },
+        },
+        {
+            { huff_spec_a61_bits, huff_spec_a61_codes,  64,   1,   0,   6,   7, },
+            { huff_spec_a62_bits, huff_spec_a62_codes,  64,   1,   0,   6,   7, },
+            { huff_spec_a63_bits, huff_spec_a63_codes,  64,   1,   0,   6,   8, },
+            { huff_spec_a64_bits, huff_spec_a64_codes,  64,   1,   0,   6,   9, },
+        },
+        {
+            { huff_spec_a71_bits, huff_spec_a71_codes, 128,   1,   0,   7,   8, },
+            { huff_spec_a72_bits, huff_spec_a72_codes, 128,   1,   0,   7,   8, },
+            { huff_spec_a73_bits, huff_spec_a73_codes, 128,   1,   0,   7,   9, },
+            { huff_spec_a74_bits, huff_spec_a74_codes, 128,   1,   0,   7,  10, },
+        },
+    },
+    {
+        { { 0 } },
+        { { 0 } },
+        {
+            { 0 },
+            { huff_spec_b22_bits, huff_spec_b22_codes,  256,  4,   2,   2,  10, },
+            { huff_spec_b23_bits, huff_spec_b23_codes,  256,  4,   2,   2,  10, },
+            { huff_spec_b24_bits, huff_spec_b24_codes,  256,  4,   2,   2,  10, },
+        },
+        {
+            { 0 },
+            { huff_spec_b32_bits, huff_spec_b32_codes,  64,   2,   1,   3,   9, },
+            { huff_spec_b33_bits, huff_spec_b33_codes,  64,   2,   1,   3,  10, },
+            { huff_spec_b34_bits, huff_spec_b34_codes,  64,   2,   1,   3,  10, },
+        },
+        {
+            { 0 },
+            { huff_spec_b42_bits, huff_spec_b42_codes, 256,   2,   1,   4,  10, },
+            { huff_spec_b43_bits, huff_spec_b43_codes, 256,   2,   1,   4,  10, },
+            { huff_spec_b44_bits, huff_spec_b44_codes, 256,   2,   1,   4,  10, },
+        },
+        {
+            { 0 },
+            { huff_spec_b52_bits, huff_spec_b52_codes,  32,   1,   0,   5,   7, },
+            { huff_spec_b53_bits, huff_spec_b53_codes,  32,   1,   0,   5,   8, },
+            { huff_spec_b54_bits, huff_spec_b54_codes,  32,   1,   0,   5,   9, },
+        },
+        {
+            { 0 },
+            { huff_spec_b62_bits, huff_spec_b62_codes,  64,   1,   0,   6,   8, },
+            { huff_spec_b63_bits, huff_spec_b63_codes,  64,   1,   0,   6,   9, },
+            { huff_spec_b64_bits, huff_spec_b64_codes,  64,   1,   0,   6,  10, },
+        },
+        {
+            { 0 },
+            { huff_spec_b72_bits, huff_spec_b72_codes, 128,   1,   0,   7,   9, },
+            { huff_spec_b73_bits, huff_spec_b73_codes, 128,   1,   0,   7,  10, },
+            { huff_spec_b74_bits, huff_spec_b74_codes, 128,   1,   0,   7,  10, },
+        },
+    },
+};
+
+#endif /* AVCODEC_ATRAC9TAB_H */

diff --git a/libavcodec/audioconvert.c b/libavcodec/audioconvert.c
deleted file mode 100644
index 5e46fae..0000000
--- a/libavcodec/audioconvert.c
+++ /dev/null

@@ -1,120 +0,0 @@
-/*
- * audio conversion
- * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * audio conversion
- * @author Michael Niedermayer <michaelni@gmx.at>
- */
-
-#include "libavutil/avstring.h"
-#include "libavutil/common.h"
-#include "libavutil/libm.h"
-#include "libavutil/samplefmt.h"
-#include "avcodec.h"
-#include "audioconvert.h"
-
-#if FF_API_AUDIO_CONVERT
-
-struct AVAudioConvert {
-    int in_channels, out_channels;
-    int fmt_pair;
-};
-
-AVAudioConvert *av_audio_convert_alloc(enum AVSampleFormat out_fmt, int out_channels,
-                                       enum AVSampleFormat in_fmt, int in_channels,
-                                       const float *matrix, int flags)
-{
-    AVAudioConvert *ctx;
-    if (in_channels!=out_channels)
-        return NULL;  /* FIXME: not supported */
-    ctx = av_malloc(sizeof(AVAudioConvert));
-    if (!ctx)
-        return NULL;
-    ctx->in_channels = in_channels;
-    ctx->out_channels = out_channels;
-    ctx->fmt_pair = out_fmt + AV_SAMPLE_FMT_NB*in_fmt;
-    return ctx;
-}
-
-void av_audio_convert_free(AVAudioConvert *ctx)
-{
-    av_free(ctx);
-}
-
-int av_audio_convert(AVAudioConvert *ctx,
-                           void * const out[6], const int out_stride[6],
-                     const void * const  in[6], const int  in_stride[6], int len)
-{
-    int ch;
-
-    //FIXME optimize common cases
-
-    for(ch=0; ch<ctx->out_channels; ch++){
-        const int is=  in_stride[ch];
-        const int os= out_stride[ch];
-        const uint8_t *pi=  in[ch];
-        uint8_t *po= out[ch];
-        uint8_t *end= po + os*len;
-        if(!out[ch])
-            continue;
-
-#define CONV(ofmt, otype, ifmt, expr)\
-if(ctx->fmt_pair == ofmt + AV_SAMPLE_FMT_NB*ifmt){\
-    do{\
-        *(otype*)po = expr; pi += is; po += os;\
-    }while(po < end);\
-}
-
-//FIXME put things below under ifdefs so we do not waste space for cases no codec will need
-//FIXME rounding ?
-
-             CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 ,  *(const uint8_t*)pi)
-        else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)<<8)
-        else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)<<24)
-        else CONV(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
-        else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
-        else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, (*(const int16_t*)pi>>8) + 0x80)
-        else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16,  *(const int16_t*)pi)
-        else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16,  *(const int16_t*)pi<<16)
-        else CONV(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0 / (1<<15)))
-        else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0 / (1<<15)))
-        else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, (*(const int32_t*)pi>>24) + 0x80)
-        else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi>>16)
-        else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi)
-        else CONV(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0 / (1U<<31)))
-        else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0 / (1U<<31)))
-        else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(  lrintf(*(const float*)pi * (1<<7)) + 0x80))
-        else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(  lrintf(*(const float*)pi * (1<<15))))
-        else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float*)pi * (1U<<31))))
-        else CONV(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, *(const float*)pi)
-        else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, *(const float*)pi)
-        else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(  lrint(*(const double*)pi * (1<<7)) + 0x80))
-        else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, av_clip_int16(  lrint(*(const double*)pi * (1<<15))))
-        else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double*)pi * (1U<<31))))
-        else CONV(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, *(const double*)pi)
-        else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const double*)pi)
-        else return -1;
-    }
-    return 0;
-}
-
-#endif /* FF_API_AUDIO_CONVERT */

diff --git a/libavcodec/audioconvert.h b/libavcodec/audioconvert.h
deleted file mode 100644
index 996c3f3..0000000
--- a/libavcodec/audioconvert.h
+++ /dev/null

@@ -1,86 +0,0 @@
-/*
- * audio conversion
- * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
- * Copyright (c) 2008 Peter Ross
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_AUDIOCONVERT_H
-#define AVCODEC_AUDIOCONVERT_H
-
-#include "version.h"
-
-/**
- * @file
- * Audio format conversion routines
- * This interface is deprecated and will be dropped in a future
- * version. You should use the libswresample library instead.
- */
-
-#if FF_API_AUDIO_CONVERT
-
-#include "libavutil/cpu.h"
-#include "avcodec.h"
-#include "libavutil/channel_layout.h"
-
-struct AVAudioConvert;
-typedef struct AVAudioConvert AVAudioConvert;
-
-/**
- * Create an audio sample format converter context
- * @param out_fmt Output sample format
- * @param out_channels Number of output channels
- * @param in_fmt Input sample format
- * @param in_channels Number of input channels
- * @param[in] matrix Channel mixing matrix (of dimension in_channel*out_channels). Set to NULL to ignore.
- * @param flags See AV_CPU_FLAG_xx
- * @return NULL on error
- * @deprecated See libswresample
- */
-
-attribute_deprecated
-AVAudioConvert *av_audio_convert_alloc(enum AVSampleFormat out_fmt, int out_channels,
-                                       enum AVSampleFormat in_fmt, int in_channels,
-                                       const float *matrix, int flags);
-
-/**
- * Free audio sample format converter context
- * @deprecated See libswresample
- */
-
-attribute_deprecated
-void av_audio_convert_free(AVAudioConvert *ctx);
-
-/**
- * Convert between audio sample formats
- * @param[in] out array of output buffers for each channel. set to NULL to ignore processing of the given channel.
- * @param[in] out_stride distance between consecutive output samples (measured in bytes)
- * @param[in] in array of input buffers for each channel
- * @param[in] in_stride distance between consecutive input samples (measured in bytes)
- * @param len length of audio frame size (measured in samples)
- * @deprecated See libswresample
- */
-
-attribute_deprecated
-int av_audio_convert(AVAudioConvert *ctx,
-                           void * const out[6], const int out_stride[6],
-                     const void * const  in[6], const int  in_stride[6], int len);
-
-#endif /* FF_API_AUDIO_CONVERT */
-
-#endif /* AVCODEC_AUDIOCONVERT_H */

diff --git a/libavcodec/audiotoolboxdec.c b/libavcodec/audiotoolboxdec.c
index 607d3ba..5c0a9de 100644
--- a/libavcodec/audiotoolboxdec.c
+++ b/libavcodec/audiotoolboxdec.c

@@ -24,7 +24,7 @@
 
 #include "config.h"
 #include "avcodec.h"
-#include "ac3_parser.h"
+#include "ac3_parser_internal.h"
 #include "bytestream.h"
 #include "internal.h"
 #include "mpegaudiodecheader.h"
@@ -350,10 +350,10 @@
     } else if (pkt && pkt->size >= 7 &&
                (avctx->codec_id == AV_CODEC_ID_AC3 ||
                 avctx->codec_id == AV_CODEC_ID_EAC3)) {
-        AC3HeaderInfo hdr, *phdr = &hdr;
+        AC3HeaderInfo hdr;
         GetBitContext gbc;
         init_get_bits(&gbc, pkt->data, pkt->size);
-        if (avpriv_ac3_parse_header(&gbc, &phdr) < 0)
+        if (ff_ac3_parse_header(&gbc, &hdr) < 0)
             return AVERROR_INVALIDDATA;
         in_format.mSampleRate = hdr.sample_rate;
         in_format.mChannelsPerFrame = hdr.channels;
@@ -597,6 +597,7 @@
         .bsfs           = bsf_name, \
         .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY, \
         .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, \
+        .wrapper_name   = "at", \
     };
 
 FFAT_DEC(aac,          AV_CODEC_ID_AAC, "aac_adtstoasc")

diff --git a/libavcodec/audiotoolboxenc.c b/libavcodec/audiotoolboxenc.c
index c47fbd1..2c18916 100644
--- a/libavcodec/audiotoolboxenc.c
+++ b/libavcodec/audiotoolboxenc.c

@@ -48,6 +48,8 @@
     AudioFrameQueue afq;
     int eof;
     int frame_size;
+
+    AVFrame* encoding_frame;
 } ATDecodeContext;
 
 static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
@@ -442,6 +444,10 @@
 
     ff_af_queue_init(avctx, &at->afq);
 
+    at->encoding_frame = av_frame_alloc();
+    if (!at->encoding_frame)
+        return AVERROR(ENOMEM);
+
     return 0;
 }
 
@@ -453,6 +459,7 @@
     AVCodecContext *avctx = inctx;
     ATDecodeContext *at = avctx->priv_data;
     AVFrame *frame;
+    int ret;
 
     if (!at->frame_queue.available) {
         if (at->eof) {
@@ -475,6 +482,13 @@
     if (*nb_packets > frame->nb_samples)
         *nb_packets = frame->nb_samples;
 
+    av_frame_unref(at->encoding_frame);
+    ret = av_frame_ref(at->encoding_frame, frame);
+    if (ret < 0) {
+        *nb_packets = 0;
+        return ret;
+    }
+
     ff_bufqueue_add(avctx, &at->used_frame_queue, frame);
 
     return 0;
@@ -565,6 +579,7 @@
     ff_bufqueue_discard_all(&at->frame_queue);
     ff_bufqueue_discard_all(&at->used_frame_queue);
     ff_af_queue_close(&at->afq);
+    av_frame_free(&at->encoding_frame);
     return 0;
 }
 
@@ -619,6 +634,7 @@
         }, \
         .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE, \
         .profiles       = PROFILES, \
+        .wrapper_name   = "at", \
     };
 
 static const uint64_t aac_at_channel_layouts[] = {

diff --git a/libavcodec/av1.h b/libavcodec/av1.h
new file mode 100644
index 0000000..f2ec39c
--- /dev/null
+++ b/libavcodec/av1.h

@@ -0,0 +1,130 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AV1 common definitions
+ */
+
+#ifndef AVCODEC_AV1_H
+#define AVCODEC_AV1_H
+
+// OBU types (section 6.2.2).
+typedef enum {
+    // 0 reserved.
+    AV1_OBU_SEQUENCE_HEADER        = 1,
+    AV1_OBU_TEMPORAL_DELIMITER     = 2,
+    AV1_OBU_FRAME_HEADER           = 3,
+    AV1_OBU_TILE_GROUP             = 4,
+    AV1_OBU_METADATA               = 5,
+    AV1_OBU_FRAME                  = 6,
+    AV1_OBU_REDUNDANT_FRAME_HEADER = 7,
+    AV1_OBU_TILE_LIST              = 8,
+    // 9-14 reserved.
+    AV1_OBU_PADDING                = 15,
+} AV1_OBU_Type;
+
+// Metadata types (section 6.7.1).
+enum {
+    AV1_METADATA_TYPE_HDR_CLL     = 1,
+    AV1_METADATA_TYPE_HDR_MDCV    = 2,
+    AV1_METADATA_TYPE_SCALABILITY = 3,
+    AV1_METADATA_TYPE_ITUT_T35    = 4,
+    AV1_METADATA_TYPE_TIMECODE    = 5,
+};
+
+// Frame types (section 6.8.2).
+enum {
+    AV1_FRAME_KEY        = 0,
+    AV1_FRAME_INTER      = 1,
+    AV1_FRAME_INTRA_ONLY = 2,
+    AV1_FRAME_SWITCH     = 3,
+};
+
+// Reference frames (section 6.10.24).
+enum {
+    AV1_REF_FRAME_INTRA   = 0,
+    AV1_REF_FRAME_LAST    = 1,
+    AV1_REF_FRAME_LAST2   = 2,
+    AV1_REF_FRAME_LAST3   = 3,
+    AV1_REF_FRAME_GOLDEN  = 4,
+    AV1_REF_FRAME_BWDREF  = 5,
+    AV1_REF_FRAME_ALTREF2 = 6,
+    AV1_REF_FRAME_ALTREF  = 7,
+};
+
+// Constants (section 3).
+enum {
+    AV1_MAX_OPERATING_POINTS = 32,
+
+    AV1_MAX_SB_SIZE    = 128,
+    AV1_MI_SIZE        = 4,
+
+    AV1_MAX_TILE_WIDTH = 4096,
+    AV1_MAX_TILE_AREA  = 4096 * 2304,
+    AV1_MAX_TILE_ROWS  = 64,
+    AV1_MAX_TILE_COLS  = 64,
+
+    AV1_NUM_REF_FRAMES       = 8,
+    AV1_REFS_PER_FRAME       = 7,
+    AV1_TOTAL_REFS_PER_FRAME = 8,
+    AV1_PRIMARY_REF_NONE     = 7,
+
+    AV1_MAX_SEGMENTS = 8,
+    AV1_SEG_LVL_MAX  = 8,
+
+    AV1_SEG_LVL_ALT_Q      = 0,
+    AV1_SEG_LVL_ALT_LF_Y_V = 1,
+    AV1_SEG_LVL_REF_FRAME  = 5,
+    AV1_SEG_LVL_SKIP       = 6,
+    AV1_SEG_LVL_GLOBAL_MV  = 7,
+
+    AV1_SELECT_SCREEN_CONTENT_TOOLS = 2,
+    AV1_SELECT_INTEGER_MV           = 2,
+
+    AV1_SUPERRES_NUM       = 8,
+    AV1_SUPERRES_DENOM_MIN = 9,
+
+    AV1_INTERPOLATION_FILTER_SWITCHABLE = 4,
+
+    AV1_GM_ABS_ALPHA_BITS       = 12,
+    AV1_GM_ALPHA_PREC_BITS      = 15,
+    AV1_GM_ABS_TRANS_ONLY_BITS  = 9,
+    AV1_GM_TRANS_ONLY_PREC_BITS = 3,
+    AV1_GM_ABS_TRANS_BITS       = 12,
+    AV1_GM_TRANS_PREC_BITS      = 6,
+    AV1_WARPEDMODEL_PREC_BITS   = 16,
+
+    AV1_WARP_MODEL_IDENTITY    = 0,
+    AV1_WARP_MODEL_TRANSLATION = 1,
+    AV1_WARP_MODEL_ROTZOOM     = 2,
+    AV1_WARP_MODEL_AFFINE      = 3,
+};
+
+
+// The main colour configuration information uses the same ISO/IEC 23001-8
+// (H.273) enums as FFmpeg does, so separate definitions are not required.
+
+// Chroma sample position.
+enum {
+    AV1_CSP_UNKNOWN   = 0,
+    AV1_CSP_VERTICAL  = 1, // -> AVCHROMA_LOC_LEFT.
+    AV1_CSP_COLOCATED = 2, // -> AVCHROMA_LOC_TOPLEFT.
+};
+
+#endif /* AVCODEC_AV1_H */

diff --git a/libavcodec/av1_metadata_bsf.c b/libavcodec/av1_metadata_bsf.c
new file mode 100644
index 0000000..52d3836
--- /dev/null
+++ b/libavcodec/av1_metadata_bsf.c

@@ -0,0 +1,298 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "bsf.h"
+#include "cbs.h"
+#include "cbs_av1.h"
+
+enum {
+    PASS,
+    INSERT,
+    REMOVE,
+};
+
+typedef struct AV1MetadataContext {
+    const AVClass *class;
+
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment access_unit;
+
+    int td;
+
+    int color_primaries;
+    int transfer_characteristics;
+    int matrix_coefficients;
+
+    int color_range;
+    int chroma_sample_position;
+
+    AVRational tick_rate;
+    int num_ticks_per_picture;
+} AV1MetadataContext;
+
+
+static int av1_metadata_update_sequence_header(AVBSFContext *bsf,
+                                               AV1RawSequenceHeader *seq)
+{
+    AV1MetadataContext *ctx = bsf->priv_data;
+    AV1RawColorConfig  *clc = &seq->color_config;
+    AV1RawTimingInfo   *tim = &seq->timing_info;
+
+    if (ctx->color_primaries >= 0          ||
+        ctx->transfer_characteristics >= 0 ||
+        ctx->matrix_coefficients >= 0) {
+        if (!clc->color_description_present_flag) {
+            clc->color_description_present_flag = 1;
+            clc->color_primaries          = AVCOL_PRI_UNSPECIFIED;
+            clc->transfer_characteristics = AVCOL_TRC_UNSPECIFIED;
+            clc->matrix_coefficients      = AVCOL_SPC_UNSPECIFIED;
+        }
+
+        if (ctx->color_primaries >= 0)
+            clc->color_primaries = ctx->color_primaries;
+        if (ctx->transfer_characteristics >= 0)
+            clc->transfer_characteristics = ctx->transfer_characteristics;
+        if (ctx->matrix_coefficients >= 0)
+            clc->matrix_coefficients = ctx->matrix_coefficients;
+    }
+
+    if (ctx->color_range >= 0) {
+        if (clc->color_primaries          == AVCOL_PRI_BT709        &&
+            clc->transfer_characteristics == AVCOL_TRC_IEC61966_2_1 &&
+            clc->matrix_coefficients      == AVCOL_SPC_RGB) {
+            av_log(bsf, AV_LOG_WARNING, "Warning: color_range cannot be set "
+                   "on RGB streams encoded in BT.709 sRGB.\n");
+        } else {
+            clc->color_range = ctx->color_range;
+        }
+    }
+
+    if (ctx->chroma_sample_position >= 0) {
+        if (clc->mono_chrome || !clc->subsampling_x || !clc->subsampling_y) {
+            av_log(bsf, AV_LOG_WARNING, "Warning: chroma_sample_position "
+                   "can only be set for 4:2:0 streams.\n");
+        } else {
+            clc->chroma_sample_position = ctx->chroma_sample_position;
+        }
+    }
+
+    if (ctx->tick_rate.num && ctx->tick_rate.den) {
+        int num, den;
+
+        av_reduce(&num, &den, ctx->tick_rate.num, ctx->tick_rate.den,
+                  UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
+
+        tim->time_scale                = num;
+        tim->num_units_in_display_tick = den;
+        seq->timing_info_present_flag  = 1;
+
+        if (ctx->num_ticks_per_picture > 0) {
+            tim->equal_picture_interval = 1;
+            tim->num_ticks_per_picture_minus_1 =
+                ctx->num_ticks_per_picture - 1;
+        }
+    }
+
+    return 0;
+}
+
+static int av1_metadata_filter(AVBSFContext *bsf, AVPacket *out)
+{
+    AV1MetadataContext *ctx = bsf->priv_data;
+    AVPacket *in = NULL;
+    CodedBitstreamFragment *frag = &ctx->access_unit;
+    AV1RawOBU td, *obu;
+    int err, i;
+
+    err = ff_bsf_get_packet(bsf, &in);
+    if (err < 0)
+        return err;
+
+    err = ff_cbs_read_packet(ctx->cbc, frag, in);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
+        goto fail;
+    }
+
+    for (i = 0; i < frag->nb_units; i++) {
+        if (frag->units[i].type == AV1_OBU_SEQUENCE_HEADER) {
+            obu = frag->units[i].content;
+            err = av1_metadata_update_sequence_header(bsf, &obu->obu.sequence_header);
+            if (err < 0)
+                goto fail;
+        }
+    }
+
+    // If a Temporal Delimiter is present, it must be the first OBU.
+    if (frag->units[0].type == AV1_OBU_TEMPORAL_DELIMITER) {
+        if (ctx->td == REMOVE)
+            ff_cbs_delete_unit(ctx->cbc, frag, 0);
+    } else if (ctx->td == INSERT) {
+        td = (AV1RawOBU) {
+            .header.obu_type = AV1_OBU_TEMPORAL_DELIMITER,
+        };
+
+        err = ff_cbs_insert_unit_content(ctx->cbc, frag, 0, AV1_OBU_TEMPORAL_DELIMITER,
+                                         &td, NULL);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to insert Temporal Delimiter.\n");
+            goto fail;
+        }
+    }
+
+    err = ff_cbs_write_packet(ctx->cbc, out, frag);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
+        goto fail;
+    }
+
+    err = av_packet_copy_props(out, in);
+    if (err < 0)
+        goto fail;
+
+    err = 0;
+fail:
+    ff_cbs_fragment_uninit(ctx->cbc, frag);
+
+    if (err < 0)
+        av_packet_unref(out);
+    av_packet_free(&in);
+
+    return err;
+}
+
+static int av1_metadata_init(AVBSFContext *bsf)
+{
+    AV1MetadataContext *ctx = bsf->priv_data;
+    CodedBitstreamFragment *frag = &ctx->access_unit;
+    AV1RawOBU *obu;
+    int err, i;
+
+    err = ff_cbs_init(&ctx->cbc, AV_CODEC_ID_AV1, bsf);
+    if (err < 0)
+        return err;
+
+    if (bsf->par_in->extradata) {
+        err = ff_cbs_read_extradata(ctx->cbc, frag, bsf->par_in);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
+            goto fail;
+        }
+
+        for (i = 0; i < frag->nb_units; i++) {
+            if (frag->units[i].type == AV1_OBU_SEQUENCE_HEADER) {
+                obu = frag->units[i].content;
+                err = av1_metadata_update_sequence_header(bsf, &obu->obu.sequence_header);
+                if (err < 0)
+                    goto fail;
+            }
+        }
+
+        err = ff_cbs_write_extradata(ctx->cbc, bsf->par_out, frag);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
+            goto fail;
+        }
+    }
+
+    err = 0;
+fail:
+    ff_cbs_fragment_uninit(ctx->cbc, frag);
+    return err;
+}
+
+static void av1_metadata_close(AVBSFContext *bsf)
+{
+    AV1MetadataContext *ctx = bsf->priv_data;
+    ff_cbs_close(&ctx->cbc);
+}
+
+#define OFFSET(x) offsetof(AV1MetadataContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
+static const AVOption av1_metadata_options[] = {
+    { "td", "Temporal Delimiter OBU",
+        OFFSET(td), AV_OPT_TYPE_INT,
+        { .i64 = PASS }, PASS, REMOVE, FLAGS, "td" },
+    { "pass",   NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = PASS   }, .flags = FLAGS, .unit = "td" },
+    { "insert", NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = INSERT }, .flags = FLAGS, .unit = "td" },
+    { "remove", NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = REMOVE }, .flags = FLAGS, .unit = "td" },
+
+    { "color_primaries", "Set color primaries (section 6.4.2)",
+        OFFSET(color_primaries), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+    { "transfer_characteristics", "Set transfer characteristics (section 6.4.2)",
+        OFFSET(transfer_characteristics), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+    { "matrix_coefficients", "Set matrix coefficients (section 6.4.2)",
+        OFFSET(matrix_coefficients), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+
+    { "color_range", "Set color range flag (section 6.4.2)",
+        OFFSET(color_range), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 1, FLAGS, "cr" },
+    { "tv", "TV (limited) range", 0, AV_OPT_TYPE_CONST,
+        { .i64 = 0 }, .flags = FLAGS, .unit = "cr" },
+    { "pc", "PC (full) range",    0, AV_OPT_TYPE_CONST,
+        { .i64 = 1 }, .flags = FLAGS, .unit = "cr" },
+
+    { "chroma_sample_position", "Set chroma sample position (section 6.4.2)",
+        OFFSET(chroma_sample_position), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 3, FLAGS, "csp" },
+    { "unknown",   "Unknown chroma sample position",  0, AV_OPT_TYPE_CONST,
+        { .i64 = AV1_CSP_UNKNOWN },   .flags = FLAGS, .unit = "csp" },
+    { "vertical",  "Left chroma sample position",     0, AV_OPT_TYPE_CONST,
+        { .i64 = AV1_CSP_VERTICAL },  .flags = FLAGS, .unit = "csp" },
+    { "colocated", "Top-left chroma sample position", 0, AV_OPT_TYPE_CONST,
+        { .i64 = AV1_CSP_COLOCATED }, .flags = FLAGS, .unit = "csp" },
+
+    { "tick_rate", "Set display tick rate (num_units_in_display_tick / time_scale)",
+        OFFSET(tick_rate), AV_OPT_TYPE_RATIONAL,
+        { .dbl = 0.0 }, 0, UINT_MAX, FLAGS },
+    { "num_ticks_per_picture", "Set display ticks per picture for CFR streams",
+        OFFSET(num_ticks_per_picture), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, INT_MAX, FLAGS },
+
+    { NULL }
+};
+
+static const AVClass av1_metadata_class = {
+    .class_name = "av1_metadata_bsf",
+    .item_name  = av_default_item_name,
+    .option     = av1_metadata_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const enum AVCodecID av1_metadata_codec_ids[] = {
+    AV_CODEC_ID_AV1, AV_CODEC_ID_NONE,
+};
+
+const AVBitStreamFilter ff_av1_metadata_bsf = {
+    .name           = "av1_metadata",
+    .priv_data_size = sizeof(AV1MetadataContext),
+    .priv_class     = &av1_metadata_class,
+    .init           = &av1_metadata_init,
+    .close          = &av1_metadata_close,
+    .filter         = &av1_metadata_filter,
+    .codec_ids      = av1_metadata_codec_ids,
+};

diff --git a/libavcodec/av1_parse.c b/libavcodec/av1_parse.c
new file mode 100644
index 0000000..cdd524b
--- /dev/null
+++ b/libavcodec/av1_parse.c

@@ -0,0 +1,107 @@
+/*
+ * AV1 common parsing code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/mem.h"
+
+#include "av1.h"
+#include "av1_parse.h"
+#include "bytestream.h"
+
+int ff_av1_extract_obu(AV1OBU *obu, const uint8_t *buf, int length, void *logctx)
+{
+    int64_t obu_size;
+    int start_pos, type, temporal_id, spatial_id;
+    int len;
+
+    len = parse_obu_header(buf, length, &obu_size, &start_pos,
+                           &type, &temporal_id, &spatial_id);
+    if (len < 0)
+        return len;
+
+    obu->type        = type;
+    obu->temporal_id = temporal_id;
+    obu->spatial_id  = spatial_id;
+
+    obu->data     = buf + start_pos;
+    obu->size     = obu_size;
+    obu->raw_data = buf;
+    obu->raw_size = len;
+
+    av_log(logctx, AV_LOG_DEBUG,
+           "obu_type: %d, temporal_id: %d, spatial_id: %d, payload size: %d\n",
+           obu->type, obu->temporal_id, obu->spatial_id, obu->size);
+
+    return len;
+}
+
+int ff_av1_packet_split(AV1Packet *pkt, const uint8_t *buf, int length, void *logctx)
+{
+    GetByteContext bc;
+    int ret, consumed;
+
+    bytestream2_init(&bc, buf, length);
+    pkt->nb_obus = 0;
+
+    while (bytestream2_get_bytes_left(&bc) > 0) {
+        AV1OBU *obu;
+
+        if (pkt->obus_allocated < pkt->nb_obus + 1) {
+            int new_size = pkt->obus_allocated + 1;
+            AV1OBU *tmp = av_realloc_array(pkt->obus, new_size, sizeof(*tmp));
+            if (!tmp)
+                return AVERROR(ENOMEM);
+
+            pkt->obus = tmp;
+            memset(pkt->obus + pkt->obus_allocated, 0,
+                   (new_size - pkt->obus_allocated) * sizeof(*tmp));
+            pkt->obus_allocated = new_size;
+        }
+        obu = &pkt->obus[pkt->nb_obus];
+
+        consumed = ff_av1_extract_obu(obu, bc.buffer, bytestream2_get_bytes_left(&bc), logctx);
+        if (consumed < 0)
+            return consumed;
+
+        bytestream2_skip(&bc, consumed);
+
+        obu->size_bits = get_obu_bit_length(obu->data, obu->size, obu->type);
+
+        if (obu->size_bits < 0 || (!obu->size_bits && obu->type != AV1_OBU_TEMPORAL_DELIMITER)) {
+            av_log(logctx, AV_LOG_ERROR, "Invalid OBU of type %d, skipping.\n", obu->type);
+            continue;
+        }
+
+        pkt->nb_obus++;
+
+        ret = init_get_bits(&obu->gb, obu->data, obu->size_bits);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+void ff_av1_packet_uninit(AV1Packet *pkt)
+{
+    av_freep(&pkt->obus);
+    pkt->obus_allocated = 0;
+}

diff --git a/libavcodec/av1_parse.h b/libavcodec/av1_parse.h
new file mode 100644
index 0000000..864308f
--- /dev/null
+++ b/libavcodec/av1_parse.h

@@ -0,0 +1,174 @@
+/*
+ * AV1 common parsing code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AV1_PARSE_H
+#define AVCODEC_AV1_PARSE_H
+
+#include <stdint.h>
+
+#include "av1.h"
+#include "avcodec.h"
+#include "get_bits.h"
+
+typedef struct AV1OBU {
+    /** Size of payload */
+    int size;
+    const uint8_t *data;
+
+    /**
+     * Size, in bits, of just the data, excluding the trailing_one_bit and
+     * any trailing padding.
+     */
+    int size_bits;
+
+    /** Size of entire OBU, including header */
+    int raw_size;
+    const uint8_t *raw_data;
+
+    /** GetBitContext initialized to the start of the payload */
+    GetBitContext gb;
+
+    int type;
+
+    int temporal_id;
+    int spatial_id;
+} AV1OBU;
+
+/** An input packet split into OBUs */
+typedef struct AV1Packet {
+    AV1OBU *obus;
+    int nb_obus;
+    int obus_allocated;
+} AV1Packet;
+
+/**
+ * Extract an OBU from a raw bitstream.
+ *
+ * @note This function does not copy or store any bitstream data. All
+ *       the pointers in the AV1OBU structure will be valid as long
+ *       as the input buffer also is.
+ */
+int ff_av1_extract_obu(AV1OBU *obu, const uint8_t *buf, int length,
+                       void *logctx);
+
+/**
+ * Split an input packet into OBUs.
+ *
+ * @note This function does not copy or store any bitstream data. All
+ *       the pointers in the AV1Packet structure will be valid as
+ *       long as the input buffer also is.
+ */
+int ff_av1_packet_split(AV1Packet *pkt, const uint8_t *buf, int length,
+                        void *logctx);
+
+/**
+ * Free all the allocated memory in the packet.
+ */
+void ff_av1_packet_uninit(AV1Packet *pkt);
+
+static inline int64_t leb128(GetBitContext *gb) {
+    int64_t ret = 0;
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        int byte = get_bits(gb, 8);
+        ret |= (int64_t)(byte & 0x7f) << (i * 7);
+        if (!(byte & 0x80))
+            break;
+    }
+    return ret;
+}
+
+static inline int parse_obu_header(const uint8_t *buf, int buf_size,
+                                   int64_t *obu_size, int *start_pos, int *type,
+                                   int *temporal_id, int *spatial_id)
+{
+    GetBitContext gb;
+    int ret, extension_flag, has_size_flag;
+    int64_t size;
+
+    ret = init_get_bits8(&gb, buf, FFMIN(buf_size, 2 + 8)); // OBU header fields + max leb128 length
+    if (ret < 0)
+        return ret;
+
+    if (get_bits1(&gb) != 0) // obu_forbidden_bit
+        return AVERROR_INVALIDDATA;
+
+    *type      = get_bits(&gb, 4);
+    extension_flag = get_bits1(&gb);
+    has_size_flag  = get_bits1(&gb);
+    skip_bits1(&gb); // obu_reserved_1bit
+
+    if (extension_flag) {
+        *temporal_id = get_bits(&gb, 3);
+        *spatial_id  = get_bits(&gb, 2);
+        skip_bits(&gb, 3); // extension_header_reserved_3bits
+    } else {
+        *temporal_id = *spatial_id = 0;
+    }
+
+    *obu_size  = has_size_flag ? leb128(&gb)
+                               : buf_size - 1 - extension_flag;
+
+    if (get_bits_left(&gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    *start_pos = get_bits_count(&gb) / 8;
+
+    size = *obu_size + *start_pos;
+
+    if (size > buf_size)
+        return AVERROR_INVALIDDATA;
+
+    return size;
+}
+
+static inline int get_obu_bit_length(const uint8_t *buf, int size, int type)
+{
+    int v;
+
+    /* There are no trailing bits on these */
+    if (type == AV1_OBU_TILE_GROUP || type == AV1_OBU_FRAME) {
+        if (size > INT_MAX / 8)
+            return AVERROR(ERANGE);
+        else
+            return size * 8;
+    }
+
+    while (size > 0 && buf[size - 1] == 0)
+        size--;
+
+    if (!size)
+        return 0;
+
+    v = buf[size - 1];
+
+    if (size > INT_MAX / 8)
+        return AVERROR(ERANGE);
+    size *= 8;
+
+    /* Remove the trailing_one_bit and following trailing zeros */
+    if (v)
+        size -= ff_ctz(v) + 1;
+
+    return size;
+}
+
+#endif /* AVCODEC_AV1_PARSE_H */

diff --git a/libavcodec/av1_parser.c b/libavcodec/av1_parser.c
new file mode 100644
index 0000000..8df6649
--- /dev/null
+++ b/libavcodec/av1_parser.c

@@ -0,0 +1,228 @@
+/*
+ * AV1 parser
+ *
+ * Copyright (C) 2018 James Almer <jamrial@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "av1_parse.h"
+#include "cbs.h"
+#include "cbs_av1.h"
+#include "parser.h"
+
+typedef struct AV1ParseContext {
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment temporal_unit;
+    int parsed_extradata;
+} AV1ParseContext;
+
+static const enum AVPixelFormat pix_fmts_8bit[2][2] = {
+    { AV_PIX_FMT_YUV444P, AV_PIX_FMT_NONE },
+    { AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P },
+};
+static const enum AVPixelFormat pix_fmts_10bit[2][2] = {
+    { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_NONE },
+    { AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV420P10 },
+};
+static const enum AVPixelFormat pix_fmts_12bit[2][2] = {
+    { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_NONE },
+    { AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV420P12 },
+};
+
+static int av1_parser_parse(AVCodecParserContext *ctx,
+                            AVCodecContext *avctx,
+                            const uint8_t **out_data, int *out_size,
+                            const uint8_t *data, int size)
+{
+    AV1ParseContext *s = ctx->priv_data;
+    CodedBitstreamFragment *td = &s->temporal_unit;
+    CodedBitstreamAV1Context *av1 = s->cbc->priv_data;
+    int ret;
+
+    *out_data = data;
+    *out_size = size;
+
+    ctx->key_frame         = -1;
+    ctx->pict_type         = AV_PICTURE_TYPE_NONE;
+    ctx->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
+
+    s->cbc->log_ctx = avctx;
+
+    if (avctx->extradata_size && !s->parsed_extradata) {
+        s->parsed_extradata = 1;
+
+        ret = ff_cbs_read(s->cbc, td, avctx->extradata, avctx->extradata_size);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to parse extradata.\n");
+            goto end;
+        }
+
+        ff_cbs_fragment_uninit(s->cbc, td);
+    }
+
+    ret = ff_cbs_read(s->cbc, td, data, size);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to parse temporal unit.\n");
+        goto end;
+    }
+
+    if (!av1->sequence_header) {
+        av_log(avctx, AV_LOG_ERROR, "No sequence header available\n");
+        goto end;
+    }
+
+    for (int i = 0; i < td->nb_units; i++) {
+        CodedBitstreamUnit *unit = &td->units[i];
+        AV1RawOBU *obu = unit->content;
+        AV1RawSequenceHeader *seq = av1->sequence_header;
+        AV1RawColorConfig *color = &seq->color_config;
+        AV1RawFrameHeader *frame;
+        int frame_type;
+
+        if (unit->type == AV1_OBU_FRAME)
+            frame = &obu->obu.frame.header;
+        else if (unit->type == AV1_OBU_FRAME_HEADER)
+            frame = &obu->obu.frame_header;
+        else
+            continue;
+
+        if (frame->show_existing_frame) {
+            AV1ReferenceFrameState *ref = &av1->ref[frame->frame_to_show_map_idx];
+
+            if (!ref->valid) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid reference frame\n");
+                goto end;
+            }
+
+            ctx->width  = ref->frame_width;
+            ctx->height = ref->frame_height;
+            frame_type  = ref->frame_type;
+
+            ctx->key_frame = 0;
+        } else if (!frame->show_frame) {
+            continue;
+        } else {
+            ctx->width  = av1->frame_width;
+            ctx->height = av1->frame_height;
+            frame_type  = frame->frame_type;
+
+            ctx->key_frame = frame_type == AV1_FRAME_KEY;
+        }
+
+        avctx->profile = seq->seq_profile;
+        avctx->level   = seq->seq_level_idx[0];
+
+        switch (frame_type) {
+        case AV1_FRAME_KEY:
+        case AV1_FRAME_INTRA_ONLY:
+            ctx->pict_type = AV_PICTURE_TYPE_I;
+            break;
+        case AV1_FRAME_INTER:
+            ctx->pict_type = AV_PICTURE_TYPE_P;
+            break;
+        case AV1_FRAME_SWITCH:
+            ctx->pict_type = AV_PICTURE_TYPE_SP;
+            break;
+        }
+        ctx->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
+
+        switch (av1->bit_depth) {
+        case 8:
+            ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY8
+                                             : pix_fmts_8bit [color->subsampling_x][color->subsampling_y];
+            break;
+        case 10:
+            ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY10
+                                             : pix_fmts_10bit[color->subsampling_x][color->subsampling_y];
+            break;
+        case 12:
+            ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY12
+                                             : pix_fmts_12bit[color->subsampling_x][color->subsampling_y];
+            break;
+        }
+        av_assert2(ctx->format != AV_PIX_FMT_NONE);
+    }
+
+end:
+    ff_cbs_fragment_uninit(s->cbc, td);
+
+    s->cbc->log_ctx = NULL;
+
+    return size;
+}
+
+static const CodedBitstreamUnitType decompose_unit_types[] = {
+    AV1_OBU_TEMPORAL_DELIMITER,
+    AV1_OBU_SEQUENCE_HEADER,
+    AV1_OBU_FRAME_HEADER,
+    AV1_OBU_TILE_GROUP,
+    AV1_OBU_FRAME,
+};
+
+static av_cold int av1_parser_init(AVCodecParserContext *ctx)
+{
+    AV1ParseContext *s = ctx->priv_data;
+    int ret;
+
+    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, NULL);
+    if (ret < 0)
+        return ret;
+
+    s->cbc->decompose_unit_types    = (CodedBitstreamUnitType *)decompose_unit_types;
+    s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(decompose_unit_types);
+
+    return 0;
+}
+
+static void av1_parser_close(AVCodecParserContext *ctx)
+{
+    AV1ParseContext *s = ctx->priv_data;
+
+    ff_cbs_close(&s->cbc);
+}
+
+static int av1_parser_split(AVCodecContext *avctx,
+                            const uint8_t *buf, int buf_size)
+{
+    AV1OBU obu;
+    const uint8_t *ptr = buf, *end = buf + buf_size;
+
+    while (ptr < end) {
+        int len = ff_av1_extract_obu(&obu, ptr, buf_size, avctx);
+        if (len < 0)
+            break;
+
+        if (obu.type == AV1_OBU_FRAME_HEADER ||
+            obu.type == AV1_OBU_FRAME) {
+            return ptr - buf;
+        }
+        ptr      += len;
+        buf_size -= len;
+    }
+
+    return 0;
+}
+
+AVCodecParser ff_av1_parser = {
+    .codec_ids      = { AV_CODEC_ID_AV1 },
+    .priv_data_size = sizeof(AV1ParseContext),
+    .parser_init    = av1_parser_init,
+    .parser_close   = av1_parser_close,
+    .parser_parse   = av1_parser_parse,
+    .split          = av1_parser_split,
+};

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 1d7af0c..c15b329 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h

@@ -36,6 +36,7 @@
 #include "libavutil/channel_layout.h"
 #include "libavutil/dict.h"
 #include "libavutil/frame.h"
+#include "libavutil/hwcontext.h"
 #include "libavutil/log.h"
 #include "libavutil/pixfmt.h"
 #include "libavutil/rational.h"
@@ -222,9 +223,6 @@
     /* video codecs */
     AV_CODEC_ID_MPEG1VIDEO,
     AV_CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
-#if FF_API_XVMC
-    AV_CODEC_ID_MPEG2VIDEO_XVMC,
-#endif /* FF_API_XVMC */
     AV_CODEC_ID_H261,
     AV_CODEC_ID_H263,
     AV_CODEC_ID_RV10,
@@ -416,6 +414,7 @@
     AV_CODEC_ID_DXV,
     AV_CODEC_ID_SCREENPRESSO,
     AV_CODEC_ID_RSCC,
+    AV_CODEC_ID_AVS2,
 
     AV_CODEC_ID_Y41P = 0x8000,
     AV_CODEC_ID_AVRP,
@@ -453,6 +452,11 @@
     AV_CODEC_ID_SVG,
     AV_CODEC_ID_GDV,
     AV_CODEC_ID_FITS,
+    AV_CODEC_ID_IMM4,
+    AV_CODEC_ID_PROSUMER,
+    AV_CODEC_ID_MWSC,
+    AV_CODEC_ID_WCMV,
+    AV_CODEC_ID_RASC,
 
     /* various PCM "codecs" */
     AV_CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
@@ -525,9 +529,6 @@
     AV_CODEC_ID_ADPCM_G722,
     AV_CODEC_ID_ADPCM_IMA_APC,
     AV_CODEC_ID_ADPCM_VIMA,
-#if FF_API_VIMA_DECODER
-    AV_CODEC_ID_VIMA = AV_CODEC_ID_ADPCM_VIMA,
-#endif
 
     AV_CODEC_ID_ADPCM_AFC = 0x11800,
     AV_CODEC_ID_ADPCM_IMA_OKI,
@@ -590,9 +591,6 @@
     AV_CODEC_ID_MLP,
     AV_CODEC_ID_GSM_MS, /* as found in WAV */
     AV_CODEC_ID_ATRAC3,
-#if FF_API_VOXWARE
-    AV_CODEC_ID_VOXWARE,
-#endif
     AV_CODEC_ID_APE,
     AV_CODEC_ID_NELLYMOSER,
     AV_CODEC_ID_MUSEPACK8,
@@ -628,6 +626,7 @@
     AV_CODEC_ID_PAF_AUDIO,
     AV_CODEC_ID_ON2AVC,
     AV_CODEC_ID_DSS_SP,
+    AV_CODEC_ID_CODEC2,
 
     AV_CODEC_ID_FFWAVESYNTH = 0x15800,
     AV_CODEC_ID_SONIC,
@@ -646,6 +645,10 @@
     AV_CODEC_ID_ATRAC3AL,
     AV_CODEC_ID_ATRAC3PAL,
     AV_CODEC_ID_DOLBY_E,
+    AV_CODEC_ID_APTX,
+    AV_CODEC_ID_APTX_HD,
+    AV_CODEC_ID_SBC,
+    AV_CODEC_ID_ATRAC9,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
@@ -674,6 +677,7 @@
     AV_CODEC_ID_PJS,
     AV_CODEC_ID_ASS,
     AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
+    AV_CODEC_ID_TTML,
 
     /* other specific kind of codecs (generally used for attachments) */
     AV_CODEC_ID_FIRST_UNKNOWN = 0x18000,           ///< A dummy ID pointing at the start of various fake codecs.
@@ -779,7 +783,7 @@
  * Note: If the first 23 bits of the additional bytes are not 0, then damaged
  * MPEG bitstreams could cause overread and segfault.
  */
-#define AV_INPUT_BUFFER_PADDING_SIZE 32
+#define AV_INPUT_BUFFER_PADDING_SIZE 64
 
 /**
  * @ingroup lavc_encoding
@@ -788,38 +792,6 @@
  */
 #define AV_INPUT_BUFFER_MIN_SIZE 16384
 
-#if FF_API_WITHOUT_PREFIX
-/**
- * @deprecated use AV_INPUT_BUFFER_PADDING_SIZE instead
- */
-#define FF_INPUT_BUFFER_PADDING_SIZE 32
-
-/**
- * @deprecated use AV_INPUT_BUFFER_MIN_SIZE instead
- */
-#define FF_MIN_BUFFER_SIZE 16384
-#endif /* FF_API_WITHOUT_PREFIX */
-
-/**
- * @ingroup lavc_encoding
- * motion estimation type.
- * @deprecated use codec private option instead
- */
-#if FF_API_MOTION_EST
-enum Motion_Est_ID {
-    ME_ZERO = 1,    ///< no search, that is use 0,0 vector whenever one is needed
-    ME_FULL,
-    ME_LOG,
-    ME_PHODS,
-    ME_EPZS,        ///< enhanced predictive zonal search
-    ME_X1,          ///< reserved for experiments
-    ME_HEX,         ///< hexagon based search
-    ME_UMH,         ///< uneven multi-hexagon search
-    ME_TESA,        ///< transformed exhaustive search algorithm
-    ME_ITER=50,     ///< iterative search
-};
-#endif
-
 /**
  * @ingroup lavc_decoding
  */
@@ -858,13 +830,6 @@
     float quality_factor;
 } RcOverride;
 
-#if FF_API_MAX_BFRAMES
-/**
- * @deprecated there is no libavcodec-wide limit on the number of B-frames
- */
-#define FF_MAX_B_FRAMES 16
-#endif
-
 /* encoding support
    These flags can be passed in AVCodecContext.flags before initialization.
    Note: Not everything is supported yet.
@@ -1036,13 +1001,6 @@
  */
 #define AV_CODEC_CAP_SMALL_LAST_FRAME    (1 <<  6)
 
-#if FF_API_CAP_VDPAU
-/**
- * Codec can export data for HW decoding (VDPAU).
- */
-#define AV_CODEC_CAP_HWACCEL_VDPAU       (1 <<  7)
-#endif
-
 /**
  * Codec can output multiple frames per AVPacket
  * Normally demuxers return one frame at a time, demuxers which do not do
@@ -1103,231 +1061,26 @@
  */
 #define AV_CODEC_CAP_LOSSLESS         0x80000000
 
-
-#if FF_API_WITHOUT_PREFIX
 /**
- * Allow decoders to produce frames with data planes that are not aligned
- * to CPU requirements (e.g. due to cropping).
+ * Codec is backed by a hardware implementation. Typically used to
+ * identify a non-hwaccel hardware decoder. For information about hwaccels, use
+ * avcodec_get_hw_config() instead.
  */
-#define CODEC_FLAG_UNALIGNED AV_CODEC_FLAG_UNALIGNED
-#define CODEC_FLAG_QSCALE AV_CODEC_FLAG_QSCALE
-#define CODEC_FLAG_4MV    AV_CODEC_FLAG_4MV
-#define CODEC_FLAG_OUTPUT_CORRUPT AV_CODEC_FLAG_OUTPUT_CORRUPT
-#define CODEC_FLAG_QPEL   AV_CODEC_FLAG_QPEL
-#if FF_API_GMC
-/**
- * @deprecated use the "gmc" private option of the libxvid encoder
- */
-#define CODEC_FLAG_GMC    0x0020  ///< Use GMC.
-#endif
-#if FF_API_MV0
-/**
- * @deprecated use the flag "mv0" in the "mpv_flags" private option of the
- * mpegvideo encoders
- */
-#define CODEC_FLAG_MV0    0x0040
-#endif
-#if FF_API_INPUT_PRESERVED
-/**
- * @deprecated passing reference-counted frames to the encoders replaces this
- * flag
- */
-#define CODEC_FLAG_INPUT_PRESERVED 0x0100
-#endif
-#define CODEC_FLAG_PASS1           AV_CODEC_FLAG_PASS1
-#define CODEC_FLAG_PASS2           AV_CODEC_FLAG_PASS2
-#define CODEC_FLAG_GRAY            AV_CODEC_FLAG_GRAY
-#if FF_API_EMU_EDGE
-/**
- * @deprecated edges are not used/required anymore. I.e. this flag is now always
- * set.
- */
-#define CODEC_FLAG_EMU_EDGE        0x4000
-#endif
-#define CODEC_FLAG_PSNR            AV_CODEC_FLAG_PSNR
-#define CODEC_FLAG_TRUNCATED       AV_CODEC_FLAG_TRUNCATED
-
-#if FF_API_NORMALIZE_AQP
-/**
- * @deprecated use the flag "naq" in the "mpv_flags" private option of the
- * mpegvideo encoders
- */
-#define CODEC_FLAG_NORMALIZE_AQP  0x00020000
-#endif
-#define CODEC_FLAG_INTERLACED_DCT AV_CODEC_FLAG_INTERLACED_DCT
-#define CODEC_FLAG_LOW_DELAY      AV_CODEC_FLAG_LOW_DELAY
-#define CODEC_FLAG_GLOBAL_HEADER  AV_CODEC_FLAG_GLOBAL_HEADER
-#define CODEC_FLAG_BITEXACT       AV_CODEC_FLAG_BITEXACT
-#define CODEC_FLAG_AC_PRED        AV_CODEC_FLAG_AC_PRED
-#define CODEC_FLAG_LOOP_FILTER    AV_CODEC_FLAG_LOOP_FILTER
-#define CODEC_FLAG_INTERLACED_ME  AV_CODEC_FLAG_INTERLACED_ME
-#define CODEC_FLAG_CLOSED_GOP     AV_CODEC_FLAG_CLOSED_GOP
-#define CODEC_FLAG2_FAST          AV_CODEC_FLAG2_FAST
-#define CODEC_FLAG2_NO_OUTPUT     AV_CODEC_FLAG2_NO_OUTPUT
-#define CODEC_FLAG2_LOCAL_HEADER  AV_CODEC_FLAG2_LOCAL_HEADER
-#define CODEC_FLAG2_DROP_FRAME_TIMECODE AV_CODEC_FLAG2_DROP_FRAME_TIMECODE
-#define CODEC_FLAG2_IGNORE_CROP   AV_CODEC_FLAG2_IGNORE_CROP
-
-#define CODEC_FLAG2_CHUNKS        AV_CODEC_FLAG2_CHUNKS
-#define CODEC_FLAG2_SHOW_ALL      AV_CODEC_FLAG2_SHOW_ALL
-#define CODEC_FLAG2_EXPORT_MVS    AV_CODEC_FLAG2_EXPORT_MVS
-#define CODEC_FLAG2_SKIP_MANUAL   AV_CODEC_FLAG2_SKIP_MANUAL
-
-/* Unsupported options :
- *              Syntax Arithmetic coding (SAC)
- *              Reference Picture Selection
- *              Independent Segment Decoding */
-/* /Fx */
-/* codec capabilities */
-
-#define CODEC_CAP_DRAW_HORIZ_BAND AV_CODEC_CAP_DRAW_HORIZ_BAND ///< Decoder can use draw_horiz_band callback.
-/**
- * Codec uses get_buffer() for allocating buffers and supports custom allocators.
- * If not set, it might not use get_buffer() at all or use operations that
- * assume the buffer was allocated by avcodec_default_get_buffer.
- */
-#define CODEC_CAP_DR1             AV_CODEC_CAP_DR1
-#define CODEC_CAP_TRUNCATED       AV_CODEC_CAP_TRUNCATED
-#if FF_API_XVMC
-/* Codec can export data for HW decoding. This flag indicates that
- * the codec would call get_format() with list that might contain HW accelerated
- * pixel formats (XvMC, VDPAU, VAAPI, etc). The application can pick any of them
- * including raw image format.
- * The application can use the passed context to determine bitstream version,
- * chroma format, resolution etc.
- */
-#define CODEC_CAP_HWACCEL         0x0010
-#endif /* FF_API_XVMC */
-/**
- * Encoder or decoder requires flushing with NULL input at the end in order to
- * give the complete and correct output.
- *
- * NOTE: If this flag is not set, the codec is guaranteed to never be fed with
- *       with NULL data. The user can still send NULL data to the public encode
- *       or decode function, but libavcodec will not pass it along to the codec
- *       unless this flag is set.
- *
- * Decoders:
- * The decoder has a non-zero delay and needs to be fed with avpkt->data=NULL,
- * avpkt->size=0 at the end to get the delayed data until the decoder no longer
- * returns frames.
- *
- * Encoders:
- * The encoder needs to be fed with NULL data at the end of encoding until the
- * encoder no longer returns data.
- *
- * NOTE: For encoders implementing the AVCodec.encode2() function, setting this
- *       flag also means that the encoder must set the pts and duration for
- *       each output packet. If this flag is not set, the pts and duration will
- *       be determined by libavcodec from the input frame.
- */
-#define CODEC_CAP_DELAY           AV_CODEC_CAP_DELAY
-/**
- * Codec can be fed a final frame with a smaller size.
- * This can be used to prevent truncation of the last audio samples.
- */
-#define CODEC_CAP_SMALL_LAST_FRAME AV_CODEC_CAP_SMALL_LAST_FRAME
-#if FF_API_CAP_VDPAU
-/**
- * Codec can export data for HW decoding (VDPAU).
- */
-#define CODEC_CAP_HWACCEL_VDPAU    AV_CODEC_CAP_HWACCEL_VDPAU
-#endif
-/**
- * Codec can output multiple frames per AVPacket
- * Normally demuxers return one frame at a time, demuxers which do not do
- * are connected to a parser to split what they return into proper frames.
- * This flag is reserved to the very rare category of codecs which have a
- * bitstream that cannot be split into frames without timeconsuming
- * operations like full decoding. Demuxers carrying such bitstreams thus
- * may return multiple frames in a packet. This has many disadvantages like
- * prohibiting stream copy in many cases thus it should only be considered
- * as a last resort.
- */
-#define CODEC_CAP_SUBFRAMES        AV_CODEC_CAP_SUBFRAMES
-/**
- * Codec is experimental and is thus avoided in favor of non experimental
- * encoders
- */
-#define CODEC_CAP_EXPERIMENTAL     AV_CODEC_CAP_EXPERIMENTAL
-/**
- * Codec should fill in channel configuration and samplerate instead of container
- */
-#define CODEC_CAP_CHANNEL_CONF     AV_CODEC_CAP_CHANNEL_CONF
-#if FF_API_NEG_LINESIZES
-/**
- * @deprecated no codecs use this capability
- */
-#define CODEC_CAP_NEG_LINESIZES    0x0800
-#endif
-/**
- * Codec supports frame-level multithreading.
- */
-#define CODEC_CAP_FRAME_THREADS    AV_CODEC_CAP_FRAME_THREADS
-/**
- * Codec supports slice-based (or partition-based) multithreading.
- */
-#define CODEC_CAP_SLICE_THREADS    AV_CODEC_CAP_SLICE_THREADS
-/**
- * Codec supports changed parameters at any point.
- */
-#define CODEC_CAP_PARAM_CHANGE     AV_CODEC_CAP_PARAM_CHANGE
-/**
- * Codec supports avctx->thread_count == 0 (auto).
- */
-#define CODEC_CAP_AUTO_THREADS     AV_CODEC_CAP_AUTO_THREADS
-/**
- * Audio encoder supports receiving a different number of samples in each call.
- */
-#define CODEC_CAP_VARIABLE_FRAME_SIZE AV_CODEC_CAP_VARIABLE_FRAME_SIZE
-/**
- * Codec is intra only.
- */
-#define CODEC_CAP_INTRA_ONLY       AV_CODEC_CAP_INTRA_ONLY
-/**
- * Codec is lossless.
- */
-#define CODEC_CAP_LOSSLESS         AV_CODEC_CAP_LOSSLESS
+#define AV_CODEC_CAP_HARDWARE            (1 << 18)
 
 /**
- * HWAccel is experimental and is thus avoided in favor of non experimental
- * codecs
+ * Codec is potentially backed by a hardware implementation, but not
+ * necessarily. This is used instead of AV_CODEC_CAP_HARDWARE, if the
+ * implementation provides some sort of internal fallback.
  */
-#define HWACCEL_CODEC_CAP_EXPERIMENTAL     0x0200
-#endif /* FF_API_WITHOUT_PREFIX */
-
-#if FF_API_MB_TYPE
-//The following defines may change, don't expect compatibility if you use them.
-#define MB_TYPE_INTRA4x4   0x0001
-#define MB_TYPE_INTRA16x16 0x0002 //FIXME H.264-specific
-#define MB_TYPE_INTRA_PCM  0x0004 //FIXME H.264-specific
-#define MB_TYPE_16x16      0x0008
-#define MB_TYPE_16x8       0x0010
-#define MB_TYPE_8x16       0x0020
-#define MB_TYPE_8x8        0x0040
-#define MB_TYPE_INTERLACED 0x0080
-#define MB_TYPE_DIRECT2    0x0100 //FIXME
-#define MB_TYPE_ACPRED     0x0200
-#define MB_TYPE_GMC        0x0400
-#define MB_TYPE_SKIP       0x0800
-#define MB_TYPE_P0L0       0x1000
-#define MB_TYPE_P1L0       0x2000
-#define MB_TYPE_P0L1       0x4000
-#define MB_TYPE_P1L1       0x8000
-#define MB_TYPE_L0         (MB_TYPE_P0L0 | MB_TYPE_P1L0)
-#define MB_TYPE_L1         (MB_TYPE_P0L1 | MB_TYPE_P1L1)
-#define MB_TYPE_L0L1       (MB_TYPE_L0   | MB_TYPE_L1)
-#define MB_TYPE_QUANT      0x00010000
-#define MB_TYPE_CBP        0x00020000
-// Note bits 24-31 are reserved for codec specific use (H.264 ref0, MPEG-1 0mv, ...)
-#endif
+#define AV_CODEC_CAP_HYBRID              (1 << 19)
 
 /**
  * Pan Scan area.
  * This specifies the area which should be displayed.
  * Note there may be multiple such areas for one frame.
  */
-typedef struct AVPanScan{
+typedef struct AVPanScan {
     /**
      * id
      * - encoding: Set by user.
@@ -1349,7 +1102,7 @@
      * - decoding: Set by libavcodec.
      */
     int16_t position[3][2];
-}AVPanScan;
+} AVPanScan;
 
 /**
  * This structure describes the bitrate properties of an encoded bitstream. It
@@ -1389,13 +1142,6 @@
     uint64_t vbv_delay;
 } AVCPBProperties;
 
-#if FF_API_QSCALE_TYPE
-#define FF_QSCALE_TYPE_MPEG1 0
-#define FF_QSCALE_TYPE_MPEG2 1
-#define FF_QSCALE_TYPE_H264  2
-#define FF_QSCALE_TYPE_VP56  3
-#endif
-
 /**
  * The decoder will keep a reference to the frame and may reuse it later.
  */
@@ -1523,7 +1269,7 @@
      * u8    reason for end   skip (0=padding silence, 1=convergence)
      * @endcode
      */
-    AV_PKT_DATA_SKIP_SAMPLES=70,
+    AV_PKT_DATA_SKIP_SAMPLES,
 
     /**
      * An AV_PKT_DATA_JP_DUALMONO side data packet indicates that
@@ -1612,7 +1358,26 @@
     AV_PKT_DATA_A53_CC,
 
     /**
-     * The number of side data elements (in fact a bit more than it).
+     * This side data is encryption initialization data.
+     * The format is not part of ABI, use av_encryption_init_info_* methods to
+     * access.
+     */
+    AV_PKT_DATA_ENCRYPTION_INIT_INFO,
+
+    /**
+     * This side data contains encryption info for how to decrypt the packet.
+     * The format is not part of ABI, use av_encryption_info_* methods to access.
+     */
+    AV_PKT_DATA_ENCRYPTION_INFO,
+
+    /**
+     * Active Format Description data consisting of a single byte as specified
+     * in ETSI TS 101 154 using AVActiveFormatDescription enum.
+     */
+    AV_PKT_DATA_AFD,
+
+    /**
+     * The number of side data types.
      * This is not part of the public API/ABI in the sense that it may
      * change when new side data types are added.
      * This must stay the last enum value.
@@ -1728,6 +1493,12 @@
  * outside the packet may be followed.
  */
 #define AV_PKT_FLAG_TRUSTED   0x0008
+/**
+ * Flag is used to indicate packets that contain frames that can
+ * be discarded by the decoder.  I.e. Non-reference frames.
+ */
+#define AV_PKT_FLAG_DISPOSABLE 0x0010
+
 
 enum AVSideDataParamChangeFlags {
     AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT  = 0x0001,
@@ -1773,13 +1544,6 @@
 
     enum AVMediaType codec_type; /* see AVMEDIA_TYPE_xxx */
     const struct AVCodec  *codec;
-#if FF_API_CODEC_NAME
-    /**
-     * @deprecated this field is not used for anything in libavcodec
-     */
-    attribute_deprecated
-    char             codec_name[32];
-#endif
     enum AVCodecID     codec_id; /* see AV_CODEC_ID_xxx */
 
     /**
@@ -1797,14 +1561,6 @@
      */
     unsigned int codec_tag;
 
-#if FF_API_STREAM_CODEC_TAG
-    /**
-     * @deprecated this field is unused
-     */
-    attribute_deprecated
-    unsigned int stream_codec_tag;
-#endif
-
     void *priv_data;
 
     /**
@@ -1875,6 +1631,7 @@
      * The allocated memory should be AV_INPUT_BUFFER_PADDING_SIZE bytes larger
      * than extradata_size to avoid problems if it is read with the bitstream reader.
      * The bytewise contents of extradata must not depend on the architecture or CPU endianness.
+     * Must be allocated with the av_malloc() family of functions.
      * - encoding: Set/allocated/freed by libavcodec.
      * - decoding: Set/allocated/freed by user.
      */
@@ -1967,10 +1724,6 @@
      */
     int coded_width, coded_height;
 
-#if FF_API_ASPECT_EXTENDED
-#define FF_ASPECT_EXTENDED 15
-#endif
-
     /**
      * the number of pictures in a group of pictures, or 0 for intra_only
      * - encoding: Set by user.
@@ -1993,14 +1746,6 @@
      */
     enum AVPixelFormat pix_fmt;
 
-#if FF_API_MOTION_EST
-    /**
-     * This option does nothing
-     * @deprecated use codec private options instead
-     */
-    attribute_deprecated int me_method;
-#endif
-
     /**
      * If non NULL, 'draw_horiz_band' is called by the libavcodec
      * decoder to draw a horizontal band. It improves cache usage. Not
@@ -2060,12 +1805,6 @@
      */
     float b_quant_factor;
 
-#if FF_API_RC_STRATEGY
-    /** @deprecated use codec private option instead */
-    attribute_deprecated int rc_strategy;
-#define FF_RC_STRATEGY_XVID 1
-#endif
-
 #if FF_API_PRIVATE_OPT
     /** @deprecated use encoder private options instead */
     attribute_deprecated
@@ -2259,26 +1998,6 @@
      */
     int me_subpel_quality;
 
-#if FF_API_AFD
-    /**
-     * DTG active format information (additional aspect ratio
-     * information only used in DVB MPEG-2 transport streams)
-     * 0 if not set.
-     *
-     * - encoding: unused
-     * - decoding: Set by decoder.
-     * @deprecated Deprecated in favor of AVSideData
-     */
-    attribute_deprecated int dtg_active_format;
-#define FF_DTG_AFD_SAME         8
-#define FF_DTG_AFD_4_3          9
-#define FF_DTG_AFD_16_9         10
-#define FF_DTG_AFD_14_9         11
-#define FF_DTG_AFD_4_3_SP_14_9  13
-#define FF_DTG_AFD_16_9_SP_14_9 14
-#define FF_DTG_AFD_SP_4_3       15
-#endif /* FF_API_AFD */
-
     /**
      * maximum motion estimation search range in subpel units
      * If 0 then no limit.
@@ -2288,19 +2007,6 @@
      */
     int me_range;
 
-#if FF_API_QUANT_BIAS
-    /**
-     * @deprecated use encoder private option instead
-     */
-    attribute_deprecated int intra_quant_bias;
-#define FF_DEFAULT_QUANT_BIAS 999999
-
-    /**
-     * @deprecated use encoder private option instead
-     */
-    attribute_deprecated int inter_quant_bias;
-#endif
-
     /**
      * slice flags
      * - encoding: unused
@@ -2311,16 +2017,6 @@
 #define SLICE_FLAG_ALLOW_FIELD    0x0002 ///< allow draw_horiz_band() with field slices (MPEG-2 field pics)
 #define SLICE_FLAG_ALLOW_PLANE    0x0004 ///< allow draw_horiz_band() with 1 component at a time (SVQ1)
 
-#if FF_API_XVMC
-    /**
-     * XVideo Motion Acceleration
-     * - encoding: forbidden
-     * - decoding: set by decoder
-     * @deprecated XvMC doesn't need it anymore.
-     */
-    attribute_deprecated int xvmc_acceleration;
-#endif /* FF_API_XVMC */
-
     /**
      * macroblock decision mode
      * - encoding: Set by user.
@@ -2355,20 +2051,6 @@
     int noise_reduction;
 #endif
 
-#if FF_API_MPV_OPT
-    /**
-     * @deprecated this field is unused
-     */
-    attribute_deprecated
-    int me_threshold;
-
-    /**
-     * @deprecated this field is unused
-     */
-    attribute_deprecated
-    int mb_threshold;
-#endif
-
     /**
      * precision of the intra DC coefficient - 8
      * - encoding: Set by user.
@@ -2390,14 +2072,6 @@
      */
     int skip_bottom;
 
-#if FF_API_MPV_OPT
-    /**
-     * @deprecated use encoder private options instead
-     */
-    attribute_deprecated
-    float border_masking;
-#endif
-
     /**
      * minimum MB Lagrange multiplier
      * - encoding: Set by user.
@@ -2452,15 +2126,6 @@
     int chromaoffset;
 #endif
 
-#if FF_API_UNUSED_MEMBERS
-    /**
-     * Multiplied by qscale for each frame and added to scene_change_score.
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
-    attribute_deprecated int scenechange_factor;
-#endif
-
     /**
      * Note: Value depends upon the compare function used for fullpel ME.
      * - encoding: Set by user.
@@ -2723,19 +2388,6 @@
      */
     int max_qdiff;
 
-#if FF_API_MPV_OPT
-    /**
-     * @deprecated use encoder private options instead
-     */
-    attribute_deprecated
-    float rc_qsquish;
-
-    attribute_deprecated
-    float rc_qmod_amp;
-    attribute_deprecated
-    int rc_qmod_freq;
-#endif
-
     /**
      * decoder bitstream buffer size
      * - encoding: Set by user.
@@ -2751,14 +2403,6 @@
     int rc_override_count;
     RcOverride *rc_override;
 
-#if FF_API_MPV_OPT
-    /**
-     * @deprecated use encoder private options instead
-     */
-    attribute_deprecated
-    const char *rc_eq;
-#endif
-
     /**
      * maximum bitrate
      * - encoding: Set by user.
@@ -2773,17 +2417,6 @@
      */
     int64_t rc_min_rate;
 
-#if FF_API_MPV_OPT
-    /**
-     * @deprecated use encoder private options instead
-     */
-    attribute_deprecated
-    float rc_buffer_aggressivity;
-
-    attribute_deprecated
-    float rc_initial_cplx;
-#endif
-
     /**
      * Ratecontrol attempt to use, at maximum, <value> of what can be used without an underflow.
      * - encoding: Set by user.
@@ -2810,9 +2443,6 @@
 #define FF_CODER_TYPE_AC        1
 #define FF_CODER_TYPE_RAW       2
 #define FF_CODER_TYPE_RLE       3
-#if FF_API_UNUSED_MEMBERS
-#define FF_CODER_TYPE_DEFLATE   4
-#endif /* FF_API_UNUSED_MEMBERS */
     /**
      * @deprecated use encoder private options instead
      */
@@ -2826,20 +2456,6 @@
     int context_model;
 #endif
 
-#if FF_API_MPV_OPT
-    /**
-     * @deprecated use encoder private options instead
-     */
-    attribute_deprecated
-    int lmin;
-
-    /**
-     * @deprecated use encoder private options instead
-     */
-    attribute_deprecated
-    int lmax;
-#endif
-
 #if FF_API_PRIVATE_OPT
     /** @deprecated use encoder private options instead */
     attribute_deprecated
@@ -2950,16 +2566,10 @@
      */
     int workaround_bugs;
 #define FF_BUG_AUTODETECT       1  ///< autodetection
-#if FF_API_OLD_MSMPEG4
-#define FF_BUG_OLD_MSMPEG4      2
-#endif
 #define FF_BUG_XVID_ILACE       4
 #define FF_BUG_UMP4             8
 #define FF_BUG_NO_PADDING       16
 #define FF_BUG_AMV              32
-#if FF_API_AC_VLC
-#define FF_BUG_AC_VLC           0  ///< Will be removed, libavcodec can now handle these non-compliant files by default.
-#endif
 #define FF_BUG_QPEL_CHROMA      64
 #define FF_BUG_STD_QPEL         128
 #define FF_BUG_QPEL_CHROMA2     256
@@ -3020,9 +2630,6 @@
 #define FF_DEBUG_DCT_COEFF   0x00000040
 #define FF_DEBUG_SKIP        0x00000080
 #define FF_DEBUG_STARTCODE   0x00000100
-#if FF_API_UNUSED_MEMBERS
-#define FF_DEBUG_PTS         0x00000200
-#endif /* FF_API_UNUSED_MEMBERS */
 #define FF_DEBUG_ER          0x00000400
 #define FF_DEBUG_MMCO        0x00000800
 #define FF_DEBUG_BUGS        0x00001000
@@ -3084,7 +2691,7 @@
      * - encoding: unused.
      * - decoding: Set by libavcodec
      */
-    struct AVHWAccel *hwaccel;
+    const struct AVHWAccel *hwaccel;
 
     /**
      * Hardware accelerator context.
@@ -3130,27 +2737,12 @@
 #define FF_IDCT_SIMPLEMMX     3
 #define FF_IDCT_ARM           7
 #define FF_IDCT_ALTIVEC       8
-#if FF_API_ARCH_SH4
-#define FF_IDCT_SH4           9
-#endif
 #define FF_IDCT_SIMPLEARM     10
-#if FF_API_UNUSED_MEMBERS
-#define FF_IDCT_IPP           13
-#endif /* FF_API_UNUSED_MEMBERS */
 #define FF_IDCT_XVID          14
-#if FF_API_IDCT_XVIDMMX
-#define FF_IDCT_XVIDMMX       14
-#endif /* FF_API_IDCT_XVIDMMX */
 #define FF_IDCT_SIMPLEARMV5TE 16
 #define FF_IDCT_SIMPLEARMV6   17
-#if FF_API_ARCH_SPARC
-#define FF_IDCT_SIMPLEVIS     18
-#endif
 #define FF_IDCT_FAAN          20
 #define FF_IDCT_SIMPLENEON    22
-#if FF_API_ARCH_ALPHA
-#define FF_IDCT_SIMPLEALPHA   23
-#endif
 #define FF_IDCT_NONE          24 /* Used by XvMC to extract IDCT coefficients with FF_IDCT_PERM_NONE */
 #define FF_IDCT_SIMPLEAUTO    128
 
@@ -3361,6 +2953,18 @@
 #define FF_PROFILE_HEVC_MAIN_STILL_PICTURE          3
 #define FF_PROFILE_HEVC_REXT                        4
 
+#define FF_PROFILE_AV1_MAIN                         0
+#define FF_PROFILE_AV1_HIGH                         1
+#define FF_PROFILE_AV1_PROFESSIONAL                 2
+
+#define FF_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT            0xc0
+#define FF_PROFILE_MJPEG_HUFFMAN_EXTENDED_SEQUENTIAL_DCT 0xc1
+#define FF_PROFILE_MJPEG_HUFFMAN_PROGRESSIVE_DCT         0xc2
+#define FF_PROFILE_MJPEG_HUFFMAN_LOSSLESS                0xc3
+#define FF_PROFILE_MJPEG_JPEG_LS                         0xf7
+
+#define FF_PROFILE_SBC_MSBC                         1
+
     /**
      * level
      * - encoding: Set by user.
@@ -3401,15 +3005,6 @@
     uint8_t *subtitle_header;
     int subtitle_header_size;
 
-#if FF_API_ERROR_RATE
-    /**
-     * @deprecated use the 'error_rate' private AVOption of the mpegvideo
-     * encoders
-     */
-    attribute_deprecated
-    int error_rate;
-#endif
-
 #if FF_API_VBV_DELAY
     /**
      * VBV delay coded in the last frame (in periods of a 27 MHz clock).
@@ -3521,6 +3116,7 @@
 #define FF_SUB_CHARENC_MODE_DO_NOTHING  -1  ///< do nothing (demuxer outputs a stream supposed to be already in UTF-8, or the codec is bitmap for instance)
 #define FF_SUB_CHARENC_MODE_AUTOMATIC    0  ///< libavcodec will select the mode itself
 #define FF_SUB_CHARENC_MODE_PRE_DECODER  1  ///< the AVPacket data needs to be recoded to UTF-8 before being fed to the decoder, requires iconv
+#define FF_SUB_CHARENC_MODE_IGNORE       2  ///< neither convert the subtitles, nor check them for valid UTF-8
 
     /**
      * Skip processing alpha if supported by codec.
@@ -3707,24 +3303,57 @@
      * (with the display dimensions being determined by the crop_* fields).
      */
     int apply_cropping;
+
+    /*
+     * Video decoding only.  Sets the number of extra hardware frames which
+     * the decoder will allocate for use by the caller.  This must be set
+     * before avcodec_open2() is called.
+     *
+     * Some hardware decoders require all frames that they will use for
+     * output to be defined in advance before decoding starts.  For such
+     * decoders, the hardware frame pool must therefore be of a fixed size.
+     * The extra frames set here are on top of any number that the decoder
+     * needs internally in order to operate normally (for example, frames
+     * used as reference pictures).
+     */
+    int extra_hw_frames;
 } AVCodecContext;
 
+#if FF_API_CODEC_GET_SET
+/**
+ * Accessors for some AVCodecContext fields. These used to be provided for ABI
+ * compatibility, and do not need to be used anymore.
+ */
+attribute_deprecated
 AVRational av_codec_get_pkt_timebase         (const AVCodecContext *avctx);
+attribute_deprecated
 void       av_codec_set_pkt_timebase         (AVCodecContext *avctx, AVRational val);
 
+attribute_deprecated
 const AVCodecDescriptor *av_codec_get_codec_descriptor(const AVCodecContext *avctx);
+attribute_deprecated
 void                     av_codec_set_codec_descriptor(AVCodecContext *avctx, const AVCodecDescriptor *desc);
 
+attribute_deprecated
 unsigned av_codec_get_codec_properties(const AVCodecContext *avctx);
 
+#if FF_API_LOWRES
+attribute_deprecated
 int  av_codec_get_lowres(const AVCodecContext *avctx);
+attribute_deprecated
 void av_codec_set_lowres(AVCodecContext *avctx, int val);
+#endif
 
+attribute_deprecated
 int  av_codec_get_seek_preroll(const AVCodecContext *avctx);
+attribute_deprecated
 void av_codec_set_seek_preroll(AVCodecContext *avctx, int val);
 
+attribute_deprecated
 uint16_t *av_codec_get_chroma_intra_matrix(const AVCodecContext *avctx);
+attribute_deprecated
 void av_codec_set_chroma_intra_matrix(AVCodecContext *avctx, uint16_t *val);
+#endif
 
 /**
  * AVProfile.
@@ -3734,6 +3363,61 @@
     const char *name; ///< short name for the profile
 } AVProfile;
 
+enum {
+    /**
+     * The codec supports this format via the hw_device_ctx interface.
+     *
+     * When selecting this format, AVCodecContext.hw_device_ctx should
+     * have been set to a device of the specified type before calling
+     * avcodec_open2().
+     */
+    AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX = 0x01,
+    /**
+     * The codec supports this format via the hw_frames_ctx interface.
+     *
+     * When selecting this format for a decoder,
+     * AVCodecContext.hw_frames_ctx should be set to a suitable frames
+     * context inside the get_format() callback.  The frames context
+     * must have been created on a device of the specified type.
+     */
+    AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX = 0x02,
+    /**
+     * The codec supports this format by some internal method.
+     *
+     * This format can be selected without any additional configuration -
+     * no device or frames context is required.
+     */
+    AV_CODEC_HW_CONFIG_METHOD_INTERNAL      = 0x04,
+    /**
+     * The codec supports this format by some ad-hoc method.
+     *
+     * Additional settings and/or function calls are required.  See the
+     * codec-specific documentation for details.  (Methods requiring
+     * this sort of configuration are deprecated and others should be
+     * used in preference.)
+     */
+    AV_CODEC_HW_CONFIG_METHOD_AD_HOC        = 0x08,
+};
+
+typedef struct AVCodecHWConfig {
+    /**
+     * A hardware pixel format which the codec can use.
+     */
+    enum AVPixelFormat pix_fmt;
+    /**
+     * Bit set of AV_CODEC_HW_CONFIG_METHOD_* flags, describing the possible
+     * setup methods which can be used with this configuration.
+     */
+    int methods;
+    /**
+     * The device type associated with the configuration.
+     *
+     * Must be set for AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX and
+     * AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX, otherwise unused.
+     */
+    enum AVHWDeviceType device_type;
+} AVCodecHWConfig;
+
 typedef struct AVCodecDefault AVCodecDefault;
 
 struct AVSubtitle;
@@ -3770,6 +3454,18 @@
     const AVClass *priv_class;              ///< AVClass for the private context
     const AVProfile *profiles;              ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN}
 
+    /**
+     * Group name of the codec implementation.
+     * This is a short symbolic name of the wrapper backing this codec. A
+     * wrapper uses some kind of external implementation for the codec, such
+     * as an external library, or a codec implementation provided by the OS or
+     * the hardware.
+     * If this field is NULL, this is a builtin, libavcodec native codec.
+     * If non-NULL, this will be the suffix in AVCodec.name in most cases
+     * (usually AVCodec.name will be of the form "<codec_name>_<wrapper_name>").
+     */
+    const char *wrapper_name;
+
     /*****************************************************************
      * No fields below this line are part of the public API. They
      * may not be used outside of libavcodec and can be changed and
@@ -3806,6 +3502,9 @@
 
     /**
      * Initialize codec static data, called from avcodec_register().
+     *
+     * This is not intended for time consuming operations as it is
+     * run for every codec regardless of that codec being used.
      */
     void (*init_static_data)(struct AVCodec *codec);
 
@@ -3859,14 +3558,39 @@
      * packets before decoding.
      */
     const char *bsfs;
+
+    /**
+     * Array of pointers to hardware configurations supported by the codec,
+     * or NULL if no hardware supported.  The array is terminated by a NULL
+     * pointer.
+     *
+     * The user can only access this field via avcodec_get_hw_config().
+     */
+    const struct AVCodecHWConfigInternal **hw_configs;
 } AVCodec;
 
+#if FF_API_CODEC_GET_SET
+attribute_deprecated
 int av_codec_get_max_lowres(const AVCodec *codec);
+#endif
 
 struct MpegEncContext;
 
 /**
+ * Retrieve supported hardware configurations for a codec.
+ *
+ * Values of index from zero to some maximum return the indexed configuration
+ * descriptor; all other values return NULL.  If the codec does not support
+ * any hardware configurations then it will always return NULL.
+ */
+const AVCodecHWConfig *avcodec_get_hw_config(const AVCodec *codec, int index);
+
+/**
  * @defgroup lavc_hwaccel AVHWAccel
+ *
+ * @note  Nothing in this structure should be accessed by the user.  At some
+ *        point in future it will not be externally visible at all.
+ *
  * @{
  */
 typedef struct AVHWAccel {
@@ -3911,7 +3635,6 @@
      * New public fields should be added right above.
      *****************************************************************
      */
-    struct AVHWAccel *next;
 
     /**
      * Allocate a custom buffer
@@ -3935,6 +3658,20 @@
     int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
 
     /**
+     * Callback for parameter data (SPS/PPS/VPS etc).
+     *
+     * Useful for hardware decoders which keep persistent state about the
+     * video parameters, and need to receive any changes to update that state.
+     *
+     * @param avctx the codec context
+     * @param type the nal unit type
+     * @param buf the nal unit data buffer
+     * @param buf_size the size of the nal unit in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*decode_params)(AVCodecContext *avctx, int type, const uint8_t *buf, uint32_t buf_size);
+
+    /**
      * Callback for each slice.
      *
      * Meaningful slice information (codec specific) is guaranteed to
@@ -4006,6 +3743,16 @@
      * Internal hwaccel capabilities.
      */
     int caps_internal;
+
+    /**
+     * Fill the given hw_frames context with current codec parameters. Called
+     * from get_format. Refer to avcodec_get_hw_frames_parameters() for
+     * details.
+     *
+     * This CAN be called before AVHWAccel.init is called, and you must assume
+     * that avctx->hwaccel_priv_data is invalid.
+     */
+    int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
 } AVHWAccel;
 
 /**
@@ -4298,11 +4045,25 @@
 } AVCodecParameters;
 
 /**
+ * Iterate over all registered codecs.
+ *
+ * @param opaque a pointer where libavcodec will store the iteration state. Must
+ *               point to NULL to start the iteration.
+ *
+ * @return the next registered codec or NULL when the iteration is
+ *         finished
+ */
+const AVCodec *av_codec_iterate(void **opaque);
+
+#if FF_API_NEXT
+/**
  * If c is NULL, returns the first registered codec,
  * if c is non-NULL, returns the next registered codec after c,
  * or NULL if c is the last one.
  */
+attribute_deprecated
 AVCodec *av_codec_next(const AVCodec *c);
+#endif
 
 /**
  * Return the LIBAVCODEC_VERSION_INT constant.
@@ -4319,6 +4080,7 @@
  */
 const char *avcodec_license(void);
 
+#if FF_API_NEXT
 /**
  * Register the codec codec and initialize libavcodec.
  *
@@ -4327,6 +4089,7 @@
  *
  * @see avcodec_register_all()
  */
+attribute_deprecated
 void avcodec_register(AVCodec *codec);
 
 /**
@@ -4339,7 +4102,9 @@
  * @see av_register_codec_parser
  * @see av_register_bitstream_filter
  */
+attribute_deprecated
 void avcodec_register_all(void);
+#endif
 
 /**
  * Allocate an AVCodecContext and set its fields to default values. The
@@ -4620,7 +4385,7 @@
  * @warning This is a hack - the packet memory allocation stuff is broken. The
  * packet is allocated if it was not really allocated.
  *
- * @deprecated Use av_packet_ref
+ * @deprecated Use av_packet_ref or av_packet_make_refcounted
  */
 attribute_deprecated
 int av_dup_packet(AVPacket *pkt);
@@ -4792,6 +4557,33 @@
 int av_packet_copy_props(AVPacket *dst, const AVPacket *src);
 
 /**
+ * Ensure the data described by a given packet is reference counted.
+ *
+ * @note This function does not ensure that the reference will be writable.
+ *       Use av_packet_make_writable instead for that purpose.
+ *
+ * @see av_packet_ref
+ * @see av_packet_make_writable
+ *
+ * @param pkt packet whose data should be made reference counted.
+ *
+ * @return 0 on success, a negative AVERROR on error. On failure, the
+ *         packet is unchanged.
+ */
+int av_packet_make_refcounted(AVPacket *pkt);
+
+/**
+ * Create a writable reference for the data described by a given packet,
+ * avoiding data copy if possible.
+ *
+ * @param pkt Packet whose data should be made writable.
+ *
+ * @return 0 on success, a negative AVERROR on failure. On failure, the
+ *         packet is unchanged.
+ */
+int av_packet_make_writable(AVPacket *pkt);
+
+/**
  * Convert valid timing fields (timestamps / durations) in a packet from one
  * timebase to another. Timestamps with unknown values (AV_NOPTS_VALUE) will be
  * ignored.
@@ -4836,21 +4628,6 @@
  */
 int avcodec_default_get_buffer2(AVCodecContext *s, AVFrame *frame, int flags);
 
-#if FF_API_EMU_EDGE
-/**
- * Return the amount of padding in pixels which the get_buffer callback must
- * provide around the edge of the image for codecs which do not have the
- * CODEC_FLAG_EMU_EDGE flag.
- *
- * @return Required padding in pixels.
- *
- * @deprecated CODEC_FLAG_EMU_EDGE is deprecated, so this function is no longer
- * needed
- */
-attribute_deprecated
-unsigned avcodec_get_edge_width(void);
-#endif
-
 /**
  * Modify width and height values so that they will result in a memory
  * buffer that is acceptable for the codec if you do not use any horizontal
@@ -5156,6 +4933,109 @@
  */
 int avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt);
 
+/**
+ * Create and return a AVHWFramesContext with values adequate for hardware
+ * decoding. This is meant to get called from the get_format callback, and is
+ * a helper for preparing a AVHWFramesContext for AVCodecContext.hw_frames_ctx.
+ * This API is for decoding with certain hardware acceleration modes/APIs only.
+ *
+ * The returned AVHWFramesContext is not initialized. The caller must do this
+ * with av_hwframe_ctx_init().
+ *
+ * Calling this function is not a requirement, but makes it simpler to avoid
+ * codec or hardware API specific details when manually allocating frames.
+ *
+ * Alternatively to this, an API user can set AVCodecContext.hw_device_ctx,
+ * which sets up AVCodecContext.hw_frames_ctx fully automatically, and makes
+ * it unnecessary to call this function or having to care about
+ * AVHWFramesContext initialization at all.
+ *
+ * There are a number of requirements for calling this function:
+ *
+ * - It must be called from get_format with the same avctx parameter that was
+ *   passed to get_format. Calling it outside of get_format is not allowed, and
+ *   can trigger undefined behavior.
+ * - The function is not always supported (see description of return values).
+ *   Even if this function returns successfully, hwaccel initialization could
+ *   fail later. (The degree to which implementations check whether the stream
+ *   is actually supported varies. Some do this check only after the user's
+ *   get_format callback returns.)
+ * - The hw_pix_fmt must be one of the choices suggested by get_format. If the
+ *   user decides to use a AVHWFramesContext prepared with this API function,
+ *   the user must return the same hw_pix_fmt from get_format.
+ * - The device_ref passed to this function must support the given hw_pix_fmt.
+ * - After calling this API function, it is the user's responsibility to
+ *   initialize the AVHWFramesContext (returned by the out_frames_ref parameter),
+ *   and to set AVCodecContext.hw_frames_ctx to it. If done, this must be done
+ *   before returning from get_format (this is implied by the normal
+ *   AVCodecContext.hw_frames_ctx API rules).
+ * - The AVHWFramesContext parameters may change every time time get_format is
+ *   called. Also, AVCodecContext.hw_frames_ctx is reset before get_format. So
+ *   you are inherently required to go through this process again on every
+ *   get_format call.
+ * - It is perfectly possible to call this function without actually using
+ *   the resulting AVHWFramesContext. One use-case might be trying to reuse a
+ *   previously initialized AVHWFramesContext, and calling this API function
+ *   only to test whether the required frame parameters have changed.
+ * - Fields that use dynamically allocated values of any kind must not be set
+ *   by the user unless setting them is explicitly allowed by the documentation.
+ *   If the user sets AVHWFramesContext.free and AVHWFramesContext.user_opaque,
+ *   the new free callback must call the potentially set previous free callback.
+ *   This API call may set any dynamically allocated fields, including the free
+ *   callback.
+ *
+ * The function will set at least the following fields on AVHWFramesContext
+ * (potentially more, depending on hwaccel API):
+ *
+ * - All fields set by av_hwframe_ctx_alloc().
+ * - Set the format field to hw_pix_fmt.
+ * - Set the sw_format field to the most suited and most versatile format. (An
+ *   implication is that this will prefer generic formats over opaque formats
+ *   with arbitrary restrictions, if possible.)
+ * - Set the width/height fields to the coded frame size, rounded up to the
+ *   API-specific minimum alignment.
+ * - Only _if_ the hwaccel requires a pre-allocated pool: set the initial_pool_size
+ *   field to the number of maximum reference surfaces possible with the codec,
+ *   plus 1 surface for the user to work (meaning the user can safely reference
+ *   at most 1 decoded surface at a time), plus additional buffering introduced
+ *   by frame threading. If the hwaccel does not require pre-allocation, the
+ *   field is left to 0, and the decoder will allocate new surfaces on demand
+ *   during decoding.
+ * - Possibly AVHWFramesContext.hwctx fields, depending on the underlying
+ *   hardware API.
+ *
+ * Essentially, out_frames_ref returns the same as av_hwframe_ctx_alloc(), but
+ * with basic frame parameters set.
+ *
+ * The function is stateless, and does not change the AVCodecContext or the
+ * device_ref AVHWDeviceContext.
+ *
+ * @param avctx The context which is currently calling get_format, and which
+ *              implicitly contains all state needed for filling the returned
+ *              AVHWFramesContext properly.
+ * @param device_ref A reference to the AVHWDeviceContext describing the device
+ *                   which will be used by the hardware decoder.
+ * @param hw_pix_fmt The hwaccel format you are going to return from get_format.
+ * @param out_frames_ref On success, set to a reference to an _uninitialized_
+ *                       AVHWFramesContext, created from the given device_ref.
+ *                       Fields will be set to values required for decoding.
+ *                       Not changed if an error is returned.
+ * @return zero on success, a negative value on error. The following error codes
+ *         have special semantics:
+ *      AVERROR(ENOENT): the decoder does not support this functionality. Setup
+ *                       is always manual, or it is a decoder which does not
+ *                       support setting AVCodecContext.hw_frames_ctx at all,
+ *                       or it is a software format.
+ *      AVERROR(EINVAL): it is known that hardware decoding is not supported for
+ *                       this configuration, or the device_ref is not supported
+ *                       for the hwaccel referenced by hw_pix_fmt.
+ */
+int avcodec_get_hw_frames_parameters(AVCodecContext *avctx,
+                                     AVBufferRef *device_ref,
+                                     enum AVPixelFormat hw_pix_fmt,
+                                     AVBufferRef **out_frames_ref);
+
+
 
 /**
  * @defgroup lavc_parsing Frame parsing
@@ -5351,8 +5231,21 @@
     struct AVCodecParser *next;
 } AVCodecParser;
 
+/**
+ * Iterate over all registered codec parsers.
+ *
+ * @param opaque a pointer where libavcodec will store the iteration state. Must
+ *               point to NULL to start the iteration.
+ *
+ * @return the next registered codec parser or NULL when the iteration is
+ *         finished
+ */
+const AVCodecParser *av_parser_iterate(void **opaque);
+
+attribute_deprecated
 AVCodecParser *av_parser_next(const AVCodecParser *c);
 
+attribute_deprecated
 void av_register_codec_parser(AVCodecParser *parser);
 AVCodecParserContext *av_parser_init(int codec_id);
 
@@ -5521,103 +5414,6 @@
  * @}
  */
 
-#if FF_API_AVCODEC_RESAMPLE
-/**
- * @defgroup lavc_resample Audio resampling
- * @ingroup libavc
- * @deprecated use libswresample instead
- *
- * @{
- */
-struct ReSampleContext;
-struct AVResampleContext;
-
-typedef struct ReSampleContext ReSampleContext;
-
-/**
- *  Initialize audio resampling context.
- *
- * @param output_channels  number of output channels
- * @param input_channels   number of input channels
- * @param output_rate      output sample rate
- * @param input_rate       input sample rate
- * @param sample_fmt_out   requested output sample format
- * @param sample_fmt_in    input sample format
- * @param filter_length    length of each FIR filter in the filterbank relative to the cutoff frequency
- * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
- * @param linear           if 1 then the used FIR filter will be linearly interpolated
-                           between the 2 closest, if 0 the closest will be used
- * @param cutoff           cutoff frequency, 1.0 corresponds to half the output sampling rate
- * @return allocated ReSampleContext, NULL if error occurred
- */
-attribute_deprecated
-ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
-                                        int output_rate, int input_rate,
-                                        enum AVSampleFormat sample_fmt_out,
-                                        enum AVSampleFormat sample_fmt_in,
-                                        int filter_length, int log2_phase_count,
-                                        int linear, double cutoff);
-
-attribute_deprecated
-int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);
-
-/**
- * Free resample context.
- *
- * @param s a non-NULL pointer to a resample context previously
- *          created with av_audio_resample_init()
- */
-attribute_deprecated
-void audio_resample_close(ReSampleContext *s);
-
-
-/**
- * Initialize an audio resampler.
- * Note, if either rate is not an integer then simply scale both rates up so they are.
- * @param filter_length length of each FIR filter in the filterbank relative to the cutoff freq
- * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
- * @param linear If 1 then the used FIR filter will be linearly interpolated
-                 between the 2 closest, if 0 the closest will be used
- * @param cutoff cutoff frequency, 1.0 corresponds to half the output sampling rate
- */
-attribute_deprecated
-struct AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_length, int log2_phase_count, int linear, double cutoff);
-
-/**
- * Resample an array of samples using a previously configured context.
- * @param src an array of unconsumed samples
- * @param consumed the number of samples of src which have been consumed are returned here
- * @param src_size the number of unconsumed samples available
- * @param dst_size the amount of space in samples available in dst
- * @param update_ctx If this is 0 then the context will not be modified, that way several channels can be resampled with the same context.
- * @return the number of samples written in dst or -1 if an error occurred
- */
-attribute_deprecated
-int av_resample(struct AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx);
-
-
-/**
- * Compensate samplerate/timestamp drift. The compensation is done by changing
- * the resampler parameters, so no audible clicks or similar distortions occur
- * @param compensation_distance distance in output samples over which the compensation should be performed
- * @param sample_delta number of output samples which should be output less
- *
- * example: av_resample_compensate(c, 10, 500)
- * here instead of 510 samples only 500 samples would be output
- *
- * note, due to rounding the actual compensation might be slightly different,
- * especially if the compensation_distance is large and the in_rate used during init is small
- */
-attribute_deprecated
-void av_resample_compensate(struct AVResampleContext *c, int sample_delta, int compensation_distance);
-attribute_deprecated
-void av_resample_close(struct AVResampleContext *c);
-
-/**
- * @}
- */
-#endif
-
 #if FF_API_AVPICTURE
 /**
  * @addtogroup lavc_picture
@@ -5758,14 +5554,6 @@
  * @}
  */
 
-#if FF_API_SET_DIMENSIONS
-/**
- * @deprecated this function is not supposed to be used from outside of lavc
- */
-attribute_deprecated
-void avcodec_set_dimensions(AVCodecContext *s, int width, int height);
-#endif
-
 #if FF_API_TAG_STRING
 /**
  * Put a string representing the codec tag codec_tag in buf.
@@ -5998,88 +5786,47 @@
     int (*init)(AVBSFContext *ctx);
     int (*filter)(AVBSFContext *ctx, AVPacket *pkt);
     void (*close)(AVBSFContext *ctx);
+    void (*flush)(AVBSFContext *ctx);
 } AVBitStreamFilter;
 
 #if FF_API_OLD_BSF
 /**
- * Register a bitstream filter.
- *
- * The filter will be accessible to the application code through
- * av_bitstream_filter_next() or can be directly initialized with
- * av_bitstream_filter_init().
- *
- * @see avcodec_register_all()
+ * @deprecated the old bitstream filtering API (using AVBitStreamFilterContext)
+ * is deprecated. Use the new bitstream filtering API (using AVBSFContext).
  */
 attribute_deprecated
 void av_register_bitstream_filter(AVBitStreamFilter *bsf);
-
 /**
- * Create and initialize a bitstream filter context given a bitstream
- * filter name.
- *
- * The returned context must be freed with av_bitstream_filter_close().
- *
- * @param name    the name of the bitstream filter
- * @return a bitstream filter context if a matching filter was found
- * and successfully initialized, NULL otherwise
+ * @deprecated the old bitstream filtering API (using AVBitStreamFilterContext)
+ * is deprecated. Use av_bsf_get_by_name(), av_bsf_alloc(), and av_bsf_init()
+ * from the new bitstream filtering API (using AVBSFContext).
  */
 attribute_deprecated
 AVBitStreamFilterContext *av_bitstream_filter_init(const char *name);
-
 /**
- * Filter bitstream.
- *
- * This function filters the buffer buf with size buf_size, and places the
- * filtered buffer in the buffer pointed to by poutbuf.
- *
- * The output buffer must be freed by the caller.
- *
- * @param bsfc            bitstream filter context created by av_bitstream_filter_init()
- * @param avctx           AVCodecContext accessed by the filter, may be NULL.
- *                        If specified, this must point to the encoder context of the
- *                        output stream the packet is sent to.
- * @param args            arguments which specify the filter configuration, may be NULL
- * @param poutbuf         pointer which is updated to point to the filtered buffer
- * @param poutbuf_size    pointer which is updated to the filtered buffer size in bytes
- * @param buf             buffer containing the data to filter
- * @param buf_size        size in bytes of buf
- * @param keyframe        set to non-zero if the buffer to filter corresponds to a key-frame packet data
- * @return >= 0 in case of success, or a negative error code in case of failure
- *
- * If the return value is positive, an output buffer is allocated and
- * is available in *poutbuf, and is distinct from the input buffer.
- *
- * If the return value is 0, the output buffer is not allocated and
- * should be considered identical to the input buffer, or in case
- * *poutbuf was set it points to the input buffer (not necessarily to
- * its starting address). A special case is if *poutbuf was set to NULL and
- * *poutbuf_size was set to 0, which indicates the packet should be dropped.
+ * @deprecated the old bitstream filtering API (using AVBitStreamFilterContext)
+ * is deprecated. Use av_bsf_send_packet() and av_bsf_receive_packet() from the
+ * new bitstream filtering API (using AVBSFContext).
  */
 attribute_deprecated
 int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc,
                                AVCodecContext *avctx, const char *args,
                                uint8_t **poutbuf, int *poutbuf_size,
                                const uint8_t *buf, int buf_size, int keyframe);
-
 /**
- * Release bitstream filter context.
- *
- * @param bsf the bitstream filter context created with
- * av_bitstream_filter_init(), can be NULL
+ * @deprecated the old bitstream filtering API (using AVBitStreamFilterContext)
+ * is deprecated. Use av_bsf_free() from the new bitstream filtering API (using
+ * AVBSFContext).
  */
 attribute_deprecated
 void av_bitstream_filter_close(AVBitStreamFilterContext *bsf);
-
 /**
- * If f is NULL, return the first registered bitstream filter,
- * if f is non-NULL, return the next registered bitstream filter
- * after f, or NULL if f is the last one.
- *
- * This function can be used to iterate over all registered bitstream
- * filters.
+ * @deprecated the old bitstream filtering API (using AVBitStreamFilterContext)
+ * is deprecated. Use av_bsf_iterate() from the new bitstream filtering API (using
+ * AVBSFContext).
  */
 attribute_deprecated
-AVBitStreamFilter *av_bitstream_filter_next(const AVBitStreamFilter *f);
+const AVBitStreamFilter *av_bitstream_filter_next(const AVBitStreamFilter *f);
 #endif
 
 /**
@@ -6097,7 +5844,11 @@
  * @return the next registered bitstream filter or NULL when the iteration is
  *         finished
  */
+const AVBitStreamFilter *av_bsf_iterate(void **opaque);
+#if FF_API_NEXT
+attribute_deprecated
 const AVBitStreamFilter *av_bsf_next(void **opaque);
+#endif
 
 /**
  * Allocate a context for a given bitstream filter. The caller must fill in the
@@ -6163,6 +5914,11 @@
 int av_bsf_receive_packet(AVBSFContext *ctx, AVPacket *pkt);
 
 /**
+ * Reset the internal bitstream filter state / flush internal buffers.
+ */
+void av_bsf_flush(AVBSFContext *ctx);
+
+/**
  * Free a bitstream filter context and everything associated with it; write NULL
  * into the supplied pointer.
  */
@@ -6287,51 +6043,32 @@
  */
 unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
 
-#if FF_API_MISSING_SAMPLE
-/**
- * Log a generic warning message about a missing feature. This function is
- * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
- * only, and would normally not be used by applications.
- * @param[in] avc a pointer to an arbitrary struct of which the first field is
- * a pointer to an AVClass struct
- * @param[in] feature string containing the name of the missing feature
- * @param[in] want_sample indicates if samples are wanted which exhibit this feature.
- * If want_sample is non-zero, additional verbiage will be added to the log
- * message which tells the user how to report samples to the development
- * mailing list.
- * @deprecated Use avpriv_report_missing_feature() instead.
- */
-attribute_deprecated
-void av_log_missing_feature(void *avc, const char *feature, int want_sample);
-
-/**
- * Log a generic warning message asking for a sample. This function is
- * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
- * only, and would normally not be used by applications.
- * @param[in] avc a pointer to an arbitrary struct of which the first field is
- * a pointer to an AVClass struct
- * @param[in] msg string containing an optional message, or NULL if no message
- * @deprecated Use avpriv_request_sample() instead.
- */
-attribute_deprecated
-void av_log_ask_for_sample(void *avc, const char *msg, ...) av_printf_format(2, 3);
-#endif /* FF_API_MISSING_SAMPLE */
-
+#if FF_API_USER_VISIBLE_AVHWACCEL
 /**
  * Register the hardware accelerator hwaccel.
+ *
+ * @deprecated  This function doesn't do anything.
  */
+attribute_deprecated
 void av_register_hwaccel(AVHWAccel *hwaccel);
 
 /**
  * If hwaccel is NULL, returns the first registered hardware accelerator,
  * if hwaccel is non-NULL, returns the next registered hardware accelerator
  * after hwaccel, or NULL if hwaccel is the last one.
+ *
+ * @deprecated  AVHWaccel structures contain no user-serviceable parts, so
+ *              this function should not be used.
  */
+attribute_deprecated
 AVHWAccel *av_hwaccel_next(const AVHWAccel *hwaccel);
+#endif
 
-
+#if FF_API_LOCKMGR
 /**
  * Lock operation used by lockmgr
+ *
+ * @deprecated Deprecated together with av_lockmgr_register().
  */
 enum AVLockOp {
   AV_LOCK_CREATE,  ///< Create a mutex
@@ -6362,8 +6099,13 @@
  *           mechanism (i.e. do not use a single static object to
  *           implement your lock manager). If cb is set to NULL the
  *           lockmgr will be unregistered.
+ *
+ * @deprecated This function does nothing, and always returns 0. Be sure to
+ *             build with thread support to get basic thread safety.
  */
+attribute_deprecated
 int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op));
+#endif
 
 /**
  * Get the type of the given codec.

diff --git a/libavcodec/avdct.c b/libavcodec/avdct.c
index 80aca88..47e5f71 100644
--- a/libavcodec/avdct.c
+++ b/libavcodec/avdct.c

@@ -48,19 +48,10 @@
 {"simplemmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"arm", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"altivec", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ALTIVEC }, INT_MIN, INT_MAX, V|E|D, "idct"},
-#if FF_API_ARCH_SH4
-{"sh4", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SH4 }, INT_MIN, INT_MAX, V|E|D, "idct"},
-#endif
 {"simplearm", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"simplearmv5te", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV5TE }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"simplearmv6", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV6 }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"simpleneon", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLENEON }, INT_MIN, INT_MAX, V|E|D, "idct"},
-#if FF_API_ARCH_ALPHA
-{"simplealpha", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEALPHA }, INT_MIN, INT_MAX, V|E|D, "idct"},
-#endif
-#if FF_API_UNUSED_MEMBERS
-{"ipp", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_IPP }, INT_MIN, INT_MAX, V|E|D, "idct"},
-#endif
 {"xvid", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"xvidmmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"faani", "floating point AAN IDCT (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"},
@@ -132,8 +123,7 @@
     }
 #endif
 
-    avcodec_close(avctx);
-    av_free(avctx);
+    avcodec_free_context(&avctx);
 
     return 0;
 }

diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
index d1f4ea9..e160ad3 100644
--- a/libavcodec/avpacket.c
+++ b/libavcodec/avpacket.c

@@ -375,6 +375,9 @@
     case AV_PKT_DATA_DISPLAYMATRIX:              return "Display Matrix";
     case AV_PKT_DATA_STEREO3D:                   return "Stereo 3D";
     case AV_PKT_DATA_AUDIO_SERVICE_TYPE:         return "Audio Service Type";
+    case AV_PKT_DATA_QUALITY_STATS:              return "Quality stats";
+    case AV_PKT_DATA_FALLBACK_TRACK:             return "Fallback track";
+    case AV_PKT_DATA_CPB_PROPERTIES:             return "CPB properties";
     case AV_PKT_DATA_SKIP_SAMPLES:               return "Skip Samples";
     case AV_PKT_DATA_JP_DUALMONO:                return "JP Dual Mono";
     case AV_PKT_DATA_STRINGS_METADATA:           return "Strings Metadata";
@@ -388,6 +391,9 @@
     case AV_PKT_DATA_CONTENT_LIGHT_LEVEL:        return "Content light level metadata";
     case AV_PKT_DATA_SPHERICAL:                  return "Spherical Mapping";
     case AV_PKT_DATA_A53_CC:                     return "A53 Closed Captions";
+    case AV_PKT_DATA_ENCRYPTION_INIT_INFO:       return "Encryption initialization data";
+    case AV_PKT_DATA_ENCRYPTION_INFO:            return "Encryption info";
+    case AV_PKT_DATA_AFD:                        return "Active Format Description data";
     }
     return NULL;
 }
@@ -479,34 +485,6 @@
 }
 #endif
 
-#if FF_API_MERGE_SD
-int ff_packet_split_and_drop_side_data(AVPacket *pkt){
-    if (!pkt->side_data_elems && pkt->size >12 && AV_RB64(pkt->data + pkt->size - 8) == FF_MERGE_MARKER){
-        int i;
-        unsigned int size;
-        uint8_t *p;
-
-        p = pkt->data + pkt->size - 8 - 5;
-        for (i=1; ; i++){
-            size = AV_RB32(p);
-            if (size>INT_MAX - 5 || p - pkt->data < size)
-                return 0;
-            if (p[4]&128)
-                break;
-            if (p - pkt->data < size + 5)
-                return 0;
-            p-= size+5;
-            if (i > AV_PKT_DATA_NB)
-                return 0;
-        }
-        pkt->size = p - pkt->data - size;
-        av_assert0(pkt->size >= 0);
-        return 1;
-    }
-    return 0;
-}
-#endif
-
 uint8_t *av_packet_pack_dictionary(AVDictionary *dict, int *size)
 {
     AVDictionaryEntry *t = NULL;
@@ -599,11 +577,13 @@
     dst->flags                = src->flags;
     dst->stream_index         = src->stream_index;
 
+    dst->side_data            = NULL;
+    dst->side_data_elems      = 0;
     for (i = 0; i < src->side_data_elems; i++) {
-         enum AVPacketSideDataType type = src->side_data[i].type;
-         int size          = src->side_data[i].size;
-         uint8_t *src_data = src->side_data[i].data;
-         uint8_t *dst_data = av_packet_new_side_data(dst, type, size);
+        enum AVPacketSideDataType type = src->side_data[i].type;
+        int size          = src->side_data[i].size;
+        uint8_t *src_data = src->side_data[i].data;
+        uint8_t *dst_data = av_packet_new_side_data(dst, type, size);
 
         if (!dst_data) {
             av_packet_free_side_data(dst);
@@ -678,6 +658,45 @@
     src->size = 0;
 }
 
+int av_packet_make_refcounted(AVPacket *pkt)
+{
+    int ret;
+
+    if (pkt->buf)
+        return 0;
+
+    ret = packet_alloc(&pkt->buf, pkt->size);
+    if (ret < 0)
+        return ret;
+    if (pkt->size)
+        memcpy(pkt->buf->data, pkt->data, pkt->size);
+
+    pkt->data = pkt->buf->data;
+
+    return 0;
+}
+
+int av_packet_make_writable(AVPacket *pkt)
+{
+    AVBufferRef *buf = NULL;
+    int ret;
+
+    if (pkt->buf && av_buffer_is_writable(pkt->buf))
+        return 0;
+
+    ret = packet_alloc(&buf, pkt->size);
+    if (ret < 0)
+        return ret;
+    if (pkt->size)
+        memcpy(buf->data, pkt->data, pkt->size);
+
+    av_buffer_unref(&pkt->buf);
+    pkt->buf  = buf;
+    pkt->data = buf->data;
+
+    return 0;
+}
+
 void av_packet_rescale_ts(AVPacket *pkt, AVRational src_tb, AVRational dst_tb)
 {
     if (pkt->pts != AV_NOPTS_VALUE)

diff --git a/libavcodec/avrndec.c b/libavcodec/avrndec.c
index c37f996..104ff2d 100644
--- a/libavcodec/avrndec.c
+++ b/libavcodec/avrndec.c

@@ -168,7 +168,6 @@
     .init           = init,
     .close          = end,
     .decode         = decode_frame,
-    .capabilities   = AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };

diff --git a/libavcodec/avs2_parser.c b/libavcodec/avs2_parser.c
new file mode 100644
index 0000000..1c9b342
--- /dev/null
+++ b/libavcodec/avs2_parser.c

@@ -0,0 +1,95 @@
+/*
+ * AVS2-P2/IEEE1857.4 video parser.
+ * Copyright (c) 2018  Huiwen Ren <hwrenx@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "parser.h"
+
+#define SLICE_MAX_START_CODE    0x000001af
+
+#define ISPIC(x)  ((x) == 0xB3 || (x) == 0xB6)
+#define ISUNIT(x) ((x) == 0xB0 || (x) == 0xB1 || (x) == 0xB2 || ISPIC(x))
+
+static int avs2_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size)
+{
+    int pic_found  = pc->frame_start_found;
+    uint32_t state = pc->state;
+    int cur = 0;
+
+    if (!pic_found) {
+        for (; cur < buf_size; ++cur) {
+            state = (state<<8) | buf[cur];
+            if (ISUNIT(buf[cur])){
+                ++cur;
+                pic_found = 1;
+                break;
+            }
+        }
+    }
+
+    if (pic_found) {
+        if (!buf_size)
+            return END_NOT_FOUND;
+        for (; cur < buf_size; ++cur) {
+            state = (state << 8) | buf[cur];
+            if ((state & 0xFFFFFF00) == 0x100 && state > SLICE_MAX_START_CODE) {
+                pc->frame_start_found = 0;
+                pc->state = -1;
+                return cur - 3;
+            }
+        }
+    }
+
+    pc->frame_start_found = pic_found;
+    pc->state = state;
+
+    return END_NOT_FOUND;
+}
+
+static int avs2_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                      const uint8_t **poutbuf, int *poutbuf_size,
+                      const uint8_t *buf, int buf_size)
+{
+    ParseContext *pc = s->priv_data;
+    int next;
+
+    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES)  {
+        next = buf_size;
+    } else {
+        next = avs2_find_frame_end(pc, buf, buf_size);
+        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
+            *poutbuf = NULL;
+            *poutbuf_size = 0;
+            return buf_size;
+        }
+    }
+
+    *poutbuf = buf;
+    *poutbuf_size = buf_size;
+
+    return next;
+}
+
+AVCodecParser ff_avs2_parser = {
+    .codec_ids      = { AV_CODEC_ID_AVS2 },
+    .priv_data_size = sizeof(ParseContext),
+    .parser_parse   = avs2_parse,
+    .parser_close   = ff_parse_close,
+    .split          = ff_mpeg4video_split,
+};

diff --git a/libavcodec/bink.c b/libavcodec/bink.c
index 346b6cd..9c17ded 100644
--- a/libavcodec/bink.c
+++ b/libavcodec/bink.c

@@ -371,11 +371,19 @@
 
 static int read_block_types(AVCodecContext *avctx, GetBitContext *gb, Bundle *b)
 {
+    BinkContext * const c = avctx->priv_data;
     int t, v;
     int last = 0;
     const uint8_t *dec_end;
 
     CHECK_READ_VAL(gb, b, t);
+    if (c->version == 'k') {
+        t ^= 0xBBu;
+        if (t == 0) {
+            b->cur_dec = NULL;
+            return 0;
+        }
+    }
     dec_end = b->cur_dec + t;
     if (dec_end > b->data_end) {
         av_log(avctx, AV_LOG_ERROR, "Too many block type values\n");
@@ -601,17 +609,16 @@
  * @param quant_matrices quantization matrices
  * @return 0 for success, negative value in other cases
  */
-static int read_dct_coeffs(GetBitContext *gb, int32_t block[64], const uint8_t *scan,
-                           const int32_t quant_matrices[16][64], int q)
+static int read_dct_coeffs(GetBitContext *gb, int32_t block[64],
+                           const uint8_t *scan, int *coef_count_,
+                           int coef_idx[64], int q)
 {
     int coef_list[128];
     int mode_list[128];
     int i, t, bits, ccoef, mode, sign;
     int list_start = 64, list_end = 64, list_pos;
     int coef_count = 0;
-    int coef_idx[64];
     int quant_idx;
-    const int32_t *quant;
 
     coef_list[list_end] = 4;  mode_list[list_end++] = 0;
     coef_list[list_end] = 24; mode_list[list_end++] = 0;
@@ -690,15 +697,21 @@
         }
     }
 
-    quant = quant_matrices[quant_idx];
+    *coef_count_ = coef_count;
 
+    return quant_idx;
+}
+
+static void unquantize_dct_coeffs(int32_t block[64], const int32_t quant[64],
+                                  int coef_count, int coef_idx[64],
+                                  const uint8_t *scan)
+{
+    int i;
     block[0] = (block[0] * quant[0]) >> 11;
     for (i = 0; i < coef_count; i++) {
         int idx = coef_idx[i];
         block[scan[idx]] = (block[scan[idx]] * quant[idx]) >> 11;
     }
-
-    return 0;
 }
 
 /**
@@ -817,7 +830,7 @@
     LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
     int coordmap[64];
     int ybias = is_key ? -15 : 0;
-    int qp;
+    int qp, quant_idx, coef_count, coef_idx[64];
 
     const int stride = frame->linesize[plane_idx];
     int bw = is_chroma ? (c->avctx->width  + 15) >> 4 : (c->avctx->width  + 7) >> 3;
@@ -872,7 +885,9 @@
                 memset(dctblock, 0, sizeof(*dctblock) * 64);
                 dctblock[0] = binkb_get_value(c, BINKB_SRC_INTRA_DC);
                 qp = binkb_get_value(c, BINKB_SRC_INTRA_Q);
-                read_dct_coeffs(gb, dctblock, bink_scan, (const int32_t (*)[64])binkb_intra_quant, qp);
+                if ((quant_idx = read_dct_coeffs(gb, dctblock, bink_scan, &coef_count, coef_idx, qp)) < 0)
+                    return quant_idx;
+                unquantize_dct_coeffs(dctblock, binkb_intra_quant[quant_idx], coef_count, coef_idx, bink_scan);
                 c->binkdsp.idct_put(dst, stride, dctblock);
                 break;
             case 3:
@@ -905,7 +920,9 @@
                 memset(dctblock, 0, sizeof(*dctblock) * 64);
                 dctblock[0] = binkb_get_value(c, BINKB_SRC_INTER_DC);
                 qp = binkb_get_value(c, BINKB_SRC_INTER_Q);
-                read_dct_coeffs(gb, dctblock, bink_scan, (const int32_t (*)[64])binkb_inter_quant, qp);
+                if ((quant_idx = read_dct_coeffs(gb, dctblock, bink_scan, &coef_count, coef_idx, qp)) < 0)
+                    return quant_idx;
+                unquantize_dct_coeffs(dctblock, binkb_inter_quant[quant_idx], coef_count, coef_idx, bink_scan);
                 c->binkdsp.idct_add(dst, stride, dctblock);
                 break;
             case 5:
@@ -979,12 +996,23 @@
     LOCAL_ALIGNED_32(int16_t, block, [64]);
     LOCAL_ALIGNED_16(uint8_t, ublock, [64]);
     LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
-    int coordmap[64];
+    int coordmap[64], quant_idx, coef_count, coef_idx[64];
 
     const int stride = frame->linesize[plane_idx];
     int bw = is_chroma ? (c->avctx->width  + 15) >> 4 : (c->avctx->width  + 7) >> 3;
     int bh = is_chroma ? (c->avctx->height + 15) >> 4 : (c->avctx->height + 7) >> 3;
     int width = c->avctx->width >> is_chroma;
+    int height = c->avctx->height >> is_chroma;
+
+    if (c->version == 'k' && get_bits1(gb)) {
+        int fill = get_bits(gb, 8);
+
+        dst = frame->data[plane_idx];
+
+        for (i = 0; i < height; i++)
+            memset(dst + i * stride, fill, width);
+        goto end;
+    }
 
     init_lengths(c, FFMAX(width, 8), bw);
     for (i = 0; i < BINK_NB_SRC; i++)
@@ -1065,7 +1093,9 @@
                 case INTRA_BLOCK:
                     memset(dctblock, 0, sizeof(*dctblock) * 64);
                     dctblock[0] = get_value(c, BINK_SRC_INTRA_DC);
-                    read_dct_coeffs(gb, dctblock, bink_scan, bink_intra_quant, -1);
+                    if ((quant_idx = read_dct_coeffs(gb, dctblock, bink_scan, &coef_count, coef_idx, -1)) < 0)
+                        return quant_idx;
+                    unquantize_dct_coeffs(dctblock, bink_intra_quant[quant_idx], coef_count, coef_idx, bink_scan);
                     c->binkdsp.idct_put(ublock, 8, dctblock);
                     break;
                 case FILL_BLOCK:
@@ -1138,7 +1168,9 @@
             case INTRA_BLOCK:
                 memset(dctblock, 0, sizeof(*dctblock) * 64);
                 dctblock[0] = get_value(c, BINK_SRC_INTRA_DC);
-                read_dct_coeffs(gb, dctblock, bink_scan, bink_intra_quant, -1);
+                if ((quant_idx = read_dct_coeffs(gb, dctblock, bink_scan, &coef_count, coef_idx, -1)) < 0)
+                    return quant_idx;
+                unquantize_dct_coeffs(dctblock, bink_intra_quant[quant_idx], coef_count, coef_idx, bink_scan);
                 c->binkdsp.idct_put(dst, stride, dctblock);
                 break;
             case FILL_BLOCK:
@@ -1152,7 +1184,9 @@
                     return ret;
                 memset(dctblock, 0, sizeof(*dctblock) * 64);
                 dctblock[0] = get_value(c, BINK_SRC_INTER_DC);
-                read_dct_coeffs(gb, dctblock, bink_scan, bink_inter_quant, -1);
+                if ((quant_idx = read_dct_coeffs(gb, dctblock, bink_scan, &coef_count, coef_idx, -1)) < 0)
+                    return quant_idx;
+                unquantize_dct_coeffs(dctblock, bink_inter_quant[quant_idx], coef_count, coef_idx, bink_scan);
                 c->binkdsp.idct_add(dst, stride, dctblock);
                 break;
             case PATTERN_BLOCK:
@@ -1175,6 +1209,8 @@
             }
         }
     }
+
+end:
     if (get_bits_count(gb) & 0x1F) //next plane data starts at 32-bit boundary
         skip_bits_long(gb, 32 - (get_bits_count(gb) & 0x1F));
 
@@ -1307,6 +1343,7 @@
         return ret;
 
     avctx->pix_fmt = c->has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P;
+    avctx->color_range = c->version == 'k' ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
 
     ff_blockdsp_init(&c->bdsp, avctx);
     ff_hpeldsp_init(&c->hdsp, avctx->flags);

diff --git a/libavcodec/bintext.c b/libavcodec/bintext.c
index 90bbe67..d85f2c2 100644
--- a/libavcodec/bintext.c
+++ b/libavcodec/bintext.c

@@ -35,6 +35,8 @@
 #include "bintext.h"
 #include "internal.h"
 
+#define FONT_WIDTH 8
+
 typedef struct XbinContext {
     AVFrame *frame;
     int palette[16];
@@ -91,10 +93,8 @@
             break;
         }
     }
-
-    s->frame = av_frame_alloc();
-    if (!s->frame)
-        return AVERROR(ENOMEM);
+    if (avctx->width < FONT_WIDTH || avctx->height < s->font_height)
+        return AVERROR_INVALIDDATA;
 
     return 0;
 }
@@ -113,8 +113,6 @@
     }
 }
 
-#define FONT_WIDTH 8
-
 /**
  * Draw character to screen
  */
@@ -143,8 +141,12 @@
     const uint8_t *buf_end = buf+buf_size;
     int ret;
 
+    if ((avctx->width / FONT_WIDTH) * (avctx->height / s->font_height) / 256 > buf_size)
+        return AVERROR_INVALIDDATA;
+
+    s->frame = data;
     s->x = s->y = 0;
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
+    if ((ret = ff_get_buffer(avctx, s->frame, 0)) < 0)
         return ret;
     s->frame->pict_type           = AV_PICTURE_TYPE_I;
     s->frame->palette_has_changed = 1;
@@ -202,21 +204,10 @@
         }
     }
 
-    if ((ret = av_frame_ref(data, s->frame)) < 0)
-        return ret;
     *got_frame      = 1;
     return buf_size;
 }
 
-static av_cold int decode_end(AVCodecContext *avctx)
-{
-    XbinContext *s = avctx->priv_data;
-
-    av_frame_free(&s->frame);
-
-    return 0;
-}
-
 #if CONFIG_BINTEXT_DECODER
 AVCodec ff_bintext_decoder = {
     .name           = "bintext",
@@ -225,7 +216,6 @@
     .id             = AV_CODEC_ID_BINTEXT,
     .priv_data_size = sizeof(XbinContext),
     .init           = decode_init,
-    .close          = decode_end,
     .decode         = decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
 };
@@ -238,7 +228,6 @@
     .id             = AV_CODEC_ID_XBIN,
     .priv_data_size = sizeof(XbinContext),
     .init           = decode_init,
-    .close          = decode_end,
     .decode         = decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
 };
@@ -251,7 +240,6 @@
     .id             = AV_CODEC_ID_IDF,
     .priv_data_size = sizeof(XbinContext),
     .init           = decode_init,
-    .close          = decode_end,
     .decode         = decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
 };

diff --git a/libavcodec/bit_depth_template.c b/libavcodec/bit_depth_template.c
index 8018489..d44d47e 100644
--- a/libavcodec/bit_depth_template.c
+++ b/libavcodec/bit_depth_template.c

@@ -29,6 +29,7 @@
 #   undef pixel2
 #   undef pixel4
 #   undef dctcoef
+#   undef idctin
 #   undef INIT_CLIP
 #   undef no_rnd_avg_pixel4
 #   undef rnd_avg_pixel4
@@ -53,6 +54,16 @@
 #   define pixel4 uint64_t
 #   define dctcoef int32_t
 
+#ifdef IN_IDCT_DEPTH
+#if IN_IDCT_DEPTH == 32
+#   define idctin int32_t
+#else
+#   define idctin int16_t
+#endif
+#else
+#   define idctin int16_t
+#endif
+
 #   define INIT_CLIP
 #   define no_rnd_avg_pixel4 no_rnd_avg64
 #   define    rnd_avg_pixel4    rnd_avg64
@@ -71,6 +82,7 @@
 #   define pixel2 uint16_t
 #   define pixel4 uint32_t
 #   define dctcoef int16_t
+#   define idctin  int16_t
 
 #   define INIT_CLIP
 #   define no_rnd_avg_pixel4 no_rnd_avg32
@@ -87,7 +99,10 @@
 #   define CLIP(a) av_clip_uint8(a)
 #endif
 
-#define FUNC3(a, b, c)  a ## _ ## b ## c
+#define FUNC3(a, b, c)  a ## _ ## b ##  c
 #define FUNC2(a, b, c)  FUNC3(a, b, c)
 #define FUNC(a)  FUNC2(a, BIT_DEPTH,)
 #define FUNCC(a) FUNC2(a, BIT_DEPTH, _c)
+#define FUNC4(a, b, c)  a ## _int ## b ## _ ## c ## bit
+#define FUNC5(a, b, c)  FUNC4(a, b, c)
+#define FUNC6(a)  FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH)

diff --git a/libavcodec/bitstream_filter.c b/libavcodec/bitstream_filter.c
index 8599b90..ca11ed3 100644
--- a/libavcodec/bitstream_filter.c
+++ b/libavcodec/bitstream_filter.c

@@ -28,15 +28,15 @@
 #if FF_API_OLD_BSF
 FF_DISABLE_DEPRECATION_WARNINGS
 
-AVBitStreamFilter *av_bitstream_filter_next(const AVBitStreamFilter *f)
+const AVBitStreamFilter *av_bitstream_filter_next(const AVBitStreamFilter *f)
 {
     const AVBitStreamFilter *filter = NULL;
     void *opaque = NULL;
 
     while (filter != f)
-        filter = av_bsf_next(&opaque);
+        filter = av_bsf_iterate(&opaque);
 
-    return av_bsf_next(&opaque);
+    return av_bsf_iterate(&opaque);
 }
 
 void av_register_bitstream_filter(AVBitStreamFilter *bsf)
@@ -131,7 +131,7 @@
             return ret;
     }
 
-    pkt.data = buf;
+    pkt.data = (uint8_t *)buf;
     pkt.size = buf_size;
 
     ret = av_bsf_send_packet(priv->ctx, &pkt);

diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index ce34de6..96b1746 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c

@@ -25,29 +25,39 @@
 #include "bsf.h"
 
 extern const AVBitStreamFilter ff_aac_adtstoasc_bsf;
+extern const AVBitStreamFilter ff_av1_metadata_bsf;
 extern const AVBitStreamFilter ff_chomp_bsf;
 extern const AVBitStreamFilter ff_dump_extradata_bsf;
 extern const AVBitStreamFilter ff_dca_core_bsf;
+extern const AVBitStreamFilter ff_eac3_core_bsf;
 extern const AVBitStreamFilter ff_extract_extradata_bsf;
+extern const AVBitStreamFilter ff_filter_units_bsf;
+extern const AVBitStreamFilter ff_h264_metadata_bsf;
 extern const AVBitStreamFilter ff_h264_mp4toannexb_bsf;
+extern const AVBitStreamFilter ff_h264_redundant_pps_bsf;
+extern const AVBitStreamFilter ff_hapqa_extract_bsf;
+extern const AVBitStreamFilter ff_hevc_metadata_bsf;
 extern const AVBitStreamFilter ff_hevc_mp4toannexb_bsf;
 extern const AVBitStreamFilter ff_imx_dump_header_bsf;
 extern const AVBitStreamFilter ff_mjpeg2jpeg_bsf;
 extern const AVBitStreamFilter ff_mjpega_dump_header_bsf;
 extern const AVBitStreamFilter ff_mp3_header_decompress_bsf;
+extern const AVBitStreamFilter ff_mpeg2_metadata_bsf;
 extern const AVBitStreamFilter ff_mpeg4_unpack_bframes_bsf;
 extern const AVBitStreamFilter ff_mov2textsub_bsf;
 extern const AVBitStreamFilter ff_noise_bsf;
 extern const AVBitStreamFilter ff_null_bsf;
 extern const AVBitStreamFilter ff_remove_extradata_bsf;
 extern const AVBitStreamFilter ff_text2movsub_bsf;
+extern const AVBitStreamFilter ff_trace_headers_bsf;
+extern const AVBitStreamFilter ff_vp9_metadata_bsf;
 extern const AVBitStreamFilter ff_vp9_raw_reorder_bsf;
 extern const AVBitStreamFilter ff_vp9_superframe_bsf;
 extern const AVBitStreamFilter ff_vp9_superframe_split_bsf;
 
 #include "libavcodec/bsf_list.c"
 
-const AVBitStreamFilter *av_bsf_next(void **opaque)
+const AVBitStreamFilter *av_bsf_iterate(void **opaque)
 {
     uintptr_t i = (uintptr_t)*opaque;
     const AVBitStreamFilter *f = bitstream_filters[i];
@@ -58,12 +68,21 @@
     return f;
 }
 
+#if FF_API_NEXT
+const AVBitStreamFilter *av_bsf_next(void **opaque) {
+    return av_bsf_iterate(opaque);
+}
+#endif
+
 const AVBitStreamFilter *av_bsf_get_by_name(const char *name)
 {
-    int i;
+    const AVBitStreamFilter *f = NULL;
+    void *i = 0;
 
-    for (i = 0; bitstream_filters[i]; i++) {
-        const AVBitStreamFilter *f = bitstream_filters[i];
+    if (!name)
+        return NULL;
+
+    while ((f = av_bsf_iterate(&i))) {
         if (!strcmp(f->name, name))
             return f;
     }
@@ -73,19 +92,20 @@
 
 const AVClass *ff_bsf_child_class_next(const AVClass *prev)
 {
-    int i;
+    const AVBitStreamFilter *f = NULL;
+    void *i = 0;
 
     /* find the filter that corresponds to prev */
-    for (i = 0; prev && bitstream_filters[i]; i++) {
-        if (bitstream_filters[i]->priv_class == prev) {
-            i++;
+    while (prev && (f = av_bsf_iterate(&i))) {
+        if (f->priv_class == prev) {
             break;
         }
     }
 
     /* find next filter with priv options */
-    for (; bitstream_filters[i]; i++)
-        if (bitstream_filters[i]->priv_class)
-            return bitstream_filters[i]->priv_class;
+    while ((f = av_bsf_iterate(&i))) {
+        if (f->priv_class)
+            return f->priv_class;
+    }
     return NULL;
 }

diff --git a/libavcodec/blockdsp.h b/libavcodec/blockdsp.h
index 6e27a02..26fc2ea 100644
--- a/libavcodec/blockdsp.h
+++ b/libavcodec/blockdsp.h

@@ -33,8 +33,8 @@
                              uint8_t value, ptrdiff_t line_size, int h);
 
 typedef struct BlockDSPContext {
-    void (*clear_block)(int16_t *block /* align 16 */);
-    void (*clear_blocks)(int16_t *blocks /* align 16 */);
+    void (*clear_block)(int16_t *block /* align 32 */);
+    void (*clear_blocks)(int16_t *blocks /* align 32 */);
 
     op_fill_func fill_block_tab[2];
 } BlockDSPContext;

diff --git a/libavcodec/bsf.c b/libavcodec/bsf.c
index 38b4231..03841da 100644
--- a/libavcodec/bsf.c
+++ b/libavcodec/bsf.c

@@ -172,8 +172,20 @@
     return 0;
 }
 
+void av_bsf_flush(AVBSFContext *ctx)
+{
+    ctx->internal->eof = 0;
+
+    av_packet_unref(ctx->internal->buffer_pkt);
+
+    if (ctx->filter->flush)
+        ctx->filter->flush(ctx);
+}
+
 int av_bsf_send_packet(AVBSFContext *ctx, AVPacket *pkt)
 {
+    int ret;
+
     if (!pkt || (!pkt->data && !pkt->side_data_elems)) {
         ctx->internal->eof = 1;
         return 0;
@@ -188,6 +200,9 @@
         ctx->internal->buffer_pkt->side_data_elems)
         return AVERROR(EAGAIN);
 
+    ret = av_packet_make_refcounted(pkt);
+    if (ret < 0)
+        return ret;
     av_packet_move_ref(ctx->internal->buffer_pkt, pkt);
 
     return 0;

diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index dd2b057..e51139d 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c

@@ -32,7 +32,7 @@
 #include "cabac.h"
 #include "cabac_functions.h"
 
-const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = {
+DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 + 4*64 + 63] = {
     9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,

diff --git a/libavcodec/cabac_functions.h b/libavcodec/cabac_functions.h
index fe72a82..bb2b421 100644
--- a/libavcodec/cabac_functions.h
+++ b/libavcodec/cabac_functions.h

@@ -45,6 +45,9 @@
 #if ARCH_X86
 #   include "x86/cabac.h"
 #endif
+#if ARCH_MIPS
+#   include "mips/cabac.h"
+#endif
 
 static const uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET;
 static const uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET;

diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
index 06c7527..c7fff67 100644
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c

@@ -1067,6 +1067,11 @@
     if (!h->loop_filter_disable && get_bits1(&h->gb)) {
         h->alpha_offset        = get_se_golomb(&h->gb);
         h->beta_offset         = get_se_golomb(&h->gb);
+        if (   h->alpha_offset < -64 || h->alpha_offset > 64
+            || h-> beta_offset < -64 || h-> beta_offset > 64) {
+            h->alpha_offset = h->beta_offset  = 0;
+            return AVERROR_INVALIDDATA;
+        }
     } else {
         h->alpha_offset = h->beta_offset  = 0;
     }

diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
new file mode 100644
index 0000000..ecbf57c
--- /dev/null
+++ b/libavcodec/cbs.c

@@ -0,0 +1,667 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "config.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/buffer.h"
+#include "libavutil/common.h"
+
+#include "cbs.h"
+#include "cbs_internal.h"
+
+
+static const CodedBitstreamType *cbs_type_table[] = {
+#if CONFIG_CBS_AV1
+    &ff_cbs_type_av1,
+#endif
+#if CONFIG_CBS_H264
+    &ff_cbs_type_h264,
+#endif
+#if CONFIG_CBS_H265
+    &ff_cbs_type_h265,
+#endif
+#if CONFIG_CBS_JPEG
+    &ff_cbs_type_jpeg,
+#endif
+#if CONFIG_CBS_MPEG2
+    &ff_cbs_type_mpeg2,
+#endif
+#if CONFIG_CBS_VP9
+    &ff_cbs_type_vp9,
+#endif
+};
+
+const enum AVCodecID ff_cbs_all_codec_ids[] = {
+#if CONFIG_CBS_AV1
+    AV_CODEC_ID_AV1,
+#endif
+#if CONFIG_CBS_H264
+    AV_CODEC_ID_H264,
+#endif
+#if CONFIG_CBS_H265
+    AV_CODEC_ID_H265,
+#endif
+#if CONFIG_CBS_JPEG
+    AV_CODEC_ID_MJPEG,
+#endif
+#if CONFIG_CBS_MPEG2
+    AV_CODEC_ID_MPEG2VIDEO,
+#endif
+#if CONFIG_CBS_VP9
+    AV_CODEC_ID_VP9,
+#endif
+    AV_CODEC_ID_NONE
+};
+
+int ff_cbs_init(CodedBitstreamContext **ctx_ptr,
+                enum AVCodecID codec_id, void *log_ctx)
+{
+    CodedBitstreamContext *ctx;
+    const CodedBitstreamType *type;
+    int i;
+
+    type = NULL;
+    for (i = 0; i < FF_ARRAY_ELEMS(cbs_type_table); i++) {
+        if (cbs_type_table[i]->codec_id == codec_id) {
+            type = cbs_type_table[i];
+            break;
+        }
+    }
+    if (!type)
+        return AVERROR(EINVAL);
+
+    ctx = av_mallocz(sizeof(*ctx));
+    if (!ctx)
+        return AVERROR(ENOMEM);
+
+    ctx->log_ctx = log_ctx;
+    ctx->codec   = type;
+
+    ctx->priv_data = av_mallocz(ctx->codec->priv_data_size);
+    if (!ctx->priv_data) {
+        av_freep(&ctx);
+        return AVERROR(ENOMEM);
+    }
+
+    ctx->decompose_unit_types = NULL;
+
+    ctx->trace_enable = 0;
+    ctx->trace_level  = AV_LOG_TRACE;
+
+    *ctx_ptr = ctx;
+    return 0;
+}
+
+void ff_cbs_close(CodedBitstreamContext **ctx_ptr)
+{
+    CodedBitstreamContext *ctx = *ctx_ptr;
+
+    if (!ctx)
+        return;
+
+    if (ctx->codec && ctx->codec->close)
+        ctx->codec->close(ctx);
+
+    av_freep(&ctx->priv_data);
+    av_freep(ctx_ptr);
+}
+
+static void cbs_unit_uninit(CodedBitstreamContext *ctx,
+                            CodedBitstreamUnit *unit)
+{
+    av_buffer_unref(&unit->content_ref);
+    unit->content = NULL;
+
+    av_buffer_unref(&unit->data_ref);
+    unit->data             = NULL;
+    unit->data_size        = 0;
+    unit->data_bit_padding = 0;
+}
+
+void ff_cbs_fragment_uninit(CodedBitstreamContext *ctx,
+                            CodedBitstreamFragment *frag)
+{
+    int i;
+
+    for (i = 0; i < frag->nb_units; i++)
+        cbs_unit_uninit(ctx, &frag->units[i]);
+    av_freep(&frag->units);
+    frag->nb_units = 0;
+
+    av_buffer_unref(&frag->data_ref);
+    frag->data             = NULL;
+    frag->data_size        = 0;
+    frag->data_bit_padding = 0;
+}
+
+static int cbs_read_fragment_content(CodedBitstreamContext *ctx,
+                                     CodedBitstreamFragment *frag)
+{
+    int err, i, j;
+
+    for (i = 0; i < frag->nb_units; i++) {
+        CodedBitstreamUnit *unit = &frag->units[i];
+
+        if (ctx->decompose_unit_types) {
+            for (j = 0; j < ctx->nb_decompose_unit_types; j++) {
+                if (ctx->decompose_unit_types[j] == unit->type)
+                    break;
+            }
+            if (j >= ctx->nb_decompose_unit_types)
+                continue;
+        }
+
+        av_buffer_unref(&unit->content_ref);
+        unit->content = NULL;
+
+        av_assert0(unit->data && unit->data_ref);
+
+        err = ctx->codec->read_unit(ctx, unit);
+        if (err == AVERROR(ENOSYS)) {
+            av_log(ctx->log_ctx, AV_LOG_VERBOSE,
+                   "Decomposition unimplemented for unit %d "
+                   "(type %"PRIu32").\n", i, unit->type);
+        } else if (err < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to read unit %d "
+                   "(type %"PRIu32").\n", i, unit->type);
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+static int cbs_fill_fragment_data(CodedBitstreamContext *ctx,
+                                  CodedBitstreamFragment *frag,
+                                  const uint8_t *data, size_t size)
+{
+    av_assert0(!frag->data && !frag->data_ref);
+
+    frag->data_ref =
+        av_buffer_alloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!frag->data_ref)
+        return AVERROR(ENOMEM);
+
+    frag->data      = frag->data_ref->data;
+    frag->data_size = size;
+
+    memcpy(frag->data, data, size);
+    memset(frag->data + size, 0,
+           AV_INPUT_BUFFER_PADDING_SIZE);
+
+    return 0;
+}
+
+int ff_cbs_read_extradata(CodedBitstreamContext *ctx,
+                          CodedBitstreamFragment *frag,
+                          const AVCodecParameters *par)
+{
+    int err;
+
+    memset(frag, 0, sizeof(*frag));
+
+    err = cbs_fill_fragment_data(ctx, frag, par->extradata,
+                                 par->extradata_size);
+    if (err < 0)
+        return err;
+
+    err = ctx->codec->split_fragment(ctx, frag, 1);
+    if (err < 0)
+        return err;
+
+    return cbs_read_fragment_content(ctx, frag);
+}
+
+int ff_cbs_read_packet(CodedBitstreamContext *ctx,
+                       CodedBitstreamFragment *frag,
+                       const AVPacket *pkt)
+{
+    int err;
+
+    memset(frag, 0, sizeof(*frag));
+
+    if (pkt->buf) {
+        frag->data_ref = av_buffer_ref(pkt->buf);
+        if (!frag->data_ref)
+            return AVERROR(ENOMEM);
+
+        frag->data      = pkt->data;
+        frag->data_size = pkt->size;
+
+    } else {
+        err = cbs_fill_fragment_data(ctx, frag, pkt->data, pkt->size);
+        if (err < 0)
+            return err;
+    }
+
+    err = ctx->codec->split_fragment(ctx, frag, 0);
+    if (err < 0)
+        return err;
+
+    return cbs_read_fragment_content(ctx, frag);
+}
+
+int ff_cbs_read(CodedBitstreamContext *ctx,
+                CodedBitstreamFragment *frag,
+                const uint8_t *data, size_t size)
+{
+    int err;
+
+    memset(frag, 0, sizeof(*frag));
+
+    err = cbs_fill_fragment_data(ctx, frag, data, size);
+    if (err < 0)
+        return err;
+
+    err = ctx->codec->split_fragment(ctx, frag, 0);
+    if (err < 0)
+        return err;
+
+    return cbs_read_fragment_content(ctx, frag);
+}
+
+
+int ff_cbs_write_fragment_data(CodedBitstreamContext *ctx,
+                               CodedBitstreamFragment *frag)
+{
+    int err, i;
+
+    for (i = 0; i < frag->nb_units; i++) {
+        CodedBitstreamUnit *unit = &frag->units[i];
+
+        if (!unit->content)
+            continue;
+
+        av_buffer_unref(&unit->data_ref);
+        unit->data = NULL;
+
+        err = ctx->codec->write_unit(ctx, unit);
+        if (err < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to write unit %d "
+                   "(type %"PRIu32").\n", i, unit->type);
+            return err;
+        }
+        av_assert0(unit->data && unit->data_ref);
+    }
+
+    av_buffer_unref(&frag->data_ref);
+    frag->data = NULL;
+
+    err = ctx->codec->assemble_fragment(ctx, frag);
+    if (err < 0) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to assemble fragment.\n");
+        return err;
+    }
+    av_assert0(frag->data && frag->data_ref);
+
+    return 0;
+}
+
+int ff_cbs_write_extradata(CodedBitstreamContext *ctx,
+                           AVCodecParameters *par,
+                           CodedBitstreamFragment *frag)
+{
+    int err;
+
+    err = ff_cbs_write_fragment_data(ctx, frag);
+    if (err < 0)
+        return err;
+
+    av_freep(&par->extradata);
+
+    par->extradata = av_malloc(frag->data_size +
+                               AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!par->extradata)
+        return AVERROR(ENOMEM);
+
+    memcpy(par->extradata, frag->data, frag->data_size);
+    memset(par->extradata + frag->data_size, 0,
+           AV_INPUT_BUFFER_PADDING_SIZE);
+    par->extradata_size = frag->data_size;
+
+    return 0;
+}
+
+int ff_cbs_write_packet(CodedBitstreamContext *ctx,
+                        AVPacket *pkt,
+                        CodedBitstreamFragment *frag)
+{
+    AVBufferRef *buf;
+    int err;
+
+    err = ff_cbs_write_fragment_data(ctx, frag);
+    if (err < 0)
+        return err;
+
+    buf = av_buffer_ref(frag->data_ref);
+    if (!buf)
+        return AVERROR(ENOMEM);
+
+    av_init_packet(pkt);
+    pkt->buf  = buf;
+    pkt->data = frag->data;
+    pkt->size = frag->data_size;
+
+    return 0;
+}
+
+
+void ff_cbs_trace_header(CodedBitstreamContext *ctx,
+                         const char *name)
+{
+    if (!ctx->trace_enable)
+        return;
+
+    av_log(ctx->log_ctx, ctx->trace_level, "%s\n", name);
+}
+
+void ff_cbs_trace_syntax_element(CodedBitstreamContext *ctx, int position,
+                                 const char *str, const int *subscripts,
+                                 const char *bits, int64_t value)
+{
+    char name[256];
+    size_t name_len, bits_len;
+    int pad, subs, i, j, k, n;
+
+    if (!ctx->trace_enable)
+        return;
+
+    av_assert0(value >= INT_MIN && value <= UINT32_MAX);
+
+    subs = subscripts ? subscripts[0] : 0;
+    n = 0;
+    for (i = j = 0; str[i];) {
+        if (str[i] == '[') {
+            if (n < subs) {
+                ++n;
+                k = snprintf(name + j, sizeof(name) - j, "[%d", subscripts[n]);
+                av_assert0(k > 0 && j + k < sizeof(name));
+                j += k;
+                for (++i; str[i] && str[i] != ']'; i++);
+                av_assert0(str[i] == ']');
+            } else {
+                while (str[i] && str[i] != ']')
+                    name[j++] = str[i++];
+                av_assert0(str[i] == ']');
+            }
+        } else {
+            av_assert0(j + 1 < sizeof(name));
+            name[j++] = str[i++];
+        }
+    }
+    av_assert0(j + 1 < sizeof(name));
+    name[j] = 0;
+    av_assert0(n == subs);
+
+    name_len = strlen(name);
+    bits_len = strlen(bits);
+
+    if (name_len + bits_len > 60)
+        pad = bits_len + 2;
+    else
+        pad = 61 - name_len;
+
+    av_log(ctx->log_ctx, ctx->trace_level, "%-10d  %s%*s = %"PRId64"\n",
+           position, name, pad, bits, value);
+}
+
+int ff_cbs_read_unsigned(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                         int width, const char *name,
+                         const int *subscripts, uint32_t *write_to,
+                         uint32_t range_min, uint32_t range_max)
+{
+    uint32_t value;
+    int position;
+
+    av_assert0(width > 0 && width <= 32);
+
+    if (get_bits_left(gbc) < width) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid value at "
+               "%s: bitstream ended.\n", name);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    value = get_bits_long(gbc, width);
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (i = 0; i < width; i++)
+            bits[i] = value >> (width - i - 1) & 1 ? '1' : '0';
+        bits[i] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, position, name, subscripts,
+                                    bits, value);
+    }
+
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+int ff_cbs_write_unsigned(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                          int width, const char *name,
+                          const int *subscripts, uint32_t value,
+                          uint32_t range_min, uint32_t range_max)
+{
+    av_assert0(width > 0 && width <= 32);
+
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (put_bits_left(pbc) < width)
+        return AVERROR(ENOSPC);
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (i = 0; i < width; i++)
+            bits[i] = value >> (width - i - 1) & 1 ? '1' : '0';
+        bits[i] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
+                                    name, subscripts, bits, value);
+    }
+
+    if (width < 32)
+        put_bits(pbc, width, value);
+    else
+        put_bits32(pbc, value);
+
+    return 0;
+}
+
+
+int ff_cbs_alloc_unit_content(CodedBitstreamContext *ctx,
+                              CodedBitstreamUnit *unit,
+                              size_t size,
+                              void (*free)(void *opaque, uint8_t *data))
+{
+    av_assert0(!unit->content && !unit->content_ref);
+
+    unit->content = av_mallocz(size);
+    if (!unit->content)
+        return AVERROR(ENOMEM);
+
+    unit->content_ref = av_buffer_create(unit->content, size,
+                                         free, ctx, 0);
+    if (!unit->content_ref) {
+        av_freep(&unit->content);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+int ff_cbs_alloc_unit_data(CodedBitstreamContext *ctx,
+                           CodedBitstreamUnit *unit,
+                           size_t size)
+{
+    av_assert0(!unit->data && !unit->data_ref);
+
+    unit->data_ref = av_buffer_alloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!unit->data_ref)
+        return AVERROR(ENOMEM);
+
+    unit->data      = unit->data_ref->data;
+    unit->data_size = size;
+
+    memset(unit->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+    return 0;
+}
+
+static int cbs_insert_unit(CodedBitstreamContext *ctx,
+                           CodedBitstreamFragment *frag,
+                           int position)
+{
+    CodedBitstreamUnit *units;
+
+    units = av_malloc_array(frag->nb_units + 1, sizeof(*units));
+    if (!units)
+        return AVERROR(ENOMEM);
+
+    if (position > 0)
+        memcpy(units, frag->units, position * sizeof(*units));
+    if (position < frag->nb_units)
+        memcpy(units + position + 1, frag->units + position,
+               (frag->nb_units - position) * sizeof(*units));
+
+    memset(units + position, 0, sizeof(*units));
+
+    av_freep(&frag->units);
+    frag->units = units;
+    ++frag->nb_units;
+
+    return 0;
+}
+
+int ff_cbs_insert_unit_content(CodedBitstreamContext *ctx,
+                               CodedBitstreamFragment *frag,
+                               int position,
+                               CodedBitstreamUnitType type,
+                               void *content,
+                               AVBufferRef *content_buf)
+{
+    CodedBitstreamUnit *unit;
+    AVBufferRef *content_ref;
+    int err;
+
+    if (position == -1)
+        position = frag->nb_units;
+    av_assert0(position >= 0 && position <= frag->nb_units);
+
+    if (content_buf) {
+        content_ref = av_buffer_ref(content_buf);
+        if (!content_ref)
+            return AVERROR(ENOMEM);
+    } else {
+        content_ref = NULL;
+    }
+
+    err = cbs_insert_unit(ctx, frag, position);
+    if (err < 0) {
+        av_buffer_unref(&content_ref);
+        return err;
+    }
+
+    unit = &frag->units[position];
+    unit->type        = type;
+    unit->content     = content;
+    unit->content_ref = content_ref;
+
+    return 0;
+}
+
+int ff_cbs_insert_unit_data(CodedBitstreamContext *ctx,
+                            CodedBitstreamFragment *frag,
+                            int position,
+                            CodedBitstreamUnitType type,
+                            uint8_t *data, size_t data_size,
+                            AVBufferRef *data_buf)
+{
+    CodedBitstreamUnit *unit;
+    AVBufferRef *data_ref;
+    int err;
+
+    if (position == -1)
+        position = frag->nb_units;
+    av_assert0(position >= 0 && position <= frag->nb_units);
+
+    if (data_buf)
+        data_ref = av_buffer_ref(data_buf);
+    else
+        data_ref = av_buffer_create(data, data_size, NULL, NULL, 0);
+    if (!data_ref)
+        return AVERROR(ENOMEM);
+
+    err = cbs_insert_unit(ctx, frag, position);
+    if (err < 0) {
+        av_buffer_unref(&data_ref);
+        return err;
+    }
+
+    unit = &frag->units[position];
+    unit->type      = type;
+    unit->data      = data;
+    unit->data_size = data_size;
+    unit->data_ref  = data_ref;
+
+    return 0;
+}
+
+int ff_cbs_delete_unit(CodedBitstreamContext *ctx,
+                       CodedBitstreamFragment *frag,
+                       int position)
+{
+    if (position < 0 || position >= frag->nb_units)
+        return AVERROR(EINVAL);
+
+    cbs_unit_uninit(ctx, &frag->units[position]);
+
+    --frag->nb_units;
+
+    if (frag->nb_units == 0) {
+        av_freep(&frag->units);
+
+    } else {
+        memmove(frag->units + position,
+                frag->units + position + 1,
+                (frag->nb_units - position) * sizeof(*frag->units));
+
+        // Don't bother reallocating the unit array.
+    }
+
+    return 0;
+}

diff --git a/libavcodec/cbs.h b/libavcodec/cbs.h
new file mode 100644
index 0000000..53ac360
--- /dev/null
+++ b/libavcodec/cbs.h

@@ -0,0 +1,356 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_H
+#define AVCODEC_CBS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/buffer.h"
+
+#include "avcodec.h"
+
+
+/*
+ * This defines a framework for converting between a coded bitstream
+ * and structures defining all individual syntax elements found in
+ * such a stream.
+ *
+ * Conversion in both directions is possible.  Given a coded bitstream
+ * (any meaningful fragment), it can be parsed and decomposed into
+ * syntax elements stored in a set of codec-specific structures.
+ * Similarly, given a set of those same codec-specific structures the
+ * syntax elements can be serialised and combined to create a coded
+ * bitstream.
+ */
+
+struct CodedBitstreamType;
+
+/**
+ * The codec-specific type of a bitstream unit.
+ *
+ * H.264 / AVC: nal_unit_type
+ * H.265 / HEVC: nal_unit_type
+ * MPEG-2: start code value (without prefix)
+ * VP9: unused, set to zero (every unit is a frame)
+ */
+typedef uint32_t CodedBitstreamUnitType;
+
+/**
+ * Coded bitstream unit structure.
+ *
+ * A bitstream unit the smallest element of a bitstream which
+ * is meaningful on its own.  For example, an H.264 NAL unit.
+ *
+ * See the codec-specific header for the meaning of this for any
+ * particular codec.
+ */
+typedef struct CodedBitstreamUnit {
+    /**
+     * Codec-specific type of this unit.
+     */
+    CodedBitstreamUnitType type;
+
+    /**
+     * Pointer to the directly-parsable bitstream form of this unit.
+     *
+     * May be NULL if the unit currently only exists in decomposed form.
+     */
+    uint8_t *data;
+    /**
+     * The number of bytes in the bitstream (including any padding bits
+     * in the final byte).
+     */
+    size_t   data_size;
+    /**
+     * The number of bits which should be ignored in the final byte.
+     *
+     * This supports non-byte-aligned bitstreams.
+     */
+    size_t   data_bit_padding;
+    /**
+     * A reference to the buffer containing data.
+     *
+     * Must be set if data is not NULL.
+     */
+    AVBufferRef *data_ref;
+
+    /**
+     * Pointer to the decomposed form of this unit.
+     *
+     * The type of this structure depends on both the codec and the
+     * type of this unit.  May be NULL if the unit only exists in
+     * bitstream form.
+     */
+    void *content;
+    /**
+     * If content is reference counted, a reference to the buffer containing
+     * content.  Null if content is not reference counted.
+     */
+    AVBufferRef *content_ref;
+} CodedBitstreamUnit;
+
+/**
+ * Coded bitstream fragment structure, combining one or more units.
+ *
+ * This is any sequence of units.  It need not form some greater whole,
+ * though in many cases it will.  For example, an H.264 access unit,
+ * which is composed of a sequence of H.264 NAL units.
+ */
+typedef struct CodedBitstreamFragment {
+    /**
+     * Pointer to the bitstream form of this fragment.
+     *
+     * May be NULL if the fragment only exists as component units.
+     */
+    uint8_t *data;
+    /**
+     * The number of bytes in the bitstream.
+     *
+     * The number of bytes in the bitstream (including any padding bits
+     * in the final byte).
+     */
+    size_t   data_size;
+    /**
+     * The number of bits which should be ignored in the final byte.
+     */
+    size_t data_bit_padding;
+    /**
+     * A reference to the buffer containing data.
+     *
+     * Must be set if data is not NULL.
+     */
+    AVBufferRef *data_ref;
+
+    /**
+     * Number of units in this fragment.
+     *
+     * This may be zero if the fragment only exists in bitstream form
+     * and has not been decomposed.
+     */
+    int              nb_units;
+    /**
+     * Pointer to an array of units of length nb_units.
+     *
+     * Must be NULL if nb_units is zero.
+     */
+    CodedBitstreamUnit *units;
+} CodedBitstreamFragment;
+
+/**
+ * Context structure for coded bitstream operations.
+ */
+typedef struct CodedBitstreamContext {
+    /**
+     * Logging context to be passed to all av_log() calls associated
+     * with this context.
+     */
+    void *log_ctx;
+
+    /**
+     * Internal codec-specific hooks.
+     */
+    const struct CodedBitstreamType *codec;
+
+    /**
+     * Internal codec-specific data.
+     *
+     * This contains any information needed when reading/writing
+     * bitsteams which will not necessarily be present in a fragment.
+     * For example, for H.264 it contains all currently visible
+     * parameter sets - they are required to determine the bitstream
+     * syntax but need not be present in every access unit.
+     */
+    void *priv_data;
+
+    /**
+     * Array of unit types which should be decomposed when reading.
+     *
+     * Types not in this list will be available in bitstream form only.
+     * If NULL, all supported types will be decomposed.
+     */
+    CodedBitstreamUnitType *decompose_unit_types;
+    /**
+     * Length of the decompose_unit_types array.
+     */
+    int nb_decompose_unit_types;
+
+    /**
+     * Enable trace output during read/write operations.
+     */
+    int trace_enable;
+    /**
+     * Log level to use for trace output.
+     *
+     * From AV_LOG_*; defaults to AV_LOG_TRACE.
+     */
+    int trace_level;
+} CodedBitstreamContext;
+
+
+/**
+ * Table of all supported codec IDs.
+ *
+ * Terminated by AV_CODEC_ID_NONE.
+ */
+extern const enum AVCodecID ff_cbs_all_codec_ids[];
+
+
+/**
+ * Create and initialise a new context for the given codec.
+ */
+int ff_cbs_init(CodedBitstreamContext **ctx,
+                enum AVCodecID codec_id, void *log_ctx);
+
+/**
+ * Close a context and free all internal state.
+ */
+void ff_cbs_close(CodedBitstreamContext **ctx);
+
+
+/**
+ * Read the extradata bitstream found in codec parameters into a
+ * fragment, then split into units and decompose.
+ *
+ * This also updates the internal state, so will need to be called for
+ * codecs with extradata to read parameter sets necessary for further
+ * parsing even if the fragment itself is not desired.
+ */
+int ff_cbs_read_extradata(CodedBitstreamContext *ctx,
+                          CodedBitstreamFragment *frag,
+                          const AVCodecParameters *par);
+
+/**
+ * Read the data bitstream from a packet into a fragment, then
+ * split into units and decompose.
+ *
+ * This also updates the internal state of the coded bitstream context
+ * with any persistent data from the fragment which may be required to
+ * read following fragments (e.g. parameter sets).
+ */
+int ff_cbs_read_packet(CodedBitstreamContext *ctx,
+                       CodedBitstreamFragment *frag,
+                       const AVPacket *pkt);
+
+/**
+ * Read a bitstream from a memory region into a fragment, then
+ * split into units and decompose.
+ *
+ * This also updates the internal state of the coded bitstream context
+ * with any persistent data from the fragment which may be required to
+ * read following fragments (e.g. parameter sets).
+ */
+int ff_cbs_read(CodedBitstreamContext *ctx,
+                CodedBitstreamFragment *frag,
+                const uint8_t *data, size_t size);
+
+
+/**
+ * Write the content of the fragment to its own internal buffer.
+ *
+ * Writes the content of all units and then assembles them into a new
+ * data buffer.  When modifying the content of decomposed units, this
+ * can be used to regenerate the bitstream form of units or the whole
+ * fragment so that it can be extracted for other use.
+ *
+ * This also updates the internal state of the coded bitstream context
+ * with any persistent data from the fragment which may be required to
+ * write following fragments (e.g. parameter sets).
+ */
+int ff_cbs_write_fragment_data(CodedBitstreamContext *ctx,
+                               CodedBitstreamFragment *frag);
+
+/**
+ * Write the bitstream of a fragment to the extradata in codec parameters.
+ *
+ * This replaces any existing extradata in the structure.
+ */
+int ff_cbs_write_extradata(CodedBitstreamContext *ctx,
+                           AVCodecParameters *par,
+                           CodedBitstreamFragment *frag);
+
+/**
+ * Write the bitstream of a fragment to a packet.
+ */
+int ff_cbs_write_packet(CodedBitstreamContext *ctx,
+                        AVPacket *pkt,
+                        CodedBitstreamFragment *frag);
+
+
+/**
+ * Free all allocated memory in a fragment.
+ */
+void ff_cbs_fragment_uninit(CodedBitstreamContext *ctx,
+                            CodedBitstreamFragment *frag);
+
+
+/**
+ * Allocate a new internal content buffer of the given size in the unit.
+ *
+ * The content will be zeroed.
+ */
+int ff_cbs_alloc_unit_content(CodedBitstreamContext *ctx,
+                              CodedBitstreamUnit *unit,
+                              size_t size,
+                              void (*free)(void *unit, uint8_t *content));
+
+/**
+ * Allocate a new internal data buffer of the given size in the unit.
+ *
+ * The data buffer will have input padding.
+ */
+int ff_cbs_alloc_unit_data(CodedBitstreamContext *ctx,
+                           CodedBitstreamUnit *unit,
+                           size_t size);
+
+/**
+ * Insert a new unit into a fragment with the given content.
+ *
+ * The content structure continues to be owned by the caller if
+ * content_buf is not supplied.
+ */
+int ff_cbs_insert_unit_content(CodedBitstreamContext *ctx,
+                               CodedBitstreamFragment *frag,
+                               int position,
+                               CodedBitstreamUnitType type,
+                               void *content,
+                               AVBufferRef *content_buf);
+
+/**
+ * Insert a new unit into a fragment with the given data bitstream.
+ *
+ * If data_buf is not supplied then data must have been allocated with
+ * av_malloc() and will become owned by the unit after this call.
+ */
+int ff_cbs_insert_unit_data(CodedBitstreamContext *ctx,
+                            CodedBitstreamFragment *frag,
+                            int position,
+                            CodedBitstreamUnitType type,
+                            uint8_t *data, size_t data_size,
+                            AVBufferRef *data_buf);
+
+/**
+ * Delete a unit from a fragment and free all memory it uses.
+ */
+int ff_cbs_delete_unit(CodedBitstreamContext *ctx,
+                       CodedBitstreamFragment *frag,
+                       int position);
+
+
+#endif /* AVCODEC_CBS_H */

diff --git a/libavcodec/cbs_av1.c b/libavcodec/cbs_av1.c
new file mode 100644
index 0000000..9bac9dd
--- /dev/null
+++ b/libavcodec/cbs_av1.c

@@ -0,0 +1,1320 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/pixfmt.h"
+
+#include "cbs.h"
+#include "cbs_internal.h"
+#include "cbs_av1.h"
+#include "internal.h"
+
+
+static int cbs_av1_read_uvlc(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                             const char *name, uint32_t *write_to,
+                             uint32_t range_min, uint32_t range_max)
+{
+    uint32_t value;
+    int position, zeroes, i, j;
+    char bits[65];
+
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    zeroes = i = 0;
+    while (1) {
+        if (get_bits_left(gbc) < zeroes + 1) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
+                   "%s: bitstream ended.\n", name);
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (get_bits1(gbc)) {
+            bits[i++] = '1';
+            break;
+        } else {
+            bits[i++] = '0';
+            ++zeroes;
+        }
+    }
+
+    if (zeroes >= 32) {
+        value = MAX_UINT_BITS(32);
+    } else {
+        value = get_bits_long(gbc, zeroes);
+
+        for (j = 0; j < zeroes; j++)
+            bits[i++] = (value >> (zeroes - j - 1) & 1) ? '1' : '0';
+
+        value += (1 << zeroes) - 1;
+    }
+
+    if (ctx->trace_enable) {
+        bits[i] = 0;
+        ff_cbs_trace_syntax_element(ctx, position, name, NULL,
+                                    bits, value);
+    }
+
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+static int cbs_av1_write_uvlc(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                              const char *name, uint32_t value,
+                              uint32_t range_min, uint32_t range_max)
+{
+    uint32_t v;
+    int position, zeroes;
+
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (ctx->trace_enable)
+        position = put_bits_count(pbc);
+
+    if (value == 0) {
+        zeroes = 0;
+        put_bits(pbc, 1, 1);
+    } else {
+        zeroes = av_log2(value + 1);
+        v = value - (1 << zeroes) + 1;
+        put_bits(pbc, zeroes + 1, 1);
+        put_bits(pbc, zeroes, v);
+    }
+
+    if (ctx->trace_enable) {
+        char bits[65];
+        int i, j;
+        i = 0;
+        for (j = 0; j < zeroes; j++)
+            bits[i++] = '0';
+        bits[i++] = '1';
+        for (j = 0; j < zeroes; j++)
+            bits[i++] = (v >> (zeroes - j - 1) & 1) ? '1' : '0';
+        bits[i++] = 0;
+        ff_cbs_trace_syntax_element(ctx, position, name, NULL,
+                                    bits, value);
+    }
+
+    return 0;
+}
+
+static int cbs_av1_read_leb128(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                               const char *name, uint64_t *write_to)
+{
+    uint64_t value;
+    int position, err, i;
+
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    value = 0;
+    for (i = 0; i < 8; i++) {
+        int subscript[2] = { 1, i };
+        uint32_t byte;
+        err = ff_cbs_read_unsigned(ctx, gbc, 8, "leb128_byte[i]", subscript,
+                                   &byte, 0x00, 0xff);
+        if (err < 0)
+            return err;
+
+        value |= (uint64_t)(byte & 0x7f) << (i * 7);
+        if (!(byte & 0x80))
+            break;
+    }
+
+    if (ctx->trace_enable)
+        ff_cbs_trace_syntax_element(ctx, position, name, NULL, "", value);
+
+    *write_to = value;
+    return 0;
+}
+
+static int cbs_av1_write_leb128(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                                const char *name, uint64_t value)
+{
+    int position, err, len, i;
+    uint8_t byte;
+
+    len = (av_log2(value) + 7) / 7;
+
+    if (ctx->trace_enable)
+        position = put_bits_count(pbc);
+
+    for (i = 0; i < len; i++) {
+        int subscript[2] = { 1, i };
+
+        byte = value >> (7 * i) & 0x7f;
+        if (i < len - 1)
+            byte |= 0x80;
+
+        err = ff_cbs_write_unsigned(ctx, pbc, 8, "leb128_byte[i]", subscript,
+                                    byte, 0x00, 0xff);
+        if (err < 0)
+            return err;
+    }
+
+    if (ctx->trace_enable)
+        ff_cbs_trace_syntax_element(ctx, position, name, NULL, "", value);
+
+    return 0;
+}
+
+static int cbs_av1_read_su(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                           int width, const char *name,
+                           const int *subscripts, int32_t *write_to)
+{
+    uint32_t magnitude;
+    int position, sign;
+    int32_t value;
+
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    if (get_bits_left(gbc) < width + 1) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid signed value at "
+               "%s: bitstream ended.\n", name);
+        return AVERROR_INVALIDDATA;
+    }
+
+    magnitude = get_bits(gbc, width);
+    sign      = get_bits1(gbc);
+    value     = sign ? -(int32_t)magnitude : magnitude;
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (i = 0; i < width; i++)
+            bits[i] = magnitude >> (width - i - 1) & 1 ? '1' : '0';
+        bits[i] = sign ? '1' : '0';
+        bits[i + 1] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, position,
+                                    name, subscripts, bits, value);
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+static int cbs_av1_write_su(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                            int width, const char *name,
+                            const int *subscripts, int32_t value)
+{
+    uint32_t magnitude;
+    int sign;
+
+    if (put_bits_left(pbc) < width + 1)
+        return AVERROR(ENOSPC);
+
+    sign      = value < 0;
+    magnitude = sign ? -value : value;
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (i = 0; i < width; i++)
+            bits[i] = magnitude >> (width - i - 1) & 1 ? '1' : '0';
+        bits[i] = sign ? '1' : '0';
+        bits[i + 1] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
+                                    name, subscripts, bits, value);
+    }
+
+    put_bits(pbc, width, magnitude);
+    put_bits(pbc, 1, sign);
+
+    return 0;
+}
+
+static int cbs_av1_read_ns(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                           uint32_t n, const char *name,
+                           const int *subscripts, uint32_t *write_to)
+{
+    uint32_t w, m, v, extra_bit, value;
+    int position;
+
+    av_assert0(n > 0);
+
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    w = av_log2(n) + 1;
+    m = (1 << w) - n;
+
+    if (get_bits_left(gbc) < w) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid non-symmetric value at "
+               "%s: bitstream ended.\n", name);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (w - 1 > 0)
+        v = get_bits(gbc, w - 1);
+    else
+        v = 0;
+
+    if (v < m) {
+        value = v;
+    } else {
+        extra_bit = get_bits1(gbc);
+        value = (v << 1) - m + extra_bit;
+    }
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (i = 0; i < w - 1; i++)
+            bits[i] = (v >> i & 1) ? '1' : '0';
+        if (v >= m)
+            bits[i++] = extra_bit ? '1' : '0';
+        bits[i] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, position,
+                                    name, subscripts, bits, value);
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+static int cbs_av1_write_ns(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                            uint32_t n, const char *name,
+                            const int *subscripts, uint32_t value)
+{
+    uint32_t w, m, v, extra_bit;
+    int position;
+
+    if (value > n) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [0,%"PRIu32"].\n",
+               name, value, n);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (ctx->trace_enable)
+        position = put_bits_count(pbc);
+
+    w = av_log2(n) + 1;
+    m = (1 << w) - n;
+
+    if (put_bits_left(pbc) < w)
+        return AVERROR(ENOSPC);
+
+    if (value < m) {
+        v = value;
+        put_bits(pbc, w - 1, v);
+    } else {
+        v = m + ((value - m) >> 1);
+        extra_bit = (value - m) & 1;
+        put_bits(pbc, w - 1, v);
+        put_bits(pbc, 1, extra_bit);
+    }
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (i = 0; i < w - 1; i++)
+            bits[i] = (v >> i & 1) ? '1' : '0';
+        if (value >= m)
+            bits[i++] = extra_bit ? '1' : '0';
+        bits[i] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, position,
+                                    name, subscripts, bits, value);
+    }
+
+    return 0;
+}
+
+static int cbs_av1_read_increment(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                                  uint32_t range_min, uint32_t range_max,
+                                  const char *name, uint32_t *write_to)
+{
+    uint32_t value;
+    int position, i;
+    char bits[33];
+
+    av_assert0(range_min <= range_max && range_max - range_min < sizeof(bits) - 1);
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    for (i = 0, value = range_min; value < range_max;) {
+        if (get_bits_left(gbc) < 1) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid increment value at "
+                   "%s: bitstream ended.\n", name);
+            return AVERROR_INVALIDDATA;
+        }
+        if (get_bits1(gbc)) {
+            bits[i++] = '1';
+            ++value;
+        } else {
+            bits[i++] = '0';
+            break;
+        }
+    }
+
+    if (ctx->trace_enable) {
+        bits[i] = 0;
+        ff_cbs_trace_syntax_element(ctx, position,
+                                    name, NULL, bits, value);
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+static int cbs_av1_write_increment(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                                   uint32_t range_min, uint32_t range_max,
+                                   const char *name, uint32_t value)
+{
+    int len;
+
+    av_assert0(range_min <= range_max && range_max - range_min < 32);
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (value == range_max)
+        len = range_max - range_min;
+    else
+        len = value - range_min + 1;
+    if (put_bits_left(pbc) < len)
+        return AVERROR(ENOSPC);
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (i = 0; i < len; i++) {
+            if (range_min + i == value)
+                bits[i] = '0';
+            else
+                bits[i] = '1';
+        }
+        bits[i] = 0;
+        ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
+                                    name, NULL, bits, value);
+    }
+
+    if (len > 0)
+        put_bits(pbc, len, (1 << len) - 1 - (value != range_max));
+
+    return 0;
+}
+
+static int cbs_av1_read_subexp(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                               uint32_t range_max, const char *name,
+                               const int *subscripts, uint32_t *write_to)
+{
+    uint32_t value;
+    int position, err;
+    uint32_t max_len, len, range_offset, range_bits;
+
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    av_assert0(range_max > 0);
+    max_len = av_log2(range_max - 1) - 3;
+
+    err = cbs_av1_read_increment(ctx, gbc, 0, max_len,
+                                 "subexp_more_bits", &len);
+    if (err < 0)
+        return err;
+
+    if (len) {
+        range_bits   = 2 + len;
+        range_offset = 1 << range_bits;
+    } else {
+        range_bits   = 3;
+        range_offset = 0;
+    }
+
+    if (len < max_len) {
+        err = ff_cbs_read_unsigned(ctx, gbc, range_bits,
+                                   "subexp_bits", NULL, &value,
+                                   0, MAX_UINT_BITS(range_bits));
+        if (err < 0)
+            return err;
+
+    } else {
+        err = cbs_av1_read_ns(ctx, gbc, range_max - range_offset,
+                              "subexp_final_bits", NULL, &value);
+        if (err < 0)
+            return err;
+    }
+    value += range_offset;
+
+    if (ctx->trace_enable)
+        ff_cbs_trace_syntax_element(ctx, position,
+                                    name, subscripts, "", value);
+
+    *write_to = value;
+    return err;
+}
+
+static int cbs_av1_write_subexp(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                                uint32_t range_max, const char *name,
+                                const int *subscripts, uint32_t value)
+{
+    int position, err;
+    uint32_t max_len, len, range_offset, range_bits;
+
+    if (value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [0,%"PRIu32"].\n",
+               name, value, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (ctx->trace_enable)
+        position = put_bits_count(pbc);
+
+    av_assert0(range_max > 0);
+    max_len = av_log2(range_max - 1) - 3;
+
+    if (value < 8) {
+        range_bits   = 3;
+        range_offset = 0;
+        len = 0;
+    } else {
+        range_bits = av_log2(value);
+        len = range_bits - 2;
+        if (len > max_len) {
+            // The top bin is combined with the one below it.
+            av_assert0(len == max_len + 1);
+            --range_bits;
+            len = max_len;
+        }
+        range_offset = 1 << range_bits;
+    }
+
+    err = cbs_av1_write_increment(ctx, pbc, 0, max_len,
+                                  "subexp_more_bits", len);
+    if (err < 0)
+        return err;
+
+    if (len < max_len) {
+        err = ff_cbs_write_unsigned(ctx, pbc, range_bits,
+                                    "subexp_bits", NULL,
+                                    value - range_offset,
+                                    0, MAX_UINT_BITS(range_bits));
+        if (err < 0)
+            return err;
+
+    } else {
+        err = cbs_av1_write_ns(ctx, pbc, range_max - range_offset,
+                               "subexp_final_bits", NULL,
+                               value - range_offset);
+        if (err < 0)
+            return err;
+    }
+
+    if (ctx->trace_enable)
+        ff_cbs_trace_syntax_element(ctx, position,
+                                    name, subscripts, "", value);
+
+    return err;
+}
+
+
+static int cbs_av1_tile_log2(int blksize, int target)
+{
+    int k;
+    for (k = 0; (blksize << k) < target; k++);
+    return k;
+}
+
+static int cbs_av1_get_relative_dist(const AV1RawSequenceHeader *seq,
+                                     unsigned int a, unsigned int b)
+{
+    unsigned int diff, m;
+    if (!seq->enable_order_hint)
+        return 0;
+    diff = a - b;
+    m = 1 << seq->order_hint_bits_minus_1;
+    diff = (diff & (m - 1)) - (diff & m);
+    return diff;
+}
+
+
+#define HEADER(name) do { \
+        ff_cbs_trace_header(ctx, name); \
+    } while (0)
+
+#define CHECK(call) do { \
+        err = (call); \
+        if (err < 0) \
+            return err; \
+    } while (0)
+
+#define FUNC_NAME(rw, codec, name) cbs_ ## codec ## _ ## rw ## _ ## name
+#define FUNC_AV1(rw, name) FUNC_NAME(rw, av1, name)
+#define FUNC(name) FUNC_AV1(READWRITE, name)
+
+#define SUBSCRIPTS(subs, ...) (subs > 0 ? ((int[subs + 1]){ subs, __VA_ARGS__ }) : NULL)
+
+#define fb(width, name) \
+        xf(width, name, current->name, 0, MAX_UINT_BITS(width), 0)
+#define fc(width, name, range_min, range_max) \
+        xf(width, name, current->name, range_min, range_max, 0)
+#define flag(name) fb(1, name)
+#define su(width, name) \
+        xsu(width, name, current->name, 0)
+
+#define fbs(width, name, subs, ...) \
+        xf(width, name, current->name, 0, MAX_UINT_BITS(width), subs, __VA_ARGS__)
+#define fcs(width, name, range_min, range_max, subs, ...) \
+        xf(width, name, current->name, range_min, range_max, subs, __VA_ARGS__)
+#define flags(name, subs, ...) \
+        xf(1, name, current->name, 0, 1, subs, __VA_ARGS__)
+#define sus(width, name, subs, ...) \
+        xsu(width, name, current->name, subs, __VA_ARGS__)
+
+#define fixed(width, name, value) do { \
+        av_unused uint32_t fixed_value = value; \
+        xf(width, name, fixed_value, value, value, 0); \
+    } while (0)
+
+
+#define READ
+#define READWRITE read
+#define RWContext GetBitContext
+
+#define xf(width, name, var, range_min, range_max, subs, ...) do { \
+        uint32_t value = range_min; \
+        CHECK(ff_cbs_read_unsigned(ctx, rw, width, #name, \
+                                   SUBSCRIPTS(subs, __VA_ARGS__), \
+                                   &value, range_min, range_max)); \
+        var = value; \
+    } while (0)
+
+#define xsu(width, name, var, subs, ...) do { \
+        int32_t value = 0; \
+        CHECK(cbs_av1_read_su(ctx, rw, width, #name, \
+                              SUBSCRIPTS(subs, __VA_ARGS__), &value)); \
+        var = value; \
+    } while (0)
+
+#define uvlc(name, range_min, range_max) do { \
+        uint32_t value = range_min; \
+        CHECK(cbs_av1_read_uvlc(ctx, rw, #name, \
+                                &value, range_min, range_max)); \
+        current->name = value; \
+    } while (0)
+
+#define ns(max_value, name, subs, ...) do { \
+        uint32_t value = 0; \
+        CHECK(cbs_av1_read_ns(ctx, rw, max_value, #name, \
+                              SUBSCRIPTS(subs, __VA_ARGS__), &value)); \
+        current->name = value; \
+    } while (0)
+
+#define increment(name, min, max) do { \
+        uint32_t value = 0; \
+        CHECK(cbs_av1_read_increment(ctx, rw, min, max, #name, &value)); \
+        current->name = value; \
+    } while (0)
+
+#define subexp(name, max, subs, ...) do { \
+        uint32_t value = 0; \
+        CHECK(cbs_av1_read_subexp(ctx, rw, max, #name, \
+                                  SUBSCRIPTS(subs, __VA_ARGS__), &value)); \
+        current->name = value; \
+    } while (0)
+
+#define delta_q(name) do { \
+        uint8_t delta_coded; \
+        int8_t delta_q; \
+        xf(1, name.delta_coded, delta_coded, 0, 1, 0); \
+        if (delta_coded) \
+            xsu(1 + 6, name.delta_q, delta_q, 0); \
+        else \
+            delta_q = 0; \
+        current->name = delta_q; \
+    } while (0)
+
+#define leb128(name) do { \
+        uint64_t value = 0; \
+        CHECK(cbs_av1_read_leb128(ctx, rw, #name, &value)); \
+        current->name = value; \
+    } while (0)
+
+#define infer(name, value) do { \
+        current->name = value; \
+    } while (0)
+
+#define byte_alignment(rw) (get_bits_count(rw) % 8)
+
+#include "cbs_av1_syntax_template.c"
+
+#undef READ
+#undef READWRITE
+#undef RWContext
+#undef xf
+#undef xsu
+#undef uvlc
+#undef leb128
+#undef ns
+#undef increment
+#undef subexp
+#undef delta_q
+#undef leb128
+#undef infer
+#undef byte_alignment
+
+
+#define WRITE
+#define READWRITE write
+#define RWContext PutBitContext
+
+#define xf(width, name, var, range_min, range_max, subs, ...) do { \
+        CHECK(ff_cbs_write_unsigned(ctx, rw, width, #name, \
+                                    SUBSCRIPTS(subs, __VA_ARGS__), \
+                                    var, range_min, range_max)); \
+    } while (0)
+
+#define xsu(width, name, var, subs, ...) do { \
+        CHECK(cbs_av1_write_su(ctx, rw, width, #name, \
+                               SUBSCRIPTS(subs, __VA_ARGS__), var)); \
+    } while (0)
+
+#define uvlc(name, range_min, range_max) do { \
+        CHECK(cbs_av1_write_uvlc(ctx, rw, #name, current->name, \
+                                 range_min, range_max)); \
+    } while (0)
+
+#define ns(max_value, name, subs, ...) do { \
+        CHECK(cbs_av1_write_ns(ctx, rw, max_value, #name, \
+                               SUBSCRIPTS(subs, __VA_ARGS__), \
+                               current->name)); \
+    } while (0)
+
+#define increment(name, min, max) do { \
+        CHECK(cbs_av1_write_increment(ctx, rw, min, max, #name, \
+                                      current->name)); \
+    } while (0)
+
+#define subexp(name, max, subs, ...) do { \
+        CHECK(cbs_av1_write_subexp(ctx, rw, max, #name, \
+                                   SUBSCRIPTS(subs, __VA_ARGS__), \
+                                   current->name)); \
+    } while (0)
+
+#define delta_q(name) do { \
+        xf(1, name.delta_coded, current->name != 0, 0, 1, 0); \
+        if (current->name) \
+            xsu(1 + 6, name.delta_q, current->name, 0); \
+    } while (0)
+
+#define leb128(name) do { \
+        CHECK(cbs_av1_write_leb128(ctx, rw, #name, current->name)); \
+    } while (0)
+
+#define infer(name, value) do { \
+        if (current->name != (value)) { \
+            av_log(ctx->log_ctx, AV_LOG_WARNING, "Warning: " \
+                   "%s does not match inferred value: " \
+                   "%"PRId64", but should be %"PRId64".\n", \
+                   #name, (int64_t)current->name, (int64_t)(value)); \
+        } \
+    } while (0)
+
+#define byte_alignment(rw) (put_bits_count(rw) % 8)
+
+#include "cbs_av1_syntax_template.c"
+
+#undef READ
+#undef READWRITE
+#undef RWContext
+#undef xf
+#undef xsu
+#undef uvlc
+#undef leb128
+#undef ns
+#undef increment
+#undef subexp
+#undef delta_q
+#undef infer
+#undef byte_alignment
+
+
+static int cbs_av1_split_fragment(CodedBitstreamContext *ctx,
+                                  CodedBitstreamFragment *frag,
+                                  int header)
+{
+    GetBitContext gbc;
+    uint8_t *data;
+    size_t size;
+    uint64_t obu_length;
+    int pos, err, trace;
+
+    // Don't include this parsing in trace output.
+    trace = ctx->trace_enable;
+    ctx->trace_enable = 0;
+
+    data = frag->data;
+    size = frag->data_size;
+
+    if (INT_MAX / 8 < size) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid fragment: "
+               "too large (%zu bytes).\n", size);
+        err = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    while (size > 0) {
+        AV1RawOBUHeader header;
+        uint64_t obu_size;
+
+        init_get_bits(&gbc, data, 8 * size);
+
+        err = cbs_av1_read_obu_header(ctx, &gbc, &header);
+        if (err < 0)
+            goto fail;
+
+        if (!header.obu_has_size_field) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid OBU for raw "
+                   "stream: size field must be present.\n");
+            err = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        if (get_bits_left(&gbc) < 8) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid OBU: fragment "
+                   "too short (%zu bytes).\n", size);
+            err = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        err = cbs_av1_read_leb128(ctx, &gbc, "obu_size", &obu_size);
+        if (err < 0)
+            goto fail;
+
+        pos = get_bits_count(&gbc);
+        av_assert0(pos % 8 == 0 && pos / 8 <= size);
+
+        obu_length = pos / 8 + obu_size;
+
+        if (size < obu_length) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid OBU length: "
+                   "%"PRIu64", but only %zu bytes remaining in fragment.\n",
+                   obu_length, size);
+            err = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        err = ff_cbs_insert_unit_data(ctx, frag, -1, header.obu_type,
+                                      data, obu_length, frag->data_ref);
+        if (err < 0)
+            goto fail;
+
+        data += obu_length;
+        size -= obu_length;
+    }
+
+    err = 0;
+fail:
+    ctx->trace_enable = trace;
+    return err;
+}
+
+static void cbs_av1_free_tile_data(AV1RawTileData *td)
+{
+    av_buffer_unref(&td->data_ref);
+}
+
+static void cbs_av1_free_metadata(AV1RawMetadata *md)
+{
+    switch (md->metadata_type) {
+    case AV1_METADATA_TYPE_ITUT_T35:
+        av_buffer_unref(&md->metadata.itut_t35.payload_ref);
+        break;
+    }
+}
+
+static void cbs_av1_free_obu(void *unit, uint8_t *content)
+{
+    AV1RawOBU *obu = (AV1RawOBU*)content;
+
+    switch (obu->header.obu_type) {
+    case AV1_OBU_TILE_GROUP:
+        cbs_av1_free_tile_data(&obu->obu.tile_group.tile_data);
+        break;
+    case AV1_OBU_FRAME:
+        cbs_av1_free_tile_data(&obu->obu.frame.tile_group.tile_data);
+        break;
+    case AV1_OBU_TILE_LIST:
+        cbs_av1_free_tile_data(&obu->obu.tile_list.tile_data);
+        break;
+    case AV1_OBU_METADATA:
+        cbs_av1_free_metadata(&obu->obu.metadata);
+        break;
+    }
+
+    av_freep(&obu);
+}
+
+static int cbs_av1_ref_tile_data(CodedBitstreamContext *ctx,
+                                 CodedBitstreamUnit *unit,
+                                 GetBitContext *gbc,
+                                 AV1RawTileData *td)
+{
+    int pos;
+
+    pos = get_bits_count(gbc);
+    if (pos >= 8 * unit->data_size) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Bitstream ended before "
+               "any data in tile group (%d bits read).\n", pos);
+        return AVERROR_INVALIDDATA;
+    }
+    // Must be byte-aligned at this point.
+    av_assert0(pos % 8 == 0);
+
+    td->data_ref = av_buffer_ref(unit->data_ref);
+    if (!td->data_ref)
+        return AVERROR(ENOMEM);
+
+    td->data      = unit->data      + pos / 8;
+    td->data_size = unit->data_size - pos / 8;
+
+    return 0;
+}
+
+static int cbs_av1_read_unit(CodedBitstreamContext *ctx,
+                             CodedBitstreamUnit *unit)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    AV1RawOBU *obu;
+    GetBitContext gbc;
+    int err, start_pos, end_pos;
+
+    err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(*obu),
+                                    &cbs_av1_free_obu);
+    if (err < 0)
+        return err;
+    obu = unit->content;
+
+    err = init_get_bits(&gbc, unit->data, 8 * unit->data_size);
+    if (err < 0)
+        return err;
+
+    err = cbs_av1_read_obu_header(ctx, &gbc, &obu->header);
+    if (err < 0)
+        return err;
+    av_assert0(obu->header.obu_type == unit->type);
+
+    if (obu->header.obu_has_size_field) {
+        uint64_t obu_size;
+        err = cbs_av1_read_leb128(ctx, &gbc, "obu_size", &obu_size);
+        if (err < 0)
+            return err;
+        obu->obu_size = obu_size;
+    } else {
+        if (unit->data_size < 1 + obu->header.obu_extension_flag) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid OBU length: "
+                   "unit too short (%zu).\n", unit->data_size);
+            return AVERROR_INVALIDDATA;
+        }
+        obu->obu_size = unit->data_size - 1 - obu->header.obu_extension_flag;
+    }
+
+    start_pos = get_bits_count(&gbc);
+
+    if (obu->header.obu_extension_flag) {
+        priv->temporal_id = obu->header.temporal_id;
+        priv->spatial_id  = obu->header.temporal_id;
+
+        if (obu->header.obu_type != AV1_OBU_SEQUENCE_HEADER &&
+            obu->header.obu_type != AV1_OBU_TEMPORAL_DELIMITER &&
+            priv->operating_point_idc) {
+            int in_temporal_layer =
+                (priv->operating_point_idc >>  priv->temporal_id    ) & 1;
+            int in_spatial_layer  =
+                (priv->operating_point_idc >> (priv->spatial_id + 8)) & 1;
+            if (!in_temporal_layer || !in_spatial_layer) {
+                // Decoding will drop this OBU at this operating point.
+            }
+        }
+    } else {
+        priv->temporal_id = 0;
+        priv->spatial_id  = 0;
+    }
+
+    switch (obu->header.obu_type) {
+    case AV1_OBU_SEQUENCE_HEADER:
+        {
+            err = cbs_av1_read_sequence_header_obu(ctx, &gbc,
+                                                   &obu->obu.sequence_header);
+            if (err < 0)
+                return err;
+
+            av_buffer_unref(&priv->sequence_header_ref);
+            priv->sequence_header = NULL;
+
+            priv->sequence_header_ref = av_buffer_ref(unit->content_ref);
+            if (!priv->sequence_header_ref)
+                return AVERROR(ENOMEM);
+            priv->sequence_header = &obu->obu.sequence_header;
+        }
+        break;
+    case AV1_OBU_TEMPORAL_DELIMITER:
+        {
+            err = cbs_av1_read_temporal_delimiter_obu(ctx, &gbc);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_FRAME_HEADER:
+    case AV1_OBU_REDUNDANT_FRAME_HEADER:
+        {
+            err = cbs_av1_read_frame_header_obu(ctx, &gbc,
+                                                &obu->obu.frame_header);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_TILE_GROUP:
+        {
+            err = cbs_av1_read_tile_group_obu(ctx, &gbc,
+                                              &obu->obu.tile_group);
+            if (err < 0)
+                return err;
+
+            err = cbs_av1_ref_tile_data(ctx, unit, &gbc,
+                                        &obu->obu.tile_group.tile_data);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_FRAME:
+        {
+            err = cbs_av1_read_frame_obu(ctx, &gbc, &obu->obu.frame);
+            if (err < 0)
+                return err;
+
+            err = cbs_av1_ref_tile_data(ctx, unit, &gbc,
+                                        &obu->obu.frame.tile_group.tile_data);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_TILE_LIST:
+        {
+            err = cbs_av1_read_tile_list_obu(ctx, &gbc,
+                                             &obu->obu.tile_list);
+            if (err < 0)
+                return err;
+
+            err = cbs_av1_ref_tile_data(ctx, unit, &gbc,
+                                        &obu->obu.tile_list.tile_data);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_METADATA:
+        {
+            err = cbs_av1_read_metadata_obu(ctx, &gbc, &obu->obu.metadata);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_PADDING:
+    default:
+        return AVERROR(ENOSYS);
+    }
+
+    end_pos = get_bits_count(&gbc);
+    av_assert0(end_pos <= unit->data_size * 8);
+
+    if (obu->obu_size > 0 &&
+        obu->header.obu_type != AV1_OBU_TILE_GROUP &&
+        obu->header.obu_type != AV1_OBU_FRAME) {
+        err = cbs_av1_read_trailing_bits(ctx, &gbc,
+                                         obu->obu_size * 8 + start_pos - end_pos);
+        if (err < 0)
+            return err;
+    }
+
+    return 0;
+}
+
+static int cbs_av1_write_obu(CodedBitstreamContext *ctx,
+                             CodedBitstreamUnit *unit,
+                             PutBitContext *pbc)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    AV1RawOBU *obu = unit->content;
+    PutBitContext pbc_tmp;
+    AV1RawTileData *td;
+    size_t header_size;
+    int err, start_pos, end_pos, data_pos;
+
+    // OBUs in the normal bitstream format must contain a size field
+    // in every OBU (in annex B it is optional, but we don't support
+    // writing that).
+    obu->header.obu_has_size_field = 1;
+
+    err = cbs_av1_write_obu_header(ctx, pbc, &obu->header);
+    if (err < 0)
+        return err;
+
+    if (obu->header.obu_has_size_field) {
+        pbc_tmp = *pbc;
+        // Add space for the size field to fill later.
+        put_bits32(pbc, 0);
+        put_bits32(pbc, 0);
+    }
+
+    td = NULL;
+    start_pos = put_bits_count(pbc);
+
+    switch (obu->header.obu_type) {
+    case AV1_OBU_SEQUENCE_HEADER:
+        {
+            err = cbs_av1_write_sequence_header_obu(ctx, pbc,
+                                                    &obu->obu.sequence_header);
+            if (err < 0)
+                return err;
+
+            av_buffer_unref(&priv->sequence_header_ref);
+            priv->sequence_header = NULL;
+
+            priv->sequence_header_ref = av_buffer_ref(unit->content_ref);
+            if (!priv->sequence_header_ref)
+                return AVERROR(ENOMEM);
+            priv->sequence_header = &obu->obu.sequence_header;
+        }
+        break;
+    case AV1_OBU_TEMPORAL_DELIMITER:
+        {
+            err = cbs_av1_write_temporal_delimiter_obu(ctx, pbc);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_FRAME_HEADER:
+    case AV1_OBU_REDUNDANT_FRAME_HEADER:
+        {
+            err = cbs_av1_write_frame_header_obu(ctx, pbc,
+                                                 &obu->obu.frame_header);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_TILE_GROUP:
+        {
+            err = cbs_av1_write_tile_group_obu(ctx, pbc,
+                                               &obu->obu.tile_group);
+            if (err < 0)
+                return err;
+
+            td = &obu->obu.tile_group.tile_data;
+        }
+        break;
+    case AV1_OBU_FRAME:
+        {
+            err = cbs_av1_write_frame_obu(ctx, pbc, &obu->obu.frame);
+            if (err < 0)
+                return err;
+
+            td = &obu->obu.frame.tile_group.tile_data;
+        }
+        break;
+    case AV1_OBU_TILE_LIST:
+        {
+            err = cbs_av1_write_tile_list_obu(ctx, pbc, &obu->obu.tile_list);
+            if (err < 0)
+                return err;
+
+            td = &obu->obu.tile_list.tile_data;
+        }
+        break;
+    case AV1_OBU_METADATA:
+        {
+            err = cbs_av1_write_metadata_obu(ctx, pbc, &obu->obu.metadata);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case AV1_OBU_PADDING:
+    default:
+        return AVERROR(ENOSYS);
+    }
+
+    end_pos = put_bits_count(pbc);
+    header_size = (end_pos - start_pos + 7) / 8;
+    if (td) {
+        obu->obu_size = header_size + td->data_size;
+    } else if (header_size > 0) {
+        // Add trailing bits and recalculate.
+        err = cbs_av1_write_trailing_bits(ctx, pbc, 8 - end_pos % 8);
+        if (err < 0)
+            return err;
+        end_pos = put_bits_count(pbc);
+        obu->obu_size = (end_pos - start_pos + 7) / 8;
+    } else {
+        // Empty OBU.
+        obu->obu_size = 0;
+    }
+
+    end_pos = put_bits_count(pbc);
+    // Must now be byte-aligned.
+    av_assert0(end_pos % 8 == 0);
+    flush_put_bits(pbc);
+    start_pos /= 8;
+    end_pos   /= 8;
+
+    *pbc = pbc_tmp;
+    err = cbs_av1_write_leb128(ctx, pbc, "obu_size", obu->obu_size);
+    if (err < 0)
+        return err;
+
+    data_pos = put_bits_count(pbc) / 8;
+    flush_put_bits(pbc);
+    av_assert0(data_pos <= start_pos);
+
+    if (8 * obu->obu_size > put_bits_left(pbc))
+        return AVERROR(ENOSPC);
+
+    if (obu->obu_size > 0) {
+        memmove(priv->write_buffer + data_pos,
+                priv->write_buffer + start_pos, header_size);
+        skip_put_bytes(pbc, header_size);
+
+        if (td) {
+            memcpy(priv->write_buffer + data_pos + header_size,
+                   td->data, td->data_size);
+            skip_put_bytes(pbc, td->data_size);
+        }
+    }
+
+    return 0;
+}
+
+static int cbs_av1_write_unit(CodedBitstreamContext *ctx,
+                              CodedBitstreamUnit *unit)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    PutBitContext pbc;
+    int err;
+
+    if (!priv->write_buffer) {
+        // Initial write buffer size is 1MB.
+        priv->write_buffer_size = 1024 * 1024;
+
+    reallocate_and_try_again:
+        err = av_reallocp(&priv->write_buffer, priv->write_buffer_size);
+        if (err < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Unable to allocate a "
+                   "sufficiently large write buffer (last attempt "
+                   "%zu bytes).\n", priv->write_buffer_size);
+            return err;
+        }
+    }
+
+    init_put_bits(&pbc, priv->write_buffer, priv->write_buffer_size);
+
+    err = cbs_av1_write_obu(ctx, unit, &pbc);
+    if (err == AVERROR(ENOSPC)) {
+        // Overflow.
+        priv->write_buffer_size *= 2;
+        goto reallocate_and_try_again;
+    }
+    if (err < 0)
+        return err;
+
+    // Overflow but we didn't notice.
+    av_assert0(put_bits_count(&pbc) <= 8 * priv->write_buffer_size);
+
+    // OBU data must be byte-aligned.
+    av_assert0(put_bits_count(&pbc) % 8 == 0);
+
+    unit->data_size = put_bits_count(&pbc) / 8;
+    flush_put_bits(&pbc);
+
+    err = ff_cbs_alloc_unit_data(ctx, unit, unit->data_size);
+    if (err < 0)
+        return err;
+
+    memcpy(unit->data, priv->write_buffer, unit->data_size);
+
+    return 0;
+}
+
+static int cbs_av1_assemble_fragment(CodedBitstreamContext *ctx,
+                                     CodedBitstreamFragment *frag)
+{
+    size_t size, pos;
+    int i;
+
+    size = 0;
+    for (i = 0; i < frag->nb_units; i++)
+        size += frag->units[i].data_size;
+
+    frag->data_ref = av_buffer_alloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!frag->data_ref)
+        return AVERROR(ENOMEM);
+    frag->data = frag->data_ref->data;
+    memset(frag->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+    pos = 0;
+    for (i = 0; i < frag->nb_units; i++) {
+        memcpy(frag->data + pos, frag->units[i].data,
+               frag->units[i].data_size);
+        pos += frag->units[i].data_size;
+    }
+    av_assert0(pos == size);
+    frag->data_size = size;
+
+    return 0;
+}
+
+static void cbs_av1_close(CodedBitstreamContext *ctx)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+
+    av_buffer_unref(&priv->sequence_header_ref);
+
+    av_freep(&priv->write_buffer);
+}
+
+const CodedBitstreamType ff_cbs_type_av1 = {
+    .codec_id          = AV_CODEC_ID_AV1,
+
+    .priv_data_size    = sizeof(CodedBitstreamAV1Context),
+
+    .split_fragment    = &cbs_av1_split_fragment,
+    .read_unit         = &cbs_av1_read_unit,
+    .write_unit        = &cbs_av1_write_unit,
+    .assemble_fragment = &cbs_av1_assemble_fragment,
+
+    .close             = &cbs_av1_close,
+};

diff --git a/libavcodec/cbs_av1.h b/libavcodec/cbs_av1.h
new file mode 100644
index 0000000..0d7fd76
--- /dev/null
+++ b/libavcodec/cbs_av1.h

@@ -0,0 +1,429 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_AV1_H
+#define AVCODEC_CBS_AV1_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "av1.h"
+#include "cbs.h"
+
+
+typedef struct AV1RawOBUHeader {
+    uint8_t obu_forbidden_bit;
+    uint8_t obu_type;
+    uint8_t obu_extension_flag;
+    uint8_t obu_has_size_field;
+    uint8_t obu_reserved_1bit;
+
+    uint8_t temporal_id;
+    uint8_t spatial_id;
+    uint8_t extension_header_reserved_3bits;
+} AV1RawOBUHeader;
+
+typedef struct AV1RawColorConfig {
+    uint8_t high_bitdepth;
+    uint8_t twelve_bit;
+    uint8_t mono_chrome;
+
+    uint8_t color_description_present_flag;
+    uint8_t color_primaries;
+    uint8_t transfer_characteristics;
+    uint8_t matrix_coefficients;
+
+    uint8_t color_range;
+    uint8_t subsampling_x;
+    uint8_t subsampling_y;
+    uint8_t chroma_sample_position;
+    uint8_t separate_uv_delta_q;
+} AV1RawColorConfig;
+
+typedef struct AV1RawTimingInfo {
+    uint32_t num_units_in_display_tick;
+    uint32_t time_scale;
+
+    uint8_t equal_picture_interval;
+    uint32_t num_ticks_per_picture_minus_1;
+} AV1RawTimingInfo;
+
+typedef struct AV1RawDecoderModelInfo {
+    uint8_t  buffer_delay_length_minus_1;
+    uint32_t num_units_in_decoding_tick;
+    uint8_t  buffer_removal_time_length_minus_1;
+    uint8_t  frame_presentation_time_length_minus_1;
+} AV1RawDecoderModelInfo;
+
+typedef struct AV1RawSequenceHeader {
+    uint8_t seq_profile;
+    uint8_t still_picture;
+    uint8_t reduced_still_picture_header;
+
+    uint8_t timing_info_present_flag;
+    uint8_t decoder_model_info_present_flag;
+    uint8_t initial_display_delay_present_flag;
+    uint8_t operating_points_cnt_minus_1;
+
+    AV1RawTimingInfo       timing_info;
+    AV1RawDecoderModelInfo decoder_model_info;
+
+    uint16_t operating_point_idc[AV1_MAX_OPERATING_POINTS];
+    uint8_t  seq_level_idx[AV1_MAX_OPERATING_POINTS];
+    uint8_t  seq_tier[AV1_MAX_OPERATING_POINTS];
+    uint8_t  decoder_model_present_for_this_op[AV1_MAX_OPERATING_POINTS];
+    uint8_t  decoder_buffer_delay[AV1_MAX_OPERATING_POINTS];
+    uint8_t  encoder_buffer_delay[AV1_MAX_OPERATING_POINTS];
+    uint8_t  low_delay_mode_flag[AV1_MAX_OPERATING_POINTS];
+    uint8_t  initial_display_delay_present_for_this_op[AV1_MAX_OPERATING_POINTS];
+    uint8_t  initial_display_delay_minus_1[AV1_MAX_OPERATING_POINTS];
+
+    uint8_t  frame_width_bits_minus_1;
+    uint8_t  frame_height_bits_minus_1;
+    uint16_t max_frame_width_minus_1;
+    uint16_t max_frame_height_minus_1;
+
+    uint8_t frame_id_numbers_present_flag;
+    uint8_t delta_frame_id_length_minus_2;
+    uint8_t additional_frame_id_length_minus_1;
+
+    uint8_t use_128x128_superblock;
+    uint8_t enable_filter_intra;
+    uint8_t enable_intra_edge_filter;
+    uint8_t enable_intraintra_compound;
+    uint8_t enable_masked_compound;
+    uint8_t enable_warped_motion;
+    uint8_t enable_dual_filter;
+
+    uint8_t enable_order_hint;
+    uint8_t enable_jnt_comp;
+    uint8_t enable_ref_frame_mvs;
+
+    uint8_t seq_choose_screen_content_tools;
+    uint8_t seq_force_screen_content_tools;
+    uint8_t seq_choose_integer_mv;
+    uint8_t seq_force_integer_mv;
+
+    uint8_t order_hint_bits_minus_1;
+
+    uint8_t enable_superres;
+    uint8_t enable_cdef;
+    uint8_t enable_restoration;
+
+    AV1RawColorConfig color_config;
+
+    uint8_t film_grain_params_present;
+} AV1RawSequenceHeader;
+
+typedef struct AV1RawFrameHeader {
+    uint8_t  show_existing_frame;
+    uint8_t  frame_to_show_map_idx;
+    uint32_t frame_presentation_time;
+    uint32_t display_frame_id;
+
+    uint8_t frame_type;
+    uint8_t show_frame;
+    uint8_t showable_frame;
+
+    uint8_t error_resilient_mode;
+    uint8_t disable_cdf_update;
+    uint8_t allow_screen_content_tools;
+    uint8_t force_integer_mv;
+
+    uint32_t current_frame_id;
+    uint8_t  frame_size_override_flag;
+    uint8_t  order_hint;
+
+    uint8_t  buffer_removal_time_present_flag;
+    uint32_t buffer_removal_time[AV1_MAX_OPERATING_POINTS];
+
+    uint8_t  primary_ref_frame;
+    uint16_t frame_width_minus_1;
+    uint16_t frame_height_minus_1;
+    uint8_t  use_superres;
+    uint8_t  coded_denom;
+    uint8_t  render_and_frame_size_different;
+    uint8_t  render_width_minus_1;
+    uint8_t  render_height_minus_1;
+
+    uint8_t found_ref;
+
+    uint8_t refresh_frame_flags;
+    uint8_t allow_intrabc;
+    uint8_t ref_order_hint[AV1_NUM_REF_FRAMES];
+    uint8_t frame_refs_short_signaling;
+    uint8_t last_frame_idx;
+    uint8_t golden_frame_idx;
+    int8_t  ref_frame_idx[AV1_REFS_PER_FRAME];
+    uint8_t delta_frame_id_minus1;
+
+    uint8_t allow_high_precision_mv;
+    uint8_t is_filter_switchable;
+    uint8_t interpolation_filter;
+    uint8_t is_motion_mode_switchable;
+    uint8_t use_ref_frame_mvs;
+
+    uint8_t disable_frame_end_update_cdf;
+
+    uint8_t uniform_tile_spacing_flag;
+    uint8_t tile_cols_log2;
+    uint8_t tile_rows_log2;
+    uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS];
+    uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS];
+    uint16_t context_update_tile_id;
+    uint8_t tile_size_bytes_minus1;
+
+    // These are derived values, but it's very unhelpful to have to
+    // recalculate them all the time so we store them here.
+    uint16_t tile_cols;
+    uint16_t tile_rows;
+
+    uint8_t base_q_idx;
+    int8_t  delta_q_y_dc;
+    uint8_t diff_uv_delta;
+    int8_t  delta_q_u_dc;
+    int8_t  delta_q_u_ac;
+    int8_t  delta_q_v_dc;
+    int8_t  delta_q_v_ac;
+    uint8_t using_qmatrix;
+    uint8_t qm_y;
+    uint8_t qm_u;
+    uint8_t qm_v;
+
+    uint8_t segmentation_enabled;
+    uint8_t segmentation_update_map;
+    uint8_t segmentation_temporal_update;
+    uint8_t segmentation_update_data;
+    uint8_t feature_enabled[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX];
+    uint8_t feature_value[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX];
+
+    uint8_t delta_q_present;
+    uint8_t delta_q_res;
+    uint8_t delta_lf_present;
+    uint8_t delta_lf_res;
+    uint8_t delta_lf_multi;
+
+    uint8_t loop_filter_level[4];
+    uint8_t loop_filter_sharpness;
+    uint8_t loop_filter_delta_enabled;
+    uint8_t loop_filter_delta_update;
+    uint8_t update_ref_delta[AV1_TOTAL_REFS_PER_FRAME];
+    int8_t  loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME];
+    uint8_t update_mode_delta[2];
+    int8_t  loop_filter_mode_deltas[2];
+
+    uint8_t cdef_damping_minus_3;
+    uint8_t cdef_bits;
+    uint8_t cdef_y_pri_strength[8];
+    uint8_t cdef_y_sec_strength[8];
+    uint8_t cdef_uv_pri_strength[8];
+    uint8_t cdef_uv_sec_strength[8];
+
+    uint8_t lr_type[3];
+    uint8_t lr_unit_shift;
+    uint8_t lr_uv_shift;
+
+    uint8_t tx_mode;
+    uint8_t reference_select;
+    uint8_t skip_mode_present;
+
+    uint8_t allow_warped_motion;
+    uint8_t reduced_tx_set;
+
+    uint8_t is_global[AV1_TOTAL_REFS_PER_FRAME];
+    uint8_t is_rot_zoom[AV1_TOTAL_REFS_PER_FRAME];
+    uint8_t is_translation[AV1_TOTAL_REFS_PER_FRAME];
+    //AV1RawSubexp gm_params[AV1_TOTAL_REFS_PER_FRAME][6];
+    uint32_t gm_params[AV1_TOTAL_REFS_PER_FRAME][6];
+
+    uint8_t  apply_grain;
+    uint16_t grain_seed;
+    uint8_t  update_grain;
+    uint8_t  film_grain_params_ref_idx;
+    uint8_t  num_y_points;
+    uint8_t  point_y_value[16];
+    uint8_t  point_y_scaling[16];
+    uint8_t  chroma_scaling_from_luma;
+    uint8_t  num_cb_points;
+    uint8_t  point_cb_value[16];
+    uint8_t  point_cb_scaling[16];
+    uint8_t  num_cr_points;
+    uint8_t  point_cr_value[16];
+    uint8_t  point_cr_scaling[16];
+    uint8_t  grain_scaling_minus_8;
+    uint8_t  ar_coeff_lag;
+    uint8_t  ar_coeffs_y_plus_128[24];
+    uint8_t  ar_coeffs_cb_plus_128[24];
+    uint8_t  ar_coeffs_cr_plus_128[24];
+    uint8_t  ar_coeff_shift_minus_6;
+    uint8_t  grain_scale_shift;
+    uint8_t  cb_mult;
+    uint8_t  cb_luma_mult;
+    uint16_t cb_offset;
+    uint8_t  cr_mult;
+    uint8_t  cr_luma_mult;
+    uint16_t cr_offset;
+    uint8_t  overlap_flag;
+    uint8_t  clip_to_restricted_range;
+} AV1RawFrameHeader;
+
+typedef struct AV1RawTileData {
+    uint8_t     *data;
+    size_t       data_size;
+    AVBufferRef *data_ref;
+} AV1RawTileData;
+
+typedef struct AV1RawTileGroup {
+    uint8_t  tile_start_and_end_present_flag;
+    uint16_t tg_start;
+    uint16_t tg_end;
+
+    AV1RawTileData tile_data;
+} AV1RawTileGroup;
+
+typedef struct AV1RawFrame {
+    AV1RawFrameHeader header;
+    AV1RawTileGroup   tile_group;
+} AV1RawFrame;
+
+typedef struct AV1RawTileList {
+    uint8_t output_frame_width_in_tiles_minus_1;
+    uint8_t output_frame_height_in_tiles_minus_1;
+    uint16_t tile_count_minus_1;
+
+    AV1RawTileData tile_data;
+} AV1RawTileList;
+
+typedef struct AV1RawMetadataHDRCLL {
+    uint16_t max_cll;
+    uint16_t max_fall;
+} AV1RawMetadataHDRCLL;
+
+typedef struct AV1RawMetadataHDRMDCV {
+    uint16_t primary_chromaticity_x[3];
+    uint16_t primary_chromaticity_y[3];
+    uint16_t white_point_chromaticity_x;
+    uint16_t white_point_chromaticity_y;
+    uint32_t luminance_max;
+    uint32_t luminance_min;
+} AV1RawMetadataHDRMDCV;
+
+typedef struct AV1RawMetadataScalability {
+    uint8_t scalability_mode_idc;
+    // TODO: more stuff.
+} AV1RawMetadataScalability;
+
+typedef struct AV1RawMetadataITUTT35 {
+    uint8_t itu_t_t35_country_code;
+    uint8_t itu_t_t35_country_code_extension_byte;
+
+    uint8_t     *payload;
+    size_t       payload_size;
+    AVBufferRef *payload_ref;
+} AV1RawMetadataITUTT35;
+
+typedef struct AV1RawMetadataTimecode {
+    uint8_t  counting_type;
+    uint8_t  full_timestamp_flag;
+    uint8_t  discontinuity_flag;
+    uint8_t  cnt_dropped_flag;
+    uint16_t n_frames;
+    uint8_t  seconds_value;
+    uint8_t  minutes_value;
+    uint8_t  hours_value;
+    uint8_t  seconds_flag;
+    uint8_t  minutes_flag;
+    uint8_t  hours_flag;
+    uint8_t  time_offset_length;
+    uint32_t time_offset_value;
+} AV1RawMetadataTimecode;
+
+typedef struct AV1RawMetadata {
+    uint64_t metadata_type;
+    union {
+        AV1RawMetadataHDRCLL      hdr_cll;
+        AV1RawMetadataHDRMDCV     hdr_mdcv;
+        AV1RawMetadataScalability scalability;
+        AV1RawMetadataITUTT35     itut_t35;
+        AV1RawMetadataTimecode    timecode;
+    } metadata;
+} AV1RawMetadata;
+
+
+typedef struct AV1RawOBU {
+    AV1RawOBUHeader header;
+
+    size_t obu_size;
+
+    union {
+        AV1RawSequenceHeader sequence_header;
+        AV1RawFrameHeader    frame_header;
+        AV1RawFrame          frame;
+        AV1RawTileGroup      tile_group;
+        AV1RawTileList       tile_list;
+        AV1RawMetadata       metadata;
+    } obu;
+} AV1RawOBU;
+
+typedef struct AV1ReferenceFrameState {
+    int valid;          // RefValid
+    int frame_id;       // RefFrameId
+    int upscaled_width; // RefUpscaledWidth
+    int frame_width;    // RefFrameWidth
+    int frame_height;   // RefFrameHeight
+    int render_width;   // RefRenderWidth
+    int render_height;  // RefRenderHeight
+    int frame_type;     // RefFrameType
+    int subsampling_x;  // RefSubsamplingX
+    int subsampling_y;  // RefSubsamplingY
+    int bit_depth;      // RefBitDepth
+    int order_hint;     // RefOrderHint
+} AV1ReferenceFrameState;
+
+typedef struct CodedBitstreamAV1Context {
+    AV1RawSequenceHeader *sequence_header;
+    AVBufferRef          *sequence_header_ref;
+
+    int seen_frame_header;
+
+    int temporal_id;
+    int spatial_id;
+    int operating_point_idc;
+
+    int bit_depth;
+    int frame_width;
+    int frame_height;
+    int upscaled_width;
+    int render_width;
+    int render_height;
+
+    int num_planes;
+    int coded_lossless;
+    int all_lossless;
+    int tile_cols;
+    int tile_rows;
+
+    AV1ReferenceFrameState ref[AV1_NUM_REF_FRAMES];
+
+    // Write buffer.
+    uint8_t *write_buffer;
+    size_t   write_buffer_size;
+} CodedBitstreamAV1Context;
+
+
+#endif /* AVCODEC_CBS_AV1_H */

diff --git a/libavcodec/cbs_av1_syntax_template.c b/libavcodec/cbs_av1_syntax_template.c
new file mode 100644
index 0000000..84ab297
--- /dev/null
+++ b/libavcodec/cbs_av1_syntax_template.c

@@ -0,0 +1,1694 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static int FUNC(obu_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                            AV1RawOBUHeader *current)
+{
+    int err;
+    av_unused int zero = 0;
+
+    HEADER("OBU header");
+
+    fc(1, obu_forbidden_bit, 0, 0);
+
+    fc(4, obu_type, 0, AV1_OBU_PADDING);
+    flag(obu_extension_flag);
+    flag(obu_has_size_field);
+
+    fc(1, obu_reserved_1bit, 0, 0);
+
+    if (current->obu_extension_flag) {
+        fb(3, temporal_id);
+        fb(2, spatial_id);
+        fc(3, extension_header_reserved_3bits, 0, 0);
+    }
+
+    return 0;
+}
+
+static int FUNC(trailing_bits)(CodedBitstreamContext *ctx, RWContext *rw, int nb_bits)
+{
+    int err;
+
+    av_assert0(nb_bits > 0);
+
+    fixed(1, trailing_one_bit, 1);
+    --nb_bits;
+
+    while (nb_bits > 0) {
+        fixed(1, trailing_zero_bit, 0);
+        --nb_bits;
+    }
+
+    return 0;
+}
+
+static int FUNC(byte_alignment)(CodedBitstreamContext *ctx, RWContext *rw)
+{
+    int err;
+
+    while (byte_alignment(rw) != 0)
+        fixed(1, zero_bit, 0);
+
+    return 0;
+}
+
+static int FUNC(color_config)(CodedBitstreamContext *ctx, RWContext *rw,
+                              AV1RawColorConfig *current, int seq_profile)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int err;
+
+    flag(high_bitdepth);
+
+    if (seq_profile == FF_PROFILE_AV1_PROFESSIONAL &&
+        current->high_bitdepth) {
+        flag(twelve_bit);
+        priv->bit_depth = current->twelve_bit ? 12 : 10;
+    } else {
+        priv->bit_depth = current->high_bitdepth ? 10 : 8;
+    }
+
+    if (seq_profile == FF_PROFILE_AV1_HIGH)
+        infer(mono_chrome, 0);
+    else
+        flag(mono_chrome);
+    priv->num_planes = current->mono_chrome ? 1 : 3;
+
+    flag(color_description_present_flag);
+    if (current->color_description_present_flag) {
+        fb(8, color_primaries);
+        fb(8, transfer_characteristics);
+        fb(8, matrix_coefficients);
+    } else {
+        infer(color_primaries,          AVCOL_PRI_UNSPECIFIED);
+        infer(transfer_characteristics, AVCOL_TRC_UNSPECIFIED);
+        infer(matrix_coefficients,      AVCOL_SPC_UNSPECIFIED);
+    }
+
+    if (current->mono_chrome) {
+        flag(color_range);
+
+        infer(subsampling_x, 1);
+        infer(subsampling_y, 1);
+        infer(chroma_sample_position, AV1_CSP_UNKNOWN);
+        infer(separate_uv_delta_q, 0);
+
+    } else if (current->color_primaries          == AVCOL_PRI_BT709 &&
+               current->transfer_characteristics == AVCOL_TRC_IEC61966_2_1 &&
+               current->matrix_coefficients      == AVCOL_SPC_RGB) {
+        infer(color_range,   1);
+        infer(subsampling_x, 0);
+        infer(subsampling_y, 0);
+        flag(separate_uv_delta_q);
+
+    } else {
+        flag(color_range);
+
+        if (seq_profile == FF_PROFILE_AV1_MAIN) {
+            infer(subsampling_x, 1);
+            infer(subsampling_y, 1);
+        } else if (seq_profile == FF_PROFILE_AV1_HIGH) {
+            infer(subsampling_x, 0);
+            infer(subsampling_y, 0);
+        } else {
+            if (priv->bit_depth == 12) {
+                fb(1, subsampling_x);
+                if (current->subsampling_x)
+                    fb(1, subsampling_y);
+                else
+                    infer(subsampling_y, 0);
+            } else {
+                infer(subsampling_x, 1);
+                infer(subsampling_y, 0);
+            }
+        }
+        if (current->subsampling_x && current->subsampling_y) {
+            fc(2, chroma_sample_position, AV1_CSP_UNKNOWN,
+                                          AV1_CSP_COLOCATED);
+        }
+
+        flag(separate_uv_delta_q);
+    }
+
+    return 0;
+}
+
+static int FUNC(timing_info)(CodedBitstreamContext *ctx, RWContext *rw,
+                             AV1RawTimingInfo *current)
+{
+    int err;
+
+    fc(32, num_units_in_display_tick, 1, MAX_UINT_BITS(32));
+    fc(32, time_scale,                1, MAX_UINT_BITS(32));
+
+    flag(equal_picture_interval);
+    if (current->equal_picture_interval)
+        uvlc(num_ticks_per_picture_minus_1, 0, MAX_UINT_BITS(32) - 1);
+
+    return 0;
+}
+
+static int FUNC(decoder_model_info)(CodedBitstreamContext *ctx, RWContext *rw,
+                                    AV1RawDecoderModelInfo *current)
+{
+    int err;
+
+    fb(5, buffer_delay_length_minus_1);
+    fb(32, num_units_in_decoding_tick);
+    fb(5,  buffer_removal_time_length_minus_1);
+    fb(5,  frame_presentation_time_length_minus_1);
+
+    return 0;
+}
+
+static int FUNC(sequence_header_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     AV1RawSequenceHeader *current)
+{
+    int i, err;
+
+    HEADER("Sequence Header");
+
+    fc(3, seq_profile, FF_PROFILE_AV1_MAIN,
+                       FF_PROFILE_AV1_PROFESSIONAL);
+    flag(still_picture);
+    flag(reduced_still_picture_header);
+
+    if (current->reduced_still_picture_header) {
+        infer(timing_info_present_flag,           0);
+        infer(decoder_model_info_present_flag,    0);
+        infer(initial_display_delay_present_flag, 0);
+        infer(operating_points_cnt_minus_1,       0);
+        infer(operating_point_idc[0],             0);
+
+        fb(5, seq_level_idx[0]);
+
+        infer(seq_tier[0], 0);
+        infer(decoder_model_present_for_this_op[0],         0);
+        infer(initial_display_delay_present_for_this_op[0], 0);
+
+    } else {
+        flag(timing_info_present_flag);
+        if (current->timing_info_present_flag) {
+            CHECK(FUNC(timing_info)(ctx, rw, &current->timing_info));
+
+            flag(decoder_model_info_present_flag);
+            if (current->decoder_model_info_present_flag) {
+                CHECK(FUNC(decoder_model_info)
+                          (ctx, rw, &current->decoder_model_info));
+            }
+        } else {
+            infer(decoder_model_info_present_flag, 0);
+        }
+
+        flag(initial_display_delay_present_flag);
+
+        fb(5, operating_points_cnt_minus_1);
+        for (i = 0; i <= current->operating_points_cnt_minus_1; i++) {
+            fbs(12, operating_point_idc[i], 1, i);
+            fbs(5,  seq_level_idx[i], 1, i);
+
+            if (current->seq_level_idx[i] > 7)
+                flags(seq_tier[i], 1, i);
+            else
+                infer(seq_tier[i], 0);
+
+            if (current->decoder_model_info_present_flag) {
+                flags(decoder_model_present_for_this_op[i], 1, i);
+                if (current->decoder_model_present_for_this_op[i]) {
+                    int n = current->decoder_model_info.buffer_delay_length_minus_1 + 1;
+                    fbs(n, decoder_buffer_delay[i], 1, i);
+                    fbs(n, encoder_buffer_delay[i], 1, i);
+                    flags(low_delay_mode_flag[i], 1, i);
+                }
+            } else {
+                infer(decoder_model_present_for_this_op[i], 0);
+            }
+
+            if (current->initial_display_delay_present_flag) {
+                flags(initial_display_delay_present_for_this_op[i], 1, i);
+                if (current->initial_display_delay_present_for_this_op[i])
+                    fbs(4, initial_display_delay_minus_1[i], 1, i);
+            }
+        }
+    }
+
+    fb(4, frame_width_bits_minus_1);
+    fb(4, frame_height_bits_minus_1);
+
+    fb(current->frame_width_bits_minus_1  + 1, max_frame_width_minus_1);
+    fb(current->frame_height_bits_minus_1 + 1, max_frame_height_minus_1);
+
+    if (current->reduced_still_picture_header)
+        infer(frame_id_numbers_present_flag, 0);
+    else
+        flag(frame_id_numbers_present_flag);
+    if (current->frame_id_numbers_present_flag) {
+        fb(4, delta_frame_id_length_minus_2);
+        fb(3, additional_frame_id_length_minus_1);
+    }
+
+    flag(use_128x128_superblock);
+    flag(enable_filter_intra);
+    flag(enable_intra_edge_filter);
+
+    if (current->reduced_still_picture_header) {
+        infer(enable_intraintra_compound, 0);
+        infer(enable_masked_compound,     0);
+        infer(enable_warped_motion,       0);
+        infer(enable_dual_filter,         0);
+        infer(enable_order_hint,          0);
+        infer(enable_jnt_comp,            0);
+        infer(enable_ref_frame_mvs,       0);
+
+        infer(seq_force_screen_content_tools,
+              AV1_SELECT_SCREEN_CONTENT_TOOLS);
+        infer(seq_force_integer_mv,
+              AV1_SELECT_INTEGER_MV);
+    } else {
+        flag(enable_intraintra_compound);
+        flag(enable_masked_compound);
+        flag(enable_warped_motion);
+        flag(enable_dual_filter);
+
+        flag(enable_order_hint);
+        if (current->enable_order_hint) {
+            flag(enable_jnt_comp);
+            flag(enable_ref_frame_mvs);
+        } else {
+            infer(enable_jnt_comp,      0);
+            infer(enable_ref_frame_mvs, 0);
+        }
+
+        flag(seq_choose_screen_content_tools);
+        if (current->seq_choose_screen_content_tools)
+            infer(seq_force_screen_content_tools,
+                  AV1_SELECT_SCREEN_CONTENT_TOOLS);
+        else
+            fb(1, seq_force_screen_content_tools);
+        if (current->seq_force_screen_content_tools > 0) {
+            flag(seq_choose_integer_mv);
+            if (current->seq_choose_integer_mv)
+                infer(seq_force_integer_mv,
+                      AV1_SELECT_INTEGER_MV);
+            else
+                fb(1, seq_force_integer_mv);
+        } else {
+            infer(seq_force_integer_mv, AV1_SELECT_INTEGER_MV);
+        }
+
+        if (current->enable_order_hint)
+            fb(3, order_hint_bits_minus_1);
+    }
+
+    flag(enable_superres);
+    flag(enable_cdef);
+    flag(enable_restoration);
+
+    CHECK(FUNC(color_config)(ctx, rw, &current->color_config,
+                             current->seq_profile));
+
+    flag(film_grain_params_present);
+
+    return 0;
+}
+
+static int FUNC(temporal_delimiter_obu)(CodedBitstreamContext *ctx, RWContext *rw)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+
+    HEADER("Temporal Delimiter");
+
+    priv->seen_frame_header = 0;
+
+    return 0;
+}
+
+static int FUNC(superres_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                 AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int denom, err;
+
+    if (seq->enable_superres)
+        flag(use_superres);
+    else
+        infer(use_superres, 0);
+
+    if (current->use_superres) {
+        fb(3, coded_denom);
+        denom = current->coded_denom + AV1_SUPERRES_DENOM_MIN;
+    } else {
+        denom = AV1_SUPERRES_NUM;
+    }
+
+    priv->upscaled_width = priv->frame_width;
+    priv->frame_width = (priv->upscaled_width * AV1_SUPERRES_NUM +
+                         denom / 2) / denom;
+
+    return 0;
+}
+
+static int FUNC(frame_size)(CodedBitstreamContext *ctx, RWContext *rw,
+                            AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int err;
+
+    if (current->frame_size_override_flag) {
+        fb(seq->frame_width_bits_minus_1 + 1,  frame_width_minus_1);
+        fb(seq->frame_height_bits_minus_1 + 1, frame_height_minus_1);
+
+        priv->frame_width  = current->frame_width_minus_1  + 1;
+        priv->frame_height = current->frame_height_minus_1 + 1;
+    } else {
+        priv->frame_width  = seq->max_frame_width_minus_1  + 1;
+        priv->frame_height = seq->max_frame_height_minus_1 + 1;
+    }
+
+    CHECK(FUNC(superres_params)(ctx, rw, current));
+
+    return 0;
+}
+
+static int FUNC(render_size)(CodedBitstreamContext *ctx, RWContext *rw,
+                             AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int err;
+
+    flag(render_and_frame_size_different);
+
+    if (current->render_and_frame_size_different) {
+        fb(16, render_width_minus_1);
+        fb(16, render_height_minus_1);
+
+        priv->render_width  = current->render_width_minus_1  + 1;
+        priv->render_height = current->render_height_minus_1 + 1;
+    } else {
+        priv->render_width  = priv->upscaled_width;
+        priv->render_height = priv->frame_height;
+    }
+
+    return 0;
+}
+
+static int FUNC(frame_size_with_refs)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int i, err;
+
+    for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+        flag(found_ref);
+        if (current->found_ref) {
+            AV1ReferenceFrameState *ref =
+                &priv->ref[current->ref_frame_idx[i]];
+
+            if (!ref->valid) {
+                av_log(ctx->log_ctx, AV_LOG_ERROR,
+                       "Missing reference frame needed for frame size "
+                       "(ref = %d, ref_frame_idx = %d).\n",
+                       i, current->ref_frame_idx[i]);
+                return AVERROR_INVALIDDATA;
+            }
+
+            priv->upscaled_width = ref->upscaled_width;
+            priv->frame_width    = ref->frame_width;
+            priv->frame_height   = ref->frame_height;
+            priv->render_width   = ref->render_width;
+            priv->render_height  = ref->render_height;
+            break;
+        }
+    }
+
+    if (current->found_ref == 0) {
+        CHECK(FUNC(frame_size)(ctx, rw, current));
+        CHECK(FUNC(render_size)(ctx, rw, current));
+    } else {
+        CHECK(FUNC(superres_params)(ctx, rw, current));
+    }
+
+    return 0;
+}
+
+static int FUNC(interpolation_filter)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      AV1RawFrameHeader *current)
+{
+    int err;
+
+    flag(is_filter_switchable);
+    if (current->is_filter_switchable)
+        infer(interpolation_filter,
+              AV1_INTERPOLATION_FILTER_SWITCHABLE);
+    else
+        fb(2, interpolation_filter);
+
+    return 0;
+}
+
+static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw,
+                           AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int mi_cols, mi_rows, sb_cols, sb_rows, sb_shift, sb_size;
+    int max_tile_width_sb, max_tile_height_sb, max_tile_area_sb;
+    int min_log2_tile_cols, max_log2_tile_cols, max_log2_tile_rows;
+    int min_log2_tiles, min_log2_tile_rows;
+    int i, err;
+
+    mi_cols = 2 * ((priv->frame_width  + 7) >> 3);
+    mi_rows = 2 * ((priv->frame_height + 7) >> 3);
+
+    sb_cols = seq->use_128x128_superblock ? ((mi_cols + 31) >> 5)
+                                          : ((mi_cols + 15) >> 4);
+    sb_rows = seq->use_128x128_superblock ? ((mi_rows + 31) >> 5)
+                                          : ((mi_rows + 15) >> 4);
+
+    sb_shift = seq->use_128x128_superblock ? 5 : 4;
+    sb_size  = sb_shift + 2;
+
+    max_tile_width_sb = AV1_MAX_TILE_WIDTH >> sb_size;
+    max_tile_area_sb  = AV1_MAX_TILE_AREA  >> (2 * sb_size);
+
+    min_log2_tile_cols = cbs_av1_tile_log2(max_tile_width_sb, sb_cols);
+    max_log2_tile_cols = cbs_av1_tile_log2(1, FFMIN(sb_cols, AV1_MAX_TILE_COLS));
+    max_log2_tile_rows = cbs_av1_tile_log2(1, FFMIN(sb_rows, AV1_MAX_TILE_ROWS));
+    min_log2_tiles = FFMAX(min_log2_tile_cols,
+                           cbs_av1_tile_log2(max_tile_area_sb, sb_rows * sb_cols));
+
+    flag(uniform_tile_spacing_flag);
+
+    if (current->uniform_tile_spacing_flag) {
+        int tile_width_sb, tile_height_sb;
+
+        increment(tile_cols_log2, min_log2_tile_cols, max_log2_tile_cols);
+
+        tile_width_sb = (sb_cols + (1 << current->tile_cols_log2) - 1) >>
+            current->tile_cols_log2;
+        current->tile_cols = (sb_cols + tile_width_sb - 1) / tile_width_sb;
+
+        min_log2_tile_rows = FFMAX(min_log2_tiles - current->tile_cols_log2, 0);
+
+        increment(tile_rows_log2, min_log2_tile_rows, max_log2_tile_rows);
+
+        tile_height_sb = (sb_rows + (1 << current->tile_rows_log2) - 1) >>
+            current->tile_rows_log2;
+        current->tile_rows = (sb_rows + tile_height_sb - 1) / tile_height_sb;
+
+    } else {
+        int widest_tile_sb, start_sb, size_sb, max_width, max_height;
+
+        widest_tile_sb = 0;
+
+        start_sb = 0;
+        for (i = 0; start_sb < sb_cols && i < AV1_MAX_TILE_COLS; i++) {
+            max_width = FFMIN(sb_cols - start_sb, max_tile_width_sb);
+            ns(max_width, width_in_sbs_minus_1[i], 1, i);
+            size_sb = current->width_in_sbs_minus_1[i] + 1;
+            widest_tile_sb = FFMAX(size_sb, widest_tile_sb);
+            start_sb += size_sb;
+        }
+        current->tile_cols_log2 = cbs_av1_tile_log2(1, i);
+        current->tile_cols = i;
+
+        if (min_log2_tiles > 0)
+            max_tile_area_sb = (sb_rows * sb_cols) >> (min_log2_tiles + 1);
+        else
+            max_tile_area_sb = sb_rows * sb_cols;
+        max_tile_height_sb = FFMAX(max_tile_area_sb / widest_tile_sb, 1);
+
+        start_sb = 0;
+        for (i = 0; start_sb < sb_rows && i < AV1_MAX_TILE_ROWS; i++) {
+            max_height = FFMIN(sb_rows - start_sb, max_tile_height_sb);
+            ns(max_height, height_in_sbs_minus_1[i], 1, i);
+            size_sb = current->height_in_sbs_minus_1[i] + 1;
+            start_sb += size_sb;
+        }
+        current->tile_rows_log2 = cbs_av1_tile_log2(1, i);
+        current->tile_rows = i;
+    }
+
+    if (current->tile_cols_log2 > 0 ||
+        current->tile_rows_log2 > 0) {
+        fb(current->tile_cols_log2 + current->tile_rows_log2,
+           context_update_tile_id);
+        fb(2, tile_size_bytes_minus1);
+    } else {
+        infer(context_update_tile_id, 0);
+    }
+
+    priv->tile_cols = current->tile_cols;
+    priv->tile_rows = current->tile_rows;
+
+    return 0;
+}
+
+static int FUNC(quantization_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int err;
+
+    fb(8, base_q_idx);
+
+    delta_q(delta_q_y_dc);
+
+    if (priv->num_planes > 1) {
+        if (seq->color_config.separate_uv_delta_q)
+            flag(diff_uv_delta);
+        else
+            infer(diff_uv_delta, 0);
+
+        delta_q(delta_q_u_dc);
+        delta_q(delta_q_u_ac);
+
+        if (current->diff_uv_delta) {
+            delta_q(delta_q_v_dc);
+            delta_q(delta_q_v_ac);
+        } else {
+            infer(delta_q_v_dc, current->delta_q_u_dc);
+            infer(delta_q_v_ac, current->delta_q_u_ac);
+        }
+    } else {
+        infer(delta_q_u_dc, 0);
+        infer(delta_q_u_ac, 0);
+        infer(delta_q_v_dc, 0);
+        infer(delta_q_v_ac, 0);
+    }
+
+    flag(using_qmatrix);
+    if (current->using_qmatrix) {
+        fb(4, qm_y);
+        fb(4, qm_u);
+        if (seq->color_config.separate_uv_delta_q)
+            fb(4, qm_v);
+        else
+            infer(qm_v, current->qm_u);
+    }
+
+    return 0;
+}
+
+static int FUNC(segmentation_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     AV1RawFrameHeader *current)
+{
+    static const uint8_t bits[AV1_SEG_LVL_MAX] = { 8, 6, 6, 6, 6, 3, 0, 0 };
+    static const uint8_t sign[AV1_SEG_LVL_MAX] = { 1, 1, 1, 1, 1, 0, 0, 0 };
+    int i, j, err;
+
+    flag(segmentation_enabled);
+
+    if (current->segmentation_enabled) {
+        if (current->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
+            infer(segmentation_update_map,      1);
+            infer(segmentation_temporal_update, 0);
+            infer(segmentation_update_data,     1);
+        } else {
+            flag(segmentation_update_map);
+            if (current->segmentation_update_map)
+                flag(segmentation_temporal_update);
+            else
+                infer(segmentation_temporal_update, 0);
+            flag(segmentation_update_data);
+        }
+
+        if (current->segmentation_update_data) {
+            for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
+                for (j = 0; j < AV1_SEG_LVL_MAX; j++) {
+                    flags(feature_enabled[i][j], 2, i, j);
+
+                    if (current->feature_enabled[i][j] && bits[j] > 0) {
+                        if (sign[j])
+                            sus(1 + bits[j], feature_value[i][j], 2, i, j);
+                        else
+                            fbs(bits[j], feature_value[i][j], 2, i, j);
+                    } else {
+                        infer(feature_value[i][j], 0);
+                    }
+                }
+            }
+        }
+    } else {
+        for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
+            for (j = 0; j < AV1_SEG_LVL_MAX; j++) {
+                infer(feature_enabled[i][j], 0);
+                infer(feature_value[i][j],   0);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(delta_q_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                AV1RawFrameHeader *current)
+{
+    int err;
+
+    if (current->base_q_idx > 0)
+        flag(delta_q_present);
+    else
+        infer(delta_q_present, 0);
+
+    if (current->delta_q_present)
+        fb(2, delta_q_res);
+
+    return 0;
+}
+
+static int FUNC(delta_lf_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                 AV1RawFrameHeader *current)
+{
+    int err;
+
+    if (current->delta_q_present) {
+        if (!current->allow_intrabc)
+            flag(delta_lf_present);
+        else
+            infer(delta_lf_present, 0);
+        if (current->delta_lf_present) {
+            fb(2, delta_lf_res);
+            flag(delta_lf_multi);
+        } else {
+            infer(delta_lf_res,   0);
+            infer(delta_lf_multi, 0);
+        }
+    } else {
+        infer(delta_lf_present, 0);
+        infer(delta_lf_res,     0);
+        infer(delta_lf_multi,   0);
+    }
+
+    return 0;
+}
+
+static int FUNC(loop_filter_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                    AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int i, err;
+
+    if (priv->coded_lossless || current->allow_intrabc) {
+        infer(loop_filter_level[0], 0);
+        infer(loop_filter_level[1], 0);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_INTRA],    1);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_LAST],     0);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_LAST2],    0);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_LAST3],    0);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_BWDREF],   0);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_GOLDEN],  -1);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF],  -1);
+        infer(loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF2], -1);
+        for (i = 0; i < 2; i++)
+            infer(loop_filter_mode_deltas[i], 0);
+        return 0;
+    }
+
+    fb(6, loop_filter_level[0]);
+    fb(6, loop_filter_level[1]);
+
+    if (priv->num_planes > 1) {
+        if (current->loop_filter_level[0] ||
+            current->loop_filter_level[1]) {
+            fb(6, loop_filter_level[2]);
+            fb(6, loop_filter_level[3]);
+        }
+    }
+
+    fb(3, loop_filter_sharpness);
+
+    flag(loop_filter_delta_enabled);
+    if (current->loop_filter_delta_enabled) {
+        flag(loop_filter_delta_update);
+        if (current->loop_filter_delta_update) {
+            for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) {
+                flags(update_ref_delta[i], 1, i);
+                if (current->update_ref_delta[i])
+                    sus(1 + 6, loop_filter_ref_deltas[i], 1, i);
+            }
+            for (i = 0; i < 2; i++) {
+                flags(update_mode_delta[i], 1, i);
+                if (current->update_mode_delta[i])
+                    sus(1 + 6, loop_filter_mode_deltas[i], 1, i);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(cdef_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                             AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int i, err;
+
+    if (priv->coded_lossless || current->allow_intrabc ||
+        !seq->enable_cdef) {
+        infer(cdef_damping_minus_3, 0);
+        infer(cdef_bits, 0);
+        infer(cdef_y_pri_strength[0],  0);
+        infer(cdef_y_sec_strength[0],  0);
+        infer(cdef_uv_pri_strength[0], 0);
+        infer(cdef_uv_sec_strength[0], 0);
+
+        return 0;
+    }
+
+    fb(2, cdef_damping_minus_3);
+    fb(2, cdef_bits);
+
+    for (i = 0; i < (1 << current->cdef_bits); i++) {
+        fbs(4, cdef_y_pri_strength[i], 1, i);
+        fbs(2, cdef_y_sec_strength[i], 1, i);
+
+        if (priv->num_planes > 1) {
+            fbs(4, cdef_uv_pri_strength[i], 1, i);
+            fbs(2, cdef_uv_sec_strength[i], 1, i);
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(lr_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                           AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int uses_lr,  uses_chroma_lr;
+    int i, err;
+
+    if (priv->all_lossless || current->allow_intrabc ||
+        !seq->enable_restoration) {
+        return 0;
+    }
+
+    uses_lr = uses_chroma_lr = 0;
+    for (i = 0; i < priv->num_planes; i++) {
+        fbs(2, lr_type[i], 1, i);
+
+        if (current->lr_type[i] != 0) {
+            uses_lr = 1;
+            if (i > 0)
+                uses_chroma_lr = 1;
+        }
+    }
+
+    if (uses_lr) {
+        if (seq->use_128x128_superblock)
+            increment(lr_unit_shift, 1, 2);
+        else
+            increment(lr_unit_shift, 0, 2);
+
+        if(seq->color_config.subsampling_x &&
+           seq->color_config.subsampling_y && uses_chroma_lr) {
+            fb(1, lr_uv_shift);
+        } else {
+            infer(lr_uv_shift, 0);
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(read_tx_mode)(CodedBitstreamContext *ctx, RWContext *rw,
+                              AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int err;
+
+    if (priv->coded_lossless)
+        infer(tx_mode, 0);
+    else
+        increment(tx_mode, 1, 2);
+
+    return 0;
+}
+
+static int FUNC(frame_reference_mode)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      AV1RawFrameHeader *current)
+{
+    int err;
+
+    if (current->frame_type == AV1_FRAME_INTRA_ONLY ||
+        current->frame_type == AV1_FRAME_KEY)
+        infer(reference_select, 0);
+    else
+        flag(reference_select);
+
+    return 0;
+}
+
+static int FUNC(skip_mode_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                  AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int skip_mode_allowed;
+    int err;
+
+    if (current->frame_type == AV1_FRAME_KEY ||
+        current->frame_type == AV1_FRAME_INTRA_ONLY ||
+        !current->reference_select || !seq->enable_order_hint) {
+        skip_mode_allowed = 0;
+    } else {
+        int forward_idx,  backward_idx;
+        int forward_hint, backward_hint;
+        int ref_hint, dist, i;
+
+        forward_idx  = -1;
+        backward_idx = -1;
+        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+            ref_hint = priv->ref[i].order_hint;
+            dist = cbs_av1_get_relative_dist(seq, ref_hint,
+                                             current->order_hint);
+            if (dist < 0) {
+                if (forward_idx < 0 ||
+                    cbs_av1_get_relative_dist(seq, ref_hint,
+                                              forward_hint) > 0) {
+                    forward_idx  = i;
+                    forward_hint = ref_hint;
+                }
+            } else if (dist > 0) {
+                if (backward_idx < 0 ||
+                    cbs_av1_get_relative_dist(seq, ref_hint,
+                                              backward_hint) < 0) {
+                    backward_idx  = i;
+                    backward_hint = ref_hint;
+                }
+            }
+        }
+
+        if (forward_idx < 0) {
+            skip_mode_allowed = 0;
+        } else if (backward_idx >= 0) {
+            skip_mode_allowed = 1;
+            // Frames for skip mode are forward_idx and backward_idx.
+        } else {
+            int second_forward_idx;
+            int second_forward_hint;
+
+            second_forward_idx = -1;
+            for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+                ref_hint = priv->ref[i].order_hint;
+                if (cbs_av1_get_relative_dist(seq, ref_hint,
+                                              forward_hint) < 0) {
+                    if (second_forward_idx < 0 ||
+                        cbs_av1_get_relative_dist(seq, ref_hint,
+                                                  second_forward_hint) > 0) {
+                        second_forward_idx  = i;
+                        second_forward_hint = ref_hint;
+                    }
+                }
+            }
+
+            if (second_forward_idx < 0) {
+                skip_mode_allowed = 0;
+            } else {
+                skip_mode_allowed = 1;
+                // Frames for skip mode are forward_idx and second_forward_idx.
+            }
+        }
+    }
+
+    if (skip_mode_allowed)
+        flag(skip_mode_present);
+    else
+        infer(skip_mode_present, 0);
+
+    return 0;
+}
+
+static int FUNC(global_motion_param)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     AV1RawFrameHeader *current,
+                                     int type, int ref, int idx)
+{
+    uint32_t abs_bits, prec_bits, num_syms;
+    int err;
+
+    if (idx < 2) {
+        if (type == AV1_WARP_MODEL_TRANSLATION) {
+            abs_bits  = AV1_GM_ABS_TRANS_ONLY_BITS  - !current->allow_high_precision_mv;
+            prec_bits = AV1_GM_TRANS_ONLY_PREC_BITS - !current->allow_high_precision_mv;
+        } else {
+            abs_bits  = AV1_GM_ABS_TRANS_BITS;
+            prec_bits = AV1_GM_TRANS_PREC_BITS;
+        }
+    } else {
+        abs_bits  = AV1_GM_ABS_ALPHA_BITS;
+        prec_bits = AV1_GM_ALPHA_PREC_BITS;
+    }
+
+    num_syms = 2 * (1 << abs_bits) + 1;
+    subexp(gm_params[ref][idx], num_syms, 2, ref, idx);
+
+    // Actual gm_params value is not reconstructed here.
+    (void)prec_bits;
+
+    return 0;
+}
+
+static int FUNC(global_motion_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      AV1RawFrameHeader *current)
+{
+    int ref, type;
+    int err;
+
+    if (current->frame_type == AV1_FRAME_KEY ||
+        current->frame_type == AV1_FRAME_INTRA_ONLY)
+        return 0;
+
+    for (ref = AV1_REF_FRAME_LAST; ref <= AV1_REF_FRAME_ALTREF; ref++) {
+        flags(is_global[ref], 1, ref);
+        if (current->is_global[ref]) {
+            flags(is_rot_zoom[ref], 1, ref);
+            if (current->is_rot_zoom[ref]) {
+                type = AV1_WARP_MODEL_ROTZOOM;
+            } else {
+                flags(is_translation[ref], 1, ref);
+                type = current->is_translation[ref] ? AV1_WARP_MODEL_TRANSLATION
+                                                    : AV1_WARP_MODEL_AFFINE;
+            }
+        } else {
+            type = AV1_WARP_MODEL_IDENTITY;
+        }
+
+        if (type >= AV1_WARP_MODEL_ROTZOOM) {
+            CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 2));
+            CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 3));
+            if (type == AV1_WARP_MODEL_AFFINE) {
+                CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 4));
+                CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 5));
+            } else {
+                // gm_params[ref][4] = -gm_params[ref][3]
+                // gm_params[ref][5] =  gm_params[ref][2]
+            }
+        }
+        if (type >= AV1_WARP_MODEL_TRANSLATION) {
+            CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 0));
+            CHECK(FUNC(global_motion_param)(ctx, rw, current, type, ref, 1));
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(film_grain_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context  *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq = priv->sequence_header;
+    int num_pos_luma, num_pos_chroma;
+    int i, err;
+
+    if (!seq->film_grain_params_present ||
+        (!current->show_frame && !current->showable_frame))
+        return 0;
+
+    flag(apply_grain);
+
+    if (!current->apply_grain)
+        return 0;
+
+    fb(16, grain_seed);
+
+    if (current->frame_type == AV1_FRAME_INTER)
+        flag(update_grain);
+    else
+        infer(update_grain, 1);
+
+    if (!current->update_grain) {
+        fb(3, film_grain_params_ref_idx);
+        return 0;
+    }
+
+    fb(4, num_y_points);
+    for (i = 0; i < current->num_y_points; i++) {
+        fbs(8, point_y_value[i],   1, i);
+        fbs(8, point_y_scaling[i], 1, i);
+    }
+
+    if (seq->color_config.mono_chrome)
+        infer(chroma_scaling_from_luma, 0);
+    else
+        flag(chroma_scaling_from_luma);
+
+    if (seq->color_config.mono_chrome ||
+        current->chroma_scaling_from_luma ||
+        (seq->color_config.subsampling_x == 1 &&
+         seq->color_config.subsampling_y == 1 &&
+         current->num_y_points == 0)) {
+        infer(num_cb_points, 0);
+        infer(num_cr_points, 0);
+    } else {
+        fb(4, num_cb_points);
+        for (i = 0; i < current->num_cb_points; i++) {
+            fbs(8, point_cb_value[i],   1, i);
+            fbs(8, point_cb_scaling[i], 1, i);
+        }
+        fb(4, num_cr_points);
+        for (i = 0; i < current->num_cr_points; i++) {
+            fbs(8, point_cr_value[i],   1, i);
+            fbs(8, point_cr_scaling[i], 1, i);
+        }
+    }
+
+    fb(2, grain_scaling_minus_8);
+    fb(2, ar_coeff_lag);
+    num_pos_luma = 2 * current->ar_coeff_lag * (current->ar_coeff_lag + 1);
+    if (current->num_y_points) {
+        num_pos_chroma = num_pos_luma + 1;
+        for (i = 0; i < num_pos_luma; i++)
+            fbs(8, ar_coeffs_y_plus_128[i], 1, i);
+    } else {
+        num_pos_chroma = num_pos_luma;
+    }
+    if (current->chroma_scaling_from_luma || current->num_cb_points) {
+        for (i = 0; i < num_pos_chroma; i++)
+            fbs(8, ar_coeffs_cb_plus_128[i], 1, i);
+    }
+    if (current->chroma_scaling_from_luma || current->num_cr_points) {
+        for (i = 0; i < num_pos_chroma; i++)
+            fbs(8, ar_coeffs_cr_plus_128[i], 1, i);
+    }
+    fb(2, ar_coeff_shift_minus_6);
+    fb(2, grain_scale_shift);
+    if (current->num_cb_points) {
+        fb(8, cb_mult);
+        fb(8, cb_luma_mult);
+        fb(9, cb_offset);
+    }
+    if (current->num_cr_points) {
+        fb(8, cr_mult);
+        fb(8, cr_luma_mult);
+        fb(9, cr_offset);
+    }
+
+    flag(overlap_flag);
+    flag(clip_to_restricted_range);
+
+    return 0;
+}
+
+static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    const AV1RawSequenceHeader *seq;
+    int id_len, diff_len, all_frames, frame_is_intra, order_hint_bits;
+    int i, err;
+
+    if (!priv->sequence_header) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "No sequence header available: "
+               "unable to decode frame header.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    seq = priv->sequence_header;
+
+    id_len = seq->additional_frame_id_length_minus_1 +
+             seq->delta_frame_id_length_minus_2 + 3;
+    all_frames = (1 << AV1_NUM_REF_FRAMES) - 1;
+
+    if (seq->reduced_still_picture_header) {
+        infer(show_existing_frame, 0);
+        infer(frame_type,     AV1_FRAME_KEY);
+        infer(show_frame,     1);
+        infer(showable_frame, 0);
+        frame_is_intra = 1;
+
+    } else {
+        flag(show_existing_frame);
+
+        if (current->show_existing_frame) {
+            AV1ReferenceFrameState *frame;
+
+            fb(3, frame_to_show_map_idx);
+            frame = &priv->ref[current->frame_to_show_map_idx];
+
+            if (seq->decoder_model_info_present_flag &&
+                !seq->timing_info.equal_picture_interval) {
+                fb(seq->decoder_model_info.frame_presentation_time_length_minus_1 + 1,
+                   frame_presentation_time);
+            }
+
+            if (seq->frame_id_numbers_present_flag)
+                fb(id_len, display_frame_id);
+
+            if (frame->frame_type == AV1_FRAME_KEY)
+                infer(refresh_frame_flags, all_frames);
+            else
+                infer(refresh_frame_flags, 0);
+
+            return 0;
+        }
+
+        fb(2, frame_type);
+        frame_is_intra = (current->frame_type == AV1_FRAME_INTRA_ONLY ||
+                          current->frame_type == AV1_FRAME_KEY);
+
+        flag(show_frame);
+        if (current->show_frame &&
+            seq->decoder_model_info_present_flag &&
+            !seq->timing_info.equal_picture_interval) {
+            fb(seq->decoder_model_info.frame_presentation_time_length_minus_1 + 1,
+               frame_presentation_time);
+        }
+        if (current->show_frame)
+            infer(showable_frame, current->frame_type != AV1_FRAME_KEY);
+        else
+            flag(showable_frame);
+
+        if (current->frame_type == AV1_FRAME_SWITCH ||
+            (current->frame_type == AV1_FRAME_KEY && current->show_frame))
+            infer(error_resilient_mode, 1);
+        else
+            flag(error_resilient_mode);
+    }
+
+    if (current->frame_type == AV1_FRAME_KEY && current->show_frame) {
+        for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+            priv->ref[i].valid = 0;
+            priv->ref[i].order_hint = 0;
+        }
+    }
+
+    flag(disable_cdf_update);
+
+    if (seq->seq_force_screen_content_tools ==
+        AV1_SELECT_SCREEN_CONTENT_TOOLS) {
+        flag(allow_screen_content_tools);
+    } else {
+        infer(allow_screen_content_tools,
+              seq->seq_force_screen_content_tools);
+    }
+    if (current->allow_screen_content_tools) {
+        if (seq->seq_force_integer_mv == AV1_SELECT_INTEGER_MV)
+            flag(force_integer_mv);
+        else
+            infer(force_integer_mv, seq->seq_force_integer_mv);
+    } else {
+        infer(force_integer_mv, 0);
+    }
+
+    if (seq->frame_id_numbers_present_flag) {
+        fb(id_len, current_frame_id);
+
+        diff_len = seq->delta_frame_id_length_minus_2 + 2;
+        for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+            if (current->current_frame_id > (1 << diff_len)) {
+                if (priv->ref[i].frame_id > current->current_frame_id ||
+                    priv->ref[i].frame_id < (current->current_frame_id -
+                                             (1 << diff_len)))
+                    priv->ref[i].valid = 0;
+            } else {
+                if (priv->ref[i].frame_id > current->current_frame_id &&
+                    priv->ref[i].frame_id < ((1 << id_len) +
+                                             current->current_frame_id -
+                                             (1 << diff_len)))
+                    priv->ref[i].valid = 0;
+            }
+        }
+    } else {
+        infer(current_frame_id, 0);
+    }
+
+    if (current->frame_type == AV1_FRAME_SWITCH)
+        infer(frame_size_override_flag, 1);
+    else if(seq->reduced_still_picture_header)
+        infer(frame_size_override_flag, 0);
+    else
+        flag(frame_size_override_flag);
+
+    order_hint_bits =
+        seq->enable_order_hint ? seq->order_hint_bits_minus_1 + 1 : 0;
+    if (order_hint_bits > 0)
+        fb(order_hint_bits, order_hint);
+    else
+        infer(order_hint, 0);
+
+    if (frame_is_intra || current->error_resilient_mode)
+        infer(primary_ref_frame, AV1_PRIMARY_REF_NONE);
+    else
+        fb(3, primary_ref_frame);
+
+    if (seq->decoder_model_info_present_flag) {
+        flag(buffer_removal_time_present_flag);
+        if (current->buffer_removal_time_present_flag) {
+            for (i = 0; i <= seq->operating_points_cnt_minus_1; i++) {
+                if (seq->decoder_model_present_for_this_op[i]) {
+                    int op_pt_idc = seq->operating_point_idc[i];
+                    int in_temporal_layer = (op_pt_idc >>  priv->temporal_id    ) & 1;
+                    int in_spatial_layer  = (op_pt_idc >> (priv->spatial_id + 8)) & 1;
+                    if (seq->operating_point_idc[i] == 0 ||
+                        in_temporal_layer || in_spatial_layer) {
+                        fbs(seq->decoder_model_info.buffer_removal_time_length_minus_1 + 1,
+                            buffer_removal_time[i], 1, i);
+                    }
+                }
+            }
+        }
+    }
+
+    if (current->frame_type == AV1_FRAME_SWITCH ||
+        (current->frame_type == AV1_FRAME_KEY && current->show_frame))
+        infer(refresh_frame_flags, all_frames);
+    else
+        fb(8, refresh_frame_flags);
+
+    if (!frame_is_intra || current->refresh_frame_flags != all_frames) {
+        if (current->error_resilient_mode && seq->enable_order_hint) {
+            for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+                fbs(order_hint_bits, ref_order_hint[i], 1, i);
+                if (current->ref_order_hint[i] != priv->ref[i].order_hint)
+                    priv->ref[i].valid = 0;
+            }
+        }
+    }
+
+    if (current->frame_type == AV1_FRAME_KEY ||
+        current->frame_type == AV1_FRAME_INTRA_ONLY) {
+        CHECK(FUNC(frame_size)(ctx, rw, current));
+        CHECK(FUNC(render_size)(ctx, rw, current));
+
+        if (current->allow_screen_content_tools &&
+            priv->upscaled_width == priv->frame_width)
+            flag(allow_intrabc);
+        else
+            infer(allow_intrabc, 0);
+
+    } else {
+        if (!seq->enable_order_hint) {
+            infer(frame_refs_short_signaling, 0);
+        } else {
+            flag(frame_refs_short_signaling);
+            if (current->frame_refs_short_signaling) {
+                fb(3, last_frame_idx);
+                fb(3, golden_frame_idx);
+
+                for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+                    if (i == 0)
+                        infer(ref_frame_idx[i], current->last_frame_idx);
+                    else if (i == AV1_REF_FRAME_GOLDEN -
+                                  AV1_REF_FRAME_LAST)
+                        infer(ref_frame_idx[i], current->golden_frame_idx);
+                    else
+                        infer(ref_frame_idx[i], -1);
+                }
+            }
+        }
+
+        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+            if (!current->frame_refs_short_signaling)
+                fbs(3, ref_frame_idx[i], 1, i);
+            if (seq->frame_id_numbers_present_flag) {
+                fb(seq->delta_frame_id_length_minus_2 + 2,
+                   delta_frame_id_minus1);
+            }
+        }
+
+        if (current->frame_size_override_flag &&
+            !current->error_resilient_mode) {
+            CHECK(FUNC(frame_size_with_refs)(ctx, rw, current));
+        } else {
+            CHECK(FUNC(frame_size)(ctx, rw, current));
+            CHECK(FUNC(render_size)(ctx, rw, current));
+        }
+
+        if (current->force_integer_mv)
+            infer(allow_high_precision_mv, 0);
+        else
+            flag(allow_high_precision_mv);
+
+        CHECK(FUNC(interpolation_filter)(ctx, rw, current));
+
+        flag(is_motion_mode_switchable);
+
+        if (current->error_resilient_mode ||
+            !seq->enable_ref_frame_mvs)
+            infer(use_ref_frame_mvs, 0);
+        else
+            flag(use_ref_frame_mvs);
+
+        infer(allow_intrabc, 0);
+    }
+
+    if (!frame_is_intra) {
+        // Derive reference frame sign biases.
+    }
+
+    if (seq->reduced_still_picture_header || current->disable_cdf_update)
+        infer(disable_frame_end_update_cdf, 1);
+    else
+        flag(disable_frame_end_update_cdf);
+
+    if (current->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
+        // Init non-coeff CDFs.
+        // Setup past independence.
+    } else {
+        // Load CDF tables from previous frame.
+        // Load params from previous frame.
+    }
+
+    if (current->use_ref_frame_mvs) {
+        // Perform motion field estimation process.
+    }
+
+    CHECK(FUNC(tile_info)(ctx, rw, current));
+
+    CHECK(FUNC(quantization_params)(ctx, rw, current));
+
+    CHECK(FUNC(segmentation_params)(ctx, rw, current));
+
+    CHECK(FUNC(delta_q_params)(ctx, rw, current));
+
+    CHECK(FUNC(delta_lf_params)(ctx, rw, current));
+
+    // Init coeff CDFs / load previous segments.
+
+    priv->coded_lossless = 1;
+    for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
+        int qindex;
+        if (current->feature_enabled[i][AV1_SEG_LVL_ALT_Q]) {
+            qindex = (current->base_q_idx +
+                      current->feature_value[i][AV1_SEG_LVL_ALT_Q]);
+        } else {
+            qindex = current->base_q_idx;
+        }
+        qindex = av_clip_uintp2(qindex, 8);
+
+        if (qindex                || current->delta_q_y_dc ||
+            current->delta_q_u_ac || current->delta_q_u_dc ||
+            current->delta_q_v_ac || current->delta_q_v_dc) {
+            priv->coded_lossless = 0;
+        }
+    }
+    priv->all_lossless = priv->coded_lossless &&
+        priv->frame_width == priv->upscaled_width;
+
+    CHECK(FUNC(loop_filter_params)(ctx, rw, current));
+
+    CHECK(FUNC(cdef_params)(ctx, rw, current));
+
+    CHECK(FUNC(lr_params)(ctx, rw, current));
+
+    CHECK(FUNC(read_tx_mode)(ctx, rw, current));
+
+    CHECK(FUNC(frame_reference_mode)(ctx, rw, current));
+
+    CHECK(FUNC(skip_mode_params)(ctx, rw, current));
+
+    if (frame_is_intra || current->error_resilient_mode ||
+        !seq->enable_warped_motion)
+        infer(allow_warped_motion, 0);
+    else
+        flag(allow_warped_motion);
+
+    flag(reduced_tx_set);
+
+    CHECK(FUNC(global_motion_params)(ctx, rw, current));
+
+    CHECK(FUNC(film_grain_params)(ctx, rw, current));
+
+    for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
+        if (current->refresh_frame_flags & (1 << i)) {
+            priv->ref[i] = (AV1ReferenceFrameState) {
+                .valid          = 1,
+                .frame_id       = current->current_frame_id,
+                .upscaled_width = priv->upscaled_width,
+                .frame_width    = priv->frame_width,
+                .frame_height   = priv->frame_height,
+                .render_width   = priv->render_width,
+                .render_height  = priv->render_height,
+                .frame_type     = current->frame_type,
+                .subsampling_x  = seq->color_config.subsampling_x,
+                .subsampling_y  = seq->color_config.subsampling_y,
+                .bit_depth      = priv->bit_depth,
+                .order_hint     = current->order_hint,
+            };
+        }
+    }
+
+    av_log(ctx->log_ctx, AV_LOG_DEBUG, "Frame %d:  size %dx%d  "
+           "upscaled %d  render %dx%d  subsample %dx%d  "
+           "bitdepth %d  tiles %dx%d.\n", current->order_hint,
+           priv->frame_width, priv->frame_height, priv->upscaled_width,
+           priv->render_width, priv->render_height,
+           seq->color_config.subsampling_x + 1,
+           seq->color_config.subsampling_y + 1, priv->bit_depth,
+           priv->tile_rows, priv->tile_cols);
+
+    return 0;
+}
+
+static int FUNC(frame_header_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                                  AV1RawFrameHeader *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int err;
+
+    HEADER("Frame Header");
+
+    if (priv->seen_frame_header) {
+        // Nothing to do.
+    } else {
+        priv->seen_frame_header = 1;
+
+        CHECK(FUNC(uncompressed_header)(ctx, rw, current));
+
+        if (current->show_existing_frame) {
+            priv->seen_frame_header = 0;
+        } else {
+            priv->seen_frame_header = 1;
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(tile_group_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                                AV1RawTileGroup *current)
+{
+    CodedBitstreamAV1Context *priv = ctx->priv_data;
+    int num_tiles, tile_bits;
+    int err;
+
+    HEADER("Tile Group");
+
+    num_tiles = priv->tile_cols * priv->tile_rows;
+    if (num_tiles > 1)
+        flag(tile_start_and_end_present_flag);
+    else
+        infer(tile_start_and_end_present_flag, 0);
+
+    if (num_tiles == 1 || !current->tile_start_and_end_present_flag) {
+        infer(tg_start, 0);
+        infer(tg_end, num_tiles - 1);
+    } else {
+        tile_bits = cbs_av1_tile_log2(1, priv->tile_cols) +
+                    cbs_av1_tile_log2(1, priv->tile_rows);
+        fb(tile_bits, tg_start);
+        fb(tile_bits, tg_end);
+    }
+
+    CHECK(FUNC(byte_alignment)(ctx, rw));
+
+    // Reset header for next frame.
+    if (current->tg_end == num_tiles - 1)
+        priv->seen_frame_header = 0;
+
+    // Tile data follows.
+
+    return 0;
+}
+
+static int FUNC(frame_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                           AV1RawFrame *current)
+{
+    int err;
+
+    CHECK(FUNC(frame_header_obu)(ctx, rw, &current->header));
+
+    CHECK(FUNC(byte_alignment)(ctx, rw));
+
+    CHECK(FUNC(tile_group_obu)(ctx, rw, &current->tile_group));
+
+    return 0;
+}
+
+static int FUNC(tile_list_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                               AV1RawTileList *current)
+{
+    int err;
+
+    fb(8, output_frame_width_in_tiles_minus_1);
+    fb(8, output_frame_height_in_tiles_minus_1);
+
+    fb(16, tile_count_minus_1);
+
+    // Tile data follows.
+
+    return 0;
+}
+
+static int FUNC(metadata_hdr_cll)(CodedBitstreamContext *ctx, RWContext *rw,
+                                  AV1RawMetadataHDRCLL *current)
+{
+    int err;
+
+    fb(16, max_cll);
+    fb(16, max_fall);
+
+    return 0;
+}
+
+static int FUNC(metadata_hdr_mdcv)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   AV1RawMetadataHDRMDCV *current)
+{
+    int err, i;
+
+    for (i = 0; i < 3; i++) {
+        fcs(16, primary_chromaticity_x[i], 0, 50000, 1, i);
+        fcs(16, primary_chromaticity_y[i], 0, 50000, 1, i);
+    }
+
+    fc(16, white_point_chromaticity_x, 0, 50000);
+    fc(16, white_point_chromaticity_y, 0, 50000);
+
+    fc(32, luminance_max, 1, MAX_UINT_BITS(32));
+    fc(32, luminance_min, 0, current->luminance_max >> 6);
+
+    return 0;
+}
+
+static int FUNC(metadata_scalability)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      AV1RawMetadataScalability *current)
+{
+    // TODO: scalability metadata.
+
+    return AVERROR_PATCHWELCOME;
+}
+
+static int FUNC(metadata_itut_t35)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   AV1RawMetadataITUTT35 *current)
+{
+    int err;
+    size_t i;
+
+    fb(8, itu_t_t35_country_code);
+    if (current->itu_t_t35_country_code == 0xff)
+        fb(8, itu_t_t35_country_code_extension_byte);
+
+#ifdef READ
+    // The payload runs up to the start of the trailing bits, but there might
+    // be arbitrarily many trailing zeroes so we need to read through twice.
+    {
+        GetBitContext tmp = *rw;
+        current->payload_size = 0;
+        for (i = 0; get_bits_left(rw) >= 8; i++) {
+            if (get_bits(rw, 8))
+                current->payload_size = i;
+        }
+        *rw = tmp;
+    }
+
+    current->payload_ref = av_buffer_alloc(current->payload_size);
+    if (!current->payload_ref)
+        return AVERROR(ENOMEM);
+    current->payload = current->payload_ref->data;
+#endif
+
+    for (i = 0; i < current->payload_size; i++)
+        xf(8, itu_t_t35_payload_bytes[i], current->payload[i],
+           0x00, 0xff, 1, i);
+
+    return 0;
+}
+
+static int FUNC(metadata_timecode)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   AV1RawMetadataTimecode *current)
+{
+    int err;
+
+    fb(5, counting_type);
+    flag(full_timestamp_flag);
+    flag(discontinuity_flag);
+    flag(cnt_dropped_flag);
+    fb(9, n_frames);
+
+    if (current->full_timestamp_flag) {
+        fb(6, seconds_value);
+        fb(6, minutes_value);
+        fb(5, hours_value);
+    } else {
+        flag(seconds_flag);
+        if (current->seconds_flag) {
+            fb(6, seconds_value);
+            flag(minutes_flag);
+            if (current->minutes_flag) {
+                fb(6, minutes_value);
+                flag(hours_flag);
+                if (current->hours_flag)
+                    fb(5, hours_value);
+            }
+        }
+    }
+
+    fb(5, time_offset_length);
+    if (current->time_offset_length > 0)
+        fb(current->time_offset_length, time_offset_value);
+
+    return 0;
+}
+
+static int FUNC(metadata_obu)(CodedBitstreamContext *ctx, RWContext *rw,
+                              AV1RawMetadata *current)
+{
+    int err;
+
+    leb128(metadata_type);
+
+    switch (current->metadata_type) {
+    case AV1_METADATA_TYPE_HDR_CLL:
+        CHECK(FUNC(metadata_hdr_cll)(ctx, rw, &current->metadata.hdr_cll));
+        break;
+    case AV1_METADATA_TYPE_HDR_MDCV:
+        CHECK(FUNC(metadata_hdr_mdcv)(ctx, rw, &current->metadata.hdr_mdcv));
+        break;
+    case AV1_METADATA_TYPE_SCALABILITY:
+        CHECK(FUNC(metadata_scalability)(ctx, rw, &current->metadata.scalability));
+        break;
+    case AV1_METADATA_TYPE_ITUT_T35:
+        CHECK(FUNC(metadata_itut_t35)(ctx, rw, &current->metadata.itut_t35));
+        break;
+    case AV1_METADATA_TYPE_TIMECODE:
+        CHECK(FUNC(metadata_timecode)(ctx, rw, &current->metadata.timecode));
+        break;
+    default:
+        // Unknown metadata type.
+        return AVERROR_PATCHWELCOME;
+    }
+
+    return 0;
+}

diff --git a/libavcodec/cbs_h264.h b/libavcodec/cbs_h264.h
new file mode 100644
index 0000000..92277e4
--- /dev/null
+++ b/libavcodec/cbs_h264.h

@@ -0,0 +1,482 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_H264_H
+#define AVCODEC_CBS_H264_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "cbs.h"
+#include "cbs_h2645.h"
+#include "h264.h"
+
+
+enum {
+    // This limit is arbitrary - it is sufficient for one message of each
+    // type plus some repeats, and will therefore easily cover all sane
+    // streams.  However, it is possible to make technically-valid streams
+    // for which it will fail (for example, by including a large number of
+    // user-data-unregistered messages).
+    H264_MAX_SEI_PAYLOADS = 64,
+};
+
+
+typedef struct H264RawNALUnitHeader {
+    uint8_t forbidden_zero_bit;
+    uint8_t nal_ref_idc;
+    uint8_t nal_unit_type;
+
+    uint8_t svc_extension_flag;
+    uint8_t avc_3d_extension_flag;
+} H264RawNALUnitHeader;
+
+typedef struct H264RawScalingList {
+    int8_t delta_scale[64];
+} H264RawScalingList;
+
+typedef struct H264RawHRD {
+    uint8_t cpb_cnt_minus1;
+    uint8_t bit_rate_scale;
+    uint8_t cpb_size_scale;
+
+    uint32_t bit_rate_value_minus1[H264_MAX_CPB_CNT];
+    uint32_t cpb_size_value_minus1[H264_MAX_CPB_CNT];
+    uint8_t cbr_flag[H264_MAX_CPB_CNT];
+
+    uint8_t initial_cpb_removal_delay_length_minus1;
+    uint8_t cpb_removal_delay_length_minus1;
+    uint8_t dpb_output_delay_length_minus1;
+    uint8_t time_offset_length;
+} H264RawHRD;
+
+typedef struct H264RawVUI {
+    uint8_t aspect_ratio_info_present_flag;
+    uint8_t aspect_ratio_idc;
+    uint16_t sar_width;
+    uint16_t sar_height;
+
+    uint8_t overscan_info_present_flag;
+    uint8_t overscan_appropriate_flag;
+
+    uint8_t video_signal_type_present_flag;
+    uint8_t video_format;
+    uint8_t video_full_range_flag;
+    uint8_t colour_description_present_flag;
+    uint8_t colour_primaries;
+    uint8_t transfer_characteristics;
+    uint8_t matrix_coefficients;
+
+    uint8_t chroma_loc_info_present_flag;
+    uint8_t chroma_sample_loc_type_top_field;
+    uint8_t chroma_sample_loc_type_bottom_field;
+
+    uint8_t timing_info_present_flag;
+    uint32_t num_units_in_tick;
+    uint32_t time_scale;
+    uint8_t fixed_frame_rate_flag;
+
+    uint8_t nal_hrd_parameters_present_flag;
+    H264RawHRD nal_hrd_parameters;
+    uint8_t vcl_hrd_parameters_present_flag;
+    H264RawHRD vcl_hrd_parameters;
+    uint8_t low_delay_hrd_flag;
+
+    uint8_t pic_struct_present_flag;
+
+    uint8_t bitstream_restriction_flag;
+    uint8_t motion_vectors_over_pic_boundaries_flag;
+    uint8_t max_bytes_per_pic_denom;
+    uint8_t max_bits_per_mb_denom;
+    uint8_t log2_max_mv_length_horizontal;
+    uint8_t log2_max_mv_length_vertical;
+    uint8_t max_num_reorder_frames;
+    uint8_t max_dec_frame_buffering;
+} H264RawVUI;
+
+typedef struct H264RawSPS {
+    H264RawNALUnitHeader nal_unit_header;
+
+    uint8_t profile_idc;
+    uint8_t constraint_set0_flag;
+    uint8_t constraint_set1_flag;
+    uint8_t constraint_set2_flag;
+    uint8_t constraint_set3_flag;
+    uint8_t constraint_set4_flag;
+    uint8_t constraint_set5_flag;
+    uint8_t reserved_zero_2bits;
+    uint8_t level_idc;
+
+    uint8_t seq_parameter_set_id;
+
+    uint8_t chroma_format_idc;
+    uint8_t separate_colour_plane_flag;
+    uint8_t bit_depth_luma_minus8;
+    uint8_t bit_depth_chroma_minus8;
+    uint8_t qpprime_y_zero_transform_bypass_flag;
+
+    uint8_t seq_scaling_matrix_present_flag;
+    uint8_t seq_scaling_list_present_flag[12];
+    H264RawScalingList scaling_list_4x4[6];
+    H264RawScalingList scaling_list_8x8[6];
+
+    uint8_t log2_max_frame_num_minus4;
+    uint8_t pic_order_cnt_type;
+    uint8_t log2_max_pic_order_cnt_lsb_minus4;
+    uint8_t delta_pic_order_always_zero_flag;
+    int32_t offset_for_non_ref_pic;
+    int32_t offset_for_top_to_bottom_field;
+    uint8_t num_ref_frames_in_pic_order_cnt_cycle;
+    int32_t offset_for_ref_frame[256];
+
+    uint8_t max_num_ref_frames;
+    uint8_t gaps_in_frame_num_allowed_flag;
+
+    uint16_t pic_width_in_mbs_minus1;
+    uint16_t pic_height_in_map_units_minus1;
+
+    uint8_t frame_mbs_only_flag;
+    uint8_t mb_adaptive_frame_field_flag;
+    uint8_t direct_8x8_inference_flag;
+
+    uint8_t frame_cropping_flag;
+    uint16_t frame_crop_left_offset;
+    uint16_t frame_crop_right_offset;
+    uint16_t frame_crop_top_offset;
+    uint16_t frame_crop_bottom_offset;
+
+    uint8_t vui_parameters_present_flag;
+    H264RawVUI vui;
+} H264RawSPS;
+
+typedef struct H264RawSPSExtension {
+    H264RawNALUnitHeader nal_unit_header;
+
+    uint8_t seq_parameter_set_id;
+
+    uint8_t aux_format_idc;
+    uint8_t bit_depth_aux_minus8;
+    uint8_t alpha_incr_flag;
+    uint16_t alpha_opaque_value;
+    uint16_t alpha_transparent_value;
+
+    uint8_t additional_extension_flag;
+} H264RawSPSExtension;
+
+typedef struct H264RawPPS {
+    H264RawNALUnitHeader nal_unit_header;
+
+    uint8_t pic_parameter_set_id;
+    uint8_t seq_parameter_set_id;
+
+    uint8_t entropy_coding_mode_flag;
+    uint8_t bottom_field_pic_order_in_frame_present_flag;
+
+    uint8_t num_slice_groups_minus1;
+    uint8_t slice_group_map_type;
+    uint16_t run_length_minus1[H264_MAX_SLICE_GROUPS];
+    uint16_t top_left[H264_MAX_SLICE_GROUPS];
+    uint16_t bottom_right[H264_MAX_SLICE_GROUPS];
+    uint8_t slice_group_change_direction_flag;
+    uint16_t slice_group_change_rate_minus1;
+    uint16_t pic_size_in_map_units_minus1;
+
+    uint8_t *slice_group_id;
+    AVBufferRef *slice_group_id_ref;
+
+    uint8_t num_ref_idx_l0_default_active_minus1;
+    uint8_t num_ref_idx_l1_default_active_minus1;
+
+    uint8_t weighted_pred_flag;
+    uint8_t weighted_bipred_idc;
+
+    int8_t pic_init_qp_minus26;
+    int8_t pic_init_qs_minus26;
+    int8_t chroma_qp_index_offset;
+
+    uint8_t deblocking_filter_control_present_flag;
+    uint8_t constrained_intra_pred_flag;
+
+    uint8_t more_rbsp_data;
+
+    uint8_t redundant_pic_cnt_present_flag;
+    uint8_t transform_8x8_mode_flag;
+
+    uint8_t pic_scaling_matrix_present_flag;
+    uint8_t pic_scaling_list_present_flag[12];
+    H264RawScalingList scaling_list_4x4[6];
+    H264RawScalingList scaling_list_8x8[6];
+
+    int8_t second_chroma_qp_index_offset;
+} H264RawPPS;
+
+typedef struct H264RawAUD {
+    H264RawNALUnitHeader nal_unit_header;
+
+    uint8_t primary_pic_type;
+} H264RawAUD;
+
+typedef struct H264RawSEIBufferingPeriod {
+    uint8_t seq_parameter_set_id;
+    struct {
+        uint32_t initial_cpb_removal_delay[H264_MAX_CPB_CNT];
+        uint32_t initial_cpb_removal_delay_offset[H264_MAX_CPB_CNT];
+    } nal, vcl;
+} H264RawSEIBufferingPeriod;
+
+typedef struct H264RawSEIPicTimestamp {
+    uint8_t ct_type;
+    uint8_t nuit_field_based_flag;
+    uint8_t counting_type;
+    uint8_t full_timestamp_flag;
+    uint8_t discontinuity_flag;
+    uint8_t cnt_dropped_flag;
+    uint8_t n_frames;
+    uint8_t seconds_flag;
+    uint8_t seconds_value;
+    uint8_t minutes_flag;
+    uint8_t minutes_value;
+    uint8_t hours_flag;
+    uint8_t hours_value;
+    uint32_t time_offset;
+} H264RawSEIPicTimestamp;
+
+typedef struct H264RawSEIPicTiming {
+    uint32_t cpb_removal_delay;
+    uint32_t dpb_output_delay;
+    uint8_t pic_struct;
+    uint8_t clock_timestamp_flag[3];
+    H264RawSEIPicTimestamp timestamp[3];
+} H264RawSEIPicTiming;
+
+typedef struct H264RawSEIPanScanRect {
+    uint32_t pan_scan_rect_id;
+    uint8_t  pan_scan_rect_cancel_flag;
+    uint8_t  pan_scan_cnt_minus1;
+    int32_t  pan_scan_rect_left_offset[3];
+    int32_t  pan_scan_rect_right_offset[3];
+    int32_t  pan_scan_rect_top_offset[3];
+    int32_t  pan_scan_rect_bottom_offset[3];
+    uint16_t pan_scan_rect_repetition_period;
+} H264RawSEIPanScanRect;
+
+typedef struct H264RawSEIUserDataRegistered {
+    uint8_t itu_t_t35_country_code;
+    uint8_t itu_t_t35_country_code_extension_byte;
+    uint8_t *data;
+    size_t data_length;
+    AVBufferRef *data_ref;
+} H264RawSEIUserDataRegistered;
+
+typedef struct H264RawSEIUserDataUnregistered {
+    uint8_t uuid_iso_iec_11578[16];
+    uint8_t *data;
+    size_t data_length;
+    AVBufferRef *data_ref;
+} H264RawSEIUserDataUnregistered;
+
+typedef struct H264RawSEIRecoveryPoint {
+    uint16_t recovery_frame_cnt;
+    uint8_t exact_match_flag;
+    uint8_t broken_link_flag;
+    uint8_t changing_slice_group_idc;
+} H264RawSEIRecoveryPoint;
+
+typedef struct H264RawSEIDisplayOrientation {
+    uint8_t display_orientation_cancel_flag;
+    uint8_t hor_flip;
+    uint8_t ver_flip;
+    uint16_t anticlockwise_rotation;
+    uint16_t display_orientation_repetition_period;
+    uint8_t display_orientation_extension_flag;
+} H264RawSEIDisplayOrientation;
+
+typedef struct H264RawSEIMasteringDisplayColourVolume {
+    uint16_t display_primaries_x[3];
+    uint16_t display_primaries_y[3];
+    uint16_t white_point_x;
+    uint16_t white_point_y;
+    uint32_t max_display_mastering_luminance;
+    uint32_t min_display_mastering_luminance;
+} H264RawSEIMasteringDisplayColourVolume;
+
+typedef struct H264RawSEIPayload {
+    uint32_t payload_type;
+    uint32_t payload_size;
+    union {
+        H264RawSEIBufferingPeriod buffering_period;
+        H264RawSEIPicTiming pic_timing;
+        H264RawSEIPanScanRect pan_scan_rect;
+        // H264RawSEIFiller filler -> no fields.
+        H264RawSEIUserDataRegistered user_data_registered;
+        H264RawSEIUserDataUnregistered user_data_unregistered;
+        H264RawSEIRecoveryPoint recovery_point;
+        H264RawSEIDisplayOrientation display_orientation;
+        H264RawSEIMasteringDisplayColourVolume mastering_display_colour_volume;
+        struct {
+            uint8_t *data;
+            size_t data_length;
+            AVBufferRef *data_ref;
+        } other;
+    } payload;
+} H264RawSEIPayload;
+
+typedef struct H264RawSEI {
+    H264RawNALUnitHeader nal_unit_header;
+
+    H264RawSEIPayload payload[H264_MAX_SEI_PAYLOADS];
+    uint8_t payload_count;
+} H264RawSEI;
+
+typedef struct H264RawSliceHeader {
+    H264RawNALUnitHeader nal_unit_header;
+
+    uint32_t first_mb_in_slice;
+    uint8_t slice_type;
+
+    uint8_t pic_parameter_set_id;
+
+    uint8_t colour_plane_id;
+
+    uint16_t frame_num;
+    uint8_t field_pic_flag;
+    uint8_t bottom_field_flag;
+
+    uint16_t idr_pic_id;
+
+    uint16_t pic_order_cnt_lsb;
+    int32_t delta_pic_order_cnt_bottom;
+    int32_t delta_pic_order_cnt[2];
+
+    uint8_t redundant_pic_cnt;
+    uint8_t direct_spatial_mv_pred_flag;
+
+    uint8_t num_ref_idx_active_override_flag;
+    uint8_t num_ref_idx_l0_active_minus1;
+    uint8_t num_ref_idx_l1_active_minus1;
+
+    uint8_t ref_pic_list_modification_flag_l0;
+    uint8_t ref_pic_list_modification_flag_l1;
+    struct {
+        uint8_t modification_of_pic_nums_idc;
+        int32_t abs_diff_pic_num_minus1;
+        uint8_t long_term_pic_num;
+    } rplm_l0[H264_MAX_RPLM_COUNT], rplm_l1[H264_MAX_RPLM_COUNT];
+
+    uint8_t luma_log2_weight_denom;
+    uint8_t chroma_log2_weight_denom;
+
+    uint8_t luma_weight_l0_flag[H264_MAX_REFS];
+    int8_t luma_weight_l0[H264_MAX_REFS];
+    int8_t luma_offset_l0[H264_MAX_REFS];
+    uint8_t chroma_weight_l0_flag[H264_MAX_REFS];
+    int8_t chroma_weight_l0[H264_MAX_REFS][2];
+    int8_t chroma_offset_l0[H264_MAX_REFS][2];
+
+    uint8_t luma_weight_l1_flag[H264_MAX_REFS];
+    int8_t luma_weight_l1[H264_MAX_REFS];
+    int8_t luma_offset_l1[H264_MAX_REFS];
+    uint8_t chroma_weight_l1_flag[H264_MAX_REFS];
+    int8_t chroma_weight_l1[H264_MAX_REFS][2];
+    int8_t chroma_offset_l1[H264_MAX_REFS][2];
+
+    uint8_t no_output_of_prior_pics_flag;
+    uint8_t long_term_reference_flag;
+
+    uint8_t adaptive_ref_pic_marking_mode_flag;
+    struct {
+        uint8_t memory_management_control_operation;
+        int32_t difference_of_pic_nums_minus1;
+        uint8_t long_term_pic_num;
+        uint8_t long_term_frame_idx;
+        uint8_t max_long_term_frame_idx_plus1;
+    } mmco[H264_MAX_MMCO_COUNT];
+
+    uint8_t cabac_init_idc;
+
+    int8_t slice_qp_delta;
+
+    uint8_t sp_for_switch_flag;
+    int8_t slice_qs_delta;
+
+    uint8_t disable_deblocking_filter_idc;
+    int8_t slice_alpha_c0_offset_div2;
+    int8_t slice_beta_offset_div2;
+
+    uint16_t slice_group_change_cycle;
+} H264RawSliceHeader;
+
+typedef struct H264RawSlice {
+    H264RawSliceHeader header;
+
+    uint8_t *data;
+    size_t   data_size;
+    int      data_bit_start;
+    AVBufferRef *data_ref;
+} H264RawSlice;
+
+typedef struct H264RawFiller {
+    H264RawNALUnitHeader nal_unit_header;
+
+    uint32_t filler_size;
+} H264RawFiller;
+
+
+typedef struct CodedBitstreamH264Context {
+    // Reader/writer context in common with the H.265 implementation.
+    CodedBitstreamH2645Context common;
+
+    // All currently available parameter sets.  These are updated when
+    // any parameter set NAL unit is read/written with this context.
+    AVBufferRef *sps_ref[H264_MAX_SPS_COUNT];
+    AVBufferRef *pps_ref[H264_MAX_PPS_COUNT];
+    H264RawSPS *sps[H264_MAX_SPS_COUNT];
+    H264RawPPS *pps[H264_MAX_PPS_COUNT];
+
+    // The currently active parameter sets.  These are updated when any
+    // NAL unit refers to the relevant parameter set.  These pointers
+    // must also be present in the arrays above.
+    const H264RawSPS *active_sps;
+    const H264RawPPS *active_pps;
+
+    // The NAL unit type of the most recent normal slice.  This is required
+    // to be able to read/write auxiliary slices, because IdrPicFlag is
+    // otherwise unknown.
+    uint8_t last_slice_nal_unit_type;
+} CodedBitstreamH264Context;
+
+
+/**
+ * Add an SEI message to an access unit.
+ */
+int ff_cbs_h264_add_sei_message(CodedBitstreamContext *ctx,
+                                CodedBitstreamFragment *access_unit,
+                                const H264RawSEIPayload *payload);
+
+/**
+ * Delete an SEI message from an access unit.
+ *
+ * Deletes from nal_unit, which must be an SEI NAL unit.  If this is the
+ * last message in nal_unit, also deletes it from access_unit.
+ */
+int ff_cbs_h264_delete_sei_message(CodedBitstreamContext *ctx,
+                                   CodedBitstreamFragment *access_unit,
+                                   CodedBitstreamUnit *nal_unit,
+                                   int position);
+
+#endif /* AVCODEC_CBS_H264_H */

diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
new file mode 100644
index 0000000..4b31601
--- /dev/null
+++ b/libavcodec/cbs_h2645.c

@@ -0,0 +1,1586 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+
+#include "bytestream.h"
+#include "cbs.h"
+#include "cbs_internal.h"
+#include "cbs_h264.h"
+#include "cbs_h265.h"
+#include "golomb.h"
+#include "h264.h"
+#include "h264_sei.h"
+#include "h2645_parse.h"
+#include "hevc.h"
+#include "hevc_sei.h"
+
+
+static int cbs_read_ue_golomb(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                              const char *name, const int *subscripts,
+                              uint32_t *write_to,
+                              uint32_t range_min, uint32_t range_max)
+{
+    uint32_t value;
+    int position, i, j;
+    unsigned int k;
+    char bits[65];
+
+    position = get_bits_count(gbc);
+
+    for (i = 0; i < 32; i++) {
+        if (get_bits_left(gbc) < i + 1) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid ue-golomb code at "
+                   "%s: bitstream ended.\n", name);
+            return AVERROR_INVALIDDATA;
+        }
+        k = get_bits1(gbc);
+        bits[i] = k ? '1' : '0';
+        if (k)
+            break;
+    }
+    if (i >= 32) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid ue-golomb code at "
+               "%s: more than 31 zeroes.\n", name);
+        return AVERROR_INVALIDDATA;
+    }
+    value = 1;
+    for (j = 0; j < i; j++) {
+        k = get_bits1(gbc);
+        bits[i + j + 1] = k ? '1' : '0';
+        value = value << 1 | k;
+    }
+    bits[i + j + 1] = 0;
+    --value;
+
+    if (ctx->trace_enable)
+        ff_cbs_trace_syntax_element(ctx, position, name, subscripts,
+                                    bits, value);
+
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+static int cbs_read_se_golomb(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                              const char *name, const int *subscripts,
+                              int32_t *write_to,
+                              int32_t range_min, int32_t range_max)
+{
+    int32_t value;
+    int position, i, j;
+    unsigned int k;
+    uint32_t v;
+    char bits[65];
+
+    position = get_bits_count(gbc);
+
+    for (i = 0; i < 32; i++) {
+        if (get_bits_left(gbc) < i + 1) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid se-golomb code at "
+                   "%s: bitstream ended.\n", name);
+            return AVERROR_INVALIDDATA;
+        }
+        k = get_bits1(gbc);
+        bits[i] = k ? '1' : '0';
+        if (k)
+            break;
+    }
+    if (i >= 32) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid se-golomb code at "
+               "%s: more than 31 zeroes.\n", name);
+        return AVERROR_INVALIDDATA;
+    }
+    v = 1;
+    for (j = 0; j < i; j++) {
+        k = get_bits1(gbc);
+        bits[i + j + 1] = k ? '1' : '0';
+        v = v << 1 | k;
+    }
+    bits[i + j + 1] = 0;
+    if (v & 1)
+        value = -(int32_t)(v / 2);
+    else
+        value = v / 2;
+
+    if (ctx->trace_enable)
+        ff_cbs_trace_syntax_element(ctx, position, name, subscripts,
+                                    bits, value);
+
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRId32", but must be in [%"PRId32",%"PRId32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+static int cbs_write_ue_golomb(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                               const char *name, const int *subscripts,
+                               uint32_t value,
+                               uint32_t range_min, uint32_t range_max)
+{
+    int len;
+
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+    av_assert0(value != UINT32_MAX);
+
+    len = av_log2(value + 1);
+    if (put_bits_left(pbc) < 2 * len + 1)
+        return AVERROR(ENOSPC);
+
+    if (ctx->trace_enable) {
+        char bits[65];
+        int i;
+
+        for (i = 0; i < len; i++)
+            bits[i] = '0';
+        bits[len] = '1';
+        for (i = 0; i < len; i++)
+            bits[len + i + 1] = (value + 1) >> (len - i - 1) & 1 ? '1' : '0';
+        bits[len + len + 1] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
+                                    name, subscripts, bits, value);
+    }
+
+    put_bits(pbc, len, 0);
+    if (len + 1 < 32)
+        put_bits(pbc, len + 1, value + 1);
+    else
+        put_bits32(pbc, value + 1);
+
+    return 0;
+}
+
+static int cbs_write_se_golomb(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                               const char *name, const int *subscripts,
+                               int32_t value,
+                               int32_t range_min, int32_t range_max)
+{
+    int len;
+    uint32_t uvalue;
+
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRId32", but must be in [%"PRId32",%"PRId32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+    av_assert0(value != INT32_MIN);
+
+    if (value == 0)
+        uvalue = 0;
+    else if (value > 0)
+        uvalue = 2 * (uint32_t)value - 1;
+    else
+        uvalue = 2 * (uint32_t)-value;
+
+    len = av_log2(uvalue + 1);
+    if (put_bits_left(pbc) < 2 * len + 1)
+        return AVERROR(ENOSPC);
+
+    if (ctx->trace_enable) {
+        char bits[65];
+        int i;
+
+        for (i = 0; i < len; i++)
+            bits[i] = '0';
+        bits[len] = '1';
+        for (i = 0; i < len; i++)
+            bits[len + i + 1] = (uvalue + 1) >> (len - i - 1) & 1 ? '1' : '0';
+        bits[len + len + 1] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
+                                    name, subscripts, bits, value);
+    }
+
+    put_bits(pbc, len, 0);
+    if (len + 1 < 32)
+        put_bits(pbc, len + 1, uvalue + 1);
+    else
+        put_bits32(pbc, uvalue + 1);
+
+    return 0;
+}
+
+#define HEADER(name) do { \
+        ff_cbs_trace_header(ctx, name); \
+    } while (0)
+
+#define CHECK(call) do { \
+        err = (call); \
+        if (err < 0) \
+            return err; \
+    } while (0)
+
+#define FUNC_NAME(rw, codec, name) cbs_ ## codec ## _ ## rw ## _ ## name
+#define FUNC_H264(rw, name) FUNC_NAME(rw, h264, name)
+#define FUNC_H265(rw, name) FUNC_NAME(rw, h265, name)
+
+#define SUBSCRIPTS(subs, ...) (subs > 0 ? ((int[subs + 1]){ subs, __VA_ARGS__ }) : NULL)
+
+#define u(width, name, range_min, range_max) \
+        xu(width, name, current->name, range_min, range_max, 0)
+#define flag(name) u(1, name, 0, 1)
+#define ue(name, range_min, range_max) \
+        xue(name, current->name, range_min, range_max, 0)
+#define se(name, range_min, range_max) \
+        xse(name, current->name, range_min, range_max, 0)
+
+#define us(width, name, range_min, range_max, subs, ...) \
+        xu(width, name, current->name, range_min, range_max, subs, __VA_ARGS__)
+#define flags(name, subs, ...) \
+        xu(1, name, current->name, 0, 1, subs, __VA_ARGS__)
+#define ues(name, range_min, range_max, subs, ...) \
+        xue(name, current->name, range_min, range_max, subs, __VA_ARGS__)
+#define ses(name, range_min, range_max, subs, ...) \
+        xse(name, current->name, range_min, range_max, subs, __VA_ARGS__)
+
+#define fixed(width, name, value) do { \
+        av_unused uint32_t fixed_value = value; \
+        xu(width, name, fixed_value, value, value, 0); \
+    } while (0)
+
+
+#define READ
+#define READWRITE read
+#define RWContext GetBitContext
+
+#define xu(width, name, var, range_min, range_max, subs, ...) do { \
+        uint32_t value = range_min; \
+        CHECK(ff_cbs_read_unsigned(ctx, rw, width, #name, \
+                                   SUBSCRIPTS(subs, __VA_ARGS__), \
+                                   &value, range_min, range_max)); \
+        var = value; \
+    } while (0)
+#define xue(name, var, range_min, range_max, subs, ...) do { \
+        uint32_t value = range_min; \
+        CHECK(cbs_read_ue_golomb(ctx, rw, #name, \
+                                 SUBSCRIPTS(subs, __VA_ARGS__), \
+                                 &value, range_min, range_max)); \
+        var = value; \
+    } while (0)
+#define xse(name, var, range_min, range_max, subs, ...) do { \
+        int32_t value = range_min; \
+        CHECK(cbs_read_se_golomb(ctx, rw, #name, \
+                                 SUBSCRIPTS(subs, __VA_ARGS__), \
+                                 &value, range_min, range_max)); \
+        var = value; \
+    } while (0)
+
+
+#define infer(name, value) do { \
+        current->name = value; \
+    } while (0)
+
+static int cbs_h2645_read_more_rbsp_data(GetBitContext *gbc)
+{
+    int bits_left = get_bits_left(gbc);
+    if (bits_left > 8)
+        return 1;
+    if (show_bits(gbc, bits_left) == 1 << (bits_left - 1))
+        return 0;
+    return 1;
+}
+
+#define more_rbsp_data(var) ((var) = cbs_h2645_read_more_rbsp_data(rw))
+
+#define byte_alignment(rw) (get_bits_count(rw) % 8)
+
+#define allocate(name, size) do { \
+        name ## _ref = av_buffer_allocz(size); \
+        if (!name ## _ref) \
+            return AVERROR(ENOMEM); \
+        name = name ## _ref->data; \
+    } while (0)
+
+#define FUNC(name) FUNC_H264(READWRITE, name)
+#include "cbs_h264_syntax_template.c"
+#undef FUNC
+
+#define FUNC(name) FUNC_H265(READWRITE, name)
+#include "cbs_h265_syntax_template.c"
+#undef FUNC
+
+#undef READ
+#undef READWRITE
+#undef RWContext
+#undef xu
+#undef xue
+#undef xse
+#undef infer
+#undef more_rbsp_data
+#undef byte_alignment
+#undef allocate
+
+
+#define WRITE
+#define READWRITE write
+#define RWContext PutBitContext
+
+#define xu(width, name, var, range_min, range_max, subs, ...) do { \
+        uint32_t value = var; \
+        CHECK(ff_cbs_write_unsigned(ctx, rw, width, #name, \
+                                    SUBSCRIPTS(subs, __VA_ARGS__), \
+                                    value, range_min, range_max)); \
+    } while (0)
+#define xue(name, var, range_min, range_max, subs, ...) do { \
+        uint32_t value = var; \
+        CHECK(cbs_write_ue_golomb(ctx, rw, #name, \
+                                  SUBSCRIPTS(subs, __VA_ARGS__), \
+                                  value, range_min, range_max)); \
+    } while (0)
+#define xse(name, var, range_min, range_max, subs, ...) do { \
+        int32_t value = var; \
+        CHECK(cbs_write_se_golomb(ctx, rw, #name, \
+                                  SUBSCRIPTS(subs, __VA_ARGS__), \
+                                  value, range_min, range_max)); \
+    } while (0)
+
+#define infer(name, value) do { \
+        if (current->name != (value)) { \
+            av_log(ctx->log_ctx, AV_LOG_WARNING, "Warning: " \
+                   "%s does not match inferred value: " \
+                   "%"PRId64", but should be %"PRId64".\n", \
+                   #name, (int64_t)current->name, (int64_t)(value)); \
+        } \
+    } while (0)
+
+#define more_rbsp_data(var) (var)
+
+#define byte_alignment(rw) (put_bits_count(rw) % 8)
+
+#define allocate(name, size) do { \
+        if (!name) { \
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "%s must be set " \
+                   "for writing.\n", #name); \
+            return AVERROR_INVALIDDATA; \
+        } \
+    } while (0)
+
+#define FUNC(name) FUNC_H264(READWRITE, name)
+#include "cbs_h264_syntax_template.c"
+#undef FUNC
+
+#define FUNC(name) FUNC_H265(READWRITE, name)
+#include "cbs_h265_syntax_template.c"
+#undef FUNC
+
+#undef WRITE
+#undef READWRITE
+#undef RWContext
+#undef xu
+#undef xue
+#undef xse
+#undef u
+#undef flag
+#undef ue
+#undef se
+#undef infer
+#undef more_rbsp_data
+#undef byte_alignment
+#undef allocate
+
+
+static void cbs_h264_free_pps(void *unit, uint8_t *content)
+{
+    H264RawPPS *pps = (H264RawPPS*)content;
+    av_buffer_unref(&pps->slice_group_id_ref);
+    av_freep(&content);
+}
+
+static void cbs_h264_free_sei_payload(H264RawSEIPayload *payload)
+{
+    switch (payload->payload_type) {
+    case H264_SEI_TYPE_BUFFERING_PERIOD:
+    case H264_SEI_TYPE_PIC_TIMING:
+    case H264_SEI_TYPE_PAN_SCAN_RECT:
+    case H264_SEI_TYPE_RECOVERY_POINT:
+    case H264_SEI_TYPE_DISPLAY_ORIENTATION:
+    case H264_SEI_TYPE_MASTERING_DISPLAY_COLOUR_VOLUME:
+        break;
+    case H264_SEI_TYPE_USER_DATA_REGISTERED:
+        av_buffer_unref(&payload->payload.user_data_registered.data_ref);
+        break;
+    case H264_SEI_TYPE_USER_DATA_UNREGISTERED:
+        av_buffer_unref(&payload->payload.user_data_unregistered.data_ref);
+        break;
+    default:
+        av_buffer_unref(&payload->payload.other.data_ref);
+        break;
+    }
+}
+
+static void cbs_h264_free_sei(void *unit, uint8_t *content)
+{
+    H264RawSEI *sei = (H264RawSEI*)content;
+    int i;
+    for (i = 0; i < sei->payload_count; i++)
+        cbs_h264_free_sei_payload(&sei->payload[i]);
+    av_freep(&content);
+}
+
+static void cbs_h264_free_slice(void *unit, uint8_t *content)
+{
+    H264RawSlice *slice = (H264RawSlice*)content;
+    av_buffer_unref(&slice->data_ref);
+    av_freep(&content);
+}
+
+static void cbs_h265_free_vps(void *unit, uint8_t *content)
+{
+    H265RawVPS *vps = (H265RawVPS*)content;
+    av_buffer_unref(&vps->extension_data.data_ref);
+    av_freep(&content);
+}
+
+static void cbs_h265_free_sps(void *unit, uint8_t *content)
+{
+    H265RawSPS *sps = (H265RawSPS*)content;
+    av_buffer_unref(&sps->extension_data.data_ref);
+    av_freep(&content);
+}
+
+static void cbs_h265_free_pps(void *unit, uint8_t *content)
+{
+    H265RawPPS *pps = (H265RawPPS*)content;
+    av_buffer_unref(&pps->extension_data.data_ref);
+    av_freep(&content);
+}
+
+static void cbs_h265_free_slice(void *unit, uint8_t *content)
+{
+    H265RawSlice *slice = (H265RawSlice*)content;
+    av_buffer_unref(&slice->data_ref);
+    av_freep(&content);
+}
+
+static void cbs_h265_free_sei_payload(H265RawSEIPayload *payload)
+{
+    switch (payload->payload_type) {
+    case HEVC_SEI_TYPE_MASTERING_DISPLAY_INFO:
+    case HEVC_SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO:
+        break;
+    default:
+        av_buffer_unref(&payload->payload.other.data_ref);
+        break;
+    }
+}
+
+static void cbs_h265_free_sei(void *unit, uint8_t *content)
+{
+    H265RawSEI *sei = (H265RawSEI*)content;
+    int i;
+    for (i = 0; i < sei->payload_count; i++)
+        cbs_h265_free_sei_payload(&sei->payload[i]);
+    av_freep(&content);
+}
+
+static int cbs_h2645_fragment_add_nals(CodedBitstreamContext *ctx,
+                                       CodedBitstreamFragment *frag,
+                                       const H2645Packet *packet)
+{
+    int err, i;
+
+    for (i = 0; i < packet->nb_nals; i++) {
+        const H2645NAL *nal = &packet->nals[i];
+        size_t size = nal->size;
+        uint8_t *data;
+
+        // Remove trailing zeroes.
+        while (size > 0 && nal->data[size - 1] == 0)
+            --size;
+        av_assert0(size > 0);
+
+        data = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!data)
+            return AVERROR(ENOMEM);
+        memcpy(data, nal->data, size);
+        memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+        err = ff_cbs_insert_unit_data(ctx, frag, -1, nal->type,
+                                      data, size, NULL);
+        if (err < 0) {
+            av_freep(&data);
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+static int cbs_h2645_split_fragment(CodedBitstreamContext *ctx,
+                                    CodedBitstreamFragment *frag,
+                                    int header)
+{
+    enum AVCodecID codec_id = ctx->codec->codec_id;
+    CodedBitstreamH2645Context *priv = ctx->priv_data;
+    GetByteContext gbc;
+    int err;
+
+    av_assert0(frag->data && frag->nb_units == 0);
+    if (frag->data_size == 0)
+        return 0;
+
+    if (header && frag->data[0] && codec_id == AV_CODEC_ID_H264) {
+        // AVCC header.
+        size_t size, start, end;
+        int i, count, version;
+
+        priv->mp4 = 1;
+
+        bytestream2_init(&gbc, frag->data, frag->data_size);
+
+        if (bytestream2_get_bytes_left(&gbc) < 6)
+            return AVERROR_INVALIDDATA;
+
+        version = bytestream2_get_byte(&gbc);
+        if (version != 1) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid AVCC header: "
+                   "first byte %u.", version);
+            return AVERROR_INVALIDDATA;
+        }
+
+        bytestream2_skip(&gbc, 3);
+        priv->nal_length_size = (bytestream2_get_byte(&gbc) & 3) + 1;
+
+        // SPS array.
+        count = bytestream2_get_byte(&gbc) & 0x1f;
+        start = bytestream2_tell(&gbc);
+        for (i = 0; i < count; i++) {
+            if (bytestream2_get_bytes_left(&gbc) < 2 * (count - i))
+                return AVERROR_INVALIDDATA;
+            size = bytestream2_get_be16(&gbc);
+            if (bytestream2_get_bytes_left(&gbc) < size)
+                return AVERROR_INVALIDDATA;
+            bytestream2_skip(&gbc, size);
+        }
+        end = bytestream2_tell(&gbc);
+
+        err = ff_h2645_packet_split(&priv->read_packet,
+                                    frag->data + start, end - start,
+                                    ctx->log_ctx, 1, 2, AV_CODEC_ID_H264, 1);
+        if (err < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to split AVCC SPS array.\n");
+            return err;
+        }
+        err = cbs_h2645_fragment_add_nals(ctx, frag, &priv->read_packet);
+        if (err < 0)
+            return err;
+
+        // PPS array.
+        count = bytestream2_get_byte(&gbc);
+        start = bytestream2_tell(&gbc);
+        for (i = 0; i < count; i++) {
+            if (bytestream2_get_bytes_left(&gbc) < 2 * (count - i))
+                return AVERROR_INVALIDDATA;
+            size = bytestream2_get_be16(&gbc);
+            if (bytestream2_get_bytes_left(&gbc) < size)
+                return AVERROR_INVALIDDATA;
+            bytestream2_skip(&gbc, size);
+        }
+        end = bytestream2_tell(&gbc);
+
+        err = ff_h2645_packet_split(&priv->read_packet,
+                                    frag->data + start, end - start,
+                                    ctx->log_ctx, 1, 2, AV_CODEC_ID_H264, 1);
+        if (err < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to split AVCC PPS array.\n");
+            return err;
+        }
+        err = cbs_h2645_fragment_add_nals(ctx, frag, &priv->read_packet);
+        if (err < 0)
+            return err;
+
+        if (bytestream2_get_bytes_left(&gbc) > 0) {
+            av_log(ctx->log_ctx, AV_LOG_WARNING, "%u bytes left at end of AVCC "
+                   "header.\n", bytestream2_get_bytes_left(&gbc));
+        }
+
+    } else if (header && frag->data[0] && codec_id == AV_CODEC_ID_HEVC) {
+        // HVCC header.
+        size_t size, start, end;
+        int i, j, nb_arrays, nal_unit_type, nb_nals, version;
+
+        priv->mp4 = 1;
+
+        bytestream2_init(&gbc, frag->data, frag->data_size);
+
+        if (bytestream2_get_bytes_left(&gbc) < 23)
+            return AVERROR_INVALIDDATA;
+
+        version = bytestream2_get_byte(&gbc);
+        if (version != 1) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid HVCC header: "
+                   "first byte %u.", version);
+            return AVERROR_INVALIDDATA;
+        }
+
+        bytestream2_skip(&gbc, 20);
+        priv->nal_length_size = (bytestream2_get_byte(&gbc) & 3) + 1;
+
+        nb_arrays = bytestream2_get_byte(&gbc);
+        for (i = 0; i < nb_arrays; i++) {
+            nal_unit_type = bytestream2_get_byte(&gbc) & 0x3f;
+            nb_nals = bytestream2_get_be16(&gbc);
+
+            start = bytestream2_tell(&gbc);
+            for (j = 0; j < nb_nals; j++) {
+                if (bytestream2_get_bytes_left(&gbc) < 2)
+                    return AVERROR_INVALIDDATA;
+                size = bytestream2_get_be16(&gbc);
+                if (bytestream2_get_bytes_left(&gbc) < size)
+                    return AVERROR_INVALIDDATA;
+                bytestream2_skip(&gbc, size);
+            }
+            end = bytestream2_tell(&gbc);
+
+            err = ff_h2645_packet_split(&priv->read_packet,
+                                        frag->data + start, end - start,
+                                        ctx->log_ctx, 1, 2, AV_CODEC_ID_HEVC, 1);
+            if (err < 0) {
+                av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to split "
+                       "HVCC array %d (%d NAL units of type %d).\n",
+                       i, nb_nals, nal_unit_type);
+                return err;
+            }
+            err = cbs_h2645_fragment_add_nals(ctx, frag, &priv->read_packet);
+            if (err < 0)
+                return err;
+        }
+
+    } else {
+        // Annex B, or later MP4 with already-known parameters.
+
+        err = ff_h2645_packet_split(&priv->read_packet,
+                                    frag->data, frag->data_size,
+                                    ctx->log_ctx,
+                                    priv->mp4, priv->nal_length_size,
+                                    codec_id, 1);
+        if (err < 0)
+            return err;
+
+        err = cbs_h2645_fragment_add_nals(ctx, frag, &priv->read_packet);
+        if (err < 0)
+            return err;
+    }
+
+    return 0;
+}
+
+#define cbs_h2645_replace_ps(h26n, ps_name, ps_var, id_element) \
+static int cbs_h26 ## h26n ## _replace_ ## ps_var(CodedBitstreamContext *ctx, \
+                                                  CodedBitstreamUnit *unit)  \
+{ \
+    CodedBitstreamH26 ## h26n ## Context *priv = ctx->priv_data; \
+    H26 ## h26n ## Raw ## ps_name *ps_var = unit->content; \
+    unsigned int id = ps_var->id_element; \
+    if (id > FF_ARRAY_ELEMS(priv->ps_var)) { \
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid " #ps_name \
+               " id : %d.\n", id); \
+        return AVERROR_INVALIDDATA; \
+    } \
+    if (priv->ps_var[id] == priv->active_ ## ps_var) \
+        priv->active_ ## ps_var = NULL ; \
+    av_buffer_unref(&priv->ps_var ## _ref[id]); \
+    if (unit->content_ref) \
+        priv->ps_var ## _ref[id] = av_buffer_ref(unit->content_ref); \
+    else \
+        priv->ps_var ## _ref[id] = av_buffer_alloc(sizeof(*ps_var)); \
+    if (!priv->ps_var ## _ref[id]) \
+        return AVERROR(ENOMEM); \
+    priv->ps_var[id] = (H26 ## h26n ## Raw ## ps_name *)priv->ps_var ## _ref[id]->data; \
+    if (!unit->content_ref) \
+        memcpy(priv->ps_var[id], ps_var, sizeof(*ps_var)); \
+    return 0; \
+}
+
+cbs_h2645_replace_ps(4, SPS, sps, seq_parameter_set_id)
+cbs_h2645_replace_ps(4, PPS, pps, pic_parameter_set_id)
+cbs_h2645_replace_ps(5, VPS, vps, vps_video_parameter_set_id)
+cbs_h2645_replace_ps(5, SPS, sps, sps_seq_parameter_set_id)
+cbs_h2645_replace_ps(5, PPS, pps, pps_pic_parameter_set_id)
+
+static int cbs_h264_read_nal_unit(CodedBitstreamContext *ctx,
+                                  CodedBitstreamUnit *unit)
+{
+    GetBitContext gbc;
+    int err;
+
+    err = init_get_bits(&gbc, unit->data, 8 * unit->data_size);
+    if (err < 0)
+        return err;
+
+    switch (unit->type) {
+    case H264_NAL_SPS:
+        {
+            H264RawSPS *sps;
+
+            err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(*sps), NULL);
+            if (err < 0)
+                return err;
+            sps = unit->content;
+
+            err = cbs_h264_read_sps(ctx, &gbc, sps);
+            if (err < 0)
+                return err;
+
+            err = cbs_h264_replace_sps(ctx, unit);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_SPS_EXT:
+        {
+            err = ff_cbs_alloc_unit_content(ctx, unit,
+                                            sizeof(H264RawSPSExtension),
+                                            NULL);
+            if (err < 0)
+                return err;
+
+            err = cbs_h264_read_sps_extension(ctx, &gbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_PPS:
+        {
+            H264RawPPS *pps;
+
+            err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(*pps),
+                                            &cbs_h264_free_pps);
+            if (err < 0)
+                return err;
+            pps = unit->content;
+
+            err = cbs_h264_read_pps(ctx, &gbc, pps);
+            if (err < 0)
+                return err;
+
+            err = cbs_h264_replace_pps(ctx, unit);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_SLICE:
+    case H264_NAL_IDR_SLICE:
+    case H264_NAL_AUXILIARY_SLICE:
+        {
+            H264RawSlice *slice;
+            int pos, len;
+
+            err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(*slice),
+                                            &cbs_h264_free_slice);
+            if (err < 0)
+                return err;
+            slice = unit->content;
+
+            err = cbs_h264_read_slice_header(ctx, &gbc, &slice->header);
+            if (err < 0)
+                return err;
+
+            pos = get_bits_count(&gbc);
+            len = unit->data_size;
+            if (!unit->data[len - 1]) {
+                int z;
+                for (z = 0; z < len && !unit->data[len - z - 1]; z++);
+                av_log(ctx->log_ctx, AV_LOG_DEBUG, "Deleted %d trailing zeroes "
+                       "from slice data.\n", z);
+                len -= z;
+            }
+
+            slice->data_size = len - pos / 8;
+            slice->data_ref  = av_buffer_ref(unit->data_ref);
+            if (!slice->data_ref)
+                return AVERROR(ENOMEM);
+            slice->data = unit->data + pos / 8;
+            slice->data_bit_start = pos % 8;
+        }
+        break;
+
+    case H264_NAL_AUD:
+        {
+            err = ff_cbs_alloc_unit_content(ctx, unit,
+                                            sizeof(H264RawAUD), NULL);
+            if (err < 0)
+                return err;
+
+            err = cbs_h264_read_aud(ctx, &gbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_SEI:
+        {
+            err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(H264RawSEI),
+                                            &cbs_h264_free_sei);
+            if (err < 0)
+                return err;
+
+            err = cbs_h264_read_sei(ctx, &gbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_FILLER_DATA:
+        {
+            err = ff_cbs_alloc_unit_content(ctx, unit,
+                                            sizeof(H264RawFiller), NULL);
+            if (err < 0)
+                return err;
+
+            err = cbs_h264_read_filler(ctx, &gbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_END_SEQUENCE:
+        return 0;
+
+    default:
+        return AVERROR(ENOSYS);
+    }
+
+    return 0;
+}
+
+static int cbs_h265_read_nal_unit(CodedBitstreamContext *ctx,
+                                  CodedBitstreamUnit *unit)
+{
+    GetBitContext gbc;
+    int err;
+
+    err = init_get_bits(&gbc, unit->data, 8 * unit->data_size);
+    if (err < 0)
+        return err;
+
+    switch (unit->type) {
+    case HEVC_NAL_VPS:
+        {
+            H265RawVPS *vps;
+
+            err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(*vps),
+                                            &cbs_h265_free_vps);
+            if (err < 0)
+                return err;
+            vps = unit->content;
+
+            err = cbs_h265_read_vps(ctx, &gbc, vps);
+            if (err < 0)
+                return err;
+
+            err = cbs_h265_replace_vps(ctx, unit);
+            if (err < 0)
+                return err;
+        }
+        break;
+    case HEVC_NAL_SPS:
+        {
+            H265RawSPS *sps;
+
+            err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(*sps),
+                                            &cbs_h265_free_sps);
+            if (err < 0)
+                return err;
+            sps = unit->content;
+
+            err = cbs_h265_read_sps(ctx, &gbc, sps);
+            if (err < 0)
+                return err;
+
+            err = cbs_h265_replace_sps(ctx, unit);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case HEVC_NAL_PPS:
+        {
+            H265RawPPS *pps;
+
+            err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(*pps),
+                                            &cbs_h265_free_pps);
+            if (err < 0)
+                return err;
+            pps = unit->content;
+
+            err = cbs_h265_read_pps(ctx, &gbc, pps);
+            if (err < 0)
+                return err;
+
+            err = cbs_h265_replace_pps(ctx, unit);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case HEVC_NAL_TRAIL_N:
+    case HEVC_NAL_TRAIL_R:
+    case HEVC_NAL_TSA_N:
+    case HEVC_NAL_TSA_R:
+    case HEVC_NAL_STSA_N:
+    case HEVC_NAL_STSA_R:
+    case HEVC_NAL_RADL_N:
+    case HEVC_NAL_RADL_R:
+    case HEVC_NAL_RASL_N:
+    case HEVC_NAL_RASL_R:
+    case HEVC_NAL_BLA_W_LP:
+    case HEVC_NAL_BLA_W_RADL:
+    case HEVC_NAL_BLA_N_LP:
+    case HEVC_NAL_IDR_W_RADL:
+    case HEVC_NAL_IDR_N_LP:
+    case HEVC_NAL_CRA_NUT:
+        {
+            H265RawSlice *slice;
+            int pos, len;
+
+            err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(*slice),
+                                            &cbs_h265_free_slice);
+            if (err < 0)
+                return err;
+            slice = unit->content;
+
+            err = cbs_h265_read_slice_segment_header(ctx, &gbc, &slice->header);
+            if (err < 0)
+                return err;
+
+            pos = get_bits_count(&gbc);
+            len = unit->data_size;
+            if (!unit->data[len - 1]) {
+                int z;
+                for (z = 0; z < len && !unit->data[len - z - 1]; z++);
+                av_log(ctx->log_ctx, AV_LOG_DEBUG, "Deleted %d trailing zeroes "
+                       "from slice data.\n", z);
+                len -= z;
+            }
+
+            slice->data_size = len - pos / 8;
+            slice->data_ref  = av_buffer_ref(unit->data_ref);
+            if (!slice->data_ref)
+                return AVERROR(ENOMEM);
+            slice->data = unit->data + pos / 8;
+            slice->data_bit_start = pos % 8;
+        }
+        break;
+
+    case HEVC_NAL_AUD:
+        {
+            err = ff_cbs_alloc_unit_content(ctx, unit,
+                                            sizeof(H265RawAUD), NULL);
+            if (err < 0)
+                return err;
+
+            err = cbs_h265_read_aud(ctx, &gbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case HEVC_NAL_SEI_PREFIX:
+        {
+            err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(H265RawSEI),
+                                            &cbs_h265_free_sei);
+
+            if (err < 0)
+                return err;
+
+            err = cbs_h265_read_sei(ctx, &gbc, unit->content);
+
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    default:
+        return AVERROR(ENOSYS);
+    }
+
+    return 0;
+}
+
+static int cbs_h264_write_nal_unit(CodedBitstreamContext *ctx,
+                                   CodedBitstreamUnit *unit,
+                                   PutBitContext *pbc)
+{
+    int err;
+
+    switch (unit->type) {
+    case H264_NAL_SPS:
+        {
+            H264RawSPS *sps = unit->content;
+
+            err = cbs_h264_write_sps(ctx, pbc, sps);
+            if (err < 0)
+                return err;
+
+            err = cbs_h264_replace_sps(ctx, unit);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_SPS_EXT:
+        {
+            H264RawSPSExtension *sps_ext = unit->content;
+
+            err = cbs_h264_write_sps_extension(ctx, pbc, sps_ext);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_PPS:
+        {
+            H264RawPPS *pps = unit->content;
+
+            err = cbs_h264_write_pps(ctx, pbc, pps);
+            if (err < 0)
+                return err;
+
+            err = cbs_h264_replace_pps(ctx, unit);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_SLICE:
+    case H264_NAL_IDR_SLICE:
+    case H264_NAL_AUXILIARY_SLICE:
+        {
+            H264RawSlice *slice = unit->content;
+            GetBitContext gbc;
+            int bits_left, end, zeroes;
+
+            err = cbs_h264_write_slice_header(ctx, pbc, &slice->header);
+            if (err < 0)
+                return err;
+
+            if (slice->data) {
+                if (slice->data_size * 8 + 8 > put_bits_left(pbc))
+                    return AVERROR(ENOSPC);
+
+                init_get_bits(&gbc, slice->data, slice->data_size * 8);
+                skip_bits_long(&gbc, slice->data_bit_start);
+
+                // Copy in two-byte blocks, but stop before copying the
+                // rbsp_stop_one_bit in the final byte.
+                while (get_bits_left(&gbc) > 23)
+                    put_bits(pbc, 16, get_bits(&gbc, 16));
+
+                bits_left = get_bits_left(&gbc);
+                end = get_bits(&gbc, bits_left);
+
+                // rbsp_stop_one_bit must be present here.
+                av_assert0(end);
+                zeroes = ff_ctz(end);
+                if (bits_left > zeroes + 1)
+                    put_bits(pbc, bits_left - zeroes - 1,
+                             end >> (zeroes + 1));
+                put_bits(pbc, 1, 1);
+                while (put_bits_count(pbc) % 8 != 0)
+                    put_bits(pbc, 1, 0);
+            } else {
+                // No slice data - that was just the header.
+                // (Bitstream may be unaligned!)
+            }
+        }
+        break;
+
+    case H264_NAL_AUD:
+        {
+            err = cbs_h264_write_aud(ctx, pbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_SEI:
+        {
+            err = cbs_h264_write_sei(ctx, pbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_FILLER_DATA:
+        {
+            err = cbs_h264_write_filler(ctx, pbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    default:
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Write unimplemented for "
+               "NAL unit type %"PRIu32".\n", unit->type);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    return 0;
+}
+
+static int cbs_h265_write_nal_unit(CodedBitstreamContext *ctx,
+                                   CodedBitstreamUnit *unit,
+                                   PutBitContext *pbc)
+{
+    int err;
+
+    switch (unit->type) {
+    case HEVC_NAL_VPS:
+        {
+            H265RawVPS *vps = unit->content;
+
+            err = cbs_h265_write_vps(ctx, pbc, vps);
+            if (err < 0)
+                return err;
+
+            err = cbs_h265_replace_vps(ctx, unit);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case HEVC_NAL_SPS:
+        {
+            H265RawSPS *sps = unit->content;
+
+            err = cbs_h265_write_sps(ctx, pbc, sps);
+            if (err < 0)
+                return err;
+
+            err = cbs_h265_replace_sps(ctx, unit);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case HEVC_NAL_PPS:
+        {
+            H265RawPPS *pps = unit->content;
+
+            err = cbs_h265_write_pps(ctx, pbc, pps);
+            if (err < 0)
+                return err;
+
+            err = cbs_h265_replace_pps(ctx, unit);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case HEVC_NAL_TRAIL_N:
+    case HEVC_NAL_TRAIL_R:
+    case HEVC_NAL_TSA_N:
+    case HEVC_NAL_TSA_R:
+    case HEVC_NAL_STSA_N:
+    case HEVC_NAL_STSA_R:
+    case HEVC_NAL_RADL_N:
+    case HEVC_NAL_RADL_R:
+    case HEVC_NAL_RASL_N:
+    case HEVC_NAL_RASL_R:
+    case HEVC_NAL_BLA_W_LP:
+    case HEVC_NAL_BLA_W_RADL:
+    case HEVC_NAL_BLA_N_LP:
+    case HEVC_NAL_IDR_W_RADL:
+    case HEVC_NAL_IDR_N_LP:
+    case HEVC_NAL_CRA_NUT:
+        {
+            H265RawSlice *slice = unit->content;
+            GetBitContext gbc;
+            int bits_left, end, zeroes;
+
+            err = cbs_h265_write_slice_segment_header(ctx, pbc, &slice->header);
+            if (err < 0)
+                return err;
+
+            if (slice->data) {
+                if (slice->data_size * 8 + 8 > put_bits_left(pbc))
+                    return AVERROR(ENOSPC);
+
+                init_get_bits(&gbc, slice->data, slice->data_size * 8);
+                skip_bits_long(&gbc, slice->data_bit_start);
+
+                // Copy in two-byte blocks, but stop before copying the
+                // rbsp_stop_one_bit in the final byte.
+                while (get_bits_left(&gbc) > 23)
+                    put_bits(pbc, 16, get_bits(&gbc, 16));
+
+                bits_left = get_bits_left(&gbc);
+                end = get_bits(&gbc, bits_left);
+
+                // rbsp_stop_one_bit must be present here.
+                av_assert0(end);
+                zeroes = ff_ctz(end);
+                if (bits_left > zeroes + 1)
+                    put_bits(pbc, bits_left - zeroes - 1,
+                             end >> (zeroes + 1));
+                put_bits(pbc, 1, 1);
+                while (put_bits_count(pbc) % 8 != 0)
+                    put_bits(pbc, 1, 0);
+            } else {
+                // No slice data - that was just the header.
+            }
+        }
+        break;
+
+    case HEVC_NAL_AUD:
+        {
+            err = cbs_h265_write_aud(ctx, pbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case HEVC_NAL_SEI_PREFIX:
+        {
+            err = cbs_h265_write_sei(ctx, pbc, unit->content);
+
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    default:
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Write unimplemented for "
+               "NAL unit type %"PRIu32".\n", unit->type);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    return 0;
+}
+
+static int cbs_h2645_write_nal_unit(CodedBitstreamContext *ctx,
+                                    CodedBitstreamUnit *unit)
+{
+    CodedBitstreamH2645Context *priv = ctx->priv_data;
+    enum AVCodecID codec_id = ctx->codec->codec_id;
+    PutBitContext pbc;
+    int err;
+
+    if (!priv->write_buffer) {
+        // Initial write buffer size is 1MB.
+        priv->write_buffer_size = 1024 * 1024;
+
+    reallocate_and_try_again:
+        err = av_reallocp(&priv->write_buffer, priv->write_buffer_size);
+        if (err < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Unable to allocate a "
+                   "sufficiently large write buffer (last attempt "
+                   "%"SIZE_SPECIFIER" bytes).\n", priv->write_buffer_size);
+            return err;
+        }
+    }
+
+    init_put_bits(&pbc, priv->write_buffer, priv->write_buffer_size);
+
+    if (codec_id == AV_CODEC_ID_H264)
+        err = cbs_h264_write_nal_unit(ctx, unit, &pbc);
+    else
+        err = cbs_h265_write_nal_unit(ctx, unit, &pbc);
+
+    if (err == AVERROR(ENOSPC)) {
+        // Overflow.
+        priv->write_buffer_size *= 2;
+        goto reallocate_and_try_again;
+    }
+    // Overflow but we didn't notice.
+    av_assert0(put_bits_count(&pbc) <= 8 * priv->write_buffer_size);
+
+    if (err < 0) {
+        // Write failed for some other reason.
+        return err;
+    }
+
+    if (put_bits_count(&pbc) % 8)
+        unit->data_bit_padding = 8 - put_bits_count(&pbc) % 8;
+    else
+        unit->data_bit_padding = 0;
+
+    unit->data_size = (put_bits_count(&pbc) + 7) / 8;
+    flush_put_bits(&pbc);
+
+    err = ff_cbs_alloc_unit_data(ctx, unit, unit->data_size);
+    if (err < 0)
+        return err;
+
+    memcpy(unit->data, priv->write_buffer, unit->data_size);
+
+    return 0;
+}
+
+static int cbs_h2645_assemble_fragment(CodedBitstreamContext *ctx,
+                                       CodedBitstreamFragment *frag)
+{
+    uint8_t *data;
+    size_t max_size, dp, sp;
+    int err, i, zero_run;
+
+    for (i = 0; i < frag->nb_units; i++) {
+        // Data should already all have been written when we get here.
+        av_assert0(frag->units[i].data);
+    }
+
+    max_size = 0;
+    for (i = 0; i < frag->nb_units; i++) {
+        // Start code + content with worst-case emulation prevention.
+        max_size += 3 + frag->units[i].data_size * 3 / 2;
+    }
+
+    data = av_malloc(max_size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!data)
+        return AVERROR(ENOMEM);
+
+    dp = 0;
+    for (i = 0; i < frag->nb_units; i++) {
+        CodedBitstreamUnit *unit = &frag->units[i];
+
+        if (unit->data_bit_padding > 0) {
+            if (i < frag->nb_units - 1)
+                av_log(ctx->log_ctx, AV_LOG_WARNING, "Probably invalid "
+                       "unaligned padding on non-final NAL unit.\n");
+            else
+                frag->data_bit_padding = unit->data_bit_padding;
+        }
+
+        if ((ctx->codec->codec_id == AV_CODEC_ID_H264 &&
+             (unit->type == H264_NAL_SPS ||
+              unit->type == H264_NAL_PPS)) ||
+            (ctx->codec->codec_id == AV_CODEC_ID_HEVC &&
+             (unit->type == HEVC_NAL_VPS ||
+              unit->type == HEVC_NAL_SPS ||
+              unit->type == HEVC_NAL_PPS)) ||
+            i == 0 /* (Assume this is the start of an access unit.) */) {
+            // zero_byte
+            data[dp++] = 0;
+        }
+        // start_code_prefix_one_3bytes
+        data[dp++] = 0;
+        data[dp++] = 0;
+        data[dp++] = 1;
+
+        zero_run = 0;
+        for (sp = 0; sp < unit->data_size; sp++) {
+            if (zero_run < 2) {
+                if (unit->data[sp] == 0)
+                    ++zero_run;
+                else
+                    zero_run = 0;
+            } else {
+                if ((unit->data[sp] & ~3) == 0) {
+                    // emulation_prevention_three_byte
+                    data[dp++] = 3;
+                }
+                zero_run = unit->data[sp] == 0;
+            }
+            data[dp++] = unit->data[sp];
+        }
+    }
+
+    av_assert0(dp <= max_size);
+    err = av_reallocp(&data, dp + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (err)
+        return err;
+    memset(data + dp, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+    frag->data_ref = av_buffer_create(data, dp + AV_INPUT_BUFFER_PADDING_SIZE,
+                                      NULL, NULL, 0);
+    if (!frag->data_ref) {
+        av_freep(&data);
+        return AVERROR(ENOMEM);
+    }
+
+    frag->data = data;
+    frag->data_size = dp;
+
+    return 0;
+}
+
+static void cbs_h264_close(CodedBitstreamContext *ctx)
+{
+    CodedBitstreamH264Context *h264 = ctx->priv_data;
+    int i;
+
+    ff_h2645_packet_uninit(&h264->common.read_packet);
+
+    av_freep(&h264->common.write_buffer);
+
+    for (i = 0; i < FF_ARRAY_ELEMS(h264->sps); i++)
+        av_buffer_unref(&h264->sps_ref[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(h264->pps); i++)
+        av_buffer_unref(&h264->pps_ref[i]);
+}
+
+static void cbs_h265_close(CodedBitstreamContext *ctx)
+{
+    CodedBitstreamH265Context *h265 = ctx->priv_data;
+    int i;
+
+    ff_h2645_packet_uninit(&h265->common.read_packet);
+
+    av_freep(&h265->common.write_buffer);
+
+    for (i = 0; i < FF_ARRAY_ELEMS(h265->vps); i++)
+        av_buffer_unref(&h265->vps_ref[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(h265->sps); i++)
+        av_buffer_unref(&h265->sps_ref[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(h265->pps); i++)
+        av_buffer_unref(&h265->pps_ref[i]);
+}
+
+const CodedBitstreamType ff_cbs_type_h264 = {
+    .codec_id          = AV_CODEC_ID_H264,
+
+    .priv_data_size    = sizeof(CodedBitstreamH264Context),
+
+    .split_fragment    = &cbs_h2645_split_fragment,
+    .read_unit         = &cbs_h264_read_nal_unit,
+    .write_unit        = &cbs_h2645_write_nal_unit,
+    .assemble_fragment = &cbs_h2645_assemble_fragment,
+
+    .close             = &cbs_h264_close,
+};
+
+const CodedBitstreamType ff_cbs_type_h265 = {
+    .codec_id          = AV_CODEC_ID_HEVC,
+
+    .priv_data_size    = sizeof(CodedBitstreamH265Context),
+
+    .split_fragment    = &cbs_h2645_split_fragment,
+    .read_unit         = &cbs_h265_read_nal_unit,
+    .write_unit        = &cbs_h2645_write_nal_unit,
+    .assemble_fragment = &cbs_h2645_assemble_fragment,
+
+    .close             = &cbs_h265_close,
+};
+
+int ff_cbs_h264_add_sei_message(CodedBitstreamContext *ctx,
+                                CodedBitstreamFragment *au,
+                                const H264RawSEIPayload *payload)
+{
+    H264RawSEI *sei;
+    CodedBitstreamUnit *nal = NULL;
+    int err, i;
+
+    // Find an existing SEI NAL unit to add to.
+    for (i = 0; i < au->nb_units; i++) {
+        if (au->units[i].type == H264_NAL_SEI) {
+            nal = &au->units[i];
+            break;
+        }
+    }
+    if (nal) {
+        sei = nal->content;
+
+    } else {
+        // Need to make a new SEI NAL unit.  Insert it before the first
+        // slice data NAL unit; if no slice data, add at the end.
+        AVBufferRef *sei_ref;
+
+        sei = av_mallocz(sizeof(*sei));
+        if (!sei)
+            return AVERROR(ENOMEM);
+
+        sei->nal_unit_header.nal_unit_type = H264_NAL_SEI;
+        sei->nal_unit_header.nal_ref_idc   = 0;
+
+        sei_ref = av_buffer_create((uint8_t*)sei, sizeof(*sei),
+                                   &cbs_h264_free_sei, ctx, 0);
+        if (!sei_ref) {
+            av_freep(&sei);
+            return AVERROR(ENOMEM);
+        }
+
+        for (i = 0; i < au->nb_units; i++) {
+            if (au->units[i].type == H264_NAL_SLICE ||
+                au->units[i].type == H264_NAL_IDR_SLICE)
+                break;
+        }
+
+        err = ff_cbs_insert_unit_content(ctx, au, i, H264_NAL_SEI,
+                                         sei, sei_ref);
+        av_buffer_unref(&sei_ref);
+        if (err < 0)
+            return err;
+    }
+
+    if (sei->payload_count >= H264_MAX_SEI_PAYLOADS) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Too many payloads in "
+               "SEI NAL unit.\n");
+        return AVERROR(EINVAL);
+    }
+
+    memcpy(&sei->payload[sei->payload_count], payload, sizeof(*payload));
+    ++sei->payload_count;
+
+    return 0;
+}
+
+int ff_cbs_h264_delete_sei_message(CodedBitstreamContext *ctx,
+                                   CodedBitstreamFragment *au,
+                                   CodedBitstreamUnit *nal,
+                                   int position)
+{
+    H264RawSEI *sei = nal->content;
+
+    av_assert0(nal->type == H264_NAL_SEI);
+    av_assert0(position >= 0 && position < sei->payload_count);
+
+    if (position == 0 && sei->payload_count == 1) {
+        // Deleting NAL unit entirely.
+        int i;
+
+        for (i = 0; i < au->nb_units; i++) {
+            if (&au->units[i] == nal)
+                break;
+        }
+        av_assert0(i < au->nb_units && "NAL unit not in access unit.");
+
+        return ff_cbs_delete_unit(ctx, au, i);
+    } else {
+        cbs_h264_free_sei_payload(&sei->payload[position]);
+
+        --sei->payload_count;
+        memmove(sei->payload + position,
+                sei->payload + position + 1,
+                (sei->payload_count - position) * sizeof(*sei->payload));
+
+        return 0;
+    }
+}

diff --git a/libavcodec/cbs_h2645.h b/libavcodec/cbs_h2645.h
new file mode 100644
index 0000000..f4cf65b
--- /dev/null
+++ b/libavcodec/cbs_h2645.h

@@ -0,0 +1,43 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_H2645_H
+#define AVCODEC_CBS_H2645_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "h2645_parse.h"
+
+
+typedef struct CodedBitstreamH2645Context {
+    // If set, the stream being read is in MP4 (AVCC/HVCC) format.  If not
+    // set, the stream is assumed to be in annex B format.
+    int mp4;
+    // Size in bytes of the NAL length field for MP4 format.
+    int nal_length_size;
+    // Packet reader.
+    H2645Packet read_packet;
+
+    // Write buffer
+    uint8_t *write_buffer;
+    size_t write_buffer_size;
+} CodedBitstreamH2645Context;
+
+
+#endif /* AVCODEC_CBS_H2645_H */

diff --git a/libavcodec/cbs_h264_syntax_template.c b/libavcodec/cbs_h264_syntax_template.c
new file mode 100644
index 0000000..1c8d7d5
--- /dev/null
+++ b/libavcodec/cbs_h264_syntax_template.c

@@ -0,0 +1,1377 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static int FUNC(rbsp_trailing_bits)(CodedBitstreamContext *ctx, RWContext *rw)
+{
+    int err;
+
+    fixed(1, rbsp_stop_one_bit, 1);
+    while (byte_alignment(rw) != 0)
+        fixed(1, rbsp_alignment_zero_bit, 0);
+
+    return 0;
+}
+
+static int FUNC(nal_unit_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                                 H264RawNALUnitHeader *current,
+                                 uint32_t valid_type_mask)
+{
+    int err;
+
+    u(1, forbidden_zero_bit, 0, 0);
+    u(2, nal_ref_idc,        0, 3);
+    u(5, nal_unit_type,      0, 31);
+
+    if (!(1 << current->nal_unit_type & valid_type_mask)) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid NAL unit type %d.\n",
+               current->nal_unit_type);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (current->nal_unit_type == 14 ||
+        current->nal_unit_type == 20 ||
+        current->nal_unit_type == 21) {
+        if (current->nal_unit_type != 21)
+            flag(svc_extension_flag);
+        else
+            flag(avc_3d_extension_flag);
+
+        if (current->svc_extension_flag) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "SVC not supported.\n");
+            return AVERROR_PATCHWELCOME;
+
+        } else if (current->avc_3d_extension_flag) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "3DAVC not supported.\n");
+            return AVERROR_PATCHWELCOME;
+
+        } else {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "MVC not supported.\n");
+            return AVERROR_PATCHWELCOME;
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(scaling_list)(CodedBitstreamContext *ctx, RWContext *rw,
+                              H264RawScalingList *current,
+                              int size_of_scaling_list)
+{
+    int err, i, scale;
+
+    scale = 8;
+    for (i = 0; i < size_of_scaling_list; i++) {
+        ses(delta_scale[i], -128, +127, 1, i);
+        scale = (scale + current->delta_scale[i] + 256) % 256;
+        if (scale == 0)
+            break;
+    }
+
+    return 0;
+}
+
+static int FUNC(hrd_parameters)(CodedBitstreamContext *ctx, RWContext *rw,
+                                H264RawHRD *current)
+{
+    int err, i;
+
+    ue(cpb_cnt_minus1, 0, 31);
+    u(4, bit_rate_scale, 0, 15);
+    u(4, cpb_size_scale, 0, 15);
+
+    for (i = 0; i <= current->cpb_cnt_minus1; i++) {
+        ues(bit_rate_value_minus1[i], 0, UINT32_MAX - 1, 1, i);
+        ues(cpb_size_value_minus1[i], 0, UINT32_MAX - 1, 1, i);
+        flags(cbr_flag[i], 1, i);
+    }
+
+    u(5, initial_cpb_removal_delay_length_minus1, 0, 31);
+    u(5, cpb_removal_delay_length_minus1,         0, 31);
+    u(5, dpb_output_delay_length_minus1,          0, 31);
+    u(5, time_offset_length,                      0, 31);
+
+    return 0;
+}
+
+static int FUNC(vui_parameters)(CodedBitstreamContext *ctx, RWContext *rw,
+                                H264RawVUI *current, H264RawSPS *sps)
+{
+    int err;
+
+    flag(aspect_ratio_info_present_flag);
+    if (current->aspect_ratio_info_present_flag) {
+        u(8, aspect_ratio_idc, 0, 255);
+        if (current->aspect_ratio_idc == 255) {
+            u(16, sar_width,  0, 65535);
+            u(16, sar_height, 0, 65535);
+        }
+    } else {
+        infer(aspect_ratio_idc, 0);
+    }
+
+    flag(overscan_info_present_flag);
+    if (current->overscan_info_present_flag)
+        flag(overscan_appropriate_flag);
+
+    flag(video_signal_type_present_flag);
+    if (current->video_signal_type_present_flag) {
+        u(3, video_format, 0, 7);
+        flag(video_full_range_flag);
+        flag(colour_description_present_flag);
+        if (current->colour_description_present_flag) {
+            u(8, colour_primaries,         0, 255);
+            u(8, transfer_characteristics, 0, 255);
+            u(8, matrix_coefficients,      0, 255);
+        }
+    } else {
+        infer(video_format,             5);
+        infer(video_full_range_flag,    0);
+        infer(colour_primaries,         2);
+        infer(transfer_characteristics, 2);
+        infer(matrix_coefficients,      2);
+    }
+
+    flag(chroma_loc_info_present_flag);
+    if (current->chroma_loc_info_present_flag) {
+        ue(chroma_sample_loc_type_top_field,    0, 5);
+        ue(chroma_sample_loc_type_bottom_field, 0, 5);
+    } else {
+        infer(chroma_sample_loc_type_top_field,    0);
+        infer(chroma_sample_loc_type_bottom_field, 0);
+    }
+
+    flag(timing_info_present_flag);
+    if (current->timing_info_present_flag) {
+        u(32, num_units_in_tick, 1, UINT32_MAX);
+        u(32, time_scale,        1, UINT32_MAX);
+        flag(fixed_frame_rate_flag);
+    } else {
+        infer(fixed_frame_rate_flag, 0);
+    }
+
+    flag(nal_hrd_parameters_present_flag);
+    if (current->nal_hrd_parameters_present_flag)
+        CHECK(FUNC(hrd_parameters)(ctx, rw, &current->nal_hrd_parameters));
+
+    flag(vcl_hrd_parameters_present_flag);
+    if (current->vcl_hrd_parameters_present_flag)
+        CHECK(FUNC(hrd_parameters)(ctx, rw, &current->vcl_hrd_parameters));
+
+    if (current->nal_hrd_parameters_present_flag ||
+        current->vcl_hrd_parameters_present_flag)
+        flag(low_delay_hrd_flag);
+    else
+        infer(low_delay_hrd_flag, 1 - current->fixed_frame_rate_flag);
+
+    flag(pic_struct_present_flag);
+
+    flag(bitstream_restriction_flag);
+    if (current->bitstream_restriction_flag) {
+        flag(motion_vectors_over_pic_boundaries_flag);
+        ue(max_bytes_per_pic_denom, 0, 16);
+        ue(max_bits_per_mb_denom,   0, 16);
+        // The current version of the standard constrains this to be in
+        // [0,15], but older versions allow 16.
+        ue(log2_max_mv_length_horizontal, 0, 16);
+        ue(log2_max_mv_length_vertical,   0, 16);
+        ue(max_num_reorder_frames,  0, H264_MAX_DPB_FRAMES);
+        ue(max_dec_frame_buffering, 0, H264_MAX_DPB_FRAMES);
+    } else {
+        infer(motion_vectors_over_pic_boundaries_flag, 1);
+        infer(max_bytes_per_pic_denom, 2);
+        infer(max_bits_per_mb_denom,   1);
+        infer(log2_max_mv_length_horizontal, 15);
+        infer(log2_max_mv_length_vertical,   15);
+
+        if ((sps->profile_idc ==  44 || sps->profile_idc ==  86 ||
+             sps->profile_idc == 100 || sps->profile_idc == 110 ||
+             sps->profile_idc == 122 || sps->profile_idc == 244) &&
+            sps->constraint_set3_flag) {
+            infer(max_num_reorder_frames,  0);
+            infer(max_dec_frame_buffering, 0);
+        } else {
+            infer(max_num_reorder_frames,  H264_MAX_DPB_FRAMES);
+            infer(max_dec_frame_buffering, H264_MAX_DPB_FRAMES);
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(vui_parameters_default)(CodedBitstreamContext *ctx,
+                                        RWContext *rw, H264RawVUI *current,
+                                        H264RawSPS *sps)
+{
+    infer(aspect_ratio_idc, 0);
+
+    infer(video_format,             5);
+    infer(video_full_range_flag,    0);
+    infer(colour_primaries,         2);
+    infer(transfer_characteristics, 2);
+    infer(matrix_coefficients,      2);
+
+    infer(chroma_sample_loc_type_top_field,    0);
+    infer(chroma_sample_loc_type_bottom_field, 0);
+
+    infer(fixed_frame_rate_flag, 0);
+    infer(low_delay_hrd_flag,    1);
+
+    infer(pic_struct_present_flag, 0);
+
+    infer(motion_vectors_over_pic_boundaries_flag, 1);
+    infer(max_bytes_per_pic_denom, 2);
+    infer(max_bits_per_mb_denom,   1);
+    infer(log2_max_mv_length_horizontal, 15);
+    infer(log2_max_mv_length_vertical,   15);
+
+    if ((sps->profile_idc ==  44 || sps->profile_idc ==  86 ||
+         sps->profile_idc == 100 || sps->profile_idc == 110 ||
+         sps->profile_idc == 122 || sps->profile_idc == 244) &&
+        sps->constraint_set3_flag) {
+        infer(max_num_reorder_frames,  0);
+        infer(max_dec_frame_buffering, 0);
+    } else {
+        infer(max_num_reorder_frames,  H264_MAX_DPB_FRAMES);
+        infer(max_dec_frame_buffering, H264_MAX_DPB_FRAMES);
+    }
+
+    return 0;
+}
+
+static int FUNC(sps)(CodedBitstreamContext *ctx, RWContext *rw,
+                     H264RawSPS *current)
+{
+    int err, i;
+
+    HEADER("Sequence Parameter Set");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header,
+                                1 << H264_NAL_SPS));
+
+    u(8, profile_idc, 0, 255);
+
+    flag(constraint_set0_flag);
+    flag(constraint_set1_flag);
+    flag(constraint_set2_flag);
+    flag(constraint_set3_flag);
+    flag(constraint_set4_flag);
+    flag(constraint_set5_flag);
+
+    u(2, reserved_zero_2bits,  0, 0);
+
+    u(8, level_idc, 0, 255);
+
+    ue(seq_parameter_set_id, 0, 31);
+
+    if (current->profile_idc == 100 || current->profile_idc == 110 ||
+        current->profile_idc == 122 || current->profile_idc == 244 ||
+        current->profile_idc ==  44 || current->profile_idc ==  83 ||
+        current->profile_idc ==  86 || current->profile_idc == 118 ||
+        current->profile_idc == 128 || current->profile_idc == 138) {
+        ue(chroma_format_idc, 0, 3);
+
+        if (current->chroma_format_idc == 3)
+            flag(separate_colour_plane_flag);
+        else
+            infer(separate_colour_plane_flag, 0);
+
+        ue(bit_depth_luma_minus8,   0, 6);
+        ue(bit_depth_chroma_minus8, 0, 6);
+
+        flag(qpprime_y_zero_transform_bypass_flag);
+
+        flag(seq_scaling_matrix_present_flag);
+        if (current->seq_scaling_matrix_present_flag) {
+            for (i = 0; i < ((current->chroma_format_idc != 3) ? 8 : 12); i++) {
+                flags(seq_scaling_list_present_flag[i], 1, i);
+                if (current->seq_scaling_list_present_flag[i]) {
+                    if (i < 6)
+                        CHECK(FUNC(scaling_list)(ctx, rw,
+                                                 &current->scaling_list_4x4[i],
+                                                 16));
+                    else
+                        CHECK(FUNC(scaling_list)(ctx, rw,
+                                                 &current->scaling_list_8x8[i - 6],
+                                                 64));
+                }
+            }
+        }
+    } else {
+        infer(chroma_format_idc, current->profile_idc == 183 ? 0 : 1);
+
+        infer(separate_colour_plane_flag, 0);
+        infer(bit_depth_luma_minus8,      0);
+        infer(bit_depth_chroma_minus8,    0);
+    }
+
+    ue(log2_max_frame_num_minus4, 0, 12);
+    ue(pic_order_cnt_type, 0, 2);
+
+    if (current->pic_order_cnt_type == 0) {
+        ue(log2_max_pic_order_cnt_lsb_minus4, 0, 12);
+    } else if (current->pic_order_cnt_type == 1) {
+        flag(delta_pic_order_always_zero_flag);
+        se(offset_for_non_ref_pic,         INT32_MIN + 1, INT32_MAX);
+        se(offset_for_top_to_bottom_field, INT32_MIN + 1, INT32_MAX);
+        ue(num_ref_frames_in_pic_order_cnt_cycle, 0, 255);
+
+        for (i = 0; i < current->num_ref_frames_in_pic_order_cnt_cycle; i++)
+            ses(offset_for_ref_frame[i], INT32_MIN + 1, INT32_MAX, 1, i);
+    }
+
+    ue(max_num_ref_frames, 0, H264_MAX_DPB_FRAMES);
+    flag(gaps_in_frame_num_allowed_flag);
+
+    ue(pic_width_in_mbs_minus1,        0, H264_MAX_MB_WIDTH);
+    ue(pic_height_in_map_units_minus1, 0, H264_MAX_MB_HEIGHT);
+
+    flag(frame_mbs_only_flag);
+    if (!current->frame_mbs_only_flag)
+        flag(mb_adaptive_frame_field_flag);
+
+    flag(direct_8x8_inference_flag);
+
+    flag(frame_cropping_flag);
+    if (current->frame_cropping_flag) {
+        ue(frame_crop_left_offset,   0, H264_MAX_WIDTH);
+        ue(frame_crop_right_offset,  0, H264_MAX_WIDTH);
+        ue(frame_crop_top_offset,    0, H264_MAX_HEIGHT);
+        ue(frame_crop_bottom_offset, 0, H264_MAX_HEIGHT);
+    }
+
+    flag(vui_parameters_present_flag);
+    if (current->vui_parameters_present_flag)
+        CHECK(FUNC(vui_parameters)(ctx, rw, &current->vui, current));
+    else
+        CHECK(FUNC(vui_parameters_default)(ctx, rw, &current->vui, current));
+
+    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+    return 0;
+}
+
+static int FUNC(sps_extension)(CodedBitstreamContext *ctx, RWContext *rw,
+                               H264RawSPSExtension *current)
+{
+    int err;
+
+    HEADER("Sequence Parameter Set Extension");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header,
+                                1 << H264_NAL_SPS_EXT));
+
+    ue(seq_parameter_set_id, 0, 31);
+
+    ue(aux_format_idc, 0, 3);
+
+    if (current->aux_format_idc != 0) {
+        int bits;
+
+        ue(bit_depth_aux_minus8, 0, 4);
+        flag(alpha_incr_flag);
+
+        bits = current->bit_depth_aux_minus8 + 9;
+        u(bits, alpha_opaque_value,      0, MAX_UINT_BITS(bits));
+        u(bits, alpha_transparent_value, 0, MAX_UINT_BITS(bits));
+    }
+
+    flag(additional_extension_flag);
+
+    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+    return 0;
+}
+
+static int FUNC(pps)(CodedBitstreamContext *ctx, RWContext *rw,
+                     H264RawPPS *current)
+{
+    CodedBitstreamH264Context *h264 = ctx->priv_data;
+    const H264RawSPS *sps;
+    int err, i;
+
+    HEADER("Picture Parameter Set");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header,
+                                1 << H264_NAL_PPS));
+
+    ue(pic_parameter_set_id, 0, 255);
+    ue(seq_parameter_set_id, 0, 31);
+
+    sps = h264->sps[current->seq_parameter_set_id];
+    if (!sps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "SPS id %d not available.\n",
+               current->seq_parameter_set_id);
+        return AVERROR_INVALIDDATA;
+    }
+
+    flag(entropy_coding_mode_flag);
+    flag(bottom_field_pic_order_in_frame_present_flag);
+
+    ue(num_slice_groups_minus1, 0, 7);
+    if (current->num_slice_groups_minus1 > 0) {
+        unsigned int pic_size;
+        int iGroup;
+
+        pic_size = (sps->pic_width_in_mbs_minus1 + 1) *
+                   (sps->pic_height_in_map_units_minus1 + 1);
+
+        ue(slice_group_map_type, 0, 6);
+
+        if (current->slice_group_map_type == 0) {
+            for (iGroup = 0; iGroup <= current->num_slice_groups_minus1; iGroup++)
+                ues(run_length_minus1[iGroup], 0, pic_size - 1, 1, iGroup);
+
+        } else if (current->slice_group_map_type == 2) {
+            for (iGroup = 0; iGroup < current->num_slice_groups_minus1; iGroup++) {
+                ues(top_left[iGroup],       0, pic_size - 1, 1, iGroup);
+                ues(bottom_right[iGroup],
+                    current->top_left[iGroup], pic_size - 1, 1, iGroup);
+            }
+        } else if (current->slice_group_map_type == 3 ||
+                   current->slice_group_map_type == 4 ||
+                   current->slice_group_map_type == 5) {
+            flag(slice_group_change_direction_flag);
+            ue(slice_group_change_rate_minus1, 0, pic_size - 1);
+        } else if (current->slice_group_map_type == 6) {
+            ue(pic_size_in_map_units_minus1, pic_size - 1, pic_size - 1);
+
+            allocate(current->slice_group_id,
+                     current->pic_size_in_map_units_minus1 + 1);
+            for (i = 0; i <= current->pic_size_in_map_units_minus1; i++)
+                us(av_log2(2 * current->num_slice_groups_minus1 + 1),
+                   slice_group_id[i], 0, current->num_slice_groups_minus1, 1, i);
+        }
+    }
+
+    ue(num_ref_idx_l0_default_active_minus1, 0, 31);
+    ue(num_ref_idx_l1_default_active_minus1, 0, 31);
+
+    flag(weighted_pred_flag);
+    u(2, weighted_bipred_idc, 0, 2);
+
+    se(pic_init_qp_minus26, -26 - 6 * sps->bit_depth_luma_minus8, +25);
+    se(pic_init_qs_minus26, -26, +25);
+    se(chroma_qp_index_offset, -12, +12);
+
+    flag(deblocking_filter_control_present_flag);
+    flag(constrained_intra_pred_flag);
+    flag(redundant_pic_cnt_present_flag);
+
+    if (more_rbsp_data(current->more_rbsp_data))
+    {
+        flag(transform_8x8_mode_flag);
+
+        flag(pic_scaling_matrix_present_flag);
+        if (current->pic_scaling_matrix_present_flag) {
+            for (i = 0; i < 6 + (((sps->chroma_format_idc != 3) ? 2 : 6) *
+                                 current->transform_8x8_mode_flag); i++) {
+                flags(pic_scaling_list_present_flag[i], 1, i);
+                if (current->pic_scaling_list_present_flag[i]) {
+                    if (i < 6)
+                        CHECK(FUNC(scaling_list)(ctx, rw,
+                                                 &current->scaling_list_4x4[i],
+                                                 16));
+                    else
+                        CHECK(FUNC(scaling_list)(ctx, rw,
+                                                 &current->scaling_list_8x8[i - 6],
+                                                 64));
+                }
+            }
+        }
+
+        se(second_chroma_qp_index_offset, -12, +12);
+    } else {
+        infer(transform_8x8_mode_flag, 0);
+        infer(pic_scaling_matrix_present_flag, 0);
+        infer(second_chroma_qp_index_offset, current->chroma_qp_index_offset);
+    }
+
+    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+    return 0;
+}
+
+static int FUNC(sei_buffering_period)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      H264RawSEIBufferingPeriod *current)
+{
+    CodedBitstreamH264Context *h264 = ctx->priv_data;
+    const H264RawSPS *sps;
+    int err, i, length;
+
+    ue(seq_parameter_set_id, 0, 31);
+
+    sps = h264->sps[current->seq_parameter_set_id];
+    if (!sps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "SPS id %d not available.\n",
+               current->seq_parameter_set_id);
+        return AVERROR_INVALIDDATA;
+    }
+    h264->active_sps = sps;
+
+    if (sps->vui.nal_hrd_parameters_present_flag) {
+        for (i = 0; i <= sps->vui.nal_hrd_parameters.cpb_cnt_minus1; i++) {
+            length = sps->vui.nal_hrd_parameters.initial_cpb_removal_delay_length_minus1 + 1;
+            xu(length, initial_cpb_removal_delay[SchedSelIdx],
+               current->nal.initial_cpb_removal_delay[i],
+               1, MAX_UINT_BITS(length), 1, i);
+            xu(length, initial_cpb_removal_delay_offset[SchedSelIdx],
+               current->nal.initial_cpb_removal_delay_offset[i],
+               0, MAX_UINT_BITS(length), 1, i);
+        }
+    }
+
+    if (sps->vui.vcl_hrd_parameters_present_flag) {
+        for (i = 0; i <= sps->vui.vcl_hrd_parameters.cpb_cnt_minus1; i++) {
+            length = sps->vui.vcl_hrd_parameters.initial_cpb_removal_delay_length_minus1 + 1;
+            xu(length, initial_cpb_removal_delay[SchedSelIdx],
+               current->vcl.initial_cpb_removal_delay[i],
+               1, MAX_UINT_BITS(length), 1, i);
+            xu(length, initial_cpb_removal_delay_offset[SchedSelIdx],
+               current->vcl.initial_cpb_removal_delay_offset[i],
+               0, MAX_UINT_BITS(length), 1, i);
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(sei_pic_timestamp)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   H264RawSEIPicTimestamp *current)
+{
+    CodedBitstreamH264Context *h264 = ctx->priv_data;
+    const H264RawSPS *sps;
+    uint8_t time_offset_length;
+    int err;
+
+    u(2, ct_type, 0, 2);
+    flag(nuit_field_based_flag);
+    u(5, counting_type, 0, 6);
+    flag(full_timestamp_flag);
+    flag(discontinuity_flag);
+    flag(cnt_dropped_flag);
+    u(8, n_frames, 0, 255);
+    if (current->full_timestamp_flag) {
+            u(6, seconds_value, 0, 59);
+            u(6, minutes_value, 0, 59);
+            u(5, hours_value,   0, 23);
+    } else {
+        flag(seconds_flag);
+        if (current->seconds_flag) {
+            u(6, seconds_value, 0, 59);
+            flag(minutes_flag);
+            if (current->minutes_flag) {
+                u(6, minutes_value, 0, 59);
+                flag(hours_flag);
+                if (current->hours_flag)
+                    u(5, hours_value, 0, 23);
+            }
+        }
+    }
+
+    sps = h264->active_sps;
+    if (!sps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR,
+               "No active SPS for pic_timestamp.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (sps->vui.nal_hrd_parameters_present_flag)
+        time_offset_length = sps->vui.nal_hrd_parameters.time_offset_length;
+    else if (sps->vui.vcl_hrd_parameters_present_flag)
+        time_offset_length = sps->vui.vcl_hrd_parameters.time_offset_length;
+    else
+        time_offset_length = 24;
+
+    if (time_offset_length > 0)
+        u(time_offset_length, time_offset,
+          0, MAX_UINT_BITS(time_offset_length));
+    else
+        infer(time_offset, 0);
+
+    return 0;
+}
+
+static int FUNC(sei_pic_timing)(CodedBitstreamContext *ctx, RWContext *rw,
+                                H264RawSEIPicTiming *current)
+{
+    CodedBitstreamH264Context *h264 = ctx->priv_data;
+    const H264RawSPS *sps;
+    int err;
+
+    sps = h264->active_sps;
+    if (!sps) {
+        // If there is exactly one possible SPS but it is not yet active
+        // then just assume that it should be the active one.
+        int i, k = -1;
+        for (i = 0; i < H264_MAX_SPS_COUNT; i++) {
+            if (h264->sps[i]) {
+                if (k >= 0) {
+                    k = -1;
+                    break;
+                }
+                k = i;
+            }
+        }
+        if (k >= 0)
+            sps = h264->sps[k];
+    }
+    if (!sps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR,
+               "No active SPS for pic_timing.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (sps->vui.nal_hrd_parameters_present_flag ||
+        sps->vui.vcl_hrd_parameters_present_flag) {
+        const H264RawHRD *hrd;
+
+        if (sps->vui.nal_hrd_parameters_present_flag)
+            hrd = &sps->vui.nal_hrd_parameters;
+        else if (sps->vui.vcl_hrd_parameters_present_flag)
+            hrd = &sps->vui.vcl_hrd_parameters;
+        else {
+            av_log(ctx->log_ctx, AV_LOG_ERROR,
+                   "No HRD parameters for pic_timing.\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        u(hrd->cpb_removal_delay_length_minus1 + 1, cpb_removal_delay,
+          0, MAX_UINT_BITS(hrd->cpb_removal_delay_length_minus1 + 1));
+        u(hrd->dpb_output_delay_length_minus1 + 1, dpb_output_delay,
+          0, MAX_UINT_BITS(hrd->dpb_output_delay_length_minus1 + 1));
+    }
+
+    if (sps->vui.pic_struct_present_flag) {
+        static const int num_clock_ts[9] = {
+            1, 1, 1, 2, 2, 3, 3, 2, 3
+        };
+        int i;
+
+        u(4, pic_struct, 0, 8);
+        if (current->pic_struct > 8)
+            return AVERROR_INVALIDDATA;
+
+        for (i = 0; i < num_clock_ts[current->pic_struct]; i++) {
+            flags(clock_timestamp_flag[i], 1, i);
+            if (current->clock_timestamp_flag[i])
+                CHECK(FUNC(sei_pic_timestamp)(ctx, rw, &current->timestamp[i]));
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(sei_pan_scan_rect)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   H264RawSEIPanScanRect *current)
+{
+    int err, i;
+
+    ue(pan_scan_rect_id, 0, UINT32_MAX - 1);
+    flag(pan_scan_rect_cancel_flag);
+
+    if (!current->pan_scan_rect_cancel_flag) {
+        ue(pan_scan_cnt_minus1, 0, 2);
+
+        for (i = 0; i <= current->pan_scan_cnt_minus1; i++) {
+            ses(pan_scan_rect_left_offset[i],   INT32_MIN + 1, INT32_MAX, 1, i);
+            ses(pan_scan_rect_right_offset[i],  INT32_MIN + 1, INT32_MAX, 1, i);
+            ses(pan_scan_rect_top_offset[i],    INT32_MIN + 1, INT32_MAX, 1, i);
+            ses(pan_scan_rect_bottom_offset[i], INT32_MIN + 1, INT32_MAX, 1, i);
+        }
+
+        ue(pan_scan_rect_repetition_period, 0, 16384);
+    }
+
+    return 0;
+}
+
+static int FUNC(sei_user_data_registered)(CodedBitstreamContext *ctx, RWContext *rw,
+                                          H264RawSEIUserDataRegistered *current,
+                                          uint32_t *payload_size)
+{
+    int err, i, j;
+
+    u(8, itu_t_t35_country_code, 0x00, 0xff);
+    if (current->itu_t_t35_country_code != 0xff)
+        i = 1;
+    else {
+        u(8, itu_t_t35_country_code_extension_byte, 0x00, 0xff);
+        i = 2;
+    }
+
+#ifdef READ
+    if (*payload_size < i) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR,
+               "Invalid SEI user data registered payload.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    current->data_length = *payload_size - i;
+#else
+    *payload_size = i + current->data_length;
+#endif
+
+    allocate(current->data, current->data_length + AV_INPUT_BUFFER_PADDING_SIZE);
+    for (j = 0; j < current->data_length; j++)
+        xu(8, itu_t_t35_payload_byte[i], current->data[j], 0x00, 0xff, 1, i + j);
+
+    return 0;
+}
+
+static int FUNC(sei_user_data_unregistered)(CodedBitstreamContext *ctx, RWContext *rw,
+                                            H264RawSEIUserDataUnregistered *current,
+                                            uint32_t *payload_size)
+{
+    int err, i;
+
+#ifdef READ
+    if (*payload_size < 16) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR,
+               "Invalid SEI user data unregistered payload.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    current->data_length = *payload_size - 16;
+#else
+    *payload_size = 16 + current->data_length;
+#endif
+
+    for (i = 0; i < 16; i++)
+        us(8, uuid_iso_iec_11578[i], 0x00, 0xff, 1, i);
+
+    allocate(current->data, current->data_length);
+
+    for (i = 0; i < current->data_length; i++)
+        xu(8, user_data_payload_byte[i], current->data[i], 0x00, 0xff, 1, i);
+
+    return 0;
+}
+
+static int FUNC(sei_recovery_point)(CodedBitstreamContext *ctx, RWContext *rw,
+                                    H264RawSEIRecoveryPoint *current)
+{
+    int err;
+
+    ue(recovery_frame_cnt, 0, 65535);
+    flag(exact_match_flag);
+    flag(broken_link_flag);
+    u(2, changing_slice_group_idc, 0, 2);
+
+    return 0;
+}
+
+static int FUNC(sei_display_orientation)(CodedBitstreamContext *ctx, RWContext *rw,
+                                         H264RawSEIDisplayOrientation *current)
+{
+    int err;
+
+    flag(display_orientation_cancel_flag);
+    if (!current->display_orientation_cancel_flag) {
+        flag(hor_flip);
+        flag(ver_flip);
+        u(16, anticlockwise_rotation, 0, 65535);
+        ue(display_orientation_repetition_period, 0, 16384);
+        flag(display_orientation_extension_flag);
+    }
+
+    return 0;
+}
+
+static int FUNC(sei_mastering_display_colour_volume)(CodedBitstreamContext *ctx, RWContext *rw,
+                                                     H264RawSEIMasteringDisplayColourVolume *current)
+{
+    int err, c;
+
+    for (c = 0; c < 3; c++) {
+        us(16, display_primaries_x[c], 0, 50000, 1, c);
+        us(16, display_primaries_y[c], 0, 50000, 1, c);
+    }
+
+    u(16, white_point_x, 0, 50000);
+    u(16, white_point_y, 0, 50000);
+
+    u(32, max_display_mastering_luminance, 1, MAX_UINT_BITS(32));
+    u(32, min_display_mastering_luminance, 0, current->max_display_mastering_luminance - 1);
+
+    return 0;
+}
+
+static int FUNC(sei_payload)(CodedBitstreamContext *ctx, RWContext *rw,
+                             H264RawSEIPayload *current)
+{
+    int err, i;
+    int start_position, end_position;
+
+#ifdef READ
+    start_position = get_bits_count(rw);
+#else
+    start_position = put_bits_count(rw);
+#endif
+
+    switch (current->payload_type) {
+    case H264_SEI_TYPE_BUFFERING_PERIOD:
+        CHECK(FUNC(sei_buffering_period)
+              (ctx, rw, &current->payload.buffering_period));
+        break;
+    case H264_SEI_TYPE_PIC_TIMING:
+        CHECK(FUNC(sei_pic_timing)
+              (ctx, rw, &current->payload.pic_timing));
+        break;
+    case H264_SEI_TYPE_PAN_SCAN_RECT:
+        CHECK(FUNC(sei_pan_scan_rect)
+              (ctx, rw, &current->payload.pan_scan_rect));
+        break;
+    case H264_SEI_TYPE_FILLER_PAYLOAD:
+        {
+            for (i = 0; i  < current->payload_size; i++)
+                fixed(8, ff_byte, 0xff);
+        }
+        break;
+    case H264_SEI_TYPE_USER_DATA_REGISTERED:
+        CHECK(FUNC(sei_user_data_registered)
+              (ctx, rw, &current->payload.user_data_registered, &current->payload_size));
+        break;
+    case H264_SEI_TYPE_USER_DATA_UNREGISTERED:
+        CHECK(FUNC(sei_user_data_unregistered)
+              (ctx, rw, &current->payload.user_data_unregistered, &current->payload_size));
+        break;
+    case H264_SEI_TYPE_RECOVERY_POINT:
+        CHECK(FUNC(sei_recovery_point)
+              (ctx, rw, &current->payload.recovery_point));
+        break;
+    case H264_SEI_TYPE_DISPLAY_ORIENTATION:
+        CHECK(FUNC(sei_display_orientation)
+              (ctx, rw, &current->payload.display_orientation));
+        break;
+    case H264_SEI_TYPE_MASTERING_DISPLAY_COLOUR_VOLUME:
+        CHECK(FUNC(sei_mastering_display_colour_volume)
+              (ctx, rw, &current->payload.mastering_display_colour_volume));
+        break;
+    default:
+        {
+#ifdef READ
+            current->payload.other.data_length = current->payload_size;
+#endif
+            allocate(current->payload.other.data, current->payload.other.data_length);
+            for (i = 0; i < current->payload.other.data_length; i++)
+                xu(8, payload_byte[i], current->payload.other.data[i], 0, 255, 1, i);
+        }
+    }
+
+    if (byte_alignment(rw)) {
+        fixed(1, bit_equal_to_one, 1);
+        while (byte_alignment(rw))
+            fixed(1, bit_equal_to_zero, 0);
+    }
+
+#ifdef READ
+    end_position = get_bits_count(rw);
+    if (end_position < start_position + 8 * current->payload_size) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Incorrect SEI payload length: "
+               "header %"PRIu32" bits, actually %d bits.\n",
+               8 * current->payload_size,
+               end_position - start_position);
+        return AVERROR_INVALIDDATA;
+    }
+#else
+    end_position = put_bits_count(rw);
+    current->payload_size = (end_position - start_position) / 8;
+#endif
+
+    return 0;
+}
+
+static int FUNC(sei)(CodedBitstreamContext *ctx, RWContext *rw,
+                     H264RawSEI *current)
+{
+    int err, k;
+
+    HEADER("Supplemental Enhancement Information");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header,
+                                1 << H264_NAL_SEI));
+
+#ifdef READ
+    for (k = 0; k < H264_MAX_SEI_PAYLOADS; k++) {
+        uint32_t payload_type = 0;
+        uint32_t payload_size = 0;
+        uint32_t tmp;
+
+        while (show_bits(rw, 8) == 0xff) {
+            fixed(8, ff_byte, 0xff);
+            payload_type += 255;
+        }
+        xu(8, last_payload_type_byte, tmp, 0, 254, 0);
+        payload_type += tmp;
+
+        while (show_bits(rw, 8) == 0xff) {
+            fixed(8, ff_byte, 0xff);
+            payload_size += 255;
+        }
+        xu(8, last_payload_size_byte, tmp, 0, 254, 0);
+        payload_size += tmp;
+
+        current->payload[k].payload_type = payload_type;
+        current->payload[k].payload_size = payload_size;
+
+        CHECK(FUNC(sei_payload)(ctx, rw, &current->payload[k]));
+
+        if (!cbs_h2645_read_more_rbsp_data(rw))
+            break;
+    }
+    if (k >= H264_MAX_SEI_PAYLOADS) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Too many payloads in "
+               "SEI message: found %d.\n", k);
+        return AVERROR_INVALIDDATA;
+    }
+    current->payload_count = k + 1;
+#else
+    for (k = 0; k < current->payload_count; k++) {
+        PutBitContext start_state;
+        uint32_t tmp;
+        int need_size, i;
+
+        // Somewhat clumsy: we write the payload twice when
+        // we don't know the size in advance.  This will mess
+        // with trace output, but is otherwise harmless.
+        start_state = *rw;
+        need_size = !current->payload[k].payload_size;
+        for (i = 0; i < 1 + need_size; i++) {
+            *rw = start_state;
+
+            tmp = current->payload[k].payload_type;
+            while (tmp >= 255) {
+                fixed(8, ff_byte, 0xff);
+                tmp -= 255;
+            }
+            xu(8, last_payload_type_byte, tmp, 0, 254, 0);
+
+            tmp = current->payload[k].payload_size;
+            while (tmp >= 255) {
+                fixed(8, ff_byte, 0xff);
+                tmp -= 255;
+            }
+            xu(8, last_payload_size_byte, tmp, 0, 254, 0);
+
+            CHECK(FUNC(sei_payload)(ctx, rw, &current->payload[k]));
+        }
+    }
+#endif
+
+    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+    return 0;
+}
+
+static int FUNC(aud)(CodedBitstreamContext *ctx, RWContext *rw,
+                     H264RawAUD *current)
+{
+    int err;
+
+    HEADER("Access Unit Delimiter");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header,
+                                1 << H264_NAL_AUD));
+
+    u(3, primary_pic_type, 0, 7);
+
+    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+    return 0;
+}
+
+static int FUNC(ref_pic_list_modification)(CodedBitstreamContext *ctx, RWContext *rw,
+                                           H264RawSliceHeader *current)
+{
+    CodedBitstreamH264Context *h264 = ctx->priv_data;
+    const H264RawSPS *sps = h264->active_sps;
+    int err, i, mopn;
+
+    if (current->slice_type % 5 != 2 &&
+        current->slice_type % 5 != 4) {
+        flag(ref_pic_list_modification_flag_l0);
+        if (current->ref_pic_list_modification_flag_l0) {
+            for (i = 0; i < H264_MAX_RPLM_COUNT; i++) {
+                xue(modification_of_pic_nums_idc,
+                    current->rplm_l0[i].modification_of_pic_nums_idc, 0, 3, 0);
+
+                mopn = current->rplm_l0[i].modification_of_pic_nums_idc;
+                if (mopn == 3)
+                    break;
+
+                if (mopn == 0 || mopn == 1)
+                    xue(abs_diff_pic_num_minus1,
+                        current->rplm_l0[i].abs_diff_pic_num_minus1,
+                        0, (1 + current->field_pic_flag) *
+                        (1 << (sps->log2_max_frame_num_minus4 + 4)), 0);
+                else if (mopn == 2)
+                    xue(long_term_pic_num,
+                        current->rplm_l0[i].long_term_pic_num,
+                        0, sps->max_num_ref_frames - 1, 0);
+            }
+        }
+    }
+
+    if (current->slice_type % 5 == 1) {
+        flag(ref_pic_list_modification_flag_l1);
+        if (current->ref_pic_list_modification_flag_l1) {
+            for (i = 0; i < H264_MAX_RPLM_COUNT; i++) {
+                xue(modification_of_pic_nums_idc,
+                    current->rplm_l1[i].modification_of_pic_nums_idc, 0, 3, 0);
+
+                mopn = current->rplm_l1[i].modification_of_pic_nums_idc;
+                if (mopn == 3)
+                    break;
+
+                if (mopn == 0 || mopn == 1)
+                    xue(abs_diff_pic_num_minus1,
+                        current->rplm_l1[i].abs_diff_pic_num_minus1,
+                        0, (1 + current->field_pic_flag) *
+                        (1 << (sps->log2_max_frame_num_minus4 + 4)), 0);
+                else if (mopn == 2)
+                    xue(long_term_pic_num,
+                        current->rplm_l1[i].long_term_pic_num,
+                        0, sps->max_num_ref_frames - 1, 0);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(pred_weight_table)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   H264RawSliceHeader *current)
+{
+    CodedBitstreamH264Context *h264 = ctx->priv_data;
+    const H264RawSPS *sps = h264->active_sps;
+    int chroma;
+    int err, i, j;
+
+    ue(luma_log2_weight_denom, 0, 7);
+
+    chroma = !sps->separate_colour_plane_flag && sps->chroma_format_idc != 0;
+    if (chroma)
+        ue(chroma_log2_weight_denom, 0, 7);
+
+    for (i = 0; i <= current->num_ref_idx_l0_active_minus1; i++) {
+        flags(luma_weight_l0_flag[i], 1, i);
+        if (current->luma_weight_l0_flag[i]) {
+            ses(luma_weight_l0[i], -128, +127, 1, i);
+            ses(luma_offset_l0[i], -128, +127, 1, i);
+        }
+        if (chroma) {
+            flags(chroma_weight_l0_flag[i], 1, i);
+            if (current->chroma_weight_l0_flag[i]) {
+                for (j = 0; j < 2; j++) {
+                    ses(chroma_weight_l0[i][j], -128, +127, 2, i, j);
+                    ses(chroma_offset_l0[i][j], -128, +127, 2, i, j);
+                }
+            }
+        }
+    }
+
+    if (current->slice_type % 5 == 1) {
+        for (i = 0; i <= current->num_ref_idx_l1_active_minus1; i++) {
+            flags(luma_weight_l1_flag[i], 1, i);
+            if (current->luma_weight_l1_flag[i]) {
+                ses(luma_weight_l1[i], -128, +127, 1, i);
+                ses(luma_offset_l1[i], -128, +127, 1, i);
+            }
+            if (chroma) {
+                flags(chroma_weight_l1_flag[i], 1, i);
+                if (current->chroma_weight_l1_flag[i]) {
+                    for (j = 0; j < 2; j++) {
+                        ses(chroma_weight_l1[i][j], -128, +127, 2, i, j);
+                        ses(chroma_offset_l1[i][j], -128, +127, 2, i, j);
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(dec_ref_pic_marking)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     H264RawSliceHeader *current, int idr_pic_flag)
+{
+    CodedBitstreamH264Context *h264 = ctx->priv_data;
+    const H264RawSPS *sps = h264->active_sps;
+    int err, i;
+    uint32_t mmco;
+
+    if (idr_pic_flag) {
+        flag(no_output_of_prior_pics_flag);
+        flag(long_term_reference_flag);
+    } else {
+        flag(adaptive_ref_pic_marking_mode_flag);
+        if (current->adaptive_ref_pic_marking_mode_flag) {
+            for (i = 0; i < H264_MAX_MMCO_COUNT; i++) {
+                xue(memory_management_control_operation,
+                    current->mmco[i].memory_management_control_operation,
+                    0, 6, 0);
+
+                mmco = current->mmco[i].memory_management_control_operation;
+                if (mmco == 0)
+                    break;
+
+                if (mmco == 1 || mmco == 3)
+                    xue(difference_of_pic_nums_minus1,
+                        current->mmco[i].difference_of_pic_nums_minus1,
+                        0, INT32_MAX, 0);
+                if (mmco == 2)
+                    xue(long_term_pic_num,
+                        current->mmco[i].long_term_pic_num,
+                        0, sps->max_num_ref_frames - 1, 0);
+                if (mmco == 3 || mmco == 6)
+                    xue(long_term_frame_idx,
+                        current->mmco[i].long_term_frame_idx,
+                        0, sps->max_num_ref_frames - 1, 0);
+                if (mmco == 4)
+                    xue(max_long_term_frame_idx_plus1,
+                        current->mmco[i].max_long_term_frame_idx_plus1,
+                        0, sps->max_num_ref_frames, 0);
+            }
+            if (i == H264_MAX_MMCO_COUNT) {
+                av_log(ctx->log_ctx, AV_LOG_ERROR, "Too many "
+                       "memory management control operations.\n");
+                return AVERROR_INVALIDDATA;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(slice_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                              H264RawSliceHeader *current)
+{
+    CodedBitstreamH264Context *h264 = ctx->priv_data;
+    const H264RawSPS *sps;
+    const H264RawPPS *pps;
+    int err;
+    int idr_pic_flag;
+    int slice_type_i, slice_type_p, slice_type_b;
+    int slice_type_si, slice_type_sp;
+
+    HEADER("Slice Header");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header,
+                                1 << H264_NAL_SLICE     |
+                                1 << H264_NAL_IDR_SLICE |
+                                1 << H264_NAL_AUXILIARY_SLICE));
+
+    if (current->nal_unit_header.nal_unit_type == H264_NAL_AUXILIARY_SLICE) {
+        if (!h264->last_slice_nal_unit_type) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Auxiliary slice "
+                   "is not decodable without the main picture "
+                   "in the same access unit.\n");
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        h264->last_slice_nal_unit_type =
+            current->nal_unit_header.nal_unit_type;
+    }
+    idr_pic_flag = h264->last_slice_nal_unit_type == H264_NAL_IDR_SLICE;
+
+    ue(first_mb_in_slice, 0, H264_MAX_MB_PIC_SIZE - 1);
+    ue(slice_type, 0, 9);
+
+    slice_type_i  = current->slice_type % 5 == 2;
+    slice_type_p  = current->slice_type % 5 == 0;
+    slice_type_b  = current->slice_type % 5 == 1;
+    slice_type_si = current->slice_type % 5 == 4;
+    slice_type_sp = current->slice_type % 5 == 3;
+
+    if (idr_pic_flag && !(slice_type_i || slice_type_si)) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid slice type %d "
+               "for IDR picture.\n", current->slice_type);
+        return AVERROR_INVALIDDATA;
+    }
+
+    ue(pic_parameter_set_id, 0, 255);
+
+    pps = h264->pps[current->pic_parameter_set_id];
+    if (!pps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "PPS id %d not available.\n",
+               current->pic_parameter_set_id);
+        return AVERROR_INVALIDDATA;
+    }
+    h264->active_pps = pps;
+
+    sps = h264->sps[pps->seq_parameter_set_id];
+    if (!sps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "SPS id %d not available.\n",
+               pps->seq_parameter_set_id);
+        return AVERROR_INVALIDDATA;
+    }
+    h264->active_sps = sps;
+
+    if (sps->separate_colour_plane_flag)
+        u(2, colour_plane_id, 0, 2);
+
+    u(sps->log2_max_frame_num_minus4 + 4, frame_num,
+      0, MAX_UINT_BITS(sps->log2_max_frame_num_minus4 + 4));
+
+    if (!sps->frame_mbs_only_flag) {
+        flag(field_pic_flag);
+        if (current->field_pic_flag)
+            flag(bottom_field_flag);
+        else
+            infer(bottom_field_flag, 0);
+    } else {
+        infer(field_pic_flag,    0);
+        infer(bottom_field_flag, 0);
+    }
+
+    if (idr_pic_flag)
+        ue(idr_pic_id, 0, 65535);
+
+    if (sps->pic_order_cnt_type == 0) {
+        u(sps->log2_max_pic_order_cnt_lsb_minus4 + 4, pic_order_cnt_lsb,
+          0, MAX_UINT_BITS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4));
+        if (pps->bottom_field_pic_order_in_frame_present_flag &&
+            !current->field_pic_flag)
+            se(delta_pic_order_cnt_bottom, INT32_MIN + 1, INT32_MAX);
+
+    } else if (sps->pic_order_cnt_type == 1) {
+        if (!sps->delta_pic_order_always_zero_flag) {
+            se(delta_pic_order_cnt[0], INT32_MIN + 1, INT32_MAX);
+            if (pps->bottom_field_pic_order_in_frame_present_flag &&
+                !current->field_pic_flag)
+                se(delta_pic_order_cnt[1], INT32_MIN + 1, INT32_MAX);
+            else
+                infer(delta_pic_order_cnt[1], 0);
+        } else {
+            infer(delta_pic_order_cnt[0], 0);
+            infer(delta_pic_order_cnt[1], 0);
+        }
+    }
+
+    if (pps->redundant_pic_cnt_present_flag)
+        ue(redundant_pic_cnt, 0, 127);
+
+    if (slice_type_b)
+        flag(direct_spatial_mv_pred_flag);
+
+    if (slice_type_p || slice_type_sp || slice_type_b) {
+        flag(num_ref_idx_active_override_flag);
+        if (current->num_ref_idx_active_override_flag) {
+            ue(num_ref_idx_l0_active_minus1, 0, 31);
+            if (slice_type_b)
+                ue(num_ref_idx_l1_active_minus1, 0, 31);
+        } else {
+            infer(num_ref_idx_l0_active_minus1,
+                  pps->num_ref_idx_l0_default_active_minus1);
+            infer(num_ref_idx_l1_active_minus1,
+                  pps->num_ref_idx_l1_default_active_minus1);
+        }
+    }
+
+    if (current->nal_unit_header.nal_unit_type == 20 ||
+        current->nal_unit_header.nal_unit_type == 21) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "MVC / 3DAVC not supported.\n");
+        return AVERROR_PATCHWELCOME;
+    } else {
+        CHECK(FUNC(ref_pic_list_modification)(ctx, rw, current));
+    }
+
+    if ((pps->weighted_pred_flag && (slice_type_p || slice_type_sp)) ||
+        (pps->weighted_bipred_idc == 1 && slice_type_b)) {
+        CHECK(FUNC(pred_weight_table)(ctx, rw, current));
+    }
+
+    if (current->nal_unit_header.nal_ref_idc != 0) {
+        CHECK(FUNC(dec_ref_pic_marking)(ctx, rw, current, idr_pic_flag));
+    }
+
+    if (pps->entropy_coding_mode_flag &&
+        !slice_type_i && !slice_type_si) {
+        ue(cabac_init_idc, 0, 2);
+    }
+
+    se(slice_qp_delta, - 51 - 6 * sps->bit_depth_luma_minus8,
+                       + 51 + 6 * sps->bit_depth_luma_minus8);
+    if (slice_type_sp || slice_type_si) {
+        if (slice_type_sp)
+            flag(sp_for_switch_flag);
+        se(slice_qs_delta, -51, +51);
+    }
+
+    if (pps->deblocking_filter_control_present_flag) {
+        ue(disable_deblocking_filter_idc, 0, 2);
+        if (current->disable_deblocking_filter_idc != 1) {
+            se(slice_alpha_c0_offset_div2, -6, +6);
+            se(slice_beta_offset_div2,     -6, +6);
+        } else {
+            infer(slice_alpha_c0_offset_div2, 0);
+            infer(slice_beta_offset_div2,     0);
+        }
+    } else {
+        infer(disable_deblocking_filter_idc, 0);
+        infer(slice_alpha_c0_offset_div2,    0);
+        infer(slice_beta_offset_div2,        0);
+    }
+
+    if (pps->num_slice_groups_minus1 > 0 &&
+        pps->slice_group_map_type >= 3 &&
+        pps->slice_group_map_type <= 5) {
+        unsigned int pic_size, max, bits;
+
+        pic_size = (sps->pic_width_in_mbs_minus1 + 1) *
+                   (sps->pic_height_in_map_units_minus1 + 1);
+        max = (pic_size + pps->slice_group_change_rate_minus1) /
+              (pps->slice_group_change_rate_minus1 + 1);
+        bits = av_log2(2 * max - 1);
+
+        u(bits, slice_group_change_cycle, 0, max);
+    }
+
+    if (pps->entropy_coding_mode_flag) {
+        while (byte_alignment(rw))
+            fixed(1, cabac_alignment_one_bit, 1);
+    }
+
+    return 0;
+}
+
+static int FUNC(filler)(CodedBitstreamContext *ctx, RWContext *rw,
+                        H264RawFiller *current)
+{
+    int err;
+
+    HEADER("Filler Data");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header,
+                                1 << H264_NAL_FILLER_DATA));
+
+#ifdef READ
+    while (show_bits(rw, 8) == 0xff) {
+        fixed(8, ff_byte, 0xff);
+        ++current->filler_size;
+    }
+#else
+    {
+        uint32_t i;
+        for (i = 0; i < current->filler_size; i++)
+            fixed(8, ff_byte, 0xff);
+    }
+#endif
+
+    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+    return 0;
+}

diff --git a/libavcodec/cbs_h265.h b/libavcodec/cbs_h265.h
new file mode 100644
index 0000000..cca1d75
--- /dev/null
+++ b/libavcodec/cbs_h265.h

@@ -0,0 +1,584 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_H265_H
+#define AVCODEC_CBS_H265_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "cbs_h2645.h"
+#include "hevc.h"
+
+enum {
+    // This limit is arbitrary - it is sufficient for one message of each
+    // type plus some repeats, and will therefore easily cover all sane
+    // streams.  However, it is possible to make technically-valid streams
+    // for which it will fail (for example, by including a large number of
+    // user-data-unregistered messages).
+    H265_MAX_SEI_PAYLOADS = 64,
+};
+
+typedef struct H265RawNALUnitHeader {
+    uint8_t forbidden_zero_bit;
+    uint8_t nal_unit_type;
+    uint8_t nuh_layer_id;
+    uint8_t nuh_temporal_id_plus1;
+} H265RawNALUnitHeader;
+
+typedef struct H265RawProfileTierLevel {
+    uint8_t general_profile_space;
+    uint8_t general_tier_flag;
+    uint8_t general_profile_idc;
+
+    uint8_t general_profile_compatibility_flag[32];
+
+    uint8_t general_progressive_source_flag;
+    uint8_t general_interlaced_source_flag;
+    uint8_t general_non_packed_constraint_flag;
+    uint8_t general_frame_only_constraint_flag;
+
+    uint8_t general_max_12bit_constraint_flag;
+    uint8_t general_max_10bit_constraint_flag;
+    uint8_t general_max_8bit_constraint_flag;
+    uint8_t general_max_422chroma_constraint_flag;
+    uint8_t general_max_420chroma_constraint_flag;
+    uint8_t general_max_monochrome_constraint_flag;
+    uint8_t general_intra_constraint_flag;
+    uint8_t general_one_picture_only_constraint_flag;
+    uint8_t general_lower_bit_rate_constraint_flag;
+    uint8_t general_max_14bit_constraint_flag;
+
+    uint8_t general_inbld_flag;
+
+    uint8_t general_level_idc;
+
+    uint8_t sub_layer_profile_present_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_level_present_flag[HEVC_MAX_SUB_LAYERS];
+
+    // TODO: much of that again for each sub-layer.
+} H265RawProfileTierLevel;
+
+typedef struct H265RawSubLayerHRDParameters {
+    uint32_t bit_rate_value_minus1[HEVC_MAX_CPB_CNT];
+    uint32_t cpb_size_value_minus1[HEVC_MAX_CPB_CNT];
+    uint32_t cpb_size_du_value_minus1[HEVC_MAX_CPB_CNT];
+    uint32_t bit_rate_du_value_minus1[HEVC_MAX_CPB_CNT];
+    uint8_t cbr_flag[HEVC_MAX_CPB_CNT];
+} H265RawSubLayerHRDParameters;
+
+typedef struct H265RawHRDParameters {
+    uint8_t nal_hrd_parameters_present_flag;
+    uint8_t vcl_hrd_parameters_present_flag;
+
+    uint8_t sub_pic_hrd_params_present_flag;
+    uint8_t tick_divisor_minus2;
+    uint8_t du_cpb_removal_delay_increment_length_minus1;
+    uint8_t sub_pic_cpb_params_in_pic_timing_sei_flag;
+    uint8_t dpb_output_delay_du_length_minus1;
+
+    uint8_t bit_rate_scale;
+    uint8_t cpb_size_scale;
+    uint8_t cpb_size_du_scale;
+
+    uint8_t initial_cpb_removal_delay_length_minus1;
+    uint8_t au_cpb_removal_delay_length_minus1;
+    uint8_t dpb_output_delay_length_minus1;
+
+    uint8_t fixed_pic_rate_general_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t fixed_pic_rate_within_cvs_flag[HEVC_MAX_SUB_LAYERS];
+    uint16_t elemental_duration_in_tc_minus1[HEVC_MAX_SUB_LAYERS];
+    uint8_t low_delay_hrd_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t cpb_cnt_minus1[HEVC_MAX_SUB_LAYERS];
+    H265RawSubLayerHRDParameters nal_sub_layer_hrd_parameters[HEVC_MAX_SUB_LAYERS];
+    H265RawSubLayerHRDParameters vcl_sub_layer_hrd_parameters[HEVC_MAX_SUB_LAYERS];
+} H265RawHRDParameters;
+
+typedef struct H265RawVUI {
+    uint8_t aspect_ratio_info_present_flag;
+    uint8_t aspect_ratio_idc;
+    uint16_t sar_width;
+    uint16_t sar_height;
+
+    uint8_t overscan_info_present_flag;
+    uint8_t overscan_appropriate_flag;
+
+    uint8_t video_signal_type_present_flag;
+    uint8_t video_format;
+    uint8_t video_full_range_flag;
+    uint8_t colour_description_present_flag;
+    uint8_t colour_primaries;
+    uint8_t transfer_characteristics;
+    uint8_t matrix_coefficients;
+
+    uint8_t chroma_loc_info_present_flag;
+    uint8_t chroma_sample_loc_type_top_field;
+    uint8_t chroma_sample_loc_type_bottom_field;
+
+    uint8_t neutral_chroma_indication_flag;
+    uint8_t field_seq_flag;
+    uint8_t frame_field_info_present_flag;
+
+    uint8_t default_display_window_flag;
+    uint16_t def_disp_win_left_offset;
+    uint16_t def_disp_win_right_offset;
+    uint16_t def_disp_win_top_offset;
+    uint16_t def_disp_win_bottom_offset;
+
+    uint8_t vui_timing_info_present_flag;
+    uint32_t vui_num_units_in_tick;
+    uint32_t vui_time_scale;
+    uint8_t vui_poc_proportional_to_timing_flag;
+    uint32_t vui_num_ticks_poc_diff_one_minus1;
+    uint8_t vui_hrd_parameters_present_flag;
+    H265RawHRDParameters hrd_parameters;
+
+    uint8_t bitstream_restriction_flag;
+    uint8_t tiles_fixed_structure_flag;
+    uint8_t motion_vectors_over_pic_boundaries_flag;
+    uint8_t restricted_ref_pic_lists_flag;
+    uint16_t min_spatial_segmentation_idc;
+    uint8_t max_bytes_per_pic_denom;
+    uint8_t max_bits_per_min_cu_denom;
+    uint8_t log2_max_mv_length_horizontal;
+    uint8_t log2_max_mv_length_vertical;
+} H265RawVUI;
+
+typedef struct H265RawPSExtensionData {
+    uint8_t *data;
+    size_t bit_length;
+    AVBufferRef *data_ref;
+} H265RawPSExtensionData;
+
+typedef struct H265RawVPS {
+    H265RawNALUnitHeader nal_unit_header;
+
+    uint8_t vps_video_parameter_set_id;
+
+    uint8_t vps_base_layer_internal_flag;
+    uint8_t vps_base_layer_available_flag;
+    uint8_t vps_max_layers_minus1;
+    uint8_t vps_max_sub_layers_minus1;
+    uint8_t vps_temporal_id_nesting_flag;
+
+    H265RawProfileTierLevel profile_tier_level;
+
+    uint8_t vps_sub_layer_ordering_info_present_flag;
+    uint8_t vps_max_dec_pic_buffering_minus1[HEVC_MAX_SUB_LAYERS];
+    uint8_t vps_max_num_reorder_pics[HEVC_MAX_SUB_LAYERS];
+    uint32_t vps_max_latency_increase_plus1[HEVC_MAX_SUB_LAYERS];
+
+    uint8_t vps_max_layer_id;
+    uint16_t vps_num_layer_sets_minus1;
+    uint8_t layer_id_included_flag[HEVC_MAX_LAYER_SETS][HEVC_MAX_LAYERS];
+
+    uint8_t vps_timing_info_present_flag;
+    uint32_t vps_num_units_in_tick;
+    uint32_t vps_time_scale;
+    uint8_t vps_poc_proportional_to_timing_flag;
+    uint32_t vps_num_ticks_poc_diff_one_minus1;
+    uint16_t vps_num_hrd_parameters;
+    uint16_t hrd_layer_set_idx[HEVC_MAX_LAYER_SETS];
+    uint8_t cprms_present_flag[HEVC_MAX_LAYER_SETS];
+    H265RawHRDParameters hrd_parameters[HEVC_MAX_LAYER_SETS];
+
+    uint8_t vps_extension_flag;
+    H265RawPSExtensionData extension_data;
+} H265RawVPS;
+
+typedef struct H265RawSTRefPicSet {
+    uint8_t inter_ref_pic_set_prediction_flag;
+
+    uint8_t delta_idx_minus1;
+    uint8_t delta_rps_sign;
+    uint16_t abs_delta_rps_minus1;
+
+    uint8_t used_by_curr_pic_flag[HEVC_MAX_REFS];
+    uint8_t use_delta_flag[HEVC_MAX_REFS];
+
+    uint8_t num_negative_pics;
+    uint8_t num_positive_pics;
+    uint16_t delta_poc_s0_minus1[HEVC_MAX_REFS];
+    uint8_t used_by_curr_pic_s0_flag[HEVC_MAX_REFS];
+    uint16_t delta_poc_s1_minus1[HEVC_MAX_REFS];
+    uint8_t used_by_curr_pic_s1_flag[HEVC_MAX_REFS];
+} H265RawSTRefPicSet;
+
+typedef struct H265RawScalingList {
+    uint8_t scaling_list_pred_mode_flag[4][6];
+    uint8_t scaling_list_pred_matrix_id_delta[4][6];
+    int16_t scaling_list_dc_coef_minus8[4][6];
+    int8_t scaling_list_delta_coeff[4][6][64];
+} H265RawScalingList;
+
+typedef struct H265RawSPS {
+    H265RawNALUnitHeader nal_unit_header;
+
+    uint8_t sps_video_parameter_set_id;
+
+    uint8_t sps_max_sub_layers_minus1;
+    uint8_t sps_temporal_id_nesting_flag;
+
+    H265RawProfileTierLevel profile_tier_level;
+
+    uint8_t sps_seq_parameter_set_id;
+
+    uint8_t chroma_format_idc;
+    uint8_t separate_colour_plane_flag;
+
+    uint16_t pic_width_in_luma_samples;
+    uint16_t pic_height_in_luma_samples;
+
+    uint8_t conformance_window_flag;
+    uint16_t conf_win_left_offset;
+    uint16_t conf_win_right_offset;
+    uint16_t conf_win_top_offset;
+    uint16_t conf_win_bottom_offset;
+
+    uint8_t bit_depth_luma_minus8;
+    uint8_t bit_depth_chroma_minus8;
+
+    uint8_t log2_max_pic_order_cnt_lsb_minus4;
+
+    uint8_t sps_sub_layer_ordering_info_present_flag;
+    uint8_t sps_max_dec_pic_buffering_minus1[HEVC_MAX_SUB_LAYERS];
+    uint8_t sps_max_num_reorder_pics[HEVC_MAX_SUB_LAYERS];
+    uint32_t sps_max_latency_increase_plus1[HEVC_MAX_SUB_LAYERS];
+
+    uint8_t log2_min_luma_coding_block_size_minus3;
+    uint8_t log2_diff_max_min_luma_coding_block_size;
+    uint8_t log2_min_luma_transform_block_size_minus2;
+    uint8_t log2_diff_max_min_luma_transform_block_size;
+    uint8_t max_transform_hierarchy_depth_inter;
+    uint8_t max_transform_hierarchy_depth_intra;
+
+    uint8_t scaling_list_enabled_flag;
+    uint8_t sps_scaling_list_data_present_flag;
+    H265RawScalingList scaling_list;
+
+    uint8_t amp_enabled_flag;
+    uint8_t sample_adaptive_offset_enabled_flag;
+
+    uint8_t pcm_enabled_flag;
+    uint8_t pcm_sample_bit_depth_luma_minus1;
+    uint8_t pcm_sample_bit_depth_chroma_minus1;
+    uint8_t log2_min_pcm_luma_coding_block_size_minus3;
+    uint8_t log2_diff_max_min_pcm_luma_coding_block_size;
+    uint8_t pcm_loop_filter_disabled_flag;
+
+    uint8_t num_short_term_ref_pic_sets;
+    H265RawSTRefPicSet st_ref_pic_set[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
+
+    uint8_t long_term_ref_pics_present_flag;
+    uint8_t num_long_term_ref_pics_sps;
+    uint16_t lt_ref_pic_poc_lsb_sps[HEVC_MAX_LONG_TERM_REF_PICS];
+    uint8_t used_by_curr_pic_lt_sps_flag[HEVC_MAX_LONG_TERM_REF_PICS];
+
+    uint8_t sps_temporal_mvp_enabled_flag;
+    uint8_t strong_intra_smoothing_enabled_flag;
+
+    uint8_t vui_parameters_present_flag;
+    H265RawVUI vui;
+
+    uint8_t sps_extension_present_flag;
+    uint8_t sps_range_extension_flag;
+    uint8_t sps_multilayer_extension_flag;
+    uint8_t sps_3d_extension_flag;
+    uint8_t sps_scc_extension_flag;
+    uint8_t sps_extension_4bits;
+
+    H265RawPSExtensionData extension_data;
+
+    // Range extension.
+    uint8_t transform_skip_rotation_enabled_flag;
+    uint8_t transform_skip_context_enabled_flag;
+    uint8_t implicit_rdpcm_enabled_flag;
+    uint8_t explicit_rdpcm_enabled_flag;
+    uint8_t extended_precision_processing_flag;
+    uint8_t intra_smoothing_disabled_flag;
+    uint8_t high_precision_offsets_enabled_flag;
+    uint8_t persistent_rice_adaptation_enabled_flag;
+    uint8_t cabac_bypass_alignment_enabled_flag;
+
+    // Screen content coding extension.
+    uint8_t sps_curr_pic_ref_enabled_flag;
+    uint8_t palette_mode_enabled_flag;
+    uint8_t palette_max_size;
+    uint8_t delta_palette_max_predictor_size;
+    uint8_t sps_palette_predictor_initializer_present_flag;
+    uint8_t sps_num_palette_predictor_initializer_minus1;
+    uint16_t sps_palette_predictor_initializers[3][128];
+
+    uint8_t motion_vector_resolution_control_idc;
+    uint8_t intra_boundary_filtering_disable_flag;
+} H265RawSPS;
+
+typedef struct H265RawPPS {
+    H265RawNALUnitHeader nal_unit_header;
+
+    uint8_t pps_pic_parameter_set_id;
+    uint8_t pps_seq_parameter_set_id;
+
+    uint8_t dependent_slice_segments_enabled_flag;
+    uint8_t output_flag_present_flag;
+    uint8_t num_extra_slice_header_bits;
+    uint8_t sign_data_hiding_enabled_flag;
+    uint8_t cabac_init_present_flag;
+
+    uint8_t num_ref_idx_l0_default_active_minus1;
+    uint8_t num_ref_idx_l1_default_active_minus1;
+
+    int8_t init_qp_minus26;
+
+    uint8_t constrained_intra_pred_flag;
+    uint8_t transform_skip_enabled_flag;
+    uint8_t cu_qp_delta_enabled_flag;
+    uint8_t diff_cu_qp_delta_depth;
+
+    int8_t pps_cb_qp_offset;
+    int8_t pps_cr_qp_offset;
+    uint8_t pps_slice_chroma_qp_offsets_present_flag;
+
+    uint8_t weighted_pred_flag;
+    uint8_t weighted_bipred_flag;
+
+    uint8_t transquant_bypass_enabled_flag;
+    uint8_t tiles_enabled_flag;
+    uint8_t entropy_coding_sync_enabled_flag;
+
+    uint8_t num_tile_columns_minus1;
+    uint8_t num_tile_rows_minus1;
+    uint8_t uniform_spacing_flag;
+    uint16_t column_width_minus1[HEVC_MAX_TILE_COLUMNS];
+    uint16_t row_height_minus1[HEVC_MAX_TILE_ROWS];
+    uint8_t loop_filter_across_tiles_enabled_flag;
+
+    uint8_t pps_loop_filter_across_slices_enabled_flag;
+    uint8_t deblocking_filter_control_present_flag;
+    uint8_t deblocking_filter_override_enabled_flag;
+    uint8_t pps_deblocking_filter_disabled_flag;
+    int8_t pps_beta_offset_div2;
+    int8_t pps_tc_offset_div2;
+
+    uint8_t pps_scaling_list_data_present_flag;
+    H265RawScalingList scaling_list;
+
+    uint8_t lists_modification_present_flag;
+    uint8_t log2_parallel_merge_level_minus2;
+
+    uint8_t slice_segment_header_extension_present_flag;
+
+    uint8_t pps_extension_present_flag;
+    uint8_t pps_range_extension_flag;
+    uint8_t pps_multilayer_extension_flag;
+    uint8_t pps_3d_extension_flag;
+    uint8_t pps_scc_extension_flag;
+    uint8_t pps_extension_4bits;
+
+    H265RawPSExtensionData extension_data;
+
+    // Range extension.
+    uint8_t log2_max_transform_skip_block_size_minus2;
+    uint8_t cross_component_prediction_enabled_flag;
+    uint8_t chroma_qp_offset_list_enabled_flag;
+    uint8_t diff_cu_chroma_qp_offset_depth;
+    uint8_t chroma_qp_offset_list_len_minus1;
+    int8_t cb_qp_offset_list[6];
+    int8_t cr_qp_offset_list[6];
+    uint8_t log2_sao_offset_scale_luma;
+    uint8_t log2_sao_offset_scale_chroma;
+
+    // Screen content coding extension.
+    uint8_t pps_curr_pic_ref_enabled_flag;
+    uint8_t residual_adaptive_colour_transform_enabled_flag;
+    uint8_t pps_slice_act_qp_offsets_present_flag;
+    int8_t pps_act_y_qp_offset_plus5;
+    int8_t pps_act_cb_qp_offset_plus5;
+    int8_t pps_act_cr_qp_offset_plus3;
+
+    uint8_t pps_palette_predictor_initializer_present_flag;
+    uint8_t pps_num_palette_predictor_initializer;
+    uint8_t monochrome_palette_flag;
+    uint8_t luma_bit_depth_entry_minus8;
+    uint8_t chroma_bit_depth_entry_minus8;
+    uint16_t pps_palette_predictor_initializers[3][128];
+} H265RawPPS;
+
+typedef struct H265RawAUD {
+    H265RawNALUnitHeader nal_unit_header;
+
+    uint8_t pic_type;
+} H265RawAUD;
+
+typedef struct  H265RawSliceHeader {
+    H265RawNALUnitHeader nal_unit_header;
+
+    uint8_t first_slice_segment_in_pic_flag;
+    uint8_t no_output_of_prior_pics_flag;
+    uint8_t slice_pic_parameter_set_id;
+
+    uint8_t dependent_slice_segment_flag;
+    uint16_t slice_segment_address;
+
+    uint8_t slice_reserved_flag[8];
+    uint8_t slice_type;
+
+    uint8_t pic_output_flag;
+    uint8_t colour_plane_id;
+
+    uint16_t slice_pic_order_cnt_lsb;
+
+    uint8_t short_term_ref_pic_set_sps_flag;
+    H265RawSTRefPicSet short_term_ref_pic_set;
+    uint8_t short_term_ref_pic_set_idx;
+
+    uint8_t num_long_term_sps;
+    uint8_t num_long_term_pics;
+    uint8_t lt_idx_sps[HEVC_MAX_REFS];
+    uint8_t poc_lsb_lt[HEVC_MAX_REFS];
+    uint8_t used_by_curr_pic_lt_flag[HEVC_MAX_REFS];
+    uint8_t delta_poc_msb_present_flag[HEVC_MAX_REFS];
+    uint32_t delta_poc_msb_cycle_lt[HEVC_MAX_REFS];
+
+    uint8_t slice_temporal_mvp_enabled_flag;
+
+    uint8_t slice_sao_luma_flag;
+    uint8_t slice_sao_chroma_flag;
+
+    uint8_t num_ref_idx_active_override_flag;
+    uint8_t num_ref_idx_l0_active_minus1;
+    uint8_t num_ref_idx_l1_active_minus1;
+
+    uint8_t ref_pic_list_modification_flag_l0;
+    uint8_t list_entry_l0[HEVC_MAX_REFS];
+    uint8_t ref_pic_list_modification_flag_l1;
+    uint8_t list_entry_l1[HEVC_MAX_REFS];
+
+    uint8_t mvd_l1_zero_flag;
+    uint8_t cabac_init_flag;
+    uint8_t collocated_from_l0_flag;
+    uint8_t collocated_ref_idx;
+
+    uint8_t luma_log2_weight_denom;
+    int8_t delta_chroma_log2_weight_denom;
+    uint8_t luma_weight_l0_flag[HEVC_MAX_REFS];
+    uint8_t chroma_weight_l0_flag[HEVC_MAX_REFS];
+    int8_t delta_luma_weight_l0[HEVC_MAX_REFS];
+    int16_t luma_offset_l0[HEVC_MAX_REFS];
+    int8_t delta_chroma_weight_l0[HEVC_MAX_REFS][2];
+    int16_t chroma_offset_l0[HEVC_MAX_REFS][2];
+    uint8_t luma_weight_l1_flag[HEVC_MAX_REFS];
+    uint8_t chroma_weight_l1_flag[HEVC_MAX_REFS];
+    int8_t delta_luma_weight_l1[HEVC_MAX_REFS];
+    int16_t luma_offset_l1[HEVC_MAX_REFS];
+    int8_t delta_chroma_weight_l1[HEVC_MAX_REFS][2];
+    int16_t chroma_offset_l1[HEVC_MAX_REFS][2];
+
+    uint8_t five_minus_max_num_merge_cand;
+    uint8_t use_integer_mv_flag;
+
+    int8_t slice_qp_delta;
+    int8_t slice_cb_qp_offset;
+    int8_t slice_cr_qp_offset;
+    int8_t slice_act_y_qp_offset;
+    int8_t slice_act_cb_qp_offset;
+    int8_t slice_act_cr_qp_offset;
+    uint8_t cu_chroma_qp_offset_enabled_flag;
+
+    uint8_t deblocking_filter_override_flag;
+    uint8_t slice_deblocking_filter_disabled_flag;
+    int8_t slice_beta_offset_div2;
+    int8_t slice_tc_offset_div2;
+    uint8_t slice_loop_filter_across_slices_enabled_flag;
+
+    uint16_t num_entry_point_offsets;
+    uint8_t offset_len_minus1;
+    uint32_t entry_point_offset_minus1[HEVC_MAX_ENTRY_POINT_OFFSETS];
+
+    uint16_t slice_segment_header_extension_length;
+    uint8_t slice_segment_header_extension_data_byte[256];
+} H265RawSliceHeader;
+
+
+typedef struct H265RawSlice {
+    H265RawSliceHeader header;
+
+    uint8_t *data;
+    size_t   data_size;
+    int      data_bit_start;
+    AVBufferRef *data_ref;
+} H265RawSlice;
+
+typedef struct H265RawSEIMasteringDisplayColourVolume {
+    uint16_t display_primaries_x[3];
+    uint16_t display_primaries_y[3];
+    uint16_t white_point_x;
+    uint16_t white_point_y;
+    uint32_t max_display_mastering_luminance;
+    uint32_t min_display_mastering_luminance;
+} H265RawSEIMasteringDisplayColourVolume;
+
+typedef struct H265RawSEIContentLightLevelInfo {
+    uint16_t max_content_light_level;
+    uint16_t max_pic_average_light_level;
+} H265RawSEIContentLightLevelInfo;
+
+typedef struct H265RawSEIPayload {
+    uint32_t payload_type;
+    uint32_t payload_size;
+    union {
+        H265RawSEIMasteringDisplayColourVolume mastering_display;
+        H265RawSEIContentLightLevelInfo content_light_level;
+        struct {
+            uint8_t *data;
+            size_t data_length;
+            AVBufferRef *data_ref;
+        } other;
+    } payload;
+} H265RawSEIPayload;
+
+typedef struct H265RawSEI {
+    H265RawNALUnitHeader nal_unit_header;
+
+    H265RawSEIPayload payload[H265_MAX_SEI_PAYLOADS];
+    uint8_t payload_count;
+} H265RawSEI;
+
+typedef struct CodedBitstreamH265Context {
+    // Reader/writer context in common with the H.264 implementation.
+    CodedBitstreamH2645Context common;
+
+    // All currently available parameter sets.  These are updated when
+    // any parameter set NAL unit is read/written with this context.
+    AVBufferRef *vps_ref[HEVC_MAX_VPS_COUNT];
+    AVBufferRef *sps_ref[HEVC_MAX_SPS_COUNT];
+    AVBufferRef *pps_ref[HEVC_MAX_PPS_COUNT];
+    H265RawVPS *vps[HEVC_MAX_VPS_COUNT];
+    H265RawSPS *sps[HEVC_MAX_SPS_COUNT];
+    H265RawPPS *pps[HEVC_MAX_PPS_COUNT];
+
+    // The currently active parameter sets.  These are updated when any
+    // NAL unit refers to the relevant parameter set.  These pointers
+    // must also be present in the arrays above.
+    const H265RawVPS *active_vps;
+    const H265RawSPS *active_sps;
+    const H265RawPPS *active_pps;
+} CodedBitstreamH265Context;
+
+
+#endif /* AVCODEC_CBS_H265_H */

diff --git a/libavcodec/cbs_h265_syntax_template.c b/libavcodec/cbs_h265_syntax_template.c
new file mode 100644
index 0000000..d4e4f7b
--- /dev/null
+++ b/libavcodec/cbs_h265_syntax_template.c

@@ -0,0 +1,1681 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static int FUNC(rbsp_trailing_bits)(CodedBitstreamContext *ctx, RWContext *rw)
+{
+    int err;
+
+    fixed(1, rbsp_stop_one_bit, 1);
+    while (byte_alignment(rw) != 0)
+        fixed(1, rbsp_alignment_zero_bit, 0);
+
+    return 0;
+}
+
+static int FUNC(nal_unit_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                                 H265RawNALUnitHeader *current,
+                                 int expected_nal_unit_type)
+{
+    int err;
+
+    u(1, forbidden_zero_bit, 0, 0);
+
+    if (expected_nal_unit_type >= 0)
+        u(6, nal_unit_type, expected_nal_unit_type,
+                            expected_nal_unit_type);
+    else
+        u(6, nal_unit_type, 0, 63);
+
+    u(6, nuh_layer_id,          0, 62);
+    u(3, nuh_temporal_id_plus1, 1,  7);
+
+    return 0;
+}
+
+static int FUNC(byte_alignment)(CodedBitstreamContext *ctx, RWContext *rw)
+{
+    int err;
+
+    fixed(1, alignment_bit_equal_to_one, 1);
+    while (byte_alignment(rw) != 0)
+        fixed(1, alignment_bit_equal_to_zero, 0);
+
+    return 0;
+}
+
+static int FUNC(extension_data)(CodedBitstreamContext *ctx, RWContext *rw,
+                                H265RawPSExtensionData *current)
+{
+    int err;
+    size_t k;
+#ifdef READ
+    GetBitContext start;
+    uint8_t bit;
+    start = *rw;
+    for (k = 0; cbs_h2645_read_more_rbsp_data(rw); k++)
+        skip_bits(rw, 1);
+    current->bit_length = k;
+    if (k > 0) {
+        *rw = start;
+        allocate(current->data, (current->bit_length + 7) / 8);
+        for (k = 0; k < current->bit_length; k++) {
+            xu(1, extension_data, bit, 0, 1, 0);
+            current->data[k / 8] |= bit << (7 - k % 8);
+        }
+    }
+#else
+    for (k = 0; k < current->bit_length; k++)
+        xu(1, extension_data, current->data[k / 8] >> (7 - k % 8), 0, 1, 0);
+#endif
+    return 0;
+}
+
+static int FUNC(profile_tier_level)(CodedBitstreamContext *ctx, RWContext *rw,
+                                    H265RawProfileTierLevel *current,
+                                    int profile_present_flag,
+                                    int max_num_sub_layers_minus1)
+{
+    int err, i, j;
+
+    if (profile_present_flag) {
+        u(2, general_profile_space, 0, 0);
+        flag(general_tier_flag);
+        u(5, general_profile_idc, 0, 31);
+
+        for (j = 0; j < 32; j++)
+            flags(general_profile_compatibility_flag[j], 1, j);
+
+        flag(general_progressive_source_flag);
+        flag(general_interlaced_source_flag);
+        flag(general_non_packed_constraint_flag);
+        flag(general_frame_only_constraint_flag);
+
+#define profile_compatible(x) (current->general_profile_idc == (x) || \
+                               current->general_profile_compatibility_flag[x])
+        if (profile_compatible(4) || profile_compatible(5) ||
+            profile_compatible(6) || profile_compatible(7) ||
+            profile_compatible(8) || profile_compatible(9) ||
+            profile_compatible(10)) {
+            flag(general_max_12bit_constraint_flag);
+            flag(general_max_10bit_constraint_flag);
+            flag(general_max_8bit_constraint_flag);
+            flag(general_max_422chroma_constraint_flag);
+            flag(general_max_420chroma_constraint_flag);
+            flag(general_max_monochrome_constraint_flag);
+            flag(general_intra_constraint_flag);
+            flag(general_one_picture_only_constraint_flag);
+            flag(general_lower_bit_rate_constraint_flag);
+
+            if (profile_compatible(5) || profile_compatible(9) ||
+                profile_compatible(10)) {
+                flag(general_max_14bit_constraint_flag);
+                fixed(24, general_reserved_zero_33bits, 0);
+                fixed( 9, general_reserved_zero_33bits, 0);
+            } else {
+                fixed(24, general_reserved_zero_34bits, 0);
+                fixed(10, general_reserved_zero_34bits, 0);
+            }
+        } else {
+            fixed(24, general_reserved_zero_43bits, 0);
+            fixed(19, general_reserved_zero_43bits, 0);
+        }
+
+        if (profile_compatible(1) || profile_compatible(2) ||
+            profile_compatible(3) || profile_compatible(4) ||
+            profile_compatible(5) || profile_compatible(9)) {
+            flag(general_inbld_flag);
+        } else {
+            fixed(1, general_reserved_zero_bit, 0);
+        }
+#undef profile_compatible
+    }
+
+    u(8, general_level_idc, 0, 255);
+
+    for (i = 0; i < max_num_sub_layers_minus1; i++) {
+        flags(sub_layer_profile_present_flag[i], 1, i);
+        flags(sub_layer_level_present_flag[i],   1, i);
+    }
+
+    if (max_num_sub_layers_minus1 > 0) {
+        for (i = max_num_sub_layers_minus1; i < 8; i++)
+            fixed(2, reserved_zero_2bits, 0);
+    }
+
+    for (i = 0; i < max_num_sub_layers_minus1; i++) {
+        if (current->sub_layer_profile_present_flag[i])
+            return AVERROR_PATCHWELCOME;
+        if (current->sub_layer_level_present_flag[i])
+            return AVERROR_PATCHWELCOME;
+    }
+
+    return 0;
+}
+
+static int FUNC(sub_layer_hrd_parameters)(CodedBitstreamContext *ctx, RWContext *rw,
+                                          H265RawHRDParameters *hrd,
+                                          int nal, int sub_layer_id)
+{
+    H265RawSubLayerHRDParameters *current;
+    int err, i;
+
+    if (nal)
+        current = &hrd->nal_sub_layer_hrd_parameters[sub_layer_id];
+    else
+        current = &hrd->vcl_sub_layer_hrd_parameters[sub_layer_id];
+
+    for (i = 0; i <= hrd->cpb_cnt_minus1[sub_layer_id]; i++) {
+        ues(bit_rate_value_minus1[i], 0, UINT32_MAX - 1, 1, i);
+        ues(cpb_size_value_minus1[i], 0, UINT32_MAX - 1, 1, i);
+        if (hrd->sub_pic_hrd_params_present_flag) {
+            ues(cpb_size_du_value_minus1[i], 0, UINT32_MAX - 1, 1, i);
+            ues(bit_rate_du_value_minus1[i], 0, UINT32_MAX - 1, 1, i);
+        }
+        flags(cbr_flag[i], 1, i);
+    }
+
+    return 0;
+}
+
+static int FUNC(hrd_parameters)(CodedBitstreamContext *ctx, RWContext *rw,
+                                H265RawHRDParameters *current, int common_inf_present_flag,
+                                int max_num_sub_layers_minus1)
+{
+    int err, i;
+
+    if (common_inf_present_flag) {
+        flag(nal_hrd_parameters_present_flag);
+        flag(vcl_hrd_parameters_present_flag);
+
+        if (current->nal_hrd_parameters_present_flag ||
+            current->vcl_hrd_parameters_present_flag) {
+            flag(sub_pic_hrd_params_present_flag);
+            if (current->sub_pic_hrd_params_present_flag) {
+                u(8, tick_divisor_minus2, 0, 255);
+                u(5, du_cpb_removal_delay_increment_length_minus1, 0, 31);
+                flag(sub_pic_cpb_params_in_pic_timing_sei_flag);
+                u(5, dpb_output_delay_du_length_minus1, 0, 31);
+            }
+
+            u(4, bit_rate_scale, 0, 15);
+            u(4, cpb_size_scale, 0, 15);
+            if (current->sub_pic_hrd_params_present_flag)
+                u(4, cpb_size_du_scale, 0, 15);
+
+            u(5, initial_cpb_removal_delay_length_minus1, 0, 31);
+            u(5, au_cpb_removal_delay_length_minus1,      0, 31);
+            u(5, dpb_output_delay_length_minus1,          0, 31);
+        } else {
+            infer(sub_pic_hrd_params_present_flag, 0);
+
+            infer(initial_cpb_removal_delay_length_minus1, 23);
+            infer(au_cpb_removal_delay_length_minus1,      23);
+            infer(dpb_output_delay_length_minus1,          23);
+        }
+    }
+
+    for (i = 0; i <= max_num_sub_layers_minus1; i++) {
+        flags(fixed_pic_rate_general_flag[i], 1, i);
+
+        if (!current->fixed_pic_rate_general_flag[i])
+            flags(fixed_pic_rate_within_cvs_flag[i], 1, i);
+        else
+            infer(fixed_pic_rate_within_cvs_flag[i], 1);
+
+        if (current->fixed_pic_rate_within_cvs_flag[i]) {
+            ues(elemental_duration_in_tc_minus1[i], 0, 2047, 1, i);
+            infer(low_delay_hrd_flag[i], 0);
+        } else
+            flags(low_delay_hrd_flag[i], 1, i);
+
+        if (!current->low_delay_hrd_flag[i])
+            ues(cpb_cnt_minus1[i], 0, 31, 1, i);
+        else
+            infer(cpb_cnt_minus1[i], 0);
+
+        if (current->nal_hrd_parameters_present_flag)
+            CHECK(FUNC(sub_layer_hrd_parameters)(ctx, rw, current, 0, i));
+        if (current->vcl_hrd_parameters_present_flag)
+            CHECK(FUNC(sub_layer_hrd_parameters)(ctx, rw, current, 1, i));
+    }
+
+    return 0;
+}
+
+static int FUNC(vui_parameters)(CodedBitstreamContext *ctx, RWContext *rw,
+                                H265RawVUI *current, const H265RawSPS *sps)
+{
+    int err;
+
+    flag(aspect_ratio_info_present_flag);
+    if (current->aspect_ratio_info_present_flag) {
+        u(8, aspect_ratio_idc, 0, 255);
+        if (current->aspect_ratio_idc == 255) {
+            u(16, sar_width,  0, 65535);
+            u(16, sar_height, 0, 65535);
+        }
+    } else {
+        infer(aspect_ratio_idc, 0);
+    }
+
+    flag(overscan_info_present_flag);
+    if (current->overscan_info_present_flag)
+        flag(overscan_appropriate_flag);
+
+    flag(video_signal_type_present_flag);
+    if (current->video_signal_type_present_flag) {
+        u(3, video_format, 0, 7);
+        flag(video_full_range_flag);
+        flag(colour_description_present_flag);
+        if (current->colour_description_present_flag) {
+            u(8, colour_primaries,         0, 255);
+            u(8, transfer_characteristics, 0, 255);
+            u(8, matrix_coefficients,      0, 255);
+        } else {
+            infer(colour_primaries,         2);
+            infer(transfer_characteristics, 2);
+            infer(matrix_coefficients,      2);
+        }
+    } else {
+        infer(video_format,             5);
+        infer(video_full_range_flag,    0);
+        infer(colour_primaries,         2);
+        infer(transfer_characteristics, 2);
+        infer(matrix_coefficients,      2);
+    }
+
+    flag(chroma_loc_info_present_flag);
+    if (current->chroma_loc_info_present_flag) {
+        ue(chroma_sample_loc_type_top_field,    0, 5);
+        ue(chroma_sample_loc_type_bottom_field, 0, 5);
+    } else {
+        infer(chroma_sample_loc_type_top_field,    0);
+        infer(chroma_sample_loc_type_bottom_field, 0);
+    }
+
+    flag(neutral_chroma_indication_flag);
+    flag(field_seq_flag);
+    flag(frame_field_info_present_flag);
+
+    flag(default_display_window_flag);
+    if (current->default_display_window_flag) {
+        ue(def_disp_win_left_offset,   0, 16384);
+        ue(def_disp_win_right_offset,  0, 16384);
+        ue(def_disp_win_top_offset,    0, 16384);
+        ue(def_disp_win_bottom_offset, 0, 16384);
+    }
+
+    flag(vui_timing_info_present_flag);
+    if (current->vui_timing_info_present_flag) {
+        u(32, vui_num_units_in_tick, 1, UINT32_MAX);
+        u(32, vui_time_scale,        1, UINT32_MAX);
+        flag(vui_poc_proportional_to_timing_flag);
+        if (current->vui_poc_proportional_to_timing_flag)
+            ue(vui_num_ticks_poc_diff_one_minus1, 0, UINT32_MAX - 1);
+
+        flag(vui_hrd_parameters_present_flag);
+        if (current->vui_hrd_parameters_present_flag) {
+            CHECK(FUNC(hrd_parameters)(ctx, rw, &current->hrd_parameters,
+                                       1, sps->sps_max_sub_layers_minus1));
+        }
+    }
+
+    flag(bitstream_restriction_flag);
+    if (current->bitstream_restriction_flag) {
+        flag(tiles_fixed_structure_flag);
+        flag(motion_vectors_over_pic_boundaries_flag);
+        flag(restricted_ref_pic_lists_flag);
+        ue(min_spatial_segmentation_idc,  0, 4095);
+        ue(max_bytes_per_pic_denom,       0, 16);
+        ue(max_bits_per_min_cu_denom,     0, 16);
+        ue(log2_max_mv_length_horizontal, 0, 16);
+        ue(log2_max_mv_length_vertical,   0, 16);
+    } else {
+        infer(tiles_fixed_structure_flag,    0);
+        infer(motion_vectors_over_pic_boundaries_flag, 1);
+        infer(min_spatial_segmentation_idc,  0);
+        infer(max_bytes_per_pic_denom,       2);
+        infer(max_bits_per_min_cu_denom,     1);
+        infer(log2_max_mv_length_horizontal, 15);
+        infer(log2_max_mv_length_vertical,   15);
+    }
+
+    return 0;
+}
+
+static int FUNC(vps)(CodedBitstreamContext *ctx, RWContext *rw,
+                     H265RawVPS *current)
+{
+    int err, i, j;
+
+    HEADER("Video Parameter Set");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header, HEVC_NAL_VPS));
+
+    u(4, vps_video_parameter_set_id, 0, 15);
+
+    flag(vps_base_layer_internal_flag);
+    flag(vps_base_layer_available_flag);
+    u(6, vps_max_layers_minus1,     0, HEVC_MAX_LAYERS - 1);
+    u(3, vps_max_sub_layers_minus1, 0, HEVC_MAX_SUB_LAYERS - 1);
+    flag(vps_temporal_id_nesting_flag);
+
+    if (current->vps_max_sub_layers_minus1 == 0 &&
+        current->vps_temporal_id_nesting_flag != 1) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid stream: "
+               "vps_temporal_id_nesting_flag must be 1 if "
+               "vps_max_sub_layers_minus1 is 0.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    fixed(16, vps_reserved_0xffff_16bits, 0xffff);
+
+    CHECK(FUNC(profile_tier_level)(ctx, rw, &current->profile_tier_level,
+                                   1, current->vps_max_sub_layers_minus1));
+
+    flag(vps_sub_layer_ordering_info_present_flag);
+    for (i = (current->vps_sub_layer_ordering_info_present_flag ?
+              0 : current->vps_max_sub_layers_minus1);
+         i <= current->vps_max_sub_layers_minus1; i++) {
+        ues(vps_max_dec_pic_buffering_minus1[i],
+            0, HEVC_MAX_DPB_SIZE - 1,                        1, i);
+        ues(vps_max_num_reorder_pics[i],
+            0, current->vps_max_dec_pic_buffering_minus1[i], 1, i);
+        ues(vps_max_latency_increase_plus1[i],
+            0, UINT32_MAX - 1,                               1, i);
+    }
+    if (!current->vps_sub_layer_ordering_info_present_flag) {
+        for (i = 0; i < current->vps_max_sub_layers_minus1; i++) {
+            infer(vps_max_dec_pic_buffering_minus1[i],
+                  current->vps_max_dec_pic_buffering_minus1[current->vps_max_sub_layers_minus1]);
+            infer(vps_max_num_reorder_pics[i],
+                  current->vps_max_num_reorder_pics[current->vps_max_sub_layers_minus1]);
+            infer(vps_max_latency_increase_plus1[i],
+                  current->vps_max_latency_increase_plus1[current->vps_max_sub_layers_minus1]);
+        }
+    }
+
+    u(6, vps_max_layer_id,        0, HEVC_MAX_LAYERS - 1);
+    ue(vps_num_layer_sets_minus1, 0, HEVC_MAX_LAYER_SETS - 1);
+    for (i = 1; i <= current->vps_num_layer_sets_minus1; i++) {
+        for (j = 0; j <= current->vps_max_layer_id; j++)
+            flags(layer_id_included_flag[i][j], 2, i, j);
+    }
+    for (j = 0; j <= current->vps_max_layer_id; j++)
+        infer(layer_id_included_flag[0][j], j == 0);
+
+    flag(vps_timing_info_present_flag);
+    if (current->vps_timing_info_present_flag) {
+        u(32, vps_num_units_in_tick, 1, UINT32_MAX);
+        u(32, vps_time_scale,        1, UINT32_MAX);
+        flag(vps_poc_proportional_to_timing_flag);
+        if (current->vps_poc_proportional_to_timing_flag)
+            ue(vps_num_ticks_poc_diff_one_minus1, 0, UINT32_MAX - 1);
+        ue(vps_num_hrd_parameters, 0, current->vps_num_layer_sets_minus1 + 1);
+        for (i = 0; i < current->vps_num_hrd_parameters; i++) {
+            ues(hrd_layer_set_idx[i],
+                current->vps_base_layer_internal_flag ? 0 : 1,
+                current->vps_num_layer_sets_minus1, 1, i);
+            if (i > 0)
+                flags(cprms_present_flag[i], 1, i);
+            else
+                infer(cprms_present_flag[0], 1);
+
+            CHECK(FUNC(hrd_parameters)(ctx, rw, &current->hrd_parameters[i],
+                                       current->cprms_present_flag[i],
+                                       current->vps_max_sub_layers_minus1));
+        }
+    }
+
+    flag(vps_extension_flag);
+    if (current->vps_extension_flag)
+        CHECK(FUNC(extension_data)(ctx, rw, &current->extension_data));
+
+    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+    return 0;
+}
+
+static int FUNC(st_ref_pic_set)(CodedBitstreamContext *ctx, RWContext *rw,
+                                H265RawSTRefPicSet *current, int st_rps_idx,
+                                const H265RawSPS *sps)
+{
+    int err, i, j;
+
+    if (st_rps_idx != 0)
+        flag(inter_ref_pic_set_prediction_flag);
+    else
+        infer(inter_ref_pic_set_prediction_flag, 0);
+
+    if (current->inter_ref_pic_set_prediction_flag) {
+        unsigned int ref_rps_idx, num_delta_pocs;
+        const H265RawSTRefPicSet *ref;
+        int delta_rps, d_poc;
+        int ref_delta_poc_s0[HEVC_MAX_REFS], ref_delta_poc_s1[HEVC_MAX_REFS];
+        int delta_poc_s0[HEVC_MAX_REFS], delta_poc_s1[HEVC_MAX_REFS];
+        uint8_t used_by_curr_pic_s0[HEVC_MAX_REFS],
+                used_by_curr_pic_s1[HEVC_MAX_REFS];
+
+        if (st_rps_idx == sps->num_short_term_ref_pic_sets)
+            ue(delta_idx_minus1, 0, st_rps_idx - 1);
+        else
+            infer(delta_idx_minus1, 0);
+
+        ref_rps_idx = st_rps_idx - (current->delta_idx_minus1 + 1);
+        ref = &sps->st_ref_pic_set[ref_rps_idx];
+        num_delta_pocs = ref->num_negative_pics + ref->num_positive_pics;
+
+        flag(delta_rps_sign);
+        ue(abs_delta_rps_minus1, 0, INT16_MAX);
+        delta_rps = (1 - 2 * current->delta_rps_sign) *
+            (current->abs_delta_rps_minus1 + 1);
+
+        for (j = 0; j <= num_delta_pocs; j++) {
+            flags(used_by_curr_pic_flag[j], 1, j);
+            if (!current->used_by_curr_pic_flag[j])
+                flags(use_delta_flag[j], 1, j);
+            else
+                infer(use_delta_flag[j], 1);
+        }
+
+        // Since the stored form of an RPS here is actually the delta-step
+        // form used when inter_ref_pic_set_prediction_flag is not set, we
+        // need to reconstruct that here in order to be able to refer to
+        // the RPS later (which is required for parsing, because we don't
+        // even know what syntax elements appear without it).  Therefore,
+        // this code takes the delta-step form of the reference set, turns
+        // it into the delta-array form, applies the prediction process of
+        // 7.4.8, converts the result back to the delta-step form, and
+        // stores that as the current set for future use.  Note that the
+        // inferences here mean that writers using prediction will need
+        // to fill in the delta-step values correctly as well - since the
+        // whole RPS prediction process is somewhat overly sophisticated,
+        // this hopefully forms a useful check for them to ensure their
+        // predicted form actually matches what was intended rather than
+        // an onerous additional requirement.
+
+        d_poc = 0;
+        for (i = 0; i < ref->num_negative_pics; i++) {
+            d_poc -= ref->delta_poc_s0_minus1[i] + 1;
+            ref_delta_poc_s0[i] = d_poc;
+        }
+        d_poc = 0;
+        for (i = 0; i < ref->num_positive_pics; i++) {
+            d_poc += ref->delta_poc_s1_minus1[i] + 1;
+            ref_delta_poc_s1[i] = d_poc;
+        }
+
+        i = 0;
+        for (j = ref->num_positive_pics - 1; j >= 0; j--) {
+            d_poc = ref_delta_poc_s1[j] + delta_rps;
+            if (d_poc < 0 && current->use_delta_flag[ref->num_negative_pics + j]) {
+                delta_poc_s0[i] = d_poc;
+                used_by_curr_pic_s0[i++] =
+                    current->used_by_curr_pic_flag[ref->num_negative_pics + j];
+            }
+        }
+        if (delta_rps < 0 && current->use_delta_flag[num_delta_pocs]) {
+            delta_poc_s0[i] = delta_rps;
+            used_by_curr_pic_s0[i++] =
+                current->used_by_curr_pic_flag[num_delta_pocs];
+        }
+        for (j = 0; j < ref->num_negative_pics; j++) {
+            d_poc = ref_delta_poc_s0[j] + delta_rps;
+            if (d_poc < 0 && current->use_delta_flag[j]) {
+                delta_poc_s0[i] = d_poc;
+                used_by_curr_pic_s0[i++] = current->used_by_curr_pic_flag[j];
+            }
+        }
+
+        infer(num_negative_pics, i);
+        for (i = 0; i < current->num_negative_pics; i++) {
+            infer(delta_poc_s0_minus1[i],
+                  -(delta_poc_s0[i] - (i == 0 ? 0 : delta_poc_s0[i - 1])) - 1);
+            infer(used_by_curr_pic_s0_flag[i], used_by_curr_pic_s0[i]);
+        }
+
+        i = 0;
+        for (j = ref->num_negative_pics - 1; j >= 0; j--) {
+            d_poc = ref_delta_poc_s0[j] + delta_rps;
+            if (d_poc > 0 && current->use_delta_flag[j]) {
+                delta_poc_s1[i] = d_poc;
+                used_by_curr_pic_s1[i++] = current->used_by_curr_pic_flag[j];
+            }
+        }
+        if (delta_rps > 0 && current->use_delta_flag[num_delta_pocs]) {
+            delta_poc_s1[i] = delta_rps;
+            used_by_curr_pic_s1[i++] =
+                current->used_by_curr_pic_flag[num_delta_pocs];
+        }
+        for (j = 0; j < ref->num_positive_pics; j++) {
+            d_poc = ref_delta_poc_s1[j] + delta_rps;
+            if (d_poc > 0 && current->use_delta_flag[ref->num_negative_pics + j]) {
+                delta_poc_s1[i] = d_poc;
+                used_by_curr_pic_s1[i++] =
+                    current->used_by_curr_pic_flag[ref->num_negative_pics + j];
+            }
+        }
+
+        infer(num_positive_pics, i);
+        for (i = 0; i < current->num_positive_pics; i++) {
+            infer(delta_poc_s1_minus1[i],
+                  delta_poc_s1[i] - (i == 0 ? 0 : delta_poc_s1[i - 1]) - 1);
+            infer(used_by_curr_pic_s1_flag[i], used_by_curr_pic_s1[i]);
+        }
+
+    } else {
+        ue(num_negative_pics, 0, 15);
+        ue(num_positive_pics, 0, 15 - current->num_negative_pics);
+
+        for (i = 0; i < current->num_negative_pics; i++) {
+            ues(delta_poc_s0_minus1[i], 0, INT16_MAX, 1, i);
+            flags(used_by_curr_pic_s0_flag[i],        1, i);
+        }
+
+        for (i = 0; i < current->num_positive_pics; i++) {
+            ues(delta_poc_s1_minus1[i], 0, INT16_MAX, 1, i);
+            flags(used_by_curr_pic_s1_flag[i],        1, i);
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(scaling_list_data)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   H265RawScalingList *current)
+{
+    int sizeId, matrixId;
+    int err, n, i;
+
+    for (sizeId = 0; sizeId < 4; sizeId++) {
+        for (matrixId = 0; matrixId < 6; matrixId += (sizeId == 3 ? 3 : 1)) {
+            flags(scaling_list_pred_mode_flag[sizeId][matrixId],
+                  2, sizeId, matrixId);
+            if (!current->scaling_list_pred_mode_flag[sizeId][matrixId]) {
+                ues(scaling_list_pred_matrix_id_delta[sizeId][matrixId],
+                    0, sizeId == 3 ? matrixId / 3 : matrixId,
+                    2, sizeId, matrixId);
+            } else {
+                n = FFMIN(64, 1 << (4 + (sizeId << 1)));
+                if (sizeId > 1) {
+                    ses(scaling_list_dc_coef_minus8[sizeId - 2][matrixId], -7, +247,
+                        2, sizeId - 2, matrixId);
+                }
+                for (i = 0; i < n; i++) {
+                    ses(scaling_list_delta_coeff[sizeId][matrixId][i],
+                        -128, +127, 3, sizeId, matrixId, i);
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(sps_range_extension)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     H265RawSPS *current)
+{
+    int err;
+
+    flag(transform_skip_rotation_enabled_flag);
+    flag(transform_skip_context_enabled_flag);
+    flag(implicit_rdpcm_enabled_flag);
+    flag(explicit_rdpcm_enabled_flag);
+    flag(extended_precision_processing_flag);
+    flag(intra_smoothing_disabled_flag);
+    flag(high_precision_offsets_enabled_flag);
+    flag(persistent_rice_adaptation_enabled_flag);
+    flag(cabac_bypass_alignment_enabled_flag);
+
+    return 0;
+}
+
+static int FUNC(sps_scc_extension)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   H265RawSPS *current)
+{
+    int err, comp, i;
+
+    flag(sps_curr_pic_ref_enabled_flag);
+
+    flag(palette_mode_enabled_flag);
+    if (current->palette_mode_enabled_flag) {
+        ue(palette_max_size, 0, 64);
+        ue(delta_palette_max_predictor_size, 0, 128);
+
+        flag(sps_palette_predictor_initializer_present_flag);
+        if (current->sps_palette_predictor_initializer_present_flag) {
+            ue(sps_num_palette_predictor_initializer_minus1, 0, 128);
+            for (comp = 0; comp < (current->chroma_format_idc ? 3 : 1); comp++) {
+                int bit_depth = comp == 0 ? current->bit_depth_luma_minus8 + 8
+                                          : current->bit_depth_chroma_minus8 + 8;
+                for (i = 0; i <= current->sps_num_palette_predictor_initializer_minus1; i++)
+                    us(bit_depth, sps_palette_predictor_initializers[comp][i],
+                       0, MAX_UINT_BITS(bit_depth), 2, comp, i);
+            }
+        }
+    }
+
+    u(2, motion_vector_resolution_control_idc, 0, 2);
+    flag(intra_boundary_filtering_disable_flag);
+
+    return 0;
+}
+
+static int FUNC(sps)(CodedBitstreamContext *ctx, RWContext *rw,
+                     H265RawSPS *current)
+{
+    CodedBitstreamH265Context *h265 = ctx->priv_data;
+    const H265RawVPS *vps;
+    int err, i;
+    unsigned int min_cb_log2_size_y, ctb_log2_size_y,
+                 min_cb_size_y,   min_tb_log2_size_y;
+
+    HEADER("Sequence Parameter Set");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header, HEVC_NAL_SPS));
+
+    u(4, sps_video_parameter_set_id, 0, 15);
+    h265->active_vps = vps = h265->vps[current->sps_video_parameter_set_id];
+
+    u(3, sps_max_sub_layers_minus1, 0, HEVC_MAX_SUB_LAYERS - 1);
+    flag(sps_temporal_id_nesting_flag);
+    if (vps) {
+        if (vps->vps_max_sub_layers_minus1 > current->sps_max_sub_layers_minus1) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid stream: "
+                   "sps_max_sub_layers_minus1 (%d) must be less than or equal to "
+                   "vps_max_sub_layers_minus1 (%d).\n",
+                   vps->vps_max_sub_layers_minus1,
+                   current->sps_max_sub_layers_minus1);
+            return AVERROR_INVALIDDATA;
+        }
+        if (vps->vps_temporal_id_nesting_flag &&
+            !current->sps_temporal_id_nesting_flag) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid stream: "
+                   "sps_temporal_id_nesting_flag must be 1 if "
+                   "vps_temporal_id_nesting_flag is 1.\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    CHECK(FUNC(profile_tier_level)(ctx, rw, &current->profile_tier_level,
+                                   1, current->sps_max_sub_layers_minus1));
+
+    ue(sps_seq_parameter_set_id, 0, 15);
+
+    ue(chroma_format_idc, 0, 3);
+    if (current->chroma_format_idc == 3)
+        flag(separate_colour_plane_flag);
+    else
+        infer(separate_colour_plane_flag, 0);
+
+    ue(pic_width_in_luma_samples,  1, HEVC_MAX_WIDTH);
+    ue(pic_height_in_luma_samples, 1, HEVC_MAX_HEIGHT);
+
+    flag(conformance_window_flag);
+    if (current->conformance_window_flag) {
+        ue(conf_win_left_offset,   0, current->pic_width_in_luma_samples);
+        ue(conf_win_right_offset,  0, current->pic_width_in_luma_samples);
+        ue(conf_win_top_offset,    0, current->pic_height_in_luma_samples);
+        ue(conf_win_bottom_offset, 0, current->pic_height_in_luma_samples);
+    } else {
+        infer(conf_win_left_offset,   0);
+        infer(conf_win_right_offset,  0);
+        infer(conf_win_top_offset,    0);
+        infer(conf_win_bottom_offset, 0);
+    }
+
+    ue(bit_depth_luma_minus8,   0, 8);
+    ue(bit_depth_chroma_minus8, 0, 8);
+
+    ue(log2_max_pic_order_cnt_lsb_minus4, 0, 12);
+
+    flag(sps_sub_layer_ordering_info_present_flag);
+    for (i = (current->sps_sub_layer_ordering_info_present_flag ?
+              0 : current->sps_max_sub_layers_minus1);
+         i <= current->sps_max_sub_layers_minus1; i++) {
+        ues(sps_max_dec_pic_buffering_minus1[i],
+            0, HEVC_MAX_DPB_SIZE - 1,                        1, i);
+        ues(sps_max_num_reorder_pics[i],
+            0, current->sps_max_dec_pic_buffering_minus1[i], 1, i);
+        ues(sps_max_latency_increase_plus1[i],
+            0, UINT32_MAX - 1,                               1, i);
+    }
+    if (!current->sps_sub_layer_ordering_info_present_flag) {
+        for (i = 0; i < current->sps_max_sub_layers_minus1; i++) {
+            infer(sps_max_dec_pic_buffering_minus1[i],
+                  current->sps_max_dec_pic_buffering_minus1[current->sps_max_sub_layers_minus1]);
+            infer(sps_max_num_reorder_pics[i],
+                  current->sps_max_num_reorder_pics[current->sps_max_sub_layers_minus1]);
+            infer(sps_max_latency_increase_plus1[i],
+                  current->sps_max_latency_increase_plus1[current->sps_max_sub_layers_minus1]);
+        }
+    }
+
+    ue(log2_min_luma_coding_block_size_minus3,   0, 3);
+    min_cb_log2_size_y = current->log2_min_luma_coding_block_size_minus3 + 3;
+
+    ue(log2_diff_max_min_luma_coding_block_size, 0, 3);
+    ctb_log2_size_y = min_cb_log2_size_y +
+        current->log2_diff_max_min_luma_coding_block_size;
+
+    min_cb_size_y = 1 << min_cb_log2_size_y;
+    if (current->pic_width_in_luma_samples  % min_cb_size_y ||
+        current->pic_height_in_luma_samples % min_cb_size_y) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid dimensions: %ux%u not divisible "
+               "by MinCbSizeY = %u.\n", current->pic_width_in_luma_samples,
+               current->pic_height_in_luma_samples, min_cb_size_y);
+        return AVERROR_INVALIDDATA;
+    }
+
+    ue(log2_min_luma_transform_block_size_minus2, 0, min_cb_log2_size_y - 3);
+    min_tb_log2_size_y = current->log2_min_luma_transform_block_size_minus2 + 2;
+
+    ue(log2_diff_max_min_luma_transform_block_size,
+       0, FFMIN(ctb_log2_size_y, 5) - min_tb_log2_size_y);
+
+    ue(max_transform_hierarchy_depth_inter,
+       0, ctb_log2_size_y - min_tb_log2_size_y);
+    ue(max_transform_hierarchy_depth_intra,
+       0, ctb_log2_size_y - min_tb_log2_size_y);
+
+    flag(scaling_list_enabled_flag);
+    if (current->scaling_list_enabled_flag) {
+        flag(sps_scaling_list_data_present_flag);
+        if (current->sps_scaling_list_data_present_flag)
+            CHECK(FUNC(scaling_list_data)(ctx, rw, &current->scaling_list));
+    } else {
+        infer(sps_scaling_list_data_present_flag, 0);
+    }
+
+    flag(amp_enabled_flag);
+    flag(sample_adaptive_offset_enabled_flag);
+
+    flag(pcm_enabled_flag);
+    if (current->pcm_enabled_flag) {
+        u(4, pcm_sample_bit_depth_luma_minus1,
+          0, current->bit_depth_luma_minus8 + 8 - 1);
+        u(4, pcm_sample_bit_depth_chroma_minus1,
+          0, current->bit_depth_chroma_minus8 + 8 - 1);
+
+        ue(log2_min_pcm_luma_coding_block_size_minus3,
+           FFMIN(min_cb_log2_size_y, 5) - 3, FFMIN(ctb_log2_size_y, 5) - 3);
+        ue(log2_diff_max_min_pcm_luma_coding_block_size,
+           0, FFMIN(ctb_log2_size_y, 5) - (current->log2_min_pcm_luma_coding_block_size_minus3 + 3));
+
+        flag(pcm_loop_filter_disabled_flag);
+    }
+
+    ue(num_short_term_ref_pic_sets, 0, HEVC_MAX_SHORT_TERM_REF_PIC_SETS);
+    for (i = 0; i < current->num_short_term_ref_pic_sets; i++)
+        CHECK(FUNC(st_ref_pic_set)(ctx, rw, &current->st_ref_pic_set[i], i, current));
+
+    flag(long_term_ref_pics_present_flag);
+    if (current->long_term_ref_pics_present_flag) {
+        ue(num_long_term_ref_pics_sps, 0, HEVC_MAX_LONG_TERM_REF_PICS);
+        for (i = 0; i < current->num_long_term_ref_pics_sps; i++) {
+            us(current->log2_max_pic_order_cnt_lsb_minus4 + 4,
+               lt_ref_pic_poc_lsb_sps[i],
+               0, MAX_UINT_BITS(current->log2_max_pic_order_cnt_lsb_minus4 + 4), 1, i);
+            flags(used_by_curr_pic_lt_sps_flag[i], 1, i);
+        }
+    }
+
+    flag(sps_temporal_mvp_enabled_flag);
+    flag(strong_intra_smoothing_enabled_flag);
+
+    flag(vui_parameters_present_flag);
+    if (current->vui_parameters_present_flag)
+        CHECK(FUNC(vui_parameters)(ctx, rw, &current->vui, current));
+
+    flag(sps_extension_present_flag);
+    if (current->sps_extension_present_flag) {
+        flag(sps_range_extension_flag);
+        flag(sps_multilayer_extension_flag);
+        flag(sps_3d_extension_flag);
+        flag(sps_scc_extension_flag);
+        u(4, sps_extension_4bits, 0, MAX_UINT_BITS(4));
+    }
+
+    if (current->sps_range_extension_flag)
+        CHECK(FUNC(sps_range_extension)(ctx, rw, current));
+    if (current->sps_multilayer_extension_flag)
+        return AVERROR_PATCHWELCOME;
+    if (current->sps_3d_extension_flag)
+        return AVERROR_PATCHWELCOME;
+    if (current->sps_scc_extension_flag)
+        CHECK(FUNC(sps_scc_extension)(ctx, rw, current));
+    if (current->sps_extension_4bits)
+        CHECK(FUNC(extension_data)(ctx, rw, &current->extension_data));
+
+    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+    return 0;
+}
+
+static int FUNC(pps_range_extension)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     H265RawPPS *current)
+{
+    CodedBitstreamH265Context *h265 = ctx->priv_data;
+    const H265RawSPS *sps = h265->active_sps;
+    int err, i;
+
+    if (current->transform_skip_enabled_flag)
+        ue(log2_max_transform_skip_block_size_minus2, 0, 3);
+    flag(cross_component_prediction_enabled_flag);
+
+    flag(chroma_qp_offset_list_enabled_flag);
+    if (current->chroma_qp_offset_list_enabled_flag) {
+        ue(diff_cu_chroma_qp_offset_depth,
+           0, sps->log2_diff_max_min_luma_coding_block_size);
+        ue(chroma_qp_offset_list_len_minus1, 0, 5);
+        for (i = 0; i <= current->chroma_qp_offset_list_len_minus1; i++) {
+            ses(cb_qp_offset_list[i], -12, +12, 1, i);
+            ses(cr_qp_offset_list[i], -12, +12, 1, i);
+        }
+    }
+
+    ue(log2_sao_offset_scale_luma,   0, FFMAX(0, sps->bit_depth_luma_minus8   - 2));
+    ue(log2_sao_offset_scale_chroma, 0, FFMAX(0, sps->bit_depth_chroma_minus8 - 2));
+
+    return 0;
+}
+
+static int FUNC(pps_scc_extension)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   H265RawPPS *current)
+{
+    int err, comp, i;
+
+    flag(pps_curr_pic_ref_enabled_flag);
+
+    flag(residual_adaptive_colour_transform_enabled_flag);
+    if (current->residual_adaptive_colour_transform_enabled_flag) {
+        flag(pps_slice_act_qp_offsets_present_flag);
+        se(pps_act_y_qp_offset_plus5,  -7, +17);
+        se(pps_act_cb_qp_offset_plus5, -7, +17);
+        se(pps_act_cr_qp_offset_plus3, -9, +15);
+    } else {
+        infer(pps_slice_act_qp_offsets_present_flag, 0);
+        infer(pps_act_y_qp_offset_plus5,  0);
+        infer(pps_act_cb_qp_offset_plus5, 0);
+        infer(pps_act_cr_qp_offset_plus3, 0);
+    }
+
+    flag(pps_palette_predictor_initializer_present_flag);
+    if (current->pps_palette_predictor_initializer_present_flag) {
+        ue(pps_num_palette_predictor_initializer, 0, 128);
+        if (current->pps_num_palette_predictor_initializer > 0) {
+            flag(monochrome_palette_flag);
+            ue(luma_bit_depth_entry_minus8, 0, 8);
+            if (!current->monochrome_palette_flag)
+                ue(chroma_bit_depth_entry_minus8, 0, 8);
+            for (comp = 0; comp < (current->monochrome_palette_flag ? 1 : 3); comp++) {
+                int bit_depth = comp == 0 ? current->luma_bit_depth_entry_minus8 + 8
+                                          : current->chroma_bit_depth_entry_minus8 + 8;
+                for (i = 0; i < current->pps_num_palette_predictor_initializer; i++)
+                    us(bit_depth, pps_palette_predictor_initializers[comp][i],
+                       0, MAX_UINT_BITS(bit_depth), 2, comp, i);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(pps)(CodedBitstreamContext *ctx, RWContext *rw,
+                     H265RawPPS *current)
+{
+    CodedBitstreamH265Context *h265 = ctx->priv_data;
+    const H265RawSPS *sps;
+    int err, i;
+
+    HEADER("Picture Parameter Set");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header, HEVC_NAL_PPS));
+
+    ue(pps_pic_parameter_set_id, 0, 63);
+    ue(pps_seq_parameter_set_id, 0, 15);
+    sps = h265->sps[current->pps_seq_parameter_set_id];
+    if (!sps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "SPS id %d not available.\n",
+               current->pps_seq_parameter_set_id);
+        return AVERROR_INVALIDDATA;
+    }
+    h265->active_sps = sps;
+
+    flag(dependent_slice_segments_enabled_flag);
+    flag(output_flag_present_flag);
+    u(3, num_extra_slice_header_bits, 0, 7);
+    flag(sign_data_hiding_enabled_flag);
+    flag(cabac_init_present_flag);
+
+    ue(num_ref_idx_l0_default_active_minus1, 0, 14);
+    ue(num_ref_idx_l1_default_active_minus1, 0, 14);
+
+    se(init_qp_minus26, -(26 + 6 * sps->bit_depth_luma_minus8), +25);
+
+    flag(constrained_intra_pred_flag);
+    flag(transform_skip_enabled_flag);
+    flag(cu_qp_delta_enabled_flag);
+    if (current->cu_qp_delta_enabled_flag)
+        ue(diff_cu_qp_delta_depth,
+           0, sps->log2_diff_max_min_luma_coding_block_size);
+    else
+        infer(diff_cu_qp_delta_depth, 0);
+
+    se(pps_cb_qp_offset, -12, +12);
+    se(pps_cr_qp_offset, -12, +12);
+    flag(pps_slice_chroma_qp_offsets_present_flag);
+
+    flag(weighted_pred_flag);
+    flag(weighted_bipred_flag);
+
+    flag(transquant_bypass_enabled_flag);
+    flag(tiles_enabled_flag);
+    flag(entropy_coding_sync_enabled_flag);
+
+    if (current->tiles_enabled_flag) {
+        ue(num_tile_columns_minus1, 0, HEVC_MAX_TILE_COLUMNS);
+        ue(num_tile_rows_minus1,    0, HEVC_MAX_TILE_ROWS);
+        flag(uniform_spacing_flag);
+        if (!current->uniform_spacing_flag) {
+            for (i = 0; i < current->num_tile_columns_minus1; i++)
+                ues(column_width_minus1[i], 0, sps->pic_width_in_luma_samples,  1, i);
+            for (i = 0; i < current->num_tile_rows_minus1; i++)
+                ues(row_height_minus1[i],   0, sps->pic_height_in_luma_samples, 1, i);
+        }
+        flag(loop_filter_across_tiles_enabled_flag);
+    } else {
+        infer(num_tile_columns_minus1, 0);
+        infer(num_tile_rows_minus1,    0);
+    }
+
+    flag(pps_loop_filter_across_slices_enabled_flag);
+    flag(deblocking_filter_control_present_flag);
+    if (current->deblocking_filter_control_present_flag) {
+        flag(deblocking_filter_override_enabled_flag);
+        flag(pps_deblocking_filter_disabled_flag);
+        if (!current->pps_deblocking_filter_disabled_flag) {
+            se(pps_beta_offset_div2, -6, +6);
+            se(pps_tc_offset_div2,   -6, +6);
+        } else {
+            infer(pps_beta_offset_div2, 0);
+            infer(pps_tc_offset_div2,   0);
+        }
+    } else {
+        infer(deblocking_filter_override_enabled_flag, 0);
+        infer(pps_deblocking_filter_disabled_flag,     0);
+        infer(pps_beta_offset_div2, 0);
+        infer(pps_tc_offset_div2,   0);
+    }
+
+    flag(pps_scaling_list_data_present_flag);
+    if (current->pps_scaling_list_data_present_flag)
+        CHECK(FUNC(scaling_list_data)(ctx, rw, &current->scaling_list));
+
+    flag(lists_modification_present_flag);
+
+    ue(log2_parallel_merge_level_minus2,
+       0, (sps->log2_min_luma_coding_block_size_minus3 + 3 +
+           sps->log2_diff_max_min_luma_coding_block_size - 2));
+
+    flag(slice_segment_header_extension_present_flag);
+
+    flag(pps_extension_present_flag);
+    if (current->pps_extension_present_flag) {
+        flag(pps_range_extension_flag);
+        flag(pps_multilayer_extension_flag);
+        flag(pps_3d_extension_flag);
+        flag(pps_scc_extension_flag);
+        u(4, pps_extension_4bits, 0, MAX_UINT_BITS(4));
+    }
+    if (current->pps_range_extension_flag)
+        CHECK(FUNC(pps_range_extension)(ctx, rw, current));
+    if (current->pps_multilayer_extension_flag)
+        return AVERROR_PATCHWELCOME;
+    if (current->pps_3d_extension_flag)
+        return AVERROR_PATCHWELCOME;
+    if (current->pps_scc_extension_flag)
+        CHECK(FUNC(pps_scc_extension)(ctx, rw, current));
+    if (current->pps_extension_4bits)
+        CHECK(FUNC(extension_data)(ctx, rw, &current->extension_data));
+
+    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+    return 0;
+}
+
+static int FUNC(aud)(CodedBitstreamContext *ctx, RWContext *rw,
+                     H265RawAUD *current)
+{
+    int err;
+
+    HEADER("Access Unit Delimiter");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header, HEVC_NAL_AUD));
+
+    u(3, pic_type, 0, 2);
+
+    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+    return 0;
+}
+
+static int FUNC(ref_pic_lists_modification)(CodedBitstreamContext *ctx, RWContext *rw,
+                                            H265RawSliceHeader *current,
+                                            unsigned int num_pic_total_curr)
+{
+    unsigned int entry_size;
+    int err, i;
+
+    entry_size = av_log2(num_pic_total_curr - 1) + 1;
+
+    flag(ref_pic_list_modification_flag_l0);
+    if (current->ref_pic_list_modification_flag_l0) {
+        for (i = 0; i <= current->num_ref_idx_l0_active_minus1; i++)
+            us(entry_size, list_entry_l0[i], 0, num_pic_total_curr - 1, 1, i);
+    }
+
+    if (current->slice_type == HEVC_SLICE_B) {
+        flag(ref_pic_list_modification_flag_l1);
+        if (current->ref_pic_list_modification_flag_l1) {
+            for (i = 0; i <= current->num_ref_idx_l1_active_minus1; i++)
+                us(entry_size, list_entry_l1[i], 0, num_pic_total_curr - 1, 1, i);
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(pred_weight_table)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   H265RawSliceHeader *current)
+{
+    CodedBitstreamH265Context *h265 = ctx->priv_data;
+    const H265RawSPS *sps = h265->active_sps;
+    int err, i, j;
+    int chroma = !sps->separate_colour_plane_flag &&
+                  sps->chroma_format_idc != 0;
+
+    ue(luma_log2_weight_denom, 0, 7);
+    if (chroma)
+        se(delta_chroma_log2_weight_denom, -7, 7);
+    else
+        infer(delta_chroma_log2_weight_denom, 0);
+
+    for (i = 0; i <= current->num_ref_idx_l0_active_minus1; i++) {
+        if (1 /* is not same POC and same layer_id */)
+            flags(luma_weight_l0_flag[i], 1, i);
+        else
+            infer(luma_weight_l0_flag[i], 0);
+    }
+    if (chroma) {
+        for (i = 0; i <= current->num_ref_idx_l0_active_minus1; i++) {
+            if (1 /* is not same POC and same layer_id */)
+                flags(chroma_weight_l0_flag[i], 1, i);
+            else
+                infer(chroma_weight_l0_flag[i], 0);
+        }
+    }
+
+    for (i = 0; i <= current->num_ref_idx_l0_active_minus1; i++) {
+        if (current->luma_weight_l0_flag[i]) {
+            ses(delta_luma_weight_l0[i], -128, +127, 1, i);
+            ses(luma_offset_l0[i],
+                -(1 << (sps->bit_depth_luma_minus8 + 8 - 1)),
+                ((1 << (sps->bit_depth_luma_minus8 + 8 - 1)) - 1), 1, i);
+        } else {
+            infer(delta_luma_weight_l0[i], 0);
+            infer(luma_offset_l0[i],       0);
+        }
+        if (current->chroma_weight_l0_flag[i]) {
+            for (j = 0; j < 2; j++) {
+                ses(delta_chroma_weight_l0[i][j], -128, +127, 2, i, j);
+                ses(chroma_offset_l0[i][j],
+                    -(4 << (sps->bit_depth_chroma_minus8 + 8 - 1)),
+                    ((4 << (sps->bit_depth_chroma_minus8 + 8 - 1)) - 1), 2, i, j);
+            }
+        } else {
+            for (j = 0; j < 2; j++) {
+                infer(delta_chroma_weight_l0[i][j], 0);
+                infer(chroma_offset_l0[i][j],       0);
+            }
+        }
+    }
+
+    if (current->slice_type == HEVC_SLICE_B) {
+        for (i = 0; i <= current->num_ref_idx_l1_active_minus1; i++) {
+            if (1 /* RefPicList1[i] is not CurrPic, nor is it in a different layer */)
+                flags(luma_weight_l1_flag[i], 1, i);
+            else
+                infer(luma_weight_l1_flag[i], 0);
+        }
+        if (chroma) {
+            for (i = 0; i <= current->num_ref_idx_l1_active_minus1; i++) {
+                if (1 /* RefPicList1[i] is not CurrPic, nor is it in a different layer */)
+                    flags(chroma_weight_l1_flag[i], 1, i);
+                else
+                    infer(chroma_weight_l1_flag[i], 0);
+            }
+        }
+
+        for (i = 0; i <= current->num_ref_idx_l1_active_minus1; i++) {
+            if (current->luma_weight_l1_flag[i]) {
+                ses(delta_luma_weight_l1[i], -128, +127, 1, i);
+                ses(luma_offset_l1[i],
+                    -(1 << (sps->bit_depth_luma_minus8 + 8 - 1)),
+                    ((1 << (sps->bit_depth_luma_minus8 + 8 - 1)) - 1), 1, i);
+            } else {
+                infer(delta_luma_weight_l1[i], 0);
+                infer(luma_offset_l1[i],       0);
+            }
+            if (current->chroma_weight_l1_flag[i]) {
+                for (j = 0; j < 2; j++) {
+                    ses(delta_chroma_weight_l1[i][j], -128, +127, 2, i, j);
+                    ses(chroma_offset_l1[i][j],
+                        -(4 << (sps->bit_depth_chroma_minus8 + 8 - 1)),
+                        ((4 << (sps->bit_depth_chroma_minus8 + 8 - 1)) - 1), 2, i, j);
+                }
+            } else {
+                for (j = 0; j < 2; j++) {
+                    infer(delta_chroma_weight_l1[i][j], 0);
+                    infer(chroma_offset_l1[i][j],       0);
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(slice_segment_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      H265RawSliceHeader *current)
+{
+    CodedBitstreamH265Context *h265 = ctx->priv_data;
+    const H265RawSPS *sps;
+    const H265RawPPS *pps;
+    unsigned int min_cb_log2_size_y, ctb_log2_size_y, ctb_size_y;
+    unsigned int pic_width_in_ctbs_y, pic_height_in_ctbs_y, pic_size_in_ctbs_y;
+    unsigned int num_pic_total_curr = 0;
+    int err, i;
+
+    HEADER("Slice Segment Header");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header, -1));
+
+    flag(first_slice_segment_in_pic_flag);
+
+    if (current->nal_unit_header.nal_unit_type >= HEVC_NAL_BLA_W_LP &&
+        current->nal_unit_header.nal_unit_type <= HEVC_NAL_IRAP_VCL23)
+        flag(no_output_of_prior_pics_flag);
+
+    ue(slice_pic_parameter_set_id, 0, 63);
+
+    pps = h265->pps[current->slice_pic_parameter_set_id];
+    if (!pps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "PPS id %d not available.\n",
+               current->slice_pic_parameter_set_id);
+        return AVERROR_INVALIDDATA;
+    }
+    h265->active_pps = pps;
+
+    sps = h265->sps[pps->pps_seq_parameter_set_id];
+    if (!sps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "SPS id %d not available.\n",
+               pps->pps_seq_parameter_set_id);
+        return AVERROR_INVALIDDATA;
+    }
+    h265->active_sps = sps;
+
+    min_cb_log2_size_y = sps->log2_min_luma_coding_block_size_minus3 + 3;
+    ctb_log2_size_y = min_cb_log2_size_y + sps->log2_diff_max_min_luma_coding_block_size;
+    ctb_size_y = 1 << ctb_log2_size_y;
+    pic_width_in_ctbs_y =
+        (sps->pic_width_in_luma_samples + ctb_size_y - 1) / ctb_size_y;
+    pic_height_in_ctbs_y =
+        (sps->pic_height_in_luma_samples + ctb_size_y - 1) / ctb_size_y;
+    pic_size_in_ctbs_y = pic_width_in_ctbs_y * pic_height_in_ctbs_y;
+
+    if (!current->first_slice_segment_in_pic_flag) {
+        unsigned int address_size = av_log2(pic_size_in_ctbs_y - 1) + 1;
+        if (pps->dependent_slice_segments_enabled_flag)
+            flag(dependent_slice_segment_flag);
+        else
+            infer(dependent_slice_segment_flag, 0);
+        u(address_size, slice_segment_address, 0, pic_size_in_ctbs_y - 1);
+    } else {
+        infer(dependent_slice_segment_flag, 0);
+    }
+
+    if (!current->dependent_slice_segment_flag) {
+        for (i = 0; i < pps->num_extra_slice_header_bits; i++)
+            flags(slice_reserved_flag[i], 1, i);
+
+        ue(slice_type, 0, 2);
+
+        if (pps->output_flag_present_flag)
+            flag(pic_output_flag);
+
+        if (sps->separate_colour_plane_flag)
+            u(2, colour_plane_id, 0, 2);
+
+        if (current->nal_unit_header.nal_unit_type != HEVC_NAL_IDR_W_RADL &&
+            current->nal_unit_header.nal_unit_type != HEVC_NAL_IDR_N_LP) {
+            const H265RawSTRefPicSet *rps;
+
+            u(sps->log2_max_pic_order_cnt_lsb_minus4 + 4, slice_pic_order_cnt_lsb,
+              0, MAX_UINT_BITS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4));
+
+            flag(short_term_ref_pic_set_sps_flag);
+            if (!current->short_term_ref_pic_set_sps_flag) {
+                CHECK(FUNC(st_ref_pic_set)(ctx, rw, &current->short_term_ref_pic_set,
+                                           sps->num_short_term_ref_pic_sets, sps));
+                rps = &current->short_term_ref_pic_set;
+            } else if (sps->num_short_term_ref_pic_sets > 1) {
+                unsigned int idx_size = av_log2(sps->num_short_term_ref_pic_sets - 1) + 1;
+                u(idx_size, short_term_ref_pic_set_idx,
+                  0, sps->num_short_term_ref_pic_sets - 1);
+                rps = &sps->st_ref_pic_set[current->short_term_ref_pic_set_idx];
+            } else {
+                infer(short_term_ref_pic_set_idx, 0);
+                rps = &sps->st_ref_pic_set[0];
+            }
+
+            num_pic_total_curr = 0;
+            for (i = 0; i < rps->num_negative_pics; i++)
+                if (rps->used_by_curr_pic_s0_flag[i])
+                    ++num_pic_total_curr;
+            for (i = 0; i < rps->num_positive_pics; i++)
+                if (rps->used_by_curr_pic_s1_flag[i])
+                    ++num_pic_total_curr;
+
+            if (sps->long_term_ref_pics_present_flag) {
+                unsigned int idx_size;
+
+                if (sps->num_long_term_ref_pics_sps > 0) {
+                    ue(num_long_term_sps, 0, sps->num_long_term_ref_pics_sps);
+                    idx_size = av_log2(sps->num_long_term_ref_pics_sps - 1) + 1;
+                } else {
+                    infer(num_long_term_sps, 0);
+                    idx_size = 0;
+                }
+                ue(num_long_term_pics, 0, HEVC_MAX_LONG_TERM_REF_PICS);
+
+                for (i = 0; i < current->num_long_term_sps +
+                                current->num_long_term_pics; i++) {
+                    if (i < current->num_long_term_sps) {
+                        if (sps->num_long_term_ref_pics_sps > 1)
+                            us(idx_size, lt_idx_sps[i],
+                               0, sps->num_long_term_ref_pics_sps - 1, 1, i);
+                        if (sps->used_by_curr_pic_lt_sps_flag[current->lt_idx_sps[i]])
+                            ++num_pic_total_curr;
+                    } else {
+                        us(sps->log2_max_pic_order_cnt_lsb_minus4 + 4, poc_lsb_lt[i],
+                           0, MAX_UINT_BITS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4), 1, i);
+                        flags(used_by_curr_pic_lt_flag[i], 1, i);
+                        if (current->used_by_curr_pic_lt_flag[i])
+                            ++num_pic_total_curr;
+                    }
+                    flags(delta_poc_msb_present_flag[i], 1, i);
+                    if (current->delta_poc_msb_present_flag[i])
+                        ues(delta_poc_msb_cycle_lt[i], 0, UINT32_MAX - 1, 1, i);
+                    else
+                        infer(delta_poc_msb_cycle_lt[i], 0);
+                }
+            }
+
+            if (sps->sps_temporal_mvp_enabled_flag)
+                flag(slice_temporal_mvp_enabled_flag);
+            else
+                infer(slice_temporal_mvp_enabled_flag, 0);
+
+            if (pps->pps_curr_pic_ref_enabled_flag)
+                ++num_pic_total_curr;
+        }
+
+        if (sps->sample_adaptive_offset_enabled_flag) {
+            flag(slice_sao_luma_flag);
+            if (!sps->separate_colour_plane_flag && sps->chroma_format_idc != 0)
+                flag(slice_sao_chroma_flag);
+            else
+                infer(slice_sao_chroma_flag, 0);
+        } else {
+            infer(slice_sao_luma_flag,   0);
+            infer(slice_sao_chroma_flag, 0);
+        }
+
+        if (current->slice_type == HEVC_SLICE_P ||
+            current->slice_type == HEVC_SLICE_B) {
+            flag(num_ref_idx_active_override_flag);
+            if (current->num_ref_idx_active_override_flag) {
+                ue(num_ref_idx_l0_active_minus1, 0, 14);
+                if (current->slice_type == HEVC_SLICE_B)
+                    ue(num_ref_idx_l1_active_minus1, 0, 14);
+                else
+                    infer(num_ref_idx_l1_active_minus1, pps->num_ref_idx_l1_default_active_minus1);
+            } else {
+                infer(num_ref_idx_l0_active_minus1, pps->num_ref_idx_l0_default_active_minus1);
+                infer(num_ref_idx_l1_active_minus1, pps->num_ref_idx_l1_default_active_minus1);
+            }
+
+            if (pps->lists_modification_present_flag && num_pic_total_curr > 1)
+                CHECK(FUNC(ref_pic_lists_modification)(ctx, rw, current,
+                                                       num_pic_total_curr));
+
+            if (current->slice_type == HEVC_SLICE_B)
+                flag(mvd_l1_zero_flag);
+            if (pps->cabac_init_present_flag)
+                flag(cabac_init_flag);
+            else
+                infer(cabac_init_flag, 0);
+            if (current->slice_temporal_mvp_enabled_flag) {
+                if (current->slice_type == HEVC_SLICE_B)
+                    flag(collocated_from_l0_flag);
+                else
+                    infer(collocated_from_l0_flag, 1);
+                if (current->collocated_from_l0_flag) {
+                    if (current->num_ref_idx_l0_active_minus1 > 0)
+                        ue(collocated_ref_idx, 0, current->num_ref_idx_l0_active_minus1);
+                    else
+                        infer(collocated_ref_idx, 0);
+                } else {
+                    if (current->num_ref_idx_l1_active_minus1 > 0)
+                        ue(collocated_ref_idx, 0, current->num_ref_idx_l1_active_minus1);
+                    else
+                        infer(collocated_ref_idx, 0);
+                }
+            }
+
+            if ((pps->weighted_pred_flag   && current->slice_type == HEVC_SLICE_P) ||
+                (pps->weighted_bipred_flag && current->slice_type == HEVC_SLICE_B))
+                CHECK(FUNC(pred_weight_table)(ctx, rw, current));
+
+            ue(five_minus_max_num_merge_cand, 0, 4);
+            if (sps->motion_vector_resolution_control_idc == 2)
+                flag(use_integer_mv_flag);
+            else
+                infer(use_integer_mv_flag, sps->motion_vector_resolution_control_idc);
+        }
+
+        se(slice_qp_delta,
+           - 6 * sps->bit_depth_luma_minus8 - (pps->init_qp_minus26 + 26),
+           + 51 - (pps->init_qp_minus26 + 26));
+        if (pps->pps_slice_chroma_qp_offsets_present_flag) {
+            se(slice_cb_qp_offset, -12, +12);
+            se(slice_cr_qp_offset, -12, +12);
+        } else {
+            infer(slice_cb_qp_offset, 0);
+            infer(slice_cr_qp_offset, 0);
+        }
+        if (pps->pps_slice_act_qp_offsets_present_flag) {
+            se(slice_act_y_qp_offset,
+               -12 - (pps->pps_act_y_qp_offset_plus5 - 5),
+               +12 - (pps->pps_act_y_qp_offset_plus5 - 5));
+            se(slice_act_cb_qp_offset,
+               -12 - (pps->pps_act_cb_qp_offset_plus5 - 5),
+               +12 - (pps->pps_act_cb_qp_offset_plus5 - 5));
+            se(slice_act_cr_qp_offset,
+               -12 - (pps->pps_act_cr_qp_offset_plus3 - 3),
+               +12 - (pps->pps_act_cr_qp_offset_plus3 - 3));
+        } else {
+            infer(slice_act_y_qp_offset,  0);
+            infer(slice_act_cb_qp_offset, 0);
+            infer(slice_act_cr_qp_offset, 0);
+        }
+        if (pps->chroma_qp_offset_list_enabled_flag)
+            flag(cu_chroma_qp_offset_enabled_flag);
+        else
+            infer(cu_chroma_qp_offset_enabled_flag, 0);
+
+        if (pps->deblocking_filter_override_enabled_flag)
+            flag(deblocking_filter_override_flag);
+        else
+            infer(deblocking_filter_override_flag, 0);
+        if (current->deblocking_filter_override_flag) {
+            flag(slice_deblocking_filter_disabled_flag);
+            if (!current->slice_deblocking_filter_disabled_flag) {
+                se(slice_beta_offset_div2, -6, +6);
+                se(slice_tc_offset_div2,   -6, +6);
+            } else {
+                infer(slice_beta_offset_div2, pps->pps_beta_offset_div2);
+                infer(slice_tc_offset_div2,   pps->pps_tc_offset_div2);
+            }
+        } else {
+            infer(slice_deblocking_filter_disabled_flag,
+                  pps->pps_deblocking_filter_disabled_flag);
+            infer(slice_beta_offset_div2, pps->pps_beta_offset_div2);
+            infer(slice_tc_offset_div2,   pps->pps_tc_offset_div2);
+        }
+        if (pps->pps_loop_filter_across_slices_enabled_flag &&
+            (current->slice_sao_luma_flag || current->slice_sao_chroma_flag ||
+             !current->slice_deblocking_filter_disabled_flag))
+            flag(slice_loop_filter_across_slices_enabled_flag);
+        else
+            infer(slice_loop_filter_across_slices_enabled_flag,
+                  pps->pps_loop_filter_across_slices_enabled_flag);
+    }
+
+    if (pps->tiles_enabled_flag || pps->entropy_coding_sync_enabled_flag) {
+        unsigned int num_entry_point_offsets_limit;
+        if (!pps->tiles_enabled_flag && pps->entropy_coding_sync_enabled_flag)
+            num_entry_point_offsets_limit = pic_height_in_ctbs_y - 1;
+        else if (pps->tiles_enabled_flag && !pps->entropy_coding_sync_enabled_flag)
+            num_entry_point_offsets_limit =
+                (pps->num_tile_columns_minus1 + 1) * (pps->num_tile_rows_minus1 + 1);
+        else
+            num_entry_point_offsets_limit =
+                (pps->num_tile_columns_minus1 + 1) * pic_height_in_ctbs_y - 1;
+        ue(num_entry_point_offsets, 0, num_entry_point_offsets_limit);
+
+        if (current->num_entry_point_offsets > HEVC_MAX_ENTRY_POINT_OFFSETS) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Too many entry points: "
+                   "%"PRIu16".\n", current->num_entry_point_offsets);
+            return AVERROR_PATCHWELCOME;
+        }
+
+        if (current->num_entry_point_offsets > 0) {
+            ue(offset_len_minus1, 0, 31);
+            for (i = 0; i < current->num_entry_point_offsets; i++)
+                us(current->offset_len_minus1 + 1, entry_point_offset_minus1[i],
+                   0, MAX_UINT_BITS(current->offset_len_minus1 + 1), 1, i);
+        }
+    }
+
+    if (pps->slice_segment_header_extension_present_flag) {
+        ue(slice_segment_header_extension_length, 0, 256);
+        for (i = 0; i < current->slice_segment_header_extension_length; i++)
+            us(8, slice_segment_header_extension_data_byte[i], 0x00, 0xff, 1, i);
+    }
+
+    CHECK(FUNC(byte_alignment)(ctx, rw));
+
+    return 0;
+}
+
+static int FUNC(sei_mastering_display)(CodedBitstreamContext *ctx, RWContext *rw,
+                                       H265RawSEIMasteringDisplayColourVolume *current)
+{
+    int err, c;
+
+    for (c = 0; c < 3; c++) {
+        us(16, display_primaries_x[c], 0, 50000, 1, c);
+        us(16, display_primaries_y[c], 0, 50000, 1, c);
+    }
+
+    u(16, white_point_x, 0, 50000);
+    u(16, white_point_y, 0, 50000);
+
+    u(32, max_display_mastering_luminance,
+      1, MAX_UINT_BITS(32));
+    u(32, min_display_mastering_luminance,
+      0, current->max_display_mastering_luminance - 1);
+
+    return 0;
+}
+
+static int FUNC(sei_content_light_level)(CodedBitstreamContext *ctx, RWContext *rw,
+                                         H265RawSEIContentLightLevelInfo *current)
+{
+    int err;
+
+    u(16, max_content_light_level, 0, MAX_UINT_BITS(16));
+    u(16, max_pic_average_light_level, 0, MAX_UINT_BITS(16));
+
+    return 0;
+}
+
+static int FUNC(sei_payload)(CodedBitstreamContext *ctx, RWContext *rw,
+                             H265RawSEIPayload *current)
+{
+    int err, i;
+    int start_position, end_position;
+
+#ifdef READ
+    start_position = get_bits_count(rw);
+#else
+    start_position = put_bits_count(rw);
+#endif
+
+    switch (current->payload_type) {
+    case HEVC_SEI_TYPE_MASTERING_DISPLAY_INFO:
+        CHECK(FUNC(sei_mastering_display)
+              (ctx, rw, &current->payload.mastering_display));
+
+        break;
+
+    case HEVC_SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO:
+        CHECK(FUNC(sei_content_light_level)
+              (ctx, rw, &current->payload.content_light_level));
+
+        break;
+
+    default:
+        {
+#ifdef READ
+            current->payload.other.data_length = current->payload_size;
+#endif
+            allocate(current->payload.other.data, current->payload.other.data_length);
+
+            for (i = 0; i < current->payload_size; i++)
+                xu(8, payload_byte[i], current->payload.other.data[i], 0, 255,
+                   1, i);
+        }
+    }
+
+    if (byte_alignment(rw)) {
+        fixed(1, bit_equal_to_one, 1);
+        while (byte_alignment(rw))
+            fixed(1, bit_equal_to_zero, 0);
+    }
+
+#ifdef READ
+    end_position = get_bits_count(rw);
+    if (end_position < start_position + 8 * current->payload_size) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Incorrect SEI payload length: "
+               "header %"PRIu32" bits, actually %d bits.\n",
+               8 * current->payload_size,
+               end_position - start_position);
+        return AVERROR_INVALIDDATA;
+    }
+#else
+    end_position = put_bits_count(rw);
+    current->payload_size = (end_position - start_position) >> 3;
+#endif
+
+    return 0;
+}
+
+static int FUNC(sei)(CodedBitstreamContext *ctx, RWContext *rw,
+                     H265RawSEI *current)
+{
+    int err, k;
+
+    HEADER("Supplemental Enhancement Information");
+
+    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header,
+                                HEVC_NAL_SEI_PREFIX));
+
+#ifdef READ
+    for (k = 0; k < H265_MAX_SEI_PAYLOADS; k++) {
+        uint32_t payload_type = 0;
+        uint32_t payload_size = 0;
+        uint32_t tmp;
+
+        while (show_bits(rw, 8) == 0xff) {
+            fixed(8, ff_byte, 0xff);
+            payload_type += 255;
+        }
+        xu(8, last_payload_type_byte, tmp, 0, 254, 0);
+        payload_type += tmp;
+
+        while (show_bits(rw, 8) == 0xff) {
+            fixed(8, ff_byte, 0xff);
+            payload_size += 255;
+        }
+        xu(8, last_payload_size_byte, tmp, 0, 254, 0);
+        payload_size += tmp;
+
+        current->payload[k].payload_type = payload_type;
+        current->payload[k].payload_size = payload_size;
+
+        CHECK(FUNC(sei_payload)(ctx, rw, &current->payload[k]));
+
+        if (!cbs_h2645_read_more_rbsp_data(rw))
+            break;
+    }
+    if (k >= H265_MAX_SEI_PAYLOADS) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Too many payloads in "
+               "SEI message: found %d.\n", k);
+        return AVERROR_INVALIDDATA;
+    }
+    current->payload_count = k + 1;
+#else
+    for (k = 0; k < current->payload_count; k++) {
+        PutBitContext start_state;
+        uint32_t tmp;
+        int need_size, i;
+
+        // Somewhat clumsy: we write the payload twice when
+        // we don't know the size in advance.  This will mess
+        // with trace output, but is otherwise harmless.
+        start_state = *rw;
+        need_size = !current->payload[k].payload_size;
+        for (i = 0; i < 1 + need_size; i++) {
+            *rw = start_state;
+
+            tmp = current->payload[k].payload_type;
+            while (tmp >= 255) {
+                fixed(8, ff_byte, 0xff);
+                tmp -= 255;
+            }
+            xu(8, last_payload_type_byte, tmp, 0, 254, 0);
+
+            tmp = current->payload[k].payload_size;
+            while (tmp >= 255) {
+                fixed(8, ff_byte, 0xff);
+                tmp -= 255;
+            }
+            xu(8, last_payload_size_byte, tmp, 0, 254, 0);
+
+            CHECK(FUNC(sei_payload)(ctx, rw, &current->payload[k]));
+        }
+    }
+#endif
+
+    CHECK(FUNC(rbsp_trailing_bits)(ctx, rw));
+
+    return 0;
+}

diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h
new file mode 100644
index 0000000..53f2e5d
--- /dev/null
+++ b/libavcodec/cbs_internal.h

@@ -0,0 +1,97 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_INTERNAL_H
+#define AVCODEC_CBS_INTERNAL_H
+
+#include "avcodec.h"
+#include "cbs.h"
+#include "get_bits.h"
+#include "put_bits.h"
+
+
+typedef struct CodedBitstreamType {
+    enum AVCodecID codec_id;
+
+    size_t priv_data_size;
+
+    // Split frag->data into coded bitstream units, creating the
+    // frag->units array.  Fill data but not content on each unit.
+    // The header argument should be set if the fragment came from
+    // a header block, which may require different parsing for some
+    // codecs (e.g. the AVCC header in H.264).
+    int (*split_fragment)(CodedBitstreamContext *ctx,
+                          CodedBitstreamFragment *frag,
+                          int header);
+
+    // Read the unit->data bitstream and decompose it, creating
+    // unit->content.
+    int (*read_unit)(CodedBitstreamContext *ctx,
+                     CodedBitstreamUnit *unit);
+
+    // Write the unit->data bitstream from unit->content.
+    int (*write_unit)(CodedBitstreamContext *ctx,
+                      CodedBitstreamUnit *unit);
+
+    // Read the data from all of frag->units and assemble it into
+    // a bitstream for the whole fragment.
+    int (*assemble_fragment)(CodedBitstreamContext *ctx,
+                             CodedBitstreamFragment *frag);
+
+    // Free the codec internal state.
+    void (*close)(CodedBitstreamContext *ctx);
+} CodedBitstreamType;
+
+
+// Helper functions for trace output.
+
+void ff_cbs_trace_header(CodedBitstreamContext *ctx,
+                         const char *name);
+
+void ff_cbs_trace_syntax_element(CodedBitstreamContext *ctx, int position,
+                                 const char *name, const int *subscripts,
+                                 const char *bitstring, int64_t value);
+
+
+// Helper functions for read/write of common bitstream elements, including
+// generation of trace output.
+
+int ff_cbs_read_unsigned(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                         int width, const char *name,
+                         const int *subscripts, uint32_t *write_to,
+                         uint32_t range_min, uint32_t range_max);
+
+int ff_cbs_write_unsigned(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                          int width, const char *name,
+                          const int *subscripts, uint32_t value,
+                          uint32_t range_min, uint32_t range_max);
+
+// The largest value representable in N bits, suitable for use as
+// range_max in the above functions.
+#define MAX_UINT_BITS(length) ((UINT64_C(1) << (length)) - 1)
+
+
+extern const CodedBitstreamType ff_cbs_type_av1;
+extern const CodedBitstreamType ff_cbs_type_h264;
+extern const CodedBitstreamType ff_cbs_type_h265;
+extern const CodedBitstreamType ff_cbs_type_jpeg;
+extern const CodedBitstreamType ff_cbs_type_mpeg2;
+extern const CodedBitstreamType ff_cbs_type_vp9;
+
+
+#endif /* AVCODEC_CBS_INTERNAL_H */

diff --git a/libavcodec/cbs_jpeg.c b/libavcodec/cbs_jpeg.c
new file mode 100644
index 0000000..5a72f0e
--- /dev/null
+++ b/libavcodec/cbs_jpeg.c

@@ -0,0 +1,520 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "cbs.h"
+#include "cbs_internal.h"
+#include "cbs_jpeg.h"
+
+
+#define HEADER(name) do { \
+        ff_cbs_trace_header(ctx, name); \
+    } while (0)
+
+#define CHECK(call) do { \
+        err = (call); \
+        if (err < 0) \
+            return err; \
+    } while (0)
+
+#define SUBSCRIPTS(subs, ...) (subs > 0 ? ((int[subs + 1]){ subs, __VA_ARGS__ }) : NULL)
+
+#define u(width, name, range_min, range_max) \
+    xu(width, name, range_min, range_max, 0)
+#define us(width, name, sub, range_min, range_max) \
+    xu(width, name, range_min, range_max, 1, sub)
+
+
+#define READ
+#define READWRITE read
+#define RWContext GetBitContext
+#define FUNC(name) cbs_jpeg_read_ ## name
+
+#define xu(width, name, range_min, range_max, subs, ...) do { \
+        uint32_t value = range_min; \
+        CHECK(ff_cbs_read_unsigned(ctx, rw, width, #name, \
+                                   SUBSCRIPTS(subs, __VA_ARGS__), \
+                                   &value, range_min, range_max)); \
+        current->name = value; \
+    } while (0)
+
+#include "cbs_jpeg_syntax_template.c"
+
+#undef READ
+#undef READWRITE
+#undef RWContext
+#undef FUNC
+#undef xu
+
+#define WRITE
+#define READWRITE write
+#define RWContext PutBitContext
+#define FUNC(name) cbs_jpeg_write_ ## name
+
+#define xu(width, name, range_min, range_max, subs, ...) do { \
+        uint32_t value = current->name; \
+        CHECK(ff_cbs_write_unsigned(ctx, rw, width, #name, \
+                                    SUBSCRIPTS(subs, __VA_ARGS__), \
+                                    value, range_min, range_max)); \
+    } while (0)
+
+
+#include "cbs_jpeg_syntax_template.c"
+
+#undef READ
+#undef READWRITE
+#undef RWContext
+#undef FUNC
+#undef xu
+
+
+static void cbs_jpeg_free_application_data(void *unit, uint8_t *content)
+{
+    JPEGRawApplicationData *ad = (JPEGRawApplicationData*)content;
+    av_buffer_unref(&ad->Ap_ref);
+    av_freep(&content);
+}
+
+static void cbs_jpeg_free_comment(void *unit, uint8_t *content)
+{
+    JPEGRawComment *comment = (JPEGRawComment*)content;
+    av_buffer_unref(&comment->Cm_ref);
+    av_freep(&content);
+}
+
+static void cbs_jpeg_free_scan(void *unit, uint8_t *content)
+{
+    JPEGRawScan *scan = (JPEGRawScan*)content;
+    av_buffer_unref(&scan->data_ref);
+    av_freep(&content);
+}
+
+static int cbs_jpeg_split_fragment(CodedBitstreamContext *ctx,
+                                   CodedBitstreamFragment *frag,
+                                   int header)
+{
+    AVBufferRef *data_ref;
+    uint8_t *data;
+    size_t data_size;
+    int unit, start, end, marker, next_start, next_marker;
+    int err, i, j, length;
+
+    if (frag->data_size < 4) {
+        // Definitely too short to be meaningful.
+        return AVERROR_INVALIDDATA;
+    }
+
+    for (i = 0; i + 1 < frag->data_size && frag->data[i] != 0xff; i++);
+    if (i > 0) {
+        av_log(ctx->log_ctx, AV_LOG_WARNING, "Discarding %d bytes at "
+               "beginning of image.\n", i);
+    }
+    for (++i; i + 1 < frag->data_size && frag->data[i] == 0xff; i++);
+    if (i + 1 >= frag->data_size && frag->data[i]) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
+               "no SOI marker found.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    marker = frag->data[i];
+    if (marker != JPEG_MARKER_SOI) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: first "
+               "marker is %02x, should be SOI.\n", marker);
+        return AVERROR_INVALIDDATA;
+    }
+    for (++i; i + 1 < frag->data_size && frag->data[i] == 0xff; i++);
+    if (i + 1 >= frag->data_size) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
+               "no image content found.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    marker = frag->data[i];
+    start  = i + 1;
+
+    for (unit = 0;; unit++) {
+        if (marker == JPEG_MARKER_EOI) {
+            break;
+        } else if (marker == JPEG_MARKER_SOS) {
+            for (i = start; i + 1 < frag->data_size; i++) {
+                if (frag->data[i] != 0xff)
+                    continue;
+                end = i;
+                for (++i; i + 1 < frag->data_size &&
+                          frag->data[i] == 0xff; i++);
+                if (i + 1 >= frag->data_size) {
+                    next_marker = -1;
+                } else {
+                    if (frag->data[i] == 0x00)
+                        continue;
+                    next_marker = frag->data[i];
+                    next_start  = i + 1;
+                }
+                break;
+            }
+        } else {
+            i = start;
+            if (i + 2 > frag->data_size) {
+                av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
+                       "truncated at %02x marker.\n", marker);
+                return AVERROR_INVALIDDATA;
+            }
+            length = AV_RB16(frag->data + i);
+            if (i + length > frag->data_size) {
+                av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
+                       "truncated at %02x marker segment.\n", marker);
+                return AVERROR_INVALIDDATA;
+            }
+            end = start + length;
+
+            i = end;
+            if (frag->data[i] != 0xff) {
+                next_marker = -1;
+            } else {
+                for (++i; i + 1 < frag->data_size &&
+                          frag->data[i] == 0xff; i++);
+                if (i + 1 >= frag->data_size) {
+                    next_marker = -1;
+                } else {
+                    next_marker = frag->data[i];
+                    next_start  = i + 1;
+                }
+            }
+        }
+
+        if (marker == JPEG_MARKER_SOS) {
+            length = AV_RB16(frag->data + start);
+
+            data_ref = NULL;
+            data     = av_malloc(end - start +
+                                 AV_INPUT_BUFFER_PADDING_SIZE);
+            if (!data)
+                return AVERROR(ENOMEM);
+
+            memcpy(data, frag->data + start, length);
+            for (i = start + length, j = length; i < end; i++, j++) {
+                if (frag->data[i] == 0xff) {
+                    while (frag->data[i] == 0xff)
+                        ++i;
+                    data[j] = 0xff;
+                } else {
+                    data[j] = frag->data[i];
+                }
+            }
+            data_size = j;
+
+            memset(data + data_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+        } else {
+            data      = frag->data + start;
+            data_size = end - start;
+            data_ref  = frag->data_ref;
+        }
+
+        err = ff_cbs_insert_unit_data(ctx, frag, unit, marker,
+                                      data, data_size, data_ref);
+        if (err < 0) {
+            if (!data_ref)
+                av_freep(&data);
+            return err;
+        }
+
+        if (next_marker == -1)
+            break;
+        marker = next_marker;
+        start  = next_start;
+    }
+
+    return 0;
+}
+
+static int cbs_jpeg_read_unit(CodedBitstreamContext *ctx,
+                              CodedBitstreamUnit *unit)
+{
+    GetBitContext gbc;
+    int err;
+
+    err = init_get_bits(&gbc, unit->data, 8 * unit->data_size);
+    if (err < 0)
+        return err;
+
+    if (unit->type >= JPEG_MARKER_SOF0 &&
+        unit->type <= JPEG_MARKER_SOF3) {
+        err = ff_cbs_alloc_unit_content(ctx, unit,
+                                        sizeof(JPEGRawFrameHeader),
+                                        NULL);
+        if (err < 0)
+            return err;
+
+        err = cbs_jpeg_read_frame_header(ctx, &gbc, unit->content);
+        if (err < 0)
+            return err;
+
+    } else if (unit->type >= JPEG_MARKER_APPN &&
+               unit->type <= JPEG_MARKER_APPN + 15) {
+        err = ff_cbs_alloc_unit_content(ctx, unit,
+                                        sizeof(JPEGRawApplicationData),
+                                        &cbs_jpeg_free_application_data);
+        if (err < 0)
+            return err;
+
+        err = cbs_jpeg_read_application_data(ctx, &gbc, unit->content);
+        if (err < 0)
+            return err;
+
+    } else if (unit->type == JPEG_MARKER_SOS) {
+        JPEGRawScan *scan;
+        int pos;
+
+        err = ff_cbs_alloc_unit_content(ctx, unit,
+                                        sizeof(JPEGRawScan),
+                                        &cbs_jpeg_free_scan);
+        if (err < 0)
+            return err;
+        scan = unit->content;
+
+        err = cbs_jpeg_read_scan_header(ctx, &gbc, &scan->header);
+        if (err < 0)
+            return err;
+
+        pos = get_bits_count(&gbc);
+        av_assert0(pos % 8 == 0);
+        if (pos > 0) {
+            scan->data_size = unit->data_size - pos / 8;
+            scan->data_ref  = av_buffer_ref(unit->data_ref);
+            if (!scan->data_ref)
+                return AVERROR(ENOMEM);
+            scan->data = unit->data + pos / 8;
+        }
+
+    } else {
+        switch (unit->type) {
+#define SEGMENT(marker, type, func, free) \
+        case JPEG_MARKER_ ## marker: \
+            { \
+                err = ff_cbs_alloc_unit_content(ctx, unit, \
+                                                sizeof(type), free); \
+                if (err < 0) \
+                    return err; \
+                err = cbs_jpeg_read_ ## func(ctx, &gbc, unit->content); \
+                if (err < 0) \
+                    return err; \
+            } \
+            break
+            SEGMENT(DQT, JPEGRawQuantisationTableSpecification, dqt, NULL);
+            SEGMENT(DHT, JPEGRawHuffmanTableSpecification,      dht, NULL);
+            SEGMENT(COM, JPEGRawComment,  comment, &cbs_jpeg_free_comment);
+#undef SEGMENT
+        default:
+            return AVERROR(ENOSYS);
+        }
+    }
+
+    return 0;
+}
+
+static int cbs_jpeg_write_scan(CodedBitstreamContext *ctx,
+                               CodedBitstreamUnit *unit,
+                               PutBitContext *pbc)
+{
+    JPEGRawScan *scan = unit->content;
+    int i, err;
+
+    err = cbs_jpeg_write_scan_header(ctx, pbc, &scan->header);
+    if (err < 0)
+        return err;
+
+    if (scan->data) {
+        if (scan->data_size * 8 > put_bits_left(pbc))
+            return AVERROR(ENOSPC);
+
+        for (i = 0; i < scan->data_size; i++)
+            put_bits(pbc, 8, scan->data[i]);
+    }
+
+    return 0;
+}
+
+static int cbs_jpeg_write_segment(CodedBitstreamContext *ctx,
+                                  CodedBitstreamUnit *unit,
+                                  PutBitContext *pbc)
+{
+    int err;
+
+    if (unit->type >= JPEG_MARKER_SOF0 &&
+        unit->type <= JPEG_MARKER_SOF3) {
+        err = cbs_jpeg_write_frame_header(ctx, pbc, unit->content);
+    } else if (unit->type >= JPEG_MARKER_APPN &&
+               unit->type <= JPEG_MARKER_APPN + 15) {
+        err = cbs_jpeg_write_application_data(ctx, pbc, unit->content);
+    } else {
+        switch (unit->type) {
+#define SEGMENT(marker, func) \
+            case JPEG_MARKER_ ## marker: \
+                err = cbs_jpeg_write_ ## func(ctx, pbc, unit->content); \
+                break;
+            SEGMENT(DQT, dqt);
+            SEGMENT(DHT, dht);
+            SEGMENT(COM, comment);
+        default:
+            return AVERROR_PATCHWELCOME;
+        }
+    }
+
+    return err;
+}
+
+static int cbs_jpeg_write_unit(CodedBitstreamContext *ctx,
+                                CodedBitstreamUnit *unit)
+{
+    CodedBitstreamJPEGContext *priv = ctx->priv_data;
+    PutBitContext pbc;
+    int err;
+
+    if (!priv->write_buffer) {
+        // Initial write buffer size is 1MB.
+        priv->write_buffer_size = 1024 * 1024;
+
+    reallocate_and_try_again:
+        err = av_reallocp(&priv->write_buffer, priv->write_buffer_size);
+        if (err < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Unable to allocate a "
+                   "sufficiently large write buffer (last attempt "
+                   "%"SIZE_SPECIFIER" bytes).\n", priv->write_buffer_size);
+            return err;
+        }
+    }
+
+    init_put_bits(&pbc, priv->write_buffer, priv->write_buffer_size);
+
+    if (unit->type == JPEG_MARKER_SOS)
+        err = cbs_jpeg_write_scan(ctx, unit, &pbc);
+    else
+        err = cbs_jpeg_write_segment(ctx, unit, &pbc);
+
+    if (err == AVERROR(ENOSPC)) {
+        // Overflow.
+        priv->write_buffer_size *= 2;
+        goto reallocate_and_try_again;
+    }
+    if (err < 0) {
+        // Write failed for some other reason.
+        return err;
+    }
+
+    if (put_bits_count(&pbc) % 8)
+        unit->data_bit_padding = 8 - put_bits_count(&pbc) % 8;
+    else
+        unit->data_bit_padding = 0;
+
+    unit->data_size = (put_bits_count(&pbc) + 7) / 8;
+    flush_put_bits(&pbc);
+
+    err = ff_cbs_alloc_unit_data(ctx, unit, unit->data_size);
+    if (err < 0)
+        return err;
+
+    memcpy(unit->data, priv->write_buffer, unit->data_size);
+
+    return 0;
+}
+
+static int cbs_jpeg_assemble_fragment(CodedBitstreamContext *ctx,
+                                       CodedBitstreamFragment *frag)
+{
+    const CodedBitstreamUnit *unit;
+    uint8_t *data;
+    size_t size, dp, sp;
+    int i;
+
+    size = 4; // SOI + EOI.
+    for (i = 0; i < frag->nb_units; i++) {
+        unit = &frag->units[i];
+        size += 2 + unit->data_size;
+        if (unit->type == JPEG_MARKER_SOS) {
+            for (sp = 0; sp < unit->data_size; sp++) {
+                if (unit->data[sp] == 0xff)
+                    ++size;
+            }
+        }
+    }
+
+    frag->data_ref = av_buffer_alloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!frag->data_ref)
+        return AVERROR(ENOMEM);
+    data = frag->data_ref->data;
+
+    dp = 0;
+
+    data[dp++] = 0xff;
+    data[dp++] = JPEG_MARKER_SOI;
+
+    for (i = 0; i < frag->nb_units; i++) {
+        unit = &frag->units[i];
+
+        data[dp++] = 0xff;
+        data[dp++] = unit->type;
+
+        if (unit->type != JPEG_MARKER_SOS) {
+            memcpy(data + dp, unit->data, unit->data_size);
+            dp += unit->data_size;
+        } else {
+            sp = AV_RB16(unit->data);
+            av_assert0(sp <= unit->data_size);
+            memcpy(data + dp, unit->data, sp);
+            dp += sp;
+
+            for (; sp < unit->data_size; sp++) {
+                if (unit->data[sp] == 0xff) {
+                    data[dp++] = 0xff;
+                    data[dp++] = 0x00;
+                } else {
+                    data[dp++] = unit->data[sp];
+                }
+            }
+        }
+    }
+
+    data[dp++] = 0xff;
+    data[dp++] = JPEG_MARKER_EOI;
+
+    av_assert0(dp == size);
+
+    memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+    frag->data      = data;
+    frag->data_size = size;
+
+    return 0;
+}
+
+static void cbs_jpeg_close(CodedBitstreamContext *ctx)
+{
+    CodedBitstreamJPEGContext *priv = ctx->priv_data;
+
+    av_freep(&priv->write_buffer);
+}
+
+const CodedBitstreamType ff_cbs_type_jpeg = {
+    .codec_id          = AV_CODEC_ID_MJPEG,
+
+    .priv_data_size    = sizeof(CodedBitstreamJPEGContext),
+
+    .split_fragment    = &cbs_jpeg_split_fragment,
+    .read_unit         = &cbs_jpeg_read_unit,
+    .write_unit        = &cbs_jpeg_write_unit,
+    .assemble_fragment = &cbs_jpeg_assemble_fragment,
+
+    .close             = &cbs_jpeg_close,
+};

diff --git a/libavcodec/cbs_jpeg.h b/libavcodec/cbs_jpeg.h
new file mode 100644
index 0000000..913d3f9
--- /dev/null
+++ b/libavcodec/cbs_jpeg.h

@@ -0,0 +1,130 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_JPEG_H
+#define AVCODEC_CBS_JPEG_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/buffer.h"
+
+
+enum {
+    JPEG_MARKER_SOF0    = 0xc0,
+    JPEG_MARKER_SOF1    = 0xc1,
+    JPEG_MARKER_SOF2    = 0xc2,
+    JPEG_MARKER_SOF3    = 0xc3,
+
+    JPEG_MARKER_DHT     = 0xc4,
+    JPEG_MARKER_SOI     = 0xd8,
+    JPEG_MARKER_EOI     = 0xd9,
+    JPEG_MARKER_SOS     = 0xda,
+    JPEG_MARKER_DQT     = 0xdb,
+
+    JPEG_MARKER_APPN    = 0xe0,
+    JPEG_MARKER_JPGN    = 0xf0,
+    JPEG_MARKER_COM     = 0xfe,
+};
+
+enum {
+    JPEG_MAX_COMPONENTS = 255,
+
+    JPEG_MAX_HEIGHT = 65535,
+    JPEG_MAX_WIDTH  = 65535,
+};
+
+
+typedef struct JPEGRawFrameHeader {
+    uint16_t Lf;
+    uint8_t  P;
+    uint16_t Y;
+    uint16_t X;
+    uint16_t Nf;
+
+    uint8_t  C [JPEG_MAX_COMPONENTS];
+    uint8_t  H [JPEG_MAX_COMPONENTS];
+    uint8_t  V [JPEG_MAX_COMPONENTS];
+    uint8_t  Tq[JPEG_MAX_COMPONENTS];
+} JPEGRawFrameHeader;
+
+typedef struct JPEGRawScanHeader {
+    uint16_t Ls;
+    uint8_t  Ns;
+
+    uint8_t  Cs[JPEG_MAX_COMPONENTS];
+    uint8_t  Td[JPEG_MAX_COMPONENTS];
+    uint8_t  Ta[JPEG_MAX_COMPONENTS];
+
+    uint8_t  Ss;
+    uint8_t  Se;
+    uint8_t  Ah;
+    uint8_t  Al;
+} JPEGRawScanHeader;
+
+typedef struct JPEGRawScan {
+    JPEGRawScanHeader header;
+    uint8_t          *data;
+    size_t            data_size;
+    AVBufferRef      *data_ref;
+} JPEGRawScan;
+
+typedef struct JPEGRawQuantisationTable {
+    uint8_t  Pq;
+    uint8_t  Tq;
+    uint16_t Q[64];
+} JPEGRawQuantisationTable;
+
+typedef struct JPEGRawQuantisationTableSpecification {
+    uint16_t Lq;
+    JPEGRawQuantisationTable table[4];
+} JPEGRawQuantisationTableSpecification;
+
+typedef struct JPEGRawHuffmanTable {
+    uint8_t  Tc;
+    uint8_t  Th;
+    uint8_t  L[16];
+    uint8_t  V[224];
+} JPEGRawHuffmanTable;
+
+typedef struct JPEGRawHuffmanTableSpecification {
+    uint16_t Lh;
+    JPEGRawHuffmanTable table[8];
+} JPEGRawHuffmanTableSpecification;
+
+typedef struct JPEGRawApplicationData {
+    uint16_t     Lp;
+    uint8_t     *Ap;
+    AVBufferRef *Ap_ref;
+} JPEGRawApplicationData;
+
+typedef struct JPEGRawComment {
+    uint16_t     Lc;
+    uint8_t     *Cm;
+    AVBufferRef *Cm_ref;
+} JPEGRawComment;
+
+
+typedef struct CodedBitstreamJPEGContext {
+    // Write buffer.
+    uint8_t *write_buffer;
+    size_t write_buffer_size;
+} CodedBitstreamJPEGContext;
+
+
+#endif /* AVCODEC_CBS_JPEG_H */

diff --git a/libavcodec/cbs_jpeg_syntax_template.c b/libavcodec/cbs_jpeg_syntax_template.c
new file mode 100644
index 0000000..d3cd9ff
--- /dev/null
+++ b/libavcodec/cbs_jpeg_syntax_template.c

@@ -0,0 +1,191 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static int FUNC(frame_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                              JPEGRawFrameHeader *current)
+{
+    int err, i;
+
+    HEADER("Frame Header");
+
+    u(16, Lf, 8, 8 + 3 * JPEG_MAX_COMPONENTS);
+
+    u(8,  P,  2, 16);
+    u(16, Y,  0, JPEG_MAX_HEIGHT);
+    u(16, X,  1, JPEG_MAX_WIDTH);
+    u(8,  Nf, 1, JPEG_MAX_COMPONENTS);
+
+    for (i = 0; i < current->Nf; i++) {
+        us(8, C[i],  i, 0, JPEG_MAX_COMPONENTS);
+        us(4, H[i],  i, 1, 4);
+        us(4, V[i],  i, 1, 4);
+        us(8, Tq[i], i, 0, 3);
+    }
+
+    return 0;
+}
+
+static int FUNC(quantisation_table)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     JPEGRawQuantisationTable *current)
+{
+    int err, i;
+
+    u(4, Pq, 0, 1);
+    u(4, Tq, 0, 3);
+
+    if (current->Pq) {
+        for (i = 0; i < 64; i++)
+            us(16, Q[i], i, 1, 255);
+    } else {
+        for (i = 0; i < 64; i++)
+            us(8,  Q[i], i, 1, 255);
+    }
+
+    return 0;
+}
+
+static int FUNC(dqt)(CodedBitstreamContext *ctx, RWContext *rw,
+                     JPEGRawQuantisationTableSpecification *current)
+{
+    int err, i, n;
+
+    HEADER("Quantisation Tables");
+
+    u(16, Lq, 2, 2 + 4 * 65);
+    n = current->Lq / 65;
+
+    for (i = 0; i < n; i++)
+        CHECK(FUNC(quantisation_table)(ctx, rw, &current->table[i]));
+
+    return 0;
+}
+
+static int FUNC(huffman_table)(CodedBitstreamContext *ctx, RWContext *rw,
+                               JPEGRawHuffmanTable *current)
+{
+    int err, i, j, ij;
+
+    u(4, Tc, 0, 1);
+    u(4, Th, 0, 3);
+
+    for (i = 0; i < 16; i++)
+        us(8, L[i], i, 0, 224);
+
+    ij = 0;
+    for (i = 0; i < 16; i++) {
+        for (j = 0; j < current->L[i]; j++) {
+            us(8, V[ij], ij, 0, 255);
+            ++ij;
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(dht)(CodedBitstreamContext *ctx, RWContext *rw,
+                     JPEGRawHuffmanTableSpecification *current)
+{
+    int err, i, j, n;
+
+    HEADER("Huffman Tables");
+
+    u(16, Lh, 2, 2 + 8 * (1 + 16 + 256));
+
+    n = 2;
+    for (i = 0; n < current->Lh; i++) {
+        CHECK(FUNC(huffman_table)(ctx, rw, &current->table[i]));
+
+        ++n;
+        for (j = 0; j < 16; j++)
+            n += 1 + current->table[i].L[j];
+    }
+
+    return 0;
+}
+
+static int FUNC(scan_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                             JPEGRawScanHeader *current)
+{
+    int err, j;
+
+    HEADER("Scan");
+
+    u(16, Ls, 6, 6 + 2 * JPEG_MAX_COMPONENTS);
+
+    u(8, Ns, 1, 4);
+    for (j = 0; j < current->Ns; j++) {
+        us(8, Cs[j], j, 0, JPEG_MAX_COMPONENTS);
+        us(4, Td[j], j, 0, 3);
+        us(4, Ta[j], j, 0, 3);
+    }
+
+    u(8, Ss, 0, 63);
+    u(8, Se, 0, 63);
+    u(4, Ah, 0, 13);
+    u(4, Al, 0, 15);
+
+    return 0;
+}
+
+static int FUNC(application_data)(CodedBitstreamContext *ctx, RWContext *rw,
+                                  JPEGRawApplicationData *current)
+{
+    int err, i;
+
+    HEADER("Application Data");
+
+    u(16, Lp, 2, 65535);
+
+    if (current->Lp > 2) {
+#ifdef READ
+        current->Ap_ref = av_buffer_alloc(current->Lp - 2);
+        if (!current->Ap_ref)
+            return AVERROR(ENOMEM);
+        current->Ap = current->Ap_ref->data;
+#endif
+
+        for (i = 0; i < current->Lp - 2; i++)
+            us(8, Ap[i], i, 0, 255);
+    }
+
+    return 0;
+}
+
+static int FUNC(comment)(CodedBitstreamContext *ctx, RWContext *rw,
+                         JPEGRawComment *current)
+{
+    int err, i;
+
+    HEADER("Comment");
+
+    u(16, Lc, 2, 65535);
+
+    if (current->Lc > 2) {
+#ifdef READ
+        current->Cm_ref = av_buffer_alloc(current->Lc - 2);
+        if (!current->Cm_ref)
+            return AVERROR(ENOMEM);
+        current->Cm = current->Cm_ref->data;
+#endif
+
+        for (i = 0; i < current->Lc - 2; i++)
+            us(8, Cm[i], i, 0, 255);
+    }
+
+    return 0;
+}

diff --git a/libavcodec/cbs_mpeg2.c b/libavcodec/cbs_mpeg2.c
new file mode 100644
index 0000000..0df4234
--- /dev/null
+++ b/libavcodec/cbs_mpeg2.c

@@ -0,0 +1,406 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+
+#include "cbs.h"
+#include "cbs_internal.h"
+#include "cbs_mpeg2.h"
+#include "internal.h"
+
+
+#define HEADER(name) do { \
+        ff_cbs_trace_header(ctx, name); \
+    } while (0)
+
+#define CHECK(call) do { \
+        err = (call); \
+        if (err < 0) \
+            return err; \
+    } while (0)
+
+#define FUNC_NAME(rw, codec, name) cbs_ ## codec ## _ ## rw ## _ ## name
+#define FUNC_MPEG2(rw, name) FUNC_NAME(rw, mpeg2, name)
+#define FUNC(name) FUNC_MPEG2(READWRITE, name)
+
+#define SUBSCRIPTS(subs, ...) (subs > 0 ? ((int[subs + 1]){ subs, __VA_ARGS__ }) : NULL)
+
+#define ui(width, name) \
+        xui(width, name, current->name, 0)
+#define uis(width, name, subs, ...) \
+        xui(width, name, current->name, subs, __VA_ARGS__)
+
+
+#define READ
+#define READWRITE read
+#define RWContext GetBitContext
+
+#define xui(width, name, var, subs, ...) do { \
+        uint32_t value = 0; \
+        CHECK(ff_cbs_read_unsigned(ctx, rw, width, #name, \
+                                   SUBSCRIPTS(subs, __VA_ARGS__), \
+                                   &value, 0, (1 << width) - 1)); \
+        var = value; \
+    } while (0)
+
+#define marker_bit() do { \
+        av_unused uint32_t one; \
+        CHECK(ff_cbs_read_unsigned(ctx, rw, 1, "marker_bit", NULL, &one, 1, 1)); \
+    } while (0)
+
+#define nextbits(width, compare, var) \
+    (get_bits_left(rw) >= width && \
+     (var = show_bits(rw, width)) == (compare))
+
+#include "cbs_mpeg2_syntax_template.c"
+
+#undef READ
+#undef READWRITE
+#undef RWContext
+#undef xui
+#undef marker_bit
+#undef nextbits
+
+
+#define WRITE
+#define READWRITE write
+#define RWContext PutBitContext
+
+#define xui(width, name, var, subs, ...) do { \
+        CHECK(ff_cbs_write_unsigned(ctx, rw, width, #name, \
+                                    SUBSCRIPTS(subs, __VA_ARGS__), \
+                                    var, 0, (1 << width) - 1)); \
+    } while (0)
+
+#define marker_bit() do { \
+        CHECK(ff_cbs_write_unsigned(ctx, rw, 1, "marker_bit", NULL, 1, 1, 1)); \
+    } while (0)
+
+#define nextbits(width, compare, var) (var)
+
+#include "cbs_mpeg2_syntax_template.c"
+
+#undef READ
+#undef READWRITE
+#undef RWContext
+#undef xui
+#undef marker_bit
+#undef nextbits
+
+
+static void cbs_mpeg2_free_user_data(void *unit, uint8_t *content)
+{
+    MPEG2RawUserData *user = (MPEG2RawUserData*)content;
+    av_buffer_unref(&user->user_data_ref);
+    av_freep(&content);
+}
+
+static void cbs_mpeg2_free_slice(void *unit, uint8_t *content)
+{
+    MPEG2RawSlice *slice = (MPEG2RawSlice*)content;
+    av_buffer_unref(&slice->header.extra_information_ref);
+    av_buffer_unref(&slice->data_ref);
+    av_freep(&content);
+}
+
+static int cbs_mpeg2_split_fragment(CodedBitstreamContext *ctx,
+                                    CodedBitstreamFragment *frag,
+                                    int header)
+{
+    const uint8_t *start, *end;
+    uint8_t *unit_data;
+    uint32_t start_code = -1, next_start_code = -1;
+    size_t unit_size;
+    int err, i, unit_type;
+
+    start = avpriv_find_start_code(frag->data, frag->data + frag->data_size,
+                                   &start_code);
+    for (i = 0;; i++) {
+        end = avpriv_find_start_code(start, frag->data + frag->data_size,
+                                     &next_start_code);
+
+        unit_type = start_code & 0xff;
+
+        // The start and end pointers point at to the byte following the
+        // start_code_identifier in the start code that they found.
+        if (end == frag->data + frag->data_size) {
+            // We didn't find a start code, so this is the final unit.
+            unit_size = end - (start - 1);
+        } else {
+            // Unit runs from start to the beginning of the start code
+            // pointed to by end (including any padding zeroes).
+            unit_size = (end - 4) - (start - 1);
+        }
+
+        unit_data = (uint8_t *)start - 1;
+
+        err = ff_cbs_insert_unit_data(ctx, frag, i, unit_type,
+                                      unit_data, unit_size, frag->data_ref);
+        if (err < 0)
+            return err;
+
+        if (end == frag->data + frag->data_size)
+            break;
+
+        start_code = next_start_code;
+        start = end;
+    }
+
+    return 0;
+}
+
+static int cbs_mpeg2_read_unit(CodedBitstreamContext *ctx,
+                               CodedBitstreamUnit *unit)
+{
+    GetBitContext gbc;
+    int err;
+
+    err = init_get_bits(&gbc, unit->data, 8 * unit->data_size);
+    if (err < 0)
+        return err;
+
+    if (MPEG2_START_IS_SLICE(unit->type)) {
+        MPEG2RawSlice *slice;
+        int pos, len;
+
+        err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(*slice),
+                                        &cbs_mpeg2_free_slice);
+        if (err < 0)
+            return err;
+        slice = unit->content;
+
+        err = cbs_mpeg2_read_slice_header(ctx, &gbc, &slice->header);
+        if (err < 0)
+            return err;
+
+        pos = get_bits_count(&gbc);
+        len = unit->data_size;
+
+        slice->data_size = len - pos / 8;
+        slice->data_ref  = av_buffer_ref(unit->data_ref);
+        if (!slice->data_ref)
+            return AVERROR(ENOMEM);
+        slice->data = unit->data + pos / 8;
+
+        slice->data_bit_start = pos % 8;
+
+    } else {
+        switch (unit->type) {
+#define START(start_code, type, read_func, free_func) \
+        case start_code: \
+            { \
+                type *header; \
+                err = ff_cbs_alloc_unit_content(ctx, unit, \
+                                                sizeof(*header), free_func); \
+                if (err < 0) \
+                    return err; \
+                header = unit->content; \
+                err = cbs_mpeg2_read_ ## read_func(ctx, &gbc, header); \
+                if (err < 0) \
+                    return err; \
+            } \
+            break;
+            START(0x00, MPEG2RawPictureHeader,  picture_header,  NULL);
+            START(0xb2, MPEG2RawUserData,       user_data,
+                                            &cbs_mpeg2_free_user_data);
+            START(0xb3, MPEG2RawSequenceHeader, sequence_header, NULL);
+            START(0xb5, MPEG2RawExtensionData,  extension_data,  NULL);
+            START(0xb8, MPEG2RawGroupOfPicturesHeader,
+                                       group_of_pictures_header, NULL);
+#undef START
+        default:
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Unknown start code %02"PRIx32".\n",
+                   unit->type);
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    return 0;
+}
+
+static int cbs_mpeg2_write_header(CodedBitstreamContext *ctx,
+                                  CodedBitstreamUnit *unit,
+                                  PutBitContext *pbc)
+{
+    int err;
+
+    switch (unit->type) {
+#define START(start_code, type, func) \
+    case start_code: \
+        err = cbs_mpeg2_write_ ## func(ctx, pbc, unit->content); \
+        break;
+        START(0x00, MPEG2RawPictureHeader,  picture_header);
+        START(0xb2, MPEG2RawUserData,       user_data);
+        START(0xb3, MPEG2RawSequenceHeader, sequence_header);
+        START(0xb5, MPEG2RawExtensionData,  extension_data);
+        START(0xb8, MPEG2RawGroupOfPicturesHeader, group_of_pictures_header);
+#undef START
+    default:
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Write unimplemented for start "
+               "code %02"PRIx32".\n", unit->type);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    return err;
+}
+
+static int cbs_mpeg2_write_slice(CodedBitstreamContext *ctx,
+                                 CodedBitstreamUnit *unit,
+                                 PutBitContext *pbc)
+{
+    MPEG2RawSlice *slice = unit->content;
+    GetBitContext gbc;
+    size_t bits_left;
+    int err;
+
+    err = cbs_mpeg2_write_slice_header(ctx, pbc, &slice->header);
+    if (err < 0)
+        return err;
+
+    if (slice->data) {
+        if (slice->data_size * 8 + 8 > put_bits_left(pbc))
+            return AVERROR(ENOSPC);
+
+        init_get_bits(&gbc, slice->data, slice->data_size * 8);
+        skip_bits_long(&gbc, slice->data_bit_start);
+
+        while (get_bits_left(&gbc) > 15)
+            put_bits(pbc, 16, get_bits(&gbc, 16));
+
+        bits_left = get_bits_left(&gbc);
+        put_bits(pbc, bits_left, get_bits(&gbc, bits_left));
+
+        // Align with zeroes.
+        while (put_bits_count(pbc) % 8 != 0)
+            put_bits(pbc, 1, 0);
+    }
+
+    return 0;
+}
+
+static int cbs_mpeg2_write_unit(CodedBitstreamContext *ctx,
+                                CodedBitstreamUnit *unit)
+{
+    CodedBitstreamMPEG2Context *priv = ctx->priv_data;
+    PutBitContext pbc;
+    int err;
+
+    if (!priv->write_buffer) {
+        // Initial write buffer size is 1MB.
+        priv->write_buffer_size = 1024 * 1024;
+
+    reallocate_and_try_again:
+        err = av_reallocp(&priv->write_buffer, priv->write_buffer_size);
+        if (err < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Unable to allocate a "
+                   "sufficiently large write buffer (last attempt "
+                   "%"SIZE_SPECIFIER" bytes).\n", priv->write_buffer_size);
+            return err;
+        }
+    }
+
+    init_put_bits(&pbc, priv->write_buffer, priv->write_buffer_size);
+
+    if (unit->type >= 0x01 && unit->type <= 0xaf)
+        err = cbs_mpeg2_write_slice(ctx, unit, &pbc);
+    else
+        err = cbs_mpeg2_write_header(ctx, unit, &pbc);
+
+    if (err == AVERROR(ENOSPC)) {
+        // Overflow.
+        priv->write_buffer_size *= 2;
+        goto reallocate_and_try_again;
+    }
+    if (err < 0) {
+        // Write failed for some other reason.
+        return err;
+    }
+
+    if (put_bits_count(&pbc) % 8)
+        unit->data_bit_padding = 8 - put_bits_count(&pbc) % 8;
+    else
+        unit->data_bit_padding = 0;
+
+    unit->data_size = (put_bits_count(&pbc) + 7) / 8;
+    flush_put_bits(&pbc);
+
+    err = ff_cbs_alloc_unit_data(ctx, unit, unit->data_size);
+    if (err < 0)
+        return err;
+
+    memcpy(unit->data, priv->write_buffer, unit->data_size);
+
+    return 0;
+}
+
+static int cbs_mpeg2_assemble_fragment(CodedBitstreamContext *ctx,
+                                       CodedBitstreamFragment *frag)
+{
+    uint8_t *data;
+    size_t size, dp;
+    int i;
+
+    size = 0;
+    for (i = 0; i < frag->nb_units; i++)
+        size += 3 + frag->units[i].data_size;
+
+    frag->data_ref = av_buffer_alloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!frag->data_ref)
+        return AVERROR(ENOMEM);
+    data = frag->data_ref->data;
+
+    dp = 0;
+    for (i = 0; i < frag->nb_units; i++) {
+        CodedBitstreamUnit *unit = &frag->units[i];
+
+        data[dp++] = 0;
+        data[dp++] = 0;
+        data[dp++] = 1;
+
+        memcpy(data + dp, unit->data, unit->data_size);
+        dp += unit->data_size;
+    }
+
+    av_assert0(dp == size);
+
+    memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+    frag->data      = data;
+    frag->data_size = size;
+
+    return 0;
+}
+
+static void cbs_mpeg2_close(CodedBitstreamContext *ctx)
+{
+    CodedBitstreamMPEG2Context *priv = ctx->priv_data;
+
+    av_freep(&priv->write_buffer);
+}
+
+const CodedBitstreamType ff_cbs_type_mpeg2 = {
+    .codec_id          = AV_CODEC_ID_MPEG2VIDEO,
+
+    .priv_data_size    = sizeof(CodedBitstreamMPEG2Context),
+
+    .split_fragment    = &cbs_mpeg2_split_fragment,
+    .read_unit         = &cbs_mpeg2_read_unit,
+    .write_unit        = &cbs_mpeg2_write_unit,
+    .assemble_fragment = &cbs_mpeg2_assemble_fragment,
+
+    .close             = &cbs_mpeg2_close,
+};

diff --git a/libavcodec/cbs_mpeg2.h b/libavcodec/cbs_mpeg2.h
new file mode 100644
index 0000000..92caa99
--- /dev/null
+++ b/libavcodec/cbs_mpeg2.h

@@ -0,0 +1,229 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_MPEG2_H
+#define AVCODEC_CBS_MPEG2_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/buffer.h"
+
+
+enum {
+    MPEG2_START_PICTURE         = 0x00,
+    MPEG2_START_SLICE_MIN       = 0x01,
+    MPEG2_START_SLICE_MAX       = 0xaf,
+    MPEG2_START_USER_DATA       = 0xb2,
+    MPEG2_START_SEQUENCE_HEADER = 0xb3,
+    MPEG2_START_SEQUENCE_ERROR  = 0xb4,
+    MPEG2_START_EXTENSION       = 0xb5,
+    MPEG2_START_SEQUENCE_END    = 0xb7,
+    MPEG2_START_GROUP           = 0xb8,
+};
+
+#define MPEG2_START_IS_SLICE(type) \
+    ((type) >= MPEG2_START_SLICE_MIN && \
+     (type) <= MPEG2_START_SLICE_MAX)
+
+enum {
+    MPEG2_EXTENSION_SEQUENCE                  = 0x1,
+    MPEG2_EXTENSION_SEQUENCE_DISPLAY          = 0x2,
+    MPEG2_EXTENSION_QUANT_MATRIX              = 0x3,
+    MPEG2_EXTENSION_COPYRIGHT                 = 0x4,
+    MPEG2_EXTENSION_SEQUENCE_SCALABLE         = 0x5,
+    MPEG2_EXTENSION_PICTURE_DISPLAY           = 0x7,
+    MPEG2_EXTENSION_PICTURE_CODING            = 0x8,
+    MPEG2_EXTENSION_PICTURE_SPATIAL_SCALABLE  = 0x9,
+    MPEG2_EXTENSION_PICTURE_TEMPORAL_SCALABLE = 0xa,
+    MPEG2_EXTENSION_CAMAERA_PARAMETERS        = 0xb,
+    MPEG2_EXTENSION_ITU_T                     = 0xc,
+};
+
+
+typedef struct MPEG2RawSequenceHeader {
+    uint8_t sequence_header_code;
+
+    uint16_t horizontal_size_value;
+    uint16_t vertical_size_value;
+    uint8_t aspect_ratio_information;
+    uint8_t frame_rate_code;
+    uint32_t bit_rate_value;
+    uint16_t vbv_buffer_size_value;
+    uint8_t constrained_parameters_flag;
+
+    uint8_t load_intra_quantiser_matrix;
+    uint8_t intra_quantiser_matrix[64];
+    uint8_t load_non_intra_quantiser_matrix;
+    uint8_t non_intra_quantiser_matrix[64];
+} MPEG2RawSequenceHeader;
+
+typedef struct MPEG2RawUserData {
+    uint8_t user_data_start_code;
+
+    uint8_t *user_data;
+    size_t user_data_length;
+    AVBufferRef *user_data_ref;
+} MPEG2RawUserData;
+
+typedef struct MPEG2RawSequenceExtension {
+    uint8_t profile_and_level_indication;
+    uint8_t progressive_sequence;
+    uint8_t chroma_format;
+    uint8_t horizontal_size_extension;
+    uint8_t vertical_size_extension;
+    uint16_t bit_rate_extension;
+    uint8_t vbv_buffer_size_extension;
+    uint8_t low_delay;
+    uint8_t frame_rate_extension_n;
+    uint8_t frame_rate_extension_d;
+} MPEG2RawSequenceExtension;
+
+typedef struct MPEG2RawSequenceDisplayExtension {
+    uint8_t video_format;
+
+    uint8_t colour_description;
+    uint8_t colour_primaries;
+    uint8_t transfer_characteristics;
+    uint8_t matrix_coefficients;
+
+    uint16_t display_horizontal_size;
+    uint16_t display_vertical_size;
+} MPEG2RawSequenceDisplayExtension;
+
+typedef struct MPEG2RawGroupOfPicturesHeader {
+    uint8_t group_start_code;
+
+    uint32_t time_code;
+    uint8_t closed_gop;
+    uint8_t broken_link;
+} MPEG2RawGroupOfPicturesHeader;
+
+typedef struct MPEG2RawPictureHeader {
+    uint8_t picture_start_code;
+
+    uint16_t temporal_reference;
+    uint8_t picture_coding_type;
+    uint16_t vbv_delay;
+
+    uint8_t full_pel_forward_vector;
+    uint8_t forward_f_code;
+    uint8_t full_pel_backward_vector;
+    uint8_t backward_f_code;
+
+    uint8_t extra_bit_picture;
+} MPEG2RawPictureHeader;
+
+typedef struct MPEG2RawPictureCodingExtension {
+    uint8_t f_code[2][2];
+
+    uint8_t intra_dc_precision;
+    uint8_t picture_structure;
+    uint8_t top_field_first;
+    uint8_t frame_pred_frame_dct;
+    uint8_t concealment_motion_vectors;
+    uint8_t q_scale_type;
+    uint8_t intra_vlc_format;
+    uint8_t alternate_scan;
+    uint8_t repeat_first_field;
+    uint8_t chroma_420_type;
+    uint8_t progressive_frame;
+
+    uint8_t composite_display_flag;
+    uint8_t v_axis;
+    uint8_t field_sequence;
+    uint8_t sub_carrier;
+    uint8_t burst_amplitude;
+    uint8_t sub_carrier_phase;
+} MPEG2RawPictureCodingExtension;
+
+typedef struct MPEG2RawQuantMatrixExtension {
+    uint8_t load_intra_quantiser_matrix;
+    uint8_t intra_quantiser_matrix[64];
+    uint8_t load_non_intra_quantiser_matrix;
+    uint8_t non_intra_quantiser_matrix[64];
+    uint8_t load_chroma_intra_quantiser_matrix;
+    uint8_t chroma_intra_quantiser_matrix[64];
+    uint8_t load_chroma_non_intra_quantiser_matrix;
+    uint8_t chroma_non_intra_quantiser_matrix[64];
+} MPEG2RawQuantMatrixExtension;
+
+typedef struct MPEG2RawPictureDisplayExtension {
+    uint16_t frame_centre_horizontal_offset[3];
+    uint16_t frame_centre_vertical_offset[3];
+} MPEG2RawPictureDisplayExtension;
+
+typedef struct MPEG2RawExtensionData {
+    uint8_t extension_start_code;
+    uint8_t extension_start_code_identifier;
+
+    union {
+        MPEG2RawSequenceExtension sequence;
+        MPEG2RawSequenceDisplayExtension sequence_display;
+        MPEG2RawQuantMatrixExtension quant_matrix;
+        MPEG2RawPictureCodingExtension picture_coding;
+        MPEG2RawPictureDisplayExtension picture_display;
+    } data;
+} MPEG2RawExtensionData;
+
+typedef struct MPEG2RawSliceHeader {
+    uint8_t slice_vertical_position;
+
+    uint8_t slice_vertical_position_extension;
+    uint8_t priority_breakpoint;
+
+    uint8_t quantiser_scale_code;
+
+    uint8_t slice_extension_flag;
+    uint8_t intra_slice;
+    uint8_t slice_picture_id_enable;
+    uint8_t slice_picture_id;
+
+    uint8_t extra_bit_slice;
+
+    size_t extra_information_length;
+    uint8_t *extra_information;
+    AVBufferRef *extra_information_ref;
+} MPEG2RawSliceHeader;
+
+typedef struct MPEG2RawSlice {
+    MPEG2RawSliceHeader header;
+
+    uint8_t *data;
+    size_t   data_size;
+    int      data_bit_start;
+    AVBufferRef *data_ref;
+} MPEG2RawSlice;
+
+
+typedef struct CodedBitstreamMPEG2Context {
+    // Elements stored in headers which are required for other decoding.
+    uint16_t horizontal_size;
+    uint16_t vertical_size;
+    uint8_t scalable;
+    uint8_t scalable_mode;
+    uint8_t progressive_sequence;
+    uint8_t number_of_frame_centre_offsets;
+
+    // Write buffer.
+    uint8_t *write_buffer;
+    size_t write_buffer_size;
+} CodedBitstreamMPEG2Context;
+
+
+#endif /* AVCODEC_CBS_MPEG2_H */

diff --git a/libavcodec/cbs_mpeg2_syntax_template.c b/libavcodec/cbs_mpeg2_syntax_template.c
new file mode 100644
index 0000000..88cf453
--- /dev/null
+++ b/libavcodec/cbs_mpeg2_syntax_template.c

@@ -0,0 +1,386 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static int FUNC(sequence_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                                 MPEG2RawSequenceHeader *current)
+{
+    CodedBitstreamMPEG2Context *mpeg2 = ctx->priv_data;
+    int err, i;
+
+    HEADER("Sequence Header");
+
+    ui(8,  sequence_header_code);
+
+    ui(12, horizontal_size_value);
+    ui(12, vertical_size_value);
+
+    mpeg2->horizontal_size = current->horizontal_size_value;
+    mpeg2->vertical_size   = current->vertical_size_value;
+
+    ui(4,  aspect_ratio_information);
+    ui(4,  frame_rate_code);
+    ui(18, bit_rate_value);
+
+    marker_bit();
+
+    ui(10, vbv_buffer_size_value);
+    ui(1,  constrained_parameters_flag);
+
+    ui(1, load_intra_quantiser_matrix);
+    if (current->load_intra_quantiser_matrix) {
+        for (i = 0; i < 64; i++)
+            uis(8, intra_quantiser_matrix[i], 1, i);
+    }
+
+    ui(1, load_non_intra_quantiser_matrix);
+    if (current->load_non_intra_quantiser_matrix) {
+        for (i = 0; i < 64; i++)
+            uis(8, non_intra_quantiser_matrix[i], 1, i);
+    }
+
+    return 0;
+}
+
+static int FUNC(user_data)(CodedBitstreamContext *ctx, RWContext *rw,
+                           MPEG2RawUserData *current)
+{
+    size_t k;
+    int err;
+
+    HEADER("User Data");
+
+    ui(8, user_data_start_code);
+
+#ifdef READ
+    k = get_bits_left(rw);
+    av_assert0(k % 8 == 0);
+    current->user_data_length = k /= 8;
+    if (k > 0) {
+        current->user_data_ref = av_buffer_allocz(k + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!current->user_data_ref)
+            return AVERROR(ENOMEM);
+        current->user_data = current->user_data_ref->data;
+    }
+#endif
+
+    for (k = 0; k < current->user_data_length; k++)
+        xui(8, user_data, current->user_data[k], 0);
+
+    return 0;
+}
+
+static int FUNC(sequence_extension)(CodedBitstreamContext *ctx, RWContext *rw,
+                                    MPEG2RawSequenceExtension *current)
+{
+    CodedBitstreamMPEG2Context *mpeg2 = ctx->priv_data;
+    int err;
+
+    HEADER("Sequence Extension");
+
+    ui(8,  profile_and_level_indication);
+    ui(1,  progressive_sequence);
+    ui(2,  chroma_format);
+    ui(2,  horizontal_size_extension);
+    ui(2,  vertical_size_extension);
+
+    mpeg2->horizontal_size = (mpeg2->horizontal_size & 0xfff) |
+        current->horizontal_size_extension << 12;
+    mpeg2->vertical_size = (mpeg2->vertical_size & 0xfff) |
+        current->vertical_size_extension << 12;
+    mpeg2->progressive_sequence = current->progressive_sequence;
+
+    ui(12, bit_rate_extension);
+    marker_bit();
+    ui(8,  vbv_buffer_size_extension);
+    ui(1,  low_delay);
+    ui(2,  frame_rate_extension_n);
+    ui(5,  frame_rate_extension_d);
+
+    return 0;
+}
+
+static int FUNC(sequence_display_extension)(CodedBitstreamContext *ctx, RWContext *rw,
+                                            MPEG2RawSequenceDisplayExtension *current)
+{
+    int err;
+
+    HEADER("Sequence Display Extension");
+
+    ui(3, video_format);
+
+    ui(1, colour_description);
+    if (current->colour_description) {
+        ui(8, colour_primaries);
+        ui(8, transfer_characteristics);
+        ui(8, matrix_coefficients);
+    }
+
+    ui(14, display_horizontal_size);
+    marker_bit();
+    ui(14, display_vertical_size);
+
+    return 0;
+}
+
+static int FUNC(group_of_pictures_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                                          MPEG2RawGroupOfPicturesHeader *current)
+{
+    int err;
+
+    HEADER("Group of Pictures Header");
+
+    ui(8,  group_start_code);
+
+    ui(25, time_code);
+    ui(1,  closed_gop);
+    ui(1,  broken_link);
+
+    return 0;
+}
+
+static int FUNC(picture_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                                MPEG2RawPictureHeader *current)
+{
+    int err;
+
+    HEADER("Picture Header");
+
+    ui(8,  picture_start_code);
+
+    ui(10, temporal_reference);
+    ui(3,  picture_coding_type);
+    ui(16, vbv_delay);
+
+    if (current->picture_coding_type == 2 ||
+        current->picture_coding_type == 3) {
+        ui(1, full_pel_forward_vector);
+        ui(3, forward_f_code);
+    }
+
+    if (current->picture_coding_type == 3) {
+        ui(1, full_pel_backward_vector);
+        ui(3, backward_f_code);
+    }
+
+    ui(1, extra_bit_picture);
+
+    return 0;
+}
+
+static int FUNC(picture_coding_extension)(CodedBitstreamContext *ctx, RWContext *rw,
+                                          MPEG2RawPictureCodingExtension *current)
+{
+    CodedBitstreamMPEG2Context *mpeg2 = ctx->priv_data;
+    int err;
+
+    HEADER("Picture Coding Extension");
+
+    ui(4, f_code[0][0]);
+    ui(4, f_code[0][1]);
+    ui(4, f_code[1][0]);
+    ui(4, f_code[1][1]);
+
+    ui(2, intra_dc_precision);
+    ui(2, picture_structure);
+    ui(1, top_field_first);
+    ui(1, frame_pred_frame_dct);
+    ui(1, concealment_motion_vectors);
+    ui(1, q_scale_type);
+    ui(1, intra_vlc_format);
+    ui(1, alternate_scan);
+    ui(1, repeat_first_field);
+    ui(1, chroma_420_type);
+    ui(1, progressive_frame);
+
+    if (mpeg2->progressive_sequence) {
+        if (current->repeat_first_field) {
+            if (current->top_field_first)
+                mpeg2->number_of_frame_centre_offsets = 3;
+            else
+                mpeg2->number_of_frame_centre_offsets = 2;
+        } else {
+            mpeg2->number_of_frame_centre_offsets = 1;
+        }
+    } else {
+        if (current->picture_structure == 1 || // Top field.
+            current->picture_structure == 2) { // Bottom field.
+            mpeg2->number_of_frame_centre_offsets = 1;
+        } else {
+            if (current->repeat_first_field)
+                mpeg2->number_of_frame_centre_offsets = 3;
+            else
+                mpeg2->number_of_frame_centre_offsets = 2;
+        }
+    }
+
+    ui(1, composite_display_flag);
+    if (current->composite_display_flag) {
+        ui(1, v_axis);
+        ui(3, field_sequence);
+        ui(1, sub_carrier);
+        ui(7, burst_amplitude);
+        ui(8, sub_carrier_phase);
+    }
+
+    return 0;
+}
+
+static int FUNC(quant_matrix_extension)(CodedBitstreamContext *ctx, RWContext *rw,
+                                        MPEG2RawQuantMatrixExtension *current)
+{
+    int err, i;
+
+    HEADER("Quant Matrix Extension");
+
+    ui(1, load_intra_quantiser_matrix);
+    if (current->load_intra_quantiser_matrix) {
+        for (i = 0; i < 64; i++)
+            uis(8, intra_quantiser_matrix[i], 1, i);
+    }
+
+    ui(1, load_non_intra_quantiser_matrix);
+    if (current->load_non_intra_quantiser_matrix) {
+        for (i = 0; i < 64; i++)
+            uis(8, non_intra_quantiser_matrix[i], 1, i);
+    }
+
+    ui(1, load_chroma_intra_quantiser_matrix);
+    if (current->load_chroma_intra_quantiser_matrix) {
+        for (i = 0; i < 64; i++)
+            uis(8, intra_quantiser_matrix[i], 1, i);
+    }
+
+    ui(1, load_chroma_non_intra_quantiser_matrix);
+    if (current->load_chroma_non_intra_quantiser_matrix) {
+        for (i = 0; i < 64; i++)
+            uis(8, chroma_non_intra_quantiser_matrix[i], 1, i);
+    }
+
+    return 0;
+}
+
+static int FUNC(picture_display_extension)(CodedBitstreamContext *ctx, RWContext *rw,
+                                           MPEG2RawPictureDisplayExtension *current)
+{
+    CodedBitstreamMPEG2Context *mpeg2 = ctx->priv_data;
+    int err, i;
+
+    HEADER("Picture Display Extension");
+
+    for (i = 0; i < mpeg2->number_of_frame_centre_offsets; i++) {
+        ui(16, frame_centre_horizontal_offset[i]);
+        marker_bit();
+        ui(16, frame_centre_vertical_offset[i]);
+        marker_bit();
+    }
+
+    return 0;
+}
+
+static int FUNC(extension_data)(CodedBitstreamContext *ctx, RWContext *rw,
+                                MPEG2RawExtensionData *current)
+{
+    int err;
+
+    HEADER("Extension Data");
+
+    ui(8, extension_start_code);
+    ui(4, extension_start_code_identifier);
+
+    switch (current->extension_start_code_identifier) {
+    case 1:
+        return FUNC(sequence_extension)
+            (ctx, rw, &current->data.sequence);
+    case 2:
+        return FUNC(sequence_display_extension)
+            (ctx, rw, &current->data.sequence_display);
+    case 3:
+        return FUNC(quant_matrix_extension)
+            (ctx, rw, &current->data.quant_matrix);
+    case 7:
+        return FUNC(picture_display_extension)
+            (ctx, rw, &current->data.picture_display);
+    case 8:
+        return FUNC(picture_coding_extension)
+            (ctx, rw, &current->data.picture_coding);
+    default:
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid extension ID %d.\n",
+               current->extension_start_code_identifier);
+        return AVERROR_INVALIDDATA;
+    }
+}
+
+static int FUNC(slice_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                              MPEG2RawSliceHeader *current)
+{
+    CodedBitstreamMPEG2Context *mpeg2 = ctx->priv_data;
+    int err;
+
+    HEADER("Slice Header");
+
+    ui(8, slice_vertical_position);
+
+    if (mpeg2->vertical_size > 2800)
+        ui(3, slice_vertical_position_extension);
+    if (mpeg2->scalable) {
+        if (mpeg2->scalable_mode == 0)
+            ui(7, priority_breakpoint);
+    }
+
+    ui(5, quantiser_scale_code);
+
+    if (nextbits(1, 1, current->slice_extension_flag)) {
+        ui(1, slice_extension_flag);
+        ui(1, intra_slice);
+        ui(1, slice_picture_id_enable);
+        ui(6, slice_picture_id);
+
+        {
+            size_t k;
+#ifdef READ
+            GetBitContext start;
+            uint8_t bit;
+            start = *rw;
+            for (k = 0; nextbits(1, 1, bit); k++)
+                skip_bits(rw, 8);
+            current->extra_information_length = k;
+            if (k > 0) {
+                *rw = start;
+                current->extra_information =
+                    av_malloc(current->extra_information_length);
+                if (!current->extra_information)
+                    return AVERROR(ENOMEM);
+                for (k = 0; k < current->extra_information_length; k++) {
+                    xui(1, extra_bit_slice, bit, 0);
+                    xui(8, extra_information_slice[k],
+                        current->extra_information[k], 1, k);
+                }
+            }
+#else
+            for (k = 0; k < current->extra_information_length; k++) {
+                xui(1, extra_bit_slice, 1, 0);
+                xui(8, extra_information_slice[k],
+                    current->extra_information[k], 1, k);
+            }
+#endif
+        }
+    }
+    ui(1, extra_bit_slice);
+
+    return 0;
+}

diff --git a/libavcodec/cbs_vp9.c b/libavcodec/cbs_vp9.c
new file mode 100644
index 0000000..7498be4
--- /dev/null
+++ b/libavcodec/cbs_vp9.c

@@ -0,0 +1,679 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+
+#include "cbs.h"
+#include "cbs_internal.h"
+#include "cbs_vp9.h"
+#include "internal.h"
+
+
+static int cbs_vp9_read_s(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                          int width, const char *name,
+                          const int *subscripts, int32_t *write_to)
+{
+    uint32_t magnitude;
+    int position, sign;
+    int32_t value;
+
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    if (get_bits_left(gbc) < width + 1) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid signed value at "
+               "%s: bitstream ended.\n", name);
+        return AVERROR_INVALIDDATA;
+    }
+
+    magnitude = get_bits(gbc, width);
+    sign      = get_bits1(gbc);
+    value     = sign ? -(int32_t)magnitude : magnitude;
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (i = 0; i < width; i++)
+            bits[i] = magnitude >> (width - i - 1) & 1 ? '1' : '0';
+        bits[i] = sign ? '1' : '0';
+        bits[i + 1] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, position, name, subscripts,
+                                    bits, value);
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+static int cbs_vp9_write_s(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                           int width, const char *name,
+                           const int *subscripts, int32_t value)
+{
+    uint32_t magnitude;
+    int sign;
+
+    if (put_bits_left(pbc) < width + 1)
+        return AVERROR(ENOSPC);
+
+    sign      = value < 0;
+    magnitude = sign ? -value : value;
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (i = 0; i < width; i++)
+            bits[i] = magnitude >> (width - i - 1) & 1 ? '1' : '0';
+        bits[i] = sign ? '1' : '0';
+        bits[i + 1] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
+                                    name, subscripts, bits, value);
+    }
+
+    put_bits(pbc, width, magnitude);
+    put_bits(pbc, 1, sign);
+
+    return 0;
+}
+
+static int cbs_vp9_read_increment(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                                  uint32_t range_min, uint32_t range_max,
+                                  const char *name, uint32_t *write_to)
+{
+    uint32_t value;
+    int position, i;
+    char bits[8];
+
+    av_assert0(range_min <= range_max && range_max - range_min < sizeof(bits) - 1);
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    for (i = 0, value = range_min; value < range_max;) {
+        if (get_bits_left(gbc) < 1) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid increment value at "
+                   "%s: bitstream ended.\n", name);
+            return AVERROR_INVALIDDATA;
+        }
+        if (get_bits1(gbc)) {
+            bits[i++] = '1';
+            ++value;
+        } else {
+            bits[i++] = '0';
+            break;
+        }
+    }
+
+    if (ctx->trace_enable) {
+        bits[i] = 0;
+        ff_cbs_trace_syntax_element(ctx, position, name, NULL, bits, value);
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+static int cbs_vp9_write_increment(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                                   uint32_t range_min, uint32_t range_max,
+                                   const char *name, uint32_t value)
+{
+    int len;
+
+    av_assert0(range_min <= range_max && range_max - range_min < 8);
+    if (value < range_min || value > range_max) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "%s out of range: "
+               "%"PRIu32", but must be in [%"PRIu32",%"PRIu32"].\n",
+               name, value, range_min, range_max);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (value == range_max)
+        len = range_max - range_min;
+    else
+        len = value - range_min + 1;
+    if (put_bits_left(pbc) < len)
+        return AVERROR(ENOSPC);
+
+    if (ctx->trace_enable) {
+        char bits[8];
+        int i;
+        for (i = 0; i < len; i++) {
+            if (range_min + i == value)
+                bits[i] = '0';
+            else
+                bits[i] = '1';
+        }
+        bits[i] = 0;
+        ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
+                                    name, NULL, bits, value);
+    }
+
+    if (len > 0)
+        put_bits(pbc, len, (1 << len) - 1 - (value != range_max));
+
+    return 0;
+}
+
+static int cbs_vp9_read_le(CodedBitstreamContext *ctx, GetBitContext *gbc,
+                           int width, const char *name,
+                           const int *subscripts, uint32_t *write_to)
+{
+    uint32_t value;
+    int position, b;
+
+    av_assert0(width % 8 == 0);
+
+    if (ctx->trace_enable)
+        position = get_bits_count(gbc);
+
+    if (get_bits_left(gbc) < width) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid le value at "
+               "%s: bitstream ended.\n", name);
+        return AVERROR_INVALIDDATA;
+    }
+
+    value = 0;
+    for (b = 0; b < width; b += 8)
+        value |= get_bits(gbc, 8) << b;
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (b = 0; b < width; b += 8)
+            for (i = 0; i < 8; i++)
+                bits[b + i] = value >> (b + i) & 1 ? '1' : '0';
+        bits[b] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, position, name, subscripts,
+                                    bits, value);
+    }
+
+    *write_to = value;
+    return 0;
+}
+
+static int cbs_vp9_write_le(CodedBitstreamContext *ctx, PutBitContext *pbc,
+                            int width, const char *name,
+                            const int *subscripts, uint32_t value)
+{
+    int b;
+
+    av_assert0(width % 8 == 0);
+
+    if (put_bits_left(pbc) < width)
+        return AVERROR(ENOSPC);
+
+    if (ctx->trace_enable) {
+        char bits[33];
+        int i;
+        for (b = 0; b < width; b += 8)
+            for (i = 0; i < 8; i++)
+                bits[b + i] = value >> (b + i) & 1 ? '1' : '0';
+        bits[b] = 0;
+
+        ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
+                                    name, subscripts, bits, value);
+    }
+
+    for (b = 0; b < width; b += 8)
+        put_bits(pbc, 8, value >> b & 0xff);
+
+    return 0;
+}
+
+#define HEADER(name) do { \
+        ff_cbs_trace_header(ctx, name); \
+    } while (0)
+
+#define CHECK(call) do { \
+        err = (call); \
+        if (err < 0) \
+            return err; \
+    } while (0)
+
+#define FUNC_NAME(rw, codec, name) cbs_ ## codec ## _ ## rw ## _ ## name
+#define FUNC_VP9(rw, name) FUNC_NAME(rw, vp9, name)
+#define FUNC(name) FUNC_VP9(READWRITE, name)
+
+#define SUBSCRIPTS(subs, ...) (subs > 0 ? ((int[subs + 1]){ subs, __VA_ARGS__ }) : NULL)
+
+#define f(width, name) \
+        xf(width, name, current->name, 0)
+#define s(width, name) \
+        xs(width, name, current->name, 0)
+#define fs(width, name, subs, ...) \
+        xf(width, name, current->name, subs, __VA_ARGS__)
+#define ss(width, name, subs, ...) \
+        xs(width, name, current->name, subs, __VA_ARGS__)
+
+
+#define READ
+#define READWRITE read
+#define RWContext GetBitContext
+
+#define xf(width, name, var, subs, ...) do { \
+        uint32_t value = 0; \
+        CHECK(ff_cbs_read_unsigned(ctx, rw, width, #name, \
+                                   SUBSCRIPTS(subs, __VA_ARGS__), \
+                                   &value, 0, (1 << width) - 1)); \
+        var = value; \
+    } while (0)
+#define xs(width, name, var, subs, ...) do { \
+        int32_t value = 0; \
+        CHECK(cbs_vp9_read_s(ctx, rw, width, #name, \
+                             SUBSCRIPTS(subs, __VA_ARGS__), &value)); \
+        var = value; \
+    } while (0)
+
+
+#define increment(name, min, max) do { \
+        uint32_t value = 0; \
+        CHECK(cbs_vp9_read_increment(ctx, rw, min, max, #name, &value)); \
+        current->name = value; \
+    } while (0)
+
+#define fle(width, name, subs, ...) do { \
+        CHECK(cbs_vp9_read_le(ctx, rw, width, #name, \
+                              SUBSCRIPTS(subs, __VA_ARGS__), &current->name)); \
+    } while (0)
+
+#define delta_q(name) do { \
+        uint8_t delta_coded; \
+        int8_t delta_q; \
+        xf(1, name.delta_coded, delta_coded, 0); \
+        if (delta_coded) \
+            xs(4, name.delta_q, delta_q, 0); \
+        else \
+            delta_q = 0; \
+        current->name = delta_q; \
+    } while (0)
+
+#define prob(name, subs, ...) do { \
+        uint8_t prob_coded; \
+        int8_t prob; \
+        xf(1, name.prob_coded, prob_coded, subs, __VA_ARGS__); \
+        if (prob_coded) \
+            xf(8, name.prob, prob, subs, __VA_ARGS__); \
+        else \
+            prob = 255; \
+        current->name = prob; \
+    } while (0)
+
+#define infer(name, value) do { \
+        current->name = value; \
+    } while (0)
+
+#define byte_alignment(rw) (get_bits_count(rw) % 8)
+
+#include "cbs_vp9_syntax_template.c"
+
+#undef READ
+#undef READWRITE
+#undef RWContext
+#undef xf
+#undef xs
+#undef increment
+#undef fle
+#undef delta_q
+#undef prob
+#undef infer
+#undef byte_alignment
+
+
+#define WRITE
+#define READWRITE write
+#define RWContext PutBitContext
+
+#define xf(width, name, var, subs, ...) do { \
+        CHECK(ff_cbs_write_unsigned(ctx, rw, width, #name, \
+                                    SUBSCRIPTS(subs, __VA_ARGS__), \
+                                    var, 0, (1 << width) - 1)); \
+    } while (0)
+#define xs(width, name, var, subs, ...) do { \
+        CHECK(cbs_vp9_write_s(ctx, rw, width, #name, \
+                              SUBSCRIPTS(subs, __VA_ARGS__), var)); \
+    } while (0)
+
+#define increment(name, min, max) do { \
+        CHECK(cbs_vp9_write_increment(ctx, rw, min, max, #name, current->name)); \
+    } while (0)
+
+#define fle(width, name, subs, ...) do { \
+        CHECK(cbs_vp9_write_le(ctx, rw, width, #name, \
+                               SUBSCRIPTS(subs, __VA_ARGS__), current->name)); \
+    } while (0)
+
+#define delta_q(name) do { \
+        xf(1, name.delta_coded, !!current->name, 0); \
+        if (current->name) \
+            xs(4, name.delta_q, current->name, 0); \
+    } while (0)
+
+#define prob(name, subs, ...) do { \
+        xf(1, name.prob_coded, current->name != 255, subs, __VA_ARGS__); \
+        if (current->name != 255) \
+            xf(8, name.prob, current->name, subs, __VA_ARGS__); \
+    } while (0)
+
+#define infer(name, value) do { \
+        if (current->name != (value)) { \
+            av_log(ctx->log_ctx, AV_LOG_WARNING, "Warning: " \
+                   "%s does not match inferred value: " \
+                   "%"PRId64", but should be %"PRId64".\n", \
+                   #name, (int64_t)current->name, (int64_t)(value)); \
+        } \
+    } while (0)
+
+#define byte_alignment(rw) (put_bits_count(rw) % 8)
+
+#include "cbs_vp9_syntax_template.c"
+
+#undef READ
+#undef READWRITE
+#undef RWContext
+#undef xf
+#undef xs
+#undef increment
+#undef fle
+#undef delta_q
+#undef prob
+#undef infer
+#undef byte_alignment
+
+
+static int cbs_vp9_split_fragment(CodedBitstreamContext *ctx,
+                                  CodedBitstreamFragment *frag,
+                                  int header)
+{
+    uint8_t superframe_header;
+    int err;
+
+    // Last byte in the packet.
+    superframe_header = frag->data[frag->data_size - 1];
+
+    if ((superframe_header & 0xe0) == 0xc0) {
+        VP9RawSuperframeIndex sfi;
+        GetBitContext gbc;
+        size_t index_size, pos;
+        int i;
+
+        index_size = 2 + (((superframe_header & 0x18) >> 3) + 1) *
+                          ((superframe_header & 0x07) + 1);
+
+        err = init_get_bits(&gbc, frag->data + frag->data_size - index_size,
+                            8 * index_size);
+        if (err < 0)
+            return err;
+
+        err = cbs_vp9_read_superframe_index(ctx, &gbc, &sfi);
+        if (err < 0)
+            return err;
+
+        pos = 0;
+        for (i = 0; i <= sfi.frames_in_superframe_minus_1; i++) {
+            if (pos + sfi.frame_sizes[i] + index_size > frag->data_size) {
+                av_log(ctx->log_ctx, AV_LOG_ERROR, "Frame %d too large "
+                       "in superframe: %"PRIu32" bytes.\n",
+                       i, sfi.frame_sizes[i]);
+                return AVERROR_INVALIDDATA;
+            }
+
+            err = ff_cbs_insert_unit_data(ctx, frag, -1, 0,
+                                          frag->data + pos,
+                                          sfi.frame_sizes[i],
+                                          frag->data_ref);
+            if (err < 0)
+                return err;
+
+            pos += sfi.frame_sizes[i];
+        }
+        if (pos + index_size != frag->data_size) {
+            av_log(ctx->log_ctx, AV_LOG_WARNING, "Extra padding at "
+                   "end of superframe: %zu bytes.\n",
+                   frag->data_size - (pos + index_size));
+        }
+
+        return 0;
+
+    } else {
+        err = ff_cbs_insert_unit_data(ctx, frag, -1, 0,
+                                      frag->data, frag->data_size,
+                                      frag->data_ref);
+        if (err < 0)
+            return err;
+    }
+
+    return 0;
+}
+
+static void cbs_vp9_free_frame(void *unit, uint8_t *content)
+{
+    VP9RawFrame *frame = (VP9RawFrame*)content;
+    av_buffer_unref(&frame->data_ref);
+    av_freep(&frame);
+}
+
+static int cbs_vp9_read_unit(CodedBitstreamContext *ctx,
+                             CodedBitstreamUnit *unit)
+{
+    VP9RawFrame *frame;
+    GetBitContext gbc;
+    int err, pos;
+
+    err = init_get_bits(&gbc, unit->data, 8 * unit->data_size);
+    if (err < 0)
+        return err;
+
+    err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(*frame),
+                                    &cbs_vp9_free_frame);
+    if (err < 0)
+        return err;
+    frame = unit->content;
+
+    err = cbs_vp9_read_frame(ctx, &gbc, frame);
+    if (err < 0)
+        return err;
+
+    pos = get_bits_count(&gbc);
+    av_assert0(pos % 8 == 0);
+    pos /= 8;
+    av_assert0(pos <= unit->data_size);
+
+    if (pos == unit->data_size) {
+        // No data (e.g. a show-existing-frame frame).
+    } else {
+        frame->data_ref = av_buffer_ref(unit->data_ref);
+        if (!frame->data_ref)
+            return AVERROR(ENOMEM);
+
+        frame->data      = unit->data      + pos;
+        frame->data_size = unit->data_size - pos;
+    }
+
+    return 0;
+}
+
+static int cbs_vp9_write_unit(CodedBitstreamContext *ctx,
+                              CodedBitstreamUnit *unit)
+{
+    CodedBitstreamVP9Context *priv = ctx->priv_data;
+    VP9RawFrame *frame = unit->content;
+    PutBitContext pbc;
+    int err;
+
+    if (!priv->write_buffer) {
+        // Initial write buffer size is 1MB.
+        priv->write_buffer_size = 1024 * 1024;
+
+    reallocate_and_try_again:
+        err = av_reallocp(&priv->write_buffer, priv->write_buffer_size);
+        if (err < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Unable to allocate a "
+                   "sufficiently large write buffer (last attempt "
+                   "%zu bytes).\n", priv->write_buffer_size);
+            return err;
+        }
+    }
+
+    init_put_bits(&pbc, priv->write_buffer, priv->write_buffer_size);
+
+    err = cbs_vp9_write_frame(ctx, &pbc, frame);
+    if (err == AVERROR(ENOSPC)) {
+        priv->write_buffer_size *= 2;
+        goto reallocate_and_try_again;
+    }
+    if (err < 0)
+        return err;
+
+    // Frame must be byte-aligned.
+    av_assert0(put_bits_count(&pbc) % 8 == 0);
+
+    unit->data_size        = put_bits_count(&pbc) / 8;
+    unit->data_bit_padding = 0;
+    flush_put_bits(&pbc);
+
+    if (frame->data) {
+        if (unit->data_size + frame->data_size >
+            priv->write_buffer_size) {
+            priv->write_buffer_size *= 2;
+            goto reallocate_and_try_again;
+        }
+
+        memcpy(priv->write_buffer + unit->data_size,
+               frame->data, frame->data_size);
+        unit->data_size += frame->data_size;
+    }
+
+    err = ff_cbs_alloc_unit_data(ctx, unit, unit->data_size);
+    if (err < 0)
+        return err;
+
+    memcpy(unit->data, priv->write_buffer, unit->data_size);
+
+    return 0;
+}
+
+static int cbs_vp9_assemble_fragment(CodedBitstreamContext *ctx,
+                                     CodedBitstreamFragment *frag)
+{
+    int err;
+
+    if (frag->nb_units == 1) {
+        // Output is just the content of the single frame.
+
+        CodedBitstreamUnit *frame = &frag->units[0];
+
+        frag->data_ref = av_buffer_ref(frame->data_ref);
+        if (!frag->data_ref)
+            return AVERROR(ENOMEM);
+
+        frag->data      = frame->data;
+        frag->data_size = frame->data_size;
+
+    } else {
+        // Build superframe out of frames.
+
+        VP9RawSuperframeIndex sfi;
+        PutBitContext pbc;
+        AVBufferRef *ref;
+        uint8_t *data;
+        size_t size, max, pos;
+        int i, size_len;
+
+        if (frag->nb_units > 8) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Too many frames to "
+                   "make superframe: %d.\n", frag->nb_units);
+            return AVERROR(EINVAL);
+        }
+
+        max = 0;
+        for (i = 0; i < frag->nb_units; i++)
+            if (max < frag->units[i].data_size)
+                max = frag->units[i].data_size;
+
+        if (max < 2)
+            size_len = 1;
+        else
+            size_len = av_log2(max) / 8 + 1;
+        av_assert0(size_len <= 4);
+
+        sfi.superframe_marker            = VP9_SUPERFRAME_MARKER;
+        sfi.bytes_per_framesize_minus_1  = size_len - 1;
+        sfi.frames_in_superframe_minus_1 = frag->nb_units - 1;
+
+        size = 2;
+        for (i = 0; i < frag->nb_units; i++) {
+            size += size_len + frag->units[i].data_size;
+            sfi.frame_sizes[i] = frag->units[i].data_size;
+        }
+
+        ref = av_buffer_alloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!ref)
+            return AVERROR(ENOMEM);
+        data = ref->data;
+        memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+        pos = 0;
+        for (i = 0; i < frag->nb_units; i++) {
+            av_assert0(size - pos > frag->units[i].data_size);
+            memcpy(data + pos, frag->units[i].data,
+                   frag->units[i].data_size);
+            pos += frag->units[i].data_size;
+        }
+        av_assert0(size - pos == 2 + frag->nb_units * size_len);
+
+        init_put_bits(&pbc, data + pos, size - pos);
+
+        err = cbs_vp9_write_superframe_index(ctx, &pbc, &sfi);
+        if (err < 0) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to write "
+                   "superframe index.\n");
+            av_buffer_unref(&ref);
+            return err;
+        }
+
+        av_assert0(put_bits_left(&pbc) == 0);
+        flush_put_bits(&pbc);
+
+        frag->data_ref  = ref;
+        frag->data      = data;
+        frag->data_size = size;
+    }
+
+    return 0;
+}
+
+static void cbs_vp9_close(CodedBitstreamContext *ctx)
+{
+    CodedBitstreamVP9Context *priv = ctx->priv_data;
+
+    av_freep(&priv->write_buffer);
+}
+
+const CodedBitstreamType ff_cbs_type_vp9 = {
+    .codec_id          = AV_CODEC_ID_VP9,
+
+    .priv_data_size    = sizeof(CodedBitstreamVP9Context),
+
+    .split_fragment    = &cbs_vp9_split_fragment,
+    .read_unit         = &cbs_vp9_read_unit,
+    .write_unit        = &cbs_vp9_write_unit,
+    .assemble_fragment = &cbs_vp9_assemble_fragment,
+
+    .close             = &cbs_vp9_close,
+};

diff --git a/libavcodec/cbs_vp9.h b/libavcodec/cbs_vp9.h
new file mode 100644
index 0000000..5b99c90
--- /dev/null
+++ b/libavcodec/cbs_vp9.h

@@ -0,0 +1,201 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CBS_VP9_H
+#define AVCODEC_CBS_VP9_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "cbs.h"
+
+
+// Miscellaneous constants (section 3).
+enum {
+    VP9_REFS_PER_FRAME = 3,
+
+    VP9_MIN_TILE_WIDTH_B64 = 4,
+    VP9_MAX_TILE_WIDTH_B64 = 64,
+
+    VP9_NUM_REF_FRAMES = 8,
+    VP9_MAX_REF_FRAMES = 4,
+
+    VP9_MAX_SEGMENTS = 8,
+    VP9_SEG_LVL_MAX  = 4,
+};
+
+// Frame types (section 7.2).
+enum {
+    VP9_KEY_FRAME     = 0,
+    VP9_NON_KEY_FRAME = 1,
+};
+
+// Frame sync bytes (section 7.2.1).
+enum {
+    VP9_FRAME_SYNC_0 = 0x49,
+    VP9_FRAME_SYNC_1 = 0x83,
+    VP9_FRAME_SYNC_2 = 0x42,
+};
+
+// Color space values (section 7.2.2).
+enum {
+    VP9_CS_UNKNOWN   = 0,
+    VP9_CS_BT_601    = 1,
+    VP9_CS_BT_709    = 2,
+    VP9_CS_SMPTE_170 = 3,
+    VP9_CS_SMPTE_240 = 4,
+    VP9_CS_BT_2020   = 5,
+    VP9_CS_RESERVED  = 6,
+    VP9_CS_RGB       = 7,
+};
+
+// Reference frame types (section 7.4.12).
+enum {
+    VP9_INTRA_FRAME  = 0,
+    VP9_LAST_FRAME   = 1,
+    VP9_GOLDEN_FRAME = 2,
+    VP9_ALTREF_FRAME = 3,
+};
+
+// Superframe properties (section B.3).
+enum {
+    VP9_MAX_FRAMES_IN_SUPERFRAME = 8,
+
+    VP9_SUPERFRAME_MARKER = 6,
+};
+
+
+typedef struct VP9RawFrameHeader {
+    uint8_t frame_marker;
+    uint8_t profile_low_bit;
+    uint8_t profile_high_bit;
+    uint8_t profile_reserved_zero;
+
+    uint8_t show_existing_frame;
+    uint8_t frame_to_show_map_idx;
+
+    uint8_t frame_type;
+    uint8_t show_frame;
+    uint8_t error_resilient_mode;
+
+    // Color config.
+    uint8_t ten_or_twelve_bit;
+    uint8_t color_space;
+    uint8_t color_range;
+    uint8_t subsampling_x;
+    uint8_t subsampling_y;
+    uint8_t color_config_reserved_zero;
+
+    uint8_t refresh_frame_flags;
+
+    uint8_t intra_only;
+    uint8_t reset_frame_context;
+
+    uint8_t ref_frame_idx[VP9_REFS_PER_FRAME];
+    uint8_t ref_frame_sign_bias[VP9_MAX_REF_FRAMES];
+
+    uint8_t allow_high_precision_mv;
+
+    uint8_t refresh_frame_context;
+    uint8_t frame_parallel_decoding_mode;
+
+    uint8_t frame_context_idx;
+
+    // Frame/render size.
+    uint8_t found_ref[VP9_REFS_PER_FRAME];
+    uint16_t frame_width_minus_1;
+    uint16_t frame_height_minus_1;
+    uint8_t render_and_frame_size_different;
+    uint16_t render_width_minus_1;
+    uint16_t render_height_minus_1;
+
+    // Interpolation filter.
+    uint8_t is_filter_switchable;
+    uint8_t raw_interpolation_filter_type;
+
+    // Loop filter params.
+    uint8_t loop_filter_level;
+    uint8_t loop_filter_sharpness;
+    uint8_t loop_filter_delta_enabled;
+    uint8_t loop_filter_delta_update;
+    uint8_t update_ref_delta[VP9_MAX_REF_FRAMES];
+    int8_t loop_filter_ref_deltas[VP9_MAX_REF_FRAMES];
+    uint8_t update_mode_delta[2];
+    int8_t loop_filter_mode_deltas[2];
+
+    // Quantization params.
+    uint8_t base_q_idx;
+    int8_t delta_q_y_dc;
+    int8_t delta_q_uv_dc;
+    int8_t delta_q_uv_ac;
+
+    // Segmentation params.
+    uint8_t segmentation_enabled;
+    uint8_t segmentation_update_map;
+    uint8_t segmentation_tree_probs[7];
+    uint8_t segmentation_temporal_update;
+    uint8_t segmentation_pred_prob[3];
+    uint8_t segmentation_update_data;
+    uint8_t segmentation_abs_or_delta_update;
+    uint8_t feature_enabled[VP9_MAX_SEGMENTS][VP9_SEG_LVL_MAX];
+    uint8_t feature_value[VP9_MAX_SEGMENTS][VP9_SEG_LVL_MAX];
+    uint8_t feature_sign[VP9_MAX_SEGMENTS][VP9_SEG_LVL_MAX];
+
+    // Tile info.
+    uint8_t tile_cols_log2;
+    uint8_t tile_rows_log2;
+
+    uint16_t header_size_in_bytes;
+} VP9RawFrameHeader;
+
+typedef struct VP9RawFrame {
+    VP9RawFrameHeader header;
+
+    uint8_t     *data;
+    size_t       data_size;
+    AVBufferRef *data_ref;
+} VP9RawFrame;
+
+typedef struct VP9RawSuperframeIndex {
+    uint8_t superframe_marker;
+    uint8_t bytes_per_framesize_minus_1;
+    uint8_t frames_in_superframe_minus_1;
+    uint32_t frame_sizes[VP9_MAX_FRAMES_IN_SUPERFRAME];
+} VP9RawSuperframeIndex;
+
+typedef struct VP9RawSuperframe {
+    VP9RawFrame frames[VP9_MAX_FRAMES_IN_SUPERFRAME];
+    VP9RawSuperframeIndex index;
+} VP9RawSuperframe;
+
+
+typedef struct CodedBitstreamVP9Context {
+    // Frame dimensions in 8x8 mode info blocks.
+    uint16_t mi_cols;
+    uint16_t mi_rows;
+    // Frame dimensions in 64x64 superblocks.
+    uint16_t sb64_cols;
+    uint16_t sb64_rows;
+
+    // Write buffer.
+    uint8_t *write_buffer;
+    size_t write_buffer_size;
+} CodedBitstreamVP9Context;
+
+
+#endif /* AVCODEC_CBS_VP9_H */

diff --git a/libavcodec/cbs_vp9_syntax_template.c b/libavcodec/cbs_vp9_syntax_template.c
new file mode 100644
index 0000000..0db0f52
--- /dev/null
+++ b/libavcodec/cbs_vp9_syntax_template.c

@@ -0,0 +1,390 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static int FUNC(frame_sync_code)(CodedBitstreamContext *ctx, RWContext *rw,
+                                 VP9RawFrameHeader *current)
+{
+    uint8_t frame_sync_byte_0 = VP9_FRAME_SYNC_0;
+    uint8_t frame_sync_byte_1 = VP9_FRAME_SYNC_1;
+    uint8_t frame_sync_byte_2 = VP9_FRAME_SYNC_2;
+    int err;
+
+    xf(8, frame_sync_byte_0, frame_sync_byte_0, 0);
+    xf(8, frame_sync_byte_1, frame_sync_byte_1, 0);
+    xf(8, frame_sync_byte_2, frame_sync_byte_2, 0);
+
+    if (frame_sync_byte_0 != VP9_FRAME_SYNC_0 ||
+        frame_sync_byte_1 != VP9_FRAME_SYNC_1 ||
+        frame_sync_byte_2 != VP9_FRAME_SYNC_2) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid frame sync code: "
+               "%02x %02x %02x.\n", frame_sync_byte_0,
+               frame_sync_byte_1, frame_sync_byte_2);
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+static int FUNC(color_config)(CodedBitstreamContext *ctx, RWContext *rw,
+                              VP9RawFrameHeader *current, int profile)
+{
+    int err;
+
+    if (profile >= 2)
+        f(1, ten_or_twelve_bit);
+
+    f(3, color_space);
+
+    if (current->color_space != VP9_CS_RGB) {
+        f(1, color_range);
+        if (profile == 1 || profile == 3) {
+            f(1, subsampling_x);
+            f(1, subsampling_y);
+            f(1, color_config_reserved_zero);
+        } else {
+            infer(subsampling_x, 1);
+            infer(subsampling_y, 1);
+        }
+    } else {
+        infer(color_range, 1);
+        if (profile == 1 || profile == 3) {
+            infer(subsampling_x, 0);
+            infer(subsampling_y, 0);
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(frame_size)(CodedBitstreamContext *ctx, RWContext *rw,
+                            VP9RawFrameHeader *current)
+{
+    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
+    int err;
+
+    f(16, frame_width_minus_1);
+    f(16, frame_height_minus_1);
+
+    vp9->mi_cols = (current->frame_width_minus_1  + 8) >> 3;
+    vp9->mi_rows = (current->frame_height_minus_1 + 8) >> 3;
+    vp9->sb64_cols = (vp9->mi_cols + 7) >> 3;
+    vp9->sb64_rows = (vp9->mi_rows + 7) >> 3;
+
+    return 0;
+}
+
+static int FUNC(render_size)(CodedBitstreamContext *ctx, RWContext *rw,
+                             VP9RawFrameHeader *current)
+{
+    int err;
+
+    f(1, render_and_frame_size_different);
+
+    if (current->render_and_frame_size_different) {
+        f(16, render_width_minus_1);
+        f(16, render_height_minus_1);
+    }
+
+    return 0;
+}
+
+static int FUNC(frame_size_with_refs)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      VP9RawFrameHeader *current)
+{
+    int err, i;
+
+    for (i = 0; i < VP9_REFS_PER_FRAME; i++) {
+        fs(1, found_ref[i], 1, i);
+        if (current->found_ref[i])
+            break;
+    }
+    if (i >= VP9_REFS_PER_FRAME)
+        CHECK(FUNC(frame_size)(ctx, rw, current));
+    CHECK(FUNC(render_size)(ctx, rw, current));
+
+    return 0;
+}
+
+static int FUNC(interpolation_filter)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      VP9RawFrameHeader *current)
+{
+    int err;
+
+    f(1, is_filter_switchable);
+    if (!current->is_filter_switchable)
+        f(2, raw_interpolation_filter_type);
+
+    return 0;
+}
+
+static int FUNC(loop_filter_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                    VP9RawFrameHeader *current)
+{
+    int err, i;
+
+    f(6, loop_filter_level);
+    f(3, loop_filter_sharpness);
+
+    f(1, loop_filter_delta_enabled);
+    if (current->loop_filter_delta_enabled) {
+        f(1, loop_filter_delta_update);
+        if (current->loop_filter_delta_update) {
+            for (i = 0; i < VP9_MAX_REF_FRAMES; i++) {
+                fs(1, update_ref_delta[i], 1, i);
+                if (current->update_ref_delta[i])
+                    ss(6, loop_filter_ref_deltas[i], 1, i);
+            }
+            for (i = 0; i < 2; i++) {
+                fs(1, update_mode_delta[i], 1, i);
+                if (current->update_mode_delta[i])
+                    ss(6, loop_filter_mode_deltas[i], 1, i);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(quantization_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     VP9RawFrameHeader *current)
+{
+    int err;
+
+    f(8, base_q_idx);
+
+    delta_q(delta_q_y_dc);
+    delta_q(delta_q_uv_dc);
+    delta_q(delta_q_uv_ac);
+
+    return 0;
+}
+
+static int FUNC(segmentation_params)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     VP9RawFrameHeader *current)
+{
+    static const int segmentation_feature_bits[VP9_SEG_LVL_MAX]   = { 8, 6, 2, 0 };
+    static const int segmentation_feature_signed[VP9_SEG_LVL_MAX] = { 1, 1, 0, 0 };
+
+    int err, i, j;
+
+    f(1, segmentation_enabled);
+
+    if (current->segmentation_enabled) {
+        f(1, segmentation_update_map);
+        if (current->segmentation_update_map) {
+            for (i = 0; i < 7; i++)
+                prob(segmentation_tree_probs[i], 1, i);
+            f(1, segmentation_temporal_update);
+            for (i = 0; i < 3; i++) {
+                if (current->segmentation_temporal_update)
+                    prob(segmentation_pred_prob[i], 1, i);
+                else
+                    infer(segmentation_pred_prob[i], 255);
+            }
+        }
+
+        f(1, segmentation_update_data);
+        if (current->segmentation_update_data) {
+            f(1, segmentation_abs_or_delta_update);
+            for (i = 0; i < VP9_MAX_SEGMENTS; i++) {
+                for (j = 0; j < VP9_SEG_LVL_MAX; j++) {
+                    fs(1, feature_enabled[i][j], 2, i, j);
+                    if (current->feature_enabled[i][j] &&
+                        segmentation_feature_bits[j]) {
+                        fs(segmentation_feature_bits[j],
+                           feature_value[i][j], 2, i, j);
+                        if (segmentation_feature_signed[j])
+                            fs(1, feature_sign[i][j], 2, i, j);
+                        else
+                            infer(feature_sign[i][j], 0);
+                    } else {
+                        infer(feature_value[i][j], 0);
+                        infer(feature_sign[i][j],  0);
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw,
+                           VP9RawFrameHeader *current)
+{
+    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
+    int min_log2_tile_cols, max_log2_tile_cols;
+    int err;
+
+    min_log2_tile_cols = 0;
+    while ((VP9_MAX_TILE_WIDTH_B64 << min_log2_tile_cols) < vp9->sb64_cols)
+        ++min_log2_tile_cols;
+    max_log2_tile_cols = 0;
+    while ((vp9->sb64_cols >> (max_log2_tile_cols + 1)) >= VP9_MIN_TILE_WIDTH_B64)
+        ++max_log2_tile_cols;
+
+    increment(tile_cols_log2, min_log2_tile_cols, max_log2_tile_cols);
+
+    increment(tile_rows_log2, 0, 2);
+
+    return 0;
+}
+
+static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
+                                     VP9RawFrameHeader *current)
+{
+    int profile, i;
+    int err;
+
+    f(2, frame_marker);
+
+    f(1, profile_low_bit);
+    f(1, profile_high_bit);
+    profile = (current->profile_high_bit << 1) + current->profile_low_bit;
+    if (profile == 3)
+        f(1, profile_reserved_zero);
+
+    f(1, show_existing_frame);
+    if (current->show_existing_frame) {
+        f(3, frame_to_show_map_idx);
+        infer(header_size_in_bytes, 0);
+        infer(refresh_frame_flags,  0x00);
+        infer(loop_filter_level,    0);
+        return 0;
+    }
+
+    f(1, frame_type);
+    f(1, show_frame);
+    f(1, error_resilient_mode);
+
+    if (current->frame_type == VP9_KEY_FRAME) {
+        CHECK(FUNC(frame_sync_code)(ctx, rw, current));
+        CHECK(FUNC(color_config)(ctx, rw, current, profile));
+        CHECK(FUNC(frame_size)(ctx, rw, current));
+        CHECK(FUNC(render_size)(ctx, rw, current));
+
+        infer(refresh_frame_flags, 0xff);
+
+    } else {
+         if (current->show_frame == 0)
+             f(1, intra_only);
+         else
+             infer(intra_only, 0);
+
+         if (current->error_resilient_mode == 0)
+             f(2, reset_frame_context);
+         else
+             infer(reset_frame_context, 0);
+
+         if (current->intra_only == 1) {
+             CHECK(FUNC(frame_sync_code)(ctx, rw, current));
+
+             if (profile > 0) {
+                 CHECK(FUNC(color_config)(ctx, rw, current, profile));
+             } else {
+                 infer(color_space,   1);
+                 infer(subsampling_x, 1);
+                 infer(subsampling_y, 1);
+             }
+
+             f(8, refresh_frame_flags);
+
+             CHECK(FUNC(frame_size)(ctx, rw, current));
+             CHECK(FUNC(render_size)(ctx, rw, current));
+         } else {
+             f(8, refresh_frame_flags);
+
+             for (i = 0; i < VP9_REFS_PER_FRAME; i++) {
+                 fs(3, ref_frame_idx[i], 1, i);
+                 fs(1, ref_frame_sign_bias[VP9_LAST_FRAME + i],
+                    1, VP9_LAST_FRAME + i);
+             }
+
+             CHECK(FUNC(frame_size_with_refs)(ctx, rw, current));
+             f(1, allow_high_precision_mv);
+             CHECK(FUNC(interpolation_filter)(ctx, rw, current));
+         }
+    }
+
+    if (current->error_resilient_mode == 0) {
+        f(1, refresh_frame_context);
+        f(1, frame_parallel_decoding_mode);
+    } else {
+        infer(refresh_frame_context,        0);
+        infer(frame_parallel_decoding_mode, 1);
+    }
+
+    f(2, frame_context_idx);
+
+    CHECK(FUNC(loop_filter_params)(ctx, rw, current));
+    CHECK(FUNC(quantization_params)(ctx, rw, current));
+    CHECK(FUNC(segmentation_params)(ctx, rw, current));
+    CHECK(FUNC(tile_info)(ctx, rw, current));
+
+    f(16, header_size_in_bytes);
+
+    return 0;
+}
+
+static int FUNC(trailing_bits)(CodedBitstreamContext *ctx, RWContext *rw)
+{
+    int err;
+    av_unused int zero = 0;
+    while (byte_alignment(rw) != 0)
+        xf(1, zero_bit, zero, 0);
+
+    return 0;
+}
+
+static int FUNC(frame)(CodedBitstreamContext *ctx, RWContext *rw,
+                       VP9RawFrame *current)
+{
+    int err;
+
+    HEADER("Frame");
+
+    CHECK(FUNC(uncompressed_header)(ctx, rw, &current->header));
+
+    CHECK(FUNC(trailing_bits)(ctx, rw));
+
+    return 0;
+}
+
+static int FUNC(superframe_index)(CodedBitstreamContext *ctx, RWContext *rw,
+                                  VP9RawSuperframeIndex *current)
+{
+    int err, i;
+
+    HEADER("Superframe Index");
+
+    f(3, superframe_marker);
+    f(2, bytes_per_framesize_minus_1);
+    f(3, frames_in_superframe_minus_1);
+
+    for (i = 0; i <= current->frames_in_superframe_minus_1; i++) {
+        // Surprise little-endian!
+        fle(8 * (current->bytes_per_framesize_minus_1 + 1),
+            frame_sizes[i], 1, i);
+    }
+
+    f(3, superframe_marker);
+    f(2, bytes_per_framesize_minus_1);
+    f(3, frames_in_superframe_minus_1);
+
+    return 0;
+}

diff --git a/libavcodec/cdgraphics.c b/libavcodec/cdgraphics.c
index 87ad5e7..be85e54 100644
--- a/libavcodec/cdgraphics.c
+++ b/libavcodec/cdgraphics.c

@@ -69,6 +69,7 @@
     int hscroll;
     int vscroll;
     int transparency;
+    int cleared;
 } CDGraphicsContext;
 
 static av_cold int cdg_decode_init(AVCodecContext *avctx)
@@ -287,9 +288,10 @@
 
     if ((ret = ff_reget_buffer(avctx, cc->frame)) < 0)
         return ret;
-    if (!avctx->frame_number) {
+    if (!cc->cleared) {
         memset(cc->frame->data[0], 0, cc->frame->linesize[0] * avctx->height);
         memset(cc->frame->data[1], 0, AVPALETTE_SIZE);
+        cc->cleared = 1;
     }
 
     command = bytestream2_get_byte(&gb);

diff --git a/libavcodec/cfhd.c b/libavcodec/cfhd.c
index 5ea8f24..846d334 100644
--- a/libavcodec/cfhd.c
+++ b/libavcodec/cfhd.c

@@ -20,24 +20,45 @@
 
 /**
  * @file
- * CFHD Video Decoder
+ * Cineform HD video decoder
  */
 
+#include "libavutil/attributes.h"
 #include "libavutil/buffer.h"
 #include "libavutil/common.h"
-#include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
 #include "libavutil/opt.h"
 
 #include "avcodec.h"
-#include "internal.h"
 #include "bytestream.h"
+#include "get_bits.h"
+#include "internal.h"
 #include "thread.h"
 #include "cfhd.h"
 
-#define SUBBAND_COUNT 10
+#define ALPHA_COMPAND_DC_OFFSET 256
+#define ALPHA_COMPAND_GAIN 9400
 
-static av_cold int cfhd_decode_init(AVCodecContext *avctx)
+enum CFHDParam {
+    ChannelCount     =  12,
+    SubbandCount     =  14,
+    ImageWidth       =  20,
+    ImageHeight      =  21,
+    LowpassPrecision =  35,
+    SubbandNumber    =  48,
+    Quantization     =  53,
+    ChannelNumber    =  62,
+    SampleFlags      =  68,
+    BitsPerComponent = 101,
+    ChannelWidth     = 104,
+    ChannelHeight    = 105,
+    PrescaleShift    = 109,
+};
+
+
+
+static av_cold int cfhd_init(AVCodecContext *avctx)
 {
     CFHDContext *s = avctx->priv_data;
 
@@ -54,35 +75,89 @@
     s->subband_num_actual = 0;
 }
 
+static void init_peak_table_defaults(CFHDContext *s)
+{
+    s->peak.level  = 0;
+    s->peak.offset = 0;
+    memset(&s->peak.base, 0, sizeof(s->peak.base));
+}
+
 static void init_frame_defaults(CFHDContext *s)
 {
     s->coded_width       = 0;
     s->coded_height      = 0;
+    s->cropped_height    = 0;
     s->bpc               = 10;
     s->channel_cnt       = 4;
-    s->subband_cnt       = 10;
+    s->subband_cnt       = SUBBAND_COUNT;
     s->channel_num       = 0;
     s->lowpass_precision = 16;
     s->quantisation      = 1;
     s->wavelet_depth     = 3;
     s->pshift            = 1;
     s->codebook          = 0;
+    s->difference_coding = 0;
+    s->progressive       = 0;
     init_plane_defaults(s);
+    init_peak_table_defaults(s);
 }
 
 /* TODO: merge with VLC tables or use LUT */
-static inline int dequant_and_decompand(int level, int quantisation)
+static inline int dequant_and_decompand(int level, int quantisation, int codebook)
 {
-    int64_t abslevel = abs(level);
-    return (abslevel + ((768 * abslevel * abslevel * abslevel) / (255 * 255 * 255))) * FFSIGN(level) * quantisation;
+    if (codebook == 0 || codebook == 1) {
+        int64_t abslevel = abs(level);
+        if (level < 264)
+            return (abslevel + ((768 * abslevel * abslevel * abslevel) / (255 * 255 * 255))) *
+               FFSIGN(level) * quantisation;
+        else
+            return level * quantisation;
+    } else
+        return level * quantisation;
 }
 
-static inline void filter(int16_t *output, ptrdiff_t out_stride, int16_t *low, ptrdiff_t low_stride,
-                          int16_t *high, ptrdiff_t high_stride, int len, uint8_t clip)
+static inline void difference_coding(int16_t *band, int width, int height)
+{
+
+    int i,j;
+    for (i = 0; i < height; i++) {
+        for (j = 1; j < width; j++) {
+          band[j] += band[j-1];
+        }
+        band += width;
+    }
+}
+
+static inline void peak_table(int16_t *band, Peak *peak, int length)
+{
+    int i;
+    for (i = 0; i < length; i++)
+        if (abs(band[i]) > peak->level)
+            band[i] = bytestream2_get_le16(&peak->base);
+}
+
+static inline void process_alpha(int16_t *alpha, int width)
+{
+    int i, channel;
+    for (i = 0; i < width; i++) {
+        channel   = alpha[i];
+        channel  -= ALPHA_COMPAND_DC_OFFSET;
+        channel <<= 3;
+        channel  *= ALPHA_COMPAND_GAIN;
+        channel >>= 16;
+        channel   = av_clip_uintp2(channel, 12);
+        alpha[i]  = channel;
+    }
+}
+
+static inline void filter(int16_t *output, ptrdiff_t out_stride,
+                          int16_t *low, ptrdiff_t low_stride,
+                          int16_t *high, ptrdiff_t high_stride,
+                          int len, int clip)
 {
     int16_t tmp;
-
     int i;
+
     for (i = 0; i < len; i++) {
         if (i == 0) {
             tmp = (11*low[0*low_stride] - 4*low[1*low_stride] + low[2*low_stride] + 4) >> 3;
@@ -118,28 +193,42 @@
     }
 }
 
-static void horiz_filter(int16_t *output, int16_t *low, int16_t *high, int width)
+static inline void interlaced_vertical_filter(int16_t *output, int16_t *low, int16_t *high,
+                         int width, int linesize, int plane)
+{
+    int i;
+    int16_t even, odd;
+    for (i = 0; i < width; i++) {
+        even = (low[i] - high[i])/2;
+        odd  = (low[i] + high[i])/2;
+        output[i]            = av_clip_uintp2(even, 10);
+        output[i + linesize] = av_clip_uintp2(odd, 10);
+    }
+}
+static void horiz_filter(int16_t *output, int16_t *low, int16_t *high,
+                         int width)
 {
     filter(output, 1, low, 1, high, 1, width, 0);
 }
 
-static void horiz_filter_clip(int16_t *output, int16_t *low, int16_t *high, int width, uint8_t clip)
+static void horiz_filter_clip(int16_t *output, int16_t *low, int16_t *high,
+                              int width, int clip)
 {
     filter(output, 1, low, 1, high, 1, width, clip);
 }
 
-static void vert_filter(int16_t *output, int out_stride, int16_t *low, int low_stride,
-                        int16_t *high, int high_stride, int len)
+static void vert_filter(int16_t *output, ptrdiff_t out_stride,
+                        int16_t *low, ptrdiff_t low_stride,
+                        int16_t *high, ptrdiff_t high_stride, int len)
 {
     filter(output, out_stride, low, low_stride, high, high_stride, len, 0);
 }
 
-static void free_buffers(AVCodecContext *avctx)
+static void free_buffers(CFHDContext *s)
 {
-    CFHDContext *s = avctx->priv_data;
     int i, j;
 
-    for (i = 0; i < 4; i++) {
+    for (i = 0; i < FF_ARRAY_ELEMS(s->plane); i++) {
         av_freep(&s->plane[i].idwt_buf);
         av_freep(&s->plane[i].idwt_tmp);
 
@@ -156,37 +245,44 @@
 static int alloc_buffers(AVCodecContext *avctx)
 {
     CFHDContext *s = avctx->priv_data;
-    int i, j, k, ret, planes;
+    int i, j, ret, planes;
+    int chroma_x_shift, chroma_y_shift;
+    unsigned k;
 
     if ((ret = ff_set_dimensions(avctx, s->coded_width, s->coded_height)) < 0)
         return ret;
     avctx->pix_fmt = s->coded_format;
 
-    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
-    planes = av_pix_fmt_count_planes(avctx->pix_fmt);
+    if ((ret = av_pix_fmt_get_chroma_sub_sample(s->coded_format,
+                                                &chroma_x_shift,
+                                                &chroma_y_shift)) < 0)
+        return ret;
+    planes = av_pix_fmt_count_planes(s->coded_format);
 
     for (i = 0; i < planes; i++) {
-        int width = i ? avctx->width >> s->chroma_x_shift : avctx->width;
-        int height = i ? avctx->height >> s->chroma_y_shift : avctx->height;
-        int stride = FFALIGN(width / 8, 8) * 8;
         int w8, h8, w4, h4, w2, h2;
-        height = FFALIGN(height / 8, 2) * 8;
-        s->plane[i].width = width;
+        int width  = i ? avctx->width  >> chroma_x_shift : avctx->width;
+        int height = i ? avctx->height >> chroma_y_shift : avctx->height;
+        ptrdiff_t stride = FFALIGN(width  / 8, 8) * 8;
+        if (chroma_y_shift)
+            height = FFALIGN(height / 8, 2) * 8;
+        s->plane[i].width  = width;
         s->plane[i].height = height;
         s->plane[i].stride = stride;
 
-        w8 = FFALIGN(s->plane[i].width / 8, 8);
-        h8 = FFALIGN(s->plane[i].height / 8, 2);
+        w8 = FFALIGN(s->plane[i].width  / 8, 8);
+        h8 = height / 8;
         w4 = w8 * 2;
         h4 = h8 * 2;
         w2 = w4 * 2;
         h2 = h4 * 2;
 
-        s->plane[i].idwt_buf = av_mallocz_array(height * stride, sizeof(*s->plane[i].idwt_buf));
-        s->plane[i].idwt_tmp = av_malloc_array(height * stride, sizeof(*s->plane[i].idwt_tmp));
-        if (!s->plane[i].idwt_buf || !s->plane[i].idwt_tmp) {
+        s->plane[i].idwt_buf =
+            av_mallocz_array(height * stride, sizeof(*s->plane[i].idwt_buf));
+        s->plane[i].idwt_tmp =
+            av_malloc_array(height * stride, sizeof(*s->plane[i].idwt_tmp));
+        if (!s->plane[i].idwt_buf || !s->plane[i].idwt_tmp)
             return AVERROR(ENOMEM);
-        }
 
         s->plane[i].subband[0] = s->plane[i].idwt_buf;
         s->plane[i].subband[1] = s->plane[i].idwt_buf + 2 * w8 * h8;
@@ -200,7 +296,7 @@
         s->plane[i].subband[9] = s->plane[i].idwt_buf + 3 * w2 * h2;
 
         for (j = 0; j < DWT_LEVELS; j++) {
-            for(k = 0; k < 4; k++) {
+            for (k = 0; k < FF_ARRAY_ELEMS(s->plane[i].band[j]); k++) {
                 s->plane[i].band[j][k].a_width  = w8 << j;
                 s->plane[i].band[j][k].a_height = h8 << j;
             }
@@ -209,10 +305,10 @@
         /* ll2 and ll1 commented out because they are done in-place */
         s->plane[i].l_h[0] = s->plane[i].idwt_tmp;
         s->plane[i].l_h[1] = s->plane[i].idwt_tmp + 2 * w8 * h8;
-        //s->plane[i].l_h[2] = ll2;
+        // s->plane[i].l_h[2] = ll2;
         s->plane[i].l_h[3] = s->plane[i].idwt_tmp;
         s->plane[i].l_h[4] = s->plane[i].idwt_tmp + 2 * w4 * h4;
-        //s->plane[i].l_h[5] = ll1;
+        // s->plane[i].l_h[5] = ll1;
         s->plane[i].l_h[6] = s->plane[i].idwt_tmp;
         s->plane[i].l_h[7] = s->plane[i].idwt_tmp + 2 * w2 * h2;
     }
@@ -250,10 +346,13 @@
         uint16_t data   = bytestream2_get_be16(&gb);
         if (abs_tag8 >= 0x60 && abs_tag8 <= 0x6f) {
             av_log(avctx, AV_LOG_DEBUG, "large len %x\n", ((tagu & 0xff) << 16) | data);
-        } else if (tag == 20) {
+        } else if (tag == SampleFlags) {
+            av_log(avctx, AV_LOG_DEBUG, "Progressive?%"PRIu16"\n", data);
+            s->progressive = data & 0x0001;
+        } else if (tag == ImageWidth) {
             av_log(avctx, AV_LOG_DEBUG, "Width %"PRIu16"\n", data);
             s->coded_width = data;
-        } else if (tag == 21) {
+        } else if (tag == ImageHeight) {
             av_log(avctx, AV_LOG_DEBUG, "Height %"PRIu16"\n", data);
             s->coded_height = data;
         } else if (tag == 101) {
@@ -264,7 +363,7 @@
                 break;
             }
             s->bpc = data;
-        } else if (tag == 12) {
+        } else if (tag == ChannelCount) {
             av_log(avctx, AV_LOG_DEBUG, "Channel Count: %"PRIu16"\n", data);
             s->channel_cnt = data;
             if (data > 4) {
@@ -272,14 +371,14 @@
                 ret = AVERROR_PATCHWELCOME;
                 break;
             }
-        } else if (tag == 14) {
+        } else if (tag == SubbandCount) {
             av_log(avctx, AV_LOG_DEBUG, "Subband Count: %"PRIu16"\n", data);
             if (data != SUBBAND_COUNT) {
                 av_log(avctx, AV_LOG_ERROR, "Subband Count of %"PRIu16" is unsupported\n", data);
                 ret = AVERROR_PATCHWELCOME;
                 break;
             }
-        } else if (tag == 62) {
+        } else if (tag == ChannelNumber) {
             s->channel_num = data;
             av_log(avctx, AV_LOG_DEBUG, "Channel number %"PRIu16"\n", data);
             if (s->channel_num >= planes) {
@@ -288,7 +387,7 @@
                 break;
             }
             init_plane_defaults(s);
-        } else if (tag == 48) {
+        } else if (tag == SubbandNumber) {
             if (s->subband_num != 0 && data == 1)  // hack
                 s->level++;
             av_log(avctx, AV_LOG_DEBUG, "Subband number %"PRIu16"\n", data);
@@ -311,12 +410,12 @@
                 ret = AVERROR(EINVAL);
                 break;
             }
-        } else if (tag == 35)
+        } else if (tag == LowpassPrecision)
             av_log(avctx, AV_LOG_DEBUG, "Lowpass precision bits: %"PRIu16"\n", data);
-        else if (tag == 53) {
+        else if (tag == Quantization) {
             s->quantisation = data;
             av_log(avctx, AV_LOG_DEBUG, "Quantisation: %"PRIu16"\n", data);
-        } else if (tag == 109) {
+        } else if (tag == PrescaleShift) {
             s->prescale_shift[0] = (data >> 0) & 0x7;
             s->prescale_shift[1] = (data >> 3) & 0x7;
             s->prescale_shift[2] = (data >> 6) & 0x7;
@@ -348,6 +447,8 @@
             }
             av_log(avctx, AV_LOG_DEBUG, "Transform-type? %"PRIu16"\n", data);
         } else if (abstag >= 0x4000 && abstag <= 0x40ff) {
+            if (abstag == 0x4001)
+                s->peak.level = 0;
             av_log(avctx, AV_LOG_DEBUG, "Small chunk length %d %s\n", data * 4, tag < 0 ? "optional" : "required");
             bytestream2_skipu(&gb, data * 4);
         } else if (tag == 23) {
@@ -405,7 +506,8 @@
             s->codebook = data;
             av_log(avctx, AV_LOG_DEBUG, "Codebook %i\n", s->codebook);
         } else if (tag == 72) {
-            s->codebook = data;
+            s->codebook = data & 0xf;
+            s->difference_coding = (data >> 4) & 1;
             av_log(avctx, AV_LOG_DEBUG, "Other codebook? %i\n", s->codebook);
         } else if (tag == 70) {
             av_log(avctx, AV_LOG_DEBUG, "Subsampling or bit-depth flag? %i\n", data);
@@ -429,6 +531,22 @@
                 break;
             }
             planes = av_pix_fmt_count_planes(s->coded_format);
+        } else if (tag == -85) {
+            av_log(avctx, AV_LOG_DEBUG, "Cropped height %"PRIu16"\n", data);
+            s->cropped_height = data;
+        } else if (tag == -75) {
+            s->peak.offset &= ~0xffff;
+            s->peak.offset |= (data & 0xffff);
+            s->peak.base    = gb;
+            s->peak.level   = 0;
+        } else if (tag == -76) {
+            s->peak.offset &= 0xffff;
+            s->peak.offset |= (data & 0xffffU)<<16;
+            s->peak.base    = gb;
+            s->peak.level   = 0;
+        } else if (tag == -74 && s->peak.offset) {
+            s->peak.level = data;
+            bytestream2_seek(&s->peak.base, s->peak.offset - 4, SEEK_CUR);
         } else
             av_log(avctx, AV_LOG_DEBUG,  "Unknown tag %i data %x\n", tag, data);
 
@@ -437,15 +555,17 @@
             s->coded_format != AV_PIX_FMT_NONE) {
             if (s->a_width != s->coded_width || s->a_height != s->coded_height ||
                 s->a_format != s->coded_format) {
-                free_buffers(avctx);
+                free_buffers(s);
                 if ((ret = alloc_buffers(avctx)) < 0) {
-                    free_buffers(avctx);
+                    free_buffers(s);
                     return ret;
                 }
             }
             ret = ff_set_dimensions(avctx, s->coded_width, s->coded_height);
             if (ret < 0)
                 return ret;
+            if (s->cropped_height)
+                avctx->height = s->cropped_height;
             frame.f->width =
             frame.f->height = 0;
 
@@ -544,7 +664,7 @@
                         if (count > expected)
                             break;
 
-                        coeff = dequant_and_decompand(level, s->quantisation);
+                        coeff = dequant_and_decompand(level, s->quantisation, 0);
                         for (i = 0; i < run; i++)
                             *coeff_data++ = coeff;
                     }
@@ -563,7 +683,7 @@
                         if (count > expected)
                             break;
 
-                        coeff = dequant_and_decompand(level, s->quantisation);
+                        coeff = dequant_and_decompand(level, s->quantisation, s->codebook);
                         for (i = 0; i < run; i++)
                             *coeff_data++ = coeff;
                     }
@@ -576,8 +696,12 @@
                 ret = AVERROR(EINVAL);
                 goto end;
             }
+            if (s->peak.level)
+                peak_table(coeff_data - count, &s->peak, count);
+            if (s->difference_coding)
+                difference_coding(s->plane[s->channel_num].subband[s->subband_num_actual], highpass_width, highpass_height);
 
-            bytes = FFALIGN(FF_CEIL_RSHIFT(get_bits_count(&s->gb), 3), 4);
+            bytes = FFALIGN(AV_CEIL_RSHIFT(get_bits_count(&s->gb), 3), 4);
             if (bytes > bytestream2_get_bytes_left(&gb)) {
                 av_log(avctx, AV_LOG_ERROR, "Bitstream overread error\n");
                 ret = AVERROR(EINVAL);
@@ -734,35 +858,68 @@
         }
 
         av_log(avctx, AV_LOG_DEBUG, "Level 3 plane %i %i %i %i\n", plane, lowpass_height, lowpass_width, highpass_stride);
+        if (s->progressive) {
+            low    = s->plane[plane].subband[0];
+            high   = s->plane[plane].subband[8];
+            output = s->plane[plane].l_h[6];
+            for (i = 0; i < lowpass_width; i++) {
+                vert_filter(output, lowpass_width, low, lowpass_width, high, highpass_stride, lowpass_height);
+                low++;
+                high++;
+                output++;
+            }
 
-        low    = s->plane[plane].subband[0];
-        high   = s->plane[plane].subband[8];
-        output = s->plane[plane].l_h[6];
-        for (i = 0; i < lowpass_width; i++) {
-            vert_filter(output, lowpass_width, low, lowpass_width, high, highpass_stride, lowpass_height);
-            low++;
-            high++;
-            output++;
-        }
+            low    = s->plane[plane].subband[7];
+            high   = s->plane[plane].subband[9];
+            output = s->plane[plane].l_h[7];
+            for (i = 0; i < lowpass_width; i++) {
+                vert_filter(output, lowpass_width, low, highpass_stride, high, highpass_stride, lowpass_height);
+                low++;
+                high++;
+                output++;
+            }
 
-        low    = s->plane[plane].subband[7];
-        high   = s->plane[plane].subband[9];
-        output = s->plane[plane].l_h[7];
-        for (i = 0; i < lowpass_width; i++) {
-            vert_filter(output, lowpass_width, low, highpass_stride, high, highpass_stride, lowpass_height);
-            low++;
-            high++;
-            output++;
-        }
+            dst = (int16_t *)pic->data[act_plane];
+            low  = s->plane[plane].l_h[6];
+            high = s->plane[plane].l_h[7];
+            for (i = 0; i < lowpass_height * 2; i++) {
+                horiz_filter_clip(dst, low, high, lowpass_width, s->bpc);
+                low  += lowpass_width;
+                high += lowpass_width;
+                dst  += pic->linesize[act_plane] / 2;
+            }
+        } else {
+            av_log(avctx, AV_LOG_DEBUG, "interlaced frame ? %d", pic->interlaced_frame);
+            pic->interlaced_frame = 1;
+            low    = s->plane[plane].subband[0];
+            high   = s->plane[plane].subband[7];
+            output = s->plane[plane].l_h[6];
+            for (i = 0; i < lowpass_height; i++) {
+                horiz_filter(output, low, high, lowpass_width);
+                low    += lowpass_width;
+                high   += lowpass_width;
+                output += lowpass_width * 2;
+            }
 
-        dst = (int16_t *)pic->data[act_plane];
-        low  = s->plane[plane].l_h[6];
-        high = s->plane[plane].l_h[7];
-        for (i = 0; i < lowpass_height * 2; i++) {
-            horiz_filter_clip(dst, low, high, lowpass_width, s->bpc);
-            low  += lowpass_width;
-            high += lowpass_width;
-            dst  += pic->linesize[act_plane] / 2;
+            low    = s->plane[plane].subband[8];
+            high   = s->plane[plane].subband[9];
+            output = s->plane[plane].l_h[7];
+            for (i = 0; i < lowpass_height; i++) {
+                horiz_filter(output, low, high, lowpass_width);
+                low    += lowpass_width;
+                high   += lowpass_width;
+                output += lowpass_width * 2;
+            }
+
+            dst  = (int16_t *)pic->data[act_plane];
+            low  = s->plane[plane].l_h[6];
+            high = s->plane[plane].l_h[7];
+            for (i = 0; i < lowpass_height; i++) {
+                interlaced_vertical_filter(dst, low, high, lowpass_width * 2,  pic->linesize[act_plane]/2, act_plane);
+                low  += lowpass_width * 2;
+                high += lowpass_width * 2;
+                dst  += pic->linesize[act_plane];
+            }
         }
     }
 
@@ -775,11 +932,11 @@
     return avpkt->size;
 }
 
-static av_cold int cfhd_close_decoder(AVCodecContext *avctx)
+static av_cold int cfhd_close(AVCodecContext *avctx)
 {
     CFHDContext *s = avctx->priv_data;
 
-    free_buffers(avctx);
+    free_buffers(s);
 
     if (!avctx->internal->is_copy) {
         ff_free_vlc(&s->vlc_9);
@@ -790,14 +947,14 @@
 }
 
 AVCodec ff_cfhd_decoder = {
-    .name           = "cfhd",
-    .long_name      = NULL_IF_CONFIG_SMALL("Cineform HD"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_CFHD,
-    .priv_data_size = sizeof(CFHDContext),
-    .init           = cfhd_decode_init,
-    .close          = cfhd_close_decoder,
-    .decode         = cfhd_decode,
-    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
-    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
+    .name             = "cfhd",
+    .long_name        = NULL_IF_CONFIG_SMALL("Cineform HD"),
+    .type             = AVMEDIA_TYPE_VIDEO,
+    .id               = AV_CODEC_ID_CFHD,
+    .priv_data_size   = sizeof(CFHDContext),
+    .init             = cfhd_init,
+    .close            = cfhd_close,
+    .decode           = cfhd_decode,
+    .capabilities     = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
+    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
 };

diff --git a/libavcodec/cfhd.h b/libavcodec/cfhd.h
index 67a0e4c..4f2c82d 100644
--- a/libavcodec/cfhd.h
+++ b/libavcodec/cfhd.h

@@ -26,11 +26,12 @@
 #include "libavutil/avassert.h"
 
 #include "avcodec.h"
+#include "bytestream.h"
 #include "get_bits.h"
+#include "vlc.h"
 
-#define VLC_BITS 9
-#define NB_VLC_TABLE_9 (71+3)
-#define NB_VLC_TABLE_18 (263+1)
+#define VLC_BITS       9
+#define SUBBAND_COUNT 10
 
 typedef struct CFHD_RL_VLC_ELEM {
     int16_t level;
@@ -43,7 +44,7 @@
 typedef struct SubBand {
     int level;
     int orientation;
-    int stride;
+    ptrdiff_t stride;
     int a_width;
     int width;
     int a_height;
@@ -62,12 +63,18 @@
     int16_t *idwt_tmp;
 
     /* TODO: merge this into SubBand structure */
-    int16_t *subband[10];
+    int16_t *subband[SUBBAND_COUNT];
     int16_t *l_h[8];
 
     SubBand band[DWT_LEVELS][4];
 } Plane;
 
+typedef struct Peak {
+    int level;
+    int offset;
+    GetByteContext base;
+} Peak;
+
 typedef struct CFHDContext {
     AVCodecContext *avctx;
 
@@ -79,18 +86,17 @@
 
     GetBitContext gb;
 
-    int chroma_x_shift;
-    int chroma_y_shift;
-
     int coded_width;
     int coded_height;
-    int coded_format;
+    int cropped_height;
+    enum AVPixelFormat coded_format;
+    int progressive;
 
     int a_width;
     int a_height;
     int a_format;
 
-    int bpc;
+    int bpc; // bits per channel/component
     int channel_cnt;
     int subband_cnt;
     int channel_num;
@@ -100,13 +106,14 @@
     int pshift;
 
     int codebook;
+    int difference_coding;
     int subband_num;
     int level;
     int subband_num_actual;
 
     uint8_t prescale_shift[3];
     Plane plane[4];
-
+    Peak peak;
 } CFHDContext;
 
 int ff_cfhd_init_vlcs(CFHDContext *s);

diff --git a/libavcodec/cfhddata.c b/libavcodec/cfhddata.c
index 9330d34..5df68d4 100644
--- a/libavcodec/cfhddata.c
+++ b/libavcodec/cfhddata.c

@@ -18,7 +18,10 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "stdint.h"
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+
 #include "cfhd.h"
 
 /* some special codewords, not sure what they all mean */
@@ -29,6 +32,31 @@
 #define TABLE_9_BAND_END3 0x38F0B3Eh
 #define TABLE_9_BAND_END_LEN3 26
 
+#define NB_VLC_TABLE_9   (71 + 3)
+#define NB_VLC_TABLE_18 (263 + 1)
+
+static const uint32_t table_9_vlc_bits[NB_VLC_TABLE_9] = {
+            0,       0x2,       0xc,      0x1a,
+         0x1d,      0x1e,      0x39,      0x3e,
+         0x37,      0x7e,      0x6c,      0xe2,
+         0xfe,      0xdb,      0xe0,     0x1c3,
+        0x1c6,     0x1ff,     0x1fe,     0x1b5,
+        0x369,     0x385,     0x71d,     0x6d0,
+        0x708,     0x71f,     0xe3d,     0xe39,
+        0xe13,     0xe12,    0x1c71,    0x1b45,
+       0x1b47,    0x3689,    0x38f2,    0x38e1,
+       0x38e0,    0x38f1,    0x3688,    0x6d1b,
+       0x71e0,    0x6d19,    0x71e7,    0xe3cd,
+       0xda35,    0xda30,    0xe3c3,   0x1b469,
+      0x1b462,   0x1c798,   0x1b463,   0x1c799,
+      0x38f08,   0x38f09,   0x38f0a,   0x6d1a0,
+      0x6d1a3,   0x6d1a1,   0xda345,   0xda344,
+      0xe3c2d,   0xe3c2f,   0xe3c2e,  0x38f0b2,
+     0x71e160,  0x71e162,  0x71e166,  0x71e161,
+     0xe3c2ce,  0xe3c2c6,  0xe3c2c7, 0x1C7859E,
+    0x38F0B3F, 0x38F0B3E,
+};
+
 static const uint8_t table_9_vlc_len[NB_VLC_TABLE_9] = {
      1,    2,    4,    5,    5,    5,    6,    6,
      6,    7,    7,    8,    8,    8,    8,    9,
@@ -42,19 +70,6 @@
     26,   26,
 };
 
-static const uint32_t table_9_vlc_bits[NB_VLC_TABLE_9] = {
-            0,       0x2,       0xc,      0x1a,      0x1d,      0x1e,      0x39,      0x3e,
-         0x37,      0x7e,      0x6c,      0xe2,      0xfe,      0xdb,      0xe0,     0x1c3,
-        0x1c6,     0x1ff,     0x1fe,     0x1b5,     0x369,     0x385,     0x71d,     0x6d0,
-        0x708,     0x71f,     0xe3d,     0xe39,     0xe13,     0xe12,    0x1c71,    0x1b45,
-       0x1b47,    0x3689,    0x38f2,    0x38e1,    0x38e0,    0x38f1,    0x3688,    0x6d1b,
-       0x71e0,    0x6d19,    0x71e7,    0xe3cd,    0xda35,    0xda30,    0xe3c3,   0x1b469,
-      0x1b462,   0x1c798,   0x1b463,   0x1c799,   0x38f08,   0x38f09,   0x38f0a,   0x6d1a0,
-      0x6d1a3,   0x6d1a1,   0xda345,   0xda344,   0xe3c2d,   0xe3c2f,   0xe3c2e,  0x38f0b2,
-     0x71e160,  0x71e162,  0x71e166,  0x71e161,  0xe3c2ce,  0xe3c2c6,  0xe3c2c7, 0x1C7859E,
-    0x38F0B3F, 0x38F0B3E,
-};
-
 static const uint16_t table_9_vlc_run[NB_VLC_TABLE_9] = {
     1,    1,    1,    1,   12,    1,   32,  160,
     1,    1,    1,  320,    1,    1,   80,  120,
@@ -82,39 +97,72 @@
 };
 
 static const uint32_t table_18_vlc_bits[NB_VLC_TABLE_18] = {
-            0,       0x2,       0x7,      0x19,      0x30,      0x36,      0x6f,      0x63,
-         0x69,      0x6b,      0xd1,      0xd4,      0xdc,     0x189,     0x18a,     0x1a0,
-        0x1ab,     0x377,     0x310,     0x316,     0x343,     0x354,     0x375,     0x623,
-        0x684,     0x685,     0x6ab,     0x6ec,     0xddb,     0xc5c,     0xc5e,     0xc44,
-        0xd55,     0xdd1,     0xdd3,    0x1bb5,    0x188b,    0x18bb,    0x18bf,    0x1aa8,
-       0x1ba0,    0x1ba5,    0x1ba4,    0x3115,    0x3175,    0x317d,    0x3553,    0x3768,
-       0x6e87,    0x6ed3,    0x62e8,    0x62f8,    0x6228,    0x6aa4,    0x6e85,    0xc453,
-       0xc5d3,    0xc5f3,    0xdda4,    0xdd08,    0xdd0c,   0x1bb4b,   0x1bb4a,   0x18ba5,
-      0x18be5,   0x1aa95,   0x1aa97,   0x188a4,   0x1ba13,   0x31748,   0x317c8,   0x35528,
-      0x3552c,   0x37424,   0x37434,   0x37436,   0x62294,   0x62e92,   0x62f92,   0x6aa52,
-      0x6aa5a,   0x6e86a,   0x6e86e,   0x6e84a,   0xc452a,   0xc5d27,   0xc5f26,   0xd54a6,
-      0xd54b6,   0xdd096,   0xdd0d6,   0xdd0de,  0x188a56,  0x18ba4d,  0x18be4e,  0x18be4f,
-     0x1aa96e,  0x1ba12e,  0x1ba12f,  0x1ba1af,  0x1ba1bf,  0x37435d,  0x37437d,  0x317498,
-     0x35529c,  0x35529d,  0x3552de,  0x3552df,  0x62e933,  0x62295d,  0x6aa53d,  0x6aa53f,
-     0x6aa53e,  0x6e86b9,  0x6e86f8,  0xd54a79,  0xc5d265,  0xc452b8,  0xdd0d71,  0xd54a78,
-     0xdd0d70,  0xdd0df2,  0xdd0df3, 0x188a5f6, 0x188a5f5, 0x188a5f4, 0x188a5f3, 0x188a5f2,
-    0x188a5f1, 0x188a5f0, 0x188a5ef, 0x188a5ee, 0x188a5ed, 0x188a5aa, 0x188a5e3, 0x188a5df,
-    0x188a589, 0x188a5dd, 0x188a578, 0x188a5e0, 0x188a588, 0x188a5d6, 0x188a5db, 0x188a5e1,
-    0x188a587, 0x188a59a, 0x188a5c4, 0x188a5ec, 0x188a586, 0x188a573, 0x188a59c, 0x188a5c8,
-    0x188a5fb, 0x188a5a1, 0x188a5eb, 0x188a5a8, 0x188a584, 0x188a5d2, 0x188a599, 0x188a598,
-    0x188a583, 0x18ba4c9, 0x188a5d0, 0x188a594, 0x188a582, 0x188a5cb, 0x188a5d8, 0x188a5e7,
-    0x188a581, 0x188a5ea, 0x188a5a9, 0x188a5a6, 0x188a580, 0x188a5a0, 0x188a59d, 0x188a5c3,
-    0x188a57f, 0x188a5c0, 0x188a5de, 0x188a5d4, 0x188a57e, 0x188a5c2, 0x188a592, 0x188a5cd,
-    0x188a57d, 0x188a5a3, 0x188a5e8, 0x188a5a2, 0x188a57c, 0x188a58e, 0x188a5b3, 0x188a5b2,
-    0x188a5b1, 0x188a5b0, 0x188a5af, 0x188a5ae, 0x188a5ad, 0x188a5ac, 0x188a5ab, 0x188a5da,
-    0x188a5e4, 0x188a5e5, 0x188a5d9, 0x188a5b5, 0x188a5bc, 0x188a5bd, 0x188a5e9, 0x188a5cc,
-    0x188a585, 0x188a5d3, 0x188a5e2, 0x188a595, 0x188a596, 0x188a5b8, 0x188a590, 0x188a5c9,
-    0x188a5a4, 0x188a5e6, 0x188a5a5, 0x188a5ce, 0x188a5bf, 0x188a572, 0x188a59b, 0x188a5be,
-    0x188a5c7, 0x188a5ca, 0x188a5d5, 0x188a57b, 0x188a58d, 0x188a58c, 0x188a58b, 0x188a58a,
-    0x18ba4c8, 0x188a5c5, 0x188a5fa, 0x188a5bb, 0x188a5c1, 0x188a5cf, 0x188a5b9, 0x188a5b6,
-    0x188a597, 0x188a5fe, 0x188a5d7, 0x188a5ba, 0x188a591, 0x188a5c6, 0x188a5dc, 0x188a57a,
-    0x188a59f, 0x188a5f9, 0x188a5b4, 0x188a5a7, 0x188a58f, 0x188a5fd, 0x188a5b7, 0x188a593,
-    0x188a59e, 0x188a5f8, 0x188a5ff, 0x188a5fc, 0x188a579, 0x188a5f7, 0x3114ba2, 0x3114ba3,
+            0,       0x2,       0x7,      0x19,
+         0x30,      0x36,      0x6f,      0x63,
+         0x69,      0x6b,      0xd1,      0xd4,
+         0xdc,     0x189,     0x18a,     0x1a0,
+        0x1ab,     0x377,     0x310,     0x316,
+        0x343,     0x354,     0x375,     0x623,
+        0x684,     0x685,     0x6ab,     0x6ec,
+        0xddb,     0xc5c,     0xc5e,     0xc44,
+        0xd55,     0xdd1,     0xdd3,    0x1bb5,
+       0x188b,    0x18bb,    0x18bf,    0x1aa8,
+       0x1ba0,    0x1ba5,    0x1ba4,    0x3115,
+       0x3175,    0x317d,    0x3553,    0x3768,
+       0x6e87,    0x6ed3,    0x62e8,    0x62f8,
+       0x6228,    0x6aa4,    0x6e85,    0xc453,
+       0xc5d3,    0xc5f3,    0xdda4,    0xdd08,
+       0xdd0c,   0x1bb4b,   0x1bb4a,   0x18ba5,
+      0x18be5,   0x1aa95,   0x1aa97,   0x188a4,
+      0x1ba13,   0x31748,   0x317c8,   0x35528,
+      0x3552c,   0x37424,   0x37434,   0x37436,
+      0x62294,   0x62e92,   0x62f92,   0x6aa52,
+      0x6aa5a,   0x6e86a,   0x6e86e,   0x6e84a,
+      0xc452a,   0xc5d27,   0xc5f26,   0xd54a6,
+      0xd54b6,   0xdd096,   0xdd0d6,   0xdd0de,
+     0x188a56,  0x18ba4d,  0x18be4e,  0x18be4f,
+     0x1aa96e,  0x1ba12e,  0x1ba12f,  0x1ba1af,
+     0x1ba1bf,  0x37435d,  0x37437d,  0x317498,
+     0x35529c,  0x35529d,  0x3552de,  0x3552df,
+     0x62e933,  0x62295d,  0x6aa53d,  0x6aa53f,
+     0x6aa53e,  0x6e86b9,  0x6e86f8,  0xd54a79,
+     0xc5d265,  0xc452b8,  0xdd0d71,  0xd54a78,
+     0xdd0d70,  0xdd0df2,  0xdd0df3, 0x188a5f6,
+    0x188a5f5, 0x188a5f4, 0x188a5f3, 0x188a5f2,
+    0x188a5f1, 0x188a5f0, 0x188a5ef, 0x188a5ee,
+    0x188a5ed, 0x188a5aa, 0x188a5e3, 0x188a5df,
+    0x188a589, 0x188a5dd, 0x188a578, 0x188a5e0,
+    0x188a588, 0x188a5d6, 0x188a5db, 0x188a5e1,
+    0x188a587, 0x188a59a, 0x188a5c4, 0x188a5ec,
+    0x188a586, 0x188a573, 0x188a59c, 0x188a5c8,
+    0x188a5fb, 0x188a5a1, 0x188a5eb, 0x188a5a8,
+    0x188a584, 0x188a5d2, 0x188a599, 0x188a598,
+    0x188a583, 0x18ba4c9, 0x188a5d0, 0x188a594,
+    0x188a582, 0x188a5cb, 0x188a5d8, 0x188a5e7,
+    0x188a581, 0x188a5ea, 0x188a5a9, 0x188a5a6,
+    0x188a580, 0x188a5a0, 0x188a59d, 0x188a5c3,
+    0x188a57f, 0x188a5c0, 0x188a5de, 0x188a5d4,
+    0x188a57e, 0x188a5c2, 0x188a592, 0x188a5cd,
+    0x188a57d, 0x188a5a3, 0x188a5e8, 0x188a5a2,
+    0x188a57c, 0x188a58e, 0x188a5b3, 0x188a5b2,
+    0x188a5b1, 0x188a5b0, 0x188a5af, 0x188a5ae,
+    0x188a5ad, 0x188a5ac, 0x188a5ab, 0x188a5da,
+    0x188a5e4, 0x188a5e5, 0x188a5d9, 0x188a5b5,
+    0x188a5bc, 0x188a5bd, 0x188a5e9, 0x188a5cc,
+    0x188a585, 0x188a5d3, 0x188a5e2, 0x188a595,
+    0x188a596, 0x188a5b8, 0x188a590, 0x188a5c9,
+    0x188a5a4, 0x188a5e6, 0x188a5a5, 0x188a5ce,
+    0x188a5bf, 0x188a572, 0x188a59b, 0x188a5be,
+    0x188a5c7, 0x188a5ca, 0x188a5d5, 0x188a57b,
+    0x188a58d, 0x188a58c, 0x188a58b, 0x188a58a,
+    0x18ba4c8, 0x188a5c5, 0x188a5fa, 0x188a5bb,
+    0x188a5c1, 0x188a5cf, 0x188a5b9, 0x188a5b6,
+    0x188a597, 0x188a5fe, 0x188a5d7, 0x188a5ba,
+    0x188a591, 0x188a5c6, 0x188a5dc, 0x188a57a,
+    0x188a59f, 0x188a5f9, 0x188a5b4, 0x188a5a7,
+    0x188a58f, 0x188a5fd, 0x188a5b7, 0x188a593,
+    0x188a59e, 0x188a5f8, 0x188a5ff, 0x188a5fc,
+    0x188a579, 0x188a5f7, 0x3114ba2, 0x3114ba3,
 };
 
 static const uint8_t table_18_vlc_len[NB_VLC_TABLE_18] = {
@@ -154,39 +202,39 @@
 };
 
 static const uint16_t table_18_vlc_run[NB_VLC_TABLE_18] = {
-    1,    1,    1,    1,    1,    1,    1,    1,
-   12,    1,   20,    1,    1,    1,   32,    1,
-    1,    1,    1,    1,   60,    1,    1,    1,
-    1,  100,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,  180,    1,
-    1,  320,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    1,
-    1,    1,    1,    1,    1,    1,    1,    2,
+     1,    1,    1,    1,    1,    1,    1,    1,
+    12,    1,   20,    1,    1,    1,   32,    1,
+     1,    1,    1,    1,   60,    1,    1,    1,
+     1,  100,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,  180,    1,
+     1,  320,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    1,
+     1,    1,    1,    1,    1,    1,    1,    2,
 };
 
 static const uint8_t table_18_vlc_level[NB_VLC_TABLE_18] = {

diff --git a/libavcodec/chomp_bsf.c b/libavcodec/chomp_bsf.c
index cc94380..3ba45f3 100644
--- a/libavcodec/chomp_bsf.c
+++ b/libavcodec/chomp_bsf.c

@@ -23,20 +23,16 @@
 #include "bsf.h"
 #include "internal.h"
 
-static int chomp_filter(AVBSFContext *ctx, AVPacket *out)
+static int chomp_filter(AVBSFContext *ctx, AVPacket *pkt)
 {
-    AVPacket *in;
     int ret;
 
-    ret = ff_bsf_get_packet(ctx, &in);
+    ret = ff_bsf_get_packet_ref(ctx, pkt);
     if (ret < 0)
         return ret;
 
-    while (in->size > 0 && !in->data[in->size - 1])
-        in->size--;
-
-    av_packet_move_ref(out, in);
-    av_packet_free(&in);
+    while (pkt->size > 0 && !pkt->data[pkt->size - 1])
+        pkt->size--;
 
     return 0;
 }

diff --git a/libavcodec/cinepak.c b/libavcodec/cinepak.c
index 89e940a..9b00774 100644
--- a/libavcodec/cinepak.c
+++ b/libavcodec/cinepak.c

@@ -315,14 +315,11 @@
     return AVERROR_INVALIDDATA;
 }
 
-static int cinepak_decode (CinepakContext *s)
+static int cinepak_predecode_check (CinepakContext *s)
 {
-    const uint8_t  *eod = (s->data + s->size);
-    int           i, result, strip_size, frame_flags, num_strips;
-    int           y0 = 0;
+    int           num_strips;
     int           encoded_buf_size;
 
-    frame_flags = s->data[0];
     num_strips  = AV_RB16 (&s->data[8]);
     encoded_buf_size = AV_RB24(&s->data[1]);
 
@@ -353,6 +350,21 @@
             s->sega_film_skip_bytes = 0;
     }
 
+    if (s->size < 10 + s->sega_film_skip_bytes + num_strips * 12)
+        return AVERROR_INVALIDDATA;
+
+    return 0;
+}
+
+static int cinepak_decode (CinepakContext *s)
+{
+    const uint8_t  *eod = (s->data + s->size);
+    int           i, result, strip_size, frame_flags, num_strips;
+    int           y0 = 0;
+
+    frame_flags = s->data[0];
+    num_strips  = AV_RB16 (&s->data[8]);
+
     s->data += 10 + s->sega_film_skip_bytes;
 
     num_strips = FFMIN(num_strips, MAX_STRIPS);
@@ -432,6 +444,7 @@
     const uint8_t *buf = avpkt->data;
     int ret = 0, buf_size = avpkt->size;
     CinepakContext *s = avctx->priv_data;
+    int num_strips;
 
     s->data = buf;
     s->size = buf_size;
@@ -439,6 +452,17 @@
     if (s->size < 10)
         return AVERROR_INVALIDDATA;
 
+    num_strips = AV_RB16 (&s->data[8]);
+
+    //Empty frame, do not waste time
+    if (!num_strips && (!s->palette_video || !av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, NULL)))
+        return buf_size;
+
+    if ((ret = cinepak_predecode_check(s)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "cinepak_predecode_check failed\n");
+        return ret;
+    }
+
     if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
 

diff --git a/libavcodec/cinepakenc.c b/libavcodec/cinepakenc.c
index a28f669..93917fa 100644
--- a/libavcodec/cinepakenc.c
+++ b/libavcodec/cinepakenc.c

@@ -4,90 +4,61 @@
  *
  * Fixes and improvements, vintage decoders compatibility
  *  (c) 2013, 2014 Rl, Aetey Global Technologies AB
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
 
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the "Software"),
-to deal in the Software without restriction, including without limitation
-the rights to use, copy, modify, merge, publish, distribute, sublicense,
-and/or sell copies of the Software, and to permit persons to whom the
-Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
-
+/*
+ * TODO:
+ * - optimize: color space conversion (move conversion to libswscale), ...
  * MAYBE:
  * - "optimally" split the frame into several non-regular areas
  *   using a separate codebook pair for each area and approximating
  *   the area by several rectangular strips (generally not full width ones)
  *   (use quadtree splitting? a simple fixed-granularity grid?)
- *
- *
- * version 2014-01-23 Rl
- * - added option handling for flexibility
- *
- * version 2014-01-21 Rl
- * - believe it or not, now we get even smaller files, with better quality
- *   (which means I missed an optimization earlier :)
- *
- * version 2014-01-20 Rl
- * - made the encoder compatible with vintage decoders
- *   and added some yet unused code for possible future
- *   incremental codebook updates
- * - fixed a small memory leak
- *
- * version 2013-04-28 Rl
- * - bugfixed codebook optimization logic
- *
- * version 2013-02-14 Rl
- * "Valentine's Day" version:
- * - made strip division more robust
- * - minimized bruteforcing the number of strips,
- *   (costs some R/D but speeds up compession a lot), the heuristic
- *   assumption is that score as a function of the number of strips has
- *   one wide minimum which moves slowly, of course not fully true
- * - simplified codebook generation,
- *   the old code was meant for other optimizations than we actually do
- * - optimized the codebook generation / error estimation for MODE_MC
- *
- * version 2013-02-12 Rl
- * - separated codebook training sets, avoided the transfer of wasted bytes,
- *   which yields both better quality and smaller files
- * - now using the correct colorspace (TODO: move conversion to libswscale)
- *
- * version 2013-02-08 Rl
- * - fixes/optimization in multistrip encoding and codebook size choice,
- *   quality/bitrate is now better than that of the binary proprietary encoder
  */
 
-#include "libavutil/intreadwrite.h"
-#include "avcodec.h"
-#include "libavutil/lfg.h"
-#include "elbg.h"
-#include "internal.h"
+#include <string.h>
 
 #include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/lfg.h"
 #include "libavutil/opt.h"
 
+#include "avcodec.h"
+#include "elbg.h"
+#include "internal.h"
+
 #define CVID_HEADER_SIZE 10
 #define STRIP_HEADER_SIZE 12
 #define CHUNK_HEADER_SIZE 4
 
 #define MB_SIZE 4           //4x4 MBs
-#define MB_AREA (MB_SIZE*MB_SIZE)
+#define MB_AREA (MB_SIZE * MB_SIZE)
 
-#define VECTOR_MAX 6        //six or four entries per vector depending on format
-#define CODEBOOK_MAX 256    //size of a codebook
+#define VECTOR_MAX     6    // six or four entries per vector depending on format
+#define CODEBOOK_MAX 256    // size of a codebook
 
-#define MAX_STRIPS  32      //Note: having fewer choices regarding the number of strips speeds up encoding (obviously)
-#define MIN_STRIPS  1       //Note: having more strips speeds up encoding the frame (this is less obvious)
+#define MAX_STRIPS  32      // Note: having fewer choices regarding the number of strips speeds up encoding (obviously)
+#define MIN_STRIPS   1      // Note: having more strips speeds up encoding the frame (this is less obvious)
 // MAX_STRIPS limits the maximum quality you can reach
 //            when you want high quality on high resolutions,
 // MIN_STRIPS limits the minimum efficiently encodable bit rate
@@ -97,7 +68,7 @@
 // NOTE the decoder in ffmpeg has its own arbitrary limitation on the number
 // of strips, currently 32
 
-typedef enum {
+typedef enum CinepakMode {
     MODE_V1_ONLY = 0,
     MODE_V1_V4,
     MODE_MC,
@@ -105,7 +76,7 @@
     MODE_COUNT,
 } CinepakMode;
 
-typedef enum {
+typedef enum mb_encoding {
     ENC_V1,
     ENC_V4,
     ENC_SKIP,
@@ -113,24 +84,24 @@
     ENC_UNCERTAIN
 } mb_encoding;
 
-typedef struct {
-    int v1_vector;                  //index into v1 codebook
-    int v1_error;                   //error when using V1 encoding
-    int v4_vector[4];               //indices into v4 codebook
-    int v4_error;                   //error when using V4 encoding
-    int skip_error;                 //error when block is skipped (aka copied from last frame)
-    mb_encoding best_encoding;      //last result from calculate_mode_score()
+typedef struct mb_info {
+    int v1_vector;              // index into v1 codebook
+    int v1_error;               // error when using V1 encoding
+    int v4_vector[4];           // indices into v4 codebook
+    int v4_error;               // error when using V4 encoding
+    int skip_error;             // error when block is skipped (aka copied from last frame)
+    mb_encoding best_encoding;  // last result from calculate_mode_score()
 } mb_info;
 
-typedef struct {
-    int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];
-    int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];
+typedef struct strip_info {
+    int v1_codebook[CODEBOOK_MAX * VECTOR_MAX];
+    int v4_codebook[CODEBOOK_MAX * VECTOR_MAX];
     int v1_size;
     int v4_size;
     CinepakMode mode;
 } strip_info;
 
-typedef struct {
+typedef struct CinepakEncContext {
     const AVClass *class;
     AVCodecContext *avctx;
     unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
@@ -146,15 +117,10 @@
     uint64_t lambda;
     int *codebook_input;
     int *codebook_closest;
-    mb_info *mb;                                //MB RD state
-    int min_strips;          //the current limit
-    int max_strips;          //the current limit
-#ifdef CINEPAKENC_DEBUG
-    mb_info *best_mb;                           //TODO: remove. only used for printing stats
-    int num_v1_mode, num_v4_mode, num_mc_mode;
-    int num_v1_encs, num_v4_encs, num_skips;
-#endif
-// options
+    mb_info *mb;                // MB RD state
+    int min_strips;             // the current limit
+    int max_strips;             // the current limit
+    // options
     int max_extra_cb_iterations;
     int skip_empty_cb;
     int min_min_strips;
@@ -165,11 +131,16 @@
 #define OFFSET(x) offsetof(CinepakEncContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
-    { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
-    { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
-    { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
-    { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
+    { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower",
+      OFFSET(max_extra_cb_iterations),  AV_OPT_TYPE_INT, { .i64 =          2 },          0, INT_MAX,                 VE },
+    { "skip_empty_cb",           "Avoid wasting bytes, ignore vintage MacOS decoder",
+      OFFSET(skip_empty_cb),            AV_OPT_TYPE_BOOL, { .i64 =         0 },          0, 1,                       VE },
+    { "max_strips",              "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better",
+      OFFSET(max_max_strips),           AV_OPT_TYPE_INT, { .i64 =          3 }, MIN_STRIPS, MAX_STRIPS,              VE },
+    { "min_strips",              "Enforce min strips/frame, more is worse and faster, must be <= max_strips",
+      OFFSET(min_min_strips),           AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS,              VE },
+    { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower",
+      OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 =          0 },          0, MAX_STRIPS - MIN_STRIPS, VE },
     { NULL },
 };
 
@@ -187,13 +158,13 @@
 
     if (avctx->width & 3 || avctx->height & 3) {
         av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
-                avctx->width, avctx->height);
+               avctx->width, avctx->height);
         return AVERROR(EINVAL);
     }
 
     if (s->min_min_strips > s->max_max_strips) {
-        av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
-                s->min_min_strips, s->max_max_strips);
+        av_log(avctx, AV_LOG_ERROR, "minimum number of strips must not exceed maximum (got %i and %i)\n",
+               s->min_min_strips, s->max_max_strips);
         return AVERROR(EINVAL);
     }
 
@@ -207,23 +178,23 @@
         if (!(s->input_frame = av_frame_alloc()))
             goto enomem;
 
-    if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
+    if (!(s->codebook_input = av_malloc_array((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2, sizeof(*s->codebook_input))))
         goto enomem;
 
-    if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
+    if (!(s->codebook_closest = av_malloc_array((avctx->width * avctx->height) >> 2, sizeof(*s->codebook_closest))))
         goto enomem;
 
-    for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
-        if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
+    for (x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
+        if (!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
             goto enomem;
 
     mb_count = avctx->width * avctx->height / MB_AREA;
 
-    //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
-    //and full codebooks being replaced in INTER mode,
+    // the largest possible chunk is 0x31 with all MBs encoded in V4 mode
+    // and full codebooks being replaced in INTER mode,
     // which is 34 bits per MB
-    //and 2*256 extra flag bits per strip
-    strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
+    // and 2*256 extra flag bits per strip
+    strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX) / 8;
 
     frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
 
@@ -236,21 +207,16 @@
     if (!(s->mb = av_malloc_array(mb_count, sizeof(mb_info))))
         goto enomem;
 
-#ifdef CINEPAKENC_DEBUG
-    if (!(s->best_mb = av_malloc_array(mb_count, sizeof(mb_info))))
-        goto enomem;
-#endif
-
     av_lfg_init(&s->randctx, 1);
-    s->avctx = avctx;
-    s->w = avctx->width;
-    s->h = avctx->height;
+    s->avctx          = avctx;
+    s->w              = avctx->width;
+    s->h              = avctx->height;
     s->frame_buf_size = frame_buf_size;
-    s->curframe = 0;
-    s->keyint = avctx->keyint_min;
-    s->pix_fmt = avctx->pix_fmt;
+    s->curframe       = 0;
+    s->keyint         = avctx->keyint_min;
+    s->pix_fmt        = avctx->pix_fmt;
 
-    //set up AVFrames
+    // set up AVFrames
     s->last_frame->data[0]        = s->pict_bufs[0];
     s->last_frame->linesize[0]    = s->w;
     s->best_frame->data[0]        = s->pict_bufs[1];
@@ -259,32 +225,32 @@
     s->scratch_frame->linesize[0] = s->w;
 
     if (s->pix_fmt == AV_PIX_FMT_RGB24) {
-        s->last_frame->data[1]        = s->last_frame->data[0] + s->w * s->h;
-        s->last_frame->data[2]        = s->last_frame->data[1] + ((s->w * s->h) >> 2);
-        s->last_frame->linesize[1]    = s->last_frame->linesize[2] = s->w >> 1;
+        s->last_frame->data[1]     = s->last_frame->data[0] +   s->w * s->h;
+        s->last_frame->data[2]     = s->last_frame->data[1] + ((s->w * s->h) >> 2);
+        s->last_frame->linesize[1] =
+        s->last_frame->linesize[2] = s->w >> 1;
 
-        s->best_frame->data[1]        = s->best_frame->data[0] + s->w * s->h;
-        s->best_frame->data[2]        = s->best_frame->data[1] + ((s->w * s->h) >> 2);
-        s->best_frame->linesize[1]    = s->best_frame->linesize[2] = s->w >> 1;
+        s->best_frame->data[1]     = s->best_frame->data[0] +   s->w * s->h;
+        s->best_frame->data[2]     = s->best_frame->data[1] + ((s->w * s->h) >> 2);
+        s->best_frame->linesize[1] =
+        s->best_frame->linesize[2] = s->w >> 1;
 
-        s->scratch_frame->data[1]     = s->scratch_frame->data[0] + s->w * s->h;
+        s->scratch_frame->data[1]     = s->scratch_frame->data[0] +   s->w * s->h;
         s->scratch_frame->data[2]     = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
-        s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
+        s->scratch_frame->linesize[1] =
+        s->scratch_frame->linesize[2] = s->w >> 1;
 
-        s->input_frame->data[0]       = s->pict_bufs[3];
-        s->input_frame->linesize[0]   = s->w;
-        s->input_frame->data[1]       = s->input_frame->data[0] + s->w * s->h;
-        s->input_frame->data[2]       = s->input_frame->data[1] + ((s->w * s->h) >> 2);
-        s->input_frame->linesize[1]   = s->input_frame->linesize[2] = s->w >> 1;
+        s->input_frame->data[0]     = s->pict_bufs[3];
+        s->input_frame->linesize[0] = s->w;
+        s->input_frame->data[1]     = s->input_frame->data[0] +   s->w * s->h;
+        s->input_frame->data[2]     = s->input_frame->data[1] + ((s->w * s->h) >> 2);
+        s->input_frame->linesize[1] =
+        s->input_frame->linesize[2] = s->w >> 1;
     }
 
     s->min_strips = s->min_min_strips;
     s->max_strips = s->max_max_strips;
 
-#ifdef CINEPAKENC_DEBUG
-    s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
-#endif
-
     return 0;
 
 enomem:
@@ -298,90 +264,68 @@
     av_freep(&s->strip_buf);
     av_freep(&s->frame_buf);
     av_freep(&s->mb);
-#ifdef CINEPAKENC_DEBUG
-    av_freep(&s->best_mb);
-#endif
 
-    for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
+    for (x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
         av_freep(&s->pict_bufs[x]);
 
     return AVERROR(ENOMEM);
 }
 
-static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
-#ifdef CINEPAK_REPORT_SERR
-, int64_t *serr
-#endif
-)
+static int64_t calculate_mode_score(CinepakEncContext *s, int h,
+                                    strip_info *info, int report,
+                                    int *training_set_v1_shrunk,
+                                    int *training_set_v4_shrunk)
 {
-    //score = FF_LAMBDA_SCALE * error + lambda * bits
+    // score = FF_LAMBDA_SCALE * error + lambda * bits
     int x;
     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
-    int mb_count = s->w * h / MB_AREA;
+    int mb_count   = s->w * h / MB_AREA;
     mb_info *mb;
     int64_t score1, score2, score3;
     int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
-                   (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
-                   CHUNK_HEADER_SIZE) << 3;
+                               (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
+                               CHUNK_HEADER_SIZE) << 3;
 
-    //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9"PRId64" score mb_count %i", info->v1_size, info->v4_size, ret, mb_count);
-
-#ifdef CINEPAK_REPORT_SERR
-    *serr = 0;
-#endif
-
-    switch(info->mode) {
+    switch (info->mode) {
     case MODE_V1_ONLY:
-        //one byte per MB
+        // one byte per MB
         ret += s->lambda * 8 * mb_count;
 
-// while calculating we assume all blocks are ENC_V1
-        for(x = 0; x < mb_count; x++) {
-            mb = &s->mb[x];
+        // while calculating we assume all blocks are ENC_V1
+        for (x = 0; x < mb_count; x++) {
+            mb   = &s->mb[x];
             ret += FF_LAMBDA_SCALE * mb->v1_error;
-#ifdef CINEPAK_REPORT_SERR
-            *serr += mb->v1_error;
-#endif
-// this function is never called for report in MODE_V1_ONLY
-//            if(!report)
+            // this function is never called for report in MODE_V1_ONLY
+            // if (!report)
             mb->best_encoding = ENC_V1;
         }
 
         break;
     case MODE_V1_V4:
-        //9 or 33 bits per MB
-        if(report) {
-// no moves between the corresponding training sets are allowed
+        // 9 or 33 bits per MB
+        if (report) {
+            // no moves between the corresponding training sets are allowed
             *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
-            for(x = 0; x < mb_count; x++) {
+            for (x = 0; x < mb_count; x++) {
                 int mberr;
                 mb = &s->mb[x];
-                if(mb->best_encoding == ENC_V1)
-                    score1 = s->lambda * 9  + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
+                if (mb->best_encoding == ENC_V1)
+                    score1 = s->lambda * 9 + FF_LAMBDA_SCALE * (mberr = mb->v1_error);
                 else
-                    score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
+                    score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr = mb->v4_error);
                 ret += score1;
-#ifdef CINEPAK_REPORT_SERR
-                *serr += mberr;
-#endif
             }
         } else { // find best mode per block
-            for(x = 0; x < mb_count; x++) {
-                mb = &s->mb[x];
-                score1 = s->lambda * 9  + FF_LAMBDA_SCALE * mb->v1_error;
+            for (x = 0; x < mb_count; x++) {
+                mb     = &s->mb[x];
+                score1 = s->lambda * 9 + FF_LAMBDA_SCALE * mb->v1_error;
                 score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
 
-                if(score1 <= score2) {
+                if (score1 <= score2) {
                     ret += score1;
-#ifdef CINEPAK_REPORT_SERR
-                    *serr += mb->v1_error;
-#endif
                     mb->best_encoding = ENC_V1;
                 } else {
                     ret += score2;
-#ifdef CINEPAK_REPORT_SERR
-                    *serr += mb->v4_error;
-#endif
                     mb->best_encoding = ENC_V4;
                 }
             }
@@ -389,75 +333,51 @@
 
         break;
     case MODE_MC:
-        //1, 10 or 34 bits per MB
-        if(report) {
+        // 1, 10 or 34 bits per MB
+        if (report) {
             int v1_shrunk = 0, v4_shrunk = 0;
-            for(x = 0; x < mb_count; x++) {
+            for (x = 0; x < mb_count; x++) {
                 mb = &s->mb[x];
-// it is OK to move blocks to ENC_SKIP here
-// but not to any codebook encoding!
-                score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
-                if(mb->best_encoding == ENC_SKIP) {
+                // it is OK to move blocks to ENC_SKIP here
+                // but not to any codebook encoding!
+                score1 = s->lambda * 1 + FF_LAMBDA_SCALE * mb->skip_error;
+                if (mb->best_encoding == ENC_SKIP) {
                     ret += score1;
-#ifdef CINEPAK_REPORT_SERR
-                    *serr += mb->skip_error;
-#endif
-                } else if(mb->best_encoding == ENC_V1) {
-                    if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
+                } else if (mb->best_encoding == ENC_V1) {
+                    if ((score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
                         mb->best_encoding = ENC_SKIP;
                         ++v1_shrunk;
                         ret += score1;
-#ifdef CINEPAK_REPORT_SERR
-                        *serr += mb->skip_error;
-#endif
                     } else {
                         ret += score2;
-#ifdef CINEPAK_REPORT_SERR
-                        *serr += mb->v1_error;
-#endif
                     }
                 } else {
-                    if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
+                    if ((score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
                         mb->best_encoding = ENC_SKIP;
                         ++v4_shrunk;
                         ret += score1;
-#ifdef CINEPAK_REPORT_SERR
-                        *serr += mb->skip_error;
-#endif
                     } else {
                         ret += score3;
-#ifdef CINEPAK_REPORT_SERR
-                        *serr += mb->v4_error;
-#endif
                     }
                 }
             }
             *training_set_v1_shrunk = v1_shrunk;
             *training_set_v4_shrunk = v4_shrunk;
         } else { // find best mode per block
-            for(x = 0; x < mb_count; x++) {
-                mb = &s->mb[x];
-                score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
+            for (x = 0; x < mb_count; x++) {
+                mb     = &s->mb[x];
+                score1 = s->lambda * 1 + FF_LAMBDA_SCALE * mb->skip_error;
                 score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
                 score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
 
-                if(score1 <= score2 && score1 <= score3) {
+                if (score1 <= score2 && score1 <= score3) {
                     ret += score1;
-#ifdef CINEPAK_REPORT_SERR
-                    *serr += mb->skip_error;
-#endif
                     mb->best_encoding = ENC_SKIP;
-                } else if(score2 <= score3) {
+                } else if (score2 <= score3) {
                     ret += score2;
-#ifdef CINEPAK_REPORT_SERR
-                    *serr += mb->v1_error;
-#endif
                     mb->best_encoding = ENC_V1;
                 } else {
                     ret += score3;
-#ifdef CINEPAK_REPORT_SERR
-                    *serr += mb->v4_error;
-#endif
                     mb->best_encoding = ENC_V4;
                 }
             }
@@ -476,123 +396,125 @@
     return CHUNK_HEADER_SIZE;
 }
 
-static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
+static int encode_codebook(CinepakEncContext *s, int *codebook, int size,
+                           int chunk_type_yuv, int chunk_type_gray,
+                           unsigned char *buf)
 {
     int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
     int incremental_codebook_replacement_mode = 0; // hardcoded here,
-                // the compiler should notice that this is a constant -- rl
+    // the compiler should notice that this is a constant -- rl
 
     ret = write_chunk_header(buf,
-          s->pix_fmt == AV_PIX_FMT_RGB24 ?
-           chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
-           chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
-          entry_size * size
-           + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
+                             s->pix_fmt == AV_PIX_FMT_RGB24 ?
+                             chunk_type_yuv  + (incremental_codebook_replacement_mode ? 1 : 0) :
+                             chunk_type_gray + (incremental_codebook_replacement_mode ? 1 : 0),
+                             entry_size * size +
+                             (incremental_codebook_replacement_mode ? (size + 31) / 32 * 4 : 0));
 
-// we do codebook encoding according to the "intra" mode
-// but we keep the "dead" code for reference in case we will want
-// to use incremental codebook updates (which actually would give us
-// "kind of" motion compensation, especially in 1 strip/frame case) -- rl
-// (of course, the code will be not useful as-is)
-    if(incremental_codebook_replacement_mode) {
+    // we do codebook encoding according to the "intra" mode
+    // but we keep the "dead" code for reference in case we will want
+    // to use incremental codebook updates (which actually would give us
+    // "kind of" motion compensation, especially in 1 strip/frame case) -- rl
+    // (of course, the code will be not useful as-is)
+    if (incremental_codebook_replacement_mode) {
         int flags = 0;
         int flagsind;
-        for(x = 0; x < size; x++) {
-            if(flags == 0) {
+        for (x = 0; x < size; x++) {
+            if (flags == 0) {
                 flagsind = ret;
-                ret += 4;
-                flags = 0x80000000;
+                ret     += 4;
+                flags    = 0x80000000;
             } else
-                flags = ((flags>>1) | 0x80000000);
-            for(y = 0; y < entry_size; y++)
-                buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
-            if((flags&0xffffffff) == 0xffffffff) {
+                flags = ((flags >> 1) | 0x80000000);
+            for (y = 0; y < entry_size; y++)
+                buf[ret++] = codebook[y + x * entry_size] ^ (y >= 4 ? 0x80 : 0);
+            if ((flags & 0xffffffff) == 0xffffffff) {
                 AV_WB32(&buf[flagsind], flags);
                 flags = 0;
             }
         }
-        if(flags)
+        if (flags)
             AV_WB32(&buf[flagsind], flags);
     } else
-        for(x = 0; x < size; x++)
-            for(y = 0; y < entry_size; y++)
-                buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
+        for (x = 0; x < size; x++)
+            for (y = 0; y < entry_size; y++)
+                buf[ret++] = codebook[y + x * entry_size] ^ (y >= 4 ? 0x80 : 0);
 
     return ret;
 }
 
-//sets out to the sub picture starting at (x,y) in in
+// sets out to the sub picture starting at (x,y) in in
 static void get_sub_picture(CinepakEncContext *s, int x, int y,
                             uint8_t * in_data[4], int  in_linesize[4],
                             uint8_t *out_data[4], int out_linesize[4])
 {
-    out_data[0] = in_data[0] + x + y * in_linesize[0];
+    out_data[0]     = in_data[0] + x + y * in_linesize[0];
     out_linesize[0] = in_linesize[0];
 
-    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
-        out_data[1] = in_data[1] + (x >> 1) + (y >> 1) * in_linesize[1];
+    if (s->pix_fmt == AV_PIX_FMT_RGB24) {
+        out_data[1]     = in_data[1] + (x >> 1) + (y >> 1) * in_linesize[1];
         out_linesize[1] = in_linesize[1];
 
-        out_data[2] = in_data[2] + (x >> 1) + (y >> 1) * in_linesize[2];
+        out_data[2]     = in_data[2] + (x >> 1) + (y >> 1) * in_linesize[2];
         out_linesize[2] = in_linesize[2];
     }
 }
 
-//decodes the V1 vector in mb into the 4x4 MB pointed to by data
+// decodes the V1 vector in mb into the 4x4 MB pointed to by data
 static void decode_v1_vector(CinepakEncContext *s, uint8_t *data[4],
                              int linesize[4], int v1_vector, strip_info *info)
 {
     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 
     data[0][0] =
-            data[0][1] =
-            data[0][    linesize[0]] =
-            data[0][1+  linesize[0]] = info->v1_codebook[v1_vector*entry_size];
+    data[0][1] =
+    data[0][    linesize[0]] =
+    data[0][1 + linesize[0]] = info->v1_codebook[v1_vector * entry_size];
 
     data[0][2] =
-            data[0][3] =
-            data[0][2+  linesize[0]] =
-            data[0][3+  linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
+    data[0][3] =
+    data[0][2 + linesize[0]] =
+    data[0][3 + linesize[0]] = info->v1_codebook[v1_vector * entry_size + 1];
 
-    data[0][2*linesize[0]] =
-            data[0][1+2*linesize[0]] =
-            data[0][  3*linesize[0]] =
-            data[0][1+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
+    data[0][    2 * linesize[0]] =
+    data[0][1 + 2 * linesize[0]] =
+    data[0][    3 * linesize[0]] =
+    data[0][1 + 3 * linesize[0]] = info->v1_codebook[v1_vector * entry_size + 2];
 
-    data[0][2+2*linesize[0]] =
-            data[0][3+2*linesize[0]] =
-            data[0][2+3*linesize[0]] =
-            data[0][3+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
+    data[0][2 + 2 * linesize[0]] =
+    data[0][3 + 2 * linesize[0]] =
+    data[0][2 + 3 * linesize[0]] =
+    data[0][3 + 3 * linesize[0]] = info->v1_codebook[v1_vector * entry_size + 3];
 
-    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
+    if (s->pix_fmt == AV_PIX_FMT_RGB24) {
         data[1][0] =
-            data[1][1] =
-            data[1][    linesize[1]] =
-            data[1][1+  linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
+        data[1][1] =
+        data[1][    linesize[1]] =
+        data[1][1 + linesize[1]] = info->v1_codebook[v1_vector * entry_size + 4];
 
         data[2][0] =
-            data[2][1] =
-            data[2][    linesize[2]] =
-            data[2][1+  linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
+        data[2][1] =
+        data[2][    linesize[2]] =
+        data[2][1 + linesize[2]] = info->v1_codebook[v1_vector * entry_size + 5];
     }
 }
 
-//decodes the V4 vectors in mb into the 4x4 MB pointed to by data
+// decodes the V4 vectors in mb into the 4x4 MB pointed to by data
 static void decode_v4_vector(CinepakEncContext *s, uint8_t *data[4],
                              int linesize[4], int *v4_vector, strip_info *info)
 {
     int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 
-    for(i = y = 0; y < 4; y += 2) {
-        for(x = 0; x < 4; x += 2, i++) {
-            data[0][x   +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
-            data[0][x+1 +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
-            data[0][x   + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
-            data[0][x+1 + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
+    for (i = y = 0; y < 4; y += 2) {
+        for (x = 0; x < 4; x += 2, i++) {
+            data[0][x     +  y      * linesize[0]] = info->v4_codebook[v4_vector[i] * entry_size];
+            data[0][x + 1 +  y      * linesize[0]] = info->v4_codebook[v4_vector[i] * entry_size + 1];
+            data[0][x     + (y + 1) * linesize[0]] = info->v4_codebook[v4_vector[i] * entry_size + 2];
+            data[0][x + 1 + (y + 1) * linesize[0]] = info->v4_codebook[v4_vector[i] * entry_size + 3];
 
-            if(s->pix_fmt == AV_PIX_FMT_RGB24) {
-                data[1][(x>>1) + (y>>1)*linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
-                data[2][(x>>1) + (y>>1)*linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
+            if (s->pix_fmt == AV_PIX_FMT_RGB24) {
+                data[1][(x >> 1) + (y >> 1) * linesize[1]] = info->v4_codebook[v4_vector[i] * entry_size + 4];
+                data[2][(x >> 1) + (y >> 1) * linesize[2]] = info->v4_codebook[v4_vector[i] * entry_size + 5];
             }
         }
     }
@@ -604,19 +526,16 @@
 {
     int y, p;
 
-    for(y = 0; y < MB_SIZE; y++) {
-        memcpy(a_data[0]+y*a_linesize[0], b_data[0]+y*b_linesize[0],
+    for (y = 0; y < MB_SIZE; y++)
+        memcpy(a_data[0] + y * a_linesize[0], b_data[0] + y * b_linesize[0],
                MB_SIZE);
-    }
 
-    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
-        for(p = 1; p <= 2; p++) {
-            for(y = 0; y < MB_SIZE/2; y++) {
-                memcpy(a_data[p] + y*a_linesize[p],
-                       b_data[p] + y*b_linesize[p],
-                       MB_SIZE/2);
-            }
-        }
+    if (s->pix_fmt == AV_PIX_FMT_RGB24) {
+        for (p = 1; p <= 2; p++)
+            for (y = 0; y < MB_SIZE / 2; y++)
+                memcpy(a_data[p] + y * a_linesize[p],
+                       b_data[p] + y * b_linesize[p],
+                       MB_SIZE / 2);
     }
 }
 
@@ -627,74 +546,71 @@
 {
     int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
     int needs_extra_bit, should_write_temp;
-    unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
+    unsigned char temp[64]; // 32/2 = 16 V4 blocks at 4 B each -> 64 B
     mb_info *mb;
-    uint8_t *sub_scratch_data[4] = {0}, *sub_last_data[4] = {0};
-    int sub_scratch_linesize[4] = {0}, sub_last_linesize[4] = {0};
+    uint8_t *sub_scratch_data[4] = { 0 }, *sub_last_data[4] = { 0 };
+    int sub_scratch_linesize[4] = { 0 }, sub_last_linesize[4] = { 0 };
 
-    //encode codebooks
-////// MacOS vintage decoder compatibility dictates the presence of
-////// the codebook chunk even when the codebook is empty - pretty dumb...
-////// and also the certain order of the codebook chunks -- rl
-    if(info->v4_size || !s->skip_empty_cb)
+    // encode codebooks
+    ////// MacOS vintage decoder compatibility dictates the presence of
+    ////// the codebook chunk even when the codebook is empty - pretty dumb...
+    ////// and also the certain order of the codebook chunks -- rl
+    if (info->v4_size || !s->skip_empty_cb)
         ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
 
-    if(info->v1_size || !s->skip_empty_cb)
+    if (info->v1_size || !s->skip_empty_cb)
         ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
 
-    //update scratch picture
-    for(z = y = 0; y < h; y += MB_SIZE) {
-        for(x = 0; x < s->w; x += MB_SIZE, z++) {
+    // update scratch picture
+    for (z = y = 0; y < h; y += MB_SIZE)
+        for (x = 0; x < s->w; x += MB_SIZE, z++) {
             mb = &s->mb[z];
 
             get_sub_picture(s, x, y, scratch_data, scratch_linesize,
                             sub_scratch_data, sub_scratch_linesize);
 
-            if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
-                get_sub_picture(s, x, y,
-                                last_data, last_linesize,
+            if (info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
+                get_sub_picture(s, x, y, last_data, last_linesize,
                                 sub_last_data, sub_last_linesize);
                 copy_mb(s, sub_scratch_data, sub_scratch_linesize,
                         sub_last_data, sub_last_linesize);
-            } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
+            } else if (info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
                 decode_v1_vector(s, sub_scratch_data, sub_scratch_linesize,
                                  mb->v1_vector, info);
             else
                 decode_v4_vector(s, sub_scratch_data, sub_scratch_linesize,
                                  mb->v4_vector, info);
         }
-    }
 
-    switch(info->mode) {
+    switch (info->mode) {
     case MODE_V1_ONLY:
-        //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
         ret += write_chunk_header(buf + ret, 0x32, mb_count);
 
-        for(x = 0; x < mb_count; x++)
+        for (x = 0; x < mb_count; x++)
             buf[ret++] = s->mb[x].v1_vector;
 
         break;
     case MODE_V1_V4:
-        //remember header position
+        // remember header position
         header_ofs = ret;
-        ret += CHUNK_HEADER_SIZE;
+        ret       += CHUNK_HEADER_SIZE;
 
-        for(x = 0; x < mb_count; x += 32) {
+        for (x = 0; x < mb_count; x += 32) {
             flags = 0;
-            for(y = x; y < FFMIN(x+32, mb_count); y++)
-                if(s->mb[y].best_encoding == ENC_V4)
+            for (y = x; y < FFMIN(x + 32, mb_count); y++)
+                if (s->mb[y].best_encoding == ENC_V4)
                     flags |= 1 << (31 - y + x);
 
             AV_WB32(&buf[ret], flags);
             ret += 4;
 
-            for(y = x; y < FFMIN(x+32, mb_count); y++) {
+            for (y = x; y < FFMIN(x + 32, mb_count); y++) {
                 mb = &s->mb[y];
 
-                if(mb->best_encoding == ENC_V1)
+                if (mb->best_encoding == ENC_V1)
                     buf[ret++] = mb->v1_vector;
                 else
-                    for(z = 0; z < 4; z++)
+                    for (z = 0; z < 4; z++)
                         buf[ret++] = mb->v4_vector[z];
             }
         }
@@ -703,56 +619,56 @@
 
         break;
     case MODE_MC:
-        //remember header position
+        // remember header position
         header_ofs = ret;
-        ret += CHUNK_HEADER_SIZE;
-        flags = bits = temp_size = 0;
+        ret       += CHUNK_HEADER_SIZE;
+        flags      = bits = temp_size = 0;
 
-        for(x = 0; x < mb_count; x++) {
-            mb = &s->mb[x];
-            flags |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
-            needs_extra_bit = 0;
+        for (x = 0; x < mb_count; x++) {
+            mb                = &s->mb[x];
+            flags            |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
+            needs_extra_bit   = 0;
             should_write_temp = 0;
 
-            if(mb->best_encoding != ENC_SKIP) {
-                if(bits < 32)
+            if (mb->best_encoding != ENC_SKIP) {
+                if (bits < 32)
                     flags |= (mb->best_encoding == ENC_V4) << (31 - bits++);
                 else
                     needs_extra_bit = 1;
             }
 
-            if(bits == 32) {
+            if (bits == 32) {
                 AV_WB32(&buf[ret], flags);
-                ret += 4;
+                ret  += 4;
                 flags = bits = 0;
 
-                if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
+                if (mb->best_encoding == ENC_SKIP || needs_extra_bit) {
                     memcpy(&buf[ret], temp, temp_size);
-                    ret += temp_size;
+                    ret      += temp_size;
                     temp_size = 0;
                 } else
                     should_write_temp = 1;
             }
 
-            if(needs_extra_bit) {
+            if (needs_extra_bit) {
                 flags = (mb->best_encoding == ENC_V4) << 31;
-                bits = 1;
+                bits  = 1;
             }
 
-            if(mb->best_encoding == ENC_V1)
+            if (mb->best_encoding == ENC_V1)
                 temp[temp_size++] = mb->v1_vector;
-            else if(mb->best_encoding == ENC_V4)
-                for(z = 0; z < 4; z++)
+            else if (mb->best_encoding == ENC_V4)
+                for (z = 0; z < 4; z++)
                     temp[temp_size++] = mb->v4_vector[z];
 
-            if(should_write_temp) {
+            if (should_write_temp) {
                 memcpy(&buf[ret], temp, temp_size);
-                ret += temp_size;
+                ret      += temp_size;
                 temp_size = 0;
             }
         }
 
-        if(bits > 0) {
+        if (bits > 0) {
             AV_WB32(&buf[ret], flags);
             ret += 4;
             memcpy(&buf[ret], temp, temp_size);
@@ -767,28 +683,26 @@
     return ret;
 }
 
-//computes distortion of 4x4 MB in b compared to a
+// computes distortion of 4x4 MB in b compared to a
 static int compute_mb_distortion(CinepakEncContext *s,
                                  uint8_t *a_data[4], int a_linesize[4],
                                  uint8_t *b_data[4], int b_linesize[4])
 {
     int x, y, p, d, ret = 0;
 
-    for(y = 0; y < MB_SIZE; y++) {
-        for(x = 0; x < MB_SIZE; x++) {
-            d = a_data[0][x + y*a_linesize[0]] - b_data[0][x + y*b_linesize[0]];
-            ret += d*d;
+    for (y = 0; y < MB_SIZE; y++)
+        for (x = 0; x < MB_SIZE; x++) {
+            d = a_data[0][x + y * a_linesize[0]] - b_data[0][x + y * b_linesize[0]];
+            ret += d * d;
         }
-    }
 
-    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
-        for(p = 1; p <= 2; p++) {
-            for(y = 0; y < MB_SIZE/2; y++) {
-                for(x = 0; x < MB_SIZE/2; x++) {
-                    d = a_data[p][x + y*a_linesize[p]] - b_data[p][x + y*b_linesize[p]];
-                    ret += d*d;
+    if (s->pix_fmt == AV_PIX_FMT_RGB24) {
+        for (p = 1; p <= 2; p++) {
+            for (y = 0; y < MB_SIZE / 2; y++)
+                for (x = 0; x < MB_SIZE / 2; x++) {
+                    d = a_data[p][x + y * a_linesize[p]] - b_data[p][x + y * b_linesize[p]];
+                    ret += d * d;
                 }
-            }
         }
     }
 
@@ -796,114 +710,108 @@
 }
 
 // return the possibly adjusted size of the codebook
-#define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
-static int quantize(CinepakEncContext *s, int h,
-                    uint8_t *data[4], int linesize[4],
-                    int v1mode, strip_info *info,
+#define CERTAIN(x) ((x) != ENC_UNCERTAIN)
+static int quantize(CinepakEncContext *s, int h, uint8_t *data[4],
+                    int linesize[4], int v1mode, strip_info *info,
                     mb_encoding encoding)
 {
     int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
-    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
-    int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
-    int size = v1mode ? info->v1_size : info->v4_size;
+    int entry_size      = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
+    int *codebook       = v1mode ? info->v1_codebook : info->v4_codebook;
+    int size            = v1mode ? info->v1_size : info->v4_size;
     int64_t total_error = 0;
-    uint8_t vq_pict_buf[(MB_AREA*3)/2];
-    uint8_t *sub_data    [4], *vq_data    [4];
+    uint8_t vq_pict_buf[(MB_AREA * 3) / 2];
+    uint8_t     *sub_data[4],     *vq_data[4];
     int      sub_linesize[4],  vq_linesize[4];
 
-    for(mbn = i = y = 0; y < h; y += MB_SIZE) {
-        for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
+    for (mbn = i = y = 0; y < h; y += MB_SIZE) {
+        for (x = 0; x < s->w; x += MB_SIZE, ++mbn) {
             int *base;
 
-            if(CERTAIN(encoding)) {
-// use for the training only the blocks known to be to be encoded [sic:-]
-               if(s->mb[mbn].best_encoding != encoding) continue;
+            if (CERTAIN(encoding)) {
+                // use for the training only the blocks known to be to be encoded [sic:-]
+                if (s->mb[mbn].best_encoding != encoding)
+                    continue;
             }
 
-            base = s->codebook_input + i*entry_size;
-            if(v1mode) {
-                //subsample
-                for(j = y2 = 0; y2 < entry_size; y2 += 2) {
-                    for(x2 = 0; x2 < 4; x2 += 2, j++) {
-                        plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
-                        shift = y2 < 4 ? 0 : 1;
-                        x3 = shift ? 0 : x2;
-                        y3 = shift ? 0 : y2;
-                        base[j] = (data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * linesize[plane]] +
-                                   data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * linesize[plane]] +
-                                   data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * linesize[plane]] +
-                                   data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * linesize[plane]]) >> 2;
+            base = s->codebook_input + i * entry_size;
+            if (v1mode) {
+                // subsample
+                for (j = y2 = 0; y2 < entry_size; y2 += 2)
+                    for (x2 = 0; x2 < 4; x2 += 2, j++) {
+                        plane   = y2 < 4 ? 0 : 1 + (x2 >> 1);
+                        shift   = y2 < 4 ? 0 : 1;
+                        x3      = shift ? 0 : x2;
+                        y3      = shift ? 0 : y2;
+                        base[j] = (data[plane][((x + x3) >> shift) +      ((y + y3) >> shift)      * linesize[plane]] +
+                                   data[plane][((x + x3) >> shift) + 1 +  ((y + y3) >> shift)      * linesize[plane]] +
+                                   data[plane][((x + x3) >> shift) +     (((y + y3) >> shift) + 1) * linesize[plane]] +
+                                   data[plane][((x + x3) >> shift) + 1 + (((y + y3) >> shift) + 1) * linesize[plane]]) >> 2;
                     }
-                }
             } else {
-                //copy
-                for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
-                    for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
-                        for(k = 0; k < entry_size; k++, j++) {
+                // copy
+                for (j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
+                    for (x2 = 0; x2 < MB_SIZE; x2 += 2)
+                        for (k = 0; k < entry_size; k++, j++) {
                             plane = k >= 4 ? k - 3 : 0;
 
-                            if(k >= 4) {
-                                x3 = (x+x2) >> 1;
-                                y3 = (y+y2) >> 1;
+                            if (k >= 4) {
+                                x3 = (x + x2) >> 1;
+                                y3 = (y + y2) >> 1;
                             } else {
                                 x3 = x + x2 + (k & 1);
                                 y3 = y + y2 + (k >> 1);
                             }
 
-                            base[j] = data[plane][x3 + y3*linesize[plane]];
+                            base[j] = data[plane][x3 + y3 * linesize[plane]];
                         }
-                    }
                 }
             }
             i += v1mode ? 1 : 4;
         }
     }
-//    if(i < mbn*(v1mode ? 1 : 4)) {
-//        av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
-//    }
 
-    if(i == 0) // empty training set, nothing to do
+    if (i == 0) // empty training set, nothing to do
         return 0;
-    if(i < size) {
-        //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
+    if (i < size)
         size = i;
-    }
 
     avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
     avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
 
-    //setup vq_data, which contains a single MB
-    vq_data[0] = vq_pict_buf;
+    // set up vq_data, which contains a single MB
+    vq_data[0]     = vq_pict_buf;
     vq_linesize[0] = MB_SIZE;
-    vq_data[1] = &vq_pict_buf[MB_AREA];
-    vq_data[2] = vq_data[1] + (MB_AREA >> 2);
-    vq_linesize[1] = vq_linesize[2] = MB_SIZE >> 1;
+    vq_data[1]     = &vq_pict_buf[MB_AREA];
+    vq_data[2]     = vq_data[1] + (MB_AREA >> 2);
+    vq_linesize[1] =
+    vq_linesize[2] = MB_SIZE >> 1;
 
-    //copy indices
-    for(i = j = y = 0; y < h; y += MB_SIZE) {
-        for(x = 0; x < s->w; x += MB_SIZE, j++) {
+    // copy indices
+    for (i = j = y = 0; y < h; y += MB_SIZE)
+        for (x = 0; x < s->w; x += MB_SIZE, j++) {
             mb_info *mb = &s->mb[j];
-// skip uninteresting blocks if we know their preferred encoding
-            if(CERTAIN(encoding) && mb->best_encoding != encoding)
+            // skip uninteresting blocks if we know their preferred encoding
+            if (CERTAIN(encoding) && mb->best_encoding != encoding)
                 continue;
 
-            //point sub_data to current MB
+            // point sub_data to current MB
             get_sub_picture(s, x, y, data, linesize, sub_data, sub_linesize);
 
-            if(v1mode) {
+            if (v1mode) {
                 mb->v1_vector = s->codebook_closest[i];
 
-                //fill in vq_data with V1 data
+                // fill in vq_data with V1 data
                 decode_v1_vector(s, vq_data, vq_linesize, mb->v1_vector, info);
 
                 mb->v1_error = compute_mb_distortion(s, sub_data, sub_linesize,
                                                      vq_data, vq_linesize);
                 total_error += mb->v1_error;
             } else {
-                for(k = 0; k < 4; k++)
-                    mb->v4_vector[k] = s->codebook_closest[i+k];
+                for (k = 0; k < 4; k++)
+                    mb->v4_vector[k] = s->codebook_closest[i + k];
 
-                //fill in vq_data with V4 data
+                // fill in vq_data with V4 data
                 decode_v4_vector(s, vq_data, vq_linesize, mb->v4_vector, info);
 
                 mb->v4_error = compute_mb_distortion(s, sub_data, sub_linesize,
@@ -912,12 +820,9 @@
             }
             i += v1mode ? 1 : 4;
         }
-    }
-// check that we did it right in the beginning of the function
+    // check that we did it right in the beginning of the function
     av_assert0(i >= size); // training set is no smaller than the codebook
 
-    //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %"PRId64"\n", v1mode, size, i, total_error);
-
     return size;
 }
 
@@ -930,221 +835,160 @@
     uint8_t *sub_last_data    [4], *sub_pict_data    [4];
     int      sub_last_linesize[4],  sub_pict_linesize[4];
 
-    for(i = y = 0; y < h; y += MB_SIZE) {
-        for(x = 0; x < s->w; x += MB_SIZE, i++) {
-            get_sub_picture(s, x, y, last_data,     last_linesize,
-                                 sub_last_data, sub_last_linesize);
-            get_sub_picture(s, x, y,      data,          linesize,
-                                 sub_pict_data, sub_pict_linesize);
+    for (i = y = 0; y < h; y += MB_SIZE)
+        for (x = 0; x < s->w; x += MB_SIZE, i++) {
+            get_sub_picture(s, x, y, last_data, last_linesize,
+                            sub_last_data, sub_last_linesize);
+            get_sub_picture(s, x, y, data, linesize,
+                            sub_pict_data, sub_pict_linesize);
 
-            s->mb[i].skip_error = compute_mb_distortion(s,
-                                            sub_last_data, sub_last_linesize,
-                                            sub_pict_data, sub_pict_linesize);
+            s->mb[i].skip_error =
+                compute_mb_distortion(s,
+                                      sub_last_data, sub_last_linesize,
+                                      sub_pict_data, sub_pict_linesize);
         }
-    }
 }
 
-static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
+static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe,
+                               unsigned char *buf, int strip_size)
 {
-// actually we are exclusively using intra strip coding (how much can we win
-// otherwise? how to choose which part of a codebook to update?),
-// keyframes are different only because we disallow ENC_SKIP on them -- rl
-// (besides, the logic here used to be inverted: )
-//    buf[0] = keyframe ? 0x11: 0x10;
-    buf[0] = keyframe ? 0x10: 0x11;
+    // actually we are exclusively using intra strip coding (how much can we win
+    // otherwise? how to choose which part of a codebook to update?),
+    // keyframes are different only because we disallow ENC_SKIP on them -- rl
+    // (besides, the logic here used to be inverted: )
+    //    buf[0] = keyframe ? 0x11: 0x10;
+    buf[0] = keyframe ? 0x10 : 0x11;
     AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
-//    AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
+    // AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
     AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
     AV_WB16(&buf[6], 0);
-//    AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
+    // AV_WB16(&buf[8], y + h); /* using absolute y values works -- rl */
     AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
     AV_WB16(&buf[10], s->w);
-    //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
 }
 
 static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe,
                     uint8_t *last_data[4], int last_linesize[4],
                     uint8_t *data[4], int linesize[4],
                     uint8_t *scratch_data[4], int scratch_linesize[4],
-                    unsigned char *buf, int64_t *best_score
-#ifdef CINEPAK_REPORT_SERR
-, int64_t *best_serr
-#endif
-)
+                    unsigned char *buf, int64_t *best_score)
 {
     int64_t score = 0;
-#ifdef CINEPAK_REPORT_SERR
-    int64_t serr;
-#endif
     int best_size = 0;
     strip_info info;
-// for codebook optimization:
+    // for codebook optimization:
     int v1enough, v1_size, v4enough, v4_size;
     int new_v1_size, new_v4_size;
     int v1shrunk, v4shrunk;
 
-    if(!keyframe)
+    if (!keyframe)
         calculate_skip_errors(s, h, last_data, last_linesize, data, linesize,
                               &info);
 
-    //try some powers of 4 for the size of the codebooks
-    //constraint the v4 codebook to be no bigger than v1 one,
-    //(and no less than v1_size/4)
-    //thus making v1 preferable and possibly losing small details? should be ok
+    // try some powers of 4 for the size of the codebooks
+    // constraint the v4 codebook to be no bigger than v1 one,
+    // (and no less than v1_size/4)
+    // thus making v1 preferable and possibly losing small details? should be ok
 #define SMALLEST_CODEBOOK 1
-    for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
-        for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
-            //try all modes
-            for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
-                //don't allow MODE_MC in intra frames
-                if(keyframe && mode == MODE_MC)
+    for (v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
+        for (v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
+            CinepakMode mode;
+            // try all modes
+            for (mode = 0; mode < MODE_COUNT; mode++) {
+                // don't allow MODE_MC in intra frames
+                if (keyframe && mode == MODE_MC)
                     continue;
 
-                if(mode == MODE_V1_ONLY) {
+                if (mode == MODE_V1_ONLY) {
                     info.v1_size = v1_size;
-// the size may shrink even before optimizations if the input is short:
+                    // the size may shrink even before optimizations if the input is short:
                     info.v1_size = quantize(s, h, data, linesize, 1,
                                             &info, ENC_UNCERTAIN);
-                    if(info.v1_size < v1_size)
-// too few eligible blocks, no sense in trying bigger sizes
+                    if (info.v1_size < v1_size)
+                        // too few eligible blocks, no sense in trying bigger sizes
                         v1enough = 1;
 
                     info.v4_size = 0;
                 } else { // mode != MODE_V1_ONLY
                     // if v4 codebook is empty then only allow V1-only mode
-                    if(!v4_size)
+                    if (!v4_size)
                         continue;
 
-                    if(mode == MODE_V1_V4) {
+                    if (mode == MODE_V1_V4) {
                         info.v4_size = v4_size;
                         info.v4_size = quantize(s, h, data, linesize, 0,
                                                 &info, ENC_UNCERTAIN);
-                        if(info.v4_size < v4_size)
-// too few eligible blocks, no sense in trying bigger sizes
+                        if (info.v4_size < v4_size)
+                            // too few eligible blocks, no sense in trying bigger sizes
                             v4enough = 1;
                     }
                 }
 
                 info.mode = mode;
-// choose the best encoding per block, based on current experience
+                // choose the best encoding per block, based on current experience
                 score = calculate_mode_score(s, h, &info, 0,
-                                             &v1shrunk, &v4shrunk
-#ifdef CINEPAK_REPORT_SERR
-, &serr
-#endif
-);
+                                             &v1shrunk, &v4shrunk);
 
-                if(mode != MODE_V1_ONLY){
+                if (mode != MODE_V1_ONLY) {
                     int extra_iterations_limit = s->max_extra_cb_iterations;
-// recompute the codebooks, omitting the extra blocks
-// we assume we _may_ come here with more blocks to encode than before
+                    // recompute the codebooks, omitting the extra blocks
+                    // we assume we _may_ come here with more blocks to encode than before
                     info.v1_size = v1_size;
                     new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
-                    if(new_v1_size < info.v1_size){
-                        //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
+                    if (new_v1_size < info.v1_size)
                         info.v1_size = new_v1_size;
-                    }
-// we assume we _may_ come here with more blocks to encode than before
+                    // we assume we _may_ come here with more blocks to encode than before
                     info.v4_size = v4_size;
                     new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
-                    if(new_v4_size < info.v4_size) {
-                        //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
+                    if (new_v4_size < info.v4_size)
                         info.v4_size = new_v4_size;
-                    }
-// calculate the resulting score
-// (do not move blocks to codebook encodings now, as some blocks may have
-// got bigger errors despite a smaller training set - but we do not
-// ever grow the training sets back)
-                    for(;;) {
+                    // calculate the resulting score
+                    // (do not move blocks to codebook encodings now, as some blocks may have
+                    // got bigger errors despite a smaller training set - but we do not
+                    // ever grow the training sets back)
+                    for (;;) {
                         score = calculate_mode_score(s, h, &info, 1,
-                                                     &v1shrunk, &v4shrunk
-#ifdef CINEPAK_REPORT_SERR
-, &serr
-#endif
-);
-// do we have a reason to reiterate? if so, have we reached the limit?
-                        if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
-// recompute the codebooks, omitting the extra blocks
-                        if(v1shrunk) {
+                                                     &v1shrunk, &v4shrunk);
+                        // do we have a reason to reiterate? if so, have we reached the limit?
+                        if ((!v1shrunk && !v4shrunk) || !extra_iterations_limit--)
+                            break;
+                        // recompute the codebooks, omitting the extra blocks
+                        if (v1shrunk) {
                             info.v1_size = v1_size;
                             new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
-                            if(new_v1_size < info.v1_size){
-                                //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
+                            if (new_v1_size < info.v1_size)
                                 info.v1_size = new_v1_size;
-                            }
                         }
-                        if(v4shrunk) {
+                        if (v4shrunk) {
                             info.v4_size = v4_size;
                             new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
-                            if(new_v4_size < info.v4_size) {
-                                //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
+                            if (new_v4_size < info.v4_size)
                                 info.v4_size = new_v4_size;
-                            }
                         }
                     }
                 }
 
-                //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %"PRId64"\n", v1_size, v4_size, score);
-
-                if(best_size == 0 || score < *best_score) {
-
+                if (best_size == 0 || score < *best_score) {
                     *best_score = score;
-#ifdef CINEPAK_REPORT_SERR
-                    *best_serr = serr;
-#endif
                     best_size = encode_mode(s, h,
                                             scratch_data, scratch_linesize,
                                             last_data, last_linesize, &info,
                                             s->strip_buf + STRIP_HEADER_SIZE);
 
-                    //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B", mode, info.v1_size, info.v4_size, score, best_size);
-                    //av_log(s->avctx, AV_LOG_INFO, "\n");
-#ifdef CINEPAK_REPORT_SERR
-                    av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B\n", mode, v1_size, v4_size, serr, best_size);
-#endif
-
-#ifdef CINEPAKENC_DEBUG
-                    //save MB encoding choices
-                    memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
-#endif
-
-                    //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
                     write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
-
                 }
             }
         }
     }
 
-#ifdef CINEPAKENC_DEBUG
-    //gather stats. this will only work properly of MAX_STRIPS == 1
-    if(best_info.mode == MODE_V1_ONLY) {
-        s->num_v1_mode++;
-        s->num_v1_encs += s->w*h/MB_AREA;
-    } else {
-        if(best_info.mode == MODE_V1_V4)
-            s->num_v4_mode++;
-        else
-            s->num_mc_mode++;
-
-        int x;
-        for(x = 0; x < s->w*h/MB_AREA; x++)
-            if(s->best_mb[x].best_encoding == ENC_V1)
-                s->num_v1_encs++;
-            else if(s->best_mb[x].best_encoding == ENC_V4)
-                s->num_v4_encs++;
-            else
-                s->num_skips++;
-    }
-#endif
-
     best_size += STRIP_HEADER_SIZE;
     memcpy(buf, s->strip_buf, best_size);
 
     return best_size;
 }
 
-static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
+static int write_cvid_header(CinepakEncContext *s, unsigned char *buf,
+                             int num_strips, int data_size, int isakeyframe)
 {
     buf[0] = isakeyframe ? 0 : 1;
     AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
@@ -1158,91 +1002,93 @@
 static int rd_frame(CinepakEncContext *s, const AVFrame *frame,
                     int isakeyframe, unsigned char *buf, int buf_size)
 {
-    int num_strips, strip, i, y, nexty, size, temp_size;
+    int num_strips, strip, i, y, nexty, size, temp_size, best_size;
     uint8_t *last_data    [4], *data    [4], *scratch_data    [4];
     int      last_linesize[4],  linesize[4],  scratch_linesize[4];
     int64_t best_score = 0, score, score_temp;
-#ifdef CINEPAK_REPORT_SERR
-    int64_t best_serr = 0, serr, serr_temp;
-#endif
+    int best_nstrips;
 
-    int best_nstrips = -1, best_size = -1; // mark as uninitialzed
-
-    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
+    if (s->pix_fmt == AV_PIX_FMT_RGB24) {
         int x;
-// build a copy of the given frame in the correct colorspace
-        for(y = 0; y < s->h; y += 2) {
-            for(x = 0; x < s->w; x += 2) {
-                uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
-                ir[0] = frame->data[0] + x*3 + y*frame->linesize[0];
+        // build a copy of the given frame in the correct colorspace
+        for (y = 0; y < s->h; y += 2)
+            for (x = 0; x < s->w; x += 2) {
+                uint8_t *ir[2];
+                int32_t r, g, b, rr, gg, bb;
+                ir[0] = frame->data[0] + x * 3 + y * frame->linesize[0];
                 ir[1] = ir[0] + frame->linesize[0];
                 get_sub_picture(s, x, y,
                                 s->input_frame->data, s->input_frame->linesize,
                                 scratch_data, scratch_linesize);
                 r = g = b = 0;
-                for(i=0; i<4; ++i) {
+                for (i = 0; i < 4; ++i) {
                     int i1, i2;
-                    i1 = (i&1); i2 = (i>=2);
-                    rr = ir[i2][i1*3+0];
-                    gg = ir[i2][i1*3+1];
-                    bb = ir[i2][i1*3+2];
-                    r += rr; g += gg; b += bb;
-// using fixed point arithmetic for portable repeatability, scaling by 2^23
-// "Y"
-//                    rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
-                    rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
-                    if(      rr <   0) rr =   0;
-                    else if (rr > 255) rr = 255;
-                    scratch_data[0][i1 + i2*scratch_linesize[0]] = rr;
+                    i1 = (i & 1);
+                    i2 = (i >= 2);
+                    rr = ir[i2][i1 * 3 + 0];
+                    gg = ir[i2][i1 * 3 + 1];
+                    bb = ir[i2][i1 * 3 + 2];
+                    r += rr;
+                    g += gg;
+                    b += bb;
+                    // using fixed point arithmetic for portable repeatability, scaling by 2^23
+                    // "Y"
+                    // rr = 0.2857 * rr + 0.5714 * gg + 0.1429 * bb;
+                    rr = (2396625 * rr + 4793251 * gg + 1198732 * bb) >> 23;
+                    if (rr < 0)
+                        rr = 0;
+                    else if (rr > 255)
+                        rr = 255;
+                    scratch_data[0][i1 + i2 * scratch_linesize[0]] = rr;
                 }
-// let us scale down as late as possible
-//                r /= 4; g /= 4; b /= 4;
-// "U"
-//                rr = -0.1429*r - 0.2857*g + 0.4286*b;
-                rr = (-299683*r - 599156*g + 898839*b) >> 23;
-                if(      rr < -128) rr = -128;
-                else if (rr >  127) rr =  127;
+                // let us scale down as late as possible
+                //                r /= 4; g /= 4; b /= 4;
+                // "U"
+                // rr = -0.1429 * r - 0.2857 * g + 0.4286 * b;
+                rr = (-299683 * r - 599156 * g + 898839 * b) >> 23;
+                if (rr < -128)
+                    rr = -128;
+                else if (rr > 127)
+                    rr = 127;
                 scratch_data[1][0] = rr + 128; // quantize needs unsigned
-// "V"
-//                rr = 0.3571*r - 0.2857*g - 0.0714*b;
-                rr = (748893*r - 599156*g - 149737*b) >> 23;
-                if(      rr < -128) rr = -128;
-                else if (rr >  127) rr =  127;
+                // "V"
+                // rr = 0.3571 * r - 0.2857 * g - 0.0714 * b;
+                rr = (748893 * r - 599156 * g - 149737 * b) >> 23;
+                if (rr < -128)
+                    rr = -128;
+                else if (rr > 127)
+                    rr = 127;
                 scratch_data[2][0] = rr + 128; // quantize needs unsigned
             }
-        }
     }
 
-    //would be nice but quite certainly incompatible with vintage players:
+    // would be nice but quite certainly incompatible with vintage players:
     // support encoding zero strips (meaning skip the whole frame)
-    for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
+    for (num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
         score = 0;
-        size = 0;
-#ifdef CINEPAK_REPORT_SERR
-        serr = 0;
-#endif
+        size  = 0;
 
-        for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
+        for (y = 0, strip = 1; y < s->h; strip++, y = nexty) {
             int strip_height;
 
             nexty = strip * s->h / num_strips; // <= s->h
-            //make nexty the next multiple of 4 if not already there
-            if(nexty & 3)
+            // make nexty the next multiple of 4 if not already there
+            if (nexty & 3)
                 nexty += 4 - (nexty & 3);
 
             strip_height = nexty - y;
-            if(strip_height <= 0) { // can this ever happen?
+            if (strip_height <= 0) { // can this ever happen?
                 av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
                 continue;
             }
 
-            if(s->pix_fmt == AV_PIX_FMT_RGB24)
+            if (s->pix_fmt == AV_PIX_FMT_RGB24)
                 get_sub_picture(s, 0, y,
                                 s->input_frame->data, s->input_frame->linesize,
                                 data, linesize);
             else
                 get_sub_picture(s, 0, y,
-                                (uint8_t **)frame->data, (int*)frame->linesize,
+                                (uint8_t **)frame->data, (int *)frame->linesize,
                                 data, linesize);
             get_sub_picture(s, 0, y,
                             s->last_frame->data, s->last_frame->linesize,
@@ -1251,68 +1097,51 @@
                             s->scratch_frame->data, s->scratch_frame->linesize,
                             scratch_data, scratch_linesize);
 
-            if((temp_size = rd_strip(s, y, strip_height, isakeyframe,
-                                     last_data, last_linesize, data, linesize,
-                                     scratch_data, scratch_linesize,
-                                     s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
-#ifdef CINEPAK_REPORT_SERR
-, &serr_temp
-#endif
-)) < 0)
+            if ((temp_size = rd_strip(s, y, strip_height, isakeyframe,
+                                      last_data, last_linesize, data, linesize,
+                                      scratch_data, scratch_linesize,
+                                      s->frame_buf + size + CVID_HEADER_SIZE,
+                                      &score_temp)) < 0)
                 return temp_size;
 
             score += score_temp;
-#ifdef CINEPAK_REPORT_SERR
-            serr += serr_temp;
-#endif
             size += temp_size;
-            //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
-            //av_log(s->avctx, AV_LOG_INFO, "\n");
         }
 
-        if(best_score == 0 || score < best_score) {
+        if (best_score == 0 || score < best_score) {
             best_score = score;
-#ifdef CINEPAK_REPORT_SERR
-            best_serr = serr;
-#endif
             best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
-            //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, score, best_size);
-#ifdef CINEPAK_REPORT_SERR
-            av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, serr, best_size);
-#endif
 
             FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
             memcpy(buf, s->frame_buf, best_size);
             best_nstrips = num_strips;
         }
-// avoid trying too many strip numbers without a real reason
-// (this makes the processing of the very first frame faster)
-        if(num_strips - best_nstrips > 4)
+        // avoid trying too many strip numbers without a real reason
+        // (this makes the processing of the very first frame faster)
+        if (num_strips - best_nstrips > 4)
             break;
     }
 
-    av_assert0(best_nstrips >= 0 && best_size >= 0);
-
-// let the number of strips slowly adapt to the changes in the contents,
-// compared to full bruteforcing every time this will occasionally lead
-// to some r/d performance loss but makes encoding up to several times faster
-    if(!s->strip_number_delta_range) {
-        if(best_nstrips == s->max_strips) { // let us try to step up
+    // let the number of strips slowly adapt to the changes in the contents,
+    // compared to full bruteforcing every time this will occasionally lead
+    // to some r/d performance loss but makes encoding up to several times faster
+    if (!s->strip_number_delta_range) {
+        if (best_nstrips == s->max_strips) { // let us try to step up
             s->max_strips = best_nstrips + 1;
-            if(s->max_strips >= s->max_max_strips)
+            if (s->max_strips >= s->max_max_strips)
                 s->max_strips = s->max_max_strips;
         } else { // try to step down
             s->max_strips = best_nstrips;
         }
         s->min_strips = s->max_strips - 1;
-        if(s->min_strips < s->min_min_strips)
+        if (s->min_strips < s->min_min_strips)
             s->min_strips = s->min_min_strips;
     } else {
         s->max_strips = best_nstrips + s->strip_number_delta_range;
-        if(s->max_strips >= s->max_max_strips)
+        if (s->max_strips >= s->max_max_strips)
             s->max_strips = s->max_max_strips;
         s->min_strips = best_nstrips - s->strip_number_delta_range;
-        if(s->min_strips < s->min_min_strips)
+        if (s->min_strips < s->min_min_strips)
             s->min_strips = s->min_min_strips;
     }
 
@@ -1329,7 +1158,7 @@
 
     if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size, 0)) < 0)
         return ret;
-    ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
+    ret       = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
     pkt->size = ret;
     if (s->curframe == 0)
         pkt->flags |= AV_PKT_FLAG_KEY;
@@ -1358,30 +1187,22 @@
     av_freep(&s->strip_buf);
     av_freep(&s->frame_buf);
     av_freep(&s->mb);
-#ifdef CINEPAKENC_DEBUG
-    av_freep(&s->best_mb);
-#endif
 
-    for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
+    for (x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
         av_freep(&s->pict_bufs[x]);
 
-#ifdef CINEPAKENC_DEBUG
-    av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
-        s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
-#endif
-
     return 0;
 }
 
 AVCodec ff_cinepak_encoder = {
     .name           = "cinepak",
+    .long_name      = NULL_IF_CONFIG_SMALL("Cinepak"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_CINEPAK,
     .priv_data_size = sizeof(CinepakEncContext),
     .init           = cinepak_encode_init,
     .encode2        = cinepak_encode_frame,
     .close          = cinepak_encode_end,
-    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
-    .long_name      = NULL_IF_CONFIG_SMALL("Cinepak"),
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE },
     .priv_class     = &cinepak_class,
 };

diff --git a/libavcodec/clearvideo.c b/libavcodec/clearvideo.c
index 067942a..0e3c772 100644
--- a/libavcodec/clearvideo.c
+++ b/libavcodec/clearvideo.c

@@ -1,6 +1,6 @@
 /*
  * ClearVideo decoder
- * Copyright (c) 2012 Konstantin Shishkov
+ * Copyright (c) 2012-2018 Konstantin Shishkov
  *
  * This file is part of FFmpeg.
  *
@@ -25,107 +25,56 @@
  */
 
 #include "avcodec.h"
+#include "bytestream.h"
+#include "get_bits.h"
 #include "idctdsp.h"
 #include "internal.h"
-#include "get_bits.h"
-#include "bytestream.h"
+#include "mathops.h"
+#include "clearvideodata.h"
 
-#define NUM_DC_CODES 127
-#define NUM_AC_CODES 103
+typedef struct LevelCodes {
+    uint16_t    mv_esc;
+    uint16_t    bias_esc;
+    VLC         flags_cb;
+    VLC         mv_cb;
+    VLC         bias_cb;
+} LevelCodes;
 
-static const uint8_t clv_dc_codes[NUM_DC_CODES] = {
-    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
-    0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x07, 0x0B,
-    0x0C, 0x08, 0x08, 0x09, 0x04, 0x06, 0x07, 0x05,
-    0x04, 0x05, 0x04, 0x06, 0x05, 0x06, 0x07, 0x05,
-    0x06, 0x07, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08,
-    0x09, 0x0A, 0x0B, 0x07, 0x08, 0x09, 0x07, 0x08,
-    0x06, 0x07, 0x08, 0x06, 0x04, 0x05, 0x02, 0x01,
-    0x03, 0x06, 0x07, 0x07, 0x09, 0x0A, 0x0B, 0x09,
-    0x0A, 0x0B, 0x0A, 0x0B, 0x0C, 0x0D, 0x0C, 0x09,
-    0x0D, 0x0A, 0x0B, 0x08, 0x09, 0x0A, 0x0B, 0x07,
-    0x08, 0x09, 0x0A, 0x0B, 0x06, 0x07, 0x06, 0x08,
-    0x07, 0x09, 0x0A, 0x0B, 0x09, 0x0A, 0x0B, 0x0C,
-    0x14, 0x0D, 0x0D, 0x0E, 0x0F, 0x15, 0x15, 0x16,
-    0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E,
-    0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,
-};
+typedef struct MV {
+    int16_t x, y;
+} MV;
 
-static const uint8_t clv_dc_bits[NUM_DC_CODES] = {
-    22, 22, 22, 22, 22, 22, 22, 22,
-    22, 22, 22, 22, 22, 22, 22, 22,
-    22, 22, 22, 21, 22, 22, 19, 20,
-    20, 19, 18, 18, 15, 17, 17, 16,
-    14, 15, 12, 13, 14, 14, 14, 12,
-    12, 12, 11, 11, 11, 10, 10, 10,
-    10, 10, 10,  9,  9,  9,  8,  8,
-     7,  7,  7,  6,  5,  5,  3,  1,
-     3,  5,  5,  6,  7,  7,  7,  8,
-     8,  8,  9,  9,  9,  9, 10, 11,
-    10, 11, 11, 12, 12, 12, 12, 13,
-    14, 14, 14, 14, 15, 15, 16, 17,
-    16, 17, 18, 18, 19, 19, 19, 19,
-    21, 19, 20, 19, 19, 21, 22, 22,
-    22, 22, 22, 22, 22, 22, 22, 22,
-    22, 22, 22, 22, 22, 22, 22,
-};
+static const MV zero_mv = { 0 };
 
-static const uint16_t clv_ac_syms[NUM_AC_CODES] = {
-    0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
-    0x0009, 0x000A, 0x000B, 0x000C, 0x0011, 0x0012, 0x0013, 0x0014,
-    0x0015, 0x0016, 0x0021, 0x0022, 0x0023, 0x0024, 0x0031, 0x0032,
-    0x0033, 0x0041, 0x0042, 0x0043, 0x0051, 0x0052, 0x0053, 0x0061,
-    0x0062, 0x0063, 0x0071, 0x0072, 0x0081, 0x0082, 0x0091, 0x0092,
-    0x00A1, 0x00A2, 0x00B1, 0x00C1, 0x00D1, 0x00E1, 0x00F1, 0x0101,
-    0x0111, 0x0121, 0x0131, 0x0141, 0x0151, 0x0161, 0x0171, 0x0181,
-    0x0191, 0x01A1, 0x1001, 0x1002, 0x1003, 0x1011, 0x1012, 0x1021,
-    0x1031, 0x1041, 0x1051, 0x1061, 0x1071, 0x1081, 0x1091, 0x10A1,
-    0x10B1, 0x10C1, 0x10D1, 0x10E1, 0x10F1, 0x1101, 0x1111, 0x1121,
-    0x1131, 0x1141, 0x1151, 0x1161, 0x1171, 0x1181, 0x1191, 0x11A1,
-    0x11B1, 0x11C1, 0x11D1, 0x11E1, 0x11F1, 0x1201, 0x1211, 0x1221,
-    0x1231, 0x1241, 0x1251, 0x1261, 0x1271, 0x1281, 0x1BFF,
-};
+typedef struct MVInfo {
+    int mb_w;
+    int mb_h;
+    int mb_size;
+    int mb_stride;
+    int top;
+    MV  *mv;
+} MVInfo;
 
-static const uint8_t clv_ac_codes[NUM_AC_CODES] = {
-    0x02, 0x0F, 0x15, 0x17, 0x1F, 0x25, 0x24, 0x21,
-    0x20, 0x07, 0x06, 0x20, 0x06, 0x14, 0x1E, 0x0F,
-    0x21, 0x50, 0x0E, 0x1D, 0x0E, 0x51, 0x0D, 0x23,
-    0x0D, 0x0C, 0x22, 0x52, 0x0B, 0x0C, 0x53, 0x13,
-    0x0B, 0x54, 0x12, 0x0A, 0x11, 0x09, 0x10, 0x08,
-    0x16, 0x55, 0x15, 0x14, 0x1C, 0x1B, 0x21, 0x20,
-    0x1F, 0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x22, 0x23,
-    0x56, 0x57, 0x07, 0x19, 0x05, 0x0F, 0x04, 0x0E,
-    0x0D, 0x0C, 0x13, 0x12, 0x11, 0x10, 0x1A, 0x19,
-    0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x18, 0x17,
-    0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x07, 0x06,
-    0x05, 0x04, 0x24, 0x25, 0x26, 0x27, 0x58, 0x59,
-    0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x03,
-};
-
-static const uint8_t clv_ac_bits[NUM_AC_CODES] = {
-     2,  4,  6,  7,  8,  9,  9, 10,
-    10, 11, 11, 11,  3,  6,  8, 10,
-    11, 12,  4,  8, 10, 12,  5,  9,
-    10,  5,  9, 12,  5, 10, 12,  6,
-    10, 12,  6, 10,  6, 10,  6, 10,
-     7, 12,  7,  7,  8,  8,  9,  9,
-     9,  9,  9,  9,  9,  9, 11, 11,
-    12, 12,  4,  9, 11,  6, 11,  6,
-     6,  6,  7,  7,  7,  7,  8,  8,
-     8,  8,  8,  8,  8,  8,  9,  9,
-     9,  9,  9,  9,  9,  9, 10, 10,
-    10, 10, 11, 11, 11, 11, 12, 12,
-    12, 12, 12, 12, 12, 12,  7,
-};
+typedef struct TileInfo {
+    uint16_t        flags;
+    int16_t         bias;
+    MV              mv;
+    struct TileInfo *child[4];
+} TileInfo;
 
 typedef struct CLVContext {
     AVCodecContext *avctx;
     IDCTDSPContext idsp;
     AVFrame        *pic;
+    AVFrame        *prev;
     GetBitContext  gb;
     int            mb_width, mb_height;
+    int            pmb_width, pmb_height;
+    MVInfo         mvi;
+    int            tile_size;
+    int            tile_shift;
     VLC            dc_vlc, ac_vlc;
+    LevelCodes     ylev[4], ulev[3], vlev[3];
     int            luma_dc_quant, chroma_dc_quant, ac_quant;
     DECLARE_ALIGNED(16, int16_t, block)[64];
     int            top_dc[3], left_dc[4];
@@ -179,12 +128,12 @@
 }
 
 #define DCT_TEMPLATE(blk, step, bias, shift, dshift, OP)                \
-    const int t0 = OP( 2841 * blk[1 * step] +  565 * blk[7 * step]);    \
-    const int t1 = OP(  565 * blk[1 * step] - 2841 * blk[7 * step]);    \
-    const int t2 = OP( 1609 * blk[5 * step] + 2408 * blk[3 * step]);    \
-    const int t3 = OP( 2408 * blk[5 * step] - 1609 * blk[3 * step]);    \
-    const int t4 = OP( 1108 * blk[2 * step] - 2676 * blk[6 * step]);    \
-    const int t5 = OP( 2676 * blk[2 * step] + 1108 * blk[6 * step]);    \
+    const int t0 = OP(2841 * blk[1 * step] +  565 * blk[7 * step]);     \
+    const int t1 = OP( 565 * blk[1 * step] - 2841 * blk[7 * step]);     \
+    const int t2 = OP(1609 * blk[5 * step] + 2408 * blk[3 * step]);     \
+    const int t3 = OP(2408 * blk[5 * step] - 1609 * blk[3 * step]);     \
+    const int t4 = OP(1108 * blk[2 * step] - 2676 * blk[6 * step]);     \
+    const int t5 = OP(2676 * blk[2 * step] + 1108 * blk[6 * step]);     \
     const int t6 = ((blk[0 * step] + blk[4 * step]) * (1 << dshift)) + bias;  \
     const int t7 = ((blk[0 * step] - blk[4 * step]) * (1 << dshift)) + bias;  \
     const int t8 = t0 + t2;                                             \
@@ -225,9 +174,7 @@
 
 static int decode_mb(CLVContext *c, int x, int y)
 {
-    int i;
-    int has_ac[6];
-    int off;
+    int i, has_ac[6], off;
 
     for (i = 0; i < 6; i++)
         has_ac[i] = get_bits1(&c->gb);
@@ -247,7 +194,8 @@
         clv_dct(c->block);
         if (i == 2)
             off += c->pic->linesize[0] * 8;
-        c->idsp.put_pixels_clamped(c->block, c->pic->data[0] + off + (i & 1) * 8,
+        c->idsp.put_pixels_clamped(c->block,
+                                   c->pic->data[0] + off + (i & 1) * 8,
                                    c->pic->linesize[0]);
     }
 
@@ -271,6 +219,283 @@
     return 0;
 }
 
+static int copy_block(AVCodecContext *avctx, AVFrame *dst, AVFrame *src,
+                      int plane, int x, int y, int dx, int dy, int size)
+{
+    int shift = plane > 0;
+    int sx = x + dx;
+    int sy = y + dy;
+    int sstride, dstride, soff, doff;
+    uint8_t *sbuf, *dbuf;
+    int i;
+
+    if (x < 0 || sx < 0 || y < 0 || sy < 0 ||
+        x + size > avctx->coded_width >> shift ||
+        y + size > avctx->coded_height >> shift ||
+        sx + size > avctx->coded_width >> shift ||
+        sy + size > avctx->coded_height >> shift)
+        return AVERROR_INVALIDDATA;
+
+    sstride = src->linesize[plane];
+    dstride = dst->linesize[plane];
+    soff    = sx + sy * sstride;
+    sbuf    = src->data[plane];
+    doff    = x + y * dstride;
+    dbuf    = dst->data[plane];
+
+    for (i = 0; i < size; i++) {
+        uint8_t *dptr = &dbuf[doff];
+        uint8_t *sptr = &sbuf[soff];
+
+        memcpy(dptr, sptr, size);
+        doff += dstride;
+        soff += sstride;
+    }
+
+    return 0;
+}
+
+static int copyadd_block(AVCodecContext *avctx, AVFrame *dst, AVFrame *src,
+                         int plane, int x, int y, int dx, int dy, int size, int bias)
+{
+    int shift = plane > 0;
+    int sx = x + dx;
+    int sy = y + dy;
+    int sstride   = src->linesize[plane];
+    int dstride   = dst->linesize[plane];
+    int soff      = sx + sy * sstride;
+    uint8_t *sbuf = src->data[plane];
+    int doff      = x + y * dstride;
+    uint8_t *dbuf = dst->data[plane];
+    int i, j;
+
+    if (x < 0 || sx < 0 || y < 0 || sy < 0 ||
+        x + size > avctx->coded_width >> shift ||
+        y + size > avctx->coded_height >> shift ||
+        sx + size > avctx->coded_width >> shift ||
+        sy + size > avctx->coded_height >> shift)
+        return AVERROR_INVALIDDATA;
+
+    for (j = 0; j < size; j++) {
+        uint8_t *dptr = &dbuf[doff];
+        uint8_t *sptr = &sbuf[soff];
+
+        for (i = 0; i < size; i++) {
+            int val = sptr[i] + bias;
+
+            dptr[i] = av_clip_uint8(val);
+        }
+
+        doff += dstride;
+        soff += sstride;
+    }
+
+    return 0;
+}
+
+static MV mvi_predict(MVInfo *mvi, int mb_x, int mb_y, MV diff)
+{
+    MV res, pred_mv;
+    int left_mv, right_mv, top_mv, bot_mv;
+
+    if (mvi->top) {
+        if (mb_x > 0) {
+            pred_mv = mvi->mv[mvi->mb_stride + mb_x - 1];
+        } else {
+            pred_mv = zero_mv;
+        }
+    } else if ((mb_x == 0) || (mb_x == mvi->mb_w - 1)) {
+        pred_mv = mvi->mv[mb_x];
+    } else {
+        MV A = mvi->mv[mvi->mb_stride + mb_x - 1];
+        MV B = mvi->mv[                 mb_x    ];
+        MV C = mvi->mv[                 mb_x + 1];
+        pred_mv.x = mid_pred(A.x, B.x, C.x);
+        pred_mv.y = mid_pred(A.y, B.y, C.y);
+    }
+
+    res = pred_mv;
+
+    left_mv = -((mb_x * mvi->mb_size));
+    right_mv = ((mvi->mb_w - mb_x - 1) * mvi->mb_size);
+    if (res.x < left_mv) {
+        res.x = left_mv;
+    }
+    if (res.x > right_mv) {
+        res.x = right_mv;
+    }
+    top_mv = -((mb_y * mvi->mb_size));
+    bot_mv = ((mvi->mb_h - mb_y - 1) * mvi->mb_size);
+    if (res.y < top_mv) {
+        res.y = top_mv;
+    }
+    if (res.y > bot_mv) {
+        res.y = bot_mv;
+    }
+
+    mvi->mv[mvi->mb_stride + mb_x].x = res.x + diff.x;
+    mvi->mv[mvi->mb_stride + mb_x].y = res.y + diff.y;
+
+    return res;
+}
+
+static void mvi_reset(MVInfo *mvi, int mb_w, int mb_h, int mb_size)
+{
+    mvi->top       = 1;
+    mvi->mb_w      = mb_w;
+    mvi->mb_h      = mb_h;
+    mvi->mb_size   = mb_size;
+    mvi->mb_stride = mb_w;
+    memset(mvi->mv, 0, sizeof(MV) * mvi->mb_stride * 2);
+}
+
+static void mvi_update_row(MVInfo *mvi)
+{
+    int i;
+
+    mvi->top = 0;
+    for (i = 0 ; i < mvi->mb_stride; i++) {
+        mvi->mv[i] = mvi->mv[mvi->mb_stride + i];
+    }
+}
+
+static TileInfo* decode_tile_info(GetBitContext *gb, LevelCodes *lc, int level)
+{
+    TileInfo *ti;
+    int i, flags = 0;
+    int16_t bias = 0;
+    MV mv = { 0 };
+
+    if (lc[level].flags_cb.table) {
+        flags = get_vlc2(gb, lc[level].flags_cb.table, lc[level].flags_cb.bits, 2);
+    }
+
+    if (lc[level].mv_cb.table) {
+        uint16_t mv_code = get_vlc2(gb, lc[level].mv_cb.table, lc[level].mv_cb.bits, 3);
+
+        if (mv_code != lc[level].mv_esc) {
+            mv.x = (int8_t)(mv_code & 0xff);
+            mv.y = (int8_t)(mv_code >> 8);
+        } else {
+            mv.x = get_sbits(gb, 8);
+            mv.y = get_sbits(gb, 8);
+        }
+    }
+
+    if (lc[level].bias_cb.table) {
+        uint16_t bias_val = get_vlc2(gb, lc[level].bias_cb.table, lc[level].bias_cb.bits, 2);
+
+        if (bias_val != lc[level].bias_esc) {
+            bias = (int16_t)(bias_val);
+        } else {
+            bias = get_sbits(gb, 16);
+        }
+    }
+
+    ti = av_calloc(1, sizeof(*ti));
+    if (!ti)
+        return NULL;
+
+    ti->flags = flags;
+    ti->mv = mv;
+    ti->bias = bias;
+
+    if (ti->flags) {
+        for (i = 0; i < 4; i++) {
+            if (ti->flags & (1 << i)) {
+                TileInfo *subti = decode_tile_info(gb, lc, level + 1);
+                ti->child[i] = subti;
+            }
+        }
+    }
+
+    return ti;
+}
+
+static int tile_do_block(AVCodecContext *avctx, AVFrame *dst, AVFrame *src,
+                         int plane, int x, int y, int dx, int dy, int size, int bias)
+{
+    int ret;
+
+    if (!bias) {
+        ret = copy_block(avctx, dst, src, plane, x, y, dx, dy, size);
+    } else {
+        ret = copyadd_block(avctx, dst, src, plane, x, y, dx, dy, size, bias);
+    }
+
+    return ret;
+}
+
+static int restore_tree(AVCodecContext *avctx, AVFrame *dst, AVFrame *src,
+                        int plane, int x, int y, int size,
+                        TileInfo *tile, MV root_mv)
+{
+    int ret;
+    MV mv;
+
+    mv.x = root_mv.x + tile->mv.x;
+    mv.y = root_mv.y + tile->mv.y;
+
+    if (!tile->flags) {
+        ret = tile_do_block(avctx, dst, src, plane, x, y, mv.x, mv.y, size, tile->bias);
+    } else {
+        int i, hsize = size >> 1;
+
+        for (i = 0; i < 4; i++) {
+            int xoff = (i & 2) == 0 ? 0 : hsize;
+            int yoff = (i & 1) == 0 ? 0 : hsize;
+
+            if (tile->child[i]) {
+                ret = restore_tree(avctx, dst, src, plane, x + xoff, y + yoff, hsize, tile->child[i], root_mv);
+                av_freep(&tile->child[i]);
+            } else {
+                ret = tile_do_block(avctx, dst, src, plane, x + xoff, y + yoff, mv.x, mv.y, hsize, tile->bias);
+            }
+        }
+    }
+
+    return ret;
+}
+
+static void extend_edges(AVFrame *buf, int tile_size)
+{
+    int comp, i, j;
+
+    for (comp = 0; comp < 3; comp++) {
+        int shift = comp > 0;
+        int w = buf->width  >> shift;
+        int h = buf->height >> shift;
+        int size = comp == 0 ? tile_size : tile_size >> 1;
+        int stride = buf->linesize[comp];
+        uint8_t *framebuf = buf->data[comp];
+
+        int right  = size - (w & (size - 1));
+        int bottom = size - (h & (size - 1));
+
+        if ((right == size) && (bottom == size)) {
+            return;
+        }
+        if (right != size) {
+            int off = w;
+            for (j = 0; j < h; j++) {
+                for (i = 0; i < right; i++) {
+                    framebuf[off + i] = 0x80;
+                }
+                off += stride;
+            }
+        }
+        if (bottom != size) {
+            int off = h * stride;
+            for (j = 0; j < bottom; j++) {
+                for (i = 0; i < stride; i++) {
+                    framebuf[off + i] = 0x80;
+                }
+                off += stride;
+            }
+        }
+    }
+}
+
 static int clv_decode_frame(AVCodecContext *avctx, void *data,
                             int *got_frame, AVPacket *avpkt)
 {
@@ -279,19 +504,21 @@
     CLVContext *c = avctx->priv_data;
     GetByteContext gb;
     uint32_t frame_type;
-    int i, j;
-    int ret;
+    int i, j, ret;
     int mb_ret = 0;
 
     bytestream2_init(&gb, buf, buf_size);
-    if (avctx->codec_tag == MKTAG('C','L','V','1')) {
+    if (avctx->codec_tag == MKTAG('C', 'L', 'V', '1')) {
         int skip = bytestream2_get_byte(&gb);
         bytestream2_skip(&gb, (skip + 1) * 8);
     }
 
     frame_type = bytestream2_get_byte(&gb);
 
-    if (frame_type & 0x2) {
+    if ((frame_type & 0x7f) == 0x30) {
+        *got_frame = 0;
+        return buf_size;
+    } else if (frame_type & 0x2) {
         if (buf_size < c->mb_width * c->mb_height) {
             av_log(avctx, AV_LOG_ERROR, "Packet too small\n");
             return AVERROR_INVALIDDATA;
@@ -300,8 +527,8 @@
         if ((ret = ff_reget_buffer(avctx, c->pic)) < 0)
             return ret;
 
-        c->pic->key_frame = frame_type & 0x20 ? 1 : 0;
-        c->pic->pict_type = frame_type & 0x20 ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+        c->pic->key_frame = 1;
+        c->pic->pict_type = AV_PICTURE_TYPE_I;
 
         bytestream2_get_be32(&gb); // frame size;
         c->ac_quant        = bytestream2_get_byte(&gb);
@@ -309,7 +536,7 @@
         c->chroma_dc_quant = 32;
 
         if ((ret = init_get_bits8(&c->gb, buf + bytestream2_tell(&gb),
-                                  (buf_size - bytestream2_tell(&gb)))) < 0)
+                                  buf_size - bytestream2_tell(&gb))) < 0)
             return ret;
 
         for (i = 0; i < 3; i++)
@@ -324,33 +551,140 @@
                     mb_ret = ret;
             }
         }
+        extend_edges(c->pic, c->tile_size);
+    } else {
+        int plane;
 
-        if ((ret = av_frame_ref(data, c->pic)) < 0)
+        if ((ret = ff_reget_buffer(avctx, c->pic)) < 0)
             return ret;
 
-        *got_frame = 1;
-    } else {
+        ret = av_frame_copy(c->pic, c->prev);
+        if (ret < 0)
+            return ret;
+
+        if ((ret = init_get_bits8(&c->gb, buf + bytestream2_tell(&gb),
+                                  buf_size - bytestream2_tell(&gb))) < 0)
+            return ret;
+
+        mvi_reset(&c->mvi, c->pmb_width, c->pmb_height, 1 << c->tile_shift);
+
+        for (j = 0; j < c->pmb_height; j++) {
+            for (i = 0; i < c->pmb_width; i++) {
+                if (get_bits1(&c->gb)) {
+                    MV mv = mvi_predict(&c->mvi, i, j, zero_mv);
+
+                    for (plane = 0; plane < 3; plane++) {
+                        int16_t x = plane == 0 ? i << c->tile_shift : i << (c->tile_shift - 1);
+                        int16_t y = plane == 0 ? j << c->tile_shift : j << (c->tile_shift - 1);
+                        int16_t size = plane == 0 ? 1 << c->tile_shift : 1 << (c->tile_shift - 1);
+                        int16_t mx = plane == 0 ? mv.x : mv.x / 2;
+                        int16_t my = plane == 0 ? mv.y : mv.y / 2;
+
+                        ret = copy_block(avctx, c->pic, c->prev, plane, x, y, mx, my, size);
+                        if (ret < 0)
+                            mb_ret = ret;
+                    }
+                } else {
+                    int x = i << c->tile_shift;
+                    int y = j << c->tile_shift;
+                    int size = 1 << c->tile_shift;
+                    TileInfo *tile;
+                    MV mv, cmv;
+
+                    tile = decode_tile_info(&c->gb, c->ylev, 0);
+                    if (!tile)
+                        return AVERROR(ENOMEM);
+                    mv = mvi_predict(&c->mvi, i, j, tile->mv);
+                    ret = restore_tree(avctx, c->pic, c->prev, 0, x, y, size, tile, mv);
+                    if (ret < 0)
+                        mb_ret = ret;
+                    x = i << (c->tile_shift - 1);
+                    y = j << (c->tile_shift - 1);
+                    size = 1 << (c->tile_shift - 1);
+                    cmv.x = mv.x + tile->mv.x;
+                    cmv.y = mv.y + tile->mv.y;
+                    cmv.x /= 2;
+                    cmv.y /= 2;
+                    av_freep(&tile);
+                    tile = decode_tile_info(&c->gb, c->ulev, 0);
+                    if (!tile)
+                        return AVERROR(ENOMEM);
+                    ret = restore_tree(avctx, c->pic, c->prev, 1, x, y, size, tile, cmv);
+                    if (ret < 0)
+                        mb_ret = ret;
+                    av_freep(&tile);
+                    tile = decode_tile_info(&c->gb, c->vlev, 0);
+                    if (!tile)
+                        return AVERROR(ENOMEM);
+                    ret = restore_tree(avctx, c->pic, c->prev, 2, x, y, size, tile, cmv);
+                    if (ret < 0)
+                        mb_ret = ret;
+                    av_freep(&tile);
+                }
+            }
+            mvi_update_row(&c->mvi);
+        }
+        extend_edges(c->pic, c->tile_size);
+
+        c->pic->key_frame = 0;
+        c->pic->pict_type = AV_PICTURE_TYPE_P;
     }
 
+    if ((ret = av_frame_ref(data, c->pic)) < 0)
+        return ret;
+
+    FFSWAP(AVFrame *, c->pic, c->prev);
+
+    *got_frame = 1;
+
+    if (get_bits_left(&c->gb) < 0)
+        av_log(c->avctx, AV_LOG_WARNING, "overread %d\n", -get_bits_left(&c->gb));
+
     return mb_ret < 0 ? mb_ret : buf_size;
 }
 
 static av_cold int clv_decode_init(AVCodecContext *avctx)
 {
-    CLVContext * const c = avctx->priv_data;
-    int ret;
+    CLVContext *const c = avctx->priv_data;
+    int ret, w, h;
 
-    c->avctx = avctx;
+    if (avctx->extradata_size == 110) {
+        c->tile_size = AV_RL32(&avctx->extradata[94]);
+    } else if (avctx->extradata_size == 150) {
+        c->tile_size = AV_RB32(&avctx->extradata[134]);
+    } else if (!avctx->extradata_size) {
+        c->tile_size = 16;
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported extradata size: %d\n", avctx->extradata_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    c->tile_shift = av_log2(c->tile_size);
+    if (1 << c->tile_shift != c->tile_size) {
+        av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2.\n", c->tile_size);
+        return AVERROR_INVALIDDATA;
+    }
 
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+    w = avctx->width;
+    h = avctx->height;
+    ret = ff_set_dimensions(avctx, FFALIGN(w, 1 << c->tile_shift), FFALIGN(h, 1 << c->tile_shift));
+    if (ret < 0)
+        return ret;
+    avctx->width  = w;
+    avctx->height = h;
 
-    c->pic = av_frame_alloc();
-    if (!c->pic)
+    c->avctx           = avctx;
+    c->mb_width        = FFALIGN(avctx->width,  16) >> 4;
+    c->mb_height       = FFALIGN(avctx->height, 16) >> 4;
+    c->pmb_width       = (w + c->tile_size - 1) >> c->tile_shift;
+    c->pmb_height      = (h + c->tile_size - 1) >> c->tile_shift;
+    c->pic             = av_frame_alloc();
+    c->prev            = av_frame_alloc();
+    c->mvi.mv          = av_calloc(c->pmb_width * 2, sizeof(*c->mvi.mv));
+    if (!c->pic || !c->prev || !c->mvi.mv)
         return AVERROR(ENOMEM);
 
-    c->mb_width  = FFALIGN(avctx->width,  16) >> 4;
-    c->mb_height = FFALIGN(avctx->height, 16) >> 4;
-
     ff_idctdsp_init(&c->idsp, avctx);
     ret = init_vlc(&c->dc_vlc, 9, NUM_DC_CODES,
                    clv_dc_bits,  1, 1,
@@ -368,23 +702,205 @@
         return ret;
     }
 
+    ret = init_vlc(&c->ylev[0].flags_cb, 9, FF_ARRAY_ELEMS(clv_flagsy_0_bits),
+                   clv_flagsy_0_bits,  1, 1,
+                   clv_flagsy_0_codes, 2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = init_vlc(&c->ylev[1].flags_cb, 9, FF_ARRAY_ELEMS(clv_flagsy_1_bits),
+                   clv_flagsy_1_bits,  1, 1,
+                   clv_flagsy_1_codes, 2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = init_vlc(&c->ylev[2].flags_cb, 9, FF_ARRAY_ELEMS(clv_flagsy_2_bits),
+                   clv_flagsy_2_bits,  1, 1,
+                   clv_flagsy_2_codes, 2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = init_vlc(&c->ulev[0].flags_cb, 9, FF_ARRAY_ELEMS(clv_flagsu_0_bits),
+                   clv_flagsu_0_bits,  1, 1,
+                   clv_flagsu_0_codes, 2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = init_vlc(&c->ulev[1].flags_cb, 9, FF_ARRAY_ELEMS(clv_flagsu_1_bits),
+                   clv_flagsu_1_bits,  1, 1,
+                   clv_flagsu_1_codes, 2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = init_vlc(&c->vlev[0].flags_cb, 9, FF_ARRAY_ELEMS(clv_flagsv_0_bits),
+                   clv_flagsv_0_bits,  1, 1,
+                   clv_flagsv_0_codes, 2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = init_vlc(&c->vlev[1].flags_cb, 9, FF_ARRAY_ELEMS(clv_flagsv_1_bits),
+                   clv_flagsv_1_bits,  1, 1,
+                   clv_flagsv_1_codes, 2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->ylev[0].mv_cb, 9, FF_ARRAY_ELEMS(clv_mvy_0_bits),
+                             clv_mvy_0_bits,  1, 1,
+                             clv_mvy_0_codes, 2, 2,
+                             clv_mvy_0_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->ylev[1].mv_cb, 9, FF_ARRAY_ELEMS(clv_mvy_1_bits),
+                             clv_mvy_1_bits,  1, 1,
+                             clv_mvy_1_codes, 2, 2,
+                             clv_mvy_1_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->ylev[2].mv_cb, 9, FF_ARRAY_ELEMS(clv_mvy_2_bits),
+                             clv_mvy_2_bits,  1, 1,
+                             clv_mvy_2_codes, 2, 2,
+                             clv_mvy_2_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->ylev[3].mv_cb, 9, FF_ARRAY_ELEMS(clv_mvy_3_bits),
+                             clv_mvy_3_bits,  1, 1,
+                             clv_mvy_3_codes, 2, 2,
+                             clv_mvy_3_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->ulev[1].mv_cb, 9, FF_ARRAY_ELEMS(clv_mvu_1_bits),
+                             clv_mvu_1_bits,  1, 1,
+                             clv_mvu_1_codes, 2, 2,
+                             clv_mvu_1_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->ulev[2].mv_cb, 9, FF_ARRAY_ELEMS(clv_mvu_2_bits),
+                             clv_mvu_2_bits,  1, 1,
+                             clv_mvu_2_codes, 2, 2,
+                             clv_mvu_2_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->vlev[1].mv_cb, 9, FF_ARRAY_ELEMS(clv_mvv_1_bits),
+                             clv_mvv_1_bits,  1, 1,
+                             clv_mvv_1_codes, 2, 2,
+                             clv_mvv_1_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->vlev[2].mv_cb, 9, FF_ARRAY_ELEMS(clv_mvv_2_bits),
+                             clv_mvv_2_bits,  1, 1,
+                             clv_mvv_2_codes, 2, 2,
+                             clv_mvv_2_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->ylev[1].bias_cb, 9, FF_ARRAY_ELEMS(clv_biasy_1_bits),
+                             clv_biasy_1_bits,  1, 1,
+                             clv_biasy_1_codes, 2, 2,
+                             clv_biasy_1_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->ylev[2].bias_cb, 9, FF_ARRAY_ELEMS(clv_biasy_2_bits),
+                             clv_biasy_2_bits,  1, 1,
+                             clv_biasy_2_codes, 2, 2,
+                             clv_biasy_2_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->ylev[3].bias_cb, 9, FF_ARRAY_ELEMS(clv_biasy_3_bits),
+                             clv_biasy_3_bits,  1, 1,
+                             clv_biasy_3_codes, 2, 2,
+                             clv_biasy_3_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->ulev[1].bias_cb, 9, FF_ARRAY_ELEMS(clv_biasu_1_bits),
+                             clv_biasu_1_bits,  1, 1,
+                             clv_biasu_1_codes, 2, 2,
+                             clv_biasu_1_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->ulev[2].bias_cb, 9, FF_ARRAY_ELEMS(clv_biasu_2_bits),
+                             clv_biasu_2_bits,  1, 1,
+                             clv_biasu_2_codes, 2, 2,
+                             clv_biasu_2_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->vlev[1].bias_cb, 9, FF_ARRAY_ELEMS(clv_biasv_1_bits),
+                             clv_biasv_1_bits,  1, 1,
+                             clv_biasv_1_codes, 2, 2,
+                             clv_biasv_1_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    ret = ff_init_vlc_sparse(&c->vlev[2].bias_cb, 9, FF_ARRAY_ELEMS(clv_biasv_2_bits),
+                             clv_biasv_2_bits,  1, 1,
+                             clv_biasv_2_codes, 2, 2,
+                             clv_biasv_2_syms,  2, 2, 0);
+    if (ret)
+        return ret;
+
+    c->ylev[0].mv_esc = 0x0909;
+    c->ylev[1].mv_esc = 0x0A0A;
+    c->ylev[2].mv_esc = 0x1010;
+    c->ylev[3].mv_esc = 0x1313;
+    c->ulev[1].mv_esc = 0x0808;
+    c->ulev[2].mv_esc = 0x0B0B;
+    c->vlev[1].mv_esc = 0x0808;
+    c->vlev[2].mv_esc = 0x0B0B;
+
+    c->ylev[1].bias_esc = 0x100;
+    c->ylev[2].bias_esc = 0x100;
+    c->ylev[3].bias_esc = 0x100;
+    c->ulev[1].bias_esc = 0x100;
+    c->ulev[2].bias_esc = 0x100;
+    c->vlev[1].bias_esc = 0x100;
+    c->vlev[2].bias_esc = 0x100;
+
     return 0;
 }
 
 static av_cold int clv_decode_end(AVCodecContext *avctx)
 {
-    CLVContext * const c = avctx->priv_data;
+    CLVContext *const c = avctx->priv_data;
+    int i;
 
+    av_frame_free(&c->prev);
     av_frame_free(&c->pic);
 
+    av_freep(&c->mvi.mv);
+
     ff_free_vlc(&c->dc_vlc);
     ff_free_vlc(&c->ac_vlc);
+    for (i = 0; i < 4; i++) {
+        ff_free_vlc(&c->ylev[i].mv_cb);
+        ff_free_vlc(&c->ylev[i].flags_cb);
+        ff_free_vlc(&c->ylev[i].bias_cb);
+    }
+    for (i = 0; i < 3; i++) {
+        ff_free_vlc(&c->ulev[i].mv_cb);
+        ff_free_vlc(&c->ulev[i].flags_cb);
+        ff_free_vlc(&c->ulev[i].bias_cb);
+        ff_free_vlc(&c->vlev[i].mv_cb);
+        ff_free_vlc(&c->vlev[i].flags_cb);
+        ff_free_vlc(&c->vlev[i].bias_cb);
+    }
 
     return 0;
 }
 
 AVCodec ff_clearvideo_decoder = {
     .name           = "clearvideo",
+    .long_name      = NULL_IF_CONFIG_SMALL("Iterated Systems ClearVideo"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_CLEARVIDEO,
     .priv_data_size = sizeof(CLVContext),
@@ -392,5 +908,5 @@
     .close          = clv_decode_end,
     .decode         = clv_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
-    .long_name      = NULL_IF_CONFIG_SMALL("Iterated Systems ClearVideo"),
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
 };

diff --git a/libavcodec/clearvideodata.h b/libavcodec/clearvideodata.h
new file mode 100644
index 0000000..43d12de
--- /dev/null
+++ b/libavcodec/clearvideodata.h

@@ -0,0 +1,1832 @@
+/*
+ * ClearVideo decoder
+ * Copyright (c) 2012-2018 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CLEARVIDEODATA_H
+#define AVCODEC_CLEARVIDEODATA_H
+
+#include "libavutil/common.h"
+
+#define NUM_DC_CODES 127
+#define NUM_AC_CODES 103
+
+static const uint8_t clv_dc_codes[NUM_DC_CODES] = {
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+    0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x07, 0x0B,
+    0x0C, 0x08, 0x08, 0x09, 0x04, 0x06, 0x07, 0x05,
+    0x04, 0x05, 0x04, 0x06, 0x05, 0x06, 0x07, 0x05,
+    0x06, 0x07, 0x06, 0x07, 0x08, 0x06, 0x07, 0x08,
+    0x09, 0x0A, 0x0B, 0x07, 0x08, 0x09, 0x07, 0x08,
+    0x06, 0x07, 0x08, 0x06, 0x04, 0x05, 0x02, 0x01,
+    0x03, 0x06, 0x07, 0x07, 0x09, 0x0A, 0x0B, 0x09,
+    0x0A, 0x0B, 0x0A, 0x0B, 0x0C, 0x0D, 0x0C, 0x09,
+    0x0D, 0x0A, 0x0B, 0x08, 0x09, 0x0A, 0x0B, 0x07,
+    0x08, 0x09, 0x0A, 0x0B, 0x06, 0x07, 0x06, 0x08,
+    0x07, 0x09, 0x0A, 0x0B, 0x09, 0x0A, 0x0B, 0x0C,
+    0x14, 0x0D, 0x0D, 0x0E, 0x0F, 0x15, 0x15, 0x16,
+    0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E,
+    0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,
+};
+
+static const uint8_t clv_dc_bits[NUM_DC_CODES] = {
+    22, 22, 22, 22, 22, 22, 22, 22,
+    22, 22, 22, 22, 22, 22, 22, 22,
+    22, 22, 22, 21, 22, 22, 19, 20,
+    20, 19, 18, 18, 15, 17, 17, 16,
+    14, 15, 12, 13, 14, 14, 14, 12,
+    12, 12, 11, 11, 11, 10, 10, 10,
+    10, 10, 10,  9,  9,  9,  8,  8,
+     7,  7,  7,  6,  5,  5,  3,  1,
+     3,  5,  5,  6,  7,  7,  7,  8,
+     8,  8,  9,  9,  9,  9, 10, 11,
+    10, 11, 11, 12, 12, 12, 12, 13,
+    14, 14, 14, 14, 15, 15, 16, 17,
+    16, 17, 18, 18, 19, 19, 19, 19,
+    21, 19, 20, 19, 19, 21, 22, 22,
+    22, 22, 22, 22, 22, 22, 22, 22,
+    22, 22, 22, 22, 22, 22, 22,
+};
+
+static const uint16_t clv_ac_syms[NUM_AC_CODES] = {
+    0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
+    0x0009, 0x000A, 0x000B, 0x000C, 0x0011, 0x0012, 0x0013, 0x0014,
+    0x0015, 0x0016, 0x0021, 0x0022, 0x0023, 0x0024, 0x0031, 0x0032,
+    0x0033, 0x0041, 0x0042, 0x0043, 0x0051, 0x0052, 0x0053, 0x0061,
+    0x0062, 0x0063, 0x0071, 0x0072, 0x0081, 0x0082, 0x0091, 0x0092,
+    0x00A1, 0x00A2, 0x00B1, 0x00C1, 0x00D1, 0x00E1, 0x00F1, 0x0101,
+    0x0111, 0x0121, 0x0131, 0x0141, 0x0151, 0x0161, 0x0171, 0x0181,
+    0x0191, 0x01A1, 0x1001, 0x1002, 0x1003, 0x1011, 0x1012, 0x1021,
+    0x1031, 0x1041, 0x1051, 0x1061, 0x1071, 0x1081, 0x1091, 0x10A1,
+    0x10B1, 0x10C1, 0x10D1, 0x10E1, 0x10F1, 0x1101, 0x1111, 0x1121,
+    0x1131, 0x1141, 0x1151, 0x1161, 0x1171, 0x1181, 0x1191, 0x11A1,
+    0x11B1, 0x11C1, 0x11D1, 0x11E1, 0x11F1, 0x1201, 0x1211, 0x1221,
+    0x1231, 0x1241, 0x1251, 0x1261, 0x1271, 0x1281, 0x1BFF,
+};
+
+static const uint8_t clv_ac_codes[NUM_AC_CODES] = {
+    0x02, 0x0F, 0x15, 0x17, 0x1F, 0x25, 0x24, 0x21,
+    0x20, 0x07, 0x06, 0x20, 0x06, 0x14, 0x1E, 0x0F,
+    0x21, 0x50, 0x0E, 0x1D, 0x0E, 0x51, 0x0D, 0x23,
+    0x0D, 0x0C, 0x22, 0x52, 0x0B, 0x0C, 0x53, 0x13,
+    0x0B, 0x54, 0x12, 0x0A, 0x11, 0x09, 0x10, 0x08,
+    0x16, 0x55, 0x15, 0x14, 0x1C, 0x1B, 0x21, 0x20,
+    0x1F, 0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x22, 0x23,
+    0x56, 0x57, 0x07, 0x19, 0x05, 0x0F, 0x04, 0x0E,
+    0x0D, 0x0C, 0x13, 0x12, 0x11, 0x10, 0x1A, 0x19,
+    0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x18, 0x17,
+    0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x07, 0x06,
+    0x05, 0x04, 0x24, 0x25, 0x26, 0x27, 0x58, 0x59,
+    0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x03,
+};
+
+static const uint8_t clv_ac_bits[NUM_AC_CODES] = {
+     2,  4,  6,  7,  8,  9,  9, 10,
+    10, 11, 11, 11,  3,  6,  8, 10,
+    11, 12,  4,  8, 10, 12,  5,  9,
+    10,  5,  9, 12,  5, 10, 12,  6,
+    10, 12,  6, 10,  6, 10,  6, 10,
+     7, 12,  7,  7,  8,  8,  9,  9,
+     9,  9,  9,  9,  9,  9, 11, 11,
+    12, 12,  4,  9, 11,  6, 11,  6,
+     6,  6,  7,  7,  7,  7,  8,  8,
+     8,  8,  8,  8,  8,  8,  9,  9,
+     9,  9,  9,  9,  9,  9, 10, 10,
+    10, 10, 11, 11, 11, 11, 12, 12,
+    12, 12, 12, 12, 12, 12,  7,
+};
+
+static const uint8_t clv_flagsy_0_bits[] = {
+     3,  4,  4,  4,  4,  4,  6,  5,  4,  7,  4,  5,  4,  7,  5,  2,
+};
+
+static const uint16_t clv_flagsy_0_codes[] = {
+    0x0002, 0x0009, 0x000B, 0x0006, 0x000C, 0x0007, 0x003E, 0x001C,
+    0x000D, 0x007E, 0x000A, 0x001D, 0x0008, 0x007F, 0x001E, 0x0000,
+};
+
+static const uint8_t clv_flagsy_1_bits[] = {
+     2,  4,  4,  3,  4,  4,  7,  6,  4,  6,  4,  6,  4,  8,  8,  3,
+};
+
+static const uint16_t clv_flagsy_1_codes[] = {
+    0x0000, 0x000A, 0x000C, 0x0003, 0x000B, 0x0009, 0x007E, 0x003D,
+    0x000D, 0x003E, 0x000E, 0x003C, 0x0008, 0x00FE, 0x00FF, 0x0002,
+};
+
+static const uint8_t clv_flagsy_2_bits[] = {
+     1,  4,  4,  4,  4,  5,  7,  5,  4,  6,  5,  8,  4,  9, 10, 10,
+};
+
+static const uint16_t clv_flagsy_2_codes[] = {
+    0x0000, 0x000C, 0x000B, 0x0008, 0x000A, 0x001C, 0x007E, 0x001D,
+    0x000D, 0x003E, 0x001E, 0x00FE, 0x0009, 0x01FE, 0x03FE, 0x03FF,
+};
+
+static const uint8_t clv_flagsu_0_bits[] = {
+     1,  4,  4,  4,  5,  5,  9,  7,  5,  9,  4,  7,  4,  8,  7,  4,
+};
+
+static const uint16_t clv_flagsu_0_codes[] = {
+    0x0000, 0x000B, 0x000D, 0x0009, 0x001D, 0x001C, 0x01FF, 0x007D,
+    0x001E, 0x01FE, 0x000C, 0x007C, 0x000A, 0x00FE, 0x007E, 0x0008,
+};
+
+static const uint8_t clv_flagsu_1_bits[] = {
+     1,  4,  4,  4,  4,  4,  8,  6,  4,  8,  5,  8,  4, 10,  9, 10,
+};
+
+static const uint16_t clv_flagsu_1_codes[] = {
+    0x0000, 0x000C, 0x0008, 0x000A, 0x000B, 0x000E, 0x00FD, 0x003E,
+    0x000D, 0x00FC, 0x001E, 0x00FE, 0x0009, 0x03FE, 0x01FE, 0x03FF,
+};
+
+static const uint8_t clv_flagsv_0_bits[] = {
+     1,  4,  5,  4,  5,  5,  8, 10,  5,  9,  5,  6,  4, 10,  7,  3,
+};
+
+static const uint16_t clv_flagsv_0_codes[] = {
+    0x0000, 0x000A, 0x001B, 0x000C, 0x001E, 0x001C, 0x00FE, 0x03FE,
+    0x001D, 0x01FE, 0x001A, 0x003E, 0x000B, 0x03FF, 0x007E, 0x0004,
+};
+
+static const uint8_t clv_flagsv_1_bits[] = {
+     1,  4,  4,  4,  4,  5,  8,  6,  3,  7,  5, 10,  5, 11,  9, 11,
+};
+
+static const uint16_t clv_flagsv_1_codes[] = {
+    0x0000, 0x000D, 0x000C, 0x000A, 0x000B, 0x001D, 0x00FE, 0x003E,
+    0x0004, 0x007E, 0x001E, 0x03FE, 0x001C, 0x07FE, 0x01FE, 0x07FF,
+};
+
+static const uint8_t clv_mvy_0_bits[] = {
+    16, 14, 13, 13, 13, 12, 11, 11,  9, 11, 11, 12, 13, 13, 13, 14,
+    16, 15, 14, 14, 14, 13, 13, 12, 10,  7, 10, 12, 13, 13, 14, 14,
+    14, 15, 15, 14, 14, 14, 13, 13, 11, 10,  7, 10, 11, 13, 13, 14,
+    14, 14, 15, 15, 14, 14, 13, 13, 12, 11, 10,  7, 10, 11, 12, 13,
+    13, 14, 14, 15, 16, 15, 14, 12, 12, 12, 11, 10,  6, 10, 11, 12,
+    12, 12, 14, 15, 16, 15, 14, 13, 13, 12, 11, 10,  9,  6,  9, 10,
+    11, 12, 13, 13, 14, 15, 14, 14, 13, 12, 12, 11, 10,  8,  6,  8,
+    10, 11, 12, 12, 13, 14, 14, 14, 13, 13, 13, 11, 11,  9,  7,  4,
+     7,  9, 11, 11, 12, 13, 13, 14, 11, 10, 10,  9,  9,  8,  7,  5,
+     1,  5,  7,  8,  9,  9, 10, 10, 11, 14, 13, 13, 12, 11, 11,  9,
+     7,  4,  7,  9, 11, 11, 13, 13, 13, 14, 14, 14, 13, 12, 12, 11,
+    10,  8,  6,  8, 10, 11, 12, 12, 13, 14, 14, 15, 14, 13, 13, 12,
+    11, 10,  9,  7,  9, 10, 11, 12, 13, 13, 14, 15, 16, 15, 14, 12,
+    12, 12, 11, 10,  6, 10, 11, 12, 12, 12, 14, 15, 16, 15, 14, 14,
+    13, 13, 12, 11, 10,  7, 10, 11, 12, 13, 13, 14, 14, 15, 15, 14,
+    14, 14, 13, 13, 11, 10,  7, 10, 11, 13, 13, 14, 14, 14, 15, 15,
+    14, 14, 14, 13, 13, 12, 10,  7, 10, 12, 13, 13, 14, 14, 14, 15,
+    16, 14, 13, 13, 13, 12, 11, 11,  9, 11, 11, 12, 13, 13, 13, 14,
+    16,  6,
+};
+
+static const uint16_t clv_mvy_0_codes[] = {
+    0xFFFD, 0x3FE5, 0x1FD8, 0x1FC4, 0x1FBC, 0x0FCB, 0x07CF, 0x07C4,
+    0x01D7, 0x07C6, 0x07CE, 0x0FCA, 0x1FBD, 0x1FC2, 0x1FD9, 0x3FE4,
+    0xFFFE, 0x7FF0, 0x3FEF, 0x3FD2, 0x3FC9, 0x1FCC, 0x1FC0, 0x0FB6,
+    0x03D6, 0x0070, 0x03D7, 0x0FB7, 0x1FC1, 0x1FCD, 0x3FCB, 0x3FD0,
+    0x3FED, 0x7FF2, 0x7FFB, 0x3FDC, 0x3FD9, 0x3FD4, 0x1FB6, 0x1FAE,
+    0x07C0, 0x03BC, 0x006D, 0x03BD, 0x07C1, 0x1FAF, 0x1FB7, 0x3FD1,
+    0x3FDB, 0x3FDF, 0x7FF9, 0x7FEE, 0x3FF0, 0x3FC7, 0x1FC9, 0x1FA7,
+    0x0FAD, 0x07D2, 0x03CE, 0x006C, 0x03CF, 0x07D0, 0x0FAF, 0x1FA6,
+    0x1FC6, 0x3FC4, 0x3FF1, 0x7FED, 0xFFFB, 0x7FF6, 0x3FE6, 0x0FCC,
+    0x0FC4, 0x0FB0, 0x07B0, 0x03C6, 0x0031, 0x03C7, 0x07B1, 0x0FB1,
+    0x0FC5, 0x0FCD, 0x3FEA, 0x7FF7, 0xFFF9, 0x7FE9, 0x3FCE, 0x1FCF,
+    0x1FB2, 0x0FB8, 0x07BC, 0x03D0, 0x01DA, 0x002F, 0x01DB, 0x03D1,
+    0x07BE, 0x0FBA, 0x1FB4, 0x1FD0, 0x3FCD, 0x7FEB, 0x3FE1, 0x3FC1,
+    0x1FD3, 0x0FC3, 0x0FBE, 0x07B6, 0x03C4, 0x00E4, 0x002D, 0x00E5,
+    0x03C5, 0x07B7, 0x0FBF, 0x0FC1, 0x1FD2, 0x3FC3, 0x3FE2, 0x3FBF,
+    0x1FDB, 0x1FAD, 0x1FA5, 0x07CB, 0x07BB, 0x01D5, 0x0068, 0x0008,
+    0x0065, 0x01D2, 0x07B8, 0x07C8, 0x0FD0, 0x1FAA, 0x1FDA, 0x3FBC,
+    0x07D4, 0x03CA, 0x03C0, 0x01D8, 0x01D0, 0x00E6, 0x0069, 0x0014,
+    0x0000, 0x0015, 0x006A, 0x00E7, 0x01D1, 0x01D9, 0x03C1, 0x03CB,
+    0x07D5, 0x3FBE, 0x1FDC, 0x1FAB, 0x0FD1, 0x07C9, 0x07B9, 0x01D3,
+    0x0066, 0x0009, 0x0067, 0x01D4, 0x07BA, 0x07CA, 0x1FA4, 0x1FAC,
+    0x1FDD, 0x3FBD, 0x3FE0, 0x3FC0, 0x1FD5, 0x0FC0, 0x0FBC, 0x07B4,
+    0x03C2, 0x00E2, 0x002C, 0x00E3, 0x03C3, 0x07B5, 0x0FBD, 0x0FC2,
+    0x1FD7, 0x3FC2, 0x3FE3, 0x7FEA, 0x3FCC, 0x1FCE, 0x1FB3, 0x0FB9,
+    0x07BD, 0x03D2, 0x01DC, 0x0064, 0x01DD, 0x03D3, 0x07BF, 0x0FBB,
+    0x1FB5, 0x1FD1, 0x3FCF, 0x7FE8, 0xFFFA, 0x7FF4, 0x3FEB, 0x0FCE,
+    0x0FC6, 0x0FB2, 0x07B2, 0x03C8, 0x0030, 0x03C9, 0x07B3, 0x0FB3,
+    0x0FC7, 0x0FCF, 0x3FE9, 0x7FF5, 0xFFF8, 0x7FF3, 0x3FF3, 0x3FC6,
+    0x1FC8, 0x1FA8, 0x0FAC, 0x07D1, 0x03CC, 0x006B, 0x03CD, 0x07D3,
+    0x0FAE, 0x1FA9, 0x1FC7, 0x3FC5, 0x3FF2, 0x7FEC, 0x7FFA, 0x3FDE,
+    0x3FDA, 0x3FD7, 0x1FB9, 0x1FB0, 0x07C2, 0x03BE, 0x006E, 0x03BF,
+    0x07C3, 0x1FB1, 0x1FB8, 0x3FD3, 0x3FD8, 0x3FDD, 0x7FF8, 0x7FEF,
+    0x3FEE, 0x3FD6, 0x3FC8, 0x1FCB, 0x1FBE, 0x0FB5, 0x03D4, 0x006F,
+    0x03D5, 0x0FB4, 0x1FBF, 0x1FCA, 0x3FCA, 0x3FD5, 0x3FEC, 0x7FF1,
+    0xFFFF, 0x3FE8, 0x1FD4, 0x1FC5, 0x1FBA, 0x0FC9, 0x07CD, 0x07C7,
+    0x01D6, 0x07C5, 0x07CC, 0x0FC8, 0x1FBB, 0x1FC3, 0x1FD6, 0x3FE7,
+    0xFFFC, 0x002E,
+};
+
+static const uint16_t clv_mvy_0_syms[] = {
+    0xF8F8, 0xF9F8, 0xFAF8, 0xFBF8, 0xFCF8, 0xFDF8, 0xFEF8, 0xFFF8,
+    0x00F8, 0x01F8, 0x02F8, 0x03F8, 0x04F8, 0x05F8, 0x06F8, 0x07F8,
+    0x08F8, 0xF8F9, 0xF9F9, 0xFAF9, 0xFBF9, 0xFCF9, 0xFDF9, 0xFEF9,
+    0xFFF9, 0x00F9, 0x01F9, 0x02F9, 0x03F9, 0x04F9, 0x05F9, 0x06F9,
+    0x07F9, 0x08F9, 0xF8FA, 0xF9FA, 0xFAFA, 0xFBFA, 0xFCFA, 0xFDFA,
+    0xFEFA, 0xFFFA, 0x00FA, 0x01FA, 0x02FA, 0x03FA, 0x04FA, 0x05FA,
+    0x06FA, 0x07FA, 0x08FA, 0xF8FB, 0xF9FB, 0xFAFB, 0xFBFB, 0xFCFB,
+    0xFDFB, 0xFEFB, 0xFFFB, 0x00FB, 0x01FB, 0x02FB, 0x03FB, 0x04FB,
+    0x05FB, 0x06FB, 0x07FB, 0x08FB, 0xF8FC, 0xF9FC, 0xFAFC, 0xFBFC,
+    0xFCFC, 0xFDFC, 0xFEFC, 0xFFFC, 0x00FC, 0x01FC, 0x02FC, 0x03FC,
+    0x04FC, 0x05FC, 0x06FC, 0x07FC, 0x08FC, 0xF8FD, 0xF9FD, 0xFAFD,
+    0xFBFD, 0xFCFD, 0xFDFD, 0xFEFD, 0xFFFD, 0x00FD, 0x01FD, 0x02FD,
+    0x03FD, 0x04FD, 0x05FD, 0x06FD, 0x07FD, 0x08FD, 0xF8FE, 0xF9FE,
+    0xFAFE, 0xFBFE, 0xFCFE, 0xFDFE, 0xFEFE, 0xFFFE, 0x00FE, 0x01FE,
+    0x02FE, 0x03FE, 0x04FE, 0x05FE, 0x06FE, 0x07FE, 0x08FE, 0xF8FF,
+    0xF9FF, 0xFAFF, 0xFBFF, 0xFCFF, 0xFDFF, 0xFEFF, 0xFFFF, 0x00FF,
+    0x01FF, 0x02FF, 0x03FF, 0x04FF, 0x05FF, 0x06FF, 0x07FF, 0x08FF,
+    0xF800, 0xF900, 0xFA00, 0xFB00, 0xFC00, 0xFD00, 0xFE00, 0xFF00,
+    0x0000, 0x0100, 0x0200, 0x0300, 0x0400, 0x0500, 0x0600, 0x0700,
+    0x0800, 0xF801, 0xF901, 0xFA01, 0xFB01, 0xFC01, 0xFD01, 0xFE01,
+    0xFF01, 0x0001, 0x0101, 0x0201, 0x0301, 0x0401, 0x0501, 0x0601,
+    0x0701, 0x0801, 0xF802, 0xF902, 0xFA02, 0xFB02, 0xFC02, 0xFD02,
+    0xFE02, 0xFF02, 0x0002, 0x0102, 0x0202, 0x0302, 0x0402, 0x0502,
+    0x0602, 0x0702, 0x0802, 0xF803, 0xF903, 0xFA03, 0xFB03, 0xFC03,
+    0xFD03, 0xFE03, 0xFF03, 0x0003, 0x0103, 0x0203, 0x0303, 0x0403,
+    0x0503, 0x0603, 0x0703, 0x0803, 0xF804, 0xF904, 0xFA04, 0xFB04,
+    0xFC04, 0xFD04, 0xFE04, 0xFF04, 0x0004, 0x0104, 0x0204, 0x0304,
+    0x0404, 0x0504, 0x0604, 0x0704, 0x0804, 0xF805, 0xF905, 0xFA05,
+    0xFB05, 0xFC05, 0xFD05, 0xFE05, 0xFF05, 0x0005, 0x0105, 0x0205,
+    0x0305, 0x0405, 0x0505, 0x0605, 0x0705, 0x0805, 0xF806, 0xF906,
+    0xFA06, 0xFB06, 0xFC06, 0xFD06, 0xFE06, 0xFF06, 0x0006, 0x0106,
+    0x0206, 0x0306, 0x0406, 0x0506, 0x0606, 0x0706, 0x0806, 0xF807,
+    0xF907, 0xFA07, 0xFB07, 0xFC07, 0xFD07, 0xFE07, 0xFF07, 0x0007,
+    0x0107, 0x0207, 0x0307, 0x0407, 0x0507, 0x0607, 0x0707, 0x0807,
+    0xF808, 0xF908, 0xFA08, 0xFB08, 0xFC08, 0xFD08, 0xFE08, 0xFF08,
+    0x0008, 0x0108, 0x0208, 0x0308, 0x0408, 0x0508, 0x0608, 0x0708,
+    0x0808, 0x0909,
+};
+
+static const uint8_t clv_mvy_1_bits[] = {
+    15, 15, 15, 15, 14, 14, 13, 13, 11,  9, 11, 13, 13, 14, 14, 15,
+    15, 15, 15, 15, 14, 14, 13, 13, 12, 12, 12, 10,  9, 10, 12, 12,
+    12, 13, 13, 14, 14, 15, 15, 15, 14, 14, 13, 13, 13, 12, 11,  8,
+    11, 12, 13, 13, 13, 14, 14, 15, 15, 14, 14, 14, 14, 13, 12, 12,
+    12, 10,  8, 10, 12, 12, 12, 13, 14, 14, 14, 14, 15, 14, 14, 13,
+    13, 12, 12, 11, 10,  8, 10, 11, 12, 12, 13, 13, 14, 14, 15, 14,
+    14, 13, 13, 13, 12, 12, 11,  9,  7,  9, 11, 12, 12, 13, 13, 13,
+    14, 14, 14, 14, 13, 13, 13, 12, 11, 10,  9,  7,  9, 10, 11, 12,
+    13, 13, 13, 14, 14, 14, 13, 13, 12, 12, 11, 11, 10,  8,  7,  8,
+    10, 11, 11, 12, 12, 13, 13, 14, 13, 13, 13, 12, 11, 11, 10,  9,
+     6,  4,  6,  9, 10, 11, 12, 12, 13, 13, 13, 12, 11, 10, 10, 10,
+     9,  9,  7,  5,  1,  5,  7,  9,  9, 10, 10, 10, 11, 12, 13, 13,
+    13, 12, 11, 11, 10,  9,  6,  4,  6,  9, 10, 11, 11, 12, 13, 13,
+    13, 14, 13, 13, 12, 12, 11, 11, 10,  8,  7,  8, 10, 11, 11, 12,
+    12, 13, 13, 14, 14, 14, 13, 13, 13, 12, 11, 10,  9,  7,  9, 10,
+    11, 12, 13, 13, 13, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11,  9,
+     7,  9, 11, 12, 12, 13, 13, 13, 14, 14, 15, 14, 14, 13, 13, 12,
+    12, 11, 10,  8, 10, 11, 12, 12, 13, 13, 14, 14, 15, 14, 14, 14,
+    14, 13, 12, 12, 12, 10,  8, 10, 12, 12, 12, 13, 14, 14, 14, 14,
+    15, 15, 14, 14, 13, 13, 13, 12, 11,  8, 11, 12, 13, 13, 13, 14,
+    14, 15, 15, 15, 14, 14, 13, 13, 12, 12, 12, 10,  9, 10, 12, 12,
+    12, 13, 13, 14, 14, 15, 15, 15, 15, 15, 14, 14, 13, 13, 11,  9,
+    11, 13, 13, 14, 14, 15, 15, 15, 15,  5,
+};
+
+static const uint16_t clv_mvy_1_codes[] = {
+    0x7FF9, 0x7FF6, 0x7FEB, 0x7FE3, 0x3FCF, 0x3FB3, 0x1FBD, 0x1FA1,
+    0x07AD, 0x01CE, 0x07AF, 0x1FA0, 0x1FBB, 0x3FB0, 0x3FCC, 0x7FE2,
+    0x7FE9, 0x7FF4, 0x7FFB, 0x7FF1, 0x3FE7, 0x3FBD, 0x1FA5, 0x1F9B,
+    0x0FB4, 0x0FAF, 0x0FAA, 0x03CC, 0x01CD, 0x03CD, 0x0FAB, 0x0FAD,
+    0x0FB1, 0x1F9C, 0x1FA3, 0x3FBE, 0x3FE6, 0x7FF0, 0x7FFC, 0x7FE5,
+    0x3FB5, 0x3FAE, 0x1FB4, 0x1FAA, 0x1F97, 0x0F85, 0x07A2, 0x00DD,
+    0x07A3, 0x0F86, 0x1F99, 0x1FAD, 0x1FB2, 0x3FAC, 0x3FB7, 0x7FE4,
+    0x7FFD, 0x3FEA, 0x3FD8, 0x3FC3, 0x3FBB, 0x1FC9, 0x0FBC, 0x0F97,
+    0x0F8F, 0x03B8, 0x00DA, 0x03B9, 0x0F90, 0x0F98, 0x0FB9, 0x1FC6,
+    0x3FBA, 0x3FC0, 0x3FD9, 0x3FEB, 0x7FEF, 0x3FEE, 0x3FD7, 0x1FC3,
+    0x1F96, 0x0FC0, 0x0FA8, 0x07AA, 0x03BE, 0x00D9, 0x03BF, 0x07AB,
+    0x0FA7, 0x0FBF, 0x1F98, 0x1FC5, 0x3FD6, 0x3FEF, 0x7FEE, 0x3FDC,
+    0x3FCA, 0x1FBF, 0x1F8B, 0x1F87, 0x0FA2, 0x0F94, 0x07A5, 0x01D4,
+    0x0069, 0x01D5, 0x07A6, 0x0F95, 0x0FA3, 0x1F89, 0x1F8D, 0x1FC0,
+    0x3FC6, 0x3FDE, 0x3FE0, 0x3FD3, 0x1FB8, 0x1F8F, 0x1F84, 0x0F89,
+    0x07BC, 0x03C6, 0x01C6, 0x0067, 0x01C7, 0x03C7, 0x07BD, 0x0F87,
+    0x1F82, 0x1F8A, 0x1FB6, 0x3FD1, 0x3FE2, 0x3FC5, 0x1FCE, 0x1FAE,
+    0x0FB5, 0x0F8B, 0x07B4, 0x07B0, 0x03B4, 0x00DE, 0x0064, 0x00DF,
+    0x03B5, 0x07B1, 0x07B5, 0x0F8C, 0x0FB6, 0x1FAF, 0x1FD1, 0x3FCB,
+    0x1FD3, 0x1FCC, 0x1FA7, 0x0F9B, 0x07BE, 0x079C, 0x03C0, 0x01C8,
+    0x002E, 0x0008, 0x002F, 0x01C9, 0x03C1, 0x079D, 0x0F82, 0x0F9A,
+    0x1FA9, 0x1FCA, 0x1FD4, 0x0F9F, 0x07B6, 0x03C8, 0x03B2, 0x03B0,
+    0x01D6, 0x01D0, 0x006A, 0x0014, 0x0000, 0x0015, 0x006B, 0x01D1,
+    0x01D7, 0x03B1, 0x03B3, 0x03C9, 0x07B7, 0x0FA0, 0x1FD5, 0x1FCB,
+    0x1FAB, 0x0F9C, 0x07BF, 0x079E, 0x03C2, 0x01CA, 0x0030, 0x0009,
+    0x0031, 0x01CB, 0x03C3, 0x079F, 0x07C0, 0x0F9D, 0x1FAC, 0x1FCD,
+    0x1FD2, 0x3FC8, 0x1FD0, 0x1FB0, 0x0FB7, 0x0F8D, 0x07B8, 0x07B2,
+    0x03B6, 0x00E0, 0x0065, 0x00E1, 0x03B7, 0x07B3, 0x07B9, 0x0F8E,
+    0x0FB8, 0x1FB1, 0x1FCF, 0x3FC9, 0x3FE1, 0x3FD2, 0x1FB7, 0x1F8E,
+    0x1F83, 0x0F88, 0x07BA, 0x03C4, 0x01C4, 0x0066, 0x01C5, 0x03C5,
+    0x07BB, 0x0F8A, 0x1F85, 0x1F90, 0x1FB9, 0x3FD0, 0x3FE3, 0x3FDD,
+    0x3FC7, 0x1FC1, 0x1F91, 0x1F88, 0x0FA4, 0x0F96, 0x07A7, 0x01D2,
+    0x0068, 0x01D3, 0x07A4, 0x0F93, 0x0FA1, 0x1F86, 0x1F8C, 0x1FBE,
+    0x3FC4, 0x3FDF, 0x7FED, 0x3FEC, 0x3FD4, 0x1FC4, 0x1F92, 0x0FBD,
+    0x0FA5, 0x07A8, 0x03BC, 0x00D8, 0x03BD, 0x07A9, 0x0FA6, 0x0FBE,
+    0x1F93, 0x1FC2, 0x3FD5, 0x3FED, 0x7FEC, 0x3FE8, 0x3FDB, 0x3FC1,
+    0x3FB9, 0x1FC7, 0x0FBA, 0x0F9E, 0x0F91, 0x03BA, 0x00DB, 0x03BB,
+    0x0F92, 0x0F99, 0x0FBB, 0x1FC8, 0x3FB8, 0x3FC2, 0x3FDA, 0x3FE9,
+    0x7FFF, 0x7FE7, 0x3FB6, 0x3FAF, 0x1FB3, 0x1FA6, 0x1F94, 0x0F83,
+    0x07A0, 0x00DC, 0x07A1, 0x0F84, 0x1F95, 0x1FA8, 0x1FB5, 0x3FAD,
+    0x3FB4, 0x7FE6, 0x7FFE, 0x7FF3, 0x3FE5, 0x3FBC, 0x1FA4, 0x1F9D,
+    0x0FB2, 0x0FAE, 0x0FA9, 0x03CA, 0x01CC, 0x03CB, 0x0FAC, 0x0FB0,
+    0x0FB3, 0x1F9A, 0x1FA2, 0x3FBF, 0x3FE4, 0x7FF2, 0x7FF8, 0x7FF5,
+    0x7FEA, 0x7FE0, 0x3FCD, 0x3FB1, 0x1FBA, 0x1F9F, 0x07AE, 0x01CF,
+    0x07AC, 0x1F9E, 0x1FBC, 0x3FB2, 0x3FCE, 0x7FE1, 0x7FE8, 0x7FF7,
+    0x7FFA, 0x0016,
+};
+
+static const uint16_t clv_mvy_1_syms[] = {
+    0xF7F7, 0xF8F7, 0xF9F7, 0xFAF7, 0xFBF7, 0xFCF7, 0xFDF7, 0xFEF7,
+    0xFFF7, 0x00F7, 0x01F7, 0x02F7, 0x03F7, 0x04F7, 0x05F7, 0x06F7,
+    0x07F7, 0x08F7, 0x09F7, 0xF7F8, 0xF8F8, 0xF9F8, 0xFAF8, 0xFBF8,
+    0xFCF8, 0xFDF8, 0xFEF8, 0xFFF8, 0x00F8, 0x01F8, 0x02F8, 0x03F8,
+    0x04F8, 0x05F8, 0x06F8, 0x07F8, 0x08F8, 0x09F8, 0xF7F9, 0xF8F9,
+    0xF9F9, 0xFAF9, 0xFBF9, 0xFCF9, 0xFDF9, 0xFEF9, 0xFFF9, 0x00F9,
+    0x01F9, 0x02F9, 0x03F9, 0x04F9, 0x05F9, 0x06F9, 0x07F9, 0x08F9,
+    0x09F9, 0xF7FA, 0xF8FA, 0xF9FA, 0xFAFA, 0xFBFA, 0xFCFA, 0xFDFA,
+    0xFEFA, 0xFFFA, 0x00FA, 0x01FA, 0x02FA, 0x03FA, 0x04FA, 0x05FA,
+    0x06FA, 0x07FA, 0x08FA, 0x09FA, 0xF7FB, 0xF8FB, 0xF9FB, 0xFAFB,
+    0xFBFB, 0xFCFB, 0xFDFB, 0xFEFB, 0xFFFB, 0x00FB, 0x01FB, 0x02FB,
+    0x03FB, 0x04FB, 0x05FB, 0x06FB, 0x07FB, 0x08FB, 0x09FB, 0xF7FC,
+    0xF8FC, 0xF9FC, 0xFAFC, 0xFBFC, 0xFCFC, 0xFDFC, 0xFEFC, 0xFFFC,
+    0x00FC, 0x01FC, 0x02FC, 0x03FC, 0x04FC, 0x05FC, 0x06FC, 0x07FC,
+    0x08FC, 0x09FC, 0xF7FD, 0xF8FD, 0xF9FD, 0xFAFD, 0xFBFD, 0xFCFD,
+    0xFDFD, 0xFEFD, 0xFFFD, 0x00FD, 0x01FD, 0x02FD, 0x03FD, 0x04FD,
+    0x05FD, 0x06FD, 0x07FD, 0x08FD, 0x09FD, 0xF7FE, 0xF8FE, 0xF9FE,
+    0xFAFE, 0xFBFE, 0xFCFE, 0xFDFE, 0xFEFE, 0xFFFE, 0x00FE, 0x01FE,
+    0x02FE, 0x03FE, 0x04FE, 0x05FE, 0x06FE, 0x07FE, 0x08FE, 0x09FE,
+    0xF7FF, 0xF8FF, 0xF9FF, 0xFAFF, 0xFBFF, 0xFCFF, 0xFDFF, 0xFEFF,
+    0xFFFF, 0x00FF, 0x01FF, 0x02FF, 0x03FF, 0x04FF, 0x05FF, 0x06FF,
+    0x07FF, 0x08FF, 0x09FF, 0xF700, 0xF800, 0xF900, 0xFA00, 0xFB00,
+    0xFC00, 0xFD00, 0xFE00, 0xFF00, 0x0000, 0x0100, 0x0200, 0x0300,
+    0x0400, 0x0500, 0x0600, 0x0700, 0x0800, 0x0900, 0xF701, 0xF801,
+    0xF901, 0xFA01, 0xFB01, 0xFC01, 0xFD01, 0xFE01, 0xFF01, 0x0001,
+    0x0101, 0x0201, 0x0301, 0x0401, 0x0501, 0x0601, 0x0701, 0x0801,
+    0x0901, 0xF702, 0xF802, 0xF902, 0xFA02, 0xFB02, 0xFC02, 0xFD02,
+    0xFE02, 0xFF02, 0x0002, 0x0102, 0x0202, 0x0302, 0x0402, 0x0502,
+    0x0602, 0x0702, 0x0802, 0x0902, 0xF703, 0xF803, 0xF903, 0xFA03,
+    0xFB03, 0xFC03, 0xFD03, 0xFE03, 0xFF03, 0x0003, 0x0103, 0x0203,
+    0x0303, 0x0403, 0x0503, 0x0603, 0x0703, 0x0803, 0x0903, 0xF704,
+    0xF804, 0xF904, 0xFA04, 0xFB04, 0xFC04, 0xFD04, 0xFE04, 0xFF04,
+    0x0004, 0x0104, 0x0204, 0x0304, 0x0404, 0x0504, 0x0604, 0x0704,
+    0x0804, 0x0904, 0xF705, 0xF805, 0xF905, 0xFA05, 0xFB05, 0xFC05,
+    0xFD05, 0xFE05, 0xFF05, 0x0005, 0x0105, 0x0205, 0x0305, 0x0405,
+    0x0505, 0x0605, 0x0705, 0x0805, 0x0905, 0xF706, 0xF806, 0xF906,
+    0xFA06, 0xFB06, 0xFC06, 0xFD06, 0xFE06, 0xFF06, 0x0006, 0x0106,
+    0x0206, 0x0306, 0x0406, 0x0506, 0x0606, 0x0706, 0x0806, 0x0906,
+    0xF707, 0xF807, 0xF907, 0xFA07, 0xFB07, 0xFC07, 0xFD07, 0xFE07,
+    0xFF07, 0x0007, 0x0107, 0x0207, 0x0307, 0x0407, 0x0507, 0x0607,
+    0x0707, 0x0807, 0x0907, 0xF708, 0xF808, 0xF908, 0xFA08, 0xFB08,
+    0xFC08, 0xFD08, 0xFE08, 0xFF08, 0x0008, 0x0108, 0x0208, 0x0308,
+    0x0408, 0x0508, 0x0608, 0x0708, 0x0808, 0x0908, 0xF709, 0xF809,
+    0xF909, 0xFA09, 0xFB09, 0xFC09, 0xFD09, 0xFE09, 0xFF09, 0x0009,
+    0x0109, 0x0209, 0x0309, 0x0409, 0x0509, 0x0609, 0x0709, 0x0809,
+    0x0909, 0x0A0A,
+};
+
+static const uint8_t clv_mvy_2_bits[] = {
+    16, 16, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 13, 13, 12, 11,
+    12, 13, 13, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16,
+    15, 15, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 11, 10, 11,
+    13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 15,
+    15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 11, 10, 11, 13,
+    13, 13, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 15, 15,
+    15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 12, 11,  9, 11, 12, 13,
+    13, 13, 14, 14, 14, 14, 14, 14, 15, 15, 15, 16, 16, 15, 15, 14,
+    14, 13, 13, 13, 13, 13, 13, 13, 12, 11, 11,  9, 11, 11, 12, 13,
+    13, 13, 13, 13, 13, 13, 14, 14, 14, 15, 16, 16, 15, 15, 15, 14,
+    13, 13, 13, 13, 13, 13, 13, 12, 11, 11,  9, 11, 11, 12, 13, 13,
+    13, 13, 13, 13, 13, 14, 15, 15, 15, 16, 16, 15, 15, 15, 15, 14,
+    14, 13, 13, 13, 12, 12, 12, 11, 11,  9, 11, 11, 12, 12, 12, 13,
+    13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 15, 15, 14, 14, 14, 13,
+    13, 13, 12, 12, 12, 12, 11, 10,  8, 10, 11, 12, 12, 12, 12, 13,
+    13, 13, 14, 14, 14, 15, 15, 16, 16, 15, 14, 14, 14, 13, 13, 13,
+    13, 12, 12, 12, 11, 11, 10,  8, 10, 11, 11, 12, 12, 12, 13, 13,
+    13, 13, 14, 14, 14, 15, 16, 15, 15, 14, 14, 13, 13, 13, 13, 13,
+    12, 12, 12, 11, 10,  9,  8,  9, 10, 11, 12, 12, 12, 13, 13, 13,
+    13, 13, 14, 14, 15, 15, 15, 15, 14, 14, 13, 13, 13, 13, 13, 12,
+    12, 11, 11, 10, 10,  7, 10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
+    13, 14, 14, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12,
+    11, 11, 10,  9,  7,  9, 10, 11, 11, 12, 12, 12, 12, 12, 13, 13,
+    14, 14, 14, 14, 15, 13, 13, 13, 13, 13, 12, 12, 12, 11, 11, 11,
+    11, 10,  8,  7,  8, 10, 11, 11, 11, 11, 12, 12, 12, 13, 13, 13,
+    13, 13, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10,
+     9,  8,  6,  8,  9, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 14,
+    14, 14, 16, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10,  9,  9,  8,
+     6,  4,  6,  8,  9,  9, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13,
+    16, 12, 12, 12, 12, 11, 11, 10, 10,  9,  9,  8,  8,  8,  7,  4,
+     2,  4,  7,  8,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 12, 12,
+    16, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10,  9,  9,  8,  6,  4,
+     6,  8,  9,  9, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 16, 14,
+    14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10,  9,  8,  6,  8,
+     9, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 14, 14, 14, 15, 13,
+    13, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 10,  8,  7,  8, 10,
+    11, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 13, 15, 14, 14, 14,
+    14, 13, 13, 12, 12, 12, 12, 12, 11, 11, 10,  9,  7,  9, 10, 11,
+    11, 12, 12, 12, 12, 12, 13, 13, 14, 14, 14, 14, 15, 15, 14, 14,
+    13, 13, 13, 13, 13, 12, 12, 11, 11, 10, 10,  7, 10, 10, 11, 11,
+    12, 12, 13, 13, 13, 13, 13, 14, 14, 15, 15, 15, 15, 14, 14, 13,
+    13, 13, 13, 13, 12, 12, 12, 11, 10,  9,  8,  9, 10, 11, 12, 12,
+    12, 13, 13, 13, 13, 13, 14, 14, 15, 15, 16, 15, 14, 14, 14, 13,
+    13, 13, 13, 12, 12, 12, 11, 11, 10,  8, 10, 11, 11, 12, 12, 12,
+    13, 13, 13, 13, 14, 14, 14, 15, 16, 16, 15, 15, 14, 14, 14, 13,
+    13, 13, 12, 12, 12, 12, 11, 10,  8, 10, 11, 12, 12, 12, 12, 13,
+    13, 13, 14, 14, 14, 15, 15, 16, 16, 15, 15, 15, 15, 14, 14, 13,
+    13, 13, 12, 12, 12, 11, 11,  9, 11, 11, 12, 12, 12, 13, 13, 13,
+    14, 14, 15, 15, 15, 15, 16, 16, 15, 15, 15, 14, 13, 13, 13, 13,
+    13, 13, 13, 12, 11, 11,  9, 11, 11, 12, 13, 13, 13, 13, 13, 13,
+    13, 14, 15, 15, 15, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 13,
+    13, 13, 12, 11, 11,  9, 11, 11, 12, 13, 13, 13, 13, 13, 13, 13,
+    14, 14, 14, 15, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13,
+    13, 13, 12, 11,  9, 11, 12, 13, 13, 13, 14, 14, 14, 14, 14, 14,
+    15, 15, 15, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13,
+    13, 13, 11, 10, 11, 13, 13, 13, 14, 14, 14, 14, 14, 14, 15, 15,
+    15, 15, 16, 16, 15, 15, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13,
+    13, 12, 10, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
+    15, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 13, 13,
+    12, 11, 12, 13, 13, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 16,
+    16,  7,
+};
+
+static const uint16_t clv_mvy_2_codes[] = {
+    0xFFF5, 0xFFD8, 0x7FE6, 0x7FB9, 0x7FB5, 0x7FB0, 0x7FA0, 0x7F99,
+    0x7F93, 0x3FAA, 0x3F9B, 0x3F52, 0x1F76, 0x1EF5, 0x0F0B, 0x06F0,
+    0x0F08, 0x1EF0, 0x1F75, 0x3F53, 0x3F9A, 0x3FA8, 0x7F94, 0x7F98,
+    0x7F9E, 0x7FAE, 0x7FAF, 0x7FB7, 0x7FE9, 0xFFDB, 0xFFF6, 0xFFFD,
+    0x7FD9, 0x7FCC, 0x7FC6, 0x7F9C, 0x7F80, 0x3FA5, 0x3F80, 0x3F6A,
+    0x3F31, 0x1F54, 0x1F40, 0x1F11, 0x1F05, 0x075E, 0x0360, 0x075F,
+    0x1F07, 0x1F12, 0x1F43, 0x1F56, 0x3F33, 0x3F68, 0x3F83, 0x3FA6,
+    0x7F7F, 0x7F9A, 0x7FC9, 0x7FCA, 0x7FDB, 0xFFF8, 0xFFEC, 0x7FDE,
+    0x7FE2, 0x7FA6, 0x7F6F, 0x3FA1, 0x3F8D, 0x3F5C, 0x3F39, 0x3F21,
+    0x3F18, 0x1F58, 0x1F1E, 0x1EF1, 0x0740, 0x035A, 0x0741, 0x1EF2,
+    0x1F1F, 0x1F5A, 0x3F19, 0x3F22, 0x3F3B, 0x3F5E, 0x3F8E, 0x3FA3,
+    0x7F6B, 0x7FA2, 0x7FE3, 0x7FE1, 0xFFEE, 0xFFFC, 0x7FC3, 0x7FBC,
+    0x7F71, 0x3F96, 0x3F86, 0x3F7A, 0x3F72, 0x3F59, 0x3F46, 0x1F0A,
+    0x1EFD, 0x1ED0, 0x0F02, 0x0712, 0x019F, 0x0713, 0x0F03, 0x1ED3,
+    0x1EFF, 0x1F09, 0x3F4A, 0x3F5A, 0x3F76, 0x3F7B, 0x3F87, 0x3F97,
+    0x7F73, 0x7FBB, 0x7FBF, 0xFFFB, 0xFFEB, 0x7F88, 0x7F5C, 0x3F7C,
+    0x3F3C, 0x1F60, 0x1F4C, 0x1F14, 0x1F0C, 0x1F00, 0x1EF9, 0x1ED8,
+    0x0F42, 0x075A, 0x0714, 0x0186, 0x0715, 0x075B, 0x0F43, 0x1EDA,
+    0x1EFA, 0x1F01, 0x1F0E, 0x1F15, 0x1F4D, 0x1F62, 0x3F3D, 0x3F7D,
+    0x3FAC, 0x7F86, 0xFFE8, 0xFFE7, 0x7FA7, 0x7F8C, 0x7F68, 0x3F9C,
+    0x1F7C, 0x1F6C, 0x1F69, 0x1EEC, 0x1EE4, 0x1ED5, 0x1ECD, 0x0EF0,
+    0x0752, 0x06F6, 0x018C, 0x06F7, 0x0753, 0x0EF1, 0x1ECE, 0x1ED6,
+    0x1EE5, 0x1EED, 0x1F6A, 0x1F6E, 0x1F7D, 0x3F9F, 0x7F66, 0x7F8A,
+    0x7FA5, 0xFFE6, 0xFFDC, 0x7FDA, 0x7FC0, 0x7FAC, 0x7F61, 0x3F42,
+    0x3F0E, 0x1F45, 0x1F2C, 0x1ECA, 0x0F27, 0x0EF6, 0x0EEE, 0x072E,
+    0x06F4, 0x0185, 0x06F5, 0x072F, 0x0EEF, 0x0EF7, 0x0F28, 0x1ECB,
+    0x1F2F, 0x1F46, 0x3F0F, 0x3F40, 0x7F5F, 0x7FB4, 0x7FC2, 0x7FDC,
+    0xFFDA, 0xFFE0, 0x7F72, 0x7F63, 0x3F70, 0x3F1C, 0x3F16, 0x1F82,
+    0x1EE2, 0x1EB2, 0x0F4C, 0x0EFC, 0x0EE0, 0x0ED3, 0x0722, 0x036C,
+    0x00BF, 0x036D, 0x0723, 0x0ECC, 0x0EE1, 0x0EFF, 0x0F4D, 0x1EB3,
+    0x1EE3, 0x1F83, 0x3F17, 0x3F1F, 0x3F75, 0x7F65, 0x7F70, 0xFFE2,
+    0xFFD7, 0x7F76, 0x3F2B, 0x3F13, 0x3F0A, 0x1F33, 0x1F23, 0x1EB4,
+    0x1EA0, 0x0F46, 0x0F32, 0x0F1A, 0x0756, 0x0728, 0x0356, 0x00B0,
+    0x0357, 0x0729, 0x0757, 0x0F1B, 0x0F33, 0x0F47, 0x1EA1, 0x1EB5,
+    0x1F20, 0x1F30, 0x3F08, 0x3F10, 0x3F28, 0x7F77, 0xFFD5, 0x7FD5,
+    0x7FD1, 0x3F5D, 0x3F25, 0x1F34, 0x1F24, 0x1EE8, 0x1EBC, 0x1EA8,
+    0x0F3A, 0x0F2E, 0x0EE2, 0x071C, 0x0374, 0x01A0, 0x00AE, 0x01A1,
+    0x0375, 0x071D, 0x0EE3, 0x0F2F, 0x0F3C, 0x1EA9, 0x1EBD, 0x1EE9,
+    0x1F25, 0x1F36, 0x3F24, 0x3F61, 0x7FCE, 0x7FD2, 0x7F91, 0x7F7D,
+    0x3F6C, 0x3F34, 0x1F72, 0x1F61, 0x1EDD, 0x1EC5, 0x1EA5, 0x0F05,
+    0x0ED6, 0x0750, 0x073E, 0x0368, 0x034A, 0x0052, 0x034B, 0x0369,
+    0x073F, 0x0751, 0x0ED7, 0x0F07, 0x1EA7, 0x1EC7, 0x1EDF, 0x1F65,
+    0x1F70, 0x3F36, 0x3F6F, 0x7F7C, 0x7F8F, 0x3F90, 0x3F66, 0x3F58,
+    0x3F4E, 0x1F48, 0x1EBB, 0x0F40, 0x0F18, 0x0F10, 0x0EDA, 0x0ECF,
+    0x0732, 0x0704, 0x0354, 0x0190, 0x004F, 0x0191, 0x0355, 0x0705,
+    0x0733, 0x0ED0, 0x0EDB, 0x0F11, 0x0F19, 0x0F41, 0x1EB8, 0x1F4B,
+    0x3F4F, 0x3F55, 0x3F65, 0x3F92, 0x7F85, 0x1F51, 0x1F39, 0x1F2B,
+    0x1F18, 0x1EC2, 0x0F38, 0x0F14, 0x0ECA, 0x074C, 0x0736, 0x0700,
+    0x06FC, 0x0350, 0x00BA, 0x004D, 0x00BB, 0x0351, 0x06FD, 0x0701,
+    0x0737, 0x074D, 0x0ECB, 0x0F15, 0x0F39, 0x1EC3, 0x1F1B, 0x1F2E,
+    0x1F3A, 0x1F53, 0x7F82, 0x3F8A, 0x3F47, 0x3F2E, 0x1F5E, 0x1E9E,
+    0x0F24, 0x0F20, 0x0EC6, 0x0746, 0x0726, 0x070E, 0x0370, 0x035E,
+    0x018A, 0x00AC, 0x0021, 0x00AD, 0x018B, 0x035F, 0x0371, 0x070F,
+    0x0727, 0x0747, 0x0EC7, 0x0F21, 0x0F25, 0x1E9F, 0x1F5F, 0x3F2D,
+    0x3F48, 0x3F8B, 0xFFF1, 0x1F78, 0x1F3D, 0x1EAD, 0x0F2B, 0x0EF9,
+    0x0EE9, 0x0739, 0x0719, 0x0709, 0x0363, 0x019B, 0x0195, 0x00B3,
+    0x0023, 0x0005, 0x0024, 0x00B4, 0x0196, 0x019C, 0x0364, 0x070A,
+    0x071A, 0x073A, 0x0EEA, 0x0EFA, 0x0F2C, 0x1EAE, 0x1F3E, 0x1F79,
+    0xFFF0, 0x0F0C, 0x0EE6, 0x0EDC, 0x0EC2, 0x0748, 0x0706, 0x0372,
+    0x034C, 0x0198, 0x0192, 0x00C0, 0x00BC, 0x00B6, 0x0053, 0x0006,
+    0x0000, 0x0007, 0x0054, 0x00B7, 0x00BD, 0x00C1, 0x0193, 0x0199,
+    0x034D, 0x0373, 0x0707, 0x0749, 0x0EC3, 0x0EDD, 0x0EE7, 0x0F0D,
+    0xFFF2, 0x1F7A, 0x1F3F, 0x1EAF, 0x0F2D, 0x0EFB, 0x0EEB, 0x073B,
+    0x071B, 0x070B, 0x0365, 0x019D, 0x0197, 0x00B5, 0x0025, 0x0004,
+    0x0022, 0x00B2, 0x0194, 0x019A, 0x0362, 0x0708, 0x0718, 0x0738,
+    0x0EE8, 0x0EF8, 0x0F2A, 0x1EAC, 0x1F3C, 0x1F7B, 0xFFF3, 0x3F89,
+    0x3F44, 0x3F2F, 0x1F5C, 0x1E9C, 0x0F22, 0x0F1E, 0x0EC4, 0x0744,
+    0x0724, 0x070C, 0x036E, 0x035C, 0x0188, 0x00AA, 0x0020, 0x00AB,
+    0x0189, 0x035D, 0x036F, 0x070D, 0x0725, 0x0745, 0x0EC5, 0x0F1F,
+    0x0F23, 0x1E9D, 0x1F5D, 0x3F2C, 0x3F45, 0x3F88, 0x7F81, 0x1F52,
+    0x1F38, 0x1F28, 0x1F19, 0x1EC0, 0x0F36, 0x0F12, 0x0EC8, 0x074A,
+    0x0734, 0x06FE, 0x06FA, 0x034E, 0x00B8, 0x004C, 0x00B9, 0x034F,
+    0x06FB, 0x06FF, 0x0735, 0x074B, 0x0EC9, 0x0F13, 0x0F37, 0x1EC1,
+    0x1F1A, 0x1F29, 0x1F3B, 0x1F50, 0x7F84, 0x3F91, 0x3F64, 0x3F54,
+    0x3F4C, 0x1F49, 0x1EB9, 0x0F3B, 0x0F16, 0x0F0E, 0x0ED8, 0x0ECD,
+    0x0730, 0x0702, 0x0352, 0x018E, 0x004E, 0x018F, 0x0353, 0x0703,
+    0x0731, 0x0ECE, 0x0ED9, 0x0F0F, 0x0F17, 0x0F3D, 0x1EBA, 0x1F4A,
+    0x3F4D, 0x3F51, 0x3F67, 0x3F93, 0x7F90, 0x7F7A, 0x3F6E, 0x3F37,
+    0x1F71, 0x1F63, 0x1EDC, 0x1EC4, 0x1EA4, 0x0F04, 0x0ED4, 0x074E,
+    0x073C, 0x0366, 0x0348, 0x0051, 0x0349, 0x0367, 0x073D, 0x074F,
+    0x0ED5, 0x0F06, 0x1EA6, 0x1EC6, 0x1EDE, 0x1F64, 0x1F73, 0x3F35,
+    0x3F6D, 0x7F7B, 0x7F8E, 0x7FD4, 0x7FD0, 0x3F5F, 0x3F26, 0x1F35,
+    0x1F27, 0x1EEA, 0x1EBE, 0x1EAA, 0x0F3E, 0x0F30, 0x0EE4, 0x071E,
+    0x0376, 0x01A2, 0x00AF, 0x01A3, 0x0377, 0x071F, 0x0EE5, 0x0F31,
+    0x0F3F, 0x1EAB, 0x1EBF, 0x1EEB, 0x1F26, 0x1F37, 0x3F27, 0x3F62,
+    0x7FCF, 0x7FD3, 0xFFD4, 0x7F78, 0x3F29, 0x3F11, 0x3F0B, 0x1F32,
+    0x1F22, 0x1EB6, 0x1EA2, 0x0F48, 0x0F34, 0x0F1C, 0x0758, 0x072A,
+    0x0358, 0x00B1, 0x0359, 0x072B, 0x0759, 0x0F1D, 0x0F35, 0x0F49,
+    0x1EA3, 0x1EB7, 0x1F21, 0x1F31, 0x3F09, 0x3F12, 0x3F2A, 0x7F79,
+    0xFFD6, 0xFFE1, 0x7F6D, 0x7F64, 0x3F73, 0x3F1D, 0x3F14, 0x1F81,
+    0x1EE0, 0x1EB0, 0x0F4A, 0x0EFD, 0x0EDE, 0x0ED1, 0x0720, 0x036A,
+    0x00BE, 0x036B, 0x0721, 0x0ED2, 0x0EDF, 0x0EFE, 0x0F4B, 0x1EB1,
+    0x1EE1, 0x1F7E, 0x3F15, 0x3F1E, 0x3F74, 0x7F62, 0x7F75, 0xFFE3,
+    0xFFDE, 0x7FDD, 0x7FBE, 0x7FB3, 0x7F60, 0x3F43, 0x3F0C, 0x1F47,
+    0x1F2D, 0x1EC8, 0x0F26, 0x0EF4, 0x0EEC, 0x072C, 0x06F2, 0x0184,
+    0x06F3, 0x072D, 0x0EED, 0x0EF5, 0x0F29, 0x1EC9, 0x1F2A, 0x1F44,
+    0x3F0D, 0x3F41, 0x7F5E, 0x7FB1, 0x7FC1, 0x7FD7, 0xFFDF, 0xFFEA,
+    0x7FA3, 0x7F8B, 0x7F69, 0x3F9E, 0x1F7F, 0x1F6D, 0x1F6B, 0x1EEE,
+    0x1EE6, 0x1ED4, 0x1ECF, 0x0EF2, 0x0754, 0x06F8, 0x018D, 0x06F9,
+    0x0755, 0x0EF3, 0x1ECC, 0x1ED7, 0x1EE7, 0x1EEF, 0x1F68, 0x1F6F,
+    0x1F80, 0x3F9D, 0x7F67, 0x7F8D, 0x7FA8, 0xFFE9, 0xFFE5, 0x7F89,
+    0x7F5D, 0x3F7F, 0x3F3F, 0x1F67, 0x1F4F, 0x1F17, 0x1F0F, 0x1F02,
+    0x1EFB, 0x1ED9, 0x0F45, 0x075C, 0x0716, 0x0187, 0x0717, 0x075D,
+    0x0F44, 0x1EDB, 0x1EF8, 0x1F03, 0x1F0D, 0x1F16, 0x1F4E, 0x1F66,
+    0x3F3E, 0x3F7E, 0x3FAD, 0x7F87, 0xFFE4, 0xFFF9, 0x7FC4, 0x7FBA,
+    0x7F6E, 0x3F95, 0x3F85, 0x3F78, 0x3F77, 0x3F5B, 0x3F49, 0x1F08,
+    0x1EFE, 0x1ED2, 0x0F01, 0x0710, 0x019E, 0x0711, 0x0F00, 0x1ED1,
+    0x1EFC, 0x1F0B, 0x3F4B, 0x3F57, 0x3F71, 0x3F79, 0x3F84, 0x3F94,
+    0x7F74, 0x7FBD, 0x7FC5, 0xFFFE, 0xFFED, 0x7FE0, 0x7FDF, 0x7FA4,
+    0x7F6A, 0x3FA0, 0x3F8F, 0x3F63, 0x3F3A, 0x3F23, 0x3F1A, 0x1F59,
+    0x1F1D, 0x1EF3, 0x0743, 0x035B, 0x0742, 0x1EF7, 0x1F1C, 0x1F5B,
+    0x3F1B, 0x3F20, 0x3F38, 0x3F60, 0x3F8C, 0x3FA2, 0x7F6C, 0x7FA9,
+    0x7FE5, 0x7FE4, 0xFFEF, 0xFFFF, 0x7FD6, 0x7FCB, 0x7FC7, 0x7F9B,
+    0x7F83, 0x3FA4, 0x3F81, 0x3F69, 0x3F30, 0x1F57, 0x1F41, 0x1F13,
+    0x1F06, 0x0EC0, 0x0361, 0x0EC1, 0x1F04, 0x1F10, 0x1F42, 0x1F55,
+    0x3F32, 0x3F6B, 0x3F82, 0x3FA7, 0x7F7E, 0x7F9D, 0x7FC8, 0x7FCD,
+    0x7FD8, 0xFFFA, 0xFFF7, 0xFFD9, 0x7FE8, 0x7FB6, 0x7FAB, 0x7FAA,
+    0x7FA1, 0x7F96, 0x7F95, 0x3FAB, 0x3F98, 0x3F50, 0x1F77, 0x1EF4,
+    0x0F0A, 0x06F1, 0x0F09, 0x1EF6, 0x1F74, 0x3F56, 0x3F99, 0x3FA9,
+    0x7F92, 0x7F97, 0x7F9F, 0x7FAD, 0x7FB2, 0x7FB8, 0x7FE7, 0xFFDD,
+    0xFFF4, 0x0050,
+};
+
+static const uint16_t clv_mvy_2_syms[] = {
+    0xF1F1, 0xF2F1, 0xF3F1, 0xF4F1, 0xF5F1, 0xF6F1, 0xF7F1, 0xF8F1,
+    0xF9F1, 0xFAF1, 0xFBF1, 0xFCF1, 0xFDF1, 0xFEF1, 0xFFF1, 0x00F1,
+    0x01F1, 0x02F1, 0x03F1, 0x04F1, 0x05F1, 0x06F1, 0x07F1, 0x08F1,
+    0x09F1, 0x0AF1, 0x0BF1, 0x0CF1, 0x0DF1, 0x0EF1, 0x0FF1, 0xF1F2,
+    0xF2F2, 0xF3F2, 0xF4F2, 0xF5F2, 0xF6F2, 0xF7F2, 0xF8F2, 0xF9F2,
+    0xFAF2, 0xFBF2, 0xFCF2, 0xFDF2, 0xFEF2, 0xFFF2, 0x00F2, 0x01F2,
+    0x02F2, 0x03F2, 0x04F2, 0x05F2, 0x06F2, 0x07F2, 0x08F2, 0x09F2,
+    0x0AF2, 0x0BF2, 0x0CF2, 0x0DF2, 0x0EF2, 0x0FF2, 0xF1F3, 0xF2F3,
+    0xF3F3, 0xF4F3, 0xF5F3, 0xF6F3, 0xF7F3, 0xF8F3, 0xF9F3, 0xFAF3,
+    0xFBF3, 0xFCF3, 0xFDF3, 0xFEF3, 0xFFF3, 0x00F3, 0x01F3, 0x02F3,
+    0x03F3, 0x04F3, 0x05F3, 0x06F3, 0x07F3, 0x08F3, 0x09F3, 0x0AF3,
+    0x0BF3, 0x0CF3, 0x0DF3, 0x0EF3, 0x0FF3, 0xF1F4, 0xF2F4, 0xF3F4,
+    0xF4F4, 0xF5F4, 0xF6F4, 0xF7F4, 0xF8F4, 0xF9F4, 0xFAF4, 0xFBF4,
+    0xFCF4, 0xFDF4, 0xFEF4, 0xFFF4, 0x00F4, 0x01F4, 0x02F4, 0x03F4,
+    0x04F4, 0x05F4, 0x06F4, 0x07F4, 0x08F4, 0x09F4, 0x0AF4, 0x0BF4,
+    0x0CF4, 0x0DF4, 0x0EF4, 0x0FF4, 0xF1F5, 0xF2F5, 0xF3F5, 0xF4F5,
+    0xF5F5, 0xF6F5, 0xF7F5, 0xF8F5, 0xF9F5, 0xFAF5, 0xFBF5, 0xFCF5,
+    0xFDF5, 0xFEF5, 0xFFF5, 0x00F5, 0x01F5, 0x02F5, 0x03F5, 0x04F5,
+    0x05F5, 0x06F5, 0x07F5, 0x08F5, 0x09F5, 0x0AF5, 0x0BF5, 0x0CF5,
+    0x0DF5, 0x0EF5, 0x0FF5, 0xF1F6, 0xF2F6, 0xF3F6, 0xF4F6, 0xF5F6,
+    0xF6F6, 0xF7F6, 0xF8F6, 0xF9F6, 0xFAF6, 0xFBF6, 0xFCF6, 0xFDF6,
+    0xFEF6, 0xFFF6, 0x00F6, 0x01F6, 0x02F6, 0x03F6, 0x04F6, 0x05F6,
+    0x06F6, 0x07F6, 0x08F6, 0x09F6, 0x0AF6, 0x0BF6, 0x0CF6, 0x0DF6,
+    0x0EF6, 0x0FF6, 0xF1F7, 0xF2F7, 0xF3F7, 0xF4F7, 0xF5F7, 0xF6F7,
+    0xF7F7, 0xF8F7, 0xF9F7, 0xFAF7, 0xFBF7, 0xFCF7, 0xFDF7, 0xFEF7,
+    0xFFF7, 0x00F7, 0x01F7, 0x02F7, 0x03F7, 0x04F7, 0x05F7, 0x06F7,
+    0x07F7, 0x08F7, 0x09F7, 0x0AF7, 0x0BF7, 0x0CF7, 0x0DF7, 0x0EF7,
+    0x0FF7, 0xF1F8, 0xF2F8, 0xF3F8, 0xF4F8, 0xF5F8, 0xF6F8, 0xF7F8,
+    0xF8F8, 0xF9F8, 0xFAF8, 0xFBF8, 0xFCF8, 0xFDF8, 0xFEF8, 0xFFF8,
+    0x00F8, 0x01F8, 0x02F8, 0x03F8, 0x04F8, 0x05F8, 0x06F8, 0x07F8,
+    0x08F8, 0x09F8, 0x0AF8, 0x0BF8, 0x0CF8, 0x0DF8, 0x0EF8, 0x0FF8,
+    0xF1F9, 0xF2F9, 0xF3F9, 0xF4F9, 0xF5F9, 0xF6F9, 0xF7F9, 0xF8F9,
+    0xF9F9, 0xFAF9, 0xFBF9, 0xFCF9, 0xFDF9, 0xFEF9, 0xFFF9, 0x00F9,
+    0x01F9, 0x02F9, 0x03F9, 0x04F9, 0x05F9, 0x06F9, 0x07F9, 0x08F9,
+    0x09F9, 0x0AF9, 0x0BF9, 0x0CF9, 0x0DF9, 0x0EF9, 0x0FF9, 0xF1FA,
+    0xF2FA, 0xF3FA, 0xF4FA, 0xF5FA, 0xF6FA, 0xF7FA, 0xF8FA, 0xF9FA,
+    0xFAFA, 0xFBFA, 0xFCFA, 0xFDFA, 0xFEFA, 0xFFFA, 0x00FA, 0x01FA,
+    0x02FA, 0x03FA, 0x04FA, 0x05FA, 0x06FA, 0x07FA, 0x08FA, 0x09FA,
+    0x0AFA, 0x0BFA, 0x0CFA, 0x0DFA, 0x0EFA, 0x0FFA, 0xF1FB, 0xF2FB,
+    0xF3FB, 0xF4FB, 0xF5FB, 0xF6FB, 0xF7FB, 0xF8FB, 0xF9FB, 0xFAFB,
+    0xFBFB, 0xFCFB, 0xFDFB, 0xFEFB, 0xFFFB, 0x00FB, 0x01FB, 0x02FB,
+    0x03FB, 0x04FB, 0x05FB, 0x06FB, 0x07FB, 0x08FB, 0x09FB, 0x0AFB,
+    0x0BFB, 0x0CFB, 0x0DFB, 0x0EFB, 0x0FFB, 0xF1FC, 0xF2FC, 0xF3FC,
+    0xF4FC, 0xF5FC, 0xF6FC, 0xF7FC, 0xF8FC, 0xF9FC, 0xFAFC, 0xFBFC,
+    0xFCFC, 0xFDFC, 0xFEFC, 0xFFFC, 0x00FC, 0x01FC, 0x02FC, 0x03FC,
+    0x04FC, 0x05FC, 0x06FC, 0x07FC, 0x08FC, 0x09FC, 0x0AFC, 0x0BFC,
+    0x0CFC, 0x0DFC, 0x0EFC, 0x0FFC, 0xF1FD, 0xF2FD, 0xF3FD, 0xF4FD,
+    0xF5FD, 0xF6FD, 0xF7FD, 0xF8FD, 0xF9FD, 0xFAFD, 0xFBFD, 0xFCFD,
+    0xFDFD, 0xFEFD, 0xFFFD, 0x00FD, 0x01FD, 0x02FD, 0x03FD, 0x04FD,
+    0x05FD, 0x06FD, 0x07FD, 0x08FD, 0x09FD, 0x0AFD, 0x0BFD, 0x0CFD,
+    0x0DFD, 0x0EFD, 0x0FFD, 0xF1FE, 0xF2FE, 0xF3FE, 0xF4FE, 0xF5FE,
+    0xF6FE, 0xF7FE, 0xF8FE, 0xF9FE, 0xFAFE, 0xFBFE, 0xFCFE, 0xFDFE,
+    0xFEFE, 0xFFFE, 0x00FE, 0x01FE, 0x02FE, 0x03FE, 0x04FE, 0x05FE,
+    0x06FE, 0x07FE, 0x08FE, 0x09FE, 0x0AFE, 0x0BFE, 0x0CFE, 0x0DFE,
+    0x0EFE, 0x0FFE, 0xF1FF, 0xF2FF, 0xF3FF, 0xF4FF, 0xF5FF, 0xF6FF,
+    0xF7FF, 0xF8FF, 0xF9FF, 0xFAFF, 0xFBFF, 0xFCFF, 0xFDFF, 0xFEFF,
+    0xFFFF, 0x00FF, 0x01FF, 0x02FF, 0x03FF, 0x04FF, 0x05FF, 0x06FF,
+    0x07FF, 0x08FF, 0x09FF, 0x0AFF, 0x0BFF, 0x0CFF, 0x0DFF, 0x0EFF,
+    0x0FFF, 0xF100, 0xF200, 0xF300, 0xF400, 0xF500, 0xF600, 0xF700,
+    0xF800, 0xF900, 0xFA00, 0xFB00, 0xFC00, 0xFD00, 0xFE00, 0xFF00,
+    0x0000, 0x0100, 0x0200, 0x0300, 0x0400, 0x0500, 0x0600, 0x0700,
+    0x0800, 0x0900, 0x0A00, 0x0B00, 0x0C00, 0x0D00, 0x0E00, 0x0F00,
+    0xF101, 0xF201, 0xF301, 0xF401, 0xF501, 0xF601, 0xF701, 0xF801,
+    0xF901, 0xFA01, 0xFB01, 0xFC01, 0xFD01, 0xFE01, 0xFF01, 0x0001,
+    0x0101, 0x0201, 0x0301, 0x0401, 0x0501, 0x0601, 0x0701, 0x0801,
+    0x0901, 0x0A01, 0x0B01, 0x0C01, 0x0D01, 0x0E01, 0x0F01, 0xF102,
+    0xF202, 0xF302, 0xF402, 0xF502, 0xF602, 0xF702, 0xF802, 0xF902,
+    0xFA02, 0xFB02, 0xFC02, 0xFD02, 0xFE02, 0xFF02, 0x0002, 0x0102,
+    0x0202, 0x0302, 0x0402, 0x0502, 0x0602, 0x0702, 0x0802, 0x0902,
+    0x0A02, 0x0B02, 0x0C02, 0x0D02, 0x0E02, 0x0F02, 0xF103, 0xF203,
+    0xF303, 0xF403, 0xF503, 0xF603, 0xF703, 0xF803, 0xF903, 0xFA03,
+    0xFB03, 0xFC03, 0xFD03, 0xFE03, 0xFF03, 0x0003, 0x0103, 0x0203,
+    0x0303, 0x0403, 0x0503, 0x0603, 0x0703, 0x0803, 0x0903, 0x0A03,
+    0x0B03, 0x0C03, 0x0D03, 0x0E03, 0x0F03, 0xF104, 0xF204, 0xF304,
+    0xF404, 0xF504, 0xF604, 0xF704, 0xF804, 0xF904, 0xFA04, 0xFB04,
+    0xFC04, 0xFD04, 0xFE04, 0xFF04, 0x0004, 0x0104, 0x0204, 0x0304,
+    0x0404, 0x0504, 0x0604, 0x0704, 0x0804, 0x0904, 0x0A04, 0x0B04,
+    0x0C04, 0x0D04, 0x0E04, 0x0F04, 0xF105, 0xF205, 0xF305, 0xF405,
+    0xF505, 0xF605, 0xF705, 0xF805, 0xF905, 0xFA05, 0xFB05, 0xFC05,
+    0xFD05, 0xFE05, 0xFF05, 0x0005, 0x0105, 0x0205, 0x0305, 0x0405,
+    0x0505, 0x0605, 0x0705, 0x0805, 0x0905, 0x0A05, 0x0B05, 0x0C05,
+    0x0D05, 0x0E05, 0x0F05, 0xF106, 0xF206, 0xF306, 0xF406, 0xF506,
+    0xF606, 0xF706, 0xF806, 0xF906, 0xFA06, 0xFB06, 0xFC06, 0xFD06,
+    0xFE06, 0xFF06, 0x0006, 0x0106, 0x0206, 0x0306, 0x0406, 0x0506,
+    0x0606, 0x0706, 0x0806, 0x0906, 0x0A06, 0x0B06, 0x0C06, 0x0D06,
+    0x0E06, 0x0F06, 0xF107, 0xF207, 0xF307, 0xF407, 0xF507, 0xF607,
+    0xF707, 0xF807, 0xF907, 0xFA07, 0xFB07, 0xFC07, 0xFD07, 0xFE07,
+    0xFF07, 0x0007, 0x0107, 0x0207, 0x0307, 0x0407, 0x0507, 0x0607,
+    0x0707, 0x0807, 0x0907, 0x0A07, 0x0B07, 0x0C07, 0x0D07, 0x0E07,
+    0x0F07, 0xF108, 0xF208, 0xF308, 0xF408, 0xF508, 0xF608, 0xF708,
+    0xF808, 0xF908, 0xFA08, 0xFB08, 0xFC08, 0xFD08, 0xFE08, 0xFF08,
+    0x0008, 0x0108, 0x0208, 0x0308, 0x0408, 0x0508, 0x0608, 0x0708,
+    0x0808, 0x0908, 0x0A08, 0x0B08, 0x0C08, 0x0D08, 0x0E08, 0x0F08,
+    0xF109, 0xF209, 0xF309, 0xF409, 0xF509, 0xF609, 0xF709, 0xF809,
+    0xF909, 0xFA09, 0xFB09, 0xFC09, 0xFD09, 0xFE09, 0xFF09, 0x0009,
+    0x0109, 0x0209, 0x0309, 0x0409, 0x0509, 0x0609, 0x0709, 0x0809,
+    0x0909, 0x0A09, 0x0B09, 0x0C09, 0x0D09, 0x0E09, 0x0F09, 0xF10A,
+    0xF20A, 0xF30A, 0xF40A, 0xF50A, 0xF60A, 0xF70A, 0xF80A, 0xF90A,
+    0xFA0A, 0xFB0A, 0xFC0A, 0xFD0A, 0xFE0A, 0xFF0A, 0x000A, 0x010A,
+    0x020A, 0x030A, 0x040A, 0x050A, 0x060A, 0x070A, 0x080A, 0x090A,
+    0x0A0A, 0x0B0A, 0x0C0A, 0x0D0A, 0x0E0A, 0x0F0A, 0xF10B, 0xF20B,
+    0xF30B, 0xF40B, 0xF50B, 0xF60B, 0xF70B, 0xF80B, 0xF90B, 0xFA0B,
+    0xFB0B, 0xFC0B, 0xFD0B, 0xFE0B, 0xFF0B, 0x000B, 0x010B, 0x020B,
+    0x030B, 0x040B, 0x050B, 0x060B, 0x070B, 0x080B, 0x090B, 0x0A0B,
+    0x0B0B, 0x0C0B, 0x0D0B, 0x0E0B, 0x0F0B, 0xF10C, 0xF20C, 0xF30C,
+    0xF40C, 0xF50C, 0xF60C, 0xF70C, 0xF80C, 0xF90C, 0xFA0C, 0xFB0C,
+    0xFC0C, 0xFD0C, 0xFE0C, 0xFF0C, 0x000C, 0x010C, 0x020C, 0x030C,
+    0x040C, 0x050C, 0x060C, 0x070C, 0x080C, 0x090C, 0x0A0C, 0x0B0C,
+    0x0C0C, 0x0D0C, 0x0E0C, 0x0F0C, 0xF10D, 0xF20D, 0xF30D, 0xF40D,
+    0xF50D, 0xF60D, 0xF70D, 0xF80D, 0xF90D, 0xFA0D, 0xFB0D, 0xFC0D,
+    0xFD0D, 0xFE0D, 0xFF0D, 0x000D, 0x010D, 0x020D, 0x030D, 0x040D,
+    0x050D, 0x060D, 0x070D, 0x080D, 0x090D, 0x0A0D, 0x0B0D, 0x0C0D,
+    0x0D0D, 0x0E0D, 0x0F0D, 0xF10E, 0xF20E, 0xF30E, 0xF40E, 0xF50E,
+    0xF60E, 0xF70E, 0xF80E, 0xF90E, 0xFA0E, 0xFB0E, 0xFC0E, 0xFD0E,
+    0xFE0E, 0xFF0E, 0x000E, 0x010E, 0x020E, 0x030E, 0x040E, 0x050E,
+    0x060E, 0x070E, 0x080E, 0x090E, 0x0A0E, 0x0B0E, 0x0C0E, 0x0D0E,
+    0x0E0E, 0x0F0E, 0xF10F, 0xF20F, 0xF30F, 0xF40F, 0xF50F, 0xF60F,
+    0xF70F, 0xF80F, 0xF90F, 0xFA0F, 0xFB0F, 0xFC0F, 0xFD0F, 0xFE0F,
+    0xFF0F, 0x000F, 0x010F, 0x020F, 0x030F, 0x040F, 0x050F, 0x060F,
+    0x070F, 0x080F, 0x090F, 0x0A0F, 0x0B0F, 0x0C0F, 0x0D0F, 0x0E0F,
+    0x0F0F, 0x1010,
+};
+
+static const uint8_t clv_mvy_3_bits[] = {
+    16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 14, 14, 13,
+    13, 12, 11, 12, 13, 13, 14, 14, 15, 15, 15, 15, 15, 15, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15,
+    15, 15, 14, 14, 14, 13, 12, 11, 12, 13, 14, 14, 14, 15, 15, 15,
+    15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15,
+    15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 12, 11, 12, 14, 14,
+    14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16,
+    16, 16, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13,
+    12, 11, 12, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15,
+    15, 16, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 13, 13, 12, 10, 12, 13, 13, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 15, 15, 15, 15,
+    14, 14, 14, 14, 13, 13, 13, 13, 13, 12, 12, 10, 12, 12, 13, 13,
+    13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 15, 15,
+    15, 15, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 11,
+    10, 11, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 15,
+    15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 13, 13, 13, 13, 12, 12,
+    12, 12, 12, 11, 10,  9, 10, 11, 12, 12, 12, 12, 12, 13, 13, 13,
+    13, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 13, 13,
+    12, 12, 12, 12, 12, 12, 12, 11, 11, 10,  9, 10, 11, 11, 12, 12,
+    12, 12, 12, 12, 12, 13, 13, 14, 14, 14, 15, 15, 15, 16, 15, 15,
+    15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 12, 11, 11, 11, 10,  9,
+    10, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13, 14, 14, 15, 15, 15,
+    15, 16, 15, 15, 14, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12,
+    11, 11, 11, 10,  9, 10, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13,
+    14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 14, 14, 14, 13, 13, 12,
+    12, 12, 12, 12, 12, 11, 11, 11, 10,  8, 10, 11, 11, 11, 12, 12,
+    12, 12, 12, 12, 13, 13, 14, 14, 14, 15, 15, 15, 15, 15, 14, 14,
+    14, 14, 13, 13, 12, 12, 12, 12, 12, 11, 11, 11, 10, 10,  8, 10,
+    10, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13, 14, 14, 14, 14, 15,
+    15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11,
+    10, 10,  9,  7,  9, 10, 10, 11, 11, 11, 12, 12, 12, 12, 13, 13,
+    13, 14, 14, 14, 14, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12,
+    12, 12, 11, 11, 11, 10, 10,  9,  7,  9, 10, 10, 11, 11, 11, 12,
+    12, 12, 12, 13, 13, 13, 14, 14, 14, 14, 15, 15, 14, 14, 14, 14,
+    13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 10,  9,  9,  7,  9,  9,
+    10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 14, 14, 14, 14, 15,
+    15, 14, 14, 14, 13, 13, 13, 13, 12, 11, 11, 11, 11, 10, 10, 10,
+     9,  8,  6,  8,  9, 10, 10, 10, 11, 11, 11, 11, 12, 13, 13, 13,
+    13, 14, 14, 14, 15, 15, 14, 14, 14, 13, 13, 12, 12, 11, 11, 11,
+    11, 10, 10,  9,  9,  8,  6,  4,  6,  8,  9,  9, 10, 10, 11, 11,
+    11, 11, 12, 12, 13, 13, 14, 14, 14, 15, 13, 13, 13, 12, 12, 12,
+    12, 11, 10, 10, 10, 10,  9,  9,  8,  8,  7,  5,  2,  5,  7,  8,
+     8,  9,  9, 10, 10, 10, 10, 11, 12, 12, 12, 12, 13, 13, 13, 15,
+    14, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10,  9,  9,  8,
+     6,  4,  6,  8,  9,  9, 10, 10, 11, 11, 11, 11, 12, 12, 13, 13,
+    14, 14, 14, 15, 15, 14, 14, 14, 13, 13, 13, 13, 12, 11, 11, 11,
+    11, 10, 10, 10,  9,  8,  6,  8,  9, 10, 10, 10, 11, 11, 11, 11,
+    12, 13, 13, 13, 13, 14, 14, 14, 15, 15, 14, 14, 14, 14, 13, 13,
+    12, 12, 12, 12, 11, 11, 11, 11, 10,  9,  9,  7,  9,  9, 10, 11,
+    11, 11, 11, 12, 12, 12, 12, 13, 13, 14, 14, 14, 14, 15, 15, 14,
+    14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10,  9,
+     7,  9, 10, 10, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 14, 14,
+    14, 14, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11,
+    11, 11, 10, 10,  9,  7,  9, 10, 10, 11, 11, 11, 12, 12, 12, 12,
+    13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 14, 14, 14, 14, 13, 13,
+    12, 12, 12, 12, 12, 11, 11, 11, 10, 10,  8, 10, 10, 11, 11, 11,
+    12, 12, 12, 12, 12, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 15,
+    14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 10,  8,
+    10, 11, 11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 14, 14, 14, 15,
+    15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12,
+    11, 11, 11, 10,  9, 10, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13,
+    14, 14, 14, 14, 14, 15, 15, 16, 15, 15, 15, 15, 14, 14, 13, 13,
+    12, 12, 12, 12, 12, 11, 11, 11, 10,  9, 10, 11, 11, 11, 12, 12,
+    12, 12, 12, 13, 13, 14, 14, 15, 15, 15, 15, 16, 15, 15, 15, 14,
+    14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, 11, 11, 10,  9, 10,
+    11, 11, 12, 12, 12, 12, 12, 12, 12, 13, 13, 14, 14, 14, 15, 15,
+    15, 15, 15, 15, 15, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12,
+    12, 11, 10,  9, 10, 11, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14,
+    14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14,
+    13, 13, 13, 13, 13, 13, 13, 11, 10, 11, 13, 13, 13, 13, 13, 13,
+    14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 15, 15,
+    15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 13, 12, 12, 10, 12, 12,
+    13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16,
+    16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13,
+    13, 12, 10, 12, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15,
+    15, 15, 15, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 14,
+    14, 14, 14, 14, 13, 13, 12, 11, 12, 13, 13, 14, 14, 14, 14, 14,
+    15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 15, 15, 15,
+    15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 12, 11, 12, 14, 14,
+    14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 13,
+    12, 11, 12, 13, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15,
+    15, 14, 14, 13, 13, 12, 11, 12, 13, 13, 14, 14, 15, 15, 15, 15,
+    15, 15, 16, 16, 16, 16, 16, 16, 16,  7,
+};
+
+static const uint16_t clv_mvy_3_codes[] = {
+    0xFFF6, 0xFFEE, 0xFFDC, 0xFFD7, 0xFFB4, 0xFFAA, 0xFFA0, 0x7FCC,
+    0x7F8E, 0x7F7B, 0x7F77, 0x7F13, 0x7F11, 0x3F56, 0x3F48, 0x1F16,
+    0x1E94, 0x0F28, 0x06F0, 0x0F2A, 0x1E96, 0x1F1C, 0x3F46, 0x3F58,
+    0x7F15, 0x7F0E, 0x7F76, 0x7F80, 0x7F90, 0x7FC8, 0xFFA2, 0xFFA7,
+    0xFFB5, 0xFFD8, 0xFFDD, 0xFFEC, 0xFFF5, 0xFFF1, 0xFFEA, 0xFFE4,
+    0xFFD1, 0xFFC1, 0xFFA8, 0x7F70, 0x7F5C, 0x7F44, 0x7F40, 0x7F2A,
+    0x7F16, 0x7EDF, 0x3ED6, 0x3ECA, 0x3ECC, 0x1EB5, 0x0EDE, 0x06D3,
+    0x0EDF, 0x1EB6, 0x3ECE, 0x3ED0, 0x3ED8, 0x7EE5, 0x7F19, 0x7F31,
+    0x7F3E, 0x7F45, 0x7F5B, 0x7F6F, 0xFFA9, 0xFFC0, 0xFFCE, 0xFFE5,
+    0xFFE8, 0xFFF2, 0xFFFE, 0xFFE2, 0xFFBE, 0x7FBB, 0x7F75, 0x7F6B,
+    0x7F58, 0x7EF9, 0x7EDC, 0x3F68, 0x3F27, 0x3F28, 0x3EDF, 0x3ED2,
+    0x3EC1, 0x3EA6, 0x3E87, 0x0F0F, 0x06B3, 0x0F10, 0x3E89, 0x3EA8,
+    0x3EC3, 0x3ED5, 0x3EE0, 0x3F2B, 0x3F26, 0x3F67, 0x7EDA, 0x7EFB,
+    0x7F56, 0x7F6A, 0x7F72, 0x7FC1, 0xFFBC, 0xFFE0, 0xFFFF, 0xFFF8,
+    0xFFD3, 0xFFB9, 0x7FAC, 0x7F94, 0x7F8B, 0x7F62, 0x7F4A, 0x7F05,
+    0x7EEE, 0x3F44, 0x3EA4, 0x3E78, 0x3E6F, 0x3E54, 0x1EEF, 0x1EA9,
+    0x0ED4, 0x06A8, 0x0ED5, 0x1EA8, 0x1EEC, 0x3E51, 0x3E69, 0x3E75,
+    0x3EA3, 0x3F43, 0x7EF1, 0x7F04, 0x7F4B, 0x7F5D, 0x7F89, 0x7F95,
+    0x7FAA, 0xFFBB, 0xFFC8, 0xFFF9, 0xFFD9, 0xFFCA, 0x7FC3, 0x7F8C,
+    0x7F38, 0x7F02, 0x3F5A, 0x3F4A, 0x3F30, 0x3EFF, 0x3EF4, 0x3EE9,
+    0x3E95, 0x3E73, 0x3E43, 0x1F08, 0x1E81, 0x0E84, 0x0349, 0x0E85,
+    0x1E82, 0x1F07, 0x3E45, 0x3E74, 0x3E96, 0x3EEA, 0x3EF1, 0x3F00,
+    0x3F32, 0x3F4F, 0x3F5C, 0x7F01, 0x7F3B, 0x7F8D, 0x7FC2, 0xFFCF,
+    0xFFD6, 0xFFC7, 0xFFB1, 0xFFA6, 0x7FA1, 0x7F2F, 0x7F24, 0x7F0A,
+    0x3EF6, 0x3E97, 0x3E83, 0x3E7F, 0x1F04, 0x1EE1, 0x1ECB, 0x1EAE,
+    0x1E7E, 0x0EBA, 0x0E8C, 0x0333, 0x0E8D, 0x0EBB, 0x1E7F, 0x1EAF,
+    0x1ECC, 0x1EE2, 0x1F05, 0x3E80, 0x3E84, 0x3E98, 0x3EF9, 0x7F06,
+    0x7F1B, 0x7F2D, 0x7F9F, 0xFFAF, 0xFFB0, 0xFFC6, 0x7FB5, 0x7FB1,
+    0x7F36, 0x7F25, 0x3F63, 0x3F3A, 0x3F1B, 0x3EBB, 0x3E63, 0x3E5B,
+    0x3E40, 0x1F14, 0x1EF5, 0x1EEB, 0x1EBE, 0x1E92, 0x1E6A, 0x070C,
+    0x032D, 0x070D, 0x1E6B, 0x1E93, 0x1EBF, 0x1EE8, 0x1EF3, 0x1F15,
+    0x3E3F, 0x3E5C, 0x3E64, 0x3EB9, 0x3F1C, 0x3F3C, 0x3F62, 0x7F22,
+    0x7F39, 0x7FAF, 0x7FAE, 0x7FC6, 0x7F7F, 0x7F53, 0x7F32, 0x3F66,
+    0x3EF0, 0x3E65, 0x1EDA, 0x1ED3, 0x1E98, 0x1E74, 0x0F20, 0x0F1D,
+    0x0EEA, 0x0EA9, 0x0E70, 0x0718, 0x033E, 0x0179, 0x033F, 0x0719,
+    0x0E71, 0x0EAB, 0x0EE9, 0x0F1F, 0x0F22, 0x1E76, 0x1E9B, 0x1ED5,
+    0x1EDD, 0x3E68, 0x3EF3, 0x3F6C, 0x7F33, 0x7F4E, 0x7F7D, 0x7FCB,
+    0x7FBD, 0x7F21, 0x7EF6, 0x3F0C, 0x3EBE, 0x3EAE, 0x1EA3, 0x1E85,
+    0x0F0C, 0x0F06, 0x0EEE, 0x0EE6, 0x0ECA, 0x0EA2, 0x0E78, 0x0720,
+    0x06BA, 0x0326, 0x015D, 0x0327, 0x06BB, 0x0721, 0x0E79, 0x0EA3,
+    0x0ECB, 0x0EE7, 0x0EEF, 0x0F07, 0x0F0D, 0x1E86, 0x1EA4, 0x3EAF,
+    0x3EBF, 0x3F0A, 0x7EF8, 0x7F1E, 0x7FBF, 0xFF9F, 0x7FB3, 0x7F96,
+    0x7F0B, 0x7EF5, 0x3E85, 0x3E4E, 0x1EB3, 0x1E6E, 0x0F2D, 0x0EBD,
+    0x0E97, 0x0E93, 0x0E86, 0x071A, 0x06E4, 0x06BC, 0x0338, 0x015A,
+    0x0339, 0x06BD, 0x06E5, 0x071B, 0x0E87, 0x0E94, 0x0E98, 0x0EBE,
+    0x0F2E, 0x1E6F, 0x1EB4, 0x3E4F, 0x3E88, 0x7EF4, 0x7F07, 0x7F99,
+    0x7FB7, 0xFF9D, 0x7FA8, 0x7EEB, 0x3F5D, 0x3EB2, 0x3EAA, 0x3E5D,
+    0x3E49, 0x1EFE, 0x1E89, 0x0F16, 0x0F12, 0x0EE0, 0x0E7A, 0x0E6A,
+    0x070E, 0x06FA, 0x06B4, 0x0314, 0x0158, 0x0315, 0x06B5, 0x06FB,
+    0x070F, 0x0E6B, 0x0E7B, 0x0EE1, 0x0F13, 0x0F17, 0x1E8A, 0x1EFF,
+    0x3E4A, 0x3E5E, 0x3EAB, 0x3EB3, 0x3F59, 0x7EEC, 0x7FA2, 0x7F82,
+    0x7F5E, 0x7F28, 0x3EDA, 0x3EC9, 0x3E7A, 0x1ED6, 0x1ECE, 0x0EFC,
+    0x0EF0, 0x0E9A, 0x0E7E, 0x0E66, 0x0E5E, 0x0722, 0x06C0, 0x06A0,
+    0x02FA, 0x00A8, 0x02FB, 0x06A1, 0x06C1, 0x0723, 0x0E5F, 0x0E67,
+    0x0E7F, 0x0E9B, 0x0EF1, 0x0EFD, 0x1ECF, 0x1ED7, 0x3E79, 0x3ECD,
+    0x3EDB, 0x7F26, 0x7F5A, 0x7F83, 0x7F54, 0x7EE8, 0x3F54, 0x3F0E,
+    0x3EFB, 0x3E47, 0x1EC1, 0x1EB9, 0x0EF9, 0x0EF3, 0x0EC1, 0x0E8F,
+    0x0E74, 0x0714, 0x06F4, 0x06B0, 0x0336, 0x030A, 0x009F, 0x030B,
+    0x0337, 0x06B1, 0x06F5, 0x0715, 0x0E75, 0x0E91, 0x0EC3, 0x0EF6,
+    0x0EFB, 0x1EBB, 0x1EC3, 0x3E41, 0x3EF8, 0x3F10, 0x3F4D, 0x7EE9,
+    0x7F52, 0x7F9A, 0x3F3F, 0x3F1F, 0x3F03, 0x3EA0, 0x1F0F, 0x1E72,
+    0x1E62, 0x0EDA, 0x0ED2, 0x0EB2, 0x0E64, 0x0708, 0x06EA, 0x06DA,
+    0x0346, 0x032A, 0x0176, 0x004B, 0x0177, 0x032B, 0x0347, 0x06DB,
+    0x06EB, 0x0709, 0x0E65, 0x0EB3, 0x0ED3, 0x0EDB, 0x1E63, 0x1E6C,
+    0x1F0E, 0x3E9E, 0x3F01, 0x3F1D, 0x3F3D, 0x7F9B, 0x7EDE, 0x3F36,
+    0x3F2E, 0x3F07, 0x3E99, 0x1F0D, 0x1EA1, 0x1E8C, 0x0EB4, 0x0EAC,
+    0x0E5A, 0x0E50, 0x0702, 0x06D4, 0x06C6, 0x032E, 0x0310, 0x0162,
+    0x0046, 0x0163, 0x0311, 0x032F, 0x06C7, 0x06D5, 0x0703, 0x0E51,
+    0x0E5B, 0x0EAD, 0x0EB5, 0x1E8F, 0x1EA7, 0x1F0C, 0x3E9A, 0x3F02,
+    0x3F2D, 0x3F35, 0x7EE0, 0x7F66, 0x3F11, 0x3EE1, 0x3EBC, 0x3E56,
+    0x1EC4, 0x1E64, 0x0F04, 0x0EC7, 0x0E56, 0x0E4C, 0x06EC, 0x06DC,
+    0x069C, 0x0694, 0x0320, 0x016C, 0x0154, 0x0044, 0x0155, 0x016D,
+    0x0321, 0x0695, 0x069D, 0x06DD, 0x06ED, 0x0E4D, 0x0E57, 0x0EC4,
+    0x0F05, 0x1E65, 0x1EC5, 0x3E55, 0x3EB7, 0x3EE3, 0x3F13, 0x7F67,
+    0x7FA7, 0x3F49, 0x3F22, 0x3EE5, 0x1EF6, 0x1EE5, 0x1E9C, 0x1E78,
+    0x0EA4, 0x06F6, 0x06DE, 0x06CE, 0x06AA, 0x0340, 0x0318, 0x02FE,
+    0x015E, 0x009A, 0x001C, 0x009B, 0x015F, 0x02FF, 0x0319, 0x0341,
+    0x06AB, 0x06CF, 0x06DF, 0x06F7, 0x0EA5, 0x1E79, 0x1E9D, 0x1EE7,
+    0x1EF7, 0x3EE6, 0x3F23, 0x3F4B, 0x7FA3, 0x7F49, 0x3F14, 0x3E8D,
+    0x3E6B, 0x1F17, 0x1EF8, 0x0F24, 0x0ECC, 0x06FE, 0x06CA, 0x06A4,
+    0x0698, 0x030C, 0x0302, 0x0170, 0x0168, 0x00A0, 0x001E, 0x0004,
+    0x001F, 0x00A1, 0x0169, 0x0171, 0x0303, 0x030D, 0x0699, 0x06A5,
+    0x06CB, 0x06FF, 0x0ECD, 0x0F25, 0x1EF9, 0x1F18, 0x3E6C, 0x3E8E,
+    0x3F15, 0x7F46, 0x1EF0, 0x1EDE, 0x1EC8, 0x0F1A, 0x0F00, 0x0E9E,
+    0x0E54, 0x06C4, 0x031E, 0x031C, 0x0306, 0x02F8, 0x017A, 0x0166,
+    0x00A6, 0x00A4, 0x0048, 0x000C, 0x0000, 0x000D, 0x0049, 0x00A5,
+    0x00A7, 0x0167, 0x017B, 0x02F9, 0x0307, 0x031D, 0x031F, 0x06C5,
+    0x0E55, 0x0E9F, 0x0F01, 0x0F1B, 0x1EC9, 0x1EDF, 0x1EF1, 0x7F47,
+    0x3F12, 0x3E8F, 0x3E6D, 0x1F1B, 0x1EFA, 0x0F26, 0x0ECE, 0x0700,
+    0x06CC, 0x06A6, 0x069A, 0x030E, 0x0304, 0x0172, 0x016A, 0x00A2,
+    0x0020, 0x0005, 0x0021, 0x00A3, 0x016B, 0x0173, 0x0305, 0x030F,
+    0x069B, 0x06A7, 0x06CD, 0x0701, 0x0ECF, 0x0F27, 0x1EFB, 0x1F1D,
+    0x3E6E, 0x3E90, 0x3F17, 0x7F48, 0x7FA9, 0x3F50, 0x3F24, 0x3EE7,
+    0x1EFC, 0x1EE9, 0x1E9E, 0x1E7A, 0x0EA6, 0x06F8, 0x06E0, 0x06D0,
+    0x06AC, 0x0342, 0x031A, 0x0300, 0x0160, 0x009C, 0x001D, 0x009D,
+    0x0161, 0x0301, 0x031B, 0x0343, 0x06AD, 0x06D1, 0x06E1, 0x06F9,
+    0x0EA7, 0x1E7B, 0x1E9F, 0x1EEA, 0x1EFD, 0x3EE8, 0x3F21, 0x3F52,
+    0x7F9E, 0x7F68, 0x3F16, 0x3EE2, 0x3EBA, 0x3E57, 0x1EC6, 0x1E66,
+    0x0F08, 0x0EC5, 0x0E58, 0x0E4E, 0x06EE, 0x06E2, 0x069E, 0x0696,
+    0x0322, 0x016E, 0x0156, 0x0045, 0x0157, 0x016F, 0x0323, 0x0697,
+    0x069F, 0x06E3, 0x06EF, 0x0E4F, 0x0E59, 0x0EC6, 0x0F09, 0x1E67,
+    0x1EC7, 0x3E58, 0x3EB8, 0x3EE4, 0x3F18, 0x7F69, 0x7EE2, 0x3F38,
+    0x3F33, 0x3F04, 0x3E9C, 0x1F0A, 0x1EA5, 0x1E8D, 0x0EB6, 0x0EAE,
+    0x0E5C, 0x0E52, 0x0704, 0x06D6, 0x06C8, 0x0330, 0x0312, 0x0164,
+    0x0047, 0x0165, 0x0313, 0x0331, 0x06C9, 0x06D7, 0x0705, 0x0E53,
+    0x0E5D, 0x0EAF, 0x0EB7, 0x1E8E, 0x1EA6, 0x1F0B, 0x3E9B, 0x3F05,
+    0x3F31, 0x3F39, 0x7EE1, 0x7F9C, 0x3F40, 0x3F1E, 0x3F06, 0x3E9D,
+    0x1F10, 0x1E70, 0x1E60, 0x0ED8, 0x0ED0, 0x0EB0, 0x0E62, 0x0706,
+    0x06E8, 0x06D8, 0x0344, 0x0328, 0x0174, 0x004A, 0x0175, 0x0329,
+    0x0345, 0x06D9, 0x06E9, 0x0707, 0x0E63, 0x0EB1, 0x0ED1, 0x0ED9,
+    0x1E61, 0x1E71, 0x1F11, 0x3E9F, 0x3F08, 0x3F20, 0x3F3E, 0x7F9D,
+    0x7F4F, 0x7EE6, 0x3F53, 0x3F0D, 0x3EFA, 0x3E46, 0x1EC0, 0x1EB8,
+    0x0EF8, 0x0EF2, 0x0EC0, 0x0E8E, 0x0E72, 0x0712, 0x06F2, 0x06AE,
+    0x0334, 0x0308, 0x009E, 0x0309, 0x0335, 0x06AF, 0x06F3, 0x0713,
+    0x0E73, 0x0E90, 0x0EC2, 0x0EF4, 0x0EFA, 0x1EBA, 0x1EC2, 0x3E48,
+    0x3EFC, 0x3F0B, 0x3F51, 0x7EE7, 0x7F51, 0x7F84, 0x7F61, 0x7F27,
+    0x3EDC, 0x3EC5, 0x3E7C, 0x1ED8, 0x1ED0, 0x0EFE, 0x0EF5, 0x0E9C,
+    0x0E80, 0x0E68, 0x0E60, 0x0724, 0x06C2, 0x06A2, 0x02FC, 0x00A9,
+    0x02FD, 0x06A3, 0x06C3, 0x0725, 0x0E61, 0x0E69, 0x0E81, 0x0E9D,
+    0x0EF7, 0x0EFF, 0x1ED1, 0x1ED9, 0x3E7B, 0x3ECF, 0x3ED9, 0x7F29,
+    0x7F65, 0x7F85, 0x7FA0, 0x7EEA, 0x3F60, 0x3EB4, 0x3EAC, 0x3E5F,
+    0x3E4B, 0x1F00, 0x1E8B, 0x0F18, 0x0F14, 0x0EE2, 0x0E7C, 0x0E6C,
+    0x0710, 0x06FC, 0x06B6, 0x0316, 0x0159, 0x0317, 0x06B7, 0x06FD,
+    0x0711, 0x0E6D, 0x0E7D, 0x0EE3, 0x0F15, 0x0F19, 0x1E88, 0x1F01,
+    0x3E4C, 0x3E60, 0x3EA9, 0x3EB1, 0x3F5B, 0x7EED, 0x7FA5, 0xFF9E,
+    0x7FBE, 0x7F98, 0x7F0D, 0x7EF3, 0x3E8C, 0x3E50, 0x1EB7, 0x1E73,
+    0x0F2F, 0x0EBF, 0x0E99, 0x0E95, 0x0E88, 0x071C, 0x06E6, 0x06BE,
+    0x033A, 0x015B, 0x033B, 0x06BF, 0x06E7, 0x071D, 0x0E89, 0x0E92,
+    0x0E96, 0x0EBC, 0x0F2C, 0x1E6D, 0x1EB0, 0x3E4D, 0x3E8B, 0x7EF2,
+    0x7F08, 0x7F97, 0x7FB0, 0xFF9C, 0x7FB2, 0x7F23, 0x7EFA, 0x3F0F,
+    0x3EBD, 0x3EAD, 0x1EA0, 0x1E87, 0x0F0A, 0x0F02, 0x0EED, 0x0EE4,
+    0x0EC8, 0x0EA0, 0x0E76, 0x071E, 0x06B8, 0x0324, 0x015C, 0x0325,
+    0x06B9, 0x071F, 0x0E77, 0x0EA1, 0x0EC9, 0x0EE5, 0x0EEC, 0x0F03,
+    0x0F0B, 0x1E84, 0x1EA2, 0x3EB0, 0x3EC4, 0x3F09, 0x7EF7, 0x7F1D,
+    0x7FBC, 0x7FCD, 0x7F81, 0x7F50, 0x7F34, 0x3F65, 0x3EED, 0x3E67,
+    0x1EDB, 0x1ED2, 0x1E99, 0x1E77, 0x0F21, 0x0F1C, 0x0EE8, 0x0EA8,
+    0x0E6E, 0x0716, 0x033C, 0x0178, 0x033D, 0x0717, 0x0E6F, 0x0EAA,
+    0x0EEB, 0x0F1E, 0x0F23, 0x1E75, 0x1E9A, 0x1ED4, 0x1EDC, 0x3E66,
+    0x3EEE, 0x3F6B, 0x7F35, 0x7F55, 0x7F7A, 0x7FCA, 0x7FB6, 0x7FB8,
+    0x7F37, 0x7F1F, 0x3F61, 0x3F37, 0x3F1A, 0x3EB5, 0x3E62, 0x3E5A,
+    0x1F1E, 0x1F13, 0x1EF2, 0x1EE6, 0x1EBC, 0x1E91, 0x1E68, 0x070A,
+    0x032C, 0x070B, 0x1E69, 0x1E90, 0x1EBD, 0x1EE4, 0x1EF4, 0x1F12,
+    0x3E3E, 0x3E59, 0x3E61, 0x3EB6, 0x3F19, 0x3F3B, 0x3F64, 0x7F1C,
+    0x7F3D, 0x7FB4, 0x7FB9, 0xFFC5, 0xFFB6, 0xFFAD, 0x7FA4, 0x7F2C,
+    0x7F20, 0x7F09, 0x3EF5, 0x3E93, 0x3E82, 0x3E7E, 0x1F02, 0x1EE3,
+    0x1ECA, 0x1EAD, 0x1E7C, 0x0EB8, 0x0E8A, 0x0332, 0x0E8B, 0x0EB9,
+    0x1E7D, 0x1EAC, 0x1ECD, 0x1EE0, 0x1F03, 0x3E7D, 0x3E81, 0x3E91,
+    0x3EF7, 0x7F0C, 0x7F1A, 0x7F2E, 0x7FA6, 0xFFA4, 0xFFB7, 0xFFC4,
+    0xFFD4, 0xFFCC, 0x7FC5, 0x7F8A, 0x7F3C, 0x7EFF, 0x3F5E, 0x3F4E,
+    0x3F2F, 0x3EFD, 0x3EEF, 0x3EEB, 0x3E92, 0x3E71, 0x3E42, 0x1F09,
+    0x1E83, 0x0E82, 0x0348, 0x0E83, 0x1E80, 0x1F06, 0x3E44, 0x3E72,
+    0x3E94, 0x3EEC, 0x3EF2, 0x3EFE, 0x3F34, 0x3F4C, 0x3F5F, 0x7EFE,
+    0x7F3A, 0x7F86, 0x7FC4, 0xFFD2, 0xFFD5, 0xFFFA, 0xFFCD, 0xFFBA,
+    0x7FAB, 0x7F92, 0x7F87, 0x7F63, 0x7F4C, 0x7F03, 0x7EF0, 0x3F42,
+    0x3EA1, 0x3E76, 0x3E6A, 0x3E52, 0x1EEE, 0x1EAB, 0x0ED6, 0x06A9,
+    0x0ED7, 0x1EAA, 0x1EED, 0x3E53, 0x3E70, 0x3E77, 0x3EA2, 0x3F41,
+    0x7EEF, 0x7F00, 0x7F4D, 0x7F60, 0x7F88, 0x7F93, 0x7FAD, 0xFFB8,
+    0xFFCB, 0xFFFB, 0xFFFC, 0xFFE1, 0xFFBF, 0x7FBA, 0x7F73, 0x7F6C,
+    0x7F57, 0x7EFC, 0x7EDD, 0x3F6A, 0x3F2A, 0x3F25, 0x3EDD, 0x3ED3,
+    0x3EC2, 0x3EA7, 0x3E86, 0x0F11, 0x06B2, 0x0F0E, 0x3E8A, 0x3EA5,
+    0x3EC0, 0x3ED1, 0x3EDE, 0x3F2C, 0x3F29, 0x3F69, 0x7EDB, 0x7EFD,
+    0x7F59, 0x7F6D, 0x7F74, 0x7FC0, 0xFFC3, 0xFFE3, 0xFFFD, 0xFFF0,
+    0xFFE9, 0xFFE7, 0xFFC9, 0xFFBD, 0xFFAE, 0x7F71, 0x7F5F, 0x7F42,
+    0x7F41, 0x7F30, 0x7F17, 0x7EE4, 0x3ED7, 0x3EC7, 0x3ECB, 0x1EB1,
+    0x0EDC, 0x06D2, 0x0EDD, 0x1EB2, 0x3EC6, 0x3EC8, 0x3ED4, 0x7EE3,
+    0x7F18, 0x7F2B, 0x7F3F, 0x7F43, 0x7F64, 0x7F6E, 0xFFAB, 0xFFC2,
+    0xFFD0, 0xFFE6, 0xFFEF, 0xFFF3, 0xFFF7, 0xFFEB, 0xFFDE, 0xFFDA,
+    0xFFB3, 0xFFAC, 0xFFA3, 0x7FC9, 0x7F8F, 0x7F7C, 0x7F79, 0x7F0F,
+    0x7F10, 0x3F55, 0x3F45, 0x1F1A, 0x1E95, 0x0F2B, 0x06F1, 0x0F29,
+    0x1E97, 0x1F19, 0x3F47, 0x3F57, 0x7F14, 0x7F12, 0x7F78, 0x7F7E,
+    0x7F91, 0x7FC7, 0xFFA1, 0xFFA5, 0xFFB2, 0xFFDB, 0xFFDF, 0xFFED,
+    0xFFF4, 0x004C,
+};
+
+static const uint16_t clv_mvy_3_syms[] = {
+    0xEEEE, 0xEFEE, 0xF0EE, 0xF1EE, 0xF2EE, 0xF3EE, 0xF4EE, 0xF5EE,
+    0xF6EE, 0xF7EE, 0xF8EE, 0xF9EE, 0xFAEE, 0xFBEE, 0xFCEE, 0xFDEE,
+    0xFEEE, 0xFFEE, 0x00EE, 0x01EE, 0x02EE, 0x03EE, 0x04EE, 0x05EE,
+    0x06EE, 0x07EE, 0x08EE, 0x09EE, 0x0AEE, 0x0BEE, 0x0CEE, 0x0DEE,
+    0x0EEE, 0x0FEE, 0x10EE, 0x11EE, 0x12EE, 0xEEEF, 0xEFEF, 0xF0EF,
+    0xF1EF, 0xF2EF, 0xF3EF, 0xF4EF, 0xF5EF, 0xF6EF, 0xF7EF, 0xF8EF,
+    0xF9EF, 0xFAEF, 0xFBEF, 0xFCEF, 0xFDEF, 0xFEEF, 0xFFEF, 0x00EF,
+    0x01EF, 0x02EF, 0x03EF, 0x04EF, 0x05EF, 0x06EF, 0x07EF, 0x08EF,
+    0x09EF, 0x0AEF, 0x0BEF, 0x0CEF, 0x0DEF, 0x0EEF, 0x0FEF, 0x10EF,
+    0x11EF, 0x12EF, 0xEEF0, 0xEFF0, 0xF0F0, 0xF1F0, 0xF2F0, 0xF3F0,
+    0xF4F0, 0xF5F0, 0xF6F0, 0xF7F0, 0xF8F0, 0xF9F0, 0xFAF0, 0xFBF0,
+    0xFCF0, 0xFDF0, 0xFEF0, 0xFFF0, 0x00F0, 0x01F0, 0x02F0, 0x03F0,
+    0x04F0, 0x05F0, 0x06F0, 0x07F0, 0x08F0, 0x09F0, 0x0AF0, 0x0BF0,
+    0x0CF0, 0x0DF0, 0x0EF0, 0x0FF0, 0x10F0, 0x11F0, 0x12F0, 0xEEF1,
+    0xEFF1, 0xF0F1, 0xF1F1, 0xF2F1, 0xF3F1, 0xF4F1, 0xF5F1, 0xF6F1,
+    0xF7F1, 0xF8F1, 0xF9F1, 0xFAF1, 0xFBF1, 0xFCF1, 0xFDF1, 0xFEF1,
+    0xFFF1, 0x00F1, 0x01F1, 0x02F1, 0x03F1, 0x04F1, 0x05F1, 0x06F1,
+    0x07F1, 0x08F1, 0x09F1, 0x0AF1, 0x0BF1, 0x0CF1, 0x0DF1, 0x0EF1,
+    0x0FF1, 0x10F1, 0x11F1, 0x12F1, 0xEEF2, 0xEFF2, 0xF0F2, 0xF1F2,
+    0xF2F2, 0xF3F2, 0xF4F2, 0xF5F2, 0xF6F2, 0xF7F2, 0xF8F2, 0xF9F2,
+    0xFAF2, 0xFBF2, 0xFCF2, 0xFDF2, 0xFEF2, 0xFFF2, 0x00F2, 0x01F2,
+    0x02F2, 0x03F2, 0x04F2, 0x05F2, 0x06F2, 0x07F2, 0x08F2, 0x09F2,
+    0x0AF2, 0x0BF2, 0x0CF2, 0x0DF2, 0x0EF2, 0x0FF2, 0x10F2, 0x11F2,
+    0x12F2, 0xEEF3, 0xEFF3, 0xF0F3, 0xF1F3, 0xF2F3, 0xF3F3, 0xF4F3,
+    0xF5F3, 0xF6F3, 0xF7F3, 0xF8F3, 0xF9F3, 0xFAF3, 0xFBF3, 0xFCF3,
+    0xFDF3, 0xFEF3, 0xFFF3, 0x00F3, 0x01F3, 0x02F3, 0x03F3, 0x04F3,
+    0x05F3, 0x06F3, 0x07F3, 0x08F3, 0x09F3, 0x0AF3, 0x0BF3, 0x0CF3,
+    0x0DF3, 0x0EF3, 0x0FF3, 0x10F3, 0x11F3, 0x12F3, 0xEEF4, 0xEFF4,
+    0xF0F4, 0xF1F4, 0xF2F4, 0xF3F4, 0xF4F4, 0xF5F4, 0xF6F4, 0xF7F4,
+    0xF8F4, 0xF9F4, 0xFAF4, 0xFBF4, 0xFCF4, 0xFDF4, 0xFEF4, 0xFFF4,
+    0x00F4, 0x01F4, 0x02F4, 0x03F4, 0x04F4, 0x05F4, 0x06F4, 0x07F4,
+    0x08F4, 0x09F4, 0x0AF4, 0x0BF4, 0x0CF4, 0x0DF4, 0x0EF4, 0x0FF4,
+    0x10F4, 0x11F4, 0x12F4, 0xEEF5, 0xEFF5, 0xF0F5, 0xF1F5, 0xF2F5,
+    0xF3F5, 0xF4F5, 0xF5F5, 0xF6F5, 0xF7F5, 0xF8F5, 0xF9F5, 0xFAF5,
+    0xFBF5, 0xFCF5, 0xFDF5, 0xFEF5, 0xFFF5, 0x00F5, 0x01F5, 0x02F5,
+    0x03F5, 0x04F5, 0x05F5, 0x06F5, 0x07F5, 0x08F5, 0x09F5, 0x0AF5,
+    0x0BF5, 0x0CF5, 0x0DF5, 0x0EF5, 0x0FF5, 0x10F5, 0x11F5, 0x12F5,
+    0xEEF6, 0xEFF6, 0xF0F6, 0xF1F6, 0xF2F6, 0xF3F6, 0xF4F6, 0xF5F6,
+    0xF6F6, 0xF7F6, 0xF8F6, 0xF9F6, 0xFAF6, 0xFBF6, 0xFCF6, 0xFDF6,
+    0xFEF6, 0xFFF6, 0x00F6, 0x01F6, 0x02F6, 0x03F6, 0x04F6, 0x05F6,
+    0x06F6, 0x07F6, 0x08F6, 0x09F6, 0x0AF6, 0x0BF6, 0x0CF6, 0x0DF6,
+    0x0EF6, 0x0FF6, 0x10F6, 0x11F6, 0x12F6, 0xEEF7, 0xEFF7, 0xF0F7,
+    0xF1F7, 0xF2F7, 0xF3F7, 0xF4F7, 0xF5F7, 0xF6F7, 0xF7F7, 0xF8F7,
+    0xF9F7, 0xFAF7, 0xFBF7, 0xFCF7, 0xFDF7, 0xFEF7, 0xFFF7, 0x00F7,
+    0x01F7, 0x02F7, 0x03F7, 0x04F7, 0x05F7, 0x06F7, 0x07F7, 0x08F7,
+    0x09F7, 0x0AF7, 0x0BF7, 0x0CF7, 0x0DF7, 0x0EF7, 0x0FF7, 0x10F7,
+    0x11F7, 0x12F7, 0xEEF8, 0xEFF8, 0xF0F8, 0xF1F8, 0xF2F8, 0xF3F8,
+    0xF4F8, 0xF5F8, 0xF6F8, 0xF7F8, 0xF8F8, 0xF9F8, 0xFAF8, 0xFBF8,
+    0xFCF8, 0xFDF8, 0xFEF8, 0xFFF8, 0x00F8, 0x01F8, 0x02F8, 0x03F8,
+    0x04F8, 0x05F8, 0x06F8, 0x07F8, 0x08F8, 0x09F8, 0x0AF8, 0x0BF8,
+    0x0CF8, 0x0DF8, 0x0EF8, 0x0FF8, 0x10F8, 0x11F8, 0x12F8, 0xEEF9,
+    0xEFF9, 0xF0F9, 0xF1F9, 0xF2F9, 0xF3F9, 0xF4F9, 0xF5F9, 0xF6F9,
+    0xF7F9, 0xF8F9, 0xF9F9, 0xFAF9, 0xFBF9, 0xFCF9, 0xFDF9, 0xFEF9,
+    0xFFF9, 0x00F9, 0x01F9, 0x02F9, 0x03F9, 0x04F9, 0x05F9, 0x06F9,
+    0x07F9, 0x08F9, 0x09F9, 0x0AF9, 0x0BF9, 0x0CF9, 0x0DF9, 0x0EF9,
+    0x0FF9, 0x10F9, 0x11F9, 0x12F9, 0xEEFA, 0xEFFA, 0xF0FA, 0xF1FA,
+    0xF2FA, 0xF3FA, 0xF4FA, 0xF5FA, 0xF6FA, 0xF7FA, 0xF8FA, 0xF9FA,
+    0xFAFA, 0xFBFA, 0xFCFA, 0xFDFA, 0xFEFA, 0xFFFA, 0x00FA, 0x01FA,
+    0x02FA, 0x03FA, 0x04FA, 0x05FA, 0x06FA, 0x07FA, 0x08FA, 0x09FA,
+    0x0AFA, 0x0BFA, 0x0CFA, 0x0DFA, 0x0EFA, 0x0FFA, 0x10FA, 0x11FA,
+    0x12FA, 0xEEFB, 0xEFFB, 0xF0FB, 0xF1FB, 0xF2FB, 0xF3FB, 0xF4FB,
+    0xF5FB, 0xF6FB, 0xF7FB, 0xF8FB, 0xF9FB, 0xFAFB, 0xFBFB, 0xFCFB,
+    0xFDFB, 0xFEFB, 0xFFFB, 0x00FB, 0x01FB, 0x02FB, 0x03FB, 0x04FB,
+    0x05FB, 0x06FB, 0x07FB, 0x08FB, 0x09FB, 0x0AFB, 0x0BFB, 0x0CFB,
+    0x0DFB, 0x0EFB, 0x0FFB, 0x10FB, 0x11FB, 0x12FB, 0xEEFC, 0xEFFC,
+    0xF0FC, 0xF1FC, 0xF2FC, 0xF3FC, 0xF4FC, 0xF5FC, 0xF6FC, 0xF7FC,
+    0xF8FC, 0xF9FC, 0xFAFC, 0xFBFC, 0xFCFC, 0xFDFC, 0xFEFC, 0xFFFC,
+    0x00FC, 0x01FC, 0x02FC, 0x03FC, 0x04FC, 0x05FC, 0x06FC, 0x07FC,
+    0x08FC, 0x09FC, 0x0AFC, 0x0BFC, 0x0CFC, 0x0DFC, 0x0EFC, 0x0FFC,
+    0x10FC, 0x11FC, 0x12FC, 0xEEFD, 0xEFFD, 0xF0FD, 0xF1FD, 0xF2FD,
+    0xF3FD, 0xF4FD, 0xF5FD, 0xF6FD, 0xF7FD, 0xF8FD, 0xF9FD, 0xFAFD,
+    0xFBFD, 0xFCFD, 0xFDFD, 0xFEFD, 0xFFFD, 0x00FD, 0x01FD, 0x02FD,
+    0x03FD, 0x04FD, 0x05FD, 0x06FD, 0x07FD, 0x08FD, 0x09FD, 0x0AFD,
+    0x0BFD, 0x0CFD, 0x0DFD, 0x0EFD, 0x0FFD, 0x10FD, 0x11FD, 0x12FD,
+    0xEEFE, 0xEFFE, 0xF0FE, 0xF1FE, 0xF2FE, 0xF3FE, 0xF4FE, 0xF5FE,
+    0xF6FE, 0xF7FE, 0xF8FE, 0xF9FE, 0xFAFE, 0xFBFE, 0xFCFE, 0xFDFE,
+    0xFEFE, 0xFFFE, 0x00FE, 0x01FE, 0x02FE, 0x03FE, 0x04FE, 0x05FE,
+    0x06FE, 0x07FE, 0x08FE, 0x09FE, 0x0AFE, 0x0BFE, 0x0CFE, 0x0DFE,
+    0x0EFE, 0x0FFE, 0x10FE, 0x11FE, 0x12FE, 0xEEFF, 0xEFFF, 0xF0FF,
+    0xF1FF, 0xF2FF, 0xF3FF, 0xF4FF, 0xF5FF, 0xF6FF, 0xF7FF, 0xF8FF,
+    0xF9FF, 0xFAFF, 0xFBFF, 0xFCFF, 0xFDFF, 0xFEFF, 0xFFFF, 0x00FF,
+    0x01FF, 0x02FF, 0x03FF, 0x04FF, 0x05FF, 0x06FF, 0x07FF, 0x08FF,
+    0x09FF, 0x0AFF, 0x0BFF, 0x0CFF, 0x0DFF, 0x0EFF, 0x0FFF, 0x10FF,
+    0x11FF, 0x12FF, 0xEE00, 0xEF00, 0xF000, 0xF100, 0xF200, 0xF300,
+    0xF400, 0xF500, 0xF600, 0xF700, 0xF800, 0xF900, 0xFA00, 0xFB00,
+    0xFC00, 0xFD00, 0xFE00, 0xFF00, 0x0000, 0x0100, 0x0200, 0x0300,
+    0x0400, 0x0500, 0x0600, 0x0700, 0x0800, 0x0900, 0x0A00, 0x0B00,
+    0x0C00, 0x0D00, 0x0E00, 0x0F00, 0x1000, 0x1100, 0x1200, 0xEE01,
+    0xEF01, 0xF001, 0xF101, 0xF201, 0xF301, 0xF401, 0xF501, 0xF601,
+    0xF701, 0xF801, 0xF901, 0xFA01, 0xFB01, 0xFC01, 0xFD01, 0xFE01,
+    0xFF01, 0x0001, 0x0101, 0x0201, 0x0301, 0x0401, 0x0501, 0x0601,
+    0x0701, 0x0801, 0x0901, 0x0A01, 0x0B01, 0x0C01, 0x0D01, 0x0E01,
+    0x0F01, 0x1001, 0x1101, 0x1201, 0xEE02, 0xEF02, 0xF002, 0xF102,
+    0xF202, 0xF302, 0xF402, 0xF502, 0xF602, 0xF702, 0xF802, 0xF902,
+    0xFA02, 0xFB02, 0xFC02, 0xFD02, 0xFE02, 0xFF02, 0x0002, 0x0102,
+    0x0202, 0x0302, 0x0402, 0x0502, 0x0602, 0x0702, 0x0802, 0x0902,
+    0x0A02, 0x0B02, 0x0C02, 0x0D02, 0x0E02, 0x0F02, 0x1002, 0x1102,
+    0x1202, 0xEE03, 0xEF03, 0xF003, 0xF103, 0xF203, 0xF303, 0xF403,
+    0xF503, 0xF603, 0xF703, 0xF803, 0xF903, 0xFA03, 0xFB03, 0xFC03,
+    0xFD03, 0xFE03, 0xFF03, 0x0003, 0x0103, 0x0203, 0x0303, 0x0403,
+    0x0503, 0x0603, 0x0703, 0x0803, 0x0903, 0x0A03, 0x0B03, 0x0C03,
+    0x0D03, 0x0E03, 0x0F03, 0x1003, 0x1103, 0x1203, 0xEE04, 0xEF04,
+    0xF004, 0xF104, 0xF204, 0xF304, 0xF404, 0xF504, 0xF604, 0xF704,
+    0xF804, 0xF904, 0xFA04, 0xFB04, 0xFC04, 0xFD04, 0xFE04, 0xFF04,
+    0x0004, 0x0104, 0x0204, 0x0304, 0x0404, 0x0504, 0x0604, 0x0704,
+    0x0804, 0x0904, 0x0A04, 0x0B04, 0x0C04, 0x0D04, 0x0E04, 0x0F04,
+    0x1004, 0x1104, 0x1204, 0xEE05, 0xEF05, 0xF005, 0xF105, 0xF205,
+    0xF305, 0xF405, 0xF505, 0xF605, 0xF705, 0xF805, 0xF905, 0xFA05,
+    0xFB05, 0xFC05, 0xFD05, 0xFE05, 0xFF05, 0x0005, 0x0105, 0x0205,
+    0x0305, 0x0405, 0x0505, 0x0605, 0x0705, 0x0805, 0x0905, 0x0A05,
+    0x0B05, 0x0C05, 0x0D05, 0x0E05, 0x0F05, 0x1005, 0x1105, 0x1205,
+    0xEE06, 0xEF06, 0xF006, 0xF106, 0xF206, 0xF306, 0xF406, 0xF506,
+    0xF606, 0xF706, 0xF806, 0xF906, 0xFA06, 0xFB06, 0xFC06, 0xFD06,
+    0xFE06, 0xFF06, 0x0006, 0x0106, 0x0206, 0x0306, 0x0406, 0x0506,
+    0x0606, 0x0706, 0x0806, 0x0906, 0x0A06, 0x0B06, 0x0C06, 0x0D06,
+    0x0E06, 0x0F06, 0x1006, 0x1106, 0x1206, 0xEE07, 0xEF07, 0xF007,
+    0xF107, 0xF207, 0xF307, 0xF407, 0xF507, 0xF607, 0xF707, 0xF807,
+    0xF907, 0xFA07, 0xFB07, 0xFC07, 0xFD07, 0xFE07, 0xFF07, 0x0007,
+    0x0107, 0x0207, 0x0307, 0x0407, 0x0507, 0x0607, 0x0707, 0x0807,
+    0x0907, 0x0A07, 0x0B07, 0x0C07, 0x0D07, 0x0E07, 0x0F07, 0x1007,
+    0x1107, 0x1207, 0xEE08, 0xEF08, 0xF008, 0xF108, 0xF208, 0xF308,
+    0xF408, 0xF508, 0xF608, 0xF708, 0xF808, 0xF908, 0xFA08, 0xFB08,
+    0xFC08, 0xFD08, 0xFE08, 0xFF08, 0x0008, 0x0108, 0x0208, 0x0308,
+    0x0408, 0x0508, 0x0608, 0x0708, 0x0808, 0x0908, 0x0A08, 0x0B08,
+    0x0C08, 0x0D08, 0x0E08, 0x0F08, 0x1008, 0x1108, 0x1208, 0xEE09,
+    0xEF09, 0xF009, 0xF109, 0xF209, 0xF309, 0xF409, 0xF509, 0xF609,
+    0xF709, 0xF809, 0xF909, 0xFA09, 0xFB09, 0xFC09, 0xFD09, 0xFE09,
+    0xFF09, 0x0009, 0x0109, 0x0209, 0x0309, 0x0409, 0x0509, 0x0609,
+    0x0709, 0x0809, 0x0909, 0x0A09, 0x0B09, 0x0C09, 0x0D09, 0x0E09,
+    0x0F09, 0x1009, 0x1109, 0x1209, 0xEE0A, 0xEF0A, 0xF00A, 0xF10A,
+    0xF20A, 0xF30A, 0xF40A, 0xF50A, 0xF60A, 0xF70A, 0xF80A, 0xF90A,
+    0xFA0A, 0xFB0A, 0xFC0A, 0xFD0A, 0xFE0A, 0xFF0A, 0x000A, 0x010A,
+    0x020A, 0x030A, 0x040A, 0x050A, 0x060A, 0x070A, 0x080A, 0x090A,
+    0x0A0A, 0x0B0A, 0x0C0A, 0x0D0A, 0x0E0A, 0x0F0A, 0x100A, 0x110A,
+    0x120A, 0xEE0B, 0xEF0B, 0xF00B, 0xF10B, 0xF20B, 0xF30B, 0xF40B,
+    0xF50B, 0xF60B, 0xF70B, 0xF80B, 0xF90B, 0xFA0B, 0xFB0B, 0xFC0B,
+    0xFD0B, 0xFE0B, 0xFF0B, 0x000B, 0x010B, 0x020B, 0x030B, 0x040B,
+    0x050B, 0x060B, 0x070B, 0x080B, 0x090B, 0x0A0B, 0x0B0B, 0x0C0B,
+    0x0D0B, 0x0E0B, 0x0F0B, 0x100B, 0x110B, 0x120B, 0xEE0C, 0xEF0C,
+    0xF00C, 0xF10C, 0xF20C, 0xF30C, 0xF40C, 0xF50C, 0xF60C, 0xF70C,
+    0xF80C, 0xF90C, 0xFA0C, 0xFB0C, 0xFC0C, 0xFD0C, 0xFE0C, 0xFF0C,
+    0x000C, 0x010C, 0x020C, 0x030C, 0x040C, 0x050C, 0x060C, 0x070C,
+    0x080C, 0x090C, 0x0A0C, 0x0B0C, 0x0C0C, 0x0D0C, 0x0E0C, 0x0F0C,
+    0x100C, 0x110C, 0x120C, 0xEE0D, 0xEF0D, 0xF00D, 0xF10D, 0xF20D,
+    0xF30D, 0xF40D, 0xF50D, 0xF60D, 0xF70D, 0xF80D, 0xF90D, 0xFA0D,
+    0xFB0D, 0xFC0D, 0xFD0D, 0xFE0D, 0xFF0D, 0x000D, 0x010D, 0x020D,
+    0x030D, 0x040D, 0x050D, 0x060D, 0x070D, 0x080D, 0x090D, 0x0A0D,
+    0x0B0D, 0x0C0D, 0x0D0D, 0x0E0D, 0x0F0D, 0x100D, 0x110D, 0x120D,
+    0xEE0E, 0xEF0E, 0xF00E, 0xF10E, 0xF20E, 0xF30E, 0xF40E, 0xF50E,
+    0xF60E, 0xF70E, 0xF80E, 0xF90E, 0xFA0E, 0xFB0E, 0xFC0E, 0xFD0E,
+    0xFE0E, 0xFF0E, 0x000E, 0x010E, 0x020E, 0x030E, 0x040E, 0x050E,
+    0x060E, 0x070E, 0x080E, 0x090E, 0x0A0E, 0x0B0E, 0x0C0E, 0x0D0E,
+    0x0E0E, 0x0F0E, 0x100E, 0x110E, 0x120E, 0xEE0F, 0xEF0F, 0xF00F,
+    0xF10F, 0xF20F, 0xF30F, 0xF40F, 0xF50F, 0xF60F, 0xF70F, 0xF80F,
+    0xF90F, 0xFA0F, 0xFB0F, 0xFC0F, 0xFD0F, 0xFE0F, 0xFF0F, 0x000F,
+    0x010F, 0x020F, 0x030F, 0x040F, 0x050F, 0x060F, 0x070F, 0x080F,
+    0x090F, 0x0A0F, 0x0B0F, 0x0C0F, 0x0D0F, 0x0E0F, 0x0F0F, 0x100F,
+    0x110F, 0x120F, 0xEE10, 0xEF10, 0xF010, 0xF110, 0xF210, 0xF310,
+    0xF410, 0xF510, 0xF610, 0xF710, 0xF810, 0xF910, 0xFA10, 0xFB10,
+    0xFC10, 0xFD10, 0xFE10, 0xFF10, 0x0010, 0x0110, 0x0210, 0x0310,
+    0x0410, 0x0510, 0x0610, 0x0710, 0x0810, 0x0910, 0x0A10, 0x0B10,
+    0x0C10, 0x0D10, 0x0E10, 0x0F10, 0x1010, 0x1110, 0x1210, 0xEE11,
+    0xEF11, 0xF011, 0xF111, 0xF211, 0xF311, 0xF411, 0xF511, 0xF611,
+    0xF711, 0xF811, 0xF911, 0xFA11, 0xFB11, 0xFC11, 0xFD11, 0xFE11,
+    0xFF11, 0x0011, 0x0111, 0x0211, 0x0311, 0x0411, 0x0511, 0x0611,
+    0x0711, 0x0811, 0x0911, 0x0A11, 0x0B11, 0x0C11, 0x0D11, 0x0E11,
+    0x0F11, 0x1011, 0x1111, 0x1211, 0xEE12, 0xEF12, 0xF012, 0xF112,
+    0xF212, 0xF312, 0xF412, 0xF512, 0xF612, 0xF712, 0xF812, 0xF912,
+    0xFA12, 0xFB12, 0xFC12, 0xFD12, 0xFE12, 0xFF12, 0x0012, 0x0112,
+    0x0212, 0x0312, 0x0412, 0x0512, 0x0612, 0x0712, 0x0812, 0x0912,
+    0x0A12, 0x0B12, 0x0C12, 0x0D12, 0x0E12, 0x0F12, 0x1012, 0x1112,
+    0x1212, 0x1313,
+};
+
+static const uint8_t clv_mvu_1_bits[] = {
+    16, 14, 14, 14, 13, 12, 12, 10, 12, 12, 13, 14, 14, 14, 16, 15,
+    13, 13, 12, 12, 11, 11,  7, 11, 11, 12, 12, 13, 13, 15, 16, 16,
+    16, 13, 12, 10, 10,  6, 10, 10, 12, 13, 16, 16, 16, 14, 14, 11,
+    11, 11, 11,  9,  7,  9, 11, 11, 11, 11, 14, 14, 15, 13, 13, 12,
+     9,  8,  8,  4,  8,  8,  9, 12, 13, 13, 15, 14, 14, 11, 11, 10,
+     9,  8,  4,  8,  9, 10, 11, 12, 14, 14, 12, 12, 11, 10, 10,  8,
+     6,  3,  6,  8, 10, 10, 11, 12, 12, 11, 10,  9,  6,  6,  6,  5,
+     4,  5,  6,  6,  6,  9, 10, 11, 12, 12, 11, 10, 10,  8,  6,  3,
+     6,  7, 10, 10, 11, 12, 12, 14, 14, 11, 11, 10,  9,  8,  4,  8,
+     9, 10, 11, 11, 14, 14, 15, 13, 13, 12,  9,  8,  8,  4,  8,  8,
+     9, 12, 13, 13, 15, 14, 14, 11, 11, 11, 11,  9,  7,  9, 11, 11,
+    11, 11, 14, 14, 16, 16, 16, 13, 12, 10, 10,  6, 10, 10, 12, 13,
+    16, 16, 16, 15, 13, 13, 12, 12, 11, 11,  7, 11, 11, 12, 12, 13,
+    13, 15, 16, 14, 14, 14, 13, 12, 12, 10, 12, 12, 13, 14, 14, 14,
+    16,  7,
+};
+
+static const uint16_t clv_mvu_1_codes[] = {
+    0xFFFC, 0x3FED, 0x3FE5, 0x3FE3, 0x1FD9, 0x0FD9, 0x0FD6, 0x03CE,
+    0x0FD3, 0x0FD8, 0x1FD6, 0x3FE0, 0x3FE8, 0x3FEC, 0xFFFE, 0x7FF3,
+    0x1FE7, 0x1FDA, 0x0FCF, 0x0FCC, 0x07DD, 0x07CC, 0x006B, 0x07CD,
+    0x07DE, 0x0FCE, 0x0FD2, 0x1FDD, 0x1FEC, 0x7FF0, 0xFFF6, 0xFFFA,
+    0xFFF2, 0x1FDE, 0x0FDB, 0x03D8, 0x03CA, 0x002E, 0x03CB, 0x03D9,
+    0x0FDC, 0x1FDF, 0xFFF3, 0xFFF9, 0xFFF5, 0x3FF3, 0x3FDD, 0x07DA,
+    0x07D2, 0x07CA, 0x07C2, 0x01DE, 0x0069, 0x01DF, 0x07C3, 0x07CB,
+    0x07D3, 0x07DB, 0x3FDF, 0x3FF6, 0x7FF7, 0x1FED, 0x1FE5, 0x0FDF,
+    0x01D8, 0x00E3, 0x00DF, 0x0007, 0x00E0, 0x00E4, 0x01D9, 0x0FE0,
+    0x1FE3, 0x1FE9, 0x7FF4, 0x3FF5, 0x3FE7, 0x07E4, 0x07C6, 0x03D2,
+    0x01E2, 0x00E9, 0x0006, 0x00EA, 0x01E3, 0x03D3, 0x07C7, 0x0FCA,
+    0x3FE9, 0x3FF0, 0x0FE8, 0x0FE5, 0x07D5, 0x03DD, 0x03D5, 0x00DC,
+    0x002B, 0x0001, 0x002C, 0x00DD, 0x03D6, 0x03DE, 0x07D6, 0x0FE4,
+    0x0FE9, 0x07E0, 0x03C8, 0x01D6, 0x0032, 0x0030, 0x0028, 0x0012,
+    0x0004, 0x0013, 0x0029, 0x0031, 0x0033, 0x01D7, 0x03C9, 0x07E1,
+    0x0FEA, 0x0FE6, 0x07D7, 0x03DF, 0x03D7, 0x00DE, 0x002D, 0x0000,
+    0x002A, 0x006D, 0x03D4, 0x03DC, 0x07D4, 0x0FE3, 0x0FE7, 0x3FF1,
+    0x3FE4, 0x07E2, 0x07C4, 0x03D0, 0x01E0, 0x00E7, 0x0005, 0x00E8,
+    0x01E1, 0x03D1, 0x07C5, 0x07E3, 0x3FEA, 0x3FF7, 0x7FF6, 0x1FE6,
+    0x1FE4, 0x0FE1, 0x01DA, 0x00E5, 0x00E1, 0x0008, 0x00E2, 0x00E6,
+    0x01DB, 0x0FE2, 0x1FE2, 0x1FEB, 0x7FF5, 0x3FF4, 0x3FDC, 0x07D9,
+    0x07D0, 0x07C8, 0x07C0, 0x01DC, 0x0068, 0x01DD, 0x07C1, 0x07C9,
+    0x07D1, 0x07D8, 0x3FDE, 0x3FF2, 0xFFFB, 0xFFF4, 0xFFF0, 0x1FE1,
+    0x0FDD, 0x03DA, 0x03CC, 0x002F, 0x03CD, 0x03DB, 0x0FDE, 0x1FE0,
+    0xFFF1, 0xFFF7, 0xFFF8, 0x7FF2, 0x1FEA, 0x1FDC, 0x0FD1, 0x0FCD,
+    0x07DC, 0x07CF, 0x006C, 0x07CE, 0x07DF, 0x0FCB, 0x0FD0, 0x1FDB,
+    0x1FE8, 0x7FF1, 0xFFFD, 0x3FEE, 0x3FEB, 0x3FE1, 0x1FD7, 0x0FD7,
+    0x0FD5, 0x03CF, 0x0FD4, 0x0FDA, 0x1FD8, 0x3FE2, 0x3FE6, 0x3FEF,
+    0xFFFF, 0x006A,
+};
+
+static const uint16_t clv_mvu_1_syms[] = {
+    0xF9F9, 0xFAF9, 0xFBF9, 0xFCF9, 0xFDF9, 0xFEF9, 0xFFF9, 0x00F9,
+    0x01F9, 0x02F9, 0x03F9, 0x04F9, 0x05F9, 0x06F9, 0x07F9, 0xF9FA,
+    0xFAFA, 0xFBFA, 0xFCFA, 0xFDFA, 0xFEFA, 0xFFFA, 0x00FA, 0x01FA,
+    0x02FA, 0x03FA, 0x04FA, 0x05FA, 0x06FA, 0x07FA, 0xF9FB, 0xFAFB,
+    0xFBFB, 0xFCFB, 0xFDFB, 0xFEFB, 0xFFFB, 0x00FB, 0x01FB, 0x02FB,
+    0x03FB, 0x04FB, 0x05FB, 0x06FB, 0x07FB, 0xF9FC, 0xFAFC, 0xFBFC,
+    0xFCFC, 0xFDFC, 0xFEFC, 0xFFFC, 0x00FC, 0x01FC, 0x02FC, 0x03FC,
+    0x04FC, 0x05FC, 0x06FC, 0x07FC, 0xF9FD, 0xFAFD, 0xFBFD, 0xFCFD,
+    0xFDFD, 0xFEFD, 0xFFFD, 0x00FD, 0x01FD, 0x02FD, 0x03FD, 0x04FD,
+    0x05FD, 0x06FD, 0x07FD, 0xF9FE, 0xFAFE, 0xFBFE, 0xFCFE, 0xFDFE,
+    0xFEFE, 0xFFFE, 0x00FE, 0x01FE, 0x02FE, 0x03FE, 0x04FE, 0x05FE,
+    0x06FE, 0x07FE, 0xF9FF, 0xFAFF, 0xFBFF, 0xFCFF, 0xFDFF, 0xFEFF,
+    0xFFFF, 0x00FF, 0x01FF, 0x02FF, 0x03FF, 0x04FF, 0x05FF, 0x06FF,
+    0x07FF, 0xF900, 0xFA00, 0xFB00, 0xFC00, 0xFD00, 0xFE00, 0xFF00,
+    0x0000, 0x0100, 0x0200, 0x0300, 0x0400, 0x0500, 0x0600, 0x0700,
+    0xF901, 0xFA01, 0xFB01, 0xFC01, 0xFD01, 0xFE01, 0xFF01, 0x0001,
+    0x0101, 0x0201, 0x0301, 0x0401, 0x0501, 0x0601, 0x0701, 0xF902,
+    0xFA02, 0xFB02, 0xFC02, 0xFD02, 0xFE02, 0xFF02, 0x0002, 0x0102,
+    0x0202, 0x0302, 0x0402, 0x0502, 0x0602, 0x0702, 0xF903, 0xFA03,
+    0xFB03, 0xFC03, 0xFD03, 0xFE03, 0xFF03, 0x0003, 0x0103, 0x0203,
+    0x0303, 0x0403, 0x0503, 0x0603, 0x0703, 0xF904, 0xFA04, 0xFB04,
+    0xFC04, 0xFD04, 0xFE04, 0xFF04, 0x0004, 0x0104, 0x0204, 0x0304,
+    0x0404, 0x0504, 0x0604, 0x0704, 0xF905, 0xFA05, 0xFB05, 0xFC05,
+    0xFD05, 0xFE05, 0xFF05, 0x0005, 0x0105, 0x0205, 0x0305, 0x0405,
+    0x0505, 0x0605, 0x0705, 0xF906, 0xFA06, 0xFB06, 0xFC06, 0xFD06,
+    0xFE06, 0xFF06, 0x0006, 0x0106, 0x0206, 0x0306, 0x0406, 0x0506,
+    0x0606, 0x0706, 0xF907, 0xFA07, 0xFB07, 0xFC07, 0xFD07, 0xFE07,
+    0xFF07, 0x0007, 0x0107, 0x0207, 0x0307, 0x0407, 0x0507, 0x0607,
+    0x0707, 0x0808,
+};
+
+static const uint8_t clv_mvu_2_bits[] = {
+    16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 15, 15,
+    15, 15, 16, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13,
+    13, 14, 14, 14, 14, 14, 15, 15, 15, 16, 16, 15, 15, 14, 14, 14,
+    14, 14, 14, 13, 13, 13, 14, 14, 14, 14, 14, 14, 15, 15, 16, 16,
+    15, 15, 14, 13, 13, 13, 13, 13, 12, 12, 12, 13, 13, 13, 13, 13,
+    14, 15, 15, 16, 14, 14, 14, 14, 13, 12, 12, 12, 12, 11, 10, 11,
+    12, 12, 12, 12, 13, 14, 14, 14, 14, 14, 14, 14, 13, 12, 12, 12,
+    12, 11, 10,  9, 10, 11, 12, 12, 12, 12, 13, 14, 14, 14, 15, 14,
+    13, 13, 12, 12, 12, 12, 11, 10,  8, 10, 11, 12, 12, 12, 12, 13,
+    13, 14, 15, 14, 14, 14, 13, 12, 12, 11, 11, 10,  9,  7,  9, 10,
+    11, 11, 12, 12, 13, 14, 14, 14, 14, 14, 13, 12, 12, 11, 11, 10,
+     9,  8,  7,  8,  9, 10, 11, 11, 12, 12, 13, 14, 14, 14, 13, 13,
+    12, 11,  9,  9,  8,  7,  6,  5,  6,  7,  8,  9,  9, 11, 12, 13,
+    13, 14, 14, 13, 13, 13, 11, 11, 10,  8,  7,  4,  1,  4,  7,  8,
+    10, 11, 11, 13, 13, 13, 14, 14, 13, 13, 12, 11,  9,  9,  8,  7,
+     6,  5,  6,  7,  8,  9,  9, 11, 12, 13, 13, 14, 14, 14, 13, 12,
+    11, 11, 11,  9,  9,  8,  7,  8,  9, 10, 11, 11, 12, 12, 13, 14,
+    14, 14, 14, 14, 13, 12, 12, 11, 11, 10,  9,  7,  9, 10, 11, 11,
+    12, 12, 13, 14, 14, 14, 15, 14, 13, 13, 12, 12, 12, 12, 11, 10,
+     9, 10, 11, 12, 12, 12, 12, 13, 13, 14, 15, 14, 14, 14, 13, 12,
+    12, 12, 12, 11, 10,  8, 10, 11, 12, 12, 12, 12, 13, 14, 14, 14,
+    14, 14, 14, 14, 13, 12, 12, 12, 12, 11, 10, 11, 12, 12, 12, 12,
+    13, 14, 14, 14, 14, 16, 15, 15, 14, 13, 13, 13, 13, 13, 12, 12,
+    12, 13, 13, 13, 13, 13, 14, 15, 15, 16, 16, 15, 15, 14, 14, 14,
+    14, 14, 14, 13, 12, 13, 14, 14, 14, 14, 14, 14, 15, 15, 16, 16,
+    15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 14, 14, 14, 14, 14,
+    15, 15, 15, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14,
+    14, 14, 15, 15, 15, 15, 16, 16, 16,  6,
+};
+
+static const uint16_t clv_mvu_2_codes[] = {
+    0xFFF2, 0xFFEF, 0xFFEA, 0x7FEC, 0x7FD5, 0x7FC5, 0x7FCF, 0x3FD3,
+    0x3FC9, 0x3FB4, 0x3F72, 0x3FAE, 0x3FDC, 0x3FE1, 0x7FC4, 0x7FC8,
+    0x7FD7, 0x7FF0, 0xFFE9, 0xFFEC, 0xFFF8, 0xFFF4, 0x7FF3, 0x7FDE,
+    0x7FD3, 0x3FC0, 0x3F99, 0x3FA8, 0x3FAC, 0x3F8E, 0x1FA8, 0x1F79,
+    0x1FA2, 0x3F89, 0x3F93, 0x3F95, 0x3F9C, 0x3FB8, 0x7FD2, 0x7FE6,
+    0x7FDA, 0xFFF7, 0xFFFE, 0x7FED, 0x7FE2, 0x3FD5, 0x3FD7, 0x3FB3,
+    0x3FA2, 0x3F80, 0x3F7A, 0x1F88, 0x1F70, 0x1F8A, 0x3F83, 0x3F84,
+    0x3FAA, 0x3FC4, 0x3FDA, 0x3FDF, 0x7FDB, 0x7FE3, 0xFFF1, 0xFFFB,
+    0x7FE1, 0x7FC7, 0x3FB0, 0x1FAA, 0x1FAB, 0x1FA3, 0x1F8E, 0x1F81,
+    0x0FA7, 0x0F7F, 0x0FA8, 0x1F82, 0x1F8F, 0x1FA4, 0x1FAC, 0x1FAD,
+    0x3FB5, 0x7FC6, 0x7FDD, 0xFFF3, 0x3FDD, 0x3FBE, 0x3FB9, 0x3F7C,
+    0x1F77, 0x0FB5, 0x0F9D, 0x0F99, 0x0F90, 0x0794, 0x03BE, 0x0795,
+    0x0F92, 0x0F9A, 0x0F9E, 0x0FB6, 0x1F78, 0x3F78, 0x3FB2, 0x3FBF,
+    0x3FD2, 0x3FA7, 0x3F8C, 0x3F75, 0x1F9E, 0x0F93, 0x0F94, 0x0F7B,
+    0x0F73, 0x07B4, 0x03BB, 0x01D9, 0x03BC, 0x07B5, 0x0F74, 0x0F7C,
+    0x0F95, 0x0F96, 0x1FA0, 0x3F76, 0x3F8F, 0x3FA5, 0x7FE5, 0x3FC7,
+    0x1F95, 0x1F71, 0x0FAB, 0x0FAC, 0x0F9F, 0x0F7D, 0x0796, 0x03BF,
+    0x00DE, 0x03C0, 0x0797, 0x0F7E, 0x0FA0, 0x0FAD, 0x0FAE, 0x1F72,
+    0x1F97, 0x3FD1, 0x7FD8, 0x3FA9, 0x3FA0, 0x3F6D, 0x1F99, 0x0F87,
+    0x0F77, 0x07A8, 0x079C, 0x03C5, 0x01CA, 0x0067, 0x01CB, 0x03C6,
+    0x079D, 0x07A9, 0x0F78, 0x0F8A, 0x1F9F, 0x3F6E, 0x3F98, 0x3F9A,
+    0x3FCA, 0x3F70, 0x1FAF, 0x0F8D, 0x0F6F, 0x07AC, 0x07A0, 0x03B7,
+    0x01CE, 0x00DA, 0x0063, 0x00DB, 0x01CF, 0x03B8, 0x07A1, 0x07AD,
+    0x0F70, 0x0F91, 0x1FB2, 0x3F73, 0x3FD6, 0x3F7D, 0x1F91, 0x1F85,
+    0x0FA3, 0x07A2, 0x01D4, 0x01D0, 0x00E0, 0x0068, 0x002C, 0x0014,
+    0x002D, 0x0069, 0x00E1, 0x01D1, 0x01D5, 0x07A3, 0x0FA4, 0x1F89,
+    0x1F92, 0x3F81, 0x3FA6, 0x1F7B, 0x1F7C, 0x1F7D, 0x07B0, 0x07AE,
+    0x03C7, 0x00DC, 0x0064, 0x0008, 0x0000, 0x0009, 0x0065, 0x00DD,
+    0x03C8, 0x07AF, 0x07B1, 0x1F7E, 0x1F7F, 0x1F80, 0x3F9E, 0x3F85,
+    0x1F93, 0x1F8B, 0x0FA5, 0x07A4, 0x01D6, 0x01D2, 0x00E2, 0x006A,
+    0x002E, 0x0015, 0x002F, 0x006B, 0x00E3, 0x01D3, 0x01D7, 0x07A5,
+    0x0FA6, 0x1F8C, 0x1F94, 0x3F87, 0x3FCE, 0x3F77, 0x1FB4, 0x0F83,
+    0x07B6, 0x07AA, 0x079E, 0x01DA, 0x01CC, 0x00D8, 0x0062, 0x00D9,
+    0x01CD, 0x03B6, 0x079F, 0x07AB, 0x0F6E, 0x0F84, 0x1FA9, 0x3F6A,
+    0x3FCD, 0x3F90, 0x3F92, 0x3F6B, 0x1F96, 0x0F85, 0x0F75, 0x07A6,
+    0x079A, 0x03C3, 0x01C8, 0x0066, 0x01C9, 0x03C4, 0x079B, 0x07A7,
+    0x0F76, 0x0F86, 0x1F9D, 0x3F6C, 0x3F96, 0x3F97, 0x7FE9, 0x3FD8,
+    0x1F98, 0x1F73, 0x0FAF, 0x0FB0, 0x0FA1, 0x0F80, 0x0798, 0x03C1,
+    0x01D8, 0x03C2, 0x0799, 0x0F81, 0x0FA2, 0x0FB1, 0x0FB2, 0x1F74,
+    0x1F9A, 0x3FE0, 0x7FEE, 0x3F94, 0x3F8B, 0x3F6F, 0x1F9B, 0x0F88,
+    0x0F89, 0x0F79, 0x0F71, 0x07B2, 0x03B9, 0x00DF, 0x03BA, 0x07B3,
+    0x0F72, 0x0F7A, 0x0F8B, 0x0F8C, 0x1F9C, 0x3F71, 0x3F8A, 0x3F9B,
+    0x3FC8, 0x3FBD, 0x3FC5, 0x3F79, 0x1F75, 0x0FB3, 0x0F9C, 0x0F97,
+    0x0F8E, 0x0792, 0x03BD, 0x0793, 0x0F8F, 0x0F98, 0x0F9B, 0x0FB4,
+    0x1F76, 0x3F82, 0x3FC3, 0x3FBA, 0x3FC6, 0xFFFD, 0x7FDF, 0x7FCC,
+    0x3FBB, 0x1FB0, 0x1FAE, 0x1FA1, 0x1F90, 0x1F83, 0x0FAA, 0x0F82,
+    0x0FA9, 0x1F84, 0x1F8D, 0x1FA5, 0x1FB1, 0x1FB3, 0x3FC2, 0x7FCA,
+    0x7FE8, 0xFFF5, 0xFFFF, 0x7FDC, 0x7FD9, 0x3FCC, 0x3FD9, 0x3FB7,
+    0x3F9F, 0x3F7E, 0x3F86, 0x1F86, 0x0FB7, 0x1F87, 0x3F7B, 0x3F7F,
+    0x3FA1, 0x3FBC, 0x3FCB, 0x3FD4, 0x7FF1, 0x7FF2, 0xFFFA, 0xFFFC,
+    0x7FE4, 0x7FE7, 0x7FD4, 0x3FAF, 0x3FA3, 0x3F91, 0x3F9D, 0x3F88,
+    0x1FA6, 0x1F7A, 0x1FA7, 0x3F8D, 0x3FAB, 0x3FAD, 0x3FA4, 0x3FB6,
+    0x7FD0, 0x7FE0, 0x7FEB, 0xFFF0, 0xFFF9, 0xFFEB, 0xFFED, 0x7FEA,
+    0x7FD6, 0x7FC9, 0x7FCB, 0x3FD0, 0x3FDE, 0x3FB1, 0x3F74, 0x3FC1,
+    0x3FCF, 0x3FDB, 0x7FCD, 0x7FCE, 0x7FD1, 0x7FEF, 0xFFE8, 0xFFEE,
+    0xFFF6, 0x0030,
+};
+
+static const uint16_t clv_mvu_2_syms[] = {
+    0xF6F6, 0xF7F6, 0xF8F6, 0xF9F6, 0xFAF6, 0xFBF6, 0xFCF6, 0xFDF6,
+    0xFEF6, 0xFFF6, 0x00F6, 0x01F6, 0x02F6, 0x03F6, 0x04F6, 0x05F6,
+    0x06F6, 0x07F6, 0x08F6, 0x09F6, 0x0AF6, 0xF6F7, 0xF7F7, 0xF8F7,
+    0xF9F7, 0xFAF7, 0xFBF7, 0xFCF7, 0xFDF7, 0xFEF7, 0xFFF7, 0x00F7,
+    0x01F7, 0x02F7, 0x03F7, 0x04F7, 0x05F7, 0x06F7, 0x07F7, 0x08F7,
+    0x09F7, 0x0AF7, 0xF6F8, 0xF7F8, 0xF8F8, 0xF9F8, 0xFAF8, 0xFBF8,
+    0xFCF8, 0xFDF8, 0xFEF8, 0xFFF8, 0x00F8, 0x01F8, 0x02F8, 0x03F8,
+    0x04F8, 0x05F8, 0x06F8, 0x07F8, 0x08F8, 0x09F8, 0x0AF8, 0xF6F9,
+    0xF7F9, 0xF8F9, 0xF9F9, 0xFAF9, 0xFBF9, 0xFCF9, 0xFDF9, 0xFEF9,
+    0xFFF9, 0x00F9, 0x01F9, 0x02F9, 0x03F9, 0x04F9, 0x05F9, 0x06F9,
+    0x07F9, 0x08F9, 0x09F9, 0x0AF9, 0xF6FA, 0xF7FA, 0xF8FA, 0xF9FA,
+    0xFAFA, 0xFBFA, 0xFCFA, 0xFDFA, 0xFEFA, 0xFFFA, 0x00FA, 0x01FA,
+    0x02FA, 0x03FA, 0x04FA, 0x05FA, 0x06FA, 0x07FA, 0x08FA, 0x09FA,
+    0x0AFA, 0xF6FB, 0xF7FB, 0xF8FB, 0xF9FB, 0xFAFB, 0xFBFB, 0xFCFB,
+    0xFDFB, 0xFEFB, 0xFFFB, 0x00FB, 0x01FB, 0x02FB, 0x03FB, 0x04FB,
+    0x05FB, 0x06FB, 0x07FB, 0x08FB, 0x09FB, 0x0AFB, 0xF6FC, 0xF7FC,
+    0xF8FC, 0xF9FC, 0xFAFC, 0xFBFC, 0xFCFC, 0xFDFC, 0xFEFC, 0xFFFC,
+    0x00FC, 0x01FC, 0x02FC, 0x03FC, 0x04FC, 0x05FC, 0x06FC, 0x07FC,
+    0x08FC, 0x09FC, 0x0AFC, 0xF6FD, 0xF7FD, 0xF8FD, 0xF9FD, 0xFAFD,
+    0xFBFD, 0xFCFD, 0xFDFD, 0xFEFD, 0xFFFD, 0x00FD, 0x01FD, 0x02FD,
+    0x03FD, 0x04FD, 0x05FD, 0x06FD, 0x07FD, 0x08FD, 0x09FD, 0x0AFD,
+    0xF6FE, 0xF7FE, 0xF8FE, 0xF9FE, 0xFAFE, 0xFBFE, 0xFCFE, 0xFDFE,
+    0xFEFE, 0xFFFE, 0x00FE, 0x01FE, 0x02FE, 0x03FE, 0x04FE, 0x05FE,
+    0x06FE, 0x07FE, 0x08FE, 0x09FE, 0x0AFE, 0xF6FF, 0xF7FF, 0xF8FF,
+    0xF9FF, 0xFAFF, 0xFBFF, 0xFCFF, 0xFDFF, 0xFEFF, 0xFFFF, 0x00FF,
+    0x01FF, 0x02FF, 0x03FF, 0x04FF, 0x05FF, 0x06FF, 0x07FF, 0x08FF,
+    0x09FF, 0x0AFF, 0xF600, 0xF700, 0xF800, 0xF900, 0xFA00, 0xFB00,
+    0xFC00, 0xFD00, 0xFE00, 0xFF00, 0x0000, 0x0100, 0x0200, 0x0300,
+    0x0400, 0x0500, 0x0600, 0x0700, 0x0800, 0x0900, 0x0A00, 0xF601,
+    0xF701, 0xF801, 0xF901, 0xFA01, 0xFB01, 0xFC01, 0xFD01, 0xFE01,
+    0xFF01, 0x0001, 0x0101, 0x0201, 0x0301, 0x0401, 0x0501, 0x0601,
+    0x0701, 0x0801, 0x0901, 0x0A01, 0xF602, 0xF702, 0xF802, 0xF902,
+    0xFA02, 0xFB02, 0xFC02, 0xFD02, 0xFE02, 0xFF02, 0x0002, 0x0102,
+    0x0202, 0x0302, 0x0402, 0x0502, 0x0602, 0x0702, 0x0802, 0x0902,
+    0x0A02, 0xF603, 0xF703, 0xF803, 0xF903, 0xFA03, 0xFB03, 0xFC03,
+    0xFD03, 0xFE03, 0xFF03, 0x0003, 0x0103, 0x0203, 0x0303, 0x0403,
+    0x0503, 0x0603, 0x0703, 0x0803, 0x0903, 0x0A03, 0xF604, 0xF704,
+    0xF804, 0xF904, 0xFA04, 0xFB04, 0xFC04, 0xFD04, 0xFE04, 0xFF04,
+    0x0004, 0x0104, 0x0204, 0x0304, 0x0404, 0x0504, 0x0604, 0x0704,
+    0x0804, 0x0904, 0x0A04, 0xF605, 0xF705, 0xF805, 0xF905, 0xFA05,
+    0xFB05, 0xFC05, 0xFD05, 0xFE05, 0xFF05, 0x0005, 0x0105, 0x0205,
+    0x0305, 0x0405, 0x0505, 0x0605, 0x0705, 0x0805, 0x0905, 0x0A05,
+    0xF606, 0xF706, 0xF806, 0xF906, 0xFA06, 0xFB06, 0xFC06, 0xFD06,
+    0xFE06, 0xFF06, 0x0006, 0x0106, 0x0206, 0x0306, 0x0406, 0x0506,
+    0x0606, 0x0706, 0x0806, 0x0906, 0x0A06, 0xF607, 0xF707, 0xF807,
+    0xF907, 0xFA07, 0xFB07, 0xFC07, 0xFD07, 0xFE07, 0xFF07, 0x0007,
+    0x0107, 0x0207, 0x0307, 0x0407, 0x0507, 0x0607, 0x0707, 0x0807,
+    0x0907, 0x0A07, 0xF608, 0xF708, 0xF808, 0xF908, 0xFA08, 0xFB08,
+    0xFC08, 0xFD08, 0xFE08, 0xFF08, 0x0008, 0x0108, 0x0208, 0x0308,
+    0x0408, 0x0508, 0x0608, 0x0708, 0x0808, 0x0908, 0x0A08, 0xF609,
+    0xF709, 0xF809, 0xF909, 0xFA09, 0xFB09, 0xFC09, 0xFD09, 0xFE09,
+    0xFF09, 0x0009, 0x0109, 0x0209, 0x0309, 0x0409, 0x0509, 0x0609,
+    0x0709, 0x0809, 0x0909, 0x0A09, 0xF60A, 0xF70A, 0xF80A, 0xF90A,
+    0xFA0A, 0xFB0A, 0xFC0A, 0xFD0A, 0xFE0A, 0xFF0A, 0x000A, 0x010A,
+    0x020A, 0x030A, 0x040A, 0x050A, 0x060A, 0x070A, 0x080A, 0x090A,
+    0x0A0A, 0x0B0B,
+};
+
+static const uint8_t clv_mvv_1_bits[] = {
+    16, 15, 13, 13, 13, 12, 10, 10, 10, 12, 13, 13, 13, 15, 16, 16,
+    15, 14, 13, 12, 11, 10,  9, 10, 11, 12, 13, 14, 15, 16, 15, 14,
+    13, 13, 11, 10, 10,  5, 10, 10, 11, 13, 13, 14, 15, 12, 12, 12,
+    11, 10, 10,  9,  5,  9, 10, 10, 11, 12, 12, 12, 14, 12, 12, 12,
+    11,  9,  8,  5,  8,  9, 11, 12, 12, 12, 14, 14, 11, 11,  9,  9,
+     9,  7,  5,  7,  9,  9,  9, 11, 11, 14, 13, 12, 11, 10, 10,  8,
+     6,  3,  6,  8, 10, 10, 11, 12, 13, 11, 10,  9,  7,  6,  6,  4,
+     4,  4,  6,  7,  7, 10, 10, 11, 13, 12, 11, 10, 10,  8,  6,  3,
+     6,  8, 10, 10, 11, 12, 13, 14, 11, 11,  9,  9,  9,  7,  5,  7,
+     9,  9,  9, 11, 11, 14, 14, 12, 12, 12, 11,  9,  8,  5,  8,  9,
+    11, 12, 12, 12, 14, 12, 12, 12, 11, 10, 10,  9,  5,  9, 10, 10,
+    11, 12, 12, 12, 15, 14, 13, 13, 11, 10, 10,  5, 10, 10, 11, 13,
+    13, 14, 15, 16, 15, 14, 13, 12, 11, 10,  9, 10, 11, 12, 13, 14,
+    15, 16, 16, 15, 13, 13, 13, 12, 10, 10, 10, 12, 13, 13, 13, 15,
+    16,  7,
+};
+
+static const uint16_t clv_mvv_1_codes[] = {
+    0xFFFD, 0x7FF8, 0x1FF2, 0x1FDC, 0x1FDB, 0x0FD2, 0x03D6, 0x03BF,
+    0x03D3, 0x0FD0, 0x1FDA, 0x1FDE, 0x1FF0, 0x7FF9, 0xFFFE, 0xFFFA,
+    0x7FFB, 0x3FF3, 0x1FE9, 0x0FD6, 0x07CB, 0x03E1, 0x01C8, 0x03E2,
+    0x07CC, 0x0FD9, 0x1FE8, 0x3FF6, 0x7FFA, 0xFFF9, 0x7FF1, 0x3FEE,
+    0x1FE4, 0x1FE0, 0x07D4, 0x03DB, 0x03CB, 0x0014, 0x03CC, 0x03DC,
+    0x07D6, 0x1FE3, 0x1FE7, 0x3FEC, 0x7FF3, 0x0FEA, 0x0FE0, 0x0FDE,
+    0x07DE, 0x03C9, 0x03C3, 0x01DC, 0x0013, 0x01DD, 0x03C4, 0x03CA,
+    0x07DF, 0x0FDF, 0x0FE3, 0x0FEB, 0x3FF1, 0x0FE7, 0x0FCF, 0x0FC8,
+    0x07D8, 0x01D2, 0x00E0, 0x0010, 0x00E1, 0x01D4, 0x07D9, 0x0FC9,
+    0x0FCC, 0x0FE6, 0x3FF5, 0x3FEA, 0x07E2, 0x07D2, 0x01D7, 0x01D0,
+    0x01CC, 0x006A, 0x000F, 0x006B, 0x01CD, 0x01D1, 0x01D9, 0x07D3,
+    0x07E3, 0x3FEB, 0x1FEE, 0x0FD5, 0x07C7, 0x03D8, 0x03D0, 0x00DD,
+    0x002D, 0x0001, 0x002E, 0x00DE, 0x03D1, 0x03D9, 0x07C8, 0x0FD8,
+    0x1FEF, 0x07CE, 0x03C5, 0x01DE, 0x006C, 0x0032, 0x0030, 0x0005,
+    0x0004, 0x0006, 0x0031, 0x0066, 0x006D, 0x03BE, 0x03C6, 0x07CF,
+    0x1FEC, 0x0FDA, 0x07C9, 0x03DA, 0x03D2, 0x00DF, 0x002F, 0x0000,
+    0x002C, 0x00DC, 0x03CF, 0x03D7, 0x07C6, 0x0FD4, 0x1FED, 0x3FE9,
+    0x07E0, 0x07D0, 0x01D3, 0x01CE, 0x01CA, 0x0068, 0x000E, 0x0069,
+    0x01CB, 0x01CF, 0x01D5, 0x07D1, 0x07E1, 0x3FE8, 0x3FF4, 0x0FE4,
+    0x0FCD, 0x0FCB, 0x07DA, 0x01D6, 0x00E2, 0x0011, 0x00E3, 0x01D8,
+    0x07DB, 0x0FCA, 0x0FCE, 0x0FE5, 0x3FF7, 0x0FE8, 0x0FE1, 0x0FDD,
+    0x07DD, 0x03C7, 0x03C1, 0x01DA, 0x0012, 0x01DB, 0x03C2, 0x03C8,
+    0x07DC, 0x0FDC, 0x0FE2, 0x0FE9, 0x7FF0, 0x3FEF, 0x1FE5, 0x1FE1,
+    0x07D7, 0x03DD, 0x03CD, 0x0015, 0x03CE, 0x03DE, 0x07D5, 0x1FE2,
+    0x1FE6, 0x3FED, 0x7FF2, 0xFFF8, 0x7FF4, 0x3FF2, 0x1FEB, 0x0FD7,
+    0x07CD, 0x03DF, 0x01C9, 0x03E0, 0x07CA, 0x0FDB, 0x1FEA, 0x3FF0,
+    0x7FF5, 0xFFFB, 0xFFFC, 0x7FF6, 0x1FF3, 0x1FDD, 0x1FD9, 0x0FD1,
+    0x03D5, 0x03C0, 0x03D4, 0x0FD3, 0x1FD8, 0x1FDF, 0x1FF1, 0x7FF7,
+    0xFFFF, 0x0067,
+};
+
+static const uint16_t clv_mvv_1_syms[] = {
+    0xF9F9, 0xFAF9, 0xFBF9, 0xFCF9, 0xFDF9, 0xFEF9, 0xFFF9, 0x00F9,
+    0x01F9, 0x02F9, 0x03F9, 0x04F9, 0x05F9, 0x06F9, 0x07F9, 0xF9FA,
+    0xFAFA, 0xFBFA, 0xFCFA, 0xFDFA, 0xFEFA, 0xFFFA, 0x00FA, 0x01FA,
+    0x02FA, 0x03FA, 0x04FA, 0x05FA, 0x06FA, 0x07FA, 0xF9FB, 0xFAFB,
+    0xFBFB, 0xFCFB, 0xFDFB, 0xFEFB, 0xFFFB, 0x00FB, 0x01FB, 0x02FB,
+    0x03FB, 0x04FB, 0x05FB, 0x06FB, 0x07FB, 0xF9FC, 0xFAFC, 0xFBFC,
+    0xFCFC, 0xFDFC, 0xFEFC, 0xFFFC, 0x00FC, 0x01FC, 0x02FC, 0x03FC,
+    0x04FC, 0x05FC, 0x06FC, 0x07FC, 0xF9FD, 0xFAFD, 0xFBFD, 0xFCFD,
+    0xFDFD, 0xFEFD, 0xFFFD, 0x00FD, 0x01FD, 0x02FD, 0x03FD, 0x04FD,
+    0x05FD, 0x06FD, 0x07FD, 0xF9FE, 0xFAFE, 0xFBFE, 0xFCFE, 0xFDFE,
+    0xFEFE, 0xFFFE, 0x00FE, 0x01FE, 0x02FE, 0x03FE, 0x04FE, 0x05FE,
+    0x06FE, 0x07FE, 0xF9FF, 0xFAFF, 0xFBFF, 0xFCFF, 0xFDFF, 0xFEFF,
+    0xFFFF, 0x00FF, 0x01FF, 0x02FF, 0x03FF, 0x04FF, 0x05FF, 0x06FF,
+    0x07FF, 0xF900, 0xFA00, 0xFB00, 0xFC00, 0xFD00, 0xFE00, 0xFF00,
+    0x0000, 0x0100, 0x0200, 0x0300, 0x0400, 0x0500, 0x0600, 0x0700,
+    0xF901, 0xFA01, 0xFB01, 0xFC01, 0xFD01, 0xFE01, 0xFF01, 0x0001,
+    0x0101, 0x0201, 0x0301, 0x0401, 0x0501, 0x0601, 0x0701, 0xF902,
+    0xFA02, 0xFB02, 0xFC02, 0xFD02, 0xFE02, 0xFF02, 0x0002, 0x0102,
+    0x0202, 0x0302, 0x0402, 0x0502, 0x0602, 0x0702, 0xF903, 0xFA03,
+    0xFB03, 0xFC03, 0xFD03, 0xFE03, 0xFF03, 0x0003, 0x0103, 0x0203,
+    0x0303, 0x0403, 0x0503, 0x0603, 0x0703, 0xF904, 0xFA04, 0xFB04,
+    0xFC04, 0xFD04, 0xFE04, 0xFF04, 0x0004, 0x0104, 0x0204, 0x0304,
+    0x0404, 0x0504, 0x0604, 0x0704, 0xF905, 0xFA05, 0xFB05, 0xFC05,
+    0xFD05, 0xFE05, 0xFF05, 0x0005, 0x0105, 0x0205, 0x0305, 0x0405,
+    0x0505, 0x0605, 0x0705, 0xF906, 0xFA06, 0xFB06, 0xFC06, 0xFD06,
+    0xFE06, 0xFF06, 0x0006, 0x0106, 0x0206, 0x0306, 0x0406, 0x0506,
+    0x0606, 0x0706, 0xF907, 0xFA07, 0xFB07, 0xFC07, 0xFD07, 0xFE07,
+    0xFF07, 0x0007, 0x0107, 0x0207, 0x0307, 0x0407, 0x0507, 0x0607,
+    0x0707, 0x0808,
+};
+
+static const uint8_t clv_mvv_2_bits[] = {
+    16, 15, 15, 15, 15, 15, 14, 14, 14, 13, 12, 13, 14, 14, 14, 15,
+    15, 15, 15, 15, 16, 16, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12,
+    12, 13, 14, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 14, 14, 14,
+    14, 14, 14, 13, 13, 13, 14, 14, 14, 14, 14, 14, 16, 16, 16, 15,
+    15, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 13, 13, 13, 13, 14,
+    14, 14, 15, 15, 16, 14, 14, 14, 13, 12, 12, 12, 10, 10, 10, 10,
+    10, 12, 12, 12, 13, 14, 14, 14, 16, 14, 14, 14, 13, 13, 12, 12,
+    12, 10, 10,  7, 10, 10, 12, 12, 12, 13, 13, 14, 14, 14, 14, 14,
+    13, 12, 12, 12, 12, 10,  9,  8,  7,  8,  9, 10, 12, 12, 12, 12,
+    13, 14, 14, 14, 14, 13, 12, 12, 12, 12, 10,  9,  8,  7,  8,  9,
+    10, 12, 12, 12, 12, 13, 14, 14, 14, 14, 13, 13, 12, 12, 11, 10,
+     9,  8,  7,  8,  9, 10, 11, 12, 12, 13, 13, 14, 14, 14, 13, 13,
+    12, 10, 10,  9,  8,  7,  6,  5,  6,  7,  8,  9, 10, 10, 12, 13,
+    13, 14, 13, 13, 13, 13, 11, 10,  9,  8,  7,  5,  1,  5,  7,  8,
+     9, 10, 11, 13, 13, 13, 13, 14, 13, 13, 12, 10, 10,  9,  8,  7,
+     6,  5,  6,  7,  8,  9, 10, 10, 12, 13, 13, 14, 14, 14, 13, 13,
+    12, 12, 11,  9,  9,  8,  7,  8,  9,  9, 11, 12, 12, 13, 13, 14,
+    14, 14, 14, 13, 12, 12, 12, 12, 10,  9,  8,  7,  8,  9, 10, 12,
+    12, 12, 12, 13, 14, 14, 14, 14, 13, 12, 12, 12, 12, 10,  9,  8,
+     7,  8,  9, 10, 12, 12, 12, 12, 13, 14, 14, 14, 14, 14, 13, 13,
+    12, 12, 12, 10, 10,  7, 10, 10, 12, 12, 12, 13, 13, 14, 14, 14,
+    16, 14, 14, 14, 13, 12, 12, 12, 10, 10, 10, 10, 10, 12, 12, 12,
+    13, 14, 14, 14, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, 12, 12,
+    12, 13, 13, 13, 13, 14, 14, 14, 15, 15, 16, 16, 16, 14, 14, 14,
+    14, 14, 14, 13, 13, 13, 14, 14, 14, 14, 14, 14, 16, 16, 16, 16,
+    16, 16, 15, 15, 14, 14, 14, 13, 12, 12, 12, 13, 14, 14, 14, 15,
+    15, 16, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 14, 13, 10, 13,
+    14, 14, 14, 15, 15, 15, 15, 15, 16,  6,
+};
+
+static const uint16_t clv_mvv_2_codes[] = {
+    0xFFFF, 0x7FE7, 0x7FD9, 0x7FE6, 0x7FE5, 0x7FCE, 0x3FD6, 0x3FD3,
+    0x3F9C, 0x1FB2, 0x0F7A, 0x1FB5, 0x3FA8, 0x3FDD, 0x3FE5, 0x7FD0,
+    0x7FEA, 0x7FEC, 0x7FEF, 0x7FDB, 0xFFF3, 0xFFF5, 0xFFE2, 0xFFEB,
+    0x7FEB, 0x7FE0, 0x3FA7, 0x3F84, 0x3F79, 0x1FAE, 0x1F70, 0x0F78,
+    0x0FAA, 0x1FAA, 0x3F76, 0x3F7E, 0x3FAC, 0x7FE1, 0x7FDD, 0xFFEC,
+    0xFFEE, 0xFFF8, 0xFFF9, 0xFFEA, 0xFFE4, 0x3FE1, 0x3FBA, 0x3FC5,
+    0x3FB9, 0x3FA1, 0x3FAA, 0x1F8B, 0x1F8D, 0x1F8E, 0x3FA6, 0x3FA9,
+    0x3FC4, 0x3FBC, 0x3FC6, 0x3FDB, 0xFFE3, 0xFFE1, 0xFFFB, 0x7FD4,
+    0x7FCC, 0x3FCD, 0x3F88, 0x3F7C, 0x1FA1, 0x1FA2, 0x1F95, 0x1F77,
+    0x0F95, 0x0F79, 0x0F97, 0x1F78, 0x1F96, 0x1FA3, 0x1FA4, 0x3F7F,
+    0x3F8B, 0x3FCB, 0x7FCF, 0x7FD5, 0xFFF6, 0x3FD7, 0x3FE0, 0x3F91,
+    0x1F7B, 0x0FB4, 0x0FA5, 0x0FA6, 0x03D5, 0x03CB, 0x03BF, 0x03CC,
+    0x03D6, 0x0FA7, 0x0FA8, 0x0FB5, 0x1F7D, 0x3F87, 0x3FD2, 0x3FDF,
+    0xFFF0, 0x3F95, 0x3F8A, 0x3F96, 0x1FB7, 0x1F9F, 0x0F9F, 0x0F8F,
+    0x0F76, 0x03D1, 0x03BC, 0x0067, 0x03BD, 0x03D2, 0x0F77, 0x0F90,
+    0x0FA0, 0x1FA0, 0x1FB9, 0x3F97, 0x3F98, 0x3F99, 0x3FC3, 0x3FAF,
+    0x1F81, 0x0FA9, 0x0F91, 0x0F7E, 0x0F68, 0x03B2, 0x01C6, 0x00D4,
+    0x0062, 0x00D5, 0x01C7, 0x03B3, 0x0F6A, 0x0F81, 0x0F93, 0x0FAC,
+    0x1F83, 0x3FB3, 0x3FB4, 0x3FC7, 0x3FBB, 0x1F86, 0x0FAF, 0x0F98,
+    0x0F84, 0x0F6D, 0x03B8, 0x01CC, 0x00DA, 0x0065, 0x00DB, 0x01CD,
+    0x03B9, 0x0F6F, 0x0F86, 0x0F9B, 0x0FB1, 0x1F88, 0x3FB8, 0x3FC9,
+    0x3FDC, 0x3F80, 0x1FB3, 0x1F93, 0x0F87, 0x0F72, 0x07B0, 0x03B0,
+    0x01D4, 0x00E0, 0x0061, 0x00E1, 0x01D5, 0x03B1, 0x07B1, 0x0F73,
+    0x0F88, 0x1F94, 0x1FB4, 0x3F81, 0x3FD4, 0x3F9E, 0x1F99, 0x1F73,
+    0x0F89, 0x03C4, 0x03C0, 0x01CE, 0x00D0, 0x005C, 0x0028, 0x0010,
+    0x0029, 0x005D, 0x00D1, 0x01CF, 0x03C1, 0x03C5, 0x0F8A, 0x1F74,
+    0x1F9A, 0x3FA4, 0x1FAC, 0x1F8C, 0x1F7E, 0x1F71, 0x07B2, 0x03CF,
+    0x01C4, 0x00DC, 0x005A, 0x0012, 0x0000, 0x0013, 0x005B, 0x00DD,
+    0x01C5, 0x03D0, 0x07B3, 0x1F72, 0x1F7F, 0x1F8F, 0x1FAD, 0x3FAB,
+    0x1F9B, 0x1F75, 0x0F8B, 0x03C6, 0x03C2, 0x01D0, 0x00D2, 0x005E,
+    0x002A, 0x0011, 0x002B, 0x005F, 0x00D3, 0x01D1, 0x03C3, 0x03C7,
+    0x0F8C, 0x1F76, 0x1F9C, 0x3FAD, 0x3FCF, 0x3F85, 0x1FBA, 0x1F91,
+    0x0F7D, 0x0F70, 0x07AE, 0x01D6, 0x01D2, 0x00DE, 0x0060, 0x00DF,
+    0x01D3, 0x01D7, 0x07AF, 0x0F71, 0x0F7F, 0x1F92, 0x1FAF, 0x3F7B,
+    0x3FD0, 0x3FC0, 0x3FB1, 0x1F82, 0x0FAB, 0x0F92, 0x0F80, 0x0F69,
+    0x03B4, 0x01C8, 0x00D6, 0x0063, 0x00D7, 0x01C9, 0x03B5, 0x0F6B,
+    0x0F82, 0x0F94, 0x0FAD, 0x1F84, 0x3FB5, 0x3FC2, 0x3FB6, 0x3FBF,
+    0x1F85, 0x0FAE, 0x0F96, 0x0F83, 0x0F6C, 0x03B6, 0x01CA, 0x00D8,
+    0x0064, 0x00D9, 0x01CB, 0x03B7, 0x0F6E, 0x0F85, 0x0F99, 0x0FB0,
+    0x1F87, 0x3FB0, 0x3FB2, 0x3F93, 0x3F86, 0x3F8E, 0x1FB1, 0x1F9D,
+    0x0F9A, 0x0F8D, 0x0F74, 0x03CD, 0x03BA, 0x0066, 0x03BB, 0x03CE,
+    0x0F75, 0x0F8E, 0x0F9C, 0x1F9E, 0x1FB0, 0x3F8C, 0x3F94, 0x3F8D,
+    0xFFFC, 0x3FCA, 0x3FD5, 0x3F8F, 0x1F79, 0x0FB2, 0x0FA1, 0x0FA2,
+    0x03D3, 0x03C9, 0x03BE, 0x03CA, 0x03D4, 0x0FA3, 0x0FA4, 0x0FB3,
+    0x1F7A, 0x3F90, 0x3FE3, 0x3FD8, 0xFFF2, 0x7FD7, 0x7FCD, 0x3FE4,
+    0x3F92, 0x3F82, 0x1FA6, 0x1FA8, 0x1F98, 0x1F7C, 0x0F9D, 0x0F7B,
+    0x0F9E, 0x1F80, 0x1F97, 0x1FA7, 0x1FA5, 0x3F7A, 0x3F89, 0x3FDA,
+    0x7FD2, 0x7FD6, 0xFFFE, 0xFFED, 0xFFE8, 0x3FCC, 0x3FBD, 0x3FAE,
+    0x3FC1, 0x3F9F, 0x3F9A, 0x1F89, 0x1F90, 0x1F8A, 0x3FA3, 0x3FA0,
+    0x3FC8, 0x3FBE, 0x3FB7, 0x3FD1, 0xFFE7, 0xFFE9, 0xFFFD, 0xFFF4,
+    0xFFE5, 0xFFEF, 0x7FD8, 0x7FDC, 0x3FA2, 0x3F83, 0x3F78, 0x1FA9,
+    0x0FB6, 0x0F7C, 0x0FB7, 0x1FAB, 0x3F77, 0x3F7D, 0x3F9B, 0x7FDE,
+    0x7FED, 0xFFE6, 0xFFE0, 0xFFF7, 0xFFF1, 0x7FDA, 0x7FE9, 0x7FE2,
+    0x7FE3, 0x7FD1, 0x3FD9, 0x3FE2, 0x3FA5, 0x1FB8, 0x03C8, 0x1FB6,
+    0x3F9D, 0x3FDE, 0x3FCE, 0x7FD3, 0x7FEE, 0x7FDF, 0x7FE8, 0x7FE4,
+    0xFFFA, 0x002C,
+};
+
+static const uint16_t clv_mvv_2_syms[] = {
+    0xF6F6, 0xF7F6, 0xF8F6, 0xF9F6, 0xFAF6, 0xFBF6, 0xFCF6, 0xFDF6,
+    0xFEF6, 0xFFF6, 0x00F6, 0x01F6, 0x02F6, 0x03F6, 0x04F6, 0x05F6,
+    0x06F6, 0x07F6, 0x08F6, 0x09F6, 0x0AF6, 0xF6F7, 0xF7F7, 0xF8F7,
+    0xF9F7, 0xFAF7, 0xFBF7, 0xFCF7, 0xFDF7, 0xFEF7, 0xFFF7, 0x00F7,
+    0x01F7, 0x02F7, 0x03F7, 0x04F7, 0x05F7, 0x06F7, 0x07F7, 0x08F7,
+    0x09F7, 0x0AF7, 0xF6F8, 0xF7F8, 0xF8F8, 0xF9F8, 0xFAF8, 0xFBF8,
+    0xFCF8, 0xFDF8, 0xFEF8, 0xFFF8, 0x00F8, 0x01F8, 0x02F8, 0x03F8,
+    0x04F8, 0x05F8, 0x06F8, 0x07F8, 0x08F8, 0x09F8, 0x0AF8, 0xF6F9,
+    0xF7F9, 0xF8F9, 0xF9F9, 0xFAF9, 0xFBF9, 0xFCF9, 0xFDF9, 0xFEF9,
+    0xFFF9, 0x00F9, 0x01F9, 0x02F9, 0x03F9, 0x04F9, 0x05F9, 0x06F9,
+    0x07F9, 0x08F9, 0x09F9, 0x0AF9, 0xF6FA, 0xF7FA, 0xF8FA, 0xF9FA,
+    0xFAFA, 0xFBFA, 0xFCFA, 0xFDFA, 0xFEFA, 0xFFFA, 0x00FA, 0x01FA,
+    0x02FA, 0x03FA, 0x04FA, 0x05FA, 0x06FA, 0x07FA, 0x08FA, 0x09FA,
+    0x0AFA, 0xF6FB, 0xF7FB, 0xF8FB, 0xF9FB, 0xFAFB, 0xFBFB, 0xFCFB,
+    0xFDFB, 0xFEFB, 0xFFFB, 0x00FB, 0x01FB, 0x02FB, 0x03FB, 0x04FB,
+    0x05FB, 0x06FB, 0x07FB, 0x08FB, 0x09FB, 0x0AFB, 0xF6FC, 0xF7FC,
+    0xF8FC, 0xF9FC, 0xFAFC, 0xFBFC, 0xFCFC, 0xFDFC, 0xFEFC, 0xFFFC,
+    0x00FC, 0x01FC, 0x02FC, 0x03FC, 0x04FC, 0x05FC, 0x06FC, 0x07FC,
+    0x08FC, 0x09FC, 0x0AFC, 0xF6FD, 0xF7FD, 0xF8FD, 0xF9FD, 0xFAFD,
+    0xFBFD, 0xFCFD, 0xFDFD, 0xFEFD, 0xFFFD, 0x00FD, 0x01FD, 0x02FD,
+    0x03FD, 0x04FD, 0x05FD, 0x06FD, 0x07FD, 0x08FD, 0x09FD, 0x0AFD,
+    0xF6FE, 0xF7FE, 0xF8FE, 0xF9FE, 0xFAFE, 0xFBFE, 0xFCFE, 0xFDFE,
+    0xFEFE, 0xFFFE, 0x00FE, 0x01FE, 0x02FE, 0x03FE, 0x04FE, 0x05FE,
+    0x06FE, 0x07FE, 0x08FE, 0x09FE, 0x0AFE, 0xF6FF, 0xF7FF, 0xF8FF,
+    0xF9FF, 0xFAFF, 0xFBFF, 0xFCFF, 0xFDFF, 0xFEFF, 0xFFFF, 0x00FF,
+    0x01FF, 0x02FF, 0x03FF, 0x04FF, 0x05FF, 0x06FF, 0x07FF, 0x08FF,
+    0x09FF, 0x0AFF, 0xF600, 0xF700, 0xF800, 0xF900, 0xFA00, 0xFB00,
+    0xFC00, 0xFD00, 0xFE00, 0xFF00, 0x0000, 0x0100, 0x0200, 0x0300,
+    0x0400, 0x0500, 0x0600, 0x0700, 0x0800, 0x0900, 0x0A00, 0xF601,
+    0xF701, 0xF801, 0xF901, 0xFA01, 0xFB01, 0xFC01, 0xFD01, 0xFE01,
+    0xFF01, 0x0001, 0x0101, 0x0201, 0x0301, 0x0401, 0x0501, 0x0601,
+    0x0701, 0x0801, 0x0901, 0x0A01, 0xF602, 0xF702, 0xF802, 0xF902,
+    0xFA02, 0xFB02, 0xFC02, 0xFD02, 0xFE02, 0xFF02, 0x0002, 0x0102,
+    0x0202, 0x0302, 0x0402, 0x0502, 0x0602, 0x0702, 0x0802, 0x0902,
+    0x0A02, 0xF603, 0xF703, 0xF803, 0xF903, 0xFA03, 0xFB03, 0xFC03,
+    0xFD03, 0xFE03, 0xFF03, 0x0003, 0x0103, 0x0203, 0x0303, 0x0403,
+    0x0503, 0x0603, 0x0703, 0x0803, 0x0903, 0x0A03, 0xF604, 0xF704,
+    0xF804, 0xF904, 0xFA04, 0xFB04, 0xFC04, 0xFD04, 0xFE04, 0xFF04,
+    0x0004, 0x0104, 0x0204, 0x0304, 0x0404, 0x0504, 0x0604, 0x0704,
+    0x0804, 0x0904, 0x0A04, 0xF605, 0xF705, 0xF805, 0xF905, 0xFA05,
+    0xFB05, 0xFC05, 0xFD05, 0xFE05, 0xFF05, 0x0005, 0x0105, 0x0205,
+    0x0305, 0x0405, 0x0505, 0x0605, 0x0705, 0x0805, 0x0905, 0x0A05,
+    0xF606, 0xF706, 0xF806, 0xF906, 0xFA06, 0xFB06, 0xFC06, 0xFD06,
+    0xFE06, 0xFF06, 0x0006, 0x0106, 0x0206, 0x0306, 0x0406, 0x0506,
+    0x0606, 0x0706, 0x0806, 0x0906, 0x0A06, 0xF607, 0xF707, 0xF807,
+    0xF907, 0xFA07, 0xFB07, 0xFC07, 0xFD07, 0xFE07, 0xFF07, 0x0007,
+    0x0107, 0x0207, 0x0307, 0x0407, 0x0507, 0x0607, 0x0707, 0x0807,
+    0x0907, 0x0A07, 0xF608, 0xF708, 0xF808, 0xF908, 0xFA08, 0xFB08,
+    0xFC08, 0xFD08, 0xFE08, 0xFF08, 0x0008, 0x0108, 0x0208, 0x0308,
+    0x0408, 0x0508, 0x0608, 0x0708, 0x0808, 0x0908, 0x0A08, 0xF609,
+    0xF709, 0xF809, 0xF909, 0xFA09, 0xFB09, 0xFC09, 0xFD09, 0xFE09,
+    0xFF09, 0x0009, 0x0109, 0x0209, 0x0309, 0x0409, 0x0509, 0x0609,
+    0x0709, 0x0809, 0x0909, 0x0A09, 0xF60A, 0xF70A, 0xF80A, 0xF90A,
+    0xFA0A, 0xFB0A, 0xFC0A, 0xFD0A, 0xFE0A, 0xFF0A, 0x000A, 0x010A,
+    0x020A, 0x030A, 0x040A, 0x050A, 0x060A, 0x070A, 0x080A, 0x090A,
+    0x0A0A, 0x0B0B,
+};
+
+static const uint8_t clv_biasy_1_bits[] = {
+    16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 13, 13, 13,
+    13, 13, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10,  9,
+     8,  8,  7,  7,  5,  2,  1,  3,  5,  7,  7,  8,  9,  9, 10, 10,
+    10, 11, 11, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
+    14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 12,
+};
+
+static const uint16_t clv_biasy_1_codes[] = {
+    0xFFFE, 0x7FFE, 0x7FFC, 0x7FFA, 0x7FF6, 0x7FF7, 0x7FF3, 0x7FF2,
+    0x7FEF, 0x7FEE, 0x3FF5, 0x3FF3, 0x3FF1, 0x1FF7, 0x1FF5, 0x1FF2,
+    0x1FF0, 0x1FEE, 0x1FEC, 0x1FEA, 0x1FE8, 0x0FF2, 0x0FF0, 0x0FEE,
+    0x0FEB, 0x07F4, 0x07F3, 0x07F1, 0x03F7, 0x03F5, 0x03F3, 0x01F7,
+    0x00FA, 0x00F8, 0x007A, 0x0078, 0x001C, 0x0002, 0x0000, 0x0006,
+    0x001D, 0x0079, 0x007B, 0x00F9, 0x01F6, 0x01F8, 0x03F2, 0x03F4,
+    0x03F6, 0x07F0, 0x07F2, 0x0FEA, 0x0FEC, 0x0FEF, 0x0FF1, 0x0FF3,
+    0x1FE9, 0x1FEB, 0x1FED, 0x1FEF, 0x1FF1, 0x1FF3, 0x1FF4, 0x1FF6,
+    0x3FF0, 0x3FF2, 0x3FF4, 0x3FF6, 0x7FF0, 0x7FF1, 0x7FF4, 0x7FF8,
+    0x7FF5, 0x7FF9, 0x7FFB, 0x7FFD, 0xFFFF, 0x0FED,
+};
+
+static const uint16_t clv_biasy_1_syms[] = {
+    0xFF68, 0xFF6C, 0xFF70, 0xFF74, 0xFF78, 0xFF7C, 0xFF80, 0xFF84,
+    0xFF88, 0xFF8C, 0xFF90, 0xFF94, 0xFF98, 0xFF9C, 0xFFA0, 0xFFA4,
+    0xFFA8, 0xFFAC, 0xFFB0, 0xFFB4, 0xFFB8, 0xFFBC, 0xFFC0, 0xFFC4,
+    0xFFC8, 0xFFCC, 0xFFD0, 0xFFD4, 0xFFD8, 0xFFDC, 0xFFE0, 0xFFE4,
+    0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC, 0x0000, 0x0004,
+    0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0x0024,
+    0x0028, 0x002C, 0x0030, 0x0034, 0x0038, 0x003C, 0x0040, 0x0044,
+    0x0048, 0x004C, 0x0050, 0x0054, 0x0058, 0x005C, 0x0060, 0x0064,
+    0x0068, 0x006C, 0x0070, 0x0074, 0x0078, 0x007C, 0x0080, 0x0084,
+    0x0088, 0x008C, 0x0090, 0x0094, 0x0098, 0x0100,
+};
+
+static const uint8_t clv_biasy_2_bits[] = {
+    16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 12, 11,
+    11, 11, 10, 10, 10, 10,  9,  9,  8,  8,  8,  7,  6,  6,  4,  3,
+     1,  3,  4,  6,  6,  7,  8,  8,  8,  9,  9, 10, 10, 10, 10, 11,
+    11, 11, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 14, 14, 14, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
+    16, 15,
+};
+
+static const uint16_t clv_biasy_2_codes[] = {
+    0xFFFE, 0xFFFC, 0xFFFA, 0xFFF9, 0xFFF6, 0xFFF5, 0xFFF3, 0x7FF7,
+    0x7FF5, 0x7FF1, 0x7FF3, 0x7FF0, 0x7FEE, 0x7FEC, 0x7FE9, 0x7FE6,
+    0x7FE4, 0x7FE2, 0x3FEF, 0x3FEE, 0x3FEC, 0x3FEA, 0x1FF2, 0x1FF1,
+    0x1FEF, 0x1FED, 0x0FF4, 0x0FF3, 0x0FF1, 0x0FEF, 0x0FED, 0x07F4,
+    0x07F3, 0x07F1, 0x03F6, 0x03F4, 0x03F2, 0x03F0, 0x01F6, 0x01F4,
+    0x00F8, 0x00F6, 0x00F4, 0x0078, 0x003A, 0x0038, 0x000C, 0x0004,
+    0x0000, 0x0005, 0x000D, 0x0039, 0x003B, 0x0079, 0x00F5, 0x00F7,
+    0x00F9, 0x01F5, 0x01F7, 0x03F1, 0x03F3, 0x03F5, 0x03F7, 0x07F0,
+    0x07F2, 0x07F5, 0x0FEC, 0x0FEE, 0x0FF0, 0x0FF2, 0x0FF5, 0x1FEC,
+    0x1FEE, 0x1FF0, 0x1FF3, 0x1FF4, 0x3FEB, 0x3FED, 0x3FF0, 0x7FE3,
+    0x7FE5, 0x7FE7, 0x7FEA, 0x7FEB, 0x7FED, 0x7FEF, 0x7FF4, 0x7FF2,
+    0x7FF6, 0x7FF8, 0xFFF2, 0xFFF4, 0xFFF7, 0xFFF8, 0xFFFB, 0xFFFD,
+    0xFFFF, 0x7FE8,
+};
+
+static const uint16_t clv_biasy_2_syms[] = {
+    0xFF40, 0xFF44, 0xFF48, 0xFF4C, 0xFF50, 0xFF54, 0xFF58, 0xFF5C,
+    0xFF60, 0xFF64, 0xFF68, 0xFF6C, 0xFF70, 0xFF74, 0xFF78, 0xFF7C,
+    0xFF80, 0xFF84, 0xFF88, 0xFF8C, 0xFF90, 0xFF94, 0xFF98, 0xFF9C,
+    0xFFA0, 0xFFA4, 0xFFA8, 0xFFAC, 0xFFB0, 0xFFB4, 0xFFB8, 0xFFBC,
+    0xFFC0, 0xFFC4, 0xFFC8, 0xFFCC, 0xFFD0, 0xFFD4, 0xFFD8, 0xFFDC,
+    0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC,
+    0x0000, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C,
+    0x0020, 0x0024, 0x0028, 0x002C, 0x0030, 0x0034, 0x0038, 0x003C,
+    0x0040, 0x0044, 0x0048, 0x004C, 0x0050, 0x0054, 0x0058, 0x005C,
+    0x0060, 0x0064, 0x0068, 0x006C, 0x0070, 0x0074, 0x0078, 0x007C,
+    0x0080, 0x0084, 0x0088, 0x008C, 0x0090, 0x0094, 0x0098, 0x009C,
+    0x00A0, 0x00A4, 0x00A8, 0x00AC, 0x00B0, 0x00B4, 0x00B8, 0x00BC,
+    0x00C0, 0x0100,
+};
+
+static const uint8_t clv_biasy_3_bits[] = {
+    16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14,
+    14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10,
+    10,  9,  9,  9,  9,  8,  8,  7,  7,  6,  5,  4,  4,  2,  2,  3,
+     4,  5,  6,  6,  7,  7,  8,  8,  9,  9,  9,  9, 10, 10, 10, 10,
+    11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 14, 14, 14, 14, 14,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 15,
+};
+
+static const uint16_t clv_biasy_3_codes[] = {
+    0xFFFF, 0xFFFC, 0xFFFA, 0x7FFC, 0x7FF9, 0x7FF6, 0x7FF4, 0x7FF2,
+    0x7FF1, 0x7FEF, 0x7FEC, 0x7FEB, 0x7FE9, 0x3FF3, 0x3FF0, 0x3FEE,
+    0x3FED, 0x3FEA, 0x1FF3, 0x1FF2, 0x1FF0, 0x1FEE, 0x0FF4, 0x0FF3,
+    0x0FF1, 0x07F7, 0x07F5, 0x07F3, 0x07F1, 0x03F7, 0x03F4, 0x03F2,
+    0x03F0, 0x01F6, 0x01F4, 0x01F2, 0x01F0, 0x00F6, 0x00F4, 0x0078,
+    0x0076, 0x0039, 0x001B, 0x000C, 0x000A, 0x0001, 0x0000, 0x0004,
+    0x000B, 0x001A, 0x0038, 0x003A, 0x0077, 0x0079, 0x00F5, 0x00F7,
+    0x01F1, 0x01F3, 0x01F5, 0x01F7, 0x03F1, 0x03F3, 0x03F5, 0x03F6,
+    0x07F0, 0x07F2, 0x07F4, 0x07F6, 0x0FF0, 0x0FF2, 0x0FF5, 0x0FF6,
+    0x1FEF, 0x1FF1, 0x1FF4, 0x3FEB, 0x3FEC, 0x3FEF, 0x3FF1, 0x3FF2,
+    0x7FE8, 0x7FEA, 0x7FED, 0x7FEE, 0x7FF0, 0x7FF3, 0x7FF5, 0x7FF7,
+    0x7FFA, 0x7FFB, 0xFFFB, 0xFFFD, 0xFFFE, 0x7FF8,
+};
+
+static const uint16_t clv_biasy_3_syms[] = {
+    0xFF48, 0xFF4C, 0xFF50, 0xFF54, 0xFF58, 0xFF5C, 0xFF60, 0xFF64,
+    0xFF68, 0xFF6C, 0xFF70, 0xFF74, 0xFF78, 0xFF7C, 0xFF80, 0xFF84,
+    0xFF88, 0xFF8C, 0xFF90, 0xFF94, 0xFF98, 0xFF9C, 0xFFA0, 0xFFA4,
+    0xFFA8, 0xFFAC, 0xFFB0, 0xFFB4, 0xFFB8, 0xFFBC, 0xFFC0, 0xFFC4,
+    0xFFC8, 0xFFCC, 0xFFD0, 0xFFD4, 0xFFD8, 0xFFDC, 0xFFE0, 0xFFE4,
+    0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC, 0x0000, 0x0004,
+    0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0x0024,
+    0x0028, 0x002C, 0x0030, 0x0034, 0x0038, 0x003C, 0x0040, 0x0044,
+    0x0048, 0x004C, 0x0050, 0x0054, 0x0058, 0x005C, 0x0060, 0x0064,
+    0x0068, 0x006C, 0x0070, 0x0074, 0x0078, 0x007C, 0x0080, 0x0084,
+    0x0088, 0x008C, 0x0090, 0x0094, 0x0098, 0x009C, 0x00A0, 0x00A4,
+    0x00A8, 0x00AC, 0x00B0, 0x00B4, 0x00B8, 0x0100,
+};
+
+static const uint8_t clv_biasu_1_bits[] = {
+    16, 15, 14, 13, 13, 13, 12, 12, 12, 12, 11, 10, 10,  9,  9,  8,
+     7,  6,  5,  2,  1,  3,  5,  7,  7,  8,  9,  9, 10, 10, 11, 12,
+    12, 12, 12, 13, 13, 13, 14, 15, 15, 16,
+};
+
+static const uint16_t clv_biasu_1_codes[] = {
+    0xFFFE, 0x7FFC, 0x3FFC, 0x1FFC, 0x1FFA, 0x1FF9, 0x0FFA, 0x0FF7,
+    0x0FF8, 0x0FF5, 0x07F8, 0x03FA, 0x03F8, 0x01FA, 0x01F9, 0x00FA,
+    0x007B, 0x003C, 0x001C, 0x0002, 0x0000, 0x0006, 0x001D, 0x007A,
+    0x007C, 0x00FB, 0x01F8, 0x01FB, 0x03F9, 0x03FB, 0x07F9, 0x0FF4,
+    0x0FF6, 0x0FF9, 0x0FFB, 0x1FF8, 0x1FFB, 0x1FFD, 0x3FFD, 0x7FFD,
+    0x7FFE, 0xFFFF,
+};
+
+static const uint16_t clv_biasu_1_syms[] = {
+    0xFFB0, 0xFFB4, 0xFFB8, 0xFFBC, 0xFFC0, 0xFFC4, 0xFFC8, 0xFFCC,
+    0xFFD0, 0xFFD4, 0xFFD8, 0xFFDC, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC,
+    0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC, 0x0000, 0x0004, 0x0008, 0x000C,
+    0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0x0024, 0x0028, 0x002C,
+    0x0030, 0x0034, 0x0038, 0x003C, 0x0040, 0x0044, 0x0048, 0x004C,
+    0x0050, 0x0100,
+};
+
+static const uint8_t clv_biasu_2_bits[] = {
+    16, 16, 16, 16, 15, 15, 15, 14, 14, 14, 13, 12, 12, 11, 11, 10,
+    10,  9,  9,  8,  8,  7,  6,  5,  4,  3,  1,  3,  4,  6,  6,  7,
+     8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 14, 15, 15,
+    15, 16, 16, 16, 16, 14,
+};
+
+static const uint16_t clv_biasu_2_codes[] = {
+    0xFFFC, 0xFFF8, 0xFFFA, 0xFFFD, 0x7FF8, 0x7FFA, 0x7FF7, 0x3FF6,
+    0x3FF7, 0x3FF4, 0x1FF9, 0x0FFB, 0x0FF9, 0x07FB, 0x07F9, 0x03FA,
+    0x03F8, 0x01FA, 0x01F9, 0x00FB, 0x00F9, 0x007B, 0x003B, 0x001C,
+    0x000C, 0x0004, 0x0000, 0x0005, 0x000D, 0x003A, 0x003C, 0x007A,
+    0x00F8, 0x00FA, 0x01F8, 0x01FB, 0x03F9, 0x03FB, 0x07F8, 0x07FA,
+    0x0FF8, 0x0FFA, 0x1FF8, 0x3FF5, 0x3FF8, 0x3FF9, 0x7FFB, 0x7FF9,
+    0x7FF6, 0xFFF9, 0xFFFF, 0xFFFE, 0xFFFB, 0x3FFA,
+};
+
+static const uint16_t clv_biasu_2_syms[] = {
+    0xFF98, 0xFF9C, 0xFFA0, 0xFFA4, 0xFFA8, 0xFFAC, 0xFFB0, 0xFFB4,
+    0xFFB8, 0xFFBC, 0xFFC0, 0xFFC4, 0xFFC8, 0xFFCC, 0xFFD0, 0xFFD4,
+    0xFFD8, 0xFFDC, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4,
+    0xFFF8, 0xFFFC, 0x0000, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014,
+    0x0018, 0x001C, 0x0020, 0x0024, 0x0028, 0x002C, 0x0030, 0x0034,
+    0x0038, 0x003C, 0x0040, 0x0044, 0x0048, 0x004C, 0x0050, 0x0054,
+    0x0058, 0x005C, 0x0060, 0x0064, 0x0068, 0x0100,
+};
+
+static const uint8_t clv_biasv_1_bits[] = {
+    16, 15, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10, 10,  9,  8,
+     7,  6,  5,  2,  1,  3,  5,  6,  8,  8,  9, 10, 10, 11, 12, 12,
+    12, 13, 13, 13, 14, 14, 15, 15, 16, 14,
+};
+
+static const uint16_t clv_biasv_1_codes[] = {
+    0xFFFF, 0x7FFD, 0x3FFD, 0x3FFB, 0x3FF9, 0x1FFB, 0x1FF8, 0x1FF6,
+    0x0FFA, 0x0FF8, 0x07FA, 0x07F8, 0x03FA, 0x03F8, 0x01FB, 0x00FB,
+    0x007C, 0x003C, 0x001C, 0x0002, 0x0000, 0x0006, 0x001D, 0x003D,
+    0x00FA, 0x00FC, 0x01FA, 0x03F9, 0x03FB, 0x07F9, 0x0FF6, 0x0FF7,
+    0x0FF9, 0x1FF7, 0x1FF9, 0x1FFA, 0x3FFA, 0x3FFC, 0x7FFC, 0x7FFE,
+    0xFFFE, 0x3FF8,
+};
+
+static const uint16_t clv_biasv_1_syms[] = {
+    0xFFB0, 0xFFB4, 0xFFB8, 0xFFBC, 0xFFC0, 0xFFC4, 0xFFC8, 0xFFCC,
+    0xFFD0, 0xFFD4, 0xFFD8, 0xFFDC, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC,
+    0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC, 0x0000, 0x0004, 0x0008, 0x000C,
+    0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0x0024, 0x0028, 0x002C,
+    0x0030, 0x0034, 0x0038, 0x003C, 0x0040, 0x0044, 0x0048, 0x004C,
+    0x0050, 0x0100,
+};
+
+static const uint8_t clv_biasv_2_bits[] = {
+    16, 15, 14, 13, 13, 13, 13, 13, 12, 12, 11, 10, 10,  9,  9,  8,
+     7,  6,  5,  4,  3,  1,  3,  4,  5,  7,  7,  8,  9,  9, 10, 10,
+    10, 12, 12, 13, 13, 13, 13, 13, 14, 16, 15, 15,
+};
+
+static const uint16_t clv_biasv_2_codes[] = {
+    0xFFFE, 0x7FFD, 0x3FFC, 0x1FFC, 0x1FFB, 0x1FF8, 0x1FF7, 0x1FF4,
+    0x0FF8, 0x0FF7, 0x07FA, 0x03FB, 0x03F8, 0x01FA, 0x01F9, 0x00FA,
+    0x007B, 0x003C, 0x001C, 0x000C, 0x0004, 0x0000, 0x0005, 0x000D,
+    0x001D, 0x007A, 0x007C, 0x00FB, 0x01F8, 0x01FB, 0x03F9, 0x03FA,
+    0x03FC, 0x0FF6, 0x0FF9, 0x1FF5, 0x1FF9, 0x1FF6, 0x1FFA, 0x1FFD,
+    0x3FFD, 0xFFFF, 0x7FFE, 0x7FFC,
+};
+
+static const uint16_t clv_biasv_2_syms[] = {
+    0xFFAC, 0xFFB0, 0xFFB4, 0xFFB8, 0xFFBC, 0xFFC0, 0xFFC4, 0xFFC8,
+    0xFFCC, 0xFFD0, 0xFFD4, 0xFFD8, 0xFFDC, 0xFFE0, 0xFFE4, 0xFFE8,
+    0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC, 0x0000, 0x0004, 0x0008,
+    0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, 0x0024, 0x0028,
+    0x002C, 0x0030, 0x0034, 0x0038, 0x003C, 0x0040, 0x0044, 0x0048,
+    0x004C, 0x0050, 0x0054, 0x0100,
+};
+
+#endif /* AVCODEC_CLEARVIDEODATA_H */

diff --git a/libavcodec/codec2utils.c b/libavcodec/codec2utils.c
new file mode 100644
index 0000000..931478f
--- /dev/null
+++ b/libavcodec/codec2utils.c

@@ -0,0 +1,80 @@
+/*
+ * codec2 utility functions
+ * Copyright (c) 2017 Tomas Härdin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+#include "internal.h"
+#include "libavcodec/codec2utils.h"
+
+int avpriv_codec2_mode_bit_rate(void *logctx, int mode)
+{
+    int frame_size  = avpriv_codec2_mode_frame_size(logctx, mode);
+    int block_align = avpriv_codec2_mode_block_align(logctx, mode);
+
+    if (frame_size <= 0 || block_align <= 0) {
+        return 0;
+    }
+
+    return 8 * 8000 * block_align / frame_size;
+}
+
+int avpriv_codec2_mode_frame_size(void *logctx, int mode)
+{
+    int frame_size_table[AVPRIV_CODEC2_MODE_MAX+1] = {
+        160,    // 3200
+        160,    // 2400
+        320,    // 1600
+        320,    // 1400
+        320,    // 1300
+        320,    // 1200
+        320,    // 700
+        320,    // 700B
+        320,    // 700C
+    };
+
+    if (mode < 0 || mode > AVPRIV_CODEC2_MODE_MAX) {
+        av_log(logctx, AV_LOG_ERROR, "unknown codec2 mode %i, can't find frame_size\n", mode);
+        return 0;
+    } else {
+        return frame_size_table[mode];
+    }
+}
+
+int avpriv_codec2_mode_block_align(void *logctx, int mode)
+{
+    int block_align_table[AVPRIV_CODEC2_MODE_MAX+1] = {
+        8,      // 3200
+        6,      // 2400
+        8,      // 1600
+        7,      // 1400
+        7,      // 1300
+        6,      // 1200
+        4,      // 700
+        4,      // 700B
+        4,      // 700C
+    };
+
+    if (mode < 0 || mode > AVPRIV_CODEC2_MODE_MAX) {
+        av_log(logctx, AV_LOG_ERROR, "unknown codec2 mode %i, can't find block_align\n", mode);
+        return 0;
+    } else {
+        return block_align_table[mode];
+    }
+}

diff --git a/libavcodec/codec2utils.h b/libavcodec/codec2utils.h
new file mode 100644
index 0000000..6def4d4
--- /dev/null
+++ b/libavcodec/codec2utils.h

@@ -0,0 +1,82 @@
+/*
+ * codec2 utility functions
+ * Copyright (c) 2017 Tomas Härdin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CODEC2UTILS_H
+#define AVCODEC_CODEC2UTILS_H
+
+#include <stdint.h>
+
+//Highest mode we're willing to use.
+//Don't want to let users accidentally produce files that can't be decoded in the future.
+//CODEC2_MODE_WB (9) is experimental/unstable as of 2017-11-23.
+#define AVPRIV_CODEC2_MODE_MAX 8 //CODEC2_MODE_700C
+
+//Used by both codec2raw demuxer and libcodec2 encoder.
+//The integers match the values in codec2.h, so "3200" -> CODEC2_MODE_3000 = 0 and so on.
+//It is possible that we're linked to a version of libcodec2 that lacks some of these modes.
+//For example Debian stretch ships with libcodec2.so.0.4 which lacks CODEC2_MODE_700C.
+#define AVPRIV_CODEC2_AVOPTIONS(desc, classname, min_val, default_val, option_flags) \
+    { "mode", desc, offsetof(classname, mode), AV_OPT_TYPE_INT, {.i64 = default_val}, min_val, AVPRIV_CODEC2_MODE_MAX, .flags=option_flags, .unit="codec2_mode"},\
+    { "3200", "3200", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, .flags=option_flags, .unit="codec2_mode"},\
+    { "2400", "2400", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, .flags=option_flags, .unit="codec2_mode"},\
+    { "1600", "1600", 0, AV_OPT_TYPE_CONST, {.i64 = 2}, .flags=option_flags, .unit="codec2_mode"},\
+    { "1400", "1400", 0, AV_OPT_TYPE_CONST, {.i64 = 3}, .flags=option_flags, .unit="codec2_mode"},\
+    { "1300", "1300", 0, AV_OPT_TYPE_CONST, {.i64 = 4}, .flags=option_flags, .unit="codec2_mode"},\
+    { "1200", "1200", 0, AV_OPT_TYPE_CONST, {.i64 = 5}, .flags=option_flags, .unit="codec2_mode"},\
+    { "700",  "700",  0, AV_OPT_TYPE_CONST, {.i64 = 6}, .flags=option_flags, .unit="codec2_mode"},\
+    { "700B", "700B", 0, AV_OPT_TYPE_CONST, {.i64 = 7}, .flags=option_flags, .unit="codec2_mode"},\
+    { "700C", "700C", 0, AV_OPT_TYPE_CONST, {.i64 = 8}, .flags=option_flags, .unit="codec2_mode"}
+
+//The three following functions are here to avoid needing libavformat/codec2.c to depend on libcodec2
+
+//Computes bitrate from mode, with frames rounded up to the nearest octet.
+//So 700 bit/s (28 bits/frame) becomes 800 bits/s (32 bits/frame).
+//logctx is used for av_log()
+//Returns <0 if mode is invalid
+int avpriv_codec2_mode_bit_rate(void *logctx, int mode);
+
+//Mimics codec2_samples_per_frame()
+int avpriv_codec2_mode_frame_size(void *logctx, int mode);
+
+//Mimics (codec2_bits_per_frame()+7)/8
+int avpriv_codec2_mode_block_align(void *logctx, int mode);
+
+#define AVPRIV_CODEC2_EXTRADATA_SIZE 4
+
+//Used in codec2raw demuxer and libcodec2 encoder
+static inline void avpriv_codec2_make_extradata(uint8_t *ptr, int mode) {
+    //version 0.8 as of 2017-12-23 (r3386)
+    ptr[0] = 0;     //major
+    ptr[1] = 8;     //minor
+    ptr[2] = mode;  //mode
+    ptr[3] = 0;     //flags
+}
+
+//Returns version as a 16-bit value. 0.8 -> 0x0008
+static inline uint16_t avpriv_codec2_version_from_extradata(uint8_t *ptr) {
+    return (ptr[0] << 8) + ptr[1];
+}
+
+static inline uint8_t avpriv_codec2_mode_from_extradata(uint8_t *ptr) {
+    return ptr[2];
+}
+
+#endif /* AVCODEC_CODEC2UTILS_H */

diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 6a13bbb..67a3054 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c

@@ -46,15 +46,6 @@
         .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
         .profiles  = NULL_IF_CONFIG_SMALL(ff_mpeg2_video_profiles),
     },
-#if FF_API_XVMC
-    {
-        .id        = AV_CODEC_ID_MPEG2VIDEO_XVMC,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "mpegvideo_xvmc",
-        .long_name = NULL_IF_CONFIG_SMALL("MPEG-1/2 video XvMC (X-Video Motion Compensation)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-#endif /* FF_API_XVMC */
     {
         .id        = AV_CODEC_ID_H261,
         .type      = AVMEDIA_TYPE_VIDEO,
@@ -99,6 +90,28 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_LJPEG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ljpeg",
+        .long_name = NULL_IF_CONFIG_SMALL("Lossless JPEG"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_SP5X,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sp5x",
+        .long_name = NULL_IF_CONFIG_SMALL("Sunplus JPEG (SP5X)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_JPEGLS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "jpegls",
+        .long_name = NULL_IF_CONFIG_SMALL("JPEG-LS"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+    },
+    {
         .id        = AV_CODEC_ID_MPEG4,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "mpeg4",
@@ -170,14 +183,6 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_SVG,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "svg",
-        .long_name = NULL_IF_CONFIG_SMALL("Scalable Vector Graphics"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/svg+xml"),
-    },
-    {
         .id        = AV_CODEC_ID_SVQ1,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "svq1",
@@ -417,13 +422,6 @@
         .props     = AV_CODEC_PROP_LOSSLESS,
     },
     {
-        .id        = AV_CODEC_ID_SNOW,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "snow",
-        .long_name = NULL_IF_CONFIG_SMALL("Snow"),
-        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
         .id        = AV_CODEC_ID_TSCC,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "tscc",
@@ -459,6 +457,50 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_PNG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "png",
+        .long_name = NULL_IF_CONFIG_SMALL("PNG (Portable Network Graphics) image"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/png"),
+    },
+    {
+        .id        = AV_CODEC_ID_PPM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ppm",
+        .long_name = NULL_IF_CONFIG_SMALL("PPM (Portable PixelMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PBM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pbm",
+        .long_name = NULL_IF_CONFIG_SMALL("PBM (Portable BitMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PGM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pgm",
+        .long_name = NULL_IF_CONFIG_SMALL("PGM (Portable GrayMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PGMYUV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pgmyuv",
+        .long_name = NULL_IF_CONFIG_SMALL("PGMYUV (Portable GrayMap YUV) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PAM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pam",
+        .long_name = NULL_IF_CONFIG_SMALL("PAM (Portable AnyMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-portable-pixmap"),
+    },
+    {
         .id        = AV_CODEC_ID_FFVHUFF,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "ffvhuff",
@@ -647,6 +689,14 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_TARGA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "targa",
+        .long_name = NULL_IF_CONFIG_SMALL("Truevision Targa image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-targa", "image/x-tga"),
+    },
+    {
         .id        = AV_CODEC_ID_DSICINVIDEO,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "dsicinvideo",
@@ -661,6 +711,22 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_TIFF,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "tiff",
+        .long_name = NULL_IF_CONFIG_SMALL("TIFF image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/tiff"),
+    },
+    {
+        .id        = AV_CODEC_ID_GIF,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "gif",
+        .long_name = NULL_IF_CONFIG_SMALL("GIF (Graphics Interchange Format)"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/gif"),
+    },
+    {
         .id        = AV_CODEC_ID_DXA,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "dxa",
@@ -683,6 +749,13 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_SGI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sgi",
+        .long_name = NULL_IF_CONFIG_SMALL("SGI image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
         .id        = AV_CODEC_ID_C93,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "c93",
@@ -697,6 +770,20 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_PTX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ptx",
+        .long_name = NULL_IF_CONFIG_SMALL("V.Flash PTX image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TXD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "txd",
+        .long_name = NULL_IF_CONFIG_SMALL("Renderware TXD (TeXture Dictionary) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
         .id        = AV_CODEC_ID_VP6A,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "vp6a",
@@ -718,6 +805,21 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_PCX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "pcx",
+        .long_name = NULL_IF_CONFIG_SMALL("PC Paintbrush PCX image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-pcx"),
+    },
+    {
+        .id        = AV_CODEC_ID_SUNRAST,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sunrast",
+        .long_name = NULL_IF_CONFIG_SMALL("Sun Rasterfile image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
         .id        = AV_CODEC_ID_INDEO4,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "indeo4",
@@ -753,13 +855,6 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_DAALA,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "daala",
-        .long_name = NULL_IF_CONFIG_SMALL("Daala"),
-        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
         .id        = AV_CODEC_ID_DIRAC,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "dirac",
@@ -844,6 +939,13 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
     },
     {
+        .id        = AV_CODEC_ID_DPX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dpx",
+        .long_name = NULL_IF_CONFIG_SMALL("DPX (Digital Picture Exchange) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
         .id        = AV_CODEC_ID_MAD,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "mad",
@@ -921,14 +1023,6 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_VP9,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "vp9",
-        .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
-        .props     = AV_CODEC_PROP_LOSSY,
-        .profiles  = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
-    },
-    {
         .id        = AV_CODEC_ID_PICTOR,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "pictor",
@@ -936,6 +1030,13 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_ANSI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ansi",
+        .long_name = NULL_IF_CONFIG_SMALL("ASCII/ANSI art"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
         .id        = AV_CODEC_ID_A64_MULTI,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "a64_multi",
@@ -957,27 +1058,6 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
     },
     {
-        .id        = AV_CODEC_ID_M101,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "m101",
-        .long_name = NULL_IF_CONFIG_SMALL("Matrox Uncompressed SD"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_MVC1,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "mvc1",
-        .long_name = NULL_IF_CONFIG_SMALL("Silicon Graphics Motion Video Compressor 1"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_MVC2,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "mvc2",
-        .long_name = NULL_IF_CONFIG_SMALL("Silicon Graphics Motion Video Compressor 2"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
-    },
-    {
         .id        = AV_CODEC_ID_MXPEG,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "mxpeg",
@@ -1013,6 +1093,20 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_WMV3IMAGE,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wmv3image",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 Image"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VC1IMAGE,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vc1image",
+        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 Image v2"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
         .id        = AV_CODEC_ID_UTVIDEO,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "utvideo",
@@ -1048,6 +1142,14 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
     },
     {
+        .id        = AV_CODEC_ID_XWD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xwd",
+        .long_name = NULL_IF_CONFIG_SMALL("XWD (X Window Dump) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-xwindowdump"),
+    },
+    {
         .id        = AV_CODEC_ID_CDXL,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "cdxl",
@@ -1055,6 +1157,14 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_XBM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xbm",
+        .long_name = NULL_IF_CONFIG_SMALL("XBM (X BitMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-xbitmap"),
+    },
+    {
         .id        = AV_CODEC_ID_ZEROCODEC,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "zerocodec",
@@ -1104,6 +1214,14 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_VP9,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp9",
+        .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
+        .props     = AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
+    },
+    {
         .id        = AV_CODEC_ID_AIC,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "aic",
@@ -1111,13 +1229,6 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_Y41P,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "y41p",
-        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed YUV 4:1:1 12-bit"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
         .id        = AV_CODEC_ID_ESCAPE130,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "escape130",
@@ -1125,6 +1236,179 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_G2M,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "g2m",
+        .long_name = NULL_IF_CONFIG_SMALL("Go2Meeting"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WEBP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "webp",
+        .long_name = NULL_IF_CONFIG_SMALL("WebP"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/webp"),
+    },
+    {
+        .id        = AV_CODEC_ID_HNM4_VIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hnm4video",
+        .long_name = NULL_IF_CONFIG_SMALL("HNM 4 video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HEVC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hevc",
+        .long_name = NULL_IF_CONFIG_SMALL("H.265 / HEVC (High Efficiency Video Coding)"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
+    },
+    {
+        .id        = AV_CODEC_ID_FIC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "fic",
+        .long_name = NULL_IF_CONFIG_SMALL("Mirillis FIC"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ALIAS_PIX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "alias_pix",
+        .long_name = NULL_IF_CONFIG_SMALL("Alias/Wavefront PIX image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_BRENDER_PIX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "brender_pix",
+        .long_name = NULL_IF_CONFIG_SMALL("BRender PIX image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PAF_VIDEO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "paf_video",
+        .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_EXR,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "exr",
+        .long_name = NULL_IF_CONFIG_SMALL("OpenEXR image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_VP7,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp7",
+        .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SANM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sanm",
+        .long_name = NULL_IF_CONFIG_SMALL("LucasArts SANM/SMUSH video"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SGIRLE,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sgirle",
+        .long_name = NULL_IF_CONFIG_SMALL("SGI RLE 8-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MVC1,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mvc1",
+        .long_name = NULL_IF_CONFIG_SMALL("Silicon Graphics Motion Video Compressor 1"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MVC2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mvc2",
+        .long_name = NULL_IF_CONFIG_SMALL("Silicon Graphics Motion Video Compressor 2"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HQX,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hqx",
+        .long_name = NULL_IF_CONFIG_SMALL("Canopus HQX"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_TDSC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "tdsc",
+        .long_name = NULL_IF_CONFIG_SMALL("TDSC"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HQ_HQA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hq_hqa",
+        .long_name = NULL_IF_CONFIG_SMALL("Canopus HQ/HQA"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HAP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hap",
+        .long_name = NULL_IF_CONFIG_SMALL("Vidvox Hap"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DDS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dds",
+        .long_name = NULL_IF_CONFIG_SMALL("DirectDraw Surface image decoder"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_DXV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dxv",
+        .long_name = NULL_IF_CONFIG_SMALL("Resolume DXV"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SCREENPRESSO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "screenpresso",
+        .long_name = NULL_IF_CONFIG_SMALL("Screenpresso"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_RSCC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rscc",
+        .long_name = NULL_IF_CONFIG_SMALL("innoHeim/Rsupport Screen Capture Codec"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_AVS2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "avs2",
+        .long_name = NULL_IF_CONFIG_SMALL("AVS2-P2/IEEE1857.4"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_Y41P,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "y41p",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed YUV 4:1:1 12-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
         .id        = AV_CODEC_ID_AVRP,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "avrp",
@@ -1200,138 +1484,32 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_SNOW,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "snow",
+        .long_name = NULL_IF_CONFIG_SMALL("Snow"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
         .id        = AV_CODEC_ID_SMVJPEG,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "smvjpeg",
         .long_name = NULL_IF_CONFIG_SMALL("Sigmatel Motion Video"),
     },
-
     {
-        .id        = AV_CODEC_ID_G2M,
+        .id        = AV_CODEC_ID_APNG,
         .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "g2m",
-        .long_name = NULL_IF_CONFIG_SMALL("Go2Meeting"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_HNM4_VIDEO,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "hnm4video",
-        .long_name = NULL_IF_CONFIG_SMALL("HNM 4 video"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_HEVC,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "hevc",
-        .long_name = NULL_IF_CONFIG_SMALL("H.265 / HEVC (High Efficiency Video Coding)"),
-        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
-        .profiles  = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
-    },
-    {
-        .id        = AV_CODEC_ID_FIC,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "fic",
-        .long_name = NULL_IF_CONFIG_SMALL("Mirillis FIC"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_PAF_VIDEO,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "paf_video",
-        .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Video"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_VP7,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "vp7",
-        .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_SANM,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "sanm",
-        .long_name = NULL_IF_CONFIG_SMALL("LucasArts SANM/SMUSH video"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_SGIRLE,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "sgirle",
-        .long_name = NULL_IF_CONFIG_SMALL("SGI RLE 8-bit"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_HQX,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "hqx",
-        .long_name = NULL_IF_CONFIG_SMALL("Canopus HQX"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_HQ_HQA,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "hq_hqa",
-        .long_name = NULL_IF_CONFIG_SMALL("Canopus HQ/HQA"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_HAP,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "hap",
-        .long_name = NULL_IF_CONFIG_SMALL("Vidvox Hap"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_DXV,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "dxv",
-        .long_name = NULL_IF_CONFIG_SMALL("Resolume DXV"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_SCREENPRESSO,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "screenpresso",
-        .long_name = NULL_IF_CONFIG_SMALL("Screenpresso"),
+        .name      = "apng",
+        .long_name = NULL_IF_CONFIG_SMALL("APNG (Animated Portable Network Graphics) image"),
         .props     = AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/png"),
     },
     {
-        .id        = AV_CODEC_ID_SPEEDHQ,
+        .id        = AV_CODEC_ID_DAALA,
         .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "speedhq",
-        .long_name = NULL_IF_CONFIG_SMALL("NewTek SpeedHQ"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_WRAPPED_AVFRAME,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "wrapped_avframe",
-        .long_name = NULL_IF_CONFIG_SMALL("AVFrame to AVPacket passthrough"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_RSCC,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "rscc",
-        .long_name = NULL_IF_CONFIG_SMALL("innoHeim/Rsupport Screen Capture Codec"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_MAGICYUV,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "magicyuv",
-        .long_name = NULL_IF_CONFIG_SMALL("MagicYUV video"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_TRUEMOTION2RT,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "truemotion2rt",
-        .long_name = NULL_IF_CONFIG_SMALL("Duck TrueMotion 2.0 Real Time"),
-        .props     = AV_CODEC_PROP_LOSSY,
+        .name      = "daala",
+        .long_name = NULL_IF_CONFIG_SMALL("Daala"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
     },
     {
         .id        = AV_CODEC_ID_CFHD,
@@ -1341,6 +1519,27 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_TRUEMOTION2RT,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "truemotion2rt",
+        .long_name = NULL_IF_CONFIG_SMALL("Duck TrueMotion 2.0 Real Time"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_M101,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "m101",
+        .long_name = NULL_IF_CONFIG_SMALL("Matrox Uncompressed SD"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_MAGICYUV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "magicyuv",
+        .long_name = NULL_IF_CONFIG_SMALL("MagicYUV video"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
         .id        = AV_CODEC_ID_SHEERVIDEO,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "sheervideo",
@@ -1355,6 +1554,13 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
     },
     {
+        .id        = AV_CODEC_ID_PSD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "psd",
+        .long_name = NULL_IF_CONFIG_SMALL("Photoshop PSD file"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
         .id        = AV_CODEC_ID_PIXLET,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "pixlet",
@@ -1362,6 +1568,13 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_SPEEDHQ,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "speedhq",
+        .long_name = NULL_IF_CONFIG_SMALL("NewTek SpeedHQ"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
         .id        = AV_CODEC_ID_FMVC,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "fmvc",
@@ -1383,11 +1596,20 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_XPM,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xpm",
+        .long_name = NULL_IF_CONFIG_SMALL("XPM (X PixMap) image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-xpixmap"),
+    },
+    {
         .id        = AV_CODEC_ID_AV1,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "av1",
         .long_name = NULL_IF_CONFIG_SMALL("Alliance for Open Media AV1"),
         .props     = AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
     },
     {
         .id        = AV_CODEC_ID_BITPACKED,
@@ -1411,58 +1633,20 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
     },
     {
+        .id        = AV_CODEC_ID_SVG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "svg",
+        .long_name = NULL_IF_CONFIG_SMALL("Scalable Vector Graphics"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/svg+xml"),
+    },
+    {
         .id        = AV_CODEC_ID_GDV,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "gdv",
         .long_name = NULL_IF_CONFIG_SMALL("Gremlin Digital Video"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
-
-    /* image codecs */
-    {
-        .id        = AV_CODEC_ID_ALIAS_PIX,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "alias_pix",
-        .long_name = NULL_IF_CONFIG_SMALL("Alias/Wavefront PIX image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_ANSI,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "ansi",
-        .long_name = NULL_IF_CONFIG_SMALL("ASCII/ANSI art"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_BRENDER_PIX,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "brender_pix",
-        .long_name = NULL_IF_CONFIG_SMALL("BRender PIX image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_DDS,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "dds",
-        .long_name = NULL_IF_CONFIG_SMALL("DirectDraw Surface image decoder"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
-                     AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_DPX,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "dpx",
-        .long_name = NULL_IF_CONFIG_SMALL("DPX (Digital Picture Exchange) image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_EXR,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "exr",
-        .long_name = NULL_IF_CONFIG_SMALL("OpenEXR image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
-                     AV_CODEC_PROP_LOSSLESS,
-    },
     {
         .id        = AV_CODEC_ID_FITS,
         .type      = AVMEDIA_TYPE_VIDEO,
@@ -1471,199 +1655,39 @@
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
     },
     {
-        .id        = AV_CODEC_ID_GIF,
+        .id        = AV_CODEC_ID_IMM4,
         .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "gif",
-        .long_name = NULL_IF_CONFIG_SMALL("GIF (Graphics Interchange Format)"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/gif"),
-    },
-    {
-        .id        = AV_CODEC_ID_JPEGLS,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "jpegls",
-        .long_name = NULL_IF_CONFIG_SMALL("JPEG-LS"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
-                     AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_LJPEG,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "ljpeg",
-        .long_name = NULL_IF_CONFIG_SMALL("Lossless JPEG"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_PAM,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "pam",
-        .long_name = NULL_IF_CONFIG_SMALL("PAM (Portable AnyMap) image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/x-portable-pixmap"),
-    },
-    {
-        .id        = AV_CODEC_ID_PBM,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "pbm",
-        .long_name = NULL_IF_CONFIG_SMALL("PBM (Portable BitMap) image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_PCX,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "pcx",
-        .long_name = NULL_IF_CONFIG_SMALL("PC Paintbrush PCX image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/x-pcx"),
-    },
-    {
-        .id        = AV_CODEC_ID_PGM,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "pgm",
-        .long_name = NULL_IF_CONFIG_SMALL("PGM (Portable GrayMap) image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_PGMYUV,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "pgmyuv",
-        .long_name = NULL_IF_CONFIG_SMALL("PGMYUV (Portable GrayMap YUV) image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_PNG,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "png",
-        .long_name = NULL_IF_CONFIG_SMALL("PNG (Portable Network Graphics) image"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/png"),
-    },
-    {
-        .id        = AV_CODEC_ID_PPM,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "ppm",
-        .long_name = NULL_IF_CONFIG_SMALL("PPM (Portable PixelMap) image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_PSD,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "psd",
-        .long_name = NULL_IF_CONFIG_SMALL("Photoshop PSD file"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_PTX,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "ptx",
-        .long_name = NULL_IF_CONFIG_SMALL("V.Flash PTX image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_SGI,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "sgi",
-        .long_name = NULL_IF_CONFIG_SMALL("SGI image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_SP5X,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "sp5x",
-        .long_name = NULL_IF_CONFIG_SMALL("Sunplus JPEG (SP5X)"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_SUNRAST,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "sunrast",
-        .long_name = NULL_IF_CONFIG_SMALL("Sun Rasterfile image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_TARGA,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "targa",
-        .long_name = NULL_IF_CONFIG_SMALL("Truevision Targa image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/x-targa", "image/x-tga"),
-    },
-    {
-        .id        = AV_CODEC_ID_TDSC,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "tdsc",
-        .long_name = NULL_IF_CONFIG_SMALL("TDSC"),
+        .name      = "imm4",
+        .long_name = NULL_IF_CONFIG_SMALL("Infinity IMM4"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_TIFF,
+        .id        = AV_CODEC_ID_PROSUMER,
         .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "tiff",
-        .long_name = NULL_IF_CONFIG_SMALL("TIFF image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/tiff"),
-    },
-    {
-        .id        = AV_CODEC_ID_TXD,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "txd",
-        .long_name = NULL_IF_CONFIG_SMALL("Renderware TXD (TeXture Dictionary) image"),
+        .name      = "prosumer",
+        .long_name = NULL_IF_CONFIG_SMALL("Brooktree ProSumer Video"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_VC1IMAGE,
+        .id        = AV_CODEC_ID_MWSC,
         .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "vc1image",
-        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 Image v2"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_WEBP,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "webp",
-        .long_name = NULL_IF_CONFIG_SMALL("WebP"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
-                     AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/webp"),
-    },
-    {
-        .id        = AV_CODEC_ID_WMV3IMAGE,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "wmv3image",
-        .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 Image"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_XBM,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "xbm",
-        .long_name = NULL_IF_CONFIG_SMALL("XBM (X BitMap) image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/x-xbitmap"),
-    },
-    {
-        .id        = AV_CODEC_ID_XPM,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "xpm",
-        .long_name = NULL_IF_CONFIG_SMALL("XPM (X PixMap) image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/x-xpixmap"),
-    },
-    {
-        .id        = AV_CODEC_ID_XWD,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "xwd",
-        .long_name = NULL_IF_CONFIG_SMALL("XWD (X Window Dump) image"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/x-xwindowdump"),
-    },
-    {
-        .id        = AV_CODEC_ID_APNG,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "apng",
-        .long_name = NULL_IF_CONFIG_SMALL("APNG (Animated Portable Network Graphics) image"),
+        .name      = "mwsc",
+        .long_name = NULL_IF_CONFIG_SMALL("MatchWare Screen Capture Codec"),
         .props     = AV_CODEC_PROP_LOSSLESS,
-        .mime_types= MT("image/png"),
+    },
+    {
+        .id        = AV_CODEC_ID_WCMV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wcmv",
+        .long_name = NULL_IF_CONFIG_SMALL("WinCAM Motion Video"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_RASC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rasc",
+        .long_name = NULL_IF_CONFIG_SMALL("RemotelyAnywhere Screen Capture"),
+        .props     = AV_CODEC_PROP_LOSSY,
     },
 
     /* various PCM "codecs" */
@@ -1738,20 +1762,6 @@
         .props     = AV_CODEC_PROP_LOSSLESS,
     },
     {
-        .id        = AV_CODEC_ID_PCM_S64LE,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "pcm_s64le",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 64-bit little-endian"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_PCM_S64BE,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "pcm_s64be",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 64-bit big-endian"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
         .id        = AV_CODEC_ID_PCM_U32LE,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "pcm_u32le",
@@ -1808,13 +1818,6 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_PCM_S16BE_PLANAR,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "pcm_s16be_planar",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit big-endian planar"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
         .id        = AV_CODEC_ID_PCM_S16LE_PLANAR,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "pcm_s16le_planar",
@@ -1822,20 +1825,6 @@
         .props     = AV_CODEC_PROP_LOSSLESS,
     },
     {
-        .id        = AV_CODEC_ID_PCM_S24LE_PLANAR,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "pcm_s24le_planar",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit little-endian planar"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_PCM_S32LE_PLANAR,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "pcm_s32le_planar",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit little-endian planar"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
         .id        = AV_CODEC_ID_PCM_DVD,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "pcm_dvd",
@@ -1843,20 +1832,6 @@
         .props     = AV_CODEC_PROP_LOSSLESS,
     },
     {
-        .id        = AV_CODEC_ID_PCM_F16LE,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "pcm_f16le",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM 16.8 floating point little-endian"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_PCM_F24LE,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "pcm_f24le",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM 24.0 floating point little-endian"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
         .id        = AV_CODEC_ID_PCM_F32BE,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "pcm_f32be",
@@ -1912,6 +1887,55 @@
         .long_name = NULL_IF_CONFIG_SMALL("PCM signed 8-bit planar"),
         .props     = AV_CODEC_PROP_LOSSLESS,
     },
+    {
+        .id        = AV_CODEC_ID_PCM_S24LE_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s24le_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit little-endian planar"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S32LE_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s32le_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit little-endian planar"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S16BE_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s16be_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit big-endian planar"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S64LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s64le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 64-bit little-endian"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S64BE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s64be",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 64-bit big-endian"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_F16LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_f16le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM 16.8 floating point little-endian"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_F24LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_f24le",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM 24.0 floating point little-endian"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
 
     /* various ADPCM codecs */
     {
@@ -2048,13 +2072,6 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_ADPCM_THP_LE,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "adpcm_thp_le",
-        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo THP (Little-Endian)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
         .id        = AV_CODEC_ID_ADPCM_IMA_AMV,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "adpcm_ima_amv",
@@ -2132,6 +2149,13 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_ADPCM_VIMA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_vima",
+        .long_name = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
         .id        = AV_CODEC_ID_ADPCM_AFC,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "adpcm_afc",
@@ -2167,10 +2191,10 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_ADPCM_VIMA,
+        .id        = AV_CODEC_ID_ADPCM_THP_LE,
         .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "adpcm_vima",
-        .long_name = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio"),
+        .name      = "adpcm_thp_le",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo THP (Little-Endian)"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
@@ -2194,6 +2218,13 @@
         .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Eurocom DAT4"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_ADPCM_MTAF,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_mtaf",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM MTAF"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* AMR */
     {
@@ -2499,15 +2530,6 @@
         .long_name = NULL_IF_CONFIG_SMALL("ATRAC3 (Adaptive TRansform Acoustic Coding 3)"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
-#if FF_API_VOXWARE
-    {
-        .id        = AV_CODEC_ID_VOXWARE,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "voxware",
-        .long_name = NULL_IF_CONFIG_SMALL("Voxware RT29 Metasound"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-#endif
     {
         .id        = AV_CODEC_ID_APE,
         .type      = AVMEDIA_TYPE_AUDIO,
@@ -2565,20 +2587,6 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_ATRAC3PAL,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "atrac3pal",
-        .long_name = NULL_IF_CONFIG_SMALL("ATRAC3+ AL (Adaptive TRansform Acoustic Coding 3+ Advanced Lossless)"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_ATRAC3AL,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "atrac3al",
-        .long_name = NULL_IF_CONFIG_SMALL("ATRAC3 AL (Adaptive TRansform Acoustic Coding 3 Advanced Lossless)"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
         .id        = AV_CODEC_ID_EAC3,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "eac3",
@@ -2671,20 +2679,6 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_DSS_SP,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "dss_sp",
-        .long_name = NULL_IF_CONFIG_SMALL("Digital Speech Standard - Standard Play mode (DSS SP)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_DOLBY_E,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "dolby_e",
-        .long_name = NULL_IF_CONFIG_SMALL("Dolby E"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
         .id        = AV_CODEC_ID_G729,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "g729",
@@ -2734,24 +2728,6 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_FFWAVESYNTH,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "wavesynth",
-        .long_name = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"),
-    },
-    {
-        .id        = AV_CODEC_ID_SONIC,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "sonic",
-        .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
-    },
-    {
-        .id        = AV_CODEC_ID_SONIC_LS,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "sonicls",
-        .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
-    },
-    {
         .id        = AV_CODEC_ID_OPUS,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "opus",
@@ -2794,6 +2770,38 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_DSS_SP,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dss_sp",
+        .long_name = NULL_IF_CONFIG_SMALL("Digital Speech Standard - Standard Play mode (DSS SP)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_CODEC2,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "codec2",
+        .long_name = NULL_IF_CONFIG_SMALL("codec2 (very low bitrate speech codec)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_FFWAVESYNTH,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "wavesynth",
+        .long_name = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"),
+    },
+    {
+        .id        = AV_CODEC_ID_SONIC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sonic",
+        .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
+    },
+    {
+        .id        = AV_CODEC_ID_SONIC_LS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sonicls",
+        .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
+    },
+    {
         .id        = AV_CODEC_ID_EVRC,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "evrc",
@@ -2808,13 +2816,6 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
-        .id        = AV_CODEC_ID_4GV,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "4gv",
-        .long_name = NULL_IF_CONFIG_SMALL("4GV (Fourth Generation Vocoder)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
         .id        = AV_CODEC_ID_DSD_LSBF,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "dsd_lsbf",
@@ -2843,6 +2844,13 @@
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_4GV,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "4gv",
+        .long_name = NULL_IF_CONFIG_SMALL("4GV (Fourth Generation Vocoder)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
         .id        = AV_CODEC_ID_INTERPLAY_ACM,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "interplayacm",
@@ -2871,10 +2879,52 @@
         .props     = AV_CODEC_PROP_LOSSLESS,
     },
     {
-        .id        = AV_CODEC_ID_ADPCM_MTAF,
+        .id        = AV_CODEC_ID_ATRAC3AL,
         .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "adpcm_mtaf",
-        .long_name = NULL_IF_CONFIG_SMALL("ADPCM MTAF"),
+        .name      = "atrac3al",
+        .long_name = NULL_IF_CONFIG_SMALL("ATRAC3 AL (Adaptive TRansform Acoustic Coding 3 Advanced Lossless)"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ATRAC3PAL,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "atrac3pal",
+        .long_name = NULL_IF_CONFIG_SMALL("ATRAC3+ AL (Adaptive TRansform Acoustic Coding 3+ Advanced Lossless)"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_DOLBY_E,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dolby_e",
+        .long_name = NULL_IF_CONFIG_SMALL("Dolby E"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_APTX,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "aptx",
+        .long_name = NULL_IF_CONFIG_SMALL("aptX (Audio Processing Technology for Bluetooth)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_APTX_HD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "aptx_hd",
+        .long_name = NULL_IF_CONFIG_SMALL("aptX HD (Audio Processing Technology for Bluetooth)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SBC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sbc",
+        .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ATRAC9,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "atrac9",
+        .long_name = NULL_IF_CONFIG_SMALL("ATRAC9 (Adaptive TRansform Acoustic Coding 9)"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
 
@@ -2908,13 +2958,6 @@
         .props     = AV_CODEC_PROP_BITMAP_SUB,
     },
     {
-        .id        = AV_CODEC_ID_ASS,
-        .type      = AVMEDIA_TYPE_SUBTITLE,
-        .name      = "ass",
-        .long_name = NULL_IF_CONFIG_SMALL("ASS (Advanced SSA) subtitle"),
-        .props     = AV_CODEC_PROP_TEXT_SUB,
-    },
-    {
         .id        = AV_CODEC_ID_SSA,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "ssa",
@@ -2949,13 +2992,6 @@
         .props     = AV_CODEC_PROP_TEXT_SUB,
     },
     {
-        .id        = AV_CODEC_ID_SUBRIP,
-        .type      = AVMEDIA_TYPE_SUBTITLE,
-        .name      = "subrip",
-        .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle"),
-        .props     = AV_CODEC_PROP_TEXT_SUB,
-    },
-    {
         .id        = AV_CODEC_ID_MICRODVD,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "microdvd",
@@ -2963,13 +2999,6 @@
         .props     = AV_CODEC_PROP_TEXT_SUB,
     },
     {
-        .id        = AV_CODEC_ID_MPL2,
-        .type      = AVMEDIA_TYPE_SUBTITLE,
-        .name      = "mpl2",
-        .long_name = NULL_IF_CONFIG_SMALL("MPL2 subtitle"),
-        .props     = AV_CODEC_PROP_TEXT_SUB,
-    },
-    {
         .id        = AV_CODEC_ID_EIA_608,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "eia_608",
@@ -2984,13 +3013,6 @@
         .props     = AV_CODEC_PROP_TEXT_SUB,
     },
     {
-        .id        = AV_CODEC_ID_PJS,
-        .type      = AVMEDIA_TYPE_SUBTITLE,
-        .name      = "pjs",
-        .long_name = NULL_IF_CONFIG_SMALL("PJS (Phoenix Japanimation Society) subtitle"),
-        .props     = AV_CODEC_PROP_TEXT_SUB,
-    },
-    {
         .id        = AV_CODEC_ID_SAMI,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "sami",
@@ -3026,10 +3048,10 @@
         .props     = AV_CODEC_PROP_TEXT_SUB,
     },
     {
-        .id        = AV_CODEC_ID_VPLAYER,
+        .id        = AV_CODEC_ID_SUBRIP,
         .type      = AVMEDIA_TYPE_SUBTITLE,
-        .name      = "vplayer",
-        .long_name = NULL_IF_CONFIG_SMALL("VPlayer subtitle"),
+        .name      = "subrip",
+        .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle"),
         .props     = AV_CODEC_PROP_TEXT_SUB,
     },
     {
@@ -3040,12 +3062,48 @@
         .props     = AV_CODEC_PROP_TEXT_SUB,
     },
     {
+        .id        = AV_CODEC_ID_MPL2,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "mpl2",
+        .long_name = NULL_IF_CONFIG_SMALL("MPL2 subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_VPLAYER,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "vplayer",
+        .long_name = NULL_IF_CONFIG_SMALL("VPlayer subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_PJS,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "pjs",
+        .long_name = NULL_IF_CONFIG_SMALL("PJS (Phoenix Japanimation Society) subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_ASS,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "ass",
+        .long_name = NULL_IF_CONFIG_SMALL("ASS (Advanced SSA) subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
         .id        = AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "hdmv_text_subtitle",
         .long_name = NULL_IF_CONFIG_SMALL("HDMV Text subtitle"),
         .props     = AV_CODEC_PROP_TEXT_SUB,
     },
+    {
+        .id        = AV_CODEC_ID_TTML,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "ttml",
+        .long_name = NULL_IF_CONFIG_SMALL("Timed Text Markup Language"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+
 
     /* other kind of codecs and pseudo-codecs */
     {
@@ -3056,6 +3114,12 @@
         .mime_types= MT("application/x-truetype-font", "application/x-font"),
     },
     {
+        .id        = AV_CODEC_ID_SCTE_35,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "scte_35",
+        .long_name = NULL_IF_CONFIG_SMALL("SCTE 35 Message Queue"),
+    },
+    {
         .id        = AV_CODEC_ID_BINTEXT,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "bintext",
@@ -3109,23 +3173,26 @@
         .mime_types= MT("application/octet-stream"),
     },
     {
-        .id        = AV_CODEC_ID_SCTE_35,
-        .type      = AVMEDIA_TYPE_DATA,
-        .name      = "scte_35",
-        .long_name = NULL_IF_CONFIG_SMALL("SCTE 35 Message Queue"),
+        .id        = AV_CODEC_ID_WRAPPED_AVFRAME,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wrapped_avframe",
+        .long_name = NULL_IF_CONFIG_SMALL("AVFrame to AVPacket passthrough"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
     },
-
-    /* deprecated codec ids */
 };
 
+static int descriptor_compare(const void *key, const void *member)
+{
+    enum AVCodecID id = *(const enum AVCodecID *) key;
+    const AVCodecDescriptor *desc = member;
+
+    return id - desc->id;
+}
+
 const AVCodecDescriptor *avcodec_descriptor_get(enum AVCodecID id)
 {
-    int i;
-
-    for (i = 0; i < FF_ARRAY_ELEMS(codec_descriptors); i++)
-        if (codec_descriptors[i].id == id)
-            return &codec_descriptors[i];
-    return NULL;
+    return bsearch(&id, codec_descriptors, FF_ARRAY_ELEMS(codec_descriptors),
+                   sizeof(codec_descriptors[0]), descriptor_compare);
 }
 
 const AVCodecDescriptor *avcodec_descriptor_next(const AVCodecDescriptor *prev)

diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index 53cb8385..c5f68c9 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c

@@ -1282,6 +1282,7 @@
     .close          = cook_decode_close,
     .decode         = cook_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };

diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c
index 83bc8bf..e3c5955 100644
--- a/libavcodec/crystalhd.c
+++ b/libavcodec/crystalhd.c

@@ -786,8 +786,9 @@
         .receive_frame  = crystalhd_receive_frame, \
         .flush          = flush, \
         .bsfs           = bsf_name, \
-        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
+        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
         .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE}, \
+        .wrapper_name   = "crystalhd", \
     };
 
 #if CONFIG_H264_CRYSTALHD_DECODER

diff --git a/libavcodec/cscd.c b/libavcodec/cscd.c
index 9e1dec9..8781df1 100644
--- a/libavcodec/cscd.c
+++ b/libavcodec/cscd.c

@@ -38,7 +38,8 @@
 } CamStudioContext;
 
 static void copy_frame_default(AVFrame *f, const uint8_t *src,
-                               int linelen, int height) {
+                               int linelen, int height)
+{
     int i, src_stride = FFALIGN(linelen, 4);
     uint8_t *dst = f->data[0];
     dst += (height - 1) * f->linesize[0];
@@ -50,7 +51,8 @@
 }
 
 static void add_frame_default(AVFrame *f, const uint8_t *src,
-                              int linelen, int height) {
+                              int linelen, int height)
+{
     int i, j, src_stride = FFALIGN(linelen, 4);
     uint8_t *dst = f->data[0];
     dst += (height - 1) * f->linesize[0];
@@ -63,7 +65,8 @@
 }
 
 static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
-                        AVPacket *avpkt) {
+                        AVPacket *avpkt)
+{
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     CamStudioContext *c = avctx->priv_data;
@@ -79,26 +82,30 @@
 
     // decompress data
     switch ((buf[0] >> 1) & 7) {
-        case 0: { // lzo compression
-            int outlen = c->decomp_size, inlen = buf_size - 2;
-            if (av_lzo1x_decode(c->decomp_buf, &outlen, &buf[2], &inlen))
-                av_log(avctx, AV_LOG_ERROR, "error during lzo decompression\n");
-            break;
-        }
-        case 1: { // zlib compression
-#if CONFIG_ZLIB
-            unsigned long dlen = c->decomp_size;
-            if (uncompress(c->decomp_buf, &dlen, &buf[2], buf_size - 2) != Z_OK)
-                av_log(avctx, AV_LOG_ERROR, "error during zlib decompression\n");
-            break;
-#else
-            av_log(avctx, AV_LOG_ERROR, "compiled without zlib support\n");
-            return AVERROR(ENOSYS);
-#endif
-        }
-        default:
-            av_log(avctx, AV_LOG_ERROR, "unknown compression\n");
+    case 0: { // lzo compression
+        int outlen = c->decomp_size, inlen = buf_size - 2;
+        if (av_lzo1x_decode(c->decomp_buf, &outlen, &buf[2], &inlen) || outlen) {
+            av_log(avctx, AV_LOG_ERROR, "error during lzo decompression\n");
             return AVERROR_INVALIDDATA;
+        }
+        break;
+    }
+    case 1: { // zlib compression
+#if CONFIG_ZLIB
+        unsigned long dlen = c->decomp_size;
+        if (uncompress(c->decomp_buf, &dlen, &buf[2], buf_size - 2) != Z_OK) {
+            av_log(avctx, AV_LOG_ERROR, "error during zlib decompression\n");
+            return AVERROR_INVALIDDATA;
+        }
+        break;
+#else
+        av_log(avctx, AV_LOG_ERROR, "compiled without zlib support\n");
+        return AVERROR(ENOSYS);
+#endif
+    }
+    default:
+        av_log(avctx, AV_LOG_ERROR, "unknown compression\n");
+        return AVERROR_INVALIDDATA;
     }
 
     // flip upside down, add difference frame
@@ -121,18 +128,19 @@
     return buf_size;
 }
 
-static av_cold int decode_init(AVCodecContext *avctx) {
+static av_cold int decode_init(AVCodecContext *avctx)
+{
     CamStudioContext *c = avctx->priv_data;
     int stride;
     switch (avctx->bits_per_coded_sample) {
-        case 16: avctx->pix_fmt = AV_PIX_FMT_RGB555LE; break;
-        case 24: avctx->pix_fmt = AV_PIX_FMT_BGR24; break;
-        case 32: avctx->pix_fmt = AV_PIX_FMT_BGR0; break;
-        default:
-            av_log(avctx, AV_LOG_ERROR,
-                   "CamStudio codec error: invalid depth %i bpp\n",
-                   avctx->bits_per_coded_sample);
-            return AVERROR_INVALIDDATA;
+    case 16: avctx->pix_fmt = AV_PIX_FMT_RGB555LE; break;
+    case 24: avctx->pix_fmt = AV_PIX_FMT_BGR24; break;
+    case 32: avctx->pix_fmt = AV_PIX_FMT_BGR0; break;
+    default:
+        av_log(avctx, AV_LOG_ERROR,
+               "CamStudio codec error: invalid depth %i bpp\n",
+               avctx->bits_per_coded_sample);
+        return AVERROR_INVALIDDATA;
     }
     c->bpp = avctx->bits_per_coded_sample;
     c->linelen = avctx->width * avctx->bits_per_coded_sample / 8;
@@ -150,7 +158,8 @@
     return 0;
 }
 
-static av_cold int decode_end(AVCodecContext *avctx) {
+static av_cold int decode_end(AVCodecContext *avctx)
+{
     CamStudioContext *c = avctx->priv_data;
     av_freep(&c->decomp_buf);
     av_frame_free(&c->pic);
@@ -166,5 +175,6 @@
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
     .capabilities   = AV_CODEC_CAP_DR1,
 };

diff --git a/libavcodec/cuvid.c b/libavcodec/cuviddec.c
similarity index 96%
rename from libavcodec/cuvid.c
rename to libavcodec/cuviddec.c
index 2ba8e00..f21273c 100644
--- a/libavcodec/cuvid.c
+++ b/libavcodec/cuviddec.c

@@ -32,6 +32,7 @@
 
 #include "avcodec.h"
 #include "decode.h"
+#include "hwaccel.h"
 #include "internal.h"
 
 typedef struct CuvidContext
@@ -73,6 +74,8 @@
     int internal_error;
     int decoder_flushing;
 
+    int *key_frame;
+
     cudaVideoCodec codec_type;
     cudaVideoChromaFormat chroma_format;
 
@@ -339,6 +342,8 @@
 
     av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
 
+    ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag;
+
     ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
     if (ctx->internal_error < 0)
         return 0;
@@ -373,7 +378,11 @@
 {
     CuvidContext *ctx = avctx->priv_data;
 
-    return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + 2 > ctx->nb_surfaces;
+    int delay = ctx->cuparseinfo.ulMaxDisplayDelay;
+    if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
+        delay *= 2;
+
+    return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces;
 }
 
 static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
@@ -545,12 +554,16 @@
                     .Height        = avctx->height >> (i ? 1 : 0),
                 };
 
-                ret = CHECK_CU(ctx->cudl->cuMemcpy2D(&cpy));
+                ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
                 if (ret < 0)
                     goto error;
 
                 offset += avctx->height;
             }
+
+            ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream));
+            if (ret < 0)
+                goto error;
         } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
                    avctx->pix_fmt == AV_PIX_FMT_P010 ||
                    avctx->pix_fmt == AV_PIX_FMT_P016) {
@@ -589,6 +602,7 @@
             goto error;
         }
 
+        frame->key_frame = ctx->key_frame[parsed_frame.dispinfo.picture_index];
         frame->width = avctx->width;
         frame->height = avctx->height;
         if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
@@ -692,6 +706,8 @@
     av_buffer_unref(&ctx->hwframe);
     av_buffer_unref(&ctx->hwdevice);
 
+    av_freep(&ctx->key_frame);
+
     cuvid_free_functions(&ctx->cvdl);
 
     return 0;
@@ -835,7 +851,7 @@
         goto error;
     }
 
-    ret = cuvid_load_functions(&ctx->cvdl);
+    ret = cuvid_load_functions(&ctx->cvdl, avctx);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
         goto error;
@@ -976,6 +992,12 @@
                FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
     }
 
+    ctx->key_frame = av_mallocz(ctx->nb_surfaces * sizeof(int));
+    if (!ctx->key_frame) {
+        ret = AVERROR(ENOMEM);
+        goto error;
+    }
+
     ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
     ctx->cuparseinfo.ulMaxDisplayDelay = 4;
     ctx->cuparseinfo.pUserData = avctx;
@@ -1094,6 +1116,19 @@
     { NULL }
 };
 
+static const AVCodecHWConfigInternal *cuvid_hw_configs[] = {
+    &(const AVCodecHWConfigInternal) {
+        .public = {
+            .pix_fmt     = AV_PIX_FMT_CUDA,
+            .methods     = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX |
+                           AV_CODEC_HW_CONFIG_METHOD_INTERNAL,
+            .device_type = AV_HWDEVICE_TYPE_CUDA
+        },
+        .hwaccel = NULL,
+    },
+    NULL
+};
+
 #define DEFINE_CUVID_CODEC(x, X) \
     static const AVClass x##_cuvid_class = { \
         .class_name = #x "_cuvid", \
@@ -1101,12 +1136,6 @@
         .option = options, \
         .version = LIBAVUTIL_VERSION_INT, \
     }; \
-    AVHWAccel ff_##x##_cuvid_hwaccel = { \
-        .name           = #x "_cuvid", \
-        .type           = AVMEDIA_TYPE_VIDEO, \
-        .id             = AV_CODEC_ID_##X, \
-        .pix_fmt        = AV_PIX_FMT_CUDA, \
-    }; \
     AVCodec ff_##x##_cuvid_decoder = { \
         .name           = #x "_cuvid", \
         .long_name      = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
@@ -1119,12 +1148,14 @@
         .decode         = cuvid_decode_frame, \
         .receive_frame  = cuvid_output_frame, \
         .flush          = cuvid_flush, \
-        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
+        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
         .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
                                                         AV_PIX_FMT_NV12, \
                                                         AV_PIX_FMT_P010, \
                                                         AV_PIX_FMT_P016, \
                                                         AV_PIX_FMT_NONE }, \
+        .hw_configs     = cuvid_hw_configs, \
+        .wrapper_name   = "cuvid", \
     };
 
 #if CONFIG_HEVC_CUVID_DECODER

diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 307b214..a0729e6 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c

@@ -146,12 +146,17 @@
     return 0;
 }
 
-int avpriv_dca_parse_core_frame_header(DCACoreFrameHeader *h, uint8_t *buf, int size)
+int avpriv_dca_parse_core_frame_header(DCACoreFrameHeader *h, const uint8_t *buf, int size)
 {
     GetBitContext gb;
+    int ret;
 
-    if (init_get_bits8(&gb, buf, size) < 0)
-        return DCA_PARSE_ERROR_INVALIDDATA;
+    ret = init_get_bits8(&gb, buf, size);
+    if (ret < 0)
+        return ret;
 
-    return ff_dca_parse_core_frame_header(h, &gb);
+    if (ff_dca_parse_core_frame_header(h, &gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    return 0;
 }

diff --git a/libavcodec/dca.h b/libavcodec/dca.h
index 172c965..e96c589 100644
--- a/libavcodec/dca.h
+++ b/libavcodec/dca.h

@@ -29,10 +29,10 @@
 #include <stdint.h>
 
 #include "libavutil/common.h"
-#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 
 #include "get_bits.h"
+#include "internal.h"
 
 #define DCA_CORE_FRAME_HEADER_SIZE      18
 
@@ -46,7 +46,6 @@
     DCA_PARSE_ERROR_RESERVED_BIT    = -7,
     DCA_PARSE_ERROR_LFE_FLAG        = -8,
     DCA_PARSE_ERROR_PCM_RES         = -9,
-    DCA_PARSE_ERROR_INVALIDDATA     = -10,
 };
 
 typedef struct DCACoreFrameHeader {
@@ -196,7 +195,7 @@
     DCA_DMIX_TYPE_COUNT
 };
 
-extern av_export const uint32_t avpriv_dca_sample_rates[16];
+extern av_export_avcodec const uint32_t avpriv_dca_sample_rates[16];
 
 extern const uint32_t ff_dca_sampling_freqs[16];
 extern const uint8_t ff_dca_freq_ranges[16];
@@ -211,10 +210,19 @@
 
 /**
  * Parse and validate core frame header
+ * @param[out] h    Pointer to struct where header info is written.
+ * @param[in]  buf  Pointer to the data buffer
+ * @param[in]  size Size of the data buffer
+ * @return 0 on success, negative AVERROR code on failure
+ */
+int avpriv_dca_parse_core_frame_header(DCACoreFrameHeader *h, const uint8_t *buf, int size);
+
+/**
+ * Parse and validate core frame header
+ * @param[out] h   Pointer to struct where header info is written.
+ * @param[in]  gbc BitContext containing the first 120 bits of the frame.
  * @return 0 on success, negative DCA_PARSE_ERROR_ code on failure
  */
-int avpriv_dca_parse_core_frame_header(DCACoreFrameHeader *h, uint8_t *buf, int size);
-
 int ff_dca_parse_core_frame_header(DCACoreFrameHeader *h, GetBitContext *gb);
 
 #endif /* AVCODEC_DCA_H */

diff --git a/libavcodec/dca_core_bsf.c b/libavcodec/dca_core_bsf.c
index 9edc0cf..8565796 100644
--- a/libavcodec/dca_core_bsf.c
+++ b/libavcodec/dca_core_bsf.c

@@ -24,18 +24,17 @@
 #include "dca_syncwords.h"
 #include "libavutil/mem.h"
 
-static int dca_core_filter(AVBSFContext *ctx, AVPacket *out)
+static int dca_core_filter(AVBSFContext *ctx, AVPacket *pkt)
 {
-    AVPacket *in;
     GetByteContext gb;
     uint32_t syncword;
     int core_size = 0, ret;
 
-    ret = ff_bsf_get_packet(ctx, &in);
+    ret = ff_bsf_get_packet_ref(ctx, pkt);
     if (ret < 0)
         return ret;
 
-    bytestream2_init(&gb, in->data, in->size);
+    bytestream2_init(&gb, pkt->data, pkt->size);
     syncword = bytestream2_get_be32(&gb);
     bytestream2_skip(&gb, 1);
 
@@ -45,11 +44,8 @@
         break;
     }
 
-    av_packet_move_ref(out, in);
-    av_packet_free(&in);
-
-    if (core_size > 0 && core_size <= out->size) {
-        out->size = core_size;
+    if (core_size > 0 && core_size <= pkt->size) {
+        pkt->size = core_size;
     }
 
     return 0;

diff --git a/libavcodec/dcaenc.c b/libavcodec/dcaenc.c
index dd601ff..4b4ceef 100644
--- a/libavcodec/dcaenc.c
+++ b/libavcodec/dcaenc.c

@@ -21,6 +21,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#define FFT_FLOAT 0
+#define FFT_FIXED_32 1
+
 #include "libavutil/avassert.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/common.h"
@@ -33,6 +36,7 @@
 #include "dca_core.h"
 #include "dcadata.h"
 #include "dcaenc.h"
+#include "fft.h"
 #include "internal.h"
 #include "mathops.h"
 #include "put_bits.h"
@@ -48,6 +52,8 @@
 #define SUBBAND_SAMPLES (SUBFRAMES * SUBSUBFRAMES * 8)
 #define AUBANDS 25
 
+#define COS_T(x) (c->cos_table[(x) & 2047])
+
 typedef struct CompressionOptions {
     int adpcm_mode;
 } CompressionOptions;
@@ -56,6 +62,7 @@
     AVClass *class;
     PutBitContext pb;
     DCAADPCMEncContext adpcm_ctx;
+    FFTContext mdct;
     CompressionOptions options;
     int frame_size;
     int frame_bits;
@@ -92,15 +99,15 @@
     int32_t worst_noise_ever;
     int consumed_bits;
     int consumed_adpcm_bits; ///< Number of bits to transmit ADPCM related info
-} DCAEncContext;
 
-static int32_t cos_table[2048];
-static int32_t band_interpolation[2][512];
-static int32_t band_spectrum[2][8];
-static int32_t auf[9][AUBANDS][256];
-static int32_t cb_to_add[256];
-static int32_t cb_to_level[2048];
-static int32_t lfe_fir_64i[512];
+    int32_t cos_table[2048];
+    int32_t band_interpolation_tab[2][512];
+    int32_t band_spectrum_tab[2][8];
+    int32_t auf[9][AUBANDS][256];
+    int32_t cb_to_add[256];
+    int32_t cb_to_level[2048];
+    int32_t lfe_fir_64i[512];
+} DCAEncContext;
 
 /* Transfer function of outer and middle ear, Hz -> dB */
 static double hom(double f)
@@ -145,23 +152,27 @@
 
 static void subband_bufer_free(DCAEncContext *c)
 {
-    int32_t *bufer = c->subband[0][0] - DCA_ADPCM_COEFFS;
-    av_freep(&bufer);
+    if (c->subband[0][0]) {
+        int32_t *bufer = c->subband[0][0] - DCA_ADPCM_COEFFS;
+        av_free(bufer);
+        c->subband[0][0] = NULL;
+    }
 }
 
 static int encode_init(AVCodecContext *avctx)
 {
     DCAEncContext *c = avctx->priv_data;
     uint64_t layout = avctx->channel_layout;
-    int i, j, min_frame_bits;
+    int i, j, k, min_frame_bits;
+    int ret;
 
     if (subband_bufer_alloc(c))
         return AVERROR(ENOMEM);
 
     c->fullband_channels = c->channels = avctx->channels;
     c->lfe_channel = (avctx->channels == 3 || avctx->channels == 6);
-    c->band_interpolation = band_interpolation[1];
-    c->band_spectrum = band_spectrum[1];
+    c->band_interpolation = c->band_interpolation_tab[1];
+    c->band_spectrum = c->band_spectrum_tab[1];
     c->worst_quantization_noise = -2047;
     c->worst_noise_ever = -2047;
     c->consumed_adpcm_bits = 0;
@@ -231,93 +242,79 @@
 
     avctx->frame_size = 32 * SUBBAND_SAMPLES;
 
-    if (!cos_table[0]) {
-        int j, k;
+    if ((ret = ff_mdct_init(&c->mdct, 9, 0, 1.0)) < 0)
+        return ret;
 
-        cos_table[0] = 0x7fffffff;
-        cos_table[512] = 0;
-        cos_table[1024] = -cos_table[0];
-        for (i = 1; i < 512; i++) {
-            cos_table[i]   = (int32_t)(0x7fffffff * cos(M_PI * i / 1024));
-            cos_table[1024-i] = -cos_table[i];
-            cos_table[1024+i] = -cos_table[i];
-            cos_table[2048-i] = cos_table[i];
-        }
-        for (i = 0; i < 2048; i++) {
-            cb_to_level[i] = (int32_t)(0x7fffffff * ff_exp10(-0.005 * i));
-        }
+    /* Init all tables */
+    c->cos_table[0] = 0x7fffffff;
+    c->cos_table[512] = 0;
+    c->cos_table[1024] = -c->cos_table[0];
+    for (i = 1; i < 512; i++) {
+        c->cos_table[i]   = (int32_t)(0x7fffffff * cos(M_PI * i / 1024));
+        c->cos_table[1024-i] = -c->cos_table[i];
+        c->cos_table[1024+i] = -c->cos_table[i];
+        c->cos_table[2048-i] = +c->cos_table[i];
+    }
 
-        for (k = 0; k < 32; k++) {
-            for (j = 0; j < 8; j++) {
-                lfe_fir_64i[64 * j + k] = (int32_t)(0xffffff800000ULL * ff_dca_lfe_fir_64[8 * k + j]);
-                lfe_fir_64i[64 * (7-j) + (63 - k)] = (int32_t)(0xffffff800000ULL * ff_dca_lfe_fir_64[8 * k + j]);
-            }
-        }
+    for (i = 0; i < 2048; i++)
+        c->cb_to_level[i] = (int32_t)(0x7fffffff * ff_exp10(-0.005 * i));
 
-        for (i = 0; i < 512; i++) {
-            band_interpolation[0][i] = (int32_t)(0x1000000000ULL * ff_dca_fir_32bands_perfect[i]);
-            band_interpolation[1][i] = (int32_t)(0x1000000000ULL * ff_dca_fir_32bands_nonperfect[i]);
-        }
-
-        for (i = 0; i < 9; i++) {
-            for (j = 0; j < AUBANDS; j++) {
-                for (k = 0; k < 256; k++) {
-                    double freq = sample_rates[i] * (k + 0.5) / 512;
-
-                    auf[i][j][k] = (int32_t)(10 * (hom(freq) + gammafilter(j, freq)));
-                }
-            }
-        }
-
-        for (i = 0; i < 256; i++) {
-            double add = 1 + ff_exp10(-0.01 * i);
-            cb_to_add[i] = (int32_t)(100 * log10(add));
-        }
+    for (k = 0; k < 32; k++) {
         for (j = 0; j < 8; j++) {
-            double accum = 0;
-            for (i = 0; i < 512; i++) {
-                double reconst = ff_dca_fir_32bands_perfect[i] * ((i & 64) ? (-1) : 1);
-                accum += reconst * cos(2 * M_PI * (i + 0.5 - 256) * (j + 0.5) / 512);
-            }
-            band_spectrum[0][j] = (int32_t)(200 * log10(accum));
-        }
-        for (j = 0; j < 8; j++) {
-            double accum = 0;
-            for (i = 0; i < 512; i++) {
-                double reconst = ff_dca_fir_32bands_nonperfect[i] * ((i & 64) ? (-1) : 1);
-                accum += reconst * cos(2 * M_PI * (i + 0.5 - 256) * (j + 0.5) / 512);
-            }
-            band_spectrum[1][j] = (int32_t)(200 * log10(accum));
+            c->lfe_fir_64i[64 * j + k] = (int32_t)(0xffffff800000ULL * ff_dca_lfe_fir_64[8 * k + j]);
+            c->lfe_fir_64i[64 * (7-j) + (63 - k)] = (int32_t)(0xffffff800000ULL * ff_dca_lfe_fir_64[8 * k + j]);
         }
     }
+
+    for (i = 0; i < 512; i++) {
+        c->band_interpolation_tab[0][i] = (int32_t)(0x1000000000ULL * ff_dca_fir_32bands_perfect[i]);
+        c->band_interpolation_tab[1][i] = (int32_t)(0x1000000000ULL * ff_dca_fir_32bands_nonperfect[i]);
+    }
+
+    for (i = 0; i < 9; i++) {
+        for (j = 0; j < AUBANDS; j++) {
+            for (k = 0; k < 256; k++) {
+                double freq = sample_rates[i] * (k + 0.5) / 512;
+
+                c->auf[i][j][k] = (int32_t)(10 * (hom(freq) + gammafilter(j, freq)));
+            }
+        }
+    }
+
+    for (i = 0; i < 256; i++) {
+        double add = 1 + ff_exp10(-0.01 * i);
+        c->cb_to_add[i] = (int32_t)(100 * log10(add));
+    }
+    for (j = 0; j < 8; j++) {
+        double accum = 0;
+        for (i = 0; i < 512; i++) {
+            double reconst = ff_dca_fir_32bands_perfect[i] * ((i & 64) ? (-1) : 1);
+            accum += reconst * cos(2 * M_PI * (i + 0.5 - 256) * (j + 0.5) / 512);
+        }
+        c->band_spectrum_tab[0][j] = (int32_t)(200 * log10(accum));
+    }
+    for (j = 0; j < 8; j++) {
+        double accum = 0;
+        for (i = 0; i < 512; i++) {
+            double reconst = ff_dca_fir_32bands_nonperfect[i] * ((i & 64) ? (-1) : 1);
+            accum += reconst * cos(2 * M_PI * (i + 0.5 - 256) * (j + 0.5) / 512);
+        }
+        c->band_spectrum_tab[1][j] = (int32_t)(200 * log10(accum));
+    }
+
     return 0;
 }
 
 static av_cold int encode_close(AVCodecContext *avctx)
 {
-    if (avctx->priv_data) {
-        DCAEncContext *c = avctx->priv_data;
-        subband_bufer_free(c);
-        ff_dcaadpcm_free(&c->adpcm_ctx);
-    }
+    DCAEncContext *c = avctx->priv_data;
+    ff_mdct_end(&c->mdct);
+    subband_bufer_free(c);
+    ff_dcaadpcm_free(&c->adpcm_ctx);
+
     return 0;
 }
 
-static inline int32_t cos_t(int x)
-{
-    return cos_table[x & 2047];
-}
-
-static inline int32_t sin_t(int x)
-{
-    return cos_t(x - 512);
-}
-
-static inline int32_t half32(int32_t a)
-{
-    return (a + 1) >> 1;
-}
-
 static void subband_transform(DCAEncContext *c, const int32_t *input)
 {
     int ch, subs, i, k, j;
@@ -353,7 +350,7 @@
                 resp = 0;
                 for (i = 16; i < 48; i++) {
                     int s = (2 * band + 1) * (2 * (i + 16) + 1);
-                    resp += mul32(accum[i], cos_t(s << 3)) >> 3;
+                    resp += mul32(accum[i], COS_T(s << 3)) >> 3;
                 }
 
                 c->subband[ch][band][subs] = ((band + 1) & 2) ? -resp : resp;
@@ -384,9 +381,9 @@
         accum = 0;
 
         for (i = hist_start, j = 0; i < 512; i++, j++)
-            accum += mul32(hist[i], lfe_fir_64i[j]);
+            accum += mul32(hist[i], c->lfe_fir_64i[j]);
         for (i = 0; i < hist_start; i++, j++)
-            accum += mul32(hist[i], lfe_fir_64i[j]);
+            accum += mul32(hist[i], c->lfe_fir_64i[j]);
 
         c->downsampled_lfe[lfes] = accum;
 
@@ -398,131 +395,72 @@
     }
 }
 
-typedef struct {
-    int32_t re;
-    int32_t im;
-} cplx32;
-
-static void fft(const int32_t in[2 * 256], cplx32 out[256])
+static int32_t get_cb(DCAEncContext *c, int32_t in)
 {
-    cplx32 buf[256], rin[256], rout[256];
-    int i, j, k, l;
+    int i, res = 0;
+    in = FFABS(in);
 
-    /* do two transforms in parallel */
-    for (i = 0; i < 256; i++) {
-        /* Apply the Hann window */
-        rin[i].re = mul32(in[2 * i], 0x3fffffff - (cos_t(8 * i + 2) >> 1));
-        rin[i].im = mul32(in[2 * i + 1], 0x3fffffff - (cos_t(8 * i + 6) >> 1));
-    }
-    /* pre-rotation */
-    for (i = 0; i < 256; i++) {
-        buf[i].re = mul32(cos_t(4 * i + 2), rin[i].re)
-                  - mul32(sin_t(4 * i + 2), rin[i].im);
-        buf[i].im = mul32(cos_t(4 * i + 2), rin[i].im)
-                  + mul32(sin_t(4 * i + 2), rin[i].re);
-    }
-
-    for (j = 256, l = 1; j != 1; j >>= 1, l <<= 1) {
-        for (k = 0; k < 256; k += j) {
-            for (i = k; i < k + j / 2; i++) {
-                cplx32 sum, diff;
-                int t = 8 * l * i;
-
-                sum.re = buf[i].re + buf[i + j / 2].re;
-                sum.im = buf[i].im + buf[i + j / 2].im;
-
-                diff.re = buf[i].re - buf[i + j / 2].re;
-                diff.im = buf[i].im - buf[i + j / 2].im;
-
-                buf[i].re = half32(sum.re);
-                buf[i].im = half32(sum.im);
-
-                buf[i + j / 2].re = mul32(diff.re, cos_t(t))
-                                  - mul32(diff.im, sin_t(t));
-                buf[i + j / 2].im = mul32(diff.im, cos_t(t))
-                                  + mul32(diff.re, sin_t(t));
-            }
-        }
-    }
-    /* post-rotation */
-    for (i = 0; i < 256; i++) {
-        int b = ff_reverse[i];
-        rout[i].re = mul32(buf[b].re, cos_t(4 * i))
-                   - mul32(buf[b].im, sin_t(4 * i));
-        rout[i].im = mul32(buf[b].im, cos_t(4 * i))
-                   + mul32(buf[b].re, sin_t(4 * i));
-    }
-    for (i = 0; i < 256; i++) {
-        /* separate the results of the two transforms */
-        cplx32 o1, o2;
-
-        o1.re =  rout[i].re - rout[255 - i].re;
-        o1.im =  rout[i].im + rout[255 - i].im;
-
-        o2.re =  rout[i].im - rout[255 - i].im;
-        o2.im = -rout[i].re - rout[255 - i].re;
-
-        /* combine them into one long transform */
-        out[i].re = mul32( o1.re + o2.re, cos_t(2 * i + 1))
-                  + mul32( o1.im - o2.im, sin_t(2 * i + 1));
-        out[i].im = mul32( o1.im + o2.im, cos_t(2 * i + 1))
-                  + mul32(-o1.re + o2.re, sin_t(2 * i + 1));
-    }
-}
-
-static int32_t get_cb(int32_t in)
-{
-    int i, res;
-
-    res = 0;
-    if (in < 0)
-        in = -in;
     for (i = 1024; i > 0; i >>= 1) {
-        if (cb_to_level[i + res] >= in)
+        if (c->cb_to_level[i + res] >= in)
             res += i;
     }
     return -res;
 }
 
-static int32_t add_cb(int32_t a, int32_t b)
+static int32_t add_cb(DCAEncContext *c, int32_t a, int32_t b)
 {
     if (a < b)
         FFSWAP(int32_t, a, b);
 
     if (a - b >= 256)
         return a;
-    return a + cb_to_add[a - b];
+    return a + c->cb_to_add[a - b];
 }
 
-static void adjust_jnd(int samplerate_index,
+static void calc_power(DCAEncContext *c,
+                       const int32_t in[2 * 256], int32_t power[256])
+{
+    int i;
+    LOCAL_ALIGNED_32(int32_t, data,  [512]);
+    LOCAL_ALIGNED_32(int32_t, coeff, [256]);
+
+    for (i = 0; i < 512; i++)
+        data[i] = norm__(mul32(in[i], 0x3fffffff - (COS_T(4 * i + 2) >> 1)), 4);
+
+    c->mdct.mdct_calc(&c->mdct, coeff, data);
+    for (i = 0; i < 256; i++) {
+        const int32_t cb = get_cb(c, coeff[i]);
+        power[i] = add_cb(c, cb, cb);
+    }
+}
+
+static void adjust_jnd(DCAEncContext *c,
                        const int32_t in[512], int32_t out_cb[256])
 {
     int32_t power[256];
-    cplx32 out[256];
     int32_t out_cb_unnorm[256];
     int32_t denom;
     const int32_t ca_cb = -1114;
     const int32_t cs_cb = 928;
+    const int samplerate_index = c->samplerate_index;
     int i, j;
 
-    fft(in, out);
+    calc_power(c, in, power);
 
-    for (j = 0; j < 256; j++) {
-        power[j] = add_cb(get_cb(out[j].re), get_cb(out[j].im));
+    for (j = 0; j < 256; j++)
         out_cb_unnorm[j] = -2047; /* and can only grow */
-    }
 
     for (i = 0; i < AUBANDS; i++) {
         denom = ca_cb; /* and can only grow */
         for (j = 0; j < 256; j++)
-            denom = add_cb(denom, power[j] + auf[samplerate_index][i][j]);
+            denom = add_cb(c, denom, power[j] + c->auf[samplerate_index][i][j]);
         for (j = 0; j < 256; j++)
-            out_cb_unnorm[j] = add_cb(out_cb_unnorm[j],
-                    -denom + auf[samplerate_index][i][j]);
+            out_cb_unnorm[j] = add_cb(c, out_cb_unnorm[j],
+                                      -denom + c->auf[samplerate_index][i][j]);
     }
 
     for (j = 0; j < 256; j++)
-        out_cb[j] = add_cb(out_cb[j], -out_cb_unnorm[j] - ca_cb - cs_cb);
+        out_cb[j] = add_cb(c, out_cb[j], -out_cb_unnorm[j] - ca_cb - cs_cb);
 }
 
 typedef void (*walk_band_t)(DCAEncContext *c, int band1, int band2, int f,
@@ -586,7 +524,7 @@
                 data[i] = c->history[ch][k];
             for (k -= 512; i < 512; i++, k++)
                 data[i] = input[k * c->channels + chi];
-            adjust_jnd(c->samplerate_index, data, c->masking_curve_cb[ssf]);
+            adjust_jnd(c, data, c->masking_curve_cb[ssf]);
         }
     for (i = 0; i < 256; i++) {
         int32_t m = 2048;
@@ -604,16 +542,16 @@
     }
 }
 
-static inline int32_t find_peak(const int32_t *in, int len) {
+static inline int32_t find_peak(DCAEncContext *c, const int32_t *in, int len)
+{
     int sample;
     int32_t m = 0;
     for (sample = 0; sample < len; sample++) {
         int32_t s = abs(in[sample]);
-        if (m < s) {
+        if (m < s)
             m = s;
-        }
     }
-    return get_cb(m);
+    return get_cb(c, m);
 }
 
 static void find_peaks(DCAEncContext *c)
@@ -621,14 +559,13 @@
     int band, ch;
 
     for (ch = 0; ch < c->fullband_channels; ch++) {
-        for (band = 0; band < 32; band++) {
-            c->peak_cb[ch][band] = find_peak(c->subband[ch][band], SUBBAND_SAMPLES);
-        }
+        for (band = 0; band < 32; band++)
+            c->peak_cb[ch][band] = find_peak(c, c->subband[ch][band],
+                                             SUBBAND_SAMPLES);
     }
 
-    if (c->lfe_channel) {
-        c->lfe_peak_cb = find_peak(c->downsampled_lfe, DCA_LFE_SAMPLES);
-    }
+    if (c->lfe_channel)
+        c->lfe_peak_cb = find_peak(c, c->downsampled_lfe, DCA_LFE_SAMPLES);
 }
 
 static void adpcm_analysis(DCAEncContext *c)
@@ -642,11 +579,12 @@
     for (ch = 0; ch < c->fullband_channels; ch++) {
         for (band = 0; band < 32; band++) {
             samples = c->subband[ch][band] - DCA_ADPCM_COEFFS;
-            pred_vq_id = ff_dcaadpcm_subband_analysis(&c->adpcm_ctx, samples, SUBBAND_SAMPLES, estimated_diff);
+            pred_vq_id = ff_dcaadpcm_subband_analysis(&c->adpcm_ctx, samples,
+                                                      SUBBAND_SAMPLES, estimated_diff);
             if (pred_vq_id >= 0) {
                 c->prediction_mode[ch][band] = pred_vq_id;
                 c->consumed_adpcm_bits += 12; //12 bits to transmit prediction vq index
-                c->diff_peak_cb[ch][band] = find_peak(estimated_diff, 16);
+                c->diff_peak_cb[ch][band] = find_peak(c, estimated_diff, 16);
             } else {
                 c->prediction_mode[ch][band] = -1;
             }
@@ -658,7 +596,7 @@
 #define USED_1ABITS 1
 #define USED_26ABITS 4
 
-static inline int32_t get_step_size(const DCAEncContext *c, int ch, int band)
+static inline int32_t get_step_size(DCAEncContext *c, int ch, int band)
 {
     int32_t step_size;
 
@@ -670,7 +608,8 @@
     return step_size;
 }
 
-static int calc_one_scale(int32_t peak_cb, int abits, softfloat *quant)
+static int calc_one_scale(DCAEncContext *c, int32_t peak_cb, int abits,
+                          softfloat *quant)
 {
     int32_t peak;
     int our_nscale, try_remove;
@@ -680,7 +619,7 @@
     av_assert0(peak_cb >= -2047);
 
     our_nscale = 127;
-    peak = cb_to_level[-peak_cb];
+    peak = c->cb_to_level[-peak_cb];
 
     for (try_remove = 64; try_remove > 0; try_remove >>= 1) {
         if (scalefactor_inv[our_nscale - try_remove].e + stepsize_inv[abits].e <= 17)
@@ -706,15 +645,17 @@
 {
     int32_t step_size;
     int32_t diff_peak_cb = c->diff_peak_cb[ch][band];
-    c->scale_factor[ch][band] = calc_one_scale(diff_peak_cb,
+    c->scale_factor[ch][band] = calc_one_scale(c, diff_peak_cb,
                                                c->abits[ch][band],
                                                &c->quant[ch][band]);
 
     step_size = get_step_size(c, ch, band);
     ff_dcaadpcm_do_real(c->prediction_mode[ch][band],
-                        c->quant[ch][band], ff_dca_scale_factor_quant7[c->scale_factor[ch][band]], step_size,
-                        c->adpcm_history[ch][band], c->subband[ch][band], c->adpcm_history[ch][band]+4, c->quantized[ch][band],
-                        SUBBAND_SAMPLES, cb_to_level[-diff_peak_cb]);
+                        c->quant[ch][band],
+                        ff_dca_scale_factor_quant7[c->scale_factor[ch][band]],
+                        step_size, c->adpcm_history[ch][band], c->subband[ch][band],
+                        c->adpcm_history[ch][band] + 4, c->quantized[ch][band],
+                        SUBBAND_SAMPLES, c->cb_to_level[-diff_peak_cb]);
 }
 
 static void quantize_adpcm(DCAEncContext *c)
@@ -731,21 +672,31 @@
 {
     int sample, band, ch;
 
-    for (ch = 0; ch < c->fullband_channels; ch++)
-        for (band = 0; band < 32; band++)
-            if (c->prediction_mode[ch][band] == -1)
-                for (sample = 0; sample < SUBBAND_SAMPLES; sample++)
-                    c->quantized[ch][band][sample] = quantize_value(c->subband[ch][band][sample], c->quant[ch][band]);
+    for (ch = 0; ch < c->fullband_channels; ch++) {
+        for (band = 0; band < 32; band++) {
+            if (c->prediction_mode[ch][band] == -1) {
+                for (sample = 0; sample < SUBBAND_SAMPLES; sample++) {
+                    int32_t val = quantize_value(c->subband[ch][band][sample],
+                                                 c->quant[ch][band]);
+                    c->quantized[ch][band][sample] = val;
+                }
+            }
+        }
+    }
 }
 
-static void accumulate_huff_bit_consumption(int abits, int32_t *quantized, uint32_t *result)
+static void accumulate_huff_bit_consumption(int abits, int32_t *quantized,
+                                            uint32_t *result)
 {
     uint8_t sel, id = abits - 1;
     for (sel = 0; sel < ff_dca_quant_index_group_size[id]; sel++)
-        result[sel] += ff_dca_vlc_calc_quant_bits(quantized, SUBBAND_SAMPLES, sel, id);
+        result[sel] += ff_dca_vlc_calc_quant_bits(quantized, SUBBAND_SAMPLES,
+                                                  sel, id);
 }
 
-static uint32_t set_best_code(uint32_t vlc_bits[DCA_CODE_BOOKS][7], uint32_t clc_bits[DCA_CODE_BOOKS], int32_t res[DCA_CODE_BOOKS])
+static uint32_t set_best_code(uint32_t vlc_bits[DCA_CODE_BOOKS][7],
+                              uint32_t clc_bits[DCA_CODE_BOOKS],
+                              int32_t res[DCA_CODE_BOOKS])
 {
     uint8_t i, sel;
     uint32_t best_sel_bits[DCA_CODE_BOOKS];
@@ -784,7 +735,8 @@
     return bits;
 }
 
-static uint32_t set_best_abits_code(int abits[DCAENC_SUBBANDS], int bands, int32_t *res)
+static uint32_t set_best_abits_code(int abits[DCAENC_SUBBANDS], int bands,
+                                    int32_t *res)
 {
     uint8_t i;
     uint32_t t;
@@ -845,7 +797,8 @@
                 ret &= ~(USED_26ABITS | USED_1ABITS);
             }
         }
-        c->consumed_bits += set_best_abits_code(c->abits[ch], 32, &c->bit_allocation_sel[ch]);
+        c->consumed_bits += set_best_abits_code(c->abits[ch], 32,
+                                                &c->bit_allocation_sel[ch]);
     }
 
     /* Recalc scale_factor each time to get bits consumption in case of Huffman coding.
@@ -854,7 +807,7 @@
     for (ch = 0; ch < c->fullband_channels; ch++) {
         for (band = 0; band < 32; band++) {
             if (c->prediction_mode[ch][band] == -1) {
-                c->scale_factor[ch][band] = calc_one_scale(c->peak_cb[ch][band],
+                c->scale_factor[ch][band] = calc_one_scale(c, c->peak_cb[ch][band],
                                                            c->abits[ch][band],
                                                            &c->quant[ch][band]);
             }
@@ -868,7 +821,9 @@
     for (ch = 0; ch < c->fullband_channels; ch++) {
         for (band = 0; band < 32; band++) {
             if (c->abits[ch][band] && c->abits[ch][band] <= DCA_CODE_BOOKS) {
-                accumulate_huff_bit_consumption(c->abits[ch][band], c->quantized[ch][band], huff_bit_count_accum[ch][c->abits[ch][band] - 1]);
+                accumulate_huff_bit_consumption(c->abits[ch][band],
+                                                c->quantized[ch][band],
+                                                huff_bit_count_accum[ch][c->abits[ch][band] - 1]);
                 clc_bit_count_accum[ch][c->abits[ch][band] - 1] += bit_consumption[c->abits[ch][band]];
             } else {
                 bits_counter += bit_consumption[c->abits[ch][band]];
@@ -877,7 +832,9 @@
     }
 
     for (ch = 0; ch < c->fullband_channels; ch++) {
-        bits_counter += set_best_code(huff_bit_count_accum[ch], clc_bit_count_accum[ch], c->quant_index_sel[ch]);
+        bits_counter += set_best_code(huff_bit_count_accum[ch],
+                                      clc_bit_count_accum[ch],
+                                      c->quant_index_sel[ch]);
     }
 
     c->consumed_bits += bits_counter;
@@ -954,7 +911,8 @@
                 step_size = get_step_size(c, ch, band);
 
                 ff_dca_core_dequantize(c->adpcm_history[ch][band],
-                                       c->quantized[ch][band]+12, step_size, ff_dca_scale_factor_quant7[c->scale_factor[ch][band]], 0, 4);
+                                       c->quantized[ch][band]+12, step_size,
+                                       ff_dca_scale_factor_quant7[c->scale_factor[ch][band]], 0, 4);
             } else {
                 AV_COPY128U(c->adpcm_history[ch][band], c->adpcm_history[ch][band]+4);
             }
@@ -977,7 +935,7 @@
 static void calc_lfe_scales(DCAEncContext *c)
 {
     if (c->lfe_channel)
-        c->lfe_scale_factor = calc_one_scale(c->lfe_peak_cb, 11, &c->lfe_quant);
+        c->lfe_scale_factor = calc_one_scale(c, c->lfe_peak_cb, 11, &c->lfe_quant);
 }
 
 static void put_frame_header(DCAEncContext *c)
@@ -1118,7 +1076,8 @@
         sel = c->quant_index_sel[ch][c->abits[ch][band] - 1];
         // Huffman codes
         if (sel < ff_dca_quant_index_group_size[c->abits[ch][band] - 1]) {
-            ff_dca_vlc_enc_quant(&c->pb, &c->quantized[ch][band][ss * 8], 8, sel, c->abits[ch][band] - 1);
+            ff_dca_vlc_enc_quant(&c->pb, &c->quantized[ch][band][ss * 8], 8,
+                                 sel, c->abits[ch][band] - 1);
             return;
         }
 
@@ -1171,7 +1130,8 @@
                 put_bits(&c->pb, 5, c->abits[ch][band]);
             }
         } else {
-            ff_dca_vlc_enc_alloc(&c->pb, c->abits[ch], DCAENC_SUBBANDS, c->bit_allocation_sel[ch]);
+            ff_dca_vlc_enc_alloc(&c->pb, c->abits[ch], DCAENC_SUBBANDS,
+                                 c->bit_allocation_sel[ch]);
         }
     }
 
@@ -1287,6 +1247,7 @@
     .close                 = encode_close,
     .encode2               = encode_frame,
     .capabilities          = AV_CODEC_CAP_EXPERIMENTAL,
+    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
     .sample_fmts           = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32,
                                                             AV_SAMPLE_FMT_NONE },
     .supported_samplerates = sample_rates,

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index fb1824b..4607e9f 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c

@@ -36,10 +36,12 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
 #include "libavutil/intmath.h"
+#include "libavutil/opt.h"
 
 #include "avcodec.h"
 #include "bytestream.h"
 #include "decode.h"
+#include "hwaccel.h"
 #include "internal.h"
 #include "thread.h"
 
@@ -129,7 +131,7 @@
     if (pkt) {
         ret = av_packet_copy_props(avci->last_pkt_props, pkt);
         if (!ret)
-            avci->last_pkt_props->size = pkt->size; // HACK: Needed for ff_init_buffer_info().
+            avci->last_pkt_props->size = pkt->size; // HACK: Needed for ff_decode_frame_props().
     }
     return ret;
 }
@@ -180,7 +182,7 @@
     return 0;
 }
 
-static int bsfs_init(AVCodecContext *avctx)
+int ff_decode_bsfs_init(AVCodecContext *avctx)
 {
     AVCodecInternal *avci = avctx->internal;
     DecodeFilterContext *s = &avci->filter;
@@ -194,27 +196,33 @@
     while (bsfs_str && *bsfs_str) {
         AVBSFContext **tmp;
         const AVBitStreamFilter *filter;
-        char *bsf;
+        char *bsf, *bsf_options_str, *bsf_name;
 
         bsf = av_get_token(&bsfs_str, ",");
         if (!bsf) {
             ret = AVERROR(ENOMEM);
             goto fail;
         }
+        bsf_name = av_strtok(bsf, "=", &bsf_options_str);
+        if (!bsf_name) {
+            av_freep(&bsf);
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
 
-        filter = av_bsf_get_by_name(bsf);
+        filter = av_bsf_get_by_name(bsf_name);
         if (!filter) {
             av_log(avctx, AV_LOG_ERROR, "A non-existing bitstream filter %s "
                    "requested by a decoder. This is a bug, please report it.\n",
-                   bsf);
-            ret = AVERROR_BUG;
+                   bsf_name);
             av_freep(&bsf);
+            ret = AVERROR_BUG;
             goto fail;
         }
-        av_freep(&bsf);
 
         tmp = av_realloc_array(s->bsfs, s->nb_bsfs + 1, sizeof(*s->bsfs));
         if (!tmp) {
+            av_freep(&bsf);
             ret = AVERROR(ENOMEM);
             goto fail;
         }
@@ -222,8 +230,10 @@
         s->nb_bsfs++;
 
         ret = av_bsf_alloc(filter, &s->bsfs[s->nb_bsfs - 1]);
-        if (ret < 0)
+        if (ret < 0) {
+            av_freep(&bsf);
             goto fail;
+        }
 
         if (s->nb_bsfs == 1) {
             /* We do not currently have an API for passing the input timebase into decoders,
@@ -237,14 +247,44 @@
             ret = avcodec_parameters_copy(s->bsfs[s->nb_bsfs - 1]->par_in,
                                           s->bsfs[s->nb_bsfs - 2]->par_out);
         }
-        if (ret < 0)
+        if (ret < 0) {
+            av_freep(&bsf);
             goto fail;
+        }
+
+        if (bsf_options_str && filter->priv_class) {
+            const AVOption *opt = av_opt_next(s->bsfs[s->nb_bsfs - 1]->priv_data, NULL);
+            const char * shorthand[2] = {NULL};
+
+            if (opt)
+                shorthand[0] = opt->name;
+
+            ret = av_opt_set_from_string(s->bsfs[s->nb_bsfs - 1]->priv_data, bsf_options_str, shorthand, "=", ":");
+            if (ret < 0) {
+                if (ret != AVERROR(ENOMEM)) {
+                    av_log(avctx, AV_LOG_ERROR, "Invalid options for bitstream filter %s "
+                           "requested by the decoder. This is a bug, please report it.\n",
+                           bsf_name);
+                    ret = AVERROR_BUG;
+                }
+                av_freep(&bsf);
+                goto fail;
+            }
+        }
+        av_freep(&bsf);
 
         ret = av_bsf_init(s->bsfs[s->nb_bsfs - 1]);
         if (ret < 0)
             goto fail;
+
+        if (*bsfs_str)
+            bsfs_str++;
     }
 
+    ret = avcodec_parameters_to_context(avctx, s->bsfs[s->nb_bsfs - 1]->par_out);
+    if (ret < 0)
+        return ret;
+
     return 0;
 fail:
     ff_decode_bsfs_uninit(avctx);
@@ -369,8 +409,7 @@
     DecodeSimpleContext *ds = &avci->ds;
     AVPacket           *pkt = ds->in_pkt;
     // copy to ensure we do not change pkt
-    AVPacket tmp;
-    int got_frame, actual_got_frame, did_split;
+    int got_frame, actual_got_frame;
     int ret;
 
     if (!pkt->data && !avci->draining) {
@@ -390,31 +429,12 @@
           avctx->active_thread_type & FF_THREAD_FRAME))
         return AVERROR_EOF;
 
-    tmp = *pkt;
-#if FF_API_MERGE_SD
-FF_DISABLE_DEPRECATION_WARNINGS
-    did_split = avci->compat_decode_partial_size ?
-                ff_packet_split_and_drop_side_data(&tmp) :
-                av_packet_split_side_data(&tmp);
-
-    if (did_split) {
-        ret = extract_packet_props(avctx->internal, &tmp);
-        if (ret < 0)
-            return ret;
-
-        ret = apply_param_change(avctx, &tmp);
-        if (ret < 0)
-            return ret;
-    }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     got_frame = 0;
 
     if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
-        ret = ff_thread_decode_frame(avctx, frame, &got_frame, &tmp);
+        ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
     } else {
-        ret = avctx->codec->decode(avctx, frame, &got_frame, &tmp);
+        ret = avctx->codec->decode(avctx, frame, &got_frame, pkt);
 
         if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
             frame->pkt_dts = pkt->dts;
@@ -544,13 +564,6 @@
             }
         }
     }
-#if FF_API_MERGE_SD
-    if (did_split) {
-        av_packet_free_side_data(&tmp);
-        if(ret == tmp.size)
-            ret = pkt->size;
-    }
-#endif
 
     if (avctx->codec->type == AVMEDIA_TYPE_AUDIO &&
         !avci->showed_multi_packet_warning &&
@@ -640,6 +653,28 @@
     if (ret == AVERROR_EOF)
         avci->draining_done = 1;
 
+    if (!ret) {
+        /* the only case where decode data is not set should be decoders
+         * that do not call ff_get_buffer() */
+        av_assert0((frame->private_ref && frame->private_ref->size == sizeof(FrameDecodeData)) ||
+                   !(avctx->codec->capabilities & AV_CODEC_CAP_DR1));
+
+        if (frame->private_ref) {
+            FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
+
+            if (fdd->post_process) {
+                ret = fdd->post_process(avctx, frame);
+                if (ret < 0) {
+                    av_frame_unref(frame);
+                    return ret;
+                }
+            }
+        }
+    }
+
+    /* free the per-frame decode data */
+    av_buffer_unref(&frame->private_ref);
+
     return ret;
 }
 
@@ -657,10 +692,6 @@
     if (avpkt && !avpkt->size && avpkt->data)
         return AVERROR(EINVAL);
 
-    ret = bsfs_init(avctx);
-    if (ret < 0)
-        return ret;
-
     av_packet_unref(avci->buffer_pkt);
     if (avpkt && (avpkt->data || avpkt->side_data_elems)) {
         ret = av_packet_ref(avci->buffer_pkt, avpkt);
@@ -720,10 +751,6 @@
     if (!avcodec_is_open(avctx) || !av_codec_is_decoder(avctx->codec))
         return AVERROR(EINVAL);
 
-    ret = bsfs_init(avctx);
-    if (ret < 0)
-        return ret;
-
     if (avci->buffer_frame->buf[0]) {
         av_frame_move_ref(frame, avci->buffer_frame);
     } else {
@@ -1004,7 +1031,6 @@
                              AVPacket *avpkt)
 {
     int i, ret = 0;
-    AVCodecInternal *avci = avctx->internal;
 
     if (!avpkt->data && avpkt->size) {
         av_log(avctx, AV_LOG_ERROR, "invalid packet: NULL data, size != 0\n");
@@ -1021,29 +1047,9 @@
     get_subtitle_defaults(sub);
 
     if ((avctx->codec->capabilities & AV_CODEC_CAP_DELAY) || avpkt->size) {
-        AVPacket pkt_recoded;
-        AVPacket tmp = *avpkt;
-#if FF_API_MERGE_SD
-FF_DISABLE_DEPRECATION_WARNINGS
-        int did_split = avci->compat_decode_partial_size ?
-                        ff_packet_split_and_drop_side_data(&tmp) :
-                        av_packet_split_side_data(&tmp);
-        //apply_param_change(avctx, &tmp);
+        AVPacket pkt_recoded = *avpkt;
 
-        if (did_split) {
-            /* FFMIN() prevents overflow in case the packet wasn't allocated with
-             * proper padding.
-             * If the side data is smaller than the buffer padding size, the
-             * remaining bytes should have already been filled with zeros by the
-             * original packet allocation anyway. */
-            memset(tmp.data + tmp.size, 0,
-                   FFMIN(avpkt->size - tmp.size, AV_INPUT_BUFFER_PADDING_SIZE));
-        }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
-        pkt_recoded = tmp;
-        ret = recode_subtitle(avctx, &pkt_recoded, &tmp);
+        ret = recode_subtitle(avctx, &pkt_recoded, avpkt);
         if (ret < 0) {
             *got_sub_ptr = 0;
         } else {
@@ -1082,7 +1088,8 @@
                 sub->format = 1;
 
             for (i = 0; i < sub->num_rects; i++) {
-                if (sub->rects[i]->ass && !utf8_check(sub->rects[i]->ass)) {
+                if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_IGNORE &&
+                    sub->rects[i]->ass && !utf8_check(sub->rects[i]->ass)) {
                     av_log(avctx, AV_LOG_ERROR,
                            "Invalid UTF-8 in decoded subtitles text; "
                            "maybe missing -sub_charenc option\n");
@@ -1092,7 +1099,7 @@
                 }
             }
 
-            if (tmp.data != pkt_recoded.data) { // did we recode?
+            if (avpkt->data != pkt_recoded.data) { // did we recode?
                 /* prevent from destroying side data from original packet */
                 pkt_recoded.side_data = NULL;
                 pkt_recoded.side_data_elems = 0;
@@ -1101,14 +1108,6 @@
             }
         }
 
-#if FF_API_MERGE_SD
-        if (did_split) {
-            av_packet_free_side_data(&tmp);
-            if(ret == tmp.size)
-                ret = avpkt->size;
-        }
-#endif
-
         if (*got_sub_ptr)
             avctx->frame_number++;
     }
@@ -1116,84 +1115,238 @@
     return ret;
 }
 
-static int is_hwaccel_pix_fmt(enum AVPixelFormat pix_fmt)
+enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *avctx,
+                                              const enum AVPixelFormat *fmt)
 {
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    return desc->flags & AV_PIX_FMT_FLAG_HWACCEL;
-}
+    const AVPixFmtDescriptor *desc;
+    const AVCodecHWConfig *config;
+    int i, n;
 
-enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum AVPixelFormat *fmt)
-{
-    while (*fmt != AV_PIX_FMT_NONE && is_hwaccel_pix_fmt(*fmt))
-        ++fmt;
-    return fmt[0];
-}
+    // If a device was supplied when the codec was opened, assume that the
+    // user wants to use it.
+    if (avctx->hw_device_ctx && avctx->codec->hw_configs) {
+        AVHWDeviceContext *device_ctx =
+            (AVHWDeviceContext*)avctx->hw_device_ctx->data;
+        for (i = 0;; i++) {
+            config = &avctx->codec->hw_configs[i]->public;
+            if (!config)
+                break;
+            if (!(config->methods &
+                  AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX))
+                continue;
+            if (device_ctx->type != config->device_type)
+                continue;
+            for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) {
+                if (config->pix_fmt == fmt[n])
+                    return fmt[n];
+            }
+        }
+    }
+    // No device or other setup, so we have to choose from things which
+    // don't any other external information.
 
-static AVHWAccel *find_hwaccel(enum AVCodecID codec_id,
-                               enum AVPixelFormat pix_fmt)
-{
-    AVHWAccel *hwaccel = NULL;
+    // If the last element of the list is a software format, choose it
+    // (this should be best software format if any exist).
+    for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++);
+    desc = av_pix_fmt_desc_get(fmt[n - 1]);
+    if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL))
+        return fmt[n - 1];
 
-    while ((hwaccel = av_hwaccel_next(hwaccel)))
-        if (hwaccel->id == codec_id
-            && hwaccel->pix_fmt == pix_fmt)
-            return hwaccel;
-    return NULL;
-}
-
-static int setup_hwaccel(AVCodecContext *avctx,
-                         const enum AVPixelFormat fmt,
-                         const char *name)
-{
-    AVHWAccel *hwa = find_hwaccel(avctx->codec_id, fmt);
-    int ret        = 0;
-
-    if (!hwa) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Could not find an AVHWAccel for the pixel format: %s",
-               name);
-        return AVERROR(ENOENT);
+    // Finally, traverse the list in order and choose the first entry
+    // with no external dependencies (if there is no hardware configuration
+    // information available then this just picks the first entry).
+    for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) {
+        for (i = 0;; i++) {
+            config = avcodec_get_hw_config(avctx->codec, i);
+            if (!config)
+                break;
+            if (config->pix_fmt == fmt[n])
+                break;
+        }
+        if (!config) {
+            // No specific config available, so the decoder must be able
+            // to handle this format without any additional setup.
+            return fmt[n];
+        }
+        if (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL) {
+            // Usable with only internal setup.
+            return fmt[n];
+        }
     }
 
-    if (hwa->capabilities & AV_HWACCEL_CODEC_CAP_EXPERIMENTAL &&
+    // Nothing is usable, give up.
+    return AV_PIX_FMT_NONE;
+}
+
+int ff_decode_get_hw_frames_ctx(AVCodecContext *avctx,
+                                enum AVHWDeviceType dev_type)
+{
+    AVHWDeviceContext *device_ctx;
+    AVHWFramesContext *frames_ctx;
+    int ret;
+
+    if (!avctx->hwaccel)
+        return AVERROR(ENOSYS);
+
+    if (avctx->hw_frames_ctx)
+        return 0;
+    if (!avctx->hw_device_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "A hardware frames or device context is "
+                "required for hardware accelerated decoding.\n");
+        return AVERROR(EINVAL);
+    }
+
+    device_ctx = (AVHWDeviceContext *)avctx->hw_device_ctx->data;
+    if (device_ctx->type != dev_type) {
+        av_log(avctx, AV_LOG_ERROR, "Device type %s expected for hardware "
+               "decoding, but got %s.\n", av_hwdevice_get_type_name(dev_type),
+               av_hwdevice_get_type_name(device_ctx->type));
+        return AVERROR(EINVAL);
+    }
+
+    ret = avcodec_get_hw_frames_parameters(avctx,
+                                           avctx->hw_device_ctx,
+                                           avctx->hwaccel->pix_fmt,
+                                           &avctx->hw_frames_ctx);
+    if (ret < 0)
+        return ret;
+
+    frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+
+
+    if (frames_ctx->initial_pool_size) {
+        // We guarantee 4 base work surfaces. The function above guarantees 1
+        // (the absolute minimum), so add the missing count.
+        frames_ctx->initial_pool_size += 3;
+    }
+
+    ret = av_hwframe_ctx_init(avctx->hw_frames_ctx);
+    if (ret < 0) {
+        av_buffer_unref(&avctx->hw_frames_ctx);
+        return ret;
+    }
+
+    return 0;
+}
+
+int avcodec_get_hw_frames_parameters(AVCodecContext *avctx,
+                                     AVBufferRef *device_ref,
+                                     enum AVPixelFormat hw_pix_fmt,
+                                     AVBufferRef **out_frames_ref)
+{
+    AVBufferRef *frames_ref = NULL;
+    const AVCodecHWConfigInternal *hw_config;
+    const AVHWAccel *hwa;
+    int i, ret;
+
+    for (i = 0;; i++) {
+        hw_config = avctx->codec->hw_configs[i];
+        if (!hw_config)
+            return AVERROR(ENOENT);
+        if (hw_config->public.pix_fmt == hw_pix_fmt)
+            break;
+    }
+
+    hwa = hw_config->hwaccel;
+    if (!hwa || !hwa->frame_params)
+        return AVERROR(ENOENT);
+
+    frames_ref = av_hwframe_ctx_alloc(device_ref);
+    if (!frames_ref)
+        return AVERROR(ENOMEM);
+
+    ret = hwa->frame_params(avctx, frames_ref);
+    if (ret >= 0) {
+        AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frames_ref->data;
+
+        if (frames_ctx->initial_pool_size) {
+            // If the user has requested that extra output surfaces be
+            // available then add them here.
+            if (avctx->extra_hw_frames > 0)
+                frames_ctx->initial_pool_size += avctx->extra_hw_frames;
+
+            // If frame threading is enabled then an extra surface per thread
+            // is also required.
+            if (avctx->active_thread_type & FF_THREAD_FRAME)
+                frames_ctx->initial_pool_size += avctx->thread_count;
+        }
+
+        *out_frames_ref = frames_ref;
+    } else {
+        av_buffer_unref(&frames_ref);
+    }
+    return ret;
+}
+
+static int hwaccel_init(AVCodecContext *avctx,
+                        const AVCodecHWConfigInternal *hw_config)
+{
+    const AVHWAccel *hwaccel;
+    int err;
+
+    hwaccel = hw_config->hwaccel;
+    if (hwaccel->capabilities & AV_HWACCEL_CODEC_CAP_EXPERIMENTAL &&
         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
         av_log(avctx, AV_LOG_WARNING, "Ignoring experimental hwaccel: %s\n",
-               hwa->name);
+               hwaccel->name);
         return AVERROR_PATCHWELCOME;
     }
 
-    if (hwa->priv_data_size) {
-        avctx->internal->hwaccel_priv_data = av_mallocz(hwa->priv_data_size);
+    if (hwaccel->priv_data_size) {
+        avctx->internal->hwaccel_priv_data =
+            av_mallocz(hwaccel->priv_data_size);
         if (!avctx->internal->hwaccel_priv_data)
             return AVERROR(ENOMEM);
     }
 
-    avctx->hwaccel = hwa;
-    if (hwa->init) {
-        ret = hwa->init(avctx);
-        if (ret < 0) {
+    avctx->hwaccel = hwaccel;
+    if (hwaccel->init) {
+        err = hwaccel->init(avctx);
+        if (err < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Failed setup for format %s: "
+                   "hwaccel initialisation returned error.\n",
+                   av_get_pix_fmt_name(hw_config->public.pix_fmt));
             av_freep(&avctx->internal->hwaccel_priv_data);
             avctx->hwaccel = NULL;
-            return ret;
+            return err;
         }
     }
 
     return 0;
 }
 
+static void hwaccel_uninit(AVCodecContext *avctx)
+{
+    if (avctx->hwaccel && avctx->hwaccel->uninit)
+        avctx->hwaccel->uninit(avctx);
+
+    av_freep(&avctx->internal->hwaccel_priv_data);
+
+    avctx->hwaccel = NULL;
+
+    av_buffer_unref(&avctx->hw_frames_ctx);
+}
+
 int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
 {
     const AVPixFmtDescriptor *desc;
     enum AVPixelFormat *choices;
-    enum AVPixelFormat ret;
-    unsigned n = 0;
+    enum AVPixelFormat ret, user_choice;
+    const AVCodecHWConfigInternal *hw_config;
+    const AVCodecHWConfig *config;
+    int i, n, err;
 
-    while (fmt[n] != AV_PIX_FMT_NONE)
-        ++n;
-
+    // Find end of list.
+    for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++);
+    // Must contain at least one entry.
     av_assert0(n >= 1);
-    avctx->sw_pix_fmt = fmt[n - 1];
-    av_assert2(!is_hwaccel_pix_fmt(avctx->sw_pix_fmt));
+    // If a software format is available, it must be the last entry.
+    desc = av_pix_fmt_desc_get(fmt[n - 1]);
+    if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {
+        // No software format is available.
+    } else {
+        avctx->sw_pix_fmt = fmt[n - 1];
+    }
 
     choices = av_malloc_array(n + 1, sizeof(*choices));
     if (!choices)
@@ -1202,48 +1355,108 @@
     memcpy(choices, fmt, (n + 1) * sizeof(*choices));
 
     for (;;) {
-        if (avctx->hwaccel && avctx->hwaccel->uninit)
-            avctx->hwaccel->uninit(avctx);
-        av_freep(&avctx->internal->hwaccel_priv_data);
-        avctx->hwaccel = NULL;
+        // Remove the previous hwaccel, if there was one.
+        hwaccel_uninit(avctx);
 
-        av_buffer_unref(&avctx->hw_frames_ctx);
-
-        ret = avctx->get_format(avctx, choices);
-
-        desc = av_pix_fmt_desc_get(ret);
-        if (!desc) {
+        user_choice = avctx->get_format(avctx, choices);
+        if (user_choice == AV_PIX_FMT_NONE) {
+            // Explicitly chose nothing, give up.
             ret = AV_PIX_FMT_NONE;
             break;
         }
 
-        if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL))
+        desc = av_pix_fmt_desc_get(user_choice);
+        if (!desc) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid format returned by "
+                   "get_format() callback.\n");
+            ret = AV_PIX_FMT_NONE;
             break;
-#if FF_API_CAP_VDPAU
-        if (avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU)
-            break;
-#endif
+        }
+        av_log(avctx, AV_LOG_DEBUG, "Format %s chosen by get_format().\n",
+               desc->name);
 
-        if (avctx->hw_frames_ctx) {
-            AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
-            if (hw_frames_ctx->format != ret) {
-                av_log(avctx, AV_LOG_ERROR, "Format returned from get_buffer() "
-                       "does not match the format of provided AVHWFramesContext\n");
-                ret = AV_PIX_FMT_NONE;
+        for (i = 0; i < n; i++) {
+            if (choices[i] == user_choice)
                 break;
-            }
+        }
+        if (i == n) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid return from get_format(): "
+                   "%s not in possible list.\n", desc->name);
+            break;
         }
 
-        if (!setup_hwaccel(avctx, ret, desc->name))
+        if (avctx->codec->hw_configs) {
+            for (i = 0;; i++) {
+                hw_config = avctx->codec->hw_configs[i];
+                if (!hw_config)
+                    break;
+                if (hw_config->public.pix_fmt == user_choice)
+                    break;
+            }
+        } else {
+            hw_config = NULL;
+        }
+
+        if (!hw_config) {
+            // No config available, so no extra setup required.
+            ret = user_choice;
             break;
+        }
+        config = &hw_config->public;
 
-        /* Remove failed hwaccel from choices */
-        for (n = 0; choices[n] != ret; n++)
-            av_assert0(choices[n] != AV_PIX_FMT_NONE);
+        if (config->methods &
+            AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX &&
+            avctx->hw_frames_ctx) {
+            const AVHWFramesContext *frames_ctx =
+                (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+            if (frames_ctx->format != user_choice) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid setup for format %s: "
+                       "does not match the format of the provided frames "
+                       "context.\n", desc->name);
+                goto try_again;
+            }
+        } else if (config->methods &
+                   AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX &&
+                   avctx->hw_device_ctx) {
+            const AVHWDeviceContext *device_ctx =
+                (AVHWDeviceContext*)avctx->hw_device_ctx->data;
+            if (device_ctx->type != config->device_type) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid setup for format %s: "
+                       "does not match the type of the provided device "
+                       "context.\n", desc->name);
+                goto try_again;
+            }
+        } else if (config->methods &
+                   AV_CODEC_HW_CONFIG_METHOD_INTERNAL) {
+            // Internal-only setup, no additional configuration.
+        } else if (config->methods &
+                   AV_CODEC_HW_CONFIG_METHOD_AD_HOC) {
+            // Some ad-hoc configuration we can't see and can't check.
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Invalid setup for format %s: "
+                   "missing configuration.\n", desc->name);
+            goto try_again;
+        }
+        if (hw_config->hwaccel) {
+            av_log(avctx, AV_LOG_DEBUG, "Format %s requires hwaccel "
+                   "initialisation.\n", desc->name);
+            err = hwaccel_init(avctx, hw_config);
+            if (err < 0)
+                goto try_again;
+        }
+        ret = user_choice;
+        break;
 
-        do
-            choices[n] = choices[n + 1];
-        while (choices[n++] != AV_PIX_FMT_NONE);
+    try_again:
+        av_log(avctx, AV_LOG_DEBUG, "Format %s not usable, retrying "
+               "get_format() without it.\n", desc->name);
+        for (i = 0; i < n; i++) {
+            if (choices[i] == user_choice)
+                break;
+        }
+        for (; i + 1 < n; i++)
+            choices[i] = choices[i + 1];
+        --n;
     }
 
     av_freep(&choices);
@@ -1432,7 +1645,7 @@
         pic->linesize[i] = 0;
     }
     if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
-        desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
+        ((desc->flags & FF_PSEUDOPAL) && pic->data[1]))
         avpriv_set_systematic_pal2((uint32_t *)pic->data[1], pic->format);
 
     if (s->debug & FF_DEBUG_BUFFERS)
@@ -1480,7 +1693,7 @@
     return av_packet_unpack_dictionary(side_metadata, size, frame_md);
 }
 
-int ff_init_buffer_info(AVCodecContext *avctx, AVFrame *frame)
+int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
 {
     const AVPacket *pkt = avctx->internal->last_pkt_props;
     int i;
@@ -1588,11 +1801,6 @@
     return 0;
 }
 
-int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
-{
-    return ff_init_buffer_info(avctx, frame);
-}
-
 static void validate_avframe_allocation(AVCodecContext *avctx, AVFrame *frame)
 {
     if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
@@ -1602,12 +1810,11 @@
         int flags = desc ? desc->flags : 0;
         if (num_planes == 1 && (flags & AV_PIX_FMT_FLAG_PAL))
             num_planes = 2;
+        if ((flags & FF_PSEUDOPAL) && frame->data[1])
+            num_planes = 2;
         for (i = 0; i < num_planes; i++) {
             av_assert0(frame->data[i]);
         }
-        // For now do not enforce anything for palette of pseudopal formats
-        if (num_planes == 1 && (flags & AV_PIX_FMT_FLAG_PSEUDOPAL))
-            num_planes = 2;
         // For formats without data like hwaccel allow unused pointers to be non-NULL.
         for (i = num_planes; num_planes > 0 && i < FF_ARRAY_ELEMS(frame->data); i++) {
             if (frame->data[i])
@@ -1617,6 +1824,43 @@
     }
 }
 
+static void decode_data_free(void *opaque, uint8_t *data)
+{
+    FrameDecodeData *fdd = (FrameDecodeData*)data;
+
+    if (fdd->post_process_opaque_free)
+        fdd->post_process_opaque_free(fdd->post_process_opaque);
+
+    if (fdd->hwaccel_priv_free)
+        fdd->hwaccel_priv_free(fdd->hwaccel_priv);
+
+    av_freep(&fdd);
+}
+
+int ff_attach_decode_data(AVFrame *frame)
+{
+    AVBufferRef *fdd_buf;
+    FrameDecodeData *fdd;
+
+    av_assert1(!frame->private_ref);
+    av_buffer_unref(&frame->private_ref);
+
+    fdd = av_mallocz(sizeof(*fdd));
+    if (!fdd)
+        return AVERROR(ENOMEM);
+
+    fdd_buf = av_buffer_create((uint8_t*)fdd, sizeof(*fdd), decode_data_free,
+                               NULL, AV_BUFFER_FLAG_READONLY);
+    if (!fdd_buf) {
+        av_freep(&fdd);
+        return AVERROR(ENOMEM);
+    }
+
+    frame->private_ref = fdd_buf;
+
+    return 0;
+}
+
 static int get_buffer_internal(AVCodecContext *avctx, AVFrame *frame, int flags)
 {
     const AVHWAccel *hwaccel = avctx->hwaccel;
@@ -1624,7 +1868,7 @@
     int ret;
 
     if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
-        if ((ret = av_image_check_size2(avctx->width, avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx)) < 0 || avctx->pix_fmt<0) {
+        if ((ret = av_image_check_size2(FFALIGN(avctx->width, STRIDE_ALIGN), avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx)) < 0 || avctx->pix_fmt<0) {
             av_log(avctx, AV_LOG_ERROR, "video_get_buffer: image parameters invalid\n");
             return AVERROR(EINVAL);
         }
@@ -1653,8 +1897,14 @@
         avctx->sw_pix_fmt = avctx->pix_fmt;
 
     ret = avctx->get_buffer2(avctx, frame, flags);
-    if (ret >= 0)
-        validate_avframe_allocation(avctx, frame);
+    if (ret < 0)
+        goto end;
+
+    validate_avframe_allocation(avctx, frame);
+
+    ret = ff_attach_decode_data(frame);
+    if (ret < 0)
+        goto end;
 
 end:
     if (avctx->codec_type == AVMEDIA_TYPE_VIDEO && !override_dimensions &&
@@ -1663,6 +1913,9 @@
         frame->height = avctx->height;
     }
 
+    if (ret < 0)
+        av_frame_unref(frame);
+
     return ret;
 }
 
@@ -1689,8 +1942,6 @@
         av_frame_unref(frame);
     }
 
-    ff_init_buffer_info(avctx, frame);
-
     if (!frame->data[0])
         return ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF);
 
@@ -1723,6 +1974,14 @@
     return ret;
 }
 
+static void bsfs_flush(AVCodecContext *avctx)
+{
+    DecodeFilterContext *s = &avctx->internal->filter;
+
+    for (int i = 0; i < s->nb_bsfs; i++)
+        av_bsf_flush(s->bsfs[i]);
+}
+
 void avcodec_flush_buffers(AVCodecContext *avctx)
 {
     avctx->internal->draining      = 0;
@@ -1743,7 +2002,7 @@
     avctx->pts_correction_last_pts =
     avctx->pts_correction_last_dts = INT64_MIN;
 
-    ff_decode_bsfs_uninit(avctx);
+    bsfs_flush(avctx);
 
     if (!avctx->refcounted_frames)
         av_frame_unref(avctx->internal->to_free);

diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index c963022..c3e0e82 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h

@@ -21,9 +21,39 @@
 #ifndef AVCODEC_DECODE_H
 #define AVCODEC_DECODE_H
 
+#include "libavutil/buffer.h"
+#include "libavutil/frame.h"
+#include "libavutil/hwcontext.h"
+
 #include "avcodec.h"
 
 /**
+ * This struct stores per-frame lavc-internal data and is attached to it via
+ * private_ref.
+ */
+typedef struct FrameDecodeData {
+    /**
+     * The callback to perform some delayed processing on the frame right
+     * before it is returned to the caller.
+     *
+     * @note This code is called at some unspecified point after the frame is
+     * returned from the decoder's decode/receive_frame call. Therefore it cannot rely
+     * on AVCodecContext being in any specific state, so it does not get to
+     * access AVCodecContext directly at all. All the state it needs must be
+     * stored in the post_process_opaque object.
+     */
+    int (*post_process)(void *logctx, AVFrame *frame);
+    void *post_process_opaque;
+    void (*post_process_opaque_free)(void *opaque);
+
+    /**
+     * Per-frame private data for hwaccels.
+     */
+    void *hwaccel_priv;
+    void (*hwaccel_priv_free)(void *priv);
+} FrameDecodeData;
+
+/**
  * Called by decoders to get the next packet for decoding.
  *
  * @param pkt An empty packet to be filled with data.
@@ -34,6 +64,18 @@
  */
 int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt);
 
+int ff_decode_bsfs_init(AVCodecContext *avctx);
+
 void ff_decode_bsfs_uninit(AVCodecContext *avctx);
 
+/**
+ * Make sure avctx.hw_frames_ctx is set. If it's not set, the function will
+ * try to allocate it from hw_device_ctx. If that is not possible, an error
+ * message is printed, and an error code is returned.
+ */
+int ff_decode_get_hw_frames_ctx(AVCodecContext *avctx,
+                                enum AVHWDeviceType dev_type);
+
+int ff_attach_decode_data(AVFrame *frame);
+
 #endif /* AVCODEC_DECODE_H */

diff --git a/libavcodec/dfa.c b/libavcodec/dfa.c
index 43dba2c..970175f 100644
--- a/libavcodec/dfa.c
+++ b/libavcodec/dfa.c

@@ -41,7 +41,7 @@
 
     avctx->pix_fmt = AV_PIX_FMT_PAL8;
 
-    if (!avctx->width || !avctx->height)
+    if (!avctx->width || !avctx->height || FFMAX(avctx->width, avctx->height) >= (1<<16))
         return AVERROR_INVALIDDATA;
 
     av_assert0(av_image_check_size(avctx->width, avctx->height, 0, avctx) >= 0);
@@ -149,6 +149,8 @@
     int mask = 0x10000, bitbuf = 0;
     int i, v, offset, count, segments;
 
+    if ((width | height) & 1)
+        return AVERROR_INVALIDDATA;
     segments = bytestream2_get_le16(gb);
     while (segments--) {
         if (bytestream2_get_bytes_left(gb) < 2)
@@ -176,7 +178,7 @@
                 return AVERROR_INVALIDDATA;
             frame += v;
         } else {
-            if (frame_end - frame < width + 4)
+            if (width < 4 || frame_end - frame < width + 4)
                 return AVERROR_INVALIDDATA;
             frame[0] = frame[1] =
             frame[width] = frame[width + 1] =  bytestream2_get_byte(gb);

diff --git a/libavcodec/dirac.c b/libavcodec/dirac.c
index 027ce79..d5870d6 100644
--- a/libavcodec/dirac.c
+++ b/libavcodec/dirac.c

@@ -147,6 +147,7 @@
     unsigned luma_depth = 8, luma_offset = 16;
     int idx;
     int chroma_x_shift, chroma_y_shift;
+    int ret;
 
     /* [DIRAC_STD] 10.3.2 Frame size. frame_size(video_params) */
     /* [DIRAC_STD] custom_dimensions_flag */
@@ -269,7 +270,10 @@
         return AVERROR_INVALIDDATA;
 
     dsh->pix_fmt = dirac_pix_fmt[dsh->chroma_format][dsh->pixel_range_index-2];
-    avcodec_get_chroma_sub_sample(dsh->pix_fmt, &chroma_x_shift, &chroma_y_shift);
+    ret = av_pix_fmt_get_chroma_sub_sample(dsh->pix_fmt, &chroma_x_shift, &chroma_y_shift);
+    if (ret)
+        return ret;
+
     if ((dsh->width % (1<<chroma_x_shift)) || (dsh->height % (1<<chroma_y_shift))) {
         if (log_ctx)
             av_log(log_ctx, AV_LOG_ERROR, "Dimensions must be an integer multiple of the chroma subsampling\n");

diff --git a/libavcodec/dirac_dwt.h b/libavcodec/dirac_dwt.h
index 68ebd19..994dc21 100644
--- a/libavcodec/dirac_dwt.h
+++ b/libavcodec/dirac_dwt.h

@@ -99,10 +99,10 @@
     (b1 + (unsigned)((int)(b0 + (unsigned)(b2) + 1) >> 1))
 
 #define COMPOSE_DD97iH0(b0, b1, b2, b3, b4)\
-    (int)(((unsigned)(b2) + ((int)(-b0 + 9U*b1 + 9U*b3 - b4 + 8) >> 4)))
+    (int)(((unsigned)(b2) + ((int)(9U*b1 + 9U*b3 - b4 - b0 +  8) >> 4)))
 
 #define COMPOSE_DD137iL0(b0, b1, b2, b3, b4)\
-    (int)(((unsigned)(b2) - ((int)(-b0 + 9U*b1 + 9U*b3 - b4 + 16) >> 5)))
+    (int)(((unsigned)(b2) - ((int)(9U*b1 + 9U*b3 - b4 - b0 + 16) >> 5)))
 
 #define COMPOSE_HAARiL0(b0, b1)\
     ((int)(b0 - (unsigned)((int)(b1 + 1U) >> 1)))

diff --git a/libavcodec/dirac_dwt_template.c b/libavcodec/dirac_dwt_template.c
index e68cc4d..5d55d93 100644
--- a/libavcodec/dirac_dwt_template.c
+++ b/libavcodec/dirac_dwt_template.c

@@ -57,8 +57,8 @@
 {
     int i;
     for (i = 0; i < w2; i++) {
-        dst[2*i  ] = (src0[i] + add) >> shift;
-        dst[2*i+1] = (src1[i] + add) >> shift;
+        dst[2*i  ] = ((int)(src0[i] + (unsigned)add)) >> shift;
+        dst[2*i+1] = ((int)(src1[i] + (unsigned)add)) >> shift;
     }
 }
 
@@ -95,8 +95,8 @@
     tmp[w2+1] = tmp[w2] = tmp[w2-1];
 
     for (x = 0; x < w2; x++) {
-        b[2*x  ] = (tmp[x] + 1)>>1;
-        b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1;
+        b[2*x  ] = ((int)(tmp[x] + 1U))>>1;
+        b[2*x+1] = ((int)(COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1U))>>1;
     }
 }
 
@@ -118,8 +118,8 @@
     tmp[w2+1] = tmp[w2] = tmp[w2-1];
 
     for (x = 0; x < w2; x++) {
-        b[2*x  ] = (tmp[x] + 1)>>1;
-        b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1;
+        b[2*x  ] = ((int)(tmp[x] + 1U))>>1;
+        b[2*x+1] = ((int)(COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1U))>>1;
     }
 }
 
@@ -190,15 +190,15 @@
 
     // second stage combined with interleave and shift
     b0 = b2 = COMPOSE_DAUB97iL0(temp[w2], temp[0], temp[w2]);
-    b[0] = (b0 + 1) >> 1;
+    b[0] = ~((~b0) >> 1);
     for (x = 1; x < w2; x++) {
         b2 = COMPOSE_DAUB97iL0(temp[x+w2-1], temp[x     ], temp[x+w2]);
         b1 = COMPOSE_DAUB97iH0(          b0, temp[x+w2-1], b2        );
-        b[2*x-1] = (b1 + 1) >> 1;
-        b[2*x  ] = (b2 + 1) >> 1;
+        b[2*x-1] = ~((~b1) >> 1);
+        b[2*x  ] = ~((~b2) >> 1);
         b0 = b2;
     }
-    b[w-1] = (COMPOSE_DAUB97iH0(b2, temp[w-1], b2) + 1) >> 1;
+    b[w-1] = ~((~COMPOSE_DAUB97iH0(b2, temp[w-1], b2)) >> 1);
 }
 
 static void RENAME(vertical_compose_dirac53iH0)(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2,

diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
index 7be7f33..af561d1 100644
--- a/libavcodec/diracdec.c
+++ b/libavcodec/diracdec.c

@@ -26,6 +26,7 @@
  * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  */
 
+#include "libavutil/pixdesc.h"
 #include "libavutil/thread.h"
 #include "avcodec.h"
 #include "get_bits.h"
@@ -140,7 +141,7 @@
     GetBitContext gb;
     AVDiracSeqHeader seq;
     int seen_sequence_header;
-    int frame_number;           /* number of the next frame to display       */
+    int64_t frame_number;       /* number of the next frame to display       */
     Plane plane[3];
     int chroma_x_shift;
     int chroma_y_shift;
@@ -487,7 +488,7 @@
  * Decode the coeffs in the rectangle defined by left, right, top, bottom
  * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
  */
-static inline void codeblock(DiracContext *s, SubBand *b,
+static inline int codeblock(DiracContext *s, SubBand *b,
                              GetBitContext *gb, DiracArith *c,
                              int left, int right, int top, int bottom,
                              int blockcnt_one, int is_arith)
@@ -504,7 +505,7 @@
             zero_block = get_bits1(gb);
 
         if (zero_block)
-            return;
+            return 0;
     }
 
     if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
@@ -515,7 +516,7 @@
             quant = dirac_get_se_golomb(gb);
         if (quant > INT_MAX - b->quant || b->quant + quant < 0) {
             av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
-            return;
+            return AVERROR_INVALIDDATA;
         }
         b->quant += quant;
     }
@@ -523,7 +524,7 @@
     if (b->quant > (DIRAC_MAX_QUANT_INDEX - 1)) {
         av_log(s->avctx, AV_LOG_ERROR, "Unsupported quant %d\n", b->quant);
         b->quant = 0;
-        return;
+        return AVERROR_INVALIDDATA;
     }
 
     qfactor = ff_dirac_qscale_tab[b->quant];
@@ -547,6 +548,8 @@
         }
     } else {
         for (y = top; y < bottom; y++) {
+            if (get_bits_left(gb) < 1)
+                return AVERROR_INVALIDDATA;
             for (x = left; x < right; x++) {
                 int val = coeff_unpack_golomb(gb, qfactor, qoffset);
                 if (b->pshift) {
@@ -558,6 +561,7 @@
             buf += b->stride;
          }
      }
+     return 0;
 }
 
 /**
@@ -592,7 +596,7 @@
  * Dirac Specification ->
  * 13.4.2 Non-skipped subbands.  subband_coeffs()
  */
-static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
+static av_always_inline int decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
 {
     int cb_x, cb_y, left, right, top, bottom;
     DiracArith c;
@@ -600,9 +604,10 @@
     int cb_width  = s->codeblock[b->level + (b->orientation != subband_ll)].width;
     int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
     int blockcnt_one = (cb_width + cb_height) == 2;
+    int ret;
 
     if (!b->length)
-        return;
+        return 0;
 
     init_get_bits8(&gb, b->coeff_data, b->length);
 
@@ -615,7 +620,9 @@
         left = 0;
         for (cb_x = 0; cb_x < cb_width; cb_x++) {
             right = (b->width * (cb_x+1LL)) / cb_width;
-            codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
+            ret = codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
+            if (ret < 0)
+                return ret;
             left = right;
         }
         top = bottom;
@@ -628,33 +635,35 @@
             intra_dc_prediction_8(b);
         }
     }
+    return 0;
 }
 
 static int decode_subband_arith(AVCodecContext *avctx, void *b)
 {
     DiracContext *s = avctx->priv_data;
-    decode_subband_internal(s, b, 1);
-    return 0;
+    return decode_subband_internal(s, b, 1);
 }
 
 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
 {
     DiracContext *s = avctx->priv_data;
     SubBand **b     = arg;
-    decode_subband_internal(s, *b, 0);
-    return 0;
+    return decode_subband_internal(s, *b, 0);
 }
 
 /**
  * Dirac Specification ->
  * [DIRAC_STD] 13.4.1 core_transform_data()
  */
-static void decode_component(DiracContext *s, int comp)
+static int decode_component(DiracContext *s, int comp)
 {
     AVCodecContext *avctx = s->avctx;
     SubBand *bands[3*MAX_DWT_LEVELS+1];
     enum dirac_subband orientation;
     int level, num_bands = 0;
+    int ret[3*MAX_DWT_LEVELS+1];
+    int i;
+    int damaged_count = 0;
 
     /* Unpack all subbands at all levels. */
     for (level = 0; level < s->wavelet_depth; level++) {
@@ -676,11 +685,20 @@
         /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
         if (s->is_arith)
             avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
-                           NULL, 4-!!level, sizeof(SubBand));
+                           ret + 3*level + !!level, 4-!!level, sizeof(SubBand));
     }
     /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
     if (!s->is_arith)
-        avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
+        avctx->execute(avctx, decode_subband_golomb, bands, ret, num_bands, sizeof(SubBand*));
+
+    for (i = 0; i < s->wavelet_depth * 3 + 1; i++) {
+        if (ret[i] < 0)
+            damaged_count++;
+    }
+    if (damaged_count > (s->wavelet_depth * 3 + 1) /2)
+        return AVERROR_INVALIDDATA;
+
+    return 0;
 }
 
 #define PARSE_VALUES(type, x, gb, ebits, buf1, buf2) \
@@ -985,6 +1003,10 @@
             for (slice_x = 0; bufsize > 0 && slice_x < s->num_x; slice_x++) {
                 bytes = (slice_num+1) * (int64_t)s->lowdelay.bytes.num / s->lowdelay.bytes.den
                        - slice_num    * (int64_t)s->lowdelay.bytes.num / s->lowdelay.bytes.den;
+                if (bytes >= INT_MAX || bytes*8 > bufsize) {
+                    av_log(s->avctx, AV_LOG_ERROR, "too many bytes\n");
+                    return AVERROR_INVALIDDATA;
+                }
                 slices[slice_num].bytes   = bytes;
                 slices[slice_num].slice_x = slice_x;
                 slices[slice_num].slice_y = slice_y;
@@ -1242,7 +1264,10 @@
     else {
         s->num_x        = get_interleaved_ue_golomb(gb);
         s->num_y        = get_interleaved_ue_golomb(gb);
-        if (s->num_x * s->num_y == 0 || s->num_x * (uint64_t)s->num_y > INT_MAX) {
+        if (s->num_x * s->num_y == 0 || s->num_x * (uint64_t)s->num_y > INT_MAX ||
+            s->num_x * (uint64_t)s->avctx->width  > INT_MAX ||
+            s->num_y * (uint64_t)s->avctx->height > INT_MAX
+        ) {
             av_log(s->avctx,AV_LOG_ERROR,"Invalid numx/y\n");
             s->num_x = s->num_y = 0;
             return AVERROR_INVALIDDATA;
@@ -1398,8 +1423,8 @@
     int *c      = s->globalmc[ref].perspective;
 
     int m       = (1<<ep) - (c[0]*x + c[1]*y);
-    int mx      = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
-    int my      = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
+    int64_t mx  = m * (int64_t)((A[0][0] * (int64_t)x + A[0][1]*(int64_t)y) + (1<<ez) * b[0]);
+    int64_t my  = m * (int64_t)((A[1][0] * (int64_t)x + A[1][1]*(int64_t)y) + (1<<ez) * b[1]);
 
     block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
     block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
@@ -1436,8 +1461,8 @@
                 global_mv(s, block, x, y, i);
             } else {
                 pred_mv(block, stride, x, y, i);
-                block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
-                block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
+                block->u.mv[i][0] += (unsigned)dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
+                block->u.mv[i][1] += (unsigned)dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
             }
         }
 }
@@ -1859,7 +1884,9 @@
         if (!s->zero_res && !s->low_delay)
         {
             memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
-            decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
+            ret = decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
+            if (ret < 0)
+                return ret;
         }
         ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2,
                                    s->wavelet_depth, s->bit_depth);
@@ -1927,7 +1954,10 @@
 {
     int ret, i;
     int chroma_x_shift, chroma_y_shift;
-    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
+    ret = av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift,
+                                           &chroma_y_shift);
+    if (ret < 0)
+        return ret;
 
     f->width  = avctx->width  + 2 * EDGE_WIDTH;
     f->height = avctx->height + 2 * EDGE_WIDTH + 2;
@@ -2126,7 +2156,11 @@
 
         s->pshift = s->bit_depth > 8;
 
-        avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
+        ret = av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt,
+                                               &s->chroma_x_shift,
+                                               &s->chroma_y_shift);
+        if (ret < 0)
+            return ret;
 
         ret = alloc_sequence_buffers(s);
         if (ret < 0)
@@ -2302,7 +2336,7 @@
     }
 
     if (*got_frame)
-        s->frame_number = picture->display_picture_number + 1;
+        s->frame_number = picture->display_picture_number + 1LL;
 
     return buf_idx;
 }

diff --git a/libavcodec/dnxhd_parser.c b/libavcodec/dnxhd_parser.c
index 79ca1d6..7c16e25 100644
--- a/libavcodec/dnxhd_parser.c
+++ b/libavcodec/dnxhd_parser.c

@@ -75,7 +75,7 @@
 
                 remaining = avpriv_dnxhd_get_frame_size(cid);
                 if (remaining <= 0) {
-                    remaining = ff_dnxhd_get_hr_frame_size(cid, dctx->w, dctx->h);
+                    remaining = avpriv_dnxhd_get_hr_frame_size(cid, dctx->w, dctx->h);
                     if (remaining <= 0)
                         continue;
                 }

diff --git a/libavcodec/dnxhddata.c b/libavcodec/dnxhddata.c
index 4462df3..154be89 100644
--- a/libavcodec/dnxhddata.c
+++ b/libavcodec/dnxhddata.c

@@ -939,96 +939,84 @@
       dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
       dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
       dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
-      { 175, 185, 365, 440 },
-      { { 24000, 1001 }, { 25, 1 }, { 50, 1 }, { 60000, 1001 } } },
+      { 175, 185, 365, 440 } },
     { 1237, 1920, 1080, 606208, 606208,
       0, 4, 8, 3,
       dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-      { 115, 120, 145, 240, 290 },
-      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
+      { 115, 120, 145, 240, 290 } },
     { 1238, 1920, 1080, 917504, 917504,
       0, 4, 8, 4,
       dnxhd_1238_luma_weight, dnxhd_1238_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_info,
       dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1238_run,
-      { 175, 185, 220, 365, 440 },
-      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
+      { 175, 185, 220, 365, 440 } },
     { 1241, 1920, 1080, 917504, 458752,
       DNXHD_INTERLACED, 6, 10, 4,
       dnxhd_1241_luma_weight, dnxhd_1241_chroma_weight,
       dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
       dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
       dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
-      { 185, 220 },
-      { { 25, 1 }, { 30000, 1001 } } },
+      { 185, 220 } },
     { 1242, 1920, 1080, 606208, 303104,
       DNXHD_INTERLACED, 4, 8, 3,
       dnxhd_1242_luma_weight, dnxhd_1242_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-      { 120, 145 },
-      { { 25, 1 }, { 30000, 1001 } } },
+      { 120, 145 } },
     { 1243, 1920, 1080, 917504, 458752,
       DNXHD_INTERLACED, 4, 8, 4,
       dnxhd_1243_luma_weight, dnxhd_1243_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_info,
       dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1238_run,
-      { 185, 220 },
-      { { 25, 1 }, { 30000, 1001 } } },
+      { 185, 220 } },
     { 1244, 1440, 1080, 606208, 303104,
       DNXHD_INTERLACED, 4, 8, 3,
       dnxhd_1260_luma_weight, dnxhd_1260_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-      { 120, 145 },
-      { { 25, 1 }, { 30000, 1001 } } },
+      { 120, 145 } },
     { 1250, 1280,  720, 458752, 458752,
       0, 6, 10, 4,
       dnxhd_1250_luma_weight, dnxhd_1250_chroma_weight,
       dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
       dnxhd_1250_ac_codes, dnxhd_1250_ac_bits, dnxhd_1250_ac_info,
       dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
-      { 90, 90, 180, 220 },
-      { { 24000, 1001 }, { 25, 1 }, { 50, 1 }, { 60000, 1001 } } },
+      { 90, 180, 220 } },
     { 1251, 1280,  720, 458752, 458752,
       0, 4, 8, 4,
       dnxhd_1251_luma_weight, dnxhd_1251_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1251_ac_codes, dnxhd_1251_ac_bits, dnxhd_1251_ac_info,
       dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
-      { 90, 90, 110, 180, 220 },
-      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
+      { 90, 110, 180, 220 } },
     { 1252, 1280,  720, 303104, 303104,
       0, 4, 8, 5,
       dnxhd_1252_luma_weight, dnxhd_1252_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1252_ac_codes, dnxhd_1252_ac_bits, dnxhd_1252_ac_info,
       dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
-      { 60, 60, 75, 120, 145 },
-      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
+      { 60, 75, 120, 145 } },
     { 1253, 1920, 1080, 188416, 188416,
       0, 4, 8, 3,
       dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-      { 36, 36, 45, 75, 90 },
-      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
+      { 36, 45, 75, 90 } },
     { 1256, 1920, 1080, 1835008, 1835008,
       DNXHD_444, 6, 10, 4,
       dnxhd_1235_luma_weight, dnxhd_1235_luma_weight,
       dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
       dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
       dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
-      { 350, 390, 440, 730, 880 },
-      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
+      { 350, 390, 440, 730, 880 } },
     { 1258, 960, 720, 212992, 212992,
       0, 4, 8, 5,
       dnxhd_1252_luma_weight, dnxhd_1252_chroma_weight,
@@ -1056,35 +1044,35 @@
       dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
       dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
       dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
-      { 0 }, { { 0 } }, { 57344, 255} },
+      { 0 }, { 57344, 255} },
     { 1271, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE,
       0, 6, DNXHD_VARIABLE, 4,
       dnxhd_1241_luma_weight, dnxhd_1241_chroma_weight,
       dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
       dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
       dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
-      { 0 }, { { 0 } }, { 28672, 255} },
+      { 0 }, { 28672, 255} },
     { 1272, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE,
       0, 4, 8, 4,
       dnxhd_1238_luma_weight, dnxhd_1238_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_info,
       dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1238_run,
-      { 0 }, { { 0 } }, { 28672, 255} },
+      { 0 }, { 28672, 255} },
     { 1273, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE,
       0, 4, 8, 3,
       dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-      { 0 }, { { 0 } }, { 18944, 255} },
+      { 0 }, { 18944, 255} },
     { 1274, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE,
       0, 4, 8, 3,
       dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-      { 0 }, { { 0 } }, { 5888, 255} },
+      { 0 }, { 5888, 255} },
 };
 
 int ff_dnxhd_get_cid_table(int cid)
@@ -1104,6 +1092,19 @@
     return ff_dnxhd_cid_table[i].frame_size;
 }
 
+int avpriv_dnxhd_get_hr_frame_size(int cid, int w, int h)
+{
+    int result, i = ff_dnxhd_get_cid_table(cid);
+
+    if (i < 0)
+        return i;
+
+    result = ((h + 15) / 16) * ((w + 15) / 16) * (int64_t)ff_dnxhd_cid_table[i].packet_scale.num / ff_dnxhd_cid_table[i].packet_scale.den;
+    result = (result + 2048) / 4096 * 4096;
+
+    return FFMAX(result, 8192);
+}
+
 int avpriv_dnxhd_get_interlaced(int cid)
 {
     int i = ff_dnxhd_get_cid_table(cid);
@@ -1112,13 +1113,6 @@
     return ff_dnxhd_cid_table[i].flags & DNXHD_INTERLACED ? 1 : 0;
 }
 
-#if LIBAVCODEC_VERSION_MAJOR < 58
-uint64_t avpriv_dnxhd_parse_header_prefix(const uint8_t *buf)
-{
-    return ff_dnxhd_parse_header_prefix(buf);
-}
-#endif
-
 static int dnxhd_find_hr_cid(AVCodecContext *avctx)
 {
     switch (avctx->profile) {
@@ -1175,9 +1169,9 @@
             if (!cid->bit_rates[j])
                 break;
 
-            av_log(avctx, loglevel, "Frame size: %dx%d%c; bitrate: %dMbps; pixel format: %s; framerate: %d/%d\n",
+            av_log(avctx, loglevel, "Frame size: %dx%d%c; bitrate: %dMbps; pixel format: %s\n",
                    cid->width, cid->height, cid->flags & DNXHD_INTERLACED ? 'i' : 'p', cid->bit_rates[j],
-                   cid->bit_depth == 10 ? "yuv422p10" : "yuv422p", cid->frame_rates[j].num, cid->frame_rates[j].den);
+                   cid->flags & DNXHD_444 ? "yuv444p10, gbrp10" : cid->bit_depth == 10 ? "yuv422p10" : "yuv422p");
         }
     }
 }

diff --git a/libavcodec/dnxhddata.h b/libavcodec/dnxhddata.h
index c96c5e8..cfa6b0c 100644
--- a/libavcodec/dnxhddata.h
+++ b/libavcodec/dnxhddata.h

@@ -55,7 +55,6 @@
     const uint16_t *run_codes;
     const uint8_t *run_bits, *run;
     int bit_rates[5]; ///< Helper to choose variants, rounded to nearest 5Mb/s
-    AVRational frame_rates[5];
     AVRational packet_scale;
 } CIDEntry;
 
@@ -91,23 +90,8 @@
     return ff_dnxhd_check_header_prefix(prefix);
 }
 
-static av_always_inline int ff_dnxhd_get_hr_frame_size(int cid, int w, int h)
-{
-    int result, i = ff_dnxhd_get_cid_table(cid);
-
-    if (i < 0)
-        return i;
-
-    result = ((h + 15) / 16) * ((w + 15) / 16) * (int64_t)ff_dnxhd_cid_table[i].packet_scale.num / ff_dnxhd_cid_table[i].packet_scale.den;
-    result = (result + 2048) / 4096 * 4096;
-
-    return FFMAX(result, 8192);
-}
-
 int avpriv_dnxhd_get_frame_size(int cid);
+int avpriv_dnxhd_get_hr_frame_size(int cid, int w, int h);
 int avpriv_dnxhd_get_interlaced(int cid);
-#if LIBAVCODEC_VERSION_MAJOR < 58
-attribute_deprecated
-uint64_t avpriv_dnxhd_parse_header_prefix(const uint8_t *buf);
-#endif
+
 #endif /* AVCODEC_DNXHDDATA_H */

diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c
index 1f93f9d..ae8b0ff 100644
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c

@@ -67,6 +67,8 @@
     const CIDEntry *cid_table;
     int bit_depth; // 8, 10, 12 or 0 if not initialized at all.
     int is_444;
+    int alpha;
+    int lla;
     int mbaff;
     int act;
     int (*decode_dct_block)(const struct DNXHDContext *ctx,
@@ -93,7 +95,9 @@
 
     ctx->avctx = avctx;
     ctx->cid = -1;
-    avctx->colorspace = AVCOL_SPC_BT709;
+    if (avctx->colorspace == AVCOL_SPC_UNSPECIFIED) {
+        avctx->colorspace = AVCOL_SPC_BT709;
+    }
 
     avctx->coded_width  = FFALIGN(avctx->width,  16);
     avctx->coded_height = FFALIGN(avctx->height, 16);
@@ -203,6 +207,10 @@
         ctx->cur_field = 0;
     }
     ctx->mbaff = (buf[0x6] >> 5) & 1;
+    ctx->alpha = buf[0x7] & 1;
+    ctx->lla   = (buf[0x7] >> 1) & 1;
+    if (ctx->alpha)
+        avpriv_request_sample(ctx->avctx, "alpha");
 
     ctx->height = AV_RB16(buf + 0x18);
     ctx->width  = AV_RB16(buf + 0x1a);

diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index 0d80381..41b8079 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c

@@ -207,17 +207,17 @@
     int i, j, level, run;
     int max_level = 1 << (ctx->bit_depth + 2);
 
-    FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->vlc_codes,
-                      max_level, 4 * sizeof(*ctx->vlc_codes), fail);
-    FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->vlc_bits,
-                      max_level, 4 * sizeof(*ctx->vlc_bits), fail);
+    FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->orig_vlc_codes,
+                      max_level, 4 * sizeof(*ctx->orig_vlc_codes), fail);
+    FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->orig_vlc_bits,
+                      max_level, 4 * sizeof(*ctx->orig_vlc_bits), fail);
     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_codes,
                       63 * 2, fail);
     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_bits,
                       63, fail);
 
-    ctx->vlc_codes += max_level * 2;
-    ctx->vlc_bits  += max_level * 2;
+    ctx->vlc_codes = ctx->orig_vlc_codes + max_level * 2;
+    ctx->vlc_bits  = ctx->orig_vlc_bits + max_level * 2;
     for (level = -max_level; level < max_level; level++) {
         for (run = 0; run < 2; run++) {
             int index = (level << 1) | run;
@@ -473,10 +473,16 @@
         ctx->m.mb_height /= 2;
     }
 
+    if (ctx->interlaced && ctx->profile != FF_PROFILE_DNXHD) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Interlaced encoding is not supported for DNxHR profiles.\n");
+        return AVERROR(EINVAL);
+    }
+
     ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;
 
     if (ctx->cid_table->frame_size == DNXHD_VARIABLE) {
-        ctx->frame_size = ff_dnxhd_get_hr_frame_size(ctx->cid,
+        ctx->frame_size = avpriv_dnxhd_get_hr_frame_size(ctx->cid,
                                                      avctx->width, avctx->height);
         av_assert0(ctx->frame_size >= 0);
         ctx->coding_unit_size = ctx->frame_size;
@@ -490,12 +496,6 @@
     else
         ctx->data_offset = 0x280;
 
-#if FF_API_QUANT_BIAS
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
-        ctx->intra_quant_bias = avctx->intra_quant_bias;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
     // XXX tune lbias/cbias
     if ((ret = dnxhd_init_qmat(ctx, ctx->intra_quant_bias, 0)) < 0)
         return ret;
@@ -1354,11 +1354,10 @@
 static av_cold int dnxhd_encode_end(AVCodecContext *avctx)
 {
     DNXHDEncContext *ctx = avctx->priv_data;
-    int max_level        = 1 << (ctx->bit_depth + 2);
     int i;
 
-    av_free(ctx->vlc_codes - max_level * 2);
-    av_free(ctx->vlc_bits - max_level * 2);
+    av_freep(&ctx->orig_vlc_codes);
+    av_freep(&ctx->orig_vlc_bits);
     av_freep(&ctx->run_codes);
     av_freep(&ctx->run_bits);
 
@@ -1375,8 +1374,10 @@
     av_freep(&ctx->qmatrix_c16);
     av_freep(&ctx->qmatrix_l16);
 
-    for (i = 1; i < avctx->thread_count; i++)
-        av_freep(&ctx->thread[i]);
+    if (avctx->active_thread_type == FF_THREAD_SLICE) {
+        for (i = 1; i < avctx->thread_count; i++)
+            av_freep(&ctx->thread[i]);
+    }
 
     return 0;
 }
@@ -1396,6 +1397,7 @@
     .encode2        = dnxhd_encode_picture,
     .close          = dnxhd_encode_end,
     .capabilities   = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV422P,
         AV_PIX_FMT_YUV422P10,

diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h
index 963821a..7b0d862 100644
--- a/libavcodec/dnxhdenc.h
+++ b/libavcodec/dnxhdenc.h

@@ -86,6 +86,8 @@
     unsigned frame_bits;
     uint8_t *src[3];
 
+    uint32_t *orig_vlc_codes;
+    uint8_t  *orig_vlc_bits;
     uint32_t *vlc_codes;
     uint8_t  *vlc_bits;
     uint16_t *run_codes;

diff --git a/libavcodec/dolby_e.c b/libavcodec/dolby_e.c
index 91a00ce..429612e 100644
--- a/libavcodec/dolby_e.c
+++ b/libavcodec/dolby_e.c

@@ -681,6 +681,93 @@
     return 0;
 }
 
+
+static av_cold void init_tables(void)
+{
+    int i, j;
+
+    for (i = 1; i < 17; i++)
+        mantissa_tab1[i][0] = 1.0f / (1 << i - 1);
+
+    for (i = 2; i < 16; i++) {
+        mantissa_tab1[i][1] = 1.0f  / ((1 << i) - 1);
+        mantissa_tab1[i][2] = 0.5f  / ((1 << i) - 1);
+        mantissa_tab1[i][3] = 0.25f / ((1 << i) - 1);
+    }
+
+    mantissa_tab1[i][1] = 0.5f   / (1 << 15);
+    mantissa_tab1[i][2] = 0.75f  / (1 << 15);
+    mantissa_tab1[i][3] = 0.875f / (1 << 15);
+
+    for (i = 1; i < 17; i++) {
+        mantissa_tab2[i][1] = mantissa_tab1[i][0] * 0.5f;
+        mantissa_tab2[i][2] = mantissa_tab1[i][0] * 0.75f;
+        mantissa_tab2[i][3] = mantissa_tab1[i][0] * 0.875f;
+        for (j = 1; j < 4; j++)
+            mantissa_tab3[i][j] = 1.0f / (1 << i) + 1.0f / (1 << j) - 1.0f / (1 << i + j);
+    }
+
+    mantissa_tab3[1][3] = 0.6875f;
+
+    for (i = 0; i < 25; i++) {
+        exponent_tab[i * 2    ] = 1.0f      / (1 << i);
+        exponent_tab[i * 2 + 1] = M_SQRT1_2 / (1 << i);
+    }
+
+    for (i = 1; i < 1024; i++)
+        gain_tab[i] = exp2f((i - 960) / 64.0f);
+
+    // short 1
+    ff_kbd_window_init(window, 3.0f, 128);
+    for (i = 0; i < 128; i++)
+        window[128 + i] = window[127 - i];
+
+    // start
+    for (i = 0; i < 192; i++)
+        window[256 + i] = start_window[i];
+
+    // short 2
+    for (i = 0; i < 192; i++)
+        window[448 + i] = short_window2[i];
+    for (i = 0; i < 64; i++)
+        window[640 + i] = window[63 - i];
+
+    // short 3
+    for (i = 0; i < 64; i++)
+        window[704 + i] = short_window3[i];
+    for (i = 0; i < 192; i++)
+        window[768 + i] = window[64 + i];
+
+    // bridge
+    for (i = 0; i < 128; i++)
+        window[960 + i] = window[i];
+    for (i = 0; i < 64; i++)
+        window[1088 + i] = 1.0f;
+
+    // long
+    ff_kbd_window_init(window + 1408, 3.0f, 256);
+    for (i = 0; i < 640; i++)
+        window[1664 + i] = 1.0f;
+    for (i = 0; i < 256; i++)
+        window[2304 + i] = window[1152 + i] = window[1663 - i];
+
+    // reverse start
+    for (i = 0; i < 192; i++)
+        window[2560 + i] = window[447 - i];
+
+    // reverse short 2
+    for (i = 0; i < 256; i++)
+        window[2752 + i] = window[703 - i];
+
+    // reverse short 3
+    for (i = 0; i < 256; i++)
+        window[3008 + i] = window[959 - i];
+
+    // reverse bridge
+    for (i = 0; i < 448; i++)
+        window[3264 + i] = window[1407 - i];
+}
+
 static av_cold int dolby_e_init(AVCodecContext *avctx)
 {
     static AVOnce init_once = AV_ONCE_INIT;

diff --git a/libavcodec/dolby_e.h b/libavcodec/dolby_e.h
index 0390233..ae04bf6 100644
--- a/libavcodec/dolby_e.h
+++ b/libavcodec/dolby_e.h

@@ -644,90 +644,4 @@
 
 DECLARE_ALIGNED(32, static float, window)[3712];
 
-static av_cold void init_tables(void)
-{
-    int i, j;
-
-    for (i = 1; i < 17; i++)
-        mantissa_tab1[i][0] = 1.0f / (1 << i - 1);
-
-    for (i = 2; i < 16; i++) {
-        mantissa_tab1[i][1] = 1.0f  / ((1 << i) - 1);
-        mantissa_tab1[i][2] = 0.5f  / ((1 << i) - 1);
-        mantissa_tab1[i][3] = 0.25f / ((1 << i) - 1);
-    }
-
-    mantissa_tab1[i][1] = 0.5f   / (1 << 15);
-    mantissa_tab1[i][2] = 0.75f  / (1 << 15);
-    mantissa_tab1[i][3] = 0.875f / (1 << 15);
-
-    for (i = 1; i < 17; i++) {
-        mantissa_tab2[i][1] = mantissa_tab1[i][0] * 0.5f;
-        mantissa_tab2[i][2] = mantissa_tab1[i][0] * 0.75f;
-        mantissa_tab2[i][3] = mantissa_tab1[i][0] * 0.875f;
-        for (j = 1; j < 4; j++)
-            mantissa_tab3[i][j] = 1.0f / (1 << i) + 1.0f / (1 << j) - 1.0f / (1 << i + j);
-    }
-
-    mantissa_tab3[1][3] = 0.6875f;
-
-    for (i = 0; i < 25; i++) {
-        exponent_tab[i * 2    ] = 1.0f      / (1 << i);
-        exponent_tab[i * 2 + 1] = M_SQRT1_2 / (1 << i);
-    }
-
-    for (i = 1; i < 1024; i++)
-        gain_tab[i] = exp2f((i - 960) / 64.0f);
-
-    // short 1
-    ff_kbd_window_init(window, 3.0f, 128);
-    for (i = 0; i < 128; i++)
-        window[128 + i] = window[127 - i];
-
-    // start
-    for (i = 0; i < 192; i++)
-        window[256 + i] = start_window[i];
-
-    // short 2
-    for (i = 0; i < 192; i++)
-        window[448 + i] = short_window2[i];
-    for (i = 0; i < 64; i++)
-        window[640 + i] = window[63 - i];
-
-    // short 3
-    for (i = 0; i < 64; i++)
-        window[704 + i] = short_window3[i];
-    for (i = 0; i < 192; i++)
-        window[768 + i] = window[64 + i];
-
-    // bridge
-    for (i = 0; i < 128; i++)
-        window[960 + i] = window[i];
-    for (i = 0; i < 64; i++)
-        window[1088 + i] = 1.0f;
-
-    // long
-    ff_kbd_window_init(window + 1408, 3.0f, 256);
-    for (i = 0; i < 640; i++)
-        window[1664 + i] = 1.0f;
-    for (i = 0; i < 256; i++)
-        window[2304 + i] = window[1152 + i] = window[1663 - i];
-
-    // reverse start
-    for (i = 0; i < 192; i++)
-        window[2560 + i] = window[447 - i];
-
-    // reverse short 2
-    for (i = 0; i < 256; i++)
-        window[2752 + i] = window[703 - i];
-
-    // reverse short 3
-    for (i = 0; i < 256; i++)
-        window[3008 + i] = window[959 - i];
-
-    // reverse bridge
-    for (i = 0; i < 448; i++)
-        window[3264 + i] = window[1407 - i];
-}
-
 #endif

diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c
index 1aa2cbd..cf23bb6 100644
--- a/libavcodec/dpx.c
+++ b/libavcodec/dpx.c

@@ -51,7 +51,7 @@
 }
 
 static uint16_t read10in32(const uint8_t **ptr, uint32_t * lbuf,
-                                  int * n_datum, int is_big)
+                                  int * n_datum, int is_big, int shift)
 {
     if (*n_datum)
         (*n_datum)--;
@@ -60,11 +60,43 @@
         *n_datum = 2;
     }
 
-    *lbuf = (*lbuf << 10) | (*lbuf >> 22);
+    *lbuf = *lbuf << 10 | *lbuf >> shift & 0x3FFFFF;
 
     return *lbuf & 0x3FF;
 }
 
+static uint16_t read12in32(const uint8_t **ptr, uint32_t * lbuf,
+                                  int * n_datum, int is_big)
+{
+    if (*n_datum)
+        (*n_datum)--;
+    else {
+        *lbuf = read32(ptr, is_big);
+        *n_datum = 7;
+    }
+
+    switch (*n_datum){
+    case 7: return *lbuf & 0xFFF;
+    case 6: return (*lbuf >> 12) & 0xFFF;
+    case 5: {
+            uint32_t c = *lbuf >> 24;
+            *lbuf = read32(ptr, is_big);
+            c |= *lbuf << 8;
+            return c & 0xFFF;
+            }
+    case 4: return (*lbuf >> 4) & 0xFFF;
+    case 3: return (*lbuf >> 16) & 0xFFF;
+    case 2: {
+            uint32_t c = *lbuf >> 28;
+            *lbuf = read32(ptr, is_big);
+            c |= *lbuf << 4;
+            return c & 0xFFF;
+            }
+    case 1: return (*lbuf >> 8) & 0xFFF;
+    default: return *lbuf >> 20;
+    }
+}
+
 static int decode_frame(AVCodecContext *avctx,
                         void *data,
                         int *got_frame,
@@ -138,10 +170,6 @@
     packing = read16(&buf, endian);
     encoding = read16(&buf, endian);
 
-    if (packing > 1) {
-        avpriv_report_missing_feature(avctx, "Packing %d", packing);
-        return AVERROR_PATCHWELCOME;
-    }
     if (encoding) {
         avpriv_report_missing_feature(avctx, "Encoding %d", encoding);
         return AVERROR_PATCHWELCOME;
@@ -200,11 +228,18 @@
         stride = (avctx->width * elements + 2) / 3 * 4;
         break;
     case 12:
-        if (!packing) {
-            av_log(avctx, AV_LOG_ERROR, "Packing to 16bit required\n");
-            return -1;
+        stride = avctx->width * elements;
+        if (packing) {
+            stride *= 2;
+        } else {
+            stride *= 3;
+            if (stride % 8) {
+                stride /= 8;
+                stride++;
+                stride *= 8;
+            }
+            stride /= 2;
         }
-        stride = 2 * avctx->width * elements;
         break;
     case 16:
         stride = 2 * avctx->width * elements;
@@ -325,17 +360,18 @@
                                 (uint16_t*)ptr[1],
                                 (uint16_t*)ptr[2],
                                 (uint16_t*)ptr[3]};
+            int shift = packing == 1 ? 22 : 20;
             for (y = 0; y < avctx->width; y++) {
                 *dst[2]++ = read10in32(&buf, &rgbBuffer,
-                                       &n_datum, endian);
+                                       &n_datum, endian, shift);
                 *dst[0]++ = read10in32(&buf, &rgbBuffer,
-                                       &n_datum, endian);
+                                       &n_datum, endian, shift);
                 *dst[1]++ = read10in32(&buf, &rgbBuffer,
-                                       &n_datum, endian);
+                                       &n_datum, endian, shift);
                 if (elements == 4)
                     *dst[3]++ =
                     read10in32(&buf, &rgbBuffer,
-                               &n_datum, endian);
+                               &n_datum, endian, shift);
             }
             n_datum = 0;
             for (i = 0; i < elements; i++)
@@ -348,16 +384,31 @@
                                 (uint16_t*)ptr[1],
                                 (uint16_t*)ptr[2],
                                 (uint16_t*)ptr[3]};
+            int shift = packing == 1 ? 4 : 0;
             for (y = 0; y < avctx->width; y++) {
-                if (elements >= 3)
-                    *dst[2]++ = read16(&buf, endian) >> 4;
-                *dst[0] = read16(&buf, endian) >> 4;
-                dst[0]++;
-                if (elements >= 2)
-                    *dst[1]++ = read16(&buf, endian) >> 4;
-                if (elements == 4)
-                    *dst[3]++ = read16(&buf, endian) >> 4;
+                if (packing) {
+                    if (elements >= 3)
+                        *dst[2]++ = read16(&buf, endian) >> shift & 0xFFF;
+                    *dst[0]++ = read16(&buf, endian) >> shift & 0xFFF;
+                    if (elements >= 2)
+                        *dst[1]++ = read16(&buf, endian) >> shift & 0xFFF;
+                    if (elements == 4)
+                        *dst[3]++ = read16(&buf, endian) >> shift & 0xFFF;
+                } else {
+                    if (elements >= 3)
+                        *dst[2]++ = read12in32(&buf, &rgbBuffer,
+                                               &n_datum, endian);
+                    *dst[0]++ = read12in32(&buf, &rgbBuffer,
+                                           &n_datum, endian);
+                    if (elements >= 2)
+                        *dst[1]++ = read12in32(&buf, &rgbBuffer,
+                                               &n_datum, endian);
+                    if (elements == 4)
+                        *dst[3]++ = read12in32(&buf, &rgbBuffer,
+                                               &n_datum, endian);
+                }
             }
+            n_datum = 0;
             for (i = 0; i < elements; i++)
                 ptr[i] += p->linesize[i];
             // Jump to next aligned position

diff --git a/libavcodec/dsicinvideo.c b/libavcodec/dsicinvideo.c
index f95cbc7..37175d6 100644
--- a/libavcodec/dsicinvideo.c
+++ b/libavcodec/dsicinvideo.c

@@ -158,6 +158,9 @@
         }
     }
 
+    if (dst_end - dst > dst_size - dst_size/10)
+        return AVERROR_INVALIDDATA;
+
     return 0;
 }
 
@@ -184,6 +187,10 @@
         }
         dst += len;
     }
+
+    if (dst_end - dst > dst_size - dst_size/10)
+        return AVERROR_INVALIDDATA;
+
     return 0;
 }
 
@@ -226,27 +233,35 @@
      * surface.width = surface.pitch */
     switch (bitmap_frame_type) {
     case 9:
-        cin_decode_rle(buf, bitmap_frame_size,
+        res =  cin_decode_rle(buf, bitmap_frame_size,
                        cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        if (res < 0)
+            return res;
         break;
     case 34:
-        cin_decode_rle(buf, bitmap_frame_size,
+        res =  cin_decode_rle(buf, bitmap_frame_size,
                        cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        if (res < 0)
+            return res;
         cin_apply_delta_data(cin->bitmap_table[CIN_PRE_BMP],
                              cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
         break;
     case 35:
         bitmap_frame_size = cin_decode_huffman(buf, bitmap_frame_size,
                            cin->bitmap_table[CIN_INT_BMP], cin->bitmap_size);
-        cin_decode_rle(cin->bitmap_table[CIN_INT_BMP], bitmap_frame_size,
+        res =  cin_decode_rle(cin->bitmap_table[CIN_INT_BMP], bitmap_frame_size,
                        cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        if (res < 0)
+            return res;
         break;
     case 36:
         bitmap_frame_size = cin_decode_huffman(buf, bitmap_frame_size,
                                                cin->bitmap_table[CIN_INT_BMP],
                                                cin->bitmap_size);
-        cin_decode_rle(cin->bitmap_table[CIN_INT_BMP], bitmap_frame_size,
+        res = cin_decode_rle(cin->bitmap_table[CIN_INT_BMP], bitmap_frame_size,
                        cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        if (res < 0)
+            return res;
         cin_apply_delta_data(cin->bitmap_table[CIN_PRE_BMP],
                              cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
         break;
@@ -313,5 +328,6 @@
     .init           = cinvideo_decode_init,
     .close          = cinvideo_decode_end,
     .decode         = cinvideo_decode_frame,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
     .capabilities   = AV_CODEC_CAP_DR1,
 };

diff --git a/libavcodec/dump_extradata_bsf.c b/libavcodec/dump_extradata_bsf.c
index fa7bc86..188a1c6 100644
--- a/libavcodec/dump_extradata_bsf.c
+++ b/libavcodec/dump_extradata_bsf.c

@@ -34,16 +34,17 @@
 
 typedef struct DumpExtradataContext {
     const AVClass *class;
+    AVPacket pkt;
     int freq;
 } DumpExtradataContext;
 
 static int dump_extradata(AVBSFContext *ctx, AVPacket *out)
 {
     DumpExtradataContext *s = ctx->priv_data;
-    AVPacket *in;
+    AVPacket *in = &s->pkt;
     int ret = 0;
 
-    ret = ff_bsf_get_packet(ctx, &in);
+    ret = ff_bsf_get_packet_ref(ctx, in);
     if (ret < 0)
         return ret;
 
@@ -72,19 +73,20 @@
     }
 
 fail:
-    av_packet_free(&in);
+    av_packet_unref(in);
 
     return ret;
 }
 
 #define OFFSET(x) offsetof(DumpExtradataContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
 static const AVOption options[] = {
     { "freq", "When do dump extradata", OFFSET(freq), AV_OPT_TYPE_INT,
-        { .i64 = DUMP_FREQ_KEYFRAME }, DUMP_FREQ_KEYFRAME, DUMP_FREQ_ALL, 0, "freq" },
-        { "k",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = DUMP_FREQ_KEYFRAME }, .unit = "freq" },
-        { "keyframe", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = DUMP_FREQ_KEYFRAME }, .unit = "freq" },
-        { "e",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = DUMP_FREQ_ALL      }, .unit = "freq" },
-        { "all",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = DUMP_FREQ_ALL      }, .unit = "freq" },
+        { .i64 = DUMP_FREQ_KEYFRAME }, DUMP_FREQ_KEYFRAME, DUMP_FREQ_ALL, FLAGS, "freq" },
+        { "k",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = DUMP_FREQ_KEYFRAME }, .flags = FLAGS, .unit = "freq" },
+        { "keyframe", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = DUMP_FREQ_KEYFRAME }, .flags = FLAGS, .unit = "freq" },
+        { "e",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = DUMP_FREQ_ALL      }, .flags = FLAGS, .unit = "freq" },
+        { "all",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = DUMP_FREQ_ALL      }, .flags = FLAGS, .unit = "freq" },
     { NULL },
 };
 
@@ -92,7 +94,7 @@
     .class_name = "dump_extradata bsf",
     .item_name  = av_default_item_name,
     .option     = options,
-    .version    = LIBAVUTIL_VERSION_MAJOR,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 const AVBitStreamFilter ff_dump_extradata_bsf = {

diff --git a/libavcodec/dvbsub.c b/libavcodec/dvbsub.c
index 3cdbade..8cce702 100644
--- a/libavcodec/dvbsub.c
+++ b/libavcodec/dvbsub.c

@@ -239,9 +239,9 @@
             x += len;
         }
         /* end of line */
-        // 00000000 00000000 end of 8-bit/pixel_code_string
+        // 00000000 end of 8-bit/pixel_code_string
         *q++ = 0x00;
-        *q++ = 0x00;
+        *q++ = 0xf0;
         bitmap += linesize;
     }
     *pq = q;
@@ -342,6 +342,9 @@
         } else if (h->rects[region_id]->nb_colors <= 16) {
             /* 4 bpp, standard encoding */
             bpp_index = 1;
+        } else if (h->rects[region_id]->nb_colors <= 256) {
+            /* 8 bpp, standard encoding */
+            bpp_index = 2;
         } else {
             return -1;
         }

diff --git a/libavcodec/dvbsub_parser.c b/libavcodec/dvbsub_parser.c
index e77b965..8ced3c4 100644
--- a/libavcodec/dvbsub_parser.c
+++ b/libavcodec/dvbsub_parser.c

@@ -57,6 +57,7 @@
     DVBSubParseContext *pc = s->priv_data;
     uint8_t *p, *p_end;
     int i, len, buf_pos = 0;
+    int out_size = 0;
 
     ff_dlog(avctx, "DVB parse packet pts=%"PRIx64", lpts=%"PRIx64", cpts=%"PRIx64":\n",
             s->pts, s->last_pts, s->cur_frame_pts[s->cur_frame_start_index]);
@@ -71,8 +72,8 @@
     if (i % 16 != 0)
         ff_dlog(avctx, "\n");
 
-    *poutbuf = NULL;
-    *poutbuf_size = 0;
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
 
     s->fetch_timestamp = 1;
 
@@ -89,7 +90,7 @@
 
         if (buf_size < 2 || buf[0] != 0x20 || buf[1] != 0x00) {
             ff_dlog(avctx, "Bad packet header\n");
-            return -1;
+            return buf_size;
         }
 
         buf_pos = 2;
@@ -113,9 +114,9 @@
     }
 
     if (buf_size - buf_pos + pc->packet_index > PARSE_BUF_SIZE)
-        return -1;
+        return buf_size;
 
-/* if not currently in a packet, discard data */
+/* if not currently in a packet, pass data */
     if (pc->in_packet == 0)
         return buf_size;
 
@@ -135,7 +136,7 @@
 
                 if (len + 6 <= p_end - p)
                 {
-                    *poutbuf_size += len + 6;
+                    out_size += len + 6;
 
                     p += len + 6;
                 } else
@@ -159,9 +160,10 @@
         }
     }
 
-    if (*poutbuf_size > 0)
+    if (out_size > 0)
     {
         *poutbuf = pc->packet_buf;
+        *poutbuf_size = out_size;
         pc->packet_start = *poutbuf_size;
     }
 

diff --git a/libavcodec/dvbsubdec.c b/libavcodec/dvbsubdec.c
index b683109..b59e836 100644
--- a/libavcodec/dvbsubdec.c
+++ b/libavcodec/dvbsubdec.c

@@ -96,6 +96,9 @@
     int clut;
     int bgcolor;
 
+    uint8_t computed_clut[4*256];
+    int has_computed_clut;
+
     uint8_t *pbuf;
     int buf_size;
     int dirty;
@@ -124,6 +127,7 @@
     int compute_edt; /**< if 1 end display time calculated using pts
                           if 0 (Default) calculated using time out */
     int compute_clut;
+    int clut_count2[257][256];
     int substream;
     int64_t prev_start;
     DVBSubRegion *region_list;
@@ -647,14 +651,18 @@
     return pixels_read;
 }
 
-static void compute_default_clut(AVSubtitleRect *rect, int w, int h)
+static void compute_default_clut(DVBSubContext *ctx, uint8_t *clut, AVSubtitleRect *rect, int w, int h)
 {
     uint8_t list[256] = {0};
     uint8_t list_inv[256];
     int counttab[256] = {0};
+    int (*counttab2)[256] = ctx->clut_count2;
     int count, i, x, y;
+    ptrdiff_t stride = rect->linesize[0];
 
-#define V(x,y) rect->data[0][(x) + (y)*rect->linesize[0]]
+    memset(ctx->clut_count2, 0 , sizeof(ctx->clut_count2));
+
+#define V(x,y) rect->data[0][(x) + (y)*stride]
     for (y = 0; y<h; y++) {
         for (x = 0; x<w; x++) {
             int v = V(x,y) + 1;
@@ -663,30 +671,35 @@
             int vt = y     ? V(x,y-1) + 1 : 0;
             int vb = y+1<h ? V(x,y+1) + 1 : 0;
             counttab[v-1] += !!((v!=vl) + (v!=vr) + (v!=vt) + (v!=vb));
+            counttab2[vl][v-1] ++;
+            counttab2[vr][v-1] ++;
+            counttab2[vt][v-1] ++;
+            counttab2[vb][v-1] ++;
         }
     }
-#define L(x,y) list[ rect->data[0][(x) + (y)*rect->linesize[0]] ]
+#define L(x,y) list[d[(x) + (y)*stride]]
 
     for (i = 0; i<256; i++) {
-        int scoretab[256] = {0};
+        counttab2[i+1][i] = 0;
+    }
+    for (i = 0; i<256; i++) {
         int bestscore = 0;
         int bestv = 0;
-        for (y = 0; y<h; y++) {
-            for (x = 0; x<w; x++) {
-                int v = rect->data[0][x + y*rect->linesize[0]];
-                int l_m = list[v];
-                int l_l = x     ? L(x-1, y) : 1;
-                int l_r = x+1<w ? L(x+1, y) : 1;
-                int l_t = y     ? L(x, y-1) : 1;
-                int l_b = y+1<h ? L(x, y+1) : 1;
-                int score;
-                if (l_m)
-                    continue;
-                scoretab[v] += l_l + l_r + l_t + l_b;
-                score = 1024LL*scoretab[v] / counttab[v];
+
+        for (x = 0; x < 256; x++) {
+            int scorev = 0;
+            if (list[x])
+                continue;
+            scorev += counttab2[0][x];
+            for (y = 0; y < 256; y++) {
+                scorev += list[y] * counttab2[y+1][x];
+            }
+
+            if (scorev) {
+                int score = 1024LL*scorev / counttab[x];
                 if (score > bestscore) {
                     bestscore = score;
-                    bestv = v;
+                    bestv = x;
                 }
             }
         }
@@ -699,7 +712,7 @@
     count = FFMAX(i - 1, 1);
     for (i--; i>=0; i--) {
         int v = i*255/count;
-        AV_WN32(rect->data[1] + 4*list_inv[i], RGBA(v/2,v,v/2,v));
+        AV_WN32(clut + 4*list_inv[i], RGBA(v/2,v,v/2,v));
     }
 }
 
@@ -749,8 +762,13 @@
             goto fail;
         }
 
-        for(i=0; i<sub->num_rects; i++)
+        for (i = 0; i < sub->num_rects; i++) {
             sub->rects[i] = av_mallocz(sizeof(*sub->rects[i]));
+            if (!sub->rects[i]) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+        }
 
         i = 0;
 
@@ -805,8 +823,14 @@
 
             memcpy(rect->data[0], region->pbuf, region->buf_size);
 
-            if ((clut == &default_clut && ctx->compute_clut == -1) || ctx->compute_clut == 1)
-                compute_default_clut(rect, rect->w, rect->h);
+            if ((clut == &default_clut && ctx->compute_clut == -1) || ctx->compute_clut == 1) {
+                if (!region->has_computed_clut) {
+                    compute_default_clut(ctx, region->computed_clut, rect, rect->w, rect->h);
+                    region->has_computed_clut = 1;
+                }
+
+                memcpy(rect->data[1], region->computed_clut, sizeof(region->computed_clut));
+            }
 
 #if FF_API_AVPICTURE
 FF_DISABLE_DEPRECATION_WARNINGS
@@ -955,6 +979,7 @@
         }
     }
 
+    region->has_computed_clut = 0;
 }
 
 static int dvbsub_parse_object_segment(AVCodecContext *avctx,

diff --git a/libavcodec/dvdsub_parser.c b/libavcodec/dvdsub_parser.c
index 32a945e..698ccb6 100644
--- a/libavcodec/dvdsub_parser.c
+++ b/libavcodec/dvdsub_parser.c

@@ -44,6 +44,9 @@
 {
     DVDSubParseContext *pc = s->priv_data;
 
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+
     if (pc->packet_index == 0) {
         if (buf_size < 2 || AV_RB16(buf) && buf_size < 6) {
             if (buf_size)
@@ -54,7 +57,11 @@
         if (pc->packet_len == 0) /* HD-DVD subpicture packet */
             pc->packet_len = AV_RB32(buf+2);
         av_freep(&pc->packet);
-        pc->packet = av_malloc(pc->packet_len);
+        if ((unsigned)pc->packet_len > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
+            av_log(avctx, AV_LOG_ERROR, "packet length %d is invalid\n", pc->packet_len);
+            return buf_size;
+        }
+        pc->packet = av_malloc(pc->packet_len + AV_INPUT_BUFFER_PADDING_SIZE);
     }
     if (pc->packet) {
         if (pc->packet_index + buf_size <= pc->packet_len) {

diff --git a/libavcodec/dvdsubdec.c b/libavcodec/dvdsubdec.c
index e18113c..632a53a 100644
--- a/libavcodec/dvdsubdec.c
+++ b/libavcodec/dvdsubdec.c

@@ -42,6 +42,7 @@
   uint8_t  buf[0x10000];
   int      buf_size;
   int      forced_subs_only;
+  uint8_t  used_color[256];
 #ifdef DEBUG
   int sub_id;
 #endif
@@ -82,10 +83,7 @@
 {
     int len;
     int has_run = get_bits1(gb);
-    if (get_bits1(gb))
-        *color = get_bits(gb, 8);
-    else
-        *color = get_bits(gb, 2);
+    *color = get_bits(gb, 2 + 6*get_bits1(gb));
     if (has_run) {
         if (get_bits1(gb)) {
             len = get_bits(gb, 7);
@@ -100,7 +98,7 @@
     return len;
 }
 
-static int decode_rle(uint8_t *bitmap, int linesize, int w, int h,
+static int decode_rle(uint8_t *bitmap, int linesize, int w, int h, uint8_t used_color[256],
                       const uint8_t *buf, int start, int buf_size, int is_8bit)
 {
     GetBitContext gb;
@@ -127,8 +125,11 @@
             len = decode_run_8bit(&gb, &color);
         else
             len = decode_run_2bit(&gb, &color);
+        if (len != INT_MAX && len > w - x)
+            return AVERROR_INVALIDDATA;
         len = FFMIN(len, w - x);
         memset(d + x, color, len);
+        used_color[color] = 1;
         x += len;
         if (x >= w) {
             y++;
@@ -370,7 +371,7 @@
                 h = 0;
             if (w > 0 && h > 1) {
                 reset_rects(sub_header);
-
+                memset(ctx->used_color, 0, sizeof(ctx->used_color));
                 sub_header->rects = av_mallocz(sizeof(*sub_header->rects));
                 if (!sub_header->rects)
                     goto fail;
@@ -381,10 +382,10 @@
                 bitmap = sub_header->rects[0]->data[0] = av_malloc(w * h);
                 if (!bitmap)
                     goto fail;
-                if (decode_rle(bitmap, w * 2, w, (h + 1) / 2,
+                if (decode_rle(bitmap, w * 2, w, (h + 1) / 2, ctx->used_color,
                                buf, offset1, buf_size, is_8bit) < 0)
                     goto fail;
-                if (decode_rle(bitmap + w, w * 2, w, h / 2,
+                if (decode_rle(bitmap + w, w * 2, w, h / 2, ctx->used_color,
                                buf, offset2, buf_size, is_8bit) < 0)
                     goto fail;
                 sub_header->rects[0]->data[1] = av_mallocz(AVPALETTE_SIZE);
@@ -448,19 +449,24 @@
 }
 
 /* return 0 if empty rectangle, 1 if non empty */
-static int find_smallest_bounding_rectangle(AVSubtitle *s)
+static int find_smallest_bounding_rectangle(DVDSubContext *ctx, AVSubtitle *s)
 {
     uint8_t transp_color[256] = { 0 };
     int y1, y2, x1, x2, y, w, h, i;
     uint8_t *bitmap;
+    int transparent = 1;
 
     if (s->num_rects == 0 || !s->rects || s->rects[0]->w <= 0 || s->rects[0]->h <= 0)
         return 0;
 
     for(i = 0; i < s->rects[0]->nb_colors; i++) {
-        if ((((uint32_t *)s->rects[0]->data[1])[i] >> 24) == 0)
+        if ((((uint32_t *)s->rects[0]->data[1])[i] >> 24) == 0) {
             transp_color[i] = 1;
+        } else if (ctx->used_color[i])
+            transparent = 0;
     }
+    if (transparent)
+        return 0;
     y1 = 0;
     while (y1 < s->rects[0]->h && is_transp(s->rects[0]->data[0] + y1 * s->rects[0]->linesize[0],
                                   1, s->rects[0]->w, transp_color))
@@ -595,7 +601,7 @@
 
         return buf_size;
     }
-    if (!is_menu && find_smallest_bounding_rectangle(sub) == 0)
+    if (!is_menu && find_smallest_bounding_rectangle(ctx, sub) == 0)
         goto no_subtitle;
 
     if (ctx->forced_subs_only && !(sub->rects[0]->flags & AV_SUBTITLE_FLAG_FORCED))

diff --git a/libavcodec/dvdsubenc.c b/libavcodec/dvdsubenc.c
index 26afdc6..ff95ed2 100644
--- a/libavcodec/dvdsubenc.c
+++ b/libavcodec/dvdsubenc.c

@@ -438,7 +438,7 @@
     av_assert0(sizeof(dvdc->global_palette) == sizeof(default_palette));
     memcpy(dvdc->global_palette, default_palette, sizeof(dvdc->global_palette));
 
-    av_bprint_init(&extradata, 0, 1);
+    av_bprint_init(&extradata, 0, AV_BPRINT_SIZE_AUTOMATIC);
     if (avctx->width && avctx->height)
         av_bprintf(&extradata, "size: %dx%d\n", avctx->width, avctx->height);
     av_bprintf(&extradata, "palette:");

diff --git a/libavcodec/dxtory.c b/libavcodec/dxtory.c
index e736cec..285ca38 100644
--- a/libavcodec/dxtory.c
+++ b/libavcodec/dxtory.c

@@ -305,11 +305,7 @@
     }
 
     if (avctx->height - line) {
-        av_log(avctx, AV_LOG_VERBOSE,
-               "Not enough slice data available, "
-               "cropping the frame by %d pixels\n",
-                avctx->height - line);
-        avctx->height = line;
+        avpriv_request_sample(avctx, "Not enough slice data available");
     }
 
     return 0;

diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
index 529e211..08aca73 100644
--- a/libavcodec/dxv.c
+++ b/libavcodec/dxv.c

@@ -1,6 +1,7 @@
 /*
  * Resolume DXV decoder
  * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ * Copyright (C) 2018 Paul B Mahol
  *
  * This file is part of FFmpeg.
  *
@@ -23,6 +24,7 @@
 
 #include "libavutil/imgutils.h"
 
+#include "mathops.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
@@ -34,50 +36,211 @@
     TextureDSPContext texdsp;
     GetByteContext gbc;
 
-    uint8_t *tex_data;  // Compressed texture
-    int tex_rat;        // Compression ratio
-    int tex_step;       // Distance between blocks
-    int64_t tex_size;   // Texture size
+    uint8_t *tex_data;   // Compressed texture
+    uint8_t *ctex_data;  // Compressed texture
+    int tex_rat;         // Compression ratio
+    int tex_step;        // Distance between blocks
+    int ctex_step;       // Distance between blocks
+    int64_t tex_size;    // Texture size
+    int64_t ctex_size;   // Texture size
 
     /* Optimal number of slices for parallel decoding */
     int slice_count;
 
+    uint8_t *op_data[4]; // Opcodes
+    int64_t op_size[4];  // Opcodes size
+
+    int texture_block_w;
+    int texture_block_h;
+
+    int ctexture_block_w;
+    int ctexture_block_h;
+
     /* Pointer to the selected decompression function */
     int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*tex_funct_planar[2])(uint8_t *plane0, ptrdiff_t stride0,
+                               uint8_t *plane1, ptrdiff_t stride1,
+                               const uint8_t *block);
 } DXVContext;
 
+static void decompress_indices(uint8_t *dst, const uint8_t *src)
+{
+    int block, i;
+
+    for (block = 0; block < 2; block++) {
+        int tmp = AV_RL24(src);
+
+        /* Unpack 8x3 bit from last 3 byte block */
+        for (i = 0; i < 8; i++)
+            dst[i] = (tmp >> (i * 3)) & 0x7;
+
+        src += 3;
+        dst += 8;
+    }
+}
+
+static int extract_component(int yo0, int yo1, int code)
+{
+    int yo;
+
+    if (yo0 == yo1) {
+        yo = yo0;
+    } else if (code == 0) {
+        yo = yo0;
+    } else if (code == 1) {
+        yo = yo1;
+    } else {
+        if (yo0 > yo1) {
+            yo = (uint8_t) (((8 - code) * yo0 +
+                             (code - 1) * yo1) / 7);
+        } else {
+            if (code == 6) {
+                yo = 0;
+            } else if (code == 7) {
+                yo = 255;
+            } else {
+                yo = (uint8_t) (((6 - code) * yo0 +
+                                 (code - 1) * yo1) / 5);
+            }
+        }
+    }
+
+    return yo;
+}
+
+static int cocg_block(uint8_t *plane0, ptrdiff_t stride0,
+                      uint8_t *plane1, ptrdiff_t stride1,
+                      const uint8_t *block)
+{
+    uint8_t co_indices[16];
+    uint8_t cg_indices[16];
+    uint8_t co0 = *(block);
+    uint8_t co1 = *(block + 1);
+    uint8_t cg0 = *(block + 8);
+    uint8_t cg1 = *(block + 9);
+    int x, y;
+
+    decompress_indices(co_indices, block + 2);
+    decompress_indices(cg_indices, block + 10);
+
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int co_code = co_indices[x + y * 4];
+            int cg_code = cg_indices[x + y * 4];
+
+            plane0[x] = extract_component(cg0, cg1, cg_code);
+            plane1[x] = extract_component(co0, co1, co_code);
+        }
+        plane0 += stride0;
+        plane1 += stride1;
+    }
+
+    return 16;
+}
+
+static void yao_subblock(uint8_t *dst, uint8_t *yo_indices,
+                        ptrdiff_t stride, const uint8_t *block)
+{
+    uint8_t yo0 = *(block);
+    uint8_t yo1 = *(block + 1);
+    int x, y;
+
+    decompress_indices(yo_indices, block + 2);
+
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int yo_code = yo_indices[x + y * 4];
+
+            dst[x] = extract_component(yo0, yo1, yo_code);
+        }
+        dst += stride;
+    }
+}
+
+static int yo_block(uint8_t *dst, ptrdiff_t stride,
+                    uint8_t *unused0, ptrdiff_t unused1,
+                    const uint8_t *block)
+{
+    uint8_t yo_indices[16];
+
+    yao_subblock(dst,      yo_indices, stride, block);
+    yao_subblock(dst + 4,  yo_indices, stride, block + 8);
+    yao_subblock(dst + 8,  yo_indices, stride, block + 16);
+    yao_subblock(dst + 12, yo_indices, stride, block + 24);
+
+    return 32;
+}
+
+static int yao_block(uint8_t *plane0, ptrdiff_t stride0,
+                     uint8_t *plane3, ptrdiff_t stride1,
+                     const uint8_t *block)
+{
+    uint8_t yo_indices[16];
+    uint8_t a_indices[16];
+
+    yao_subblock(plane0,      yo_indices, stride0, block);
+    yao_subblock(plane3,      a_indices,  stride1, block + 8);
+    yao_subblock(plane0 + 4,  yo_indices, stride0, block + 16);
+    yao_subblock(plane3 + 4,  a_indices,  stride1, block + 24);
+    yao_subblock(plane0 + 8,  yo_indices, stride0, block + 32);
+    yao_subblock(plane3 + 8,  a_indices,  stride1, block + 40);
+    yao_subblock(plane0 + 12, yo_indices, stride0, block + 48);
+    yao_subblock(plane3 + 12, a_indices,  stride1, block + 56);
+
+    return 64;
+}
+
 static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
                                      int slice, int thread_nb)
 {
     DXVContext *ctx = avctx->priv_data;
     AVFrame *frame = arg;
     const uint8_t *d = ctx->tex_data;
-    int w_block = avctx->coded_width / TEXTURE_BLOCK_W;
-    int h_block = avctx->coded_height / TEXTURE_BLOCK_H;
+    int w_block = avctx->coded_width / ctx->texture_block_w;
+    int h_block = avctx->coded_height / ctx->texture_block_h;
     int x, y;
     int start_slice, end_slice;
-    int base_blocks_per_slice = h_block / ctx->slice_count;
-    int remainder_blocks = h_block % ctx->slice_count;
 
-    /* When the frame height (in blocks) doesn't divide evenly between the
-     * number of slices, spread the remaining blocks evenly between the first
-     * operations */
-    start_slice = slice * base_blocks_per_slice;
-    /* Add any extra blocks (one per slice) that have been added
-     * before this slice */
-    start_slice += FFMIN(slice, remainder_blocks);
+    start_slice = h_block * slice / ctx->slice_count;
+    end_slice = h_block * (slice + 1) / ctx->slice_count;
 
-    end_slice = start_slice + base_blocks_per_slice;
-    /* Add an extra block if there are remainder blocks to be accounted for */
-    if (slice < remainder_blocks)
-        end_slice++;
+    if (ctx->tex_funct) {
+        for (y = start_slice; y < end_slice; y++) {
+            uint8_t *p = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
+            int off = y * w_block;
+            for (x = 0; x < w_block; x++) {
+                ctx->tex_funct(p + x * 4 * ctx->texture_block_w, frame->linesize[0],
+                               d + (off + x) * ctx->tex_step);
+            }
+        }
+    } else {
+        const uint8_t *c = ctx->ctex_data;
 
-    for (y = start_slice; y < end_slice; y++) {
-        uint8_t *p = frame->data[0] + y * frame->linesize[0] * TEXTURE_BLOCK_H;
-        int off  = y * w_block;
-        for (x = 0; x < w_block; x++) {
-            ctx->tex_funct(p + x * 16, frame->linesize[0],
-                           d + (off + x) * ctx->tex_step);
+        for (y = start_slice; y < end_slice; y++) {
+            uint8_t *p0 = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
+            uint8_t *p3 = ctx->tex_step != 64 ? NULL : frame->data[3] + y * frame->linesize[3] * ctx->texture_block_h;
+            int off = y * w_block;
+            for (x = 0; x < w_block; x++) {
+                ctx->tex_funct_planar[0](p0 + x * ctx->texture_block_w, frame->linesize[0],
+                                         p3 != NULL ? p3 + x * ctx->texture_block_w : NULL, frame->linesize[3],
+                                         d + (off + x) * ctx->tex_step);
+            }
+        }
+
+        w_block = (avctx->coded_width / 2) / ctx->ctexture_block_w;
+        h_block = (avctx->coded_height / 2) / ctx->ctexture_block_h;
+        start_slice = h_block * slice / ctx->slice_count;
+        end_slice = h_block * (slice + 1) / ctx->slice_count;
+
+        for (y = start_slice; y < end_slice; y++) {
+            uint8_t *p0 = frame->data[1] + y * frame->linesize[1] * ctx->ctexture_block_h;
+            uint8_t *p1 = frame->data[2] + y * frame->linesize[2] * ctx->ctexture_block_h;
+            int off = y * w_block;
+            for (x = 0; x < w_block; x++) {
+                ctx->tex_funct_planar[1](p0 + x * ctx->ctexture_block_w, frame->linesize[1],
+                                         p1 + x * ctx->ctexture_block_w, frame->linesize[2],
+                                         c + (off + x) * ctx->ctex_step);
+            }
         }
     }
 
@@ -169,6 +332,529 @@
     return 0;
 }
 
+typedef struct OpcodeTable {
+    int16_t next;
+    uint8_t val1;
+    uint8_t val2;
+} OpcodeTable;
+
+static int fill_ltable(GetByteContext *gb, uint32_t *table, int *nb_elements)
+{
+    unsigned half = 512, bits = 1023, left = 1024, input, mask;
+    int value, counter = 0, rshift = 10, lshift = 30;
+
+    mask = bytestream2_get_le32(gb) >> 2;
+    while (left) {
+        if (counter >= 256)
+            return AVERROR_INVALIDDATA;
+        value = bits & mask;
+        left -= bits & mask;
+        mask >>= rshift;
+        lshift -= rshift;
+        table[counter++] = value;
+        if (lshift < 16) {
+            if (bytestream2_get_bytes_left(gb) <= 0)
+                return AVERROR_INVALIDDATA;
+
+            input = bytestream2_get_le16(gb);
+            mask += input << lshift;
+            lshift += 16;
+        }
+        if (left < half) {
+            half >>= 1;
+            bits >>= 1;
+            rshift--;
+        }
+    }
+
+    for (; !table[counter - 1]; counter--)
+        if (counter <= 0)
+            return AVERROR_INVALIDDATA;
+
+    *nb_elements = counter;
+
+    if (counter < 256)
+        memset(&table[counter], 0, 4 * (256 - counter));
+
+    if (lshift >= 16)
+        bytestream2_seek(gb, -2, SEEK_CUR);
+
+    return 0;
+}
+
+static int fill_optable(unsigned *table0, OpcodeTable *table1, int nb_elements)
+{
+    unsigned table2[256] = { 0 };
+    unsigned x = 0;
+    int val0, val1, i, j = 2, k = 0;
+
+    table2[0] = table0[0];
+    for (i = 0; i < nb_elements - 1; i++, table2[i] = val0) {
+        val0 = table0[i + 1] + table2[i];
+    }
+
+    if (!table2[0]) {
+        do {
+            k++;
+        } while (!table2[k]);
+    }
+
+    j = 2;
+    for (i = 1024; i > 0; i--) {
+        for (table1[x].val1 = k; k < 256 && j > table2[k]; k++);
+        x = (x - 383) & 0x3FF;
+        j++;
+    }
+
+    if (nb_elements > 0)
+        memcpy(&table2[0], table0, 4 * nb_elements);
+
+    for (i = 0; i < 1024; i++) {
+        val0 = table1[i].val1;
+        val1 = table2[val0];
+        table2[val0]++;
+        x = 31 - ff_clz(val1);
+        if (x > 10)
+            return AVERROR_INVALIDDATA;
+        table1[i].val2 = 10 - x;
+        table1[i].next = (val1 << table1[i].val2) - 1024;
+    }
+
+    return 0;
+}
+
+static int get_opcodes(GetByteContext *gb, uint32_t *table, uint8_t *dst, int op_size, int nb_elements)
+{
+    OpcodeTable optable[1024];
+    int sum, x, val, lshift, rshift, ret, size_in_bits, i, idx;
+    unsigned endoffset, newoffset, offset;
+    unsigned next;
+    uint8_t *src = (uint8_t *)gb->buffer;
+
+    ret = fill_optable(table, optable, nb_elements);
+    if (ret < 0)
+        return ret;
+
+    size_in_bits = bytestream2_get_le32(gb);
+    endoffset = ((size_in_bits + 7) >> 3) - 4;
+    if (endoffset <= 0 || bytestream2_get_bytes_left(gb) < endoffset)
+        return AVERROR_INVALIDDATA;
+
+    offset = endoffset;
+    next = AV_RL32(src + endoffset);
+    rshift = (((size_in_bits & 0xFF) - 1) & 7) + 15;
+    lshift = 32 - rshift;
+    idx = (next >> rshift) & 0x3FF;
+    for (i = 0; i < op_size; i++) {
+        dst[i] = optable[idx].val1;
+        val = optable[idx].val2;
+        sum = val + lshift;
+        x = (next << lshift) >> 1 >> (31 - val);
+        newoffset = offset - (sum >> 3);
+        lshift = sum & 7;
+        idx = x + optable[idx].next;
+        offset = newoffset;
+        if (offset > endoffset)
+            return AVERROR_INVALIDDATA;
+        next = AV_RL32(src + offset);
+    }
+
+    bytestream2_skip(gb, (size_in_bits + 7 >> 3) - 4);
+
+    return 0;
+}
+
+static int dxv_decompress_opcodes(GetByteContext *gb, void *dstp, size_t op_size)
+{
+    int pos = bytestream2_tell(gb);
+    int flag = bytestream2_peek_byte(gb);
+
+    if ((flag & 3) == 0) {
+        bytestream2_skip(gb, 1);
+        bytestream2_get_buffer(gb, dstp, op_size);
+    } else if ((flag & 3) == 1) {
+        bytestream2_skip(gb, 1);
+        memset(dstp, bytestream2_get_byte(gb), op_size);
+    } else {
+        uint32_t table[256];
+        int ret, elements = 0;
+
+        ret = fill_ltable(gb, table, &elements);
+        if (ret < 0)
+            return ret;
+        ret = get_opcodes(gb, table, dstp, op_size, elements);
+        if (ret < 0)
+            return ret;
+    }
+    return bytestream2_tell(gb) - pos;
+}
+
+static int dxv_decompress_cgo(DXVContext *ctx, GetByteContext *gb,
+                              uint8_t *tex_data, int tex_size,
+                              uint8_t *op_data, int *oindex,
+                              int op_size,
+                              uint8_t **dstp, int *statep,
+                              uint8_t **tab0, uint8_t **tab1,
+                              int offset)
+{
+    uint8_t *dst = *dstp;
+    uint8_t *tptr0, *tptr1, *tptr3;
+    int oi = *oindex;
+    int state = *statep;
+    int opcode, v, vv;
+
+    if (state <= 0) {
+        if (oi >= op_size)
+            return AVERROR_INVALIDDATA;
+        opcode = op_data[oi++];
+        if (!opcode) {
+            v = bytestream2_get_byte(gb);
+            if (v == 255) {
+                do {
+                    if (bytestream2_get_bytes_left(gb) <= 0)
+                        return AVERROR_INVALIDDATA;
+                    opcode = bytestream2_get_le16(gb);
+                    v += opcode;
+                } while (opcode == 0xFFFF);
+            }
+            AV_WL32(dst, AV_RL32(dst - (8 + offset)));
+            AV_WL32(dst + 4, AV_RL32(dst - (4 + offset)));
+            state = v + 4;
+            goto done;
+        }
+
+        switch (opcode) {
+        case 1:
+            AV_WL32(dst, AV_RL32(dst - (8 + offset)));
+            AV_WL32(dst + 4, AV_RL32(dst - (4 + offset)));
+            break;
+        case 2:
+            vv = (8 + offset) * (bytestream2_get_le16(gb) + 1);
+            if (vv < 0 || vv > dst - tex_data)
+                return AVERROR_INVALIDDATA;
+            tptr0 = dst - vv;
+            v = AV_RL32(tptr0);
+            AV_WL32(dst, AV_RL32(tptr0));
+            AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+            tab0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 3:
+            AV_WL32(dst, bytestream2_get_le32(gb));
+            AV_WL32(dst + 4, bytestream2_get_le32(gb));
+            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 4:
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, bytestream2_get_le16(gb));
+            AV_WL16(dst + 2, AV_RL16(tptr3));
+            dst[4] = tptr3[2];
+            AV_WL16(dst + 5, bytestream2_get_le16(gb));
+            dst[7] = bytestream2_get_byte(gb);
+            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+            break;
+        case 5:
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, bytestream2_get_le16(gb));
+            AV_WL16(dst + 2, bytestream2_get_le16(gb));
+            dst[4] = bytestream2_get_byte(gb);
+            AV_WL16(dst + 5, AV_RL16(tptr3));
+            dst[7] = tptr3[2];
+            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 6:
+            tptr0 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr0)
+                return AVERROR_INVALIDDATA;
+            tptr1 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, bytestream2_get_le16(gb));
+            AV_WL16(dst + 2, AV_RL16(tptr0));
+            dst[4] = tptr0[2];
+            AV_WL16(dst + 5, AV_RL16(tptr1));
+            dst[7] = tptr1[2];
+            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+            break;
+        case 7:
+            v = (8 + offset) * (bytestream2_get_le16(gb) + 1);
+            if (v < 0 || v > dst - tex_data)
+                return AVERROR_INVALIDDATA;
+            tptr0 = dst - v;
+            AV_WL16(dst, bytestream2_get_le16(gb));
+            AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
+            AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 8:
+            tptr1 = tab0[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(tptr1));
+            AV_WL16(dst + 2, bytestream2_get_le16(gb));
+            AV_WL32(dst + 4, bytestream2_get_le32(gb));
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 9:
+            tptr1 = tab0[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(tptr1));
+            AV_WL16(dst + 2, AV_RL16(tptr3));
+            dst[4] = tptr3[2];
+            AV_WL16(dst + 5, bytestream2_get_le16(gb));
+            dst[7] = bytestream2_get_byte(gb);
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 10:
+            tptr1 = tab0[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(tptr1));
+            AV_WL16(dst + 2, bytestream2_get_le16(gb));
+            dst[4] = bytestream2_get_byte(gb);
+            AV_WL16(dst + 5, AV_RL16(tptr3));
+            dst[7] = tptr3[2];
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 11:
+            tptr0 = tab0[bytestream2_get_byte(gb)];
+            if (!tptr0)
+                return AVERROR_INVALIDDATA;
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            tptr1 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(tptr0));
+            AV_WL16(dst + 2, AV_RL16(tptr3));
+            dst[4] = tptr3[2];
+            AV_WL16(dst + 5, AV_RL16(tptr1));
+            dst[7] = tptr1[2];
+            break;
+        case 12:
+            tptr1 = tab0[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            v = (8 + offset) * (bytestream2_get_le16(gb) + 1);
+            if (v < 0 || v > dst - tex_data)
+                return AVERROR_INVALIDDATA;
+            tptr0 = dst - v;
+            AV_WL16(dst, AV_RL16(tptr1));
+            AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
+            AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 13:
+            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
+            AV_WL16(dst + 2, bytestream2_get_le16(gb));
+            AV_WL32(dst + 4, bytestream2_get_le32(gb));
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 14:
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
+            AV_WL16(dst + 2, AV_RL16(tptr3));
+            dst[4] = tptr3[2];
+            AV_WL16(dst + 5, bytestream2_get_le16(gb));
+            dst[7] = bytestream2_get_byte(gb);
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 15:
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
+            AV_WL16(dst + 2, bytestream2_get_le16(gb));
+            dst[4] = bytestream2_get_byte(gb);
+            AV_WL16(dst + 5, AV_RL16(tptr3));
+            dst[7] = tptr3[2];
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 16:
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            tptr1 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
+            AV_WL16(dst + 2, AV_RL16(tptr3));
+            dst[4] = tptr3[2];
+            AV_WL16(dst + 5, AV_RL16(tptr1));
+            dst[7] = tptr1[2];
+            break;
+        case 17:
+            v = (8 + offset) * (bytestream2_get_le16(gb) + 1);
+            if (v < 0 || v > dst - tex_data)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
+            AV_WL16(dst + 2, AV_RL16(&dst[-v + 2]));
+            AV_WL32(dst + 4, AV_RL32(&dst[-v + 4]));
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        default:
+            break;
+        }
+    } else {
+done:
+        AV_WL32(dst, AV_RL32(dst - (8 + offset)));
+        AV_WL32(dst + 4, AV_RL32(dst - (4 + offset)));
+        state--;
+    }
+    if (dst - tex_data + 8 > tex_size)
+        return AVERROR_INVALIDDATA;
+    dst += 8;
+
+    *oindex = oi;
+    *dstp = dst;
+    *statep = state;
+
+    return 0;
+}
+
+static int dxv_decompress_cocg(DXVContext *ctx, GetByteContext *gb,
+                               uint8_t *tex_data, int tex_size,
+                               uint8_t *op_data0, uint8_t *op_data1,
+                               int max_op_size0, int max_op_size1)
+{
+    uint8_t *dst, *tab2[256] = { 0 }, *tab0[256] = { 0 }, *tab3[256] = { 0 }, *tab1[256] = { 0 };
+    int op_offset = bytestream2_get_le32(gb);
+    unsigned op_size0 = bytestream2_get_le32(gb);
+    unsigned op_size1 = bytestream2_get_le32(gb);
+    int data_start = bytestream2_tell(gb);
+    int skip0, skip1, oi0 = 0, oi1 = 0;
+    int ret, state0 = 0, state1 = 0;
+
+    dst = tex_data;
+    bytestream2_skip(gb, op_offset - 12);
+    if (op_size0 > max_op_size0)
+        return AVERROR_INVALIDDATA;
+    skip0 = dxv_decompress_opcodes(gb, op_data0, op_size0);
+    if (skip0 < 0)
+        return skip0;
+    bytestream2_seek(gb, data_start + op_offset + skip0 - 12, SEEK_SET);
+    if (op_size1 > max_op_size1)
+        return AVERROR_INVALIDDATA;
+    skip1 = dxv_decompress_opcodes(gb, op_data1, op_size1);
+    if (skip1 < 0)
+        return skip1;
+    bytestream2_seek(gb, data_start, SEEK_SET);
+
+    AV_WL32(dst, bytestream2_get_le32(gb));
+    AV_WL32(dst + 4, bytestream2_get_le32(gb));
+    AV_WL32(dst + 8, bytestream2_get_le32(gb));
+    AV_WL32(dst + 12, bytestream2_get_le32(gb));
+
+    tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+    tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
+    tab2[0x9E3779B1 * AV_RL16(dst + 8) >> 24] = dst + 8;
+    tab3[0x9E3779B1 * (AV_RL32(dst + 10) & 0xFFFFFF) >> 24] = dst + 10;
+    dst += 16;
+    while (dst + 10 < tex_data + tex_size) {
+        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data0, &oi0, op_size0,
+                                 &dst, &state0, tab0, tab1, 8);
+        if (ret < 0)
+            return ret;
+        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data1, &oi1, op_size1,
+                                 &dst, &state1, tab2, tab3, 8);
+        if (ret < 0)
+            return ret;
+    }
+
+    bytestream2_seek(gb, data_start + op_offset + skip0 + skip1 - 12, SEEK_SET);
+
+    return 0;
+}
+
+static int dxv_decompress_yo(DXVContext *ctx, GetByteContext *gb,
+                             uint8_t *tex_data, int tex_size,
+                             uint8_t *op_data, int max_op_size)
+{
+    int op_offset = bytestream2_get_le32(gb);
+    unsigned op_size = bytestream2_get_le32(gb);
+    int data_start = bytestream2_tell(gb);
+    uint8_t *dst, *table0[256] = { 0 }, *table1[256] = { 0 };
+    int ret, state = 0, skip, oi = 0, v, vv;
+
+    dst = tex_data;
+    bytestream2_skip(gb, op_offset - 8);
+    if (op_size > max_op_size)
+        return AVERROR_INVALIDDATA;
+    skip = dxv_decompress_opcodes(gb, op_data, op_size);
+    if (skip < 0)
+        return skip;
+    bytestream2_seek(gb, data_start, SEEK_SET);
+
+    v = bytestream2_get_le32(gb);
+    AV_WL32(dst, v);
+    vv = bytestream2_get_le32(gb);
+    table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
+    AV_WL32(dst + 4, vv);
+    table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
+    dst += 8;
+
+    while (dst < tex_data + tex_size) {
+        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data, &oi, op_size,
+                                 &dst, &state, table0, table1, 0);
+        if (ret < 0)
+            return ret;
+    }
+
+    bytestream2_seek(gb, data_start + op_offset + skip - 8, SEEK_SET);
+
+    return 0;
+}
+
+static int dxv_decompress_ycg6(AVCodecContext *avctx)
+{
+    DXVContext *ctx = avctx->priv_data;
+    GetByteContext *gb = &ctx->gbc;
+    int ret;
+
+    ret = dxv_decompress_yo(ctx, gb, ctx->tex_data, ctx->tex_size,
+                            ctx->op_data[0], ctx->op_size[0]);
+    if (ret < 0)
+        return ret;
+
+    return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
+                               ctx->op_data[1], ctx->op_data[2],
+                               ctx->op_size[1], ctx->op_size[2]);
+}
+
+static int dxv_decompress_yg10(AVCodecContext *avctx)
+{
+    DXVContext *ctx = avctx->priv_data;
+    GetByteContext *gb = &ctx->gbc;
+    int ret;
+
+    ret = dxv_decompress_cocg(ctx, gb, ctx->tex_data, ctx->tex_size,
+                              ctx->op_data[0], ctx->op_data[3],
+                              ctx->op_size[0], ctx->op_size[3]);
+    if (ret < 0)
+        return ret;
+
+    return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
+                               ctx->op_data[1], ctx->op_data[2],
+                               ctx->op_size[1], ctx->op_size[2]);
+}
+
 static int dxv_decompress_dxt5(AVCodecContext *avctx)
 {
     DXVContext *ctx = avctx->priv_data;
@@ -359,6 +1045,12 @@
 
     bytestream2_init(gbc, avpkt->data, avpkt->size);
 
+    ctx->texture_block_h = 4;
+    ctx->texture_block_w = 4;
+
+    avctx->pix_fmt = AV_PIX_FMT_RGBA;
+    avctx->colorspace = AVCOL_SPC_RGB;
+
     tag = bytestream2_get_le32(gbc);
     switch (tag) {
     case MKBETAG('D', 'X', 'T', '1'):
@@ -378,9 +1070,39 @@
         msgtext = "DXT5";
         break;
     case MKBETAG('Y', 'C', 'G', '6'):
+        decompress_tex = dxv_decompress_ycg6;
+        ctx->tex_funct_planar[0] = yo_block;
+        ctx->tex_funct_planar[1] = cocg_block;
+        ctx->tex_rat   = 8;
+        ctx->tex_step  = 32;
+        ctx->ctex_step = 16;
+        msgcomp = "YOCOCG6";
+        msgtext = "YCG6";
+        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
+        ctx->texture_block_h = 4;
+        ctx->texture_block_w = 16;
+        ctx->ctexture_block_h = 4;
+        ctx->ctexture_block_w = 4;
+        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+        avctx->colorspace = AVCOL_SPC_YCOCG;
+        break;
     case MKBETAG('Y', 'G', '1', '0'):
-        avpriv_report_missing_feature(avctx, "Tag 0x%08"PRIX32, tag);
-        return AVERROR_PATCHWELCOME;
+        decompress_tex = dxv_decompress_yg10;
+        ctx->tex_funct_planar[0] = yao_block;
+        ctx->tex_funct_planar[1] = cocg_block;
+        ctx->tex_rat   = 4;
+        ctx->tex_step  = 64;
+        ctx->ctex_step = 16;
+        msgcomp = "YAOCOCG10";
+        msgtext = "YG10";
+        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
+        ctx->texture_block_h = 4;
+        ctx->texture_block_w = 16;
+        ctx->ctexture_block_h = 4;
+        ctx->ctexture_block_w = 4;
+        avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
+        avctx->colorspace = AVCOL_SPC_YCOCG;
+        break;
     default:
         /* Old version does not have a real header, just size and type. */
         size = tag & 0x00FFFFFF;
@@ -413,6 +1135,10 @@
         break;
     }
 
+    ctx->slice_count = av_clip(avctx->thread_count, 1,
+                               avctx->coded_height / FFMAX(ctx->texture_block_h,
+                                                           ctx->ctexture_block_h));
+
     /* New header is 12 bytes long. */
     if (!old_type) {
         version_major = bytestream2_get_byte(gbc) - 1;
@@ -440,10 +1166,28 @@
     }
 
     ctx->tex_size = avctx->coded_width * avctx->coded_height * 4 / ctx->tex_rat;
-    ret = av_reallocp(&ctx->tex_data, ctx->tex_size);
+    ret = av_reallocp(&ctx->tex_data, ctx->tex_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (ret < 0)
         return ret;
 
+    if (ctx->ctex_size) {
+        int i;
+
+        ctx->op_size[0] = avctx->coded_width * avctx->coded_height / 16;
+        ctx->op_size[1] = avctx->coded_width * avctx->coded_height / 32;
+        ctx->op_size[2] = avctx->coded_width * avctx->coded_height / 32;
+        ctx->op_size[3] = avctx->coded_width * avctx->coded_height / 16;
+
+        ret = av_reallocp(&ctx->ctex_data, ctx->ctex_size + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (ret < 0)
+            return ret;
+        for (i = 0; i < 4; i++) {
+            ret = av_reallocp(&ctx->op_data[i], ctx->op_size[i]);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
     /* Decompress texture out of the intermediate compression. */
     ret = decompress_tex(avctx);
     if (ret < 0)
@@ -482,10 +1226,6 @@
     avctx->coded_height = FFALIGN(avctx->height, 16);
 
     ff_texturedsp_init(&ctx->texdsp);
-    avctx->pix_fmt = AV_PIX_FMT_RGBA;
-
-    ctx->slice_count = av_clip(avctx->thread_count, 1,
-                               avctx->coded_height / TEXTURE_BLOCK_H);
 
     return 0;
 }
@@ -495,6 +1235,11 @@
     DXVContext *ctx = avctx->priv_data;
 
     av_freep(&ctx->tex_data);
+    av_freep(&ctx->ctex_data);
+    av_freep(&ctx->op_data[0]);
+    av_freep(&ctx->op_data[1]);
+    av_freep(&ctx->op_data[2]);
+    av_freep(&ctx->op_data[3]);
 
     return 0;
 }

diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index afcd361..3241611 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c

@@ -29,6 +29,7 @@
 #include "libavutil/time.h"
 
 #include "avcodec.h"
+#include "decode.h"
 #include "dxva2_internal.h"
 
 /* define all the GUIDs used directly here,
@@ -43,6 +44,7 @@
 DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main,  0x5b11d51b, 0x2f4c,0x4452,0xbc,0xc3,0x09,0xf2,0xa1,0x16,0x0c,0xc0);
 DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main10,0x107af0e0, 0xef1a,0x4d19,0xab,0xa8,0x67,0xa1,0x63,0x07,0x3d,0x13);
 DEFINE_GUID(ff_DXVA2_ModeVP9_VLD_Profile0,0x463707f8,0xa1d0,0x4585,0x87,0x6d,0x83,0xaa,0x6d,0x60,0xb8,0x9e);
+DEFINE_GUID(ff_DXVA2_ModeVP9_VLD_10bit_Profile2,0xa4c749ef,0x6ecf,0x48aa,0x84,0x48,0x50,0xa7,0xa1,0x16,0x5f,0xf7);
 DEFINE_GUID(ff_DXVA2_NoEncrypt,          0x1b81beD0, 0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5);
 DEFINE_GUID(ff_GUID_NULL,                0x00000000, 0x0000,0x0000,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00);
 DEFINE_GUID(ff_IID_IDirectXVideoDecoderService, 0xfc51a551,0xd5e7,0x11d9,0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02);
@@ -66,6 +68,10 @@
                                         FF_PROFILE_UNKNOWN};
 static const int prof_hevc_main10[]  = {FF_PROFILE_HEVC_MAIN_10,
                                         FF_PROFILE_UNKNOWN};
+static const int prof_vp9_profile0[] = {FF_PROFILE_VP9_0,
+                                        FF_PROFILE_UNKNOWN};
+static const int prof_vp9_profile2[] = {FF_PROFILE_VP9_2,
+                                        FF_PROFILE_UNKNOWN};
 
 static const dxva_mode dxva_modes[] = {
     /* MPEG-2 */
@@ -89,7 +95,8 @@
     { &ff_DXVA2_ModeHEVC_VLD_Main,   AV_CODEC_ID_HEVC, prof_hevc_main },
 
     /* VP8/9 */
-    { &ff_DXVA2_ModeVP9_VLD_Profile0,AV_CODEC_ID_VP9 },
+    { &ff_DXVA2_ModeVP9_VLD_Profile0,       AV_CODEC_ID_VP9, prof_vp9_profile0 },
+    { &ff_DXVA2_ModeVP9_VLD_10bit_Profile2, AV_CODEC_ID_VP9, prof_vp9_profile2 },
 
     { NULL,                          0 },
 };
@@ -576,14 +583,20 @@
 #endif
 }
 
-// This must work before the decoder is created.
-// This somehow needs to be exported to the user.
-static void dxva_adjust_hwframes(AVCodecContext *avctx, AVHWFramesContext *frames_ctx)
+int ff_dxva2_common_frame_params(AVCodecContext *avctx,
+                                 AVBufferRef *hw_frames_ctx)
 {
-    FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext *)hw_frames_ctx->data;
+    AVHWDeviceContext *device_ctx = frames_ctx->device_ctx;
     int surface_alignment, num_surfaces;
 
-    frames_ctx->format = sctx->pix_fmt;
+    if (device_ctx->type == AV_HWDEVICE_TYPE_DXVA2) {
+        frames_ctx->format = AV_PIX_FMT_DXVA2_VLD;
+    } else if (device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
+        frames_ctx->format = AV_PIX_FMT_D3D11;
+    } else {
+        return AVERROR(EINVAL);
+    }
 
     /* decoding MPEG-2 requires additional alignment on some Intel GPUs,
     but it causes issues for H.264 on certain AMD GPUs..... */
@@ -596,8 +609,8 @@
     else
         surface_alignment = 16;
 
-    /* 4 base work surfaces */
-    num_surfaces = 4;
+    /* 1 base work surface */
+    num_surfaces = 1;
 
     /* add surfaces based on number of possible refs */
     if (avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_HEVC)
@@ -607,10 +620,6 @@
     else
         num_surfaces += 2;
 
-    /* add extra surfaces for frame threading */
-    if (avctx->active_thread_type & FF_THREAD_FRAME)
-        num_surfaces += avctx->thread_count;
-
     frames_ctx->sw_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ?
                             AV_PIX_FMT_P010 : AV_PIX_FMT_NV12;
     frames_ctx->width = FFALIGN(avctx->coded_width, surface_alignment);
@@ -633,12 +642,16 @@
         frames_hwctx->BindFlags |= D3D11_BIND_DECODER;
     }
 #endif
+
+    return 0;
 }
 
 int ff_dxva2_decode_init(AVCodecContext *avctx)
 {
     FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
-    AVHWFramesContext *frames_ctx = NULL;
+    AVHWFramesContext *frames_ctx;
+    enum AVHWDeviceType dev_type = avctx->hwaccel->pix_fmt == AV_PIX_FMT_DXVA2_VLD
+                            ? AV_HWDEVICE_TYPE_DXVA2 : AV_HWDEVICE_TYPE_D3D11VA;
     int ret = 0;
 
     // Old API.
@@ -648,32 +661,14 @@
     // (avctx->pix_fmt is not updated yet at this point)
     sctx->pix_fmt = avctx->hwaccel->pix_fmt;
 
-    if (!avctx->hw_frames_ctx && !avctx->hw_device_ctx) {
-        av_log(avctx, AV_LOG_ERROR, "Either a hw_frames_ctx or a hw_device_ctx needs to be set for hardware decoding.\n");
-        return AVERROR(EINVAL);
-    }
+    ret = ff_decode_get_hw_frames_ctx(avctx, dev_type);
+    if (ret < 0)
+        return ret;
 
-    if (avctx->hw_frames_ctx) {
-        frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
-    } else {
-        avctx->hw_frames_ctx = av_hwframe_ctx_alloc(avctx->hw_device_ctx);
-        if (!avctx->hw_frames_ctx)
-            return AVERROR(ENOMEM);
-
-        frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
-
-        dxva_adjust_hwframes(avctx, frames_ctx);
-
-        ret = av_hwframe_ctx_init(avctx->hw_frames_ctx);
-        if (ret < 0)
-            goto fail;
-    }
-
+    frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
     sctx->device_ctx = frames_ctx->device_ctx;
 
-    if (frames_ctx->format != sctx->pix_fmt ||
-        !((sctx->pix_fmt == AV_PIX_FMT_D3D11 && CONFIG_D3D11VA) ||
-          (sctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD && CONFIG_DXVA2))) {
+    if (frames_ctx->format != sctx->pix_fmt) {
         av_log(avctx, AV_LOG_ERROR, "Invalid pixfmt for hwaccel!\n");
         ret = AVERROR(EINVAL);
         goto fail;

diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
index e3a3f78..5b23b28 100644
--- a/libavcodec/dxva2_h264.c
+++ b/libavcodec/dxva2_h264.c

@@ -22,16 +22,12 @@
 
 #include "libavutil/avassert.h"
 
+#include "dxva2_internal.h"
 #include "h264dec.h"
 #include "h264data.h"
 #include "h264_ps.h"
 #include "mpegutils.h"
 
-// The headers above may include w32threads.h, which uses the original
-// _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
-// potentially newer version.
-#include "dxva2_internal.h"
-
 struct dxva2_picture_context {
     DXVA_PicParams_H264   pp;
     DXVA_Qmatrix_H264     qm;
@@ -518,7 +514,7 @@
 }
 
 #if CONFIG_H264_DXVA2_HWACCEL
-AVHWAccel ff_h264_dxva2_hwaccel = {
+const AVHWAccel ff_h264_dxva2_hwaccel = {
     .name           = "h264_dxva2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
@@ -528,13 +524,14 @@
     .start_frame    = dxva2_h264_start_frame,
     .decode_slice   = dxva2_h264_decode_slice,
     .end_frame      = dxva2_h264_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_H264_D3D11VA_HWACCEL
-AVHWAccel ff_h264_d3d11va_hwaccel = {
+const AVHWAccel ff_h264_d3d11va_hwaccel = {
     .name           = "h264_d3d11va",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
@@ -544,13 +541,14 @@
     .start_frame    = dxva2_h264_start_frame,
     .decode_slice   = dxva2_h264_decode_slice,
     .end_frame      = dxva2_h264_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_H264_D3D11VA2_HWACCEL
-AVHWAccel ff_h264_d3d11va2_hwaccel = {
+const AVHWAccel ff_h264_d3d11va2_hwaccel = {
     .name           = "h264_d3d11va2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
@@ -560,6 +558,7 @@
     .start_frame    = dxva2_h264_start_frame,
     .decode_slice   = dxva2_h264_decode_slice,
     .end_frame      = dxva2_h264_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };

diff --git a/libavcodec/dxva2_hevc.c b/libavcodec/dxva2_hevc.c
index 88f887a..dbb701f 100644
--- a/libavcodec/dxva2_hevc.c
+++ b/libavcodec/dxva2_hevc.c

@@ -22,14 +22,10 @@
 
 #include "libavutil/avassert.h"
 
+#include "dxva2_internal.h"
 #include "hevc_data.h"
 #include "hevcdec.h"
 
-// The headers above may include w32threads.h, which uses the original
-// _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
-// potentially newer version.
-#include "dxva2_internal.h"
-
 #define MAX_SLICES 256
 
 struct hevc_dxva2_picture_context {
@@ -422,7 +418,7 @@
 }
 
 #if CONFIG_HEVC_DXVA2_HWACCEL
-AVHWAccel ff_hevc_dxva2_hwaccel = {
+const AVHWAccel ff_hevc_dxva2_hwaccel = {
     .name           = "hevc_dxva2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_HEVC,
@@ -432,13 +428,14 @@
     .start_frame    = dxva2_hevc_start_frame,
     .decode_slice   = dxva2_hevc_decode_slice,
     .end_frame      = dxva2_hevc_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct hevc_dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_HEVC_D3D11VA_HWACCEL
-AVHWAccel ff_hevc_d3d11va_hwaccel = {
+const AVHWAccel ff_hevc_d3d11va_hwaccel = {
     .name           = "hevc_d3d11va",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_HEVC,
@@ -448,13 +445,14 @@
     .start_frame    = dxva2_hevc_start_frame,
     .decode_slice   = dxva2_hevc_decode_slice,
     .end_frame      = dxva2_hevc_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct hevc_dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_HEVC_D3D11VA2_HWACCEL
-AVHWAccel ff_hevc_d3d11va2_hwaccel = {
+const AVHWAccel ff_hevc_d3d11va2_hwaccel = {
     .name           = "hevc_d3d11va2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_HEVC,
@@ -464,6 +462,7 @@
     .start_frame    = dxva2_hevc_start_frame,
     .decode_slice   = dxva2_hevc_decode_slice,
     .end_frame      = dxva2_hevc_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct hevc_dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };

diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h
index 352a9db..b822af5 100644
--- a/libavcodec/dxva2_internal.h
+++ b/libavcodec/dxva2_internal.h

@@ -104,9 +104,9 @@
 #if CONFIG_D3D11VA && CONFIG_DXVA2
 #define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.workaround : ctx->dxva2.workaround)
 #define DXVA_CONTEXT_COUNT(avctx, ctx)          (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.surface_count : ctx->dxva2.surface_count)
-#define DXVA_CONTEXT_DECODER(avctx, ctx)        (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.decoder : ctx->dxva2.decoder)
+#define DXVA_CONTEXT_DECODER(avctx, ctx)        (ff_dxva2_is_d3d11(avctx) ? (void *)ctx->d3d11va.decoder : (void *)ctx->dxva2.decoder)
 #define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*(ff_dxva2_is_d3d11(avctx) ? &ctx->d3d11va.report_id : &ctx->dxva2.report_id))
-#define DXVA_CONTEXT_CFG(avctx, ctx)            (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg : ctx->dxva2.cfg)
+#define DXVA_CONTEXT_CFG(avctx, ctx)            (ff_dxva2_is_d3d11(avctx) ? (void *)ctx->d3d11va.cfg : (void *)ctx->dxva2.cfg)
 #define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigBitstreamRaw : ctx->dxva2.cfg->ConfigBitstreamRaw)
 #define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigIntraResidUnsigned : ctx->dxva2.cfg->ConfigIntraResidUnsigned)
 #define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigResidDiffAccelerator : ctx->dxva2.cfg->ConfigResidDiffAccelerator)
@@ -156,6 +156,9 @@
 
 int ff_dxva2_decode_uninit(AVCodecContext *avctx);
 
+int ff_dxva2_common_frame_params(AVCodecContext *avctx,
+                                 AVBufferRef *hw_frames_ctx);
+
 int ff_dxva2_is_d3d11(const AVCodecContext *avctx);
 
 #endif /* AVCODEC_DXVA2_INTERNAL_H */

diff --git a/libavcodec/dxva2_mpeg2.c b/libavcodec/dxva2_mpeg2.c
index b7c6937..8cc21bf 100644
--- a/libavcodec/dxva2_mpeg2.c
+++ b/libavcodec/dxva2_mpeg2.c

@@ -21,14 +21,11 @@
  */
 
 #include "libavutil/log.h"
+
+#include "dxva2_internal.h"
 #include "mpegutils.h"
 #include "mpegvideo.h"
 
-// The headers above may include w32threads.h, which uses the original
-// _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
-// potentially newer version.
-#include "dxva2_internal.h"
-
 #define MAX_SLICES 1024
 struct dxva2_picture_context {
     DXVA_PictureParameters pp;
@@ -317,7 +314,7 @@
 }
 
 #if CONFIG_MPEG2_DXVA2_HWACCEL
-AVHWAccel ff_mpeg2_dxva2_hwaccel = {
+const AVHWAccel ff_mpeg2_dxva2_hwaccel = {
     .name           = "mpeg2_dxva2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG2VIDEO,
@@ -327,13 +324,14 @@
     .start_frame    = dxva2_mpeg2_start_frame,
     .decode_slice   = dxva2_mpeg2_decode_slice,
     .end_frame      = dxva2_mpeg2_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_MPEG2_D3D11VA_HWACCEL
-AVHWAccel ff_mpeg2_d3d11va_hwaccel = {
+const AVHWAccel ff_mpeg2_d3d11va_hwaccel = {
     .name           = "mpeg2_d3d11va",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG2VIDEO,
@@ -343,13 +341,14 @@
     .start_frame    = dxva2_mpeg2_start_frame,
     .decode_slice   = dxva2_mpeg2_decode_slice,
     .end_frame      = dxva2_mpeg2_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_MPEG2_D3D11VA2_HWACCEL
-AVHWAccel ff_mpeg2_d3d11va2_hwaccel = {
+const AVHWAccel ff_mpeg2_d3d11va2_hwaccel = {
     .name           = "mpeg2_d3d11va2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG2VIDEO,
@@ -359,6 +358,7 @@
     .start_frame    = dxva2_mpeg2_start_frame,
     .decode_slice   = dxva2_mpeg2_decode_slice,
     .end_frame      = dxva2_mpeg2_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };

diff --git a/libavcodec/dxva2_vc1.c b/libavcodec/dxva2_vc1.c
index e5353cd..f08ac8b 100644
--- a/libavcodec/dxva2_vc1.c
+++ b/libavcodec/dxva2_vc1.c

@@ -20,16 +20,13 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "dxva2_internal.h"
 #include "mpegutils.h"
 #include "vc1.h"
 #include "vc1data.h"
 
-// The headers above may include w32threads.h, which uses the original
-// _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
-// potentially newer version.
-#include "dxva2_internal.h"
-
 #define MAX_SLICES 1024
+
 struct dxva2_picture_context {
     DXVA_PictureParameters pp;
     unsigned               slice_count;
@@ -378,7 +375,7 @@
 }
 
 #if CONFIG_WMV3_DXVA2_HWACCEL
-AVHWAccel ff_wmv3_dxva2_hwaccel = {
+const AVHWAccel ff_wmv3_dxva2_hwaccel = {
     .name           = "wmv3_dxva2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_WMV3,
@@ -388,13 +385,14 @@
     .start_frame    = dxva2_vc1_start_frame,
     .decode_slice   = dxva2_vc1_decode_slice,
     .end_frame      = dxva2_vc1_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_VC1_DXVA2_HWACCEL
-AVHWAccel ff_vc1_dxva2_hwaccel = {
+const AVHWAccel ff_vc1_dxva2_hwaccel = {
     .name           = "vc1_dxva2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VC1,
@@ -404,13 +402,14 @@
     .start_frame    = dxva2_vc1_start_frame,
     .decode_slice   = dxva2_vc1_decode_slice,
     .end_frame      = dxva2_vc1_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_WMV3_D3D11VA_HWACCEL
-AVHWAccel ff_wmv3_d3d11va_hwaccel = {
+const AVHWAccel ff_wmv3_d3d11va_hwaccel = {
     .name           = "wmv3_d3d11va",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_WMV3,
@@ -420,13 +419,14 @@
     .start_frame    = dxva2_vc1_start_frame,
     .decode_slice   = dxva2_vc1_decode_slice,
     .end_frame      = dxva2_vc1_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_WMV3_D3D11VA2_HWACCEL
-AVHWAccel ff_wmv3_d3d11va2_hwaccel = {
+const AVHWAccel ff_wmv3_d3d11va2_hwaccel = {
     .name           = "wmv3_d3d11va2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_WMV3,
@@ -436,13 +436,14 @@
     .start_frame    = dxva2_vc1_start_frame,
     .decode_slice   = dxva2_vc1_decode_slice,
     .end_frame      = dxva2_vc1_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_VC1_D3D11VA_HWACCEL
-AVHWAccel ff_vc1_d3d11va_hwaccel = {
+const AVHWAccel ff_vc1_d3d11va_hwaccel = {
     .name           = "vc1_d3d11va",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VC1,
@@ -452,13 +453,14 @@
     .start_frame    = dxva2_vc1_start_frame,
     .decode_slice   = dxva2_vc1_decode_slice,
     .end_frame      = dxva2_vc1_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_VC1_D3D11VA2_HWACCEL
-AVHWAccel ff_vc1_d3d11va2_hwaccel = {
+const AVHWAccel ff_vc1_d3d11va2_hwaccel = {
     .name           = "vc1_d3d11va2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VC1,
@@ -468,6 +470,7 @@
     .start_frame    = dxva2_vc1_start_frame,
     .decode_slice   = dxva2_vc1_decode_slice,
     .end_frame      = dxva2_vc1_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };

diff --git a/libavcodec/dxva2_vp9.c b/libavcodec/dxva2_vp9.c
index 6d87fdd..eaeab3a 100644
--- a/libavcodec/dxva2_vp9.c
+++ b/libavcodec/dxva2_vp9.c

@@ -23,12 +23,8 @@
 #include "libavutil/avassert.h"
 #include "libavutil/pixdesc.h"
 
-#include "vp9shared.h"
-
-// The headers above may include w32threads.h, which uses the original
-// _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
-// potentially newer version.
 #include "dxva2_internal.h"
+#include "vp9shared.h"
 
 struct vp9_dxva2_picture_context {
     DXVA_PicParams_VP9    pp;
@@ -309,7 +305,7 @@
 }
 
 #if CONFIG_VP9_DXVA2_HWACCEL
-AVHWAccel ff_vp9_dxva2_hwaccel = {
+const AVHWAccel ff_vp9_dxva2_hwaccel = {
     .name           = "vp9_dxva2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VP9,
@@ -319,13 +315,14 @@
     .start_frame    = dxva2_vp9_start_frame,
     .decode_slice   = dxva2_vp9_decode_slice,
     .end_frame      = dxva2_vp9_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct vp9_dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_VP9_D3D11VA_HWACCEL
-AVHWAccel ff_vp9_d3d11va_hwaccel = {
+const AVHWAccel ff_vp9_d3d11va_hwaccel = {
     .name           = "vp9_d3d11va",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VP9,
@@ -335,13 +332,14 @@
     .start_frame    = dxva2_vp9_start_frame,
     .decode_slice   = dxva2_vp9_decode_slice,
     .end_frame      = dxva2_vp9_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct vp9_dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
 #if CONFIG_VP9_D3D11VA2_HWACCEL
-AVHWAccel ff_vp9_d3d11va2_hwaccel = {
+const AVHWAccel ff_vp9_d3d11va2_hwaccel = {
     .name           = "vp9_d3d11va2",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VP9,
@@ -351,6 +349,7 @@
     .start_frame    = dxva2_vp9_start_frame,
     .decode_slice   = dxva2_vp9_decode_slice,
     .end_frame      = dxva2_vp9_end_frame,
+    .frame_params   = ff_dxva2_common_frame_params,
     .frame_priv_data_size = sizeof(struct vp9_dxva2_picture_context),
     .priv_data_size = sizeof(FFDXVASharedContext),
 };

diff --git a/libavcodec/eac3_core_bsf.c b/libavcodec/eac3_core_bsf.c
new file mode 100644
index 0000000..3e4dc2e
--- /dev/null
+++ b/libavcodec/eac3_core_bsf.c

@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "bsf.h"
+#include "get_bits.h"
+#include "ac3_parser_internal.h"
+
+static int eac3_core_filter(AVBSFContext *ctx, AVPacket *pkt)
+{
+    AC3HeaderInfo hdr;
+    GetBitContext gbc;
+    int ret;
+
+    ret = ff_bsf_get_packet_ref(ctx, pkt);
+    if (ret < 0)
+        return ret;
+    ret = init_get_bits8(&gbc, pkt->data, pkt->size);
+    if (ret < 0)
+        goto fail;
+
+    ret = ff_ac3_parse_header(&gbc, &hdr);
+    if (ret < 0) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    if (hdr.frame_type == EAC3_FRAME_TYPE_INDEPENDENT ||
+        hdr.frame_type == EAC3_FRAME_TYPE_AC3_CONVERT) {
+        pkt->size = FFMIN(hdr.frame_size, pkt->size);
+    } else if (hdr.frame_type == EAC3_FRAME_TYPE_DEPENDENT && pkt->size > hdr.frame_size) {
+        AC3HeaderInfo hdr2;
+
+        ret = init_get_bits8(&gbc, pkt->data + hdr.frame_size, pkt->size - hdr.frame_size);
+        if (ret < 0)
+            goto fail;
+
+        ret = ff_ac3_parse_header(&gbc, &hdr2);
+        if (ret < 0) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        if (hdr2.frame_type == EAC3_FRAME_TYPE_INDEPENDENT ||
+            hdr2.frame_type == EAC3_FRAME_TYPE_AC3_CONVERT) {
+            pkt->size -= hdr.frame_size;
+            pkt->data += hdr.frame_size;
+        } else {
+            pkt->size = 0;
+        }
+    } else {
+        pkt->size = 0;
+    }
+
+    return 0;
+fail:
+    av_packet_unref(pkt);
+    return ret;
+}
+
+static const enum AVCodecID codec_ids[] = {
+    AV_CODEC_ID_EAC3, AV_CODEC_ID_NONE,
+};
+
+const AVBitStreamFilter ff_eac3_core_bsf = {
+    .name      = "eac3_core",
+    .filter    = eac3_core_filter,
+    .codec_ids = codec_ids,
+};

diff --git a/libavcodec/eac3dec.c b/libavcodec/eac3dec.c
index c971879..73067de 100644
--- a/libavcodec/eac3dec.c
+++ b/libavcodec/eac3dec.c

@@ -48,7 +48,6 @@
 #include "internal.h"
 #include "aac_ac3_parser.h"
 #include "ac3.h"
-#include "ac3_parser.h"
 #include "ac3dec.h"
 #include "ac3dec_data.h"
 #include "eac3_data.h"
@@ -304,13 +303,7 @@
     /* An E-AC-3 stream can have multiple independent streams which the
        application can select from. each independent stream can also contain
        dependent streams which are used to add or replace channels. */
-    if (s->frame_type == EAC3_FRAME_TYPE_DEPENDENT) {
-        if (!s->eac3_frame_dependent_found) {
-            s->eac3_frame_dependent_found = 1;
-            avpriv_request_sample(s->avctx, "Dependent substream decoding");
-        }
-        return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
-    } else if (s->frame_type == EAC3_FRAME_TYPE_RESERVED) {
+    if (s->frame_type == EAC3_FRAME_TYPE_RESERVED) {
         av_log(s->avctx, AV_LOG_ERROR, "Reserved frame type\n");
         return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
     }
@@ -356,7 +349,18 @@
     /* dependent stream channel map */
     if (s->frame_type == EAC3_FRAME_TYPE_DEPENDENT) {
         if (get_bits1(gbc)) {
-            skip_bits(gbc, 16); // skip custom channel map
+            int64_t channel_layout = 0;
+            int channel_map = get_bits(gbc, 16);
+            av_log(s->avctx, AV_LOG_DEBUG, "channel_map: %0X\n", channel_map);
+
+            for (i = 0; i < 16; i++)
+                if (channel_map & (1 << (EAC3_MAX_CHANNELS - i - 1)))
+                    channel_layout |= ff_eac3_custom_channel_map_locations[i][1];
+
+            if (av_popcount64(channel_layout) > EAC3_MAX_CHANNELS) {
+                return AVERROR_INVALIDDATA;
+            }
+            s->channel_map = channel_map;
         }
     }
 

diff --git a/libavcodec/elsdec.c b/libavcodec/elsdec.c
index 4797965..cb0e9c6 100644
--- a/libavcodec/elsdec.c
+++ b/libavcodec/elsdec.c

@@ -271,7 +271,7 @@
 
 void ff_els_decoder_uninit(ElsUnsignedRung *rung)
 {
-    av_free(rung->rem_rung_list);
+    av_freep(&rung->rem_rung_list);
 }
 
 static int els_import_byte(ElsDecCtx *ctx)
@@ -391,12 +391,10 @@
                 if (ur->rung_list_size <= (ur->avail_index + 2) * sizeof(ElsRungNode)) {
                     // remember rung_node position
                     ptrdiff_t pos     = rung_node - ur->rem_rung_list;
-                    ur->rem_rung_list = av_realloc(ur->rem_rung_list,
+                    ctx->err = av_reallocp(&ur->rem_rung_list,
                                                    ur->rung_list_size +
                                                    RUNG_SPACE);
-                    if (!ur->rem_rung_list) {
-                        av_free(ur->rem_rung_list);
-                        ctx->err = AVERROR(ENOMEM);
+                    if (ctx->err < 0) {
                         return 0;
                     }
                     memset((uint8_t *) ur->rem_rung_list + ur->rung_list_size, 0,

diff --git a/libavcodec/encode.c b/libavcodec/encode.c
index c961dba..d12c425 100644
--- a/libavcodec/encode.c
+++ b/libavcodec/encode.c

@@ -135,7 +135,6 @@
 
     if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY) && !frame) {
         av_packet_unref(avpkt);
-        av_init_packet(avpkt);
         return 0;
     }
 
@@ -223,12 +222,9 @@
             avpkt->buf      = user_pkt.buf;
             avpkt->data     = user_pkt.data;
         } else if (!avpkt->buf) {
-            AVPacket tmp = { 0 };
-            ret = av_packet_ref(&tmp, avpkt);
-            av_packet_unref(avpkt);
+            ret = av_packet_make_refcounted(avpkt);
             if (ret < 0)
                 goto end;
-            *avpkt = tmp;
         }
     }
 
@@ -238,13 +234,12 @@
             if (ret >= 0)
                 avpkt->data = avpkt->buf->data;
         }
-
-        avctx->frame_number++;
+        if (frame)
+            avctx->frame_number++;
     }
 
     if (ret < 0 || !*got_packet_ptr) {
         av_packet_unref(avpkt);
-        av_init_packet(avpkt);
         goto end;
     }
 
@@ -257,10 +252,6 @@
     av_frame_free(&padded_frame);
     av_free(extended_frame);
 
-#if FF_API_AUDIOENC_DELAY
-    avctx->delay = avctx->initial_padding;
-#endif
-
     return ret;
 }
 
@@ -289,8 +280,6 @@
 
     if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY) && !frame) {
         av_packet_unref(avpkt);
-        av_init_packet(avpkt);
-        avpkt->size = 0;
         return 0;
     }
 
@@ -322,12 +311,9 @@
             avpkt->buf      = user_pkt.buf;
             avpkt->data     = user_pkt.data;
         } else if (!avpkt->buf) {
-            AVPacket tmp = { 0 };
-            ret = av_packet_ref(&tmp, avpkt);
-            av_packet_unref(avpkt);
+            ret = av_packet_make_refcounted(avpkt);
             if (ret < 0)
                 return ret;
-            *avpkt = tmp;
         }
     }
 
@@ -343,7 +329,8 @@
                 avpkt->data = avpkt->buf->data;
         }
 
-        avctx->frame_number++;
+        if (frame)
+            avctx->frame_number++;
     }
 
     if (ret < 0 || !*got_packet_ptr)

diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index 5364940..1abae53 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c

@@ -27,7 +27,6 @@
 
 #include <limits.h>
 
-#include "libavutil/atomic.h"
 #include "libavutil/internal.h"
 #include "avcodec.h"
 #include "error_resilience.h"
@@ -108,7 +107,7 @@
             dc = -prev_dc +
                  data[x     + y * stride] * 8 -
                  data[x + 1 + y * stride];
-            dc = (dc * 10923 + 32768) >> 16;
+            dc = (av_clip(dc, INT_MIN/10923, INT_MAX/10923 - 32768) * 10923 + 32768) >> 16;
             prev_dc = data[x + y * stride];
             data[x + y * stride] = dc;
         }
@@ -124,7 +123,7 @@
             dc = -prev_dc +
                  data[x +  y      * stride] * 8 -
                  data[x + (y + 1) * stride];
-            dc = (dc * 10923 + 32768) >> 16;
+            dc = (av_clip(dc, INT_MIN/10923, INT_MAX/10923 - 32768) * 10923 + 32768) >> 16;
             prev_dc = data[x + y * stride];
             data[x + y * stride] = dc;
         }
@@ -807,16 +806,13 @@
 
     memset(s->error_status_table, ER_MB_ERROR | VP_START | ER_MB_END,
            s->mb_stride * s->mb_height * sizeof(uint8_t));
-    s->error_count    = 3 * s->mb_num;
+    atomic_init(&s->error_count, 3 * s->mb_num);
     s->error_occurred = 0;
 }
 
 static int er_supported(ERContext *s)
 {
     if(s->avctx->hwaccel && s->avctx->hwaccel->decode_slice           ||
-#if FF_API_CAP_VDPAU
-       s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU          ||
-#endif
        !s->cur_pic.f                                                  ||
        s->cur_pic.field_picture
     )
@@ -855,20 +851,20 @@
     mask &= ~VP_START;
     if (status & (ER_AC_ERROR | ER_AC_END)) {
         mask           &= ~(ER_AC_ERROR | ER_AC_END);
-        avpriv_atomic_int_add_and_fetch(&s->error_count, start_i - end_i - 1);
+        atomic_fetch_add(&s->error_count, start_i - end_i - 1);
     }
     if (status & (ER_DC_ERROR | ER_DC_END)) {
         mask           &= ~(ER_DC_ERROR | ER_DC_END);
-        avpriv_atomic_int_add_and_fetch(&s->error_count, start_i - end_i - 1);
+        atomic_fetch_add(&s->error_count, start_i - end_i - 1);
     }
     if (status & (ER_MV_ERROR | ER_MV_END)) {
         mask           &= ~(ER_MV_ERROR | ER_MV_END);
-        avpriv_atomic_int_add_and_fetch(&s->error_count, start_i - end_i - 1);
+        atomic_fetch_add(&s->error_count, start_i - end_i - 1);
     }
 
     if (status & ER_MB_ERROR) {
         s->error_occurred = 1;
-        avpriv_atomic_int_set(&s->error_count, INT_MAX);
+        atomic_store(&s->error_count, INT_MAX);
     }
 
     if (mask == ~0x7F) {
@@ -881,7 +877,7 @@
     }
 
     if (end_i == s->mb_num)
-        avpriv_atomic_int_set(&s->error_count, INT_MAX);
+        atomic_store(&s->error_count, INT_MAX);
     else {
         s->error_status_table[end_xy] &= mask;
         s->error_status_table[end_xy] |= status;
@@ -896,7 +892,7 @@
         prev_status &= ~ VP_START;
         if (prev_status != (ER_MV_END | ER_DC_END | ER_AC_END)) {
             s->error_occurred = 1;
-            avpriv_atomic_int_set(&s->error_count, INT_MAX);
+            atomic_store(&s->error_count, INT_MAX);
         }
     }
 }
@@ -913,10 +909,10 @@
 
     /* We do not support ER of field pictures yet,
      * though it should not crash if enabled. */
-    if (!s->avctx->error_concealment || s->error_count == 0            ||
+    if (!s->avctx->error_concealment || !atomic_load(&s->error_count)  ||
         s->avctx->lowres                                               ||
         !er_supported(s)                                               ||
-        s->error_count == 3 * s->mb_width *
+        atomic_load(&s->error_count) == 3 * s->mb_width *
                           (s->avctx->skip_top + s->avctx->skip_bottom)) {
         return;
     }
@@ -930,7 +926,7 @@
     if (   mb_x == s->mb_width
         && s->avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO
         && (FFALIGN(s->avctx->height, 16)&16)
-        && s->error_count == 3 * s->mb_width * (s->avctx->skip_top + s->avctx->skip_bottom + 1)
+        && atomic_load(&s->error_count) == 3 * s->mb_width * (s->avctx->skip_top + s->avctx->skip_bottom + 1)
     ) {
         av_log(s->avctx, AV_LOG_DEBUG, "ignoring last missing slice\n");
         return;

diff --git a/libavcodec/error_resilience.h b/libavcodec/error_resilience.h
index 27c2008..664a765 100644
--- a/libavcodec/error_resilience.h
+++ b/libavcodec/error_resilience.h

@@ -20,6 +20,7 @@
 #define AVCODEC_ERROR_RESILIENCE_H
 
 #include <stdint.h>
+#include <stdatomic.h>
 
 #include "avcodec.h"
 #include "me_cmp.h"
@@ -60,7 +61,7 @@
     ptrdiff_t mb_stride;
     ptrdiff_t b8_stride;
 
-    volatile int error_count;
+    atomic_int error_count;
     int error_occurred;
     uint8_t *error_status_table;
     uint8_t *er_temp_buffer;

diff --git a/libavcodec/escape124.c b/libavcodec/escape124.c
index eb051eb..cffd3e1 100644
--- a/libavcodec/escape124.c
+++ b/libavcodec/escape124.c

@@ -221,7 +221,11 @@
 
     // This call also guards the potential depth reads for the
     // codebook unpacking.
-    if (get_bits_left(&gb) < 64)
+    // Check if the amount we will read minimally is available on input.
+    // The 64 represent the immediately next 2 frame_* elements read, the 23/4320
+    // represent a lower bound of the space needed for skipped superblocks. Non
+    // skipped SBs need more space.
+    if (get_bits_left(&gb) < 64 + s->num_superblocks * 23LL / 4320)
         return -1;
 
     frame_flags = get_bits_long(&gb, 32);

diff --git a/libavcodec/exif.c b/libavcodec/exif.c
index 07ce174..2874772 100644
--- a/libavcodec/exif.c
+++ b/libavcodec/exif.c

@@ -92,7 +92,7 @@
     // store metadata or proceed with next IFD
     ret = ff_tis_ifd(id);
     if (ret) {
-        ret = avpriv_exif_decode_ifd(logctx, gbytes, le, depth + 1, metadata);
+        ret = ff_exif_decode_ifd(logctx, gbytes, le, depth + 1, metadata);
     } else {
         const char *name = exif_get_tag_name(id);
         char *use_name   = (char*) name;
@@ -119,8 +119,8 @@
 }
 
 
-int avpriv_exif_decode_ifd(void *logctx, GetByteContext *gbytes, int le,
-                           int depth, AVDictionary **metadata)
+int ff_exif_decode_ifd(void *logctx, GetByteContext *gbytes,
+                       int le, int depth, AVDictionary **metadata)
 {
     int i, ret;
     int entries;
@@ -140,3 +140,13 @@
     // return next IDF offset or 0x000000000 or a value < 0 for failure
     return ff_tget_long(gbytes, le);
 }
+
+int avpriv_exif_decode_ifd(void *logctx, const uint8_t *buf, int size,
+                           int le, int depth, AVDictionary **metadata)
+{
+    GetByteContext gb;
+
+    bytestream2_init(&gb, buf, size);
+
+    return ff_exif_decode_ifd(logctx, &gb, le, depth, metadata);
+}

diff --git a/libavcodec/exif.h b/libavcodec/exif.h
index 5f09208..05af756 100644
--- a/libavcodec/exif.h
+++ b/libavcodec/exif.h

@@ -164,7 +164,10 @@
 
 /** Recursively decodes all IFD's and
  *  adds included TAGS into the metadata dictionary. */
-int avpriv_exif_decode_ifd(void *logctx, GetByteContext *gbytes, int le,
-                           int depth, AVDictionary **metadata);
+int avpriv_exif_decode_ifd(void *logctx, const uint8_t *buf, int size,
+                           int le, int depth, AVDictionary **metadata);
+
+int ff_exif_decode_ifd(void *logctx, GetByteContext *gbytes, int le,
+                       int depth, AVDictionary **metadata);
 
 #endif /* AVCODEC_EXIF_H */

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index f08576a..5253cc3 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c

@@ -558,7 +558,7 @@
     while (lc > 0) {
         const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK];
 
-        if (pl.len) {
+        if (pl.len && lc >= pl.len) {
             lc -= pl.len;
             get_code(pl.lit, rlc, c, lc, gb, out, oe, outb);
         } else {
@@ -899,7 +899,7 @@
 
 static void unpack_14(const uint8_t b[14], uint16_t s[16])
 {
-    unsigned short shift = (b[ 2] >> 2);
+    unsigned short shift = (b[ 2] >> 2) & 15;
     unsigned short bias = (0x20 << shift);
     int i;
 
@@ -1350,12 +1350,14 @@
 
     flags = bytestream2_get_le24(&s->gb);
 
-    if (flags == 0x00)
-        s->is_tile = 0;
-    else if (flags & 0x02)
+    if (flags & 0x02)
         s->is_tile = 1;
-    else{
-        avpriv_report_missing_feature(s->avctx, "flags %d", flags);
+    if (flags & 0x08) {
+        avpriv_report_missing_feature(s->avctx, "deep data");
+        return AVERROR_PATCHWELCOME;
+    }
+    if (flags & 0x10) {
+        avpriv_report_missing_feature(s->avctx, "multipart");
         return AVERROR_PATCHWELCOME;
     }
 

diff --git a/libavcodec/extract_extradata_bsf.c b/libavcodec/extract_extradata_bsf.c
index d40907a..9c9fb0c 100644
--- a/libavcodec/extract_extradata_bsf.c
+++ b/libavcodec/extract_extradata_bsf.c

@@ -24,6 +24,8 @@
 #include "libavutil/opt.h"
 
 #include "avcodec.h"
+#include "av1.h"
+#include "av1_parse.h"
 #include "bsf.h"
 #include "h2645_parse.h"
 #include "h264.h"
@@ -36,6 +38,12 @@
     int (*extract)(AVBSFContext *ctx, AVPacket *pkt,
                    uint8_t **data, int *size);
 
+    /* AV1 specifc fields */
+    AV1Packet av1_pkt;
+
+    /* H264/HEVC specifc fields */
+    H2645Packet h2645_pkt;
+
     /* AVOptions */
     int remove;
 } ExtractExtradataContext;
@@ -49,6 +57,80 @@
     return 0;
 }
 
+static int extract_extradata_av1(AVBSFContext *ctx, AVPacket *pkt,
+                                 uint8_t **data, int *size)
+{
+    static const int extradata_obu_types[] = {
+        AV1_OBU_SEQUENCE_HEADER, AV1_OBU_METADATA,
+    };
+    ExtractExtradataContext *s = ctx->priv_data;
+
+    int extradata_size = 0, filtered_size = 0;
+    int nb_extradata_obu_types = FF_ARRAY_ELEMS(extradata_obu_types);
+    int i, has_seq = 0, ret = 0;
+
+    ret = ff_av1_packet_split(&s->av1_pkt, pkt->data, pkt->size, ctx);
+    if (ret < 0)
+        return ret;
+
+    for (i = 0; i < s->av1_pkt.nb_obus; i++) {
+        AV1OBU *obu = &s->av1_pkt.obus[i];
+        if (val_in_array(extradata_obu_types, nb_extradata_obu_types, obu->type)) {
+            extradata_size += obu->raw_size;
+            if (obu->type == AV1_OBU_SEQUENCE_HEADER)
+                has_seq = 1;
+        } else if (s->remove) {
+            filtered_size += obu->raw_size;
+        }
+    }
+
+    if (extradata_size && has_seq) {
+        AVBufferRef *filtered_buf;
+        uint8_t *extradata, *filtered_data;
+
+        if (s->remove) {
+            filtered_buf = av_buffer_alloc(filtered_size + AV_INPUT_BUFFER_PADDING_SIZE);
+            if (!filtered_buf) {
+                return AVERROR(ENOMEM);
+            }
+            memset(filtered_buf->data + filtered_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+            filtered_data = filtered_buf->data;
+        }
+
+        extradata = av_malloc(extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!extradata) {
+            av_buffer_unref(&filtered_buf);
+            return AVERROR(ENOMEM);
+        }
+        memset(extradata + extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+        *data = extradata;
+        *size = extradata_size;
+
+        for (i = 0; i < s->av1_pkt.nb_obus; i++) {
+            AV1OBU *obu = &s->av1_pkt.obus[i];
+            if (val_in_array(extradata_obu_types, nb_extradata_obu_types,
+                             obu->type)) {
+                memcpy(extradata, obu->raw_data, obu->raw_size);
+                extradata += obu->raw_size;
+            } else if (s->remove) {
+                memcpy(filtered_data, obu->raw_data, obu->raw_size);
+                filtered_data += obu->raw_size;
+            }
+        }
+
+        if (s->remove) {
+            av_buffer_unref(&pkt->buf);
+            pkt->buf  = filtered_buf;
+            pkt->data = filtered_buf->data;
+            pkt->size = filtered_size;
+        }
+    }
+
+    return 0;
+}
+
 static int extract_extradata_h2645(AVBSFContext *ctx, AVPacket *pkt,
                                    uint8_t **data, int *size)
 {
@@ -61,8 +143,7 @@
 
     ExtractExtradataContext *s = ctx->priv_data;
 
-    H2645Packet h2645_pkt = { 0 };
-    int extradata_size = 0;
+    int extradata_size = 0, filtered_size = 0;
     const int *extradata_nal_types;
     int nb_extradata_nal_types;
     int i, has_sps = 0, has_vps = 0, ret = 0;
@@ -75,13 +156,13 @@
         nb_extradata_nal_types = FF_ARRAY_ELEMS(extradata_nal_types_h264);
     }
 
-    ret = ff_h2645_packet_split(&h2645_pkt, pkt->data, pkt->size,
+    ret = ff_h2645_packet_split(&s->h2645_pkt, pkt->data, pkt->size,
                                 ctx, 0, 0, ctx->par_in->codec_id, 1);
     if (ret < 0)
         goto fail;
 
-    for (i = 0; i < h2645_pkt.nb_nals; i++) {
-        H2645NAL *nal = &h2645_pkt.nals[i];
+    for (i = 0; i < s->h2645_pkt.nb_nals; i++) {
+        H2645NAL *nal = &s->h2645_pkt.nals[i];
         if (val_in_array(extradata_nal_types, nb_extradata_nal_types, nal->type)) {
             extradata_size += nal->raw_size + 3;
             if (ctx->par_in->codec_id == AV_CODEC_ID_HEVC) {
@@ -90,6 +171,8 @@
             } else {
                 if (nal->type == H264_NAL_SPS) has_sps = 1;
             }
+        } else if (s->remove) {
+            filtered_size += nal->raw_size + 3;
         }
     }
 
@@ -100,26 +183,27 @@
         uint8_t *extradata, *filtered_data;
 
         if (s->remove) {
-            filtered_buf = av_buffer_alloc(pkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
+            filtered_buf = av_buffer_alloc(filtered_size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!filtered_buf) {
-                ret = AVERROR(ENOMEM);
-                goto fail;
+                return AVERROR(ENOMEM);
             }
+            memset(filtered_buf->data + filtered_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
             filtered_data = filtered_buf->data;
         }
 
         extradata = av_malloc(extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!extradata) {
             av_buffer_unref(&filtered_buf);
-            ret = AVERROR(ENOMEM);
-            goto fail;
+            return AVERROR(ENOMEM);
         }
+        memset(extradata + extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
         *data = extradata;
         *size = extradata_size;
 
-        for (i = 0; i < h2645_pkt.nb_nals; i++) {
-            H2645NAL *nal = &h2645_pkt.nals[i];
+        for (i = 0; i < s->h2645_pkt.nb_nals; i++) {
+            H2645NAL *nal = &s->h2645_pkt.nals[i];
             if (val_in_array(extradata_nal_types, nb_extradata_nal_types,
                              nal->type)) {
                 AV_WB24(extradata, 1); // startcode
@@ -136,13 +220,11 @@
             av_buffer_unref(&pkt->buf);
             pkt->buf  = filtered_buf;
             pkt->data = filtered_buf->data;
-            pkt->size = filtered_data - filtered_buf->data;
+            pkt->size = filtered_size;
         }
     }
 
-fail:
-    ff_h2645_packet_uninit(&h2645_pkt);
-    return ret;
+    return 0;
 }
 
 static int extract_extradata_vc1(AVBSFContext *ctx, AVPacket *pkt,
@@ -169,6 +251,7 @@
             return AVERROR(ENOMEM);
 
         memcpy(*data, pkt->data, extradata_size);
+        memset(*data + extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
         *size = extradata_size;
 
         if (s->remove) {
@@ -199,6 +282,7 @@
                     return AVERROR(ENOMEM);
 
                 memcpy(*data, pkt->data, *size);
+                memset(*data + *size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
                 if (s->remove) {
                     pkt->data += *size;
@@ -228,6 +312,7 @@
                     return AVERROR(ENOMEM);
 
                 memcpy(*data, pkt->data, *size);
+                memset(*data + *size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
                 if (s->remove) {
                     pkt->data += *size;
@@ -245,6 +330,8 @@
     int (*extract)(AVBSFContext *ctx, AVPacket *pkt,
                    uint8_t **data, int *size);
 } extract_tab[] = {
+    { AV_CODEC_ID_AV1,        extract_extradata_av1     },
+    { AV_CODEC_ID_AVS2,       extract_extradata_mpeg4   },
     { AV_CODEC_ID_CAVS,       extract_extradata_mpeg4   },
     { AV_CODEC_ID_H264,       extract_extradata_h2645   },
     { AV_CODEC_ID_HEVC,       extract_extradata_h2645   },
@@ -271,24 +358,23 @@
     return 0;
 }
 
-static int extract_extradata_filter(AVBSFContext *ctx, AVPacket *out)
+static int extract_extradata_filter(AVBSFContext *ctx, AVPacket *pkt)
 {
     ExtractExtradataContext *s = ctx->priv_data;
-    AVPacket *in;
     uint8_t *extradata = NULL;
     int extradata_size;
     int ret = 0;
 
-    ret = ff_bsf_get_packet(ctx, &in);
+    ret = ff_bsf_get_packet_ref(ctx, pkt);
     if (ret < 0)
         return ret;
 
-    ret = s->extract(ctx, in, &extradata, &extradata_size);
+    ret = s->extract(ctx, pkt, &extradata, &extradata_size);
     if (ret < 0)
         goto fail;
 
     if (extradata) {
-        ret = av_packet_add_side_data(in, AV_PKT_DATA_NEW_EXTRADATA,
+        ret = av_packet_add_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
                                       extradata, extradata_size);
         if (ret < 0) {
             av_freep(&extradata);
@@ -296,14 +382,23 @@
         }
     }
 
-    av_packet_move_ref(out, in);
+    return 0;
 
 fail:
-    av_packet_free(&in);
+    av_packet_unref(pkt);
     return ret;
 }
 
+static void extract_extradata_close(AVBSFContext *ctx)
+{
+    ExtractExtradataContext *s = ctx->priv_data;
+    ff_av1_packet_uninit(&s->av1_pkt);
+    ff_h2645_packet_uninit(&s->h2645_pkt);
+}
+
 static const enum AVCodecID codec_ids[] = {
+    AV_CODEC_ID_AV1,
+    AV_CODEC_ID_AVS2,
     AV_CODEC_ID_CAVS,
     AV_CODEC_ID_H264,
     AV_CODEC_ID_HEVC,
@@ -315,9 +410,10 @@
 };
 
 #define OFFSET(x) offsetof(ExtractExtradataContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
 static const AVOption options[] = {
     { "remove", "remove the extradata from the bitstream", OFFSET(remove), AV_OPT_TYPE_INT,
-        { .i64 = 0 }, 0, 1 },
+        { .i64 = 0 }, 0, 1, FLAGS },
     { NULL },
 };
 
@@ -335,4 +431,5 @@
     .priv_class     = &extract_extradata_class,
     .init           = extract_extradata_init,
     .filter         = extract_extradata_filter,
+    .close          = extract_extradata_close,
 };

diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 6c77854..762c014 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c

@@ -523,9 +523,11 @@
 }
 
 PASS(pass)
+#if !CONFIG_SMALL
 #undef BUTTERFLIES
 #define BUTTERFLIES BUTTERFLIES_BIG
 PASS(pass_big)
+#endif
 
 #define DECL_FFT(n,n2,n4)\
 static void fft##n(FFTComplex *z)\

diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 5eadb6b..261e0cf 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c

@@ -336,14 +336,16 @@
          decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0]    , width, height, p->linesize[0], 0, 2);
          decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0] + 1, width, height, p->linesize[0], 1, 2);
     } else if (f->use32bit) {
-        uint8_t *planes[3] = { p->data[0] + ps * x + y * p->linesize[0],
+        uint8_t *planes[4] = { p->data[0] + ps * x + y * p->linesize[0],
                                p->data[1] + ps * x + y * p->linesize[1],
-                               p->data[2] + ps * x + y * p->linesize[2] };
+                               p->data[2] + ps * x + y * p->linesize[2],
+                               p->data[3] + ps * x + y * p->linesize[3] };
         decode_rgb_frame32(fs, planes, width, height, p->linesize);
     } else {
-        uint8_t *planes[3] = { p->data[0] + ps * x + y * p->linesize[0],
+        uint8_t *planes[4] = { p->data[0] + ps * x + y * p->linesize[0],
                                p->data[1] + ps * x + y * p->linesize[1],
-                               p->data[2] + ps * x + y * p->linesize[2] };
+                               p->data[2] + ps * x + y * p->linesize[2],
+                               p->data[3] + ps * x + y * p->linesize[3] };
         decode_rgb_frame(fs, planes, width, height, p->linesize);
     }
     if (fs->ac != AC_GOLOMB_RICE && f->version > 2) {
@@ -544,8 +546,14 @@
         f->ac = get_symbol(c, state, 0);
 
         if (f->ac == AC_RANGE_CUSTOM_TAB) {
-            for (i = 1; i < 256; i++)
-                f->state_transition[i] = get_symbol(c, state, 1) + c->one_state[i];
+            for (i = 1; i < 256; i++) {
+                int st = get_symbol(c, state, 1) + c->one_state[i];
+                if (st < 1 || st > 255) {
+                    av_log(f->avctx, AV_LOG_ERROR, "invalid state transition %d\n", st);
+                    return AVERROR_INVALIDDATA;
+                }
+                f->state_transition[i] = st;
+            }
         }
 
         colorspace          = get_symbol(c, state, 0); //YUV cs type
@@ -589,7 +597,10 @@
         if (!f->transparency && !f->chroma_planes) {
             if (f->avctx->bits_per_raw_sample <= 8)
                 f->avctx->pix_fmt = AV_PIX_FMT_GRAY8;
-            else if (f->avctx->bits_per_raw_sample == 10) {
+            else if (f->avctx->bits_per_raw_sample == 9) {
+                f->packed_at_lsb = 1;
+                f->avctx->pix_fmt = AV_PIX_FMT_GRAY9;
+            } else if (f->avctx->bits_per_raw_sample == 10) {
                 f->packed_at_lsb = 1;
                 f->avctx->pix_fmt = AV_PIX_FMT_GRAY10;
             } else if (f->avctx->bits_per_raw_sample == 12) {
@@ -640,6 +651,7 @@
             f->packed_at_lsb = 1;
             switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
             case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P10; break;
+            case 0x01: f->avctx->pix_fmt = AV_PIX_FMT_YUV440P10; break;
             case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P10; break;
             case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P10; break;
             }
@@ -654,9 +666,17 @@
             f->packed_at_lsb = 1;
             switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
             case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P12; break;
+            case 0x01: f->avctx->pix_fmt = AV_PIX_FMT_YUV440P12; break;
             case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P12; break;
             case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P12; break;
             }
+        } else if (f->avctx->bits_per_raw_sample == 14 && !f->transparency) {
+            f->packed_at_lsb = 1;
+            switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
+            case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P14; break;
+            case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P14; break;
+            case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P14; break;
+            }
         } else if (f->avctx->bits_per_raw_sample == 16 && !f->transparency){
             f->packed_at_lsb = 1;
             switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
@@ -686,14 +706,22 @@
             f->avctx->pix_fmt = AV_PIX_FMT_GBRP9;
         else if (f->avctx->bits_per_raw_sample == 10 && !f->transparency)
             f->avctx->pix_fmt = AV_PIX_FMT_GBRP10;
+        else if (f->avctx->bits_per_raw_sample == 10 && f->transparency)
+            f->avctx->pix_fmt = AV_PIX_FMT_GBRAP10;
         else if (f->avctx->bits_per_raw_sample == 12 && !f->transparency)
             f->avctx->pix_fmt = AV_PIX_FMT_GBRP12;
+        else if (f->avctx->bits_per_raw_sample == 12 && f->transparency)
+            f->avctx->pix_fmt = AV_PIX_FMT_GBRAP12;
         else if (f->avctx->bits_per_raw_sample == 14 && !f->transparency)
             f->avctx->pix_fmt = AV_PIX_FMT_GBRP14;
         else if (f->avctx->bits_per_raw_sample == 16 && !f->transparency) {
             f->avctx->pix_fmt = AV_PIX_FMT_GBRP16;
             f->use32bit = 1;
         }
+        else if (f->avctx->bits_per_raw_sample == 16 && f->transparency) {
+            f->avctx->pix_fmt = AV_PIX_FMT_GBRAP16;
+            f->use32bit = 1;
+        }
     } else {
         av_log(f->avctx, AV_LOG_ERROR, "colorspace not supported\n");
         return AVERROR(ENOSYS);
@@ -928,7 +956,7 @@
 
             }
             if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
-                desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) {
+                desc->flags & FF_PSEUDOPAL) {
                 dst[1] = p->data[1];
                 src[1] = f->last_picture.f->data[1];
             }

diff --git a/libavcodec/ffv1dec_template.c b/libavcodec/ffv1dec_template.c
index 37df766..25032fe 100644
--- a/libavcodec/ffv1dec_template.c
+++ b/libavcodec/ffv1dec_template.c

@@ -50,6 +50,11 @@
     for (x = 0; x < w; x++) {
         int diff, context, sign;
 
+        if (!(x & 1023)) {
+            if (is_input_end(s))
+                return AVERROR_INVALIDDATA;
+        }
+
         context = RENAME(get_context)(p, sample[1] + x, sample[0] + x, sample[1] + x);
         if (context < 0) {
             context = -context;
@@ -107,13 +112,14 @@
     return 0;
 }
 
-static void RENAME(decode_rgb_frame)(FFV1Context *s, uint8_t *src[3], int w, int h, int stride[3])
+static int RENAME(decode_rgb_frame)(FFV1Context *s, uint8_t *src[4], int w, int h, int stride[4])
 {
     int x, y, p;
     TYPE *sample[4][2];
     int lbd    = s->avctx->bits_per_raw_sample <= 8;
     int bits   = s->avctx->bits_per_raw_sample > 0 ? s->avctx->bits_per_raw_sample : 8;
     int offset = 1 << bits;
+    int transparency = s->transparency;
 
     for (x = 0; x < 4; x++) {
         sample[x][0] = RENAME(s->sample_buffer) +  x * 2      * (w + 6) + 3;
@@ -125,7 +131,8 @@
     memset(RENAME(s->sample_buffer), 0, 8 * (w + 6) * sizeof(*RENAME(s->sample_buffer)));
 
     for (y = 0; y < h; y++) {
-        for (p = 0; p < 3 + s->transparency; p++) {
+        for (p = 0; p < 3 + transparency; p++) {
+            int ret;
             TYPE *temp = sample[p][0]; // FIXME: try a normal buffer
 
             sample[p][0] = sample[p][1];
@@ -134,9 +141,11 @@
             sample[p][1][-1]= sample[p][0][0  ];
             sample[p][0][ w]= sample[p][0][w-1];
             if (lbd && s->slice_coding_mode == 0)
-                RENAME(decode_line)(s, w, sample[p], (p + 1)/2, 9);
+                ret = RENAME(decode_line)(s, w, sample[p], (p + 1)/2, 9);
             else
-                RENAME(decode_line)(s, w, sample[p], (p + 1)/2, bits + (s->slice_coding_mode != 1));
+                ret = RENAME(decode_line)(s, w, sample[p], (p + 1)/2, bits + (s->slice_coding_mode != 1));
+            if (ret < 0)
+                return ret;
         }
         for (x = 0; x < w; x++) {
             int g = sample[0][1][x];
@@ -154,10 +163,12 @@
 
             if (lbd)
                 *((uint32_t*)(src[0] + x*4 + stride[0]*y)) = b + ((unsigned)g<<8) + ((unsigned)r<<16) + ((unsigned)a<<24);
-            else if (sizeof(TYPE) == 4) {
+            else if (sizeof(TYPE) == 4 || transparency) {
                 *((uint16_t*)(src[0] + x*2 + stride[0]*y)) = g;
                 *((uint16_t*)(src[1] + x*2 + stride[1]*y)) = b;
                 *((uint16_t*)(src[2] + x*2 + stride[2]*y)) = r;
+                if (transparency)
+                    *((uint16_t*)(src[3] + x*2 + stride[3]*y)) = a;
             } else {
                 *((uint16_t*)(src[0] + x*2 + stride[0]*y)) = b;
                 *((uint16_t*)(src[1] + x*2 + stride[1]*y)) = g;
@@ -165,4 +176,5 @@
             }
         }
     }
+    return 0;
 }

diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index c9a885e..f5eb0fe 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c

@@ -123,7 +123,7 @@
      40,  40,  41,  79,  43,  44,  45,  45, 48,   48,  64,  50,  51,  52,  88,  52,
      53,  74,  55,  57,  58,  58,  74,  60, 101,  61,  62,  84,  66,  66,  68,  69,
      87,  82,  71,  97,  73,  73,  82,  75, 111,  77,  94,  78,  87,  81,  83,  97,
-     85,  83,  94,  86,  99,  89,  90,  99, 111,  92,  93,  134, 95,  98,  105, 98,
+     85,  83,  94,  86,  99,  89,  90,  99, 111,  92,  93,  134, 95,  98, 105,  98,
     105, 110, 102, 108, 102, 118, 103, 106, 106, 113, 109, 112, 114, 112, 116, 125,
     115, 116, 117, 117, 126, 119, 125, 121, 121, 123, 145, 124, 126, 131, 127, 129,
     165, 130, 132, 138, 133, 135, 145, 136, 137, 139, 146, 141, 143, 142, 144, 148,
@@ -539,6 +539,10 @@
         s->ec = (s->version >= 3);
     }
 
+    // CRC requires version 3+
+    if (s->ec)
+        s->version = FFMAX(s->version, 3);
+
     if ((s->version == 2 || s->version>3) && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
         av_log(avctx, AV_LOG_ERROR, "Version 2 needed for requested features but version 2 is experimental and not enabled\n");
         return AVERROR_INVALIDDATA;
@@ -558,6 +562,7 @@
 
     s->plane_count = 3;
     switch(avctx->pix_fmt) {
+    case AV_PIX_FMT_GRAY9:
     case AV_PIX_FMT_YUV444P9:
     case AV_PIX_FMT_YUV422P9:
     case AV_PIX_FMT_YUV420P9:
@@ -568,6 +573,7 @@
             s->bits_per_raw_sample = 9;
     case AV_PIX_FMT_GRAY10:
     case AV_PIX_FMT_YUV444P10:
+    case AV_PIX_FMT_YUV440P10:
     case AV_PIX_FMT_YUV420P10:
     case AV_PIX_FMT_YUV422P10:
     case AV_PIX_FMT_YUVA444P10:
@@ -577,11 +583,17 @@
             s->bits_per_raw_sample = 10;
     case AV_PIX_FMT_GRAY12:
     case AV_PIX_FMT_YUV444P12:
+    case AV_PIX_FMT_YUV440P12:
     case AV_PIX_FMT_YUV420P12:
     case AV_PIX_FMT_YUV422P12:
-        s->packed_at_lsb = 1;
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
             s->bits_per_raw_sample = 12;
+    case AV_PIX_FMT_YUV444P14:
+    case AV_PIX_FMT_YUV420P14:
+    case AV_PIX_FMT_YUV422P14:
+        if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
+            s->bits_per_raw_sample = 14;
+        s->packed_at_lsb = 1;
     case AV_PIX_FMT_GRAY16:
     case AV_PIX_FMT_YUV444P16:
     case AV_PIX_FMT_YUV422P16:
@@ -612,7 +624,7 @@
     case AV_PIX_FMT_YUVA420P:
         s->chroma_planes = desc->nb_components < 3 ? 0 : 1;
         s->colorspace = 0;
-        s->transparency = desc->nb_components == 4 || desc->nb_components == 2;
+        s->transparency = !!(desc->flags & AV_PIX_FMT_FLAG_ALPHA);
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
             s->bits_per_raw_sample = 8;
         else if (!s->bits_per_raw_sample)
@@ -624,16 +636,20 @@
         s->chroma_planes = 1;
         s->bits_per_raw_sample = 8;
         break;
+    case AV_PIX_FMT_RGBA64:
+        s->colorspace = 1;
+        s->transparency = 1;
+        s->chroma_planes = 1;
+        s->bits_per_raw_sample = 16;
+        s->use32bit = 1;
+        s->version = FFMAX(s->version, 1);
+        break;
     case AV_PIX_FMT_RGB48:
         s->colorspace = 1;
         s->chroma_planes = 1;
         s->bits_per_raw_sample = 16;
         s->use32bit = 1;
         s->version = FFMAX(s->version, 1);
-        if (avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
-            av_log(avctx, AV_LOG_ERROR, "16bit RGB is experimental and under development, only use it for experiments\n");
-            return AVERROR_INVALIDDATA;
-        }
         break;
     case AV_PIX_FMT_0RGB32:
         s->colorspace = 1;
@@ -644,27 +660,27 @@
         if (!avctx->bits_per_raw_sample)
             s->bits_per_raw_sample = 9;
     case AV_PIX_FMT_GBRP10:
+    case AV_PIX_FMT_GBRAP10:
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
             s->bits_per_raw_sample = 10;
     case AV_PIX_FMT_GBRP12:
+    case AV_PIX_FMT_GBRAP12:
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
             s->bits_per_raw_sample = 12;
     case AV_PIX_FMT_GBRP14:
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
             s->bits_per_raw_sample = 14;
     case AV_PIX_FMT_GBRP16:
+    case AV_PIX_FMT_GBRAP16:
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
             s->bits_per_raw_sample = 16;
         else if (!s->bits_per_raw_sample)
             s->bits_per_raw_sample = avctx->bits_per_raw_sample;
+        s->transparency = !!(desc->flags & AV_PIX_FMT_FLAG_ALPHA);
         s->colorspace = 1;
         s->chroma_planes = 1;
         if (s->bits_per_raw_sample >= 16) {
             s->use32bit = 1;
-            if (avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
-                av_log(avctx, AV_LOG_ERROR, "16bit RGB is experimental and under development, only use it for experiments\n");
-                return AVERROR_INVALIDDATA;
-            }
         }
         s->version = FFMAX(s->version, 1);
         break;
@@ -681,9 +697,6 @@
             s->ac = AC_RANGE_CUSTOM_TAB;
         }
     }
-    if (s->transparency) {
-        av_log(avctx, AV_LOG_WARNING, "Storing alpha plane, this will require a recent FFV1 decoder to playback!\n");
-    }
 #if FF_API_PRIVATE_OPT
 FF_DISABLE_DEPRECATION_WARNINGS
     if (avctx->context_model)
@@ -754,7 +767,10 @@
     if (!s->chroma_planes && s->version > 3)
         s->plane_count--;
 
-    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
+    ret = av_pix_fmt_get_chroma_sub_sample (avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
+    if (ret)
+        return ret;
+
     s->picture_number = 0;
 
     if (avctx->flags & (AV_CODEC_FLAG_PASS1 | AV_CODEC_FLAG_PASS2)) {
@@ -1029,9 +1045,10 @@
     const int ps     = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step;
     int ret;
     RangeCoder c_bak = fs->c;
-    const uint8_t *planes[3] = {p->data[0] + ps*x + y*p->linesize[0],
+    const uint8_t *planes[4] = {p->data[0] + ps*x + y*p->linesize[0],
                                 p->data[1] ? p->data[1] + ps*x + y*p->linesize[1] : NULL,
-                                p->data[2] ? p->data[2] + ps*x + y*p->linesize[2] : NULL};
+                                p->data[2] ? p->data[2] + ps*x + y*p->linesize[2] : NULL,
+                                p->data[3] ? p->data[3] + ps*x + y*p->linesize[3] : NULL};
 
     fs->slice_coding_mode = 0;
     if (f->version > 3) {
@@ -1320,9 +1337,14 @@
         AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA420P9,
         AV_PIX_FMT_GRAY16,    AV_PIX_FMT_GRAY8,     AV_PIX_FMT_GBRP9,     AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12,    AV_PIX_FMT_GBRP14,
+        AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12,
         AV_PIX_FMT_YA8,
         AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12,
         AV_PIX_FMT_GBRP16, AV_PIX_FMT_RGB48,
+        AV_PIX_FMT_GBRAP16, AV_PIX_FMT_RGBA64,
+        AV_PIX_FMT_GRAY9,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
+        AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV440P12,
         AV_PIX_FMT_NONE
 
     },

diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c
index b7eea0d..bc0add5 100644
--- a/libavcodec/ffv1enc_template.c
+++ b/libavcodec/ffv1enc_template.c

@@ -122,8 +122,8 @@
     return 0;
 }
 
-static int RENAME(encode_rgb_frame)(FFV1Context *s, const uint8_t *src[3],
-                                    int w, int h, const int stride[3])
+static int RENAME(encode_rgb_frame)(FFV1Context *s, const uint8_t *src[4],
+                                    int w, int h, const int stride[4])
 {
     int x, y, p, i;
     const int ring_size = s->context_model ? 3 : 2;
@@ -132,6 +132,8 @@
     int packed = !src[1];
     int bits   = s->bits_per_raw_sample > 0 ? s->bits_per_raw_sample : 8;
     int offset = 1 << bits;
+    int transparency = s->transparency;
+    int packed_size = (3 + transparency)*2;
 
     s->run_index = 0;
 
@@ -152,14 +154,18 @@
                 r = (v >> 16) & 0xFF;
                 a =  v >> 24;
             } else if (packed) {
-                const uint16_t *p = ((const uint16_t*)(src[0] + x*6 + stride[0]*y));
+                const uint16_t *p = ((const uint16_t*)(src[0] + x*packed_size + stride[0]*y));
                 r = p[0];
                 g = p[1];
                 b = p[2];
-            } else if (sizeof(TYPE) == 4) {
+                if (transparency)
+                  a = p[3];
+            } else if (sizeof(TYPE) == 4 || transparency) {
                 g = *((const uint16_t *)(src[0] + x*2 + stride[0]*y));
                 b = *((const uint16_t *)(src[1] + x*2 + stride[1]*y));
                 r = *((const uint16_t *)(src[2] + x*2 + stride[2]*y));
+                if (transparency)
+                    a = *((const uint16_t *)(src[3] + x*2 + stride[3]*y));
             } else {
                 b = *((const uint16_t *)(src[0] + x*2 + stride[0]*y));
                 g = *((const uint16_t *)(src[1] + x*2 + stride[1]*y));
@@ -179,7 +185,7 @@
             sample[2][0][x] = r;
             sample[3][0][x] = a;
         }
-        for (p = 0; p < 3 + s->transparency; p++) {
+        for (p = 0; p < 3 + transparency; p++) {
             int ret;
             sample[p][0][-1] = sample[p][1][0  ];
             sample[p][1][ w] = sample[p][1][w-1];

diff --git a/libavcodec/fic.c b/libavcodec/fic.c
index d7ee370..dcf0777 100644
--- a/libavcodec/fic.c
+++ b/libavcodec/fic.c

@@ -82,6 +82,7 @@
 static const uint8_t fic_header[7] = { 0, 0, 1, 'F', 'I', 'C', 'V' };
 
 #define FIC_HEADER_SIZE 27
+#define CURSOR_OFFSET 59
 
 static av_always_inline void fic_idct(int16_t *blk, int step, int shift, int rnd)
 {
@@ -337,6 +338,10 @@
         skip_cursor = 1;
     }
 
+    if (!skip_cursor && avpkt->size < CURSOR_OFFSET + sizeof(ctx->cursor_buf)) {
+        skip_cursor = 1;
+    }
+
     /* Slice height for all but the last slice. */
     ctx->slice_h = 16 * (ctx->aligned_height >> 4) / nslices;
     if (ctx->slice_h % 16)
@@ -416,7 +421,7 @@
 
     /* Draw cursor. */
     if (!skip_cursor) {
-        memcpy(ctx->cursor_buf, src + 59, 32 * 32 * 4);
+        memcpy(ctx->cursor_buf, src + CURSOR_OFFSET, sizeof(ctx->cursor_buf));
         fic_draw_cursor(avctx, cur_x, cur_y);
     }
 
@@ -464,7 +469,7 @@
 };
 
 static const AVClass fic_decoder_class = {
-    .class_name = "FIC encoder",
+    .class_name = "FIC decoder",
     .item_name  = av_default_item_name,
     .option     = options,
     .version    = LIBAVUTIL_VERSION_INT,

diff --git a/libavcodec/filter_units_bsf.c b/libavcodec/filter_units_bsf.c
new file mode 100644
index 0000000..1ee0afd
--- /dev/null
+++ b/libavcodec/filter_units_bsf.c

@@ -0,0 +1,256 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "bsf.h"
+#include "cbs.h"
+
+
+typedef struct FilterUnitsContext {
+    const AVClass *class;
+
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment fragment;
+
+    const char *pass_types;
+    const char *remove_types;
+
+    enum {
+        NOOP,
+        PASS,
+        REMOVE,
+    } mode;
+    CodedBitstreamUnitType *type_list;
+    int nb_types;
+} FilterUnitsContext;
+
+
+static int filter_units_make_type_list(const char *list_string,
+                                       CodedBitstreamUnitType **type_list,
+                                       int *nb_types)
+{
+    CodedBitstreamUnitType *list = NULL;
+    int pass, count;
+
+    for (pass = 1; pass <= 2; pass++) {
+        long value, range_start, range_end;
+        const char *str;
+        char *value_end;
+
+        count = 0;
+        for (str = list_string; *str;) {
+            value = strtol(str, &value_end, 0);
+            if (str == value_end)
+                goto invalid;
+            str = (const char *)value_end;
+            if (*str == '-') {
+                ++str;
+                range_start = value;
+                range_end   = strtol(str, &value_end, 0);
+                if (str == value_end)
+                    goto invalid;
+
+                for (value = range_start; value < range_end; value++) {
+                    if (pass == 2)
+                        list[count] = value;
+                    ++count;
+                }
+            } else {
+                if (pass == 2)
+                    list[count] = value;
+                ++count;
+            }
+            if (*str == '|')
+                ++str;
+        }
+        if (pass == 1) {
+            list = av_malloc_array(count, sizeof(*list));
+            if (!list)
+                return AVERROR(ENOMEM);
+        }
+    }
+
+    *type_list = list;
+    *nb_types  = count;
+    return 0;
+
+invalid:
+    av_freep(&list);
+    return AVERROR(EINVAL);
+}
+
+static int filter_units_filter(AVBSFContext *bsf, AVPacket *out)
+{
+    FilterUnitsContext      *ctx = bsf->priv_data;
+    CodedBitstreamFragment *frag = &ctx->fragment;
+    AVPacket *in = NULL;
+    int err, i, j;
+
+    while (1) {
+        err = ff_bsf_get_packet(bsf, &in);
+        if (err < 0)
+            return err;
+
+        if (ctx->mode == NOOP) {
+            av_packet_move_ref(out, in);
+            av_packet_free(&in);
+            return 0;
+        }
+
+        err = ff_cbs_read_packet(ctx->cbc, frag, in);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
+            goto fail;
+        }
+
+        for (i = 0; i < frag->nb_units; i++) {
+            for (j = 0; j < ctx->nb_types; j++) {
+                if (frag->units[i].type == ctx->type_list[j])
+                    break;
+            }
+            if (ctx->mode == REMOVE ? j <  ctx->nb_types
+                                    : j >= ctx->nb_types) {
+                ff_cbs_delete_unit(ctx->cbc, frag, i);
+                --i;
+            }
+        }
+
+        if (frag->nb_units > 0)
+            break;
+
+        // Don't return packets with nothing in them.
+        av_packet_free(&in);
+        ff_cbs_fragment_uninit(ctx->cbc, frag);
+    }
+
+    err = ff_cbs_write_packet(ctx->cbc, out, frag);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
+        goto fail;
+    }
+
+    err = av_packet_copy_props(out, in);
+    if (err < 0)
+        goto fail;
+
+fail:
+    ff_cbs_fragment_uninit(ctx->cbc, frag);
+    av_packet_free(&in);
+
+    return err;
+}
+
+static int filter_units_init(AVBSFContext *bsf)
+{
+    FilterUnitsContext *ctx = bsf->priv_data;
+    int err;
+
+    if (ctx->pass_types && ctx->remove_types) {
+        av_log(bsf, AV_LOG_ERROR, "Exactly one of pass_types or "
+               "remove_types is required.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (ctx->pass_types) {
+        ctx->mode = PASS;
+        err = filter_units_make_type_list(ctx->pass_types,
+                                          &ctx->type_list, &ctx->nb_types);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to parse pass_types.\n");
+            return err;
+        }
+    } else if (ctx->remove_types) {
+        ctx->mode = REMOVE;
+        err = filter_units_make_type_list(ctx->remove_types,
+                                          &ctx->type_list, &ctx->nb_types);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to parse remove_types.\n");
+            return err;
+        }
+    } else {
+        return 0;
+    }
+
+    err = ff_cbs_init(&ctx->cbc, bsf->par_in->codec_id, bsf);
+    if (err < 0)
+        return err;
+
+    // Don't actually decompose anything, we only want the unit data.
+    ctx->cbc->decompose_unit_types    = ctx->type_list;
+    ctx->cbc->nb_decompose_unit_types = 0;
+
+    if (bsf->par_in->extradata) {
+        CodedBitstreamFragment ps;
+
+        err = ff_cbs_read_extradata(ctx->cbc, &ps, bsf->par_in);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
+        } else {
+            err = ff_cbs_write_extradata(ctx->cbc, bsf->par_out, &ps);
+            if (err < 0)
+                av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
+        }
+
+        ff_cbs_fragment_uninit(ctx->cbc, &ps);
+    }
+
+    return err;
+}
+
+static void filter_units_close(AVBSFContext *bsf)
+{
+    FilterUnitsContext *ctx = bsf->priv_data;
+
+    av_freep(&ctx->type_list);
+
+    ff_cbs_close(&ctx->cbc);
+}
+
+#define OFFSET(x) offsetof(FilterUnitsContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
+static const AVOption filter_units_options[] = {
+    { "pass_types",   "List of unit types to pass through the filter.",
+        OFFSET(pass_types),   AV_OPT_TYPE_STRING,
+        { .str = NULL }, .flags = FLAGS },
+    { "remove_types", "List of unit types to remove in the filter.",
+        OFFSET(remove_types), AV_OPT_TYPE_STRING,
+        { .str = NULL }, .flags = FLAGS },
+
+    { NULL }
+};
+
+static const AVClass filter_units_class = {
+    .class_name = "filter_units",
+    .item_name  = av_default_item_name,
+    .option     = filter_units_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const AVBitStreamFilter ff_filter_units_bsf = {
+    .name           = "filter_units",
+    .priv_data_size = sizeof(FilterUnitsContext),
+    .priv_class     = &filter_units_class,
+    .init           = &filter_units_init,
+    .close          = &filter_units_close,
+    .filter         = &filter_units_filter,
+    .codec_ids      = ff_cbs_all_codec_ids,
+};

diff --git a/libavcodec/flac_parser.c b/libavcodec/flac_parser.c
index 84da23f..2721286 100644
--- a/libavcodec/flac_parser.c
+++ b/libavcodec/flac_parser.c

@@ -686,12 +686,17 @@
     }
 
     for (curr = fpc->headers; curr; curr = curr->next) {
-        if (curr->max_score > 0 &&
-            (!fpc->best_header || curr->max_score > fpc->best_header->max_score)) {
+        if (!fpc->best_header || curr->max_score > fpc->best_header->max_score) {
             fpc->best_header = curr;
         }
     }
 
+    if (fpc->best_header && fpc->best_header->max_score <= 0) {
+        // Only accept a bad header if there is no other option to continue
+        if (!buf_size || !buf || read_end != buf || fpc->nb_headers_buffered < FLAC_MIN_HEADERS)
+            fpc->best_header = NULL;
+    }
+
     if (fpc->best_header) {
         fpc->best_header_valid = 1;
         if (fpc->best_header->offset > 0) {

diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
index 3d41a1a..c8eb456 100644
--- a/libavcodec/flacdec.c
+++ b/libavcodec/flacdec.c

@@ -220,20 +220,27 @@
 
 static int decode_residuals(FLACContext *s, int32_t *decoded, int pred_order)
 {
+    GetBitContext gb = s->gb;
     int i, tmp, partition, method_type, rice_order;
     int rice_bits, rice_esc;
     int samples;
 
-    method_type = get_bits(&s->gb, 2);
+    method_type = get_bits(&gb, 2);
+    rice_order  = get_bits(&gb, 4);
+
+    samples   = s->blocksize >> rice_order;
+    rice_bits = 4 + method_type;
+    rice_esc  = (1 << rice_bits) - 1;
+
+    decoded += pred_order;
+    i        = pred_order;
+
     if (method_type > 1) {
         av_log(s->avctx, AV_LOG_ERROR, "illegal residual coding method %d\n",
                method_type);
         return AVERROR_INVALIDDATA;
     }
 
-    rice_order = get_bits(&s->gb, 4);
-
-    samples= s->blocksize >> rice_order;
     if (samples << rice_order != s->blocksize) {
         av_log(s->avctx, AV_LOG_ERROR, "invalid rice order: %i blocksize %i\n",
                rice_order, s->blocksize);
@@ -246,21 +253,16 @@
         return AVERROR_INVALIDDATA;
     }
 
-    rice_bits = 4 + method_type;
-    rice_esc  = (1 << rice_bits) - 1;
-
-    decoded += pred_order;
-    i= pred_order;
     for (partition = 0; partition < (1 << rice_order); partition++) {
-        tmp = get_bits(&s->gb, rice_bits);
+        tmp = get_bits(&gb, rice_bits);
         if (tmp == rice_esc) {
-            tmp = get_bits(&s->gb, 5);
+            tmp = get_bits(&gb, 5);
             for (; i < samples; i++)
-                *decoded++ = get_sbits_long(&s->gb, tmp);
+                *decoded++ = get_sbits_long(&gb, tmp);
         } else {
             int real_limit = tmp ? (INT_MAX >> tmp) + 2 : INT_MAX;
             for (; i < samples; i++) {
-                int v = get_sr_golomb_flac(&s->gb, tmp, real_limit, 0);
+                int v = get_sr_golomb_flac(&gb, tmp, real_limit, 0);
                 if (v == 0x80000000){
                     av_log(s->avctx, AV_LOG_ERROR, "invalid residual\n");
                     return AVERROR_INVALIDDATA;
@@ -272,6 +274,8 @@
         i= 0;
     }
 
+    s->gb = gb;
+
     return 0;
 }
 

diff --git a/libavcodec/flashsv.c b/libavcodec/flashsv.c
index 90e1d43..1dc3c71 100644
--- a/libavcodec/flashsv.c
+++ b/libavcodec/flashsv.c

@@ -551,7 +551,11 @@
 static av_cold int flashsv2_decode_init(AVCodecContext *avctx)
 {
     FlashSVContext *s = avctx->priv_data;
-    flashsv_decode_init(avctx);
+    int ret;
+
+    ret = flashsv_decode_init(avctx);
+    if (ret < 0)
+        return ret;
     s->pal = ff_flashsv2_default_palette;
     s->ver = 2;
 

diff --git a/libavcodec/fmvc.c b/libavcodec/fmvc.c
index 74e9bdd..5778d7b 100644
--- a/libavcodec/fmvc.c
+++ b/libavcodec/fmvc.c

@@ -44,11 +44,11 @@
     size_t          buffer_size;
     uint8_t        *pbuffer;
     size_t          pbuffer_size;
-    int             stride;
+    ptrdiff_t       stride;
     int             bpp;
     int             yb, xb;
     InterBlock     *blocks;
-    int             nb_blocks;
+    unsigned        nb_blocks;
 } FMVCContext;
 
 static int decode_type2(GetByteContext *gb, PutByteContext *pb)
@@ -150,7 +150,7 @@
             if (opcode >= 0x40) {
                 bytestream2_skip(gb, 1);
                 pos = - ((opcode >> 2) & 7) - 1 - 8 * bytestream2_get_byte(gb);
-                len = (opcode >> 5) - 1;
+                len =    (opcode >> 5)      - 1;
 
                 bytestream2_init(&gbc, pb->buffer_start, pb->buffer_end - pb->buffer_start);
                 bytestream2_seek(&gbc, bytestream2_tell_p(pb) + pos, SEEK_SET);
@@ -305,7 +305,7 @@
                     break;
                 opcode = bytestream2_get_byte(gb);
                 if (opcode < 0xF8) {
-                    opcode = opcode + 32;
+                    opcode += 32;
                     break;
                 }
                 i = opcode - 0xF8;
@@ -393,9 +393,8 @@
     return 0;
 }
 
-static int decode_frame(AVCodecContext *avctx,
-                        void *data, int *got_frame,
-                        AVPacket *avpkt)
+static int decode_frame(AVCodecContext *avctx, void *data,
+                        int *got_frame, AVPacket *avpkt)
 {
     FMVCContext *s = avctx->priv_data;
     GetByteContext *gb = &s->gb;
@@ -414,7 +413,7 @@
 
     if (frame->key_frame) {
         const uint8_t *src;
-        int type, size;
+        unsigned type, size;
         uint8_t *dst;
 
         type = bytestream2_get_le16(gb);
@@ -428,7 +427,7 @@
         } else if (type == 2){
             decode_type2(gb, pb);
         } else {
-            avpriv_report_missing_feature(avctx, "compression %d", type);
+            avpriv_report_missing_feature(avctx, "Compression type %d", type);
             return AVERROR_PATCHWELCOME;
         }
 
@@ -440,7 +439,8 @@
             src += s->stride * 4;
         }
     } else {
-        int block, nb_blocks, type, k, l;
+        unsigned block, nb_blocks;
+        int type, k, l;
         uint8_t *ssrc, *ddst;
         const uint32_t *src;
         uint32_t *dst;
@@ -456,7 +456,8 @@
 
         type = bytestream2_get_le16(gb);
         for (block = 0; block < nb_blocks; block++) {
-            int size, offset, start = 0;
+            unsigned size, offset;
+            int start = 0;
 
             offset = bytestream2_get_le16(gb);
             if (offset >= s->nb_blocks)
@@ -472,7 +473,7 @@
             } else if (type == 2){
                 decode_type2(gb, pb);
             } else {
-                avpriv_report_missing_feature(avctx, "compression %d", type);
+                avpriv_report_missing_feature(avctx, "Compression type %d", type);
                 return AVERROR_PATCHWELCOME;
             }
 
@@ -497,9 +498,8 @@
                 if (s->blocks[block].xor) {
                     for (k = 0; k < block_h; k++) {
                         uint32_t *column = dst;
-                        for (l = 0; l < block_w; l++) {
+                        for (l = 0; l < block_w; l++)
                             *dst++ ^= *src++;
-                        }
                         dst = &column[s->stride];
                     }
                 }
@@ -529,17 +529,24 @@
     int i, j, m, block = 0, h = BLOCK_HEIGHT, w = BLOCK_WIDTH;
 
     switch (avctx->bits_per_coded_sample) {
-    case 16: avctx->pix_fmt = AV_PIX_FMT_RGB555; break;
-    case 24: avctx->pix_fmt = AV_PIX_FMT_BGR24;  break;
-    case 32: avctx->pix_fmt = AV_PIX_FMT_BGRA;   break;
+    case 16:
+        avctx->pix_fmt = AV_PIX_FMT_RGB555LE;
+        break;
+    case 24:
+        avctx->pix_fmt = AV_PIX_FMT_BGR24;
+        break;
+    case 32:
+        avctx->pix_fmt = AV_PIX_FMT_BGRA;
+        break;
     default:
-        av_log(avctx, AV_LOG_ERROR, "Unsupported bitdepth %i\n", avctx->bits_per_coded_sample);
+        av_log(avctx, AV_LOG_ERROR, "Unsupported bitdepth %i\n",
+               avctx->bits_per_coded_sample);
         return AVERROR_INVALIDDATA;
     }
 
     s->stride = (avctx->width * avctx->bits_per_coded_sample + 31) / 32;
-    s->xb = s->stride / BLOCK_WIDTH;
-    m = s->stride % BLOCK_WIDTH;
+    s->xb     = s->stride / BLOCK_WIDTH;
+    m         = s->stride % BLOCK_WIDTH;
     if (m) {
         if (m < 37) {
             w = m + BLOCK_WIDTH;
@@ -550,7 +557,7 @@
     }
 
     s->yb = avctx->height / BLOCK_HEIGHT;
-    m = avctx->height % BLOCK_HEIGHT;
+    m     = avctx->height % BLOCK_HEIGHT;
     if (m) {
         if (m < 49) {
             h = m + BLOCK_HEIGHT;
@@ -563,8 +570,7 @@
     s->nb_blocks = s->xb * s->yb;
     if (!s->nb_blocks)
         return AVERROR_INVALIDDATA;
-
-    s->blocks = av_calloc(s->nb_blocks, sizeof(*s->blocks));
+    s->blocks    = av_calloc(s->nb_blocks, sizeof(*s->blocks));
     if (!s->blocks)
         return AVERROR(ENOMEM);
 
@@ -572,32 +578,32 @@
         for (j = 0; j < s->xb; j++) {
             if (i != (s->yb - 1) || j != (s->xb - 1)) {
                 if (i == s->yb - 1) {
-                    s->blocks[block].w = BLOCK_WIDTH;
-                    s->blocks[block].h = h;
+                    s->blocks[block].w    = BLOCK_WIDTH;
+                    s->blocks[block].h    = h;
                     s->blocks[block].size = BLOCK_WIDTH * h;
                 } else if (j == s->xb - 1) {
-                    s->blocks[block].w = w;
-                    s->blocks[block].h = BLOCK_HEIGHT;
+                    s->blocks[block].w    = w;
+                    s->blocks[block].h    = BLOCK_HEIGHT;
                     s->blocks[block].size = BLOCK_HEIGHT * w;
                 } else {
-                    s->blocks[block].w = BLOCK_WIDTH;
-                    s->blocks[block].h = BLOCK_HEIGHT;
+                    s->blocks[block].w    = BLOCK_WIDTH;
+                    s->blocks[block].h    = BLOCK_HEIGHT;
                     s->blocks[block].size = BLOCK_WIDTH * BLOCK_HEIGHT;
                 }
             } else {
-                s->blocks[block].w = w;
-                s->blocks[block].h = h;
+                s->blocks[block].w    = w;
+                s->blocks[block].h    = h;
                 s->blocks[block].size = w * h;
             }
             block++;
         }
     }
 
-    s->bpp = avctx->bits_per_coded_sample >> 3;
-    s->buffer_size = avctx->width * avctx->height * 4;
+    s->bpp          = avctx->bits_per_coded_sample >> 3;
+    s->buffer_size  = avctx->width * avctx->height * 4;
     s->pbuffer_size = avctx->width * avctx->height * 4;
-    s->buffer = av_mallocz(s->buffer_size);
-    s->pbuffer = av_mallocz(s->pbuffer_size);
+    s->buffer       = av_mallocz(s->buffer_size);
+    s->pbuffer      = av_mallocz(s->pbuffer_size);
     if (!s->buffer || !s->pbuffer)
         return AVERROR(ENOMEM);
 

diff --git a/libavcodec/frame_thread_encoder.c b/libavcodec/frame_thread_encoder.c
index 215aee9..55756c4 100644
--- a/libavcodec/frame_thread_encoder.c
+++ b/libavcodec/frame_thread_encoder.c

@@ -92,7 +92,7 @@
         pthread_mutex_unlock(&c->buffer_mutex);
         av_frame_free(&frame);
         if(got_packet) {
-            int ret2 = av_dup_packet(pkt);
+            int ret2 = av_packet_make_refcounted(pkt);
             if (ret >= 0 && ret2 < 0)
                 ret = ret2;
         } else {
@@ -251,6 +251,23 @@
          pthread_join(c->worker[i], NULL);
     }
 
+    while (av_fifo_size(c->task_fifo) > 0) {
+        Task task;
+        AVFrame *frame;
+        av_fifo_generic_read(c->task_fifo, &task, sizeof(task), NULL);
+        frame = task.indata;
+        av_frame_free(&frame);
+        task.indata = NULL;
+    }
+
+    for (i=0; i<BUFFER_SIZE; i++) {
+        if (c->finished_tasks[i].outdata != NULL) {
+            AVPacket *pkt = c->finished_tasks[i].outdata;
+            av_packet_free(&pkt);
+            c->finished_tasks[i].outdata = NULL;
+        }
+    }
+
     pthread_mutex_destroy(&c->task_fifo_mutex);
     pthread_mutex_destroy(&c->finished_task_mutex);
     pthread_mutex_destroy(&c->buffer_mutex);

diff --git a/libavcodec/g2meet.c b/libavcodec/g2meet.c
index 842095b..a1dec8d 100644
--- a/libavcodec/g2meet.c
+++ b/libavcodec/g2meet.c

@@ -28,6 +28,7 @@
 #include <inttypes.h>
 #include <zlib.h>
 
+#include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
 
 #include "avcodec.h"
@@ -555,8 +556,8 @@
         B     = ((pred >> B_shift) & 0xFF) - TOSIGNED(delta);
     }
 
-    if (R<0 || G<0 || B<0) {
-        av_log(NULL, AV_LOG_ERROR, "RGB %d %d %d is out of range\n", R, G, B);
+    if (R<0 || G<0 || B<0 || R > 255 || G > 255 || B > 255) {
+        avpriv_request_sample(NULL, "RGB %d %d %d is out of range\n", R, G, B);
         return 0;
     }
 
@@ -926,6 +927,7 @@
         if (c->ec.els_ctx.err != 0) {
             av_log(avctx, AV_LOG_ERROR,
                    "ePIC: couldn't decode transparency pixel!\n");
+            ff_els_decoder_uninit(&c->ec.unsigned_rung);
             return AVERROR_INVALIDDATA;
         }
 
@@ -1354,14 +1356,16 @@
     } else {
         dst    +=  x * 3;
     }
-    if (y < 0) {
+
+    if (y < 0)
         h      +=  y;
+    if (w < 0 || h < 0)
+        return;
+    if (y < 0) {
         cursor += -y * c->cursor_stride;
     } else {
         dst    +=  y * stride;
     }
-    if (w < 0 || h < 0)
-        return;
 
     for (j = 0; j < h; j++) {
         for (i = 0; i < w; i++) {
@@ -1451,7 +1455,8 @@
             c->tile_height = bytestream2_get_be32(&bc);
             if (c->tile_width <= 0 || c->tile_height <= 0 ||
                 ((c->tile_width | c->tile_height) & 0xF) ||
-                c->tile_width * (uint64_t)c->tile_height >= INT_MAX / 4
+                c->tile_width * (uint64_t)c->tile_height >= INT_MAX / 4 ||
+                av_image_check_size2(c->tile_width, c->tile_height, avctx->max_pixels, avctx->pix_fmt, 0, avctx) < 0
             ) {
                 av_log(avctx, AV_LOG_ERROR,
                        "Invalid tile dimensions %dx%d\n",

diff --git a/libavcodec/g723_1dec.c b/libavcodec/g723_1dec.c
index c8202a9..ab952ec 100644
--- a/libavcodec/g723_1dec.c
+++ b/libavcodec/g723_1dec.c

@@ -549,7 +549,7 @@
         denom <<= bits2;
 
         bits2 = 5 + bits1 - bits2;
-        bits2 = FFMAX(0, bits2);
+        bits2 = av_clip_uintp2(bits2, 5);
 
         gain = (num >> 1) / (denom >> 16);
         gain = square_root(gain << 16 >> bits2);

diff --git a/libavcodec/gdv.c b/libavcodec/gdv.c
index dc91869..538bc38 100644
--- a/libavcodec/gdv.c
+++ b/libavcodec/gdv.c

@@ -74,53 +74,55 @@
 
 static void rescale(GDVContext *gdv, uint8_t *dst, int w, int h, int scale_v, int scale_h)
 {
-    int i, j, y, x;
+    int j, y, x;
 
     if ((gdv->scale_v == scale_v) && (gdv->scale_h == scale_h)) {
         return;
     }
 
-    if (gdv->scale_h && gdv->scale_v) {
+    if (gdv->scale_v) {
         for (j = 0; j < h; j++) {
             int y = h - j - 1;
-            for (i = 0; i < w; i++) {
-                int x = w - i - 1;
-                dst[PREAMBLE_SIZE + x + y * w] = dst[PREAMBLE_SIZE + x/2 + (y/2) * (w/2)];
+            uint8_t *dst1 = dst + PREAMBLE_SIZE + y * w;
+            uint8_t *src1 = dst + PREAMBLE_SIZE + (y>>!!gdv->scale_h) * (w>>1);
+
+            for (x = w - 1; x >= 0 && !(x&1); x--) {
+                dst1[x] = src1[(x>>1)];
+            }
+
+            for (x--; x >= 0; x-=2) {
+                dst1[x  ] =
+                dst1[x+1] = src1[(x>>1)];
             }
         }
     } else if (gdv->scale_h) {
         for (j = 0; j < h; j++) {
             int y = h - j - 1;
-            for (x = 0; x < w; x++) {
-                dst[PREAMBLE_SIZE + x + y * w] = dst[PREAMBLE_SIZE + x + (y/2) * w];
-            }
-        }
-    } else if (gdv->scale_v) {
-        for (j = 0; j < h; j++) {
-            int y = h - j - 1;
-            for (i = 0; i < w; i++) {
-                int x = w - i - 1;
-                dst[PREAMBLE_SIZE + x + y * w] = dst[PREAMBLE_SIZE + x/2 + y * (w/2)];
-            }
+            uint8_t *dst1 = dst + PREAMBLE_SIZE + y * w;
+            uint8_t *src1 = dst + PREAMBLE_SIZE + (y>>1) * w;
+            memcpy(dst1, src1, w);
         }
     }
 
     if (scale_h && scale_v) {
-        for (y = 0; y < h/2; y++) {
-            for (x = 0; x < w/2; x++) {
-                dst[PREAMBLE_SIZE + x + y * (w/2)] = dst[PREAMBLE_SIZE + x*2 + y*2 * w];
+        for (y = 0; y < (h>>1); y++) {
+            uint8_t *dst1 = dst + PREAMBLE_SIZE + y * (w>>1);
+            uint8_t *src1 = dst + PREAMBLE_SIZE + y*2 * w;
+            for (x = 0; x < (w>>1); x++) {
+                dst1[x] = src1[x*2];
             }
         }
     } else if (scale_h) {
-        for (y = 0; y < h/2; y++) {
-            for (x = 0; x < w; x++) {
-                dst[PREAMBLE_SIZE + x + y * w] = dst[PREAMBLE_SIZE + x + y*2 * w];
-            }
+        for (y = 0; y < (h>>1); y++) {
+            uint8_t *dst1 = dst + PREAMBLE_SIZE + y * w;
+            uint8_t *src1 = dst + PREAMBLE_SIZE + y*2 * w;
+            memcpy(dst1, src1, w);
         }
     } else if (scale_v) {
         for (y = 0; y < h; y++) {
-            for (x = 0; x < w/2; x++) {
-                dst[PREAMBLE_SIZE + x + y * w] = dst[PREAMBLE_SIZE + x*2 + y * w];
+            uint8_t *dst1 = dst + PREAMBLE_SIZE + y * w;
+            for (x = 0; x < (w>>1); x++) {
+                dst1[x] = dst1[x*2];
             }
         }
     }
@@ -228,6 +230,10 @@
             break;
         }
     }
+
+    if (bytestream2_get_bytes_left_p(pb) > 0)
+        return AVERROR_INVALIDDATA;
+
     return 0;
 }
 
@@ -409,17 +415,20 @@
     unsigned flags;
     uint8_t *dst;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
-        return ret;
-    if (pal && pal_size == AVPALETTE_SIZE)
-        memcpy(gdv->pal, pal, AVPALETTE_SIZE);
-
     bytestream2_init(gb, avpkt->data, avpkt->size);
     bytestream2_init_writer(pb, gdv->frame, gdv->frame_size);
 
     flags = bytestream2_get_le32(gb);
     compression = flags & 0xF;
 
+    if (compression == 4 || compression == 7 || compression > 8)
+        return AVERROR_INVALIDDATA;
+
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+    if (pal && pal_size == AVPALETTE_SIZE)
+        memcpy(gdv->pal, pal, AVPALETTE_SIZE);
+
     rescale(gdv, gdv->frame, avctx->width, avctx->height,
             !!(flags & 0x10), !!(flags & 0x20));
 
@@ -451,8 +460,10 @@
         ret = decompress_68(avctx, flags >> 8, 1);
         break;
     default:
-        return AVERROR_INVALIDDATA;
+        av_assert0(0);
     }
+    if (ret < 0)
+        return ret;
 
     memcpy(frame->data[1], gdv->pal, AVPALETTE_SIZE);
     dst = frame->data[0];
@@ -474,12 +485,14 @@
 
         for (y = 0; y < avctx->height; y++) {
             if (!gdv->scale_v) {
-                for (x = 0; x < avctx->width; x++) {
-                    dst[didx + x] = gdv->frame[sidx + x];
-                }
+                memcpy(dst + didx, gdv->frame + sidx, avctx->width);
             } else {
-                for (x = 0; x < avctx->width; x++) {
-                    dst[didx + x] = gdv->frame[sidx + x/2];
+                for (x = 0; x < avctx->width - 1; x+=2) {
+                    dst[didx + x    ] =
+                    dst[didx + x + 1] = gdv->frame[sidx + (x>>1)];
+                }
+                for (; x < avctx->width; x++) {
+                    dst[didx + x] = gdv->frame[sidx + (x>>1)];
                 }
             }
             if (!gdv->scale_h || ((y & 1) == 1)) {

diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 0c7f5ff..26a5b3e 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h

@@ -1,5 +1,6 @@
 /*
- * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2016 Alexandra Hájková
  *
  * This file is part of FFmpeg.
  *
@@ -32,6 +33,7 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
 #include "libavutil/avassert.h"
+#include "avcodec.h"
 #include "mathops.h"
 #include "vlc.h"
 
@@ -52,13 +54,25 @@
 #define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
 #endif
 
+#ifndef CACHED_BITSTREAM_READER
+#define CACHED_BITSTREAM_READER 0
+#endif
+
 typedef struct GetBitContext {
     const uint8_t *buffer, *buffer_end;
+#if CACHED_BITSTREAM_READER
+    uint64_t cache;
+    unsigned bits_left;
+#endif
     int index;
     int size_in_bits;
     int size_in_bits_plus8;
 } GetBitContext;
 
+static inline unsigned int get_bits(GetBitContext *s, int n);
+static inline void skip_bits(GetBitContext *s, int n);
+static inline unsigned int show_bits(GetBitContext *s, int n);
+
 /* Bitstream reader API docs:
  * name
  *   arbitrary name which is used as prefix for the internal variables
@@ -106,12 +120,16 @@
  * For examples see get_bits, show_bits, skip_bits, get_vlc.
  */
 
-#ifdef LONG_BITSTREAM_READER
+#if CACHED_BITSTREAM_READER
+#   define MIN_CACHE_BITS 64
+#elif defined LONG_BITSTREAM_READER
 #   define MIN_CACHE_BITS 32
 #else
 #   define MIN_CACHE_BITS 25
 #endif
 
+#if !CACHED_BITSTREAM_READER
+
 #define OPEN_READER_NOSIZE(name, gb)            \
     unsigned int name ## _index = (gb)->index;  \
     unsigned int av_unused name ## _cache
@@ -196,20 +214,107 @@
 
 #define GET_CACHE(name, gb) ((uint32_t) name ## _cache)
 
+#endif
+
 static inline int get_bits_count(const GetBitContext *s)
 {
+#if CACHED_BITSTREAM_READER
+    return s->index - s->bits_left;
+#else
     return s->index;
+#endif
 }
 
+#if CACHED_BITSTREAM_READER
+static inline void refill_32(GetBitContext *s)
+{
+#if !UNCHECKED_BITSTREAM_READER
+    if (s->index >> 3 >= s->buffer_end - s->buffer)
+        return;
+#endif
+
+#ifdef BITSTREAM_READER_LE
+    s->cache       = (uint64_t)AV_RL32(s->buffer + (s->index >> 3)) << s->bits_left | s->cache;
+#else
+    s->cache       = s->cache | (uint64_t)AV_RB32(s->buffer + (s->index >> 3)) << (32 - s->bits_left);
+#endif
+    s->index     += 32;
+    s->bits_left += 32;
+}
+
+static inline void refill_64(GetBitContext *s)
+{
+#if !UNCHECKED_BITSTREAM_READER
+    if (s->index >> 3 >= s->buffer_end - s->buffer)
+        return;
+#endif
+
+#ifdef BITSTREAM_READER_LE
+    s->cache = AV_RL64(s->buffer + (s->index >> 3));
+#else
+    s->cache = AV_RB64(s->buffer + (s->index >> 3));
+#endif
+    s->index += 64;
+    s->bits_left = 64;
+}
+
+static inline uint64_t get_val(GetBitContext *s, unsigned n, int is_le)
+{
+    uint64_t ret;
+    av_assert2(n>0 && n<=63);
+    if (is_le) {
+        ret = s->cache & ((UINT64_C(1) << n) - 1);
+        s->cache >>= n;
+    } else {
+        ret = s->cache >> (64 - n);
+        s->cache <<= n;
+    }
+    s->bits_left -= n;
+    return ret;
+}
+
+static inline unsigned show_val(const GetBitContext *s, unsigned n)
+{
+#ifdef BITSTREAM_READER_LE
+    return s->cache & ((UINT64_C(1) << n) - 1);
+#else
+    return s->cache >> (64 - n);
+#endif
+}
+#endif
+
+/**
+ * Skips the specified number of bits.
+ * @param n the number of bits to skip,
+ *          For the UNCHECKED_BITSTREAM_READER this must not cause the distance
+ *          from the start to overflow int32_t. Staying within the bitstream + padding
+ *          is sufficient, too.
+ */
 static inline void skip_bits_long(GetBitContext *s, int n)
 {
+#if CACHED_BITSTREAM_READER
+    skip_bits(s, n);
+#else
 #if UNCHECKED_BITSTREAM_READER
     s->index += n;
 #else
     s->index += av_clip(n, -s->index, s->size_in_bits_plus8 - s->index);
 #endif
+#endif
 }
 
+#if CACHED_BITSTREAM_READER
+static inline void skip_remaining(GetBitContext *s, unsigned n)
+{
+#ifdef BITSTREAM_READER_LE
+    s->cache >>= n;
+#else
+    s->cache <<= n;
+#endif
+    s->bits_left -= n;
+}
+#endif
+
 /**
  * Read MPEG-1 dc-style VLC (sign bit + mantissa with no MSB).
  * if MSB not set it is negative
@@ -217,6 +322,13 @@
  */
 static inline int get_xbits(GetBitContext *s, int n)
 {
+#if CACHED_BITSTREAM_READER
+    int32_t cache = show_bits(s, 32);
+    int sign = ~cache >> 31;
+    skip_remaining(s, n);
+
+    return ((((uint32_t)(sign ^ cache)) >> (32 - n)) ^ sign) - sign;
+#else
     register int sign;
     register int32_t cache;
     OPEN_READER(re, s);
@@ -227,8 +339,10 @@
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
     return (NEG_USR32(sign ^ cache, n) ^ sign) - sign;
+#endif
 }
 
+#if !CACHED_BITSTREAM_READER
 static inline int get_xbits_le(GetBitContext *s, int n)
 {
     register int sign;
@@ -242,16 +356,22 @@
     CLOSE_READER(re, s);
     return (zero_extend(sign ^ cache, n) ^ sign) - sign;
 }
+#endif
 
 static inline int get_sbits(GetBitContext *s, int n)
 {
     register int tmp;
+#if CACHED_BITSTREAM_READER
+    av_assert2(n>0 && n<=25);
+    tmp = sign_extend(get_bits(s, n), n);
+#else
     OPEN_READER(re, s);
     av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
     tmp = SHOW_SBITS(re, s, n);
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
+#endif
     return tmp;
 }
 
@@ -261,12 +381,28 @@
 static inline unsigned int get_bits(GetBitContext *s, int n)
 {
     register int tmp;
+#if CACHED_BITSTREAM_READER
+
+    av_assert2(n>0 && n<=32);
+    if (n > s->bits_left) {
+        refill_32(s);
+        if (s->bits_left < 32)
+            s->bits_left = n;
+    }
+
+#ifdef BITSTREAM_READER_LE
+    tmp = get_val(s, n, 1);
+#else
+    tmp = get_val(s, n, 0);
+#endif
+#else
     OPEN_READER(re, s);
     av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
     tmp = SHOW_UBITS(re, s, n);
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
+#endif
     return tmp;
 }
 
@@ -280,6 +416,16 @@
 
 static inline unsigned int get_bits_le(GetBitContext *s, int n)
 {
+#if CACHED_BITSTREAM_READER
+    av_assert2(n>0 && n<=32);
+    if (n > s->bits_left) {
+        refill_32(s);
+        if (s->bits_left < 32)
+            s->bits_left = n;
+    }
+
+    return get_val(s, n, 1);
+#else
     register int tmp;
     OPEN_READER(re, s);
     av_assert2(n>0 && n<=25);
@@ -288,6 +434,7 @@
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
     return tmp;
+#endif
 }
 
 /**
@@ -296,22 +443,59 @@
 static inline unsigned int show_bits(GetBitContext *s, int n)
 {
     register int tmp;
+#if CACHED_BITSTREAM_READER
+    if (n > s->bits_left)
+        refill_32(s);
+
+    tmp = show_val(s, n);
+#else
     OPEN_READER_NOSIZE(re, s);
     av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
     tmp = SHOW_UBITS(re, s, n);
+#endif
     return tmp;
 }
 
 static inline void skip_bits(GetBitContext *s, int n)
 {
+#if CACHED_BITSTREAM_READER
+    if (n < s->bits_left)
+        skip_remaining(s, n);
+    else {
+        n -= s->bits_left;
+        s->cache = 0;
+        s->bits_left = 0;
+
+        if (n >= 64) {
+            unsigned skip = (n / 8) * 8;
+
+            n -= skip;
+            s->index += skip;
+        }
+        refill_64(s);
+        if (n)
+            skip_remaining(s, n);
+    }
+#else
     OPEN_READER(re, s);
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
+#endif
 }
 
 static inline unsigned int get_bits1(GetBitContext *s)
 {
+#if CACHED_BITSTREAM_READER
+    if (!s->bits_left)
+        refill_64(s);
+
+#ifdef BITSTREAM_READER_LE
+    return get_val(s, 1, 1);
+#else
+    return get_val(s, 1, 0);
+#endif
+#else
     unsigned int index = s->index;
     uint8_t result     = s->buffer[index >> 3];
 #ifdef BITSTREAM_READER_LE
@@ -328,6 +512,7 @@
     s->index = index;
 
     return result;
+#endif
 }
 
 static inline unsigned int show_bits1(GetBitContext *s)
@@ -348,6 +533,10 @@
     av_assert2(n>=0 && n<=32);
     if (!n) {
         return 0;
+#if CACHED_BITSTREAM_READER
+    }
+    return get_bits(s, n);
+#else
     } else if (n <= MIN_CACHE_BITS) {
         return get_bits(s, n);
     } else {
@@ -359,6 +548,7 @@
         return ret | get_bits(s, n - 16);
 #endif
     }
+#endif
 }
 
 /**
@@ -428,7 +618,7 @@
     int buffer_size;
     int ret = 0;
 
-    if (bit_size >= INT_MAX - 7 || bit_size < 0 || !buffer) {
+    if (bit_size >= INT_MAX - FFMAX(7, AV_INPUT_BUFFER_PADDING_SIZE*8) || bit_size < 0 || !buffer) {
         bit_size    = 0;
         buffer      = NULL;
         ret         = AVERROR_INVALIDDATA;
@@ -442,6 +632,10 @@
     s->buffer_end         = buffer + buffer_size;
     s->index              = 0;
 
+#if CACHED_BITSTREAM_READER
+    refill_64(s);
+#endif
+
     return ret;
 }
 
@@ -543,6 +737,19 @@
         SKIP_BITS(name, gb, n);                                 \
     } while (0)
 
+/* Return the LUT element for the given bitstream configuration. */
+static inline int set_idx(GetBitContext *s, int code, int *n, int *nb_bits,
+                          VLC_TYPE (*table)[2])
+{
+    unsigned idx;
+
+    *nb_bits = -*n;
+    idx = show_bits(s, *nb_bits) + code;
+    *n = table[idx][1];
+
+    return table[idx][0];
+}
+
 /**
  * Parse a vlc code.
  * @param bits is the number of bits which will be read at once, must be
@@ -555,6 +762,24 @@
 static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
                                      int bits, int max_depth)
 {
+#if CACHED_BITSTREAM_READER
+    int nb_bits;
+    unsigned idx = show_bits(s, bits);
+    int code = table[idx][0];
+    int n    = table[idx][1];
+
+    if (max_depth > 1 && n < 0) {
+        skip_remaining(s, bits);
+        code = set_idx(s, code, &n, &nb_bits, table);
+        if (max_depth > 2 && n < 0) {
+            skip_remaining(s, nb_bits);
+            code = set_idx(s, code, &n, &nb_bits, table);
+        }
+    }
+    skip_remaining(s, n);
+
+    return code;
+#else
     int code;
 
     OPEN_READER(re, s);
@@ -565,6 +790,7 @@
     CLOSE_READER(re, s);
 
     return code;
+#endif
 }
 
 static inline int decode012(GetBitContext *gb)

diff --git a/libavcodec/gifdec.c b/libavcodec/gifdec.c
index 2eeed4c..54f1d4c 100644
--- a/libavcodec/gifdec.c
+++ b/libavcodec/gifdec.c

@@ -451,6 +451,8 @@
     if (!s->frame)
         return AVERROR(ENOMEM);
     ff_lzw_decode_open(&s->lzw);
+    if (!s->lzw)
+        return AVERROR(ENOMEM);
     return 0;
 }
 
@@ -559,5 +561,7 @@
     .close          = gif_decode_close,
     .decode         = gif_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
+                      FF_CODEC_CAP_INIT_CLEANUP,
     .priv_class     = &decoder_class,
 };

diff --git a/libavcodec/golomb.h b/libavcodec/golomb.h
index efb1eff..5c25883 100644
--- a/libavcodec/golomb.h
+++ b/libavcodec/golomb.h

@@ -54,6 +54,23 @@
 {
     unsigned int buf;
 
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    if (buf >= (1 << 27)) {
+        buf >>= 32 - 9;
+        skip_bits_long(gb, ff_golomb_vlc_len[buf]);
+
+        return ff_ue_golomb_vlc_code[buf];
+    } else {
+        int log = 2 * av_log2(buf) - 31;
+        buf >>= log;
+        buf--;
+        skip_bits_long(gb, 32 - log);
+
+        return buf;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -77,6 +94,7 @@
 
         return buf;
     }
+#endif
 }
 
 /**
@@ -101,6 +119,13 @@
 {
     unsigned int buf;
 
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    buf >>= 32 - 9;
+    skip_bits_long(gb, ff_golomb_vlc_len[buf]);
+#else
+
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -108,6 +133,7 @@
     buf >>= 32 - 9;
     LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
     CLOSE_READER(re, gb);
+#endif
 
     return ff_ue_golomb_vlc_code[buf];
 }
@@ -116,6 +142,33 @@
 {
     uint32_t buf;
 
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    if (buf & 0xAA800000) {
+        buf >>= 32 - 8;
+        skip_bits_long(gb, ff_interleaved_golomb_vlc_len[buf]);
+
+        return ff_interleaved_ue_golomb_vlc_code[buf];
+    } else {
+        unsigned ret = 1;
+
+        do {
+            buf >>= 32 - 8;
+            skip_bits_long(gb, FFMIN(ff_interleaved_golomb_vlc_len[buf], 8));
+
+            if (ff_interleaved_golomb_vlc_len[buf] != 9) {
+                ret <<= (ff_interleaved_golomb_vlc_len[buf] - 1) >> 1;
+                ret  |= ff_interleaved_dirac_golomb_vlc_code[buf];
+                break;
+            }
+            ret = (ret << 4) | ff_interleaved_dirac_golomb_vlc_code[buf];
+            buf = show_bits_long(gb, 32);
+        } while (get_bits_left(gb) > 0);
+
+        return ret - 1;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -147,6 +200,7 @@
         CLOSE_READER(re, gb);
         return ret - 1;
     }
+#endif
 }
 
 /**
@@ -184,6 +238,28 @@
 {
     unsigned int buf;
 
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    if (buf >= (1 << 27)) {
+        buf >>= 32 - 9;
+        skip_bits_long(gb, ff_golomb_vlc_len[buf]);
+
+        return ff_se_golomb_vlc_code[buf];
+    } else {
+        int log = 2 * av_log2(buf) - 31;
+        buf >>= log;
+
+        skip_bits_long(gb, 32 - log);
+
+        if (buf & 1)
+            buf = -(buf >> 1);
+        else
+            buf = (buf >> 1);
+
+        return buf;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -210,6 +286,7 @@
 
         return buf;
     }
+#endif
 }
 
 static inline int get_se_golomb_long(GetBitContext *gb)
@@ -223,6 +300,30 @@
 {
     unsigned int buf;
 
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    if (buf & 0xAA800000) {
+        buf >>= 32 - 8;
+        skip_bits_long(gb, ff_interleaved_golomb_vlc_len[buf]);
+
+        return ff_interleaved_se_golomb_vlc_code[buf];
+    } else {
+        int log;
+        skip_bits(gb, 8);
+        buf |= 1 | show_bits_long(gb, 24);
+
+        if ((buf & 0xAAAAAAAA) == 0)
+            return INVALID_VLC;
+
+        for (log = 31; (buf & 0x80000000) == 0; log--)
+            buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
+
+        skip_bits_long(gb, 63 - 2 * log - 8);
+
+        return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -250,6 +351,7 @@
 
         return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
     }
+#endif
 }
 
 static inline int dirac_get_se_golomb(GetBitContext *gb)
@@ -273,6 +375,24 @@
     unsigned int buf;
     int log;
 
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    log = av_log2(buf);
+
+    if (log > 31 - limit) {
+        buf >>= log - k;
+        buf  += (30 - log) << k;
+        skip_bits_long(gb, 32 + k - log);
+
+        return buf;
+    } else {
+        skip_bits_long(gb, limit);
+        buf = get_bits_long(gb, esc_len);
+
+        return buf + limit - 1;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -297,6 +417,7 @@
 
         return buf + limit - 1;
     }
+#endif
 }
 
 /**
@@ -308,6 +429,35 @@
     unsigned int buf;
     int log;
 
+#if CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    log = av_log2(buf);
+
+    if (log - k >= 1 && 32 - log < limit) {
+        buf >>= log - k;
+        buf  += (30 - log) << k;
+        skip_bits_long(gb, 32 + k - log);
+
+        return buf;
+    } else {
+        int i;
+        for (i = 0;
+             i < limit && get_bits1(gb) == 0 && get_bits_left(gb) > 0;
+             i++);
+
+        if (i < limit - 1) {
+            buf = get_bits_long(gb, k);
+
+            return buf + (i << k);
+        } else if (i == limit - 1) {
+            buf = get_bits_long(gb, esc_len);
+
+            return buf + 1;
+        } else
+            return -1;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -364,6 +514,7 @@
         CLOSE_READER(re, gb);
         return buf;
     }
+#endif
 }
 
 /**

diff --git a/libavcodec/h263.h b/libavcodec/h263.h
index d154d36..f891f72 100644
--- a/libavcodec/h263.h
+++ b/libavcodec/h263.h

@@ -27,9 +27,7 @@
 #include "h263data.h"
 #include "rl.h"
 
-#if !FF_API_ASPECT_EXTENDED
 #define FF_ASPECT_EXTENDED 15
-#endif
 #define INT_BIT (CHAR_BIT * sizeof(int))
 
 // The defines below define the number of bits that are read at once for

diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index bcb2b08..2cf01e3 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c

@@ -33,6 +33,7 @@
 #include "flv.h"
 #include "h263.h"
 #include "h263_parser.h"
+#include "hwaccel.h"
 #include "internal.h"
 #include "mpeg_er.h"
 #include "mpeg4video.h"
@@ -41,12 +42,17 @@
 #include "mpegvideo.h"
 #include "msmpeg4.h"
 #include "qpeldsp.h"
-#include "vdpau_compat.h"
 #include "thread.h"
 #include "wmv2.h"
 
 static enum AVPixelFormat h263_get_format(AVCodecContext *avctx)
 {
+    /* MPEG-4 Studio Profile only, not supported by hardware */
+    if (avctx->bits_per_raw_sample > 8) {
+        av_assert1(((MpegEncContext *)avctx->priv_data)->studio_profile);
+        return avctx->pix_fmt;
+    }
+
     if (avctx->codec->id == AV_CODEC_ID_MSS2)
         return AV_PIX_FMT_YUV420P;
 
@@ -197,6 +203,11 @@
 
     ff_set_qscale(s, s->qscale);
 
+    if (s->studio_profile) {
+        if ((ret = ff_mpeg4_decode_studio_slice_header(s->avctx->priv_data)) < 0)
+            return ret;
+    }
+
     if (s->avctx->hwaccel) {
         const uint8_t *start = s->gb.buffer + get_bits_count(&s->gb) / 8;
         ret = s->avctx->hwaccel->decode_slice(s->avctx, start, s->gb.buffer_end - start);
@@ -307,6 +318,7 @@
 
     av_assert1(s->mb_x == 0 && s->mb_y == s->mb_height);
 
+    // Detect incorrect padding with wrong stuffing codes used by NEC N-02B
     if (s->codec_id == AV_CODEC_ID_MPEG4         &&
         (s->workaround_bugs & FF_BUG_AUTODETECT) &&
         get_bits_left(&s->gb) >= 48              &&
@@ -534,6 +546,8 @@
     if (CONFIG_MPEG4_DECODER && avctx->codec_id == AV_CODEC_ID_MPEG4) {
         if (ff_mpeg4_workaround_bugs(avctx) == 1)
             goto retry;
+        if (s->studio_profile != (s->idsp.idct == NULL))
+            ff_mpv_idct_init(s);
     }
 
     /* After H.263 & MPEG-4 header decode we have the height, width,
@@ -603,13 +617,6 @@
     if (!s->divx_packed)
         ff_thread_finish_setup(avctx);
 
-#if FF_API_CAP_VDPAU
-    if (CONFIG_MPEG4_VDPAU_DECODER && (s->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU)) {
-        ff_vdpau_mpeg4_decode_picture(avctx->priv_data, s->gb.buffer, s->gb.buffer_end - s->gb.buffer);
-        goto frame_end;
-    }
-#endif
-
     if (avctx->hwaccel) {
         ret = avctx->hwaccel->start_frame(avctx, s->gb.buffer,
                                           s->gb.buffer_end - s->gb.buffer);
@@ -637,7 +644,7 @@
     slice_ret = decode_slice(s);
     while (s->mb_y < s->mb_height) {
         if (s->msmpeg4_version) {
-            if (s->slice_height == 0 || s->mb_x != 0 ||
+            if (s->slice_height == 0 || s->mb_x != 0 || slice_ret < 0 ||
                 (s->mb_y % s->slice_height) != 0 || get_bits_left(&s->gb) < 0)
                 break;
         } else {
@@ -663,7 +670,8 @@
 
     av_assert1(s->bitstream_buffer_size == 0);
 frame_end:
-    ff_er_frame_end(&s->er);
+    if (!s->studio_profile)
+        ff_er_frame_end(&s->er);
 
     if (avctx->hwaccel) {
         ret = avctx->hwaccel->end_frame(avctx);
@@ -722,6 +730,9 @@
 #if CONFIG_H263_VAAPI_HWACCEL || CONFIG_MPEG4_VAAPI_HWACCEL
     AV_PIX_FMT_VAAPI,
 #endif
+#if CONFIG_MPEG4_NVDEC_HWACCEL
+    AV_PIX_FMT_CUDA,
+#endif
 #if CONFIG_MPEG4_VDPAU_HWACCEL
     AV_PIX_FMT_VDPAU,
 #endif
@@ -764,4 +775,16 @@
     .flush          = ff_mpeg_flush,
     .max_lowres     = 3,
     .pix_fmts       = ff_h263_hwaccel_pixfmt_list_420,
+    .hw_configs     = (const AVCodecHWConfigInternal*[]) {
+#if CONFIG_H263_VAAPI_HWACCEL
+                        HWACCEL_VAAPI(h263),
+#endif
+#if CONFIG_MPEG4_VDPAU_HWACCEL
+                        HWACCEL_VDPAU(mpeg4),
+#endif
+#if CONFIG_H263_VIDEOTOOLBOX_HWACCEL
+                        HWACCEL_VIDEOTOOLBOX(h263),
+#endif
+                        NULL
+                    },
 };

diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 86df5eb..7a1fb6d 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h

@@ -26,8 +26,12 @@
 
 #define QP_MAX_NUM (51 + 6*6)           // The maximum supported qp
 
-/* NAL unit types */
+/*
+ * Table 7-1 – NAL unit type codes, syntax element categories, and NAL unit type classes in
+ * T-REC-H.264-201704
+ */
 enum {
+    H264_NAL_UNSPECIFIED     = 0,
     H264_NAL_SLICE           = 1,
     H264_NAL_DPA             = 2,
     H264_NAL_DPB             = 3,
@@ -41,7 +45,69 @@
     H264_NAL_END_STREAM      = 11,
     H264_NAL_FILLER_DATA     = 12,
     H264_NAL_SPS_EXT         = 13,
+    H264_NAL_PREFIX          = 14,
+    H264_NAL_SUB_SPS         = 15,
+    H264_NAL_DPS             = 16,
+    H264_NAL_RESERVED17      = 17,
+    H264_NAL_RESERVED18      = 18,
     H264_NAL_AUXILIARY_SLICE = 19,
+    H264_NAL_EXTEN_SLICE     = 20,
+    H264_NAL_DEPTH_EXTEN_SLICE = 21,
+    H264_NAL_RESERVED22      = 22,
+    H264_NAL_RESERVED23      = 23,
+    H264_NAL_UNSPECIFIED24   = 24,
+    H264_NAL_UNSPECIFIED25   = 25,
+    H264_NAL_UNSPECIFIED26   = 26,
+    H264_NAL_UNSPECIFIED27   = 27,
+    H264_NAL_UNSPECIFIED28   = 28,
+    H264_NAL_UNSPECIFIED29   = 29,
+    H264_NAL_UNSPECIFIED30   = 30,
+    H264_NAL_UNSPECIFIED31   = 31,
 };
 
+
+enum {
+    // 7.4.2.1.1: seq_parameter_set_id is in [0, 31].
+    H264_MAX_SPS_COUNT = 32,
+    // 7.4.2.2: pic_parameter_set_id is in [0, 255].
+    H264_MAX_PPS_COUNT = 256,
+
+    // A.3: MaxDpbFrames is bounded above by 16.
+    H264_MAX_DPB_FRAMES = 16,
+    // 7.4.2.1.1: max_num_ref_frames is in [0, MaxDpbFrames], and
+    // each reference frame can have two fields.
+    H264_MAX_REFS       = 2 * H264_MAX_DPB_FRAMES,
+
+    // 7.4.3.1: modification_of_pic_nums_idc is not equal to 3 at most
+    // num_ref_idx_lN_active_minus1 + 1 times (that is, once for each
+    // possible reference), then equal to 3 once.
+    H264_MAX_RPLM_COUNT = H264_MAX_REFS + 1,
+
+    // 7.4.3.3: in the worst case, we begin with a full short-term
+    // reference picture list.  Each picture in turn is moved to the
+    // long-term list (type 3) and then discarded from there (type 2).
+    // Then, we set the length of the long-term list (type 4), mark
+    // the current picture as long-term (type 6) and terminate the
+    // process (type 0).
+    H264_MAX_MMCO_COUNT = H264_MAX_REFS * 2 + 3,
+
+    // A.2.1, A.2.3: profiles supporting FMO constrain
+    // num_slice_groups_minus1 to be in [0, 7].
+    H264_MAX_SLICE_GROUPS = 8,
+
+    // E.2.2: cpb_cnt_minus1 is in [0, 31].
+    H264_MAX_CPB_CNT = 32,
+
+    // A.3: in table A-1 the highest level allows a MaxFS of 139264.
+    H264_MAX_MB_PIC_SIZE = 139264,
+    // A.3.1, A.3.2: PicWidthInMbs and PicHeightInMbs are constrained
+    // to be not greater than sqrt(MaxFS * 8).  Hence height/width are
+    // bounded above by sqrt(139264 * 8) = 1055.5 macroblocks.
+    H264_MAX_MB_WIDTH    = 1055,
+    H264_MAX_MB_HEIGHT   = 1055,
+    H264_MAX_WIDTH       = H264_MAX_MB_WIDTH  * 16,
+    H264_MAX_HEIGHT      = H264_MAX_MB_HEIGHT * 16,
+};
+
+
 #endif /* AVCODEC_H264_H */

diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c
index b0d9ff6..aaa4b8f 100644
--- a/libavcodec/h2645_parse.c
+++ b/libavcodec/h2645_parse.c

@@ -26,15 +26,16 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mem.h"
 
+#include "bytestream.h"
 #include "hevc.h"
+#include "h264.h"
 #include "h2645_parse.h"
 
 int ff_h2645_extract_rbsp(const uint8_t *src, int length,
-                          H2645NAL *nal, int small_padding)
+                          H2645RBSP *rbsp, H2645NAL *nal, int small_padding)
 {
     int i, si, di;
     uint8_t *dst;
-    int64_t padding = small_padding ? 0 : MAX_MBPAIR_SIZE;
 
     nal->skipped_bytes = 0;
 #define STARTCODE_TEST                                                  \
@@ -53,8 +54,8 @@
             i++
 #if HAVE_FAST_64BIT
     for (i = 0; i + 1 < length; i += 9) {
-        if (!((~AV_RN64A(src + i) &
-               (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
+        if (!((~AV_RN64(src + i) &
+               (AV_RN64(src + i) - 0x0100010001000101ULL)) &
               0x8000800080008080ULL))
             continue;
         FIND_FIRST_ZERO;
@@ -63,8 +64,8 @@
     }
 #else
     for (i = 0; i + 1 < length; i += 5) {
-        if (!((~AV_RN32A(src + i) &
-               (AV_RN32A(src + i) - 0x01000101U)) &
+        if (!((~AV_RN32(src + i) &
+               (AV_RN32(src + i) - 0x01000101U)) &
               0x80008080U))
             continue;
         FIND_FIRST_ZERO;
@@ -91,11 +92,7 @@
     } else if (i > length)
         i = length;
 
-    av_fast_padded_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
-                          length + padding);
-    if (!nal->rbsp_buffer)
-        return AVERROR(ENOMEM);
-
+    nal->rbsp_buffer = &rbsp->rbsp_buffer[rbsp->rbsp_buffer_size];
     dst = nal->rbsp_buffer;
 
     memcpy(dst, src, i);
@@ -144,39 +141,123 @@
     nal->size = di;
     nal->raw_data = src;
     nal->raw_size = si;
+    rbsp->rbsp_buffer_size += si;
+
     return si;
 }
 
-static const char *nal_unit_name(int nal_type)
+static const char *hevc_nal_type_name[64] = {
+    "TRAIL_N", // HEVC_NAL_TRAIL_N
+    "TRAIL_R", // HEVC_NAL_TRAIL_R
+    "TSA_N", // HEVC_NAL_TSA_N
+    "TSA_R", // HEVC_NAL_TSA_R
+    "STSA_N", // HEVC_NAL_STSA_N
+    "STSA_R", // HEVC_NAL_STSA_R
+    "RADL_N", // HEVC_NAL_RADL_N
+    "RADL_R", // HEVC_NAL_RADL_R
+    "RASL_N", // HEVC_NAL_RASL_N
+    "RASL_R", // HEVC_NAL_RASL_R
+    "RSV_VCL_N10", // HEVC_NAL_VCL_N10
+    "RSV_VCL_R11", // HEVC_NAL_VCL_R11
+    "RSV_VCL_N12", // HEVC_NAL_VCL_N12
+    "RSV_VLC_R13", // HEVC_NAL_VCL_R13
+    "RSV_VCL_N14", // HEVC_NAL_VCL_N14
+    "RSV_VCL_R15", // HEVC_NAL_VCL_R15
+    "BLA_W_LP", // HEVC_NAL_BLA_W_LP
+    "BLA_W_RADL", // HEVC_NAL_BLA_W_RADL
+    "BLA_N_LP", // HEVC_NAL_BLA_N_LP
+    "IDR_W_RADL", // HEVC_NAL_IDR_W_RADL
+    "IDR_N_LP", // HEVC_NAL_IDR_N_LP
+    "CRA_NUT", // HEVC_NAL_CRA_NUT
+    "IRAP_IRAP_VCL22", // HEVC_NAL_IRAP_VCL22
+    "IRAP_IRAP_VCL23", // HEVC_NAL_IRAP_VCL23
+    "RSV_VCL24", // HEVC_NAL_RSV_VCL24
+    "RSV_VCL25", // HEVC_NAL_RSV_VCL25
+    "RSV_VCL26", // HEVC_NAL_RSV_VCL26
+    "RSV_VCL27", // HEVC_NAL_RSV_VCL27
+    "RSV_VCL28", // HEVC_NAL_RSV_VCL28
+    "RSV_VCL29", // HEVC_NAL_RSV_VCL29
+    "RSV_VCL30", // HEVC_NAL_RSV_VCL30
+    "RSV_VCL31", // HEVC_NAL_RSV_VCL31
+    "VPS", // HEVC_NAL_VPS
+    "SPS", // HEVC_NAL_SPS
+    "PPS", // HEVC_NAL_PPS
+    "AUD", // HEVC_NAL_AUD
+    "EOS_NUT", // HEVC_NAL_EOS_NUT
+    "EOB_NUT", // HEVC_NAL_EOB_NUT
+    "FD_NUT", // HEVC_NAL_FD_NUT
+    "SEI_PREFIX", // HEVC_NAL_SEI_PREFIX
+    "SEI_SUFFIX", // HEVC_NAL_SEI_SUFFIX
+    "RSV_NVCL41", // HEVC_NAL_RSV_NVCL41
+    "RSV_NVCL42", // HEVC_NAL_RSV_NVCL42
+    "RSV_NVCL43", // HEVC_NAL_RSV_NVCL43
+    "RSV_NVCL44", // HEVC_NAL_RSV_NVCL44
+    "RSV_NVCL45", // HEVC_NAL_RSV_NVCL45
+    "RSV_NVCL46", // HEVC_NAL_RSV_NVCL46
+    "RSV_NVCL47", // HEVC_NAL_RSV_NVCL47
+    "UNSPEC48", // HEVC_NAL_UNSPEC48
+    "UNSPEC49", // HEVC_NAL_UNSPEC49
+    "UNSPEC50", // HEVC_NAL_UNSPEC50
+    "UNSPEC51", // HEVC_NAL_UNSPEC51
+    "UNSPEC52", // HEVC_NAL_UNSPEC52
+    "UNSPEC53", // HEVC_NAL_UNSPEC53
+    "UNSPEC54", // HEVC_NAL_UNSPEC54
+    "UNSPEC55", // HEVC_NAL_UNSPEC55
+    "UNSPEC56", // HEVC_NAL_UNSPEC56
+    "UNSPEC57", // HEVC_NAL_UNSPEC57
+    "UNSPEC58", // HEVC_NAL_UNSPEC58
+    "UNSPEC59", // HEVC_NAL_UNSPEC59
+    "UNSPEC60", // HEVC_NAL_UNSPEC60
+    "UNSPEC61", // HEVC_NAL_UNSPEC61
+    "UNSPEC62", // HEVC_NAL_UNSPEC62
+    "UNSPEC63", // HEVC_NAL_UNSPEC63
+};
+
+static const char *hevc_nal_unit_name(int nal_type)
 {
-    switch(nal_type) {
-    case HEVC_NAL_TRAIL_N    : return "TRAIL_N";
-    case HEVC_NAL_TRAIL_R    : return "TRAIL_R";
-    case HEVC_NAL_TSA_N      : return "TSA_N";
-    case HEVC_NAL_TSA_R      : return "TSA_R";
-    case HEVC_NAL_STSA_N     : return "STSA_N";
-    case HEVC_NAL_STSA_R     : return "STSA_R";
-    case HEVC_NAL_RADL_N     : return "RADL_N";
-    case HEVC_NAL_RADL_R     : return "RADL_R";
-    case HEVC_NAL_RASL_N     : return "RASL_N";
-    case HEVC_NAL_RASL_R     : return "RASL_R";
-    case HEVC_NAL_BLA_W_LP   : return "BLA_W_LP";
-    case HEVC_NAL_BLA_W_RADL : return "BLA_W_RADL";
-    case HEVC_NAL_BLA_N_LP   : return "BLA_N_LP";
-    case HEVC_NAL_IDR_W_RADL : return "IDR_W_RADL";
-    case HEVC_NAL_IDR_N_LP   : return "IDR_N_LP";
-    case HEVC_NAL_CRA_NUT    : return "CRA_NUT";
-    case HEVC_NAL_VPS        : return "VPS";
-    case HEVC_NAL_SPS        : return "SPS";
-    case HEVC_NAL_PPS        : return "PPS";
-    case HEVC_NAL_AUD        : return "AUD";
-    case HEVC_NAL_EOS_NUT    : return "EOS_NUT";
-    case HEVC_NAL_EOB_NUT    : return "EOB_NUT";
-    case HEVC_NAL_FD_NUT     : return "FD_NUT";
-    case HEVC_NAL_SEI_PREFIX : return "SEI_PREFIX";
-    case HEVC_NAL_SEI_SUFFIX : return "SEI_SUFFIX";
-    default : return "?";
-    }
+    av_assert0(nal_type >= 0 && nal_type < 64);
+    return hevc_nal_type_name[nal_type];
+}
+
+static const char *h264_nal_type_name[32] = {
+    "Unspecified 0", //H264_NAL_UNSPECIFIED
+    "Coded slice of a non-IDR picture", // H264_NAL_SLICE
+    "Coded slice data partition A", // H264_NAL_DPA
+    "Coded slice data partition B", // H264_NAL_DPB
+    "Coded slice data partition C", // H264_NAL_DPC
+    "IDR", // H264_NAL_IDR_SLICE
+    "SEI", // H264_NAL_SEI
+    "SPS", // H264_NAL_SPS
+    "PPS", // H264_NAL_PPS
+    "AUD", // H264_NAL_AUD
+    "End of sequence", // H264_NAL_END_SEQUENCE
+    "End of stream", // H264_NAL_END_STREAM
+    "Filler data", // H264_NAL_FILLER_DATA
+    "SPS extension", // H264_NAL_SPS_EXT
+    "Prefix", // H264_NAL_PREFIX
+    "Subset SPS", // H264_NAL_SUB_SPS
+    "Depth parameter set", // H264_NAL_DPS
+    "Reserved 17", // H264_NAL_RESERVED17
+    "Reserved 18", // H264_NAL_RESERVED18
+    "Auxiliary coded picture without partitioning", // H264_NAL_AUXILIARY_SLICE
+    "Slice extension", // H264_NAL_EXTEN_SLICE
+    "Slice extension for a depth view or a 3D-AVC texture view", // H264_NAL_DEPTH_EXTEN_SLICE
+    "Reserved 22", // H264_NAL_RESERVED22
+    "Reserved 23", // H264_NAL_RESERVED23
+    "Unspecified 24", // H264_NAL_UNSPECIFIED24
+    "Unspecified 25", // H264_NAL_UNSPECIFIED25
+    "Unspecified 26", // H264_NAL_UNSPECIFIED26
+    "Unspecified 27", // H264_NAL_UNSPECIFIED27
+    "Unspecified 28", // H264_NAL_UNSPECIFIED28
+    "Unspecified 29", // H264_NAL_UNSPECIFIED29
+    "Unspecified 30", // H264_NAL_UNSPECIFIED30
+    "Unspecified 31", // H264_NAL_UNSPECIFIED31
+};
+
+static const char *h264_nal_unit_name(int nal_type)
+{
+    av_assert0(nal_type >= 0 && nal_type < 32);
+    return h264_nal_type_name[nal_type];
 }
 
 static int get_bit_length(H2645NAL *nal, int skip_trailing_zeros)
@@ -225,7 +306,7 @@
 
     av_log(logctx, AV_LOG_DEBUG,
            "nal_unit_type: %d(%s), nuh_layer_id: %d, temporal_id: %d\n",
-           nal->type, nal_unit_name(nal->type), nuh_layer_id, nal->temporal_id);
+           nal->type, hevc_nal_unit_name(nal->type), nuh_layer_id, nal->temporal_id);
 
     return nuh_layer_id == 0;
 }
@@ -241,66 +322,85 @@
     nal->type    = get_bits(gb, 5);
 
     av_log(logctx, AV_LOG_DEBUG,
-           "nal_unit_type: %d, nal_ref_idc: %d\n",
-           nal->type, nal->ref_idc);
+           "nal_unit_type: %d(%s), nal_ref_idc: %d\n",
+           nal->type, h264_nal_unit_name(nal->type), nal->ref_idc);
 
     return 1;
 }
 
+static int find_next_start_code(const uint8_t *buf, const uint8_t *next_avc)
+{
+    int i = 0;
+
+    if (buf + 3 >= next_avc)
+        return next_avc - buf;
+
+    while (buf + i + 3 < next_avc) {
+        if (buf[i] == 0 && buf[i + 1] == 0 && buf[i + 2] == 1)
+            break;
+        i++;
+    }
+    return i + 3;
+}
+
 int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
                           void *logctx, int is_nalff, int nal_length_size,
                           enum AVCodecID codec_id, int small_padding)
 {
+    GetByteContext bc;
     int consumed, ret = 0;
-    const uint8_t *next_avc = is_nalff ? buf : buf + length;
+    int next_avc = is_nalff ? 0 : length;
+    int64_t padding = small_padding ? 0 : MAX_MBPAIR_SIZE;
 
+    bytestream2_init(&bc, buf, length);
+    av_fast_padded_malloc(&pkt->rbsp.rbsp_buffer, &pkt->rbsp.rbsp_buffer_alloc_size, length + padding);
+    if (!pkt->rbsp.rbsp_buffer)
+        return AVERROR(ENOMEM);
+
+    pkt->rbsp.rbsp_buffer_size = 0;
     pkt->nb_nals = 0;
-    while (length >= 4) {
+    while (bytestream2_get_bytes_left(&bc) >= 4) {
         H2645NAL *nal;
         int extract_length = 0;
         int skip_trailing_zeros = 1;
 
-        if (buf == next_avc) {
+        if (bytestream2_tell(&bc) == next_avc) {
             int i = 0;
             extract_length = get_nalsize(nal_length_size,
-                                         buf, length, &i, logctx);
+                                         bc.buffer, bytestream2_get_bytes_left(&bc), &i, logctx);
             if (extract_length < 0)
                 return extract_length;
 
-            buf    += nal_length_size;
-            length -= nal_length_size;
+            bytestream2_skip(&bc, nal_length_size);
 
-            next_avc = buf + extract_length;
+            next_avc = bytestream2_tell(&bc) + extract_length;
         } else {
-            if (buf > next_avc)
+            int buf_index;
+
+            if (bytestream2_tell(&bc) > next_avc)
                 av_log(logctx, AV_LOG_WARNING, "Exceeded next NALFF position, re-syncing.\n");
 
             /* search start code */
-            while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
-                ++buf;
-                --length;
-                if (length < 4) {
-                    if (pkt->nb_nals > 0) {
-                        // No more start codes: we discarded some irrelevant
-                        // bytes at the end of the packet.
-                        return 0;
-                    } else {
-                        av_log(logctx, AV_LOG_ERROR, "No start code is found.\n");
-                        return AVERROR_INVALIDDATA;
-                    }
-                } else if (buf >= (next_avc - 3))
-                    break;
+            buf_index = find_next_start_code(bc.buffer, buf + next_avc);
+
+            bytestream2_skip(&bc, buf_index);
+
+            if (!bytestream2_get_bytes_left(&bc)) {
+                if (pkt->nb_nals > 0) {
+                    // No more start codes: we discarded some irrelevant
+                    // bytes at the end of the packet.
+                    return 0;
+                } else {
+                    av_log(logctx, AV_LOG_ERROR, "No start code is found.\n");
+                    return AVERROR_INVALIDDATA;
+                }
             }
 
-            buf           += 3;
-            length        -= 3;
-            extract_length = FFMIN(length, next_avc - buf);
+            extract_length = FFMIN(bytestream2_get_bytes_left(&bc), next_avc - bytestream2_tell(&bc));
 
-            if (buf >= next_avc) {
+            if (bytestream2_tell(&bc) >= next_avc) {
                 /* skip to the start of the next NAL */
-                int offset = next_avc - buf;
-                buf    += offset;
-                length -= offset;
+                bytestream2_skip(&bc, next_avc - bytestream2_tell(&bc));
                 continue;
             }
         }
@@ -326,7 +426,7 @@
         }
         nal = &pkt->nals[pkt->nb_nals];
 
-        consumed = ff_h2645_extract_rbsp(buf, extract_length, nal, small_padding);
+        consumed = ff_h2645_extract_rbsp(bc.buffer, extract_length, &pkt->rbsp, nal, small_padding);
         if (consumed < 0)
             return consumed;
 
@@ -337,10 +437,11 @@
 
         pkt->nb_nals++;
 
+        bytestream2_skip(&bc, consumed);
+
         /* see commit 3566042a0 */
-        if (consumed < length - 3 &&
-            buf[consumed]     == 0x00 && buf[consumed + 1] == 0x00 &&
-            buf[consumed + 2] == 0x01 && buf[consumed + 3] == 0xE0)
+        if (bytestream2_get_bytes_left(&bc) >= 4 &&
+            bytestream2_peek_be32(&bc) == 0x000001E0)
             skip_trailing_zeros = 0;
 
         nal->size_bits = get_bit_length(nal, skip_trailing_zeros);
@@ -353,16 +454,13 @@
             ret = hevc_parse_nal_header(nal, logctx);
         else
             ret = h264_parse_nal_header(nal, logctx);
-        if (ret <= 0 || nal->size <= 0) {
+        if (ret <= 0 || nal->size <= 0 || nal->size_bits <= 0) {
             if (ret < 0) {
                 av_log(logctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
                        nal->type);
             }
             pkt->nb_nals--;
         }
-
-        buf    += consumed;
-        length -= consumed;
     }
 
     return 0;
@@ -372,9 +470,10 @@
 {
     int i;
     for (i = 0; i < pkt->nals_allocated; i++) {
-        av_freep(&pkt->nals[i].rbsp_buffer);
         av_freep(&pkt->nals[i].skipped_bytes_pos);
     }
     av_freep(&pkt->nals);
     pkt->nals_allocated = 0;
+    av_freep(&pkt->rbsp.rbsp_buffer);
+    pkt->rbsp.rbsp_buffer_alloc_size = pkt->rbsp.rbsp_buffer_size = 0;
 }

diff --git a/libavcodec/h2645_parse.h b/libavcodec/h2645_parse.h
index 5f3e17a..2e29ad2 100644
--- a/libavcodec/h2645_parse.h
+++ b/libavcodec/h2645_parse.h

@@ -30,7 +30,6 @@
 
 typedef struct H2645NAL {
     uint8_t *rbsp_buffer;
-    int rbsp_buffer_size;
 
     int size;
     const uint8_t *data;
@@ -65,9 +64,16 @@
     int ref_idc;
 } H2645NAL;
 
+typedef struct H2645RBSP {
+    uint8_t *rbsp_buffer;
+    int rbsp_buffer_alloc_size;
+    int rbsp_buffer_size;
+} H2645RBSP;
+
 /* an input packet split into unescaped NAL units */
 typedef struct H2645Packet {
     H2645NAL *nals;
+    H2645RBSP rbsp;
     int nb_nals;
     int nals_allocated;
 } H2645Packet;
@@ -75,7 +81,7 @@
 /**
  * Extract the raw (unescaped) bitstream.
  */
-int ff_h2645_extract_rbsp(const uint8_t *src, int length,
+int ff_h2645_extract_rbsp(const uint8_t *src, int length, H2645RBSP *rbsp,
                           H2645NAL *nal, int small_padding);
 
 /**

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 3458346..815149a 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c

@@ -1735,7 +1735,7 @@
 \
             if( coeff_abs >= 15 ) { \
                 int j = 0; \
-                while (get_cabac_bypass(CC) && j < 30) { \
+                while (get_cabac_bypass(CC) && j < 16+7) { \
                     j++; \
                 } \
 \
@@ -2347,7 +2347,7 @@
     if (CHROMA444(h) && IS_8x8DCT(mb_type)){
         int i;
         uint8_t *nnz_cache = sl->non_zero_count_cache;
-        if (h->sei.unregistered.x264_build < 151U) {
+        if (h->x264_build < 151U) {
             for (i = 0; i < 2; i++){
                 if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) {
                     nnz_cache[3+8* 1 + 2*8*i]=

diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 187b1c6..5e6a203 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c

@@ -1111,6 +1111,7 @@
             else                sl->qscale -= max_qp+1;
             if (((unsigned)sl->qscale) > max_qp){
                 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
+                sl->qscale = max_qp;
                 return -1;
             }
         }

diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c
index a7a107c..ec9fca0 100644
--- a/libavcodec/h264_direct.c
+++ b/libavcodec/h264_direct.c

@@ -410,7 +410,7 @@
              (l1ref0[0] < 0 && !l1ref1[0] &&
               FFABS(l1mv1[0][0]) <= 1 &&
               FFABS(l1mv1[0][1]) <= 1 &&
-              h->sei.unregistered.x264_build > 33U))) {
+              h->x264_build > 33U))) {
             a = b = 0;
             if (ref[0] > 0)
                 a = mv[0];
@@ -445,7 +445,7 @@
                 (l1ref0[i8] == 0 ||
                  (l1ref0[i8] < 0 &&
                   l1ref1[i8] == 0 &&
-                  h->sei.unregistered.x264_build > 33U))) {
+                  h->x264_build > 33U))) {
                 const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1;
                 if (IS_SUB_8X8(sub_mb_type)) {
                     const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];

diff --git a/libavcodec/h264_levels.c b/libavcodec/h264_levels.c
new file mode 100644
index 0000000..737b7dc
--- /dev/null
+++ b/libavcodec/h264_levels.c

@@ -0,0 +1,130 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "h264_levels.h"
+
+// H.264 table A-1.
+static const H264LevelDescriptor h264_levels[] = {
+    // Name          MaxMBPS                   MaxBR              MinCR
+    //  | level_idc     |       MaxFS            |    MaxCPB        | MaxMvsPer2Mb
+    //  |     | cs3f    |         |  MaxDpbMbs   |       |  MaxVmvR |   |
+    { "1",   10, 0,     1485,     99,    396,     64,    175,   64, 2,  0 },
+    { "1b",  10, 1,     1485,     99,    396,    128,    350,   64, 2,  0 },
+    { "1b",   9, 0,     1485,     99,    396,    128,    350,   64, 2,  0 },
+    { "1.1", 11, 0,     3000,    396,    900,    192,    500,  128, 2,  0 },
+    { "1.2", 12, 0,     6000,    396,   2376,    384,   1000,  128, 2,  0 },
+    { "1.3", 13, 0,    11880,    396,   2376,    768,   2000,  128, 2,  0 },
+    { "2",   20, 0,    11880,    396,   2376,   2000,   2000,  128, 2,  0 },
+    { "2.1", 21, 0,    19800,    792,   4752,   4000,   4000,  256, 2,  0 },
+    { "2.2", 22, 0,    20250,   1620,   8100,   4000,   4000,  256, 2,  0 },
+    { "3",   30, 0,    40500,   1620,   8100,  10000,  10000,  256, 2, 32 },
+    { "3.1", 31, 0,   108000,   3600,  18000,  14000,  14000,  512, 4, 16 },
+    { "3.2", 32, 0,   216000,   5120,  20480,  20000,  20000,  512, 4, 16 },
+    { "4",   40, 0,   245760,   8192,  32768,  20000,  25000,  512, 4, 16 },
+    { "4.1", 41, 0,   245760,   8192,  32768,  50000,  62500,  512, 2, 16 },
+    { "4.2", 42, 0,   522240,   8704,  34816,  50000,  62500,  512, 2, 16 },
+    { "5",   50, 0,   589824,  22080, 110400, 135000, 135000,  512, 2, 16 },
+    { "5.1", 51, 0,   983040,  36864, 184320, 240000, 240000,  512, 2, 16 },
+    { "5.2", 52, 0,  2073600,  36864, 184320, 240000, 240000,  512, 2, 16 },
+    { "6",   60, 0,  4177920, 139264, 696320, 240000, 240000, 8192, 2, 16 },
+    { "6.1", 61, 0,  8355840, 139264, 696320, 480000, 480000, 8192, 2, 16 },
+    { "6.2", 62, 0, 16711680, 139264, 696320, 800000, 800000, 8192, 2, 16 },
+};
+
+// H.264 table A-2 plus values from A-1.
+static const struct {
+    int profile_idc;
+    int cpb_br_vcl_factor;
+    int cpb_br_nal_factor;
+} h264_br_factors[] = {
+    {  66, 1000, 1200 },
+    {  77, 1000, 1200 },
+    {  88, 1000, 1200 },
+    { 100, 1250, 1500 },
+    { 110, 3000, 3600 },
+    { 122, 4000, 4800 },
+    { 244, 4000, 4800 },
+    {  44, 4000, 4800 },
+};
+
+// We are only ever interested in the NAL bitrate factor.
+static int h264_get_br_factor(int profile_idc)
+{
+    int i;
+    for (i = 0; i < FF_ARRAY_ELEMS(h264_br_factors); i++) {
+        if (h264_br_factors[i].profile_idc == profile_idc)
+            return h264_br_factors[i].cpb_br_nal_factor;
+    }
+    // Default to the non-high profile value if not specified.
+    return 1200;
+}
+
+const H264LevelDescriptor *ff_h264_get_level(int level_idc,
+                                             int constraint_set3_flag)
+{
+    int i;
+    for (i = 0; i < FF_ARRAY_ELEMS(h264_levels); i++) {
+        if (h264_levels[i].level_idc            == level_idc &&
+            h264_levels[i].constraint_set3_flag == constraint_set3_flag)
+            return &h264_levels[i];
+    }
+    return NULL;
+}
+
+const H264LevelDescriptor *ff_h264_guess_level(int profile_idc,
+                                               int64_t bitrate,
+                                               int width, int height,
+                                               int max_dec_frame_buffering)
+{
+    int width_mbs  = (width  + 15) / 16;
+    int height_mbs = (height + 15) / 16;
+    int no_cs3f = !(profile_idc == 66 ||
+                    profile_idc == 77 ||
+                    profile_idc == 88);
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(h264_levels); i++) {
+        const H264LevelDescriptor *level = &h264_levels[i];
+
+        if (level->constraint_set3_flag && no_cs3f)
+            continue;
+
+        if (bitrate > (int64_t)level->max_br * h264_get_br_factor(profile_idc))
+            continue;
+
+        if (width_mbs  * height_mbs > level->max_fs)
+            continue;
+        if (width_mbs  * width_mbs  > 8 * level->max_fs)
+            continue;
+        if (height_mbs * height_mbs > 8 * level->max_fs)
+            continue;
+
+        if (width_mbs && height_mbs) {
+            int max_dpb_frames =
+                FFMIN(level->max_dpb_mbs / (width_mbs * height_mbs), 16);
+            if (max_dec_frame_buffering > max_dpb_frames)
+                continue;
+        }
+
+        return level;
+    }
+
+    // No usable levels found - frame is too big or bitrate is too high.
+    return NULL;
+}

diff --git a/libavcodec/h264_levels.h b/libavcodec/h264_levels.h
new file mode 100644
index 0000000..4189fc6
--- /dev/null
+++ b/libavcodec/h264_levels.h

@@ -0,0 +1,53 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_H264_LEVELS_H
+#define AVCODEC_H264_LEVELS_H
+
+
+#include <stdint.h>
+
+typedef struct H264LevelDescriptor {
+    const char *name;
+    uint8_t     level_idc;
+    uint8_t     constraint_set3_flag;
+    uint32_t    max_mbps;
+    uint32_t    max_fs;
+    uint32_t    max_dpb_mbs;
+    uint32_t    max_br;
+    uint32_t    max_cpb;
+    uint16_t    max_v_mv_r;
+    uint8_t     min_cr;
+    uint8_t     max_mvs_per_2mb;
+} H264LevelDescriptor;
+
+const H264LevelDescriptor *ff_h264_get_level(int level_idc,
+                                             int constraint_set3_flag);
+
+/**
+ * Guess the level of a stream from some parameters.
+ *
+ * Unknown parameters may be zero, in which case they are ignored.
+ */
+const H264LevelDescriptor *ff_h264_guess_level(int profile_idc,
+                                               int64_t bitrate,
+                                               int width, int height,
+                                               int max_dec_frame_buffering);
+
+
+#endif /* AVCODEC_H264_LEVELS_H */

diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index cb9fe85..3cd17b7 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c

@@ -637,7 +637,7 @@
                 uint8_t *const ptr = dest_y + block_offset[i];
                 const int dir      = sl->intra4x4_pred_mode_cache[scan8[i]];
                 if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) {
-                    if (h->sei.unregistered.x264_build < 151U) {
+                    if (h->x264_build < 151U) {
                         h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                     } else
                         h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift),

diff --git a/libavcodec/h264_mc_template.c b/libavcodec/h264_mc_template.c
index 58c0504..d02e2bf 100644
--- a/libavcodec/h264_mc_template.c
+++ b/libavcodec/h264_mc_template.c

@@ -78,7 +78,8 @@
 
     if (HAVE_THREADS && (h->avctx->active_thread_type & FF_THREAD_FRAME))
         await_references(h, sl);
-    prefetch_motion(h, sl, 0, PIXEL_SHIFT, CHROMA_IDC);
+    if (USES_LIST(mb_type, 0))
+        prefetch_motion(h, sl, 0, PIXEL_SHIFT, CHROMA_IDC);
 
     if (IS_16X16(mb_type)) {
         mc_part(h, sl, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,

diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_bsf.c
new file mode 100644
index 0000000..bf37528
--- /dev/null
+++ b/libavcodec/h264_metadata_bsf.c

@@ -0,0 +1,798 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/display.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "bsf.h"
+#include "cbs.h"
+#include "cbs_h264.h"
+#include "h264.h"
+#include "h264_levels.h"
+#include "h264_sei.h"
+
+enum {
+    PASS,
+    INSERT,
+    REMOVE,
+    EXTRACT,
+};
+
+enum {
+    FLIP_HORIZONTAL = 1,
+    FLIP_VERTICAL   = 2,
+};
+
+enum {
+    LEVEL_UNSET = -2,
+    LEVEL_AUTO  = -1,
+};
+
+typedef struct H264MetadataContext {
+    const AVClass *class;
+
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment access_unit;
+
+    int done_first_au;
+
+    int aud;
+
+    AVRational sample_aspect_ratio;
+
+    int video_format;
+    int video_full_range_flag;
+    int colour_primaries;
+    int transfer_characteristics;
+    int matrix_coefficients;
+
+    int chroma_sample_loc_type;
+
+    AVRational tick_rate;
+    int fixed_frame_rate_flag;
+
+    int crop_left;
+    int crop_right;
+    int crop_top;
+    int crop_bottom;
+
+    const char *sei_user_data;
+
+    int delete_filler;
+
+    int display_orientation;
+    double rotate;
+    int flip;
+
+    int level;
+} H264MetadataContext;
+
+
+static int h264_metadata_update_sps(AVBSFContext *bsf,
+                                    H264RawSPS *sps)
+{
+    H264MetadataContext *ctx = bsf->priv_data;
+    int need_vui = 0;
+    int crop_unit_x, crop_unit_y;
+
+    if (ctx->sample_aspect_ratio.num && ctx->sample_aspect_ratio.den) {
+        // Table E-1.
+        static const AVRational sar_idc[] = {
+            {   0,  0 }, // Unspecified (never written here).
+            {   1,  1 }, {  12, 11 }, {  10, 11 }, {  16, 11 },
+            {  40, 33 }, {  24, 11 }, {  20, 11 }, {  32, 11 },
+            {  80, 33 }, {  18, 11 }, {  15, 11 }, {  64, 33 },
+            { 160, 99 }, {   4,  3 }, {   3,  2 }, {   2,  1 },
+        };
+        int num, den, i;
+
+        av_reduce(&num, &den, ctx->sample_aspect_ratio.num,
+                  ctx->sample_aspect_ratio.den, 65535);
+
+        for (i = 1; i < FF_ARRAY_ELEMS(sar_idc); i++) {
+            if (num == sar_idc[i].num &&
+                den == sar_idc[i].den)
+                break;
+        }
+        if (i == FF_ARRAY_ELEMS(sar_idc)) {
+            sps->vui.aspect_ratio_idc = 255;
+            sps->vui.sar_width  = num;
+            sps->vui.sar_height = den;
+        } else {
+            sps->vui.aspect_ratio_idc = i;
+        }
+        sps->vui.aspect_ratio_info_present_flag = 1;
+        need_vui = 1;
+    }
+
+#define SET_OR_INFER(field, value, present_flag, infer) do { \
+        if (value >= 0) { \
+            field = value; \
+            need_vui = 1; \
+        } else if (!present_flag) \
+            field = infer; \
+    } while (0)
+
+    if (ctx->video_format             >= 0 ||
+        ctx->video_full_range_flag    >= 0 ||
+        ctx->colour_primaries         >= 0 ||
+        ctx->transfer_characteristics >= 0 ||
+        ctx->matrix_coefficients      >= 0) {
+
+        SET_OR_INFER(sps->vui.video_format, ctx->video_format,
+                     sps->vui.video_signal_type_present_flag, 5);
+
+        SET_OR_INFER(sps->vui.video_full_range_flag,
+                     ctx->video_full_range_flag,
+                     sps->vui.video_signal_type_present_flag, 0);
+
+        if (ctx->colour_primaries         >= 0 ||
+            ctx->transfer_characteristics >= 0 ||
+            ctx->matrix_coefficients      >= 0) {
+
+            SET_OR_INFER(sps->vui.colour_primaries,
+                         ctx->colour_primaries,
+                         sps->vui.colour_description_present_flag, 2);
+
+            SET_OR_INFER(sps->vui.transfer_characteristics,
+                         ctx->transfer_characteristics,
+                         sps->vui.colour_description_present_flag, 2);
+
+            SET_OR_INFER(sps->vui.matrix_coefficients,
+                         ctx->matrix_coefficients,
+                         sps->vui.colour_description_present_flag, 2);
+
+            sps->vui.colour_description_present_flag = 1;
+        }
+        sps->vui.video_signal_type_present_flag = 1;
+        need_vui = 1;
+    }
+
+    if (ctx->chroma_sample_loc_type >= 0) {
+        sps->vui.chroma_sample_loc_type_top_field =
+            ctx->chroma_sample_loc_type;
+        sps->vui.chroma_sample_loc_type_bottom_field =
+            ctx->chroma_sample_loc_type;
+        sps->vui.chroma_loc_info_present_flag = 1;
+        need_vui = 1;
+    }
+
+    if (ctx->tick_rate.num && ctx->tick_rate.den) {
+        int num, den;
+
+        av_reduce(&num, &den, ctx->tick_rate.num, ctx->tick_rate.den,
+                  UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
+
+        sps->vui.time_scale        = num;
+        sps->vui.num_units_in_tick = den;
+
+        sps->vui.timing_info_present_flag = 1;
+        need_vui = 1;
+    }
+    SET_OR_INFER(sps->vui.fixed_frame_rate_flag,
+                 ctx->fixed_frame_rate_flag,
+                 sps->vui.timing_info_present_flag, 0);
+
+    if (sps->separate_colour_plane_flag || sps->chroma_format_idc == 0) {
+        crop_unit_x = 1;
+        crop_unit_y = 2 - sps->frame_mbs_only_flag;
+    } else {
+        crop_unit_x = 1 + (sps->chroma_format_idc < 3);
+        crop_unit_y = (1 + (sps->chroma_format_idc < 2)) *
+                       (2 - sps->frame_mbs_only_flag);
+    }
+#define CROP(border, unit) do { \
+        if (ctx->crop_ ## border >= 0) { \
+            if (ctx->crop_ ## border % unit != 0) { \
+                av_log(bsf, AV_LOG_ERROR, "Invalid value for crop_%s: " \
+                       "must be a multiple of %d.\n", #border, unit); \
+                return AVERROR(EINVAL); \
+            } \
+            sps->frame_crop_ ## border ## _offset = \
+                  ctx->crop_ ## border / unit; \
+            sps->frame_cropping_flag = 1; \
+        } \
+    } while (0)
+    CROP(left,   crop_unit_x);
+    CROP(right,  crop_unit_x);
+    CROP(top,    crop_unit_y);
+    CROP(bottom, crop_unit_y);
+#undef CROP
+
+    if (ctx->level != LEVEL_UNSET) {
+        int level_idc;
+
+        if (ctx->level == LEVEL_AUTO) {
+            const H264LevelDescriptor *desc;
+            int64_t bit_rate;
+            int width, height;
+
+            if (sps->vui.nal_hrd_parameters_present_flag) {
+                bit_rate = (sps->vui.nal_hrd_parameters.bit_rate_value_minus1[0] + 1) *
+                    (INT64_C(1) << (sps->vui.nal_hrd_parameters.bit_rate_scale + 6));
+            } else if (sps->vui.vcl_hrd_parameters_present_flag) {
+                bit_rate = (sps->vui.vcl_hrd_parameters.bit_rate_value_minus1[0] + 1) *
+                    (INT64_C(1) << (sps->vui.vcl_hrd_parameters.bit_rate_scale + 6));
+                // Adjust for VCL vs. NAL limits.
+                bit_rate = bit_rate * 6 / 5;
+            } else {
+                bit_rate = 0;
+            }
+
+            width  = 16 * (sps->pic_width_in_mbs_minus1 + 1);
+            height = 16 * (sps->pic_height_in_map_units_minus1 + 1) *
+                (2 - sps->frame_mbs_only_flag);
+
+            desc = ff_h264_guess_level(sps->profile_idc, bit_rate,
+                                       width, height,
+                                       sps->vui.max_dec_frame_buffering);
+            if (desc) {
+                level_idc = desc->level_idc;
+            } else {
+                av_log(bsf, AV_LOG_WARNING, "Stream does not appear to "
+                       "conform to any level: using level 6.2.\n");
+                level_idc = 62;
+            }
+        } else {
+            level_idc = ctx->level;
+        }
+
+        if (level_idc == 9) {
+            if (sps->profile_idc == 66 ||
+                sps->profile_idc == 77 ||
+                sps->profile_idc == 88) {
+                sps->level_idc = 10;
+                sps->constraint_set3_flag = 1;
+            } else {
+                sps->level_idc = 9;
+            }
+        } else {
+            sps->level_idc = level_idc;
+        }
+    }
+
+    if (need_vui)
+        sps->vui_parameters_present_flag = 1;
+
+    return 0;
+}
+
+static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
+{
+    H264MetadataContext *ctx = bsf->priv_data;
+    AVPacket *in = NULL;
+    CodedBitstreamFragment *au = &ctx->access_unit;
+    int err, i, j, has_sps;
+    H264RawAUD aud;
+    uint8_t *displaymatrix_side_data = NULL;
+    size_t displaymatrix_side_data_size = 0;
+
+    err = ff_bsf_get_packet(bsf, &in);
+    if (err < 0)
+        return err;
+
+    err = ff_cbs_read_packet(ctx->cbc, au, in);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
+        goto fail;
+    }
+
+    if (au->nb_units == 0) {
+        av_log(bsf, AV_LOG_ERROR, "No NAL units in packet.\n");
+        err = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    // If an AUD is present, it must be the first NAL unit.
+    if (au->units[0].type == H264_NAL_AUD) {
+        if (ctx->aud == REMOVE)
+            ff_cbs_delete_unit(ctx->cbc, au, 0);
+    } else {
+        if (ctx->aud == INSERT) {
+            static const int primary_pic_type_table[] = {
+                0x084, // 2, 7
+                0x0a5, // 0, 2, 5, 7
+                0x0e7, // 0, 1, 2, 5, 6, 7
+                0x210, // 4, 9
+                0x318, // 3, 4, 8, 9
+                0x294, // 2, 4, 7, 9
+                0x3bd, // 0, 2, 3, 4, 5, 7, 8, 9
+                0x3ff, // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
+            };
+            int primary_pic_type_mask = 0xff;
+
+            for (i = 0; i < au->nb_units; i++) {
+                if (au->units[i].type == H264_NAL_SLICE ||
+                    au->units[i].type == H264_NAL_IDR_SLICE) {
+                    H264RawSlice *slice = au->units[i].content;
+                    for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++) {
+                         if (!(primary_pic_type_table[j] &
+                               (1 << slice->header.slice_type)))
+                             primary_pic_type_mask &= ~(1 << j);
+                    }
+                }
+            }
+            for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++)
+                if (primary_pic_type_mask & (1 << j))
+                    break;
+            if (j >= FF_ARRAY_ELEMS(primary_pic_type_table)) {
+                av_log(bsf, AV_LOG_ERROR, "No usable primary_pic_type: "
+                       "invalid slice types?\n");
+                err = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+
+            aud = (H264RawAUD) {
+                .nal_unit_header.nal_unit_type = H264_NAL_AUD,
+                .primary_pic_type = j,
+            };
+
+            err = ff_cbs_insert_unit_content(ctx->cbc, au,
+                                             0, H264_NAL_AUD, &aud, NULL);
+            if (err < 0) {
+                av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
+                goto fail;
+            }
+        }
+    }
+
+    has_sps = 0;
+    for (i = 0; i < au->nb_units; i++) {
+        if (au->units[i].type == H264_NAL_SPS) {
+            err = h264_metadata_update_sps(bsf, au->units[i].content);
+            if (err < 0)
+                goto fail;
+            has_sps = 1;
+        }
+    }
+
+    // Only insert the SEI in access units containing SPSs, and also
+    // unconditionally in the first access unit we ever see.
+    if (ctx->sei_user_data && (has_sps || !ctx->done_first_au)) {
+        H264RawSEIPayload payload = {
+            .payload_type = H264_SEI_TYPE_USER_DATA_UNREGISTERED,
+        };
+        H264RawSEIUserDataUnregistered *udu =
+            &payload.payload.user_data_unregistered;
+
+        for (i = j = 0; j < 32 && ctx->sei_user_data[i]; i++) {
+            int c, v;
+            c = ctx->sei_user_data[i];
+            if (c == '-') {
+                continue;
+            } else if (av_isxdigit(c)) {
+                c = av_tolower(c);
+                v = (c <= '9' ? c - '0' : c - 'a' + 10);
+            } else {
+                goto invalid_user_data;
+            }
+            if (i & 1)
+                udu->uuid_iso_iec_11578[j / 2] |= v;
+            else
+                udu->uuid_iso_iec_11578[j / 2] = v << 4;
+            ++j;
+        }
+        if (j == 32 && ctx->sei_user_data[i] == '+') {
+            size_t len = strlen(ctx->sei_user_data + i + 1);
+
+            udu->data_ref = av_buffer_alloc(len + 1);
+            if (!udu->data_ref) {
+                err = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            udu->data        = udu->data_ref->data;
+            udu->data_length = len + 1;
+            memcpy(udu->data, ctx->sei_user_data + i + 1, len + 1);
+
+            err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
+            if (err < 0) {
+                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
+                       "message to access unit.\n");
+                goto fail;
+            }
+
+        } else {
+        invalid_user_data:
+            av_log(bsf, AV_LOG_ERROR, "Invalid user data: "
+                   "must be \"UUID+string\".\n");
+            err = AVERROR(EINVAL);
+            goto fail;
+        }
+    }
+
+    if (ctx->delete_filler) {
+        for (i = 0; i < au->nb_units; i++) {
+            if (au->units[i].type == H264_NAL_FILLER_DATA) {
+                // Filler NAL units.
+                err = ff_cbs_delete_unit(ctx->cbc, au, i);
+                if (err < 0) {
+                    av_log(bsf, AV_LOG_ERROR, "Failed to delete "
+                           "filler NAL.\n");
+                    goto fail;
+                }
+                --i;
+                continue;
+            }
+
+            if (au->units[i].type == H264_NAL_SEI) {
+                // Filler SEI messages.
+                H264RawSEI *sei = au->units[i].content;
+
+                for (j = 0; j < sei->payload_count; j++) {
+                    if (sei->payload[j].payload_type ==
+                        H264_SEI_TYPE_FILLER_PAYLOAD) {
+                        err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
+                                                             &au->units[i], j);
+                        if (err < 0) {
+                            av_log(bsf, AV_LOG_ERROR, "Failed to delete "
+                                   "filler SEI message.\n");
+                            goto fail;
+                        }
+                        // Renumbering might have happened, start again at
+                        // the same NAL unit position.
+                        --i;
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    if (ctx->display_orientation != PASS) {
+        for (i = 0; i < au->nb_units; i++) {
+            H264RawSEI *sei;
+            if (au->units[i].type != H264_NAL_SEI)
+                continue;
+            sei = au->units[i].content;
+
+            for (j = 0; j < sei->payload_count; j++) {
+                H264RawSEIDisplayOrientation *disp;
+                int32_t *matrix;
+
+                if (sei->payload[j].payload_type !=
+                    H264_SEI_TYPE_DISPLAY_ORIENTATION)
+                    continue;
+                disp = &sei->payload[j].payload.display_orientation;
+
+                if (ctx->display_orientation == REMOVE ||
+                    ctx->display_orientation == INSERT) {
+                    err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
+                                                         &au->units[i], j);
+                    if (err < 0) {
+                        av_log(bsf, AV_LOG_ERROR, "Failed to delete "
+                               "display orientation SEI message.\n");
+                        goto fail;
+                    }
+                    --i;
+                    break;
+                }
+
+                matrix = av_mallocz(9 * sizeof(int32_t));
+                if (!matrix) {
+                    err = AVERROR(ENOMEM);
+                    goto fail;
+                }
+
+                av_display_rotation_set(matrix,
+                                        disp->anticlockwise_rotation *
+                                        180.0 / 65536.0);
+                av_display_matrix_flip(matrix, disp->hor_flip, disp->ver_flip);
+
+                // If there are multiple display orientation messages in an
+                // access unit then ignore all but the last one.
+                av_freep(&displaymatrix_side_data);
+
+                displaymatrix_side_data      = (uint8_t*)matrix;
+                displaymatrix_side_data_size = 9 * sizeof(int32_t);
+            }
+        }
+    }
+    if (ctx->display_orientation == INSERT) {
+        H264RawSEIPayload payload = {
+            .payload_type = H264_SEI_TYPE_DISPLAY_ORIENTATION,
+        };
+        H264RawSEIDisplayOrientation *disp =
+            &payload.payload.display_orientation;
+        uint8_t *data;
+        int size;
+        int write = 0;
+
+        data = av_packet_get_side_data(in, AV_PKT_DATA_DISPLAYMATRIX, &size);
+        if (data && size >= 9 * sizeof(int32_t)) {
+            int32_t matrix[9];
+            int hflip, vflip;
+            double angle;
+
+            memcpy(matrix, data, sizeof(matrix));
+
+            hflip = vflip = 0;
+            if (matrix[0] < 0 && matrix[4] > 0)
+                hflip = 1;
+            else if (matrix[0] > 0 && matrix[4] < 0)
+                vflip = 1;
+            av_display_matrix_flip(matrix, hflip, vflip);
+
+            angle = av_display_rotation_get(matrix);
+
+            if (!(angle >= -180.0 && angle <= 180.0 /* also excludes NaN */) ||
+                matrix[2] != 0 || matrix[5] != 0 ||
+                matrix[6] != 0 || matrix[7] != 0) {
+                av_log(bsf, AV_LOG_WARNING, "Input display matrix is not "
+                       "representable in H.264 parameters.\n");
+            } else {
+                disp->hor_flip = hflip;
+                disp->ver_flip = vflip;
+                disp->anticlockwise_rotation =
+                    (uint16_t)rint((angle >= 0.0 ? angle
+                                                 : angle + 360.0) *
+                                   65536.0 / 360.0);
+                write = 1;
+            }
+        }
+
+        if (has_sps || !ctx->done_first_au) {
+            if (!isnan(ctx->rotate)) {
+                disp->anticlockwise_rotation =
+                    (uint16_t)rint((ctx->rotate >= 0.0 ? ctx->rotate
+                                                       : ctx->rotate + 360.0) *
+                                   65536.0 / 360.0);
+                write = 1;
+            }
+            if (ctx->flip) {
+                disp->hor_flip = !!(ctx->flip & FLIP_HORIZONTAL);
+                disp->ver_flip = !!(ctx->flip & FLIP_VERTICAL);
+                write = 1;
+            }
+        }
+
+        if (write) {
+            disp->display_orientation_repetition_period = 1;
+
+            err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
+            if (err < 0) {
+                av_log(bsf, AV_LOG_ERROR, "Failed to add display orientation "
+                       "SEI message to access unit.\n");
+                goto fail;
+            }
+        }
+    }
+
+    err = ff_cbs_write_packet(ctx->cbc, out, au);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
+        goto fail;
+    }
+
+    err = av_packet_copy_props(out, in);
+    if (err < 0)
+        goto fail;
+
+    if (displaymatrix_side_data) {
+        err = av_packet_add_side_data(out, AV_PKT_DATA_DISPLAYMATRIX,
+                                      displaymatrix_side_data,
+                                      displaymatrix_side_data_size);
+        if (err) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted "
+                   "displaymatrix side data to packet.\n");
+            goto fail;
+        }
+        displaymatrix_side_data = NULL;
+    }
+
+    ctx->done_first_au = 1;
+
+    err = 0;
+fail:
+    ff_cbs_fragment_uninit(ctx->cbc, au);
+    av_freep(&displaymatrix_side_data);
+
+    if (err < 0)
+        av_packet_unref(out);
+    av_packet_free(&in);
+
+    return err;
+}
+
+static int h264_metadata_init(AVBSFContext *bsf)
+{
+    H264MetadataContext *ctx = bsf->priv_data;
+    CodedBitstreamFragment *au = &ctx->access_unit;
+    int err, i;
+
+    err = ff_cbs_init(&ctx->cbc, AV_CODEC_ID_H264, bsf);
+    if (err < 0)
+        return err;
+
+    if (bsf->par_in->extradata) {
+        err = ff_cbs_read_extradata(ctx->cbc, au, bsf->par_in);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
+            goto fail;
+        }
+
+        for (i = 0; i < au->nb_units; i++) {
+            if (au->units[i].type == H264_NAL_SPS) {
+                err = h264_metadata_update_sps(bsf, au->units[i].content);
+                if (err < 0)
+                    goto fail;
+            }
+        }
+
+        err = ff_cbs_write_extradata(ctx->cbc, bsf->par_out, au);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
+            goto fail;
+        }
+    }
+
+    err = 0;
+fail:
+    ff_cbs_fragment_uninit(ctx->cbc, au);
+    return err;
+}
+
+static void h264_metadata_close(AVBSFContext *bsf)
+{
+    H264MetadataContext *ctx = bsf->priv_data;
+    ff_cbs_close(&ctx->cbc);
+}
+
+#define OFFSET(x) offsetof(H264MetadataContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
+static const AVOption h264_metadata_options[] = {
+    { "aud", "Access Unit Delimiter NAL units",
+        OFFSET(aud), AV_OPT_TYPE_INT,
+        { .i64 = PASS }, PASS, REMOVE, FLAGS, "aud" },
+    { "pass",   NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = PASS   }, .flags = FLAGS, .unit = "aud" },
+    { "insert", NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = INSERT }, .flags = FLAGS, .unit = "aud" },
+    { "remove", NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = REMOVE }, .flags = FLAGS, .unit = "aud" },
+
+    { "sample_aspect_ratio", "Set sample aspect ratio (table E-1)",
+        OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL,
+        { .dbl = 0.0 }, 0, 65535, FLAGS },
+
+    { "video_format", "Set video format (table E-2)",
+        OFFSET(video_format), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 7, FLAGS},
+    { "video_full_range_flag", "Set video full range flag",
+        OFFSET(video_full_range_flag), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 1, FLAGS },
+    { "colour_primaries", "Set colour primaries (table E-3)",
+        OFFSET(colour_primaries), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+    { "transfer_characteristics", "Set transfer characteristics (table E-4)",
+        OFFSET(transfer_characteristics), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+    { "matrix_coefficients", "Set matrix coefficients (table E-5)",
+        OFFSET(matrix_coefficients), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+
+    { "chroma_sample_loc_type", "Set chroma sample location type (figure E-1)",
+        OFFSET(chroma_sample_loc_type), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 6, FLAGS },
+
+    { "tick_rate", "Set VUI tick rate (num_units_in_tick / time_scale)",
+        OFFSET(tick_rate), AV_OPT_TYPE_RATIONAL,
+        { .dbl = 0.0 }, 0, UINT_MAX, FLAGS },
+    { "fixed_frame_rate_flag", "Set VUI fixed frame rate flag",
+        OFFSET(fixed_frame_rate_flag), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 1, FLAGS },
+
+    { "crop_left", "Set left border crop offset",
+        OFFSET(crop_left), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, H264_MAX_WIDTH, FLAGS },
+    { "crop_right", "Set right border crop offset",
+        OFFSET(crop_right), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, H264_MAX_WIDTH, FLAGS },
+    { "crop_top", "Set top border crop offset",
+        OFFSET(crop_top), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, H264_MAX_HEIGHT, FLAGS },
+    { "crop_bottom", "Set bottom border crop offset",
+        OFFSET(crop_bottom), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, H264_MAX_HEIGHT, FLAGS },
+
+    { "sei_user_data", "Insert SEI user data (UUID+string)",
+        OFFSET(sei_user_data), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
+
+    { "delete_filler", "Delete all filler (both NAL and SEI)",
+        OFFSET(delete_filler), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS},
+
+    { "display_orientation", "Display orientation SEI",
+        OFFSET(display_orientation), AV_OPT_TYPE_INT,
+        { .i64 = PASS }, PASS, EXTRACT, FLAGS, "disp_or" },
+    { "pass",    NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = PASS    }, .flags = FLAGS, .unit = "disp_or" },
+    { "insert",  NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = INSERT  }, .flags = FLAGS, .unit = "disp_or" },
+    { "remove",  NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = REMOVE  }, .flags = FLAGS, .unit = "disp_or" },
+    { "extract", NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = EXTRACT }, .flags = FLAGS, .unit = "disp_or" },
+
+    { "rotate", "Set rotation in display orientation SEI (anticlockwise angle in degrees)",
+        OFFSET(rotate), AV_OPT_TYPE_DOUBLE,
+        { .dbl = NAN }, -360.0, +360.0, FLAGS },
+    { "flip", "Set flip in display orientation SEI",
+        OFFSET(flip), AV_OPT_TYPE_FLAGS,
+        { .i64 = 0 }, 0, FLIP_HORIZONTAL | FLIP_VERTICAL, FLAGS, "flip" },
+    { "horizontal", "Set hor_flip",
+        0, AV_OPT_TYPE_CONST,
+        { .i64 = FLIP_HORIZONTAL }, .flags = FLAGS, .unit = "flip" },
+    { "vertical",   "Set ver_flip",
+        0, AV_OPT_TYPE_CONST,
+        { .i64 = FLIP_VERTICAL },   .flags = FLAGS, .unit = "flip" },
+
+    { "level", "Set level (table A-1)",
+        OFFSET(level), AV_OPT_TYPE_INT,
+        { .i64 = LEVEL_UNSET }, LEVEL_UNSET, 0xff, FLAGS, "level" },
+    { "auto", "Attempt to guess level from stream properties",
+        0, AV_OPT_TYPE_CONST,
+        { .i64 = LEVEL_AUTO }, .flags = FLAGS, .unit = "level" },
+#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
+        { .i64 = value },      .flags = FLAGS, .unit = "level"
+    { LEVEL("1",   10) },
+    { LEVEL("1b",   9) },
+    { LEVEL("1.1", 11) },
+    { LEVEL("1.2", 12) },
+    { LEVEL("1.3", 13) },
+    { LEVEL("2",   20) },
+    { LEVEL("2.1", 21) },
+    { LEVEL("2.2", 22) },
+    { LEVEL("3",   30) },
+    { LEVEL("3.1", 31) },
+    { LEVEL("3.2", 32) },
+    { LEVEL("4",   40) },
+    { LEVEL("4.1", 41) },
+    { LEVEL("4.2", 42) },
+    { LEVEL("5",   50) },
+    { LEVEL("5.1", 51) },
+    { LEVEL("5.2", 52) },
+    { LEVEL("6",   60) },
+    { LEVEL("6.1", 61) },
+    { LEVEL("6.2", 62) },
+#undef LEVEL
+
+    { NULL }
+};
+
+static const AVClass h264_metadata_class = {
+    .class_name = "h264_metadata_bsf",
+    .item_name  = av_default_item_name,
+    .option     = h264_metadata_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const enum AVCodecID h264_metadata_codec_ids[] = {
+    AV_CODEC_ID_H264, AV_CODEC_ID_NONE,
+};
+
+const AVBitStreamFilter ff_h264_metadata_bsf = {
+    .name           = "h264_metadata",
+    .priv_data_size = sizeof(H264MetadataContext),
+    .priv_class     = &h264_metadata_class,
+    .init           = &h264_metadata_init,
+    .close          = &h264_metadata_close,
+    .filter         = &h264_metadata_filter,
+    .codec_ids      = h264_metadata_codec_ids,
+};

diff --git a/libavcodec/h264_mp4toannexb_bsf.c b/libavcodec/h264_mp4toannexb_bsf.c
index 163d0f5..fb3f24e 100644
--- a/libavcodec/h264_mp4toannexb_bsf.c
+++ b/libavcodec/h264_mp4toannexb_bsf.c

@@ -26,6 +26,7 @@
 
 #include "avcodec.h"
 #include "bsf.h"
+#include "h264.h"
 
 typedef struct H264BSFContext {
     int32_t  sps_offset;
@@ -39,21 +40,21 @@
 
 static int alloc_and_copy(AVPacket *out,
                           const uint8_t *sps_pps, uint32_t sps_pps_size,
-                          const uint8_t *in, uint32_t in_size)
+                          const uint8_t *in, uint32_t in_size, int ps)
 {
     uint32_t offset         = out->size;
-    uint8_t nal_header_size = offset ? 3 : 4;
+    uint8_t start_code_size = offset == 0 || ps ? 4 : 3;
     int err;
 
-    err = av_grow_packet(out, sps_pps_size + in_size + nal_header_size);
+    err = av_grow_packet(out, sps_pps_size + in_size + start_code_size);
     if (err < 0)
         return err;
 
     if (sps_pps)
         memcpy(out->data + offset, sps_pps, sps_pps_size);
-    memcpy(out->data + sps_pps_size + nal_header_size + offset, in, in_size);
-    if (!offset) {
-        AV_WB32(out->data + sps_pps_size, 1);
+    memcpy(out->data + sps_pps_size + start_code_size + offset, in, in_size);
+    if (start_code_size == 4) {
+        AV_WB32(out->data + offset + sps_pps_size, 1);
     } else {
         (out->data + offset + sps_pps_size)[0] =
         (out->data + offset + sps_pps_size)[1] = 0;
@@ -209,9 +210,9 @@
         if (nal_size > buf_end - buf || nal_size < 0)
             goto fail;
 
-        if (unit_type == 7)
+        if (unit_type == H264_NAL_SPS)
             s->idr_sps_seen = s->new_idr = 1;
-        else if (unit_type == 8) {
+        else if (unit_type == H264_NAL_PPS) {
             s->idr_pps_seen = s->new_idr = 1;
             /* if SPS has not been seen yet, prepend the AVCC one to PPS */
             if (!s->idr_sps_seen) {
@@ -221,7 +222,7 @@
                     if ((ret = alloc_and_copy(out,
                                          ctx->par_out->extradata + s->sps_offset,
                                          s->pps_offset != -1 ? s->pps_offset : ctx->par_out->extradata_size - s->sps_offset,
-                                         buf, nal_size)) < 0)
+                                         buf, nal_size, 1)) < 0)
                         goto fail;
                     s->idr_sps_seen = 1;
                     goto next_nal;
@@ -232,30 +233,30 @@
         /* if this is a new IDR picture following an IDR picture, reset the idr flag.
          * Just check first_mb_in_slice to be 0 as this is the simplest solution.
          * This could be checking idr_pic_id instead, but would complexify the parsing. */
-        if (!s->new_idr && unit_type == 5 && (buf[1] & 0x80))
+        if (!s->new_idr && unit_type == H264_NAL_IDR_SLICE && (buf[1] & 0x80))
             s->new_idr = 1;
 
         /* prepend only to the first type 5 NAL unit of an IDR picture, if no sps/pps are already present */
-        if (s->new_idr && unit_type == 5 && !s->idr_sps_seen && !s->idr_pps_seen) {
+        if (s->new_idr && unit_type == H264_NAL_IDR_SLICE && !s->idr_sps_seen && !s->idr_pps_seen) {
             if ((ret=alloc_and_copy(out,
                                ctx->par_out->extradata, ctx->par_out->extradata_size,
-                               buf, nal_size)) < 0)
+                               buf, nal_size, 1)) < 0)
                 goto fail;
             s->new_idr = 0;
         /* if only SPS has been seen, also insert PPS */
-        } else if (s->new_idr && unit_type == 5 && s->idr_sps_seen && !s->idr_pps_seen) {
+        } else if (s->new_idr && unit_type == H264_NAL_IDR_SLICE && s->idr_sps_seen && !s->idr_pps_seen) {
             if (s->pps_offset == -1) {
                 av_log(ctx, AV_LOG_WARNING, "PPS not present in the stream, nor in AVCC, stream may be unreadable\n");
-                if ((ret = alloc_and_copy(out, NULL, 0, buf, nal_size)) < 0)
+                if ((ret = alloc_and_copy(out, NULL, 0, buf, nal_size, 0)) < 0)
                     goto fail;
             } else if ((ret = alloc_and_copy(out,
                                         ctx->par_out->extradata + s->pps_offset, ctx->par_out->extradata_size - s->pps_offset,
-                                        buf, nal_size)) < 0)
+                                        buf, nal_size, 1)) < 0)
                 goto fail;
         } else {
-            if ((ret=alloc_and_copy(out, NULL, 0, buf, nal_size)) < 0)
+            if ((ret=alloc_and_copy(out, NULL, 0, buf, nal_size, unit_type == H264_NAL_SPS || unit_type == H264_NAL_PPS)) < 0)
                 goto fail;
-            if (!s->new_idr && unit_type == 1) {
+            if (!s->new_idr && unit_type == H264_NAL_SLICE) {
                 s->new_idr = 1;
                 s->idr_sps_seen = 0;
                 s->idr_pps_seen = 0;
@@ -279,6 +280,15 @@
     return ret;
 }
 
+static void h264_mp4toannexb_flush(AVBSFContext *ctx)
+{
+    H264BSFContext *s = ctx->priv_data;
+
+    s->idr_sps_seen = 0;
+    s->idr_pps_seen = 0;
+    s->new_idr      = s->extradata_parsed;
+}
+
 static const enum AVCodecID codec_ids[] = {
     AV_CODEC_ID_H264, AV_CODEC_ID_NONE,
 };
@@ -288,5 +298,6 @@
     .priv_data_size = sizeof(H264BSFContext),
     .init           = h264_mp4toannexb_init,
     .filter         = h264_mp4toannexb_filter,
+    .flush          = h264_mp4toannexb_flush,
     .codec_ids      = codec_ids,
 };

diff --git a/libavcodec/h264_parse.c b/libavcodec/h264_parse.c
index fee28d9..34ffe3b 100644
--- a/libavcodec/h264_parse.c
+++ b/libavcodec/h264_parse.c

@@ -82,8 +82,11 @@
                         pwt->chroma_weight[i][list][j][0] = get_se_golomb(gb);
                         pwt->chroma_weight[i][list][j][1] = get_se_golomb(gb);
                         if ((int8_t)pwt->chroma_weight[i][list][j][0] != pwt->chroma_weight[i][list][j][0] ||
-                            (int8_t)pwt->chroma_weight[i][list][j][1] != pwt->chroma_weight[i][list][j][1])
+                            (int8_t)pwt->chroma_weight[i][list][j][1] != pwt->chroma_weight[i][list][j][1]) {
+                            pwt->chroma_weight[i][list][j][0] = chroma_def;
+                            pwt->chroma_weight[i][list][j][1] = 0;
                             goto out_range_weight;
+                        }
                         if (pwt->chroma_weight[i][list][j][0] != chroma_def ||
                             pwt->chroma_weight[i][list][j][1] != 0) {
                             pwt->use_weight_chroma        = 1;
@@ -117,7 +120,7 @@
     pwt->use_weight = pwt->use_weight || pwt->use_weight_chroma;
     return 0;
 out_range_weight:
-    avpriv_request_sample(logctx, "Out of range weight\n");
+    avpriv_request_sample(logctx, "Out of range weight");
     return AVERROR_INVALIDDATA;
 }
 
@@ -271,7 +274,7 @@
                      int picture_structure, int nal_ref_idc)
 {
     const int max_frame_num = 1 << sps->log2_max_frame_num;
-    int field_poc[2];
+    int64_t field_poc[2];
 
     pc->frame_num_offset = pc->prev_frame_num_offset;
     if (pc->frame_num < pc->prev_frame_num)
@@ -337,6 +340,10 @@
         field_poc[1] = poc;
     }
 
+    if (   field_poc[0] != (int)field_poc[0]
+        || field_poc[1] != (int)field_poc[1])
+        return AVERROR_INVALIDDATA;
+
     if (picture_structure != PICT_BOTTOM_FIELD)
         pic_field_poc[0] = field_poc[0];
     if (picture_structure != PICT_TOP_FIELD)

diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index dd0a965..5f9a9c4 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c

@@ -121,20 +121,23 @@
             }
             state = 7;
         } else {
+            unsigned int mb, last_mb = p->parse_last_mb;
+            GetBitContext gb;
             p->parse_history[p->parse_history_count++] = buf[i];
-            if (p->parse_history_count > 5) {
-                unsigned int mb, last_mb = p->parse_last_mb;
-                GetBitContext gb;
 
-                init_get_bits(&gb, p->parse_history, 8*p->parse_history_count);
-                p->parse_history_count = 0;
-                mb= get_ue_golomb_long(&gb);
+            init_get_bits(&gb, p->parse_history, 8*p->parse_history_count);
+            mb= get_ue_golomb_long(&gb);
+            if (get_bits_left(&gb) > 0 || p->parse_history_count > 5) {
                 p->parse_last_mb = mb;
                 if (pc->frame_start_found) {
-                    if (mb <= last_mb)
+                    if (mb <= last_mb) {
+                        i -= p->parse_history_count - 1;
+                        p->parse_history_count = 0;
                         goto found;
+                    }
                 } else
                     pc->frame_start_found = 1;
+                p->parse_history_count = 0;
                 state = 7;
             }
         }
@@ -149,7 +152,7 @@
     pc->frame_start_found = 0;
     if (p->is_avc)
         return next_avc;
-    return i - (state & 5) - 5 * (state > 7);
+    return i - (state & 5);
 }
 
 static int scan_mmco_reset(AVCodecParserContext *s, GetBitContext *gb,
@@ -243,6 +246,7 @@
                                   const uint8_t * const buf, int buf_size)
 {
     H264ParseContext *p = s->priv_data;
+    H2645RBSP rbsp = { NULL };
     H2645NAL nal = { NULL };
     int buf_index, next_avc;
     unsigned int pps_id;
@@ -258,11 +262,15 @@
     s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
 
     ff_h264_sei_uninit(&p->sei);
-    p->sei.frame_packing.frame_packing_arrangement_cancel_flag = -1;
+    p->sei.frame_packing.arrangement_cancel_flag = -1;
 
     if (!buf_size)
         return 0;
 
+    av_fast_padded_malloc(&rbsp.rbsp_buffer, &rbsp.rbsp_buffer_alloc_size, buf_size);
+    if (!rbsp.rbsp_buffer)
+        return AVERROR(ENOMEM);
+
     buf_index     = 0;
     next_avc      = p->is_avc ? 0 : buf_size;
     for (;;) {
@@ -300,7 +308,7 @@
             }
             break;
         }
-        consumed = ff_h2645_extract_rbsp(buf + buf_index, src_length, &nal, 1);
+        consumed = ff_h2645_extract_rbsp(buf + buf_index, src_length, &rbsp, &nal, 1);
         if (consumed < 0)
             break;
 
@@ -444,8 +452,10 @@
             /* Decode POC of this picture.
              * The prev_ values needed for decoding POC of the next picture are not set here. */
             field_poc[0] = field_poc[1] = INT_MAX;
-            ff_h264_init_poc(field_poc, &s->output_picture_number, sps,
+            ret = ff_h264_init_poc(field_poc, &s->output_picture_number, sps,
                              &p->poc, p->picture_structure, nal.ref_idc);
+            if (ret < 0)
+                goto fail;
 
             /* Continue parsing to check if MMCO_RESET is present.
              * FIXME: MMCO_RESET could appear in non-first slice.
@@ -544,18 +554,18 @@
                 p->last_frame_num = p->poc.frame_num;
             }
 
-            av_freep(&nal.rbsp_buffer);
+            av_freep(&rbsp.rbsp_buffer);
             return 0; /* no need to evaluate the rest */
         }
     }
     if (q264) {
-        av_freep(&nal.rbsp_buffer);
+        av_freep(&rbsp.rbsp_buffer);
         return 0;
     }
     /* didn't find a picture! */
     av_log(avctx, AV_LOG_ERROR, "missing picture in access unit with size %d\n", buf_size);
 fail:
-    av_freep(&nal.rbsp_buffer);
+    av_freep(&rbsp.rbsp_buffer);
     return -1;
 }
 

diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index 99d9f90..e833835 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c

@@ -41,7 +41,6 @@
 #include "mpegutils.h"
 #include "rectangle.h"
 #include "thread.h"
-#include "vdpau_compat.h"
 
 void ff_h264_unref_picture(H264Context *h, H264Picture *pic)
 {
@@ -79,24 +78,30 @@
 
     dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf);
     dst->mb_type_buf      = av_buffer_ref(src->mb_type_buf);
-    if (!dst->qscale_table_buf || !dst->mb_type_buf)
+    if (!dst->qscale_table_buf || !dst->mb_type_buf) {
+        ret = AVERROR(ENOMEM);
         goto fail;
+    }
     dst->qscale_table = src->qscale_table;
     dst->mb_type      = src->mb_type;
 
     for (i = 0; i < 2; i++) {
         dst->motion_val_buf[i] = av_buffer_ref(src->motion_val_buf[i]);
         dst->ref_index_buf[i]  = av_buffer_ref(src->ref_index_buf[i]);
-        if (!dst->motion_val_buf[i] || !dst->ref_index_buf[i])
+        if (!dst->motion_val_buf[i] || !dst->ref_index_buf[i]) {
+            ret = AVERROR(ENOMEM);
             goto fail;
+        }
         dst->motion_val[i] = src->motion_val[i];
         dst->ref_index[i]  = src->ref_index[i];
     }
 
     if (src->hwaccel_picture_private) {
         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
-        if (!dst->hwaccel_priv_buf)
+        if (!dst->hwaccel_priv_buf) {
+            ret = AVERROR(ENOMEM);
             goto fail;
+        }
         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
     }
 
@@ -152,12 +157,6 @@
     int err = 0;
     h->mb_y = 0;
 
-#if FF_API_CAP_VDPAU
-    if (CONFIG_H264_VDPAU_DECODER &&
-        h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU)
-        ff_vdpau_h264_set_reference_frames(h);
-#endif
-
     if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) {
         if (!h->droppable) {
             err = ff_h264_execute_ref_pic_marking(h);
@@ -175,12 +174,6 @@
                    "hardware accelerator failed to decode picture\n");
     }
 
-#if FF_API_CAP_VDPAU
-    if (CONFIG_H264_VDPAU_DECODER &&
-        h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU)
-        ff_vdpau_h264_picture_complete(h);
-#endif
-
     if (!in_setup && !h->droppable)
         ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
                                   h->picture_structure == PICT_BOTTOM_FIELD);

diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index b7d5f65..17bfa78 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c

@@ -35,7 +35,6 @@
 #include "h264_ps.h"
 #include "golomb.h"
 
-#define MAX_LOG2_MAX_FRAME_NUM    (12 + 4)
 #define MIN_LOG2_MAX_FRAME_NUM    4
 
 #define EXTENDED_SAR       255
@@ -348,7 +347,7 @@
 
     sps->data_size = gb->buffer_end - gb->buffer;
     if (sps->data_size > sizeof(sps->data)) {
-        av_log(avctx, AV_LOG_WARNING, "Truncating likely oversized SPS\n");
+        av_log(avctx, AV_LOG_DEBUG, "Truncating likely oversized SPS\n");
         sps->data_size = sizeof(sps->data);
     }
     memcpy(sps->data, gb->buffer, sps->data_size);
@@ -745,7 +744,7 @@
 
     pps->data_size = gb->buffer_end - gb->buffer;
     if (pps->data_size > sizeof(pps->data)) {
-        av_log(avctx, AV_LOG_WARNING, "Truncating likely oversized PPS "
+        av_log(avctx, AV_LOG_DEBUG, "Truncating likely oversized PPS "
                "(%"SIZE_SPECIFIER" > %"SIZE_SPECIFIER")\n",
                pps->data_size, sizeof(pps->data));
         pps->data_size = sizeof(pps->data);

diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index 51b6694..e967b9c 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h

@@ -36,6 +36,7 @@
 
 #define MAX_SPS_COUNT          32
 #define MAX_PPS_COUNT         256
+#define MAX_LOG2_MAX_FRAME_NUM    (12 + 4)
 
 /**
  * Sequence parameter set

diff --git a/libavcodec/h264_redundant_pps_bsf.c b/libavcodec/h264_redundant_pps_bsf.c
new file mode 100644
index 0000000..cc5a306
--- /dev/null
+++ b/libavcodec/h264_redundant_pps_bsf.c

@@ -0,0 +1,187 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/mem.h"
+
+#include "bsf.h"
+#include "cbs.h"
+#include "cbs_h264.h"
+#include "h264.h"
+
+
+typedef struct H264RedundantPPSContext {
+    CodedBitstreamContext *input;
+    CodedBitstreamContext *output;
+
+    CodedBitstreamFragment access_unit;
+
+    int global_pic_init_qp;
+    int current_pic_init_qp;
+    int extradata_pic_init_qp;
+} H264RedundantPPSContext;
+
+
+static int h264_redundant_pps_fixup_pps(H264RedundantPPSContext *ctx,
+                                        H264RawPPS *pps)
+{
+    // Record the current value of pic_init_qp in order to fix up
+    // following slices, then overwrite with the global value.
+    ctx->current_pic_init_qp = pps->pic_init_qp_minus26 + 26;
+    pps->pic_init_qp_minus26 = ctx->global_pic_init_qp - 26;
+
+    // Some PPSs have this set, so it must be set in all of them.
+    // (Slices which do not use such a PPS on input will still have
+    // *_weight_l*flag as zero and therefore write equivalently.)
+    pps->weighted_pred_flag = 1;
+
+    return 0;
+}
+
+static int h264_redundant_pps_fixup_slice(H264RedundantPPSContext *ctx,
+                                          H264RawSliceHeader *slice)
+{
+    int qp;
+
+    qp = ctx->current_pic_init_qp + slice->slice_qp_delta;
+    slice->slice_qp_delta = qp - ctx->global_pic_init_qp;
+
+    return 0;
+}
+
+static int h264_redundant_pps_filter(AVBSFContext *bsf, AVPacket *out)
+{
+    H264RedundantPPSContext *ctx = bsf->priv_data;
+    AVPacket *in;
+    CodedBitstreamFragment *au = &ctx->access_unit;
+    int au_has_sps;
+    int err, i;
+
+    err = ff_bsf_get_packet(bsf, &in);
+    if (err < 0)
+        return err;
+
+    err = ff_cbs_read_packet(ctx->input, au, in);
+    if (err < 0)
+        return err;
+
+    au_has_sps = 0;
+    for (i = 0; i < au->nb_units; i++) {
+        CodedBitstreamUnit *nal = &au->units[i];
+
+        if (nal->type == H264_NAL_SPS)
+            au_has_sps = 1;
+        if (nal->type == H264_NAL_PPS) {
+            h264_redundant_pps_fixup_pps(ctx, nal->content);
+            if (!au_has_sps) {
+                av_log(ctx, AV_LOG_VERBOSE, "Deleting redundant PPS "
+                       "at %"PRId64".\n", in->pts);
+                ff_cbs_delete_unit(ctx->input, au, i);
+            }
+        }
+        if (nal->type == H264_NAL_SLICE ||
+            nal->type == H264_NAL_IDR_SLICE) {
+            H264RawSlice *slice = nal->content;
+            h264_redundant_pps_fixup_slice(ctx, &slice->header);
+        }
+    }
+
+    err = ff_cbs_write_packet(ctx->output, out, au);
+    if (err < 0)
+        return err;
+
+    ff_cbs_fragment_uninit(ctx->output, au);
+
+    err = av_packet_copy_props(out, in);
+    if (err < 0)
+        return err;
+
+    av_packet_free(&in);
+
+    return 0;
+}
+
+static int h264_redundant_pps_init(AVBSFContext *bsf)
+{
+    H264RedundantPPSContext *ctx = bsf->priv_data;
+    CodedBitstreamFragment *au = &ctx->access_unit;
+    int err, i;
+
+    err = ff_cbs_init(&ctx->input, AV_CODEC_ID_H264, bsf);
+    if (err < 0)
+        return err;
+
+    err = ff_cbs_init(&ctx->output, AV_CODEC_ID_H264, bsf);
+    if (err < 0)
+        return err;
+
+    ctx->global_pic_init_qp = 26;
+
+    if (bsf->par_in->extradata) {
+        err = ff_cbs_read_extradata(ctx->input, au, bsf->par_in);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
+            return err;
+        }
+
+        for (i = 0; i < au->nb_units; i++) {
+            if (au->units[i].type == H264_NAL_PPS)
+                h264_redundant_pps_fixup_pps(ctx, au->units[i].content);
+        }
+
+        ctx->extradata_pic_init_qp = ctx->current_pic_init_qp;
+        err = ff_cbs_write_extradata(ctx->output, bsf->par_out, au);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
+            return err;
+        }
+
+        ff_cbs_fragment_uninit(ctx->output, au);
+    }
+
+    return 0;
+}
+
+static void h264_redundant_pps_flush(AVBSFContext *bsf)
+{
+    H264RedundantPPSContext *ctx = bsf->priv_data;
+    ctx->current_pic_init_qp = ctx->extradata_pic_init_qp;
+}
+
+static void h264_redundant_pps_close(AVBSFContext *bsf)
+{
+    H264RedundantPPSContext *ctx = bsf->priv_data;
+    ff_cbs_close(&ctx->input);
+    ff_cbs_close(&ctx->output);
+}
+
+static const enum AVCodecID h264_redundant_pps_codec_ids[] = {
+    AV_CODEC_ID_H264, AV_CODEC_ID_NONE,
+};
+
+const AVBitStreamFilter ff_h264_redundant_pps_bsf = {
+    .name           = "h264_redundant_pps",
+    .priv_data_size = sizeof(H264RedundantPPSContext),
+    .init           = &h264_redundant_pps_init,
+    .flush          = &h264_redundant_pps_flush,
+    .close          = &h264_redundant_pps_close,
+    .filter         = &h264_redundant_pps_filter,
+    .codec_ids      = h264_redundant_pps_codec_ids,
+};

diff --git a/libavcodec/h264_refs.c b/libavcodec/h264_refs.c
index af70829..eaf965e 100644
--- a/libavcodec/h264_refs.c
+++ b/libavcodec/h264_refs.c

@@ -614,6 +614,12 @@
     int current_ref_assigned = 0, err = 0;
     H264Picture *av_uninit(pic);
 
+    if (!h->ps.sps) {
+        av_log(h->avctx, AV_LOG_ERROR, "SPS is unset\n");
+        err = AVERROR_INVALIDDATA;
+        goto out;
+    }
+
     if (!h->explicit_ref_marking)
         generate_sliding_window_mmcos(h);
     mmco_count = h->nb_mmco;
@@ -806,9 +812,11 @@
         }
     }
 
+    // Detect unmarked random access points
     if (   err >= 0
         && h->long_ref_count==0
         && (   h->short_ref_count<=2
+            || pps_ref_count[0] <= 2 && pps_ref_count[1] <= 1 && h->avctx->has_b_frames
             || pps_ref_count[0] <= 1 + (h->picture_structure != PICT_FRAME) && pps_ref_count[1] <= 1)
         && pps_ref_count[0]<=2 + (h->picture_structure != PICT_FRAME) + (2*!h->has_recovery_point)
         && h->cur_pic_ptr->f->pict_type == AV_PICTURE_TYPE_I){
@@ -817,6 +825,7 @@
             h->frame_recovered |= FRAME_RECOVERED_SEI;
     }
 
+out:
     return (h->avctx->err_recognition & AV_EF_EXPLODE) ? err : 0;
 }
 

diff --git a/libavcodec/h264_sei.c b/libavcodec/h264_sei.c
index 332ae50..43593d3 100644
--- a/libavcodec/h264_sei.c
+++ b/libavcodec/h264_sei.c

@@ -51,8 +51,7 @@
     h->display_orientation.present = 0;
     h->afd.present                 =  0;
 
-    h->a53_caption.a53_caption_size = 0;
-    av_freep(&h->a53_caption.a53_caption);
+    av_buffer_unref(&h->a53_caption.buf_ref);
 }
 
 static int decode_picture_timing(H264SEIPictureTiming *h, GetBitContext *gb,
@@ -169,7 +168,8 @@
             size -= 2;
 
             if (cc_count && size >= cc_count * 3) {
-                const uint64_t new_size = (h->a53_caption_size + cc_count
+                int old_size = h->buf_ref ? h->buf_ref->size : 0;
+                const uint64_t new_size = (old_size + cc_count
                                            * UINT64_C(3));
                 int i, ret;
 
@@ -177,14 +177,15 @@
                     return AVERROR(EINVAL);
 
                 /* Allow merging of the cc data from two fields. */
-                ret = av_reallocp(&h->a53_caption, new_size);
+                ret = av_buffer_realloc(&h->buf_ref, new_size);
                 if (ret < 0)
                     return ret;
 
+                /* Use of av_buffer_realloc assumes buffer is writeable */
                 for (i = 0; i < cc_count; i++) {
-                    h->a53_caption[h->a53_caption_size++] = get_bits(gb, 8);
-                    h->a53_caption[h->a53_caption_size++] = get_bits(gb, 8);
-                    h->a53_caption[h->a53_caption_size++] = get_bits(gb, 8);
+                    h->buf_ref->data[old_size++] = get_bits(gb, 8);
+                    h->buf_ref->data[old_size++] = get_bits(gb, 8);
+                    h->buf_ref->data[old_size++] = get_bits(gb, 8);
                 }
 
                 skip_bits(gb, 8);   // marker_bits
@@ -257,17 +258,20 @@
     if (e == 1 && build == 1 && !strncmp(user_data+16, "x264 - core 0000", 16))
         h->x264_build = 67;
 
-    if (strlen(user_data + 16) > 0)
-        av_log(logctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data + 16);
-
     av_free(user_data);
     return 0;
 }
 
-static int decode_recovery_point(H264SEIRecoveryPoint *h, GetBitContext *gb)
+static int decode_recovery_point(H264SEIRecoveryPoint *h, GetBitContext *gb, void *logctx)
 {
-    h->recovery_frame_cnt = get_ue_golomb_long(gb);
+    unsigned recovery_frame_cnt = get_ue_golomb_long(gb);
 
+    if (recovery_frame_cnt >= (1<<MAX_LOG2_MAX_FRAME_NUM)) {
+        av_log(logctx, AV_LOG_ERROR, "recovery_frame_cnt %u is out of range\n", recovery_frame_cnt);
+        return AVERROR_INVALIDDATA;
+    }
+
+    h->recovery_frame_cnt = recovery_frame_cnt;
     /* 1b exact_match_flag,
      * 1b broken_link_flag,
      * 2b changing_slice_group_idc */
@@ -316,24 +320,25 @@
 static int decode_frame_packing_arrangement(H264SEIFramePacking *h,
                                             GetBitContext *gb)
 {
-    h->frame_packing_arrangement_id          = get_ue_golomb_long(gb);
-    h->frame_packing_arrangement_cancel_flag = get_bits1(gb);
-    h->present = !h->frame_packing_arrangement_cancel_flag;
+    h->arrangement_id          = get_ue_golomb_long(gb);
+    h->arrangement_cancel_flag = get_bits1(gb);
+    h->present = !h->arrangement_cancel_flag;
 
     if (h->present) {
-        h->frame_packing_arrangement_type = get_bits(gb, 7);
+        h->arrangement_type = get_bits(gb, 7);
         h->quincunx_sampling_flag         = get_bits1(gb);
         h->content_interpretation_type    = get_bits(gb, 6);
 
-        // the following skips: spatial_flipping_flag, frame0_flipped_flag,
-        // field_views_flag, current_frame_is_frame0_flag,
+        // spatial_flipping_flag, frame0_flipped_flag, field_views_flag
+        skip_bits(gb, 3);
+        h->current_frame_is_frame0_flag = get_bits1(gb);
         // frame0_self_contained_flag, frame1_self_contained_flag
-        skip_bits(gb, 6);
+        skip_bits(gb, 2);
 
-        if (!h->quincunx_sampling_flag && h->frame_packing_arrangement_type != 5)
+        if (!h->quincunx_sampling_flag && h->arrangement_type != 5)
             skip_bits(gb, 16);      // frame[01]_grid_position_[xy]
         skip_bits(gb, 8);           // frame_packing_arrangement_reserved_byte
-        h->frame_packing_arrangement_repetition_period = get_ue_golomb_long(gb);
+        h->arrangement_repetition_period = get_ue_golomb_long(gb);
     }
     skip_bits1(gb);                 // frame_packing_arrangement_extension_flag
 
@@ -431,7 +436,7 @@
             ret = decode_unregistered_user_data(&h->unregistered, gb, logctx, size);
             break;
         case H264_SEI_TYPE_RECOVERY_POINT:
-            ret = decode_recovery_point(&h->recovery_point, gb);
+            ret = decode_recovery_point(&h->recovery_point, gb, logctx);
             break;
         case H264_SEI_TYPE_BUFFERING_PERIOD:
             ret = decode_buffering_period(&h->buffering_period, gb, ps, logctx);
@@ -467,8 +472,8 @@
 
 const char *ff_h264_sei_stereo_mode(const H264SEIFramePacking *h)
 {
-    if (h->frame_packing_arrangement_cancel_flag == 0) {
-        switch (h->frame_packing_arrangement_type) {
+    if (h->arrangement_cancel_flag == 0) {
+        switch (h->arrangement_type) {
             case H264_SEI_FPA_TYPE_CHECKERBOARD:
                 if (h->content_interpretation_type == 2)
                     return "checkerboard_rl";
@@ -503,7 +508,7 @@
             default:
                 return "mono";
         }
-    } else if (h->frame_packing_arrangement_cancel_flag == 1) {
+    } else if (h->arrangement_cancel_flag == 1) {
         return "mono";
     } else {
         return NULL;

diff --git a/libavcodec/h264_sei.h b/libavcodec/h264_sei.h
index a53f189..5b7c8ef 100644
--- a/libavcodec/h264_sei.h
+++ b/libavcodec/h264_sei.h

@@ -27,6 +27,7 @@
 typedef enum {
     H264_SEI_TYPE_BUFFERING_PERIOD       = 0,   ///< buffering period (H.264, D.1.1)
     H264_SEI_TYPE_PIC_TIMING             = 1,   ///< picture timing
+    H264_SEI_TYPE_PAN_SCAN_RECT          = 2,   ///< pan-scan rectangle
     H264_SEI_TYPE_FILLER_PAYLOAD         = 3,   ///< filler data
     H264_SEI_TYPE_USER_DATA_REGISTERED   = 4,   ///< registered user data as specified by Rec. ITU-T T.35
     H264_SEI_TYPE_USER_DATA_UNREGISTERED = 5,   ///< unregistered user data
@@ -34,6 +35,7 @@
     H264_SEI_TYPE_FRAME_PACKING          = 45,  ///< frame packing arrangement
     H264_SEI_TYPE_DISPLAY_ORIENTATION    = 47,  ///< display orientation
     H264_SEI_TYPE_GREEN_METADATA         = 56,  ///< GreenMPEG information
+    H264_SEI_TYPE_MASTERING_DISPLAY_COLOUR_VOLUME = 137,  ///< mastering display properties
     H264_SEI_TYPE_ALTERNATIVE_TRANSFER   = 147, ///< alternative transfer
 } H264_SEI_Type;
 
@@ -93,8 +95,7 @@
 } H264SEIAFD;
 
 typedef struct H264SEIA53Caption {
-    int a53_caption_size;
-    uint8_t *a53_caption;
+    AVBufferRef *buf_ref;
 } H264SEIA53Caption;
 
 typedef struct H264SEIUnregistered {
@@ -119,12 +120,13 @@
 
 typedef struct H264SEIFramePacking {
     int present;
-    int frame_packing_arrangement_id;
-    int frame_packing_arrangement_cancel_flag;  ///< is previous arrangement canceled, -1 if never received
-    H264_SEI_FpaType frame_packing_arrangement_type;
-    int frame_packing_arrangement_repetition_period;
+    int arrangement_id;
+    int arrangement_cancel_flag;  ///< is previous arrangement canceled, -1 if never received
+    H264_SEI_FpaType arrangement_type;
+    int arrangement_repetition_period;
     int content_interpretation_type;
     int quincunx_sampling_flag;
+    int current_frame_is_frame0_flag;
 } H264SEIFramePacking;
 
 typedef struct H264SEIDisplayOrientation {

diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index cf1b22f..d09cee4 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c

@@ -358,6 +358,7 @@
         h->mb_num    = h1->mb_num;
         h->mb_stride = h1->mb_stride;
         h->b_stride  = h1->b_stride;
+        h->x264_build = h1->x264_build;
 
         if (h->context_initialized || h1->context_initialized) {
             if ((err = h264_slice_header_init(h)) < 0) {
@@ -404,7 +405,6 @@
     // extradata/NAL handling
     h->is_avc = h1->is_avc;
     h->nal_length_size = h1->nal_length_size;
-    h->sei.unregistered.x264_build = h1->sei.unregistered.x264_build;
 
     memcpy(&h->poc,        &h1->poc,        sizeof(h->poc));
 
@@ -430,6 +430,13 @@
 
     h->frame_recovered       = h1->frame_recovered;
 
+    av_buffer_unref(&h->sei.a53_caption.buf_ref);
+    if (h1->sei.a53_caption.buf_ref) {
+        h->sei.a53_caption.buf_ref = av_buffer_ref(h1->sei.a53_caption.buf_ref);
+        if (!h->sei.a53_caption.buf_ref)
+            return AVERROR(ENOMEM);
+    }
+
     if (!h->cur_pic_ptr)
         return 0;
 
@@ -497,11 +504,7 @@
 
     if ((ret = alloc_picture(h, pic)) < 0)
         return ret;
-    if(!h->frame_recovered && !h->avctx->hwaccel
-#if FF_API_CAP_VDPAU
-       && !(h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU)
-#endif
-       )
+    if(!h->frame_recovered && !h->avctx->hwaccel)
         ff_color_frame(pic->f, c);
 
     h->cur_pic_ptr = pic;
@@ -549,6 +552,9 @@
 
     h->mb_aff_frame = h->ps.sps->mb_aff && (h->picture_structure == PICT_FRAME);
 
+    if (h->sei.unregistered.x264_build >= 0)
+        h->x264_build = h->sei.unregistered.x264_build;
+
     assert(h->cur_pic_ptr->long_ref == 0);
 
     return 0;
@@ -758,8 +764,8 @@
 {
 #define HWACCEL_MAX (CONFIG_H264_DXVA2_HWACCEL + \
                      (CONFIG_H264_D3D11VA_HWACCEL * 2) + \
+                     CONFIG_H264_NVDEC_HWACCEL + \
                      CONFIG_H264_VAAPI_HWACCEL + \
-                     (CONFIG_H264_VDA_HWACCEL * 2) + \
                      CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \
                      CONFIG_H264_VDPAU_HWACCEL)
     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
@@ -815,6 +821,9 @@
 #if CONFIG_H264_VDPAU_HWACCEL
         *fmt++ = AV_PIX_FMT_VDPAU;
 #endif
+#if CONFIG_H264_NVDEC_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
+#endif
         if (CHROMA444(h)) {
             if (h->avctx->colorspace == AVCOL_SPC_RGB)
                 *fmt++ = AV_PIX_FMT_GBRP;
@@ -838,10 +847,6 @@
 #if CONFIG_H264_VAAPI_HWACCEL
             *fmt++ = AV_PIX_FMT_VAAPI;
 #endif
-#if CONFIG_H264_VDA_HWACCEL
-            *fmt++ = AV_PIX_FMT_VDA_VLD;
-            *fmt++ = AV_PIX_FMT_VDA;
-#endif
 #if CONFIG_H264_VIDEOTOOLBOX_HWACCEL
             *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 #endif
@@ -921,7 +926,7 @@
 
     if (sps->timing_info_present_flag) {
         int64_t den = sps->time_scale;
-        if (h->sei.unregistered.x264_build < 44U)
+        if (h->x264_build < 44U)
             den *= 2;
         av_reduce(&h->avctx->framerate.den, &h->avctx->framerate.num,
                   sps->num_units_in_tick * h->avctx->ticks_per_frame, den, 1 << 30);
@@ -939,17 +944,6 @@
         goto fail;
     }
 
-#if FF_API_CAP_VDPAU
-    if (h->avctx->codec &&
-        h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU &&
-        (sps->bit_depth_luma != 8 || sps->chroma_format_idc > 1)) {
-        av_log(h->avctx, AV_LOG_ERROR,
-                "VDPAU decoding does not support video colorspace.\n");
-        ret = AVERROR_INVALIDDATA;
-        goto fail;
-    }
-#endif
-
     if (sps->bit_depth_luma < 8 || sps->bit_depth_luma > 14 ||
         sps->bit_depth_luma == 11 || sps->bit_depth_luma == 13
     ) {
@@ -1211,41 +1205,48 @@
     }
 
     if (h->sei.frame_packing.present &&
-        h->sei.frame_packing.frame_packing_arrangement_type <= 6 &&
+        h->sei.frame_packing.arrangement_type <= 6 &&
         h->sei.frame_packing.content_interpretation_type > 0 &&
         h->sei.frame_packing.content_interpretation_type < 3) {
         H264SEIFramePacking *fp = &h->sei.frame_packing;
         AVStereo3D *stereo = av_stereo3d_create_side_data(cur->f);
         if (stereo) {
-        switch (fp->frame_packing_arrangement_type) {
-        case 0:
+        switch (fp->arrangement_type) {
+        case H264_SEI_FPA_TYPE_CHECKERBOARD:
             stereo->type = AV_STEREO3D_CHECKERBOARD;
             break;
-        case 1:
+        case H264_SEI_FPA_TYPE_INTERLEAVE_COLUMN:
             stereo->type = AV_STEREO3D_COLUMNS;
             break;
-        case 2:
+        case H264_SEI_FPA_TYPE_INTERLEAVE_ROW:
             stereo->type = AV_STEREO3D_LINES;
             break;
-        case 3:
+        case H264_SEI_FPA_TYPE_SIDE_BY_SIDE:
             if (fp->quincunx_sampling_flag)
                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
             else
                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
             break;
-        case 4:
+        case H264_SEI_FPA_TYPE_TOP_BOTTOM:
             stereo->type = AV_STEREO3D_TOPBOTTOM;
             break;
-        case 5:
+        case H264_SEI_FPA_TYPE_INTERLEAVE_TEMPORAL:
             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
             break;
-        case 6:
+        case H264_SEI_FPA_TYPE_2D:
             stereo->type = AV_STEREO3D_2D;
             break;
         }
 
         if (fp->content_interpretation_type == 2)
             stereo->flags = AV_STEREO3D_FLAG_INVERT;
+
+        if (fp->arrangement_type == H264_SEI_FPA_TYPE_INTERLEAVE_TEMPORAL) {
+            if (fp->current_frame_is_frame0_flag)
+                stereo->view = AV_STEREO3D_VIEW_LEFT;
+            else
+                stereo->view = AV_STEREO3D_VIEW_RIGHT;
+        }
         }
     }
 
@@ -1275,15 +1276,14 @@
         }
     }
 
-    if (h->sei.a53_caption.a53_caption) {
+    if (h->sei.a53_caption.buf_ref) {
         H264SEIA53Caption *a53 = &h->sei.a53_caption;
-        AVFrameSideData *sd = av_frame_new_side_data(cur->f,
-                                                     AV_FRAME_DATA_A53_CC,
-                                                     a53->a53_caption_size);
-        if (sd)
-            memcpy(sd->data, a53->a53_caption, a53->a53_caption_size);
-        av_freep(&a53->a53_caption);
-        a53->a53_caption_size = 0;
+
+        AVFrameSideData *sd = av_frame_new_side_data_from_buf(cur->f, AV_FRAME_DATA_A53_CC, a53->buf_ref);
+        if (!sd)
+            av_buffer_unref(&a53->buf_ref);
+        a53->buf_ref = NULL;
+
         h->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
     }
 
@@ -1322,7 +1322,7 @@
     }
     out_of_order = MAX_DELAYED_PIC_COUNT - i;
     if(   cur->f->pict_type == AV_PICTURE_TYPE_B
-       || (h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > INT_MIN && h->last_pocs[MAX_DELAYED_PIC_COUNT-1] - h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > 2))
+       || (h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > INT_MIN && h->last_pocs[MAX_DELAYED_PIC_COUNT-1] - (int64_t)h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > 2))
         out_of_order = FFMAX(out_of_order, 1);
     if (out_of_order == MAX_DELAYED_PIC_COUNT) {
         av_log(h->avctx, AV_LOG_VERBOSE, "Invalid POC %d<%d\n", cur->poc, h->last_pocs[0]);
@@ -1413,6 +1413,11 @@
 
     sps = h->ps.sps;
 
+    if (sps && sps->bitstream_restriction_flag &&
+        h->avctx->has_b_frames < sps->num_reorder_frames) {
+        h->avctx->has_b_frames = sps->num_reorder_frames;
+    }
+
     last_pic_droppable   = h->droppable;
     last_pic_structure   = h->picture_structure;
     h->droppable         = (nal->ref_idc == 0);
@@ -1613,8 +1618,10 @@
             (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table));
     }
 
-    ff_h264_init_poc(h->cur_pic_ptr->field_poc, &h->cur_pic_ptr->poc,
+    ret = ff_h264_init_poc(h->cur_pic_ptr->field_poc, &h->cur_pic_ptr->poc,
                      h->ps.sps, &h->poc, h->picture_structure, nal->ref_idc);
+    if (ret < 0)
+        return ret;
 
     memcpy(h->mmco, sl->mmco, sl->nb_mmco * sizeof(*h->mmco));
     h->nb_mmco = sl->nb_mmco;
@@ -2744,11 +2751,7 @@
 
     h->slice_ctx[0].next_slice_idx = INT_MAX;
 
-    if (h->avctx->hwaccel || context_count < 1
-#if FF_API_CAP_VDPAU
-        || h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU
-#endif
-        )
+    if (h->avctx->hwaccel || context_count < 1)
         return 0;
 
     av_assert0(context_count && h->slice_ctx[context_count - 1].mb_y < h->mb_height);

diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index f29c3f9..7b4c5c7 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c

@@ -47,13 +47,13 @@
 #include "h264_mvpred.h"
 #include "h264_ps.h"
 #include "golomb.h"
+#include "hwaccel.h"
 #include "mathops.h"
 #include "me_cmp.h"
 #include "mpegutils.h"
 #include "profiles.h"
 #include "rectangle.h"
 #include "thread.h"
-#include "vdpau_compat.h"
 
 const uint16_t ff_h264_mb_sizes[4] = { 256, 384, 512, 768 };
 
@@ -317,7 +317,7 @@
     h->recovery_frame        = -1;
     h->frame_recovered       = 0;
     h->poc.prev_frame_num    = -1;
-    h->sei.frame_packing.frame_packing_arrangement_cancel_flag = -1;
+    h->sei.frame_packing.arrangement_cancel_flag = -1;
     h->sei.unregistered.x264_build = -1;
 
     h->next_outputed_poc = INT_MIN;
@@ -527,10 +527,6 @@
     h->context_initialized = 0;
 }
 
-#if FF_API_CAP_VDPAU
-static const uint8_t start_code[] = { 0x00, 0x00, 0x01 };
-#endif
-
 static int get_last_needed_nal(H264Context *h)
 {
     int nals_needed = 0;
@@ -613,9 +609,10 @@
 
     if (!(avctx->flags2 & AV_CODEC_FLAG2_CHUNKS)) {
         h->current_slice = 0;
-        if (!h->first_field)
+        if (!h->first_field) {
             h->cur_pic_ptr = NULL;
-        ff_h264_sei_uninit(&h->sei);
+            ff_h264_sei_uninit(&h->sei);
+        }
     }
 
     if (h->nal_length_size == 4) {
@@ -688,11 +685,6 @@
                 if (h->avctx->hwaccel &&
                     (ret = h->avctx->hwaccel->start_frame(h->avctx, buf, buf_size)) < 0)
                     goto end;
-#if FF_API_CAP_VDPAU
-                if (CONFIG_H264_VDPAU_DECODER &&
-                    h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU)
-                    ff_vdpau_h264_picture_start(h);
-#endif
             }
 
             max_slice_ctx = avctx->hwaccel ? 1 : h->nb_slice_ctx;
@@ -701,18 +693,6 @@
                     ret = avctx->hwaccel->decode_slice(avctx, nal->raw_data, nal->raw_size);
                     h->nb_slice_ctx_queued = 0;
                 } else
-#if FF_API_CAP_VDPAU
-            if (CONFIG_H264_VDPAU_DECODER &&
-                       h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU) {
-                ff_vdpau_add_data_chunk(h->cur_pic_ptr->f->data[0],
-                                        start_code,
-                                        sizeof(start_code));
-                ff_vdpau_add_data_chunk(h->cur_pic_ptr->f->data[0],
-                                        nal->raw_data,
-                                        nal->raw_size);
-                ret = 0;
-            } else
-#endif
                     ret = ff_h264_execute_decode_slices(h);
                 if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
                     goto end;
@@ -728,16 +708,19 @@
             h->has_recovery_point = h->has_recovery_point || h->sei.recovery_point.recovery_frame_cnt != -1;
             if (avctx->debug & FF_DEBUG_GREEN_MD)
                 debug_green_metadata(&h->sei.green_metadata, h->avctx);
-#if FF_API_AFD
-FF_DISABLE_DEPRECATION_WARNINGS
-            h->avctx->dtg_active_format = h->sei.afd.active_format_description;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_AFD */
             if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
                 goto end;
             break;
         case H264_NAL_SPS: {
             GetBitContext tmp_gb = nal->gb;
+            if (avctx->hwaccel && avctx->hwaccel->decode_params) {
+                ret = avctx->hwaccel->decode_params(avctx,
+                                                    nal->type,
+                                                    nal->raw_data,
+                                                    nal->raw_size);
+                if (ret < 0)
+                    goto end;
+            }
             if (ff_h264_decode_seq_parameter_set(&tmp_gb, avctx, &h->ps, 0) >= 0)
                 break;
             av_log(h->avctx, AV_LOG_DEBUG,
@@ -749,6 +732,14 @@
             break;
         }
         case H264_NAL_PPS:
+            if (avctx->hwaccel && avctx->hwaccel->decode_params) {
+                ret = avctx->hwaccel->decode_params(avctx,
+                                                    nal->type,
+                                                    nal->raw_data,
+                                                    nal->raw_size);
+                if (ret < 0)
+                    goto end;
+            }
             ret = ff_h264_decode_picture_parameter_set(&nal->gb, avctx, &h->ps,
                                                        nal->size_bits);
             if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
@@ -848,9 +839,6 @@
     AVFrame *src = srcp->f;
     int ret;
 
-    if (src->format == AV_PIX_FMT_VIDEOTOOLBOX && src->buf[0]->size == 1)
-        return AVERROR_EXTERNAL;
-
     ret = av_frame_ref(dst, src);
     if (ret < 0)
         return ret;
@@ -867,6 +855,8 @@
 {
     int cnt= buf[5]&0x1f;
     const uint8_t *p= buf+6;
+    if (!cnt)
+        return 0;
     while(cnt--){
         int nalsize= AV_RB16(p) + 2;
         if(nalsize > buf_size - (p-buf) || (p[2] & 0x9F) != 7)
@@ -996,7 +986,7 @@
                                      &h->ps, &h->is_avc, &h->nal_length_size,
                                      avctx->err_recognition, avctx);
     }
-    if(h->is_avc && buf_size >= 9 && buf[0]==1 && buf[2]==0 && (buf[4]&0xFC)==0xFC && (buf[5]&0x1F) && buf[8]==0x67){
+    if (h->is_avc && buf_size >= 9 && buf[0]==1 && buf[2]==0 && (buf[4]&0xFC)==0xFC) {
         if (is_extra(buf, buf_size))
             return ff_h264_decode_extradata(buf, buf_size,
                                             &h->ps, &h->is_avc, &h->nal_length_size,
@@ -1046,6 +1036,7 @@
     { "is_avc", "is avc", OFFSET(is_avc), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, 0 },
     { "nal_length_size", "nal_length_size", OFFSET(nal_length_size), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 4, 0 },
     { "enable_er", "Enable error resilience on damaged frames (unsafe)", OFFSET(enable_er), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VD },
+    { "x264_build", "Assume this x264 version if no x264 version found in any SEI", OFFSET(x264_build), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VD },
     { NULL },
 };
 
@@ -1068,6 +1059,30 @@
     .capabilities          = /*AV_CODEC_CAP_DRAW_HORIZ_BAND |*/ AV_CODEC_CAP_DR1 |
                              AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS |
                              AV_CODEC_CAP_FRAME_THREADS,
+    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
+#if CONFIG_H264_DXVA2_HWACCEL
+                               HWACCEL_DXVA2(h264),
+#endif
+#if CONFIG_H264_D3D11VA_HWACCEL
+                               HWACCEL_D3D11VA(h264),
+#endif
+#if CONFIG_H264_D3D11VA2_HWACCEL
+                               HWACCEL_D3D11VA2(h264),
+#endif
+#if CONFIG_H264_NVDEC_HWACCEL
+                               HWACCEL_NVDEC(h264),
+#endif
+#if CONFIG_H264_VAAPI_HWACCEL
+                               HWACCEL_VAAPI(h264),
+#endif
+#if CONFIG_H264_VDPAU_HWACCEL
+                               HWACCEL_VDPAU(h264),
+#endif
+#if CONFIG_H264_VIDEOTOOLBOX_HWACCEL
+                               HWACCEL_VIDEOTOOLBOX(h264),
+#endif
+                               NULL
+                           },
     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING,
     .flush                 = flush_dpb,
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
@@ -1075,29 +1090,3 @@
     .profiles              = NULL_IF_CONFIG_SMALL(ff_h264_profiles),
     .priv_class            = &h264_class,
 };
-
-#if CONFIG_H264_VDPAU_DECODER && FF_API_VDPAU
-static const AVClass h264_vdpau_class = {
-    .class_name = "H264 VDPAU Decoder",
-    .item_name  = av_default_item_name,
-    .option     = h264_options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-
-AVCodec ff_h264_vdpau_decoder = {
-    .name           = "h264_vdpau",
-    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_H264,
-    .priv_data_size = sizeof(H264Context),
-    .init           = h264_decode_init,
-    .close          = h264_decode_end,
-    .decode         = h264_decode_frame,
-    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HWACCEL_VDPAU,
-    .flush          = flush_dpb,
-    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_VDPAU_H264,
-                                                     AV_PIX_FMT_NONE},
-    .profiles       = NULL_IF_CONFIG_SMALL(ff_h264_profiles),
-    .priv_class     = &h264_vdpau_class,
-};
-#endif

diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index de8b7c3..1d97232 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h

@@ -365,6 +365,7 @@
     int context_initialized;
     int flags;
     int workaround_bugs;
+    int x264_build;
     /* Set when slice threading is used and at least one slice uses deblocking
      * mode 1 (i.e. across slice boundaries). Then we disable the loop filter
      * during normal MB decoding and execute it serially at the end.

diff --git a/libavcodec/h265_metadata_bsf.c b/libavcodec/h265_metadata_bsf.c
new file mode 100644
index 0000000..26eb2d0
--- /dev/null
+++ b/libavcodec/h265_metadata_bsf.c

@@ -0,0 +1,464 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "bsf.h"
+#include "cbs.h"
+#include "cbs_h265.h"
+#include "hevc.h"
+
+enum {
+    PASS,
+    INSERT,
+    REMOVE,
+};
+
+typedef struct H265MetadataContext {
+    const AVClass *class;
+
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment access_unit;
+
+    H265RawAUD aud_nal;
+
+    int aud;
+
+    AVRational sample_aspect_ratio;
+
+    int video_format;
+    int video_full_range_flag;
+    int colour_primaries;
+    int transfer_characteristics;
+    int matrix_coefficients;
+
+    int chroma_sample_loc_type;
+
+    AVRational tick_rate;
+    int poc_proportional_to_timing_flag;
+    int num_ticks_poc_diff_one;
+
+    int crop_left;
+    int crop_right;
+    int crop_top;
+    int crop_bottom;
+} H265MetadataContext;
+
+
+static int h265_metadata_update_vps(AVBSFContext *bsf,
+                                    H265RawVPS *vps)
+{
+    H265MetadataContext *ctx = bsf->priv_data;
+
+    if (ctx->tick_rate.num && ctx->tick_rate.den) {
+        int num, den;
+
+        av_reduce(&num, &den, ctx->tick_rate.num, ctx->tick_rate.den,
+                  UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
+
+        vps->vps_time_scale        = num;
+        vps->vps_num_units_in_tick = den;
+
+        vps->vps_timing_info_present_flag = 1;
+
+        if (ctx->num_ticks_poc_diff_one > 0) {
+            vps->vps_num_ticks_poc_diff_one_minus1 =
+                ctx->num_ticks_poc_diff_one - 1;
+            vps->vps_poc_proportional_to_timing_flag = 1;
+        } else if (ctx->num_ticks_poc_diff_one == 0) {
+            vps->vps_poc_proportional_to_timing_flag = 0;
+        }
+    }
+
+    return 0;
+}
+
+static int h265_metadata_update_sps(AVBSFContext *bsf,
+                                    H265RawSPS *sps)
+{
+    H265MetadataContext *ctx = bsf->priv_data;
+    int need_vui = 0;
+    int crop_unit_x, crop_unit_y;
+
+    if (ctx->sample_aspect_ratio.num && ctx->sample_aspect_ratio.den) {
+        // Table E-1.
+        static const AVRational sar_idc[] = {
+            {   0,  0 }, // Unspecified (never written here).
+            {   1,  1 }, {  12, 11 }, {  10, 11 }, {  16, 11 },
+            {  40, 33 }, {  24, 11 }, {  20, 11 }, {  32, 11 },
+            {  80, 33 }, {  18, 11 }, {  15, 11 }, {  64, 33 },
+            { 160, 99 }, {   4,  3 }, {   3,  2 }, {   2,  1 },
+        };
+        int num, den, i;
+
+        av_reduce(&num, &den, ctx->sample_aspect_ratio.num,
+                  ctx->sample_aspect_ratio.den, 65535);
+
+        for (i = 1; i < FF_ARRAY_ELEMS(sar_idc); i++) {
+            if (num == sar_idc[i].num &&
+                den == sar_idc[i].den)
+                break;
+        }
+        if (i == FF_ARRAY_ELEMS(sar_idc)) {
+            sps->vui.aspect_ratio_idc = 255;
+            sps->vui.sar_width  = num;
+            sps->vui.sar_height = den;
+        } else {
+            sps->vui.aspect_ratio_idc = i;
+        }
+        sps->vui.aspect_ratio_info_present_flag = 1;
+        need_vui = 1;
+    }
+
+#define SET_OR_INFER(field, value, present_flag, infer) do { \
+        if (value >= 0) { \
+            field = value; \
+            need_vui = 1; \
+        } else if (!present_flag) \
+            field = infer; \
+    } while (0)
+
+    if (ctx->video_format             >= 0 ||
+        ctx->video_full_range_flag    >= 0 ||
+        ctx->colour_primaries         >= 0 ||
+        ctx->transfer_characteristics >= 0 ||
+        ctx->matrix_coefficients      >= 0) {
+
+        SET_OR_INFER(sps->vui.video_format, ctx->video_format,
+                     sps->vui.video_signal_type_present_flag, 5);
+
+        SET_OR_INFER(sps->vui.video_full_range_flag,
+                     ctx->video_full_range_flag,
+                     sps->vui.video_signal_type_present_flag, 0);
+
+        if (ctx->colour_primaries         >= 0 ||
+            ctx->transfer_characteristics >= 0 ||
+            ctx->matrix_coefficients      >= 0) {
+
+            SET_OR_INFER(sps->vui.colour_primaries,
+                         ctx->colour_primaries,
+                         sps->vui.colour_description_present_flag, 2);
+
+            SET_OR_INFER(sps->vui.transfer_characteristics,
+                         ctx->transfer_characteristics,
+                         sps->vui.colour_description_present_flag, 2);
+
+            SET_OR_INFER(sps->vui.matrix_coefficients,
+                         ctx->matrix_coefficients,
+                         sps->vui.colour_description_present_flag, 2);
+
+            sps->vui.colour_description_present_flag = 1;
+        }
+        sps->vui.video_signal_type_present_flag = 1;
+        need_vui = 1;
+    }
+
+    if (ctx->chroma_sample_loc_type >= 0) {
+        sps->vui.chroma_sample_loc_type_top_field =
+            ctx->chroma_sample_loc_type;
+        sps->vui.chroma_sample_loc_type_bottom_field =
+            ctx->chroma_sample_loc_type;
+        sps->vui.chroma_loc_info_present_flag = 1;
+        need_vui = 1;
+    }
+
+    if (ctx->tick_rate.num && ctx->tick_rate.den) {
+        int num, den;
+
+        av_reduce(&num, &den, ctx->tick_rate.num, ctx->tick_rate.den,
+                  UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
+
+        sps->vui.vui_time_scale        = num;
+        sps->vui.vui_num_units_in_tick = den;
+
+        sps->vui.vui_timing_info_present_flag = 1;
+        need_vui = 1;
+
+        if (ctx->num_ticks_poc_diff_one > 0) {
+            sps->vui.vui_num_ticks_poc_diff_one_minus1 =
+                ctx->num_ticks_poc_diff_one - 1;
+            sps->vui.vui_poc_proportional_to_timing_flag = 1;
+        } else if (ctx->num_ticks_poc_diff_one == 0) {
+            sps->vui.vui_poc_proportional_to_timing_flag = 0;
+        }
+    }
+
+    if (sps->separate_colour_plane_flag || sps->chroma_format_idc == 0) {
+        crop_unit_x = 1;
+        crop_unit_y = 1;
+    } else {
+        crop_unit_x = 1 + (sps->chroma_format_idc < 3);
+        crop_unit_y = 1 + (sps->chroma_format_idc < 2);
+    }
+#define CROP(border, unit) do { \
+        if (ctx->crop_ ## border >= 0) { \
+            if (ctx->crop_ ## border % unit != 0) { \
+                av_log(bsf, AV_LOG_ERROR, "Invalid value for crop_%s: " \
+                       "must be a multiple of %d.\n", #border, unit); \
+                return AVERROR(EINVAL); \
+            } \
+            sps->conf_win_ ## border ## _offset = \
+                ctx->crop_ ## border / unit; \
+            sps->conformance_window_flag = 1; \
+        } \
+    } while (0)
+    CROP(left,   crop_unit_x);
+    CROP(right,  crop_unit_x);
+    CROP(top,    crop_unit_y);
+    CROP(bottom, crop_unit_y);
+#undef CROP
+
+    if (need_vui)
+        sps->vui_parameters_present_flag = 1;
+
+    return 0;
+}
+
+static int h265_metadata_filter(AVBSFContext *bsf, AVPacket *out)
+{
+    H265MetadataContext *ctx = bsf->priv_data;
+    AVPacket *in = NULL;
+    CodedBitstreamFragment *au = &ctx->access_unit;
+    int err, i;
+
+    err = ff_bsf_get_packet(bsf, &in);
+    if (err < 0)
+        return err;
+
+    err = ff_cbs_read_packet(ctx->cbc, au, in);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
+        goto fail;
+    }
+
+    if (au->nb_units == 0) {
+        av_log(bsf, AV_LOG_ERROR, "No NAL units in packet.\n");
+        err = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    // If an AUD is present, it must be the first NAL unit.
+    if (au->units[0].type == HEVC_NAL_AUD) {
+        if (ctx->aud == REMOVE)
+            ff_cbs_delete_unit(ctx->cbc, au, 0);
+    } else {
+        if (ctx->aud == INSERT) {
+            H265RawAUD *aud = &ctx->aud_nal;
+            int pic_type = 0, temporal_id = 8, layer_id = 0;
+
+            for (i = 0; i < au->nb_units; i++) {
+                const H265RawNALUnitHeader *nal = au->units[i].content;
+                if (!nal)
+                    continue;
+                if (nal->nuh_temporal_id_plus1 < temporal_id + 1)
+                    temporal_id = nal->nuh_temporal_id_plus1 - 1;
+
+                if (au->units[i].type <= HEVC_NAL_RSV_VCL31) {
+                    const H265RawSlice *slice = au->units[i].content;
+                    layer_id = nal->nuh_layer_id;
+                    if (slice->header.slice_type == HEVC_SLICE_B &&
+                        pic_type < 2)
+                        pic_type = 2;
+                    if (slice->header.slice_type == HEVC_SLICE_P &&
+                        pic_type < 1)
+                        pic_type = 1;
+                }
+            }
+
+            aud->nal_unit_header = (H265RawNALUnitHeader) {
+                .nal_unit_type         = HEVC_NAL_AUD,
+                .nuh_layer_id          = layer_id,
+                .nuh_temporal_id_plus1 = temporal_id + 1,
+            };
+            aud->pic_type = pic_type;
+
+            err = ff_cbs_insert_unit_content(ctx->cbc, au,
+                                             0, HEVC_NAL_AUD, aud, NULL);
+            if (err) {
+                av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
+                goto fail;
+            }
+        }
+    }
+
+    for (i = 0; i < au->nb_units; i++) {
+        if (au->units[i].type == HEVC_NAL_VPS) {
+            err = h265_metadata_update_vps(bsf, au->units[i].content);
+            if (err < 0)
+                goto fail;
+        }
+        if (au->units[i].type == HEVC_NAL_SPS) {
+            err = h265_metadata_update_sps(bsf, au->units[i].content);
+            if (err < 0)
+                goto fail;
+        }
+    }
+
+    err = ff_cbs_write_packet(ctx->cbc, out, au);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
+        goto fail;
+    }
+
+    err = av_packet_copy_props(out, in);
+    if (err < 0)
+        goto fail;
+
+    err = 0;
+fail:
+    ff_cbs_fragment_uninit(ctx->cbc, au);
+
+    if (err < 0)
+        av_packet_unref(out);
+    av_packet_free(&in);
+
+    return err;
+}
+
+static int h265_metadata_init(AVBSFContext *bsf)
+{
+    H265MetadataContext *ctx = bsf->priv_data;
+    CodedBitstreamFragment *au = &ctx->access_unit;
+    int err, i;
+
+    err = ff_cbs_init(&ctx->cbc, AV_CODEC_ID_HEVC, bsf);
+    if (err < 0)
+        return err;
+
+    if (bsf->par_in->extradata) {
+        err = ff_cbs_read_extradata(ctx->cbc, au, bsf->par_in);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
+            goto fail;
+        }
+
+        for (i = 0; i < au->nb_units; i++) {
+            if (au->units[i].type == HEVC_NAL_VPS) {
+                err = h265_metadata_update_vps(bsf, au->units[i].content);
+                if (err < 0)
+                    goto fail;
+            }
+            if (au->units[i].type == HEVC_NAL_SPS) {
+                err = h265_metadata_update_sps(bsf, au->units[i].content);
+                if (err < 0)
+                    goto fail;
+            }
+        }
+
+        err = ff_cbs_write_extradata(ctx->cbc, bsf->par_out, au);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
+            goto fail;
+        }
+    }
+
+    err = 0;
+fail:
+    ff_cbs_fragment_uninit(ctx->cbc, au);
+    return err;
+}
+
+static void h265_metadata_close(AVBSFContext *bsf)
+{
+    H265MetadataContext *ctx = bsf->priv_data;
+    ff_cbs_close(&ctx->cbc);
+}
+
+#define OFFSET(x) offsetof(H265MetadataContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
+static const AVOption h265_metadata_options[] = {
+    { "aud", "Access Unit Delimiter NAL units",
+        OFFSET(aud), AV_OPT_TYPE_INT,
+        { .i64 = PASS }, PASS, REMOVE, FLAGS, "aud" },
+    { "pass",   NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = PASS   }, .flags = FLAGS, .unit = "aud" },
+    { "insert", NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = INSERT }, .flags = FLAGS, .unit = "aud" },
+    { "remove", NULL, 0, AV_OPT_TYPE_CONST,
+        { .i64 = REMOVE }, .flags = FLAGS, .unit = "aud" },
+
+    { "sample_aspect_ratio", "Set sample aspect ratio (table E-1)",
+        OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL,
+        { .dbl = 0.0 }, 0, 65535, FLAGS },
+
+    { "video_format", "Set video format (table E-2)",
+        OFFSET(video_format), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 7, FLAGS },
+    { "video_full_range_flag", "Set video full range flag",
+        OFFSET(video_full_range_flag), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 1, FLAGS },
+    { "colour_primaries", "Set colour primaries (table E-3)",
+        OFFSET(colour_primaries), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+    { "transfer_characteristics", "Set transfer characteristics (table E-4)",
+        OFFSET(transfer_characteristics), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+    { "matrix_coefficients", "Set matrix coefficients (table E-5)",
+        OFFSET(matrix_coefficients), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+
+    { "chroma_sample_loc_type", "Set chroma sample location type (figure E-1)",
+        OFFSET(chroma_sample_loc_type), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 6, FLAGS },
+
+    { "tick_rate",
+        "Set VPS and VUI tick rate (num_units_in_tick / time_scale)",
+        OFFSET(tick_rate), AV_OPT_TYPE_RATIONAL,
+        { .dbl = 0.0 }, 0, UINT_MAX, FLAGS },
+    { "num_ticks_poc_diff_one",
+        "Set VPS and VUI number of ticks per POC increment",
+        OFFSET(num_ticks_poc_diff_one), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, INT_MAX, FLAGS },
+
+    { "crop_left", "Set left border crop offset",
+        OFFSET(crop_left), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, HEVC_MAX_WIDTH, FLAGS },
+    { "crop_right", "Set right border crop offset",
+        OFFSET(crop_right), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, HEVC_MAX_WIDTH, FLAGS },
+    { "crop_top", "Set top border crop offset",
+        OFFSET(crop_top), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, HEVC_MAX_HEIGHT, FLAGS },
+    { "crop_bottom", "Set bottom border crop offset",
+        OFFSET(crop_bottom), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, HEVC_MAX_HEIGHT, FLAGS },
+
+    { NULL }
+};
+
+static const AVClass h265_metadata_class = {
+    .class_name = "h265_metadata_bsf",
+    .item_name  = av_default_item_name,
+    .option     = h265_metadata_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const enum AVCodecID h265_metadata_codec_ids[] = {
+    AV_CODEC_ID_HEVC, AV_CODEC_ID_NONE,
+};
+
+const AVBitStreamFilter ff_hevc_metadata_bsf = {
+    .name           = "hevc_metadata",
+    .priv_data_size = sizeof(H265MetadataContext),
+    .priv_class     = &h265_metadata_class,
+    .init           = &h265_metadata_init,
+    .close          = &h265_metadata_close,
+    .filter         = &h265_metadata_filter,
+    .codec_ids      = h265_metadata_codec_ids,
+};

diff --git a/libavcodec/h265_profile_level.c b/libavcodec/h265_profile_level.c
new file mode 100644
index 0000000..6604ca2
--- /dev/null
+++ b/libavcodec/h265_profile_level.c

@@ -0,0 +1,249 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h265_profile_level.h"
+
+
+static const H265LevelDescriptor h265_levels[] = {
+    // Name             CpbFactor-Main    MaxSliceSegmentsPerPicture
+    // |  level_idc            | CpbFactor-High           MaxLumaSr      BrFactor-High
+    // |      |   MaxLumaPs    |       |      | MaxTileRows   |   BrFactor-Main | MinCr-Main
+    // |      |      |         |       |      |   | MaxTileCols         |       |    |  MinCr-High
+    { "1",    30,    36864,    350,      0,  16,  1,  1,     552960,    128,      0, 2, 2 },
+    { "2",    60,   122880,   1500,      0,  16,  1,  1,    3686400,   1500,      0, 2, 2 },
+    { "2.1",  63,   245760,   3000,      0,  20,  1,  1,    7372800,   3000,      0, 2, 2 },
+    { "3",    90,   552960,   6000,      0,  30,  2,  2,   16588800,   6000,      0, 2, 2 },
+    { "3.1",  93,   983040,  10000,      0,  40,  3,  3,   33177600,  10000,      0, 2, 2 },
+    { "4",   120,  2228224,  12000,  30000,  75,  5,  5,   66846720,  12000,  30000, 4, 4 },
+    { "4.1", 123,  2228224,  20000,  50000,  75,  5,  5,  133693440,  20000,  50000, 4, 4 },
+    { "5",   150,  8912896,  25000, 100000, 200, 11, 10,  267386880,  25000, 100000, 6, 4 },
+    { "5.1", 153,  8912896,  40000, 160000, 200, 11, 10,  534773760,  40000, 160000, 8, 4 },
+    { "5.2", 156,  8912896,  60000, 240000, 200, 11, 10, 1069547520,  60000, 240000, 8, 4 },
+    { "6",   180, 35651584,  60000, 240000, 600, 22, 20, 1069547520,  60000, 240000, 8, 4 },
+    { "6.1", 183, 35651584, 120000, 480000, 600, 22, 20, 2139095040, 120000, 480000, 8, 4 },
+    { "6.2", 186, 35651584, 240000, 800000, 600, 22, 20, 4278190080, 240000, 800000, 6, 4 },
+};
+
+static const H265ProfileDescriptor h265_profiles[] = {
+    // profile_idc   8bit       one-picture
+    //   HT-profile  | 422chroma    | lower-bit-rate
+    //   |  14bit    |  | 420chroma |  | CpbVclFactor     MinCrScaleFactor
+    //   |  |  12bit |  |  | monochrome|    | CpbNalFactor    |
+    //   |  |  |  10bit |  |  | intra  |    |     | FormatCapabilityFactor
+    { "Monochrome", //  |  |  |  |  |  |    |     |     |     |
+      4, 0, 2, 1, 1, 1, 1, 1, 1, 0, 0, 1,  667,  733, 1.000, 1.0 },
+    { "Monochrome 12",
+      4, 0, 2, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1000, 1100, 1.500, 1.0 },
+    { "Monochrome 16",
+      4, 0, 2, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1333, 1467, 2.000, 1.0 },
+    { "Main",
+      1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1000, 1100, 1.500, 1.0 },
+    { "Screen-Extended Main",
+      9, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1000, 1100, 1.500, 1.0 },
+    { "Main 10",
+      2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1000, 1100, 1.875, 1.0 },
+    { "Screen-Extended Main 10",
+      9, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1000, 1100, 1.875, 1.0 },
+    { "Main 12",
+      4, 0, 2, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1500, 1650, 2.250, 1.0 },
+    { "Main Still Picture",
+      3, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1000, 1100, 1.500, 1.0 },
+    { "Main 4:2:2 10",
+      4, 0, 2, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1667, 1833, 2.500, 0.5 },
+    { "Main 4:2:2 12",
+      4, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 1, 2000, 2200, 3.000, 0.5 },
+    { "Main 4:4:4",
+      4, 0, 2, 1, 1, 1, 0, 0, 0, 0, 0, 1, 2000, 2200, 3.000, 0.5 },
+    { "High Throughput 4:4:4",
+      5, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 2000, 2200, 3.000, 0.5 },
+    { "Screen-Extended Main 4:4:4",
+      9, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 2000, 2200, 3.000, 0.5 },
+    { "Screen-Extended High Throughput 4:4:4",
+      9, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 2000, 2200, 3.000, 0.5 },
+    { "Main 4:4:4 10",
+      4, 0, 2, 1, 1, 0, 0, 0, 0, 0, 0, 1, 2500, 2750, 3.750, 0.5 },
+    { "High Throughput 4:4:4 10",
+      5, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 2500, 2750, 3.750, 0.5 },
+    { "Screen-Extended Main 4:4:4 10",
+      9, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 2500, 2750, 3.750, 0.5 },
+    { "Screen-Extended High Throughput 4:4:4 10",
+      9, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 2500, 2750, 3.750, 0.5 },
+    { "Main 4:4:4 12",
+      4, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, 3000, 3300, 4.500, 0.5 },
+    { "High Throughput 4:4:4 14",
+      5, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3500, 3850, 5.250, 0.5 },
+    { "Screen-Extended High Throughput 4:4:4 14",
+      9, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3500, 3850, 5.250, 0.5 },
+    { "Main Intra",
+      4, 0, 2, 1, 1, 1, 1, 1, 0, 1, 0, 2, 1000, 1100, 1.500, 1.0 },
+    { "Main 10 Intra",
+      4, 0, 2, 1, 1, 0, 1, 1, 0, 1, 0, 2, 1000, 1100, 1.875, 1.0 },
+    { "Main 12 Intra",
+      4, 0, 2, 1, 0, 0, 1, 1, 0, 1, 0, 2, 1500, 1650, 2.250, 1.0 },
+    { "Main 4:2:2 10 Intra",
+      4, 0, 2, 1, 1, 0, 1, 0, 0, 1, 0, 2, 1667, 1833, 2.500, 0.5 },
+    { "Main 4:2:2 12 Intra",
+      4, 0, 2, 1, 0, 0, 1, 0, 0, 1, 0, 2, 2000, 2200, 3.000, 0.5 },
+    { "Main 4:4:4 Intra",
+      4, 0, 2, 1, 1, 1, 0, 0, 0, 1, 0, 2, 2000, 2200, 3.000, 0.5 },
+    { "Main 4:4:4 10 Intra",
+      4, 0, 2, 1, 1, 0, 0, 0, 0, 1, 0, 2, 2500, 2750, 3.750, 0.5 },
+    { "Main 4:4:4 12 Intra",
+      4, 0, 2, 1, 0, 0, 0, 0, 0, 1, 0, 2, 3000, 3300, 4.500, 0.5 },
+    { "Main 4:4:4 16 Intra",
+      4, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 2, 4000, 4400, 6.000, 0.5 },
+    { "Main 4:4:4 Still Picture",
+      4, 0, 2, 1, 1, 1, 0, 0, 0, 1, 1, 2, 2000, 2200, 3.000, 0.5 },
+    { "Main 4:4:4 16 Still Picture",
+      4, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 2, 4000, 4400, 6.000, 0.5 },
+    { "High Throughput 4:4:4 16 Intra",
+      5, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 4000, 4400, 6.000, 0.5 },
+};
+
+
+const H265LevelDescriptor *ff_h265_get_level(int level_idc)
+{
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(h265_levels); i++) {
+        if (h265_levels[i].level_idc == level_idc)
+            return &h265_levels[i];
+    }
+
+    return NULL;
+}
+
+const H265ProfileDescriptor *ff_h265_get_profile(const H265RawProfileTierLevel *ptl)
+{
+    int i;
+
+    if (ptl->general_profile_space)
+        return NULL;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(h265_profiles); i++) {
+        const H265ProfileDescriptor *profile = &h265_profiles[i];
+
+        if (ptl->general_profile_idc &&
+            ptl->general_profile_idc != profile->profile_idc)
+            continue;
+        if (!ptl->general_profile_compatibility_flag[profile->profile_idc])
+            continue;
+
+#define check_flag(name) \
+        if (profile->name < 2) { \
+            if (profile->name != ptl->general_ ## name ## _constraint_flag) \
+                continue; \
+        }
+        check_flag(max_14bit);
+        check_flag(max_12bit);
+        check_flag(max_10bit);
+        check_flag(max_8bit);
+        check_flag(max_422chroma);
+        check_flag(max_420chroma);
+        check_flag(max_monochrome);
+        check_flag(intra);
+        check_flag(one_picture_only);
+        check_flag(lower_bit_rate);
+#undef check_flag
+
+        return profile;
+    }
+
+    return NULL;
+}
+
+const H265LevelDescriptor *ff_h265_guess_level(const H265RawProfileTierLevel *ptl,
+                                               int64_t bitrate,
+                                               int width, int height,
+                                               int slice_segments,
+                                               int tile_rows, int tile_cols,
+                                               int max_dec_pic_buffering)
+{
+    const H265ProfileDescriptor *profile;
+    int pic_size, lbr_flag, hbr_factor;
+    int i;
+
+    if (ptl)
+        profile = ff_h265_get_profile(ptl);
+    else
+        profile = NULL;
+    if (!profile) {
+        // Default to using multiplication factors for Main profile.
+        profile = &h265_profiles[3];
+    }
+
+    pic_size = width * height;
+
+    if (ptl)
+        lbr_flag = ptl->general_lower_bit_rate_constraint_flag;
+    else
+        lbr_flag = profile->lower_bit_rate > 0;
+    if (profile->profile_idc == 1 || profile->profile_idc == 2) {
+        hbr_factor = 1;
+    } else if (profile->high_throughput) {
+        if (profile->intra)
+            hbr_factor = 24 - 12 * lbr_flag;
+        else
+            hbr_factor = 6;
+    } else {
+        hbr_factor = 2 - lbr_flag;
+    }
+
+    for (i = 0; i < FF_ARRAY_ELEMS(h265_levels); i++) {
+        const H265LevelDescriptor *level = &h265_levels[i];
+        int max_br, max_dpb_size;
+
+        if (pic_size > level->max_luma_ps)
+            continue;
+        if (width  * width  > 8 * level->max_luma_ps)
+            continue;
+        if (height * height > 8 * level->max_luma_ps)
+            continue;
+
+        if (slice_segments > level->max_slice_segments_per_picture)
+            continue;
+        if (tile_rows > level->max_tile_rows)
+            continue;
+        if (tile_cols > level->max_tile_cols)
+            continue;
+
+        if (ptl && ptl->general_tier_flag)
+            max_br = level->max_br_high;
+        else
+            max_br = level->max_br_main;
+        if (!max_br)
+            continue;
+        if (bitrate > (int64_t)profile->cpb_nal_factor * hbr_factor * max_br)
+            continue;
+
+        if (pic_size < (level->max_luma_ps >> 2))
+            max_dpb_size = 16;
+        else if (pic_size < (level->max_luma_ps >> 1))
+            max_dpb_size = 14;
+        else if (pic_size < (3 * level->max_luma_ps >> 2))
+            max_dpb_size = 9;
+        else
+            max_dpb_size = 7;
+        if (max_dec_pic_buffering > max_dpb_size)
+            continue;
+
+        return level;
+    }
+
+    return NULL;
+}

diff --git a/libavcodec/h265_profile_level.h b/libavcodec/h265_profile_level.h
new file mode 100644
index 0000000..12c00f0
--- /dev/null
+++ b/libavcodec/h265_profile_level.h

@@ -0,0 +1,89 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_H265_PROFILE_LEVEL_H
+#define AVCODEC_H265_PROFILE_LEVEL_H
+
+#include <stdint.h>
+
+#include "cbs_h265.h"
+
+
+typedef struct H265LevelDescriptor {
+    const char *name;
+    uint8_t     level_idc;
+
+    // Table A.6.
+    uint32_t    max_luma_ps;
+    uint32_t    max_cpb_main;
+    uint32_t    max_cpb_high;
+    uint16_t    max_slice_segments_per_picture;
+    uint8_t     max_tile_rows;
+    uint8_t     max_tile_cols;
+
+    // Table A.7.
+    uint32_t    max_luma_sr;
+    uint32_t    max_br_main;
+    uint32_t    max_br_high;
+    uint8_t     min_cr_base_main;
+    uint8_t     min_cr_base_high;
+} H265LevelDescriptor;
+
+typedef struct H265ProfileDescriptor {
+    const char *name;
+    uint8_t profile_idc;
+    uint8_t high_throughput;
+
+    // Tables A.2, A.3 and A.5.
+    uint8_t max_14bit;
+    uint8_t max_12bit;
+    uint8_t max_10bit;
+    uint8_t max_8bit;
+    uint8_t max_422chroma;
+    uint8_t max_420chroma;
+    uint8_t max_monochrome;
+    uint8_t intra;
+    uint8_t one_picture_only;
+    uint8_t lower_bit_rate;
+
+    // Table A.8.
+    uint16_t cpb_vcl_factor;
+    uint16_t cpb_nal_factor;
+    float format_capability_factor;
+    float min_cr_scale_factor;
+} H265ProfileDescriptor;
+
+
+const H265LevelDescriptor *ff_h265_get_level(int level_idc);
+
+const H265ProfileDescriptor *ff_h265_get_profile(const H265RawProfileTierLevel *ptl);
+
+
+/**
+ * Guess the level of a stream from some parameters.
+ *
+ * Unknown parameters may be zero, in which case they are ignored.
+ */
+const H265LevelDescriptor *ff_h265_guess_level(const H265RawProfileTierLevel *ptl,
+                                               int64_t bitrate,
+                                               int width, int height,
+                                               int slice_segments,
+                                               int tile_rows, int tile_cols,
+                                               int max_dec_pic_buffering);
+
+#endif /* AVCODEC_H265_PROFILE_LEVEL_H */

diff --git a/libavcodec/hap.c b/libavcodec/hap.c
index 5b3af5e..1a330c9 100644
--- a/libavcodec/hap.c
+++ b/libavcodec/hap.c

@@ -53,3 +53,25 @@
     av_freep(&ctx->chunks);
     av_freep(&ctx->chunk_results);
 }
+
+int ff_hap_parse_section_header(GetByteContext *gbc, int *section_size,
+                                enum HapSectionType *section_type)
+{
+    if (bytestream2_get_bytes_left(gbc) < 4)
+        return AVERROR_INVALIDDATA;
+
+    *section_size = bytestream2_get_le24(gbc);
+    *section_type = bytestream2_get_byte(gbc);
+
+    if (*section_size == 0) {
+        if (bytestream2_get_bytes_left(gbc) < 4)
+            return AVERROR_INVALIDDATA;
+
+        *section_size = bytestream2_get_le32(gbc);
+    }
+
+    if (*section_size > bytestream2_get_bytes_left(gbc) || *section_size < 0)
+        return AVERROR_INVALIDDATA;
+    else
+        return 0;
+}

diff --git a/libavcodec/hap.h b/libavcodec/hap.h
index 0ee6533..bbeed11 100644
--- a/libavcodec/hap.h
+++ b/libavcodec/hap.h

@@ -73,6 +73,7 @@
     int *chunk_results;      /* Results from threaded operations */
 
     int tex_rat;             /* Compression ratio */
+    int tex_rat2;             /* Compression ratio of the second texture */
     const uint8_t *tex_data; /* Compressed texture */
     uint8_t *tex_buf;        /* Buffer for compressed texture */
     size_t tex_size;         /* Size of the compressed texture */
@@ -81,8 +82,13 @@
 
     int slice_count;         /* Number of slices for threaded operations */
 
+    int texture_count;      /* 2 for HAQA, 1 for other version */
+    int texture_section_size; /* size of the part of the texture section (for HAPQA) */
+    int uncompress_pix_size; /* nb of byte / pixel for the target picture */
+
     /* Pointer to the selected compress or decompress function */
     int (*tex_fun)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*tex_fun2)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
 } HapContext;
 
 /*
@@ -97,4 +103,10 @@
  */
 av_cold void ff_hap_free_context(HapContext *ctx);
 
+/* The first three bytes are the size of the section past the header, or zero
+ * if the length is stored in the next long word. The fourth byte in the first
+ * long word indicates the type of the current section. */
+int ff_hap_parse_section_header(GetByteContext *gbc, int *section_size,
+                                enum HapSectionType *section_type);
+
 #endif /* AVCODEC_HAP_H */

diff --git a/libavcodec/hapdec.c b/libavcodec/hapdec.c
index fc9dff1..8c84577 100644
--- a/libavcodec/hapdec.c
+++ b/libavcodec/hapdec.c

@@ -3,6 +3,8 @@
  * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
  * Copyright (C) 2015 Tom Butterworth <bangnoise@gmail.com>
  *
+ * HapQA and HAPAlphaOnly added by Jokyo Images
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -41,31 +43,6 @@
 #include "texturedsp.h"
 #include "thread.h"
 
-/* The first three bytes are the size of the section past the header, or zero
- * if the length is stored in the next long word. The fourth byte in the first
- * long word indicates the type of the current section. */
-static int parse_section_header(GetByteContext *gbc, int *section_size,
-                                enum HapSectionType *section_type)
-{
-    if (bytestream2_get_bytes_left(gbc) < 4)
-        return AVERROR_INVALIDDATA;
-
-    *section_size = bytestream2_get_le24(gbc);
-    *section_type = bytestream2_get_byte(gbc);
-
-    if (*section_size == 0) {
-        if (bytestream2_get_bytes_left(gbc) < 4)
-            return AVERROR_INVALIDDATA;
-
-        *section_size = bytestream2_get_le32(gbc);
-    }
-
-    if (*section_size > bytestream2_get_bytes_left(gbc) || *section_size < 0)
-        return AVERROR_INVALIDDATA;
-    else
-        return 0;
-}
-
 static int hap_parse_decode_instructions(HapContext *ctx, int size)
 {
     GetByteContext *gbc = &ctx->gbc;
@@ -76,7 +53,7 @@
 
     while (size > 0) {
         int stream_remaining = bytestream2_get_bytes_left(gbc);
-        ret = parse_section_header(gbc, &section_size, &section_type);
+        ret = ff_hap_parse_section_header(gbc, &section_size, &section_type);
         if (ret != 0)
             return ret;
 
@@ -157,14 +134,16 @@
     const char *compressorstr;
     int i, ret;
 
-    ret = parse_section_header(gbc, &section_size, &section_type);
+    ret = ff_hap_parse_section_header(gbc, &ctx->texture_section_size, &section_type);
     if (ret != 0)
         return ret;
 
     if ((avctx->codec_tag == MKTAG('H','a','p','1') && (section_type & 0x0F) != HAP_FMT_RGBDXT1) ||
         (avctx->codec_tag == MKTAG('H','a','p','5') && (section_type & 0x0F) != HAP_FMT_RGBADXT5) ||
         (avctx->codec_tag == MKTAG('H','a','p','Y') && (section_type & 0x0F) != HAP_FMT_YCOCGDXT5) ||
-        (avctx->codec_tag == MKTAG('H','a','p','A') && (section_type & 0x0F) != HAP_FMT_RGTC1)) {
+        (avctx->codec_tag == MKTAG('H','a','p','A') && (section_type & 0x0F) != HAP_FMT_RGTC1) ||
+        ((avctx->codec_tag == MKTAG('H','a','p','M') && (section_type & 0x0F) != HAP_FMT_RGTC1) &&
+                                                        (section_type & 0x0F) != HAP_FMT_YCOCGDXT5)) {
         av_log(avctx, AV_LOG_ERROR,
                "Invalid texture format %#04x.\n", section_type & 0x0F);
         return AVERROR_INVALIDDATA;
@@ -177,7 +156,7 @@
             if (ret == 0) {
                 ctx->chunks[0].compressor = section_type & 0xF0;
                 ctx->chunks[0].compressed_offset = 0;
-                ctx->chunks[0].compressed_size = section_size;
+                ctx->chunks[0].compressed_size = ctx->texture_section_size;
             }
             if (ctx->chunks[0].compressor == HAP_COMP_NONE) {
                 compressorstr = "none";
@@ -186,7 +165,7 @@
             }
             break;
         case HAP_COMP_COMPLEX:
-            ret = parse_section_header(gbc, &section_size, &section_type);
+            ret = ff_hap_parse_section_header(gbc, &section_size, &section_type);
             if (ret == 0 && section_type != HAP_ST_DECODE_INSTRUCTIONS)
                 ret = AVERROR_INVALIDDATA;
             if (ret == 0)
@@ -266,8 +245,8 @@
     return 0;
 }
 
-static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
-                                     int slice, int thread_nb)
+static int decompress_texture_thread_internal(AVCodecContext *avctx, void *arg,
+                                              int slice, int thread_nb, int texture_num)
 {
     HapContext *ctx = avctx->priv_data;
     AVFrame *frame = arg;
@@ -295,70 +274,118 @@
         uint8_t *p = frame->data[0] + y * frame->linesize[0] * TEXTURE_BLOCK_H;
         int off  = y * w_block;
         for (x = 0; x < w_block; x++) {
-            ctx->tex_fun(p + x * 16, frame->linesize[0],
-                         d + (off + x) * ctx->tex_rat);
+            if (texture_num == 0) {
+                ctx->tex_fun(p + x * 4 * ctx->uncompress_pix_size, frame->linesize[0],
+                             d + (off + x) * ctx->tex_rat);
+            } else {
+                ctx->tex_fun2(p + x * 4 * ctx->uncompress_pix_size, frame->linesize[0],
+                              d + (off + x) * ctx->tex_rat2);
+            }
         }
     }
 
     return 0;
 }
 
+static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
+                                     int slice, int thread_nb)
+{
+    return decompress_texture_thread_internal(avctx, arg, slice, thread_nb, 0);
+}
+
+static int decompress_texture2_thread(AVCodecContext *avctx, void *arg,
+                                      int slice, int thread_nb)
+{
+    return decompress_texture_thread_internal(avctx, arg, slice, thread_nb, 1);
+}
+
 static int hap_decode(AVCodecContext *avctx, void *data,
                       int *got_frame, AVPacket *avpkt)
 {
     HapContext *ctx = avctx->priv_data;
     ThreadFrame tframe;
-    int ret, i;
+    int ret, i, t;
     int tex_size;
+    int section_size;
+    enum HapSectionType section_type;
+    int start_texture_section = 0;
+    int tex_rat[2] = {0, 0};
 
     bytestream2_init(&ctx->gbc, avpkt->data, avpkt->size);
 
-    /* Check for section header */
-    ret = hap_parse_frame_header(avctx);
-    if (ret < 0)
-        return ret;
+    tex_rat[0] = ctx->tex_rat;
+
+    /* check for multi texture header */
+    if (ctx->texture_count == 2) {
+        ret = ff_hap_parse_section_header(&ctx->gbc, &section_size, &section_type);
+        if (ret != 0)
+            return ret;
+        if ((section_type & 0x0F) != 0x0D) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid section type in 2 textures mode %#04x.\n", section_type);
+            return AVERROR_INVALIDDATA;
+        }
+        start_texture_section = 4;
+        tex_rat[1] = ctx->tex_rat2;
+    }
 
     /* Get the output frame ready to receive data */
     tframe.f = data;
     ret = ff_thread_get_buffer(avctx, &tframe, 0);
     if (ret < 0)
         return ret;
-    if (avctx->codec->update_thread_context)
-        ff_thread_finish_setup(avctx);
 
-    /* Unpack the DXT texture */
-    if (hap_can_use_tex_in_place(ctx)) {
-        /* Only DXTC texture compression in a contiguous block */
-        ctx->tex_data = ctx->gbc.buffer;
-        tex_size = bytestream2_get_bytes_left(&ctx->gbc);
-    } else {
-        /* Perform the second-stage decompression */
-        ret = av_reallocp(&ctx->tex_buf, ctx->tex_size);
+    for (t = 0; t < ctx->texture_count; t++) {
+        bytestream2_seek(&ctx->gbc, start_texture_section, SEEK_SET);
+
+        /* Check for section header */
+        ret = hap_parse_frame_header(avctx);
         if (ret < 0)
             return ret;
 
-        avctx->execute2(avctx, decompress_chunks_thread, NULL,
-                        ctx->chunk_results, ctx->chunk_count);
+        start_texture_section += ctx->texture_section_size + 4;
 
-        for (i = 0; i < ctx->chunk_count; i++) {
-            if (ctx->chunk_results[i] < 0)
-                return ctx->chunk_results[i];
+        if (avctx->codec->update_thread_context)
+            ff_thread_finish_setup(avctx);
+
+        /* Unpack the DXT texture */
+        if (hap_can_use_tex_in_place(ctx)) {
+            /* Only DXTC texture compression in a contiguous block */
+            ctx->tex_data = ctx->gbc.buffer;
+            tex_size = FFMIN(ctx->texture_section_size, bytestream2_get_bytes_left(&ctx->gbc));
+        } else {
+            /* Perform the second-stage decompression */
+            ret = av_reallocp(&ctx->tex_buf, ctx->tex_size);
+            if (ret < 0)
+                return ret;
+
+            avctx->execute2(avctx, decompress_chunks_thread, NULL,
+                            ctx->chunk_results, ctx->chunk_count);
+
+            for (i = 0; i < ctx->chunk_count; i++) {
+                if (ctx->chunk_results[i] < 0)
+                    return ctx->chunk_results[i];
+            }
+
+            ctx->tex_data = ctx->tex_buf;
+            tex_size = ctx->tex_size;
         }
 
-        ctx->tex_data = ctx->tex_buf;
-        tex_size = ctx->tex_size;
-    }
+        if (tex_size < (avctx->coded_width  / TEXTURE_BLOCK_W)
+            *(avctx->coded_height / TEXTURE_BLOCK_H)
+            *tex_rat[t]) {
+            av_log(avctx, AV_LOG_ERROR, "Insufficient data\n");
+            return AVERROR_INVALIDDATA;
+        }
 
-    if (tex_size < (avctx->coded_width  / TEXTURE_BLOCK_W)
-                  *(avctx->coded_height / TEXTURE_BLOCK_H)
-                  *ctx->tex_rat) {
-        av_log(avctx, AV_LOG_ERROR, "Insufficient data\n");
-        return AVERROR_INVALIDDATA;
+        /* Use the decompress function on the texture, one block per thread */
+        if (t == 0){
+            avctx->execute2(avctx, decompress_texture_thread, tframe.f, NULL, ctx->slice_count);
+        } else{
+            tframe.f = data;
+            avctx->execute2(avctx, decompress_texture2_thread, tframe.f, NULL, ctx->slice_count);
+        }
     }
 
-    /* Use the decompress function on the texture, one block per thread */
-    avctx->execute2(avctx, decompress_texture_thread, tframe.f, NULL, ctx->slice_count);
-
     /* Frame is ready to be output */
     tframe.f->pict_type = AV_PICTURE_TYPE_I;
     tframe.f->key_frame = 1;
@@ -385,6 +412,9 @@
 
     ff_texturedsp_init(&ctx->dxtc);
 
+    ctx->texture_count  = 1;
+    ctx->uncompress_pix_size = 4;
+
     switch (avctx->codec_tag) {
     case MKTAG('H','a','p','1'):
         texture_name = "DXT1";
@@ -407,12 +437,19 @@
     case MKTAG('H','a','p','A'):
         texture_name = "RGTC1";
         ctx->tex_rat = 8;
-        ctx->tex_fun = ctx->dxtc.rgtc1u_block;
-        avctx->pix_fmt = AV_PIX_FMT_RGB0;
+        ctx->tex_fun = ctx->dxtc.rgtc1u_gray_block;
+        avctx->pix_fmt = AV_PIX_FMT_GRAY8;
+        ctx->uncompress_pix_size = 1;
         break;
     case MKTAG('H','a','p','M'):
-        avpriv_report_missing_feature(avctx, "HapQAlpha");
-        return AVERROR_PATCHWELCOME;
+        texture_name  = "DXT5-YCoCg-scaled / RGTC1";
+        ctx->tex_rat  = 16;
+        ctx->tex_rat2 = 8;
+        ctx->tex_fun  = ctx->dxtc.dxt5ys_block;
+        ctx->tex_fun2 = ctx->dxtc.rgtc1u_alpha_block;
+        avctx->pix_fmt = AV_PIX_FMT_RGBA;
+        ctx->texture_count = 2;
+        break;
     default:
         return AVERROR_DECODER_NOT_FOUND;
     }

diff --git a/libavcodec/hapqa_extract_bsf.c b/libavcodec/hapqa_extract_bsf.c
new file mode 100644
index 0000000..5c221848
--- /dev/null
+++ b/libavcodec/hapqa_extract_bsf.c

@@ -0,0 +1,134 @@
+/*
+ * HAPQA extract bitstream filter
+ * Copyright (c) 2017 Jokyo Images
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * HAPQA extract bitstream filter
+ * extract one of the two textures of the HAQA
+ */
+
+#include "avcodec.h"
+#include "bsf.h"
+#include "bytestream.h"
+#include "hap.h"
+
+typedef struct HapqaExtractContext {
+    const AVClass *class;
+    int texture;/* index of the texture to keep (0 for rgb or 1 for alpha) */
+} HapqaExtractContext;
+
+static int check_texture(HapqaExtractContext *ctx, int section_type) {
+    if (((ctx->texture == 0)&&((section_type & 0x0F) == 0x0F)) || /* HapQ texture and rgb extract */
+        ((ctx->texture == 1)&&((section_type & 0x0F) == 0x01))) /* HapAlphaOnly texture and alpha extract */
+    {
+        return 1; /* the texture is the one to keep */
+    } else {
+        return 0;
+    }
+}
+
+static int hapqa_extract(AVBSFContext *bsf, AVPacket *pkt)
+{
+    HapqaExtractContext *ctx = bsf->priv_data;
+    GetByteContext gbc;
+    int section_size;
+    enum HapSectionType section_type;
+    int start_section_size;
+    int target_packet_size = 0;
+    int ret = 0;
+
+    ret = ff_bsf_get_packet_ref(bsf, pkt);
+    if (ret < 0)
+        return ret;
+
+    bytestream2_init(&gbc, pkt->data, pkt->size);
+    ret = ff_hap_parse_section_header(&gbc, &section_size, &section_type);
+    if (ret != 0)
+        goto fail;
+
+    if ((section_type & 0x0F) != 0x0D) {
+        av_log(bsf, AV_LOG_ERROR, "Invalid section type for HAPQA %#04x.\n", section_type & 0x0F);
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    start_section_size = 4;
+
+    bytestream2_seek(&gbc, start_section_size, SEEK_SET);/* go to start of the first texture */
+
+    ret = ff_hap_parse_section_header(&gbc, &section_size, &section_type);
+    if (ret != 0)
+        goto fail;
+
+    target_packet_size = section_size + 4;
+
+    if (check_texture(ctx, section_type) == 0) { /* the texture is not the one to keep */
+        start_section_size += 4 + section_size;
+        bytestream2_seek(&gbc, start_section_size, SEEK_SET);/* go to start of the second texture */
+        ret = ff_hap_parse_section_header(&gbc, &section_size, &section_type);
+        if (ret != 0)
+            goto fail;
+
+        target_packet_size = section_size + 4;
+
+        if (check_texture(ctx, section_type) == 0){ /* the second texture is not the one to keep */
+            av_log(bsf, AV_LOG_ERROR, "No valid texture found.\n");
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+    }
+
+    pkt->data += start_section_size;
+    pkt->size = target_packet_size;
+
+fail:
+    if (ret < 0)
+        av_packet_unref(pkt);
+    return ret;
+}
+
+static const enum AVCodecID codec_ids[] = {
+    AV_CODEC_ID_HAP, AV_CODEC_ID_NONE,
+};
+
+#define OFFSET(x) offsetof(HapqaExtractContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_BSF_PARAM)
+static const AVOption options[] = {
+    { "texture", "texture to keep", OFFSET(texture), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, "texture" },
+        { "color", "keep HapQ texture",         0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "texture" },
+        { "alpha", "keep HapAlphaOnly texture", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "texture" },
+    { NULL },
+};
+
+static const AVClass hapqa_extract_class = {
+    .class_name = "hapqa_extract_bsf",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const AVBitStreamFilter ff_hapqa_extract_bsf = {
+    .name       = "hapqa_extract",
+    .filter     = hapqa_extract,
+    .priv_data_size = sizeof(HapqaExtractContext),
+    .priv_class = &hapqa_extract_class,
+    .codec_ids  = codec_ids,
+};

diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
index f0fb919..670168e 100644
--- a/libavcodec/hevc.h
+++ b/libavcodec/hevc.h

@@ -22,7 +22,8 @@
 #define AVCODEC_HEVC_H
 
 /**
- * Table 7-3: NAL unit type codes
+ * Table 7-1 – NAL unit type codes and NAL unit type classes in
+ * T-REC-H.265-201802
  */
 enum HEVCNALUnitType {
     HEVC_NAL_TRAIL_N    = 0,
@@ -66,6 +67,29 @@
     HEVC_NAL_FD_NUT     = 38,
     HEVC_NAL_SEI_PREFIX = 39,
     HEVC_NAL_SEI_SUFFIX = 40,
+    HEVC_NAL_RSV_NVCL41 = 41,
+    HEVC_NAL_RSV_NVCL42 = 42,
+    HEVC_NAL_RSV_NVCL43 = 43,
+    HEVC_NAL_RSV_NVCL44 = 44,
+    HEVC_NAL_RSV_NVCL45 = 45,
+    HEVC_NAL_RSV_NVCL46 = 46,
+    HEVC_NAL_RSV_NVCL47 = 47,
+    HEVC_NAL_UNSPEC48   = 48,
+    HEVC_NAL_UNSPEC49   = 49,
+    HEVC_NAL_UNSPEC50   = 50,
+    HEVC_NAL_UNSPEC51   = 51,
+    HEVC_NAL_UNSPEC52   = 52,
+    HEVC_NAL_UNSPEC53   = 53,
+    HEVC_NAL_UNSPEC54   = 54,
+    HEVC_NAL_UNSPEC55   = 55,
+    HEVC_NAL_UNSPEC56   = 56,
+    HEVC_NAL_UNSPEC57   = 57,
+    HEVC_NAL_UNSPEC58   = 58,
+    HEVC_NAL_UNSPEC59   = 59,
+    HEVC_NAL_UNSPEC60   = 60,
+    HEVC_NAL_UNSPEC61   = 61,
+    HEVC_NAL_UNSPEC62   = 62,
+    HEVC_NAL_UNSPEC63   = 63,
 };
 
 enum HEVCSliceType {
@@ -74,19 +98,60 @@
     HEVC_SLICE_I = 2,
 };
 
-/**
- * 7.4.2.1
- */
-#define HEVC_MAX_SUB_LAYERS 7
-#define HEVC_MAX_VPS_COUNT 16
-#define HEVC_MAX_SPS_COUNT 32
-#define HEVC_MAX_PPS_COUNT 256
-#define HEVC_MAX_SHORT_TERM_RPS_COUNT 64
-#define HEVC_MAX_CU_SIZE 128
+enum {
+    // 7.4.3.1: vps_max_layers_minus1 is in [0, 62].
+    HEVC_MAX_LAYERS     = 63,
+    // 7.4.3.1: vps_max_sub_layers_minus1 is in [0, 6].
+    HEVC_MAX_SUB_LAYERS = 7,
+    // 7.4.3.1: vps_num_layer_sets_minus1 is in [0, 1023].
+    HEVC_MAX_LAYER_SETS = 1024,
 
-#define HEVC_MAX_REFS 16
-#define HEVC_MAX_DPB_SIZE 16 // A.4.1
+    // 7.4.2.1: vps_video_parameter_set_id is u(4).
+    HEVC_MAX_VPS_COUNT = 16,
+    // 7.4.3.2.1: sps_seq_parameter_set_id is in [0, 15].
+    HEVC_MAX_SPS_COUNT = 16,
+    // 7.4.3.3.1: pps_pic_parameter_set_id is in [0, 63].
+    HEVC_MAX_PPS_COUNT = 64,
 
-#define HEVC_MAX_LOG2_CTB_SIZE 6
+    // A.4.2: MaxDpbSize is bounded above by 16.
+    HEVC_MAX_DPB_SIZE = 16,
+    // 7.4.3.1: vps_max_dec_pic_buffering_minus1[i] is in [0, MaxDpbSize - 1].
+    HEVC_MAX_REFS     = HEVC_MAX_DPB_SIZE,
+
+    // 7.4.3.2.1: num_short_term_ref_pic_sets is in [0, 64].
+    HEVC_MAX_SHORT_TERM_REF_PIC_SETS = 64,
+    // 7.4.3.2.1: num_long_term_ref_pics_sps is in [0, 32].
+    HEVC_MAX_LONG_TERM_REF_PICS      = 32,
+
+    // A.3: all profiles require that CtbLog2SizeY is in [4, 6].
+    HEVC_MIN_LOG2_CTB_SIZE = 4,
+    HEVC_MAX_LOG2_CTB_SIZE = 6,
+
+    // E.3.2: cpb_cnt_minus1[i] is in [0, 31].
+    HEVC_MAX_CPB_CNT = 32,
+
+    // A.4.1: in table A.6 the highest level allows a MaxLumaPs of 35 651 584.
+    HEVC_MAX_LUMA_PS = 35651584,
+    // A.4.1: pic_width_in_luma_samples and pic_height_in_luma_samples are
+    // constrained to be not greater than sqrt(MaxLumaPs * 8).  Hence height/
+    // width are bounded above by sqrt(8 * 35651584) = 16888.2 samples.
+    HEVC_MAX_WIDTH  = 16888,
+    HEVC_MAX_HEIGHT = 16888,
+
+    // A.4.1: table A.6 allows at most 22 tile rows for any level.
+    HEVC_MAX_TILE_ROWS    = 22,
+    // A.4.1: table A.6 allows at most 20 tile columns for any level.
+    HEVC_MAX_TILE_COLUMNS = 20,
+
+    // 7.4.7.1: in the worst case (tiles_enabled_flag and
+    // entropy_coding_sync_enabled_flag are both set), entry points can be
+    // placed at the beginning of every Ctb row in every tile, giving an
+    // upper bound of (num_tile_columns_minus1 + 1) * PicHeightInCtbsY - 1.
+    // Only a stream with very high resolution and perverse parameters could
+    // get near that, though, so set a lower limit here with the maximum
+    // possible value for 4K video (at most 135 16x16 Ctb rows).
+    HEVC_MAX_ENTRY_POINT_OFFSETS = HEVC_MAX_TILE_COLUMNS * 135,
+};
+
 
 #endif /* AVCODEC_HEVC_H */

diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c
index b53f4cc..6b98240 100644
--- a/libavcodec/hevc_filter.c
+++ b/libavcodec/hevc_filter.c

@@ -842,9 +842,20 @@
 void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size)
 {
     int x_end = x >= s->ps.sps->width  - ctb_size;
-    if (s->avctx->skip_loop_filter < AVDISCARD_ALL)
+    int skip = 0;
+    if (s->avctx->skip_loop_filter >= AVDISCARD_ALL ||
+        (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && !IS_IDR(s)) ||
+        (s->avctx->skip_loop_filter >= AVDISCARD_NONINTRA &&
+         s->sh.slice_type != HEVC_SLICE_I) ||
+        (s->avctx->skip_loop_filter >= AVDISCARD_BIDIR &&
+         s->sh.slice_type == HEVC_SLICE_B) ||
+        (s->avctx->skip_loop_filter >= AVDISCARD_NONREF &&
+        ff_hevc_nal_is_nonref(s->nal_unit_type)))
+        skip = 1;
+
+    if (!skip)
         deblocking_filter_CTB(s, x, y);
-    if (s->ps.sps->sao_enabled) {
+    if (s->ps.sps->sao_enabled && !skip) {
         int y_end = y >= s->ps.sps->height - ctb_size;
         if (y && x)
             sao_filter_CTB(s, x - ctb_size, y - ctb_size);

diff --git a/libavcodec/hevc_mvs.c b/libavcodec/hevc_mvs.c
index a8f7876..fd0dbd9 100644
--- a/libavcodec/hevc_mvs.c
+++ b/libavcodec/hevc_mvs.c

@@ -482,7 +482,7 @@
 {
     int singleMCLFlag = 0;
     int nCS = 1 << log2_cb_size;
-    LOCAL_ALIGNED(4, MvField, mergecand_list, [MRG_MAX_NUM_CANDS]);
+    MvField mergecand_list[MRG_MAX_NUM_CANDS];
     int nPbW2 = nPbW;
     int nPbH2 = nPbH;
     HEVCLocalContext *lc = s->HEVClc;

diff --git a/libavcodec/hevc_parse.c b/libavcodec/hevc_parse.c
index 1122a60..b1b27ee 100644
--- a/libavcodec/hevc_parse.c
+++ b/libavcodec/hevc_parse.c

@@ -22,7 +22,7 @@
 #include "hevc_parse.h"
 
 static int hevc_decode_nal_units(const uint8_t *buf, int buf_size, HEVCParamSets *ps,
-                                 HEVCSEIContext *sei, int is_nalff, int nal_length_size,
+                                 HEVCSEI *sei, int is_nalff, int nal_length_size,
                                  int err_recognition, int apply_defdispwin, void *logctx)
 {
     int i;
@@ -75,7 +75,7 @@
 }
 
 int ff_hevc_decode_extradata(const uint8_t *data, int size, HEVCParamSets *ps,
-                             HEVCSEIContext *sei, int *is_nalff, int *nal_length_size,
+                             HEVCSEI *sei, int *is_nalff, int *nal_length_size,
                              int err_recognition, int apply_defdispwin, void *logctx)
 {
     int ret = 0;

diff --git a/libavcodec/hevc_parse.h b/libavcodec/hevc_parse.h
index 02e1d5b..4ab96ab 100644
--- a/libavcodec/hevc_parse.h
+++ b/libavcodec/hevc_parse.h

@@ -30,7 +30,7 @@
 #include "hevc_sei.h"
 
 int ff_hevc_decode_extradata(const uint8_t *data, int size, HEVCParamSets *ps,
-                             HEVCSEIContext *sei, int *is_nalff, int *nal_length_size,
+                             HEVCSEI *sei, int *is_nalff, int *nal_length_size,
                              int err_recognition, int apply_defdispwin, void *logctx);
 
 #endif /* AVCODEC_HEVC_PARSE_H */

diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c
index 3bef236..369d133 100644
--- a/libavcodec/hevc_parser.c
+++ b/libavcodec/hevc_parser.c

@@ -24,6 +24,7 @@
 
 #include "golomb.h"
 #include "hevc.h"
+#include "hevc_parse.h"
 #include "hevc_ps.h"
 #include "hevc_sei.h"
 #include "h2645_parse.h"
@@ -40,9 +41,11 @@
 
     H2645Packet pkt;
     HEVCParamSets ps;
-    HEVCSEIContext sei;
+    HEVCSEI sei;
     SliceHeader sh;
 
+    int is_avc;
+    int nal_length_size;
     int parsed_extradata;
 
     int poc;
@@ -54,7 +57,7 @@
 {
     HEVCParserContext *ctx = s->priv_data;
     HEVCParamSets *ps = &ctx->ps;
-    HEVCSEIContext *sei = &ctx->sei;
+    HEVCSEI *sei = &ctx->sei;
     SliceHeader *sh = &ctx->sh;
     GetBitContext *gb = &nal->gb;
     const HEVCWindow *ow;
@@ -180,8 +183,7 @@
 {
     HEVCParserContext *ctx = s->priv_data;
     HEVCParamSets *ps = &ctx->ps;
-    HEVCSEIContext *sei = &ctx->sei;
-    int is_global = buf == avctx->extradata;
+    HEVCSEI *sei = &ctx->sei;
     int ret, i;
 
     /* set some sane default values */
@@ -191,8 +193,8 @@
 
     ff_hevc_reset_sei(sei);
 
-    ret = ff_h2645_packet_split(&ctx->pkt, buf, buf_size, avctx, 0, 0,
-                                AV_CODEC_ID_HEVC, 1);
+    ret = ff_h2645_packet_split(&ctx->pkt, buf, buf_size, avctx, ctx->is_avc,
+                                ctx->nal_length_size, AV_CODEC_ID_HEVC, 1);
     if (ret < 0)
         return ret;
 
@@ -230,12 +232,6 @@
         case HEVC_NAL_RADL_R:
         case HEVC_NAL_RASL_N:
         case HEVC_NAL_RASL_R:
-
-            if (is_global) {
-                av_log(avctx, AV_LOG_ERROR, "Invalid NAL unit: %d\n", nal->type);
-                return AVERROR_INVALIDDATA;
-            }
-
             ret = hevc_parse_slice_header(s, nal, avctx);
             if (ret)
                 return ret;
@@ -243,8 +239,7 @@
         }
     }
     /* didn't find a picture! */
-    if (!is_global)
-        av_log(avctx, AV_LOG_ERROR, "missing picture in access unit\n");
+    av_log(avctx, AV_LOG_ERROR, "missing picture in access unit\n");
     return -1;
 }
 
@@ -299,9 +294,13 @@
     int next;
     HEVCParserContext *ctx = s->priv_data;
     ParseContext *pc = &ctx->pc;
+    int is_dummy_buf = !buf_size;
+    const uint8_t *dummy_buf = buf;
 
     if (avctx->extradata && !ctx->parsed_extradata) {
-        parse_nal_units(s, avctx->extradata, avctx->extradata_size, avctx);
+        ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size, &ctx->ps, &ctx->sei,
+                                 &ctx->is_avc, &ctx->nal_length_size, avctx->err_recognition,
+                                 1, avctx);
         ctx->parsed_extradata = 1;
     }
 
@@ -316,7 +315,10 @@
         }
     }
 
-    parse_nal_units(s, buf, buf_size, avctx);
+    is_dummy_buf &= (dummy_buf == buf);
+
+    if (!is_dummy_buf)
+        parse_nal_units(s, buf, buf_size, avctx);
 
     *poutbuf      = buf;
     *poutbuf_size = buf_size;

diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index 6f3af2d..ea984af 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c

@@ -70,6 +70,14 @@
     {  2,   1 },
 };
 
+static const uint8_t hevc_sub_width_c[] = {
+    1, 2, 2, 1
+};
+
+static const uint8_t hevc_sub_height_c[] = {
+    1, 2, 1, 1
+};
+
 static void remove_pps(HEVCParamSets *s, int id)
 {
     if (s->pps_list[id] && s->pps == (const HEVCPPS*)s->pps_list[id]->data)
@@ -628,8 +636,8 @@
         vui->default_display_window_flag = get_bits1(gb);
 
     if (vui->default_display_window_flag) {
-        int vert_mult  = 1 + (sps->chroma_format_idc < 2);
-        int horiz_mult = 1 + (sps->chroma_format_idc < 3);
+        int vert_mult  = hevc_sub_height_c[sps->chroma_format_idc];
+        int horiz_mult = hevc_sub_width_c[sps->chroma_format_idc];
         vui->def_disp_win.left_offset   = get_ue_golomb_long(gb) * horiz_mult;
         vui->def_disp_win.right_offset  = get_ue_golomb_long(gb) * horiz_mult;
         vui->def_disp_win.top_offset    = get_ue_golomb_long(gb) *  vert_mult;
@@ -825,7 +833,7 @@
         if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P;
        break;
     case 9:
-        if (sps->chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY16;
+        if (sps->chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY9;
         if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P9;
         if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P9;
         if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P9;
@@ -923,8 +931,8 @@
         return ret;
 
     if (get_bits1(gb)) { // pic_conformance_flag
-        int vert_mult  = 1 + (sps->chroma_format_idc < 2);
-        int horiz_mult = 1 + (sps->chroma_format_idc < 3);
+        int vert_mult  = hevc_sub_height_c[sps->chroma_format_idc];
+        int horiz_mult = hevc_sub_width_c[sps->chroma_format_idc];
         sps->pic_conf_win.left_offset   = get_ue_golomb_long(gb) * horiz_mult;
         sps->pic_conf_win.right_offset  = get_ue_golomb_long(gb) * horiz_mult;
         sps->pic_conf_win.top_offset    = get_ue_golomb_long(gb) *  vert_mult;
@@ -1061,7 +1069,7 @@
     }
 
     sps->nb_st_rps = get_ue_golomb_long(gb);
-    if (sps->nb_st_rps > HEVC_MAX_SHORT_TERM_RPS_COUNT) {
+    if (sps->nb_st_rps > HEVC_MAX_SHORT_TERM_REF_PIC_SETS) {
         av_log(avctx, AV_LOG_ERROR, "Too many short term RPS: %d.\n",
                sps->nb_st_rps);
         return AVERROR_INVALIDDATA;
@@ -1075,8 +1083,8 @@
     sps->long_term_ref_pics_present_flag = get_bits1(gb);
     if (sps->long_term_ref_pics_present_flag) {
         sps->num_long_term_ref_pics_sps = get_ue_golomb_long(gb);
-        if (sps->num_long_term_ref_pics_sps > 31U) {
-            av_log(avctx, AV_LOG_ERROR, "num_long_term_ref_pics_sps %d is out of range.\n",
+        if (sps->num_long_term_ref_pics_sps > HEVC_MAX_LONG_TERM_REF_PICS) {
+            av_log(avctx, AV_LOG_ERROR, "Too many long term ref pics: %d.\n",
                    sps->num_long_term_ref_pics_sps);
             return AVERROR_INVALIDDATA;
         }
@@ -1094,13 +1102,10 @@
         decode_vui(gb, avctx, apply_defdispwin, sps);
 
     if (get_bits1(gb)) { // sps_extension_flag
-        int sps_extension_flag[1];
-        for (i = 0; i < 1; i++)
-            sps_extension_flag[i] = get_bits1(gb);
+        int sps_range_extension_flag = get_bits1(gb);
         skip_bits(gb, 7); //sps_extension_7bits = get_bits(gb, 7);
-        if (sps_extension_flag[0]) {
+        if (sps_range_extension_flag) {
             int extended_precision_processing_flag;
-            int high_precision_offsets_enabled_flag;
             int cabac_bypass_alignment_enabled_flag;
 
             sps->transform_skip_rotation_enabled_flag = get_bits1(gb);
@@ -1115,8 +1120,8 @@
                    "extended_precision_processing_flag not yet implemented\n");
 
             sps->intra_smoothing_disabled_flag       = get_bits1(gb);
-            high_precision_offsets_enabled_flag  = get_bits1(gb);
-            if (high_precision_offsets_enabled_flag)
+            sps->high_precision_offsets_enabled_flag = get_bits1(gb);
+            if (sps->high_precision_offsets_enabled_flag)
                 av_log(avctx, AV_LOG_WARNING,
                    "high_precision_offsets_enabled_flag not yet implemented\n");
 
@@ -1682,7 +1687,7 @@
 
     if (get_bits1(gb)) { // pps_extension_present_flag
         int pps_range_extensions_flag = get_bits1(gb);
-        /* int pps_extension_7bits = */ get_bits(gb, 7);
+        skip_bits(gb, 7); // pps_extension_7bits
         if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps_range_extensions_flag) {
             if ((ret = pps_range_extensions(gb, avctx, pps, sps)) < 0)
                 goto err;

diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index f19d022..1fbda19 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h

@@ -254,14 +254,14 @@
     ScalingList scaling_list;
 
     unsigned int nb_st_rps;
-    ShortTermRPS st_rps[HEVC_MAX_SHORT_TERM_RPS_COUNT];
+    ShortTermRPS st_rps[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
 
     uint8_t amp_enabled_flag;
     uint8_t sao_enabled;
 
     uint8_t long_term_ref_pics_present_flag;
-    uint16_t lt_ref_pic_poc_lsb_sps[32];
-    uint8_t used_by_curr_pic_lt_sps_flag[32];
+    uint16_t lt_ref_pic_poc_lsb_sps[HEVC_MAX_LONG_TERM_REF_PICS];
+    uint8_t used_by_curr_pic_lt_sps_flag[HEVC_MAX_LONG_TERM_REF_PICS];
     uint8_t num_long_term_ref_pics_sps;
 
     struct {
@@ -289,6 +289,7 @@
     int implicit_rdpcm_enabled_flag;
     int explicit_rdpcm_enabled_flag;
     int intra_smoothing_disabled_flag;
+    int high_precision_offsets_enabled_flag;
     int persistent_rice_adaptation_enabled_flag;
 
     ///< coded frame dimension in various units

diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c
index ac462d3..7cf3a55 100644
--- a/libavcodec/hevc_refs.c
+++ b/libavcodec/hevc_refs.c

@@ -208,9 +208,6 @@
         if (nb_output) {
             HEVCFrame *frame = &s->DPB[min_idx];
 
-            if (frame->frame->format == AV_PIX_FMT_VIDEOTOOLBOX && frame->frame->buf[0]->size == 1)
-                return 0;
-
             ret = av_frame_ref(out, frame->frame);
             if (frame->flags & HEVC_FRAME_FLAG_BUMPING)
                 ff_hevc_unref_frame(s, frame, HEVC_FRAME_FLAG_OUTPUT | HEVC_FRAME_FLAG_BUMPING);
@@ -511,12 +508,12 @@
     return ret;
 }
 
-int ff_hevc_frame_nb_refs(HEVCContext *s)
+int ff_hevc_frame_nb_refs(const HEVCContext *s)
 {
     int ret = 0;
     int i;
     const ShortTermRPS *rps = s->sh.short_term_rps;
-    LongTermRPS *long_rps   = &s->sh.long_term_rps;
+    const LongTermRPS *long_rps = &s->sh.long_term_rps;
 
     if (rps) {
         for (i = 0; i < rps->num_negative_pics; i++)

diff --git a/libavcodec/hevc_sei.c b/libavcodec/hevc_sei.c
index 4fae797..c59bd43 100644
--- a/libavcodec/hevc_sei.c
+++ b/libavcodec/hevc_sei.c

@@ -95,10 +95,11 @@
         s->quincunx_subsampling           = get_bits1(gb);
         s->content_interpretation_type    = get_bits(gb, 6);
 
-        // the following skips spatial_flipping_flag frame0_flipped_flag
-        // field_views_flag current_frame_is_frame0_flag
-        // frame0_self_contained_flag frame1_self_contained_flag
-        skip_bits(gb, 6);
+        // spatial_flipping_flag, frame0_flipped_flag, field_views_flag
+        skip_bits(gb, 3);
+        s->current_frame_is_frame0_flag = get_bits1(gb);
+        // frame0_self_contained_flag, frame1_self_contained_flag
+        skip_bits(gb, 2);
 
         if (!s->quincunx_subsampling && s->arrangement_type != 5)
             skip_bits(gb, 16);  // frame[01]_grid_position_[xy]
@@ -124,7 +125,7 @@
     return 0;
 }
 
-static int decode_nal_sei_pic_timing(HEVCSEIContext *s, GetBitContext *gb, const HEVCParamSets *ps,
+static int decode_nal_sei_pic_timing(HEVCSEI *s, GetBitContext *gb, const HEVCParamSets *ps,
                                      void *logctx, int size)
 {
     HEVCSEIPictureTiming *h = &s->picture_timing;
@@ -205,7 +206,7 @@
     return 0;
 }
 
-static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEIContext *s, GetBitContext *gb,
+static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEI *s, GetBitContext *gb,
                                                          int size)
 {
     uint32_t country_code;
@@ -236,7 +237,7 @@
     return 0;
 }
 
-static int decode_nal_sei_active_parameter_sets(HEVCSEIContext *s, GetBitContext *gb, void *logctx)
+static int decode_nal_sei_active_parameter_sets(HEVCSEI *s, GetBitContext *gb, void *logctx)
 {
     int num_sps_ids_minus1;
     int i;
@@ -272,8 +273,8 @@
     return 0;
 }
 
-static int decode_nal_sei_prefix(GetBitContext *gb, HEVCSEIContext *s, const HEVCParamSets *ps,
-                                 int type, int size, void *logctx)
+static int decode_nal_sei_prefix(GetBitContext *gb, void *logctx, HEVCSEI *s,
+                                 const HEVCParamSets *ps, int type, int size)
 {
     switch (type) {
     case 256:  // Mismatched value from HM 8.1
@@ -301,8 +302,8 @@
     }
 }
 
-static int decode_nal_sei_suffix(GetBitContext *gb, HEVCSEIContext *s,
-                                 int type, int size, void *logctx)
+static int decode_nal_sei_suffix(GetBitContext *gb, void *logctx, HEVCSEI *s,
+                                 int type, int size)
 {
     switch (type) {
     case HEVC_SEI_TYPE_DECODED_PICTURE_HASH:
@@ -314,9 +315,8 @@
     }
 }
 
-static int decode_nal_sei_message(GetBitContext *gb, HEVCSEIContext *s,
-                                  const HEVCParamSets *ps, int nal_unit_type,
-                                  void *logctx)
+static int decode_nal_sei_message(GetBitContext *gb, void *logctx, HEVCSEI *s,
+                                  const HEVCParamSets *ps, int nal_unit_type)
 {
     int payload_type = 0;
     int payload_size = 0;
@@ -337,9 +337,9 @@
         payload_size += byte;
     }
     if (nal_unit_type == HEVC_NAL_SEI_PREFIX) {
-        return decode_nal_sei_prefix(gb, s, ps, payload_type, payload_size, logctx);
+        return decode_nal_sei_prefix(gb, logctx, s, ps, payload_type, payload_size);
     } else { /* nal_unit_type == NAL_SEI_SUFFIX */
-        return decode_nal_sei_suffix(gb, s, payload_type, payload_size, logctx);
+        return decode_nal_sei_suffix(gb, logctx, s, payload_type, payload_size);
     }
 }
 
@@ -348,20 +348,20 @@
     return get_bits_left(gb) > 0 && show_bits(gb, 8) != 0x80;
 }
 
-int ff_hevc_decode_nal_sei(GetBitContext *gb, void *logctx, HEVCSEIContext *s,
+int ff_hevc_decode_nal_sei(GetBitContext *gb, void *logctx, HEVCSEI *s,
                            const HEVCParamSets *ps, int type)
 {
     int ret;
 
     do {
-        ret = decode_nal_sei_message(gb, s, ps, type, logctx);
+        ret = decode_nal_sei_message(gb, logctx, s, ps, type);
         if (ret < 0)
             return ret;
     } while (more_rbsp_data(gb));
     return 1;
 }
 
-void ff_hevc_reset_sei(HEVCSEIContext *s)
+void ff_hevc_reset_sei(HEVCSEI *s)
 {
     s->a53_caption.a53_caption_size = 0;
     av_freep(&s->a53_caption.a53_caption);

diff --git a/libavcodec/hevc_sei.h b/libavcodec/hevc_sei.h
index 1b52222..e92da25 100644
--- a/libavcodec/hevc_sei.h
+++ b/libavcodec/hevc_sei.h

@@ -23,8 +23,6 @@
 
 #include <stdint.h>
 
-#include "libavutil/md5.h"
-
 #include "get_bits.h"
 
 /**
@@ -60,7 +58,6 @@
 } HEVC_SEI_Type;
 
 typedef struct HEVCSEIPictureHash {
-    struct AVMD5 *md5_ctx;
     uint8_t       md5[3][16];
     uint8_t is_md5;
 } HEVCSEIPictureHash;
@@ -70,6 +67,7 @@
     int arrangement_type;
     int content_interpretation_type;
     int quincunx_subsampling;
+    int current_frame_is_frame0_flag;
 } HEVCSEIFramePacking;
 
 typedef struct HEVCSEIDisplayOrientation {
@@ -106,7 +104,7 @@
     int preferred_transfer_characteristics;
 } HEVCSEIAlternativeTransfer;
 
-typedef struct HEVCSEIContext {
+typedef struct HEVCSEI {
     HEVCSEIPictureHash picture_hash;
     HEVCSEIFramePacking frame_packing;
     HEVCSEIDisplayOrientation display_orientation;
@@ -116,11 +114,11 @@
     HEVCSEIContentLight content_light;
     int active_seq_parameter_set_id;
     HEVCSEIAlternativeTransfer alternative_transfer;
-} HEVCSEIContext;
+} HEVCSEI;
 
 struct HEVCParamSets;
 
-int ff_hevc_decode_nal_sei(GetBitContext *gb, void *logctx, HEVCSEIContext *s,
+int ff_hevc_decode_nal_sei(GetBitContext *gb, void *logctx, HEVCSEI *s,
                            const struct HEVCParamSets *ps, int type);
 
 /**
@@ -130,6 +128,6 @@
  *
  * @param s HEVCContext.
  */
-void ff_hevc_reset_sei(HEVCSEIContext *s);
+void ff_hevc_reset_sei(HEVCSEI *s);
 
 #endif /* AVCODEC_HEVC_SEI_H */

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 67ac9ab..a3b5c8c 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c

@@ -41,6 +41,7 @@
 #include "hevc_data.h"
 #include "hevc_parse.h"
 #include "hevcdec.h"
+#include "hwaccel.h"
 #include "profiles.h"
 
 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
@@ -150,12 +151,18 @@
     int luma_log2_weight_denom;
 
     luma_log2_weight_denom = get_ue_golomb_long(gb);
-    if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
+    if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
+        return AVERROR_INVALIDDATA;
+    }
     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
     if (s->ps.sps->chroma_format_idc != 0) {
-        int delta = get_se_golomb(gb);
-        s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
+        int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
+        if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
+            av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
+            return AVERROR_INVALIDDATA;
+        }
+        s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
     }
 
     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
@@ -354,6 +361,7 @@
 {
 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
+                     CONFIG_HEVC_NVDEC_HWACCEL + \
                      CONFIG_HEVC_VAAPI_HWACCEL + \
                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
                      CONFIG_HEVC_VDPAU_HWACCEL)
@@ -375,6 +383,9 @@
 #if CONFIG_HEVC_VDPAU_HWACCEL
         *fmt++ = AV_PIX_FMT_VDPAU;
 #endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
+#endif
 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 #endif
@@ -393,6 +404,14 @@
 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 #endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
+#endif
+        break;
+    case AV_PIX_FMT_YUV420P12:
+#if CONFIG_HEVC_NVDEC_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
+#endif
         break;
     }
 
@@ -503,13 +522,14 @@
         }
         ff_hevc_clear_refs(s);
 
+        ret = set_sps(s, sps, sps->pix_fmt);
+        if (ret < 0)
+            return ret;
+
         pix_fmt = get_format(s, sps);
         if (pix_fmt < 0)
             return pix_fmt;
-
-        ret = set_sps(s, sps, pix_fmt);
-        if (ret < 0)
-            return ret;
+        s->avctx->pix_fmt = pix_fmt;
 
         s->seq_decode = (s->seq_decode + 1) & 0xff;
         s->max_ra     = INT_MAX;
@@ -2638,6 +2658,13 @@
 
         if (s->sei.frame_packing.content_interpretation_type == 2)
             stereo->flags = AV_STEREO3D_FLAG_INVERT;
+
+        if (s->sei.frame_packing.arrangement_type == 5) {
+            if (s->sei.frame_packing.current_frame_is_frame0_flag)
+                stereo->view = AV_STEREO3D_VIEW_LEFT;
+            else
+                stereo->view = AV_STEREO3D_VIEW_RIGHT;
+        }
     }
 
     if (s->sei.display_orientation.present &&
@@ -2816,23 +2843,55 @@
 
     switch (s->nal_unit_type) {
     case HEVC_NAL_VPS:
+        if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
+            ret = s->avctx->hwaccel->decode_params(s->avctx,
+                                                   nal->type,
+                                                   nal->raw_data,
+                                                   nal->raw_size);
+            if (ret < 0)
+                goto fail;
+        }
         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
         if (ret < 0)
             goto fail;
         break;
     case HEVC_NAL_SPS:
+        if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
+            ret = s->avctx->hwaccel->decode_params(s->avctx,
+                                                   nal->type,
+                                                   nal->raw_data,
+                                                   nal->raw_size);
+            if (ret < 0)
+                goto fail;
+        }
         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
                                      s->apply_defdispwin);
         if (ret < 0)
             goto fail;
         break;
     case HEVC_NAL_PPS:
+        if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
+            ret = s->avctx->hwaccel->decode_params(s->avctx,
+                                                   nal->type,
+                                                   nal->raw_data,
+                                                   nal->raw_size);
+            if (ret < 0)
+                goto fail;
+        }
         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
         if (ret < 0)
             goto fail;
         break;
     case HEVC_NAL_SEI_PREFIX:
     case HEVC_NAL_SEI_SUFFIX:
+        if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
+            ret = s->avctx->hwaccel->decode_params(s->avctx,
+                                                   nal->type,
+                                                   nal->raw_data,
+                                                   nal->raw_size);
+            if (ret < 0)
+                goto fail;
+        }
         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
         if (ret < 0)
             goto fail;
@@ -2857,6 +2916,13 @@
         if (ret < 0)
             return ret;
 
+        if (
+            (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
+            (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
+            (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
+            break;
+        }
+
         if (s->sh.first_slice_in_pic_flag) {
             if (s->max_ra == INT_MAX) {
                 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
@@ -2980,7 +3046,14 @@
 
     /* decode the NAL units */
     for (i = 0; i < s->pkt.nb_nals; i++) {
-        ret = decode_nal_unit(s, &s->pkt.nals[i]);
+        H2645NAL *nal = &s->pkt.nals[i];
+
+        if (s->avctx->skip_frame >= AVDISCARD_ALL ||
+            (s->avctx->skip_frame >= AVDISCARD_NONREF
+            && ff_hevc_nal_is_nonref(nal->type)))
+            continue;
+
+        ret = decode_nal_unit(s, nal);
         if (ret < 0) {
             av_log(s->avctx, AV_LOG_WARNING,
                    "Error parsing NAL unit #%d.\n", i);
@@ -3035,7 +3108,7 @@
         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
         uint8_t md5[16];
 
-        av_md5_init(s->sei.picture_hash.md5_ctx);
+        av_md5_init(s->md5_ctx);
         for (j = 0; j < h; j++) {
             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
 #if HAVE_BIGENDIAN
@@ -3045,9 +3118,9 @@
                 src = s->checksum_buf;
             }
 #endif
-            av_md5_update(s->sei.picture_hash.md5_ctx, src, w << pixel_shift);
+            av_md5_update(s->md5_ctx, src, w << pixel_shift);
         }
-        av_md5_final(s->sei.picture_hash.md5_ctx, md5);
+        av_md5_final(s->md5_ctx, md5);
 
         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
@@ -3200,7 +3273,7 @@
 
     pic_arrays_free(s);
 
-    av_freep(&s->sei.picture_hash.md5_ctx);
+    av_freep(&s->md5_ctx);
 
     av_freep(&s->cabac_state);
 
@@ -3267,8 +3340,8 @@
 
     s->max_ra = INT_MAX;
 
-    s->sei.picture_hash.md5_ctx = av_md5_alloc();
-    if (!s->sei.picture_hash.md5_ctx)
+    s->md5_ctx = av_md5_alloc();
+    if (!s->md5_ctx)
         goto fail;
 
     ff_bswapdsp_init(&s->bdsp);
@@ -3285,6 +3358,7 @@
     return AVERROR(ENOMEM);
 }
 
+#if HAVE_THREADS
 static int hevc_update_thread_context(AVCodecContext *dst,
                                       const AVCodecContext *src)
 {
@@ -3366,6 +3440,7 @@
 
     return 0;
 }
+#endif
 
 static av_cold int hevc_decode_init(AVCodecContext *avctx)
 {
@@ -3405,6 +3480,7 @@
     return 0;
 }
 
+#if HAVE_THREADS
 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
 {
     HEVCContext *s = avctx->priv_data;
@@ -3418,6 +3494,7 @@
 
     return 0;
 }
+#endif
 
 static void hevc_decode_flush(AVCodecContext *avctx)
 {
@@ -3456,10 +3533,34 @@
     .close                 = hevc_decode_free,
     .decode                = hevc_decode_frame,
     .flush                 = hevc_decode_flush,
-    .update_thread_context = hevc_update_thread_context,
-    .init_thread_copy      = hevc_init_thread_copy,
+    .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
+    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(hevc_init_thread_copy),
     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING,
     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
+    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
+#if CONFIG_HEVC_DXVA2_HWACCEL
+                               HWACCEL_DXVA2(hevc),
+#endif
+#if CONFIG_HEVC_D3D11VA_HWACCEL
+                               HWACCEL_D3D11VA(hevc),
+#endif
+#if CONFIG_HEVC_D3D11VA2_HWACCEL
+                               HWACCEL_D3D11VA2(hevc),
+#endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+                               HWACCEL_NVDEC(hevc),
+#endif
+#if CONFIG_HEVC_VAAPI_HWACCEL
+                               HWACCEL_VAAPI(hevc),
+#endif
+#if CONFIG_HEVC_VDPAU_HWACCEL
+                               HWACCEL_VDPAU(hevc),
+#endif
+#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
+                               HWACCEL_VIDEOTOOLBOX(hevc),
+#endif
+                               NULL
+                           },
 };

diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index 293beb7..f0f588f 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h

@@ -26,6 +26,7 @@
 #include <stdatomic.h>
 
 #include "libavutil/buffer.h"
+#include "libavutil/md5.h"
 
 #include "avcodec.h"
 #include "bswapdsp.h"
@@ -363,7 +364,7 @@
     DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
     /* The extended size between the new edge emu buffer is abused by SAO */
     DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer2)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
-    DECLARE_ALIGNED(32, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
+    DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE];
 
     int ct_depth;
     CodingUnit cu;
@@ -405,6 +406,8 @@
     uint8_t *sao_pixel_buffer_v[3];
 
     HEVCParamSets ps;
+    HEVCSEI sei;
+    struct AVMD5 *md5_ctx;
 
     AVBufferPool *tab_mvf_pool;
     AVBufferPool *rpl_tab_pool;
@@ -480,8 +483,6 @@
 
     int nal_length_size;    ///< Number of bytes used for nal length (1, 2 or 4)
     int nuh_layer_id;
-
-    HEVCSEIContext sei;
 } HEVCContext;
 
 /**
@@ -543,10 +544,30 @@
 /**
  * Get the number of candidate references for the current frame.
  */
-int ff_hevc_frame_nb_refs(HEVCContext *s);
+int ff_hevc_frame_nb_refs(const HEVCContext *s);
 
 int ff_hevc_set_new_ref(HEVCContext *s, AVFrame **frame, int poc);
 
+static av_always_inline int ff_hevc_nal_is_nonref(enum HEVCNALUnitType type)
+{
+    switch (type) {
+    case HEVC_NAL_TRAIL_N:
+    case HEVC_NAL_TSA_N:
+    case HEVC_NAL_STSA_N:
+    case HEVC_NAL_RADL_N:
+    case HEVC_NAL_RASL_N:
+    case HEVC_NAL_VCL_N10:
+    case HEVC_NAL_VCL_N12:
+    case HEVC_NAL_VCL_N14:
+    case HEVC_NAL_BLA_N_LP:
+    case HEVC_NAL_IDR_N_LP:
+        return 1;
+        break;
+    default: break;
+    }
+    return 0;
+}
+
 /**
  * Find next frame in output order and put a reference to it in frame.
  * @return 1 if a frame was output, 0 otherwise

diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 76ae72b..957e40d 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c

@@ -91,7 +91,7 @@
       90, -90,  88, -85,  82, -78,  73, -67,  61, -54,  46, -38,  31, -22,  13,  -4 },
 };
 
-DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][4]) = {
+DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters)[7][4] = {
     { -2, 58, 10, -2},
     { -4, 54, 16, -2},
     { -6, 46, 28, -4},
@@ -101,7 +101,7 @@
     { -2, 10, 58, -2},
 };
 
-DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters[3][16]) = {
+DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters)[3][16] = {
     { -1,  4,-10, 58, 17, -5,  1,  0, -1,  4,-10, 58, 17, -5,  1,  0},
     { -1,  4,-11, 40, 40,-11,  4, -1, -1,  4,-11, 40, 40,-11,  4, -1},
     {  0,  1, -5, 17, 58,-10,  4, -1,  0,  1, -5, 17, 58,-10,  4, -1}
@@ -257,12 +257,12 @@
         break;
     }
 
+    if (ARCH_ARM)
+        ff_hevc_dsp_init_arm(hevcdsp, bit_depth);
     if (ARCH_PPC)
         ff_hevc_dsp_init_ppc(hevcdsp, bit_depth);
     if (ARCH_X86)
         ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
-    if (ARCH_ARM)
-        ff_hevcdsp_init_arm(hevcdsp, bit_depth);
     if (ARCH_MIPS)
         ff_hevc_dsp_init_mips(hevcdsp, bit_depth);
 }

diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index dc48ebc..0ae67cb 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h

@@ -127,8 +127,9 @@
 extern const int8_t ff_hevc_epel_filters[7][4];
 extern const int8_t ff_hevc_qpel_filters[3][16];
 
+void ff_hevc_dsp_init_arm(HEVCDSPContext *c, const int bit_depth);
 void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth);
 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
-void ff_hevcdsp_init_arm(HEVCDSPContext *c, const int bit_depth);
 void ff_hevc_dsp_init_mips(HEVCDSPContext *c, const int bit_depth);
+
 #endif /* AVCODEC_HEVCDSP_H */

diff --git a/libavcodec/hnm4video.c b/libavcodec/hnm4video.c
index a64dbb1..9e1ac49 100644
--- a/libavcodec/hnm4video.c
+++ b/libavcodec/hnm4video.c

@@ -375,6 +375,7 @@
             hnm->palette[writeoffset] = bytestream2_get_be24(&gb);
             if (!eight_bit_colors)
                 hnm->palette[writeoffset] <<= 2;
+            hnm->palette[writeoffset] |= (0xFFU << 24);
             count--;
             writeoffset++;
         }

diff --git a/libavcodec/hq_hqa.c b/libavcodec/hq_hqa.c
index f88ad7d..ec9da3e0 100644
--- a/libavcodec/hq_hqa.c
+++ b/libavcodec/hq_hqa.c

@@ -181,6 +181,9 @@
     int flag = 0;
     int i, ret, cbp;
 
+    if (get_bits_left(gb) < 1)
+        return AVERROR_INVALIDDATA;
+
     cbp = get_vlc2(gb, c->hqa_cbp_vlc.table, 5, 1);
 
     for (i = 0; i < 12; i++)

diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
index 89639b7..8be7528 100644
--- a/libavcodec/huffyuvenc.c
+++ b/libavcodec/huffyuvenc.c

@@ -52,42 +52,30 @@
                                       const uint8_t *src, int w, int left)
 {
     int i;
+    int min_width = FFMIN(w, 32);
+
     if (s->bps <= 8) {
-        if (w < 32) {
-            for (i = 0; i < w; i++) {
-                const int temp = src[i];
-                dst[i] = temp - left;
-                left   = temp;
-            }
-            return left;
-        } else {
-            for (i = 0; i < 32; i++) {
-                const int temp = src[i];
-                dst[i] = temp - left;
-                left   = temp;
-            }
-            s->llvidencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32);
-            return src[w-1];
+        for (i = 0; i < min_width; i++) { /* scalar loop before dsp call */
+            const int temp = src[i];
+            dst[i] = temp - left;
+            left   = temp;
         }
+        if (w < 32)
+            return left;
+        s->llvidencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32);
+        return src[w-1];
     } else {
         const uint16_t *src16 = (const uint16_t *)src;
         uint16_t       *dst16 = (      uint16_t *)dst;
-        if (w < 32) {
-            for (i = 0; i < w; i++) {
-                const int temp = src16[i];
-                dst16[i] = temp - left;
-                left   = temp;
-            }
-            return left;
-        } else {
-            for (i = 0; i < 16; i++) {
-                const int temp = src16[i];
-                dst16[i] = temp - left;
-                left   = temp;
-            }
-            s->hencdsp.diff_int16(dst16 + 16, src16 + 16, src16 + 15, s->n - 1, w - 16);
-            return src16[w-1];
+        for (i = 0; i < min_width; i++) { /* scalar loop before dsp call */
+            const int temp = src16[i];
+            dst16[i] = temp - left;
+            left   = temp;
         }
+        if (w < 32)
+            return left;
+        s->hencdsp.diff_int16(dst16 + 32, src16 + 32, src16 + 31, s->n - 1, w - 32);
+        return src16[w-1];
     }
 }
 
@@ -98,12 +86,13 @@
 {
     int i;
     int r, g, b, a;
+    int min_width = FFMIN(w, 8);
     r = *red;
     g = *green;
     b = *blue;
     a = *alpha;
 
-    for (i = 0; i < FFMIN(w, 4); i++) {
+    for (i = 0; i < min_width; i++) {
         const int rt = src[i * 4 + R];
         const int gt = src[i * 4 + G];
         const int bt = src[i * 4 + B];
@@ -118,7 +107,7 @@
         a = at;
     }
 
-    s->llvidencdsp.diff_bytes(dst + 16, src + 16, src + 12, w * 4 - 16);
+    s->llvidencdsp.diff_bytes(dst + 32, src + 32, src + 32 - 4, w * 4 - 32);
 
     *red   = src[(w - 1) * 4 + R];
     *green = src[(w - 1) * 4 + G];

diff --git a/libavcodec/hwaccel.h b/libavcodec/hwaccel.h
index 124fbbf..3aaa925 100644
--- a/libavcodec/hwaccel.h
+++ b/libavcodec/hwaccel.h

@@ -19,6 +19,66 @@
 #ifndef AVCODEC_HWACCEL_H
 #define AVCODEC_HWACCEL_H
 
+#include "avcodec.h"
+#include "hwaccels.h"
+
+
 #define HWACCEL_CAP_ASYNC_SAFE      (1 << 0)
 
+
+typedef struct AVCodecHWConfigInternal {
+    /**
+     * This is the structure which will be returned to the user by
+     * avcodec_get_hw_config().
+     */
+    AVCodecHWConfig public;
+    /**
+     * If this configuration uses a hwaccel, a pointer to it.
+     * If not, NULL.
+     */
+    const AVHWAccel *hwaccel;
+} AVCodecHWConfigInternal;
+
+
+// These macros are used to simplify AVCodecHWConfigInternal definitions.
+
+#define HW_CONFIG_HWACCEL(device, frames, ad_hoc, format, device_type_, name) \
+    &(const AVCodecHWConfigInternal) { \
+        .public          = { \
+            .pix_fmt     = AV_PIX_FMT_ ## format, \
+            .methods     = (device ? AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX : 0) | \
+                           (frames ? AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX : 0) | \
+                           (ad_hoc ? AV_CODEC_HW_CONFIG_METHOD_AD_HOC        : 0),  \
+            .device_type = AV_HWDEVICE_TYPE_ ## device_type_, \
+        }, \
+        .hwaccel         = &name, \
+    }
+
+#define HW_CONFIG_INTERNAL(format) \
+    &(const AVCodecHWConfigInternal) { \
+        .public          = { \
+            .pix_fmt     = AV_PIX_FMT_ ## format, \
+            .methods     = AV_CODEC_HW_CONFIG_METHOD_INTERNAL, \
+            .device_type = AV_HWDEVICE_TYPE_NONE, \
+        }, \
+        .hwaccel         = NULL, \
+    }
+
+#define HWACCEL_DXVA2(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, DXVA2_VLD,    DXVA2,        ff_ ## codec ## _dxva2_hwaccel)
+#define HWACCEL_D3D11VA2(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 0, D3D11,        D3D11VA,      ff_ ## codec ## _d3d11va2_hwaccel)
+#define HWACCEL_NVDEC(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 0, CUDA,         CUDA,         ff_ ## codec ## _nvdec_hwaccel)
+#define HWACCEL_VAAPI(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, VAAPI,        VAAPI,        ff_ ## codec ## _vaapi_hwaccel)
+#define HWACCEL_VDPAU(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, VDPAU,        VDPAU,        ff_ ## codec ## _vdpau_hwaccel)
+#define HWACCEL_VIDEOTOOLBOX(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, VIDEOTOOLBOX, VIDEOTOOLBOX, ff_ ## codec ## _videotoolbox_hwaccel)
+#define HWACCEL_D3D11VA(codec) \
+    HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD,  NONE,         ff_ ## codec ## _d3d11va_hwaccel)
+#define HWACCEL_XVMC(codec) \
+    HW_CONFIG_HWACCEL(0, 0, 1, XVMC,         NONE,         ff_ ## codec ## _xvmc_hwaccel)
+
 #endif /* AVCODEC_HWACCEL_H */

diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
new file mode 100644
index 0000000..7d73da8
--- /dev/null
+++ b/libavcodec/hwaccels.h

@@ -0,0 +1,78 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HWACCELS_H
+#define AVCODEC_HWACCELS_H
+
+#include "avcodec.h"
+
+extern const AVHWAccel ff_h263_vaapi_hwaccel;
+extern const AVHWAccel ff_h263_videotoolbox_hwaccel;
+extern const AVHWAccel ff_h264_d3d11va_hwaccel;
+extern const AVHWAccel ff_h264_d3d11va2_hwaccel;
+extern const AVHWAccel ff_h264_dxva2_hwaccel;
+extern const AVHWAccel ff_h264_nvdec_hwaccel;
+extern const AVHWAccel ff_h264_vaapi_hwaccel;
+extern const AVHWAccel ff_h264_vdpau_hwaccel;
+extern const AVHWAccel ff_h264_videotoolbox_hwaccel;
+extern const AVHWAccel ff_hevc_d3d11va_hwaccel;
+extern const AVHWAccel ff_hevc_d3d11va2_hwaccel;
+extern const AVHWAccel ff_hevc_dxva2_hwaccel;
+extern const AVHWAccel ff_hevc_nvdec_hwaccel;
+extern const AVHWAccel ff_hevc_vaapi_hwaccel;
+extern const AVHWAccel ff_hevc_vdpau_hwaccel;
+extern const AVHWAccel ff_hevc_videotoolbox_hwaccel;
+extern const AVHWAccel ff_mjpeg_nvdec_hwaccel;
+extern const AVHWAccel ff_mjpeg_vaapi_hwaccel;
+extern const AVHWAccel ff_mpeg1_nvdec_hwaccel;
+extern const AVHWAccel ff_mpeg1_vdpau_hwaccel;
+extern const AVHWAccel ff_mpeg1_videotoolbox_hwaccel;
+extern const AVHWAccel ff_mpeg1_xvmc_hwaccel;
+extern const AVHWAccel ff_mpeg2_d3d11va_hwaccel;
+extern const AVHWAccel ff_mpeg2_d3d11va2_hwaccel;
+extern const AVHWAccel ff_mpeg2_nvdec_hwaccel;
+extern const AVHWAccel ff_mpeg2_dxva2_hwaccel;
+extern const AVHWAccel ff_mpeg2_vaapi_hwaccel;
+extern const AVHWAccel ff_mpeg2_vdpau_hwaccel;
+extern const AVHWAccel ff_mpeg2_videotoolbox_hwaccel;
+extern const AVHWAccel ff_mpeg2_xvmc_hwaccel;
+extern const AVHWAccel ff_mpeg4_nvdec_hwaccel;
+extern const AVHWAccel ff_mpeg4_vaapi_hwaccel;
+extern const AVHWAccel ff_mpeg4_vdpau_hwaccel;
+extern const AVHWAccel ff_mpeg4_videotoolbox_hwaccel;
+extern const AVHWAccel ff_vc1_d3d11va_hwaccel;
+extern const AVHWAccel ff_vc1_d3d11va2_hwaccel;
+extern const AVHWAccel ff_vc1_dxva2_hwaccel;
+extern const AVHWAccel ff_vc1_nvdec_hwaccel;
+extern const AVHWAccel ff_vc1_vaapi_hwaccel;
+extern const AVHWAccel ff_vc1_vdpau_hwaccel;
+extern const AVHWAccel ff_vp8_nvdec_hwaccel;
+extern const AVHWAccel ff_vp8_vaapi_hwaccel;
+extern const AVHWAccel ff_vp9_d3d11va_hwaccel;
+extern const AVHWAccel ff_vp9_d3d11va2_hwaccel;
+extern const AVHWAccel ff_vp9_dxva2_hwaccel;
+extern const AVHWAccel ff_vp9_nvdec_hwaccel;
+extern const AVHWAccel ff_vp9_vaapi_hwaccel;
+extern const AVHWAccel ff_wmv3_d3d11va_hwaccel;
+extern const AVHWAccel ff_wmv3_d3d11va2_hwaccel;
+extern const AVHWAccel ff_wmv3_dxva2_hwaccel;
+extern const AVHWAccel ff_wmv3_nvdec_hwaccel;
+extern const AVHWAccel ff_wmv3_vaapi_hwaccel;
+extern const AVHWAccel ff_wmv3_vdpau_hwaccel;
+
+#endif /* AVCODEC_HWACCELS_H */

diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c
index 0122d29..846ed0b 100644
--- a/libavcodec/idctdsp.c
+++ b/libavcodec/idctdsp.c

@@ -256,14 +256,22 @@
         c->perm_type = FF_IDCT_PERM_NONE;
     } else {
         if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) {
-            c->idct_put              = ff_simple_idct_put_10;
-            c->idct_add              = ff_simple_idct_add_10;
-            c->idct                  = ff_simple_idct_10;
+            /* 10-bit MPEG-4 Simple Studio Profile requires a higher precision IDCT
+               However, it only uses idct_put */
+            if (c->mpeg4_studio_profile) {
+                c->idct_put              = ff_simple_idct_put_int32_10bit;
+                c->idct_add              = NULL;
+                c->idct                  = NULL;
+            } else {
+                c->idct_put              = ff_simple_idct_put_int16_10bit;
+                c->idct_add              = ff_simple_idct_add_int16_10bit;
+                c->idct                  = ff_simple_idct_int16_10bit;
+            }
             c->perm_type             = FF_IDCT_PERM_NONE;
         } else if (avctx->bits_per_raw_sample == 12) {
-            c->idct_put              = ff_simple_idct_put_12;
-            c->idct_add              = ff_simple_idct_add_12;
-            c->idct                  = ff_simple_idct_12;
+            c->idct_put              = ff_simple_idct_put_int16_12bit;
+            c->idct_add              = ff_simple_idct_add_int16_12bit;
+            c->idct                  = ff_simple_idct_int16_12bit;
             c->perm_type             = FF_IDCT_PERM_NONE;
         } else {
             if (avctx->idct_algo == FF_IDCT_INT) {
@@ -280,9 +288,9 @@
 #endif /* CONFIG_FAANIDCT */
             } else { // accurate/default
                 /* Be sure FF_IDCT_NONE will select this one, since it uses FF_IDCT_PERM_NONE */
-                c->idct_put  = ff_simple_idct_put_8;
-                c->idct_add  = ff_simple_idct_add_8;
-                c->idct      = ff_simple_idct_8;
+                c->idct_put  = ff_simple_idct_put_int16_8bit;
+                c->idct_add  = ff_simple_idct_add_int16_8bit;
+                c->idct      = ff_simple_idct_int16_8bit;
                 c->perm_type = FF_IDCT_PERM_NONE;
             }
         }

diff --git a/libavcodec/idctdsp.h b/libavcodec/idctdsp.h
index 26221f6..ca21a31 100644
--- a/libavcodec/idctdsp.h
+++ b/libavcodec/idctdsp.h

@@ -95,6 +95,8 @@
      */
     uint8_t idct_permutation[64];
     enum idct_permutation_type perm_type;
+
+    int mpeg4_studio_profile;
 } IDCTDSPContext;
 
 void ff_put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,

diff --git a/libavcodec/ilbcdata.h b/libavcodec/ilbcdata.h
new file mode 100644
index 0000000..8d145bc
--- /dev/null
+++ b/libavcodec/ilbcdata.h

@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2013, The WebRTC project authors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *
+ *   * Neither the name of Google nor the names of its contributors may
+ *     be used to endorse or promote products derived from this software
+ *     without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef AVCODEC_ILBCDATA_H
+#define AVCODEC_ILBCDATA_H
+
+#include "libavutil/common.h"
+
+static const uint8_t lsf_dim_codebook[] = { 3, 3, 4 };
+static const uint8_t lsf_size_codebook[] = { 64, 128, 128 };
+static const int16_t lsf_weight_20ms[] = { 12288, 8192, 4096, 0 };
+static const int16_t lsf_weight_30ms[] = { 8192, 16384, 10923, 5461, 0, 0 };
+
+static const int16_t hp_out_coeffs[] = { 3849, -7699, 3849, 7918, -3833 };
+
+static const int16_t kPlcPfSlope[] = { 26667, 18729, 13653, 10258, 7901, 6214 };
+
+static const int16_t kPlcPitchFact[] = { 0, 5462, 10922, 16384, 21846, 27306 };
+
+static const int16_t kCbFiltersRev[] = {
+    -140, 446, -755, 3302, 2922, -590, 343, -138
+};
+
+static const int16_t kPlcPerSqr[] = { 839, 1343, 2048, 2998, 4247, 5849 };
+
+static const int16_t alpha[] = {
+    6554, 13107, 19661, 26214
+};
+
+static const int16_t kLpcChirpSyntDenum[] = {
+    32767, 29573, 26690, 24087, 21739, 19619, 17707, 15980, 14422, 13016, 11747
+};
+
+static const int16_t LpcChirpWeightDenum[] = {
+    32767, 13835, 5841, 2466, 1041, 440, 186, 78,  33,  14,  6
+};
+
+static const int16_t cos_tbl[64] = {
+    32767,  32729,  32610,  32413,  32138,  31786,  31357,   30853,
+    30274,  29622,  28899,  28106,  27246,  26320,  25330,   24279,
+    23170,  22006,  20788,  19520,  18205,  16846,  15447,   14010,
+    12540,  11039,   9512,   7962,   6393,   4808,   3212,    1608,
+    0,      -1608,  -3212,  -4808,  -6393,  -7962,  -9512,  -11039,
+    -12540, -14010, -15447, -16846, -18205, -19520, -20788, -22006,
+    -23170, -24279, -25330, -26320, -27246, -28106, -28899, -29622,
+    -30274, -30853, -31357, -31786, -32138, -32413, -32610, -32729,
+};
+
+static const int16_t cos_derivative_tbl[64] = {
+    -632,  -1893,  -3150,  -4399,  -5638,  -6863,  -8072,  -9261,
+    -10428, -11570, -12684, -13767, -14817, -15832, -16808, -17744,
+    -18637, -19486, -20287, -21039, -21741, -22390, -22986, -23526,
+    -24009, -24435, -24801, -25108, -25354, -25540, -25664, -25726,
+    -25726, -25664, -25540, -25354, -25108, -24801, -24435, -24009,
+    -23526, -22986, -22390, -21741, -21039, -20287, -19486, -18637,
+    -17744, -16808, -15832, -14817, -13767, -12684, -11570, -10428,
+    -9261,  -8072,  -6863,  -5638,  -4399,  -3150,  -1893,   -632
+};
+
+static const int16_t lsf_codebook[64 * 3 + 128 * 3 + 128 * 4] = {
+    1273, 2238, 3696, 3199, 5309, 8209, 3606, 5671, 7829,
+    2815, 5262, 8778, 2608, 4027, 5493, 1582, 3076, 5945,
+    2983, 4181, 5396, 2437, 4322, 6902, 1861, 2998, 4613,
+    2007, 3250, 5214, 1388, 2459, 4262, 2563, 3805, 5269,
+    2036, 3522, 5129, 1935, 4025, 6694, 2744, 5121, 7338,
+    2810, 4248, 5723, 3054, 5405, 7745, 1449, 2593, 4763,
+    3411, 5128, 6596, 2484, 4659, 7496, 1668, 2879, 4818,
+    1812, 3072, 5036, 1638, 2649, 3900, 2464, 3550, 4644,
+    1853, 2900, 4158, 2458, 4163, 5830, 2556, 4036, 6254,
+    2703, 4432, 6519, 3062, 4953, 7609, 1725, 3703, 6187,
+    2221, 3877, 5427, 2339, 3579, 5197, 2021, 4633, 7037,
+    2216, 3328, 4535, 2961, 4739, 6667, 2807, 3955, 5099,
+    2788, 4501, 6088, 1642, 2755, 4431, 3341, 5282, 7333,
+    2414, 3726, 5727, 1582, 2822, 5269, 2259, 3447, 4905,
+    3117, 4986, 7054, 1825, 3491, 5542, 3338, 5736, 8627,
+    1789, 3090, 5488, 2566, 3720, 4923, 2846, 4682, 7161,
+    1950, 3321, 5976, 1834, 3383, 6734, 3238, 4769, 6094,
+    2031, 3978, 5903, 1877, 4068, 7436, 2131, 4644, 8296,
+    2764, 5010, 8013, 2194, 3667, 6302, 2053, 3127, 4342,
+    3523, 6595, 10010, 3134, 4457, 5748, 3142, 5819, 9414,
+    2223, 4334, 6353, 2022, 3224, 4822, 2186, 3458, 5544,
+    2552, 4757, 6870, 10905, 12917, 14578, 9503, 11485, 14485,
+    9518, 12494, 14052, 6222, 7487, 9174, 7759, 9186, 10506,
+    8315, 12755, 14786, 9609, 11486, 13866, 8909, 12077, 13643,
+    7369, 9054, 11520, 9408, 12163, 14715, 6436, 9911, 12843,
+    7109, 9556, 11884, 7557, 10075, 11640, 6482, 9202, 11547,
+    6463, 7914, 10980, 8611, 10427, 12752, 7101, 9676, 12606,
+    7428, 11252, 13172, 10197, 12955, 15842, 7487, 10955, 12613,
+    5575, 7858, 13621, 7268, 11719, 14752, 7476, 11744, 13795,
+    7049, 8686, 11922, 8234, 11314, 13983, 6560, 11173, 14984,
+    6405, 9211, 12337, 8222, 12054, 13801, 8039, 10728, 13255,
+    10066, 12733, 14389, 6016, 7338, 10040, 6896, 8648, 10234,
+    7538, 9170, 12175, 7327, 12608, 14983, 10516, 12643, 15223,
+    5538, 7644, 12213, 6728, 12221, 14253, 7563, 9377, 12948,
+    8661, 11023, 13401, 7280, 8806, 11085, 7723, 9793, 12333,
+    12225, 14648, 16709, 8768, 13389, 15245, 10267, 12197, 13812,
+    5301, 7078, 11484, 7100, 10280, 11906, 8716, 12555, 14183,
+    9567, 12464, 15434, 7832, 12305, 14300, 7608, 10556, 12121,
+    8913, 11311, 12868, 7414, 9722, 11239, 8666, 11641, 13250,
+    9079, 10752, 12300, 8024, 11608, 13306, 10453, 13607, 16449,
+    8135, 9573, 10909, 6375, 7741, 10125, 10025, 12217, 14874,
+    6985, 11063, 14109, 9296, 13051, 14642, 8613, 10975, 12542,
+    6583, 10414, 13534, 6191, 9368, 13430, 5742, 6859, 9260,
+    7723, 9813, 13679, 8137, 11291, 12833, 6562, 8973, 10641,
+    6062, 8462, 11335, 6928, 8784, 12647, 7501, 8784, 10031,
+    8372, 10045, 12135, 8191, 9864, 12746, 5917, 7487, 10979,
+    5516, 6848, 10318, 6819, 9899, 11421, 7882, 12912, 15670,
+    9558, 11230, 12753, 7752, 9327, 11472, 8479, 9980, 11358,
+    11418, 14072, 16386, 7968, 10330, 14423, 8423, 10555, 12162,
+    6337, 10306, 14391, 8850, 10879, 14276, 6750, 11885, 15710,
+    7037, 8328, 9764, 6914, 9266, 13476, 9746, 13949, 15519,
+    11032, 14444, 16925, 8032, 10271, 11810, 10962, 13451, 15833,
+    10021, 11667, 13324, 6273, 8226, 12936, 8543, 10397, 13496,
+    7936, 10302, 12745, 6769, 8138, 10446, 6081, 7786, 11719,
+    8637, 11795, 14975, 8790, 10336, 11812, 7040, 8490, 10771,
+    7338, 10381, 13153, 6598, 7888, 9358, 6518, 8237, 12030,
+    9055, 10763, 12983, 6490, 10009, 12007, 9589, 12023, 13632,
+    6867, 9447, 10995, 7930, 9816, 11397, 10241, 13300, 14939,
+    5830, 8670, 12387, 9870, 11915, 14247, 9318, 11647, 13272,
+    6721, 10836, 12929, 6543, 8233, 9944, 8034, 10854, 12394,
+    9112, 11787, 14218, 9302, 11114, 13400, 9022, 11366, 13816,
+    6962, 10461, 12480, 11288, 13333, 15222, 7249, 8974, 10547,
+    10566, 12336, 14390, 6697, 11339, 13521, 11851, 13944, 15826,
+    6847, 8381, 11349, 7509, 9331, 10939, 8029, 9618, 11909,
+    13973, 17644, 19647, 22474, 14722, 16522, 20035, 22134, 16305, 18179, 21106, 23048,
+    15150, 17948, 21394, 23225, 13582, 15191, 17687, 22333, 11778, 15546, 18458, 21753,
+    16619, 18410, 20827, 23559, 14229, 15746, 17907, 22474, 12465, 15327, 20700, 22831,
+    15085, 16799, 20182, 23410, 13026, 16935, 19890, 22892, 14310, 16854, 19007, 22944,
+    14210, 15897, 18891, 23154, 14633, 18059, 20132, 22899, 15246, 17781, 19780, 22640,
+    16396, 18904, 20912, 23035, 14618, 17401, 19510, 21672, 15473, 17497, 19813, 23439,
+    18851, 20736, 22323, 23864, 15055, 16804, 18530, 20916, 16490, 18196, 19990, 21939,
+    11711, 15223, 21154, 23312, 13294, 15546, 19393, 21472, 12956, 16060, 20610, 22417,
+    11628, 15843, 19617, 22501, 14106, 16872, 19839, 22689, 15655, 18192, 20161, 22452,
+    12953, 15244, 20619, 23549, 15322, 17193, 19926, 21762, 16873, 18676, 20444, 22359,
+    14874, 17871, 20083, 21959, 11534, 14486, 19194, 21857, 17766, 19617, 21338, 23178,
+    13404, 15284, 19080, 23136, 15392, 17527, 19470, 21953, 14462, 16153, 17985, 21192,
+    17734, 19750, 21903, 23783, 16973, 19096, 21675, 23815, 16597, 18936, 21257, 23461,
+    15966, 17865, 20602, 22920, 15416, 17456, 20301, 22972, 18335, 20093, 21732, 23497,
+    15548, 17217, 20679, 23594, 15208, 16995, 20816, 22870, 13890, 18015, 20531, 22468,
+    13211, 15377, 19951, 22388, 12852, 14635, 17978, 22680, 16002, 17732, 20373, 23544,
+    11373, 14134, 19534, 22707, 17329, 19151, 21241, 23462, 15612, 17296, 19362, 22850,
+    15422, 19104, 21285, 23164, 13792, 17111, 19349, 21370, 15352, 17876, 20776, 22667,
+    15253, 16961, 18921, 22123, 14108, 17264, 20294, 23246, 15785, 17897, 20010, 21822,
+    17399, 19147, 20915, 22753, 13010, 15659, 18127, 20840, 16826, 19422, 22218, 24084,
+    18108, 20641, 22695, 24237, 18018, 20273, 22268, 23920, 16057, 17821, 21365, 23665,
+    16005, 17901, 19892, 23016, 13232, 16683, 21107, 23221, 13280, 16615, 19915, 21829,
+    14950, 18575, 20599, 22511, 16337, 18261, 20277, 23216, 14306, 16477, 21203, 23158,
+    12803, 17498, 20248, 22014, 14327, 17068, 20160, 22006, 14402, 17461, 21599, 23688,
+    16968, 18834, 20896, 23055, 15070, 17157, 20451, 22315, 15419, 17107, 21601, 23946,
+    16039, 17639, 19533, 21424, 16326, 19261, 21745, 23673, 16489, 18534, 21658, 23782,
+    16594, 18471, 20549, 22807, 18973, 21212, 22890, 24278, 14264, 18674, 21123, 23071,
+    15117, 16841, 19239, 23118, 13762, 15782, 20478, 23230, 14111, 15949, 20058, 22354,
+    14990, 16738, 21139, 23492, 13735, 16971, 19026, 22158, 14676, 17314, 20232, 22807,
+    16196, 18146, 20459, 22339, 14747, 17258, 19315, 22437, 14973, 17778, 20692, 23367,
+    15715, 17472, 20385, 22349, 15702, 18228, 20829, 23410, 14428, 16188, 20541, 23630,
+    16824, 19394, 21365, 23246, 13069, 16392, 18900, 21121, 12047, 16640, 19463, 21689,
+    14757, 17433, 19659, 23125, 15185, 16930, 19900, 22540, 16026, 17725, 19618, 22399,
+    16086, 18643, 21179, 23472, 15462, 17248, 19102, 21196, 17368, 20016, 22396, 24096,
+    12340, 14475, 19665, 23362, 13636, 16229, 19462, 22728, 14096, 16211, 19591, 21635,
+    12152, 14867, 19943, 22301, 14492, 17503, 21002, 22728, 14834, 16788, 19447, 21411,
+    14650, 16433, 19326, 22308, 14624, 16328, 19659, 23204, 13888, 16572, 20665, 22488,
+    12977, 16102, 18841, 22246, 15523, 18431, 21757, 23738, 14095, 16349, 18837, 20947,
+    13266, 17809, 21088, 22839, 15427, 18190, 20270, 23143, 11859, 16753, 20935, 22486,
+    12310, 17667, 21736, 23319, 14021, 15926, 18702, 22002, 12286, 15299, 19178, 21126,
+    15703, 17491, 21039, 23151, 12272, 14018, 18213, 22570, 14817, 16364, 18485, 22598,
+    17109, 19683, 21851, 23677, 12657, 14903, 19039, 22061, 14713, 16487, 20527, 22814,
+    14635, 16726, 18763, 21715, 15878, 18550, 20718, 22906
+};
+
+static const int16_t gain3[9]={
+    -16384, -10813, -5407, 0, 4096, 8192, 12288, 16384, 32767
+};
+
+static const int16_t gain4[17]={
+    -17203, -14746, -12288, -9830, -7373, -4915, -2458, 0, 2458, 4915, 7373, 9830,
+    12288, 14746, 17203, 19661, 32767
+};
+
+static const int16_t gain5[33]={
+    614,   1229,  1843,  2458,  3072,  3686,
+    4301,  4915,  5530,  6144,  6758,  7373,
+    7987,  8602,  9216,  9830,  10445, 11059,
+    11674, 12288, 12902, 13517, 14131, 14746,
+    15360, 15974, 16589, 17203, 17818, 18432,
+    19046, 19661, 32767
+};
+
+static const int16_t *const ilbc_gain[] = {
+    gain5, gain4, gain3,
+};
+
+static const int16_t ilbc_state[8] = {
+   -30473, -17838, -9257, -2537, 3639, 10893, 19958, 32636
+};
+
+static const int16_t frg_quant_mod[64] = {
+    /* First 37 values in Q8 */
+    569, 671, 786, 916, 1077, 1278,
+    1529, 1802, 2109, 2481, 2898, 3440,
+    3943, 4535, 5149, 5778, 6464, 7208,
+    7904, 8682, 9397, 10285, 11240, 12246,
+    13313, 14382, 15492, 16735, 18131, 19693,
+    21280, 22912, 24624, 26544, 28432, 30488,
+    32720,
+    /* 22 values in Q5 */
+    4383, 4684, 5012, 5363, 5739, 6146,
+    6603, 7113, 7679, 8285, 9040, 9850,
+    10838, 11882, 13103, 14467, 15950, 17669,
+    19712, 22016, 24800, 28576,
+    /* 5 values in Q3 */
+    8240, 9792, 12040, 15440, 22472
+};
+
+#endif /* AVCODEC_ILBCDATA_H */

diff --git a/libavcodec/ilbcdec.c b/libavcodec/ilbcdec.c
new file mode 100644
index 0000000..dc8f961
--- /dev/null
+++ b/libavcodec/ilbcdec.c

@@ -0,0 +1,1487 @@
+/*
+ * Copyright (c) 2013, The WebRTC project authors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *
+ *   * Neither the name of Google nor the names of its contributors may
+ *     be used to endorse or promote products derived from this software
+ *     without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "get_bits.h"
+#include "ilbcdata.h"
+
+#define LPC_N_20MS            1
+#define LPC_N_30MS            2
+#define LPC_N_MAX             2
+#define LSF_NSPLIT            3
+#define NASUB_MAX             4
+#define LPC_FILTERORDER       10
+#define NSUB_MAX              6
+#define SUBL                  40
+
+#define ST_MEM_L_TBL          85
+#define MEM_LF_TBL            147
+#define STATE_SHORT_LEN_20MS  57
+#define STATE_SHORT_LEN_30MS  58
+
+#define BLOCKL_MAX            240
+#define CB_MEML               147
+#define CB_NSTAGES            3
+#define CB_HALFFILTERLEN      4
+#define CB_FILTERLEN          8
+
+#define ENH_NBLOCKS_TOT 8
+#define ENH_BLOCKL     80
+#define ENH_BUFL     (ENH_NBLOCKS_TOT)*ENH_BLOCKL
+#define ENH_BUFL_FILTEROVERHEAD  3
+#define BLOCKL_MAX      240
+#define NSUB_20MS         4
+#define NSUB_30MS         6
+#define NSUB_MAX          6
+#define NASUB_20MS        2
+#define NASUB_30MS        4
+#define NASUB_MAX         4
+#define STATE_LEN        80
+#define STATE_SHORT_LEN_30MS  58
+#define STATE_SHORT_LEN_20MS  57
+
+#define SPL_MUL_16_16(a, b) ((int32_t) (((int16_t)(a)) * ((int16_t)(b))))
+#define SPL_MUL_16_16_RSFT(a, b, c) (SPL_MUL_16_16(a, b) >> (c))
+
+typedef struct ILBCFrame {
+    int16_t  lsf[LSF_NSPLIT*LPC_N_MAX];
+    int16_t  cb_index[CB_NSTAGES*(NASUB_MAX + 1)];
+    int16_t  gain_index[CB_NSTAGES*(NASUB_MAX + 1)];
+    int16_t  ifm;
+    int16_t  state_first;
+    int16_t  idx[STATE_SHORT_LEN_30MS];
+    int16_t  firstbits;
+    int16_t  start;
+} ILBCFrame;
+
+typedef struct ILBCContext {
+    AVClass         *class;
+    int              enhancer;
+
+    int              mode;
+    GetBitContext    gb;
+    ILBCFrame        frame;
+
+    int              prev_enh_pl;
+    int              consPLICount;
+    int              last_lag;
+    int              state_short_len;
+    int              lpc_n;
+    int16_t          nasub;
+    int16_t          nsub;
+    int              block_samples;
+    int16_t          no_of_words;
+    int16_t          no_of_bytes;
+    int16_t          lsfdeq[LPC_FILTERORDER*LPC_N_MAX];
+    int16_t          lsfold[LPC_FILTERORDER];
+    int16_t          syntMem[LPC_FILTERORDER];
+    int16_t          lsfdeqold[LPC_FILTERORDER];
+    int16_t          weightdenum[(LPC_FILTERORDER + 1) * NSUB_MAX];
+    int16_t          syntdenum[NSUB_MAX * (LPC_FILTERORDER + 1)];
+    int16_t          old_syntdenum[NSUB_MAX * (LPC_FILTERORDER + 1)];
+    int16_t          enh_buf[ENH_BUFL+ENH_BUFL_FILTEROVERHEAD];
+    int16_t          enh_period[ENH_NBLOCKS_TOT];
+    int16_t          prevResidual[NSUB_MAX*SUBL];
+    int16_t          decresidual[BLOCKL_MAX];
+    int16_t          plc_residual[BLOCKL_MAX + LPC_FILTERORDER];
+    int16_t          seed;
+    int16_t          prevPLI;
+    int16_t          prevScale;
+    int16_t          prevLag;
+    int16_t          per_square;
+    int16_t          prev_lpc[LPC_FILTERORDER + 1];
+    int16_t          plc_lpc[LPC_FILTERORDER + 1];
+    int16_t          hpimemx[2];
+    int16_t          hpimemy[4];
+} ILBCContext;
+
+static int unpack_frame(ILBCContext *s)
+{
+    ILBCFrame *frame = &s->frame;
+    GetBitContext *gb = &s->gb;
+    int j;
+
+    frame->lsf[0] = get_bits(gb, 6);
+    frame->lsf[1] = get_bits(gb, 7);
+    frame->lsf[2] = get_bits(gb, 7);
+
+    if (s->mode == 20) {
+        frame->start          = get_bits(gb, 2);
+        frame->state_first    = get_bits1(gb);
+        frame->ifm            = get_bits(gb, 6);
+        frame->cb_index[0]    = get_bits(gb, 6) << 1;
+        frame->gain_index[0]  = get_bits(gb, 2) << 3;
+        frame->gain_index[1]  = get_bits1(gb) << 3;
+        frame->cb_index[3]    = get_bits(gb, 7) << 1;
+        frame->gain_index[3]  = get_bits1(gb) << 4;
+        frame->gain_index[4]  = get_bits1(gb) << 3;
+        frame->gain_index[6]  = get_bits1(gb) << 4;
+    } else {
+        frame->lsf[3]         = get_bits(gb, 6);
+        frame->lsf[4]         = get_bits(gb, 7);
+        frame->lsf[5]         = get_bits(gb, 7);
+        frame->start          = get_bits(gb, 3);
+        frame->state_first    = get_bits1(gb);
+        frame->ifm            = get_bits(gb, 6);
+        frame->cb_index[0]    = get_bits(gb, 4) << 3;
+        frame->gain_index[0]  = get_bits1(gb) << 4;
+        frame->gain_index[1]  = get_bits1(gb) << 3;
+        frame->cb_index[3]    = get_bits(gb, 6) << 2;
+        frame->gain_index[3]  = get_bits1(gb) << 4;
+        frame->gain_index[4]  = get_bits1(gb) << 3;
+    }
+
+    for (j = 0; j < 48; j++)
+        frame->idx[j] = get_bits1(gb) << 2;
+
+    if (s->mode == 20) {
+        for (; j < 57; j++)
+            frame->idx[j] = get_bits1(gb) << 2;
+
+        frame->gain_index[1] |= get_bits1(gb) << 2;
+        frame->gain_index[3] |= get_bits(gb, 2) << 2;
+        frame->gain_index[4] |= get_bits1(gb) << 2;
+        frame->gain_index[6] |= get_bits1(gb) << 3;
+        frame->gain_index[7]  = get_bits(gb, 2) << 2;
+    } else {
+        for (; j < 58; j++)
+            frame->idx[j] = get_bits1(gb) << 2;
+
+        frame->cb_index[0]    |= get_bits(gb, 2) << 1;
+        frame->gain_index[0]  |= get_bits1(gb) << 3;
+        frame->gain_index[1]  |= get_bits1(gb) << 2;
+        frame->cb_index[3]    |= get_bits1(gb) << 1;
+        frame->cb_index[6]     = get_bits1(gb) << 7;
+        frame->cb_index[6]    |= get_bits(gb, 6) << 1;
+        frame->cb_index[9]     = get_bits(gb, 7) << 1;
+        frame->cb_index[12]    = get_bits(gb, 3) << 5;
+        frame->cb_index[12]   |= get_bits(gb, 4) << 1;
+        frame->gain_index[3]  |= get_bits(gb, 2) << 2;
+        frame->gain_index[4]  |= get_bits(gb, 2) << 1;
+        frame->gain_index[6]   = get_bits(gb, 2) << 3;
+        frame->gain_index[7]   = get_bits(gb, 2) << 2;
+        frame->gain_index[9]   = get_bits1(gb) << 4;
+        frame->gain_index[10]  = get_bits1(gb) << 3;
+        frame->gain_index[12]  = get_bits1(gb) << 4;
+        frame->gain_index[13]  = get_bits1(gb) << 3;
+    }
+
+    for (j = 0; j < 56; j++)
+        frame->idx[j] |= get_bits(gb, 2);
+
+    if (s->mode == 20) {
+        frame->idx[56]        |= get_bits(gb, 2);
+        frame->cb_index[0]    |= get_bits1(gb);
+        frame->cb_index[1]     = get_bits(gb, 7);
+        frame->cb_index[2]     = get_bits(gb, 6) << 1;
+        frame->cb_index[2]    |= get_bits1(gb);
+        frame->gain_index[0]  |= get_bits(gb, 3);
+        frame->gain_index[1]  |= get_bits(gb, 2);
+        frame->gain_index[2]   = get_bits(gb, 3);
+        frame->cb_index[3]    |= get_bits1(gb);
+        frame->cb_index[4]     = get_bits(gb, 6) << 1;
+        frame->cb_index[4]    |= get_bits1(gb);
+        frame->cb_index[5]     = get_bits(gb, 7);
+        frame->cb_index[6]     = get_bits(gb, 8);
+        frame->cb_index[7]     = get_bits(gb, 8);
+        frame->cb_index[8]     = get_bits(gb, 8);
+        frame->gain_index[3]  |= get_bits(gb, 2);
+        frame->gain_index[4]  |= get_bits(gb, 2);
+        frame->gain_index[5]   = get_bits(gb, 3);
+        frame->gain_index[6]  |= get_bits(gb, 3);
+        frame->gain_index[7]  |= get_bits(gb, 2);
+        frame->gain_index[8]   = get_bits(gb, 3);
+    } else {
+        frame->idx[56]        |= get_bits(gb, 2);
+        frame->idx[57]        |= get_bits(gb, 2);
+        frame->cb_index[0]    |= get_bits1(gb);
+        frame->cb_index[1]     = get_bits(gb, 7);
+        frame->cb_index[2]     = get_bits(gb, 4) << 3;
+        frame->cb_index[2]    |= get_bits(gb, 3);
+        frame->gain_index[0]  |= get_bits(gb, 3);
+        frame->gain_index[1]  |= get_bits(gb, 2);
+        frame->gain_index[2]   = get_bits(gb, 3);
+        frame->cb_index[3]    |= get_bits1(gb);
+        frame->cb_index[4]     = get_bits(gb, 4) << 3;
+        frame->cb_index[4]    |= get_bits(gb, 3);
+        frame->cb_index[5]     = get_bits(gb, 7);
+        frame->cb_index[6]    |= get_bits1(gb);
+        frame->cb_index[7]     = get_bits(gb, 5) << 3;
+        frame->cb_index[7]    |= get_bits(gb, 3);
+        frame->cb_index[8]     = get_bits(gb, 8);
+        frame->cb_index[9]    |= get_bits1(gb);
+        frame->cb_index[10]    = get_bits(gb, 4) << 4;
+        frame->cb_index[10]   |= get_bits(gb, 4);
+        frame->cb_index[11]    = get_bits(gb, 8);
+        frame->cb_index[12]   |= get_bits1(gb);
+        frame->cb_index[13]    = get_bits(gb, 3) << 5;
+        frame->cb_index[13]   |= get_bits(gb, 5);
+        frame->cb_index[14]    = get_bits(gb, 8);
+        frame->gain_index[3]  |= get_bits(gb, 2);
+        frame->gain_index[4]  |= get_bits1(gb);
+        frame->gain_index[5]   = get_bits(gb, 3);
+        frame->gain_index[6]  |= get_bits(gb, 3);
+        frame->gain_index[7]  |= get_bits(gb, 2);
+        frame->gain_index[8]   = get_bits(gb, 3);
+        frame->gain_index[9]  |= get_bits(gb, 4);
+        frame->gain_index[10] |= get_bits1(gb) << 2;
+        frame->gain_index[10] |= get_bits(gb, 2);
+        frame->gain_index[11]  = get_bits(gb, 3);
+        frame->gain_index[12] |= get_bits(gb, 4);
+        frame->gain_index[13] |= get_bits(gb, 3);
+        frame->gain_index[14]  = get_bits(gb, 3);
+    }
+
+    return get_bits1(gb);
+}
+
+static void index_conv(int16_t *index)
+{
+    int k;
+
+    for (k = 4; k < 6; k++) {
+        if (index[k] >= 44 && index[k] < 108) {
+            index[k] += 64;
+        } else if (index[k] >= 108 && index[k] < 128) {
+            index[k] += 128;
+        }
+    }
+}
+
+static void lsf_dequantization(int16_t *lsfdeq, int16_t *index, int16_t lpc_n)
+{
+    int i, j, pos = 0, cb_pos = 0;
+
+    for (i = 0; i < LSF_NSPLIT; i++) {
+        for (j = 0; j < lsf_dim_codebook[i]; j++) {
+            lsfdeq[pos + j] = lsf_codebook[cb_pos + index[i] * lsf_dim_codebook[i] + j];
+        }
+
+        pos    += lsf_dim_codebook[i];
+        cb_pos += lsf_size_codebook[i] * lsf_dim_codebook[i];
+    }
+
+    if (lpc_n > 1) {
+        pos = 0;
+        cb_pos = 0;
+        for (i = 0; i < LSF_NSPLIT; i++) {
+            for (j = 0; j < lsf_dim_codebook[i]; j++) {
+                lsfdeq[LPC_FILTERORDER + pos + j] = lsf_codebook[cb_pos +
+                    index[LSF_NSPLIT + i] * lsf_dim_codebook[i] + j];
+            }
+
+            pos    += lsf_dim_codebook[i];
+            cb_pos += lsf_size_codebook[i] * lsf_dim_codebook[i];
+        }
+    }
+}
+
+static void lsf_check_stability(int16_t *lsf, int dim, int nb_vectors)
+{
+    for (int n = 0; n < 2; n++) {
+        for (int m = 0; m < nb_vectors; m++) {
+            for (int k = 0; k < dim - 1; k++) {
+                int i = m * dim + k;
+
+                if ((lsf[i + 1] - lsf[i]) < 319) {
+                    if (lsf[i + 1] < lsf[i]) {
+                        lsf[i + 1] = lsf[i] + 160;
+                        lsf[i]     = lsf[i + 1] - 160;
+                    } else {
+                        lsf[i]     -= 160;
+                        lsf[i + 1] += 160;
+                    }
+                }
+
+                lsf[i] = av_clip(lsf[i], 82, 25723);
+            }
+        }
+    }
+}
+
+static void lsf_interpolate(int16_t *out, int16_t *in1,
+                            int16_t *in2, int16_t coef,
+                            int size)
+{
+    int invcoef = 16384 - coef, i;
+
+    for (i = 0; i < size; i++)
+        out[i] = (coef * in1[i] + invcoef * in2[i] + 8192) >> 14;
+}
+
+static void lsf2lsp(int16_t *lsf, int16_t *lsp, int order)
+{
+    int16_t diff, freq;
+    int32_t tmp;
+    int i, k;
+
+    for (i = 0; i < order; i++) {
+        freq = (lsf[i] * 20861) >> 15;
+        /* 20861: 1.0/(2.0*PI) in Q17 */
+        /*
+           Upper 8 bits give the index k and
+           Lower 8 bits give the difference, which needs
+           to be approximated linearly
+         */
+        k = FFMIN(freq >> 8, 63);
+        diff = freq & 0xFF;
+
+        /* Calculate linear approximation */
+        tmp = cos_derivative_tbl[k] * diff;
+        lsp[i] = cos_tbl[k] + (tmp >> 12);
+    }
+}
+
+static void get_lsp_poly(int16_t *lsp, int32_t *f)
+{
+    int16_t high, low;
+    int i, j, k, l;
+    int32_t tmp;
+
+    f[0] = 16777216;
+    f[1] = lsp[0] * -1024;
+
+    for (i = 2, k = 2, l = 2; i <= 5; i++, k += 2) {
+        f[l] = f[l - 2];
+
+        for (j = i; j > 1; j--, l--) {
+            high = f[l - 1] >> 16;
+            low = (f[l - 1] - (high << 16)) >> 1;
+
+            tmp = ((high * lsp[k]) << 2) + (((low * lsp[k]) >> 15) << 2);
+
+            f[l] += f[l - 2];
+            f[l] -= tmp;
+        }
+
+        f[l] -= lsp[k] << 10;
+        l += i;
+    }
+}
+
+static void lsf2poly(int16_t *a, int16_t *lsf)
+{
+    int32_t f[2][6];
+    int16_t lsp[10];
+    int32_t tmp;
+    int i;
+
+    lsf2lsp(lsf, lsp, LPC_FILTERORDER);
+
+    get_lsp_poly(&lsp[0], f[0]);
+    get_lsp_poly(&lsp[1], f[1]);
+
+    for (i = 5; i > 0; i--) {
+        f[0][i] += f[0][i - 1];
+        f[1][i] -= f[1][i - 1];
+    }
+
+    a[0] = 4096;
+    for (i = 5; i > 0; i--) {
+        tmp = f[0][6 - i] + f[1][6 - i];
+        a[6 - i] = (tmp + 4096) >> 13;
+
+        tmp = f[0][6 - i] - f[1][6 - i];
+        a[5 + i] = (tmp + 4096) >> 13;
+    }
+}
+
+static void lsp_interpolate2polydec(int16_t *a, int16_t *lsf1,
+                                   int16_t *lsf2, int coef, int length)
+{
+    int16_t lsftmp[LPC_FILTERORDER];
+
+    lsf_interpolate(lsftmp, lsf1, lsf2, coef, length);
+    lsf2poly(a, lsftmp);
+}
+
+static void bw_expand(int16_t *out, const int16_t *in, const int16_t *coef, int length)
+{
+    int i;
+
+    out[0] = in[0];
+    for (i = 1; i < length; i++)
+        out[i] = (coef[i] * in[i] + 16384) >> 15;
+}
+
+static void lsp_interpolate(int16_t *syntdenum, int16_t *weightdenum,
+                            int16_t *lsfdeq, int16_t length,
+                            ILBCContext *s)
+{
+    int16_t lp[LPC_FILTERORDER + 1], *lsfdeq2;
+    int i, pos, lp_length;
+
+    lsfdeq2 = lsfdeq + length;
+    lp_length = length + 1;
+
+    if (s->mode == 30) {
+        lsp_interpolate2polydec(lp, (*s).lsfdeqold, lsfdeq, lsf_weight_30ms[0], length);
+        memcpy(syntdenum, lp, lp_length * 2);
+        bw_expand(weightdenum, lp, kLpcChirpSyntDenum, lp_length);
+
+        pos = lp_length;
+        for (i = 1; i < 6; i++) {
+            lsp_interpolate2polydec(lp, lsfdeq, lsfdeq2,
+                                                 lsf_weight_30ms[i],
+                                                 length);
+            memcpy(syntdenum + pos, lp, lp_length * 2);
+            bw_expand(weightdenum + pos, lp, kLpcChirpSyntDenum, lp_length);
+            pos += lp_length;
+        }
+    } else {
+        pos = 0;
+        for (i = 0; i < s->nsub; i++) {
+            lsp_interpolate2polydec(lp, s->lsfdeqold, lsfdeq,
+                                    lsf_weight_20ms[i], length);
+            memcpy(syntdenum + pos, lp, lp_length * 2);
+            bw_expand(weightdenum + pos, lp, kLpcChirpSyntDenum, lp_length);
+            pos += lp_length;
+        }
+    }
+
+    if (s->mode == 30) {
+        memcpy(s->lsfdeqold, lsfdeq2, length * 2);
+    } else {
+        memcpy(s->lsfdeqold, lsfdeq, length * 2);
+    }
+}
+
+static void filter_mafq12(int16_t *in_ptr, int16_t *out_ptr,
+                          int16_t *B, int16_t B_length,
+                          int16_t length)
+{
+    int o, i, j;
+
+    for (i = 0; i < length; i++) {
+        const int16_t *b_ptr = &B[0];
+        const int16_t *x_ptr = &in_ptr[i];
+
+        o = 0;
+        for (j = 0; j < B_length; j++)
+            o += b_ptr[j] * *x_ptr--;
+
+        o = av_clip(o, -134217728, 134215679);
+
+        out_ptr[i] = ((o + 2048) >> 12);
+    }
+}
+
+static void filter_arfq12(const int16_t *data_in,
+                          int16_t *data_out,
+                          const int16_t *coefficients,
+                          int coefficients_length,
+                          int data_length)
+{
+    int i, j;
+
+    for (i = 0; i < data_length; i++) {
+        int output = 0, sum = 0;
+
+        for (j = coefficients_length - 1; j > 0; j--) {
+            sum += coefficients[j] * data_out[i - j];
+        }
+
+        output = coefficients[0] * data_in[i] - sum;
+        output = av_clip(output, -134217728, 134215679);
+
+        data_out[i] = (output + 2048) >> 12;
+    }
+}
+
+static void state_construct(int16_t ifm, int16_t *idx,
+                           int16_t *synt_denum, int16_t *Out_fix,
+                           int16_t len)
+{
+    int k;
+    int16_t maxVal;
+    int16_t *tmp1, *tmp2, *tmp3;
+    /* Stack based */
+    int16_t numerator[1 + LPC_FILTERORDER];
+    int16_t sampleValVec[2 * STATE_SHORT_LEN_30MS + LPC_FILTERORDER];
+    int16_t sampleMaVec[2 * STATE_SHORT_LEN_30MS + LPC_FILTERORDER];
+    int16_t *sampleVal = &sampleValVec[LPC_FILTERORDER];
+    int16_t *sampleMa = &sampleMaVec[LPC_FILTERORDER];
+    int16_t *sampleAr = &sampleValVec[LPC_FILTERORDER];
+
+    /* initialization of coefficients */
+
+    for (k = 0; k < LPC_FILTERORDER + 1; k++) {
+        numerator[k] = synt_denum[LPC_FILTERORDER - k];
+    }
+
+    /* decoding of the maximum value */
+
+    maxVal = frg_quant_mod[ifm];
+
+    /* decoding of the sample values */
+    tmp1 = sampleVal;
+    tmp2 = &idx[len - 1];
+
+    if (ifm < 37) {
+        for (k = 0; k < len; k++) {
+            /*the shifting is due to the Q13 in sq4_fixQ13[i], also the adding of 2097152 (= 0.5 << 22)
+               maxVal is in Q8 and result is in Q(-1) */
+            (*tmp1) = (int16_t) ((SPL_MUL_16_16(maxVal, ilbc_state[(*tmp2)]) + 2097152) >> 22);
+            tmp1++;
+            tmp2--;
+        }
+    } else if (ifm < 59) {
+        for (k = 0; k < len; k++) {
+            /*the shifting is due to the Q13 in sq4_fixQ13[i], also the adding of 262144 (= 0.5 << 19)
+               maxVal is in Q5 and result is in Q(-1) */
+            (*tmp1) = (int16_t) ((SPL_MUL_16_16(maxVal, ilbc_state[(*tmp2)]) + 262144) >> 19);
+            tmp1++;
+            tmp2--;
+        }
+    } else {
+        for (k = 0; k < len; k++) {
+            /*the shifting is due to the Q13 in sq4_fixQ13[i], also the adding of 65536 (= 0.5 << 17)
+               maxVal is in Q3 and result is in Q(-1) */
+            (*tmp1) = (int16_t) ((SPL_MUL_16_16(maxVal, ilbc_state[(*tmp2)]) + 65536) >> 17);
+            tmp1++;
+            tmp2--;
+        }
+    }
+
+    /* Set the rest of the data to zero */
+    memset(&sampleVal[len], 0, len * 2);
+
+    /* circular convolution with all-pass filter */
+
+    /* Set the state to zero */
+    memset(sampleValVec, 0, LPC_FILTERORDER * 2);
+
+    /* Run MA filter + AR filter */
+    filter_mafq12(sampleVal, sampleMa, numerator, LPC_FILTERORDER + 1, len + LPC_FILTERORDER);
+    memset(&sampleMa[len + LPC_FILTERORDER], 0, (len - LPC_FILTERORDER) * 2);
+    filter_arfq12(sampleMa, sampleAr, synt_denum, LPC_FILTERORDER + 1, 2 * len);
+
+    tmp1 = &sampleAr[len - 1];
+    tmp2 = &sampleAr[2 * len - 1];
+    tmp3 = Out_fix;
+    for (k = 0; k < len; k++) {
+        (*tmp3) = (*tmp1) + (*tmp2);
+        tmp1--;
+        tmp2--;
+        tmp3++;
+    }
+}
+
+static int16_t gain_dequantization(int index, int max_in, int stage)
+{
+    int16_t scale = FFMAX(1638, FFABS(max_in));
+
+    return ((scale * ilbc_gain[stage][index]) + 8192) >> 14;
+}
+
+static void vector_rmultiplication(int16_t *out, const int16_t *in,
+                                   const int16_t *win,
+                                   int length, int shift)
+{
+    for (int i = 0; i < length; i++)
+        out[i] = (in[i] * win[-i]) >> shift;
+}
+
+static void vector_multiplication(int16_t *out, const int16_t *in,
+                                  const int16_t *win, int length,
+                                  int shift)
+{
+    for (int i = 0; i < length; i++)
+        out[i] = (in[i] * win[i]) >> shift;
+}
+
+static void add_vector_and_shift(int16_t *out, const int16_t *in1,
+                                 const int16_t *in2, int length,
+                                 int shift)
+{
+    for (int i = 0; i < length; i++)
+        out[i] = (in1[i] + in2[i]) >> shift;
+}
+
+static void create_augmented_vector(int index, int16_t *buffer, int16_t *cbVec)
+{
+    int16_t cbVecTmp[4];
+    int16_t ilow = index - 4;
+
+    memcpy(cbVec, buffer - index, index * 2);
+
+    vector_multiplication(&cbVec[ilow], buffer - index - 4, alpha, 4, 15);
+    vector_rmultiplication(cbVecTmp, buffer - 4, &alpha[3], 4, 15);
+    add_vector_and_shift(&cbVec[ilow], &cbVec[ilow], cbVecTmp, 4, 0);
+
+    memcpy(cbVec + index, buffer - index, (SUBL - index) * sizeof(*cbVec));
+}
+
+static void get_codebook(int16_t * cbvec,   /* (o) Constructed codebook vector */
+                     int16_t * mem,     /* (i) Codebook buffer */
+                     int16_t index,     /* (i) Codebook index */
+                     int16_t lMem,      /* (i) Length of codebook buffer */
+                     int16_t cbveclen   /* (i) Codebook vector length */
+)
+{
+    int16_t k, base_size;
+    int16_t lag;
+    /* Stack based */
+    int16_t tempbuff2[SUBL + 5];
+
+    /* Determine size of codebook sections */
+    base_size = lMem - cbveclen + 1;
+
+    if (cbveclen == SUBL) {
+        base_size += cbveclen / 2;
+    }
+
+    /* No filter -> First codebook section */
+    if (index < lMem - cbveclen + 1) {
+        /* first non-interpolated vectors */
+
+        k = index + cbveclen;
+        /* get vector */
+        memcpy(cbvec, mem + lMem - k, cbveclen * 2);
+    } else if (index < base_size) {
+
+        /* Calculate lag */
+
+        k = (int16_t) SPL_MUL_16_16(2, (index - (lMem - cbveclen + 1))) + cbveclen;
+
+        lag = k / 2;
+
+        create_augmented_vector(lag, mem + lMem, cbvec);
+    } else {
+        int16_t memIndTest;
+
+        /* first non-interpolated vectors */
+
+        if (index - base_size < lMem - cbveclen + 1) {
+
+            /* Set up filter memory, stuff zeros outside memory buffer */
+
+            memIndTest = lMem - (index - base_size + cbveclen);
+
+            memset(mem - CB_HALFFILTERLEN, 0, CB_HALFFILTERLEN * 2);
+            memset(mem + lMem, 0, CB_HALFFILTERLEN * 2);
+
+            /* do filtering to get the codebook vector */
+
+            filter_mafq12(&mem[memIndTest + 4], cbvec, (int16_t *) kCbFiltersRev, CB_FILTERLEN, cbveclen);
+        } else {
+            /* interpolated vectors */
+            /* Stuff zeros outside memory buffer  */
+            memIndTest = lMem - cbveclen - CB_FILTERLEN;
+            memset(mem + lMem, 0, CB_HALFFILTERLEN * 2);
+
+            /* do filtering */
+            filter_mafq12(&mem[memIndTest + 7], tempbuff2, (int16_t *) kCbFiltersRev, CB_FILTERLEN, (int16_t) (cbveclen + 5));
+
+            /* Calculate lag index */
+            lag = (cbveclen << 1) - 20 + index - base_size - lMem - 1;
+
+            create_augmented_vector(lag, tempbuff2 + SUBL + 5, cbvec);
+        }
+    }
+}
+
+static void construct_vector (
+    int16_t *decvector,   /* (o) Decoded vector */
+    int16_t *index,       /* (i) Codebook indices */
+    int16_t *gain_index,  /* (i) Gain quantization indices */
+    int16_t *mem,         /* (i) Buffer for codevector construction */
+    int16_t lMem,         /* (i) Length of buffer */
+    int16_t veclen)
+{
+    int16_t gain[CB_NSTAGES];
+    int16_t cbvec0[SUBL];
+    int16_t cbvec1[SUBL];
+    int16_t cbvec2[SUBL];
+    int32_t a32;
+    int16_t *gainPtr;
+    int j;
+
+    /* gain de-quantization */
+
+    gain[0] = gain_dequantization(gain_index[0], 16384, 0);
+    gain[1] = gain_dequantization(gain_index[1], gain[0], 1);
+    gain[2] = gain_dequantization(gain_index[2], gain[1], 2);
+
+    /* codebook vector construction and construction of total vector */
+
+    /* Stack based */
+    get_codebook(cbvec0, mem, index[0], lMem, veclen);
+    get_codebook(cbvec1, mem, index[1], lMem, veclen);
+    get_codebook(cbvec2, mem, index[2], lMem, veclen);
+
+    gainPtr = &gain[0];
+    for (j = 0; j < veclen; j++) {
+        a32 = SPL_MUL_16_16(*gainPtr++, cbvec0[j]);
+        a32 += SPL_MUL_16_16(*gainPtr++, cbvec1[j]);
+        a32 += SPL_MUL_16_16(*gainPtr, cbvec2[j]);
+        gainPtr -= 2;
+        decvector[j] = (a32 + 8192) >> 14;
+    }
+}
+
+static void reverse_memcpy(int16_t *dest, int16_t *source, int length)
+{
+    int16_t* destPtr = dest;
+    int16_t* sourcePtr = source;
+    int j;
+
+    for (j = 0; j < length; j++)
+        *destPtr-- = *sourcePtr++;
+}
+
+static void decode_residual(ILBCContext *s,
+                            ILBCFrame *encbits,
+                            int16_t *decresidual,
+                            int16_t *syntdenum)
+{
+    int16_t meml_gotten, Nfor, Nback, diff, start_pos;
+    int16_t subcount, subframe;
+    int16_t *reverseDecresidual = s->enh_buf;        /* Reversed decoded data, used for decoding backwards in time (reuse memory in state) */
+    int16_t *memVec = s->prevResidual;
+    int16_t *mem = &memVec[CB_HALFFILTERLEN];   /* Memory for codebook */
+
+    diff = STATE_LEN - s->state_short_len;
+
+    if (encbits->state_first == 1) {
+        start_pos = (encbits->start - 1) * SUBL;
+    } else {
+        start_pos = (encbits->start - 1) * SUBL + diff;
+    }
+
+    /* decode scalar part of start state */
+
+    state_construct(encbits->ifm, encbits->idx, &syntdenum[(encbits->start - 1) * (LPC_FILTERORDER + 1)], &decresidual[start_pos], s->state_short_len);
+
+    if (encbits->state_first) { /* put adaptive part in the end */
+        /* setup memory */
+        memset(mem, 0, (int16_t) (CB_MEML - s->state_short_len) * 2);
+        memcpy(mem + CB_MEML - s->state_short_len, decresidual + start_pos, s->state_short_len * 2);
+
+        /* construct decoded vector */
+
+        construct_vector(&decresidual[start_pos + s->state_short_len], encbits->cb_index, encbits->gain_index, mem + CB_MEML - ST_MEM_L_TBL, ST_MEM_L_TBL, (int16_t) diff);
+
+    } else { /* put adaptive part in the beginning */
+        /* setup memory */
+        meml_gotten = s->state_short_len;
+        reverse_memcpy(mem + CB_MEML - 1, decresidual + start_pos, meml_gotten);
+        memset(mem, 0, (int16_t) (CB_MEML - meml_gotten) * 2);
+
+        /* construct decoded vector */
+        construct_vector(reverseDecresidual, encbits->cb_index, encbits->gain_index, mem + CB_MEML - ST_MEM_L_TBL, ST_MEM_L_TBL, diff);
+
+        /* get decoded residual from reversed vector */
+        reverse_memcpy(&decresidual[start_pos - 1], reverseDecresidual, diff);
+    }
+
+    /* counter for predicted subframes */
+    subcount = 1;
+
+    /* forward prediction of subframes */
+    Nfor = s->nsub - encbits->start - 1;
+
+    if (Nfor > 0) {
+        /* setup memory */
+        memset(mem, 0, (CB_MEML - STATE_LEN) * 2);
+        memcpy(mem + CB_MEML - STATE_LEN, decresidual + (encbits->start - 1) * SUBL, STATE_LEN * 2);
+
+        /* loop over subframes to encode */
+        for (subframe = 0; subframe < Nfor; subframe++) {
+            /* construct decoded vector */
+            construct_vector(&decresidual[(encbits->start + 1 + subframe) * SUBL], encbits->cb_index + subcount * CB_NSTAGES, encbits->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL, SUBL);
+
+            /* update memory */
+            memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem));
+            memcpy(mem + CB_MEML - SUBL, &decresidual[(encbits->start + 1 + subframe) * SUBL], SUBL * 2);
+
+            subcount++;
+        }
+
+    }
+
+    /* backward prediction of subframes */
+    Nback = encbits->start - 1;
+
+    if (Nback > 0) {
+        /* setup memory */
+        meml_gotten = SUBL * (s->nsub + 1 - encbits->start);
+        if (meml_gotten > CB_MEML) {
+            meml_gotten = CB_MEML;
+        }
+
+        reverse_memcpy(mem + CB_MEML - 1, decresidual + (encbits->start - 1) * SUBL, meml_gotten);
+        memset(mem, 0, (int16_t) (CB_MEML - meml_gotten) * 2);
+
+        /* loop over subframes to decode */
+        for (subframe = 0; subframe < Nback; subframe++) {
+            /* construct decoded vector */
+            construct_vector(&reverseDecresidual[subframe * SUBL], encbits->cb_index + subcount * CB_NSTAGES,
+                        encbits->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL, SUBL);
+
+            /* update memory */
+            memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem));
+            memcpy(mem + CB_MEML - SUBL, &reverseDecresidual[subframe * SUBL], SUBL * 2);
+
+            subcount++;
+        }
+
+        /* get decoded residual from reversed vector */
+        reverse_memcpy(decresidual + SUBL * Nback - 1, reverseDecresidual, SUBL * Nback);
+    }
+}
+
+static int16_t max_abs_value_w16(const int16_t* vector, int length)
+{
+    int i = 0, absolute = 0, maximum = 0;
+
+    if (vector == NULL || length <= 0) {
+        return -1;
+    }
+
+    for (i = 0; i < length; i++) {
+        absolute = FFABS(vector[i]);
+        if (absolute > maximum)
+            maximum = absolute;
+    }
+
+    // Guard the case for abs(-32768).
+    return FFMIN(maximum, INT16_MAX);
+}
+
+static int16_t get_size_in_bits(uint32_t n)
+{
+    int16_t bits;
+
+    if (0xFFFF0000 & n) {
+        bits = 16;
+    } else {
+        bits = 0;
+    }
+
+    if (0x0000FF00 & (n >> bits)) bits += 8;
+    if (0x000000F0 & (n >> bits)) bits += 4;
+    if (0x0000000C & (n >> bits)) bits += 2;
+    if (0x00000002 & (n >> bits)) bits += 1;
+    if (0x00000001 & (n >> bits)) bits += 1;
+
+    return bits;
+}
+
+static int32_t scale_dot_product(const int16_t *v1, const int16_t *v2, int length, int scaling)
+{
+    int32_t sum = 0;
+
+    for (int i = 0; i < length; i++)
+        sum += (v1[i] * v2[i]) >> scaling;
+
+    return sum;
+}
+
+static void correlation(int32_t *corr, int32_t *ener, int16_t *buffer,
+                        int16_t lag, int16_t blen, int16_t srange, int16_t scale)
+{
+    int16_t *w16ptr;
+
+    w16ptr = &buffer[blen - srange - lag];
+
+    *corr = scale_dot_product(&buffer[blen - srange], w16ptr, srange, scale);
+    *ener = scale_dot_product(w16ptr, w16ptr, srange, scale);
+
+    if (*ener == 0) {
+        *corr = 0;
+        *ener = 1;
+    }
+}
+
+#define SPL_SHIFT_W32(x, c) (((c) >= 0) ? ((x) << (c)) : ((x) >> (-(c))))
+
+static int16_t norm_w32(int32_t a)
+{
+    if (a == 0) {
+        return 0;
+    } else if (a < 0) {
+        a = ~a;
+    }
+
+    return ff_clz(a);
+}
+
+static int32_t div_w32_w16(int32_t num, int16_t den)
+{
+    if (den != 0)
+        return num / den;
+    else
+        return 0x7FFFFFFF;
+}
+
+static void do_plc(int16_t *plc_residual,      /* (o) concealed residual */
+                   int16_t *plc_lpc,           /* (o) concealed LP parameters */
+                   int16_t PLI,                /* (i) packet loss indicator
+                                                      0 - no PL, 1 = PL */
+                   int16_t *decresidual,       /* (i) decoded residual */
+                   int16_t *lpc,               /* (i) decoded LPC (only used for no PL) */
+                   int16_t inlag,              /* (i) pitch lag */
+                   ILBCContext *s)             /* (i/o) decoder instance */
+{
+    int16_t i, pick;
+    int32_t cross, ener, cross_comp, ener_comp = 0;
+    int32_t measure, max_measure, energy;
+    int16_t max, cross_square_max, cross_square;
+    int16_t j, lag, tmp1, tmp2, randlag;
+    int16_t shift1, shift2, shift3, shift_max;
+    int16_t scale3;
+    int16_t corrLen;
+    int32_t tmpW32, tmp2W32;
+    int16_t use_gain;
+    int16_t tot_gain;
+    int16_t max_perSquare;
+    int16_t scale1, scale2;
+    int16_t totscale;
+    int32_t nom;
+    int16_t denom;
+    int16_t pitchfact;
+    int16_t use_lag;
+    int ind;
+    int16_t randvec[BLOCKL_MAX];
+
+    /* Packet Loss */
+    if (PLI == 1) {
+
+        s->consPLICount += 1;
+
+        /* if previous frame not lost,
+           determine pitch pred. gain */
+
+        if (s->prevPLI != 1) {
+
+            /* Maximum 60 samples are correlated, preserve as high accuracy
+               as possible without getting overflow */
+            max = max_abs_value_w16(s->prevResidual, s->block_samples);
+            scale3 = (get_size_in_bits(max) << 1) - 25;
+            if (scale3 < 0) {
+                scale3 = 0;
+            }
+
+            /* Store scale for use when interpolating between the
+             * concealment and the received packet */
+            s->prevScale = scale3;
+
+            /* Search around the previous lag +/-3 to find the
+               best pitch period */
+            lag = inlag - 3;
+
+            /* Guard against getting outside the frame */
+            corrLen = FFMIN(60, s->block_samples - (inlag + 3));
+
+            correlation(&cross, &ener, s->prevResidual, lag, s->block_samples, corrLen, scale3);
+
+            /* Normalize and store cross^2 and the number of shifts */
+            shift_max = get_size_in_bits(FFABS(cross)) - 15;
+            cross_square_max = (int16_t) SPL_MUL_16_16_RSFT(SPL_SHIFT_W32(cross, -shift_max), SPL_SHIFT_W32(cross, -shift_max), 15);
+
+            for (j = inlag - 2; j <= inlag + 3; j++) {
+                correlation(&cross_comp, &ener_comp, s->prevResidual, j, s->block_samples, corrLen, scale3);
+
+                /* Use the criteria (corr*corr)/energy to compare if
+                   this lag is better or not. To avoid the division,
+                   do a cross multiplication */
+                shift1 = get_size_in_bits(FFABS(cross_comp)) - 15;
+                cross_square = (int16_t) SPL_MUL_16_16_RSFT(SPL_SHIFT_W32(cross_comp, -shift1), SPL_SHIFT_W32(cross_comp, -shift1), 15);
+
+                shift2 = get_size_in_bits(ener) - 15;
+                measure = SPL_MUL_16_16(SPL_SHIFT_W32(ener, -shift2), cross_square);
+
+                shift3 = get_size_in_bits(ener_comp) - 15;
+                max_measure = SPL_MUL_16_16(SPL_SHIFT_W32(ener_comp, -shift3), cross_square_max);
+
+                /* Calculate shift value, so that the two measures can
+                   be put in the same Q domain */
+                if (((shift_max << 1) + shift3) > ((shift1 << 1) + shift2)) {
+                    tmp1 = FFMIN(31, (shift_max << 1) + shift3 - (shift1 << 1) - shift2);
+                    tmp2 = 0;
+                } else {
+                    tmp1 = 0;
+                    tmp2 = FFMIN(31, (shift1 << 1) + shift2 - (shift_max << 1) - shift3);
+                }
+
+                if ((measure >> tmp1) > (max_measure >> tmp2)) {
+                    /* New lag is better => record lag, measure and domain */
+                    lag = j;
+                    cross_square_max = cross_square;
+                    cross = cross_comp;
+                    shift_max = shift1;
+                    ener = ener_comp;
+                }
+            }
+
+            /* Calculate the periodicity for the lag with the maximum correlation.
+
+               Definition of the periodicity:
+               abs(corr(vec1, vec2))/(sqrt(energy(vec1))*sqrt(energy(vec2)))
+
+               Work in the Square domain to simplify the calculations
+               max_perSquare is less than 1 (in Q15)
+             */
+            tmp2W32 = scale_dot_product(&s->prevResidual[s->block_samples - corrLen], &s->prevResidual[s->block_samples - corrLen], corrLen, scale3);
+
+            if ((tmp2W32 > 0) && (ener_comp > 0)) {
+                /* norm energies to int16_t, compute the product of the energies and
+                   use the upper int16_t as the denominator */
+
+                scale1 = norm_w32(tmp2W32) - 16;
+                tmp1 = SPL_SHIFT_W32(tmp2W32, scale1);
+
+                scale2 = norm_w32(ener) - 16;
+                tmp2 =  SPL_SHIFT_W32(ener, scale2);
+                denom = SPL_MUL_16_16_RSFT(tmp1, tmp2, 16);    /* denom in Q(scale1+scale2-16) */
+
+                /* Square the cross correlation and norm it such that max_perSquare
+                   will be in Q15 after the division */
+
+                totscale = scale1 + scale2 - 1;
+                tmp1 = SPL_SHIFT_W32(cross, (totscale >> 1));
+                tmp2 = SPL_SHIFT_W32(cross, totscale - (totscale >> 1));
+
+                nom = SPL_MUL_16_16(tmp1, tmp2);
+                max_perSquare = div_w32_w16(nom, denom);
+            } else {
+                max_perSquare = 0;
+            }
+        } else {
+            /* previous frame lost, use recorded lag and gain */
+            lag = s->prevLag;
+            max_perSquare = s->per_square;
+        }
+
+        /* Attenuate signal and scale down pitch pred gain if
+           several frames lost consecutively */
+
+        use_gain = 32767;       /* 1.0 in Q15 */
+
+        if (s->consPLICount * s->block_samples > 320) {
+            use_gain = 29491;   /* 0.9 in Q15 */
+        } else if (s->consPLICount * s->block_samples > 640) {
+            use_gain = 22938;   /* 0.7 in Q15 */
+        } else if (s->consPLICount * s->block_samples > 960) {
+            use_gain = 16384;   /* 0.5 in Q15 */
+        } else if (s->consPLICount * s->block_samples > 1280) {
+            use_gain = 0;       /* 0.0 in Q15 */
+        }
+
+        /* Compute mixing factor of picth repeatition and noise:
+           for max_per>0.7 set periodicity to 1.0
+           0.4<max_per<0.7 set periodicity to (maxper-0.4)/0.7-0.4)
+           max_per<0.4 set periodicity to 0.0
+         */
+
+        if (max_perSquare > 7868) {     /* periodicity > 0.7  (0.7^4=0.2401 in Q15) */
+            pitchfact = 32767;
+        } else if (max_perSquare > 839) {       /* 0.4 < periodicity < 0.7 (0.4^4=0.0256 in Q15) */
+            /* find best index and interpolate from that */
+            ind = 5;
+            while ((max_perSquare < kPlcPerSqr[ind]) && (ind > 0)) {
+                ind--;
+            }
+            /* pitch fact is approximated by first order */
+            tmpW32 = kPlcPitchFact[ind] + SPL_MUL_16_16_RSFT(kPlcPfSlope[ind], (max_perSquare - kPlcPerSqr[ind]), 11);
+
+            pitchfact = FFMIN(tmpW32, 32767); /* guard against overflow */
+
+        } else {                /* periodicity < 0.4 */
+            pitchfact = 0;
+        }
+
+        /* avoid repetition of same pitch cycle (buzzyness) */
+        use_lag = lag;
+        if (lag < 80) {
+            use_lag = 2 * lag;
+        }
+
+        /* compute concealed residual */
+        energy = 0;
+
+        for (i = 0; i < s->block_samples; i++) {
+            /* noise component -  52 < randlagFIX < 117 */
+            s->seed = SPL_MUL_16_16(s->seed, 31821) + 13849;
+            randlag = 53 + (s->seed & 63);
+
+            pick = i - randlag;
+
+            if (pick < 0) {
+                randvec[i] = s->prevResidual[s->block_samples + pick];
+            } else {
+                randvec[i] = s->prevResidual[pick];
+            }
+
+            /* pitch repeatition component */
+            pick = i - use_lag;
+
+            if (pick < 0) {
+                plc_residual[i] = s->prevResidual[s->block_samples + pick];
+            } else {
+                plc_residual[i] = plc_residual[pick];
+            }
+
+            /* Attinuate total gain for each 10 ms */
+            if (i < 80) {
+                tot_gain = use_gain;
+            } else if (i < 160) {
+                tot_gain = SPL_MUL_16_16_RSFT(31130, use_gain, 15);    /* 0.95*use_gain */
+            } else {
+                tot_gain = SPL_MUL_16_16_RSFT(29491, use_gain, 15);    /* 0.9*use_gain */
+            }
+
+            /* mix noise and pitch repeatition */
+            plc_residual[i] = SPL_MUL_16_16_RSFT(tot_gain, (pitchfact * plc_residual[i] + (32767 - pitchfact) * randvec[i] + 16384) >> 15, 15);
+
+            /* Shifting down the result one step extra to ensure that no overflow
+               will occur */
+            energy += SPL_MUL_16_16_RSFT(plc_residual[i], plc_residual[i], (s->prevScale + 1));
+
+        }
+
+        /* less than 30 dB, use only noise */
+        if (energy < SPL_SHIFT_W32(s->block_samples * 900, -s->prevScale - 1)) {
+            energy = 0;
+            for (i = 0; i < s->block_samples; i++) {
+                plc_residual[i] = randvec[i];
+            }
+        }
+
+        /* use the old LPC */
+        memcpy(plc_lpc, (*s).prev_lpc, (LPC_FILTERORDER + 1) * 2);
+
+        /* Update state in case there are multiple frame losses */
+        s->prevLag = lag;
+        s->per_square = max_perSquare;
+    } else { /* no packet loss, copy input */
+        memcpy(plc_residual, decresidual, s->block_samples * 2);
+        memcpy(plc_lpc, lpc, (LPC_FILTERORDER + 1) * 2);
+        s->consPLICount = 0;
+    }
+
+    /* update state */
+    s->prevPLI = PLI;
+    memcpy(s->prev_lpc, plc_lpc, (LPC_FILTERORDER + 1) * 2);
+    memcpy(s->prevResidual, plc_residual, s->block_samples * 2);
+
+    return;
+}
+
+static int xcorr_coeff(int16_t *target, int16_t *regressor,
+                       int16_t subl, int16_t searchLen,
+                       int16_t offset, int16_t step)
+{
+    int16_t maxlag;
+    int16_t pos;
+    int16_t max;
+    int16_t cross_corr_scale, energy_scale;
+    int16_t cross_corr_sg_mod, cross_corr_sg_mod_max;
+    int32_t cross_corr, energy;
+    int16_t cross_corr_mod, energy_mod, enery_mod_max;
+    int16_t *tp, *rp;
+    int16_t *rp_beg, *rp_end;
+    int16_t totscale, totscale_max;
+    int16_t scalediff;
+    int32_t new_crit, max_crit;
+    int shifts;
+    int k;
+
+    /* Initializations, to make sure that the first one is selected */
+    cross_corr_sg_mod_max = 0;
+    enery_mod_max = INT16_MAX;
+    totscale_max = -500;
+    maxlag = 0;
+    pos = 0;
+
+    /* Find scale value and start position */
+    if (step == 1) {
+        max = max_abs_value_w16(regressor, (int16_t) (subl + searchLen - 1));
+        rp_beg = regressor;
+        rp_end = &regressor[subl];
+    } else {                    /* step== -1 */
+        max = max_abs_value_w16(&regressor[-searchLen], (int16_t) (subl + searchLen - 1));
+        rp_beg = &regressor[-1];
+        rp_end = &regressor[subl - 1];
+    }
+
+    /* Introduce a scale factor on the energy in int32_t in
+       order to make sure that the calculation does not
+       overflow */
+
+    if (max > 5000) {
+        shifts = 2;
+    } else {
+        shifts = 0;
+    }
+
+    /* Calculate the first energy, then do a +/- to get the other energies */
+    energy = scale_dot_product(regressor, regressor, subl, shifts);
+
+    for (k = 0; k < searchLen; k++) {
+        tp = target;
+        rp = &regressor[pos];
+
+        cross_corr = scale_dot_product(tp, rp, subl, shifts);
+
+        if ((energy > 0) && (cross_corr > 0)) {
+            /* Put cross correlation and energy on 16 bit word */
+            cross_corr_scale = norm_w32(cross_corr) - 16;
+            cross_corr_mod = (int16_t) SPL_SHIFT_W32(cross_corr, cross_corr_scale);
+            energy_scale = norm_w32(energy) - 16;
+            energy_mod = (int16_t) SPL_SHIFT_W32(energy, energy_scale);
+
+            /* Square cross correlation and store upper int16_t */
+            cross_corr_sg_mod = (int16_t) SPL_MUL_16_16_RSFT(cross_corr_mod, cross_corr_mod, 16);
+
+            /* Calculate the total number of (dynamic) right shifts that have
+               been performed on (cross_corr*cross_corr)/energy
+             */
+            totscale = energy_scale - (cross_corr_scale << 1);
+
+            /* Calculate the shift difference in order to be able to compare the two
+               (cross_corr*cross_corr)/energy in the same domain
+             */
+            scalediff = totscale - totscale_max;
+            scalediff = FFMIN(scalediff, 31);
+            scalediff = FFMAX(scalediff, -31);
+
+            /* Compute the cross multiplication between the old best criteria
+               and the new one to be able to compare them without using a
+               division */
+
+            if (scalediff < 0) {
+                new_crit = ((int32_t) cross_corr_sg_mod * enery_mod_max) >> (-scalediff);
+                max_crit = ((int32_t) cross_corr_sg_mod_max * energy_mod);
+            } else {
+                new_crit = ((int32_t) cross_corr_sg_mod * enery_mod_max);
+                max_crit = ((int32_t) cross_corr_sg_mod_max * energy_mod) >> scalediff;
+            }
+
+            /* Store the new lag value if the new criteria is larger
+               than previous largest criteria */
+
+            if (new_crit > max_crit) {
+                cross_corr_sg_mod_max = cross_corr_sg_mod;
+                enery_mod_max = energy_mod;
+                totscale_max = totscale;
+                maxlag = k;
+            }
+        }
+        pos += step;
+
+        /* Do a +/- to get the next energy */
+        energy += step * ((*rp_end * *rp_end - *rp_beg * *rp_beg) >> shifts);
+        rp_beg += step;
+        rp_end += step;
+    }
+
+    return maxlag + offset;
+}
+
+static void hp_output(int16_t *signal, const int16_t *ba, int16_t *y,
+                      int16_t *x, int16_t len)
+{
+    int32_t tmp;
+
+    for (int i = 0; i < len; i++) {
+        tmp = SPL_MUL_16_16(y[1], ba[3]);     /* (-a[1])*y[i-1] (low part) */
+        tmp += SPL_MUL_16_16(y[3], ba[4]);    /* (-a[2])*y[i-2] (low part) */
+        tmp = (tmp >> 15);
+        tmp += SPL_MUL_16_16(y[0], ba[3]);    /* (-a[1])*y[i-1] (high part) */
+        tmp += SPL_MUL_16_16(y[2], ba[4]);    /* (-a[2])*y[i-2] (high part) */
+        tmp = (tmp << 1);
+
+        tmp += SPL_MUL_16_16(signal[i], ba[0]);       /* b[0]*x[0] */
+        tmp += SPL_MUL_16_16(x[0], ba[1]);    /* b[1]*x[i-1] */
+        tmp += SPL_MUL_16_16(x[1], ba[2]);    /* b[2]*x[i-2] */
+
+        /* Update state (input part) */
+        x[1] = x[0];
+        x[0] = signal[i];
+
+        /* Convert back to Q0 and multiply with 2 */
+        signal[i] = av_clip_intp2(tmp + 1024, 26) >> 11;
+
+        /* Update state (filtered part) */
+        y[2] = y[0];
+        y[3] = y[1];
+
+        /* upshift tmp by 3 with saturation */
+        if (tmp > 268435455) {
+            tmp = INT32_MAX;
+        } else if (tmp < -268435456) {
+            tmp = INT32_MIN;
+        } else {
+            tmp = tmp << 3;
+        }
+
+        y[0] = tmp >> 16;
+        y[1] = (tmp - (y[0] << 16)) >> 1;
+    }
+}
+
+static int ilbc_decode_frame(AVCodecContext *avctx, void *data,
+                             int *got_frame_ptr, AVPacket *avpkt)
+{
+    const uint8_t *buf = avpkt->data;
+    AVFrame *frame     = data;
+    ILBCContext *s     = avctx->priv_data;
+    int mode = s->mode, ret;
+    int16_t *plc_data = &s->plc_residual[LPC_FILTERORDER];
+
+    if ((ret = init_get_bits8(&s->gb, buf, avpkt->size)) < 0)
+        return ret;
+    memset(&s->frame, 0, sizeof(ILBCFrame));
+
+    frame->nb_samples = s->block_samples;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    if (unpack_frame(s))
+        mode = 0;
+    if (s->frame.start < 1)
+        mode = 0;
+
+    if (mode) {
+        index_conv(s->frame.cb_index);
+
+        lsf_dequantization(s->lsfdeq, s->frame.lsf, s->lpc_n);
+        lsf_check_stability(s->lsfdeq, LPC_FILTERORDER, s->lpc_n);
+        lsp_interpolate(s->syntdenum, s->weightdenum,
+                        s->lsfdeq, LPC_FILTERORDER, s);
+        decode_residual(s, &s->frame, s->decresidual, s->syntdenum);
+
+        do_plc(s->plc_residual, s->plc_lpc, 0,
+                               s->decresidual, s->syntdenum + (LPC_FILTERORDER + 1) * (s->nsub - 1),
+                               s->last_lag, s);
+
+        memcpy(s->decresidual, s->plc_residual, s->block_samples * 2);
+    }
+
+    if (s->enhancer) {
+        /* TODO */
+    } else {
+        int16_t lag, i;
+
+        /* Find last lag (since the enhancer is not called to give this info) */
+        if (s->mode == 20) {
+            lag = xcorr_coeff(&s->decresidual[s->block_samples-60], &s->decresidual[s->block_samples-80],
+                              60, 80, 20, -1);
+        } else {
+            lag = xcorr_coeff(&s->decresidual[s->block_samples-ENH_BLOCKL],
+                              &s->decresidual[s->block_samples-ENH_BLOCKL-20],
+                              ENH_BLOCKL, 100, 20, -1);
+        }
+
+        /* Store lag (it is needed if next packet is lost) */
+        s->last_lag = lag;
+
+        /* copy data and run synthesis filter */
+        memcpy(plc_data, s->decresidual, s->block_samples * 2);
+
+        /* Set up the filter state */
+        memcpy(&plc_data[-LPC_FILTERORDER], s->syntMem, LPC_FILTERORDER * 2);
+
+        for (i = 0; i < s->nsub; i++) {
+            filter_arfq12(plc_data+i*SUBL, plc_data+i*SUBL,
+                                      s->syntdenum + i*(LPC_FILTERORDER + 1),
+                                      LPC_FILTERORDER + 1, SUBL);
+        }
+
+        /* Save the filter state */
+        memcpy(s->syntMem, &plc_data[s->block_samples-LPC_FILTERORDER], LPC_FILTERORDER * 2);
+    }
+
+    memcpy(frame->data[0], plc_data, s->block_samples * 2);
+
+    hp_output((int16_t *)frame->data[0], hp_out_coeffs,
+              s->hpimemy, s->hpimemx, s->block_samples);
+
+    memcpy(s->old_syntdenum, s->syntdenum, s->nsub*(LPC_FILTERORDER + 1) * 2);
+
+    s->prev_enh_pl = 0;
+    if (mode == 0)
+        s->prev_enh_pl = 1;
+
+    *got_frame_ptr = 1;
+
+    return avpkt->size;
+}
+
+static av_cold int ilbc_decode_init(AVCodecContext *avctx)
+{
+    ILBCContext *s  = avctx->priv_data;
+
+    if (avctx->block_align == 38)
+        s->mode = 20;
+    else if (avctx->block_align == 50)
+        s->mode = 30;
+    else if (avctx->bit_rate > 0)
+        s->mode = avctx->bit_rate <= 14000 ? 30 : 20;
+    else
+        return AVERROR_INVALIDDATA;
+
+    avctx->channels       = 1;
+    avctx->channel_layout = AV_CH_LAYOUT_MONO;
+    avctx->sample_rate    = 8000;
+    avctx->sample_fmt     = AV_SAMPLE_FMT_S16;
+
+    if (s->mode == 30) {
+        s->block_samples = 240;
+        s->nsub = NSUB_30MS;
+        s->nasub = NASUB_30MS;
+        s->lpc_n = LPC_N_30MS;
+        s->state_short_len = STATE_SHORT_LEN_30MS;
+    } else {
+        s->block_samples = 160;
+        s->nsub = NSUB_20MS;
+        s->nasub = NASUB_20MS;
+        s->lpc_n = LPC_N_20MS;
+        s->state_short_len = STATE_SHORT_LEN_20MS;
+    }
+
+    return 0;
+}
+
+AVCodec ff_ilbc_decoder = {
+    .name           = "ilbc",
+    .long_name      = NULL_IF_CONFIG_SMALL("iLBC (Internet Low Bitrate Codec)"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_ILBC,
+    .init           = ilbc_decode_init,
+    .decode         = ilbc_decode_frame,
+    .capabilities   = AV_CODEC_CAP_DR1,
+    .priv_data_size = sizeof(ILBCContext),
+};

diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 1547f18..1fd636c 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c

@@ -69,10 +69,15 @@
     int i;
 
     enum AVPixelFormat best = AV_PIX_FMT_NONE;
+    int loss;
 
-    for(i=0; pix_fmt_list[i] != AV_PIX_FMT_NONE; i++)
-        best = avcodec_find_best_pix_fmt_of_2(best, pix_fmt_list[i], src_pix_fmt, has_alpha, loss_ptr);
+    for (i=0; pix_fmt_list[i] != AV_PIX_FMT_NONE; i++) {
+        loss = loss_ptr ? *loss_ptr : 0;
+        best = avcodec_find_best_pix_fmt_of_2(best, pix_fmt_list[i], src_pix_fmt, has_alpha, &loss);
+    }
 
+    if (loss_ptr)
+        *loss_ptr = loss;
     return best;
 }
 

diff --git a/libavcodec/imm4.c b/libavcodec/imm4.c
new file mode 100644
index 0000000..a4e9b5d
--- /dev/null
+++ b/libavcodec/imm4.c

@@ -0,0 +1,542 @@
+/*
+ * Infinity IMM4 decoder
+ *
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libavutil/thread.h"
+
+#include "avcodec.h"
+#include "bswapdsp.h"
+#include "copy_block.h"
+#include "get_bits.h"
+#include "idctdsp.h"
+#include "internal.h"
+
+typedef struct IMM4Context {
+    BswapDSPContext bdsp;
+    GetBitContext  gb;
+
+    AVFrame *prev_frame;
+    uint8_t *bitstream;
+    int bitstream_size;
+
+    int changed_size;
+    int factor;
+    unsigned lo;
+    unsigned hi;
+
+    ScanTable intra_scantable;
+    DECLARE_ALIGNED(32, int16_t, block)[6][64];
+    IDCTDSPContext idsp;
+} IMM4Context;
+
+static const uint8_t intra_cb[] = {
+    24, 18, 12
+};
+
+static const uint8_t inter_cb[] = {
+    30, 20, 15
+};
+
+static const uint8_t cbplo_symbols[] = {
+    3, 4, 19, 20, 35, 36, 51, 52
+};
+
+static const uint8_t cbplo_bits[] = {
+    1, 4, 3, 6, 3, 6, 3, 6
+};
+
+static const uint8_t cbplo_codes[] = {
+    1, 1, 1, 1, 2, 2, 3, 3
+};
+
+static const uint8_t cbphi_bits[] = {
+    4, 5, 5, 4, 5, 4, 6, 4, 5, 6, 4, 4, 4, 4, 4, 2
+};
+
+static const uint8_t cbphi_codes[] = {
+    3, 5, 4, 9, 3, 7, 2, 11, 2, 3, 5, 10, 4, 8, 6, 3
+};
+
+static const uint8_t blktype_symbols[] = {
+    0, 1, 2, 3, 4, 16, 17, 18, 19, 20, 32, 33, 34, 35, 48, 50, 51, 52
+};
+
+static const uint8_t blktype_bits[] = {
+    1, 3, 3, 5, 6, 4, 7, 7, 8, 9, 4, 7, 7, 8, 6, 8, 7, 9
+};
+
+static const uint8_t blktype_codes[] = {
+    1, 3, 2, 3, 4, 3, 7, 5, 4, 4, 2, 6, 4, 3, 5, 5, 3, 2
+};
+
+static const uint16_t block_symbols[] = {
+    0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0x81, 0x82, 0x83,
+    0x84, 0x85, 0x86, 0x101, 0x102, 0x103, 0x104, 0x181, 0x182, 0x183, 0x201, 0x202,
+    0x203, 0x281, 0x282, 0x283, 0x301, 0x302, 0x303, 0x381, 0x382, 0x401, 0x402,
+    0x481, 0x482, 0x501, 0x502, 0x581, 0x601, 0x681, 0x701, 0x781, 0x801, 0x881,
+    0x901, 0x981, 0xA01, 0xA81, 0xB01, 0xB81, 0xC01, 0xC81, 0xD01, 0x4001, 0x4002,
+    0x4003, 0x4081, 0x4082, 0x4101, 0x4181, 0x4201, 0x4281, 0x4301, 0x4381, 0x4401,
+    0x4481, 0x4501, 0x4581, 0x4601, 0x4681, 0x4701, 0x4781, 0x4801, 0x4881, 0x4901,
+    0x4981, 0x4A01, 0x4A81, 0x4B01, 0x4B81, 0x4C01, 0x4C81, 0x4D01, 0x4D81, 0x4E01,
+    0x4E81, 0x4F01, 0x4F81, 0x5001, 0x5081, 0x5101, 0x5181, 0x5201, 0x5281, 0x5301,
+    0x5381, 0x5401
+};
+
+static const uint8_t block_bits[] = {
+    7, 2, 4, 6, 7, 8, 9, 9, 10, 10, 11, 11, 11, 3, 6, 8, 10, 11, 12, 4, 8,
+    10, 12, 5, 9, 10, 5, 9, 12, 5, 10, 12, 6, 10, 12, 6, 10, 6, 10, 6,
+    10, 7, 12, 7, 7, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 11, 11, 12, 12, 4, 9,
+    11, 6, 11, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9,
+    9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
+    12, 12
+};
+
+static const uint8_t block_codes[] = {
+    3, 2, 15, 21, 23, 31, 37, 36, 33, 32, 7, 6, 32, 6, 20, 30, 15, 33, 80,
+    14, 29, 14, 81, 13, 35, 13, 12, 34, 82, 11, 12, 83, 19, 11, 84, 18,
+    10, 17, 9, 16, 8, 22, 85, 21, 20, 28, 27, 33, 32, 31, 30, 29, 28,
+    27, 26, 34, 35, 86, 87, 7, 25, 5, 15, 4, 14, 13, 12, 19, 18, 17, 16,
+    26, 25, 24, 23, 22, 21, 20, 19, 24, 23, 22, 21, 20, 19, 18, 17, 7,
+    6, 5, 4, 36, 37, 38, 39, 88, 89, 90, 91, 92, 93, 94, 95
+};
+
+static VLC cbplo_tab;
+static VLC cbphi_tab;
+static VLC blktype_tab;
+static VLC block_tab;
+
+static int get_cbphi(GetBitContext *gb, int x)
+{
+    int value;
+
+    value = get_vlc2(gb, cbphi_tab.table, cbphi_tab.bits, 1);
+    if (value < 0)
+        return AVERROR_INVALIDDATA;
+
+    return x ? value : 15 - value;
+}
+
+static int decode_block(AVCodecContext *avctx, GetBitContext *gb,
+                        int block, int factor, int flag, int offset)
+{
+    IMM4Context *s = avctx->priv_data;
+    const uint8_t *scantable = s->intra_scantable.permutated;
+    int i, last, len, factor2;
+
+    for (i = !flag; i < 64; i++) {
+        int value;
+
+        value = get_vlc2(gb, block_tab.table, block_tab.bits, 1);
+        if (value < 0)
+            return AVERROR_INVALIDDATA;
+        if (value == 0) {
+            last = get_bits1(gb);
+            len = get_bits(gb, 6);
+            factor2 = get_sbits(gb, 8);
+        } else {
+            factor2 = value & 0x7F;
+            last = (value >> 14) & 1;
+            len = (value >> 7) & 0x3F;
+            if (get_bits1(gb))
+                factor2 = -factor2;
+        }
+        i += len;
+        if (i >= 64)
+            break;
+        s->block[block][scantable[i]] = offset * (factor2 < 0 ? -1 : 1) + factor * factor2;
+        if (last)
+            break;
+    }
+
+    return 0;
+}
+
+static int decode_blocks(AVCodecContext *avctx, GetBitContext *gb,
+                         unsigned cbp, int flag, int offset)
+{
+    IMM4Context *s = avctx->priv_data;
+    const uint8_t *scantable = s->intra_scantable.permutated;
+    int ret, i;
+
+    memset(s->block, 0, sizeof(s->block));
+
+    for (i = 0; i < 6; i++) {
+        if (!flag) {
+            int x = get_bits(gb, 8);
+
+            if (x == 255)
+                x = 128;
+            x *= 8;
+
+            s->block[i][scantable[0]] = x;
+        }
+
+        if (cbp & (1 << (5 - i))) {
+            ret = decode_block(avctx, gb, i, s->factor, flag, offset);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    return 0;
+}
+
+static int decode_intra(AVCodecContext *avctx, GetBitContext *gb, AVFrame *frame)
+{
+    IMM4Context *s = avctx->priv_data;
+    int ret, x, y, offset = 0;
+
+    if (s->hi == 0) {
+        if (s->lo > 2)
+            return AVERROR_INVALIDDATA;
+        s->factor = intra_cb[s->lo];
+    } else {
+        if (s->hi == 1) {
+            s->factor = s->lo * 2;
+        } else {
+            s->factor = s->lo * 2;
+        }
+    }
+
+    if (s->hi) {
+        offset = s->factor;
+        offset >>= 1;
+        if (!(offset & 1))
+            offset--;
+    }
+
+    for (y = 0; y < avctx->height; y += 16) {
+        for (x = 0; x < avctx->width; x += 16) {
+            unsigned cbphi, cbplo;
+
+            cbplo = get_vlc2(gb, cbplo_tab.table, cbplo_tab.bits, 1) >> 4;
+            skip_bits1(gb);
+
+            cbphi = get_cbphi(gb, 1);
+
+            ret = decode_blocks(avctx, gb, cbplo | (cbphi << 2), 0, offset);
+            if (ret < 0)
+                return ret;
+
+            s->idsp.idct_put(frame->data[0] + y * frame->linesize[0] + x,
+                             frame->linesize[0], s->block[0]);
+            s->idsp.idct_put(frame->data[0] + y * frame->linesize[0] + x + 8,
+                             frame->linesize[0], s->block[1]);
+            s->idsp.idct_put(frame->data[0] + (y + 8) * frame->linesize[0] + x,
+                             frame->linesize[0], s->block[2]);
+            s->idsp.idct_put(frame->data[0] + (y + 8) * frame->linesize[0] + x + 8,
+                             frame->linesize[0], s->block[3]);
+            s->idsp.idct_put(frame->data[1] + (y >> 1) * frame->linesize[1] + (x >> 1),
+                             frame->linesize[1], s->block[4]);
+            s->idsp.idct_put(frame->data[2] + (y >> 1) * frame->linesize[2] + (x >> 1),
+                             frame->linesize[2], s->block[5]);
+        }
+    }
+
+    return 0;
+}
+
+static int decode_inter(AVCodecContext *avctx, GetBitContext *gb,
+                        AVFrame *frame, AVFrame *prev)
+{
+    IMM4Context *s = avctx->priv_data;
+    int ret, x, y, offset = 0;
+
+    if (s->hi == 0) {
+        if (s->lo > 2)
+            return AVERROR_INVALIDDATA;
+        s->factor = inter_cb[s->lo];
+    } else {
+        if (s->hi == 1) {
+            s->factor = s->lo * 2;
+        } else {
+            s->factor = s->lo * 2;
+        }
+    }
+
+    if (s->hi) {
+        offset = s->factor;
+        offset >>= 1;
+        if (!(offset & 1))
+            offset--;
+    }
+
+    for (y = 0; y < avctx->height; y += 16) {
+        for (x = 0; x < avctx->width; x += 16) {
+            int reverse, intra_block, value;
+            unsigned cbphi, cbplo;
+
+            if (get_bits1(gb)) {
+                copy_block16(frame->data[0] + y * frame->linesize[0] + x,
+                             prev->data[0] + y * prev->linesize[0] + x,
+                             frame->linesize[0], prev->linesize[0], 16);
+                copy_block8(frame->data[1] + (y >> 1) * frame->linesize[1] + (x >> 1),
+                            prev->data[1] + (y >> 1) * prev->linesize[1] + (x >> 1),
+                            frame->linesize[1], prev->linesize[1], 8);
+                copy_block8(frame->data[2] + (y >> 1) * frame->linesize[2] + (x >> 1),
+                            prev->data[2] + (y >> 1) * prev->linesize[2] + (x >> 1),
+                            frame->linesize[2], prev->linesize[2], 8);
+                continue;
+            }
+
+            value = get_vlc2(gb, blktype_tab.table, blktype_tab.bits, 1);
+            if (value < 0)
+                return AVERROR_INVALIDDATA;
+
+            intra_block = value & 0x07;
+            reverse = intra_block == 3;
+            if (reverse)
+                skip_bits1(gb);
+
+            cbplo = value >> 4;
+            cbphi = get_cbphi(gb, reverse);
+            if (intra_block) {
+                ret = decode_blocks(avctx, gb, cbplo | (cbphi << 2), 0, offset);
+                if (ret < 0)
+                    return ret;
+
+                s->idsp.idct_put(frame->data[0] + y * frame->linesize[0] + x,
+                                 frame->linesize[0], s->block[0]);
+                s->idsp.idct_put(frame->data[0] + y * frame->linesize[0] + x + 8,
+                                 frame->linesize[0], s->block[1]);
+                s->idsp.idct_put(frame->data[0] + (y + 8) * frame->linesize[0] + x,
+                                 frame->linesize[0], s->block[2]);
+                s->idsp.idct_put(frame->data[0] + (y + 8) * frame->linesize[0] + x + 8,
+                                 frame->linesize[0], s->block[3]);
+                s->idsp.idct_put(frame->data[1] + (y >> 1) * frame->linesize[1] + (x >> 1),
+                                 frame->linesize[1], s->block[4]);
+                s->idsp.idct_put(frame->data[2] + (y >> 1) * frame->linesize[2] + (x >> 1),
+                                 frame->linesize[2], s->block[5]);
+            } else {
+                skip_bits(gb, 2);
+                ret = decode_blocks(avctx, gb, cbplo | (cbphi << 2), 1, offset);
+                if (ret < 0)
+                    return ret;
+
+                copy_block16(frame->data[0] + y * frame->linesize[0] + x,
+                             prev->data[0] + y * prev->linesize[0] + x,
+                             frame->linesize[0], prev->linesize[0], 16);
+                copy_block8(frame->data[1] + (y >> 1) * frame->linesize[1] + (x >> 1),
+                            prev->data[1] + (y >> 1) * prev->linesize[1] + (x >> 1),
+                            frame->linesize[1], prev->linesize[1], 8);
+                copy_block8(frame->data[2] + (y >> 1) * frame->linesize[2] + (x >> 1),
+                            prev->data[2] + (y >> 1) * prev->linesize[2] + (x >> 1),
+                            frame->linesize[2], prev->linesize[2], 8);
+
+                s->idsp.idct_add(frame->data[0] + y * frame->linesize[0] + x,
+                                 frame->linesize[0], s->block[0]);
+                s->idsp.idct_add(frame->data[0] + y * frame->linesize[0] + x + 8,
+                                 frame->linesize[0], s->block[1]);
+                s->idsp.idct_add(frame->data[0] + (y + 8) * frame->linesize[0] + x,
+                                 frame->linesize[0], s->block[2]);
+                s->idsp.idct_add(frame->data[0] + (y + 8) * frame->linesize[0] + x + 8,
+                                 frame->linesize[0], s->block[3]);
+                s->idsp.idct_add(frame->data[1] + (y >> 1) * frame->linesize[1] + (x >> 1),
+                                 frame->linesize[1], s->block[4]);
+                s->idsp.idct_add(frame->data[2] + (y >> 1) * frame->linesize[2] + (x >> 1),
+                                 frame->linesize[2], s->block[5]);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data,
+                        int *got_frame, AVPacket *avpkt)
+{
+    IMM4Context *s = avctx->priv_data;
+    GetBitContext *gb = &s->gb;
+    AVFrame *frame = data;
+    unsigned type;
+    int ret, scaled;
+
+    if (avpkt->size <= 32)
+        return AVERROR_INVALIDDATA;
+
+    av_fast_padded_malloc(&s->bitstream, &s->bitstream_size,
+                          FFALIGN(avpkt->size, 4));
+    if (!s->bitstream)
+        return AVERROR(ENOMEM);
+
+    s->bdsp.bswap_buf((uint32_t *)s->bitstream,
+                      (uint32_t *)avpkt->data,
+                      (avpkt->size + 3) >> 2);
+
+    if ((ret = init_get_bits8(gb, s->bitstream, FFALIGN(avpkt->size, 4))) < 0)
+        return ret;
+
+    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+    avctx->color_range = AVCOL_RANGE_JPEG;
+
+    scaled = avpkt->data[8];
+    if (scaled < 2) {
+        int width, height;
+        int mode = avpkt->data[10];
+
+        switch (mode) {
+        case 1:
+            width = 352;
+            height = 240;
+            break;
+        case 2:
+            width = 704;
+            height = 240;
+            break;
+        case 4:
+            width = 480;
+            height = 704;
+            break;
+        case 17:
+            width = 352;
+            height = 288;
+            break;
+        case 18:
+            width = 704;
+            height = 288;
+            break;
+        default:
+            width = 704;
+            height = 576;
+            break;
+        }
+
+        if (s->changed_size == 1 &&
+            (avctx->width != width || avctx->height != height)) {
+            av_log(avctx, AV_LOG_ERROR, "Frame size change is unsupported.\n");
+            return AVERROR_INVALIDDATA;
+        }
+        avctx->width = width;
+        avctx->height = height;
+    }
+
+    s->changed_size = 1;
+    skip_bits_long(gb, 24 * 8);
+    type = get_bits_long(gb, 32);
+    s->hi = get_bits(gb, 16);
+    s->lo = get_bits(gb, 16);
+
+    switch (type) {
+    case 0x19781977:
+        frame->key_frame = 1;
+        frame->pict_type = AV_PICTURE_TYPE_I;
+        break;
+    case 0x12250926:
+        frame->key_frame = 0;
+        frame->pict_type = AV_PICTURE_TYPE_P;
+        break;
+    default:
+        avpriv_request_sample(avctx, "type %X", type);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    if ((ret = ff_get_buffer(avctx, frame, frame->key_frame ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
+        return ret;
+
+    if (frame->key_frame) {
+        ret = decode_intra(avctx, gb, frame);
+        if (ret < 0)
+            return ret;
+
+        av_frame_unref(s->prev_frame);
+        if ((ret = av_frame_ref(s->prev_frame, frame)) < 0)
+            return ret;
+    } else {
+        if (!s->prev_frame->data[0]) {
+            av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        ret = decode_inter(avctx, gb, frame, s->prev_frame);
+        if (ret < 0)
+            return ret;
+    }
+
+    *got_frame = 1;
+
+    return avpkt->size;
+}
+
+static av_cold void imm4_init_static_data(void)
+{
+    INIT_VLC_SPARSE_STATIC(&cbplo_tab, 9, FF_ARRAY_ELEMS(cbplo_bits),
+                           cbplo_bits, 1, 1, cbplo_codes, 1, 1, cbplo_symbols, 1, 1, 512);
+
+    INIT_VLC_SPARSE_STATIC(&cbphi_tab, 6, FF_ARRAY_ELEMS(cbphi_bits),
+                           cbphi_bits, 1, 1, cbphi_codes, 1, 1, NULL, 0, 0, 64);
+
+    INIT_VLC_SPARSE_STATIC(&blktype_tab, 9, FF_ARRAY_ELEMS(blktype_bits),
+                           blktype_bits, 1, 1, blktype_codes, 1, 1, blktype_symbols, 1, 1, 512);
+
+    INIT_VLC_SPARSE_STATIC(&block_tab, 12, FF_ARRAY_ELEMS(block_bits),
+                           block_bits, 1, 1, block_codes, 1, 1, block_symbols, 2, 2, 4096);
+}
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    static AVOnce init_static_once = AV_ONCE_INIT;
+    IMM4Context *s = avctx->priv_data;
+    uint8_t table[64];
+
+    for (int i = 0; i < 64; i++)
+        table[i] = i;
+
+    ff_bswapdsp_init(&s->bdsp);
+    ff_idctdsp_init(&s->idsp, avctx);
+    ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, table);
+
+    s->prev_frame = av_frame_alloc();
+    if (!s->prev_frame)
+        return AVERROR(ENOMEM);
+
+    ff_thread_once(&init_static_once, imm4_init_static_data);
+
+    return 0;
+}
+
+static av_cold int decode_close(AVCodecContext *avctx)
+{
+    IMM4Context *s = avctx->priv_data;
+
+    av_frame_free(&s->prev_frame);
+    av_freep(&s->bitstream);
+    s->bitstream_size = 0;
+
+    return 0;
+}
+
+AVCodec ff_imm4_decoder = {
+    .name             = "imm4",
+    .long_name        = NULL_IF_CONFIG_SMALL("Infinity IMM4"),
+    .type             = AVMEDIA_TYPE_VIDEO,
+    .id               = AV_CODEC_ID_IMM4,
+    .priv_data_size   = sizeof(IMM4Context),
+    .init             = decode_init,
+    .close            = decode_close,
+    .decode           = decode_frame,
+    .capabilities     = AV_CODEC_CAP_DR1,
+    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
+                        FF_CODEC_CAP_INIT_CLEANUP,
+};

diff --git a/libavcodec/indeo4.c b/libavcodec/indeo4.c
index a3562f6..4bfc6cd 100644
--- a/libavcodec/indeo4.c
+++ b/libavcodec/indeo4.c

@@ -30,6 +30,7 @@
 #define BITSTREAM_READER_LE
 #include "avcodec.h"
 #include "get_bits.h"
+#include "libavutil/imgutils.h"
 #include "indeo4data.h"
 #include "internal.h"
 #include "ivi.h"
@@ -178,6 +179,13 @@
     pic_conf.chroma_bands = 0;
     if (pic_conf.luma_bands)
         pic_conf.chroma_bands = decode_plane_subdivision(&ctx->gb);
+
+    if (av_image_check_size2(pic_conf.pic_width, pic_conf.pic_height, avctx->max_pixels, AV_PIX_FMT_YUV410P, 0, avctx) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "picture dimensions %d %d cannot be decoded\n",
+               pic_conf.pic_width, pic_conf.pic_height);
+        return AVERROR_INVALIDDATA;
+    }
+
     ctx->is_scalable = pic_conf.luma_bands != 1 || pic_conf.chroma_bands != 1;
     if (ctx->is_scalable && (pic_conf.luma_bands != 4 || pic_conf.chroma_bands != 1)) {
         av_log(avctx, AV_LOG_ERROR, "Scalability: unsupported subdivision! Luma bands: %d, chroma bands: %d\n",
@@ -260,12 +268,14 @@
  *  @param[in]     avctx     pointer to the AVCodecContext
  *  @return        result code: 0 = OK, negative number = error
  */
-static int decode_band_hdr(IVI45DecContext *ctx, IVIBandDesc *band,
+static int decode_band_hdr(IVI45DecContext *ctx, IVIBandDesc *arg_band,
                            AVCodecContext *avctx)
 {
     int plane, band_num, indx, transform_id, scan_indx;
     int i;
     int quant_mat;
+    IVIBandDesc temp_band, *band = &temp_band;
+    memcpy(&temp_band, arg_band, sizeof(temp_band));
 
     plane    = get_bits(&ctx->gb, 2);
     band_num = get_bits(&ctx->gb, 4);
@@ -395,10 +405,10 @@
 
         /* decode block huffman codebook */
         if (!get_bits1(&ctx->gb))
-            band->blk_vlc.tab = ctx->blk_vlc.tab;
+            arg_band->blk_vlc.tab = ctx->blk_vlc.tab;
         else
             if (ff_ivi_dec_huff_desc(&ctx->gb, 1, IVI_BLK_HUFF,
-                                     &band->blk_vlc, avctx))
+                                     &arg_band->blk_vlc, avctx))
                 return AVERROR_INVALIDDATA;
 
         /* select appropriate rvmap table for this band */
@@ -439,6 +449,9 @@
         return AVERROR_INVALIDDATA;
     }
 
+    band->blk_vlc = arg_band->blk_vlc;
+    memcpy(arg_band, band, sizeof(*arg_band));
+
     return 0;
 }
 
@@ -487,6 +500,11 @@
             mb->b_mv_x   =
             mb->b_mv_y   = 0;
 
+            if (get_bits_left(&ctx->gb) < 1) {
+                av_log(avctx, AV_LOG_ERROR, "Insufficient input for mb info\n");
+                return AVERROR_INVALIDDATA;
+            }
+
             if (get_bits1(&ctx->gb)) {
                 if (ctx->frame_type == IVI4_FRAMETYPE_INTRA) {
                     av_log(avctx, AV_LOG_ERROR, "Empty macroblock in an INTRA picture!\n");

diff --git a/libavcodec/indeo5.c b/libavcodec/indeo5.c
index b39cffd..7b9da53 100644
--- a/libavcodec/indeo5.c
+++ b/libavcodec/indeo5.c

@@ -642,6 +642,8 @@
     IVI45DecContext  *ctx = avctx->priv_data;
     int             result;
 
+    ctx->gop_invalid = 1;
+
     ff_ivi_init_static_vlc();
 
     /* copy rvmap tables in our context so we can apply changes to them */

diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index faa923c..0c2133f 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h

@@ -76,22 +76,20 @@
 #endif
 
 
-#if !FF_API_QUANT_BIAS
 #define FF_DEFAULT_QUANT_BIAS 999999
-#endif
 
-#if !FF_API_QSCALE_TYPE
 #define FF_QSCALE_TYPE_MPEG1 0
 #define FF_QSCALE_TYPE_MPEG2 1
 #define FF_QSCALE_TYPE_H264  2
 #define FF_QSCALE_TYPE_VP56  3
-#endif
 
-#define FF_SANE_NB_CHANNELS 64U
+#define FF_SANE_NB_CHANNELS 256U
 
 #define FF_SIGNBIT(x) ((x) >> CHAR_BIT * sizeof(x) - 1)
 
-#if HAVE_SIMD_ALIGN_32
+#if HAVE_SIMD_ALIGN_64
+#   define STRIDE_ALIGN 64 /* AVX-512 */
+#elif HAVE_SIMD_ALIGN_32
 #   define STRIDE_ALIGN 32
 #elif HAVE_SIMD_ALIGN_16
 #   define STRIDE_ALIGN 16
@@ -237,21 +235,8 @@
 
 unsigned int avpriv_toupper4(unsigned int x);
 
-/**
- * does needed setup of pkt_pts/pos and such for (re)get_buffer();
- */
-int ff_init_buffer_info(AVCodecContext *s, AVFrame *frame);
-
-
 void ff_color_frame(AVFrame *frame, const int color[4]);
 
-extern volatile int ff_avcodec_locked;
-int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec);
-int ff_unlock_avcodec(const AVCodec *codec);
-
-int avpriv_lock_avformat(void);
-int avpriv_unlock_avformat(void);
-
 /**
  * Maximum size in bytes of extradata.
  * This value was chosen such that every bit of the buffer is
@@ -373,14 +358,16 @@
 int ff_side_data_update_matrix_encoding(AVFrame *frame,
                                         enum AVMatrixEncoding matrix_encoding);
 
-#if FF_API_MERGE_SD
-int ff_packet_split_and_drop_side_data(AVPacket *pkt);
-#endif
-
 /**
  * Select the (possibly hardware accelerated) pixel format.
  * This is a wrapper around AVCodecContext.get_format() and should be used
  * instead of calling get_format() directly.
+ *
+ * The list of pixel formats must contain at least one valid entry, and is
+ * terminated with AV_PIX_FMT_NONE.  If it is possible to decode to software,
+ * the last entry in the list must be the most accurate software format.
+ * If it is not possible to decode to software, AVCodecContext.sw_pix_fmt
+ * must be set before calling this function.
  */
 int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt);
 
@@ -417,4 +404,10 @@
  */
 int64_t ff_guess_coded_bitrate(AVCodecContext *avctx);
 
+#if defined(_WIN32) && CONFIG_SHARED && !defined(BUILDING_avcodec)
+#    define av_export_avcodec __declspec(dllimport)
+#else
+#    define av_export_avcodec
+#endif
+
 #endif /* AVCODEC_INTERNAL_H */

diff --git a/libavcodec/interplayacm.c b/libavcodec/interplayacm.c
index c897e72..5639d8d 100644
--- a/libavcodec/interplayacm.c
+++ b/libavcodec/interplayacm.c

@@ -629,5 +629,6 @@
     .close          = decode_close,
     .decode         = decode_frame,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
     .priv_data_size = sizeof(InterplayACMContext),
 };

diff --git a/libavcodec/ituh263dec.c b/libavcodec/ituh263dec.c
index fc95a53..1b57e53 100644
--- a/libavcodec/ituh263dec.c
+++ b/libavcodec/ituh263dec.c

@@ -207,12 +207,27 @@
 }
 
 /**
- * Decode the group of blocks / video packet header.
+ * Decode the group of blocks / video packet header / slice header (MPEG-4 Studio).
  * @return bit position of the resync_marker, or <0 if none was found
  */
 int ff_h263_resync(MpegEncContext *s){
     int left, pos, ret;
 
+    /* In MPEG-4 studio mode look for a new slice startcode
+     * and decode slice header */
+    if(s->codec_id==AV_CODEC_ID_MPEG4 && s->studio_profile) {
+        align_get_bits(&s->gb);
+
+        while (get_bits_left(&s->gb) >= 32 && show_bits_long(&s->gb, 32) != SLICE_START_CODE) {
+            get_bits(&s->gb, 8);
+        }
+
+        if (show_bits_long(&s->gb, 32) == SLICE_START_CODE)
+            return get_bits_count(&s->gb);
+        else
+            return -1;
+    }
+
     if(s->codec_id==AV_CODEC_ID_MPEG4){
         skip_bits1(&s->gb);
         align_get_bits(&s->gb);

diff --git a/libavcodec/ivi.c b/libavcodec/ivi.c
index cea40d8..b23d4af 100644
--- a/libavcodec/ivi.c
+++ b/libavcodec/ivi.c

@@ -913,8 +913,16 @@
         return;
 
     for (y = 0; y < plane->height; y++) {
-        for (x = 0; x < plane->width; x++)
-            dst[x] = av_clip_uint8(src[x] + 128);
+        int m = 0;
+        int w = plane->width;
+        for (x = 0; x < w; x++) {
+            int t = src[x] + 128;
+            dst[x] = t;
+            m |= t;
+        }
+        if (m & ~255)
+            for (x = 0; x < w; x++)
+                dst[x] = av_clip_uint8(src[x] + 128);
         src += pitch;
         dst += dst_pitch;
     }

diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
index baaf474..e91d932 100644
--- a/libavcodec/j2kenc.c
+++ b/libavcodec/j2kenc.c

@@ -69,7 +69,9 @@
 #include "bytestream.h"
 #include "jpeg2000.h"
 #include "libavutil/common.h"
+#include "libavutil/pixdesc.h"
 #include "libavutil/opt.h"
+#include "libavutil/intreadwrite.h"
 
 #define NMSEDEC_BITS 7
 #define NMSEDEC_FRACBITS (NMSEDEC_BITS-1)
@@ -662,7 +664,8 @@
         bpno = cblk->nonzerobits - 1;
     }
 
-    ff_mqc_initenc(&t1->mqc, cblk->data);
+    cblk->data[0] = 0;
+    ff_mqc_initenc(&t1->mqc, cblk->data + 1);
 
     for (passno = 0; bpno >= 0; passno++){
         nmsedec=0;
@@ -796,7 +799,7 @@
                 if (cblk->ninclpasses){
                     if (s->buf_end - s->buf < cblk->passes[cblk->ninclpasses-1].rate)
                         return -1;
-                    bytestream_put_buffer(&s->buf, cblk->data,   cblk->passes[cblk->ninclpasses-1].rate
+                    bytestream_put_buffer(&s->buf, cblk->data + 1,   cblk->passes[cblk->ninclpasses-1].rate
                                                                - cblk->passes[cblk->ninclpasses-1].flushed_len);
                     bytestream_put_buffer(&s->buf, cblk->passes[cblk->ninclpasses-1].flushed,
                                                    cblk->passes[cblk->ninclpasses-1].flushed_len);
@@ -937,6 +940,12 @@
                                 }
                             }
                         }
+                        if (!prec->cblk[cblkno].data)
+                            prec->cblk[cblkno].data = av_malloc(1 + 8192);
+                        if (!prec->cblk[cblkno].passes)
+                            prec->cblk[cblkno].passes = av_malloc_array(JPEG2000_MAX_PASSES, sizeof (*prec->cblk[cblkno].passes));
+                        if (!prec->cblk[cblkno].data || !prec->cblk[cblkno].passes)
+                            return AVERROR(ENOMEM);
                         encode_cblk(s, &t1, prec->cblk + cblkno, tile, xx1 - xx0, yy1 - yy0,
                                     bandpos, codsty->nreslevels - reslevelno - 1);
                         xx0 = xx1;
@@ -1046,14 +1055,38 @@
         bytestream_put_byte(&s->buf, 1);
         bytestream_put_byte(&s->buf, 0);
         bytestream_put_byte(&s->buf, 0);
-        if (s->ncomponents == 1) {
-            bytestream_put_be32(&s->buf, 17);
-        } else if (avctx->pix_fmt == AV_PIX_FMT_RGB24) {
+        if (avctx->pix_fmt == AV_PIX_FMT_RGB24 || avctx->pix_fmt == AV_PIX_FMT_PAL8) {
             bytestream_put_be32(&s->buf, 16);
+        } else if (s->ncomponents == 1) {
+            bytestream_put_be32(&s->buf, 17);
         } else {
             bytestream_put_be32(&s->buf, 18);
         }
         update_size(chunkstart, s->buf);
+        if (avctx->pix_fmt == AV_PIX_FMT_PAL8) {
+            int i;
+            uint8_t *palette = pict->data[1];
+            chunkstart = s->buf;
+            bytestream_put_be32(&s->buf, 0);
+            bytestream_put_buffer(&s->buf, "pclr", 4);
+            bytestream_put_be16(&s->buf, AVPALETTE_COUNT);
+            bytestream_put_byte(&s->buf, 3); // colour channels
+            bytestream_put_be24(&s->buf, 0x070707); //colour depths
+            for (i = 0; i < AVPALETTE_COUNT; i++) {
+                bytestream_put_be24(&s->buf, HAVE_BIGENDIAN ? AV_RB24(palette + 1) : AV_RL24(palette));
+                palette += 4;
+            }
+            update_size(chunkstart, s->buf);
+            chunkstart = s->buf;
+            bytestream_put_be32(&s->buf, 0);
+            bytestream_put_buffer(&s->buf, "cmap", 4);
+            for (i = 0; i < 3; i++) {
+                bytestream_put_be16(&s->buf, 0); // component
+                bytestream_put_byte(&s->buf, 1); // palette mapping
+                bytestream_put_byte(&s->buf, i); // index
+            }
+            update_size(chunkstart, s->buf);
+        }
         update_size(jp2hstart, s->buf);
 
         jp2cstart = s->buf;
@@ -1116,6 +1149,12 @@
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
+    if (avctx->pix_fmt == AV_PIX_FMT_PAL8 && (s->pred != FF_DWT97_INT || s->format != CODEC_JP2)) {
+        av_log(s->avctx, AV_LOG_WARNING, "Forcing lossless jp2 for pal8\n");
+        s->pred = FF_DWT97_INT;
+        s->format = CODEC_JP2;
+    }
+
     // defaults:
     // TODO: implement setting non-standard precinct size
     memset(codsty->log2_prec_widths , 15, sizeof(codsty->log2_prec_widths ));
@@ -1146,13 +1185,15 @@
 
     if (avctx->pix_fmt == AV_PIX_FMT_RGB24){
         s->ncomponents = 3;
-    } else if (avctx->pix_fmt == AV_PIX_FMT_GRAY8){
+    } else if (avctx->pix_fmt == AV_PIX_FMT_GRAY8 || avctx->pix_fmt == AV_PIX_FMT_PAL8){
         s->ncomponents = 1;
     } else{ // planar YUV
         s->planar = 1;
         s->ncomponents = 3;
-        avcodec_get_chroma_sub_sample(avctx->pix_fmt,
-                s->chroma_shift, s->chroma_shift + 1);
+        ret = av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt,
+                                               s->chroma_shift, s->chroma_shift + 1);
+        if (ret)
+            return ret;
     }
 
     ff_jpeg2000_init_tier1_luts();
@@ -1213,6 +1254,7 @@
         AV_PIX_FMT_RGB24, AV_PIX_FMT_YUV444P, AV_PIX_FMT_GRAY8,
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
         AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_PAL8,
         AV_PIX_FMT_NONE
     },
     .priv_class     = &j2k_class,

diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 026b2db..8e90980 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c

@@ -357,10 +357,8 @@
                                  comp->reslevel[reslevelno-1].coord[1][0];
         }
 
-        cblk->zero      = 0;
         cblk->lblock    = 3;
         cblk->length    = 0;
-        memset(cblk->lengthinc, 0, sizeof(cblk->lengthinc));
         cblk->npasses   = 0;
     }
 
@@ -601,9 +599,21 @@
             for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++) {
                 if (band->prec) {
                     Jpeg2000Prec *prec = band->prec + precno;
+                    int nb_code_blocks = prec->nb_codeblocks_height * prec->nb_codeblocks_width;
+
                     av_freep(&prec->zerobits);
                     av_freep(&prec->cblkincl);
-                    av_freep(&prec->cblk);
+                    if (prec->cblk) {
+                        int cblkno;
+                        for (cblkno = 0; cblkno < nb_code_blocks; cblkno ++) {
+                            Jpeg2000Cblk *cblk = &prec->cblk[cblkno];
+                            av_freep(&cblk->data);
+                            av_freep(&cblk->passes);
+                            av_freep(&cblk->lengthinc);
+                            av_freep(&cblk->data_start);
+                        }
+                        av_freep(&prec->cblk);
+                    }
                 }
             }
 

diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index 8a022ad..c429ca5 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h

@@ -165,15 +165,15 @@
     uint8_t ninclpasses; // number coding of passes included in codestream
     uint8_t nonzerobits;
     uint16_t length;
-    uint16_t lengthinc[JPEG2000_MAX_PASSES];
+    uint16_t *lengthinc;
     uint8_t nb_lengthinc;
     uint8_t lblock;
-    uint8_t zero;
-    uint8_t data[8192];
+    uint8_t *data;
+    size_t data_allocated;
     int nb_terminations;
     int nb_terminationsinc;
-    int data_start[JPEG2000_MAX_PASSES];
-    Jpeg2000Pass passes[JPEG2000_MAX_PASSES];
+    int *data_start;
+    Jpeg2000Pass *passes;
     int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
 } Jpeg2000Cblk; // code block
 

diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 9a5e64e..96dab8e 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c

@@ -34,6 +34,7 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/thread.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
@@ -81,7 +82,7 @@
     Jpeg2000CodingStyle codsty[4];
     Jpeg2000QuantStyle  qntsty[4];
     Jpeg2000POC         poc;
-    Jpeg2000TilePart    tile_part[256];
+    Jpeg2000TilePart    tile_part[32];
     uint16_t tp_idx;                    // Tile-part index
     int coord[2][2];                    // border coordinates {{x0, x1}, {y0, y1}}
 } Jpeg2000Tile;
@@ -284,7 +285,7 @@
         avpriv_request_sample(s->avctx, "Support for image offsets");
         return AVERROR_PATCHWELCOME;
     }
-    if (av_image_check_size(s->width, s->height, 0, s->avctx)) {
+    if (av_image_check_size2(s->width, s->height, s->avctx->max_pixels, AV_PIX_FMT_NONE, 0, s->avctx)) {
         avpriv_request_sample(s->avctx, "Large Dimensions");
         return AVERROR_PATCHWELCOME;
     }
@@ -342,7 +343,10 @@
     s->numXtiles = ff_jpeg2000_ceildiv(s->width  - s->tile_offset_x, s->tile_width);
     s->numYtiles = ff_jpeg2000_ceildiv(s->height - s->tile_offset_y, s->tile_height);
 
-    if (s->numXtiles * (uint64_t)s->numYtiles > INT_MAX/sizeof(*s->tile)) {
+    // There must be at least a SOT and SOD per tile, their minimum size is 14
+    if (s->numXtiles * (uint64_t)s->numYtiles > INT_MAX/sizeof(*s->tile) ||
+        s->numXtiles * s->numYtiles * 14LL > bytestream2_size(&s->g)
+    ) {
         s->numXtiles = s->numYtiles = 0;
         return AVERROR(EINVAL);
     }
@@ -760,7 +764,10 @@
         return AVERROR_INVALIDDATA;
     }
 
-    av_assert0(TPsot < FF_ARRAY_ELEMS(s->tile[Isot].tile_part));
+    if (TPsot >= FF_ARRAY_ELEMS(s->tile[Isot].tile_part)) {
+        avpriv_request_sample(s->avctx, "Too many tile parts");
+        return AVERROR_PATCHWELCOME;
+    }
 
     s->tile[Isot].tp_idx = TPsot;
     tp             = s->tile[Isot].tile_part + TPsot;
@@ -949,6 +956,7 @@
         for (cblkno = 0; cblkno < nb_code_blocks; cblkno++) {
             Jpeg2000Cblk *cblk = prec->cblk + cblkno;
             int incl, newpasses, llen;
+            void *tmp;
 
             if (cblk->npasses)
                 incl = get_bits(s, 1);
@@ -988,6 +996,14 @@
 
             cblk->nb_lengthinc = 0;
             cblk->nb_terminationsinc = 0;
+            av_free(cblk->lengthinc);
+            cblk->lengthinc  = av_mallocz_array(newpasses    , sizeof(*cblk->lengthinc));
+            if (!cblk->lengthinc)
+                return AVERROR(ENOMEM);
+            tmp = av_realloc_array(cblk->data_start, cblk->nb_terminations + newpasses + 1, sizeof(*cblk->data_start));
+            if (!tmp)
+                return AVERROR(ENOMEM);
+            cblk->data_start = tmp;
             do {
                 int newpasses1 = 0;
 
@@ -1001,10 +1017,18 @@
 
                 if ((ret = get_bits(s, av_log2(newpasses1) + cblk->lblock)) < 0)
                     return ret;
-                if (ret > sizeof(cblk->data)) {
+                if (ret > cblk->data_allocated) {
+                    size_t new_size = FFMAX(2*cblk->data_allocated, ret);
+                    void *new = av_realloc(cblk->data, new_size);
+                    if (new) {
+                        cblk->data = new;
+                        cblk->data_allocated = new_size;
+                    }
+                }
+                if (ret > cblk->data_allocated) {
                     avpriv_request_sample(s->avctx,
                                         "Block with lengthinc greater than %"SIZE_SPECIFIER"",
-                                        sizeof(cblk->data));
+                                        cblk->data_allocated);
                     return AVERROR_PATCHWELCOME;
                 }
                 cblk->lengthinc[cblk->nb_lengthinc++] = ret;
@@ -1029,9 +1053,19 @@
         nb_code_blocks = prec->nb_codeblocks_height * prec->nb_codeblocks_width;
         for (cblkno = 0; cblkno < nb_code_blocks; cblkno++) {
             Jpeg2000Cblk *cblk = prec->cblk + cblkno;
+            if (!cblk->nb_terminationsinc && !cblk->lengthinc)
+                continue;
             for (cwsno = 0; cwsno < cblk->nb_lengthinc; cwsno ++) {
+                if (cblk->data_allocated < cblk->length + cblk->lengthinc[cwsno] + 4) {
+                    size_t new_size = FFMAX(2*cblk->data_allocated, cblk->length + cblk->lengthinc[cwsno] + 4);
+                    void *new = av_realloc(cblk->data, new_size);
+                    if (new) {
+                        cblk->data = new;
+                        cblk->data_allocated = new_size;
+                    }
+                }
                 if (   bytestream2_get_bytes_left(&s->g) < cblk->lengthinc[cwsno]
-                    || sizeof(cblk->data) < cblk->length + cblk->lengthinc[cwsno] + 4
+                    || cblk->data_allocated < cblk->length + cblk->lengthinc[cwsno] + 4
                 ) {
                     av_log(s->avctx, AV_LOG_ERROR,
                         "Block length %"PRIu16" or lengthinc %d is too large, left %d\n",
@@ -1050,6 +1084,7 @@
                     cblk->data_start[cblk->nb_terminations] = cblk->length;
                 }
             }
+            av_freep(&cblk->lengthinc);
         }
     }
     return 0;
@@ -1127,6 +1162,9 @@
             step_x = 32;
             step_y = 32;
 
+            if (RSpoc > FFMIN(codsty->nreslevels, REpoc))
+                continue;
+
             for (reslevelno = RSpoc; reslevelno < FFMIN(codsty->nreslevels, REpoc); reslevelno++) {
                 uint8_t reducedresno = codsty->nreslevels - 1 -reslevelno; //  ==> N_L - r
                 Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
@@ -1146,10 +1184,10 @@
                         int xc = x / s->cdx[compno];
                         int yc = y / s->cdy[compno];
 
-                        if (yc % (1 << (rlevel->log2_prec_height + reducedresno)) && y != tile->coord[1][0]) //FIXME this is a subset of the check
+                        if (yc % (1LL << (rlevel->log2_prec_height + reducedresno)) && y != tile->coord[1][0]) //FIXME this is a subset of the check
                             continue;
 
-                        if (xc % (1 << (rlevel->log2_prec_width + reducedresno)) && x != tile->coord[0][0]) //FIXME this is a subset of the check
+                        if (xc % (1LL << (rlevel->log2_prec_width + reducedresno)) && x != tile->coord[0][0]) //FIXME this is a subset of the check
                             continue;
 
                         // check if a precinct exists
@@ -1216,10 +1254,10 @@
                         if (reslevelno >= codsty->nreslevels)
                             continue;
 
-                        if (yc % (1 << (rlevel->log2_prec_height + reducedresno)) && y != tile->coord[1][0]) //FIXME this is a subset of the check
+                        if (yc % (1LL << (rlevel->log2_prec_height + reducedresno)) && y != tile->coord[1][0]) //FIXME this is a subset of the check
                             continue;
 
-                        if (xc % (1 << (rlevel->log2_prec_width + reducedresno)) && x != tile->coord[0][0]) //FIXME this is a subset of the check
+                        if (xc % (1LL << (rlevel->log2_prec_width + reducedresno)) && x != tile->coord[0][0]) //FIXME this is a subset of the check
                             continue;
 
                         // check if a precinct exists
@@ -1287,10 +1325,10 @@
                         uint8_t reducedresno = codsty->nreslevels - 1 -reslevelno; //  ==> N_L - r
                         Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
 
-                        if (yc % (1 << (rlevel->log2_prec_height + reducedresno)) && y != tile->coord[1][0]) //FIXME this is a subset of the check
+                        if (yc % (1LL << (rlevel->log2_prec_height + reducedresno)) && y != tile->coord[1][0]) //FIXME this is a subset of the check
                             continue;
 
-                        if (xc % (1 << (rlevel->log2_prec_width + reducedresno)) && x != tile->coord[0][0]) //FIXME this is a subset of the check
+                        if (xc % (1LL << (rlevel->log2_prec_width + reducedresno)) && x != tile->coord[0][0]) //FIXME this is a subset of the check
                             continue;
 
                         // check if a precinct exists
@@ -1981,7 +2019,16 @@
            bytestream2_get_bytes_left(&s->g) >= 8) {
         atom_size = bytestream2_get_be32u(&s->g);
         atom      = bytestream2_get_be32u(&s->g);
-        atom_end  = bytestream2_tell(&s->g) + atom_size - 8;
+        if (atom_size == 1) {
+            if (bytestream2_get_be32u(&s->g)) {
+                avpriv_request_sample(s->avctx, "Huge atom");
+                return 0;
+            }
+            atom_size = bytestream2_get_be32u(&s->g);
+            atom_end  = bytestream2_tell(&s->g) + atom_size - 16;
+        } else {
+            atom_end  = bytestream2_tell(&s->g) + atom_size -  8;
+        }
 
         if (atom == JP2_CODESTREAM)
             return 1;
@@ -2009,7 +2056,6 @@
                     }
                 } else if (atom2 == MKBETAG('p','c','l','r') && atom2_size >= 6) {
                     int i, size, colour_count, colour_channels, colour_depth[3];
-                    uint32_t r, g, b;
                     colour_count = bytestream2_get_be16u(&s->g);
                     colour_channels = bytestream2_get_byteu(&s->g);
                     // FIXME: Do not ignore channel_sign
@@ -2019,7 +2065,7 @@
                     size = (colour_depth[0] + 7 >> 3) * colour_count +
                            (colour_depth[1] + 7 >> 3) * colour_count +
                            (colour_depth[2] + 7 >> 3) * colour_count;
-                    if (colour_count > 256   ||
+                    if (colour_count > AVPALETTE_COUNT ||
                         colour_channels != 3 ||
                         colour_depth[0] > 16 ||
                         colour_depth[1] > 16 ||
@@ -2031,6 +2077,7 @@
                     }
                     s->pal8 = 1;
                     for (i = 0; i < colour_count; i++) {
+                        uint32_t r, g, b;
                         if (colour_depth[0] <= 8) {
                             r = bytestream2_get_byteu(&s->g) << 8 - colour_depth[0];
                             r |= r >> colour_depth[0];
@@ -2039,13 +2086,13 @@
                         }
                         if (colour_depth[1] <= 8) {
                             g = bytestream2_get_byteu(&s->g) << 8 - colour_depth[1];
-                            r |= r >> colour_depth[1];
+                            g |= g >> colour_depth[1];
                         } else {
                             g = bytestream2_get_be16u(&s->g) >> colour_depth[1] - 8;
                         }
                         if (colour_depth[2] <= 8) {
                             b = bytestream2_get_byteu(&s->g) << 8 - colour_depth[2];
-                            r |= r >> colour_depth[2];
+                            b |= b >> colour_depth[2];
                         } else {
                             b = bytestream2_get_be16u(&s->g) >> colour_depth[2] - 8;
                         }
@@ -2105,10 +2152,18 @@
     return 0;
 }
 
+static av_cold void jpeg2000_init_static_data(void)
+{
+    ff_jpeg2000_init_tier1_luts();
+    ff_mqc_init_context_tables();
+}
+
 static av_cold int jpeg2000_decode_init(AVCodecContext *avctx)
 {
+    static AVOnce init_static_once = AV_ONCE_INIT;
     Jpeg2000DecoderContext *s = avctx->priv_data;
 
+    ff_thread_once(&init_static_once, jpeg2000_init_static_data);
     ff_jpeg2000dsp_init(&s->dsp);
 
     return 0;
@@ -2186,12 +2241,6 @@
     return ret;
 }
 
-static av_cold void jpeg2000_init_static_data(AVCodec *codec)
-{
-    ff_jpeg2000_init_tier1_luts();
-    ff_mqc_init_context_tables();
-}
-
 #define OFFSET(x) offsetof(Jpeg2000DecoderContext, x)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 
@@ -2215,7 +2264,6 @@
     .id               = AV_CODEC_ID_JPEG2000,
     .capabilities     = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_DR1,
     .priv_data_size   = sizeof(Jpeg2000DecoderContext),
-    .init_static_data = jpeg2000_init_static_data,
     .init             = jpeg2000_decode_init,
     .decode           = jpeg2000_decode_frame,
     .priv_class       = &jpeg2000_class,

diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index 55dd5e8..ce1678a 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c

@@ -305,22 +305,22 @@
         t[i] = (t[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
 }
 
-static void sr_1d53(int *p, int i0, int i1)
+static void sr_1d53(unsigned *p, int i0, int i1)
 {
     int i;
 
     if (i1 <= i0 + 1) {
         if (i0 == 1)
-            p[1] >>= 1;
+            p[1] = (int)p[1] >> 1;
         return;
     }
 
     extend53(p, i0, i1);
 
     for (i = (i0 >> 1); i < (i1 >> 1) + 1; i++)
-        p[2 * i] -= (p[2 * i - 1] + p[2 * i + 1] + 2) >> 2;
+        p[2 * i] -= (int)(p[2 * i - 1] + p[2 * i + 1] + 2) >> 2;
     for (i = (i0 >> 1); i < (i1 >> 1); i++)
-        p[2 * i + 1] += (p[2 * i] + p[2 * i + 2]) >> 1;
+        p[2 * i + 1] += (int)(p[2 * i] + p[2 * i + 2]) >> 1;
 }
 
 static void dwt_decode53(DWTContext *s, int *t)

diff --git a/libavcodec/jpegls.h b/libavcodec/jpegls.h
index c8997c7..6b89b2a 100644
--- a/libavcodec/jpegls.h
+++ b/libavcodec/jpegls.h

@@ -32,6 +32,8 @@
 #include "avcodec.h"
 #include "internal.h"
 
+#undef near /* This file uses struct member 'near' which in windows.h is defined as empty. */
+
 typedef struct JpeglsContext {
     AVCodecContext *avctx;
 } JpeglsContext;

diff --git a/libavcodec/jpegtables.h b/libavcodec/jpegtables.h
index 6833b4b..aa38df4 100644
--- a/libavcodec/jpegtables.h
+++ b/libavcodec/jpegtables.h

@@ -23,18 +23,18 @@
 
 #include <stdint.h>
 
-#include "libavutil/internal.h"
+#include "internal.h"
 
-extern av_export const uint8_t avpriv_mjpeg_bits_dc_luminance[];
-extern av_export const uint8_t avpriv_mjpeg_val_dc[];
+extern av_export_avcodec const uint8_t avpriv_mjpeg_bits_dc_luminance[];
+extern av_export_avcodec const uint8_t avpriv_mjpeg_val_dc[];
 
-extern av_export const uint8_t avpriv_mjpeg_bits_dc_chrominance[];
+extern av_export_avcodec const uint8_t avpriv_mjpeg_bits_dc_chrominance[];
 
-extern av_export const uint8_t avpriv_mjpeg_bits_ac_luminance[];
-extern av_export const uint8_t avpriv_mjpeg_val_ac_luminance[];
+extern av_export_avcodec const uint8_t avpriv_mjpeg_bits_ac_luminance[];
+extern av_export_avcodec const uint8_t avpriv_mjpeg_val_ac_luminance[];
 
-extern av_export const uint8_t avpriv_mjpeg_bits_ac_chrominance[];
-extern av_export const uint8_t avpriv_mjpeg_val_ac_chrominance[];
+extern av_export_avcodec const uint8_t avpriv_mjpeg_bits_ac_chrominance[];
+extern av_export_avcodec const uint8_t avpriv_mjpeg_val_ac_chrominance[];
 
 void ff_mjpeg_build_huffman_codes(uint8_t *huff_size, uint16_t *huff_code,
                                   const uint8_t *bits_table,

diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c
index 0f4aa89..5763504 100644
--- a/libavcodec/lagarith.c
+++ b/libavcodec/lagarith.c

@@ -53,9 +53,6 @@
     LLVidDSPContext llviddsp;
     int zeros;                  /**< number of consecutive zero bytes encountered */
     int zeros_rem;              /**< number of zero bytes remaining to output */
-    uint8_t *rgb_planes;
-    int      rgb_planes_allocated;
-    int rgb_stride;
 } LagarithContext;
 
 /**
@@ -141,6 +138,7 @@
     unsigned prob, cumulative_target;
     unsigned cumul_prob = 0;
     unsigned scaled_cumul_prob = 0;
+    int nnz = 0;
 
     rac->prob[0] = 0;
     rac->prob[257] = UINT_MAX;
@@ -164,6 +162,8 @@
                 prob = 256 - i;
             for (j = 0; j < prob; j++)
                 rac->prob[++i] = 0;
+        }else {
+            nnz++;
         }
     }
 
@@ -172,6 +172,10 @@
         return -1;
     }
 
+    if (nnz == 1 && (show_bits_long(gb, 32) & 0xFFFFFF)) {
+        return AVERROR_INVALIDDATA;
+    }
+
     /* Scale probabilities so cumulative probability is an even power of 2. */
     scale_factor = av_log2(cumul_prob);
 
@@ -534,14 +538,15 @@
     LagarithContext *l = avctx->priv_data;
     ThreadFrame frame = { .f = data };
     AVFrame *const p  = data;
-    uint8_t frametype = 0;
+    uint8_t frametype;
     uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9;
     uint32_t offs[4];
-    uint8_t *srcs[4], *dst;
+    uint8_t *srcs[4];
     int i, j, planes = 3;
     int ret;
 
     p->key_frame = 1;
+    p->pict_type = AV_PICTURE_TYPE_I;
 
     frametype = buf[0];
 
@@ -550,64 +555,60 @@
 
     switch (frametype) {
     case FRAME_SOLID_RGBA:
-        avctx->pix_fmt = AV_PIX_FMT_RGB32;
+        avctx->pix_fmt = AV_PIX_FMT_GBRAP;
     case FRAME_SOLID_GRAY:
         if (frametype == FRAME_SOLID_GRAY)
             if (avctx->bits_per_coded_sample == 24) {
-                avctx->pix_fmt = AV_PIX_FMT_RGB24;
+                avctx->pix_fmt = AV_PIX_FMT_GBRP;
             } else {
-                avctx->pix_fmt = AV_PIX_FMT_0RGB32;
+                avctx->pix_fmt = AV_PIX_FMT_GBRAP;
                 planes = 4;
             }
 
         if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
             return ret;
 
-        dst = p->data[0];
         if (frametype == FRAME_SOLID_RGBA) {
-        for (j = 0; j < avctx->height; j++) {
-            for (i = 0; i < avctx->width; i++)
-                AV_WN32(dst + i * 4, offset_gu);
-            dst += p->linesize[0];
-        }
+            for (i = 0; i < avctx->height; i++) {
+                memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width);
+                memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width);
+                memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width);
+                memset(p->data[3] + i * p->linesize[3], buf[4], avctx->width);
+            }
         } else {
-            for (j = 0; j < avctx->height; j++) {
-                memset(dst, buf[1], avctx->width * planes);
-                dst += p->linesize[0];
+            for (i = 0; i < avctx->height; i++) {
+                for (j = 0; j < planes; j++)
+                    memset(p->data[j] + i * p->linesize[j], buf[1], avctx->width);
             }
         }
         break;
     case FRAME_SOLID_COLOR:
         if (avctx->bits_per_coded_sample == 24) {
-            avctx->pix_fmt = AV_PIX_FMT_RGB24;
+            avctx->pix_fmt = AV_PIX_FMT_GBRP;
         } else {
-            avctx->pix_fmt = AV_PIX_FMT_RGB32;
-            offset_gu |= 0xFFU << 24;
+            avctx->pix_fmt = AV_PIX_FMT_GBRAP;
         }
 
         if ((ret = ff_thread_get_buffer(avctx, &frame,0)) < 0)
             return ret;
 
-        dst = p->data[0];
-        for (j = 0; j < avctx->height; j++) {
-            for (i = 0; i < avctx->width; i++)
-                if (avctx->bits_per_coded_sample == 24) {
-                    AV_WB24(dst + i * 3, offset_gu);
-                } else {
-                    AV_WN32(dst + i * 4, offset_gu);
-                }
-            dst += p->linesize[0];
+        for (i = 0; i < avctx->height; i++) {
+            memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width);
+            memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width);
+            memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width);
+            if (avctx->pix_fmt == AV_PIX_FMT_GBRAP)
+                memset(p->data[3] + i * p->linesize[3], 0xFFu, avctx->width);
         }
         break;
     case FRAME_ARITH_RGBA:
-        avctx->pix_fmt = AV_PIX_FMT_RGB32;
+        avctx->pix_fmt = AV_PIX_FMT_GBRAP;
         planes = 4;
         offset_ry += 4;
         offs[3] = AV_RL32(buf + 9);
     case FRAME_ARITH_RGB24:
     case FRAME_U_RGB24:
         if (frametype == FRAME_ARITH_RGB24 || frametype == FRAME_U_RGB24)
-            avctx->pix_fmt = AV_PIX_FMT_RGB24;
+            avctx->pix_fmt = AV_PIX_FMT_GBRP;
 
         if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
             return ret;
@@ -616,15 +617,8 @@
         offs[1] = offset_gu;
         offs[2] = offset_ry;
 
-        l->rgb_stride = FFALIGN(avctx->width, 16);
-        av_fast_malloc(&l->rgb_planes, &l->rgb_planes_allocated,
-                       l->rgb_stride * avctx->height * planes + 1);
-        if (!l->rgb_planes) {
-            av_log(avctx, AV_LOG_ERROR, "cannot allocate temporary buffer\n");
-            return AVERROR(ENOMEM);
-        }
         for (i = 0; i < planes; i++)
-            srcs[i] = l->rgb_planes + (i + 1) * l->rgb_stride * avctx->height - l->rgb_stride;
+            srcs[i] = p->data[i] + (avctx->height - 1) * p->linesize[i];
         for (i = 0; i < planes; i++)
             if (buf_size <= offs[i]) {
                 av_log(avctx, AV_LOG_ERROR,
@@ -635,32 +629,16 @@
         for (i = 0; i < planes; i++)
             lag_decode_arith_plane(l, srcs[i],
                                    avctx->width, avctx->height,
-                                   -l->rgb_stride, buf + offs[i],
+                                   -p->linesize[i], buf + offs[i],
                                    buf_size - offs[i]);
-        dst = p->data[0];
-        for (i = 0; i < planes; i++)
-            srcs[i] = l->rgb_planes + i * l->rgb_stride * avctx->height;
-        for (j = 0; j < avctx->height; j++) {
-            for (i = 0; i < avctx->width; i++) {
-                uint8_t r, g, b, a;
-                r = srcs[0][i];
-                g = srcs[1][i];
-                b = srcs[2][i];
-                r += g;
-                b += g;
-                if (frametype == FRAME_ARITH_RGBA) {
-                    a = srcs[3][i];
-                    AV_WN32(dst + i * 4, MKBETAG(a, r, g, b));
-                } else {
-                    dst[i * 3 + 0] = r;
-                    dst[i * 3 + 1] = g;
-                    dst[i * 3 + 2] = b;
-                }
-            }
-            dst += p->linesize[0];
-            for (i = 0; i < planes; i++)
-                srcs[i] += l->rgb_stride;
+        for (i = 0; i < avctx->height; i++) {
+            l->llviddsp.add_bytes(p->data[0] + i * p->linesize[0], p->data[1] + i * p->linesize[1], avctx->width);
+            l->llviddsp.add_bytes(p->data[2] + i * p->linesize[2], p->data[1] + i * p->linesize[1], avctx->width);
         }
+        FFSWAP(uint8_t*, p->data[0], p->data[1]);
+        FFSWAP(int, p->linesize[0], p->linesize[1]);
+        FFSWAP(uint8_t*, p->data[2], p->data[1]);
+        FFSWAP(int, p->linesize[2], p->linesize[1]);
         break;
     case FRAME_ARITH_YUY2:
         avctx->pix_fmt = AV_PIX_FMT_YUV422P;
@@ -744,15 +722,6 @@
 }
 #endif
 
-static av_cold int lag_decode_end(AVCodecContext *avctx)
-{
-    LagarithContext *l = avctx->priv_data;
-
-    av_freep(&l->rgb_planes);
-
-    return 0;
-}
-
 AVCodec ff_lagarith_decoder = {
     .name           = "lagarith",
     .long_name      = NULL_IF_CONFIG_SMALL("Lagarith lossless"),
@@ -761,7 +730,6 @@
     .priv_data_size = sizeof(LagarithContext),
     .init           = lag_decode_init,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(lag_decode_init_thread_copy),
-    .close          = lag_decode_end,
     .decode         = lag_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
 };

diff --git a/libavcodec/libaomdec.c b/libavcodec/libaomdec.c
new file mode 100644
index 0000000..a72ac98
--- /dev/null
+++ b/libavcodec/libaomdec.c

@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2010, Google, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AV1 decoder support via libaom
+ */
+
+#include <aom/aom_decoder.h>
+#include <aom/aomdx.h>
+
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+
+#include "avcodec.h"
+#include "internal.h"
+#include "profiles.h"
+
+typedef struct AV1DecodeContext {
+    struct aom_codec_ctx decoder;
+} AV1DecodeContext;
+
+static av_cold int aom_init(AVCodecContext *avctx,
+                            const struct aom_codec_iface *iface)
+{
+    AV1DecodeContext *ctx           = avctx->priv_data;
+    struct aom_codec_dec_cfg deccfg = {
+        .threads = FFMIN(avctx->thread_count ? avctx->thread_count : av_cpu_count(), 16)
+    };
+
+    av_log(avctx, AV_LOG_INFO, "%s\n", aom_codec_version_str());
+    av_log(avctx, AV_LOG_VERBOSE, "%s\n", aom_codec_build_config());
+
+    if (aom_codec_dec_init(&ctx->decoder, iface, &deccfg, 0) != AOM_CODEC_OK) {
+        const char *error = aom_codec_error(&ctx->decoder);
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder: %s\n",
+               error);
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+static void image_copy_16_to_8(AVFrame *pic, struct aom_image *img)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pic->format);
+    int i;
+
+    for (i = 0; i < desc->nb_components; i++) {
+        int w = img->d_w;
+        int h = img->d_h;
+        int x, y;
+
+        if (i) {
+            w = (w + img->x_chroma_shift) >> img->x_chroma_shift;
+            h = (h + img->y_chroma_shift) >> img->y_chroma_shift;
+        }
+
+        for (y = 0; y < h; y++) {
+            uint16_t *src = (uint16_t *)(img->planes[i] + y * img->stride[i]);
+            uint8_t *dst = pic->data[i] + y * pic->linesize[i];
+            for (x = 0; x < w; x++)
+                *dst++ = *src++;
+        }
+    }
+}
+
+// returns 0 on success, AVERROR_INVALIDDATA otherwise
+static int set_pix_fmt(AVCodecContext *avctx, struct aom_image *img)
+{
+    static const enum AVColorRange color_ranges[] = {
+        AVCOL_RANGE_MPEG, AVCOL_RANGE_JPEG
+    };
+    avctx->color_range = color_ranges[img->range];
+    avctx->color_primaries = img->cp;
+    avctx->colorspace  = img->mc;
+    avctx->color_trc   = img->tc;
+
+    switch (img->fmt) {
+    case AOM_IMG_FMT_I420:
+    case AOM_IMG_FMT_I42016:
+        if (img->bit_depth == 8) {
+            avctx->pix_fmt = img->monochrome ?
+                             AV_PIX_FMT_GRAY8 : AV_PIX_FMT_YUV420P;
+            avctx->profile = FF_PROFILE_AV1_MAIN;
+            return 0;
+        } else if (img->bit_depth == 10) {
+            avctx->pix_fmt = img->monochrome ?
+                             AV_PIX_FMT_GRAY10 : AV_PIX_FMT_YUV420P10;
+            avctx->profile = FF_PROFILE_AV1_MAIN;
+            return 0;
+        } else if (img->bit_depth == 12) {
+            avctx->pix_fmt = img->monochrome ?
+                             AV_PIX_FMT_GRAY12 : AV_PIX_FMT_YUV420P12;
+            avctx->profile = FF_PROFILE_AV1_PROFESSIONAL;
+            return 0;
+        } else {
+            return AVERROR_INVALIDDATA;
+        }
+    case AOM_IMG_FMT_I422:
+    case AOM_IMG_FMT_I42216:
+        if (img->bit_depth == 8) {
+            avctx->pix_fmt = AV_PIX_FMT_YUV422P;
+            avctx->profile = FF_PROFILE_AV1_PROFESSIONAL;
+            return 0;
+        } else if (img->bit_depth == 10) {
+            avctx->pix_fmt = AV_PIX_FMT_YUV422P10;
+            avctx->profile = FF_PROFILE_AV1_PROFESSIONAL;
+            return 0;
+        } else if (img->bit_depth == 12) {
+            avctx->pix_fmt = AV_PIX_FMT_YUV422P12;
+            avctx->profile = FF_PROFILE_AV1_PROFESSIONAL;
+            return 0;
+        } else {
+            return AVERROR_INVALIDDATA;
+        }
+    case AOM_IMG_FMT_I444:
+    case AOM_IMG_FMT_I44416:
+        if (img->bit_depth == 8) {
+            avctx->pix_fmt = AV_PIX_FMT_YUV444P;
+            avctx->profile = FF_PROFILE_AV1_HIGH;
+            return 0;
+        } else if (img->bit_depth == 10) {
+            avctx->pix_fmt = AV_PIX_FMT_YUV444P10;
+            avctx->profile = FF_PROFILE_AV1_HIGH;
+            return 0;
+        } else if (img->bit_depth == 12) {
+            avctx->pix_fmt = AV_PIX_FMT_YUV444P12;
+            avctx->profile = FF_PROFILE_AV1_PROFESSIONAL;
+            return 0;
+        } else {
+            return AVERROR_INVALIDDATA;
+        }
+
+    default:
+        return AVERROR_INVALIDDATA;
+    }
+}
+
+static int aom_decode(AVCodecContext *avctx, void *data, int *got_frame,
+                      AVPacket *avpkt)
+{
+    AV1DecodeContext *ctx = avctx->priv_data;
+    AVFrame *picture      = data;
+    const void *iter      = NULL;
+    struct aom_image *img;
+    int ret;
+
+    if (aom_codec_decode(&ctx->decoder, avpkt->data, avpkt->size, NULL) !=
+        AOM_CODEC_OK) {
+        const char *error  = aom_codec_error(&ctx->decoder);
+        const char *detail = aom_codec_error_detail(&ctx->decoder);
+
+        av_log(avctx, AV_LOG_ERROR, "Failed to decode frame: %s\n", error);
+        if (detail)
+            av_log(avctx, AV_LOG_ERROR, "  Additional information: %s\n",
+                   detail);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if ((img = aom_codec_get_frame(&ctx->decoder, &iter))) {
+        if (img->d_w > img->w || img->d_h > img->h) {
+            av_log(avctx, AV_LOG_ERROR, "Display dimensions %dx%d exceed storage %dx%d\n",
+                   img->d_w, img->d_h, img->w, img->h);
+            return AVERROR_EXTERNAL;
+        }
+
+        if ((ret = set_pix_fmt(avctx, img)) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Unsupported output colorspace (%d) / bit_depth (%d)\n",
+                   img->fmt, img->bit_depth);
+            return ret;
+        }
+
+        if ((int)img->d_w != avctx->width || (int)img->d_h != avctx->height) {
+            av_log(avctx, AV_LOG_INFO, "dimension change! %dx%d -> %dx%d\n",
+                   avctx->width, avctx->height, img->d_w, img->d_h);
+            ret = ff_set_dimensions(avctx, img->d_w, img->d_h);
+            if (ret < 0)
+                return ret;
+        }
+        if ((ret = ff_get_buffer(avctx, picture, 0)) < 0)
+            return ret;
+        if ((img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) && img->bit_depth == 8)
+            image_copy_16_to_8(picture, img);
+        else
+            av_image_copy(picture->data, picture->linesize, (const uint8_t **)img->planes,
+                          img->stride, avctx->pix_fmt, img->d_w, img->d_h);
+        *got_frame = 1;
+    }
+    return avpkt->size;
+}
+
+static av_cold int aom_free(AVCodecContext *avctx)
+{
+    AV1DecodeContext *ctx = avctx->priv_data;
+    aom_codec_destroy(&ctx->decoder);
+    return 0;
+}
+
+static av_cold int av1_init(AVCodecContext *avctx)
+{
+    return aom_init(avctx, &aom_codec_av1_dx_algo);
+}
+
+AVCodec ff_libaom_av1_decoder = {
+    .name           = "libaom-av1",
+    .long_name      = NULL_IF_CONFIG_SMALL("libaom AV1"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_AV1,
+    .priv_data_size = sizeof(AV1DecodeContext),
+    .init           = av1_init,
+    .close          = aom_free,
+    .decode         = aom_decode,
+    .capabilities   = AV_CODEC_CAP_AUTO_THREADS | AV_CODEC_CAP_DR1,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
+    .wrapper_name   = "libaom",
+};

diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c
new file mode 100644
index 0000000..045c519
--- /dev/null
+++ b/libavcodec/libaomenc.c

@@ -0,0 +1,832 @@
+/*
+ * Copyright (c) 2010, Google, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AV1 encoder support via libaom
+ */
+
+#define AOM_DISABLE_CTRL_TYPECHECKS 1
+#include <aom/aom_encoder.h>
+#include <aom/aomcx.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/base64.h"
+#include "libavutil/common.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avcodec.h"
+#include "internal.h"
+#include "profiles.h"
+
+/*
+ * Portion of struct aom_codec_cx_pkt from aom_encoder.h.
+ * One encoded frame returned from the library.
+ */
+struct FrameListData {
+    void *buf;                       /**< compressed data buffer */
+    size_t sz;                       /**< length of compressed data */
+    int64_t pts;                     /**< time stamp to show frame
+                                          (in timebase units) */
+    unsigned long duration;          /**< duration to show frame
+                                          (in timebase units) */
+    uint32_t flags;                  /**< flags for this frame */
+    uint64_t sse[4];
+    int have_sse;                    /**< true if we have pending sse[] */
+    uint64_t frame_number;
+    struct FrameListData *next;
+};
+
+typedef struct AOMEncoderContext {
+    AVClass *class;
+    AVBSFContext *bsf;
+    struct aom_codec_ctx encoder;
+    struct aom_image rawimg;
+    struct aom_fixed_buf twopass_stats;
+    struct FrameListData *coded_frame_list;
+    int cpu_used;
+    int auto_alt_ref;
+    int lag_in_frames;
+    int error_resilient;
+    int crf;
+    int static_thresh;
+    int drop_threshold;
+    int noise_sensitivity;
+    uint64_t sse[4];
+    int have_sse; /**< true if we have pending sse[] */
+    uint64_t frame_number;
+} AOMContext;
+
+static const char *const ctlidstr[] = {
+    [AOME_SET_CPUUSED]          = "AOME_SET_CPUUSED",
+    [AOME_SET_CQ_LEVEL]         = "AOME_SET_CQ_LEVEL",
+    [AOME_SET_ENABLEAUTOALTREF] = "AOME_SET_ENABLEAUTOALTREF",
+    [AOME_SET_STATIC_THRESHOLD] = "AOME_SET_STATIC_THRESHOLD",
+    [AV1E_SET_COLOR_RANGE]      = "AV1E_SET_COLOR_RANGE",
+    [AV1E_SET_COLOR_PRIMARIES]  = "AV1E_SET_COLOR_PRIMARIES",
+    [AV1E_SET_MATRIX_COEFFICIENTS] = "AV1E_SET_MATRIX_COEFFICIENTS",
+    [AV1E_SET_TRANSFER_CHARACTERISTICS] = "AV1E_SET_TRANSFER_CHARACTERISTICS",
+};
+
+static av_cold void log_encoder_error(AVCodecContext *avctx, const char *desc)
+{
+    AOMContext *ctx    = avctx->priv_data;
+    const char *error  = aom_codec_error(&ctx->encoder);
+    const char *detail = aom_codec_error_detail(&ctx->encoder);
+
+    av_log(avctx, AV_LOG_ERROR, "%s: %s\n", desc, error);
+    if (detail)
+        av_log(avctx, AV_LOG_ERROR, "  Additional information: %s\n", detail);
+}
+
+static av_cold void dump_enc_cfg(AVCodecContext *avctx,
+                                 const struct aom_codec_enc_cfg *cfg)
+{
+    int width = -30;
+    int level = AV_LOG_DEBUG;
+
+    av_log(avctx, level, "aom_codec_enc_cfg\n");
+    av_log(avctx, level, "generic settings\n"
+                         "  %*s%u\n  %*s%u\n  %*s%u\n  %*s%u\n  %*s%u\n"
+                         "  %*s%u\n  %*s%u\n"
+                         "  %*s{%u/%u}\n  %*s%u\n  %*s%d\n  %*s%u\n",
+           width, "g_usage:",           cfg->g_usage,
+           width, "g_threads:",         cfg->g_threads,
+           width, "g_profile:",         cfg->g_profile,
+           width, "g_w:",               cfg->g_w,
+           width, "g_h:",               cfg->g_h,
+           width, "g_bit_depth:",       cfg->g_bit_depth,
+           width, "g_input_bit_depth:", cfg->g_input_bit_depth,
+           width, "g_timebase:",        cfg->g_timebase.num, cfg->g_timebase.den,
+           width, "g_error_resilient:", cfg->g_error_resilient,
+           width, "g_pass:",            cfg->g_pass,
+           width, "g_lag_in_frames:",   cfg->g_lag_in_frames);
+    av_log(avctx, level, "rate control settings\n"
+                         "  %*s%u\n  %*s%d\n  %*s%p(%"SIZE_SPECIFIER")\n  %*s%u\n",
+           width, "rc_dropframe_thresh:", cfg->rc_dropframe_thresh,
+           width, "rc_end_usage:",        cfg->rc_end_usage,
+           width, "rc_twopass_stats_in:", cfg->rc_twopass_stats_in.buf, cfg->rc_twopass_stats_in.sz,
+           width, "rc_target_bitrate:",   cfg->rc_target_bitrate);
+    av_log(avctx, level, "quantizer settings\n"
+                         "  %*s%u\n  %*s%u\n",
+           width, "rc_min_quantizer:", cfg->rc_min_quantizer,
+           width, "rc_max_quantizer:", cfg->rc_max_quantizer);
+    av_log(avctx, level, "bitrate tolerance\n"
+                         "  %*s%u\n  %*s%u\n",
+           width, "rc_undershoot_pct:", cfg->rc_undershoot_pct,
+           width, "rc_overshoot_pct:",  cfg->rc_overshoot_pct);
+    av_log(avctx, level, "decoder buffer model\n"
+                         "  %*s%u\n  %*s%u\n  %*s%u\n",
+           width, "rc_buf_sz:",         cfg->rc_buf_sz,
+           width, "rc_buf_initial_sz:", cfg->rc_buf_initial_sz,
+           width, "rc_buf_optimal_sz:", cfg->rc_buf_optimal_sz);
+    av_log(avctx, level, "2 pass rate control settings\n"
+                         "  %*s%u\n  %*s%u\n  %*s%u\n",
+           width, "rc_2pass_vbr_bias_pct:",       cfg->rc_2pass_vbr_bias_pct,
+           width, "rc_2pass_vbr_minsection_pct:", cfg->rc_2pass_vbr_minsection_pct,
+           width, "rc_2pass_vbr_maxsection_pct:", cfg->rc_2pass_vbr_maxsection_pct);
+    av_log(avctx, level, "keyframing settings\n"
+                         "  %*s%d\n  %*s%u\n  %*s%u\n",
+           width, "kf_mode:",     cfg->kf_mode,
+           width, "kf_min_dist:", cfg->kf_min_dist,
+           width, "kf_max_dist:", cfg->kf_max_dist);
+    av_log(avctx, level, "\n");
+}
+
+static void coded_frame_add(void *list, struct FrameListData *cx_frame)
+{
+    struct FrameListData **p = list;
+
+    while (*p)
+        p = &(*p)->next;
+    *p = cx_frame;
+    cx_frame->next = NULL;
+}
+
+static av_cold void free_coded_frame(struct FrameListData *cx_frame)
+{
+    av_freep(&cx_frame->buf);
+    av_freep(&cx_frame);
+}
+
+static av_cold void free_frame_list(struct FrameListData *list)
+{
+    struct FrameListData *p = list;
+
+    while (p) {
+        list = list->next;
+        free_coded_frame(p);
+        p = list;
+    }
+}
+
+static av_cold int codecctl_int(AVCodecContext *avctx,
+                                enum aome_enc_control_id id, int val)
+{
+    AOMContext *ctx = avctx->priv_data;
+    char buf[80];
+    int width = -30;
+    int res;
+
+    snprintf(buf, sizeof(buf), "%s:", ctlidstr[id]);
+    av_log(avctx, AV_LOG_DEBUG, "  %*s%d\n", width, buf, val);
+
+    res = aom_codec_control(&ctx->encoder, id, val);
+    if (res != AOM_CODEC_OK) {
+        snprintf(buf, sizeof(buf), "Failed to set %s codec control",
+                 ctlidstr[id]);
+        log_encoder_error(avctx, buf);
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+static av_cold int aom_free(AVCodecContext *avctx)
+{
+    AOMContext *ctx = avctx->priv_data;
+
+    aom_codec_destroy(&ctx->encoder);
+    av_freep(&ctx->twopass_stats.buf);
+    av_freep(&avctx->stats_out);
+    free_frame_list(ctx->coded_frame_list);
+    av_bsf_free(&ctx->bsf);
+    return 0;
+}
+
+static int set_pix_fmt(AVCodecContext *avctx, aom_codec_caps_t codec_caps,
+                       struct aom_codec_enc_cfg *enccfg, aom_codec_flags_t *flags,
+                       aom_img_fmt_t *img_fmt)
+{
+    AOMContext av_unused *ctx = avctx->priv_data;
+    enccfg->g_bit_depth = enccfg->g_input_bit_depth = 8;
+    switch (avctx->pix_fmt) {
+    case AV_PIX_FMT_YUV420P:
+        enccfg->g_profile = FF_PROFILE_AV1_MAIN;
+        *img_fmt = AOM_IMG_FMT_I420;
+        return 0;
+    case AV_PIX_FMT_YUV422P:
+        enccfg->g_profile = FF_PROFILE_AV1_PROFESSIONAL;
+        *img_fmt = AOM_IMG_FMT_I422;
+        return 0;
+    case AV_PIX_FMT_YUV444P:
+        enccfg->g_profile = FF_PROFILE_AV1_HIGH;
+        *img_fmt = AOM_IMG_FMT_I444;
+        return 0;
+    case AV_PIX_FMT_YUV420P10:
+    case AV_PIX_FMT_YUV420P12:
+        if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH) {
+            enccfg->g_bit_depth = enccfg->g_input_bit_depth =
+                avctx->pix_fmt == AV_PIX_FMT_YUV420P10 ? 10 : 12;
+            enccfg->g_profile =
+                enccfg->g_bit_depth == 10 ? FF_PROFILE_AV1_MAIN : FF_PROFILE_AV1_PROFESSIONAL;
+            *img_fmt = AOM_IMG_FMT_I42016;
+            *flags |= AOM_CODEC_USE_HIGHBITDEPTH;
+            return 0;
+        }
+        break;
+    case AV_PIX_FMT_YUV422P10:
+    case AV_PIX_FMT_YUV422P12:
+        if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH) {
+            enccfg->g_bit_depth = enccfg->g_input_bit_depth =
+                avctx->pix_fmt == AV_PIX_FMT_YUV422P10 ? 10 : 12;
+            enccfg->g_profile = FF_PROFILE_AV1_PROFESSIONAL;
+            *img_fmt = AOM_IMG_FMT_I42216;
+            *flags |= AOM_CODEC_USE_HIGHBITDEPTH;
+            return 0;
+        }
+        break;
+    case AV_PIX_FMT_YUV444P10:
+    case AV_PIX_FMT_YUV444P12:
+        if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH) {
+            enccfg->g_bit_depth = enccfg->g_input_bit_depth =
+                avctx->pix_fmt == AV_PIX_FMT_YUV444P10 ? 10 : 12;
+            enccfg->g_profile =
+                enccfg->g_bit_depth == 10 ? FF_PROFILE_AV1_HIGH : FF_PROFILE_AV1_PROFESSIONAL;
+            *img_fmt = AOM_IMG_FMT_I44416;
+            *flags |= AOM_CODEC_USE_HIGHBITDEPTH;
+            return 0;
+        }
+        break;
+    default:
+        break;
+    }
+    av_log(avctx, AV_LOG_ERROR, "Unsupported pixel format.\n");
+    return AVERROR_INVALIDDATA;
+}
+
+static void set_color_range(AVCodecContext *avctx)
+{
+    enum aom_color_range aom_cr;
+    switch (avctx->color_range) {
+    case AVCOL_RANGE_UNSPECIFIED:
+    case AVCOL_RANGE_MPEG:       aom_cr = AOM_CR_STUDIO_RANGE; break;
+    case AVCOL_RANGE_JPEG:       aom_cr = AOM_CR_FULL_RANGE;   break;
+    default:
+        av_log(avctx, AV_LOG_WARNING, "Unsupported color range (%d)\n",
+               avctx->color_range);
+        return;
+    }
+
+    codecctl_int(avctx, AV1E_SET_COLOR_RANGE, aom_cr);
+}
+
+static av_cold int aom_init(AVCodecContext *avctx,
+                            const struct aom_codec_iface *iface)
+{
+    AOMContext *ctx = avctx->priv_data;
+    struct aom_codec_enc_cfg enccfg = { 0 };
+#ifdef AOM_FRAME_IS_INTRAONLY
+    aom_codec_flags_t flags =
+        (avctx->flags & AV_CODEC_FLAG_PSNR) ? AOM_CODEC_USE_PSNR : 0;
+#else
+    aom_codec_flags_t flags = 0;
+#endif
+    AVCPBProperties *cpb_props;
+    int res;
+    aom_img_fmt_t img_fmt;
+    aom_codec_caps_t codec_caps = aom_codec_get_caps(iface);
+
+    av_log(avctx, AV_LOG_INFO, "%s\n", aom_codec_version_str());
+    av_log(avctx, AV_LOG_VERBOSE, "%s\n", aom_codec_build_config());
+
+    if ((res = aom_codec_enc_config_default(iface, &enccfg, 0)) != AOM_CODEC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to get config: %s\n",
+               aom_codec_err_to_string(res));
+        return AVERROR(EINVAL);
+    }
+
+    if (set_pix_fmt(avctx, codec_caps, &enccfg, &flags, &img_fmt))
+        return AVERROR(EINVAL);
+
+    if(!avctx->bit_rate)
+        if(avctx->rc_max_rate || avctx->rc_buffer_size || avctx->rc_initial_buffer_occupancy) {
+            av_log( avctx, AV_LOG_ERROR, "Rate control parameters set without a bitrate\n");
+            return AVERROR(EINVAL);
+        }
+
+    dump_enc_cfg(avctx, &enccfg);
+
+    enccfg.g_w            = avctx->width;
+    enccfg.g_h            = avctx->height;
+    enccfg.g_timebase.num = avctx->time_base.num;
+    enccfg.g_timebase.den = avctx->time_base.den;
+    enccfg.g_threads      = avctx->thread_count ? avctx->thread_count : av_cpu_count();
+
+    if (ctx->lag_in_frames >= 0)
+        enccfg.g_lag_in_frames = ctx->lag_in_frames;
+
+    if (avctx->flags & AV_CODEC_FLAG_PASS1)
+        enccfg.g_pass = AOM_RC_FIRST_PASS;
+    else if (avctx->flags & AV_CODEC_FLAG_PASS2)
+        enccfg.g_pass = AOM_RC_LAST_PASS;
+    else
+        enccfg.g_pass = AOM_RC_ONE_PASS;
+
+    if (avctx->rc_min_rate == avctx->rc_max_rate &&
+        avctx->rc_min_rate == avctx->bit_rate && avctx->bit_rate) {
+        enccfg.rc_end_usage = AOM_CBR;
+    } else if (ctx->crf >= 0) {
+        enccfg.rc_end_usage = AOM_CQ;
+        if (!avctx->bit_rate)
+            enccfg.rc_end_usage = AOM_Q;
+    }
+
+    if (avctx->bit_rate) {
+        enccfg.rc_target_bitrate = av_rescale_rnd(avctx->bit_rate, 1, 1000,
+                                                  AV_ROUND_NEAR_INF);
+    } else if (enccfg.rc_end_usage != AOM_Q) {
+        if (enccfg.rc_end_usage == AOM_CQ) {
+            enccfg.rc_target_bitrate = 1000000;
+        } else {
+            avctx->bit_rate = enccfg.rc_target_bitrate * 1000;
+            av_log(avctx, AV_LOG_WARNING,
+                   "Neither bitrate nor constrained quality specified, using default bitrate of %dkbit/sec\n",
+                   enccfg.rc_target_bitrate);
+        }
+    }
+
+    if (avctx->qmin >= 0)
+        enccfg.rc_min_quantizer = avctx->qmin;
+    if (avctx->qmax >= 0)
+        enccfg.rc_max_quantizer = avctx->qmax;
+
+    if (enccfg.rc_end_usage == AOM_CQ || enccfg.rc_end_usage == AOM_Q) {
+        if (ctx->crf < enccfg.rc_min_quantizer || ctx->crf > enccfg.rc_max_quantizer) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "CQ level %d must be between minimum and maximum quantizer value (%d-%d)\n",
+                   ctx->crf, enccfg.rc_min_quantizer, enccfg.rc_max_quantizer);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    enccfg.rc_dropframe_thresh = ctx->drop_threshold;
+
+    // 0-100 (0 => CBR, 100 => VBR)
+    enccfg.rc_2pass_vbr_bias_pct       = round(avctx->qcompress * 100);
+    if (avctx->bit_rate)
+        enccfg.rc_2pass_vbr_minsection_pct =
+            avctx->rc_min_rate * 100LL / avctx->bit_rate;
+    if (avctx->rc_max_rate)
+        enccfg.rc_2pass_vbr_maxsection_pct =
+            avctx->rc_max_rate * 100LL / avctx->bit_rate;
+
+    if (avctx->rc_buffer_size)
+        enccfg.rc_buf_sz =
+            avctx->rc_buffer_size * 1000LL / avctx->bit_rate;
+    if (avctx->rc_initial_buffer_occupancy)
+        enccfg.rc_buf_initial_sz =
+            avctx->rc_initial_buffer_occupancy * 1000LL / avctx->bit_rate;
+    enccfg.rc_buf_optimal_sz = enccfg.rc_buf_sz * 5 / 6;
+
+    // _enc_init() will balk if kf_min_dist differs from max w/AOM_KF_AUTO
+    if (avctx->keyint_min >= 0 && avctx->keyint_min == avctx->gop_size)
+        enccfg.kf_min_dist = avctx->keyint_min;
+    if (avctx->gop_size >= 0)
+        enccfg.kf_max_dist = avctx->gop_size;
+
+    if (enccfg.g_pass == AOM_RC_FIRST_PASS)
+        enccfg.g_lag_in_frames = 0;
+    else if (enccfg.g_pass == AOM_RC_LAST_PASS) {
+        int decode_size, ret;
+
+        if (!avctx->stats_in) {
+            av_log(avctx, AV_LOG_ERROR, "No stats file for second pass\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        ctx->twopass_stats.sz = strlen(avctx->stats_in) * 3 / 4;
+        ret                   = av_reallocp(&ctx->twopass_stats.buf, ctx->twopass_stats.sz);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Stat buffer alloc (%"SIZE_SPECIFIER" bytes) failed\n",
+                   ctx->twopass_stats.sz);
+            ctx->twopass_stats.sz = 0;
+            return ret;
+        }
+        decode_size = av_base64_decode(ctx->twopass_stats.buf, avctx->stats_in,
+                                       ctx->twopass_stats.sz);
+        if (decode_size < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Stat buffer decode failed\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        ctx->twopass_stats.sz      = decode_size;
+        enccfg.rc_twopass_stats_in = ctx->twopass_stats;
+    }
+
+    /* 0-3: For non-zero values the encoder increasingly optimizes for reduced
+     * complexity playback on low powered devices at the expense of encode
+     * quality. */
+    if (avctx->profile != FF_PROFILE_UNKNOWN)
+        enccfg.g_profile = avctx->profile;
+
+    enccfg.g_error_resilient = ctx->error_resilient;
+
+    dump_enc_cfg(avctx, &enccfg);
+    /* Construct Encoder Context */
+    res = aom_codec_enc_init(&ctx->encoder, iface, &enccfg, flags);
+    if (res != AOM_CODEC_OK) {
+        log_encoder_error(avctx, "Failed to initialize encoder");
+        return AVERROR(EINVAL);
+    }
+
+    // codec control failures are currently treated only as warnings
+    av_log(avctx, AV_LOG_DEBUG, "aom_codec_control\n");
+    codecctl_int(avctx, AOME_SET_CPUUSED, ctx->cpu_used);
+    if (ctx->auto_alt_ref >= 0)
+        codecctl_int(avctx, AOME_SET_ENABLEAUTOALTREF, ctx->auto_alt_ref);
+
+    codecctl_int(avctx, AOME_SET_STATIC_THRESHOLD, ctx->static_thresh);
+    if (ctx->crf >= 0)
+        codecctl_int(avctx, AOME_SET_CQ_LEVEL,          ctx->crf);
+
+    codecctl_int(avctx, AV1E_SET_COLOR_PRIMARIES, avctx->color_primaries);
+    codecctl_int(avctx, AV1E_SET_MATRIX_COEFFICIENTS, avctx->colorspace);
+    codecctl_int(avctx, AV1E_SET_TRANSFER_CHARACTERISTICS, avctx->color_trc);
+    set_color_range(avctx);
+
+    // provide dummy value to initialize wrapper, values will be updated each _encode()
+    aom_img_wrap(&ctx->rawimg, img_fmt, avctx->width, avctx->height, 1,
+                 (unsigned char*)1);
+
+    if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH)
+        ctx->rawimg.bit_depth = enccfg.g_bit_depth;
+
+    cpb_props = ff_add_cpb_side_data(avctx);
+    if (!cpb_props)
+        return AVERROR(ENOMEM);
+
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
+        const AVBitStreamFilter *filter = av_bsf_get_by_name("extract_extradata");
+        int ret;
+
+        if (!filter) {
+            av_log(avctx, AV_LOG_ERROR, "extract_extradata bitstream filter "
+                   "not found. This is a bug, please report it.\n");
+            return AVERROR_BUG;
+        }
+        ret = av_bsf_alloc(filter, &ctx->bsf);
+        if (ret < 0)
+            return ret;
+
+        ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx);
+        if (ret < 0)
+           return ret;
+
+        ret = av_bsf_init(ctx->bsf);
+        if (ret < 0)
+           return ret;
+    }
+
+    if (enccfg.rc_end_usage == AOM_CBR ||
+        enccfg.g_pass != AOM_RC_ONE_PASS) {
+        cpb_props->max_bitrate = avctx->rc_max_rate;
+        cpb_props->min_bitrate = avctx->rc_min_rate;
+        cpb_props->avg_bitrate = avctx->bit_rate;
+    }
+    cpb_props->buffer_size = avctx->rc_buffer_size;
+
+    return 0;
+}
+
+static inline void cx_pktcpy(AOMContext *ctx,
+                             struct FrameListData *dst,
+                             const struct aom_codec_cx_pkt *src)
+{
+    dst->pts      = src->data.frame.pts;
+    dst->duration = src->data.frame.duration;
+    dst->flags    = src->data.frame.flags;
+    dst->sz       = src->data.frame.sz;
+    dst->buf      = src->data.frame.buf;
+#ifdef AOM_FRAME_IS_INTRAONLY
+    dst->have_sse = 0;
+    dst->frame_number = ++ctx->frame_number;
+    dst->have_sse = ctx->have_sse;
+    if (ctx->have_sse) {
+        /* associate last-seen SSE to the frame. */
+        /* Transfers ownership from ctx to dst. */
+        memcpy(dst->sse, ctx->sse, sizeof(dst->sse));
+        ctx->have_sse = 0;
+    }
+#endif
+}
+
+/**
+ * Store coded frame information in format suitable for return from encode2().
+ *
+ * Write information from @a cx_frame to @a pkt
+ * @return packet data size on success
+ * @return a negative AVERROR on error
+ */
+static int storeframe(AVCodecContext *avctx, struct FrameListData *cx_frame,
+                      AVPacket *pkt)
+{
+    AOMContext *ctx = avctx->priv_data;
+    int pict_type;
+    int ret = ff_alloc_packet2(avctx, pkt, cx_frame->sz, 0);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Error getting output packet of size %"SIZE_SPECIFIER".\n", cx_frame->sz);
+        return ret;
+    }
+    memcpy(pkt->data, cx_frame->buf, pkt->size);
+    pkt->pts = pkt->dts = cx_frame->pts;
+
+    if (!!(cx_frame->flags & AOM_FRAME_IS_KEY)) {
+        pkt->flags |= AV_PKT_FLAG_KEY;
+#ifdef AOM_FRAME_IS_INTRAONLY
+        pict_type = AV_PICTURE_TYPE_I;
+    } else if (cx_frame->flags & AOM_FRAME_IS_INTRAONLY) {
+        pict_type = AV_PICTURE_TYPE_I;
+    } else {
+        pict_type = AV_PICTURE_TYPE_P;
+    }
+
+    ff_side_data_set_encoder_stats(pkt, 0, cx_frame->sse + 1,
+                                   cx_frame->have_sse ? 3 : 0, pict_type);
+
+    if (cx_frame->have_sse) {
+        int i;
+        for (i = 0; i < 3; ++i) {
+            avctx->error[i] += cx_frame->sse[i + 1];
+        }
+        cx_frame->have_sse = 0;
+#endif
+    }
+
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
+        ret = av_bsf_send_packet(ctx->bsf, pkt);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "extract_extradata filter "
+                   "failed to send input packet\n");
+            return ret;
+        }
+        ret = av_bsf_receive_packet(ctx->bsf, pkt);
+
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "extract_extradata filter "
+                   "failed to receive output packet\n");
+            return ret;
+        }
+    }
+    return pkt->size;
+}
+
+/**
+ * Queue multiple output frames from the encoder, returning the front-most.
+ * In cases where aom_codec_get_cx_data() returns more than 1 frame append
+ * the frame queue. Return the head frame if available.
+ * @return Stored frame size
+ * @return AVERROR(EINVAL) on output size error
+ * @return AVERROR(ENOMEM) on coded frame queue data allocation error
+ */
+static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out)
+{
+    AOMContext *ctx = avctx->priv_data;
+    const struct aom_codec_cx_pkt *pkt;
+    const void *iter = NULL;
+    int size = 0;
+
+    if (ctx->coded_frame_list) {
+        struct FrameListData *cx_frame = ctx->coded_frame_list;
+        /* return the leading frame if we've already begun queueing */
+        size = storeframe(avctx, cx_frame, pkt_out);
+        if (size < 0)
+            return size;
+        ctx->coded_frame_list = cx_frame->next;
+        free_coded_frame(cx_frame);
+    }
+
+    /* consume all available output from the encoder before returning. buffers
+     * are only good through the next aom_codec call */
+    while ((pkt = aom_codec_get_cx_data(&ctx->encoder, &iter))) {
+        switch (pkt->kind) {
+        case AOM_CODEC_CX_FRAME_PKT:
+            if (!size) {
+                struct FrameListData cx_frame;
+
+                /* avoid storing the frame when the list is empty and we haven't yet
+                 * provided a frame for output */
+                av_assert0(!ctx->coded_frame_list);
+                cx_pktcpy(ctx, &cx_frame, pkt);
+                size = storeframe(avctx, &cx_frame, pkt_out);
+                if (size < 0)
+                    return size;
+            } else {
+                struct FrameListData *cx_frame =
+                    av_malloc(sizeof(struct FrameListData));
+
+                if (!cx_frame) {
+                    av_log(avctx, AV_LOG_ERROR,
+                           "Frame queue element alloc failed\n");
+                    return AVERROR(ENOMEM);
+                }
+                cx_pktcpy(ctx, cx_frame, pkt);
+                cx_frame->buf = av_malloc(cx_frame->sz);
+
+                if (!cx_frame->buf) {
+                    av_log(avctx, AV_LOG_ERROR,
+                           "Data buffer alloc (%"SIZE_SPECIFIER" bytes) failed\n",
+                           cx_frame->sz);
+                    av_freep(&cx_frame);
+                    return AVERROR(ENOMEM);
+                }
+                memcpy(cx_frame->buf, pkt->data.frame.buf, pkt->data.frame.sz);
+                coded_frame_add(&ctx->coded_frame_list, cx_frame);
+            }
+            break;
+        case AOM_CODEC_STATS_PKT:
+        {
+            struct aom_fixed_buf *stats = &ctx->twopass_stats;
+            int err;
+            if ((err = av_reallocp(&stats->buf,
+                                   stats->sz +
+                                   pkt->data.twopass_stats.sz)) < 0) {
+                stats->sz = 0;
+                av_log(avctx, AV_LOG_ERROR, "Stat buffer realloc failed\n");
+                return err;
+            }
+            memcpy((uint8_t *)stats->buf + stats->sz,
+                   pkt->data.twopass_stats.buf, pkt->data.twopass_stats.sz);
+            stats->sz += pkt->data.twopass_stats.sz;
+            break;
+        }
+#ifdef AOM_FRAME_IS_INTRAONLY
+        case AOM_CODEC_PSNR_PKT:
+        {
+            av_assert0(!ctx->have_sse);
+            ctx->sse[0] = pkt->data.psnr.sse[0];
+            ctx->sse[1] = pkt->data.psnr.sse[1];
+            ctx->sse[2] = pkt->data.psnr.sse[2];
+            ctx->sse[3] = pkt->data.psnr.sse[3];
+            ctx->have_sse = 1;
+            break;
+        }
+#endif
+        case AOM_CODEC_CUSTOM_PKT:
+            // ignore unsupported/unrecognized packet types
+            break;
+        }
+    }
+
+    return size;
+}
+
+static int aom_encode(AVCodecContext *avctx, AVPacket *pkt,
+                      const AVFrame *frame, int *got_packet)
+{
+    AOMContext *ctx = avctx->priv_data;
+    struct aom_image *rawimg = NULL;
+    int64_t timestamp = 0;
+    int res, coded_size;
+    aom_enc_frame_flags_t flags = 0;
+
+    if (frame) {
+        rawimg                      = &ctx->rawimg;
+        rawimg->planes[AOM_PLANE_Y] = frame->data[0];
+        rawimg->planes[AOM_PLANE_U] = frame->data[1];
+        rawimg->planes[AOM_PLANE_V] = frame->data[2];
+        rawimg->stride[AOM_PLANE_Y] = frame->linesize[0];
+        rawimg->stride[AOM_PLANE_U] = frame->linesize[1];
+        rawimg->stride[AOM_PLANE_V] = frame->linesize[2];
+        timestamp                   = frame->pts;
+        switch (frame->color_range) {
+        case AVCOL_RANGE_MPEG:
+            rawimg->range = AOM_CR_STUDIO_RANGE;
+            break;
+        case AVCOL_RANGE_JPEG:
+            rawimg->range = AOM_CR_FULL_RANGE;
+            break;
+        }
+
+        if (frame->pict_type == AV_PICTURE_TYPE_I)
+            flags |= AOM_EFLAG_FORCE_KF;
+    }
+
+    res = aom_codec_encode(&ctx->encoder, rawimg, timestamp,
+                           avctx->ticks_per_frame, flags);
+    if (res != AOM_CODEC_OK) {
+        log_encoder_error(avctx, "Error encoding frame");
+        return AVERROR_INVALIDDATA;
+    }
+    coded_size = queue_frames(avctx, pkt);
+
+    if (!frame && avctx->flags & AV_CODEC_FLAG_PASS1) {
+        size_t b64_size = AV_BASE64_SIZE(ctx->twopass_stats.sz);
+
+        avctx->stats_out = av_malloc(b64_size);
+        if (!avctx->stats_out) {
+            av_log(avctx, AV_LOG_ERROR, "Stat buffer alloc (%"SIZE_SPECIFIER" bytes) failed\n",
+                   b64_size);
+            return AVERROR(ENOMEM);
+        }
+        av_base64_encode(avctx->stats_out, b64_size, ctx->twopass_stats.buf,
+                         ctx->twopass_stats.sz);
+    }
+
+    *got_packet = !!coded_size;
+    return 0;
+}
+
+static const enum AVPixelFormat av1_pix_fmts[] = {
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUV444P,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat av1_pix_fmts_highbd[] = {
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUV444P,
+    AV_PIX_FMT_YUV420P10,
+    AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_YUV444P10,
+    AV_PIX_FMT_YUV420P12,
+    AV_PIX_FMT_YUV422P12,
+    AV_PIX_FMT_YUV444P12,
+    AV_PIX_FMT_NONE
+};
+
+static av_cold void av1_init_static(AVCodec *codec)
+{
+    aom_codec_caps_t codec_caps = aom_codec_get_caps(aom_codec_av1_cx());
+    if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH)
+        codec->pix_fmts = av1_pix_fmts_highbd;
+    else
+        codec->pix_fmts = av1_pix_fmts;
+}
+
+static av_cold int av1_init(AVCodecContext *avctx)
+{
+    return aom_init(avctx, aom_codec_av1_cx());
+}
+
+#define OFFSET(x) offsetof(AOMContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "cpu-used",        "Quality/Speed ratio modifier",           OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1}, 0, 8, VE},
+    { "auto-alt-ref",    "Enable use of alternate reference "
+                         "frames (2-pass only)",                   OFFSET(auto_alt_ref),    AV_OPT_TYPE_INT, {.i64 = -1},      -1,      2,       VE},
+    { "lag-in-frames",   "Number of frames to look ahead at for "
+                         "alternate reference frame selection",    OFFSET(lag_in_frames),   AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE},
+    { "error-resilience", "Error resilience configuration", OFFSET(error_resilient), AV_OPT_TYPE_FLAGS, {.i64 = 0}, INT_MIN, INT_MAX, VE, "er"},
+    { "default",         "Improve resiliency against losses of whole frames", 0, AV_OPT_TYPE_CONST, {.i64 = AOM_ERROR_RESILIENT_DEFAULT}, 0, 0, VE, "er"},
+    { "crf",              "Select the quality for constant quality mode", offsetof(AOMContext, crf), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 63, VE },
+    { "static-thresh",    "A change threshold on blocks below which they will be skipped by the encoder", OFFSET(static_thresh), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+    { "drop-threshold",   "Frame drop threshold", offsetof(AOMContext, drop_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, VE },
+    { "noise-sensitivity", "Noise sensitivity", OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE},
+    { NULL }
+};
+
+static const AVCodecDefault defaults[] = {
+    { "qmin",             "-1" },
+    { "qmax",             "-1" },
+    { "g",                "-1" },
+    { "keyint_min",       "-1" },
+    { NULL },
+};
+
+static const AVClass class_aom = {
+    .class_name = "libaom-av1 encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_libaom_av1_encoder = {
+    .name           = "libaom-av1",
+    .long_name      = NULL_IF_CONFIG_SMALL("libaom AV1"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_AV1,
+    .priv_data_size = sizeof(AOMContext),
+    .init           = av1_init,
+    .encode2        = aom_encode,
+    .close          = aom_free,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS | AV_CODEC_CAP_EXPERIMENTAL,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
+    .priv_class     = &class_aom,
+    .defaults       = defaults,
+    .init_static_data = av1_init_static,
+    .wrapper_name   = "libaom",
+};

diff --git a/libavcodec/libavcodec.v b/libavcodec/libavcodec.v
index 304c2ef..f1d5e5b 100644
--- a/libavcodec/libavcodec.v
+++ b/libavcodec/libavcodec.v

@@ -1,9 +1,6 @@
 LIBAVCODEC_MAJOR {
     global:
         av*;
-        #deprecated, remove after next bump
-        audio_resample;
-        audio_resample_close;
     local:
         *;
 };

diff --git a/libavcodec/libcelt_dec.c b/libavcodec/libcelt_dec.c
index 878e4cc..75b438b 100644
--- a/libavcodec/libcelt_dec.c
+++ b/libavcodec/libcelt_dec.c

@@ -137,4 +137,5 @@
     .close          = libcelt_dec_close,
     .decode         = libcelt_dec_decode,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .wrapper_name   = "libcelt",
 };

diff --git a/libavcodec/libcodec2.c b/libavcodec/libcodec2.c
new file mode 100644
index 0000000..1d6bed0
--- /dev/null
+++ b/libavcodec/libcodec2.c

@@ -0,0 +1,213 @@
+/*
+ * codec2 encoder/decoder using libcodec2
+ * Copyright (c) 2017 Tomas Härdin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <codec2/codec2.h>
+#include "avcodec.h"
+#include "libavutil/opt.h"
+#include "internal.h"
+#include "codec2utils.h"
+
+typedef struct {
+    const AVClass *class;
+    struct CODEC2 *codec;
+    int mode;
+} LibCodec2Context;
+
+static const AVOption options[] = {
+    //not AV_OPT_FLAG_DECODING_PARAM since mode should come from the demuxer
+    //1300 (aka FreeDV 1600) is the most common mode on-the-air, default to it here as well
+    AVPRIV_CODEC2_AVOPTIONS("codec2 mode", LibCodec2Context, 0, 4 /*CODEC2_MODE_1300*/, AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_ENCODING_PARAM),
+    { NULL },
+};
+
+static const AVClass libcodec2_enc_class = {
+    .class_name = "libcodec2 encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVClass libcodec2_dec_class = {
+    .class_name = "libcodec2 decoder",
+    .item_name  = av_default_item_name,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static av_cold int libcodec2_init_common(AVCodecContext *avctx, int mode)
+{
+    LibCodec2Context *c2 = avctx->priv_data;
+    //Grab mode name from options, unless it's some weird number.
+    const char *modename = mode >= 0 && mode <= AVPRIV_CODEC2_MODE_MAX ? options[mode+1].name : "?";
+
+    c2->codec = codec2_create(mode);
+    if (!c2->codec) {
+        //Out of memory or unsupported mode. The latter seems most likely,
+        //but we can't tell for sure with the current API.
+        goto libcodec2_init_common_error;
+    }
+
+    avctx->frame_size = codec2_samples_per_frame(c2->codec);
+    avctx->block_align = (codec2_bits_per_frame(c2->codec) + 7) / 8;
+
+    if (avctx->frame_size <= 0 || avctx->block_align <= 0) {
+        //codec2_create() may succeed for some modes but still fail at codec2_samples_per_frame()
+        //example is -mode 700C on libcodec2 0.4
+        codec2_destroy(c2->codec);
+        c2->codec = NULL;
+        goto libcodec2_init_common_error;
+    }
+
+    codec2_set_natural_or_gray(c2->codec, 1);
+
+    return 0;
+
+libcodec2_init_common_error:
+    av_log(avctx, AV_LOG_ERROR,
+        "Mode %i (%s) not supported with the linked version of libcodec2\n",
+        mode, modename);
+    return AVERROR(EINVAL);
+}
+
+static av_cold int libcodec2_init_decoder(AVCodecContext *avctx)
+{
+    avctx->sample_rate      = 8000;
+    avctx->channels         = 1;
+    avctx->sample_fmt       = AV_SAMPLE_FMT_S16;
+    avctx->channel_layout   = AV_CH_LAYOUT_MONO;
+
+    if (avctx->extradata_size != AVPRIV_CODEC2_EXTRADATA_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "must have exactly %i bytes of extradata (got %i)\n",
+               AVPRIV_CODEC2_EXTRADATA_SIZE, avctx->extradata_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    return libcodec2_init_common(avctx, avpriv_codec2_mode_from_extradata(avctx->extradata));
+}
+
+static av_cold int libcodec2_init_encoder(AVCodecContext *avctx)
+{
+    LibCodec2Context *c2 = avctx->priv_data;
+
+    //will need to be smarter once we get wideband support
+    if (avctx->sample_rate != 8000 ||
+        avctx->channels != 1 ||
+        avctx->sample_fmt != AV_SAMPLE_FMT_S16) {
+        av_log(avctx, AV_LOG_ERROR, "only 8 kHz 16-bit mono allowed\n");
+        return AVERROR(EINVAL);
+    }
+
+    avctx->extradata = av_mallocz(AVPRIV_CODEC2_EXTRADATA_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!avctx->extradata) {
+        return AVERROR(ENOMEM);
+    }
+
+    avctx->extradata_size = AVPRIV_CODEC2_EXTRADATA_SIZE;
+    avpriv_codec2_make_extradata(avctx->extradata, c2->mode);
+
+    return libcodec2_init_common(avctx, c2->mode);
+}
+
+static av_cold int libcodec2_close(AVCodecContext *avctx)
+{
+    LibCodec2Context *c2 = avctx->priv_data;
+
+    codec2_destroy(c2->codec);
+    return 0;
+}
+
+static int libcodec2_decode(AVCodecContext *avctx, void *data,
+                            int *got_frame_ptr, AVPacket *pkt)
+{
+    LibCodec2Context *c2 = avctx->priv_data;
+    AVFrame *frame = data;
+    int ret, nframes, i;
+    uint8_t *input;
+    int16_t *output;
+
+    nframes           = pkt->size / avctx->block_align;
+    frame->nb_samples = avctx->frame_size * nframes;
+
+    ret = ff_get_buffer(avctx, frame, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    input  = pkt->data;
+    output = (int16_t *)frame->data[0];
+
+    for (i = 0; i < nframes; i++) {
+        codec2_decode(c2->codec, output, input);
+        input  += avctx->block_align;
+        output += avctx->frame_size;
+    }
+
+    *got_frame_ptr = nframes > 0;
+    return nframes * avctx->block_align;
+}
+
+static int libcodec2_encode(AVCodecContext *avctx, AVPacket *avpkt,
+                            const AVFrame *frame, int *got_packet_ptr)
+{
+    LibCodec2Context *c2 = avctx->priv_data;
+    int16_t *samples = (int16_t *)frame->data[0];
+
+    int ret = ff_alloc_packet2(avctx, avpkt, avctx->block_align, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    codec2_encode(c2->codec, avpkt->data, samples);
+    *got_packet_ptr = 1;
+
+    return 0;
+}
+
+AVCodec ff_libcodec2_decoder = {
+    .name                   = "libcodec2",
+    .long_name              = NULL_IF_CONFIG_SMALL("codec2 decoder using libcodec2"),
+    .type                   = AVMEDIA_TYPE_AUDIO,
+    .id                     = AV_CODEC_ID_CODEC2,
+    .priv_data_size         = sizeof(LibCodec2Context),
+    .init                   = libcodec2_init_decoder,
+    .close                  = libcodec2_close,
+    .decode                 = libcodec2_decode,
+    .capabilities           = 0,
+    .supported_samplerates  = (const int[]){ 8000, 0 },
+    .sample_fmts            = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
+    .channel_layouts        = (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0 },
+    .priv_class             = &libcodec2_dec_class,
+};
+
+AVCodec ff_libcodec2_encoder = {
+    .name                   = "libcodec2",
+    .long_name              = NULL_IF_CONFIG_SMALL("codec2 encoder using libcodec2"),
+    .type                   = AVMEDIA_TYPE_AUDIO,
+    .id                     = AV_CODEC_ID_CODEC2,
+    .priv_data_size         = sizeof(LibCodec2Context),
+    .init                   = libcodec2_init_encoder,
+    .close                  = libcodec2_close,
+    .encode2                = libcodec2_encode,
+    .capabilities           = 0,
+    .supported_samplerates  = (const int[]){ 8000, 0 },
+    .sample_fmts            = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
+    .channel_layouts        = (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0 },
+    .priv_class             = &libcodec2_enc_class,
+};

diff --git a/libavcodec/libdavs2.c b/libavcodec/libdavs2.c
new file mode 100644
index 0000000..aa14782
--- /dev/null
+++ b/libavcodec/libdavs2.c

@@ -0,0 +1,171 @@
+/*
+ * AVS2 decoding using the davs2 library
+ *
+ * Copyright (C) 2018 Yiqun Xu, <yiqun.xu@vipl.ict.ac.cn>
+ *                    Falei Luo, <falei.luo@gmail.com>
+ *                    Huiwen Ren, <hwrenx@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "davs2.h"
+
+typedef struct DAVS2Context {
+    void *decoder;
+
+    AVFrame *frame;
+    davs2_param_t    param;      // decoding parameters
+    davs2_packet_t   packet;     // input bitstream
+
+    int decoded_frames;
+
+    davs2_picture_t  out_frame;  // output data, frame data
+    davs2_seq_info_t headerset;  // output data, sequence header
+
+}DAVS2Context;
+
+static av_cold int davs2_init(AVCodecContext *avctx)
+{
+    DAVS2Context *cad = avctx->priv_data;
+
+    /* init the decoder */
+    cad->param.threads      = avctx->thread_count;
+    cad->param.info_level   = 0;
+    cad->decoder            = davs2_decoder_open(&cad->param);
+
+    if (!cad->decoder) {
+        av_log(avctx, AV_LOG_ERROR, "decoder created error.");
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "decoder created. %p\n", cad->decoder);
+    return 0;
+}
+
+static int davs2_dump_frames(AVCodecContext *avctx, davs2_picture_t *pic,
+                             davs2_seq_info_t *headerset, int ret_type, AVFrame *frame)
+{
+    DAVS2Context *cad    = avctx->priv_data;
+    int bytes_per_sample = pic->bytes_per_sample;
+    int plane = 0;
+    int line  = 0;
+
+    if (!headerset)
+        return 0;
+
+    if (!pic || ret_type == DAVS2_GOT_HEADER) {
+        avctx->width     = headerset->width;
+        avctx->height    = headerset->height;
+        avctx->pix_fmt   = headerset->output_bit_depth == 10 ?
+                           AV_PIX_FMT_YUV420P10 : AV_PIX_FMT_YUV420P;
+
+        avctx->framerate = av_d2q(headerset->frame_rate,4096);
+        return 0;
+    }
+
+    for (plane = 0; plane < 3; ++plane) {
+        int size_line = pic->widths[plane] * bytes_per_sample;
+        frame->buf[plane]  = av_buffer_alloc(size_line * pic->lines[plane]);
+
+        if (!frame->buf[plane]){
+            av_log(avctx, AV_LOG_ERROR, "dump error: alloc failed.\n");
+            return AVERROR(ENOMEM);
+        }
+
+        frame->data[plane]     = frame->buf[plane]->data;
+        frame->linesize[plane] = pic->widths[plane];
+
+        for (line = 0; line < pic->lines[plane]; ++line)
+            memcpy(frame->data[plane] + line * size_line,
+                   pic->planes[plane] + line * pic->strides[plane],
+                   pic->widths[plane] * bytes_per_sample);
+    }
+
+    frame->width     = cad->headerset.width;
+    frame->height    = cad->headerset.height;
+    frame->pts       = cad->out_frame.pts;
+    frame->pict_type = pic->type;
+    frame->format    = avctx->pix_fmt;
+
+    cad->decoded_frames++;
+    return 1;
+}
+
+static av_cold int davs2_end(AVCodecContext *avctx)
+{
+    DAVS2Context *cad = avctx->priv_data;
+
+    /* close the decoder */
+    if (cad->decoder) {
+        davs2_decoder_close(cad->decoder);
+        cad->decoder = NULL;
+    }
+
+    return 0;
+}
+
+static int davs2_decode_frame(AVCodecContext *avctx, void *data,
+                              int *got_frame, AVPacket *avpkt)
+{
+    DAVS2Context *cad      = avctx->priv_data;
+    int           buf_size = avpkt->size;
+    uint8_t      *buf_ptr  = avpkt->data;
+    AVFrame      *frame    = data;
+    int           ret      = DAVS2_DEFAULT;
+
+    if (!buf_size) {
+        return 0;
+    }
+
+    cad->packet.data = buf_ptr;
+    cad->packet.len  = buf_size;
+    cad->packet.pts  = avpkt->pts;
+    cad->packet.dts  = avpkt->dts;
+
+    ret = davs2_decoder_send_packet(cad->decoder, &cad->packet);
+
+
+    if (ret == DAVS2_ERROR) {
+        av_log(avctx, AV_LOG_ERROR, "Decoder error: can't read packet\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = davs2_decoder_recv_frame(cad->decoder, &cad->headerset, &cad->out_frame);
+
+    if (ret != DAVS2_DEFAULT) {
+        *got_frame = davs2_dump_frames(avctx, &cad->out_frame, &cad->headerset, ret, frame);
+        davs2_decoder_frame_unref(cad->decoder, &cad->out_frame);
+    }
+
+    return buf_size;
+}
+
+AVCodec ff_libdavs2_decoder = {
+    .name           = "libdavs2",
+    .long_name      = NULL_IF_CONFIG_SMALL("libdavs2 AVS2-P2/IEEE1857.4"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_AVS2,
+    .priv_data_size = sizeof(DAVS2Context),
+    .init           = davs2_init,
+    .close          = davs2_end,
+    .decode         = davs2_decode_frame,
+    .capabilities   =  AV_CODEC_CAP_DELAY,//AV_CODEC_CAP_DR1 |
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10,
+                                                     AV_PIX_FMT_NONE },
+    .wrapper_name   = "libdavs2",
+};

diff --git a/libavcodec/libfdk-aacdec.c b/libavcodec/libfdk-aacdec.c
index 2857b94..1abe1d8 100644
--- a/libavcodec/libfdk-aacdec.c
+++ b/libavcodec/libfdk-aacdec.c

@@ -25,9 +25,15 @@
 #include "avcodec.h"
 #include "internal.h"
 
-/* The version macro is introduced the same time as the setting enum was
- * changed, so this check should suffice. */
-#ifndef AACDECODER_LIB_VL0
+#ifdef AACDECODER_LIB_VL0
+#define FDKDEC_VER_AT_LEAST(vl0, vl1) \
+    ((AACDECODER_LIB_VL0 > vl0) || \
+     (AACDECODER_LIB_VL0 == vl0 && AACDECODER_LIB_VL1 >= vl1))
+#else
+#define FDKDEC_VER_AT_LEAST(vl0, vl1) 0
+#endif
+
+#if !FDKDEC_VER_AT_LEAST(2, 5) // < 2.5.10
 #define AAC_PCM_MAX_OUTPUT_CHANNELS AAC_PCM_OUTPUT_CHANNELS
 #endif
 
@@ -48,6 +54,7 @@
     int drc_level;
     int drc_boost;
     int drc_heavy;
+    int drc_effect;
     int drc_cut;
     int level_limit;
 } FDKAACDecContext;
@@ -72,14 +79,21 @@
                      OFFSET(drc_level),      AV_OPT_TYPE_INT,   { .i64 = -1},  -1, 127, AD, NULL    },
     { "drc_heavy", "Dynamic Range Control: heavy compression, where [1] is on (RF mode) and [0] is off",
                      OFFSET(drc_heavy),      AV_OPT_TYPE_INT,   { .i64 = -1},  -1, 1,   AD, NULL    },
-#ifdef AACDECODER_LIB_VL0
+#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
     { "level_limit", "Signal level limiting", OFFSET(level_limit), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, 1, AD },
 #endif
+#if FDKDEC_VER_AT_LEAST(3, 0) // 3.0.0
+    { "drc_effect","Dynamic Range Control: effect type, where e.g. [0] is none and [6] is general",
+                     OFFSET(drc_effect),     AV_OPT_TYPE_INT,   { .i64 = -1},  -1, 8,   AD, NULL    },
+#endif
     { NULL }
 };
 
 static const AVClass fdk_aac_dec_class = {
-    "libfdk-aac decoder", av_default_item_name, fdk_aac_dec_options, LIBAVUTIL_VERSION_INT
+    .class_name = "libfdk-aac decoder",
+    .item_name  = av_default_item_name,
+    .option     = fdk_aac_dec_options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 static int get_stream_info(AVCodecContext *avctx)
@@ -293,13 +307,22 @@
         }
     }
 
-#ifdef AACDECODER_LIB_VL0
+#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
     if (aacDecoder_SetParam(s->handle, AAC_PCM_LIMITER_ENABLE, s->level_limit) != AAC_DEC_OK) {
         av_log(avctx, AV_LOG_ERROR, "Unable to set in signal level limiting in the decoder\n");
         return AVERROR_UNKNOWN;
     }
 #endif
 
+#if FDKDEC_VER_AT_LEAST(3, 0) // 3.0.0
+    if (s->drc_effect != -1) {
+        if (aacDecoder_SetParam(s->handle, AAC_UNIDRC_SET_EFFECT, s->drc_effect) != AAC_DEC_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to set DRC effect type in the decoder\n");
+            return AVERROR_UNKNOWN;
+        }
+    }
+#endif
+
     avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 
     s->decoder_buffer_size = DECODER_BUFFSIZE * DECODER_MAX_CHANNELS;
@@ -382,4 +405,5 @@
     .priv_class     = &fdk_aac_dec_class,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
                       FF_CODEC_CAP_INIT_CLEANUP,
+    .wrapper_name   = "libfdk",
 };

diff --git a/libavcodec/libfdk-aacenc.c b/libavcodec/libfdk-aacenc.c
index 0e2051b..5620bb5 100644
--- a/libavcodec/libfdk-aacenc.c
+++ b/libavcodec/libfdk-aacenc.c

@@ -26,11 +26,20 @@
 #include "audio_frame_queue.h"
 #include "internal.h"
 
+#ifdef AACENCODER_LIB_VL0
+#define FDKENC_VER_AT_LEAST(vl0, vl1) \
+    ((AACENCODER_LIB_VL0 > vl0) || \
+     (AACENCODER_LIB_VL0 == vl0 && AACENCODER_LIB_VL1 >= vl1))
+#else
+#define FDKENC_VER_AT_LEAST(vl0, vl1) 0
+#endif
+
 typedef struct AACContext {
     const AVClass *class;
     HANDLE_AACENCODER handle;
     int afterburner;
     int eld_sbr;
+    int eld_v2;
     int signaling;
     int latm;
     int header_period;
@@ -42,6 +51,9 @@
 static const AVOption aac_enc_options[] = {
     { "afterburner", "Afterburner (improved quality)", offsetof(AACContext, afterburner), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
     { "eld_sbr", "Enable SBR for ELD (for SBR in other configurations, use the -profile parameter)", offsetof(AACContext, eld_sbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+#if FDKENC_VER_AT_LEAST(4, 0) // 4.0.0
+    { "eld_v2", "Enable ELDv2 (LD-MPS extension for ELD stereo signals)", offsetof(AACContext, eld_v2), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+#endif
     { "signaling", "SBR/PS signaling style", offsetof(AACContext, signaling), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 2, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
     { "default", "Choose signaling implicitly (explicit hierarchical by default, implicit if global header is disabled)", 0, AV_OPT_TYPE_CONST, { .i64 = -1 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
     { "implicit", "Implicit backwards compatible signaling", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
@@ -54,7 +66,10 @@
 };
 
 static const AVClass aac_enc_class = {
-    "libfdk_aac", av_default_item_name, aac_enc_options, LIBAVUTIL_VERSION_INT
+    .class_name = "libfdk_aac",
+    .item_name  = av_default_item_name,
+    .option     = aac_enc_options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 static const char *aac_get_error(AACENC_ERROR err)
@@ -144,14 +159,35 @@
 
     switch (avctx->channels) {
     case 1: mode = MODE_1;       sce = 1; cpe = 0; break;
-    case 2: mode = MODE_2;       sce = 0; cpe = 1; break;
+    case 2:
+#if FDKENC_VER_AT_LEAST(4, 0) // 4.0.0
+      // (profile + 1) to map from profile range to AOT range
+      if (aot == FF_PROFILE_AAC_ELD + 1 && s->eld_v2) {
+          if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELMODE,
+                                         128)) != AACENC_OK) {
+              av_log(avctx, AV_LOG_ERROR, "Unable to enable ELDv2: %s\n",
+                     aac_get_error(err));
+              goto error;
+          } else {
+            mode = MODE_212;
+            sce = 1;
+            cpe = 0;
+          }
+      } else
+#endif
+      {
+        mode = MODE_2;
+        sce = 0;
+        cpe = 1;
+      }
+      break;
     case 3: mode = MODE_1_2;     sce = 1; cpe = 1; break;
     case 4: mode = MODE_1_2_1;   sce = 2; cpe = 1; break;
     case 5: mode = MODE_1_2_2;   sce = 1; cpe = 2; break;
     case 6: mode = MODE_1_2_2_1; sce = 2; cpe = 2; break;
 /* The version macro is introduced the same time as the 7.1 support, so this
    should suffice. */
-#ifdef AACENCODER_LIB_VL0
+#if FDKENC_VER_AT_LEAST(3, 4) // 3.4.12
     case 8:
         sce = 2;
         cpe = 3;
@@ -224,7 +260,8 @@
     /* Choose bitstream format - if global header is requested, use
      * raw access units, otherwise use ADTS. */
     if ((err = aacEncoder_SetParam(s->handle, AACENC_TRANSMUX,
-                                   avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER ? 0 : s->latm ? 10 : 2)) != AACENC_OK) {
+                                   avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER ? TT_MP4_RAW :
+                                   s->latm ? TT_MP4_LOAS : TT_MP4_ADTS)) != AACENC_OK) {
         av_log(avctx, AV_LOG_ERROR, "Unable to set the transmux format: %s\n",
                aac_get_error(err));
         goto error;
@@ -286,7 +323,11 @@
     }
 
     avctx->frame_size = info.frameLength;
+#if FDKENC_VER_AT_LEAST(4, 0) // 4.0.0
+    avctx->initial_padding = info.nDelay;
+#else
     avctx->initial_padding = info.encoderDelay;
+#endif
     ff_af_queue_init(avctx, &s->afq);
 
     if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
@@ -319,28 +360,35 @@
     int out_buffer_size, out_buffer_element_size;
     void *in_ptr, *out_ptr;
     int ret;
+    uint8_t dummy_buf[1];
     AACENC_ERROR err;
 
     /* handle end-of-stream small frame and flushing */
     if (!frame) {
+        /* Must be a non-null pointer, even if it's a dummy. We could use
+         * the address of anything else on the stack as well. */
+        in_ptr               = dummy_buf;
+        in_buffer_size       = 0;
+
         in_args.numInSamples = -1;
     } else {
-        in_ptr                   = frame->data[0];
-        in_buffer_size           = 2 * avctx->channels * frame->nb_samples;
-        in_buffer_element_size   = 2;
+        in_ptr               = frame->data[0];
+        in_buffer_size       = 2 * avctx->channels * frame->nb_samples;
 
-        in_args.numInSamples     = avctx->channels * frame->nb_samples;
-        in_buf.numBufs           = 1;
-        in_buf.bufs              = &in_ptr;
-        in_buf.bufferIdentifiers = &in_buffer_identifier;
-        in_buf.bufSizes          = &in_buffer_size;
-        in_buf.bufElSizes        = &in_buffer_element_size;
+        in_args.numInSamples = avctx->channels * frame->nb_samples;
 
         /* add current frame to the queue */
         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
             return ret;
     }
 
+    in_buffer_element_size   = 2;
+    in_buf.numBufs           = 1;
+    in_buf.bufs              = &in_ptr;
+    in_buf.bufferIdentifiers = &in_buffer_identifier;
+    in_buf.bufSizes          = &in_buffer_size;
+    in_buf.bufElSizes        = &in_buffer_element_size;
+
     /* The maximum packet size is 6144 bits aka 768 bytes per channel. */
     if ((ret = ff_alloc_packet2(avctx, avpkt, FFMAX(8192, 768 * avctx->channels), 0)) < 0)
         return ret;
@@ -396,7 +444,7 @@
     AV_CH_LAYOUT_4POINT0,
     AV_CH_LAYOUT_5POINT0_BACK,
     AV_CH_LAYOUT_5POINT1_BACK,
-#ifdef AACENCODER_LIB_VL0
+#if FDKENC_VER_AT_LEAST(3, 4) // 3.4.12
     AV_CH_LAYOUT_7POINT1_WIDE_BACK,
     AV_CH_LAYOUT_7POINT1,
 #endif
@@ -425,4 +473,5 @@
     .profiles              = profiles,
     .supported_samplerates = aac_sample_rates,
     .channel_layouts       = aac_channel_layout,
+    .wrapper_name          = "libfdk",
 };

diff --git a/libavcodec/libgsmdec.c b/libavcodec/libgsmdec.c
index a503215..89e1de0 100644
--- a/libavcodec/libgsmdec.c
+++ b/libavcodec/libgsmdec.c

@@ -135,6 +135,7 @@
     .decode         = libgsm_decode_frame,
     .flush          = libgsm_flush,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .wrapper_name   = "libgsm",
 };
 #endif
 #if CONFIG_LIBGSM_MS_DECODER
@@ -149,5 +150,6 @@
     .decode         = libgsm_decode_frame,
     .flush          = libgsm_flush,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .wrapper_name   = "libgsm",
 };
 #endif

diff --git a/libavcodec/libgsmenc.c b/libavcodec/libgsmenc.c
index e25db95..c9e7ba0 100644
--- a/libavcodec/libgsmenc.c
+++ b/libavcodec/libgsmenc.c

@@ -126,6 +126,7 @@
     .close          = libgsm_encode_close,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
+    .wrapper_name   = "libgsm",
 };
 #endif
 #if CONFIG_LIBGSM_MS_ENCODER
@@ -139,5 +140,6 @@
     .close          = libgsm_encode_close,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
+    .wrapper_name   = "libgsm",
 };
 #endif

diff --git a/libavcodec/libilbc.c b/libavcodec/libilbc.c
index c4c054f..9a56cc8 100644
--- a/libavcodec/libilbc.c
+++ b/libavcodec/libilbc.c

@@ -193,4 +193,5 @@
                                                      AV_SAMPLE_FMT_NONE },
     .defaults       = ilbc_encode_defaults,
     .priv_class     = &ilbc_enc_class,
+    .wrapper_name   = "libbilbc",
 };

diff --git a/libavcodec/libkvazaar.c b/libavcodec/libkvazaar.c
index f35b0df..5bc5b4e 100644
--- a/libavcodec/libkvazaar.c
+++ b/libavcodec/libkvazaar.c

@@ -207,7 +207,7 @@
               0
             };
             av_image_copy(input_pic->data, dst_linesizes,
-                          frame->data, frame->linesize,
+                          (const uint8_t **)frame->data, frame->linesize,
                           frame->format, frame->width, frame->height);
         }
 
@@ -231,7 +231,7 @@
         kvz_data_chunk *chunk = NULL;
         uint64_t written = 0;
 
-        retval = ff_alloc_packet(avpkt, len_out);
+        retval = ff_alloc_packet2(avctx, avpkt, len_out, len_out);
         if (retval < 0) {
             av_log(avctx, AV_LOG_ERROR, "Failed to allocate output packet.\n");
             goto done;
@@ -305,4 +305,6 @@
     .close            = libkvazaar_close,
 
     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
+
+    .wrapper_name     = "libkvazaar",
 };

diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c
index 5e26743..ecdd2e3 100644
--- a/libavcodec/libmp3lame.c
+++ b/libavcodec/libmp3lame.c

@@ -349,4 +349,5 @@
                                                   0 },
     .priv_class            = &libmp3lame_class,
     .defaults              = libmp3lame_defaults,
+    .wrapper_name          = "libmp3lame",
 };

diff --git a/libavcodec/libopencore-amr.c b/libavcodec/libopencore-amr.c
index 8545ffe..516f625 100644
--- a/libavcodec/libopencore-amr.c
+++ b/libavcodec/libopencore-amr.c

@@ -183,7 +183,10 @@
 };
 
 static const AVClass amrnb_class = {
-    "libopencore_amrnb", av_default_item_name, options, LIBAVUTIL_VERSION_INT
+    .class_name = "libopencore_amrnb",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 static av_cold int amr_nb_encode_init(AVCodecContext *avctx)
@@ -375,6 +378,7 @@
     .close          = amr_wb_decode_close,
     .decode         = amr_wb_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .wrapper_name   = "libopencore_amrwb",
 };
 
 #endif /* CONFIG_LIBOPENCORE_AMRWB_DECODER */

diff --git a/libavcodec/libopenh264dec.c b/libavcodec/libopenh264dec.c
index d12e715..f9b91ce 100644
--- a/libavcodec/libopenh264dec.c
+++ b/libavcodec/libopenh264dec.c

@@ -95,8 +95,22 @@
     int ret, linesize[3];
     AVFrame *avframe = data;
     DECODING_STATE state;
+#if OPENH264_VER_AT_LEAST(1, 7)
+    int opt;
+#endif
 
-    state = (*s->decoder)->DecodeFrame2(s->decoder, avpkt->data, avpkt->size, ptrs, &info);
+    if (!avpkt->data) {
+#if OPENH264_VER_AT_LEAST(1, 9)
+        int end_of_stream = 1;
+        (*s->decoder)->SetOption(s->decoder, DECODER_OPTION_END_OF_STREAM, &end_of_stream);
+        state = (*s->decoder)->FlushFrame(s->decoder, ptrs, &info);
+#else
+        return 0;
+#endif
+    } else {
+        info.uiInBsTimeStamp = avpkt->pts;
+        state = (*s->decoder)->DecodeFrame2(s->decoder, avpkt->data, avpkt->size, ptrs, &info);
+    }
     if (state != dsErrorFree) {
         av_log(avctx, AV_LOG_ERROR, "DecodeFrame2 failed\n");
         return AVERROR_UNKNOWN;
@@ -120,13 +134,19 @@
     linesize[1] = linesize[2] = info.UsrData.sSystemBuffer.iStride[1];
     av_image_copy(avframe->data, avframe->linesize, (const uint8_t **) ptrs, linesize, avctx->pix_fmt, avctx->width, avctx->height);
 
-    avframe->pts     = avpkt->pts;
-    avframe->pkt_dts = avpkt->dts;
+    avframe->pts     = info.uiOutYuvTimeStamp;
+    avframe->pkt_dts = AV_NOPTS_VALUE;
 #if FF_API_PKT_PTS
 FF_DISABLE_DEPRECATION_WARNINGS
     avframe->pkt_pts = avpkt->pts;
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
+#if OPENH264_VER_AT_LEAST(1, 7)
+    (*s->decoder)->GetOption(s->decoder, DECODER_OPTION_PROFILE, &opt);
+    avctx->profile = opt;
+    (*s->decoder)->GetOption(s->decoder, DECODER_OPTION_LEVEL, &opt);
+    avctx->level = opt;
+#endif
 
     *got_frame = 1;
     return avpkt->size;
@@ -141,10 +161,9 @@
     .init           = svc_decode_init,
     .decode         = svc_decode_frame,
     .close          = svc_decode_close,
-    // The decoder doesn't currently support B-frames, and the decoder's API
-    // doesn't support reordering/delay, but the BSF could incur delay.
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
     .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_THREADSAFE |
                       FF_CODEC_CAP_INIT_CLEANUP,
     .bsfs           = "h264_mp4toannexb",
+    .wrapper_name   = "libopenh264",
 };

diff --git a/libavcodec/libopenh264enc.c b/libavcodec/libopenh264enc.c
index 9c22bf4..83c3f0c 100644
--- a/libavcodec/libopenh264enc.c
+++ b/libavcodec/libopenh264enc.c

@@ -75,7 +75,10 @@
 };
 
 static const AVClass class = {
-    "libopenh264enc", av_default_item_name, options, LIBAVUTIL_VERSION_INT
+    .class_name = "libvo_amrwbenc",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 static av_cold int svc_encode_close(AVCodecContext *avctx)
@@ -243,6 +246,10 @@
     sp.iPicWidth  = avctx->width;
     sp.iPicHeight = avctx->height;
 
+    if (frame->pict_type == AV_PICTURE_TYPE_I) {
+        (*s->encoder)->ForceIntraFrame(s->encoder, true);
+    }
+
     encoded = (*s->encoder)->EncodeFrame(s->encoder, &sp, &fbi);
     if (encoded != cmResultSuccess) {
         av_log(avctx, AV_LOG_ERROR, "EncodeFrame failed\n");
@@ -298,4 +305,5 @@
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P,
                                                     AV_PIX_FMT_NONE },
     .priv_class     = &class,
+    .wrapper_name   = "libopenh264",
 };

diff --git a/libavcodec/libopenjpegdec.c b/libavcodec/libopenjpegdec.c
index 67d47bd..344c5ba 100644
--- a/libavcodec/libopenjpegdec.c
+++ b/libavcodec/libopenjpegdec.c

@@ -34,27 +34,7 @@
 #include "internal.h"
 #include "thread.h"
 
-#if HAVE_OPENJPEG_2_3_OPENJPEG_H
-#  include <openjpeg-2.3/openjpeg.h>
-#elif HAVE_OPENJPEG_2_2_OPENJPEG_H
-#  include <openjpeg-2.2/openjpeg.h>
-#elif HAVE_OPENJPEG_2_1_OPENJPEG_H
-#  include <openjpeg-2.1/openjpeg.h>
-#elif HAVE_OPENJPEG_2_0_OPENJPEG_H
-#  include <openjpeg-2.0/openjpeg.h>
-#elif HAVE_OPENJPEG_1_5_OPENJPEG_H
-#  include <openjpeg-1.5/openjpeg.h>
-#else
-#  include <openjpeg.h>
-#endif
-
-#if HAVE_OPENJPEG_2_3_OPENJPEG_H || HAVE_OPENJPEG_2_2_OPENJPEG_H || HAVE_OPENJPEG_2_1_OPENJPEG_H || HAVE_OPENJPEG_2_0_OPENJPEG_H
-#  define OPENJPEG_MAJOR_VERSION 2
-#  define OPJ(x) OPJ_##x
-#else
-#  define OPENJPEG_MAJOR_VERSION 1
-#  define OPJ(x) x
-#endif
+#include <openjpeg.h>
 
 #define JP2_SIG_TYPE    0x6A502020
 #define JP2_SIG_VALUE   0x0D0A870A
@@ -65,6 +45,7 @@
                            AV_PIX_FMT_RGB48, AV_PIX_FMT_RGBA64
 
 #define GRAY_PIXEL_FORMATS AV_PIX_FMT_GRAY8, AV_PIX_FMT_YA8,                  \
+                           AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, \
                            AV_PIX_FMT_GRAY16, AV_PIX_FMT_YA16
 
 #define YUV_PIXEL_FORMATS  AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUVA420P, \
@@ -97,9 +78,6 @@
 typedef struct LibOpenJPEGContext {
     AVClass *class;
     opj_dparameters_t dec_params;
-#if OPENJPEG_MAJOR_VERSION == 1
-    opj_event_mgr_t event_mgr;
-#endif // OPENJPEG_MAJOR_VERSION == 1
     int lowqual;
 } LibOpenJPEGContext;
 
@@ -118,7 +96,6 @@
     av_log(data, AV_LOG_DEBUG, "%s", msg);
 }
 
-#if OPENJPEG_MAJOR_VERSION == 2
 typedef struct BufferReader {
     int pos;
     int size;
@@ -176,7 +153,6 @@
     reader->pos = (int)nb_bytes;
     return OPJ_TRUE;
 }
-#endif // OPENJPEG_MAJOR_VERSION == 2
 
 static inline int libopenjpeg_matches_pix_fmt(const opj_image_t *image, enum AVPixelFormat pix_fmt)
 {
@@ -221,15 +197,15 @@
     int possible_fmts_nb = 0;
 
     switch (image->color_space) {
-    case OPJ(CLRSPC_SRGB):
+    case OPJ_CLRSPC_SRGB:
         possible_fmts    = libopenjpeg_rgb_pix_fmts;
         possible_fmts_nb = FF_ARRAY_ELEMS(libopenjpeg_rgb_pix_fmts);
         break;
-    case OPJ(CLRSPC_GRAY):
+    case OPJ_CLRSPC_GRAY:
         possible_fmts    = libopenjpeg_gray_pix_fmts;
         possible_fmts_nb = FF_ARRAY_ELEMS(libopenjpeg_gray_pix_fmts);
         break;
-    case OPJ(CLRSPC_SYCC):
+    case OPJ_CLRSPC_SYCC:
         possible_fmts    = libopenjpeg_yuv_pix_fmts;
         possible_fmts_nb = FF_ARRAY_ELEMS(libopenjpeg_yuv_pix_fmts);
         break;
@@ -356,14 +332,9 @@
     int ispacked   = 0;
     int i;
     opj_image_t *image = NULL;
-#if OPENJPEG_MAJOR_VERSION == 1
-    opj_dinfo_t *dec = NULL;
-    opj_cio_t *stream = NULL;
-#else // OPENJPEG_MAJOR_VERSION == 2
     BufferReader reader = {0, avpkt->size, avpkt->data};
     opj_codec_t *dec = NULL;
     opj_stream_t *stream = NULL;
-#endif // OPENJPEG_MAJOR_VERSION == 1
 
     *got_frame = 0;
 
@@ -371,13 +342,13 @@
     if ((AV_RB32(buf) == 12) &&
         (AV_RB32(buf + 4) == JP2_SIG_TYPE) &&
         (AV_RB32(buf + 8) == JP2_SIG_VALUE)) {
-        dec = opj_create_decompress(OPJ(CODEC_JP2));
+        dec = opj_create_decompress(OPJ_CODEC_JP2);
     } else {
         /* If the AVPacket contains a jp2c box, then skip to
          * the starting byte of the codestream. */
         if (AV_RB32(buf + 4) == AV_RB32("jp2c"))
             buf += 8;
-        dec = opj_create_decompress(OPJ(CODEC_J2K));
+        dec = opj_create_decompress(OPJ_CODEC_J2K);
     }
 
     if (!dec) {
@@ -386,15 +357,6 @@
         goto done;
     }
 
-#if OPENJPEG_MAJOR_VERSION == 1
-    memset(&ctx->event_mgr, 0, sizeof(ctx->event_mgr));
-    ctx->event_mgr.info_handler    = info_callback;
-    ctx->event_mgr.error_handler   = error_callback;
-    ctx->event_mgr.warning_handler = warning_callback;
-    opj_set_event_mgr((opj_common_ptr) dec, &ctx->event_mgr, avctx);
-    ctx->dec_params.cp_limit_decoding = LIMIT_TO_MAIN_HEADER;
-    ctx->dec_params.cp_layer          = ctx->lowqual;
-#else // OPENJPEG_MAJOR_VERSION == 2
     if (!opj_set_error_handler(dec, error_callback, avctx) ||
         !opj_set_warning_handler(dec, warning_callback, avctx) ||
         !opj_set_info_handler(dec, info_callback, avctx)) {
@@ -405,16 +367,11 @@
 
     ctx->dec_params.cp_layer = ctx->lowqual;
     ctx->dec_params.cp_reduce = avctx->lowres;
-#endif // OPENJPEG_MAJOR_VERSION == 1
 
     // Tie decoder with decoding parameters
     opj_setup_decoder(dec, &ctx->dec_params);
 
-#if OPENJPEG_MAJOR_VERSION == 1
-    stream = opj_cio_open((opj_common_ptr) dec, buf, buf_size);
-#else // OPENJPEG_MAJOR_VERSION == 2
     stream = opj_stream_default_create(OPJ_STREAM_READ);
-#endif // OPENJPEG_MAJOR_VERSION == 1
 
     if (!stream) {
         av_log(avctx, AV_LOG_ERROR,
@@ -423,27 +380,13 @@
         goto done;
     }
 
-#if OPENJPEG_MAJOR_VERSION == 1
-    // Decode the header only.
-    image = opj_decode_with_info(dec, stream, NULL);
-    opj_cio_close(stream);
-    stream = NULL;
-    ret = !image;
-#else // OPENJPEG_MAJOR_VERSION == 2
     opj_stream_set_read_function(stream, stream_read);
     opj_stream_set_skip_function(stream, stream_skip);
     opj_stream_set_seek_function(stream, stream_seek);
-#if HAVE_OPENJPEG_2_3_OPENJPEG_H || HAVE_OPENJPEG_2_2_OPENJPEG_H || HAVE_OPENJPEG_2_1_OPENJPEG_H
     opj_stream_set_user_data(stream, &reader, NULL);
-#elif HAVE_OPENJPEG_2_0_OPENJPEG_H
-    opj_stream_set_user_data(stream, &reader);
-#else
-#error Missing call to opj_stream_set_user_data
-#endif
     opj_stream_set_user_data_length(stream, avpkt->size);
     // Decode the header only.
     ret = !opj_read_header(stream, dec, &image);
-#endif // OPENJPEG_MAJOR_VERSION == 1
 
     if (ret) {
         av_log(avctx, AV_LOG_ERROR, "Error decoding codestream header.\n");
@@ -477,25 +420,7 @@
     if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
         goto done;
 
-#if OPENJPEG_MAJOR_VERSION == 1
-    ctx->dec_params.cp_limit_decoding = NO_LIMITATION;
-    ctx->dec_params.cp_reduce = avctx->lowres;
-    // Tie decoder with decoding parameters.
-    opj_setup_decoder(dec, &ctx->dec_params);
-    stream = opj_cio_open((opj_common_ptr) dec, buf, buf_size);
-    if (!stream) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Codestream could not be opened for reading.\n");
-        ret = AVERROR_EXTERNAL;
-        goto done;
-    }
-    opj_image_destroy(image);
-    // Decode the codestream
-    image = opj_decode_with_info(dec, stream, NULL);
-    ret = !image;
-#else // OPENJPEG_MAJOR_VERSION == 2
     ret = !opj_decode(dec, stream, image);
-#endif // OPENJPEG_MAJOR_VERSION == 1
 
     if (ret) {
         av_log(avctx, AV_LOG_ERROR, "Error decoding codestream.\n");
@@ -556,25 +481,11 @@
 
 done:
     opj_image_destroy(image);
-#if OPENJPEG_MAJOR_VERSION == 2
     opj_stream_destroy(stream);
     opj_destroy_codec(dec);
-#else
-    opj_cio_close(stream);
-    opj_destroy_decompress(dec);
-#endif
     return ret;
 }
 
-static av_cold void libopenjpeg_static_init(AVCodec *codec)
-{
-    const char *version = opj_version();
-    int major, minor;
-
-    if (sscanf(version, "%d.%d", &major, &minor) == 2 && 1000*major + minor <= 1003)
-        codec->capabilities |= AV_CODEC_CAP_EXPERIMENTAL;
-}
-
 #define OFFSET(x) offsetof(LibOpenJPEGContext, x)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 
@@ -602,5 +513,5 @@
     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .max_lowres     = 31,
     .priv_class     = &openjpeg_class,
-    .init_static_data = libopenjpeg_static_init,
+    .wrapper_name   = "libopenjpeg",
 };

diff --git a/libavcodec/libopenjpegenc.c b/libavcodec/libopenjpegenc.c
index 92b4433..1998008 100644
--- a/libavcodec/libopenjpegenc.c
+++ b/libavcodec/libopenjpegenc.c

@@ -31,38 +31,11 @@
 #include "libavutil/opt.h"
 #include "avcodec.h"
 #include "internal.h"
-
-#if HAVE_OPENJPEG_2_3_OPENJPEG_H
-#  include <openjpeg-2.3/openjpeg.h>
-#elif HAVE_OPENJPEG_2_2_OPENJPEG_H
-#  include <openjpeg-2.2/openjpeg.h>
-#elif HAVE_OPENJPEG_2_1_OPENJPEG_H
-#  include <openjpeg-2.1/openjpeg.h>
-#elif HAVE_OPENJPEG_2_0_OPENJPEG_H
-#  include <openjpeg-2.0/openjpeg.h>
-#elif HAVE_OPENJPEG_1_5_OPENJPEG_H
-#  include <openjpeg-1.5/openjpeg.h>
-#else
-#  include <openjpeg.h>
-#endif
-
-#if HAVE_OPENJPEG_2_3_OPENJPEG_H || HAVE_OPENJPEG_2_2_OPENJPEG_H || HAVE_OPENJPEG_2_1_OPENJPEG_H || HAVE_OPENJPEG_2_0_OPENJPEG_H
-#  define OPENJPEG_MAJOR_VERSION 2
-#  define OPJ(x) OPJ_##x
-#else
-#  define OPENJPEG_MAJOR_VERSION 1
-#  define OPJ(x) x
-#endif
+#include <openjpeg.h>
 
 typedef struct LibOpenJPEGContext {
     AVClass *avclass;
-#if OPENJPEG_MAJOR_VERSION == 1
-    opj_image_t *image;
-#endif // OPENJPEG_MAJOR_VERSION == 1
     opj_cparameters_t enc_params;
-#if OPENJPEG_MAJOR_VERSION == 1
-    opj_event_mgr_t event_mgr;
-#endif // OPENJPEG_MAJOR_VERSION == 1
     int format;
     int profile;
     int prog_order;
@@ -88,7 +61,6 @@
     av_log(data, AV_LOG_DEBUG, "%s\n", msg);
 }
 
-#if OPENJPEG_MAJOR_VERSION == 2
 typedef struct PacketWriter {
     int pos;
     AVPacket *packet;
@@ -158,7 +130,6 @@
     writer->pos = (int)nb_bytes;
     return OPJ_TRUE;
 }
-#endif // OPENJPEG_MAJOR_VERSION == 2
 
 static void cinema_parameters(opj_cparameters_t *p)
 {
@@ -182,7 +153,7 @@
     p->csty |= 0x01;
 
     /* The progression order shall be CPRL */
-    p->prog_order = OPJ(CPRL);
+    p->prog_order = OPJ_CPRL;
 
     /* No ROI */
     p->roi_compno = -1;
@@ -206,7 +177,7 @@
     int sub_dx[4];
     int sub_dy[4];
     int numcomps;
-    OPJ_COLOR_SPACE color_space = OPJ(CLRSPC_UNKNOWN);
+    OPJ_COLOR_SPACE color_space = OPJ_CLRSPC_UNKNOWN;
 
     sub_dx[0] = sub_dx[3] = 1;
     sub_dy[0] = sub_dy[3] = 1;
@@ -218,9 +189,12 @@
     switch (avctx->pix_fmt) {
     case AV_PIX_FMT_GRAY8:
     case AV_PIX_FMT_YA8:
+    case AV_PIX_FMT_GRAY10:
+    case AV_PIX_FMT_GRAY12:
+    case AV_PIX_FMT_GRAY14:
     case AV_PIX_FMT_GRAY16:
     case AV_PIX_FMT_YA16:
-        color_space = OPJ(CLRSPC_GRAY);
+        color_space = OPJ_CLRSPC_GRAY;
         break;
     case AV_PIX_FMT_RGB24:
     case AV_PIX_FMT_RGBA:
@@ -233,7 +207,7 @@
     case AV_PIX_FMT_GBRP14:
     case AV_PIX_FMT_GBRP16:
     case AV_PIX_FMT_XYZ12:
-        color_space = OPJ(CLRSPC_SRGB);
+        color_space = OPJ_CLRSPC_SRGB;
         break;
     case AV_PIX_FMT_YUV410P:
     case AV_PIX_FMT_YUV411P:
@@ -268,7 +242,7 @@
     case AV_PIX_FMT_YUVA420P16:
     case AV_PIX_FMT_YUVA422P16:
     case AV_PIX_FMT_YUVA444P16:
-        color_space = OPJ(CLRSPC_SYCC);
+        color_space = OPJ_CLRSPC_SYCC;
         break;
     default:
         av_log(avctx, AV_LOG_ERROR,
@@ -309,7 +283,6 @@
 
     opj_set_default_encoder_parameters(&ctx->enc_params);
 
-#if HAVE_OPENJPEG_2_3_OPENJPEG_H || HAVE_OPENJPEG_2_2_OPENJPEG_H || HAVE_OPENJPEG_2_1_OPENJPEG_H
     switch (ctx->cinema_mode) {
     case OPJ_CINEMA2K_24:
         ctx->enc_params.rsiz = OPJ_PROFILE_CINEMA_2K;
@@ -348,12 +321,8 @@
     if (err) {
         av_log(avctx, AV_LOG_ERROR,
                "Invalid parameter pairing: cinema_mode and profile conflict.\n");
-        goto fail;
+        return err;
     }
-#else
-    ctx->enc_params.cp_rsiz = ctx->profile;
-    ctx->enc_params.cp_cinema = ctx->cinema_mode;
-#endif
 
     if (!ctx->numresolution) {
         ctx->numresolution = 6;
@@ -373,23 +342,7 @@
         cinema_parameters(&ctx->enc_params);
     }
 
-#if OPENJPEG_MAJOR_VERSION == 1
-    ctx->image = mj2_create_image(avctx, &ctx->enc_params);
-    if (!ctx->image) {
-        av_log(avctx, AV_LOG_ERROR, "Error creating the mj2 image\n");
-        err = AVERROR(EINVAL);
-        goto fail;
-    }
-#endif // OPENJPEG_MAJOR_VERSION == 1
-
     return 0;
-
-fail:
-#if OPENJPEG_MAJOR_VERSION == 1
-    opj_image_destroy(ctx->image);
-    ctx->image = NULL;
-#endif // OPENJPEG_MAJOR_VERSION == 1
-    return err;
 }
 
 static int libopenjpeg_copy_packed8(AVCodecContext *avctx, const AVFrame *frame, opj_image_t *image)
@@ -602,12 +555,6 @@
     int ret;
     AVFrame *gbrframe;
     int cpyresult = 0;
-#if OPENJPEG_MAJOR_VERSION == 1
-    opj_image_t *image      = ctx->image;
-    opj_cinfo_t *compress   = NULL;
-    opj_cio_t *stream       = NULL;
-    int len;
-#else // OPENJPEG_MAJOR_VERSION == 2
     PacketWriter writer     = { 0 };
     opj_codec_t *compress   = NULL;
     opj_stream_t *stream    = NULL;
@@ -617,7 +564,6 @@
         ret = AVERROR(EINVAL);
         goto done;
     }
-#endif // OPENJPEG_MAJOR_VERSION == 1
 
     switch (avctx->pix_fmt) {
     case AV_PIX_FMT_RGB24:
@@ -669,6 +615,9 @@
     case AV_PIX_FMT_YUVA444P:
         cpyresult = libopenjpeg_copy_unpacked8(avctx, frame, image);
         break;
+    case AV_PIX_FMT_GRAY10:
+    case AV_PIX_FMT_GRAY12:
+    case AV_PIX_FMT_GRAY14:
     case AV_PIX_FMT_GRAY16:
     case AV_PIX_FMT_YUV420P9:
     case AV_PIX_FMT_YUV422P9:
@@ -712,11 +661,9 @@
         goto done;
     }
 
-#if OPENJPEG_MAJOR_VERSION == 2
     if ((ret = ff_alloc_packet2(avctx, pkt, 1024, 0)) < 0) {
         goto done;
     }
-#endif // OPENJPEG_MAJOR_VERSION == 2
 
     compress = opj_create_compress(ctx->format);
     if (!compress) {
@@ -725,10 +672,6 @@
         goto done;
     }
 
-#if OPENJPEG_MAJOR_VERSION == 1
-    opj_setup_encoder(compress, &ctx->enc_params, image);
-    stream = opj_cio_open((opj_common_ptr) compress, NULL, 0);
-#else // OPENJPEG_MAJOR_VERSION == 2
     if (!opj_set_error_handler(compress, error_callback, avctx) ||
         !opj_set_warning_handler(compress, warning_callback, avctx) ||
         !opj_set_info_handler(compress, info_callback, avctx)) {
@@ -743,43 +686,18 @@
         goto done;
     }
     stream = opj_stream_default_create(OPJ_STREAM_WRITE);
-#endif // OPENJPEG_MAJOR_VERSION == 1
 
     if (!stream) {
         av_log(avctx, AV_LOG_ERROR, "Error creating the cio stream\n");
         ret = AVERROR(ENOMEM);
         goto done;
     }
-#if OPENJPEG_MAJOR_VERSION == 1
-    memset(&ctx->event_mgr, 0, sizeof(ctx->event_mgr));
-    ctx->event_mgr.info_handler    = info_callback;
-    ctx->event_mgr.error_handler   = error_callback;
-    ctx->event_mgr.warning_handler = warning_callback;
-    opj_set_event_mgr((opj_common_ptr) compress, &ctx->event_mgr, avctx);
-    if (!opj_encode(compress, stream, image, NULL)) {
-        av_log(avctx, AV_LOG_ERROR, "Error during the opj encode\n");
-        ret = AVERROR_EXTERNAL;
-        goto done;
-    }
 
-    len = cio_tell(stream);
-    if ((ret = ff_alloc_packet2(avctx, pkt, len, 0)) < 0) {
-        goto done;
-    }
-
-    memcpy(pkt->data, stream->buffer, len);
-#else // OPENJPEG_MAJOR_VERSION == 2
     writer.packet = pkt;
     opj_stream_set_write_function(stream, stream_write);
     opj_stream_set_skip_function(stream, stream_skip);
     opj_stream_set_seek_function(stream, stream_seek);
-#if HAVE_OPENJPEG_2_3_OPENJPEG_H || HAVE_OPENJPEG_2_2_OPENJPEG_H || HAVE_OPENJPEG_2_1_OPENJPEG_H
     opj_stream_set_user_data(stream, &writer, NULL);
-#elif HAVE_OPENJPEG_2_0_OPENJPEG_H
-    opj_stream_set_user_data(stream, &writer);
-#else
-#error Missing call to opj_stream_set_user_data
-#endif
 
     if (!opj_start_compress(compress, image, stream) ||
         !opj_encode(compress, stream) ||
@@ -790,56 +708,39 @@
     }
 
     av_shrink_packet(pkt, writer.pos);
-#endif // OPENJPEG_MAJOR_VERSION == 1
 
     pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
     ret = 0;
 
 done:
-#if OPENJPEG_MAJOR_VERSION == 2
     opj_stream_destroy(stream);
     opj_destroy_codec(compress);
     opj_image_destroy(image);
-#else
-    opj_cio_close(stream);
-    opj_destroy_compress(compress);
-#endif
     return ret;
 }
 
-static av_cold int libopenjpeg_encode_close(AVCodecContext *avctx)
-{
-#if OPENJPEG_MAJOR_VERSION == 1
-    LibOpenJPEGContext *ctx = avctx->priv_data;
-
-    opj_image_destroy(ctx->image);
-    ctx->image = NULL;
-#endif // OPENJPEG_MAJOR_VERSION == 1
-    return 0;
-}
-
 #define OFFSET(x) offsetof(LibOpenJPEGContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "format",        "Codec Format",      OFFSET(format),        AV_OPT_TYPE_INT,   { .i64 = OPJ(CODEC_JP2)   }, OPJ(CODEC_J2K), OPJ(CODEC_JP2),   VE, "format"      },
-    { "j2k",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CODEC_J2K)   }, 0,         0,           VE, "format"      },
-    { "jp2",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CODEC_JP2)   }, 0,         0,           VE, "format"      },
-    { "profile",       NULL,                OFFSET(profile),       AV_OPT_TYPE_INT,   { .i64 = OPJ(STD_RSIZ)    }, OPJ(STD_RSIZ),  OPJ(CINEMA4K),    VE, "profile"     },
-    { "jpeg2000",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(STD_RSIZ)    }, 0,         0,           VE, "profile"     },
-    { "cinema2k",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CINEMA2K)    }, 0,         0,           VE, "profile"     },
-    { "cinema4k",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CINEMA4K)    }, 0,         0,           VE, "profile"     },
-    { "cinema_mode",   "Digital Cinema",    OFFSET(cinema_mode),   AV_OPT_TYPE_INT,   { .i64 = OPJ(OFF)         }, OPJ(OFF),       OPJ(CINEMA4K_24), VE, "cinema_mode" },
-    { "off",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(OFF)         }, 0,         0,           VE, "cinema_mode" },
-    { "2k_24",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CINEMA2K_24) }, 0,         0,           VE, "cinema_mode" },
-    { "2k_48",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CINEMA2K_48) }, 0,         0,           VE, "cinema_mode" },
-    { "4k_24",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CINEMA4K_24) }, 0,         0,           VE, "cinema_mode" },
-    { "prog_order",    "Progression Order", OFFSET(prog_order),    AV_OPT_TYPE_INT,   { .i64 = OPJ(LRCP)    }, OPJ(LRCP),  OPJ(CPRL),    VE, "prog_order"  },
-    { "lrcp",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(LRCP)    }, 0,         0,           VE, "prog_order"  },
-    { "rlcp",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(RLCP)    }, 0,         0,           VE, "prog_order"  },
-    { "rpcl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(RPCL)    }, 0,         0,           VE, "prog_order"  },
-    { "pcrl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(PCRL)    }, 0,         0,           VE, "prog_order"  },
-    { "cprl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CPRL)    }, 0,         0,           VE, "prog_order"  },
+    { "format",        "Codec Format",      OFFSET(format),        AV_OPT_TYPE_INT,   { .i64 = OPJ_CODEC_JP2   }, OPJ_CODEC_J2K, OPJ_CODEC_JP2,   VE, "format"      },
+    { "j2k",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_CODEC_J2K   }, 0,         0,           VE, "format"      },
+    { "jp2",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_CODEC_JP2   }, 0,         0,           VE, "format"      },
+    { "profile",       NULL,                OFFSET(profile),       AV_OPT_TYPE_INT,   { .i64 = OPJ_STD_RSIZ    }, OPJ_STD_RSIZ,  OPJ_CINEMA4K,    VE, "profile"     },
+    { "jpeg2000",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_STD_RSIZ    }, 0,         0,           VE, "profile"     },
+    { "cinema2k",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_CINEMA2K    }, 0,         0,           VE, "profile"     },
+    { "cinema4k",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_CINEMA4K    }, 0,         0,           VE, "profile"     },
+    { "cinema_mode",   "Digital Cinema",    OFFSET(cinema_mode),   AV_OPT_TYPE_INT,   { .i64 = OPJ_OFF         }, OPJ_OFF,       OPJ_CINEMA4K_24, VE, "cinema_mode" },
+    { "off",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_OFF         }, 0,         0,           VE, "cinema_mode" },
+    { "2k_24",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_CINEMA2K_24 }, 0,         0,           VE, "cinema_mode" },
+    { "2k_48",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_CINEMA2K_48 }, 0,         0,           VE, "cinema_mode" },
+    { "4k_24",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_CINEMA4K_24 }, 0,         0,           VE, "cinema_mode" },
+    { "prog_order",    "Progression Order", OFFSET(prog_order),    AV_OPT_TYPE_INT,   { .i64 = OPJ_LRCP    }, OPJ_LRCP,  OPJ_CPRL,    VE, "prog_order"  },
+    { "lrcp",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_LRCP    }, 0,         0,           VE, "prog_order"  },
+    { "rlcp",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_RLCP    }, 0,         0,           VE, "prog_order"  },
+    { "rpcl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_RPCL    }, 0,         0,           VE, "prog_order"  },
+    { "pcrl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_PCRL    }, 0,         0,           VE, "prog_order"  },
+    { "cprl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ_CPRL    }, 0,         0,           VE, "prog_order"  },
     { "numresolution", NULL,                OFFSET(numresolution), AV_OPT_TYPE_INT,   { .i64 = 6            }, 0,         33,          VE                },
     { "irreversible",  NULL,                OFFSET(irreversible),  AV_OPT_TYPE_INT,   { .i64 = 0            }, 0,         1,           VE                },
     { "disto_alloc",   NULL,                OFFSET(disto_alloc),   AV_OPT_TYPE_INT,   { .i64 = 1            }, 0,         1,           VE                },
@@ -862,13 +763,13 @@
     .priv_data_size = sizeof(LibOpenJPEGContext),
     .init           = libopenjpeg_encode_init,
     .encode2        = libopenjpeg_encode_frame,
-    .close          = libopenjpeg_encode_close,
     .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA, AV_PIX_FMT_RGB48,
         AV_PIX_FMT_RGBA64, AV_PIX_FMT_GBR24P,
         AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
         AV_PIX_FMT_GRAY8, AV_PIX_FMT_YA8, AV_PIX_FMT_GRAY16, AV_PIX_FMT_YA16,
+        AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14,
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P,
         AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA422P,
         AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA444P,
@@ -884,4 +785,5 @@
         AV_PIX_FMT_NONE
     },
     .priv_class     = &openjpeg_class,
+    .wrapper_name   = "libopenjpeg",
 };

diff --git a/libavcodec/libopusdec.c b/libavcodec/libopusdec.c
index e6ca61a..2a97811 100644
--- a/libavcodec/libopusdec.c
+++ b/libavcodec/libopusdec.c

@@ -24,6 +24,8 @@
 
 #include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/ffmath.h"
+#include "libavutil/opt.h"
 
 #include "avcodec.h"
 #include "internal.h"
@@ -32,11 +34,15 @@
 #include "libopus.h"
 
 struct libopus_context {
+    AVClass *class;
     OpusMSDecoder *dec;
     int pre_skip;
 #ifndef OPUS_SET_GAIN
     union { int i; double d; } gain;
 #endif
+#ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
+    int apply_phase_inv;
+#endif
 };
 
 #define OPUS_HEAD_SIZE 19
@@ -57,8 +63,6 @@
     avc->sample_rate    = 48000;
     avc->sample_fmt     = avc->request_sample_fmt == AV_SAMPLE_FMT_FLT ?
                           AV_SAMPLE_FMT_FLT : AV_SAMPLE_FMT_S16;
-    avc->channel_layout = avc->channels > 8 ? 0 :
-                          ff_vorbis_channel_layouts[avc->channels - 1];
 
     if (avc->extradata_size >= OPUS_HEAD_SIZE) {
         opus->pre_skip = AV_RL16(avc->extradata + 10);
@@ -82,14 +86,35 @@
         mapping    = mapping_arr;
     }
 
-    if (avc->channels > 2 && avc->channels <= 8) {
-        const uint8_t *vorbis_offset = ff_vorbis_channel_layout_offsets[avc->channels - 1];
-        int ch;
+    if (channel_map == 1) {
+        avc->channel_layout = avc->channels > 8 ? 0 :
+                              ff_vorbis_channel_layouts[avc->channels - 1];
+        if (avc->channels > 2 && avc->channels <= 8) {
+            const uint8_t *vorbis_offset = ff_vorbis_channel_layout_offsets[avc->channels - 1];
+            int ch;
 
-        /* Remap channels from Vorbis order to ffmpeg order */
-        for (ch = 0; ch < avc->channels; ch++)
-            mapping_arr[ch] = mapping[vorbis_offset[ch]];
-        mapping = mapping_arr;
+            /* Remap channels from Vorbis order to ffmpeg order */
+            for (ch = 0; ch < avc->channels; ch++)
+                mapping_arr[ch] = mapping[vorbis_offset[ch]];
+            mapping = mapping_arr;
+        }
+    } else if (channel_map == 2) {
+        int ambisonic_order = ff_sqrt(avc->channels) - 1;
+        if (avc->channels != (ambisonic_order + 1) * (ambisonic_order + 1) &&
+            avc->channels != (ambisonic_order + 1) * (ambisonic_order + 1) + 2) {
+            av_log(avc, AV_LOG_ERROR,
+                   "Channel mapping 2 is only specified for channel counts"
+                   " which can be written as (n + 1)^2 or (n + 2)^2 + 2"
+                   " for nonnegative integer n\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (avc->channels > 227) {
+            av_log(avc, AV_LOG_ERROR, "Too many channels\n");
+            return AVERROR_INVALIDDATA;
+        }
+        avc->channel_layout = 0;
+    } else {
+        avc->channel_layout = 0;
     }
 
     opus->dec = opus_multistream_decoder_create(avc->sample_rate, avc->channels,
@@ -116,6 +141,15 @@
     }
 #endif
 
+#ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
+    ret = opus_multistream_decoder_ctl(opus->dec,
+                                       OPUS_SET_PHASE_INVERSION_DISABLED(!opus->apply_phase_inv));
+    if (ret != OPUS_OK)
+        av_log(avc, AV_LOG_WARNING,
+               "Unable to set phase inversion: %s\n",
+               opus_strerror(ret));
+#endif
+
     /* Decoder delay (in samples) at 48kHz */
     avc->delay = avc->internal->skip_samples = opus->pre_skip;
 
@@ -126,7 +160,10 @@
 {
     struct libopus_context *opus = avc->priv_data;
 
-    opus_multistream_decoder_destroy(opus->dec);
+    if (opus->dec) {
+        opus_multistream_decoder_destroy(opus->dec);
+        opus->dec = NULL;
+    }
     return 0;
 }
 
@@ -189,6 +226,24 @@
     avc->internal->skip_samples = opus->pre_skip;
 }
 
+
+#define OFFSET(x) offsetof(struct libopus_context, x)
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+static const AVOption libopusdec_options[] = {
+#ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
+    { "apply_phase_inv", "Apply intensity stereo phase inversion", OFFSET(apply_phase_inv), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
+#endif
+    { NULL },
+};
+
+static const AVClass libopusdec_class = {
+    .class_name = "libopusdec",
+    .item_name  = av_default_item_name,
+    .option     = libopusdec_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+
 AVCodec ff_libopus_decoder = {
     .name           = "libopus",
     .long_name      = NULL_IF_CONFIG_SMALL("libopus Opus"),
@@ -200,7 +255,10 @@
     .decode         = libopus_decode,
     .flush          = libopus_flush,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
                                                      AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
+    .priv_class     = &libopusdec_class,
+    .wrapper_name   = "libopus",
 };

diff --git a/libavcodec/libopusenc.c b/libavcodec/libopusenc.c
index 3d88c29..7c025a6 100644
--- a/libavcodec/libopusenc.c
+++ b/libavcodec/libopusenc.c

@@ -39,6 +39,9 @@
     int packet_size;
     int max_bandwidth;
     int mapping_family;
+#ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
+    int apply_phase_inv;
+#endif
 } LibopusEncOpts;
 
 typedef struct LibopusEncContext {
@@ -154,6 +157,14 @@
                    "Unable to set maximum bandwidth: %s\n", opus_strerror(ret));
     }
 
+#ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
+    ret = opus_multistream_encoder_ctl(enc,
+                                       OPUS_SET_PHASE_INVERSION_DISABLED(!opts->apply_phase_inv));
+    if (ret != OPUS_OK)
+        av_log(avctx, AV_LOG_WARNING,
+               "Unable to set phase inversion: %s\n",
+               opus_strerror(ret));
+#endif
     return OPUS_OK;
 }
 
@@ -260,12 +271,22 @@
     case 960:
     case 1920:
     case 2880:
+#ifdef OPUS_FRAMESIZE_120_MS
+    case 3840:
+    case 4800:
+    case 5760:
+#endif
         opus->opts.packet_size =
         avctx->frame_size      = frame_size * avctx->sample_rate / 48000;
         break;
     default:
         av_log(avctx, AV_LOG_ERROR, "Invalid frame duration: %g.\n"
-               "Frame duration must be exactly one of: 2.5, 5, 10, 20, 40 or 60.\n",
+               "Frame duration must be exactly one of: 2.5, 5, 10, 20, 40"
+#ifdef OPUS_FRAMESIZE_120_MS
+               ", 60, 80, 100 or 120.\n",
+#else
+               " or 60.\n",
+#endif
                opus->opts.frame_duration);
         return AVERROR(EINVAL);
     }
@@ -452,10 +473,10 @@
         memset(audio, 0, opus->opts.packet_size * sample_size);
     }
 
-    /* Maximum packet size taken from opusenc in opus-tools. 60ms packets
-     * consist of 3 frames in one packet. The maximum frame size is 1275
+    /* Maximum packet size taken from opusenc in opus-tools. 120ms packets
+     * consist of 6 frames in one packet. The maximum frame size is 1275
      * bytes along with the largest possible packet header of 7 bytes. */
-    if ((ret = ff_alloc_packet2(avctx, avpkt, (1275 * 3 + 7) * opus->stream_count, 0)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, (1275 * 6 + 7) * opus->stream_count, 0)) < 0)
         return ret;
 
     if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
@@ -523,13 +544,16 @@
         { "voip",           "Favor improved speech intelligibility",   0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_VOIP },                0, 0, FLAGS, "application" },
         { "audio",          "Favor faithfulness to the input",         0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_AUDIO },               0, 0, FLAGS, "application" },
         { "lowdelay",       "Restrict to only the lowest delay modes", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_RESTRICTED_LOWDELAY }, 0, 0, FLAGS, "application" },
-    { "frame_duration", "Duration of a frame in milliseconds", OFFSET(frame_duration), AV_OPT_TYPE_FLOAT, { .dbl = 20.0 }, 2.5, 60.0, FLAGS },
+    { "frame_duration", "Duration of a frame in milliseconds", OFFSET(frame_duration), AV_OPT_TYPE_FLOAT, { .dbl = 20.0 }, 2.5, 120.0, FLAGS },
     { "packet_loss",    "Expected packet loss percentage",     OFFSET(packet_loss),    AV_OPT_TYPE_INT,   { .i64 = 0 },    0,   100,  FLAGS },
     { "vbr",            "Variable bit rate mode",              OFFSET(vbr),            AV_OPT_TYPE_INT,   { .i64 = 1 },    0,   2,    FLAGS, "vbr" },
         { "off",            "Use constant bit rate", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "vbr" },
         { "on",             "Use variable bit rate", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "vbr" },
         { "constrained",    "Use constrained VBR",   0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, "vbr" },
     { "mapping_family", "Channel Mapping Family",              OFFSET(mapping_family), AV_OPT_TYPE_INT,   { .i64 = -1 },   -1,  255,  FLAGS, "mapping_family" },
+#ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
+    { "apply_phase_inv", "Apply intensity stereo phase inversion", OFFSET(apply_phase_inv), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
+#endif
     { NULL },
 };
 
@@ -566,4 +590,5 @@
     .supported_samplerates = libopus_sample_rates,
     .priv_class      = &libopus_class,
     .defaults        = libopus_defaults,
+    .wrapper_name    = "libopus",
 };

diff --git a/libavcodec/librsvgdec.c b/libavcodec/librsvgdec.c
index 77c7710..6697785 100644
--- a/libavcodec/librsvgdec.c
+++ b/libavcodec/librsvgdec.c

@@ -82,8 +82,10 @@
 
     crender = cairo_create(image);
 
-    cairo_set_source_rgba(crender, 0.0, 0.0, 0.0, 1.0f);
-    cairo_paint_with_alpha(crender, 0.0f);
+    cairo_save(crender);
+    cairo_set_operator(crender, CAIRO_OPERATOR_CLEAR);
+    cairo_paint(crender);
+    cairo_restore(crender);
 
     cairo_scale(crender, dimensions.width / (double)unscaled_dimensions.width,
                 dimensions.height / (double)unscaled_dimensions.height);
@@ -124,4 +126,5 @@
     .decode         = librsvg_decode_frame,
     .priv_data_size = sizeof(LibRSVGContext),
     .capabilities   = AV_CODEC_CAP_LOSSLESS | AV_CODEC_CAP_DR1,
+    .wrapper_name    = "librsvg",
 };

diff --git a/libavcodec/libshine.c b/libavcodec/libshine.c
index f4cf598..7056fcd 100644
--- a/libavcodec/libshine.c
+++ b/libavcodec/libshine.c

@@ -146,4 +146,5 @@
     .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
                                                   AV_CH_LAYOUT_STEREO,
                                                   0 },
+    .wrapper_name          = "libshine",
 };

diff --git a/libavcodec/libspeexdec.c b/libavcodec/libspeexdec.c
index 044883a..d67c68c 100644
--- a/libavcodec/libspeexdec.c
+++ b/libavcodec/libspeexdec.c

@@ -200,4 +200,5 @@
     .decode         = libspeex_decode_frame,
     .flush          = libspeex_decode_flush,
     .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
+    .wrapper_name   = "libspeex",
 };

diff --git a/libavcodec/libspeexenc.c b/libavcodec/libspeexenc.c
index a2b07a4..6a37dbc 100644
--- a/libavcodec/libspeexenc.c
+++ b/libavcodec/libspeexenc.c

@@ -159,9 +159,9 @@
 
     /* sample rate and encoding mode */
     switch (avctx->sample_rate) {
-    case  8000: mode = &speex_nb_mode;  break;
-    case 16000: mode = &speex_wb_mode;  break;
-    case 32000: mode = &speex_uwb_mode; break;
+    case  8000: mode = speex_lib_get_mode(SPEEX_MODEID_NB);  break;
+    case 16000: mode = speex_lib_get_mode(SPEEX_MODEID_WB);  break;
+    case 32000: mode = speex_lib_get_mode(SPEEX_MODEID_UWB); break;
     default:
         av_log(avctx, AV_LOG_ERROR, "Sample rate of %d Hz is not supported. "
                "Resample to 8, 16, or 32 kHz.\n", avctx->sample_rate);
@@ -365,4 +365,5 @@
     .supported_samplerates = (const int[]){ 8000, 16000, 32000, 0 },
     .priv_class     = &speex_class,
     .defaults       = defaults,
+    .wrapper_name   = "libspeex",
 };

diff --git a/libavcodec/libtheoraenc.c b/libavcodec/libtheoraenc.c
index fae55e8..16966ed 100644
--- a/libavcodec/libtheoraenc.c
+++ b/libavcodec/libtheoraenc.c

@@ -208,7 +208,9 @@
         av_log(avc_context, AV_LOG_ERROR, "Unsupported pix_fmt\n");
         return AVERROR(EINVAL);
     }
-    avcodec_get_chroma_sub_sample(avc_context->pix_fmt, &h->uv_hshift, &h->uv_vshift);
+    ret = av_pix_fmt_get_chroma_sub_sample(avc_context->pix_fmt, &h->uv_hshift, &h->uv_vshift);
+    if (ret)
+        return ret;
 
     if (avc_context->flags & AV_CODEC_FLAG_QSCALE) {
         /* Clip global_quality in QP units to the [0 - 10] range
@@ -383,4 +385,5 @@
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_NONE
     },
+    .wrapper_name   = "libtheora",
 };

diff --git a/libavcodec/libtwolame.c b/libavcodec/libtwolame.c
index 12d71e7..030f888 100644
--- a/libavcodec/libtwolame.c
+++ b/libavcodec/libtwolame.c

@@ -226,4 +226,5 @@
         AV_CH_LAYOUT_STEREO,
         0 },
     .supported_samplerates = twolame_samplerates,
+    .wrapper_name   = "libtwolame",
 };

diff --git a/libavcodec/libvo-amrwbenc.c b/libavcodec/libvo-amrwbenc.c
index 2a15650..77d0cce 100644
--- a/libavcodec/libvo-amrwbenc.c
+++ b/libavcodec/libvo-amrwbenc.c

@@ -46,7 +46,10 @@
 };
 
 static const AVClass amrwb_class = {
-    "libvo_amrwbenc", av_default_item_name, options, LIBAVUTIL_VERSION_INT
+    .class_name = "libvo_amrwbenc",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 static int get_wb_bitrate_mode(int bitrate, void *log_ctx)
@@ -149,4 +152,5 @@
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
     .priv_class     = &amrwb_class,
+    .wrapper_name   = "libvo_amrwbenc",
 };

diff --git a/libavcodec/libvorbisenc.c b/libavcodec/libvorbisenc.c
index 3ca5b55..f78f872 100644
--- a/libavcodec/libvorbisenc.c
+++ b/libavcodec/libvorbisenc.c

@@ -377,4 +377,5 @@
                                                       AV_SAMPLE_FMT_NONE },
     .priv_class     = &vorbis_class,
     .defaults       = defaults,
+    .wrapper_name   = "libvorbis",
 };

diff --git a/libavcodec/libvpx.c b/libavcodec/libvpx.c
index 1eca97a..cc055a0 100644
--- a/libavcodec/libvpx.c
+++ b/libavcodec/libvpx.c

@@ -40,9 +40,7 @@
     AV_PIX_FMT_YUV422P,
     AV_PIX_FMT_YUV440P,
     AV_PIX_FMT_YUV444P,
-#if VPX_IMAGE_ABI_VERSION >= 3
     AV_PIX_FMT_GBRP,
-#endif
     AV_PIX_FMT_NONE
 };
 
@@ -60,88 +58,23 @@
     AV_PIX_FMT_YUV422P12,
     AV_PIX_FMT_YUV440P12,
     AV_PIX_FMT_YUV444P12,
-#if VPX_IMAGE_ABI_VERSION >= 3
     AV_PIX_FMT_GBRP,
     AV_PIX_FMT_GBRP10,
     AV_PIX_FMT_GBRP12,
-#endif
     AV_PIX_FMT_NONE
 };
 #endif
 
 av_cold void ff_vp9_init_static(AVCodec *codec)
 {
-    if (    vpx_codec_version_major() < 1
-        || (vpx_codec_version_major() == 1 && vpx_codec_version_minor() < 3))
-        codec->capabilities |= AV_CODEC_CAP_EXPERIMENTAL;
     codec->pix_fmts = vp9_pix_fmts_def;
 #if CONFIG_LIBVPX_VP9_ENCODER
-    if (    vpx_codec_version_major() > 1
-        || (vpx_codec_version_major() == 1 && vpx_codec_version_minor() >= 4)) {
-#ifdef VPX_CODEC_CAP_HIGHBITDEPTH
+    {
         vpx_codec_caps_t codec_caps = vpx_codec_get_caps(vpx_codec_vp9_cx());
         if (codec_caps & VPX_CODEC_CAP_HIGHBITDEPTH)
             codec->pix_fmts = vp9_pix_fmts_highbd;
         else
-#endif
             codec->pix_fmts = vp9_pix_fmts_highcol;
     }
 #endif
 }
-#if 0
-enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img)
-{
-    switch (img) {
-    case VPX_IMG_FMT_RGB24:     return AV_PIX_FMT_RGB24;
-    case VPX_IMG_FMT_RGB565:    return AV_PIX_FMT_RGB565BE;
-    case VPX_IMG_FMT_RGB555:    return AV_PIX_FMT_RGB555BE;
-    case VPX_IMG_FMT_UYVY:      return AV_PIX_FMT_UYVY422;
-    case VPX_IMG_FMT_YUY2:      return AV_PIX_FMT_YUYV422;
-    case VPX_IMG_FMT_YVYU:      return AV_PIX_FMT_YVYU422;
-    case VPX_IMG_FMT_BGR24:     return AV_PIX_FMT_BGR24;
-    case VPX_IMG_FMT_ARGB:      return AV_PIX_FMT_ARGB;
-    case VPX_IMG_FMT_ARGB_LE:   return AV_PIX_FMT_BGRA;
-    case VPX_IMG_FMT_RGB565_LE: return AV_PIX_FMT_RGB565LE;
-    case VPX_IMG_FMT_RGB555_LE: return AV_PIX_FMT_RGB555LE;
-    case VPX_IMG_FMT_I420:      return AV_PIX_FMT_YUV420P;
-    case VPX_IMG_FMT_I422:      return AV_PIX_FMT_YUV422P;
-    case VPX_IMG_FMT_I444:      return AV_PIX_FMT_YUV444P;
-    case VPX_IMG_FMT_444A:      return AV_PIX_FMT_YUVA444P;
-#if VPX_IMAGE_ABI_VERSION >= 3
-    case VPX_IMG_FMT_I440:      return AV_PIX_FMT_YUV440P;
-    case VPX_IMG_FMT_I42016:    return AV_PIX_FMT_YUV420P16BE;
-    case VPX_IMG_FMT_I42216:    return AV_PIX_FMT_YUV422P16BE;
-    case VPX_IMG_FMT_I44416:    return AV_PIX_FMT_YUV444P16BE;
-#endif
-    default:                    return AV_PIX_FMT_NONE;
-    }
-}
-
-vpx_img_fmt_t ff_vpx_pixfmt_to_imgfmt(enum AVPixelFormat pix)
-{
-    switch (pix) {
-    case AV_PIX_FMT_RGB24:        return VPX_IMG_FMT_RGB24;
-    case AV_PIX_FMT_RGB565BE:     return VPX_IMG_FMT_RGB565;
-    case AV_PIX_FMT_RGB555BE:     return VPX_IMG_FMT_RGB555;
-    case AV_PIX_FMT_UYVY422:      return VPX_IMG_FMT_UYVY;
-    case AV_PIX_FMT_YUYV422:      return VPX_IMG_FMT_YUY2;
-    case AV_PIX_FMT_YVYU422:      return VPX_IMG_FMT_YVYU;
-    case AV_PIX_FMT_BGR24:        return VPX_IMG_FMT_BGR24;
-    case AV_PIX_FMT_ARGB:         return VPX_IMG_FMT_ARGB;
-    case AV_PIX_FMT_BGRA:         return VPX_IMG_FMT_ARGB_LE;
-    case AV_PIX_FMT_RGB565LE:     return VPX_IMG_FMT_RGB565_LE;
-    case AV_PIX_FMT_RGB555LE:     return VPX_IMG_FMT_RGB555_LE;
-    case AV_PIX_FMT_YUV420P:      return VPX_IMG_FMT_I420;
-    case AV_PIX_FMT_YUV422P:      return VPX_IMG_FMT_I422;
-    case AV_PIX_FMT_YUV444P:      return VPX_IMG_FMT_I444;
-    case AV_PIX_FMT_YUVA444P:     return VPX_IMG_FMT_444A;
-#if VPX_IMAGE_ABI_VERSION >= 3
-    case AV_PIX_FMT_YUV440P:      return VPX_IMG_FMT_I440;
-    case AV_PIX_FMT_YUV420P16BE:  return VPX_IMG_FMT_I42016;
-    case AV_PIX_FMT_YUV422P16BE:  return VPX_IMG_FMT_I42216;
-    case AV_PIX_FMT_YUV444P16BE:  return VPX_IMG_FMT_I44416;
-#endif
-    default:                      return VPX_IMG_FMT_NONE;
-    }
-}
-#endif

diff --git a/libavcodec/libvpxdec.c b/libavcodec/libvpxdec.c
index ad0ea3b..04f27d3 100644
--- a/libavcodec/libvpxdec.c
+++ b/libavcodec/libvpxdec.c

@@ -70,7 +70,6 @@
 static int set_pix_fmt(AVCodecContext *avctx, struct vpx_image *img,
                        int has_alpha_channel)
 {
-#if VPX_IMAGE_ABI_VERSION >= 3
     static const enum AVColorSpace colorspaces[8] = {
         AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
         AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
@@ -82,7 +81,6 @@
     avctx->color_range = color_ranges[img->range];
 #endif
     avctx->colorspace = colorspaces[img->cs];
-#endif
     if (avctx->codec_id == AV_CODEC_ID_VP8 && img->fmt != VPX_IMG_FMT_I420)
         return AVERROR_INVALIDDATA;
     switch (img->fmt) {
@@ -97,22 +95,15 @@
         avctx->profile = FF_PROFILE_VP9_1;
         avctx->pix_fmt = AV_PIX_FMT_YUV422P;
         return 0;
-#if VPX_IMAGE_ABI_VERSION >= 3
     case VPX_IMG_FMT_I440:
         avctx->profile = FF_PROFILE_VP9_1;
         avctx->pix_fmt = AV_PIX_FMT_YUV440P;
         return 0;
-#endif
     case VPX_IMG_FMT_I444:
         avctx->profile = FF_PROFILE_VP9_1;
-#if VPX_IMAGE_ABI_VERSION >= 3
         avctx->pix_fmt = avctx->colorspace == AVCOL_SPC_RGB ?
                          AV_PIX_FMT_GBRP : AV_PIX_FMT_YUV444P;
-#else
-        avctx->pix_fmt = AV_PIX_FMT_YUV444P;
-#endif
         return 0;
-#ifdef VPX_IMG_FMT_HIGHBITDEPTH
     case VPX_IMG_FMT_I42016:
         avctx->profile = FF_PROFILE_VP9_2;
         if (img->bit_depth == 10) {
@@ -135,7 +126,6 @@
         } else {
             return AVERROR_INVALIDDATA;
         }
-#if VPX_IMAGE_ABI_VERSION >= 3
     case VPX_IMG_FMT_I44016:
         avctx->profile = FF_PROFILE_VP9_3;
         if (img->bit_depth == 10) {
@@ -147,30 +137,20 @@
         } else {
             return AVERROR_INVALIDDATA;
         }
-#endif
     case VPX_IMG_FMT_I44416:
         avctx->profile = FF_PROFILE_VP9_3;
         if (img->bit_depth == 10) {
-#if VPX_IMAGE_ABI_VERSION >= 3
             avctx->pix_fmt = avctx->colorspace == AVCOL_SPC_RGB ?
                              AV_PIX_FMT_GBRP10 : AV_PIX_FMT_YUV444P10;
-#else
-            avctx->pix_fmt = AV_PIX_FMT_YUV444P10;
-#endif
             return 0;
         } else if (img->bit_depth == 12) {
-#if VPX_IMAGE_ABI_VERSION >= 3
             avctx->pix_fmt = avctx->colorspace == AVCOL_SPC_RGB ?
                              AV_PIX_FMT_GBRP12 : AV_PIX_FMT_YUV444P12;
-#else
-            avctx->pix_fmt = AV_PIX_FMT_YUV444P12;
-#endif
             return 0;
         } else {
             return AVERROR_INVALIDDATA;
         }
 #endif
-#endif
     default:
         return AVERROR_INVALIDDATA;
     }
@@ -252,13 +232,8 @@
         }
 
         if ((ret = set_pix_fmt(avctx, img, ctx->has_alpha_channel)) < 0) {
-#ifdef VPX_IMG_FMT_HIGHBITDEPTH
             av_log(avctx, AV_LOG_ERROR, "Unsupported output colorspace (%d) / bit_depth (%d)\n",
                    img->fmt, img->bit_depth);
-#else
-            av_log(avctx, AV_LOG_ERROR, "Unsupported output colorspace (%d) / bit_depth (%d)\n",
-                   img->fmt, 8);
-#endif
             return ret;
         }
 
@@ -314,6 +289,7 @@
     .close          = vpx_free,
     .decode         = vpx_decode,
     .capabilities   = AV_CODEC_CAP_AUTO_THREADS | AV_CODEC_CAP_DR1,
+    .wrapper_name   = "libvpx",
 };
 #endif /* CONFIG_LIBVPX_VP8_DECODER */
 
@@ -335,5 +311,6 @@
     .capabilities   = AV_CODEC_CAP_AUTO_THREADS | AV_CODEC_CAP_DR1,
     .init_static_data = ff_vp9_init_static,
     .profiles       = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
+    .wrapper_name   = "libvpx",
 };
 #endif /* CONFIG_LIBVPX_VP9_DECODER */

diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index 64a7459..09f7a88 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c

@@ -109,6 +109,8 @@
     int vpx_cs;
     float level;
     int row_mt;
+    int tune_content;
+    int corpus_complexity;
 } VPxContext;
 
 /** String mappings for enum vp8e_enc_control_id */
@@ -130,9 +132,7 @@
     [VP9E_SET_TILE_ROWS]               = "VP9E_SET_TILE_ROWS",
     [VP9E_SET_FRAME_PARALLEL_DECODING] = "VP9E_SET_FRAME_PARALLEL_DECODING",
     [VP9E_SET_AQ_MODE]                 = "VP9E_SET_AQ_MODE",
-#if VPX_ENCODER_ABI_VERSION > 8
     [VP9E_SET_COLOR_SPACE]             = "VP9E_SET_COLOR_SPACE",
-#endif
 #if VPX_ENCODER_ABI_VERSION >= 11
     [VP9E_SET_COLOR_RANGE]             = "VP9E_SET_COLOR_RANGE",
 #endif
@@ -143,6 +143,9 @@
 #ifdef VPX_CTRL_VP9E_SET_ROW_MT
     [VP9E_SET_ROW_MT]                  = "VP9E_SET_ROW_MT",
 #endif
+#ifdef VPX_CTRL_VP9E_SET_TUNE_CONTENT
+    [VP9E_SET_TUNE_CONTENT]            = "VP9E_SET_TUNE_CONTENT",
+#endif
 #endif
 };
 
@@ -166,7 +169,7 @@
     av_log(avctx, level, "vpx_codec_enc_cfg\n");
     av_log(avctx, level, "generic settings\n"
            "  %*s%u\n  %*s%u\n  %*s%u\n  %*s%u\n  %*s%u\n"
-#if CONFIG_LIBVPX_VP9_ENCODER && defined(VPX_IMG_FMT_HIGHBITDEPTH)
+#if CONFIG_LIBVPX_VP9_ENCODER
            "  %*s%u\n  %*s%u\n"
 #endif
            "  %*s{%u/%u}\n  %*s%u\n  %*s%d\n  %*s%u\n",
@@ -175,7 +178,7 @@
            width, "g_profile:",         cfg->g_profile,
            width, "g_w:",               cfg->g_w,
            width, "g_h:",               cfg->g_h,
-#if CONFIG_LIBVPX_VP9_ENCODER && defined(VPX_IMG_FMT_HIGHBITDEPTH)
+#if CONFIG_LIBVPX_VP9_ENCODER
            width, "g_bit_depth:",       cfg->g_bit_depth,
            width, "g_input_bit_depth:", cfg->g_input_bit_depth,
 #endif
@@ -211,6 +214,10 @@
            width, "rc_2pass_vbr_bias_pct:",       cfg->rc_2pass_vbr_bias_pct,
            width, "rc_2pass_vbr_minsection_pct:", cfg->rc_2pass_vbr_minsection_pct,
            width, "rc_2pass_vbr_maxsection_pct:", cfg->rc_2pass_vbr_maxsection_pct);
+#if VPX_ENCODER_ABI_VERSION >= 14
+    av_log(avctx, level, "  %*s%u\n",
+           width, "rc_2pass_vbr_corpus_complexity:", cfg->rc_2pass_vbr_corpus_complexity);
+#endif
     av_log(avctx, level, "keyframing settings\n"
            "  %*s%d\n  %*s%u\n  %*s%u\n",
            width, "kf_mode:",     cfg->kf_mode,
@@ -320,9 +327,7 @@
                        vpx_img_fmt_t *img_fmt)
 {
     VPxContext av_unused *ctx = avctx->priv_data;
-#ifdef VPX_IMG_FMT_HIGHBITDEPTH
     enccfg->g_bit_depth = enccfg->g_input_bit_depth = 8;
-#endif
     switch (avctx->pix_fmt) {
     case AV_PIX_FMT_YUV420P:
     case AV_PIX_FMT_YUVA420P:
@@ -333,19 +338,16 @@
         enccfg->g_profile = 1;
         *img_fmt = VPX_IMG_FMT_I422;
         return 0;
-#if VPX_IMAGE_ABI_VERSION >= 3
     case AV_PIX_FMT_YUV440P:
         enccfg->g_profile = 1;
         *img_fmt = VPX_IMG_FMT_I440;
         return 0;
     case AV_PIX_FMT_GBRP:
         ctx->vpx_cs = VPX_CS_SRGB;
-#endif
     case AV_PIX_FMT_YUV444P:
         enccfg->g_profile = 1;
         *img_fmt = VPX_IMG_FMT_I444;
         return 0;
-#ifdef VPX_IMG_FMT_HIGHBITDEPTH
     case AV_PIX_FMT_YUV420P10:
     case AV_PIX_FMT_YUV420P12:
         if (codec_caps & VPX_CODEC_CAP_HIGHBITDEPTH) {
@@ -368,7 +370,6 @@
             return 0;
         }
         break;
-#if VPX_IMAGE_ABI_VERSION >= 3
     case AV_PIX_FMT_YUV440P10:
     case AV_PIX_FMT_YUV440P12:
         if (codec_caps & VPX_CODEC_CAP_HIGHBITDEPTH) {
@@ -383,7 +384,6 @@
     case AV_PIX_FMT_GBRP10:
     case AV_PIX_FMT_GBRP12:
         ctx->vpx_cs = VPX_CS_SRGB;
-#endif
     case AV_PIX_FMT_YUV444P10:
     case AV_PIX_FMT_YUV444P12:
         if (codec_caps & VPX_CODEC_CAP_HIGHBITDEPTH) {
@@ -396,7 +396,6 @@
             return 0;
         }
         break;
-#endif
     default:
         break;
     }
@@ -404,7 +403,6 @@
     return AVERROR_INVALIDDATA;
 }
 
-#if VPX_ENCODER_ABI_VERSION > 8
 static void set_colorspace(AVCodecContext *avctx)
 {
     enum vpx_color_space vpx_cs;
@@ -430,7 +428,6 @@
     }
     codecctl_int(avctx, VP9E_SET_COLOR_SPACE, vpx_cs);
 }
-#endif
 
 #if VPX_ENCODER_ABI_VERSION >= 11
 static void set_color_range(AVCodecContext *avctx)
@@ -496,7 +493,7 @@
     enccfg.g_h            = avctx->height;
     enccfg.g_timebase.num = avctx->time_base.num;
     enccfg.g_timebase.den = avctx->time_base.den;
-    enccfg.g_threads      = avctx->thread_count;
+    enccfg.g_threads      = avctx->thread_count ? avctx->thread_count : av_cpu_count();
     enccfg.g_lag_in_frames= ctx->lag_in_frames;
 
     if (avctx->flags & AV_CODEC_FLAG_PASS1)
@@ -573,6 +570,14 @@
     if (avctx->rc_max_rate)
         enccfg.rc_2pass_vbr_maxsection_pct =
             avctx->rc_max_rate * 100LL / avctx->bit_rate;
+#if CONFIG_LIBVPX_VP9_ENCODER
+    if (avctx->codec_id == AV_CODEC_ID_VP9) {
+#if VPX_ENCODER_ABI_VERSION >= 14
+        if (ctx->corpus_complexity >= 0)
+            enccfg.rc_2pass_vbr_corpus_complexity = ctx->corpus_complexity;
+#endif
+    }
+#endif
 
     if (avctx->rc_buffer_size)
         enccfg.rc_buf_sz         =
@@ -581,15 +586,6 @@
         enccfg.rc_buf_initial_sz =
             avctx->rc_initial_buffer_occupancy * 1000LL / avctx->bit_rate;
     enccfg.rc_buf_optimal_sz     = enccfg.rc_buf_sz * 5 / 6;
-#if FF_API_MPV_OPT
-    FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->rc_buffer_aggressivity != 1.0) {
-        av_log(avctx, AV_LOG_WARNING, "The rc_buffer_aggressivity option is "
-               "deprecated, use the undershoot-pct private option instead.\n");
-        enccfg.rc_undershoot_pct = lrint(avctx->rc_buffer_aggressivity * 100);
-    }
-    FF_ENABLE_DEPRECATION_WARNINGS
-#endif
     if (ctx->rc_undershoot_pct >= 0)
         enccfg.rc_undershoot_pct = ctx->rc_undershoot_pct;
     if (ctx->rc_overshoot_pct >= 0)
@@ -688,15 +684,6 @@
         codecctl_int(avctx, VP8E_SET_NOISE_SENSITIVITY, ctx->noise_sensitivity);
         codecctl_int(avctx, VP8E_SET_TOKEN_PARTITIONS,  av_log2(avctx->slices));
     }
-#if FF_API_MPV_OPT
-    FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->mb_threshold) {
-        av_log(avctx, AV_LOG_WARNING, "The mb_threshold option is deprecated, "
-               "use the static-thresh private option instead.\n");
-        ctx->static_thresh = avctx->mb_threshold;
-    }
-    FF_ENABLE_DEPRECATION_WARNINGS
-#endif
     codecctl_int(avctx, VP8E_SET_STATIC_THRESHOLD,  ctx->static_thresh);
     if (ctx->crf >= 0)
         codecctl_int(avctx, VP8E_SET_CQ_LEVEL,          ctx->crf);
@@ -715,9 +702,7 @@
             codecctl_int(avctx, VP9E_SET_FRAME_PARALLEL_DECODING, ctx->frame_parallel);
         if (ctx->aq_mode >= 0)
             codecctl_int(avctx, VP9E_SET_AQ_MODE, ctx->aq_mode);
-#if VPX_ENCODER_ABI_VERSION > 8
         set_colorspace(avctx);
-#endif
 #if VPX_ENCODER_ABI_VERSION >= 11
         set_color_range(avctx);
 #endif
@@ -728,6 +713,10 @@
         if (ctx->row_mt >= 0)
             codecctl_int(avctx, VP9E_SET_ROW_MT, ctx->row_mt);
 #endif
+#ifdef VPX_CTRL_VP9E_SET_TUNE_CONTENT
+        if (ctx->tune_content >= 0)
+            codecctl_int(avctx, VP9E_SET_TUNE_CONTENT, ctx->tune_content);
+#endif
     }
 #endif
 
@@ -736,7 +725,7 @@
     //provide dummy value to initialize wrapper, values will be updated each _encode()
     vpx_img_wrap(&ctx->rawimg, img_fmt, avctx->width, avctx->height, 1,
                  (unsigned char*)1);
-#if CONFIG_LIBVPX_VP9_ENCODER && defined(VPX_IMG_FMT_HIGHBITDEPTH)
+#if CONFIG_LIBVPX_VP9_ENCODER
     if (avctx->codec_id == AV_CODEC_ID_VP9 && (codec_caps & VPX_CODEC_CAP_HIGHBITDEPTH))
         ctx->rawimg.bit_depth = enccfg.g_bit_depth;
 #endif
@@ -1077,11 +1066,6 @@
 #define OFFSET(x) offsetof(VPxContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 
-#ifndef VPX_ERROR_RESILIENT_DEFAULT
-#define VPX_ERROR_RESILIENT_DEFAULT 1
-#define VPX_ERROR_RESILIENT_PARTITIONS 2
-#endif
-
 #define COMMON_OPTIONS \
     { "auto-alt-ref",    "Enable use of alternate reference " \
                          "frames (2-pass only)",                   OFFSET(auto_alt_ref),    AV_OPT_TYPE_INT, {.i64 = -1},      -1,      2,       VE}, \
@@ -1158,6 +1142,21 @@
 #ifdef VPX_CTRL_VP9E_SET_ROW_MT
     {"row-mt", "Row based multi-threading", OFFSET(row_mt), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
 #endif
+#ifdef VPX_CTRL_VP9E_SET_TUNE_CONTENT
+#if VPX_ENCODER_ABI_VERSION >= 14
+    { "tune-content",    "Tune content type", OFFSET(tune_content), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 2, VE, "tune_content" },
+#else
+    { "tune-content",    "Tune content type", OFFSET(tune_content), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, VE, "tune_content" },
+#endif
+    { "default",         "Regular video content",                  0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 0, VE, "tune_content" },
+    { "screen",          "Screen capture content",                 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, VE, "tune_content" },
+#if VPX_ENCODER_ABI_VERSION >= 14
+    { "film",            "Film content; improves grain retention", 0, AV_OPT_TYPE_CONST, {.i64 = 2}, 0, 0, VE, "tune_content" },
+#endif
+#endif
+#if VPX_ENCODER_ABI_VERSION >= 14
+    { "corpus-complexity", "corpus vbr complexity midpoint", OFFSET(corpus_complexity), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 10000, VE },
+#endif
     LEGACY_OPTIONS
     { NULL }
 };
@@ -1200,6 +1199,7 @@
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_NONE },
     .priv_class     = &class_vp8,
     .defaults       = defaults,
+    .wrapper_name   = "libvpx",
 };
 #endif /* CONFIG_LIBVPX_VP8_ENCODER */
 
@@ -1230,5 +1230,6 @@
     .priv_class     = &class_vp9,
     .defaults       = defaults,
     .init_static_data = ff_vp9_init_static,
+    .wrapper_name   = "libvpx",
 };
 #endif /* CONFIG_LIBVPX_VP9_ENCODER */

diff --git a/libavcodec/libwavpackenc.c b/libavcodec/libwavpackenc.c
index 6d57089..e84b074 100644
--- a/libavcodec/libwavpackenc.c
+++ b/libavcodec/libwavpackenc.c

@@ -191,4 +191,5 @@
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32,
                                                      AV_SAMPLE_FMT_NONE },
+    .wrapper_name   = "libwavpack",
 };

diff --git a/libavcodec/libwebpenc.c b/libavcodec/libwebpenc.c
index 0bcf628..48f45b6 100644
--- a/libavcodec/libwebpenc.c
+++ b/libavcodec/libwebpenc.c

@@ -109,4 +109,5 @@
     },
     .priv_class     = &class,
     .defaults       = libwebp_defaults,
+    .wrapper_name   = "libwebp",
 };

diff --git a/libavcodec/libwebpenc_animencoder.c b/libavcodec/libwebpenc_animencoder.c
index 91bf64c..7f35a0b 100644
--- a/libavcodec/libwebpenc_animencoder.c
+++ b/libavcodec/libwebpenc_animencoder.c

@@ -148,4 +148,5 @@
     },
     .priv_class     = &class,
     .defaults       = libwebp_defaults,
+    .wrapper_name   = "libwebp",
 };

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 6b05846..7ab7453 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c

@@ -161,6 +161,8 @@
     case AV_PIX_FMT_BGR0:
     case AV_PIX_FMT_BGR24:
     case AV_PIX_FMT_RGB24:
+    case AV_PIX_FMT_GRAY8:
+    case AV_PIX_FMT_GRAY10:
         return 1;
 
     default:
@@ -280,7 +282,11 @@
 
     x264_picture_init( &x4->pic );
     x4->pic.img.i_csp   = x4->params.i_csp;
-    if (desc->comp[0].depth > 8)
+#if X264_BUILD >= 153
+    if (x4->params.i_bitdepth > 8)
+#else
+    if (x264_bit_depth > 8)
+#endif
         x4->pic.img.i_csp |= X264_CSP_HIGH_DEPTH;
     x4->pic.img.i_plane = avfmt2_num_planes(ctx->pix_fmt);
 
@@ -440,6 +446,10 @@
 #ifdef X264_CSP_NV21
     case AV_PIX_FMT_NV21:      return X264_CSP_NV21;
 #endif
+#ifdef X264_CSP_I400
+    case AV_PIX_FMT_GRAY8:
+    case AV_PIX_FMT_GRAY10:    return X264_CSP_I400;
+#endif
     };
     return 0;
 }
@@ -491,6 +501,9 @@
     x4->params.p_log_private        = avctx;
     x4->params.i_log_level          = X264_LOG_DEBUG;
     x4->params.i_csp                = convert_pix_fmt(avctx->pix_fmt);
+#if X264_BUILD >= 153
+    x4->params.i_bitdepth           = av_pix_fmt_desc_get(avctx->pix_fmt)->comp[0].depth;
+#endif
 
     PARSE_X264_OPT("weightp", wpredp);
 
@@ -702,24 +715,8 @@
     if (x4->nal_hrd >= 0)
         x4->params.i_nal_hrd = x4->nal_hrd;
 
-    if (x4->motion_est >= 0) {
+    if (x4->motion_est >= 0)
         x4->params.analyse.i_me_method = x4->motion_est;
-#if FF_API_MOTION_EST
-FF_DISABLE_DEPRECATION_WARNINGS
-    } else {
-        if (avctx->me_method == ME_EPZS)
-            x4->params.analyse.i_me_method = X264_ME_DIA;
-        else if (avctx->me_method == ME_HEX)
-            x4->params.analyse.i_me_method = X264_ME_HEX;
-        else if (avctx->me_method == ME_UMH)
-            x4->params.analyse.i_me_method = X264_ME_UMH;
-        else if (avctx->me_method == ME_FULL)
-            x4->params.analyse.i_me_method = X264_ME_ESA;
-        else if (avctx->me_method == ME_TESA)
-            x4->params.analyse.i_me_method = X264_ME_TESA;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-    }
 
     if (x4->coder >= 0)
         x4->params.b_cabac = x4->coder;
@@ -900,6 +897,28 @@
     AV_PIX_FMT_NV20,
     AV_PIX_FMT_NONE
 };
+static const enum AVPixelFormat pix_fmts_all[] = {
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUVJ420P,
+    AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUV444P,
+    AV_PIX_FMT_YUVJ444P,
+    AV_PIX_FMT_NV12,
+    AV_PIX_FMT_NV16,
+#ifdef X264_CSP_NV21
+    AV_PIX_FMT_NV21,
+#endif
+    AV_PIX_FMT_YUV420P10,
+    AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_YUV444P10,
+    AV_PIX_FMT_NV20,
+#ifdef X264_CSP_I400
+    AV_PIX_FMT_GRAY8,
+    AV_PIX_FMT_GRAY10,
+#endif
+    AV_PIX_FMT_NONE
+};
 #if CONFIG_LIBX264RGB_ENCODER
 static const enum AVPixelFormat pix_fmts_8bit_rgb[] = {
     AV_PIX_FMT_BGR0,
@@ -919,7 +938,7 @@
     else if (x264_bit_depth == 10)
         codec->pix_fmts = pix_fmts_10bit;
 #else
-    codec->pix_fmts = pix_fmts;
+    codec->pix_fmts = pix_fmts_all;
 #endif
 }
 
@@ -984,6 +1003,7 @@
     { "vbr",           NULL, 0, AV_OPT_TYPE_CONST, {.i64 = X264_NAL_HRD_VBR},  INT_MIN, INT_MAX, VE, "nal-hrd" },
     { "cbr",           NULL, 0, AV_OPT_TYPE_CONST, {.i64 = X264_NAL_HRD_CBR},  INT_MIN, INT_MAX, VE, "nal-hrd" },
     { "avcintra-class","AVC-Intra class 50/100/200",                      OFFSET(avcintra_class),AV_OPT_TYPE_INT,     { .i64 = -1 }, -1, 200   , VE},
+    { "me_method",    "Set motion estimation method",                     OFFSET(motion_est),    AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, X264_ME_TESA, VE, "motion-est"},
     { "motion-est",   "Set motion estimation method",                     OFFSET(motion_est),    AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, X264_ME_TESA, VE, "motion-est"},
     { "dia",           NULL, 0, AV_OPT_TYPE_CONST, { .i64 = X264_ME_DIA },  INT_MIN, INT_MAX, VE, "motion-est" },
     { "hex",           NULL, 0, AV_OPT_TYPE_CONST, { .i64 = X264_ME_HEX },  INT_MIN, INT_MAX, VE, "motion-est" },
@@ -1028,9 +1048,6 @@
     { "nr",               "-1" },
 #endif
     { "me_range",         "-1" },
-#if FF_API_MOTION_EST
-    { "me_method",        "-1" },
-#endif
     { "subq",             "-1" },
 #if FF_API_PRIVATE_OPT
     { "b_strategy",       "-1" },
@@ -1070,6 +1087,7 @@
     .init_static_data = X264_init_static,
     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
                         FF_CODEC_CAP_INIT_CLEANUP,
+    .wrapper_name     = "libx264",
 };
 #endif
 
@@ -1094,6 +1112,7 @@
     .priv_class     = &rgbclass,
     .defaults       = x264_defaults,
     .pix_fmts       = pix_fmts_8bit_rgb,
+    .wrapper_name   = "libx264",
 };
 #endif
 
@@ -1120,5 +1139,6 @@
     .pix_fmts         = pix_fmts_8bit,
     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
                         FF_CODEC_CAP_INIT_CLEANUP,
+    .wrapper_name     = "libx264",
 };
 #endif

diff --git a/libavcodec/libx265.c b/libavcodec/libx265.c
index 784b51c..27c90b3 100644
--- a/libavcodec/libx265.c
+++ b/libavcodec/libx265.c

@@ -45,6 +45,7 @@
     int   forced_idr;
     char *preset;
     char *tune;
+    char *profile;
     char *x265_opts;
 } libx265Context;
 
@@ -113,12 +114,24 @@
     ctx->params->sourceWidth     = avctx->width;
     ctx->params->sourceHeight    = avctx->height;
     ctx->params->bEnablePsnr     = !!(avctx->flags & AV_CODEC_FLAG_PSNR);
+    ctx->params->bOpenGOP        = !(avctx->flags & AV_CODEC_FLAG_CLOSED_GOP);
 
-    if ((avctx->color_primaries <= AVCOL_PRI_BT2020 &&
+    /* Tune the CTU size based on input resolution. */
+    if (ctx->params->sourceWidth < 64 || ctx->params->sourceHeight < 64)
+        ctx->params->maxCUSize = 32;
+    if (ctx->params->sourceWidth < 32 || ctx->params->sourceHeight < 32)
+        ctx->params->maxCUSize = 16;
+    if (ctx->params->sourceWidth < 16 || ctx->params->sourceHeight < 16) {
+        av_log(avctx, AV_LOG_ERROR, "Image size is too small (%dx%d).\n",
+               ctx->params->sourceWidth, ctx->params->sourceHeight);
+        return AVERROR(EINVAL);
+    }
+
+    if ((avctx->color_primaries <= AVCOL_PRI_SMPTE432 &&
          avctx->color_primaries != AVCOL_PRI_UNSPECIFIED) ||
-        (avctx->color_trc <= AVCOL_TRC_BT2020_12 &&
+        (avctx->color_trc <= AVCOL_TRC_ARIB_STD_B67 &&
          avctx->color_trc != AVCOL_TRC_UNSPECIFIED) ||
-        (avctx->colorspace <= AVCOL_SPC_BT2020_CL &&
+        (avctx->colorspace <= AVCOL_SPC_ICTCP &&
          avctx->colorspace != AVCOL_SPC_UNSPECIFIED)) {
 
         ctx->params->vui.bEnableVideoSignalTypePresentFlag  = 1;
@@ -192,6 +205,9 @@
         ctx->params->rc.rateControlMode = X265_RC_ABR;
     }
 
+    ctx->params->rc.vbvBufferSize = avctx->rc_buffer_size / 1000;
+    ctx->params->rc.vbvMaxBitrate = avctx->rc_max_rate    / 1000;
+
     if (!(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER))
         ctx->params->bRepeatHeaders = 1;
 
@@ -220,6 +236,23 @@
         }
     }
 
+    if (ctx->params->rc.vbvBufferSize && avctx->rc_initial_buffer_occupancy > 1000 &&
+        ctx->params->rc.vbvBufferInit == 0.9) {
+        ctx->params->rc.vbvBufferInit = (float)avctx->rc_initial_buffer_occupancy / 1000;
+    }
+
+    if (ctx->profile) {
+        if (ctx->api->param_apply_profile(ctx->params, ctx->profile) < 0) {
+            int i;
+            av_log(avctx, AV_LOG_ERROR, "Invalid or incompatible profile set: %s.\n", ctx->profile);
+            av_log(avctx, AV_LOG_INFO, "Possible profiles:");
+            for (i = 0; x265_profile_names[i]; i++)
+                av_log(avctx, AV_LOG_INFO, " %s", x265_profile_names[i]);
+            av_log(avctx, AV_LOG_INFO, "\n");
+            return AVERROR(EINVAL);
+        }
+    }
+
     ctx->encoder = ctx->api->encoder_open(ctx->params);
     if (!ctx->encoder) {
         av_log(avctx, AV_LOG_ERROR, "Cannot open libx265 encoder.\n");
@@ -294,7 +327,7 @@
     for (i = 0; i < nnal; i++)
         payload += nal[i].sizeBytes;
 
-    ret = ff_alloc_packet(pkt, payload);
+    ret = ff_alloc_packet2(avctx, pkt, payload, payload);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
         return ret;
@@ -329,6 +362,13 @@
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
+#if X265_BUILD >= 130
+    if (x265pic_out.sliceType == X265_TYPE_B)
+#else
+    if (x265pic_out.frameData.sliceType == 'b')
+#endif
+        pkt->flags |= AV_PKT_FLAG_DISPOSABLE;
+
     *got_packet = 1;
     return 0;
 }
@@ -392,6 +432,7 @@
     { "forced-idr",  "if forcing keyframes, force them as IDR frames",                              OFFSET(forced_idr),AV_OPT_TYPE_BOOL,   { .i64 =  0 },  0,       1, VE },
     { "preset",      "set the x265 preset",                                                         OFFSET(preset),    AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
     { "tune",        "set the x265 tune parameter",                                                 OFFSET(tune),      AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
+    { "profile",     "set the x265 profile",                                                        OFFSET(profile),   AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
     { "x265-params", "set the x265 configuration using a :-separated list of key=value parameters", OFFSET(x265_opts), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
     { NULL }
 };
@@ -421,4 +462,5 @@
     .priv_class       = &class,
     .defaults         = x265_defaults,
     .capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
+    .wrapper_name     = "libx265",
 };

diff --git a/libavcodec/libxavs.c b/libavcodec/libxavs.c
index f257e55..801a05d 100644
--- a/libavcodec/libxavs.c
+++ b/libavcodec/libxavs.c

@@ -283,32 +283,6 @@
     if (x4->cplxblur >= 0)
         x4->params.rc.f_complexity_blur = x4->cplxblur;
 
-#if FF_API_MOTION_EST
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (x4->motion_est < 0) {
-        switch (avctx->me_method) {
-        case  ME_EPZS:
-            x4->params.analyse.i_me_method = XAVS_ME_DIA;
-            break;
-        case  ME_HEX:
-            x4->params.analyse.i_me_method = XAVS_ME_HEX;
-            break;
-        case  ME_UMH:
-            x4->params.analyse.i_me_method = XAVS_ME_UMH;
-            break;
-        case  ME_FULL:
-            x4->params.analyse.i_me_method = XAVS_ME_ESA;
-            break;
-        case  ME_TESA:
-            x4->params.analyse.i_me_method = XAVS_ME_TESA;
-            break;
-        default:
-            x4->params.analyse.i_me_method = XAVS_ME_HEX;
-        }
-    }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     x4->params.i_bframe          = avctx->max_b_frames;
     /* cabac is not included in AVS JiZhun Profile */
     x4->params.b_cabac           = 0;
@@ -465,7 +439,7 @@
     { "mbtree",        "Use macroblock tree ratecontrol.",                OFFSET(mbtree),        AV_OPT_TYPE_BOOL,    {.i64 = -1 }, -1, 1, VE},
     { "mixed-refs",    "One reference per partition, as opposed to one reference per macroblock", OFFSET(mixed_refs), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE },
     { "fast-pskip",    NULL,                                              OFFSET(fast_pskip),    AV_OPT_TYPE_BOOL,    {.i64 = -1 }, -1, 1, VE},
-    { "motion-est",   "Set motion estimation method",                     OFFSET(motion_est),    AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, XAVS_ME_TESA, VE, "motion-est"},
+    { "motion-est",   "Set motion estimation method",                     OFFSET(motion_est),    AV_OPT_TYPE_INT,    { .i64 = XAVS_ME_DIA }, -1, XAVS_ME_TESA, VE, "motion-est"},
     { "dia",           NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_ME_DIA },               INT_MIN, INT_MAX, VE, "motion-est" },
     { "hex",           NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_ME_HEX },               INT_MIN, INT_MAX, VE, "motion-est" },
     { "umh",           NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_ME_UMH },               INT_MIN, INT_MAX, VE, "motion-est" },
@@ -504,4 +478,5 @@
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
     .priv_class     = &xavs_class,
     .defaults       = xavs_defaults,
+    .wrapper_name   = "libxavs",
 };

diff --git a/libavcodec/libxavs2.c b/libavcodec/libxavs2.c
new file mode 100644
index 0000000..2b47d0c
--- /dev/null
+++ b/libavcodec/libxavs2.c

@@ -0,0 +1,293 @@
+/*
+ * AVS2 encoding using the xavs2 library
+ *
+ * Copyright (C) 2018 Yiqun Xu,   <yiqun.xu@vipl.ict.ac.cn>
+ *                    Falei Luo,  <falei.luo@gmail.com>
+ *                    Huiwen Ren, <hwrenx@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "xavs2.h"
+#include "mpeg12.h"
+#include "libavutil/avstring.h"
+
+#define xavs2_opt_set2(name, format, ...) do{ \
+    char opt_str[16] = {0}; \
+    int err; \
+    av_strlcatf(opt_str, sizeof(opt_str), format, __VA_ARGS__); \
+    err = cae->api->opt_set2(cae->param, name, opt_str); \
+    if (err) {\
+        av_log(avctx, AV_LOG_WARNING, "Invalid value for %s: %s\n", name, opt_str);\
+    }\
+} while(0);
+
+typedef struct XAVS2EContext {
+    AVClass *class;
+
+    int lcu_row_threads;
+    int initial_qp;
+    int qp;
+    int max_qp;
+    int min_qp;
+    int preset_level;
+    int log_level;
+    int hierarchical_reference;
+
+    void *encoder;
+    char *xavs2_opts;
+
+    xavs2_outpacket_t packet;
+    xavs2_param_t *param;
+
+    const xavs2_api_t *api;
+
+} XAVS2EContext;
+
+static av_cold int xavs2_init(AVCodecContext *avctx)
+{
+    XAVS2EContext *cae= avctx->priv_data;
+    int bit_depth, code;
+
+    bit_depth = avctx->pix_fmt == AV_PIX_FMT_YUV420P ? 8 : 10;
+
+    /* get API handler */
+    cae->api = xavs2_api_get(bit_depth);
+    if (!cae->api) {
+        av_log(avctx, AV_LOG_ERROR, "api get failed\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    cae->param = cae->api->opt_alloc();
+    if (!cae->param) {
+        av_log(avctx, AV_LOG_ERROR, "param alloc failed\n");
+        return AVERROR(ENOMEM);
+    }
+
+    xavs2_opt_set2("width",     "%d", avctx->width);
+    xavs2_opt_set2("height",    "%d", avctx->height);
+    xavs2_opt_set2("bframes",   "%d", avctx->max_b_frames);
+    xavs2_opt_set2("bitdepth",  "%d", bit_depth);
+    xavs2_opt_set2("log",       "%d", cae->log_level);
+    xavs2_opt_set2("preset",    "%d", cae->preset_level);
+
+    /* not the same parameter as the IntraPeriod in xavs2 log */
+    xavs2_opt_set2("intraperiod",       "%d", avctx->gop_size);
+
+    xavs2_opt_set2("thread_frames",     "%d", avctx->thread_count);
+    xavs2_opt_set2("thread_rows",       "%d", cae->lcu_row_threads);
+
+    xavs2_opt_set2("OpenGOP",  "%d", 1);
+
+    if (cae->xavs2_opts) {
+        AVDictionary *dict    = NULL;
+        AVDictionaryEntry *en = NULL;
+
+        if (!av_dict_parse_string(&dict, cae->xavs2_opts, "=", ":", 0)) {
+            while ((en = av_dict_get(dict, "", en, AV_DICT_IGNORE_SUFFIX))) {
+                xavs2_opt_set2(en->key, "%s", en->value);
+            }
+            av_dict_free(&dict);
+        }
+    }
+
+    /* Rate control */
+    if (avctx->bit_rate > 0) {
+        xavs2_opt_set2("RateControl",   "%d", 1);
+        xavs2_opt_set2("TargetBitRate", "%"PRId64"", avctx->bit_rate);
+        xavs2_opt_set2("initial_qp",    "%d", cae->initial_qp);
+        xavs2_opt_set2("max_qp",        "%d", cae->max_qp);
+        xavs2_opt_set2("min_qp",        "%d", cae->min_qp);
+    } else {
+        xavs2_opt_set2("initial_qp",    "%d", cae->qp);
+    }
+
+
+    ff_mpeg12_find_best_frame_rate(avctx->framerate, &code, NULL, NULL, 0);
+
+    xavs2_opt_set2("FrameRate",   "%d", code);
+
+    cae->encoder = cae->api->encoder_create(cae->param);
+
+    if (!cae->encoder) {
+        av_log(avctx,AV_LOG_ERROR, "Can not create encoder. Null pointer returned\n");
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+static void xavs2_copy_frame_with_shift(xavs2_picture_t *pic, const AVFrame *frame, const int shift_in)
+{
+    int j, k;
+    for (k = 0; k < 3; k++) {
+        int i_stride = pic->img.i_stride[k];
+        for (j = 0; j < pic->img.i_lines[k]; j++) {
+            uint16_t *p_plane = (uint16_t *)&pic->img.img_planes[k][j * i_stride];
+            int i;
+            uint8_t *p_buffer = frame->data[k] + frame->linesize[k] * j;
+            memset(p_plane, 0, i_stride);
+            for (i = 0; i < pic->img.i_width[k]; i++) {
+                p_plane[i] = p_buffer[i] << shift_in;
+            }
+        }
+    }
+}
+
+static void xavs2_copy_frame(xavs2_picture_t *pic, const AVFrame *frame)
+{
+    int j, k;
+    for (k = 0; k < 3; k++) {
+        for (j = 0; j < pic->img.i_lines[k]; j++) {
+            memcpy( pic->img.img_planes[k] + pic->img.i_stride[k] * j,
+                    frame->data[k]+frame->linesize[k] * j,
+                    pic->img.i_width[k] * pic->img.in_sample_size);
+        }
+    }
+}
+
+static int xavs2_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                      const AVFrame *frame, int *got_packet)
+{
+    XAVS2EContext *cae = avctx->priv_data;
+    xavs2_picture_t pic;
+    int ret;
+
+    /* create the XAVS2 video encoder */
+    /* read frame data and send to the XAVS2 video encoder */
+    if (cae->api->encoder_get_buffer(cae->encoder, &pic) < 0) {
+        av_log(avctx,AV_LOG_ERROR, "failed to get frame buffer\n");
+        return AVERROR_EXTERNAL;
+    }
+    if (frame) {
+        switch (frame->format) {
+            case AV_PIX_FMT_YUV420P:
+                if (pic.img.in_sample_size == pic.img.enc_sample_size) {
+                    xavs2_copy_frame(&pic, frame);
+                } else {
+                    const int shift_in = atoi(cae->api->opt_get(cae->param, "SampleShift"));
+                    xavs2_copy_frame_with_shift(&pic, frame, shift_in);
+                }
+            break;
+            case AV_PIX_FMT_YUV420P10:
+                if (pic.img.in_sample_size == pic.img.enc_sample_size) {
+                    xavs2_copy_frame(&pic, frame);
+                    break;
+                }
+            default:
+                av_log(avctx, AV_LOG_ERROR, "Unsupported pixel format\n");
+                return AVERROR(EINVAL);
+            break;
+        }
+
+        pic.i_state = 0;
+        pic.i_pts   = frame->pts;
+        pic.i_type  = XAVS2_TYPE_AUTO;
+
+        ret = cae->api->encoder_encode(cae->encoder, &pic, &cae->packet);
+
+        if (ret) {
+            av_log(avctx, AV_LOG_ERROR, "encode failed\n");
+            return AVERROR_EXTERNAL;
+        }
+
+    } else {
+        cae->api->encoder_encode(cae->encoder, NULL, &cae->packet);
+    }
+
+    if ((cae->packet.len) && (cae->packet.state != XAVS2_STATE_FLUSH_END)){
+
+        if (av_new_packet(pkt, cae->packet.len) < 0){
+            av_log(avctx, AV_LOG_ERROR, "packet alloc failed\n");
+            cae->api->encoder_packet_unref(cae->encoder, &cae->packet);
+            return AVERROR(ENOMEM);
+        }
+
+        pkt->pts = cae->packet.pts;
+        pkt->dts = cae->packet.dts;
+
+        memcpy(pkt->data, cae->packet.stream, cae->packet.len);
+        pkt->size = cae->packet.len;
+
+        cae->api->encoder_packet_unref(cae->encoder, &cae->packet);
+
+        *got_packet = 1;
+    } else {
+        *got_packet = 0;
+    }
+
+    return 0;
+}
+
+static av_cold int xavs2_close(AVCodecContext *avctx)
+{
+    XAVS2EContext *cae = avctx->priv_data;
+    /* destroy the encoder */
+    if (cae->api) {
+        cae->api->encoder_destroy(cae->encoder);
+
+        if (cae->param) {
+            cae->api->opt_destroy(cae->param);
+        }
+    }
+    return 0;
+}
+
+#define OFFSET(x) offsetof(XAVS2EContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+
+static const AVOption options[] = {
+    { "lcu_row_threads" ,   "number of parallel threads for rows" ,     OFFSET(lcu_row_threads) , AV_OPT_TYPE_INT, {.i64 =  0 },  0, INT_MAX,  VE },
+    { "initial_qp"      ,   "Quantization initial parameter"      ,     OFFSET(initial_qp)      , AV_OPT_TYPE_INT, {.i64 = 34 },  1,      63,  VE },
+    { "qp"              ,   "Quantization parameter"  ,                 OFFSET(qp)              , AV_OPT_TYPE_INT, {.i64 = 34 },  1,      63,  VE },
+    { "max_qp"          ,   "max qp for rate control" ,                 OFFSET(max_qp)          , AV_OPT_TYPE_INT, {.i64 = 55 },  0,      63,  VE },
+    { "min_qp"          ,   "min qp for rate control" ,                 OFFSET(min_qp)          , AV_OPT_TYPE_INT, {.i64 = 20 },  0,      63,  VE },
+    { "speed_level"     ,   "Speed level, higher is better but slower", OFFSET(preset_level)    , AV_OPT_TYPE_INT, {.i64 =  0 },  0,       9,  VE },
+    { "log_level"       ,   "log level: -1: none, 0: error, 1: warning, 2: info, 3: debug", OFFSET(log_level)    , AV_OPT_TYPE_INT, {.i64 =  0 },  -1,       3,  VE },
+    { "xavs2-params"    ,   "set the xavs2 configuration using a :-separated list of key=value parameters", OFFSET(xavs2_opts), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
+    { NULL },
+};
+
+static const AVClass libxavs2 = {
+    .class_name = "XAVS2EContext",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVCodecDefault xavs2_defaults[] = {
+    { "b",                "0" },
+    { "g",                "48" },
+    { "bf",               "7" },
+    { NULL },
+};
+
+AVCodec ff_libxavs2_encoder = {
+    .name           = "libxavs2",
+    .long_name      = NULL_IF_CONFIG_SMALL("libxavs2 AVS2-P2/IEEE1857.4"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_AVS2,
+    .priv_data_size = sizeof(XAVS2EContext),
+    .init           = xavs2_init,
+    .encode2        = xavs2_encode_frame,
+    .close          = xavs2_close,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_NONE },
+    .priv_class     = &libxavs2,
+    .defaults       = xavs2_defaults,
+    .wrapper_name   = "libxavs2",
+} ;

diff --git a/libavcodec/libxvid.c b/libavcodec/libxvid.c
index d916f11..cdaae20 100644
--- a/libavcodec/libxvid.c
+++ b/libavcodec/libxvid.c

@@ -418,30 +418,6 @@
     case 1:
         x->me_flags |= XVID_ME_ADVANCEDDIAMOND16 |
                        XVID_ME_HALFPELREFINE16;
-#if FF_API_MOTION_EST
-FF_DISABLE_DEPRECATION_WARNINGS
-        break;
-    default:
-        switch (avctx->me_method) {
-        case ME_FULL:   /* Quality 6 */
-             x->me_flags |= XVID_ME_EXTSEARCH16 |
-                            XVID_ME_EXTSEARCH8;
-        case ME_EPZS:   /* Quality 4 */
-             x->me_flags |= XVID_ME_ADVANCEDDIAMOND8 |
-                            XVID_ME_HALFPELREFINE8   |
-                            XVID_ME_CHROMA_PVOP      |
-                            XVID_ME_CHROMA_BVOP;
-        case ME_LOG:    /* Quality 2 */
-        case ME_PHODS:
-        case ME_X1:
-             x->me_flags |= XVID_ME_ADVANCEDDIAMOND16 |
-                            XVID_ME_HALFPELREFINE16;
-        case ME_ZERO:   /* Quality 0 */
-        default:
-            break;
-        }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
     }
 
     /* Decide how we should decide blocks */
@@ -462,11 +438,6 @@
     }
 
     /* Bring in VOL flags from ffmpeg command-line */
-#if FF_API_GMC
-    if (avctx->flags & CODEC_FLAG_GMC)
-        x->gmc = 1;
-#endif
-
     x->vol_flags = 0;
     if (x->gmc) {
         x->vol_flags |= XVID_VOL_GMC;
@@ -938,7 +909,7 @@
     { "frame",       NULL,                                                0, AV_OPT_TYPE_CONST, { .i64 = 2 }, INT_MIN, INT_MAX, VE, "ssim" },
     { "ssim_acc",    "SSIM accuracy",                   OFFSET(ssim_acc),    AV_OPT_TYPE_INT,   { .i64 = 2 },       0,       4, VE         },
     { "gmc",         "use GMC",                         OFFSET(gmc),         AV_OPT_TYPE_INT,   { .i64 = 0 },       0,       1, VE         },
-    { "me_quality",  "Motion estimation quality",       OFFSET(me_quality),  AV_OPT_TYPE_INT,   { .i64 = 0 },       0,       6, VE         },
+    { "me_quality",  "Motion estimation quality",       OFFSET(me_quality),  AV_OPT_TYPE_INT,   { .i64 = 4 },       0,       6, VE         },
     { "mpeg_quant",  "Use MPEG quantizers instead of H.263", OFFSET(mpeg_quant), AV_OPT_TYPE_INT, { .i64 = 0 },     0,       1, VE         },
     { NULL },
 };
@@ -963,4 +934,5 @@
     .priv_class     = &xvid_class,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
                       FF_CODEC_CAP_INIT_CLEANUP,
+    .wrapper_name   = "libxvid",
 };

diff --git a/libavcodec/libxvid_rc.c b/libavcodec/libxvid_rc.c
deleted file mode 100644
index 076c32c..0000000
--- a/libavcodec/libxvid_rc.c
+++ /dev/null

@@ -1,164 +0,0 @@
-/*
- * Xvid rate control wrapper for lavc video encoders
- *
- * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-
-#if HAVE_IO_H
-#include <io.h>
-#endif
-
-#if HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#include <xvid.h>
-
-#include "libavutil/attributes.h"
-#include "libavutil/internal.h"
-
-#include "avcodec.h"
-#include "libxvid.h"
-#include "mpegvideo.h"
-
-av_cold int ff_xvid_rate_control_init(MpegEncContext *s)
-{
-    char *tmp_name;
-    int fd, i;
-    xvid_plg_create_t xvid_plg_create = { 0 };
-    xvid_plugin_2pass2_t xvid_2pass2  = { 0 };
-
-    fd = avpriv_tempfile("xvidrc.", &tmp_name, 0, s->avctx);
-    if (fd < 0) {
-        av_log(s, AV_LOG_ERROR, "Can't create temporary pass2 file.\n");
-        return fd;
-    }
-
-    for (i = 0; i < s->rc_context.num_entries; i++) {
-        static const char frame_types[] = " ipbs";
-        char tmp[256];
-        RateControlEntry *rce;
-
-        rce = &s->rc_context.entry[i];
-
-        snprintf(tmp, sizeof(tmp), "%c %d %d %d %d %d %d\n",
-                 frame_types[rce->pict_type],
-                 (int) lrintf(rce->qscale / FF_QP2LAMBDA),
-                 rce->i_count, s->mb_num - rce->i_count - rce->skip_count,
-                 rce->skip_count,
-                 (rce->i_tex_bits + rce->p_tex_bits + rce->misc_bits + 7) / 8,
-                 (rce->header_bits + rce->mv_bits + 7) / 8);
-
-        if (write(fd, tmp, strlen(tmp)) < 0) {
-            int ret = AVERROR(errno);
-            av_log(s, AV_LOG_ERROR, "Error %s writing 2pass logfile\n", av_err2str(ret));
-            av_free(tmp_name);
-            close(fd);
-            return ret;
-        }
-    }
-
-    close(fd);
-
-    xvid_2pass2.version     = XVID_MAKE_VERSION(1, 1, 0);
-    xvid_2pass2.filename    = tmp_name;
-    xvid_2pass2.bitrate     = s->avctx->bit_rate;
-    xvid_2pass2.vbv_size    = s->avctx->rc_buffer_size;
-    xvid_2pass2.vbv_maxrate = s->avctx->rc_max_rate;
-    xvid_2pass2.vbv_initial = s->avctx->rc_initial_buffer_occupancy;
-
-    xvid_plg_create.version = XVID_MAKE_VERSION(1, 1, 0);
-    xvid_plg_create.fbase   = s->avctx->time_base.den;
-    xvid_plg_create.fincr   = s->avctx->time_base.num;
-    xvid_plg_create.param   = &xvid_2pass2;
-
-    if (xvid_plugin_2pass2(NULL, XVID_PLG_CREATE, &xvid_plg_create,
-                           &s->rc_context.non_lavc_opaque) < 0) {
-        av_log(s, AV_LOG_ERROR, "xvid_plugin_2pass2 failed\n");
-        return -1;
-    }
-    return 0;
-}
-
-float ff_xvid_rate_estimate_qscale(MpegEncContext *s, int dry_run)
-{
-    xvid_plg_data_t xvid_plg_data = { 0 };
-
-    xvid_plg_data.version       = XVID_MAKE_VERSION(1, 1, 0);
-    xvid_plg_data.width         = s->width;
-    xvid_plg_data.height        = s->height;
-    xvid_plg_data.mb_width      = s->mb_width;
-    xvid_plg_data.mb_height     = s->mb_height;
-    xvid_plg_data.fbase         = s->avctx->time_base.den;
-    xvid_plg_data.fincr         = s->avctx->time_base.num;
-    xvid_plg_data.min_quant[0]  = s->avctx->qmin;
-    xvid_plg_data.min_quant[1]  = s->avctx->qmin;
-    xvid_plg_data.min_quant[2]  = s->avctx->qmin;   // FIXME i/b factor & offset
-    xvid_plg_data.max_quant[0]  = s->avctx->qmax;
-    xvid_plg_data.max_quant[1]  = s->avctx->qmax;
-    xvid_plg_data.max_quant[2]  = s->avctx->qmax;   // FIXME i/b factor & offset
-    xvid_plg_data.bquant_offset = 0;      //  100 * s->avctx->b_quant_offset;
-    xvid_plg_data.bquant_ratio  = 100;    //      * s->avctx->b_quant_factor;
-
-    if (!s->rc_context.dry_run_qscale) {
-        if (s->picture_number) {
-            xvid_plg_data.length        =
-            xvid_plg_data.stats.length  = (s->frame_bits + 7) / 8;
-            xvid_plg_data.frame_num     = s->rc_context.last_picture_number;
-            xvid_plg_data.quant         = s->qscale;
-            xvid_plg_data.type          = s->last_pict_type;
-            if (xvid_plugin_2pass2(s->rc_context.non_lavc_opaque,
-                                   XVID_PLG_AFTER, &xvid_plg_data, NULL)) {
-                av_log(s, AV_LOG_ERROR,
-                       "xvid_plugin_2pass2(handle, XVID_PLG_AFTER, ...) FAILED\n");
-                return -1;
-            }
-        }
-        s->rc_context.last_picture_number =
-        xvid_plg_data.frame_num           = s->picture_number;
-        xvid_plg_data.quant               = 0;
-        if (xvid_plugin_2pass2(s->rc_context.non_lavc_opaque,
-                               XVID_PLG_BEFORE, &xvid_plg_data, NULL)) {
-            av_log(s, AV_LOG_ERROR,
-                   "xvid_plugin_2pass2(handle, XVID_PLG_BEFORE, ...) FAILED\n");
-            return -1;
-        }
-        s->rc_context.dry_run_qscale = xvid_plg_data.quant;
-    }
-    xvid_plg_data.quant = s->rc_context.dry_run_qscale;
-    if (!dry_run)
-        s->rc_context.dry_run_qscale = 0;
-
-    // FIXME this is not exactly identical to Xvid
-    if (s->pict_type == AV_PICTURE_TYPE_B)
-        return xvid_plg_data.quant * FF_QP2LAMBDA * s->avctx->b_quant_factor +
-            s->avctx->b_quant_offset;
-    else
-        return xvid_plg_data.quant * FF_QP2LAMBDA;
-}
-
-av_cold void ff_xvid_rate_control_uninit(MpegEncContext *s)
-{
-    xvid_plg_destroy_t xvid_plg_destroy;
-
-    xvid_plugin_2pass2(s->rc_context.non_lavc_opaque, XVID_PLG_DESTROY,
-                       &xvid_plg_destroy, NULL);
-}

diff --git a/libavcodec/libzvbi-teletextdec.c b/libavcodec/libzvbi-teletextdec.c
index 687b6af..3515f33 100644
--- a/libavcodec/libzvbi-teletextdec.c
+++ b/libavcodec/libzvbi-teletextdec.c

@@ -26,6 +26,7 @@
 #include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
+#include "libavutil/common.h"
 
 #include <libzvbi.h>
 
@@ -56,7 +57,7 @@
     char           *pgno;
     int             x_offset;
     int             y_offset;
-    int             format_id; /* 0 = bitmap, 1 = text/ass */
+    int             format_id; /* 0 = bitmap, 1 = text/ass, 2 = ass */
     int             chop_top;
     int             sub_duration; /* in msec */
     int             transparent_bg;
@@ -70,14 +71,61 @@
     int             handler_ret;
 
     vbi_decoder *   vbi;
-#ifdef DEBUG
-    vbi_export *    ex;
-#endif
     vbi_sliced      sliced[MAX_SLICES];
 
     int             readorder;
+    uint8_t         subtitle_map[2048];
+    int             last_pgno;
+    int             last_p5;
+    int             last_ass_alignment;
 } TeletextContext;
 
+static int my_ass_subtitle_header(AVCodecContext *avctx)
+{
+    int ret = ff_ass_subtitle_header_default(avctx);
+    char *new_header;
+    uint8_t *event_pos;
+
+    if (ret < 0)
+        return ret;
+
+    event_pos = strstr(avctx->subtitle_header, "\r\n[Events]\r\n");
+    if (!event_pos)
+        return AVERROR_BUG;
+
+    new_header = av_asprintf("%.*s%s%s",
+        (int)(event_pos - avctx->subtitle_header), avctx->subtitle_header,
+        "Style: "
+        "Teletext,"            /* Name */
+        "Monospace,11,"        /* Font{name,size} */
+        "&Hffffff,&Hffffff,&H0,&H0," /* {Primary,Secondary,Outline,Back}Colour */
+        "0,0,0,0,"             /* Bold, Italic, Underline, StrikeOut */
+        "160,100,"             /* Scale{X,Y} */
+        "0,0,"                 /* Spacing, Angle */
+        "3,0.1,0,"             /* BorderStyle, Outline, Shadow */
+        "5,1,1,1,"             /* Alignment, Margin[LRV] */
+        "0\r\n"                /* Encoding */
+        "Style: "
+        "Subtitle,"            /* Name */
+        "Monospace,16,"        /* Font{name,size} */
+        "&Hffffff,&Hffffff,&H0,&H0," /* {Primary,Secondary,Outline,Back}Colour */
+        "0,0,0,0,"             /* Bold, Italic, Underline, StrikeOut */
+        "100,100,"             /* Scale{X,Y} */
+        "0,0,"                 /* Spacing, Angle */
+        "1,1,1,"               /* BorderStyle, Outline, Shadow */
+        "8,48,48,20,"          /* Alignment, Margin[LRV] */
+        "0\r\n"                /* Encoding */
+        , event_pos);
+
+    if (!new_header)
+        return AVERROR(ENOMEM);
+
+    av_free(avctx->subtitle_header);
+    avctx->subtitle_header = new_header;
+    avctx->subtitle_header_size = strlen(new_header);
+    return 0;
+}
+
 static int chop_spaces_utf8(const unsigned char* t, int len)
 {
     t += len;
@@ -181,6 +229,184 @@
     return 0;
 }
 
+static void bprint_color(const char *type, AVBPrint *buf, vbi_page *page, unsigned ci)
+{
+    int r = VBI_R(page->color_map[ci]);
+    int g = VBI_G(page->color_map[ci]);
+    int b = VBI_B(page->color_map[ci]);
+    av_bprintf(buf, "{\\%s&H%02X%02X%02X&}", type, b, g, r);
+}
+
+#define IS_TXT_SPACE(ch) ((ch).unicode < 0x0020 || (ch).unicode >= 0xe000 || (ch).unicode == 0x00a0 ||\
+                          (ch).size > VBI_DOUBLE_SIZE || (ch).opacity == VBI_TRANSPARENT_SPACE)
+
+static void get_trim_info(vbi_page *page, vbi_char *row, int *leading, int *trailing, int *olen)
+{
+    int i, len = 0;
+    int char_seen = 0;
+
+    *leading = 0;
+
+    for (i = 0; i < page->columns; i++) {
+        uint16_t out = IS_TXT_SPACE(row[i]) ? 32 : row[i].unicode;
+
+        if (out == 32 && !char_seen)
+            (*leading)++;
+        else if (out != 32)
+            char_seen = 1, len = i - (*leading) + 1;
+    }
+
+    *olen = len;
+    *trailing = len > 0 ? page->columns - *leading - len : page->columns;
+}
+
+static void decode_string(vbi_page *page, vbi_char *row, AVBPrint *buf,
+                          int start, int end, vbi_color *cur_color, vbi_color *cur_back_color)
+{
+    int i;
+
+    for (i = start; i < end; i++) {
+        uint16_t out = IS_TXT_SPACE(row[i]) ? 32 : row[i].unicode;
+
+        if (*cur_color != row[i].foreground) {
+            bprint_color("c", buf, page, row[i].foreground);
+            *cur_color = row[i].foreground;
+        }
+        if (*cur_back_color != row[i].background) {
+            bprint_color("3c", buf, page, row[i].background);
+            *cur_back_color = row[i].background;
+        }
+
+        if (out == 32) {
+            av_bprintf(buf, "\\h");
+        } else if (out == '\\' || out == '{' || out == '}') {
+            av_bprintf(buf, "\\%c", (char)out);
+        } else {
+            char tmp;
+            /* convert to utf-8 */
+            PUT_UTF8(out, tmp, av_bprint_chars(buf, tmp, 1););
+        }
+    }
+}
+
+/* Draw a page as ass formatted text */
+static int gen_sub_ass(TeletextContext *ctx, AVSubtitleRect *sub_rect, vbi_page *page, int chop_top)
+{
+    int i;
+    int leading, trailing, len;
+    int last_trailing = -1, last_leading = -1;
+    int min_trailing = page->columns, min_leading = page->columns;
+    int alignment = 2;
+    int vertical_align = -1;
+    int can_align_left = 1, can_align_right = 1, can_align_center = 1;
+    int is_subtitle_page = ctx->subtitle_map[page->pgno & 0x7ff];
+    int empty_lines = 0;
+    vbi_color cur_color = VBI_WHITE;
+    vbi_color cur_back_color = VBI_BLACK;
+    AVBPrint buf;
+
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (i = chop_top; i < page->rows; i++) {
+        vbi_char *row = page->text + i * page->columns;
+
+        get_trim_info(page, row, &leading, &trailing, &len);
+
+        if (len) {
+            if (last_leading != -1 && last_leading != leading || leading > 5)
+                can_align_left = 0;
+            if (last_trailing != -1 && last_trailing != trailing || trailing > 2)
+                can_align_right = 0;
+            if (last_trailing != -1 && (FFABS((trailing - leading) - (last_trailing - last_leading)) > 1) || trailing - leading > 4)
+                can_align_center = 0;
+            last_leading = leading;
+            last_trailing = trailing;
+            min_leading = FFMIN(leading, min_leading);
+            min_trailing = FFMIN(trailing, min_trailing);
+        }
+    }
+
+    if (!can_align_right && can_align_left && !can_align_center) {
+        ctx->last_ass_alignment = alignment = 1;
+    } else if (!can_align_right && !can_align_left && can_align_center) {
+        ctx->last_ass_alignment = alignment = 2;
+    } else if (can_align_right && !can_align_left && !can_align_center) {
+        ctx->last_ass_alignment = alignment = 3;
+    } else {
+        if (ctx->last_ass_alignment == 1 && can_align_left ||
+            ctx->last_ass_alignment == 2 && can_align_center ||
+            ctx->last_ass_alignment == 3 && can_align_right)
+            alignment = ctx->last_ass_alignment;
+    }
+
+    for (i = chop_top; i < page->rows; i++) {
+        int j;
+        vbi_char *row = page->text + i * page->columns;
+        int is_transparent_line;
+
+        for (j = 0; j < page->columns; j++)
+            if (row[j].opacity != VBI_TRANSPARENT_SPACE)
+                break;
+        is_transparent_line = (j == page->columns);
+
+        len = is_transparent_line ? 0 : page->columns;
+        leading = trailing = is_transparent_line ? page->columns : 0;
+
+        if (is_subtitle_page) {
+            if (!is_transparent_line)
+                get_trim_info(page, row, &leading, &trailing, &len);
+
+            if (vertical_align == -1 && len) {
+                vertical_align = (2 - (av_clip(i + 1, 0, 23) / 8));
+                av_bprintf(&buf, "{\\an%d}", alignment + vertical_align * 3);
+                if (vertical_align != 2)
+                    empty_lines = 0;
+            }
+
+            if (len && empty_lines > 1)
+                for (empty_lines /= 2; empty_lines > 0; empty_lines--)
+                    av_bprintf(&buf, " \\N");
+
+            if (alignment == 1 || alignment == 2 && !can_align_center)
+                leading = min_leading;
+            if (alignment == 3 || alignment == 2 && !can_align_center)
+                trailing = min_trailing;
+        }
+
+        if (len || !is_subtitle_page) {
+            decode_string(page, row, &buf, leading, page->columns - trailing, &cur_color, &cur_back_color);
+            av_bprintf(&buf, " \\N");
+            empty_lines = 0;
+        } else {
+            empty_lines++;
+        }
+    }
+
+    if (vertical_align == 0)
+        for (empty_lines = (empty_lines - 1) / 2; empty_lines > 0; empty_lines--)
+            av_bprintf(&buf, " \\N");
+
+    if (!av_bprint_is_complete(&buf)) {
+        av_bprint_finalize(&buf, NULL);
+        return AVERROR(ENOMEM);
+    }
+
+    if (buf.len) {
+        sub_rect->type = SUBTITLE_ASS;
+        sub_rect->ass = ff_ass_get_dialog(ctx->readorder++, 0, is_subtitle_page ? "Subtitle" : "Teletext", NULL, buf.str);
+
+        if (!sub_rect->ass) {
+            av_bprint_finalize(&buf, NULL);
+            return AVERROR(ENOMEM);
+        }
+        av_log(ctx, AV_LOG_DEBUG, "subtext:%s:txetbus\n", sub_rect->ass);
+    } else {
+        sub_rect->type = SUBTITLE_NONE;
+    }
+    av_bprint_finalize(&buf, NULL);
+    return 0;
+}
+
 static void fix_transparency(TeletextContext *ctx, AVSubtitleRect *sub_rect, vbi_page *page,
                              int chop_top, int resx, int resy)
 {
@@ -284,16 +510,14 @@
     vbi_page page;
     int res;
     char pgno_str[12];
-    vbi_subno subno;
-    vbi_page_type vpt;
     int chop_top;
-    char *lang;
+    int is_subtitle_page = ctx->subtitle_map[ev->ev.ttx_page.pgno & 0x7ff];
 
     snprintf(pgno_str, sizeof pgno_str, "%03x", ev->ev.ttx_page.pgno);
     av_log(ctx, AV_LOG_DEBUG, "decoded page %s.%02x\n",
            pgno_str, ev->ev.ttx_page.subno & 0xFF);
 
-    if (strcmp(ctx->pgno, "*") && !strstr(ctx->pgno, pgno_str))
+    if (strcmp(ctx->pgno, "*") && (strcmp(ctx->pgno, "subtitle") || !is_subtitle_page) && !strstr(ctx->pgno, pgno_str))
         return;
     if (ctx->handler_ret < 0)
         return;
@@ -306,18 +530,7 @@
     if (!res)
         return;
 
-#ifdef DEBUG
-    fprintf(stderr, "\nSaving res=%d dy0=%d dy1=%d...\n",
-            res, page.dirty.y0, page.dirty.y1);
-    fflush(stderr);
-
-    if (!vbi_export_stdio(ctx->ex, stderr, &page))
-        fprintf(stderr, "failed: %s\n", vbi_export_errstr(ctx->ex));
-#endif
-
-    vpt = vbi_classify_page(ctx->vbi, ev->ev.ttx_page.pgno, &subno, &lang);
-    chop_top = ctx->chop_top ||
-        ((page.rows > 1) && (vpt == VBI_SUBTITLE_PAGE));
+    chop_top = ctx->chop_top || ((page.rows > 1) && is_subtitle_page);
 
     av_log(ctx, AV_LOG_DEBUG, "%d x %d page chop:%d\n",
            page.columns, page.rows, chop_top);
@@ -331,9 +544,20 @@
             cur_page->pgno = ev->ev.ttx_page.pgno;
             cur_page->subno = ev->ev.ttx_page.subno;
             if (cur_page->sub_rect) {
-                res = (ctx->format_id == 0) ?
-                    gen_sub_bitmap(ctx, cur_page->sub_rect, &page, chop_top) :
-                    gen_sub_text  (ctx, cur_page->sub_rect, &page, chop_top);
+                switch (ctx->format_id) {
+                    case 0:
+                        res = gen_sub_bitmap(ctx, cur_page->sub_rect, &page, chop_top);
+                        break;
+                    case 1:
+                        res = gen_sub_text(ctx, cur_page->sub_rect, &page, chop_top);
+                        break;
+                    case 2:
+                        res = gen_sub_ass(ctx, cur_page->sub_rect, &page, chop_top);
+                        break;
+                    default:
+                        res = AVERROR_BUG;
+                        break;
+                }
                 if (res < 0) {
                     av_freep(&cur_page->sub_rect);
                     ctx->handler_ret = res;
@@ -369,11 +593,37 @@
             else {
                 int line_offset  = buf[2] & 0x1f;
                 int field_parity = buf[2] & 0x20;
-                int i;
+                uint8_t *p = ctx->sliced[lines].data;
+                int i, pmag;
                 ctx->sliced[lines].id = VBI_SLICED_TELETEXT_B;
                 ctx->sliced[lines].line = (line_offset > 0 ? (line_offset + (field_parity ? 0 : 313)) : 0);
                 for (i = 0; i < 42; i++)
-                    ctx->sliced[lines].data[i] = vbi_rev8(buf[4 + i]);
+                    p[i] = vbi_rev8(buf[4 + i]);
+                /* Unfortunately libzvbi does not expose page flags, and
+                 * vbi_classify_page only checks MIP, so we have to manually
+                 * decode the page flags and store the results. */
+                pmag = vbi_unham16p(p);
+                if (pmag >= 0 && pmag >> 3 == 0) {   // We found a row 0 header
+                    int page = vbi_unham16p(p + 2);
+                    int flags1 = vbi_unham16p(p + 6);
+                    int flags2 = vbi_unham16p(p + 8);
+                    if (page >= 0 && flags1 >= 0 && flags2 >= 0) {
+                        int pgno = ((pmag & 7) << 8) + page;
+                        // Check for disabled NEWSFLASH flag and enabled SUBTITLE and SUPRESS_HEADER flags
+                        ctx->subtitle_map[pgno] = (!(flags1 & 0x40) && flags1 & 0x80 && flags2 & 0x01);
+                        // Propagate ERASE_PAGE flag for repeated page headers to work around a libzvbi bug
+                        if (ctx->subtitle_map[pgno] && pgno == ctx->last_pgno) {
+                            int last_byte9 = vbi_unham8(ctx->last_p5);
+                            if (last_byte9 >= 0 && last_byte9 & 0x8) {
+                                int byte9 = vbi_unham8(p[5]);
+                                if (byte9 >= 0)
+                                    p[5] = vbi_ham8(byte9 | 0x8);
+                            }
+                        }
+                        ctx->last_pgno = pgno;
+                        ctx->last_p5 = p[5];
+                    }
+                }
                 lines++;
             }
         }
@@ -421,13 +671,6 @@
             ff_dlog(avctx, "ctx=%p buf_size=%d lines=%u pkt_pts=%7.3f\n",
                     ctx, pkt->size, lines, (double)pkt->pts/90000.0);
             if (lines > 0) {
-#ifdef DEBUG
-                int i;
-                av_log(avctx, AV_LOG_DEBUG, "line numbers:");
-                for(i = 0; i < lines; i++)
-                    av_log(avctx, AV_LOG_DEBUG, " %d", ctx->sliced[i].line);
-                av_log(avctx, AV_LOG_DEBUG, "\n");
-#endif
                 vbi_decode(ctx->vbi, ctx->sliced, lines, 0.0);
                 ctx->lines_processed += lines;
             }
@@ -442,7 +685,7 @@
     // is there a subtitle to pass?
     if (ctx->nb_pages) {
         int i;
-        sub->format = ctx->format_id;
+        sub->format = !!ctx->format_id;
         sub->start_display_time = 0;
         sub->end_display_time = ctx->sub_duration;
         sub->num_rects = 0;
@@ -501,18 +744,23 @@
 
     ctx->vbi = NULL;
     ctx->pts = AV_NOPTS_VALUE;
+    ctx->last_pgno = -1;
+    ctx->last_ass_alignment = 2;
 
     if (ctx->opacity == -1)
         ctx->opacity = ctx->transparent_bg ? 0 : 255;
 
-#ifdef DEBUG
-    {
-        char *t;
-        ctx->ex = vbi_export_new("text", &t);
-    }
-#endif
     av_log(avctx, AV_LOG_VERBOSE, "page filter: %s\n", ctx->pgno);
-    return (ctx->format_id == 1) ? ff_ass_subtitle_header_default(avctx) : 0;
+
+    switch (ctx->format_id) {
+        case 0:
+            return 0;
+        case 1:
+            return ff_ass_subtitle_header_default(avctx);
+        case 2:
+            return my_ass_subtitle_header(avctx);
+    }
+    return AVERROR_BUG;
 }
 
 static int teletext_close_decoder(AVCodecContext *avctx)
@@ -527,6 +775,9 @@
     vbi_decoder_delete(ctx->vbi);
     ctx->vbi = NULL;
     ctx->pts = AV_NOPTS_VALUE;
+    ctx->last_pgno = -1;
+    ctx->last_ass_alignment = 2;
+    memset(ctx->subtitle_map, 0, sizeof(ctx->subtitle_map));
     if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
         ctx->readorder = 0;
     return 0;
@@ -540,15 +791,16 @@
 #define OFFSET(x) offsetof(TeletextContext, x)
 #define SD AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
-    {"txt_page",        "list of teletext page numbers to decode, * is all", OFFSET(pgno),           AV_OPT_TYPE_STRING, {.str = "*"},      0, 0,        SD},
+    {"txt_page",        "page numbers to decode, subtitle for subtitles, * for all", OFFSET(pgno),   AV_OPT_TYPE_STRING, {.str = "*"},      0, 0,        SD},
     {"txt_chop_top",    "discards the top teletext line",                    OFFSET(chop_top),       AV_OPT_TYPE_INT,    {.i64 = 1},        0, 1,        SD},
-    {"txt_format",      "format of the subtitles (bitmap or text)",          OFFSET(format_id),      AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,        SD,  "txt_format"},
+    {"txt_format",      "format of the subtitles (bitmap or text or ass)",   OFFSET(format_id),      AV_OPT_TYPE_INT,    {.i64 = 0},        0, 2,        SD,  "txt_format"},
     {"bitmap",          NULL,                                                0,                      AV_OPT_TYPE_CONST,  {.i64 = 0},        0, 0,        SD,  "txt_format"},
     {"text",            NULL,                                                0,                      AV_OPT_TYPE_CONST,  {.i64 = 1},        0, 0,        SD,  "txt_format"},
+    {"ass",             NULL,                                                0,                      AV_OPT_TYPE_CONST,  {.i64 = 2},        0, 0,        SD,  "txt_format"},
     {"txt_left",        "x offset of generated bitmaps",                     OFFSET(x_offset),       AV_OPT_TYPE_INT,    {.i64 = 0},        0, 65535,    SD},
     {"txt_top",         "y offset of generated bitmaps",                     OFFSET(y_offset),       AV_OPT_TYPE_INT,    {.i64 = 0},        0, 65535,    SD},
     {"txt_chop_spaces", "chops leading and trailing spaces from text",       OFFSET(chop_spaces),    AV_OPT_TYPE_INT,    {.i64 = 1},        0, 1,        SD},
-    {"txt_duration",    "display duration of teletext pages in msecs",       OFFSET(sub_duration),   AV_OPT_TYPE_INT,    {.i64 = 30000},    0, 86400000, SD},
+    {"txt_duration",    "display duration of teletext pages in msecs",       OFFSET(sub_duration),   AV_OPT_TYPE_INT,    {.i64 = -1},      -1, 86400000, SD},
     {"txt_transparent", "force transparent background of the teletext",      OFFSET(transparent_bg), AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,        SD},
     {"txt_opacity",     "set opacity of the transparent background",         OFFSET(opacity),        AV_OPT_TYPE_INT,    {.i64 = -1},      -1, 255,      SD},
     { NULL },
@@ -573,4 +825,5 @@
     .capabilities = AV_CODEC_CAP_DELAY,
     .flush     = teletext_flush,
     .priv_class= &teletext_class,
+    .wrapper_name = "libzvbi",
 };

diff --git a/libavcodec/loco.c b/libavcodec/loco.c
index 9d0f144..741db3b 100644
--- a/libavcodec/loco.c
+++ b/libavcodec/loco.c

@@ -114,19 +114,19 @@
 }
 
 /* LOCO main predictor - LOCO-I/JPEG-LS predictor */
-static inline int loco_predict(uint8_t* data, int stride, int step)
+static inline int loco_predict(uint8_t* data, int stride)
 {
     int a, b, c;
 
     a = data[-stride];
-    b = data[-step];
-    c = data[-stride - step];
+    b = data[-1];
+    c = data[-stride - 1];
 
     return mid_pred(a, a + b - c, b);
 }
 
 static int loco_decode_plane(LOCOContext *l, uint8_t *data, int width, int height,
-                             int stride, const uint8_t *buf, int buf_size, int step)
+                             int stride, const uint8_t *buf, int buf_size)
 {
     RICEContext rc;
     int val;
@@ -153,7 +153,7 @@
     /* restore top line */
     for (i = 1; i < width; i++) {
         val = loco_get_rice(&rc);
-        data[i * step] = data[i * step - step] + val;
+        data[i] = data[i - 1] + val;
     }
     data += stride;
     for (j = 1; j < height; j++) {
@@ -163,7 +163,7 @@
         /* restore all other pixels */
         for (i = 1; i < width; i++) {
             val = loco_get_rice(&rc);
-            data[i * step] = loco_predict(&data[i * step], stride, step) + val;
+            data[i] = loco_predict(&data[i], stride) + val;
         }
         data += stride;
     }
@@ -171,19 +171,18 @@
     return (get_bits_count(&rc.gb) + 7) >> 3;
 }
 
-static void rotate_faulty_loco(uint8_t *data, int width, int height, int stride, int step)
+static void rotate_faulty_loco(uint8_t *data, int width, int height, int stride)
 {
     int y;
 
     for (y=1; y<height; y++) {
         if (width>=y) {
             memmove(data + y*stride,
-                    data + y*(stride + step),
-                    step*(width-y));
+                    data + y*(stride + 1),
+                    (width-y));
             if (y+1 < height)
-                memmove(data + y*stride + step*(width-y),
-                        data + (y+1)*stride,
-                        step*y);
+                memmove(data + y*stride + (width-y),
+                        data + (y+1)*stride, y);
         }
     }
 }
@@ -209,49 +208,52 @@
     switch(l->mode) {
     case LOCO_CYUY2: case LOCO_YUY2: case LOCO_UYVY:
         decoded = loco_decode_plane(l, p->data[0], avctx->width, avctx->height,
-                                    p->linesize[0], buf, buf_size, 1);
+                                    p->linesize[0], buf, buf_size);
         ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[1], avctx->width / 2, avctx->height,
-                                    p->linesize[1], buf, buf_size, 1);
+                                    p->linesize[1], buf, buf_size);
         ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[2], avctx->width / 2, avctx->height,
-                                    p->linesize[2], buf, buf_size, 1);
+                                    p->linesize[2], buf, buf_size);
         break;
     case LOCO_CYV12: case LOCO_YV12:
         decoded = loco_decode_plane(l, p->data[0], avctx->width, avctx->height,
-                                    p->linesize[0], buf, buf_size, 1);
+                                    p->linesize[0], buf, buf_size);
         ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[2], avctx->width / 2, avctx->height / 2,
-                                    p->linesize[2], buf, buf_size, 1);
+                                    p->linesize[2], buf, buf_size);
         ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[1], avctx->width / 2, avctx->height / 2,
-                                    p->linesize[1], buf, buf_size, 1);
+                                    p->linesize[1], buf, buf_size);
         break;
     case LOCO_CRGB: case LOCO_RGB:
+        decoded = loco_decode_plane(l, p->data[1] + p->linesize[1]*(avctx->height-1), avctx->width, avctx->height,
+                                    -p->linesize[1], buf, buf_size);
+        ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1), avctx->width, avctx->height,
-                                    -p->linesize[0], buf, buf_size, 3);
+                                    -p->linesize[0], buf, buf_size);
         ADVANCE_BY_DECODED;
-        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 1, avctx->width, avctx->height,
-                                    -p->linesize[0], buf, buf_size, 3);
-        ADVANCE_BY_DECODED;
-        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 2, avctx->width, avctx->height,
-                                    -p->linesize[0], buf, buf_size, 3);
-        if (avctx->width & 1)
-            rotate_faulty_loco(p->data[0] + p->linesize[0]*(avctx->height-1), avctx->width, avctx->height, -p->linesize[0], 3);
+        decoded = loco_decode_plane(l, p->data[2] + p->linesize[2]*(avctx->height-1), avctx->width, avctx->height,
+                                    -p->linesize[2], buf, buf_size);
+        if (avctx->width & 1) {
+            rotate_faulty_loco(p->data[0] + p->linesize[0]*(avctx->height-1), avctx->width, avctx->height, -p->linesize[0]);
+            rotate_faulty_loco(p->data[1] + p->linesize[1]*(avctx->height-1), avctx->width, avctx->height, -p->linesize[1]);
+            rotate_faulty_loco(p->data[2] + p->linesize[2]*(avctx->height-1), avctx->width, avctx->height, -p->linesize[2]);
+        }
         break;
     case LOCO_CRGBA:
     case LOCO_RGBA:
+        decoded = loco_decode_plane(l, p->data[1] + p->linesize[1]*(avctx->height-1), avctx->width, avctx->height,
+                                    -p->linesize[1], buf, buf_size);
+        ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1), avctx->width, avctx->height,
-                                    -p->linesize[0], buf, buf_size, 4);
+                                    -p->linesize[0], buf, buf_size);
         ADVANCE_BY_DECODED;
-        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 1, avctx->width, avctx->height,
-                                    -p->linesize[0], buf, buf_size, 4);
+        decoded = loco_decode_plane(l, p->data[2] + p->linesize[2]*(avctx->height-1), avctx->width, avctx->height,
+                                    -p->linesize[2], buf, buf_size);
         ADVANCE_BY_DECODED;
-        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 2, avctx->width, avctx->height,
-                                    -p->linesize[0], buf, buf_size, 4);
-        ADVANCE_BY_DECODED;
-        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 3, avctx->width, avctx->height,
-                                    -p->linesize[0], buf, buf_size, 4);
+        decoded = loco_decode_plane(l, p->data[3] + p->linesize[3]*(avctx->height-1), avctx->width, avctx->height,
+                                    -p->linesize[3], buf, buf_size);
         break;
     default:
         av_assert0(0);
@@ -302,7 +304,7 @@
         break;
     case LOCO_CRGB:
     case LOCO_RGB:
-        avctx->pix_fmt = AV_PIX_FMT_BGR24;
+        avctx->pix_fmt = AV_PIX_FMT_GBRP;
         break;
     case LOCO_CYV12:
     case LOCO_YV12:
@@ -310,7 +312,7 @@
         break;
     case LOCO_CRGBA:
     case LOCO_RGBA:
-        avctx->pix_fmt = AV_PIX_FMT_BGRA;
+        avctx->pix_fmt = AV_PIX_FMT_GBRAP;
         break;
     default:
         av_log(avctx, AV_LOG_INFO, "Unknown colorspace, index = %i\n", l->mode);

diff --git a/libavcodec/lossless_videodsp.c b/libavcodec/lossless_videodsp.c
index b5b96e6..cff94c2 100644
--- a/libavcodec/lossless_videodsp.c
+++ b/libavcodec/lossless_videodsp.c

@@ -98,6 +98,16 @@
     return acc;
 }
 
+static void add_gradient_pred_c(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width){
+    int A, B, C, i;
+
+    for (i = 0; i < width; i++) {
+        A = src[i - stride];
+        B = src[i - (stride + 1)];
+        C = src[i - 1];
+        src[i] = (A - B + C + src[i]) & 0xFF;
+    }
+}
 
 void ff_llviddsp_init(LLVidDSPContext *c)
 {
@@ -106,6 +116,7 @@
     c->add_left_pred              = add_left_pred_c;
 
     c->add_left_pred_int16        = add_left_pred_int16_c;
+    c->add_gradient_pred          = add_gradient_pred_c;
 
     if (ARCH_PPC)
         ff_llviddsp_init_ppc(c);

diff --git a/libavcodec/lossless_videodsp.h b/libavcodec/lossless_videodsp.h
index cecf0fe..8077898 100644
--- a/libavcodec/lossless_videodsp.h
+++ b/libavcodec/lossless_videodsp.h

@@ -29,7 +29,7 @@
 #include "libavutil/cpu.h"
 
 typedef struct LLVidDSPContext {
-    void (*add_bytes)(uint8_t *dst /* align 16 */, uint8_t *src /* align 16 */,
+    void (*add_bytes)(uint8_t *dst /* align 32 */, uint8_t *src /* align 32 */,
                       ptrdiff_t w);
     void (*add_median_pred)(uint8_t *dst, const uint8_t *top,
                             const uint8_t *diff, ptrdiff_t w,
@@ -39,6 +39,7 @@
 
     int  (*add_left_pred_int16)(uint16_t *dst, const uint16_t *src,
                                 unsigned mask, ptrdiff_t w, unsigned left);
+    void (*add_gradient_pred)(uint8_t *src /* align 32 */, const ptrdiff_t stride, const ptrdiff_t width);
 } LLVidDSPContext;
 
 void ff_llviddsp_init(LLVidDSPContext *llviddsp);

diff --git a/libavcodec/lossless_videoencdsp.c b/libavcodec/lossless_videoencdsp.c
index 5cc4934..ed70329 100644
--- a/libavcodec/lossless_videoencdsp.c
+++ b/libavcodec/lossless_videoencdsp.c

@@ -74,10 +74,25 @@
     *left_top = lt;
 }
 
+static void sub_left_predict_c(uint8_t *dst, uint8_t *src,
+                               ptrdiff_t stride, ptrdiff_t width, int height)
+{
+    int i, j;
+    uint8_t prev = 0x80; /* Set the initial value */
+    for (j = 0; j < height; j++) {
+        for (i = 0; i < width; i++) {
+            *dst++ = src[i] - prev;
+            prev   = src[i];
+        }
+        src += stride;
+    }
+}
+
 av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c)
 {
     c->diff_bytes      = diff_bytes_c;
     c->sub_median_pred = sub_median_pred_c;
+    c->sub_left_predict = sub_left_predict_c;
 
     if (ARCH_X86)
         ff_llvidencdsp_init_x86(c);

diff --git a/libavcodec/lossless_videoencdsp.h b/libavcodec/lossless_videoencdsp.h
index 3d645b1..faa6c32 100644
--- a/libavcodec/lossless_videoencdsp.h
+++ b/libavcodec/lossless_videoencdsp.h

@@ -21,6 +21,8 @@
 
 #include <stdint.h>
 
+#include "avcodec.h"
+
 typedef struct LLVidEncDSPContext {
     void (*diff_bytes)(uint8_t *dst /* align 16 */,
                        const uint8_t *src1 /* align 16 */,
@@ -33,6 +35,9 @@
     void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1,
                             const uint8_t *src2, intptr_t w,
                             int *left, int *left_top);
+
+    void (*sub_left_predict)(uint8_t *dst, uint8_t *src,
+                          ptrdiff_t stride, ptrdiff_t width, int height);
 } LLVidEncDSPContext;
 
 void ff_llvidencdsp_init(LLVidEncDSPContext *c);

diff --git a/libavcodec/magicyuv.c b/libavcodec/magicyuv.c
index 3c1a890..1a129c2 100644
--- a/libavcodec/magicyuv.c
+++ b/libavcodec/magicyuv.c

@@ -240,6 +240,8 @@
 
         dst = (uint16_t *)p->data[i] + j * sheight * stride;
         if (flags & 1) {
+            if (get_bits_left(&gb) < bps * width * height)
+                return AVERROR_INVALIDDATA;
             for (k = 0; k < height; k++) {
                 for (x = 0; x < width; x++)
                     dst[x] = get_bits(&gb, bps);
@@ -280,11 +282,9 @@
         case GRADIENT:
             dst = (uint16_t *)p->data[i] + j * sheight * stride;
             s->llviddsp.add_left_pred_int16(dst, dst, max, width, 0);
-            left = lefttop = 0;
             dst += stride;
             if (interlaced) {
                 s->llviddsp.add_left_pred_int16(dst, dst, max, width, 0);
-                left = lefttop = 0;
                 dst += stride;
             }
             for (k = 1 + interlaced; k < height; k++) {
@@ -302,14 +302,13 @@
             break;
         case MEDIAN:
             dst = (uint16_t *)p->data[i] + j * sheight * stride;
-            lefttop = left = dst[0];
             s->llviddsp.add_left_pred_int16(dst, dst, max, width, 0);
             dst += stride;
             if (interlaced) {
-                lefttop = left = dst[0];
                 s->llviddsp.add_left_pred_int16(dst, dst, max, width, 0);
                 dst += stride;
             }
+            lefttop = left = dst[0];
             for (k = 1 + interlaced; k < height; k++) {
                 magicyuv_median_pred16(dst, dst - fake_stride, dst, width, &left, &lefttop, max);
                 lefttop = left = dst[0];
@@ -348,7 +347,7 @@
     MagicYUVContext *s = avctx->priv_data;
     int interlaced = s->interlaced;
     AVFrame *p = s->p;
-    int i, k, x;
+    int i, k, x, min_width;
     GetBitContext gb;
     uint8_t *dst;
 
@@ -371,6 +370,8 @@
 
         dst = p->data[i] + j * sheight * stride;
         if (flags & 1) {
+            if (get_bits_left(&gb) < 8* width * height)
+                return AVERROR_INVALIDDATA;
             for (k = 0; k < height; k++) {
                 for (x = 0; x < width; x++)
                     dst[x] = get_bits(&gb, 8);
@@ -411,36 +412,36 @@
         case GRADIENT:
             dst = p->data[i] + j * sheight * stride;
             s->llviddsp.add_left_pred(dst, dst, width, 0);
-            left = lefttop = 0;
             dst += stride;
             if (interlaced) {
                 s->llviddsp.add_left_pred(dst, dst, width, 0);
-                left = lefttop = 0;
                 dst += stride;
             }
+            min_width = FFMIN(width, 32);
             for (k = 1 + interlaced; k < height; k++) {
                 top = dst[-fake_stride];
                 left = top + dst[0];
                 dst[0] = left;
-                for (x = 1; x < width; x++) {
+                for (x = 1; x < min_width; x++) { /* dsp need aligned 32 */
                     top = dst[x - fake_stride];
                     lefttop = dst[x - (fake_stride + 1)];
                     left += top - lefttop + dst[x];
                     dst[x] = left;
                 }
+                if (width > 32)
+                    s->llviddsp.add_gradient_pred(dst + 32, fake_stride, width - 32);
                 dst += stride;
             }
             break;
         case MEDIAN:
             dst = p->data[i] + j * sheight * stride;
-            lefttop = left = dst[0];
             s->llviddsp.add_left_pred(dst, dst, width, 0);
             dst += stride;
             if (interlaced) {
-                lefttop = left = dst[0];
                 s->llviddsp.add_left_pred(dst, dst, width, 0);
                 dst += stride;
             }
+            lefttop = left = dst[0];
             for (k = 1 + interlaced; k < height; k++) {
                 s->llviddsp.add_median_pred(dst, dst - fake_stride,
                                              dst, width, &left, &lefttop);

diff --git a/libavcodec/magicyuvenc.c b/libavcodec/magicyuvenc.c
new file mode 100644
index 0000000..16e9a1c
--- /dev/null
+++ b/libavcodec/magicyuvenc.c

@@ -0,0 +1,590 @@
+/*
+ * MagicYUV encoder
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/qsort.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "put_bits.h"
+#include "internal.h"
+#include "thread.h"
+#include "lossless_videoencdsp.h"
+
+typedef enum Prediction {
+    LEFT = 1,
+    GRADIENT,
+    MEDIAN,
+} Prediction;
+
+typedef struct HuffEntry {
+    uint8_t  sym;
+    uint8_t  len;
+    uint32_t code;
+} HuffEntry;
+
+typedef struct PTable {
+    int     value;  ///< input value
+    int64_t prob;   ///< number of occurences of this value in input
+} PTable;
+
+typedef struct MagicYUVContext {
+    const AVClass       *class;
+    int                  frame_pred;
+    PutBitContext        pb;
+    int                  planes;
+    uint8_t              format;
+    AVFrame             *p;
+    int                  slice_height;
+    int                  nb_slices;
+    int                  correlate;
+    int                  hshift[4];
+    int                  vshift[4];
+    uint8_t             *slices[4];
+    unsigned             slice_pos[4];
+    unsigned             tables_size;
+    HuffEntry            he[4][256];
+    LLVidEncDSPContext   llvidencdsp;
+    void (*predict)(struct MagicYUVContext *s, uint8_t *src, uint8_t *dst,
+                    ptrdiff_t stride, int width, int height);
+} MagicYUVContext;
+
+static void left_predict(MagicYUVContext *s,
+                         uint8_t *src, uint8_t *dst, ptrdiff_t stride,
+                         int width, int height)
+{
+    uint8_t prev = 0;
+    int i, j;
+
+    for (i = 0; i < width; i++) {
+        dst[i] = src[i] - prev;
+        prev   = src[i];
+    }
+    dst += width;
+    src += stride;
+    for (j = 1; j < height; j++) {
+        prev = src[-stride];
+        for (i = 0; i < width; i++) {
+            dst[i] = src[i] - prev;
+            prev   = src[i];
+        }
+        dst += width;
+        src += stride;
+    }
+}
+
+static void gradient_predict(MagicYUVContext *s,
+                             uint8_t *src, uint8_t *dst, ptrdiff_t stride,
+                             int width, int height)
+{
+    int left = 0, top, lefttop;
+    int i, j;
+
+    for (i = 0; i < width; i++) {
+        dst[i] = src[i] - left;
+        left   = src[i];
+    }
+    dst += width;
+    src += stride;
+    for (j = 1; j < height; j++) {
+        top = src[-stride];
+        left = src[0] - top;
+        dst[0] = left;
+        for (i = 1; i < width; i++) {
+            top = src[i - stride];
+            lefttop = src[i - (stride + 1)];
+            left = src[i-1];
+            dst[i] = (src[i] - top) - left + lefttop;
+        }
+        dst += width;
+        src += stride;
+    }
+}
+
+static void median_predict(MagicYUVContext *s,
+                           uint8_t *src, uint8_t *dst, ptrdiff_t stride,
+                           int width, int height)
+{
+    int left = 0, lefttop;
+    int i, j;
+
+    for (i = 0; i < width; i++) {
+        dst[i] = src[i] - left;
+        left   = src[i];
+    }
+    dst += width;
+    src += stride;
+    for (j = 1; j < height; j++) {
+        left = lefttop = src[-stride];
+        s->llvidencdsp.sub_median_pred(dst, src - stride, src, width, &left, &lefttop);
+        dst += width;
+        src += stride;
+    }
+}
+
+static av_cold int magy_encode_init(AVCodecContext *avctx)
+{
+    MagicYUVContext *s = avctx->priv_data;
+    PutByteContext pb;
+    int i;
+
+    switch (avctx->pix_fmt) {
+    case AV_PIX_FMT_GBRP:
+        avctx->codec_tag = MKTAG('M', '8', 'R', 'G');
+        s->correlate = 1;
+        s->format = 0x65;
+        break;
+    case AV_PIX_FMT_GBRAP:
+        avctx->codec_tag = MKTAG('M', '8', 'R', 'A');
+        s->correlate = 1;
+        s->format = 0x66;
+        break;
+    case AV_PIX_FMT_YUV420P:
+        avctx->codec_tag = MKTAG('M', '8', 'Y', '0');
+        s->hshift[1] =
+        s->vshift[1] =
+        s->hshift[2] =
+        s->vshift[2] = 1;
+        s->format = 0x69;
+        break;
+    case AV_PIX_FMT_YUV422P:
+        avctx->codec_tag = MKTAG('M', '8', 'Y', '2');
+        s->hshift[1] =
+        s->hshift[2] = 1;
+        s->format = 0x68;
+        break;
+    case AV_PIX_FMT_YUV444P:
+        avctx->codec_tag = MKTAG('M', '8', 'Y', '4');
+        s->format = 0x67;
+        break;
+    case AV_PIX_FMT_YUVA444P:
+        avctx->codec_tag = MKTAG('M', '8', 'Y', 'A');
+        s->format = 0x6a;
+        break;
+    case AV_PIX_FMT_GRAY8:
+        avctx->codec_tag = MKTAG('M', '8', 'G', '0');
+        s->format = 0x6b;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported pixel format: %d\n",
+               avctx->pix_fmt);
+        return AVERROR_INVALIDDATA;
+    }
+
+    ff_llvidencdsp_init(&s->llvidencdsp);
+
+    s->planes = av_pix_fmt_count_planes(avctx->pix_fmt);
+
+    s->nb_slices = 1;
+
+    for (i = 0; i < s->planes; i++) {
+        s->slices[i] = av_malloc(avctx->width * (avctx->height + 2) +
+                                 AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!s->slices[i]) {
+            av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer.\n");
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    switch (s->frame_pred) {
+    case LEFT:     s->predict = left_predict;     break;
+    case GRADIENT: s->predict = gradient_predict; break;
+    case MEDIAN:   s->predict = median_predict;   break;
+    }
+
+    avctx->extradata_size = 32;
+
+    avctx->extradata = av_mallocz(avctx->extradata_size +
+                                  AV_INPUT_BUFFER_PADDING_SIZE);
+
+    if (!avctx->extradata) {
+        av_log(avctx, AV_LOG_ERROR, "Could not allocate extradata.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    bytestream2_init_writer(&pb, avctx->extradata, avctx->extradata_size);
+    bytestream2_put_le32(&pb, MKTAG('M', 'A', 'G', 'Y'));
+    bytestream2_put_le32(&pb, 32);
+    bytestream2_put_byte(&pb, 7);
+    bytestream2_put_byte(&pb, s->format);
+    bytestream2_put_byte(&pb, 12);
+    bytestream2_put_byte(&pb, 0);
+
+    bytestream2_put_byte(&pb, 0);
+    bytestream2_put_byte(&pb, 0);
+    bytestream2_put_byte(&pb, 32);
+    bytestream2_put_byte(&pb, 0);
+
+    bytestream2_put_le32(&pb, avctx->width);
+    bytestream2_put_le32(&pb, avctx->height);
+    bytestream2_put_le32(&pb, avctx->width);
+    bytestream2_put_le32(&pb, avctx->height);
+
+    return 0;
+}
+
+static int magy_huff_cmp_len(const void *a, const void *b)
+{
+    const HuffEntry *aa = a, *bb = b;
+    return (aa->len - bb->len) * 256 + aa->sym - bb->sym;
+}
+
+static int huff_cmp_sym(const void *a, const void *b)
+{
+    const HuffEntry *aa = a, *bb = b;
+    return bb->sym - aa->sym;
+}
+
+static void calculate_codes(HuffEntry *he)
+{
+    uint32_t code;
+    int i;
+
+    AV_QSORT(he, 256, HuffEntry, magy_huff_cmp_len);
+
+    code = 1;
+    for (i = 255; i >= 0; i--) {
+        he[i].code  = code >> (32 - he[i].len);
+        code       += 0x80000000u >> (he[i].len - 1);
+    }
+
+    AV_QSORT(he, 256, HuffEntry, huff_cmp_sym);
+}
+
+static void count_usage(uint8_t *src, int width,
+                        int height, PTable *counts)
+{
+    int i, j;
+
+    for (j = 0; j < height; j++) {
+        for (i = 0; i < width; i++) {
+            counts[src[i]].prob++;
+        }
+        src += width;
+    }
+}
+
+typedef struct PackageMergerList {
+    int nitems;             ///< number of items in the list and probability      ex. 4
+    int item_idx[515];      ///< index range for each item in items                   0, 2, 5, 9, 13
+    int probability[514];   ///< probability of each item                             3, 8, 18, 46
+    int items[257 * 16];    ///< chain of all individual values that make up items    A, B, A, B, C, A, B, C, D, C, D, D, E
+} PackageMergerList;
+
+static int compare_by_prob(const void *a, const void *b)
+{
+    PTable a_val = *(PTable *)a;
+    PTable b_val = *(PTable *)b;
+    return a_val.prob - b_val.prob;
+}
+
+static void magy_huffman_compute_bits(PTable *prob_table, HuffEntry *distincts,
+                                      int size, int max_length)
+{
+    PackageMergerList list_a, list_b, *to = &list_a, *from = &list_b, *temp;
+    int times, i, j, k;
+    int nbits[257] = {0};
+    int min;
+
+    av_assert0(max_length > 0);
+
+    to->nitems = 0;
+    from->nitems = 0;
+    to->item_idx[0] = 0;
+    from->item_idx[0] = 0;
+    AV_QSORT(prob_table, size, PTable, compare_by_prob);
+
+    for (times = 0; times <= max_length; times++) {
+        to->nitems = 0;
+        to->item_idx[0] = 0;
+
+        j = 0;
+        k = 0;
+
+        if (times < max_length) {
+            i = 0;
+        }
+        while (i < size || j + 1 < from->nitems) {
+            to->nitems++;
+            to->item_idx[to->nitems] = to->item_idx[to->nitems - 1];
+            if (i < size &&
+                (j + 1 >= from->nitems ||
+                 prob_table[i].prob <
+                     from->probability[j] + from->probability[j + 1])) {
+                to->items[to->item_idx[to->nitems]++] = prob_table[i].value;
+                to->probability[to->nitems - 1] = prob_table[i].prob;
+                i++;
+            } else {
+                for (k = from->item_idx[j]; k < from->item_idx[j + 2]; k++) {
+                    to->items[to->item_idx[to->nitems]++] = from->items[k];
+                }
+                to->probability[to->nitems - 1] =
+                    from->probability[j] + from->probability[j + 1];
+                j += 2;
+            }
+        }
+        temp = to;
+        to = from;
+        from = temp;
+    }
+
+    min = (size - 1 < from->nitems) ? size - 1 : from->nitems;
+    for (i = 0; i < from->item_idx[min]; i++) {
+        nbits[from->items[i]]++;
+    }
+
+    for (i = 0; i < size; i++) {
+        distincts[i].sym = i;
+        distincts[i].len = nbits[i];
+    }
+}
+
+static int encode_table(AVCodecContext *avctx, uint8_t *dst,
+                        int width, int height,
+                        PutBitContext *pb, HuffEntry *he)
+{
+    PTable counts[256] = { {0} };
+    int i;
+
+    count_usage(dst, width, height, counts);
+
+    for (i = 0; i < 256; i++) {
+        counts[i].prob++;
+        counts[i].value = 255 - i;
+    }
+
+    magy_huffman_compute_bits(counts, he, 256, 12);
+
+    calculate_codes(he);
+
+    for (i = 0; i < 256; i++) {
+        put_bits(pb, 1, 0);
+        put_bits(pb, 7, he[i].len);
+    }
+
+    return 0;
+}
+
+static int encode_slice(uint8_t *src, uint8_t *dst, int dst_size,
+                        int width, int height, HuffEntry *he, int prediction)
+{
+    PutBitContext pb;
+    int i, j;
+    int count;
+
+    init_put_bits(&pb, dst, dst_size);
+
+    put_bits(&pb, 8, 0);
+    put_bits(&pb, 8, prediction);
+
+    for (j = 0; j < height; j++) {
+        for (i = 0; i < width; i++) {
+            const int idx = src[i];
+            put_bits(&pb, he[idx].len, he[idx].code);
+        }
+
+        src += width;
+    }
+
+    count = put_bits_count(&pb) & 0x1F;
+
+    if (count)
+        put_bits(&pb, 32 - count, 0);
+
+    count = put_bits_count(&pb);
+
+    flush_put_bits(&pb);
+
+    return count >> 3;
+}
+
+static int magy_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                             const AVFrame *frame, int *got_packet)
+{
+    MagicYUVContext *s = avctx->priv_data;
+    PutByteContext pb;
+    const int width = avctx->width, height = avctx->height;
+    int pos, slice, i, j, ret = 0;
+
+    ret = ff_alloc_packet2(avctx, pkt, (256 + 4 * s->nb_slices + width * height) *
+                           s->planes + 256, 0);
+    if (ret < 0)
+        return ret;
+
+    bytestream2_init_writer(&pb, pkt->data, pkt->size);
+    bytestream2_put_le32(&pb, MKTAG('M', 'A', 'G', 'Y'));
+    bytestream2_put_le32(&pb, 32); // header size
+    bytestream2_put_byte(&pb, 7);  // version
+    bytestream2_put_byte(&pb, s->format);
+    bytestream2_put_byte(&pb, 12); // max huffman length
+    bytestream2_put_byte(&pb, 0);
+
+    bytestream2_put_byte(&pb, 0);
+    bytestream2_put_byte(&pb, 0);
+    bytestream2_put_byte(&pb, 32); // coder type
+    bytestream2_put_byte(&pb, 0);
+
+    bytestream2_put_le32(&pb, avctx->width);
+    bytestream2_put_le32(&pb, avctx->height);
+    bytestream2_put_le32(&pb, avctx->width);
+    bytestream2_put_le32(&pb, avctx->height);
+    bytestream2_put_le32(&pb, 0);
+
+    for (i = 0; i < s->planes; i++) {
+        bytestream2_put_le32(&pb, 0);
+        for (j = 1; j < s->nb_slices; j++) {
+            bytestream2_put_le32(&pb, 0);
+        }
+    }
+
+    bytestream2_put_byte(&pb, s->planes);
+
+    for (i = 0; i < s->planes; i++) {
+        for (slice = 0; slice < s->nb_slices; slice++) {
+            bytestream2_put_byte(&pb, i);
+        }
+    }
+
+    if (s->correlate) {
+        uint8_t *r, *g, *b;
+        AVFrame *p = av_frame_clone(frame);
+
+        g = p->data[0];
+        b = p->data[1];
+        r = p->data[2];
+
+        for (i = 0; i < height; i++) {
+            s->llvidencdsp.diff_bytes(b, b, g, width);
+            s->llvidencdsp.diff_bytes(r, r, g, width);
+            g += p->linesize[0];
+            b += p->linesize[1];
+            r += p->linesize[2];
+        }
+
+        FFSWAP(uint8_t*, p->data[0], p->data[1]);
+        FFSWAP(int, p->linesize[0], p->linesize[1]);
+
+        for (i = 0; i < s->planes; i++) {
+            for (slice = 0; slice < s->nb_slices; slice++) {
+                s->predict(s, p->data[i], s->slices[i], p->linesize[i],
+                               p->width, p->height);
+            }
+        }
+
+        av_frame_free(&p);
+    } else {
+        for (i = 0; i < s->planes; i++) {
+            for (slice = 0; slice < s->nb_slices; slice++) {
+                s->predict(s, frame->data[i], s->slices[i], frame->linesize[i],
+                           AV_CEIL_RSHIFT(frame->width, s->hshift[i]),
+                           AV_CEIL_RSHIFT(frame->height, s->vshift[i]));
+            }
+        }
+    }
+
+    init_put_bits(&s->pb, pkt->data + bytestream2_tell_p(&pb), bytestream2_get_bytes_left_p(&pb));
+
+    for (i = 0; i < s->planes; i++) {
+        encode_table(avctx, s->slices[i],
+                     AV_CEIL_RSHIFT(frame->width,  s->hshift[i]),
+                     AV_CEIL_RSHIFT(frame->height, s->vshift[i]),
+                     &s->pb, s->he[i]);
+    }
+    s->tables_size = (put_bits_count(&s->pb) + 7) >> 3;
+    bytestream2_skip_p(&pb, s->tables_size);
+
+    for (i = 0; i < s->planes; i++) {
+        unsigned slice_size;
+
+        s->slice_pos[i] = bytestream2_tell_p(&pb);
+        slice_size = encode_slice(s->slices[i], pkt->data + bytestream2_tell_p(&pb),
+                                  bytestream2_get_bytes_left_p(&pb),
+                                  AV_CEIL_RSHIFT(frame->width,  s->hshift[i]),
+                                  AV_CEIL_RSHIFT(frame->height, s->vshift[i]),
+                                  s->he[i], s->frame_pred);
+        bytestream2_skip_p(&pb, slice_size);
+    }
+
+    pos = bytestream2_tell_p(&pb);
+    bytestream2_seek_p(&pb, 32, SEEK_SET);
+    bytestream2_put_le32(&pb, s->slice_pos[0] - 32);
+    for (i = 0; i < s->planes; i++) {
+        bytestream2_put_le32(&pb, s->slice_pos[i] - 32);
+    }
+    bytestream2_seek_p(&pb, pos, SEEK_SET);
+
+    pkt->size   = bytestream2_tell_p(&pb);
+    pkt->flags |= AV_PKT_FLAG_KEY;
+
+    *got_packet = 1;
+
+    return 0;
+}
+
+static av_cold int magy_encode_close(AVCodecContext *avctx)
+{
+    MagicYUVContext *s = avctx->priv_data;
+    int i;
+
+    for (i = 0; i < s->planes; i++)
+        av_freep(&s->slices[i]);
+
+    return 0;
+}
+
+#define OFFSET(x) offsetof(MagicYUVContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "pred", "Prediction method", OFFSET(frame_pred), AV_OPT_TYPE_INT, {.i64=LEFT}, LEFT, MEDIAN, VE, "pred" },
+    { "left",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LEFT },     0, 0, VE, "pred" },
+    { "gradient", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = GRADIENT }, 0, 0, VE, "pred" },
+    { "median",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MEDIAN },   0, 0, VE, "pred" },
+    { NULL},
+};
+
+static const AVClass magicyuv_class = {
+    .class_name = "magicyuv",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_magicyuv_encoder = {
+    .name             = "magicyuv",
+    .long_name        = NULL_IF_CONFIG_SMALL("MagicYUV video"),
+    .type             = AVMEDIA_TYPE_VIDEO,
+    .id               = AV_CODEC_ID_MAGICYUV,
+    .priv_data_size   = sizeof(MagicYUVContext),
+    .priv_class       = &magicyuv_class,
+    .init             = magy_encode_init,
+    .close            = magy_encode_close,
+    .encode2          = magy_encode_frame,
+    .capabilities     = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
+    .pix_fmts         = (const enum AVPixelFormat[]) {
+                          AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, AV_PIX_FMT_YUV422P,
+                          AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA444P, AV_PIX_FMT_GRAY8,
+                          AV_PIX_FMT_NONE
+                      },
+};

diff --git a/libavcodec/me_cmp.c b/libavcodec/me_cmp.c
index 5e34a11..ae248c5 100644
--- a/libavcodec/me_cmp.c
+++ b/libavcodec/me_cmp.c

@@ -29,13 +29,47 @@
 #include "mpegvideo.h"
 #include "config.h"
 
-uint32_t ff_square_tab[512] = { 0, };
+/* (i - 256) * (i - 256) */
+const uint32_t ff_square_tab[512] = {
+    65536, 65025, 64516, 64009, 63504, 63001, 62500, 62001, 61504, 61009, 60516, 60025, 59536, 59049, 58564, 58081,
+    57600, 57121, 56644, 56169, 55696, 55225, 54756, 54289, 53824, 53361, 52900, 52441, 51984, 51529, 51076, 50625,
+    50176, 49729, 49284, 48841, 48400, 47961, 47524, 47089, 46656, 46225, 45796, 45369, 44944, 44521, 44100, 43681,
+    43264, 42849, 42436, 42025, 41616, 41209, 40804, 40401, 40000, 39601, 39204, 38809, 38416, 38025, 37636, 37249,
+    36864, 36481, 36100, 35721, 35344, 34969, 34596, 34225, 33856, 33489, 33124, 32761, 32400, 32041, 31684, 31329,
+    30976, 30625, 30276, 29929, 29584, 29241, 28900, 28561, 28224, 27889, 27556, 27225, 26896, 26569, 26244, 25921,
+    25600, 25281, 24964, 24649, 24336, 24025, 23716, 23409, 23104, 22801, 22500, 22201, 21904, 21609, 21316, 21025,
+    20736, 20449, 20164, 19881, 19600, 19321, 19044, 18769, 18496, 18225, 17956, 17689, 17424, 17161, 16900, 16641,
+    16384, 16129, 15876, 15625, 15376, 15129, 14884, 14641, 14400, 14161, 13924, 13689, 13456, 13225, 12996, 12769,
+    12544, 12321, 12100, 11881, 11664, 11449, 11236, 11025, 10816, 10609, 10404, 10201, 10000,  9801,  9604,  9409,
+     9216,  9025,  8836,  8649,  8464,  8281,  8100,  7921,  7744,  7569,  7396,  7225,  7056,  6889,  6724,  6561,
+     6400,  6241,  6084,  5929,  5776,  5625,  5476,  5329,  5184,  5041,  4900,  4761,  4624,  4489,  4356,  4225,
+     4096,  3969,  3844,  3721,  3600,  3481,  3364,  3249,  3136,  3025,  2916,  2809,  2704,  2601,  2500,  2401,
+     2304,  2209,  2116,  2025,  1936,  1849,  1764,  1681,  1600,  1521,  1444,  1369,  1296,  1225,  1156,  1089,
+     1024,   961,   900,   841,   784,   729,   676,   625,   576,   529,   484,   441,   400,   361,   324,   289,
+      256,   225,   196,   169,   144,   121,   100,    81,    64,    49,    36,    25,    16,     9,     4,     1,
+        0,     1,     4,     9,    16,    25,    36,    49,    64,    81,   100,   121,   144,   169,   196,   225,
+      256,   289,   324,   361,   400,   441,   484,   529,   576,   625,   676,   729,   784,   841,   900,   961,
+     1024,  1089,  1156,  1225,  1296,  1369,  1444,  1521,  1600,  1681,  1764,  1849,  1936,  2025,  2116,  2209,
+     2304,  2401,  2500,  2601,  2704,  2809,  2916,  3025,  3136,  3249,  3364,  3481,  3600,  3721,  3844,  3969,
+     4096,  4225,  4356,  4489,  4624,  4761,  4900,  5041,  5184,  5329,  5476,  5625,  5776,  5929,  6084,  6241,
+     6400,  6561,  6724,  6889,  7056,  7225,  7396,  7569,  7744,  7921,  8100,  8281,  8464,  8649,  8836,  9025,
+     9216,  9409,  9604,  9801, 10000, 10201, 10404, 10609, 10816, 11025, 11236, 11449, 11664, 11881, 12100, 12321,
+    12544, 12769, 12996, 13225, 13456, 13689, 13924, 14161, 14400, 14641, 14884, 15129, 15376, 15625, 15876, 16129,
+    16384, 16641, 16900, 17161, 17424, 17689, 17956, 18225, 18496, 18769, 19044, 19321, 19600, 19881, 20164, 20449,
+    20736, 21025, 21316, 21609, 21904, 22201, 22500, 22801, 23104, 23409, 23716, 24025, 24336, 24649, 24964, 25281,
+    25600, 25921, 26244, 26569, 26896, 27225, 27556, 27889, 28224, 28561, 28900, 29241, 29584, 29929, 30276, 30625,
+    30976, 31329, 31684, 32041, 32400, 32761, 33124, 33489, 33856, 34225, 34596, 34969, 35344, 35721, 36100, 36481,
+    36864, 37249, 37636, 38025, 38416, 38809, 39204, 39601, 40000, 40401, 40804, 41209, 41616, 42025, 42436, 42849,
+    43264, 43681, 44100, 44521, 44944, 45369, 45796, 46225, 46656, 47089, 47524, 47961, 48400, 48841, 49284, 49729,
+    50176, 50625, 51076, 51529, 51984, 52441, 52900, 53361, 53824, 54289, 54756, 55225, 55696, 56169, 56644, 57121,
+    57600, 58081, 58564, 59049, 59536, 60025, 60516, 61009, 61504, 62001, 62500, 63001, 63504, 64009, 64516, 65025,
+};
 
 static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                   ptrdiff_t stride, int h)
 {
     int s = 0, i;
-    uint32_t *sq = ff_square_tab + 256;
+    const uint32_t *sq = ff_square_tab + 256;
 
     for (i = 0; i < h; i++) {
         s    += sq[pix1[0] - pix2[0]];
@@ -52,7 +86,7 @@
                   ptrdiff_t stride, int h)
 {
     int s = 0, i;
-    uint32_t *sq = ff_square_tab + 256;
+    const uint32_t *sq = ff_square_tab + 256;
 
     for (i = 0; i < h; i++) {
         s    += sq[pix1[0] - pix2[0]];
@@ -73,7 +107,7 @@
                    ptrdiff_t stride, int h)
 {
     int s = 0, i;
-    uint32_t *sq = ff_square_tab + 256;
+    const uint32_t *sq = ff_square_tab + 256;
 
     for (i = 0; i < h; i++) {
         s += sq[pix1[0]  - pix2[0]];
@@ -721,7 +755,7 @@
     s->block_last_index[0 /* FIXME */] =
         s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
     s->dct_unquantize_inter(s, temp, 0, s->qscale);
-    ff_simple_idct_8(temp); // FIXME
+    ff_simple_idct_int16_8bit(temp); // FIXME
 
     for (i = 0; i < 64; i++)
         sum += (temp[i] - bak[i]) * (temp[i] - bak[i]);
@@ -977,14 +1011,6 @@
 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
 
-av_cold void ff_me_cmp_init_static(void)
-{
-    int i;
-
-    for (i = 0; i < 512; i++)
-        ff_square_tab[i] = (i - 256) * (i - 256);
-}
-
 int ff_check_alignment(void)
 {
     static int did_fail = 0;

diff --git a/libavcodec/me_cmp.h b/libavcodec/me_cmp.h
index 0dbbcbb..0a589e3 100644
--- a/libavcodec/me_cmp.h
+++ b/libavcodec/me_cmp.h

@@ -23,7 +23,7 @@
 
 #include "avcodec.h"
 
-extern uint32_t ff_square_tab[512];
+extern const uint32_t ff_square_tab[512];
 
 
 /* minimum alignment rules ;)
@@ -79,8 +79,6 @@
     me_cmp_func median_sad[6];
 } MECmpContext;
 
-void ff_me_cmp_init_static(void);
-
 int ff_check_alignment(void);
 
 void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx);

diff --git a/libavcodec/mediacodec.c b/libavcodec/mediacodec.c
index 610bb49..aa14624 100644
--- a/libavcodec/mediacodec.c
+++ b/libavcodec/mediacodec.c

@@ -26,7 +26,7 @@
 
 #include "mediacodec.h"
 
-#if CONFIG_H264_MEDIACODEC_HWACCEL
+#if CONFIG_MEDIACODEC
 
 #include <jni.h>
 
@@ -91,13 +91,33 @@
     MediaCodecDecContext *ctx = buffer->ctx;
     int released = atomic_fetch_add(&buffer->released, 1);
 
-    if (!released) {
+    if (!released && (ctx->delay_flush || buffer->serial == atomic_load(&ctx->serial))) {
+        atomic_fetch_sub(&ctx->hw_buffer_count, 1);
+        av_log(ctx->avctx, AV_LOG_DEBUG,
+               "Releasing output buffer %zd (%p) ts=%"PRId64" with render=%d [%d pending]\n",
+               buffer->index, buffer, buffer->pts, render, atomic_load(&ctx->hw_buffer_count));
         return ff_AMediaCodec_releaseOutputBuffer(ctx->codec, buffer->index, render);
     }
 
     return 0;
 }
 
+int av_mediacodec_render_buffer_at_time(AVMediaCodecBuffer *buffer, int64_t time)
+{
+    MediaCodecDecContext *ctx = buffer->ctx;
+    int released = atomic_fetch_add(&buffer->released, 1);
+
+    if (!released && (ctx->delay_flush || buffer->serial == atomic_load(&ctx->serial))) {
+        atomic_fetch_sub(&ctx->hw_buffer_count, 1);
+        av_log(ctx->avctx, AV_LOG_DEBUG,
+               "Rendering output buffer %zd (%p) ts=%"PRId64" with time=%"PRId64" [%d pending]\n",
+               buffer->index, buffer, buffer->pts, time, atomic_load(&ctx->hw_buffer_count));
+        return ff_AMediaCodec_releaseOutputBufferAtTime(ctx->codec, buffer->index, time);
+    }
+
+    return 0;
+}
+
 #else
 
 #include <stdlib.h>
@@ -121,4 +141,9 @@
     return AVERROR(ENOSYS);
 }
 
+int av_mediacodec_render_buffer_at_time(AVMediaCodecBuffer *buffer, int64_t time)
+{
+    return AVERROR(ENOSYS);
+}
+
 #endif

diff --git a/libavcodec/mediacodec.h b/libavcodec/mediacodec.h
index 5606d24..4c8545d 100644
--- a/libavcodec/mediacodec.h
+++ b/libavcodec/mediacodec.h

@@ -85,4 +85,17 @@
  */
 int av_mediacodec_release_buffer(AVMediaCodecBuffer *buffer, int render);
 
+/**
+ * Release a MediaCodec buffer and render it at the given time to the surface
+ * that is associated with the decoder. The timestamp must be within one second
+ * of the current java/lang/System#nanoTime() (which is implemented using
+ * CLOCK_MONOTONIC on Android). See the Android MediaCodec documentation
+ * of android/media/MediaCodec#releaseOutputBuffer(int,long) for more details.
+ *
+ * @param buffer the buffer to render
+ * @param time timestamp in nanoseconds of when to render the buffer
+ * @return 0 on success, < 0 otherwise
+ */
+int av_mediacodec_render_buffer_at_time(AVMediaCodecBuffer *buffer, int64_t time);
+
 #endif /* AVCODEC_MEDIACODEC_H */

diff --git a/libavcodec/mediacodec_wrapper.c b/libavcodec/mediacodec_wrapper.c
index f34450a..a024e3b 100644
--- a/libavcodec/mediacodec_wrapper.c
+++ b/libavcodec/mediacodec_wrapper.c

@@ -111,6 +111,8 @@
 
     jmethodID init_id;
 
+    jmethodID contains_key_id;
+
     jmethodID get_integer_id;
     jmethodID get_long_id;
     jmethodID get_float_id;
@@ -132,6 +134,8 @@
 
         { "android/media/MediaFormat", "<init>", "()V", FF_JNI_METHOD, offsetof(struct JNIAMediaFormatFields, init_id), 1 },
 
+        { "android/media/MediaFormat", "containsKey", "(Ljava/lang/String;)Z", FF_JNI_METHOD,offsetof(struct JNIAMediaFormatFields, contains_key_id), 1 },
+
         { "android/media/MediaFormat", "getInteger", "(Ljava/lang/String;)I", FF_JNI_METHOD, offsetof(struct JNIAMediaFormatFields, get_integer_id), 1 },
         { "android/media/MediaFormat", "getLong", "(Ljava/lang/String;)J", FF_JNI_METHOD, offsetof(struct JNIAMediaFormatFields, get_long_id), 1 },
         { "android/media/MediaFormat", "getFloat", "(Ljava/lang/String;)F", FF_JNI_METHOD, offsetof(struct JNIAMediaFormatFields, get_float_id), 1 },
@@ -152,7 +156,7 @@
 static const AVClass amediaformat_class = {
     .class_name = "amediaformat",
     .item_name  = av_default_item_name,
-    .version    = LIBAVCODEC_VERSION_INT,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 struct FFAMediaFormat {
@@ -264,7 +268,7 @@
 static const AVClass amediacodec_class = {
     .class_name = "amediacodec",
     .item_name  = av_default_item_name,
-    .version    = LIBAVCODEC_VERSION_INT,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 struct FFAMediaCodec {
@@ -274,6 +278,7 @@
     struct JNIAMediaCodecFields jfields;
 
     jobject object;
+    jobject buffer_info;
 
     jobject input_buffers;
     jobject output_buffers;
@@ -464,7 +469,12 @@
                     goto done;
                 }
 
-                if (strstr(name, "OMX.google")) {
+                /* Skip software decoders */
+                if (
+                    strstr(name, "OMX.google") ||
+                    strstr(name, "OMX.ffmpeg") ||
+                    (strstr(name, "OMX.SEC") && strstr(name, ".sw.")) ||
+                    !strcmp(name, "OMX.qcom.video.decoder.hevcswvdec")) {
                     av_freep(&name);
                     goto done_with_type;
                 }
@@ -737,6 +747,7 @@
 
     JNIEnv *env = NULL;
     jstring key = NULL;
+    jboolean contains_key;
 
     av_assert0(format != NULL);
 
@@ -748,6 +759,12 @@
         goto fail;
     }
 
+    contains_key = (*env)->CallBooleanMethod(env, format->object, format->jfields.contains_key_id, key);
+    if (!contains_key || (ret = ff_jni_exception_check(env, 1, format)) < 0) {
+        ret = 0;
+        goto fail;
+    }
+
     *out = (*env)->CallIntMethod(env, format->object, format->jfields.get_integer_id, key);
     if ((ret = ff_jni_exception_check(env, 1, format)) < 0) {
         ret = 0;
@@ -769,6 +786,7 @@
 
     JNIEnv *env = NULL;
     jstring key = NULL;
+    jboolean contains_key;
 
     av_assert0(format != NULL);
 
@@ -780,6 +798,12 @@
         goto fail;
     }
 
+    contains_key = (*env)->CallBooleanMethod(env, format->object, format->jfields.contains_key_id, key);
+    if (!contains_key || (ret = ff_jni_exception_check(env, 1, format)) < 0) {
+        ret = 0;
+        goto fail;
+    }
+
     *out = (*env)->CallLongMethod(env, format->object, format->jfields.get_long_id, key);
     if ((ret = ff_jni_exception_check(env, 1, format)) < 0) {
         ret = 0;
@@ -801,6 +825,7 @@
 
     JNIEnv *env = NULL;
     jstring key = NULL;
+    jboolean contains_key;
 
     av_assert0(format != NULL);
 
@@ -812,6 +837,12 @@
         goto fail;
     }
 
+    contains_key = (*env)->CallBooleanMethod(env, format->object, format->jfields.contains_key_id, key);
+    if (!contains_key || (ret = ff_jni_exception_check(env, 1, format)) < 0) {
+        ret = 0;
+        goto fail;
+    }
+
     *out = (*env)->CallFloatMethod(env, format->object, format->jfields.get_float_id, key);
     if ((ret = ff_jni_exception_check(env, 1, format)) < 0) {
         ret = 0;
@@ -833,6 +864,7 @@
 
     JNIEnv *env = NULL;
     jstring key = NULL;
+    jboolean contains_key;
     jobject result = NULL;
 
     av_assert0(format != NULL);
@@ -845,6 +877,12 @@
         goto fail;
     }
 
+    contains_key = (*env)->CallBooleanMethod(env, format->object, format->jfields.contains_key_id, key);
+    if (!contains_key || (ret = ff_jni_exception_check(env, 1, format)) < 0) {
+        ret = 0;
+        goto fail;
+    }
+
     result = (*env)->CallObjectMethod(env, format->object, format->jfields.get_bytebuffer_id, key);
     if ((ret = ff_jni_exception_check(env, 1, format)) < 0) {
         ret = 0;
@@ -884,6 +922,7 @@
 
     JNIEnv *env = NULL;
     jstring key = NULL;
+    jboolean contains_key;
     jstring result = NULL;
 
     av_assert0(format != NULL);
@@ -896,6 +935,12 @@
         goto fail;
     }
 
+    contains_key = (*env)->CallBooleanMethod(env, format->object, format->jfields.contains_key_id, key);
+    if (!contains_key || (ret = ff_jni_exception_check(env, 1, format)) < 0) {
+        ret = 0;
+        goto fail;
+    }
+
     result = (*env)->CallObjectMethod(env, format->object, format->jfields.get_string_id, key);
     if ((ret = ff_jni_exception_check(env, 1, format)) < 0) {
         ret = 0;
@@ -1132,13 +1177,19 @@
     return ret;
 }
 
-FFAMediaCodec* ff_AMediaCodec_createCodecByName(const char *name)
+#define CREATE_CODEC_BY_NAME   0
+#define CREATE_DECODER_BY_TYPE 1
+#define CREATE_ENCODER_BY_TYPE 2
+
+static inline FFAMediaCodec *codec_create(int method, const char *arg)
 {
     int ret = -1;
     JNIEnv *env = NULL;
     FFAMediaCodec *codec = NULL;
-    jstring codec_name = NULL;
+    jstring jarg = NULL;
     jobject object = NULL;
+    jobject buffer_info = NULL;
+    jmethodID create_id = NULL;
 
     codec = av_mallocz(sizeof(FFAMediaCodec));
     if (!codec) {
@@ -1156,12 +1207,23 @@
         goto fail;
     }
 
-    codec_name = ff_jni_utf_chars_to_jstring(env, name, codec);
-    if (!codec_name) {
+    jarg = ff_jni_utf_chars_to_jstring(env, arg, codec);
+    if (!jarg) {
         goto fail;
     }
 
-    object = (*env)->CallStaticObjectMethod(env, codec->jfields.mediacodec_class, codec->jfields.create_by_codec_name_id, codec_name);
+    switch (method) {
+    case CREATE_CODEC_BY_NAME:   create_id = codec->jfields.create_by_codec_name_id;   break;
+    case CREATE_DECODER_BY_TYPE: create_id = codec->jfields.create_decoder_by_type_id; break;
+    case CREATE_ENCODER_BY_TYPE: create_id = codec->jfields.create_encoder_by_type_id; break;
+    default:
+        av_assert0(0);
+    }
+
+    object = (*env)->CallStaticObjectMethod(env,
+                                            codec->jfields.mediacodec_class,
+                                            create_id,
+                                            jarg);
     if (ff_jni_exception_check(env, 1, codec) < 0) {
         goto fail;
     }
@@ -1179,17 +1241,39 @@
         codec->has_get_i_o_buffer = 1;
     }
 
+    buffer_info = (*env)->NewObject(env, codec->jfields.mediainfo_class, codec->jfields.init_id);
+    if (ff_jni_exception_check(env, 1, codec) < 0) {
+        goto fail;
+    }
+
+    codec->buffer_info = (*env)->NewGlobalRef(env, buffer_info);
+    if (!codec->buffer_info) {
+        goto fail;
+    }
+
     ret = 0;
 fail:
-    if (codec_name) {
-        (*env)->DeleteLocalRef(env, codec_name);
+    if (jarg) {
+        (*env)->DeleteLocalRef(env, jarg);
     }
 
     if (object) {
         (*env)->DeleteLocalRef(env, object);
     }
 
+    if (buffer_info) {
+        (*env)->DeleteLocalRef(env, buffer_info);
+    }
+
     if (ret < 0) {
+        if (codec->object) {
+            (*env)->DeleteGlobalRef(env, codec->object);
+        }
+
+        if (codec->buffer_info) {
+            (*env)->DeleteGlobalRef(env, codec->buffer_info);
+        }
+
         ff_jni_reset_jfields(env, &codec->jfields, jni_amediacodec_mapping, 1, codec);
         av_freep(&codec);
     }
@@ -1197,135 +1281,15 @@
     return codec;
 }
 
-FFAMediaCodec* ff_AMediaCodec_createDecoderByType(const char *mime)
-{
-    int ret = -1;
-    JNIEnv *env = NULL;
-    FFAMediaCodec *codec = NULL;
-    jstring mime_type = NULL;
-    jobject object = NULL;
+#define DECLARE_FF_AMEDIACODEC_CREATE_FUNC(name, method) \
+FFAMediaCodec *ff_AMediaCodec_##name(const char *arg)    \
+{                                                        \
+    return codec_create(method, arg);                    \
+}                                                        \
 
-    codec = av_mallocz(sizeof(FFAMediaCodec));
-    if (!codec) {
-        return NULL;
-    }
-    codec->class = &amediacodec_class;
-
-    env = ff_jni_get_env(codec);
-    if (!env) {
-        av_freep(&codec);
-        return NULL;
-    }
-
-    if (ff_jni_init_jfields(env, &codec->jfields, jni_amediacodec_mapping, 1, codec) < 0) {
-        goto fail;
-    }
-
-    mime_type = ff_jni_utf_chars_to_jstring(env, mime, codec);
-    if (!mime_type) {
-        goto fail;
-    }
-
-    object = (*env)->CallStaticObjectMethod(env, codec->jfields.mediacodec_class, codec->jfields.create_decoder_by_type_id, mime_type);
-    if (ff_jni_exception_check(env, 1, codec) < 0) {
-        goto fail;
-    }
-
-    codec->object = (*env)->NewGlobalRef(env, object);
-    if (!codec->object) {
-        goto fail;
-    }
-
-    if (codec_init_static_fields(codec) < 0) {
-        goto fail;
-    }
-
-    if (codec->jfields.get_input_buffer_id && codec->jfields.get_output_buffer_id) {
-        codec->has_get_i_o_buffer = 1;
-    }
-
-    ret = 0;
-fail:
-    if (mime_type) {
-        (*env)->DeleteLocalRef(env, mime_type);
-    }
-
-    if (object) {
-        (*env)->DeleteLocalRef(env, object);
-    }
-
-    if (ret < 0) {
-        ff_jni_reset_jfields(env, &codec->jfields, jni_amediacodec_mapping, 1, codec);
-        av_freep(&codec);
-    }
-
-    return codec;
-}
-
-FFAMediaCodec* ff_AMediaCodec_createEncoderByType(const char *mime)
-{
-    int ret = -1;
-    JNIEnv *env = NULL;
-    FFAMediaCodec *codec = NULL;
-    jstring mime_type = NULL;
-    jobject object = NULL;
-
-    codec = av_mallocz(sizeof(FFAMediaCodec));
-    if (!codec) {
-        return NULL;
-    }
-    codec->class = &amediacodec_class;
-
-    env = ff_jni_get_env(codec);
-    if (!env) {
-        av_freep(&codec);
-        return NULL;
-    }
-
-    if (ff_jni_init_jfields(env, &codec->jfields, jni_amediacodec_mapping, 1, codec) < 0) {
-        goto fail;
-    }
-
-    mime_type = ff_jni_utf_chars_to_jstring(env, mime, codec);
-    if (!mime_type) {
-        goto fail;
-    }
-
-    object = (*env)->CallStaticObjectMethod(env, codec->jfields.mediacodec_class, codec->jfields.create_encoder_by_type_id, mime_type);
-    if (ff_jni_exception_check(env, 1, codec) < 0) {
-        goto fail;
-    }
-
-    codec->object = (*env)->NewGlobalRef(env, object);
-    if (!codec->object) {
-        goto fail;
-    }
-
-    if (codec_init_static_fields(codec) < 0) {
-        goto fail;
-    }
-
-    if (codec->jfields.get_input_buffer_id && codec->jfields.get_output_buffer_id) {
-        codec->has_get_i_o_buffer = 1;
-    }
-
-    ret = 0;
-fail:
-    if (mime_type) {
-        (*env)->DeleteLocalRef(env, mime_type);
-    }
-
-    if (object) {
-        (*env)->DeleteLocalRef(env, object);
-    }
-
-    if  (ret < 0) {
-        ff_jni_reset_jfields(env, &codec->jfields, jni_amediacodec_mapping, 1, codec);
-        av_freep(&codec);
-    }
-
-    return codec;
-}
+DECLARE_FF_AMEDIACODEC_CREATE_FUNC(createCodecByName,   CREATE_CODEC_BY_NAME)
+DECLARE_FF_AMEDIACODEC_CREATE_FUNC(createDecoderByType, CREATE_DECODER_BY_TYPE)
+DECLARE_FF_AMEDIACODEC_CREATE_FUNC(createEncoderByType, CREATE_ENCODER_BY_TYPE)
 
 int ff_AMediaCodec_delete(FFAMediaCodec* codec)
 {
@@ -1347,6 +1311,9 @@
     (*env)->DeleteGlobalRef(env, codec->object);
     codec->object = NULL;
 
+    (*env)->DeleteGlobalRef(env, codec->buffer_info);
+    codec->buffer_info = NULL;
+
     ff_jni_reset_jfields(env, &codec->jfields, jni_amediacodec_mapping, 1, codec);
 
     av_freep(&codec);
@@ -1465,7 +1432,7 @@
 
     JNI_GET_ENV_OR_RETURN(env, codec, AVERROR_EXTERNAL);
 
-    (*env)->CallVoidMethod(env, codec->object, codec->jfields.release_output_buffer_at_time_id, (jint)idx, timestampNs);
+    (*env)->CallVoidMethod(env, codec->object, codec->jfields.release_output_buffer_at_time_id, (jint)idx, (jlong)timestampNs);
     if (ff_jni_exception_check(env, 1, codec) < 0) {
         ret = AVERROR_EXTERNAL;
         goto fail;
@@ -1514,48 +1481,31 @@
     int ret = 0;
     JNIEnv *env = NULL;
 
-    jobject mediainfo = NULL;
-
     JNI_GET_ENV_OR_RETURN(env, codec, AVERROR_EXTERNAL);
 
-    mediainfo = (*env)->NewObject(env, codec->jfields.mediainfo_class, codec->jfields.init_id);
+    ret = (*env)->CallIntMethod(env, codec->object, codec->jfields.dequeue_output_buffer_id, codec->buffer_info, timeoutUs);
     if (ff_jni_exception_check(env, 1, codec) < 0) {
-        ret = AVERROR_EXTERNAL;
-        goto fail;
+        return AVERROR_EXTERNAL;
     }
 
-    ret = (*env)->CallIntMethod(env, codec->object, codec->jfields.dequeue_output_buffer_id, mediainfo, timeoutUs);
+    info->flags = (*env)->GetIntField(env, codec->buffer_info, codec->jfields.flags_id);
     if (ff_jni_exception_check(env, 1, codec) < 0) {
-        ret = AVERROR_EXTERNAL;
-        goto fail;
+        return AVERROR_EXTERNAL;
     }
 
-    info->flags = (*env)->GetIntField(env, mediainfo, codec->jfields.flags_id);
+    info->offset = (*env)->GetIntField(env, codec->buffer_info, codec->jfields.offset_id);
     if (ff_jni_exception_check(env, 1, codec) < 0) {
-        ret = AVERROR_EXTERNAL;
-        goto fail;
+        return AVERROR_EXTERNAL;
     }
 
-    info->offset = (*env)->GetIntField(env, mediainfo, codec->jfields.offset_id);
+    info->presentationTimeUs = (*env)->GetLongField(env, codec->buffer_info, codec->jfields.presentation_time_us_id);
     if (ff_jni_exception_check(env, 1, codec) < 0) {
-        ret = AVERROR_EXTERNAL;
-        goto fail;
+        return AVERROR_EXTERNAL;
     }
 
-    info->presentationTimeUs = (*env)->GetLongField(env, mediainfo, codec->jfields.presentation_time_us_id);
+    info->size = (*env)->GetIntField(env, codec->buffer_info, codec->jfields.size_id);
     if (ff_jni_exception_check(env, 1, codec) < 0) {
-        ret = AVERROR_EXTERNAL;
-        goto fail;
-    }
-
-    info->size = (*env)->GetIntField(env, mediainfo, codec->jfields.size_id);
-    if (ff_jni_exception_check(env, 1, codec) < 0) {
-        ret = AVERROR_EXTERNAL;
-        goto fail;
-    }
-fail:
-    if (mediainfo) {
-        (*env)->DeleteLocalRef(env, mediainfo);
+        return AVERROR_EXTERNAL;
     }
 
     return ret;
@@ -1737,3 +1687,18 @@
 fail:
     return ret;
 }
+
+int ff_Build_SDK_INT(AVCodecContext *avctx)
+{
+    int ret = -1;
+    JNIEnv *env = NULL;
+    jclass versionClass;
+    jfieldID sdkIntFieldID;
+    JNI_GET_ENV_OR_RETURN(env, avctx, -1);
+
+    versionClass = (*env)->FindClass(env, "android/os/Build$VERSION");
+    sdkIntFieldID = (*env)->GetStaticFieldID(env, versionClass, "SDK_INT", "I");
+    ret = (*env)->GetStaticIntField(env, versionClass, sdkIntFieldID);
+    (*env)->DeleteLocalRef(env, versionClass);
+    return ret;
+}

diff --git a/libavcodec/mediacodec_wrapper.h b/libavcodec/mediacodec_wrapper.h
index 1b4f3a9..f0de16d 100644
--- a/libavcodec/mediacodec_wrapper.h
+++ b/libavcodec/mediacodec_wrapper.h

@@ -124,4 +124,6 @@
 
 int ff_AMediaCodec_cleanOutputBuffers(FFAMediaCodec *codec);
 
+int ff_Build_SDK_INT(AVCodecContext *avctx);
+
 #endif /* AVCODEC_MEDIACODEC_WRAPPER_H */

diff --git a/libavcodec/mediacodecdec.c b/libavcodec/mediacodecdec.c
index 366c039..3a4240a 100644
--- a/libavcodec/mediacodecdec.c
+++ b/libavcodec/mediacodecdec.c

@@ -25,26 +25,31 @@
 
 #include "libavutil/avassert.h"
 #include "libavutil/common.h"
-#include "libavutil/fifo.h"
 #include "libavutil/opt.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/pixfmt.h"
+#include "libavutil/internal.h"
 
 #include "avcodec.h"
+#include "decode.h"
 #include "h264_parse.h"
 #include "hevc_parse.h"
+#include "hwaccel.h"
 #include "internal.h"
 #include "mediacodec_wrapper.h"
 #include "mediacodecdec_common.h"
 
 typedef struct MediaCodecH264DecContext {
 
+    AVClass *avclass;
+
     MediaCodecDecContext *ctx;
 
-    AVFifoBuffer *fifo;
-
     AVPacket buffered_pkt;
 
+    int delay_flush;
+    int amlogic_mpeg2_api23_workaround;
+
 } MediaCodecH264DecContext;
 
 static av_cold int mediacodec_decode_close(AVCodecContext *avctx)
@@ -54,8 +59,6 @@
     ff_mediacodec_dec_close(avctx, s->ctx);
     s->ctx = NULL;
 
-    av_fifo_free(s->fifo);
-
     av_packet_unref(&s->buffered_pkt);
 
     return 0;
@@ -182,7 +185,7 @@
     int ret;
 
     HEVCParamSets ps;
-    HEVCSEIContext sei;
+    HEVCSEI sei;
 
     const HEVCVPS *vps = NULL;
     const HEVCPPS *pps = NULL;
@@ -266,34 +269,11 @@
 }
 #endif
 
-#if CONFIG_MPEG2_MEDIACODEC_DECODER
-static int mpeg2_set_extradata(AVCodecContext *avctx, FFAMediaFormat *format)
-{
-    int ret = 0;
-
-    if (avctx->extradata) {
-        ff_AMediaFormat_setBuffer(format, "csd-0", avctx->extradata, avctx->extradata_size);
-    }
-
-    return ret;
-}
-#endif
-
-#if CONFIG_MPEG4_MEDIACODEC_DECODER
-static int mpeg4_set_extradata(AVCodecContext *avctx, FFAMediaFormat *format)
-{
-    int ret = 0;
-
-    if (avctx->extradata) {
-        ff_AMediaFormat_setBuffer(format, "csd-0", avctx->extradata, avctx->extradata_size);
-    }
-
-    return ret;
-}
-#endif
-
-#if CONFIG_VP8_MEDIACODEC_DECODER || CONFIG_VP9_MEDIACODEC_DECODER
-static int vpx_set_extradata(AVCodecContext *avctx, FFAMediaFormat *format)
+#if CONFIG_MPEG2_MEDIACODEC_DECODER || \
+    CONFIG_MPEG4_MEDIACODEC_DECODER || \
+    CONFIG_VP8_MEDIACODEC_DECODER   || \
+    CONFIG_VP9_MEDIACODEC_DECODER
+static int common_set_extradata(AVCodecContext *avctx, FFAMediaFormat *format)
 {
     int ret = 0;
 
@@ -308,6 +288,7 @@
 static av_cold int mediacodec_decode_init(AVCodecContext *avctx)
 {
     int ret;
+    int sdk_int;
 
     const char *codec_mime = NULL;
 
@@ -344,7 +325,7 @@
     case AV_CODEC_ID_MPEG2VIDEO:
         codec_mime = "video/mpeg2";
 
-        ret = mpeg2_set_extradata(avctx, format);
+        ret = common_set_extradata(avctx, format);
         if (ret < 0)
             goto done;
         break;
@@ -353,7 +334,7 @@
     case AV_CODEC_ID_MPEG4:
         codec_mime = "video/mp4v-es",
 
-        ret = mpeg4_set_extradata(avctx, format);
+        ret = common_set_extradata(avctx, format);
         if (ret < 0)
             goto done;
         break;
@@ -362,7 +343,7 @@
     case AV_CODEC_ID_VP8:
         codec_mime = "video/x-vnd.on2.vp8";
 
-        ret = vpx_set_extradata(avctx, format);
+        ret = common_set_extradata(avctx, format);
         if (ret < 0)
             goto done;
         break;
@@ -371,7 +352,7 @@
     case AV_CODEC_ID_VP9:
         codec_mime = "video/x-vnd.on2.vp9";
 
-        ret = vpx_set_extradata(avctx, format);
+        ret = common_set_extradata(avctx, format);
         if (ret < 0)
             goto done;
         break;
@@ -391,17 +372,23 @@
         goto done;
     }
 
+    s->ctx->delay_flush = s->delay_flush;
+
     if ((ret = ff_mediacodec_dec_init(avctx, s->ctx, codec_mime, format)) < 0) {
         s->ctx = NULL;
         goto done;
     }
 
-    av_log(avctx, AV_LOG_INFO, "MediaCodec started successfully, ret = %d\n", ret);
+    av_log(avctx, AV_LOG_INFO,
+           "MediaCodec started successfully: codec = %s, ret = %d\n",
+           s->ctx->codec_name, ret);
 
-    s->fifo = av_fifo_alloc(sizeof(AVPacket));
-    if (!s->fifo) {
-        ret = AVERROR(ENOMEM);
-        goto done;
+    sdk_int = ff_Build_SDK_INT(avctx);
+    if (sdk_int <= 23 &&
+        strcmp(s->ctx->codec_name, "OMX.amlogic.mpeg2.decoder.awesome") == 0) {
+        av_log(avctx, AV_LOG_INFO, "Enabling workaround for %s on API=%d\n",
+               s->ctx->codec_name, sdk_int);
+        s->amlogic_mpeg2_api23_workaround = 1;
     }
 
 done:
@@ -416,201 +403,152 @@
     return ret;
 }
 
-
-static int mediacodec_process_data(AVCodecContext *avctx, AVFrame *frame,
-                                   int *got_frame, AVPacket *pkt)
+static int mediacodec_receive_frame(AVCodecContext *avctx, AVFrame *frame)
 {
     MediaCodecH264DecContext *s = avctx->priv_data;
-
-    return ff_mediacodec_dec_decode(avctx, s->ctx, frame, got_frame, pkt);
-}
-
-static int mediacodec_decode_frame(AVCodecContext *avctx, void *data,
-                                   int *got_frame, AVPacket *avpkt)
-{
-    MediaCodecH264DecContext *s = avctx->priv_data;
-    AVFrame *frame    = data;
     int ret;
+    ssize_t index;
 
-    /* buffer the input packet */
-    if (avpkt->size) {
-        AVPacket input_pkt = { 0 };
-
-        if (av_fifo_space(s->fifo) < sizeof(input_pkt)) {
-            ret = av_fifo_realloc2(s->fifo,
-                                   av_fifo_size(s->fifo) + sizeof(input_pkt));
-            if (ret < 0)
-                return ret;
-        }
-
-        ret = av_packet_ref(&input_pkt, avpkt);
-        if (ret < 0)
-            return ret;
-        av_fifo_generic_write(s->fifo, &input_pkt, sizeof(input_pkt), NULL);
-    }
-
-    /*
-     * MediaCodec.flush() discards both input and output buffers, thus we
-     * need to delay the call to this function until the user has released or
-     * renderered the frames he retains.
-     *
-     * After we have buffered an input packet, check if the codec is in the
-     * flushing state. If it is, we need to call ff_mediacodec_dec_flush.
-     *
-     * ff_mediacodec_dec_flush returns 0 if the flush cannot be performed on
-     * the codec (because the user retains frames). The codec stays in the
-     * flushing state.
-     *
-     * ff_mediacodec_dec_flush returns 1 if the flush can actually be
-     * performed on the codec. The codec leaves the flushing state and can
-     * process again packets.
-     *
-     * ff_mediacodec_dec_flush returns a negative value if an error has
-     * occurred.
-     *
-     */
-    if (ff_mediacodec_dec_is_flushing(avctx, s->ctx)) {
+    /* In delay_flush mode, wait until the user has released or rendered
+       all retained frames. */
+    if (s->delay_flush && ff_mediacodec_dec_is_flushing(avctx, s->ctx)) {
         if (!ff_mediacodec_dec_flush(avctx, s->ctx)) {
-            return avpkt->size;
+            return AVERROR(EAGAIN);
         }
     }
 
-    /* process buffered data */
-    while (!*got_frame) {
-        /* prepare the input data */
-        if (s->buffered_pkt.size <= 0) {
-            av_packet_unref(&s->buffered_pkt);
+    /* poll for new frame */
+    ret = ff_mediacodec_dec_receive(avctx, s->ctx, frame, false);
+    if (ret != AVERROR(EAGAIN))
+        return ret;
 
-            /* no more data */
-            if (av_fifo_size(s->fifo) < sizeof(AVPacket)) {
-                return avpkt->size ? avpkt->size :
-                    ff_mediacodec_dec_decode(avctx, s->ctx, frame, got_frame, avpkt);
+    /* feed decoder */
+    while (1) {
+        if (s->ctx->current_input_buffer < 0) {
+            /* poll for input space */
+            index = ff_AMediaCodec_dequeueInputBuffer(s->ctx->codec, 0);
+            if (index < 0) {
+                /* no space, block for an output frame to appear */
+                return ff_mediacodec_dec_receive(avctx, s->ctx, frame, true);
+            }
+            s->ctx->current_input_buffer = index;
+        }
+
+        /* try to flush any buffered packet data */
+        if (s->buffered_pkt.size > 0) {
+            ret = ff_mediacodec_dec_send(avctx, s->ctx, &s->buffered_pkt, false);
+            if (ret >= 0) {
+                s->buffered_pkt.size -= ret;
+                s->buffered_pkt.data += ret;
+                if (s->buffered_pkt.size <= 0)
+                    av_packet_unref(&s->buffered_pkt);
+            } else if (ret < 0 && ret != AVERROR(EAGAIN)) {
+                return ret;
             }
 
-            av_fifo_generic_read(s->fifo, &s->buffered_pkt, sizeof(s->buffered_pkt), NULL);
+            if (s->amlogic_mpeg2_api23_workaround && s->buffered_pkt.size <= 0) {
+                /* fallthrough to fetch next packet regardless of input buffer space */
+            } else {
+                /* poll for space again */
+                continue;
+            }
         }
 
-        ret = mediacodec_process_data(avctx, frame, got_frame, &s->buffered_pkt);
-        if (ret < 0)
+        /* fetch new packet or eof */
+        ret = ff_decode_get_packet(avctx, &s->buffered_pkt);
+        if (ret == AVERROR_EOF) {
+            AVPacket null_pkt = { 0 };
+            ret = ff_mediacodec_dec_send(avctx, s->ctx, &null_pkt, true);
+            if (ret < 0)
+                return ret;
+        } else if (ret == AVERROR(EAGAIN) && s->ctx->current_input_buffer < 0) {
+            return ff_mediacodec_dec_receive(avctx, s->ctx, frame, true);
+        } else if (ret < 0) {
             return ret;
-
-        s->buffered_pkt.size -= ret;
-        s->buffered_pkt.data += ret;
+        }
     }
 
-    return avpkt->size;
+    return AVERROR(EAGAIN);
 }
 
 static void mediacodec_decode_flush(AVCodecContext *avctx)
 {
     MediaCodecH264DecContext *s = avctx->priv_data;
 
-    while (av_fifo_size(s->fifo)) {
-        AVPacket pkt;
-        av_fifo_generic_read(s->fifo, &pkt, sizeof(pkt), NULL);
-        av_packet_unref(&pkt);
-    }
-    av_fifo_reset(s->fifo);
-
     av_packet_unref(&s->buffered_pkt);
 
     ff_mediacodec_dec_flush(avctx, s->ctx);
 }
 
-#if CONFIG_H264_MEDIACODEC_DECODER
-AVCodec ff_h264_mediacodec_decoder = {
-    .name           = "h264_mediacodec",
-    .long_name      = NULL_IF_CONFIG_SMALL("H.264 Android MediaCodec decoder"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_H264,
-    .priv_data_size = sizeof(MediaCodecH264DecContext),
-    .init           = mediacodec_decode_init,
-    .decode         = mediacodec_decode_frame,
-    .flush          = mediacodec_decode_flush,
-    .close          = mediacodec_decode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING,
-    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS,
-    .bsfs           = "h264_mp4toannexb",
+static const AVCodecHWConfigInternal *mediacodec_hw_configs[] = {
+    &(const AVCodecHWConfigInternal) {
+        .public          = {
+            .pix_fmt     = AV_PIX_FMT_MEDIACODEC,
+            .methods     = AV_CODEC_HW_CONFIG_METHOD_AD_HOC |
+                           AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX,
+            .device_type = AV_HWDEVICE_TYPE_MEDIACODEC,
+        },
+        .hwaccel         = NULL,
+    },
+    NULL
 };
+
+#define OFFSET(x) offsetof(MediaCodecH264DecContext, x)
+#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+static const AVOption ff_mediacodec_vdec_options[] = {
+    { "delay_flush", "Delay flush until hw output buffers are returned to the decoder",
+                     OFFSET(delay_flush), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, VD },
+    { NULL }
+};
+
+#define DECLARE_MEDIACODEC_VCLASS(short_name)                   \
+static const AVClass ff_##short_name##_mediacodec_dec_class = { \
+    .class_name = #short_name "_mediacodec",                    \
+    .item_name  = av_default_item_name,                         \
+    .option     = ff_mediacodec_vdec_options,                   \
+    .version    = LIBAVUTIL_VERSION_INT,                        \
+};
+
+#define DECLARE_MEDIACODEC_VDEC(short_name, full_name, codec_id, bsf)                          \
+DECLARE_MEDIACODEC_VCLASS(short_name)                                                          \
+AVCodec ff_##short_name##_mediacodec_decoder = {                                               \
+    .name           = #short_name "_mediacodec",                                               \
+    .long_name      = NULL_IF_CONFIG_SMALL(full_name " Android MediaCodec decoder"),           \
+    .type           = AVMEDIA_TYPE_VIDEO,                                                      \
+    .id             = codec_id,                                                                \
+    .priv_class     = &ff_##short_name##_mediacodec_dec_class,                                 \
+    .priv_data_size = sizeof(MediaCodecH264DecContext),                                        \
+    .init           = mediacodec_decode_init,                                                  \
+    .receive_frame  = mediacodec_receive_frame,                                                \
+    .flush          = mediacodec_decode_flush,                                                 \
+    .close          = mediacodec_decode_close,                                                 \
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
+    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS,                                               \
+    .bsfs           = bsf,                                                                     \
+    .hw_configs     = mediacodec_hw_configs,                                                   \
+    .wrapper_name   = "mediacodec",                                                            \
+};                                                                                             \
+
+#if CONFIG_H264_MEDIACODEC_DECODER
+DECLARE_MEDIACODEC_VDEC(h264, "H.264", AV_CODEC_ID_H264, "h264_mp4toannexb")
 #endif
 
 #if CONFIG_HEVC_MEDIACODEC_DECODER
-AVCodec ff_hevc_mediacodec_decoder = {
-    .name           = "hevc_mediacodec",
-    .long_name      = NULL_IF_CONFIG_SMALL("H.265 Android MediaCodec decoder"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_HEVC,
-    .priv_data_size = sizeof(MediaCodecH264DecContext),
-    .init           = mediacodec_decode_init,
-    .decode         = mediacodec_decode_frame,
-    .flush          = mediacodec_decode_flush,
-    .close          = mediacodec_decode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING,
-    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS,
-    .bsfs           = "hevc_mp4toannexb",
-};
+DECLARE_MEDIACODEC_VDEC(hevc, "H.265", AV_CODEC_ID_HEVC, "hevc_mp4toannexb")
 #endif
 
 #if CONFIG_MPEG2_MEDIACODEC_DECODER
-AVCodec ff_mpeg2_mediacodec_decoder = {
-    .name           = "mpeg2_mediacodec",
-    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-2 Android MediaCodec decoder"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_MPEG2VIDEO,
-    .priv_data_size = sizeof(MediaCodecH264DecContext),
-    .init           = mediacodec_decode_init,
-    .decode         = mediacodec_decode_frame,
-    .flush          = mediacodec_decode_flush,
-    .close          = mediacodec_decode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING,
-    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS,
-};
+DECLARE_MEDIACODEC_VDEC(mpeg2, "MPEG-2", AV_CODEC_ID_MPEG2VIDEO, NULL)
 #endif
 
 #if CONFIG_MPEG4_MEDIACODEC_DECODER
-AVCodec ff_mpeg4_mediacodec_decoder = {
-    .name           = "mpeg4_mediacodec",
-    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 Android MediaCodec decoder"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_MPEG4,
-    .priv_data_size = sizeof(MediaCodecH264DecContext),
-    .init           = mediacodec_decode_init,
-    .decode         = mediacodec_decode_frame,
-    .flush          = mediacodec_decode_flush,
-    .close          = mediacodec_decode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING,
-    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS,
-};
+DECLARE_MEDIACODEC_VDEC(mpeg4, "MPEG-4", AV_CODEC_ID_MPEG4, NULL)
 #endif
 
 #if CONFIG_VP8_MEDIACODEC_DECODER
-AVCodec ff_vp8_mediacodec_decoder = {
-    .name           = "vp8_mediacodec",
-    .long_name      = NULL_IF_CONFIG_SMALL("VP8 Android MediaCodec decoder"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_VP8,
-    .priv_data_size = sizeof(MediaCodecH264DecContext),
-    .init           = mediacodec_decode_init,
-    .decode         = mediacodec_decode_frame,
-    .flush          = mediacodec_decode_flush,
-    .close          = mediacodec_decode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING,
-    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS,
-};
+DECLARE_MEDIACODEC_VDEC(vp8, "VP8", AV_CODEC_ID_VP8, NULL)
 #endif
 
 #if CONFIG_VP9_MEDIACODEC_DECODER
-AVCodec ff_vp9_mediacodec_decoder = {
-    .name           = "vp9_mediacodec",
-    .long_name      = NULL_IF_CONFIG_SMALL("VP9 Android MediaCodec decoder"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_VP9,
-    .priv_data_size = sizeof(MediaCodecH264DecContext),
-    .init           = mediacodec_decode_init,
-    .decode         = mediacodec_decode_frame,
-    .flush          = mediacodec_decode_flush,
-    .close          = mediacodec_decode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING,
-    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS,
-};
+DECLARE_MEDIACODEC_VDEC(vp9, "VP9", AV_CODEC_ID_VP9, NULL)
 #endif

diff --git a/libavcodec/mediacodecdec_common.c b/libavcodec/mediacodecdec_common.c
index afa054f..7c2661f 100644
--- a/libavcodec/mediacodecdec_common.c
+++ b/libavcodec/mediacodecdec_common.c

@@ -24,6 +24,7 @@
 #include <sys/types.h>
 
 #include "libavutil/common.h"
+#include "libavutil/hwcontext_mediacodec.h"
 #include "libavutil/mem.h"
 #include "libavutil/log.h"
 #include "libavutil/pixfmt.h"
@@ -177,11 +178,16 @@
     MediaCodecDecContext *ctx = buffer->ctx;
     int released = atomic_load(&buffer->released);
 
-    if (!released) {
+    if (!released && (ctx->delay_flush || buffer->serial == atomic_load(&ctx->serial))) {
+        atomic_fetch_sub(&ctx->hw_buffer_count, 1);
+        av_log(ctx->avctx, AV_LOG_DEBUG,
+               "Releasing output buffer %zd (%p) ts=%"PRId64" on free() [%d pending]\n",
+               buffer->index, buffer, buffer->pts, atomic_load(&ctx->hw_buffer_count));
         ff_AMediaCodec_releaseOutputBuffer(ctx->codec, buffer->index, 0);
     }
 
-    ff_mediacodec_dec_unref(ctx);
+    if (ctx->delay_flush)
+        ff_mediacodec_dec_unref(ctx);
     av_freep(&buffer);
 }
 
@@ -199,10 +205,11 @@
     frame->width = avctx->width;
     frame->height = avctx->height;
     frame->format = avctx->pix_fmt;
+    frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
 
     if (avctx->pkt_timebase.num && avctx->pkt_timebase.den) {
         frame->pts = av_rescale_q(info->presentationTimeUs,
-                                      av_make_q(1, 1000000),
+                                      AV_TIME_BASE_Q,
                                       avctx->pkt_timebase);
     } else {
         frame->pts = info->presentationTimeUs;
@@ -235,13 +242,20 @@
     }
 
     buffer->ctx = s;
-    ff_mediacodec_dec_ref(s);
+    buffer->serial = atomic_load(&s->serial);
+    if (s->delay_flush)
+        ff_mediacodec_dec_ref(s);
 
     buffer->index = index;
     buffer->pts = info->presentationTimeUs;
 
     frame->data[3] = (uint8_t *)buffer;
 
+    atomic_fetch_add(&s->hw_buffer_count, 1);
+    av_log(avctx, AV_LOG_DEBUG,
+            "Wrapping output buffer %zd (%p) ts=%"PRId64" [%d pending]\n",
+            buffer->index, buffer, buffer->pts, atomic_load(&s->hw_buffer_count));
+
     return 0;
 fail:
     av_freep(buffer);
@@ -284,7 +298,7 @@
      *   * 0-sized avpackets are pushed to flush remaining frames at EOS */
     if (avctx->pkt_timebase.num && avctx->pkt_timebase.den) {
         frame->pts = av_rescale_q(info->presentationTimeUs,
-                                      av_make_q(1, 1000000),
+                                      AV_TIME_BASE_Q,
                                       avctx->pkt_timebase);
     } else {
         frame->pts = info->presentationTimeUs;
@@ -296,9 +310,9 @@
 #endif
     frame->pkt_dts = AV_NOPTS_VALUE;
 
-    av_log(avctx, AV_LOG_DEBUG,
+    av_log(avctx, AV_LOG_TRACE,
             "Frame: width=%d stride=%d height=%d slice-height=%d "
-            "crop-top=%d crop-bottom=%d crop-left=%d crop-right=%d encoder=%s\n"
+            "crop-top=%d crop-bottom=%d crop-left=%d crop-right=%d encoder=%s "
             "destination linesizes=%d,%d,%d\n" ,
             avctx->width, s->stride, avctx->height, s->slice_height,
             s->crop_top, s->crop_bottom, s->crop_left, s->crop_right, s->codec_name,
@@ -338,11 +352,22 @@
     return ret;
 }
 
+#define AMEDIAFORMAT_GET_INT32(name, key, mandatory) do {                              \
+    int32_t value = 0;                                                                 \
+    if (ff_AMediaFormat_getInt32(s->format, key, &value)) {                            \
+        (name) = value;                                                                \
+    } else if (mandatory) {                                                            \
+        av_log(avctx, AV_LOG_ERROR, "Could not get %s from format %s\n", key, format); \
+        ret = AVERROR_EXTERNAL;                                                        \
+        goto fail;                                                                     \
+    }                                                                                  \
+} while (0)                                                                            \
+
 static int mediacodec_dec_parse_format(AVCodecContext *avctx, MediaCodecDecContext *s)
 {
+    int ret = 0;
     int width = 0;
     int height = 0;
-    int32_t value = 0;
     char *format = NULL;
 
     if (!s->format) {
@@ -355,88 +380,65 @@
         return AVERROR_EXTERNAL;
     }
     av_log(avctx, AV_LOG_DEBUG, "Parsing MediaFormat %s\n", format);
-    av_freep(&format);
 
     /* Mandatory fields */
-    if (!ff_AMediaFormat_getInt32(s->format, "width", &value)) {
-        format = ff_AMediaFormat_toString(s->format);
-        av_log(avctx, AV_LOG_ERROR, "Could not get %s from format %s\n", "width", format);
-        av_freep(&format);
-        return AVERROR_EXTERNAL;
-    }
-    s->width = value;
+    AMEDIAFORMAT_GET_INT32(s->width,  "width", 1);
+    AMEDIAFORMAT_GET_INT32(s->height, "height", 1);
 
-    if (!ff_AMediaFormat_getInt32(s->format, "height", &value)) {
-        format = ff_AMediaFormat_toString(s->format);
-        av_log(avctx, AV_LOG_ERROR, "Could not get %s from format %s\n", "height", format);
-        av_freep(&format);
-        return AVERROR_EXTERNAL;
-    }
-    s->height = value;
+    AMEDIAFORMAT_GET_INT32(s->stride, "stride", 0);
+    s->stride = s->stride > 0 ? s->stride : s->width;
 
-    if (!ff_AMediaFormat_getInt32(s->format, "stride", &value)) {
-        format = ff_AMediaFormat_toString(s->format);
-        av_log(avctx, AV_LOG_ERROR, "Could not get %s from format %s\n", "stride", format);
-        av_freep(&format);
-        return AVERROR_EXTERNAL;
-    }
-    s->stride = value > 0 ? value : s->width;
+    AMEDIAFORMAT_GET_INT32(s->slice_height, "slice-height", 0);
 
-    if (!ff_AMediaFormat_getInt32(s->format, "slice-height", &value)) {
-        format = ff_AMediaFormat_toString(s->format);
-        av_log(avctx, AV_LOG_ERROR, "Could not get %s from format %s\n", "slice-height", format);
-        av_freep(&format);
-        return AVERROR_EXTERNAL;
-    }
-    s->slice_height = value > 0 ? value : s->height;
-
-    if (strstr(s->codec_name, "OMX.Nvidia.")) {
+    if (strstr(s->codec_name, "OMX.Nvidia.") && s->slice_height == 0) {
         s->slice_height = FFALIGN(s->height, 16);
     } else if (strstr(s->codec_name, "OMX.SEC.avc.dec")) {
         s->slice_height = avctx->height;
         s->stride = avctx->width;
+    } else if (s->slice_height == 0) {
+        s->slice_height = s->height;
     }
 
-    if (!ff_AMediaFormat_getInt32(s->format, "color-format", &value)) {
-        format = ff_AMediaFormat_toString(s->format);
-        av_log(avctx, AV_LOG_ERROR, "Could not get %s from format %s\n", "color-format", format);
-        av_freep(&format);
-        return AVERROR_EXTERNAL;
-    }
-    s->color_format = value;
-
-    s->pix_fmt = avctx->pix_fmt = mcdec_map_color_format(avctx, s, value);
+    AMEDIAFORMAT_GET_INT32(s->color_format, "color-format", 1);
+    avctx->pix_fmt = mcdec_map_color_format(avctx, s, s->color_format);
     if (avctx->pix_fmt == AV_PIX_FMT_NONE) {
         av_log(avctx, AV_LOG_ERROR, "Output color format is not supported\n");
-        return AVERROR(EINVAL);
+        ret = AVERROR(EINVAL);
+        goto fail;
     }
 
     /* Optional fields */
-    if (ff_AMediaFormat_getInt32(s->format, "crop-top", &value))
-        s->crop_top = value;
-
-    if (ff_AMediaFormat_getInt32(s->format, "crop-bottom", &value))
-        s->crop_bottom = value;
-
-    if (ff_AMediaFormat_getInt32(s->format, "crop-left", &value))
-        s->crop_left = value;
-
-    if (ff_AMediaFormat_getInt32(s->format, "crop-right", &value))
-        s->crop_right = value;
+    AMEDIAFORMAT_GET_INT32(s->crop_top,    "crop-top",    0);
+    AMEDIAFORMAT_GET_INT32(s->crop_bottom, "crop-bottom", 0);
+    AMEDIAFORMAT_GET_INT32(s->crop_left,   "crop-left",   0);
+    AMEDIAFORMAT_GET_INT32(s->crop_right,  "crop-right",  0);
 
     width = s->crop_right + 1 - s->crop_left;
     height = s->crop_bottom + 1 - s->crop_top;
 
+    AMEDIAFORMAT_GET_INT32(s->display_width,  "display-width",  0);
+    AMEDIAFORMAT_GET_INT32(s->display_height, "display-height", 0);
+
+    if (s->display_width && s->display_height) {
+        AVRational sar = av_div_q(
+            (AVRational){ s->display_width, s->display_height },
+            (AVRational){ width, height });
+        ff_set_sar(avctx, sar);
+    }
+
     av_log(avctx, AV_LOG_INFO,
         "Output crop parameters top=%d bottom=%d left=%d right=%d, "
         "resulting dimensions width=%d height=%d\n",
         s->crop_top, s->crop_bottom, s->crop_left, s->crop_right,
         width, height);
 
+    av_freep(&format);
     return ff_set_dimensions(avctx, width, height);
+fail:
+    av_freep(&format);
+    return ret;
 }
 
-
 static int mediacodec_dec_flush_codec(AVCodecContext *avctx, MediaCodecDecContext *s)
 {
     FFAMediaCodec *codec = s->codec;
@@ -447,6 +449,9 @@
     s->draining = 0;
     s->flushing = 0;
     s->eos = 0;
+    atomic_fetch_add(&s->serial, 1);
+    atomic_init(&s->hw_buffer_count, 0);
+    s->current_input_buffer = -1;
 
     status = ff_AMediaCodec_flush(codec);
     if (status < 0) {
@@ -470,13 +475,28 @@
         AV_PIX_FMT_NONE,
     };
 
+    s->avctx = avctx;
     atomic_init(&s->refcount, 1);
+    atomic_init(&s->hw_buffer_count, 0);
+    atomic_init(&s->serial, 1);
+    s->current_input_buffer = -1;
 
     pix_fmt = ff_get_format(avctx, pix_fmts);
     if (pix_fmt == AV_PIX_FMT_MEDIACODEC) {
         AVMediaCodecContext *user_ctx = avctx->hwaccel_context;
 
-        if (user_ctx && user_ctx->surface) {
+        if (avctx->hw_device_ctx) {
+            AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)(avctx->hw_device_ctx->data);
+            if (device_ctx->type == AV_HWDEVICE_TYPE_MEDIACODEC) {
+                if (device_ctx->hwctx) {
+                    AVMediaCodecDeviceContext *mediacodec_ctx = (AVMediaCodecDeviceContext *)device_ctx->hwctx;
+                    s->surface = ff_mediacodec_surface_ref(mediacodec_ctx->surface, avctx);
+                    av_log(avctx, AV_LOG_INFO, "Using surface %p\n", s->surface);
+                }
+            }
+        }
+
+        if (!s->surface && user_ctx && user_ctx->surface) {
             s->surface = ff_mediacodec_surface_ref(user_ctx->surface, avctx);
             av_log(avctx, AV_LOG_INFO, "Using surface %p\n", s->surface);
         }
@@ -543,23 +563,18 @@
     return ret;
 }
 
-int ff_mediacodec_dec_decode(AVCodecContext *avctx, MediaCodecDecContext *s,
-                             AVFrame *frame, int *got_frame,
-                             AVPacket *pkt)
+int ff_mediacodec_dec_send(AVCodecContext *avctx, MediaCodecDecContext *s,
+                           AVPacket *pkt, bool wait)
 {
-    int ret;
     int offset = 0;
     int need_draining = 0;
     uint8_t *data;
-    ssize_t index;
+    ssize_t index = s->current_input_buffer;
     size_t size;
     FFAMediaCodec *codec = s->codec;
-    FFAMediaCodecBufferInfo info = { 0 };
-
     int status;
-
-    int64_t input_dequeue_timeout_us = INPUT_DEQUEUE_TIMEOUT_US;
-    int64_t output_dequeue_timeout_us = OUTPUT_DEQUEUE_TIMEOUT_US;
+    int64_t input_dequeue_timeout_us = wait ? INPUT_DEQUEUE_TIMEOUT_US : 0;
+    int64_t pts;
 
     if (s->flushing) {
         av_log(avctx, AV_LOG_ERROR, "Decoder is flushing and cannot accept new buffer "
@@ -572,20 +587,23 @@
     }
 
     if (s->draining && s->eos) {
-        return 0;
+        return AVERROR_EOF;
     }
 
     while (offset < pkt->size || (need_draining && !s->draining)) {
-
-        index = ff_AMediaCodec_dequeueInputBuffer(codec, input_dequeue_timeout_us);
-        if (ff_AMediaCodec_infoTryAgainLater(codec, index)) {
-            break;
-        }
-
         if (index < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Failed to dequeue input buffer (status=%zd)\n", index);
-            return AVERROR_EXTERNAL;
+            index = ff_AMediaCodec_dequeueInputBuffer(codec, input_dequeue_timeout_us);
+            if (ff_AMediaCodec_infoTryAgainLater(codec, index)) {
+                av_log(avctx, AV_LOG_TRACE, "No input buffer available, try again later\n");
+                break;
+            }
+
+            if (index < 0) {
+                av_log(avctx, AV_LOG_ERROR, "Failed to dequeue input buffer (status=%zd)\n", index);
+                return AVERROR_EXTERNAL;
+            }
         }
+        s->current_input_buffer = -1;
 
         data = ff_AMediaCodec_getInputBuffer(codec, index, &size);
         if (!data) {
@@ -593,13 +611,13 @@
             return AVERROR_EXTERNAL;
         }
 
-        if (need_draining) {
-            int64_t pts = pkt->pts;
-            uint32_t flags = ff_AMediaCodec_getBufferFlagEndOfStream(codec);
+        pts = pkt->pts;
+        if (pts != AV_NOPTS_VALUE && avctx->pkt_timebase.num && avctx->pkt_timebase.den) {
+            pts = av_rescale_q(pts, avctx->pkt_timebase, AV_TIME_BASE_Q);
+        }
 
-            if (s->surface) {
-                pts = av_rescale_q(pts, avctx->pkt_timebase, av_make_q(1, 1000000));
-            }
+        if (need_draining) {
+            uint32_t flags = ff_AMediaCodec_getBufferFlagEndOfStream(codec);
 
             av_log(avctx, AV_LOG_DEBUG, "Sending End Of Stream signal\n");
 
@@ -609,33 +627,53 @@
                 return AVERROR_EXTERNAL;
             }
 
+            av_log(avctx, AV_LOG_TRACE,
+                   "Queued input buffer %zd size=%zd ts=%"PRIi64"\n", index, size, pts);
+
             s->draining = 1;
             break;
         } else {
-            int64_t pts = pkt->pts;
-
             size = FFMIN(pkt->size - offset, size);
-
             memcpy(data, pkt->data + offset, size);
             offset += size;
 
-            if (avctx->pkt_timebase.num && avctx->pkt_timebase.den) {
-                pts = av_rescale_q(pts, avctx->pkt_timebase, av_make_q(1, 1000000));
-            }
-
             status = ff_AMediaCodec_queueInputBuffer(codec, index, 0, size, pts, 0);
             if (status < 0) {
                 av_log(avctx, AV_LOG_ERROR, "Failed to queue input buffer (status = %d)\n", status);
                 return AVERROR_EXTERNAL;
             }
+
+            av_log(avctx, AV_LOG_TRACE,
+                   "Queued input buffer %zd size=%zd ts=%"PRIi64"\n", index, size, pts);
         }
     }
 
-    if (need_draining || s->draining) {
+    if (offset == 0)
+        return AVERROR(EAGAIN);
+    return offset;
+}
+
+int ff_mediacodec_dec_receive(AVCodecContext *avctx, MediaCodecDecContext *s,
+                              AVFrame *frame, bool wait)
+{
+    int ret;
+    uint8_t *data;
+    ssize_t index;
+    size_t size;
+    FFAMediaCodec *codec = s->codec;
+    FFAMediaCodecBufferInfo info = { 0 };
+    int status;
+    int64_t output_dequeue_timeout_us = OUTPUT_DEQUEUE_TIMEOUT_US;
+
+    if (s->draining && s->eos) {
+        return AVERROR_EOF;
+    }
+
+    if (s->draining) {
         /* If the codec is flushing or need to be flushed, block for a fair
          * amount of time to ensure we got a frame */
         output_dequeue_timeout_us = OUTPUT_DEQUEUE_BLOCK_TIMEOUT_US;
-    } else if (s->output_buffer_count == 0) {
+    } else if (s->output_buffer_count == 0 || !wait) {
         /* If the codec hasn't produced any frames, do not block so we
          * can push data to it as fast as possible, and get the first
          * frame */
@@ -644,9 +682,7 @@
 
     index = ff_AMediaCodec_dequeueOutputBuffer(codec, &info, output_dequeue_timeout_us);
     if (index >= 0) {
-        int ret;
-
-        av_log(avctx, AV_LOG_DEBUG, "Got output buffer %zd"
+        av_log(avctx, AV_LOG_TRACE, "Got output buffer %zd"
                 " offset=%" PRIi32 " size=%" PRIi32 " ts=%" PRIi64
                 " flags=%" PRIu32 "\n", index, info.offset, info.size,
                 info.presentationTimeUs, info.flags);
@@ -674,8 +710,8 @@
                 }
             }
 
-            *got_frame = 1;
             s->output_buffer_count++;
+            return 0;
         } else {
             status = ff_AMediaCodec_releaseOutputBuffer(codec, index, 0);
             if (status < 0) {
@@ -718,16 +754,28 @@
                                         "while draining remaining frames, output will probably lack frames\n",
                                         output_dequeue_timeout_us / 1000);
         } else {
-            av_log(avctx, AV_LOG_DEBUG, "No output buffer available, try again later\n");
+            av_log(avctx, AV_LOG_TRACE, "No output buffer available, try again later\n");
         }
     } else {
         av_log(avctx, AV_LOG_ERROR, "Failed to dequeue output buffer (status=%zd)\n", index);
         return AVERROR_EXTERNAL;
     }
 
-    return offset;
+    return AVERROR(EAGAIN);
 }
 
+/*
+* ff_mediacodec_dec_flush returns 0 if the flush cannot be performed on
+* the codec (because the user retains frames). The codec stays in the
+* flushing state.
+*
+* ff_mediacodec_dec_flush returns 1 if the flush can actually be
+* performed on the codec. The codec leaves the flushing state and can
+* process again packets.
+*
+* ff_mediacodec_dec_flush returns a negative value if an error has
+* occurred.
+*/
 int ff_mediacodec_dec_flush(AVCodecContext *avctx, MediaCodecDecContext *s)
 {
     if (!s->surface || atomic_load(&s->refcount) == 1) {
@@ -757,45 +805,3 @@
 {
     return s->flushing;
 }
-
-AVHWAccel ff_h264_mediacodec_hwaccel = {
-    .name    = "mediacodec",
-    .type    = AVMEDIA_TYPE_VIDEO,
-    .id      = AV_CODEC_ID_H264,
-    .pix_fmt = AV_PIX_FMT_MEDIACODEC,
-};
-
-AVHWAccel ff_hevc_mediacodec_hwaccel = {
-    .name    = "mediacodec",
-    .type    = AVMEDIA_TYPE_VIDEO,
-    .id      = AV_CODEC_ID_HEVC,
-    .pix_fmt = AV_PIX_FMT_MEDIACODEC,
-};
-
-AVHWAccel ff_mpeg2_mediacodec_hwaccel = {
-    .name    = "mediacodec",
-    .type    = AVMEDIA_TYPE_VIDEO,
-    .id      = AV_CODEC_ID_MPEG2VIDEO,
-    .pix_fmt = AV_PIX_FMT_MEDIACODEC,
-};
-
-AVHWAccel ff_mpeg4_mediacodec_hwaccel = {
-    .name    = "mediacodec",
-    .type    = AVMEDIA_TYPE_VIDEO,
-    .id      = AV_CODEC_ID_MPEG4,
-    .pix_fmt = AV_PIX_FMT_MEDIACODEC,
-};
-
-AVHWAccel ff_vp8_mediacodec_hwaccel = {
-    .name    = "mediacodec",
-    .type    = AVMEDIA_TYPE_VIDEO,
-    .id      = AV_CODEC_ID_VP8,
-    .pix_fmt = AV_PIX_FMT_MEDIACODEC,
-};
-
-AVHWAccel ff_vp9_mediacodec_hwaccel = {
-    .name    = "mediacodec",
-    .type    = AVMEDIA_TYPE_VIDEO,
-    .id      = AV_CODEC_ID_VP9,
-    .pix_fmt = AV_PIX_FMT_MEDIACODEC,
-};

diff --git a/libavcodec/mediacodecdec_common.h b/libavcodec/mediacodecdec_common.h
index 10f3827..0b21129 100644
--- a/libavcodec/mediacodecdec_common.h
+++ b/libavcodec/mediacodecdec_common.h

@@ -25,6 +25,7 @@
 
 #include <stdint.h>
 #include <stdatomic.h>
+#include <stdbool.h>
 #include <sys/types.h>
 
 #include "libavutil/frame.h"
@@ -35,7 +36,9 @@
 
 typedef struct MediaCodecDecContext {
 
+    AVCodecContext *avctx;
     atomic_int refcount;
+    atomic_int hw_buffer_count;
 
     char *codec_name;
 
@@ -54,13 +57,18 @@
     int stride;
     int slice_height;
     int color_format;
-    enum AVPixelFormat pix_fmt;
     int crop_top;
     int crop_bottom;
     int crop_left;
     int crop_right;
+    int display_width;
+    int display_height;
 
     uint64_t output_buffer_count;
+    ssize_t current_input_buffer;
+
+    bool delay_flush;
+    atomic_int serial;
 
 } MediaCodecDecContext;
 
@@ -69,11 +77,15 @@
                            const char *mime,
                            FFAMediaFormat *format);
 
-int ff_mediacodec_dec_decode(AVCodecContext *avctx,
-                             MediaCodecDecContext *s,
-                             AVFrame *frame,
-                             int *got_frame,
-                             AVPacket *pkt);
+int ff_mediacodec_dec_send(AVCodecContext *avctx,
+                           MediaCodecDecContext *s,
+                           AVPacket *pkt,
+                           bool wait);
+
+int ff_mediacodec_dec_receive(AVCodecContext *avctx,
+                              MediaCodecDecContext *s,
+                              AVFrame *frame,
+                              bool wait);
 
 int ff_mediacodec_dec_flush(AVCodecContext *avctx,
                             MediaCodecDecContext *s);
@@ -90,6 +102,7 @@
     ssize_t index;
     int64_t pts;
     atomic_int released;
+    int serial;
 
 } MediaCodecBuffer;
 

diff --git a/libavcodec/microdvddec.c b/libavcodec/microdvddec.c
index 4a34267..dad0ec8 100644
--- a/libavcodec/microdvddec.c
+++ b/libavcodec/microdvddec.c

@@ -99,7 +99,7 @@
         case 'Y':
             tag.persistent = MICRODVD_PERSISTENT_ON;
         case 'y':
-            while (*s && *s != '}') {
+            while (*s && *s != '}' && s - start < 256) {
                 int style_index = indexof(MICRODVD_STYLES, *s);
 
                 if (style_index >= 0)

diff --git a/libavcodec/mips/cabac.h b/libavcodec/mips/cabac.h
new file mode 100644
index 0000000..82cee29
--- /dev/null
+++ b/libavcodec/mips/cabac.h

@@ -0,0 +1,119 @@
+/*
+ * Loongson SIMD optimized h264chroma
+ *
+ * Copyright (c) 2018 Loongson Technology Corporation Limited
+ * Copyright (c) 2018 Shiyou Yin <yinshiyou-hf@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_CABAC_H
+#define AVCODEC_MIPS_CABAC_H
+
+#include "libavcodec/cabac.h"
+#include "libavutil/mips/mmiutils.h"
+#include "config.h"
+
+#define get_cabac_inline get_cabac_inline_mips
+static av_always_inline int get_cabac_inline(CABACContext *c,
+                                             uint8_t * const state){
+    mips_reg tmp0, tmp1, tmp2, bit;
+
+    __asm__ volatile (
+        "lbu          %[bit],        0(%[state])                   \n\t"
+        "and          %[tmp0],       %[c_range],     0xC0          \n\t"
+        PTR_ADDU     "%[tmp0],       %[tmp0],        %[tmp0]       \n\t"
+        PTR_ADDU     "%[tmp0],       %[tmp0],        %[tables]     \n\t"
+        PTR_ADDU     "%[tmp0],       %[tmp0],        %[bit]        \n\t"
+        /* tmp1: RangeLPS */
+        "lbu          %[tmp1],       %[lps_off](%[tmp0])           \n\t"
+
+        PTR_SUBU     "%[c_range],    %[c_range],     %[tmp1]       \n\t"
+        PTR_SLL      "%[tmp0],       %[c_range],     0x11          \n\t"
+        PTR_SUBU     "%[tmp0],       %[tmp0],        %[c_low]      \n\t"
+
+        /* tmp2: lps_mask */
+        PTR_SRA      "%[tmp2],       %[tmp0],        0x1F          \n\t"
+        /* If tmp0 < 0, lps_mask ==  0xffffffff*/
+        /* If tmp0 >= 0, lps_mask ==  0x00000000*/
+        "beqz         %[tmp2],       1f                            \n\t"
+        PTR_SLL      "%[tmp0],       %[c_range],     0x11          \n\t"
+        PTR_SUBU     "%[c_low],      %[c_low],       %[tmp0]       \n\t"
+        PTR_SUBU     "%[tmp0],       %[tmp1],        %[c_range]    \n\t"
+        PTR_ADDU     "%[c_range],    %[c_range],     %[tmp0]       \n\t"
+        "xor          %[bit],        %[bit],         %[tmp2]       \n\t"
+
+        "1:                                                        \n\t"
+        /* tmp1: *state */
+        PTR_ADDU     "%[tmp0],       %[tables],      %[bit]        \n\t"
+        "lbu          %[tmp1],       %[mlps_off](%[tmp0])          \n\t"
+        /* tmp2: lps_mask */
+        PTR_ADDU     "%[tmp0],       %[tables],      %[c_range]    \n\t"
+        "lbu          %[tmp2],       %[norm_off](%[tmp0])          \n\t"
+
+        "sb           %[tmp1],       0(%[state])                   \n\t"
+        "and          %[bit],        %[bit],         0x01          \n\t"
+        PTR_SLL      "%[c_range],    %[c_range],     %[tmp2]       \n\t"
+        PTR_SLL      "%[c_low],      %[c_low],       %[tmp2]       \n\t"
+
+        "and          %[tmp0],       %[c_low],       %[cabac_mask] \n\t"
+        "bnez         %[tmp0],       1f                            \n\t"
+        PTR_ADDI     "%[tmp0],       %[c_low],       -0X01         \n\t"
+        "xor          %[tmp0],       %[c_low],       %[tmp0]       \n\t"
+        PTR_SRA      "%[tmp0],       %[tmp0],        0x0f          \n\t"
+        PTR_ADDU     "%[tmp0],       %[tmp0],        %[tables]     \n\t"
+        "lbu          %[tmp2],       %[norm_off](%[tmp0])          \n\t"
+#if CABAC_BITS == 16
+        "lbu          %[tmp0],       0(%[c_bytestream])            \n\t"
+        "lbu          %[tmp1],       1(%[c_bytestream])            \n\t"
+        PTR_SLL      "%[tmp0],       %[tmp0],        0x09          \n\t"
+        PTR_SLL      "%[tmp1],       %[tmp1],        0x01          \n\t"
+        PTR_ADDU     "%[tmp0],       %[tmp0],        %[tmp1]       \n\t"
+#else
+        "lbu          %[tmp0],       0(%[c_bytestream])            \n\t"
+        PTR_SLL      "%[tmp0],       %[tmp0],        0x01          \n\t"
+#endif
+        PTR_SUBU     "%[tmp0],       %[tmp0],        %[cabac_mask] \n\t"
+
+        "li           %[tmp1],       0x07                          \n\t"
+        PTR_SUBU     "%[tmp1],       %[tmp1],        %[tmp2]       \n\t"
+        PTR_SLL      "%[tmp0],       %[tmp0],        %[tmp1]       \n\t"
+        PTR_ADDU     "%[c_low],      %[c_low],       %[tmp0]       \n\t"
+
+#if !UNCHECKED_BITSTREAM_READER
+        "bge          %[c_bytestream], %[c_bytestream_end], 1f     \n\t"
+#endif
+        PTR_ADDIU    "%[c_bytestream], %[c_bytestream],     0X02   \n\t"
+        "1:                                                        \n\t"
+    : [bit]"=&r"(bit), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2),
+      [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
+      [c_bytestream]"+&r"(c->bytestream)
+    : [state]"r"(state), [tables]"r"(ff_h264_cabac_tables),
+#if !UNCHECKED_BITSTREAM_READER
+      [c_bytestream_end]"r"(c->bytestream_end),
+#endif
+      [lps_off]"i"(H264_LPS_RANGE_OFFSET),
+      [mlps_off]"i"(H264_MLPS_STATE_OFFSET + 128),
+      [norm_off]"i"(H264_NORM_SHIFT_OFFSET),
+      [cabac_mask]"i"(CABAC_MASK)
+    : "memory"
+    );
+
+    return bit;
+}
+
+#endif /* AVCODEC_MIPS_CABAC_H */

diff --git a/libavcodec/mips/h264chroma_mmi.c b/libavcodec/mips/h264chroma_mmi.c
index bafe0f9..91b2cc4 100644
--- a/libavcodec/mips/h264chroma_mmi.c
+++ b/libavcodec/mips/h264chroma_mmi.c

@@ -29,318 +29,310 @@
 void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
         int h, int x, int y)
 {
-    const int A = (8 - x) * (8 - y);
-    const int B = x * (8 - y);
-    const int C = (8 - x) * y;
-    const int D = x * y;
-    const int E = B + C;
+    int A = 64, B, C, D, E;
     double ftmp[10];
     uint64_t tmp[1];
-    mips_reg addr[1];
-    DECLARE_VAR_ALL64;
 
-    if (D) {
+    if (!(x || y)) {
+        /* x=0, y=0, A=64 */
         __asm__ volatile (
-            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-            "dli        %[tmp0],    0x06                                \n\t"
-            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
-            "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
-            "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
-            "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
-            "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
+            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
+            "dli        %[tmp0],    0x06                               \n\t"
+            "mtc1       %[tmp0],    %[ftmp4]                           \n\t"
 
-            "1:                                                         \n\t"
-            PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
+            "1:                                                        \n\t"
             MMI_ULDC1(%[ftmp1], %[src], 0x00)
-            MMI_ULDC1(%[ftmp2], %[src], 0x01)
-            MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
-            MMI_ULDC1(%[ftmp4], %[addr0], 0x01)
+            "addi       %[h],       %[h],           -0x04              \n\t"
+            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+            MMI_ULDC1(%[ftmp5], %[src], 0x00)
+            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+            MMI_ULDC1(%[ftmp6], %[src], 0x00)
+            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+            MMI_ULDC1(%[ftmp7], %[src], 0x00)
 
-            "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
-            "pmullh     %[ftmp7],   %[ftmp7],       %[B]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]            \n\t"
-            "pmullh     %[ftmp6],   %[ftmp6],       %[A]                \n\t"
-            "pmullh     %[ftmp8],   %[ftmp8],       %[B]                \n\t"
-            "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]            \n\t"
-
-            "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]            \n\t"
-            "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
-            "pmullh     %[ftmp7],   %[ftmp7],       %[D]                \n\t"
-            "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]            \n\t"
-            "pmullh     %[ftmp6],   %[ftmp6],       %[C]                \n\t"
-            "pmullh     %[ftmp8],   %[ftmp8],       %[D]                \n\t"
-            "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]            \n\t"
-
-            "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]            \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            "addi       %[h],       %[h],           -0x01               \n\t"
+            "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
+            "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
+            "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
+            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
+            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
+            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
+            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
+            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
             MMI_SDC1(%[ftmp1], %[dst], 0x00)
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
-            "bnez       %[h],       1b                                  \n\t"
-            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
-              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
-              [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
-              [tmp0]"=&r"(tmp[0]),
-              RESTRICT_ASM_ALL64
-              [addr0]"=&r"(addr[0]),
-              [dst]"+&r"(dst),              [src]"+&r"(src),
-              [h]"+&r"(h)
-            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
-              [A]"f"(A),                    [B]"f"(B),
-              [C]"f"(C),                    [D]"f"(D)
-            : "memory"
-        );
-    } else if (E) {
-        const int step = C ? stride : 1;
 
-        __asm__ volatile (
-            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-            "dli        %[tmp0],    0x06                                \n\t"
-            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
-            "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
-            "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
-
-            "1:                                                         \n\t"
-            PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
-            MMI_ULDC1(%[ftmp1], %[src], 0x00)
-            MMI_ULDC1(%[ftmp2], %[addr0], 0x00)
-
-            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
-            "pmullh     %[ftmp5],   %[ftmp5],       %[E]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]            \n\t"
-            "pmullh     %[ftmp4],   %[ftmp4],       %[A]                \n\t"
-            "pmullh     %[ftmp6],   %[ftmp6],       %[E]                \n\t"
-            "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]            \n\t"
-
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            "addi       %[h],       %[h],           -0x01               \n\t"
+            "punpcklbh  %[ftmp2],   %[ftmp5],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp3],   %[ftmp5],       %[ftmp0]           \n\t"
+            "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
+            "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
+            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
+            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
+            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
+            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
+            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
             MMI_SDC1(%[ftmp1], %[dst], 0x00)
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
-            "bnez       %[h],       1b                                  \n\t"
+
+            "punpcklbh  %[ftmp2],   %[ftmp6],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp3],   %[ftmp6],       %[ftmp0]           \n\t"
+            "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
+            "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
+            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
+            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
+            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
+            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
+            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
+
+            "punpcklbh  %[ftmp2],   %[ftmp7],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp3],   %[ftmp7],       %[ftmp0]           \n\t"
+            "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
+            "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
+            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
+            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
+            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
+            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
+            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
+
+            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+            "bnez       %[h],       1b                                 \n\t"
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
               [tmp0]"=&r"(tmp[0]),
-              RESTRICT_ASM_ALL64
-              [addr0]"=&r"(addr[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
-            : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
-              [ff_pw_32]"f"(ff_pw_32),
-              [A]"f"(A),                    [E]"f"(E)
+            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32)
             : "memory"
         );
     } else {
-        __asm__ volatile (
-            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-            "dli        %[tmp0],    0x06                                \n\t"
-            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
-            "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
+        if (x && y) {
+            /* x!=0, y!=0 */
+            D = x * y;
+            B = (x << 3) - D;
+            C = (y << 3) - D;
+            A = 64 - D - B - C;
 
-            "1:                                                         \n\t"
-            MMI_ULDC1(%[ftmp1], %[src], 0x00)
-            "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
-            "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            MMI_SDC1(%[ftmp1], %[dst], 0x00)
+            __asm__ volatile (
+                "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
+                "dli        %[tmp0],    0x06                               \n\t"
+                "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
+                "pshufh     %[B],       %[B],           %[ftmp0]           \n\t"
+                "mtc1       %[tmp0],    %[ftmp9]                           \n\t"
+                "pshufh     %[C],       %[C],           %[ftmp0]           \n\t"
+                "pshufh     %[D],       %[D],           %[ftmp0]           \n\t"
 
-            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
-            MMI_ULDC1(%[ftmp1], %[src], 0x00)
-            "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
-            "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            "addi       %[h],       %[h],           -0x02               \n\t"
-            MMI_SDC1(%[ftmp1], %[dst], 0x00)
+                "1:                                                        \n\t"
+                MMI_ULDC1(%[ftmp1], %[src], 0x00)
+                MMI_ULDC1(%[ftmp2], %[src], 0x01)
+                PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+                MMI_ULDC1(%[ftmp3], %[src], 0x00)
+                MMI_ULDC1(%[ftmp4], %[src], 0x01)
+                "addi       %[h],       %[h],           -0x02              \n\t"
 
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
-            "bnez       %[h],       1b                                  \n\t"
-            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-              [ftmp4]"=&f"(ftmp[4]),
-              [tmp0]"=&r"(tmp[0]),
-              RESTRICT_ASM_ALL64
-              [dst]"+&r"(dst),              [src]"+&r"(src),
-              [h]"+&r"(h)
-            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
-              [A]"f"(A)
-            : "memory"
-        );
+                "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]           \n\t"
+                "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]           \n\t"
+                "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]           \n\t"
+                "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]           \n\t"
+                "pmullh     %[ftmp5],   %[ftmp5],       %[A]               \n\t"
+                "pmullh     %[ftmp7],   %[ftmp7],       %[B]               \n\t"
+                "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]           \n\t"
+                "pmullh     %[ftmp6],   %[ftmp6],       %[A]               \n\t"
+                "pmullh     %[ftmp8],   %[ftmp8],       %[B]               \n\t"
+                "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]           \n\t"
+
+                "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]           \n\t"
+                "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]           \n\t"
+                "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]           \n\t"
+                "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]           \n\t"
+                "pmullh     %[ftmp5],   %[ftmp5],       %[C]               \n\t"
+                "pmullh     %[ftmp7],   %[ftmp7],       %[D]               \n\t"
+                "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]           \n\t"
+                "pmullh     %[ftmp6],   %[ftmp6],       %[C]               \n\t"
+                "pmullh     %[ftmp8],   %[ftmp8],       %[D]               \n\t"
+                "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]           \n\t"
+
+                "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]           \n\t"
+                "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
+                "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
+                "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
+                "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]           \n\t"
+                "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]           \n\t"
+                "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
+                MMI_SDC1(%[ftmp1], %[dst], 0x00)
+                PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+
+                MMI_ULDC1(%[ftmp1], %[src], 0x00)
+                MMI_ULDC1(%[ftmp2], %[src], 0x01)
+                PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+                MMI_ULDC1(%[ftmp3], %[src], 0x00)
+                MMI_ULDC1(%[ftmp4], %[src], 0x01)
+
+                "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]           \n\t"
+                "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]           \n\t"
+                "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]           \n\t"
+                "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]           \n\t"
+                "pmullh     %[ftmp5],   %[ftmp5],       %[A]               \n\t"
+                "pmullh     %[ftmp7],   %[ftmp7],       %[B]               \n\t"
+                "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]           \n\t"
+                "pmullh     %[ftmp6],   %[ftmp6],       %[A]               \n\t"
+                "pmullh     %[ftmp8],   %[ftmp8],       %[B]               \n\t"
+                "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]           \n\t"
+
+                "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]           \n\t"
+                "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]           \n\t"
+                "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]           \n\t"
+                "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]           \n\t"
+                "pmullh     %[ftmp5],   %[ftmp5],       %[C]               \n\t"
+                "pmullh     %[ftmp7],   %[ftmp7],       %[D]               \n\t"
+                "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]           \n\t"
+                "pmullh     %[ftmp6],   %[ftmp6],       %[C]               \n\t"
+                "pmullh     %[ftmp8],   %[ftmp8],       %[D]               \n\t"
+                "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]           \n\t"
+
+                "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]           \n\t"
+                "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
+                "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
+                "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
+                "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]           \n\t"
+                "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]           \n\t"
+                "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
+                MMI_SDC1(%[ftmp1], %[dst], 0x00)
+                PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+
+                "bnez       %[h],       1b                                 \n\t"
+                : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+                  [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+                  [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+                  [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+                  [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
+                  [tmp0]"=&r"(tmp[0]),
+                  [dst]"+&r"(dst),              [src]"+&r"(src),
+                  [h]"+&r"(h)
+                : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
+                  [A]"f"(A),                    [B]"f"(B),
+                  [C]"f"(C),                    [D]"f"(D)
+                : "memory"
+            );
+        } else {
+            if (x) {
+                /* x!=0, y==0 */
+                E = x << 3;
+                A = 64 - E;
+
+                __asm__ volatile (
+                    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
+                    "dli        %[tmp0],    0x06                               \n\t"
+                    "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
+                    "pshufh     %[E],       %[E],           %[ftmp0]           \n\t"
+                    "mtc1       %[tmp0],    %[ftmp7]                           \n\t"
+
+                    "1:                                                        \n\t"
+                    MMI_ULDC1(%[ftmp1], %[src], 0x00)
+                    MMI_ULDC1(%[ftmp2], %[src], 0x01)
+                    "addi       %[h],       %[h],           -0x01              \n\t"
+                    PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+
+                    "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
+                    "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]           \n\t"
+                    "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]           \n\t"
+                    "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]           \n\t"
+                    "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
+                    "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
+                    "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]           \n\t"
+                    "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
+                    "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
+                    "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]           \n\t"
+
+                    "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
+                    "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
+                    "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]           \n\t"
+                    "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]           \n\t"
+                    "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
+                    MMI_SDC1(%[ftmp1], %[dst], 0x00)
+                    PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+                    "bnez       %[h],       1b                                 \n\t"
+                    : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+                      [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+                      [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+                      [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+                      [tmp0]"=&r"(tmp[0]),
+                      [dst]"+&r"(dst),              [src]"+&r"(src),
+                      [h]"+&r"(h)
+                    : [stride]"r"((mips_reg)stride),
+                      [ff_pw_32]"f"(ff_pw_32),
+                      [A]"f"(A),                    [E]"f"(E)
+                    : "memory"
+                );
+            } else {
+                /* x==0, y!=0 */
+                E = y << 3;
+                A = 64 - E;
+
+                __asm__ volatile (
+                    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
+                    "dli        %[tmp0],    0x06                               \n\t"
+                    "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
+                    "pshufh     %[E],       %[E],           %[ftmp0]           \n\t"
+                    "mtc1       %[tmp0],    %[ftmp7]                           \n\t"
+
+                    "1:                                                        \n\t"
+                    MMI_ULDC1(%[ftmp1], %[src], 0x00)
+                    PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+                    MMI_ULDC1(%[ftmp2], %[src], 0x00)
+                    "addi       %[h],       %[h],           -0x01              \n\t"
+
+                    "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
+                    "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]           \n\t"
+                    "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]           \n\t"
+                    "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]           \n\t"
+                    "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
+                    "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
+                    "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]           \n\t"
+                    "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
+                    "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
+                    "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]           \n\t"
+
+                    "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
+                    "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
+                    "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]           \n\t"
+                    "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]           \n\t"
+                    "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
+                    MMI_SDC1(%[ftmp1], %[dst], 0x00)
+
+                    PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+                    "bnez       %[h],       1b                                 \n\t"
+                    : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+                      [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+                      [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+                      [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+                      [tmp0]"=&r"(tmp[0]),
+                      [dst]"+&r"(dst),              [src]"+&r"(src),
+                      [h]"+&r"(h)
+                    : [stride]"r"((mips_reg)stride),
+                      [ff_pw_32]"f"(ff_pw_32),
+                      [A]"f"(A),                    [E]"f"(E)
+                    : "memory"
+                );
+            }
+        }
     }
 }
 
 void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
         int h, int x, int y)
 {
-    const int A = (8 - x) * (8 - y);
-    const int B = x * (8 - y);
-    const int C = (8 - x) * y;
-    const int D = x * y;
-    const int E = B + C;
+    int A = 64, B, C, D, E;
     double ftmp[10];
     uint64_t tmp[1];
-    mips_reg addr[1];
-    DECLARE_VAR_ALL64;
 
-    if (D) {
-        __asm__ volatile (
-            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-            "dli        %[tmp0],    0x06                                \n\t"
-            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
-            "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
-            "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
-            "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
-            "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
-
-            "1:                                                         \n\t"
-            PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
-            MMI_ULDC1(%[ftmp1], %[src], 0x00)
-            MMI_ULDC1(%[ftmp2], %[src], 0x01)
-            MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
-            MMI_ULDC1(%[ftmp4], %[addr0], 0x01)
-
-            "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
-            "pmullh     %[ftmp7],   %[ftmp7],       %[B]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]            \n\t"
-            "pmullh     %[ftmp6],   %[ftmp6],       %[A]                \n\t"
-            "pmullh     %[ftmp8],   %[ftmp8],       %[B]                \n\t"
-            "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]            \n\t"
-
-            "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]            \n\t"
-            "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
-            "pmullh     %[ftmp7],   %[ftmp7],       %[D]                \n\t"
-            "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]            \n\t"
-            "pmullh     %[ftmp6],   %[ftmp6],       %[C]                \n\t"
-            "pmullh     %[ftmp8],   %[ftmp8],       %[D]                \n\t"
-            "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]            \n\t"
-
-            "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]            \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            MMI_LDC1(%[ftmp2], %[dst], 0x00)
-            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            "addi       %[h],       %[h],           -0x01               \n\t"
-            MMI_SDC1(%[ftmp1], %[dst], 0x00)
-            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            "bnez       %[h],       1b                                  \n\t"
-            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
-              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
-              [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
-              [tmp0]"=&r"(tmp[0]),
-              RESTRICT_ASM_ALL64
-              [addr0]"=&r"(addr[0]),
-              [dst]"+&r"(dst),              [src]"+&r"(src),
-              [h]"+&r"(h)
-            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
-              [A]"f"(A),                    [B]"f"(B),
-              [C]"f"(C),                    [D]"f"(D)
-            : "memory"
-        );
-    } else if (E) {
-        const int step = C ? stride : 1;
-
-        __asm__ volatile (
-            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-            "dli        %[tmp0],    0x06                                \n\t"
-            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
-            "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
-            "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
-
-            "1:                                                         \n\t"
-            PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
-            MMI_ULDC1(%[ftmp1], %[src], 0x00)
-            MMI_ULDC1(%[ftmp2], %[addr0], 0x00)
-
-            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
-            "pmullh     %[ftmp5],   %[ftmp5],       %[E]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]            \n\t"
-            "pmullh     %[ftmp4],   %[ftmp4],       %[A]                \n\t"
-            "pmullh     %[ftmp6],   %[ftmp6],       %[E]                \n\t"
-            "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]            \n\t"
-
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            MMI_LDC1(%[ftmp2], %[dst], 0x00)
-            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            "addi       %[h],       %[h],           -0x01               \n\t"
-            MMI_SDC1(%[ftmp1], %[dst], 0x00)
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
-            "bnez       %[h],       1b                                  \n\t"
-            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
-              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
-              [tmp0]"=&r"(tmp[0]),
-              RESTRICT_ASM_ALL64
-              [addr0]"=&r"(addr[0]),
-              [dst]"+&r"(dst),              [src]"+&r"(src),
-              [h]"+&r"(h)
-            : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
-              [ff_pw_32]"f"(ff_pw_32),
-              [A]"f"(A),                    [E]"f"(E)
-            : "memory"
-        );
-    } else {
+    if(!(x || y)){
+        /* x=0, y=0, A=64 */
         __asm__ volatile (
             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
             "dli        %[tmp0],    0x06                                \n\t"
@@ -349,6 +341,10 @@
 
             "1:                                                         \n\t"
             MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
+            MMI_ULDC1(%[ftmp5], %[src], 0x00)
+            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
+
             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
@@ -360,13 +356,11 @@
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             MMI_LDC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             MMI_SDC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
 
-            MMI_ULDC1(%[ftmp1], %[src], 0x00)
-            "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
+            "punpcklbh  %[ftmp2],   %[ftmp5],       %[ftmp0]            \n\t"
+            "punpckhbh  %[ftmp3],   %[ftmp5],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
             "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
@@ -376,23 +370,195 @@
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             MMI_LDC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
+
             "addi       %[h],       %[h],           -0x02               \n\t"
-            MMI_SDC1(%[ftmp1], %[dst], 0x00)
-
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-              [ftmp4]"=&f"(ftmp[4]),
+              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
               [tmp0]"=&r"(tmp[0]),
-              RESTRICT_ASM_ALL64
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
               [A]"f"(A)
             : "memory"
         );
+    } else {
+        if(x && y) {
+            /* x!=0, y!=0 */
+            D = x * y;
+            B = (x << 3) - D;
+            C = (y << 3) - D;
+            A = 64 - D - B - C;
+            __asm__ volatile (
+                "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
+                "dli        %[tmp0],    0x06                           \n\t"
+                "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
+                "pshufh     %[B],       %[B],           %[ftmp0]       \n\t"
+                "mtc1       %[tmp0],    %[ftmp9]                       \n\t"
+                "pshufh     %[C],       %[C],           %[ftmp0]       \n\t"
+                "pshufh     %[D],       %[D],           %[ftmp0]       \n\t"
+
+                "1:                                                    \n\t"
+                MMI_ULDC1(%[ftmp1], %[src], 0x00)
+                MMI_ULDC1(%[ftmp2], %[src], 0x01)
+                PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
+                MMI_ULDC1(%[ftmp3], %[src], 0x00)
+                MMI_ULDC1(%[ftmp4], %[src], 0x01)
+                "addi       %[h],       %[h],           -0x01          \n\t"
+
+                "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]       \n\t"
+                "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]       \n\t"
+                "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]       \n\t"
+                "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]       \n\t"
+                "pmullh     %[ftmp5],   %[ftmp5],       %[A]           \n\t"
+                "pmullh     %[ftmp7],   %[ftmp7],       %[B]           \n\t"
+                "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]       \n\t"
+                "pmullh     %[ftmp6],   %[ftmp6],       %[A]           \n\t"
+                "pmullh     %[ftmp8],   %[ftmp8],       %[B]           \n\t"
+                "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]       \n\t"
+
+                "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]       \n\t"
+                "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]       \n\t"
+                "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]       \n\t"
+                "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]       \n\t"
+                "pmullh     %[ftmp5],   %[ftmp5],       %[C]           \n\t"
+                "pmullh     %[ftmp7],   %[ftmp7],       %[D]           \n\t"
+                "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]       \n\t"
+                "pmullh     %[ftmp6],   %[ftmp6],       %[C]           \n\t"
+                "pmullh     %[ftmp8],   %[ftmp8],       %[D]           \n\t"
+                "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]       \n\t"
+
+                "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]       \n\t"
+                "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]       \n\t"
+                "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
+                "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
+                "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]       \n\t"
+                "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]       \n\t"
+                "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+                MMI_LDC1(%[ftmp2], %[dst], 0x00)
+                "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+                MMI_SDC1(%[ftmp1], %[dst], 0x00)
+                PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
+                "bnez       %[h],       1b                             \n\t"
+                : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+                  [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+                  [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+                  [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+                  [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
+                  [tmp0]"=&r"(tmp[0]),
+                  [dst]"+&r"(dst),              [src]"+&r"(src),
+                  [h]"+&r"(h)
+                : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
+                  [A]"f"(A),                    [B]"f"(B),
+                  [C]"f"(C),                    [D]"f"(D)
+                : "memory"
+            );
+        } else {
+            if(x) {
+                /* x!=0, y==0 */
+                E = x << 3;
+                A = 64 - E;
+                __asm__ volatile (
+                    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
+                    "dli        %[tmp0],    0x06                           \n\t"
+                    "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
+                    "pshufh     %[E],       %[E],           %[ftmp0]       \n\t"
+                    "mtc1       %[tmp0],    %[ftmp7]                       \n\t"
+
+                    "1:                                                    \n\t"
+                    MMI_ULDC1(%[ftmp1], %[src], 0x00)
+                    MMI_ULDC1(%[ftmp2], %[src], 0x01)
+                    PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
+                    "addi       %[h],       %[h],           -0x01          \n\t"
+
+                    "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]       \n\t"
+                    "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]       \n\t"
+                    "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]       \n\t"
+                    "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]       \n\t"
+                    "pmullh     %[ftmp3],   %[ftmp3],       %[A]           \n\t"
+                    "pmullh     %[ftmp5],   %[ftmp5],       %[E]           \n\t"
+                    "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]       \n\t"
+                    "pmullh     %[ftmp4],   %[ftmp4],       %[A]           \n\t"
+                    "pmullh     %[ftmp6],   %[ftmp6],       %[E]           \n\t"
+                    "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]       \n\t"
+
+                    "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
+                    "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
+                    "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]       \n\t"
+                    "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]       \n\t"
+                    "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+                    MMI_LDC1(%[ftmp2], %[dst], 0x00)
+                    "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+                    MMI_SDC1(%[ftmp1], %[dst], 0x00)
+                    PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
+                    "bnez       %[h],       1b                             \n\t"
+                    : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+                      [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+                      [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+                      [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+                      [tmp0]"=&r"(tmp[0]),
+                      [dst]"+&r"(dst),              [src]"+&r"(src),
+                      [h]"+&r"(h)
+                    : [stride]"r"((mips_reg)stride),
+                      [ff_pw_32]"f"(ff_pw_32),
+                      [A]"f"(A),                    [E]"f"(E)
+                    : "memory"
+                );
+            } else {
+                /* x==0, y!=0 */
+                E = y << 3;
+                A = 64 - E;
+                __asm__ volatile (
+                    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
+                    "dli        %[tmp0],    0x06                           \n\t"
+                    "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
+                    "pshufh     %[E],       %[E],           %[ftmp0]       \n\t"
+                    "mtc1       %[tmp0],    %[ftmp7]                       \n\t"
+
+                    "1:                                                    \n\t"
+                    MMI_ULDC1(%[ftmp1], %[src], 0x00)
+                    PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
+                    MMI_ULDC1(%[ftmp2], %[src], 0x00)
+                    "addi       %[h],       %[h],           -0x01          \n\t"
+
+                    "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]       \n\t"
+                    "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]       \n\t"
+                    "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]       \n\t"
+                    "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]       \n\t"
+                    "pmullh     %[ftmp3],   %[ftmp3],       %[A]           \n\t"
+                    "pmullh     %[ftmp5],   %[ftmp5],       %[E]           \n\t"
+                    "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]       \n\t"
+                    "pmullh     %[ftmp4],   %[ftmp4],       %[A]           \n\t"
+                    "pmullh     %[ftmp6],   %[ftmp6],       %[E]           \n\t"
+                    "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]       \n\t"
+
+                    "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
+                    "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
+                    "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]       \n\t"
+                    "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]       \n\t"
+                    "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+                    MMI_LDC1(%[ftmp2], %[dst], 0x00)
+                    "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+                    MMI_SDC1(%[ftmp1], %[dst], 0x00)
+                    PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
+                    "bnez       %[h],       1b                             \n\t"
+                    : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+                      [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+                      [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+                      [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+                      [tmp0]"=&r"(tmp[0]),
+                      [dst]"+&r"(dst),              [src]"+&r"(src),
+                      [h]"+&r"(h)
+                    : [stride]"r"((mips_reg)stride),
+                      [ff_pw_32]"f"(ff_pw_32),
+                      [A]"f"(A),                    [E]"f"(E)
+                    : "memory"
+                );
+            }
+        }
     }
 }
 

diff --git a/libavcodec/mips/h264chroma_msa.c b/libavcodec/mips/h264chroma_msa.c
index b8fcf6d..4c25761 100644
--- a/libavcodec/mips/h264chroma_msa.c
+++ b/libavcodec/mips/h264chroma_msa.c

@@ -839,12 +839,11 @@
     }
 }
 
-static void avc_chroma_hz_and_aver_dst_2x2_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
-                                               uint32_t coeff0, uint32_t coeff1)
+static void avc_chroma_hz_and_aver_dst_2x2_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
 {
     uint16_t out0, out1;
-    uint32_t load0, load1;
     v16i8 src0, src1;
     v16u8 dst_data = { 0 };
     v8u16 res_r;
@@ -856,12 +855,13 @@
 
     mask = LD_SB(&chroma_mask_arr[0]);
 
-    LD_SB2(src, src_stride, src0, src1);
+    LD_SB2(src, stride, src0, src1);
 
-    load0 = LW(dst);
-    load1 = LW(dst + dst_stride);
+    out0 = LH(dst);
+    out1 = LH(dst + stride);
 
-    INSERT_W2_UB(load0, load1, dst_data);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 0, out0);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 2, out1);
 
     src0 = __msa_vshf_b(mask, src1, src0);
 
@@ -877,30 +877,34 @@
     out1 = __msa_copy_u_h((v8i16) dst_data, 2);
 
     SH(out0, dst);
-    dst += dst_stride;
+    dst += stride;
     SH(out1, dst);
 }
 
-static void avc_chroma_hz_and_aver_dst_2x4_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
-                                               uint32_t coeff0, uint32_t coeff1)
+static void avc_chroma_hz_and_aver_dst_2x4_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
 {
+    uint16_t tp0, tp1, tp2, tp3;
     v16u8 src0, src1, src2, src3;
-    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 dst0, dst_data = { 0 };
     v8u16 res_r;
-    v16i8 res, mask;
+    v16i8 mask;
     v16i8 coeff_vec0 = __msa_fill_b(coeff0);
     v16i8 coeff_vec1 = __msa_fill_b(coeff1);
     v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
 
     mask = LD_SB(&chroma_mask_arr[64]);
 
-    LD_UB4(src, src_stride, src0, src1, src2, src3);
-    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 2, (v8i16) dst2);
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 3, (v8i16) dst3);
+    LD_UB4(src, stride, src0, src1, src2, src3);
+    tp0 = LH(dst);
+    tp1 = LH(dst + stride);
+    tp2 = LH(dst + 2 * stride);
+    tp3 = LH(dst + 3 * stride);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 0, tp0);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 1, tp1);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 2, tp2);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 3, tp3);
 
     VSHF_B2_UB(src0, src1, src2, src3, mask, mask, src0, src2);
 
@@ -911,75 +915,26 @@
     res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
     res_r = __msa_sat_u_h(res_r, 7);
 
-    res = __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
-    dst0 = __msa_aver_u_b((v16u8) res, dst0);
+    dst0 = (v16u8) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    dst0 = __msa_aver_u_b(dst0, dst_data);
 
-    ST2x4_UB(dst0, 0, dst, dst_stride);
+    ST2x4_UB(dst0, 0, dst, stride);
 }
 
-static void avc_chroma_hz_and_aver_dst_2x8_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
-                                               uint32_t coeff0, uint32_t coeff1)
-{
-    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
-    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
-    v8u16 res0_r, res1_r;
-    v16u8 res0, res1, mask;
-    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
-    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
-    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
-
-    mask = LD_UB(&chroma_mask_arr[64]);
-
-    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
-    LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
-
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 2, (v8i16) dst2);
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 3, (v8i16) dst3);
-
-    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 1, (v8i16) dst5);
-    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 2, (v8i16) dst6);
-    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 3, (v8i16) dst7);
-
-    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, src0, src2);
-    VSHF_B2_UB(src4, src5, src6, src7, mask, mask, src4, src6);
-    ILVR_D2_UB(src2, src0, src6, src4, src0, src4);
-    DOTP_UB2_UH(src0, src4, coeff_vec, coeff_vec, res0_r, res1_r);
-
-    res0_r <<= 3;
-    res1_r <<= 3;
-
-    SRARI_H2_UH(res0_r, res1_r, 6);
-    SAT_UH2_UH(res0_r, res1_r, 7);
-    PCKEV_B2_UB(res0_r, res0_r, res1_r, res1_r, res0, res1);
-    AVER_UB2_UB(res0, dst0, res1, dst4, dst0, dst4);
-
-    ST2x4_UB(dst0, 0, dst, dst_stride);
-    dst += (4 * dst_stride);
-    ST2x4_UB(dst4, 0, dst, dst_stride);
-}
-
-static void avc_chroma_hz_and_aver_dst_2w_msa(uint8_t *src, int32_t src_stride,
-                                              uint8_t *dst, int32_t dst_stride,
-                                              uint32_t coeff0, uint32_t coeff1,
-                                              int32_t height)
+static void avc_chroma_hz_and_aver_dst_2w_msa(uint8_t *src, uint8_t *dst,
+                                              int32_t stride, uint32_t coeff0,
+                                              uint32_t coeff1, int32_t height)
 {
     if (2 == height) {
-        avc_chroma_hz_and_aver_dst_2x2_msa(src, src_stride, dst, dst_stride,
-                                           coeff0, coeff1);
+        avc_chroma_hz_and_aver_dst_2x2_msa(src, dst, stride, coeff0, coeff1);
     } else if (4 == height) {
-        avc_chroma_hz_and_aver_dst_2x4_msa(src, src_stride, dst, dst_stride,
-                                           coeff0, coeff1);
-    } else if (8 == height) {
-        avc_chroma_hz_and_aver_dst_2x8_msa(src, src_stride, dst, dst_stride,
-                                           coeff0, coeff1);
+        avc_chroma_hz_and_aver_dst_2x4_msa(src, dst, stride, coeff0, coeff1);
     }
 }
 
-static void avc_chroma_hz_and_aver_dst_4x2_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
-                                               uint32_t coeff0, uint32_t coeff1)
+static void avc_chroma_hz_and_aver_dst_4x2_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
 {
     uint32_t load0, load1;
     v16i8 src0, src1;
@@ -992,10 +947,9 @@
 
     mask = LD_SB(&chroma_mask_arr[0]);
 
-    LD_SB2(src, src_stride, src0, src1);
+    LD_SB2(src, stride, src0, src1);
 
-    load0 = LW(dst);
-    load1 = LW(dst + dst_stride);
+    LW2(dst, stride, load0, load1);
 
     INSERT_W2_UB(load0, load1, dst_data);
 
@@ -1008,84 +962,122 @@
     res = __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
     dst_data = __msa_aver_u_b((v16u8) res, dst_data);
 
-    ST4x2_UB(dst_data, dst, dst_stride);
+    ST4x2_UB(dst_data, dst, stride);
 }
 
-static void avc_chroma_hz_and_aver_dst_4x4multiple_msa(uint8_t *src,
-                                                       int32_t src_stride,
-                                                       uint8_t *dst,
-                                                       int32_t dst_stride,
-                                                       uint32_t coeff0,
-                                                       uint32_t coeff1,
-                                                       int32_t height)
+static void avc_chroma_hz_and_aver_dst_4x4_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
 {
-    uint32_t load0, load1;
-    uint32_t row;
+    uint32_t tp0, tp1, tp2, tp3;
     v16u8 src0, src1, src2, src3;
-    v16u8 dst0 = { 0 };
-    v16u8 dst1 = { 0 };
+    v16u8 out, dst_data = { 0 };
+    v16i8 mask;
     v8u16 res0_r, res1_r;
-    v16u8 res0, res1, mask;
     v16i8 coeff_vec0 = __msa_fill_b(coeff0);
     v16i8 coeff_vec1 = __msa_fill_b(coeff1);
     v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
 
-    mask = LD_UB(&chroma_mask_arr[0]);
+    mask = LD_SB(&chroma_mask_arr[0]);
 
-    for (row = (height >> 2); row--;) {
-        LD_UB4(src, src_stride, src0, src1, src2, src3);
-        src += (4 * src_stride);
-
-        load0 = LW(dst);
-        load1 = LW(dst + dst_stride);
-
-        INSERT_W2_UB(load0, load1, dst0);
-
-        load0 = LW(dst + 2 * dst_stride);
-        load1 = LW(dst + 3 * dst_stride);
-
-        INSERT_W2_UB(load0, load1, dst1);
-
-        VSHF_B2_UB(src0, src1, src2, src3, mask, mask, src0, src2);
-        DOTP_UB2_UH(src0, src2, coeff_vec, coeff_vec, res0_r, res1_r);
-
-        res0_r <<= 3;
-        res1_r <<= 3;
-
-        SRARI_H2_UH(res0_r, res1_r, 6);
-        SAT_UH2_UH(res0_r, res1_r, 7);
-        PCKEV_B2_UB(res0_r, res0_r, res1_r, res1_r, res0, res1);
-        AVER_UB2_UB(res0, dst0, res1, dst1, dst0, dst1);
-
-        ST4x4_UB(dst0, dst1, 0, 1, 0, 1, dst, dst_stride);
-        dst += (4 * dst_stride);
-    }
+    LD_UB4(src, stride, src0, src1, src2, src3);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst_data);
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, src0, src2);
+    DOTP_UB2_UH(src0, src2, coeff_vec, coeff_vec, res0_r, res1_r);
+    res0_r <<= 3;
+    res1_r <<= 3;
+    SRARI_H2_UH(res0_r, res1_r, 6);
+    SAT_UH2_UH(res0_r, res1_r, 7);
+    out = (v16u8) __msa_pckev_b((v16i8) res1_r, (v16i8) res0_r);
+    out = __msa_aver_u_b(out, dst_data);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
 }
 
-static void avc_chroma_hz_and_aver_dst_4w_msa(uint8_t *src, int32_t src_stride,
-                                              uint8_t *dst, int32_t dst_stride,
-                                              uint32_t coeff0, uint32_t coeff1,
-                                              int32_t height)
+static void avc_chroma_hz_and_aver_dst_4x8_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
+{
+    uint32_t tp0, tp1, tp2, tp3;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, out0, out1;
+    v16u8 dst0 = { 0 }, dst1 = { 0 };
+    v16i8 mask;
+    v8u16 res0, res1, res2, res3;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+
+    mask = LD_SB(&chroma_mask_arr[0]);
+
+    LD_UB8(src, stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+    LW4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst1);
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, src0, src2);
+    VSHF_B2_UB(src4, src5, src6, src7, mask, mask, src4, src6);
+    DOTP_UB2_UH(src0, src2, coeff_vec, coeff_vec, res0, res1);
+    DOTP_UB2_UH(src4, src6, coeff_vec, coeff_vec, res2, res3);
+    SLLI_4V(res0, res1, res2, res3, 3);
+    SRARI_H4_UH(res0, res1, res2, res3, 6);
+    SAT_UH4_UH(res0, res1, res2, res3, 7);
+    PCKEV_B2_UB(res1, res0, res3, res2, out0, out1);
+    AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
+    ST4x8_UB(out0, out1, dst, stride);
+}
+
+static void avc_chroma_hz_and_aver_dst_4w_msa(uint8_t *src, uint8_t *dst,
+                                              int32_t stride, uint32_t coeff0,
+                                              uint32_t coeff1, int32_t height)
 {
     if (2 == height) {
-        avc_chroma_hz_and_aver_dst_4x2_msa(src, src_stride, dst, dst_stride,
-                                           coeff0, coeff1);
-    } else {
-        avc_chroma_hz_and_aver_dst_4x4multiple_msa(src, src_stride,
-                                                   dst, dst_stride,
-                                                   coeff0, coeff1, height);
+        avc_chroma_hz_and_aver_dst_4x2_msa(src, dst, stride, coeff0, coeff1);
+    } else if (4 == height) {
+        avc_chroma_hz_and_aver_dst_4x4_msa(src, dst, stride, coeff0, coeff1);
+    } else if (8 == height) {
+        avc_chroma_hz_and_aver_dst_4x8_msa(src, dst, stride, coeff0, coeff1);
     }
 }
 
-static void avc_chroma_hz_and_aver_dst_8w_msa(uint8_t *src, int32_t src_stride,
-                                              uint8_t *dst, int32_t dst_stride,
-                                              uint32_t coeff0, uint32_t coeff1,
-                                              int32_t height)
+static void avc_chroma_hz_and_aver_dst_8x4_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
 {
-    uint32_t row;
+    uint64_t tp0, tp1, tp2, tp3;
     v16u8 src0, src1, src2, src3, out0, out1;
+    v16u8 dst0 = { 0 }, dst1 = { 0 };
     v8u16 res0, res1, res2, res3;
-    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+
+    mask = LD_SB(&chroma_mask_arr[32]);
+    LD_UB4(src, stride, src0, src1, src2, src3);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    VSHF_B2_UB(src0, src0, src1, src1, mask, mask, src0, src1);
+    VSHF_B2_UB(src2, src2, src3, src3, mask, mask, src2, src3);
+    DOTP_UB4_UH(src0, src1, src2, src3, coeff_vec, coeff_vec, coeff_vec,
+                coeff_vec, res0, res1, res2, res3);
+    SLLI_4V(res0, res1, res2, res3, 3);
+    SRARI_H4_UH(res0, res1, res2, res3, 6);
+    SAT_UH4_UH(res0, res1, res2, res3, 7);
+    PCKEV_B2_UB(res1, res0, res3, res2, out0, out1);
+    AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+    ST8x4_UB(dst0, dst1, dst, stride);
+}
+
+static void avc_chroma_hz_and_aver_dst_8x8_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
+{
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16u8 out0, out1, out2, out3;
+    v16u8 dst0 = { 0 }, dst1 = { 0 }, dst2 = { 0 }, dst3 = { 0 };
+    v8u16 res0, res1, res2, res3, res4, res5, res6, res7;
     v16i8 mask;
     v16i8 coeff_vec0 = __msa_fill_b(coeff0);
     v16i8 coeff_vec1 = __msa_fill_b(coeff1);
@@ -1093,43 +1085,64 @@
 
     mask = LD_SB(&chroma_mask_arr[32]);
 
-    for (row = height >> 2; row--;) {
-        LD_UB4(src, src_stride, src0, src1, src2, src3);
-        src += (4 * src_stride);
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-        VSHF_B2_UB(src0, src0, src1, src1, mask, mask, src0, src1);
-        VSHF_B2_UB(src2, src2, src3, src3, mask, mask, src2, src3);
-        DOTP_UB4_UH(src0, src1, src2, src3, coeff_vec, coeff_vec, coeff_vec,
-                    coeff_vec, res0, res1, res2, res3);
-        SLLI_4V(res0, res1, res2, res3, 3);
-        SRARI_H4_UH(res0, res1, res2, res3, 6);
-        SAT_UH4_UH(res0, res1, res2, res3, 7);
-        PCKEV_B2_UB(res1, res0, res3, res2, out0, out1);
-        PCKEV_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-        AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
-        ST8x4_UB(out0, out1, dst, dst_stride);
-        dst += (4 * dst_stride);
+    LD_UB8(src, stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst2);
+    INSERT_D2_UB(tp2, tp3, dst3);
+    VSHF_B2_UB(src0, src0, src1, src1, mask, mask, src0, src1);
+    VSHF_B2_UB(src2, src2, src3, src3, mask, mask, src2, src3);
+    VSHF_B2_UB(src4, src4, src5, src5, mask, mask, src4, src5);
+    VSHF_B2_UB(src6, src6, src7, src7, mask, mask, src6, src7);
+    DOTP_UB4_UH(src0, src1, src2, src3, coeff_vec, coeff_vec, coeff_vec,
+                coeff_vec, res0, res1, res2, res3);
+    DOTP_UB4_UH(src4, src5, src6, src7, coeff_vec, coeff_vec, coeff_vec,
+                coeff_vec, res4, res5, res6, res7);
+    SLLI_4V(res0, res1, res2, res3, 3);
+    SLLI_4V(res4, res5, res6, res7, 3);
+    SRARI_H4_UH(res0, res1, res2, res3, 6);
+    SRARI_H4_UH(res4, res5, res6, res7, 6);
+    SAT_UH4_UH(res0, res1, res2, res3, 7);
+    SAT_UH4_UH(res4, res5, res6, res7, 7);
+    PCKEV_B2_UB(res1, res0, res3, res2, out0, out1);
+    PCKEV_B2_UB(res5, res4, res7, res6, out2, out3);
+    AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
+    AVER_UB2_UB(out2, dst2, out3, dst3, out2, out3);
+    ST8x8_UB(out0, out1, out2, out3, dst, stride);
+}
+
+static void avc_chroma_hz_and_aver_dst_8w_msa(uint8_t *src, uint8_t *dst,
+                                              int32_t stride, uint32_t coeff0,
+                                              uint32_t coeff1, int32_t height)
+{
+    if (4 == height) {
+        avc_chroma_hz_and_aver_dst_8x4_msa(src, dst, stride, coeff0, coeff1);
+    } else if (8 == height) {
+        avc_chroma_hz_and_aver_dst_8x8_msa(src, dst, stride, coeff0, coeff1);
     }
 }
 
-static void avc_chroma_vt_and_aver_dst_2x2_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
-                                               uint32_t coeff0, uint32_t coeff1)
+static void avc_chroma_vt_and_aver_dst_2x2_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
 {
     uint16_t out0, out1;
-    uint32_t load0, load1;
     v16i8 src0, src1, src2, tmp0, tmp1, res;
     v16u8 dst_data = { 0 };
+    v8i16 out;
     v8u16 res_r;
     v16i8 coeff_vec0 = __msa_fill_b(coeff0);
     v16i8 coeff_vec1 = __msa_fill_b(coeff1);
     v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    load0 = LW(dst);
-    load1 = LW(dst + dst_stride);
+    LD_SB3(src, stride, src0, src1, src2);
+    out0 = LH(dst);
+    out1 = LH(dst + stride);
 
-    INSERT_W2_UB(load0, load1, dst_data);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 0, out0);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 2, out1);
 
     ILVR_B2_SB(src1, src0, src2, src1, tmp0, tmp1);
 
@@ -1139,20 +1152,20 @@
     res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
     res_r = __msa_sat_u_h(res_r, 7);
     res = __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
-    dst_data = __msa_aver_u_b((v16u8) res, dst_data);
-    out0 = __msa_copy_u_h((v8i16) dst_data, 0);
-    out1 = __msa_copy_u_h((v8i16) dst_data, 2);
+    out = (v8i16) __msa_aver_u_b((v16u8) res, dst_data);
+    out0 = __msa_copy_u_h(out, 0);
+    out1 = __msa_copy_u_h(out, 2);
 
     SH(out0, dst);
-    dst += dst_stride;
+    dst += stride;
     SH(out1, dst);
 }
 
-static void avc_chroma_vt_and_aver_dst_2x4_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
-                                               uint32_t coeff0, uint32_t coeff1)
+static void avc_chroma_vt_and_aver_dst_2x4_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
 {
-    uint32_t load0, load1;
+    uint16_t tp0, tp1, tp2, tp3;
     v16i8 src0, src1, src2, src3, src4;
     v16u8 tmp0, tmp1, tmp2, tmp3;
     v8u16 res_r;
@@ -1162,19 +1175,16 @@
     v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
     v16u8 dst_data = { 0 };
 
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
 
-    load0 = LW(dst);
-    load1 = LW(dst + dst_stride);
-
-    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 0, load0);
-    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 1, load1);
-
-    load0 = LW(dst + 2 * dst_stride);
-    load1 = LW(dst + 3 * dst_stride);
-
-    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 2, load0);
-    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 3, load1);
+    tp0 = LH(dst);
+    tp1 = LH(dst + stride);
+    tp2 = LH(dst + 2 * stride);
+    tp3 = LH(dst + 3 * stride);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 0, tp0);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 1, tp1);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 2, tp2);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 3, tp3);
 
     ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
                tmp0, tmp1, tmp2, tmp3);
@@ -1190,102 +1200,26 @@
     res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
     res = (v8i16) __msa_aver_u_b((v16u8) res, dst_data);
 
-    ST2x4_UB(res, 0, dst, dst_stride);
-    dst += (4 * dst_stride);
+    ST2x4_UB(res, 0, dst, stride);
 }
 
-static void avc_chroma_vt_and_aver_dst_2x8_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
-                                               uint32_t coeff0, uint32_t coeff1)
-{
-    uint32_t load0, load1, load2, load3;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-    v16u8 tmp0, tmp1, tmp2, tmp3;
-    v8i16 res;
-    v8u16 res_r;
-    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
-    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
-    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
-    v16u8 dst_data0 = { 0 };
-    v16u8 dst_data1 = { 0 };
-
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-    LD_SB4(src, src_stride, src5, src6, src7, src8);
-
-    LW4(dst, dst_stride, load0, load1, load2, load3);
-
-    dst_data0 = (v16u8) __msa_insert_h((v8i16) dst_data0, 0, load0);
-    dst_data0 = (v16u8) __msa_insert_h((v8i16) dst_data0, 1, load1);
-    dst_data0 = (v16u8) __msa_insert_h((v8i16) dst_data0, 2, load2);
-    dst_data0 = (v16u8) __msa_insert_h((v8i16) dst_data0, 3, load3);
-
-    LW4(dst + 4 * dst_stride, dst_stride, load0, load1, load2, load3);
-
-    dst_data1 = (v16u8) __msa_insert_h((v8i16) dst_data1, 0, load0);
-    dst_data1 = (v16u8) __msa_insert_h((v8i16) dst_data1, 1, load1);
-    dst_data1 = (v16u8) __msa_insert_h((v8i16) dst_data1, 2, load2);
-    dst_data1 = (v16u8) __msa_insert_h((v8i16) dst_data1, 3, load3);
-
-    ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
-               tmp0, tmp1, tmp2, tmp3);
-
-    ILVR_W2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
-
-    tmp0 = (v16u8) __msa_ilvr_d((v2i64) tmp2, (v2i64) tmp0);
-
-    res_r = __msa_dotp_u_h(tmp0, coeff_vec);
-    res_r <<= 3;
-    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
-    res_r = __msa_sat_u_h(res_r, 7);
-
-    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
-    res = (v8i16) __msa_aver_u_b((v16u8) res, dst_data0);
-
-    ST2x4_UB(res, 0, dst, dst_stride);
-    dst += (4 * dst_stride);
-
-    ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7,
-               tmp0, tmp1, tmp2, tmp3);
-
-    ILVR_W2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
-
-    tmp0 = (v16u8) __msa_ilvr_d((v2i64) tmp2, (v2i64) tmp0);
-
-    res_r = __msa_dotp_u_h(tmp0, coeff_vec);
-    res_r <<= 3;
-    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
-    res_r = __msa_sat_u_h(res_r, 7);
-
-    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
-    res = (v8i16) __msa_aver_u_b((v16u8) res, dst_data1);
-
-    ST2x4_UB(res, 0, dst, dst_stride);
-}
-
-static void avc_chroma_vt_and_aver_dst_2w_msa(uint8_t *src, int32_t src_stride,
-                                              uint8_t *dst, int32_t dst_stride,
-                                              uint32_t coeff0, uint32_t coeff1,
-                                              int32_t height)
+static void avc_chroma_vt_and_aver_dst_2w_msa(uint8_t *src, uint8_t *dst,
+                                              int32_t stride, uint32_t coeff0,
+                                              uint32_t coeff1, int32_t height)
 {
     if (2 == height) {
-        avc_chroma_vt_and_aver_dst_2x2_msa(src, src_stride, dst, dst_stride,
-                                           coeff0, coeff1);
+        avc_chroma_vt_and_aver_dst_2x2_msa(src, dst, stride, coeff0, coeff1);
     } else if (4 == height) {
-        avc_chroma_vt_and_aver_dst_2x4_msa(src, src_stride, dst, dst_stride,
-                                           coeff0, coeff1);
-    } else if (8 == height) {
-        avc_chroma_vt_and_aver_dst_2x8_msa(src, src_stride, dst, dst_stride,
-                                           coeff0, coeff1);
+        avc_chroma_vt_and_aver_dst_2x4_msa(src, dst, stride, coeff0, coeff1);
     }
 }
 
-static void avc_chroma_vt_and_aver_dst_4x2_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
-                                               uint32_t coeff0, uint32_t coeff1)
+static void avc_chroma_vt_and_aver_dst_4x2_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
 {
     uint32_t load0, load1;
-    v16i8 src0, src1, src2, tmp0, tmp1;
+    v16u8 src0, src1, src2, tmp0, tmp1;
     v16u8 dst_data = { 0 };
     v8u16 res_r;
     v16u8 res;
@@ -1293,140 +1227,196 @@
     v16i8 coeff_vec1 = __msa_fill_b(coeff1);
     v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
+    LD_UB3(src, stride, src0, src1, src2);
 
-    load0 = LW(dst);
-    load1 = LW(dst + dst_stride);
+    LW2(dst, stride, load0, load1);
 
     INSERT_W2_UB(load0, load1, dst_data);
-    ILVR_B2_SB(src1, src0, src2, src1, tmp0, tmp1);
+    ILVR_B2_UB(src1, src0, src2, src1, tmp0, tmp1);
 
-    tmp0 = (v16i8) __msa_ilvr_d((v2i64) tmp1, (v2i64) tmp0);
+    tmp0 = (v16u8) __msa_ilvr_d((v2i64) tmp1, (v2i64) tmp0);
 
-    res_r = __msa_dotp_u_h((v16u8) tmp0, coeff_vec);
+    res_r = __msa_dotp_u_h(tmp0, coeff_vec);
     res_r <<= 3;
     res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
     res_r = __msa_sat_u_h(res_r, 7);
     res = (v16u8) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
     res = __msa_aver_u_b(res, dst_data);
 
-    ST4x2_UB(res, dst, dst_stride);
+    ST4x2_UB(res, dst, stride);
 }
 
-static void avc_chroma_vt_and_aver_dst_4x4mul_msa(uint8_t *src,
-                                                  int32_t src_stride,
-                                                  uint8_t *dst,
-                                                  int32_t dst_stride,
-                                                  uint32_t coeff0,
-                                                  uint32_t coeff1,
-                                                  int32_t height)
+static void avc_chroma_vt_and_aver_dst_4x4_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
 {
-    uint32_t load0, load1, row;
-    v16i8 src0, src1, src2, src3, src4;
+    uint32_t tp0, tp1, tp2, tp3;
+    v16u8 src0, src1, src2, src3, src4;
     v16u8 tmp0, tmp1, tmp2, tmp3;
     v16u8 dst0 = { 0 };
-    v16u8 dst1 = { 0 };
     v8u16 res0_r, res1_r;
-    v16u8 res0, res1;
+    v16u8 out;
     v16i8 coeff_vec0 = __msa_fill_b(coeff0);
     v16i8 coeff_vec1 = __msa_fill_b(coeff1);
     v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
 
-    src0 = LD_SB(src);
-    src += src_stride;
-
-    for (row = (height >> 2); row--;) {
-        LD_SB4(src, src_stride, src1, src2, src3, src4);
-        src += (4 * src_stride);
-
-        load0 = LW(dst);
-        load1 = LW(dst + dst_stride);
-
-        INSERT_W2_UB(load0, load1, dst0);
-        load0 = LW(dst + 2 * dst_stride);
-        load1 = LW(dst + 3 * dst_stride);
-        INSERT_W2_UB(load0, load1, dst1);
-
-        ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
-                   tmp0, tmp1, tmp2, tmp3);
-        ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
-        DOTP_UB2_UH(tmp0, tmp2, coeff_vec, coeff_vec, res0_r, res1_r);
-
-        res0_r <<= 3;
-        res1_r <<= 3;
-
-        SRARI_H2_UH(res0_r, res1_r, 6);
-        SAT_UH2_UH(res0_r, res1_r, 7);
-        PCKEV_B2_UB(res0_r, res0_r, res1_r, res1_r, res0, res1);
-        AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
-
-        ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
-        dst += (4 * dst_stride);
-        src0 = src4;
-    }
+    LD_UB5(src, stride, src0, src1, src2, src3, src4);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+    ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, tmp0, tmp1, tmp2,
+               tmp3);
+    ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
+    DOTP_UB2_UH(tmp0, tmp2, coeff_vec, coeff_vec, res0_r, res1_r);
+    res0_r <<= 3;
+    res1_r <<= 3;
+    SRARI_H2_UH(res0_r, res1_r, 6);
+    SAT_UH2_UH(res0_r, res1_r, 7);
+    out = (v16u8) __msa_pckev_b((v16i8) res1_r, (v16i8) res0_r);
+    out = __msa_aver_u_b(out, dst0);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
 }
 
-static void avc_chroma_vt_and_aver_dst_4w_msa(uint8_t *src, int32_t src_stride,
-                                              uint8_t *dst, int32_t dst_stride,
-                                              uint32_t coeff0, uint32_t coeff1,
-                                              int32_t height)
+static void avc_chroma_vt_and_aver_dst_4x8_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
+{
+    uint32_t tp0, tp1, tp2, tp3;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, out0, out1;
+    v16u8 dst0 = { 0 }, dst1 = { 0 };
+    v8u16 res0, res1, res2, res3;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+
+    LD_UB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    LD_UB4(src, stride, src5, src6, src7, src8);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+    LW4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst1);
+    ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, tmp0, tmp1, tmp2,
+               tmp3);
+    ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, tmp4, tmp5, tmp6,
+               tmp7);
+    ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
+    ILVR_D2_UB(tmp5, tmp4, tmp7, tmp6, tmp4, tmp6);
+    DOTP_UB2_UH(tmp0, tmp2, coeff_vec, coeff_vec, res0, res1);
+    DOTP_UB2_UH(tmp4, tmp6, coeff_vec, coeff_vec, res2, res3);
+    SLLI_4V(res0, res1, res2, res3, 3);
+    SRARI_H4_UH(res0, res1, res2, res3, 6);
+    SAT_UH4_UH(res0, res1, res2, res3, 7);
+    PCKEV_B2_UB(res1, res0, res3, res2, out0, out1);
+    AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
+    ST4x8_UB(out0, out1, dst, stride);
+}
+
+static void avc_chroma_vt_and_aver_dst_4w_msa(uint8_t *src, uint8_t *dst,
+                                              int32_t stride, uint32_t coeff0,
+                                              uint32_t coeff1, int32_t height)
 {
     if (2 == height) {
-        avc_chroma_vt_and_aver_dst_4x2_msa(src, src_stride, dst, dst_stride,
-                                           coeff0, coeff1);
-    } else {
-        avc_chroma_vt_and_aver_dst_4x4mul_msa(src, src_stride, dst, dst_stride,
-                                              coeff0, coeff1, height);
+        avc_chroma_vt_and_aver_dst_4x2_msa(src, dst, stride, coeff0, coeff1);
+    } else if (4 == height) {
+        avc_chroma_vt_and_aver_dst_4x4_msa(src, dst, stride, coeff0, coeff1);
+    } else if (8 == height) {
+        avc_chroma_vt_and_aver_dst_4x8_msa(src, dst, stride, coeff0, coeff1);
     }
 }
 
-static void avc_chroma_vt_and_aver_dst_8w_msa(uint8_t *src, int32_t src_stride,
-                                              uint8_t *dst, int32_t dst_stride,
-                                              uint32_t coeff0, uint32_t coeff1,
-                                              int32_t height)
+static void avc_chroma_vt_and_aver_dst_8x4_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
 {
-    uint32_t row;
+    uint64_t tp0, tp1, tp2, tp3;
     v16u8 src0, src1, src2, src3, src4;
     v16u8 out0, out1;
     v8u16 res0, res1, res2, res3;
-    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 dst0 = { 0 }, dst1 = { 0 };
     v16i8 coeff_vec0 = __msa_fill_b(coeff0);
     v16i8 coeff_vec1 = __msa_fill_b(coeff1);
     v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
 
-    src0 = LD_UB(src);
-    src += src_stride;
+    LD_UB5(src, stride, src0, src1, src2, src3, src4);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src0, src1, src2, src3);
+    DOTP_UB4_UH(src0, src1, src2, src3, coeff_vec, coeff_vec, coeff_vec,
+                coeff_vec, res0, res1, res2, res3);
+    SLLI_4V(res0, res1, res2, res3, 3);
+    SRARI_H4_UH(res0, res1, res2, res3, 6);
+    SAT_UH4_UH(res0, res1, res2, res3, 7);
+    PCKEV_B2_UB(res1, res0, res3, res2, out0, out1);
+    AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
+    ST8x4_UB(out0, out1, dst, stride);
+}
 
-    for (row = height >> 2; row--;) {
-        LD_UB4(src, src_stride, src1, src2, src3, src4);
-        src += (4 * src_stride);
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-        ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
-                   src0, src1, src2, src3);
-        DOTP_UB4_UH(src0, src1, src2, src3, coeff_vec, coeff_vec, coeff_vec,
-                    coeff_vec, res0, res1, res2, res3);
-        SLLI_4V(res0, res1, res2, res3, 3);
-        SRARI_H4_UH(res0, res1, res2, res3, 6);
-        SAT_UH4_UH(res0, res1, res2, res3, 7);
-        PCKEV_B2_UB(res1, res0, res3, res2, out0, out1);
-        PCKEV_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-        AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
-        ST8x4_UB(out0, out1, dst, dst_stride);
+static void avc_chroma_vt_and_aver_dst_8x8_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride, uint32_t coeff0,
+                                               uint32_t coeff1)
+{
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 out0, out1, out2, out3;
+    v16u8 dst0 = { 0 }, dst1 = { 0 }, dst2 = { 0 }, dst3 = { 0 };
+    v8u16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
 
-        dst += (4 * dst_stride);
-        src0 = src4;
+    LD_UB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    LD_UB4(src, stride, src5, src6, src7, src8);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst2);
+    INSERT_D2_UB(tp2, tp3, dst3);
+    ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src0, src1, src2, src3);
+    ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7,
+               src4, src5, src6, src7);
+    DOTP_UB4_UH(src0, src1, src2, src3, coeff_vec, coeff_vec, coeff_vec,
+                coeff_vec, res0, res1, res2, res3);
+    DOTP_UB4_UH(src4, src5, src6, src7, coeff_vec, coeff_vec, coeff_vec,
+                coeff_vec, res4, res5, res6, res7);
+    SLLI_4V(res0, res1, res2, res3, 3);
+    SLLI_4V(res4, res5, res6, res7, 3);
+    SRARI_H4_UH(res0, res1, res2, res3, 6);
+    SRARI_H4_UH(res4, res5, res6, res7, 6);
+    SAT_UH4_UH(res0, res1, res2, res3, 7);
+    SAT_UH4_UH(res0, res1, res2, res3, 7);
+    PCKEV_B2_UB(res1, res0, res3, res2, out0, out1);
+    PCKEV_B2_UB(res5, res4, res7, res6, out2, out3);
+    AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
+    AVER_UB2_UB(out2, dst2, out3, dst3, out2, out3);
+    ST8x8_UB(out0, out1, out2, out3, dst, stride);
+}
+
+static void avc_chroma_vt_and_aver_dst_8w_msa(uint8_t *src, uint8_t *dst,
+                                              int32_t stride, uint32_t coeff0,
+                                              uint32_t coeff1, int32_t height)
+{
+    if (4 == height) {
+        avc_chroma_vt_and_aver_dst_8x4_msa(src, dst, stride, coeff0, coeff1);
+    } else if (8 == height) {
+        avc_chroma_vt_and_aver_dst_8x8_msa(src, dst, stride, coeff0, coeff1);
     }
 }
 
-static void avc_chroma_hv_and_aver_dst_2x2_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
+static void avc_chroma_hv_and_aver_dst_2x2_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride,
                                                uint32_t coef_hor0,
                                                uint32_t coef_hor1,
                                                uint32_t coef_ver0,
                                                uint32_t coef_ver1)
 {
     uint16_t out0, out1;
-    v16u8 dst0, dst1;
+    v16u8 dst0 = { 0 };
     v16u8 src0, src1, src2;
     v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
     v16i8 res, mask;
@@ -1438,8 +1428,11 @@
 
     mask = LD_SB(&chroma_mask_arr[48]);
 
-    LD_UB3(src, src_stride, src0, src1, src2);
-    LD_UB2(dst, dst_stride, dst0, dst1);
+    LD_UB3(src, stride, src0, src1, src2);
+    out0 = LH(dst);
+    out1 = LH(dst + stride);
+    dst0 = (v16u8) __msa_insert_h((v8i16) dst0, 0, out0);
+    dst0 = (v16u8) __msa_insert_h((v8i16) dst0, 1, out1);
     VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
     DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
     MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
@@ -1448,26 +1441,26 @@
     res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
     res_vt0 = __msa_sat_u_h(res_vt0, 7);
     res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
     dst0 = __msa_aver_u_b((v16u8) res, dst0);
     out0 = __msa_copy_u_h((v8i16) dst0, 0);
     out1 = __msa_copy_u_h((v8i16) dst0, 1);
 
     SH(out0, dst);
-    dst += dst_stride;
+    dst += stride;
     SH(out1, dst);
 }
 
-static void avc_chroma_hv_and_aver_dst_2x4_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
+static void avc_chroma_hv_and_aver_dst_2x4_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride,
                                                uint32_t coef_hor0,
                                                uint32_t coef_hor1,
                                                uint32_t coef_ver0,
                                                uint32_t coef_ver1)
 {
+    uint16_t tp0, tp1, tp2, tp3;
     v16u8 src0, src1, src2, src3, src4;
     v16u8 tmp0, tmp1, tmp2, tmp3;
-    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 dst0 = { 0 };
     v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
     v16i8 res, mask;
     v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
@@ -1478,8 +1471,15 @@
 
     mask = LD_SB(&chroma_mask_arr[48]);
 
-    LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
-    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    LD_UB5(src, stride, src0, src1, src2, src3, src4);
+    tp0 = LH(dst);
+    tp1 = LH(dst + stride);
+    tp2 = LH(dst + 2 * stride);
+    tp3 = LH(dst + 3 * stride);
+    dst0 = (v16u8) __msa_insert_h((v8i16) dst0, 0, tp0);
+    dst0 = (v16u8) __msa_insert_h((v8i16) dst0, 1, tp1);
+    dst0 = (v16u8) __msa_insert_h((v8i16) dst0, 2, tp2);
+    dst0 = (v16u8) __msa_insert_h((v8i16) dst0, 3, tp3);
     VSHF_B2_UB(src0, src1, src2, src3, mask, mask, tmp0, tmp1);
     VSHF_B2_UB(src1, src2, src3, src4, mask, mask, tmp2, tmp3);
     ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
@@ -1490,81 +1490,13 @@
     res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
     res_vt0 = __msa_sat_u_h(res_vt0, 7);
     res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
-
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 2, (v8i16) dst2);
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 3, (v8i16) dst3);
     dst0 = __msa_aver_u_b((v16u8) res, dst0);
 
-    ST2x4_UB(dst0, 0, dst, dst_stride);
+    ST2x4_UB(dst0, 0, dst, stride);
 }
 
-static void avc_chroma_hv_and_aver_dst_2x8_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
-                                               uint32_t coef_hor0,
-                                               uint32_t coef_hor1,
-                                               uint32_t coef_ver0,
-                                               uint32_t coef_ver1)
-{
-    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
-    v16u8 tmp0, tmp1, tmp2, tmp3;
-    v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
-    v16i8 res, mask;
-    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
-    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
-    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
-    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
-    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
-
-    mask = LD_SB(&chroma_mask_arr[48]);
-
-    LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-    LD_UB4(src, src_stride, src5, src6, src7, src8);
-
-    LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
-
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 2, (v8i16) dst2);
-    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 3, (v8i16) dst3);
-
-    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 1, (v8i16) dst5);
-    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 2, (v8i16) dst6);
-    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 3, (v8i16) dst7);
-
-    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, tmp0, tmp1);
-    VSHF_B2_UB(src1, src2, src3, src4, mask, mask, tmp2, tmp3);
-    ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
-    VSHF_B2_UB(src4, src5, src6, src7, mask, mask, tmp0, tmp1);
-    VSHF_B2_UB(src5, src6, src7, src8, mask, mask, tmp2, tmp3);
-    ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src4, src5);
-    DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
-    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
-
-    res_vt0 += res_vt1;
-    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
-    res_vt0 = __msa_sat_u_h(res_vt0, 7);
-    res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
-    dst0 = __msa_aver_u_b((v16u8) res, dst0);
-
-    ST2x4_UB(dst0, 0, dst, dst_stride);
-    dst += (4 * dst_stride);
-
-    DOTP_UB2_UH(src4, src5, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
-    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
-
-    res_vt0 += res_vt1;
-    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
-    res_vt0 = __msa_sat_u_h(res_vt0, 7);
-    res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
-    dst4 = __msa_aver_u_b((v16u8) res, dst4);
-
-    ST2x4_UB(dst4, 0, dst, dst_stride);
-}
-
-static void avc_chroma_hv_and_aver_dst_2w_msa(uint8_t *src, int32_t src_stride,
-                                              uint8_t *dst, int32_t dst_stride,
+static void avc_chroma_hv_and_aver_dst_2w_msa(uint8_t *src, uint8_t *dst,
+                                              int32_t stride,
                                               uint32_t coef_hor0,
                                               uint32_t coef_hor1,
                                               uint32_t coef_ver0,
@@ -1572,31 +1504,26 @@
                                               int32_t height)
 {
     if (2 == height) {
-        avc_chroma_hv_and_aver_dst_2x2_msa(src, src_stride, dst, dst_stride,
-                                           coef_hor0, coef_hor1,
-                                           coef_ver0, coef_ver1);
+        avc_chroma_hv_and_aver_dst_2x2_msa(src, dst, stride, coef_hor0,
+                                           coef_hor1, coef_ver0, coef_ver1);
     } else if (4 == height) {
-        avc_chroma_hv_and_aver_dst_2x4_msa(src, src_stride, dst, dst_stride,
-                                           coef_hor0, coef_hor1,
-                                           coef_ver0, coef_ver1);
-    } else if (8 == height) {
-        avc_chroma_hv_and_aver_dst_2x8_msa(src, src_stride, dst, dst_stride,
-                                           coef_hor0, coef_hor1,
-                                           coef_ver0, coef_ver1);
+        avc_chroma_hv_and_aver_dst_2x4_msa(src, dst, stride, coef_hor0,
+                                           coef_hor1, coef_ver0, coef_ver1);
     }
 }
 
-static void avc_chroma_hv_and_aver_dst_4x2_msa(uint8_t *src, int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride,
+static void avc_chroma_hv_and_aver_dst_4x2_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride,
                                                uint32_t coef_hor0,
                                                uint32_t coef_hor1,
                                                uint32_t coef_ver0,
                                                uint32_t coef_ver1)
 {
+    uint32_t tp0, tp1;
     v16u8 src0, src1, src2;
-    v16u8 dst0, dst1;
+    v16u8 dst0, dst_data = { 0 };
     v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
-    v16i8 res, mask;
+    v16i8 mask;
     v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
     v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
     v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
@@ -1605,8 +1532,9 @@
 
     mask = LD_SB(&chroma_mask_arr[0]);
 
-    LD_UB3(src, src_stride, src0, src1, src2);
-    LD_UB2(dst, dst_stride, dst0, dst1);
+    LD_UB3(src, stride, src0, src1, src2);
+    LW2(dst, stride, tp0, tp1);
+    INSERT_W2_UB(tp0, tp1, dst_data);
     VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
     DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
     MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
@@ -1614,26 +1542,22 @@
     res_vt0 += res_vt1;
     res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
     res_vt0 = __msa_sat_u_h(res_vt0, 7);
-    res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
-    dst0 = (v16u8) __msa_insve_w((v4i32) dst0, 1, (v4i32) dst1);
-    dst0 = __msa_aver_u_b((v16u8) res, dst0);
+    dst0 = (v16u8) __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
+    dst0 = __msa_aver_u_b(dst0, dst_data);
 
-    ST4x2_UB(dst0, dst, dst_stride);
+    ST4x2_UB(dst0, dst, stride);
 }
 
-static void avc_chroma_hv_and_aver_dst_4x4mul_msa(uint8_t *src,
-                                                  int32_t src_stride,
-                                                  uint8_t *dst,
-                                                  int32_t dst_stride,
-                                                  uint32_t coef_hor0,
-                                                  uint32_t coef_hor1,
-                                                  uint32_t coef_ver0,
-                                                  uint32_t coef_ver1,
-                                                  int32_t height)
+static void avc_chroma_hv_and_aver_dst_4x4_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride,
+                                               uint32_t coef_hor0,
+                                               uint32_t coef_hor1,
+                                               uint32_t coef_ver0,
+                                               uint32_t coef_ver1)
 {
-    uint32_t row;
+    uint32_t tp0, tp1, tp2, tp3;
     v16u8 src0, src1, src2, src3, src4;
-    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 out, dst_data = { 0 };
     v8u16 res_hz0, res_hz1, res_hz2, res_hz3;
     v8u16 res_vt0, res_vt1, res_vt2, res_vt3;
     v16i8 mask;
@@ -1642,45 +1566,78 @@
     v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
     v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
     v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
-    v16u8 res0, res1;
 
     mask = LD_SB(&chroma_mask_arr[0]);
 
-    src0 = LD_UB(src);
-    src += src_stride;
-
-    for (row = (height >> 2); row--;) {
-        LD_UB4(src, src_stride, src1, src2, src3, src4);
-        src += (4 * src_stride);
-
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-
-        VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
-        VSHF_B2_UB(src2, src3, src3, src4, mask, mask, src2, src3);
-        DOTP_UB4_UH(src0, src1, src2, src3, coeff_hz_vec, coeff_hz_vec,
-                    coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1, res_hz2,
-                    res_hz3);
-        MUL4(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_hz2,
-             coeff_vt_vec1, res_hz3, coeff_vt_vec0, res_vt0, res_vt1, res_vt2,
-             res_vt3);
-        ADD2(res_vt0, res_vt1, res_vt2, res_vt3, res_vt0, res_vt1);
-        SRARI_H2_UH(res_vt0, res_vt1, 6);
-        SAT_UH2_UH(res_vt0, res_vt1, 7);
-        PCKEV_B2_UB(res_vt0, res_vt0, res_vt1, res_vt1, res0, res1);
-
-        dst0 = (v16u8) __msa_insve_w((v4i32) dst0, 1, (v4i32) dst1);
-        dst1 = (v16u8) __msa_insve_w((v4i32) dst2, 1, (v4i32) dst3);
-
-        AVER_UB2_UB(res0, dst0, res1, dst1, dst0, dst1);
-
-        ST4x4_UB(dst0, dst1, 0, 1, 0, 1, dst, dst_stride);
-        dst += (4 * dst_stride);
-        src0 = src4;
-    }
+    LD_UB5(src, stride, src0, src1, src2, src3, src4);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst_data);
+    VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
+    VSHF_B2_UB(src2, src3, src3, src4, mask, mask, src2, src3);
+    DOTP_UB4_UH(src0, src1, src2, src3, coeff_hz_vec, coeff_hz_vec,
+                coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1, res_hz2,
+                res_hz3);
+    MUL4(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_hz2, coeff_vt_vec1,
+         res_hz3, coeff_vt_vec0, res_vt0, res_vt1, res_vt2, res_vt3);
+    ADD2(res_vt0, res_vt1, res_vt2, res_vt3, res_vt0, res_vt1);
+    SRARI_H2_UH(res_vt0, res_vt1, 6);
+    SAT_UH2_UH(res_vt0, res_vt1, 7);
+    out = (v16u8) __msa_pckev_b((v16i8) res_vt1, (v16i8) res_vt0);
+    out = __msa_aver_u_b(out, dst_data);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
 }
 
-static void avc_chroma_hv_and_aver_dst_4w_msa(uint8_t *src, int32_t src_stride,
-                                              uint8_t *dst, int32_t dst_stride,
+static void avc_chroma_hv_and_aver_dst_4x8_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride,
+                                               uint32_t coef_hor0,
+                                               uint32_t coef_hor1,
+                                               uint32_t coef_ver0,
+                                               uint32_t coef_ver1)
+{
+    uint32_t tp0, tp1, tp2, tp3;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, res0, res1;
+    v16u8 dst0 = { 0 }, dst1 = { 0 };
+    v8u16 res_hz0, res_hz1, res_hz2, res_hz3, res_hz4, res_hz5, res_hz6, res_hz7;
+    v8u16 res_vt0, res_vt1, res_vt2, res_vt3, res_vt4, res_vt5, res_vt6, res_vt7;
+    v16i8 mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+
+    mask = LD_SB(&chroma_mask_arr[0]);
+
+    LD_UB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    LD_UB4(src, stride, src5, src6, src7, src8);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+    LW4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst1);
+    VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
+    VSHF_B2_UB(src2, src3, src3, src4, mask, mask, src2, src3);
+    VSHF_B2_UB(src4, src5, src5, src6, mask, mask, src4, src5);
+    VSHF_B2_UB(src6, src7, src7, src8, mask, mask, src6, src7);
+    DOTP_UB4_UH(src0, src1, src2, src3, coeff_hz_vec, coeff_hz_vec,
+                coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1, res_hz2, res_hz3);
+    DOTP_UB4_UH(src4, src5, src6, src7, coeff_hz_vec, coeff_hz_vec,
+                coeff_hz_vec, coeff_hz_vec, res_hz4, res_hz5, res_hz6, res_hz7);
+    MUL4(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_hz2, coeff_vt_vec1,
+         res_hz3, coeff_vt_vec0, res_vt0, res_vt1, res_vt2, res_vt3);
+    MUL4(res_hz4, coeff_vt_vec1, res_hz5, coeff_vt_vec0, res_hz6, coeff_vt_vec1,
+         res_hz7, coeff_vt_vec0, res_vt4, res_vt5, res_vt6, res_vt7);
+    ADD2(res_vt0, res_vt1, res_vt2, res_vt3, res_vt0, res_vt1);
+    ADD2(res_vt4, res_vt5, res_vt6, res_vt7, res_vt2, res_vt3);
+    SRARI_H4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 6);
+    SAT_UH4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 7);
+    PCKEV_B2_UB(res_vt1, res_vt0, res_vt3, res_vt2, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST4x8_UB(res0, res1, dst, stride);
+}
+
+static void avc_chroma_hv_and_aver_dst_4w_msa(uint8_t *src, uint8_t *dst,
+                                              int32_t stride,
                                               uint32_t coef_hor0,
                                               uint32_t coef_hor1,
                                               uint32_t coef_ver0,
@@ -1688,30 +1645,30 @@
                                               int32_t height)
 {
     if (2 == height) {
-        avc_chroma_hv_and_aver_dst_4x2_msa(src, src_stride, dst, dst_stride,
-                                           coef_hor0, coef_hor1,
-                                           coef_ver0, coef_ver1);
-    } else {
-        avc_chroma_hv_and_aver_dst_4x4mul_msa(src, src_stride, dst, dst_stride,
-                                              coef_hor0, coef_hor1,
-                                              coef_ver0, coef_ver1, height);
+        avc_chroma_hv_and_aver_dst_4x2_msa(src, dst, stride, coef_hor0,
+                                           coef_hor1, coef_ver0, coef_ver1);
+    } else if (4 == height) {
+        avc_chroma_hv_and_aver_dst_4x4_msa(src, dst, stride, coef_hor0,
+                                           coef_hor1, coef_ver0, coef_ver1);
+    } else if (8 == height) {
+        avc_chroma_hv_and_aver_dst_4x8_msa(src, dst, stride, coef_hor0,
+                                           coef_hor1, coef_ver0, coef_ver1);
     }
 }
 
-static void avc_chroma_hv_and_aver_dst_8w_msa(uint8_t *src, int32_t src_stride,
-                                              uint8_t *dst, int32_t dst_stride,
-                                              uint32_t coef_hor0,
-                                              uint32_t coef_hor1,
-                                              uint32_t coef_ver0,
-                                              uint32_t coef_ver1,
-                                              int32_t height)
+static void avc_chroma_hv_and_aver_dst_8x4_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride,
+                                               uint32_t coef_hor0,
+                                               uint32_t coef_hor1,
+                                               uint32_t coef_ver0,
+                                               uint32_t coef_ver1)
 {
-    uint32_t row;
+    uint64_t tp0, tp1, tp2, tp3;
     v16u8 src0, src1, src2, src3, src4, out0, out1;
     v8u16 res_hz0, res_hz1, res_hz2;
     v8u16 res_hz3, res_hz4;
     v8u16 res_vt0, res_vt1, res_vt2, res_vt3;
-    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 dst0 = { 0 }, dst1 = { 0 };
     v16i8 mask;
     v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
     v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
@@ -1722,197 +1679,226 @@
     mask = LD_SB(&chroma_mask_arr[32]);
 
     src0 = LD_UB(src);
-    src += src_stride;
-
+    src += stride;
     src0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, (v16i8) src0);
     res_hz0 = __msa_dotp_u_h(src0, coeff_hz_vec);
+    LD_UB4(src, stride, src1, src2, src3, src4);
+    src += (4 * stride);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    VSHF_B2_UB(src1, src1, src2, src2, mask, mask, src1, src2);
+    VSHF_B2_UB(src3, src3, src4, src4, mask, mask, src3, src4);
+    DOTP_UB4_UH(src1, src2, src3, src4, coeff_hz_vec, coeff_hz_vec,
+                coeff_hz_vec, coeff_hz_vec, res_hz1, res_hz2, res_hz3, res_hz4);
+    MUL4(res_hz1, coeff_vt_vec0, res_hz2, coeff_vt_vec0, res_hz3, coeff_vt_vec0,
+         res_hz4, coeff_vt_vec0, res_vt0, res_vt1, res_vt2, res_vt3);
+    res_vt0 += (res_hz0 * coeff_vt_vec1);
+    res_vt1 += (res_hz1 * coeff_vt_vec1);
+    res_vt2 += (res_hz2 * coeff_vt_vec1);
+    res_vt3 += (res_hz3 * coeff_vt_vec1);
+    SRARI_H4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 6);
+    SAT_UH4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 7);
+    PCKEV_B2_UB(res_vt1, res_vt0, res_vt3, res_vt2, out0, out1);
+    AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
+    ST8x4_UB(out0, out1, dst, stride);
+}
 
-    for (row = (height >> 2); row--;) {
-        LD_UB4(src, src_stride, src1, src2, src3, src4);
-        src += (4 * src_stride);
+static void avc_chroma_hv_and_aver_dst_8x8_msa(uint8_t *src, uint8_t *dst,
+                                               int32_t stride,
+                                               uint32_t coef_hor0,
+                                               uint32_t coef_hor1,
+                                               uint32_t coef_ver0,
+                                               uint32_t coef_ver1)
+{
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 out0, out1, out2, out3;
+    v16u8 dst0 = { 0 }, dst1 = { 0 }, dst2 = { 0 }, dst3 = { 0 };
+    v8u16 res_hz0, res_hz1, res_hz2, res_hz3, res_hz4;
+    v8u16 res_hz5, res_hz6, res_hz7, res_hz8;
+    v8u16 res_vt0, res_vt1, res_vt2, res_vt3;
+    v8u16 res_vt4, res_vt5, res_vt6, res_vt7;
+    v16i8 mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
 
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-        VSHF_B2_UB(src1, src1, src2, src2, mask, mask, src1, src2);
-        VSHF_B2_UB(src3, src3, src4, src4, mask, mask, src3, src4);
-        DOTP_UB4_UH(src1, src2, src3, src4, coeff_hz_vec, coeff_hz_vec,
-                    coeff_hz_vec, coeff_hz_vec, res_hz1, res_hz2, res_hz3,
-                    res_hz4);
-        MUL4(res_hz1, coeff_vt_vec0, res_hz2, coeff_vt_vec0, res_hz3,
-             coeff_vt_vec0, res_hz4, coeff_vt_vec0, res_vt0, res_vt1, res_vt2,
-             res_vt3);
+    mask = LD_SB(&chroma_mask_arr[32]);
 
-        res_vt0 += (res_hz0 * coeff_vt_vec1);
-        res_vt1 += (res_hz1 * coeff_vt_vec1);
-        res_vt2 += (res_hz2 * coeff_vt_vec1);
-        res_vt3 += (res_hz3 * coeff_vt_vec1);
+    LD_UB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    LD_UB4(src, stride, src5, src6, src7, src8);
+    src0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, (v16i8) src0);
+    VSHF_B2_UB(src1, src1, src2, src2, mask, mask, src1, src2);
+    VSHF_B2_UB(src3, src3, src4, src4, mask, mask, src3, src4);
+    VSHF_B2_UB(src5, src5, src6, src6, mask, mask, src5, src6);
+    VSHF_B2_UB(src7, src7, src8, src8, mask, mask, src7, src8);
+    res_hz0 = __msa_dotp_u_h(src0, coeff_hz_vec);
+    DOTP_UB4_UH(src1, src2, src3, src4, coeff_hz_vec, coeff_hz_vec,
+                coeff_hz_vec, coeff_hz_vec, res_hz1, res_hz2, res_hz3,
+                res_hz4);
+    DOTP_UB4_UH(src5, src6, src7, src8, coeff_hz_vec, coeff_hz_vec,
+                coeff_hz_vec, coeff_hz_vec, res_hz5, res_hz6, res_hz7, res_hz8);
+    MUL4(res_hz1, coeff_vt_vec0, res_hz2, coeff_vt_vec0, res_hz3,
+         coeff_vt_vec0, res_hz4, coeff_vt_vec0, res_vt0, res_vt1, res_vt2,
+         res_vt3);
+    MUL4(res_hz5, coeff_vt_vec0, res_hz6, coeff_vt_vec0, res_hz7,
+         coeff_vt_vec0, res_hz8, coeff_vt_vec0, res_vt4, res_vt5, res_vt6,
+         res_vt7);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst2);
+    INSERT_D2_UB(tp2, tp3, dst3);
+    res_vt0 += (res_hz0 * coeff_vt_vec1);
+    res_vt1 += (res_hz1 * coeff_vt_vec1);
+    res_vt2 += (res_hz2 * coeff_vt_vec1);
+    res_vt3 += (res_hz3 * coeff_vt_vec1);
+    res_vt4 += (res_hz4 * coeff_vt_vec1);
+    res_vt5 += (res_hz5 * coeff_vt_vec1);
+    res_vt6 += (res_hz6 * coeff_vt_vec1);
+    res_vt7 += (res_hz7 * coeff_vt_vec1);
+    SRARI_H4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 6);
+    SRARI_H4_UH(res_vt4, res_vt5, res_vt6, res_vt7, 6);
+    SAT_UH4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 7);
+    SAT_UH4_UH(res_vt4, res_vt5, res_vt6, res_vt7, 7);
+    PCKEV_B2_UB(res_vt1, res_vt0, res_vt3, res_vt2, out0, out1);
+    PCKEV_B2_UB(res_vt5, res_vt4, res_vt7, res_vt6, out2, out3);
+    AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
+    AVER_UB2_UB(out2, dst2, out3, dst3, out2, out3);
+    ST8x8_UB(out0, out1, out2, out3, dst, stride);
+}
 
-        SRARI_H4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 6);
-        SAT_UH4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 7);
-
-        PCKEV_B2_UB(res_vt1, res_vt0, res_vt3, res_vt2, out0, out1);
-        PCKEV_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-        AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
-        ST8x4_UB(out0, out1, dst, dst_stride);
-        dst += (4 * dst_stride);
-
-        res_hz0 = res_hz4;
+static void avc_chroma_hv_and_aver_dst_8w_msa(uint8_t *src, uint8_t *dst,
+                                              int32_t stride,
+                                              uint32_t coef_hor0,
+                                              uint32_t coef_hor1,
+                                              uint32_t coef_ver0,
+                                              uint32_t coef_ver1,
+                                              int32_t height)
+{
+    if (4 == height) {
+        avc_chroma_hv_and_aver_dst_8x4_msa(src, dst, stride, coef_hor0,
+                                           coef_hor1, coef_ver0, coef_ver1);
+    } else if (8 == height) {
+        avc_chroma_hv_and_aver_dst_8x8_msa(src, dst, stride, coef_hor0,
+                                           coef_hor1, coef_ver0, coef_ver1);
     }
 }
 
-static void copy_width8_msa(uint8_t *src, int32_t src_stride,
-                            uint8_t *dst, int32_t dst_stride,
+static void copy_width4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
                             int32_t height)
 {
-    int32_t cnt;
-    uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    uint32_t tp0, tp1, tp2, tp3, tp4, tp5, tp6, tp7;
 
-    if (0 == height % 12) {
-        for (cnt = (height / 12); cnt--;) {
-            LD_UB8(src, src_stride,
-                   src0, src1, src2, src3, src4, src5, src6, src7);
-            src += (8 * src_stride);
-
-            out0 = __msa_copy_u_d((v2i64) src0, 0);
-            out1 = __msa_copy_u_d((v2i64) src1, 0);
-            out2 = __msa_copy_u_d((v2i64) src2, 0);
-            out3 = __msa_copy_u_d((v2i64) src3, 0);
-            out4 = __msa_copy_u_d((v2i64) src4, 0);
-            out5 = __msa_copy_u_d((v2i64) src5, 0);
-            out6 = __msa_copy_u_d((v2i64) src6, 0);
-            out7 = __msa_copy_u_d((v2i64) src7, 0);
-
-            SD4(out0, out1, out2, out3, dst, dst_stride);
-            dst += (4 * dst_stride);
-            SD4(out4, out5, out6, out7, dst, dst_stride);
-            dst += (4 * dst_stride);
-
-            LD_UB4(src, src_stride, src0, src1, src2, src3);
-            src += (4 * src_stride);
-
-            out0 = __msa_copy_u_d((v2i64) src0, 0);
-            out1 = __msa_copy_u_d((v2i64) src1, 0);
-            out2 = __msa_copy_u_d((v2i64) src2, 0);
-            out3 = __msa_copy_u_d((v2i64) src3, 0);
-
-            SD4(out0, out1, out2, out3, dst, dst_stride);
-            dst += (4 * dst_stride);
-        }
-    } else if (0 == height % 8) {
-        for (cnt = height >> 3; cnt--;) {
-            LD_UB8(src, src_stride,
-                   src0, src1, src2, src3, src4, src5, src6, src7);
-            src += (8 * src_stride);
-
-            out0 = __msa_copy_u_d((v2i64) src0, 0);
-            out1 = __msa_copy_u_d((v2i64) src1, 0);
-            out2 = __msa_copy_u_d((v2i64) src2, 0);
-            out3 = __msa_copy_u_d((v2i64) src3, 0);
-            out4 = __msa_copy_u_d((v2i64) src4, 0);
-            out5 = __msa_copy_u_d((v2i64) src5, 0);
-            out6 = __msa_copy_u_d((v2i64) src6, 0);
-            out7 = __msa_copy_u_d((v2i64) src7, 0);
-
-            SD4(out0, out1, out2, out3, dst, dst_stride);
-            dst += (4 * dst_stride);
-            SD4(out4, out5, out6, out7, dst, dst_stride);
-            dst += (4 * dst_stride);
-        }
-    } else if (0 == height % 4) {
-        for (cnt = (height / 4); cnt--;) {
-            LD_UB4(src, src_stride, src0, src1, src2, src3);
-            src += (4 * src_stride);
-            out0 = __msa_copy_u_d((v2i64) src0, 0);
-            out1 = __msa_copy_u_d((v2i64) src1, 0);
-            out2 = __msa_copy_u_d((v2i64) src2, 0);
-            out3 = __msa_copy_u_d((v2i64) src3, 0);
-
-            SD4(out0, out1, out2, out3, dst, dst_stride);
-            dst += (4 * dst_stride);
-        }
-    } else if (0 == height % 2) {
-        for (cnt = (height / 2); cnt--;) {
-            LD_UB2(src, src_stride, src0, src1);
-            src += (2 * src_stride);
-            out0 = __msa_copy_u_d((v2i64) src0, 0);
-            out1 = __msa_copy_u_d((v2i64) src1, 0);
-
-            SD(out0, dst);
-            dst += dst_stride;
-            SD(out1, dst);
-            dst += dst_stride;
-        }
+    if (8 == height) {
+        LW4(src, stride, tp0, tp1, tp2, tp3);
+        src += 4 * stride;
+        LW4(src, stride, tp4, tp5, tp6, tp7);
+        SW4(tp0, tp1, tp2, tp3, dst, stride);
+        dst += 4 * stride;
+        SW4(tp4, tp5, tp6, tp7, dst, stride);
+    } else if (4 == height) {
+        LW4(src, stride, tp0, tp1, tp2, tp3);
+        SW4(tp0, tp1, tp2, tp3, dst, stride);
+    } else if (2 == height) {
+        LW2(src, stride, tp0, tp1);
+        SW(tp0, dst);
+        dst += stride;
+        SW(tp1, dst);
     }
 }
 
-static void avg_width4_msa(uint8_t *src, int32_t src_stride,
-                           uint8_t *dst, int32_t dst_stride,
-                           int32_t height)
+static void copy_width8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
+                            int32_t height)
 {
-    int32_t cnt;
-    uint32_t out0, out1, out2, out3;
-    v16u8 src0, src1, src2, src3;
-    v16u8 dst0, dst1, dst2, dst3;
+    uint64_t src0, src1, src2, src3, src4, src5, src6, src7;
 
-    if (0 == (height % 4)) {
-        for (cnt = (height / 4); cnt--;) {
-            LD_UB4(src, src_stride, src0, src1, src2, src3);
-            src += (4 * src_stride);
-
-            LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-
-            AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3,
-                        dst0, dst1, dst2, dst3);
-
-            out0 = __msa_copy_u_w((v4i32) dst0, 0);
-            out1 = __msa_copy_u_w((v4i32) dst1, 0);
-            out2 = __msa_copy_u_w((v4i32) dst2, 0);
-            out3 = __msa_copy_u_w((v4i32) dst3, 0);
-            SW4(out0, out1, out2, out3, dst, dst_stride);
-            dst += (4 * dst_stride);
-        }
-    } else if (0 == (height % 2)) {
-        for (cnt = (height / 2); cnt--;) {
-            LD_UB2(src, src_stride, src0, src1);
-            src += (2 * src_stride);
-
-            LD_UB2(dst, dst_stride, dst0, dst1);
-
-            AVER_UB2_UB(src0, dst0, src1, dst1, dst0, dst1);
-
-            out0 = __msa_copy_u_w((v4i32) dst0, 0);
-            out1 = __msa_copy_u_w((v4i32) dst1, 0);
-            SW(out0, dst);
-            dst += dst_stride;
-            SW(out1, dst);
-            dst += dst_stride;
-        }
+    if (8 == height) {
+        LD4(src, stride, src0, src1, src2, src3);
+        src += 4 * stride;
+        LD4(src, stride, src4, src5, src6, src7);
+        SD4(src0, src1, src2, src3, dst, stride);
+        dst += 4 * stride;
+        SD4(src4, src5, src6, src7, dst, stride);
+    } else if (4 == height) {
+        LD4(src, stride, src0, src1, src2, src3);
+        SD4(src0, src1, src2, src3, dst, stride);
     }
 }
 
-static void avg_width8_msa(uint8_t *src, int32_t src_stride,
-                           uint8_t *dst, int32_t dst_stride,
+static void avg_width4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
                            int32_t height)
 {
-    int32_t cnt;
-    uint64_t out0, out1, out2, out3;
-    v16u8 src0, src1, src2, src3;
-    v16u8 dst0, dst1, dst2, dst3;
+    uint32_t tp0, tp1, tp2, tp3;
+    v16u8 src0 = { 0 }, src1 = { 0 }, dst0 = { 0 }, dst1 = { 0 };
 
-    for (cnt = (height / 4); cnt--;) {
-        LD_UB4(src, src_stride, src0, src1, src2, src3);
-        src += (4 * src_stride);
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    if (8 == height) {
+        LW4(src, stride, tp0, tp1, tp2, tp3);
+        src += 4 * stride;
+        INSERT_W4_UB(tp0, tp1, tp2, tp3, src0);
+        LW4(src, stride, tp0, tp1, tp2, tp3);
+        INSERT_W4_UB(tp0, tp1, tp2, tp3, src1);
+        LW4(dst, stride, tp0, tp1, tp2, tp3);
+        INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+        LW4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+        INSERT_W4_UB(tp0, tp1, tp2, tp3, dst1);
+        AVER_UB2_UB(src0, dst0, src1, dst1, dst0, dst1);
+        ST4x8_UB(dst0, dst1, dst, stride);
+    } else if (4 == height) {
+        LW4(src, stride, tp0, tp1, tp2, tp3);
+        INSERT_W4_UB(tp0, tp1, tp2, tp3, src0);
+        LW4(dst, stride, tp0, tp1, tp2, tp3);
+        INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+        dst0 = __msa_aver_u_b(src0, dst0);
+        ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
+    } else if (2 == height) {
+        LW2(src, stride, tp0, tp1);
+        INSERT_W2_UB(tp0, tp1, src0);
+        LW2(dst, stride, tp0, tp1);
+        INSERT_W2_UB(tp0, tp1, dst0);
+        dst0 = __msa_aver_u_b(src0, dst0);
+        ST4x2_UB(dst0, dst, stride);
+    }
+}
 
-        AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3,
-                    dst0, dst1, dst2, dst3);
+static void avg_width8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
+                           int32_t height)
+{
+    uint64_t tp0, tp1, tp2, tp3, tp4, tp5, tp6, tp7;
+    v16u8 src0 = { 0 }, src1 = { 0 }, src2 = { 0 }, src3 = { 0 };
+    v16u8 dst0 = { 0 }, dst1 = { 0 }, dst2 = { 0 }, dst3 = { 0 };
 
-        out0 = __msa_copy_u_d((v2i64) dst0, 0);
-        out1 = __msa_copy_u_d((v2i64) dst1, 0);
-        out2 = __msa_copy_u_d((v2i64) dst2, 0);
-        out3 = __msa_copy_u_d((v2i64) dst3, 0);
-        SD4(out0, out1, out2, out3, dst, dst_stride);
-        dst += (4 * dst_stride);
+    if (8 == height) {
+        LD4(src, stride, tp0, tp1, tp2, tp3);
+        src += 4 * stride;
+        LD4(src, stride, tp4, tp5, tp6, tp7);
+        INSERT_D2_UB(tp0, tp1, src0);
+        INSERT_D2_UB(tp2, tp3, src1);
+        INSERT_D2_UB(tp4, tp5, src2);
+        INSERT_D2_UB(tp6, tp7, src3);
+        LD4(dst, stride, tp0, tp1, tp2, tp3);
+        LD4(dst + 4 * stride, stride, tp4, tp5, tp6, tp7);
+        INSERT_D2_UB(tp0, tp1, dst0);
+        INSERT_D2_UB(tp2, tp3, dst1);
+        INSERT_D2_UB(tp4, tp5, dst2);
+        INSERT_D2_UB(tp6, tp7, dst3);
+        AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, dst0, dst1,
+                    dst2, dst3);
+        ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
+    } else if (4 == height) {
+        LD4(src, stride, tp0, tp1, tp2, tp3);
+        INSERT_D2_UB(tp0, tp1, src0);
+        INSERT_D2_UB(tp2, tp3, src1);
+        LD4(dst, stride, tp0, tp1, tp2, tp3);
+        INSERT_D2_UB(tp0, tp1, dst0);
+        INSERT_D2_UB(tp2, tp3, dst1);
+        AVER_UB2_UB(src0, dst0, src1, dst1, dst0, dst1);
+        ST8x4_UB(dst0, dst1, dst, stride);
     }
 }
 
@@ -1928,15 +1914,13 @@
     } else if (y) {
         avc_chroma_vt_8w_msa(src, dst, stride, y, (8 - y), height);
     } else {
-        copy_width8_msa(src, stride, dst, stride, height);
+        copy_width8_msa(src, dst, stride, height);
     }
 }
 
 void ff_put_h264_chroma_mc4_msa(uint8_t *dst, uint8_t *src,
                                 ptrdiff_t stride, int height, int x, int y)
 {
-    int32_t cnt;
-
     av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
 
     if (x && y) {
@@ -1946,12 +1930,7 @@
     } else if (y) {
         avc_chroma_vt_4w_msa(src, dst, stride, y, (8 - y), height);
     } else {
-        for (cnt = height; cnt--;) {
-            *((uint32_t *) dst) = *((uint32_t *) src);
-
-            src += stride;
-            dst += stride;
-        }
+        copy_width4_msa(src, dst, stride, height);
     }
 }
 
@@ -1985,17 +1964,14 @@
 
 
     if (x && y) {
-        avc_chroma_hv_and_aver_dst_8w_msa(src, stride, dst,
-                                          stride, x, (8 - x), y,
+        avc_chroma_hv_and_aver_dst_8w_msa(src, dst, stride, x, (8 - x), y,
                                           (8 - y), height);
     } else if (x) {
-        avc_chroma_hz_and_aver_dst_8w_msa(src, stride, dst,
-                                          stride, x, (8 - x), height);
+        avc_chroma_hz_and_aver_dst_8w_msa(src, dst, stride, x, (8 - x), height);
     } else if (y) {
-        avc_chroma_vt_and_aver_dst_8w_msa(src, stride, dst,
-                                          stride, y, (8 - y), height);
+        avc_chroma_vt_and_aver_dst_8w_msa(src, dst, stride, y, (8 - y), height);
     } else {
-        avg_width8_msa(src, stride, dst, stride, height);
+        avg_width8_msa(src, dst, stride, height);
     }
 }
 
@@ -2005,17 +1981,14 @@
     av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
 
     if (x && y) {
-        avc_chroma_hv_and_aver_dst_4w_msa(src, stride, dst,
-                                          stride, x, (8 - x), y,
+        avc_chroma_hv_and_aver_dst_4w_msa(src, dst, stride, x, (8 - x), y,
                                           (8 - y), height);
     } else if (x) {
-        avc_chroma_hz_and_aver_dst_4w_msa(src, stride, dst,
-                                          stride, x, (8 - x), height);
+        avc_chroma_hz_and_aver_dst_4w_msa(src, dst, stride, x, (8 - x), height);
     } else if (y) {
-        avc_chroma_vt_and_aver_dst_4w_msa(src, stride, dst,
-                                          stride, y, (8 - y), height);
+        avc_chroma_vt_and_aver_dst_4w_msa(src, dst, stride, y, (8 - y), height);
     } else {
-        avg_width4_msa(src, stride, dst, stride, height);
+        avg_width4_msa(src, dst, stride, height);
     }
 }
 
@@ -2027,15 +2000,12 @@
     av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
 
     if (x && y) {
-        avc_chroma_hv_and_aver_dst_2w_msa(src, stride, dst,
-                                          stride, x, (8 - x), y,
+        avc_chroma_hv_and_aver_dst_2w_msa(src, dst, stride, x, (8 - x), y,
                                           (8 - y), height);
     } else if (x) {
-        avc_chroma_hz_and_aver_dst_2w_msa(src, stride, dst,
-                                          stride, x, (8 - x), height);
+        avc_chroma_hz_and_aver_dst_2w_msa(src, dst, stride, x, (8 - x), height);
     } else if (y) {
-        avc_chroma_vt_and_aver_dst_2w_msa(src, stride, dst,
-                                          stride, y, (8 - y), height);
+        avc_chroma_vt_and_aver_dst_2w_msa(src, dst, stride, y, (8 - y), height);
     } else {
         for (cnt = height; cnt--;) {
             dst[0] = (dst[0] + src[0] + 1) >> 1;

diff --git a/libavcodec/mips/h264dsp_mmi.c b/libavcodec/mips/h264dsp_mmi.c
index ac6fa99..ac65a20 100644
--- a/libavcodec/mips/h264dsp_mmi.c
+++ b/libavcodec/mips/h264dsp_mmi.c

@@ -31,7 +31,6 @@
 {
     double ftmp[9];
     DECLARE_VAR_LOW32;
-    DECLARE_VAR_ALL64;
 
     __asm__ volatile (
         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
@@ -59,12 +58,16 @@
         MMI_SWC1(%[ftmp2], %[dst1], 0x00)
         MMI_SWC1(%[ftmp3], %[dst2], 0x00)
         MMI_SWC1(%[ftmp4], %[dst3], 0x00)
+
+        /* memset(src, 0, 32); */
+        "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x00(%[src])            \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x10(%[src])            \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
           RESTRICT_ASM_LOW32
-          RESTRICT_ASM_ALL64
           [ftmp8]"=&f"(ftmp[8])
         : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
           [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
@@ -72,7 +75,6 @@
         : "memory"
     );
 
-    memset(src, 0, 32);
 }
 
 void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
@@ -80,7 +82,6 @@
     double ftmp[12];
     uint64_t tmp[1];
     DECLARE_VAR_LOW32;
-    DECLARE_VAR_ALL64;
     DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
@@ -152,6 +153,11 @@
         MMI_SWC1(%[ftmp2], %[dst], 0x00)
         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
+
+        /* memset(block, 0, 32) */
+        "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x00(%[block])          \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x10(%[block])          \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
@@ -159,7 +165,6 @@
           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
           [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
           RESTRICT_ASM_LOW32
-          RESTRICT_ASM_ALL64
           RESTRICT_ASM_ADDRT
           [tmp0]"=&r"(tmp[0])
         : [dst]"r"(dst),                    [block]"r"(block),
@@ -167,7 +172,6 @@
         : "memory"
     );
 
-    memset(block, 0, 32);
 }
 
 void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
@@ -176,7 +180,6 @@
     uint64_t tmp[7];
     mips_reg addr[1];
     DECLARE_VAR_LOW32;
-    DECLARE_VAR_ALL64;
     DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
@@ -617,6 +620,17 @@
         MMI_SWC1(%[ftmp6], %[addr0], 0x00)
         MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
         PTR_ADDIU  "$29,        $29,            0x20                    \n\t"
+
+        /* memset(block, 0, 128) */
+        "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x00(%[block])          \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x10(%[block])          \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x20(%[block])          \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x30(%[block])          \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x40(%[block])          \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x50(%[block])          \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x60(%[block])          \n\t"
+        "gssqc1     %[ftmp0],   %[ftmp0],       0x70(%[block])          \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
@@ -630,7 +644,6 @@
           [tmp4]"=&r"(tmp[4]),              [tmp5]"=&r"(tmp[5]),
           [tmp6]"=&r"(tmp[6]),
           RESTRICT_ASM_LOW32
-          RESTRICT_ASM_ALL64
           RESTRICT_ASM_ADDRT
           [addr0]"=&r"(addr[0])
         : [dst]"r"(dst),                    [block]"r"(block),
@@ -638,7 +651,6 @@
         : "$29","memory"
     );
 
-    memset(block, 0, 128);
 }
 
 void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)

diff --git a/libavcodec/mips/h264qpel_msa.c b/libavcodec/mips/h264qpel_msa.c
index a22a482..9c779bd 100644
--- a/libavcodec/mips/h264qpel_msa.c
+++ b/libavcodec/mips/h264qpel_msa.c

@@ -21,31 +21,7 @@
 #include "libavutil/mips/generic_macros_msa.h"
 #include "h264dsp_mips.h"
 
-#define AVC_CALC_DPADD_H_6PIX_2COEFF_SH(in0, in1, in2, in3, in4, in5)    \
-( {                                                                      \
-    v4i32 tmp0_m, tmp1_m;                                                \
-    v8i16 out0_m, out1_m, out2_m, out3_m;                                \
-    v8i16 minus5h_m = __msa_ldi_h(-5);                                   \
-    v8i16 plus20h_m = __msa_ldi_h(20);                                   \
-                                                                         \
-    ILVRL_H2_SW(in5, in0, tmp0_m, tmp1_m);                               \
-                                                                         \
-    tmp0_m = __msa_hadd_s_w((v8i16) tmp0_m, (v8i16) tmp0_m);             \
-    tmp1_m = __msa_hadd_s_w((v8i16) tmp1_m, (v8i16) tmp1_m);             \
-                                                                         \
-    ILVRL_H2_SH(in1, in4, out0_m, out1_m);                               \
-    DPADD_SH2_SW(out0_m, out1_m, minus5h_m, minus5h_m, tmp0_m, tmp1_m);  \
-    ILVRL_H2_SH(in2, in3, out2_m, out3_m);                               \
-    DPADD_SH2_SW(out2_m, out3_m, plus20h_m, plus20h_m, tmp0_m, tmp1_m);  \
-                                                                         \
-    SRARI_W2_SW(tmp0_m, tmp1_m, 10);                                     \
-    SAT_SW2_SW(tmp0_m, tmp1_m, 7);                                       \
-    out0_m = __msa_pckev_h((v8i16) tmp1_m, (v8i16) tmp0_m);              \
-                                                                         \
-    out0_m;                                                              \
-} )
-
-static const uint8_t luma_mask_arr[16 * 8] = {
+static const uint8_t luma_mask_arr[16 * 6] __attribute__((aligned(0x40))) = {
     /* 8 width cases */
     0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12,
     1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 9, 7, 10, 8, 11,
@@ -55,9 +31,6 @@
     0, 5, 1, 6, 2, 7, 3, 8, 16, 21, 17, 22, 18, 23, 19, 24,
     1, 4, 2, 5, 3, 6, 4, 7, 17, 20, 18, 21, 19, 22, 20, 23,
     2, 3, 3, 4, 4, 5, 5, 6, 18, 19, 19, 20, 20, 21, 21, 22,
-
-    2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25,
-    3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26
 };
 
 #define AVC_CALC_DPADD_B_6PIX_2COEFF_SH(vec0, vec1, vec2, vec3, vec4, vec5,  \
@@ -75,60 +48,6 @@
     DPADD_SB2_SH(tmp0_m, tmp1_m, plus20b_m, plus20b_m, out1, out2);          \
 }
 
-#define AVC_CALC_DPADD_B_6PIX_2COEFF_R_SH(vec0, vec1, vec2, vec3, vec4, vec5)  \
-( {                                                                            \
-    v8i16 tmp1_m;                                                              \
-    v16i8 tmp0_m, tmp2_m;                                                      \
-    v16i8 minus5b_m = __msa_ldi_b(-5);                                         \
-    v16i8 plus20b_m = __msa_ldi_b(20);                                         \
-                                                                               \
-    tmp1_m = (v8i16) __msa_ilvr_b((v16i8) vec5, (v16i8) vec0);                 \
-    tmp1_m = __msa_hadd_s_h((v16i8) tmp1_m, (v16i8) tmp1_m);                   \
-                                                                               \
-    ILVR_B2_SB(vec4, vec1, vec3, vec2, tmp0_m, tmp2_m);                        \
-    DPADD_SB2_SH(tmp0_m, tmp2_m, minus5b_m, plus20b_m, tmp1_m, tmp1_m);        \
-                                                                               \
-    tmp1_m;                                                                    \
-} )
-
-#define AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(vec0, vec1, vec2, vec3, vec4, vec5)  \
-( {                                                                            \
-    v4i32 tmp1_m;                                                              \
-    v8i16 tmp2_m, tmp3_m;                                                      \
-    v8i16 minus5h_m = __msa_ldi_h(-5);                                         \
-    v8i16 plus20h_m = __msa_ldi_h(20);                                         \
-                                                                               \
-    tmp1_m = (v4i32) __msa_ilvr_h((v8i16) vec5, (v8i16) vec0);                 \
-    tmp1_m = __msa_hadd_s_w((v8i16) tmp1_m, (v8i16) tmp1_m);                   \
-                                                                               \
-    ILVR_H2_SH(vec1, vec4, vec2, vec3, tmp2_m, tmp3_m);                        \
-    DPADD_SH2_SW(tmp2_m, tmp3_m, minus5h_m, plus20h_m, tmp1_m, tmp1_m);        \
-                                                                               \
-    tmp1_m = __msa_srari_w(tmp1_m, 10);                                        \
-    tmp1_m = __msa_sat_s_w(tmp1_m, 7);                                         \
-                                                                               \
-    tmp2_m = __msa_pckev_h((v8i16) tmp1_m, (v8i16) tmp1_m);                    \
-                                                                               \
-    tmp2_m;                                                                    \
-} )
-
-#define AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,              \
-                                                    mask0, mask1, mask2)     \
-( {                                                                          \
-    v8i16 hz_out_m;                                                          \
-    v16i8 vec0_m, vec1_m, vec2_m;                                            \
-    v16i8 minus5b_m = __msa_ldi_b(-5);                                       \
-    v16i8 plus20b_m = __msa_ldi_b(20);                                       \
-                                                                             \
-    vec0_m = __msa_vshf_b((v16i8) mask0, (v16i8) src1, (v16i8) src0);        \
-    hz_out_m = __msa_hadd_s_h(vec0_m, vec0_m);                               \
-                                                                             \
-    VSHF_B2_SB(src0, src1, src0, src1, mask1, mask2, vec1_m, vec2_m);        \
-    DPADD_SB2_SH(vec1_m, vec2_m, minus5b_m, plus20b_m, hz_out_m, hz_out_m);  \
-                                                                             \
-    hz_out_m;                                                                \
-} )
-
 #define AVC_HORZ_FILTER_SH(in0, in1, mask0, mask1, mask2)  \
 ( {                                                        \
     v8i16 out0_m;                                          \
@@ -171,258 +90,27 @@
     out0_m;                                                         \
 } )
 
-static void avc_luma_mid_4w_msa(const uint8_t *src, int32_t src_stride,
-                                uint8_t *dst, int32_t dst_stride,
-                                int32_t height)
+static void avc_luma_hv_qrt_4x4_msa(const uint8_t *src_x, const uint8_t *src_y,
+                                    uint8_t *dst, int32_t stride)
 {
-    uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4;
-    v16i8 mask0, mask1, mask2;
-    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
-    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
-    v8i16 dst0, dst1, dst2, dst3;
-
-    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-
-    hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
-                                                          mask0, mask1, mask2);
-    hz_out2 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
-                                                          mask0, mask1, mask2);
-
-    PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
-
-    hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
-
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src, src_stride, src0, src1, src2, src3);
-        src += (4 * src_stride);
-
-        XORI_B4_128_SB(src0, src1, src2, src3);
-
-        hz_out5 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
-                                                              mask0, mask1,
-                                                              mask2);
-        hz_out7 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
-                                                              mask0, mask1,
-                                                              mask2);
-
-        PCKOD_D2_SH(hz_out5, hz_out5, hz_out7, hz_out7, hz_out6, hz_out8);
-
-        dst0 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out0, hz_out1, hz_out2,
-                                                 hz_out3, hz_out4, hz_out5);
-        dst1 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out1, hz_out2, hz_out3,
-                                                 hz_out4, hz_out5, hz_out6);
-        dst2 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out2, hz_out3, hz_out4,
-                                                 hz_out5, hz_out6, hz_out7);
-        dst3 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out3, hz_out4, hz_out5,
-                                                 hz_out6, hz_out7, hz_out8);
-
-        PCKEV_B2_SB(dst1, dst0, dst3, dst2, src0, src1);
-        XORI_B2_128_SB(src0, src1);
-
-        ST4x4_UB(src0, src1, 0, 2, 0, 2, dst, dst_stride);
-
-        dst += (4 * dst_stride);
-
-        hz_out0 = hz_out4;
-        hz_out1 = hz_out5;
-        hz_out2 = hz_out6;
-        hz_out3 = hz_out7;
-        hz_out4 = hz_out8;
-    }
-}
-
-static void avc_luma_mid_8w_msa(const uint8_t *src, int32_t src_stride,
-                                uint8_t *dst, int32_t dst_stride,
-                                int32_t height)
-{
-    uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4;
-    v16i8 mask0, mask1, mask2;
-    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
-    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
-    v8i16 dst0, dst1, dst2, dst3;
-    v16u8 out0, out1;
-
-    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
-
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    hz_out0 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
-    hz_out1 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
-    hz_out2 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
-    hz_out3 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
-    hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
-
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src, src_stride, src0, src1, src2, src3);
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        src += (4 * src_stride);
-
-        hz_out5 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
-        hz_out6 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
-        hz_out7 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
-        hz_out8 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
-        dst0 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out0, hz_out1, hz_out2,
-                                               hz_out3, hz_out4, hz_out5);
-        dst1 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out1, hz_out2, hz_out3,
-                                               hz_out4, hz_out5, hz_out6);
-        dst2 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out2, hz_out3, hz_out4,
-                                               hz_out5, hz_out6, hz_out7);
-        dst3 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out3, hz_out4, hz_out5,
-                                               hz_out6, hz_out7, hz_out8);
-        out0 = PCKEV_XORI128_UB(dst0, dst1);
-        out1 = PCKEV_XORI128_UB(dst2, dst3);
-        ST8x4_UB(out0, out1, dst, dst_stride);
-
-        dst += (4 * dst_stride);
-        hz_out3 = hz_out7;
-        hz_out1 = hz_out5;
-        hz_out5 = hz_out4;
-        hz_out4 = hz_out8;
-        hz_out2 = hz_out6;
-        hz_out0 = hz_out5;
-    }
-}
-
-static void avc_luma_mid_16w_msa(const uint8_t *src, int32_t src_stride,
-                                 uint8_t *dst, int32_t dst_stride,
-                                 int32_t height)
-{
-    uint32_t multiple8_cnt;
-
-    for (multiple8_cnt = 2; multiple8_cnt--;) {
-        avc_luma_mid_8w_msa(src, src_stride, dst, dst_stride, height);
-        src += 8;
-        dst += 8;
-    }
-}
-
-static void avc_luma_midh_qrt_4w_msa(const uint8_t *src, int32_t src_stride,
-                                     uint8_t *dst, int32_t dst_stride,
-                                     int32_t height, uint8_t horiz_offset)
-{
-    uint32_t row;
-    v16i8 src0, src1, src2, src3, src4, src5, src6;
-    v8i16 vt_res0, vt_res1, vt_res2, vt_res3;
-    v4i32 hz_res0, hz_res1;
-    v8i16 dst0, dst1;
-    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5;
-    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
-    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
-    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
-    v8i16 minus5h = __msa_ldi_h(-5);
-    v8i16 plus20h = __msa_ldi_h(20);
-    v8i16 zeros = { 0 };
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
     v16u8 out;
+    v16i8 src_hz0, src_hz1, src_hz2, src_hz3, src_vt7, src_vt8;
+    v16i8 src_vt0, src_vt1, src_vt2, src_vt3, src_vt4, src_vt5, src_vt6;
+    v16i8 src_vt10_r, src_vt32_r, src_vt54_r, src_vt76_r;
+    v16i8 mask0, mask1, mask2, filt0, filt1, filt2;
+    v8i16 hz_out0, hz_out1, vt_out0, vt_out1, out0, out1;
 
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-
-    for (row = (height >> 1); row--;) {
-        LD_SB2(src, src_stride, src5, src6);
-        src += (2 * src_stride);
-
-        XORI_B2_128_SB(src5, src6);
-        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
-                                        vt_res0, vt_res1);
-        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src1, src2, src3, src4, src5, src6,
-                                        vt_res2, vt_res3);
-        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1,
-                   mask0, mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
-        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3,
-                   mask0, mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
-        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
-        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
-        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
-        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
-
-        SRARI_W2_SW(hz_res0, hz_res1, 10);
-        SAT_SW2_SW(hz_res0, hz_res1, 7);
-
-        dst0 = __msa_srari_h(shf_vec2, 5);
-        dst1 = __msa_srari_h(shf_vec5, 5);
-
-        SAT_SH2_SH(dst0, dst1, 7);
-
-        if (horiz_offset) {
-            dst0 = __msa_ilvod_h(zeros, dst0);
-            dst1 = __msa_ilvod_h(zeros, dst1);
-        } else {
-            ILVEV_H2_SH(dst0, zeros, dst1, zeros, dst0, dst1);
-        }
-
-        hz_res0 = __msa_aver_s_w(hz_res0, (v4i32) dst0);
-        hz_res1 = __msa_aver_s_w(hz_res1, (v4i32) dst1);
-        dst0 = __msa_pckev_h((v8i16) hz_res1, (v8i16) hz_res0);
-
-        out = PCKEV_XORI128_UB(dst0, dst0);
-        ST4x2_UB(out, dst, dst_stride);
-
-        dst += (2 * dst_stride);
-
-        src0 = src2;
-        src1 = src3;
-        src2 = src4;
-        src3 = src5;
-        src4 = src6;
-    }
-}
-
-static void avc_luma_midh_qrt_8w_msa(const uint8_t *src, int32_t src_stride,
-                                     uint8_t *dst, int32_t dst_stride,
-                                     int32_t height, uint8_t horiz_offset)
-{
-    uint32_t multiple8_cnt;
-
-    for (multiple8_cnt = 2; multiple8_cnt--;) {
-        avc_luma_midh_qrt_4w_msa(src, src_stride, dst, dst_stride, height,
-                                 horiz_offset);
-
-        src += 4;
-        dst += 4;
-    }
-}
-
-static void avc_luma_midh_qrt_16w_msa(const uint8_t *src, int32_t src_stride,
-                                      uint8_t *dst, int32_t dst_stride,
-                                      int32_t height, uint8_t horiz_offset)
-{
-    uint32_t multiple8_cnt;
-
-    for (multiple8_cnt = 4; multiple8_cnt--;) {
-        avc_luma_midh_qrt_4w_msa(src, src_stride, dst, dst_stride, height,
-                                 horiz_offset);
-
-        src += 4;
-        dst += 4;
-    }
-}
-
-static void avc_luma_hv_qrt_4w_msa(const uint8_t *src_x, const uint8_t *src_y,
-                                   int32_t src_stride, uint8_t *dst,
-                                   int32_t dst_stride, int32_t height)
-{
-    uint32_t loop_cnt;
-    v16i8 src_hz0, src_hz1, src_hz2, src_hz3;
-    v16i8 src_vt0, src_vt1, src_vt2, src_vt3, src_vt4;
-    v16i8 src_vt5, src_vt6, src_vt7, src_vt8;
-    v16i8 mask0, mask1, mask2;
-    v8i16 hz_out0, hz_out1, vert_out0, vert_out1;
-    v8i16 out0, out1;
-    v16u8 out;
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
 
     LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
 
-    LD_SB5(src_y, src_stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
-    src_y += (5 * src_stride);
+    LD_SB5(src_y, stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+    src_y += (5 * stride);
 
     src_vt0 = (v16i8) __msa_insve_w((v4i32) src_vt0, 1, (v4i32) src_vt1);
     src_vt1 = (v16i8) __msa_insve_w((v4i32) src_vt1, 1, (v4i32) src_vt2);
@@ -431,1329 +119,15 @@
 
     XORI_B4_128_SB(src_vt0, src_vt1, src_vt2, src_vt3);
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src_x, src_stride, src_hz0, src_hz1, src_hz2, src_hz3);
-        src_x += (4 * src_stride);
-
-        XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
-
-        hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src_hz0,
-                                                              src_hz1, mask0,
-                                                              mask1, mask2);
-        hz_out1 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src_hz2,
-                                                              src_hz3, mask0,
-                                                              mask1, mask2);
-
-        SRARI_H2_SH(hz_out0, hz_out1, 5);
-        SAT_SH2_SH(hz_out0, hz_out1, 7);
-
-        LD_SB4(src_y, src_stride, src_vt5, src_vt6, src_vt7, src_vt8);
-        src_y += (4 * src_stride);
-
-        src_vt4 = (v16i8) __msa_insve_w((v4i32) src_vt4, 1, (v4i32) src_vt5);
-        src_vt5 = (v16i8) __msa_insve_w((v4i32) src_vt5, 1, (v4i32) src_vt6);
-        src_vt6 = (v16i8) __msa_insve_w((v4i32) src_vt6, 1, (v4i32) src_vt7);
-        src_vt7 = (v16i8) __msa_insve_w((v4i32) src_vt7, 1, (v4i32) src_vt8);
-
-        XORI_B4_128_SB(src_vt4, src_vt5, src_vt6, src_vt7);
-
-        /* filter calc */
-        vert_out0 = AVC_CALC_DPADD_B_6PIX_2COEFF_R_SH(src_vt0, src_vt1,
-                                                      src_vt2, src_vt3,
-                                                      src_vt4, src_vt5);
-        vert_out1 = AVC_CALC_DPADD_B_6PIX_2COEFF_R_SH(src_vt2, src_vt3,
-                                                      src_vt4, src_vt5,
-                                                      src_vt6, src_vt7);
-
-        SRARI_H2_SH(vert_out0, vert_out1, 5);
-        SAT_SH2_SH(vert_out0, vert_out1, 7);
-
-        out0 = __msa_srari_h((hz_out0 + vert_out0), 1);
-        out1 = __msa_srari_h((hz_out1 + vert_out1), 1);
-
-        SAT_SH2_SH(out0, out1, 7);
-        out = PCKEV_XORI128_UB(out0, out1);
-        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
-        dst += (4 * dst_stride);
-
-        src_vt3 = src_vt7;
-        src_vt1 = src_vt5;
-        src_vt0 = src_vt4;
-        src_vt4 = src_vt8;
-        src_vt2 = src_vt6;
-    }
-}
-
-static void avc_luma_hv_qrt_8w_msa(const uint8_t *src_x, const uint8_t *src_y,
-                                   int32_t src_stride, uint8_t *dst,
-                                   int32_t dst_stride, int32_t height)
-{
-    uint32_t loop_cnt;
-    v16i8 src_hz0, src_hz1, src_hz2, src_hz3;
-    v16i8 src_vt0, src_vt1, src_vt2, src_vt3, src_vt4;
-    v16i8 src_vt5, src_vt6, src_vt7, src_vt8;
-    v16i8 mask0, mask1, mask2;
-    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
-    v8i16 vert_out0, vert_out1, vert_out2, vert_out3;
-    v8i16 out0, out1, out2, out3;
-    v16u8 tmp0, tmp1;
-
-    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
-    LD_SB5(src_y, src_stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
-    src_y += (5 * src_stride);
-
-    src_vt0 = (v16i8) __msa_insve_d((v2i64) src_vt0, 1, (v2i64) src_vt1);
-    src_vt1 = (v16i8) __msa_insve_d((v2i64) src_vt1, 1, (v2i64) src_vt2);
-    src_vt2 = (v16i8) __msa_insve_d((v2i64) src_vt2, 1, (v2i64) src_vt3);
-    src_vt3 = (v16i8) __msa_insve_d((v2i64) src_vt3, 1, (v2i64) src_vt4);
-
-    XORI_B4_128_SB(src_vt0, src_vt1, src_vt2, src_vt3);
-
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src_x, src_stride, src_hz0, src_hz1, src_hz2, src_hz3);
-        XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
-        src_x += (4 * src_stride);
-
-        hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, src_hz0, mask0, mask1, mask2);
-        hz_out1 = AVC_HORZ_FILTER_SH(src_hz1, src_hz1, mask0, mask1, mask2);
-        hz_out2 = AVC_HORZ_FILTER_SH(src_hz2, src_hz2, mask0, mask1, mask2);
-        hz_out3 = AVC_HORZ_FILTER_SH(src_hz3, src_hz3, mask0, mask1, mask2);
-
-        SRARI_H4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 5);
-        SAT_SH4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 7);
-
-        LD_SB4(src_y, src_stride, src_vt5, src_vt6, src_vt7, src_vt8);
-        src_y += (4 * src_stride);
-
-        src_vt4 = (v16i8) __msa_insve_d((v2i64) src_vt4, 1, (v2i64) src_vt5);
-        src_vt5 = (v16i8) __msa_insve_d((v2i64) src_vt5, 1, (v2i64) src_vt6);
-        src_vt6 = (v16i8) __msa_insve_d((v2i64) src_vt6, 1, (v2i64) src_vt7);
-        src_vt7 = (v16i8) __msa_insve_d((v2i64) src_vt7, 1, (v2i64) src_vt8);
-
-        XORI_B4_128_SB(src_vt4, src_vt5, src_vt6, src_vt7);
-
-        /* filter calc */
-        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src_vt0, src_vt1, src_vt2, src_vt3,
-                                        src_vt4, src_vt5, vert_out0, vert_out1);
-        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src_vt2, src_vt3, src_vt4, src_vt5,
-                                        src_vt6, src_vt7, vert_out2, vert_out3);
-
-        SRARI_H4_SH(vert_out0, vert_out1, vert_out2, vert_out3, 5);
-        SAT_SH4_SH(vert_out0, vert_out1, vert_out2, vert_out3, 7);
-
-        out0 = __msa_srari_h((hz_out0 + vert_out0), 1);
-        out1 = __msa_srari_h((hz_out1 + vert_out1), 1);
-        out2 = __msa_srari_h((hz_out2 + vert_out2), 1);
-        out3 = __msa_srari_h((hz_out3 + vert_out3), 1);
-
-        SAT_SH4_SH(out0, out1, out2, out3, 7);
-        tmp0 = PCKEV_XORI128_UB(out0, out1);
-        tmp1 = PCKEV_XORI128_UB(out2, out3);
-        ST8x4_UB(tmp0, tmp1, dst, dst_stride);
-
-        dst += (4 * dst_stride);
-        src_vt3 = src_vt7;
-        src_vt1 = src_vt5;
-        src_vt5 = src_vt4;
-        src_vt4 = src_vt8;
-        src_vt2 = src_vt6;
-        src_vt0 = src_vt5;
-    }
-}
-
-static void avc_luma_hv_qrt_16w_msa(const uint8_t *src_x, const uint8_t *src_y,
-                                    int32_t src_stride, uint8_t *dst,
-                                    int32_t dst_stride, int32_t height)
-{
-    uint32_t multiple8_cnt;
-
-    for (multiple8_cnt = 2; multiple8_cnt--;) {
-        avc_luma_hv_qrt_8w_msa(src_x, src_y, src_stride, dst, dst_stride,
-                               height);
-
-        src_x += 8;
-        src_y += 8;
-        dst += 8;
-    }
-}
-
-static void avc_luma_hz_and_aver_dst_4x4_msa(const uint8_t *src,
-                                             int32_t src_stride,
-                                             uint8_t *dst, int32_t dst_stride)
-{
-    v16i8 src0, src1, src2, src3;
-    v16u8 dst0, dst1, dst2, dst3, res;
-    v8i16 res0, res1;
-    v16i8 mask0, mask1, mask2;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v16i8 minus5b = __msa_ldi_b(-5);
-    v16i8 plus20b = __msa_ldi_b(20);
-
-    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
-    LD_SB4(src, src_stride, src0, src1, src2, src3);
-
-    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-    XORI_B4_128_SB(src0, src1, src2, src3);
-    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
-    HADD_SB2_SH(vec0, vec1, res0, res1);
-    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2, vec3);
-    DPADD_SB2_SH(vec2, vec3, minus5b, minus5b, res0, res1);
-    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4, vec5);
-    DPADD_SB2_SH(vec4, vec5, plus20b, plus20b, res0, res1);
-    SRARI_H2_SH(res0, res1, 5);
-    SAT_SH2_SH(res0, res1, 7);
-    res = PCKEV_XORI128_UB(res0, res1);
-    ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-
-    dst0 = (v16u8) __msa_pckev_d((v2i64) dst1, (v2i64) dst0);
-    res = __msa_aver_u_b(res, dst0);
-
-    ST4x4_UB(res, res, 0, 1, 2, 3, dst, dst_stride);
-}
-
-static void avc_luma_hz_and_aver_dst_8x8_msa(const uint8_t *src,
-                                             int32_t src_stride,
-                                             uint8_t *dst, int32_t dst_stride)
-{
-    uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3;
-    v16u8 dst0, dst1, dst2, dst3;
-    v8i16 res0, res1, res2, res3;
-    v16i8 mask0, mask1, mask2;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
-    v16i8 minus5b = __msa_ldi_b(-5);
-    v16i8 plus20b = __msa_ldi_b(20);
-
-    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
-
-    for (loop_cnt = 2; loop_cnt--;) {
-        LD_SB4(src, src_stride, src0, src1, src2, src3);
-        src += (4 * src_stride);
-
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
-        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
-        HADD_SB4_SH(vec0, vec1, vec2, vec3, res0, res1, res2, res3);
-        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec5);
-        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec7);
-        DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
-                     res0, res1, res2, res3);
-        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec8, vec9);
-        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec10, vec11);
-        DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b,
-                     plus20b, res0, res1, res2, res3);
-        SRARI_H4_SH(res0, res1, res2, res3, 5);
-        SAT_SH4_SH(res0, res1, res2, res3, 7);
-        ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-        CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1,
-                                dst, dst_stride);
-
-        dst += (4 * dst_stride);
-    }
-}
-
-static void avc_luma_hz_and_aver_dst_16x16_msa(const uint8_t *src,
-                                               int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride)
-{
-    uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 mask0, mask1, mask2;
-    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
-    v16i8 minus5b = __msa_ldi_b(-5);
-    v16i8 plus20b = __msa_ldi_b(20);
-
-    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
-
-    for (loop_cnt = 4; loop_cnt--;) {
-        LD_SB2(src, 8, src0, src1);
-        src += src_stride;
-        LD_SB2(src, 8, src2, src3);
-        src += src_stride;
-
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec3);
-        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec6, vec9);
-        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec1, vec4);
-        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec7, vec10);
-        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec2, vec5);
-        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec8, vec11);
-        HADD_SB4_SH(vec0, vec3, vec6, vec9, res0, res1, res2, res3);
-        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
-                     minus5b, res0, res1, res2, res3);
-        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
-                     plus20b, res0, res1, res2, res3);
-        LD_SB2(src, 8, src4, src5);
-        src += src_stride;
-        LD_SB2(src, 8, src6, src7);
-        src += src_stride;
-        XORI_B4_128_SB(src4, src5, src6, src7);
-        VSHF_B2_SB(src4, src4, src5, src5, mask0, mask0, vec0, vec3);
-        VSHF_B2_SB(src6, src6, src7, src7, mask0, mask0, vec6, vec9);
-        VSHF_B2_SB(src4, src4, src5, src5, mask1, mask1, vec1, vec4);
-        VSHF_B2_SB(src6, src6, src7, src7, mask1, mask1, vec7, vec10);
-        VSHF_B2_SB(src4, src4, src5, src5, mask2, mask2, vec2, vec5);
-        VSHF_B2_SB(src6, src6, src7, src7, mask2, mask2, vec8, vec11);
-        HADD_SB4_SH(vec0, vec3, vec6, vec9, res4, res5, res6, res7);
-        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
-                     minus5b, res4, res5, res6, res7);
-        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
-                     plus20b, res4, res5, res6, res7);
-        SRARI_H4_SH(res0, res1, res2, res3, 5);
-        SRARI_H4_SH(res4, res5, res6, res7, 5);
-        SAT_SH4_SH(res0, res1, res2, res3, 7);
-        SAT_SH4_SH(res4, res5, res6, res7, 7);
-        PCKEV_B4_SB(res1, res0, res3, res2, res5, res4, res7, res6,
-                    vec0, vec1, vec2, vec3);
-        XORI_B4_128_SB(vec0, vec1, vec2, vec3);
-        AVER_UB4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
-                    dst0, dst1, dst2, dst3);
-        ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
-        dst += (4 * dst_stride);
-    }
-}
-
-static void avc_luma_hz_qrt_and_aver_dst_4x4_msa(const uint8_t *src,
-                                                 int32_t src_stride,
-                                                 uint8_t *dst,
-                                                 int32_t dst_stride,
-                                                 uint8_t hor_offset)
-{
-    uint8_t slide;
-    v16i8 src0, src1, src2, src3;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 mask0, mask1, mask2;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 out0, out1;
-    v16i8 minus5b = __msa_ldi_b(-5);
-    v16i8 plus20b = __msa_ldi_b(20);
-    v16u8 res0, res1;
-
-    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
-
-    if (hor_offset) {
-        slide = 3;
-    } else {
-        slide = 2;
-    }
-
-    LD_SB4(src, src_stride, src0, src1, src2, src3);
-    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-
-    XORI_B4_128_SB(src0, src1, src2, src3);
-    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
-    HADD_SB2_SH(vec0, vec1, out0, out1);
-    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2, vec3);
-    DPADD_SB2_SH(vec2, vec3, minus5b, minus5b, out0, out1);
-    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4, vec5);
-    DPADD_SB2_SH(vec4, vec5, plus20b, plus20b, out0, out1);
-    SRARI_H2_SH(out0, out1, 5);
-    SAT_SH2_SH(out0, out1, 7);
-
-    PCKEV_B2_UB(out0, out0, out1, out1, res0, res1);
-
-    src0 = __msa_sld_b(src0, src0, slide);
-    src1 = __msa_sld_b(src1, src1, slide);
-    src2 = __msa_sld_b(src2, src2, slide);
-    src3 = __msa_sld_b(src3, src3, slide);
-    src0 = (v16i8) __msa_insve_w((v4i32) src0, 1, (v4i32) src1);
-    src1 = (v16i8) __msa_insve_w((v4i32) src2, 1, (v4i32) src3);
-    res0 = (v16u8) __msa_aver_s_b((v16i8) res0, src0);
-    res1 = (v16u8) __msa_aver_s_b((v16i8) res1, src1);
-
-    XORI_B2_128_UB(res0, res1);
-
-    dst0 = (v16u8) __msa_insve_w((v4i32) dst0, 1, (v4i32) dst1);
-    dst1 = (v16u8) __msa_insve_w((v4i32) dst2, 1, (v4i32) dst3);
-
-    AVER_UB2_UB(res0, dst0, res1, dst1, dst0, dst1);
-
-    ST4x4_UB(dst0, dst1, 0, 1, 0, 1, dst, dst_stride);
-}
-
-static void avc_luma_hz_qrt_and_aver_dst_8x8_msa(const uint8_t *src,
-                                                 int32_t src_stride,
-                                                 uint8_t *dst,
-                                                 int32_t dst_stride,
-                                                 uint8_t hor_offset)
-{
-    uint8_t slide;
-    uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3;
-    v16i8 mask0, mask1, mask2;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
-    v8i16 out0, out1, out2, out3;
-    v16i8 minus5b = __msa_ldi_b(-5);
-    v16i8 plus20b = __msa_ldi_b(20);
-    v16i8 res0, res1, res2, res3;
-
-    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
-
-    if (hor_offset) {
-        slide = 3;
-    } else {
-        slide = 2;
-    }
-
-    for (loop_cnt = 2; loop_cnt--;) {
-        LD_SB4(src, src_stride, src0, src1, src2, src3);
-        src += (4 * src_stride);
-
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
-        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
-        HADD_SB4_SH(vec0, vec1, vec2, vec3, out0, out1, out2, out3);
-        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec5);
-        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec7);
-        DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
-                     out0, out1, out2, out3);
-        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec8, vec9);
-        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec10, vec11);
-        DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b,
-                     plus20b, out0, out1, out2, out3);
-
-        src0 = __msa_sld_b(src0, src0, slide);
-        src1 = __msa_sld_b(src1, src1, slide);
-        src2 = __msa_sld_b(src2, src2, slide);
-        src3 = __msa_sld_b(src3, src3, slide);
-
-        SRARI_H4_SH(out0, out1, out2, out3, 5);
-        SAT_SH4_SH(out0, out1, out2, out3, 7);
-
-        PCKEV_B4_SB(out0, out0, out1, out1, out2, out2, out3, out3,
-                    res0, res1, res2, res3);
-
-        res0 = __msa_aver_s_b(res0, src0);
-        res1 = __msa_aver_s_b(res1, src1);
-        res2 = __msa_aver_s_b(res2, src2);
-        res3 = __msa_aver_s_b(res3, src3);
-
-        XORI_B4_128_SB(res0, res1, res2, res3);
-        AVER_ST8x4_UB(res0, dst0, res1, dst1, res2, dst2, res3, dst3,
-                      dst, dst_stride);
-
-        dst += (4 * dst_stride);
-    }
-}
-
-static void avc_luma_hz_qrt_and_aver_dst_16x16_msa(const uint8_t *src,
-                                                   int32_t src_stride,
-                                                   uint8_t *dst,
-                                                   int32_t dst_stride,
-                                                   uint8_t hor_offset)
-{
-    uint32_t loop_cnt;
-    v16i8 out0, out1;
-    v16i8 src0, src1, src2, src3;
-    v16i8 mask0, mask1, mask2, vshf;
-    v16u8 dst0, dst1;
-    v8i16 res0, res1, res2, res3;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
-    v16i8 minus5b = __msa_ldi_b(-5);
-    v16i8 plus20b = __msa_ldi_b(20);
-
-    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
-
-    if (hor_offset) {
-        vshf = LD_SB(&luma_mask_arr[16 + 96]);
-    } else {
-        vshf = LD_SB(&luma_mask_arr[96]);
-    }
-
-    for (loop_cnt = 8; loop_cnt--;) {
-        LD_SB2(src, 8, src0, src1);
-        src += src_stride;
-        LD_SB2(src, 8, src2, src3);
-        src += src_stride;
-
-        LD_UB2(dst, dst_stride, dst0, dst1);
-
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec3);
-        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec6, vec9);
-        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec1, vec4);
-        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec7, vec10);
-        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec2, vec5);
-        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec8, vec11);
-        HADD_SB4_SH(vec0, vec3, vec6, vec9, res0, res1, res2, res3);
-        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
-                     minus5b, res0, res1, res2, res3);
-        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
-                     plus20b, res0, res1, res2, res3);
-        VSHF_B2_SB(src0, src1, src2, src3, vshf, vshf, src0, src2);
-        SRARI_H4_SH(res0, res1, res2, res3, 5);
-        SAT_SH4_SH(res0, res1, res2, res3, 7);
-        PCKEV_B2_SB(res1, res0, res3, res2, out0, out1);
-
-        out0 = __msa_aver_s_b(out0, src0);
-        out1 = __msa_aver_s_b(out1, src2);
-
-        XORI_B2_128_SB(out0, out1);
-        AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
-        ST_UB2(dst0, dst1, dst, dst_stride);
-        dst += (2 * dst_stride);
-    }
-}
-
-static void avc_luma_vt_and_aver_dst_4x4_msa(const uint8_t *src,
-                                             int32_t src_stride,
-                                             uint8_t *dst, int32_t dst_stride)
-{
-    int16_t filt_const0 = 0xfb01;
-    int16_t filt_const1 = 0x1414;
-    int16_t filt_const2 = 0x1fb;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
-    v16i8 src87_r, src2110, src4332, src6554, src8776;
-    v8i16 out10, out32;
-    v16i8 filt0, filt1, filt2;
-    v16u8 res;
-
-    filt0 = (v16i8) __msa_fill_h(filt_const0);
-    filt1 = (v16i8) __msa_fill_h(filt_const1);
-    filt2 = (v16i8) __msa_fill_h(filt_const2);
-
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
-               src10_r, src21_r, src32_r, src43_r);
-    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
-    XORI_B2_128_SB(src2110, src4332);
-    LD_SB4(src, src_stride, src5, src6, src7, src8);
-    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
-               src54_r, src65_r, src76_r, src87_r);
-    ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776);
-    XORI_B2_128_SB(src6554, src8776);
-    out10 = DPADD_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2);
-    out32 = DPADD_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2);
-    SRARI_H2_SH(out10, out32, 5);
-    SAT_SH2_SH(out10, out32, 7);
-    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-    res = PCKEV_XORI128_UB(out10, out32);
-
-    ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-
-    dst0 = (v16u8) __msa_pckev_d((v2i64) dst1, (v2i64) dst0);
-    dst0 = __msa_aver_u_b(res, dst0);
-
-    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, dst_stride);
-}
-
-static void avc_luma_vt_and_aver_dst_8x8_msa(const uint8_t *src,
-                                             int32_t src_stride,
-                                             uint8_t *dst, int32_t dst_stride)
-{
-    int32_t loop_cnt;
-    int16_t filt_const0 = 0xfb01;
-    int16_t filt_const1 = 0x1414;
-    int16_t filt_const2 = 0x1fb;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 src0, src1, src2, src3, src4, src7, src8, src9, src10;
-    v16i8 src10_r, src32_r, src76_r, src98_r;
-    v16i8 src21_r, src43_r, src87_r, src109_r;
-    v8i16 out0, out1, out2, out3;
-    v16i8 filt0, filt1, filt2;
-
-    filt0 = (v16i8) __msa_fill_h(filt_const0);
-    filt1 = (v16i8) __msa_fill_h(filt_const1);
-    filt2 = (v16i8) __msa_fill_h(filt_const2);
-
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
-               src10_r, src21_r, src32_r, src43_r);
-
-    for (loop_cnt = 2; loop_cnt--;) {
-        LD_SB4(src, src_stride, src7, src8, src9, src10);
-        src += (4 * src_stride);
-
-        XORI_B4_128_SB(src7, src8, src9, src10);
-        ILVR_B4_SB(src7, src4, src8, src7, src9, src8, src10, src9,
-                   src76_r, src87_r, src98_r, src109_r);
-        out0 = DPADD_SH3_SH(src10_r, src32_r, src76_r, filt0, filt1, filt2);
-        out1 = DPADD_SH3_SH(src21_r, src43_r, src87_r, filt0, filt1, filt2);
-        out2 = DPADD_SH3_SH(src32_r, src76_r, src98_r, filt0, filt1, filt2);
-        out3 = DPADD_SH3_SH(src43_r, src87_r, src109_r, filt0, filt1, filt2);
-        SRARI_H4_SH(out0, out1, out2, out3, 5);
-        SAT_SH4_SH(out0, out1, out2, out3, 7);
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-        ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-        CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1,
-                                dst, dst_stride);
-        dst += (4 * dst_stride);
-
-        src10_r = src76_r;
-        src32_r = src98_r;
-        src21_r = src87_r;
-        src43_r = src109_r;
-        src4 = src10;
-    }
-}
-
-static void avc_luma_vt_and_aver_dst_16x16_msa(const uint8_t *src,
-                                               int32_t src_stride,
-                                               uint8_t *dst, int32_t dst_stride)
-{
-    int32_t loop_cnt;
-    int16_t filt_const0 = 0xfb01;
-    int16_t filt_const1 = 0x1414;
-    int16_t filt_const2 = 0x1fb;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
-    v16i8 src87_r, src10_l, src32_l, src54_l, src76_l, src21_l, src43_l;
-    v16i8 src65_l, src87_l;
-    v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
-    v16i8 filt0, filt1, filt2;
-    v16u8 res0, res1, res2, res3;
-
-    filt0 = (v16i8) __msa_fill_h(filt_const0);
-    filt1 = (v16i8) __msa_fill_h(filt_const1);
-    filt2 = (v16i8) __msa_fill_h(filt_const2);
-
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
-               src10_r, src21_r, src32_r, src43_r);
-    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
-               src10_l, src21_l, src32_l, src43_l);
-
-    for (loop_cnt = 4; loop_cnt--;) {
-        LD_SB4(src, src_stride, src5, src6, src7, src8);
-        src += (4 * src_stride);
-
-        XORI_B4_128_SB(src5, src6, src7, src8);
-        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
-                   src54_r, src65_r, src76_r, src87_r);
-        ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
-                   src54_l, src65_l, src76_l, src87_l);
-        out0_r = DPADD_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
-        out1_r = DPADD_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
-        out2_r = DPADD_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
-        out3_r = DPADD_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
-        out0_l = DPADD_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
-        out1_l = DPADD_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
-        out2_l = DPADD_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
-        out3_l = DPADD_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
-        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5);
-        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 5);
-        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
-        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-        PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
-                    out3_r, res0, res1, res2, res3);
-        XORI_B4_128_UB(res0, res1, res2, res3);
-        AVER_UB4_UB(res0, dst0, res1, dst1, res2, dst2, res3, dst3,
-                    res0, res1, res2, res3);
-        ST_UB4(res0, res1, res2, res3, dst, dst_stride);
-        dst += (4 * dst_stride);
-
-        src10_r = src54_r;
-        src32_r = src76_r;
-        src21_r = src65_r;
-        src43_r = src87_r;
-        src10_l = src54_l;
-        src32_l = src76_l;
-        src21_l = src65_l;
-        src43_l = src87_l;
-        src4 = src8;
-    }
-}
-
-static void avc_luma_vt_qrt_and_aver_dst_4x4_msa(const uint8_t *src,
-                                                 int32_t src_stride,
-                                                 uint8_t *dst,
-                                                 int32_t dst_stride,
-                                                 uint8_t ver_offset)
-{
-    int16_t filt_const0 = 0xfb01;
-    int16_t filt_const1 = 0x1414;
-    int16_t filt_const2 = 0x1fb;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
-    v16i8 src87_r, src2110, src4332, src6554, src8776;
-    v8i16 out10, out32;
-    v16i8 filt0, filt1, filt2;
-    v16u8 res;
-
-    filt0 = (v16i8) __msa_fill_h(filt_const0);
-    filt1 = (v16i8) __msa_fill_h(filt_const1);
-    filt2 = (v16i8) __msa_fill_h(filt_const2);
-
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
-               src10_r, src21_r, src32_r, src43_r);
-    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
-    XORI_B2_128_SB(src2110, src4332);
-    LD_SB4(src, src_stride, src5, src6, src7, src8);
-    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
-               src54_r, src65_r, src76_r, src87_r);
-    ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776);
-    XORI_B2_128_SB(src6554, src8776);
-    out10 = DPADD_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2);
-    out32 = DPADD_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2);
-    SRARI_H2_SH(out10, out32, 5);
-    SAT_SH2_SH(out10, out32, 7);
-    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-    res = PCKEV_XORI128_UB(out10, out32);
-
-    if (ver_offset) {
-        src32_r = (v16i8) __msa_insve_w((v4i32) src3, 1, (v4i32) src4);
-        src54_r = (v16i8) __msa_insve_w((v4i32) src5, 1, (v4i32) src6);
-    } else {
-        src32_r = (v16i8) __msa_insve_w((v4i32) src2, 1, (v4i32) src3);
-        src54_r = (v16i8) __msa_insve_w((v4i32) src4, 1, (v4i32) src5);
-    }
-
-    src32_r = (v16i8) __msa_insve_d((v2i64) src32_r, 1, (v2i64) src54_r);
-    res = __msa_aver_u_b(res, (v16u8) src32_r);
-
-    ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-
-    dst0 = (v16u8) __msa_pckev_d((v2i64) dst1, (v2i64) dst0);
-    dst0 = __msa_aver_u_b(res, dst0);
-
-    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, dst_stride);
-}
-
-static void avc_luma_vt_qrt_and_aver_dst_8x8_msa(const uint8_t *src,
-                                                 int32_t src_stride,
-                                                 uint8_t *dst,
-                                                 int32_t dst_stride,
-                                                 uint8_t ver_offset)
-{
-    int32_t loop_cnt;
-    int16_t filt_const0 = 0xfb01;
-    int16_t filt_const1 = 0x1414;
-    int16_t filt_const2 = 0x1fb;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 src0, src1, src2, src3, src4, src7, src8, src9, src10;
-    v16i8 src10_r, src32_r, src76_r, src98_r;
-    v16i8 src21_r, src43_r, src87_r, src109_r;
-    v8i16 out0_r, out1_r, out2_r, out3_r;
-    v16i8 res0, res1;
-    v16u8 vec0, vec1;
-    v16i8 filt0, filt1, filt2;
-
-    filt0 = (v16i8) __msa_fill_h(filt_const0);
-    filt1 = (v16i8) __msa_fill_h(filt_const1);
-    filt2 = (v16i8) __msa_fill_h(filt_const2);
-
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
-               src10_r, src21_r, src32_r, src43_r);
-
-    for (loop_cnt = 2; loop_cnt--;) {
-        LD_SB4(src, src_stride, src7, src8, src9, src10);
-        src += (4 * src_stride);
-
-        XORI_B4_128_SB(src7, src8, src9, src10);
-        ILVR_B4_SB(src7, src4, src8, src7, src9, src8, src10, src9,
-                   src76_r, src87_r, src98_r, src109_r);
-        out0_r = DPADD_SH3_SH(src10_r, src32_r, src76_r, filt0, filt1, filt2);
-        out1_r = DPADD_SH3_SH(src21_r, src43_r, src87_r, filt0, filt1, filt2);
-        out2_r = DPADD_SH3_SH(src32_r, src76_r, src98_r, filt0, filt1, filt2);
-        out3_r = DPADD_SH3_SH(src43_r, src87_r, src109_r, filt0, filt1, filt2);
-        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5);
-        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
-        PCKEV_B2_SB(out1_r, out0_r, out3_r, out2_r, res0, res1);
-
-        if (ver_offset) {
-            PCKEV_D2_SB(src4, src3, src8, src7, src10_r, src32_r);
-        } else {
-            PCKEV_D2_SB(src3, src2, src7, src4, src10_r, src32_r);
-        }
-
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-        ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-
-        vec0 = (v16u8) __msa_aver_s_b(res0, src10_r);
-        vec1 = (v16u8) __msa_aver_s_b(res1, src32_r);
-
-        XORI_B2_128_UB(vec0, vec1);
-        AVER_UB2_UB(vec0, dst0, vec1, dst1, vec0, vec1);
-        ST8x4_UB(vec0, vec1, dst, dst_stride);
-        dst += (4 * dst_stride);
-
-        src10_r = src76_r;
-        src32_r = src98_r;
-        src21_r = src87_r;
-        src43_r = src109_r;
-        src2 = src8;
-        src3 = src9;
-        src4 = src10;
-    }
-}
-
-static void avc_luma_vt_qrt_and_aver_dst_16x16_msa(const uint8_t *src,
-                                                   int32_t src_stride,
-                                                   uint8_t *dst,
-                                                   int32_t dst_stride,
-                                                   uint8_t ver_offset)
-{
-    int32_t loop_cnt;
-    int16_t filt_const0 = 0xfb01;
-    int16_t filt_const1 = 0x1414;
-    int16_t filt_const2 = 0x1fb;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
-    v16i8 src87_r, src10_l, src32_l, src54_l, src76_l, src21_l, src43_l;
-    v16i8 src65_l, src87_l;
-    v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
-    v16i8 out0, out1, out2, out3;
-    v16i8 filt0, filt1, filt2;
-    v16u8 res0, res1, res2, res3;
-
-    filt0 = (v16i8) __msa_fill_h(filt_const0);
-    filt1 = (v16i8) __msa_fill_h(filt_const1);
-    filt2 = (v16i8) __msa_fill_h(filt_const2);
-
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
-               src10_r, src21_r, src32_r, src43_r);
-    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
-               src10_l, src21_l, src32_l, src43_l);
-
-    for (loop_cnt = 4; loop_cnt--;) {
-        LD_SB4(src, src_stride, src5, src6, src7, src8);
-        src += (4 * src_stride);
-
-        XORI_B4_128_SB(src5, src6, src7, src8);
-        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
-                   src54_r, src65_r, src76_r, src87_r);
-        ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
-                   src54_l, src65_l, src76_l, src87_l);
-        out0_r = DPADD_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
-        out1_r = DPADD_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
-        out2_r = DPADD_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
-        out3_r = DPADD_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
-        out0_l = DPADD_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
-        out1_l = DPADD_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
-        out2_l = DPADD_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
-        out3_l = DPADD_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
-        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5);
-        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 5);
-        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
-        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
-        PCKEV_B4_SB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
-                    out3_r, out0, out1, out2, out3);
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-
-        if (ver_offset) {
-            res0 = (v16u8) __msa_aver_s_b(out0, src3);
-            res1 = (v16u8) __msa_aver_s_b(out1, src4);
-            res2 = (v16u8) __msa_aver_s_b(out2, src5);
-            res3 = (v16u8) __msa_aver_s_b(out3, src6);
-        } else {
-            res0 = (v16u8) __msa_aver_s_b(out0, src2);
-            res1 = (v16u8) __msa_aver_s_b(out1, src3);
-            res2 = (v16u8) __msa_aver_s_b(out2, src4);
-            res3 = (v16u8) __msa_aver_s_b(out3, src5);
-        }
-
-        XORI_B4_128_UB(res0, res1, res2, res3);
-        AVER_UB4_UB(res0, dst0, res1, dst1, res2, dst2, res3, dst3,
-                    dst0, dst1, dst2, dst3);
-        ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
-        dst += (4 * dst_stride);
-
-        src10_r = src54_r;
-        src32_r = src76_r;
-        src21_r = src65_r;
-        src43_r = src87_r;
-        src10_l = src54_l;
-        src32_l = src76_l;
-        src21_l = src65_l;
-        src43_l = src87_l;
-        src2 = src6;
-        src3 = src7;
-        src4 = src8;
-    }
-}
-
-static void avc_luma_mid_and_aver_dst_4x4_msa(const uint8_t *src,
-                                              int32_t src_stride,
-                                              uint8_t *dst, int32_t dst_stride)
-{
-    v16i8 src0, src1, src2, src3, src4;
-    v16i8 mask0, mask1, mask2;
-    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
-    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
-    v8i16 res0, res1, res2, res3;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16u8 tmp0, tmp1, tmp2, tmp3;
-
-    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-
-    hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
-                                                          mask0, mask1, mask2);
-    hz_out2 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
-                                                          mask0, mask1, mask2);
-
-    PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
-
-    hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
-
-    LD_SB4(src, src_stride, src0, src1, src2, src3);
-    XORI_B4_128_SB(src0, src1, src2, src3);
-
-    hz_out5 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
-                                                          mask0, mask1, mask2);
-    hz_out7 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
-                                                          mask0, mask1, mask2);
-
-    PCKOD_D2_SH(hz_out5, hz_out5, hz_out7, hz_out7, hz_out6, hz_out8);
-
-    res0 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out0, hz_out1, hz_out2,
-                                             hz_out3, hz_out4, hz_out5);
-    res1 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out1, hz_out2, hz_out3,
-                                             hz_out4, hz_out5, hz_out6);
-    res2 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out2, hz_out3, hz_out4,
-                                             hz_out5, hz_out6, hz_out7);
-    res3 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out3, hz_out4, hz_out5,
-                                             hz_out6, hz_out7, hz_out8);
-    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-    tmp0 = PCKEV_XORI128_UB(res0, res1);
-    tmp1 = PCKEV_XORI128_UB(res2, res3);
-    PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2, tmp3);
-    AVER_UB2_UB(tmp0, tmp2, tmp1, tmp3, tmp0, tmp1);
-
-    ST4x4_UB(tmp0, tmp1, 0, 2, 0, 2, dst, dst_stride);
-}
-
-static void avc_luma_mid_and_aver_dst_8w_msa(const uint8_t *src,
-                                             int32_t src_stride,
-                                             uint8_t *dst, int32_t dst_stride,
-                                             int32_t height)
-{
-    uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4;
-    v16i8 mask0, mask1, mask2;
-    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
-    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
-    v16u8 dst0, dst1, dst2, dst3;
-    v8i16 res0, res1, res2, res3;
-
-    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
-
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    hz_out0 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
-    hz_out1 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
-    hz_out2 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
-    hz_out3 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
-    hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
-
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src, src_stride, src0, src1, src2, src3);
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        src += (4 * src_stride);
-
-        hz_out5 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
-        hz_out6 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
-        hz_out7 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
-        hz_out8 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
-
-        res0 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out0, hz_out1, hz_out2,
-                                               hz_out3, hz_out4, hz_out5);
-        res1 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out1, hz_out2, hz_out3,
-                                               hz_out4, hz_out5, hz_out6);
-        res2 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out2, hz_out3, hz_out4,
-                                               hz_out5, hz_out6, hz_out7);
-        res3 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out3, hz_out4, hz_out5,
-                                               hz_out6, hz_out7, hz_out8);
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-        ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-        CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1,
-                                dst, dst_stride);
-
-        dst += (4 * dst_stride);
-        hz_out3 = hz_out7;
-        hz_out1 = hz_out5;
-        hz_out5 = hz_out4;
-        hz_out4 = hz_out8;
-        hz_out2 = hz_out6;
-        hz_out0 = hz_out5;
-    }
-}
-
-static void avc_luma_mid_and_aver_dst_16x16_msa(const uint8_t *src,
-                                                int32_t src_stride,
-                                                uint8_t *dst,
-                                                int32_t dst_stride)
-{
-    avc_luma_mid_and_aver_dst_8w_msa(src, src_stride, dst, dst_stride, 16);
-    avc_luma_mid_and_aver_dst_8w_msa(src + 8, src_stride, dst + 8, dst_stride,
-                                     16);
-}
-
-static void avc_luma_midh_qrt_and_aver_dst_4w_msa(const uint8_t *src,
-                                                  int32_t src_stride,
-                                                  uint8_t *dst,
-                                                  int32_t dst_stride,
-                                                  int32_t height,
-                                                  uint8_t horiz_offset)
-{
-    uint32_t row;
-    v16i8 src0, src1, src2, src3, src4, src5, src6;
-    v16u8 dst0, dst1, res;
-    v8i16 vt_res0, vt_res1, vt_res2, vt_res3;
-    v4i32 hz_res0, hz_res1;
-    v8i16 res0, res1;
-    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5;
-    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
-    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
-    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
-    v8i16 minus5h = __msa_ldi_h(-5);
-    v8i16 plus20h = __msa_ldi_h(20);
-    v8i16 zeros = { 0 };
-
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-
-    for (row = (height >> 1); row--;) {
-        LD_SB2(src, src_stride, src5, src6);
-        src += (2 * src_stride);
-
-        XORI_B2_128_SB(src5, src6);
-        LD_UB2(dst, dst_stride, dst0, dst1);
-
-        dst0 = (v16u8) __msa_ilvr_w((v4i32) dst1, (v4i32) dst0);
-
-        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
-                                        vt_res0, vt_res1);
-        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src1, src2, src3, src4, src5, src6,
-                                        vt_res2, vt_res3);
-        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1,
-                   mask0, mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
-        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3,
-                   mask0, mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
-
-        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
-        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
-
-        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
-        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
-
-        SRARI_W2_SW(hz_res0, hz_res1, 10);
-        SAT_SW2_SW(hz_res0, hz_res1, 7);
-
-        res0 = __msa_srari_h(shf_vec2, 5);
-        res1 = __msa_srari_h(shf_vec5, 5);
-
-        SAT_SH2_SH(res0, res1, 7);
-
-        if (horiz_offset) {
-            res0 = __msa_ilvod_h(zeros, res0);
-            res1 = __msa_ilvod_h(zeros, res1);
-        } else {
-            ILVEV_H2_SH(res0, zeros, res1, zeros, res0, res1);
-        }
-        hz_res0 = __msa_aver_s_w(hz_res0, (v4i32) res0);
-        hz_res1 = __msa_aver_s_w(hz_res1, (v4i32) res1);
-        res0 = __msa_pckev_h((v8i16) hz_res1, (v8i16) hz_res0);
-
-        res = PCKEV_XORI128_UB(res0, res0);
-
-        dst0 = __msa_aver_u_b(res, dst0);
-
-        ST4x2_UB(dst0, dst, dst_stride);
-        dst += (2 * dst_stride);
-
-        src0 = src2;
-        src1 = src3;
-        src2 = src4;
-        src3 = src5;
-        src4 = src6;
-    }
-}
-
-static void avc_luma_midh_qrt_and_aver_dst_8w_msa(const uint8_t *src,
-                                                  int32_t src_stride,
-                                                  uint8_t *dst,
-                                                  int32_t dst_stride,
-                                                  int32_t height,
-                                                  uint8_t horiz_offset)
-{
-    uint32_t multiple8_cnt;
-
-    for (multiple8_cnt = 2; multiple8_cnt--;) {
-        avc_luma_midh_qrt_and_aver_dst_4w_msa(src, src_stride, dst, dst_stride,
-                                              height, horiz_offset);
-
-        src += 4;
-        dst += 4;
-    }
-}
-
-static void avc_luma_midh_qrt_and_aver_dst_16w_msa(const uint8_t *src,
-                                                   int32_t src_stride,
-                                                   uint8_t *dst,
-                                                   int32_t dst_stride,
-                                                   int32_t height,
-                                                   uint8_t horiz_offset)
-{
-    uint32_t multiple8_cnt;
-
-    for (multiple8_cnt = 4; multiple8_cnt--;) {
-        avc_luma_midh_qrt_and_aver_dst_4w_msa(src, src_stride, dst, dst_stride,
-                                              height, horiz_offset);
-
-        src += 4;
-        dst += 4;
-    }
-}
-
-static void avc_luma_midv_qrt_and_aver_dst_4w_msa(const uint8_t *src,
-                                                  int32_t src_stride,
-                                                  uint8_t *dst,
-                                                  int32_t dst_stride,
-                                                  int32_t height,
-                                                  uint8_t ver_offset)
-{
-    int32_t loop_cnt;
-    int32_t out0, out1;
-    v16i8 src0, src1, src2, src3, src4;
-    v16u8 dst0, dst1;
-    v16i8 mask0, mask1, mask2;
-    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
-    v8i16 hz_out4, hz_out5, hz_out6;
-    v8i16 res0, res1, res2, res3;
-    v16u8 vec0, vec1;
-
-    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-
-    hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
-                                                          mask0, mask1, mask2);
-    hz_out2 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
-                                                          mask0, mask1, mask2);
-
-    PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
-
-    hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
-
-    for (loop_cnt = (height >> 1); loop_cnt--;) {
-        LD_SB2(src, src_stride, src0, src1);
-        src += (2 * src_stride);
-
-        XORI_B2_128_SB(src0, src1);
-        LD_UB2(dst, dst_stride, dst0, dst1);
-        hz_out5 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
-                                                              mask0, mask1,
-                                                              mask2);
-        hz_out6 = (v8i16) __msa_pckod_d((v2i64) hz_out5, (v2i64) hz_out5);
-        res0 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out0, hz_out1, hz_out2,
-                                                 hz_out3, hz_out4, hz_out5);
-        res2 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out1, hz_out2, hz_out3,
-                                                 hz_out4, hz_out5, hz_out6);
-
-        if (ver_offset) {
-            res1 = __msa_srari_h(hz_out3, 5);
-            res3 = __msa_srari_h(hz_out4, 5);
-        } else {
-            res1 = __msa_srari_h(hz_out2, 5);
-            res3 = __msa_srari_h(hz_out3, 5);
-        }
-
-        SAT_SH2_SH(res1, res3, 7);
-
-        res0 = __msa_aver_s_h(res0, res1);
-        res1 = __msa_aver_s_h(res2, res3);
-
-        vec0 = PCKEV_XORI128_UB(res0, res0);
-        vec1 = PCKEV_XORI128_UB(res1, res1);
-
-        AVER_UB2_UB(vec0, dst0, vec1, dst1, dst0, dst1);
-
-        out0 = __msa_copy_u_w((v4i32) dst0, 0);
-        out1 = __msa_copy_u_w((v4i32) dst1, 0);
-        SW(out0, dst);
-        dst += dst_stride;
-        SW(out1, dst);
-        dst += dst_stride;
-
-        hz_out0 = hz_out2;
-        hz_out1 = hz_out3;
-        hz_out2 = hz_out4;
-        hz_out3 = hz_out5;
-        hz_out4 = hz_out6;
-    }
-}
-
-static void avc_luma_midv_qrt_and_aver_dst_8w_msa(const uint8_t *src,
-                                                  int32_t src_stride,
-                                                  uint8_t *dst,
-                                                  int32_t dst_stride,
-                                                  int32_t height,
-                                                  uint8_t vert_offset)
-{
-    int32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 mask0, mask1, mask2;
-    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
-    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
-    v8i16 res0, res1, res2, res3;
-    v8i16 res4, res5, res6, res7;
-
-    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
-
-    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-    XORI_B5_128_SB(src0, src1, src2, src3, src4);
-    src += (5 * src_stride);
-
-    hz_out0 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
-    hz_out1 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
-    hz_out2 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
-    hz_out3 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
-    hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
-
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src, src_stride, src0, src1, src2, src3);
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        src += (4 * src_stride);
-
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-
-        hz_out5 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
-        hz_out6 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
-        hz_out7 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
-        hz_out8 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
-
-        res0 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out0, hz_out1, hz_out2,
-                                               hz_out3, hz_out4, hz_out5);
-        res2 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out1, hz_out2, hz_out3,
-                                               hz_out4, hz_out5, hz_out6);
-        res4 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out2, hz_out3, hz_out4,
-                                               hz_out5, hz_out6, hz_out7);
-        res6 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out3, hz_out4, hz_out5,
-                                               hz_out6, hz_out7, hz_out8);
-
-        if (vert_offset) {
-            res1 = __msa_srari_h(hz_out3, 5);
-            res3 = __msa_srari_h(hz_out4, 5);
-            res5 = __msa_srari_h(hz_out5, 5);
-            res7 = __msa_srari_h(hz_out6, 5);
-        } else {
-            res1 = __msa_srari_h(hz_out2, 5);
-            res3 = __msa_srari_h(hz_out3, 5);
-            res5 = __msa_srari_h(hz_out4, 5);
-            res7 = __msa_srari_h(hz_out5, 5);
-        }
-
-        SAT_SH4_SH(res1, res3, res5, res7, 7);
-
-        res0 = __msa_aver_s_h(res0, res1);
-        res1 = __msa_aver_s_h(res2, res3);
-        res2 = __msa_aver_s_h(res4, res5);
-        res3 = __msa_aver_s_h(res6, res7);
-        ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-        CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1,
-                                dst, dst_stride);
-        dst += (4 * dst_stride);
-
-        hz_out0 = hz_out4;
-        hz_out1 = hz_out5;
-        hz_out2 = hz_out6;
-        hz_out3 = hz_out7;
-        hz_out4 = hz_out8;
-    }
-}
-
-static void avc_luma_midv_qrt_and_aver_dst_16w_msa(const uint8_t *src,
-                                                   int32_t src_stride,
-                                                   uint8_t *dst,
-                                                   int32_t dst_stride,
-                                                   int32_t height,
-                                                   uint8_t vert_offset)
-{
-    int32_t multiple8_cnt;
-
-    for (multiple8_cnt = 2; multiple8_cnt--;) {
-        avc_luma_midv_qrt_and_aver_dst_8w_msa(src, src_stride, dst, dst_stride,
-                                              height, vert_offset);
-
-        src += 8;
-        dst += 8;
-    }
-}
-
-static void avc_luma_hv_qrt_and_aver_dst_4x4_msa(const uint8_t *src_x,
-                                                 const uint8_t *src_y,
-                                                 int32_t src_stride,
-                                                 uint8_t *dst,
-                                                 int32_t dst_stride)
-{
-    v16i8 src_hz0, src_hz1, src_hz2, src_hz3;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 src_vt0, src_vt1, src_vt2, src_vt3, src_vt4;
-    v16i8 src_vt5, src_vt6, src_vt7, src_vt8;
-    v16i8 mask0, mask1, mask2;
-    v8i16 hz_out0, hz_out1, vert_out0, vert_out1;
-    v8i16 res0, res1;
-    v16u8 res;
-
-    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
-    LD_SB5(src_y, src_stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
-    src_y += (5 * src_stride);
-
-    src_vt0 = (v16i8) __msa_insve_w((v4i32) src_vt0, 1, (v4i32) src_vt1);
-    src_vt1 = (v16i8) __msa_insve_w((v4i32) src_vt1, 1, (v4i32) src_vt2);
-    src_vt2 = (v16i8) __msa_insve_w((v4i32) src_vt2, 1, (v4i32) src_vt3);
-    src_vt3 = (v16i8) __msa_insve_w((v4i32) src_vt3, 1, (v4i32) src_vt4);
-
-    XORI_B4_128_SB(src_vt0, src_vt1, src_vt2, src_vt3);
-    LD_SB4(src_x, src_stride, src_hz0, src_hz1, src_hz2, src_hz3);
-    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    LD_SB4(src_x, stride, src_hz0, src_hz1, src_hz2, src_hz3);
     XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
-    hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src_hz0, src_hz1,
-                                                          mask0, mask1, mask2);
-    hz_out1 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src_hz2, src_hz3,
-                                                          mask0, mask1, mask2);
+    hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, src_hz1, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src_hz2, src_hz3, mask0, mask1, mask2);
+
     SRARI_H2_SH(hz_out0, hz_out1, 5);
     SAT_SH2_SH(hz_out0, hz_out1, 7);
-    LD_SB4(src_y, src_stride, src_vt5, src_vt6, src_vt7, src_vt8);
+
+    LD_SB4(src_y, stride, src_vt5, src_vt6, src_vt7, src_vt8);
 
     src_vt4 = (v16i8) __msa_insve_w((v4i32) src_vt4, 1, (v4i32) src_vt5);
     src_vt5 = (v16i8) __msa_insve_w((v4i32) src_vt5, 1, (v4i32) src_vt6);
@@ -1761,132 +135,511 @@
     src_vt7 = (v16i8) __msa_insve_w((v4i32) src_vt7, 1, (v4i32) src_vt8);
 
     XORI_B4_128_SB(src_vt4, src_vt5, src_vt6, src_vt7);
+    ILVR_B2_SB(src_vt1, src_vt0, src_vt3, src_vt2, src_vt10_r, src_vt32_r);
+    ILVR_B2_SB(src_vt5, src_vt4, src_vt7, src_vt6, src_vt54_r, src_vt76_r);
+    vt_out0 = AVC_DOT_SH3_SH(src_vt10_r, src_vt32_r, src_vt54_r, filt0, filt1,
+                             filt2);
+    vt_out1 = AVC_DOT_SH3_SH(src_vt32_r, src_vt54_r, src_vt76_r, filt0, filt1,
+                             filt2);
+    SRARI_H2_SH(vt_out0, vt_out1, 5);
+    SAT_SH2_SH(vt_out0, vt_out1, 7);
 
-    /* filter calc */
-    vert_out0 = AVC_CALC_DPADD_B_6PIX_2COEFF_R_SH(src_vt0, src_vt1, src_vt2,
-                                                  src_vt3, src_vt4, src_vt5);
-    vert_out1 = AVC_CALC_DPADD_B_6PIX_2COEFF_R_SH(src_vt2, src_vt3, src_vt4,
-                                                  src_vt5, src_vt6, src_vt7);
-    SRARI_H2_SH(vert_out0, vert_out1, 5);
-    SAT_SH2_SH(vert_out0, vert_out1, 7);
+    out0 = __msa_srari_h((hz_out0 + vt_out0), 1);
+    out1 = __msa_srari_h((hz_out1 + vt_out1), 1);
 
-    res1 = __msa_srari_h((hz_out1 + vert_out1), 1);
-    res0 = __msa_srari_h((hz_out0 + vert_out0), 1);
+    SAT_SH2_SH(out0, out1, 7);
+    out = PCKEV_XORI128_UB(out0, out1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
+}
+
+static void avc_luma_hv_qrt_8x8_msa(const uint8_t *src_x, const uint8_t *src_y,
+                                    uint8_t *dst, int32_t stride)
+{
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    v16u8 out0, out1;
+    v16i8 src_hz0, src_hz1, src_hz2, src_hz3, mask0, mask1, mask2;
+    v16i8 src_vt0, src_vt1, src_vt2, src_vt3, src_vt4, src_vt5, src_vt6;
+    v16i8 src_vt7, src_vt8, src_vt9, src_vt10, src_vt11, src_vt12;
+    v16i8 src_vt10_r, src_vt21_r, src_vt32_r, src_vt43_r, src_vt54_r;
+    v16i8 src_vt65_r, src_vt76_r, src_vt87_r, src_vt98_r, src_vt109_r;
+    v16i8 src_vt1110_r, src_vt1211_r, filt0, filt1, filt2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, vt_out0, vt_out1, vt_out2;
+    v8i16 vt_out3, tmp0, tmp1, tmp2, tmp3;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    LD_SB5(src_y, stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+    src_y += (5 * stride);
+
+    XORI_B5_128_SB(src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+
+    LD_SB4(src_x, stride, src_hz0, src_hz1, src_hz2, src_hz3);
+    XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
+    src_x += (4 * stride);
+
+    hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, src_hz0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src_hz1, src_hz1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src_hz2, src_hz2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src_hz3, src_hz3, mask0, mask1, mask2);
+
+    SRARI_H4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 5);
+    SAT_SH4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 7);
+
+    LD_SB4(src_y, stride, src_vt5, src_vt6, src_vt7, src_vt8);
+    src_y += (4 * stride);
+    XORI_B4_128_SB(src_vt5, src_vt6, src_vt7, src_vt8);
+
+    ILVR_B4_SB(src_vt1, src_vt0, src_vt2, src_vt1, src_vt3, src_vt2, src_vt4,
+               src_vt3, src_vt10_r, src_vt21_r, src_vt32_r, src_vt43_r);
+    ILVR_B4_SB(src_vt5, src_vt4, src_vt6, src_vt5, src_vt7, src_vt6, src_vt8,
+               src_vt7, src_vt54_r, src_vt65_r, src_vt76_r, src_vt87_r);
+    vt_out0 = AVC_DOT_SH3_SH(src_vt10_r, src_vt32_r, src_vt54_r, filt0, filt1,
+                             filt2);
+    vt_out1 = AVC_DOT_SH3_SH(src_vt21_r, src_vt43_r, src_vt65_r, filt0, filt1,
+                             filt2);
+    vt_out2 = AVC_DOT_SH3_SH(src_vt32_r, src_vt54_r, src_vt76_r, filt0, filt1,
+                             filt2);
+    vt_out3 = AVC_DOT_SH3_SH(src_vt43_r, src_vt65_r, src_vt87_r, filt0, filt1,
+                             filt2);
+    SRARI_H4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 5);
+    SAT_SH4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 7);
+
+    tmp0 = __msa_srari_h((hz_out0 + vt_out0), 1);
+    tmp1 = __msa_srari_h((hz_out1 + vt_out1), 1);
+    tmp2 = __msa_srari_h((hz_out2 + vt_out2), 1);
+    tmp3 = __msa_srari_h((hz_out3 + vt_out3), 1);
+
+    LD_SB4(src_x, stride, src_hz0, src_hz1, src_hz2, src_hz3);
+    XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
+
+    SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+    out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+    out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+    ST8x4_UB(out0, out1, dst, stride);
+    dst += (4 * stride);
+
+    LD_SB4(src_y, stride, src_vt9, src_vt10, src_vt11, src_vt12);
+    XORI_B4_128_SB(src_vt9, src_vt10, src_vt11, src_vt12);
+
+    hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, src_hz0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src_hz1, src_hz1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src_hz2, src_hz2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src_hz3, src_hz3, mask0, mask1, mask2);
+
+    SRARI_H4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 5);
+    SAT_SH4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 7);
+
+    ILVR_B4_SB(src_vt9, src_vt8, src_vt10, src_vt9, src_vt11, src_vt10,
+               src_vt12, src_vt11, src_vt98_r, src_vt109_r, src_vt1110_r,
+               src_vt1211_r);
+    vt_out0 = AVC_DOT_SH3_SH(src_vt54_r, src_vt76_r, src_vt98_r, filt0, filt1,
+                             filt2);
+    vt_out1 = AVC_DOT_SH3_SH(src_vt65_r, src_vt87_r, src_vt109_r, filt0, filt1,
+                             filt2);
+    vt_out2 = AVC_DOT_SH3_SH(src_vt76_r, src_vt98_r, src_vt1110_r, filt0, filt1,
+                             filt2);
+    vt_out3 = AVC_DOT_SH3_SH(src_vt87_r, src_vt109_r, src_vt1211_r, filt0,
+                             filt1, filt2);
+    SRARI_H4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 5);
+    SAT_SH4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 7);
+
+    tmp0 = __msa_srari_h((hz_out0 + vt_out0), 1);
+    tmp1 = __msa_srari_h((hz_out1 + vt_out1), 1);
+    tmp2 = __msa_srari_h((hz_out2 + vt_out2), 1);
+    tmp3 = __msa_srari_h((hz_out3 + vt_out3), 1);
+
+    SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+    out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+    out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+    ST8x4_UB(out0, out1, dst, stride);
+    dst += (4 * stride);
+}
+
+static void avc_luma_hv_qrt_16x16_msa(const uint8_t *src_x,
+                                      const uint8_t *src_y, uint8_t *dst,
+                                      int32_t stride)
+{
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    const uint8_t *src_x_tmp = src_x;
+    const uint8_t *src_y_tmp = src_y;
+    uint8_t *dst_tmp = dst;
+    uint32_t multiple8_cnt, loop_cnt;
+    v16u8 tmp0, tmp1;
+    v16i8 src_hz0, src_hz1, src_hz2, src_hz3, mask0, mask1, mask2;
+    v16i8 src_vt0, src_vt1, src_vt2, src_vt3, src_vt4, src_vt5, src_vt6;
+    v16i8 src_vt7, src_vt8;
+    v16i8 src_vt10_r, src_vt21_r, src_vt32_r, src_vt43_r, src_vt54_r;
+    v16i8 src_vt65_r, src_vt76_r, src_vt87_r, filt0, filt1, filt2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, vt_out0, vt_out1, vt_out2;
+    v8i16 vt_out3, out0, out1, out2, out3;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    for (multiple8_cnt = 2; multiple8_cnt--;) {
+        src_x = src_x_tmp;
+        src_y = src_y_tmp;
+        dst = dst_tmp;
+
+        LD_SB5(src_y, stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+        src_y += (5 * stride);
+
+        XORI_B5_128_SB(src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+
+        for (loop_cnt = 4; loop_cnt--;) {
+            LD_SB4(src_x, stride, src_hz0, src_hz1, src_hz2, src_hz3);
+            XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
+            src_x += (4 * stride);
+
+            hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, src_hz0, mask0, mask1, mask2);
+            hz_out1 = AVC_HORZ_FILTER_SH(src_hz1, src_hz1, mask0, mask1, mask2);
+            hz_out2 = AVC_HORZ_FILTER_SH(src_hz2, src_hz2, mask0, mask1, mask2);
+            hz_out3 = AVC_HORZ_FILTER_SH(src_hz3, src_hz3, mask0, mask1, mask2);
+            SRARI_H4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 5);
+            SAT_SH4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 7);
+
+            LD_SB4(src_y, stride, src_vt5, src_vt6, src_vt7, src_vt8);
+            src_y += (4 * stride);
+
+            XORI_B4_128_SB(src_vt5, src_vt6, src_vt7, src_vt8);
+            ILVR_B4_SB(src_vt1, src_vt0, src_vt2, src_vt1, src_vt3, src_vt2,
+                       src_vt4, src_vt3, src_vt10_r, src_vt21_r, src_vt32_r,
+                       src_vt43_r);
+            ILVR_B4_SB(src_vt5, src_vt4, src_vt6, src_vt5, src_vt7, src_vt6,
+                       src_vt8, src_vt7, src_vt54_r, src_vt65_r, src_vt76_r,
+                       src_vt87_r);
+            vt_out0 = AVC_DOT_SH3_SH(src_vt10_r, src_vt32_r, src_vt54_r, filt0,
+                                     filt1, filt2);
+            vt_out1 = AVC_DOT_SH3_SH(src_vt21_r, src_vt43_r, src_vt65_r, filt0,
+                                     filt1, filt2);
+            vt_out2 = AVC_DOT_SH3_SH(src_vt32_r, src_vt54_r, src_vt76_r, filt0,
+                                     filt1, filt2);
+            vt_out3 = AVC_DOT_SH3_SH(src_vt43_r, src_vt65_r, src_vt87_r, filt0,
+                                     filt1, filt2);
+            SRARI_H4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 5);
+            SAT_SH4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 7);
+
+            out0 = __msa_srari_h((hz_out0 + vt_out0), 1);
+            out1 = __msa_srari_h((hz_out1 + vt_out1), 1);
+            out2 = __msa_srari_h((hz_out2 + vt_out2), 1);
+            out3 = __msa_srari_h((hz_out3 + vt_out3), 1);
+
+            SAT_SH4_SH(out0, out1, out2, out3, 7);
+            tmp0 = PCKEV_XORI128_UB(out0, out1);
+            tmp1 = PCKEV_XORI128_UB(out2, out3);
+            ST8x4_UB(tmp0, tmp1, dst, stride);
+            dst += (4 * stride);
+
+            src_vt0 = src_vt4;
+            src_vt1 = src_vt5;
+            src_vt2 = src_vt6;
+            src_vt3 = src_vt7;
+            src_vt4 = src_vt8;
+        }
+
+        src_x_tmp += 8;
+        src_y_tmp += 8;
+        dst_tmp += 8;
+    }
+}
+
+static void avc_luma_hv_qrt_and_aver_dst_4x4_msa(const uint8_t *src_x,
+                                                 const uint8_t *src_y,
+                                                 uint8_t *dst,
+                                                 int32_t stride)
+{
+    uint32_t tp0, tp1, tp2, tp3;
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    v16u8 res, dst0 = { 0 };
+    v16i8 src_hz0, src_hz1, src_hz2, src_hz3, src_vt7, src_vt8;
+    v16i8 src_vt0, src_vt1, src_vt2, src_vt3, src_vt4, src_vt5, src_vt6;
+    v16i8 src_vt10_r, src_vt32_r, src_vt54_r, src_vt76_r;
+    v16i8 mask0, mask1, mask2, filt0, filt1, filt2;
+    v8i16 hz_out0, hz_out1, vt_out0, vt_out1, res0, res1;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+
+    LD_SB5(src_y, stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+    src_y += (5 * stride);
+
+    src_vt0 = (v16i8) __msa_insve_w((v4i32) src_vt0, 1, (v4i32) src_vt1);
+    src_vt1 = (v16i8) __msa_insve_w((v4i32) src_vt1, 1, (v4i32) src_vt2);
+    src_vt2 = (v16i8) __msa_insve_w((v4i32) src_vt2, 1, (v4i32) src_vt3);
+    src_vt3 = (v16i8) __msa_insve_w((v4i32) src_vt3, 1, (v4i32) src_vt4);
+
+    XORI_B4_128_SB(src_vt0, src_vt1, src_vt2, src_vt3);
+
+    LD_SB4(src_x, stride, src_hz0, src_hz1, src_hz2, src_hz3);
+    XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
+    hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, src_hz1, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src_hz2, src_hz3, mask0, mask1, mask2);
+
+    SRARI_H2_SH(hz_out0, hz_out1, 5);
+    SAT_SH2_SH(hz_out0, hz_out1, 7);
+
+    LD_SB4(src_y, stride, src_vt5, src_vt6, src_vt7, src_vt8);
+
+    src_vt4 = (v16i8) __msa_insve_w((v4i32) src_vt4, 1, (v4i32) src_vt5);
+    src_vt5 = (v16i8) __msa_insve_w((v4i32) src_vt5, 1, (v4i32) src_vt6);
+    src_vt6 = (v16i8) __msa_insve_w((v4i32) src_vt6, 1, (v4i32) src_vt7);
+    src_vt7 = (v16i8) __msa_insve_w((v4i32) src_vt7, 1, (v4i32) src_vt8);
+
+    XORI_B4_128_SB(src_vt4, src_vt5, src_vt6, src_vt7);
+    ILVR_B2_SB(src_vt1, src_vt0, src_vt3, src_vt2, src_vt10_r, src_vt32_r);
+    ILVR_B2_SB(src_vt5, src_vt4, src_vt7, src_vt6, src_vt54_r, src_vt76_r);
+    vt_out0 = AVC_DOT_SH3_SH(src_vt10_r, src_vt32_r, src_vt54_r, filt0, filt1,
+                             filt2);
+    vt_out1 = AVC_DOT_SH3_SH(src_vt32_r, src_vt54_r, src_vt76_r, filt0, filt1,
+                             filt2);
+    SRARI_H2_SH(vt_out0, vt_out1, 5);
+    SAT_SH2_SH(vt_out0, vt_out1, 7);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+
+    res1 = __msa_srari_h((hz_out1 + vt_out1), 1);
+    res0 = __msa_srari_h((hz_out0 + vt_out0), 1);
 
     SAT_SH2_SH(res0, res1, 7);
     res = PCKEV_XORI128_UB(res0, res1);
-
-    dst0 = (v16u8) __msa_insve_w((v4i32) dst0, 1, (v4i32) dst1);
-    dst1 = (v16u8) __msa_insve_w((v4i32) dst2, 1, (v4i32) dst3);
-    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
     dst0 = __msa_aver_u_b(res, dst0);
 
-    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, dst_stride);
+    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
 }
 
 static void avc_luma_hv_qrt_and_aver_dst_8x8_msa(const uint8_t *src_x,
                                                  const uint8_t *src_y,
-                                                 int32_t src_stride,
                                                  uint8_t *dst,
-                                                 int32_t dst_stride)
+                                                 int32_t stride)
 {
-    uint32_t loop_cnt;
-    v16i8 src_hz0, src_hz1, src_hz2, src_hz3;
-    v16u8 dst0, dst1, dst2, dst3;
-    v16i8 src_vt0, src_vt1, src_vt2, src_vt3;
-    v16i8 src_vt4, src_vt5, src_vt6, src_vt7, src_vt8;
-    v16i8 mask0, mask1, mask2;
-    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
-    v8i16 vert_out0, vert_out1, vert_out2, vert_out3;
-    v8i16 out0, out1, out2, out3;
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 out0, out1, dst0 = { 0 }, dst1 = { 0 };
+    v16i8 src_hz0, src_hz1, src_hz2, src_hz3, src_vt0, src_vt1, src_vt2;
+    v16i8 src_vt3, src_vt4, src_vt5, src_vt6, src_vt7, src_vt8;
+    v16i8 src_vt9, src_vt10, src_vt11, src_vt12, mask0, mask1, mask2;
+    v16i8 src_vt10_r, src_vt21_r, src_vt32_r, src_vt43_r, src_vt54_r;
+    v16i8 src_vt65_r, src_vt76_r, src_vt87_r, src_vt98_r, src_vt109_r;
+    v16i8 src_vt1110_r, src_vt1211_r, filt0, filt1, filt2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, vt_out0, vt_out1, vt_out2;
+    v8i16 vt_out3, tmp0, tmp1, tmp2, tmp3;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
 
     LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    LD_SB5(src_y, stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+    src_y += (5 * stride);
 
-    LD_SB5(src_y, src_stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
-    src_y += (5 * src_stride);
+    XORI_B5_128_SB(src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
 
-    src_vt0 = (v16i8) __msa_insve_d((v2i64) src_vt0, 1, (v2i64) src_vt1);
-    src_vt1 = (v16i8) __msa_insve_d((v2i64) src_vt1, 1, (v2i64) src_vt2);
-    src_vt2 = (v16i8) __msa_insve_d((v2i64) src_vt2, 1, (v2i64) src_vt3);
-    src_vt3 = (v16i8) __msa_insve_d((v2i64) src_vt3, 1, (v2i64) src_vt4);
+    LD_SB4(src_x, stride, src_hz0, src_hz1, src_hz2, src_hz3);
+    XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
+    src_x += (4 * stride);
 
-    XORI_B4_128_SB(src_vt0, src_vt1, src_vt2, src_vt3);
+    hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, src_hz0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src_hz1, src_hz1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src_hz2, src_hz2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src_hz3, src_hz3, mask0, mask1, mask2);
 
-    for (loop_cnt = 2; loop_cnt--;) {
-        LD_SB4(src_x, src_stride, src_hz0, src_hz1, src_hz2, src_hz3);
-        XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
-        src_x += (4 * src_stride);
+    SRARI_H4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 5);
+    SAT_SH4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 7);
 
-        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
-        hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, src_hz0, mask0, mask1, mask2);
-        hz_out1 = AVC_HORZ_FILTER_SH(src_hz1, src_hz1, mask0, mask1, mask2);
-        hz_out2 = AVC_HORZ_FILTER_SH(src_hz2, src_hz2, mask0, mask1, mask2);
-        hz_out3 = AVC_HORZ_FILTER_SH(src_hz3, src_hz3, mask0, mask1, mask2);
-        SRARI_H4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 5);
-        SAT_SH4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 7);
-        LD_SB4(src_y, src_stride, src_vt5, src_vt6, src_vt7, src_vt8);
-        src_y += (4 * src_stride);
+    LD_SB4(src_y, stride, src_vt5, src_vt6, src_vt7, src_vt8);
+    src_y += (4 * stride);
+    XORI_B4_128_SB(src_vt5, src_vt6, src_vt7, src_vt8);
 
-        src_vt4 = (v16i8) __msa_insve_d((v2i64) src_vt4, 1, (v2i64) src_vt5);
-        src_vt5 = (v16i8) __msa_insve_d((v2i64) src_vt5, 1, (v2i64) src_vt6);
-        src_vt6 = (v16i8) __msa_insve_d((v2i64) src_vt6, 1, (v2i64) src_vt7);
-        src_vt7 = (v16i8) __msa_insve_d((v2i64) src_vt7, 1, (v2i64) src_vt8);
+    ILVR_B4_SB(src_vt1, src_vt0, src_vt2, src_vt1, src_vt3, src_vt2, src_vt4,
+               src_vt3, src_vt10_r, src_vt21_r, src_vt32_r, src_vt43_r);
+    ILVR_B4_SB(src_vt5, src_vt4, src_vt6, src_vt5, src_vt7, src_vt6, src_vt8,
+               src_vt7, src_vt54_r, src_vt65_r, src_vt76_r, src_vt87_r);
+    vt_out0 = AVC_DOT_SH3_SH(src_vt10_r, src_vt32_r, src_vt54_r, filt0, filt1,
+                             filt2);
+    vt_out1 = AVC_DOT_SH3_SH(src_vt21_r, src_vt43_r, src_vt65_r, filt0, filt1,
+                             filt2);
+    vt_out2 = AVC_DOT_SH3_SH(src_vt32_r, src_vt54_r, src_vt76_r, filt0, filt1,
+                             filt2);
+    vt_out3 = AVC_DOT_SH3_SH(src_vt43_r, src_vt65_r, src_vt87_r, filt0, filt1,
+                             filt2);
+    SRARI_H4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 5);
+    SAT_SH4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 7);
 
-        XORI_B4_128_SB(src_vt4, src_vt5, src_vt6, src_vt7);
-        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src_vt0, src_vt1, src_vt2, src_vt3,
-                                        src_vt4, src_vt5, vert_out0, vert_out1);
-        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src_vt2, src_vt3, src_vt4, src_vt5,
-                                        src_vt6, src_vt7, vert_out2, vert_out3);
-        SRARI_H4_SH(vert_out0, vert_out1, vert_out2, vert_out3, 5);
-        SAT_SH4_SH(vert_out0, vert_out1, vert_out2, vert_out3, 7);
+    tmp0 = __msa_srari_h((hz_out0 + vt_out0), 1);
+    tmp1 = __msa_srari_h((hz_out1 + vt_out1), 1);
+    tmp2 = __msa_srari_h((hz_out2 + vt_out2), 1);
+    tmp3 = __msa_srari_h((hz_out3 + vt_out3), 1);
 
-        out0 = __msa_srari_h((hz_out0 + vert_out0), 1);
-        out1 = __msa_srari_h((hz_out1 + vert_out1), 1);
-        out2 = __msa_srari_h((hz_out2 + vert_out2), 1);
-        out3 = __msa_srari_h((hz_out3 + vert_out3), 1);
+    LD_SB4(src_x, stride, src_hz0, src_hz1, src_hz2, src_hz3);
+    XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
 
-        SAT_SH4_SH(out0, out1, out2, out3, 7);
-        ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
-        CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1,
-                                dst, dst_stride);
-        dst += (4 * dst_stride);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
 
-        src_vt0 = src_vt4;
-        src_vt1 = src_vt5;
-        src_vt2 = src_vt6;
-        src_vt3 = src_vt7;
-        src_vt4 = src_vt8;
-    }
+    SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+    out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+    out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+    AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+    ST8x4_UB(dst0, dst1, dst, stride);
+    dst += (4 * stride);
+
+    LD_SB4(src_y, stride, src_vt9, src_vt10, src_vt11, src_vt12);
+    XORI_B4_128_SB(src_vt9, src_vt10, src_vt11, src_vt12);
+
+    hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, src_hz0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src_hz1, src_hz1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src_hz2, src_hz2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src_hz3, src_hz3, mask0, mask1, mask2);
+
+    SRARI_H4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 5);
+    SAT_SH4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 7);
+
+    ILVR_B4_SB(src_vt9, src_vt8, src_vt10, src_vt9, src_vt11, src_vt10,
+               src_vt12, src_vt11, src_vt98_r, src_vt109_r, src_vt1110_r,
+               src_vt1211_r);
+    vt_out0 = AVC_DOT_SH3_SH(src_vt54_r, src_vt76_r, src_vt98_r, filt0, filt1,
+                             filt2);
+    vt_out1 = AVC_DOT_SH3_SH(src_vt65_r, src_vt87_r, src_vt109_r, filt0, filt1,
+                             filt2);
+    vt_out2 = AVC_DOT_SH3_SH(src_vt76_r, src_vt98_r, src_vt1110_r, filt0, filt1,
+                             filt2);
+    vt_out3 = AVC_DOT_SH3_SH(src_vt87_r, src_vt109_r, src_vt1211_r, filt0,
+                             filt1, filt2);
+    SRARI_H4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 5);
+    SAT_SH4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 7);
+
+    tmp0 = __msa_srari_h((hz_out0 + vt_out0), 1);
+    tmp1 = __msa_srari_h((hz_out1 + vt_out1), 1);
+    tmp2 = __msa_srari_h((hz_out2 + vt_out2), 1);
+    tmp3 = __msa_srari_h((hz_out3 + vt_out3), 1);
+
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+
+    SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+    out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+    out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+    AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+    ST8x4_UB(dst0, dst1, dst, stride);
+    dst += (4 * stride);
 }
 
 static void avc_luma_hv_qrt_and_aver_dst_16x16_msa(const uint8_t *src_x,
                                                    const uint8_t *src_y,
-                                                   int32_t src_stride,
                                                    uint8_t *dst,
-                                                   int32_t dst_stride)
+                                                   int32_t stride)
 {
-    uint32_t multiple8_cnt;
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    const uint8_t *src_x_tmp = src_x;
+    const uint8_t *src_y_tmp = src_y;
+    uint8_t *dst_tmp = dst;
+    uint32_t multiple8_cnt, loop_cnt;
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 tmp0, tmp1, dst0 = { 0 }, dst1 = { 0 };
+    v16i8 src_hz0, src_hz1, src_hz2, src_hz3, mask0, mask1, mask2;
+    v16i8 src_vt0, src_vt1, src_vt2, src_vt3, src_vt4, src_vt5, src_vt6;
+    v16i8 src_vt7, src_vt8;
+    v16i8 src_vt10_r, src_vt21_r, src_vt32_r, src_vt43_r, src_vt54_r;
+    v16i8 src_vt65_r, src_vt76_r, src_vt87_r, filt0, filt1, filt2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, vt_out0, vt_out1, vt_out2;
+    v8i16 vt_out3, out0, out1, out2, out3;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
 
     for (multiple8_cnt = 2; multiple8_cnt--;) {
-        avc_luma_hv_qrt_and_aver_dst_8x8_msa(src_x, src_y, src_stride,
-                                             dst, dst_stride);
+        src_x = src_x_tmp;
+        src_y = src_y_tmp;
+        dst = dst_tmp;
 
-        src_x += 8;
-        src_y += 8;
-        dst += 8;
-    }
+        LD_SB5(src_y, stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+        src_y += (5 * stride);
 
-    src_x += (8 * src_stride) - 16;
-    src_y += (8 * src_stride) - 16;
-    dst += (8 * dst_stride) - 16;
+        XORI_B5_128_SB(src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
 
-    for (multiple8_cnt = 2; multiple8_cnt--;) {
-        avc_luma_hv_qrt_and_aver_dst_8x8_msa(src_x, src_y, src_stride,
-                                             dst, dst_stride);
+        for (loop_cnt = 4; loop_cnt--;) {
+            LD_SB4(src_x, stride, src_hz0, src_hz1, src_hz2, src_hz3);
+            XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
+            src_x += (4 * stride);
 
-        src_x += 8;
-        src_y += 8;
-        dst += 8;
+            hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, src_hz0, mask0, mask1, mask2);
+            hz_out1 = AVC_HORZ_FILTER_SH(src_hz1, src_hz1, mask0, mask1, mask2);
+            hz_out2 = AVC_HORZ_FILTER_SH(src_hz2, src_hz2, mask0, mask1, mask2);
+            hz_out3 = AVC_HORZ_FILTER_SH(src_hz3, src_hz3, mask0, mask1, mask2);
+            SRARI_H4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 5);
+            SAT_SH4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 7);
+
+            LD_SB4(src_y, stride, src_vt5, src_vt6, src_vt7, src_vt8);
+            src_y += (4 * stride);
+
+            XORI_B4_128_SB(src_vt5, src_vt6, src_vt7, src_vt8);
+            ILVR_B4_SB(src_vt1, src_vt0, src_vt2, src_vt1, src_vt3, src_vt2,
+                       src_vt4, src_vt3, src_vt10_r, src_vt21_r, src_vt32_r,
+                       src_vt43_r);
+            ILVR_B4_SB(src_vt5, src_vt4, src_vt6, src_vt5, src_vt7, src_vt6,
+                       src_vt8, src_vt7, src_vt54_r, src_vt65_r, src_vt76_r,
+                       src_vt87_r);
+            vt_out0 = AVC_DOT_SH3_SH(src_vt10_r, src_vt32_r, src_vt54_r, filt0,
+                                     filt1, filt2);
+            vt_out1 = AVC_DOT_SH3_SH(src_vt21_r, src_vt43_r, src_vt65_r, filt0,
+                                     filt1, filt2);
+            vt_out2 = AVC_DOT_SH3_SH(src_vt32_r, src_vt54_r, src_vt76_r, filt0,
+                                     filt1, filt2);
+            vt_out3 = AVC_DOT_SH3_SH(src_vt43_r, src_vt65_r, src_vt87_r, filt0,
+                                     filt1, filt2);
+            SRARI_H4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 5);
+            SAT_SH4_SH(vt_out0, vt_out1, vt_out2, vt_out3, 7);
+
+            out0 = __msa_srari_h((hz_out0 + vt_out0), 1);
+            out1 = __msa_srari_h((hz_out1 + vt_out1), 1);
+            out2 = __msa_srari_h((hz_out2 + vt_out2), 1);
+            out3 = __msa_srari_h((hz_out3 + vt_out3), 1);
+
+            LD4(dst, stride, tp0, tp1, tp2, tp3);
+            INSERT_D2_UB(tp0, tp1, dst0);
+            INSERT_D2_UB(tp2, tp3, dst1);
+
+            SAT_SH4_SH(out0, out1, out2, out3, 7);
+            tmp0 = PCKEV_XORI128_UB(out0, out1);
+            tmp1 = PCKEV_XORI128_UB(out2, out3);
+            AVER_UB2_UB(tmp0, dst0, tmp1, dst1, dst0, dst1);
+            ST8x4_UB(dst0, dst1, dst, stride);
+            dst += (4 * stride);
+
+            src_vt0 = src_vt4;
+            src_vt1 = src_vt5;
+            src_vt2 = src_vt6;
+            src_vt3 = src_vt7;
+            src_vt4 = src_vt8;
+        }
+
+        src_x_tmp += 8;
+        src_y_tmp += 8;
+        dst_tmp += 8;
     }
 }
 
@@ -2766,90 +1519,78 @@
 void ff_put_h264_qpel16_mc11_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_16w_msa(src - 2,
-                            src - (stride * 2), stride, dst, stride, 16);
+    avc_luma_hv_qrt_16x16_msa(src - 2, src - (stride * 2), dst, stride);
 }
 
 void ff_put_h264_qpel16_mc31_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_16w_msa(src - 2,
-                            src - (stride * 2) +
-                            sizeof(uint8_t), stride, dst, stride, 16);
+    avc_luma_hv_qrt_16x16_msa(src - 2, src - (stride * 2) + 1, dst, stride);
 }
 
 void ff_put_h264_qpel16_mc13_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_16w_msa(src + stride - 2,
-                            src - (stride * 2), stride, dst, stride, 16);
+    avc_luma_hv_qrt_16x16_msa(src + stride - 2, src - (stride * 2), dst,
+                              stride);
 }
 
 void ff_put_h264_qpel16_mc33_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_16w_msa(src + stride - 2,
-                            src - (stride * 2) +
-                            sizeof(uint8_t), stride, dst, stride, 16);
+    avc_luma_hv_qrt_16x16_msa(src + stride - 2, src - (stride * 2) + 1, dst,
+                              stride);
 }
 
 void ff_put_h264_qpel8_mc11_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_8w_msa(src - 2, src - (stride * 2), stride, dst, stride, 8);
+    avc_luma_hv_qrt_8x8_msa(src - 2, src - (stride * 2), dst, stride);
 }
 
 void ff_put_h264_qpel8_mc31_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_8w_msa(src - 2,
-                           src - (stride * 2) +
-                           sizeof(uint8_t), stride, dst, stride, 8);
+    avc_luma_hv_qrt_8x8_msa(src - 2, src - (stride * 2) + 1, dst, stride);
 }
 
 void ff_put_h264_qpel8_mc13_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_8w_msa(src + stride - 2,
-                           src - (stride * 2), stride, dst, stride, 8);
+    avc_luma_hv_qrt_8x8_msa(src + stride - 2, src - (stride * 2), dst, stride);
 }
 
 void ff_put_h264_qpel8_mc33_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_8w_msa(src + stride - 2,
-                           src - (stride * 2) +
-                           sizeof(uint8_t), stride, dst, stride, 8);
+    avc_luma_hv_qrt_8x8_msa(src + stride - 2, src - (stride * 2) + 1, dst,
+                            stride);
 }
 
 
 void ff_put_h264_qpel4_mc11_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_4w_msa(src - 2, src - (stride * 2), stride, dst, stride, 4);
+    avc_luma_hv_qrt_4x4_msa(src - 2, src - (stride * 2), dst, stride);
 }
 
 void ff_put_h264_qpel4_mc31_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_4w_msa(src - 2,
-                           src - (stride * 2) +
-                           sizeof(uint8_t), stride, dst, stride, 4);
+    avc_luma_hv_qrt_4x4_msa(src - 2, src - (stride * 2) + 1, dst, stride);
 }
 
 void ff_put_h264_qpel4_mc13_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_4w_msa(src + stride - 2,
-                           src - (stride * 2), stride, dst, stride, 4);
+    avc_luma_hv_qrt_4x4_msa(src + stride - 2, src - (stride * 2), dst, stride);
 }
 
 void ff_put_h264_qpel4_mc33_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hv_qrt_4w_msa(src + stride - 2,
-                           src - (stride * 2) +
-                           sizeof(uint8_t), stride, dst, stride, 4);
+    avc_luma_hv_qrt_4x4_msa(src + stride - 2, src - (stride * 2) + 1, dst,
+                            stride);
 }
 
 void ff_put_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src,
@@ -3634,153 +2375,1645 @@
 void ff_put_h264_qpel16_mc12_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_16w_msa(src - (2 * stride) - 2,
-                              stride, dst, stride, 16, 0);
+    uint32_t row;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src11;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, dst0, dst1, dst2, dst3, mask3;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v8i16 shf_vec7, shf_vec8, shf_vec9, shf_vec10, shf_vec11, mask4, mask5;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+
+    mask3 = mask0 + 4;
+    mask4 = mask1 + 4;
+    mask5 = mask2 + 4;
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    LD_SB5(src + 8, stride, src7, src8, src9, src10, src11);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src7, src8, src9, src10, src11);
+
+    for (row = 16; row--;) {
+        LD_SB2(src, 8, src5, src6);
+        src += stride;
+        XORI_B2_128_SB(src5, src6);
+
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
+                                        vt_res0, vt_res1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src7, src8, src9, src10, src11, src6,
+                                        vt_res2, vt_res3);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+                   mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+                   mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask3,
+                   mask4, mask5, shf_vec6, shf_vec7, shf_vec8);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask3,
+                   mask4, mask5, shf_vec9, shf_vec10, shf_vec11);
+        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+        hz_res2 = __msa_hadd_s_w(shf_vec6, shf_vec6);
+        hz_res3 = __msa_hadd_s_w(shf_vec9, shf_vec9);
+        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+        DPADD_SH2_SW(shf_vec7, shf_vec8, minus5h, plus20h, hz_res2, hz_res2);
+        DPADD_SH2_SW(shf_vec10, shf_vec11, minus5h, plus20h, hz_res3, hz_res3);
+        SRARI_W4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 10);
+        SAT_SW4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 7);
+        dst0 = __msa_srari_h(shf_vec2, 5);
+        dst1 = __msa_srari_h(shf_vec5, 5);
+        dst2 = __msa_srari_h(shf_vec8, 5);
+        dst3 = __msa_srari_h(shf_vec11, 5);
+        SAT_SH4_SH(dst0, dst1, dst2, dst3, 7);
+        PCKEV_H2_SH(dst2, dst0, dst3, dst1, dst0, dst1);
+        PCKEV_H2_SH(hz_res2, hz_res0, hz_res3, hz_res1, dst2, dst3);
+        dst0 = __msa_aver_s_h(dst2, dst0);
+        dst1 = __msa_aver_s_h(dst3, dst1);
+        out = PCKEV_XORI128_UB(dst0, dst1);
+        ST_UB(out, dst);
+        dst += stride;
+
+        src0 = src1;
+        src1 = src2;
+        src2 = src3;
+        src3 = src4;
+        src4 = src5;
+        src7 = src8;
+        src8 = src9;
+        src9 = src10;
+        src10 = src11;
+        src11 = src6;
+    }
 }
 
 void ff_put_h264_qpel16_mc32_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_16w_msa(src - (2 * stride) - 2,
-                              stride, dst, stride, 16, 1);
+    uint32_t row;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src11;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, dst0, dst1, dst2, dst3, mask3;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v8i16 shf_vec7, shf_vec8, shf_vec9, shf_vec10, shf_vec11, mask4, mask5;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+
+    mask3 = mask0 + 4;
+    mask4 = mask1 + 4;
+    mask5 = mask2 + 4;
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    LD_SB5(src + 8, stride, src7, src8, src9, src10, src11);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src7, src8, src9, src10, src11);
+
+    for (row = 16; row--;) {
+        LD_SB2(src, 8, src5, src6);
+        src += stride;
+        XORI_B2_128_SB(src5, src6);
+
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
+                                        vt_res0, vt_res1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src7, src8, src9, src10, src11, src6,
+                                        vt_res2, vt_res3);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+                   mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+                   mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask3,
+                   mask4, mask5, shf_vec6, shf_vec7, shf_vec8);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask3,
+                   mask4, mask5, shf_vec9, shf_vec10, shf_vec11);
+        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+        hz_res2 = __msa_hadd_s_w(shf_vec6, shf_vec6);
+        hz_res3 = __msa_hadd_s_w(shf_vec9, shf_vec9);
+        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+        DPADD_SH2_SW(shf_vec7, shf_vec8, minus5h, plus20h, hz_res2, hz_res2);
+        DPADD_SH2_SW(shf_vec10, shf_vec11, minus5h, plus20h, hz_res3, hz_res3);
+        SRARI_W4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 10);
+        SAT_SW4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 7);
+        dst0 = __msa_srari_h(shf_vec2, 5);
+        dst1 = __msa_srari_h(shf_vec5, 5);
+        dst2 = __msa_srari_h(shf_vec8, 5);
+        dst3 = __msa_srari_h(shf_vec11, 5);
+        SAT_SH4_SH(dst0, dst1, dst2, dst3, 7);
+        dst0 = __msa_pckod_h(dst2, dst0);
+        dst1 = __msa_pckod_h(dst3, dst1);
+        PCKEV_H2_SH(hz_res2, hz_res0, hz_res3, hz_res1, dst2, dst3);
+        dst0 = __msa_aver_s_h(dst2, dst0);
+        dst1 = __msa_aver_s_h(dst3, dst1);
+        out = PCKEV_XORI128_UB(dst0, dst1);
+        ST_UB(out, dst);
+        dst += stride;
+
+        src0 = src1;
+        src1 = src2;
+        src2 = src3;
+        src3 = src4;
+        src4 = src5;
+        src7 = src8;
+        src8 = src9;
+        src9 = src10;
+        src10 = src11;
+        src11 = src6;
+    }
 }
 
 void ff_put_h264_qpel8_mc12_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_8w_msa(src - (2 * stride) - 2, stride, dst, stride, 8, 0);
+    uint32_t row;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, dst0, dst1, dst2, dst3;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v8i16 shf_vec7, shf_vec8, shf_vec9, shf_vec10, shf_vec11;
+    v8i16 mask3, mask4, mask5;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+
+    mask3 = mask0 + 4;
+    mask4 = mask1 + 4;
+    mask5 = mask2 + 4;
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+
+    for (row = 4; row--;) {
+        LD_SB2(src, stride, src5, src6);
+        src += (2 * stride);
+        XORI_B2_128_SB(src5, src6);
+
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
+                                        vt_res0, vt_res1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src1, src2, src3, src4, src5, src6,
+                                        vt_res2, vt_res3);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+                   mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+                   mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask3,
+                   mask4, mask5, shf_vec6, shf_vec7, shf_vec8);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask3,
+                   mask4, mask5, shf_vec9, shf_vec10, shf_vec11);
+        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+        hz_res2 = __msa_hadd_s_w(shf_vec6, shf_vec6);
+        hz_res3 = __msa_hadd_s_w(shf_vec9, shf_vec9);
+        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+        DPADD_SH2_SW(shf_vec7, shf_vec8, minus5h, plus20h, hz_res2, hz_res2);
+        DPADD_SH2_SW(shf_vec10, shf_vec11, minus5h, plus20h, hz_res3, hz_res3);
+        SRARI_W4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 10);
+        SAT_SW4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 7);
+        dst0 = __msa_srari_h(shf_vec2, 5);
+        dst1 = __msa_srari_h(shf_vec5, 5);
+        dst2 = __msa_srari_h(shf_vec8, 5);
+        dst3 = __msa_srari_h(shf_vec11, 5);
+        SAT_SH4_SH(dst0, dst1, dst2, dst3, 7);
+        PCKEV_H2_SH(dst2, dst0, dst3, dst1, dst0, dst1);
+        PCKEV_H2_SH(hz_res2, hz_res0, hz_res3, hz_res1, dst2, dst3);
+        dst0 = __msa_aver_s_h(dst2, dst0);
+        dst1 = __msa_aver_s_h(dst3, dst1);
+        out = PCKEV_XORI128_UB(dst0, dst1);
+        ST8x2_UB(out, dst, stride);
+        dst += (2 * stride);
+
+        src0 = src2;
+        src1 = src3;
+        src2 = src4;
+        src3 = src5;
+        src4 = src6;
+    }
 }
 
 void ff_put_h264_qpel8_mc32_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_8w_msa(src - (2 * stride) - 2, stride, dst, stride, 8, 1);
+    uint32_t row;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, dst0, dst1, dst2, dst3;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v8i16 shf_vec7, shf_vec8, shf_vec9, shf_vec10, shf_vec11;
+    v8i16 mask3, mask4, mask5;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+
+    mask3 = mask0 + 4;
+    mask4 = mask1 + 4;
+    mask5 = mask2 + 4;
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+
+    for (row = 4; row--;) {
+        LD_SB2(src, stride, src5, src6);
+        src += (2 * stride);
+        XORI_B2_128_SB(src5, src6);
+
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
+                                        vt_res0, vt_res1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src1, src2, src3, src4, src5, src6,
+                                        vt_res2, vt_res3);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+                   mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+                   mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask3,
+                   mask4, mask5, shf_vec6, shf_vec7, shf_vec8);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask3,
+                   mask4, mask5, shf_vec9, shf_vec10, shf_vec11);
+        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+        hz_res2 = __msa_hadd_s_w(shf_vec6, shf_vec6);
+        hz_res3 = __msa_hadd_s_w(shf_vec9, shf_vec9);
+        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+        DPADD_SH2_SW(shf_vec7, shf_vec8, minus5h, plus20h, hz_res2, hz_res2);
+        DPADD_SH2_SW(shf_vec10, shf_vec11, minus5h, plus20h, hz_res3, hz_res3);
+        SRARI_W4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 10);
+        SAT_SW4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 7);
+        dst0 = __msa_srari_h(shf_vec2, 5);
+        dst1 = __msa_srari_h(shf_vec5, 5);
+        dst2 = __msa_srari_h(shf_vec8, 5);
+        dst3 = __msa_srari_h(shf_vec11, 5);
+        SAT_SH4_SH(dst0, dst1, dst2, dst3, 7);
+        dst0 = __msa_pckod_h(dst2, dst0);
+        dst1 = __msa_pckod_h(dst3, dst1);
+        PCKEV_H2_SH(hz_res2, hz_res0, hz_res3, hz_res1, dst2, dst3);
+        dst0 = __msa_aver_s_h(dst2, dst0);
+        dst1 = __msa_aver_s_h(dst3, dst1);
+        out = PCKEV_XORI128_UB(dst0, dst1);
+        ST8x2_UB(out, dst, stride);
+        dst += (2 * stride);
+
+        src0 = src2;
+        src1 = src3;
+        src2 = src4;
+        src3 = src5;
+        src4 = src6;
+    }
 }
 
 void ff_put_h264_qpel4_mc12_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_4w_msa(src - (2 * stride) - 2, stride, dst, stride, 4, 0);
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src21_r, src32_r, src43_r, src54_r, src65_r, src76_r;
+    v16i8 src87_r, src10_l, src21_l, src32_l, src43_l, src54_l, src65_l;
+    v16i8 src76_l, src87_l, filt0, filt1, filt2;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, dst0, dst1, dst2, dst3, shf_vec7;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+    v8i16 zeros = { 0 };
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    LD_SB4(src, stride, src5, src6, src7, src8);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+               src76_r, src87_r);
+    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_l, src21_l,
+               src32_l, src43_l);
+    ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_l, src65_l,
+               src76_l, src87_l);
+    vt_res0 = AVC_DOT_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+    vt_res1 = AVC_DOT_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+    vt_res2 = AVC_DOT_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+    vt_res3 = AVC_DOT_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+    VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+               mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+    VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+               mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+    hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+    DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+    hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+    DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+
+    vt_res0 = AVC_DOT_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+    vt_res1 = AVC_DOT_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+    vt_res2 = AVC_DOT_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+    vt_res3 = AVC_DOT_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+    VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+               mask1, mask2, shf_vec0, shf_vec1, shf_vec6);
+    VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+               mask1, mask2, shf_vec3, shf_vec4, shf_vec7);
+    hz_res2 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+    DPADD_SH2_SW(shf_vec1, shf_vec6, minus5h, plus20h, hz_res2, hz_res2);
+    hz_res3 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+    DPADD_SH2_SW(shf_vec4, shf_vec7, minus5h, plus20h, hz_res3, hz_res3);
+
+    SRARI_W2_SW(hz_res0, hz_res1, 10);
+    SAT_SW2_SW(hz_res0, hz_res1, 7);
+    SRARI_W2_SW(hz_res2, hz_res3, 10);
+    SAT_SW2_SW(hz_res2, hz_res3, 7);
+
+    dst0 = __msa_srari_h(shf_vec2, 5);
+    dst1 = __msa_srari_h(shf_vec5, 5);
+    dst2 = __msa_srari_h(shf_vec6, 5);
+    dst3 = __msa_srari_h(shf_vec7, 5);
+
+    SAT_SH2_SH(dst0, dst1, 7);
+    SAT_SH2_SH(dst2, dst3, 7);
+    ILVEV_H2_SH(dst0, zeros, dst1, zeros, dst0, dst1);
+    ILVEV_H2_SH(dst2, zeros, dst3, zeros, dst2, dst3);
+
+    hz_res0 = __msa_aver_s_w(hz_res0, (v4i32) dst0);
+    hz_res1 = __msa_aver_s_w(hz_res1, (v4i32) dst1);
+    hz_res2 = __msa_aver_s_w(hz_res2, (v4i32) dst2);
+    hz_res3 = __msa_aver_s_w(hz_res3, (v4i32) dst3);
+
+    PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2);
+    out = PCKEV_XORI128_UB(dst0, dst2);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_put_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_4w_msa(src - (2 * stride) - 2, stride, dst, stride, 4, 1);
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src21_r, src32_r, src43_r, src54_r, src65_r, src76_r;
+    v16i8 src87_r, src10_l, src21_l, src32_l, src43_l, src54_l, src65_l;
+    v16i8 src76_l, src87_l, filt0, filt1, filt2;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, dst0, dst1, dst2, dst3, shf_vec7;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+    v8i16 zeros = { 0 };
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    LD_SB4(src, stride, src5, src6, src7, src8);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+               src76_r, src87_r);
+    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_l, src21_l,
+               src32_l, src43_l);
+    ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_l, src65_l,
+               src76_l, src87_l);
+
+    vt_res0 = AVC_DOT_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+    vt_res1 = AVC_DOT_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+    vt_res2 = AVC_DOT_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+    vt_res3 = AVC_DOT_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+    VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+               mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+    VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+               mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+    hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+    DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+    hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+    DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+
+    vt_res0 = AVC_DOT_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+    vt_res1 = AVC_DOT_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+    vt_res2 = AVC_DOT_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+    vt_res3 = AVC_DOT_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+    VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+               mask1, mask2, shf_vec0, shf_vec1, shf_vec6);
+    VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+               mask1, mask2, shf_vec3, shf_vec4, shf_vec7);
+    hz_res2 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+    DPADD_SH2_SW(shf_vec1, shf_vec6, minus5h, plus20h, hz_res2, hz_res2);
+    hz_res3 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+    DPADD_SH2_SW(shf_vec4, shf_vec7, minus5h, plus20h, hz_res3, hz_res3);
+
+    SRARI_W2_SW(hz_res0, hz_res1, 10);
+    SAT_SW2_SW(hz_res0, hz_res1, 7);
+    SRARI_W2_SW(hz_res2, hz_res3, 10);
+    SAT_SW2_SW(hz_res2, hz_res3, 7);
+
+    dst0 = __msa_srari_h(shf_vec2, 5);
+    dst1 = __msa_srari_h(shf_vec5, 5);
+    dst2 = __msa_srari_h(shf_vec6, 5);
+    dst3 = __msa_srari_h(shf_vec7, 5);
+
+    SAT_SH2_SH(dst0, dst1, 7);
+    SAT_SH2_SH(dst2, dst3, 7);
+
+    dst0 = __msa_ilvod_h(zeros, dst0);
+    dst1 = __msa_ilvod_h(zeros, dst1);
+    dst2 = __msa_ilvod_h(zeros, dst2);
+    dst3 = __msa_ilvod_h(zeros, dst3);
+
+    hz_res0 = __msa_aver_s_w(hz_res0, (v4i32) dst0);
+    hz_res1 = __msa_aver_s_w(hz_res1, (v4i32) dst1);
+    hz_res2 = __msa_aver_s_w(hz_res2, (v4i32) dst2);
+    hz_res3 = __msa_aver_s_w(hz_res3, (v4i32) dst3);
+
+    PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2);
+    out = PCKEV_XORI128_UB(dst0, dst2);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_put_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_mid_16w_msa(src - (2 * stride) - 2, stride, dst, stride, 16);
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    const uint8_t *src_tmp = src - (2 * stride) - 2;
+    uint8_t *dst_tmp = dst;
+    uint32_t multiple8_cnt, loop_cnt;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, dst0, dst1, dst2, dst3;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r, hz_out10_l, hz_out21_l;
+    v8i16 hz_out32_l, hz_out43_l, hz_out54_l, hz_out65_l, hz_out76_l;
+    v8i16 hz_out87_l, filt0, filt1, filt2;
+    v4i32 tmp0, tmp1;
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    for (multiple8_cnt = 2; multiple8_cnt--;) {
+        src = src_tmp;
+        dst = dst_tmp;
+
+        LD_SB5(src, stride, src0, src1, src2, src3, src4);
+        XORI_B5_128_SB(src0, src1, src2, src3, src4);
+        src += (5 * stride);
+
+        hz_out0 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+        hz_out1 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+        hz_out2 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+        hz_out3 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+        hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
+
+        for (loop_cnt = 4; loop_cnt--;) {
+            LD_SB4(src, stride, src0, src1, src2, src3);
+            XORI_B4_128_SB(src0, src1, src2, src3);
+            src += (4 * stride);
+
+            hz_out5 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+            hz_out6 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+            hz_out7 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+            hz_out8 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+
+            ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2,
+                       hz_out4, hz_out3, hz_out10_r, hz_out21_r, hz_out32_r,
+                       hz_out43_r);
+            ILVL_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2,
+                       hz_out4, hz_out3, hz_out10_l, hz_out21_l, hz_out32_l,
+                       hz_out43_l);
+            ILVR_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6,
+                       hz_out8, hz_out7, hz_out54_r, hz_out65_r, hz_out76_r,
+                       hz_out87_r);
+            ILVL_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6,
+                       hz_out8, hz_out7, hz_out54_l, hz_out65_l, hz_out76_l,
+                       hz_out87_l);
+
+            tmp0 = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0,
+                                  filt1, filt2);
+            tmp1 = AVC_DOT_SW3_SW(hz_out10_l, hz_out32_l, hz_out54_l, filt0,
+                                  filt1, filt2);
+            dst0 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+            tmp0 = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0,
+                                  filt1, filt2);
+            tmp1 = AVC_DOT_SW3_SW(hz_out21_l, hz_out43_l, hz_out65_l, filt0,
+                                  filt1, filt2);
+            dst1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+            tmp0 = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0,
+                                  filt1, filt2);
+            tmp1 = AVC_DOT_SW3_SW(hz_out32_l, hz_out54_l, hz_out76_l, filt0,
+                                  filt1, filt2);
+            dst2 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+            tmp0 = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0,
+                                  filt1, filt2);
+            tmp1 = AVC_DOT_SW3_SW(hz_out43_l, hz_out65_l, hz_out87_l, filt0,
+                                  filt1, filt2);
+            dst3 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+
+            out0 = PCKEV_XORI128_UB(dst0, dst1);
+            out1 = PCKEV_XORI128_UB(dst2, dst3);
+            ST8x4_UB(out0, out1, dst, stride);
+            dst += (4 * stride);
+
+            hz_out0 = hz_out4;
+            hz_out1 = hz_out5;
+            hz_out2 = hz_out6;
+            hz_out3 = hz_out7;
+            hz_out4 = hz_out8;
+        }
+
+        src_tmp += 8;
+        dst_tmp += 8;
+    }
 }
 
 void ff_put_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_mid_8w_msa(src - (2 * stride) - 2, stride, dst, stride, 8);
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, hz_out9, hz_out10, hz_out11, hz_out12;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r, hz_out89_r, hz_out910_r;
+    v8i16 hz_out1110_r, hz_out1211_r, dst0, dst1, dst2, dst3;
+    v8i16 hz_out10_l, hz_out21_l, hz_out32_l, hz_out43_l, hz_out54_l;
+    v8i16 hz_out65_l, hz_out76_l, hz_out87_l, hz_out89_l, hz_out910_l;
+    v8i16 hz_out1110_l, hz_out1211_l, filt0, filt1, filt2;
+    v4i32 tmp0, tmp1;
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    src -= ((2 * stride) + 2);
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
+
+    LD_SB4(src, stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    src += (4 * stride);
+    hz_out5 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+    hz_out6 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+    hz_out7 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+    hz_out8 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+    ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r);
+    ILVL_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_l, hz_out21_l, hz_out32_l, hz_out43_l);
+    ILVR_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_r, hz_out65_r, hz_out76_r, hz_out87_r);
+    ILVL_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_l, hz_out65_l, hz_out76_l, hz_out87_l);
+
+    tmp0 = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out10_l, hz_out32_l, hz_out54_l, filt0, filt1,
+                          filt2);
+    dst0 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out21_l, hz_out43_l, hz_out65_l, filt0, filt1,
+                          filt2);
+    dst1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out32_l, hz_out54_l, hz_out76_l, filt0, filt1,
+                          filt2);
+    dst2 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out43_l, hz_out65_l, hz_out87_l, filt0, filt1,
+                          filt2);
+    dst3 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    out0 = PCKEV_XORI128_UB(dst0, dst1);
+    out1 = PCKEV_XORI128_UB(dst2, dst3);
+    ST8x4_UB(out0, out1, dst, stride);
+    dst += (4 * stride);
+
+    LD_SB4(src, stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    hz_out9 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+    hz_out10 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+    hz_out11 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+    hz_out12 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+    ILVR_H4_SH(hz_out9, hz_out8, hz_out10, hz_out9, hz_out11, hz_out10,
+               hz_out12, hz_out11, hz_out89_r, hz_out910_r, hz_out1110_r,
+               hz_out1211_r);
+    ILVL_H4_SH(hz_out9, hz_out8, hz_out10, hz_out9, hz_out11, hz_out10,
+               hz_out12, hz_out11, hz_out89_l, hz_out910_l, hz_out1110_l,
+               hz_out1211_l);
+    tmp0 = AVC_DOT_SW3_SW(hz_out54_r, hz_out76_r, hz_out89_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out54_l, hz_out76_l, hz_out89_l, filt0, filt1,
+                          filt2);
+    dst0 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out65_r, hz_out87_r, hz_out910_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out65_l, hz_out87_l, hz_out910_l, filt0, filt1,
+                          filt2);
+    dst1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out76_r, hz_out89_r, hz_out1110_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out76_l, hz_out89_l, hz_out1110_l, filt0, filt1,
+                          filt2);
+    dst2 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out87_r, hz_out910_r, hz_out1211_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out87_l, hz_out910_l, hz_out1211_l, filt0, filt1,
+                          filt2);
+    dst3 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    out0 = PCKEV_XORI128_UB(dst0, dst1);
+    out1 = PCKEV_XORI128_UB(dst2, dst3);
+    ST8x4_UB(out0, out1, dst, stride);
 }
 
 void ff_put_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_mid_4w_msa(src - (2 * stride) - 2, stride, dst, stride, 4);
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    v16u8 res;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, dst0, dst1, filt0, filt1, filt2;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r;
+    v4i32 tmp0, tmp1;
+
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    LD_SB4(src, stride, src5, src6, src7, src8);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, src5, mask0, mask1, mask2);
+    hz_out6 = AVC_HORZ_FILTER_SH(src6, src7, mask0, mask1, mask2);
+    hz_out8 = AVC_HORZ_FILTER_SH(src8, src8, mask0, mask1, mask2);
+    PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
+    PCKOD_D2_SH(hz_out4, hz_out4, hz_out6, hz_out6, hz_out5, hz_out7);
+    ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r);
+    ILVR_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_r, hz_out65_r, hz_out76_r, hz_out87_r);
+
+    tmp0 = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0, filt1,
+                          filt2);
+    dst0 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0, filt1,
+                          filt2);
+    dst1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    res = PCKEV_XORI128_UB(dst0, dst1);
+    ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_avg_h264_qpel16_mc10_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_hz_qrt_and_aver_dst_16x16_msa(src - 2, stride, dst, stride, 0);
+    uint32_t loop_cnt;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 out0, out1, out2, out3, src0, src1, src2, src3, src4, src5, src6;
+    v16i8 mask0, mask1, mask2, mask3, mask4, mask5, src7, vec11;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9, vec10;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    mask3 = mask0 + 8;
+    mask4 = mask1 + 8;
+    mask5 = mask2 + 8;
+    src -= 2;
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB2(src, 16, src0, src1);
+        src += stride;
+        LD_SB2(src, 16, src2, src3);
+        src += stride;
+        LD_SB2(src, 16, src4, src5);
+        src += stride;
+        LD_SB2(src, 16, src6, src7);
+        src += stride;
+
+        LD_UB4(dst, stride, dst0, dst1, dst2, dst3);
+        XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask3, vec0, vec3);
+        VSHF_B2_SB(src2, src2, src2, src3, mask0, mask3, vec6, vec9);
+        VSHF_B2_SB(src0, src0, src0, src1, mask1, mask4, vec1, vec4);
+        VSHF_B2_SB(src2, src2, src2, src3, mask1, mask4, vec7, vec10);
+        VSHF_B2_SB(src0, src0, src0, src1, mask2, mask5, vec2, vec5);
+        VSHF_B2_SB(src2, src2, src2, src3, mask2, mask5, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res0, res1, res2, res3);
+        VSHF_B2_SB(src4, src4, src4, src5, mask0, mask3, vec0, vec3);
+        VSHF_B2_SB(src6, src6, src6, src7, mask0, mask3, vec6, vec9);
+        VSHF_B2_SB(src4, src4, src4, src5, mask1, mask4, vec1, vec4);
+        VSHF_B2_SB(src6, src6, src6, src7, mask1, mask4, vec7, vec10);
+        VSHF_B2_SB(src4, src4, src4, src5, mask2, mask5, vec2, vec5);
+        VSHF_B2_SB(src6, src6, src6, src7, mask2, mask5, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res4, res5, res6, res7);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res4, res5, res6, res7);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res4, res5, res6, res7);
+        SLDI_B2_SB(src1, src3, src0, src2, src0, src2, 2);
+        SLDI_B2_SB(src5, src7, src4, src6, src4, src6, 2);
+        SRARI_H4_SH(res0, res1, res2, res3, 5);
+        SRARI_H4_SH(res4, res5, res6, res7, 5);
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        SAT_SH4_SH(res4, res5, res6, res7, 7);
+        PCKEV_B2_SB(res1, res0, res3, res2, out0, out1);
+        PCKEV_B2_SB(res5, res4, res7, res6, out2, out3);
+        out0 = __msa_aver_s_b(out0, src0);
+        out1 = __msa_aver_s_b(out1, src2);
+        out2 = __msa_aver_s_b(out2, src4);
+        out3 = __msa_aver_s_b(out3, src6);
+        XORI_B4_128_SB(out0, out1, out2, out3);
+        AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+        AVER_UB2_UB(out2, dst2, out3, dst3, dst2, dst3);
+        ST_UB4(dst0, dst1, dst2, dst3, dst, stride);
+        dst += (4 * stride);
+    }
 }
 
 void ff_avg_h264_qpel16_mc30_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_hz_qrt_and_aver_dst_16x16_msa(src - 2, stride, dst, stride, 1);
+    uint32_t loop_cnt;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 out0, out1, out2, out3, src0, src1, src2, src3, src4, src5, src6;
+    v16i8 mask0, mask1, mask2, mask3, mask4, mask5, src7, vec11;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9, vec10;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    mask3 = mask0 + 8;
+    mask4 = mask1 + 8;
+    mask5 = mask2 + 8;
+    src -= 2;
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB2(src, 16, src0, src1);
+        src += stride;
+        LD_SB2(src, 16, src2, src3);
+        src += stride;
+        LD_SB2(src, 16, src4, src5);
+        src += stride;
+        LD_SB2(src, 16, src6, src7);
+        src += stride;
+
+        LD_UB4(dst, stride, dst0, dst1, dst2, dst3);
+        XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask3, vec0, vec3);
+        VSHF_B2_SB(src2, src2, src2, src3, mask0, mask3, vec6, vec9);
+        VSHF_B2_SB(src0, src0, src0, src1, mask1, mask4, vec1, vec4);
+        VSHF_B2_SB(src2, src2, src2, src3, mask1, mask4, vec7, vec10);
+        VSHF_B2_SB(src0, src0, src0, src1, mask2, mask5, vec2, vec5);
+        VSHF_B2_SB(src2, src2, src2, src3, mask2, mask5, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res0, res1, res2, res3);
+        VSHF_B2_SB(src4, src4, src4, src5, mask0, mask3, vec0, vec3);
+        VSHF_B2_SB(src6, src6, src6, src7, mask0, mask3, vec6, vec9);
+        VSHF_B2_SB(src4, src4, src4, src5, mask1, mask4, vec1, vec4);
+        VSHF_B2_SB(src6, src6, src6, src7, mask1, mask4, vec7, vec10);
+        VSHF_B2_SB(src4, src4, src4, src5, mask2, mask5, vec2, vec5);
+        VSHF_B2_SB(src6, src6, src6, src7, mask2, mask5, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res4, res5, res6, res7);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res4, res5, res6, res7);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res4, res5, res6, res7);
+        SLDI_B2_SB(src1, src3, src0, src2, src0, src2, 3);
+        SLDI_B2_SB(src5, src7, src4, src6, src4, src6, 3);
+        SRARI_H4_SH(res0, res1, res2, res3, 5);
+        SRARI_H4_SH(res4, res5, res6, res7, 5);
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        SAT_SH4_SH(res4, res5, res6, res7, 7);
+        PCKEV_B2_SB(res1, res0, res3, res2, out0, out1);
+        PCKEV_B2_SB(res5, res4, res7, res6, out2, out3);
+        out0 = __msa_aver_s_b(out0, src0);
+        out1 = __msa_aver_s_b(out1, src2);
+        out2 = __msa_aver_s_b(out2, src4);
+        out3 = __msa_aver_s_b(out3, src6);
+        XORI_B4_128_SB(out0, out1, out2, out3);
+        AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+        AVER_UB2_UB(out2, dst2, out3, dst3, dst2, dst3);
+        ST_UB4(dst0, dst1, dst2, dst3, dst, stride);
+        dst += (4 * stride);
+    }
 }
 
 void ff_avg_h264_qpel8_mc10_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hz_qrt_and_aver_dst_8x8_msa(src - 2, stride, dst, stride, 0);
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 dst0 = { 0 }, dst1 = { 0 }, dst2 = { 0 }, dst3 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask0, mask1, mask2;
+    v16i8 tmp0, tmp1, tmp2, tmp3, vec11;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9, vec10;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    LD_SB8(src - 2, stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+    HADD_SB4_SH(vec0, vec1, vec2, vec3, res0, res1, res2, res3);
+    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec5);
+    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec7);
+    DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
+                 res0, res1, res2, res3);
+    VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec8, vec9);
+    VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec10, vec11);
+    DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b, plus20b,
+                 res0, res1, res2, res3);
+    VSHF_B2_SB(src4, src4, src5, src5, mask0, mask0, vec0, vec1);
+    VSHF_B2_SB(src6, src6, src7, src7, mask0, mask0, vec2, vec3);
+    HADD_SB4_SH(vec0, vec1, vec2, vec3, res4, res5, res6, res7);
+    VSHF_B2_SB(src4, src4, src5, src5, mask1, mask1, vec4, vec5);
+    VSHF_B2_SB(src6, src6, src7, src7, mask1, mask1, vec6, vec7);
+    DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
+                 res4, res5, res6, res7);
+    VSHF_B2_SB(src4, src4, src5, src5, mask2, mask2, vec8, vec9);
+    VSHF_B2_SB(src6, src6, src7, src7, mask2, mask2, vec10, vec11);
+    DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b, plus20b,
+                 res4, res5, res6, res7);
+    SLDI_B2_SB(src0, src1, src0, src1, src0, src1, 2);
+    SLDI_B2_SB(src2, src3, src2, src3, src2, src3, 2);
+    SLDI_B2_SB(src4, src5, src4, src5, src4, src5, 2);
+    SLDI_B2_SB(src6, src7, src6, src7, src6, src7, 2);
+    PCKEV_D2_SB(src1, src0, src3, src2, src0, src1);
+    PCKEV_D2_SB(src5, src4, src7, src6, src4, src5);
+    SRARI_H4_SH(res0, res1, res2, res3, 5);
+    SRARI_H4_SH(res4, res5, res6, res7, 5);
+    SAT_SH4_SH(res0, res1, res2, res3, 7);
+    SAT_SH4_SH(res4, res5, res6, res7, 7);
+    PCKEV_B2_SB(res1, res0, res3, res2, tmp0, tmp1);
+    PCKEV_B2_SB(res5, res4, res7, res6, tmp2, tmp3);
+    tmp0 = __msa_aver_s_b(tmp0, src0);
+    tmp1 = __msa_aver_s_b(tmp1, src1);
+    tmp2 = __msa_aver_s_b(tmp2, src4);
+    tmp3 = __msa_aver_s_b(tmp3, src5);
+    XORI_B4_128_SB(tmp0, tmp1, tmp2, tmp3);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst2);
+    INSERT_D2_UB(tp2, tp3, dst3);
+    AVER_UB2_UB(tmp0, dst0, tmp1, dst1, dst0, dst1);
+    AVER_UB2_UB(tmp2, dst2, tmp3, dst3, dst2, dst3);
+    ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
 }
 
 void ff_avg_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hz_qrt_and_aver_dst_8x8_msa(src - 2, stride, dst, stride, 1);
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 dst0 = { 0 }, dst1 = { 0 }, dst2 = { 0 }, dst3 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask0, mask1, mask2;
+    v16i8 tmp0, tmp1, tmp2, tmp3, vec11;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9, vec10;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    LD_SB8(src - 2, stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+    HADD_SB4_SH(vec0, vec1, vec2, vec3, res0, res1, res2, res3);
+    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec5);
+    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec7);
+    DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
+                 res0, res1, res2, res3);
+    VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec8, vec9);
+    VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec10, vec11);
+    DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b, plus20b,
+                 res0, res1, res2, res3);
+    VSHF_B2_SB(src4, src4, src5, src5, mask0, mask0, vec0, vec1);
+    VSHF_B2_SB(src6, src6, src7, src7, mask0, mask0, vec2, vec3);
+    HADD_SB4_SH(vec0, vec1, vec2, vec3, res4, res5, res6, res7);
+    VSHF_B2_SB(src4, src4, src5, src5, mask1, mask1, vec4, vec5);
+    VSHF_B2_SB(src6, src6, src7, src7, mask1, mask1, vec6, vec7);
+    DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
+                 res4, res5, res6, res7);
+    VSHF_B2_SB(src4, src4, src5, src5, mask2, mask2, vec8, vec9);
+    VSHF_B2_SB(src6, src6, src7, src7, mask2, mask2, vec10, vec11);
+    DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b, plus20b,
+                 res4, res5, res6, res7);
+    SLDI_B2_SB(src0, src1, src0, src1, src0, src1, 3);
+    SLDI_B2_SB(src2, src3, src2, src3, src2, src3, 3);
+    SLDI_B2_SB(src4, src5, src4, src5, src4, src5, 3);
+    SLDI_B2_SB(src6, src7, src6, src7, src6, src7, 3);
+    PCKEV_D2_SB(src1, src0, src3, src2, src0, src1);
+    PCKEV_D2_SB(src5, src4, src7, src6, src4, src5);
+    SRARI_H4_SH(res0, res1, res2, res3, 5);
+    SRARI_H4_SH(res4, res5, res6, res7, 5);
+    SAT_SH4_SH(res0, res1, res2, res3, 7);
+    SAT_SH4_SH(res4, res5, res6, res7, 7);
+    PCKEV_B2_SB(res1, res0, res3, res2, tmp0, tmp1);
+    PCKEV_B2_SB(res5, res4, res7, res6, tmp2, tmp3);
+    tmp0 = __msa_aver_s_b(tmp0, src0);
+    tmp1 = __msa_aver_s_b(tmp1, src1);
+    tmp2 = __msa_aver_s_b(tmp2, src4);
+    tmp3 = __msa_aver_s_b(tmp3, src5);
+    XORI_B4_128_SB(tmp0, tmp1, tmp2, tmp3);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst2);
+    INSERT_D2_UB(tp2, tp3, dst3);
+    AVER_UB2_UB(tmp0, dst0, tmp1, dst1, dst0, dst1);
+    AVER_UB2_UB(tmp2, dst2, tmp3, dst3, dst2, dst3);
+    ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hz_qrt_and_aver_dst_4x4_msa(src - 2, stride, dst, stride, 0);
+    uint32_t tp0, tp1, tp2, tp3;
+    v16u8 dst0 = { 0 };
+    v16i8 src0, src1, src2, src3, res, vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 mask0, mask1, mask2;
+    v8i16 out0, out1;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+    LD_SB4(src - 2, stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
+    HADD_SB2_SH(vec0, vec1, out0, out1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2, vec3);
+    DPADD_SB2_SH(vec2, vec3, minus5b, minus5b, out0, out1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4, vec5);
+    DPADD_SB2_SH(vec4, vec5, plus20b, plus20b, out0, out1);
+    SRARI_H2_SH(out0, out1, 5);
+    SAT_SH2_SH(out0, out1, 7);
+    res = __msa_pckev_b((v16i8) out1, (v16i8) out0);
+    SLDI_B2_SB(src0, src1, src0, src1, src0, src1, 2);
+    SLDI_B2_SB(src2, src3, src2, src3, src2, src3, 2);
+    src0 = (v16i8) __msa_insve_w((v4i32) src0, 1, (v4i32) src1);
+    src1 = (v16i8) __msa_insve_w((v4i32) src2, 1, (v4i32) src3);
+    src0 = (v16i8) __msa_insve_d((v2i64) src0, 1, (v2i64) src1);
+    res = __msa_aver_s_b(res, src0);
+    res = (v16i8) __msa_xori_b((v16u8) res, 128);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+    dst0 = __msa_aver_u_b((v16u8) res, dst0);
+    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hz_qrt_and_aver_dst_4x4_msa(src - 2, stride, dst, stride, 1);
+    uint32_t tp0, tp1, tp2, tp3;
+    v16u8 dst0 = { 0 };
+    v16i8 src0, src1, src2, src3, res, vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 mask0, mask1, mask2;
+    v8i16 out0, out1;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+    LD_SB4(src - 2, stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
+    HADD_SB2_SH(vec0, vec1, out0, out1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2, vec3);
+    DPADD_SB2_SH(vec2, vec3, minus5b, minus5b, out0, out1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4, vec5);
+    DPADD_SB2_SH(vec4, vec5, plus20b, plus20b, out0, out1);
+    SRARI_H2_SH(out0, out1, 5);
+    SAT_SH2_SH(out0, out1, 7);
+    res = __msa_pckev_b((v16i8) out1, (v16i8) out0);
+    SLDI_B2_SB(src0, src1, src0, src1, src0, src1, 3);
+    SLDI_B2_SB(src2, src3, src2, src3, src2, src3, 3);
+    src0 = (v16i8) __msa_insve_w((v4i32) src0, 1, (v4i32) src1);
+    src1 = (v16i8) __msa_insve_w((v4i32) src2, 1, (v4i32) src3);
+    src0 = (v16i8) __msa_insve_d((v2i64) src0, 1, (v2i64) src1);
+    res = __msa_aver_s_b(res, src0);
+    res = (v16i8) __msa_xori_b((v16u8) res, 128);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+    dst0 = __msa_aver_u_b((v16u8) res, dst0);
+    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_avg_h264_qpel16_mc20_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_hz_and_aver_dst_16x16_msa(src - 2, stride, dst, stride);
+    uint32_t loop_cnt;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask0, mask1, mask2;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9, vec10;
+    v16i8 vec11;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    src -= 2;
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB2(src, 8, src0, src1);
+        src += stride;
+        LD_SB2(src, 8, src2, src3);
+        src += stride;
+        LD_SB2(src, 8, src4, src5);
+        src += stride;
+        LD_SB2(src, 8, src6, src7);
+        src += stride;
+
+        LD_UB4(dst, stride, dst0, dst1, dst2, dst3);
+        XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec3);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec6, vec9);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec1, vec4);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec7, vec10);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec2, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res0, res1, res2, res3);
+        VSHF_B2_SB(src4, src4, src5, src5, mask0, mask0, vec0, vec3);
+        VSHF_B2_SB(src6, src6, src7, src7, mask0, mask0, vec6, vec9);
+        VSHF_B2_SB(src4, src4, src5, src5, mask1, mask1, vec1, vec4);
+        VSHF_B2_SB(src6, src6, src7, src7, mask1, mask1, vec7, vec10);
+        VSHF_B2_SB(src4, src4, src5, src5, mask2, mask2, vec2, vec5);
+        VSHF_B2_SB(src6, src6, src7, src7, mask2, mask2, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res4, res5, res6, res7);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res4, res5, res6, res7);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res4, res5, res6, res7);
+        SRARI_H4_SH(res0, res1, res2, res3, 5);
+        SRARI_H4_SH(res4, res5, res6, res7, 5);
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        SAT_SH4_SH(res4, res5, res6, res7, 7);
+        PCKEV_B4_SB(res1, res0, res3, res2, res5, res4, res7, res6, vec0, vec1,
+                    vec2, vec3);
+        XORI_B4_128_SB(vec0, vec1, vec2, vec3);
+        AVER_UB2_UB(vec0, dst0, vec1, dst1, dst0, dst1);
+        AVER_UB2_UB(vec2, dst2, vec3, dst3, dst2, dst3);
+        ST_UB4(dst0, dst1, dst2, dst3, dst, stride);
+        dst += (4 * stride);
+    }
 }
 
 void ff_avg_h264_qpel8_mc20_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hz_and_aver_dst_8x8_msa(src - 2, stride, dst, stride);
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 out0, out1, out2 = { 0 }, out3 = { 0 };
+    v16u8 out4, out5, out6 = { 0 }, out7 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask0, mask1, mask2;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9, vec10;
+    v16i8 vec11;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    LD_SB8(src - 2, stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+    HADD_SB4_SH(vec0, vec1, vec2, vec3, res0, res1, res2, res3);
+    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec5);
+    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec7);
+    DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
+                 res0, res1, res2, res3);
+    VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec8, vec9);
+    VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec10, vec11);
+    DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b, plus20b,
+                 res0, res1, res2, res3);
+    VSHF_B2_SB(src4, src4, src5, src5, mask0, mask0, vec0, vec1);
+    VSHF_B2_SB(src6, src6, src7, src7, mask0, mask0, vec2, vec3);
+    HADD_SB4_SH(vec0, vec1, vec2, vec3, res4, res5, res6, res7);
+    VSHF_B2_SB(src4, src4, src5, src5, mask1, mask1, vec4, vec5);
+    VSHF_B2_SB(src6, src6, src7, src7, mask1, mask1, vec6, vec7);
+    DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
+                 res4, res5, res6, res7);
+    VSHF_B2_SB(src4, src4, src5, src5, mask2, mask2, vec8, vec9);
+    VSHF_B2_SB(src6, src6, src7, src7, mask2, mask2, vec10, vec11);
+    DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b, plus20b,
+                 res4, res5, res6, res7);
+    SRARI_H4_SH(res0, res1, res2, res3, 5);
+    SRARI_H4_SH(res4, res5, res6, res7, 5);
+    SAT_SH4_SH(res0, res1, res2, res3, 7);
+    SAT_SH4_SH(res4, res5, res6, res7, 7);
+    out0 = PCKEV_XORI128_UB(res0, res1);
+    out1 = PCKEV_XORI128_UB(res2, res3);
+    out4 = PCKEV_XORI128_UB(res4, res5);
+    out5 = PCKEV_XORI128_UB(res6, res7);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, out2);
+    INSERT_D2_UB(tp2, tp3, out3);
+    LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, out6);
+    INSERT_D2_UB(tp2, tp3, out7);
+    AVER_UB2_UB(out0, out2, out1, out3, out0, out1);
+    AVER_UB2_UB(out4, out6, out5, out7, out4, out5);
+    ST8x8_UB(out0, out1, out4, out5, dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_hz_and_aver_dst_4x4_msa(src - 2, stride, dst, stride);
+    uint32_t tp0, tp1, tp2, tp3;
+    v16u8 res, dst0 = { 0 };
+    v16i8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 mask0, mask1, mask2;
+    v8i16 res0, res1;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+    LD_SB4(src - 2, stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
+    HADD_SB2_SH(vec0, vec1, res0, res1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2, vec3);
+    DPADD_SB2_SH(vec2, vec3, minus5b, minus5b, res0, res1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4, vec5);
+    DPADD_SB2_SH(vec4, vec5, plus20b, plus20b, res0, res1);
+    SRARI_H2_SH(res0, res1, 5);
+    SAT_SH2_SH(res0, res1, 7);
+    res = PCKEV_XORI128_UB(res0, res1);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+    res = __msa_aver_u_b(res, dst0);
+    ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_avg_h264_qpel16_mc01_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_vt_qrt_and_aver_dst_16x16_msa(src - (stride * 2),
-                                           stride, dst, stride, 0);
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01;
+    int16_t filt_const1 = 0x1414;
+    int16_t filt_const2 = 0x1fb;
+    v16u8 res0, res1, res2, res3, dst0, dst1, dst2, dst3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src10_l, src32_l, src54_l, src76_l, src21_l, src43_l;
+    v16i8 src65_l, src87_l, filt0, filt1, filt2;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= (stride * 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_l, src21_l,
+               src32_l, src43_l);
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src, stride, src5, src6, src7, src8);
+        src += (4 * stride);
+
+        XORI_B4_128_SB(src5, src6, src7, src8);
+        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r,
+                   src65_r, src76_r, src87_r);
+        ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_l,
+                   src65_l, src76_l, src87_l);
+        out0_r = AVC_DOT_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+        out1_r = AVC_DOT_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+        out2_r = AVC_DOT_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+        out3_r = AVC_DOT_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+        out0_l = AVC_DOT_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+        out1_l = AVC_DOT_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+        out2_l = AVC_DOT_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+        out3_l = AVC_DOT_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 5);
+        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                    out3_r, res0, res1, res2, res3);
+        res0 = (v16u8) __msa_aver_s_b((v16i8) res0, src2);
+        res1 = (v16u8) __msa_aver_s_b((v16i8) res1, src3);
+        res2 = (v16u8) __msa_aver_s_b((v16i8) res2, src4);
+        res3 = (v16u8) __msa_aver_s_b((v16i8) res3, src5);
+        LD_UB4(dst, stride, dst0, dst1, dst2, dst3);
+        XORI_B4_128_UB(res0, res1, res2, res3);
+        AVER_UB2_UB(res0, dst0, res1, dst1, dst0, dst1);
+        AVER_UB2_UB(res2, dst2, res3, dst3, dst2, dst3);
+        ST_UB4(dst0, dst1, dst2, dst3, dst, stride);
+        dst += (4 * stride);
+
+        src10_r = src54_r;
+        src32_r = src76_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src10_l = src54_l;
+        src32_l = src76_l;
+        src21_l = src65_l;
+        src43_l = src87_l;
+        src2 = src6;
+        src3 = src7;
+        src4 = src8;
+    }
 }
 
 void ff_avg_h264_qpel16_mc03_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_vt_qrt_and_aver_dst_16x16_msa(src - (stride * 2),
-                                           stride, dst, stride, 1);
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01;
+    int16_t filt_const1 = 0x1414;
+    int16_t filt_const2 = 0x1fb;
+    v16u8 res0, res1, res2, res3, dst0, dst1, dst2, dst3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src10_l, src32_l, src54_l, src76_l, src21_l, src43_l;
+    v16i8 src65_l, src87_l, filt0, filt1, filt2;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= (stride * 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_l, src21_l,
+               src32_l, src43_l);
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src, stride, src5, src6, src7, src8);
+        src += (4 * stride);
+
+        XORI_B4_128_SB(src5, src6, src7, src8);
+        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r,
+                   src65_r, src76_r, src87_r);
+        ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_l,
+                   src65_l, src76_l, src87_l);
+        out0_r = AVC_DOT_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+        out1_r = AVC_DOT_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+        out2_r = AVC_DOT_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+        out3_r = AVC_DOT_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+        out0_l = AVC_DOT_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+        out1_l = AVC_DOT_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+        out2_l = AVC_DOT_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+        out3_l = AVC_DOT_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 5);
+        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                    out3_r, res0, res1, res2, res3);
+        res0 = (v16u8) __msa_aver_s_b((v16i8) res0, src3);
+        res1 = (v16u8) __msa_aver_s_b((v16i8) res1, src4);
+        res2 = (v16u8) __msa_aver_s_b((v16i8) res2, src5);
+        res3 = (v16u8) __msa_aver_s_b((v16i8) res3, src6);
+        LD_UB4(dst, stride, dst0, dst1, dst2, dst3);
+        XORI_B4_128_UB(res0, res1, res2, res3);
+        AVER_UB2_UB(res0, dst0, res1, dst1, dst0, dst1);
+        AVER_UB2_UB(res2, dst2, res3, dst3, dst2, dst3);
+        ST_UB4(dst0, dst1, dst2, dst3, dst, stride);
+        dst += (4 * stride);
+
+        src10_r = src54_r;
+        src32_r = src76_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src10_l = src54_l;
+        src32_l = src76_l;
+        src21_l = src65_l;
+        src43_l = src87_l;
+        src3 = src7;
+        src4 = src8;
+    }
 }
 
 void ff_avg_h264_qpel8_mc01_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_vt_qrt_and_aver_dst_8x8_msa(src - (stride * 2),
-                                         stride, dst, stride, 0);
+    uint64_t tp0, tp1, tp2, tp3;
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    v16u8 dst0 = { 0 }, dst1 = { 0 }, dst2 = { 0 }, dst3 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src7, src8, src9, src10, src11, src12;
+    v16i8 src13, src14, tmp0, tmp1, tmp2, tmp3, src109_r;
+    v16i8 src10_r, src32_r, src76_r, src98_r, src21_r, src43_r, src87_r;
+    v16i8 filt0, filt1, filt2, out0, out1, out2, out3;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out4_r, out5_r, out6_r, out7_r;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= (stride * 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    LD_SB8(src, stride, src7, src8, src9, src10, src11, src12, src13, src14);
+    XORI_B8_128_SB(src7, src8, src9, src10, src11, src12, src13, src14);
+    ILVR_B4_SB(src7, src4, src8, src7, src9, src8, src10, src9, src76_r,
+               src87_r, src98_r, src109_r);
+    out0_r = AVC_DOT_SH3_SH(src10_r, src32_r, src76_r, filt0, filt1, filt2);
+    out1_r = AVC_DOT_SH3_SH(src21_r, src43_r, src87_r, filt0, filt1, filt2);
+    out2_r = AVC_DOT_SH3_SH(src32_r, src76_r, src98_r, filt0, filt1, filt2);
+    out3_r = AVC_DOT_SH3_SH(src43_r, src87_r, src109_r, filt0, filt1, filt2);
+    PCKEV_D2_SB(src3, src2, src7, src4, tmp0, tmp1);
+    ILVR_B4_SB(src11, src10, src12, src11, src13, src12, src14, src13, src10_r,
+               src21_r, src32_r, src43_r);
+    out4_r = AVC_DOT_SH3_SH(src76_r, src98_r, src10_r, filt0, filt1, filt2);
+    out5_r = AVC_DOT_SH3_SH(src87_r, src109_r, src21_r, filt0, filt1, filt2);
+    out6_r = AVC_DOT_SH3_SH(src98_r, src10_r, src32_r, filt0, filt1, filt2);
+    out7_r = AVC_DOT_SH3_SH(src109_r, src21_r, src43_r, filt0, filt1, filt2);
+    PCKEV_D2_SB(src9, src8, src11, src10, tmp2, tmp3);
+    SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5);
+    SRARI_H4_SH(out4_r, out5_r, out6_r, out7_r, 5);
+    SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+    SAT_SH4_SH(out4_r, out5_r, out6_r, out7_r, 7);
+
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst2);
+    INSERT_D2_UB(tp2, tp3, dst3);
+
+    PCKEV_B2_SB(out1_r, out0_r, out3_r, out2_r, out0, out1);
+    PCKEV_B2_SB(out5_r, out4_r, out7_r, out6_r, out2, out3);
+    out0 = __msa_aver_s_b(out0, tmp0);
+    out1 = __msa_aver_s_b(out1, tmp1);
+    out2 = __msa_aver_s_b(out2, tmp2);
+    out3 = __msa_aver_s_b(out3, tmp3);
+    XORI_B4_128_SB(out0, out1, out2, out3);
+    AVER_UB4_UB(out0, dst0, out1, dst1, out2, dst2, out3, dst3, dst0, dst1,
+                dst2, dst3);
+    ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
 }
 
 void ff_avg_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_vt_qrt_and_aver_dst_8x8_msa(src - (stride * 2),
-                                         stride, dst, stride, 1);
+    uint64_t tp0, tp1, tp2, tp3;
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    v16u8 dst0 = { 0 }, dst1 = { 0 }, dst2 = { 0 }, dst3 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src7, src8, src9, src10, src11, src12;
+    v16i8 src13, src14, tmp0, tmp1, tmp2, tmp3, src109_r;
+    v16i8 src10_r, src32_r, src76_r, src98_r, src21_r, src43_r, src87_r;
+    v16i8 filt0, filt1, filt2, out0, out1, out2, out3;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out4_r, out5_r, out6_r, out7_r;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= (stride * 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    LD_SB8(src, stride, src7, src8, src9, src10, src11, src12, src13, src14);
+    XORI_B8_128_SB(src7, src8, src9, src10, src11, src12, src13, src14);
+    ILVR_B4_SB(src7, src4, src8, src7, src9, src8, src10, src9, src76_r,
+               src87_r, src98_r, src109_r);
+    out0_r = AVC_DOT_SH3_SH(src10_r, src32_r, src76_r, filt0, filt1, filt2);
+    out1_r = AVC_DOT_SH3_SH(src21_r, src43_r, src87_r, filt0, filt1, filt2);
+    out2_r = AVC_DOT_SH3_SH(src32_r, src76_r, src98_r, filt0, filt1, filt2);
+    out3_r = AVC_DOT_SH3_SH(src43_r, src87_r, src109_r, filt0, filt1, filt2);
+    PCKEV_D2_SB(src4, src3, src8, src7, tmp0, tmp1);
+    ILVR_B4_SB(src11, src10, src12, src11, src13, src12, src14, src13, src10_r,
+               src21_r, src32_r, src43_r);
+    out4_r = AVC_DOT_SH3_SH(src76_r, src98_r, src10_r, filt0, filt1, filt2);
+    out5_r = AVC_DOT_SH3_SH(src87_r, src109_r, src21_r, filt0, filt1, filt2);
+    out6_r = AVC_DOT_SH3_SH(src98_r, src10_r, src32_r, filt0, filt1, filt2);
+    out7_r = AVC_DOT_SH3_SH(src109_r, src21_r, src43_r, filt0, filt1, filt2);
+    PCKEV_D2_SB(src10, src9, src12, src11, tmp2, tmp3);
+    SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5);
+    SRARI_H4_SH(out4_r, out5_r, out6_r, out7_r, 5);
+    SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+    SAT_SH4_SH(out4_r, out5_r, out6_r, out7_r, 7);
+
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst2);
+    INSERT_D2_UB(tp2, tp3, dst3);
+
+    PCKEV_B2_SB(out1_r, out0_r, out3_r, out2_r, out0, out1);
+    PCKEV_B2_SB(out5_r, out4_r, out7_r, out6_r, out2, out3);
+    out0 = __msa_aver_s_b(out0, tmp0);
+    out1 = __msa_aver_s_b(out1, tmp1);
+    out2 = __msa_aver_s_b(out2, tmp2);
+    out3 = __msa_aver_s_b(out3, tmp3);
+    XORI_B4_128_SB(out0, out1, out2, out3);
+    AVER_UB4_UB(out0, dst0, out1, dst1, out2, dst2, out3, dst3, dst0, dst1,
+                dst2, dst3);
+    ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_vt_qrt_and_aver_dst_4x4_msa(src - (stride * 2),
-                                         stride, dst, stride, 0);
+    uint32_t tp0, tp1, tp2, tp3;
+    int16_t filt_const0 = 0xfb01;
+    int16_t filt_const1 = 0x1414;
+    int16_t filt_const2 = 0x1fb;
+    v16u8 res, dst0 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src2110, src4332, src6554, src8776, filt0, filt1, filt2;
+    v8i16 out10, out32;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= (stride * 2);
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+    XORI_B2_128_SB(src2110, src4332);
+    LD_SB4(src, stride, src5, src6, src7, src8);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+               src76_r, src87_r);
+    ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776);
+    XORI_B2_128_SB(src6554, src8776);
+    src32_r = (v16i8) __msa_insve_w((v4i32) src2, 1, (v4i32) src3);
+    src54_r = (v16i8) __msa_insve_w((v4i32) src4, 1, (v4i32) src5);
+    src32_r = (v16i8) __msa_insve_d((v2i64) src32_r, 1, (v2i64) src54_r);
+    out10 = AVC_DOT_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2);
+    out32 = AVC_DOT_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2);
+    SRARI_H2_SH(out10, out32, 5);
+    SAT_SH2_SH(out10, out32, 7);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+    res = PCKEV_XORI128_UB(out10, out32);
+    res = __msa_aver_u_b(res, (v16u8) src32_r);
+    dst0 = __msa_aver_u_b(res, dst0);
+    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_vt_qrt_and_aver_dst_4x4_msa(src - (stride * 2),
-                                         stride, dst, stride, 1);
+    uint32_t tp0, tp1, tp2, tp3;
+    int16_t filt_const0 = 0xfb01;
+    int16_t filt_const1 = 0x1414;
+    int16_t filt_const2 = 0x1fb;
+    v16u8 res, dst0 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src2110, src4332, src6554, src8776, filt0, filt1, filt2;
+    v8i16 out10, out32;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= (stride * 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+    XORI_B2_128_SB(src2110, src4332);
+    LD_SB4(src, stride, src5, src6, src7, src8);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+               src76_r, src87_r);
+    ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776);
+    XORI_B2_128_SB(src6554, src8776);
+    out10 = AVC_DOT_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2);
+    out32 = AVC_DOT_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2);
+    SRARI_H2_SH(out10, out32, 5);
+    SAT_SH2_SH(out10, out32, 7);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+    res = PCKEV_XORI128_UB(out10, out32);
+    src32_r = (v16i8) __msa_insve_w((v4i32) src3, 1, (v4i32) src4);
+    src54_r = (v16i8) __msa_insve_w((v4i32) src5, 1, (v4i32) src6);
+    src32_r = (v16i8) __msa_insve_d((v2i64) src32_r, 1, (v2i64) src54_r);
+    res = __msa_aver_u_b(res, (v16u8) src32_r);
+    dst0 = __msa_aver_u_b(res, dst0);
+    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_avg_h264_qpel16_mc11_msa(uint8_t *dst, const uint8_t *src,
@@ -3788,7 +4021,7 @@
 {
     avc_luma_hv_qrt_and_aver_dst_16x16_msa(src - 2,
                                            src - (stride * 2),
-                                           stride, dst, stride);
+                                           dst, stride);
 }
 
 void ff_avg_h264_qpel16_mc31_msa(uint8_t *dst, const uint8_t *src,
@@ -3796,7 +4029,7 @@
 {
     avc_luma_hv_qrt_and_aver_dst_16x16_msa(src - 2,
                                            src - (stride * 2) +
-                                           sizeof(uint8_t), stride,
+                                           sizeof(uint8_t),
                                            dst, stride);
 }
 
@@ -3805,7 +4038,7 @@
 {
     avc_luma_hv_qrt_and_aver_dst_16x16_msa(src + stride - 2,
                                            src - (stride * 2),
-                                           stride, dst, stride);
+                                           dst, stride);
 }
 
 void ff_avg_h264_qpel16_mc33_msa(uint8_t *dst, const uint8_t *src,
@@ -3813,7 +4046,7 @@
 {
     avc_luma_hv_qrt_and_aver_dst_16x16_msa(src + stride - 2,
                                            src - (stride * 2) +
-                                           sizeof(uint8_t), stride,
+                                           sizeof(uint8_t),
                                            dst, stride);
 }
 
@@ -3822,7 +4055,7 @@
 {
     avc_luma_hv_qrt_and_aver_dst_8x8_msa(src - 2,
                                          src - (stride * 2),
-                                         stride, dst, stride);
+                                         dst, stride);
 }
 
 void ff_avg_h264_qpel8_mc31_msa(uint8_t *dst, const uint8_t *src,
@@ -3830,7 +4063,7 @@
 {
     avc_luma_hv_qrt_and_aver_dst_8x8_msa(src - 2,
                                          src - (stride * 2) +
-                                         sizeof(uint8_t), stride, dst, stride);
+                                         sizeof(uint8_t), dst, stride);
 }
 
 void ff_avg_h264_qpel8_mc13_msa(uint8_t *dst, const uint8_t *src,
@@ -3838,7 +4071,7 @@
 {
     avc_luma_hv_qrt_and_aver_dst_8x8_msa(src + stride - 2,
                                          src - (stride * 2),
-                                         stride, dst, stride);
+                                         dst, stride);
 }
 
 void ff_avg_h264_qpel8_mc33_msa(uint8_t *dst, const uint8_t *src,
@@ -3846,7 +4079,7 @@
 {
     avc_luma_hv_qrt_and_aver_dst_8x8_msa(src + stride - 2,
                                          src - (stride * 2) +
-                                         sizeof(uint8_t), stride, dst, stride);
+                                         sizeof(uint8_t), dst, stride);
 }
 
 
@@ -3855,7 +4088,7 @@
 {
     avc_luma_hv_qrt_and_aver_dst_4x4_msa(src - 2,
                                          src - (stride * 2),
-                                         stride, dst, stride);
+                                         dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc31_msa(uint8_t *dst, const uint8_t *src,
@@ -3863,7 +4096,7 @@
 {
     avc_luma_hv_qrt_and_aver_dst_4x4_msa(src - 2,
                                          src - (stride * 2) +
-                                         sizeof(uint8_t), stride, dst, stride);
+                                         sizeof(uint8_t), dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc13_msa(uint8_t *dst, const uint8_t *src,
@@ -3871,7 +4104,7 @@
 {
     avc_luma_hv_qrt_and_aver_dst_4x4_msa(src + stride - 2,
                                          src - (stride * 2),
-                                         stride, dst, stride);
+                                         dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc33_msa(uint8_t *dst, const uint8_t *src,
@@ -3879,128 +4112,1678 @@
 {
     avc_luma_hv_qrt_and_aver_dst_4x4_msa(src + stride - 2,
                                          src - (stride * 2) +
-                                         sizeof(uint8_t), stride, dst, stride);
+                                         sizeof(uint8_t), dst, stride);
 }
 
 void ff_avg_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_midv_qrt_and_aver_dst_16w_msa(src - (2 * stride) - 2,
-                                           stride, dst, stride, 16, 0);
+    uint64_t tp0, tp1, tp2, tp3;
+    uint8_t *dst_tmp = dst;
+    const uint8_t *src_tmp = src - (2 * stride) - 2;
+    uint32_t multiple8_cnt, loop_cnt;
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    v16u8 out0, out1, dst0 = { 0 }, dst1 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask0, mask1;
+    v16i8 mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r, hz_out10_l, hz_out21_l;
+    v8i16 hz_out32_l, hz_out43_l, hz_out54_l, hz_out65_l, hz_out76_l;
+    v8i16 hz_out87_l, filt0, filt1, filt2;
+    v4i32 tmp0_w, tmp1_w;
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    for (multiple8_cnt = 2; multiple8_cnt--;) {
+        dst = dst_tmp;
+        src = src_tmp;
+
+        LD_SB5(src, stride, src0, src1, src2, src3, src4);
+        XORI_B5_128_SB(src0, src1, src2, src3, src4);
+        src += (5 * stride);
+
+        hz_out0 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+        hz_out1 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+        hz_out2 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+        hz_out3 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+        hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
+
+        for (loop_cnt = 4; loop_cnt--;) {
+            LD_SB2(src, stride, src5, src6);
+            src += (2 * stride);
+
+            XORI_B2_128_SB(src5, src6);
+            hz_out5 = AVC_HORZ_FILTER_SH(src5, src5, mask0, mask1, mask2);
+            hz_out6 = AVC_HORZ_FILTER_SH(src6, src6, mask0, mask1, mask2);
+            ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2,
+                       hz_out4, hz_out3, hz_out10_r, hz_out21_r, hz_out32_r,
+                       hz_out43_r);
+            ILVL_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2,
+                       hz_out4, hz_out3, hz_out10_l, hz_out21_l, hz_out32_l,
+                       hz_out43_l);
+            ILVR_H2_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out54_r,
+                       hz_out65_r);
+            ILVL_H2_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out54_l,
+                       hz_out65_l);
+            tmp0_w = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0,
+                                    filt1, filt2);
+            tmp1_w = AVC_DOT_SW3_SW(hz_out10_l, hz_out32_l, hz_out54_l, filt0,
+                                    filt1, filt2);
+            tmp0 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+            tmp0_w = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0,
+                                    filt1, filt2);
+            tmp1_w = AVC_DOT_SW3_SW(hz_out21_l, hz_out43_l, hz_out65_l, filt0,
+                                    filt1, filt2);
+            tmp2 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+
+            tmp1 = __msa_srari_h(hz_out2, 5);
+            tmp3 = __msa_srari_h(hz_out3, 5);
+            SAT_SH2_SH(tmp1, tmp3, 7);
+
+            tmp0 = __msa_aver_s_h(tmp0, tmp1);
+            tmp1 = __msa_aver_s_h(tmp2, tmp3);
+
+            LD2(dst, stride, tp0, tp1);
+            INSERT_D2_UB(tp0, tp1, dst0);
+
+            out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+            dst0 = __msa_aver_u_b(out0, dst0);
+            ST8x2_UB(dst0, dst, stride);
+            dst += (2 * stride);
+
+            LD_SB2(src, stride, src7, src8);
+            src += (2 * stride);
+
+            XORI_B2_128_SB(src7, src8);
+            hz_out7 = AVC_HORZ_FILTER_SH(src7, src7, mask0, mask1, mask2);
+            hz_out8 = AVC_HORZ_FILTER_SH(src8, src8, mask0, mask1, mask2);
+            ILVR_H2_SH(hz_out7, hz_out6, hz_out8, hz_out7, hz_out76_r,
+                       hz_out87_r);
+            ILVL_H2_SH(hz_out7, hz_out6, hz_out8, hz_out7, hz_out76_l,
+                       hz_out87_l);
+            tmp0_w = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0,
+                                    filt1, filt2);
+            tmp1_w = AVC_DOT_SW3_SW(hz_out32_l, hz_out54_l, hz_out76_l, filt0,
+                                    filt1, filt2);
+            tmp4 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+            tmp0_w = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0,
+                                    filt1, filt2);
+            tmp1_w = AVC_DOT_SW3_SW(hz_out43_l, hz_out65_l, hz_out87_l, filt0,
+                                    filt1, filt2);
+            tmp6 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+
+            tmp5 = __msa_srari_h(hz_out4, 5);
+            tmp7 = __msa_srari_h(hz_out5, 5);
+            SAT_SH2_SH(tmp5, tmp7, 7);
+
+            tmp2 = __msa_aver_s_h(tmp4, tmp5);
+            tmp3 = __msa_aver_s_h(tmp6, tmp7);
+
+            LD2(dst, stride, tp2, tp3);
+            INSERT_D2_UB(tp2, tp3, dst1);
+
+            out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+            dst1 = __msa_aver_u_b(out1, dst1);
+            ST8x2_UB(dst1, dst, stride);
+            dst += (2 * stride);
+
+            hz_out0 = hz_out4;
+            hz_out1 = hz_out5;
+            hz_out2 = hz_out6;
+            hz_out3 = hz_out7;
+            hz_out4 = hz_out8;
+        }
+
+        src_tmp += 8;
+        dst_tmp += 8;
+    }
 }
 
 void ff_avg_h264_qpel16_mc23_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_midv_qrt_and_aver_dst_16w_msa(src - (2 * stride) - 2,
-                                           stride, dst, stride, 16, 1);
+    uint64_t tp0, tp1, tp2, tp3;
+    uint8_t *dst_tmp = dst;
+    const uint8_t *src_tmp = src - (2 * stride) - 2;
+    uint32_t multiple8_cnt, loop_cnt;
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    v16u8 out0, out1, dst0 = { 0 }, dst1 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask0, mask1;
+    v16i8 mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r, hz_out10_l, hz_out21_l;
+    v8i16 hz_out32_l, hz_out43_l, hz_out54_l, hz_out65_l, hz_out76_l;
+    v8i16 hz_out87_l, filt0, filt1, filt2;
+    v4i32 tmp0_w, tmp1_w;
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    for (multiple8_cnt = 2; multiple8_cnt--;) {
+        dst = dst_tmp;
+        src = src_tmp;
+
+        LD_SB5(src, stride, src0, src1, src2, src3, src4);
+        XORI_B5_128_SB(src0, src1, src2, src3, src4);
+        src += (5 * stride);
+
+        hz_out0 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+        hz_out1 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+        hz_out2 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+        hz_out3 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+        hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
+
+        for (loop_cnt = 4; loop_cnt--;) {
+            LD_SB2(src, stride, src5, src6);
+            src += (2 * stride);
+
+            XORI_B2_128_SB(src5, src6);
+            hz_out5 = AVC_HORZ_FILTER_SH(src5, src5, mask0, mask1, mask2);
+            hz_out6 = AVC_HORZ_FILTER_SH(src6, src6, mask0, mask1, mask2);
+            ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2,
+                       hz_out4, hz_out3, hz_out10_r, hz_out21_r, hz_out32_r,
+                       hz_out43_r);
+            ILVL_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2,
+                       hz_out4, hz_out3, hz_out10_l, hz_out21_l, hz_out32_l,
+                       hz_out43_l);
+            ILVR_H2_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out54_r, hz_out65_r);
+            ILVL_H2_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out54_l, hz_out65_l);
+
+            tmp0_w = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0,
+                                    filt1, filt2);
+            tmp1_w = AVC_DOT_SW3_SW(hz_out10_l, hz_out32_l, hz_out54_l, filt0,
+                                    filt1, filt2);
+            tmp0 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+            tmp0_w = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0,
+                                    filt1, filt2);
+            tmp1_w = AVC_DOT_SW3_SW(hz_out21_l, hz_out43_l, hz_out65_l, filt0,
+                                    filt1, filt2);
+            tmp2 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+
+            tmp1 = __msa_srari_h(hz_out3, 5);
+            tmp3 = __msa_srari_h(hz_out4, 5);
+            SAT_SH2_SH(tmp1, tmp3, 7);
+
+            tmp0 = __msa_aver_s_h(tmp0, tmp1);
+            tmp1 = __msa_aver_s_h(tmp2, tmp3);
+
+            LD2(dst, stride, tp0, tp1);
+            INSERT_D2_UB(tp0, tp1, dst0);
+            out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+            dst0 = __msa_aver_u_b(out0, dst0);
+            ST8x2_UB(dst0, dst, stride);
+            dst += (2 * stride);
+
+            LD_SB2(src, stride, src7, src8);
+            src += (2 * stride);
+
+            XORI_B2_128_SB(src7, src8);
+            hz_out7 = AVC_HORZ_FILTER_SH(src7, src7, mask0, mask1, mask2);
+            hz_out8 = AVC_HORZ_FILTER_SH(src8, src8, mask0, mask1, mask2);
+            ILVR_H2_SH(hz_out7, hz_out6, hz_out8, hz_out7, hz_out76_r,
+                       hz_out87_r);
+            ILVL_H2_SH(hz_out7, hz_out6, hz_out8, hz_out7, hz_out76_l,
+                       hz_out87_l);
+            tmp0_w = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0,
+                                    filt1, filt2);
+            tmp1_w = AVC_DOT_SW3_SW(hz_out32_l, hz_out54_l, hz_out76_l, filt0,
+                                    filt1, filt2);
+            tmp4 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+            tmp0_w = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0,
+                                    filt1, filt2);
+            tmp1_w = AVC_DOT_SW3_SW(hz_out43_l, hz_out65_l, hz_out87_l, filt0,
+                                    filt1, filt2);
+            tmp6 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+
+            tmp5 = __msa_srari_h(hz_out5, 5);
+            tmp7 = __msa_srari_h(hz_out6, 5);
+            SAT_SH2_SH(tmp5, tmp7, 7);
+
+            tmp2 = __msa_aver_s_h(tmp4, tmp5);
+            tmp3 = __msa_aver_s_h(tmp6, tmp7);
+
+            LD2(dst, stride, tp2, tp3);
+            INSERT_D2_UB(tp2, tp3, dst1);
+            out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+            dst1 = __msa_aver_u_b(out1, dst1);
+            ST8x2_UB(dst1, dst, stride);
+            dst += (2 * stride);
+
+            hz_out0 = hz_out4;
+            hz_out1 = hz_out5;
+            hz_out2 = hz_out6;
+            hz_out3 = hz_out7;
+            hz_out4 = hz_out8;
+        }
+
+        src_tmp += 8;
+        dst_tmp += 8;
+    }
 }
 
 void ff_avg_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midv_qrt_and_aver_dst_8w_msa(src - (2 * stride) - 2,
-                                          stride, dst, stride, 8, 0);
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 dst0 = { 0 }, dst1 = { 0 }, out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src11, src12, mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, hz_out9, hz_out10, hz_out11, hz_out12;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r, hz_out89_r, hz_out910_r;
+    v8i16 hz_out1110_r, hz_out1211_r, tmp0, tmp1, tmp2, tmp3;
+    v8i16 hz_out10_l, hz_out21_l, hz_out32_l, hz_out43_l, hz_out54_l;
+    v8i16 hz_out65_l, hz_out76_l, hz_out87_l, hz_out89_l, hz_out910_l;
+    v8i16 hz_out1110_l, hz_out1211_l, filt0, filt1, filt2;
+    v4i32 tmp0_w, tmp1_w;
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
+
+    LD_SB4(src, stride, src5, src6, src7, src8);
+    src += (4 * stride);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+
+    hz_out5 = AVC_HORZ_FILTER_SH(src5, src5, mask0, mask1, mask2);
+    hz_out6 = AVC_HORZ_FILTER_SH(src6, src6, mask0, mask1, mask2);
+    hz_out7 = AVC_HORZ_FILTER_SH(src7, src7, mask0, mask1, mask2);
+    hz_out8 = AVC_HORZ_FILTER_SH(src8, src8, mask0, mask1, mask2);
+
+    ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r);
+    ILVL_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_l, hz_out21_l, hz_out32_l, hz_out43_l);
+    ILVR_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_r, hz_out65_r, hz_out76_r, hz_out87_r);
+    ILVL_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_l, hz_out65_l, hz_out76_l, hz_out87_l);
+
+    tmp0_w = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out10_l, hz_out32_l, hz_out54_l, filt0, filt1,
+                            filt2);
+    tmp0 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out21_l, hz_out43_l, hz_out65_l, filt0, filt1,
+                            filt2);
+    tmp1 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out32_l, hz_out54_l, hz_out76_l, filt0, filt1,
+                            filt2);
+    tmp2 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out43_l, hz_out65_l, hz_out87_l, filt0, filt1,
+                            filt2);
+    tmp3 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+
+    SRARI_H4_SH(hz_out2, hz_out3, hz_out4, hz_out5, 5);
+    SAT_SH4_SH(hz_out2, hz_out3, hz_out4, hz_out5, 7);
+
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+
+    tmp0 = __msa_aver_s_h(tmp0, hz_out2);
+    tmp1 = __msa_aver_s_h(tmp1, hz_out3);
+    tmp2 = __msa_aver_s_h(tmp2, hz_out4);
+    tmp3 = __msa_aver_s_h(tmp3, hz_out5);
+
+    out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+    out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+    AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+    ST8x4_UB(dst0, dst1, dst, stride);
+    dst += (4 * stride);
+
+    LD_SB4(src, stride, src9, src10, src11, src12);
+    XORI_B4_128_SB(src9, src10, src11, src12);
+    hz_out9 = AVC_HORZ_FILTER_SH(src9, src9, mask0, mask1, mask2);
+    hz_out10 = AVC_HORZ_FILTER_SH(src10, src10, mask0, mask1, mask2);
+    hz_out11 = AVC_HORZ_FILTER_SH(src11, src11, mask0, mask1, mask2);
+    hz_out12 = AVC_HORZ_FILTER_SH(src12, src12, mask0, mask1, mask2);
+    ILVR_H4_SH(hz_out9, hz_out8, hz_out10, hz_out9, hz_out11, hz_out10,
+               hz_out12, hz_out11, hz_out89_r, hz_out910_r, hz_out1110_r,
+               hz_out1211_r);
+    ILVL_H4_SH(hz_out9, hz_out8, hz_out10, hz_out9, hz_out11, hz_out10,
+               hz_out12, hz_out11, hz_out89_l, hz_out910_l, hz_out1110_l,
+               hz_out1211_l);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out54_r, hz_out76_r, hz_out89_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out54_l, hz_out76_l, hz_out89_l, filt0, filt1,
+                            filt2);
+    tmp0 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out65_r, hz_out87_r, hz_out910_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out65_l, hz_out87_l, hz_out910_l, filt0, filt1,
+                            filt2);
+    tmp1 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out76_r, hz_out89_r, hz_out1110_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out76_l, hz_out89_l, hz_out1110_l, filt0, filt1,
+                            filt2);
+    tmp2 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out87_r, hz_out910_r, hz_out1211_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out87_l, hz_out910_l, hz_out1211_l, filt0, filt1,
+                            filt2);
+    tmp3 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+
+    SRARI_H4_SH(hz_out6, hz_out7, hz_out8, hz_out9, 5);
+    SAT_SH4_SH(hz_out6, hz_out7, hz_out8, hz_out9, 7);
+
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+
+    tmp0 = __msa_aver_s_h(tmp0, hz_out6);
+    tmp1 = __msa_aver_s_h(tmp1, hz_out7);
+    tmp2 = __msa_aver_s_h(tmp2, hz_out8);
+    tmp3 = __msa_aver_s_h(tmp3, hz_out9);
+
+    out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+    out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+    AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+    ST8x4_UB(dst0, dst1, dst, stride);
 }
 
 void ff_avg_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midv_qrt_and_aver_dst_8w_msa(src - (2 * stride) - 2,
-                                          stride, dst, stride, 8, 1);
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 dst0 = { 0 }, dst1 = { 0 }, out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src11, src12, mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, hz_out9, hz_out10, hz_out11, hz_out12;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r, hz_out89_r, hz_out910_r;
+    v8i16 hz_out1110_r, hz_out1211_r, tmp0, tmp1, tmp2, tmp3;
+    v8i16 hz_out10_l, hz_out21_l, hz_out32_l, hz_out43_l, hz_out54_l;
+    v8i16 hz_out65_l, hz_out76_l, hz_out87_l, hz_out89_l, hz_out910_l;
+    v8i16 hz_out1110_l, hz_out1211_l, filt0, filt1, filt2;
+    v4i32 tmp0_w, tmp1_w;
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
+
+    LD_SB4(src, stride, src5, src6, src7, src8);
+    src += (4 * stride);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+
+    hz_out5 = AVC_HORZ_FILTER_SH(src5, src5, mask0, mask1, mask2);
+    hz_out6 = AVC_HORZ_FILTER_SH(src6, src6, mask0, mask1, mask2);
+    hz_out7 = AVC_HORZ_FILTER_SH(src7, src7, mask0, mask1, mask2);
+    hz_out8 = AVC_HORZ_FILTER_SH(src8, src8, mask0, mask1, mask2);
+
+    ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r);
+    ILVL_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_l, hz_out21_l, hz_out32_l, hz_out43_l);
+    ILVR_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_r, hz_out65_r, hz_out76_r, hz_out87_r);
+    ILVL_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_l, hz_out65_l, hz_out76_l, hz_out87_l);
+
+    tmp0_w = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out10_l, hz_out32_l, hz_out54_l, filt0, filt1,
+                            filt2);
+    tmp0 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out21_l, hz_out43_l, hz_out65_l, filt0, filt1,
+                            filt2);
+    tmp1 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out32_l, hz_out54_l, hz_out76_l, filt0, filt1,
+                            filt2);
+    tmp2 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out43_l, hz_out65_l, hz_out87_l, filt0, filt1,
+                            filt2);
+    tmp3 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+
+    SRARI_H4_SH(hz_out3, hz_out4, hz_out5, hz_out6, 5);
+    SAT_SH4_SH(hz_out3, hz_out4, hz_out5, hz_out6, 7);
+
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+
+    tmp0 = __msa_aver_s_h(tmp0, hz_out3);
+    tmp1 = __msa_aver_s_h(tmp1, hz_out4);
+    tmp2 = __msa_aver_s_h(tmp2, hz_out5);
+    tmp3 = __msa_aver_s_h(tmp3, hz_out6);
+
+    out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+    out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+    AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+    ST8x4_UB(dst0, dst1, dst, stride);
+    dst += (4 * stride);
+
+    LD_SB4(src, stride, src9, src10, src11, src12);
+    XORI_B4_128_SB(src9, src10, src11, src12);
+    hz_out9 = AVC_HORZ_FILTER_SH(src9, src9, mask0, mask1, mask2);
+    hz_out10 = AVC_HORZ_FILTER_SH(src10, src10, mask0, mask1, mask2);
+    hz_out11 = AVC_HORZ_FILTER_SH(src11, src11, mask0, mask1, mask2);
+    hz_out12 = AVC_HORZ_FILTER_SH(src12, src12, mask0, mask1, mask2);
+    ILVR_H4_SH(hz_out9, hz_out8, hz_out10, hz_out9, hz_out11, hz_out10,
+               hz_out12, hz_out11, hz_out89_r, hz_out910_r, hz_out1110_r,
+               hz_out1211_r);
+    ILVL_H4_SH(hz_out9, hz_out8, hz_out10, hz_out9, hz_out11, hz_out10,
+               hz_out12, hz_out11, hz_out89_l, hz_out910_l, hz_out1110_l,
+               hz_out1211_l);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out54_r, hz_out76_r, hz_out89_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out54_l, hz_out76_l, hz_out89_l, filt0, filt1,
+                            filt2);
+    tmp0 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out65_r, hz_out87_r, hz_out910_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out65_l, hz_out87_l, hz_out910_l, filt0, filt1,
+                            filt2);
+    tmp1 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out76_r, hz_out89_r, hz_out1110_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out76_l, hz_out89_l, hz_out1110_l, filt0, filt1,
+                            filt2);
+    tmp2 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+    tmp0_w = AVC_DOT_SW3_SW(hz_out87_r, hz_out910_r, hz_out1211_r, filt0, filt1,
+                            filt2);
+    tmp1_w = AVC_DOT_SW3_SW(hz_out87_l, hz_out910_l, hz_out1211_l, filt0, filt1,
+                            filt2);
+    tmp3 = __msa_pckev_h((v8i16) tmp1_w, (v8i16) tmp0_w);
+
+    SRARI_H4_SH(hz_out7, hz_out8, hz_out9, hz_out10, 5);
+    SAT_SH4_SH(hz_out7, hz_out8, hz_out9, hz_out10, 7);
+
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+
+    tmp0 = __msa_aver_s_h(tmp0, hz_out7);
+    tmp1 = __msa_aver_s_h(tmp1, hz_out8);
+    tmp2 = __msa_aver_s_h(tmp2, hz_out9);
+    tmp3 = __msa_aver_s_h(tmp3, hz_out10);
+
+    out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+    out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+    AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+    ST8x4_UB(dst0, dst1, dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midv_qrt_and_aver_dst_4w_msa(src - (2 * stride) - 2,
-                                          stride, dst, stride, 4, 0);
+    uint32_t tp0, tp1, tp2, tp3;
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    v16u8 res, out = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, dst0, dst1, filt0, filt1, filt2;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r;
+    v4i32 tmp0, tmp1;
+
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    LD_SB4(src, stride, src5, src6, src7, src8);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, src5, mask0, mask1, mask2);
+    hz_out6 = AVC_HORZ_FILTER_SH(src6, src7, mask0, mask1, mask2);
+    hz_out8 = AVC_HORZ_FILTER_SH(src8, src8, mask0, mask1, mask2);
+    PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
+    PCKOD_D2_SH(hz_out4, hz_out4, hz_out6, hz_out6, hz_out5, hz_out7);
+
+    ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r);
+    ILVR_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_r, hz_out65_r, hz_out76_r, hz_out87_r);
+
+    tmp0 = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0, filt1,
+                          filt2);
+    dst0 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0, filt1,
+                          filt2);
+    dst1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+
+    SRARI_H2_SH(hz_out2, hz_out4, 5);
+    SAT_SH2_SH(hz_out2, hz_out4, 7);
+
+    dst0 = __msa_aver_s_h(dst0, hz_out2);
+    dst1 = __msa_aver_s_h(dst1, hz_out4);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, out);
+    res = PCKEV_XORI128_UB(dst0, dst1);
+    res = __msa_aver_u_b(res, out);
+    ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midv_qrt_and_aver_dst_4w_msa(src - (2 * stride) - 2,
-                                          stride, dst, stride, 4, 1);
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    uint32_t tp0, tp1, tp2, tp3;
+    v16u8 res, out = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, dst0, dst1, filt0, filt1, filt2;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r;
+    v4i32 tmp0, tmp1;
+
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    LD_SB4(src, stride, src5, src6, src7, src8);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, src5, mask0, mask1, mask2);
+    hz_out6 = AVC_HORZ_FILTER_SH(src6, src7, mask0, mask1, mask2);
+    hz_out8 = AVC_HORZ_FILTER_SH(src8, src8, mask0, mask1, mask2);
+    PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
+    PCKOD_D2_SH(hz_out4, hz_out4, hz_out6, hz_out6, hz_out5, hz_out7);
+
+    ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r);
+    ILVR_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_r, hz_out65_r, hz_out76_r, hz_out87_r);
+
+    tmp0 = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0, filt1,
+                          filt2);
+    dst0 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0, filt1,
+                          filt2);
+    dst1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+
+    PCKEV_D2_SH(hz_out4, hz_out3, hz_out6, hz_out5, hz_out0, hz_out1);
+    SRARI_H2_SH(hz_out0, hz_out1, 5);
+    SAT_SH2_SH(hz_out0, hz_out1, 7);
+
+    dst0 = __msa_aver_s_h(dst0, hz_out0);
+    dst1 = __msa_aver_s_h(dst1, hz_out1);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, out);
+    res = PCKEV_XORI128_UB(dst0, dst1);
+    res = __msa_aver_u_b(res, out);
+    ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_avg_h264_qpel16_mc02_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_vt_and_aver_dst_16x16_msa(src - (stride * 2), stride, dst, stride);
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01;
+    int16_t filt_const1 = 0x1414;
+    int16_t filt_const2 = 0x1fb;
+    v16u8 res0, res1, res2, res3, dst0, dst1, dst2, dst3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src10_l, src32_l, src54_l, src76_l, src21_l, src43_l;
+    v16i8 src65_l, src87_l, filt0, filt1, filt2;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+    src -= (stride * 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_l, src21_l,
+               src32_l, src43_l);
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src, stride, src5, src6, src7, src8);
+        src += (4 * stride);
+
+        XORI_B4_128_SB(src5, src6, src7, src8);
+        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r,
+                   src65_r, src76_r, src87_r);
+        ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_l,
+                   src65_l, src76_l, src87_l);
+        out0_r = AVC_DOT_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+        out1_r = AVC_DOT_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+        out2_r = AVC_DOT_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+        out3_r = AVC_DOT_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+        out0_l = AVC_DOT_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+        out1_l = AVC_DOT_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+        out2_l = AVC_DOT_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+        out3_l = AVC_DOT_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 5);
+        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        LD_UB4(dst, stride, dst0, dst1, dst2, dst3);
+        PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                    out3_r, res0, res1, res2, res3);
+        XORI_B4_128_UB(res0, res1, res2, res3);
+        AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+        AVER_UB2_UB(res2, dst2, res3, dst3, res2, res3);
+        ST_UB4(res0, res1, res2, res3, dst, stride);
+        dst += (4 * stride);
+
+        src10_r = src54_r;
+        src32_r = src76_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src10_l = src54_l;
+        src32_l = src76_l;
+        src21_l = src65_l;
+        src43_l = src87_l;
+        src4 = src8;
+    }
 }
 
 void ff_avg_h264_qpel8_mc02_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_vt_and_aver_dst_8x8_msa(src - (stride * 2), stride, dst, stride);
+    uint64_t tp0, tp1, tp2, tp3;
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    v16u8 dst0 = { 0 }, dst1 = { 0 }, dst2 = { 0 }, dst3 = { 0 };
+    v16u8 out0, out1, out2, out3;
+    v16i8 src0, src1, src2, src3, src4, src7, src8, src9, src10, src109_r;
+    v16i8 src10_r, src32_r, src76_r, src98_r, src21_r, src43_r, src87_r;
+    v16i8 filt0, filt1, filt2;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out4_r, out5_r, out6_r, out7_r;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= (stride * 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+
+    LD_SB4(src, stride, src7, src8, src9, src10);
+    src += (4 * stride);
+    XORI_B4_128_SB(src7, src8, src9, src10);
+    ILVR_B4_SB(src7, src4, src8, src7, src9, src8, src10, src9, src76_r,
+               src87_r, src98_r, src109_r);
+    out0_r = AVC_DOT_SH3_SH(src10_r, src32_r, src76_r, filt0, filt1, filt2);
+    out1_r = AVC_DOT_SH3_SH(src21_r, src43_r, src87_r, filt0, filt1, filt2);
+    out2_r = AVC_DOT_SH3_SH(src32_r, src76_r, src98_r, filt0, filt1, filt2);
+    out3_r = AVC_DOT_SH3_SH(src43_r, src87_r, src109_r, filt0, filt1, filt2);
+
+    LD_SB4(src, stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    ILVR_B4_SB(src0, src10, src1, src0, src2, src1, src3, src2, src10_r,
+               src21_r, src32_r, src43_r);
+    out4_r = AVC_DOT_SH3_SH(src76_r, src98_r, src10_r, filt0, filt1, filt2);
+    out5_r = AVC_DOT_SH3_SH(src87_r, src109_r, src21_r, filt0, filt1, filt2);
+    out6_r = AVC_DOT_SH3_SH(src98_r, src10_r, src32_r, filt0, filt1, filt2);
+    out7_r = AVC_DOT_SH3_SH(src109_r, src21_r, src43_r, filt0, filt1, filt2);
+
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst2);
+    INSERT_D2_UB(tp2, tp3, dst3);
+
+    SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5);
+    SRARI_H4_SH(out4_r, out5_r, out6_r, out7_r, 5);
+    SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+    SAT_SH4_SH(out4_r, out5_r, out6_r, out7_r, 7);
+    out0 = PCKEV_XORI128_UB(out0_r, out1_r);
+    out1 = PCKEV_XORI128_UB(out2_r, out3_r);
+    out2 = PCKEV_XORI128_UB(out4_r, out5_r);
+    out3 = PCKEV_XORI128_UB(out6_r, out7_r);
+    AVER_UB4_UB(out0, dst0, out1, dst1, out2, dst2, out3, dst3, dst0, dst1,
+                dst2, dst3);
+    ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_vt_and_aver_dst_4x4_msa(src - (stride * 2), stride, dst, stride);
+    uint32_t tp0, tp1, tp2, tp3;
+    int16_t filt_const0 = 0xfb01;
+    int16_t filt_const1 = 0x1414;
+    int16_t filt_const2 = 0x1fb;
+    v16u8 res, dst0 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src2110, src4332, src6554, src8776, filt0, filt1, filt2;
+    v8i16 out10, out32;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= (stride * 2);
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+    XORI_B2_128_SB(src2110, src4332);
+    LD_SB4(src, stride, src5, src6, src7, src8);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+               src76_r, src87_r);
+    ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776);
+    XORI_B2_128_SB(src6554, src8776);
+    out10 = AVC_DOT_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2);
+    out32 = AVC_DOT_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2);
+    SRARI_H2_SH(out10, out32, 5);
+    SAT_SH2_SH(out10, out32, 7);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+    res = PCKEV_XORI128_UB(out10, out32);
+    dst0 = __msa_aver_u_b(res, dst0);
+    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_avg_h264_qpel16_mc12_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_and_aver_dst_16w_msa(src - (2 * stride) - 2,
-                                           stride, dst, stride, 16, 0);
+    uint32_t row;
+    v16u8 out, dst0;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src11;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, tmp0, tmp1, tmp2, tmp3, mask3;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v8i16 shf_vec7, shf_vec8, shf_vec9, shf_vec10, shf_vec11, mask4, mask5;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+
+    mask3 = mask0 + 4;
+    mask4 = mask1 + 4;
+    mask5 = mask2 + 4;
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    LD_SB5(src + 8, stride, src7, src8, src9, src10, src11);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src7, src8, src9, src10, src11);
+
+    for (row = 16; row--;) {
+        LD_SB2(src, 8, src5, src6);
+        src += stride;
+        XORI_B2_128_SB(src5, src6);
+        dst0 = LD_UB(dst);
+
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
+                                        vt_res0, vt_res1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src7, src8, src9, src10, src11, src6,
+                                        vt_res2, vt_res3);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+                   mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+                   mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask3,
+                   mask4, mask5, shf_vec6, shf_vec7, shf_vec8);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask3,
+                   mask4, mask5, shf_vec9, shf_vec10, shf_vec11);
+        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+        hz_res2 = __msa_hadd_s_w(shf_vec6, shf_vec6);
+        hz_res3 = __msa_hadd_s_w(shf_vec9, shf_vec9);
+        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+        DPADD_SH2_SW(shf_vec7, shf_vec8, minus5h, plus20h, hz_res2, hz_res2);
+        DPADD_SH2_SW(shf_vec10, shf_vec11, minus5h, plus20h, hz_res3, hz_res3);
+        SRARI_W4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 10);
+        SAT_SW4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 7);
+        tmp0 = __msa_srari_h(shf_vec2, 5);
+        tmp1 = __msa_srari_h(shf_vec5, 5);
+        tmp2 = __msa_srari_h(shf_vec8, 5);
+        tmp3 = __msa_srari_h(shf_vec11, 5);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        PCKEV_H2_SH(tmp2, tmp0, tmp3, tmp1, tmp0, tmp1);
+        PCKEV_H2_SH(hz_res2, hz_res0, hz_res3, hz_res1, tmp2, tmp3);
+        tmp0 = __msa_aver_s_h(tmp2, tmp0);
+        tmp1 = __msa_aver_s_h(tmp3, tmp1);
+        out = PCKEV_XORI128_UB(tmp0, tmp1);
+        out = __msa_aver_u_b(out, dst0);
+        ST_UB(out, dst);
+        dst += stride;
+
+        src0 = src1;
+        src1 = src2;
+        src2 = src3;
+        src3 = src4;
+        src4 = src5;
+        src7 = src8;
+        src8 = src9;
+        src9 = src10;
+        src10 = src11;
+        src11 = src6;
+    }
 }
 
 void ff_avg_h264_qpel16_mc32_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_and_aver_dst_16w_msa(src - (2 * stride) - 2,
-                                           stride, dst, stride, 16, 1);
+    uint32_t row;
+    v16u8 out, dst0;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src11;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, tmp0, tmp1, tmp2, tmp3, mask3;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v8i16 shf_vec7, shf_vec8, shf_vec9, shf_vec10, shf_vec11, mask4, mask5;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+
+    mask3 = mask0 + 4;
+    mask4 = mask1 + 4;
+    mask5 = mask2 + 4;
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    LD_SB5(src + 8, stride, src7, src8, src9, src10, src11);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src7, src8, src9, src10, src11);
+
+    for (row = 16; row--;) {
+        LD_SB2(src, 8, src5, src6);
+        src += stride;
+        XORI_B2_128_SB(src5, src6);
+        dst0 = LD_UB(dst);
+
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
+                                        vt_res0, vt_res1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src7, src8, src9, src10, src11, src6,
+                                        vt_res2, vt_res3);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+                   mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+                   mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask3,
+                   mask4, mask5, shf_vec6, shf_vec7, shf_vec8);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask3,
+                   mask4, mask5, shf_vec9, shf_vec10, shf_vec11);
+        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+        hz_res2 = __msa_hadd_s_w(shf_vec6, shf_vec6);
+        hz_res3 = __msa_hadd_s_w(shf_vec9, shf_vec9);
+        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+        DPADD_SH2_SW(shf_vec7, shf_vec8, minus5h, plus20h, hz_res2, hz_res2);
+        DPADD_SH2_SW(shf_vec10, shf_vec11, minus5h, plus20h, hz_res3, hz_res3);
+        SRARI_W4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 10);
+        SAT_SW4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 7);
+        tmp0 = __msa_srari_h(shf_vec2, 5);
+        tmp1 = __msa_srari_h(shf_vec5, 5);
+        tmp2 = __msa_srari_h(shf_vec8, 5);
+        tmp3 = __msa_srari_h(shf_vec11, 5);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        tmp0 = __msa_pckod_h(tmp2, tmp0);
+        tmp1 = __msa_pckod_h(tmp3, tmp1);
+        PCKEV_H2_SH(hz_res2, hz_res0, hz_res3, hz_res1, tmp2, tmp3);
+        tmp0 = __msa_aver_s_h(tmp2, tmp0);
+        tmp1 = __msa_aver_s_h(tmp3, tmp1);
+        out = PCKEV_XORI128_UB(tmp0, tmp1);
+        out = __msa_aver_u_b(out, dst0);
+        ST_UB(out, dst);
+        dst += stride;
+
+        src0 = src1;
+        src1 = src2;
+        src2 = src3;
+        src3 = src4;
+        src4 = src5;
+        src7 = src8;
+        src8 = src9;
+        src9 = src10;
+        src10 = src11;
+        src11 = src6;
+    }
 }
 
 void ff_avg_h264_qpel8_mc12_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_and_aver_dst_8w_msa(src - (2 * stride) - 2,
-                                          stride, dst, stride, 8, 0);
+    uint32_t row;
+    uint64_t tp0, tp1;
+    v16u8 out, dst0 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, tmp0, tmp1, tmp2, tmp3;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v8i16 shf_vec7, shf_vec8, shf_vec9, shf_vec10, shf_vec11;
+    v8i16 mask3, mask4, mask5;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+
+    mask3 = mask0 + 4;
+    mask4 = mask1 + 4;
+    mask5 = mask2 + 4;
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+
+    for (row = 4; row--;) {
+        LD_SB2(src, stride, src5, src6);
+        src += (2 * stride);
+        XORI_B2_128_SB(src5, src6);
+
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
+                                        vt_res0, vt_res1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src1, src2, src3, src4, src5, src6,
+                                        vt_res2, vt_res3);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+                   mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+                   mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask3,
+                   mask4, mask5, shf_vec6, shf_vec7, shf_vec8);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask3,
+                   mask4, mask5, shf_vec9, shf_vec10, shf_vec11);
+        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+        hz_res2 = __msa_hadd_s_w(shf_vec6, shf_vec6);
+        hz_res3 = __msa_hadd_s_w(shf_vec9, shf_vec9);
+        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+        DPADD_SH2_SW(shf_vec7, shf_vec8, minus5h, plus20h, hz_res2, hz_res2);
+        DPADD_SH2_SW(shf_vec10, shf_vec11, minus5h, plus20h, hz_res3, hz_res3);
+        SRARI_W4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 10);
+        SAT_SW4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 7);
+        tmp0 = __msa_srari_h(shf_vec2, 5);
+        tmp1 = __msa_srari_h(shf_vec5, 5);
+        tmp2 = __msa_srari_h(shf_vec8, 5);
+        tmp3 = __msa_srari_h(shf_vec11, 5);
+        LD2(dst, stride, tp0, tp1);
+        INSERT_D2_UB(tp0, tp1, dst0);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        PCKEV_H2_SH(tmp2, tmp0, tmp3, tmp1, tmp0, tmp1);
+        PCKEV_H2_SH(hz_res2, hz_res0, hz_res3, hz_res1, tmp2, tmp3);
+        tmp0 = __msa_aver_s_h(tmp2, tmp0);
+        tmp1 = __msa_aver_s_h(tmp3, tmp1);
+        out = PCKEV_XORI128_UB(tmp0, tmp1);
+        out = __msa_aver_u_b(out, dst0);
+        ST8x2_UB(out, dst, stride);
+        dst += (2 * stride);
+
+        src0 = src2;
+        src1 = src3;
+        src2 = src4;
+        src3 = src5;
+        src4 = src6;
+    }
 }
 
 void ff_avg_h264_qpel8_mc32_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_and_aver_dst_8w_msa(src - (2 * stride) - 2,
-                                          stride, dst, stride, 8, 1);
+    uint32_t row;
+    uint64_t tp0, tp1;
+    v16u8 out, dst0 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, tmp0, tmp1, tmp2, tmp3;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v8i16 shf_vec7, shf_vec8, shf_vec9, shf_vec10, shf_vec11;
+    v8i16 mask3, mask4, mask5;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+
+    mask3 = mask0 + 4;
+    mask4 = mask1 + 4;
+    mask5 = mask2 + 4;
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+
+    for (row = 4; row--;) {
+        LD_SB2(src, stride, src5, src6);
+        src += (2 * stride);
+        XORI_B2_128_SB(src5, src6);
+
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
+                                        vt_res0, vt_res1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src1, src2, src3, src4, src5, src6,
+                                        vt_res2, vt_res3);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+                   mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+                   mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask3,
+                   mask4, mask5, shf_vec6, shf_vec7, shf_vec8);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask3,
+                   mask4, mask5, shf_vec9, shf_vec10, shf_vec11);
+        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+        hz_res2 = __msa_hadd_s_w(shf_vec6, shf_vec6);
+        hz_res3 = __msa_hadd_s_w(shf_vec9, shf_vec9);
+        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+        DPADD_SH2_SW(shf_vec7, shf_vec8, minus5h, plus20h, hz_res2, hz_res2);
+        DPADD_SH2_SW(shf_vec10, shf_vec11, minus5h, plus20h, hz_res3, hz_res3);
+        SRARI_W4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 10);
+        SAT_SW4_SW(hz_res0, hz_res1, hz_res2, hz_res3, 7);
+        tmp0 = __msa_srari_h(shf_vec2, 5);
+        tmp1 = __msa_srari_h(shf_vec5, 5);
+        tmp2 = __msa_srari_h(shf_vec8, 5);
+        tmp3 = __msa_srari_h(shf_vec11, 5);
+        LD2(dst, stride, tp0, tp1);
+        INSERT_D2_UB(tp0, tp1, dst0);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        tmp0 = __msa_pckod_h(tmp2, tmp0);
+        tmp1 = __msa_pckod_h(tmp3, tmp1);
+        PCKEV_H2_SH(hz_res2, hz_res0, hz_res3, hz_res1, tmp2, tmp3);
+        tmp0 = __msa_aver_s_h(tmp2, tmp0);
+        tmp1 = __msa_aver_s_h(tmp3, tmp1);
+        out = PCKEV_XORI128_UB(tmp0, tmp1);
+        out = __msa_aver_u_b(out, dst0);
+        ST8x2_UB(out, dst, stride);
+        dst += (2 * stride);
+
+        src0 = src2;
+        src1 = src3;
+        src2 = src4;
+        src3 = src5;
+        src4 = src6;
+    }
 }
 
 void ff_avg_h264_qpel4_mc12_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_and_aver_dst_4w_msa(src - (2 * stride) - 2,
-                                          stride, dst, stride, 4, 0);
+    uint32_t tp0, tp1, tp2, tp3;
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    v16u8 out, dstv = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src21_r, src32_r, src43_r, src54_r, src65_r, src76_r;
+    v16i8 src87_r, src10_l, src21_l, src32_l, src43_l, src54_l, src65_l;
+    v16i8 src76_l, src87_l, filt0, filt1, filt2;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, dst0, dst1, dst2, dst3, shf_vec7;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+    v8i16 zeros = { 0 };
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    LD_SB4(src, stride, src5, src6, src7, src8);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+               src76_r, src87_r);
+    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_l, src21_l,
+               src32_l, src43_l);
+    ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_l, src65_l,
+               src76_l, src87_l);
+    vt_res0 = AVC_DOT_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+    vt_res1 = AVC_DOT_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+    vt_res2 = AVC_DOT_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+    vt_res3 = AVC_DOT_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+    VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+               mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+    VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+               mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+    hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+    DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+    hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+    DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+
+    vt_res0 = AVC_DOT_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+    vt_res1 = AVC_DOT_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+    vt_res2 = AVC_DOT_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+    vt_res3 = AVC_DOT_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+    VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+               mask1, mask2, shf_vec0, shf_vec1, shf_vec6);
+    VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+               mask1, mask2, shf_vec3, shf_vec4, shf_vec7);
+    hz_res2 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+    DPADD_SH2_SW(shf_vec1, shf_vec6, minus5h, plus20h, hz_res2, hz_res2);
+    hz_res3 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+    DPADD_SH2_SW(shf_vec4, shf_vec7, minus5h, plus20h, hz_res3, hz_res3);
+
+    SRARI_W2_SW(hz_res0, hz_res1, 10);
+    SAT_SW2_SW(hz_res0, hz_res1, 7);
+    SRARI_W2_SW(hz_res2, hz_res3, 10);
+    SAT_SW2_SW(hz_res2, hz_res3, 7);
+
+    dst0 = __msa_srari_h(shf_vec2, 5);
+    dst1 = __msa_srari_h(shf_vec5, 5);
+    dst2 = __msa_srari_h(shf_vec6, 5);
+    dst3 = __msa_srari_h(shf_vec7, 5);
+
+    SAT_SH2_SH(dst0, dst1, 7);
+    SAT_SH2_SH(dst2, dst3, 7);
+    ILVEV_H2_SH(dst0, zeros, dst1, zeros, dst0, dst1);
+    ILVEV_H2_SH(dst2, zeros, dst3, zeros, dst2, dst3);
+
+    hz_res0 = __msa_aver_s_w(hz_res0, (v4i32) dst0);
+    hz_res1 = __msa_aver_s_w(hz_res1, (v4i32) dst1);
+    hz_res2 = __msa_aver_s_w(hz_res2, (v4i32) dst2);
+    hz_res3 = __msa_aver_s_w(hz_res3, (v4i32) dst3);
+
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dstv);
+    PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2);
+    out = PCKEV_XORI128_UB(dst0, dst2);
+    out = __msa_aver_u_b(out, dstv);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_midh_qrt_and_aver_dst_4w_msa(src - (2 * stride) - 2,
-                                          stride, dst, stride, 4, 1);
+    uint32_t tp0, tp1, tp2, tp3;
+    const int16_t filt_const0 = 0xfb01;
+    const int16_t filt_const1 = 0x1414;
+    const int16_t filt_const2 = 0x1fb;
+    v16u8 out, dstv = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src21_r, src32_r, src43_r, src54_r, src65_r, src76_r;
+    v16i8 src87_r, src10_l, src21_l, src32_l, src43_l, src54_l, src65_l;
+    v16i8 src76_l, src87_l, filt0, filt1, filt2;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3, dst0, dst1, dst2, dst3, shf_vec7;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5, shf_vec6;
+    v4i32 hz_res0, hz_res1, hz_res2, hz_res3;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+    v8i16 zeros = { 0 };
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    LD_SB4(src, stride, src5, src6, src7, src8);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+               src76_r, src87_r);
+    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_l, src21_l,
+               src32_l, src43_l);
+    ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_l, src65_l,
+               src76_l, src87_l);
+    vt_res0 = AVC_DOT_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+    vt_res1 = AVC_DOT_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+    vt_res2 = AVC_DOT_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+    vt_res3 = AVC_DOT_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+    VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+               mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+    VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+               mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+    hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+    DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+    hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+    DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+
+    vt_res0 = AVC_DOT_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+    vt_res1 = AVC_DOT_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+    vt_res2 = AVC_DOT_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+    vt_res3 = AVC_DOT_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+    VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1, mask0,
+               mask1, mask2, shf_vec0, shf_vec1, shf_vec6);
+    VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3, mask0,
+               mask1, mask2, shf_vec3, shf_vec4, shf_vec7);
+    hz_res2 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+    DPADD_SH2_SW(shf_vec1, shf_vec6, minus5h, plus20h, hz_res2, hz_res2);
+    hz_res3 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+    DPADD_SH2_SW(shf_vec4, shf_vec7, minus5h, plus20h, hz_res3, hz_res3);
+
+    SRARI_W2_SW(hz_res0, hz_res1, 10);
+    SAT_SW2_SW(hz_res0, hz_res1, 7);
+    SRARI_W2_SW(hz_res2, hz_res3, 10);
+    SAT_SW2_SW(hz_res2, hz_res3, 7);
+
+    dst0 = __msa_srari_h(shf_vec2, 5);
+    dst1 = __msa_srari_h(shf_vec5, 5);
+    dst2 = __msa_srari_h(shf_vec6, 5);
+    dst3 = __msa_srari_h(shf_vec7, 5);
+
+    SAT_SH2_SH(dst0, dst1, 7);
+    SAT_SH2_SH(dst2, dst3, 7);
+
+    dst0 = __msa_ilvod_h(zeros, dst0);
+    dst1 = __msa_ilvod_h(zeros, dst1);
+    dst2 = __msa_ilvod_h(zeros, dst2);
+    dst3 = __msa_ilvod_h(zeros, dst3);
+
+    hz_res0 = __msa_aver_s_w(hz_res0, (v4i32) dst0);
+    hz_res1 = __msa_aver_s_w(hz_res1, (v4i32) dst1);
+    hz_res2 = __msa_aver_s_w(hz_res2, (v4i32) dst2);
+    hz_res3 = __msa_aver_s_w(hz_res3, (v4i32) dst3);
+
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dstv);
+    PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2);
+    out = PCKEV_XORI128_UB(dst0, dst2);
+    out = __msa_aver_u_b(out, dstv);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
 }
 
 void ff_avg_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
 {
-    avc_luma_mid_and_aver_dst_16x16_msa(src - (2 * stride) - 2,
-                                        stride, dst, stride);
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    const uint8_t *src_tmp = src - (2 * stride) - 2;
+    uint8_t *dst_tmp = dst;
+    uint64_t tp0, tp1, tp2, tp3;
+    uint32_t multiple8_cnt, loop_cnt;
+    v16u8 dst0, dst1, out0, out1;
+    v16i8 src0, src1, src2, src3, src4, mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, res0, res1, res2, res3;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r, hz_out10_l, hz_out21_l;
+    v8i16 hz_out32_l, hz_out43_l, hz_out54_l, hz_out65_l, hz_out76_l;
+    v8i16 hz_out87_l, filt0, filt1, filt2;
+    v4i32 tmp0, tmp1;
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    for (multiple8_cnt = 2; multiple8_cnt--;) {
+        src = src_tmp;
+        dst = dst_tmp;
+
+        LD_SB5(src, stride, src0, src1, src2, src3, src4);
+        XORI_B5_128_SB(src0, src1, src2, src3, src4);
+        src += (5 * stride);
+
+        hz_out0 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+        hz_out1 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+        hz_out2 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+        hz_out3 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+        hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
+
+        for (loop_cnt = 4; loop_cnt--;) {
+            LD_SB4(src, stride, src0, src1, src2, src3);
+            XORI_B4_128_SB(src0, src1, src2, src3);
+            src += (4 * stride);
+
+            hz_out5 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+            hz_out6 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+            hz_out7 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+            hz_out8 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+            ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2,
+                       hz_out4, hz_out3, hz_out10_r, hz_out21_r, hz_out32_r,
+                       hz_out43_r);
+            ILVL_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2,
+                       hz_out4, hz_out3, hz_out10_l, hz_out21_l, hz_out32_l,
+                       hz_out43_l);
+            ILVR_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6,
+                       hz_out8, hz_out7, hz_out54_r, hz_out65_r, hz_out76_r,
+                       hz_out87_r);
+            ILVL_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6,
+                       hz_out8, hz_out7, hz_out54_l, hz_out65_l, hz_out76_l,
+                       hz_out87_l);
+
+            tmp0 = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0,
+                                  filt1, filt2);
+            tmp1 = AVC_DOT_SW3_SW(hz_out10_l, hz_out32_l, hz_out54_l, filt0,
+                                  filt1, filt2);
+            res0 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+            tmp0 = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0,
+                                  filt1, filt2);
+            tmp1 = AVC_DOT_SW3_SW(hz_out21_l, hz_out43_l, hz_out65_l, filt0,
+                                  filt1, filt2);
+            res1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+            tmp0 = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0,
+                                  filt1, filt2);
+            tmp1 = AVC_DOT_SW3_SW(hz_out32_l, hz_out54_l, hz_out76_l, filt0,
+                                  filt1, filt2);
+            res2 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+            tmp0 = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0,
+                                  filt1, filt2);
+            tmp1 = AVC_DOT_SW3_SW(hz_out43_l, hz_out65_l, hz_out87_l, filt0,
+                                  filt1, filt2);
+            res3 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+
+            LD4(dst, stride, tp0, tp1, tp2, tp3);
+            INSERT_D2_UB(tp0, tp1, dst0);
+            INSERT_D2_UB(tp2, tp3, dst1);
+            out0 = PCKEV_XORI128_UB(res0, res1);
+            out1 = PCKEV_XORI128_UB(res2, res3);
+            AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
+            ST8x4_UB(out0, out1, dst, stride);
+            dst += (4 * stride);
+
+            hz_out0 = hz_out4;
+            hz_out1 = hz_out5;
+            hz_out2 = hz_out6;
+            hz_out3 = hz_out7;
+            hz_out4 = hz_out8;
+        }
+
+        src_tmp += 8;
+        dst_tmp += 8;
+    }
 }
 
 void ff_avg_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_mid_and_aver_dst_8w_msa(src - (2 * stride) - 2,
-                                     stride, dst, stride, 8);
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 out0, out1, dst0 = { 0 }, dst1 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, hz_out9, hz_out10, hz_out11, hz_out12;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r, hz_out89_r, hz_out910_r;
+    v8i16 hz_out1110_r, hz_out1211_r, res0, res1, res2, res3;
+    v8i16 hz_out10_l, hz_out21_l, hz_out32_l, hz_out43_l, hz_out54_l;
+    v8i16 hz_out65_l, hz_out76_l, hz_out87_l, hz_out89_l, hz_out910_l;
+    v8i16 hz_out1110_l, hz_out1211_l, filt0, filt1, filt2;
+    v4i32 tmp0, tmp1;
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    src -= ((2 * stride) + 2);
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    src += (5 * stride);
+
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, src4, mask0, mask1, mask2);
+
+    LD_SB4(src, stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    src += (4 * stride);
+    hz_out5 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+    hz_out6 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+    hz_out7 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+    hz_out8 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+    ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r);
+    ILVL_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_l, hz_out21_l, hz_out32_l, hz_out43_l);
+    ILVR_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_r, hz_out65_r, hz_out76_r, hz_out87_r);
+    ILVL_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_l, hz_out65_l, hz_out76_l, hz_out87_l);
+
+    tmp0 = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out10_l, hz_out32_l, hz_out54_l, filt0, filt1,
+                          filt2);
+    res0 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out21_l, hz_out43_l, hz_out65_l, filt0, filt1,
+                          filt2);
+    res1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out32_l, hz_out54_l, hz_out76_l, filt0, filt1,
+                          filt2);
+    res2 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out43_l, hz_out65_l, hz_out87_l, filt0, filt1,
+                          filt2);
+    res3 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    out0 = PCKEV_XORI128_UB(res0, res1);
+    out1 = PCKEV_XORI128_UB(res2, res3);
+    AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+    ST8x4_UB(dst0, dst1, dst, stride);
+    dst += (4 * stride);
+
+    LD_SB4(src, stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    hz_out9 = AVC_HORZ_FILTER_SH(src0, src0, mask0, mask1, mask2);
+    hz_out10 = AVC_HORZ_FILTER_SH(src1, src1, mask0, mask1, mask2);
+    hz_out11 = AVC_HORZ_FILTER_SH(src2, src2, mask0, mask1, mask2);
+    hz_out12 = AVC_HORZ_FILTER_SH(src3, src3, mask0, mask1, mask2);
+    ILVR_H4_SH(hz_out9, hz_out8, hz_out10, hz_out9, hz_out11, hz_out10,
+               hz_out12, hz_out11, hz_out89_r, hz_out910_r, hz_out1110_r,
+               hz_out1211_r);
+    ILVL_H4_SH(hz_out9, hz_out8, hz_out10, hz_out9, hz_out11, hz_out10,
+               hz_out12, hz_out11, hz_out89_l, hz_out910_l, hz_out1110_l,
+               hz_out1211_l);
+    tmp0 = AVC_DOT_SW3_SW(hz_out54_r, hz_out76_r, hz_out89_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out54_l, hz_out76_l, hz_out89_l, filt0, filt1,
+                          filt2);
+    res0 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out65_r, hz_out87_r, hz_out910_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out65_l, hz_out87_l, hz_out910_l, filt0, filt1,
+                          filt2);
+    res1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out76_r, hz_out89_r, hz_out1110_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out76_l, hz_out89_l, hz_out1110_l, filt0, filt1,
+                          filt2);
+    res2 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out87_r, hz_out910_r, hz_out1211_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out87_l, hz_out910_l, hz_out1211_l, filt0, filt1,
+                          filt2);
+    res3 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    LD4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_D2_UB(tp0, tp1, dst0);
+    INSERT_D2_UB(tp2, tp3, dst1);
+    out0 = PCKEV_XORI128_UB(res0, res1);
+    out1 = PCKEV_XORI128_UB(res2, res3);
+    AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+    ST8x4_UB(dst0, dst1, dst, stride);
 }
 
 void ff_avg_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
 {
-    avc_luma_mid_and_aver_dst_4x4_msa(src - (2 * stride) - 2,
-                                      stride, dst, stride);
+    const int32_t filt_const0 = 0xfffb0001;
+    const int32_t filt_const1 = 0x140014;
+    const int32_t filt_const2 = 0x1fffb;
+    uint32_t tp0, tp1, tp2, tp3;
+    v16u8 res, dst0 = { 0 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, res0, res1, filt0, filt1, filt2;
+    v8i16 hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r, hz_out54_r;
+    v8i16 hz_out65_r, hz_out76_r, hz_out87_r;
+    v4i32 tmp0, tmp1;
+
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+
+    filt0 = (v8i16) __msa_fill_w(filt_const0);
+    filt1 = (v8i16) __msa_fill_w(filt_const1);
+    filt2 = (v8i16) __msa_fill_w(filt_const2);
+
+    src -= ((2 * stride) + 2);
+
+    LD_SB5(src, stride, src0, src1, src2, src3, src4);
+    src += (5 * stride);
+    LD_SB4(src, stride, src5, src6, src7, src8);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, src5, mask0, mask1, mask2);
+    hz_out6 = AVC_HORZ_FILTER_SH(src6, src7, mask0, mask1, mask2);
+    hz_out8 = AVC_HORZ_FILTER_SH(src8, src8, mask0, mask1, mask2);
+    PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
+    PCKOD_D2_SH(hz_out4, hz_out4, hz_out6, hz_out6, hz_out5, hz_out7);
+    ILVR_H4_SH(hz_out1, hz_out0, hz_out2, hz_out1, hz_out3, hz_out2, hz_out4,
+               hz_out3, hz_out10_r, hz_out21_r, hz_out32_r, hz_out43_r);
+    ILVR_H4_SH(hz_out5, hz_out4, hz_out6, hz_out5, hz_out7, hz_out6, hz_out8,
+               hz_out7, hz_out54_r, hz_out65_r, hz_out76_r, hz_out87_r);
+
+    tmp0 = AVC_DOT_SW3_SW(hz_out10_r, hz_out32_r, hz_out54_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out21_r, hz_out43_r, hz_out65_r, filt0, filt1,
+                          filt2);
+    res0 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    tmp0 = AVC_DOT_SW3_SW(hz_out32_r, hz_out54_r, hz_out76_r, filt0, filt1,
+                          filt2);
+    tmp1 = AVC_DOT_SW3_SW(hz_out43_r, hz_out65_r, hz_out87_r, filt0, filt1,
+                          filt2);
+    res1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
+    LW4(dst, stride, tp0, tp1, tp2, tp3);
+    INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
+    res = PCKEV_XORI128_UB(res0, res1);
+    res = __msa_aver_u_b(res, dst0);
+    ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
 }

diff --git a/libavcodec/mips/hevc_macros_msa.h b/libavcodec/mips/hevc_macros_msa.h
index 7dcfea0..ea53812 100644
--- a/libavcodec/mips/hevc_macros_msa.h
+++ b/libavcodec/mips/hevc_macros_msa.h

@@ -21,43 +21,6 @@
 #ifndef AVCODEC_MIPS_HEVC_MACROS_MSA_H
 #define AVCODEC_MIPS_HEVC_MACROS_MSA_H
 
-#define HEVC_PCK_SW_SB2(in0, in1, out)                            \
-{                                                                 \
-    v8i16 tmp0_m;                                                 \
-                                                                  \
-    tmp0_m = __msa_pckev_h((v8i16) in0, (v8i16) in1);             \
-    out = (v4i32) __msa_pckev_b((v16i8) tmp0_m, (v16i8) tmp0_m);  \
-}
-
-#define HEVC_PCK_SW_SB4(in0, in1, in2, in3, out)                  \
-{                                                                 \
-    v8i16 tmp0_m, tmp1_m;                                         \
-                                                                  \
-    PCKEV_H2_SH(in0, in1, in2, in3, tmp0_m, tmp1_m);              \
-    out = (v4i32) __msa_pckev_b((v16i8) tmp1_m, (v16i8) tmp0_m);  \
-}
-
-#define HEVC_PCK_SW_SB8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1)  \
-{                                                                            \
-    v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                    \
-                                                                             \
-    PCKEV_H4_SH(in0, in1, in2, in3, in4, in5, in6, in7,                      \
-                tmp0_m, tmp1_m, tmp2_m, tmp3_m);                             \
-    PCKEV_B2_SW(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out1);                 \
-}
-
-#define HEVC_PCK_SW_SB12(in0, in1, in2, in3, in4, in5, in6, in7,   \
-                         in8, in9, in10, in11, out0, out1, out2)   \
-{                                                                  \
-    v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m, tmp4_m, tmp5_m;          \
-                                                                   \
-    PCKEV_H4_SH(in0, in1, in2, in3, in4, in5, in6, in7,            \
-                tmp0_m, tmp1_m, tmp2_m, tmp3_m);                   \
-    PCKEV_H2_SH(in8, in9, in10, in11, tmp4_m, tmp5_m);             \
-    PCKEV_B2_SW(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out1);       \
-    out2 = (v4i32) __msa_pckev_b((v16i8) tmp5_m, (v16i8) tmp4_m);  \
-}
-
 #define HEVC_FILT_8TAP_SH(in0, in1, in2, in3,                    \
                           filt0, filt1, filt2, filt3)            \
 ( {                                                              \
@@ -80,6 +43,15 @@
     out_m;                                                       \
 } )
 
+#define HEVC_FILT_4TAP_SH(in0, in1, filt0, filt1)                \
+( {                                                              \
+    v8i16 out_m;                                                 \
+                                                                 \
+    out_m = __msa_dotp_s_h((v16i8) in0, (v16i8) filt0);          \
+    out_m = __msa_dpadd_s_h(out_m, (v16i8) in1, (v16i8) filt1);  \
+    out_m;                                                       \
+} )
+
 #define HEVC_FILT_4TAP(in0, in1, filt0, filt1)           \
 ( {                                                      \
     v4i32 out_m;                                         \

diff --git a/libavcodec/mips/hevc_mc_bi_msa.c b/libavcodec/mips/hevc_mc_bi_msa.c
index 8208be3..b555517 100644
--- a/libavcodec/mips/hevc_mc_bi_msa.c
+++ b/libavcodec/mips/hevc_mc_bi_msa.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ * Copyright (c) 2015 - 2017 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
  *
  * This file is part of FFmpeg.
  *
@@ -22,6 +22,12 @@
 #include "libavcodec/mips/hevcdsp_mips.h"
 #include "libavcodec/mips/hevc_macros_msa.h"
 
+static const uint8_t ff_hevc_mask_arr[16 * 2] __attribute__((aligned(0x40))) = {
+    /* 8 width cases */
+    0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+    0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20
+};
+
 #define HEVC_BI_RND_CLIP2(in0, in1, vec0, vec1, rnd_val, out0, out1)  \
 {                                                                     \
     ADDS_SH2_SH(vec0, in0, vec1, in1, out0, out1);                    \
@@ -37,6 +43,21 @@
     HEVC_BI_RND_CLIP2(in2, in3, vec2, vec3, rnd_val, out2, out3);  \
 }
 
+#define HEVC_BI_RND_CLIP2_MAX_SATU(in0, in1, vec0, vec1, rnd_val,  \
+                                   out0, out1)                     \
+{                                                                  \
+    ADDS_SH2_SH(vec0, in0, vec1, in1, out0, out1);                 \
+    SRARI_H2_SH(out0, out1, rnd_val);                              \
+    CLIP_SH2_0_255_MAX_SATU(out0, out1);                           \
+}
+
+#define HEVC_BI_RND_CLIP4_MAX_SATU(in0, in1, in2, in3, vec0, vec1, vec2,    \
+                                   vec3,  rnd_val, out0, out1, out2, out3)  \
+{                                                                           \
+    HEVC_BI_RND_CLIP2_MAX_SATU(in0, in1, vec0, vec1, rnd_val, out0, out1);  \
+    HEVC_BI_RND_CLIP2_MAX_SATU(in2, in3, vec2, vec3, rnd_val, out2, out3);  \
+}
+
 static void hevc_bi_copy_4w_msa(uint8_t *src0_ptr,
                                 int32_t src_stride,
                                 int16_t *src1_ptr,
@@ -45,71 +66,59 @@
                                 int32_t dst_stride,
                                 int32_t height)
 {
+    uint32_t loop_cnt, tp0, tp1, tp2, tp3;
+    uint64_t tpd0, tpd1, tpd2, tpd3;
+    v16i8 src0 = { 0 }, src1 = { 0 };
     v16i8 zero = { 0 };
+    v8i16 in0 = { 0 }, in1 = { 0 }, in2 = { 0 }, in3 = { 0 };
+    v8i16 dst0, dst1, dst2, dst3;
 
     if (2 == height) {
-        v16i8 src0, src1;
-        v8i16 dst0, in0, in1;
-
-        LD_SB2(src0_ptr, src_stride, src0, src1);
-        LD_SH2(src1_ptr, src2_stride, in0, in1);
-
-        src0 = (v16i8) __msa_ilvr_w((v4i32) src1, (v4i32) src0);
-        in0 = (v8i16) __msa_ilvr_d((v2i64) in1, (v2i64) in0);
+        LW2(src0_ptr, src_stride, tp0, tp1);
+        INSERT_W2_SB(tp0, tp1, src0);
+        LD2(src1_ptr, src2_stride, tpd0, tpd1);
+        INSERT_D2_SH(tpd0, tpd1, in0);
 
         dst0 = (v8i16) __msa_ilvr_b(zero, src0);
         dst0 <<= 6;
         dst0 += in0;
         dst0 = __msa_srari_h(dst0, 7);
-        dst0 = CLIP_SH_0_255(dst0);
+        dst0 = CLIP_SH_0_255_MAX_SATU(dst0);
 
         dst0 = (v8i16) __msa_pckev_b((v16i8) dst0, (v16i8) dst0);
         ST4x2_UB(dst0, dst, dst_stride);
     } else if (4 == height) {
-        v16i8 src0, src1, src2, src3;
-        v8i16 dst0, dst1;
-        v8i16 in0, in1, in2, in3;
-
-        LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
-        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
-        ILVR_W2_SB(src1, src0, src3, src2, src0, src1);
-        ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
-        ILVR_B2_SH(zero, src0, zero, src1, dst0, dst1);
-
-        dst0 <<= 6;
-        dst1 <<= 6;
-        HEVC_BI_RND_CLIP2(in0, in1, dst0, dst1, 7, dst0, dst1);
-
+        LW4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+        INSERT_W4_SB(tp0, tp1, tp2, tp3, src0);
+        LD4(src1_ptr, src2_stride, tpd0, tpd1, tpd2, tpd3);
+        INSERT_D2_SH(tpd0, tpd1, in0);
+        INSERT_D2_SH(tpd2, tpd3, in1);
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        SLLI_2V(dst0, dst1, 6);
+        HEVC_BI_RND_CLIP2_MAX_SATU(in0, in1, dst0, dst1, 7, dst0, dst1);
         dst0 = (v8i16) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
         ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, dst_stride);
     } else if (0 == height % 8) {
-        uint32_t loop_cnt;
-        v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
-        v8i16 dst0, dst1, dst2, dst3;
-        v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
-
         for (loop_cnt = (height >> 3); loop_cnt--;) {
-            LD_SB8(src0_ptr, src_stride,
-                   src0, src1, src2, src3, src4, src5, src6, src7);
-            src0_ptr += (8 * src_stride);
-
-            LD_SH8(src1_ptr, src2_stride,
-                   in0, in1, in2, in3, in4, in5, in6, in7);
-            src1_ptr += (8 * src2_stride);
-
-            ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
-            ILVR_D2_SH(in5, in4, in7, in6, in2, in3);
-
-            ILVR_W4_SB(src1, src0, src3, src2, src5, src4, src7, src6,
-                       src0, src1, src2, src3);
-            ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                       dst0, dst1, dst2, dst3);
-
+            LW4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+            src0_ptr += 4 * src_stride;
+            INSERT_W4_SB(tp0, tp1, tp2, tp3, src0);
+            LW4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+            src0_ptr += 4 * src_stride;
+            INSERT_W4_SB(tp0, tp1, tp2, tp3, src1);
+            LD4(src1_ptr, src2_stride, tpd0, tpd1, tpd2, tpd3);
+            src1_ptr += (4 * src2_stride);
+            INSERT_D2_SH(tpd0, tpd1, in0);
+            INSERT_D2_SH(tpd2, tpd3, in1);
+            LD4(src1_ptr, src2_stride, tpd0, tpd1, tpd2, tpd3);
+            src1_ptr += (4 * src2_stride);
+            INSERT_D2_SH(tpd0, tpd1, in2);
+            INSERT_D2_SH(tpd2, tpd3, in3);
+            ILVRL_B2_SH(zero, src0, dst0, dst1);
+            ILVRL_B2_SH(zero, src1, dst2, dst3);
             SLLI_4V(dst0, dst1, dst2, dst3, 6);
-            HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                              dst0, dst1, dst2, dst3, 7,
-                              dst0, dst1, dst2, dst3);
-
+            HEVC_BI_RND_CLIP4_MAX_SATU(in0, in1, in2, in3, dst0, dst1, dst2,
+                                       dst3, 7, dst0, dst1, dst2, dst3);
             PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
             ST4x8_UB(dst0, dst1, dst, dst_stride);
             dst += (8 * dst_stride);
@@ -126,36 +135,39 @@
                                 int32_t height)
 {
     uint32_t loop_cnt;
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 out0, out1, out2, out3;
     v16i8 zero = { 0 };
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 src0 = { 0 }, src1 = { 0 }, src2 = { 0 }, src3 = { 0 };
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
 
     for (loop_cnt = (height >> 3); loop_cnt--;) {
-        LD_SB8(src0_ptr, src_stride,
-               src0, src1, src2, src3, src4, src5, src6, src7);
-        src0_ptr += (8 * src_stride);
+        LD4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+        src0_ptr += (4 * src_stride);
+        INSERT_D2_SB(tp0, tp1, src0);
+        INSERT_D2_SB(tp2, tp3, src1);
+        LD4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+        src0_ptr += (4 * src_stride);
+        INSERT_D2_SB(tp0, tp1, src2);
+        INSERT_D2_SB(tp2, tp3, src3);
         LD_SH8(src1_ptr, src2_stride, in0, in1, in2, in3, in4, in5, in6, in7);
         src1_ptr += (8 * src2_stride);
-        ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                   dst0, dst1, dst2, dst3);
-        ILVR_B4_SH(zero, src4, zero, src5, zero, src6, zero, src7,
-                   dst4, dst5, dst6, dst7);
-
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        ILVRL_B2_SH(zero, src1, dst2, dst3);
+        ILVRL_B2_SH(zero, src2, dst4, dst5);
+        ILVRL_B2_SH(zero, src3, dst6, dst7);
         SLLI_4V(dst0, dst1, dst2, dst3, 6);
         SLLI_4V(dst4, dst5, dst6, dst7, 6);
-        HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                          dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
-
-        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
-        ST6x4_UB(dst0, dst1, dst, dst_stride);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in0, in1, in2, in3, dst0, dst1, dst2, dst3,
+                                   7, dst0, dst1, dst2, dst3);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in4, in5, in6, in7, dst4, dst5, dst6, dst7,
+                                   7, dst4, dst5, dst6, dst7);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3);
+        ST6x4_UB(out0, out1, dst, dst_stride);
         dst += (4 * dst_stride);
-
-        HEVC_BI_RND_CLIP4(in4, in5, in6, in7,
-                          dst4, dst5, dst6, dst7, 7, dst4, dst5, dst6, dst7);
-
-        PCKEV_B2_SH(dst5, dst4, dst7, dst6, dst4, dst5);
-        ST6x4_UB(dst4, dst5, dst, dst_stride);
+        ST6x4_UB(out2, out3, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -168,100 +180,83 @@
                                 int32_t dst_stride,
                                 int32_t height)
 {
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 out0, out1, out2, out3;
+    v16i8 src0 = { 0 }, src1 = { 0 }, src2 = { 0 }, src3 = { 0 };
     v16i8 zero = { 0 };
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
 
     if (2 == height) {
-        v16i8 src0, src1;
-        v8i16 in0, in1;
-        v8i16 dst0, dst1;
-
-        LD_SB2(src0_ptr, src_stride, src0, src1);
+        LD2(src0_ptr, src_stride, tp0, tp1);
+        INSERT_D2_SB(tp0, tp1, src0);
         LD_SH2(src1_ptr, src2_stride, in0, in1);
-        ILVR_B2_SH(zero, src0, zero, src1, dst0, dst1);
-
-        dst0 <<= 6;
-        dst1 <<= 6;
-        HEVC_BI_RND_CLIP2(in0, in1, dst0, dst1, 7, dst0, dst1);
-
-        dst0 = (v8i16) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
-        ST8x2_UB(dst0, dst, dst_stride);
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        SLLI_2V(dst0, dst1, 6);
+        HEVC_BI_RND_CLIP2_MAX_SATU(in0, in1, dst0, dst1, 7, dst0, dst1);
+        out0 = (v16u8) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
+        ST8x2_UB(out0, dst, dst_stride);
     } else if (4 == height) {
-        v16i8 src0, src1, src2, src3;
-        v8i16 in0, in1, in2, in3;
-        v8i16 dst0, dst1, dst2, dst3;
-
-        LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
+        LD4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+        INSERT_D2_SB(tp0, tp1, src0);
+        INSERT_D2_SB(tp2, tp3, src1);
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        ILVRL_B2_SH(zero, src1, dst2, dst3);
         LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
-        ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                   dst0, dst1, dst2, dst3);
-
         SLLI_4V(dst0, dst1, dst2, dst3, 6);
-        HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                          dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
-
-        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
-        ST8x4_UB(dst0, dst1, dst, dst_stride);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in0, in1, in2, in3, dst0, dst1, dst2, dst3,
+                                   7, dst0, dst1, dst2, dst3);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
     } else if (6 == height) {
-        v16i8 src0, src1, src2, src3, src4, src5;
-        v8i16 in0, in1, in2, in3, in4, in5;
-        v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-
-        LD_SB6(src0_ptr, src_stride, src0, src1, src2, src3, src4, src5);
+        LD4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+        src0_ptr += 4 * src_stride;
+        INSERT_D2_SB(tp0, tp1, src0);
+        INSERT_D2_SB(tp2, tp3, src1);
+        LD2(src0_ptr, src_stride, tp0, tp1);
+        INSERT_D2_SB(tp0, tp1, src2);
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        ILVRL_B2_SH(zero, src1, dst2, dst3);
+        ILVRL_B2_SH(zero, src2, dst4, dst5);
         LD_SH6(src1_ptr, src2_stride, in0, in1, in2, in3, in4, in5);
-        ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                   dst0, dst1, dst2, dst3);
-        ILVR_B2_SH(zero, src4, zero, src5, dst4, dst5);
-
         SLLI_4V(dst0, dst1, dst2, dst3, 6);
-        dst4 <<= 6;
-        dst5 <<= 6;
-        HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                          dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
-        HEVC_BI_RND_CLIP2(in4, in5, dst4, dst5, 7, dst4, dst5);
-
-        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
-        dst2 = (v8i16) __msa_pckev_b((v16i8) dst5, (v16i8) dst4);
-        ST8x4_UB(dst0, dst1, dst, dst_stride);
+        SLLI_2V(dst4, dst5, 6);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in0, in1, in2, in3, dst0, dst1, dst2, dst3,
+                                   7, dst0, dst1, dst2, dst3);
+        HEVC_BI_RND_CLIP2_MAX_SATU(in4, in5, dst4, dst5, 7, dst4, dst5);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        ST8x4_UB(out0, out1, dst, dst_stride);
         dst += (4 * dst_stride);
-        ST8x2_UB(dst2, dst, dst_stride);
+        ST8x2_UB(out2, dst, dst_stride);
     } else if (0 == height % 8) {
-        v16i8 src0, src1, src2, src3;
-        v8i16 in0, in1, in2, in3;
-        v8i16 dst0, dst1, dst2, dst3;
         uint32_t loop_cnt;
 
         for (loop_cnt = (height >> 3); loop_cnt--;) {
-            LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
-            src0_ptr += (4 * src_stride);
-            LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
-            src1_ptr += (4 * src2_stride);
-            ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                       dst0, dst1, dst2, dst3);
-
+            LD4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+            src0_ptr += 4 * src_stride;
+            INSERT_D2_SB(tp0, tp1, src0);
+            INSERT_D2_SB(tp2, tp3, src1);
+            LD4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+            src0_ptr += 4 * src_stride;
+            INSERT_D2_SB(tp0, tp1, src2);
+            INSERT_D2_SB(tp2, tp3, src3);
+            ILVRL_B2_SH(zero, src0, dst0, dst1);
+            ILVRL_B2_SH(zero, src1, dst2, dst3);
+            ILVRL_B2_SH(zero, src2, dst4, dst5);
+            ILVRL_B2_SH(zero, src3, dst6, dst7);
+            LD_SH8(src1_ptr, src2_stride, in0, in1, in2, in3, in4, in5, in6,
+                   in7);
+            src1_ptr += (8 * src2_stride);
             SLLI_4V(dst0, dst1, dst2, dst3, 6);
-            HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                              dst0, dst1, dst2, dst3, 7,
-                              dst0, dst1, dst2, dst3);
-
-            PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
-            ST8x4_UB(dst0, dst1, dst, dst_stride);
-            dst += (4 * dst_stride);
-
-            LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
-            src0_ptr += (4 * src_stride);
-            LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
-            src1_ptr += (4 * src2_stride);
-            ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                       dst0, dst1, dst2, dst3);
-
-            SLLI_4V(dst0, dst1, dst2, dst3, 6);
-            HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                              dst0, dst1, dst2, dst3, 7,
-                              dst0, dst1, dst2, dst3);
-
-            PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
-            ST8x4_UB(dst0, dst1, dst, dst_stride);
-            dst += (4 * dst_stride);
+            SLLI_4V(dst4, dst5, dst6, dst7, 6);
+            HEVC_BI_RND_CLIP4_MAX_SATU(in0, in1, in2, in3, dst0, dst1, dst2,
+                                       dst3, 7, dst0, dst1, dst2, dst3);
+            HEVC_BI_RND_CLIP4_MAX_SATU(in4, in5, in6, in7, dst4, dst5, dst6,
+                                       dst7, 7, dst4, dst5, dst6, dst7);
+            PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+            PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3);
+            ST8x8_UB(out0, out1, out2, out3, dst, dst_stride);
+            dst += (8 * dst_stride);
         }
     }
 }
@@ -275,12 +270,13 @@
                                  int32_t height)
 {
     uint32_t loop_cnt;
+    v16i8 zero = { 0 };
+    v16u8 out0, out1, out2;
     v16i8 src0, src1, src2, src3;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v16i8 zero = { 0 };
 
-    for (loop_cnt = (16 >> 2); loop_cnt--;) {
+    for (loop_cnt = 4; loop_cnt--;) {
         LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
         src0_ptr += (4 * src_stride);
 
@@ -288,88 +284,21 @@
         LD_SH4(src1_ptr + 8, src2_stride, in4, in5, in6, in7);
         src1_ptr += (4 * src2_stride);
         ILVR_D2_SH(in5, in4, in7, in6, in4, in5);
-        ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                   dst0, dst1, dst2, dst3);
-
+        ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3, dst0, dst1,
+                   dst2, dst3);
         SLLI_4V(dst0, dst1, dst2, dst3, 6);
         ILVL_W2_SB(src1, src0, src3, src2, src0, src1);
         ILVR_B2_SH(zero, src0, zero, src1, dst4, dst5);
-        dst4 <<= 6;
-        dst5 <<= 6;
-        HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                          dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
-        HEVC_BI_RND_CLIP2(in4, in5, dst4, dst5, 7, dst4, dst5);
-
-        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
-        dst2 = (v8i16) __msa_pckev_b((v16i8) dst5, (v16i8) dst4);
-        ST12x4_UB(dst0, dst1, dst2, dst, dst_stride);
+        SLLI_2V(dst4, dst5, 6);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in0, in1, in2, in3, dst0, dst1, dst2, dst3,
+                                   7, dst0, dst1, dst2, dst3);
+        HEVC_BI_RND_CLIP2_MAX_SATU(in4, in5, dst4, dst5, 7, dst4, dst5);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        ST12x4_UB(out0, out1, out2, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
 
-static void hevc_bi_copy_16multx4mult_msa(uint8_t *src0_ptr,
-                                          int32_t src_stride,
-                                          int16_t *src1_ptr,
-                                          int32_t src2_stride,
-                                          uint8_t *dst,
-                                          int32_t dst_stride,
-                                          int32_t height,
-                                          int32_t width)
-{
-    uint32_t loop_cnt;
-    uint32_t cnt;
-    uint8_t *src0_ptr_tmp;
-    int16_t *src1_ptr_tmp;
-    uint8_t *dst_tmp;
-    v16i8 zero = { 0 };
-
-    for (cnt = (width >> 4); cnt--;) {
-        src0_ptr_tmp = src0_ptr;
-        src1_ptr_tmp = src1_ptr;
-        dst_tmp = dst;
-
-        for (loop_cnt = (height >> 2); loop_cnt--;) {
-            v16i8 src0, src1, src2, src3;
-            v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
-            v8i16 dst0_r, dst1_r, dst2_r, dst3_r;
-            v8i16 dst0_l, dst1_l, dst2_l, dst3_l;
-
-            LD_SB4(src0_ptr_tmp, src_stride, src0, src1, src2, src3);
-            src0_ptr_tmp += (4 * src_stride);
-            LD_SH4(src1_ptr_tmp, src2_stride, in0, in1, in2, in3);
-            LD_SH4(src1_ptr_tmp + 8, src2_stride, in4, in5, in6, in7);
-            src1_ptr_tmp += (4 * src2_stride);
-
-            ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                       dst0_r, dst1_r, dst2_r, dst3_r);
-            ILVL_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                       dst0_l, dst1_l, dst2_l, dst3_l);
-
-            SLLI_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
-            SLLI_4V(dst0_l, dst1_l, dst2_l, dst3_l, 6);
-            HEVC_BI_RND_CLIP4(in0, in1, in4, in5,
-                              dst0_r, dst1_r, dst0_l, dst1_l, 7,
-                              dst0_r, dst1_r, dst0_l, dst1_l);
-
-            PCKEV_B2_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r, dst1_r);
-            ST_SH2(dst0_r, dst1_r, dst_tmp, dst_stride);
-            dst_tmp += (2 * dst_stride);
-
-            HEVC_BI_RND_CLIP4(in2, in3, in6, in7,
-                              dst2_r, dst3_r, dst2_l, dst3_l, 7,
-                              dst2_r, dst3_r, dst2_l, dst3_l);
-
-            PCKEV_B2_SH(dst2_l, dst2_r, dst3_l, dst3_r, dst2_r, dst3_r);
-            ST_SH2(dst2_r, dst3_r, dst_tmp, dst_stride);
-            dst_tmp += (2 * dst_stride);
-        }
-
-        src0_ptr += 16;
-        src1_ptr += 16;
-        dst += 16;
-    }
-}
-
 static void hevc_bi_copy_16w_msa(uint8_t *src0_ptr,
                                  int32_t src_stride,
                                  int16_t *src1_ptr,
@@ -378,8 +307,34 @@
                                  int32_t dst_stride,
                                  int32_t height)
 {
-    hevc_bi_copy_16multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                  dst, dst_stride, height, 16);
+    uint32_t loop_cnt;
+    v16u8 out0, out1, out2, out3;
+    v16i8 src0, src1, src2, src3;
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v8i16 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
+    v16i8 zero = { 0 };
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
+        src0_ptr += (4 * src_stride);
+        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
+        LD_SH4(src1_ptr + 8, src2_stride, in4, in5, in6, in7);
+        src1_ptr += (4 * src2_stride);
+        ILVRL_B2_SH(zero, src0, dst0_r, dst0_l);
+        ILVRL_B2_SH(zero, src1, dst1_r, dst1_l);
+        ILVRL_B2_SH(zero, src2, dst2_r, dst2_l);
+        ILVRL_B2_SH(zero, src3, dst3_r, dst3_l);
+        SLLI_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+        SLLI_4V(dst0_l, dst1_l, dst2_l, dst3_l, 6);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in0, in1, in4, in5, dst0_r, dst1_r, dst0_l,
+                                   dst1_l, 7, dst0_r, dst1_r, dst0_l, dst1_l);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in2, in3, in6, in7, dst2_r, dst3_r, dst2_l,
+                                   dst3_l, 7, dst2_r, dst3_r, dst2_l, dst3_l);
+        PCKEV_B2_UB(dst0_l, dst0_r, dst1_l, dst1_r, out0, out1);
+        PCKEV_B2_UB(dst2_l, dst2_r, dst3_l, dst3_r, out2, out3);
+        ST_UB4(out0, out1, out2, out3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
 }
 
 static void hevc_bi_copy_24w_msa(uint8_t *src0_ptr,
@@ -390,11 +345,42 @@
                                  int32_t dst_stride,
                                  int32_t height)
 {
-    hevc_bi_copy_16multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                  dst, dst_stride, height, 16);
+    uint32_t loop_cnt;
+    v16u8 out0, out1, out2, out3, out4, out5;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, zero = { 0 };
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, dst9, dst10;
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, dst11;
 
-    hevc_bi_copy_8w_msa(src0_ptr + 16, src_stride, src1_ptr + 16, src2_stride,
-                        dst + 16, dst_stride, height);
+    for (loop_cnt = 8; loop_cnt--;) {
+        LD_SB4(src0_ptr, src_stride, src0, src1, src4, src5);
+        LD_SB4(src0_ptr + 16, src_stride, src2, src3, src6, src7);
+        src0_ptr += (4 * src_stride);
+        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
+        LD_SH4(src1_ptr + 8, src2_stride, in4, in5, in6, in7);
+        LD_SH4(src1_ptr + 16, src2_stride, in8, in9, in10, in11);
+        src1_ptr += (4 * src2_stride);
+
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        ILVRL_B2_SH(zero, src1, dst2, dst3);
+        ILVR_B2_SH(zero, src2, zero, src3, dst4, dst5);
+        ILVRL_B2_SH(zero, src4, dst6, dst7);
+        ILVRL_B2_SH(zero, src5, dst8, dst9);
+        ILVR_B2_SH(zero, src6, zero, src7, dst10, dst11);
+        SLLI_4V(dst0, dst1, dst2, dst3, 6);
+        SLLI_4V(dst4, dst5, dst6, dst7, 6);
+        SLLI_4V(dst8, dst9, dst10, dst11, 6);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in0, in4, in1, in5, dst0, dst1, dst2, dst3,
+                                   7, dst0, dst1, dst2, dst3);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in8, in9, in2, in6, dst4, dst5, dst6, dst7,
+                                   7, dst4, dst5, dst6, dst7);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in3, in7, in10, in11, dst8, dst9, dst10,
+                                   dst11, 7, dst8, dst9, dst10, dst11);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        PCKEV_B3_UB(dst7, dst6, dst9, dst8, dst11, dst10, out3, out4, out5);
+        ST_UB4(out0, out1, out3, out4, dst, dst_stride);
+        ST8x4_UB(out2, out5, dst + 16, dst_stride);
+        dst += (4 * dst_stride);
+    }
 }
 
 static void hevc_bi_copy_32w_msa(uint8_t *src0_ptr,
@@ -405,8 +391,40 @@
                                  int32_t dst_stride,
                                  int32_t height)
 {
-    hevc_bi_copy_16multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                  dst, dst_stride, height, 32);
+    uint32_t loop_cnt;
+    v16u8 out0, out1, out2, out3;
+    v16i8 src0, src1, src2, src3;
+    v16i8 zero = { 0 };
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        LD_SB2(src0_ptr, 16, src0, src1);
+        src0_ptr += src_stride;
+        LD_SB2(src0_ptr, 16, src2, src3);
+        src0_ptr += src_stride;
+        LD_SH4(src1_ptr, 8, in0, in1, in2, in3);
+        src1_ptr += src2_stride;
+        LD_SH4(src1_ptr, 8, in4, in5, in6, in7);
+        src1_ptr += src2_stride;
+
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        ILVRL_B2_SH(zero, src1, dst2, dst3);
+        ILVRL_B2_SH(zero, src2, dst4, dst5);
+        ILVRL_B2_SH(zero, src3, dst6, dst7);
+        SLLI_4V(dst0, dst1, dst2, dst3, 6);
+        SLLI_4V(dst4, dst5, dst6, dst7, 6);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in0, in1, in2, in3, dst0, dst1, dst2, dst3,
+                                   7, dst0, dst1, dst2, dst3);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in4, in5, in6, in7, dst4, dst5, dst6, dst7,
+                                   7, dst4, dst5, dst6, dst7);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3);
+        ST_UB2(out0, out1, dst, 16);
+        dst += dst_stride;
+        ST_UB2(out2, out3, dst, 16);
+        dst += dst_stride;
+    }
 }
 
 static void hevc_bi_copy_48w_msa(uint8_t *src0_ptr,
@@ -417,8 +435,50 @@
                                  int32_t dst_stride,
                                  int32_t height)
 {
-    hevc_bi_copy_16multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                  dst, dst_stride, height, 48);
+    uint32_t loop_cnt;
+    v16u8 out0, out1, out2, out3, out4, out5;
+    v16i8 src0, src1, src2, src3, src4, src5;
+    v16i8 zero = { 0 };
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, dst9, dst10;
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, dst11;
+
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        LD_SB3(src0_ptr, 16, src0, src1, src2);
+        src0_ptr += src_stride;
+        LD_SB3(src0_ptr, 16, src3, src4, src5);
+        src0_ptr += src_stride;
+
+        LD_SH6(src1_ptr, 8, in0, in1, in2, in3, in4, in5);
+        src1_ptr += src2_stride;
+        LD_SH6(src1_ptr, 8, in6, in7, in8, in9, in10, in11);
+        src1_ptr += src2_stride;
+
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        ILVRL_B2_SH(zero, src1, dst2, dst3);
+        ILVRL_B2_SH(zero, src2, dst4, dst5);
+        ILVRL_B2_SH(zero, src3, dst6, dst7);
+        ILVRL_B2_SH(zero, src4, dst8, dst9);
+        ILVRL_B2_SH(zero, src5, dst10, dst11);
+
+        SLLI_4V(dst0, dst1, dst2, dst3, 6);
+        SLLI_4V(dst4, dst5, dst6, dst7, 6);
+        SLLI_4V(dst8, dst9, dst10, dst11, 6);
+
+        HEVC_BI_RND_CLIP4_MAX_SATU(in0, in1, in2, in3, dst0, dst1, dst2, dst3,
+                                   7, dst0, dst1, dst2, dst3);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in4, in5, in6, in7, dst4, dst5, dst6, dst7,
+                                   7, dst4, dst5, dst6, dst7);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in8, in9, in10, in11, dst8, dst9, dst10,
+                                   dst11, 7, dst8, dst9, dst10, dst11);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        PCKEV_B3_UB(dst7, dst6, dst9, dst8, dst11, dst10, out3, out4, out5);
+        ST_UB2(out0, out1, dst, 16);
+        ST_UB(out2, dst + 32);
+        dst += dst_stride;
+        ST_UB2(out3, out4, dst, 16);
+        ST_UB(out5, dst + 32);
+        dst += dst_stride;
+    }
 }
 
 static void hevc_bi_copy_64w_msa(uint8_t *src0_ptr,
@@ -429,8 +489,35 @@
                                  int32_t dst_stride,
                                  int32_t height)
 {
-    hevc_bi_copy_16multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                  dst, dst_stride, height, 64);
+    uint32_t loop_cnt;
+    v16u8 out0, out1, out2, out3;
+    v16i8 src0, src1, src2, src3;
+    v16i8 zero = { 0 };
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+
+    for (loop_cnt = height; loop_cnt--;) {
+        LD_SB4(src0_ptr, 16, src0, src1, src2, src3);
+        src0_ptr += src_stride;
+        LD_SH8(src1_ptr, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+        src1_ptr += src2_stride;
+
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        ILVRL_B2_SH(zero, src1, dst2, dst3);
+        ILVRL_B2_SH(zero, src2, dst4, dst5);
+        ILVRL_B2_SH(zero, src3, dst6, dst7);
+        SLLI_4V(dst0, dst1, dst2, dst3, 6);
+        SLLI_4V(dst4, dst5, dst6, dst7, 6);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in0, in1, in2, in3, dst0, dst1, dst2, dst3,
+                                   7, dst0, dst1, dst2, dst3);
+        HEVC_BI_RND_CLIP4_MAX_SATU(in4, in5, in6, in7, dst4, dst5, dst6, dst7,
+                                   7, dst4, dst5, dst6, dst7);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3);
+
+        ST_UB4(out0, out1, out2, out3, dst, 16);
+        dst += dst_stride;
+    }
 }
 
 static void hevc_hz_bi_8t_4w_msa(uint8_t *src0_ptr,
@@ -450,7 +537,7 @@
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
 
     src0_ptr -= 3;
 
@@ -476,26 +563,26 @@
         ILVR_D2_SH(in5, in4, in7, in6, in2, in3);
         XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
 
-        VSHF_B4_SB(src0, src1, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        VSHF_B4_SB(src2, src3, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        VSHF_B4_SB(src4, src5, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        VSHF_B4_SB(src6, src7, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src4, src5, src6, src7, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src5, src6, src7, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec0, vec1);
+        VSHF_B2_SB(src4, src5, src6, src7, mask2, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec0, vec1);
+        VSHF_B2_SB(src4, src5, src6, src7, mask3, mask3, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt3, filt3, filt3, filt3, dst0,
+                     dst1, dst2, dst3);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
@@ -523,7 +610,7 @@
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 in0, in1, in2, in3;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
 
     src0_ptr -= 3;
 
@@ -544,26 +631,26 @@
         src1_ptr += (4 * src2_stride);
         XORI_B4_128_SB(src0, src1, src2, src3);
 
-        VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt3, filt3, filt3, filt3, dst0,
+                     dst1, dst2, dst3);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
@@ -583,10 +670,83 @@
                                   const int8_t *filter,
                                   int32_t height)
 {
-    hevc_hz_bi_8t_8w_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                         dst, dst_stride, filter, height);
-    hevc_hz_bi_8t_4w_msa(src0_ptr + 8, src_stride, src1_ptr + 8, src2_stride,
-                         dst + 8, dst_stride, filter, height);
+    uint32_t loop_cnt;
+    int32_t tmp0, tmp1;
+    int64_t tmp2, tmp3;
+    v16i8 src0, src1, src2, src3;
+    v16i8 vec0, vec1, vec2;
+    v8i16 filt0, filt1, filt2, filt3;
+    v16i8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7;
+    v8i16 dst0, dst1, dst2;
+    v8i16 in0, in1, in2, in3;
+    v8i16 filter_vec, const_vec;
+
+    src0_ptr -= 3;
+    const_vec = __msa_ldi_h(128);
+    const_vec <<= 6;
+
+    filter_vec = LD_SH(filter);
+    SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+    mask4 = LD_SB(&ff_hevc_mask_arr[16]);
+    mask5 = mask4 + 2;
+    mask6 = mask4 + 4;
+    mask7 = mask4 + 6;
+
+    for (loop_cnt = 8; loop_cnt--;) {
+        LD_SB2(src0_ptr, 8, src0, src1);
+        src0_ptr += src_stride;
+        LD_SB2(src0_ptr, 8, src2, src3);
+        src0_ptr += src_stride;
+        LD_SH2(src1_ptr, 8, in0, in1);
+        src1_ptr += src2_stride;
+        LD_SH2(src1_ptr, 8, in2, in3);
+        src1_ptr += src2_stride;
+        XORI_B4_128_SB(src0, src1, src2, src3);
+
+        dst0 = const_vec;
+        dst1 = const_vec;
+        dst2 = const_vec;
+
+        VSHF_B3_SB(src0, src0, src1, src3, src2, src2, mask0, mask4, mask0,
+                   vec0, vec1, vec2);
+        DPADD_SB2_SH(vec0, vec1, filt0, filt0, dst0, dst1);
+        dst2 = __msa_dpadd_s_h(dst2, vec2, (v16i8) filt0);
+        VSHF_B3_SB(src0, src0, src1, src3, src2, src2, mask1, mask5, mask1,
+                   vec0, vec1, vec2);
+        DPADD_SB2_SH(vec0, vec1, filt1, filt1, dst0, dst1);
+        dst2 = __msa_dpadd_s_h(dst2, vec2, (v16i8) filt1);
+        VSHF_B3_SB(src0, src0, src1, src3, src2, src2, mask2, mask6, mask2,
+                   vec0, vec1, vec2);
+        DPADD_SB2_SH(vec0, vec1, filt2, filt2, dst0, dst1);
+        dst2 = __msa_dpadd_s_h(dst2, vec2, (v16i8) filt2);
+        VSHF_B3_SB(src0, src0, src1, src3, src2, src2, mask3, mask7, mask3,
+                   vec0, vec1, vec2);
+        DPADD_SB2_SH(vec0, vec1, filt3, filt3, dst0, dst1);
+        dst2 = __msa_dpadd_s_h(dst2, vec2, (v16i8) filt3);
+
+        in1 = (v8i16) __msa_pckev_d((v2i64) in3, (v2i64) in1);
+        HEVC_BI_RND_CLIP2(in0, in1, dst0, dst1, 7, dst0, dst1);
+        dst2 = __msa_adds_s_h(in2, dst2);
+        dst2 = __msa_srari_h(dst2, 7);
+        dst2 = CLIP_SH_0_255(dst2);
+        PCKEV_B2_SH(dst1, dst0, dst2, dst2, dst0, dst1);
+
+        tmp2 = __msa_copy_s_d((v2i64) dst0, 0);
+        tmp0 = __msa_copy_s_w((v4i32) dst0, 2);
+        tmp3 = __msa_copy_s_d((v2i64) dst1, 0);
+        tmp1 = __msa_copy_s_w((v4i32) dst0, 3);
+        SD(tmp2, dst);
+        SW(tmp0, dst + 8);
+        dst += dst_stride;
+        SD(tmp3, dst);
+        SW(tmp1, dst + 8);
+        dst += dst_stride;
+    }
 }
 
 static void hevc_hz_bi_8t_16w_msa(uint8_t *src0_ptr,
@@ -606,7 +766,7 @@
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 in0, in1, in2, in3;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
 
     src0_ptr -= 3;
     const_vec = __msa_ldi_h(128);
@@ -630,26 +790,26 @@
         src1_ptr += src2_stride;
         XORI_B4_128_SB(src0, src1, src2, src3);
 
-        VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt3, filt3, filt3, filt3, dst0,
+                     dst1, dst2, dst3);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
@@ -678,7 +838,7 @@
     v8i16 dst0, dst1, dst2;
     v8i16 in0, in1, in2;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
 
     src0_ptr = src0_ptr - 3;
     const_vec = __msa_ldi_h(128);
@@ -703,21 +863,21 @@
         src1_ptr += src2_stride;
         XORI_B2_128_SB(src0, src1);
 
-        VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7,
-                   vec0, vec1, vec2, vec3);
         dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask4, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src0, src0, mask0, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt1, dst0,
+                     dst1, dst2, dst0);
+        VSHF_B2_SB(src0, src1, src1, src1, mask5, mask1, vec0, vec1);
+        VSHF_B2_SB(src0, src0, src0, src1, mask2, mask6, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt2, filt2, dst1,
+                     dst2, dst0, dst1);
+        VSHF_B2_SB(src1, src1, src0, src0, mask2, mask3, vec0, vec1);
+        VSHF_B2_SB(src0, src1, src1, src1, mask7, mask3, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt3, filt3, filt3, dst2,
+                     dst0, dst1, dst2);
 
         HEVC_BI_RND_CLIP2(in0, in1, dst0, dst1, 7, dst0, dst1);
         dst2 = __msa_adds_s_h(dst2, in2);
@@ -749,7 +909,7 @@
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 in0, in1, in2, in3;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
 
     src0_ptr -= 3;
     const_vec = __msa_ldi_h(128);
@@ -774,26 +934,26 @@
         src1_ptr += src2_stride;
         XORI_B3_128_SB(src0, src1, src2);
 
-        VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7,
-                   vec0, vec1, vec2, vec3);
         dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask4, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask1, mask5, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask2, mask6, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask2, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask3, mask7, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask3, mask3, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt3, filt3, filt3, filt3, dst0,
+                     dst1, dst2, dst3);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
@@ -822,7 +982,7 @@
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v8i16 in0, in1, in2, in3, in4, in5;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
 
     src0_ptr -= 3;
 
@@ -840,62 +1000,53 @@
     mask6 = mask0 + 12;
     mask7 = mask0 + 14;
 
-    for (loop_cnt = height; loop_cnt--;) {
-        LD_SB2(src0_ptr, 16, src0, src1);
-        XORI_B2_128_SB(src0, src1);
-        LD_SH2(src1_ptr, 8, in0, in1);
-
-        VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
-        dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7,
-                   vec0, vec1, vec2, vec3);
-        dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-
-        HEVC_BI_RND_CLIP2(in0, in1, dst0, dst1, 7, dst0, dst1);
-
-        tmp0 = __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
-        ST_SB(tmp0, dst);
-
-        LD_SB2(src0_ptr + 32, 8, src2, src3);
-        XORI_B2_128_SB(src2, src3);
+    for (loop_cnt = 64; loop_cnt--;) {
+        LD_SB3(src0_ptr, 16, src0, src1, src2);
+        src3 = LD_SB(src0_ptr + 40);
         src0_ptr += src_stride;
+        LD_SH4(src1_ptr, 8, in0, in1, in2, in3);
+        XORI_B4_128_SB(src0, src1, src2, src3);
 
-        LD_SH2(src1_ptr + 16, 8, in2, in3);
-
-        VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
+        dst0 = const_vec;
+        dst1 = const_vec;
         dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        VSHF_B4_SB(src1, src2, mask4, mask5, mask6, mask7,
-                   vec0, vec1, vec2, vec3);
         dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
 
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask4, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src2, mask0, mask4, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask1, mask5, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src2, mask1, mask5, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask2, mask6, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src2, mask2, mask6, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask3, mask7, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src2, mask3, mask7, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt3, filt3, filt3, filt3, dst0,
+                     dst1, dst2, dst3);
+        HEVC_BI_RND_CLIP2(in0, in1, dst0, dst1, 7, dst0, dst1);
         HEVC_BI_RND_CLIP2(in2, in3, dst2, dst3, 7, dst2, dst3);
-
-        tmp1 = __msa_pckev_b((v16i8) dst3, (v16i8) dst2);
+        PCKEV_B2_SB(dst1, dst0, dst3, dst2, tmp0, tmp1);
+        ST_SB(tmp0, dst);
         ST_SB(tmp1, dst + 16);
 
         LD_SH2(src1_ptr + 32, 8, in4, in5);
         src1_ptr += src2_stride;
 
-        VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst4 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst4, dst4, dst4, dst4);
-        VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst5 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst5, dst5, dst5, dst5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt1, filt1, dst4,
+                     dst5, dst4, dst5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt3, filt3, dst4,
+                     dst5, dst4, dst5);
 
         HEVC_BI_RND_CLIP2(in4, in5, dst4, dst5, 7, dst4, dst5);
 
@@ -914,14 +1065,10 @@
                                   const int8_t *filter,
                                   int32_t height)
 {
-    uint8_t *src0_ptr_tmp;
-    uint8_t *dst_tmp;
-    int16_t *src1_ptr_tmp;
     uint32_t loop_cnt;
-    uint32_t cnt;
-    v16i8 src0, src1, src2, tmp0, tmp1;
+    v16i8 src0, src1, src2, src3, src4, src5, tmp0, tmp1;
     v8i16 filt0, filt1, filt2, filt3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1, mask2, mask3, mask4, mask5, mask6, mask7;
     v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3;
@@ -945,48 +1092,74 @@
     mask7 = mask0 + 14;
 
     for (loop_cnt = height; loop_cnt--;) {
-        src0_ptr_tmp = src0_ptr;
-        dst_tmp = dst;
-        src1_ptr_tmp = src1_ptr;
+        LD_SB2(src0_ptr, 16, src0, src1);
+        src2 = LD_SB(src0_ptr + 24);
+        LD_SB2(src0_ptr + 32, 16, src3, src4);
+        src5 = LD_SB(src0_ptr + 56);
+        LD_SH4(src1_ptr, 8, in0, in1, in2, in3);
+        XORI_B3_128_SB(src0, src1, src2);
 
-        for (cnt = 2; cnt--;) {
-            LD_SB2(src0_ptr_tmp, 16, src0, src1);
-            src2 = LD_SB(src0_ptr_tmp + 24);
-            src0_ptr_tmp += 32;
-            LD_SH4(src1_ptr_tmp, 8, in0, in1, in2, in3);
-            src1_ptr_tmp += 32;
-            XORI_B3_128_SB(src0, src1, src2);
+        dst0 = const_vec;
+        dst1 = const_vec;
+        dst2 = const_vec;
+        dst3 = const_vec;
 
-            VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
-                       vec0, vec1, vec2, vec3);
-            dst0 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst0, dst0, dst0, dst0);
-            VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7,
-                       vec0, vec1, vec2, vec3);
-            dst1 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst1, dst1, dst1, dst1);
-            VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
-                       vec0, vec1, vec2, vec3);
-            dst2 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst2, dst2, dst2, dst2);
-            VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
-                       vec0, vec1, vec2, vec3);
-            dst3 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst3, dst3, dst3, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask4, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask1, mask5, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask2, mask6, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask2, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask3, mask7, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask3, mask3, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt3, filt3, filt3, filt3, dst0,
+                     dst1, dst2, dst3);
 
-            HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                              dst0, dst1, dst2, dst3, 7,
-                              dst0, dst1, dst2, dst3);
+        HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
+                          dst0, dst1, dst2, dst3, 7,
+                          dst0, dst1, dst2, dst3);
 
-            PCKEV_B2_SB(dst1, dst0, dst3, dst2, tmp0, tmp1);
-            ST_SB2(tmp0, tmp1, dst_tmp, 16);
-            dst_tmp += 32;
-        }
+        PCKEV_B2_SB(dst1, dst0, dst3, dst2, tmp0, tmp1);
+        ST_SB2(tmp0, tmp1, dst, 16);
 
+        src0 = src3;
+        src1 = src4;
+        src2 = src5;
+
+        LD_SH4(src1_ptr + 32, 8, in0, in1, in2, in3);
+        XORI_B3_128_SB(src0, src1, src2);
+
+        dst0 = const_vec;
+        dst1 = const_vec;
+        dst2 = const_vec;
+        dst3 = const_vec;
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask4, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask1, mask5, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask2, mask6, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask2, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask3, mask7, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask3, mask3, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt3, filt3, filt3, filt3, dst0,
+                     dst1, dst2, dst3);
+        HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
+                          dst0, dst1, dst2, dst3, 7,
+                          dst0, dst1, dst2, dst3);
+        PCKEV_B2_SB(dst1, dst0, dst3, dst2, tmp0, tmp1);
+        ST_SB2(tmp0, tmp1, dst + 32, 16);
         src1_ptr += src2_stride;
         src0_ptr += src_stride;
         dst += dst_stride;
@@ -1447,30 +1620,30 @@
                                  int32_t height)
 {
     uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-    v8i16 in0, in1;
+    uint64_t tp0, tp1;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v8i16 in0 = { 0 }, in1 = { 0 };
     v8i16 filt0, filt1, filt2, filt3;
-    v4i32 filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 filt_h0, filt_h1, filt_h2, filt_h3;
     v16i8 mask1, mask2, mask3;
     v8i16 filter_vec, const_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
-    v8i16 dst30, dst41, dst52, dst63, dst66, dst87;
-    v4i32 dst0_r, dst1_r, in0_r, in0_l;
-    v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
-    v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
-    v8u16 mask4 = { 0, 4, 1, 5, 2, 6, 3, 7 };
+    v8i16 out0, out1;
+    v8i16 dst30, dst41, dst52, dst63, dst66, dst97, dst108;
+    v8i16 dst10, dst32, dst54, dst76, dst98, dst21, dst43, dst65, dst87, dst109;
+    v4i32 dst0, dst1, dst2, dst3;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
 
     src0_ptr -= ((3 * src_stride) + 3);
     filter_vec = LD_SH(filter_x);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W4_SW(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
 
     mask1 = mask0 + 2;
     mask2 = mask0 + 4;
@@ -1491,71 +1664,77 @@
     VSHF_B4_SB(src3, src6, mask0, mask1, mask2, mask3,
                vec12, vec13, vec14, vec15);
 
-    dst30 = const_vec;
-    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                 dst30, dst30, dst30, dst30);
-    dst41 = const_vec;
-    DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                 dst41, dst41, dst41, dst41);
-    dst52 = const_vec;
-    DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                 dst52, dst52, dst52, dst52);
-    dst63 = const_vec;
-    DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2, filt3,
-                 dst63, dst63, dst63, dst63);
+    dst30 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                              filt3);
+    dst41 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                              filt3);
+    dst52 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                              filt3);
+    dst63 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2,
+                              filt3);
 
-    ILVR_H3_SH(dst41, dst30, dst52, dst41, dst63, dst52,
-               dst10_r, dst21_r, dst32_r);
-    dst43_r = __msa_ilvl_h(dst41, dst30);
-    dst54_r = __msa_ilvl_h(dst52, dst41);
-    dst65_r = __msa_ilvl_h(dst63, dst52);
+    ILVRL_H2_SH(dst41, dst30, dst10, dst43);
+    ILVRL_H2_SH(dst52, dst41, dst21, dst54);
+    ILVRL_H2_SH(dst63, dst52, dst32, dst65);
+
     dst66 = (v8i16) __msa_splati_d((v2i64) dst63, 1);
 
-    for (loop_cnt = height >> 1; loop_cnt--;) {
-        LD_SB2(src0_ptr, src_stride, src7, src8);
-        src0_ptr += (2 * src_stride);
-        LD_SH2(src1_ptr, src2_stride, in0, in1);
+    for (loop_cnt = height >> 2; loop_cnt--;) {
+        LD_SB4(src0_ptr, src_stride, src7, src8, src9, src10);
+        src0_ptr += (4 * src_stride);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        INSERT_D2_SH(tp0, tp1, in0);
+        src1_ptr += (2 * src2_stride);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        INSERT_D2_SH(tp0, tp1, in1);
         src1_ptr += (2 * src2_stride);
 
-        in0 = (v8i16) __msa_ilvr_d((v2i64) in1, (v2i64) in0);
-        XORI_B2_128_SB(src7, src8);
-
-        VSHF_B4_SB(src7, src8, mask0, mask1, mask2, mask3,
+        VSHF_B4_SB(src7, src9, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst87 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst87, dst87, dst87, dst87);
-        dst76_r = __msa_ilvr_h(dst87, dst66);
-        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
-                                filt_h0, filt_h1, filt_h2, filt_h3);
-        dst87_r = __msa_vshf_h((v8i16) mask4, dst87, dst87);
-        dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r,
-                                filt_h0, filt_h1, filt_h2, filt_h3);
+        VSHF_B4_SB(src8, src10, mask0, mask1, mask2, mask3,
+                   vec4, vec5, vec6, vec7);
+        dst97 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                  filt3);
+        dst108 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                   filt3);
 
-        dst0_r >>= 6;
-        dst1_r >>= 6;
-        UNPCK_SH_SW(in0, in0_r, in0_l);
-        dst0_r = __msa_adds_s_w(dst0_r, in0_r);
-        dst1_r = __msa_adds_s_w(dst1_r, in0_l);
-        SRARI_W2_SW(dst0_r, dst1_r, 7);
-        dst0_r = CLIP_SW_0_255(dst0_r);
-        dst1_r = CLIP_SW_0_255(dst1_r);
+        dst76 = __msa_ilvr_h(dst97, dst66);
+        ILVRL_H2_SH(dst108, dst97, dst87, dst109);
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst97, 1);
+        dst98 = __msa_ilvr_h(dst66, dst108);
 
-        HEVC_PCK_SW_SB2(dst1_r, dst0_r, dst0_r);
-        ST4x2_UB(dst0_r, dst, dst_stride);
-        dst += (2 * dst_stride);
+        dst0 = HEVC_FILT_8TAP(dst10, dst32, dst54, dst76, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        dst1 = HEVC_FILT_8TAP(dst21, dst43, dst65, dst87, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        dst2 = HEVC_FILT_8TAP(dst32, dst54, dst76, dst98, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        dst3 = HEVC_FILT_8TAP(dst43, dst65, dst87, dst109, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
 
-        dst10_r = dst32_r;
-        dst32_r = dst54_r;
-        dst54_r = dst76_r;
-        dst21_r = dst43_r;
-        dst43_r = dst65_r;
-        dst65_r = dst87_r;
-        dst66 = (v8i16) __msa_splati_d((v2i64) dst87, 1);
+        SRA_4V(dst0, dst1, dst2, dst3, 6);
+        PCKEV_H2_SH(dst1, dst0, dst3, dst2, out0, out1);
+        ADDS_SH2_SH(out0, in0, out1, in1, out0, out1);
+        ADDS_SH2_SH(out0, const_vec, out1, const_vec, out0, out1);
+        SRARI_H2_SH(out0, out1, 7);
+        CLIP_SH2_0_255_MAX_SATU(out0, out1);
+        out = (v16u8) __msa_pckev_b((v16i8) out1, (v16i8) out0);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        dst10 = dst54;
+        dst32 = dst76;
+        dst54 = dst98;
+        dst21 = dst65;
+        dst43 = dst87;
+        dst65 = dst109;
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst108, 1);
     }
 }
 
-static void hevc_hv_bi_8t_8multx2mult_msa(uint8_t *src0_ptr,
+static void hevc_hv_bi_8t_8multx1mult_msa(uint8_t *src0_ptr,
                                           int32_t src_stride,
                                           int16_t *src1_ptr,
                                           int32_t src2_stride,
@@ -1570,22 +1749,20 @@
     uint8_t *src0_ptr_tmp;
     int16_t *src1_ptr_tmp;
     uint8_t *dst_tmp;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-    v8i16 in0, in1;
-    v4i32 in0_r, in0_l, in1_r, in1_l;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v8i16 in0, tmp;
     v8i16 filt0, filt1, filt2, filt3;
-    v4i32 filt_h0, filt_h1, filt_h2, filt_h3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1, filt_h2, filt_h3;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1, mask2, mask3;
     v8i16 filter_vec, const_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
-    v4i32 dst0_r, dst0_l, dst1_r, dst1_l;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v4i32 dst0_r, dst0_l;
     v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
     v8i16 dst10_l, dst32_l, dst54_l, dst76_l;
-    v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
-    v8i16 dst21_l, dst43_l, dst65_l, dst87_l;
 
     src0_ptr -= ((3 * src_stride) + 3);
     const_vec = __msa_ldi_h(128);
@@ -1595,9 +1772,9 @@
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
-    SPLATI_W4_SW(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
 
     mask1 = mask0 + 2;
     mask2 = mask0 + 4;
@@ -1622,18 +1799,14 @@
                    vec8, vec9, vec10, vec11);
         VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
                    vec12, vec13, vec14, vec15);
-        dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        dst1 = const_vec;
-        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        dst2 = const_vec;
-        DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        dst3 = const_vec;
-        DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        dst1 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                 filt3);
+        dst2 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                                 filt3);
+        dst3 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1,
+                                 filt2, filt3);
 
         VSHF_B4_SB(src4, src4, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
@@ -1641,38 +1814,28 @@
                    vec4, vec5, vec6, vec7);
         VSHF_B4_SB(src6, src6, mask0, mask1, mask2, mask3,
                    vec8, vec9, vec10, vec11);
-        dst4 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst4, dst4, dst4, dst4);
-        dst5 = const_vec;
-        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                     dst5, dst5, dst5, dst5);
-        dst6 = const_vec;
-        DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                     dst6, dst6, dst6, dst6);
+        dst4 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        dst5 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                 filt3);
+        dst6 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                                 filt3);
 
-        ILVR_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1,
-                   dst10_r, dst32_r, dst54_r, dst21_r);
-        ILVR_H2_SH(dst4, dst3, dst6, dst5, dst43_r, dst65_r);
-        ILVL_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1,
-                   dst10_l, dst32_l, dst54_l, dst21_l);
-        ILVL_H2_SH(dst4, dst3, dst6, dst5, dst43_l, dst65_l);
+        for (loop_cnt = height; loop_cnt--;) {
+            src7 = LD_SB(src0_ptr_tmp);
+            src7 = (v16i8) __msa_xori_b((v16u8) src7, 128);
+            src0_ptr_tmp += src_stride;
 
-        for (loop_cnt = height >> 1; loop_cnt--;) {
-            /* row 7 */
-            LD_SB2(src0_ptr_tmp, src_stride, src7, src8);
-            XORI_B2_128_SB(src7, src8);
-            src0_ptr_tmp += 2 * src_stride;
-
-            LD_SH2(src1_ptr_tmp, src2_stride, in0, in1);
-            src1_ptr_tmp += (2 * src2_stride);
+            in0 = LD_SH(src1_ptr_tmp);
+            src1_ptr_tmp += src2_stride;
 
             VSHF_B4_SB(src7, src7, mask0, mask1, mask2, mask3,
                        vec0, vec1, vec2, vec3);
-            dst7 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst7, dst7, dst7, dst7);
-
+            dst7 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                     filt2, filt3);
+            ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
+            ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+            ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
             ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
             dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
                                     filt_h0, filt_h1, filt_h2, filt_h3);
@@ -1681,49 +1844,21 @@
             dst0_r >>= 6;
             dst0_l >>= 6;
 
-            VSHF_B4_SB(src8, src8, mask0, mask1, mask2, mask3,
-                       vec0, vec1, vec2, vec3);
-            dst8 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst8, dst8, dst8, dst8);
+            tmp = __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
+            ADDS_SH2_SH(tmp, in0, tmp, const_vec, tmp, tmp);
+            tmp = __msa_srari_h(tmp, 7);
+            tmp = CLIP_SH_0_255_MAX_SATU(tmp);
+            out = (v16u8) __msa_pckev_b((v16i8) tmp, (v16i8) tmp);
+            ST8x1_UB(out, dst_tmp);
+            dst_tmp += dst_stride;
 
-            ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
-            dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r,
-                                    filt_h0, filt_h1, filt_h2, filt_h3);
-            dst1_l = HEVC_FILT_8TAP(dst21_l, dst43_l, dst65_l, dst87_l,
-                                    filt_h0, filt_h1, filt_h2, filt_h3);
-            dst1_r >>= 6;
-            dst1_l >>= 6;
-
-            UNPCK_SH_SW(in0, in0_r, in0_l);
-            UNPCK_SH_SW(in1, in1_r, in1_l);
-            in0_r = __msa_adds_s_w(in0_r, dst0_r);
-            in0_l = __msa_adds_s_w(in0_l, dst0_l);
-            in1_r = __msa_adds_s_w(in1_r, dst1_r);
-            in1_l = __msa_adds_s_w(in1_l, dst1_l);
-            SRARI_W4_SW(in0_r, in0_l, in1_r, in1_l, 7);
-            in0_r = CLIP_SW_0_255(in0_r);
-            in0_l = CLIP_SW_0_255(in0_l);
-            in1_r = CLIP_SW_0_255(in1_r);
-            in1_l = CLIP_SW_0_255(in1_l);
-
-            HEVC_PCK_SW_SB4(in0_l, in0_r, in1_l, in1_r, dst0_r);
-            ST8x2_UB(dst0_r, dst_tmp, dst_stride);
-            dst_tmp += (2 * dst_stride);
-
-            dst10_r = dst32_r;
-            dst32_r = dst54_r;
-            dst54_r = dst76_r;
-            dst10_l = dst32_l;
-            dst32_l = dst54_l;
-            dst54_l = dst76_l;
-            dst21_r = dst43_r;
-            dst43_r = dst65_r;
-            dst65_r = dst87_r;
-            dst21_l = dst43_l;
-            dst43_l = dst65_l;
-            dst65_l = dst87_l;
-            dst6 = dst8;
+            dst0 = dst1;
+            dst1 = dst2;
+            dst2 = dst3;
+            dst3 = dst4;
+            dst4 = dst5;
+            dst5 = dst6;
+            dst6 = dst7;
         }
 
         src0_ptr += 8;
@@ -1742,7 +1877,7 @@
                                  const int8_t *filter_y,
                                  int32_t height)
 {
-    hevc_hv_bi_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
+    hevc_hv_bi_8t_8multx1mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                   dst, dst_stride, filter_x, filter_y,
                                   height, 8);
 }
@@ -1757,12 +1892,208 @@
                                   const int8_t *filter_y,
                                   int32_t height)
 {
-    hevc_hv_bi_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                  dst, dst_stride, filter_x, filter_y,
-                                  height, 8);
+    uint32_t loop_cnt;
+    uint8_t *src0_ptr_tmp, *dst_tmp;
+    int16_t *src1_ptr_tmp;
+    uint64_t tp0, tp1;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+    v8i16 in0, in1 = { 0 }, out0, out1, tmp, filter_vec, const_vec;
+    v8i16 filt0, filt1, filt2, filt3, filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 dst30, dst41, dst52, dst63, dst66, dst97, dst108;
+    v8i16 dst10, dst32, dst54, dst76, dst98, dst21, dst43, dst65, dst87, dst109;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst76_l;
+    v4i32 dst0_r, dst0_l, tmp0, tmp1, tmp2, tmp3;
 
-    hevc_hv_bi_8t_4w_msa(src0_ptr + 8, src_stride, src1_ptr + 8, src2_stride,
-                         dst + 8, dst_stride, filter_x, filter_y, height);
+    src0_ptr -= ((3 * src_stride) + 3);
+
+    const_vec = __msa_ldi_h(128);
+    const_vec <<= 6;
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    src0_ptr_tmp = src0_ptr;
+    dst_tmp = dst;
+    src1_ptr_tmp = src1_ptr;
+
+    LD_SB7(src0_ptr_tmp, src_stride, src0, src1, src2, src3, src4, src5,
+           src6);
+    src0_ptr_tmp += (7 * src_stride);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+    /* row 0 row 1 row 2 row 3 */
+    VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+               vec3);
+    VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3, vec4, vec5, vec6,
+               vec7);
+    VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3, vec8, vec9, vec10,
+               vec11);
+    VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3, vec12, vec13, vec14,
+               vec15);
+    dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                             filt3);
+    dst1 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                             filt3);
+    dst2 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                             filt3);
+    dst3 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1,
+                             filt2, filt3);
+    VSHF_B4_SB(src4, src4, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+               vec3);
+    VSHF_B4_SB(src5, src5, mask0, mask1, mask2, mask3, vec4, vec5, vec6,
+               vec7);
+    VSHF_B4_SB(src6, src6, mask0, mask1, mask2, mask3, vec8, vec9, vec10,
+               vec11);
+    dst4 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                             filt3);
+    dst5 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                             filt3);
+    dst6 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                             filt3);
+
+    for (loop_cnt = 16; loop_cnt--;) {
+        src7 = LD_SB(src0_ptr_tmp);
+        src7 = (v16i8) __msa_xori_b((v16u8) src7, 128);
+        src0_ptr_tmp += src_stride;
+
+        in0 = LD_SH(src1_ptr_tmp);
+        src1_ptr_tmp += src2_stride;
+
+        VSHF_B4_SB(src7, src7, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+                   vec3);
+        dst7 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                 filt2, filt3);
+        ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
+        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
+        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst0_l = HEVC_FILT_8TAP(dst10_l, dst32_l, dst54_l, dst76_l, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst0_r >>= 6;
+        dst0_l >>= 6;
+
+        tmp = __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
+        ADDS_SH2_SH(tmp, in0, tmp, const_vec, tmp, tmp);
+        tmp = __msa_srari_h(tmp, 7);
+        tmp = CLIP_SH_0_255_MAX_SATU(tmp);
+        out = (v16u8) __msa_pckev_b((v16i8) tmp, (v16i8) tmp);
+        ST8x1_UB(out, dst_tmp);
+        dst_tmp += dst_stride;
+
+        dst0 = dst1;
+        dst1 = dst2;
+        dst2 = dst3;
+        dst3 = dst4;
+        dst4 = dst5;
+        dst5 = dst6;
+        dst6 = dst7;
+    }
+
+    src0_ptr += 8;
+    dst += 8;
+    src1_ptr += 8;
+
+    mask4 = LD_SB(ff_hevc_mask_arr + 16);
+    mask5 = mask4 + 2;
+    mask6 = mask4 + 4;
+    mask7 = mask4 + 6;
+
+    LD_SB7(src0_ptr, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src0_ptr += (7 * src_stride);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+    /* row 0 row 1 row 2 row 3 */
+    VSHF_B4_SB(src0, src3, mask4, mask5, mask6, mask7, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src1, src4, mask4, mask5, mask6, mask7, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src2, src5, mask4, mask5, mask6, mask7,
+               vec8, vec9, vec10, vec11);
+    VSHF_B4_SB(src3, src6, mask4, mask5, mask6, mask7,
+               vec12, vec13, vec14, vec15);
+    dst30 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                              filt3);
+    dst41 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                              filt3);
+    dst52 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                              filt3);
+    dst63 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2,
+                              filt3);
+
+    ILVRL_H2_SH(dst41, dst30, dst10, dst43);
+    ILVRL_H2_SH(dst52, dst41, dst21, dst54);
+    ILVRL_H2_SH(dst63, dst52, dst32, dst65);
+
+    dst66 = (v8i16) __msa_splati_d((v2i64) dst63, 1);
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src0_ptr, src_stride, src7, src8, src9, src10);
+        src0_ptr += (4 * src_stride);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        INSERT_D2_SH(tp0, tp1, in0);
+        src1_ptr += (2 * src2_stride);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        INSERT_D2_SH(tp0, tp1, in1);
+        src1_ptr += (2 * src2_stride);
+
+        VSHF_B4_SB(src7, src9, mask4, mask5, mask6, mask7, vec0, vec1, vec2,
+                   vec3);
+        VSHF_B4_SB(src8, src10, mask4, mask5, mask6, mask7, vec4, vec5, vec6,
+                   vec7);
+        dst97 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                  filt3);
+        dst108 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                   filt3);
+
+        dst76 = __msa_ilvr_h(dst97, dst66);
+        ILVRL_H2_SH(dst108, dst97, dst87, dst109);
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst97, 1);
+        dst98 = __msa_ilvr_h(dst66, dst108);
+
+        tmp0 = HEVC_FILT_8TAP(dst10, dst32, dst54, dst76, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        tmp1 = HEVC_FILT_8TAP(dst21, dst43, dst65, dst87, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        tmp2 = HEVC_FILT_8TAP(dst32, dst54, dst76, dst98, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        tmp3 = HEVC_FILT_8TAP(dst43, dst65, dst87, dst109, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        SRA_4V(tmp0, tmp1, tmp2, tmp3, 6);
+        PCKEV_H2_SH(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ADDS_SH2_SH(out0, in0, out1, in1, out0, out1);
+        ADDS_SH2_SH(out0, const_vec, out1, const_vec, out0, out1);
+        SRARI_H2_SH(out0, out1, 7);
+        CLIP_SH2_0_255_MAX_SATU(out0, out1);
+        out = (v16u8) __msa_pckev_b((v16i8) out1, (v16i8) out0);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        dst10 = dst54;
+        dst32 = dst76;
+        dst54 = dst98;
+        dst21 = dst65;
+        dst43 = dst87;
+        dst65 = dst109;
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst108, 1);
+    }
 }
 
 static void hevc_hv_bi_8t_16w_msa(uint8_t *src0_ptr,
@@ -1775,7 +2106,7 @@
                                   const int8_t *filter_y,
                                   int32_t height)
 {
-    hevc_hv_bi_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
+    hevc_hv_bi_8t_8multx1mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                   dst, dst_stride, filter_x, filter_y,
                                   height, 16);
 }
@@ -1790,7 +2121,7 @@
                                   const int8_t *filter_y,
                                   int32_t height)
 {
-    hevc_hv_bi_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
+    hevc_hv_bi_8t_8multx1mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                   dst, dst_stride, filter_x, filter_y,
                                   height, 24);
 }
@@ -1805,7 +2136,7 @@
                                   const int8_t *filter_y,
                                   int32_t height)
 {
-    hevc_hv_bi_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
+    hevc_hv_bi_8t_8multx1mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                   dst, dst_stride, filter_x, filter_y,
                                   height, 32);
 }
@@ -1820,7 +2151,7 @@
                                   const int8_t *filter_y,
                                   int32_t height)
 {
-    hevc_hv_bi_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
+    hevc_hv_bi_8t_8multx1mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                   dst, dst_stride, filter_x, filter_y,
                                   height, 48);
 }
@@ -1835,7 +2166,7 @@
                                   const int8_t *filter_y,
                                   int32_t height)
 {
-    hevc_hv_bi_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
+    hevc_hv_bi_8t_8multx1mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                   dst, dst_stride, filter_x, filter_y,
                                   height, 64);
 }
@@ -1852,7 +2183,7 @@
     v8i16 filt0, filt1;
     v16i8 src0, src1, dst0, vec0, vec1;
     v8i16 in0, in1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
     v16i8 mask1;
     v8i16 tmp0;
     v8i16 filter_vec, const_vec;
@@ -1895,7 +2226,8 @@
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3, dst0, vec0, vec1;
     v8i16 in0, in1, in2, in3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v16i8 vec2, vec3;
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
     v16i8 mask1;
     v8i16 tmp0, tmp1;
     v8i16 filter_vec, const_vec;
@@ -1916,12 +2248,12 @@
     ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
     XORI_B4_128_SB(src0, src1, src2, src3);
 
-    VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
     tmp0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp0, tmp0);
-    VSHF_B2_SB(src2, src3, src2, src3, mask0, mask1, vec0, vec1);
     tmp1 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp1, tmp1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2, vec3);
+    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt1, filt1, tmp0, tmp1,
+                 tmp0, tmp1);
     HEVC_BI_RND_CLIP2(in0, in1, tmp0, tmp1, 7, tmp0, tmp1);
     dst0 = __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
 
@@ -1942,8 +2274,8 @@
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
     v16i8 dst0, dst1;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
-    v16i8 mask1, vec0, vec1;
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
+    v16i8 mask1, vec0, vec1, vec2, vec3;
     v8i16 tmp0, tmp1, tmp2, tmp3;
     v8i16 filter_vec, const_vec;
 
@@ -1969,18 +2301,18 @@
         ILVR_D2_SH(in5, in4, in7, in6, in2, in3);
         XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
 
-        VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
         tmp0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp0, tmp0);
-        VSHF_B2_SB(src2, src3, src2, src3, mask0, mask1, vec0, vec1);
         tmp1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp1, tmp1);
-        VSHF_B2_SB(src4, src5, src4, src5, mask0, mask1, vec0, vec1);
         tmp2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp2, tmp2);
-        VSHF_B2_SB(src6, src7, src6, src7, mask0, mask1, vec0, vec1);
         tmp3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp3, tmp3);
+        VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src4, src5, src6, src7, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0,
+                     tmp1, tmp2, tmp3);
+        VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src5, src6, src7, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, tmp0,
+                     tmp1, tmp2, tmp3);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           tmp0, tmp1, tmp2, tmp3, 7, tmp0, tmp1, tmp2, tmp3);
@@ -2026,9 +2358,9 @@
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3;
     v8i16 in0, in1, in2, in3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1;
-    v16i8 vec0, vec1;
+    v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 filter_vec, const_vec;
 
@@ -2049,18 +2381,18 @@
         src1_ptr += (4 * src2_stride);
         XORI_B4_128_SB(src0, src1, src2, src3);
 
-        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
         dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
         dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
         dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
         dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
@@ -2083,8 +2415,8 @@
     v8i16 filt0, filt1;
     v16i8 src0, src1;
     v8i16 in0, in1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
-    v16i8 mask1, vec0, vec1;
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
+    v16i8 mask1, vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1;
     v8i16 filter_vec, const_vec;
 
@@ -2102,12 +2434,12 @@
     LD_SH2(src1_ptr, src2_stride, in0, in1);
     XORI_B2_128_SB(src0, src1);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
     dst1 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec2, vec3);
+    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt1, filt1, dst0, dst1,
+                 dst0, dst1);
     HEVC_BI_RND_CLIP2(in0, in1, dst0, dst1, 7, dst0, dst1);
 
     dst0 = (v8i16) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
@@ -2126,9 +2458,9 @@
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3, src4, src5;
     v8i16 in0, in1, in2, in3, in4, in5;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1;
-    v16i8 vec0, vec1;
+    v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v8i16 filter_vec, const_vec;
 
@@ -2148,24 +2480,25 @@
     LD_SH2(src1_ptr, src2_stride, in4, in5);
     XORI_B6_128_SB(src0, src1, src2, src3, src4, src5);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
     dst1 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
     dst2 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
     dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0, dst1,
+                 dst2, dst3);
+    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0, vec1);
+    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2, vec3);
+    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0, dst1,
+                 dst2, dst3);
     dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
     dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
+
+    VSHF_B2_SB(src4, src4, src5, src5, mask0, mask0, vec0, vec1);
+    VSHF_B2_SB(src4, src4, src5, src5, mask1, mask1, vec2, vec3);
+    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt1, filt1, dst4, dst5,
+                 dst4, dst5);
 
     HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                       dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
@@ -2191,9 +2524,9 @@
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3;
     v8i16 in0, in1, in2, in3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v16i8 vec0, vec1;
+    v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 filter_vec, const_vec;
 
@@ -2214,18 +2547,18 @@
         src1_ptr += (4 * src2_stride);
         XORI_B4_128_SB(src0, src1, src2, src3);
 
-        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
         dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
         dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
         dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
         dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
@@ -2271,12 +2604,12 @@
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask2 = {
         8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
     };
     v16i8 mask1, mask3;
-    v16i8 vec0, vec1;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v8i16 filter_vec, const_vec;
 
@@ -2301,24 +2634,24 @@
         ILVR_D2_SH(in5, in4, in7, in6, in4, in5);
         XORI_B4_128_SB(src0, src1, src2, src3);
 
-        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
         dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
         dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
         dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
         dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
         dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-        VSHF_B2_SB(src2, src3, src2, src3, mask2, mask3, vec0, vec1);
         dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4, vec5);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        DPADD_SB2_SH(vec4, vec5, filt0, filt0, dst4, dst5);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2, vec3);
+        VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec4, vec5);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        DPADD_SB2_SH(vec4, vec5, filt1, filt1, dst4, dst5);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
@@ -2341,13 +2674,11 @@
                                   int32_t height)
 {
     uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
-    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v16i8 src0, src1, src2, src3, vec0, vec1, vec2, vec3;
+    v8i16 in0, in1, in2, in3, dst0, dst1, dst2, dst3;
     v8i16 filt0, filt1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
-    v16i8 vec0, vec1;
     v8i16 filter_vec, const_vec;
 
     src0_ptr -= 1;
@@ -2360,49 +2691,36 @@
 
     mask1 = mask0 + 2;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src0_ptr, src_stride, src0, src2, src4, src6);
-        LD_SB4(src0_ptr + 8, src_stride, src1, src3, src5, src7);
-        src0_ptr += (4 * src_stride);
-        LD_SH4(src1_ptr, src2_stride, in0, in2, in4, in6);
-        LD_SH4(src1_ptr + 8, src2_stride, in1, in3, in5, in7);
-        src1_ptr += (4 * src2_stride);
-        XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        LD_SB2(src0_ptr, src_stride, src0, src2);
+        LD_SB2(src0_ptr + 8, src_stride, src1, src3);
+        src0_ptr += (2 * src_stride);
+        LD_SH2(src1_ptr, src2_stride, in0, in2);
+        LD_SH2(src1_ptr + 8, src2_stride, in1, in3);
+        src1_ptr += (2 * src2_stride);
 
-        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+        XORI_B4_128_SB(src0, src1, src2, src3);
+
         dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
         dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
         dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
         dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst6 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-        VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-        dst7 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
+
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
-        HEVC_BI_RND_CLIP4(in4, in5, in6, in7,
-                          dst4, dst5, dst6, dst7, 7, dst4, dst5, dst6, dst7);
 
-        PCKEV_B4_SH(dst1, dst0, dst3, dst2,
-                    dst5, dst4, dst7, dst6, dst0, dst1, dst2, dst3);
-        ST_SH4(dst0, dst1, dst2, dst3, dst, dst_stride);
-        dst += (4 * dst_stride);
+        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
+        ST_SH2(dst0, dst1, dst, dst_stride);
+        dst += (2 * dst_stride);
     }
 }
 
@@ -2421,9 +2739,9 @@
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
     v8i16 filt0, filt1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1, mask2, mask3;
-    v16i8 vec0, vec1;
+    v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
     v8i16 filter_vec, const_vec;
 
@@ -2451,30 +2769,31 @@
         src1_ptr += (4 * src2_stride);
         XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
 
-        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
         dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
         dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
         dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        VSHF_B2_SB(src2, src3, src2, src3, mask2, mask3, vec0, vec1);
         dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask2, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src2, src3, mask0, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask1, mask3, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src2, src3, mask1, mask3, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+
         dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-        VSHF_B2_SB(src4, src5, src4, src5, mask2, mask3, vec0, vec1);
         dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
         dst6 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-        VSHF_B2_SB(src6, src7, src6, src7, mask2, mask3, vec0, vec1);
         dst7 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
+        VSHF_B2_SB(src4, src4, src4, src5, mask0, mask2, vec0, vec1);
+        VSHF_B2_SB(src6, src6, src6, src7, mask0, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst4,
+                     dst5, dst6, dst7);
+        VSHF_B2_SB(src4, src4, src4, src5, mask1, mask3, vec0, vec1);
+        VSHF_B2_SB(src6, src6, src6, src7, mask1, mask3, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst4,
+                     dst5, dst6, dst7);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
@@ -2489,18 +2808,18 @@
         LD_SH4(src1_ptr_tmp, src2_stride, in0, in1, in2, in3);
         src1_ptr_tmp += (4 * src2_stride);
 
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
         dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
         dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
         dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
         dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        VSHF_B2_SB(src1, src1, src3, src3, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src5, src5, src7, src7, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src1, src1, src3, src3, mask1, mask1, vec0, vec1);
+        VSHF_B2_SB(src5, src5, src7, src7, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
@@ -2524,10 +2843,10 @@
     v16i8 src0, src1, src2;
     v8i16 in0, in1, in2, in3;
     v8i16 filt0, filt1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1, mask2, mask3;
     v8i16 dst0, dst1, dst2, dst3;
-    v16i8 vec0, vec1;
+    v16i8 vec0, vec1, vec2, vec3;
     v8i16 filter_vec, const_vec;
 
     src0_ptr -= 1;
@@ -2542,7 +2861,7 @@
     mask2 = mask0 + 8;
     mask3 = mask0 + 10;
 
-    for (loop_cnt = (height >> 1); loop_cnt--;) {
+    for (loop_cnt = height; loop_cnt--;) {
         LD_SB2(src0_ptr, 16, src0, src1);
         src2 = LD_SB(src0_ptr + 24);
         src0_ptr += src_stride;
@@ -2550,45 +2869,18 @@
         src1_ptr += src2_stride;
         XORI_B3_128_SB(src0, src1, src2);
 
-        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
         dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
         dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
         dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
         dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-        HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                          dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
-
-        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
-        ST_SH2(dst0, dst1, dst, 16);
-        dst += dst_stride;
-
-        LD_SB2(src0_ptr, 16, src0, src1);
-        src2 = LD_SB(src0_ptr + 24);
-        src0_ptr += src_stride;
-        LD_SH4(src1_ptr, 8, in0, in1, in2, in3);
-        src1_ptr += src2_stride;
-        XORI_B3_128_SB(src0, src1, src2);
-
-        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask2, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask1, mask3, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
 
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0, dst1, dst2, dst3, 7, dst0, dst1, dst2, dst3);
@@ -2800,10 +3092,9 @@
                                  const int8_t *filter,
                                  int32_t height)
 {
-    int32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v8i16 in0, in1, in2, in3;
-    v16i8 src10_r, src32_r, src21_r, src43_r;
+    v16i8 src10_r, src32_r, src21_r, src43_r, src54_r, src65_r;
     v8i16 dst0_r, dst1_r, dst2_r, dst3_r;
     v8i16 filt0, filt1;
     v8i16 filter_vec, const_vec;
@@ -2818,40 +3109,70 @@
 
     LD_SB3(src0_ptr, src_stride, src0, src1, src2);
     src0_ptr += (3 * src_stride);
+    LD_SB2(src0_ptr, src_stride, src3, src4);
+    src0_ptr += (2 * src_stride);
+    LD_SB2(src0_ptr, src_stride, src5, src6);
+    src0_ptr += (2 * src_stride);
+    LD_SB2(src0_ptr, src_stride, src7, src8);
+    src0_ptr += (2 * src_stride);
+    LD_SB2(src0_ptr, src_stride, src9, src10);
+    src0_ptr += (2 * src_stride);
+
+    LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
+    src1_ptr += (4 * src2_stride);
+
     XORI_B3_128_SB(src0, src1, src2);
+    XORI_B2_128_SB(src3, src4);
+    XORI_B2_128_SB(src5, src6);
+    XORI_B2_128_SB(src7, src8);
+    XORI_B2_128_SB(src9, src10);
+
     ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
+    ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB2(src0_ptr, src_stride, src3, src4);
-        src0_ptr += (2 * src_stride);
-        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
-        src1_ptr += (4 * src2_stride);
-        XORI_B2_128_SB(src3, src4);
-        ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
+    dst0_r = const_vec;
+    DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, dst0_r, dst0_r);
+    dst1_r = const_vec;
+    DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, dst1_r, dst1_r);
 
-        dst0_r = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, dst0_r, dst0_r);
-        dst1_r = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, dst1_r, dst1_r);
+    ILVR_B2_SB(src5, src4, src6, src5, src54_r, src65_r);
 
-        LD_SB2(src0_ptr, src_stride, src5, src2);
-        src0_ptr += (2 * src_stride);
-        XORI_B2_128_SB(src5, src2);
-        ILVR_B2_SB(src5, src4, src2, src5, src10_r, src21_r);
+    dst2_r = const_vec;
+    DPADD_SB2_SH(src32_r, src54_r, filt0, filt1, dst2_r, dst2_r);
+    dst3_r = const_vec;
+    DPADD_SB2_SH(src43_r, src65_r, filt0, filt1, dst3_r, dst3_r);
 
-        dst2_r = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, dst2_r, dst2_r);
-        dst3_r = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, dst3_r, dst3_r);
+    HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
+                      dst0_r, dst1_r, dst2_r, dst3_r, 7,
+                      dst0_r, dst1_r, dst2_r, dst3_r);
 
-        HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                          dst0_r, dst1_r, dst2_r, dst3_r, 7,
-                          dst0_r, dst1_r, dst2_r, dst3_r);
+    PCKEV_B2_SH(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r, dst1_r);
+    ST6x4_UB(dst0_r, dst1_r, dst, dst_stride);
+    dst += (4 * dst_stride);
 
-        PCKEV_B2_SH(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r, dst1_r);
-        ST6x4_UB(dst0_r, dst1_r, dst, dst_stride);
-        dst += (4 * dst_stride);
-    }
+    LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
+    src1_ptr += (4 * src2_stride);
+    ILVR_B2_SB(src7, src6, src8, src7, src32_r, src43_r);
+
+    dst0_r = const_vec;
+    DPADD_SB2_SH(src54_r, src32_r, filt0, filt1, dst0_r, dst0_r);
+    dst1_r = const_vec;
+    DPADD_SB2_SH(src65_r, src43_r, filt0, filt1, dst1_r, dst1_r);
+
+    ILVR_B2_SB(src9, src8, src10, src9, src54_r, src65_r);
+
+    dst2_r = const_vec;
+    DPADD_SB2_SH(src32_r, src54_r, filt0, filt1, dst2_r, dst2_r);
+    dst3_r = const_vec;
+    DPADD_SB2_SH(src43_r, src65_r, filt0, filt1, dst3_r, dst3_r);
+
+    HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
+                      dst0_r, dst1_r, dst2_r, dst3_r, 7,
+                      dst0_r, dst1_r, dst2_r, dst3_r);
+
+    PCKEV_B2_SH(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r, dst1_r);
+    ST6x4_UB(dst0_r, dst1_r, dst, dst_stride);
+    dst += (4 * dst_stride);
 }
 
 static void hevc_vt_bi_4t_8x2_msa(uint8_t *src0_ptr,
@@ -3053,12 +3374,12 @@
                                   int32_t height)
 {
     int32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5;
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
-    v16i8 src10_r, src32_r, src21_r, src43_r;
+    v16i8 src10_r, src32_r, src21_r, src43_r, src54_r, src65_r;
     v8i16 dst0_r, dst1_r, dst2_r, dst3_r;
     v16i8 src10_l, src32_l, src54_l, src21_l, src43_l, src65_l;
-    v16i8 src2110, src4332;
+    v16i8 src2110, src4332, src6554;
     v8i16 dst0_l, dst1_l, filt0, filt1;
     v8i16 filter_vec, const_vec;
 
@@ -3080,15 +3401,21 @@
     for (loop_cnt = (height >> 2); loop_cnt--;) {
         LD_SB2(src0_ptr, src_stride, src3, src4);
         src0_ptr += (2 * src_stride);
+        LD_SB2(src0_ptr, src_stride, src5, src6);
+        src0_ptr += (2 * src_stride);
         LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
         LD_SH4((src1_ptr + 8), src2_stride, in4, in5, in6, in7);
         src1_ptr += (4 * src2_stride);
         ILVR_D2_SH(in5, in4, in7, in6, in4, in5);
         XORI_B2_128_SB(src3, src4);
+        XORI_B2_128_SB(src5, src6);
 
         ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
         ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
         src4332 = (v16i8) __msa_ilvr_d((v2i64) src43_l, (v2i64) src32_l);
+        ILVR_B2_SB(src5, src4, src6, src5, src54_r, src65_r);
+        ILVL_B2_SB(src5, src4, src6, src5, src54_l, src65_l);
+        src6554 = (v16i8) __msa_ilvr_d((v2i64) src65_l, (v2i64) src54_l);
 
         dst0_r = const_vec;
         DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, dst0_r, dst0_r);
@@ -3096,21 +3423,12 @@
         DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, dst1_r, dst1_r);
         dst0_l = const_vec;
         DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst0_l, dst0_l);
-
-        LD_SB2(src0_ptr, src_stride, src5, src2);
-        src0_ptr += (2 * src_stride);
-        XORI_B2_128_SB(src5, src2);
-
-        ILVR_B2_SB(src5, src4, src2, src5, src10_r, src21_r);
-        ILVL_B2_SB(src5, src4, src2, src5, src54_l, src65_l);
-        src2110 = (v16i8) __msa_ilvr_d((v2i64) src65_l, (v2i64) src54_l);
-
         dst2_r = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, dst2_r, dst2_r);
+        DPADD_SB2_SH(src32_r, src54_r, filt0, filt1, dst2_r, dst2_r);
         dst3_r = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, dst3_r, dst3_r);
+        DPADD_SB2_SH(src43_r, src65_r, filt0, filt1, dst3_r, dst3_r);
         dst1_l = const_vec;
-        DPADD_SB2_SH(src4332, src2110, filt0, filt1, dst1_l, dst1_l);
+        DPADD_SB2_SH(src4332, src6554, filt0, filt1, dst1_l, dst1_l);
         HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
                           dst0_r, dst1_r, dst2_r, dst3_r, 7,
                           dst0_r, dst1_r, dst2_r, dst3_r);
@@ -3120,6 +3438,11 @@
         dst0_l = (v8i16) __msa_pckev_b((v16i8) dst1_l, (v16i8) dst0_l);
         ST12x4_UB(dst0_r, dst1_r, dst0_l, dst, dst_stride);
         dst += (4 * dst_stride);
+
+        src2 = src6;
+        src10_r = src54_r;
+        src21_r = src65_r;
+        src2110 = src6554;
     }
 }
 
@@ -3449,20 +3772,20 @@
                                   uint8_t *dst,
                                   int32_t dst_stride,
                                   const int8_t *filter_x,
-                                  const int8_t *filter_y,
-                                  int32_t height)
+                                  const int8_t *filter_y)
 {
-    v8i16 in0, in1;
+    uint64_t tp0, tp1;
+    v16u8 out;
+    v8i16 in0 = { 0 };
     v16i8 src0, src1, src2, src3, src4;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4;
-    v4i32 dst0_r, dst1_r;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
+    v8i16 dst20, dst31, dst42, dst10, dst32, dst21, dst43, tmp;
+    v4i32 dst0, dst1;
 
     src0_ptr -= (src_stride + 1);
 
@@ -3470,56 +3793,43 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
     const_vec = __msa_ldi_h(128);
     const_vec <<= 6;
 
-    LD_SB3(src0_ptr, src_stride, src0, src1, src2);
-    src0_ptr += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB5(src0_ptr, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
+    LD2(src1_ptr, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in0);
+    in0 = __msa_adds_s_h(in0, const_vec);
 
-    LD_SB2(src0_ptr, src_stride, src3, src4);
-    LD_SH2(src1_ptr, src2_stride, in0, in1);
-    in0 = (v8i16) __msa_ilvr_d((v2i64) in1, (v2i64) in0);
-    XORI_B2_128_SB(src3, src4);
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-    dst32_r = __msa_ilvr_h(dst3, dst2);
-    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-    dst0_r >>= 6;
-    /* row 4 */
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-    dst43_r = __msa_ilvr_h(dst4, dst3);
-    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst0_r = (v4i32) __msa_pckev_h((v8i16) dst1_r, (v8i16) dst0_r);
-    dst0_r = (v4i32) __msa_adds_s_h((v8i16) dst0_r, in0);
-    dst0_r = (v4i32) __msa_srari_h((v8i16) dst0_r, 7);
-    dst0_r = (v4i32) CLIP_SH_0_255(dst0_r);
+    VSHF_B2_SB(src0, src2, src0, src2, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src3, src1, src3, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src4, src2, src4, mask0, mask1, vec4, vec5);
 
-    dst0_r = (v4i32) __msa_pckev_b((v16i8) dst0_r, (v16i8) dst0_r);
-    ST4x2_UB(dst0_r, dst, dst_stride);
+    dst20 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst31 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst42 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+
+    ILVRL_H2_SH(dst31, dst20, dst10, dst32);
+    ILVRL_H2_SH(dst42, dst31, dst21, dst43);
+
+    dst0 = HEVC_FILT_4TAP(dst10, dst32, filt_h0, filt_h1);
+    dst1 = HEVC_FILT_4TAP(dst21, dst43, filt_h0, filt_h1);
+    dst0 >>= 6;
+    dst1 >>= 6;
+    tmp = __msa_pckev_h((v8i16) dst1, (v8i16) dst0);
+    tmp = __msa_adds_s_h(tmp, in0);
+    tmp = __msa_srari_h(tmp, 7);
+    tmp = CLIP_SH_0_255_MAX_SATU(tmp);
+    out = (v16u8) __msa_pckev_b((v16i8) tmp, (v16i8) tmp);
+    ST4x2_UB(out, dst, dst_stride);
 }
 
 static void hevc_hv_bi_4t_4x4_msa(uint8_t *src0_ptr,
@@ -3529,21 +3839,22 @@
                                   uint8_t *dst,
                                   int32_t dst_stride,
                                   const int8_t *filter_x,
-                                  const int8_t *filter_y,
-                                  int32_t height)
+                                  const int8_t *filter_y)
 {
-    v8i16 in0, in1, in2, in3;
+    uint64_t tp0, tp1;
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v8i16 dst0_r, dst1_r;
-    v4i32 tmp0, tmp1, tmp2, tmp3;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 tmp0, tmp1;
+    v8i16 in0 = { 0 }, in1 = { 0 };
+    v8i16 dst30, dst41, dst52, dst63;
+    v8i16 dst10, dst32, dst54, dst21, dst43, dst65;
+    v4i32 dst0, dst1, dst2, dst3;
 
     src0_ptr -= (src_stride + 1);
 
@@ -3551,69 +3862,50 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
+    LD_SB7(src0_ptr, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
     const_vec = __msa_ldi_h(128);
     const_vec <<= 6;
 
-    LD_SB3(src0_ptr, src_stride, src0, src1, src2);
-    src0_ptr += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD2(src1_ptr, src2_stride, tp0, tp1);
+    src1_ptr += 2 * src2_stride;
+    INSERT_D2_SH(tp0, tp1, in0);
+    LD2(src1_ptr, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in1);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
+    ADDS_SH2_SH(in0, const_vec, in1, const_vec, in0, in1);
 
-    LD_SB4(src0_ptr, src_stride, src3, src4, src5, src6);
-    LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
-    ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
-    XORI_B4_128_SB(src3, src4, src5, src6);
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-    dst32_r = __msa_ilvr_h(dst3, dst2);
-    tmp0 = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-    tmp0 >>= 6;
-    /* row 4 */
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-    dst43_r = __msa_ilvr_h(dst4, dst3);
-    tmp1 = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-    tmp1 >>= 6;
-    /* row 5 */
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-    dst10_r = __msa_ilvr_h(dst5, dst4);
-    tmp2 = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-    tmp2 >>= 6;
-    /* row 6 */
-    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-    dst21_r = __msa_ilvr_h(dst2, dst5);
-    tmp3 = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-    tmp3 >>= 6;
-    PCKEV_H2_SH(tmp1, tmp0, tmp3, tmp2, dst0_r, dst1_r);
-    HEVC_BI_RND_CLIP2(in0, in1, dst0_r, dst1_r, 7, dst0_r, dst1_r);
+    VSHF_B2_SB(src0, src3, src0, src3, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src4, src1, src4, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src5, src2, src5, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src6, src3, src6, mask0, mask1, vec6, vec7);
 
-    dst0_r = (v8i16) __msa_pckev_b((v16i8) dst1_r, (v16i8) dst0_r);
-    ST4x4_UB(dst0_r, dst0_r, 0, 1, 2, 3, dst, dst_stride);
-    dst += (4 * dst_stride);
+    dst30 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst41 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst52 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst63 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+    ILVRL_H2_SH(dst41, dst30, dst10, dst43);
+    ILVRL_H2_SH(dst52, dst41, dst21, dst54);
+    ILVRL_H2_SH(dst63, dst52, dst32, dst65);
+    dst0 = HEVC_FILT_4TAP(dst10, dst32, filt_h0, filt_h1);
+    dst1 = HEVC_FILT_4TAP(dst21, dst43, filt_h0, filt_h1);
+    dst2 = HEVC_FILT_4TAP(dst32, dst54, filt_h0, filt_h1);
+    dst3 = HEVC_FILT_4TAP(dst43, dst65, filt_h0, filt_h1);
+    SRA_4V(dst0, dst1, dst2, dst3, 6);
+    PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp0, tmp1);
+    ADDS_SH2_SH(tmp0, in0, tmp1, in1, tmp0, tmp1);
+    SRARI_H2_SH(tmp0, tmp1, 7);
+    CLIP_SH2_0_255_MAX_SATU(tmp0, tmp1);
+    out = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
 }
 
 static void hevc_hv_bi_4t_4multx8mult_msa(uint8_t *src0_ptr,
@@ -3627,18 +3919,21 @@
                                           int32_t height)
 {
     uint32_t loop_cnt;
-    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    uint64_t tp0, tp1;
+    v16u8 out0, out1;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, dst9;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v8i16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 dst10, dst21, dst22, dst73, dst84, dst95, dst106;
     v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
     v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
+    v8i16 dst98_r, dst109_r;
+    v8i16 in0 = { 0 }, in1 = { 0 }, in2 = { 0 }, in3 = { 0 };
     v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r, dst6_r, dst7_r;
 
     src0_ptr -= (src_stride + 1);
@@ -3647,10 +3942,9 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -3661,90 +3955,75 @@
     src0_ptr += (3 * src_stride);
     XORI_B3_128_SB(src0, src1, src2);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
+    VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src2, src1, src2, mask0, mask1, vec2, vec3);
+    dst10 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst21 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    ILVRL_H2_SH(dst21, dst10, dst10_r, dst21_r);
+    dst22 = (v8i16) __msa_splati_d((v2i64) dst21, 1);
+
 
     for (loop_cnt = height >> 3; loop_cnt--;) {
         LD_SB8(src0_ptr, src_stride,
                src3, src4, src5, src6, src7, src8, src9, src10);
         src0_ptr += (8 * src_stride);
-        LD_SH8(src1_ptr, src2_stride, in0, in1, in2, in3, in4, in5, in6, in7);
-        src1_ptr += (8 * src2_stride);
-        ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
-        ILVR_D2_SH(in5, in4, in7, in6, in2, in3);
         XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
-        /* row 3 */
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-        dst32_r = __msa_ilvr_h(dst3, dst2);
+        VSHF_B2_SB(src3, src7, src3, src7, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src8, src4, src8, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src9, src5, src9, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src10, src6, src10, mask0, mask1, vec6, vec7);
+
+        dst73 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst84 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst95 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst106 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        dst32_r = __msa_ilvr_h(dst73, dst22);
+        ILVRL_H2_SH(dst84, dst73, dst43_r, dst87_r);
+        ILVRL_H2_SH(dst95, dst84, dst54_r, dst98_r);
+        ILVRL_H2_SH(dst106, dst95, dst65_r, dst109_r);
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst73, 1);
+        dst76_r = __msa_ilvr_h(dst22, dst106);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in0);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in1);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in2);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in3);
+
+        ADDS_SH4_SH(in0, const_vec, in1, const_vec, in2, const_vec, in3,
+                    const_vec, in0, in1, in2, in3);
         dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-        dst0_r >>= 6;
-        /* row 4 */
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-        dst43_r = __msa_ilvr_h(dst4, dst3);
         dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-        dst1_r >>= 6;
-        /* row 5 */
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-        dst54_r = __msa_ilvr_h(dst5, dst4);
         dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
-        dst2_r >>= 6;
-        /* row 6 */
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst6 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-        dst65_r = __msa_ilvr_h(dst6, dst5);
         dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
-        dst3_r >>= 6;
-        /* row 7 */
-        VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-        dst7 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
-        dst76_r = __msa_ilvr_h(dst7, dst6);
         dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
-        dst4_r >>= 6;
-        /* row 8 */
-        VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec0, vec1);
-        dst8 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst8, dst8);
-        dst87_r = __msa_ilvr_h(dst8, dst7);
         dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
-        dst5_r >>= 6;
-        /* row 9 */
-        VSHF_B2_SB(src9, src9, src9, src9, mask0, mask1, vec0, vec1);
-        dst9 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst9, dst9);
-        dst10_r = __msa_ilvr_h(dst9, dst8);
-        dst6_r = HEVC_FILT_4TAP(dst76_r, dst10_r, filt_h0, filt_h1);
-        dst6_r >>= 6;
-        /* row 10 */
-        VSHF_B2_SB(src10, src10, src10, src10, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        dst21_r = __msa_ilvr_h(dst2, dst9);
-        dst7_r = HEVC_FILT_4TAP(dst87_r, dst21_r, filt_h0, filt_h1);
-        dst7_r >>= 6;
+        dst6_r = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+        dst7_r = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+        SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+        SRA_4V(dst4_r, dst5_r, dst6_r, dst7_r, 6);
         PCKEV_H4_SH(dst1_r, dst0_r, dst3_r, dst2_r,
                     dst5_r, dst4_r, dst7_r, dst6_r, tmp0, tmp1, tmp2, tmp3);
-        HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                          tmp0, tmp1, tmp2, tmp3, 7, tmp0, tmp1, tmp2, tmp3);
-
-        PCKEV_B2_SH(tmp1, tmp0, tmp3, tmp2, tmp0, tmp1);
-        ST4x8_UB(tmp0, tmp1, dst, dst_stride);
+        ADDS_SH4_SH(in0, tmp0, in1, tmp1, in2, tmp2, in3, tmp3, tmp0, tmp1,
+                    tmp2, tmp3);
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST4x8_UB(out0, out1, dst, dst_stride);
         dst += (8 * dst_stride);
+
+        dst10_r = dst98_r;
+        dst21_r = dst109_r;
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst106, 1);
     }
 }
 
@@ -3760,10 +4039,10 @@
 {
     if (2 == height) {
         hevc_hv_bi_4t_4x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                              dst, dst_stride, filter_x, filter_y, height);
+                              dst, dst_stride, filter_x, filter_y);
     } else if (4 == height) {
         hevc_hv_bi_4t_4x4_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                              dst, dst_stride, filter_x, filter_y, height);
+                              dst, dst_stride, filter_x, filter_y);
     } else if (0 == (height % 8)) {
         hevc_hv_bi_4t_4multx8mult_msa(src0_ptr, src_stride,
                                       src1_ptr, src2_stride,
@@ -3782,20 +4061,28 @@
                                  const int8_t *filter_y,
                                  int32_t height)
 {
-    uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5, src6;
-    v8i16 in0, in1, in2, in3;
+    uint32_t tpw0, tpw1, tpw2, tpw3;
+    uint64_t tp0, tp1;
+    v16u8 out0, out1, out2;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6, dsth7, dsth8, dsth9;
+    v8i16 dsth10, tmp4, tmp5;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+    v4i32 dst4_r, dst5_r, dst6_r, dst7_r;
     v8i16 tmp0, tmp1, tmp2, tmp3;
     v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
     v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
+    v8i16 dst54_r, dst76_r, dst98_r, dst65_r, dst87_r, dst109_r;
+    v8i16 dst54_l, dst76_l, dst98_l, dst65_l, dst87_l, dst109_l;
+    v8i16 dst1021_l, dst3243_l, dst5465_l, dst7687_l, dst98109_l;
+    v8i16 in0 = { 0 }, in1 = { 0 }, in2 = { 0 }, in3 = { 0 };
+    v8i16 in4 = { 0 }, in5 = { 0 };
 
     src0_ptr -= (src_stride + 1);
 
@@ -3803,10 +4090,9 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -3820,71 +4106,101 @@
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
 
-    ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
-    ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
+    dsth0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
 
-    for (loop_cnt = height >> 2; loop_cnt--;) {
-        LD_SB4(src0_ptr, src_stride, src3, src4, src5, src6);
-        src0_ptr += (4 * src_stride);
-        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
-        src1_ptr += (4 * src2_stride);
-        XORI_B4_128_SB(src3, src4, src5, src6);
+    ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+    ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
 
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+    LD_SB8(src0_ptr, src_stride,
+           src3, src4, src5, src6, src7, src8, src9, src10);
+    XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
 
-        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
-        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-        dst0_r >>= 6;
-        dst0_l >>= 6;
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
 
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
+    dsth3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dsth6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-        ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
-        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-        dst1_r >>= 6;
-        dst1_l >>= 6;
+    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src9, src9, src9, src9, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src10, src10, src10, src10, mask0, mask1, vec6, vec7);
 
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
+    dsth7 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth8 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth9 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dsth10 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-        ILVRL_H2_SH(dst5, dst4, dst10_r, dst10_l);
-        dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-        dst2_l = HEVC_FILT_4TAP(dst32_l, dst10_l, filt_h0, filt_h1);
-        dst2_r >>= 6;
-        dst2_l >>= 6;
+    ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+    ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+    ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+    ILVRL_H2_SH(dsth7, dsth6, dst76_r, dst76_l);
+    ILVRL_H2_SH(dsth8, dsth7, dst87_r, dst87_l);
+    ILVRL_H2_SH(dsth9, dsth8, dst98_r, dst98_l);
+    ILVRL_H2_SH(dsth10, dsth9, dst109_r, dst109_l);
+    PCKEV_D2_SH(dst21_l, dst10_l, dst43_l, dst32_l, dst1021_l, dst3243_l);
+    PCKEV_D2_SH(dst65_l, dst54_l, dst87_l, dst76_l, dst5465_l, dst7687_l);
+    dst98109_l = (v8i16) __msa_pckev_d((v2i64) dst109_l, (v2i64) dst98_l);
 
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+    dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+    dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+    dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+    dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+    dst6_r = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+    dst7_r = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+    dst0_l = HEVC_FILT_4TAP(dst1021_l, dst3243_l, filt_h0, filt_h1);
+    dst1_l = HEVC_FILT_4TAP(dst3243_l, dst5465_l, filt_h0, filt_h1);
+    dst2_l = HEVC_FILT_4TAP(dst5465_l, dst7687_l, filt_h0, filt_h1);
+    dst3_l = HEVC_FILT_4TAP(dst7687_l, dst98109_l, filt_h0, filt_h1);
+    SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+    SRA_4V(dst4_r, dst5_r, dst6_r, dst7_r, 6);
+    SRA_4V(dst0_l, dst1_l, dst2_l, dst3_l, 6);
+    PCKEV_H2_SH(dst1_r, dst0_r, dst3_r, dst2_r, tmp0, tmp1);
+    PCKEV_H2_SH(dst5_r, dst4_r, dst7_r, dst6_r, tmp2, tmp3);
+    PCKEV_H2_SH(dst1_l, dst0_l, dst3_l, dst2_l, tmp4, tmp5);
 
-        ILVRL_H2_SH(dst2, dst5, dst21_r, dst21_l);
-        dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-        dst3_l = HEVC_FILT_4TAP(dst43_l, dst21_l, filt_h0, filt_h1);
-        dst3_r >>= 6;
-        dst3_l >>= 6;
-        PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r,
-                    dst2_l, dst2_r, dst3_l, dst3_r, tmp0, tmp1, tmp2, tmp3);
-        HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                          tmp0, tmp1, tmp2, tmp3, 7, tmp0, tmp1, tmp2, tmp3);
+    LD2(src1_ptr, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in0);
+    LD2(src1_ptr + 2 * src2_stride, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in1);
 
-        PCKEV_B2_SW(tmp1, tmp0, tmp3, tmp2, dst0_r, dst1_r);
-        ST6x4_UB(dst0_r, dst1_r, dst, dst_stride);
-        dst += (4 * dst_stride);
-    }
+    LD2(src1_ptr + 4 * src2_stride, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in2);
+    LD2(src1_ptr + 6 * src2_stride, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in3);
+
+    ADDS_SH4_SH(in0, const_vec, in1, const_vec, in2, const_vec, in3, const_vec,
+                in0, in1, in2, in3);
+    ADDS_SH4_SH(in0, tmp0, in1, tmp1, in2, tmp2, in3, tmp3, tmp0, tmp1, tmp2,
+                tmp3);
+    SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+    CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+    PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+    ST4x8_UB(out0, out1, dst, dst_stride);
+
+    LW4(src1_ptr + 4, src2_stride, tpw0, tpw1, tpw2, tpw3);
+    src1_ptr += (4 * src2_stride);
+    INSERT_W4_SH(tpw0, tpw1, tpw2, tpw3, in4);
+    LW4(src1_ptr + 4, src2_stride, tpw0, tpw1, tpw2, tpw3);
+    INSERT_W4_SH(tpw0, tpw1, tpw2, tpw3, in5);
+    ADDS_SH2_SH(in4, const_vec, in5, const_vec, in4, in5);
+    ADDS_SH2_SH(in4, tmp4, in5, tmp5, tmp4, tmp5);
+    SRARI_H2_SH(tmp4, tmp5, 7);
+    CLIP_SH2_0_255_MAX_SATU(tmp4, tmp5);
+    out2 = (v16u8) __msa_pckev_b((v16i8) tmp5, (v16i8) tmp4);
+    ST2x4_UB(out2, 0, dst + 4, dst_stride);
+    dst += 4 * dst_stride;
+    ST2x4_UB(out2, 4, dst + 4, dst_stride);
 }
 
 static void hevc_hv_bi_4t_8x2_msa(uint8_t *src0_ptr,
@@ -3894,16 +4210,16 @@
                                   uint8_t *dst,
                                   int32_t dst_stride,
                                   const int8_t *filter_x,
-                                  const int8_t *filter_y,
-                                  int32_t height)
+                                  const int8_t *filter_y)
 {
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
     v8i16 dst0, dst1, dst2, dst3, dst4;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l;
     v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
@@ -3917,62 +4233,144 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
     const_vec = __msa_ldi_h(128);
     const_vec <<= 6;
 
-    LD_SB3(src0_ptr, src_stride, src0, src1, src2);
-    src0_ptr += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB5(src0_ptr, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+
+    LD_SH2(src1_ptr, src2_stride, in0, in1);
+    ADDS_SH2_SH(in0, const_vec, in1, const_vec, in0, in1);
 
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec8, vec9);
+
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    dst4 = HEVC_FILT_4TAP_SH(vec8, vec9, filt0, filt1);
 
     ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
     ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
-
-    LD_SB2(src0_ptr, src_stride, src3, src4);
-    LD_SH2(src1_ptr, src2_stride, in0, in1);
-    XORI_B2_128_SB(src3, src4);
-
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
     ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
     dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-    dst0_r >>= 6;
-    dst0_l >>= 6;
-
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
     dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst1_l >>= 6;
-
+    SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
     PCKEV_H2_SH(dst0_l, dst0_r, dst1_l, dst1_r, tmp0, tmp1);
-    HEVC_BI_RND_CLIP2(in0, in1, tmp0, tmp1, 7, tmp0, tmp1);
+    ADDS_SH2_SH(in0, tmp0, in1, tmp1, tmp0, tmp1);
+    SRARI_H2_SH(tmp0, tmp1, 7);
+    CLIP_SH2_0_255_MAX_SATU(tmp0, tmp1);
+    out = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+    ST8x2_UB(out, dst, dst_stride);
+}
 
-    dst0_r = (v4i32) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
-    ST8x2_UB(dst0_r, dst, dst_stride);
+static void hevc_hv_bi_4t_8multx4_msa(uint8_t *src0_ptr,
+                                      int32_t src_stride,
+                                      int16_t *src1_ptr,
+                                      int32_t src2_stride,
+                                      uint8_t *dst,
+                                      int32_t dst_stride,
+                                      const int8_t *filter_x,
+                                      const int8_t *filter_y,
+                                      int32_t width8mult)
+{
+    uint32_t cnt;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, mask0, mask1;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 filt0, filt1, filt_h0, filt_h1, filter_vec, const_vec;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, tmp0, tmp1, tmp2, tmp3;
+    v8i16 in0, in1, in2, in3;
+    v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
+
+    src0_ptr -= (src_stride + 1);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+
+    const_vec = __msa_ldi_h(128);
+    const_vec <<= 6;
+
+    for (cnt = width8mult; cnt--;) {
+        LD_SB7(src0_ptr, src_stride, src0, src1, src2, src3, src4, src5, src6);
+        src0_ptr += 8;
+        XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
+        src1_ptr += 8;
+        ADDS_SH4_SH(in0, const_vec, in1, const_vec, in2, const_vec, in3,
+                    const_vec, in0, in1, in2, in3);
+
+        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+
+        ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
+        ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
+
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+
+        dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+        ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+
+        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
+        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
+        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
+
+        SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+        SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+        PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                    dst3_r, tmp0, tmp1, tmp2, tmp3);
+        ADDS_SH4_SH(in0, tmp0, in1, tmp1, in2, tmp2, in3, tmp3,
+                    tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += 8;
+    }
 }
 
 static void hevc_hv_bi_4t_8x6_msa(uint8_t *src0_ptr,
@@ -3982,17 +4380,18 @@
                                   uint8_t *dst,
                                   int32_t dst_stride,
                                   const int8_t *filter_x,
-                                  const int8_t *filter_y,
-                                  int32_t height)
+                                  const int8_t *filter_y)
 {
+    v16u8 out0, out1, out2;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
     v8i16 in0, in1, in2, in3, in4, in5;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+    v16i8 vec10, vec11, vec12, vec13, vec14, vec15, vec16, vec17;
     v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
@@ -4008,120 +4407,87 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
     const_vec = __msa_ldi_h(128);
     const_vec <<= 6;
 
-    LD_SB3(src0_ptr, src_stride, src0, src1, src2);
-    src0_ptr += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB5(src0_ptr, src_stride, src0, src1, src2, src3, src4);
+    src0_ptr += (5 * src_stride);
+    LD_SB4(src0_ptr, src_stride, src5, src6, src7, src8);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+
+    LD_SH6(src1_ptr, src2_stride, in0, in1, in2, in3, in4, in5);
+    ADDS_SH4_SH(in0, const_vec, in1, const_vec, in2, const_vec, in3, const_vec,
+                in0, in1, in2, in3);
+    ADDS_SH2_SH(in4, const_vec, in5, const_vec, in4, in5);
+
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec8, vec9);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec10, vec11);
+    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec12, vec13);
+    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec14, vec15);
+    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec16, vec17);
+
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    dst4 = HEVC_FILT_4TAP_SH(vec8, vec9, filt0, filt1);
+    dst5 = HEVC_FILT_4TAP_SH(vec10, vec11, filt0, filt1);
+    dst6 = HEVC_FILT_4TAP_SH(vec12, vec13, filt0, filt1);
+    dst7 = HEVC_FILT_4TAP_SH(vec14, vec15, filt0, filt1);
+    dst8 = HEVC_FILT_4TAP_SH(vec16, vec17, filt0, filt1);
 
     ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
     ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
-
-    LD_SB2(src0_ptr, src_stride, src3, src4);
-    src0_ptr += (2 * src_stride);
-    XORI_B2_128_SB(src3, src4);
-    LD_SH6(src1_ptr, src2_stride, in0, in1, in2, in3, in4, in5);
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
     ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+    ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+    ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+    ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
+    ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
+
     dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
     dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-    dst0_r >>= 6;
-    dst0_l >>= 6;
-    tmp0 = __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
-
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
     dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst1_l >>= 6;
-    tmp1 = __msa_pckev_h((v8i16) dst1_l, (v8i16) dst1_r);
-
-    LD_SB2(src0_ptr, src_stride, src5, src6);
-    src0_ptr += (2 * src_stride);
-    XORI_B2_128_SB(src5, src6);
-    /* row 5 */
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-    ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
     dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
     dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
-    dst2_r >>= 6;
-    dst2_l >>= 6;
-    tmp2 = __msa_pckev_h((v8i16) dst2_l, (v8i16) dst2_r);
-
-    /* row 6 */
-    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-    dst6 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-
-    ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
     dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
     dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
-    dst3_r >>= 6;
-    dst3_l >>= 6;
-    tmp3 = __msa_pckev_h((v8i16) dst3_l, (v8i16) dst3_r);
-
-    LD_SB2(src0_ptr, src_stride, src7, src8);
-    XORI_B2_128_SB(src7, src8);
-    /* row 7 */
-    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-    dst7 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
-
-    ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
     dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
     dst4_l = HEVC_FILT_4TAP(dst54_l, dst76_l, filt_h0, filt_h1);
-
-    dst4_r >>= 6;
-    dst4_l >>= 6;
-    tmp4 = __msa_pckev_h((v8i16) dst4_l, (v8i16) dst4_r);
-    /* row 8 */
-    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec0, vec1);
-    dst8 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst8, dst8);
-
-    ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
     dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
     dst5_l = HEVC_FILT_4TAP(dst65_l, dst87_l, filt_h0, filt_h1);
-    dst5_r >>= 6;
-    dst5_l >>= 6;
-    tmp5 = __msa_pckev_h((v8i16) dst5_l, (v8i16) dst5_r);
 
-    HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                      tmp0, tmp1, tmp2, tmp3, 7, tmp0, tmp1, tmp2, tmp3);
-    HEVC_BI_RND_CLIP2(in4, in5, tmp4, tmp5, 7, tmp4, tmp5);
-
-    PCKEV_B2_SW(tmp1, tmp0, tmp3, tmp2, dst0_r, dst1_r);
-    dst2_r = (v4i32) __msa_pckev_b((v16i8) tmp5, (v16i8) tmp4);
-    ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+    SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+    SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+    SRA_4V(dst4_r, dst4_l, dst5_r, dst5_l, 6);
+    PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l, dst3_r,
+                tmp0, tmp1, tmp2, tmp3);
+    PCKEV_H2_SH(dst4_l, dst4_r, dst5_l, dst5_r, tmp4, tmp5);
+    ADDS_SH4_SH(in0, tmp0, in1, tmp1, in2, tmp2, in3, tmp3,
+                tmp0, tmp1, tmp2, tmp3);
+    ADDS_SH2_SH(in4, tmp4, in5, tmp5, tmp4, tmp5);
+    SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+    SRARI_H2_SH(tmp4, tmp5, 7);
+    CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+    CLIP_SH2_0_255_MAX_SATU(tmp4, tmp5);
+    PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+    out2 = (v16u8) __msa_pckev_b((v16i8) tmp5, (v16i8) tmp4);
+    ST8x4_UB(out0, out1, dst, dst_stride);
     dst += (4 * dst_stride);
-    ST8x2_UB(dst2_r, dst, dst_stride);
+    ST8x2_UB(out2, dst, dst_stride);
 }
 
 static void hevc_hv_bi_4t_8multx4mult_msa(uint8_t *src0_ptr,
@@ -4139,19 +4505,21 @@
     uint8_t *src0_ptr_tmp;
     int16_t *src1_ptr_tmp;
     uint8_t *dst_tmp;
+    v16u8 out0, out1;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 in0, in1, in2, in3;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
     v8i16 tmp0, tmp1, tmp2, tmp3;
     v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
     v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
+    v8i16 dst54_r, dst54_l, dst65_r, dst65_l, dst6;
 
     src0_ptr -= (src_stride + 1);
 
@@ -4159,10 +4527,9 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -4181,12 +4548,10 @@
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
         VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
         VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
 
         ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
         ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
@@ -4198,55 +4563,50 @@
             src1_ptr_tmp += (4 * src2_stride);
             XORI_B4_128_SB(src3, src4, src5, src6);
 
+            ADDS_SH4_SH(in0, const_vec, in1, const_vec, in2, const_vec, in3,
+                        const_vec, in0, in1, in2, in3);
+
             VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-            dst3 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+            VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+            VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+            VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+
+            dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+            dst4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+            dst5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+            dst6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
             ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+            ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+            ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+            ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+
             dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
             dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-            dst0_r >>= 6;
-            dst0_l >>= 6;
-
-            VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-            dst4 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-            ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
             dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
             dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-            dst1_r >>= 6;
-            dst1_l >>= 6;
+            dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+            dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+            dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+            dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
 
-            VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-            dst5 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-            ILVRL_H2_SH(dst5, dst4, dst10_r, dst10_l);
-            dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-            dst2_l = HEVC_FILT_4TAP(dst32_l, dst10_l, filt_h0, filt_h1);
-            dst2_r >>= 6;
-            dst2_l >>= 6;
-
-            VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-            dst2 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-            ILVRL_H2_SH(dst2, dst5, dst21_r, dst21_l);
-            dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-            dst3_l = HEVC_FILT_4TAP(dst43_l, dst21_l, filt_h0, filt_h1);
-            dst3_r >>= 6;
-            dst3_l >>= 6;
-
-            PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, tmp0, tmp1, tmp2, tmp3);
-            HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
-                              tmp0, tmp1, tmp2, tmp3, 7,
-                              tmp0, tmp1, tmp2, tmp3);
-
-            PCKEV_B2_SW(tmp1, tmp0, tmp3, tmp2, dst0_r, dst1_r);
-            ST8x4_UB(dst0_r, dst1_r, dst_tmp, dst_stride);
+            SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+            SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+            PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                        dst3_r, tmp0, tmp1, tmp2, tmp3);
+            ADDS_SH4_SH(in0, tmp0, in1, tmp1, in2, tmp2, in3, tmp3,
+                        tmp0, tmp1, tmp2, tmp3);
+            SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+            CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+            PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+            ST8x4_UB(out0, out1, dst_tmp, dst_stride);
             dst_tmp += (4 * dst_stride);
+
+            dst10_r = dst54_r;
+            dst10_l = dst54_l;
+            dst21_r = dst65_r;
+            dst21_l = dst65_l;
+            dst2 = dst6;
         }
 
         src0_ptr += 8;
@@ -4267,10 +4627,13 @@
 {
     if (2 == height) {
         hevc_hv_bi_4t_8x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                              dst, dst_stride, filter_x, filter_y, height);
+                              dst, dst_stride, filter_x, filter_y);
+    } else if (4 == height) {
+        hevc_hv_bi_4t_8multx4_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
+                                  dst, dst_stride, filter_x, filter_y, 1);
     } else if (6 == height) {
         hevc_hv_bi_4t_8x6_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                              dst, dst_stride, filter_x, filter_y, height);
+                              dst, dst_stride, filter_x, filter_y);
     } else {
         hevc_hv_bi_4t_8multx4mult_msa(src0_ptr, src_stride,
                                       src1_ptr, src2_stride,
@@ -4289,11 +4652,196 @@
                                   const int8_t *filter_y,
                                   int32_t height)
 {
-    hevc_hv_bi_4t_8multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                  dst, dst_stride, filter_x, filter_y,
-                                  height, 8);
-    hevc_hv_bi_4t_4w_msa(src0_ptr + 8, src_stride, src1_ptr + 8, src2_stride,
-                         dst + 8, dst_stride, filter_x, filter_y, height);
+    uint32_t loop_cnt;
+    uint64_t tp0, tp1;
+    uint8_t *src0_ptr_tmp, *dst_tmp;
+    int16_t *src1_ptr_tmp;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 mask0, mask1, mask2, mask3;
+    v8i16 filt0, filt1, filt_h0, filt_h1, filter_vec, tmp0, tmp1, tmp2, tmp3;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6, const_vec;
+    v8i16 dst10, dst21, dst22, dst73, dst84, dst95, dst106;
+    v8i16 dst76_r, dst98_r, dst87_r, dst109_r;
+    v8i16 in0 = { 0 }, in1 = { 0 }, in2 = { 0 }, in3 = { 0 };
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
+    v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+
+    src0_ptr -= (src_stride + 1);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+
+    const_vec = __msa_ldi_h(128);
+    const_vec <<= 6;
+
+    src0_ptr_tmp = src0_ptr;
+    dst_tmp = dst;
+    src1_ptr_tmp = src1_ptr;
+
+    LD_SB3(src0_ptr_tmp, src_stride, src0, src1, src2);
+    src0_ptr_tmp += (3 * src_stride);
+
+    XORI_B3_128_SB(src0, src1, src2);
+
+    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+
+    dsth0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+
+    ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+    ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src0_ptr_tmp, src_stride, src3, src4, src5, src6);
+        src0_ptr_tmp += (4 * src_stride);
+        XORI_B4_128_SB(src3, src4, src5, src6);
+
+        LD_SH4(src1_ptr_tmp, src2_stride, in0, in1, in2, in3);
+        src1_ptr_tmp += (4 * src2_stride);
+        ADDS_SH4_SH(in0, const_vec, in1, const_vec, in2, const_vec, in3,
+                    const_vec, in0, in1, in2, in3);
+
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+
+        dsth3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dsth4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dsth5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dsth6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+        ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+
+        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
+        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
+        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
+
+        SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+        SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+        PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                    dst3_r, tmp0, tmp1, tmp2, tmp3);
+        ADDS_SH4_SH(in0, tmp0, in1, tmp1, in2, tmp2, in3, tmp3,
+                    tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST8x4_UB(out0, out1, dst_tmp, dst_stride);
+        dst_tmp += (4 * dst_stride);
+
+        dst10_r = dst54_r;
+        dst10_l = dst54_l;
+        dst21_r = dst65_r;
+        dst21_l = dst65_l;
+        dsth2 = dsth6;
+    }
+
+    src0_ptr += 8;
+    dst += 8;
+    src1_ptr += 8;
+
+    mask2 = LD_SB(ff_hevc_mask_arr + 16);
+    mask3 = mask2 + 2;
+
+    LD_SB3(src0_ptr, src_stride, src0, src1, src2);
+    src0_ptr += (3 * src_stride);
+    XORI_B3_128_SB(src0, src1, src2);
+    VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
+    VSHF_B2_SB(src1, src2, src1, src2, mask2, mask3, vec2, vec3);
+
+    dst10 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst21 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+
+    ILVRL_H2_SH(dst21, dst10, dst10_r, dst21_r);
+    dst22 = (v8i16) __msa_splati_d((v2i64) dst21, 1);
+
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB8(src0_ptr, src_stride,
+               src3, src4, src5, src6, src7, src8, src9, src10);
+        src0_ptr += (8 * src_stride);
+        XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
+        VSHF_B2_SB(src3, src7, src3, src7, mask2, mask3, vec0, vec1);
+        VSHF_B2_SB(src4, src8, src4, src8, mask2, mask3, vec2, vec3);
+        VSHF_B2_SB(src5, src9, src5, src9, mask2, mask3, vec4, vec5);
+        VSHF_B2_SB(src6, src10, src6, src10, mask2, mask3, vec6, vec7);
+
+        dst73 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst84 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst95 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst106 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        dst32_r = __msa_ilvr_h(dst73, dst22);
+        ILVRL_H2_SH(dst84, dst73, dst43_r, dst87_r);
+        ILVRL_H2_SH(dst95, dst84, dst54_r, dst98_r);
+        ILVRL_H2_SH(dst106, dst95, dst65_r, dst109_r);
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst73, 1);
+        dst76_r = __msa_ilvr_h(dst22, dst106);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in0);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in1);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in2);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in3);
+
+        ADDS_SH4_SH(in0, const_vec, in1, const_vec, in2, const_vec, in3,
+                    const_vec, in0, in1, in2, in3);
+
+        dst0 = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst1 = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst2 = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst3 = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst4 = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+        dst5 = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+        dst6 = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+        dst7 = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+
+        SRA_4V(dst0, dst1, dst2, dst3, 6);
+        SRA_4V(dst4, dst5, dst6, dst7, 6);
+        PCKEV_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6,
+                    tmp0, tmp1, tmp2, tmp3);
+        ADDS_SH4_SH(in0, tmp0, in1, tmp1, in2, tmp2, in3, tmp3,
+                    tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST4x8_UB(out0, out1, dst, dst_stride);
+        dst += (8 * dst_stride);
+
+        dst10_r = dst98_r;
+        dst21_r = dst109_r;
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst106, 1);
+    }
 }
 
 static void hevc_hv_bi_4t_16w_msa(uint8_t *src0_ptr,
@@ -4306,9 +4854,14 @@
                                   const int8_t *filter_y,
                                   int32_t height)
 {
-    hevc_hv_bi_4t_8multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                  dst, dst_stride, filter_x, filter_y,
-                                  height, 16);
+    if (4 == height) {
+        hevc_hv_bi_4t_8multx4_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
+                                  dst, dst_stride, filter_x, filter_y, 2);
+    } else {
+        hevc_hv_bi_4t_8multx4mult_msa(src0_ptr, src_stride, src1_ptr,
+                                      src2_stride, dst, dst_stride, filter_x,
+                                      filter_y, height, 16);
+    }
 }
 
 static void hevc_hv_bi_4t_24w_msa(uint8_t *src0_ptr,
@@ -4333,7 +4886,7 @@
                                   uint8_t *dst,
                                   int32_t dst_stride,
                                   const int8_t *filter_x,
-                                  const const int8_t *filter_y,
+                                  const int8_t *filter_y,
                                   int32_t height)
 {
     hevc_hv_bi_4t_8multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
@@ -4368,22 +4921,22 @@
 
 #undef BI_MC_COPY
 
-#define BI_MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                            \
-void ff_hevc_put_hevc_bi_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,            \
-                                                        ptrdiff_t dst_stride,  \
-                                                        uint8_t *src,          \
-                                                        ptrdiff_t src_stride,  \
-                                                        int16_t *src_16bit,    \
-                                                        int height,            \
-                                                        intptr_t mx,           \
-                                                        intptr_t my,           \
-                                                        int width)             \
-{                                                                              \
-    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];              \
-                                                                               \
-    hevc_##DIR1##_bi_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,       \
-                                             MAX_PB_SIZE, dst, dst_stride,     \
-                                             filter, height);                  \
+#define BI_MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                          \
+void ff_hevc_put_hevc_bi_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,          \
+                                                      ptrdiff_t dst_stride,  \
+                                                      uint8_t *src,          \
+                                                      ptrdiff_t src_stride,  \
+                                                      int16_t *src_16bit,    \
+                                                      int height,            \
+                                                      intptr_t mx,           \
+                                                      intptr_t my,           \
+                                                      int width)             \
+{                                                                            \
+    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];            \
+                                                                             \
+    hevc_##DIR1##_bi_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,     \
+                                             MAX_PB_SIZE, dst, dst_stride,   \
+                                             filter, height);                \
 }
 
 BI_MC(qpel, h, 4, 8, hz, mx);
@@ -4422,41 +4975,40 @@
 
 #undef BI_MC
 
-#define BI_MC_HV(PEL, DIR, WIDTH, TAP, DIR1)                                   \
-void ff_hevc_put_hevc_bi_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,            \
-                                                        ptrdiff_t dst_stride,  \
-                                                        uint8_t *src,          \
-                                                        ptrdiff_t src_stride,  \
-                                                        int16_t *src_16bit,    \
-                                                        int height,            \
-                                                        intptr_t mx,           \
-                                                        intptr_t my,           \
-                                                        int width)             \
-{                                                                              \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];                  \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];                  \
-                                                                               \
-    hevc_##DIR1##_bi_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,       \
-                                             MAX_PB_SIZE, dst, dst_stride,     \
-                                             filter_x, filter_y,               \
-                                             height);                          \
+#define BI_MC_HV(PEL, WIDTH, TAP)                                         \
+void ff_hevc_put_hevc_bi_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst,          \
+                                                   ptrdiff_t dst_stride,  \
+                                                   uint8_t *src,          \
+                                                   ptrdiff_t src_stride,  \
+                                                   int16_t *src_16bit,    \
+                                                   int height,            \
+                                                   intptr_t mx,           \
+                                                   intptr_t my,           \
+                                                   int width)             \
+{                                                                         \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];             \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];             \
+                                                                          \
+    hevc_hv_bi_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,        \
+                                       MAX_PB_SIZE, dst, dst_stride,      \
+                                       filter_x, filter_y, height);       \
 }
 
-BI_MC_HV(qpel, hv, 4, 8, hv);
-BI_MC_HV(qpel, hv, 8, 8, hv);
-BI_MC_HV(qpel, hv, 12, 8, hv);
-BI_MC_HV(qpel, hv, 16, 8, hv);
-BI_MC_HV(qpel, hv, 24, 8, hv);
-BI_MC_HV(qpel, hv, 32, 8, hv);
-BI_MC_HV(qpel, hv, 48, 8, hv);
-BI_MC_HV(qpel, hv, 64, 8, hv);
+BI_MC_HV(qpel, 4, 8);
+BI_MC_HV(qpel, 8, 8);
+BI_MC_HV(qpel, 12, 8);
+BI_MC_HV(qpel, 16, 8);
+BI_MC_HV(qpel, 24, 8);
+BI_MC_HV(qpel, 32, 8);
+BI_MC_HV(qpel, 48, 8);
+BI_MC_HV(qpel, 64, 8);
 
-BI_MC_HV(epel, hv, 4, 4, hv);
-BI_MC_HV(epel, hv, 8, 4, hv);
-BI_MC_HV(epel, hv, 6, 4, hv);
-BI_MC_HV(epel, hv, 12, 4, hv);
-BI_MC_HV(epel, hv, 16, 4, hv);
-BI_MC_HV(epel, hv, 24, 4, hv);
-BI_MC_HV(epel, hv, 32, 4, hv);
+BI_MC_HV(epel, 4, 4);
+BI_MC_HV(epel, 8, 4);
+BI_MC_HV(epel, 6, 4);
+BI_MC_HV(epel, 12, 4);
+BI_MC_HV(epel, 16, 4);
+BI_MC_HV(epel, 24, 4);
+BI_MC_HV(epel, 32, 4);
 
 #undef BI_MC_HV

diff --git a/libavcodec/mips/hevc_mc_biw_msa.c b/libavcodec/mips/hevc_mc_biw_msa.c
index 05a28ec..ea65f00 100644
--- a/libavcodec/mips/hevc_mc_biw_msa.c
+++ b/libavcodec/mips/hevc_mc_biw_msa.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ * Copyright (c) 2015 - 2017 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
  *
  * This file is part of FFmpeg.
  *
@@ -22,9 +22,17 @@
 #include "libavcodec/mips/hevcdsp_mips.h"
 #include "libavcodec/mips/hevc_macros_msa.h"
 
+static const uint8_t ff_hevc_mask_arr[16 * 2] __attribute__((aligned(0x40))) = {
+    /* 8 width cases */
+    0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+    0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20
+};
+
 #define HEVC_BIW_RND_CLIP2(in0, in1, vec0, vec1, wgt, rnd, offset,  \
-                           out0_r, out1_r, out0_l, out1_l)          \
+                           out0, out1)                              \
 {                                                                   \
+    v4i32 out0_r, out1_r, out0_l, out1_l;                           \
+                                                                    \
     ILVR_H2_SW(in0, vec0, in1, vec1, out0_r, out1_r);               \
     ILVL_H2_SW(in0, vec0, in1, vec1, out0_l, out1_l);               \
                                                                     \
@@ -34,37 +42,41 @@
     out1_l = __msa_dpadd_s_w(offset, (v8i16) out1_l, (v8i16) wgt);  \
                                                                     \
     SRAR_W4_SW(out0_r, out1_r, out0_l, out1_l, rnd);                \
-                                                                    \
-    out0_r = CLIP_SW_0_255(out0_r);                                 \
-    out1_r = CLIP_SW_0_255(out1_r);                                 \
-    out0_l = CLIP_SW_0_255(out0_l);                                 \
-    out1_l = CLIP_SW_0_255(out1_l);                                 \
+    PCKEV_H2_SH(out0_l, out0_r, out1_l, out1_r, out0, out1);        \
+    CLIP_SH2_0_255(out0, out1);                                     \
 }
 
-#define HEVC_BIW_RND_CLIP4(in0, in1, in2, in3, vec0, vec1, vec2, vec3,  \
-                           wgt, rnd, offset,                            \
-                           out0_r, out1_r, out2_r, out3_r,              \
-                           out0_l, out1_l, out2_l, out3_l)              \
-{                                                                       \
-    HEVC_BIW_RND_CLIP2(in0, in1, vec0, vec1, wgt, rnd, offset,          \
-                       out0_r, out1_r, out0_l, out1_l)                  \
-    HEVC_BIW_RND_CLIP2(in2, in3, vec2, vec3, wgt, rnd, offset,          \
-                       out2_r, out3_r, out2_l, out3_l)                  \
+#define HEVC_BIW_RND_CLIP4(in0, in1, in2, in3, vec0, vec1, vec2, vec3,       \
+                           wgt, rnd, offset, out0, out1, out2, out3)         \
+{                                                                            \
+    HEVC_BIW_RND_CLIP2(in0, in1, vec0, vec1, wgt, rnd, offset, out0, out1);  \
+    HEVC_BIW_RND_CLIP2(in2, in3, vec2, vec3, wgt, rnd, offset, out2, out3);  \
 }
 
-#define HEVC_BI_RND_CLIP2(in0, in1, vec0, vec1, rnd_val, out0, out1)  \
-{                                                                     \
-    ADDS_SH2_SH(vec0, in0, vec1, in1, out0, out1);                    \
-    SRARI_H2_SH(out0, out1, rnd_val);                                 \
-    CLIP_SH2_0_255(out0, out1);                                       \
+#define HEVC_BIW_RND_CLIP2_MAX_SATU(in0, in1, vec0, vec1, wgt, rnd,  \
+                                    offset, out0, out1)              \
+{                                                                    \
+    v4i32 out0_r, out1_r, out0_l, out1_l;                            \
+                                                                     \
+    ILVR_H2_SW(in0, vec0, in1, vec1, out0_r, out1_r);                \
+    ILVL_H2_SW(in0, vec0, in1, vec1, out0_l, out1_l);                \
+    out0_r = __msa_dpadd_s_w(offset, (v8i16) out0_r, (v8i16) wgt);   \
+    out1_r = __msa_dpadd_s_w(offset, (v8i16) out1_r, (v8i16) wgt);   \
+    out0_l = __msa_dpadd_s_w(offset, (v8i16) out0_l, (v8i16) wgt);   \
+    out1_l = __msa_dpadd_s_w(offset, (v8i16) out1_l, (v8i16) wgt);   \
+    SRAR_W4_SW(out0_r, out1_r, out0_l, out1_l, rnd);                 \
+    PCKEV_H2_SH(out0_l, out0_r, out1_l, out1_r, out0, out1);         \
+    CLIP_SH2_0_255_MAX_SATU(out0, out1);                             \
 }
 
-#define HEVC_BI_RND_CLIP4(in0, in1, in2, in3,                      \
-                          vec0, vec1, vec2, vec3, rnd_val,         \
-                          out0, out1, out2, out3)                  \
-{                                                                  \
-    HEVC_BI_RND_CLIP2(in0, in1, vec0, vec1, rnd_val, out0, out1);  \
-    HEVC_BI_RND_CLIP2(in2, in3, vec2, vec3, rnd_val, out2, out3);  \
+#define HEVC_BIW_RND_CLIP4_MAX_SATU(in0, in1, in2, in3, vec0, vec1, vec2,  \
+                                    vec3, wgt, rnd, offset, out0, out1,    \
+                                    out2, out3)                            \
+{                                                                          \
+    HEVC_BIW_RND_CLIP2_MAX_SATU(in0, in1, vec0, vec1, wgt, rnd, offset,    \
+                                out0, out1);                               \
+    HEVC_BIW_RND_CLIP2_MAX_SATU(in2, in3, vec2, vec3, wgt, rnd, offset,    \
+                                out2, out3);                               \
 }
 
 static void hevc_biwgt_copy_4w_msa(uint8_t *src0_ptr,
@@ -80,93 +92,77 @@
                                    int32_t offset1,
                                    int32_t rnd_val)
 {
+    uint32_t loop_cnt, tp0, tp1, tp2, tp3;
+    uint64_t tpd0, tpd1, tpd2, tpd3;
     int32_t offset, weight;
+    v16u8 out0, out1;
     v16i8 zero = { 0 };
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v16i8 src0 = { 0 }, src1 = { 0 };
+    v8i16 in0 = { 0 }, in1 = { 0 }, in2 = { 0 }, in3 = { 0 };
+    v8i16 dst0, dst1, dst2, dst3, weight_vec;
+    v4i32 dst0_r, dst0_l, offset_vec, rnd_vec;
 
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
     offset_vec = __msa_fill_w(offset);
-    weight_vec = __msa_fill_w(weight);
+    weight_vec = (v8i16) __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
 
     if (2 == height) {
-        v16i8 src0, src1;
-        v8i16 in0, in1, dst0;
-        v4i32 dst0_r, dst0_l;
-
-        LD_SB2(src0_ptr, src_stride, src0, src1);
-        LD_SH2(src1_ptr, src2_stride, in0, in1);
-        in0 = (v8i16) __msa_ilvr_d((v2i64) in1, (v2i64) in0);
-        src0 = (v16i8) __msa_ilvr_w((v4i32) src1, (v4i32) src0);
+        LW2(src0_ptr, src_stride, tp0, tp1);
+        INSERT_W2_SB(tp0, tp1, src0);
+        LD2(src1_ptr, src2_stride, tpd0, tpd1);
+        INSERT_D2_SH(tpd0, tpd1, in0);
 
         dst0 = (v8i16) __msa_ilvr_b(zero, src0);
         dst0 <<= 6;
 
         ILVRL_H2_SW(dst0, in0, dst0_r, dst0_l);
-        dst0_r = __msa_dpadd_s_w(offset_vec, (v8i16) dst0_r,
-                                 (v8i16) weight_vec);
-        dst0_l = __msa_dpadd_s_w(offset_vec, (v8i16) dst0_l,
-                                 (v8i16) weight_vec);
+        dst0_r = __msa_dpadd_s_w(offset_vec, (v8i16) dst0_r, weight_vec);
+        dst0_l = __msa_dpadd_s_w(offset_vec, (v8i16) dst0_l, weight_vec);
         SRAR_W2_SW(dst0_r, dst0_l, rnd_vec);
-        dst0_r = CLIP_SW_0_255(dst0_r);
-        dst0_l = CLIP_SW_0_255(dst0_l);
-
-        HEVC_PCK_SW_SB2(dst0_l, dst0_r, dst0_r);
-        ST4x2_UB(dst0_r, dst, dst_stride);
+        dst0 = (v8i16) __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
+        dst0 = CLIP_SH_0_255_MAX_SATU(dst0);
+        out0 = (v16u8) __msa_pckev_b((v16i8) dst0, (v16i8) dst0);
+        ST4x2_UB(out0, dst, dst_stride);
     } else if (4 == height) {
-        v16i8 src0, src1, src2, src3;
-        v8i16 in0, in1, in2, in3;
-        v8i16 dst0, dst1;
-        v4i32 dst0_r, dst1_r, dst0_l, dst1_l;
-
-        LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
-        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
-        ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
-        ILVR_W2_SB(src1, src0, src3, src2, src0, src1);
-        ILVR_B2_SH(zero, src0, zero, src1, dst0, dst1);
-        dst0 <<= 6;
-        dst1 <<= 6;
-        HEVC_BIW_RND_CLIP2(dst0, dst1, in0, in1,
-                           weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst0_l, dst1_l);
-
-        HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-        ST4x4_UB(dst0_r, dst0_r, 0, 1, 2, 3, dst, dst_stride);
+        LW4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+        INSERT_W4_SB(tp0, tp1, tp2, tp3, src0);
+        LD4(src1_ptr, src2_stride, tpd0, tpd1, tpd2, tpd3);
+        INSERT_D2_SH(tpd0, tpd1, in0);
+        INSERT_D2_SH(tpd2, tpd3, in1);
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        SLLI_2V(dst0, dst1, 6);
+        HEVC_BIW_RND_CLIP2_MAX_SATU(dst0, dst1, in0, in1, weight_vec, rnd_vec,
+                                    offset_vec, dst0, dst1);
+        out0 = (v16u8) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
+        ST4x4_UB(out0, out0, 0, 1, 2, 3, dst, dst_stride);
     } else if (0 == height % 8) {
-        uint32_t loop_cnt;
-        v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
-        v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
-        v8i16 dst0, dst1, dst2, dst3;
-        v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-
         for (loop_cnt = (height >> 3); loop_cnt--;) {
-            LD_SB8(src0_ptr, src_stride,
-                   src0, src1, src2, src3, src4, src5, src6, src7);
-            src0_ptr += (8 * src_stride);
-            LD_SH8(src1_ptr, src2_stride,
-                   in0, in1, in2, in3, in4, in5, in6, in7);
-            src1_ptr += (8 * src2_stride);
-
-            ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
-            ILVR_D2_SH(in5, in4, in7, in6, in2, in3);
-            ILVR_W4_SB(src1, src0, src3, src2, src5, src4, src7, src6,
-                       src0, src1, src2, src3);
-            ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                       dst0, dst1, dst2, dst3);
-
+            LW4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+            src0_ptr += 4 * src_stride;
+            INSERT_W4_SB(tp0, tp1, tp2, tp3, src0);
+            LW4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+            src0_ptr += 4 * src_stride;
+            INSERT_W4_SB(tp0, tp1, tp2, tp3, src1);
+            LD4(src1_ptr, src2_stride, tpd0, tpd1, tpd2, tpd3);
+            src1_ptr += (4 * src2_stride);
+            INSERT_D2_SH(tpd0, tpd1, in0);
+            INSERT_D2_SH(tpd2, tpd3, in1);
+            LD4(src1_ptr, src2_stride, tpd0, tpd1, tpd2, tpd3);
+            src1_ptr += (4 * src2_stride);
+            INSERT_D2_SH(tpd0, tpd1, in2);
+            INSERT_D2_SH(tpd2, tpd3, in3);
+            ILVRL_B2_SH(zero, src0, dst0, dst1);
+            ILVRL_B2_SH(zero, src1, dst2, dst3);
             SLLI_4V(dst0, dst1, dst2, dst3, 6);
-            HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                               in0, in1, in2, in3,
-                               weight_vec, rnd_vec, offset_vec,
-                               dst0_r, dst1_r, dst2_r, dst3_r,
-                               dst0_l, dst1_l, dst2_l, dst3_l);
-
-            HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                            dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-            ST4x8_UB(dst0_r, dst1_r, dst, dst_stride);
+            HEVC_BIW_RND_CLIP4_MAX_SATU(dst0, dst1, dst2, dst3, in0, in1, in2,
+                                        in3, weight_vec, rnd_vec, offset_vec,
+                                        dst0, dst1, dst2, dst3);
+            PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+            ST4x8_UB(out0, out1, dst, dst_stride);
             dst += (8 * dst_stride);
         }
     }
@@ -187,11 +183,12 @@
 {
     uint32_t loop_cnt;
     int32_t offset, weight;
+    uint64_t tp0, tp1, tp2, tp3;
+    v16u8 out0, out1;
     v16i8 zero = { 0 };
-    v16i8 src0, src1, src2, src3;
+    v16i8 src0 = { 0 }, src1 = { 0 };
     v8i16 in0, in1, in2, in3;
     v8i16 dst0, dst1, dst2, dst3;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
     v4i32 offset_vec, weight_vec, rnd_vec;
 
     offset = (offset0 + offset1) << rnd_val;
@@ -203,23 +200,21 @@
     rnd_vec = __msa_fill_w(rnd_val + 1);
 
     for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
+        LD4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
         src0_ptr += (4 * src_stride);
+        INSERT_D2_SB(tp0, tp1, src0);
+        INSERT_D2_SB(tp2, tp3, src1);
         LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
         src1_ptr += (4 * src2_stride);
-        ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                   dst0, dst1, dst2, dst3);
-
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        ILVRL_B2_SH(zero, src1, dst2, dst3);
         SLLI_4V(dst0, dst1, dst2, dst3, 6);
-        HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                           in0, in1, in2, in3,
-                           weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST6x4_UB(dst0_r, dst1_r, dst, dst_stride);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(dst0, dst1, dst2, dst3,
+                                    in0, in1, in2, in3,
+                                    weight_vec, rnd_vec, offset_vec,
+                                    dst0, dst1, dst2, dst3);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        ST6x4_UB(out0, out1, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -237,8 +232,13 @@
                                    int32_t offset1,
                                    int32_t rnd_val)
 {
+    uint64_t tp0, tp1, tp2, tp3;
     int32_t offset, weight;
+    v16u8 out0, out1, out2;
     v16i8 zero = { 0 };
+    v16i8 src0 = { 0 }, src1 = { 0 }, src2 = { 0 };
+    v8i16 in0, in1, in2, in3, in4, in5;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v4i32 offset_vec, weight_vec, rnd_vec;
 
     offset = (offset0 + offset1) << rnd_val;
@@ -250,80 +250,59 @@
     rnd_vec = __msa_fill_w(rnd_val + 1);
 
     if (2 == height) {
-        v16i8 src0, src1;
-        v8i16 in0, in1, dst0, dst1;
-        v4i32 dst0_r, dst1_r, dst0_l, dst1_l;
-
-        LD_SB2(src0_ptr, src_stride, src0, src1);
+        LD2(src0_ptr, src_stride, tp0, tp1);
+        INSERT_D2_SB(tp0, tp1, src0);
         LD_SH2(src1_ptr, src2_stride, in0, in1);
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        SLLI_2V(dst0, dst1, 6);
 
-        ILVR_B2_SH(zero, src0, zero, src1, dst0, dst1);
-
-        dst0 <<= 6;
-        dst1 <<= 6;
         HEVC_BIW_RND_CLIP2(dst0, dst1, in0, in1,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst0_l, dst1_l);
+                           dst0, dst1);
 
-        HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-        ST8x2_UB(dst0_r, dst, dst_stride);
+        out0 = (v16u8) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
+        ST8x2_UB(out0, dst, dst_stride);
     } else if (6 == height) {
-        v16i8 src0, src1, src2, src3, src4, src5;
-        v8i16 in0, in1, in2, in3, in4, in5;
-        v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-        v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-        v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
-
-        LD_SB6(src0_ptr, src_stride, src0, src1, src2, src3, src4, src5);
+        LD4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
+        src0_ptr += 4 * src_stride;
+        INSERT_D2_SB(tp0, tp1, src0);
+        INSERT_D2_SB(tp2, tp3, src1);
+        LD2(src0_ptr, src_stride, tp0, tp1);
+        INSERT_D2_SB(tp0, tp1, src2);
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        ILVRL_B2_SH(zero, src1, dst2, dst3);
+        ILVRL_B2_SH(zero, src2, dst4, dst5);
         LD_SH6(src1_ptr, src2_stride, in0, in1, in2, in3, in4, in5);
-        ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                   dst0, dst1, dst2, dst3);
-        ILVR_B2_SH(zero, src4, zero, src5, dst4, dst5);
-
         SLLI_4V(dst0, dst1, dst2, dst3, 6);
-        dst4 <<= 6;
-        dst5 <<= 6;
-        HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                           in0, in1, in2, in3,
-                           weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
-        HEVC_BIW_RND_CLIP2(dst4, dst5, in4, in5,
-                           weight_vec, rnd_vec, offset_vec,
-                           dst4_r, dst5_r, dst4_l, dst5_l);
-
-        HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                         dst2_l, dst2_r, dst3_l, dst3_r,
-                         dst4_l, dst4_r, dst5_l, dst5_r,
-                         dst0_r, dst1_r, dst2_r);
-        ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+        SLLI_2V(dst4, dst5, 6);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(dst0, dst1, dst2, dst3, in0, in1, in2, in3,
+                                    weight_vec, rnd_vec, offset_vec, dst0, dst1,
+                                    dst2, dst3);
+        HEVC_BIW_RND_CLIP2_MAX_SATU(dst4, dst5, in4, in5, weight_vec, rnd_vec,
+                                    offset_vec, dst4, dst5);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        ST8x4_UB(out0, out1, dst, dst_stride);
         dst += (4 * dst_stride);
-        ST8x2_UB(dst2_r, dst, dst_stride);
+        ST8x2_UB(out2, dst, dst_stride);
     } else if (0 == height % 4) {
         uint32_t loop_cnt;
-        v16i8 src0, src1, src2, src3;
-        v8i16 in0, in1, in2, in3;
-        v8i16 dst0, dst1, dst2, dst3;
-        v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
 
         for (loop_cnt = (height >> 2); loop_cnt--;) {
-            LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
+            LD4(src0_ptr, src_stride, tp0, tp1, tp2, tp3);
             src0_ptr += (4 * src_stride);
+            INSERT_D2_SB(tp0, tp1, src0);
+            INSERT_D2_SB(tp2, tp3, src1);
+            ILVRL_B2_SH(zero, src0, dst0, dst1);
+            ILVRL_B2_SH(zero, src1, dst2, dst3);
             LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
             src1_ptr += (4 * src2_stride);
-            ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                       dst0, dst1, dst2, dst3);
 
             SLLI_4V(dst0, dst1, dst2, dst3, 6);
-            HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                               in0, in1, in2, in3,
-                               weight_vec, rnd_vec, offset_vec,
-                               dst0_r, dst1_r, dst2_r, dst3_r,
-                               dst0_l, dst1_l, dst2_l, dst3_l);
-
-            HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                            dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-            ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+            HEVC_BIW_RND_CLIP4_MAX_SATU(dst0, dst1, dst2, dst3, in0, in1, in2,
+                                        in3, weight_vec, rnd_vec, offset_vec,
+                                        dst0, dst1, dst2, dst3);
+            PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+            ST8x4_UB(out0, out1, dst, dst_stride);
             dst += (4 * dst_stride);
         }
     }
@@ -345,11 +324,10 @@
     uint32_t loop_cnt;
     int32_t offset, weight;
     v16i8 zero = { 0 };
+    v16u8 out0, out1, out2;
     v16i8 src0, src1, src2, src3;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
     v4i32 offset_vec, weight_vec, rnd_vec;
 
     offset = (offset0 + offset1) << rnd_val;
@@ -377,108 +355,17 @@
 
         dst4 <<= 6;
         dst5 <<= 6;
-        HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                           in0, in1, in2, in3,
-                           weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
-        HEVC_BIW_RND_CLIP2(dst4, dst5, in4, in5,
-                           weight_vec, rnd_vec, offset_vec,
-                           dst4_r, dst5_r, dst4_l, dst5_l);
-
-        HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                         dst2_l, dst2_r, dst3_l, dst3_r,
-                         dst4_l, dst4_r, dst5_l, dst5_r,
-                         dst0_r, dst1_r, dst2_r);
-        ST12x4_UB(dst0_r, dst1_r, dst2_r, dst, dst_stride);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(dst0, dst1, dst2, dst3, in0, in1, in2, in3,
+                                    weight_vec, rnd_vec, offset_vec, dst0, dst1,
+                                    dst2, dst3);
+        HEVC_BIW_RND_CLIP2_MAX_SATU(dst4, dst5, in4, in5, weight_vec, rnd_vec,
+                                    offset_vec, dst4, dst5);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        ST12x4_UB(out0, out1, out2, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
 
-static void hevc_biwgt_copy_16multx4mult_msa(uint8_t *src0_ptr,
-                                             int32_t src_stride,
-                                             int16_t *src1_ptr,
-                                             int32_t src2_stride,
-                                             uint8_t *dst,
-                                             int32_t dst_stride,
-                                             int32_t height,
-                                             int32_t weight0,
-                                             int32_t weight1,
-                                             int32_t offset0,
-                                             int32_t offset1,
-                                             int32_t rnd_val,
-                                             int32_t width)
-{
-    uint32_t loop_cnt, cnt;
-    uint8_t *src0_ptr_tmp;
-    int16_t *src1_ptr_tmp;
-    uint8_t *dst_tmp;
-    int32_t offset, weight;
-    v16i8 zero = { 0 };
-    v4i32 offset_vec, weight_vec, rnd_vec;
-
-    offset = (offset0 + offset1) << rnd_val;
-    weight0 = weight0 & 0x0000FFFF;
-    weight = weight0 | (weight1 << 16);
-
-    offset_vec = __msa_fill_w(offset);
-    weight_vec = __msa_fill_w(weight);
-    rnd_vec = __msa_fill_w(rnd_val + 1);
-
-    for (cnt = (width >> 4); cnt--;) {
-        src0_ptr_tmp = src0_ptr;
-        src1_ptr_tmp = src1_ptr;
-        dst_tmp = dst;
-
-        for (loop_cnt = (height >> 2); loop_cnt--;) {
-            v16i8 src0, src1, src2, src3;
-            v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
-            v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-            v4i32 dst0_r, dst1_r, dst2_r, dst3_r;
-            v4i32 dst0_l, dst1_l, dst2_l, dst3_l;
-
-            LD_SB4(src0_ptr_tmp, src_stride, src0, src1, src2, src3);
-            src0_ptr_tmp += (4 * src_stride);
-            LD_SH4(src1_ptr_tmp, src2_stride, in0, in1, in2, in3);
-            LD_SH4(src1_ptr_tmp + 8, src2_stride, in4, in5, in6, in7);
-            src1_ptr_tmp += (4 * src2_stride);
-
-            ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                       tmp0, tmp1, tmp2, tmp3);
-            ILVL_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3,
-                       tmp4, tmp5, tmp6, tmp7);
-
-            SLLI_4V(tmp0, tmp1, tmp2, tmp3, 6);
-            SLLI_4V(tmp4, tmp5, tmp6, tmp7, 6);
-            HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp4, tmp5,
-                               in0, in1, in4, in5,
-                               weight_vec, rnd_vec, offset_vec,
-                               dst0_r, dst1_r, dst2_r, dst3_r,
-                               dst0_l, dst1_l, dst2_l, dst3_l);
-
-            HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                            dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-            ST_SW2(dst0_r, dst1_r, dst_tmp, dst_stride);
-            dst_tmp += (2 * dst_stride);
-
-            HEVC_BIW_RND_CLIP4(tmp2, tmp3, tmp6, tmp7,
-                               in2, in3, in6, in7,
-                               weight_vec, rnd_vec, offset_vec,
-                               dst0_r, dst1_r, dst2_r, dst3_r,
-                               dst0_l, dst1_l, dst2_l, dst3_l);
-
-            HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                            dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-            ST_SW2(dst0_r, dst1_r, dst_tmp, dst_stride);
-            dst_tmp += (2 * dst_stride);
-        }
-
-        src0_ptr += 16;
-        src1_ptr += 16;
-        dst += 16;
-    }
-}
-
 static void hevc_biwgt_copy_16w_msa(uint8_t *src0_ptr,
                                     int32_t src_stride,
                                     int16_t *src1_ptr,
@@ -492,10 +379,46 @@
                                     int32_t offset1,
                                     int32_t rnd_val)
 {
-    hevc_biwgt_copy_16multx4mult_msa(src0_ptr, src_stride,
-                                     src1_ptr, src2_stride,
-                                     dst, dst_stride, height, weight0,
-                                     weight1, offset0, offset1, rnd_val, 16);
+    uint32_t loop_cnt;
+    int32_t offset, weight;
+    v16u8 out0, out1, out2, out3;
+    v16i8 zero = { 0 };
+    v16i8 src0, src1, src2, src3;
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v4i32 offset_vec, weight_vec, rnd_vec;
+
+    offset = (offset0 + offset1) << rnd_val;
+    weight0 = weight0 & 0x0000FFFF;
+    weight = weight0 | (weight1 << 16);
+
+    offset_vec = __msa_fill_w(offset);
+    weight_vec = __msa_fill_w(weight);
+    rnd_vec = __msa_fill_w(rnd_val + 1);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
+        src0_ptr += (4 * src_stride);
+        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
+        LD_SH4(src1_ptr + 8, src2_stride, in4, in5, in6, in7);
+        src1_ptr += (4 * src2_stride);
+        ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3, tmp0, tmp1,
+                   tmp2, tmp3);
+        ILVL_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3, tmp4, tmp5,
+                   tmp6, tmp7);
+        SLLI_4V(tmp0, tmp1, tmp2, tmp3, 6);
+        SLLI_4V(tmp4, tmp5, tmp6, tmp7, 6);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(tmp0, tmp1, tmp4, tmp5, in0, in1, in4, in5,
+                                    weight_vec, rnd_vec, offset_vec, tmp0, tmp1,
+                                    tmp4, tmp5);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(tmp2, tmp3, tmp6, tmp7, in2, in3, in6, in7,
+                                    weight_vec, rnd_vec, offset_vec, tmp2, tmp3,
+                                    tmp6, tmp7);
+        PCKEV_B2_UB(tmp4, tmp0, tmp5, tmp1, out0, out1);
+        PCKEV_B2_UB(tmp6, tmp2, tmp7, tmp3, out2, out3);
+        ST_UB4(out0, out1, out2, out3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
 }
 
 static void hevc_biwgt_copy_24w_msa(uint8_t *src0_ptr,
@@ -511,14 +434,55 @@
                                     int32_t offset1,
                                     int32_t rnd_val)
 {
-    hevc_biwgt_copy_16multx4mult_msa(src0_ptr, src_stride,
-                                     src1_ptr, src2_stride,
-                                     dst, dst_stride, height, weight0,
-                                     weight1, offset0, offset1, rnd_val, 16);
-    hevc_biwgt_copy_8w_msa(src0_ptr + 16, src_stride,
-                           src1_ptr + 16, src2_stride,
-                           dst + 16, dst_stride, height, weight0,
-                           weight1, offset0, offset1, rnd_val);
+    uint32_t loop_cnt;
+    int32_t offset, weight;
+    v16u8 out0, out1, out2, out3, out4, out5;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, zero = { 0 };
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, dst9, dst10;
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, dst11;
+    v4i32 offset_vec, weight_vec, rnd_vec;
+
+    offset = (offset0 + offset1) << rnd_val;
+    weight0 = weight0 & 0x0000FFFF;
+    weight = weight0 | (weight1 << 16);
+
+    offset_vec = __msa_fill_w(offset);
+    weight_vec = __msa_fill_w(weight);
+    rnd_vec = __msa_fill_w(rnd_val + 1);
+
+    for (loop_cnt = 8; loop_cnt--;) {
+        LD_SB4(src0_ptr, src_stride, src0, src1, src4, src5);
+        LD_SB4(src0_ptr + 16, src_stride, src2, src3, src6, src7);
+        src0_ptr += (4 * src_stride);
+        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
+        LD_SH4(src1_ptr + 8, src2_stride, in4, in5, in6, in7);
+        LD_SH4(src1_ptr + 16, src2_stride, in8, in9, in10, in11);
+        src1_ptr += (4 * src2_stride);
+
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        ILVRL_B2_SH(zero, src1, dst2, dst3);
+        ILVR_B2_SH(zero, src2, zero, src3, dst4, dst5);
+        ILVRL_B2_SH(zero, src4, dst6, dst7);
+        ILVRL_B2_SH(zero, src5, dst8, dst9);
+        ILVR_B2_SH(zero, src6, zero, src7, dst10, dst11);
+        SLLI_4V(dst0, dst1, dst2, dst3, 6);
+        SLLI_4V(dst4, dst5, dst6, dst7, 6);
+        SLLI_4V(dst8, dst9, dst10, dst11, 6);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(dst0, dst1, dst2, dst3, in0, in4, in1, in5,
+                                    weight_vec, rnd_vec, offset_vec, dst0, dst1,
+                                    dst2, dst3);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(dst4, dst5, dst6, dst7, in8, in9, in2, in6,
+                                    weight_vec, rnd_vec, offset_vec, dst4, dst5,
+                                    dst6, dst7);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(dst8, dst9, dst10, dst11, in3, in7, in10,
+                                    in11, weight_vec, rnd_vec, offset_vec,
+                                    dst8, dst9, dst10, dst11);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        PCKEV_B3_UB(dst7, dst6, dst9, dst8, dst11, dst10, out3, out4, out5);
+        ST_UB4(out0, out1, out3, out4, dst, dst_stride);
+        ST8x4_UB(out2, out5, dst + 16, dst_stride);
+        dst += (4 * dst_stride);
+    }
 }
 
 static void hevc_biwgt_copy_32w_msa(uint8_t *src0_ptr,
@@ -534,10 +498,52 @@
                                     int32_t offset1,
                                     int32_t rnd_val)
 {
-    hevc_biwgt_copy_16multx4mult_msa(src0_ptr, src_stride,
-                                     src1_ptr, src2_stride,
-                                     dst, dst_stride, height, weight0,
-                                     weight1, offset0, offset1, rnd_val, 32);
+    uint32_t loop_cnt;
+    int32_t offset, weight;
+    v16u8 out0, out1, out2, out3;
+    v16i8 zero = { 0 };
+    v16i8 src0, src1, src2, src3;
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v4i32 offset_vec, weight_vec, rnd_vec;
+
+    offset = (offset0 + offset1) << rnd_val;
+    weight0 = weight0 & 0x0000FFFF;
+    weight = weight0 | (weight1 << 16);
+
+    offset_vec = __msa_fill_w(offset);
+    weight_vec = __msa_fill_w(weight);
+    rnd_vec = __msa_fill_w(rnd_val + 1);
+
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        LD_SB2(src0_ptr, 16, src0, src1);
+        src0_ptr += src_stride;
+        LD_SB2(src0_ptr, 16, src2, src3);
+        src0_ptr += src_stride;
+        LD_SH4(src1_ptr, 8, in0, in1, in2, in3);
+        src1_ptr += src2_stride;
+        LD_SH4(src1_ptr, 8, in4, in5, in6, in7);
+        src1_ptr += src2_stride;
+
+        ILVRL_B2_SH(zero, src0, tmp0, tmp4);
+        ILVRL_B2_SH(zero, src1, tmp1, tmp5);
+        ILVRL_B2_SH(zero, src2, tmp2, tmp6);
+        ILVRL_B2_SH(zero, src3, tmp3, tmp7);
+        SLLI_4V(tmp0, tmp1, tmp2, tmp3, 6);
+        SLLI_4V(tmp4, tmp5, tmp6, tmp7, 6);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(tmp0, tmp4, tmp1, tmp5, in0, in1, in2, in3,
+                                    weight_vec, rnd_vec, offset_vec, tmp0, tmp4,
+                                    tmp1, tmp5);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(tmp2, tmp6, tmp3, tmp7, in4, in5, in6, in7,
+                                    weight_vec, rnd_vec, offset_vec, tmp2, tmp6,
+                                    tmp3, tmp7);
+        PCKEV_B2_UB(tmp4, tmp0, tmp5, tmp1, out0, out1);
+        PCKEV_B2_UB(tmp6, tmp2, tmp7, tmp3, out2, out3);
+        ST_UB2(out0, out1, dst, 16);
+        dst += dst_stride;
+        ST_UB2(out2, out3, dst, 16);
+        dst += dst_stride;
+    }
 }
 
 static void hevc_biwgt_copy_48w_msa(uint8_t *src0_ptr,
@@ -553,10 +559,43 @@
                                     int32_t offset1,
                                     int32_t rnd_val)
 {
-    hevc_biwgt_copy_16multx4mult_msa(src0_ptr, src_stride,
-                                     src1_ptr, src2_stride,
-                                     dst, dst_stride, height, weight0,
-                                     weight1, offset0, offset1, rnd_val, 48);
+    uint32_t loop_cnt;
+    int32_t offset, weight;
+    v16u8 out0, out1, out2;
+    v16i8 src0, src1, src2;
+    v16i8 zero = { 0 };
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, in0, in1, in2, in3, in4, in5;
+    v4i32 offset_vec, weight_vec, rnd_vec;
+
+    offset = (offset0 + offset1) << rnd_val;
+    weight0 = weight0 & 0x0000FFFF;
+    weight = weight0 | (weight1 << 16);
+
+    offset_vec = __msa_fill_w(offset);
+    weight_vec = __msa_fill_w(weight);
+    rnd_vec = __msa_fill_w(rnd_val + 1);
+
+    for (loop_cnt = 64; loop_cnt--;) {
+        LD_SB3(src0_ptr, 16, src0, src1, src2);
+        src0_ptr += src_stride;
+        LD_SH6(src1_ptr, 8, in0, in1, in2, in3, in4, in5);
+        src1_ptr += src2_stride;
+
+        ILVRL_B2_SH(zero, src0, dst0, dst1);
+        ILVRL_B2_SH(zero, src1, dst2, dst3);
+        ILVRL_B2_SH(zero, src2, dst4, dst5);
+        SLLI_4V(dst0, dst1, dst2, dst3, 6);
+        SLLI_2V(dst4, dst5, 6);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(dst0, dst1, dst2, dst3, in0, in1, in2, in3,
+                                    weight_vec, rnd_vec, offset_vec, dst0, dst1,
+                                    dst2, dst3);
+        HEVC_BIW_RND_CLIP2_MAX_SATU(dst4, dst5, in4, in5, weight_vec, rnd_vec,
+                                    offset_vec, dst4, dst5);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        ST_UB2(out0, out1, dst, 16);
+        ST_UB(out2, dst + 32);
+        dst += dst_stride;
+    }
 }
 
 static void hevc_biwgt_copy_64w_msa(uint8_t *src0_ptr,
@@ -572,10 +611,46 @@
                                     int32_t offset1,
                                     int32_t rnd_val)
 {
-    hevc_biwgt_copy_16multx4mult_msa(src0_ptr, src_stride,
-                                     src1_ptr, src2_stride,
-                                     dst, dst_stride, height, weight0,
-                                     weight1, offset0, offset1, rnd_val, 64);
+    uint32_t loop_cnt;
+    int32_t offset, weight;
+    v16u8 out0, out1, out2, out3;
+    v16i8 zero = { 0 };
+    v16i8 src0, src1, src2, src3;
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v4i32 offset_vec, weight_vec, rnd_vec;
+
+    offset = (offset0 + offset1) << rnd_val;
+    weight0 = weight0 & 0x0000FFFF;
+    weight = weight0 | (weight1 << 16);
+
+    offset_vec = __msa_fill_w(offset);
+    weight_vec = __msa_fill_w(weight);
+    rnd_vec = __msa_fill_w(rnd_val + 1);
+
+    for (loop_cnt = height; loop_cnt--;) {
+        LD_SB4(src0_ptr, 16, src0, src1, src2, src3);
+        src0_ptr += src_stride;
+        LD_SH8(src1_ptr, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+        src1_ptr += src2_stride;
+
+        ILVR_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3, tmp0, tmp1,
+                   tmp2, tmp3);
+        ILVL_B4_SH(zero, src0, zero, src1, zero, src2, zero, src3, tmp4, tmp5,
+                   tmp6, tmp7);
+        SLLI_4V(tmp0, tmp1, tmp2, tmp3, 6);
+        SLLI_4V(tmp4, tmp5, tmp6, tmp7, 6);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(tmp0, tmp4, tmp1, tmp5, in0, in1, in2, in3,
+                                    weight_vec, rnd_vec, offset_vec, tmp0, tmp4,
+                                    tmp1, tmp5);
+        HEVC_BIW_RND_CLIP4_MAX_SATU(tmp2, tmp6, tmp3, tmp7, in4, in5, in6, in7,
+                                    weight_vec, rnd_vec, offset_vec, tmp2, tmp6,
+                                    tmp3, tmp7);
+        PCKEV_B2_UB(tmp4, tmp0, tmp5, tmp1, out0, out1);
+        PCKEV_B2_UB(tmp6, tmp2, tmp7, tmp3, out2, out3);
+        ST_UB4(out0, out1, out2, out3, dst, 16);
+        dst += dst_stride;
+    }
 }
 
 static void hevc_hz_biwgt_8t_4w_msa(uint8_t *src0_ptr,
@@ -593,17 +668,16 @@
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v8i16 filt0, filt1, filt2, filt3;
     v16i8 src0, src1, src2, src3;
     v16i8 mask1, mask2, mask3;
     v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1;
     v8i16 in0, in1, in2, in3;
-    v4i32 dst0_r, dst1_r, dst0_l, dst1_l;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec, out0, out1;
     v4i32 weight_vec, offset_vec, rnd_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
 
     src0_ptr -= 3;
     filter_vec = LD_SH(filter);
@@ -616,9 +690,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -633,21 +708,19 @@
 
         VSHF_B4_SB(src0, src1, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src2, src3, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
+        dst1 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
 
         HEVC_BIW_RND_CLIP2(dst0, dst1, in0, in1,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst0_l, dst1_l);
+                           out0, out1);
 
-        HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-        ST4x4_UB(dst0_r, dst0_r, 0, 1, 2, 3, dst, dst_stride);
+        out0 = (v8i16) __msa_pckev_b((v16i8) out1, (v16i8) out0);
+        ST4x4_UB(out0, out0, 0, 1, 2, 3, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -667,25 +740,25 @@
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v8i16 filt0, filt1, filt2, filt3;
     v16i8 src0, src1, src2, src3;
     v16i8 mask1, mask2, mask3;
     v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 in0, in1, in2, in3;
-    v4i32 dst0_r, dst1_r, dst0_l, dst1_l, dst2_r, dst3_r, dst2_l, dst3_l;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec, out0, out1, out2, out3;
     v4i32 weight_vec, offset_vec, rnd_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
 
     src0_ptr -= 3;
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -706,34 +779,28 @@
 
         VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
+        dst1 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
+        dst2 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        dst3 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
 
         HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           out0, out1, out2, out3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(out1, out0, out3, out2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -752,12 +819,85 @@
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    hevc_hz_biwgt_8t_8w_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                            dst, dst_stride, filter, height,
-                            weight0, weight1, offset0, offset1, rnd_val);
-    hevc_hz_biwgt_8t_4w_msa(src0_ptr + 8, src_stride, src1_ptr + 8, src2_stride,
-                            dst + 8, dst_stride, filter, height,
-                            weight0, weight1, offset0, offset1, rnd_val);
+    uint32_t loop_cnt;
+    int32_t offset, weight, constant;
+    v16i8 src0, src1, src2, src3, vec0, vec1, vec2, vec3;
+    v16i8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7;
+    v8i16 filt0, filt1, filt2, filt3, out0, out1, out2, out3;
+    v8i16 dst0, dst1, dst2, dst3, in0, in1, in2, in3, filter_vec;
+    v4i32 weight_vec, offset_vec, rnd_vec;
+
+    src0_ptr -= 3;
+
+    weight0 = weight0 & 0x0000FFFF;
+    weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset = (offset0 + offset1) << rnd_val;
+    offset += constant;
+
+    offset_vec = __msa_fill_w(offset);
+    weight_vec = __msa_fill_w(weight);
+    rnd_vec = __msa_fill_w(rnd_val + 1);
+
+    filter_vec = LD_SH(filter);
+    SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask0 = LD_SB(&ff_hevc_mask_arr[0]);
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+    mask4 = LD_SB(&ff_hevc_mask_arr[16]);
+    mask5 = mask4 + 2;
+    mask6 = mask4 + 4;
+    mask7 = mask4 + 6;
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
+        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+                   vec3);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+                   vec3);
+        dst1 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+                   vec3);
+        dst2 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+                   vec3);
+        dst3 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3, in0, in1, in2, in3,
+                           weight_vec, rnd_vec, offset_vec, out0, out1, out2,
+                           out3);
+        PCKEV_B2_SH(out1, out0, out3, out2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+
+        LD_SB4(src0_ptr + 8, src_stride, src0, src1, src2, src3);
+        src0_ptr += (4 * src_stride);
+        LD_SH4(src1_ptr + 8, src2_stride, in0, in1, in2, in3);
+        src1_ptr += (4 * src2_stride);
+        ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7, vec0, vec1, vec2,
+                   vec3);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        VSHF_B4_SB(src2, src3, mask4, mask5, mask6, mask7, vec0, vec1, vec2,
+                   vec3);
+        dst1 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        HEVC_BIW_RND_CLIP2(dst0, dst1, in0, in1, weight_vec, rnd_vec,
+                           offset_vec, out0, out1);
+        out0 = (v8i16) __msa_pckev_b((v16i8) out1, (v16i8) out0);
+        ST4x4_UB(out0, out0, 0, 1, 2, 3, dst + 8, dst_stride);
+        dst += (4 * dst_stride);
+    }
 }
 
 static void hevc_hz_biwgt_8t_16w_msa(uint8_t *src0_ptr,
@@ -775,15 +915,14 @@
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2, src3;
     v8i16 in0, in1, in2, in3;
     v8i16 filt0, filt1, filt2, filt3;
     v16i8 mask1, mask2, mask3;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec, out0, out1, out2, out3;
     v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
     v4i32 weight_vec, offset_vec, rnd_vec;
     v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
 
@@ -791,9 +930,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -818,34 +958,28 @@
 
         VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
+        dst1 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
+        dst2 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        dst3 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
 
         HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           out0, out1, out2, out3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(out1, out0, out3, out2, out0, out1);
+        ST_SH2(out0, out1, dst, dst_stride);
         dst += (2 * dst_stride);
     }
 }
@@ -866,25 +1000,26 @@
 {
     uint32_t loop_cnt;
     uint64_t dst_val0;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1;
     v8i16 in0, in1, in2;
     v8i16 filt0, filt1, filt2, filt3;
     v16i8 mask1, mask2, mask3, mask4, mask5, mask6, mask7;
     v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2;
-    v4i32 dst0_r, dst1_r, dst2_r, dst0_l, dst1_l, dst2_l;
-    v8i16 filter_vec, const_vec;
+    v4i32 dst2_r, dst2_l;
+    v8i16 filter_vec, out0, out1, out2;
     v4i32 weight_vec, offset_vec, rnd_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
 
     src0_ptr = src0_ptr - 3;
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -900,33 +1035,30 @@
     mask6 = mask0 + 12;
     mask7 = mask0 + 14;
 
-    for (loop_cnt = height; loop_cnt--;) {
-        LD_SB2(src0_ptr, 16, src0, src1);
-        src0_ptr += src_stride;
-        LD_SH2(src1_ptr, 8, in0, in1);
-        in2 = LD_SH(src1_ptr + 16);
-        src1_ptr += src2_stride;
-        XORI_B2_128_SB(src0, src1);
+    LD_SB2(src0_ptr, 16, src0, src1);
+    src0_ptr += src_stride;
+    LD_SH2(src1_ptr, 8, in0, in1);
+    in2 = LD_SH(src1_ptr + 16);
+    src1_ptr += src2_stride;
+    XORI_B2_128_SB(src0, src1);
 
+    for (loop_cnt = 31; loop_cnt--;) {
         VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7,
                    vec0, vec1, vec2, vec3);
-        dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
+        dst1 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
+        dst2 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
 
         HEVC_BIW_RND_CLIP2(dst0, dst1, in0, in1,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst0_l, dst1_l);
+                           out0, out1);
 
         ILVRL_H2_SW(dst2, in2, dst2_r, dst2_l);
         dst2_r = __msa_dpadd_s_w(offset_vec, (v8i16) dst2_r,
@@ -934,16 +1066,44 @@
         dst2_l = __msa_dpadd_s_w(offset_vec, (v8i16) dst2_l,
                                  (v8i16) weight_vec);
         SRAR_W2_SW(dst2_r, dst2_l, rnd_vec);
-        dst2_r = CLIP_SW_0_255(dst2_r);
-        dst2_l = CLIP_SW_0_255(dst2_l);
+        dst2_r = (v4i32) __msa_pckev_h((v8i16) dst2_l, (v8i16) dst2_r);
+        out2 = CLIP_SH_0_255(dst2_r);
 
-        HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-        HEVC_PCK_SW_SB2(dst2_l, dst2_r, dst2_r);
-        dst_val0 = __msa_copy_u_d((v2i64) dst2_r, 0);
-        ST_SW(dst0_r, dst);
+        LD_SB2(src0_ptr, 16, src0, src1);
+        src0_ptr += src_stride;
+        LD_SH2(src1_ptr, 8, in0, in1);
+        in2 = LD_SH(src1_ptr + 16);
+        src1_ptr += src2_stride;
+        XORI_B2_128_SB(src0, src1);
+        PCKEV_B2_SH(out1, out0, out2, out2, out0, out2);
+        dst_val0 = __msa_copy_u_d((v2i64) out2, 0);
+        ST_SH(out0, dst);
         SD(dst_val0, dst + 16);
         dst += dst_stride;
     }
+
+    VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3, vec0, vec1, vec2, vec3);
+    dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                             filt3);
+    VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7, vec0, vec1, vec2, vec3);
+    dst1 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                             filt3);
+    VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3, vec0, vec1, vec2, vec3);
+    dst2 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                             filt3);
+    HEVC_BIW_RND_CLIP2(dst0, dst1, in0, in1, weight_vec, rnd_vec, offset_vec,
+                       out0, out1);
+    ILVRL_H2_SW(dst2, in2, dst2_r, dst2_l);
+    dst2_r = __msa_dpadd_s_w(offset_vec, (v8i16) dst2_r, (v8i16) weight_vec);
+    dst2_l = __msa_dpadd_s_w(offset_vec, (v8i16) dst2_l, (v8i16) weight_vec);
+    SRAR_W2_SW(dst2_r, dst2_l, rnd_vec);
+    dst2_r = (v4i32) __msa_pckev_h((v8i16) dst2_l, (v8i16) dst2_r);
+    out2 = CLIP_SH_0_255(dst2_r);
+    PCKEV_B2_SH(out1, out0, out2, out2, out0, out2);
+    dst_val0 = __msa_copy_u_d((v2i64) out2, 0);
+    ST_SH(out0, dst);
+    SD(dst_val0, dst + 16);
+    dst += dst_stride;
 }
 
 static void hevc_hz_biwgt_8t_32w_msa(uint8_t *src0_ptr,
@@ -961,25 +1121,25 @@
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2;
     v8i16 in0, in1, in2, in3;
     v8i16 filt0, filt1, filt2, filt3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1, mask2, mask3, mask4, mask5, mask6, mask7;
     v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
+    v8i16 filter_vec, out0, out1, out2, out3;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 3;
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -1006,34 +1166,28 @@
 
         VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7,
                    vec0, vec1, vec2, vec3);
-        dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
+        dst1 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
+        dst2 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        dst3 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
 
         HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           out0, out1, out2, out3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, 16);
+        PCKEV_B2_SH(out1, out0, out3, out2, out0, out1);
+        ST_SH2(out0, out1, dst, 16);
         dst += dst_stride;
     }
 }
@@ -1053,27 +1207,25 @@
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
-    uint64_t dst_val0;
-    v16i8 src0, src1, src2, src3;
-    v8i16 in0, in1, in2, in3, in4, in5;
+    int32_t offset, weight, constant;
+    v16i8 src0, src1, src2, src3, src4;
+    v8i16 in0, in1, in2, in3;
     v8i16 filt0, filt1, filt2, filt3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1, mask2, mask3, mask4, mask5, mask6, mask7;
     v16i8 vec0, vec1, vec2, vec3;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
+    v8i16 dst0, dst1, dst2, dst3;
+    v8i16 filter_vec, out0, out1, out2, out3;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 3;
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -1089,86 +1241,57 @@
     mask6 = mask0 + 12;
     mask7 = mask0 + 14;
 
-    for (loop_cnt = height; loop_cnt--;) {
-        LD_SB3(src0_ptr, 16, src0, src1, src2);
-        src3 = LD_SB(src0_ptr + 40);
+    for (loop_cnt = 64; loop_cnt--;) {
+        LD_SB2(src0_ptr, 16, src0, src1);
+        src2 = LD_SB(src0_ptr + 24);
+        LD_SH4(src1_ptr, 8, in0, in1, in2, in3);
+        XORI_B3_128_SB(src0, src1, src2);
+        LD_SB2(src0_ptr + 32, 8, src3, src4);
         src0_ptr += src_stride;
-        LD_SH2(src1_ptr, 8, in0, in1);
-        in2 = LD_SH(src1_ptr + 16);
-        XORI_B4_128_SB(src0, src1, src2, src3);
+        XORI_B2_128_SB(src3, src4);
 
         VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7,
                    vec0, vec1, vec2, vec3);
-        dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
+        dst1 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        VSHF_B4_SB(src1, src2, mask4, mask5, mask6, mask7,
-                   vec0, vec1, vec2, vec3);
-        dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        dst2 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
         VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst4 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst4, dst4, dst4, dst4);
-        VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
-        dst5 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst5, dst5, dst5, dst5);
+        dst3 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
 
-        HEVC_BIW_RND_CLIP2(dst0, dst1, in0, in1,
+        HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3, in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst0_l, dst1_l);
+                           out0, out1, out2, out3);
 
-        ILVRL_H2_SW(dst2, in2, dst2_r, dst2_l);
-        dst2_r = __msa_dpadd_s_w(offset_vec, (v8i16) dst2_r,
-                                 (v8i16) weight_vec);
-        dst2_l = __msa_dpadd_s_w(offset_vec, (v8i16) dst2_l,
-                                 (v8i16) weight_vec);
-        SRAR_W2_SW(dst2_r, dst2_l, rnd_vec);
-        dst2_r = CLIP_SW_0_255(dst2_r);
-        dst2_l = CLIP_SW_0_255(dst2_l);
+        PCKEV_B2_SH(out1, out0, out3, out2, out0, out1);
+        ST_SH2(out0, out1, dst, 16);
 
-        HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-        HEVC_PCK_SW_SB2(dst2_l, dst2_r, dst2_r);
-        dst_val0 = __msa_copy_u_d((v2i64) dst2_r, 0);
-        ST_SW(dst0_r, dst);
-        SD(dst_val0, dst + 16);
-
-        LD_SH2(src1_ptr + 24, 8, in3, in4);
-        in5 = LD_SH(src1_ptr + 40);
+        LD_SH2(src1_ptr + 32, 8, in2, in3);
         src1_ptr += src2_stride;
 
-        HEVC_BIW_RND_CLIP2(dst3, dst4, in3, in4,
+        VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
+                   vec0, vec1, vec2, vec3);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        VSHF_B4_SB(src4, src4, mask0, mask1, mask2, mask3,
+                   vec0, vec1, vec2, vec3);
+        dst1 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+
+        HEVC_BIW_RND_CLIP2(dst0, dst1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst3_r, dst4_r, dst3_l, dst4_l);
+                           out0, out1);
 
-        ILVRL_H2_SW(dst5, in5, dst5_r, dst5_l);
-        dst5_r = __msa_dpadd_s_w(offset_vec, (v8i16) dst5_r,
-                                 (v8i16) weight_vec);
-        dst5_l = __msa_dpadd_s_w(offset_vec, (v8i16) dst5_l,
-                                 (v8i16) weight_vec);
-        SRAR_W2_SW(dst5_r, dst5_l, rnd_vec);
-        dst5_r = CLIP_SW_0_255(dst5_r);
-        dst5_l = CLIP_SW_0_255(dst5_l);
-
-        HEVC_PCK_SW_SB4(dst4_l, dst4_r, dst5_l, dst5_r, dst4_r);
-        HEVC_PCK_SW_SB2(dst3_l, dst3_r, dst3_r);
-        dst_val0 = __msa_copy_u_d((v2i64) dst3_r, 0);
-        SD(dst_val0, dst + 24);
-        ST_SW(dst4_r, dst + 32);
+        out0 = (v8i16) __msa_pckev_b((v16i8) out1, (v16i8) out0);
+        ST_SH(out0, dst + 32);
         dst += dst_stride;
     }
 }
@@ -1191,25 +1314,25 @@
     uint8_t *dst_tmp;
     int16_t *src1_ptr_tmp;
     uint32_t loop_cnt, cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2;
     v8i16 in0, in1, in2, in3;
     v8i16 filt0, filt1, filt2, filt3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1, mask2, mask3, mask4, mask5, mask6, mask7;
     v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
+    v8i16 filter_vec, out0, out1, out2, out3;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 3;
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -1240,34 +1363,28 @@
 
             VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
                        vec0, vec1, vec2, vec3);
-            dst0 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst0, dst0, dst0, dst0);
+            dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                     filt2, filt3);
             VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7,
                        vec0, vec1, vec2, vec3);
-            dst1 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst1, dst1, dst1, dst1);
+            dst1 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                     filt2, filt3);
             VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
                        vec0, vec1, vec2, vec3);
-            dst2 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst2, dst2, dst2, dst2);
+            dst2 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                     filt2, filt3);
             VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
                        vec0, vec1, vec2, vec3);
-            dst3 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst3, dst3, dst3, dst3);
+            dst3 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                     filt2, filt3);
 
             HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                                in0, in1, in2, in3,
                                weight_vec, rnd_vec, offset_vec,
-                               dst0_r, dst1_r, dst2_r, dst3_r,
-                               dst0_l, dst1_l, dst2_l, dst3_l);
+                               out0, out1, out2, out3);
 
-            HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                            dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-            ST_SW2(dst0_r, dst1_r, dst_tmp, 16);
+            PCKEV_B2_SH(out1, out0, out3, out2, out0, out1);
+            ST_SH2(out0, out1, dst_tmp, 16);
             dst_tmp += 32;
         }
 
@@ -1303,22 +1420,22 @@
     v16i8 src2110, src4332, src6554, src8776, src10998;
     v16i8 src12111110, src14131312;
     v8i16 dst10, dst32, dst54, dst76;
-    v4i32 dst10_r, dst32_r, dst54_r, dst76_r;
-    v4i32 dst10_l, dst32_l, dst54_l, dst76_l;
     v8i16 filt0, filt1, filt2, filt3;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 filter_vec, out0, out1, out2, out3;
+    v4i32 weight_vec, weight1_vec, offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (3 * src_stride);
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_ldi_w(128);
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    weight1_vec = __msa_fill_w(weight1);
+    offset_vec += const_vec * weight1_vec;
 
     filter_vec = LD_SH(filter);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
@@ -1351,28 +1468,22 @@
                    src8776, src10998, src12111110, src14131312);
         XORI_B4_128_SB(src8776, src10998, src12111110, src14131312);
 
-        dst10 = const_vec;
-        DPADD_SB4_SH(src2110, src4332, src6554, src8776, filt0, filt1,
-                     filt2, filt3, dst10, dst10, dst10, dst10);
-        dst32 = const_vec;
-        DPADD_SB4_SH(src4332, src6554, src8776, src10998,
-                     filt0, filt1, filt2, filt3, dst32, dst32, dst32, dst32);
-        dst54 = const_vec;
-        DPADD_SB4_SH(src6554, src8776, src10998, src12111110,
-                     filt0, filt1, filt2, filt3, dst54, dst54, dst54, dst54);
-        dst76 = const_vec;
-        DPADD_SB4_SH(src8776, src10998, src12111110, src14131312,
-                     filt0, filt1, filt2, filt3, dst76, dst76, dst76, dst76);
+        DOTP_SB4_SH(src2110, src4332, src6554, src8776, filt0, filt0, filt0,
+                    filt0, dst10, dst32, dst54, dst76);
+        DPADD_SB4_SH(src4332, src6554, src8776, src10998, filt1, filt1, filt1,
+                     filt1, dst10, dst32, dst54, dst76);
+        DPADD_SB4_SH(src6554, src8776, src10998, src12111110, filt2, filt2,
+                     filt2, filt2, dst10, dst32, dst54, dst76);
+        DPADD_SB4_SH(src8776, src10998, src12111110, src14131312, filt3, filt3,
+                     filt3, filt3, dst10, dst32, dst54, dst76);
 
         HEVC_BIW_RND_CLIP4(dst10, dst32, dst54, dst76,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst10_r, dst32_r, dst54_r, dst76_r,
-                           dst10_l, dst32_l, dst54_l, dst76_l);
+                           out0, out1, out2, out3);
 
-        HEVC_PCK_SW_SB8(dst10_l, dst10_r, dst32_l, dst32_r,
-                        dst54_l, dst54_r, dst76_l, dst76_r, dst10_r, dst54_r);
-        ST4x8_UB(dst10_r, dst54_r, dst, dst_stride);
+        PCKEV_B2_SH(out1, out0, out3, out2, out0, out1);
+        ST4x8_UB(out0, out1, dst, dst_stride);
         dst += (8 * dst_stride);
 
         src2110 = src10998;
@@ -1405,20 +1516,21 @@
     v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
     v8i16 tmp0, tmp1, tmp2, tmp3;
     v8i16 filt0, filt1, filt2, filt3;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 filter_vec, out0, out1, out2, out3;
+    v4i32 weight_vec, weight1_vec, offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (3 * src_stride);
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_ldi_w(128);
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    weight1_vec = __msa_fill_w(weight1);
+    offset_vec += const_vec * weight1_vec;
 
     filter_vec = LD_SH(filter);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
@@ -1441,28 +1553,22 @@
         ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9,
                    src76_r, src87_r, src98_r, src109_r);
 
-        tmp0 = const_vec;
-        DPADD_SB4_SH(src10_r, src32_r, src54_r, src76_r,
-                     filt0, filt1, filt2, filt3, tmp0, tmp0, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB4_SH(src21_r, src43_r, src65_r, src87_r,
-                     filt0, filt1, filt2, filt3, tmp1, tmp1, tmp1, tmp1);
-        tmp2 = const_vec;
-        DPADD_SB4_SH(src32_r, src54_r, src76_r, src98_r,
-                     filt0, filt1, filt2, filt3, tmp2, tmp2, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB4_SH(src43_r, src65_r, src87_r, src109_r,
-                     filt0, filt1, filt2, filt3, tmp3, tmp3, tmp3, tmp3);
+        DOTP_SB4_SH(src10_r, src21_r, src32_r, src43_r, filt0, filt0, filt0,
+                    filt0, tmp0, tmp1, tmp2, tmp3);
+        DPADD_SB4_SH(src32_r, src43_r, src54_r, src65_r, filt1, filt1, filt1,
+                     filt1, tmp0, tmp1, tmp2, tmp3);
+        DPADD_SB4_SH(src54_r, src65_r, src76_r, src87_r, filt2, filt2, filt2,
+                     filt2, tmp0, tmp1, tmp2, tmp3);
+        DPADD_SB4_SH(src76_r, src87_r, src98_r, src109_r, filt3, filt3, filt3,
+                     filt3, tmp0, tmp1, tmp2, tmp3);
 
         HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           out0, out1, out2, out3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(out1, out0, out3, out2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
         dst += (4 * dst_stride);
 
         src10_r = src54_r;
@@ -1500,20 +1606,22 @@
     v16i8 src21_l, src43_l, src65_l, src87_l;
     v16i8 src2110, src4332, src6554, src8776;
     v8i16 filt0, filt1, filt2, filt3;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst0_l, dst1_l, dst2_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 out0, out1, out2, filter_vec;
+    v4i32 dst2_r, dst2_l;
+    v4i32 weight_vec, weight1_vec, offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (3 * src_stride);
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_ldi_w(128);
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    weight1_vec = __msa_fill_w(weight1);
+    offset_vec += const_vec * weight1_vec;
 
     filter_vec = LD_SH(filter);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
@@ -1531,7 +1639,7 @@
     ILVR_D3_SB(src21_l, src10_l, src43_l, src32_l, src65_l, src54_l,
                src2110, src4332, src6554);
 
-    for (loop_cnt = (height >> 1); loop_cnt--;) {
+    for (loop_cnt = 8; loop_cnt--;) {
         LD_SB2(src0_ptr, src_stride, src7, src8);
         src0_ptr += (2 * src_stride);
         LD_SH2(src1_ptr, src2_stride, in0, in1);
@@ -1544,19 +1652,18 @@
         ILVL_B2_SB(src7, src6, src8, src7, src76_l, src87_l);
         src8776 = (v16i8) __msa_ilvr_d((v2i64) src87_l, (v2i64) src76_l);
 
-        tmp0 = const_vec;
-        DPADD_SB4_SH(src10_r, src32_r, src54_r, src76_r,
-                     filt0, filt1, filt2, filt3, tmp0, tmp0, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB4_SH(src21_r, src43_r, src65_r, src87_r,
-                     filt0, filt1, filt2, filt3, tmp1, tmp1, tmp1, tmp1);
-        tmp2 = const_vec;
-        DPADD_SB4_SH(src2110, src4332, src6554, src8776,
-                     filt0, filt1, filt2, filt3, tmp2, tmp2, tmp2, tmp2);
+        DOTP_SB3_SH(src10_r, src21_r, src2110, filt0, filt0, filt0,
+                    tmp0, tmp1, tmp2);
+        DPADD_SB2_SH(src32_r, src43_r, filt1, filt1, tmp0, tmp1);
+        tmp2 = __msa_dpadd_s_h(tmp2, src4332, (v16i8) filt1);
+        DPADD_SB2_SH(src54_r, src65_r, filt2, filt2, tmp0, tmp1);
+        tmp2 = __msa_dpadd_s_h(tmp2, src6554, (v16i8) filt2);
+        DPADD_SB2_SH(src76_r, src87_r, filt3, filt3, tmp0, tmp1);
+        tmp2 = __msa_dpadd_s_h(tmp2, src8776, (v16i8) filt3);
 
         HEVC_BIW_RND_CLIP2(tmp0, tmp1, in0, in1,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst0_l, dst1_l);
+                           out0, out1);
 
         ILVRL_H2_SW(tmp2, in2, dst2_r, dst2_l);
         dst2_r = __msa_dpadd_s_w(offset_vec, (v8i16) dst2_r,
@@ -1564,13 +1671,11 @@
         dst2_l = __msa_dpadd_s_w(offset_vec, (v8i16) dst2_l,
                                  (v8i16) weight_vec);
         SRAR_W2_SW(dst2_r, dst2_l, rnd_vec);
-        dst2_r = CLIP_SW_0_255(dst2_r);
-        dst2_l = CLIP_SW_0_255(dst2_l);
-
-        HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-        HEVC_PCK_SW_SB2(dst2_l, dst2_r, dst2_r);
-        ST8x2_UB(dst0_r, dst, dst_stride);
-        ST4x2_UB(dst2_r, dst + 8, dst_stride);
+        dst2_r = (v4i32) __msa_pckev_h((v8i16) dst2_l, (v8i16) dst2_r);
+        out2 = CLIP_SH_0_255(dst2_r);
+        PCKEV_B2_SH(out1, out0, out2, out2, out0, out2);
+        ST8x2_UB(out0, dst, dst_stride);
+        ST4x2_UB(out2, dst + 8, dst_stride);
         dst += (2 * dst_stride);
 
         src10_r = src32_r;
@@ -1614,9 +1719,9 @@
     v16i8 src21_l, src43_l, src65_l, src87_l;
     v8i16 tmp0, tmp1, tmp2, tmp3;
     v8i16 filt0, filt1, filt2, filt3;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 filter_vec;
+    v8i16 out0, out1, out2, out3;
+    v4i32 weight_vec, weight1_vec, offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (3 * src_stride);
 
@@ -1624,11 +1729,13 @@
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_ldi_w(128);
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    weight1_vec = __msa_fill_w(weight1);
+    offset_vec += const_vec * weight1_vec;
 
     filter_vec = LD_SH(filter);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
@@ -1661,28 +1768,22 @@
             ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
             ILVL_B2_SB(src7, src6, src8, src7, src76_l, src87_l);
 
-            tmp0 = const_vec;
-            DPADD_SB4_SH(src10_r, src32_r, src54_r, src76_r,
-                         filt0, filt1, filt2, filt3, tmp0, tmp0, tmp0, tmp0);
-            tmp1 = const_vec;
-            DPADD_SB4_SH(src21_r, src43_r, src65_r, src87_r,
-                         filt0, filt1, filt2, filt3, tmp1, tmp1, tmp1, tmp1);
-            tmp2 = const_vec;
-            DPADD_SB4_SH(src10_l, src32_l, src54_l, src76_l,
-                         filt0, filt1, filt2, filt3, tmp2, tmp2, tmp2, tmp2);
-            tmp3 = const_vec;
-            DPADD_SB4_SH(src21_l, src43_l, src65_l, src87_l,
-                         filt0, filt1, filt2, filt3, tmp3, tmp3, tmp3, tmp3);
+            DOTP_SB4_SH(src10_r, src21_r, src10_l, src21_l, filt0, filt0,
+                        filt0, filt0, tmp0, tmp1, tmp2, tmp3);
+            DPADD_SB4_SH(src32_r, src43_r, src32_l, src43_l, filt1, filt1,
+                         filt1, filt1, tmp0, tmp1, tmp2, tmp3);
+            DPADD_SB4_SH(src54_r, src65_r, src54_l, src65_l, filt2, filt2,
+                         filt2, filt2, tmp0, tmp1, tmp2, tmp3);
+            DPADD_SB4_SH(src76_r, src87_r, src76_l, src87_l, filt3, filt3,
+                         filt3, filt3, tmp0, tmp1, tmp2, tmp3);
 
             HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
                                in0, in1, in2, in3,
                                weight_vec, rnd_vec, offset_vec,
-                               dst0_r, dst1_r, dst2_r, dst3_r,
-                               dst0_l, dst1_l, dst2_l, dst3_l);
+                               out0, out1, out2, out3);
 
-            HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                            dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-            ST_SW2(dst0_r, dst1_r, dst_tmp, dst_stride);
+            PCKEV_B2_SH(out2, out0, out3, out1, out0, out1);
+            ST_SH2(out0, out1, dst_tmp, dst_stride);
             dst_tmp += (2 * dst_stride);
 
             src10_r = src32_r;
@@ -1831,23 +1932,23 @@
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
-    v8i16 in0, in1;
+    uint64_t tp0, tp1;
+    int32_t offset, weight;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v8i16 in0 = { 0 }, in1 = { 0 };
     v8i16 filt0, filt1, filt2, filt3;
-    v4i32 filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 filt_h0, filt_h1, filt_h2, filt_h3;
     v16i8 mask1, mask2, mask3;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec, weight_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
     v8i16 dst30, dst41, dst52, dst63, dst66, dst87;
-    v4i32 dst0_r, dst1_r;
-    v4i32 tmp1, tmp2;
-    v4i32 weight_vec0, weight_vec1, offset_vec, rnd_vec;
-    v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
-    v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
-    v8u16 mask4 = { 0, 4, 1, 5, 2, 6, 3, 7 };
+    v8i16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 dst10, dst32, dst54, dst76;
+    v8i16 dst21, dst43, dst65, dst97, dst108, dst109, dst98;
+    v4i32 offset_vec, rnd_vec, const_vec, dst0, dst1, dst2, dst3;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
 
     src0_ptr -= ((3 * src_stride) + 3);
 
@@ -1855,10 +1956,9 @@
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W4_SW(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
 
     mask1 = mask0 + 2;
     mask2 = mask0 + 4;
@@ -1866,13 +1966,14 @@
 
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
+    weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_fill_w((128 * weight1));
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
-    weight_vec0 = __msa_fill_w(weight0);
-    weight_vec1 = __msa_fill_w(weight1);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
+    weight_vec = (v8i16) __msa_fill_w(weight);
 
     LD_SB7(src0_ptr, src_stride, src0, src1, src2, src3, src4, src5, src6);
     src0_ptr += (7 * src_stride);
@@ -1886,70 +1987,77 @@
     VSHF_B4_SB(src3, src6, mask0, mask1, mask2, mask3,
                vec12, vec13, vec14, vec15);
 
-    dst30 = const_vec;
-    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                 dst30, dst30, dst30, dst30);
-    dst41 = const_vec;
-    DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                 dst41, dst41, dst41, dst41);
-    dst52 = const_vec;
-    DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                 dst52, dst52, dst52, dst52);
-    dst63 = const_vec;
-    DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2, filt3,
-                 dst63, dst63, dst63, dst63);
+    dst30 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                              filt3);
+    dst41 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                              filt3);
+    dst52 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                              filt3);
+    dst63 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2,
+                              filt3);
 
-    ILVR_H3_SH(dst41, dst30, dst52, dst41, dst63, dst52,
-               dst10_r, dst21_r, dst32_r);
-    dst43_r = __msa_ilvl_h(dst41, dst30);
-    dst54_r = __msa_ilvl_h(dst52, dst41);
-    dst65_r = __msa_ilvl_h(dst63, dst52);
+    ILVRL_H2_SH(dst41, dst30, dst10, dst43);
+    ILVRL_H2_SH(dst52, dst41, dst21, dst54);
+    ILVRL_H2_SH(dst63, dst52, dst32, dst65);
+
     dst66 = (v8i16) __msa_splati_d((v2i64) dst63, 1);
 
-    for (loop_cnt = height >> 1; loop_cnt--;) {
-        LD_SB2(src0_ptr, src_stride, src7, src8);
-        src0_ptr += (2 * src_stride);
-        LD_SH2(src1_ptr, src2_stride, in0, in1);
+    for (loop_cnt = height >> 2; loop_cnt--;) {
+        LD_SB4(src0_ptr, src_stride, src7, src8, src9, src10);
+        src0_ptr += (4 * src_stride);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        INSERT_D2_SH(tp0, tp1, in0);
+        src1_ptr += (2 * src2_stride);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        INSERT_D2_SH(tp0, tp1, in1);
         src1_ptr += (2 * src2_stride);
 
-        in0 = (v8i16) __msa_ilvr_d((v2i64) in1, (v2i64) in0);
-        XORI_B2_128_SB(src7, src8);
-
-        VSHF_B4_SB(src7, src8, mask0, mask1, mask2, mask3,
+        VSHF_B4_SB(src7, src9, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst87 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst87, dst87, dst87, dst87);
-        dst76_r = __msa_ilvr_h(dst87, dst66);
-        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
-                                filt_h0, filt_h1, filt_h2, filt_h3);
-        dst87_r = __msa_vshf_h((v8i16) mask4, dst87, dst87);
-        dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r,
-                                filt_h0, filt_h1, filt_h2, filt_h3);
+        VSHF_B4_SB(src8, src10, mask0, mask1, mask2, mask3,
+                   vec4, vec5, vec6, vec7);
+        dst97 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                  filt3);
+        dst108 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                   filt3);
 
-        dst0_r >>= 6;
-        dst1_r >>= 6;
+        dst76 = __msa_ilvr_h(dst97, dst66);
+        ILVRL_H2_SH(dst108, dst97, dst87, dst109);
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst97, 1);
+        dst98 = __msa_ilvr_h(dst66, dst108);
 
-        ILVRL_H2_SW(in0, in0, tmp1, tmp2);
-        tmp1 = __msa_dpadd_s_w(offset_vec, (v8i16) tmp1, (v8i16) weight_vec0);
-        tmp2 = __msa_dpadd_s_w(offset_vec, (v8i16) tmp2, (v8i16) weight_vec0);
-        tmp1 += dst0_r * weight_vec1;
-        tmp2 += dst1_r * weight_vec1;
-        SRAR_W2_SW(tmp1, tmp2, rnd_vec);
-        tmp1 = CLIP_SW_0_255(tmp1);
-        tmp2 = CLIP_SW_0_255(tmp2);
+        dst0 = HEVC_FILT_8TAP(dst10, dst32, dst54, dst76, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        dst1 = HEVC_FILT_8TAP(dst21, dst43, dst65, dst87, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        dst2 = HEVC_FILT_8TAP(dst32, dst54, dst76, dst98, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        dst3 = HEVC_FILT_8TAP(dst43, dst65, dst87, dst109, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        SRA_4V(dst0, dst1, dst2, dst3, 6);
+        PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp1, tmp3);
+        ILVRL_H2_SH(tmp1, in0, tmp0, tmp1);
+        ILVRL_H2_SH(tmp3, in1, tmp2, tmp3);
+        dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+        dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+        dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+        dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+        SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+        CLIP_SW4_0_255_MAX_SATU(dst0, dst1, dst2, dst3);
+        PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp0, tmp1);
+        out = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
 
-        HEVC_PCK_SW_SB2(tmp2, tmp1, tmp1);
-        ST4x2_UB(tmp1, dst, dst_stride);
-        dst += (2 * dst_stride);
-
-        dst10_r = dst32_r;
-        dst32_r = dst54_r;
-        dst54_r = dst76_r;
-        dst21_r = dst43_r;
-        dst43_r = dst65_r;
-        dst65_r = dst87_r;
-        dst66 = (v8i16) __msa_splati_d((v2i64) dst87, 1);
+        dst10 = dst54;
+        dst32 = dst76;
+        dst54 = dst98;
+        dst21 = dst65;
+        dst43 = dst87;
+        dst65 = dst109;
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst108, 1);
     }
 }
 
@@ -1967,57 +2075,58 @@
                                              int32_t offset0,
                                              int32_t offset1,
                                              int32_t rnd_val,
-                                             int32_t width)
+                                             int32_t width8mult)
 {
     uint32_t loop_cnt, cnt;
-    int32_t offset;
+    int32_t offset, weight;
     uint8_t *src0_ptr_tmp;
     int16_t *src1_ptr_tmp;
     uint8_t *dst_tmp;
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
     v8i16 in0, in1;
     v8i16 filt0, filt1, filt2, filt3;
-    v4i32 filt_h0, filt_h1, filt_h2, filt_h3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1, filt_h2, filt_h3;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1, mask2, mask3;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec, weight_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l;
-    v4i32 tmp0, tmp1, tmp2, tmp3;
+    v8i16 tmp0, tmp1, tmp2, tmp3;
     v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
     v8i16 dst10_l, dst32_l, dst54_l, dst76_l;
     v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
     v8i16 dst21_l, dst43_l, dst65_l, dst87_l;
-    v4i32 weight_vec0, weight_vec1, offset_vec, rnd_vec;
+    v4i32 offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= ((3 * src_stride) + 3);
 
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
+    weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_fill_w((128 * weight1));
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
-    weight_vec0 = __msa_fill_w(weight0);
-    weight_vec1 = __msa_fill_w(weight1);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
+    weight_vec = (v8i16) __msa_fill_w(weight);
 
     filter_vec = LD_SH(filter_x);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W4_SW(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
 
     mask1 = mask0 + 2;
     mask2 = mask0 + 4;
     mask3 = mask0 + 6;
 
-    for (cnt = width >> 3; cnt--;) {
+    for (cnt = width8mult; cnt--;) {
         src0_ptr_tmp = src0_ptr;
         src1_ptr_tmp = src1_ptr;
         dst_tmp = dst;
@@ -2038,18 +2147,14 @@
         VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
                    vec12, vec13, vec14, vec15);
 
-        dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        dst1 = const_vec;
-        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        dst2 = const_vec;
-        DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        dst3 = const_vec;
-        DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        dst1 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                 filt3);
+        dst2 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                                 filt3);
+        dst3 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1,
+                                 filt2, filt3);
 
         /* row 4 row 5 row 6 */
         VSHF_B4_SB(src4, src4, mask0, mask1, mask2, mask3,
@@ -2059,22 +2164,12 @@
         VSHF_B4_SB(src6, src6, mask0, mask1, mask2, mask3,
                    vec8, vec9, vec10, vec11);
 
-        dst4 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst4, dst4, dst4, dst4);
-        dst5 = const_vec;
-        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                     dst5, dst5, dst5, dst5);
-        dst6 = const_vec;
-        DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                     dst6, dst6, dst6, dst6);
-
-        ILVR_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1,
-                   dst10_r, dst32_r, dst54_r, dst21_r);
-        ILVR_H2_SH(dst4, dst3, dst6, dst5, dst43_r, dst65_r);
-        ILVL_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1,
-                   dst10_l, dst32_l, dst54_l, dst21_l);
-        ILVL_H2_SH(dst4, dst3, dst6, dst5, dst43_l, dst65_l);
+        dst4 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        dst5 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                 filt3);
+        dst6 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                                 filt3);
 
         for (loop_cnt = height >> 1; loop_cnt--;) {
             LD_SB2(src0_ptr_tmp, src_stride, src7, src8);
@@ -2084,11 +2179,17 @@
             LD_SH2(src1_ptr_tmp, src2_stride, in0, in1);
             src1_ptr_tmp += (2 * src2_stride);
 
+            ILVR_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1, dst10_r,
+                       dst32_r, dst54_r, dst21_r);
+            ILVL_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1, dst10_l,
+                       dst32_l, dst54_l, dst21_l);
+            ILVR_H2_SH(dst4, dst3, dst6, dst5, dst43_r, dst65_r);
+            ILVL_H2_SH(dst4, dst3, dst6, dst5, dst43_l, dst65_l);
+
             VSHF_B4_SB(src7, src7, mask0, mask1, mask2, mask3,
                        vec0, vec1, vec2, vec3);
-            dst7 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst7, dst7, dst7, dst7);
+            dst7 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                     filt2, filt3);
 
             ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
             dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
@@ -2102,10 +2203,8 @@
             /* row 8 */
             VSHF_B4_SB(src8, src8, mask0, mask1, mask2, mask3,
                        vec0, vec1, vec2, vec3);
-
-            dst8 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst8, dst8, dst8, dst8);
+            dst8 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                     filt2, filt3);
 
             ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
             dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r,
@@ -2116,43 +2215,26 @@
             dst1_r >>= 6;
             dst1_l >>= 6;
 
-            ILVRL_H2_SW(in0, in0, tmp0, tmp1);
-            ILVRL_H2_SW(in1, in1, tmp2, tmp3);
-            tmp0 = __msa_dpadd_s_w(offset_vec, (v8i16) tmp0,
-                                   (v8i16) weight_vec0);
-            tmp1 = __msa_dpadd_s_w(offset_vec, (v8i16) tmp1,
-                                   (v8i16) weight_vec0);
-            tmp2 = __msa_dpadd_s_w(offset_vec, (v8i16) tmp2,
-                                   (v8i16) weight_vec0);
-            tmp3 = __msa_dpadd_s_w(offset_vec, (v8i16) tmp3,
-                                   (v8i16) weight_vec0);
-
-            tmp0 += (dst0_r * weight_vec1);
-            tmp1 += (dst0_l * weight_vec1);
-            tmp2 += (dst1_r * weight_vec1);
-            tmp3 += (dst1_l * weight_vec1);
-
-            SRAR_W4_SW(tmp0, tmp1, tmp2, tmp3, rnd_vec);
-            tmp0 = CLIP_SW_0_255(tmp0);
-            tmp1 = CLIP_SW_0_255(tmp1);
-            tmp2 = CLIP_SW_0_255(tmp2);
-            tmp3 = CLIP_SW_0_255(tmp3);
-            HEVC_PCK_SW_SB4(tmp1, tmp0, tmp3, tmp2, dst0_r);
-            ST8x2_UB(dst0_r, dst_tmp, dst_stride);
+            PCKEV_H2_SH(dst0_l, dst0_r, dst1_l, dst1_r, tmp1, tmp3);
+            ILVRL_H2_SH(tmp1, in0, tmp0, tmp1);
+            ILVRL_H2_SH(tmp3, in1, tmp2, tmp3);
+            dst0_r = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+            dst0_l = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+            dst1_r = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+            dst1_l = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+            SRAR_W4_SW(dst0_l, dst0_r, dst1_l, dst1_r, rnd_vec);
+            CLIP_SW4_0_255_MAX_SATU(dst0_l, dst0_r, dst1_l, dst1_r);
+            PCKEV_H2_SH(dst0_l, dst0_r, dst1_l, dst1_r, tmp0, tmp1);
+            out = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+            ST8x2_UB(out, dst_tmp, dst_stride);
             dst_tmp += (2 * dst_stride);
 
-            dst10_r = dst32_r;
-            dst32_r = dst54_r;
-            dst54_r = dst76_r;
-            dst10_l = dst32_l;
-            dst32_l = dst54_l;
-            dst54_l = dst76_l;
-            dst21_r = dst43_r;
-            dst43_r = dst65_r;
-            dst65_r = dst87_r;
-            dst21_l = dst43_l;
-            dst43_l = dst65_l;
-            dst65_l = dst87_l;
+            dst0 = dst2;
+            dst1 = dst3;
+            dst2 = dst4;
+            dst3 = dst5;
+            dst4 = dst6;
+            dst5 = dst7;
             dst6 = dst8;
         }
 
@@ -2181,7 +2263,7 @@
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
                                      height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 8);
+                                     offset1, rnd_val, 1);
 }
 
 static void hevc_hv_biwgt_8t_12w_msa(uint8_t *src0_ptr,
@@ -2199,16 +2281,239 @@
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride,
-                                     src1_ptr, src2_stride,
-                                     dst, dst_stride, filter_x, filter_y,
-                                     height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 8);
-    hevc_hv_biwgt_8t_4w_msa(src0_ptr + 8, src_stride,
-                            src1_ptr + 8, src2_stride,
-                            dst + 8, dst_stride, filter_x, filter_y,
-                            height, weight0, weight1, offset0, offset1,
-                            rnd_val);
+    uint32_t loop_cnt;
+    uint8_t *src0_ptr_tmp, *dst_tmp;
+    int16_t *src1_ptr_tmp;
+    int32_t offset, weight;
+    uint64_t tp0, tp1;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+    v16i8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7;
+    v8i16 in0 = { 0 }, in1 = { 0 };
+    v8i16 filter_vec, weight_vec, tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt0, filt1, filt2, filt3, filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6, dsth7, dsth8;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst76_l, dst21_l, dst43_l, dst65_l;
+    v8i16 dst30, dst41, dst52, dst63, dst66, dst87, dst10, dst32, dst54, dst76;
+    v8i16 dst21, dst43, dst65, dst97, dst108, dst109, dst98, dst87_r, dst87_l;
+    v4i32 offset_vec, rnd_vec, const_vec, dst0, dst1, dst2, dst3;
+
+    src0_ptr -= ((3 * src_stride) + 3);
+
+    offset = (offset0 + offset1) << rnd_val;
+    weight0 = weight0 & 0x0000FFFF;
+    weight = weight0 | (weight1 << 16);
+
+    const_vec = __msa_fill_w((128 * weight1));
+    const_vec <<= 6;
+    offset_vec = __msa_fill_w(offset);
+    rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
+    weight_vec = (v8i16) __msa_fill_w(weight);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    src0_ptr_tmp = src0_ptr;
+    src1_ptr_tmp = src1_ptr;
+    dst_tmp = dst;
+
+    LD_SB7(src0_ptr_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src0_ptr_tmp += (7 * src_stride);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+    VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3, vec8, vec9, vec10,
+               vec11);
+    VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3, vec12, vec13, vec14,
+               vec15);
+    dsth0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                              filt3);
+    dsth1 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                              filt3);
+    dsth2 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                              filt3);
+    dsth3 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1,
+                              filt2, filt3);
+    VSHF_B4_SB(src4, src4, mask0, mask1, mask2, mask3, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src5, src5, mask0, mask1, mask2, mask3, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src6, src6, mask0, mask1, mask2, mask3, vec8, vec9, vec10,
+               vec11);
+    dsth4 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                              filt3);
+    dsth5 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                              filt3);
+    dsth6 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                              filt3);
+
+    for (loop_cnt = 8; loop_cnt--;) {
+        LD_SB2(src0_ptr_tmp, src_stride, src7, src8);
+        src0_ptr_tmp += (2 * src_stride);
+        XORI_B2_128_SB(src7, src8);
+
+        LD_SH2(src1_ptr_tmp, src2_stride, in0, in1);
+        src1_ptr_tmp += (2 * src2_stride);
+
+        ILVR_H4_SH(dsth1, dsth0, dsth3, dsth2, dsth5, dsth4, dsth2, dsth1,
+                   dst10_r, dst32_r, dst54_r, dst21_r);
+        ILVL_H4_SH(dsth1, dsth0, dsth3, dsth2, dsth5, dsth4, dsth2, dsth1,
+                   dst10_l, dst32_l, dst54_l, dst21_l);
+        ILVR_H2_SH(dsth4, dsth3, dsth6, dsth5, dst43_r, dst65_r);
+        ILVL_H2_SH(dsth4, dsth3, dsth6, dsth5, dst43_l, dst65_l);
+
+        VSHF_B4_SB(src7, src7, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+                   vec3);
+        dsth7 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                  filt3);
+
+        ILVRL_H2_SH(dsth7, dsth6, dst76_r, dst76_l);
+        dst0 = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r, filt_h0,
+                              filt_h1, filt_h2, filt_h3);
+        dst1 = HEVC_FILT_8TAP(dst10_l, dst32_l, dst54_l, dst76_l, filt_h0,
+                              filt_h1, filt_h2, filt_h3);
+        dst0 >>= 6;
+        dst1 >>= 6;
+
+        VSHF_B4_SB(src8, src8, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+                   vec3);
+        dsth8 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                  filt3);
+
+        ILVRL_H2_SH(dsth8, dsth7, dst87_r, dst87_l);
+        dst2 = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r, filt_h0,
+                              filt_h1, filt_h2, filt_h3);
+        dst3 = HEVC_FILT_8TAP(dst21_l, dst43_l, dst65_l, dst87_l, filt_h0,
+                              filt_h1, filt_h2, filt_h3);
+        dst2 >>= 6;
+        dst3 >>= 6;
+
+        PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp1, tmp3);
+        ILVRL_H2_SH(tmp1, in0, tmp0, tmp1);
+        ILVRL_H2_SH(tmp3, in1, tmp2, tmp3);
+        dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+        dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+        dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+        dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+        SRAR_W4_SW(dst1, dst0, dst3, dst2, rnd_vec);
+        CLIP_SW4_0_255_MAX_SATU(dst1, dst0, dst3, dst2);
+        PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp0, tmp1);
+        out = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+        ST8x2_UB(out, dst_tmp, dst_stride);
+        dst_tmp += (2 * dst_stride);
+
+        dsth0 = dsth2;
+        dsth1 = dsth3;
+        dsth2 = dsth4;
+        dsth3 = dsth5;
+        dsth4 = dsth6;
+        dsth5 = dsth7;
+        dsth6 = dsth8;
+    }
+
+    src0_ptr += 8;
+    src1_ptr += 8;
+    dst += 8;
+
+    mask4 = LD_SB(ff_hevc_mask_arr + 16);
+    mask5 = mask4 + 2;
+    mask6 = mask4 + 4;
+    mask7 = mask4 + 6;
+
+    LD_SB7(src0_ptr, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src0_ptr += (7 * src_stride);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+    VSHF_B4_SB(src0, src3, mask4, mask5, mask6, mask7, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src1, src4, mask4, mask5, mask6, mask7, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src2, src5, mask4, mask5, mask6, mask7, vec8, vec9, vec10,
+               vec11);
+    VSHF_B4_SB(src3, src6, mask4, mask5, mask6, mask7, vec12, vec13, vec14,
+               vec15);
+    dst30 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                              filt3);
+    dst41 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                              filt3);
+    dst52 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                              filt3);
+    dst63 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2,
+                              filt3);
+    ILVRL_H2_SH(dst41, dst30, dst10, dst43);
+    ILVRL_H2_SH(dst52, dst41, dst21, dst54);
+    ILVRL_H2_SH(dst63, dst52, dst32, dst65);
+
+    dst66 = (v8i16) __msa_splati_d((v2i64) dst63, 1);
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src0_ptr, src_stride, src7, src8, src9, src10);
+        src0_ptr += (4 * src_stride);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        INSERT_D2_SH(tp0, tp1, in0);
+        src1_ptr += (2 * src2_stride);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        INSERT_D2_SH(tp0, tp1, in1);
+        src1_ptr += (2 * src2_stride);
+
+        VSHF_B4_SB(src7, src9, mask4, mask5, mask6, mask7, vec0, vec1, vec2,
+                   vec3);
+        VSHF_B4_SB(src8, src10, mask4, mask5, mask6, mask7, vec4, vec5, vec6,
+                   vec7);
+        dst97 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                  filt3);
+        dst108 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                   filt3);
+
+        dst76 = __msa_ilvr_h(dst97, dst66);
+        ILVRL_H2_SH(dst108, dst97, dst87, dst109);
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst97, 1);
+        dst98 = __msa_ilvr_h(dst66, dst108);
+
+        dst0 = HEVC_FILT_8TAP(dst10, dst32, dst54, dst76, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        dst1 = HEVC_FILT_8TAP(dst21, dst43, dst65, dst87, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        dst2 = HEVC_FILT_8TAP(dst32, dst54, dst76, dst98, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        dst3 = HEVC_FILT_8TAP(dst43, dst65, dst87, dst109, filt_h0, filt_h1,
+                              filt_h2, filt_h3);
+        SRA_4V(dst0, dst1, dst2, dst3, 6);
+        PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp1, tmp3);
+        ILVRL_H2_SH(tmp1, in0, tmp0, tmp1);
+        ILVRL_H2_SH(tmp3, in1, tmp2, tmp3);
+        dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+        dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+        dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+        dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+        SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+        CLIP_SW4_0_255_MAX_SATU(dst0, dst1, dst2, dst3);
+        PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp0, tmp1);
+        out = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        dst10 = dst54;
+        dst32 = dst76;
+        dst54 = dst98;
+        dst21 = dst65;
+        dst43 = dst87;
+        dst65 = dst109;
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst108, 1);
+    }
 }
 
 static void hevc_hv_biwgt_8t_16w_msa(uint8_t *src0_ptr,
@@ -2230,7 +2535,7 @@
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
                                      height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 16);
+                                     offset1, rnd_val, 2);
 }
 
 static void hevc_hv_biwgt_8t_24w_msa(uint8_t *src0_ptr,
@@ -2252,7 +2557,7 @@
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
                                      height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 24);
+                                     offset1, rnd_val, 3);
 }
 
 static void hevc_hv_biwgt_8t_32w_msa(uint8_t *src0_ptr,
@@ -2274,7 +2579,7 @@
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
                                      height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 32);
+                                     offset1, rnd_val, 4);
 }
 
 static void hevc_hv_biwgt_8t_48w_msa(uint8_t *src0_ptr,
@@ -2296,7 +2601,7 @@
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
                                      height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 48);
+                                     offset1, rnd_val, 6);
 }
 
 static void hevc_hv_biwgt_8t_64w_msa(uint8_t *src0_ptr,
@@ -2318,7 +2623,7 @@
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
                                      height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 64);
+                                     offset1, rnd_val, 8);
 }
 
 static void hevc_hz_biwgt_4t_4x2_msa(uint8_t *src0_ptr,
@@ -2328,22 +2633,21 @@
                                      uint8_t *dst,
                                      int32_t dst_stride,
                                      const int8_t *filter,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v8i16 filt0, filt1;
     v16i8 src0, src1;
     v8i16 in0, in1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
     v16i8 mask1, vec0, vec1;
     v8i16 dst0;
     v4i32 dst0_r, dst0_l;
-    v8i16 filter_vec, const_vec;
+    v8i16 out0, filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 1;
@@ -2356,9 +2660,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -2369,18 +2674,16 @@
     XORI_B2_128_SB(src0, src1);
 
     VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
 
     ILVRL_H2_SW(dst0, in0, dst0_r, dst0_l);
     dst0_r = __msa_dpadd_s_w(offset_vec, (v8i16) dst0_r, (v8i16) weight_vec);
     dst0_l = __msa_dpadd_s_w(offset_vec, (v8i16) dst0_l, (v8i16) weight_vec);
     SRAR_W2_SW(dst0_r, dst0_l, rnd_vec);
-    dst0_r = CLIP_SW_0_255(dst0_r);
-    dst0_l = CLIP_SW_0_255(dst0_l);
-
-    HEVC_PCK_SW_SB2(dst0_l, dst0_r, dst0_r);
-    ST4x2_UB(dst0_r, dst, dst_stride);
+    dst0_r = (v4i32) __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
+    out0 = CLIP_SH_0_255(dst0_r);
+    out0 = (v8i16) __msa_pckev_b((v16i8) out0, (v16i8) out0);
+    ST4x2_UB(out0, dst, dst_stride);
 }
 
 static void hevc_hz_biwgt_4t_4x4_msa(uint8_t *src0_ptr,
@@ -2390,23 +2693,21 @@
                                      uint8_t *dst,
                                      int32_t dst_stride,
                                      const int8_t *filter,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
     v16i8 mask1;
     v8i16 dst0, dst1;
     v16i8 vec0, vec1;
     v8i16 in0, in1, in2, in3;
-    v4i32 dst0_r, dst1_r, dst0_l, dst1_l;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 1;
@@ -2420,9 +2721,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -2433,17 +2735,15 @@
     ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
 
     VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
     VSHF_B2_SB(src2, src3, src2, src3, mask0, mask1, vec0, vec1);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+    dst1 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
     HEVC_BIW_RND_CLIP2(dst0, dst1, in0, in1,
                        weight_vec, rnd_vec, offset_vec,
-                       dst0_r, dst1_r, dst0_l, dst1_l);
+                       dst0, dst1);
 
-    HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-    ST4x4_UB(dst0_r, dst0_r, 0, 1, 2, 3, dst, dst_stride);
+    dst0 = (v8i16) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
+    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, dst_stride);
 }
 
 static void hevc_hz_biwgt_4t_4x8multiple_msa(uint8_t *src0_ptr,
@@ -2461,16 +2761,15 @@
                                              int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t weight, offset;
+    int32_t weight, offset, constant;
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
     v16i8 mask1;
     v16i8 vec0, vec1;
     v8i16 dst0, dst1, dst2, dst3;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 1;
@@ -2481,9 +2780,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -2503,26 +2803,20 @@
         XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
 
         VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src2, src3, src2, src3, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        dst1 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src4, src5, src4, src5, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+        dst2 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src6, src7, src6, src7, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           dst0, dst1, dst2, dst3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST4x8_UB(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
+        ST4x8_UB(dst0, dst1, dst, dst_stride);
         dst += (8 * dst_stride);
     }
 }
@@ -2543,11 +2837,11 @@
 {
     if (2 == height) {
         hevc_hz_biwgt_4t_4x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                 dst, dst_stride, filter, height,
+                                 dst, dst_stride, filter,
                                  weight0, weight1, offset0, offset1, rnd_val);
     } else if (4 == height) {
         hevc_hz_biwgt_4t_4x4_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                 dst, dst_stride, filter, height,
+                                 dst, dst_stride, filter,
                                  weight0, weight1, offset0, offset1, rnd_val);
     } else if (0 == (height % 8)) {
         hevc_hz_biwgt_4t_4x8multiple_msa(src0_ptr, src_stride,
@@ -2573,16 +2867,15 @@
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1;
     v16i8 vec0, vec1;
     v8i16 in0, in1, in2, in3;
     v8i16 dst0, dst1, dst2, dst3;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 1;
@@ -2593,16 +2886,17 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
 
     mask1 = mask0 + 2;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = 2; loop_cnt--;) {
         LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
         src0_ptr += (4 * src_stride);
         LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
@@ -2610,27 +2904,21 @@
         XORI_B4_128_SB(src0, src1, src2, src3);
 
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        dst1 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+        dst2 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
 
         HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           dst0, dst1, dst2, dst3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST6x4_UB(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
+        ST6x4_UB(dst0, dst1, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -2642,22 +2930,20 @@
                                      uint8_t *dst,
                                      int32_t dst_stride,
                                      const int8_t *filter,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v8i16 filt0, filt1;
     v16i8 src0, src1;
     v8i16 in0, in1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1, vec0, vec1;
     v8i16 dst0, dst1;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst0_l, dst1_l;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 1;
@@ -2668,9 +2954,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -2681,17 +2968,15 @@
     LD_SH2(src1_ptr, src2_stride, in0, in1);
     XORI_B2_128_SB(src0, src1);
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+    dst1 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
     HEVC_BIW_RND_CLIP2(dst0, dst1, in0, in1,
                        weight_vec, rnd_vec, offset_vec,
-                       dst0_r, dst1_r, dst0_l, dst1_l);
+                       dst0, dst1);
 
-    HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-    ST8x2_UB(dst0_r, dst, dst_stride);
+    dst0 = (v8i16) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
+    ST8x2_UB(dst0, dst, dst_stride);
 }
 
 static void hevc_hz_biwgt_4t_8x6_msa(uint8_t *src0_ptr,
@@ -2701,24 +2986,21 @@
                                      uint8_t *dst,
                                      int32_t dst_stride,
                                      const int8_t *filter,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    int32_t weight, offset;
+    int32_t weight, offset, constant;
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3, src4, src5;
     v8i16 in0, in1, in2, in3, in4, in5;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1;
     v16i8 vec0, vec1;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 1;
@@ -2729,9 +3011,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -2745,38 +3028,30 @@
     LD_SH2(src1_ptr, src2_stride, in4, in5);
     XORI_B6_128_SB(src0, src1, src2, src3, src4, src5);
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+    dst1 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+    dst2 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
     VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+    dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
     VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
+    dst4 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
     VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
+    dst5 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
     HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                        in0, in1, in2, in3,
                        weight_vec, rnd_vec, offset_vec,
-                       dst0_r, dst1_r, dst2_r, dst3_r,
-                       dst0_l, dst1_l, dst2_l, dst3_l);
+                       dst0, dst1, dst2, dst3);
     HEVC_BIW_RND_CLIP2(dst4, dst5, in4, in5,
                        weight_vec, rnd_vec, offset_vec,
-                       dst4_r, dst5_r, dst4_l, dst5_l);
+                       dst4, dst5);
 
-    HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                     dst2_l, dst2_r, dst3_l, dst3_r,
-                     dst4_l, dst4_r, dst5_l, dst5_r, dst0_r, dst1_r, dst2_r);
-    ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+    PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
+    dst3 = (v8i16) __msa_pckev_b((v16i8) dst5, (v16i8) dst4);
+    ST8x4_UB(dst0, dst1, dst, dst_stride);
     dst += (4 * dst_stride);
-    ST8x2_UB(dst2_r, dst, dst_stride);
+    ST8x2_UB(dst3, dst, dst_stride);
 }
 
 static void hevc_hz_biwgt_4t_8x4multiple_msa(uint8_t *src0_ptr,
@@ -2794,16 +3069,15 @@
                                              int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v16i8 vec0, vec1;
     v8i16 in0, in1, in2, in3;
     v8i16 dst0, dst1, dst2, dst3;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 1;
@@ -2814,9 +3088,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -2831,26 +3106,20 @@
         XORI_B4_128_SB(src0, src1, src2, src3);
 
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        dst1 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+        dst2 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           dst0, dst1, dst2, dst3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
+        ST8x4_UB(dst0, dst1, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -2871,11 +3140,11 @@
 {
     if (2 == height) {
         hevc_hz_biwgt_4t_8x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                 dst, dst_stride, filter, height,
+                                 dst, dst_stride, filter,
                                  weight0, weight1, offset0, offset1, rnd_val);
     } else if (6 == height) {
         hevc_hz_biwgt_4t_8x6_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                 dst, dst_stride, filter, height,
+                                 dst, dst_stride, filter,
                                  weight0, weight1, offset0, offset1, rnd_val);
     } else if (0 == (height % 4)) {
         hevc_hz_biwgt_4t_8x4multiple_msa(src0_ptr, src_stride,
@@ -2901,20 +3170,18 @@
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask2 = {
         8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
     };
     v16i8 mask1, mask3;
     v16i8 vec0, vec1;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 1;
@@ -2925,9 +3192,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -2935,7 +3203,7 @@
     mask1 = mask0 + 2;
     mask3 = mask2 + 2;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = 4; loop_cnt--;) {
         LD_SB4(src0_ptr, src_stride, src0, src1, src2, src3);
         src0_ptr += (4 * src_stride);
         LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
@@ -2945,38 +3213,29 @@
         XORI_B4_128_SB(src0, src1, src2, src3);
 
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        dst1 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+        dst2 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
+        dst4 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src2, src3, src2, src3, mask2, mask3, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
+        dst5 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
 
         HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           dst0, dst1, dst2, dst3);
         HEVC_BIW_RND_CLIP2(dst4, dst5, in4, in5,
                            weight_vec, rnd_vec, offset_vec,
-                           dst4_r, dst5_r, dst4_l, dst5_l);
+                           dst4, dst5);
 
-        HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                         dst2_l, dst2_r, dst3_l, dst3_r,
-                         dst4_l, dst4_r, dst5_l, dst5_r,
-                         dst0_r, dst1_r, dst2_r);
-        ST12x4_UB(dst0_r, dst1_r, dst2_r, dst, dst_stride);
+        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
+        dst3 = (v8i16) __msa_pckev_b((v16i8) dst5, (v16i8) dst4);
+        ST12x4_UB(dst0, dst1, dst3, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -2996,16 +3255,15 @@
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
     v8i16 filt0, filt1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
     v16i8 vec0, vec1;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 1;
@@ -3016,9 +3274,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -3035,49 +3294,37 @@
         XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
 
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        dst1 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+        dst2 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
+        dst4 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
+        dst5 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst6 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
+        dst6 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-        dst7 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
+        dst7 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           dst0, dst1, dst2, dst3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
+        ST_SH2(dst0, dst1, dst, dst_stride);
         dst += (2 * dst_stride);
 
         HEVC_BIW_RND_CLIP4(dst4, dst5, dst6, dst7,
                            in4, in5, in6, in7,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           dst0, dst1, dst2, dst3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
+        ST_SH2(dst0, dst1, dst, dst_stride);
         dst += (2 * dst_stride);
     }
 }
@@ -3097,17 +3344,15 @@
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
-    uint8_t *dst_tmp = dst + 16;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2, src3;
     v8i16 filt0, filt1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1, mask2, mask3;
     v16i8 vec0, vec1;
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 in0, in1, in2, in3, in4, in5;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 1;
@@ -3118,9 +3363,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -3129,7 +3375,7 @@
     mask2 = mask0 + 8;
     mask3 = mask0 + 10;
 
-    for (loop_cnt = (height >> 1); loop_cnt--;) {
+    for (loop_cnt = 16; loop_cnt--;) {
         LD_SB2(src0_ptr, src_stride, src0, src2);
         LD_SB2(src0_ptr + 16, src_stride, src1, src3);
         src0_ptr += (2 * src_stride);
@@ -3140,41 +3386,33 @@
         XORI_B4_128_SB(src0, src1, src2, src3);
 
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        dst1 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+        dst2 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src2, src3, src2, src3, mask2, mask3, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           dst0, dst1, dst2, dst3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
-        dst += (2 * dst_stride);
+        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
+        ST_SH2(dst0, dst1, dst, dst_stride);
+
         /* 8 width */
         VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        dst1 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         HEVC_BIW_RND_CLIP2(dst0, dst1, in4, in5,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst0_l, dst1_l);
+                           dst0, dst1);
 
-        HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-        ST8x2_UB(dst0_r, dst_tmp, dst_stride);
-        dst_tmp += (2 * dst_stride);
+        dst0 = (v8i16) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
+        ST8x2_UB(dst0, (dst + 16), dst_stride);
+        dst += (2 * dst_stride);
     }
 }
 
@@ -3193,16 +3431,15 @@
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2;
     v8i16 filt0, filt1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1, mask2, mask3;
     v8i16 dst0, dst1, dst2, dst3;
     v16i8 vec0, vec1;
     v8i16 in0, in1, in2, in3;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 1;
@@ -3213,9 +3450,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -3233,26 +3471,20 @@
         XORI_B3_128_SB(src0, src1, src2);
 
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        dst1 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+        dst2 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
         HEVC_BIW_RND_CLIP4(dst0, dst1, dst2, dst3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           dst0, dst1, dst2, dst3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, 16);
+        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst1);
+        ST_SH2(dst0, dst1, dst, 16);
         dst += dst_stride;
     }
 }
@@ -3264,20 +3496,19 @@
                                      uint8_t *dst,
                                      int32_t dst_stride,
                                      const int8_t *filter,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    int32_t weight, offset;
+    int32_t weight, offset, constant;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 in0, in1, dst10;
     v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332;
     v4i32 dst10_r, dst10_l;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec, out;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= src_stride;
@@ -3285,9 +3516,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -3310,18 +3542,16 @@
     src4332 = (v16i8) __msa_ilvr_d((v2i64) src43_r, (v2i64) src32_r);
     src4332 = (v16i8) __msa_xori_b((v16u8) src4332, 128);
 
-    dst10 = const_vec;
-    DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst10, dst10);
+    dst10 = HEVC_FILT_4TAP_SH(src2110, src4332, filt0, filt1);
 
     ILVRL_H2_SW(dst10, in0, dst10_r, dst10_l);
     dst10_r = __msa_dpadd_s_w(offset_vec, (v8i16) dst10_r, (v8i16) weight_vec);
     dst10_l = __msa_dpadd_s_w(offset_vec, (v8i16) dst10_l, (v8i16) weight_vec);
     SRAR_W2_SW(dst10_r, dst10_l, rnd_vec);
-    dst10_r = CLIP_SW_0_255(dst10_r);
-    dst10_l = CLIP_SW_0_255(dst10_l);
-
-    HEVC_PCK_SW_SB2(dst10_l, dst10_r, dst10_r);
-    ST4x2_UB(dst10_r, dst, dst_stride);
+    dst10_r = (v4i32) __msa_pckev_h((v8i16) dst10_l, (v8i16) dst10_r);
+    out = CLIP_SH_0_255(dst10_r);
+    out = (v8i16) __msa_pckev_b((v16i8) out, (v16i8) out);
+    ST4x2_UB(out, dst, dst_stride);
 }
 
 static void hevc_vt_biwgt_4t_4x4_msa(uint8_t *src0_ptr,
@@ -3331,22 +3561,20 @@
                                      uint8_t *dst,
                                      int32_t dst_stride,
                                      const int8_t *filter,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    int32_t weight, offset;
+    int32_t weight, offset, constant;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 in0, in1, in2, in3;
     v16i8 src10_r, src32_r, src54_r, src21_r, src43_r, src65_r;
     v16i8 src2110, src4332, src6554;
     v8i16 dst10, dst32;
-    v4i32 dst10_r, dst32_r, dst10_l, dst32_l;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= src_stride;
@@ -3354,9 +3582,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -3380,17 +3609,15 @@
     ILVR_D2_SB(src43_r, src32_r, src65_r, src54_r, src4332, src6554);
     XORI_B2_128_SB(src4332, src6554);
 
-    dst10 = const_vec;
-    DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst10, dst10);
-    dst32 = const_vec;
-    DPADD_SB2_SH(src4332, src6554, filt0, filt1, dst32, dst32);
+    dst10 = HEVC_FILT_4TAP_SH(src2110, src4332, filt0, filt1);
+    dst32 = HEVC_FILT_4TAP_SH(src4332, src6554, filt0, filt1);
 
     HEVC_BIW_RND_CLIP2(dst10, dst32, in0, in1,
                        weight_vec, rnd_vec, offset_vec,
-                       dst10_r, dst32_r, dst10_l, dst32_l);
+                       dst10, dst32);
 
-    HEVC_PCK_SW_SB4(dst10_l, dst10_r, dst32_l, dst32_r, dst10_r);
-    ST4x4_UB(dst10_r, dst10_r, 0, 1, 2, 3, dst, dst_stride);
+    dst10 = (v8i16) __msa_pckev_b((v16i8) dst32, (v16i8) dst10);
+    ST4x4_UB(dst10, dst10, 0, 1, 2, 3, dst, dst_stride);
     dst += (4 * dst_stride);
 }
 
@@ -3409,17 +3636,15 @@
                                              int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t weight, offset;
+    int32_t weight, offset, constant;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
     v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
     v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
     v16i8 src2110, src4332, src6554, src8776;
     v8i16 dst10, dst32, dst54, dst76;
-    v4i32 dst10_r, dst32_r, dst54_r, dst76_r;
-    v4i32 dst10_l, dst32_l, dst54_l, dst76_l;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= src_stride;
@@ -3427,9 +3652,10 @@
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -3459,12 +3685,9 @@
                    src4332, src6554, src8776);
         XORI_B3_128_SB(src4332, src6554, src8776);
 
-        dst10 = const_vec;
-        DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst10, dst10);
-        dst32 = const_vec;
-        DPADD_SB2_SH(src4332, src6554, filt0, filt1, dst32, dst32);
-        dst54 = const_vec;
-        DPADD_SB2_SH(src6554, src8776, filt0, filt1, dst54, dst54);
+        dst10 = HEVC_FILT_4TAP_SH(src2110, src4332, filt0, filt1);
+        dst32 = HEVC_FILT_4TAP_SH(src4332, src6554, filt0, filt1);
+        dst54 = HEVC_FILT_4TAP_SH(src6554, src8776, filt0, filt1);
 
         LD_SB2(src0_ptr, src_stride, src9, src2);
         src0_ptr += (2 * src_stride);
@@ -3472,17 +3695,14 @@
         src2110 = (v16i8) __msa_ilvr_d((v2i64) src109_r, (v2i64) src98_r);
         src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
 
-        dst76 = const_vec;
-        DPADD_SB2_SH(src8776, src2110, filt0, filt1, dst76, dst76);
+        dst76 = HEVC_FILT_4TAP_SH(src8776, src2110, filt0, filt1);
         HEVC_BIW_RND_CLIP4(dst10, dst32, dst54, dst76,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst10_r, dst32_r, dst54_r, dst76_r,
-                           dst10_l, dst32_l, dst54_l, dst76_l);
+                           dst10, dst32, dst54, dst76);
 
-        HEVC_PCK_SW_SB8(dst10_l, dst10_r, dst32_l, dst32_r,
-                        dst54_l, dst54_r, dst76_l, dst76_r, dst10_r, dst54_r);
-        ST4x8_UB(dst10_r, dst54_r, dst, dst_stride);
+        PCKEV_B2_SH(dst32, dst10, dst76, dst54, dst10, dst32);
+        ST4x8_UB(dst10, dst32, dst, dst_stride);
         dst += (8 * dst_stride);
     }
 }
@@ -3503,11 +3723,11 @@
 {
     if (2 == height) {
         hevc_vt_biwgt_4t_4x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                 dst, dst_stride, filter, height,
+                                 dst, dst_stride, filter,
                                  weight0, weight1, offset0, offset1, rnd_val);
     } else if (4 == height) {
         hevc_vt_biwgt_4t_4x4_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                 dst, dst_stride, filter, height,
+                                 dst, dst_stride, filter,
                                  weight0, weight1, offset0, offset1, rnd_val);
     } else if (0 == (height % 8)) {
         hevc_vt_biwgt_4t_4x8multiple_msa(src0_ptr, src_stride,
@@ -3533,24 +3753,24 @@
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 in0, in1, in2, in3;
     v16i8 src10_r, src32_r, src21_r, src43_r;
     v8i16 tmp0, tmp1, tmp2, tmp3;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
 
     src0_ptr -= src_stride;
 
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -3571,29 +3791,23 @@
         XORI_B2_128_SB(src3, src4);
         ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
 
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
+        tmp0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        tmp1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
 
         LD_SB2(src0_ptr, src_stride, src1, src2);
         src0_ptr += (2 * src_stride);
         XORI_B2_128_SB(src1, src2);
         ILVR_B2_SB(src1, src4, src2, src1, src10_r, src21_r);
 
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, tmp3, tmp3);
+        tmp2 = HEVC_FILT_4TAP_SH(src32_r, src10_r, filt0, filt1);
+        tmp3 = HEVC_FILT_4TAP_SH(src43_r, src21_r, filt0, filt1);
         HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           tmp0, tmp1, tmp2, tmp3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST6x4_UB(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(tmp1, tmp0, tmp3, tmp2, tmp0, tmp1);
+        ST6x4_UB(tmp0, tmp1, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -3605,30 +3819,29 @@
                                      uint8_t *dst,
                                      int32_t dst_stride,
                                      const int8_t *filter,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 in0, in1, tmp0, tmp1;
     v16i8 src10_r, src32_r, src21_r, src43_r;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst0_l, dst1_l;
 
     src0_ptr -= src_stride;
 
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -3646,16 +3859,14 @@
     XORI_B2_128_SB(src3, src4);
     ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
 
-    tmp0 = const_vec;
-    DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-    tmp1 = const_vec;
-    DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
+    tmp0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+    tmp1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
     HEVC_BIW_RND_CLIP2(tmp0, tmp1, in0, in1,
                        weight_vec, rnd_vec, offset_vec,
-                       dst0_r, dst1_r, dst0_l, dst1_l);
+                       tmp0, tmp1);
 
-    HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-    ST8x2_UB(dst0_r, dst, dst_stride);
+    tmp0 = (v8i16) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+    ST8x2_UB(tmp0, dst, dst_stride);
 }
 
 static void hevc_vt_biwgt_4t_8x6_msa(uint8_t *src0_ptr,
@@ -3665,33 +3876,31 @@
                                      uint8_t *dst,
                                      int32_t dst_stride,
                                      const int8_t *filter,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
     v8i16 in0, in1, in2, in3, in4, in5;
     v16i8 src10_r, src32_r, src54_r, src76_r;
     v16i8 src21_r, src43_r, src65_r, src87_r;
     v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
 
     src0_ptr -= src_stride;
 
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -3711,33 +3920,25 @@
                src32_r, src43_r, src54_r, src65_r);
     ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
 
-    tmp0 = const_vec;
-    DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-    tmp1 = const_vec;
-    DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-    tmp2 = const_vec;
-    DPADD_SB2_SH(src32_r, src54_r, filt0, filt1, tmp2, tmp2);
-    tmp3 = const_vec;
-    DPADD_SB2_SH(src43_r, src65_r, filt0, filt1, tmp3, tmp3);
-    tmp4 = const_vec;
-    DPADD_SB2_SH(src54_r, src76_r, filt0, filt1, tmp4, tmp4);
-    tmp5 = const_vec;
-    DPADD_SB2_SH(src65_r, src87_r, filt0, filt1, tmp5, tmp5);
+    tmp0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+    tmp1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+    tmp2 = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+    tmp3 = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
+    tmp4 = HEVC_FILT_4TAP_SH(src54_r, src76_r, filt0, filt1);
+    tmp5 = HEVC_FILT_4TAP_SH(src65_r, src87_r, filt0, filt1);
     HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
                        in0, in1, in2, in3,
                        weight_vec, rnd_vec, offset_vec,
-                       dst0_r, dst1_r, dst2_r, dst3_r,
-                       dst0_l, dst1_l, dst2_l, dst3_l);
+                       tmp0, tmp1, tmp2, tmp3);
     HEVC_BIW_RND_CLIP2(tmp4, tmp5, in4, in5,
                        weight_vec, rnd_vec, offset_vec,
-                       dst4_r, dst5_r, dst4_l, dst5_l);
+                       tmp4, tmp5);
 
-    HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                     dst2_l, dst2_r, dst3_l, dst3_r,
-                     dst4_l, dst4_r, dst5_l, dst5_r, dst0_r, dst1_r, dst2_r);
-    ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+    PCKEV_B2_SH(tmp1, tmp0, tmp3, tmp2, tmp0, tmp1);
+    tmp3 = (v8i16) __msa_pckev_b((v16i8) tmp5, (v16i8) tmp4);
+    ST8x4_UB(tmp0, tmp1, dst, dst_stride);
     dst += (4 * dst_stride);
-    ST8x2_UB(dst2_r, dst, dst_stride);
+    ST8x2_UB(tmp3, dst, dst_stride);
 }
 
 static void hevc_vt_biwgt_4t_8x4multiple_msa(uint8_t *src0_ptr,
@@ -3755,24 +3956,24 @@
                                              int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 in0, in1, in2, in3;
     v16i8 src10_r, src32_r, src21_r, src43_r;
     v8i16 tmp0, tmp1, tmp2, tmp3;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
 
     src0_ptr -= src_stride;
 
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -3793,29 +3994,23 @@
         XORI_B2_128_SB(src3, src4);
         ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
 
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
+        tmp0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        tmp1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
 
         LD_SB2(src0_ptr, src_stride, src1, src2);
         src0_ptr += (2 * src_stride);
         XORI_B2_128_SB(src1, src2);
         ILVR_B2_SB(src1, src4, src2, src1, src10_r, src21_r);
 
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, tmp3, tmp3);
+        tmp2 = HEVC_FILT_4TAP_SH(src32_r, src10_r, filt0, filt1);
+        tmp3 = HEVC_FILT_4TAP_SH(src43_r, src21_r, filt0, filt1);
         HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           tmp0, tmp1, tmp2, tmp3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(tmp1, tmp0, tmp3, tmp2, tmp0, tmp1);
+        ST8x4_UB(tmp0, tmp1, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -3836,11 +4031,11 @@
 {
     if (2 == height) {
         hevc_vt_biwgt_4t_8x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                 dst, dst_stride, filter, height,
+                                 dst, dst_stride, filter,
                                  weight0, weight1, offset0, offset1, rnd_val);
     } else if (6 == height) {
         hevc_vt_biwgt_4t_8x6_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
-                                 dst, dst_stride, filter, height,
+                                 dst, dst_stride, filter,
                                  weight0, weight1, offset0, offset1, rnd_val);
     } else {
         hevc_vt_biwgt_4t_8x4multiple_msa(src0_ptr, src_stride,
@@ -3866,7 +4061,7 @@
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2, src3, src4, src5;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
     v16i8 src10_r, src32_r, src21_r, src43_r;
@@ -3874,19 +4069,18 @@
     v16i8 src10_l, src32_l, src54_l, src21_l, src43_l, src65_l;
     v16i8 src2110, src4332;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
 
     src0_ptr -= (1 * src_stride);
 
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -3914,12 +4108,9 @@
         ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
         src4332 = (v16i8) __msa_ilvr_d((v2i64) src43_l, (v2i64) src32_l);
 
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-        tmp4 = const_vec;
-        DPADD_SB2_SH(src2110, src4332, filt0, filt1, tmp4, tmp4);
+        tmp0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        tmp1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        tmp4 = HEVC_FILT_4TAP_SH(src2110, src4332, filt0, filt1);
 
         LD_SB2(src0_ptr, src_stride, src5, src2);
         src0_ptr += (2 * src_stride);
@@ -3928,26 +4119,20 @@
         ILVL_B2_SB(src5, src4, src2, src5, src54_l, src65_l);
         src2110 = (v16i8) __msa_ilvr_d((v2i64) src65_l, (v2i64) src54_l);
 
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, tmp3, tmp3);
-        tmp5 = const_vec;
-        DPADD_SB2_SH(src4332, src2110, filt0, filt1, tmp5, tmp5);
+        tmp2 = HEVC_FILT_4TAP_SH(src32_r, src10_r, filt0, filt1);
+        tmp3 = HEVC_FILT_4TAP_SH(src43_r, src21_r, filt0, filt1);
+        tmp5 = HEVC_FILT_4TAP_SH(src4332, src2110, filt0, filt1);
         HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           tmp0, tmp1, tmp2, tmp3);
         HEVC_BIW_RND_CLIP2(tmp4, tmp5, in4, in5,
                            weight_vec, rnd_vec, offset_vec,
-                           dst4_r, dst5_r, dst4_l, dst5_l);
+                           tmp4, tmp5);
 
-        HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                         dst2_l, dst2_r, dst3_l, dst3_r,
-                         dst4_l, dst4_r, dst5_l, dst5_r,
-                         dst0_r, dst1_r, dst2_r);
-        ST12x4_UB(dst0_r, dst1_r, dst2_r, dst, dst_stride);
+        PCKEV_B2_SH(tmp1, tmp0, tmp3, tmp2, tmp0, tmp1);
+        tmp2 = (v8i16) __msa_pckev_b((v16i8) tmp5, (v16i8) tmp4);
+        ST12x4_UB(tmp0, tmp1, tmp2, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -3967,25 +4152,25 @@
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2, src3, src4, src5;
     v8i16 in0, in1, in2, in3;
     v16i8 src10_r, src32_r, src21_r, src43_r;
     v16i8 src10_l, src32_l, src21_l, src43_l;
     v8i16 tmp0, tmp1, tmp2, tmp3;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
 
     src0_ptr -= src_stride;
 
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -4009,23 +4194,17 @@
         ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
         ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
 
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src10_l, src32_l, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src21_l, src43_l, filt0, filt1, tmp3, tmp3);
+        tmp0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        tmp1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        tmp2 = HEVC_FILT_4TAP_SH(src10_l, src32_l, filt0, filt1);
+        tmp3 = HEVC_FILT_4TAP_SH(src21_l, src43_l, filt0, filt1);
 
         HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                        dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
+                           tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_SH(tmp2, tmp0, tmp3, tmp1, tmp0, tmp1);
+        ST_SH2(tmp0, tmp1, dst, dst_stride);
         dst += (2 * dst_stride);
         LD_SB2(src0_ptr, src_stride, src5, src2);
         src0_ptr += (2 * src_stride);
@@ -4037,23 +4216,17 @@
         ILVR_B2_SB(src5, src4, src2, src5, src10_r, src21_r);
         ILVL_B2_SB(src5, src4, src2, src5, src10_l, src21_l);
 
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, tmp1, tmp1);
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src32_l, src10_l, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src43_l, src21_l, filt0, filt1, tmp3, tmp3);
+        tmp0 = HEVC_FILT_4TAP_SH(src32_r, src10_r, filt0, filt1);
+        tmp1 = HEVC_FILT_4TAP_SH(src43_r, src21_r, filt0, filt1);
+        tmp2 = HEVC_FILT_4TAP_SH(src32_l, src10_l, filt0, filt1);
+        tmp3 = HEVC_FILT_4TAP_SH(src43_l, src21_l, filt0, filt1);
         HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           tmp0, tmp1, tmp2, tmp3);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                        dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(tmp2, tmp0, tmp3, tmp1, tmp0, tmp1);
+        ST_SH2(tmp0, tmp1, dst, dst_stride);
         dst += (2 * dst_stride);
     }
 }
@@ -4073,7 +4246,7 @@
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2, src3, src4, src5;
     v16i8 src6, src7, src8, src9, src10, src11;
     v8i16 in0, in1, in2, in3, in4, in5;
@@ -4082,19 +4255,18 @@
     v16i8 src21_r, src43_r, src87_r, src109_r;
     v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
 
     src0_ptr -= src_stride;
 
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -4130,36 +4302,28 @@
         XORI_B2_128_SB(src9, src10);
         ILVR_B2_SB(src9, src8, src10, src9, src98_r, src109_r);
         /* 16width */
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp4 = const_vec;
-        DPADD_SB2_SH(src10_l, src32_l, filt0, filt1, tmp4, tmp4);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-        tmp5 = const_vec;
-        DPADD_SB2_SH(src21_l, src43_l, filt0, filt1, tmp5, tmp5);
+        tmp0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        tmp4 = HEVC_FILT_4TAP_SH(src10_l, src32_l, filt0, filt1);
+        tmp1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        tmp5 = HEVC_FILT_4TAP_SH(src21_l, src43_l, filt0, filt1);
         /* 8width */
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src76_r, src98_r, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src87_r, src109_r, filt0, filt1, tmp3, tmp3);
+        tmp2 = HEVC_FILT_4TAP_SH(src76_r, src98_r, filt0, filt1);
+        tmp3 = HEVC_FILT_4TAP_SH(src87_r, src109_r, filt0, filt1);
         /* 16width */
         HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp4, tmp5,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           tmp0, tmp1, tmp4, tmp5);
         /* 8width */
         HEVC_BIW_RND_CLIP2(tmp2, tmp3, in4, in5,
                            weight_vec, rnd_vec, offset_vec,
-                           dst4_r, dst5_r, dst4_l, dst5_l);
+                           tmp2, tmp3);
         /* 16width */
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                        dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
+        PCKEV_B2_SH(tmp4, tmp0, tmp5, tmp1, tmp0, tmp1);
         /* 8width */
-        HEVC_PCK_SW_SB4(dst4_l, dst4_r, dst5_l, dst5_r, dst4_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
-        ST8x2_UB(dst4_r, dst + 16, dst_stride);
+        tmp2 = (v8i16) __msa_pckev_b((v16i8) tmp3, (v16i8) tmp2);
+        ST_SH2(tmp0, tmp1, dst, dst_stride);
+        ST8x2_UB(tmp2, dst + 16, dst_stride);
         dst += (2 * dst_stride);
 
         /* 16width */
@@ -4177,37 +4341,29 @@
         XORI_B2_128_SB(src11, src8);
         ILVR_B2_SB(src11, src10, src8, src11, src76_r, src87_r);
         /* 16width */
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, tmp0, tmp0);
-        tmp4 = const_vec;
-        DPADD_SB2_SH(src32_l, src10_l, filt0, filt1, tmp4, tmp4);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, tmp1, tmp1);
-        tmp5 = const_vec;
-        DPADD_SB2_SH(src43_l, src21_l, filt0, filt1, tmp5, tmp5);
+        tmp0 = HEVC_FILT_4TAP_SH(src32_r, src10_r, filt0, filt1);
+        tmp4 = HEVC_FILT_4TAP_SH(src32_l, src10_l, filt0, filt1);
+        tmp1 = HEVC_FILT_4TAP_SH(src43_r, src21_r, filt0, filt1);
+        tmp5 = HEVC_FILT_4TAP_SH(src43_l, src21_l, filt0, filt1);
         /* 8width */
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src98_r, src76_r, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src109_r, src87_r, filt0, filt1, tmp3, tmp3);
+        tmp2 = HEVC_FILT_4TAP_SH(src98_r, src76_r, filt0, filt1);
+        tmp3 = HEVC_FILT_4TAP_SH(src109_r, src87_r, filt0, filt1);
         /* 16width */
         HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp4, tmp5,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           tmp0, tmp1, tmp4, tmp5);
         /* 8width */
         HEVC_BIW_RND_CLIP2(tmp2, tmp3, in4, in5,
                            weight_vec, rnd_vec, offset_vec,
-                           dst4_r, dst5_r, dst4_l, dst5_l);
+                           tmp2, tmp3);
         /* 16width */
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                        dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
+        PCKEV_B2_SH(tmp4, tmp0, tmp5, tmp1, tmp0, tmp1);
 
         /* 8width */
-        HEVC_PCK_SW_SB4(dst4_l, dst4_r, dst5_l, dst5_r, dst4_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
-        ST8x2_UB(dst4_r, dst + 16, dst_stride);
+        tmp2 = (v8i16) __msa_pckev_b((v16i8) tmp3, (v16i8) tmp2);
+        ST_SH2(tmp0, tmp1, dst, dst_stride);
+        ST8x2_UB(tmp2, dst + 16, dst_stride);
         dst += (2 * dst_stride);
     }
 }
@@ -4228,7 +4384,7 @@
 {
     uint32_t loop_cnt;
     uint8_t *dst_tmp = dst + 16;
-    int32_t offset, weight;
+    int32_t offset, weight, constant;
     v16i8 src0, src1, src2, src3, src4, src6, src7, src8, src9, src10;
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
     v16i8 src10_r, src32_r, src76_r, src98_r;
@@ -4237,19 +4393,18 @@
     v16i8 src10_l, src32_l, src76_l, src98_l;
     v16i8 src21_l, src43_l, src87_l, src109_l;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r, dst6_r, dst7_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l, dst6_l, dst7_l;
 
     src0_ptr -= src_stride;
 
     offset = (offset0 + offset1) << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
+    constant = 128 * weight1;
+    constant <<= 6;
+    offset += constant;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
     weight_vec = __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
@@ -4279,24 +4434,18 @@
         ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
 
         /* 16width */
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp4 = const_vec;
-        DPADD_SB2_SH(src10_l, src32_l, filt0, filt1, tmp4, tmp4);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-        tmp5 = const_vec;
-        DPADD_SB2_SH(src21_l, src43_l, filt0, filt1, tmp5, tmp5);
+        tmp0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        tmp4 = HEVC_FILT_4TAP_SH(src10_l, src32_l, filt0, filt1);
+        tmp1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        tmp5 = HEVC_FILT_4TAP_SH(src21_l, src43_l, filt0, filt1);
         /* 16width */
         HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp4, tmp5,
                            in0, in1, in2, in3,
                            weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+                           tmp0, tmp1, tmp4, tmp5);
         /* 16width */
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                        dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_SH(tmp4, tmp0, tmp5, tmp1, tmp0, tmp1);
+        ST_SH2(tmp0, tmp1, dst, dst_stride);
         dst += (2 * dst_stride);
 
         src10_r = src32_r;
@@ -4315,25 +4464,19 @@
         ILVR_B2_SB(src9, src8, src10, src9, src98_r, src109_r);
         ILVL_B2_SB(src9, src8, src10, src9, src98_l, src109_l);
         /* next 16width */
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src76_r, src98_r, filt0, filt1, tmp2, tmp2);
-        tmp6 = const_vec;
-        DPADD_SB2_SH(src76_l, src98_l, filt0, filt1, tmp6, tmp6);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src87_r, src109_r, filt0, filt1, tmp3, tmp3);
-        tmp7 = const_vec;
-        DPADD_SB2_SH(src87_l, src109_l, filt0, filt1, tmp7, tmp7);
+        tmp2 = HEVC_FILT_4TAP_SH(src76_r, src98_r, filt0, filt1);
+        tmp6 = HEVC_FILT_4TAP_SH(src76_l, src98_l, filt0, filt1);
+        tmp3 = HEVC_FILT_4TAP_SH(src87_r, src109_r, filt0, filt1);
+        tmp7 = HEVC_FILT_4TAP_SH(src87_l, src109_l, filt0, filt1);
         /* next 16width */
         HEVC_BIW_RND_CLIP4(tmp2, tmp3, tmp6, tmp7,
                            in4, in5, in6, in7,
                            weight_vec, rnd_vec, offset_vec,
-                           dst4_r, dst5_r, dst6_r, dst7_r,
-                           dst4_l, dst5_l, dst6_l, dst7_l);
+                           tmp2, tmp3, tmp6, tmp7);
 
         /* next 16width */
-        HEVC_PCK_SW_SB8(dst4_l, dst4_r, dst6_l, dst6_r,
-                        dst5_l, dst5_r, dst7_l, dst7_r, dst4_r, dst5_r);
-        ST_SW2(dst4_r, dst5_r, dst_tmp, dst_stride);
+        PCKEV_B2_SH(tmp6, tmp2, tmp7, tmp3, tmp2, tmp3);
+        ST_SH2(tmp2, tmp3, dst_tmp, dst_stride);
         dst_tmp += (2 * dst_stride);
 
         src76_r = src98_r;
@@ -4352,26 +4495,25 @@
                                      int32_t dst_stride,
                                      const int8_t *filter_x,
                                      const int8_t *filter_y,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
+    uint64_t tp0, tp1;
     int32_t offset, weight;
-    v8i16 in0, in1;
+    v8i16 in0 = { 0 };
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec, tmp, weight_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4;
-    v4i32 dst0_r, dst1_r, dst0_l;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 dst20, dst31, dst42, dst10, dst32, dst21, dst43, tmp0, tmp1;
+    v4i32 dst0, dst1, offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (src_stride + 1);
 
@@ -4379,10 +4521,9 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -4390,56 +4531,44 @@
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_fill_w((128 * weight1));
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
-    weight_vec = __msa_fill_w(weight);
+    weight_vec = (v8i16) __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
 
-    LD_SB3(src0_ptr, src_stride, src0, src1, src2);
-    src0_ptr += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB5(src0_ptr, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
+    VSHF_B2_SB(src0, src2, src0, src2, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src3, src1, src3, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src4, src2, src4, mask0, mask1, vec4, vec5);
 
-    LD_SB2(src0_ptr, src_stride, src3, src4);
-    LD_SH2(src1_ptr, src2_stride, in0, in1);
-    in0 = (v8i16) __msa_ilvr_d((v2i64) in1, (v2i64) in0);
-    XORI_B2_128_SB(src3, src4);
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-    dst32_r = __msa_ilvr_h(dst3, dst2);
-    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-    dst0_r >>= 6;
-    /* row 4 */
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-    dst43_r = __msa_ilvr_h(dst4, dst3);
-    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst1_r = (v4i32) __msa_pckev_h((v8i16) dst1_r, (v8i16) dst0_r);
+    dst20 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst31 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst42 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
 
-    ILVRL_H2_SW(dst1_r, in0, dst0_r, dst0_l);
-    dst0_r = __msa_dpadd_s_w(offset_vec, (v8i16) dst0_r, (v8i16) weight_vec);
-    dst0_l = __msa_dpadd_s_w(offset_vec, (v8i16) dst0_l, (v8i16) weight_vec);
-    SRAR_W2_SW(dst0_r, dst0_l, rnd_vec);
-    dst0_r = CLIP_SW_0_255(dst0_r);
-    dst0_l = CLIP_SW_0_255(dst0_l);
+    ILVRL_H2_SH(dst31, dst20, dst10, dst32);
+    ILVRL_H2_SH(dst42, dst31, dst21, dst43);
 
-    HEVC_PCK_SW_SB2(dst0_l, dst0_r, dst0_r);
-    ST4x2_UB(dst0_r, dst, dst_stride);
+    dst0 = HEVC_FILT_4TAP(dst10, dst32, filt_h0, filt_h1);
+    dst1 = HEVC_FILT_4TAP(dst21, dst43, filt_h0, filt_h1);
+    dst0 >>= 6;
+    dst1 >>= 6;
+    dst0 = (v4i32) __msa_pckev_h((v8i16) dst1, (v8i16) dst0);
+
+    LD2(src1_ptr, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in0);
+
+    ILVRL_H2_SH(dst0, in0, tmp0, tmp1);
+    dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+    dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+    SRAR_W2_SW(dst0, dst1, rnd_vec);
+    tmp = __msa_pckev_h((v8i16) dst1, (v8i16) dst0);
+    tmp = CLIP_SH_0_255_MAX_SATU(tmp);
+    out = (v16u8) __msa_pckev_b((v16i8) tmp, (v16i8) tmp);
+    ST4x2_UB(out, dst, dst_stride);
 }
 
 static void hevc_hv_biwgt_4t_4x4_msa(uint8_t *src0_ptr,
@@ -4450,28 +4579,28 @@
                                      int32_t dst_stride,
                                      const int8_t *filter_x,
                                      const int8_t *filter_y,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
+    uint64_t tp0, tp1;
     int32_t offset, weight;
-    v8i16 in0, in1, in2, in3;
+    v16u8 out;
+    v8i16 in0 = { 0 }, in1 = { 0 };
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v8i16 tmp0, tmp1;
-    v4i32 dst0_l, dst1_l;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 filter_vec, weight_vec;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 dst30, dst41, dst52, dst63;
+    v8i16 dst10, dst32, dst54, dst21, dst43, dst65;
+    v4i32 offset_vec, rnd_vec, const_vec;
+    v4i32 dst0, dst1, dst2, dst3;
 
     src0_ptr -= (src_stride + 1);
 
@@ -4479,10 +4608,9 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -4490,66 +4618,54 @@
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_fill_w((128 * weight1));
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
-    weight_vec = __msa_fill_w(weight);
+    weight_vec = (v8i16) __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
 
-    LD_SB3(src0_ptr, src_stride, src0, src1, src2);
-    src0_ptr += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB7(src0_ptr, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
+    VSHF_B2_SB(src0, src3, src0, src3, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src4, src1, src4, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src5, src2, src5, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src6, src3, src6, mask0, mask1, vec6, vec7);
 
-    LD_SB4(src0_ptr, src_stride, src3, src4, src5, src6);
-    LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
-    ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
-    XORI_B4_128_SB(src3, src4, src5, src6);
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-    dst32_r = __msa_ilvr_h(dst3, dst2);
-    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-    dst0_r >>= 6;
-    /* row 4 */
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-    dst43_r = __msa_ilvr_h(dst4, dst3);
-    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    /* row 5 */
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-    dst10_r = __msa_ilvr_h(dst5, dst4);
-    dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-    dst2_r >>= 6;
-    /* row 6 */
-    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-    dst21_r = __msa_ilvr_h(dst2, dst5);
-    dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-    dst3_r >>= 6;
-    PCKEV_H2_SH(dst1_r, dst0_r, dst3_r, dst2_r, tmp0, tmp1);
-    HEVC_BIW_RND_CLIP2(tmp0, tmp1, in0, in1,
-                       weight_vec, rnd_vec, offset_vec,
-                       dst0_r, dst1_r, dst0_l, dst1_l);
+    dst30 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst41 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst52 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst63 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-    HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-    ST4x4_UB(dst0_r, dst0_r, 0, 1, 2, 3, dst, dst_stride);
+    ILVRL_H2_SH(dst41, dst30, dst10, dst43);
+    ILVRL_H2_SH(dst52, dst41, dst21, dst54);
+    ILVRL_H2_SH(dst63, dst52, dst32, dst65);
+    dst0 = HEVC_FILT_4TAP(dst10, dst32, filt_h0, filt_h1);
+    dst1 = HEVC_FILT_4TAP(dst21, dst43, filt_h0, filt_h1);
+    dst2 = HEVC_FILT_4TAP(dst32, dst54, filt_h0, filt_h1);
+    dst3 = HEVC_FILT_4TAP(dst43, dst65, filt_h0, filt_h1);
+    SRA_4V(dst0, dst1, dst2, dst3, 6);
+    PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp1, tmp3);
+
+    LD2(src1_ptr, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in0);
+    src1_ptr += (2 * src2_stride);
+    LD2(src1_ptr, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in1);
+
+    ILVRL_H2_SH(tmp1, in0, tmp0, tmp1);
+    ILVRL_H2_SH(tmp3, in1, tmp2, tmp3);
+
+    dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+    dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+    dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+    dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+    SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+    PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp0, tmp1);
+    CLIP_SH2_0_255_MAX_SATU(tmp0, tmp1);
+    out = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
 }
 
 static void hevc_hv_biwgt_4t_4multx8mult_msa(uint8_t *src0_ptr,
@@ -4568,22 +4684,24 @@
                                              int32_t rnd_val)
 {
     uint32_t loop_cnt;
+    uint64_t tp0, tp1;
     int32_t offset, weight;
-    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v16u8 out0, out1;
+    v8i16 in0 = { 0 }, in1 = { 0 }, in2 = { 0 }, in3 = { 0 };
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, dst9;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r, dst6_r, dst7_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l;
-    v8i16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filter_vec, weight_vec;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v8i16 dst10, dst21, dst22, dst73, dst84, dst95, dst106;
     v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
     v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 dst98_r, dst109_r;
+    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v4i32 offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (src_stride + 1);
 
@@ -4591,10 +4709,9 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -4602,100 +4719,96 @@
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_fill_w((128 * weight1));
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
-    weight_vec = __msa_fill_w(weight);
+    weight_vec = (v8i16) __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
 
     LD_SB3(src0_ptr, src_stride, src0, src1, src2);
     src0_ptr += (3 * src_stride);
     XORI_B3_128_SB(src0, src1, src2);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
+    VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src2, src1, src2, mask0, mask1, vec2, vec3);
+    dst10 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst21 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    ILVRL_H2_SH(dst21, dst10, dst10_r, dst21_r);
+    dst22 = (v8i16) __msa_splati_d((v2i64) dst21, 1);
 
     for (loop_cnt = height >> 3; loop_cnt--;) {
         LD_SB8(src0_ptr, src_stride,
                src3, src4, src5, src6, src7, src8, src9, src10);
         src0_ptr += (8 * src_stride);
-        LD_SH8(src1_ptr, src2_stride, in0, in1, in2, in3, in4, in5, in6, in7);
-        src1_ptr += (8 * src2_stride);
-        ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
-        ILVR_D2_SH(in5, in4, in7, in6, in2, in3);
         XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
-        /* row 3 */
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-        dst32_r = __msa_ilvr_h(dst3, dst2);
-        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-        dst0_r >>= 6;
-        /* row 4 */
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-        dst43_r = __msa_ilvr_h(dst4, dst3);
-        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-        dst1_r >>= 6;
-        /* row 5 */
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-        dst54_r = __msa_ilvr_h(dst5, dst4);
-        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
-        dst2_r >>= 6;
-        /* row 6 */
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst6 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-        dst65_r = __msa_ilvr_h(dst6, dst5);
-        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
-        dst3_r >>= 6;
-        VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-        dst7 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
-        dst76_r = __msa_ilvr_h(dst7, dst6);
-        dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
-        dst4_r >>= 6;
-        VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec0, vec1);
-        dst8 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst8, dst8);
-        dst87_r = __msa_ilvr_h(dst8, dst7);
-        dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
-        dst5_r >>= 6;
-        VSHF_B2_SB(src9, src9, src9, src9, mask0, mask1, vec0, vec1);
-        dst9 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst9, dst9);
-        dst10_r = __msa_ilvr_h(dst9, dst8);
-        dst6_r = HEVC_FILT_4TAP(dst76_r, dst10_r, filt_h0, filt_h1);
-        dst6_r >>= 6;
-        VSHF_B2_SB(src10, src10, src10, src10, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        dst21_r = __msa_ilvr_h(dst2, dst9);
-        dst7_r = HEVC_FILT_4TAP(dst87_r, dst21_r, filt_h0, filt_h1);
-        dst7_r >>= 6;
-        PCKEV_H4_SH(dst1_r, dst0_r, dst3_r, dst2_r,
-                    dst5_r, dst4_r, dst7_r, dst6_r, tmp0, tmp1, tmp2, tmp3);
-        HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                           in0, in1, in2, in3,
-                           weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+        VSHF_B2_SB(src3, src7, src3, src7, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src8, src4, src8, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src9, src5, src9, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src10, src6, src10, mask0, mask1, vec6, vec7);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST4x8_UB(dst0_r, dst1_r, dst, dst_stride);
+        dst73 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst84 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst95 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst106 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        dst32_r = __msa_ilvr_h(dst73, dst22);
+        ILVRL_H2_SH(dst84, dst73, dst43_r, dst87_r);
+        ILVRL_H2_SH(dst95, dst84, dst54_r, dst98_r);
+        ILVRL_H2_SH(dst106, dst95, dst65_r, dst109_r);
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst73, 1);
+        dst76_r = __msa_ilvr_h(dst22, dst106);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in0);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in1);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in2);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in3);
+
+        dst0 = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst1 = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst2 = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst3 = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst4 = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+        dst5 = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+        dst6 = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+        dst7 = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+        SRA_4V(dst0, dst1, dst2, dst3, 6);
+        SRA_4V(dst4, dst5, dst6, dst7, 6);
+        PCKEV_H4_SW(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst1,
+                    dst2, dst3);
+        ILVRL_H2_SH(dst0, in0, tmp0, tmp1);
+        ILVRL_H2_SH(dst1, in1, tmp2, tmp3);
+        ILVRL_H2_SH(dst2, in2, tmp4, tmp5);
+        ILVRL_H2_SH(dst3, in3, tmp6, tmp7);
+        dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+        dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+        dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+        dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+        dst4 = __msa_dpadd_s_w(offset_vec, tmp4, weight_vec);
+        dst5 = __msa_dpadd_s_w(offset_vec, tmp5, weight_vec);
+        dst6 = __msa_dpadd_s_w(offset_vec, tmp6, weight_vec);
+        dst7 = __msa_dpadd_s_w(offset_vec, tmp7, weight_vec);
+        SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+        SRAR_W4_SW(dst4, dst5, dst6, dst7, rnd_vec);
+        PCKEV_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, tmp0, tmp1,
+                    tmp2, tmp3);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST4x8_UB(out0, out1, dst, dst_stride);
         dst += (8 * dst_stride);
+
+        dst10_r = dst98_r;
+        dst21_r = dst109_r;
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst106, 1);
     }
 }
 
@@ -4717,13 +4830,11 @@
     if (2 == height) {
         hevc_hv_biwgt_4t_4x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter_x, filter_y,
-                                 height, weight0, weight1, offset0, offset1,
-                                 rnd_val);
+                                 weight0, weight1, offset0, offset1, rnd_val);
     } else if (4 == height) {
         hevc_hv_biwgt_4t_4x4_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter_x, filter_y,
-                                 height, weight0, weight1, offset0, offset1,
-                                 rnd_val);
+                                 weight0, weight1, offset0, offset1, rnd_val);
     } else if (0 == (height % 8)) {
         hevc_hv_biwgt_4t_4multx8mult_msa(src0_ptr, src_stride,
                                          src1_ptr, src2_stride,
@@ -4748,22 +4859,28 @@
                                     int32_t offset1,
                                     int32_t rnd_val)
 {
-    uint32_t loop_cnt;
+    uint32_t tpw0, tpw1, tpw2, tpw3;
+    uint64_t tp0, tp1;
     int32_t offset, weight;
-    v16i8 src0, src1, src2, src3, src4, src5, src6;
-    v8i16 in0, in1, in2, in3;
+    v16u8 out0, out1, out2;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v8i16 in0 = { 0 }, in1 = { 0 }, in2 = { 0 }, in3 = { 0 };
+    v8i16 in4 = { 0 }, in5 = { 0 };
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1, filter_vec;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6, dsth7, dsth8, dsth9;
+    v8i16 dsth10, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, weight_vec;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r, dst98_r, dst21_r, dst43_r;
+    v8i16 dst65_r, dst87_r, dst109_r, dst10_l, dst32_l, dst54_l, dst76_l;
+    v8i16 dst98_l, dst21_l, dst43_l, dst65_l, dst87_l, dst109_l;
+    v8i16 dst1021_l, dst3243_l, dst5465_l, dst7687_l, dst98109_l;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
-    v8i16 tmp0, tmp1, tmp2, tmp3;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
-    v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v4i32 dst4_r, dst5_r, dst6_r, dst7_r;
+    v4i32 offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (src_stride + 1);
 
@@ -4771,10 +4888,9 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -4782,11 +4898,12 @@
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_fill_w((128 * weight1));
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
-    weight_vec = __msa_fill_w(weight);
+    weight_vec = (v8i16) __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
 
     LD_SB3(src0_ptr, src_stride, src0, src1, src2);
     src0_ptr += (3 * src_stride);
@@ -4795,74 +4912,120 @@
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    dsth0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
 
-    ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
-    ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
+    ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+    ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
 
-    for (loop_cnt = height >> 2; loop_cnt--;) {
-        LD_SB4(src0_ptr, src_stride, src3, src4, src5, src6);
-        src0_ptr += (4 * src_stride);
-        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
-        src1_ptr += (4 * src2_stride);
-        XORI_B4_128_SB(src3, src4, src5, src6);
+    LD_SB8(src0_ptr, src_stride, src3, src4, src5, src6, src7, src8, src9,
+           src10);
+    XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
 
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
 
-        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
-        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-        dst0_r >>= 6;
-        dst0_l >>= 6;
+    dsth3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dsth6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
+    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src9, src9, src9, src9, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src10, src10, src10, src10, mask0, mask1, vec6, vec7);
 
-        ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
-        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-        dst1_r >>= 6;
-        dst1_l >>= 6;
+    dsth7 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth8 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth9 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dsth10 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
+    ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+    ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+    ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+    ILVRL_H2_SH(dsth7, dsth6, dst76_r, dst76_l);
+    ILVRL_H2_SH(dsth8, dsth7, dst87_r, dst87_l);
+    ILVRL_H2_SH(dsth9, dsth8, dst98_r, dst98_l);
+    ILVRL_H2_SH(dsth10, dsth9, dst109_r, dst109_l);
+    PCKEV_D2_SH(dst21_l, dst10_l, dst43_l, dst32_l, dst1021_l, dst3243_l);
+    PCKEV_D2_SH(dst65_l, dst54_l, dst87_l, dst76_l, dst5465_l, dst7687_l);
+    dst98109_l = (v8i16) __msa_pckev_d((v2i64) dst109_l, (v2i64) dst98_l);
 
-        ILVRL_H2_SH(dst5, dst4, dst10_r, dst10_l);
-        dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-        dst2_l = HEVC_FILT_4TAP(dst32_l, dst10_l, filt_h0, filt_h1);
-        dst2_r >>= 6;
-        dst2_l >>= 6;
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+    dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+    dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+    dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+    dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+    dst6_r = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+    dst7_r = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+    dst0_l = HEVC_FILT_4TAP(dst1021_l, dst3243_l, filt_h0, filt_h1);
+    dst1_l = HEVC_FILT_4TAP(dst3243_l, dst5465_l, filt_h0, filt_h1);
+    dst2_l = HEVC_FILT_4TAP(dst5465_l, dst7687_l, filt_h0, filt_h1);
+    dst3_l = HEVC_FILT_4TAP(dst7687_l, dst98109_l, filt_h0, filt_h1);
+    SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+    SRA_4V(dst4_r, dst5_r, dst6_r, dst7_r, 6);
+    SRA_4V(dst0_l, dst1_l, dst2_l, dst3_l, 6);
+    PCKEV_H2_SW(dst1_r, dst0_r, dst3_r, dst2_r, dst0, dst1);
+    PCKEV_H2_SW(dst5_r, dst4_r, dst7_r, dst6_r, dst2, dst3);
 
-        ILVRL_H2_SH(dst2, dst5, dst21_r, dst21_l);
-        dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-        dst3_l = HEVC_FILT_4TAP(dst43_l, dst21_l, filt_h0, filt_h1);
-        dst3_r >>= 6;
-        dst3_l >>= 6;
-        PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r,
-                    dst2_l, dst2_r, dst3_l, dst3_r, tmp0, tmp1, tmp2, tmp3);
-        HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                           in0, in1, in2, in3,
-                           weight_vec, rnd_vec, offset_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r,
-                           dst0_l, dst1_l, dst2_l, dst3_l);
+    LD2(src1_ptr, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in0);
+    LD2(src1_ptr + 2 * src2_stride, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in1);
 
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST6x4_UB(dst0_r, dst1_r, dst, dst_stride);
-        dst += (4 * dst_stride);
-    }
+    LD2(src1_ptr + 4 * src2_stride, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in2);
+    LD2(src1_ptr + 6 * src2_stride, src2_stride, tp0, tp1);
+    INSERT_D2_SH(tp0, tp1, in3);
+
+    ILVRL_H2_SH(dst0, in0, tmp0, tmp1);
+    ILVRL_H2_SH(dst1, in1, tmp2, tmp3);
+    ILVRL_H2_SH(dst2, in2, tmp4, tmp5);
+    ILVRL_H2_SH(dst3, in3, tmp6, tmp7);
+    dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+    dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+    dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+    dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+    dst4 = __msa_dpadd_s_w(offset_vec, tmp4, weight_vec);
+    dst5 = __msa_dpadd_s_w(offset_vec, tmp5, weight_vec);
+    dst6 = __msa_dpadd_s_w(offset_vec, tmp6, weight_vec);
+    dst7 = __msa_dpadd_s_w(offset_vec, tmp7, weight_vec);
+    SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+    SRAR_W4_SW(dst4, dst5, dst6, dst7, rnd_vec);
+    PCKEV_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, tmp0, tmp1,
+                tmp2, tmp3);
+    CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+    PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+    ST4x8_UB(out0, out1, dst, dst_stride);
+
+    PCKEV_H2_SW(dst1_l, dst0_l, dst3_l, dst2_l, dst4, dst5);
+
+    LW4(src1_ptr + 4, src2_stride, tpw0, tpw1, tpw2, tpw3);
+    src1_ptr += (4 * src2_stride);
+    INSERT_W4_SH(tpw0, tpw1, tpw2, tpw3, in4);
+    LW4(src1_ptr + 4, src2_stride, tpw0, tpw1, tpw2, tpw3);
+    INSERT_W4_SH(tpw0, tpw1, tpw2, tpw3, in5);
+
+    ILVRL_H2_SH(dst4, in4, tmp0, tmp1);
+    ILVRL_H2_SH(dst5, in5, tmp2, tmp3);
+
+    dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+    dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+    dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+    dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+    SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+    PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp4, tmp5);
+
+    CLIP_SH2_0_255_MAX_SATU(tmp4, tmp5);
+    out2 = (v16u8) __msa_pckev_b((v16i8) tmp5, (v16i8) tmp4);
+    ST2x4_UB(out2, 0, dst + 4, dst_stride);
+    dst += 4 * dst_stride;
+    ST2x4_UB(out2, 4, dst + 4, dst_stride);
 }
 
 static void hevc_hv_biwgt_4t_8x2_msa(uint8_t *src0_ptr,
@@ -4873,7 +5036,6 @@
                                      int32_t dst_stride,
                                      const int8_t *filter_x,
                                      const int8_t *filter_y,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
@@ -4881,20 +5043,21 @@
                                      int32_t rnd_val)
 {
     int32_t weight, offset;
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v8i16 filter_vec, weight_vec;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
     v8i16 dst0, dst1, dst2, dst3, dst4;
     v8i16 in0, in1;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l;
     v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
     v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
-    v8i16 tmp0, tmp1;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 tmp0, tmp1, tmp2, tmp3;
+    v4i32 offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (src_stride + 1);
 
@@ -4902,10 +5065,9 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -4913,61 +5075,177 @@
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_fill_w((128 * weight1));
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
-    weight_vec = __msa_fill_w(weight);
+    weight_vec = (v8i16) __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
 
-    LD_SB3(src0_ptr, src_stride, src0, src1, src2);
-    src0_ptr += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB5(src0_ptr, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+
+    LD_SH2(src1_ptr, src2_stride, in0, in1);
 
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec8, vec9);
+
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    dst4 = HEVC_FILT_4TAP_SH(vec8, vec9, filt0, filt1);
 
     ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
     ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
-
-    LD_SB2(src0_ptr, src_stride, src3, src4);
-
-    LD_SH2(src1_ptr, src2_stride, in0, in1);
-    XORI_B2_128_SB(src3, src4);
-
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
     ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
     dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-    dst0_r >>= 6;
-    dst0_l >>= 6;
-    tmp0 = __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
-
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
     dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst1_l >>= 6;
-    tmp1 = __msa_pckev_h((v8i16) dst1_l, (v8i16) dst1_r);
+    SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+    PCKEV_H2_SH(dst0_l, dst0_r, dst1_l, dst1_r, tmp1, tmp3);
 
-    HEVC_BIW_RND_CLIP2(tmp0, tmp1, in0, in1,
-                       weight_vec, rnd_vec, offset_vec,
-                       dst0_r, dst1_r, dst0_l, dst1_l);
-    HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-    ST8x2_UB(dst0_r, dst, dst_stride);
+    ILVRL_H2_SH(tmp1, in0, tmp0, tmp1);
+    ILVRL_H2_SH(tmp3, in1, tmp2, tmp3);
+
+    dst0_r = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+    dst0_l = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+    dst1_r = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+    dst1_l = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+    SRAR_W4_SW(dst0_r, dst0_l, dst1_r, dst1_l, rnd_vec);
+    PCKEV_H2_SH(dst0_l, dst0_r, dst1_l, dst1_r, tmp0, tmp1);
+    CLIP_SH2_0_255_MAX_SATU(tmp0, tmp1);
+    out = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+    ST8x2_UB(out, dst, dst_stride);
+}
+
+static void hevc_hv_biwgt_4t_8multx4_msa(uint8_t *src0_ptr,
+                                         int32_t src_stride,
+                                         int16_t *src1_ptr,
+                                         int32_t src2_stride,
+                                         uint8_t *dst,
+                                         int32_t dst_stride,
+                                         const int8_t *filter_x,
+                                         const int8_t *filter_y,
+                                         int32_t weight0,
+                                         int32_t weight1,
+                                         int32_t offset0,
+                                         int32_t offset1,
+                                         int32_t rnd_val,
+                                         int32_t width8mult)
+{
+    int32_t weight, offset;
+    uint32_t cnt;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, mask0, mask1;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 filt0, filt1, filt_h0, filt_h1, filter_vec, weight_vec;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, in0, in1, in2, in3;
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
+    v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v4i32 offset_vec, rnd_vec, const_vec;
+
+    src0_ptr -= (src_stride + 1);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+
+    offset = (offset0 + offset1) << rnd_val;
+    weight0 = weight0 & 0x0000FFFF;
+    weight = weight0 | (weight1 << 16);
+
+    const_vec = __msa_fill_w((128 * weight1));
+    const_vec <<= 6;
+    offset_vec = __msa_fill_w(offset);
+    rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
+    weight_vec = (v8i16) __msa_fill_w(weight);
+
+    for (cnt = width8mult; cnt--;) {
+        LD_SB7(src0_ptr, src_stride, src0, src1, src2, src3, src4, src5, src6);
+        src0_ptr += 8;
+        XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+        LD_SH4(src1_ptr, src2_stride, in0, in1, in2, in3);
+        src1_ptr += 8;
+
+        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+
+        dsth0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dsth1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dsth2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+
+        ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+        ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
+
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+
+        dsth3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dsth4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dsth5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dsth6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+        ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+
+        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
+        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
+        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
+
+        SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+        SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+        PCKEV_H4_SW(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                    dst3_r, dst0, dst1, dst2, dst3);
+
+        ILVRL_H2_SH(dst0, in0, tmp0, tmp1);
+        ILVRL_H2_SH(dst1, in1, tmp2, tmp3);
+        ILVRL_H2_SH(dst2, in2, tmp4, tmp5);
+        ILVRL_H2_SH(dst3, in3, tmp6, tmp7);
+        dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+        dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+        dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+        dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+        dst4 = __msa_dpadd_s_w(offset_vec, tmp4, weight_vec);
+        dst5 = __msa_dpadd_s_w(offset_vec, tmp5, weight_vec);
+        dst6 = __msa_dpadd_s_w(offset_vec, tmp6, weight_vec);
+        dst7 = __msa_dpadd_s_w(offset_vec, tmp7, weight_vec);
+        SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+        SRAR_W4_SW(dst4, dst5, dst6, dst7, rnd_vec);
+        PCKEV_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6,
+                    tmp0, tmp1, tmp2, tmp3);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += 8;
+    }
 }
 
 static void hevc_hv_biwgt_4t_8x6_msa(uint8_t *src0_ptr,
@@ -4978,7 +5256,6 @@
                                      int32_t dst_stride,
                                      const int8_t *filter_x,
                                      const int8_t *filter_y,
-                                     int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
                                      int32_t offset0,
@@ -4986,14 +5263,16 @@
                                      int32_t rnd_val)
 {
     uint32_t offset, weight;
+    v16u8 out0, out1, out2;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
+    v8i16 filter_vec, weight_vec;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+    v16i8 vec10, vec11, vec12, vec13, vec14, vec15, vec16, vec17;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6, dsth7, dsth8;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
     v4i32 dst4_r, dst4_l, dst5_r, dst5_l;
     v8i16 dst10_r, dst32_r, dst10_l, dst32_l;
@@ -5001,8 +5280,9 @@
     v8i16 dst54_r, dst54_l, dst65_r, dst65_l;
     v8i16 dst76_r, dst76_l, dst87_r, dst87_l;
     v8i16 in0, in1, in2, in3, in4, in5;
-    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v4i32 offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (src_stride + 1);
 
@@ -5010,10 +5290,9 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -5021,123 +5300,103 @@
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_fill_w((128 * weight1));
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
-    weight_vec = __msa_fill_w(weight);
+    weight_vec = (v8i16) __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
 
-    LD_SB3(src0_ptr, src_stride, src0, src1, src2);
-    src0_ptr += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB5(src0_ptr, src_stride, src0, src1, src2, src3, src4);
+    src0_ptr += (5 * src_stride);
+    LD_SB4(src0_ptr, src_stride, src5, src6, src7, src8);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B4_128_SB(src5, src6, src7, src8);
+
+    LD_SH6(src1_ptr, src2_stride, in0, in1, in2, in3, in4, in5);
 
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec8, vec9);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec10, vec11);
+    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec12, vec13);
+    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec14, vec15);
+    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec16, vec17);
 
-    ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
-    ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
+    dsth0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dsth3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    dsth4 = HEVC_FILT_4TAP_SH(vec8, vec9, filt0, filt1);
+    dsth5 = HEVC_FILT_4TAP_SH(vec10, vec11, filt0, filt1);
+    dsth6 = HEVC_FILT_4TAP_SH(vec12, vec13, filt0, filt1);
+    dsth7 = HEVC_FILT_4TAP_SH(vec14, vec15, filt0, filt1);
+    dsth8 = HEVC_FILT_4TAP_SH(vec16, vec17, filt0, filt1);
 
-    LD_SB2(src0_ptr, src_stride, src3, src4);
-    src0_ptr += (2 * src_stride);
-    XORI_B2_128_SB(src3, src4);
-    LD_SH6(src1_ptr, src2_stride, in0, in1, in2, in3, in4, in5);
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+    ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+    ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
+    ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+    ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+    ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+    ILVRL_H2_SH(dsth7, dsth6, dst76_r, dst76_l);
+    ILVRL_H2_SH(dsth8, dsth7, dst87_r, dst87_l);
 
-    ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
     dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
     dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-    dst0_r >>= 6;
-    dst0_l >>= 6;
-    tmp0 = __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
-
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
     dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst1_l >>= 6;
-    tmp1 = __msa_pckev_h((v8i16) dst1_l, (v8i16) dst1_r);
-
-    LD_SB2(src0_ptr, src_stride, src5, src6);
-    src0_ptr += (2 * src_stride);
-    XORI_B2_128_SB(src5, src6);
-
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-    ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
     dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
     dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
-    dst2_r >>= 6;
-    dst2_l >>= 6;
-    tmp2 = __msa_pckev_h((v8i16) dst2_l, (v8i16) dst2_r);
-
-    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-    dst6 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-
-    ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
     dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
     dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
-    dst3_r >>= 6;
-    dst3_l >>= 6;
-    tmp3 = __msa_pckev_h((v8i16) dst3_l, (v8i16) dst3_r);
-
-    HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                       in0, in1, in2, in3,
-                       weight_vec, rnd_vec, offset_vec,
-                       dst0_r, dst1_r, dst2_r, dst3_r,
-                       dst0_l, dst1_l, dst2_l, dst3_l);
-
-    HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                    dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-    ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
-    dst += (4 * dst_stride);
-
-    LD_SB2(src0_ptr, src_stride, src7, src8);
-    XORI_B2_128_SB(src7, src8);
-
-    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-    dst7 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
-
-    ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
     dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
     dst4_l = HEVC_FILT_4TAP(dst54_l, dst76_l, filt_h0, filt_h1);
-    dst4_r >>= 6;
-    dst4_l >>= 6;
-    tmp4 = __msa_pckev_h((v8i16) dst4_l, (v8i16) dst4_r);
-
-    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec0, vec1);
-    dst8 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst8, dst8);
-
-    ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
     dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
     dst5_l = HEVC_FILT_4TAP(dst65_l, dst87_l, filt_h0, filt_h1);
-    dst5_r >>= 6;
-    dst5_l >>= 6;
-    tmp5 = __msa_pckev_h((v8i16) dst5_l, (v8i16) dst5_r);
 
-    HEVC_BIW_RND_CLIP2(tmp4, tmp5, in4, in5,
-                       weight_vec, rnd_vec, offset_vec,
-                       dst4_r, dst5_r, dst4_l, dst5_l);
+    SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+    SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+    SRA_4V(dst4_r, dst4_l, dst5_r, dst5_l, 6);
+    PCKEV_H4_SW(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l, dst3_r,
+                dst0, dst1, dst2, dst3);
 
-    HEVC_PCK_SW_SB4(dst4_l, dst4_r, dst5_l, dst5_r, dst2_r);
-    ST8x2_UB(dst2_r, dst, dst_stride);
+    ILVRL_H2_SH(dst0, in0, tmp0, tmp1);
+    ILVRL_H2_SH(dst1, in1, tmp2, tmp3);
+    ILVRL_H2_SH(dst2, in2, tmp4, tmp5);
+    ILVRL_H2_SH(dst3, in3, tmp6, tmp7);
+    dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+    dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+    dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+    dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+    dst4 = __msa_dpadd_s_w(offset_vec, tmp4, weight_vec);
+    dst5 = __msa_dpadd_s_w(offset_vec, tmp5, weight_vec);
+    dst6 = __msa_dpadd_s_w(offset_vec, tmp6, weight_vec);
+    dst7 = __msa_dpadd_s_w(offset_vec, tmp7, weight_vec);
+    SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+    SRAR_W4_SW(dst4, dst5, dst6, dst7, rnd_vec);
+    PCKEV_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6,
+                tmp0, tmp1, tmp2, tmp3);
+    CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+    PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+
+    PCKEV_H2_SW(dst4_l, dst4_r, dst5_l, dst5_r, dst0, dst1);
+    ILVRL_H2_SH(dst0, in4, tmp0, tmp1);
+    ILVRL_H2_SH(dst1, in5, tmp2, tmp3);
+    dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+    dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+    dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+    dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+    SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+    PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp4, tmp5);
+    CLIP_SH2_0_255_MAX_SATU(tmp4, tmp5);
+    out2 = (v16u8) __msa_pckev_b((v16i8) tmp5, (v16i8) tmp4);
+    ST8x4_UB(out0, out1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    ST8x2_UB(out2, dst, dst_stride);
 }
 
 static void hevc_hv_biwgt_4t_8multx4mult_msa(uint8_t *src0_ptr,
@@ -5162,20 +5421,22 @@
     uint8_t *src0_ptr_tmp;
     int16_t *src1_ptr_tmp;
     uint8_t *dst_tmp;
+    v16u8 out0, out1;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 in0, in1, in2, in3;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
+    v8i16 filter_vec;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
-    v8i16 tmp0, tmp1, tmp2, tmp3;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
-    v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l, weight_vec;
+    v4i32 offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (src_stride + 1);
 
@@ -5183,10 +5444,9 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -5194,11 +5454,12 @@
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
-    const_vec = __msa_ldi_h(128);
+    const_vec = __msa_fill_w((128 * weight1));
     const_vec <<= 6;
     offset_vec = __msa_fill_w(offset);
-    weight_vec = __msa_fill_w(weight);
+    weight_vec = (v8i16) __msa_fill_w(weight);
     rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
 
     for (cnt = width >> 3; cnt--;) {
         src0_ptr_tmp = src0_ptr;
@@ -5212,15 +5473,12 @@
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
         VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
         VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+        dsth0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dsth1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dsth2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
 
-        ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
-        ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
+        ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+        ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
 
         for (loop_cnt = height >> 2; loop_cnt--;) {
             LD_SB4(src0_ptr_tmp, src_stride, src3, src4, src5, src6);
@@ -5230,57 +5488,59 @@
             XORI_B4_128_SB(src3, src4, src5, src6);
 
             VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-            dst3 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+            VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+            VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+            VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
 
-            ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+            dsth3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+            dsth4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+            dsth5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+            dsth6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+            ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+            ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+            ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+            ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+
             dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
             dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-            dst0_r >>= 6;
-            dst0_l >>= 6;
-
-            VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-            dst4 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-            ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
             dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
             dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-            dst1_r >>= 6;
-            dst1_l >>= 6;
+            dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+            dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+            dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+            dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
 
-            VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-            dst5 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-            ILVRL_H2_SH(dst5, dst4, dst10_r, dst10_l);
-            dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-            dst2_l = HEVC_FILT_4TAP(dst32_l, dst10_l, filt_h0, filt_h1);
-            dst2_r >>= 6;
-            dst2_l >>= 6;
-
-            VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-            dst2 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-            ILVRL_H2_SH(dst2, dst5, dst21_r, dst21_l);
-            dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-            dst3_l = HEVC_FILT_4TAP(dst43_l, dst21_l, filt_h0, filt_h1);
-            dst3_r >>= 6;
-            dst3_l >>= 6;
-
-            PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, tmp0, tmp1, tmp2, tmp3);
-            HEVC_BIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                               in0, in1, in2, in3,
-                               weight_vec, rnd_vec, offset_vec,
-                               dst0_r, dst1_r, dst2_r, dst3_r,
-                               dst0_l, dst1_l, dst2_l, dst3_l);
-
-            HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                            dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-            ST8x4_UB(dst0_r, dst1_r, dst_tmp, dst_stride);
+            SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+            SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+            PCKEV_H4_SW(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                        dst3_r, dst0, dst1, dst2, dst3);
+            ILVRL_H2_SH(dst0, in0, tmp0, tmp1);
+            ILVRL_H2_SH(dst1, in1, tmp2, tmp3);
+            ILVRL_H2_SH(dst2, in2, tmp4, tmp5);
+            ILVRL_H2_SH(dst3, in3, tmp6, tmp7);
+            dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+            dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+            dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+            dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+            dst4 = __msa_dpadd_s_w(offset_vec, tmp4, weight_vec);
+            dst5 = __msa_dpadd_s_w(offset_vec, tmp5, weight_vec);
+            dst6 = __msa_dpadd_s_w(offset_vec, tmp6, weight_vec);
+            dst7 = __msa_dpadd_s_w(offset_vec, tmp7, weight_vec);
+            SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+            SRAR_W4_SW(dst4, dst5, dst6, dst7, rnd_vec);
+            PCKEV_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6,
+                        tmp0, tmp1, tmp2, tmp3);
+            CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+            PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+            ST8x4_UB(out0, out1, dst_tmp, dst_stride);
             dst_tmp += (4 * dst_stride);
+
+            dst10_r = dst54_r;
+            dst10_l = dst54_l;
+            dst21_r = dst65_r;
+            dst21_l = dst65_l;
+            dsth2 = dsth6;
         }
 
         src0_ptr += 8;
@@ -5307,13 +5567,16 @@
     if (2 == height) {
         hevc_hv_biwgt_4t_8x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter_x, filter_y,
-                                 height, weight0, weight1, offset0, offset1,
-                                 rnd_val);
+                                 weight0, weight1, offset0, offset1, rnd_val);
+    } else if (4 == height) {
+        hevc_hv_biwgt_4t_8multx4_msa(src0_ptr, src_stride, src1_ptr,
+                                     src2_stride, dst, dst_stride, filter_x,
+                                     filter_y, weight0, weight1, offset0,
+                                     offset1, rnd_val, 1);
     } else if (6 == height) {
         hevc_hv_biwgt_4t_8x6_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter_x, filter_y,
-                                 height, weight0, weight1, offset0, offset1,
-                                 rnd_val);
+                                 weight0, weight1, offset0, offset1, rnd_val);
     } else if (0 == (height % 4)) {
         hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride,
                                          src1_ptr, src2_stride,
@@ -5338,16 +5601,228 @@
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride,
-                                     src1_ptr, src2_stride,
-                                     dst, dst_stride,
-                                     filter_x, filter_y, height, weight0,
-                                     weight1, offset0, offset1, rnd_val, 8);
+    uint32_t loop_cnt;
+    uint64_t tp0, tp1;
+    int32_t offset, weight;
+    uint8_t *src0_ptr_tmp, *dst_tmp;
+    int16_t *src1_ptr_tmp;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 mask0, mask1, mask2, mask3;
+    v8i16 filt0, filt1, filt_h0, filt_h1, filter_vec;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6, weight_vec;
+    v8i16 dst10, dst21, dst22, dst73, dst84, dst95, dst106;
+    v8i16 dst76_r, dst98_r, dst87_r, dst109_r;
+    v8i16 in0 = { 0 }, in1 = { 0 }, in2 = { 0 }, in3 = { 0 };
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
+    v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v4i32 offset_vec, rnd_vec, const_vec;
 
-    hevc_hv_biwgt_4t_4w_msa(src0_ptr + 8, src_stride, src1_ptr + 8, src2_stride,
-                            dst + 8, dst_stride, filter_x, filter_y,
-                            height, weight0, weight1, offset0,
-                            offset1, rnd_val);
+    src0_ptr -= (src_stride + 1);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+
+    offset = (offset0 + offset1) << rnd_val;
+    weight0 = weight0 & 0x0000FFFF;
+    weight = weight0 | (weight1 << 16);
+
+    const_vec = __msa_fill_w((128 * weight1));
+    const_vec <<= 6;
+    offset_vec = __msa_fill_w(offset);
+    rnd_vec = __msa_fill_w(rnd_val + 1);
+    offset_vec += const_vec;
+    weight_vec = (v8i16) __msa_fill_w(weight);
+
+    src0_ptr_tmp = src0_ptr;
+    dst_tmp = dst;
+    src1_ptr_tmp = src1_ptr;
+
+    LD_SB3(src0_ptr_tmp, src_stride, src0, src1, src2);
+    src0_ptr_tmp += (3 * src_stride);
+
+    XORI_B3_128_SB(src0, src1, src2);
+
+    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+
+    dsth0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+
+    ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+    ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src0_ptr_tmp, src_stride, src3, src4, src5, src6);
+        src0_ptr_tmp += (4 * src_stride);
+        XORI_B4_128_SB(src3, src4, src5, src6);
+
+        LD_SH4(src1_ptr_tmp, src2_stride, in0, in1, in2, in3);
+        src1_ptr_tmp += (4 * src2_stride);
+
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+
+        dsth3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dsth4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dsth5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dsth6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+        ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+
+        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
+        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
+        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
+
+        SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+        SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+        PCKEV_H4_SW(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                    dst3_r, dst0, dst1, dst2, dst3);
+        ILVRL_H2_SH(dst0, in0, tmp0, tmp1);
+        ILVRL_H2_SH(dst1, in1, tmp2, tmp3);
+        ILVRL_H2_SH(dst2, in2, tmp4, tmp5);
+        ILVRL_H2_SH(dst3, in3, tmp6, tmp7);
+        dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+        dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+        dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+        dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+        dst4 = __msa_dpadd_s_w(offset_vec, tmp4, weight_vec);
+        dst5 = __msa_dpadd_s_w(offset_vec, tmp5, weight_vec);
+        dst6 = __msa_dpadd_s_w(offset_vec, tmp6, weight_vec);
+        dst7 = __msa_dpadd_s_w(offset_vec, tmp7, weight_vec);
+        SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+        SRAR_W4_SW(dst4, dst5, dst6, dst7, rnd_vec);
+        PCKEV_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6,
+                    tmp0, tmp1, tmp2, tmp3);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST8x4_UB(out0, out1, dst_tmp, dst_stride);
+        dst_tmp += (4 * dst_stride);
+
+        dst10_r = dst54_r;
+        dst10_l = dst54_l;
+        dst21_r = dst65_r;
+        dst21_l = dst65_l;
+        dsth2 = dsth6;
+    }
+
+    src0_ptr += 8;
+    dst += 8;
+    src1_ptr += 8;
+
+    mask2 = LD_SB(ff_hevc_mask_arr + 16);
+    mask3 = mask2 + 2;
+
+    LD_SB3(src0_ptr, src_stride, src0, src1, src2);
+    src0_ptr += (3 * src_stride);
+    XORI_B3_128_SB(src0, src1, src2);
+    VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
+    VSHF_B2_SB(src1, src2, src1, src2, mask2, mask3, vec2, vec3);
+
+    dst10 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst21 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+
+    ILVRL_H2_SH(dst21, dst10, dst10_r, dst21_r);
+    dst22 = (v8i16) __msa_splati_d((v2i64) dst21, 1);
+
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB8(src0_ptr, src_stride, src3, src4, src5, src6, src7, src8, src9,
+               src10);
+        src0_ptr += (8 * src_stride);
+        XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
+        VSHF_B2_SB(src3, src7, src3, src7, mask2, mask3, vec0, vec1);
+        VSHF_B2_SB(src4, src8, src4, src8, mask2, mask3, vec2, vec3);
+        VSHF_B2_SB(src5, src9, src5, src9, mask2, mask3, vec4, vec5);
+        VSHF_B2_SB(src6, src10, src6, src10, mask2, mask3, vec6, vec7);
+
+        dst73 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst84 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst95 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst106 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        dst32_r = __msa_ilvr_h(dst73, dst22);
+        ILVRL_H2_SH(dst84, dst73, dst43_r, dst87_r);
+        ILVRL_H2_SH(dst95, dst84, dst54_r, dst98_r);
+        ILVRL_H2_SH(dst106, dst95, dst65_r, dst109_r);
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst73, 1);
+        dst76_r = __msa_ilvr_h(dst22, dst106);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in0);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in1);
+
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in2);
+        LD2(src1_ptr, src2_stride, tp0, tp1);
+        src1_ptr += 2 * src2_stride;
+        INSERT_D2_SH(tp0, tp1, in3);
+
+        dst0 = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst1 = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst2 = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst3 = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst4 = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+        dst5 = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+        dst6 = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+        dst7 = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+
+        SRA_4V(dst0, dst1, dst2, dst3, 6);
+        SRA_4V(dst4, dst5, dst6, dst7, 6);
+        PCKEV_H4_SW(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6,
+                    dst0, dst1, dst2, dst3);
+        ILVRL_H2_SH(dst0, in0, tmp0, tmp1);
+        ILVRL_H2_SH(dst1, in1, tmp2, tmp3);
+        ILVRL_H2_SH(dst2, in2, tmp4, tmp5);
+        ILVRL_H2_SH(dst3, in3, tmp6, tmp7);
+        dst0 = __msa_dpadd_s_w(offset_vec, tmp0, weight_vec);
+        dst1 = __msa_dpadd_s_w(offset_vec, tmp1, weight_vec);
+        dst2 = __msa_dpadd_s_w(offset_vec, tmp2, weight_vec);
+        dst3 = __msa_dpadd_s_w(offset_vec, tmp3, weight_vec);
+        dst4 = __msa_dpadd_s_w(offset_vec, tmp4, weight_vec);
+        dst5 = __msa_dpadd_s_w(offset_vec, tmp5, weight_vec);
+        dst6 = __msa_dpadd_s_w(offset_vec, tmp6, weight_vec);
+        dst7 = __msa_dpadd_s_w(offset_vec, tmp7, weight_vec);
+        SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+        SRAR_W4_SW(dst4, dst5, dst6, dst7, rnd_vec);
+        PCKEV_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6,
+                    tmp0, tmp1, tmp2, tmp3);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST4x8_UB(out0, out1, dst, dst_stride);
+        dst += (8 * dst_stride);
+
+        dst10_r = dst98_r;
+        dst21_r = dst109_r;
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst106, 1);
+    }
 }
 
 static void hevc_hv_biwgt_4t_16w_msa(uint8_t *src0_ptr,
@@ -5365,11 +5840,17 @@
                                      int32_t offset1,
                                      int32_t rnd_val)
 {
-    hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride,
-                                     src1_ptr, src2_stride,
-                                     dst, dst_stride,
-                                     filter_x, filter_y, height, weight0,
-                                     weight1, offset0, offset1, rnd_val, 16);
+    if (4 == height) {
+        hevc_hv_biwgt_4t_8multx4_msa(src0_ptr, src_stride, src1_ptr,
+                                     src2_stride, dst, dst_stride, filter_x,
+                                     filter_y, weight0, weight1, offset0,
+                                     offset1, rnd_val, 2);
+    } else {
+        hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride, src1_ptr,
+                                         src2_stride, dst, dst_stride,
+                                         filter_x, filter_y, height, weight0,
+                                         weight1, offset0, offset1, rnd_val, 16);
+    }
 }
 
 static void hevc_hv_biwgt_4t_24w_msa(uint8_t *src0_ptr,
@@ -5453,35 +5934,32 @@
 
 #undef BI_W_MC_COPY
 
-#define BI_W_MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                          \
-void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,          \
-                                                          ptrdiff_t            \
-                                                          dst_stride,          \
-                                                          uint8_t *src,        \
-                                                          ptrdiff_t            \
-                                                          src_stride,          \
-                                                          int16_t *src_16bit,  \
-                                                          int height,          \
-                                                          int denom,           \
-                                                          int weight0,         \
-                                                          int weight1,         \
-                                                          int offset0,         \
-                                                          int offset1,         \
-                                                          intptr_t mx,         \
-                                                          intptr_t my,         \
-                                                          int width)           \
-{                                                                              \
-    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];              \
-                                                                               \
-    int shift = 14 + 1 - 8;                                                    \
-    int log2Wd = denom + shift - 1;                                            \
-                                                                               \
-    hevc_##DIR1##_biwgt_##TAP##t_##WIDTH##w_msa(src, src_stride,               \
-                                                src_16bit, MAX_PB_SIZE,        \
-                                                dst, dst_stride,               \
-                                                filter, height,                \
-                                                weight0, weight1, offset0,     \
-                                                offset1, log2Wd);              \
+#define BI_W_MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                         \
+void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,         \
+                                                        ptrdiff_t             \
+                                                        dst_stride,           \
+                                                        uint8_t *src,         \
+                                                        ptrdiff_t             \
+                                                        src_stride,           \
+                                                        int16_t *src_16bit,   \
+                                                        int height,           \
+                                                        int denom,            \
+                                                        int weight0,          \
+                                                        int weight1,          \
+                                                        int offset0,          \
+                                                        int offset1,          \
+                                                        intptr_t mx,          \
+                                                        intptr_t my,          \
+                                                        int width)            \
+{                                                                             \
+    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];             \
+    int log2Wd = denom + 14 - 8;                                              \
+                                                                              \
+    hevc_##DIR1##_biwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,   \
+                                                MAX_PB_SIZE, dst, dst_stride, \
+                                                filter, height, weight0,      \
+                                                weight1, offset0, offset1,    \
+                                                log2Wd);                      \
 }
 
 BI_W_MC(qpel, h, 4, 8, hz, mx);
@@ -5520,53 +5998,48 @@
 
 #undef BI_W_MC
 
-#define BI_W_MC_HV(PEL, DIR, WIDTH, TAP, DIR1)                                 \
-void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,          \
-                                                          ptrdiff_t            \
-                                                          dst_stride,          \
-                                                          uint8_t *src,        \
-                                                          ptrdiff_t            \
-                                                          src_stride,          \
-                                                          int16_t *src_16bit,  \
-                                                          int height,          \
-                                                          int denom,           \
-                                                          int weight0,         \
-                                                          int weight1,         \
-                                                          int offset0,         \
-                                                          int offset1,         \
-                                                          intptr_t mx,         \
-                                                          intptr_t my,         \
-                                                          int width)           \
-{                                                                              \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];                  \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];                  \
-                                                                               \
-    int shift = 14 + 1 - 8;                                                    \
-    int log2Wd = denom + shift - 1;                                            \
-                                                                               \
-    hevc_##DIR1##_biwgt_##TAP##t_##WIDTH##w_msa(src, src_stride,               \
-                                                src_16bit, MAX_PB_SIZE,        \
-                                                dst, dst_stride,               \
-                                                filter_x, filter_y,            \
-                                                height, weight0, weight1,      \
-                                                offset0, offset1, log2Wd);     \
+#define BI_W_MC_HV(PEL, WIDTH, TAP)                                         \
+void ff_hevc_put_hevc_bi_w_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst,          \
+                                                     ptrdiff_t dst_stride,  \
+                                                     uint8_t *src,          \
+                                                     ptrdiff_t src_stride,  \
+                                                     int16_t *src_16bit,    \
+                                                     int height,            \
+                                                     int denom,             \
+                                                     int weight0,           \
+                                                     int weight1,           \
+                                                     int offset0,           \
+                                                     int offset1,           \
+                                                     intptr_t mx,           \
+                                                     intptr_t my,           \
+                                                     int width)             \
+{                                                                           \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];               \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];               \
+    int log2Wd = denom + 14 - 8;                                            \
+                                                                            \
+    hevc_hv_biwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,       \
+                                          MAX_PB_SIZE, dst, dst_stride,     \
+                                          filter_x, filter_y, height,       \
+                                          weight0, weight1, offset0,        \
+                                          offset1, log2Wd);                 \
 }
 
-BI_W_MC_HV(qpel, hv, 4, 8, hv);
-BI_W_MC_HV(qpel, hv, 8, 8, hv);
-BI_W_MC_HV(qpel, hv, 12, 8, hv);
-BI_W_MC_HV(qpel, hv, 16, 8, hv);
-BI_W_MC_HV(qpel, hv, 24, 8, hv);
-BI_W_MC_HV(qpel, hv, 32, 8, hv);
-BI_W_MC_HV(qpel, hv, 48, 8, hv);
-BI_W_MC_HV(qpel, hv, 64, 8, hv);
+BI_W_MC_HV(qpel, 4, 8);
+BI_W_MC_HV(qpel, 8, 8);
+BI_W_MC_HV(qpel, 12, 8);
+BI_W_MC_HV(qpel, 16, 8);
+BI_W_MC_HV(qpel, 24, 8);
+BI_W_MC_HV(qpel, 32, 8);
+BI_W_MC_HV(qpel, 48, 8);
+BI_W_MC_HV(qpel, 64, 8);
 
-BI_W_MC_HV(epel, hv, 4, 4, hv);
-BI_W_MC_HV(epel, hv, 8, 4, hv);
-BI_W_MC_HV(epel, hv, 6, 4, hv);
-BI_W_MC_HV(epel, hv, 12, 4, hv);
-BI_W_MC_HV(epel, hv, 16, 4, hv);
-BI_W_MC_HV(epel, hv, 24, 4, hv);
-BI_W_MC_HV(epel, hv, 32, 4, hv);
+BI_W_MC_HV(epel, 4, 4);
+BI_W_MC_HV(epel, 8, 4);
+BI_W_MC_HV(epel, 6, 4);
+BI_W_MC_HV(epel, 12, 4);
+BI_W_MC_HV(epel, 16, 4);
+BI_W_MC_HV(epel, 24, 4);
+BI_W_MC_HV(epel, 32, 4);
 
 #undef BI_W_MC_HV

diff --git a/libavcodec/mips/hevc_mc_uni_msa.c b/libavcodec/mips/hevc_mc_uni_msa.c
index eead591..740c970 100644
--- a/libavcodec/mips/hevc_mc_uni_msa.c
+++ b/libavcodec/mips/hevc_mc_uni_msa.c

@@ -22,6 +22,85 @@
 #include "libavcodec/mips/hevcdsp_mips.h"
 #include "libavcodec/mips/hevc_macros_msa.h"
 
+static const uint8_t ff_hevc_mask_arr[16 * 3] __attribute__((aligned(0x40))) = {
+    /* 8 width cases */
+    0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+    /* 4 width cases */
+    0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20,
+    /* 4 width cases */
+    8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
+};
+
+#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3,                  \
+                                   mask0, mask1, mask2, mask3,              \
+                                   filt0, filt1, filt2, filt3,              \
+                                   out0, out1)                              \
+{                                                                           \
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m,  vec4_m, vec5_m, vec6_m, vec7_m;  \
+                                                                            \
+    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m);       \
+    DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1);                  \
+    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m);       \
+    DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1);                 \
+    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m);       \
+    DPADD_SB2_SH(vec4_m, vec5_m, filt2, filt2, out0, out1);                 \
+    VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec6_m, vec7_m);       \
+    DPADD_SB2_SH(vec6_m, vec7_m, filt3, filt3, out0, out1);                 \
+}
+
+#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3,                    \
+                                   mask0, mask1, mask2, mask3,                \
+                                   filt0, filt1, filt2, filt3,                \
+                                   out0, out1, out2, out3)                    \
+{                                                                             \
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m;     \
+                                                                              \
+    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m);         \
+    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,   \
+                out0, out1, out2, out3);                                      \
+    VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m);         \
+    DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2,  \
+                 out0, out1, out2, out3);                                     \
+    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m);         \
+    DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1,  \
+                 out0, out1, out2, out3);                                     \
+    VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m);         \
+    DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3,  \
+                 out0, out1, out2, out3);                                     \
+}
+
+#define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3,             \
+                                   mask0, mask1, filt0, filt1,         \
+                                   out0, out1)                         \
+{                                                                      \
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m;                              \
+                                                                       \
+    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m);  \
+    DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1);             \
+    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m);  \
+    DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1);            \
+}
+
+#define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3,                    \
+                                   mask0, mask1, filt0, filt1,                \
+                                   out0, out1, out2, out3)                    \
+{                                                                             \
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m;                                     \
+                                                                              \
+    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m);         \
+    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,   \
+                out0, out1, out2, out3);                                      \
+    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m);         \
+    DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1,  \
+                 out0, out1, out2, out3);                                     \
+}
+
 static void copy_width8_msa(uint8_t *src, int32_t src_stride,
                             uint8_t *dst, int32_t dst_stride,
                             int32_t height)
@@ -204,114 +283,6 @@
     }
 }
 
-static const uint8_t mc_filt_mask_arr[16 * 3] = {
-    /* 8 width cases */
-    0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
-    /* 4 width cases */
-    0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20,
-    /* 4 width cases */
-    8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
-};
-
-#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3,             \
-                            filt0, filt1, filt2, filt3)         \
-( {                                                             \
-    v8i16 tmp0, tmp1;                                           \
-                                                                \
-    tmp0 = __msa_dotp_s_h((v16i8) vec0, (v16i8) filt0);         \
-    tmp0 = __msa_dpadd_s_h(tmp0, (v16i8) vec1, (v16i8) filt1);  \
-    tmp1 = __msa_dotp_s_h((v16i8) vec2, (v16i8) filt2);         \
-    tmp1 = __msa_dpadd_s_h(tmp1, (v16i8) vec3, (v16i8) filt3);  \
-    tmp0 = __msa_adds_s_h(tmp0, tmp1);                          \
-                                                                \
-    tmp0;                                                       \
-} )
-
-#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3,                  \
-                                   mask0, mask1, mask2, mask3,              \
-                                   filt0, filt1, filt2, filt3,              \
-                                   out0, out1)                              \
-{                                                                           \
-    v16i8 vec0_m, vec1_m, vec2_m, vec3_m,  vec4_m, vec5_m, vec6_m, vec7_m;  \
-    v8i16 res0_m, res1_m, res2_m, res3_m;                                   \
-                                                                            \
-    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m);       \
-    DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, res0_m, res1_m);              \
-    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m);       \
-    DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, res0_m, res1_m);             \
-    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m);       \
-    DOTP_SB2_SH(vec4_m, vec5_m, filt2, filt2, res2_m, res3_m);              \
-    VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec6_m, vec7_m);       \
-    DPADD_SB2_SH(vec6_m, vec7_m, filt3, filt3, res2_m, res3_m);             \
-    ADDS_SH2_SH(res0_m, res2_m, res1_m, res3_m, out0, out1);                \
-}
-
-#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3,                    \
-                                   mask0, mask1, mask2, mask3,                \
-                                   filt0, filt1, filt2, filt3,                \
-                                   out0, out1, out2, out3)                    \
-{                                                                             \
-    v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m;     \
-    v8i16 res0_m, res1_m, res2_m, res3_m, res4_m, res5_m, res6_m, res7_m;     \
-                                                                              \
-    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m);         \
-    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m);         \
-    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,   \
-                res0_m, res1_m, res2_m, res3_m);                              \
-    VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m);         \
-    VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m);         \
-    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2,   \
-                res4_m, res5_m, res6_m, res7_m);                              \
-    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m);         \
-    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m);         \
-    DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1,  \
-                 res0_m, res1_m, res2_m, res3_m);                             \
-    VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m);         \
-    VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m);         \
-    DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3,  \
-                 res4_m, res5_m, res6_m, res7_m);                             \
-    ADDS_SH4_SH(res0_m, res4_m, res1_m, res5_m, res2_m, res6_m, res3_m,       \
-                res7_m, out0, out1, out2, out3);                              \
-}
-
-#define FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1)           \
-( {                                                             \
-    v8i16 tmp0;                                                 \
-                                                                \
-    tmp0 = __msa_dotp_s_h((v16i8) vec0, (v16i8) filt0);         \
-    tmp0 = __msa_dpadd_s_h(tmp0, (v16i8) vec1, (v16i8) filt1);  \
-                                                                \
-    tmp0;                                                       \
-} )
-
-#define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3,             \
-                                   mask0, mask1, filt0, filt1,         \
-                                   out0, out1)                         \
-{                                                                      \
-    v16i8 vec0_m, vec1_m, vec2_m, vec3_m;                              \
-                                                                       \
-    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m);  \
-    DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1);             \
-    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m);  \
-    DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1);            \
-}
-
-#define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3,                    \
-                                   mask0, mask1, filt0, filt1,                \
-                                   out0, out1, out2, out3)                    \
-{                                                                             \
-    v16i8 vec0_m, vec1_m, vec2_m, vec3_m;                                     \
-                                                                              \
-    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m);         \
-    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m);         \
-    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,   \
-                out0, out1, out2, out3);                                      \
-    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m);         \
-    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m);         \
-    DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1,  \
-                 out0, out1, out2, out3);                                     \
-}
-
 static void common_hz_8t_4x4_msa(uint8_t *src, int32_t src_stride,
                                  uint8_t *dst, int32_t dst_stride,
                                  const int8_t *filter)
@@ -320,7 +291,7 @@
     v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
     v8i16 filt, out0, out1;
 
-    mask0 = LD_UB(&mc_filt_mask_arr[16]);
+    mask0 = LD_UB(&ff_hevc_mask_arr[16]);
     src -= 3;
 
     /* rearranging filter */
@@ -350,7 +321,7 @@
     v16u8 mask0, mask1, mask2, mask3, out;
     v8i16 filt, out0, out1, out2, out3;
 
-    mask0 = LD_UB(&mc_filt_mask_arr[16]);
+    mask0 = LD_UB(&ff_hevc_mask_arr[16]);
     src -= 3;
 
     /* rearranging filter */
@@ -387,7 +358,7 @@
     v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
     v8i16 filt, out0, out1, out2, out3;
 
-    mask0 = LD_UB(&mc_filt_mask_arr[16]);
+    mask0 = LD_UB(&ff_hevc_mask_arr[16]);
     src -= 3;
 
     /* rearranging filter */
@@ -450,47 +421,17 @@
     }
 }
 
-static void common_hz_8t_8x4_msa(uint8_t *src, int32_t src_stride,
-                                 uint8_t *dst, int32_t dst_stride,
-                                 const int8_t *filter)
-{
-    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
-    v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
-    v8i16 filt, out0, out1, out2, out3;
-
-    mask0 = LD_UB(&mc_filt_mask_arr[0]);
-    src -= 3;
-
-    /* rearranging filter */
-    filt = LD_SH(filter);
-    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
-    mask1 = mask0 + 2;
-    mask2 = mask0 + 4;
-    mask3 = mask0 + 6;
-
-    LD_SB4(src, src_stride, src0, src1, src2, src3);
-    XORI_B4_128_SB(src0, src1, src2, src3);
-    HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
-                               mask3, filt0, filt1, filt2, filt3, out0, out1,
-                               out2, out3);
-    SRARI_H4_SH(out0, out1, out2, out3, 6);
-    SAT_SH4_SH(out0, out1, out2, out3, 7);
-    tmp0 = PCKEV_XORI128_UB(out0, out1);
-    tmp1 = PCKEV_XORI128_UB(out2, out3);
-    ST8x4_UB(tmp0, tmp1, dst, dst_stride);
-}
-
-static void common_hz_8t_8x8mult_msa(uint8_t *src, int32_t src_stride,
-                                     uint8_t *dst, int32_t dst_stride,
-                                     const int8_t *filter, int32_t height)
+static void common_hz_8t_8w_msa(uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                const int8_t *filter, int32_t height)
 {
     uint32_t loop_cnt;
     v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
     v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m;
     v8i16 filt, out0, out1, out2, out3;
 
-    mask0 = LD_UB(&mc_filt_mask_arr[0]);
+    mask0 = LD_UB(&ff_hevc_mask_arr[0]);
     src -= 3;
 
     /* rearranging filter */
@@ -505,9 +446,24 @@
         LD_SB4(src, src_stride, src0, src1, src2, src3);
         XORI_B4_128_SB(src0, src1, src2, src3);
         src += (4 * src_stride);
-        HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
-                                   mask3, filt0, filt1, filt2, filt3, out0,
-                                   out1, out2, out3);
+
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m);
+        DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,
+                    out0, out1, out2, out3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m);
+        DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2,
+                     out0, out1, out2, out3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m);
+        DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1,
+                     out0, out1, out2, out3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m);
+        VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m);
+        DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3,
+                     out0, out1, out2, out3);
+
         SRARI_H4_SH(out0, out1, out2, out3, 6);
         SAT_SH4_SH(out0, out1, out2, out3, 7);
         tmp0 = PCKEV_XORI128_UB(out0, out1);
@@ -517,36 +473,22 @@
     }
 }
 
-static void common_hz_8t_8w_msa(uint8_t *src, int32_t src_stride,
-                                uint8_t *dst, int32_t dst_stride,
-                                const int8_t *filter, int32_t height)
-{
-    if (4 == height) {
-        common_hz_8t_8x4_msa(src, src_stride, dst, dst_stride, filter);
-    } else {
-        common_hz_8t_8x8mult_msa(src, src_stride, dst, dst_stride, filter,
-                                 height);
-    }
-}
-
 static void common_hz_8t_12w_msa(uint8_t *src, int32_t src_stride,
                                  uint8_t *dst, int32_t dst_stride,
                                  const int8_t *filter, int32_t height)
 {
-    uint8_t *src1_ptr, *dst1;
     uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
-    v8i16 filt, out0, out1, out2, out3;
-    v16u8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask00, tmp0, tmp1;
+    v16u8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask00;
+    v16u8 tmp0, tmp1, tmp2;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 filt0, filt1, filt2, filt3;
+    v8i16 filt, out0, out1, out2, out3, out4, out5;
 
-    mask00 = LD_UB(&mc_filt_mask_arr[0]);
-    mask0 = LD_UB(&mc_filt_mask_arr[16]);
+    mask00 = LD_UB(&ff_hevc_mask_arr[0]);
+    mask0 = LD_UB(&ff_hevc_mask_arr[16]);
 
-    src1_ptr = src - 3;
-    dst1 = dst;
-
-    dst = dst1 + 8;
-    src = src1_ptr + 8;
+    src = src - 3;
 
     /* rearranging filter */
     filt = LD_SH(filter);
@@ -559,32 +501,53 @@
     mask5 = mask0 + 4;
     mask6 = mask0 + 6;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = 4; loop_cnt--;) {
         /* 8 width */
-        LD_SB4(src1_ptr, src_stride, src0, src1, src2, src3);
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        /* 4 width */
+        LD_SB4(src + 8, src_stride, src4, src5, src6, src7);
+
         XORI_B4_128_SB(src0, src1, src2, src3);
-        src1_ptr += (4 * src_stride);
-        HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask00, mask1, mask2,
-                                   mask3, filt0, filt1, filt2, filt3, out0,
-                                   out1, out2, out3);
-        SRARI_H4_SH(out0, out1, out2, out3, 6);
-        SAT_SH4_SH(out0, out1, out2, out3, 7);
-        tmp0 = PCKEV_XORI128_UB(out0, out1);
-        tmp1 = PCKEV_XORI128_UB(out2, out3);
-        ST8x4_UB(tmp0, tmp1, dst1, dst_stride);
-        dst1 += (4 * dst_stride);
+        XORI_B4_128_SB(src4, src5, src6, src7);
+        src += (4 * src_stride);
+
+        VSHF_B2_SB(src0, src0, src1, src1, mask00, mask00, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask00, mask00, vec2, vec3);
+        DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0,
+                    out1, out2, out3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, out0,
+                     out1, out2, out3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec7);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, out0,
+                     out1, out2, out3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6, vec7);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt3, filt3, filt3, filt3, out0,
+                     out1, out2, out3);
 
         /* 4 width */
-        LD_SB4(src, src_stride, src0, src1, src2, src3);
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        src += (4 * src_stride);
-        HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask4, mask5,
-                                   mask6, filt0, filt1, filt2, filt3, out0,
-                                   out1);
-        SRARI_H2_SH(out0, out1, 6);
-        SAT_SH2_SH(out0, out1, 7);
+        VSHF_B2_SB(src4, src5, src6, src7, mask0, mask0, vec0, vec1);
+        DOTP_SB2_SH(vec0, vec1, filt0, filt0, out4, out5);
+        VSHF_B2_SB(src4, src5, src6, src7, mask4, mask4, vec2, vec3);
+        DPADD_SB2_SH(vec2, vec3, filt1, filt1, out4, out5);
+        VSHF_B2_SB(src4, src5, src6, src7, mask5, mask5, vec4, vec5);
+        DPADD_SB2_SH(vec4, vec5, filt2, filt2, out4, out5);
+        VSHF_B2_SB(src4, src5, src6, src7, mask6, mask6, vec6, vec7);
+        DPADD_SB2_SH(vec6, vec7, filt3, filt3, out4, out5);
+
+        SRARI_H4_SH(out0, out1, out2, out3, 6);
+        SRARI_H2_SH(out4, out5, 6);
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        SAT_SH2_SH(out4, out5, 7);
         tmp0 = PCKEV_XORI128_UB(out0, out1);
-        ST4x4_UB(tmp0, tmp0, 0, 1, 2, 3, dst, dst_stride);
+        tmp1 = PCKEV_XORI128_UB(out2, out3);
+        tmp2 = PCKEV_XORI128_UB(out4, out5);
+
+        ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+        ST4x4_UB(tmp2, tmp2, 0, 1, 2, 3, dst + 8, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -594,11 +557,12 @@
                                  const int8_t *filter, int32_t height)
 {
     uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
     v16u8 mask0, mask1, mask2, mask3, out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 filt0, filt1, filt2, filt3;
     v8i16 filt, out0, out1, out2, out3;
 
-    mask0 = LD_UB(&mc_filt_mask_arr[0]);
+    mask0 = LD_UB(&ff_hevc_mask_arr[0]);
     src -= 3;
 
     /* rearranging filter */
@@ -609,11 +573,17 @@
     mask2 = mask0 + 4;
     mask3 = mask0 + 6;
 
-    for (loop_cnt = (height >> 1); loop_cnt--;) {
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
         LD_SB2(src, src_stride, src0, src2);
         LD_SB2(src + 8, src_stride, src1, src3);
-        XORI_B4_128_SB(src0, src1, src2, src3);
         src += (2 * src_stride);
+
+        LD_SB2(src, src_stride, src4, src6);
+        LD_SB2(src + 8, src_stride, src5, src7);
+        src += (2 * src_stride);
+
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        XORI_B4_128_SB(src4, src5, src6, src7);
         HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
                                    mask3, filt0, filt1, filt2, filt3, out0,
                                    out1, out2, out3);
@@ -625,6 +595,18 @@
         out = PCKEV_XORI128_UB(out2, out3);
         ST_UB(out, dst);
         dst += dst_stride;
+
+        HORIZ_8TAP_8WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1, mask2,
+                                   mask3, filt0, filt1, filt2, filt3, out0,
+                                   out1, out2, out3);
+        SRARI_H4_SH(out0, out1, out2, out3, 6);
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        out = PCKEV_XORI128_UB(out0, out1);
+        ST_UB(out, dst);
+        dst += dst_stride;
+        out = PCKEV_XORI128_UB(out2, out3);
+        ST_UB(out, dst);
+        dst += dst_stride;
     }
 }
 
@@ -637,10 +619,9 @@
     v16u8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7, out;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9, vec10;
     v16i8 vec11;
-    v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9, out10;
-    v8i16 out11, filt;
+    v8i16 out0, out1, out2, out3, out8, out9, filt;
 
-    mask0 = LD_UB(&mc_filt_mask_arr[0]);
+    mask0 = LD_UB(&ff_hevc_mask_arr[0]);
     src -= 3;
 
     /* rearranging filter */
@@ -655,7 +636,7 @@
     mask6 = mask0 + 12;
     mask7 = mask0 + 14;
 
-    for (loop_cnt = (height >> 1); loop_cnt--;) {
+    for (loop_cnt = 16; loop_cnt--;) {
         LD_SB2(src, src_stride, src0, src2);
         LD_SB2(src + 16, src_stride, src1, src3);
         XORI_B4_128_SB(src0, src1, src2, src3);
@@ -669,9 +650,9 @@
         VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0, vec8);
         VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2, vec9);
         VSHF_B2_SB(src0, src1, src2, src3, mask6, mask6, vec1, vec3);
-        DOTP_SB4_SH(vec0, vec8, vec2, vec9, filt2, filt2, filt2, filt2, out4,
-                    out10, out6, out11);
-        DOTP_SB2_SH(vec1, vec3, filt2, filt2, out5, out7);
+        DPADD_SB4_SH(vec0, vec8, vec2, vec9, filt2, filt2, filt2, filt2,
+                     out0, out8, out2, out9);
+        DPADD_SB2_SH(vec1, vec3, filt2, filt2, out1, out3);
         VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec10);
         VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec11);
         VSHF_B2_SB(src0, src1, src2, src3, mask5, mask5, vec5, vec7);
@@ -682,11 +663,8 @@
         VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6, vec11);
         VSHF_B2_SB(src0, src1, src2, src3, mask7, mask7, vec5, vec7);
         DPADD_SB4_SH(vec4, vec10, vec6, vec11, filt3, filt3, filt3, filt3,
-                     out4, out10, out6, out11);
-        DPADD_SB2_SH(vec5, vec7, filt3, filt3, out5, out7);
-        ADDS_SH4_SH(out0, out4, out8, out10, out2, out6, out9, out11, out0,
-                    out8, out2, out9);
-        ADDS_SH2_SH(out1, out5, out3, out7, out1, out3);
+                     out0, out8, out2, out9);
+        DPADD_SB2_SH(vec5, vec7, filt3, filt3, out1, out3);
         SRARI_H4_SH(out0, out8, out2, out9, 6);
         SRARI_H2_SH(out1, out3, 6);
         SAT_SH4_SH(out0, out8, out2, out9, 7);
@@ -707,11 +685,12 @@
                                  const int8_t *filter, int32_t height)
 {
     uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
     v16u8 mask0, mask1, mask2, mask3, out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 filt0, filt1, filt2, filt3;
     v8i16 filt, out0, out1, out2, out3;
 
-    mask0 = LD_UB(&mc_filt_mask_arr[0]);
+    mask0 = LD_UB(&ff_hevc_mask_arr[0]);
     src -= 3;
 
     /* rearranging filter */
@@ -724,31 +703,32 @@
 
     for (loop_cnt = (height >> 1); loop_cnt--;) {
         src0 = LD_SB(src);
+        src1 = LD_SB(src + 8);
         src2 = LD_SB(src + 16);
         src3 = LD_SB(src + 24);
-        src1 = __msa_sldi_b(src2, src0, 8);
         src += src_stride;
         XORI_B4_128_SB(src0, src1, src2, src3);
+
+        src4 = LD_SB(src);
+        src5 = LD_SB(src + 8);
+        src6 = LD_SB(src + 16);
+        src7 = LD_SB(src + 24);
+        src += src_stride;
+        XORI_B4_128_SB(src4, src5, src6, src7);
+
         HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
                                    mask3, filt0, filt1, filt2, filt3, out0,
                                    out1, out2, out3);
         SRARI_H4_SH(out0, out1, out2, out3, 6);
         SAT_SH4_SH(out0, out1, out2, out3, 7);
 
-        src0 = LD_SB(src);
-        src2 = LD_SB(src + 16);
-        src3 = LD_SB(src + 24);
-        src1 = __msa_sldi_b(src2, src0, 8);
-        src += src_stride;
-
         out = PCKEV_XORI128_UB(out0, out1);
         ST_UB(out, dst);
         out = PCKEV_XORI128_UB(out2, out3);
         ST_UB(out, dst + 16);
         dst += dst_stride;
 
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+        HORIZ_8TAP_8WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1, mask2,
                                    mask3, filt0, filt1, filt2, filt3, out0,
                                    out1, out2, out3);
         SRARI_H4_SH(out0, out1, out2, out3, 6);
@@ -767,10 +747,11 @@
 {
     uint32_t loop_cnt;
     v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3, vec0, vec1, vec2;
+    v16i8 src4;
     v16u8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7, out;
-    v8i16 filt, out0, out1, out2, out3, out4, out5, out6;
+    v8i16 filt, out0, out1, out2, out3;
 
-    mask0 = LD_UB(&mc_filt_mask_arr[0]);
+    mask0 = LD_UB(&ff_hevc_mask_arr[0]);
     src -= 3;
 
     /* rearranging filter */
@@ -785,11 +766,17 @@
     mask6 = mask0 + 12;
     mask7 = mask0 + 14;
 
-    for (loop_cnt = height; loop_cnt--;) {
-        LD_SB3(src, 16, src0, src2, src3);
-        src1 = __msa_sldi_b(src2, src0, 8);
+    for (loop_cnt = 64; loop_cnt--;) {
+        src0 = LD_SB(src);
+        src1 = LD_SB(src + 8);
+        src2 = LD_SB(src + 16);
+        src3 = LD_SB(src + 32);
+        src4 = LD_SB(src + 40);
+        src += src_stride;
 
         XORI_B4_128_SB(src0, src1, src2, src3);
+        src4 = (v16i8) __msa_xori_b((v16u8) src4, 128);
+
         VSHF_B3_SB(src0, src0, src1, src1, src2, src2, mask0, mask0, mask0,
                    vec0, vec1, vec2);
         DOTP_SB3_SH(vec0, vec1, vec2, filt0, filt0, filt0, out0, out1, out2);
@@ -799,45 +786,42 @@
         out2 = __msa_dpadd_s_h(out2, vec2, filt1);
         VSHF_B3_SB(src0, src0, src1, src1, src2, src2, mask2, mask2, mask2,
                    vec0, vec1, vec2);
-        DOTP_SB3_SH(vec0, vec1, vec2, filt2, filt2, filt2, out3, out4, out5);
+        DPADD_SB2_SH(vec0, vec1, filt2, filt2, out0, out1);
+        out2 = __msa_dpadd_s_h(out2, vec2, filt2);
+
         VSHF_B3_SB(src0, src0, src1, src1, src2, src2, mask3, mask3, mask3,
                    vec0, vec1, vec2);
-        DPADD_SB2_SH(vec0, vec1, filt3, filt3, out3, out4);
-        out5 = __msa_dpadd_s_h(out5, vec2, filt3);
-        ADDS_SH2_SH(out0, out3, out1, out4, out0, out1);
-        out2 = __msa_adds_s_h(out2, out5);
+        DPADD_SB2_SH(vec0, vec1, filt3, filt3, out0, out1);
+        out2 = __msa_dpadd_s_h(out2, vec2, filt3);
+
         SRARI_H2_SH(out0, out1, 6);
-        out6 = __msa_srari_h(out2, 6);
-        SAT_SH3_SH(out0, out1, out6, 7);
+        out3 = __msa_srari_h(out2, 6);
+        SAT_SH3_SH(out0, out1, out3, 7);
         out = PCKEV_XORI128_UB(out0, out1);
         ST_UB(out, dst);
 
-        src1 = LD_SB(src + 40);
-        src += src_stride;
-        src1 = (v16i8) __msa_xori_b((v16u8) src1, 128);
-
-        VSHF_B3_SB(src2, src3, src3, src3, src1, src1, mask4, mask0, mask0,
+        VSHF_B3_SB(src2, src3, src3, src3, src4, src4, mask4, mask0, mask0,
                    vec0, vec1, vec2);
         DOTP_SB3_SH(vec0, vec1, vec2, filt0, filt0, filt0, out0, out1, out2);
-        VSHF_B3_SB(src2, src3, src3, src3, src1, src1, mask5, mask1, mask1,
+        VSHF_B3_SB(src2, src3, src3, src3, src4, src4, mask5, mask1, mask1,
                    vec0, vec1, vec2);
         DPADD_SB2_SH(vec0, vec1, filt1, filt1, out0, out1);
         out2 = __msa_dpadd_s_h(out2, vec2, filt1);
-        VSHF_B3_SB(src2, src3, src3, src3, src1, src1, mask6, mask2, mask2,
+        VSHF_B3_SB(src2, src3, src3, src3, src4, src4, mask6, mask2, mask2,
                    vec0, vec1, vec2);
-        DOTP_SB3_SH(vec0, vec1, vec2, filt2, filt2, filt2, out3, out4, out5);
-        VSHF_B3_SB(src2, src3, src3, src3, src1, src1, mask7, mask3, mask3,
+        DPADD_SB2_SH(vec0, vec1, filt2, filt2, out0, out1);
+        out2 = __msa_dpadd_s_h(out2, vec2, filt2);
+        VSHF_B3_SB(src2, src3, src3, src3, src4, src4, mask7, mask3, mask3,
                    vec0, vec1, vec2);
-        DPADD_SB2_SH(vec0, vec1, filt3, filt3, out3, out4);
-        out5 = __msa_dpadd_s_h(out5, vec2, filt3);
-        ADDS_SH2_SH(out0, out3, out1, out4, out3, out4);
-        out5 = __msa_adds_s_h(out2, out5);
-        SRARI_H2_SH(out3, out4, 6);
-        out5 = __msa_srari_h(out5, 6);
-        SAT_SH3_SH(out3, out4, out5, 7);
-        out = PCKEV_XORI128_UB(out6, out3);
+        DPADD_SB2_SH(vec0, vec1, filt3, filt3, out0, out1);
+        out2 = __msa_dpadd_s_h(out2, vec2, filt3);
+
+        SRARI_H2_SH(out0, out1, 6);
+        out2 = __msa_srari_h(out2, 6);
+        SAT_SH3_SH(out0, out1, out2, 7);
+        out = PCKEV_XORI128_UB(out3, out0);
         ST_UB(out, dst + 16);
-        out = PCKEV_XORI128_UB(out4, out5);
+        out = PCKEV_XORI128_UB(out1, out2);
         ST_UB(out, dst + 32);
         dst += dst_stride;
     }
@@ -848,11 +832,13 @@
                                  const int8_t *filter, int32_t height)
 {
     int32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
     v16u8 mask0, mask1, mask2, mask3, out;
-    v8i16 filt, out0, out1, out2, out3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 filt0, filt1, filt2, filt3;
+    v8i16 res0, res1, res2, res3, filt;
 
-    mask0 = LD_UB(&mc_filt_mask_arr[0]);
+    mask0 = LD_UB(&ff_hevc_mask_arr[0]);
     src -= 3;
 
     /* rearranging filter */
@@ -864,37 +850,57 @@
     mask3 = mask0 + 6;
 
     for (loop_cnt = height; loop_cnt--;) {
-        src0 = LD_SB(src);
-        src2 = LD_SB(src + 16);
-        src3 = LD_SB(src + 24);
-        src1 = __msa_sldi_b(src2, src0, 8);
-
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
-                                   mask2, mask3, filt0, filt1, filt2, filt3,
-                                   out0, out1, out2, out3);
-        SRARI_H4_SH(out0, out1, out2, out3, 6);
-        SAT_SH4_SH(out0, out1, out2, out3, 7);
-        out = PCKEV_XORI128_UB(out0, out1);
-        ST_UB(out, dst);
-        out = PCKEV_XORI128_UB(out2, out3);
-        ST_UB(out, dst + 16);
-
-        src0 = LD_SB(src + 32);
-        src2 = LD_SB(src + 48);
-        src3 = LD_SB(src + 56);
-        src1 = __msa_sldi_b(src2, src0, 8);
+        LD_SB8(src, 8, src0, src1, src2, src3, src4, src5, src6, src7);
         src += src_stride;
 
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
-                                   mask2, mask3, filt0, filt1, filt2, filt3,
-                                   out0, out1, out2, out3);
-        SRARI_H4_SH(out0, out1, out2, out3, 6);
-        SAT_SH4_SH(out0, out1, out2, out3, 7);
-        out = PCKEV_XORI128_UB(out0, out1);
+        XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0,
+                    res1, res2, res3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, res0,
+                     res1, res2, res3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec7);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, res0,
+                     res1, res2, res3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6, vec7);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt3, filt3, filt3, filt3, res0,
+                     res1, res2, res3);
+
+        SRARI_H4_SH(res0, res1, res2, res3, 6);
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        out = PCKEV_XORI128_UB(res0, res1);
+        ST_UB(out, dst);
+        out = PCKEV_XORI128_UB(res2, res3);
+        ST_UB(out, dst + 16);
+
+        VSHF_B2_SB(src4, src4, src5, src5, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src6, src6, src7, src7, mask0, mask0, vec2, vec3);
+        DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0,
+                    res1, res2, res3);
+        VSHF_B2_SB(src4, src4, src5, src5, mask2, mask2, vec0, vec1);
+        VSHF_B2_SB(src6, src6, src7, src7, mask2, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, res0,
+                     res1, res2, res3);
+        VSHF_B2_SB(src4, src4, src5, src5, mask1, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src7, src7, mask1, mask1, vec6, vec7);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, res0,
+                     res1, res2, res3);
+        VSHF_B2_SB(src4, src4, src5, src5, mask3, mask3, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src7, src7, mask3, mask3, vec6, vec7);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt3, filt3, filt3, filt3, res0,
+                     res1, res2, res3);
+
+        SRARI_H4_SH(res0, res1, res2, res3, 6);
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        out = PCKEV_XORI128_UB(res0, res1);
         ST_UB(out, dst + 32);
-        out = PCKEV_XORI128_UB(out2, out3);
+        out = PCKEV_XORI128_UB(res2, res3);
         ST_UB(out, dst + 48);
         dst += dst_stride;
     }
@@ -905,12 +911,14 @@
                                 const int8_t *filter, int32_t height)
 {
     uint32_t loop_cnt;
+    v16u8 out0, out1;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src11, src12, src13, src14;
     v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
     v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776;
+    v16i8 src1110_r, src1211_r, src1312_r, src1413_r, src12111110, src14131312;
     v16i8 src10998, filt0, filt1, filt2, filt3;
-    v16u8 out;
-    v8i16 filt, out10, out32;
+    v8i16 filt, out10, out32, out54, out76;
 
     src -= (3 * src_stride);
 
@@ -927,28 +935,45 @@
                src4332, src6554);
     XORI_B3_128_SB(src2110, src4332, src6554);
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = (height >> 3); loop_cnt--;) {
         LD_SB4(src, src_stride, src7, src8, src9, src10);
         src += (4 * src_stride);
+        LD_SB4(src, src_stride, src11, src12, src13, src14);
+        src += (4 * src_stride);
 
         ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
                    src87_r, src98_r, src109_r);
+        ILVR_B4_SB(src11, src10, src12, src11, src13, src12, src14, src13,
+                   src1110_r, src1211_r, src1312_r, src1413_r);
         ILVR_D2_SB(src87_r, src76_r, src109_r, src98_r, src8776, src10998);
+        ILVR_D2_SB(src1211_r, src1110_r, src1413_r, src1312_r,
+                   src12111110, src14131312);
         XORI_B2_128_SB(src8776, src10998);
-        out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, filt0,
-                                    filt1, filt2, filt3);
-        out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0,
-                                    filt1, filt2, filt3);
+        XORI_B2_128_SB(src12111110, src14131312);
+
+        DOTP_SB2_SH(src2110, src4332, filt0, filt0, out10, out32);
+        DOTP_SB2_SH(src6554, src8776, filt0, filt0, out54, out76);
+        DPADD_SB2_SH(src4332, src6554, filt1, filt1, out10, out32);
+        DPADD_SB2_SH(src8776, src10998, filt1, filt1, out54, out76);
+        DPADD_SB2_SH(src6554, src8776, filt2, filt2, out10, out32);
+        DPADD_SB2_SH(src10998, src12111110, filt2, filt2, out54, out76);
+        DPADD_SB2_SH(src8776, src10998, filt3, filt3, out10, out32);
+        DPADD_SB2_SH(src12111110, src14131312, filt3, filt3, out54, out76);
         SRARI_H2_SH(out10, out32, 6);
+        SRARI_H2_SH(out54, out76, 6);
         SAT_SH2_SH(out10, out32, 7);
-        out = PCKEV_XORI128_UB(out10, out32);
-        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        SAT_SH2_SH(out54, out76, 7);
+        out0 = PCKEV_XORI128_UB(out10, out32);
+        out1 = PCKEV_XORI128_UB(out54, out76);
+        ST4x4_UB(out0, out0, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+        ST4x4_UB(out1, out1, 0, 1, 2, 3, dst, dst_stride);
         dst += (4 * dst_stride);
 
-        src2110 = src6554;
-        src4332 = src8776;
-        src6554 = src10998;
-        src6 = src10;
+        src2110 = src10998;
+        src4332 = src12111110;
+        src6554 = src14131312;
+        src6 = src14;
     }
 }
 
@@ -982,14 +1007,14 @@
 
         ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
                    src87_r, src98_r, src109_r);
-        out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
-                                     filt1, filt2, filt3);
-        out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
-                                     filt1, filt2, filt3);
-        out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
-                                     filt1, filt2, filt3);
-        out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
-                                     filt1, filt2, filt3);
+        DOTP_SB4_SH(src10_r, src21_r, src32_r, src43_r, filt0, filt0, filt0,
+                    filt0, out0_r, out1_r, out2_r, out3_r);
+        DPADD_SB4_SH(src32_r, src43_r, src54_r, src65_r, filt1, filt1, filt1,
+                     filt1, out0_r, out1_r, out2_r, out3_r);
+        DPADD_SB4_SH(src54_r, src65_r, src76_r, src87_r, filt2, filt2, filt2,
+                     filt2, out0_r, out1_r, out2_r, out3_r);
+        DPADD_SB4_SH(src76_r, src87_r, src98_r, src109_r, filt3, filt3, filt3,
+                     filt3, out0_r, out1_r, out2_r, out3_r);
         SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
         SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
         tmp0 = PCKEV_XORI128_UB(out0_r, out1_r);
@@ -1011,61 +1036,81 @@
                                  uint8_t *dst, int32_t dst_stride,
                                  const int8_t *filter, int32_t height)
 {
-    int32_t loop_cnt;
+    uint32_t loop_cnt;
     uint32_t out2, out3;
     uint64_t out0, out1;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, res0, res1;
-    v16i8 res2, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
-    v8i16 vec01, vec23, vec45, vec67, tmp0, tmp1, tmp2;
-    v8i16 filt, filt0, filt1, filt2, filt3;
-    v4i32 mask = { 2, 6, 2, 6 };
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 filt0, filt1, filt2, filt3;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+    v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l;
+    v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
+    v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
 
     src -= (3 * src_stride);
 
-    /* rearranging filter_y */
     filt = LD_SH(filter);
-    SPLATI_H4_SH(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
     LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
     src += (7 * src_stride);
 
     XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
 
-    /* 4 width */
-    VSHF_W2_SB(src0, src1, src1, src2, mask, mask, vec0, vec1);
-    VSHF_W2_SB(src2, src3, src3, src4, mask, mask, vec2, vec3);
-    VSHF_W2_SB(src4, src5, src5, src6, mask, mask, vec4, vec5);
+    ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+               src54_r, src21_r);
+    ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+    ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l,
+               src54_l, src21_l);
+    ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
 
-    for (loop_cnt = (height >> 1); loop_cnt--;) {
-        LD_SB2(src, src_stride, src7, src8);
-        XORI_B2_128_SB(src7, src8);
-        src += (2 * src_stride);
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        src += (4 * src_stride);
 
-        ILVR_B4_SH(src1, src0, src3, src2, src5, src4, src7, src6,
-                   vec01, vec23, vec45, vec67);
-        tmp0 = FILT_8TAP_DPADD_S_H(vec01, vec23, vec45, vec67, filt0, filt1,
-                                   filt2, filt3);
-        ILVR_B4_SH(src2, src1, src4, src3, src6, src5, src8, src7, vec01, vec23,
-                   vec45, vec67);
-        tmp1 = FILT_8TAP_DPADD_S_H(vec01, vec23, vec45, vec67, filt0, filt1,
-                                   filt2, filt3);
+        ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+                   src87_r, src98_r, src109_r);
+        ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
+                   src87_l, src98_l, src109_l);
+        out0_r = HEVC_FILT_8TAP_SH(src10_r, src32_r, src54_r, src76_r, filt0,
+                                   filt1, filt2, filt3);
+        out1_r = HEVC_FILT_8TAP_SH(src21_r, src43_r, src65_r, src87_r, filt0,
+                                   filt1, filt2, filt3);
+        out2_r = HEVC_FILT_8TAP_SH(src32_r, src54_r, src76_r, src98_r, filt0,
+                                   filt1, filt2, filt3);
+        out3_r = HEVC_FILT_8TAP_SH(src43_r, src65_r, src87_r, src109_r, filt0,
+                                   filt1, filt2, filt3);
+        out0_l = HEVC_FILT_8TAP_SH(src10_l, src32_l, src54_l, src76_l, filt0,
+                                   filt1, filt2, filt3);
+        out1_l = HEVC_FILT_8TAP_SH(src21_l, src43_l, src65_l, src87_l, filt0,
+                                   filt1, filt2, filt3);
+        out2_l = HEVC_FILT_8TAP_SH(src32_l, src54_l, src76_l, src98_l, filt0,
+                                   filt1, filt2, filt3);
+        out3_l = HEVC_FILT_8TAP_SH(src43_l, src65_l, src87_l, src109_l, filt0,
+                                   filt1, filt2, filt3);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
+        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 6);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                    out3_r, tmp0, tmp1, tmp2, tmp3);
+        XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
 
-        /* 4 width */
-        VSHF_W2_SB(src6, src7, src7, src8, mask, mask, vec6, vec7);
-        ILVR_B4_SH(vec1, vec0, vec3, vec2, vec5, vec4, vec7, vec6, vec01, vec23,
-                   vec45, vec67);
-        tmp2 = FILT_8TAP_DPADD_S_H(vec01, vec23, vec45, vec67, filt0, filt1,
-                                   filt2, filt3);
-        SRARI_H2_SH(tmp0, tmp1, 6);
-        tmp2 = __msa_srari_h(tmp2, 6);
-        SAT_SH3_SH(tmp0, tmp1, tmp2, 7);
-        PCKEV_B3_SB(tmp0, tmp0, tmp1, tmp1, tmp2, tmp2, res0, res1, res2);
-        XORI_B3_128_SB(res0, res1, res2);
-
-        out0 = __msa_copy_u_d((v2i64) res0, 0);
-        out1 = __msa_copy_u_d((v2i64) res1, 0);
-        out2 = __msa_copy_u_w((v4i32) res2, 0);
-        out3 = __msa_copy_u_w((v4i32) res2, 1);
+        out0 = __msa_copy_u_d((v2i64) tmp0, 0);
+        out1 = __msa_copy_u_d((v2i64) tmp1, 0);
+        out2 = __msa_copy_u_w((v4i32) tmp0, 2);
+        out3 = __msa_copy_u_w((v4i32) tmp1, 2);
+        SD(out0, dst);
+        SW(out2, (dst + 8));
+        dst += dst_stride;
+        SD(out1, dst);
+        SW(out3, (dst + 8));
+        dst += dst_stride;
+        out0 = __msa_copy_u_d((v2i64) tmp2, 0);
+        out1 = __msa_copy_u_d((v2i64) tmp3, 0);
+        out2 = __msa_copy_u_w((v4i32) tmp2, 2);
+        out3 = __msa_copy_u_w((v4i32) tmp3, 2);
         SD(out0, dst);
         SW(out2, (dst + 8));
         dst += dst_stride;
@@ -1073,19 +1118,19 @@
         SW(out3, (dst + 8));
         dst += dst_stride;
 
-        src0 = src2;
-        src1 = src3;
-        src2 = src4;
-        src3 = src5;
-        src4 = src6;
-        src5 = src7;
-        src6 = src8;
-        vec0 = vec2;
-        vec1 = vec3;
-        vec2 = vec4;
-        vec3 = vec5;
-        vec4 = vec6;
-        vec5 = vec7;
+        src10_r = src54_r;
+        src32_r = src76_r;
+        src54_r = src98_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src65_r = src109_r;
+        src10_l = src54_l;
+        src32_l = src76_l;
+        src54_l = src98_l;
+        src21_l = src65_l;
+        src43_l = src87_l;
+        src65_l = src109_l;
+        src6 = src10;
     }
 }
 
@@ -1126,22 +1171,22 @@
                    src87_r, src98_r, src109_r);
         ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
                    src87_l, src98_l, src109_l);
-        out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
-                                     filt1, filt2, filt3);
-        out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
-                                     filt1, filt2, filt3);
-        out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
-                                     filt1, filt2, filt3);
-        out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
-                                     filt1, filt2, filt3);
-        out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0,
-                                     filt1, filt2, filt3);
-        out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0,
-                                     filt1, filt2, filt3);
-        out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0,
-                                     filt1, filt2, filt3);
-        out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0,
-                                     filt1, filt2, filt3);
+        out0_r = HEVC_FILT_8TAP_SH(src10_r, src32_r, src54_r, src76_r, filt0,
+                                   filt1, filt2, filt3);
+        out1_r = HEVC_FILT_8TAP_SH(src21_r, src43_r, src65_r, src87_r, filt0,
+                                   filt1, filt2, filt3);
+        out2_r = HEVC_FILT_8TAP_SH(src32_r, src54_r, src76_r, src98_r, filt0,
+                                   filt1, filt2, filt3);
+        out3_r = HEVC_FILT_8TAP_SH(src43_r, src65_r, src87_r, src109_r, filt0,
+                                   filt1, filt2, filt3);
+        out0_l = HEVC_FILT_8TAP_SH(src10_l, src32_l, src54_l, src76_l, filt0,
+                                   filt1, filt2, filt3);
+        out1_l = HEVC_FILT_8TAP_SH(src21_l, src43_l, src65_l, src87_l, filt0,
+                                   filt1, filt2, filt3);
+        out2_l = HEVC_FILT_8TAP_SH(src32_l, src54_l, src76_l, src98_l, filt0,
+                                   filt1, filt2, filt3);
+        out3_l = HEVC_FILT_8TAP_SH(src43_l, src65_l, src87_l, src109_l, filt0,
+                                   filt1, filt2, filt3);
         SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
         SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 6);
         SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
@@ -1211,22 +1256,22 @@
                        src87_r, src98_r, src109_r);
             ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
                        src87_l, src98_l, src109_l);
-            out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r,
-                                         filt0, filt1, filt2, filt3);
-            out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r,
-                                         filt0, filt1, filt2, filt3);
-            out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r,
-                                         filt0, filt1, filt2, filt3);
-            out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r,
-                                         filt0, filt1, filt2, filt3);
-            out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l,
-                                         filt0, filt1, filt2, filt3);
-            out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l,
-                                         filt0, filt1, filt2, filt3);
-            out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l,
-                                         filt0, filt1, filt2, filt3);
-            out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l,
-                                         filt0, filt1, filt2, filt3);
+            out0_r = HEVC_FILT_8TAP_SH(src10_r, src32_r, src54_r, src76_r,
+                                       filt0, filt1, filt2, filt3);
+            out1_r = HEVC_FILT_8TAP_SH(src21_r, src43_r, src65_r, src87_r,
+                                       filt0, filt1, filt2, filt3);
+            out2_r = HEVC_FILT_8TAP_SH(src32_r, src54_r, src76_r, src98_r,
+                                       filt0, filt1, filt2, filt3);
+            out3_r = HEVC_FILT_8TAP_SH(src43_r, src65_r, src87_r, src109_r,
+                                       filt0, filt1, filt2, filt3);
+            out0_l = HEVC_FILT_8TAP_SH(src10_l, src32_l, src54_l, src76_l,
+                                       filt0, filt1, filt2, filt3);
+            out1_l = HEVC_FILT_8TAP_SH(src21_l, src43_l, src65_l, src87_l,
+                                       filt0, filt1, filt2, filt3);
+            out2_l = HEVC_FILT_8TAP_SH(src32_l, src54_l, src76_l, src98_l,
+                                       filt0, filt1, filt2, filt3);
+            out3_l = HEVC_FILT_8TAP_SH(src43_l, src65_l, src87_l, src109_l,
+                                       filt0, filt1, filt2, filt3);
             SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
             SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 6);
             SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
@@ -1301,37 +1346,34 @@
                                   int32_t height)
 {
     uint32_t loop_cnt;
+    v16u8 out0, out1;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src9, src10, src11, src12, src13, src14;
     v8i16 filt0, filt1, filt2, filt3;
-    v4i32 filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 filt_h0, filt_h1, filt_h2, filt_h3;
     v16i8 mask1, mask2, mask3;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
-    v8i16 dst30, dst41, dst52, dst63, dst66, dst87;
-    v4i32 dst0_r, dst1_r;
-    v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
-    v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
-    v8i16 mask4 = { 0, 4, 1, 5, 2, 6, 3, 7 };
+    v8i16 dst30, dst41, dst52, dst63, dst66, dst117, dst128, dst139, dst1410;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r, dst98_r, dst1110_r, dst1312_r;
+    v8i16 dst21_r, dst43_r, dst65_r, dst87_r, dst109_r, dst1211_r, dst1413_r;
+    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r, dst6_r, dst7_r;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
 
     src -= ((3 * src_stride) + 3);
     filter_vec = LD_SH(filter_x);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W4_SW(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
 
     mask1 = mask0 + 2;
     mask2 = mask0 + 4;
     mask3 = mask0 + 6;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
     src += (7 * src_stride);
     XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
@@ -1343,61 +1385,91 @@
     VSHF_B4_SB(src3, src6, mask0, mask1, mask2, mask3,
                vec12, vec13, vec14, vec15);
 
-    dst30 = const_vec;
-    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                 dst30, dst30, dst30, dst30);
-    dst41 = const_vec;
-    DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                 dst41, dst41, dst41, dst41);
-    dst52 = const_vec;
-    DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                 dst52, dst52, dst52, dst52);
-    dst63 = const_vec;
-    DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2, filt3,
-                 dst63, dst63, dst63, dst63);
+    dst30 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                              filt3);
+    dst41 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                              filt3);
+    dst52 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                              filt3);
+    dst63 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2,
+                              filt3);
 
-    ILVR_H3_SH(dst41, dst30, dst52, dst41, dst63, dst52,
-               dst10_r, dst21_r, dst32_r);
-    dst43_r = __msa_ilvl_h(dst41, dst30);
-    dst54_r = __msa_ilvl_h(dst52, dst41);
-    dst65_r = __msa_ilvl_h(dst63, dst52);
+    ILVRL_H2_SH(dst41, dst30, dst10_r, dst43_r);
+    ILVRL_H2_SH(dst52, dst41, dst21_r, dst54_r);
+    ILVRL_H2_SH(dst63, dst52, dst32_r, dst65_r);
+
     dst66 = (v8i16) __msa_splati_d((v2i64) dst63, 1);
 
-    for (loop_cnt = height >> 1; loop_cnt--;) {
-        LD_SB2(src, src_stride, src7, src8);
-        src += 2 * src_stride;
-        XORI_B2_128_SB(src7, src8);
+    for (loop_cnt = height >> 3; loop_cnt--;) {
+        LD_SB8(src, src_stride, src7, src8, src9, src10, src11, src12, src13,
+               src14);
+        src += (8 * src_stride);
+        XORI_B8_128_SB(src7, src8, src9, src10, src11, src12, src13, src14);
 
-        VSHF_B4_SB(src7, src8, mask0, mask1, mask2, mask3,
+        VSHF_B4_SB(src7, src11, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst87 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst87, dst87, dst87, dst87);
+        VSHF_B4_SB(src8, src12, mask0, mask1, mask2, mask3,
+                   vec4, vec5, vec6, vec7);
+        VSHF_B4_SB(src9, src13, mask0, mask1, mask2, mask3,
+                   vec8, vec9, vec10, vec11);
+        VSHF_B4_SB(src10, src14, mask0, mask1, mask2, mask3,
+                   vec12, vec13, vec14, vec15);
 
-        dst76_r = __msa_ilvr_h(dst87, dst66);
-        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
-                                filt_h0, filt_h1, filt_h2, filt_h3);
-        dst87_r = __msa_vshf_h(mask4, dst87, dst87);
-        dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r,
+        dst117 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                   filt3);
+        dst128 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                   filt3);
+        dst139 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1,
+                                   filt2, filt3);
+        dst1410 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1,
+                                   filt2, filt3);
+
+        dst76_r = __msa_ilvr_h(dst117, dst66);
+        ILVRL_H2_SH(dst128, dst117, dst87_r, dst1211_r);
+        ILVRL_H2_SH(dst139, dst128, dst98_r, dst1312_r);
+        ILVRL_H2_SH(dst1410, dst139, dst109_r, dst1413_r);
+        dst117 = (v8i16) __msa_splati_d((v2i64) dst117, 1);
+        dst1110_r = __msa_ilvr_h(dst117, dst1410);
+
+        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst2_r = HEVC_FILT_8TAP(dst32_r, dst54_r, dst76_r, dst98_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst3_r = HEVC_FILT_8TAP(dst43_r, dst65_r, dst87_r, dst109_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst4_r = HEVC_FILT_8TAP(dst54_r, dst76_r, dst98_r, dst1110_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst5_r = HEVC_FILT_8TAP(dst65_r, dst87_r, dst109_r, dst1211_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst6_r = HEVC_FILT_8TAP(dst76_r, dst98_r, dst1110_r, dst1312_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst7_r = HEVC_FILT_8TAP(dst87_r, dst109_r, dst1211_r, dst1413_r,
                                 filt_h0, filt_h1, filt_h2, filt_h3);
 
-        dst0_r >>= 6;
-        dst1_r >>= 6;
-        SRARI_W2_SW(dst0_r, dst1_r, 6);
-        dst0_r = CLIP_SW_0_255(dst0_r);
-        dst1_r = CLIP_SW_0_255(dst1_r);
+        SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+        SRA_4V(dst4_r, dst5_r, dst6_r, dst7_r, 6);
+        SRARI_W4_SW(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+        SRARI_W4_SW(dst4_r, dst5_r, dst6_r, dst7_r, 6);
+        SAT_SW4_SW(dst0_r, dst1_r, dst2_r, dst3_r, 7);
+        SAT_SW4_SW(dst4_r, dst5_r, dst6_r, dst7_r, 7);
+        PCKEV_H2_SW(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r, dst1_r);
+        PCKEV_H2_SW(dst5_r, dst4_r, dst7_r, dst6_r, dst4_r, dst5_r);
+        out0 = PCKEV_XORI128_UB(dst0_r, dst1_r);
+        out1 = PCKEV_XORI128_UB(dst4_r, dst5_r);
+        ST4x4_UB(out0, out0, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+        ST4x4_UB(out1, out1, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
 
-        HEVC_PCK_SW_SB2(dst1_r, dst0_r, dst0_r);
-        ST4x2_UB(dst0_r, dst, dst_stride);
-        dst += (2 * dst_stride);
-
-        dst10_r = dst32_r;
-        dst32_r = dst54_r;
-        dst54_r = dst76_r;
-        dst21_r = dst43_r;
-        dst43_r = dst65_r;
-        dst65_r = dst87_r;
-        dst66 = (v8i16) __msa_splati_d((v2i64) dst87, 1);
+        dst10_r = dst98_r;
+        dst32_r = dst1110_r;
+        dst54_r = dst1312_r;
+        dst21_r = dst109_r;
+        dst43_r = dst1211_r;
+        dst65_r = dst1413_r;
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst1410, 1);
     }
 }
 
@@ -1412,11 +1484,12 @@
     uint32_t loop_cnt, cnt;
     uint8_t *src_tmp;
     uint8_t *dst_tmp;
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
     v8i16 filt0, filt1, filt2, filt3;
-    v4i32 filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 filt_h0, filt_h1, filt_h2, filt_h3;
     v16i8 mask1, mask2, mask3;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
@@ -1425,20 +1498,17 @@
     v8i16 dst10_l, dst32_l, dst54_l, dst76_l;
     v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
     v8i16 dst21_l, dst43_l, dst65_l, dst87_l;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
 
     src -= ((3 * src_stride) + 3);
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
     filter_vec = LD_SH(filter_x);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W4_SW(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
 
     mask1 = mask0 + 2;
     mask2 = mask0 + 4;
@@ -1461,18 +1531,14 @@
                    vec8, vec9, vec10, vec11);
         VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
                    vec12, vec13, vec14, vec15);
-        dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        dst1 = const_vec;
-        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        dst2 = const_vec;
-        DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        dst3 = const_vec;
-        DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        dst1 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                 filt3);
+        dst2 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                                 filt3);
+        dst3 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1,
+                                 filt2, filt3);
 
         VSHF_B4_SB(src4, src4, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
@@ -1480,33 +1546,29 @@
                    vec4, vec5, vec6, vec7);
         VSHF_B4_SB(src6, src6, mask0, mask1, mask2, mask3,
                    vec8, vec9, vec10, vec11);
-        dst4 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst4, dst4, dst4, dst4);
-        dst5 = const_vec;
-        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                     dst5, dst5, dst5, dst5);
-        dst6 = const_vec;
-        DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                     dst6, dst6, dst6, dst6);
-
-        ILVR_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1,
-                   dst10_r, dst32_r, dst54_r, dst21_r);
-        ILVR_H2_SH(dst4, dst3, dst6, dst5, dst43_r, dst65_r);
-        ILVL_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1,
-                   dst10_l, dst32_l, dst54_l, dst21_l);
-        ILVL_H2_SH(dst4, dst3, dst6, dst5, dst43_l, dst65_l);
+        dst4 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        dst5 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                 filt3);
+        dst6 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                                 filt3);
 
         for (loop_cnt = height >> 1; loop_cnt--;) {
             LD_SB2(src_tmp, src_stride, src7, src8);
             XORI_B2_128_SB(src7, src8);
             src_tmp += 2 * src_stride;
 
+            ILVR_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1,
+                       dst10_r, dst32_r, dst54_r, dst21_r);
+            ILVL_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1,
+                       dst10_l, dst32_l, dst54_l, dst21_l);
+            ILVR_H2_SH(dst4, dst3, dst6, dst5, dst43_r, dst65_r);
+            ILVL_H2_SH(dst4, dst3, dst6, dst5, dst43_l, dst65_l);
+
             VSHF_B4_SB(src7, src7, mask0, mask1, mask2, mask3,
                        vec0, vec1, vec2, vec3);
-            dst7 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst7, dst7, dst7, dst7);
+            dst7 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                     filt2, filt3);
 
             ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
             dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
@@ -1518,9 +1580,8 @@
 
             VSHF_B4_SB(src8, src8, mask0, mask1, mask2, mask3,
                        vec0, vec1, vec2, vec3);
-            dst8 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst8, dst8, dst8, dst8);
+            dst8 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                     filt2, filt3);
 
             ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
             dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r,
@@ -1530,27 +1591,19 @@
             dst1_r >>= 6;
             dst1_l >>= 6;
             SRARI_W4_SW(dst0_r, dst0_l, dst1_r, dst1_l, 6);
-            dst0_r = CLIP_SW_0_255(dst0_r);
-            dst0_l = CLIP_SW_0_255(dst0_l);
-            dst1_r = CLIP_SW_0_255(dst1_r);
-            dst1_l = CLIP_SW_0_255(dst1_l);
+            SAT_SW4_SW(dst0_r, dst0_l, dst1_r, dst1_l, 7);
 
-            HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-            ST8x2_UB(dst0_r, dst_tmp, dst_stride);
+            PCKEV_H2_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst0, dst1);
+            out = PCKEV_XORI128_UB(dst0, dst1);
+            ST8x2_UB(out, dst_tmp, dst_stride);
             dst_tmp += (2 * dst_stride);
 
-            dst10_r = dst32_r;
-            dst32_r = dst54_r;
-            dst54_r = dst76_r;
-            dst10_l = dst32_l;
-            dst32_l = dst54_l;
-            dst54_l = dst76_l;
-            dst21_r = dst43_r;
-            dst43_r = dst65_r;
-            dst65_r = dst87_r;
-            dst21_l = dst43_l;
-            dst43_l = dst65_l;
-            dst65_l = dst87_l;
+            dst0 = dst2;
+            dst1 = dst3;
+            dst2 = dst4;
+            dst3 = dst5;
+            dst4 = dst6;
+            dst5 = dst7;
             dst6 = dst8;
         }
 
@@ -1579,11 +1632,232 @@
                                    const int8_t *filter_y,
                                    int32_t height)
 {
-    hevc_hv_uni_8t_8multx2mult_msa(src, src_stride, dst, dst_stride,
-                                   filter_x, filter_y, height, 8);
+    uint32_t loop_cnt;
+    uint8_t *src_tmp, *dst_tmp;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src11, src12, src13, src14;
+    v16i8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
+    v8i16 dst30, dst41, dst52, dst63, dst66, dst117, dst128, dst139, dst1410;
+    v8i16 filt0, filt1, filt2, filt3, filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst76_l, dst21_l, dst43_l, dst65_l;
+    v8i16 dst87_r, dst98_r, dst1110_r, dst1312_r, dst109_r, dst1211_r;
+    v8i16 dst1413_r, dst87_l, filter_vec;
+    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r, dst6_r, dst7_r;
+    v4i32 dst0_l, dst1_l;
 
-    hevc_hv_uni_8t_4w_msa(src + 8, src_stride, dst + 8, dst_stride,
-                          filter_x, filter_y, height);
+    src -= ((3 * src_stride) + 3);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    src_tmp = src;
+    dst_tmp = dst;
+
+    LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src_tmp += (7 * src_stride);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+    /* row 0 row 1 row 2 row 3 */
+    VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3, vec8, vec9, vec10,
+               vec11);
+    VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3, vec12, vec13, vec14,
+               vec15);
+    dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                             filt3);
+    dst1 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                             filt3);
+    dst2 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                             filt3);
+    dst3 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1,
+                             filt2, filt3);
+
+    VSHF_B4_SB(src4, src4, mask0, mask1, mask2, mask3, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src5, src5, mask0, mask1, mask2, mask3, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src6, src6, mask0, mask1, mask2, mask3, vec8, vec9, vec10,
+               vec11);
+    dst4 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                             filt3);
+    dst5 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                             filt3);
+    dst6 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                             filt3);
+
+    for (loop_cnt = 8; loop_cnt--;) {
+        LD_SB2(src_tmp, src_stride, src7, src8);
+        XORI_B2_128_SB(src7, src8);
+        src_tmp += 2 * src_stride;
+
+        ILVR_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1, dst10_r,
+                   dst32_r, dst54_r, dst21_r);
+        ILVL_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1, dst10_l,
+                   dst32_l, dst54_l, dst21_l);
+        ILVR_H2_SH(dst4, dst3, dst6, dst5, dst43_r, dst65_r);
+        ILVL_H2_SH(dst4, dst3, dst6, dst5, dst43_l, dst65_l);
+
+        VSHF_B4_SB(src7, src7, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+                   vec3);
+        dst7 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+
+        ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
+        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
+                                filt_h0, filt_h1, filt_h2, filt_h3);
+        dst0_l = HEVC_FILT_8TAP(dst10_l, dst32_l, dst54_l, dst76_l,
+                                filt_h0, filt_h1, filt_h2, filt_h3);
+        dst0_r >>= 6;
+        dst0_l >>= 6;
+
+        VSHF_B4_SB(src8, src8, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+                   vec3);
+        dst8 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+
+        ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
+        dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r,
+                                filt_h0, filt_h1, filt_h2, filt_h3);
+        dst1_l = HEVC_FILT_8TAP(dst21_l, dst43_l, dst65_l, dst87_l,
+                                filt_h0, filt_h1, filt_h2, filt_h3);
+        dst1_r >>= 6;
+        dst1_l >>= 6;
+        SRARI_W4_SW(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+        SAT_SW4_SW(dst0_r, dst0_l, dst1_r, dst1_l, 7);
+
+        PCKEV_H2_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst0, dst1);
+        out0 = PCKEV_XORI128_UB(dst0, dst1);
+        ST8x2_UB(out0, dst_tmp, dst_stride);
+        dst_tmp += (2 * dst_stride);
+
+        dst0 = dst2;
+        dst1 = dst3;
+        dst2 = dst4;
+        dst3 = dst5;
+        dst4 = dst6;
+        dst5 = dst7;
+        dst6 = dst8;
+    }
+
+    src += 8;
+    dst += 8;
+
+    mask4 = LD_SB(ff_hevc_mask_arr + 16);
+    mask5 = mask4 + 2;
+    mask6 = mask4 + 4;
+    mask7 = mask4 + 6;
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src += (7 * src_stride);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+    VSHF_B4_SB(src0, src3, mask4, mask5, mask6, mask7, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src1, src4, mask4, mask5, mask6, mask7, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src2, src5, mask4, mask5, mask6, mask7, vec8, vec9, vec10,
+               vec11);
+    VSHF_B4_SB(src3, src6, mask4, mask5, mask6, mask7, vec12, vec13, vec14,
+               vec15);
+
+    dst30 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                              filt3);
+    dst41 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                              filt3);
+    dst52 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                              filt3);
+    dst63 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2,
+                              filt3);
+
+    ILVRL_H2_SH(dst41, dst30, dst10_r, dst43_r);
+    ILVRL_H2_SH(dst52, dst41, dst21_r, dst54_r);
+    ILVRL_H2_SH(dst63, dst52, dst32_r, dst65_r);
+
+    dst66 = (v8i16) __msa_splati_d((v2i64) dst63, 1);
+
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB8(src, src_stride, src7, src8, src9, src10, src11, src12, src13,
+               src14);
+        src += (8 * src_stride);
+        XORI_B8_128_SB(src7, src8, src9, src10, src11, src12, src13, src14);
+
+        VSHF_B4_SB(src7, src11, mask4, mask5, mask6, mask7, vec0, vec1, vec2,
+                   vec3);
+        VSHF_B4_SB(src8, src12, mask4, mask5, mask6, mask7, vec4, vec5, vec6,
+                   vec7);
+        VSHF_B4_SB(src9, src13, mask4, mask5, mask6, mask7, vec8, vec9, vec10,
+                   vec11);
+        VSHF_B4_SB(src10, src14, mask4, mask5, mask6, mask7, vec12, vec13,
+                   vec14, vec15);
+
+        dst117 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                   filt3);
+        dst128 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                   filt3);
+        dst139 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1,
+                                   filt2, filt3);
+        dst1410 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1,
+                                   filt2, filt3);
+
+        dst76_r = __msa_ilvr_h(dst117, dst66);
+        ILVRL_H2_SH(dst128, dst117, dst87_r, dst1211_r);
+        ILVRL_H2_SH(dst139, dst128, dst98_r, dst1312_r);
+        ILVRL_H2_SH(dst1410, dst139, dst109_r, dst1413_r);
+        dst117 = (v8i16) __msa_splati_d((v2i64) dst117, 1);
+        dst1110_r = __msa_ilvr_h(dst117, dst1410);
+
+        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst2_r = HEVC_FILT_8TAP(dst32_r, dst54_r, dst76_r, dst98_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst3_r = HEVC_FILT_8TAP(dst43_r, dst65_r, dst87_r, dst109_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst4_r = HEVC_FILT_8TAP(dst54_r, dst76_r, dst98_r, dst1110_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst5_r = HEVC_FILT_8TAP(dst65_r, dst87_r, dst109_r, dst1211_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst6_r = HEVC_FILT_8TAP(dst76_r, dst98_r, dst1110_r, dst1312_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst7_r = HEVC_FILT_8TAP(dst87_r, dst109_r, dst1211_r, dst1413_r,
+                                filt_h0, filt_h1, filt_h2, filt_h3);
+
+        SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+        SRA_4V(dst4_r, dst5_r, dst6_r, dst7_r, 6);
+        SRARI_W4_SW(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+        SRARI_W4_SW(dst4_r, dst5_r, dst6_r, dst7_r, 6);
+        SAT_SW4_SW(dst0_r, dst1_r, dst2_r, dst3_r, 7);
+        SAT_SW4_SW(dst4_r, dst5_r, dst6_r, dst7_r, 7);
+        PCKEV_H2_SW(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r, dst1_r);
+        PCKEV_H2_SW(dst5_r, dst4_r, dst7_r, dst6_r, dst4_r, dst5_r);
+        out0 = PCKEV_XORI128_UB(dst0_r, dst1_r);
+        out1 = PCKEV_XORI128_UB(dst4_r, dst5_r);
+        ST4x4_UB(out0, out0, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+        ST4x4_UB(out1, out1, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        dst10_r = dst98_r;
+        dst32_r = dst1110_r;
+        dst54_r = dst1312_r;
+        dst21_r = dst109_r;
+        dst43_r = dst1211_r;
+        dst65_r = dst1413_r;
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst1410, 1);
+    }
 }
 
 static void hevc_hv_uni_8t_16w_msa(uint8_t *src,
@@ -1654,7 +1928,7 @@
     v16u8 out;
     v8i16 filt, res0;
 
-    mask0 = LD_SB(&mc_filt_mask_arr[16]);
+    mask0 = LD_SB(&ff_hevc_mask_arr[16]);
     src -= 1;
 
     /* rearranging filter */
@@ -1666,7 +1940,7 @@
     LD_SB2(src, src_stride, src0, src1);
     XORI_B2_128_SB(src0, src1);
     VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
-    res0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1);
+    res0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
     res0 = __msa_srari_h(res0, 6);
     res0 = __msa_sat_s_h(res0, 7);
     out = PCKEV_XORI128_UB(res0, res0);
@@ -1681,7 +1955,7 @@
     v8i16 filt, out0, out1;
     v16u8 out;
 
-    mask0 = LD_SB(&mc_filt_mask_arr[16]);
+    mask0 = LD_SB(&ff_hevc_mask_arr[16]);
     src -= 1;
 
     /* rearranging filter */
@@ -1708,7 +1982,7 @@
     v16u8 out;
     v8i16 filt, out0, out1, out2, out3;
 
-    mask0 = LD_SB(&mc_filt_mask_arr[16]);
+    mask0 = LD_SB(&ff_hevc_mask_arr[16]);
     src -= 1;
 
     /* rearranging filter */
@@ -1745,7 +2019,7 @@
     v16u8 out;
     v8i16 filt, out0, out1, out2, out3;
 
-    mask0 = LD_SB(&mc_filt_mask_arr[16]);
+    mask0 = LD_SB(&ff_hevc_mask_arr[16]);
     src -= 1;
 
     /* rearranging filter */
@@ -1805,12 +2079,11 @@
                                 uint8_t *dst, int32_t dst_stride,
                                 const int8_t *filter, int32_t height)
 {
-    uint32_t loop_cnt;
     v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1;
     v16u8 out4, out5;
     v8i16 filt, out0, out1, out2, out3;
 
-    mask0 = LD_SB(&mc_filt_mask_arr[0]);
+    mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     src -= 1;
 
     /* rearranging filter */
@@ -1819,21 +2092,31 @@
 
     mask1 = mask0 + 2;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src, src_stride, src0, src1, src2, src3);
-        src += (4 * src_stride);
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
 
-        XORI_B4_128_SB(src0, src1, src2, src3);
-        HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0,
-                                   filt1, out0, out1, out2, out3);
-        SRARI_H4_SH(out0, out1, out2, out3, 6);
-        SAT_SH4_SH(out0, out1, out2, out3, 7);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0,
+                               filt1, out0, out1, out2, out3);
+    SRARI_H4_SH(out0, out1, out2, out3, 6);
+    SAT_SH4_SH(out0, out1, out2, out3, 7);
+    out4 = PCKEV_XORI128_UB(out0, out1);
+    out5 = PCKEV_XORI128_UB(out2, out3);
+    ST6x4_UB(out4, out5, dst, dst_stride);
+    dst += (4 * dst_stride);
 
-        out4 = PCKEV_XORI128_UB(out0, out1);
-        out5 = PCKEV_XORI128_UB(out2, out3);
-        ST6x4_UB(out4, out5, dst, dst_stride);
-        dst += (4 * dst_stride);
-    }
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
+
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0,
+                               filt1, out0, out1, out2, out3);
+    SRARI_H4_SH(out0, out1, out2, out3, 6);
+    SAT_SH4_SH(out0, out1, out2, out3, 7);
+    out4 = PCKEV_XORI128_UB(out0, out1);
+    out5 = PCKEV_XORI128_UB(out2, out3);
+    ST6x4_UB(out4, out5, dst, dst_stride);
+    dst += (4 * dst_stride);
 }
 
 static void common_hz_4t_8x2mult_msa(uint8_t *src, int32_t src_stride,
@@ -1845,7 +2128,7 @@
     v16u8 out;
     v8i16 filt, vec0, vec1, vec2, vec3;
 
-    mask0 = LD_SB(&mc_filt_mask_arr[0]);
+    mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     src -= 1;
 
     filt = LD_SH(filter);
@@ -1879,7 +2162,7 @@
     v16u8 tmp0, tmp1;
     v8i16 filt, out0, out1, out2, out3;
 
-    mask0 = LD_SB(&mc_filt_mask_arr[0]);
+    mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     src -= 1;
 
     /* rearranging filter */
@@ -1928,8 +2211,8 @@
     v16u8 tmp0, tmp1;
     v8i16 filt, out0, out1, out2, out3, out4, out5;
 
-    mask0 = LD_SB(&mc_filt_mask_arr[0]);
-    mask2 = LD_SB(&mc_filt_mask_arr[32]);
+    mask0 = LD_SB(&ff_hevc_mask_arr[0]);
+    mask2 = LD_SB(&ff_hevc_mask_arr[32]);
 
     src -= 1;
 
@@ -1940,32 +2223,33 @@
     mask1 = mask0 + 2;
     mask3 = mask2 + 2;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = 4; loop_cnt--;) {
         LD_SB4(src, src_stride, src0, src1, src2, src3);
         src += (4 * src_stride);
 
         XORI_B4_128_SB(src0, src1, src2, src3);
+        VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec0, vec1);
+        DOTP_SB2_SH(vec0, vec1, filt0, filt0, out0, out1);
+        VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec2, vec3);
+        DPADD_SB2_SH(vec2, vec3, filt1, filt1, out0, out1);
+        SRARI_H2_SH(out0, out1, 6);
+        SAT_SH2_SH(out0, out1, 7);
+        tmp0 = PCKEV_XORI128_UB(out0, out1);
+        ST4x4_UB(tmp0, tmp0, 0, 1, 2, 3, dst + 8, dst_stride);
+
         VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec4, vec5);
         VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec6, vec7);
-        VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec0, vec1);
         DOTP_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
                     out2, out3, out4, out5);
-        DOTP_SB2_SH(vec0, vec1, filt0, filt0, out0, out1);
         VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec8, vec9);
         VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec10, vec11);
-        VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec2, vec3);
         DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt1, filt1, filt1, filt1,
                      out2, out3, out4, out5);
-        DPADD_SB2_SH(vec2, vec3, filt1, filt1, out0, out1);
-        SRARI_H4_SH(out0, out1, out2, out3, 6);
-        SRARI_H2_SH(out4, out5, 6);
-        SAT_SH4_SH(out0, out1, out2, out3, 7);
-        SAT_SH2_SH(out4, out5, 7);
+        SRARI_H4_SH(out2, out3, out4, out5, 6);
+        SAT_SH4_SH(out2, out3, out4, out5, 7);
         tmp0 = PCKEV_XORI128_UB(out2, out3);
         tmp1 = PCKEV_XORI128_UB(out4, out5);
         ST8x4_UB(tmp0, tmp1, dst, dst_stride);
-        tmp0 = PCKEV_XORI128_UB(out0, out1);
-        ST4x4_UB(tmp0, tmp0, 0, 1, 2, 3, dst + 8, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -1977,10 +2261,11 @@
     uint32_t loop_cnt;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
     v16i8 filt0, filt1, mask0, mask1;
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m;
     v8i16 filt, out0, out1, out2, out3, out4, out5, out6, out7;
     v16u8 out;
 
-    mask0 = LD_SB(&mc_filt_mask_arr[0]);
+    mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     src -= 1;
 
     /* rearranging filter */
@@ -1995,20 +2280,34 @@
         src += (4 * src_stride);
 
         XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
-        HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0,
-                                   filt1, out0, out1, out2, out3);
-        HORIZ_4TAP_8WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1, filt0,
-                                   filt1, out4, out5, out6, out7);
+
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m);
+        DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,
+                    out0, out1, out2, out3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m);
+        DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1,
+                     out0, out1, out2, out3);
         SRARI_H4_SH(out0, out1, out2, out3, 6);
-        SRARI_H4_SH(out4, out5, out6, out7, 6);
         SAT_SH4_SH(out0, out1, out2, out3, 7);
-        SAT_SH4_SH(out4, out5, out6, out7, 7);
         out = PCKEV_XORI128_UB(out0, out1);
         ST_UB(out, dst);
         dst += dst_stride;
         out = PCKEV_XORI128_UB(out2, out3);
         ST_UB(out, dst);
         dst += dst_stride;
+
+        VSHF_B2_SB(src4, src4, src5, src5, mask0, mask0, vec0_m, vec1_m);
+        VSHF_B2_SB(src6, src6, src7, src7, mask0, mask0, vec2_m, vec3_m);
+        DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,
+                    out4, out5, out6, out7);
+        VSHF_B2_SB(src4, src4, src5, src5, mask1, mask1, vec0_m, vec1_m);
+        VSHF_B2_SB(src6, src6, src7, src7, mask1, mask1, vec2_m, vec3_m);
+        DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1,
+                     out4, out5, out6, out7);
+        SRARI_H4_SH(out4, out5, out6, out7, 6);
+        SAT_SH4_SH(out4, out5, out6, out7, 7);
         out = PCKEV_XORI128_UB(out4, out5);
         ST_UB(out, dst);
         dst += dst_stride;
@@ -2030,7 +2329,7 @@
     v8i16 filt, out0, out1, out2, out3;
     v16u8 tmp0, tmp1;
 
-    mask0 = LD_SB(&mc_filt_mask_arr[0]);
+    mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     src -= 1;
 
     /* rearranging filter */
@@ -2041,7 +2340,7 @@
     mask00 = mask0 + 8;
     mask11 = mask0 + 10;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = 8; loop_cnt--;) {
         LD_SB4(src, src_stride, src0, src2, src4, src6);
         LD_SB4(src + 16, src_stride, src1, src3, src5, src7);
         src += (4 * src_stride);
@@ -2109,9 +2408,10 @@
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
     v16i8 filt0, filt1, mask0, mask1;
     v16u8 out;
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m;
     v8i16 filt, out0, out1, out2, out3, out4, out5, out6, out7;
 
-    mask0 = LD_SB(&mc_filt_mask_arr[0]);
+    mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     src -= 1;
 
     /* rearranging filter */
@@ -2122,20 +2422,35 @@
 
     for (loop_cnt = (height >> 1); loop_cnt--;) {
         src0 = LD_SB(src);
+        src1 = LD_SB(src + 8);
         src2 = LD_SB(src + 16);
         src3 = LD_SB(src + 24);
         src += src_stride;
         src4 = LD_SB(src);
+        src5 = LD_SB(src + 8);
         src6 = LD_SB(src + 16);
         src7 = LD_SB(src + 24);
-        SLDI_B2_SB(src2, src6, src0, src4, src1, src5, 8);
         src += src_stride;
 
         XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
-        HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
-                                   filt0, filt1, out0, out1, out2, out3);
-        HORIZ_4TAP_8WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1,
-                                   filt0, filt1, out4, out5, out6, out7);
+
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m);
+        DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,
+                    out0, out1, out2, out3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m);
+        DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1,
+                     out0, out1, out2, out3);
+
+        VSHF_B2_SB(src4, src4, src5, src5, mask0, mask0, vec0_m, vec1_m);
+        VSHF_B2_SB(src6, src6, src7, src7, mask0, mask0, vec2_m, vec3_m);
+        DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,
+                    out4, out5, out6, out7);
+        VSHF_B2_SB(src4, src4, src5, src5, mask1, mask1, vec0_m, vec1_m);
+        VSHF_B2_SB(src6, src6, src7, src7, mask1, mask1, vec2_m, vec3_m);
+        DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1,
+                     out4, out5, out6, out7);
         SRARI_H4_SH(out0, out1, out2, out3, 6);
         SRARI_H4_SH(out4, out5, out6, out7, 6);
         SAT_SH4_SH(out0, out1, out2, out3, 7);
@@ -2177,7 +2492,7 @@
     ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
     src4332 = (v16i8) __msa_ilvr_d((v2i64) src43_r, (v2i64) src32_r);
     src4332 = (v16i8) __msa_xori_b((v16u8) src4332, 128);
-    out10 = FILT_4TAP_DPADD_S_H(src2110, src4332, filt0, filt1);
+    out10 = HEVC_FILT_4TAP_SH(src2110, src4332, filt0, filt1);
     out10 = __msa_srari_h(out10, 6);
     out10 = __msa_sat_s_h(out10, 7);
     out = PCKEV_XORI128_UB(out10, out10);
@@ -2214,14 +2529,14 @@
         ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
         src4332 = (v16i8) __msa_ilvr_d((v2i64) src43_r, (v2i64) src32_r);
         src4332 = (v16i8) __msa_xori_b((v16u8) src4332, 128);
-        out10 = FILT_4TAP_DPADD_S_H(src2110, src4332, filt0, filt1);
+        out10 = HEVC_FILT_4TAP_SH(src2110, src4332, filt0, filt1);
 
         src2 = LD_SB(src);
         src += (src_stride);
         ILVR_B2_SB(src5, src4, src2, src5, src54_r, src65_r);
         src2110 = (v16i8) __msa_ilvr_d((v2i64) src65_r, (v2i64) src54_r);
         src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
-        out32 = FILT_4TAP_DPADD_S_H(src4332, src2110, filt0, filt1);
+        out32 = HEVC_FILT_4TAP_SH(src4332, src2110, filt0, filt1);
         SRARI_H2_SH(out10, out32, 6);
         SAT_SH2_SH(out10, out32, 7);
         out = PCKEV_XORI128_UB(out10, out32);
@@ -2246,51 +2561,65 @@
                                 uint8_t *dst, int32_t dst_stride,
                                 const int8_t *filter, int32_t height)
 {
-    uint32_t loop_cnt;
-    v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, out0, out1;
-    v8i16 vec01, vec12, vec23, vec30, tmp0, tmp1, tmp2, tmp3;
-    v8i16 filt, filt0, filt1;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v16i8 src10_r, src32_r, src21_r, src43_r, src54_r, src65_r;
+    v8i16 dst0_r, dst1_r, dst2_r, dst3_r, filt0, filt1, filter_vec;
 
     src -= src_stride;
 
-    /* rearranging filter_y */
-    filt = LD_SH(filter);
-    SPLATI_H2_SH(filt, 0, 1, filt0, filt1);
+    filter_vec = LD_SH(filter);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    LD_UB3(src, src_stride, src0, src1, src2);
+    LD_SB3(src, src_stride, src0, src1, src2);
     src += (3 * src_stride);
+    XORI_B3_128_SB(src0, src1, src2);
+    ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
 
-    vec0 = (v16u8) __msa_xori_b((v16u8) src0, 128);
-    vec1 = (v16u8) __msa_xori_b((v16u8) src1, 128);
-    vec2 = (v16u8) __msa_xori_b((v16u8) src2, 128);
+    LD_SB2(src, src_stride, src3, src4);
+    src += (2 * src_stride);
+    XORI_B2_128_SB(src3, src4);
+    ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_UB4(src, src_stride, src3, src0, src1, src2);
-        src += (4 * src_stride);
+    dst0_r = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+    dst1_r = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
 
-        vec3 = (v16u8) __msa_xori_b((v16u8) src3, 128);
-        ILVR_B2_SH(vec1, vec0, vec3, vec2, vec01, vec23);
-        tmp0 = FILT_4TAP_DPADD_S_H(vec01, vec23, filt0, filt1);
+    LD_SB2(src, src_stride, src5, src6);
+    src += (2 * src_stride);
+    XORI_B2_128_SB(src5, src6);
+    ILVR_B2_SB(src5, src4, src6, src5, src54_r, src65_r);
 
-        vec0 = __msa_xori_b((v16u8) src0, 128);
-        ILVR_B2_SH(vec2, vec1, vec0, vec3, vec12, vec30);
-        tmp1 = FILT_4TAP_DPADD_S_H(vec12, vec30, filt0, filt1);
+    dst2_r = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+    dst3_r = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
 
-        vec1 = __msa_xori_b((v16u8) src1, 128);
-        vec01 = (v8i16) __msa_ilvr_b((v16i8) vec1, (v16i8) vec0);
-        tmp2 = FILT_4TAP_DPADD_S_H(vec23, vec01, filt0, filt1);
+    SRARI_H4_SH(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+    SAT_SH4_SH(dst0_r, dst1_r, dst2_r, dst3_r, 7);
+    out0 = PCKEV_XORI128_UB(dst0_r, dst1_r);
+    out1 = PCKEV_XORI128_UB(dst2_r, dst3_r);
+    ST6x4_UB(out0, out1, dst, dst_stride);
+    dst += (4 * dst_stride);
 
-        vec2 = __msa_xori_b((v16u8) src2, 128);
-        vec12 = (v8i16) __msa_ilvr_b((v16i8) vec2, (v16i8) vec1);
-        tmp3 = FILT_4TAP_DPADD_S_H(vec30, vec12, filt0, filt1);
+    LD_SB2(src, src_stride, src3, src4);
+    src += (2 * src_stride);
+    XORI_B2_128_SB(src3, src4);
+    ILVR_B2_SB(src3, src6, src4, src3, src32_r, src43_r);
 
-        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 6);
-        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
-        out0 = PCKEV_XORI128_UB(tmp0, tmp1);
-        out1 = PCKEV_XORI128_UB(tmp2, tmp3);
-        ST6x4_UB(out0, out1, dst, dst_stride);
-        dst += (4 * dst_stride);
-    }
+    dst0_r = HEVC_FILT_4TAP_SH(src54_r, src32_r, filt0, filt1);
+    dst1_r = HEVC_FILT_4TAP_SH(src65_r, src43_r, filt0, filt1);
+
+    LD_SB2(src, src_stride, src5, src6);
+    src += (2 * src_stride);
+    XORI_B2_128_SB(src5, src6);
+    ILVR_B2_SB(src5, src4, src6, src5, src54_r, src65_r);
+
+    dst2_r = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+    dst3_r = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
+
+    SRARI_H4_SH(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+    SAT_SH4_SH(dst0_r, dst1_r, dst2_r, dst3_r, 7);
+    out0 = PCKEV_XORI128_UB(dst0_r, dst1_r);
+    out1 = PCKEV_XORI128_UB(dst2_r, dst3_r);
+    ST6x4_UB(out0, out1, dst, dst_stride);
 }
 
 static void common_vt_4t_8x2_msa(uint8_t *src, int32_t src_stride,
@@ -2310,9 +2639,9 @@
     LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
     XORI_B5_128_SB(src0, src1, src2, src3, src4);
     ILVR_B2_SH(src1, src0, src3, src2, src01, src23);
-    tmp0 = FILT_4TAP_DPADD_S_H(src01, src23, filt0, filt1);
+    tmp0 = HEVC_FILT_4TAP_SH(src01, src23, filt0, filt1);
     ILVR_B2_SH(src2, src1, src4, src3, src12, src34);
-    tmp1 = FILT_4TAP_DPADD_S_H(src12, src34, filt0, filt1);
+    tmp1 = HEVC_FILT_4TAP_SH(src12, src34, filt0, filt1);
     SRARI_H2_SH(tmp0, tmp1, 6);
     SAT_SH2_SH(tmp0, tmp1, 7);
     out = PCKEV_XORI128_UB(tmp0, tmp1);
@@ -2347,9 +2676,9 @@
 
         XORI_B3_128_SB(src3, src4, src5);
         ILVR_B3_SH(src3, src2, src4, src3, src5, src4, vec1, vec3, vec4);
-        tmp0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1);
-        tmp1 = FILT_4TAP_DPADD_S_H(vec2, vec3, filt0, filt1);
-        tmp2 = FILT_4TAP_DPADD_S_H(vec1, vec4, filt0, filt1);
+        tmp0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        tmp1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        tmp2 = HEVC_FILT_4TAP_SH(vec1, vec4, filt0, filt1);
         SRARI_H2_SH(tmp0, tmp1, 6);
         tmp2 = __msa_srari_h(tmp2, 6);
         SAT_SH3_SH(tmp0, tmp1, tmp2, 7);
@@ -2400,10 +2729,10 @@
         XORI_B4_128_SB(src7, src8, src9, src10);
         ILVR_B4_SB(src7, src2, src8, src7, src9, src8, src10, src9,
                    src72_r, src87_r, src98_r, src109_r);
-        out0_r = FILT_4TAP_DPADD_S_H(src10_r, src72_r, filt0, filt1);
-        out1_r = FILT_4TAP_DPADD_S_H(src21_r, src87_r, filt0, filt1);
-        out2_r = FILT_4TAP_DPADD_S_H(src72_r, src98_r, filt0, filt1);
-        out3_r = FILT_4TAP_DPADD_S_H(src87_r, src109_r, filt0, filt1);
+        out0_r = HEVC_FILT_4TAP_SH(src10_r, src72_r, filt0, filt1);
+        out1_r = HEVC_FILT_4TAP_SH(src21_r, src87_r, filt0, filt1);
+        out2_r = HEVC_FILT_4TAP_SH(src72_r, src98_r, filt0, filt1);
+        out3_r = HEVC_FILT_4TAP_SH(src87_r, src109_r, filt0, filt1);
         SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
         SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
         tmp0 = PCKEV_XORI128_UB(out0_r, out1_r);
@@ -2437,58 +2766,60 @@
 {
     uint32_t loop_cnt;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
     v16u8 out0, out1;
-    v8i16 src10, src21, src32, src43, src54, src65, src87, src109, src1211;
-    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, filt, filt0, filt1;
-    v4u32 mask = { 2, 6, 2, 6 };
+    v16i8 src10_r, src32_r, src21_r, src43_r, src54_r, src65_r;
+    v16i8 src10_l, src32_l, src54_l, src21_l, src43_l, src65_l;
+    v16i8 src2110, src4332, src6554;
+    v8i16 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, filt0, filt1;
+    v8i16 filter_vec;
 
-    /* rearranging filter_y */
-    filt = LD_SH(filter);
-    SPLATI_H2_SH(filt, 0, 1, filt0, filt1);
+    src -= (1 * src_stride);
 
-    src -= src_stride;
+    filter_vec = LD_SH(filter);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     LD_SB3(src, src_stride, src0, src1, src2);
     src += (3 * src_stride);
 
     XORI_B3_128_SB(src0, src1, src2);
-    VSHF_W2_SB(src0, src1, src1, src2, mask, mask, vec0, vec1);
+    ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
+    ILVL_B2_SB(src1, src0, src2, src1, src10_l, src21_l);
+    src2110 = (v16i8) __msa_ilvr_d((v2i64) src21_l, (v2i64) src10_l);
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = 4; loop_cnt--;) {
         LD_SB4(src, src_stride, src3, src4, src5, src6);
         src += (4 * src_stride);
 
         XORI_B4_128_SB(src3, src4, src5, src6);
-        ILVR_B2_SH(src1, src0, src3, src2, src10, src32);
-        VSHF_W2_SB(src2, src3, src3, src4, mask, mask, vec2, vec3);
-        VSHF_W2_SB(src4, src5, src5, src6, mask, mask, vec4, vec5);
-        tmp0 = FILT_4TAP_DPADD_S_H(src10, src32, filt0, filt1);
-        ILVR_B4_SH(src2, src1, src4, src3, src5, src4, src6, src5,
-                   src21, src43, src54, src65);
-        tmp1 = FILT_4TAP_DPADD_S_H(src21, src43, filt0, filt1);
-        tmp2 = FILT_4TAP_DPADD_S_H(src32, src54, filt0, filt1);
-        tmp3 = FILT_4TAP_DPADD_S_H(src43, src65, filt0, filt1);
-        ILVR_B3_SH(vec1, vec0, vec3, vec2, vec5, vec4, src87, src109, src1211);
-        tmp4 = FILT_4TAP_DPADD_S_H(src87, src109, filt0, filt1);
-        tmp5 = FILT_4TAP_DPADD_S_H(src109, src1211, filt0, filt1);
-        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 6);
-        SRARI_H2_SH(tmp4, tmp5, 6);
-        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
-        SAT_SH2_SH(tmp4, tmp5, 7);
-        out0 = PCKEV_XORI128_UB(tmp0, tmp1);
-        out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+        ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
+        ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
+        src4332 = (v16i8) __msa_ilvr_d((v2i64) src43_l, (v2i64) src32_l);
+        ILVR_B2_SB(src5, src4, src6, src5, src54_r, src65_r);
+        ILVL_B2_SB(src5, src4, src6, src5, src54_l, src65_l);
+        src6554 = (v16i8) __msa_ilvr_d((v2i64) src65_l, (v2i64) src54_l);
+
+        dst0_r = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        dst1_r = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        dst0_l = HEVC_FILT_4TAP_SH(src2110, src4332, filt0, filt1);
+        dst2_r = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+        dst3_r = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
+        dst1_l = HEVC_FILT_4TAP_SH(src4332, src6554, filt0, filt1);
+
+        SRARI_H4_SH(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+        SRARI_H2_SH(dst0_l, dst1_l, 6);
+        SAT_SH4_SH(dst0_r, dst1_r, dst2_r, dst3_r, 7);
+        SAT_SH2_SH(dst0_l, dst1_l, 7);
+        out0 = PCKEV_XORI128_UB(dst0_r, dst1_r);
+        out1 = PCKEV_XORI128_UB(dst2_r, dst3_r);
         ST8x4_UB(out0, out1, dst, dst_stride);
-        out0 = PCKEV_XORI128_UB(tmp4, tmp5);
+        out0 = PCKEV_XORI128_UB(dst0_l, dst1_l);
         ST4x4_UB(out0, out0, 0, 1, 2, 3, dst + 8, dst_stride);
         dst += (4 * dst_stride);
 
-        src0 = src4;
-        src1 = src5;
         src2 = src6;
-        vec0 = vec4;
-        vec1 = vec5;
-        src2 = src6;
+        src10_r = src54_r;
+        src21_r = src65_r;
+        src2110 = src6554;
     }
 }
 
@@ -2524,14 +2855,14 @@
                    src32_r, src43_r, src54_r, src65_r);
         ILVL_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5,
                    src32_l, src43_l, src54_l, src65_l);
-        out0_r = FILT_4TAP_DPADD_S_H(src10_r, src32_r, filt0, filt1);
-        out1_r = FILT_4TAP_DPADD_S_H(src21_r, src43_r, filt0, filt1);
-        out2_r = FILT_4TAP_DPADD_S_H(src32_r, src54_r, filt0, filt1);
-        out3_r = FILT_4TAP_DPADD_S_H(src43_r, src65_r, filt0, filt1);
-        out0_l = FILT_4TAP_DPADD_S_H(src10_l, src32_l, filt0, filt1);
-        out1_l = FILT_4TAP_DPADD_S_H(src21_l, src43_l, filt0, filt1);
-        out2_l = FILT_4TAP_DPADD_S_H(src32_l, src54_l, filt0, filt1);
-        out3_l = FILT_4TAP_DPADD_S_H(src43_l, src65_l, filt0, filt1);
+        out0_r = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        out1_r = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        out2_r = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+        out3_r = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
+        out0_l = HEVC_FILT_4TAP_SH(src10_l, src32_l, filt0, filt1);
+        out1_l = HEVC_FILT_4TAP_SH(src21_l, src43_l, filt0, filt1);
+        out2_l = HEVC_FILT_4TAP_SH(src32_l, src54_l, filt0, filt1);
+        out3_l = HEVC_FILT_4TAP_SH(src43_l, src65_l, filt0, filt1);
         SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
         SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 6);
         SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
@@ -2580,7 +2911,7 @@
     XORI_B3_128_SB(src6, src7, src8);
     ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = 8; loop_cnt--;) {
         /* 16 width */
         LD_SB2(src, src_stride, src3, src4);
         XORI_B2_128_SB(src3, src4);
@@ -2594,14 +2925,14 @@
         ILVR_B2_SB(src9, src8, src10, src9, src98_r, src109_r);
 
         /* 16 width */
-        out0_r = FILT_4TAP_DPADD_S_H(src10_r, src32_r, filt0, filt1);
-        out0_l = FILT_4TAP_DPADD_S_H(src10_l, src32_l, filt0, filt1);
-        out1_r = FILT_4TAP_DPADD_S_H(src21_r, src43_r, filt0, filt1);
-        out1_l = FILT_4TAP_DPADD_S_H(src21_l, src43_l, filt0, filt1);
+        out0_r = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        out0_l = HEVC_FILT_4TAP_SH(src10_l, src32_l, filt0, filt1);
+        out1_r = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        out1_l = HEVC_FILT_4TAP_SH(src21_l, src43_l, filt0, filt1);
 
         /* 8 width */
-        out2_r = FILT_4TAP_DPADD_S_H(src76_r, src98_r, filt0, filt1);
-        out3_r = FILT_4TAP_DPADD_S_H(src87_r, src109_r, filt0, filt1);
+        out2_r = HEVC_FILT_4TAP_SH(src76_r, src98_r, filt0, filt1);
+        out3_r = HEVC_FILT_4TAP_SH(src87_r, src109_r, filt0, filt1);
 
         /* 16 + 8 width */
         SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
@@ -2634,14 +2965,14 @@
         ILVR_B2_SB(src11, src10, src8, src11, src76_r, src87_r);
 
         /* 16 width */
-        out0_r = FILT_4TAP_DPADD_S_H(src32_r, src10_r, filt0, filt1);
-        out0_l = FILT_4TAP_DPADD_S_H(src32_l, src10_l, filt0, filt1);
-        out1_r = FILT_4TAP_DPADD_S_H(src43_r, src21_r, filt0, filt1);
-        out1_l = FILT_4TAP_DPADD_S_H(src43_l, src21_l, filt0, filt1);
+        out0_r = HEVC_FILT_4TAP_SH(src32_r, src10_r, filt0, filt1);
+        out0_l = HEVC_FILT_4TAP_SH(src32_l, src10_l, filt0, filt1);
+        out1_r = HEVC_FILT_4TAP_SH(src43_r, src21_r, filt0, filt1);
+        out1_l = HEVC_FILT_4TAP_SH(src43_l, src21_l, filt0, filt1);
 
         /* 8 width */
-        out2_r = FILT_4TAP_DPADD_S_H(src98_r, src76_r, filt0, filt1);
-        out3_r = FILT_4TAP_DPADD_S_H(src109_r, src87_r, filt0, filt1);
+        out2_r = HEVC_FILT_4TAP_SH(src98_r, src76_r, filt0, filt1);
+        out3_r = HEVC_FILT_4TAP_SH(src109_r, src87_r, filt0, filt1);
 
         /* 16 + 8 width */
         SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
@@ -2661,13 +2992,11 @@
     }
 }
 
-static void common_vt_4t_32w_mult_msa(uint8_t *src, int32_t src_stride,
-                                      uint8_t *dst, int32_t dst_stride,
-                                      const int8_t *filter, int32_t height,
-                                      int32_t width)
+static void common_vt_4t_32w_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter, int32_t height)
 {
-    uint32_t loop_cnt, cnt;
-    uint8_t *dst_tmp, *src_tmp;
+    uint32_t loop_cnt;
     v16i8 src0, src1, src2, src3, src4, src6, src7, src8, src9, src10;
     v16i8 src10_r, src32_r, src76_r, src98_r;
     v16i8 src21_r, src43_r, src87_r, src109_r;
@@ -2683,113 +3012,96 @@
     filt = LD_SH(filter);
     SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
 
-    for (cnt = (width >> 5); cnt--;) {
-        dst_tmp = dst;
-        src_tmp = src;
+    /* 16 width */
+    LD_SB3(src, src_stride, src0, src1, src2);
+    XORI_B3_128_SB(src0, src1, src2);
+
+    ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
+    ILVL_B2_SB(src1, src0, src2, src1, src10_l, src21_l);
+
+    /* next 16 width */
+    LD_SB3(src + 16, src_stride, src6, src7, src8);
+    src += (3 * src_stride);
+
+    XORI_B3_128_SB(src6, src7, src8);
+    ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
+    ILVL_B2_SB(src7, src6, src8, src7, src76_l, src87_l);
+
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        /* 16 width */
+        LD_SB2(src, src_stride, src3, src4);
+        XORI_B2_128_SB(src3, src4);
+        ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
+        ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
 
         /* 16 width */
-        LD_SB3(src_tmp, src_stride, src0, src1, src2);
-        XORI_B3_128_SB(src0, src1, src2);
+        out0_r = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        out0_l = HEVC_FILT_4TAP_SH(src10_l, src32_l, filt0, filt1);
+        out1_r = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        out1_l = HEVC_FILT_4TAP_SH(src21_l, src43_l, filt0, filt1);
 
-        ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
-        ILVL_B2_SB(src1, src0, src2, src1, src10_l, src21_l);
+        /* 16 width */
+        SRARI_H4_SH(out0_r, out1_r, out0_l, out1_l, 6);
+        SAT_SH4_SH(out0_r, out1_r, out0_l, out1_l, 7);
+        out = PCKEV_XORI128_UB(out0_r, out0_l);
+        ST_UB(out, dst);
+        out = PCKEV_XORI128_UB(out1_r, out1_l);
+        ST_UB(out, dst + dst_stride);
+
+        src10_r = src32_r;
+        src21_r = src43_r;
+        src10_l = src32_l;
+        src21_l = src43_l;
+        src2 = src4;
 
         /* next 16 width */
-        LD_SB3(src_tmp + 16, src_stride, src6, src7, src8);
-        src_tmp += (3 * src_stride);
+        LD_SB2(src + 16, src_stride, src9, src10);
+        src += (2 * src_stride);
+        XORI_B2_128_SB(src9, src10);
+        ILVR_B2_SB(src9, src8, src10, src9, src98_r, src109_r);
+        ILVL_B2_SB(src9, src8, src10, src9, src98_l, src109_l);
 
-        XORI_B3_128_SB(src6, src7, src8);
-        ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
-        ILVL_B2_SB(src7, src6, src8, src7, src76_l, src87_l);
+        /* next 16 width */
+        out2_r = HEVC_FILT_4TAP_SH(src76_r, src98_r, filt0, filt1);
+        out2_l = HEVC_FILT_4TAP_SH(src76_l, src98_l, filt0, filt1);
+        out3_r = HEVC_FILT_4TAP_SH(src87_r, src109_r, filt0, filt1);
+        out3_l = HEVC_FILT_4TAP_SH(src87_l, src109_l, filt0, filt1);
 
-        for (loop_cnt = (height >> 1); loop_cnt--;) {
-            /* 16 width */
-            LD_SB2(src_tmp, src_stride, src3, src4);
-            XORI_B2_128_SB(src3, src4);
-            ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
-            ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
+        /* next 16 width */
+        SRARI_H4_SH(out2_r, out3_r, out2_l, out3_l, 6);
+        SAT_SH4_SH(out2_r, out3_r, out2_l, out3_l, 7);
+        out = PCKEV_XORI128_UB(out2_r, out2_l);
+        ST_UB(out, dst + 16);
+        out = PCKEV_XORI128_UB(out3_r, out3_l);
+        ST_UB(out, dst + 16 + dst_stride);
 
-            /* 16 width */
-            out0_r = FILT_4TAP_DPADD_S_H(src10_r, src32_r, filt0, filt1);
-            out0_l = FILT_4TAP_DPADD_S_H(src10_l, src32_l, filt0, filt1);
-            out1_r = FILT_4TAP_DPADD_S_H(src21_r, src43_r, filt0, filt1);
-            out1_l = FILT_4TAP_DPADD_S_H(src21_l, src43_l, filt0, filt1);
+        dst += 2 * dst_stride;
 
-            /* 16 width */
-            SRARI_H4_SH(out0_r, out1_r, out0_l, out1_l, 6);
-            SAT_SH4_SH(out0_r, out1_r, out0_l, out1_l, 7);
-            out = PCKEV_XORI128_UB(out0_r, out0_l);
-            ST_UB(out, dst_tmp);
-            out = PCKEV_XORI128_UB(out1_r, out1_l);
-            ST_UB(out, dst_tmp + dst_stride);
-
-            src10_r = src32_r;
-            src21_r = src43_r;
-            src10_l = src32_l;
-            src21_l = src43_l;
-            src2 = src4;
-
-            /* next 16 width */
-            LD_SB2(src_tmp + 16, src_stride, src9, src10);
-            src_tmp += (2 * src_stride);
-            XORI_B2_128_SB(src9, src10);
-            ILVR_B2_SB(src9, src8, src10, src9, src98_r, src109_r);
-            ILVL_B2_SB(src9, src8, src10, src9, src98_l, src109_l);
-
-            /* next 16 width */
-            out2_r = FILT_4TAP_DPADD_S_H(src76_r, src98_r, filt0, filt1);
-            out2_l = FILT_4TAP_DPADD_S_H(src76_l, src98_l, filt0, filt1);
-            out3_r = FILT_4TAP_DPADD_S_H(src87_r, src109_r, filt0, filt1);
-            out3_l = FILT_4TAP_DPADD_S_H(src87_l, src109_l, filt0, filt1);
-
-            /* next 16 width */
-            SRARI_H4_SH(out2_r, out3_r, out2_l, out3_l, 6);
-            SAT_SH4_SH(out2_r, out3_r, out2_l, out3_l, 7);
-            out = PCKEV_XORI128_UB(out2_r, out2_l);
-            ST_UB(out, dst_tmp + 16);
-            out = PCKEV_XORI128_UB(out3_r, out3_l);
-            ST_UB(out, dst_tmp + 16 + dst_stride);
-
-            dst_tmp += 2 * dst_stride;
-
-            src76_r = src98_r;
-            src87_r = src109_r;
-            src76_l = src98_l;
-            src87_l = src109_l;
-            src8 = src10;
-        }
-
-        src += 32;
-        dst += 32;
+        src76_r = src98_r;
+        src87_r = src109_r;
+        src76_l = src98_l;
+        src87_l = src109_l;
+        src8 = src10;
     }
 }
 
-static void common_vt_4t_32w_msa(uint8_t *src, int32_t src_stride,
-                                 uint8_t *dst, int32_t dst_stride,
-                                 const int8_t *filter, int32_t height)
-{
-    common_vt_4t_32w_mult_msa(src, src_stride, dst, dst_stride,
-                              filter, height, 32);
-}
-
 static void hevc_hv_uni_4t_4x2_msa(uint8_t *src,
                                    int32_t src_stride,
                                    uint8_t *dst,
                                    int32_t dst_stride,
                                    const int8_t *filter_x,
-                                   const int8_t *filter_y,
-                                   int32_t height)
+                                   const int8_t *filter_y)
 {
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec, tmp;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4;
-    v4i32 dst0_r, dst1_r;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
+    v8i16 dst20, dst31, dst42, dst10, dst32, dst21, dst43;
+    v4i32 dst0, dst1;
 
     src -= (src_stride + 1);
 
@@ -2797,60 +3109,35 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
+    VSHF_B2_SB(src0, src2, src0, src2, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src3, src1, src3, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src4, src2, src4, mask0, mask1, vec4, vec5);
 
-    XORI_B3_128_SB(src0, src1, src2);
+    dst20 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst31 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst42 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+    ILVRL_H2_SH(dst31, dst20, dst10, dst32);
+    ILVRL_H2_SH(dst42, dst31, dst21, dst43);
 
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
-    LD_SB2(src, src_stride, src3, src4);
-    XORI_B2_128_SB(src3, src4);
-
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-    dst32_r = __msa_ilvr_h(dst3, dst2);
-    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-    dst0_r >>= 6;
-
-    /* row 4 */
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    dst43_r = __msa_ilvr_h(dst4, dst3);
-    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-    dst1_r >>= 6;
-
-    dst0_r = (v4i32) __msa_pckev_h((v8i16) dst1_r, (v8i16) dst0_r);
-    dst0_r = (v4i32) __msa_srari_h((v8i16) dst0_r, 6);
-    dst0_r = (v4i32) CLIP_SH_0_255(dst0_r);
-    dst0_r = (v4i32) __msa_pckev_b((v16i8) dst0_r, (v16i8) dst0_r);
-
-    ST4x2_UB(dst0_r, dst, dst_stride);
+    dst0 = HEVC_FILT_4TAP(dst10, dst32, filt_h0, filt_h1);
+    dst1 = HEVC_FILT_4TAP(dst21, dst43, filt_h0, filt_h1);
+    dst0 >>= 6;
+    dst1 >>= 6;
+    tmp = __msa_pckev_h((v8i16) dst1, (v8i16) dst0);
+    tmp = __msa_srari_h(tmp, 6);
+    tmp = __msa_sat_s_h(tmp, 7);
+    out = PCKEV_XORI128_UB(tmp, tmp);
+    ST4x2_UB(out, dst, dst_stride);
 }
 
 static void hevc_hv_uni_4t_4x4_msa(uint8_t *src,
@@ -2858,20 +3145,19 @@
                                    uint8_t *dst,
                                    int32_t dst_stride,
                                    const int8_t *filter_x,
-                                   const int8_t *filter_y,
-                                   int32_t height)
+                                   const int8_t *filter_y)
 {
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r;
-    v8i16 out0_r, out1_r;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 filter_vec, tmp0, tmp1;
+    v8i16 dst30, dst41, dst52, dst63;
+    v8i16 dst10, dst32, dst54, dst21, dst43, dst65;
+    v4i32 dst0, dst1, dst2, dst3;
 
     src -= (src_stride + 1);
 
@@ -2879,78 +3165,38 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
+    VSHF_B2_SB(src0, src3, src0, src3, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src4, src1, src4, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src5, src2, src5, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src6, src3, src6, mask0, mask1, vec6, vec7);
 
-    XORI_B3_128_SB(src0, src1, src2);
+    dst30 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst41 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst52 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst63 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
-    LD_SB4(src, src_stride, src3, src4, src5, src6);
-    XORI_B4_128_SB(src3, src4, src5, src6);
-
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-    dst32_r = __msa_ilvr_h(dst3, dst2);
-    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-    dst0_r >>= 6;
-
-    /* row 4 */
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    dst43_r = __msa_ilvr_h(dst4, dst3);
-    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-    dst1_r >>= 6;
-
-    /* row 5 */
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-    dst10_r = __msa_ilvr_h(dst5, dst4);
-    dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-    dst2_r >>= 6;
-
-    /* row 6 */
-    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-    dst21_r = __msa_ilvr_h(dst2, dst5);
-    dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-    dst3_r >>= 6;
-
-    PCKEV_H2_SH(dst1_r, dst0_r, dst3_r, dst2_r, out0_r, out1_r);
-    SRARI_H2_SH(out0_r, out1_r, 6);
-    CLIP_SH2_0_255(out0_r, out1_r);
-    out0_r = (v8i16) __msa_pckev_b((v16i8) out1_r, (v16i8) out0_r);
-
-    ST4x4_UB(out0_r, out0_r, 0, 1, 2, 3, dst, dst_stride);
+    ILVRL_H2_SH(dst41, dst30, dst10, dst43);
+    ILVRL_H2_SH(dst52, dst41, dst21, dst54);
+    ILVRL_H2_SH(dst63, dst52, dst32, dst65);
+    dst0 = HEVC_FILT_4TAP(dst10, dst32, filt_h0, filt_h1);
+    dst1 = HEVC_FILT_4TAP(dst21, dst43, filt_h0, filt_h1);
+    dst2 = HEVC_FILT_4TAP(dst32, dst54, filt_h0, filt_h1);
+    dst3 = HEVC_FILT_4TAP(dst43, dst65, filt_h0, filt_h1);
+    SRA_4V(dst0, dst1, dst2, dst3, 6);
+    PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp0, tmp1);
+    SRARI_H2_SH(tmp0, tmp1, 6);
+    SAT_SH2_SH(tmp0, tmp1, 7);
+    out = PCKEV_XORI128_UB(tmp0, tmp1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
 }
 
 static void hevc_hv_uni_4t_4multx8mult_msa(uint8_t *src,
@@ -2962,19 +3208,20 @@
                                            int32_t height)
 {
     uint32_t loop_cnt;
+    v16u8 out0, out1;
     v16i8 src0, src1, src2, src3, src4, src5;
     v16i8 src6, src7, src8, src9, src10;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, dst9;
+    v8i16 filter_vec, tmp0, tmp1, tmp2, tmp3;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dst10, dst21, dst22, dst73, dst84, dst95, dst106;
     v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r, dst6_r, dst7_r;
     v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
     v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
-    v8i16 out0_r, out1_r, out2_r, out3_r;
+    v8i16 dst98_r, dst109_r;
 
     src -= (src_stride + 1);
 
@@ -2982,33 +3229,23 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     LD_SB3(src, src_stride, src0, src1, src2);
     src += (3 * src_stride);
 
     XORI_B3_128_SB(src0, src1, src2);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
+    VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src2, src1, src2, mask0, mask1, vec2, vec3);
+    dst10 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst21 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    ILVRL_H2_SH(dst21, dst10, dst10_r, dst21_r);
+    dst22 = (v8i16) __msa_splati_d((v2i64) dst21, 1);
 
     for (loop_cnt = height >> 3; loop_cnt--;) {
         LD_SB8(src, src_stride,
@@ -3017,88 +3254,46 @@
 
         XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
 
-        /* row 3 */
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        VSHF_B2_SB(src3, src7, src3, src7, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src8, src4, src8, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src9, src5, src9, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src10, src6, src10, mask0, mask1, vec6, vec7);
 
-        dst32_r = __msa_ilvr_h(dst3, dst2);
+        dst73 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst84 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst95 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst106 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        dst32_r = __msa_ilvr_h(dst73, dst22);
+        ILVRL_H2_SH(dst84, dst73, dst43_r, dst87_r);
+        ILVRL_H2_SH(dst95, dst84, dst54_r, dst98_r);
+        ILVRL_H2_SH(dst106, dst95, dst65_r, dst109_r);
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst73, 1);
+        dst76_r = __msa_ilvr_h(dst22, dst106);
+
         dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-        dst0_r >>= 6;
-
-        /* row 4 */
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-        dst43_r = __msa_ilvr_h(dst4, dst3);
         dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-        dst1_r >>= 6;
-
-        /* row 5 */
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-        dst54_r = __msa_ilvr_h(dst5, dst4);
         dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
-        dst2_r >>= 6;
-
-        /* row 6 */
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst6 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-
-        dst65_r = __msa_ilvr_h(dst6, dst5);
         dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
-        dst3_r >>= 6;
-
-        /* row 7 */
-        VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-        dst7 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
-
-        dst76_r = __msa_ilvr_h(dst7, dst6);
         dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
-        dst4_r >>= 6;
-
-        /* row 8 */
-        VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec0, vec1);
-        dst8 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst8, dst8);
-
-        dst87_r = __msa_ilvr_h(dst8, dst7);
         dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
-        dst5_r >>= 6;
-
-        /* row 9 */
-        VSHF_B2_SB(src9, src9, src9, src9, mask0, mask1, vec0, vec1);
-        dst9 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst9, dst9);
-
-        dst10_r = __msa_ilvr_h(dst9, dst8);
-        dst6_r = HEVC_FILT_4TAP(dst76_r, dst10_r, filt_h0, filt_h1);
-        dst6_r >>= 6;
-
-        /* row 10 */
-        VSHF_B2_SB(src10, src10, src10, src10, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-        dst21_r = __msa_ilvr_h(dst2, dst9);
-        dst7_r = HEVC_FILT_4TAP(dst87_r, dst21_r, filt_h0, filt_h1);
-        dst7_r >>= 6;
-
+        dst6_r = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+        dst7_r = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+        SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+        SRA_4V(dst4_r, dst5_r, dst6_r, dst7_r, 6);
         PCKEV_H4_SH(dst1_r, dst0_r, dst3_r, dst2_r,
                     dst5_r, dst4_r, dst7_r, dst6_r,
-                    out0_r, out1_r, out2_r, out3_r);
-
-        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
-        CLIP_SH4_0_255(out0_r, out1_r, out2_r, out3_r);
-
-        PCKEV_B2_SH(out1_r, out0_r, out3_r, out2_r, out0_r, out1_r);
-        ST4x8_UB(out0_r, out1_r, dst, dst_stride);
+                    tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 6);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+        out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+        ST4x8_UB(out0, out1, dst, dst_stride);
         dst += (8 * dst_stride);
+
+        dst10_r = dst98_r;
+        dst21_r = dst109_r;
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst106, 1);
     }
 }
 
@@ -3112,10 +3307,10 @@
 {
     if (2 == height) {
         hevc_hv_uni_4t_4x2_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height);
+                               filter_x, filter_y);
     } else if (4 == height) {
         hevc_hv_uni_4t_4x4_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height);
+                               filter_x, filter_y);
     } else if (0 == (height % 8)) {
         hevc_hv_uni_4t_4multx8mult_msa(src, src_stride, dst, dst_stride,
                                        filter_x, filter_y, height);
@@ -3130,19 +3325,23 @@
                                   const int8_t *filter_y,
                                   int32_t height)
 {
-    uint32_t loop_cnt;
+    v16u8 out0, out1, out2;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v16i8 src7, src8, src9, src10;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
+    v8i16 filt_h0, filt_h1, filter_vec;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6, dsth7, dsth8, dsth9;
+    v8i16 dsth10, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+    v4i32 dst4_r, dst5_r, dst6_r, dst7_r;
     v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
     v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
-    v8i16 out0_r, out1_r, out2_r, out3_r;
+    v8i16 dst54_r, dst76_r, dst98_r, dst65_r, dst87_r, dst109_r;
+    v8i16 dst98_l, dst65_l, dst54_l, dst76_l, dst87_l, dst109_l;
+    v8i16 dst1021_l, dst3243_l, dst5465_l, dst7687_l, dst98109_l;
 
     src -= (src_stride + 1);
 
@@ -3150,16 +3349,12 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     LD_SB3(src, src_stride, src0, src1, src2);
     src += (3 * src_stride);
 
@@ -3169,79 +3364,78 @@
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
 
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    dsth0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
 
-    ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
-    ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
+    ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+    ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
 
-    for (loop_cnt = height >> 2; loop_cnt--;) {
-        LD_SB4(src, src_stride, src3, src4, src5, src6);
-        src += (4 * src_stride);
+    LD_SB8(src, src_stride, src3, src4, src5, src6, src7, src8, src9, src10);
+    XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
 
-        XORI_B4_128_SB(src3, src4, src5, src6);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
 
-        /* row 3 */
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+    dsth3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dsth6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
-        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-        dst0_r >>= 6;
-        dst0_l >>= 6;
+    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src9, src9, src9, src9, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src10, src10, src10, src10, mask0, mask1, vec6, vec7);
 
-        /* row 4 */
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
+    dsth7 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth8 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth9 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dsth10 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-        ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
-        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-        dst1_r >>= 6;
-        dst1_l >>= 6;
+    ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+    ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+    ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+    ILVRL_H2_SH(dsth7, dsth6, dst76_r, dst76_l);
+    ILVRL_H2_SH(dsth8, dsth7, dst87_r, dst87_l);
+    ILVRL_H2_SH(dsth9, dsth8, dst98_r, dst98_l);
+    ILVRL_H2_SH(dsth10, dsth9, dst109_r, dst109_l);
 
-        /* row 5 */
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
+    PCKEV_D2_SH(dst21_l, dst10_l, dst43_l, dst32_l, dst1021_l, dst3243_l);
+    PCKEV_D2_SH(dst65_l, dst54_l, dst87_l, dst76_l, dst5465_l, dst7687_l);
+    dst98109_l = (v8i16) __msa_pckev_d((v2i64) dst109_l, (v2i64) dst98_l);
 
-        ILVRL_H2_SH(dst5, dst4, dst10_r, dst10_l);
-        dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-        dst2_l = HEVC_FILT_4TAP(dst32_l, dst10_l, filt_h0, filt_h1);
-
-        dst2_r >>= 6;
-        dst2_l >>= 6;
-
-        /* row 6 */
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-        ILVRL_H2_SH(dst2, dst5, dst21_r, dst21_l);
-        dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-        dst3_l = HEVC_FILT_4TAP(dst43_l, dst21_l, filt_h0, filt_h1);
-
-        dst3_r >>= 6;
-        dst3_l >>= 6;
-
-        PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r,
-                    dst2_l, dst2_r, dst3_l, dst3_r,
-                    out0_r, out1_r, out2_r, out3_r);
-
-        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
-        CLIP_SH4_0_255(out0_r, out1_r, out2_r, out3_r);
-
-        PCKEV_B2_SH(out1_r, out0_r, out3_r, out2_r, out0_r, out1_r);
-        ST6x4_UB(out0_r, out1_r, dst, dst_stride);
-        dst += (4 * dst_stride);
-    }
+    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+    dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+    dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+    dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+    dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+    dst6_r = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+    dst7_r = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+    dst0_l = HEVC_FILT_4TAP(dst1021_l, dst3243_l, filt_h0, filt_h1);
+    dst1_l = HEVC_FILT_4TAP(dst3243_l, dst5465_l, filt_h0, filt_h1);
+    dst2_l = HEVC_FILT_4TAP(dst5465_l, dst7687_l, filt_h0, filt_h1);
+    dst3_l = HEVC_FILT_4TAP(dst7687_l, dst98109_l, filt_h0, filt_h1);
+    SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+    SRA_4V(dst4_r, dst5_r, dst6_r, dst7_r, 6);
+    SRA_4V(dst0_l, dst1_l, dst2_l, dst3_l, 6);
+    PCKEV_H2_SH(dst1_r, dst0_r, dst3_r, dst2_r, tmp0, tmp1);
+    PCKEV_H2_SH(dst5_r, dst4_r, dst7_r, dst6_r, tmp2, tmp3);
+    PCKEV_H2_SH(dst1_l, dst0_l, dst3_l, dst2_l, tmp4, tmp5);
+    SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 6);
+    SRARI_H2_SH(tmp4, tmp5, 6);
+    SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3,7);
+    SAT_SH2_SH(tmp4, tmp5,7);
+    out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+    out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+    out2 = PCKEV_XORI128_UB(tmp4, tmp5);
+    ST4x8_UB(out0, out1, dst, dst_stride);
+    ST2x4_UB(out2, 0, dst + 4, dst_stride);
+    dst += 4 * dst_stride;
+    ST2x4_UB(out2, 4, dst + 4, dst_stride);
 }
 
 static void hevc_hv_uni_4t_8x2_msa(uint8_t *src,
@@ -3249,16 +3443,15 @@
                                    uint8_t *dst,
                                    int32_t dst_stride,
                                    const int8_t *filter_x,
-                                   const int8_t *filter_y,
-                                   int32_t height)
+                                   const int8_t *filter_y)
 {
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1, filter_vec;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
     v8i16 dst0, dst1, dst2, dst3, dst4;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l;
     v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
@@ -3271,66 +3464,125 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
-
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
 
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec8, vec9);
 
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    dst4 = HEVC_FILT_4TAP_SH(vec8, vec9, filt0, filt1);
     ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
     ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
-
-    LD_SB2(src, src_stride, src3, src4);
-    XORI_B2_128_SB(src3, src4);
-
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
     ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
     dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-    dst0_r >>= 6;
-    dst0_l >>= 6;
-
-    /* row 4 */
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
     dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst1_l >>= 6;
-
+    SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
     PCKEV_H2_SH(dst0_l, dst0_r, dst1_l, dst1_r, out0_r, out1_r);
     SRARI_H2_SH(out0_r, out1_r, 6);
-    CLIP_SH2_0_255(out0_r, out1_r);
-    out0_r = (v8i16) __msa_pckev_b((v16i8) out1_r, (v16i8) out0_r);
+    SAT_SH2_SH(out0_r, out1_r, 7);
+    out = PCKEV_XORI128_UB(out0_r, out1_r);
+    ST8x2_UB(out, dst, dst_stride);
+}
 
-    ST8x2_UB(out0_r, dst, dst_stride);
+static void hevc_hv_uni_4t_8multx4_msa(uint8_t *src,
+                                       int32_t src_stride,
+                                       uint8_t *dst,
+                                       int32_t dst_stride,
+                                       const int8_t *filter_x,
+                                       const int8_t *filter_y,
+                                       int32_t width8mult)
+{
+    uint32_t cnt;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, mask0, mask1;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 filt0, filt1, filt_h0, filt_h1, filter_vec;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, tmp0, tmp1, tmp2, tmp3;
+    v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
+
+    src -= (src_stride + 1);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+
+    for (cnt = width8mult; cnt--;) {
+        LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+        src += 8;
+        XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+
+        ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
+        ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
+
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+
+        dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+        ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+
+        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
+        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
+        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
+
+        SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+        SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+
+        PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                    dst3_r, tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 6);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+        out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += 8;
+    }
 }
 
 static void hevc_hv_uni_4t_8x6_msa(uint8_t *src,
@@ -3338,16 +3590,16 @@
                                    uint8_t *dst,
                                    int32_t dst_stride,
                                    const int8_t *filter_x,
-                                   const int8_t *filter_y,
-                                   int32_t height)
+                                   const int8_t *filter_y)
 {
+    v16u8 out0, out1, out2;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1, filter_vec;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+    v16i8 vec10, vec11, vec12, vec13, vec14, vec15, vec16, vec17;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
     v4i32 dst4_r, dst4_l, dst5_r, dst5_l;
@@ -3363,157 +3615,104 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    LD_SB4(src, src_stride, src5, src6, src7, src8);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
-
-    XORI_B3_128_SB(src0, src1, src2);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B4_128_SB(src5, src6, src7, src8);
 
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec8, vec9);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec10, vec11);
+    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec12, vec13);
+    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec14, vec15);
+    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec16, vec17);
 
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    dst4 = HEVC_FILT_4TAP_SH(vec8, vec9, filt0, filt1);
+    dst5 = HEVC_FILT_4TAP_SH(vec10, vec11, filt0, filt1);
+    dst6 = HEVC_FILT_4TAP_SH(vec12, vec13, filt0, filt1);
+    dst7 = HEVC_FILT_4TAP_SH(vec14, vec15, filt0, filt1);
+    dst8 = HEVC_FILT_4TAP_SH(vec16, vec17, filt0, filt1);
 
     ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
     ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
-
-    LD_SB2(src, src_stride, src3, src4);
-    src += (2 * src_stride);
-
-    XORI_B2_128_SB(src3, src4);
-
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
     ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+    ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+    ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+    ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
+    ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
+
     dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
     dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-
-    dst0_r >>= 6;
-    dst0_l >>= 6;
-
-    /* row 4 */
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
     dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst1_l >>= 6;
-
-    LD_SB2(src, src_stride, src5, src6);
-    src += (2 * src_stride);
-
-    XORI_B2_128_SB(src5, src6);
-
-    /* row 5 */
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-    ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
     dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
     dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
-    dst2_r >>= 6;
-    dst2_l >>= 6;
-
-    /* row 6 */
-    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-    dst6 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-
-    ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
     dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
     dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
-    dst3_r >>= 6;
-    dst3_l >>= 6;
-
-    LD_SB2(src, src_stride, src7, src8);
-    src += (2 * src_stride);
-
-    XORI_B2_128_SB(src7, src8);
-
-    /* row 7 */
-    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-    dst7 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
-
-    ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
     dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
     dst4_l = HEVC_FILT_4TAP(dst54_l, dst76_l, filt_h0, filt_h1);
-
-    dst4_r >>= 6;
-    dst4_l >>= 6;
-
-    /* row 8 */
-    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec0, vec1);
-    dst8 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst8, dst8);
-
-    ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
     dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
     dst5_l = HEVC_FILT_4TAP(dst65_l, dst87_l, filt_h0, filt_h1);
-    dst5_r >>= 6;
-    dst5_l >>= 6;
 
+    SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+    SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+    SRA_4V(dst4_r, dst4_l, dst5_r, dst5_l, 6);
     PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r,
                 dst2_l, dst2_r, dst3_l, dst3_r, out0_r, out1_r, out2_r, out3_r);
     PCKEV_H2_SH(dst4_l, dst4_r, dst5_l, dst5_r, out4_r, out5_r);
     SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
     SRARI_H2_SH(out4_r, out5_r, 6);
-    CLIP_SH4_0_255(out0_r, out1_r, out2_r, out3_r);
-    CLIP_SH2_0_255(out4_r, out5_r);
+    SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+    SAT_SH2_SH(out4_r, out5_r, 7);
+    out0 = PCKEV_XORI128_UB(out0_r, out1_r);
+    out1 = PCKEV_XORI128_UB(out2_r, out3_r);
+    out2 = PCKEV_XORI128_UB(out4_r, out5_r);
 
-    PCKEV_B2_SH(out1_r, out0_r, out3_r, out2_r, out0_r, out1_r);
-    out2_r = (v8i16) __msa_pckev_b((v16i8) out5_r, (v16i8) out4_r);
-
-    ST8x4_UB(out0_r, out1_r, dst, dst_stride);
+    ST8x4_UB(out0, out1, dst, dst_stride);
     dst += (4 * dst_stride);
-    ST8x2_UB(out2_r, dst, dst_stride);
+    ST8x2_UB(out2, dst, dst_stride);
 }
 
-static void hevc_hv_uni_4t_8w_mult_msa(uint8_t *src,
-                                       int32_t src_stride,
-                                       uint8_t *dst,
-                                       int32_t dst_stride,
-                                       const int8_t *filter_x,
-                                       const int8_t *filter_y,
-                                       int32_t height,
-                                       int32_t width)
+static void hevc_hv_uni_4t_8multx4mult_msa(uint8_t *src,
+                                           int32_t src_stride,
+                                           uint8_t *dst,
+                                           int32_t dst_stride,
+                                           const int8_t *filter_x,
+                                           const int8_t *filter_y,
+                                           int32_t height,
+                                           int32_t width8mult)
 {
     uint32_t loop_cnt, cnt;
     uint8_t *src_tmp;
     uint8_t *dst_tmp;
+    v16u8 out0, out1;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1, filter_vec;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
     v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
     v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
+    v8i16 dst54_r, dst54_l, dst65_r, dst65_l, dst6;
     v8i16 out0_r, out1_r, out2_r, out3_r;
 
     src -= (src_stride + 1);
@@ -3522,17 +3721,13 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
-    for (cnt = width >> 3; cnt--;) {
+    for (cnt = width8mult; cnt--;) {
         src_tmp = src;
         dst_tmp = dst;
 
@@ -3545,79 +3740,62 @@
         VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
         VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
 
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
 
         ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
         ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
 
-        for (loop_cnt = height >> 2; loop_cnt--;) {
+        for (loop_cnt = (height >> 2); loop_cnt--;) {
             LD_SB4(src_tmp, src_stride, src3, src4, src5, src6);
             src_tmp += (4 * src_stride);
 
             XORI_B4_128_SB(src3, src4, src5, src6);
 
-            /* row 3 */
             VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-            dst3 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+            VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+            VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+            VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+
+            dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+            dst4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+            dst5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+            dst6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
             ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+            ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+            ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+            ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+
             dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
             dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-
-            dst0_r >>= 6;
-            dst0_l >>= 6;
-
-            /* row 4 */
-            VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-            dst4 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-            ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
             dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
             dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-            dst1_r >>= 6;
-            dst1_l >>= 6;
+            dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+            dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+            dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+            dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
 
-            /* row 5 */
-            VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-            dst5 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-            ILVRL_H2_SH(dst5, dst4, dst10_r, dst10_l);
-            dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-            dst2_l = HEVC_FILT_4TAP(dst32_l, dst10_l, filt_h0, filt_h1);
-
-            dst2_r >>= 6;
-            dst2_l >>= 6;
-
-            /* row 6 */
-            VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-            dst2 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-            ILVRL_H2_SH(dst2, dst5, dst21_r, dst21_l);
-            dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-            dst3_l = HEVC_FILT_4TAP(dst43_l, dst21_l, filt_h0, filt_h1);
-
-            dst3_r >>= 6;
-            dst3_l >>= 6;
+            SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+            SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
 
             PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r,
                         dst2_l, dst2_r, dst3_l, dst3_r,
                         out0_r, out1_r, out2_r, out3_r);
 
             SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 6);
-            CLIP_SH4_0_255(out0_r, out1_r, out2_r, out3_r);
-
-            PCKEV_B2_SH(out1_r, out0_r, out3_r, out2_r, out0_r, out1_r);
-            ST8x4_UB(out0_r, out1_r, dst_tmp, dst_stride);
+            SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+            out0 = PCKEV_XORI128_UB(out0_r, out1_r);
+            out1 = PCKEV_XORI128_UB(out2_r, out3_r);
+            ST8x4_UB(out0, out1, dst_tmp, dst_stride);
             dst_tmp += (4 * dst_stride);
+
+            dst10_r = dst54_r;
+            dst10_l = dst54_l;
+            dst21_r = dst65_r;
+            dst21_l = dst65_l;
+            dst2 = dst6;
         }
 
         src += 8;
@@ -3635,13 +3813,16 @@
 {
     if (2 == height) {
         hevc_hv_uni_4t_8x2_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height);
+                               filter_x, filter_y);
+    } else if (4 == height) {
+        hevc_hv_uni_4t_8multx4_msa(src, src_stride, dst, dst_stride,
+                                   filter_x, filter_y, 1);
     } else if (6 == height) {
         hevc_hv_uni_4t_8x6_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height);
+                               filter_x, filter_y);
     } else if (0 == (height % 4)) {
-        hevc_hv_uni_4t_8w_mult_msa(src, src_stride, dst, dst_stride,
-                                   filter_x, filter_y, height, 8);
+        hevc_hv_uni_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
+                                       filter_x, filter_y, height, 1);
     }
 }
 
@@ -3653,11 +3834,164 @@
                                    const int8_t *filter_y,
                                    int32_t height)
 {
-    hevc_hv_uni_4t_8w_mult_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height, 8);
+    uint32_t loop_cnt;
+    uint8_t *src_tmp, *dst_tmp;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 mask0, mask1, mask2, mask3;
+    v8i16 filt0, filt1, filt_h0, filt_h1, filter_vec, tmp0, tmp1, tmp2, tmp3;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6;
+    v8i16 dst10, dst21, dst22, dst73, dst84, dst95, dst106;
+    v8i16 dst76_r, dst98_r, dst87_r, dst109_r;
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
+    v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
 
-    hevc_hv_uni_4t_4w_msa(src + 8, src_stride, dst + 8, dst_stride,
-                          filter_x, filter_y, height);
+    src -= (src_stride + 1);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+
+    src_tmp = src;
+    dst_tmp = dst;
+
+    LD_SB3(src_tmp, src_stride, src0, src1, src2);
+    src_tmp += (3 * src_stride);
+
+    XORI_B3_128_SB(src0, src1, src2);
+
+    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+
+    dsth0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+
+    ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+    ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src_tmp, src_stride, src3, src4, src5, src6);
+        src_tmp += (4 * src_stride);
+        XORI_B4_128_SB(src3, src4, src5, src6);
+
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+
+        dsth3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dsth4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dsth5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dsth6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+        ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+
+        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
+        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
+        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
+
+        SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+        SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+
+        PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                    dst3_r, tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 6);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+        out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+        ST8x4_UB(out0, out1, dst_tmp, dst_stride);
+        dst_tmp += (4 * dst_stride);
+
+        dst10_r = dst54_r;
+        dst10_l = dst54_l;
+        dst21_r = dst65_r;
+        dst21_l = dst65_l;
+        dsth2 = dsth6;
+    }
+
+    src += 8;
+    dst += 8;
+
+    mask2 = LD_SB(ff_hevc_mask_arr + 16);
+    mask3 = mask2 + 2;
+
+    LD_SB3(src, src_stride, src0, src1, src2);
+    src += (3 * src_stride);
+    XORI_B3_128_SB(src0, src1, src2);
+    VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
+    VSHF_B2_SB(src1, src2, src1, src2, mask2, mask3, vec2, vec3);
+
+    dst10 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst21 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+
+    ILVRL_H2_SH(dst21, dst10, dst10_r, dst21_r);
+    dst22 = (v8i16) __msa_splati_d((v2i64) dst21, 1);
+
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB8(src, src_stride,
+               src3, src4, src5, src6, src7, src8, src9, src10);
+        src += (8 * src_stride);
+        XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
+        VSHF_B2_SB(src3, src7, src3, src7, mask2, mask3, vec0, vec1);
+        VSHF_B2_SB(src4, src8, src4, src8, mask2, mask3, vec2, vec3);
+        VSHF_B2_SB(src5, src9, src5, src9, mask2, mask3, vec4, vec5);
+        VSHF_B2_SB(src6, src10, src6, src10, mask2, mask3, vec6, vec7);
+
+        dst73 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst84 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst95 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst106 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+        dst32_r = __msa_ilvr_h(dst73, dst22);
+        ILVRL_H2_SH(dst84, dst73, dst43_r, dst87_r);
+        ILVRL_H2_SH(dst95, dst84, dst54_r, dst98_r);
+        ILVRL_H2_SH(dst106, dst95, dst65_r, dst109_r);
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst73, 1);
+        dst76_r = __msa_ilvr_h(dst22, dst106);
+
+        dst0 = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst1 = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst2 = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst3 = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst4 = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+        dst5 = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+        dst6 = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+        dst7 = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+        SRA_4V(dst0, dst1, dst2, dst3, 6);
+        SRA_4V(dst4, dst5, dst6, dst7, 6);
+        PCKEV_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6,
+                    tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 6);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+        out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+        ST4x8_UB(out0, out1, dst, dst_stride);
+        dst += (8 * dst_stride);
+
+        dst10_r = dst98_r;
+        dst21_r = dst109_r;
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst106, 1);
+    }
 }
 
 static void hevc_hv_uni_4t_16w_msa(uint8_t *src,
@@ -3668,8 +4002,13 @@
                                    const int8_t *filter_y,
                                    int32_t height)
 {
-    hevc_hv_uni_4t_8w_mult_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height, 16);
+    if (4 == height) {
+        hevc_hv_uni_4t_8multx4_msa(src, src_stride, dst, dst_stride, filter_x,
+                                   filter_y, 2);
+    } else {
+        hevc_hv_uni_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
+                                       filter_x, filter_y, height, 2);
+    }
 }
 
 static void hevc_hv_uni_4t_24w_msa(uint8_t *src,
@@ -3680,8 +4019,8 @@
                                    const int8_t *filter_y,
                                    int32_t height)
 {
-    hevc_hv_uni_4t_8w_mult_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height, 24);
+    hevc_hv_uni_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
+                                   filter_x, filter_y, height, 3);
 }
 
 static void hevc_hv_uni_4t_32w_msa(uint8_t *src,
@@ -3692,8 +4031,8 @@
                                    const int8_t *filter_y,
                                    int32_t height)
 {
-    hevc_hv_uni_4t_8w_mult_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height, 32);
+    hevc_hv_uni_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
+                                   filter_x, filter_y, height, 4);
 }
 
 #define UNI_MC_COPY(WIDTH)                                                 \
@@ -3721,15 +4060,13 @@
 
 #define UNI_MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                           \
 void ff_hevc_put_hevc_uni_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,           \
-                                                         ptrdiff_t             \
-                                                         dst_stride,           \
-                                                         uint8_t *src,         \
-                                                         ptrdiff_t             \
-                                                         src_stride,           \
-                                                         int height,           \
-                                                         intptr_t mx,          \
-                                                         intptr_t my,          \
-                                                         int width)            \
+                                                       ptrdiff_t dst_stride,   \
+                                                       uint8_t *src,           \
+                                                       ptrdiff_t src_stride,   \
+                                                       int height,             \
+                                                       intptr_t mx,            \
+                                                       intptr_t my,            \
+                                                       int width)              \
 {                                                                              \
     const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];              \
                                                                                \
@@ -3773,41 +4110,38 @@
 
 #undef UNI_MC
 
-#define UNI_MC_HV(PEL, DIR, WIDTH, TAP, DIR1)                           \
-void ff_hevc_put_hevc_uni_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,    \
-                                                         ptrdiff_t      \
-                                                         dst_stride,    \
-                                                         uint8_t *src,  \
-                                                         ptrdiff_t      \
-                                                         src_stride,    \
-                                                         int height,    \
-                                                         intptr_t mx,   \
-                                                         intptr_t my,   \
-                                                         int width)     \
-{                                                                       \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];           \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];           \
-                                                                        \
-    hevc_##DIR1##_uni_##TAP##t_##WIDTH##w_msa(src, src_stride, dst,     \
-                                              dst_stride, filter_x,     \
-                                              filter_y, height);        \
+#define UNI_MC_HV(PEL, WIDTH, TAP)                                         \
+void ff_hevc_put_hevc_uni_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst,          \
+                                                    ptrdiff_t dst_stride,  \
+                                                    uint8_t *src,          \
+                                                    ptrdiff_t src_stride,  \
+                                                    int height,            \
+                                                    intptr_t mx,           \
+                                                    intptr_t my,           \
+                                                    int width)             \
+{                                                                          \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];              \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];              \
+                                                                           \
+    hevc_hv_uni_##TAP##t_##WIDTH##w_msa(src, src_stride, dst, dst_stride,  \
+                                        filter_x, filter_y, height);       \
 }
 
-UNI_MC_HV(qpel, hv, 4, 8, hv);
-UNI_MC_HV(qpel, hv, 8, 8, hv);
-UNI_MC_HV(qpel, hv, 12, 8, hv);
-UNI_MC_HV(qpel, hv, 16, 8, hv);
-UNI_MC_HV(qpel, hv, 24, 8, hv);
-UNI_MC_HV(qpel, hv, 32, 8, hv);
-UNI_MC_HV(qpel, hv, 48, 8, hv);
-UNI_MC_HV(qpel, hv, 64, 8, hv);
+UNI_MC_HV(qpel, 4, 8);
+UNI_MC_HV(qpel, 8, 8);
+UNI_MC_HV(qpel, 12, 8);
+UNI_MC_HV(qpel, 16, 8);
+UNI_MC_HV(qpel, 24, 8);
+UNI_MC_HV(qpel, 32, 8);
+UNI_MC_HV(qpel, 48, 8);
+UNI_MC_HV(qpel, 64, 8);
 
-UNI_MC_HV(epel, hv, 4, 4, hv);
-UNI_MC_HV(epel, hv, 6, 4, hv);
-UNI_MC_HV(epel, hv, 8, 4, hv);
-UNI_MC_HV(epel, hv, 12, 4, hv);
-UNI_MC_HV(epel, hv, 16, 4, hv);
-UNI_MC_HV(epel, hv, 24, 4, hv);
-UNI_MC_HV(epel, hv, 32, 4, hv);
+UNI_MC_HV(epel, 4, 4);
+UNI_MC_HV(epel, 6, 4);
+UNI_MC_HV(epel, 8, 4);
+UNI_MC_HV(epel, 12, 4);
+UNI_MC_HV(epel, 16, 4);
+UNI_MC_HV(epel, 24, 4);
+UNI_MC_HV(epel, 32, 4);
 
 #undef UNI_MC_HV

diff --git a/libavcodec/mips/hevc_mc_uniw_msa.c b/libavcodec/mips/hevc_mc_uniw_msa.c
index 7c01c32..f9ecb41 100644
--- a/libavcodec/mips/hevc_mc_uniw_msa.c
+++ b/libavcodec/mips/hevc_mc_uniw_msa.c

@@ -29,46 +29,6 @@
     0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20
 };
 
-#define HEVC_HV_UNIW_RND_CLIP4(in0, in1, in2, in3, wgt, offset, rnd,       \
-                               out0, out1, out2, out3)                     \
-{                                                                          \
-    MUL4(in0, wgt, in1, wgt, in2, wgt, in3, wgt, out0, out1, out2, out3);  \
-    SRAR_W4_SW(out0, out1, out2, out3, rnd);                               \
-    ADD4(out0, offset, out1, offset, out2, offset, out3, offset,           \
-         out0, out1, out2, out3);                                          \
-    out0 = CLIP_SW_0_255(out0);                                            \
-    out1 = CLIP_SW_0_255(out1);                                            \
-    out2 = CLIP_SW_0_255(out2);                                            \
-    out3 = CLIP_SW_0_255(out3);                                            \
-}
-
-#define HEVC_UNIW_RND_CLIP2(in0, in1, wgt, offset, rnd,              \
-                            out0_r, out1_r, out0_l, out1_l)          \
-{                                                                    \
-    ILVR_H2_SW(in0, in0, in1, in1, out0_r, out1_r);                  \
-    ILVL_H2_SW(in0, in0, in1, in1, out0_l, out1_l);                  \
-    DOTP_SH4_SW(out0_r, out1_r, out0_l, out1_l, wgt, wgt, wgt, wgt,  \
-                out0_r, out1_r, out0_l, out1_l);                     \
-    SRAR_W4_SW(out0_r, out1_r, out0_l, out1_l, rnd);                 \
-    ADD4(out0_r, offset, out1_r, offset,                             \
-         out0_l, offset, out1_l, offset,                             \
-         out0_r, out1_r, out0_l, out1_l);                            \
-    out0_r = CLIP_SW_0_255(out0_r);                                  \
-    out1_r = CLIP_SW_0_255(out1_r);                                  \
-    out0_l = CLIP_SW_0_255(out0_l);                                  \
-    out1_l = CLIP_SW_0_255(out1_l);                                  \
-}
-
-#define HEVC_UNIW_RND_CLIP4(in0, in1, in2, in3, wgt, offset, rnd,  \
-                            out0_r, out1_r, out2_r, out3_r,        \
-                            out0_l, out1_l, out2_l, out3_l)        \
-{                                                                  \
-    HEVC_UNIW_RND_CLIP2(in0, in1, wgt, offset, rnd,                \
-                        out0_r, out1_r, out0_l, out1_l);           \
-    HEVC_UNIW_RND_CLIP2(in2, in3, wgt, offset, rnd,                \
-                        out2_r, out3_r, out2_l, out3_l);           \
-}
-
 #define HEVC_UNIW_RND_CLIP2_MAX_SATU_H(in0_h, in1_h, wgt_w, offset_h, rnd_w,  \
                                        out0_h, out1_h)                        \
 {                                                                             \
@@ -1337,6 +1297,7 @@
                                      int32_t rnd_val)
 {
     int32_t loop_cnt;
+    v16u8 out0, out1;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
     v16i8 src9, src10, src11, src12, src13, src14;
     v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
@@ -1344,21 +1305,27 @@
     v16i8 src1110_r, src1211_r, src1312_r, src1413_r;
     v16i8 src2110, src4332, src6554, src8776, src10998;
     v16i8 src12111110, src14131312;
-    v8i16 dst10, dst32, dst54, dst76;
+    v8i16 filter_vec, dst01, dst23, dst45, dst67;
     v8i16 filt0, filt1, filt2, filt3;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 dst0, dst1, dst2, dst3, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= (3 * src_stride);
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
-    weight = weight & 0x0000FFFF;
+
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
@@ -1387,28 +1354,21 @@
                    src1413_r, src1312_r,
                    src8776, src10998, src12111110, src14131312);
         XORI_B4_128_SB(src8776, src10998, src12111110, src14131312);
+        dst01 = HEVC_FILT_8TAP_SH(src2110, src4332, src6554, src8776, filt0,
+                                  filt1, filt2, filt3);
+        dst23 = HEVC_FILT_8TAP_SH(src4332, src6554, src8776, src10998, filt0,
+                                  filt1, filt2, filt3);
+        dst45 = HEVC_FILT_8TAP_SH(src6554, src8776, src10998, src12111110,
+                                  filt0, filt1, filt2, filt3);
+        dst67 = HEVC_FILT_8TAP_SH(src8776, src10998, src12111110, src14131312,
+                                  filt0, filt1, filt2, filt3);
 
-        dst10 = const_vec;
-        DPADD_SB4_SH(src2110, src4332, src6554, src8776, filt0, filt1,
-                     filt2, filt3, dst10, dst10, dst10, dst10);
-        dst32 = const_vec;
-        DPADD_SB4_SH(src4332, src6554, src8776, src10998,
-                     filt0, filt1, filt2, filt3, dst32, dst32, dst32, dst32);
-        dst54 = const_vec;
-        DPADD_SB4_SH(src6554, src8776, src10998, src12111110,
-                     filt0, filt1, filt2, filt3, dst54, dst54, dst54, dst54);
-        dst76 = const_vec;
-        DPADD_SB4_SH(src8776, src10998, src12111110, src14131312,
-                     filt0, filt1, filt2, filt3, dst76, dst76, dst76, dst76);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst01, dst23, dst45, dst67, weight_vec,
+                                       offset_vec, rnd_vec, dst0, dst1, dst2,
+                                       dst3);
 
-        HEVC_UNIW_RND_CLIP4(dst10, dst32, dst54, dst76,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST4x8_UB(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        ST4x8_UB(out0, out1, dst, dst_stride);
         dst += (8 * dst_stride);
 
         src2110 = src10998;
@@ -1429,24 +1389,30 @@
                                      int32_t rnd_val)
 {
     int32_t loop_cnt;
+    v16u8 out0, out1;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
     v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
-    v8i16 tmp0, tmp1, tmp2, tmp3;
     v8i16 filt0, filt1, filt2, filt3;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 filter_vec;
+    v8i16 dst0, dst1, dst2, dst3, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= (3 * src_stride);
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
-    weight = weight & 0x0000FFFF;
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
@@ -1464,28 +1430,21 @@
         XORI_B4_128_SB(src7, src8, src9, src10);
         ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9,
                    src76_r, src87_r, src98_r, src109_r);
+        dst0 = HEVC_FILT_8TAP_SH(src10_r, src32_r, src54_r, src76_r, filt0,
+                                 filt1, filt2, filt3);
+        dst1 = HEVC_FILT_8TAP_SH(src21_r, src43_r, src65_r, src87_r, filt0,
+                                 filt1, filt2, filt3);
+        dst2 = HEVC_FILT_8TAP_SH(src32_r, src54_r, src76_r, src98_r, filt0,
+                                 filt1, filt2, filt3);
+        dst3 = HEVC_FILT_8TAP_SH(src43_r, src65_r, src87_r, src109_r, filt0,
+                                 filt1, filt2, filt3);
 
-        tmp0 = const_vec;
-        DPADD_SB4_SH(src10_r, src32_r, src54_r, src76_r,
-                     filt0, filt1, filt2, filt3, tmp0, tmp0, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB4_SH(src21_r, src43_r, src65_r, src87_r,
-                     filt0, filt1, filt2, filt3, tmp1, tmp1, tmp1, tmp1);
-        tmp2 = const_vec;
-        DPADD_SB4_SH(src32_r, src54_r, src76_r, src98_r,
-                     filt0, filt1, filt2, filt3, tmp2, tmp2, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB4_SH(src43_r, src65_r, src87_r, src109_r,
-                     filt0, filt1, filt2, filt3, tmp3, tmp3, tmp3, tmp3);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3, weight_vec,
+                                       offset_vec, rnd_vec, dst0, dst1, dst2,
+                                       dst3);
 
-        HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
         dst += (4 * dst_stride);
 
         src10_r = src54_r;
@@ -1509,28 +1468,34 @@
                                       int32_t rnd_val)
 {
     int32_t loop_cnt;
+    v16u8 out0, out1, out2;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
     v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
-    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
     v16i8 src10_l, src32_l, src54_l, src76_l, src98_l;
     v16i8 src21_l, src43_l, src65_l, src87_l, src109_l;
     v16i8 src2110, src4332, src6554, src8776, src10998;
     v8i16 filt0, filt1, filt2, filt3;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
+    v8i16 weight_vec_h, offset_vec, denom_vec, filter_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= (3 * src_stride);
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
     weight = weight & 0x0000FFFF;
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
@@ -1547,7 +1512,7 @@
     ILVR_D3_SB(src21_l, src10_l, src43_l, src32_l, src65_l, src54_l,
                src2110, src4332, src6554);
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = 4; loop_cnt--;) {
         LD_SB4(src, src_stride, src7, src8, src9, src10);
         src += (4 * src_stride);
         XORI_B4_128_SB(src7, src8, src9, src10);
@@ -1558,37 +1523,28 @@
                    src76_l, src87_l, src98_l, src109_l);
         ILVR_D2_SB(src87_l, src76_l, src109_l, src98_l, src8776, src10998);
 
-        tmp0 = const_vec;
-        DPADD_SB4_SH(src10_r, src32_r, src54_r, src76_r,
-                     filt0, filt1, filt2, filt3, tmp0, tmp0, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB4_SH(src21_r, src43_r, src65_r, src87_r,
-                     filt0, filt1, filt2, filt3, tmp1, tmp1, tmp1, tmp1);
-        tmp2 = const_vec;
-        DPADD_SB4_SH(src32_r, src54_r, src76_r, src98_r,
-                     filt0, filt1, filt2, filt3, tmp2, tmp2, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB4_SH(src43_r, src65_r, src87_r, src109_r,
-                     filt0, filt1, filt2, filt3, tmp3, tmp3, tmp3, tmp3);
-        tmp4 = const_vec;
-        DPADD_SB4_SH(src2110, src4332, src6554, src8776,
-                     filt0, filt1, filt2, filt3, tmp4, tmp4, tmp4, tmp4);
-        tmp5 = const_vec;
-        DPADD_SB4_SH(src4332, src6554, src8776, src10998,
-                     filt0, filt1, filt2, filt3, tmp5, tmp5, tmp5, tmp5);
+        dst0 = HEVC_FILT_8TAP_SH(src10_r, src32_r, src54_r, src76_r, filt0,
+                                 filt1, filt2, filt3);
+        dst1 = HEVC_FILT_8TAP_SH(src21_r, src43_r, src65_r, src87_r, filt0,
+                                 filt1, filt2, filt3);
+        dst2 = HEVC_FILT_8TAP_SH(src32_r, src54_r, src76_r, src98_r, filt0,
+                                 filt1, filt2, filt3);
+        dst3 = HEVC_FILT_8TAP_SH(src43_r, src65_r, src87_r, src109_r, filt0,
+                                 filt1, filt2, filt3);
+        dst4 = HEVC_FILT_8TAP_SH(src2110, src4332, src6554, src8776, filt0,
+                                 filt1, filt2, filt3);
+        dst5 = HEVC_FILT_8TAP_SH(src4332, src6554, src8776, src10998, filt0,
+                                 filt1, filt2, filt3);
 
-        HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-        HEVC_UNIW_RND_CLIP2(tmp4, tmp5, weight_vec, offset_vec, rnd_vec,
-                            dst4_r, dst5_r, dst4_l, dst5_l);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3, weight_vec,
+                                       offset_vec, rnd_vec, dst0, dst1, dst2,
+                                       dst3);
+        HEVC_UNIW_RND_CLIP2_MAX_SATU_H(dst4, dst5, weight_vec, offset_vec,
+                                       rnd_vec, dst4, dst5);
 
-        HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                         dst2_l, dst2_r, dst3_l, dst3_r,
-                         dst4_l, dst4_r, dst5_l, dst5_r,
-                         dst0_r, dst1_r, dst2_r);
-        ST12x4_UB(dst0_r, dst1_r, dst2_r, dst, dst_stride);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        ST4x4_UB(out2, out2, 0, 1, 2, 3, dst + 8, dst_stride);
         dst += (4 * dst_stride);
 
         src10_r = src54_r;
@@ -1604,7 +1560,7 @@
     }
 }
 
-static void hevc_vt_uniwgt_8t_16multx2mult_msa(uint8_t *src,
+static void hevc_vt_uniwgt_8t_16multx4mult_msa(uint8_t *src,
                                                int32_t src_stride,
                                                uint8_t *dst,
                                                int32_t dst_stride,
@@ -1613,91 +1569,101 @@
                                                int32_t weight,
                                                int32_t offset,
                                                int32_t rnd_val,
-                                               int32_t width)
+                                               int32_t weightmul16)
 {
     uint8_t *src_tmp;
     uint8_t *dst_tmp;
     int32_t loop_cnt, cnt;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 out0, out1, out2, out3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v16i8 src10_r, src32_r, src54_r, src76_r;
     v16i8 src21_r, src43_r, src65_r, src87_r;
-    v8i16 tmp0, tmp1, tmp2, tmp3;
     v16i8 src10_l, src32_l, src54_l, src76_l;
     v16i8 src21_l, src43_l, src65_l, src87_l;
+    v16i8 src98_r, src109_r, src98_l, src109_l;
     v8i16 filt0, filt1, filt2, filt3;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 filter_vec;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= (3 * src_stride);
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
-    weight = weight & 0x0000FFFF;
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
-    for (cnt = (width >> 4); cnt--;) {
+    for (cnt = weightmul16; cnt--;) {
         src_tmp = src;
         dst_tmp = dst;
 
         LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6);
         src_tmp += (7 * src_stride);
         XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
-        ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1,
-                   src10_r, src32_r, src54_r, src21_r);
-        ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
-        ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1,
-                   src10_l, src32_l, src54_l, src21_l);
-        ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
 
-        for (loop_cnt = (height >> 1); loop_cnt--;) {
-            LD_SB2(src_tmp, src_stride, src7, src8);
-            src_tmp += (2 * src_stride);
-            XORI_B2_128_SB(src7, src8);
-            ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
-            ILVL_B2_SB(src7, src6, src8, src7, src76_l, src87_l);
+        for (loop_cnt = (height >> 2); loop_cnt--;) {
+            LD_SB4(src_tmp, src_stride, src7, src8, src9, src10);
+            src_tmp += (4 * src_stride);
+            XORI_B4_128_SB(src7, src8, src9, src10);
 
-            tmp0 = const_vec;
-            DPADD_SB4_SH(src10_r, src32_r, src54_r, src76_r,
-                         filt0, filt1, filt2, filt3, tmp0, tmp0, tmp0, tmp0);
-            tmp1 = const_vec;
-            DPADD_SB4_SH(src21_r, src43_r, src65_r, src87_r,
-                         filt0, filt1, filt2, filt3, tmp1, tmp1, tmp1, tmp1);
-            tmp2 = const_vec;
-            DPADD_SB4_SH(src10_l, src32_l, src54_l, src76_l,
-                         filt0, filt1, filt2, filt3, tmp2, tmp2, tmp2, tmp2);
-            tmp3 = const_vec;
-            DPADD_SB4_SH(src21_l, src43_l, src65_l, src87_l,
-                         filt0, filt1, filt2, filt3, tmp3, tmp3, tmp3, tmp3);
+            ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1,
+                       src10_r, src32_r, src54_r, src21_r);
+            ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+            ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1,
+                       src10_l, src32_l, src54_l, src21_l);
+            ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
+            ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9,
+                       src76_r, src87_r, src98_r, src109_r);
+            ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9,
+                       src76_l, src87_l, src98_l, src109_l);
 
-            HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                                weight_vec, offset_vec, rnd_vec,
-                                dst0_r, dst1_r, dst2_r, dst3_r,
-                                dst0_l, dst1_l, dst2_l, dst3_l);
+            dst0 = HEVC_FILT_8TAP_SH(src10_r, src32_r, src54_r, src76_r, filt0,
+                                     filt1, filt2, filt3);
+            dst1 = HEVC_FILT_8TAP_SH(src10_l, src32_l, src54_l, src76_l, filt0,
+                                     filt1, filt2, filt3);
+            dst2 = HEVC_FILT_8TAP_SH(src21_r, src43_r, src65_r, src87_r, filt0,
+                                     filt1, filt2, filt3);
+            dst3 = HEVC_FILT_8TAP_SH(src21_l, src43_l, src65_l, src87_l, filt0,
+                                     filt1, filt2, filt3);
+            dst4 = HEVC_FILT_8TAP_SH(src32_r, src54_r, src76_r, src98_r, filt0,
+                                     filt1, filt2, filt3);
+            dst5 = HEVC_FILT_8TAP_SH(src32_l, src54_l, src76_l, src98_l, filt0,
+                                     filt1, filt2, filt3);
+            dst6 = HEVC_FILT_8TAP_SH(src43_r, src65_r, src87_r, src109_r, filt0,
+                                     filt1, filt2, filt3);
+            dst7 = HEVC_FILT_8TAP_SH(src43_l, src65_l, src87_l, src109_l, filt0,
+                                     filt1, filt2, filt3);
 
-            HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                            dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-            ST_SW2(dst0_r, dst1_r, dst_tmp, dst_stride);
-            dst_tmp += (2 * dst_stride);
+            HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3, weight_vec,
+                                           offset_vec, rnd_vec, dst0, dst1,
+                                           dst2, dst3);
+            HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7, weight_vec,
+                                           offset_vec, rnd_vec, dst4, dst5,
+                                           dst6, dst7);
+            PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+            PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3);
+            ST_UB4(out0, out1, out2, out3, dst_tmp, dst_stride);
+            dst_tmp += (4 * dst_stride);
 
-            src10_r = src32_r;
-            src32_r = src54_r;
-            src54_r = src76_r;
-            src21_r = src43_r;
-            src43_r = src65_r;
-            src65_r = src87_r;
-            src10_l = src32_l;
-            src32_l = src54_l;
-            src54_l = src76_l;
-            src21_l = src43_l;
-            src43_l = src65_l;
-            src65_l = src87_l;
-            src6 = src8;
+            src0 = src4;
+            src1 = src5;
+            src2 = src6;
+            src3 = src7;
+            src4 = src8;
+            src5 = src9;
+            src6 = src10;
         }
 
         src += 16;
@@ -1715,9 +1681,9 @@
                                       int32_t offset,
                                       int32_t rnd_val)
 {
-    hevc_vt_uniwgt_8t_16multx2mult_msa(src, src_stride, dst, dst_stride,
+    hevc_vt_uniwgt_8t_16multx4mult_msa(src, src_stride, dst, dst_stride,
                                        filter, height, weight,
-                                       offset, rnd_val, 16);
+                                       offset, rnd_val, 1);
 }
 
 static void hevc_vt_uniwgt_8t_24w_msa(uint8_t *src,
@@ -1730,12 +1696,12 @@
                                       int32_t offset,
                                       int32_t rnd_val)
 {
-    hevc_vt_uniwgt_8t_16multx2mult_msa(src, src_stride, dst, dst_stride,
-                                       filter, height, weight,
-                                       offset, rnd_val, 16);
+    hevc_vt_uniwgt_8t_16multx4mult_msa(src, src_stride, dst, dst_stride,
+                                       filter, 32, weight,
+                                       offset, rnd_val, 1);
 
     hevc_vt_uniwgt_8t_8w_msa(src + 16, src_stride, dst + 16, dst_stride,
-                             filter, height, weight, offset, rnd_val);
+                             filter, 32, weight, offset, rnd_val);
 }
 
 static void hevc_vt_uniwgt_8t_32w_msa(uint8_t *src,
@@ -1748,9 +1714,9 @@
                                       int32_t offset,
                                       int32_t rnd_val)
 {
-    hevc_vt_uniwgt_8t_16multx2mult_msa(src, src_stride, dst, dst_stride,
+    hevc_vt_uniwgt_8t_16multx4mult_msa(src, src_stride, dst, dst_stride,
                                        filter, height, weight,
-                                       offset, rnd_val, 32);
+                                       offset, rnd_val, 2);
 }
 
 static void hevc_vt_uniwgt_8t_48w_msa(uint8_t *src,
@@ -1763,9 +1729,9 @@
                                       int32_t offset,
                                       int32_t rnd_val)
 {
-    hevc_vt_uniwgt_8t_16multx2mult_msa(src, src_stride, dst, dst_stride,
-                                       filter, height, weight,
-                                       offset, rnd_val, 48);
+    hevc_vt_uniwgt_8t_16multx4mult_msa(src, src_stride, dst, dst_stride,
+                                       filter, 64, weight,
+                                       offset, rnd_val, 3);
 }
 
 static void hevc_vt_uniwgt_8t_64w_msa(uint8_t *src,
@@ -1778,9 +1744,9 @@
                                       int32_t offset,
                                       int32_t rnd_val)
 {
-    hevc_vt_uniwgt_8t_16multx2mult_msa(src, src_stride, dst, dst_stride,
+    hevc_vt_uniwgt_8t_16multx4mult_msa(src, src_stride, dst, dst_stride,
                                        filter, height, weight,
-                                       offset, rnd_val, 64);
+                                       offset, rnd_val, 4);
 }
 
 static void hevc_hv_uniwgt_8t_4w_msa(uint8_t *src,
@@ -1795,40 +1761,42 @@
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v8i16 filt0, filt1, filt2, filt3;
-    v4i32 filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 filt_h0, filt_h1, filt_h2, filt_h3;
     v16i8 mask1, mask2, mask3;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
-    v8i16 dst30, dst41, dst52, dst63, dst66, dst87;
-    v4i32 dst0_r, dst1_r, weight_vec, offset_vec, rnd_vec;
-    v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
-    v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
-    v8u16 mask4 = { 0, 4, 1, 5, 2, 6, 3, 7 };
+    v8i16 dst30, dst41, dst52, dst63, dst66, dst97, dst108;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r, dst98_r;
+    v8i16 dst21_r, dst43_r, dst65_r, dst87_r, dst109_r;
+    v4i32 dst0_r, dst1_r, dst2_r, dst3_r;
+    v4i32 weight_vec, offset_vec, rnd_vec, const_128, denom_vec;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
 
     src -= ((3 * src_stride) + 3);
     filter_vec = LD_SH(filter_x);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W4_SW(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
 
     mask1 = mask0 + 2;
     mask2 = mask0 + 4;
     mask3 = mask0 + 6;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     weight_vec = __msa_fill_w(weight);
     offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
+    denom_vec = rnd_vec - 6;
+
+    const_128 = __msa_ldi_w(128);
+    const_128 *= weight_vec;
+    offset_vec += __msa_srar_w(const_128, denom_vec);
 
     LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
     src += (7 * src_stride);
@@ -1841,64 +1809,68 @@
                vec8, vec9, vec10, vec11);
     VSHF_B4_SB(src3, src6, mask0, mask1, mask2, mask3,
                vec12, vec13, vec14, vec15);
-    dst30 = const_vec;
-    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                 dst30, dst30, dst30, dst30);
-    dst41 = const_vec;
-    DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                 dst41, dst41, dst41, dst41);
-    dst52 = const_vec;
-    DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                 dst52, dst52, dst52, dst52);
-    dst63 = const_vec;
-    DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2, filt3,
-                 dst63, dst63, dst63, dst63);
+    dst30 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                              filt3);
+    dst41 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                              filt3);
+    dst52 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                              filt3);
+    dst63 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2,
+                              filt3);
 
-    ILVR_H3_SH(dst41, dst30, dst52, dst41, dst63, dst52,
-               dst10_r, dst21_r, dst32_r);
-
-    dst43_r = __msa_ilvl_h(dst41, dst30);
-    dst54_r = __msa_ilvl_h(dst52, dst41);
-    dst65_r = __msa_ilvl_h(dst63, dst52);
+    ILVRL_H2_SH(dst41, dst30, dst10_r, dst43_r);
+    ILVRL_H2_SH(dst52, dst41, dst21_r, dst54_r);
+    ILVRL_H2_SH(dst63, dst52, dst32_r, dst65_r);
 
     dst66 = (v8i16) __msa_splati_d((v2i64) dst63, 1);
 
-    for (loop_cnt = height >> 1; loop_cnt--;) {
-        LD_SB2(src, src_stride, src7, src8);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src7, src8);
+    for (loop_cnt = height >> 2; loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+        XORI_B4_128_SB(src7, src8, src9, src10);
 
-        VSHF_B4_SB(src7, src8, mask0, mask1, mask2, mask3,
+        VSHF_B4_SB(src7, src9, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst87 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst87, dst87, dst87, dst87);
-        dst76_r = __msa_ilvr_h(dst87, dst66);
-        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
-                                filt_h0, filt_h1, filt_h2, filt_h3);
-        dst87_r = __msa_vshf_h((v8i16) mask4, dst87, dst87);
-        dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r,
-                                filt_h0, filt_h1, filt_h2, filt_h3);
+        VSHF_B4_SB(src8, src10, mask0, mask1, mask2, mask3,
+                   vec4, vec5, vec6, vec7);
+        dst97 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                  filt3);
+        dst108 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                   filt3);
 
-        dst0_r >>= 6;
-        dst1_r >>= 6;
+        dst76_r = __msa_ilvr_h(dst97, dst66);
+        ILVRL_H2_SH(dst108, dst97, dst87_r, dst109_r);
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst97, 1);
+        dst98_r = __msa_ilvr_h(dst66, dst108);
+
+        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst2_r = HEVC_FILT_8TAP(dst32_r, dst54_r, dst76_r, dst98_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst3_r = HEVC_FILT_8TAP(dst43_r, dst65_r, dst87_r, dst109_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+
+        SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
         MUL2(dst0_r, weight_vec, dst1_r, weight_vec, dst0_r, dst1_r);
-        SRAR_W2_SW(dst0_r, dst1_r, rnd_vec);
+        MUL2(dst2_r, weight_vec, dst3_r, weight_vec, dst2_r, dst3_r);
+        SRAR_W4_SW(dst0_r, dst1_r, dst2_r, dst3_r, rnd_vec);
         ADD2(dst0_r, offset_vec, dst1_r, offset_vec, dst0_r, dst1_r);
-        dst0_r = CLIP_SW_0_255(dst0_r);
-        dst1_r = CLIP_SW_0_255(dst1_r);
+        ADD2(dst2_r, offset_vec, dst3_r, offset_vec, dst2_r, dst3_r);
+        CLIP_SW4_0_255_MAX_SATU(dst0_r, dst1_r, dst2_r, dst3_r);
+        PCKEV_H2_SW(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r, dst1_r);
+        out = (v16u8) __msa_pckev_b((v16i8) dst1_r, (v16i8) dst0_r);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
 
-        HEVC_PCK_SW_SB2(dst1_r, dst0_r, dst0_r);
-        ST4x2_UB(dst0_r, dst, dst_stride);
-        dst += (2 * dst_stride);
-
-        dst10_r = dst32_r;
-        dst32_r = dst54_r;
-        dst54_r = dst76_r;
-        dst21_r = dst43_r;
-        dst43_r = dst65_r;
-        dst65_r = dst87_r;
-        dst66 = (v8i16) __msa_splati_d((v2i64) dst87, 1);
+        dst10_r = dst54_r;
+        dst32_r = dst76_r;
+        dst54_r = dst98_r;
+        dst21_r = dst65_r;
+        dst43_r = dst87_r;
+        dst65_r = dst109_r;
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst108, 1);
     }
 }
 
@@ -1921,7 +1893,7 @@
     v8i16 filt0, filt1, filt2, filt3;
     v4i32 filt_h0, filt_h1, filt_h2, filt_h3;
     v16i8 mask1, mask2, mask3;
-    v8i16 filter_vec, const_vec;
+    v8i16 filter_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
@@ -1930,23 +1902,25 @@
     v8i16 dst10_l, dst32_l, dst54_l, dst76_l;
     v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
     v8i16 dst21_l, dst43_l, dst65_l, dst87_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v4i32 weight_vec, offset_vec, rnd_vec, const_128, denom_vec;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
 
     src -= ((3 * src_stride) + 3);
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
     weight_vec = __msa_fill_w(weight);
     offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
+    denom_vec = rnd_vec - 6;
+
+    const_128 = __msa_ldi_w(128);
+    const_128 *= weight_vec;
+    offset_vec += __msa_srar_w(const_128, denom_vec);
 
     filter_vec = LD_SH(filter_x);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
     SPLATI_W4_SW(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
 
     mask1 = mask0 + 2;
@@ -1969,18 +1943,14 @@
                    vec8, vec9, vec10, vec11);
         VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
                    vec12, vec13, vec14, vec15);
-        dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        dst1 = const_vec;
-        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        dst2 = const_vec;
-        DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        dst3 = const_vec;
-        DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
+        dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        dst1 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                 filt3);
+        dst2 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                                 filt3);
+        dst3 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1,
+                                 filt2, filt3);
 
         VSHF_B4_SB(src4, src4, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
@@ -1988,15 +1958,12 @@
                    vec4, vec5, vec6, vec7);
         VSHF_B4_SB(src6, src6, mask0, mask1, mask2, mask3,
                    vec8, vec9, vec10, vec11);
-        dst4 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst4, dst4, dst4, dst4);
-        dst5 = const_vec;
-        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
-                     dst5, dst5, dst5, dst5);
-        dst6 = const_vec;
-        DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
-                     dst6, dst6, dst6, dst6);
+        dst4 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        dst5 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                 filt3);
+        dst6 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                                 filt3);
 
         ILVR_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1,
                    dst10_r, dst32_r, dst54_r, dst21_r);
@@ -2012,9 +1979,8 @@
 
             VSHF_B4_SB(src7, src7, mask0, mask1, mask2, mask3,
                        vec0, vec1, vec2, vec3);
-            dst7 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst7, dst7, dst7, dst7);
+            dst7 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                     filt2, filt3);
 
             ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
             dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
@@ -2027,9 +1993,8 @@
             /* row 8 */
             VSHF_B4_SB(src8, src8, mask0, mask1, mask2, mask3,
                        vec0, vec1, vec2, vec3);
-            dst8 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst8, dst8, dst8, dst8);
+            dst8 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1,
+                                     filt2, filt3);
 
             ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
             dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r,
@@ -2039,11 +2004,15 @@
             dst1_r >>= 6;
             dst1_l >>= 6;
 
-            HEVC_HV_UNIW_RND_CLIP4(dst0_r, dst1_r, dst0_l, dst1_l,
-                                   weight_vec, offset_vec, rnd_vec,
-                                   dst0_r, dst1_r, dst0_l, dst1_l);
+            MUL2(dst0_r, weight_vec, dst0_l, weight_vec, dst0_r, dst0_l);
+            MUL2(dst1_r, weight_vec, dst1_l, weight_vec, dst1_r, dst1_l);
+            SRAR_W4_SW(dst0_r, dst1_r, dst0_l, dst1_l, rnd_vec);
+            ADD2(dst0_r, offset_vec, dst0_l, offset_vec, dst0_r, dst0_l);
+            ADD2(dst1_r, offset_vec, dst1_l, offset_vec, dst1_r, dst1_l);
+            CLIP_SW4_0_255_MAX_SATU(dst0_r, dst1_r, dst0_l, dst1_l);
 
-            HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
+            PCKEV_H2_SW(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r, dst1_r);
+            dst0_r = (v4i32) __msa_pckev_b((v16i8) dst1_r, (v16i8) dst0_r);
             ST8x2_UB(dst0_r, dst_tmp, dst_stride);
             dst_tmp += (2 * dst_stride);
 
@@ -2094,12 +2063,198 @@
                                       int32_t offset,
                                       int32_t rnd_val)
 {
-    hevc_hv_uniwgt_8t_8multx2mult_msa(src, src_stride, dst, dst_stride,
-                                      filter_x, filter_y, height, weight,
-                                      offset, rnd_val, 8);
-    hevc_hv_uniwgt_8t_4w_msa(src + 8, src_stride, dst + 8, dst_stride,
-                             filter_x, filter_y, height, weight, offset,
-                             rnd_val);
+    uint32_t loop_cnt;
+    uint8_t *src_tmp, *dst_tmp;
+    v16u8 out;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 dst30, dst41, dst52, dst63, dst66, dst97, dst108;
+    v8i16 filt0, filt1, filt2, filt3, filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r, dst10_l, dst32_l, dst54_l;
+    v8i16 dst98_r, dst21_r, dst43_r, dst65_r, dst87_r, dst109_r;
+    v8i16 dst76_l, filter_vec;
+    v4i32 dst0_r, dst0_l, dst1_r, dst2_r, dst3_r;
+    v4i32 weight_vec, offset_vec, rnd_vec, const_128, denom_vec;
+
+    src -= ((3 * src_stride) + 3);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+
+    weight_vec = __msa_fill_w(weight);
+    offset_vec = __msa_fill_w(offset);
+    rnd_vec = __msa_fill_w(rnd_val);
+    denom_vec = rnd_vec - 6;
+
+    const_128 = __msa_ldi_w(128);
+    const_128 *= weight_vec;
+    offset_vec += __msa_srar_w(const_128, denom_vec);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    src_tmp = src;
+    dst_tmp = dst;
+
+    LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src_tmp += (7 * src_stride);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+    /* row 0 row 1 row 2 row 3 */
+    VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3, vec8, vec9, vec10,
+               vec11);
+    VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3, vec12, vec13, vec14,
+               vec15);
+    dst0 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                             filt3);
+    dst1 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                             filt3);
+    dst2 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                             filt3);
+    dst3 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1,
+                             filt2, filt3);
+    VSHF_B4_SB(src4, src4, mask0, mask1, mask2, mask3, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src5, src5, mask0, mask1, mask2, mask3, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src6, src6, mask0, mask1, mask2, mask3, vec8, vec9, vec10,
+               vec11);
+    dst4 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                             filt3);
+    dst5 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                             filt3);
+    dst6 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                             filt3);
+
+    for (loop_cnt = 16; loop_cnt--;) {
+        src7 = LD_SB(src_tmp);
+        src7 = (v16i8) __msa_xori_b((v16u8) src7, 128);
+        src_tmp += src_stride;
+
+        VSHF_B4_SB(src7, src7, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+                   vec3);
+        dst7 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                 filt3);
+        ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
+        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
+
+        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
+                                filt_h0, filt_h1, filt_h2, filt_h3);
+        dst0_l = HEVC_FILT_8TAP(dst10_l, dst32_l, dst54_l, dst76_l,
+                                filt_h0, filt_h1, filt_h2, filt_h3);
+        dst0_r >>= 6;
+        dst0_l >>= 6;
+
+        MUL2(dst0_r, weight_vec, dst0_l, weight_vec, dst0_r, dst0_l);
+        SRAR_W2_SW(dst0_r, dst0_l, rnd_vec);
+        ADD2(dst0_r, offset_vec, dst0_l, offset_vec, dst0_r, dst0_l);
+        CLIP_SW2_0_255_MAX_SATU(dst0_r, dst0_l);
+        dst0_r = (v4i32) __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
+        out = (v16u8) __msa_pckev_b((v16i8) dst0_r, (v16i8) dst0_r);
+        ST8x1_UB(out, dst_tmp);
+        dst_tmp += dst_stride;
+
+        dst0 = dst1;
+        dst1 = dst2;
+        dst2 = dst3;
+        dst3 = dst4;
+        dst4 = dst5;
+        dst5 = dst6;
+        dst6 = dst7;
+    }
+
+    src += 8;
+    dst += 8;
+
+    mask4 = LD_SB(ff_hevc_mask_arr + 16);
+    mask5 = mask4 + 2;
+    mask6 = mask4 + 4;
+    mask7 = mask4 + 6;
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src += (7 * src_stride);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+    VSHF_B4_SB(src0, src3, mask4, mask5, mask6, mask7, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src1, src4, mask4, mask5, mask6, mask7, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src2, src5, mask4, mask5, mask6, mask7, vec8, vec9, vec10,
+               vec11);
+    VSHF_B4_SB(src3, src6, mask4, mask5, mask6, mask7, vec12, vec13, vec14,
+               vec15);
+    dst30 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                              filt3);
+    dst41 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                              filt3);
+    dst52 = HEVC_FILT_8TAP_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2,
+                              filt3);
+    dst63 = HEVC_FILT_8TAP_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2,
+                              filt3);
+    ILVRL_H2_SH(dst41, dst30, dst10_r, dst43_r);
+    ILVRL_H2_SH(dst52, dst41, dst21_r, dst54_r);
+    ILVRL_H2_SH(dst63, dst52, dst32_r, dst65_r);
+
+    dst66 = (v8i16) __msa_splati_d((v2i64) dst63, 1);
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+
+        VSHF_B4_SB(src7, src9, mask4, mask5, mask6, mask7, vec0, vec1, vec2,
+                   vec3);
+        VSHF_B4_SB(src8, src10, mask4, mask5, mask6, mask7, vec4, vec5, vec6,
+                   vec7);
+        dst97 = HEVC_FILT_8TAP_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2,
+                                  filt3);
+        dst108 = HEVC_FILT_8TAP_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2,
+                                   filt3);
+
+        dst76_r = __msa_ilvr_h(dst97, dst66);
+        ILVRL_H2_SH(dst108, dst97, dst87_r, dst109_r);
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst97, 1);
+        dst98_r = __msa_ilvr_h(dst66, dst108);
+
+        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst2_r = HEVC_FILT_8TAP(dst32_r, dst54_r, dst76_r, dst98_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst3_r = HEVC_FILT_8TAP(dst43_r, dst65_r, dst87_r, dst109_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+
+        SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+        MUL2(dst0_r, weight_vec, dst1_r, weight_vec, dst0_r, dst1_r);
+        MUL2(dst2_r, weight_vec, dst3_r, weight_vec, dst2_r, dst3_r);
+        SRAR_W4_SW(dst0_r, dst1_r, dst2_r, dst3_r, rnd_vec);
+        ADD2(dst0_r, offset_vec, dst1_r, offset_vec, dst0_r, dst1_r);
+        ADD2(dst2_r, offset_vec, dst3_r, offset_vec, dst2_r, dst3_r);
+        CLIP_SW4_0_255_MAX_SATU(dst0_r, dst1_r, dst2_r, dst3_r);
+        PCKEV_H2_SW(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r, dst1_r);
+        out = (v16u8) __msa_pckev_b((v16i8) dst1_r, (v16i8) dst0_r);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        dst10_r = dst54_r;
+        dst32_r = dst76_r;
+        dst54_r = dst98_r;
+        dst21_r = dst65_r;
+        dst43_r = dst87_r;
+        dst65_r = dst109_r;
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst108, 1);
+    }
 }
 
 static void hevc_hv_uniwgt_8t_16w_msa(uint8_t *src,
@@ -2187,19 +2342,19 @@
                                       uint8_t *dst,
                                       int32_t dst_stride,
                                       const int8_t *filter,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
+    v16u8 out;
     v8i16 filt0, filt1;
     v16i8 src0, src1, vec0, vec1;
     v16i8 mask1;
     v8i16 dst0;
     v4i32 dst0_r, dst0_l;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
 
     src -= 1;
 
@@ -2210,29 +2365,33 @@
 
     weight = weight & 0x0000FFFF;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     LD_SB2(src, src_stride, src0, src1);
     XORI_B2_128_SB(src0, src1);
 
     VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
 
     ILVRL_H2_SW(dst0, dst0, dst0_r, dst0_l);
     DOTP_SH2_SW(dst0_r, dst0_l, weight_vec, weight_vec, dst0_r, dst0_l);
     SRAR_W2_SW(dst0_r, dst0_l, rnd_vec);
-    ADD2(dst0_r, offset_vec, dst0_l, offset_vec, dst0_r, dst0_l);
-    dst0_r = CLIP_SW_0_255(dst0_r);
-    dst0_l = CLIP_SW_0_255(dst0_l);
-
-    HEVC_PCK_SW_SB2(dst0_l, dst0_r, dst0_r);
-    ST4x2_UB(dst0_r, dst, dst_stride);
+    dst0 = __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
+    dst0 = __msa_adds_s_h(dst0, offset_vec);
+    dst0 = CLIP_SH_0_255_MAX_SATU(dst0);
+    out = (v16u8) __msa_pckev_b((v16i8) dst0, (v16i8) dst0);
+    ST4x2_UB(out, dst, dst_stride);
     dst += (4 * dst_stride);
 }
 
@@ -2241,19 +2400,18 @@
                                       uint8_t *dst,
                                       int32_t dst_stride,
                                       const int8_t *filter,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
+    v16u8 out;
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3;
-    v16i8 mask1, vec0, vec1;
+    v16i8 mask1, vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1;
-    v4i32 dst0_r, dst1_r, dst0_l, dst1_l;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
 
     src -= 1;
 
@@ -2265,29 +2423,32 @@
 
     weight = weight & 0x0000FFFF;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     LD_SB4(src, src_stride, src0, src1, src2, src3);
     XORI_B4_128_SB(src0, src1, src2, src3);
 
     VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+    VSHF_B2_SB(src2, src3, src2, src3, mask0, mask1, vec2, vec3);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
 
-    VSHF_B2_SB(src2, src3, src2, src3, mask0, mask1, vec0, vec1);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+    HEVC_UNIW_RND_CLIP2_MAX_SATU_H(dst0, dst1, weight_vec, offset_vec, rnd_vec,
+                                   dst0, dst1);
 
-    HEVC_UNIW_RND_CLIP2(dst0, dst1, weight_vec, offset_vec, rnd_vec,
-                        dst0_r, dst1_r, dst0_l, dst1_l);
-
-    HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-    ST4x4_UB(dst0_r, dst0_r, 0, 1, 2, 3, dst, dst_stride);
+    out = (v16u8) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
     dst += (4 * dst_stride);
 }
 
@@ -2302,14 +2463,15 @@
                                               int32_t rnd_val)
 {
     uint32_t loop_cnt;
+    v16u8 out0, out1;
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
-    v16i8 mask1, vec0, vec1;
+    v16i8 mask1, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v8i16 dst0, dst1, dst2, dst3;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v8i16 filter_vec;
+    v8i16 weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
 
     src -= 1;
 
@@ -2317,13 +2479,20 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     weight = weight & 0x0000FFFF;
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     mask1 = mask0 + 2;
 
     for (loop_cnt = (height >> 3); loop_cnt--;) {
@@ -2333,29 +2502,20 @@
         XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
 
         VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        VSHF_B2_SB(src2, src3, src2, src3, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src4, src5, src4, src5, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src7, src6, src7, mask0, mask1, vec6, vec7);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-        VSHF_B2_SB(src2, src3, src2, src3, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3,
+                                       weight_vec, offset_vec, rnd_vec,
+                                       dst0, dst1, dst2, dst3);
 
-        VSHF_B2_SB(src4, src5, src4, src5, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-        VSHF_B2_SB(src6, src7, src6, src7, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-        HEVC_UNIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST4x8_UB(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        ST4x8_UB(out0, out1, dst, dst_stride);
         dst += (8 * dst_stride);
     }
 }
@@ -2372,10 +2532,10 @@
 {
     if (2 == height) {
         hevc_hz_uniwgt_4t_4x2_msa(src, src_stride, dst, dst_stride,
-                                  filter, height, weight, offset, rnd_val);
+                                  filter, weight, offset, rnd_val);
     } else if (4 == height) {
         hevc_hz_uniwgt_4t_4x4_msa(src, src_stride, dst, dst_stride,
-                                  filter, height, weight, offset, rnd_val);
+                                  filter, weight, offset, rnd_val);
     } else if (8 == height || 16 == height) {
         hevc_hz_uniwgt_4t_4x8multiple_msa(src, src_stride, dst, dst_stride,
                                           filter, height, weight,
@@ -2393,16 +2553,15 @@
                                      int32_t offset,
                                      int32_t rnd_val)
 {
-    uint32_t loop_cnt;
+    v16u8 out0, out1, out2, out3;
     v8i16 filt0, filt1;
-    v16i8 src0, src1, src2, src3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v16i8 vec0, vec1;
-    v8i16 dst0, dst1, dst2, dst3;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= 1;
 
@@ -2410,48 +2569,53 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     weight = weight & 0x0000FFFF;
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     mask1 = mask0 + 2;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src, src_stride, src0, src1, src2, src3);
-        src += (4 * src_stride);
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec6, vec7);
+    dst4 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst5 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst6 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst7 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-        XORI_B4_128_SB(src0, src1, src2, src3);
+    HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3,
+                                   weight_vec, offset_vec, rnd_vec,
+                                   dst0, dst1, dst2, dst3);
+    HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7,
+                                   weight_vec, offset_vec, rnd_vec,
+                                   dst4, dst5, dst6, dst7);
 
-        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-        HEVC_UNIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-
-        ST6x4_UB(dst0_r, dst1_r, dst, dst_stride);
-        dst += (4 * dst_stride);
-    }
+    PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+    PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3);
+    ST6x4_UB(out0, out1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    ST6x4_UB(out2, out3, dst, dst_stride);
 }
 
 static void hevc_hz_uniwgt_4t_8x2_msa(uint8_t *src,
@@ -2459,19 +2623,18 @@
                                       uint8_t *dst,
                                       int32_t dst_stride,
                                       const int8_t *filter,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
+    v16u8 out;
     v8i16 filt0, filt1, dst0, dst1;
     v16i8 src0, src1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v16i8 vec0, vec1;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst0_l, dst1_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v16i8 vec0, vec1, vec2, vec3;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= 1;
 
@@ -2479,52 +2642,52 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     weight = weight & 0x0000FFFF;
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     mask1 = mask0 + 2;
 
     LD_SB2(src, src_stride, src0, src1);
     XORI_B2_128_SB(src0, src1);
 
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
 
-    HEVC_UNIW_RND_CLIP2(dst0, dst1, weight_vec, offset_vec, rnd_vec,
-                        dst0_r, dst1_r, dst0_l, dst1_l);
+    HEVC_UNIW_RND_CLIP2_MAX_SATU_H(dst0, dst1, weight_vec, offset_vec, rnd_vec,
+                                   dst0, dst1);
 
-    HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-    ST8x2_UB(dst0_r, dst, dst_stride);
+    out = (v16u8) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
+    ST8x2_UB(out, dst, dst_stride);
 }
 
-static void hevc_hz_uniwgt_4t_8x6_msa(uint8_t *src,
+static void hevc_hz_uniwgt_4t_8x4_msa(uint8_t *src,
                                       int32_t src_stride,
                                       uint8_t *dst,
                                       int32_t dst_stride,
                                       const int8_t *filter,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
-    v8i16 filt0, filt1;
-    v16i8 src0, src1, src2, src3, src4, src5;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
-    v16i8 mask1;
-    v16i8 vec0, vec1;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3;
+    v16i8 mask0, mask1, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 filt0, filt1, dst0, dst1, dst2, dst3;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= 1;
 
@@ -2532,61 +2695,113 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     weight = weight & 0x0000FFFF;
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
+    weight_vec = __msa_fill_w(weight);
+    rnd_vec = __msa_fill_w(rnd_val);
+
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
+    mask0 = LD_SB(&ff_hevc_mask_arr[0]);
+    mask1 = mask0 + 2;
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+
+    HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3,
+                                   weight_vec, offset_vec, rnd_vec,
+                                   dst0, dst1, dst2, dst3);
+
+    PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+    ST8x4_UB(out0, out1, dst, dst_stride);
+}
+
+static void hevc_hz_uniwgt_4t_8x6_msa(uint8_t *src,
+                                      int32_t src_stride,
+                                      uint8_t *dst,
+                                      int32_t dst_stride,
+                                      const int8_t *filter,
+                                      int32_t weight,
+                                      int32_t offset,
+                                      int32_t rnd_val)
+{
+    v16u8 out0, out1, out2;
+    v8i16 filt0, filt1;
+    v16i8 src0, src1, src2, src3, src4, src5;
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
+    v16i8 mask1;
+    v16i8 vec11;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9, vec10;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
+
+    src -= 1;
+
+    filter_vec = LD_SH(filter);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     mask1 = mask0 + 2;
 
     LD_SB6(src, src_stride, src0, src1, src2, src3, src4, src5);
-    LD_SB6(src, src_stride, src0, src1, src2, src3, src4, src5);
     XORI_B6_128_SB(src0, src1, src2, src3, src4, src5);
 
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec8, vec9);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec10, vec11);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    dst4 = HEVC_FILT_4TAP_SH(vec8, vec9, filt0, filt1);
+    dst5 = HEVC_FILT_4TAP_SH(vec10, vec11, filt0, filt1);
 
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+    HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3,
+                                   weight_vec, offset_vec, rnd_vec,
+                                   dst0, dst1, dst2, dst3);
 
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+    HEVC_UNIW_RND_CLIP2_MAX_SATU_H(dst4, dst5, weight_vec, offset_vec, rnd_vec,
+                                   dst4, dst5);
 
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-    HEVC_UNIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                        weight_vec, offset_vec, rnd_vec,
-                        dst0_r, dst1_r, dst2_r, dst3_r,
-                        dst0_l, dst1_l, dst2_l, dst3_l);
-
-    HEVC_UNIW_RND_CLIP2(dst4, dst5, weight_vec, offset_vec, rnd_vec,
-                        dst4_r, dst5_r, dst4_l, dst5_l);
-
-    HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                     dst2_l, dst2_r, dst3_l, dst3_r,
-                     dst4_l, dst4_r, dst5_l, dst5_r, dst0_r, dst1_r, dst2_r);
-
-    ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+    PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+    ST8x4_UB(out0, out1, dst, dst_stride);
     dst += (4 * dst_stride);
-    ST8x2_UB(dst2_r, dst, dst_stride);
+    ST8x2_UB(out2, dst, dst_stride);
 }
 
-static void hevc_hz_uniwgt_4t_8x4multiple_msa(uint8_t *src,
+static void hevc_hz_uniwgt_4t_8x8multiple_msa(uint8_t *src,
                                               int32_t src_stride,
                                               uint8_t *dst,
                                               int32_t dst_stride,
@@ -2598,14 +2813,14 @@
 {
     uint32_t loop_cnt;
     v8i16 filt0, filt1;
-    v16i8 src0, src1, src2, src3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 out0, out1, out2, out3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1;
-    v16i8 vec0, vec1;
-    v8i16 dst0, dst1, dst2, dst3;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= 1;
 
@@ -2613,47 +2828,56 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     weight = weight & 0x0000FFFF;
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     mask1 = mask0 + 2;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src, src_stride, src0, src1, src2, src3);
-        src += (4 * src_stride);
-
-        XORI_B4_128_SB(src0, src1, src2, src3);
+    for (loop_cnt = (height >> 3); loop_cnt--;) {
+        LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+        src += (8 * src_stride);
+        XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
 
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec6, vec7);
+        dst4 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst6 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst7 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3,
+                                       weight_vec, offset_vec, rnd_vec,
+                                       dst0, dst1, dst2, dst3);
 
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7,
+                                       weight_vec, offset_vec, rnd_vec,
+                                       dst4, dst5, dst6, dst7);
 
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-        HEVC_UNIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-
-        ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
-        dst += (4 * dst_stride);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3);
+        ST8x8_UB(out0, out1, out2, out3, dst, dst_stride);
+        dst += (8 * dst_stride);
     }
 }
 
@@ -2669,12 +2893,15 @@
 {
     if (2 == height) {
         hevc_hz_uniwgt_4t_8x2_msa(src, src_stride, dst, dst_stride,
-                                  filter, height, weight, offset, rnd_val);
+                                  filter, weight, offset, rnd_val);
+    } else if (4 == height) {
+        hevc_hz_uniwgt_4t_8x4_msa(src, src_stride, dst, dst_stride,
+                                  filter, weight, offset, rnd_val);
     } else if (6 == height) {
         hevc_hz_uniwgt_4t_8x6_msa(src, src_stride, dst, dst_stride,
-                                  filter, height, weight, offset, rnd_val);
+                                  filter, weight, offset, rnd_val);
     } else {
-        hevc_hz_uniwgt_4t_8x4multiple_msa(src, src_stride, dst, dst_stride,
+        hevc_hz_uniwgt_4t_8x8multiple_msa(src, src_stride, dst, dst_stride,
                                           filter, height, weight, offset,
                                           rnd_val);
     }
@@ -2691,19 +2918,18 @@
                                       int32_t rnd_val)
 {
     uint32_t loop_cnt;
+    v16u8 out0, out1, out2;
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask2 = { 8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
     };
     v16i8 mask1;
-    v16i8 vec0, vec1;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9, vec10;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v8i16 filter_vec, const_vec;
-    v16i8 mask3;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v16i8 mask3, vec11;
+    v4i32 weight_vec, rnd_vec;
 
     src -= 1;
 
@@ -2711,60 +2937,51 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     weight = weight & 0x0000FFFF;
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     mask1 = mask0 + 2;
     mask3 = mask2 + 2;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = 4; loop_cnt--;) {
         LD_SB4(src, src_stride, src0, src1, src2, src3);
         src += (4 * src_stride);
 
         XORI_B4_128_SB(src0, src1, src2, src3);
 
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec8, vec9);
+        VSHF_B2_SB(src2, src3, src2, src3, mask2, mask3, vec10, vec11);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+        dst4 = HEVC_FILT_4TAP_SH(vec8, vec9, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(vec10, vec11, filt0, filt1);
 
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3,
+                                       weight_vec, offset_vec, rnd_vec,
+                                       dst0, dst1, dst2, dst3);
 
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+        HEVC_UNIW_RND_CLIP2_MAX_SATU_H(dst4, dst5, weight_vec, offset_vec,
+                                       rnd_vec, dst4, dst5);
 
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-        VSHF_B2_SB(src2, src3, src2, src3, mask2, mask3, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-        HEVC_UNIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_UNIW_RND_CLIP2(dst4, dst5, weight_vec, offset_vec, rnd_vec,
-                            dst4_r, dst5_r, dst4_l, dst5_l);
-
-        HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                         dst2_l, dst2_r, dst3_l, dst3_r,
-                         dst4_l, dst4_r, dst5_l, dst5_r,
-                         dst0_r, dst1_r, dst2_r);
-
-        ST12x4_UB(dst0_r, dst1_r, dst2_r, dst, dst_stride);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        ST12x4_UB(out0, out1, out2, dst, dst_stride);
         dst += (4 * dst_stride);
     }
 }
@@ -2780,15 +2997,15 @@
                                       int32_t rnd_val)
 {
     uint32_t loop_cnt;
+    v16u8 out0, out1, out2, out3;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
     v8i16 filt0, filt1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
-    v16i8 vec0, vec1;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= 1;
 
@@ -2796,13 +3013,20 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     weight = weight & 0x0000FFFF;
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     mask1 = mask0 + 2;
 
     for (loop_cnt = (height >> 2); loop_cnt--;) {
@@ -2813,56 +3037,35 @@
         XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
 
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
+        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
         VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec6, vec7);
+        dst4 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst6 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst7 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3,
+                                       weight_vec, offset_vec, rnd_vec,
+                                       dst0, dst1, dst2, dst3);
 
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst6 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7,
+                                       weight_vec, offset_vec, rnd_vec,
+                                       dst4, dst5, dst6, dst7);
 
-        VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-        dst7 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
+        PCKEV_B4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6,
+                    out0, out1, out2, out3);
 
-        HEVC_UNIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
-        dst += (2 * dst_stride);
-
-        HEVC_UNIW_RND_CLIP4(dst4, dst5, dst6, dst7,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
-        dst += (2 * dst_stride);
+        ST_UB4(out0, out1, out2, out3, dst, dst_stride);
+        dst += (4 * dst_stride);
     }
 }
 
@@ -2877,16 +3080,14 @@
                                       int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    uint8_t *dst_tmp = dst + 16;
+    v16u8 out0, out1, out2;
     v16i8 src0, src1, src2, src3;
     v8i16 filt0, filt1;
-    v8i16 dst0, dst1, dst2, dst3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
-    v16i8 mask1, mask2, mask3;
-    v16i8 vec0, vec1;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v16i8 mask0, mask1, mask2, mask3;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= 1;
 
@@ -2894,19 +3095,25 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     weight = weight & 0x0000FFFF;
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
+    mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     mask1 = mask0 + 2;
     mask2 = mask0 + 8;
     mask3 = mask0 + 10;
 
-    for (loop_cnt = (height >> 1); loop_cnt--;) {
-        /* 16 width */
+    for (loop_cnt = 16; loop_cnt--;) {
         LD_SB2(src, src_stride, src0, src2);
         LD_SB2(src + 16, src_stride, src1, src3);
         src += (2 * src_stride);
@@ -2914,46 +3121,29 @@
         XORI_B4_128_SB(src0, src1, src2, src3);
 
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-
-        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-        VSHF_B2_SB(src2, src3, src2, src3, mask2, mask3, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-        HEVC_UNIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
-        dst += (2 * dst_stride);
-
-        /* 8 width */
+        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec2, vec3);
+        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src2, src3, src2, src3, mask2, mask3, vec6, vec7);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
         VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec2, vec3);
+        dst4 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
 
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3,
+                                       weight_vec, offset_vec, rnd_vec,
+                                       dst0, dst1, dst2, dst3);
 
-        HEVC_UNIW_RND_CLIP2(dst0, dst1, weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst0_l, dst1_l);
+        HEVC_UNIW_RND_CLIP2_MAX_SATU_H(dst4, dst5, weight_vec, offset_vec,
+                                       rnd_vec, dst4, dst5);
 
-        HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-        ST8x2_UB(dst0_r, dst_tmp, dst_stride);
-        dst_tmp += (2 * dst_stride);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        ST_UB2(out0, out1, dst, dst_stride);
+        ST8x2_UB(out2, dst + 16, dst_stride);
+        dst += (2 * dst_stride);
     }
 }
 
@@ -2968,15 +3158,15 @@
                                       int32_t rnd_val)
 {
     uint32_t loop_cnt;
-    v16i8 src0, src1, src2;
+    v16u8 out0, out1, out2, out3;
+    v16i8 src0, src1, src2, src3, src4, src5;
     v8i16 filt0, filt1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
     v16i8 mask1, mask2, mask3;
-    v8i16 dst0, dst1, dst2, dst3;
-    v16i8 vec0, vec1;
-    v8i16 filter_vec, const_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= 1;
 
@@ -2984,13 +3174,20 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     weight = weight & 0x0000FFFF;
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     mask1 = mask0 + 2;
     mask2 = mask0 + 8;
     mask3 = mask0 + 10;
@@ -2999,65 +3196,40 @@
         LD_SB2(src, 16, src0, src1);
         src2 = LD_SB(src + 24);
         src += src_stride;
-
-        XORI_B3_128_SB(src0, src1, src2);
-
-        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-
-        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-        HEVC_UNIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, 16);
-        dst += dst_stride;
-
-        LD_SB2(src, 16, src0, src1);
-        src2 = LD_SB(src + 24);
+        LD_SB2(src, 16, src3, src4);
+        src5 = LD_SB(src + 24);
         src += src_stride;
-
-        XORI_B3_128_SB(src0, src1, src2);
-
+        XORI_B6_128_SB(src0, src1, src2, src3, src4, src5);
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec2, vec3);
+        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec6, vec7);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src3, src4, src3, src4, mask2, mask3, vec2, vec3);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec6, vec7);
+        dst4 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst6 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst7 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
 
-        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3,
+                                       weight_vec, offset_vec, rnd_vec,
+                                       dst0, dst1, dst2, dst3);
 
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7,
+                                       weight_vec, offset_vec, rnd_vec,
+                                       dst4, dst5, dst6, dst7);
 
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-        HEVC_UNIW_RND_CLIP4(dst0, dst1, dst2, dst3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, 16);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3);
+        ST_UB2(out0, out1, dst, 16);
+        dst += dst_stride;
+        ST_UB2(out2, out3, dst, 16);
         dst += dst_stride;
     }
 }
@@ -3067,55 +3239,54 @@
                                       uint8_t *dst,
                                       int32_t dst_stride,
                                       const int8_t *filter,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4;
     v16i8 src10_r, src32_r, src21_r, src43_r;
     v16i8 src2110, src4332;
-    v8i16 dst10;
+    v8i16 dst0;
     v4i32 dst0_r, dst0_l;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= src_stride;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
     ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
-    src2110 = (v16i8) __msa_ilvr_d((v2i64) src21_r, (v2i64) src10_r);
-    src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
-    LD_SB2(src, src_stride, src3, src4);
     ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
-    src4332 = (v16i8) __msa_ilvr_d((v2i64) src43_r, (v2i64) src32_r);
-    src4332 = (v16i8) __msa_xori_b((v16u8) src4332, 128);
-
-    dst10 = const_vec;
-    DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst10, dst10);
-
-    ILVRL_H2_SW(dst10, dst10, dst0_r, dst0_l);
+    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+    XORI_B2_128_SB(src2110, src4332);
+    dst0 = HEVC_FILT_4TAP_SH(src2110, src4332, filt0, filt1);
+    ILVRL_H2_SW(dst0, dst0, dst0_r, dst0_l);
     DOTP_SH2_SW(dst0_r, dst0_l, weight_vec, weight_vec, dst0_r, dst0_l);
     SRAR_W2_SW(dst0_r, dst0_l, rnd_vec);
-    ADD2(dst0_r, offset_vec, dst0_l, offset_vec, dst0_r, dst0_l);
-    dst0_r = CLIP_SW_0_255(dst0_r);
-    dst0_l = CLIP_SW_0_255(dst0_l);
-
-    HEVC_PCK_SW_SB2(dst0_l, dst0_r, dst0_r);
-    ST4x2_UB(dst0_r, dst, dst_stride);
+    dst0 = __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
+    dst0 = __msa_adds_s_h(dst0, offset_vec);
+    dst0 = CLIP_SH_0_255_MAX_SATU(dst0);
+    out = (v16u8) __msa_pckev_b((v16i8) dst0, (v16i8) dst0);
+    ST4x2_UB(out, dst, dst_stride);
 }
 
 static void hevc_vt_uniwgt_4t_4x4_msa(uint8_t *src,
@@ -3123,55 +3294,53 @@
                                       uint8_t *dst,
                                       int32_t dst_stride,
                                       const int8_t *filter,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v16i8 src10_r, src32_r, src54_r, src21_r, src43_r, src65_r;
     v16i8 src2110, src4332, src6554;
-    v8i16 dst10, dst32;
-    v4i32 dst0_r, dst1_r, dst0_l, dst1_l;
+    v8i16 dst0, dst1;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= src_stride;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
     ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
-    src2110 = (v16i8) __msa_ilvr_d((v2i64) src21_r, (v2i64) src10_r);
-    src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
-
-    LD_SB4(src, src_stride, src3, src4, src5, src6);
     ILVR_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5,
                src32_r, src43_r, src54_r, src65_r);
-    ILVR_D2_SB(src43_r, src32_r, src65_r, src54_r, src4332, src6554);
-    XORI_B2_128_SB(src4332, src6554);
+    ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r,
+               src2110, src4332, src6554);
+    XORI_B3_128_SB(src2110, src4332, src6554);
+    dst0 = HEVC_FILT_4TAP_SH(src2110, src4332, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(src4332, src6554, filt0, filt1);
+    HEVC_UNIW_RND_CLIP2_MAX_SATU_H(dst0, dst1, weight_vec, offset_vec, rnd_vec,
+                                   dst0, dst1);
 
-    dst10 = const_vec;
-    DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst10, dst10);
-    dst32 = const_vec;
-    DPADD_SB2_SH(src4332, src6554, filt0, filt1, dst32, dst32);
-    HEVC_UNIW_RND_CLIP2(dst10, dst32, weight_vec, offset_vec, rnd_vec,
-                        dst0_r, dst1_r, dst0_l, dst1_l);
-
-    HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-    ST4x4_UB(dst0_r, dst0_r, 0, 1, 2, 3, dst, dst_stride);
-    dst += (4 * dst_stride);
+    out = (v16u8) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
 }
 
 static void hevc_vt_uniwgt_4t_4x8multiple_msa(uint8_t *src,
@@ -3185,26 +3354,33 @@
                                               int32_t rnd_val)
 {
     int32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
     v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
     v16i8 src2110, src4332, src6554, src8776;
-    v8i16 dst10, dst32, dst54, dst76;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
-    v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v16i8 src10998;
+    v8i16 dst0, dst1, dst2, dst3, filt0, filt1;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= src_stride;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
@@ -3215,39 +3391,31 @@
     src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
 
     for (loop_cnt = (height >> 3); loop_cnt--;) {
-        LD_SB6(src, src_stride, src3, src4, src5, src6, src7, src8);
-        src += (6 * src_stride);
+        LD_SB8(src, src_stride,
+               src3, src4, src5, src6, src7, src8, src9, src10);
+        src += (8 * src_stride);
         ILVR_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5,
                    src32_r, src43_r, src54_r, src65_r);
         ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
-        ILVR_D3_SB(src43_r, src32_r, src65_r, src54_r, src87_r, src76_r,
-                   src4332, src6554, src8776);
-        XORI_B3_128_SB(src4332, src6554, src8776);
+        ILVR_B2_SB(src9, src8, src10, src9, src98_r, src109_r);
+        ILVR_D4_SB(src43_r, src32_r, src65_r, src54_r, src87_r, src76_r,
+                   src109_r, src98_r, src4332, src6554, src8776, src10998);
+        XORI_B4_128_SB(src4332, src6554, src8776, src10998);
+        dst0 = HEVC_FILT_4TAP_SH(src2110, src4332, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(src4332, src6554, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(src6554, src8776, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(src8776, src10998, filt0, filt1);
 
-        dst10 = const_vec;
-        DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst10, dst10);
-        dst32 = const_vec;
-        DPADD_SB2_SH(src4332, src6554, filt0, filt1, dst32, dst32);
-        dst54 = const_vec;
-        DPADD_SB2_SH(src6554, src8776, filt0, filt1, dst54, dst54);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3,
+                                       weight_vec, offset_vec, rnd_vec,
+                                       dst0, dst1, dst2, dst3);
 
-        LD_SB2(src, src_stride, src9, src2);
-        src += (2 * src_stride);
-        ILVR_B2_SB(src9, src8, src2, src9, src98_r, src109_r);
-        src2110 = (v16i8) __msa_ilvr_d((v2i64) src109_r, (v2i64) src98_r);
-        src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
-
-        dst76 = const_vec;
-        DPADD_SB2_SH(src8776, src2110, filt0, filt1, dst76, dst76);
-        HEVC_UNIW_RND_CLIP4(dst10, dst32, dst54, dst76,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST4x8_UB(dst0_r, dst1_r, dst, dst_stride);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        ST4x8_UB(out0, out1, dst, dst_stride);
         dst += (8 * dst_stride);
+
+        src2 = src10;
+        src2110 = src10998;
     }
 }
 
@@ -3263,10 +3431,10 @@
 {
     if (2 == height) {
         hevc_vt_uniwgt_4t_4x2_msa(src, src_stride, dst, dst_stride,
-                                  filter, height, weight, offset, rnd_val);
+                                  filter, weight, offset, rnd_val);
     } else if (4 == height) {
         hevc_vt_uniwgt_4t_4x4_msa(src, src_stride, dst, dst_stride,
-                                  filter, height, weight, offset, rnd_val);
+                                  filter, weight, offset, rnd_val);
     } else if (0 == (height % 8)) {
         hevc_vt_uniwgt_4t_4x8multiple_msa(src, src_stride, dst, dst_stride,
                                           filter, height, weight, offset,
@@ -3284,64 +3452,66 @@
                                      int32_t offset,
                                      int32_t rnd_val)
 {
-    int32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4;
+    v16u8 out0, out1, out2, out3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v16i8 src10_r, src32_r, src21_r, src43_r;
-    v8i16 tmp0, tmp1, tmp2, tmp3;
+    v16i8 src54_r, src65_r, src76_r, src87_r, src98_r, src109_r;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= src_stride;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     LD_SB3(src, src_stride, src0, src1, src2);
     src += (3 * src_stride);
+    LD_SB8(src, src_stride, src3, src4, src5, src6, src7, src8, src9, src10);
     XORI_B3_128_SB(src0, src1, src2);
+    XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
     ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
+    ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
+    ILVR_B2_SB(src5, src4, src6, src5, src54_r, src65_r);
+    ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
+    ILVR_B2_SB(src9, src8, src10, src9, src98_r, src109_r);
+    dst0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
+    dst4 = HEVC_FILT_4TAP_SH(src54_r, src76_r, filt0, filt1);
+    dst5 = HEVC_FILT_4TAP_SH(src65_r, src87_r, filt0, filt1);
+    dst6 = HEVC_FILT_4TAP_SH(src76_r, src98_r, filt0, filt1);
+    dst7 = HEVC_FILT_4TAP_SH(src87_r, src109_r, filt0, filt1);
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB2(src, src_stride, src3, src4);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src3, src4);
-        ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
+    HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3,
+                                   weight_vec, offset_vec, rnd_vec,
+                                   dst0, dst1, dst2, dst3);
+    HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7,
+                                   weight_vec, offset_vec, rnd_vec,
+                                   dst4, dst5, dst6, dst7);
 
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-
-        LD_SB2(src, src_stride, src1, src2);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src1, src2);
-        ILVR_B2_SB(src1, src4, src2, src1, src10_r, src21_r);
-
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, tmp3, tmp3);
-        HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-
-        ST6x4_UB(dst0_r, dst1_r, dst, dst_stride);
-        dst += (4 * dst_stride);
-    }
+    PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+    PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3);
+    ST6x4_UB(out0, out1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    ST6x4_UB(out2, out3, dst, dst_stride);
 }
 
 static void hevc_vt_uniwgt_4t_8x2_msa(uint8_t *src,
@@ -3349,49 +3519,105 @@
                                       uint8_t *dst,
                                       int32_t dst_stride,
                                       const int8_t *filter,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4;
     v16i8 src10_r, src32_r, src21_r, src43_r;
-    v8i16 tmp0, tmp1;
+    v8i16 dst0, dst1;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst0_l, dst1_l;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= src_stride;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
     ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
-    LD_SB2(src, src_stride, src3, src4);
-    XORI_B2_128_SB(src3, src4);
     ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
+    dst0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
 
-    tmp0 = const_vec;
-    DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-    tmp1 = const_vec;
-    DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-    HEVC_UNIW_RND_CLIP2(tmp0, tmp1, weight_vec, offset_vec, rnd_vec,
-                        dst0_r, dst1_r, dst0_l, dst1_l);
+    HEVC_UNIW_RND_CLIP2_MAX_SATU_H(dst0, dst1, weight_vec, offset_vec, rnd_vec,
+                                   dst0, dst1);
 
-    HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-    ST8x2_UB(dst0_r, dst, dst_stride);
+    out = (v16u8) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
+    ST8x2_UB(out, dst, dst_stride);
+}
+
+static void hevc_vt_uniwgt_4t_8x4_msa(uint8_t *src,
+                                      int32_t src_stride,
+                                      uint8_t *dst,
+                                      int32_t dst_stride,
+                                      const int8_t *filter,
+                                      int32_t weight,
+                                      int32_t offset,
+                                      int32_t rnd_val)
+{
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 src10_r, src32_r, src21_r, src43_r;
+    v16i8 src5, src6, src54_r, src65_r;
+    v8i16 filt0, filt1;
+    v8i16 dst0, dst1, dst2, dst3;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
+
+    src -= src_stride;
+
+    weight = weight & 0x0000FFFF;
+
+    weight_vec = __msa_fill_w(weight);
+    rnd_vec = __msa_fill_w(rnd_val);
+
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
+    filter_vec = LD_SH(filter);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src += (3 * src_stride);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+    ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
+    ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
+    ILVR_B2_SB(src5, src4, src6, src5, src54_r, src65_r);
+    dst0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
+    HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3, weight_vec,
+                                   offset_vec, rnd_vec, dst0, dst1, dst2,
+                                   dst3);
+    PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+    ST8x4_UB(out0, out1, dst, dst_stride);
 }
 
 static void hevc_vt_uniwgt_4t_8x6_msa(uint8_t *src,
@@ -3399,101 +3625,102 @@
                                       uint8_t *dst,
                                       int32_t dst_stride,
                                       const int8_t *filter,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
+    v16u8 out0, out1, out2;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
     v16i8 src10_r, src32_r, src54_r, src76_r;
     v16i8 src21_r, src43_r, src65_r, src87_r;
-    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= src_stride;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     LD_SB3(src, src_stride, src0, src1, src2);
     src += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
-    ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
-
     LD_SB6(src, src_stride, src3, src4, src5, src6, src7, src8);
+
+    XORI_B3_128_SB(src0, src1, src2);
     XORI_B6_128_SB(src3, src4, src5, src6, src7, src8);
-    ILVR_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5,
-               src32_r, src43_r, src54_r, src65_r);
-    ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
-
-    tmp0 = const_vec;
-    DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-    tmp1 = const_vec;
-    DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-    tmp2 = const_vec;
-    DPADD_SB2_SH(src32_r, src54_r, filt0, filt1, tmp2, tmp2);
-    tmp3 = const_vec;
-    DPADD_SB2_SH(src43_r, src65_r, filt0, filt1, tmp3, tmp3);
-    tmp4 = const_vec;
-    DPADD_SB2_SH(src54_r, src76_r, filt0, filt1, tmp4, tmp4);
-    tmp5 = const_vec;
-    DPADD_SB2_SH(src65_r, src87_r, filt0, filt1, tmp5, tmp5);
-    HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                        weight_vec, offset_vec, rnd_vec,
-                        dst0_r, dst1_r, dst2_r, dst3_r,
-                        dst0_l, dst1_l, dst2_l, dst3_l);
-    HEVC_UNIW_RND_CLIP2(tmp4, tmp5, weight_vec, offset_vec, rnd_vec,
-                        dst4_r, dst5_r, dst4_l, dst5_l);
-
-    HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                     dst2_l, dst2_r, dst3_l, dst3_r,
-                     dst4_l, dst4_r, dst5_l, dst5_r, dst0_r, dst1_r, dst2_r);
-    ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+               src76_r, src87_r);
+    dst0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
+    dst4 = HEVC_FILT_4TAP_SH(src54_r, src76_r, filt0, filt1);
+    dst5 = HEVC_FILT_4TAP_SH(src65_r, src87_r, filt0, filt1);
+    HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3, weight_vec,
+                                   offset_vec, rnd_vec, dst0, dst1, dst2, dst3);
+    HEVC_UNIW_RND_CLIP2_MAX_SATU_H(dst4, dst5, weight_vec, offset_vec, rnd_vec,
+                                   dst4, dst5);
+    PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+    ST8x4_UB(out0, out1, dst, dst_stride);
     dst += (4 * dst_stride);
-    ST8x2_UB(dst2_r, dst, dst_stride);
+    ST8x2_UB(out2, dst, dst_stride);
 }
 
-static void hevc_vt_uniwgt_4t_8x4multiple_msa(uint8_t *src,
-                                              int32_t src_stride,
-                                              uint8_t *dst,
-                                              int32_t dst_stride,
-                                              const int8_t *filter,
-                                              int32_t height,
-                                              int32_t weight,
-                                              int32_t offset,
-                                              int32_t rnd_val)
+static void hevc_vt_uniwgt_4t_8x8mult_msa(uint8_t *src,
+                                          int32_t src_stride,
+                                          uint8_t *dst,
+                                          int32_t dst_stride,
+                                          const int8_t *filter,
+                                          int32_t height,
+                                          int32_t weight,
+                                          int32_t offset,
+                                          int32_t rnd_val)
 {
     int32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4;
+    v16u8 out0, out1, out2, out3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v16i8 src10_r, src32_r, src21_r, src43_r;
-    v8i16 tmp0, tmp1, tmp2, tmp3;
+    v16i8 src54_r, src65_r, src76_r, src87_r, src98_r, src109_r;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= src_stride;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
@@ -3502,35 +3729,37 @@
     XORI_B3_128_SB(src0, src1, src2);
     ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB2(src, src_stride, src3, src4);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src3, src4);
+    for (loop_cnt = (height >> 3); loop_cnt--;) {
+        LD_SB8(src, src_stride,
+               src3, src4, src5, src6, src7, src8, src9, src10);
+        src += (8 * src_stride);
+        XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
         ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
+        ILVR_B2_SB(src5, src4, src6, src5, src54_r, src65_r);
+        ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
+        ILVR_B2_SB(src9, src8, src10, src9, src98_r, src109_r);
+        dst0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
+        dst4 = HEVC_FILT_4TAP_SH(src54_r, src76_r, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(src65_r, src87_r, filt0, filt1);
+        dst6 = HEVC_FILT_4TAP_SH(src76_r, src98_r, filt0, filt1);
+        dst7 = HEVC_FILT_4TAP_SH(src87_r, src109_r, filt0, filt1);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3, weight_vec,
+                                       offset_vec, rnd_vec, dst0, dst1, dst2,
+                                       dst3);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7, weight_vec,
+                                       offset_vec, rnd_vec, dst4, dst5, dst6,
+                                       dst7);
+        PCKEV_B2_UB(dst1, dst0, dst3, dst2, out0, out1);
+        PCKEV_B2_UB(dst5, dst4, dst7, dst6, out2, out3);
+        ST8x8_UB(out0, out1, out2, out3, dst, dst_stride);
+        dst += (8 * dst_stride);
 
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-
-        LD_SB2(src, src_stride, src1, src2);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src1, src2);
-        ILVR_B2_SB(src1, src4, src2, src1, src10_r, src21_r);
-
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, tmp3, tmp3);
-        HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
-        dst += (4 * dst_stride);
+        src2 = src10;
+        src10_r = src98_r;
+        src21_r = src109_r;
     }
 }
 
@@ -3546,14 +3775,17 @@
 {
     if (2 == height) {
         hevc_vt_uniwgt_4t_8x2_msa(src, src_stride, dst, dst_stride,
-                                  filter, height, weight, offset, rnd_val);
+                                  filter, weight, offset, rnd_val);
+    } else if (4 == height) {
+        hevc_vt_uniwgt_4t_8x4_msa(src, src_stride, dst, dst_stride,
+                                  filter, weight, offset, rnd_val);
     } else if (6 == height) {
         hevc_vt_uniwgt_4t_8x6_msa(src, src_stride, dst, dst_stride,
-                                  filter, height, weight, offset, rnd_val);
+                                  filter, weight, offset, rnd_val);
     } else {
-        hevc_vt_uniwgt_4t_8x4multiple_msa(src, src_stride, dst, dst_stride,
-                                          filter, height, weight, offset,
-                                          rnd_val);
+        hevc_vt_uniwgt_4t_8x8mult_msa(src, src_stride, dst, dst_stride,
+                                      filter, height, weight, offset,
+                                      rnd_val);
     }
 }
 
@@ -3568,27 +3800,35 @@
                                       int32_t rnd_val)
 {
     int32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5;
+    v16u8 out0, out1, out2, out3, out4, out5;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v16i8 src10_r, src32_r, src21_r, src43_r;
-    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
     v16i8 src10_l, src32_l, src54_l, src21_l, src43_l, src65_l;
     v16i8 src2110, src4332;
+    v16i8 src54_r, src76_r, src98_r, src65_r, src87_r, src109_r;
+    v16i8 src76_l, src98_l, src87_l, src109_l, src6554, src8776, src10998;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
+    v8i16 dst9, dst10, dst11, filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= (1 * src_stride);
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
@@ -3599,47 +3839,56 @@
     ILVL_B2_SB(src1, src0, src2, src1, src10_l, src21_l);
     src2110 = (v16i8) __msa_ilvr_d((v2i64) src21_l, (v2i64) src10_l);
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB2(src, src_stride, src3, src4);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src3, src4);
-        ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
-        ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB8(src, src_stride, src3, src4, src5, src6, src7, src8, src9, src10);
+        src += (8 * src_stride);
+        XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
+        ILVRL_B2_SB(src3, src2, src32_r, src32_l);
+        ILVRL_B2_SB(src4, src3, src43_r, src43_l);
+        ILVRL_B2_SB(src5, src4, src54_r, src54_l);
+        ILVRL_B2_SB(src6, src5, src65_r, src65_l);
         src4332 = (v16i8) __msa_ilvr_d((v2i64) src43_l, (v2i64) src32_l);
-
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-        tmp4 = const_vec;
-        DPADD_SB2_SH(src2110, src4332, filt0, filt1, tmp4, tmp4);
-
-        LD_SB2(src, src_stride, src5, src2);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src5, src2);
-        ILVR_B2_SB(src5, src4, src2, src5, src10_r, src21_r);
-        ILVL_B2_SB(src5, src4, src2, src5, src54_l, src65_l);
-        src2110 = (v16i8) __msa_ilvr_d((v2i64) src65_l, (v2i64) src54_l);
-
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, tmp3, tmp3);
-        tmp5 = const_vec;
-        DPADD_SB2_SH(src4332, src2110, filt0, filt1, tmp5, tmp5);
-        HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-        HEVC_UNIW_RND_CLIP2(tmp4, tmp5, weight_vec, offset_vec, rnd_vec,
-                            dst4_r, dst5_r, dst4_l, dst5_l);
-
-        HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                         dst2_l, dst2_r, dst3_l, dst3_r,
-                         dst4_l, dst4_r, dst5_l, dst5_r,
-                         dst0_r, dst1_r, dst2_r);
-        ST12x4_UB(dst0_r, dst1_r, dst2_r, dst, dst_stride);
+        src6554 = (v16i8) __msa_ilvr_d((v2i64) src65_l, (v2i64) src54_l);
+        dst0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
+        dst4 = HEVC_FILT_4TAP_SH(src2110, src4332, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(src4332, src6554, filt0, filt1);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3, weight_vec,
+                                       offset_vec, rnd_vec, dst0, dst1, dst2,
+                                       dst3);
+        HEVC_UNIW_RND_CLIP2_MAX_SATU_H(dst4, dst5, weight_vec, offset_vec,
+                                       rnd_vec, dst4, dst5);
+        PCKEV_B3_UB(dst1, dst0, dst3, dst2, dst5, dst4, out0, out1, out2);
+        ST12x4_UB(out0, out1, out2, dst, dst_stride);
         dst += (4 * dst_stride);
+
+        ILVRL_B2_SB(src7, src6, src76_r, src76_l);
+        ILVRL_B2_SB(src8, src7, src87_r, src87_l);
+        ILVRL_B2_SB(src9, src8, src98_r, src98_l);
+        ILVRL_B2_SB(src10, src9, src109_r, src109_l);
+        src8776 = (v16i8) __msa_ilvr_d((v2i64) src87_l, (v2i64) src76_l);
+        src10998 = (v16i8) __msa_ilvr_d((v2i64) src109_l, (v2i64) src98_l);
+        dst6 = HEVC_FILT_4TAP_SH(src54_r, src76_r, filt0, filt1);
+        dst7 = HEVC_FILT_4TAP_SH(src65_r, src87_r, filt0, filt1);
+        dst8 = HEVC_FILT_4TAP_SH(src76_r, src98_r, filt0, filt1);
+        dst9 = HEVC_FILT_4TAP_SH(src87_r, src109_r, filt0, filt1);
+        dst10 = HEVC_FILT_4TAP_SH(src6554, src8776, filt0, filt1);
+        dst11 = HEVC_FILT_4TAP_SH(src8776, src10998, filt0, filt1);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst6, dst7, dst8, dst9, weight_vec,
+                                       offset_vec, rnd_vec, dst6, dst7, dst8,
+                                       dst9);
+        HEVC_UNIW_RND_CLIP2_MAX_SATU_H(dst10, dst11, weight_vec, offset_vec,
+                                       rnd_vec, dst10, dst11);
+        PCKEV_B3_UB(dst7, dst6, dst9, dst8, dst11, dst10, out3, out4, out5);
+        ST12x4_UB(out3, out4, out5, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        src2 = src10;
+        src10_r = src98_r;
+        src21_r = src109_r;
+        src2110 = src10998;
     }
 }
 
@@ -3654,25 +3903,33 @@
                                       int32_t rnd_val)
 {
     int32_t loop_cnt;
+    v16u8 out0, out1, out2, out3;
     v16i8 src0, src1, src2, src3, src4, src5;
     v16i8 src10_r, src32_r, src21_r, src43_r;
     v16i8 src10_l, src32_l, src21_l, src43_l;
-    v8i16 tmp0, tmp1, tmp2, tmp3;
+    v16i8 src54_r, src54_l, src65_r, src65_l, src6;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst0_l, dst1_l, dst2_l, dst3_l;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= src_stride;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
@@ -3683,53 +3940,37 @@
     ILVL_B2_SB(src1, src0, src2, src1, src10_l, src21_l);
 
     for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB2(src, src_stride, src3, src4);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src3, src4);
-        ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
-        ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
+        LD_SB4(src, src_stride, src3, src4, src5, src6);
+        src += (4 * src_stride);
+        XORI_B4_128_SB(src3, src4, src5, src6);
+        ILVRL_B2_SB(src3, src2, src32_r, src32_l);
+        ILVRL_B2_SB(src4, src3, src43_r, src43_l);
+        ILVRL_B2_SB(src5, src4, src54_r, src54_l);
+        ILVRL_B2_SB(src6, src5, src65_r, src65_l);
+        dst0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
+        dst4 = HEVC_FILT_4TAP_SH(src10_l, src32_l, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(src21_l, src43_l, filt0, filt1);
+        dst6 = HEVC_FILT_4TAP_SH(src32_l, src54_l, filt0, filt1);
+        dst7 = HEVC_FILT_4TAP_SH(src43_l, src65_l, filt0, filt1);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3, weight_vec,
+                                       offset_vec, rnd_vec, dst0, dst1, dst2,
+                                       dst3);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7, weight_vec,
+                                       offset_vec, rnd_vec, dst4, dst5, dst6,
+                                       dst7);
+        PCKEV_B4_UB(dst4, dst0, dst5, dst1, dst6, dst2, dst7, dst3, out0, out1,
+                    out2, out3);
+        ST_UB4(out0, out1, out2, out3, dst, dst_stride);
+        dst += (4 * dst_stride);
 
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src10_l, src32_l, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src21_l, src43_l, filt0, filt1, tmp3, tmp3);
-        HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                        dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
-        dst += (2 * dst_stride);
-
-        LD_SB2(src, src_stride, src5, src2);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src5, src2);
-        ILVR_B2_SB(src5, src4, src2, src5, src10_r, src21_r);
-        ILVL_B2_SB(src5, src4, src2, src5, src10_l, src21_l);
-
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, tmp0, tmp0);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, tmp1, tmp1);
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src32_l, src10_l, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src43_l, src21_l, filt0, filt1, tmp3, tmp3);
-        HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp2, tmp3,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                        dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
-        dst += (2 * dst_stride);
+        src2 = src6;
+        src10_r = src54_r;
+        src21_r = src65_r;
+        src10_l = src54_l;
+        src21_l = src65_l;
     }
 }
 
@@ -3744,113 +3985,94 @@
                                       int32_t rnd_val)
 {
     uint32_t loop_cnt;
+    v16u8 out0, out1, out2, out3, out4, out5;
     v16i8 src0, src1, src2, src3, src4, src5;
-    v16i8 src6, src7, src8, src9, src10, src11;
-    v16i8 src10_r, src32_r, src76_r, src98_r;
-    v16i8 src21_r, src43_r, src87_r, src109_r;
-    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    v16i8 src10_l, src32_l, src21_l, src43_l;
+    v16i8 src6, src7, src8, src9, src10, src11, src12, src13;
+    v16i8 src10_r, src32_r, src54_r, src21_r, src43_r, src65_r;
+    v16i8 src10_l, src32_l, src54_l, src21_l, src43_l, src65_l;
+    v16i8 src87_r, src98_r, src109_r, src1110_r, src1211_r, src1312_r;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, dst9, dst10;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec, dst11;
+    v4i32 weight_vec, rnd_vec;
 
     src -= src_stride;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     LD_SB3(src, src_stride, src0, src1, src2);
+    LD_SB3(src + 16, src_stride, src7, src8, src9);
+    src += (3 * src_stride);
     XORI_B3_128_SB(src0, src1, src2);
+    XORI_B3_128_SB(src7, src8, src9);
     ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
     ILVL_B2_SB(src1, src0, src2, src1, src10_l, src21_l);
+    ILVR_B2_SB(src8, src7, src9, src8, src87_r, src98_r);
 
-    LD_SB3(src + 16, src_stride, src6, src7, src8);
-    src += (3 * src_stride);
-    XORI_B3_128_SB(src6, src7, src8);
-    ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
-
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB2(src, src_stride, src3, src4);
-        XORI_B2_128_SB(src3, src4);
+    for (loop_cnt = 8; loop_cnt--;) {
+        LD_SB4(src, src_stride, src3, src4, src5, src6);
+        LD_SB4(src + 16, src_stride, src10, src11, src12, src13);
+        src += (4 * src_stride);
+        XORI_B4_128_SB(src3, src4, src5, src6);
+        XORI_B4_128_SB(src10, src11, src12, src13);
         ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
         ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
-        LD_SB2(src + 16, src_stride, src9, src10);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src9, src10);
-        ILVR_B2_SB(src9, src8, src10, src9, src98_r, src109_r);
+        ILVRL_B2_SB(src5, src4, src54_r, src54_l);
+        ILVRL_B2_SB(src6, src5, src65_r, src65_l);
+        ILVR_B2_SB(src10, src9, src11, src10, src109_r, src1110_r);
+        ILVR_B2_SB(src12, src11, src13, src12, src1211_r, src1312_r);
+        dst0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(src32_r, src54_r, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(src43_r, src65_r, filt0, filt1);
+        dst4 = HEVC_FILT_4TAP_SH(src10_l, src32_l, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(src21_l, src43_l, filt0, filt1);
+        dst6 = HEVC_FILT_4TAP_SH(src32_l, src54_l, filt0, filt1);
+        dst7 = HEVC_FILT_4TAP_SH(src43_l, src65_l, filt0, filt1);
+        dst8 = HEVC_FILT_4TAP_SH(src87_r, src109_r, filt0, filt1);
+        dst9 = HEVC_FILT_4TAP_SH(src98_r, src1110_r, filt0, filt1);
+        dst10 = HEVC_FILT_4TAP_SH(src109_r, src1211_r, filt0, filt1);
+        dst11 = HEVC_FILT_4TAP_SH(src1110_r, src1312_r, filt0, filt1);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3, weight_vec,
+                                       offset_vec, rnd_vec, dst0, dst1, dst2,
+                                       dst3);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7, weight_vec,
+                                       offset_vec, rnd_vec, dst4, dst5, dst6,
+                                       dst7);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst8, dst9, dst10, dst11, weight_vec,
+                                       offset_vec, rnd_vec, dst8, dst9, dst10,
+                                       dst11);
+        PCKEV_B4_UB(dst4, dst0, dst5, dst1, dst6, dst2, dst7, dst3, out0, out1,
+                    out2, out3);
+        PCKEV_B2_UB(dst9, dst8, dst11, dst10, out4, out5);
+        ST_UB4(out0, out1, out2, out3, dst, dst_stride);
+        ST8x4_UB(out4, out5, dst + 16, dst_stride);
+        dst += (4 * dst_stride);
 
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp4 = const_vec;
-        DPADD_SB2_SH(src10_l, src32_l, filt0, filt1, tmp4, tmp4);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-        tmp5 = const_vec;
-        DPADD_SB2_SH(src21_l, src43_l, filt0, filt1, tmp5, tmp5);
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src76_r, src98_r, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src87_r, src109_r, filt0, filt1, tmp3, tmp3);
-
-        HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp4, tmp5,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-        HEVC_UNIW_RND_CLIP2(tmp2, tmp3, weight_vec, offset_vec, rnd_vec,
-                            dst4_r, dst5_r, dst4_l, dst5_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                        dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        HEVC_PCK_SW_SB4(dst4_l, dst4_r, dst5_l, dst5_r, dst4_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
-        ST8x2_UB(dst4_r, dst + 16, dst_stride);
-        dst += (2 * dst_stride);
-
-        LD_SB2(src, src_stride, src5, src2);
-        XORI_B2_128_SB(src5, src2);
-        ILVR_B2_SB(src5, src4, src2, src5, src10_r, src21_r);
-        ILVL_B2_SB(src5, src4, src2, src5, src10_l, src21_l);
-        LD_SB2(src + 16, src_stride, src11, src8);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src11, src8);
-        ILVR_B2_SB(src11, src10, src8, src11, src76_r, src87_r);
-
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, tmp0, tmp0);
-        tmp4 = const_vec;
-        DPADD_SB2_SH(src32_l, src10_l, filt0, filt1, tmp4, tmp4);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, tmp1, tmp1);
-        tmp5 = const_vec;
-        DPADD_SB2_SH(src43_l, src21_l, filt0, filt1, tmp5, tmp5);
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src98_r, src76_r, filt0, filt1, tmp2, tmp2);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src109_r, src87_r, filt0, filt1, tmp3, tmp3);
-
-        HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp4, tmp5,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-        HEVC_UNIW_RND_CLIP2(tmp2, tmp3, weight_vec, offset_vec, rnd_vec,
-                            dst4_r, dst5_r, dst4_l, dst5_l);
-
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                        dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        HEVC_PCK_SW_SB4(dst4_l, dst4_r, dst5_l, dst5_r, dst4_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
-        ST8x2_UB(dst4_r, dst + 16, dst_stride);
-        dst += (2 * dst_stride);
+        src2 = src6;
+        src9 = src13;
+        src10_r = src54_r;
+        src21_r = src65_r;
+        src10_l = src54_l;
+        src21_l = src65_l;
+        src87_r = src1211_r;
+        src98_r = src1312_r;
     }
 }
 
@@ -3864,104 +4086,87 @@
                                       int32_t offset,
                                       int32_t rnd_val)
 {
-    int32_t loop_cnt;
-    uint8_t *dst_tmp = dst + 16;
-    v16i8 src0, src1, src2, src3, src4, src6, src7, src8, src9, src10;
+    uint32_t loop_cnt;
+    v16u8 out0, out1, out2, out3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
     v16i8 src10_r, src32_r, src76_r, src98_r;
-    v16i8 src21_r, src43_r, src87_r, src109_r;
-    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v16i8 src21_r, src43_r, src65_r, src87_r;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
     v16i8 src10_l, src32_l, src76_l, src98_l;
-    v16i8 src21_l, src43_l, src87_l, src109_l;
+    v16i8 src21_l, src43_l, src65_l, src87_l;
     v8i16 filt0, filt1;
-    v8i16 filter_vec, const_vec;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r, dst6_r, dst7_r;
-    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, dst4_l, dst5_l, dst6_l, dst7_l;
+    v8i16 filter_vec, weight_vec_h, offset_vec, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= src_stride;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
     weight = weight & 0x0000FFFF;
 
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    weight *= 128;
+    rnd_val -= 6;
+
+    weight_vec_h = __msa_fill_h(weight);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val);
+
+    weight_vec_h = __msa_srar_h(weight_vec_h, denom_vec);
+    offset_vec = __msa_adds_s_h(offset_vec, weight_vec_h);
+
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     LD_SB3(src, src_stride, src0, src1, src2);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB3(src + 16, src_stride, src5, src6, src7);
+    src += (3 * src_stride);
+    XORI_B6_128_SB(src0, src1, src2, src5, src6, src7);
     ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
     ILVL_B2_SB(src1, src0, src2, src1, src10_l, src21_l);
-
-    LD_SB3(src + 16, src_stride, src6, src7, src8);
-    src += (3 * src_stride);
-    XORI_B3_128_SB(src6, src7, src8);
-    ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
-    ILVL_B2_SB(src7, src6, src8, src7, src76_l, src87_l);
+    ILVR_B2_SB(src6, src5, src7, src6, src65_r, src76_r);
+    ILVL_B2_SB(src6, src5, src7, src6, src65_l, src76_l);
 
     for (loop_cnt = (height >> 1); loop_cnt--;) {
         LD_SB2(src, src_stride, src3, src4);
-        XORI_B2_128_SB(src3, src4);
+        LD_SB2(src + 16, src_stride, src8, src9);
+        src += (2 * src_stride);
+        XORI_B4_128_SB(src3, src4, src8, src9);
         ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
         ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
+        ILVRL_B2_SB(src8, src7, src87_r, src87_l);
+        ILVRL_B2_SB(src9, src8, src98_r, src98_l);
+        dst0 = HEVC_FILT_4TAP_SH(src10_r, src32_r, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(src21_r, src43_r, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(src10_l, src32_l, filt0, filt1);
+        dst3 = HEVC_FILT_4TAP_SH(src21_l, src43_l, filt0, filt1);
+        dst4 = HEVC_FILT_4TAP_SH(src65_r, src87_r, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(src76_r, src98_r, filt0, filt1);
+        dst6 = HEVC_FILT_4TAP_SH(src65_l, src87_l, filt0, filt1);
+        dst7 = HEVC_FILT_4TAP_SH(src76_l, src98_l, filt0, filt1);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst0, dst1, dst2, dst3, weight_vec,
+                                       offset_vec, rnd_vec, dst0, dst1, dst2,
+                                       dst3);
+        HEVC_UNIW_RND_CLIP4_MAX_SATU_H(dst4, dst5, dst6, dst7, weight_vec,
+                                       offset_vec, rnd_vec, dst4, dst5, dst6,
+                                       dst7);
+        PCKEV_B4_UB(dst2, dst0, dst3, dst1, dst6, dst4, dst7, dst5, out0, out1,
+                    out2, out3);
+        ST_UB2(out0, out2, dst, 16);
+        dst += dst_stride;
+        ST_UB2(out1, out3, dst, 16);
+        dst += dst_stride;
 
-        tmp0 = const_vec;
-        DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, tmp0, tmp0);
-        tmp4 = const_vec;
-        DPADD_SB2_SH(src10_l, src32_l, filt0, filt1, tmp4, tmp4);
-        tmp1 = const_vec;
-        DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, tmp1, tmp1);
-        tmp5 = const_vec;
-        DPADD_SB2_SH(src21_l, src43_l, filt0, filt1, tmp5, tmp5);
-
-        HEVC_UNIW_RND_CLIP4(tmp0, tmp1, tmp4, tmp5,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst0_r, dst1_r, dst2_r, dst3_r,
-                            dst0_l, dst1_l, dst2_l, dst3_l);
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst2_l, dst2_r,
-                        dst1_l, dst1_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST_SW2(dst0_r, dst1_r, dst, dst_stride);
-        dst += (2 * dst_stride);
-
+        src2 = src4;
+        src7 = src9;
         src10_r = src32_r;
         src21_r = src43_r;
         src10_l = src32_l;
         src21_l = src43_l;
-        src2 = src4;
-
-        LD_SB2(src + 16, src_stride, src9, src10);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src9, src10);
-        ILVR_B2_SB(src9, src8, src10, src9, src98_r, src109_r);
-        ILVL_B2_SB(src9, src8, src10, src9, src98_l, src109_l);
-
-        tmp2 = const_vec;
-        DPADD_SB2_SH(src76_r, src98_r, filt0, filt1, tmp2, tmp2);
-        tmp6 = const_vec;
-        DPADD_SB2_SH(src76_l, src98_l, filt0, filt1, tmp6, tmp6);
-        tmp3 = const_vec;
-        DPADD_SB2_SH(src87_r, src109_r, filt0, filt1, tmp3, tmp3);
-        tmp7 = const_vec;
-        DPADD_SB2_SH(src87_l, src109_l, filt0, filt1, tmp7, tmp7);
-
-        HEVC_UNIW_RND_CLIP4(tmp2, tmp3, tmp6, tmp7,
-                            weight_vec, offset_vec, rnd_vec,
-                            dst4_r, dst5_r, dst6_r, dst7_r,
-                            dst4_l, dst5_l, dst6_l, dst7_l);
-
-        HEVC_PCK_SW_SB8(dst4_l, dst4_r, dst6_l, dst6_r,
-                        dst5_l, dst5_r, dst7_l, dst7_r, dst4_r, dst5_r);
-        ST_SW2(dst4_r, dst5_r, dst_tmp, dst_stride);
-        dst_tmp += (2 * dst_stride);
-
+        src65_r = src87_r;
         src76_r = src98_r;
-        src87_r = src109_r;
+        src65_l = src87_l;
         src76_l = src98_l;
-        src87_l = src109_l;
-        src8 = src10;
     }
 }
 
@@ -3971,22 +4176,20 @@
                                       int32_t dst_stride,
                                       const int8_t *filter_x,
                                       const int8_t *filter_y,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
+    v8i16 filt_h0, filt_h1, filter_vec, tmp;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
-    v4i32 dst0_r, dst1_r;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 dst20, dst31, dst42, dst10, dst32, dst21, dst43;
+    v8i16 offset_vec, const_128, denom_vec;
+    v4i32 dst0, dst1, weight_vec, rnd_vec;
 
     src -= (src_stride + 1);
 
@@ -3994,63 +4197,41 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val - 6);
+    const_128 = __msa_fill_h((128 * weight));
+    offset_vec += __msa_srar_h(const_128, denom_vec);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
-    LD_SB2(src, src_stride, src3, src4);
-    XORI_B2_128_SB(src3, src4);
-
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-    dst32_r = __msa_ilvr_h(dst3, dst2);
-    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-    dst0_r >>= 6;
-
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    dst43_r = __msa_ilvr_h(dst4, dst3);
-    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-    dst1_r >>= 6;
-
-    MUL2(dst0_r, weight_vec, dst1_r, weight_vec, dst0_r, dst1_r);
-    SRAR_W2_SW(dst0_r, dst1_r, rnd_vec);
-    ADD2(dst0_r, offset_vec, dst1_r, offset_vec, dst0_r, dst1_r);
-    dst0_r = CLIP_SW_0_255(dst0_r);
-    dst1_r = CLIP_SW_0_255(dst1_r);
-
-    HEVC_PCK_SW_SB2(dst1_r, dst0_r, dst0_r);
-    ST4x2_UB(dst0_r, dst, dst_stride);
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    VSHF_B2_SB(src0, src2, src0, src2, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src3, src1, src3, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src4, src2, src4, mask0, mask1, vec4, vec5);
+    dst20 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst31 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst42 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    ILVRL_H2_SH(dst31, dst20, dst10, dst32);
+    ILVRL_H2_SH(dst42, dst31, dst21, dst43);
+    dst0 = HEVC_FILT_4TAP(dst10, dst32, filt_h0, filt_h1);
+    dst1 = HEVC_FILT_4TAP(dst21, dst43, filt_h0, filt_h1);
+    dst0 >>= 6;
+    dst1 >>= 6;
+    MUL2(dst0, weight_vec, dst1, weight_vec, dst0, dst1);
+    SRAR_W2_SW(dst0, dst1, rnd_vec);
+    tmp = __msa_pckev_h((v8i16) dst1, (v8i16) dst0);
+    tmp += offset_vec;
+    tmp = CLIP_SH_0_255_MAX_SATU(tmp);
+    out = (v16u8) __msa_pckev_b((v16i8) tmp, (v16i8) tmp);
+    ST4x2_UB(out, dst, dst_stride);
 }
 
 static void hevc_hv_uniwgt_4t_4x4_msa(uint8_t *src,
@@ -4059,22 +4240,20 @@
                                       int32_t dst_stride,
                                       const int8_t *filter_x,
                                       const int8_t *filter_y,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1, filter_vec, tmp0, tmp1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dst30, dst41, dst52, dst63, dst10, dst32, dst54, dst21, dst43, dst65;
+    v8i16 offset_vec, const_128, denom_vec;
+    v4i32 dst0, dst1, dst2, dst3, weight_vec, rnd_vec;
 
     src -= (src_stride + 1);
 
@@ -4082,76 +4261,46 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val - 6);
+    const_128 = __msa_fill_h((128 * weight));
+    offset_vec += __msa_srar_h(const_128, denom_vec);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
-
-    LD_SB4(src, src_stride, src3, src4, src5, src6);
-    XORI_B4_128_SB(src3, src4, src5, src6);
-
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-    dst32_r = __msa_ilvr_h(dst3, dst2);
-    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-    dst0_r >>= 6;
-
-    /* row 4 */
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-    dst43_r = __msa_ilvr_h(dst4, dst3);
-    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-    dst1_r >>= 6;
-
-    /* row 5 */
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-    dst10_r = __msa_ilvr_h(dst5, dst4);
-    dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-    dst2_r >>= 6;
-
-    /* row 6 */
-    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-    dst21_r = __msa_ilvr_h(dst2, dst5);
-    dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-    dst3_r >>= 6;
-
-    HEVC_HV_UNIW_RND_CLIP4(dst0_r, dst1_r, dst2_r, dst3_r,
-                           weight_vec, offset_vec, rnd_vec,
-                           dst0_r, dst1_r, dst2_r, dst3_r);
-    HEVC_PCK_SW_SB4(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r);
-    ST4x4_UB(dst0_r, dst0_r, 0, 1, 2, 3, dst, dst_stride);
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+    VSHF_B2_SB(src0, src3, src0, src3, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src4, src1, src4, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src5, src2, src5, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src6, src3, src6, mask0, mask1, vec6, vec7);
+    dst30 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst41 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst52 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst63 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    ILVRL_H2_SH(dst41, dst30, dst10, dst43);
+    ILVRL_H2_SH(dst52, dst41, dst21, dst54);
+    ILVRL_H2_SH(dst63, dst52, dst32, dst65);
+    dst0 = HEVC_FILT_4TAP(dst10, dst32, filt_h0, filt_h1);
+    dst1 = HEVC_FILT_4TAP(dst21, dst43, filt_h0, filt_h1);
+    dst2 = HEVC_FILT_4TAP(dst32, dst54, filt_h0, filt_h1);
+    dst3 = HEVC_FILT_4TAP(dst43, dst65, filt_h0, filt_h1);
+    SRA_4V(dst0, dst1, dst2, dst3, 6);
+    MUL2(dst0, weight_vec, dst1, weight_vec, dst0, dst1);
+    MUL2(dst2, weight_vec, dst3, weight_vec, dst2, dst3);
+    SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+    PCKEV_H2_SH(dst1, dst0, dst3, dst2, tmp0, tmp1);
+    ADD2(tmp0, offset_vec, tmp1, offset_vec, tmp0, tmp1);
+    CLIP_SH2_0_255_MAX_SATU(tmp0, tmp1);
+    out = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
 }
 
 static void hevc_hv_uniwgt_4t_4multx8mult_msa(uint8_t *src,
@@ -4166,18 +4315,18 @@
                                               int32_t rnd_val)
 {
     uint32_t loop_cnt;
+    v16u8 out0, out1;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, dst9;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r, dst6_r, dst7_r;
+    v8i16 filt_h0, filt_h1, filter_vec, tmp0, tmp1, tmp2, tmp3;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dst10, dst21, dst22, dst73, dst84, dst95, dst106;
     v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
     v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 dst98_r, dst109_r, offset_vec, const_128, denom_vec;
+    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, weight_vec, rnd_vec;
 
     src -= (src_stride + 1);
 
@@ -4185,34 +4334,30 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val - 6);
+    const_128 = __msa_fill_h((128 * weight));
+    offset_vec += __msa_srar_h(const_128, denom_vec);
+
     LD_SB3(src, src_stride, src0, src1, src2);
     src += (3 * src_stride);
     XORI_B3_128_SB(src0, src1, src2);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
+    VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src2, src1, src2, mask0, mask1, vec2, vec3);
+    dst10 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst21 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    ILVRL_H2_SH(dst21, dst10, dst10_r, dst21_r);
+    dst22 = (v8i16) __msa_splati_d((v2i64) dst21, 1);
 
     for (loop_cnt = height >> 3; loop_cnt--;) {
         LD_SB8(src, src_stride,
@@ -4220,75 +4365,48 @@
         src += (8 * src_stride);
         XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
 
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-        dst32_r = __msa_ilvr_h(dst3, dst2);
-        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-        dst0_r >>= 6;
+        VSHF_B2_SB(src3, src7, src3, src7, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src8, src4, src8, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src9, src5, src9, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src10, src6, src10, mask0, mask1, vec6, vec7);
+        dst73 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst84 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst95 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst106 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+        dst32_r = __msa_ilvr_h(dst73, dst22);
+        ILVRL_H2_SH(dst84, dst73, dst43_r, dst87_r);
+        ILVRL_H2_SH(dst95, dst84, dst54_r, dst98_r);
+        ILVRL_H2_SH(dst106, dst95, dst65_r, dst109_r);
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst73, 1);
+        dst76_r = __msa_ilvr_h(dst22, dst106);
+        dst0 = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst1 = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst2 = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst3 = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst4 = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+        dst5 = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+        dst6 = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+        dst7 = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+        SRA_4V(dst0, dst1, dst2, dst3, 6);
+        SRA_4V(dst4, dst5, dst6, dst7, 6);
+        MUL2(dst0, weight_vec, dst1, weight_vec, dst0, dst1);
+        MUL2(dst2, weight_vec, dst3, weight_vec, dst2, dst3);
+        MUL2(dst4, weight_vec, dst5, weight_vec, dst4, dst5);
+        MUL2(dst6, weight_vec, dst7, weight_vec, dst6, dst7);
+        SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+        SRAR_W4_SW(dst4, dst5, dst6, dst7, rnd_vec);
+        PCKEV_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, tmp0, tmp1,
+                    tmp2, tmp3);
+        ADD2(tmp0, offset_vec, tmp1, offset_vec, tmp0, tmp1);
+        ADD2(tmp2, offset_vec, tmp3, offset_vec, tmp2, tmp3);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST4x8_UB(out0, out1, dst, dst_stride);
+        dst += (8 * dst_stride);
 
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-        dst43_r = __msa_ilvr_h(dst4, dst3);
-        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-        dst1_r >>= 6;
-
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-        dst54_r = __msa_ilvr_h(dst5, dst4);
-        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
-        dst2_r >>= 6;
-
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst6 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-        dst65_r = __msa_ilvr_h(dst6, dst5);
-        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
-        dst3_r >>= 6;
-
-        VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-        dst7 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
-        dst76_r = __msa_ilvr_h(dst7, dst6);
-        dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
-        dst4_r >>= 6;
-
-        VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec0, vec1);
-        dst8 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst8, dst8);
-        dst87_r = __msa_ilvr_h(dst8, dst7);
-        dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
-        dst5_r >>= 6;
-
-        VSHF_B2_SB(src9, src9, src9, src9, mask0, mask1, vec0, vec1);
-        dst9 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst9, dst9);
-        dst10_r = __msa_ilvr_h(dst9, dst8);
-        dst6_r = HEVC_FILT_4TAP(dst76_r, dst10_r, filt_h0, filt_h1);
-        dst6_r >>= 6;
-
-        VSHF_B2_SB(src10, src10, src10, src10, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        dst21_r = __msa_ilvr_h(dst2, dst9);
-        dst7_r = HEVC_FILT_4TAP(dst87_r, dst21_r, filt_h0, filt_h1);
-        dst7_r >>= 6;
-
-        HEVC_HV_UNIW_RND_CLIP4(dst0_r, dst1_r, dst2_r, dst3_r,
-                               weight_vec, offset_vec, rnd_vec,
-                               dst0_r, dst1_r, dst2_r, dst3_r);
-        HEVC_PCK_SW_SB4(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r);
-        ST4x4_UB(dst0_r, dst0_r, 0, 1, 2, 3, dst, dst_stride);
-        dst += (4 * dst_stride);
-
-        HEVC_HV_UNIW_RND_CLIP4(dst4_r, dst5_r, dst6_r, dst7_r,
-                               weight_vec, offset_vec, rnd_vec,
-                               dst4_r, dst5_r, dst6_r, dst7_r);
-        HEVC_PCK_SW_SB4(dst5_r, dst4_r, dst7_r, dst6_r, dst0_r);
-        ST4x4_UB(dst0_r, dst0_r, 0, 1, 2, 3, dst, dst_stride);
-        dst += (4 * dst_stride);
+        dst10_r = dst98_r;
+        dst21_r = dst109_r;
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst106, 1);
     }
 }
 
@@ -4305,11 +4423,11 @@
 {
     if (2 == height) {
         hevc_hv_uniwgt_4t_4x2_msa(src, src_stride, dst, dst_stride,
-                                  filter_x, filter_y, height, weight,
+                                  filter_x, filter_y, weight,
                                   offset, rnd_val);
     } else if (4 == height) {
         hevc_hv_uniwgt_4t_4x4_msa(src, src_stride, dst, dst_stride,
-                                  filter_x, filter_y, height, weight,
+                                  filter_x,filter_y, weight,
                                   offset, rnd_val);
     } else if (0 == (height % 8)) {
         hevc_hv_uniwgt_4t_4multx8mult_msa(src, src_stride, dst, dst_stride,
@@ -4329,20 +4447,22 @@
                                      int32_t offset,
                                      int32_t rnd_val)
 {
-    uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v16u8 out0, out1, out2;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v4i32 dst0_r, dst0_l, dst1_r, dst1_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
-    v4i32 dst2_r, dst2_l, dst3_r, dst3_l;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
-    v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
+    v8i16 filt_h0, filt_h1, filter_vec;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6, dsth7, dsth8, dsth9;
+    v8i16 dsth10, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r, dst98_r, dst21_r, dst43_r;
+    v8i16 dst65_r, dst87_r, dst109_r, dst10_l, dst32_l, dst54_l, dst76_l;
+    v8i16 dst98_l, dst21_l, dst43_l, dst65_l, dst87_l, dst109_l;
+    v8i16 dst1021_l, dst3243_l, dst5465_l, dst7687_l, dst98109_l;
+    v8i16 offset_vec, const_128, denom_vec;
+    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r, dst6_r, dst7_r;
+    v4i32 dst0_l, dst1_l, dst2_l, dst3_l, weight_vec, rnd_vec;
 
     src -= (src_stride + 1);
 
@@ -4350,20 +4470,20 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val - 6);
+    const_128 = __msa_fill_h((128 * weight));
+    offset_vec += __msa_srar_h(const_128, denom_vec);
+
     LD_SB3(src, src_stride, src0, src1, src2);
     src += (3 * src_stride);
     XORI_B3_128_SB(src0, src1, src2);
@@ -4371,72 +4491,78 @@
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    dsth0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+    ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
 
-    ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
-    ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
-
-    for (loop_cnt = height >> 2; loop_cnt--;) {
-        LD_SB4(src, src_stride, src3, src4, src5, src6);
-        src += (4 * src_stride);
-        XORI_B4_128_SB(src3, src4, src5, src6);
-
-        /* row 3 */
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
-        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-        dst0_r >>= 6;
-        dst0_l >>= 6;
-
-        /* row 4 */
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-        ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
-        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-        dst1_r >>= 6;
-        dst1_l >>= 6;
-
-        /* row 5 */
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-        ILVRL_H2_SH(dst5, dst4, dst10_r, dst10_l);
-        dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-        dst2_l = HEVC_FILT_4TAP(dst32_l, dst10_l, filt_h0, filt_h1);
-        dst2_r >>= 6;
-        dst2_l >>= 6;
-
-        /* row 6 */
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-        ILVRL_H2_SH(dst2, dst5, dst21_r, dst21_l);
-        dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-        dst3_l = HEVC_FILT_4TAP(dst43_l, dst21_l, filt_h0, filt_h1);
-        dst3_r >>= 6;
-        dst3_l >>= 6;
-
-        HEVC_HV_UNIW_RND_CLIP4(dst0_r, dst1_r, dst0_l, dst1_l,
-                               weight_vec, offset_vec, rnd_vec,
-                               dst0_r, dst1_r, dst0_l, dst1_l);
-        HEVC_HV_UNIW_RND_CLIP4(dst2_r, dst3_r, dst2_l, dst3_l,
-                               weight_vec, offset_vec, rnd_vec,
-                               dst2_r, dst3_r, dst2_l, dst3_l);
-        HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                        dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-        ST6x4_UB(dst0_r, dst1_r, dst, dst_stride);
-        dst += (4 * dst_stride);
-    }
+    LD_SB8(src, src_stride, src3, src4, src5, src6, src7, src8, src9, src10);
+    XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+    dsth3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dsth6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src9, src9, src9, src9, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src10, src10, src10, src10, mask0, mask1, vec6, vec7);
+    dsth7 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth8 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth9 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dsth10 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+    ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+    ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+    ILVRL_H2_SH(dsth7, dsth6, dst76_r, dst76_l);
+    ILVRL_H2_SH(dsth8, dsth7, dst87_r, dst87_l);
+    ILVRL_H2_SH(dsth9, dsth8, dst98_r, dst98_l);
+    ILVRL_H2_SH(dsth10, dsth9, dst109_r, dst109_l);
+    PCKEV_D2_SH(dst21_l, dst10_l, dst43_l, dst32_l, dst1021_l, dst3243_l);
+    PCKEV_D2_SH(dst65_l, dst54_l, dst87_l, dst76_l, dst5465_l, dst7687_l);
+    dst98109_l = (v8i16) __msa_pckev_d((v2i64) dst109_l, (v2i64) dst98_l);
+    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+    dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+    dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+    dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+    dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+    dst6_r = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+    dst7_r = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+    dst0_l = HEVC_FILT_4TAP(dst1021_l, dst3243_l, filt_h0, filt_h1);
+    dst1_l = HEVC_FILT_4TAP(dst3243_l, dst5465_l, filt_h0, filt_h1);
+    dst2_l = HEVC_FILT_4TAP(dst5465_l, dst7687_l, filt_h0, filt_h1);
+    dst3_l = HEVC_FILT_4TAP(dst7687_l, dst98109_l, filt_h0, filt_h1);
+    SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+    SRA_4V(dst4_r, dst5_r, dst6_r, dst7_r, 6);
+    SRA_4V(dst0_l, dst1_l, dst2_l, dst3_l, 6);
+    MUL2(dst0_r, weight_vec, dst1_r, weight_vec, dst0_r, dst1_r);
+    MUL2(dst2_r, weight_vec, dst3_r, weight_vec, dst2_r, dst3_r);
+    MUL2(dst4_r, weight_vec, dst5_r, weight_vec, dst4_r, dst5_r);
+    MUL2(dst6_r, weight_vec, dst7_r, weight_vec, dst6_r, dst7_r);
+    MUL2(dst0_l, weight_vec, dst1_l, weight_vec, dst0_l, dst1_l);
+    MUL2(dst2_l, weight_vec, dst3_l, weight_vec, dst2_l, dst3_l);
+    SRAR_W4_SW(dst0_r, dst1_r, dst2_r, dst3_r, rnd_vec);
+    SRAR_W4_SW(dst4_r, dst5_r, dst6_r, dst7_r, rnd_vec);
+    SRAR_W4_SW(dst0_l, dst1_l, dst2_l, dst3_l, rnd_vec);
+    PCKEV_H2_SH(dst1_r, dst0_r, dst3_r, dst2_r, tmp0, tmp1);
+    PCKEV_H2_SH(dst5_r, dst4_r, dst7_r, dst6_r, tmp2, tmp3);
+    PCKEV_H2_SH(dst1_l, dst0_l, dst3_l, dst2_l, tmp4, tmp5);
+    ADD2(tmp0, offset_vec, tmp1, offset_vec, tmp0, tmp1);
+    ADD2(tmp2, offset_vec, tmp3, offset_vec, tmp2, tmp3);
+    ADD2(tmp4, offset_vec, tmp5, offset_vec, tmp4, tmp5);
+    CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+    CLIP_SH2_0_255_MAX_SATU(tmp4, tmp5);
+    PCKEV_B3_UB(tmp1, tmp0, tmp3, tmp2, tmp5, tmp4, out0, out1, out2);
+    ST4x8_UB(out0, out1, dst, dst_stride);
+    ST2x4_UB(out2, 0, dst + 4, dst_stride);
+    dst += 4 * dst_stride;
+    ST2x4_UB(out2, 4, dst + 4, dst_stride);
 }
 
 static void hevc_hv_uniwgt_4t_8x2_msa(uint8_t *src,
@@ -4445,23 +4571,24 @@
                                       int32_t dst_stride,
                                       const int8_t *filter_x,
                                       const int8_t *filter_y,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
+    v16u8 out;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1, filter_vec;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
     v8i16 dst0, dst1, dst2, dst3, dst4;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l;
     v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
     v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 tmp0, tmp1;
+    v8i16 offset_vec, const_128, denom_vec;
+    v4i32 weight_vec, rnd_vec;
 
     src -= (src_stride + 1);
 
@@ -4469,65 +4596,144 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val - 6);
+    const_128 = __msa_fill_h((128 * weight));
+    offset_vec += __msa_srar_h(const_128, denom_vec);
 
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec8, vec9);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    dst4 = HEVC_FILT_4TAP_SH(vec8, vec9, filt0, filt1);
     ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
     ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
-
-    LD_SB2(src, src_stride, src3, src4);
-    src += (2 * src_stride);
-    XORI_B2_128_SB(src3, src4);
-
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
     ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
     dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-    dst0_r >>= 6;
-    dst0_l >>= 6;
-
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
     dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst1_l >>= 6;
+    SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+    MUL2(dst0_r, weight_vec, dst1_r, weight_vec, dst0_r, dst1_r);
+    MUL2(dst0_l, weight_vec, dst1_l, weight_vec, dst0_l, dst1_l);
+    SRAR_W4_SW(dst0_r, dst0_l, dst1_r, dst1_l, rnd_vec);
+    PCKEV_H2_SH(dst0_l, dst0_r, dst1_l, dst1_r, tmp0, tmp1);
+    ADD2(tmp0, offset_vec, tmp1, offset_vec, tmp0, tmp1);
+    CLIP_SH2_0_255_MAX_SATU(tmp0, tmp1);
+    out = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+    ST8x2_UB(out, dst, dst_stride);
+}
 
-    HEVC_HV_UNIW_RND_CLIP4(dst0_r, dst1_r, dst0_l, dst1_l,
-                           weight_vec, offset_vec, rnd_vec,
-                           dst0_r, dst1_r, dst0_l, dst1_l);
-    HEVC_PCK_SW_SB4(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r);
-    ST8x2_UB(dst0_r, dst, dst_stride);
-    dst += (2 * dst_stride);
+static void hevc_hv_uniwgt_4t_8multx4_msa(uint8_t *src,
+                                          int32_t src_stride,
+                                          uint8_t *dst,
+                                          int32_t dst_stride,
+                                          const int8_t *filter_x,
+                                          const int8_t *filter_y,
+                                          int32_t width8mult,
+                                          int32_t weight,
+                                          int32_t offset,
+                                          int32_t rnd_val)
+{
+    uint32_t cnt;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, mask0, mask1;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 filt0, filt1, filt_h0, filt_h1, filter_vec;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, tmp0, tmp1, tmp2, tmp3;
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
+    v8i16 offset_vec, const_128, denom_vec;
+    v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+    v4i32 weight_vec, rnd_vec;
+
+    src -= (src_stride + 1);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+
+    weight_vec = __msa_fill_w(weight);
+    rnd_vec = __msa_fill_w(rnd_val);
+
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val - 6);
+    const_128 = __msa_fill_h((128 * weight));
+    offset_vec += __msa_srar_h(const_128, denom_vec);
+
+    for (cnt = width8mult; cnt--;) {
+        LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+        src += 8;
+        XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
+        ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+        dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+        ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
+        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
+        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
+        SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+        SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+        MUL2(dst0_r, weight_vec, dst1_r, weight_vec, dst0_r, dst1_r);
+        MUL2(dst2_r, weight_vec, dst3_r, weight_vec, dst2_r, dst3_r);
+        MUL2(dst0_l, weight_vec, dst1_l, weight_vec, dst0_l, dst1_l);
+        MUL2(dst2_l, weight_vec, dst3_l, weight_vec, dst2_l, dst3_l);
+        SRAR_W4_SW(dst0_r, dst0_l, dst1_r, dst1_l, rnd_vec);
+        SRAR_W4_SW(dst2_r, dst2_l, dst3_r, dst3_l, rnd_vec);
+        PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                    dst3_r, tmp0, tmp1, tmp2, tmp3);
+        ADD2(tmp0, offset_vec, tmp1, offset_vec, tmp0, tmp1);
+        ADD2(tmp2, offset_vec, tmp3, offset_vec, tmp2, tmp3);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += 8;
+    }
 }
 
 static void hevc_hv_uniwgt_4t_8x6_msa(uint8_t *src,
@@ -4536,26 +4742,27 @@
                                       int32_t dst_stride,
                                       const int8_t *filter_x,
                                       const int8_t *filter_y,
-                                      int32_t height,
                                       int32_t weight,
                                       int32_t offset,
                                       int32_t rnd_val)
 {
+    v16u8 out0, out1, out2;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1, filter_vec;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+    v16i8 vec10, vec11, vec12, vec13, vec14, vec15, vec16, vec17;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
-    v4i32 dst4_r, dst4_l, dst5_r, dst5_l;
+    v4i32 dst4_r, dst4_l, dst5_r, dst5_l, weight_vec, rnd_vec;
     v8i16 dst10_r, dst32_r, dst10_l, dst32_l;
     v8i16 dst21_r, dst43_r, dst21_l, dst43_l;
     v8i16 dst54_r, dst54_l, dst65_r, dst65_l;
     v8i16 dst76_r, dst76_l, dst87_r, dst87_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    v8i16 offset_vec, const_128, denom_vec;
 
     src -= (src_stride + 1);
 
@@ -4563,126 +4770,87 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val - 6);
+    const_128 = __msa_fill_h((128 * weight));
+    offset_vec += __msa_srar_h(const_128, denom_vec);
 
-    XORI_B3_128_SB(src0, src1, src2);
-
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    LD_SB4(src, src_stride, src5, src6, src7, src8);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B4_128_SB(src5, src6, src7, src8);
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec8, vec9);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec10, vec11);
+    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec12, vec13);
+    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec14, vec15);
+    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec16, vec17);
+    dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    dst3 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+    dst4 = HEVC_FILT_4TAP_SH(vec8, vec9, filt0, filt1);
+    dst5 = HEVC_FILT_4TAP_SH(vec10, vec11, filt0, filt1);
+    dst6 = HEVC_FILT_4TAP_SH(vec12, vec13, filt0, filt1);
+    dst7 = HEVC_FILT_4TAP_SH(vec14, vec15, filt0, filt1);
+    dst8 = HEVC_FILT_4TAP_SH(vec16, vec17, filt0, filt1);
     ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
     ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
-
-    LD_SB2(src, src_stride, src3, src4);
-    src += (2 * src_stride);
-    XORI_B2_128_SB(src3, src4);
-
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
     ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+    ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+    ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+    ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
+    ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
     dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
     dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-    dst0_r >>= 6;
-    dst0_l >>= 6;
-
-    /* row 4 */
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
     dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst1_l >>= 6;
-
-    LD_SB2(src, src_stride, src5, src6);
-    src += (2 * src_stride);
-    XORI_B2_128_SB(src5, src6);
-
-    /* row 5 */
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-    ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
     dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
     dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
-    dst2_r >>= 6;
-    dst2_l >>= 6;
-
-    /* row 6 */
-    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-    dst6 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-    ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
     dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
     dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
-    dst3_r >>= 6;
-    dst3_l >>= 6;
-
-    LD_SB2(src, src_stride, src7, src8);
-    src += (2 * src_stride);
-    XORI_B2_128_SB(src7, src8);
-
-    /* row 7 */
-    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-    dst7 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
-    ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
     dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
     dst4_l = HEVC_FILT_4TAP(dst54_l, dst76_l, filt_h0, filt_h1);
-
-    dst4_r >>= 6;
-    dst4_l >>= 6;
-
-    /* row 8 */
-    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec0, vec1);
-    dst8 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst8, dst8);
-    ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
     dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
     dst5_l = HEVC_FILT_4TAP(dst65_l, dst87_l, filt_h0, filt_h1);
-    dst5_r >>= 6;
-    dst5_l >>= 6;
-
-    HEVC_HV_UNIW_RND_CLIP4(dst0_r, dst1_r, dst0_l, dst1_l,
-                           weight_vec, offset_vec, rnd_vec,
-                           dst0_r, dst1_r, dst0_l, dst1_l);
-    HEVC_HV_UNIW_RND_CLIP4(dst2_r, dst3_r, dst2_l, dst3_l,
-                           weight_vec, offset_vec, rnd_vec,
-                           dst2_r, dst3_r, dst2_l, dst3_l);
-    HEVC_HV_UNIW_RND_CLIP4(dst4_r, dst5_r, dst4_l, dst5_l,
-                           weight_vec, offset_vec, rnd_vec,
-                           dst4_r, dst5_r, dst4_l, dst5_l);
-    HEVC_PCK_SW_SB12(dst0_l, dst0_r, dst1_l, dst1_r,
-                     dst2_l, dst2_r, dst3_l, dst3_r,
-                     dst4_l, dst4_r, dst5_l, dst5_r, dst0_r, dst1_r, dst2_r);
-    ST8x4_UB(dst0_r, dst1_r, dst, dst_stride);
+    SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+    SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+    SRA_4V(dst4_r, dst4_l, dst5_r, dst5_l, 6);
+    MUL2(dst0_r, weight_vec, dst1_r, weight_vec, dst0_r, dst1_r);
+    MUL2(dst2_r, weight_vec, dst3_r, weight_vec, dst2_r, dst3_r);
+    MUL2(dst4_r, weight_vec, dst5_r, weight_vec, dst4_r, dst5_r);
+    MUL2(dst0_l, weight_vec, dst1_l, weight_vec, dst0_l, dst1_l);
+    MUL2(dst2_l, weight_vec, dst3_l, weight_vec, dst2_l, dst3_l);
+    MUL2(dst4_l, weight_vec, dst5_l, weight_vec, dst4_l, dst5_l);
+    SRAR_W4_SW(dst0_r, dst0_l, dst1_r, dst1_l, rnd_vec);
+    SRAR_W4_SW(dst2_r, dst2_l, dst3_r, dst3_l, rnd_vec);
+    SRAR_W4_SW(dst4_r, dst4_l, dst5_r, dst5_l, rnd_vec);
+    PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l, dst3_r,
+                tmp0, tmp1, tmp2, tmp3);
+    PCKEV_H2_SH(dst4_l, dst4_r, dst5_l, dst5_r, tmp4, tmp5);
+    ADD2(tmp0, offset_vec, tmp1, offset_vec, tmp0, tmp1);
+    ADD2(tmp2, offset_vec, tmp3, offset_vec, tmp2, tmp3);
+    ADD2(tmp4, offset_vec, tmp5, offset_vec, tmp4, tmp5);
+    CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+    CLIP_SH2_0_255_MAX_SATU(tmp4, tmp5);
+    PCKEV_B3_UB(tmp1, tmp0, tmp3, tmp2, tmp5, tmp4, out0, out1, out2);
+    ST8x4_UB(out0, out1, dst, dst_stride);
     dst += (4 * dst_stride);
-    ST8x2_UB(dst2_r, dst, dst_stride);
+    ST8x2_UB(out2, dst, dst_stride);
 }
 
 static void hevc_hv_uniwgt_4t_8multx4mult_msa(uint8_t *src,
@@ -4695,24 +4863,25 @@
                                               int32_t weight,
                                               int32_t offset,
                                               int32_t rnd_val,
-                                              int32_t width)
+                                              int32_t width8mult)
 {
     uint32_t loop_cnt, cnt;
     uint8_t *src_tmp;
     uint8_t *dst_tmp;
+    v16u8 out0, out1;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1, filter_vec;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
-    v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, tmp0, tmp1, tmp2, tmp3;
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l;
-    v4i32 weight_vec, offset_vec, rnd_vec;
+    v8i16 offset_vec, const_128, denom_vec;
     v4i32 dst2_r, dst2_l, dst3_r, dst3_l;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
-    v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
+    v4i32 weight_vec, rnd_vec;
 
     src -= (src_stride + 1);
 
@@ -4720,21 +4889,21 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
-    const_vec = __msa_ldi_h(128);
-    const_vec <<= 6;
-
     weight_vec = __msa_fill_w(weight);
-    offset_vec = __msa_fill_w(offset);
     rnd_vec = __msa_fill_w(rnd_val);
 
-    for (cnt = width >> 3; cnt--;) {
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val - 6);
+    const_128 = __msa_fill_h((128 * weight));
+    offset_vec += __msa_srar_h(const_128, denom_vec);
+
+    for (cnt = width8mult; cnt--;) {
         src_tmp = src;
         dst_tmp = dst;
 
@@ -4745,12 +4914,9 @@
         VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
         VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
         VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+        dst0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
 
         ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
         ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
@@ -4761,51 +4927,47 @@
             XORI_B4_128_SB(src3, src4, src5, src6);
 
             VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-            dst3 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+            VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+            VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+            VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+            dst3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+            dst4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+            dst5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+            dst6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
             ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+            ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+            ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+            ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
             dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
             dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-            dst0_r >>= 6;
-            dst0_l >>= 6;
-
-            VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-            dst4 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-            ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
             dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
             dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-            dst1_r >>= 6;
-            dst1_l >>= 6;
-
-            VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-            dst5 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-            ILVRL_H2_SH(dst5, dst4, dst10_r, dst10_l);
-            dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-            dst2_l = HEVC_FILT_4TAP(dst32_l, dst10_l, filt_h0, filt_h1);
-            dst2_r >>= 6;
-            dst2_l >>= 6;
-
-            VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-            dst2 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-            ILVRL_H2_SH(dst2, dst5, dst21_r, dst21_l);
-            dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-            dst3_l = HEVC_FILT_4TAP(dst43_l, dst21_l, filt_h0, filt_h1);
-            dst3_r >>= 6;
-            dst3_l >>= 6;
-
-            HEVC_HV_UNIW_RND_CLIP4(dst0_r, dst1_r, dst0_l, dst1_l,
-                                   weight_vec, offset_vec, rnd_vec,
-                                   dst0_r, dst1_r, dst0_l, dst1_l);
-            HEVC_HV_UNIW_RND_CLIP4(dst2_r, dst3_r, dst2_l, dst3_l,
-                                   weight_vec, offset_vec, rnd_vec,
-                                   dst2_r, dst3_r, dst2_l, dst3_l);
-            HEVC_PCK_SW_SB8(dst0_l, dst0_r, dst1_l, dst1_r,
-                            dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r);
-            ST8x4_UB(dst0_r, dst1_r, dst_tmp, dst_stride);
+            dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+            dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+            dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+            dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
+            SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+            SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+            MUL2(dst0_r, weight_vec, dst1_r, weight_vec, dst0_r, dst1_r);
+            MUL2(dst2_r, weight_vec, dst3_r, weight_vec, dst2_r, dst3_r);
+            MUL2(dst0_l, weight_vec, dst1_l, weight_vec, dst0_l, dst1_l);
+            MUL2(dst2_l, weight_vec, dst3_l, weight_vec, dst2_l, dst3_l);
+            SRAR_W4_SW(dst0_r, dst0_l, dst1_r, dst1_l, rnd_vec);
+            SRAR_W4_SW(dst2_r, dst2_l, dst3_r, dst3_l, rnd_vec);
+            PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                        dst3_r, tmp0, tmp1, tmp2, tmp3);
+            ADD2(tmp0, offset_vec, tmp1, offset_vec, tmp0, tmp1);
+            ADD2(tmp2, offset_vec, tmp3, offset_vec, tmp2, tmp3);
+            CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+            PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+            ST8x4_UB(out0, out1, dst_tmp, dst_stride);
             dst_tmp += (4 * dst_stride);
+
+            dst10_r = dst54_r;
+            dst10_l = dst54_l;
+            dst21_r = dst65_r;
+            dst21_l = dst65_l;
+            dst2 = dst6;
         }
 
         src += 8;
@@ -4827,16 +4989,20 @@
 
     if (2 == height) {
         hevc_hv_uniwgt_4t_8x2_msa(src, src_stride, dst, dst_stride,
-                                  filter_x, filter_y, height, weight,
+                                  filter_x, filter_y, weight,
                                   offset, rnd_val);
+    } else if (4 == height) {
+        hevc_hv_uniwgt_4t_8multx4_msa(src, src_stride, dst, dst_stride,
+                                      filter_x, filter_y, 1, weight,
+                                      offset, rnd_val);
     } else if (6 == height) {
         hevc_hv_uniwgt_4t_8x6_msa(src, src_stride, dst, dst_stride,
-                                  filter_x, filter_y, height, weight,
+                                  filter_x, filter_y, weight,
                                   offset, rnd_val);
     } else if (0 == (height % 4)) {
         hevc_hv_uniwgt_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
                                           filter_x, filter_y, height, weight,
-                                          offset, rnd_val, 8);
+                                          offset, rnd_val, 1);
     }
 }
 
@@ -4851,12 +5017,170 @@
                                       int32_t offset,
                                       int32_t rnd_val)
 {
-    hevc_hv_uniwgt_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
-                                      filter_x, filter_y, height, weight,
-                                      offset, rnd_val, 8);
-    hevc_hv_uniwgt_4t_4w_msa(src + 8, src_stride, dst + 8, dst_stride,
-                             filter_x, filter_y, height, weight,
-                             offset, rnd_val);
+    uint32_t loop_cnt;
+    uint8_t *src_tmp, *dst_tmp;
+    v16u8 out0, out1;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 mask0, mask1, mask2, mask3;
+    v8i16 filt0, filt1, filt_h0, filt_h1, filter_vec, tmp0, tmp1, tmp2, tmp3;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6;
+    v8i16 dst10, dst21, dst22, dst73, dst84, dst95, dst106;
+    v8i16 dst76_r, dst98_r, dst87_r, dst109_r;
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
+    v8i16 offset_vec, const_128, denom_vec;
+    v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, weight_vec, rnd_vec;
+
+    src -= (src_stride + 1);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+
+    weight_vec = __msa_fill_w(weight);
+    rnd_vec = __msa_fill_w(rnd_val);
+
+    offset_vec = __msa_fill_h(offset);
+    denom_vec = __msa_fill_h(rnd_val - 6);
+    const_128 = __msa_fill_h((128 * weight));
+    offset_vec += __msa_srar_h(const_128, denom_vec);
+
+    src_tmp = src;
+    dst_tmp = dst;
+
+    LD_SB3(src_tmp, src_stride, src0, src1, src2);
+    src_tmp += (3 * src_stride);
+    XORI_B3_128_SB(src0, src1, src2);
+    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+    dsth0 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dsth1 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    dsth2 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+    ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+    ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src_tmp, src_stride, src3, src4, src5, src6);
+        src_tmp += (4 * src_stride);
+        XORI_B4_128_SB(src3, src4, src5, src6);
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+        dsth3 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dsth4 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dsth5 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dsth6 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+        ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+        ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
+        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
+        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
+        SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+        SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+        MUL2(dst0_r, weight_vec, dst1_r, weight_vec, dst0_r, dst1_r);
+        MUL2(dst2_r, weight_vec, dst3_r, weight_vec, dst2_r, dst3_r);
+        MUL2(dst0_l, weight_vec, dst1_l, weight_vec, dst0_l, dst1_l);
+        MUL2(dst2_l, weight_vec, dst3_l, weight_vec, dst2_l, dst3_l);
+        SRAR_W4_SW(dst0_r, dst0_l, dst1_r, dst1_l, rnd_vec);
+        SRAR_W4_SW(dst2_r, dst2_l, dst3_r, dst3_l, rnd_vec);
+        PCKEV_H4_SH(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                    dst3_r, tmp0, tmp1, tmp2, tmp3);
+        ADD2(tmp0, offset_vec, tmp1, offset_vec, tmp0, tmp1);
+        ADD2(tmp2, offset_vec, tmp3, offset_vec, tmp2, tmp3);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST8x4_UB(out0, out1, dst_tmp, dst_stride);
+        dst_tmp += (4 * dst_stride);
+
+        dst10_r = dst54_r;
+        dst10_l = dst54_l;
+        dst21_r = dst65_r;
+        dst21_l = dst65_l;
+        dsth2 = dsth6;
+    }
+
+    src += 8;
+    dst += 8;
+
+    mask2 = LD_SB(ff_hevc_mask_arr + 16);
+    mask3 = mask2 + 2;
+
+    LD_SB3(src, src_stride, src0, src1, src2);
+    src += (3 * src_stride);
+    XORI_B3_128_SB(src0, src1, src2);
+    VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
+    VSHF_B2_SB(src1, src2, src1, src2, mask2, mask3, vec2, vec3);
+    dst10 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+    dst21 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+    ILVRL_H2_SH(dst21, dst10, dst10_r, dst21_r);
+    dst22 = (v8i16) __msa_splati_d((v2i64) dst21, 1);
+
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB8(src, src_stride, src3, src4, src5, src6, src7, src8, src9,
+               src10);
+        src += (8 * src_stride);
+        XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
+        VSHF_B2_SB(src3, src7, src3, src7, mask2, mask3, vec0, vec1);
+        VSHF_B2_SB(src4, src8, src4, src8, mask2, mask3, vec2, vec3);
+        VSHF_B2_SB(src5, src9, src5, src9, mask2, mask3, vec4, vec5);
+        VSHF_B2_SB(src6, src10, src6, src10, mask2, mask3, vec6, vec7);
+        dst73 = HEVC_FILT_4TAP_SH(vec0, vec1, filt0, filt1);
+        dst84 = HEVC_FILT_4TAP_SH(vec2, vec3, filt0, filt1);
+        dst95 = HEVC_FILT_4TAP_SH(vec4, vec5, filt0, filt1);
+        dst106 = HEVC_FILT_4TAP_SH(vec6, vec7, filt0, filt1);
+        dst32_r = __msa_ilvr_h(dst73, dst22);
+        ILVRL_H2_SH(dst84, dst73, dst43_r, dst87_r);
+        ILVRL_H2_SH(dst95, dst84, dst54_r, dst98_r);
+        ILVRL_H2_SH(dst106, dst95, dst65_r, dst109_r);
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst73, 1);
+        dst76_r = __msa_ilvr_h(dst22, dst106);
+        dst0 = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst1 = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst2 = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst3 = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst4 = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+        dst5 = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+        dst6 = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+        dst7 = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+        SRA_4V(dst0, dst1, dst2, dst3, 6);
+        SRA_4V(dst4, dst5, dst6, dst7, 6);
+        MUL2(dst0, weight_vec, dst1, weight_vec, dst0, dst1);
+        MUL2(dst2, weight_vec, dst3, weight_vec, dst2, dst3);
+        MUL2(dst4, weight_vec, dst5, weight_vec, dst4, dst5);
+        MUL2(dst6, weight_vec, dst7, weight_vec, dst6, dst7);
+        SRAR_W4_SW(dst0, dst1, dst2, dst3, rnd_vec);
+        SRAR_W4_SW(dst4, dst5, dst6, dst7, rnd_vec);
+        PCKEV_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, tmp0, tmp1,
+                    tmp2, tmp3);
+        ADD2(tmp0, offset_vec, tmp1, offset_vec, tmp0, tmp1);
+        ADD2(tmp2, offset_vec, tmp3, offset_vec, tmp2, tmp3);
+        CLIP_SH4_0_255_MAX_SATU(tmp0, tmp1, tmp2, tmp3);
+        PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST4x8_UB(out0, out1, dst, dst_stride);
+        dst += (8 * dst_stride);
+
+        dst10_r = dst98_r;
+        dst21_r = dst109_r;
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst106, 1);
+    }
 }
 
 static void hevc_hv_uniwgt_4t_16w_msa(uint8_t *src,
@@ -4870,9 +5194,15 @@
                                       int32_t offset,
                                       int32_t rnd_val)
 {
-    hevc_hv_uniwgt_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
-                                      filter_x, filter_y, height, weight,
-                                      offset, rnd_val, 16);
+    if (4 == height) {
+        hevc_hv_uniwgt_4t_8multx4_msa(src, src_stride, dst, dst_stride,
+                                      filter_x, filter_y, 2, weight, offset,
+                                      rnd_val);
+    } else {
+        hevc_hv_uniwgt_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
+                                          filter_x, filter_y, height, weight,
+                                          offset, rnd_val, 2);
+    }
 }
 
 static void hevc_hv_uniwgt_4t_24w_msa(uint8_t *src,
@@ -4888,7 +5218,7 @@
 {
     hevc_hv_uniwgt_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
                                       filter_x, filter_y, height, weight,
-                                      offset, rnd_val, 24);
+                                      offset, rnd_val, 3);
 }
 
 static void hevc_hv_uniwgt_4t_32w_msa(uint8_t *src,
@@ -4904,7 +5234,7 @@
 {
     hevc_hv_uniwgt_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
                                       filter_x, filter_y, height, weight,
-                                      offset, rnd_val, 32);
+                                      offset, rnd_val, 4);
 }
 
 #define UNIWGT_MC_COPY(WIDTH)                                                \
@@ -4939,18 +5269,18 @@
 
 #define UNI_W_MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                        \
 void ff_hevc_put_hevc_uni_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,        \
-                                                           ptrdiff_t          \
-                                                           dst_stride,        \
-                                                           uint8_t *src,      \
-                                                           ptrdiff_t          \
-                                                           src_stride,        \
-                                                           int height,        \
-                                                           int denom,         \
-                                                           int weight,        \
-                                                           int offset,        \
-                                                           intptr_t mx,       \
-                                                           intptr_t my,       \
-                                                           int width)         \
+                                                         ptrdiff_t            \
+                                                         dst_stride,          \
+                                                         uint8_t *src,        \
+                                                         ptrdiff_t            \
+                                                         src_stride,          \
+                                                         int height,          \
+                                                         int denom,           \
+                                                         int weight,          \
+                                                         int offset,          \
+                                                         intptr_t mx,         \
+                                                         intptr_t my,         \
+                                                         int width)           \
 {                                                                             \
     const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];             \
     int shift = denom + 14 - 8;                                               \
@@ -4996,46 +5326,43 @@
 
 #undef UNI_W_MC
 
-#define UNI_W_MC_HV(PEL, DIR, WIDTH, TAP, DIR1)                              \
-void ff_hevc_put_hevc_uni_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,       \
-                                                           ptrdiff_t         \
-                                                           dst_stride,       \
-                                                           uint8_t *src,     \
-                                                           ptrdiff_t         \
-                                                           src_stride,       \
-                                                           int height,       \
-                                                           int denom,        \
-                                                           int weight,       \
-                                                           int offset,       \
-                                                           intptr_t mx,      \
-                                                           intptr_t my,      \
-                                                           int width)        \
-{                                                                            \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];                \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];                \
-    int shift = denom + 14 - 8;                                              \
-                                                                             \
-    hevc_##DIR1##_uniwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, dst,       \
-                                                 dst_stride, filter_x,       \
-                                                 filter_y,  height, weight,  \
-                                                 offset, shift);             \
+#define UNI_W_MC_HV(PEL, WIDTH, TAP)                                          \
+void ff_hevc_put_hevc_uni_w_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst,           \
+                                                      ptrdiff_t dst_stride,   \
+                                                      uint8_t *src,           \
+                                                      ptrdiff_t src_stride,   \
+                                                      int height,             \
+                                                      int denom,              \
+                                                      int weight,             \
+                                                      int offset,             \
+                                                      intptr_t mx,            \
+                                                      intptr_t my,            \
+                                                      int width)              \
+{                                                                             \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];                 \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];                 \
+    int shift = denom + 14 - 8;                                               \
+                                                                              \
+    hevc_hv_uniwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, dst, dst_stride,  \
+                                           filter_x, filter_y,  height,       \
+                                           weight, offset, shift);            \
 }
 
-UNI_W_MC_HV(qpel, hv, 4, 8, hv);
-UNI_W_MC_HV(qpel, hv, 8, 8, hv);
-UNI_W_MC_HV(qpel, hv, 12, 8, hv);
-UNI_W_MC_HV(qpel, hv, 16, 8, hv);
-UNI_W_MC_HV(qpel, hv, 24, 8, hv);
-UNI_W_MC_HV(qpel, hv, 32, 8, hv);
-UNI_W_MC_HV(qpel, hv, 48, 8, hv);
-UNI_W_MC_HV(qpel, hv, 64, 8, hv);
+UNI_W_MC_HV(qpel, 4, 8);
+UNI_W_MC_HV(qpel, 8, 8);
+UNI_W_MC_HV(qpel, 12, 8);
+UNI_W_MC_HV(qpel, 16, 8);
+UNI_W_MC_HV(qpel, 24, 8);
+UNI_W_MC_HV(qpel, 32, 8);
+UNI_W_MC_HV(qpel, 48, 8);
+UNI_W_MC_HV(qpel, 64, 8);
 
-UNI_W_MC_HV(epel, hv, 4, 4, hv);
-UNI_W_MC_HV(epel, hv, 6, 4, hv);
-UNI_W_MC_HV(epel, hv, 8, 4, hv);
-UNI_W_MC_HV(epel, hv, 12, 4, hv);
-UNI_W_MC_HV(epel, hv, 16, 4, hv);
-UNI_W_MC_HV(epel, hv, 24, 4, hv);
-UNI_W_MC_HV(epel, hv, 32, 4, hv);
+UNI_W_MC_HV(epel, 4, 4);
+UNI_W_MC_HV(epel, 6, 4);
+UNI_W_MC_HV(epel, 8, 4);
+UNI_W_MC_HV(epel, 12, 4);
+UNI_W_MC_HV(epel, 16, 4);
+UNI_W_MC_HV(epel, 24, 4);
+UNI_W_MC_HV(epel, 32, 4);
 
 #undef UNI_W_MC_HV

diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c
index 73cc3ea..81db62b 100644
--- a/libavcodec/mips/hevcdsp_msa.c
+++ b/libavcodec/mips/hevcdsp_msa.c

@@ -22,6 +22,13 @@
 #include "libavcodec/mips/hevcdsp_mips.h"
 #include "libavcodec/mips/hevc_macros_msa.h"
 
+static const uint8_t ff_hevc_mask_arr[16 * 2] __attribute__((aligned(0x40))) = {
+    /* 8 width cases */
+    0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+    /* 4 width cases */
+    0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20
+};
+
 static void hevc_copy_4w_msa(uint8_t *src, int32_t src_stride,
                              int16_t *dst, int32_t dst_stride,
                              int32_t height)
@@ -449,7 +456,7 @@
     v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
 
     src -= 3;
     const_vec = __msa_ldi_h(128);
@@ -504,7 +511,7 @@
     v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
 
     src -= 3;
     const_vec = __msa_ldi_h(128);
@@ -552,8 +559,75 @@
                                int16_t *dst, int32_t dst_stride,
                                const int8_t *filter, int32_t height)
 {
-    hevc_hz_8t_8w_msa(src, src_stride, dst, dst_stride, filter, height);
-    hevc_hz_8t_4w_msa(src + 8, src_stride, dst + 8, dst_stride, filter, height);
+    uint32_t loop_cnt;
+    int64_t res0, res1, res2, res3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v8i16 filt0, filt1, filt2, filt3, dst0, dst1, dst2, dst3, dst4, dst5;
+    v8i16 filter_vec, const_vec;
+
+    src -= 3;
+    const_vec = __msa_ldi_h(128);
+    const_vec <<= 6;
+
+    filter_vec = LD_SH(filter);
+    SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+    mask4 = LD_SB(ff_hevc_mask_arr + 16);
+    mask5 = mask4 + 2;
+    mask6 = mask4 + 4;
+    mask7 = mask4 + 6;
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        LD_SB4(src + 8, src_stride, src4, src5, src6, src7);
+        src += (4 * src_stride);
+        XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+
+        dst0 = const_vec;
+        dst1 = const_vec;
+        dst2 = const_vec;
+        dst3 = const_vec;
+        dst4 = const_vec;
+        dst5 = const_vec;
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        VSHF_B2_SB(src4, src5, src6, src7, mask4, mask4, vec4, vec5);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        DPADD_SB2_SH(vec4, vec5, filt0, filt0, dst4, dst5);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2, vec3);
+        VSHF_B2_SB(src4, src5, src6, src7, mask5, mask5, vec4, vec5);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        DPADD_SB2_SH(vec4, vec5, filt1, filt1, dst4, dst5);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2, vec3);
+        VSHF_B2_SB(src4, src5, src6, src7, mask6, mask6, vec4, vec5);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, dst0,
+                     dst1, dst2, dst3);
+        DPADD_SB2_SH(vec4, vec5, filt2, filt2, dst4, dst5);
+        VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec2, vec3);
+        VSHF_B2_SB(src4, src5, src6, src7, mask7, mask7, vec4, vec5);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt3, filt3, filt3, filt3, dst0,
+                     dst1, dst2, dst3);
+        DPADD_SB2_SH(vec4, vec5, filt3, filt3, dst4, dst5);
+
+        res0 = __msa_copy_s_d((v2i64) dst4, 0);
+        res1 = __msa_copy_s_d((v2i64) dst4, 1);
+        res2 = __msa_copy_s_d((v2i64) dst5, 0);
+        res3 = __msa_copy_s_d((v2i64) dst5, 1);
+        ST_SH4(dst0, dst1, dst2, dst3, dst, dst_stride);
+        SD4(res0, res1, res2, res3, (dst + 8), dst_stride);
+        dst += (4 * dst_stride);
+    }
 }
 
 static void hevc_hz_8t_16w_msa(uint8_t *src, int32_t src_stride,
@@ -561,13 +635,13 @@
                                const int8_t *filter, int32_t height)
 {
     uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 src0, src1, src2, src3;
     v8i16 filt0, filt1, filt2, filt3;
     v16i8 mask1, mask2, mask3;
     v16i8 vec0, vec1, vec2, vec3;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 dst0, dst1, dst2, dst3;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
 
     src -= 3;
     const_vec = __msa_ldi_h(128);
@@ -580,56 +654,36 @@
     mask2 = mask0 + 4;
     mask3 = mask0 + 6;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB4(src, src_stride, src0, src2, src4, src6);
-        LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
-        src += (4 * src_stride);
-        XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        LD_SB2(src, src_stride, src0, src2);
+        LD_SB2(src + 8, src_stride, src1, src3);
+        src += (2 * src_stride);
+        XORI_B4_128_SB(src0, src1, src2, src3);
 
-        VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
-        VSHF_B4_SB(src4, src4, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
-        dst4 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst4, dst4, dst4, dst4);
-        VSHF_B4_SB(src5, src5, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
-        dst5 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst5, dst5, dst5, dst5);
-        VSHF_B4_SB(src6, src6, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
-        dst6 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst6, dst6, dst6, dst6);
-        VSHF_B4_SB(src7, src7, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
-        dst7 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst7, dst7, dst7, dst7);
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt3, filt3, filt3, filt3, dst0,
+                     dst1, dst2, dst3);
 
-        ST_SH4(dst0, dst2, dst4, dst6, dst, dst_stride);
-        ST_SH4(dst1, dst3, dst5, dst7, dst + 8, dst_stride);
-        dst += (4 * dst_stride);
+        ST_SH2(dst0, dst2, dst, dst_stride);
+        ST_SH2(dst1, dst3, dst + 8, dst_stride);
+        dst += (2 * dst_stride);
     }
 }
 
@@ -641,10 +695,10 @@
     v16i8 src0, src1, src2, src3;
     v8i16 filt0, filt1, filt2, filt3;
     v16i8 mask1, mask2, mask3, mask4, mask5, mask6, mask7;
-    v16i8 vec0, vec1, vec2, vec3;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
 
     src -= 3;
     filter_vec = LD_SH(filter);
@@ -668,36 +722,36 @@
         src += src_stride;
         XORI_B4_128_SB(src0, src1, src2, src3);
 
-        VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7,
-                   vec0, vec1, vec2, vec3);
         dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
-        VSHF_B4_SB(src2, src3, mask4, mask5, mask6, mask7,
-                   vec0, vec1, vec2, vec3);
         dst4 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst4, dst4, dst4, dst4);
-        VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst5 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst5, dst5, dst5, dst5);
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask4, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask0, mask0, vec2, vec3);
+        VSHF_B2_SB(src2, src3, src3, src3, mask4, mask0, vec4, vec5);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        DPADD_SB2_SH(vec4, vec5, filt0, filt0, dst4, dst5);
+        VSHF_B2_SB(src0, src0, src0, src1, mask1, mask5, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask1, mask1, vec2, vec3);
+        VSHF_B2_SB(src2, src3, src3, src3, mask5, mask1, vec4, vec5);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        DPADD_SB2_SH(vec4, vec5, filt1, filt1, dst4, dst5);
+        VSHF_B2_SB(src0, src0, src0, src1, mask2, mask6, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask2, mask2, vec2, vec3);
+        VSHF_B2_SB(src2, src3, src3, src3, mask6, mask2, vec4, vec5);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, dst0,
+                     dst1, dst2, dst3);
+        DPADD_SB2_SH(vec4, vec5, filt2, filt2, dst4, dst5);
+        VSHF_B2_SB(src0, src0, src0, src1, mask3, mask7, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask3, mask3, vec2, vec3);
+        VSHF_B2_SB(src2, src3, src3, src3, mask7, mask3, vec4, vec5);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt3, filt3, filt3, filt3, dst0,
+                     dst1, dst2, dst3);
+        DPADD_SB2_SH(vec4, vec5, filt3, filt3, dst4, dst5);
 
         ST_SH2(dst0, dst1, dst, 8);
         ST_SH(dst2, dst + 16);
@@ -719,7 +773,7 @@
     v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
 
     src -= 3;
     filter_vec = LD_SH(filter);
@@ -776,10 +830,10 @@
     v16i8 src0, src1, src2, src3;
     v8i16 filt0, filt1, filt2, filt3;
     v16i8 mask1, mask2, mask3, mask4, mask5, mask6, mask7;
-    v16i8 vec0, vec1, vec2, vec3;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
 
     src -= 3;
     filter_vec = LD_SH(filter);
@@ -802,38 +856,39 @@
         src += src_stride;
         XORI_B4_128_SB(src0, src1, src2, src3);
 
-        VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst0 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst0, dst0, dst0, dst0);
-        VSHF_B4_SB(src0, src1, mask4, mask5, mask6, mask7,
-                   vec0, vec1, vec2, vec3);
         dst1 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst1, dst1, dst1, dst1);
-        VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst2 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst2, dst2, dst2, dst2);
-        VSHF_B4_SB(src1, src2, mask4, mask5, mask6, mask7,
-                   vec0, vec1, vec2, vec3);
         dst3 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst3, dst3, dst3, dst3);
-        VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst4 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst4, dst4, dst4, dst4);
-        VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3,
-                   vec0, vec1, vec2, vec3);
         dst5 = const_vec;
-        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst5, dst5, dst5, dst5);
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask4, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src2, mask0, mask4, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask1, mask5, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src2, mask1, mask5, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask2, mask6, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src2, mask2, mask6, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt2, filt2, filt2, filt2, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask3, mask7, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src2, mask3, mask7, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt3, filt3, filt3, filt3, dst0,
+                     dst1, dst2, dst3);
+        ST_SH4(dst0, dst1, dst2, dst3, dst, 8);
 
-        ST_SH6(dst0, dst1, dst2, dst3, dst4, dst5, dst, 8);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec4, vec5);
+        DPADD_SB2_SH(vec4, vec5, filt0, filt0, dst4, dst5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec4, vec5);
+        DPADD_SB2_SH(vec4, vec5, filt1, filt1, dst4, dst5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec4, vec5);
+        DPADD_SB2_SH(vec4, vec5, filt2, filt2, dst4, dst5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec4, vec5);
+        DPADD_SB2_SH(vec4, vec5, filt3, filt3, dst4, dst5);
+        ST_SH2(dst4, dst5, (dst + 32), 8);
         dst += dst_stride;
     }
 }
@@ -849,7 +904,7 @@
     v16i8 vec0, vec1, vec2, vec3;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
 
     src -= 3;
 
@@ -1308,31 +1363,28 @@
                               int32_t height)
 {
     uint32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    int32_t dst_stride_in_bytes = 2 * dst_stride;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v8i16 filt0, filt1, filt2, filt3;
-    v4i32 filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 filt_h0, filt_h1, filt_h2, filt_h3;
     v16i8 mask1, mask2, mask3;
     v8i16 filter_vec, const_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
-    v8i16 dst30, dst41, dst52, dst63, dst66, dst87;
-    v4i32 dst0_r, dst1_r;
-    v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
-    v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
-    v16i8 mask0 = {
-        0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20
-    };
-    v8u16 mask4 = { 0, 4, 1, 5, 2, 6, 3, 7 };
+    v8i16 dst30, dst41, dst52, dst63, dst66, dst97, dst108;
+    v4i32 dst0_r, dst1_r, dst2_r, dst3_r;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r, dst98_r;
+    v8i16 dst21_r, dst43_r, dst65_r, dst87_r, dst109_r;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
 
     src -= ((3 * src_stride) + 3);
     filter_vec = LD_SH(filter_x);
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W4_SW(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
 
     mask1 = mask0 + 2;
     mask2 = mask0 + 4;
@@ -1364,47 +1416,56 @@
     DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2, filt3,
                  dst63, dst63, dst63, dst63);
 
-    ILVR_H3_SH(dst41, dst30, dst52, dst41, dst63, dst52,
-               dst10_r, dst21_r, dst32_r);
-    dst43_r = __msa_ilvl_h(dst41, dst30);
-    dst54_r = __msa_ilvl_h(dst52, dst41);
-    dst65_r = __msa_ilvl_h(dst63, dst52);
+    ILVRL_H2_SH(dst41, dst30, dst10_r, dst43_r);
+    ILVRL_H2_SH(dst52, dst41, dst21_r, dst54_r);
+    ILVRL_H2_SH(dst63, dst52, dst32_r, dst65_r);
     dst66 = (v8i16) __msa_splati_d((v2i64) dst63, 1);
 
-    for (loop_cnt = height >> 1; loop_cnt--;) {
-        LD_SB2(src, src_stride, src7, src8);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src7, src8);
+    for (loop_cnt = height >> 2; loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+        XORI_B4_128_SB(src7, src8, src9, src10);
 
-        VSHF_B4_SB(src7, src8, mask0, mask1, mask2, mask3,
+        VSHF_B4_SB(src7, src9, mask0, mask1, mask2, mask3,
                    vec0, vec1, vec2, vec3);
-        dst87 = const_vec;
+        VSHF_B4_SB(src8, src10, mask0, mask1, mask2, mask3,
+                   vec4, vec5, vec6, vec7);
+        dst97 = const_vec;
+        dst108 = const_vec;
         DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                     dst87, dst87, dst87, dst87);
-        dst76_r = __msa_ilvr_h(dst87, dst66);
+                     dst97, dst97, dst97, dst97);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3,
+                     dst108, dst108, dst108, dst108);
+
+        dst76_r = __msa_ilvr_h(dst97, dst66);
+        ILVRL_H2_SH(dst108, dst97, dst87_r, dst109_r);
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst97, 1);
+        dst98_r = __msa_ilvr_h(dst66, dst108);
+
         dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
                                 filt_h0, filt_h1, filt_h2, filt_h3);
-        dst87_r = __msa_vshf_h((v8i16) mask4, dst87, dst87);
         dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r,
                                 filt_h0, filt_h1, filt_h2, filt_h3);
-        dst0_r >>= 6;
-        dst1_r >>= 6;
+        dst2_r = HEVC_FILT_8TAP(dst32_r, dst54_r, dst76_r, dst98_r,
+                                filt_h0, filt_h1, filt_h2, filt_h3);
+        dst3_r = HEVC_FILT_8TAP(dst43_r, dst65_r, dst87_r, dst109_r,
+                                filt_h0, filt_h1, filt_h2, filt_h3);
+        SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+        PCKEV_H2_SW(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r, dst2_r);
+        ST8x4_UB(dst0_r, dst2_r, dst, dst_stride_in_bytes);
+        dst += (4 * dst_stride);
 
-        dst0_r = (v4i32) __msa_pckev_h((v8i16) dst1_r, (v8i16) dst0_r);
-        ST8x2_UB(dst0_r, dst, (2 * dst_stride));
-        dst += (2 * dst_stride);
-
-        dst10_r = dst32_r;
-        dst32_r = dst54_r;
-        dst54_r = dst76_r;
-        dst21_r = dst43_r;
-        dst43_r = dst65_r;
-        dst65_r = dst87_r;
-        dst66 = (v8i16) __msa_splati_d((v2i64) dst87, 1);
+        dst10_r = dst54_r;
+        dst32_r = dst76_r;
+        dst54_r = dst98_r;
+        dst21_r = dst65_r;
+        dst43_r = dst87_r;
+        dst65_r = dst109_r;
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst108, 1);
     }
 }
 
-static void hevc_hv_8t_8multx2mult_msa(uint8_t *src,
+static void hevc_hv_8t_8multx1mult_msa(uint8_t *src,
                                        int32_t src_stride,
                                        int16_t *dst,
                                        int32_t dst_stride,
@@ -1415,19 +1476,17 @@
     uint32_t loop_cnt, cnt;
     uint8_t *src_tmp;
     int16_t *dst_tmp;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
     v8i16 filt0, filt1, filt2, filt3;
-    v4i32 filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 filt_h0, filt_h1, filt_h2, filt_h3;
     v16i8 mask1, mask2, mask3;
     v8i16 filter_vec, const_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
     v4i32 dst0_r, dst0_l;
     v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
     v8i16 dst10_l, dst32_l, dst54_l, dst76_l;
-    v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
-    v8i16 dst21_l, dst43_l, dst65_l, dst87_l;
     v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
 
     src -= ((3 * src_stride) + 3);
@@ -1435,10 +1494,9 @@
     SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W4_SW(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
 
     mask1 = mask0 + 2;
     mask2 = mask0 + 4;
@@ -1494,17 +1552,10 @@
         DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3,
                      dst6, dst6, dst6, dst6);
 
-        ILVR_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1,
-                   dst10_r, dst32_r, dst54_r, dst21_r);
-        ILVR_H2_SH(dst4, dst3, dst6, dst5, dst43_r, dst65_r);
-        ILVL_H4_SH(dst1, dst0, dst3, dst2, dst5, dst4, dst2, dst1,
-                   dst10_l, dst32_l, dst54_l, dst21_l);
-        ILVL_H2_SH(dst4, dst3, dst6, dst5, dst43_l, dst65_l);
-
-        for (loop_cnt = height >> 1; loop_cnt--;) {
-            LD_SB2(src_tmp, src_stride, src7, src8);
-            XORI_B2_128_SB(src7, src8);
-            src_tmp += 2 * src_stride;
+        for (loop_cnt = height; loop_cnt--;) {
+            src7 = LD_SB(src_tmp);
+            src7 = (v16i8) __msa_xori_b((v16u8) src7, 128);
+            src_tmp += src_stride;
 
             VSHF_B4_SB(src7, src7, mask0, mask1, mask2, mask3,
                        vec0, vec1, vec2, vec3);
@@ -1512,6 +1563,9 @@
             DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
                          dst7, dst7, dst7, dst7);
 
+            ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
+            ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+            ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
             ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
             dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r,
                                     filt_h0, filt_h1, filt_h2, filt_h3);
@@ -1524,37 +1578,13 @@
             ST_SW(dst0_r, dst_tmp);
             dst_tmp += dst_stride;
 
-            VSHF_B4_SB(src8, src8, mask0, mask1, mask2, mask3,
-                       vec0, vec1, vec2, vec3);
-            dst8 = const_vec;
-            DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3,
-                         dst8, dst8, dst8, dst8);
-
-            ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
-            dst6 = dst8;
-            dst0_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r,
-                                    filt_h0, filt_h1, filt_h2, filt_h3);
-            dst0_l = HEVC_FILT_8TAP(dst21_l, dst43_l, dst65_l, dst87_l,
-                                    filt_h0, filt_h1, filt_h2, filt_h3);
-            dst0_r >>= 6;
-            dst0_l >>= 6;
-
-            dst0_r = (v4i32) __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
-            ST_SW(dst0_r, dst_tmp);
-            dst_tmp += dst_stride;
-
-            dst10_r = dst32_r;
-            dst32_r = dst54_r;
-            dst54_r = dst76_r;
-            dst10_l = dst32_l;
-            dst32_l = dst54_l;
-            dst54_l = dst76_l;
-            dst21_r = dst43_r;
-            dst43_r = dst65_r;
-            dst65_r = dst87_r;
-            dst21_l = dst43_l;
-            dst43_l = dst65_l;
-            dst65_l = dst87_l;
+            dst0 = dst1;
+            dst1 = dst2;
+            dst2 = dst3;
+            dst3 = dst4;
+            dst4 = dst5;
+            dst5 = dst6;
+            dst6 = dst7;
         }
 
         src += 8;
@@ -1567,7 +1597,7 @@
                               const int8_t *filter_x, const int8_t *filter_y,
                               int32_t height)
 {
-    hevc_hv_8t_8multx2mult_msa(src, src_stride, dst, dst_stride,
+    hevc_hv_8t_8multx1mult_msa(src, src_stride, dst, dst_stride,
                                filter_x, filter_y, height, 8);
 }
 
@@ -1576,11 +1606,195 @@
                                const int8_t *filter_x, const int8_t *filter_y,
                                int32_t height)
 {
-    hevc_hv_8t_8multx2mult_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height, 8);
+    uint32_t loop_cnt;
+    int32_t dst_stride_in_bytes = 2 * dst_stride;
+    uint8_t *src_tmp;
+    int16_t *dst_tmp;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+    v8i16 filt0, filt1, filt2, filt3, filt_h0, filt_h1, filt_h2, filt_h3;
+    v8i16 filter_vec, const_vec;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 dst30, dst41, dst52, dst63, dst66, dst97, dst108;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r, dst98_r, dst21_r, dst43_r;
+    v8i16 dst65_r, dst87_r, dst109_r, dst10_l, dst32_l, dst54_l, dst76_l;
+    v4i32 dst0_r, dst0_l, dst1_r, dst2_r, dst3_r;
 
-    hevc_hv_8t_4w_msa(src + 8, src_stride, dst + 8, dst_stride,
-                      filter_x, filter_y, height);
+    src -= ((3 * src_stride) + 3);
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H4_SH(filter_vec, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W4_SH(filter_vec, filt_h0, filt_h1, filt_h2, filt_h3);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    const_vec = __msa_ldi_h(128);
+    const_vec <<= 6;
+
+    src_tmp = src;
+    dst_tmp = dst;
+
+    LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src_tmp += (7 * src_stride);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+    /* row 0 row 1 row 2 row 3 */
+    VSHF_B4_SB(src0, src0, mask0, mask1, mask2, mask3, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src1, src1, mask0, mask1, mask2, mask3, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src2, src2, mask0, mask1, mask2, mask3, vec8, vec9, vec10,
+               vec11);
+    VSHF_B4_SB(src3, src3, mask0, mask1, mask2, mask3, vec12, vec13, vec14,
+               vec15);
+    dst0 = const_vec;
+    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3, dst0, dst0,
+                 dst0, dst0);
+    dst1 = const_vec;
+    DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3, dst1, dst1,
+                 dst1, dst1);
+    dst2 = const_vec;
+    DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3, dst2,
+                 dst2, dst2, dst2);
+    dst3 = const_vec;
+    DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2, filt3, dst3,
+                 dst3, dst3, dst3);
+
+    /* row 4 row 5 row 6 */
+    VSHF_B4_SB(src4, src4, mask0, mask1, mask2, mask3, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src5, src5, mask0, mask1, mask2, mask3, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src6, src6, mask0, mask1, mask2, mask3, vec8, vec9, vec10,
+               vec11);
+    dst4 = const_vec;
+    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3, dst4, dst4,
+                 dst4, dst4);
+    dst5 = const_vec;
+    DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3, dst5, dst5,
+                 dst5, dst5);
+    dst6 = const_vec;
+    DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3, dst6,
+                 dst6, dst6, dst6);
+
+    for (loop_cnt = height; loop_cnt--;) {
+        src7 = LD_SB(src_tmp);
+        src7 = (v16i8) __msa_xori_b((v16u8) src7, 128);
+        src_tmp += src_stride;
+
+        VSHF_B4_SB(src7, src7, mask0, mask1, mask2, mask3, vec0, vec1, vec2,
+                   vec3);
+        dst7 = const_vec;
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3, dst7,
+                     dst7, dst7, dst7);
+
+        ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
+        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
+        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst0_l = HEVC_FILT_8TAP(dst10_l, dst32_l, dst54_l, dst76_l, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst0_r >>= 6;
+        dst0_l >>= 6;
+
+        dst0_r = (v4i32) __msa_pckev_h((v8i16) dst0_l, (v8i16) dst0_r);
+        ST_SW(dst0_r, dst_tmp);
+        dst_tmp += dst_stride;
+
+        dst0 = dst1;
+        dst1 = dst2;
+        dst2 = dst3;
+        dst3 = dst4;
+        dst4 = dst5;
+        dst5 = dst6;
+        dst6 = dst7;
+    }
+
+    src += 8;
+    dst += 8;
+
+    mask4 = LD_SB(ff_hevc_mask_arr + 16);
+    mask5 = mask4 + 2;
+    mask6 = mask4 + 4;
+    mask7 = mask4 + 6;
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src += (7 * src_stride);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+    VSHF_B4_SB(src0, src3, mask4, mask5, mask6, mask7, vec0, vec1, vec2, vec3);
+    VSHF_B4_SB(src1, src4, mask4, mask5, mask6, mask7, vec4, vec5, vec6, vec7);
+    VSHF_B4_SB(src2, src5, mask4, mask5, mask6, mask7, vec8, vec9, vec10,
+               vec11);
+    VSHF_B4_SB(src3, src6, mask4, mask5, mask6, mask7, vec12, vec13, vec14,
+               vec15);
+    dst30 = const_vec;
+    DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3, dst30,
+                 dst30, dst30, dst30);
+    dst41 = const_vec;
+    DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3, dst41,
+                 dst41, dst41, dst41);
+    dst52 = const_vec;
+    DPADD_SB4_SH(vec8, vec9, vec10, vec11, filt0, filt1, filt2, filt3, dst52,
+                 dst52, dst52, dst52);
+    dst63 = const_vec;
+    DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt0, filt1, filt2, filt3, dst63,
+                 dst63, dst63, dst63);
+
+    ILVRL_H2_SH(dst41, dst30, dst10_r, dst43_r);
+    ILVRL_H2_SH(dst52, dst41, dst21_r, dst54_r);
+    ILVRL_H2_SH(dst63, dst52, dst32_r, dst65_r);
+
+    dst66 = (v8i16) __msa_splati_d((v2i64) dst63, 1);
+
+    for (loop_cnt = height >> 2; loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+
+        VSHF_B4_SB(src7, src9, mask4, mask5, mask6, mask7, vec0, vec1, vec2,
+                   vec3);
+        VSHF_B4_SB(src8, src10, mask4, mask5, mask6, mask7, vec4, vec5, vec6,
+                   vec7);
+        dst97 = const_vec;
+        dst108 = const_vec;
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt1, filt2, filt3, dst97,
+                     dst97, dst97, dst97);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt0, filt1, filt2, filt3, dst108,
+                     dst108, dst108, dst108);
+
+        dst76_r = __msa_ilvr_h(dst97, dst66);
+        ILVRL_H2_SH(dst108, dst97, dst87_r, dst109_r);
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst97, 1);
+        dst98_r = __msa_ilvr_h(dst66, dst108);
+
+        dst0_r = HEVC_FILT_8TAP(dst10_r, dst32_r, dst54_r, dst76_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst1_r = HEVC_FILT_8TAP(dst21_r, dst43_r, dst65_r, dst87_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst2_r = HEVC_FILT_8TAP(dst32_r, dst54_r, dst76_r, dst98_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        dst3_r = HEVC_FILT_8TAP(dst43_r, dst65_r, dst87_r, dst109_r, filt_h0,
+                                filt_h1, filt_h2, filt_h3);
+        SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+        PCKEV_H2_SW(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r, dst2_r);
+        ST8x4_UB(dst0_r, dst2_r, dst, dst_stride_in_bytes);
+        dst += (4 * dst_stride);
+
+        dst10_r = dst54_r;
+        dst32_r = dst76_r;
+        dst54_r = dst98_r;
+        dst21_r = dst65_r;
+        dst43_r = dst87_r;
+        dst65_r = dst109_r;
+        dst66 = (v8i16) __msa_splati_d((v2i64) dst108, 1);
+    }
 }
 
 static void hevc_hv_8t_16w_msa(uint8_t *src, int32_t src_stride,
@@ -1588,7 +1802,7 @@
                                const int8_t *filter_x, const int8_t *filter_y,
                                int32_t height)
 {
-    hevc_hv_8t_8multx2mult_msa(src, src_stride, dst, dst_stride,
+    hevc_hv_8t_8multx1mult_msa(src, src_stride, dst, dst_stride,
                                filter_x, filter_y, height, 16);
 }
 
@@ -1597,7 +1811,7 @@
                                const int8_t *filter_x, const int8_t *filter_y,
                                int32_t height)
 {
-    hevc_hv_8t_8multx2mult_msa(src, src_stride, dst, dst_stride,
+    hevc_hv_8t_8multx1mult_msa(src, src_stride, dst, dst_stride,
                                filter_x, filter_y, height, 24);
 }
 
@@ -1606,7 +1820,7 @@
                                const int8_t *filter_x, const int8_t *filter_y,
                                int32_t height)
 {
-    hevc_hv_8t_8multx2mult_msa(src, src_stride, dst, dst_stride,
+    hevc_hv_8t_8multx1mult_msa(src, src_stride, dst, dst_stride,
                                filter_x, filter_y, height, 32);
 }
 
@@ -1615,7 +1829,7 @@
                                const int8_t *filter_x, const int8_t *filter_y,
                                int32_t height)
 {
-    hevc_hv_8t_8multx2mult_msa(src, src_stride, dst, dst_stride,
+    hevc_hv_8t_8multx1mult_msa(src, src_stride, dst, dst_stride,
                                filter_x, filter_y, height, 48);
 }
 
@@ -1624,7 +1838,7 @@
                                const int8_t *filter_x, const int8_t *filter_y,
                                int32_t height)
 {
-    hevc_hv_8t_8multx2mult_msa(src, src_stride, dst, dst_stride,
+    hevc_hv_8t_8multx1mult_msa(src, src_stride, dst, dst_stride,
                                filter_x, filter_y, height, 64);
 }
 
@@ -1639,7 +1853,7 @@
     v16i8 mask1, vec0, vec1;
     v8i16 dst0;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
 
     src -= 1;
 
@@ -1672,7 +1886,7 @@
     v16i8 mask1, vec0, vec1;
     v8i16 dst0, dst1;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
 
     src -= 1;
 
@@ -1711,7 +1925,7 @@
     v16i8 mask1, vec0, vec1;
     v8i16 dst0, dst1, dst2, dst3;
     v8i16 filter_vec, const_vec;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
 
     src -= 1;
 
@@ -1776,7 +1990,7 @@
     uint32_t dst_val_int0, dst_val_int1, dst_val_int2, dst_val_int3;
     v8i16 filt0, filt1, dst0, dst1, dst2, dst3;
     v16i8 src0, src1, src2, src3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v16i8 vec0, vec1;
     v8i16 filter_vec, const_vec;
@@ -1791,7 +2005,7 @@
     const_vec = __msa_ldi_h(128);
     const_vec <<= 6;
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = 2; loop_cnt--;) {
         LD_SB4(src, src_stride, src0, src1, src2, src3);
         src += (4 * src_stride);
 
@@ -1845,7 +2059,7 @@
     uint32_t loop_cnt;
     v8i16 filt0, filt1, dst0, dst1;
     v16i8 src0, src1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v16i8 vec0, vec1;
     v8i16 filter_vec, const_vec;
@@ -1889,7 +2103,7 @@
     uint32_t loop_cnt;
     v8i16 filt0, filt1;
     v16i8 src0, src1, src2, src3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v16i8 vec0, vec1;
     v8i16 dst0, dst1, dst2, dst3;
@@ -1963,7 +2177,7 @@
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v8i16 filter_vec, const_vec;
     v16i8 mask3;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask2 = {
         8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
     };
@@ -2020,7 +2234,7 @@
     v16i8 src0, src1, src2, src3;
     v16i8 src4, src5, src6, src7;
     v8i16 filt0, filt1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
     v16i8 vec0, vec1;
@@ -2092,7 +2306,7 @@
     int16_t *dst_tmp = dst + 16;
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
     v8i16 filt0, filt1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1, mask00, mask11;
     v16i8 vec0, vec1;
     v8i16 dst0, dst1, dst2, dst3;
@@ -2192,10 +2406,10 @@
     uint32_t loop_cnt;
     v16i8 src0, src1, src2;
     v8i16 filt0, filt1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1, mask2, mask3;
     v8i16 dst0, dst1, dst2, dst3;
-    v16i8 vec0, vec1;
+    v16i8 vec0, vec1, vec2, vec3;
     v8i16 filter_vec, const_vec;
 
     src -= 1;
@@ -2210,54 +2424,25 @@
     mask2 = mask0 + 8;
     mask3 = mask0 + 10;
 
-    for (loop_cnt = (height >> 1); loop_cnt--;) {
+    for (loop_cnt = height; loop_cnt--;) {
         LD_SB2(src, 16, src0, src1);
         src2 = LD_SB(src + 24);
         src += src_stride;
 
         XORI_B3_128_SB(src0, src1, src2);
 
-        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
         dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-
-        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
         dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
         dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
         dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-        ST_SH4(dst0, dst1, dst2, dst3, dst, 8);
-        dst += dst_stride;
-
-        LD_SB2(src, 16, src0, src1);
-        src2 = LD_SB(src + 24);
-        src += src_stride;
-
-        XORI_B3_128_SB(src0, src1, src2);
-
-        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-        dst0 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-
-        VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
-        dst1 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst1, dst1);
-
-        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
+        VSHF_B2_SB(src0, src0, src0, src1, mask0, mask2, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask0, mask0, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, dst0,
+                     dst1, dst2, dst3);
+        VSHF_B2_SB(src0, src0, src0, src1, mask1, mask3, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src2, src2, mask1, mask1, vec2, vec3);
+        DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, dst0,
+                     dst1, dst2, dst3);
         ST_SH4(dst0, dst1, dst2, dst3, dst, 8);
         dst += dst_stride;
     }
@@ -2333,18 +2518,17 @@
     ST8x4_UB(dst10, dst32, dst, 2 * dst_stride);
 }
 
-static void hevc_vt_4t_4x8multiple_msa(uint8_t *src,
-                                       int32_t src_stride,
-                                       int16_t *dst,
-                                       int32_t dst_stride,
-                                       const int8_t *filter,
-                                       int32_t height)
+static void hevc_vt_4t_4x8_msa(uint8_t *src,
+                               int32_t src_stride,
+                               int16_t *dst,
+                               int32_t dst_stride,
+                               const int8_t *filter,
+                               int32_t height)
 {
-    int32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
     v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
-    v16i8 src2110, src4332, src6554, src8776;
+    v16i8 src2110, src4332, src6554, src8776, src10998;
     v8i16 dst10, dst32, dst54, dst76;
     v8i16 filt0, filt1;
     v8i16 filter_vec, const_vec;
@@ -2363,35 +2547,96 @@
     src2110 = (v16i8) __msa_ilvr_d((v2i64) src21_r, (v2i64) src10_r);
     src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
 
-    for (loop_cnt = (height >> 3); loop_cnt--;) {
-        LD_SB6(src, src_stride, src3, src4, src5, src6, src7, src8);
-        src += (6 * src_stride);
+    LD_SB8(src, src_stride, src3, src4, src5, src6, src7, src8, src9, src10);
+    src += (8 * src_stride);
+    ILVR_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5,
+               src32_r, src43_r, src54_r, src65_r);
+    ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9,
+               src76_r, src87_r, src98_r, src109_r);
+    ILVR_D4_SB(src43_r, src32_r, src65_r, src54_r, src87_r, src76_r, src109_r,
+               src98_r, src4332, src6554, src8776, src10998);
+    XORI_B4_128_SB(src4332, src6554, src8776, src10998);
+    dst10 = const_vec;
+    dst32 = const_vec;
+    dst54 = const_vec;
+    dst76 = const_vec;
+    DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst10, dst10);
+    DPADD_SB2_SH(src4332, src6554, filt0, filt1, dst32, dst32);
+    DPADD_SB2_SH(src6554, src8776, filt0, filt1, dst54, dst54);
+    DPADD_SB2_SH(src8776, src10998, filt0, filt1, dst76, dst76);
+    ST8x8_UB(dst10, dst32, dst54, dst76, dst, 2 * dst_stride);
+    dst += (8 * dst_stride);
+}
 
-        ILVR_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5,
-                   src32_r, src43_r, src54_r, src65_r);
-        ILVR_B2_SB(src7, src6, src8, src7, src76_r, src87_r);
-        ILVR_D3_SB(src43_r, src32_r, src65_r, src54_r, src87_r, src76_r,
-                   src4332, src6554, src8776);
-        XORI_B3_128_SB(src4332, src6554, src8776);
+static void hevc_vt_4t_4x16_msa(uint8_t *src, int32_t src_stride,
+                                int16_t *dst, int32_t dst_stride,
+                                const int8_t *filter, int32_t height)
+{
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+    v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776;
+    v16i8 src10998;
+    v8i16 dst10, dst32, dst54, dst76, filt0, filt1, filter_vec, const_vec;
 
-        dst10 = const_vec;
-        DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst10, dst10);
-        dst32 = const_vec;
-        DPADD_SB2_SH(src4332, src6554, filt0, filt1, dst32, dst32);
-        dst54 = const_vec;
-        DPADD_SB2_SH(src6554, src8776, filt0, filt1, dst54, dst54);
+    src -= src_stride;
+    const_vec = __msa_ldi_h(128);
+    const_vec <<= 6;
 
-        LD_SB2(src, src_stride, src9, src2);
-        src += (2 * src_stride);
-        ILVR_B2_SB(src9, src8, src2, src9, src98_r, src109_r);
-        src2110 = (v16i8) __msa_ilvr_d((v2i64) src109_r, (v2i64) src98_r);
-        src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
-        dst76 = const_vec;
-        DPADD_SB2_SH(src8776, src2110, filt0, filt1, dst76, dst76);
+    filter_vec = LD_SH(filter);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-        ST8x8_UB(dst10, dst32, dst54, dst76, dst, 2 * dst_stride);
-        dst += (8 * dst_stride);
-    }
+    LD_SB3(src, src_stride, src0, src1, src2);
+    src += (3 * src_stride);
+
+    ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
+    src2110 = (v16i8) __msa_ilvr_d((v2i64) src21_r, (v2i64) src10_r);
+    src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
+
+    LD_SB8(src, src_stride, src3, src4, src5, src6, src7, src8, src9, src10);
+    src += (8 * src_stride);
+    ILVR_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5, src32_r, src43_r,
+               src54_r, src65_r);
+    ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+               src87_r, src98_r, src109_r);
+    ILVR_D4_SB(src43_r, src32_r, src65_r, src54_r, src87_r, src76_r, src109_r,
+               src98_r, src4332, src6554, src8776, src10998);
+    XORI_B4_128_SB(src4332, src6554, src8776, src10998);
+
+    dst10 = const_vec;
+    dst32 = const_vec;
+    dst54 = const_vec;
+    dst76 = const_vec;
+    DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst10, dst10);
+    DPADD_SB2_SH(src4332, src6554, filt0, filt1, dst32, dst32);
+    DPADD_SB2_SH(src6554, src8776, filt0, filt1, dst54, dst54);
+    DPADD_SB2_SH(src8776, src10998, filt0, filt1, dst76, dst76);
+    ST8x8_UB(dst10, dst32, dst54, dst76, dst, 2 * dst_stride);
+    dst += (8 * dst_stride);
+
+    src2 = src10;
+    src2110 = src10998;
+
+    LD_SB8(src, src_stride, src3, src4, src5, src6, src7, src8, src9, src10);
+    src += (8 * src_stride);
+
+    ILVR_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5, src32_r, src43_r,
+               src54_r, src65_r);
+    ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+               src87_r, src98_r, src109_r);
+    ILVR_D4_SB(src43_r, src32_r, src65_r, src54_r, src87_r, src76_r, src109_r,
+               src98_r, src4332, src6554, src8776, src10998);
+    XORI_B4_128_SB(src4332, src6554, src8776, src10998);
+
+    dst10 = const_vec;
+    dst32 = const_vec;
+    dst54 = const_vec;
+    dst76 = const_vec;
+    DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst10, dst10);
+    DPADD_SB2_SH(src4332, src6554, filt0, filt1, dst32, dst32);
+    DPADD_SB2_SH(src6554, src8776, filt0, filt1, dst54, dst54);
+    DPADD_SB2_SH(src8776, src10998, filt0, filt1, dst76, dst76);
+    ST8x8_UB(dst10, dst32, dst54, dst76, dst, 2 * dst_stride);
+    dst += (8 * dst_stride);
 }
 
 static void hevc_vt_4t_4w_msa(uint8_t *src,
@@ -2405,9 +2650,10 @@
         hevc_vt_4t_4x2_msa(src, src_stride, dst, dst_stride, filter);
     } else if (4 == height) {
         hevc_vt_4t_4x4_msa(src, src_stride, dst, dst_stride, filter, height);
-    } else if (0 == (height % 8)) {
-        hevc_vt_4t_4x8multiple_msa(src, src_stride, dst, dst_stride,
-                                   filter, height);
+    } else if (8 == height) {
+        hevc_vt_4t_4x8_msa(src, src_stride, dst, dst_stride, filter, height);
+    } else if (16 == height) {
+        hevc_vt_4t_4x16_msa(src, src_stride, dst, dst_stride, filter, height);
     }
 }
 
@@ -2590,9 +2836,9 @@
                                        int32_t height)
 {
     int32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5;
-    v16i8 src10_r, src32_r, src21_r, src43_r;
-    v8i16 dst0_r, dst1_r;
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v16i8 src10_r, src32_r, src21_r, src43_r, src54_r, src65_r;
+    v8i16 dst0_r, dst1_r, dst2_r, dst3_r;
     v8i16 filt0, filt1;
     v8i16 filter_vec, const_vec;
 
@@ -2609,29 +2855,25 @@
     ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
 
     for (loop_cnt = (height >> 2); loop_cnt--;) {
-        LD_SB2(src, src_stride, src3, src4);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src3, src4);
+        LD_SB4(src, src_stride, src3, src4, src5, src6);
+        src += (4 * src_stride);
+        XORI_B4_128_SB(src3, src4, src5, src6);
         ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
+        ILVR_B2_SB(src5, src4, src6, src5, src54_r, src65_r);
         dst0_r = const_vec;
+        dst1_r = const_vec;
+        dst2_r = const_vec;
+        dst3_r = const_vec;
         DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, dst0_r, dst0_r);
-        dst1_r = const_vec;
         DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, dst1_r, dst1_r);
+        DPADD_SB2_SH(src32_r, src54_r, filt0, filt1, dst2_r, dst2_r);
+        DPADD_SB2_SH(src43_r, src65_r, filt0, filt1, dst3_r, dst3_r);
+        ST_SH4(dst0_r, dst1_r, dst2_r, dst3_r, dst, dst_stride);
+        dst += (4 * dst_stride);
 
-        ST_SH2(dst0_r, dst1_r, dst, dst_stride);
-        dst += (2 * dst_stride);
-
-        LD_SB2(src, src_stride, src5, src2);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src5, src2);
-        ILVR_B2_SB(src5, src4, src2, src5, src10_r, src21_r);
-        dst0_r = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, dst0_r, dst0_r);
-        dst1_r = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, dst1_r, dst1_r);
-
-        ST_SH2(dst0_r, dst1_r, dst, dst_stride);
-        dst += (2 * dst_stride);
+        src2 = src6;
+        src10_r = src54_r;
+        src21_r = src65_r;
     }
 }
 
@@ -2660,11 +2902,12 @@
                                int32_t height)
 {
     int32_t loop_cnt;
-    v16i8 src0, src1, src2, src3, src4, src5;
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
     v16i8 src10_r, src32_r, src21_r, src43_r;
     v8i16 dst0_r, dst1_r, dst2_r, dst3_r;
     v16i8 src10_l, src32_l, src54_l, src21_l, src43_l, src65_l;
     v16i8 src2110, src4332;
+    v16i8 src54_r, src65_r, src6554;
     v8i16 dst0_l, dst1_l;
     v8i16 filt0, filt1;
     v8i16 filter_vec, const_vec;
@@ -2683,36 +2926,42 @@
     ILVL_B2_SB(src1, src0, src2, src1, src10_l, src21_l);
     src2110 = (v16i8) __msa_ilvr_d((v2i64) src21_l, (v2i64) src10_l);
 
-    for (loop_cnt = (height >> 2); loop_cnt--;) {
+    for (loop_cnt = 4; loop_cnt--;) {
         LD_SB2(src, src_stride, src3, src4);
         src += (2 * src_stride);
+        LD_SB2(src, src_stride, src5, src6);
+        src += (2 * src_stride);
         XORI_B2_128_SB(src3, src4);
+        XORI_B2_128_SB(src5, src6);
+
         ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
         ILVL_B2_SB(src3, src2, src4, src3, src32_l, src43_l);
         src4332 = (v16i8) __msa_ilvr_d((v2i64) src43_l, (v2i64) src32_l);
+        ILVR_B2_SB(src5, src4, src6, src5, src54_r, src65_r);
+        ILVL_B2_SB(src5, src4, src6, src5, src54_l, src65_l);
+        src6554 = (v16i8) __msa_ilvr_d((v2i64) src65_l, (v2i64) src54_l);
+
         dst0_r = const_vec;
         DPADD_SB2_SH(src10_r, src32_r, filt0, filt1, dst0_r, dst0_r);
         dst1_r = const_vec;
         DPADD_SB2_SH(src21_r, src43_r, filt0, filt1, dst1_r, dst1_r);
+        dst2_r = const_vec;
+        DPADD_SB2_SH(src32_r, src54_r, filt0, filt1, dst2_r, dst2_r);
+        dst3_r = const_vec;
+        DPADD_SB2_SH(src43_r, src65_r, filt0, filt1, dst3_r, dst3_r);
         dst0_l = const_vec;
         DPADD_SB2_SH(src2110, src4332, filt0, filt1, dst0_l, dst0_l);
-
-        LD_SB2(src, src_stride, src5, src2);
-        src += (2 * src_stride);
-        XORI_B2_128_SB(src5, src2);
-        ILVR_B2_SB(src5, src4, src2, src5, src10_r, src21_r);
-        ILVL_B2_SB(src5, src4, src2, src5, src54_l, src65_l);
-        src2110 = (v16i8) __msa_ilvr_d((v2i64) src65_l, (v2i64) src54_l);
-        dst2_r = const_vec;
-        DPADD_SB2_SH(src32_r, src10_r, filt0, filt1, dst2_r, dst2_r);
-        dst3_r = const_vec;
-        DPADD_SB2_SH(src43_r, src21_r, filt0, filt1, dst3_r, dst3_r);
         dst1_l = const_vec;
-        DPADD_SB2_SH(src4332, src2110, filt0, filt1, dst1_l, dst1_l);
+        DPADD_SB2_SH(src4332, src6554, filt0, filt1, dst1_l, dst1_l);
 
         ST_SH4(dst0_r, dst1_r, dst2_r, dst3_r, dst, dst_stride);
         ST8x4_UB(dst0_l, dst1_l, dst + 8, (2 * dst_stride));
         dst += (4 * dst_stride);
+
+        src2 = src6;
+        src10_r = src54_r;
+        src21_r = src65_r;
+        src2110 = src6554;
     }
 }
 
@@ -2994,69 +3243,52 @@
                                const int8_t *filter_x,
                                const int8_t *filter_y)
 {
+    int32_t dst_stride_in_bytes = 2 * dst_stride;
     v16i8 src0, src1, src2, src3, src4;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
     v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4;
-    v4i32 dst0_r, dst1_r;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
+    v8i16 dst20, dst31, dst42, dst10, dst32, dst21, dst43;
+    v4i32 dst0, dst1;
 
     src -= (src_stride + 1);
     filter_vec = LD_SH(filter_x);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
     const_vec = __msa_ldi_h(128);
     const_vec <<= 6;
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    VSHF_B2_SB(src0, src2, src0, src2, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src3, src1, src3, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src4, src2, src4, mask0, mask1, vec4, vec5);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+    dst20 = const_vec;
+    dst31 = const_vec;
+    dst42 = const_vec;
+    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst20, dst20);
+    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst31, dst31);
+    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst42, dst42);
+    ILVRL_H2_SH(dst31, dst20, dst10, dst32);
+    ILVRL_H2_SH(dst42, dst31, dst21, dst43);
 
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
-
-    LD_SB2(src, src_stride, src3, src4);
-    XORI_B2_128_SB(src3, src4);
-
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-    dst32_r = __msa_ilvr_h(dst3, dst2);
-    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-    dst0_r >>= 6;
-
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    dst43_r = __msa_ilvr_h(dst4, dst3);
-    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-    dst1_r >>= 6;
-
-    dst0_r = (v4i32) __msa_pckev_h((v8i16) dst1_r, (v8i16) dst0_r);
-    ST8x2_UB(dst0_r, dst, 2 * dst_stride);
+    dst0 = HEVC_FILT_4TAP(dst10, dst32, filt_h0, filt_h1);
+    dst1 = HEVC_FILT_4TAP(dst21, dst43, filt_h0, filt_h1);
+    dst0 >>= 6;
+    dst1 >>= 6;
+    dst0 = (v4i32) __msa_pckev_h((v8i16) dst1, (v8i16) dst0);
+    ST8x2_UB(dst0, dst, dst_stride_in_bytes);
 }
 
 static void hevc_hv_4t_4x4_msa(uint8_t *src,
@@ -3066,16 +3298,16 @@
                                const int8_t *filter_x,
                                const int8_t *filter_y)
 {
+    int32_t dst_stride_in_bytes = 2 * dst_stride;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
+    v8i16 dst30, dst41, dst52, dst63, dst10, dst32, dst54, dst21, dst43, dst65;
+    v4i32 dst0, dst1, dst2, dst3;
 
     src -= (src_stride + 1);
 
@@ -3083,71 +3315,43 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
     const_vec = __msa_ldi_h(128);
     const_vec <<= 6;
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
 
-    XORI_B3_128_SB(src0, src1, src2);
+    VSHF_B2_SB(src0, src3, src0, src3, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src4, src1, src4, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src5, src2, src5, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src6, src3, src6, mask0, mask1, vec6, vec7);
 
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+    dst30 = const_vec;
+    dst41 = const_vec;
+    dst52 = const_vec;
+    dst63 = const_vec;
+    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst30, dst30);
+    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst41, dst41);
+    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst52, dst52);
+    DPADD_SB2_SH(vec6, vec7, filt0, filt1, dst63, dst63);
 
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    ILVRL_H2_SH(dst41, dst30, dst10, dst43);
+    ILVRL_H2_SH(dst52, dst41, dst21, dst54);
+    ILVRL_H2_SH(dst63, dst52, dst32, dst65);
 
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
-
-    LD_SB4(src, src_stride, src3, src4, src5, src6);
-    XORI_B4_128_SB(src3, src4, src5, src6);
-
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
-    dst32_r = __msa_ilvr_h(dst3, dst2);
-    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-    dst0_r >>= 6;
-
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    dst43_r = __msa_ilvr_h(dst4, dst3);
-    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-    dst1_r >>= 6;
-
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-    dst10_r = __msa_ilvr_h(dst5, dst4);
-    dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-    dst2_r >>= 6;
-
-    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-    dst21_r = __msa_ilvr_h(dst2, dst5);
-    dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-    dst3_r >>= 6;
-
-    PCKEV_H2_SW(dst1_r, dst0_r, dst3_r, dst2_r, dst0_r, dst1_r);
-    ST8x4_UB(dst0_r, dst1_r, dst, 2 * dst_stride);
+    dst0 = HEVC_FILT_4TAP(dst10, dst32, filt_h0, filt_h1);
+    dst1 = HEVC_FILT_4TAP(dst21, dst43, filt_h0, filt_h1);
+    dst2 = HEVC_FILT_4TAP(dst32, dst54, filt_h0, filt_h1);
+    dst3 = HEVC_FILT_4TAP(dst43, dst65, filt_h0, filt_h1);
+    SRA_4V(dst0, dst1, dst2, dst3, 6);
+    PCKEV_H2_SW(dst1, dst0, dst3, dst2, dst0, dst2);
+    ST8x4_UB(dst0, dst2, dst, dst_stride_in_bytes);
 }
 
 
@@ -3163,25 +3367,24 @@
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v16i8 src7, src8, src9, src10;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr + 16);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, dst9;
-    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r, dst6_r, dst7_r;
-    v8i16 dst10_r, dst32_r, dst54_r, dst76_r;
-    v8i16 dst21_r, dst43_r, dst65_r, dst87_r;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dst10, dst21, dst22, dst73, dst84, dst95, dst106;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r, dst98_r;
+    v8i16 dst21_r, dst43_r, dst65_r, dst87_r, dst109_r;
+    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
 
     src -= (src_stride + 1);
     filter_vec = LD_SH(filter_x);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -3191,19 +3394,14 @@
     LD_SB3(src, src_stride, src0, src1, src2);
     src += (3 * src_stride);
     XORI_B3_128_SB(src0, src1, src2);
-
-    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
-    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
-    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
-
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
-
-    ILVR_H2_SH(dst1, dst0, dst2, dst1, dst10_r, dst21_r);
+    VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src2, src1, src2, mask0, mask1, vec2, vec3);
+    dst10 = const_vec;
+    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst10, dst10);
+    dst21 = const_vec;
+    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst21, dst21);
+    ILVRL_H2_SH(dst21, dst10, dst10_r, dst21_r);
+    dst22 = (v8i16) __msa_splati_d((v2i64) dst21, 1);
 
     for (loop_cnt = height >> 3; loop_cnt--;) {
         LD_SB8(src, src_stride,
@@ -3211,75 +3409,45 @@
         src += (8 * src_stride);
         XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
 
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        VSHF_B2_SB(src3, src7, src3, src7, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src8, src4, src8, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src9, src5, src9, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src10, src6, src10, mask0, mask1, vec6, vec7);
 
-        dst32_r = __msa_ilvr_h(dst3, dst2);
-        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-        dst0_r >>= 6;
+        dst73 = const_vec;
+        dst84 = const_vec;
+        dst95 = const_vec;
+        dst106 = const_vec;
+        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst73, dst73);
+        DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst84, dst84);
+        DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst95, dst95);
+        DPADD_SB2_SH(vec6, vec7, filt0, filt1, dst106, dst106);
 
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
+        dst32_r = __msa_ilvr_h(dst73, dst22);
+        ILVRL_H2_SH(dst84, dst73, dst43_r, dst87_r);
+        ILVRL_H2_SH(dst95, dst84, dst54_r, dst98_r);
+        ILVRL_H2_SH(dst106, dst95, dst65_r, dst109_r);
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst73, 1);
+        dst76_r = __msa_ilvr_h(dst22, dst106);
 
-        dst43_r = __msa_ilvr_h(dst4, dst3);
-        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-        dst1_r >>= 6;
-
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-        dst54_r = __msa_ilvr_h(dst5, dst4);
-        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
-        dst2_r >>= 6;
-
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst6 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-
-        dst65_r = __msa_ilvr_h(dst6, dst5);
-        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
-        dst3_r >>= 6;
-
-        VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-        dst7 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
-
-        dst76_r = __msa_ilvr_h(dst7, dst6);
-        dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
-        dst4_r >>= 6;
-
-        VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec0, vec1);
-        dst8 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst8, dst8);
-
-        dst87_r = __msa_ilvr_h(dst8, dst7);
-        dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
-        dst5_r >>= 6;
-
-        VSHF_B2_SB(src9, src9, src9, src9, mask0, mask1, vec0, vec1);
-        dst9 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst9, dst9);
-
-        dst10_r = __msa_ilvr_h(dst9, dst8);
-        dst6_r = HEVC_FILT_4TAP(dst76_r, dst10_r, filt_h0, filt_h1);
-        dst6_r >>= 6;
-
-        VSHF_B2_SB(src10, src10, src10, src10, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-        dst21_r = __msa_ilvr_h(dst2, dst9);
-        dst7_r = HEVC_FILT_4TAP(dst87_r, dst21_r, filt_h0, filt_h1);
-        dst7_r >>= 6;
-
-        PCKEV_H4_SW(dst1_r, dst0_r, dst3_r, dst2_r,
-                    dst5_r, dst4_r, dst7_r, dst6_r,
-                    dst0_r, dst1_r, dst2_r, dst3_r);
-        ST8x8_UB(dst0_r, dst1_r, dst2_r, dst3_r, dst, 2 * dst_stride);
+        dst0 = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst1 = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst2 = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst3 = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst4 = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+        dst5 = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+        dst6 = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+        dst7 = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+        SRA_4V(dst0, dst1, dst2, dst3, 6);
+        SRA_4V(dst4, dst5, dst6, dst7, 6);
+        PCKEV_H4_SW(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6,
+                    dst0, dst1, dst2, dst3);
+        ST8x8_UB(dst0, dst1, dst2, dst3, dst, 2 * dst_stride);
         dst += (8 * dst_stride);
+
+        dst10_r = dst98_r;
+        dst21_r = dst109_r;
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst106, 1);
     }
 }
 
@@ -3311,30 +3479,31 @@
                               const int8_t *filter_y,
                               int32_t height)
 {
-    uint32_t loop_cnt;
-    uint64_t dst_val0, dst_val1, dst_val2, dst_val3;
-    uint32_t dst_val_int0, dst_val_int1, dst_val_int2, dst_val_int3;
-    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    int32_t dst_stride_in_bytes = 2 * dst_stride;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
-    v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
-    v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dsth0, dsth1, dsth2, dsth3, dsth4, dsth5, dsth6, dsth7, dsth8, dsth9;
+    v8i16 dsth10, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    v8i16 dst10_r, dst32_r, dst54_r, dst76_r, dst98_r, dst21_r, dst43_r;
+    v8i16 dst65_r, dst87_r, dst109_r, dst10_l, dst32_l, dst54_l, dst76_l;
+    v8i16 dst98_l, dst21_l, dst43_l, dst65_l, dst87_l, dst109_l;
+    v8i16 dst1021_l, dst3243_l, dst5465_l, dst7687_l, dst98109_l;
+    v4i32 dst0_r, dst1_r, dst2_r, dst3_r, dst4_r, dst5_r, dst6_r, dst7_r;
+    v4i32 dst0_l, dst1_l, dst2_l, dst3_l;
 
     src -= (src_stride + 1);
     filter_vec = LD_SH(filter_x);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
@@ -3349,89 +3518,83 @@
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
 
-    dst0 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
-    dst1 = const_vec;
-    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
-    dst2 = const_vec;
-    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    dsth0 = const_vec;
+    dsth1 = const_vec;
+    dsth2 = const_vec;
+    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dsth0, dsth0);
+    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dsth1, dsth1);
+    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dsth2, dsth2);
 
-    ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
-    ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
+    ILVRL_H2_SH(dsth1, dsth0, dst10_r, dst10_l);
+    ILVRL_H2_SH(dsth2, dsth1, dst21_r, dst21_l);
 
-    for (loop_cnt = height >> 2; loop_cnt--;) {
-        LD_SB4(src, src_stride, src3, src4, src5, src6);
-        src += (4 * src_stride);
-        XORI_B4_128_SB(src3, src4, src5, src6);
+    LD_SB8(src, src_stride, src3, src4, src5, src6, src7, src8, src9, src10);
+    XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
 
-        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-        dst3 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
 
-        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
-        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-        dst0_r >>= 6;
-        dst0_l >>= 6;
+    dsth3 = const_vec;
+    dsth4 = const_vec;
+    dsth5 = const_vec;
+    dsth6 = const_vec;
+    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dsth3, dsth3);
+    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dsth4, dsth4);
+    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dsth5, dsth5);
+    DPADD_SB2_SH(vec6, vec7, filt0, filt1, dsth6, dsth6);
 
-        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-        dst4 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
+    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src9, src9, src9, src9, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src10, src10, src10, src10, mask0, mask1, vec6, vec7);
 
-        ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
-        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
-        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-        dst1_r >>= 6;
-        dst1_l >>= 6;
+    dsth7 = const_vec;
+    dsth8 = const_vec;
+    dsth9 = const_vec;
+    dsth10 = const_vec;
+    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dsth7, dsth7);
+    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dsth8, dsth8);
+    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dsth9, dsth9);
+    DPADD_SB2_SH(vec6, vec7, filt0, filt1, dsth10, dsth10);
 
-        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-        dst5 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
+    ILVRL_H2_SH(dsth3, dsth2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dsth4, dsth3, dst43_r, dst43_l);
+    ILVRL_H2_SH(dsth5, dsth4, dst54_r, dst54_l);
+    ILVRL_H2_SH(dsth6, dsth5, dst65_r, dst65_l);
+    ILVRL_H2_SH(dsth7, dsth6, dst76_r, dst76_l);
+    ILVRL_H2_SH(dsth8, dsth7, dst87_r, dst87_l);
+    ILVRL_H2_SH(dsth9, dsth8, dst98_r, dst98_l);
+    ILVRL_H2_SH(dsth10, dsth9, dst109_r, dst109_l);
 
-        ILVRL_H2_SH(dst5, dst4, dst10_r, dst10_l);
-        dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-        dst2_l = HEVC_FILT_4TAP(dst32_l, dst10_l, filt_h0, filt_h1);
-        dst2_r >>= 6;
-        dst2_l >>= 6;
+    PCKEV_D2_SH(dst21_l, dst10_l, dst43_l, dst32_l, dst1021_l, dst3243_l);
+    PCKEV_D2_SH(dst65_l, dst54_l, dst87_l, dst76_l, dst5465_l, dst7687_l);
+    dst98109_l = (v8i16) __msa_pckev_d((v2i64) dst109_l, (v2i64) dst98_l);
 
-        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-        dst2 = const_vec;
-        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-        ILVRL_H2_SH(dst2, dst5, dst21_r, dst21_l);
-        dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-        dst3_l = HEVC_FILT_4TAP(dst43_l, dst21_l, filt_h0, filt_h1);
-        dst3_r >>= 6;
-        dst3_l >>= 6;
-
-        PCKEV_H4_SW(dst0_l, dst0_r, dst1_l, dst1_r,
-                    dst2_l, dst2_r, dst3_l, dst3_r,
-                    dst0_r, dst1_r, dst2_r, dst3_r);
-
-        dst_val0 = __msa_copy_u_d((v2i64) dst0_r, 0);
-        dst_val1 = __msa_copy_u_d((v2i64) dst1_r, 0);
-        dst_val2 = __msa_copy_u_d((v2i64) dst2_r, 0);
-        dst_val3 = __msa_copy_u_d((v2i64) dst3_r, 0);
-
-        dst_val_int0 = __msa_copy_u_w((v4i32) dst0_r, 2);
-        dst_val_int1 = __msa_copy_u_w((v4i32) dst1_r, 2);
-        dst_val_int2 = __msa_copy_u_w((v4i32) dst2_r, 2);
-        dst_val_int3 = __msa_copy_u_w((v4i32) dst3_r, 2);
-
-        SD(dst_val0, dst);
-        SW(dst_val_int0, dst + 4);
-        dst += dst_stride;
-        SD(dst_val1, dst);
-        SW(dst_val_int1, dst + 4);
-        dst += dst_stride;
-        SD(dst_val2, dst);
-        SW(dst_val_int2, dst + 4);
-        dst += dst_stride;
-        SD(dst_val3, dst);
-        SW(dst_val_int3, dst + 4);
-        dst += dst_stride;
-
-    }
+    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+    dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+    dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+    dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+    dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+    dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+    dst6_r = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+    dst7_r = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+    dst0_l = HEVC_FILT_4TAP(dst1021_l, dst3243_l, filt_h0, filt_h1);
+    dst1_l = HEVC_FILT_4TAP(dst3243_l, dst5465_l, filt_h0, filt_h1);
+    dst2_l = HEVC_FILT_4TAP(dst5465_l, dst7687_l, filt_h0, filt_h1);
+    dst3_l = HEVC_FILT_4TAP(dst7687_l, dst98109_l, filt_h0, filt_h1);
+    SRA_4V(dst0_r, dst1_r, dst2_r, dst3_r, 6);
+    SRA_4V(dst4_r, dst5_r, dst6_r, dst7_r, 6);
+    SRA_4V(dst0_l, dst1_l, dst2_l, dst3_l, 6);
+    PCKEV_H2_SH(dst1_r, dst0_r, dst3_r, dst2_r, tmp0, tmp1);
+    PCKEV_H2_SH(dst5_r, dst4_r, dst7_r, dst6_r, tmp2, tmp3);
+    PCKEV_H2_SH(dst1_l, dst0_l, dst3_l, dst2_l, tmp4, tmp5);
+    ST8x4_UB(tmp0, tmp1, dst, dst_stride_in_bytes);
+    ST4x4_UB(tmp4, tmp4, 0, 1, 2, 3, dst + 4, dst_stride_in_bytes);
+    dst += 4 * dst_stride;
+    ST8x4_UB(tmp2, tmp3, dst, dst_stride_in_bytes);
+    ST4x4_UB(tmp5, tmp5, 0, 1, 2, 3, dst + 4, dst_stride_in_bytes);
 }
 
 static void hevc_hv_4t_8x2_msa(uint8_t *src,
@@ -3439,16 +3602,15 @@
                                int16_t *dst,
                                int32_t dst_stride,
                                const int8_t *filter_x,
-                               const int8_t *filter_y,
-                               int32_t height)
+                               const int8_t *filter_y)
 {
     v16i8 src0, src1, src2, src3, src4;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
     v8i16 dst0, dst1, dst2, dst3, dst4;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l;
     v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
@@ -3460,23 +3622,23 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
     const_vec = __msa_ldi_h(128);
     const_vec <<= 6;
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
-    XORI_B3_128_SB(src0, src1, src2);
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
 
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec8, vec9);
 
     dst0 = const_vec;
     DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
@@ -3484,53 +3646,123 @@
     DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
     dst2 = const_vec;
     DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    dst3 = const_vec;
+    DPADD_SB2_SH(vec6, vec7, filt0, filt1, dst3, dst3);
+    dst4 = const_vec;
+    DPADD_SB2_SH(vec8, vec9, filt0, filt1, dst4, dst4);
 
     ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
     ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
-
-    LD_SB2(src, src_stride, src3, src4);
-    src += (2 * src_stride);
-    XORI_B2_128_SB(src3, src4);
-
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
     ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
     dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-    dst0_r >>= 6;
-    dst0_l >>= 6;
-
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-    ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
     dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
     dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst1_l >>= 6;
-
+    SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
     PCKEV_H2_SW(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r, dst1_r);
     ST_SW2(dst0_r, dst1_r, dst, dst_stride);
 }
 
+static void hevc_hv_4t_8multx4_msa(uint8_t *src, int32_t src_stride,
+                                   int16_t *dst, int32_t dst_stride,
+                                   const int8_t *filter_x,
+                                   const int8_t *filter_y, int32_t width8mult)
+{
+    int32_t cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, mask0, mask1;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 filt0, filt1, filt_h0, filt_h1, filter_vec, const_vec;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6;
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
+    v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+
+    src -= (src_stride + 1);
+
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+
+    const_vec = __msa_ldi_h(128);
+    const_vec <<= 6;
+
+    for (cnt = width8mult; cnt--;) {
+        LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+        src += 8;
+        XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+
+        VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+
+        dst0 = const_vec;
+        dst1 = const_vec;
+        dst2 = const_vec;
+        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+        DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
+        DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+
+        ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
+        ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
+
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+        dst3 = const_vec;
+        dst4 = const_vec;
+        dst5 = const_vec;
+        dst6 = const_vec;
+        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst4, dst4);
+        DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst5, dst5);
+        DPADD_SB2_SH(vec6, vec7, filt0, filt1, dst6, dst6);
+        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+        ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
+        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
+
+        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
+        SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+        SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+        PCKEV_H2_SW(dst0_l, dst0_r, dst1_l, dst1_r, dst0_r, dst1_r);
+        PCKEV_H2_SW(dst2_l, dst2_r, dst3_l, dst3_r, dst2_r, dst3_r);
+
+        ST_SW4(dst0_r, dst1_r, dst2_r, dst3_r, dst, dst_stride);
+        dst += 8;
+    }
+}
+
 static void hevc_hv_4t_8x6_msa(uint8_t *src,
                                int32_t src_stride,
                                int16_t *dst,
                                int32_t dst_stride,
                                const int8_t *filter_x,
-                               const int8_t *filter_y,
-                               int32_t height)
+                               const int8_t *filter_y)
 {
     v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+    v16i8 vec10, vec11, vec12, vec13, vec14, vec15, vec16, vec17;
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
     v4i32 dst4_r, dst4_l, dst5_r, dst5_l;
@@ -3545,24 +3777,31 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
     const_vec = __msa_ldi_h(128);
     const_vec <<= 6;
 
-    LD_SB3(src, src_stride, src0, src1, src2);
-    src += (3 * src_stride);
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    LD_SB4(src, src_stride, src5, src6, src7, src8);
 
-    XORI_B3_128_SB(src0, src1, src2);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    XORI_B4_128_SB(src5, src6, src7, src8);
 
     VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
     VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
     VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec6, vec7);
+    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec8, vec9);
+    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec10, vec11);
+    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec12, vec13);
+    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec14, vec15);
+    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec16, vec17);
 
     dst0 = const_vec;
     DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
@@ -3570,91 +3809,44 @@
     DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
     dst2 = const_vec;
     DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+    dst3 = const_vec;
+    DPADD_SB2_SH(vec6, vec7, filt0, filt1, dst3, dst3);
+    dst4 = const_vec;
+    DPADD_SB2_SH(vec8, vec9, filt0, filt1, dst4, dst4);
+    dst5 = const_vec;
+    DPADD_SB2_SH(vec10, vec11, filt0, filt1, dst5, dst5);
+    dst6 = const_vec;
+    DPADD_SB2_SH(vec12, vec13, filt0, filt1, dst6, dst6);
+    dst7 = const_vec;
+    DPADD_SB2_SH(vec14, vec15, filt0, filt1, dst7, dst7);
+    dst8 = const_vec;
+    DPADD_SB2_SH(vec16, vec17, filt0, filt1, dst8, dst8);
 
     ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
     ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
-
-    LD_SB2(src, src_stride, src3, src4);
-    src += (2 * src_stride);
-
-    XORI_B2_128_SB(src3, src4);
-
-    /* row 3 */
-    VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
-    dst3 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
-
     ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
-    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
-
-    dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-
-    dst0_r >>= 6;
-    dst0_l >>= 6;
-
-    /* row 4 */
-    VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-    dst4 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
     ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+    ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+    ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+    ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
+    ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
+
+    dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+    dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
     dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
     dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-    dst1_r >>= 6;
-    dst1_l >>= 6;
-
-    LD_SB2(src, src_stride, src5, src6);
-    src += (2 * src_stride);
-
-    XORI_B2_128_SB(src5, src6);
-
-    /* row 5 */
-    VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-    dst5 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-    ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
     dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
     dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
-    dst2_r >>= 6;
-    dst2_l >>= 6;
-
-    /* row 6 */
-    VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-    dst6 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst6, dst6);
-
-    ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
     dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
     dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
-    dst3_r >>= 6;
-    dst3_l >>= 6;
-
-    LD_SB2(src, src_stride, src7, src8);
-
-    XORI_B2_128_SB(src7, src8);
-
-    /* row 7 */
-    VSHF_B2_SB(src7, src7, src7, src7, mask0, mask1, vec0, vec1);
-    dst7 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst7, dst7);
-
-    ILVRL_H2_SH(dst7, dst6, dst76_r, dst76_l);
     dst4_r = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
     dst4_l = HEVC_FILT_4TAP(dst54_l, dst76_l, filt_h0, filt_h1);
-    dst4_r >>= 6;
-    dst4_l >>= 6;
-
-    /* row 8 */
-    VSHF_B2_SB(src8, src8, src8, src8, mask0, mask1, vec0, vec1);
-    dst8 = const_vec;
-    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst8, dst8);
-
-    ILVRL_H2_SH(dst8, dst7, dst87_r, dst87_l);
     dst5_r = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
     dst5_l = HEVC_FILT_4TAP(dst65_l, dst87_l, filt_h0, filt_h1);
-    dst5_r >>= 6;
-    dst5_l >>= 6;
+
+    SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+    SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+    SRA_4V(dst4_r, dst4_l, dst5_r, dst5_l, 6);
 
     PCKEV_H4_SW(dst0_l, dst0_r, dst1_l, dst1_r,
                 dst2_l, dst2_r, dst3_l, dst3_r, dst0_r, dst1_r, dst2_r, dst3_r);
@@ -3674,22 +3866,22 @@
                                        const int8_t *filter_x,
                                        const int8_t *filter_y,
                                        int32_t height,
-                                       int32_t width)
+                                       int32_t width8mult)
 {
     uint32_t loop_cnt, cnt;
     uint8_t *src_tmp;
     int16_t *dst_tmp;
     v16i8 src0, src1, src2, src3, src4, src5, src6;
     v8i16 filt0, filt1;
-    v4i32 filt_h0, filt_h1;
-    v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v8i16 filt_h0, filt_h1;
+    v16i8 mask0 = LD_SB(ff_hevc_mask_arr);
     v16i8 mask1;
     v8i16 filter_vec, const_vec;
-    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
-    v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6;
     v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
-    v8i16 dst10_r, dst32_r, dst21_r, dst43_r;
-    v8i16 dst10_l, dst32_l, dst21_l, dst43_l;
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
 
     src -= (src_stride + 1);
 
@@ -3697,17 +3889,16 @@
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
     filter_vec = LD_SH(filter_y);
-    vec0 = __msa_clti_s_b((v16i8) filter_vec, 0);
-    filter_vec = (v8i16) __msa_ilvr_b(vec0, (v16i8) filter_vec);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
 
-    SPLATI_W2_SW(filter_vec, 0, filt_h0, filt_h1);
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
 
     mask1 = mask0 + 2;
 
     const_vec = __msa_ldi_h(128);
     const_vec <<= 6;
 
-    for (cnt = width >> 3; cnt--;) {
+    for (cnt = width8mult; cnt--;) {
         src_tmp = src;
         dst_tmp = dst;
 
@@ -3736,59 +3927,48 @@
             XORI_B4_128_SB(src3, src4, src5, src6);
 
             VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+            VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+            VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+            VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+
             dst3 = const_vec;
+            dst4 = const_vec;
+            dst5 = const_vec;
+            dst6 = const_vec;
             DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+            DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst4, dst4);
+            DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst5, dst5);
+            DPADD_SB2_SH(vec6, vec7, filt0, filt1, dst6, dst6);
 
             ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+            ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+            ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+            ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+
             dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
             dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
-
-            dst0_r >>= 6;
-            dst0_l >>= 6;
-
-            /* row 4 */
-            VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec0, vec1);
-            dst4 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst4, dst4);
-
-            ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
             dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
             dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
-            dst1_r >>= 6;
-            dst1_l >>= 6;
+            dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+            dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+            dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+            dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
 
-            /* row 5 */
-            VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec0, vec1);
-            dst5 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst5, dst5);
-
-            ILVRL_H2_SH(dst5, dst4, dst10_r, dst10_l);
-            dst2_r = HEVC_FILT_4TAP(dst32_r, dst10_r, filt_h0, filt_h1);
-            dst2_l = HEVC_FILT_4TAP(dst32_l, dst10_l, filt_h0, filt_h1);
-
-            dst2_r >>= 6;
-            dst2_l >>= 6;
-
-            /* row 6 */
-            VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec0, vec1);
-            dst2 = const_vec;
-            DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst2, dst2);
-
-            ILVRL_H2_SH(dst2, dst5, dst21_r, dst21_l);
-            dst3_r = HEVC_FILT_4TAP(dst43_r, dst21_r, filt_h0, filt_h1);
-            dst3_l = HEVC_FILT_4TAP(dst43_l, dst21_l, filt_h0, filt_h1);
-
-            dst3_r >>= 6;
-            dst3_l >>= 6;
+            SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+            SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
 
             PCKEV_H4_SW(dst0_l, dst0_r, dst1_l, dst1_r,
                         dst2_l, dst2_r, dst3_l, dst3_r,
                         dst0_r, dst1_r, dst2_r, dst3_r);
 
-            ST_SW2(dst0_r, dst1_r, dst_tmp, dst_stride);
-            dst_tmp += (2 * dst_stride);
-            ST_SW2(dst2_r, dst3_r, dst_tmp, dst_stride);
-            dst_tmp += (2 * dst_stride);
+            ST_SW4(dst0_r, dst1_r, dst2_r, dst3_r, dst_tmp, dst_stride);
+            dst_tmp += (4 * dst_stride);
+
+            dst10_r = dst54_r;
+            dst10_l = dst54_l;
+            dst21_r = dst65_r;
+            dst21_l = dst65_l;
+            dst2 = dst6;
         }
 
         src += 8;
@@ -3807,13 +3987,16 @@
 
     if (2 == height) {
         hevc_hv_4t_8x2_msa(src, src_stride, dst, dst_stride,
-                           filter_x, filter_y, height);
+                           filter_x, filter_y);
+    } else if (4 == height) {
+        hevc_hv_4t_8multx4_msa(src, src_stride, dst, dst_stride,
+                               filter_x, filter_y, 1);
     } else if (6 == height) {
         hevc_hv_4t_8x6_msa(src, src_stride, dst, dst_stride,
-                           filter_x, filter_y, height);
+                           filter_x, filter_y);
     } else if (0 == (height % 4)) {
         hevc_hv_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
-                                   filter_x, filter_y, height, 8);
+                                   filter_x, filter_y, height, 1);
     }
 }
 
@@ -3825,12 +4008,169 @@
                                const int8_t *filter_y,
                                int32_t height)
 {
-    hevc_hv_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height, 8);
+    uint32_t loop_cnt;
+    uint8_t *src_tmp;
+    int16_t *dst_tmp;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v16i8 mask0, mask1, mask2, mask3;
+    v8i16 filt0, filt1, filt_h0, filt_h1, filter_vec, const_vec;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst10, dst21, dst22, dst73;
+    v8i16 dst84, dst95, dst106, dst76_r, dst98_r, dst87_r, dst109_r;
+    v8i16 dst10_r, dst32_r, dst54_r, dst21_r, dst43_r, dst65_r;
+    v8i16 dst10_l, dst32_l, dst54_l, dst21_l, dst43_l, dst65_l;
+    v4i32 dst0_r, dst0_l, dst1_r, dst1_l, dst2_r, dst2_l, dst3_r, dst3_l;
+    v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 
-    hevc_hv_4t_4w_msa(src + 8, src_stride, dst + 8, dst_stride,
-                      filter_x, filter_y, height);
+    src -= (src_stride + 1);
 
+    filter_vec = LD_SH(filter_x);
+    SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
+
+    filter_vec = LD_SH(filter_y);
+    UNPCK_R_SB_SH(filter_vec, filter_vec);
+
+    SPLATI_W2_SH(filter_vec, 0, filt_h0, filt_h1);
+
+    mask0 = LD_SB(ff_hevc_mask_arr);
+    mask1 = mask0 + 2;
+
+    const_vec = __msa_ldi_h(128);
+    const_vec <<= 6;
+
+    src_tmp = src;
+    dst_tmp = dst;
+
+    LD_SB3(src_tmp, src_stride, src0, src1, src2);
+    src_tmp += (3 * src_stride);
+
+    XORI_B3_128_SB(src0, src1, src2);
+
+    VSHF_B2_SB(src0, src0, src0, src0, mask0, mask1, vec0, vec1);
+    VSHF_B2_SB(src1, src1, src1, src1, mask0, mask1, vec2, vec3);
+    VSHF_B2_SB(src2, src2, src2, src2, mask0, mask1, vec4, vec5);
+
+    dst0 = const_vec;
+    dst1 = const_vec;
+    dst2 = const_vec;
+    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst0, dst0);
+    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst1, dst1);
+    DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst2, dst2);
+
+    ILVRL_H2_SH(dst1, dst0, dst10_r, dst10_l);
+    ILVRL_H2_SH(dst2, dst1, dst21_r, dst21_l);
+
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src_tmp, src_stride, src3, src4, src5, src6);
+        src_tmp += (4 * src_stride);
+        XORI_B4_128_SB(src3, src4, src5, src6);
+
+        VSHF_B2_SB(src3, src3, src3, src3, mask0, mask1, vec0, vec1);
+        VSHF_B2_SB(src4, src4, src4, src4, mask0, mask1, vec2, vec3);
+        VSHF_B2_SB(src5, src5, src5, src5, mask0, mask1, vec4, vec5);
+        VSHF_B2_SB(src6, src6, src6, src6, mask0, mask1, vec6, vec7);
+
+        dst3 = const_vec;
+        dst4 = const_vec;
+        dst5 = const_vec;
+        dst6 = const_vec;
+        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst3, dst3);
+        DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst4, dst4);
+        DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst5, dst5);
+        DPADD_SB2_SH(vec6, vec7, filt0, filt1, dst6, dst6);
+
+        ILVRL_H2_SH(dst3, dst2, dst32_r, dst32_l);
+        ILVRL_H2_SH(dst4, dst3, dst43_r, dst43_l);
+        ILVRL_H2_SH(dst5, dst4, dst54_r, dst54_l);
+        ILVRL_H2_SH(dst6, dst5, dst65_r, dst65_l);
+
+        dst0_r = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        dst0_l = HEVC_FILT_4TAP(dst10_l, dst32_l, filt_h0, filt_h1);
+        dst1_r = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        dst1_l = HEVC_FILT_4TAP(dst21_l, dst43_l, filt_h0, filt_h1);
+        dst2_r = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        dst2_l = HEVC_FILT_4TAP(dst32_l, dst54_l, filt_h0, filt_h1);
+        dst3_r = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        dst3_l = HEVC_FILT_4TAP(dst43_l, dst65_l, filt_h0, filt_h1);
+
+        SRA_4V(dst0_r, dst0_l, dst1_r, dst1_l, 6);
+        SRA_4V(dst2_r, dst2_l, dst3_r, dst3_l, 6);
+        PCKEV_H4_SW(dst0_l, dst0_r, dst1_l, dst1_r, dst2_l, dst2_r, dst3_l,
+                    dst3_r, dst0_r, dst1_r, dst2_r, dst3_r);
+        ST_SW4(dst0_r, dst1_r, dst2_r, dst3_r, dst_tmp, dst_stride);
+        dst_tmp += (4 * dst_stride);
+
+        dst10_r = dst54_r;
+        dst10_l = dst54_l;
+        dst21_r = dst65_r;
+        dst21_l = dst65_l;
+        dst2 = dst6;
+    }
+
+    src += 8;
+    dst += 8;
+
+    mask2 = LD_SB(ff_hevc_mask_arr + 16);
+    mask3 = mask2 + 2;
+
+    LD_SB3(src, src_stride, src0, src1, src2);
+    src += (3 * src_stride);
+    XORI_B3_128_SB(src0, src1, src2);
+    VSHF_B2_SB(src0, src1, src0, src1, mask2, mask3, vec0, vec1);
+    VSHF_B2_SB(src1, src2, src1, src2, mask2, mask3, vec2, vec3);
+    dst10 = const_vec;
+    dst21 = const_vec;
+    DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst10, dst10);
+    DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst21, dst21);
+    ILVRL_H2_SH(dst21, dst10, dst10_r, dst21_r);
+    dst22 = (v8i16) __msa_splati_d((v2i64) dst21, 1);
+
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB8(src, src_stride, src3, src4, src5, src6, src7, src8, src9,
+               src10);
+        src += (8 * src_stride);
+        XORI_B8_128_SB(src3, src4, src5, src6, src7, src8, src9, src10);
+        VSHF_B2_SB(src3, src7, src3, src7, mask2, mask3, vec0, vec1);
+        VSHF_B2_SB(src4, src8, src4, src8, mask2, mask3, vec2, vec3);
+        VSHF_B2_SB(src5, src9, src5, src9, mask2, mask3, vec4, vec5);
+        VSHF_B2_SB(src6, src10, src6, src10, mask2, mask3, vec6, vec7);
+
+        dst73 = const_vec;
+        dst84 = const_vec;
+        dst95 = const_vec;
+        dst106 = const_vec;
+        DPADD_SB2_SH(vec0, vec1, filt0, filt1, dst73, dst73);
+        DPADD_SB2_SH(vec2, vec3, filt0, filt1, dst84, dst84);
+        DPADD_SB2_SH(vec4, vec5, filt0, filt1, dst95, dst95);
+        DPADD_SB2_SH(vec6, vec7, filt0, filt1, dst106, dst106);
+
+        dst32_r = __msa_ilvr_h(dst73, dst22);
+        ILVRL_H2_SH(dst84, dst73, dst43_r, dst87_r);
+        ILVRL_H2_SH(dst95, dst84, dst54_r, dst98_r);
+        ILVRL_H2_SH(dst106, dst95, dst65_r, dst109_r);
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst73, 1);
+        dst76_r = __msa_ilvr_h(dst22, dst106);
+
+        tmp0 = HEVC_FILT_4TAP(dst10_r, dst32_r, filt_h0, filt_h1);
+        tmp1 = HEVC_FILT_4TAP(dst21_r, dst43_r, filt_h0, filt_h1);
+        tmp2 = HEVC_FILT_4TAP(dst32_r, dst54_r, filt_h0, filt_h1);
+        tmp3 = HEVC_FILT_4TAP(dst43_r, dst65_r, filt_h0, filt_h1);
+        tmp4 = HEVC_FILT_4TAP(dst54_r, dst76_r, filt_h0, filt_h1);
+        tmp5 = HEVC_FILT_4TAP(dst65_r, dst87_r, filt_h0, filt_h1);
+        tmp6 = HEVC_FILT_4TAP(dst76_r, dst98_r, filt_h0, filt_h1);
+        tmp7 = HEVC_FILT_4TAP(dst87_r, dst109_r, filt_h0, filt_h1);
+
+        SRA_4V(tmp0, tmp1, tmp2, tmp3, 6);
+        SRA_4V(tmp4, tmp5, tmp6, tmp7, 6);
+        PCKEV_H4_SW(tmp1, tmp0, tmp3, tmp2, tmp5, tmp4, tmp7, tmp6, tmp0, tmp1,
+                    tmp2, tmp3);
+        ST8x8_UB(tmp0, tmp1, tmp2, tmp3, dst, 2 * dst_stride);
+        dst += (8 * dst_stride);
+
+        dst10_r = dst98_r;
+        dst21_r = dst109_r;
+        dst22 = (v8i16) __msa_splati_d((v2i64) dst106, 1);
+    }
 }
 
 static void hevc_hv_4t_16w_msa(uint8_t *src,
@@ -3841,8 +4181,13 @@
                                const int8_t *filter_y,
                                int32_t height)
 {
-    hevc_hv_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height, 16);
+    if (4 == height) {
+        hevc_hv_4t_8multx4_msa(src, src_stride, dst, dst_stride,
+                               filter_x, filter_y, 2);
+    } else {
+        hevc_hv_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
+                                   filter_x, filter_y, height, 2);
+    }
 }
 
 static void hevc_hv_4t_24w_msa(uint8_t *src,
@@ -3854,7 +4199,7 @@
                                int32_t height)
 {
     hevc_hv_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height, 24);
+                               filter_x, filter_y, height, 3);
 }
 
 static void hevc_hv_4t_32w_msa(uint8_t *src,
@@ -3866,7 +4211,7 @@
                                int32_t height)
 {
     hevc_hv_4t_8multx4mult_msa(src, src_stride, dst, dst_stride,
-                               filter_x, filter_y, height, 32);
+                               filter_x, filter_y, height, 4);
 }
 
 #define MC_COPY(WIDTH)                                                    \
@@ -3893,19 +4238,19 @@
 
 #undef MC_COPY
 
-#define MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                            \
-void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_msa(int16_t *dst,            \
-                                                     uint8_t *src,          \
-                                                     ptrdiff_t src_stride,  \
-                                                     int height,            \
-                                                     intptr_t mx,           \
-                                                     intptr_t my,           \
-                                                     int width)             \
-{                                                                           \
-    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];           \
-                                                                            \
-    hevc_##DIR1##_##TAP##t_##WIDTH##w_msa(src, src_stride, dst,             \
-                                          MAX_PB_SIZE, filter, height);     \
+#define MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                          \
+void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_msa(int16_t *dst,          \
+                                                   uint8_t *src,          \
+                                                   ptrdiff_t src_stride,  \
+                                                   int height,            \
+                                                   intptr_t mx,           \
+                                                   intptr_t my,           \
+                                                   int width)             \
+{                                                                         \
+    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];         \
+                                                                          \
+    hevc_##DIR1##_##TAP##t_##WIDTH##w_msa(src, src_stride, dst,           \
+                                          MAX_PB_SIZE, filter, height);   \
 }
 
 MC(qpel, h, 4, 8, hz, mx);
@@ -3944,37 +4289,37 @@
 
 #undef MC
 
-#define MC_HV(PEL, DIR, WIDTH, TAP, DIR1)                                     \
-void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_msa(int16_t *dst,              \
-                                                     uint8_t *src,            \
-                                                     ptrdiff_t src_stride,    \
-                                                     int height,              \
-                                                     intptr_t mx,             \
-                                                     intptr_t my,             \
-                                                     int width)               \
-{                                                                             \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];                 \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];                 \
-                                                                              \
-    hevc_##DIR1##_##TAP##t_##WIDTH##w_msa(src, src_stride, dst, MAX_PB_SIZE,  \
-                                          filter_x, filter_y, height);        \
+#define MC_HV(PEL, WIDTH, TAP)                                          \
+void ff_hevc_put_hevc_##PEL##_hv##WIDTH##_8_msa(int16_t *dst,           \
+                                                uint8_t *src,           \
+                                                ptrdiff_t src_stride,   \
+                                                int height,             \
+                                                intptr_t mx,            \
+                                                intptr_t my,            \
+                                                int width)              \
+{                                                                       \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];           \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];           \
+                                                                        \
+    hevc_hv_##TAP##t_##WIDTH##w_msa(src, src_stride, dst, MAX_PB_SIZE,  \
+                                          filter_x, filter_y, height);  \
 }
 
-MC_HV(qpel, hv, 4, 8, hv);
-MC_HV(qpel, hv, 8, 8, hv);
-MC_HV(qpel, hv, 12, 8, hv);
-MC_HV(qpel, hv, 16, 8, hv);
-MC_HV(qpel, hv, 24, 8, hv);
-MC_HV(qpel, hv, 32, 8, hv);
-MC_HV(qpel, hv, 48, 8, hv);
-MC_HV(qpel, hv, 64, 8, hv);
+MC_HV(qpel, 4, 8);
+MC_HV(qpel, 8, 8);
+MC_HV(qpel, 12, 8);
+MC_HV(qpel, 16, 8);
+MC_HV(qpel, 24, 8);
+MC_HV(qpel, 32, 8);
+MC_HV(qpel, 48, 8);
+MC_HV(qpel, 64, 8);
 
-MC_HV(epel, hv, 4, 4, hv);
-MC_HV(epel, hv, 6, 4, hv);
-MC_HV(epel, hv, 8, 4, hv);
-MC_HV(epel, hv, 12, 4, hv);
-MC_HV(epel, hv, 16, 4, hv);
-MC_HV(epel, hv, 24, 4, hv);
-MC_HV(epel, hv, 32, 4, hv);
+MC_HV(epel, 4, 4);
+MC_HV(epel, 6, 4);
+MC_HV(epel, 8, 4);
+MC_HV(epel, 12, 4);
+MC_HV(epel, 16, 4);
+MC_HV(epel, 24, 4);
+MC_HV(epel, 32, 4);
 
 #undef MC_HV

diff --git a/libavcodec/mips/hpeldsp_mmi.c b/libavcodec/mips/hpeldsp_mmi.c
index 2dbef22..e69b2bd 100644
--- a/libavcodec/mips/hpeldsp_mmi.c
+++ b/libavcodec/mips/hpeldsp_mmi.c

@@ -29,36 +29,27 @@
 void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels,
     ptrdiff_t line_size, int h)
 {
-    double ftmp[2];
-    mips_reg addr[2];
+    double ftmp[4];
     DECLARE_VAR_LOW32;
-    DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
-        PTR_ADDU   "%[addr1],   %[line_size],   %[line_size]            \n\t"
         "1:                                                             \n\t"
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
         MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
-        MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
-        MMI_SWC1(%[ftmp0], %[block], 0x00)
-        MMI_SWXC1(%[ftmp1], %[block], %[line_size], 0x00)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
+        PTR_ADDU   "%[pixels],   %[pixels],      %[line_size]           \n\t"
+        MMI_ULWC1(%[ftmp1], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],   %[pixels],      %[line_size]           \n\t"
 
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
-        MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
-        MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
-        MMI_SWC1(%[ftmp0], %[block], 0x00)
-        MMI_SWXC1(%[ftmp1], %[block], %[line_size], 0x00)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
+        PTR_ADDI   "%[h],       %[h],           -0x02                   \n\t"
 
-        PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
+        MMI_SWC1(%[ftmp0], %[block], 0x00)
+        PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
+        MMI_SWC1(%[ftmp1], %[block], 0x00)
+        PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
+
         "bnez       %[h],       1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
+          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           RESTRICT_ASM_LOW32
-          RESTRICT_ASM_ADDRT
-          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
           [block]"+&r"(block),              [pixels]"+&r"(pixels),
           [h]"+&r"(h)
         : [line_size]"r"((mips_reg)line_size)
@@ -69,37 +60,35 @@
 void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels,
     ptrdiff_t line_size, int h)
 {
-    double ftmp[2];
-    mips_reg addr[3];
+    double ftmp[4];
     DECLARE_VAR_ALL64;
 
     __asm__ volatile (
-        PTR_ADDU   "%[addr1],   %[line_size],   %[line_size]            \n\t"
         "1:                                                             \n\t"
         MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
-        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
-        MMI_SDC1(%[ftmp0], %[block], 0x00)
-        PTR_ADDU   "%[addr2],   %[block],       %[line_size]            \n\t"
-        MMI_SDC1(%[ftmp1], %[addr2], 0x00)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
-
-        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
-        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
-        MMI_SDC1(%[ftmp0], %[block], 0x00)
-        PTR_ADDU   "%[addr2],   %[block],       %[line_size]            \n\t"
-        MMI_SDC1(%[ftmp1], %[addr2], 0x00)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
+        PTR_ADDU   "%[pixels],   %[pixels],      %[line_size]           \n\t"
+        MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],   %[pixels],      %[line_size]           \n\t"
+        MMI_ULDC1(%[ftmp2], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],   %[pixels],      %[line_size]           \n\t"
+        MMI_ULDC1(%[ftmp3], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],   %[pixels],      %[line_size]           \n\t"
 
         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
+
+        MMI_SDC1(%[ftmp0], %[block], 0x00)
+        PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
+        MMI_SDC1(%[ftmp1], %[block], 0x00)
+        PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
+        MMI_SDC1(%[ftmp2], %[block], 0x00)
+        PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
+        MMI_SDC1(%[ftmp3], %[block], 0x00)
+        PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
+
         "bnez       %[h],       1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
+          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           RESTRICT_ASM_ALL64
-          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
-          [addr2]"=&r"(addr[2]),
           [block]"+&r"(block),              [pixels]"+&r"(pixels),
           [h]"+&r"(h)
         : [line_size]"r"((mips_reg)line_size)
@@ -110,45 +99,45 @@
 void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels,
     ptrdiff_t line_size, int h)
 {
-    double ftmp[4];
-    mips_reg addr[2];
+    double ftmp[8];
     DECLARE_VAR_ALL64;
-    DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
-        PTR_ADDU   "%[addr1],   %[line_size],   %[line_size]            \n\t"
-        "1:                                                             \n\t"
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
+        "1:                                                            \n\t"
         MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
         MMI_ULDC1(%[ftmp2], %[pixels], 0x08)
-        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
-        MMI_ULDC1(%[ftmp3], %[addr0], 0x08)
-        MMI_SDC1(%[ftmp0], %[block], 0x00)
-        MMI_SDXC1(%[ftmp1], %[block], %[line_size], 0x00)
-        MMI_SDC1(%[ftmp2], %[block], 0x08)
-        MMI_SDXC1(%[ftmp3], %[block], %[line_size], 0x08)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]           \n\t"
+        MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
+        MMI_ULDC1(%[ftmp3], %[pixels], 0x08)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]           \n\t"
+        MMI_ULDC1(%[ftmp4], %[pixels], 0x00)
+        MMI_ULDC1(%[ftmp6], %[pixels], 0x08)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]           \n\t"
+        MMI_ULDC1(%[ftmp5], %[pixels], 0x00)
+        MMI_ULDC1(%[ftmp7], %[pixels], 0x08)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]           \n\t"
 
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
-        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
-        MMI_ULDC1(%[ftmp2], %[pixels], 0x08)
-        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
-        MMI_ULDC1(%[ftmp3], %[addr0], 0x08)
-        MMI_SDC1(%[ftmp0], %[block], 0x00)
-        MMI_SDXC1(%[ftmp1], %[block], %[line_size], 0x00)
-        MMI_SDC1(%[ftmp2], %[block], 0x08)
-        MMI_SDXC1(%[ftmp3], %[block], %[line_size], 0x08)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
+        PTR_ADDI   "%[h],       %[h],           -0x04                  \n\t"
 
-        PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
-        "bnez       %[h],       1b                                      \n\t"
+        MMI_SDC1(%[ftmp0], %[block], 0x00)
+        MMI_SDC1(%[ftmp2], %[block], 0x08)
+        PTR_ADDU   "%[block],   %[block],       %[line_size]           \n\t"
+        MMI_SDC1(%[ftmp1], %[block], 0x00)
+        MMI_SDC1(%[ftmp3], %[block], 0x08)
+        PTR_ADDU   "%[block],   %[block],       %[line_size]           \n\t"
+        MMI_SDC1(%[ftmp4], %[block], 0x00)
+        MMI_SDC1(%[ftmp6], %[block], 0x08)
+        PTR_ADDU   "%[block],   %[block],       %[line_size]           \n\t"
+        MMI_SDC1(%[ftmp5], %[block], 0x00)
+        MMI_SDC1(%[ftmp7], %[block], 0x08)
+        PTR_ADDU   "%[block],   %[block],       %[line_size]           \n\t"
+
+        "bnez       %[h],       1b                                     \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
+          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
+          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
           RESTRICT_ASM_ALL64
-          RESTRICT_ASM_ADDRT
-          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
           [block]"+&r"(block),              [pixels]"+&r"(pixels),
           [h]"+&r"(h)
         : [line_size]"r"((mips_reg)line_size)
@@ -160,12 +149,10 @@
     ptrdiff_t line_size, int h)
 {
     double ftmp[4];
-    mips_reg addr[3];
+    mips_reg addr[2];
     DECLARE_VAR_LOW32;
-    DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
-        PTR_ADDU   "%[addr2],   %[line_size],   %[line_size]            \n\t"
         "1:                                                             \n\t"
         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
         MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
@@ -173,34 +160,21 @@
         PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
         MMI_ULWC1(%[ftmp2], %[block], 0x00)
         MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
+
+        PTR_ADDI   "%[h],       %[h],           -0x02                   \n\t"
+
         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
         MMI_SWC1(%[ftmp0], %[block], 0x00)
-        MMI_SWXC1(%[ftmp1], %[block], %[line_size], 0x00)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
+        MMI_SWC1(%[ftmp1], %[addr1], 0x00)
+        PTR_ADDU   "%[pixels],  %[addr0],       %[line_size]            \n\t"
+        PTR_ADDU   "%[block],   %[addr1],       %[line_size]            \n\t"
 
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
-        MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
-        MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
-        PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
-        MMI_ULWC1(%[ftmp2], %[block], 0x00)
-        MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
-        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
-        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
-        MMI_SWC1(%[ftmp0], %[block], 0x00)
-        MMI_SWXC1(%[ftmp1], %[block], %[line_size], 0x00)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
-
-        PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
         "bnez       %[h],       1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           RESTRICT_ASM_LOW32
-          RESTRICT_ASM_ADDRT
           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
-          [addr2]"=&r"(addr[2]),
           [block]"+&r"(block),              [pixels]"+&r"(pixels),
           [h]"+&r"(h)
         : [line_size]"r"((mips_reg)line_size)
@@ -264,65 +238,61 @@
     ptrdiff_t line_size, int h)
 {
     double ftmp[8];
-    mips_reg addr[3];
+    mips_reg addr[1];
     DECLARE_VAR_ALL64;
-    DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
-        PTR_ADDU   "%[addr2],   %[line_size],   %[line_size]            \n\t"
         "1:                                                             \n\t"
-        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
-        MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
-        PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
-        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
-        MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
-        MMI_ULDC1(%[ftmp2], %[block], 0x00)
-        MMI_ULDC1(%[ftmp6], %[block], 0x08)
-        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
-        MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
-        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
-        "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
-        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
-        "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
-        MMI_SDC1(%[ftmp0], %[block], 0x00)
-        MMI_SDXC1(%[ftmp1], %[block], %[line_size], 0x00)
-        MMI_SDC1(%[ftmp4], %[block], 0x08)
-        MMI_SDXC1(%[ftmp5], %[block], %[line_size], 0x08)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
-
-        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
-        MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
-        PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
-        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
-        MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
-        MMI_ULDC1(%[ftmp2], %[block], 0x00)
-        MMI_ULDC1(%[ftmp6], %[block], 0x08)
-        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
-        MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
-        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
-        "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
-        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
-        "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
-        MMI_SDC1(%[ftmp0], %[block], 0x00)
-        MMI_SDXC1(%[ftmp1], %[block], %[line_size], 0x00)
-        MMI_SDC1(%[ftmp4], %[block], 0x08)
-        MMI_SDXC1(%[ftmp5], %[block], %[line_size], 0x08)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
-
         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
+        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
+        MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
+        MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
+        MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
+        MMI_ULDC1(%[ftmp2], %[block], 0x00)
+        MMI_ULDC1(%[ftmp6], %[block], 0x08)
+        PTR_ADDU   "%[addr0],   %[block],       %[line_size]            \n\t"
+        MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
+        MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
+        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
+        "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
+        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
+        "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
+        MMI_SDC1(%[ftmp0], %[block], 0x00)
+        MMI_SDC1(%[ftmp4], %[block], 0x08)
+        MMI_SDC1(%[ftmp1], %[addr0], 0x00)
+        MMI_SDC1(%[ftmp5], %[addr0], 0x08)
+        PTR_ADDU   "%[block],   %[addr0],       %[line_size]            \n\t"
+
+        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
+        MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
+        MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
+        MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
+        MMI_ULDC1(%[ftmp2], %[block], 0x00)
+        MMI_ULDC1(%[ftmp6], %[block], 0x08)
+        PTR_ADDU   "%[addr0],   %[block],       %[line_size]            \n\t"
+        MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
+        MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
+        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
+        "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
+        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
+        "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
+        MMI_SDC1(%[ftmp0], %[block], 0x00)
+        MMI_SDC1(%[ftmp4], %[block], 0x08)
+        MMI_SDC1(%[ftmp1], %[addr0], 0x00)
+        MMI_SDC1(%[ftmp5], %[addr0], 0x08)
+        PTR_ADDU   "%[block],   %[addr0],       %[line_size]            \n\t"
+
         "bnez       %[h],       1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
           RESTRICT_ASM_ALL64
-          RESTRICT_ASM_ADDRT
-          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
-          [addr2]"=&r"(addr[2]),
+          [addr0]"=&r"(addr[0]),
           [block]"+&r"(block),              [pixels]"+&r"(pixels),
           [h]"+&r"(h)
         : [line_size]"r"((mips_reg)line_size)
@@ -337,50 +307,33 @@
     double ftmp[4];
     mips_reg addr[5];
     DECLARE_VAR_LOW32;
-    DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
-        PTR_ADDU   "%[addr2],   %[src_stride1], %[src_stride1]          \n\t"
-        PTR_ADDU   "%[addr3],   %[src_stride2], %[src_stride2]          \n\t"
-        PTR_ADDU   "%[addr4],   %[dst_stride],  %[dst_stride]           \n\t"
         "1:                                                             \n\t"
         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
         MMI_ULWC1(%[ftmp0], %[src1], 0x00)
         MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
-        MMI_ULWC1(%[ftmp2], %[src2], 0x00)
         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
+        MMI_ULWC1(%[ftmp2], %[src2], 0x00)
         MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
-        PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
+        PTR_ADDU   "%[src1],    %[addr0],       %[src_stride1]          \n\t"
+        PTR_ADDU   "%[src2],    %[addr1],       %[src_stride2]          \n\t"
+
+        PTR_ADDI   "%[h],       %[h],           -0x02                   \n\t"
+
         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
         MMI_SWC1(%[ftmp0], %[dst], 0x00)
-        MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
-        PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
-        PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
+        PTR_ADDU   "%[dst],     %[dst],         %[dst_stride]           \n\t"
+        MMI_SWC1(%[ftmp1], %[dst], 0x00)
+        PTR_ADDU   "%[dst],     %[dst],         %[dst_stride]           \n\t"
 
-        PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
-        MMI_ULWC1(%[ftmp0], %[src1], 0x00)
-        MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
-        MMI_ULWC1(%[ftmp2], %[src2], 0x00)
-        PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
-        MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
-        PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
-        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
-        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
-        MMI_SWC1(%[ftmp0], %[dst], 0x00)
-        MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
-        PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
-        PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
-
-        PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
         "bnez       %[h],       1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           RESTRICT_ASM_LOW32
           RESTRICT_ASM_ADDRT
           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
-          [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
-          [addr4]"=&r"(addr[4]),
           [dst]"+&r"(dst),                  [src1]"+&r"(src1),
           [src2]"+&r"(src2),                [h]"+&r"(h)
         : [dst_stride]"r"((mips_reg)dst_stride),
@@ -537,62 +490,36 @@
     double ftmp[6];
     mips_reg addr[6];
     DECLARE_VAR_LOW32;
-    DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
-        PTR_ADDU   "%[addr2],   %[src_stride1], %[src_stride1]          \n\t"
-        PTR_ADDU   "%[addr3],   %[src_stride2], %[src_stride2]          \n\t"
-        PTR_ADDU   "%[addr4],   %[dst_stride],  %[dst_stride]           \n\t"
-
         "1:                                                             \n\t"
         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
         MMI_ULWC1(%[ftmp0], %[src1], 0x00)
         MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
-        MMI_ULWC1(%[ftmp2], %[src2], 0x00)
         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
+        MMI_ULWC1(%[ftmp2], %[src2], 0x00)
         MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
-        PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
+        PTR_ADDU   "%[src1],    %[addr0],       %[src_stride1]          \n\t"
+        PTR_ADDU   "%[src2],    %[addr1],       %[src_stride2]          \n\t"
         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
-        PTR_ADDU   "%[addr5],   %[dst],         %[dst_stride]           \n\t"
+        PTR_ADDU   "%[addr2],   %[dst],         %[dst_stride]           \n\t"
         MMI_ULWC1(%[ftmp4], %[dst], 0x00)
-        MMI_ULWC1(%[ftmp5], %[addr5], 0x00)
+        MMI_ULWC1(%[ftmp5], %[addr2], 0x00)
+        PTR_ADDI   "%[h],       %[h],           -0x02                   \n\t"
         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
         MMI_SWC1(%[ftmp0], %[dst], 0x00)
-        MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
-        PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
-        PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
+        MMI_SWC1(%[ftmp1], %[addr2], 0x00)
+        PTR_ADDU   "%[dst],     %[addr2],       %[dst_stride]           \n\t"
 
-        PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
-        MMI_ULWC1(%[ftmp0], %[src1], 0x00)
-        MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
-        MMI_ULWC1(%[ftmp2], %[src2], 0x00)
-        PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
-        MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
-        PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
-        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
-        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
-        PTR_ADDU   "%[addr5],   %[dst],         %[dst_stride]           \n\t"
-        MMI_ULWC1(%[ftmp4], %[dst], 0x00)
-        MMI_ULWC1(%[ftmp5], %[addr5], 0x00)
-        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
-        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
-        MMI_SWC1(%[ftmp0], %[dst], 0x00)
-        MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
-        PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
-        PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
-
-        PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
         "bnez       %[h],       1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
           RESTRICT_ASM_LOW32
-          RESTRICT_ASM_ADDRT
           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
-          [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
-          [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
+          [addr2]"=&r"(addr[2]),
           [dst]"+&r"(dst),                  [src1]"+&r"(src1),
           [src2]"+&r"(src2),                [h]"+&r"(h)
         : [dst_stride]"r"((mips_reg)dst_stride),

diff --git a/libavcodec/mips/idctdsp_init_mips.c b/libavcodec/mips/idctdsp_init_mips.c
index 8c26bca..bb33b55 100644
--- a/libavcodec/mips/idctdsp_init_mips.c
+++ b/libavcodec/mips/idctdsp_init_mips.c

@@ -20,6 +20,7 @@
  */
 
 #include "idctdsp_mips.h"
+#include "xvididct_mips.h"
 
 #if HAVE_MSA
 static av_cold void idctdsp_init_msa(IDCTDSPContext *c, AVCodecContext *avctx,
@@ -48,8 +49,10 @@
     if ((avctx->lowres != 1) && (avctx->lowres != 2) && (avctx->lowres != 3) &&
         (avctx->bits_per_raw_sample != 10) &&
         (avctx->bits_per_raw_sample != 12) &&
-        (avctx->idct_algo == FF_IDCT_AUTO)) {
-                c->idct = ff_simple_idct_mmi;
+        ((avctx->idct_algo == FF_IDCT_AUTO) || (avctx->idct_algo == FF_IDCT_SIMPLE))) {
+                c->idct_put = ff_simple_idct_put_8_mmi;
+                c->idct_add = ff_simple_idct_add_8_mmi;
+                c->idct = ff_simple_idct_8_mmi;
                 c->perm_type = FF_IDCT_PERM_NONE;
     }
 

diff --git a/libavcodec/mips/idctdsp_mips.h b/libavcodec/mips/idctdsp_mips.h
index 7ca7c1c..829efeb 100644
--- a/libavcodec/mips/idctdsp_mips.h
+++ b/libavcodec/mips/idctdsp_mips.h

@@ -46,8 +46,8 @@
         uint8_t *av_restrict pixels, ptrdiff_t line_size);
 void ff_add_pixels_clamped_mmi(const int16_t *block,
         uint8_t *av_restrict pixels, ptrdiff_t line_size);
-void ff_simple_idct_mmi(int16_t *block);
-void ff_simple_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
-void ff_simple_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_8_mmi(int16_t *block);
+void ff_simple_idct_put_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_add_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
 
 #endif  // #ifndef AVCODEC_MIPS_IDCTDSP_MIPS_H

diff --git a/libavcodec/mips/idctdsp_mmi.c b/libavcodec/mips/idctdsp_mmi.c
index b797965..a96dac4 100644
--- a/libavcodec/mips/idctdsp_mmi.c
+++ b/libavcodec/mips/idctdsp_mmi.c

@@ -29,9 +29,6 @@
         uint8_t *av_restrict pixels, ptrdiff_t line_size)
 {
     double ftmp[8];
-    mips_reg addr[1];
-    DECLARE_VAR_ALL64;
-    DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
         MMI_LDC1(%[ftmp0], %[block], 0x00)
@@ -42,60 +39,44 @@
         MMI_LDC1(%[ftmp5], %[block], 0x28)
         MMI_LDC1(%[ftmp6], %[block], 0x30)
         MMI_LDC1(%[ftmp7], %[block], 0x38)
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
         MMI_SDC1(%[ftmp0], %[pixels], 0x00)
-        MMI_SDC1(%[ftmp2], %[addr0], 0x00)
-        MMI_SDXC1(%[ftmp4], %[addr0], %[line_size], 0x00)
-        MMI_SDXC1(%[ftmp6], %[pixels], %[line_sizex3], 0x00)
-        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
-          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
-          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
-          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
-          RESTRICT_ASM_ALL64
-          RESTRICT_ASM_ADDRT
-          [addr0]"=&r"(addr[0]),
-          [pixels]"+&r"(pixels)
-        : [line_size]"r"((mips_reg)line_size),
-          [line_sizex3]"r"((mips_reg)(line_size*3)),
-          [block]"r"(block)
-        : "memory"
-    );
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
+        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
+        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
+        MMI_SDC1(%[ftmp6], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
 
-    pixels += line_size*4;
-    block += 32;
-
-    __asm__ volatile (
-        MMI_LDC1(%[ftmp0], %[block], 0x00)
-        MMI_LDC1(%[ftmp1], %[block], 0x08)
-        MMI_LDC1(%[ftmp2], %[block], 0x10)
-        MMI_LDC1(%[ftmp3], %[block], 0x18)
-        MMI_LDC1(%[ftmp4], %[block], 0x20)
-        MMI_LDC1(%[ftmp5], %[block], 0x28)
-        MMI_LDC1(%[ftmp6], %[block], 0x30)
-        MMI_LDC1(%[ftmp7], %[block], 0x38)
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
+        MMI_LDC1(%[ftmp0], %[block], 0x40)
+        MMI_LDC1(%[ftmp1], %[block], 0x48)
+        MMI_LDC1(%[ftmp2], %[block], 0x50)
+        MMI_LDC1(%[ftmp3], %[block], 0x58)
+        MMI_LDC1(%[ftmp4], %[block], 0x60)
+        MMI_LDC1(%[ftmp5], %[block], 0x68)
+        MMI_LDC1(%[ftmp6], %[block], 0x70)
+        MMI_LDC1(%[ftmp7], %[block], 0x78)
         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
         MMI_SDC1(%[ftmp0], %[pixels], 0x00)
-        MMI_SDC1(%[ftmp2], %[addr0], 0x00)
-        MMI_SDXC1(%[ftmp4], %[addr0], %[line_size], 0x00)
-        MMI_SDXC1(%[ftmp6], %[pixels], %[line_sizex3], 0x00)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
+        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
+        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
+        MMI_SDC1(%[ftmp6], %[pixels], 0x00)
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
-          RESTRICT_ASM_ALL64
-          RESTRICT_ASM_ADDRT
-          [addr0]"=&r"(addr[0]),
           [pixels]"+&r"(pixels)
         : [line_size]"r"((mips_reg)line_size),
-          [line_sizex3]"r"((mips_reg)(line_size*3)),
           [block]"r"(block)
         : "memory"
     );
@@ -104,15 +85,9 @@
 void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
     uint8_t *av_restrict pixels, ptrdiff_t line_size)
 {
-    int64_t line_skip = line_size;
-    int64_t line_skip3 = 0;
     double ftmp[5];
-    mips_reg addr[1];
-    DECLARE_VAR_ALL64;
-    DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
-        PTR_ADDU   "%[line_skip3],  %[line_skip],   %[line_skip]        \n\t"
         MMI_LDC1(%[ftmp1], %[block], 0x00)
         MMI_LDC1(%[ftmp0], %[block], 0x08)
         "packsshb   %[ftmp1],       %[ftmp1],       %[ftmp0]            \n\t"
@@ -130,12 +105,14 @@
         "paddb      %[ftmp3],       %[ftmp3],       %[ff_pb_80]         \n\t"
         "paddb      %[ftmp4],       %[ftmp4],       %[ff_pb_80]         \n\t"
         MMI_SDC1(%[ftmp1], %[pixels], 0x00)
-        MMI_SDXC1(%[ftmp2], %[pixels], %[line_skip], 0x00)
-        MMI_SDXC1(%[ftmp3], %[pixels], %[line_skip3], 0x00)
-        PTR_ADDU   "%[line_skip3],  %[line_skip3],  %[line_skip]        \n\t"
-        MMI_SDXC1(%[ftmp4], %[pixels], %[line_skip3], 0x00)
-        PTR_ADDU   "%[addr0],       %[line_skip3],  %[line_skip]        \n\t"
-        PTR_ADDU   "%[pixels],      %[pixels],      %[addr0]            \n\t"
+        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
+        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
+        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
+        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
+
         MMI_LDC1(%[ftmp1], %[block], 0x40)
         MMI_LDC1(%[ftmp0], %[block], 0x48)
         "packsshb   %[ftmp1],       %[ftmp1],       %[ftmp0]            \n\t"
@@ -153,19 +130,18 @@
         "paddb      %[ftmp3],       %[ftmp3],       %[ff_pb_80]         \n\t"
         "paddb      %[ftmp4],       %[ftmp4],       %[ff_pb_80]         \n\t"
         MMI_SDC1(%[ftmp1], %[pixels], 0x00)
-        MMI_SDXC1(%[ftmp2], %[pixels], %[line_skip], 0x00)
-        PTR_ADDU   "%[addr0],       %[line_skip],   %[line_skip]        \n\t"
-        MMI_SDXC1(%[ftmp3], %[pixels], %[addr0], 0x00)
-        MMI_SDXC1(%[ftmp4], %[pixels], %[line_skip3], 0x00)
+        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
+        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
+        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
+        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),
-          RESTRICT_ASM_ALL64
-          RESTRICT_ASM_ADDRT
-          [addr0]"=&r"(addr[0]),
-          [pixels]"+&r"(pixels),            [line_skip3]"+&r"(line_skip3)
+          [pixels]"+&r"(pixels)
         : [block]"r"(block),
-          [line_skip]"r"((mips_reg)line_skip),
+          [line_size]"r"((mips_reg)line_size),
           [ff_pb_80]"f"(ff_pb_80)
         : "memory"
     );
@@ -174,49 +150,42 @@
 void ff_add_pixels_clamped_mmi(const int16_t *block,
         uint8_t *av_restrict pixels, ptrdiff_t line_size)
 {
-    double ftmp[8];
+    double ftmp[9];
     uint64_t tmp[1];
-    mips_reg addr[1];
-    DECLARE_VAR_ALL64;
-    DECLARE_VAR_ADDRT;
-
     __asm__ volatile (
-        "li         %[tmp0],    0x04                                    \n\t"
-        "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
-        "1:                                                             \n\t"
+        "li         %[tmp0],    0x04                           \n\t"
+        "xor        %[ftmp0],   %[ftmp0],   %[ftmp0]           \n\t"
+        "1:                                                    \n\t"
+        MMI_LDC1(%[ftmp5], %[pixels], 0x00)
+        PTR_ADDU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
+        MMI_LDC1(%[ftmp6], %[pixels], 0x00)
+        PTR_SUBU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
         MMI_LDC1(%[ftmp1], %[block], 0x00)
         MMI_LDC1(%[ftmp2], %[block], 0x08)
         MMI_LDC1(%[ftmp3], %[block], 0x10)
         MMI_LDC1(%[ftmp4], %[block], 0x18)
-        MMI_LDC1(%[ftmp5], %[pixels], 0x00)
-        MMI_LDXC1(%[ftmp6], %[pixels], %[line_size], 0x00)
-        "mov.d      %[ftmp7],   %[ftmp5]                                \n\t"
-        "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
-        "punpckhbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
-        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
-        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
-        "mov.d      %[ftmp7],   %[ftmp6]                                \n\t"
-        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
-        "punpckhbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
-        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
-        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
-        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
-        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
+        PTR_ADDIU  "%[block],   %[block],   0x20               \n\t"
+        "punpckhbh  %[ftmp7],   %[ftmp5],   %[ftmp0]           \n\t"
+        "punpcklbh  %[ftmp5],   %[ftmp5],   %[ftmp0]           \n\t"
+        "punpckhbh  %[ftmp8],   %[ftmp6],   %[ftmp0]           \n\t"
+        "punpcklbh  %[ftmp6],   %[ftmp6],   %[ftmp0]           \n\t"
+        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp5]           \n\t"
+        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp7]           \n\t"
+        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp6]           \n\t"
+        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp8]           \n\t"
+        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp2]           \n\t"
+        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp4]           \n\t"
         MMI_SDC1(%[ftmp1], %[pixels], 0x00)
-        MMI_SDXC1(%[ftmp3], %[pixels], %[line_size], 0x00)
-        "addi       %[tmp0],    %[tmp0],        -0x01                   \n\t"
-        PTR_ADDIU  "%[block],   %[block],       0x20                    \n\t"
-        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
-        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
-        "bnez       %[tmp0],    1b"
+        PTR_ADDU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
+        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
+        "addi       %[tmp0],    %[tmp0],    -0x01              \n\t"
+        PTR_ADDU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
+        "bnez       %[tmp0],    1b                             \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
-          [tmp0]"=&r"(tmp[0]),
-          RESTRICT_ASM_ALL64
-          RESTRICT_ASM_ADDRT
-          [addr0]"=&r"(addr[0]),
+          [ftmp8]"=&f"(ftmp[8]),            [tmp0]"=&r"(tmp[0]),
           [pixels]"+&r"(pixels),            [block]"+&r"(block)
         : [line_size]"r"((mips_reg)line_size)
         : "memory"

diff --git a/libavcodec/mips/simple_idct_mmi.c b/libavcodec/mips/simple_idct_mmi.c
index 628e13f..7f4bb74 100644
--- a/libavcodec/mips/simple_idct_mmi.c
+++ b/libavcodec/mips/simple_idct_mmi.c

@@ -24,793 +24,400 @@
 
 #include "idctdsp_mips.h"
 #include "constants.h"
+#include "libavutil/mips/asmdefs.h"
+#include "libavutil/mips/mmiutils.h"
 
-#define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
-#define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C6 8867  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C7 4520  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
 
 #define ROW_SHIFT 11
 #define COL_SHIFT 20
+#define DC_SHIFT 3
 
-DECLARE_ALIGNED(8, static const int16_t, coeffs)[]= {
-    1<<(ROW_SHIFT-1),   0, 1<<(ROW_SHIFT-1),   0,
-    1<<(ROW_SHIFT-1),   1, 1<<(ROW_SHIFT-1),   0,
-                  C4,  C4,               C4,  C4,
-                  C4, -C4,               C4, -C4,
-                  C2,  C6,               C2,  C6,
-                  C6, -C2,               C6, -C2,
-                  C1,  C3,               C1,  C3,
-                  C5,  C7,               C5,  C7,
-                  C3, -C7,               C3, -C7,
-                 -C1, -C5,              -C1, -C5,
-                  C5, -C1,               C5, -C1,
-                  C7,  C3,               C7,  C3,
-                  C7, -C5,               C7, -C5,
-                  C3, -C1,               C3, -C1
+DECLARE_ALIGNED(8, const int16_t, W_arr)[46] = {
+    W4,  W2,  W4,  W6,
+    W1,  W3,  W5,  W7,
+    W4,  W6, -W4, -W2,
+    W3, -W7, -W1, -W5,
+    W4, -W6, -W4,  W2,
+    W5, -W1,  W7,  W3,
+    W4, -W2,  W4, -W6,
+    W7, -W5,  W3, -W1,
+    1024, 0,  1024, 0, //ff_p32_1024 = 0x0000040000000400ULL
+    0,   -1,  -1,  -1, //mask = 0xffffffffffff0000ULL
+    32,  32,  32,  32  //ff_p16_32 = 0x0020002000200020ULL
 };
 
-void ff_simple_idct_mmi(int16_t *block)
+void ff_simple_idct_8_mmi(int16_t *block)
 {
-        DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
-        int16_t * const temp= (int16_t*)align_tmp;
+    BACKUP_REG
+    __asm__ volatile (
 
-        __asm__ volatile (
-#undef  DC_COND_IDCT
-#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift)      \
-        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
-        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
-        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
-        "ldc1 $f8, %3                   \n\t"                                \
-        "and  $f8, $f8, $f0             \n\t"                                \
-        "or $f8, $f8, $f2               \n\t"                                \
-        "or $f8, $f8, $f4               \n\t"                                \
-        "or $f8, $f8, $f6               \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t"                                \
-        "li $11, " #shift "             \n\t"                                \
-        "mfc1 $10, $f8                  \n\t"                                \
-        "mtc1 $11, $f18                 \n\t"                                \
-        "beqz $10, 1f                   \n\t"                                \
-        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
-        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
-        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
-        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
-        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
-        "ldc1 $f16, " #rarg "           \n\t"                                \
-        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
-        #rounder " $f8, $f8, $f16       \n\t"                                \
-        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
-        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
-        "ldc1 $f10, 56(%2)              \n\t" /* C7     C5      C7      C5 */\
-        "ldc1 $f16, " #rarg "           \n\t"                                \
-        "pmaddhw $f10, $f10, $f6        \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
-        #rounder " $f0, $f0, $f16       \n\t"                                \
-        "paddw $f2, $f2, $f0            \n\t" /* A1             a1         */\
-        "ldc1 $f16, 64(%2)              \n\t"                                \
-        "paddw $f0, $f0, $f0            \n\t"                                \
-        "psubw $f0, $f0, $f2            \n\t" /* A2             a2         */\
-        "pmaddhw $f4, $f4, $f16         \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
-        "paddw $f14, $f14, $f10         \n\t" /* B0             b0         */\
-        "ldc1 $f10, 72(%2)              \n\t" /* -C5    -C1     -C5    -C1 */\
-        "pmaddhw $f10, $f10, $f6        \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
-        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
-        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
-        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
-        "paddw $f10, $f10, $f4          \n\t" /* B1             b1         */\
-        "psraw $f14, $f14, $f18         \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "mov.d $f4, $f2                 \n\t" /* A1             a1         */\
-        "paddw $f2, $f2, $f10           \n\t" /* A1+B1          a1+b1      */\
-        "psubw $f4, $f4, $f10           \n\t" /* A1-B1          a1-b1      */\
-        "psraw $f2, $f2, $f18           \n\t"                                \
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "packsswh $f14, $f14, $f2       \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0   */\
-        "packsswh $f4, $f4, $f8         \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1   */\
-        "sdc1 $f14, " #dst "            \n\t"                                \
-        "ldc1 $f2, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
-        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
-        "sdc1 $f4, 24+" #dst "          \n\t"                                \
-        "pmaddhw $f8, $f8, $f2          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
-        "ldc1 $f16, 96(%2)              \n\t"                                \
-        "ldc1 $f14, 88(%2)              \n\t" /* C3     C7      C3      C7 */\
-        "pmaddhw $f2, $f2, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
-        "pmaddhw $f14, $f14, $f6        \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
-        "ldc1 $f16, 104(%2)             \n\t"                                \
-        "mov.d $f4, $f0                 \n\t" /* A2             a2         */\
-        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
-        "paddw $f8, $f8, $f14           \n\t" /* B2             b2         */\
-        "paddw $f4, $f4, $f8            \n\t" /* A2+B2          a2+b2      */\
-        "psubw $f0, $f0, $f8            \n\t" /* a2-B2          a2-b2      */\
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "psraw $f0, $f0, $f18           \n\t"                                \
-        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
-        "paddw $f6, $f6, $f2            \n\t" /* B3             b3         */\
-        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
-        "psubw $f8, $f8, $f6            \n\t" /* a3-B3          a3-b3      */\
-        "psraw $f12, $f12, $f18         \n\t"                                \
-        "packsswh $f4, $f4, $f12        \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2   */\
-        "sdc1 $f4, 8+" #dst "           \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "packsswh $f8, $f8, $f0         \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3   */\
-        "sdc1 $f8, 16+" #dst "          \n\t"                                \
-        "b 2f                           \n\t"                                \
-        "1:                             \n\t"                                \
-        "li $10, 16                     \n\t"                                \
-        "mtc1 $10, $f16                 \n\t"                                \
-        "psllw $f0, $f0, $f16           \n\t"                                \
-        "ldc1 $f16, %4                  \n\t"                                \
-        "paddw $f0, $f0, $f16           \n\t"                                \
-        "li $10, 13                     \n\t"                                \
-        "mtc1 $10, $f16                 \n\t"                                \
-        "psraw $f0, $f0, $f16           \n\t"                                \
-        "packsswh $f0, $f0, $f0         \n\t"                                \
-        "sdc1 $f0, " #dst "             \n\t"                                \
-        "sdc1 $f0, 8+" #dst "           \n\t"                                \
-        "sdc1 $f0, 16+" #dst "          \n\t"                                \
-        "sdc1 $f0, 24+" #dst "          \n\t"                                \
-        "2:                             \n\t"
+#define IDCT_ROW_COND_DC(src1, src2)                                  \
+        "dmfc1        $11,      "#src1"                         \n\t" \
+        "dmfc1        $12,      "#src2"                         \n\t" \
+        "and          $11,       $11,       $9                  \n\t" \
+        "or           $10,       $11,       $12                 \n\t" \
+        "beqz         $10,       1f                             \n\t" \
+                                                                      \
+        "punpcklhw    $f30,     "#src1",   "#src2"              \n\t" \
+        "punpckhhw    $f31,     "#src1",   "#src2"              \n\t" \
+        /* s6, s4, s2, s0 */                                          \
+        "punpcklhw   "#src1",    $f30,      $f31                \n\t" \
+        /* s7, s5, s3, s1 */                                          \
+        "punpckhhw   "#src2",    $f30,      $f31                \n\t" \
+                                                                      \
+        "pmaddhw      $f30,     "#src1",    $f18                \n\t" \
+        "pmaddhw      $f31,     "#src2",    $f19                \n\t" \
+        "paddw        $f28,      $f30,      $f31                \n\t" \
+        "psubw        $f29,      $f30,      $f31                \n\t" \
+        "punpcklwd    $f30,      $f28,      $f29                \n\t" \
+        "punpckhwd    $f31,      $f28,      $f29                \n\t" \
+        "paddw        $f26,      $f30,      $f31                \n\t" \
+        "paddw        $f26,      $f26,      $f16                \n\t" \
+        /* $f26: src[7], src[0] */                                    \
+        "psraw        $f26,      $f26,      $f17                \n\t" \
+                                                                      \
+        "pmaddhw      $f30,     "#src1",    $f20                \n\t" \
+        "pmaddhw      $f31,     "#src2",    $f21                \n\t" \
+        "paddw        $f28,      $f30,      $f31                \n\t" \
+        "psubw        $f29,      $f30,      $f31                \n\t" \
+        "punpcklwd    $f30,      $f28,      $f29                \n\t" \
+        "punpckhwd    $f31,      $f28,      $f29                \n\t" \
+        "paddw        $f27,      $f30,      $f31                \n\t" \
+        "paddw        $f27,      $f27,      $f16                \n\t" \
+        /* $f27: src[6], src[1] */                                    \
+        "psraw        $f27,      $f27,      $f17                \n\t" \
+                                                                      \
+        "pmaddhw      $f30,     "#src1",    $f22                \n\t" \
+        "pmaddhw      $f31,     "#src2",    $f23                \n\t" \
+        "paddw        $f28,      $f30,      $f31                \n\t" \
+        "psubw        $f29,      $f30,      $f31                \n\t" \
+        "punpcklwd    $f30,      $f28,      $f29                \n\t" \
+        "punpckhwd    $f31,      $f28,      $f29                \n\t" \
+        "paddw        $f28,      $f30,      $f31                \n\t" \
+        "paddw        $f28,      $f28,      $f16                \n\t" \
+        /* $f28: src[5], src[2] */                                    \
+        "psraw        $f28,      $f28,      $f17                \n\t" \
+                                                                      \
+        "pmaddhw      $f30,     "#src1",    $f24                \n\t" \
+        "pmaddhw      $f31,     "#src2",    $f25                \n\t" \
+        "paddw       "#src1",    $f30,      $f31                \n\t" \
+        "psubw       "#src2",    $f30,      $f31                \n\t" \
+        "punpcklwd    $f30,     "#src1",   "#src2"              \n\t" \
+        "punpckhwd    $f31,     "#src1",   "#src2"              \n\t" \
+        "paddw        $f29,      $f30,      $f31                \n\t" \
+        "paddw        $f29,      $f29,      $f16                \n\t" \
+        /* $f29: src[4], src[3] */                                    \
+        "psraw        $f29,      $f29,      $f17                \n\t" \
+                                                                      \
+        "punpcklhw   "#src1",    $f26,      $f27                \n\t" \
+        "punpckhhw    $f30,      $f27,      $f26                \n\t" \
+        "punpcklhw    $f31,      $f28,      $f29                \n\t" \
+        "punpckhhw   "#src2",    $f29,      $f28                \n\t" \
+        /* src[3], src[2], src[1], src[0] */                          \
+        "punpcklwd   "#src1",   "#src1",    $f31                \n\t" \
+        /* src[7], src[6], src[5], src[4] */                          \
+        "punpcklwd   "#src2",   "#src2",    $f30                \n\t" \
+        "j                       2f                             \n\t" \
+                                                                      \
+        "1:                                                     \n\t" \
+        "li           $10,       3                              \n\t" \
+        "dmtc1        $10,       $f30                           \n\t" \
+        "psllh        $f28,     "#src1",    $f30                \n\t" \
+        "dmtc1        $9,        $f31                           \n\t" \
+        "punpcklhw    $f29,      $f28,      $f28                \n\t" \
+        "and          $f29,      $f29,      $f31                \n\t" \
+        "paddw        $f28,      $f28,      $f29                \n\t" \
+        "punpcklwd   "#src1",    $f28,      $f28                \n\t" \
+        "punpcklwd   "#src2",    $f28,      $f28                \n\t" \
+        "2:                                                     \n\t" \
 
-#undef  Z_COND_IDCT
-#define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift, bt)   \
-        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
-        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
-        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
-        "mov.d $f8, $f0                 \n\t"                                \
-        "or $f8, $f8, $f2               \n\t"                                \
-        "or $f8, $f8, $f4               \n\t"                                \
-        "or $f8, $f8, $f6               \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t"                                \
-        "mfc1 $10, $f8                  \n\t"                                \
-        "beqz $10, " #bt "              \n\t"                                \
-        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
-        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
-        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
-        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
-        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
-        "ldc1 $f16, " #rarg "           \n\t"                                \
-        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
-        #rounder " $f8, $f8, $f16       \n\t"                                \
-        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
-        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
-        "ldc1 $f10, 56(%2)              \n\t" /* C7     C5      C7      C5 */\
-        "ldc1 $f16, " #rarg "           \n\t"                                \
-        "pmaddhw $f10, $f10, $f6        \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
-        #rounder " $f0, $f0, $f16       \n\t"                                \
-        "paddw $f2, $f2, $f0            \n\t" /* A1             a1         */\
-        "paddw $f0, $f0, $f0            \n\t"                                \
-        "ldc1 $f16, 64(%2)              \n\t"                                \
-        "psubw $f0, $f0, $f2            \n\t" /* A2             a2         */\
-        "pmaddhw $f4, $f4, $f16         \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
-        "paddw $f14, $f14, $f10         \n\t" /* B0             b0         */\
-        "ldc1 $f10, 72(%2)              \n\t" /* -C5    -C1     -C5    -C1 */\
-        "pmaddhw $f10, $f10, $f6        \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
-        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
-        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
-        "li $10, " #shift "             \n\t"                                \
-        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
-        "mtc1 $10, $f18                 \n\t"                                \
-        "paddw $f10, $f10, $f4          \n\t" /* B1             b1         */\
-        "psraw $f14, $f14, $f18         \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "mov.d $f4, $f2                 \n\t" /* A1             a1         */\
-        "paddw $f2, $f2, $f10           \n\t" /* A1+B1          a1+b1      */\
-        "psubw $f4, $f4, $f10           \n\t" /* A1-B1          a1-b1      */\
-        "psraw $f2, $f2, $f18           \n\t"                                \
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "packsswh $f14, $f14, $f2       \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0   */\
-        "packsswh $f4, $f4, $f8         \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1   */\
-        "sdc1 $f14, " #dst "            \n\t"                                \
-        "ldc1 $f2, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
-        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
-        "sdc1 $f4, 24+" #dst "          \n\t"                                \
-        "pmaddhw $f8, $f8, $f2          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
-        "ldc1 $f16, 96(%2)              \n\t"                                \
-        "ldc1 $f14, 88(%2)              \n\t" /* C3     C7      C3      C7 */\
-        "pmaddhw $f2, $f2, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
-        "pmaddhw $f14, $f14, $f6        \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
-        "ldc1 $f16, 104(%2)             \n\t"                                \
-        "mov.d $f4, $f0                 \n\t" /* A2             a2         */\
-        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
-        "paddw $f8, $f8, $f14           \n\t" /* B2             b2         */\
-        "paddw $f4, $f4, $f8            \n\t" /* A2+B2          a2+b2      */\
-        "psubw $f0, $f0, $f8            \n\t" /* a2-B2          a2-b2      */\
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "psraw $f0, $f0, $f18           \n\t"                                \
-        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
-        "paddw $f6, $f6, $f2            \n\t" /* B3             b3         */\
-        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
-        "psubw $f8, $f8, $f6            \n\t" /* a3-B3          a3-b3      */\
-        "psraw $f12, $f12, $f18         \n\t"                                \
-        "packsswh $f4, $f4, $f12        \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2   */\
-        "sdc1 $f4, 8+" #dst "           \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "packsswh $f8, $f8, $f0         \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3   */\
-        "sdc1 $f8, 16+" #dst "          \n\t"                                \
+        /* idctRowCondDC row0~8 */
 
-        //IDCT(       src0,   src4,   src1,   src5,    dst,     rounder, shift)
-        DC_COND_IDCT(0(%0),  8(%0), 16(%0), 24(%0),  0(%1), paddw,8(%2), 11)
-        Z_COND_IDCT(32(%0), 40(%0), 48(%0), 56(%0), 32(%1), paddw,(%2), 11, 4f)
-        Z_COND_IDCT(64(%0), 72(%0), 80(%0), 88(%0), 64(%1), paddw,(%2), 11, 2f)
-        Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1), paddw,(%2), 11, 1f)
+        /* load W */
+        "gslqc1       $f19,      $f18,      0x00(%[w_arr])      \n\t"
+        "gslqc1       $f21,      $f20,      0x10(%[w_arr])      \n\t"
+        "gslqc1       $f23,      $f22,      0x20(%[w_arr])      \n\t"
+        "gslqc1       $f25,      $f24,      0x30(%[w_arr])      \n\t"
+        "gslqc1       $f17,      $f16,      0x40(%[w_arr])      \n\t"
+        /* load source in block */
+        "gslqc1       $f1,       $f0,       0x00(%[block])      \n\t"
+        "gslqc1       $f3,       $f2,       0x10(%[block])      \n\t"
+        "gslqc1       $f5,       $f4,       0x20(%[block])      \n\t"
+        "gslqc1       $f7,       $f6,       0x30(%[block])      \n\t"
+        "gslqc1       $f9,       $f8,       0x40(%[block])      \n\t"
+        "gslqc1       $f11,      $f10,      0x50(%[block])      \n\t"
+        "gslqc1       $f13,      $f12,      0x60(%[block])      \n\t"
+        "gslqc1       $f15,      $f14,      0x70(%[block])      \n\t"
 
-#undef  IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift)                             \
-        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
-        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
-        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
-        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
-        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
-        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
-        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
-        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
-        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
-        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
-        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
-        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "paddw $f0, $f0, $f2            \n\t" /* A1             a1         */\
-        "psubw $f10, $f10, $f2          \n\t" /* A2             a2         */\
-        "ldc1 $f2, 56(%2)               \n\t" /* C7     C5      C7      C5 */\
-        "ldc1 $f16, 64(%2)              \n\t"                                \
-        "pmaddhw $f2, $f2, $f6          \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
-        "pmaddhw $f4, $f4, $f16         \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
-        "li $10, " #shift "             \n\t"                                \
-        "paddw $f14, $f14, $f2          \n\t" /* B0             b0         */\
-        "ldc1 $f2, 72(%2)               \n\t" /* -C5    -C1     -C5    -C1 */\
-        "mtc1 $10, $f18                 \n\t"                                \
-        "pmaddhw $f2, $f2, $f6          \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
-        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
-        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
-        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
-        "paddw $f2, $f2, $f4            \n\t" /* B1             b1         */\
-        "psraw $f14, $f14, $f18         \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "mov.d $f4, $f0                 \n\t" /* A1             a1         */\
-        "paddw $f0, $f0, $f2            \n\t" /* A1+B1          a1+b1      */\
-        "psubw $f4, $f4, $f2            \n\t" /* A1-B1          a1-b1      */\
-        "psraw $f0, $f0, $f18           \n\t"                                \
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "packsswh $f14, $f14, $f14      \n\t" /* A0+B0          a0+b0      */\
-        "swc1 $f14, " #dst "            \n\t"                                \
-        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1          a1+b1      */\
-        "swc1 $f0, 16+" #dst "          \n\t"                                \
-        "packsswh $f4, $f4, $f4         \n\t" /* A1-B1          a1-b1      */\
-        "swc1 $f4, 96+" #dst "          \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0          a0-b0      */\
-        "swc1 $f8, 112+" #dst "         \n\t"                                \
-        "ldc1 $f0, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
-        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
-        "pmaddhw $f8, $f8, $f0          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
-        "ldc1 $f16, 96(%2)              \n\t"                                \
-        "ldc1 $f14, 88(%2)              \n\t" /* C3     C7      C3      C7 */\
-        "pmaddhw $f0, $f0, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
-        "pmaddhw $f14, $f14, $f6        \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
-        "ldc1 $f16, 104(%2)             \n\t"                                \
-        "mov.d $f4, $f10                \n\t" /* A2             a2         */\
-        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
-        "paddw $f8, $f8, $f14           \n\t" /* B2             b2         */\
-        "paddw $f4, $f4, $f8            \n\t" /* A2+B2          a2+b2      */\
-        "psubw $f10, $f10, $f8          \n\t" /* a2-B2          a2-b2      */\
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "psraw $f10, $f10, $f18         \n\t"                                \
-        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
-        "paddw $f6, $f6, $f0            \n\t" /* B3             b3         */\
-        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
-        "psubw $f8, $f8, $f6            \n\t" /* a3-B3          a3-b3      */\
-        "psraw $f12, $f12, $f18         \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "packsswh $f4, $f4, $f4         \n\t" /* A2+B2          a2+b2      */\
-        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3          a3+b3      */\
-        "swc1 $f4, 32+" #dst "          \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t" /* A3-B3          a3-b3      */\
-        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2          a2-b2      */\
-        "swc1 $f12, 48+" #dst "         \n\t"                                \
-        "swc1 $f8, 64+" #dst "          \n\t"                                \
-        "swc1 $f10, 80+" #dst "         \n\t"
+        /* $9: mask ; $f17: ROW_SHIFT */
+        "dmfc1        $9,        $f17                           \n\t"
+        "li           $10,       11                             \n\t"
+        "mtc1         $10,       $f17                           \n\t"
+        IDCT_ROW_COND_DC($f0,$f1)
+        IDCT_ROW_COND_DC($f2,$f3)
+        IDCT_ROW_COND_DC($f4,$f5)
+        IDCT_ROW_COND_DC($f6,$f7)
+        IDCT_ROW_COND_DC($f8,$f9)
+        IDCT_ROW_COND_DC($f10,$f11)
+        IDCT_ROW_COND_DC($f12,$f13)
+        IDCT_ROW_COND_DC($f14,$f15)
 
-        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
-        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
-        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
-        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
-        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
-        "b 9f                           \n\t"
+#define IDCT_COL_CASE1(src, out1, out2)                               \
+        "pmaddhw      $f26,     "#src",     $f18                \n\t" \
+        "pmaddhw      $f27,     "#src",     $f20                \n\t" \
+        "pmaddhw      $f28,     "#src",     $f22                \n\t" \
+        "pmaddhw      $f29,     "#src",     $f24                \n\t" \
+                                                                      \
+        "punpcklwd    $f30,      $f26,      $f26                \n\t" \
+        "punpckhwd    $f31,      $f26,      $f26                \n\t" \
+        /* $f26: src[0], src[56] */                                   \
+        "paddw        $f26,      $f30,      $f31                \n\t" \
+        "punpcklwd    $f30,      $f27,      $f27                \n\t" \
+        "punpckhwd    $f31,      $f27,      $f27                \n\t" \
+        /* $f27: src[8], src[48] */                                   \
+        "paddw        $f27,      $f30,      $f31                \n\t" \
+        "punpcklwd    $f30,      $f28,      $f28                \n\t" \
+        "punpckhwd    $f31,      $f28,      $f28                \n\t" \
+        /* $f28: src[16], src[40] */                                  \
+        "paddw        $f28,      $f30,      $f31                \n\t" \
+        "punpcklwd    $f30,      $f29,      $f29                \n\t" \
+        "punpckhwd    $f31,      $f29,      $f29                \n\t" \
+        /* $f29: src[24], src[32] */                                  \
+        "paddw        $f29,      $f30,      $f31                \n\t" \
+                                                                      \
+        /* out1: src[24], src[16], src[8], src[0] */                  \
+        /* out2: src[56], src[48], src[40], src[32] */                \
+        "punpcklhw    $f30,      $f26,      $f27                \n\t" \
+        "punpcklhw    $f31,      $f28,      $f29                \n\t" \
+        "punpckhwd   "#out1",    $f30,      $f31                \n\t" \
+        "psrah       "#out1",   "#out1",    $f16                \n\t" \
+        "punpcklhw    $f30,      $f27,      $f26                \n\t" \
+        "punpcklhw    $f31,      $f29,      $f28                \n\t" \
+        "punpckhwd   "#out2",    $f31,      $f30                \n\t" \
+        "psrah       "#out2",   "#out2",    $f16                \n\t"
 
-        "# .p2align 4                   \n\t"
-        "4:                             \n\t"
-        Z_COND_IDCT(64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddw,(%2), 11, 6f)
-        Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1),paddw,(%2), 11, 5f)
+#define IDCT_COL_CASE2(src1, src2, out1, out2)                        \
+        "pmaddhw      $f28,     "#src1",    $f18                \n\t" \
+        "pmaddhw      $f29,     "#src2",    $f19                \n\t" \
+        "paddw        $f30,      $f28,      $f29                \n\t" \
+        "psubw        $f31,      $f28,      $f29                \n\t" \
+        "punpcklwd    $f28,      $f30,      $f31                \n\t" \
+        "punpckhwd    $f29,      $f30,      $f31                \n\t" \
+        "pmaddhw      $f30,     "#src1",    $f20                \n\t" \
+        "pmaddhw      $f31,     "#src2",    $f21                \n\t" \
+        /* $f26: src[0], src[56] */                                   \
+        "paddw        $f26,      $f28,      $f29                \n\t" \
+        "paddw        $f28,      $f30,      $f31                \n\t" \
+        "psubw        $f29,      $f30,      $f31                \n\t" \
+        "punpcklwd    $f30,      $f28,      $f29                \n\t" \
+        "punpckhwd    $f31,      $f28,      $f29                \n\t" \
+        "pmaddhw      $f28,     "#src1",    $f22                \n\t" \
+        "pmaddhw      $f29,     "#src2",    $f23                \n\t" \
+        /* $f27: src[8], src[48] */                                   \
+        "paddw        $f27,      $f30,      $f31                \n\t" \
+        "paddw        $f30,      $f28,      $f29                \n\t" \
+        "psubw        $f31,      $f28,      $f29                \n\t" \
+        "punpcklwd    $f28,      $f30,      $f31                \n\t" \
+        "punpckhwd    $f29,      $f30,      $f31                \n\t" \
+        "pmaddhw      $f30,     "#src1",    $f24                \n\t" \
+        "pmaddhw      $f31,     "#src2",    $f25                \n\t" \
+        /* $f28: src[16], src[40] */                                  \
+        "paddw        $f28,      $f28,      $f29                \n\t" \
+        "paddw       "#out1",    $f30,      $f31                \n\t" \
+        "psubw       "#out2",    $f30,      $f31                \n\t" \
+        "punpcklwd    $f30,     "#out1",   "#out2"              \n\t" \
+        "punpckhwd    $f31,     "#out1",   "#out2"              \n\t" \
+        /* $f29: src[24], src[32] */                                  \
+        "paddw        $f29,      $f30,      $f31                \n\t" \
+                                                                      \
+        /* out1: src[24], src[16], src[8], src[0] */                  \
+        /* out2: src[56], src[48], src[40], src[32] */                \
+        "punpcklhw   "#out1",    $f26,      $f27                \n\t" \
+        "punpckhhw   "#out2",    $f27,      $f26                \n\t" \
+        "punpcklhw    $f30,      $f28,      $f29                \n\t" \
+        "punpckhhw    $f31,      $f29,      $f28                \n\t" \
+        "punpckhwd   "#out1",   "#out1",    $f30                \n\t" \
+        "punpckhwd   "#out2",    $f31,     "#out2"              \n\t" \
+        "psrah       "#out1",   "#out1",    $f16                \n\t" \
+        "psrah       "#out2",   "#out2",    $f16                \n\t"
 
-#undef  IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift)                             \
-        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
-        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
-        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
-        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
-        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
-        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
-        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
-        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
-        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "paddw $f0, $f0, $f2            \n\t" /* A1             a1         */\
-        "psubw $f10, $f10, $f2          \n\t" /* A2             a2         */\
-        "ldc1 $f2, 56(%2)               \n\t" /* C7     C5      C7      C5 */\
-        "li $10, " #shift "             \n\t"                                \
-        "pmaddhw $f2, $f2, $f6          \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
-        "ldc1 $f14, 72(%2)              \n\t" /* -C5    -C1     -C5    -C1 */\
-        "mtc1 $10, $f18                 \n\t"                                \
-        "pmaddhw $f14, $f14, $f6        \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
-        "paddw $f2, $f2, $f8            \n\t" /* A0+B0          a0+b0      */\
-        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
-        "psubw $f8, $f8, $f2            \n\t" /* A0-B0          a0-b0      */\
-        "psraw $f2, $f2, $f18           \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "mov.d $f4, $f0                 \n\t" /* A1             a1         */\
-        "paddw $f0, $f0, $f14           \n\t" /* A1+B1          a1+b1      */\
-        "psubw $f4, $f4, $f14           \n\t" /* A1-B1          a1-b1      */\
-        "psraw $f0, $f0, $f18           \n\t"                                \
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "packsswh $f2, $f2, $f2         \n\t" /* A0+B0          a0+b0      */\
-        "swc1 $f2, " #dst "             \n\t"                                \
-        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1          a1+b1      */\
-        "swc1 $f0, 16+" #dst "          \n\t"                                \
-        "packsswh $f4, $f4, $f4         \n\t" /* A1-B1          a1-b1      */\
-        "swc1 $f4, 96+" #dst "          \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0          a0-b0      */\
-        "swc1 $f8, 112+" #dst "         \n\t"                                \
-        "ldc1 $f2, 88(%2)               \n\t" /* C3     C7      C3      C7 */\
-        "ldc1 $f16, 104(%2)             \n\t"                                \
-        "pmaddhw $f2, $f2, $f6          \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
-        "mov.d $f4, $f10                \n\t" /* A2             a2         */\
-        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
-        "paddw $f4, $f4, $f2            \n\t" /* A2+B2          a2+b2      */\
-        "psubw $f10, $f10, $f2          \n\t" /* a2-B2          a2-b2      */\
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "psraw $f10, $f10, $f18         \n\t"                                \
-        "mov.d $f2, $f12                \n\t" /* A3             a3         */\
-        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
-        "psubw $f2, $f2, $f6            \n\t" /* a3-B3          a3-b3      */\
-        "psraw $f12, $f12, $f18         \n\t"                                \
-        "psraw $f2, $f2, $f18           \n\t"                                \
-        "packsswh $f4, $f4, $f4         \n\t" /* A2+B2          a2+b2      */\
-        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3          a3+b3      */\
-        "swc1 $f4, 32+" #dst "          \n\t"                                \
-        "packsswh $f2, $f2, $f2         \n\t" /* A3-B3          a3-b3      */\
-        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2          a2-b2      */\
-        "swc1 $f12, 48+" #dst "         \n\t"                                \
-        "swc1 $f2, 64+" #dst "          \n\t"                                \
-        "swc1 $f10, 80+" #dst "         \n\t"
 
-        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
-        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
-        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
-        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
-        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
-        "b 9f                           \n\t"
+        /* idctSparseCol col0~3 */
 
-        "# .p2align 4                   \n\t"
-        "6:                             \n\t"
-        Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1),paddw,(%2), 11, 7f)
+        /* $f17: ff_p16_32; $f16: COL_SHIFT-16 */
+        "gsldlc1      $f17,      0x57(%[w_arr])                 \n\t"
+        "gsldrc1      $f17,      0x50(%[w_arr])                 \n\t"
+        "li           $10,       4                              \n\t"
+        "dmtc1        $10,       $f16                           \n\t"
+        "paddh        $f0,       $f0,       $f17                \n\t"
+        /* Transpose row[0,2,4,6] */
+        "punpcklhw    $f26,      $f0,       $f4                 \n\t"
+        "punpckhhw    $f27,      $f0,       $f4                 \n\t"
+        "punpcklhw    $f28,      $f8,       $f12                \n\t"
+        "punpckhhw    $f29,      $f8,       $f12                \n\t"
+        "punpcklwd    $f0,       $f26,      $f28                \n\t"
+        "punpckhwd    $f4,       $f26,      $f28                \n\t"
+        "punpcklwd    $f8,       $f27,      $f29                \n\t"
+        "punpckhwd    $f12,      $f27,      $f29                \n\t"
 
-#undef  IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift)                             \
-        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
-        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "ldc1 $f2, 56(%2)               \n\t" /* C7     C5      C7      C5 */\
-        "pmaddhw $f2, $f2, $f6          \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
-        "ldc1 $f14, 72(%2)              \n\t" /* -C5    -C1     -C5    -C1 */\
-        "li $10, " #shift "             \n\t"                                \
-        "pmaddhw $f14, $f14, $f6        \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
-        "paddw $f2, $f2, $f8            \n\t" /* A0+B0          a0+b0      */\
-        "mtc1 $10, $f18                 \n\t"                                \
-        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
-        "psubw $f8, $f8, $f2            \n\t" /* A0-B0          a0-b0      */\
-        "psraw $f2, $f2, $f18           \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "mov.d $f4, $f0                 \n\t" /* A1             a1         */\
-        "paddw $f0, $f0, $f14           \n\t" /* A1+B1          a1+b1      */\
-        "psubw $f4, $f4, $f14           \n\t" /* A1-B1          a1-b1      */\
-        "psraw $f0, $f0, $f18           \n\t"                                \
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "packsswh $f2, $f2, $f2         \n\t" /* A0+B0          a0+b0      */\
-        "swc1 $f2, " #dst "             \n\t"                                \
-        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1          a1+b1      */\
-        "swc1 $f0, 16+" #dst "          \n\t"                                \
-        "packsswh $f4, $f4, $f4         \n\t" /* A1-B1          a1-b1      */\
-        "swc1 $f4, 96+" #dst "          \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0          a0-b0      */\
-        "swc1 $f8, 112+" #dst "         \n\t"                                \
-        "ldc1 $f2, 88(%2)               \n\t" /* C3     C7      C3      C7 */\
-        "ldc1 $f16, 104(%2)             \n\t"                                \
-        "pmaddhw $f2, $f2, $f6          \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
-        "mov.d $f4, $f10                \n\t" /* A2             a2         */\
-        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
-        "paddw $f4, $f4, $f2            \n\t" /* A2+B2          a2+b2      */\
-        "psubw $f10, $f10, $f2          \n\t" /* a2-B2          a2-b2      */\
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "psraw $f10, $f10, $f18         \n\t"                                \
-        "mov.d $f2, $f12                \n\t" /* A3             a3         */\
-        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
-        "psubw $f2, $f2, $f6            \n\t" /* a3-B3          a3-b3      */\
-        "psraw $f12, $f12, $f18         \n\t"                                \
-        "psraw $f2, $f2, $f18           \n\t"                                \
-        "packsswh $f4, $f4, $f4         \n\t" /* A2+B2          a2+b2      */\
-        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3          a3+b3      */\
-        "swc1 $f4, 32+" #dst "          \n\t"                                \
-        "packsswh $f2, $f2, $f2         \n\t" /* A3-B3          a3-b3      */\
-        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2          a2-b2      */\
-        "swc1 $f12, 48+" #dst "         \n\t"                                \
-        "swc1 $f2, 64+" #dst "          \n\t"                                \
-        "swc1 $f10, 80+" #dst "         \n\t"
+        "or           $f26,      $f2,       $f6                 \n\t"
+        "or           $f26,      $f26,      $f10                \n\t"
+        "or           $f26,      $f26,      $f14                \n\t"
+        "dmfc1        $10,       $f26                           \n\t"
+        "bnez         $10,       1f                             \n\t"
+        /* case1: In this case, row[1,3,5,7] are all zero */
+        /* col0: $f0: col[24,16,8,0]; $f2: col[56,48,40,32] */
+        IDCT_COL_CASE1($f0, $f0, $f2)
+        /* col1: $f4: col[25,17,9,1]; $f6: col[57,49,41,33] */
+        IDCT_COL_CASE1($f4, $f4, $f6)
+        /* col2: $f8: col[26,18,10,2]; $f10: col[58,50,42,34] */
+        IDCT_COL_CASE1($f8, $f8, $f10)
+        /* col3: $f12: col[27,19,11,3]; $f14: col[59,51,43,35] */
+        IDCT_COL_CASE1($f12, $f12, $f14)
+        "j                                  2f                  \n\t"
 
-        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
-        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
-        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
-        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
-        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
-        "b 9f                           \n\t"
+        "1:                                                     \n\t"
+        /* case2: row[1,3,5,7] are not all zero */
+        /* Transpose */
+        "punpcklhw    $f26,      $f2,       $f6                 \n\t"
+        "punpckhhw    $f27,      $f2,       $f6                 \n\t"
+        "punpcklhw    $f28,      $f10,      $f14                \n\t"
+        "punpckhhw    $f29,      $f10,      $f14                \n\t"
+        "punpcklwd    $f2,       $f26,      $f28                \n\t"
+        "punpckhwd    $f6,       $f26,      $f28                \n\t"
+        "punpcklwd    $f10,      $f27,      $f29                \n\t"
+        "punpckhwd    $f14,      $f27,      $f29                \n\t"
 
-        "# .p2align 4                   \n\t"
-        "2:                             \n\t"
-        Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1),paddw,(%2), 11, 3f)
+        /* col0: $f0: col[24,16,8,0]; $f2: col[56,48,40,32] */
+        IDCT_COL_CASE2($f0, $f2, $f0, $f2)
+        /* col1: $f4: col[25,17,9,1]; $f6: col[57,49,41,33] */
+        IDCT_COL_CASE2($f4, $f6, $f4, $f6)
+        /* col2: $f8: col[26,18,10,2]; $f10: col[58,50,42,34] */
+        IDCT_COL_CASE2($f8, $f10, $f8, $f10)
+        /* col3: $f12: col[27,19,11,3]; $f14: col[59,51,43,35] */
+        IDCT_COL_CASE2($f12, $f14, $f12, $f14)
 
-#undef  IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift)                             \
-        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
-        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
-        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
-        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
-        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "ldc1 $f2, 56(%2)               \n\t" /* C7     C5      C7      C5 */\
-        "pmaddhw $f2, $f2, $f6          \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
-        "ldc1 $f16, 64(%2)              \n\t"                                \
-        "pmaddhw $f4, $f4, $f16         \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
-        "paddw $f14, $f14, $f2          \n\t" /* B0             b0         */\
-        "ldc1 $f2, 72(%2)               \n\t" /* -C5    -C1     -C5    -C1 */\
-        "li $10, " #shift "             \n\t"                                \
-        "pmaddhw $f2, $f2, $f6          \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
-        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
-        "mtc1 $10, $f18                 \n\t"                                \
-        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
-        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
-        "paddw $f2, $f2, $f4            \n\t" /* B1             b1         */\
-        "psraw $f14, $f14, $f18         \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "mov.d $f4, $f0                 \n\t" /* A1             a1         */\
-        "paddw $f0, $f0, $f2            \n\t" /* A1+B1          a1+b1      */\
-        "psubw $f4, $f4, $f2            \n\t" /* A1-B1          a1-b1      */\
-        "psraw $f0, $f0, $f18           \n\t"                                \
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "packsswh $f14, $f14, $f14      \n\t" /* A0+B0          a0+b0      */\
-        "swc1 $f14, " #dst "            \n\t"                                \
-        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1          a1+b1      */\
-        "swc1 $f0, 16+" #dst "          \n\t"                                \
-        "packsswh $f4, $f4, $f4         \n\t" /* A1-B1          a1-b1      */\
-        "swc1 $f4, 96+" #dst "          \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0          a0-b0      */\
-        "swc1 $f8, 112+" #dst "         \n\t"                                \
-        "ldc1 $f0, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
-        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
-        "pmaddhw $f8, $f8, $f0          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
-        "ldc1 $f14, 88(%2)              \n\t" /* C3     C7      C3      C7 */\
-        "ldc1 $f16, 96(%2)              \n\t"                                \
-        "pmaddhw $f0, $f0, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
-        "pmaddhw $f14, $f14, $f6        \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
-        "mov.d $f4, $f10                \n\t" /* A2             a2         */\
-        "ldc1 $f16, 104(%2)             \n\t"                                \
-        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
-        "paddw $f8, $f8, $f14           \n\t" /* B2             b2         */\
-        "paddw $f4, $f4, $f8            \n\t" /* A2+B2          a2+b2      */\
-        "psubw $f10, $f10, $f8          \n\t" /* a2-B2          a2-b2      */\
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "psraw $f10, $f10, $f18         \n\t"                                \
-        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
-        "paddw $f6, $f6, $f0            \n\t" /* B3             b3         */\
-        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
-        "psubw $f8, $f8, $f6            \n\t" /* a3-B3          a3-b3      */\
-        "psraw $f12, $f12, $f18         \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "packsswh $f4, $f4, $f4         \n\t" /* A2+B2          a2+b2      */\
-        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3          a3+b3      */\
-        "swc1 $f4, 32+" #dst "          \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t" /* A3-B3          a3-b3      */\
-        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2          a2-b2      */\
-        "swc1 $f12, 48+" #dst "         \n\t"                                \
-        "swc1 $f8, 64+" #dst "          \n\t"                                \
-        "swc1 $f10, 80+" #dst "         \n\t"
+        "2:                                                     \n\t"
+        /* Transpose */
+        "punpcklhw    $f26,      $f0,       $f4                 \n\t"
+        "punpckhhw    $f27,      $f0,       $f4                 \n\t"
+        "punpcklhw    $f28,      $f8,       $f12                \n\t"
+        "punpckhhw    $f29,      $f8,       $f12                \n\t"
+        "punpcklwd    $f0,       $f26,      $f28                \n\t"
+        "punpckhwd    $f4,       $f26,      $f28                \n\t"
+        "punpcklwd    $f8,       $f27,      $f29                \n\t"
+        "punpckhwd    $f12,      $f27,      $f29                \n\t"
+        /* Transpose */
+        "punpcklhw    $f26,      $f2,       $f6                 \n\t"
+        "punpckhhw    $f27,      $f2,       $f6                 \n\t"
+        "punpcklhw    $f28,      $f10,      $f14                \n\t"
+        "punpckhhw    $f29,      $f10,      $f14                \n\t"
+        "punpcklwd    $f2,       $f26,      $f28                \n\t"
+        "punpckhwd    $f6,       $f26,      $f28                \n\t"
+        "punpcklwd    $f10,      $f27,      $f29                \n\t"
+        "punpckhwd    $f14,      $f27,      $f29                \n\t"
 
-        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
-        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
-        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
-        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
-        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
-        "b 9f                           \n\t"
+        /* idctSparseCol col4~7 */
 
-        "# .p2align 4                   \n\t"
-        "3:                             \n\t"
+        "paddh        $f1,       $f1,       $f17                \n\t"
+        /* Transpose */
+        "punpcklhw    $f26,      $f1,       $f5                 \n\t"
+        "punpckhhw    $f27,      $f1,       $f5                 \n\t"
+        "punpcklhw    $f28,      $f9,       $f13                \n\t"
+        "punpckhhw    $f29,      $f9,       $f13                \n\t"
+        "punpcklwd    $f1,       $f26,      $f28                \n\t"
+        "punpckhwd    $f5,       $f26,      $f28                \n\t"
+        "punpcklwd    $f9,       $f27,      $f29                \n\t"
+        "punpckhwd    $f13,      $f27,      $f29                \n\t"
 
-#undef  IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift)                             \
-        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
-        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
-        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
-        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "ldc1 $f6, 64(%2)               \n\t"                                \
-        "pmaddhw $f6, $f6, $f4          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
-        "li $10, " #shift "             \n\t"                                \
-        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
-        "mtc1 $10, $f18                 \n\t"                                \
-        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
-        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
-        "psraw $f14, $f14, $f18         \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "mov.d $f2, $f0                 \n\t" /* A1             a1         */\
-        "paddw $f0, $f0, $f6            \n\t" /* A1+B1          a1+b1      */\
-        "psubw $f2, $f2, $f6            \n\t" /* A1-B1          a1-b1      */\
-        "psraw $f0, $f0, $f18           \n\t"                                \
-        "psraw $f2, $f2, $f18           \n\t"                                \
-        "packsswh $f14, $f14, $f14      \n\t" /* A0+B0  a0+b0              */\
-        "swc1 $f14, " #dst "            \n\t"                                \
-        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1  a1+b1              */\
-        "swc1 $f0, 16+" #dst "          \n\t"                                \
-        "packsswh $f2, $f2, $f2         \n\t" /* A1-B1  a1-b1              */\
-        "swc1 $f2, 96+" #dst "          \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0  a0-b0              */\
-        "swc1 $f8, 112+" #dst "         \n\t"                                \
-        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
-        "ldc1 $f16, 96(%2)              \n\t"                                \
-        "pmaddhw $f8, $f8, $f4          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
-        "pmaddhw $f4, $f4, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
-        "mov.d $f2, $f10                \n\t" /* A2             a2         */\
-        "paddw $f2, $f2, $f8            \n\t" /* A2+B2          a2+b2      */\
-        "psubw $f10, $f10, $f8          \n\t" /* a2-B2          a2-b2      */\
-        "psraw $f2, $f2, $f18           \n\t"                                \
-        "psraw $f10, $f10, $f18         \n\t"                                \
-        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
-        "paddw $f12, $f12, $f4          \n\t" /* A3+B3          a3+b3      */\
-        "psubw $f8, $f8, $f4            \n\t" /* a3-B3          a3-b3      */\
-        "psraw $f12, $f12, $f18         \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "packsswh $f2, $f2, $f2         \n\t" /* A2+B2  a2+b2              */\
-        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3  a3+b3              */\
-        "swc1 $f2, 32+" #dst "          \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t" /* A3-B3  a3-b3              */\
-        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2  a2-b2              */\
-        "swc1 $f12, 48+" #dst "         \n\t"                                \
-        "swc1 $f8, 64+" #dst "          \n\t"                                \
-        "swc1 $f10, 80+" #dst "         \n\t"
+        "or           $f26,      $f3,       $f7                 \n\t"
+        "or           $f26,      $f26,      $f11                \n\t"
+        "or           $f26,      $f26,      $f15                \n\t"
+        "dmfc1        $10,       $f26                           \n\t"
+        "bnez         $10,       1f                             \n\t"
+        /* case1: In this case, row[1,3,5,7] are all zero */
+        /* col4: $f1: col[24,16,8,0]; $f3: col[56,48,40,32] */
+        IDCT_COL_CASE1($f1, $f1, $f3)
+        /* col5: $f5: col[25,17,9,1]; $f7: col[57,49,41,33] */
+        IDCT_COL_CASE1($f5, $f5, $f7)
+        /* col6: $f9: col[26,18,10,2]; $f11: col[58,50,42,34] */
+        IDCT_COL_CASE1($f9, $f9, $f11)
+        /* col7: $f13: col[27,19,11,3]; $f15: col[59,51,43,35] */
+        IDCT_COL_CASE1($f13, $f13, $f15)
+        "j                                  2f                  \n\t"
 
-        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
-        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
-        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
-        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
-        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
-        "b 9f                           \n\t"
+        "1:                                                     \n\t"
+        /* case2: row[1,3,5,7] are not all zero */
+        /* Transpose */
+        "punpcklhw    $f26,      $f3,       $f7                 \n\t"
+        "punpckhhw    $f27,      $f3,       $f7                 \n\t"
+        "punpcklhw    $f28,      $f11,      $f15                \n\t"
+        "punpckhhw    $f29,      $f11,      $f15                \n\t"
+        "punpcklwd    $f3,       $f26,      $f28                \n\t"
+        "punpckhwd    $f7,       $f26,      $f28                \n\t"
+        "punpcklwd    $f11,      $f27,      $f29                \n\t"
+        "punpckhwd    $f15,      $f27,      $f29                \n\t"
 
-        "# .p2align 4                   \n\t"
-        "5:                             \n\t"
+        /* col4: $f1: col[24,16,8,0]; $f3: col[56,48,40,32] */
+        IDCT_COL_CASE2($f1, $f3, $f1, $f3)
+        /* col5: $f5: col[25,17,9,1]; $f7: col[57,49,41,33] */
+        IDCT_COL_CASE2($f5, $f7, $f5, $f7)
+        /* col6: $f9: col[26,18,10,2]; $f11: col[58,50,42,34] */
+        IDCT_COL_CASE2($f9, $f11, $f9, $f11)
+        /* col7: $f13: col[27,19,11,3]; $f15: col[59,51,43,35] */
+        IDCT_COL_CASE2($f13, $f15, $f13, $f15)
 
-#undef  IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift)                             \
-        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
-        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
-        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
-        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
-        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
-        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
-        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
-        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "paddw $f0, $f0, $f2            \n\t" /* A1             a1         */\
-        "psubw $f10, $f10, $f2          \n\t" /* A2             a2         */\
-        "ldc1 $f4, 8+" #src0 "          \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f6, 8+" #src4 "          \n\t" /* R6     R2      r6      r2 */\
-        "ldc1 $f2, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "pmaddhw $f2, $f2, $f4          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f14, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "pmaddhw $f4, $f4, $f14         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "ldc1 $f14, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
-        "ldc1 $f16, 40(%2)              \n\t"                                \
-        "pmaddhw $f14, $f14, $f6        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
-        "pmaddhw $f6, $f6, $f16         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
-        "paddw $f14, $f14, $f2          \n\t" /* A0             a0         */\
-        "paddw $f2, $f2, $f2            \n\t" /* 2C0            2c0        */\
-        "psubw $f2, $f2, $f14           \n\t" /* A3             a3         */\
-        "li $10, " #shift "             \n\t"                                \
-        "paddw $f6, $f6, $f4            \n\t" /* A1             a1         */\
-        "mtc1 $10, $f18                 \n\t"                                \
-        "paddw $f4, $f4, $f4            \n\t" /* 2C1            2c1        */\
-        "psubw $f4, $f4, $f6            \n\t" /* A2             a2         */\
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "psraw $f14, $f14, $f18         \n\t"                                \
-        "psraw $f6, $f6, $f18           \n\t"                                \
-        "packsswh $f8, $f8, $f14        \n\t" /* A0             a0         */\
-        "sdc1 $f8, " #dst "             \n\t"                                \
-        "psraw $f0, $f0, $f18           \n\t"                                \
-        "packsswh $f0, $f0, $f6         \n\t" /* A1             a1         */\
-        "sdc1 $f0, 16+" #dst "          \n\t"                                \
-        "sdc1 $f0, 96+" #dst "          \n\t"                                \
-        "sdc1 $f8, 112+" #dst "         \n\t"                                \
-        "psraw $f10, $f10, $f18         \n\t"                                \
-        "psraw $f12, $f12, $f18         \n\t"                                \
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "packsswh $f10, $f10, $f4       \n\t" /* A2-B2          a2-b2      */\
-        "sdc1 $f10, 32+" #dst "         \n\t"                                \
-        "psraw $f2, $f2, $f18           \n\t"                                \
-        "packsswh $f12, $f12, $f2       \n\t" /* A3+B3          a3+b3      */\
-        "sdc1 $f12, 48+" #dst "         \n\t"                                \
-        "sdc1 $f12, 64+" #dst "         \n\t"                                \
-        "sdc1 $f10, 80+" #dst "         \n\t"
+        "2:                                                     \n\t"
+        /* Transpose */
+        "punpcklhw    $f26,      $f1,       $f5                 \n\t"
+        "punpckhhw    $f27,      $f1,       $f5                 \n\t"
+        "punpcklhw    $f28,      $f9,       $f13                \n\t"
+        "punpckhhw    $f29,      $f9,       $f13                \n\t"
+        "punpcklwd    $f1,       $f26,      $f28                \n\t"
+        "punpckhwd    $f5,       $f26,      $f28                \n\t"
+        "punpcklwd    $f9,       $f27,      $f29                \n\t"
+        "punpckhwd    $f13,      $f27,      $f29                \n\t"
+        /* Transpose */
+        "punpcklhw    $f26,      $f3,       $f7                 \n\t"
+        "punpckhhw    $f27,      $f3,       $f7                 \n\t"
+        "punpcklhw    $f28,      $f11,      $f15                \n\t"
+        "punpckhhw    $f29,      $f11,      $f15                \n\t"
+        "punpcklwd    $f3,       $f26,      $f28                \n\t"
+        "punpckhwd    $f7,       $f26,      $f28                \n\t"
+        "punpcklwd    $f11,      $f27,      $f29                \n\t"
+        "punpckhwd    $f15,      $f27,      $f29                \n\t"
+        /* Store */
+        "gssqc1       $f1,       $f0,       0x00(%[block])      \n\t"
+        "gssqc1       $f5,       $f4,       0x10(%[block])      \n\t"
+        "gssqc1       $f9,       $f8,       0x20(%[block])      \n\t"
+        "gssqc1       $f13,      $f12,      0x30(%[block])      \n\t"
+        "gssqc1       $f3,       $f2,       0x40(%[block])      \n\t"
+        "gssqc1       $f7,       $f6,       0x50(%[block])      \n\t"
+        "gssqc1       $f11,      $f10,      0x60(%[block])      \n\t"
+        "gssqc1       $f15,      $f14,      0x70(%[block])      \n\t"
 
-        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
-        IDCT(   0(%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
-        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
-        "b 9f                           \n\t"
-
-        "# .p2align 4                   \n\t"
-        "1:                             \n\t"
-
-#undef  IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift)                             \
-        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
-        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
-        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "li $10, " #shift "             \n\t"                                \
-        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "mtc1 $10, $f18                 \n\t"                                \
-        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
-        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
-        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
-        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
-        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
-        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
-        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
-        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
-        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "paddw $f0, $f0, $f2            \n\t" /* A1             a1         */\
-        "psubw $f10, $f10, $f2          \n\t" /* A2             a2         */\
-        "ldc1 $f2, 64(%2)               \n\t"                                \
-        "pmaddhw $f2, $f2, $f4          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
-        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
-        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
-        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
-        "psraw $f14, $f14, $f18         \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "mov.d $f6, $f0                 \n\t" /* A1             a1         */\
-        "paddw $f0, $f0, $f2            \n\t" /* A1+B1          a1+b1      */\
-        "psubw $f6, $f6, $f2            \n\t" /* A1-B1          a1-b1      */\
-        "psraw $f0, $f0, $f18           \n\t"                                \
-        "psraw $f6, $f6, $f18           \n\t"                                \
-        "packsswh $f14, $f14, $f14      \n\t" /* A0+B0  a0+b0              */\
-        "swc1 $f14, " #dst "            \n\t"                                \
-        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1  a1+b1              */\
-        "swc1 $f0, 16+" #dst "          \n\t"                                \
-        "packsswh $f6, $f6, $f6         \n\t" /* A1-B1  a1-b1              */\
-        "swc1 $f6, 96+" #dst "          \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0  a0-b0              */\
-        "swc1 $f8, 112+" #dst "         \n\t"                                \
-        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
-        "ldc1 $f16, 96(%2)              \n\t"                                \
-        "pmaddhw $f8, $f8, $f4          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
-        "pmaddhw $f4, $f4, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
-        "mov.d $f6, $f10                \n\t" /* A2             a2         */\
-        "paddw $f6, $f6, $f8            \n\t" /* A2+B2          a2+b2      */\
-        "psubw $f10, $f10, $f8          \n\t" /* a2-B2          a2-b2      */\
-        "psraw $f6, $f6, $f18           \n\t"                                \
-        "psraw $f10, $f10, $f18         \n\t"                                \
-        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
-        "paddw $f12, $f12, $f4          \n\t" /* A3+B3          a3+b3      */\
-        "psubw $f8, $f8, $f4            \n\t" /* a3-B3          a3-b3      */\
-        "psraw $f12, $f12, $f18         \n\t"                                \
-        "packsswh $f6, $f6, $f6         \n\t" /* A2+B2          a2+b2      */\
-        "swc1 $f6, 32+" #dst "          \n\t"                                \
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3          a3+b3      */\
-        "swc1 $f12, 48+" #dst "         \n\t"                                \
-        "packsswh $f8, $f8, $f8         \n\t" /* A3-B3          a3-b3      */\
-        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2          a2-b2      */\
-        "swc1 $f8, 64+" #dst "          \n\t"                                \
-        "swc1 $f10, 80+" #dst "         \n\t"
-
-        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
-        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
-        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
-        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
-        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
-        "b 9f                           \n\t"
-
-        "# .p2align 4                   \n\t"
-        "7:                             \n\t"
-
-#undef  IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift)                             \
-        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "li $10, " #shift "             \n\t"                                \
-        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "mtc1 $10, $f18                 \n\t"                                \
-        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "psraw $f8, $f8, $f18           \n\t"                                \
-        "psraw $f0, $f0, $f18           \n\t"                                \
-        "ldc1 $f4, 8+" #src0 "          \n\t" /* R4     R0      r4      r0 */\
-        "ldc1 $f2, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
-        "pmaddhw $f2, $f2, $f4          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
-        "ldc1 $f14, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
-        "pmaddhw $f4, $f4, $f14         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
-        "ldc1 $f14, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
-        "psraw $f2, $f2, $f18           \n\t"                                \
-        "packsswh $f8, $f8, $f2         \n\t" /* A0             a0         */\
-        "sdc1 $f8, " #dst "             \n\t"                                \
-        "psraw $f4, $f4, $f18           \n\t"                                \
-        "packsswh $f0, $f0, $f4         \n\t" /* A1             a1         */\
-        "sdc1 $f0, 16+" #dst "          \n\t"                                \
-        "sdc1 $f0, 96+" #dst "          \n\t"                                \
-        "sdc1 $f8, 112+" #dst "         \n\t"                                \
-        "sdc1 $f0, 32+" #dst "          \n\t"                                \
-        "sdc1 $f8, 48+" #dst "          \n\t"                                \
-        "sdc1 $f8, 64+" #dst "          \n\t"                                \
-        "sdc1 $f0, 80+" #dst "          \n\t"
-
-        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
-        IDCT(   0(%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
-        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
-
-        "9:                             \n\t"
-        ::"r"(block),"r"(temp),"r"(coeffs),"m"(ff_wm1010),"m"(ff_d40000)
-        : "$10","$11"
+        : [block]"+&r"(block)
+        : [w_arr]"r"(W_arr)
+        : "memory"
     );
+
+    RECOVER_REG
+}
+
+void ff_simple_idct_put_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    ff_simple_idct_8_mmi(block);
+    ff_put_pixels_clamped_mmi(block, dest, line_size);
+}
+void ff_simple_idct_add_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    ff_simple_idct_8_mmi(block);
+    ff_add_pixels_clamped_mmi(block, dest, line_size);
 }

diff --git a/libavcodec/mips/vc1dsp_mips.h b/libavcodec/mips/vc1dsp_mips.h
index b9b07e1..0db85fa 100644
--- a/libavcodec/mips/vc1dsp_mips.h
+++ b/libavcodec/mips/vc1dsp_mips.h

@@ -169,7 +169,7 @@
 void ff_vc1_v_overlap_mmi(uint8_t *src, int stride);
 void ff_vc1_h_overlap_mmi(uint8_t *src, int stride);
 void ff_vc1_v_s_overlap_mmi(int16_t *top, int16_t *bottom);
-void ff_vc1_h_s_overlap_mmi(int16_t *left, int16_t *right);
+void ff_vc1_h_s_overlap_mmi(int16_t *left, int16_t *right, int left_stride, int right_stride, int flags);
 
 void ff_vc1_v_loop_filter4_mmi(uint8_t *src, int stride, int pq);
 void ff_vc1_h_loop_filter4_mmi(uint8_t *src, int stride, int pq);

diff --git a/libavcodec/mips/vc1dsp_mmi.c b/libavcodec/mips/vc1dsp_mmi.c
index 01e7f9f..db314de 100644
--- a/libavcodec/mips/vc1dsp_mmi.c
+++ b/libavcodec/mips/vc1dsp_mmi.c

@@ -27,118 +27,97 @@
 #include "hpeldsp_mips.h"
 #include "libavutil/mips/mmiutils.h"
 
+#define VC1_INV_TRANCS_8_TYPE1(o1, o2, r1, r2, r3, r4, c0)                  \
+        "li         %[tmp0],    "#r1"                                 \n\t" \
+        "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \
+        "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \
+        "li         %[tmp0],    "#r2"                                 \n\t" \
+        "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \
+        "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp13]                 \n\t" \
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp14]                 \n\t" \
+        "paddw      %[ftmp1],   %[ftmp1],   %[ftmp2]                  \n\t" \
+        "pmaddhw    %[ftmp2],   %[ftmp6],   %[ftmp13]                 \n\t" \
+        "pmaddhw    %[ftmp3],   %[ftmp8],   %[ftmp14]                 \n\t" \
+        "paddw      %[ftmp2],   %[ftmp2],   %[ftmp3]                  \n\t" \
+                                                                            \
+        "li         %[tmp0],    "#r3"                                 \n\t" \
+        "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \
+        "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \
+        "li         %[tmp0],    "#r4"                                 \n\t" \
+        "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \
+        "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \
+        "pmaddhw    %[ftmp3],   %[ftmp9],   %[ftmp13]                 \n\t" \
+        "pmaddhw    %[ftmp4],   %[ftmp11],  %[ftmp14]                 \n\t" \
+        "paddw      %[ftmp3],   %[ftmp3],   %[ftmp4]                  \n\t" \
+        "pmaddhw    %[ftmp4],   %[ftmp10],  %[ftmp13]                 \n\t" \
+        "pmaddhw    %[ftmp13],  %[ftmp12],  %[ftmp14]                 \n\t" \
+        "paddw      %[ftmp4],   %[ftmp4],   %[ftmp13]                 \n\t" \
+                                                                            \
+        "paddw      %[ftmp1],   %[ftmp1],   "#c0"                     \n\t" \
+        "paddw      %[ftmp2],   %[ftmp2],   "#c0"                     \n\t" \
+        "paddw      %[ftmp13],  %[ftmp1],   %[ftmp3]                  \n\t" \
+        "psubw      %[ftmp14],  %[ftmp1],   %[ftmp3]                  \n\t" \
+        "paddw      %[ftmp1],   %[ftmp2],   %[ftmp4]                  \n\t" \
+        "psubw      %[ftmp3],   %[ftmp2],   %[ftmp4]                  \n\t" \
+        "psraw      %[ftmp13],  %[ftmp13],  %[ftmp0]                  \n\t" \
+        "psraw      %[ftmp1],   %[ftmp1],   %[ftmp0]                  \n\t" \
+        "psraw      %[ftmp14],  %[ftmp14],  %[ftmp0]                  \n\t" \
+        "psraw      %[ftmp3],   %[ftmp3],   %[ftmp0]                  \n\t" \
+        "punpcklhw  %[ftmp2],   %[ftmp13],  %[ftmp1]                  \n\t" \
+        "punpckhhw  %[ftmp4],   %[ftmp13],  %[ftmp1]                  \n\t" \
+        "punpcklhw  "#o1",      %[ftmp2],   %[ftmp4]                  \n\t" \
+        "punpcklhw  %[ftmp2],   %[ftmp14],  %[ftmp3]                  \n\t" \
+        "punpckhhw  %[ftmp4],   %[ftmp14],  %[ftmp3]                  \n\t" \
+        "punpcklhw  "#o2",      %[ftmp2],   %[ftmp4]                  \n\t"
 
-#define VC1_INV_TRANCS_8_STEP1_MMI(fp1,   fp2,   fp3,   fp4,                \
-                                   o1,    o2,    o3,    o4,                 \
-                                   t1,    t2,    t3,    t4,                 \
-                                   ff_p1, ff_p2, ff_p3, ff_p4)              \
-        "pmullh     "#t1"   ,   "#fp1"  ,   "#ff_p1"                \n\t"   \
-        "pmullh     "#t2"   ,   "#fp2"  ,   "#ff_p2"                \n\t"   \
-        "pmullh     "#t3"   ,   "#fp3"  ,   "#ff_p3"                \n\t"   \
-        "pmullh     "#t4"   ,   "#fp4"  ,   "#ff_p4"                \n\t"   \
-        "paddh      "#o1"   ,   "#t1"   ,   "#t2"                   \n\t"   \
-        "paddh      "#o1"   ,   "#o1"   ,   "#t3"                   \n\t"   \
-        "paddh      "#o1"   ,   "#o1"   ,   "#t4"                   \n\t"   \
+#define VC1_INV_TRANCS_8_TYPE2(o1, o2, r1, r2, r3, r4, c0, c1)              \
+        "li         %[tmp0],    "#r1"                                 \n\t" \
+        "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \
+        "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \
+        "li         %[tmp0],    "#r2"                                 \n\t" \
+        "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \
+        "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp13]                 \n\t" \
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp14]                 \n\t" \
+        "paddw      %[ftmp1],   %[ftmp1],   %[ftmp2]                  \n\t" \
+        "pmaddhw    %[ftmp2],   %[ftmp6],   %[ftmp13]                 \n\t" \
+        "pmaddhw    %[ftmp3],   %[ftmp8],   %[ftmp14]                 \n\t" \
+        "paddw      %[ftmp2],   %[ftmp2],   %[ftmp3]                  \n\t" \
                                                                             \
-        "pmullh     "#t1"   ,   "#fp1"  ,   "#ff_p2"                \n\t"   \
-        "pmullh     "#t2"   ,   "#fp2"  ,   "#ff_p4"                \n\t"   \
-        "pmullh     "#t3"   ,   "#fp3"  ,   "#ff_p1"                \n\t"   \
-        "pmullh     "#t4"   ,   "#fp4"  ,   "#ff_p3"                \n\t"   \
-        "psubh      "#o2"   ,   "#t1"   ,   "#t2"                   \n\t"   \
-        "psubh      "#o2"   ,   "#o2"   ,   "#t3"                   \n\t"   \
-        "psubh      "#o2"   ,   "#o2"   ,   "#t4"                   \n\t"   \
+        "li         %[tmp0],    "#r3"                                 \n\t" \
+        "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \
+        "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \
+        "li         %[tmp0],    "#r4"                                 \n\t" \
+        "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \
+        "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \
+        "pmaddhw    %[ftmp3],   %[ftmp9],   %[ftmp13]                 \n\t" \
+        "pmaddhw    %[ftmp4],   %[ftmp11],  %[ftmp14]                 \n\t" \
+        "paddw      %[ftmp3],   %[ftmp3],   %[ftmp4]                  \n\t" \
+        "pmaddhw    %[ftmp4],   %[ftmp10],  %[ftmp13]                 \n\t" \
+        "pmaddhw    %[ftmp13],  %[ftmp12],  %[ftmp14]                 \n\t" \
+        "paddw      %[ftmp4],   %[ftmp4],   %[ftmp13]                 \n\t" \
                                                                             \
-        "pmullh     "#t1"   ,   "#fp1"  ,   "#ff_p3"                \n\t"   \
-        "pmullh     "#t2"   ,   "#fp2"  ,   "#ff_p1"                \n\t"   \
-        "pmullh     "#t3"   ,   "#fp3"  ,   "#ff_p4"                \n\t"   \
-        "pmullh     "#t4"   ,   "#fp4"  ,   "#ff_p2"                \n\t"   \
-        "psubh      "#o3"   ,   "#t1"   ,   "#t2"                   \n\t"   \
-        "paddh      "#o3"   ,   "#o3"   ,   "#t3"                   \n\t"   \
-        "paddh      "#o3"   ,   "#o3"   ,   "#t4"                   \n\t"   \
-                                                                            \
-        "pmullh     "#t1"   ,   "#fp1"  ,   "#ff_p4"                \n\t"   \
-        "pmullh     "#t2"   ,   "#fp2"  ,   "#ff_p3"                \n\t"   \
-        "pmullh     "#t3"   ,   "#fp3"  ,   "#ff_p2"                \n\t"   \
-        "pmullh     "#t4"   ,   "#fp4"  ,   "#ff_p1"                \n\t"   \
-        "psubh      "#o4"   ,   "#t1"   ,   "#t2"                   \n\t"   \
-        "paddh      "#o4"   ,   "#o4"   ,   "#t3"                   \n\t"   \
-        "psubh      "#o4"   ,   "#o4"   ,   "#t4"                   \n\t"
-
-
-#define VC1_INV_TRANCS_8_STEP2_MMI(fp1,   fp2,   fp3,   fp4,                \
-                                   fp5,   fp6,   fp7,   fp8,                \
-                                   o1,    o2,    o3,    o4,                 \
-                                   ff_p1, ff_p2, ff_p3, ff_pw)              \
-        "paddh      "#fp5"  ,   "#fp1"  ,   "#fp2"                  \n\t"   \
-        "psubh      "#fp6"  ,   "#fp1"  ,   "#fp2"                  \n\t"   \
-        "pmullh     "#fp5"  ,   "#fp5"  ,   "#ff_p1"                \n\t"   \
-        "pmullh     "#fp6"  ,   "#fp6"  ,   "#ff_p1"                \n\t"   \
-        "paddh      "#fp5"  ,   "#fp5"  ,   "#ff_pw"                \n\t"   \
-        "paddh      "#fp6"  ,   "#fp6"  ,   "#ff_pw"                \n\t"   \
-                                                                            \
-        "pmullh     "#fp1"  ,   "#fp3"  ,   "#ff_p2"                \n\t"   \
-        "pmullh     "#fp2"  ,   "#fp4"  ,   "#ff_p3"                \n\t"   \
-        "pmullh     "#fp3"  ,   "#fp3"  ,   "#ff_p3"                \n\t"   \
-        "pmullh     "#fp4"  ,   "#fp4"  ,   "#ff_p2"                \n\t"   \
-        "paddh      "#fp7"  ,   "#fp1"  ,   "#fp2"                  \n\t"   \
-        "psubh      "#fp8"  ,   "#fp3"  ,   "#fp4"                  \n\t"   \
-                                                                            \
-        "paddh      "#fp1"  ,   "#fp5"  ,   "#fp7"                  \n\t"   \
-        "paddh      "#fp2"  ,   "#fp6"  ,   "#fp8"                  \n\t"   \
-        "psubh      "#fp3"  ,   "#fp6"  ,   "#fp8"                  \n\t"   \
-        "psubh      "#fp4"  ,   "#fp5"  ,   "#fp7"                  \n\t"   \
-                                                                            \
-        "paddh      "#fp5"  ,   "#fp1"  ,   "#o1"                   \n\t"   \
-        "paddh      "#fp6"  ,   "#fp2"  ,   "#o2"                   \n\t"   \
-        "paddh      "#fp7"  ,   "#fp3"  ,   "#o3"                   \n\t"   \
-        "paddh      "#fp8"  ,   "#fp4"  ,   "#o4"                   \n\t"   \
-                                                                            \
-        "psubh      "#fp4"  ,   "#fp4"  ,   "#o4"                   \n\t"   \
-        "psubh      "#fp3"  ,   "#fp3"  ,   "#o3"                   \n\t"   \
-        "psubh      "#fp2"  ,   "#fp2"  ,   "#o2"                   \n\t"   \
-        "psubh      "#fp1"  ,   "#fp1"  ,   "#o1"                   \n\t"
-
-
-#define VC1_INV_TRANCS_4_STEP1_MMI(fp1,   fp2,   fp3,   fp4,                \
-                                   fp5,   fp6,   fp7,   fp8,                \
-                                   ff_p1, ff_p2, ff_p3, ff_pw)              \
-        "paddh      "#fp5"  ,   "#fp1"  ,   "#fp2"                  \n\t"   \
-        "psubh      "#fp6"  ,   "#fp1"  ,   "#fp2"                  \n\t"   \
-        "pmullh     "#fp5"  ,   "#fp5"  ,   "#ff_p1"                \n\t"   \
-        "pmullh     "#fp6"  ,   "#fp6"  ,   "#ff_p1"                \n\t"   \
-        "paddh      "#fp5"  ,   "#fp5"  ,   "#ff_pw"                \n\t"   \
-        "paddh      "#fp6"  ,   "#fp6"  ,   "#ff_pw"                \n\t"   \
-                                                                            \
-        "pmullh     "#fp1"  ,   "#fp3"  ,   "#ff_p2"                \n\t"   \
-        "pmullh     "#fp2"  ,   "#fp4"  ,   "#ff_p3"                \n\t"   \
-        "pmullh     "#fp3"  ,   "#fp3"  ,   "#ff_p3"                \n\t"   \
-        "pmullh     "#fp4"  ,   "#fp4"  ,   "#ff_p2"                \n\t"   \
-        "paddh      "#fp7"  ,   "#fp1"  ,   "#fp2"                  \n\t"   \
-        "psubh      "#fp8"  ,   "#fp3"  ,   "#fp4"                  \n\t"   \
-                                                                            \
-        "paddh      "#fp1"  ,   "#fp5"  ,   "#fp7"                  \n\t"   \
-        "psubh      "#fp2"  ,   "#fp6"  ,   "#fp8"                  \n\t"   \
-        "paddh      "#fp3"  ,   "#fp6"  ,   "#fp8"                  \n\t"   \
-        "psubh      "#fp4"  ,   "#fp5"  ,   "#fp7"                  \n\t"
-
-
-#define VC1_INV_TRANCS_4_STEP2_MMI(fp1, fp2, fp3, fp4,                      \
-                                   fp5, fp6, fp7, fp8, zero)                \
-        "punpcklbh  "#fp5"  ,   "#fp5"  ,   "#zero"                 \n\t"   \
-        "punpcklbh  "#fp6"  ,   "#fp6"  ,   "#zero"                 \n\t"   \
-        "punpcklbh  "#fp7"  ,   "#fp7"  ,   "#zero"                 \n\t"   \
-        "punpcklbh  "#fp8"  ,   "#fp8"  ,   "#zero"                 \n\t"   \
-                                                                            \
-        "paddh      "#fp1"  ,   "#fp1"  ,   "#fp5"                  \n\t"   \
-        "paddh      "#fp2"  ,   "#fp2"  ,   "#fp6"                  \n\t"   \
-        "paddh      "#fp3"  ,   "#fp3"  ,   "#fp7"                  \n\t"   \
-        "paddh      "#fp4"  ,   "#fp4"  ,   "#fp8"                  \n\t"   \
-                                                                            \
-        "packushb   "#fp1"  ,   "#fp1"  ,   "#zero"                 \n\t"   \
-        "packushb   "#fp2"  ,   "#fp2"  ,   "#zero"                 \n\t"   \
-        "packushb   "#fp3"  ,   "#fp3"  ,   "#zero"                 \n\t"   \
-        "packushb   "#fp4"  ,   "#fp4"  ,   "#zero"                 \n\t"
-
+        "paddw      %[ftmp13],  %[ftmp1],   %[ftmp3]                  \n\t" \
+        "psubw      %[ftmp14],  %[ftmp1],   %[ftmp3]                  \n\t" \
+        "paddw      %[ftmp14],  %[ftmp14],  "#c1"                     \n\t" \
+        "paddw      %[ftmp1],   %[ftmp2],   %[ftmp4]                  \n\t" \
+        "psubw      %[ftmp3],   %[ftmp2],   %[ftmp4]                  \n\t" \
+        "paddw      %[ftmp3],   %[ftmp3],   "#c1"                     \n\t" \
+        "paddw      %[ftmp13],  %[ftmp13],  "#c0"                     \n\t" \
+        "paddw      %[ftmp14],  %[ftmp14],  "#c0"                     \n\t" \
+        "paddw      %[ftmp1],   %[ftmp1],   "#c0"                     \n\t" \
+        "paddw      %[ftmp3],   %[ftmp3],   "#c0"                     \n\t" \
+        "psraw      %[ftmp13],  %[ftmp13],  %[ftmp0]                  \n\t" \
+        "psraw      %[ftmp1],   %[ftmp1],   %[ftmp0]                  \n\t" \
+        "psraw      %[ftmp14],  %[ftmp14],  %[ftmp0]                  \n\t" \
+        "psraw      %[ftmp3],   %[ftmp3],   %[ftmp0]                  \n\t" \
+        "punpcklhw  %[ftmp2],   %[ftmp13],  %[ftmp1]                  \n\t" \
+        "punpckhhw  %[ftmp4],   %[ftmp13],  %[ftmp1]                  \n\t" \
+        "punpcklhw  "#o1",      %[ftmp2],   %[ftmp4]                  \n\t" \
+        "punpcklhw  %[ftmp2],   %[ftmp14],  %[ftmp3]                  \n\t" \
+        "punpckhhw  %[ftmp4],   %[ftmp14],  %[ftmp3]                  \n\t" \
+        "punpcklhw  "#o2",      %[ftmp2],   %[ftmp4]                  \n\t"
 
 /* Do inverse transform on 8x8 block */
 void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
@@ -216,137 +195,202 @@
 void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
 {
     DECLARE_ALIGNED(16, int16_t, temp[64]);
-    int16_t *src = block;
-    int16_t *dst = temp;
-    double ftmp[16];
-    uint32_t count, tmp[1];
+    DECLARE_ALIGNED(8, const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
+    DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
+    DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
+    double ftmp[23];
+    uint64_t tmp[1];
 
-    // 1st loop
     __asm__ volatile (
+        /* 1st loop: start */
         "li         %[tmp0],    0x03                                    \n\t"
         "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
-        "li         %[count],   0x02                                    \n\t"
 
-        "1:                                                             \n\t"
-        MMI_LDC1(%[ftmp5], %[src], 0x10)
-        MMI_LDC1(%[ftmp6], %[src], 0x30)
-        MMI_LDC1(%[ftmp7], %[src], 0x50)
-        MMI_LDC1(%[ftmp8], %[src], 0x70)
+       // 1st part
+        MMI_LDC1(%[ftmp1], %[block], 0x00)
+        MMI_LDC1(%[ftmp11], %[block], 0x10)
+        MMI_LDC1(%[ftmp2], %[block], 0x20)
+        MMI_LDC1(%[ftmp12], %[block], 0x30)
+        MMI_LDC1(%[ftmp3], %[block], 0x40)
+        MMI_LDC1(%[ftmp13], %[block], 0x50)
+        MMI_LDC1(%[ftmp4], %[block], 0x60)
+        MMI_LDC1(%[ftmp14], %[block], 0x70)
+        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
+        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"
 
-        VC1_INV_TRANCS_8_STEP1_MMI(%[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                                   %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ff_pw_16], %[ff_pw_15], %[ff_pw_9],
-                                   %[ff_pw_4])
+        "punpcklhw  %[ftmp9],  %[ftmp11],  %[ftmp12]                    \n\t"
+        "punpckhhw  %[ftmp10], %[ftmp11],  %[ftmp12]                    \n\t"
+        "punpcklhw  %[ftmp11], %[ftmp13],  %[ftmp14]                    \n\t"
+        "punpckhhw  %[ftmp12], %[ftmp13],  %[ftmp14]                    \n\t"
 
-        MMI_LDC1(%[ftmp1], %[src], 0x00)
-        MMI_LDC1(%[ftmp2], %[src], 0x40)
-        MMI_LDC1(%[ftmp3], %[src], 0x20)
-        MMI_LDC1(%[ftmp4], %[src], 0x60)
+        /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */
+        VC1_INV_TRANCS_8_TYPE1(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c,
+                               0x000f0010, 0x00040009, %[ff_pw_4])
 
-        VC1_INV_TRANCS_8_STEP2_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                                   %[ff_pw_12], %[ff_pw_16], %[ff_pw_6],
-                                   %[ff_pw_4])
+        /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */
+        VC1_INV_TRANCS_8_TYPE1(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4,
+                               0xfffc000f, 0xfff7fff0, %[ff_pw_4])
 
+        /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */
+        VC1_INV_TRANCS_8_TYPE1(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4,
+                               0xfff00009, 0x000f0004, %[ff_pw_4])
 
-        PSRAH_8_MMI(%[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                    %[ftmp4], %[ftmp3], %[ftmp2], %[ftmp1], %[ftmp0])
+        /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */
+        VC1_INV_TRANCS_8_TYPE1(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c,
+                               0xfff70004, 0xfff0000f, %[ff_pw_4])
 
-        TRANSPOSE_4H(%[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                     %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                     %[ftmp13], %[tmp0],  %[ftmp14], %[ftmp15])
+        TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
+                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
 
-        MMI_SDC1(%[ftmp5], %[dst], 0x00)
-        MMI_SDC1(%[ftmp6], %[dst], 0x10)
-        MMI_SDC1(%[ftmp7], %[dst], 0x20)
-        MMI_SDC1(%[ftmp8], %[dst], 0x30)
+        TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
+                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
 
-        TRANSPOSE_4H(%[ftmp4], %[ftmp3], %[ftmp2], %[ftmp1],
-                     %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                     %[ftmp13], %[tmp0],  %[ftmp14], %[ftmp15])
+        MMI_SDC1(%[ftmp15], %[temp], 0x00)
+        MMI_SDC1(%[ftmp19], %[temp], 0x08)
+        MMI_SDC1(%[ftmp16], %[temp], 0x10)
+        MMI_SDC1(%[ftmp20], %[temp], 0x18)
+        MMI_SDC1(%[ftmp17], %[temp], 0x20)
+        MMI_SDC1(%[ftmp21], %[temp], 0x28)
+        MMI_SDC1(%[ftmp18], %[temp], 0x30)
+        MMI_SDC1(%[ftmp22], %[temp], 0x38)
 
-        MMI_SDC1(%[ftmp4], %[dst], 0x08)
-        MMI_SDC1(%[ftmp3], %[dst], 0x18)
-        MMI_SDC1(%[ftmp2], %[dst], 0x28)
-        MMI_SDC1(%[ftmp1], %[dst], 0x38)
+       // 2nd part
+        MMI_LDC1(%[ftmp1], %[block], 0x08)
+        MMI_LDC1(%[ftmp11], %[block], 0x18)
+        MMI_LDC1(%[ftmp2], %[block], 0x28)
+        MMI_LDC1(%[ftmp12], %[block], 0x38)
+        MMI_LDC1(%[ftmp3], %[block], 0x48)
+        MMI_LDC1(%[ftmp13], %[block], 0x58)
+        MMI_LDC1(%[ftmp4], %[block], 0x68)
+        MMI_LDC1(%[ftmp14], %[block], 0x78)
+        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
+        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"
 
-        "addiu      %[count],   %[count],  -0x01                        \n\t"
-        PTR_ADDIU  "%[src],     %[src],     0x08                        \n\t"
-        PTR_ADDIU  "%[dst],     %[dst],     0x40                        \n\t"
-        "bnez       %[count],   1b                                      \n\t"
-        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
-          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
-          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
-          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
-          [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
-          [ftmp14]"=&f"(ftmp[14]),      [ftmp15]"=&f"(ftmp[15]),
-          [tmp0]"=&r"(tmp[0]),
-          [count]"=&r"(count),
-          [src]"+&r"(src),              [dst]"+&r"(dst)
-        : [ff_pw_4]"f"(ff_pw_4),        [ff_pw_6]"f"(ff_pw_6),
-          [ff_pw_9]"f"(ff_pw_9),        [ff_pw_12]"f"(ff_pw_12),
-          [ff_pw_15]"f"(ff_pw_15),      [ff_pw_16]"f"(ff_pw_16)
-        : "memory"
-    );
+        "punpcklhw  %[ftmp9],   %[ftmp11],  %[ftmp12]                   \n\t"
+        "punpckhhw  %[ftmp10],  %[ftmp11],  %[ftmp12]                   \n\t"
+        "punpcklhw  %[ftmp11],  %[ftmp13],  %[ftmp14]                   \n\t"
+        "punpckhhw  %[ftmp12],  %[ftmp13],  %[ftmp14]                   \n\t"
 
-    src = temp;
-    dst = block;
+        /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */
+        VC1_INV_TRANCS_8_TYPE1(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c,
+                               0x000f0010, 0x00040009, %[ff_pw_4])
 
-    // 2nd loop
-    __asm__ volatile (
+        /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */
+        VC1_INV_TRANCS_8_TYPE1(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4,
+                               0xfffc000f, 0xfff7fff0, %[ff_pw_4])
+
+        /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */
+        VC1_INV_TRANCS_8_TYPE1(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4,
+                               0xfff00009, 0x000f0004, %[ff_pw_4])
+
+        /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */
+        VC1_INV_TRANCS_8_TYPE1(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c,
+                               0xfff70004, 0xfff0000f, %[ff_pw_4])
+
+        TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
+                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
+
+        TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
+                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
+
+        MMI_SDC1(%[ftmp19], %[temp], 0x48)
+        MMI_SDC1(%[ftmp20], %[temp], 0x58)
+        MMI_SDC1(%[ftmp21], %[temp], 0x68)
+        MMI_SDC1(%[ftmp22], %[temp], 0x78)
+        /* 1st loop: end */
+
+        /* 2nd loop: start */
         "li         %[tmp0],    0x07                                    \n\t"
         "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
-        "li         %[count],   0x02                                    \n\t"
 
-        "1:                                                             \n\t"
-        MMI_LDC1(%[ftmp5], %[src], 0x10)
-        MMI_LDC1(%[ftmp6], %[src], 0x30)
-        MMI_LDC1(%[ftmp7], %[src], 0x50)
-        MMI_LDC1(%[ftmp8], %[src], 0x70)
+        // 1st part
+        MMI_LDC1(%[ftmp1], %[temp], 0x00)
+        MMI_LDC1(%[ftmp11], %[temp], 0x10)
+        MMI_LDC1(%[ftmp2], %[temp], 0x20)
+        MMI_LDC1(%[ftmp12], %[temp], 0x30)
+        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpcklhw  %[ftmp7],   %[ftmp15],  %[ftmp17]                   \n\t"
+        "punpckhhw  %[ftmp8],   %[ftmp15],  %[ftmp17]                   \n\t"
 
-        VC1_INV_TRANCS_8_STEP1_MMI(%[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                                   %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ff_pw_16], %[ff_pw_15], %[ff_pw_9],
-                                   %[ff_pw_4])
+        "punpcklhw  %[ftmp9],   %[ftmp11],  %[ftmp12]                   \n\t"
+        "punpckhhw  %[ftmp10],  %[ftmp11],  %[ftmp12]                   \n\t"
+        "punpcklhw  %[ftmp11],  %[ftmp16],  %[ftmp18]                   \n\t"
+        "punpckhhw  %[ftmp12],  %[ftmp16],  %[ftmp18]                   \n\t"
 
-        MMI_LDC1(%[ftmp1], %[src], 0x00)
-        MMI_LDC1(%[ftmp2], %[src], 0x40)
-        MMI_LDC1(%[ftmp3], %[src], 0x20)
-        MMI_LDC1(%[ftmp4], %[src], 0x60)
+        /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c,
+                               0x000f0010, 0x00040009, %[ff_pw_64], %[ff_pw_1])
 
-        VC1_INV_TRANCS_8_STEP2_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                                   %[ff_pw_12], %[ff_pw_16], %[ff_pw_6],
-                                   %[ff_pw_64])
+        /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4,
+                               0xfffc000f, 0xfff7fff0, %[ff_pw_64], %[ff_pw_1])
 
-        "paddh      %[ftmp4],   %[ftmp4],   %[ff_pw_1]                  \n\t"
-        "paddh      %[ftmp3],   %[ftmp3],   %[ff_pw_1]                  \n\t"
-        "paddh      %[ftmp2],   %[ftmp2],   %[ff_pw_1]                  \n\t"
-        "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_1]                  \n\t"
+        /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4,
+                               0xfff00009, 0x000f0004, %[ff_pw_64], %[ff_pw_1])
 
-        PSRAH_8_MMI(%[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                    %[ftmp4], %[ftmp3], %[ftmp2], %[ftmp1], %[ftmp0])
+        /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c,
+                               0xfff70004, 0xfff0000f, %[ff_pw_64], %[ff_pw_1])
 
-        MMI_SDC1(%[ftmp5], %[dst], 0x00)
-        MMI_SDC1(%[ftmp6], %[dst], 0x10)
-        MMI_SDC1(%[ftmp7], %[dst], 0x20)
-        MMI_SDC1(%[ftmp8], %[dst], 0x30)
+        MMI_SDC1(%[ftmp15], %[block], 0x00)
+        MMI_SDC1(%[ftmp16], %[block], 0x10)
+        MMI_SDC1(%[ftmp17], %[block], 0x20)
+        MMI_SDC1(%[ftmp18], %[block], 0x30)
+        MMI_SDC1(%[ftmp19], %[block], 0x40)
+        MMI_SDC1(%[ftmp20], %[block], 0x50)
+        MMI_SDC1(%[ftmp21], %[block], 0x60)
+        MMI_SDC1(%[ftmp22], %[block], 0x70)
 
-        MMI_SDC1(%[ftmp4], %[dst], 0x40)
-        MMI_SDC1(%[ftmp3], %[dst], 0x50)
-        MMI_SDC1(%[ftmp2], %[dst], 0x60)
-        MMI_SDC1(%[ftmp1], %[dst], 0x70)
+       // 2nd part
+        MMI_LDC1(%[ftmp1], %[temp], 0x08)
+        MMI_LDC1(%[ftmp11], %[temp], 0x18)
+        MMI_LDC1(%[ftmp2], %[temp], 0x28)
+        MMI_LDC1(%[ftmp12], %[temp], 0x38)
+        MMI_LDC1(%[ftmp3], %[temp], 0x48)
+        MMI_LDC1(%[ftmp13], %[temp], 0x58)
+        MMI_LDC1(%[ftmp4], %[temp], 0x68)
+        MMI_LDC1(%[ftmp14], %[temp], 0x78)
+        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
+        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"
 
-        "addiu      %[count],   %[count],  -0x01                        \n\t"
-        PTR_ADDIU  "%[src],     %[src],     0x08                        \n\t"
-        PTR_ADDIU  "%[dst],     %[dst],     0x08                        \n\t"
-        "bnez       %[count],   1b                                      \n\t"
+        "punpcklhw  %[ftmp9],   %[ftmp11],  %[ftmp12]                   \n\t"
+        "punpckhhw  %[ftmp10],  %[ftmp11],  %[ftmp12]                   \n\t"
+        "punpcklhw  %[ftmp11],  %[ftmp13],  %[ftmp14]                   \n\t"
+        "punpckhhw  %[ftmp12],  %[ftmp13],  %[ftmp14]                   \n\t"
+
+        /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c,
+                               0x000f0010, 0x00040009, %[ff_pw_64], %[ff_pw_1])
+
+        /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4,
+                               0xfffc000f, 0xfff7fff0, %[ff_pw_64], %[ff_pw_1])
+
+        /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4,
+                               0xfff00009, 0x000f0004, %[ff_pw_64], %[ff_pw_1])
+
+        /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c,
+                               0xfff70004, 0xfff0000f, %[ff_pw_64], %[ff_pw_1])
+
+        MMI_SDC1(%[ftmp15], %[block], 0x08)
+        MMI_SDC1(%[ftmp16], %[block], 0x18)
+        MMI_SDC1(%[ftmp17], %[block], 0x28)
+        MMI_SDC1(%[ftmp18], %[block], 0x38)
+        MMI_SDC1(%[ftmp19], %[block], 0x48)
+        MMI_SDC1(%[ftmp20], %[block], 0x58)
+        MMI_SDC1(%[ftmp21], %[block], 0x68)
+        MMI_SDC1(%[ftmp22], %[block], 0x78)
+        /* 2nd loop: end */
         : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
@@ -355,13 +399,14 @@
           [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
           [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
           [ftmp14]"=&f"(ftmp[14]),      [ftmp15]"=&f"(ftmp[15]),
-          [tmp0]"=&r"(tmp[0]),
-          [count]"=&r"(count),
-          [src]"+&r"(src),              [dst]"+&r"(dst)
-        : [ff_pw_1]"f"(ff_pw_1),        [ff_pw_4]"f"(ff_pw_4),
-          [ff_pw_6]"f"(ff_pw_6),        [ff_pw_9]"f"(ff_pw_9),
-          [ff_pw_12]"f"(ff_pw_12),      [ff_pw_15]"f"(ff_pw_15),
-          [ff_pw_16]"f"(ff_pw_16),      [ff_pw_64]"f"(ff_pw_64)
+          [ftmp16]"=&f"(ftmp[16]),      [ftmp17]"=&f"(ftmp[17]),
+          [ftmp18]"=&f"(ftmp[18]),      [ftmp19]"=&f"(ftmp[19]),
+          [ftmp20]"=&f"(ftmp[20]),      [ftmp21]"=&f"(ftmp[21]),
+          [ftmp22]"=&f"(ftmp[22]),
+          [tmp0]"=&r"(tmp[0])
+        : [ff_pw_1]"f"(ff_pw_1_local),  [ff_pw_64]"f"(ff_pw_64_local),
+          [ff_pw_4]"f"(ff_pw_4_local), [block]"r"(block),
+          [temp]"r"(temp)
         : "memory"
     );
 }
@@ -431,66 +476,377 @@
     int16_t *dst = block;
     double ftmp[16];
     uint32_t tmp[1];
-    mips_reg addr[1];
-    DECLARE_VAR_LOW32;
+    int16_t count = 4;
+    DECLARE_ALIGNED(16, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
+    DECLARE_ALIGNED(16, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
+    int16_t coeff[64] = {12, 16,  16,  15,  12,   9,   6,   4,
+                         12, 15,   6,  -4, -12, -16, -16,  -9,
+                         12,  9,  -6, -16, -12,   4,  16,  15,
+                         12,  4, -16,  -9,  12,  15,  -6, -16,
+                         12, -4, -16,   9,  12, -15,  -6,  16,
+                         12, -9,  -6,  16, -12,  -4,  16, -15,
+                         12, -15,  6,   4, -12,  16, -16,   9,
+                         12, -16, 16, -15,  12,  -9,   6,  -4};
 
     // 1st loop
     __asm__ volatile (
-        MMI_LDC1(%[ftmp1], %[src], 0x00)
-        MMI_LDC1(%[ftmp2], %[src], 0x08)
-        MMI_LDC1(%[ftmp3], %[src], 0x10)
-        MMI_LDC1(%[ftmp4], %[src], 0x18)
-        MMI_LDC1(%[ftmp5], %[src], 0x20)
-        MMI_LDC1(%[ftmp6], %[src], 0x28)
-        MMI_LDC1(%[ftmp7], %[src], 0x30)
-        MMI_LDC1(%[ftmp8], %[src], 0x38)
-
-        //             a1        b1        a3        b2
-        TRANSPOSE_4H(%[ftmp1], %[ftmp3], %[ftmp5], %[ftmp7],
-                     %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                     %[ftmp13], %[tmp0],  %[ftmp14], %[ftmp15])
-
-        //             a2        b3        a4        b4
-        TRANSPOSE_4H(%[ftmp2], %[ftmp4], %[ftmp6], %[ftmp8],
-                     %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                     %[ftmp13], %[tmp0],  %[ftmp14], %[ftmp15])
-
-        // input b1 b2 b3 b4
-        VC1_INV_TRANCS_8_STEP1_MMI(%[ftmp3], %[ftmp7], %[ftmp4], %[ftmp8],
-                                   %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                                   %[ftmp0], %[ftmp13], %[ftmp14], %[ftmp15],
-                                   %[ff_pw_16], %[ff_pw_15], %[ff_pw_9],
-                                   %[ff_pw_4])
-        // input a1 a2 a3 a4
-        VC1_INV_TRANCS_8_STEP2_MMI(%[ftmp1], %[ftmp2], %[ftmp5], %[ftmp6],
-                                   %[ftmp3], %[ftmp7], %[ftmp4], %[ftmp8],
-                                   %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                                   %[ff_pw_12], %[ff_pw_16], %[ff_pw_6],
-                                   %[ff_pw_4])
-
         "li         %[tmp0],    0x03                                    \n\t"
         "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
 
-        PSRAH_8_MMI(%[ftmp3], %[ftmp7], %[ftmp4], %[ftmp8],
-                    %[ftmp6], %[ftmp5], %[ftmp2], %[ftmp1], %[ftmp0])
+        "1:                                                             \n\t"
+        MMI_LDC1(%[ftmp1], %[src], 0x00)
+        MMI_LDC1(%[ftmp2], %[src], 0x08)
 
-        TRANSPOSE_4H(%[ftmp3], %[ftmp7], %[ftmp4], %[ftmp8],
-                     %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                     %[ftmp13], %[tmp0],  %[ftmp14], %[ftmp15])
+        /* ftmp11: dst1,dst0 */
+        MMI_LDC1(%[ftmp3], %[coeff], 0x00)
+        MMI_LDC1(%[ftmp4], %[coeff], 0x08)
+        MMI_LDC1(%[ftmp5], %[coeff], 0x10)
+        MMI_LDC1(%[ftmp6], %[coeff], 0x18)
+        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t"
+        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t"
+        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t"
+        "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "paddw      %[ftmp11],  %[ftmp7],   %[ftmp8]                    \n\t"
+        "paddw      %[ftmp11],  %[ftmp11],  %[ff_pw_4]                  \n\t"
 
-        MMI_SDC1(%[ftmp3], %[dst], 0x00)
-        MMI_SDC1(%[ftmp7], %[dst], 0x10)
-        MMI_SDC1(%[ftmp4], %[dst], 0x20)
-        MMI_SDC1(%[ftmp8], %[dst], 0x30)
+        /* ftmp12: dst3,dst2 */
+        MMI_LDC1(%[ftmp3], %[coeff], 0x20)
+        MMI_LDC1(%[ftmp4], %[coeff], 0x28)
+        MMI_LDC1(%[ftmp5], %[coeff], 0x30)
+        MMI_LDC1(%[ftmp6], %[coeff], 0x38)
+        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t"
+        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t"
+        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t"
+        "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "paddw      %[ftmp12],  %[ftmp7],   %[ftmp8]                    \n\t"
+        "paddw      %[ftmp12],  %[ftmp12],  %[ff_pw_4]                  \n\t"
 
-        TRANSPOSE_4H(%[ftmp6], %[ftmp5], %[ftmp2], %[ftmp1],
-                     %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                     %[ftmp13], %[tmp0],  %[ftmp14], %[ftmp15])
+        /* ftmp13: dst5,dst4 */
+        MMI_LDC1(%[ftmp3], %[coeff], 0x40)
+        MMI_LDC1(%[ftmp4], %[coeff], 0x48)
+        MMI_LDC1(%[ftmp5], %[coeff], 0x50)
+        MMI_LDC1(%[ftmp6], %[coeff], 0x58)
+        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t"
+        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t"
+        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t"
+        "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "paddw      %[ftmp13],  %[ftmp7],   %[ftmp8]                    \n\t"
+        "paddw      %[ftmp13],  %[ftmp13],  %[ff_pw_4]                  \n\t"
 
-        MMI_SDC1(%[ftmp6], %[dst], 0x08)
-        MMI_SDC1(%[ftmp5], %[dst], 0x18)
-        MMI_SDC1(%[ftmp2], %[dst], 0x28)
-        MMI_SDC1(%[ftmp1], %[dst], 0x38)
+        /* ftmp14: dst7,dst6 */
+        MMI_LDC1(%[ftmp3], %[coeff], 0x60)
+        MMI_LDC1(%[ftmp4], %[coeff], 0x68)
+        MMI_LDC1(%[ftmp5], %[coeff], 0x70)
+        MMI_LDC1(%[ftmp6], %[coeff], 0x78)
+        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t"
+        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t"
+        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t"
+        "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "paddw      %[ftmp14],  %[ftmp7],   %[ftmp8]                    \n\t"
+        "paddw      %[ftmp14],  %[ftmp14],  %[ff_pw_4]                  \n\t"
+
+        /* ftmp9: dst3,dst2,dst1,dst0    ftmp10: dst7,dst6,dst5,dst4 */
+        "psraw      %[ftmp11],  %[ftmp11],  %[ftmp0]                    \n\t"
+        "psraw      %[ftmp12],  %[ftmp12],  %[ftmp0]                    \n\t"
+        "psraw      %[ftmp13],  %[ftmp13],  %[ftmp0]                    \n\t"
+        "psraw      %[ftmp14],  %[ftmp14],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp7],   %[ftmp11],  %[ftmp12]                   \n\t"
+        "punpckhhw  %[ftmp8],   %[ftmp11],  %[ftmp12]                   \n\t"
+        "punpcklhw  %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t"
+        "punpcklhw  %[ftmp7],   %[ftmp13],  %[ftmp14]                   \n\t"
+        "punpckhhw  %[ftmp8],   %[ftmp13],  %[ftmp14]                   \n\t"
+        "punpcklhw  %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t"
+        MMI_SDC1(%[ftmp9], %[dst], 0x00)
+        MMI_SDC1(%[ftmp10], %[dst], 0x08)
+
+        PTR_ADDIU  "%[src],     %[src],     0x10                        \n\t"
+        PTR_ADDIU  "%[dst],     %[dst],     0x10                        \n\t"
+        "addiu      %[count],   %[count],   -0x01                       \n\t"
+        "bnez       %[count],   1b                                      \n\t"
+        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
+          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
+          [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
+          [ftmp14]"=&f"(ftmp[14]),      [tmp0]"=&r"(tmp[0]),
+          [src]"+&r"(src), [dst]"+&r"(dst), [count]"+&r"(count)
+        : [ff_pw_4]"f"(ff_pw_4_local),  [coeff]"r"(coeff)
+        : "memory"
+    );
+
+    src = block;
+
+    // 2nd loop
+    __asm__ volatile (
+        "li         %[tmp0],    0x44                                    \n\t"
+        "mtc1       %[tmp0],    %[ftmp15]                               \n\t"
+
+        // 1st part
+        "li         %[tmp0],    0x07                                    \n\t"
+        "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
+        MMI_LDC1(%[ftmp1], %[src], 0x00)
+        MMI_LDC1(%[ftmp2], %[src], 0x10)
+        MMI_LDC1(%[ftmp3], %[src], 0x20)
+        MMI_LDC1(%[ftmp4], %[src], 0x30)
+        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
+        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"
+
+        /* ftmp11: dst03,dst02,dst01,dst00 */
+        "li         %[tmp0],    0x00160011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0x000a0011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp11],  %[ftmp1],   %[ftmp2]                    \n\t"
+
+        /* ftmp12: dst13,dst12,dst11,dst10 */
+        "li         %[tmp0],    0x000a0011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0xffeaffef                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp12],  %[ftmp1],   %[ftmp2]                    \n\t"
+
+        /* ftmp13: dst23,dst22,dst21,dst20 */
+        "li         %[tmp0],    0xfff60011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0x0016ffef                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp13],  %[ftmp1],   %[ftmp2]                    \n\t"
+
+        /* ftmp14: dst33,dst32,dst31,dst30 */
+        "li         %[tmp0],    0xffea0011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0xfff60011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp14],  %[ftmp1],   %[ftmp2]                    \n\t"
+
+        MMI_LWC1(%[ftmp1], %[dest], 0x00)
+        PTR_ADDU    "%[tmp0],   %[dest],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
+        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
+        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
+        "xor        %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
+        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp11]                   \n\t"
+        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp12]                   \n\t"
+        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp13]                   \n\t"
+        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp14]                   \n\t"
+        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
+        MMI_SWC1(%[ftmp1], %[dest], 0x00)
+        PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
+
+        // 2nd part
+        "li         %[tmp0],    0x07                                    \n\t"
+        "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
+        MMI_LDC1(%[ftmp1], %[src], 0x08)
+        MMI_LDC1(%[ftmp2], %[src], 0x18)
+        MMI_LDC1(%[ftmp3], %[src], 0x28)
+        MMI_LDC1(%[ftmp4], %[src], 0x38)
+        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
+        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"
+
+        /* ftmp11: dst03,dst02,dst01,dst00 */
+        "li         %[tmp0],    0x00160011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0x000a0011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp11],  %[ftmp1],   %[ftmp2]                    \n\t"
+
+        /* ftmp12: dst13,dst12,dst11,dst10 */
+        "li         %[tmp0],    0x000a0011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0xffeaffef                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp12],  %[ftmp1],   %[ftmp2]                    \n\t"
+
+        /* ftmp13: dst23,dst22,dst21,dst20 */
+        "li         %[tmp0],    0xfff60011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0x0016ffef                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp13],  %[ftmp1],   %[ftmp2]                    \n\t"
+
+        /* ftmp14: dst33,dst32,dst31,dst30 */
+        "li         %[tmp0],    0xffea0011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0xfff60011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp14],  %[ftmp1],   %[ftmp2]                    \n\t"
+
+        MMI_LWC1(%[ftmp1], %[dest], 0x04)
+        PTR_ADDU    "%[tmp0],   %[dest],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp2], %[tmp0], 0x04)
+        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp3], %[tmp0], 0x04)
+        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp4], %[tmp0], 0x04)
+        "xor        %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
+        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp11]                   \n\t"
+        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp12]                   \n\t"
+        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp13]                   \n\t"
+        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp14]                   \n\t"
+        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
+        MMI_SWC1(%[ftmp1], %[dest], 0x04)
+        PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp2], %[tmp0], 0x04)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp3], %[tmp0], 0x04)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp4], %[tmp0], 0x04)
+
         : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
@@ -500,100 +856,9 @@
           [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
           [ftmp14]"=&f"(ftmp[14]),      [ftmp15]"=&f"(ftmp[15]),
           [tmp0]"=&r"(tmp[0])
-        : [src]"r"(src),                [dst]"r"(dst),
-          [ff_pw_4]"f"(ff_pw_4),        [ff_pw_6]"f"(ff_pw_6),
-          [ff_pw_9]"f"(ff_pw_9),        [ff_pw_12]"f"(ff_pw_12),
-          [ff_pw_15]"f"(ff_pw_15),      [ff_pw_16]"f"(ff_pw_16)
-        : "memory"
-    );
-
-    src = block;
-
-    // 2nd loop
-    __asm__ volatile (
-        "li         %[tmp0],    0x07                                    \n\t"
-        "xor        %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
-        "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
-
-        // dest low 32bit
-        MMI_LDC1(%[ftmp1], %[src], 0x00)
-        MMI_LDC1(%[ftmp2], %[src], 0x20)
-        MMI_LDC1(%[ftmp3], %[src], 0x30)
-        MMI_LDC1(%[ftmp4], %[src], 0x10)
-
-        VC1_INV_TRANCS_4_STEP1_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ff_pw_17], %[ff_pw_10], %[ff_pw_22],
-                                   %[ff_pw_64])
-
-        PSRAH_4_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4], %[ftmp9])
-
-        MMI_LWC1(%[ftmp5], %[dest], 0x00)
-        PTR_ADDU   "%[addr0],   %[dest],    %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp6], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp7], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp8], %[addr0], 0x00)
-
-        VC1_INV_TRANCS_4_STEP2_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ftmp0])
-
-        MMI_SWC1(%[ftmp1], %[dest], 0x00)
-        PTR_ADDU   "%[addr0],   %[dest],    %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp2], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp3], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp4], %[addr0], 0x00)
-
-        // dest high 32bit
-        MMI_LDC1(%[ftmp1], %[src], 0x08)
-        MMI_LDC1(%[ftmp2], %[src], 0x28)
-        MMI_LDC1(%[ftmp3], %[src], 0x38)
-        MMI_LDC1(%[ftmp4], %[src], 0x18)
-
-        VC1_INV_TRANCS_4_STEP1_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ff_pw_17], %[ff_pw_10], %[ff_pw_22],
-                                   %[ff_pw_64])
-
-        PSRAH_4_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4], %[ftmp9])
-
-        MMI_LWC1(%[ftmp5], %[dest], 0x04)
-        PTR_ADDU   "%[addr0],   %[dest],    %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp6], %[addr0], 0x04)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp7], %[addr0], 0x04)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp8], %[addr0], 0x04)
-
-        VC1_INV_TRANCS_4_STEP2_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ftmp0])
-
-        MMI_SWC1(%[ftmp1], %[dest], 0x04)
-        PTR_ADDU   "%[addr0],   %[dest],    %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp2], %[addr0], 0x04)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp3], %[addr0], 0x04)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp4], %[addr0], 0x04)
-
-        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
-          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
-          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
-          [tmp0]"=&r"(tmp[0]),
-          RESTRICT_ASM_LOW32
-          [addr0]"=&r"(addr[0])
-        : [src]"r"(src),                [dest]"r"(dest),
-          [linesize]"r"((mips_reg)linesize),
-          [ff_pw_17]"f"(ff_pw_17),      [ff_pw_22]"f"(ff_pw_22),
-          [ff_pw_10]"f"(ff_pw_10),      [ff_pw_64]"f"(ff_pw_64)
-        : "memory"
+        : [ff_pw_64]"f"(ff_pw_64_local),
+          [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
+        :"memory"
     );
 }
 #endif
@@ -676,47 +941,51 @@
 {
     int16_t *src = block;
     int16_t *dst = block;
-    double ftmp[16];
-    uint32_t count, tmp[1];
-    mips_reg addr[1];
-    DECLARE_VAR_LOW32;
+    double ftmp[23];
+    uint32_t count = 8, tmp[1];
+    int16_t coeff[16] = {17, 22, 17, 10,
+                         17, 10,-17,-22,
+                         17,-10,-17, 22,
+                         17,-22, 17,-10};
+    DECLARE_ALIGNED(8, const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
+    DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
+    DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
 
     // 1st loop
     __asm__ volatile (
-        "li         %[count],   0x02                                    \n\t"
+
         "li         %[tmp0],    0x03                                    \n\t"
         "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
 
+        MMI_LDC1(%[ftmp2], %[coeff], 0x00)
+        MMI_LDC1(%[ftmp3], %[coeff], 0x08)
+        MMI_LDC1(%[ftmp4], %[coeff], 0x10)
+        MMI_LDC1(%[ftmp5], %[coeff], 0x18)
         "1:                                                             \n\t"
+        /* ftmp8: dst3,dst2,dst1,dst0 */
         MMI_LDC1(%[ftmp1], %[src], 0x00)
-        MMI_LDC1(%[ftmp2], %[src], 0x10)
-        MMI_LDC1(%[ftmp3], %[src], 0x20)
-        MMI_LDC1(%[ftmp4], %[src], 0x30)
+        "pmaddhw    %[ftmp6],   %[ftmp2],   %[ftmp1]                    \n\t"
+        "pmaddhw    %[ftmp7],   %[ftmp3],   %[ftmp1]                    \n\t"
+        "pmaddhw    %[ftmp8],   %[ftmp4],   %[ftmp1]                    \n\t"
+        "pmaddhw    %[ftmp9],   %[ftmp5],   %[ftmp1]                    \n\t"
+        "punpcklwd  %[ftmp10],  %[ftmp6],   %[ftmp7]                    \n\t"
+        "punpckhwd  %[ftmp11],  %[ftmp6],   %[ftmp7]                    \n\t"
+        "punpcklwd  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t"
+        "punpckhwd  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t"
+        "paddw      %[ftmp8],   %[ftmp10],  %[ftmp11]                   \n\t"
+        "paddw      %[ftmp9],   %[ftmp6],   %[ftmp7]                    \n\t"
+        "paddw      %[ftmp8],   %[ftmp8],   %[ff_pw_4]                  \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_4]                  \n\t"
+        "psraw      %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t"
+        "punpckhhw  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t"
+        "punpcklhw  %[ftmp8],   %[ftmp6],   %[ftmp7]                    \n\t"
+        MMI_SDC1(%[ftmp8], %[dst], 0x00)
 
-        TRANSPOSE_4H(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                     %[ftmp9], %[tmp0],  %[ftmp10], %[ftmp11])
-
-        //                              t1        t2        t3        t4
-        VC1_INV_TRANCS_4_STEP1_MMI(%[ftmp1], %[ftmp3], %[ftmp4], %[ftmp2],
-                                   %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ff_pw_17], %[ff_pw_10], %[ff_pw_22],
-                                   %[ff_pw_4])
-
-        PSRAH_4_MMI(%[ftmp1], %[ftmp3], %[ftmp4], %[ftmp2], %[ftmp0])
-
-        TRANSPOSE_4H(%[ftmp1], %[ftmp3], %[ftmp4], %[ftmp2],
-                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                     %[ftmp9], %[tmp0],  %[ftmp10], %[ftmp11])
-
-        MMI_SDC1(%[ftmp1], %[dst], 0x00)
-        MMI_SDC1(%[ftmp3], %[dst], 0x10)
-        MMI_SDC1(%[ftmp4], %[dst], 0x20)
-        MMI_SDC1(%[ftmp2], %[dst], 0x30)
-
-        "addiu      %[count],   %[count],  -0x01                        \n\t"
-        PTR_ADDIU  "%[src],     %[src],     0x40                        \n\t"
-        PTR_ADDIU  "%[dst],     %[dst],     0x40                        \n\t"
+        PTR_ADDIU  "%[src],     %[src],     0x10                        \n\t"
+        PTR_ADDIU  "%[dst],     %[dst],     0x10                        \n\t"
+        "addiu      %[count],   %[count],   -0x01                       \n\t"
         "bnez       %[count],   1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
@@ -724,11 +993,9 @@
           [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
           [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
           [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
-          [tmp0]"=&r"(tmp[0]),
-          [count]"=&r"(count),
+          [tmp0]"=&r"(tmp[0]),          [count]"+&r"(count),
           [src]"+&r"(src),              [dst]"+&r"(dst)
-        : [ff_pw_17]"f"(ff_pw_17),      [ff_pw_10]"f"(ff_pw_10),
-          [ff_pw_22]"f"(ff_pw_22),      [ff_pw_4]"f"(ff_pw_4)
+        : [ff_pw_4]"f"(ff_pw_4_local),  [coeff]"r"(coeff)
         : "memory"
     );
 
@@ -739,99 +1006,114 @@
         "li         %[tmp0],    0x07                                    \n\t"
         "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
 
-        MMI_LDC1(%[ftmp5], %[src], 0x10)
-        MMI_LDC1(%[ftmp6], %[src], 0x30)
-        MMI_LDC1(%[ftmp7], %[src], 0x50)
-        MMI_LDC1(%[ftmp8], %[src], 0x70)
-
-        VC1_INV_TRANCS_8_STEP1_MMI(%[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                                   %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ff_pw_16], %[ff_pw_15], %[ff_pw_9],
-                                   %[ff_pw_4])
-
         MMI_LDC1(%[ftmp1], %[src], 0x00)
-        MMI_LDC1(%[ftmp2], %[src], 0x40)
-        MMI_LDC1(%[ftmp3], %[src], 0x20)
+        MMI_LDC1(%[ftmp2], %[src], 0x20)
+        MMI_LDC1(%[ftmp3], %[src], 0x40)
         MMI_LDC1(%[ftmp4], %[src], 0x60)
+        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
+        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"
 
-        VC1_INV_TRANCS_8_STEP2_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                                   %[ff_pw_12], %[ff_pw_16], %[ff_pw_6],
-                                   %[ff_pw_64])
+        MMI_LDC1(%[ftmp1], %[src], 0x10)
+        MMI_LDC1(%[ftmp2], %[src], 0x30)
+        MMI_LDC1(%[ftmp3], %[src], 0x50)
+        MMI_LDC1(%[ftmp4], %[src], 0x70)
+        "punpcklhw  %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpckhhw  %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpcklhw  %[ftmp11],  %[ftmp3],   %[ftmp4]                    \n\t"
+        "punpckhhw  %[ftmp12],  %[ftmp3],   %[ftmp4]                    \n\t"
 
-        "paddh      %[ftmp4],   %[ftmp4],   %[ff_pw_1]                  \n\t"
-        "paddh      %[ftmp3],   %[ftmp3],   %[ff_pw_1]                  \n\t"
-        "paddh      %[ftmp2],   %[ftmp2],   %[ff_pw_1]                  \n\t"
-        "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_1]                  \n\t"
+        /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c,
+                               0x000f0010, 0x00040009, %[ff_pw_64], %[ff_pw_1])
 
-        PSRAH_8_MMI(%[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                    %[ftmp4], %[ftmp3], %[ftmp2], %[ftmp1], %[ftmp0])
+        /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4,
+                               0xfffc000f, 0xfff7fff0, %[ff_pw_64], %[ff_pw_1])
 
+        /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4,
+                               0xfff00009, 0x000f0004, %[ff_pw_64], %[ff_pw_1])
+
+        /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */
+        VC1_INV_TRANCS_8_TYPE2(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c,
+                               0xfff70004, 0xfff0000f, %[ff_pw_64], %[ff_pw_1])
+
+        MMI_LWC1(%[ftmp1], %[dest], 0x00)
+        PTR_ADDU  "%[tmp0],   %[dest],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
+        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
+        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
+        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp5], %[tmp0], 0x00)
+        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp6], %[tmp0], 0x00)
+        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp7], %[tmp0], 0x00)
+        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp8], %[tmp0], 0x00)
         "xor        %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp5],   %[ftmp5],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp6],   %[ftmp6],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp7],   %[ftmp7],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t"
 
-        // dest low
-        MMI_LWC1(%[ftmp9], %[dest], 0x00)
-        PTR_ADDU   "%[addr0],   %[dest],    %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp10], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp11], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp12], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
+        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp15]                   \n\t"
+        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp16]                   \n\t"
+        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp17]                   \n\t"
+        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp18]                   \n\t"
+        "paddh      %[ftmp5],   %[ftmp5],   %[ftmp19]                   \n\t"
+        "paddh      %[ftmp6],   %[ftmp6],   %[ftmp20]                   \n\t"
+        "paddh      %[ftmp7],   %[ftmp7],   %[ftmp21]                   \n\t"
+        "paddh      %[ftmp8],   %[ftmp8],   %[ftmp22]                   \n\t"
 
-        VC1_INV_TRANCS_4_STEP2_MMI(%[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                                   %[ftmp0])
+        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp5],   %[ftmp5],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp6],   %[ftmp6],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp7],   %[ftmp7],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t"
 
-        // dest high
-        MMI_LWC1(%[ftmp9], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp10], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp11], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp12], %[addr0], 0x00)
+        MMI_SWC1(%[ftmp1], %[dest], 0x00)
+        PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp5], %[tmp0], 0x00)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp6], %[tmp0], 0x00)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp7], %[tmp0], 0x00)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp8], %[tmp0], 0x00)
 
-        VC1_INV_TRANCS_4_STEP2_MMI(%[ftmp4], %[ftmp3], %[ftmp2], %[ftmp1],
-                                   %[ftmp9], %[ftmp10], %[ftmp11], %[ftmp12],
-                                   %[ftmp0])
-
-        // dest low
-        MMI_SWC1(%[ftmp5], %[dest], 0x00)
-        PTR_ADDU   "%[addr0],   %[dest],    %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp6], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp7], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp8], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-
-        // dest high
-        MMI_SWC1(%[ftmp4], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp3], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp2], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp1], %[addr0], 0x00)
         : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
           [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
           [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
           [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
-          [ftmp12]"=&f"(ftmp[12]),
-          [tmp0]"=&r"(tmp[0]),
-          RESTRICT_ASM_LOW32
-          [addr0]"=&r"(addr[0]),
-          [dest]"+&r"(dest)
-        : [src]"r"(src),                [linesize]"r"(linesize),
-          [ff_pw_1]"f"(ff_pw_1),        [ff_pw_4]"f"(ff_pw_4),
-          [ff_pw_6]"f"(ff_pw_6),        [ff_pw_9]"f"(ff_pw_9),
-          [ff_pw_12]"f"(ff_pw_12),      [ff_pw_15]"f"(ff_pw_15),
-          [ff_pw_16]"f"(ff_pw_16),      [ff_pw_64]"f"(ff_pw_64)
+          [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
+          [ftmp14]"=&f"(ftmp[14]),      [ftmp15]"=&f"(ftmp[15]),
+          [ftmp16]"=&f"(ftmp[16]),      [ftmp17]"=&f"(ftmp[17]),
+          [ftmp18]"=&f"(ftmp[18]),      [ftmp19]"=&f"(ftmp[19]),
+          [ftmp20]"=&f"(ftmp[20]),      [ftmp21]"=&f"(ftmp[21]),
+          [ftmp22]"=&f"(ftmp[22]),
+          [tmp0]"=&r"(tmp[0])
+        : [ff_pw_1]"f"(ff_pw_1_local),  [ff_pw_64]"f"(ff_pw_64_local),
+          [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
         : "memory"
     );
 }
@@ -890,51 +1172,58 @@
 {
     int16_t *src = block;
     int16_t *dst = block;
-    double ftmp[12];
-    uint32_t tmp[1];
-    mips_reg addr[1];
-    DECLARE_VAR_LOW32;
-
+    double ftmp[16];
+    uint32_t count = 4, tmp[1];
+    int16_t coeff[16] = {17, 22, 17, 10,
+                         17, 10,-17,-22,
+                         17,-10,-17, 22,
+                         17,-22, 17,-10};
+    DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
+    DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
     // 1st loop
     __asm__ volatile (
+
         "li         %[tmp0],    0x03                                    \n\t"
         "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
-
+        MMI_LDC1(%[ftmp2], %[coeff], 0x00)
+        MMI_LDC1(%[ftmp3], %[coeff], 0x08)
+        MMI_LDC1(%[ftmp4], %[coeff], 0x10)
+        MMI_LDC1(%[ftmp5], %[coeff], 0x18)
+        "1:                                                             \n\t"
+        /* ftmp8: dst3,dst2,dst1,dst0 */
         MMI_LDC1(%[ftmp1], %[src], 0x00)
-        MMI_LDC1(%[ftmp2], %[src], 0x10)
-        MMI_LDC1(%[ftmp3], %[src], 0x20)
-        MMI_LDC1(%[ftmp4], %[src], 0x30)
+        "pmaddhw    %[ftmp6],   %[ftmp2],   %[ftmp1]                    \n\t"
+        "pmaddhw    %[ftmp7],   %[ftmp3],   %[ftmp1]                    \n\t"
+        "pmaddhw    %[ftmp8],   %[ftmp4],   %[ftmp1]                    \n\t"
+        "pmaddhw    %[ftmp9],   %[ftmp5],   %[ftmp1]                    \n\t"
+        "punpcklwd  %[ftmp10],  %[ftmp6],   %[ftmp7]                    \n\t"
+        "punpckhwd  %[ftmp11],  %[ftmp6],   %[ftmp7]                    \n\t"
+        "punpcklwd  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t"
+        "punpckhwd  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t"
+        "paddw      %[ftmp8],   %[ftmp10],  %[ftmp11]                   \n\t"
+        "paddw      %[ftmp9],   %[ftmp6],   %[ftmp7]                    \n\t"
+        "paddw      %[ftmp8],   %[ftmp8],   %[ff_pw_4]                  \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_4]                  \n\t"
+        "psraw      %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t"
+        "punpckhhw  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t"
+        "punpcklhw  %[ftmp8],   %[ftmp6],   %[ftmp7]                    \n\t"
+        MMI_SDC1(%[ftmp8], %[dst], 0x00)
 
-        TRANSPOSE_4H(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                     %[ftmp9], %[tmp0],  %[ftmp10], %[ftmp11])
-
-        //                              t1        t2        t3        t4
-        VC1_INV_TRANCS_4_STEP1_MMI(%[ftmp1], %[ftmp3], %[ftmp4], %[ftmp2],
-                                   %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ff_pw_17], %[ff_pw_10], %[ff_pw_22],
-                                   %[ff_pw_4])
-
-        PSRAH_4_MMI(%[ftmp1], %[ftmp3], %[ftmp4], %[ftmp2], %[ftmp0])
-
-        TRANSPOSE_4H(%[ftmp1], %[ftmp3], %[ftmp4], %[ftmp2],
-                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                     %[ftmp9], %[tmp0],  %[ftmp10], %[ftmp11])
-
-        MMI_SDC1(%[ftmp1], %[dst], 0x00)
-        MMI_SDC1(%[ftmp3], %[dst], 0x10)
-        MMI_SDC1(%[ftmp4], %[dst], 0x20)
-        MMI_SDC1(%[ftmp2], %[dst], 0x30)
+        PTR_ADDIU  "%[src],     %[src],     0x10                        \n\t"
+        PTR_ADDIU  "%[dst],     %[dst],     0x10                        \n\t"
+        "addiu      %[count],   %[count],   -0x01                       \n\t"
+        "bnez       %[count],   1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
           [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
           [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
           [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
-          [tmp0]"=&r"(tmp[0]),
+          [tmp0]"=&r"(tmp[0]),          [count]"+&r"(count),
           [src]"+&r"(src),              [dst]"+&r"(dst)
-        : [ff_pw_17]"f"(ff_pw_17),      [ff_pw_10]"f"(ff_pw_10),
-          [ff_pw_22]"f"(ff_pw_22),      [ff_pw_4]"f"(ff_pw_4)
+        : [ff_pw_4]"f"(ff_pw_4_local),  [coeff]"r"(coeff)
         : "memory"
     );
 
@@ -944,54 +1233,143 @@
     __asm__ volatile (
         "li         %[tmp0],    0x07                                    \n\t"
         "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
+        "li         %[tmp0],    0x44                                    \n\t"
+        "mtc1       %[tmp0],    %[ftmp15]                               \n\t"
 
-        // dest low 32bit
         MMI_LDC1(%[ftmp1], %[src], 0x00)
-        MMI_LDC1(%[ftmp2], %[src], 0x20)
-        MMI_LDC1(%[ftmp3], %[src], 0x30)
-        MMI_LDC1(%[ftmp4], %[src], 0x10)
+        MMI_LDC1(%[ftmp2], %[src], 0x10)
+        MMI_LDC1(%[ftmp3], %[src], 0x20)
+        MMI_LDC1(%[ftmp4], %[src], 0x30)
+        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
+        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"
 
-        VC1_INV_TRANCS_4_STEP1_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ff_pw_17], %[ff_pw_10], %[ff_pw_22],
-                                   %[ff_pw_64])
+        /* ftmp11: dst03,dst02,dst01,dst00 */
+        "li         %[tmp0],    0x00160011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0x000a0011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp11],  %[ftmp1],   %[ftmp2]                    \n\t"
 
-        PSRAH_4_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4], %[ftmp0])
+        /* ftmp12: dst13,dst12,dst11,dst10 */
+        "li         %[tmp0],    0x000a0011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0xffeaffef                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp12],  %[ftmp1],   %[ftmp2]                    \n\t"
 
-        MMI_LWC1(%[ftmp5], %[dest], 0x00)
-        PTR_ADDU   "%[addr0],   %[dest],    %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp6], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp7], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_LWC1(%[ftmp8], %[addr0], 0x00)
+        /* ftmp13: dst23,dst22,dst21,dst20 */
+        "li         %[tmp0],    0xfff60011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0x0016ffef                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp13],  %[ftmp1],   %[ftmp2]                    \n\t"
 
-        "xor        %[ftmp9],   %[ftmp9],  %[ftmp9]                     \n\t"
+        /* ftmp14: dst33,dst32,dst31,dst30 */
+        "li         %[tmp0],    0xffea0011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
+        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
+        "li         %[tmp0],    0xfff60011                              \n\t"
+        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
+        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
+        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
+        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
+        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
+        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
+        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
+        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
+        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
+        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
+        "punpcklhw  %[ftmp14],  %[ftmp1],   %[ftmp2]                    \n\t"
 
-        VC1_INV_TRANCS_4_STEP2_MMI(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                                   %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                                   %[ftmp9])
+        MMI_LWC1(%[ftmp1], %[dest], 0x00)
+        PTR_ADDU    "%[tmp0],   %[dest],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
+        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
+        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
+        MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
+        "xor        %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
+        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
+        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp11]                   \n\t"
+        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp12]                   \n\t"
+        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp13]                   \n\t"
+        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp14]                   \n\t"
+        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
+        "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
 
         MMI_SWC1(%[ftmp1], %[dest], 0x00)
-        PTR_ADDU   "%[addr0],   %[dest],    %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp2], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp3], %[addr0], 0x00)
-        PTR_ADDU   "%[addr0],   %[addr0],   %[linesize]                 \n\t"
-        MMI_SWC1(%[ftmp4], %[addr0], 0x00)
+        PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
+        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
+        MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
+
         : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
           [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
           [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
-          [tmp0]"=&r"(tmp[0]),
-          RESTRICT_ASM_LOW32
-          [addr0]"=&r"(addr[0])
-        : [src]"r"(src),                [dest]"r"(dest),
-          [linesize]"r"((mips_reg)linesize),
-          [ff_pw_17]"f"(ff_pw_17),      [ff_pw_22]"f"(ff_pw_22),
-          [ff_pw_10]"f"(ff_pw_10),      [ff_pw_64]"f"(ff_pw_64)
-        : "memory"
+          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
+          [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
+          [ftmp14]"=&f"(ftmp[14]),      [ftmp15]"=&f"(ftmp[15]),
+          [tmp0]"=&r"(tmp[0])
+        : [ff_pw_64]"f"(ff_pw_64_local),
+          [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
+        :"memory"
     );
 }
 
@@ -1019,12 +1397,13 @@
     }
 }
 
-void ff_vc1_h_s_overlap_mmi(int16_t *left, int16_t *right)
+void ff_vc1_h_s_overlap_mmi(int16_t *left, int16_t *right, int left_stride, int right_stride, int flags)
 {
     int i;
     int a, b, c, d;
     int d1, d2;
-    int rnd1 = 4, rnd2 = 3;
+    int rnd1 = flags & 2 ? 3 : 4;
+    int rnd2 = 7 - rnd1;
     for (i = 0; i < 8; i++) {
         a  = left[6];
         b  = left[7];
@@ -1038,10 +1417,12 @@
         right[0] = ((c << 3) + d2 + rnd1) >> 3;
         right[1] = ((d << 3) + d1 + rnd2) >> 3;
 
-        right += 8;
-        left  += 8;
-        rnd2   = 7 - rnd2;
-        rnd1   = 7 - rnd1;
+        right += right_stride;
+        left  += left_stride;
+        if (flags & 1) {
+            rnd2   = 7 - rnd2;
+            rnd1   = 7 - rnd1;
+        }
     }
 }
 
@@ -1433,8 +1814,8 @@
         : RESTRICT_ASM_LOW32            RESTRICT_ASM_ADDRT                  \
           [src]"+r"(src),               [dst]"+r"(dst)                      \
         : [offset]"r"(offset),          [offset_x2n]"r"(-2*offset),         \
-          [stride]"g"(stride),          [rnd]"m"(rnd),                      \
-          [stride1]"g"(stride-offset),                                      \
+          [stride]"r"(stride),          [rnd]"m"(rnd),                      \
+          [stride1]"r"(stride-offset),                                      \
           [ff_pw_9]"m"(ff_pw_9)                                             \
         : "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10",     \
           "$f12", "$f14", "$f16", "memory"                                  \
@@ -1658,7 +2039,7 @@
           [h]"+r"(h),                                                       \
           [src]"+r"(src),               [dst]"+r"(dst)                      \
         : [offset_x1]"r"(offset),       [offset_x2]"r"(2*offset),           \
-          [offset_x3]"r"(3*offset),     [stride]"g"(stride),                \
+          [offset_x3]"r"(3*offset),     [stride]"r"(stride),                \
           [rnd]"m"(rnd),                                                    \
           [ff_pw_53]"m"(ff_pw_53),      [ff_pw_18]"m"(ff_pw_18),            \
           [ff_pw_3]"f"(ff_pw_3)                                             \

diff --git a/libavcodec/mips/vp8dsp_mmi.c b/libavcodec/mips/vp8dsp_mmi.c
index f972628..bd80aa1 100644
--- a/libavcodec/mips/vp8dsp_mmi.c
+++ b/libavcodec/mips/vp8dsp_mmi.c

@@ -25,6 +25,169 @@
 #include "constants.h"
 #include "libavutil/mips/mmiutils.h"
 
+#define DECLARE_DOUBLE_1            double db_1
+#define DECLARE_DOUBLE_2            double db_2
+#define DECLARE_UINT32_T            uint32_t  it_1
+#define RESTRICT_ASM_DOUBLE_1       [db_1]"=&f"(db_1)
+#define RESTRICT_ASM_DOUBLE_2       [db_2]"=&f"(db_2)
+#define RESTRICT_ASM_UINT32_T       [it_1]"=&r"(it_1)
+
+#define MMI_PCMPGTUB(dst, src1, src2)                                       \
+        "pcmpeqb    %[db_1],    "#src1",        "#src2"             \n\t"   \
+        "pmaxub     %[db_2],    "#src1",        "#src2"             \n\t"   \
+        "pcmpeqb    %[db_2],    %[db_2],        "#src1"             \n\t"   \
+        "xor        "#dst",     %[db_2],        %[db_1]             \n\t"
+
+#define MMI_BTOH(dst_l, dst_r, src)                                         \
+        "xor        %[db_1],    %[db_1],        %[db_1]             \n\t"   \
+        "pcmpgtb    %[db_2],    %[db_1],        "#src"              \n\t"   \
+        "punpcklbh  "#dst_r",   "#src",         %[db_2]             \n\t"   \
+        "punpckhbh  "#dst_l",   "#src",         %[db_2]             \n\t"
+
+#define MMI_VP8_LOOP_FILTER                                                 \
+        /* Calculation of hev */                                            \
+        "dmtc1      %[thresh],  %[ftmp3]                            \n\t"   \
+        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
+        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
+        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
+        "pasubub    %[ftmp0],   %[p1],          %[p0]               \n\t"   \
+        "pasubub    %[ftmp1],   %[q1],          %[q0]               \n\t"   \
+        "pmaxub     %[ftmp0],   %[ftmp0],       %[ftmp1]            \n\t"   \
+        MMI_PCMPGTUB(%[hev], %[ftmp0], %[ftmp3])                            \
+        /* Calculation of mask */                                           \
+        "pasubub    %[ftmp1],   %[p0],          %[q0]               \n\t"   \
+        "paddusb    %[ftmp1],   %[ftmp1],       %[ftmp1]            \n\t"   \
+        "pasubub    %[ftmp2],   %[p1],          %[q1]               \n\t"   \
+        "li         %[tmp0],    0x09                                \n\t"   \
+        "dmtc1      %[tmp0],    %[ftmp3]                            \n\t"   \
+        PSRLB_MMI(%[ftmp2],  %[ftmp3],  %[ftmp4],  %[ftmp5],  %[ftmp2])     \
+        "paddusb    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"   \
+        "dmtc1      %[e],       %[ftmp3]                            \n\t"   \
+        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
+        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
+        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
+        MMI_PCMPGTUB(%[mask], %[ftmp1], %[ftmp3])                           \
+        "pmaxub     %[mask],    %[mask],        %[ftmp0]            \n\t"   \
+        "pasubub    %[ftmp1],   %[p3],          %[p2]               \n\t"   \
+        "pasubub    %[ftmp2],   %[p2],          %[p1]               \n\t"   \
+        "pmaxub     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"   \
+        "pmaxub     %[mask],    %[mask],        %[ftmp1]            \n\t"   \
+        "pasubub    %[ftmp1],   %[q3],          %[q2]               \n\t"   \
+        "pasubub    %[ftmp2],   %[q2],          %[q1]               \n\t"   \
+        "pmaxub     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"   \
+        "pmaxub     %[mask],    %[mask],        %[ftmp1]            \n\t"   \
+        "dmtc1      %[i],       %[ftmp3]                            \n\t"   \
+        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
+        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
+        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
+        MMI_PCMPGTUB(%[mask], %[mask], %[ftmp3])                            \
+        "pcmpeqw    %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
+        "xor        %[mask],    %[mask],        %[ftmp3]            \n\t"   \
+        /* VP8_MBFILTER */                                                  \
+        "li         %[tmp0],    0x80808080                          \n\t"   \
+        "dmtc1      %[tmp0],    %[ftmp7]                            \n\t"   \
+        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]            \n\t"   \
+        "xor        %[p2],      %[p2],          %[ftmp7]            \n\t"   \
+        "xor        %[p1],      %[p1],          %[ftmp7]            \n\t"   \
+        "xor        %[p0],      %[p0],          %[ftmp7]            \n\t"   \
+        "xor        %[q0],      %[q0],          %[ftmp7]            \n\t"   \
+        "xor        %[q1],      %[q1],          %[ftmp7]            \n\t"   \
+        "xor        %[q2],      %[q2],          %[ftmp7]            \n\t"   \
+        "psubsb     %[ftmp4],   %[p1],          %[q1]               \n\t"   \
+        "psubb      %[ftmp5],   %[q0],          %[p0]               \n\t"   \
+        MMI_BTOH(%[ftmp1],  %[ftmp0],  %[ftmp5])                            \
+        MMI_BTOH(%[ftmp3],  %[ftmp2],  %[ftmp4])                            \
+        /* Right part */                                                    \
+        "paddh      %[ftmp5],   %[ftmp0],       %[ftmp0]            \n\t"   \
+        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"   \
+        "paddh      %[ftmp0],   %[ftmp2],       %[ftmp0]            \n\t"   \
+        /* Left part */                                                     \
+        "paddh      %[ftmp5],   %[ftmp1],       %[ftmp1]            \n\t"   \
+        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"   \
+        "paddh      %[ftmp1],   %[ftmp3],       %[ftmp1]            \n\t"   \
+        /* Combine left and right part */                                   \
+        "packsshb   %[ftmp1],   %[ftmp0],       %[ftmp1]            \n\t"   \
+        "and        %[ftmp1],   %[ftmp1],       %[mask]             \n\t"   \
+        "and        %[ftmp2],   %[ftmp1],       %[hev]              \n\t"   \
+        "li         %[tmp0],    0x04040404                          \n\t"   \
+        "dmtc1      %[tmp0],    %[ftmp0]                            \n\t"   \
+        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"   \
+        "paddsb     %[ftmp3],   %[ftmp2],       %[ftmp0]            \n\t"   \
+        "li         %[tmp0],    0x0B                                \n\t"   \
+        "dmtc1      %[tmp0],    %[ftmp4]                            \n\t"   \
+        PSRAB_MMI(%[ftmp3],  %[ftmp4],  %[ftmp5],  %[ftmp6],  %[ftmp3])     \
+        "li         %[tmp0],    0x03030303                          \n\t"   \
+        "dmtc1      %[tmp0],    %[ftmp0]                            \n\t"   \
+        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"   \
+        "paddsb     %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"   \
+        "li         %[tmp0],    0x0B                                \n\t"   \
+        "dmtc1      %[tmp0],    %[ftmp2]                            \n\t"   \
+        PSRAB_MMI(%[ftmp4],  %[ftmp2],  %[ftmp5],  %[ftmp6],  %[ftmp4])     \
+        "psubsb     %[q0],      %[q0],          %[ftmp3]            \n\t"   \
+        "paddsb     %[p0],      %[p0],          %[ftmp4]            \n\t"   \
+        /* filt_val &= ~hev */                                              \
+        "pcmpeqw    %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"   \
+        "xor        %[hev],     %[hev],         %[ftmp0]            \n\t"   \
+        "and        %[ftmp1],   %[ftmp1],       %[hev]              \n\t"   \
+        MMI_BTOH(%[ftmp5],  %[ftmp6],  %[ftmp1])                            \
+        "li         %[tmp0],    0x07                                \n\t"   \
+        "dmtc1      %[tmp0],    %[ftmp2]                            \n\t"   \
+        "li         %[tmp0],    0x001b001b                          \n\t"   \
+        "dmtc1      %[tmp0],    %[ftmp1]                            \n\t"   \
+        "punpcklwd  %[ftmp1],   %[ftmp1],       %[ftmp1]            \n\t"   \
+        "li         %[tmp0],    0x003f003f                          \n\t"   \
+        "dmtc1      %[tmp0],    %[ftmp0]                            \n\t"   \
+        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"   \
+        /* Right part */                                                    \
+        "pmullh     %[ftmp3],   %[ftmp6],       %[ftmp1]            \n\t"   \
+        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"   \
+        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
+        /* Left part */                                                     \
+        "pmullh     %[ftmp4],   %[ftmp5],       %[ftmp1]            \n\t"   \
+        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"   \
+        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp2]            \n\t"   \
+        /* Combine left and right part */                                   \
+        "packsshb   %[ftmp4],   %[ftmp3],       %[ftmp4]            \n\t"   \
+        "psubsb     %[q0],      %[q0],          %[ftmp4]            \n\t"   \
+        "xor        %[q0],      %[q0],          %[ftmp7]            \n\t"   \
+        "paddsb     %[p0],      %[p0],          %[ftmp4]            \n\t"   \
+        "xor        %[p0],      %[p0],          %[ftmp7]            \n\t"   \
+        "li         %[tmp0],    0x00120012                          \n\t"   \
+        "dmtc1      %[tmp0],    %[ftmp1]                            \n\t"   \
+        "punpcklwd  %[ftmp1],   %[ftmp1],       %[ftmp1]            \n\t"   \
+        /* Right part */                                                    \
+        "pmullh     %[ftmp3],   %[ftmp6],       %[ftmp1]            \n\t"   \
+        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"   \
+        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
+        /* Left part */                                                     \
+        "pmullh     %[ftmp4],   %[ftmp5],       %[ftmp1]            \n\t"   \
+        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"   \
+        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp2]            \n\t"   \
+        /* Combine left and right part */                                   \
+        "packsshb   %[ftmp4],   %[ftmp3],       %[ftmp4]            \n\t"   \
+        "psubsb     %[q1],      %[q1],          %[ftmp4]            \n\t"   \
+        "xor        %[q1],      %[q1],          %[ftmp7]            \n\t"   \
+        "paddsb     %[p1],      %[p1],          %[ftmp4]            \n\t"   \
+        "xor        %[p1],      %[p1],          %[ftmp7]            \n\t"   \
+        "li         %[tmp0],    0x03                                \n\t"   \
+        "dmtc1      %[tmp0],    %[ftmp1]                            \n\t"   \
+        /* Right part */                                                    \
+        "psllh      %[ftmp3],   %[ftmp6],       %[ftmp1]            \n\t"   \
+        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"   \
+        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"   \
+        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
+        /* Left part */                                                     \
+        "psllh      %[ftmp4],   %[ftmp5],       %[ftmp1]            \n\t"   \
+        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp5]            \n\t"   \
+        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"   \
+        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp2]            \n\t"   \
+        /* Combine left and right part */                                   \
+        "packsshb   %[ftmp4],   %[ftmp3],       %[ftmp4]            \n\t"   \
+        "psubsb     %[q2],      %[q2],          %[ftmp4]            \n\t"   \
+        "xor        %[q2],      %[q2],          %[ftmp7]            \n\t"   \
+        "paddsb     %[p2],      %[p2],          %[ftmp4]            \n\t"   \
+        "xor        %[p2],      %[p2],          %[ftmp7]            \n\t"
+
 #define PUT_VP8_EPEL4_H6_MMI(src, dst)                                      \
         MMI_ULWC1(%[ftmp1], src, 0x00)                                      \
         "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
@@ -621,15 +784,71 @@
 static av_always_inline void vp8_v_loop_filter8_mmi(uint8_t *dst,
         ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
 {
-    int i;
-
-    for (i = 0; i < 8; i++)
-        if (vp8_normal_limit(dst + i * 1, stride, flim_E, flim_I)) {
-            if (hev(dst + i * 1, stride, hev_thresh))
-                vp8_filter_common_is4tap(dst + i * 1, stride);
-            else
-                filter_mbedge(dst + i * 1, stride);
-        }
+    double ftmp[18];
+    uint32_t tmp[1];
+    DECLARE_DOUBLE_1;
+    DECLARE_DOUBLE_2;
+    DECLARE_UINT32_T;
+    __asm__ volatile(
+        /* Get data from dst */
+        "gsldlc1    %[q0],      0x07(%[dst])                      \n\t"
+        "gsldrc1    %[q0],      0x00(%[dst])                      \n\t"
+        PTR_SUBU    "%[tmp0],   %[dst],         %[stride]         \n\t"
+        "gsldlc1    %[p0],      0x07(%[tmp0])                     \n\t"
+        "gsldrc1    %[p0],      0x00(%[tmp0])                     \n\t"
+        PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
+        "gsldlc1    %[p1],      0x07(%[tmp0])                     \n\t"
+        "gsldrc1    %[p1],      0x00(%[tmp0])                     \n\t"
+        PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
+        "gsldlc1    %[p2],      0x07(%[tmp0])                     \n\t"
+        "gsldrc1    %[p2],      0x00(%[tmp0])                     \n\t"
+        PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
+        "gsldlc1    %[p3],      0x07(%[tmp0])                     \n\t"
+        "gsldrc1    %[p3],      0x00(%[tmp0])                     \n\t"
+        PTR_ADDU    "%[tmp0],   %[dst],         %[stride]         \n\t"
+        "gsldlc1    %[q1],      0x07(%[tmp0])                     \n\t"
+        "gsldrc1    %[q1],      0x00(%[tmp0])                     \n\t"
+        PTR_ADDU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
+        "gsldlc1    %[q2],      0x07(%[tmp0])                     \n\t"
+        "gsldrc1    %[q2],      0x00(%[tmp0])                     \n\t"
+        PTR_ADDU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
+        "gsldlc1    %[q3],      0x07(%[tmp0])                     \n\t"
+        "gsldrc1    %[q3],      0x00(%[tmp0])                     \n\t"
+        MMI_VP8_LOOP_FILTER
+        /* Move to dst */
+        "gssdlc1    %[q0],      0x07(%[dst])                      \n\t"
+        "gssdrc1    %[q0],      0x00(%[dst])                      \n\t"
+        PTR_SUBU    "%[tmp0],   %[dst],         %[stride]         \n\t"
+        "gssdlc1    %[p0],      0x07(%[tmp0])                     \n\t"
+        "gssdrc1    %[p0],      0x00(%[tmp0])                     \n\t"
+        PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
+        "gssdlc1    %[p1],      0x07(%[tmp0])                     \n\t"
+        "gssdrc1    %[p1],      0x00(%[tmp0])                     \n\t"
+        PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
+        "gssdlc1    %[p2],      0x07(%[tmp0])                     \n\t"
+        "gssdrc1    %[p2],      0x00(%[tmp0])                     \n\t"
+        PTR_ADDU    "%[tmp0],   %[dst],         %[stride]         \n\t"
+        "gssdlc1    %[q1],      0x07(%[tmp0])                     \n\t"
+        "gssdrc1    %[q1],      0x00(%[tmp0])                     \n\t"
+        PTR_ADDU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
+        "gssdlc1    %[q2],      0x07(%[tmp0])                     \n\t"
+        "gssdrc1    %[q2],      0x00(%[tmp0])                     \n\t"
+        : [p3]"=&f"(ftmp[0]),       [p2]"=&f"(ftmp[1]),
+          [p1]"=&f"(ftmp[2]),       [p0]"=&f"(ftmp[3]),
+          [q0]"=&f"(ftmp[4]),       [q1]"=&f"(ftmp[5]),
+          [q2]"=&f"(ftmp[6]),       [q3]"=&f"(ftmp[7]),
+          [ftmp0]"=&f"(ftmp[8]),    [ftmp1]"=&f"(ftmp[9]),
+          [ftmp2]"=&f"(ftmp[10]),   [ftmp3]"=&f"(ftmp[11]),
+          [hev]"=&f"(ftmp[12]),     [mask]"=&f"(ftmp[13]),
+          [ftmp4]"=&f"(ftmp[14]),   [ftmp5]"=&f"(ftmp[15]),
+          [ftmp6]"=&f"(ftmp[16]),   [ftmp7]"=&f"(ftmp[17]),
+          [dst]"+&r"(dst),          [tmp0]"=&r"(tmp[0]),
+          RESTRICT_ASM_DOUBLE_1,    RESTRICT_ASM_DOUBLE_2,
+          RESTRICT_ASM_UINT32_T
+        : [e]"r"((mips_reg)flim_E), [thresh]"r"((mips_reg)hev_thresh),
+          [i]"r"((mips_reg)flim_I), [stride]"r"((mips_reg)stride)
+        : "memory"
+    );
 }
 
 static av_always_inline void vp8_v_loop_filter8_inner_mmi(uint8_t *dst,
@@ -650,15 +869,85 @@
 static av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst,
         ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
 {
-    int i;
-
-    for (i = 0; i < 8; i++)
-        if (vp8_normal_limit(dst + i * stride, 1, flim_E, flim_I)) {
-            if (hev(dst + i * stride, 1, hev_thresh))
-                vp8_filter_common_is4tap(dst + i * stride, 1);
-            else
-                filter_mbedge(dst + i * stride, 1);
-        }
+    double ftmp[18];
+    uint32_t tmp[1];
+    DECLARE_DOUBLE_1;
+    DECLARE_DOUBLE_2;
+    DECLARE_UINT32_T;
+    __asm__ volatile(
+        /* Get data from dst */
+        "gsldlc1    %[p3],        0x03(%[dst])                    \n\t"
+        "gsldrc1    %[p3],        -0x04(%[dst])                   \n\t"
+        PTR_ADDU    "%[tmp0],     %[dst],           %[stride]     \n\t"
+        "gsldlc1    %[p2],        0x03(%[tmp0])                   \n\t"
+        "gsldrc1    %[p2],        -0x04(%[tmp0])                  \n\t"
+        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
+        "gsldlc1    %[p1],        0x03(%[tmp0])                   \n\t"
+        "gsldrc1    %[p1],        -0x04(%[tmp0])                  \n\t"
+        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
+        "gsldlc1    %[p0],        0x03(%[tmp0])                   \n\t"
+        "gsldrc1    %[p0],        -0x04(%[tmp0])                  \n\t"
+        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
+        "gsldlc1    %[q0],        0x03(%[tmp0])                   \n\t"
+        "gsldrc1    %[q0],        -0x04(%[tmp0])                  \n\t"
+        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
+        "gsldlc1    %[q1],        0x03(%[tmp0])                   \n\t"
+        "gsldrc1    %[q1],        -0x04(%[tmp0])                  \n\t"
+        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
+        "gsldlc1    %[q2],        0x03(%[tmp0])                   \n\t"
+        "gsldrc1    %[q2],        -0x04(%[tmp0])                  \n\t"
+        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
+        "gsldlc1    %[q3],        0x03(%[tmp0])                   \n\t"
+        "gsldrc1    %[q3],        -0x04(%[tmp0])                  \n\t"
+        /* Matrix transpose */
+        TRANSPOSE_8B(%[p3], %[p2], %[p1], %[p0],
+                     %[q0], %[q1], %[q2], %[q3],
+                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
+        MMI_VP8_LOOP_FILTER
+        /* Matrix transpose */
+        TRANSPOSE_8B(%[p3], %[p2], %[p1], %[p0],
+                     %[q0], %[q1], %[q2], %[q3],
+                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
+        /* Move to dst */
+        "gssdlc1    %[p3],        0x03(%[dst])                    \n\t"
+        "gssdrc1    %[p3],        -0x04(%[dst])                   \n\t"
+        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
+        "gssdlc1    %[p2],        0x03(%[dst])                    \n\t"
+        "gssdrc1    %[p2],        -0x04(%[dst])                   \n\t"
+        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
+        "gssdlc1    %[p1],        0x03(%[dst])                    \n\t"
+        "gssdrc1    %[p1],        -0x04(%[dst])                   \n\t"
+        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
+        "gssdlc1    %[p0],        0x03(%[dst])                    \n\t"
+        "gssdrc1    %[p0],        -0x04(%[dst])                   \n\t"
+        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
+        "gssdlc1    %[q0],        0x03(%[dst])                    \n\t"
+        "gssdrc1    %[q0],        -0x04(%[dst])                   \n\t"
+        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
+        "gssdlc1    %[q1],        0x03(%[dst])                    \n\t"
+        "gssdrc1    %[q1],        -0x04(%[dst])                   \n\t"
+        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
+        "gssdlc1    %[q2],        0x03(%[dst])                    \n\t"
+        "gssdrc1    %[q2],        -0x04(%[dst])                   \n\t"
+        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
+        "gssdlc1    %[q3],        0x03(%[dst])                    \n\t"
+        "gssdrc1    %[q3],        -0x04(%[dst])                   \n\t"
+        : [p3]"=&f"(ftmp[0]),       [p2]"=&f"(ftmp[1]),
+          [p1]"=&f"(ftmp[2]),       [p0]"=&f"(ftmp[3]),
+          [q0]"=&f"(ftmp[4]),       [q1]"=&f"(ftmp[5]),
+          [q2]"=&f"(ftmp[6]),       [q3]"=&f"(ftmp[7]),
+          [ftmp0]"=&f"(ftmp[8]),    [ftmp1]"=&f"(ftmp[9]),
+          [ftmp2]"=&f"(ftmp[10]),   [ftmp3]"=&f"(ftmp[11]),
+          [hev]"=&f"(ftmp[12]),     [mask]"=&f"(ftmp[13]),
+          [ftmp4]"=&f"(ftmp[14]),   [ftmp5]"=&f"(ftmp[15]),
+          [ftmp6]"=&f"(ftmp[16]),   [ftmp7]"=&f"(ftmp[17]),
+          [dst]"+&r"(dst),          [tmp0]"=&r"(tmp[0]),
+          RESTRICT_ASM_DOUBLE_1,    RESTRICT_ASM_DOUBLE_2,
+          RESTRICT_ASM_UINT32_T
+        : [e]"r"((mips_reg)flim_E), [thresh]"r"((mips_reg)hev_thresh),
+          [i]"r"((mips_reg)flim_I), [stride]"r"((mips_reg)stride)
+        : "memory"
+    );
 }
 
 static av_always_inline void vp8_h_loop_filter8_inner_mmi(uint8_t *dst,
@@ -890,8 +1179,7 @@
         MMI_SDC1(%[ftmp0], %[block], 0x18)
 
         TRANSPOSE_4H(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                     %[ftmp9], %[tmp0],  %[ftmp0], %[ftmp10])
+                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
 
         // t[0 4  8 12]
         "paddh      %[ftmp5],   %[ftmp1],       %[ftmp3]            \n\t"
@@ -926,8 +1214,7 @@
         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp11]           \n\t"
 
         TRANSPOSE_4H(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
-                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
-                     %[ftmp9], %[tmp0],  %[ftmp0], %[ftmp10])
+                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
 
         MMI_LWC1(%[ftmp5], %[dst0], 0x00)
         MMI_LWC1(%[ftmp6], %[dst1], 0x00)
@@ -1083,29 +1370,16 @@
 void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
         int flim_I, int hev_thresh)
 {
-    int i;
-
-    for (i = 0; i < 16; i++)
-        if (vp8_normal_limit(dst + i * 1, stride, flim_E, flim_I)) {
-            if (hev(dst + i * 1, stride, hev_thresh))
-                vp8_filter_common_is4tap(dst + i * 1, stride);
-            else
-                filter_mbedge(dst + i * 1, stride);
-        }
+    vp8_v_loop_filter8_mmi(dst, stride, flim_E, flim_I, hev_thresh);
+    vp8_v_loop_filter8_mmi(dst + 8, stride, flim_E, flim_I, hev_thresh);
 }
 
 void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
         int flim_I, int hev_thresh)
 {
-    int i;
-
-    for (i = 0; i < 16; i++)
-        if (vp8_normal_limit(dst + i * stride, 1, flim_E, flim_I)) {
-            if (hev(dst + i * stride, 1, hev_thresh))
-                vp8_filter_common_is4tap(dst + i * stride, 1);
-            else
-                filter_mbedge(dst + i * stride, 1);
-        }
+    vp8_h_loop_filter8_mmi(dst, stride, flim_E, flim_I, hev_thresh);
+    vp8_h_loop_filter8_mmi(dst + 8 * stride, stride, flim_E, flim_I,
+                           hev_thresh);
 }
 
 void ff_vp8_v_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,

diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index 3455126..35ee10d 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c

@@ -36,6 +36,7 @@
 #include "avcodec.h"
 #include "blockdsp.h"
 #include "copy_block.h"
+#include "hwaccel.h"
 #include "idctdsp.h"
 #include "internal.h"
 #include "jpegtables.h"
@@ -147,6 +148,7 @@
     s->org_height    = avctx->coded_height;
     avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
     avctx->colorspace = AVCOL_SPC_BT470BG;
+    s->hwaccel_pix_fmt = s->hwaccel_sw_pix_fmt = AV_PIX_FMT_NONE;
 
     if ((ret = build_basic_mjpeg_vlc(s)) < 0)
         return ret;
@@ -279,13 +281,18 @@
                                  code_max + 1, 0, 0)) < 0)
                 return ret;
         }
+
+        for (i = 0; i < 16; i++)
+            s->raw_huffman_lengths[class][index][i] = bits_table[i + 1];
+        for (i = 0; i < 256; i++)
+            s->raw_huffman_values[class][index][i] = val_table[i];
     }
     return 0;
 }
 
 int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
 {
-    int len, nb_components, i, width, height, bits, ret;
+    int len, nb_components, i, width, height, bits, ret, size_change;
     unsigned pix_fmt_id;
     int h_count[MAX_COMPONENTS] = { 0 };
     int v_count[MAX_COMPONENTS] = { 0 };
@@ -328,6 +335,8 @@
     av_log(s->avctx, AV_LOG_DEBUG, "sof0: picture: %dx%d\n", width, height);
     if (av_image_check_size(width, height, 0, s->avctx) < 0)
         return AVERROR_INVALIDDATA;
+    if (s->buf_size && (width + 7) / 8 * ((height + 7) / 8) > s->buf_size * 4LL)
+        return AVERROR_INVALIDDATA;
 
     nb_components = get_bits(&s->gb, 8);
     if (nb_components <= 0 ||
@@ -392,6 +401,7 @@
     if (width != s->width || height != s->height || bits != s->bits ||
         memcmp(s->h_count, h_count, sizeof(h_count))                ||
         memcmp(s->v_count, v_count, sizeof(v_count))) {
+        size_change = 1;
 
         s->width      = width;
         s->height     = height;
@@ -418,6 +428,8 @@
             return ret;
 
         s->first_picture = 0;
+    } else {
+        size_change = 0;
     }
 
     if (s->got_picture && s->interlaced && (s->bottom_field == !s->interlace_polarity)) {
@@ -614,6 +626,10 @@
         avpriv_report_missing_feature(s->avctx, "Lowres for weird subsampling");
         return AVERROR_PATCHWELCOME;
     }
+    if ((AV_RB32(s->upscale_h) || AV_RB32(s->upscale_v)) && s->progressive && s->avctx->pix_fmt == AV_PIX_FMT_GBRP) {
+        avpriv_report_missing_feature(s->avctx, "progressive for weird subsampling");
+        return AVERROR_PATCHWELCOME;
+    }
     if (s->ls) {
         memset(s->upscale_h, 0, sizeof(s->upscale_h));
         memset(s->upscale_v, 0, sizeof(s->upscale_v));
@@ -636,6 +652,27 @@
         return AVERROR_BUG;
     }
 
+    if (s->avctx->pix_fmt == s->hwaccel_sw_pix_fmt && !size_change) {
+        s->avctx->pix_fmt = s->hwaccel_pix_fmt;
+    } else {
+        enum AVPixelFormat pix_fmts[] = {
+#if CONFIG_MJPEG_NVDEC_HWACCEL
+            AV_PIX_FMT_CUDA,
+#endif
+#if CONFIG_MJPEG_VAAPI_HWACCEL
+            AV_PIX_FMT_VAAPI,
+#endif
+            s->avctx->pix_fmt,
+            AV_PIX_FMT_NONE,
+        };
+        s->hwaccel_pix_fmt = ff_get_format(s->avctx, pix_fmts);
+        if (s->hwaccel_pix_fmt < 0)
+            return AVERROR(EINVAL);
+
+        s->hwaccel_sw_pix_fmt = s->avctx->pix_fmt;
+        s->avctx->pix_fmt     = s->hwaccel_pix_fmt;
+    }
+
     if (s->avctx->skip_frame == AVDISCARD_ALL) {
         s->picture_ptr->pict_type = AV_PICTURE_TYPE_I;
         s->picture_ptr->key_frame = 1;
@@ -683,6 +720,19 @@
         }
         memset(s->coefs_finished, 0, sizeof(s->coefs_finished));
     }
+
+    if (s->avctx->hwaccel) {
+        s->hwaccel_picture_private =
+            av_mallocz(s->avctx->hwaccel->frame_priv_data_size);
+        if (!s->hwaccel_picture_private)
+            return AVERROR(ENOMEM);
+
+        ret = s->avctx->hwaccel->start_frame(s->avctx, s->raw_image_buffer,
+                                             s->raw_image_buffer_size);
+        if (ret < 0)
+            return ret;
+    }
+
     return 0;
 }
 
@@ -1009,6 +1059,11 @@
         for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
             int modified_predictor = predictor;
 
+            if (get_bits_left(&s->gb) < 1) {
+                av_log(s->avctx, AV_LOG_ERROR, "bitstream end in rgb_scan\n");
+                return AVERROR_INVALIDDATA;
+            }
+
             if (s->restart_interval && !s->restart_count){
                 s->restart_count = s->restart_interval;
                 resync_mb_x = mb_x;
@@ -1032,7 +1087,7 @@
                     return -1;
 
                 left[i] = buffer[mb_x][i] =
-                    mask & (pred + (dc * (1 << point_transform)));
+                    mask & (pred + (unsigned)(dc * (1 << point_transform)));
             }
 
             if (s->restart_interval && !--s->restart_count) {
@@ -1510,7 +1565,6 @@
         }
     }
 
-    av_assert0(s->picture_ptr->data[0]);
     /* XXX: verify len field validity */
     len = get_bits(&s->gb, 16);
     nb_components = get_bits(&s->gb, 8);
@@ -1546,9 +1600,7 @@
         s->h_scount[i]  = s->h_count[index];
         s->v_scount[i]  = s->v_count[index];
 
-        if(nb_components == 3 && s->nb_components == 3 && s->avctx->pix_fmt == AV_PIX_FMT_GBR24P)
-            index = (i+2)%3;
-        if(nb_components == 1 && s->nb_components == 3 && s->avctx->pix_fmt == AV_PIX_FMT_GBR24P)
+        if((nb_components == 1 || nb_components == 3) && s->nb_components == 3 && s->avctx->pix_fmt == AV_PIX_FMT_GBR24P)
             index = (index+2)%3;
 
         s->comp_index[i] = index;
@@ -1600,7 +1652,18 @@
     for (i = 0; i < nb_components; i++)
         s->last_dc[i] = (4 << s->bits);
 
-    if (s->lossless) {
+    if (s->avctx->hwaccel) {
+        int bytes_to_start = get_bits_count(&s->gb) / 8;
+        av_assert0(bytes_to_start >= 0 &&
+                   s->raw_scan_buffer_size >= bytes_to_start);
+
+        ret = s->avctx->hwaccel->decode_slice(s->avctx,
+                                              s->raw_scan_buffer      + bytes_to_start,
+                                              s->raw_scan_buffer_size - bytes_to_start);
+        if (ret < 0)
+            return ret;
+
+    } else if (s->lossless) {
         av_assert0(s->picture_ptr == s->picture);
         if (CONFIG_JPEGLS_DECODER && s->ls) {
 //            for () {
@@ -1867,7 +1930,7 @@
 
             // read 0th IFD and store the metadata
             // (return values > 0 indicate the presence of subimage metadata)
-            ret = avpriv_exif_decode_ifd(s->avctx, &gbytes, le, 0, &s->exif_metadata);
+            ret = ff_exif_decode_ifd(s->avctx, &gbytes, le, 0, &s->exif_metadata);
             if (ret < 0) {
                 av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error decoding EXIF data\n");
             }
@@ -2193,6 +2256,8 @@
     int ret = 0;
     int is16bit;
 
+    s->buf_size = buf_size;
+
     av_dict_free(&s->exif_metadata);
     av_freep(&s->stereo3d);
     s->adobe_transform = -1;
@@ -2278,6 +2343,8 @@
         case SOI:
             s->restart_interval = 0;
             s->restart_count    = 0;
+            s->raw_image_buffer      = buf_ptr;
+            s->raw_image_buffer_size = buf_end - buf_ptr;
             /* nothing to do on SOI */
             break;
         case DHT:
@@ -2288,6 +2355,10 @@
             break;
         case SOF0:
         case SOF1:
+            if (start_code == SOF0)
+                s->avctx->profile = FF_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT;
+            else
+                s->avctx->profile = FF_PROFILE_MJPEG_HUFFMAN_EXTENDED_SEQUENTIAL_DCT;
             s->lossless    = 0;
             s->ls          = 0;
             s->progressive = 0;
@@ -2295,6 +2366,7 @@
                 goto fail;
             break;
         case SOF2:
+            s->avctx->profile = FF_PROFILE_MJPEG_HUFFMAN_PROGRESSIVE_DCT;
             s->lossless    = 0;
             s->ls          = 0;
             s->progressive = 1;
@@ -2302,6 +2374,7 @@
                 goto fail;
             break;
         case SOF3:
+            s->avctx->profile     = FF_PROFILE_MJPEG_HUFFMAN_LOSSLESS;
             s->avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
             s->lossless    = 1;
             s->ls          = 0;
@@ -2310,6 +2383,7 @@
                 goto fail;
             break;
         case SOF48:
+            s->avctx->profile     = FF_PROFILE_MJPEG_JPEG_LS;
             s->avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
             s->lossless    = 1;
             s->ls          = 1;
@@ -2324,7 +2398,8 @@
             break;
         case EOI:
 eoi_parser:
-            if (avctx->skip_frame != AVDISCARD_ALL && s->progressive && s->cur_scan && s->got_picture)
+            if (!avctx->hwaccel && avctx->skip_frame != AVDISCARD_ALL &&
+                s->progressive && s->cur_scan && s->got_picture)
                 mjpeg_idct_scan_progressive_ac(s);
             s->cur_scan = 0;
             if (!s->got_picture) {
@@ -2342,6 +2417,13 @@
                 s->got_picture = 0;
                 goto the_end_no_picture;
             }
+            if (s->avctx->hwaccel) {
+                ret = s->avctx->hwaccel->end_frame(s->avctx);
+                if (ret < 0)
+                    return ret;
+
+                av_freep(&s->hwaccel_picture_private);
+            }
             if ((ret = av_frame_ref(frame, s->picture_ptr)) < 0)
                 return ret;
             *got_frame = 1;
@@ -2364,6 +2446,9 @@
 
             goto the_end;
         case SOS:
+            s->raw_scan_buffer      = buf_ptr;
+            s->raw_scan_buffer_size = buf_end - buf_ptr;
+
             s->cur_scan++;
             if (avctx->skip_frame == AVDISCARD_ALL) {
                 skip_bits(&s->gb, get_bits_left(&s->gb));
@@ -2428,7 +2513,10 @@
                    avctx->pix_fmt == AV_PIX_FMT_GBRP     ||
                    avctx->pix_fmt == AV_PIX_FMT_GBRAP
                   );
-        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &hshift, &vshift);
+        ret = av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt, &hshift, &vshift);
+        if (ret)
+            return ret;
+
         av_assert0(s->nb_components == av_pix_fmt_count_planes(s->picture_ptr->format));
         for (p = 0; p<s->nb_components; p++) {
             uint8_t *line = s->picture_ptr->data[p];
@@ -2487,7 +2575,10 @@
                    avctx->pix_fmt == AV_PIX_FMT_GBRP     ||
                    avctx->pix_fmt == AV_PIX_FMT_GBRAP
                    );
-        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &hshift, &vshift);
+        ret = av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt, &hshift, &vshift);
+        if (ret)
+            return ret;
+
         av_assert0(s->nb_components == av_pix_fmt_count_planes(s->picture_ptr->format));
         for (p = 0; p < s->nb_components; p++) {
             uint8_t *dst;
@@ -2515,7 +2606,10 @@
     }
     if (s->flipped && !s->rgb) {
         int j;
-        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &hshift, &vshift);
+        ret = av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt, &hshift, &vshift);
+        if (ret)
+            return ret;
+
         av_assert0(s->nb_components == av_pix_fmt_count_planes(s->picture_ptr->format));
         for (index=0; index<s->nb_components; index++) {
             uint8_t *dst = s->picture_ptr->data[index];
@@ -2657,6 +2751,8 @@
 
     reset_icc_profile(s);
 
+    av_freep(&s->hwaccel_picture_private);
+
     return 0;
 }
 
@@ -2697,6 +2793,15 @@
     .priv_class     = &mjpegdec_class,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
                       FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
+    .hw_configs     = (const AVCodecHWConfigInternal*[]) {
+#if CONFIG_MJPEG_NVDEC_HWACCEL
+                        HWACCEL_NVDEC(mjpeg),
+#endif
+#if CONFIG_MJPEG_VAAPI_HWACCEL
+                        HWACCEL_VAAPI(mjpeg),
+#endif
+                        NULL
+                    },
 };
 #endif
 #if CONFIG_THP_DECODER

diff --git a/libavcodec/mjpegdec.h b/libavcodec/mjpegdec.h
index c84a40a..653fe7c 100644
--- a/libavcodec/mjpegdec.h
+++ b/libavcodec/mjpegdec.h

@@ -39,12 +39,15 @@
 #include "hpeldsp.h"
 #include "idctdsp.h"
 
+#undef near /* This file uses struct member 'near' which in windows.h is defined as empty. */
+
 #define MAX_COMPONENTS 4
 
 typedef struct MJpegDecodeContext {
     AVClass *class;
     AVCodecContext *avctx;
     GetBitContext gb;
+    int buf_size;
 
     int start_code; /* current start code */
     int buffer_size;
@@ -135,6 +138,19 @@
     int *iccdatalens;
     int iccnum;
     int iccread;
+
+    // Raw stream data for hwaccel use.
+    const uint8_t *raw_image_buffer;
+    size_t         raw_image_buffer_size;
+    const uint8_t *raw_scan_buffer;
+    size_t         raw_scan_buffer_size;
+
+    uint8_t raw_huffman_lengths[2][4][16];
+    uint8_t raw_huffman_values[2][4][256];
+
+    enum AVPixelFormat hwaccel_sw_pix_fmt;
+    enum AVPixelFormat hwaccel_pix_fmt;
+    void *hwaccel_picture_private;
 } MJpegDecodeContext;
 
 int ff_mjpeg_decode_init(AVCodecContext *avctx);

diff --git a/libavcodec/mjpegenc.c b/libavcodec/mjpegenc.c
index e6cdaf6..d2fcb8e 100644
--- a/libavcodec/mjpegenc.c
+++ b/libavcodec/mjpegenc.c

@@ -358,12 +358,6 @@
 
     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
 
-#if FF_API_EMU_EDGE
-    //CODEC_FLAG_EMU_EDGE have to be cleared
-    if(s->avctx->flags & CODEC_FLAG_EMU_EDGE)
-        return AVERROR(EINVAL);
-#endif
-
     if ((avctx->height & 15) && avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
         av_log(avctx, AV_LOG_ERROR,
                "Heights which are not a multiple of 16 might fail with some decoders, "

diff --git a/libavcodec/mlp_parser.c b/libavcodec/mlp_parser.c
index 23601c8..eb3435b 100644
--- a/libavcodec/mlp_parser.c
+++ b/libavcodec/mlp_parser.c

@@ -119,7 +119,7 @@
     return layout;
 }
 
-static int ff_mlp_get_major_sync_size(const uint8_t * buf, int bufsize)
+static int mlp_get_major_sync_size(const uint8_t * buf, int bufsize)
 {
     int has_extension, extensions = 0;
     int size = 28;
@@ -149,7 +149,7 @@
 
     av_assert1(get_bits_count(gb) == 0);
 
-    header_size = ff_mlp_get_major_sync_size(gb->buffer, gb->size_in_bits >> 3);
+    header_size = mlp_get_major_sync_size(gb->buffer, gb->size_in_bits >> 3);
     if (header_size < 0 || gb->size_in_bits < header_size << 3) {
         av_log(log, AV_LOG_ERROR, "packet too short, unable to read major sync\n");
         return -1;
@@ -256,67 +256,71 @@
     if (buf_size == 0)
         return 0;
 
-    if (!mp->in_sync) {
-        // Not in sync - find a major sync header
-
-        for (i = 0; i < buf_size; i++) {
-            mp->pc.state = (mp->pc.state << 8) | buf[i];
-            if ((mp->pc.state & 0xfffffffe) == 0xf8726fba &&
-                // ignore if we do not have the data for the start of header
-                mp->pc.index + i >= 7) {
-                mp->in_sync = 1;
-                mp->bytes_left = 0;
-                break;
-            }
-        }
-
+    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
+        next = buf_size;
+    } else {
         if (!mp->in_sync) {
-            if (ff_combine_frame(&mp->pc, END_NOT_FOUND, &buf, &buf_size) != -1)
+            // Not in sync - find a major sync header
+
+            for (i = 0; i < buf_size; i++) {
+                mp->pc.state = (mp->pc.state << 8) | buf[i];
+                if ((mp->pc.state & 0xfffffffe) == 0xf8726fba &&
+                    // ignore if we do not have the data for the start of header
+                    mp->pc.index + i >= 7) {
+                    mp->in_sync = 1;
+                    mp->bytes_left = 0;
+                    break;
+                }
+            }
+
+            if (!mp->in_sync) {
+                if (ff_combine_frame(&mp->pc, END_NOT_FOUND, &buf, &buf_size) != -1)
+                    av_log(avctx, AV_LOG_WARNING, "ff_combine_frame failed\n");
+                return buf_size;
+            }
+
+            if ((ret = ff_combine_frame(&mp->pc, i - 7, &buf, &buf_size)) < 0) {
                 av_log(avctx, AV_LOG_WARNING, "ff_combine_frame failed\n");
+                return ret;
+            }
+
+            return i - 7;
+        }
+
+        if (mp->bytes_left == 0) {
+            // Find length of this packet
+
+            /* Copy overread bytes from last frame into buffer. */
+            for(; mp->pc.overread>0; mp->pc.overread--) {
+                mp->pc.buffer[mp->pc.index++]= mp->pc.buffer[mp->pc.overread_index++];
+            }
+
+            if (mp->pc.index + buf_size < 2) {
+                if (ff_combine_frame(&mp->pc, END_NOT_FOUND, &buf, &buf_size) != -1)
+                    av_log(avctx, AV_LOG_WARNING, "ff_combine_frame failed\n");
+                return buf_size;
+            }
+
+            mp->bytes_left = ((mp->pc.index > 0 ? mp->pc.buffer[0] : buf[0]) << 8)
+                           |  (mp->pc.index > 1 ? mp->pc.buffer[1] : buf[1-mp->pc.index]);
+            mp->bytes_left = (mp->bytes_left & 0xfff) * 2;
+            if (mp->bytes_left <= 0) { // prevent infinite loop
+                goto lost_sync;
+            }
+            mp->bytes_left -= mp->pc.index;
+        }
+
+        next = (mp->bytes_left > buf_size) ? END_NOT_FOUND : mp->bytes_left;
+
+        if (ff_combine_frame(&mp->pc, next, &buf, &buf_size) < 0) {
+            mp->bytes_left -= buf_size;
             return buf_size;
         }
 
-        if ((ret = ff_combine_frame(&mp->pc, i - 7, &buf, &buf_size)) < 0) {
-            av_log(avctx, AV_LOG_WARNING, "ff_combine_frame failed\n");
-            return ret;
-        }
-
-        return i - 7;
+        mp->bytes_left = 0;
     }
 
-    if (mp->bytes_left == 0) {
-        // Find length of this packet
-
-        /* Copy overread bytes from last frame into buffer. */
-        for(; mp->pc.overread>0; mp->pc.overread--) {
-            mp->pc.buffer[mp->pc.index++]= mp->pc.buffer[mp->pc.overread_index++];
-        }
-
-        if (mp->pc.index + buf_size < 2) {
-            if (ff_combine_frame(&mp->pc, END_NOT_FOUND, &buf, &buf_size) != -1)
-                av_log(avctx, AV_LOG_WARNING, "ff_combine_frame failed\n");
-            return buf_size;
-        }
-
-        mp->bytes_left = ((mp->pc.index > 0 ? mp->pc.buffer[0] : buf[0]) << 8)
-                       |  (mp->pc.index > 1 ? mp->pc.buffer[1] : buf[1-mp->pc.index]);
-        mp->bytes_left = (mp->bytes_left & 0xfff) * 2;
-        if (mp->bytes_left <= 0) { // prevent infinite loop
-            goto lost_sync;
-        }
-        mp->bytes_left -= mp->pc.index;
-    }
-
-    next = (mp->bytes_left > buf_size) ? END_NOT_FOUND : mp->bytes_left;
-
-    if (ff_combine_frame(&mp->pc, next, &buf, &buf_size) < 0) {
-        mp->bytes_left -= buf_size;
-        return buf_size;
-    }
-
-    mp->bytes_left = 0;
-
-    sync_present = (AV_RB32(buf + 4) & 0xfffffffe) == 0xf8726fba;
+    sync_present = buf_size >= 8 && (AV_RB32(buf + 4) & 0xfffffffe) == 0xf8726fba;
 
     if (!sync_present) {
         /* The first nibble of a frame is a parity check of the 4-byte
@@ -357,28 +361,11 @@
         if(!avctx->channels || !avctx->channel_layout) {
         if (mh.stream_type == 0xbb) {
             /* MLP stream */
-            if (avctx->request_channel_layout &&
-                (avctx->request_channel_layout & AV_CH_LAYOUT_STEREO) ==
-                avctx->request_channel_layout &&
-                mh.num_substreams > 1) {
-                avctx->channels       = 2;
-                avctx->channel_layout = AV_CH_LAYOUT_STEREO;
-            } else {
-                avctx->channels       = mh.channels_mlp;
-                avctx->channel_layout = mh.channel_layout_mlp;
-            }
+            avctx->channels       = mh.channels_mlp;
+            avctx->channel_layout = mh.channel_layout_mlp;
         } else { /* mh.stream_type == 0xba */
             /* TrueHD stream */
-            if (avctx->request_channel_layout &&
-                    (avctx->request_channel_layout & AV_CH_LAYOUT_STEREO) ==
-                    avctx->request_channel_layout &&
-                    mh.num_substreams > 1) {
-                avctx->channels       = 2;
-                avctx->channel_layout = AV_CH_LAYOUT_STEREO;
-            } else if (!mh.channels_thd_stream2 ||
-                       (avctx->request_channel_layout &&
-                        (avctx->request_channel_layout & mh.channel_layout_thd_stream1) ==
-                        avctx->request_channel_layout)) {
+            if (!mh.channels_thd_stream2) {
                 avctx->channels       = mh.channels_thd_stream1;
                 avctx->channel_layout = mh.channel_layout_thd_stream1;
             } else {

diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index d5585d3..8caa266 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c

@@ -66,7 +66,7 @@
     /// For each channel output by the matrix, the output channel to map it to
     uint8_t     ch_assign[MAX_CHANNELS];
     /// The channel layout for this substream
-    uint64_t    ch_layout;
+    uint64_t    mask;
     /// The matrix encoding mode for this substream
     enum AVMatrixEncoding matrix_encoding;
 
@@ -180,6 +180,11 @@
     AV_CH_LOW_FREQUENCY_2,                                   // LFE2
 };
 
+static int mlp_channel_layout_subset(uint64_t channel_layout, uint64_t mask)
+{
+    return channel_layout && ((channel_layout & mask) == channel_layout);
+}
+
 static uint64_t thd_channel_layout_extract_channel(uint64_t channel_layout,
                                                    int index)
 {
@@ -390,8 +395,8 @@
             return AVERROR_PATCHWELCOME;
         }
         if ((substr = (mh.num_substreams > 1)))
-            m->substream[0].ch_layout = AV_CH_LAYOUT_STEREO;
-        m->substream[substr].ch_layout = mh.channel_layout_mlp;
+            m->substream[0].mask = AV_CH_LAYOUT_STEREO;
+        m->substream[substr].mask = mh.channel_layout_mlp;
     } else {
         if (mh.stream_type != 0xba) {
             avpriv_request_sample(m->avctx,
@@ -400,15 +405,15 @@
             return AVERROR_PATCHWELCOME;
         }
         if ((substr = (mh.num_substreams > 1)))
-            m->substream[0].ch_layout = AV_CH_LAYOUT_STEREO;
+            m->substream[0].mask = AV_CH_LAYOUT_STEREO;
         if (mh.num_substreams > 2)
             if (mh.channel_layout_thd_stream2)
-                m->substream[2].ch_layout = mh.channel_layout_thd_stream2;
+                m->substream[2].mask = mh.channel_layout_thd_stream2;
             else
-                m->substream[2].ch_layout = mh.channel_layout_thd_stream1;
-        m->substream[substr].ch_layout = mh.channel_layout_thd_stream1;
+                m->substream[2].mask = mh.channel_layout_thd_stream1;
+        m->substream[substr].mask = mh.channel_layout_thd_stream1;
 
-        if (m->avctx->channels<=2 && m->substream[substr].ch_layout == AV_CH_LAYOUT_MONO && m->max_decoded_substream == 1) {
+        if (m->avctx->channels<=2 && m->substream[substr].mask == AV_CH_LAYOUT_MONO && m->max_decoded_substream == 1) {
             av_log(m->avctx, AV_LOG_DEBUG, "Mono stream with 2 substreams, ignoring 2nd\n");
             m->max_decoded_substream = 0;
             if (m->avctx->channels==2)
@@ -474,7 +479,7 @@
     uint8_t checksum;
     uint8_t lossless_check;
     int start_count = get_bits_count(gbp);
-    int min_channel, max_channel, max_matrix_channel;
+    int min_channel, max_channel, max_matrix_channel, noise_type;
     const int std_max_matrix_channel = m->avctx->codec_id == AV_CODEC_ID_MLP
                                      ? MAX_MATRIX_CHANNEL_MLP
                                      : MAX_MATRIX_CHANNEL_TRUEHD;
@@ -487,9 +492,9 @@
         return AVERROR_INVALIDDATA;
     }
 
-    s->noise_type = get_bits1(gbp);
+    noise_type = get_bits1(gbp);
 
-    if (m->avctx->codec_id == AV_CODEC_ID_MLP && s->noise_type) {
+    if (m->avctx->codec_id == AV_CODEC_ID_MLP && noise_type) {
         av_log(m->avctx, AV_LOG_ERROR, "MLP must have 0x31ea sync word.\n");
         return AVERROR_INVALIDDATA;
     }
@@ -515,7 +520,7 @@
 
     /* This should happen for TrueHD streams with >6 channels and MLP's noise
      * type. It is not yet known if this is allowed. */
-    if (max_channel > MAX_MATRIX_CHANNEL_MLP && !s->noise_type) {
+    if (max_channel > MAX_MATRIX_CHANNEL_MLP && !noise_type) {
         avpriv_request_sample(m->avctx,
                               "%d channels (more than the "
                               "maximum supported by the decoder)",
@@ -532,13 +537,14 @@
     s->min_channel        = min_channel;
     s->max_channel        = max_channel;
     s->max_matrix_channel = max_matrix_channel;
+    s->noise_type         = noise_type;
 
-    if (m->avctx->request_channel_layout && (s->ch_layout & m->avctx->request_channel_layout) ==
-        m->avctx->request_channel_layout && m->max_decoded_substream > substr) {
+    if (mlp_channel_layout_subset(m->avctx->request_channel_layout, s->mask) &&
+        m->max_decoded_substream > substr) {
         av_log(m->avctx, AV_LOG_DEBUG,
                "Extracting %d-channel downmix (0x%"PRIx64") from substream %d. "
                "Further substreams will be skipped.\n",
-               s->max_channel + 1, s->ch_layout, substr);
+               s->max_channel + 1, s->mask, substr);
         m->max_decoded_substream = substr;
     }
 
@@ -565,9 +571,9 @@
     for (ch = 0; ch <= s->max_matrix_channel; ch++) {
         int ch_assign = get_bits(gbp, 6);
         if (m->avctx->codec_id == AV_CODEC_ID_TRUEHD) {
-            uint64_t channel = thd_channel_layout_extract_channel(s->ch_layout,
+            uint64_t channel = thd_channel_layout_extract_channel(s->mask,
                                                                   ch_assign);
-            ch_assign = av_get_channel_layout_channel_index(s->ch_layout,
+            ch_assign = av_get_channel_layout_channel_index(s->mask,
                                                             channel);
         }
         if (ch_assign < 0 || ch_assign > s->max_matrix_channel) {
@@ -609,7 +615,7 @@
 
     if (substr == m->max_decoded_substream) {
         m->avctx->channels       = s->max_matrix_channel + 1;
-        m->avctx->channel_layout = s->ch_layout;
+        m->avctx->channel_layout = s->mask;
         m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
                                                                s->output_shift,
                                                                s->max_matrix_channel,

diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c
index 0b1195d..647a22e 100644
--- a/libavcodec/mmaldec.c
+++ b/libavcodec/mmaldec.c

@@ -34,6 +34,7 @@
 #include <stdatomic.h>
 
 #include "avcodec.h"
+#include "hwaccel.h"
 #include "internal.h"
 #include "libavutil/avassert.h"
 #include "libavutil/buffer.h"
@@ -807,32 +808,9 @@
     return ret;
 }
 
-AVHWAccel ff_h264_mmal_hwaccel = {
-    .name       = "h264_mmal",
-    .type       = AVMEDIA_TYPE_VIDEO,
-    .id         = AV_CODEC_ID_H264,
-    .pix_fmt    = AV_PIX_FMT_MMAL,
-};
-
-AVHWAccel ff_mpeg2_mmal_hwaccel = {
-    .name       = "mpeg2_mmal",
-    .type       = AVMEDIA_TYPE_VIDEO,
-    .id         = AV_CODEC_ID_MPEG2VIDEO,
-    .pix_fmt    = AV_PIX_FMT_MMAL,
-};
-
-AVHWAccel ff_mpeg4_mmal_hwaccel = {
-    .name       = "mpeg4_mmal",
-    .type       = AVMEDIA_TYPE_VIDEO,
-    .id         = AV_CODEC_ID_MPEG4,
-    .pix_fmt    = AV_PIX_FMT_MMAL,
-};
-
-AVHWAccel ff_vc1_mmal_hwaccel = {
-    .name       = "vc1_mmal",
-    .type       = AVMEDIA_TYPE_VIDEO,
-    .id         = AV_CODEC_ID_VC1,
-    .pix_fmt    = AV_PIX_FMT_MMAL,
+static const AVCodecHWConfigInternal *mmal_hw_configs[] = {
+    HW_CONFIG_INTERNAL(MMAL),
+    NULL
 };
 
 static const AVOption options[]={
@@ -844,6 +822,7 @@
 #define FFMMAL_DEC_CLASS(NAME) \
     static const AVClass ffmmal_##NAME##_dec_class = { \
         .class_name = "mmal_" #NAME "_dec", \
+        .item_name  = av_default_item_name, \
         .option     = options, \
         .version    = LIBAVUTIL_VERSION_INT, \
     };
@@ -861,11 +840,13 @@
         .decode         = ffmmal_decode, \
         .flush          = ffmmal_flush, \
         .priv_class     = &ffmmal_##NAME##_dec_class, \
-        .capabilities   = AV_CODEC_CAP_DELAY, \
+        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE, \
         .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS, \
         .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_MMAL, \
                                                          AV_PIX_FMT_YUV420P, \
                                                          AV_PIX_FMT_NONE}, \
+        .hw_configs     = mmal_hw_configs, \
+        .wrapper_name   = "mmal", \
     };
 
 FFMMAL_DEC(h264, AV_CODEC_ID_H264)

diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 316d16a..8b5ce21 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c

@@ -313,26 +313,6 @@
         return -1;
     }
 
-#if FF_API_MOTION_EST
-    //special case of snow is needed because snow uses its own iterative ME code
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (s->motion_est == FF_ME_EPZS) {
-        if (s->me_method == ME_ZERO)
-            s->motion_est = FF_ME_ZERO;
-        else if (s->me_method == ME_EPZS)
-            s->motion_est = FF_ME_EPZS;
-        else if (s->me_method == ME_X1)
-            s->motion_est = FF_ME_XONE;
-        else if (s->avctx->codec_id != AV_CODEC_ID_SNOW) {
-            av_log(s->avctx, AV_LOG_ERROR,
-                   "me_method is only allowed to be set to zero and epzs; "
-                   "for hex,umh,full and others see dia_size\n");
-            return -1;
-        }
-    }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     c->avctx= s->avctx;
 
     if(s->codec_id == AV_CODEC_ID_H261)

diff --git a/libavcodec/movsub_bsf.c b/libavcodec/movsub_bsf.c
index 3cb1183..5878607 100644
--- a/libavcodec/movsub_bsf.c
+++ b/libavcodec/movsub_bsf.c

@@ -62,35 +62,23 @@
     .filter = text2movsub,
 };
 
-static int mov2textsub(AVBSFContext *ctx, AVPacket *out)
+static int mov2textsub(AVBSFContext *ctx, AVPacket *pkt)
 {
-    AVPacket *in;
     int ret = 0;
 
-    ret = ff_bsf_get_packet(ctx, &in);
+    ret = ff_bsf_get_packet_ref(ctx, pkt);
     if (ret < 0)
         return ret;
 
-    if (in->size < 2) {
-       ret = AVERROR_INVALIDDATA;
-       goto fail;
+    if (pkt->size < 2) {
+       av_packet_unref(pkt);
+       return AVERROR_INVALIDDATA;
     }
 
-    ret = av_new_packet(out, FFMIN(in->size - 2, AV_RB16(in->data)));
-    if (ret < 0)
-        goto fail;
+    pkt->data += 2;
+    pkt->size  = FFMIN(pkt->size - 2, AV_RB16(pkt->data));
 
-    ret = av_packet_copy_props(out, in);
-    if (ret < 0)
-        goto fail;
-
-    memcpy(out->data, in->data + 2, out->size);
-
-fail:
-    if (ret < 0)
-        av_packet_unref(out);
-    av_packet_free(&in);
-    return ret;
+    return 0;
 }
 
 const AVBitStreamFilter ff_mov2textsub_bsf = {

diff --git a/libavcodec/movtextdec.c b/libavcodec/movtextdec.c
index fb5085c..c38c5ed 100644
--- a/libavcodec/movtextdec.c
+++ b/libavcodec/movtextdec.c

@@ -299,6 +299,14 @@
         m->s_temp->style_start = AV_RB16(tsmb);
         tsmb += 2;
         m->s_temp->style_end = AV_RB16(tsmb);
+
+        if (   m->s_temp->style_end < m->s_temp->style_start
+            || (m->count_s && m->s_temp->style_start < m->s[m->count_s - 1]->style_end)) {
+            av_freep(&m->s_temp);
+            mov_text_cleanup(m);
+            return AVERROR(ENOMEM);
+        }
+
         tsmb += 2;
         m->s_temp->style_fontID = AV_RB16(tsmb);
         tsmb += 2;
@@ -326,9 +334,24 @@
 
 const static size_t box_count = FF_ARRAY_ELEMS(box_types);
 
-static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
-                        MovTextContext *m)
+// Return byte length of the UTF-8 sequence starting at text[0]. 0 on error.
+static int get_utf8_length_at(const char *text, const char *text_end)
 {
+    const char *start = text;
+    int err = 0;
+    uint32_t c;
+    GET_UTF8(c, text < text_end ? (uint8_t)*text++ : (err = 1, 0), goto error;);
+    if (err)
+        goto error;
+    return text - start;
+error:
+    return 0;
+}
+
+static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
+                       AVCodecContext *avctx)
+{
+    MovTextContext *m = avctx->priv_data;
     int i = 0;
     int j = 0;
     int text_pos = 0;
@@ -342,6 +365,8 @@
     }
 
     while (text < text_end) {
+        int len;
+
         if (m->box_flags & STYL_BOX) {
             for (i = 0; i < m->style_entries; i++) {
                 if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
@@ -388,17 +413,24 @@
             }
         }
 
-        switch (*text) {
-        case '\r':
-            break;
-        case '\n':
-            av_bprintf(buf, "\\N");
-            break;
-        default:
-            av_bprint_chars(buf, *text, 1);
-            break;
+        len = get_utf8_length_at(text, text_end);
+        if (len < 1) {
+            av_log(avctx, AV_LOG_ERROR, "invalid UTF-8 byte in subtitle\n");
+            len = 1;
         }
-        text++;
+        for (i = 0; i < len; i++) {
+            switch (*text) {
+            case '\r':
+                break;
+            case '\n':
+                av_bprintf(buf, "\\N");
+                break;
+            default:
+                av_bprint_chars(buf, *text, 1);
+                break;
+            }
+            text++;
+        }
         text_pos++;
     }
 
@@ -436,6 +468,7 @@
     int text_length, tsmb_type, ret_tsmb;
     uint64_t tsmb_size;
     const uint8_t *tsmb;
+    size_t i;
 
     if (!ptr || avpkt->size < 2)
         return AVERROR_INVALIDDATA;
@@ -495,7 +528,7 @@
             if (tsmb_size > avpkt->size - m->tracksize)
                 break;
 
-            for (size_t i = 0; i < box_count; i++) {
+            for (i = 0; i < box_count; i++) {
                 if (tsmb_type == box_types[i].type) {
                     if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size)
                         break;
@@ -506,10 +539,10 @@
             }
             m->tracksize = m->tracksize + tsmb_size;
         }
-        text_to_ass(&buf, ptr, end, m);
+        text_to_ass(&buf, ptr, end, avctx);
         mov_text_cleanup(m);
     } else
-        text_to_ass(&buf, ptr, end, m);
+        text_to_ass(&buf, ptr, end, avctx);
 
     ret = ff_ass_add_rect(sub, buf.str, m->readorder++, 0, NULL, NULL);
     av_bprint_finalize(&buf, NULL);

diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
index d795e31..c19ef38 100644
--- a/libavcodec/movtextenc.c
+++ b/libavcodec/movtextenc.c

@@ -72,6 +72,7 @@
     uint8_t style_fontsize;
     uint32_t style_color;
     uint16_t text_pos;
+    uint16_t byte_count;
 } MovTextContext;
 
 typedef struct {
@@ -304,11 +305,34 @@
      */
 }
 
+static uint16_t utf8_strlen(const char *text, int len)
+{
+    uint16_t i = 0, ret = 0;
+    while (i < len) {
+        char c = text[i];
+        if ((c & 0x80) == 0)
+            i += 1;
+        else if ((c & 0xE0) == 0xC0)
+            i += 2;
+        else if ((c & 0xF0) == 0xE0)
+            i += 3;
+        else if ((c & 0xF8) == 0xF0)
+            i += 4;
+        else
+            return 0;
+        ret++;
+    }
+    return ret;
+}
+
 static void mov_text_text_cb(void *priv, const char *text, int len)
 {
+    uint16_t utf8_len = utf8_strlen(text, len);
     MovTextContext *s = priv;
     av_bprint_append_data(&s->buffer, text, len);
-    s->text_pos += len;
+    // If it's not utf-8, just use the byte length
+    s->text_pos += utf8_len ? utf8_len : len;
+    s->byte_count += len;
 }
 
 static void mov_text_new_line_cb(void *priv, int forced)
@@ -316,6 +340,7 @@
     MovTextContext *s = priv;
     av_bprint_append_data(&s->buffer, "\n", 1);
     s->text_pos += 1;
+    s->byte_count += 1;
 }
 
 static const ASSCodesCallbacks mov_text_callbacks = {
@@ -333,6 +358,7 @@
     int i, length;
     size_t j;
 
+    s->byte_count = 0;
     s->text_pos = 0;
     s->count = 0;
     s->box_flags = 0;
@@ -368,7 +394,7 @@
         }
     }
 
-    AV_WB16(buf, s->text_pos);
+    AV_WB16(buf, s->byte_count);
     buf += 2;
 
     if (!av_bprint_is_complete(&s->buffer)) {

diff --git a/libavcodec/mp3_header_decompress_bsf.c b/libavcodec/mp3_header_decompress_bsf.c
index 22c1ef0..2948589 100644
--- a/libavcodec/mp3_header_decompress_bsf.c
+++ b/libavcodec/mp3_header_decompress_bsf.c

@@ -87,7 +87,7 @@
         goto fail;
     ret = av_packet_copy_props(out, in);
     if (ret < 0) {
-        av_packet_free(&out);
+        av_packet_unref(out);
         goto fail;
     }
     memcpy(out->data + frame_size - buf_size, buf, buf_size + AV_INPUT_BUFFER_PADDING_SIZE);

diff --git a/libavcodec/mpc8.c b/libavcodec/mpc8.c
index 80e0e9e..3be2f79 100644
--- a/libavcodec/mpc8.c
+++ b/libavcodec/mpc8.c

@@ -250,11 +250,6 @@
     int maxband, keyframe;
     int last[2];
 
-    /* get output buffer */
-    frame->nb_samples = MPC_FRAME_SIZE;
-    if ((res = ff_get_buffer(avctx, frame, 0)) < 0)
-        return res;
-
     keyframe = c->cur_frame == 0;
 
     if(keyframe){
@@ -273,6 +268,11 @@
         if(maxband > 32) maxband -= 33;
     }
 
+    if (get_bits_left(gb) < 0) {
+        *got_frame_ptr = 0;
+        return buf_size;
+    }
+
     if(maxband > c->maxbands + 1) {
         av_log(avctx, AV_LOG_ERROR, "maxband %d too large\n",maxband);
         return AVERROR_INVALIDDATA;
@@ -410,6 +410,10 @@
         }
     }
 
+    frame->nb_samples = MPC_FRAME_SIZE;
+    if ((res = ff_get_buffer(avctx, frame, 0)) < 0)
+        return res;
+
     ff_mpc_dequantize_and_synth(c, maxband - 1,
                                 (int16_t **)frame->extended_data,
                                 avctx->channels);
@@ -419,11 +423,11 @@
     c->last_bits_used = get_bits_count(gb);
     if(c->cur_frame >= c->frames)
         c->cur_frame = 0;
-    if(c->cur_frame == 0 && get_bits_left(gb) < 8) {// we have only padding left
-        c->last_bits_used = buf_size << 3;
-    } else if (get_bits_left(gb) < 0) {
+    if (get_bits_left(gb) < 0) {
         av_log(avctx, AV_LOG_ERROR, "Overread %d\n", -get_bits_left(gb));
         c->last_bits_used = buf_size << 3;
+    } else if (c->cur_frame == 0 && get_bits_left(gb) < 8) {// we have only padding left
+        c->last_bits_used = buf_size << 3;
     }
 
     *got_frame_ptr = 1;

diff --git a/libavcodec/mpeg12.h b/libavcodec/mpeg12.h
index f551504..1ec99f1 100644
--- a/libavcodec/mpeg12.h
+++ b/libavcodec/mpeg12.h

@@ -73,4 +73,8 @@
 void ff_mpeg1_encode_init(MpegEncContext *s);
 void ff_mpeg1_encode_slice_header(MpegEncContext *s);
 
+void ff_mpeg12_find_best_frame_rate(AVRational frame_rate,
+                                    int *code, int *ext_n, int *ext_d,
+                                    int nonstandard);
+
 #endif /* AVCODEC_MPEG12_H */

diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
index 4b76428..83e5378 100644
--- a/libavcodec/mpeg12dec.c
+++ b/libavcodec/mpeg12dec.c

@@ -36,6 +36,7 @@
 #include "avcodec.h"
 #include "bytestream.h"
 #include "error_resilience.h"
+#include "hwaccel.h"
 #include "idctdsp.h"
 #include "internal.h"
 #include "mpeg_er.h"
@@ -47,7 +48,6 @@
 #include "profiles.h"
 #include "thread.h"
 #include "version.h"
-#include "vdpau_compat.h"
 #include "xvmc_internal.h"
 
 typedef struct Mpeg1Context {
@@ -649,16 +649,6 @@
         return 0;
 }
 
-static inline int get_qscale(MpegEncContext *s)
-{
-    int qscale = get_bits(&s->gb, 5);
-    if (s->q_scale_type)
-        return ff_mpeg2_non_linear_qscale[qscale];
-    else
-        return qscale << 1;
-}
-
-
 /* motion type (for MPEG-2) */
 #define MT_FIELD 1
 #define MT_FRAME 2
@@ -751,7 +741,7 @@
             s->interlaced_dct = get_bits1(&s->gb);
 
         if (IS_QUANT(mb_type))
-            s->qscale = get_qscale(s);
+            s->qscale = mpeg_get_qscale(s);
 
         if (s->concealment_motion_vectors) {
             /* just parse them */
@@ -819,7 +809,7 @@
             }
 
             if (IS_QUANT(mb_type))
-                s->qscale = get_qscale(s);
+                s->qscale = mpeg_get_qscale(s);
 
             s->last_mv[0][0][0] = 0;
             s->last_mv[0][0][1] = 0;
@@ -840,7 +830,7 @@
             }
 
             if (IS_QUANT(mb_type))
-                s->qscale = get_qscale(s);
+                s->qscale = mpeg_get_qscale(s);
 
             /* motion vectors */
             s->mv_dir = (mb_type >> 13) & 3;
@@ -1131,12 +1121,12 @@
 }
 
 static const enum AVPixelFormat mpeg1_hwaccel_pixfmt_list_420[] = {
+#if CONFIG_MPEG1_NVDEC_HWACCEL
+    AV_PIX_FMT_CUDA,
+#endif
 #if CONFIG_MPEG1_XVMC_HWACCEL
     AV_PIX_FMT_XVMC,
 #endif
-#if CONFIG_MPEG1_VDPAU_DECODER && FF_API_VDPAU
-    AV_PIX_FMT_VDPAU_MPEG1,
-#endif
 #if CONFIG_MPEG1_VDPAU_HWACCEL
     AV_PIX_FMT_VDPAU,
 #endif
@@ -1145,12 +1135,12 @@
 };
 
 static const enum AVPixelFormat mpeg2_hwaccel_pixfmt_list_420[] = {
+#if CONFIG_MPEG2_NVDEC_HWACCEL
+    AV_PIX_FMT_CUDA,
+#endif
 #if CONFIG_MPEG2_XVMC_HWACCEL
     AV_PIX_FMT_XVMC,
 #endif
-#if CONFIG_MPEG_VDPAU_DECODER && FF_API_VDPAU
-    AV_PIX_FMT_VDPAU_MPEG2,
-#endif
 #if CONFIG_MPEG2_VDPAU_HWACCEL
     AV_PIX_FMT_VDPAU,
 #endif
@@ -1181,12 +1171,6 @@
     AV_PIX_FMT_NONE
 };
 
-#if FF_API_VDPAU
-static inline int uses_vdpau(AVCodecContext *avctx) {
-    return avctx->pix_fmt == AV_PIX_FMT_VDPAU_MPEG1 || avctx->pix_fmt == AV_PIX_FMT_VDPAU_MPEG2;
-}
-#endif
-
 static enum AVPixelFormat mpeg_get_pixelformat(AVCodecContext *avctx)
 {
     Mpeg1Context *s1  = avctx->priv_data;
@@ -1211,11 +1195,7 @@
 static void setup_hwaccel_for_pixfmt(AVCodecContext *avctx)
 {
     // until then pix_fmt may be changed right after codec init
-    if (avctx->hwaccel
-#if FF_API_VDPAU
-        || uses_vdpau(avctx)
-#endif
-        )
+    if (avctx->hwaccel)
         if (avctx->idct_algo == FF_IDCT_AUTO)
             avctx->idct_algo = FF_IDCT_NONE;
 
@@ -1224,11 +1204,6 @@
         MpegEncContext *s = &s1->mpeg_enc_ctx;
 
         s->pack_pblocks = 1;
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
-        avctx->xvmc_acceleration = 2;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
     }
 }
 
@@ -1743,7 +1718,7 @@
     ff_mpeg1_clean_buffers(s);
     s->interlaced_dct = 0;
 
-    s->qscale = get_qscale(s);
+    s->qscale = mpeg_get_qscale(s);
 
     if (s->qscale == 0) {
         av_log(s->avctx, AV_LOG_ERROR, "qscale == 0\n");
@@ -2259,8 +2234,52 @@
             av_freep(&s1->a53_caption);
             s1->a53_caption_size = cc_count * 3;
             s1->a53_caption      = av_malloc(s1->a53_caption_size);
-            if (s1->a53_caption)
+            if (!s1->a53_caption) {
+                s1->a53_caption_size = 0;
+            } else {
                 memcpy(s1->a53_caption, p + 7, s1->a53_caption_size);
+            }
+            avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+        }
+        return 1;
+    } else if (buf_size >= 2 &&
+               p[0] == 0x03 && (p[1]&0x7f) == 0x01) {
+        /* extract SCTE-20 CC data */
+        GetBitContext gb;
+        int cc_count = 0;
+        int i;
+
+        init_get_bits(&gb, p + 2, buf_size - 2);
+        cc_count = get_bits(&gb, 5);
+        if (cc_count > 0) {
+            av_freep(&s1->a53_caption);
+            s1->a53_caption_size = cc_count * 3;
+            s1->a53_caption      = av_mallocz(s1->a53_caption_size);
+            if (!s1->a53_caption) {
+                s1->a53_caption_size = 0;
+            } else {
+                uint8_t field, cc1, cc2;
+                uint8_t *cap = s1->a53_caption;
+                for (i = 0; i < cc_count && get_bits_left(&gb) >= 26; i++) {
+                    skip_bits(&gb, 2); // priority
+                    field = get_bits(&gb, 2);
+                    skip_bits(&gb, 5); // line_offset
+                    cc1 = get_bits(&gb, 8);
+                    cc2 = get_bits(&gb, 8);
+                    skip_bits(&gb, 1); // marker
+
+                    if (!field) { // forbidden
+                        cap[0] = cap[1] = cap[2] = 0x00;
+                    } else {
+                        field = (field == 2 ? 1 : 0);
+                        if (!s1->mpeg_enc_ctx.top_field_first) field = !field;
+                        cap[0] = 0x04 | field;
+                        cap[1] = ff_reverse[cc1];
+                        cap[2] = ff_reverse[cc2];
+                    }
+                    cap += 3;
+                }
+            }
             avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
         }
         return 1;
@@ -2302,7 +2321,9 @@
             av_freep(&s1->a53_caption);
             s1->a53_caption_size = cc_count * 6;
             s1->a53_caption      = av_malloc(s1->a53_caption_size);
-            if (s1->a53_caption) {
+            if (!s1->a53_caption) {
+                s1->a53_caption_size = 0;
+            } else {
                 uint8_t field1 = !!(p[4] & 0x80);
                 uint8_t *cap = s1->a53_caption;
                 p += 5;
@@ -2358,11 +2379,6 @@
         if (flags & 0x40) {
             if (buf_end - p < 1)
                 return;
-#if FF_API_AFD
-FF_DISABLE_DEPRECATION_WARNINGS
-            avctx->dtg_active_format = p[0] & 0x0f;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_AFD */
             s1->has_afd = 1;
             s1->afd     = p[0] & 0x0f;
         }
@@ -2462,12 +2478,6 @@
                         s2->er.error_count += s2->thread_context[i]->er.error_count;
                 }
 
-#if FF_API_VDPAU
-                if ((CONFIG_MPEG_VDPAU_DECODER || CONFIG_MPEG1_VDPAU_DECODER)
-                    && uses_vdpau(avctx))
-                    ff_vdpau_mpeg_picture_complete(s2, buf, buf_size, s->slice_count);
-#endif
-
                 ret = slice_end(avctx, picture);
                 if (ret < 0)
                     return ret;
@@ -2719,13 +2729,6 @@
                     return AVERROR_INVALIDDATA;
                 }
 
-#if FF_API_VDPAU
-                if (uses_vdpau(avctx)) {
-                    s->slice_count++;
-                    break;
-                }
-#endif
-
                 if (HAVE_THREADS &&
                     (avctx->active_thread_type & FF_THREAD_SLICE) &&
                     !avctx->hwaccel) {
@@ -2880,7 +2883,22 @@
     .caps_internal         = FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
     .flush                 = flush,
     .max_lowres            = 3,
-    .update_thread_context = ONLY_IF_THREADS_ENABLED(mpeg_decode_update_thread_context)
+    .update_thread_context = ONLY_IF_THREADS_ENABLED(mpeg_decode_update_thread_context),
+    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
+#if CONFIG_MPEG1_NVDEC_HWACCEL
+                               HWACCEL_NVDEC(mpeg1),
+#endif
+#if CONFIG_MPEG1_VDPAU_HWACCEL
+                               HWACCEL_VDPAU(mpeg1),
+#endif
+#if CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL
+                               HWACCEL_VIDEOTOOLBOX(mpeg1),
+#endif
+#if CONFIG_MPEG1_XVMC_HWACCEL
+                               HWACCEL_XVMC(mpeg1),
+#endif
+                               NULL
+                           },
 };
 
 AVCodec ff_mpeg2video_decoder = {
@@ -2899,6 +2917,33 @@
     .flush          = flush,
     .max_lowres     = 3,
     .profiles       = NULL_IF_CONFIG_SMALL(ff_mpeg2_video_profiles),
+    .hw_configs     = (const AVCodecHWConfigInternal*[]) {
+#if CONFIG_MPEG2_DXVA2_HWACCEL
+                        HWACCEL_DXVA2(mpeg2),
+#endif
+#if CONFIG_MPEG2_D3D11VA_HWACCEL
+                        HWACCEL_D3D11VA(mpeg2),
+#endif
+#if CONFIG_MPEG2_D3D11VA2_HWACCEL
+                        HWACCEL_D3D11VA2(mpeg2),
+#endif
+#if CONFIG_MPEG2_NVDEC_HWACCEL
+                        HWACCEL_NVDEC(mpeg2),
+#endif
+#if CONFIG_MPEG2_VAAPI_HWACCEL
+                        HWACCEL_VAAPI(mpeg2),
+#endif
+#if CONFIG_MPEG2_VDPAU_HWACCEL
+                        HWACCEL_VDPAU(mpeg2),
+#endif
+#if CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL
+                        HWACCEL_VIDEOTOOLBOX(mpeg2),
+#endif
+#if CONFIG_MPEG2_XVMC_HWACCEL
+                        HWACCEL_XVMC(mpeg2),
+#endif
+                        NULL
+                    },
 };
 
 //legacy decoder
@@ -2916,73 +2961,3 @@
     .flush          = flush,
     .max_lowres     = 3,
 };
-
-#if FF_API_XVMC
-#if CONFIG_MPEG_XVMC_DECODER
-FF_DISABLE_DEPRECATION_WARNINGS
-static av_cold int mpeg_mc_decode_init(AVCodecContext *avctx)
-{
-    if (avctx->active_thread_type & FF_THREAD_SLICE)
-        return -1;
-    if (!(avctx->slice_flags & SLICE_FLAG_CODED_ORDER))
-        return -1;
-    if (!(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD)) {
-        ff_dlog(avctx, "mpeg12.c: XvMC decoder will work better if SLICE_FLAG_ALLOW_FIELD is set\n");
-    }
-    mpeg_decode_init(avctx);
-
-    avctx->pix_fmt           = AV_PIX_FMT_XVMC_MPEG2_IDCT;
-    avctx->xvmc_acceleration = 2; // 2 - the blocks are packed!
-
-    return 0;
-}
-
-AVCodec ff_mpeg_xvmc_decoder = {
-    .name           = "mpegvideo_xvmc",
-    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-1/2 video XvMC (X-Video Motion Compensation)"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_MPEG2VIDEO_XVMC,
-    .priv_data_size = sizeof(Mpeg1Context),
-    .init           = mpeg_mc_decode_init,
-    .close          = mpeg_decode_end,
-    .decode         = mpeg_decode_frame,
-    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1 |
-                      AV_CODEC_CAP_TRUNCATED | CODEC_CAP_HWACCEL |
-                      AV_CODEC_CAP_DELAY,
-    .flush          = flush,
-};
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-#endif /* FF_API_XVMC */
-
-#if CONFIG_MPEG_VDPAU_DECODER && FF_API_VDPAU
-AVCodec ff_mpeg_vdpau_decoder = {
-    .name           = "mpegvideo_vdpau",
-    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-1/2 video (VDPAU acceleration)"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_MPEG2VIDEO,
-    .priv_data_size = sizeof(Mpeg1Context),
-    .init           = mpeg_decode_init,
-    .close          = mpeg_decode_end,
-    .decode         = mpeg_decode_frame,
-    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_TRUNCATED |
-                      AV_CODEC_CAP_HWACCEL_VDPAU | AV_CODEC_CAP_DELAY,
-    .flush          = flush,
-};
-#endif
-
-#if CONFIG_MPEG1_VDPAU_DECODER && FF_API_VDPAU
-AVCodec ff_mpeg1_vdpau_decoder = {
-    .name           = "mpeg1video_vdpau",
-    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-1 video (VDPAU acceleration)"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_MPEG1VIDEO,
-    .priv_data_size = sizeof(Mpeg1Context),
-    .init           = mpeg_decode_init,
-    .close          = mpeg_decode_end,
-    .decode         = mpeg_decode_frame,
-    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_TRUNCATED |
-                      AV_CODEC_CAP_HWACCEL_VDPAU | AV_CODEC_CAP_DELAY,
-    .flush          = flush,
-};
-#endif

diff --git a/libavcodec/mpeg12enc.c b/libavcodec/mpeg12enc.c
index f45598a..d0b458e 100644
--- a/libavcodec/mpeg12enc.c
+++ b/libavcodec/mpeg12enc.c

@@ -348,12 +348,13 @@
                                 height != s->height ||
                                 s->avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
                                 s->avctx->color_trc != AVCOL_TRC_UNSPECIFIED ||
-                                s->avctx->colorspace != AVCOL_SPC_UNSPECIFIED);
+                                s->avctx->colorspace != AVCOL_SPC_UNSPECIFIED ||
+                                s->video_format != VIDEO_FORMAT_UNSPECIFIED);
 
             if (s->seq_disp_ext == 1 || (s->seq_disp_ext == -1 && use_seq_disp_ext)) {
                 put_header(s, EXT_START_CODE);
                 put_bits(&s->pb, 4, 2);                         // sequence display extension
-                put_bits(&s->pb, 3, 0);                         // video_format: 0 is components
+                put_bits(&s->pb, 3, s->video_format);           // video_format
                 put_bits(&s->pb, 1, 1);                         // colour_description
                 put_bits(&s->pb, 8, s->avctx->color_primaries); // colour_primaries
                 put_bits(&s->pb, 8, s->avctx->color_trc);       // transfer_characteristics
@@ -1125,6 +1126,13 @@
     {     "auto",   NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = -1},  0, 0, VE, "seq_disp_ext" },
     {     "never",  NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = 0 },  0, 0, VE, "seq_disp_ext" },
     {     "always", NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = 1 },  0, 0, VE, "seq_disp_ext" },
+    { "video_format",     "Video_format in the sequence_display_extension indicating the source of the video.", OFFSET(video_format), AV_OPT_TYPE_INT, { .i64 = VIDEO_FORMAT_UNSPECIFIED }, 0, 7, VE, "video_format" },
+    {     "component",    NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = VIDEO_FORMAT_COMPONENT  },  0, 0, VE, "video_format" },
+    {     "pal",          NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = VIDEO_FORMAT_PAL        },  0, 0, VE, "video_format" },
+    {     "ntsc",         NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = VIDEO_FORMAT_NTSC       },  0, 0, VE, "video_format" },
+    {     "secam",        NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = VIDEO_FORMAT_SECAM      },  0, 0, VE, "video_format" },
+    {     "mac",          NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = VIDEO_FORMAT_MAC        },  0, 0, VE, "video_format" },
+    {     "unspecified",  NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = VIDEO_FORMAT_UNSPECIFIED},  0, 0, VE, "video_format" },
     FF_MPV_COMMON_OPTS
     { NULL },
 };

diff --git a/libavcodec/mpeg12framerate.c b/libavcodec/mpeg12framerate.c
index 094cd18..ab3d351 100644
--- a/libavcodec/mpeg12framerate.c
+++ b/libavcodec/mpeg12framerate.c

@@ -18,6 +18,9 @@
 
 #include "libavutil/rational.h"
 
+#include "mpeg12.h"
+#include "mpeg12data.h"
+
 const AVRational ff_mpeg12_frame_rate_tab[16] = {
     {    0,    0},
     {24000, 1001},
@@ -37,3 +40,64 @@
     {   15,    1},
     {    0,    0},
 };
+
+void ff_mpeg12_find_best_frame_rate(AVRational frame_rate,
+                                    int *code, int *ext_n, int *ext_d,
+                                    int nonstandard)
+{
+    int mpeg2 = ext_n && ext_d;
+    int max_code = nonstandard ? 12 : 8;
+    int c, n, d, best_c, best_n, best_d;
+    AVRational best_error = { INT_MAX, 1 };
+
+    // Default to NTSC if the inputs make no sense.
+    best_c = 4;
+    best_n = best_d = 1;
+
+    for (c = 1; c <= max_code; c++) {
+        if (av_cmp_q(frame_rate, ff_mpeg12_frame_rate_tab[c]) == 0) {
+            best_c = c;
+            goto found;
+        }
+    }
+
+    for (c = 1; c <= max_code; c++) {
+        for (n = 1; n <= (mpeg2 ? 4 : 1); n++) {
+            for (d = 1; d <= (mpeg2 ? 32 : 1); d++) {
+                AVRational test, error;
+                int cmp;
+
+                test = av_mul_q(ff_mpeg12_frame_rate_tab[c],
+                                (AVRational) { n, d });
+
+                cmp = av_cmp_q(test, frame_rate);
+                if (cmp == 0) {
+                    best_c = c;
+                    best_n = n;
+                    best_d = d;
+                    goto found;
+                }
+
+                if (cmp < 0)
+                    error = av_div_q(frame_rate, test);
+                else
+                    error = av_div_q(test, frame_rate);
+
+                cmp = av_cmp_q(error, best_error);
+                if (cmp < 0 || (cmp == 0 && n == 1 && d == 1)) {
+                    best_c = c;
+                    best_n = n;
+                    best_d = d;
+                    best_error = error;
+                }
+            }
+        }
+    }
+
+found:
+    *code = best_c;
+    if (mpeg2) {
+        *ext_n = best_n - 1;
+        *ext_d = best_d - 1;
+    }
+}

diff --git a/libavcodec/mpeg2_metadata_bsf.c b/libavcodec/mpeg2_metadata_bsf.c
new file mode 100644
index 0000000..e787cb3
--- /dev/null
+++ b/libavcodec/mpeg2_metadata_bsf.c

@@ -0,0 +1,314 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "bsf.h"
+#include "cbs.h"
+#include "cbs_mpeg2.h"
+#include "mpeg12.h"
+
+typedef struct MPEG2MetadataContext {
+    const AVClass *class;
+
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment fragment;
+
+    MPEG2RawExtensionData sequence_display_extension;
+
+    AVRational display_aspect_ratio;
+
+    AVRational frame_rate;
+
+    int video_format;
+    int colour_primaries;
+    int transfer_characteristics;
+    int matrix_coefficients;
+
+    int mpeg1_warned;
+} MPEG2MetadataContext;
+
+
+static int mpeg2_metadata_update_fragment(AVBSFContext *bsf,
+                                          CodedBitstreamFragment *frag)
+{
+    MPEG2MetadataContext             *ctx = bsf->priv_data;
+    MPEG2RawSequenceHeader            *sh = NULL;
+    MPEG2RawSequenceExtension         *se = NULL;
+    MPEG2RawSequenceDisplayExtension *sde = NULL;
+    int i, se_pos, add_sde = 0;
+
+    for (i = 0; i < frag->nb_units; i++) {
+        if (frag->units[i].type == MPEG2_START_SEQUENCE_HEADER) {
+            sh = frag->units[i].content;
+        } else if (frag->units[i].type == MPEG2_START_EXTENSION) {
+            MPEG2RawExtensionData *ext = frag->units[i].content;
+            if (ext->extension_start_code_identifier ==
+                MPEG2_EXTENSION_SEQUENCE) {
+                se = &ext->data.sequence;
+                se_pos = i;
+            } else if (ext->extension_start_code_identifier ==
+                MPEG2_EXTENSION_SEQUENCE_DISPLAY) {
+                sde = &ext->data.sequence_display;
+            }
+        }
+    }
+
+    if (!sh || !se) {
+        // No sequence header and sequence extension: not an MPEG-2 video
+        // sequence.
+        if (sh && !ctx->mpeg1_warned) {
+            av_log(bsf, AV_LOG_WARNING, "Stream contains a sequence "
+                   "header but not a sequence extension: maybe it's "
+                   "actually MPEG-1?\n");
+            ctx->mpeg1_warned = 1;
+        }
+        return 0;
+    }
+
+    if (ctx->display_aspect_ratio.num && ctx->display_aspect_ratio.den) {
+        int num, den;
+
+        av_reduce(&num, &den, ctx->display_aspect_ratio.num,
+                  ctx->display_aspect_ratio.den, 65535);
+
+        if (num == 4 && den == 3)
+            sh->aspect_ratio_information = 2;
+        else if (num == 16 && den == 9)
+            sh->aspect_ratio_information = 3;
+        else if (num == 221 && den == 100)
+            sh->aspect_ratio_information = 4;
+        else
+            sh->aspect_ratio_information = 1;
+    }
+
+    if (ctx->frame_rate.num && ctx->frame_rate.den) {
+        int code, ext_n, ext_d;
+
+        ff_mpeg12_find_best_frame_rate(ctx->frame_rate,
+                                       &code, &ext_n, &ext_d, 0);
+
+        sh->frame_rate_code        = code;
+        se->frame_rate_extension_n = ext_n;
+        se->frame_rate_extension_d = ext_d;
+    }
+
+    if (ctx->video_format             >= 0 ||
+        ctx->colour_primaries         >= 0 ||
+        ctx->transfer_characteristics >= 0 ||
+        ctx->matrix_coefficients      >= 0) {
+        if (!sde) {
+            add_sde = 1;
+            ctx->sequence_display_extension.extension_start_code =
+                MPEG2_START_EXTENSION;
+            ctx->sequence_display_extension.extension_start_code_identifier =
+                MPEG2_EXTENSION_SEQUENCE_DISPLAY;
+            sde = &ctx->sequence_display_extension.data.sequence_display;
+
+            *sde = (MPEG2RawSequenceDisplayExtension) {
+                .video_format = 5,
+
+                .colour_description       = 0,
+                .colour_primaries         = 2,
+                .transfer_characteristics = 2,
+                .matrix_coefficients      = 2,
+
+                .display_horizontal_size =
+                    se->horizontal_size_extension << 12 | sh->horizontal_size_value,
+                .display_vertical_size =
+                    se->vertical_size_extension << 12 | sh->vertical_size_value,
+            };
+        }
+
+        if (ctx->video_format >= 0)
+            sde->video_format = ctx->video_format;
+
+        if (ctx->colour_primaries         >= 0 ||
+            ctx->transfer_characteristics >= 0 ||
+            ctx->matrix_coefficients      >= 0) {
+            sde->colour_description = 1;
+
+            if (ctx->colour_primaries >= 0)
+                sde->colour_primaries = ctx->colour_primaries;
+            else if (add_sde)
+                sde->colour_primaries = 2;
+
+            if (ctx->transfer_characteristics >= 0)
+                sde->transfer_characteristics = ctx->transfer_characteristics;
+            else if (add_sde)
+                sde->transfer_characteristics = 2;
+
+            if (ctx->matrix_coefficients >= 0)
+                sde->matrix_coefficients = ctx->matrix_coefficients;
+            else if (add_sde)
+                sde->matrix_coefficients = 2;
+        }
+    }
+
+    if (add_sde) {
+        int err;
+
+        err = ff_cbs_insert_unit_content(ctx->cbc, frag, se_pos + 1,
+                                         MPEG2_START_EXTENSION,
+                                         &ctx->sequence_display_extension,
+                                         NULL);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to insert new sequence "
+                   "display extension.\n");
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+static int mpeg2_metadata_filter(AVBSFContext *bsf, AVPacket *out)
+{
+    MPEG2MetadataContext *ctx = bsf->priv_data;
+    AVPacket *in = NULL;
+    CodedBitstreamFragment *frag = &ctx->fragment;
+    int err;
+
+    err = ff_bsf_get_packet(bsf, &in);
+    if (err < 0)
+        return err;
+
+    err = ff_cbs_read_packet(ctx->cbc, frag, in);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
+        goto fail;
+    }
+
+    err = mpeg2_metadata_update_fragment(bsf, frag);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to update frame fragment.\n");
+        goto fail;
+    }
+
+    err = ff_cbs_write_packet(ctx->cbc, out, frag);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
+        goto fail;
+    }
+
+    err = av_packet_copy_props(out, in);
+    if (err < 0)
+        goto fail;
+
+    err = 0;
+fail:
+    ff_cbs_fragment_uninit(ctx->cbc, frag);
+
+    if (err < 0)
+        av_packet_unref(out);
+    av_packet_free(&in);
+
+    return err;
+}
+
+static int mpeg2_metadata_init(AVBSFContext *bsf)
+{
+    MPEG2MetadataContext *ctx = bsf->priv_data;
+    CodedBitstreamFragment *frag = &ctx->fragment;
+    int err;
+
+    err = ff_cbs_init(&ctx->cbc, AV_CODEC_ID_MPEG2VIDEO, bsf);
+    if (err < 0)
+        return err;
+
+    if (bsf->par_in->extradata) {
+        err = ff_cbs_read_extradata(ctx->cbc, frag, bsf->par_in);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
+            goto fail;
+        }
+
+        err = mpeg2_metadata_update_fragment(bsf, frag);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to update metadata fragment.\n");
+            goto fail;
+        }
+
+        err = ff_cbs_write_extradata(ctx->cbc, bsf->par_out, frag);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
+            goto fail;
+        }
+    }
+
+    err = 0;
+fail:
+    ff_cbs_fragment_uninit(ctx->cbc, frag);
+    return err;
+}
+
+static void mpeg2_metadata_close(AVBSFContext *bsf)
+{
+    MPEG2MetadataContext *ctx = bsf->priv_data;
+    ff_cbs_close(&ctx->cbc);
+}
+
+#define OFFSET(x) offsetof(MPEG2MetadataContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
+static const AVOption mpeg2_metadata_options[] = {
+    { "display_aspect_ratio", "Set display aspect ratio (table 6-3)",
+        OFFSET(display_aspect_ratio), AV_OPT_TYPE_RATIONAL,
+        { .dbl = 0.0 }, 0, 65535, FLAGS },
+
+    { "frame_rate", "Set frame rate",
+        OFFSET(frame_rate), AV_OPT_TYPE_RATIONAL,
+        { .dbl = 0.0 }, 0, UINT_MAX, FLAGS },
+
+    { "video_format", "Set video format (table 6-6)",
+        OFFSET(video_format), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 7, FLAGS },
+    { "colour_primaries", "Set colour primaries (table 6-7)",
+        OFFSET(colour_primaries), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+    { "transfer_characteristics", "Set transfer characteristics (table 6-8)",
+        OFFSET(transfer_characteristics), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+    { "matrix_coefficients", "Set matrix coefficients (table 6-9)",
+        OFFSET(matrix_coefficients), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255, FLAGS },
+
+    { NULL }
+};
+
+static const AVClass mpeg2_metadata_class = {
+    .class_name = "mpeg2_metadata_bsf",
+    .item_name  = av_default_item_name,
+    .option     = mpeg2_metadata_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const enum AVCodecID mpeg2_metadata_codec_ids[] = {
+    AV_CODEC_ID_MPEG2VIDEO, AV_CODEC_ID_NONE,
+};
+
+const AVBitStreamFilter ff_mpeg2_metadata_bsf = {
+    .name           = "mpeg2_metadata",
+    .priv_data_size = sizeof(MPEG2MetadataContext),
+    .priv_class     = &mpeg2_metadata_class,
+    .init           = &mpeg2_metadata_init,
+    .close          = &mpeg2_metadata_close,
+    .filter         = &mpeg2_metadata_filter,
+    .codec_ids      = mpeg2_metadata_codec_ids,
+};

diff --git a/libavcodec/mpeg4_unpack_bframes_bsf.c b/libavcodec/mpeg4_unpack_bframes_bsf.c
index e227f58..e9c535f 100644
--- a/libavcodec/mpeg4_unpack_bframes_bsf.c
+++ b/libavcodec/mpeg4_unpack_bframes_bsf.c

@@ -24,8 +24,7 @@
 #include "mpeg4video.h"
 
 typedef struct UnpackBFramesBSFContext {
-    uint8_t *b_frame_buf;
-    int      b_frame_buf_size;
+    AVPacket *b_frame;
 } UnpackBFramesBSFContext;
 
 /* search next start code */
@@ -71,18 +70,6 @@
     }
 }
 
-/* allocate new buffer and copy size bytes from src */
-static uint8_t *create_new_buffer(const uint8_t *src, int size) {
-    uint8_t *dst = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
-
-    if (dst) {
-        memcpy(dst, src, size);
-        memset(dst + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
-    }
-
-    return dst;
-}
-
 static int mpeg4_unpack_bframes_filter(AVBSFContext *ctx, AVPacket *out)
 {
     UnpackBFramesBSFContext *s = ctx->priv_data;
@@ -97,20 +84,18 @@
     av_log(ctx, AV_LOG_DEBUG, "Found %d VOP startcode(s) in this packet.\n", nb_vop);
 
     if (pos_vop2 >= 0) {
-        if (s->b_frame_buf) {
+        if (s->b_frame->data) {
             av_log(ctx, AV_LOG_WARNING,
                    "Missing one N-VOP packet, discarding one B-frame.\n");
-            av_freep(&s->b_frame_buf);
-            s->b_frame_buf_size = 0;
+            av_packet_unref(s->b_frame);
         }
         /* store the packed B-frame in the BSFContext */
-        s->b_frame_buf_size = in->size - pos_vop2;
-        s->b_frame_buf      = create_new_buffer(in->data + pos_vop2, s->b_frame_buf_size);
-        if (!s->b_frame_buf) {
-            s->b_frame_buf_size = 0;
-            av_packet_free(&in);
-            return AVERROR(ENOMEM);
+        ret = av_packet_ref(s->b_frame, in);
+        if (ret < 0) {
+            goto fail;
         }
+        s->b_frame->size -= pos_vop2;
+        s->b_frame->data += pos_vop2;
     }
 
     if (nb_vop > 2) {
@@ -118,40 +103,31 @@
        "Found %d VOP headers in one packet, only unpacking one.\n", nb_vop);
     }
 
-    if (nb_vop == 1 && s->b_frame_buf) {
+    if (nb_vop == 1 && s->b_frame->data) {
         /* use frame from BSFContext */
+        av_packet_move_ref(out, s->b_frame);
+
+        /* use properties from current input packet */
         ret = av_packet_copy_props(out, in);
         if (ret < 0) {
-            av_packet_free(&in);
-            return ret;
+            goto fail;
         }
 
-        ret = av_packet_from_data(out, s->b_frame_buf, s->b_frame_buf_size);
-        if (ret < 0) {
-            av_packet_free(&in);
-            return ret;
-        }
         if (in->size <= MAX_NVOP_SIZE) {
             /* N-VOP */
             av_log(ctx, AV_LOG_DEBUG, "Skipping N-VOP.\n");
-            s->b_frame_buf      = NULL;
-            s->b_frame_buf_size = 0;
         } else {
             /* copy packet into BSFContext */
-            s->b_frame_buf_size = in->size;
-            s->b_frame_buf      = create_new_buffer(in->data, in->size);
-            if (!s->b_frame_buf) {
-                s->b_frame_buf_size = 0;
-                av_packet_unref(out);
-                av_packet_free(&in);
-                return AVERROR(ENOMEM);
-            }
+            av_packet_move_ref(s->b_frame, in);
         }
     } else if (nb_vop >= 2) {
         /* use first frame of the packet */
         av_packet_move_ref(out, in);
         out->size = pos_vop2;
     } else if (pos_p >= 0) {
+        ret = av_packet_make_writable(in);
+        if (ret < 0)
+            goto fail;
         av_log(ctx, AV_LOG_DEBUG, "Updating DivX userdata (remove trailing 'p').\n");
         av_packet_move_ref(out, in);
         /* remove 'p' (packed) from the end of the (DivX) userdata string */
@@ -161,13 +137,22 @@
         av_packet_move_ref(out, in);
     }
 
+fail:
+    if (ret < 0)
+        av_packet_unref(out);
     av_packet_free(&in);
 
-    return 0;
+    return ret;
 }
 
 static int mpeg4_unpack_bframes_init(AVBSFContext *ctx)
 {
+    UnpackBFramesBSFContext *s = ctx->priv_data;
+
+    s->b_frame = av_packet_alloc();
+    if (!s->b_frame)
+        return AVERROR(ENOMEM);
+
     if (ctx->par_in->extradata) {
         int pos_p_ext = -1;
         scan_buffer(ctx->par_in->extradata, ctx->par_in->extradata_size, &pos_p_ext, NULL, NULL);
@@ -181,10 +166,16 @@
     return 0;
 }
 
+static void mpeg4_unpack_bframes_flush(AVBSFContext *bsfc)
+{
+    UnpackBFramesBSFContext *ctx = bsfc->priv_data;
+    av_packet_unref(ctx->b_frame);
+}
+
 static void mpeg4_unpack_bframes_close(AVBSFContext *bsfc)
 {
     UnpackBFramesBSFContext *ctx = bsfc->priv_data;
-    av_freep(&ctx->b_frame_buf);
+    av_packet_free(&ctx->b_frame);
 }
 
 static const enum AVCodecID codec_ids[] = {
@@ -196,6 +187,7 @@
     .priv_data_size = sizeof(UnpackBFramesBSFContext),
     .init           = mpeg4_unpack_bframes_init,
     .filter         = mpeg4_unpack_bframes_filter,
+    .flush          = mpeg4_unpack_bframes_flush,
     .close          = mpeg4_unpack_bframes_close,
     .codec_ids      = codec_ids,
 };

diff --git a/libavcodec/mpeg4audio.c b/libavcodec/mpeg4audio.c
index b6bb323..2197147 100644
--- a/libavcodec/mpeg4audio.c
+++ b/libavcodec/mpeg4audio.c

@@ -167,43 +167,3 @@
 
     return ff_mpeg4audio_get_config_gb(c, &gb, sync_extension);
 }
-
-static av_always_inline unsigned int copy_bits(PutBitContext *pb,
-                                               GetBitContext *gb,
-                                               int bits)
-{
-    unsigned int el = get_bits(gb, bits);
-    put_bits(pb, bits, el);
-    return el;
-}
-
-int avpriv_copy_pce_data(PutBitContext *pb, GetBitContext *gb)
-{
-    int five_bit_ch, four_bit_ch, comment_size, bits;
-    int offset = put_bits_count(pb);
-
-    copy_bits(pb, gb, 10);                  //Tag, Object Type, Frequency
-    five_bit_ch  = copy_bits(pb, gb, 4);    //Front
-    five_bit_ch += copy_bits(pb, gb, 4);    //Side
-    five_bit_ch += copy_bits(pb, gb, 4);    //Back
-    four_bit_ch  = copy_bits(pb, gb, 2);    //LFE
-    four_bit_ch += copy_bits(pb, gb, 3);    //Data
-    five_bit_ch += copy_bits(pb, gb, 4);    //Coupling
-    if (copy_bits(pb, gb, 1))               //Mono Mixdown
-        copy_bits(pb, gb, 4);
-    if (copy_bits(pb, gb, 1))               //Stereo Mixdown
-        copy_bits(pb, gb, 4);
-    if (copy_bits(pb, gb, 1))               //Matrix Mixdown
-        copy_bits(pb, gb, 3);
-    for (bits = five_bit_ch*5+four_bit_ch*4; bits > 16; bits -= 16)
-        copy_bits(pb, gb, 16);
-    if (bits)
-        copy_bits(pb, gb, bits);
-    avpriv_align_put_bits(pb);
-    align_get_bits(gb);
-    comment_size = copy_bits(pb, gb, 8);
-    for (; comment_size > 0; comment_size--)
-        copy_bits(pb, gb, 8);
-
-    return put_bits_count(pb) - offset;
-}

diff --git a/libavcodec/mpeg4audio.h b/libavcodec/mpeg4audio.h
index 8fd32f9..b9cea8a 100644
--- a/libavcodec/mpeg4audio.h
+++ b/libavcodec/mpeg4audio.h

@@ -23,7 +23,11 @@
 #define AVCODEC_MPEG4AUDIO_H
 
 #include <stdint.h>
+
+#include "libavutil/attributes.h"
+
 #include "get_bits.h"
+#include "internal.h"
 #include "put_bits.h"
 
 typedef struct MPEG4AudioConfig {
@@ -41,7 +45,7 @@
     int frame_length_short;
 } MPEG4AudioConfig;
 
-extern av_export const int avpriv_mpeg4audio_sample_rates[16];
+extern av_export_avcodec const int avpriv_mpeg4audio_sample_rates[16];
 extern const uint8_t ff_mpeg4audio_channels[8];
 
 /**
@@ -115,6 +119,44 @@
 #define MAX_PCE_SIZE 320 ///<Maximum size of a PCE including the 3-bit ID_PCE
                          ///<marker and the comment
 
-int avpriv_copy_pce_data(PutBitContext *pb, GetBitContext *gb);
+static av_always_inline unsigned int ff_pce_copy_bits(PutBitContext *pb,
+                                                      GetBitContext *gb,
+                                                      int bits)
+{
+    unsigned int el = get_bits(gb, bits);
+    put_bits(pb, bits, el);
+    return el;
+}
+
+static inline int ff_copy_pce_data(PutBitContext *pb, GetBitContext *gb)
+{
+    int five_bit_ch, four_bit_ch, comment_size, bits;
+    int offset = put_bits_count(pb);
+
+    ff_pce_copy_bits(pb, gb, 10);               // Tag, Object Type, Frequency
+    five_bit_ch  = ff_pce_copy_bits(pb, gb, 4); // Front
+    five_bit_ch += ff_pce_copy_bits(pb, gb, 4); // Side
+    five_bit_ch += ff_pce_copy_bits(pb, gb, 4); // Back
+    four_bit_ch  = ff_pce_copy_bits(pb, gb, 2); // LFE
+    four_bit_ch += ff_pce_copy_bits(pb, gb, 3); // Data
+    five_bit_ch += ff_pce_copy_bits(pb, gb, 4); // Coupling
+    if (ff_pce_copy_bits(pb, gb, 1))            // Mono Mixdown
+        ff_pce_copy_bits(pb, gb, 4);
+    if (ff_pce_copy_bits(pb, gb, 1))            // Stereo Mixdown
+        ff_pce_copy_bits(pb, gb, 4);
+    if (ff_pce_copy_bits(pb, gb, 1))            // Matrix Mixdown
+        ff_pce_copy_bits(pb, gb, 3);
+    for (bits = five_bit_ch*5+four_bit_ch*4; bits > 16; bits -= 16)
+        ff_pce_copy_bits(pb, gb, 16);
+    if (bits)
+        ff_pce_copy_bits(pb, gb, bits);
+    avpriv_align_put_bits(pb);
+    align_get_bits(gb);
+    comment_size = ff_pce_copy_bits(pb, gb, 8);
+    for (; comment_size > 0; comment_size--)
+        ff_pce_copy_bits(pb, gb, 8);
+
+    return put_bits_count(pb) - offset;
+}
 
 #endif /* AVCODEC_MPEG4AUDIO_H */

diff --git a/libavcodec/mpeg4data.h b/libavcodec/mpeg4data.h
index b7c3fab..4756e9e 100644
--- a/libavcodec/mpeg4data.h
+++ b/libavcodec/mpeg4data.h

@@ -373,4 +373,120 @@
     99, 13, 15, 17, 19, 21, 23, 0
 };
 
+/* Note these are different in studio mode */
+const uint16_t ff_mpeg4_studio_dc_luma[19][2]={
+    {0x0e,  6}, {0x06,  5}, {0x00,  4}, {0x02,  4},
+    {0x07,  3}, {0x05,  3}, {0x03,  3}, {0x02,  3},
+    {0x04,  3}, {0x06,  3}, {0x01,  4}, {0x1e,  7},
+    {0x3e,  8}, {0x7e,  9}, {0xfe, 10}, {0x1fe, 11},
+    {0x3fe, 12}, {0x7fe, 13}, {0x7ff, 13}
+};
+
+const uint16_t ff_mpeg4_studio_dc_chroma[19][2]={
+    {0x00,  4}, {0x02,  4}, {0x07,  3}, {0x05,  3},
+    {0x03,  3}, {0x02,  3}, {0x04,  3}, {0x06,  3},
+    {0x01,  4}, {0x06,  5}, {0x0e,  6}, {0x1e,  7},
+    {0x3e,  8}, {0x7e,  9}, {0xfe, 10}, {0x1fe, 11},
+    {0x3fe, 12}, {0x7fe, 13}, {0x7ff, 13}
+};
+
+const uint16_t ff_mpeg4_studio_intra[12][22][2]={
+    {
+        {0x05,  4}, {0x04,  4}, {0x05,  7}, {0x09,  9},
+        {0x21, 11}, {0x41, 12}, {0x81, 13}, {0x03,  4},
+        {0x03,  5}, {0x05,  6}, {0x04,  7}, {0x03,  7},
+        {0x05,  8}, {0x03,  2}, {0x05,  3}, {0x04,  3},
+        {0x03,  3}, {0x02,  4}, {0x04,  6}, {0x03,  6},
+        {0x11, 10}, {0x80, 13}
+    },
+    {
+        {0x00,  0}, {0x00,  0}, {0x00,  0}, {0x00,  0},
+        {0x00,  0}, {0x00,  0}, {0x00,  0}, {0x00,  0},
+        {0x00,  0}, {0x00,  0}, {0x00,  0}, {0x00,  0},
+        {0x00,  0}, {0x00,  0}, {0x01,  1}, {0x01,  2},
+        {0x01,  3}, {0x01,  4}, {0x01,  5}, {0x03,  7},
+        {0x05,  8}, {0x04,  8}
+    },
+    {
+        {0x05,  3},  {0x03,  5},  {0x02,  5},  {0x03,  7},
+        {0x09,  9},  {0x103, 14}, {0x102, 14}, {0x04,  3},
+        {0x03,  3},  {0x03,  4},  {0x02,  4},  {0x03,  6},
+        {0x11, 10},  {0x03,  2},  {0x02,  3},  {0x02,  6},
+        {0x05,  8},  {0x21, 11},  {0x83, 13},  {0x101, 14},
+        {0x201, 15}, {0x82, 13}
+    },
+    {
+        {0x05,  5}, {0x05,  4}, {0x04,  5}, {0x03,  6},
+        {0x09,  9}, {0x83, 13}, {0x82, 13}, {0x03,  3},
+        {0x04,  4}, {0x03,  4}, {0x03,  5}, {0x05,  8},
+        {0x81, 13}, {0x03,  2}, {0x02,  2}, {0x02,  5},
+        {0x02,  6}, {0x03,  7}, {0x11, 10}, {0x43, 12},
+        {0x80, 13}, {0x42, 12}
+    },
+    {
+        {0x05,  7},  {0x03,  4}, {0x03,  5},  {0x04,  7},
+        {0x09,  9},  {0x83, 13}, {0x101, 14}, {0x03,  3},
+        {0x02,  4},  {0x05,  6}, {0x03,  7},  {0x11, 10},
+        {0x201, 15}, {0x03,  2}, {0x02,  2},  {0x02,  3},
+        {0x04,  6},  {0x03,  6}, {0x05,  8},  {0x21, 11},
+        {0x82, 13},  {0x81, 13}
+    },
+    {
+        {0x13, 10},  {0x03,  5}, {0x05,  7}, {0x12, 10},
+        {0x43, 12},  {0x83, 13}, {0x82, 13}, {0x02,  5},
+        {0x04,  7},  {0x05,  8}, {0x23, 11}, {0x81, 13},
+        {0x101, 14}, {0x03,  2}, {0x02,  2}, {0x01,  2},
+        {0x01,  3},  {0x03,  6}, {0x03,  7}, {0x22, 11},
+        {0x201, 15}, {0x42, 12}
+    },
+    {
+        {0x23, 11},  {0x01,  4},  {0x07,  8},  {0x13, 10},
+        {0x22, 11},  {0x103, 14}, {0x102, 14}, {0x03,  6},
+        {0x06,  8},  {0x12, 10},  {0x43, 12},  {0x101, 14},
+        {0x201, 15}, {0x03,  3},  {0x02,  3},  {0x03,  2},
+        {0x02,  2},  {0x01,  3},  {0x02,  6},  {0x05,  8},
+        {0x42, 12},  {0x41, 12}
+    },
+    {
+        {0x0b,  9}, {0x03,  5}, {0x07,  8}, {0x07,  7},
+        {0x06,  7}, {0x23, 11}, {0x41, 12}, {0x05,  7},
+        {0x06,  8}, {0x0a,  9}, {0x13, 10}, {0x22, 11},
+        {0x40, 12}, {0x03,  4}, {0x02,  4}, {0x03,  2},
+        {0x02,  2}, {0x01,  2}, {0x02,  5}, {0x04,  7},
+        {0x12, 10}, {0x21, 11}
+    },
+    {
+        {0x15, 10}, {0x03,  6}, {0x14, 10}, {0x23, 11},
+        {0x07,  8}, {0x43, 12}, {0x81, 13}, {0x06,  8},
+        {0x0b,  9}, {0x13, 10}, {0x12, 10}, {0x42, 12},
+        {0x80, 13}, {0x01,  4}, {0x03,  3}, {0x02,  3},
+        {0x03,  2}, {0x02,  2}, {0x01,  3}, {0x02,  6},
+        {0x22, 11}, {0x41, 12}
+    },
+    {
+        {0x43, 12}, {0x05,  6}, {0x07,  8}, {0x04,  6},
+        {0x03,  6}, {0x13, 10}, {0x42, 12}, {0x05,  7},
+        {0x04,  7}, {0x06,  8}, {0x12, 10}, {0x41, 12},
+        {0x40, 12}, {0x03,  5}, {0x03,  4}, {0x03,  3},
+        {0x02,  3}, {0x03,  2}, {0x02,  2}, {0x02,  4},
+        {0x05,  8}, {0x11, 10}
+    },
+    {
+        {0x83, 13}, {0x05,  7}, {0x07,  8}, {0x03,  4},
+        {0x21, 11}, {0x82, 13}, {0x81, 13}, {0x04,  7},
+        {0x06,  8}, {0x0b,  9}, {0x0a,  9}, {0x11, 10},
+        {0x80, 13}, {0x03,  5}, {0x02,  5}, {0x02,  4},
+        {0x03,  3}, {0x02,  3}, {0x03,  2}, {0x02,  2},
+        {0x03,  6}, {0x09,  9}
+    },
+    {
+        {0x13, 10}, {0x03,  5}, {0x03,  6}, {0x0d,  9},
+        {0x0c,  9}, {0x21, 11}, {0x20, 11}, {0x02,  5},
+        {0x02,  6}, {0x07,  8}, {0x0b,  9}, {0x12, 10},
+        {0x11, 10}, {0x05,  3}, {0x04,  3}, {0x05,  4},
+        {0x04,  4}, {0x03,  4}, {0x02,  4}, {0x03,  3},
+        {0x03,  2}, {0x0a,  9}
+    }
+};
+
 #endif /* AVCODEC_MPEG4DATA_H */

diff --git a/libavcodec/mpeg4video.h b/libavcodec/mpeg4video.h
index 515b008..dd0a590 100644
--- a/libavcodec/mpeg4video.h
+++ b/libavcodec/mpeg4video.h

@@ -41,8 +41,13 @@
 #define NBIT_VO_TYPE             5
 #define ARTS_VO_TYPE            10
 #define ACE_VO_TYPE             12
+#define SIMPLE_STUDIO_VO_TYPE   14
+#define CORE_STUDIO_VO_TYPE     15
 #define ADV_SIMPLE_VO_TYPE      17
 
+#define VOT_VIDEO_ID 1
+#define VOT_STILL_TEXTURE_ID 2
+
 // aspect_ratio_info
 #define EXTENDED_PAR 15
 
@@ -58,6 +63,10 @@
 #define GOP_STARTCODE        0x1B3
 #define VISUAL_OBJ_STARTCODE 0x1B5
 #define VOP_STARTCODE        0x1B6
+#define SLICE_STARTCODE      0x1B7
+#define EXT_STARTCODE        0x1B8
+
+#define QUANT_MATRIX_EXT_ID  0x3
 
 /* smaller packets likely don't contain a real frame */
 #define MAX_NVOP_SIZE 19
@@ -105,8 +114,16 @@
     int cplx_estimation_trash_i;
     int cplx_estimation_trash_p;
     int cplx_estimation_trash_b;
+
+    VLC studio_intra_tab[12];
+    VLC studio_luma_dc;
+    VLC studio_chroma_dc;
+
+    int rgb;
 } Mpeg4DecContext;
 
+static const uint8_t mpeg4_block_count[4] = {0, 6, 8, 12};
+
 /* dc encoding for MPEG-4 */
 extern const uint8_t ff_mpeg4_DCtab_lum[13][2];
 extern const uint8_t ff_mpeg4_DCtab_chrom[13][2];
@@ -134,6 +151,10 @@
 
 extern const uint8_t ff_mpeg4_dc_threshold[8];
 
+extern const uint16_t ff_mpeg4_studio_dc_luma[19][2];
+extern const uint16_t ff_mpeg4_studio_dc_chroma[19][2];
+extern const uint16_t ff_mpeg4_studio_intra[12][22][2];
+
 void ff_mpeg4_encode_mb(MpegEncContext *s,
                         int16_t block[6][64],
                         int motion_x, int motion_y);
@@ -152,6 +173,7 @@
 int ff_mpeg4_decode_partitions(Mpeg4DecContext *ctx);
 int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s);
 int ff_mpeg4_decode_video_packet_header(Mpeg4DecContext *ctx);
+int ff_mpeg4_decode_studio_slice_header(Mpeg4DecContext *ctx);
 void ff_mpeg4_init_direct_mv(MpegEncContext *s);
 void ff_mpeg4videodec_static_init(void);
 int ff_mpeg4_workaround_bugs(AVCodecContext *avctx);
@@ -236,12 +258,12 @@
             if (level < 0) {
                 av_log(s->avctx, AV_LOG_ERROR,
                        "dc<0 at %dx%d\n", s->mb_x, s->mb_y);
-                return -1;
+                return AVERROR_INVALIDDATA;
             }
             if (level > 2048 + scale) {
                 av_log(s->avctx, AV_LOG_ERROR,
                        "dc overflow at %dx%d\n", s->mb_x, s->mb_y);
-                return -1;
+                return AVERROR_INVALIDDATA;
             }
         }
         if (level < 0)

diff --git a/libavcodec/mpeg4video_parser.c b/libavcodec/mpeg4video_parser.c
index b7d6da1..9ebb09a 100644
--- a/libavcodec/mpeg4video_parser.c
+++ b/libavcodec/mpeg4video_parser.c

@@ -46,7 +46,7 @@
     if (!vop_found) {
         for (i = 0; i < buf_size; i++) {
             state = (state << 8) | buf[i];
-            if (state == 0x1B6) {
+            if (state == VOP_STARTCODE) {
                 i++;
                 vop_found = 1;
                 break;
@@ -61,6 +61,8 @@
         for (; i < buf_size; i++) {
             state = (state << 8) | buf[i];
             if ((state & 0xFFFFFF00) == 0x100) {
+                if (state == SLICE_STARTCODE || state == EXT_STARTCODE)
+                    continue;
                 pc->frame_start_found = 0;
                 pc->state             = -1;
                 return i - 3;

diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index 319a380..f435a52 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c

@@ -24,7 +24,9 @@
 
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
 #include "error_resilience.h"
+#include "hwaccel.h"
 #include "idctdsp.h"
 #include "internal.h"
 #include "mpegutils.h"
@@ -35,6 +37,7 @@
 #include "profiles.h"
 #include "thread.h"
 #include "xvididct.h"
+#include "unary.h"
 
 /* The defines below define the number of bits that are read at once for
  * reading vlc values. Changing these may improve speed and data cache needs
@@ -43,6 +46,9 @@
 #define SPRITE_TRAJ_VLC_BITS 6
 #define DC_VLC_BITS 9
 #define MB_TYPE_B_VLC_BITS 4
+#define STUDIO_INTRA_BITS 9
+
+static int decode_studio_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb);
 
 static VLC dc_lum, dc_chrom;
 static VLC sprite_trajectory;
@@ -189,6 +195,10 @@
     if (w <= 0 || h <= 0)
         return AVERROR_INVALIDDATA;
 
+    /* the decoder was not properly initialized and we cannot continue */
+    if (sprite_trajectory.table == NULL)
+        return AVERROR_INVALIDDATA;
+
     for (i = 0; i < ctx->num_sprite_warping_points; i++) {
         int length;
         int x = 0, y = 0;
@@ -443,7 +453,7 @@
 
     /* is there enough space left for a video packet + header */
     if (get_bits_count(&s->gb) > s->gb.size_in_bits - 20)
-        return -1;
+        return AVERROR_INVALIDDATA;
 
     for (len = 0; len < 32; len++)
         if (get_bits1(&s->gb))
@@ -451,7 +461,7 @@
 
     if (len != ff_mpeg4_get_video_packet_prefix_length(s)) {
         av_log(s->avctx, AV_LOG_ERROR, "marker does not match f_code\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     if (ctx->shape != RECT_SHAPE) {
@@ -463,7 +473,7 @@
     if (mb_num >= s->mb_num || !mb_num) {
         av_log(s->avctx, AV_LOG_ERROR,
                "illegal mb_num in video packet (%d %d) \n", mb_num, s->mb_num);
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     s->mb_x = mb_num % s->mb_width;
@@ -523,6 +533,55 @@
     return 0;
 }
 
+static void reset_studio_dc_predictors(MpegEncContext *s)
+{
+    /* Reset DC Predictors */
+    s->last_dc[0] =
+    s->last_dc[1] =
+    s->last_dc[2] = 1 << (s->avctx->bits_per_raw_sample + s->dct_precision + s->intra_dc_precision - 1);
+}
+
+/**
+ * Decode the next video packet.
+ * @return <0 if something went wrong
+ */
+int ff_mpeg4_decode_studio_slice_header(Mpeg4DecContext *ctx)
+{
+    MpegEncContext *s = &ctx->m;
+    GetBitContext *gb = &s->gb;
+    unsigned vlc_len;
+    uint16_t mb_num;
+
+    if (get_bits_left(gb) >= 32 && get_bits_long(gb, 32) == SLICE_START_CODE) {
+        vlc_len = av_log2(s->mb_width * s->mb_height) + 1;
+        mb_num = get_bits(gb, vlc_len);
+
+        if (mb_num >= s->mb_num)
+            return AVERROR_INVALIDDATA;
+
+        s->mb_x = mb_num % s->mb_width;
+        s->mb_y = mb_num / s->mb_width;
+
+        if (ctx->shape != BIN_ONLY_SHAPE)
+            s->qscale = mpeg_get_qscale(s);
+
+        if (get_bits1(gb)) {  /* slice_extension_flag */
+            skip_bits1(gb);   /* intra_slice */
+            skip_bits1(gb);   /* slice_VOP_id_enable */
+            skip_bits(gb, 6); /* slice_VOP_id */
+            while (get_bits1(gb)) /* extra_bit_slice */
+                skip_bits(gb, 8); /* extra_information_slice */
+        }
+
+        reset_studio_dc_predictors(s);
+    }
+    else {
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
 /**
  * Get the average motion vector for a GMC MB.
  * @param n either 0 for the x component or 1 for y
@@ -539,7 +598,7 @@
         len >>= s->quarter_sample;
 
     if (s->real_sprite_warping_points == 1) {
-        if (ctx->divx_version == 500 && ctx->divx_build == 413)
+        if (ctx->divx_version == 500 && ctx->divx_build == 413 && a >= s->quarter_sample)
             sum = s->sprite_offset[0][n] / (1 << (a - s->quarter_sample));
         else
             sum = RSHIFT(s->sprite_offset[0][n] * (1 << s->quarter_sample), a);
@@ -592,7 +651,7 @@
 
     if (code < 0 || code > 9 /* && s->nbit < 9 */) {
         av_log(s->avctx, AV_LOG_ERROR, "illegal dc vlc\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     if (code == 0) {
@@ -615,7 +674,7 @@
             if (get_bits1(&s->gb) == 0) { /* marker */
                 if (s->avctx->err_recognition & (AV_EF_BITSTREAM|AV_EF_COMPLIANT)) {
                     av_log(s->avctx, AV_LOG_ERROR, "dc marker bit missing\n");
-                    return -1;
+                    return AVERROR_INVALIDDATA;
                 }
             }
         }
@@ -659,7 +718,7 @@
                     if (cbpc < 0) {
                         av_log(s->avctx, AV_LOG_ERROR,
                                "mcbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
-                        return -1;
+                        return AVERROR_INVALIDDATA;
                     }
                 } while (cbpc == 8);
 
@@ -679,7 +738,7 @@
                     if (dc < 0) {
                         av_log(s->avctx, AV_LOG_ERROR,
                                "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
-                        return -1;
+                        return dc;
                     }
                     dir <<= 1;
                     if (dc_pred_dir)
@@ -731,7 +790,7 @@
                 if (cbpc < 0) {
                     av_log(s->avctx, AV_LOG_ERROR,
                            "mcbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
-                    return -1;
+                    return AVERROR_INVALIDDATA;
                 }
                 if (cbpc == 20)
                     goto try_again;
@@ -769,11 +828,11 @@
                         if (!s->mcsel) {
                             mx = ff_h263_decode_motion(s, pred_x, s->f_code);
                             if (mx >= 0xffff)
-                                return -1;
+                                return AVERROR_INVALIDDATA;
 
                             my = ff_h263_decode_motion(s, pred_y, s->f_code);
                             if (my >= 0xffff)
-                                return -1;
+                                return AVERROR_INVALIDDATA;
                             s->current_picture.mb_type[xy] = MB_TYPE_16x16 |
                                                              MB_TYPE_L0;
                         } else {
@@ -800,11 +859,11 @@
                             int16_t *mot_val = ff_h263_pred_motion(s, i, 0, &pred_x, &pred_y);
                             mx = ff_h263_decode_motion(s, pred_x, s->f_code);
                             if (mx >= 0xffff)
-                                return -1;
+                                return AVERROR_INVALIDDATA;
 
                             my = ff_h263_decode_motion(s, pred_y, s->f_code);
                             if (my >= 0xffff)
-                                return -1;
+                                return AVERROR_INVALIDDATA;
                             mot_val[0] = mx;
                             mot_val[1] = my;
                         }
@@ -845,7 +904,7 @@
                 if (cbpy < 0) {
                     av_log(s->avctx, AV_LOG_ERROR,
                            "cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
-                    return -1;
+                    return AVERROR_INVALIDDATA;
                 }
 
                 s->cbp_table[xy]               |= cbpy << 2;
@@ -860,7 +919,7 @@
                     if (cbpy < 0) {
                         av_log(s->avctx, AV_LOG_ERROR,
                                "I cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
-                        return -1;
+                        return AVERROR_INVALIDDATA;
                     }
 
                     if (s->cbp_table[xy] & 8)
@@ -873,7 +932,7 @@
                         if (dc < 0) {
                             av_log(s->avctx, AV_LOG_ERROR,
                                    "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
-                            return -1;
+                            return dc;
                         }
                         dir <<= 1;
                         if (dc_pred_dir)
@@ -892,7 +951,7 @@
                     if (cbpy < 0) {
                         av_log(s->avctx, AV_LOG_ERROR,
                                "P cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
-                        return -1;
+                        return AVERROR_INVALIDDATA;
                     }
 
                     if (s->cbp_table[xy] & 8)
@@ -919,6 +978,7 @@
 {
     MpegEncContext *s = &ctx->m;
     int mb_num;
+    int ret;
     const int part_a_error = s->pict_type == AV_PICTURE_TYPE_I ? (ER_DC_ERROR | ER_MV_ERROR) : ER_MV_ERROR;
     const int part_a_end   = s->pict_type == AV_PICTURE_TYPE_I ? (ER_DC_END   | ER_MV_END)   : ER_MV_END;
 
@@ -926,14 +986,14 @@
     if (mb_num <= 0) {
         ff_er_add_slice(&s->er, s->resync_mb_x, s->resync_mb_y,
                         s->mb_x, s->mb_y, part_a_error);
-        return -1;
+        return mb_num ? mb_num : AVERROR_INVALIDDATA;
     }
 
     if (s->resync_mb_x + s->resync_mb_y * s->mb_width + mb_num > s->mb_num) {
         av_log(s->avctx, AV_LOG_ERROR, "slice below monitor ...\n");
         ff_er_add_slice(&s->er, s->resync_mb_x, s->resync_mb_y,
                         s->mb_x, s->mb_y, part_a_error);
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     s->mb_num_left = mb_num;
@@ -945,7 +1005,7 @@
             av_log(s->avctx, AV_LOG_ERROR,
                    "marker missing after first I partition at %d %d\n",
                    s->mb_x, s->mb_y);
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
     } else {
         while (show_bits(&s->gb, 10) == 1)
@@ -954,17 +1014,18 @@
             av_log(s->avctx, AV_LOG_ERROR,
                    "marker missing after first P partition at %d %d\n",
                    s->mb_x, s->mb_y);
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
     }
     ff_er_add_slice(&s->er, s->resync_mb_x, s->resync_mb_y,
                     s->mb_x - 1, s->mb_y, part_a_end);
 
-    if (mpeg4_decode_partition_b(s, mb_num) < 0) {
+    ret = mpeg4_decode_partition_b(s, mb_num);
+    if (ret < 0) {
         if (s->pict_type == AV_PICTURE_TYPE_P)
             ff_er_add_slice(&s->er, s->resync_mb_x, s->resync_mb_y,
                             s->mb_x, s->mb_y, ER_DC_ERROR);
-        return -1;
+        return ret;
     } else {
         if (s->pict_type == AV_PICTURE_TYPE_P)
             ff_er_add_slice(&s->er, s->resync_mb_x, s->resync_mb_y,
@@ -1003,7 +1064,7 @@
             } else {
                 level = mpeg4_decode_dc(s, n, &dc_pred_dir);
                 if (level < 0)
-                    return -1;
+                    return level;
             }
             block[0] = level;
             i        = 0;
@@ -1071,7 +1132,7 @@
                     if (SHOW_UBITS(re, &s->gb, 1) == 0) {
                         av_log(s->avctx, AV_LOG_ERROR,
                                "1. marker bit missing in rvlc esc\n");
-                        return -1;
+                        return AVERROR_INVALIDDATA;
                     }
                     SKIP_CACHE(re, &s->gb, 1);
 
@@ -1084,7 +1145,7 @@
                     if (SHOW_UBITS(re, &s->gb, 1) == 0) {
                         av_log(s->avctx, AV_LOG_ERROR,
                                "2. marker bit missing in rvlc esc\n");
-                        return -1;
+                        return AVERROR_INVALIDDATA;
                     }
                     SKIP_CACHE(re, &s->gb, 1);
 
@@ -1093,7 +1154,7 @@
 
                     if (SHOW_UBITS(re, &s->gb, 5) != 0x10) {
                         av_log(s->avctx, AV_LOG_ERROR, "reverse esc missing\n");
-                        return -1;
+                        return AVERROR_INVALIDDATA;
                     }
                     SKIP_CACHE(re, &s->gb, 5);
 
@@ -1129,7 +1190,7 @@
                                     av_log(s->avctx, AV_LOG_ERROR,
                                            "1. marker bit missing in 3. esc\n");
                                     if (!(s->avctx->err_recognition & AV_EF_IGNORE_ERR))
-                                        return -1;
+                                        return AVERROR_INVALIDDATA;
                                 }
                                 SKIP_CACHE(re, &s->gb, 1);
 
@@ -1140,7 +1201,7 @@
                                     av_log(s->avctx, AV_LOG_ERROR,
                                            "2. marker bit missing in 3. esc\n");
                                     if (!(s->avctx->err_recognition & AV_EF_IGNORE_ERR))
-                                        return -1;
+                                        return AVERROR_INVALIDDATA;
                                 }
 
                                 SKIP_COUNTER(re, &s->gb, 1 + 12 + 1);
@@ -1153,16 +1214,16 @@
                                     const int run1= run - rl->max_run[last][abs_level] - 1;
                                     if (abs_level <= rl->max_level[last][run]) {
                                         av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, vlc encoding possible\n");
-                                        return -1;
+                                        return AVERROR_INVALIDDATA;
                                     }
                                     if (s->error_recognition > FF_ER_COMPLIANT) {
                                         if (abs_level <= rl->max_level[last][run]*2) {
                                             av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 1 encoding possible\n");
-                                            return -1;
+                                            return AVERROR_INVALIDDATA;
                                         }
                                         if (run1 >= 0 && abs_level <= rl->max_level[last][run1]) {
                                             av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 2 encoding possible\n");
-                                            return -1;
+                                            return AVERROR_INVALIDDATA;
                                         }
                                     }
                                 }
@@ -1179,7 +1240,7 @@
                                         av_log(s->avctx, AV_LOG_ERROR,
                                                "|level| overflow in 3. esc, qp=%d\n",
                                                s->qscale);
-                                        return -1;
+                                        return AVERROR_INVALIDDATA;
                                     }
                                 }
                                 level = level < 0 ? -2048 : 2047;
@@ -1217,7 +1278,7 @@
                 if (i & (~63)) {
                     av_log(s->avctx, AV_LOG_ERROR,
                            "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
-                    return -1;
+                    return AVERROR_INVALIDDATA;
                 }
 
                 block[scan_table[i]] = level;
@@ -1314,7 +1375,7 @@
                 av_log(s->avctx, AV_LOG_ERROR,
                        "texture corrupted at %d %d %d\n",
                        s->mb_x, s->mb_y, s->mb_intra);
-                return -1;
+                return AVERROR_INVALIDDATA;
             }
             cbp += cbp;
         }
@@ -1382,7 +1443,7 @@
             if (cbpc < 0) {
                 av_log(s->avctx, AV_LOG_ERROR,
                        "mcbpc damaged at %d %d\n", s->mb_x, s->mb_y);
-                return -1;
+                return AVERROR_INVALIDDATA;
             }
         } while (cbpc == 20);
 
@@ -1438,11 +1499,11 @@
                 for (i = 0; i < 2; i++) {
                     mx = ff_h263_decode_motion(s, pred_x, s->f_code);
                     if (mx >= 0xffff)
-                        return -1;
+                        return AVERROR_INVALIDDATA;
 
                     my = ff_h263_decode_motion(s, pred_y / 2, s->f_code);
                     if (my >= 0xffff)
-                        return -1;
+                        return AVERROR_INVALIDDATA;
 
                     s->mv[0][i][0] = mx;
                     s->mv[0][i][1] = my;
@@ -1455,12 +1516,12 @@
                 mx = ff_h263_decode_motion(s, pred_x, s->f_code);
 
                 if (mx >= 0xffff)
-                    return -1;
+                    return AVERROR_INVALIDDATA;
 
                 my = ff_h263_decode_motion(s, pred_y, s->f_code);
 
                 if (my >= 0xffff)
-                    return -1;
+                    return AVERROR_INVALIDDATA;
                 s->mv[0][0][0] = mx;
                 s->mv[0][0][1] = my;
             }
@@ -1471,11 +1532,11 @@
                 mot_val = ff_h263_pred_motion(s, i, 0, &pred_x, &pred_y);
                 mx      = ff_h263_decode_motion(s, pred_x, s->f_code);
                 if (mx >= 0xffff)
-                    return -1;
+                    return AVERROR_INVALIDDATA;
 
                 my = ff_h263_decode_motion(s, pred_y, s->f_code);
                 if (my >= 0xffff)
-                    return -1;
+                    return AVERROR_INVALIDDATA;
                 s->mv[0][i][0] = mx;
                 s->mv[0][i][1] = my;
                 mot_val[0]     = mx;
@@ -1531,7 +1592,7 @@
             mb_type = get_vlc2(&s->gb, mb_type_b_vlc.table, MB_TYPE_B_VLC_BITS, 1);
             if (mb_type < 0) {
                 av_log(s->avctx, AV_LOG_ERROR, "illegal MB_type\n");
-                return -1;
+                return AVERROR_INVALIDDATA;
             }
             mb_type = mb_type_b_map[mb_type];
             if (modb2) {
@@ -1642,7 +1703,7 @@
             if (cbpc < 0) {
                 av_log(s->avctx, AV_LOG_ERROR,
                        "I cbpc damaged at %d %d\n", s->mb_x, s->mb_y);
-                return -1;
+                return AVERROR_INVALIDDATA;
             }
         } while (cbpc == 8);
 
@@ -1660,7 +1721,7 @@
         if (cbpy < 0) {
             av_log(s->avctx, AV_LOG_ERROR,
                    "I cbpy damaged at %d %d\n", s->mb_x, s->mb_y);
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
         cbp = (cbpc & 3) | (cbpy << 2);
 
@@ -1676,7 +1737,7 @@
         /* decode each block */
         for (i = 0; i < 6; i++) {
             if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, 1, 0) < 0)
-                return -1;
+                return AVERROR_INVALIDDATA;
             cbp += cbp;
         }
         goto end;
@@ -1685,7 +1746,7 @@
     /* decode each block */
     for (i = 0; i < 6; i++) {
         if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, 0, 0) < 0)
-            return -1;
+            return AVERROR_INVALIDDATA;
         cbp += cbp;
     }
 
@@ -1695,7 +1756,7 @@
         int next = mpeg4_is_resync(ctx);
         if (next) {
             if        (s->mb_x + s->mb_y*s->mb_width + 1 >  next && (s->avctx->err_recognition & AV_EF_AGGRESSIVE)) {
-                return -1;
+                return AVERROR_INVALIDDATA;
             } else if (s->mb_x + s->mb_y*s->mb_width + 1 >= next)
                 return SLICE_END;
 
@@ -1716,13 +1777,291 @@
     return SLICE_OK;
 }
 
+/* As per spec, studio start code search isn't the same as the old type of start code */
+static void next_start_code_studio(GetBitContext *gb)
+{
+    align_get_bits(gb);
+
+    while (get_bits_left(gb) >= 24 && show_bits_long(gb, 24) != 0x1) {
+        get_bits(gb, 8);
+    }
+}
+
+/* additional_code, vlc index */
+static const uint8_t ac_state_tab[22][2] =
+{
+    {0, 0},
+    {0, 1},
+    {1, 1},
+    {2, 1},
+    {3, 1},
+    {4, 1},
+    {5, 1},
+    {1, 2},
+    {2, 2},
+    {3, 2},
+    {4, 2},
+    {5, 2},
+    {6, 2},
+    {1, 3},
+    {2, 4},
+    {3, 5},
+    {4, 6},
+    {5, 7},
+    {6, 8},
+    {7, 9},
+    {8, 10},
+    {0, 11}
+};
+
+static int mpeg4_decode_studio_block(MpegEncContext *s, int32_t block[64], int n)
+{
+    Mpeg4DecContext *ctx = s->avctx->priv_data;
+
+    int cc, dct_dc_size, dct_diff, code, j, idx = 1, group = 0, run = 0,
+        additional_code_len, sign, mismatch;
+    VLC *cur_vlc = &ctx->studio_intra_tab[0];
+    uint8_t *const scantable = s->intra_scantable.permutated;
+    const uint16_t *quant_matrix;
+    uint32_t flc;
+    const int min = -1 *  (1 << (s->avctx->bits_per_raw_sample + 6));
+    const int max =      ((1 << (s->avctx->bits_per_raw_sample + 6)) - 1);
+
+    mismatch = 1;
+
+    memset(block, 0, 64 * sizeof(int32_t));
+
+    if (n < 4) {
+        cc = 0;
+        dct_dc_size = get_vlc2(&s->gb, ctx->studio_luma_dc.table, STUDIO_INTRA_BITS, 2);
+        quant_matrix = s->intra_matrix;
+    } else {
+        cc = (n & 1) + 1;
+        if (ctx->rgb)
+            dct_dc_size = get_vlc2(&s->gb, ctx->studio_luma_dc.table, STUDIO_INTRA_BITS, 2);
+        else
+            dct_dc_size = get_vlc2(&s->gb, ctx->studio_chroma_dc.table, STUDIO_INTRA_BITS, 2);
+        quant_matrix = s->chroma_intra_matrix;
+    }
+
+    if (dct_dc_size < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "illegal dct_dc_size vlc\n");
+        return AVERROR_INVALIDDATA;
+    } else if (dct_dc_size == 0) {
+        dct_diff = 0;
+    } else {
+        dct_diff = get_xbits(&s->gb, dct_dc_size);
+
+        if (dct_dc_size > 8) {
+            if(!check_marker(s->avctx, &s->gb, "dct_dc_size > 8"))
+                return AVERROR_INVALIDDATA;
+        }
+
+    }
+
+    s->last_dc[cc] += dct_diff;
+
+    if (s->mpeg_quant)
+        block[0] = s->last_dc[cc] * (8 >> s->intra_dc_precision);
+    else
+        block[0] = s->last_dc[cc] * (8 >> s->intra_dc_precision) * (8 >> s->dct_precision);
+    /* TODO: support mpeg_quant for AC coefficients */
+
+    block[0] = av_clip(block[0], min, max);
+    mismatch ^= block[0];
+
+    /* AC Coefficients */
+    while (1) {
+        group = get_vlc2(&s->gb, cur_vlc->table, STUDIO_INTRA_BITS, 2);
+
+        if (group < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "illegal ac coefficient group vlc\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        additional_code_len = ac_state_tab[group][0];
+        cur_vlc = &ctx->studio_intra_tab[ac_state_tab[group][1]];
+
+        if (group == 0) {
+            /* End of Block */
+            break;
+        } else if (group >= 1 && group <= 6) {
+            /* Zero run length (Table B.47) */
+            run = 1 << additional_code_len;
+            if (additional_code_len)
+                run += get_bits(&s->gb, additional_code_len);
+            idx += run;
+            continue;
+        } else if (group >= 7 && group <= 12) {
+            /* Zero run length and +/-1 level (Table B.48) */
+            code = get_bits(&s->gb, additional_code_len);
+            sign = code & 1;
+            code >>= 1;
+            run = (1 << (additional_code_len - 1)) + code;
+            idx += run;
+            j = scantable[idx++];
+            block[j] = sign ? 1 : -1;
+        } else if (group >= 13 && group <= 20) {
+            /* Level value (Table B.49) */
+            j = scantable[idx++];
+            block[j] = get_xbits(&s->gb, additional_code_len);
+        } else if (group == 21) {
+            /* Escape */
+            j = scantable[idx++];
+            additional_code_len = s->avctx->bits_per_raw_sample + s->dct_precision + 4;
+            flc = get_bits(&s->gb, additional_code_len);
+            if (flc >> (additional_code_len-1))
+                block[j] = -1 * (( flc ^ ((1 << additional_code_len) -1)) + 1);
+            else
+                block[j] = flc;
+        }
+        block[j] = ((8 * 2 * block[j] * quant_matrix[j] * s->qscale) >> s->dct_precision) / 32;
+        block[j] = av_clip(block[j], min, max);
+        mismatch ^= block[j];
+    }
+
+    block[63] ^= mismatch & 1;
+
+    return 0;
+}
+
+static int mpeg4_decode_dpcm_macroblock(MpegEncContext *s, int16_t macroblock[256], int n)
+{
+    int i, j, w, h, idx = 0;
+    int block_mean, rice_parameter, rice_prefix_code, rice_suffix_code,
+        dpcm_residual, left, top, topleft, min_left_top, max_left_top, p, p2, output;
+    h = 16 >> (n ? s->chroma_y_shift : 0);
+    w = 16 >> (n ? s->chroma_x_shift : 0);
+
+    block_mean = get_bits(&s->gb, s->avctx->bits_per_raw_sample);
+    if (block_mean == 0){
+        av_log(s->avctx, AV_LOG_ERROR, "Forbidden block_mean\n");
+        return AVERROR_INVALIDDATA;
+    }
+    s->last_dc[n] = block_mean * (1 << (s->dct_precision + s->intra_dc_precision));
+
+    rice_parameter = get_bits(&s->gb, 4);
+    if (rice_parameter == 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "Forbidden rice_parameter\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (rice_parameter == 15)
+        rice_parameter = 0;
+
+    if (rice_parameter > 11) {
+        av_log(s->avctx, AV_LOG_ERROR, "Forbidden rice_parameter\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    for (i = 0; i < h; i++) {
+        output = 1 << (s->avctx->bits_per_raw_sample - 1);
+        top = 1 << (s->avctx->bits_per_raw_sample - 1);
+
+        for (j = 0; j < w; j++) {
+            left = output;
+            topleft = top;
+
+            rice_prefix_code = get_unary(&s->gb, 1, 12);
+
+            /* Escape */
+            if (rice_prefix_code == 11)
+                dpcm_residual = get_bits(&s->gb, s->avctx->bits_per_raw_sample);
+            else {
+                if (rice_prefix_code == 12) {
+                    av_log(s->avctx, AV_LOG_ERROR, "Forbidden rice_prefix_code\n");
+                    return AVERROR_INVALIDDATA;
+                }
+                rice_suffix_code = get_bitsz(&s->gb, rice_parameter);
+                dpcm_residual = (rice_prefix_code << rice_parameter) + rice_suffix_code;
+            }
+
+            /* Map to a signed residual */
+            if (dpcm_residual & 1)
+                dpcm_residual = (-1 * dpcm_residual) >> 1;
+            else
+                dpcm_residual = (dpcm_residual >> 1);
+
+            if (i != 0)
+                top = macroblock[idx-w];
+
+            p = left + top - topleft;
+            min_left_top = FFMIN(left, top);
+            if (p < min_left_top)
+                p = min_left_top;
+
+            max_left_top = FFMAX(left, top);
+            if (p > max_left_top)
+                p = max_left_top;
+
+            p2 = (FFMIN(min_left_top, topleft) + FFMAX(max_left_top, topleft)) >> 1;
+            if (p2 == p)
+                p2 = block_mean;
+
+            if (p2 > p)
+                dpcm_residual *= -1;
+
+            macroblock[idx++] = output = (dpcm_residual + p) & ((1 << s->avctx->bits_per_raw_sample) - 1);
+        }
+    }
+
+    return 0;
+}
+
+static int mpeg4_decode_studio_mb(MpegEncContext *s, int16_t block_[12][64])
+{
+    int i;
+
+    s->dpcm_direction = 0;
+
+    /* StudioMacroblock */
+    /* Assumes I-VOP */
+    s->mb_intra = 1;
+    if (get_bits1(&s->gb)) { /* compression_mode */
+        /* DCT */
+        /* macroblock_type, 1 or 2-bit VLC */
+        if (!get_bits1(&s->gb)) {
+            skip_bits1(&s->gb);
+            s->qscale = mpeg_get_qscale(s);
+        }
+
+        for (i = 0; i < mpeg4_block_count[s->chroma_format]; i++) {
+            if (mpeg4_decode_studio_block(s, (*s->block32)[i], i) < 0)
+                return AVERROR_INVALIDDATA;
+        }
+    } else {
+        /* DPCM */
+        check_marker(s->avctx, &s->gb, "DPCM block start");
+        s->dpcm_direction = get_bits1(&s->gb) ? -1 : 1;
+        for (i = 0; i < 3; i++) {
+            if (mpeg4_decode_dpcm_macroblock(s, (*s->dpcm_macroblock)[i], i) < 0)
+                return AVERROR_INVALIDDATA;
+        }
+    }
+
+    if (get_bits_left(&s->gb) >= 24 && show_bits(&s->gb, 23) == 0) {
+        next_start_code_studio(&s->gb);
+        return SLICE_END;
+    }
+
+    //vcon-stp9L1.bits (first frame)
+    if (get_bits_left(&s->gb) == 0)
+        return SLICE_END;
+
+    //vcon-stp2L1.bits, vcon-stp3L1.bits, vcon-stp6L1.bits, vcon-stp7L1.bits, vcon-stp8L1.bits, vcon-stp10L1.bits (first frame)
+    if (get_bits_left(&s->gb) < 8U && show_bits(&s->gb, get_bits_left(&s->gb)) == 0)
+        return SLICE_END;
+
+    return SLICE_OK;
+}
+
 static int mpeg4_decode_gop_header(MpegEncContext *s, GetBitContext *gb)
 {
     int hours, minutes, seconds;
 
     if (!show_bits(gb, 23)) {
         av_log(s->avctx, AV_LOG_WARNING, "GOP header invalid\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     hours   = get_bits(gb, 5);
@@ -1738,20 +2077,69 @@
     return 0;
 }
 
-static int mpeg4_decode_profile_level(MpegEncContext *s, GetBitContext *gb)
+static int mpeg4_decode_profile_level(MpegEncContext *s, GetBitContext *gb, int *profile, int *level)
 {
 
-    s->avctx->profile = get_bits(gb, 4);
-    s->avctx->level   = get_bits(gb, 4);
+    *profile = get_bits(gb, 4);
+    *level   = get_bits(gb, 4);
 
     // for Simple profile, level 0
-    if (s->avctx->profile == 0 && s->avctx->level == 8) {
-        s->avctx->level = 0;
+    if (*profile == 0 && *level == 8) {
+        *level = 0;
     }
 
     return 0;
 }
 
+static int mpeg4_decode_visual_object(MpegEncContext *s, GetBitContext *gb)
+{
+    int visual_object_type;
+    int is_visual_object_identifier = get_bits1(gb);
+
+    if (is_visual_object_identifier) {
+        skip_bits(gb, 4+3);
+    }
+    visual_object_type = get_bits(gb, 4);
+
+    if (visual_object_type == VOT_VIDEO_ID ||
+        visual_object_type == VOT_STILL_TEXTURE_ID) {
+        int video_signal_type = get_bits1(gb);
+        if (video_signal_type) {
+            int video_range, color_description;
+            skip_bits(gb, 3); // video_format
+            video_range = get_bits1(gb);
+            color_description = get_bits1(gb);
+
+            s->avctx->color_range = video_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
+
+            if (color_description) {
+                s->avctx->color_primaries = get_bits(gb, 8);
+                s->avctx->color_trc       = get_bits(gb, 8);
+                s->avctx->colorspace      = get_bits(gb, 8);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static void mpeg4_load_default_matrices(MpegEncContext *s)
+{
+    int i, v;
+
+    /* load default matrices */
+    for (i = 0; i < 64; i++) {
+        int j = s->idsp.idct_permutation[i];
+        v = ff_mpeg4_default_intra_matrix[i];
+        s->intra_matrix[j]        = v;
+        s->chroma_intra_matrix[j] = v;
+
+        v = ff_mpeg4_default_non_intra_matrix[i];
+        s->inter_matrix[j]        = v;
+        s->chroma_inter_matrix[j] = v;
+    }
+}
+
 static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
 {
     MpegEncContext *s = &ctx->m;
@@ -1760,6 +2148,23 @@
     /* vol header */
     skip_bits(gb, 1);                   /* random access */
     s->vo_type = get_bits(gb, 8);
+
+    /* If we are in studio profile (per vo_type), check if its all consistent
+     * and if so continue pass control to decode_studio_vol_header().
+     * elIf something is inconsistent, error out
+     * else continue with (non studio) vol header decpoding.
+     */
+    if (s->vo_type == CORE_STUDIO_VO_TYPE ||
+        s->vo_type == SIMPLE_STUDIO_VO_TYPE) {
+        if (s->avctx->profile != FF_PROFILE_UNKNOWN && s->avctx->profile != FF_PROFILE_MPEG4_SIMPLE_STUDIO)
+            return AVERROR_INVALIDDATA;
+        s->studio_profile = 1;
+        s->avctx->profile = FF_PROFILE_MPEG4_SIMPLE_STUDIO;
+        return decode_studio_vol_header(ctx, gb);
+    } else if (s->studio_profile) {
+        return AVERROR_PATCHWELCOME;
+    }
+
     if (get_bits1(gb) != 0) {           /* is_ol_id */
         vo_ver_id = get_bits(gb, 4);    /* vo_ver_id */
         skip_bits(gb, 3);               /* vo_priority */
@@ -1915,17 +2320,7 @@
         if ((s->mpeg_quant = get_bits1(gb))) { /* vol_quant_type */
             int i, v;
 
-            /* load default matrixes */
-            for (i = 0; i < 64; i++) {
-                int j = s->idsp.idct_permutation[i];
-                v = ff_mpeg4_default_intra_matrix[i];
-                s->intra_matrix[j]        = v;
-                s->chroma_intra_matrix[j] = v;
-
-                v = ff_mpeg4_default_non_intra_matrix[i];
-                s->inter_matrix[j]        = v;
-                s->chroma_inter_matrix[j] = v;
-            }
+            mpeg4_load_default_matrices(s);
 
             /* load custom intra matrix */
             if (get_bits1(gb)) {
@@ -2569,6 +2964,241 @@
     return 0;
 }
 
+static int read_quant_matrix_ext(MpegEncContext *s, GetBitContext *gb)
+{
+    int i, j, v;
+
+    if (get_bits1(gb)) {
+        if (get_bits_left(gb) < 64*8)
+            return AVERROR_INVALIDDATA;
+        /* intra_quantiser_matrix */
+        for (i = 0; i < 64; i++) {
+            v = get_bits(gb, 8);
+            j = s->idsp.idct_permutation[ff_zigzag_direct[i]];
+            s->intra_matrix[j]        = v;
+            s->chroma_intra_matrix[j] = v;
+        }
+    }
+
+    if (get_bits1(gb)) {
+        if (get_bits_left(gb) < 64*8)
+            return AVERROR_INVALIDDATA;
+        /* non_intra_quantiser_matrix */
+        for (i = 0; i < 64; i++) {
+            get_bits(gb, 8);
+        }
+    }
+
+    if (get_bits1(gb)) {
+        if (get_bits_left(gb) < 64*8)
+            return AVERROR_INVALIDDATA;
+        /* chroma_intra_quantiser_matrix */
+        for (i = 0; i < 64; i++) {
+            v = get_bits(gb, 8);
+            j = s->idsp.idct_permutation[ff_zigzag_direct[i]];
+            s->chroma_intra_matrix[j] = v;
+        }
+    }
+
+    if (get_bits1(gb)) {
+        if (get_bits_left(gb) < 64*8)
+            return AVERROR_INVALIDDATA;
+        /* chroma_non_intra_quantiser_matrix */
+        for (i = 0; i < 64; i++) {
+            get_bits(gb, 8);
+        }
+    }
+
+    next_start_code_studio(gb);
+    return 0;
+}
+
+static void extension_and_user_data(MpegEncContext *s, GetBitContext *gb, int id)
+{
+    uint32_t startcode;
+    uint8_t extension_type;
+
+    startcode = show_bits_long(gb, 32);
+    if (startcode == USER_DATA_STARTCODE || startcode == EXT_STARTCODE) {
+
+        if ((id == 2 || id == 4) && startcode == EXT_STARTCODE) {
+            skip_bits_long(gb, 32);
+            extension_type = get_bits(gb, 4);
+            if (extension_type == QUANT_MATRIX_EXT_ID)
+                read_quant_matrix_ext(s, gb);
+        }
+    }
+}
+
+static void decode_smpte_tc(Mpeg4DecContext *ctx, GetBitContext *gb)
+{
+    MpegEncContext *s = &ctx->m;
+
+    skip_bits(gb, 16); /* Time_code[63..48] */
+    check_marker(s->avctx, gb, "after Time_code[63..48]");
+    skip_bits(gb, 16); /* Time_code[47..32] */
+    check_marker(s->avctx, gb, "after Time_code[47..32]");
+    skip_bits(gb, 16); /* Time_code[31..16] */
+    check_marker(s->avctx, gb, "after Time_code[31..16]");
+    skip_bits(gb, 16); /* Time_code[15..0] */
+    check_marker(s->avctx, gb, "after Time_code[15..0]");
+    skip_bits(gb, 4); /* reserved_bits */
+}
+
+/**
+ * Decode the next studio vop header.
+ * @return <0 if something went wrong
+ */
+static int decode_studio_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
+{
+    MpegEncContext *s = &ctx->m;
+
+    if (get_bits_left(gb) <= 32)
+        return 0;
+
+    s->decode_mb = mpeg4_decode_studio_mb;
+
+    decode_smpte_tc(ctx, gb);
+
+    skip_bits(gb, 10); /* temporal_reference */
+    skip_bits(gb, 2); /* vop_structure */
+    s->pict_type = get_bits(gb, 2) + AV_PICTURE_TYPE_I; /* vop_coding_type */
+    if (get_bits1(gb)) { /* vop_coded */
+        skip_bits1(gb); /* top_field_first */
+        skip_bits1(gb); /* repeat_first_field */
+        s->progressive_frame = get_bits1(gb) ^ 1; /* progressive_frame */
+    }
+
+    if (s->pict_type == AV_PICTURE_TYPE_I) {
+        if (get_bits1(gb))
+            reset_studio_dc_predictors(s);
+    }
+
+    if (ctx->shape != BIN_ONLY_SHAPE) {
+        s->alternate_scan = get_bits1(gb);
+        s->frame_pred_frame_dct = get_bits1(gb);
+        s->dct_precision = get_bits(gb, 2);
+        s->intra_dc_precision = get_bits(gb, 2);
+        s->q_scale_type = get_bits1(gb);
+    }
+
+    if (s->alternate_scan) {
+        ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable,   ff_alternate_vertical_scan);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable,   ff_alternate_vertical_scan);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
+    } else {
+        ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable,   ff_zigzag_direct);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable,   ff_zigzag_direct);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
+        ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
+    }
+
+    mpeg4_load_default_matrices(s);
+
+    next_start_code_studio(gb);
+    extension_and_user_data(s, gb, 4);
+
+    return 0;
+}
+
+static int decode_studiovisualobject(Mpeg4DecContext *ctx, GetBitContext *gb)
+{
+    MpegEncContext *s = &ctx->m;
+    int visual_object_type;
+
+        skip_bits(gb, 4); /* visual_object_verid */
+        visual_object_type = get_bits(gb, 4);
+        if (visual_object_type != VOT_VIDEO_ID) {
+            avpriv_request_sample(s->avctx, "VO type %u", visual_object_type);
+            return AVERROR_PATCHWELCOME;
+        }
+
+        next_start_code_studio(gb);
+        extension_and_user_data(s, gb, 1);
+
+    return 0;
+}
+
+static int decode_studio_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
+{
+    MpegEncContext *s = &ctx->m;
+    int width, height;
+    int bits_per_raw_sample;
+
+            // random_accessible_vol and video_object_type_indication have already
+            // been read by the caller decode_vol_header()
+            skip_bits(gb, 4); /* video_object_layer_verid */
+            ctx->shape = get_bits(gb, 2); /* video_object_layer_shape */
+            skip_bits(gb, 4); /* video_object_layer_shape_extension */
+            skip_bits1(gb); /* progressive_sequence */
+            if (ctx->shape != BIN_ONLY_SHAPE) {
+                ctx->rgb = get_bits1(gb); /* rgb_components */
+                s->chroma_format = get_bits(gb, 2); /* chroma_format */
+                if (!s->chroma_format) {
+                    av_log(s->avctx, AV_LOG_ERROR, "illegal chroma format\n");
+                    return AVERROR_INVALIDDATA;
+                }
+
+                bits_per_raw_sample = get_bits(gb, 4); /* bit_depth */
+                if (bits_per_raw_sample == 10) {
+                    if (ctx->rgb) {
+                        s->avctx->pix_fmt = AV_PIX_FMT_GBRP10;
+                    }
+                    else {
+                        s->avctx->pix_fmt = s->chroma_format == CHROMA_422 ? AV_PIX_FMT_YUV422P10 : AV_PIX_FMT_YUV444P10;
+                    }
+                }
+                else {
+                    avpriv_request_sample(s->avctx, "MPEG-4 Studio profile bit-depth %u", bits_per_raw_sample);
+                    return AVERROR_PATCHWELCOME;
+                }
+                s->avctx->bits_per_raw_sample = bits_per_raw_sample;
+            }
+            if (ctx->shape == RECT_SHAPE) {
+                check_marker(s->avctx, gb, "before video_object_layer_width");
+                width = get_bits(gb, 14); /* video_object_layer_width */
+                check_marker(s->avctx, gb, "before video_object_layer_height");
+                height = get_bits(gb, 14); /* video_object_layer_height */
+                check_marker(s->avctx, gb, "after video_object_layer_height");
+
+                /* Do the same check as non-studio profile */
+                if (width && height) {
+                    if (s->width && s->height &&
+                        (s->width != width || s->height != height))
+                        s->context_reinit = 1;
+                    s->width  = width;
+                    s->height = height;
+                }
+            }
+            s->aspect_ratio_info = get_bits(gb, 4);
+            if (s->aspect_ratio_info == FF_ASPECT_EXTENDED) {
+                s->avctx->sample_aspect_ratio.num = get_bits(gb, 8);  // par_width
+                s->avctx->sample_aspect_ratio.den = get_bits(gb, 8);  // par_height
+            } else {
+                s->avctx->sample_aspect_ratio = ff_h263_pixel_aspect[s->aspect_ratio_info];
+            }
+            skip_bits(gb, 4); /* frame_rate_code */
+            skip_bits(gb, 15); /* first_half_bit_rate */
+            check_marker(s->avctx, gb, "after first_half_bit_rate");
+            skip_bits(gb, 15); /* latter_half_bit_rate */
+            check_marker(s->avctx, gb, "after latter_half_bit_rate");
+            skip_bits(gb, 15); /* first_half_vbv_buffer_size */
+            check_marker(s->avctx, gb, "after first_half_vbv_buffer_size");
+            skip_bits(gb, 3); /* latter_half_vbv_buffer_size */
+            skip_bits(gb, 11); /* first_half_vbv_buffer_size */
+            check_marker(s->avctx, gb, "after first_half_vbv_buffer_size");
+            skip_bits(gb, 15); /* latter_half_vbv_occupancy */
+            check_marker(s->avctx, gb, "after latter_half_vbv_occupancy");
+            s->low_delay = get_bits1(gb);
+            s->mpeg_quant = get_bits1(gb); /* mpeg2_stream */
+
+            next_start_code_studio(gb);
+            extension_and_user_data(s, gb, 2);
+
+    return 0;
+}
+
 /**
  * Decode MPEG-4 headers.
  * @return <0 if no VOP found (or a damaged one)
@@ -2585,6 +3215,12 @@
     /* search next start code */
     align_get_bits(gb);
 
+    // If we have not switched to studio profile than we also did not switch bps
+    // that means something else (like a previous instance) outside set bps which
+    // would be inconsistant with the currect state, thus reset it
+    if (!s->studio_profile && s->avctx->bits_per_raw_sample != 8)
+        s->avctx->bits_per_raw_sample = 0;
+
     if (s->codec_tag == AV_RL32("WV1F") && show_bits(gb, 24) == 0x575630) {
         skip_bits(gb, 24);
         if (get_bits(gb, 8) == 0xF0)
@@ -2599,7 +3235,7 @@
                 av_log(s->avctx, AV_LOG_VERBOSE, "frame skip %d\n", gb->size_in_bits);
                 return FRAME_SKIPPED;  // divx bug
             } else
-                return -1;  // end of stream
+                return AVERROR_INVALIDDATA;  // end of stream
         }
 
         /* use the bits after the test */
@@ -2681,7 +3317,25 @@
         } else if (startcode == GOP_STARTCODE) {
             mpeg4_decode_gop_header(s, gb);
         } else if (startcode == VOS_STARTCODE) {
-            mpeg4_decode_profile_level(s, gb);
+            int profile, level;
+            mpeg4_decode_profile_level(s, gb, &profile, &level);
+            if (profile == FF_PROFILE_MPEG4_SIMPLE_STUDIO &&
+                (level > 0 && level < 9)) {
+                s->studio_profile = 1;
+                next_start_code_studio(gb);
+                extension_and_user_data(s, gb, 0);
+            } else if (s->studio_profile) {
+                avpriv_request_sample(s->avctx, "Mixes studio and non studio profile\n");
+                return AVERROR_PATCHWELCOME;
+            }
+            s->avctx->profile = profile;
+            s->avctx->level   = level;
+        } else if (startcode == VISUAL_OBJ_STARTCODE) {
+            if (s->studio_profile) {
+                if ((ret = decode_studiovisualobject(ctx, gb)) < 0)
+                    return ret;
+            } else
+                mpeg4_decode_visual_object(s, gb);
         } else if (startcode == VOP_STARTCODE) {
             break;
         }
@@ -2695,7 +3349,14 @@
         s->low_delay = 1;
     s->avctx->has_b_frames = !s->low_delay;
 
-    return decode_vop_header(ctx, gb);
+    if (s->studio_profile) {
+        if (!s->avctx->bits_per_raw_sample) {
+            av_log(s->avctx, AV_LOG_ERROR, "Missing VOL header\n");
+            return AVERROR_INVALIDDATA;
+        }
+        return decode_studio_vop_header(ctx, gb);
+    } else
+        return decode_vop_header(ctx, gb);
 }
 
 av_cold void ff_mpeg4videodec_static_init(void) {
@@ -2795,6 +3456,37 @@
 }
 #endif
 
+static av_cold int init_studio_vlcs(Mpeg4DecContext *ctx)
+{
+    int i, ret;
+
+    for (i = 0; i < 12; i++) {
+        ret = init_vlc(&ctx->studio_intra_tab[i], STUDIO_INTRA_BITS, 22,
+                       &ff_mpeg4_studio_intra[i][0][1], 4, 2,
+                       &ff_mpeg4_studio_intra[i][0][0], 4, 2,
+                       0);
+
+        if (ret < 0)
+            return ret;
+    }
+
+    ret = init_vlc(&ctx->studio_luma_dc, STUDIO_INTRA_BITS, 19,
+                   &ff_mpeg4_studio_dc_luma[0][1], 4, 2,
+                   &ff_mpeg4_studio_dc_luma[0][0], 4, 2,
+                   0);
+    if (ret < 0)
+        return ret;
+
+    ret = init_vlc(&ctx->studio_chroma_dc, STUDIO_INTRA_BITS, 19,
+                   &ff_mpeg4_studio_dc_chroma[0][1], 4, 2,
+                   &ff_mpeg4_studio_dc_chroma[0][0], 4, 2,
+                   0);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
 static av_cold int decode_init(AVCodecContext *avctx)
 {
     Mpeg4DecContext *ctx = avctx->priv_data;
@@ -2810,6 +3502,8 @@
         return ret;
 
     ff_mpeg4videodec_static_init();
+    if ((ret = init_studio_vlcs(ctx)) < 0)
+        return ret;
 
     s->h263_pred = 1;
     s->low_delay = 0; /* default, might be overridden in the vol header during header parsing */
@@ -2822,6 +3516,22 @@
     return 0;
 }
 
+static av_cold int decode_end(AVCodecContext *avctx)
+{
+    Mpeg4DecContext *ctx = avctx->priv_data;
+    int i;
+
+    if (!avctx->internal->is_copy) {
+        for (i = 0; i < 12; i++)
+            ff_free_vlc(&ctx->studio_intra_tab[i]);
+
+        ff_free_vlc(&ctx->studio_luma_dc);
+        ff_free_vlc(&ctx->studio_chroma_dc);
+    }
+
+    return ff_h263_decode_end(avctx);
+}
+
 static const AVOption mpeg4_options[] = {
     {"quarter_sample", "1/4 subpel MC", offsetof(MpegEncContext, quarter_sample), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, 0},
     {"divx_packed", "divx style packed b frames", offsetof(MpegEncContext, divx_packed), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, 0},
@@ -2829,10 +3539,10 @@
 };
 
 static const AVClass mpeg4_class = {
-    "MPEG4 Video Decoder",
-    av_default_item_name,
-    mpeg4_options,
-    LIBAVUTIL_VERSION_INT,
+    .class_name = "MPEG4 Video Decoder",
+    .item_name  = av_default_item_name,
+    .option     = mpeg4_options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 AVCodec ff_mpeg4_decoder = {
@@ -2842,7 +3552,7 @@
     .id                    = AV_CODEC_ID_MPEG4,
     .priv_data_size        = sizeof(Mpeg4DecContext),
     .init                  = decode_init,
-    .close                 = ff_h263_decode_end,
+    .close                 = decode_end,
     .decode                = ff_h263_decode_frame,
     .capabilities          = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1 |
                              AV_CODEC_CAP_TRUNCATED | AV_CODEC_CAP_DELAY |
@@ -2854,30 +3564,19 @@
     .profiles              = NULL_IF_CONFIG_SMALL(ff_mpeg4_video_profiles),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(mpeg4_update_thread_context),
     .priv_class = &mpeg4_class,
-};
-
-
-#if CONFIG_MPEG4_VDPAU_DECODER && FF_API_VDPAU
-static const AVClass mpeg4_vdpau_class = {
-    "MPEG4 Video VDPAU Decoder",
-    av_default_item_name,
-    mpeg4_options,
-    LIBAVUTIL_VERSION_INT,
-};
-
-AVCodec ff_mpeg4_vdpau_decoder = {
-    .name           = "mpeg4_vdpau",
-    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 (VDPAU)"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_MPEG4,
-    .priv_data_size = sizeof(Mpeg4DecContext),
-    .init           = decode_init,
-    .close          = ff_h263_decode_end,
-    .decode         = ff_h263_decode_frame,
-    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_TRUNCATED | AV_CODEC_CAP_DELAY |
-                      AV_CODEC_CAP_HWACCEL_VDPAU,
-    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_VDPAU_MPEG4,
-                                                  AV_PIX_FMT_NONE },
-    .priv_class     = &mpeg4_vdpau_class,
-};
+    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
+#if CONFIG_MPEG4_NVDEC_HWACCEL
+                               HWACCEL_NVDEC(mpeg4),
 #endif
+#if CONFIG_MPEG4_VAAPI_HWACCEL
+                               HWACCEL_VAAPI(mpeg4),
+#endif
+#if CONFIG_MPEG4_VDPAU_HWACCEL
+                               HWACCEL_VDPAU(mpeg4),
+#endif
+#if CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL
+                               HWACCEL_VIDEOTOOLBOX(mpeg4),
+#endif
+                               NULL
+                           },
+};

diff --git a/libavcodec/mpeg4videoenc.c b/libavcodec/mpeg4videoenc.c
index 494452c..f6a5992 100644
--- a/libavcodec/mpeg4videoenc.c
+++ b/libavcodec/mpeg4videoenc.c

@@ -882,7 +882,7 @@
 
 static void mpeg4_encode_gop_header(MpegEncContext *s)
 {
-    int hours, minutes, seconds;
+    int64_t hours, minutes, seconds;
     int64_t time;
 
     put_bits(&s->pb, 16, 0);

diff --git a/libavcodec/mpegaudio_parser.c b/libavcodec/mpegaudio_parser.c
index 8c39825..a109f12 100644
--- a/libavcodec/mpegaudio_parser.c
+++ b/libavcodec/mpegaudio_parser.c

@@ -23,6 +23,7 @@
 #include "parser.h"
 #include "mpegaudiodecheader.h"
 #include "libavutil/common.h"
+#include "libavformat/apetag.h" // for APE tag.
 #include "libavformat/id3v1.h" // for ID3v1_TAG_SIZE
 
 typedef struct MpegAudioParseContext {
@@ -98,6 +99,8 @@
                     } else if (codec_id == AV_CODEC_ID_MP3ADU) {
                         avpriv_report_missing_feature(avctx,
                             "MP3ADU full parser");
+                        *poutbuf = NULL;
+                        *poutbuf_size = 0;
                         return 0; /* parsers must not return error codes */
                     }
 
@@ -120,6 +123,12 @@
         return next;
     }
 
+    if (flush && buf_size >= APE_TAG_FOOTER_BYTES && memcmp(buf, APE_TAG_PREAMBLE, 8) == 0) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return next;
+    }
+
     *poutbuf = buf;
     *poutbuf_size = buf_size;
     return next;

diff --git a/libavcodec/mpegaudiodata.h b/libavcodec/mpegaudiodata.h
index 29a2658..a188150 100644
--- a/libavcodec/mpegaudiodata.h
+++ b/libavcodec/mpegaudiodata.h

@@ -29,13 +29,13 @@
 
 #include <stdint.h>
 
-#include "libavutil/internal.h"
+#include "internal.h"
 
 #define MODE_EXT_MS_STEREO 2
 #define MODE_EXT_I_STEREO  1
 
-extern av_export const uint16_t avpriv_mpa_bitrate_tab[2][3][15];
-extern av_export const uint16_t avpriv_mpa_freq_tab[3];
+extern av_export_avcodec const uint16_t avpriv_mpa_bitrate_tab[2][3][15];
+extern av_export_avcodec const uint16_t avpriv_mpa_freq_tab[3];
 extern const int ff_mpa_sblimit_table[5];
 extern const int ff_mpa_quant_steps[17];
 extern const int ff_mpa_quant_bits[17];

diff --git a/libavcodec/mpegaudiodecheader.c b/libavcodec/mpegaudiodecheader.c
index ae86b08..6cc79f1 100644
--- a/libavcodec/mpegaudiodecheader.c
+++ b/libavcodec/mpegaudiodecheader.c

@@ -152,15 +152,3 @@
     *bit_rate = s->bit_rate;
     return s->frame_size;
 }
-
-#if LIBAVCODEC_VERSION_MAJOR < 58
-int avpriv_mpa_decode_header2(uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate, enum AVCodecID *codec_id)
-{
-    return ff_mpa_decode_header(head, sample_rate, channels, frame_size, bit_rate, codec_id);
-}
-
-int avpriv_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate)
-{
-    return ff_mpa_decode_header(head, sample_rate, channels, frame_size, bit_rate, &avctx->codec_id);
-}
-#endif

diff --git a/libavcodec/mpegaudiodecheader.h b/libavcodec/mpegaudiodecheader.h
index 952ba17..1da2a4c 100644
--- a/libavcodec/mpegaudiodecheader.h
+++ b/libavcodec/mpegaudiodecheader.h

@@ -57,16 +57,14 @@
 int ff_mpa_decode_header(uint32_t head, int *sample_rate,
                          int *channels, int *frame_size, int *bitrate, enum AVCodecID *codec_id);
 
-#if LIBAVCODEC_VERSION_MAJOR < 58
-int avpriv_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
-int avpriv_mpa_decode_header2(uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate, enum AVCodecID *codec_id);
-#endif
-
 /* fast header check for resync */
 static inline int ff_mpa_check_header(uint32_t header){
     /* header */
     if ((header & 0xffe00000) != 0xffe00000)
         return -1;
+    /* version check */
+    if ((header & (3<<19)) == 1<<19)
+        return -1;
     /* layer check */
     if ((header & (3<<17)) == 0)
         return -1;

diff --git a/libavcodec/mpegpicture.c b/libavcodec/mpegpicture.c
index 53fb35b..c0e0690 100644
--- a/libavcodec/mpegpicture.c
+++ b/libavcodec/mpegpicture.c

@@ -22,6 +22,7 @@
 
 #include "libavutil/avassert.h"
 #include "libavutil/common.h"
+#include "libavutil/pixdesc.h"
 
 #include "avcodec.h"
 #include "motion_est.h"
@@ -58,11 +59,7 @@
 {
     int alloc_size = FFALIGN(FFABS(linesize) + 64, 32);
 
-    if (avctx->hwaccel
-#if FF_API_CAP_VDPAU
-        || avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU
-#endif
-        )
+    if (avctx->hwaccel)
         return 0;
 
     if (linesize < 24) {
@@ -151,15 +148,18 @@
         }
     }
 
-    if (linesize && (linesize   != pic->f->linesize[0] ||
-                     uvlinesize != pic->f->linesize[1])) {
+    if ((linesize   &&   linesize != pic->f->linesize[0]) ||
+        (uvlinesize && uvlinesize != pic->f->linesize[1])) {
         av_log(avctx, AV_LOG_ERROR,
-               "get_buffer() failed (stride changed)\n");
+               "get_buffer() failed (stride changed: linesize=%d/%d uvlinesize=%d/%d)\n",
+               linesize,   pic->f->linesize[0],
+               uvlinesize, pic->f->linesize[1]);
         ff_mpeg_unref_picture(avctx, pic);
         return -1;
     }
 
-    if (pic->f->linesize[1] != pic->f->linesize[2]) {
+    if (av_pix_fmt_count_planes(pic->f->format) > 2 &&
+        pic->f->linesize[1] != pic->f->linesize[2]) {
         av_log(avctx, AV_LOG_ERROR,
                "get_buffer() failed (uv stride mismatch)\n");
         ff_mpeg_unref_picture(avctx, pic);
@@ -377,8 +377,10 @@
 
     if (src->hwaccel_picture_private) {
         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
-        if (!dst->hwaccel_priv_buf)
+        if (!dst->hwaccel_priv_buf) {
+            ret = AVERROR(ENOMEM);
             goto fail;
+        }
         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
     }
 

diff --git a/libavcodec/mpegutils.c b/libavcodec/mpegutils.c
index 62cc36a..3f94540 100644
--- a/libavcodec/mpegutils.c
+++ b/libavcodec/mpegutils.c

@@ -23,10 +23,31 @@
 #include "libavutil/common.h"
 #include "libavutil/frame.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/motion_vector.h"
+#include "libavutil/avassert.h"
 
 #include "avcodec.h"
 #include "mpegutils.h"
 
+static int add_mb(AVMotionVector *mb, uint32_t mb_type,
+                  int dst_x, int dst_y,
+                  int motion_x, int motion_y, int motion_scale,
+                  int direction)
+{
+    mb->w = IS_8X8(mb_type) || IS_8X16(mb_type) ? 8 : 16;
+    mb->h = IS_8X8(mb_type) || IS_16X8(mb_type) ? 8 : 16;
+    mb->motion_x = motion_x;
+    mb->motion_y = motion_y;
+    mb->motion_scale = motion_scale;
+    mb->dst_x = dst_x;
+    mb->dst_y = dst_y;
+    mb->src_x = dst_x + motion_x / motion_scale;
+    mb->src_y = dst_y + motion_y / motion_scale;
+    mb->source = direction ? 1 : -1;
+    mb->flags = 0; // XXX: does mb_type contain extra information that could be exported here?
+    return 1;
+}
+
 void ff_draw_horiz_band(AVCodecContext *avctx,
                         AVFrame *cur, AVFrame *last,
                         int y, int h, int picture_structure,
@@ -78,3 +99,295 @@
                                y, picture_structure, h);
     }
 }
+
+void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_table,
+                         uint32_t *mbtype_table, int8_t *qscale_table, int16_t (*motion_val[2])[2],
+                         int *low_delay,
+                         int mb_width, int mb_height, int mb_stride, int quarter_sample)
+{
+    if ((avctx->flags2 & AV_CODEC_FLAG2_EXPORT_MVS) && mbtype_table && motion_val[0]) {
+        const int shift = 1 + quarter_sample;
+        const int scale = 1 << shift;
+        const int mv_sample_log2 = avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_SVQ3 ? 2 : 1;
+        const int mv_stride      = (mb_width << mv_sample_log2) +
+                                   (avctx->codec->id == AV_CODEC_ID_H264 ? 0 : 1);
+        int mb_x, mb_y, mbcount = 0;
+
+        /* size is width * height * 2 * 4 where 2 is for directions and 4 is
+         * for the maximum number of MB (4 MB in case of IS_8x8) */
+        AVMotionVector *mvs = av_malloc_array(mb_width * mb_height, 2 * 4 * sizeof(AVMotionVector));
+        if (!mvs)
+            return;
+
+        for (mb_y = 0; mb_y < mb_height; mb_y++) {
+            for (mb_x = 0; mb_x < mb_width; mb_x++) {
+                int i, direction, mb_type = mbtype_table[mb_x + mb_y * mb_stride];
+                for (direction = 0; direction < 2; direction++) {
+                    if (!USES_LIST(mb_type, direction))
+                        continue;
+                    if (IS_8X8(mb_type)) {
+                        for (i = 0; i < 4; i++) {
+                            int sx = mb_x * 16 + 4 + 8 * (i & 1);
+                            int sy = mb_y * 16 + 4 + 8 * (i >> 1);
+                            int xy = (mb_x * 2 + (i & 1) +
+                                      (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
+                            int mx = motion_val[direction][xy][0];
+                            int my = motion_val[direction][xy][1];
+                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
+                        }
+                    } else if (IS_16X8(mb_type)) {
+                        for (i = 0; i < 2; i++) {
+                            int sx = mb_x * 16 + 8;
+                            int sy = mb_y * 16 + 4 + 8 * i;
+                            int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) << (mv_sample_log2 - 1);
+                            int mx = motion_val[direction][xy][0];
+                            int my = motion_val[direction][xy][1];
+
+                            if (IS_INTERLACED(mb_type))
+                                my *= 2;
+
+                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
+                        }
+                    } else if (IS_8X16(mb_type)) {
+                        for (i = 0; i < 2; i++) {
+                            int sx = mb_x * 16 + 4 + 8 * i;
+                            int sy = mb_y * 16 + 8;
+                            int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) << (mv_sample_log2 - 1);
+                            int mx = motion_val[direction][xy][0];
+                            int my = motion_val[direction][xy][1];
+
+                            if (IS_INTERLACED(mb_type))
+                                my *= 2;
+
+                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
+                        }
+                    } else {
+                          int sx = mb_x * 16 + 8;
+                          int sy = mb_y * 16 + 8;
+                          int xy = (mb_x + mb_y * mv_stride) << mv_sample_log2;
+                          int mx = motion_val[direction][xy][0];
+                          int my = motion_val[direction][xy][1];
+                          mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
+                    }
+                }
+            }
+        }
+
+        if (mbcount) {
+            AVFrameSideData *sd;
+
+            av_log(avctx, AV_LOG_DEBUG, "Adding %d MVs info to frame %d\n", mbcount, avctx->frame_number);
+            sd = av_frame_new_side_data(pict, AV_FRAME_DATA_MOTION_VECTORS, mbcount * sizeof(AVMotionVector));
+            if (!sd) {
+                av_freep(&mvs);
+                return;
+            }
+            memcpy(sd->data, mvs, mbcount * sizeof(AVMotionVector));
+        }
+
+        av_freep(&mvs);
+    }
+
+    /* TODO: export all the following to make them accessible for users (and filters) */
+    if (avctx->hwaccel || !mbtype_table)
+        return;
+
+
+    if (avctx->debug & (FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)) {
+        int x,y;
+
+        av_log(avctx, AV_LOG_DEBUG, "New frame, type: %c\n",
+               av_get_picture_type_char(pict->pict_type));
+        for (y = 0; y < mb_height; y++) {
+            for (x = 0; x < mb_width; x++) {
+                if (avctx->debug & FF_DEBUG_SKIP) {
+                    int count = mbskip_table ? mbskip_table[x + y * mb_stride] : 0;
+                    if (count > 9)
+                        count = 9;
+                    av_log(avctx, AV_LOG_DEBUG, "%1d", count);
+                }
+                if (avctx->debug & FF_DEBUG_QP) {
+                    av_log(avctx, AV_LOG_DEBUG, "%2d",
+                           qscale_table[x + y * mb_stride]);
+                }
+                if (avctx->debug & FF_DEBUG_MB_TYPE) {
+                    int mb_type = mbtype_table[x + y * mb_stride];
+                    // Type & MV direction
+                    if (IS_PCM(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "P");
+                    else if (IS_INTRA(mb_type) && IS_ACPRED(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "A");
+                    else if (IS_INTRA4x4(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "i");
+                    else if (IS_INTRA16x16(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "I");
+                    else if (IS_DIRECT(mb_type) && IS_SKIP(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "d");
+                    else if (IS_DIRECT(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "D");
+                    else if (IS_GMC(mb_type) && IS_SKIP(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "g");
+                    else if (IS_GMC(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "G");
+                    else if (IS_SKIP(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "S");
+                    else if (!USES_LIST(mb_type, 1))
+                        av_log(avctx, AV_LOG_DEBUG, ">");
+                    else if (!USES_LIST(mb_type, 0))
+                        av_log(avctx, AV_LOG_DEBUG, "<");
+                    else {
+                        av_assert2(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
+                        av_log(avctx, AV_LOG_DEBUG, "X");
+                    }
+
+                    // segmentation
+                    if (IS_8X8(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "+");
+                    else if (IS_16X8(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "-");
+                    else if (IS_8X16(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "|");
+                    else if (IS_INTRA(mb_type) || IS_16X16(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, " ");
+                    else
+                        av_log(avctx, AV_LOG_DEBUG, "?");
+
+
+                    if (IS_INTERLACED(mb_type))
+                        av_log(avctx, AV_LOG_DEBUG, "=");
+                    else
+                        av_log(avctx, AV_LOG_DEBUG, " ");
+                }
+            }
+            av_log(avctx, AV_LOG_DEBUG, "\n");
+        }
+    }
+
+#if FF_API_DEBUG_MV
+    if ((avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) ||
+        (avctx->debug_mv)) {
+        int mb_y;
+        int i, ret;
+        int h_chroma_shift, v_chroma_shift, block_height;
+        const int mv_sample_log2 = avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_SVQ3 ? 2 : 1;
+        const int mv_stride      = (mb_width << mv_sample_log2) +
+                                   (avctx->codec->id == AV_CODEC_ID_H264 ? 0 : 1);
+
+        if (low_delay)
+            *low_delay = 0; // needed to see the vectors without trashing the buffers
+
+        ret = av_pix_fmt_get_chroma_sub_sample (avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
+        if (ret)
+            return ret;
+
+        av_frame_make_writable(pict);
+
+        pict->opaque = NULL;
+        block_height = 16 >> v_chroma_shift;
+
+        for (mb_y = 0; mb_y < mb_height; mb_y++) {
+            int mb_x;
+            for (mb_x = 0; mb_x < mb_width; mb_x++) {
+                const int mb_index = mb_x + mb_y * mb_stride;
+                if ((avctx->debug & FF_DEBUG_VIS_QP)) {
+                    uint64_t c = (qscale_table[mb_index] * 128 / 31) *
+                                 0x0101010101010101ULL;
+                    int y;
+                    for (y = 0; y < block_height; y++) {
+                        *(uint64_t *)(pict->data[1] + 8 * mb_x +
+                                      (block_height * mb_y + y) *
+                                      pict->linesize[1]) = c;
+                        *(uint64_t *)(pict->data[2] + 8 * mb_x +
+                                      (block_height * mb_y + y) *
+                                      pict->linesize[2]) = c;
+                    }
+                }
+                if ((avctx->debug & FF_DEBUG_VIS_MB_TYPE) &&
+                    motion_val[0]) {
+                    int mb_type = mbtype_table[mb_index];
+                    uint64_t u,v;
+                    int y;
+#define COLOR(theta, r) \
+    u = (int)(128 + r * cos(theta * M_PI / 180)); \
+    v = (int)(128 + r * sin(theta * M_PI / 180));
+
+
+                    u = v = 128;
+                    if (IS_PCM(mb_type)) {
+                        COLOR(120, 48)
+                    } else if ((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) ||
+                               IS_INTRA16x16(mb_type)) {
+                        COLOR(30, 48)
+                    } else if (IS_INTRA4x4(mb_type)) {
+                        COLOR(90, 48)
+                    } else if (IS_DIRECT(mb_type) && IS_SKIP(mb_type)) {
+                        // COLOR(120, 48)
+                    } else if (IS_DIRECT(mb_type)) {
+                        COLOR(150, 48)
+                    } else if (IS_GMC(mb_type) && IS_SKIP(mb_type)) {
+                        COLOR(170, 48)
+                    } else if (IS_GMC(mb_type)) {
+                        COLOR(190, 48)
+                    } else if (IS_SKIP(mb_type)) {
+                        // COLOR(180, 48)
+                    } else if (!USES_LIST(mb_type, 1)) {
+                        COLOR(240, 48)
+                    } else if (!USES_LIST(mb_type, 0)) {
+                        COLOR(0, 48)
+                    } else {
+                        av_assert2(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
+                        COLOR(300,48)
+                    }
+
+                    u *= 0x0101010101010101ULL;
+                    v *= 0x0101010101010101ULL;
+                    for (y = 0; y < block_height; y++) {
+                        *(uint64_t *)(pict->data[1] + 8 * mb_x +
+                                      (block_height * mb_y + y) * pict->linesize[1]) = u;
+                        *(uint64_t *)(pict->data[2] + 8 * mb_x +
+                                      (block_height * mb_y + y) * pict->linesize[2]) = v;
+                    }
+
+                    // segmentation
+                    if (IS_8X8(mb_type) || IS_16X8(mb_type)) {
+                        *(uint64_t *)(pict->data[0] + 16 * mb_x + 0 +
+                                      (16 * mb_y + 8) * pict->linesize[0]) ^= 0x8080808080808080ULL;
+                        *(uint64_t *)(pict->data[0] + 16 * mb_x + 8 +
+                                      (16 * mb_y + 8) * pict->linesize[0]) ^= 0x8080808080808080ULL;
+                    }
+                    if (IS_8X8(mb_type) || IS_8X16(mb_type)) {
+                        for (y = 0; y < 16; y++)
+                            pict->data[0][16 * mb_x + 8 + (16 * mb_y + y) *
+                                          pict->linesize[0]] ^= 0x80;
+                    }
+                    if (IS_8X8(mb_type) && mv_sample_log2 >= 2) {
+                        int dm = 1 << (mv_sample_log2 - 2);
+                        for (i = 0; i < 4; i++) {
+                            int sx = mb_x * 16 + 8 * (i & 1);
+                            int sy = mb_y * 16 + 8 * (i >> 1);
+                            int xy = (mb_x * 2 + (i & 1) +
+                                     (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
+                            // FIXME bidir
+                            int32_t *mv = (int32_t *) &motion_val[0][xy];
+                            if (mv[0] != mv[dm] ||
+                                mv[dm * mv_stride] != mv[dm * (mv_stride + 1)])
+                                for (y = 0; y < 8; y++)
+                                    pict->data[0][sx + 4 + (sy + y) * pict->linesize[0]] ^= 0x80;
+                            if (mv[0] != mv[dm * mv_stride] || mv[dm] != mv[dm * (mv_stride + 1)])
+                                *(uint64_t *)(pict->data[0] + sx + (sy + 4) *
+                                              pict->linesize[0]) ^= 0x8080808080808080ULL;
+                        }
+                    }
+
+                    if (IS_INTERLACED(mb_type) &&
+                        avctx->codec->id == AV_CODEC_ID_H264) {
+                        // hmm
+                    }
+                }
+                if (mbskip_table)
+                    mbskip_table[mb_index] = 0;
+            }
+        }
+    }
+#endif
+}

diff --git a/libavcodec/mpegutils.h b/libavcodec/mpegutils.h
index 9cfadfc..1ed21c1 100644
--- a/libavcodec/mpegutils.h
+++ b/libavcodec/mpegutils.h

@@ -48,7 +48,6 @@
 #define MAX_FCODE        7
 
 /* MB types */
-#if !FF_API_MB_TYPE
 #define MB_TYPE_INTRA4x4   (1 <<  0)
 #define MB_TYPE_INTRA16x16 (1 <<  1) // FIXME H.264-specific
 #define MB_TYPE_INTRA_PCM  (1 <<  2) // FIXME H.264-specific
@@ -70,7 +69,6 @@
 #define MB_TYPE_L0L1       (MB_TYPE_L0   | MB_TYPE_L1)
 #define MB_TYPE_QUANT      (1 << 16)
 #define MB_TYPE_CBP        (1 << 17)
-#endif
 
 #define MB_TYPE_INTRA    MB_TYPE_INTRA4x4 // default mb_type if there is just one type
 
@@ -139,4 +137,12 @@
                         int y, int h, int picture_structure, int first_field,
                         int low_delay);
 
+/**
+ * Print debugging info for the given picture.
+ */
+void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_table,
+                         uint32_t *mbtype_table, int8_t *qscale_table, int16_t (*motion_val[2])[2],
+                         int *low_delay,
+                         int mb_width, int mb_height, int mb_stride, int quarter_sample);
+
 #endif /* AVCODEC_MPEGUTILS_H */

diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index c408997..d4d3bea 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c

@@ -329,6 +329,8 @@
 
 av_cold void ff_mpv_idct_init(MpegEncContext *s)
 {
+    if (s->codec_id == AV_CODEC_ID_MPEG4)
+        s->idsp.mpeg4_studio_profile = s->studio_profile;
     ff_idctdsp_init(&s->idsp, s->avctx);
 
     /* load & permutate scantables
@@ -386,6 +388,11 @@
     for (i = 0; i < 12; i++) {
         s->pblocks[i] = &s->block[i];
     }
+
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->block32, sizeof(*s->block32), fail)
+    s->dpcm_direction = 0;
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->dpcm_macroblock, sizeof(*s->dpcm_macroblock), fail)
+
     if (s->avctx->codec_tag == AV_RL32("VCR2")) {
         // exchange uv
         FFSWAP(void *, s->pblocks[4], s->pblocks[5]);
@@ -421,6 +428,8 @@
     av_freep(&s->me.map);
     av_freep(&s->me.score_map);
     av_freep(&s->blocks);
+    av_freep(&s->block32);
+    av_freep(&s->dpcm_macroblock);
     av_freep(&s->ac_val_base);
     s->block = NULL;
 }
@@ -438,6 +447,9 @@
     COPY(me.score_map);
     COPY(blocks);
     COPY(block);
+    COPY(block32);
+    COPY(dpcm_macroblock);
+    COPY(dpcm_direction);
     COPY(start_mb_y);
     COPY(end_mb_y);
     COPY(me.map_generation);
@@ -811,7 +823,10 @@
     s->dct_error_sum = NULL;
     s->block = NULL;
     s->blocks = NULL;
+    s->block32 = NULL;
     memset(s->pblocks, 0, sizeof(s->pblocks));
+    s->dpcm_direction = 0;
+    s->dpcm_macroblock = NULL;
     s->ac_val_base = NULL;
     s->ac_val[0] =
     s->ac_val[1] =
@@ -876,7 +891,7 @@
  */
 av_cold int ff_mpv_common_init(MpegEncContext *s)
 {
-    int i;
+    int i, ret;
     int nb_slices = (HAVE_THREADS &&
                      s->avctx->active_thread_type & FF_THREAD_SLICE) ?
                     s->avctx->thread_count : 1;
@@ -915,10 +930,11 @@
     dct_init(s);
 
     /* set chroma shifts */
-    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,
-                                  &s->chroma_x_shift,
-                                  &s->chroma_y_shift);
-
+    ret = av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
+                                           &s->chroma_x_shift,
+                                           &s->chroma_y_shift);
+    if (ret)
+        return ret;
 
     FF_ALLOCZ_OR_GOTO(s->avctx, s->picture,
                       MAX_PICTURE_COUNT * sizeof(Picture), fail);
@@ -1311,11 +1327,7 @@
             return -1;
         }
 
-        if (!avctx->hwaccel
-#if FF_API_CAP_VDPAU
-            && !(avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU)
-#endif
-            ) {
+        if (!avctx->hwaccel) {
             for(i=0; i<avctx->height; i++)
                 memset(s->last_picture_ptr->f->data[0] + s->last_picture_ptr->f->linesize[0]*i,
                        0x80, avctx->width);
@@ -1422,540 +1434,6 @@
         ff_thread_report_progress(&s->current_picture_ptr->tf, INT_MAX, 0);
 }
 
-
-#if FF_API_VISMV
-static int clip_line(int *sx, int *sy, int *ex, int *ey, int maxx)
-{
-    if(*sx > *ex)
-        return clip_line(ex, ey, sx, sy, maxx);
-
-    if (*sx < 0) {
-        if (*ex < 0)
-            return 1;
-        *sy = *ey + (*sy - *ey) * (int64_t)*ex / (*ex - *sx);
-        *sx = 0;
-    }
-
-    if (*ex > maxx) {
-        if (*sx > maxx)
-            return 1;
-        *ey = *sy + (*ey - *sy) * (int64_t)(maxx - *sx) / (*ex - *sx);
-        *ex = maxx;
-    }
-    return 0;
-}
-
-
-/**
- * Draw a line from (ex, ey) -> (sx, sy).
- * @param w width of the image
- * @param h height of the image
- * @param stride stride/linesize of the image
- * @param color color of the arrow
- */
-static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey,
-                      int w, int h, int stride, int color)
-{
-    int x, y, fr, f;
-
-    if (clip_line(&sx, &sy, &ex, &ey, w - 1))
-        return;
-    if (clip_line(&sy, &sx, &ey, &ex, h - 1))
-        return;
-
-    sx = av_clip(sx, 0, w - 1);
-    sy = av_clip(sy, 0, h - 1);
-    ex = av_clip(ex, 0, w - 1);
-    ey = av_clip(ey, 0, h - 1);
-
-    buf[sy * stride + sx] += color;
-
-    if (FFABS(ex - sx) > FFABS(ey - sy)) {
-        if (sx > ex) {
-            FFSWAP(int, sx, ex);
-            FFSWAP(int, sy, ey);
-        }
-        buf += sx + sy * stride;
-        ex  -= sx;
-        f    = ((ey - sy) << 16) / ex;
-        for (x = 0; x <= ex; x++) {
-            y  = (x * f) >> 16;
-            fr = (x * f) & 0xFFFF;
-            buf[y * stride + x]       += (color * (0x10000 - fr)) >> 16;
-            if(fr) buf[(y + 1) * stride + x] += (color *            fr ) >> 16;
-        }
-    } else {
-        if (sy > ey) {
-            FFSWAP(int, sx, ex);
-            FFSWAP(int, sy, ey);
-        }
-        buf += sx + sy * stride;
-        ey  -= sy;
-        if (ey)
-            f = ((ex - sx) << 16) / ey;
-        else
-            f = 0;
-        for(y= 0; y <= ey; y++){
-            x  = (y*f) >> 16;
-            fr = (y*f) & 0xFFFF;
-            buf[y * stride + x]     += (color * (0x10000 - fr)) >> 16;
-            if(fr) buf[y * stride + x + 1] += (color *            fr ) >> 16;
-        }
-    }
-}
-
-/**
- * Draw an arrow from (ex, ey) -> (sx, sy).
- * @param w width of the image
- * @param h height of the image
- * @param stride stride/linesize of the image
- * @param color color of the arrow
- */
-static void draw_arrow(uint8_t *buf, int sx, int sy, int ex,
-                       int ey, int w, int h, int stride, int color, int tail, int direction)
-{
-    int dx,dy;
-
-    if (direction) {
-        FFSWAP(int, sx, ex);
-        FFSWAP(int, sy, ey);
-    }
-
-    sx = av_clip(sx, -100, w + 100);
-    sy = av_clip(sy, -100, h + 100);
-    ex = av_clip(ex, -100, w + 100);
-    ey = av_clip(ey, -100, h + 100);
-
-    dx = ex - sx;
-    dy = ey - sy;
-
-    if (dx * dx + dy * dy > 3 * 3) {
-        int rx =  dx + dy;
-        int ry = -dx + dy;
-        int length = ff_sqrt((rx * rx + ry * ry) << 8);
-
-        // FIXME subpixel accuracy
-        rx = ROUNDED_DIV(rx * 3 << 4, length);
-        ry = ROUNDED_DIV(ry * 3 << 4, length);
-
-        if (tail) {
-            rx = -rx;
-            ry = -ry;
-        }
-
-        draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
-        draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
-    }
-    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
-}
-#endif
-
-static int add_mb(AVMotionVector *mb, uint32_t mb_type,
-                  int dst_x, int dst_y,
-                  int motion_x, int motion_y, int motion_scale,
-                  int direction)
-{
-    mb->w = IS_8X8(mb_type) || IS_8X16(mb_type) ? 8 : 16;
-    mb->h = IS_8X8(mb_type) || IS_16X8(mb_type) ? 8 : 16;
-    mb->motion_x = motion_x;
-    mb->motion_y = motion_y;
-    mb->motion_scale = motion_scale;
-    mb->dst_x = dst_x;
-    mb->dst_y = dst_y;
-    mb->src_x = dst_x + motion_x / motion_scale;
-    mb->src_y = dst_y + motion_y / motion_scale;
-    mb->source = direction ? 1 : -1;
-    mb->flags = 0; // XXX: does mb_type contain extra information that could be exported here?
-    return 1;
-}
-
-/**
- * Print debugging info for the given picture.
- */
-void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_table,
-                         uint32_t *mbtype_table, int8_t *qscale_table, int16_t (*motion_val[2])[2],
-                         int *low_delay,
-                         int mb_width, int mb_height, int mb_stride, int quarter_sample)
-{
-    if ((avctx->flags2 & AV_CODEC_FLAG2_EXPORT_MVS) && mbtype_table && motion_val[0]) {
-        const int shift = 1 + quarter_sample;
-        const int scale = 1 << shift;
-        const int mv_sample_log2 = avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_SVQ3 ? 2 : 1;
-        const int mv_stride      = (mb_width << mv_sample_log2) +
-                                   (avctx->codec->id == AV_CODEC_ID_H264 ? 0 : 1);
-        int mb_x, mb_y, mbcount = 0;
-
-        /* size is width * height * 2 * 4 where 2 is for directions and 4 is
-         * for the maximum number of MB (4 MB in case of IS_8x8) */
-        AVMotionVector *mvs = av_malloc_array(mb_width * mb_height, 2 * 4 * sizeof(AVMotionVector));
-        if (!mvs)
-            return;
-
-        for (mb_y = 0; mb_y < mb_height; mb_y++) {
-            for (mb_x = 0; mb_x < mb_width; mb_x++) {
-                int i, direction, mb_type = mbtype_table[mb_x + mb_y * mb_stride];
-                for (direction = 0; direction < 2; direction++) {
-                    if (!USES_LIST(mb_type, direction))
-                        continue;
-                    if (IS_8X8(mb_type)) {
-                        for (i = 0; i < 4; i++) {
-                            int sx = mb_x * 16 + 4 + 8 * (i & 1);
-                            int sy = mb_y * 16 + 4 + 8 * (i >> 1);
-                            int xy = (mb_x * 2 + (i & 1) +
-                                      (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
-                            int mx = motion_val[direction][xy][0];
-                            int my = motion_val[direction][xy][1];
-                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
-                        }
-                    } else if (IS_16X8(mb_type)) {
-                        for (i = 0; i < 2; i++) {
-                            int sx = mb_x * 16 + 8;
-                            int sy = mb_y * 16 + 4 + 8 * i;
-                            int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) << (mv_sample_log2 - 1);
-                            int mx = motion_val[direction][xy][0];
-                            int my = motion_val[direction][xy][1];
-
-                            if (IS_INTERLACED(mb_type))
-                                my *= 2;
-
-                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
-                        }
-                    } else if (IS_8X16(mb_type)) {
-                        for (i = 0; i < 2; i++) {
-                            int sx = mb_x * 16 + 4 + 8 * i;
-                            int sy = mb_y * 16 + 8;
-                            int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) << (mv_sample_log2 - 1);
-                            int mx = motion_val[direction][xy][0];
-                            int my = motion_val[direction][xy][1];
-
-                            if (IS_INTERLACED(mb_type))
-                                my *= 2;
-
-                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
-                        }
-                    } else {
-                          int sx = mb_x * 16 + 8;
-                          int sy = mb_y * 16 + 8;
-                          int xy = (mb_x + mb_y * mv_stride) << mv_sample_log2;
-                          int mx = motion_val[direction][xy][0];
-                          int my = motion_val[direction][xy][1];
-                          mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
-                    }
-                }
-            }
-        }
-
-        if (mbcount) {
-            AVFrameSideData *sd;
-
-            av_log(avctx, AV_LOG_DEBUG, "Adding %d MVs info to frame %d\n", mbcount, avctx->frame_number);
-            sd = av_frame_new_side_data(pict, AV_FRAME_DATA_MOTION_VECTORS, mbcount * sizeof(AVMotionVector));
-            if (!sd) {
-                av_freep(&mvs);
-                return;
-            }
-            memcpy(sd->data, mvs, mbcount * sizeof(AVMotionVector));
-        }
-
-        av_freep(&mvs);
-    }
-
-    /* TODO: export all the following to make them accessible for users (and filters) */
-    if (avctx->hwaccel || !mbtype_table
-#if FF_API_CAP_VDPAU
-        || (avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU)
-#endif
-        )
-        return;
-
-
-    if (avctx->debug & (FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)) {
-        int x,y;
-
-        av_log(avctx, AV_LOG_DEBUG, "New frame, type: %c\n",
-               av_get_picture_type_char(pict->pict_type));
-        for (y = 0; y < mb_height; y++) {
-            for (x = 0; x < mb_width; x++) {
-                if (avctx->debug & FF_DEBUG_SKIP) {
-                    int count = mbskip_table ? mbskip_table[x + y * mb_stride] : 0;
-                    if (count > 9)
-                        count = 9;
-                    av_log(avctx, AV_LOG_DEBUG, "%1d", count);
-                }
-                if (avctx->debug & FF_DEBUG_QP) {
-                    av_log(avctx, AV_LOG_DEBUG, "%2d",
-                           qscale_table[x + y * mb_stride]);
-                }
-                if (avctx->debug & FF_DEBUG_MB_TYPE) {
-                    int mb_type = mbtype_table[x + y * mb_stride];
-                    // Type & MV direction
-                    if (IS_PCM(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "P");
-                    else if (IS_INTRA(mb_type) && IS_ACPRED(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "A");
-                    else if (IS_INTRA4x4(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "i");
-                    else if (IS_INTRA16x16(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "I");
-                    else if (IS_DIRECT(mb_type) && IS_SKIP(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "d");
-                    else if (IS_DIRECT(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "D");
-                    else if (IS_GMC(mb_type) && IS_SKIP(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "g");
-                    else if (IS_GMC(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "G");
-                    else if (IS_SKIP(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "S");
-                    else if (!USES_LIST(mb_type, 1))
-                        av_log(avctx, AV_LOG_DEBUG, ">");
-                    else if (!USES_LIST(mb_type, 0))
-                        av_log(avctx, AV_LOG_DEBUG, "<");
-                    else {
-                        av_assert2(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
-                        av_log(avctx, AV_LOG_DEBUG, "X");
-                    }
-
-                    // segmentation
-                    if (IS_8X8(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "+");
-                    else if (IS_16X8(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "-");
-                    else if (IS_8X16(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "|");
-                    else if (IS_INTRA(mb_type) || IS_16X16(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, " ");
-                    else
-                        av_log(avctx, AV_LOG_DEBUG, "?");
-
-
-                    if (IS_INTERLACED(mb_type))
-                        av_log(avctx, AV_LOG_DEBUG, "=");
-                    else
-                        av_log(avctx, AV_LOG_DEBUG, " ");
-                }
-            }
-            av_log(avctx, AV_LOG_DEBUG, "\n");
-        }
-    }
-
-#if FF_API_DEBUG_MV
-    if ((avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) ||
-        (avctx->debug_mv)) {
-        int mb_y;
-        int i;
-        int h_chroma_shift, v_chroma_shift, block_height;
-#if FF_API_VISMV
-        const int shift = 1 + quarter_sample;
-        uint8_t *ptr;
-        const int width          = avctx->width;
-        const int height         = avctx->height;
-#endif
-        const int mv_sample_log2 = avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_SVQ3 ? 2 : 1;
-        const int mv_stride      = (mb_width << mv_sample_log2) +
-                                   (avctx->codec->id == AV_CODEC_ID_H264 ? 0 : 1);
-
-        if (low_delay)
-            *low_delay = 0; // needed to see the vectors without trashing the buffers
-
-        avcodec_get_chroma_sub_sample(avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
-
-        av_frame_make_writable(pict);
-
-        pict->opaque = NULL;
-#if FF_API_VISMV
-        ptr          = pict->data[0];
-#endif
-        block_height = 16 >> v_chroma_shift;
-
-        for (mb_y = 0; mb_y < mb_height; mb_y++) {
-            int mb_x;
-            for (mb_x = 0; mb_x < mb_width; mb_x++) {
-                const int mb_index = mb_x + mb_y * mb_stride;
-#if FF_API_VISMV
-                if ((avctx->debug_mv) && motion_val[0]) {
-                    int type;
-                    for (type = 0; type < 3; type++) {
-                        int direction = 0;
-                        switch (type) {
-                        case 0:
-                            if ((!(avctx->debug_mv & FF_DEBUG_VIS_MV_P_FOR)) ||
-                                (pict->pict_type!= AV_PICTURE_TYPE_P))
-                                continue;
-                            direction = 0;
-                            break;
-                        case 1:
-                            if ((!(avctx->debug_mv & FF_DEBUG_VIS_MV_B_FOR)) ||
-                                (pict->pict_type!= AV_PICTURE_TYPE_B))
-                                continue;
-                            direction = 0;
-                            break;
-                        case 2:
-                            if ((!(avctx->debug_mv & FF_DEBUG_VIS_MV_B_BACK)) ||
-                                (pict->pict_type!= AV_PICTURE_TYPE_B))
-                                continue;
-                            direction = 1;
-                            break;
-                        }
-                        if (!USES_LIST(mbtype_table[mb_index], direction))
-                            continue;
-
-                        if (IS_8X8(mbtype_table[mb_index])) {
-                            int i;
-                            for (i = 0; i < 4; i++) {
-                                int sx = mb_x * 16 + 4 + 8 * (i & 1);
-                                int sy = mb_y * 16 + 4 + 8 * (i >> 1);
-                                int xy = (mb_x * 2 + (i & 1) +
-                                          (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
-                                int mx = (motion_val[direction][xy][0] >> shift) + sx;
-                                int my = (motion_val[direction][xy][1] >> shift) + sy;
-                                draw_arrow(ptr, sx, sy, mx, my, width,
-                                           height, pict->linesize[0], 100, 0, direction);
-                            }
-                        } else if (IS_16X8(mbtype_table[mb_index])) {
-                            int i;
-                            for (i = 0; i < 2; i++) {
-                                int sx = mb_x * 16 + 8;
-                                int sy = mb_y * 16 + 4 + 8 * i;
-                                int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) << (mv_sample_log2 - 1);
-                                int mx = (motion_val[direction][xy][0] >> shift);
-                                int my = (motion_val[direction][xy][1] >> shift);
-
-                                if (IS_INTERLACED(mbtype_table[mb_index]))
-                                    my *= 2;
-
-                                draw_arrow(ptr, sx, sy, mx + sx, my + sy, width,
-                                           height, pict->linesize[0], 100, 0, direction);
-                            }
-                        } else if (IS_8X16(mbtype_table[mb_index])) {
-                            int i;
-                            for (i = 0; i < 2; i++) {
-                                int sx = mb_x * 16 + 4 + 8 * i;
-                                int sy = mb_y * 16 + 8;
-                                int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) << (mv_sample_log2 - 1);
-                                int mx = motion_val[direction][xy][0] >> shift;
-                                int my = motion_val[direction][xy][1] >> shift;
-
-                                if (IS_INTERLACED(mbtype_table[mb_index]))
-                                    my *= 2;
-
-                                draw_arrow(ptr, sx, sy, mx + sx, my + sy, width,
-                                           height, pict->linesize[0], 100, 0, direction);
-                            }
-                        } else {
-                              int sx= mb_x * 16 + 8;
-                              int sy= mb_y * 16 + 8;
-                              int xy= (mb_x + mb_y * mv_stride) << mv_sample_log2;
-                              int mx= (motion_val[direction][xy][0]>>shift) + sx;
-                              int my= (motion_val[direction][xy][1]>>shift) + sy;
-                              draw_arrow(ptr, sx, sy, mx, my, width, height, pict->linesize[0], 100, 0, direction);
-                        }
-                    }
-                }
-#endif
-                if ((avctx->debug & FF_DEBUG_VIS_QP)) {
-                    uint64_t c = (qscale_table[mb_index] * 128 / 31) *
-                                 0x0101010101010101ULL;
-                    int y;
-                    for (y = 0; y < block_height; y++) {
-                        *(uint64_t *)(pict->data[1] + 8 * mb_x +
-                                      (block_height * mb_y + y) *
-                                      pict->linesize[1]) = c;
-                        *(uint64_t *)(pict->data[2] + 8 * mb_x +
-                                      (block_height * mb_y + y) *
-                                      pict->linesize[2]) = c;
-                    }
-                }
-                if ((avctx->debug & FF_DEBUG_VIS_MB_TYPE) &&
-                    motion_val[0]) {
-                    int mb_type = mbtype_table[mb_index];
-                    uint64_t u,v;
-                    int y;
-#define COLOR(theta, r) \
-    u = (int)(128 + r * cos(theta * M_PI / 180)); \
-    v = (int)(128 + r * sin(theta * M_PI / 180));
-
-
-                    u = v = 128;
-                    if (IS_PCM(mb_type)) {
-                        COLOR(120, 48)
-                    } else if ((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) ||
-                               IS_INTRA16x16(mb_type)) {
-                        COLOR(30, 48)
-                    } else if (IS_INTRA4x4(mb_type)) {
-                        COLOR(90, 48)
-                    } else if (IS_DIRECT(mb_type) && IS_SKIP(mb_type)) {
-                        // COLOR(120, 48)
-                    } else if (IS_DIRECT(mb_type)) {
-                        COLOR(150, 48)
-                    } else if (IS_GMC(mb_type) && IS_SKIP(mb_type)) {
-                        COLOR(170, 48)
-                    } else if (IS_GMC(mb_type)) {
-                        COLOR(190, 48)
-                    } else if (IS_SKIP(mb_type)) {
-                        // COLOR(180, 48)
-                    } else if (!USES_LIST(mb_type, 1)) {
-                        COLOR(240, 48)
-                    } else if (!USES_LIST(mb_type, 0)) {
-                        COLOR(0, 48)
-                    } else {
-                        av_assert2(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
-                        COLOR(300,48)
-                    }
-
-                    u *= 0x0101010101010101ULL;
-                    v *= 0x0101010101010101ULL;
-                    for (y = 0; y < block_height; y++) {
-                        *(uint64_t *)(pict->data[1] + 8 * mb_x +
-                                      (block_height * mb_y + y) * pict->linesize[1]) = u;
-                        *(uint64_t *)(pict->data[2] + 8 * mb_x +
-                                      (block_height * mb_y + y) * pict->linesize[2]) = v;
-                    }
-
-                    // segmentation
-                    if (IS_8X8(mb_type) || IS_16X8(mb_type)) {
-                        *(uint64_t *)(pict->data[0] + 16 * mb_x + 0 +
-                                      (16 * mb_y + 8) * pict->linesize[0]) ^= 0x8080808080808080ULL;
-                        *(uint64_t *)(pict->data[0] + 16 * mb_x + 8 +
-                                      (16 * mb_y + 8) * pict->linesize[0]) ^= 0x8080808080808080ULL;
-                    }
-                    if (IS_8X8(mb_type) || IS_8X16(mb_type)) {
-                        for (y = 0; y < 16; y++)
-                            pict->data[0][16 * mb_x + 8 + (16 * mb_y + y) *
-                                          pict->linesize[0]] ^= 0x80;
-                    }
-                    if (IS_8X8(mb_type) && mv_sample_log2 >= 2) {
-                        int dm = 1 << (mv_sample_log2 - 2);
-                        for (i = 0; i < 4; i++) {
-                            int sx = mb_x * 16 + 8 * (i & 1);
-                            int sy = mb_y * 16 + 8 * (i >> 1);
-                            int xy = (mb_x * 2 + (i & 1) +
-                                     (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
-                            // FIXME bidir
-                            int32_t *mv = (int32_t *) &motion_val[0][xy];
-                            if (mv[0] != mv[dm] ||
-                                mv[dm * mv_stride] != mv[dm * (mv_stride + 1)])
-                                for (y = 0; y < 8; y++)
-                                    pict->data[0][sx + 4 + (sy + y) * pict->linesize[0]] ^= 0x80;
-                            if (mv[0] != mv[dm * mv_stride] || mv[dm] != mv[dm * (mv_stride + 1)])
-                                *(uint64_t *)(pict->data[0] + sx + (sy + 4) *
-                                              pict->linesize[0]) ^= 0x8080808080808080ULL;
-                        }
-                    }
-
-                    if (IS_INTERLACED(mb_type) &&
-                        avctx->codec->id == AV_CODEC_ID_H264) {
-                        // hmm
-                    }
-                }
-                if (mbskip_table)
-                    mbskip_table[mb_index] = 0;
-            }
-        }
-    }
-#endif
-}
-
 void ff_print_debug_info(MpegEncContext *s, Picture *p, AVFrame *pict)
 {
     ff_print_debug_info2(s->avctx, pict, s->mbskip_table, p->mb_type,
@@ -2657,8 +2135,63 @@
                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
             }
         } else {
+            /* Only MPEG-4 Simple Studio Profile is supported in > 8-bit mode.
+               TODO: Integrate 10-bit properly into mpegvideo.c so that ER works properly */
+            if (s->avctx->bits_per_raw_sample > 8){
+                const int act_block_size = block_size * 2;
+
+                if(s->dpcm_direction == 0) {
+                    s->idsp.idct_put(dest_y,                           dct_linesize, (int16_t*)(*s->block32)[0]);
+                    s->idsp.idct_put(dest_y              + act_block_size, dct_linesize, (int16_t*)(*s->block32)[1]);
+                    s->idsp.idct_put(dest_y + dct_offset,              dct_linesize, (int16_t*)(*s->block32)[2]);
+                    s->idsp.idct_put(dest_y + dct_offset + act_block_size, dct_linesize, (int16_t*)(*s->block32)[3]);
+
+                    dct_linesize = uvlinesize << s->interlaced_dct;
+                    dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize*block_size;
+
+                    s->idsp.idct_put(dest_cb,              dct_linesize, (int16_t*)(*s->block32)[4]);
+                    s->idsp.idct_put(dest_cr,              dct_linesize, (int16_t*)(*s->block32)[5]);
+                    s->idsp.idct_put(dest_cb + dct_offset, dct_linesize, (int16_t*)(*s->block32)[6]);
+                    s->idsp.idct_put(dest_cr + dct_offset, dct_linesize, (int16_t*)(*s->block32)[7]);
+                    if(!s->chroma_x_shift){//Chroma444
+                        s->idsp.idct_put(dest_cb + act_block_size,              dct_linesize, (int16_t*)(*s->block32)[8]);
+                        s->idsp.idct_put(dest_cr + act_block_size,              dct_linesize, (int16_t*)(*s->block32)[9]);
+                        s->idsp.idct_put(dest_cb + act_block_size + dct_offset, dct_linesize, (int16_t*)(*s->block32)[10]);
+                        s->idsp.idct_put(dest_cr + act_block_size + dct_offset, dct_linesize, (int16_t*)(*s->block32)[11]);
+                    }
+                } else if(s->dpcm_direction == 1) {
+                    int i, w, h;
+                    uint16_t *dest_pcm[3] = {(uint16_t*)dest_y, (uint16_t*)dest_cb, (uint16_t*)dest_cr};
+                    int linesize[3] = {dct_linesize, uvlinesize, uvlinesize};
+                    for(i = 0; i < 3; i++) {
+                        int idx = 0;
+                        int vsub = i ? s->chroma_y_shift : 0;
+                        int hsub = i ? s->chroma_x_shift : 0;
+                        for(h = 0; h < (16 >> vsub); h++){
+                            for(w = 0; w < (16 >> hsub); w++)
+                                dest_pcm[i][w] = (*s->dpcm_macroblock)[i][idx++];
+                            dest_pcm[i] += linesize[i] / 2;
+                        }
+                    }
+                } else if(s->dpcm_direction == -1) {
+                    int i, w, h;
+                    uint16_t *dest_pcm[3] = {(uint16_t*)dest_y, (uint16_t*)dest_cb, (uint16_t*)dest_cr};
+                    int linesize[3] = {dct_linesize, uvlinesize, uvlinesize};
+                    for(i = 0; i < 3; i++) {
+                        int idx = 0;
+                        int vsub = i ? s->chroma_y_shift : 0;
+                        int hsub = i ? s->chroma_x_shift : 0;
+                        dest_pcm[i] += (linesize[i] / 2) * ((16 >> vsub) - 1);
+                        for(h = (16 >> vsub)-1; h >= 1; h--){
+                            for(w = (16 >> hsub)-1; w >= 1; w--)
+                                dest_pcm[i][w] = (*s->dpcm_macroblock)[i][idx++];
+                            dest_pcm[i] -= linesize[i] / 2;
+                        }
+                    }
+                }
+            }
             /* dct only in intra block */
-            if(s->encoding || !(s->codec_id==AV_CODEC_ID_MPEG1VIDEO || s->codec_id==AV_CODEC_ID_MPEG2VIDEO)){
+            else if(s->encoding || !(s->codec_id==AV_CODEC_ID_MPEG1VIDEO || s->codec_id==AV_CODEC_ID_MPEG2VIDEO)){
                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
@@ -2739,7 +2272,8 @@
 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
     const int linesize   = s->current_picture.f->linesize[0]; //not s->linesize as this would be wrong for field pics
     const int uvlinesize = s->current_picture.f->linesize[1];
-    const int mb_size= 4 - s->avctx->lowres;
+    const int width_of_mb = (4 + (s->avctx->bits_per_raw_sample > 8)) - s->avctx->lowres;
+    const int height_of_mb = 4 - s->avctx->lowres;
 
     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
@@ -2749,20 +2283,20 @@
     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
     //block_index is not used by mpeg2, so it is not affected by chroma_format
 
-    s->dest[0] = s->current_picture.f->data[0] + (int)((s->mb_x - 1U) <<  mb_size);
-    s->dest[1] = s->current_picture.f->data[1] + (int)((s->mb_x - 1U) << (mb_size - s->chroma_x_shift));
-    s->dest[2] = s->current_picture.f->data[2] + (int)((s->mb_x - 1U) << (mb_size - s->chroma_x_shift));
+    s->dest[0] = s->current_picture.f->data[0] + (int)((s->mb_x - 1U) <<  width_of_mb);
+    s->dest[1] = s->current_picture.f->data[1] + (int)((s->mb_x - 1U) << (width_of_mb - s->chroma_x_shift));
+    s->dest[2] = s->current_picture.f->data[2] + (int)((s->mb_x - 1U) << (width_of_mb - s->chroma_x_shift));
 
     if(!(s->pict_type==AV_PICTURE_TYPE_B && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
     {
         if(s->picture_structure==PICT_FRAME){
-        s->dest[0] += s->mb_y *   linesize << mb_size;
-        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
-        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
+        s->dest[0] += s->mb_y *   linesize << height_of_mb;
+        s->dest[1] += s->mb_y * uvlinesize << (height_of_mb - s->chroma_y_shift);
+        s->dest[2] += s->mb_y * uvlinesize << (height_of_mb - s->chroma_y_shift);
         }else{
-            s->dest[0] += (s->mb_y>>1) *   linesize << mb_size;
-            s->dest[1] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
-            s->dest[2] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
+            s->dest[0] += (s->mb_y>>1) *   linesize << height_of_mb;
+            s->dest[1] += (s->mb_y>>1) * uvlinesize << (height_of_mb - s->chroma_y_shift);
+            s->dest[2] += (s->mb_y>>1) * uvlinesize << (height_of_mb - s->chroma_y_shift);
             av_assert1((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
         }
     }

diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index e9eb633..bbc6b56 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h

@@ -45,6 +45,7 @@
 #include "mpegpicture.h"
 #include "mpegvideodsp.h"
 #include "mpegvideoencdsp.h"
+#include "mpegvideodata.h"
 #include "pixblockdsp.h"
 #include "put_bits.h"
 #include "ratecontrol.h"
@@ -71,6 +72,8 @@
 #define SLICE_MAX_START_CODE    0x000001af
 #define EXT_START_CODE          0x000001b5
 #define USER_START_CODE         0x000001b2
+#define SLICE_START_CODE        0x000001b7
+
 
 /**
  * MpegEncContext.
@@ -252,9 +255,6 @@
     int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced B-frame encoding
     uint8_t (*p_field_select_table[2]);
     uint8_t (*b_field_select_table[2][2]);
-#if FF_API_MOTION_EST
-    int me_method;                       ///< ME algorithm
-#endif
     int motion_est;                      ///< ME algorithm
     int me_penalty_compensation;
     int me_pre;                          ///< prepass for motion estimation
@@ -369,7 +369,7 @@
     uint8_t *mb_info_ptr;
     int mb_info_size;
     int ehc_mode;
-    int rc_strategy;
+    int rc_strategy;                ///< deprecated
 
     /* H.263+ specific */
     int umvplus;                    ///< == H.263+ && unrestricted_mv
@@ -381,6 +381,8 @@
     int custom_pcf;
 
     /* MPEG-4 specific */
+    int studio_profile;
+    int dct_precision;
     ///< number of bits to represent the fractional part of time (encoder only)
     int time_increment_bits;
     int last_time_base;
@@ -467,6 +469,13 @@
     int intra_vlc_format;
     int alternate_scan;
     int seq_disp_ext;
+    int video_format;
+#define VIDEO_FORMAT_COMPONENT   0
+#define VIDEO_FORMAT_PAL         1
+#define VIDEO_FORMAT_NTSC        2
+#define VIDEO_FORMAT_SECAM       3
+#define VIDEO_FORMAT_MAC         4
+#define VIDEO_FORMAT_UNSPECIFIED 5
     int repeat_first_field;
     int chroma_420_type;
     int chroma_format;
@@ -497,7 +506,12 @@
 
     int16_t (*block)[64]; ///< points to one of the following blocks
     int16_t (*blocks)[12][64]; // for HQ mode we need to keep the best block
-    int (*decode_mb)(struct MpegEncContext *s, int16_t block[6][64]); // used by some codecs to avoid a switch()
+    int (*decode_mb)(struct MpegEncContext *s, int16_t block[12][64]); // used by some codecs to avoid a switch()
+
+    int32_t (*block32)[12][64];
+    int dpcm_direction;          // 0 = DCT, 1 = DPCM top to bottom scan, -1 = DPCM bottom to top scan
+    int16_t (*dpcm_macroblock)[3][256];
+
 #define SLICE_OK         0
 #define SLICE_ERROR     -1
 #define SLICE_END       -2 ///<end marker found
@@ -575,12 +589,6 @@
 #define FF_MPV_FLAG_NAQ          0x0010
 #define FF_MPV_FLAG_MV0          0x0020
 
-enum rc_strategy {
-    MPV_RC_STRATEGY_FFMPEG,
-    MPV_RC_STRATEGY_XVID,
-    NB_MPV_RC_STRATEGY
-};
-
 #define FF_MPV_OPT_CMP_FUNC \
 { "sad",    "Sum of absolute differences, fast", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SAD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
 { "sse",    "Sum of squared errors", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
@@ -634,9 +642,9 @@
 {"lmax", "maximum Lagrange factor (VBR)",                           FF_MPV_OFFSET(lmax), AV_OPT_TYPE_INT, {.i64 = 31*FF_QP2LAMBDA }, 0, INT_MAX, FF_MPV_OPT_FLAGS },            \
 {"ibias", "intra quant bias",                                       FF_MPV_OFFSET(intra_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS },   \
 {"pbias", "inter quant bias",                                       FF_MPV_OFFSET(inter_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS },   \
-{"rc_strategy", "ratecontrol method",                               FF_MPV_OFFSET(rc_strategy), AV_OPT_TYPE_INT, {.i64 = MPV_RC_STRATEGY_FFMPEG }, 0, NB_MPV_RC_STRATEGY-1, FF_MPV_OPT_FLAGS, "rc_strategy" },   \
-    { "ffmpeg", "default native rate control", 0, AV_OPT_TYPE_CONST, { .i64 = MPV_RC_STRATEGY_FFMPEG }, 0, 0, FF_MPV_OPT_FLAGS, "rc_strategy" }, \
-    { "xvid",   "libxvid (2 pass only)",       0, AV_OPT_TYPE_CONST, { .i64 = MPV_RC_STRATEGY_XVID },   0, 0, FF_MPV_OPT_FLAGS, "rc_strategy" }, \
+{"rc_strategy", "ratecontrol method",                               FF_MPV_OFFSET(rc_strategy), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, FF_MPV_OPT_FLAGS | AV_OPT_FLAG_DEPRECATED, "rc_strategy" },   \
+    { "ffmpeg", "deprecated, does nothing", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FF_MPV_OPT_FLAGS | AV_OPT_FLAG_DEPRECATED, "rc_strategy" }, \
+    { "xvid",   "deprecated, does nothing", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FF_MPV_OPT_FLAGS | AV_OPT_FLAG_DEPRECATED, "rc_strategy" }, \
 {"motion_est", "motion estimation algorithm",                       FF_MPV_OFFSET(motion_est), AV_OPT_TYPE_INT, {.i64 = FF_ME_EPZS }, FF_ME_ZERO, FF_ME_XONE, FF_MPV_OPT_FLAGS, "motion_est" },   \
 { "zero", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ZERO }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" }, \
 { "epzs", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_EPZS }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" }, \
@@ -699,10 +707,6 @@
 void ff_mpeg_flush(AVCodecContext *avctx);
 
 void ff_print_debug_info(MpegEncContext *s, Picture *p, AVFrame *pict);
-void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_table,
-                         uint32_t *mbtype_table, int8_t *qscale_table, int16_t (*motion_val[2])[2],
-                         int *low_delay,
-                         int mb_width, int mb_height, int mb_stride, int quarter_sample);
 
 int ff_mpv_export_qp_table(MpegEncContext *s, AVFrame *f, Picture *p, int qp_type);
 
@@ -729,7 +733,8 @@
                    qpel_mc_func (*qpix_op)[16]);
 
 static inline void ff_update_block_index(MpegEncContext *s){
-    const int block_size= 8 >> s->avctx->lowres;
+    const int bytes_per_pixel = 1 + (s->avctx->bits_per_raw_sample > 8);
+    const int block_size= (8*bytes_per_pixel) >> s->avctx->lowres;
 
     s->block_index[0]+=2;
     s->block_index[1]+=2;
@@ -738,8 +743,8 @@
     s->block_index[4]++;
     s->block_index[5]++;
     s->dest[0]+= 2*block_size;
-    s->dest[1]+= block_size;
-    s->dest[2]+= block_size;
+    s->dest[1]+= (2 >> s->chroma_x_shift) * block_size;
+    s->dest[2]+= (2 >> s->chroma_x_shift) * block_size;
 }
 
 static inline int get_bits_diff(MpegEncContext *s){
@@ -751,4 +756,13 @@
     return bits - last;
 }
 
+static inline int mpeg_get_qscale(MpegEncContext *s)
+{
+    int qscale = get_bits(&s->gb, 5);
+    if (s->q_scale_type)
+        return ff_mpeg2_non_linear_qscale[qscale];
+    else
+        return qscale << 1;
+}
+
 #endif /* AVCODEC_MPEGVIDEO_H */

diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 5765ef3..9fdab31 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c

@@ -415,22 +415,9 @@
         s->intra_only = 0;
     }
 
-#if FF_API_MOTION_EST
-FF_DISABLE_DEPRECATION_WARNINGS
-    s->me_method = avctx->me_method;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     /* Fixed QSCALE */
     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
 
-#if FF_API_MPV_OPT
-    FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->border_masking != 0.0)
-        s->border_masking = avctx->border_masking;
-    FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     s->adaptive_quant = (s->avctx->lumi_masking ||
                          s->avctx->dark_masking ||
                          s->avctx->temporal_cplx_masking ||
@@ -760,15 +747,6 @@
         return AVERROR(EINVAL);
     }
 
-#if FF_API_QUANT_BIAS
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
-        s->intra_quant_bias = avctx->intra_quant_bias;
-    if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
-        s->inter_quant_bias = avctx->inter_quant_bias;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 
     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
@@ -1043,79 +1021,9 @@
                           31, 0);
     }
 
-#if FF_API_RC_STRATEGY
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (!s->rc_strategy)
-        s->rc_strategy = s->avctx->rc_strategy;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     if (ff_rate_control_init(s) < 0)
         return -1;
 
-#if FF_API_RC_STRATEGY
-    av_assert0(MPV_RC_STRATEGY_XVID == FF_RC_STRATEGY_XVID);
-#endif
-
-    if ((s->avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == MPV_RC_STRATEGY_XVID) {
-#if CONFIG_LIBXVID
-        ret = ff_xvid_rate_control_init(s);
-#else
-        ret = AVERROR(ENOSYS);
-        av_log(s->avctx, AV_LOG_ERROR,
-               "Xvid ratecontrol requires libavcodec compiled with Xvid support.\n");
-#endif
-        if (ret < 0)
-            return ret;
-    }
-
-#if FF_API_ERROR_RATE
-    FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->error_rate)
-        s->error_rate = avctx->error_rate;
-    FF_ENABLE_DEPRECATION_WARNINGS;
-#endif
-
-#if FF_API_NORMALIZE_AQP
-    FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
-        s->mpv_flags |= FF_MPV_FLAG_NAQ;
-    FF_ENABLE_DEPRECATION_WARNINGS;
-#endif
-
-#if FF_API_MV0
-    FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->flags & CODEC_FLAG_MV0)
-        s->mpv_flags |= FF_MPV_FLAG_MV0;
-    FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
-#if FF_API_MPV_OPT
-    FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->rc_qsquish != 0.0)
-        s->rc_qsquish = avctx->rc_qsquish;
-    if (avctx->rc_qmod_amp != 0.0)
-        s->rc_qmod_amp = avctx->rc_qmod_amp;
-    if (avctx->rc_qmod_freq)
-        s->rc_qmod_freq = avctx->rc_qmod_freq;
-    if (avctx->rc_buffer_aggressivity != 1.0)
-        s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
-    if (avctx->rc_initial_cplx != 0.0)
-        s->rc_initial_cplx = avctx->rc_initial_cplx;
-    if (avctx->lmin)
-        s->lmin = avctx->lmin;
-    if (avctx->lmax)
-        s->lmax = avctx->lmax;
-
-    if (avctx->rc_eq) {
-        av_freep(&s->rc_eq);
-        s->rc_eq = av_strdup(avctx->rc_eq);
-        if (!s->rc_eq)
-            return AVERROR(ENOMEM);
-    }
-    FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
 #if FF_API_PRIVATE_OPT
     FF_DISABLE_DEPRECATION_WARNINGS
     if (avctx->brd_scale)
@@ -1162,10 +1070,6 @@
     int i;
 
     ff_rate_control_uninit(s);
-#if CONFIG_LIBXVID
-    if ((avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == MPV_RC_STRATEGY_XVID)
-        ff_xvid_rate_control_uninit(s);
-#endif
 
     ff_mpv_common_end(s);
     if (CONFIG_MJPEG_ENCODER &&
@@ -2793,7 +2697,7 @@
 }
 
 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
-    uint32_t *sq = ff_square_tab + 256;
+    const uint32_t *sq = ff_square_tab + 256;
     int acc=0;
     int x,y;
 
@@ -3700,13 +3604,7 @@
         s->current_picture.f->quality = s->next_lambda;
         if(!dry_run) s->next_lambda= 0;
     } else if (!s->fixed_qscale) {
-        int quality;
-#if CONFIG_LIBXVID
-        if ((s->avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == MPV_RC_STRATEGY_XVID)
-            quality = ff_xvid_rate_estimate_qscale(s, dry_run);
-        else
-#endif
-        quality = ff_rate_estimate_qscale(s, dry_run);
+        int quality = ff_rate_estimate_qscale(s, dry_run);
         s->current_picture_ptr->f->quality =
         s->current_picture.f->quality = quality;
         if (s->current_picture.f->quality < 0)

diff --git a/libavcodec/mpegvideo_motion.c b/libavcodec/mpegvideo_motion.c
index c913504..5624c10 100644
--- a/libavcodec/mpegvideo_motion.c
+++ b/libavcodec/mpegvideo_motion.c

@@ -239,20 +239,22 @@
                           int motion_y,
                           int h,
                           int is_mpeg12,
+                          int is_16x8,
                           int mb_y)
 {
     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
     int dxy, uvdxy, mx, my, src_x, src_y,
-        uvsrc_x, uvsrc_y, v_edge_pos;
+        uvsrc_x, uvsrc_y, v_edge_pos, block_y_half;
     ptrdiff_t uvlinesize, linesize;
 
     v_edge_pos = s->v_edge_pos >> field_based;
     linesize   = s->current_picture.f->linesize[0] << field_based;
     uvlinesize = s->current_picture.f->linesize[1] << field_based;
+    block_y_half = (field_based | is_16x8);
 
     dxy   = ((motion_y & 1) << 1) | (motion_x & 1);
     src_x = s->mb_x * 16 + (motion_x >> 1);
-    src_y = (mb_y << (4 - field_based)) + (motion_y >> 1);
+    src_y = (mb_y << (4 - block_y_half)) + (motion_y >> 1);
 
     if (!is_mpeg12 && s->out_format == FMT_H263) {
         if ((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based) {
@@ -260,7 +262,7 @@
             my      = motion_y >> 1;
             uvdxy   = ((my & 1) << 1) | (mx & 1);
             uvsrc_x = s->mb_x * 8 + (mx >> 1);
-            uvsrc_y = (mb_y << (3 - field_based)) + (my >> 1);
+            uvsrc_y = (mb_y << (3 - block_y_half)) + (my >> 1);
         } else {
             uvdxy   = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
             uvsrc_x = src_x >> 1;
@@ -279,7 +281,7 @@
             my      = motion_y / 2;
             uvdxy   = ((my & 1) << 1) | (mx & 1);
             uvsrc_x = s->mb_x * 8 + (mx >> 1);
-            uvsrc_y = (mb_y << (3 - field_based)) + (my >> 1);
+            uvsrc_y = (mb_y << (3 - block_y_half)) + (my >> 1);
         } else {
             if (s->chroma_x_shift) {
                 // Chroma422
@@ -370,18 +372,18 @@
                         uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                         int field_select, uint8_t **ref_picture,
                         op_pixels_func (*pix_op)[4],
-                        int motion_x, int motion_y, int h, int mb_y)
+                        int motion_x, int motion_y, int h, int is_16x8, int mb_y)
 {
 #if !CONFIG_SMALL
     if (s->out_format == FMT_MPEG1)
         mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 0, 0,
                              field_select, ref_picture, pix_op,
-                             motion_x, motion_y, h, 1, mb_y);
+                             motion_x, motion_y, h, 1, is_16x8, mb_y);
     else
 #endif
         mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 0, 0,
                              field_select, ref_picture, pix_op,
-                             motion_x, motion_y, h, 0, mb_y);
+                             motion_x, motion_y, h, 0, is_16x8, mb_y);
 }
 
 static void mpeg_motion_field(MpegEncContext *s, uint8_t *dest_y,
@@ -395,12 +397,12 @@
     if (s->out_format == FMT_MPEG1)
         mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 1,
                              bottom_field, field_select, ref_picture, pix_op,
-                             motion_x, motion_y, h, 1, mb_y);
+                             motion_x, motion_y, h, 1, 0, mb_y);
     else
 #endif
         mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 1,
                              bottom_field, field_select, ref_picture, pix_op,
-                             motion_x, motion_y, h, 0, mb_y);
+                             motion_x, motion_y, h, 0, 0, mb_y);
 }
 
 // FIXME: SIMDify, avg variant, 16x16 version
@@ -870,7 +872,7 @@
         } else {
             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
                         ref_picture, pix_op,
-                        s->mv[dir][0][0], s->mv[dir][0][1], 16, mb_y);
+                        s->mv[dir][0][0], s->mv[dir][0][1], 16, 0, mb_y);
         }
         break;
     case MV_TYPE_8X8:
@@ -907,7 +909,7 @@
             mpeg_motion(s, dest_y, dest_cb, dest_cr,
                         s->field_select[dir][0],
                         ref_picture, pix_op,
-                        s->mv[dir][0][0], s->mv[dir][0][1], 16, mb_y >> 1);
+                        s->mv[dir][0][0], s->mv[dir][0][1], 16, 0, mb_y >> 1);
         }
         break;
     case MV_TYPE_16X8:
@@ -924,8 +926,8 @@
             mpeg_motion(s, dest_y, dest_cb, dest_cr,
                         s->field_select[dir][i],
                         ref2picture, pix_op,
-                        s->mv[dir][i][0], s->mv[dir][i][1] + 16 * i,
-                        8, mb_y >> 1);
+                        s->mv[dir][i][0], s->mv[dir][i][1],
+                        8, 1, (mb_y & ~1) + i);
 
             dest_y  += 16 * s->linesize;
             dest_cb += (16 >> s->chroma_y_shift) * s->uvlinesize;
@@ -952,7 +954,7 @@
                             s->picture_structure != i + 1,
                             ref_picture, pix_op,
                             s->mv[dir][2 * i][0], s->mv[dir][2 * i][1],
-                            16, mb_y >> 1);
+                            16, 0, mb_y >> 1);
 
                 // after put we make avg of the same block
                 pix_op = s->hdsp.avg_pixels_tab;

diff --git a/libavcodec/mpegvideo_parser.c b/libavcodec/mpegvideo_parser.c
index de70cd5..7a3c7ab 100644
--- a/libavcodec/mpegvideo_parser.c
+++ b/libavcodec/mpegvideo_parser.c

@@ -61,7 +61,7 @@
             if (bytes_left >= 2) {
                 s->pict_type = (buf[1] >> 3) & 7;
                 if (bytes_left >= 4)
-                vbv_delay = ((buf[1] & 0x07) << 13) | (buf[2] << 5) | (buf[3]  >> 3);
+                    vbv_delay = ((buf[1] & 0x07) << 13) | (buf[2] << 5) | (buf[3] >> 3);
             }
             break;
         case SEQ_START_CODE:
@@ -131,7 +131,7 @@
                             }
                         }
 
-                        if (!pc->progressive_sequence) {
+                        if (!pc->progressive_sequence && !progressive_frame) {
                             if (top_field_first)
                                 s->field_order = AV_FIELD_TT;
                             else

diff --git a/libavcodec/mpegvideo_xvmc.c b/libavcodec/mpegvideo_xvmc.c
index 519a448..f065837 100644
--- a/libavcodec/mpegvideo_xvmc.c
+++ b/libavcodec/mpegvideo_xvmc.c

@@ -348,7 +348,7 @@
 }
 
 #if CONFIG_MPEG1_XVMC_HWACCEL
-AVHWAccel ff_mpeg1_xvmc_hwaccel = {
+const AVHWAccel ff_mpeg1_xvmc_hwaccel = {
     .name           = "mpeg1_xvmc",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG1VIDEO,
@@ -362,7 +362,7 @@
 #endif
 
 #if CONFIG_MPEG2_XVMC_HWACCEL
-AVHWAccel ff_mpeg2_xvmc_hwaccel = {
+const AVHWAccel ff_mpeg2_xvmc_hwaccel = {
     .name           = "mpeg2_xvmc",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG2VIDEO,

diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c
index 0af3d8b..a34ab35 100644
--- a/libavcodec/mpegvideoencdsp.c
+++ b/libavcodec/mpegvideoencdsp.c

@@ -81,7 +81,7 @@
 static int pix_norm1_c(uint8_t *pix, int line_size)
 {
     int s = 0, i, j;
-    uint32_t *sq = ff_square_tab + 256;
+    const uint32_t *sq = ff_square_tab + 256;
 
     for (i = 0; i < 16; i++) {
         for (j = 0; j < 16; j += 8) {

diff --git a/libavcodec/mscc.c b/libavcodec/mscc.c
index 6e4dbb0..86e4e88 100644
--- a/libavcodec/mscc.c
+++ b/libavcodec/mscc.c

@@ -37,10 +37,15 @@
     unsigned int      uncomp_size;
     uint8_t          *uncomp_buf;
     z_stream          zstream;
+
+    uint32_t          pal[256];
 } MSCCContext;
 
-static int rle_uncompress(AVCodecContext *avctx, GetByteContext *gb, PutByteContext *pb, int bpp)
+static int rle_uncompress(AVCodecContext *avctx, GetByteContext *gb, PutByteContext *pb)
 {
+    MSCCContext *s = avctx->priv_data;
+    unsigned x = 0, y = 0;
+
     while (bytestream2_get_bytes_left(gb) > 0) {
         uint32_t fill;
         int j;
@@ -78,19 +83,22 @@
                     break;
                 }
             }
+            x += run;
         } else {
             unsigned copy = bytestream2_get_byte(gb);
 
-            if (copy == 1) {
+            if (copy == 0) {
+                x = 0;
+                y++;
+                bytestream2_seek_p(pb, y * avctx->width * s->bpp, SEEK_SET);
+            } else if (copy == 1) {
                 return 0;
             } else if (copy == 2) {
-                unsigned x, y;
 
-                x = bytestream2_get_byte(gb);
-                y = bytestream2_get_byte(gb);
+                x += bytestream2_get_byte(gb);
+                y += bytestream2_get_byte(gb);
 
-                bytestream2_skip_p(pb, x * bpp);
-                bytestream2_skip_p(pb, y * bpp * avctx->width);
+                bytestream2_seek_p(pb, y * avctx->width * s->bpp + x * s->bpp, SEEK_SET);
             } else {
                 for (j = 0; j < copy; j++) {
                     switch (avctx->bits_per_coded_sample) {
@@ -108,6 +116,10 @@
                         break;
                     }
                 }
+
+                if (s->bpp == 1 && (copy & 1))
+                    bytestream2_skip(gb, 1);
+                x += copy;
             }
         }
     }
@@ -128,7 +140,8 @@
     int ret, j;
 
     if (avpkt->size < 3)
-        return AVERROR_INVALIDDATA;
+        return buf_size;
+
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
 
@@ -138,6 +151,20 @@
         buf_size -= 2;
     }
 
+    if (avctx->pix_fmt == AV_PIX_FMT_PAL8) {
+        int size;
+        const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, &size);
+
+        if (pal && size == AVPALETTE_SIZE) {
+            frame->palette_has_changed = 1;
+            for (j = 0; j < 256; j++)
+                s->pal[j] = 0xFF000000 | AV_RL32(pal + j * 4);
+        } else if (pal) {
+            av_log(avctx, AV_LOG_ERROR, "Palette size %d is wrong\n", size);
+        }
+        memcpy(frame->data[1], s->pal, AVPALETTE_SIZE);
+    }
+
     ret = inflateReset(&s->zstream);
     if (ret != Z_OK) {
         av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", ret);
@@ -156,7 +183,7 @@
     bytestream2_init(&gb, s->decomp_buf, s->zstream.total_out);
     bytestream2_init_writer(&pb, s->uncomp_buf, s->uncomp_size);
 
-    ret = rle_uncompress(avctx, &gb, &pb, s->bpp);
+    ret = rle_uncompress(avctx, &gb, &pb);
     if (ret)
         return ret;
 
@@ -176,10 +203,10 @@
 static av_cold int decode_init(AVCodecContext *avctx)
 {
     MSCCContext *s = avctx->priv_data;
-    int zret;
+    int stride, zret;
 
     switch (avctx->bits_per_coded_sample) {
-    case  8: avctx->pix_fmt = AV_PIX_FMT_GRAY8;  break;
+    case  8: avctx->pix_fmt = AV_PIX_FMT_PAL8;   break;
     case 16: avctx->pix_fmt = AV_PIX_FMT_RGB555; break;
     case 24: avctx->pix_fmt = AV_PIX_FMT_BGR24;  break;
     case 32: avctx->pix_fmt = AV_PIX_FMT_BGRA;   break;
@@ -189,13 +216,13 @@
     }
 
     s->bpp = avctx->bits_per_coded_sample >> 3;
-    memset(&s->zstream, 0, sizeof(z_stream));
+    stride = 4 * ((avctx->width * avctx->bits_per_coded_sample + 31) / 32);
 
-    s->decomp_size = 4 * avctx->height * ((avctx->width * avctx->bits_per_coded_sample + 31) / 32);
+    s->decomp_size = 2 * avctx->height * stride;
     if (!(s->decomp_buf = av_malloc(s->decomp_size)))
         return AVERROR(ENOMEM);
 
-    s->uncomp_size = 4 * avctx->height * ((avctx->width * avctx->bits_per_coded_sample + 31) / 32);
+    s->uncomp_size = avctx->height * stride;
     if (!(s->uncomp_buf = av_malloc(s->uncomp_size)))
         return AVERROR(ENOMEM);
 
@@ -234,6 +261,7 @@
     .close            = decode_close,
     .decode           = decode_frame,
     .capabilities     = AV_CODEC_CAP_DR1,
+    .caps_internal    = FF_CODEC_CAP_INIT_CLEANUP,
 };
 
 AVCodec ff_srgc_decoder = {
@@ -246,4 +274,5 @@
     .close            = decode_close,
     .decode           = decode_frame,
     .capabilities     = AV_CODEC_CAP_DR1,
+    .caps_internal    = FF_CODEC_CAP_INIT_CLEANUP,
 };

diff --git a/libavcodec/msmpeg4dec.c b/libavcodec/msmpeg4dec.c
index 4105d4b..457a37e 100644
--- a/libavcodec/msmpeg4dec.c
+++ b/libavcodec/msmpeg4dec.c

@@ -208,6 +208,9 @@
     uint8_t *coded_val;
     uint32_t * const mb_type_ptr = &s->current_picture.mb_type[s->mb_x + s->mb_y*s->mb_stride];
 
+    if (get_bits_left(&s->gb) <= 0)
+        return AVERROR_INVALIDDATA;
+
     if (s->pict_type == AV_PICTURE_TYPE_P) {
         if (s->use_skip_mb_code) {
             if (get_bits1(&s->gb)) {

diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
index 9e7cc46..3180af1 100644
--- a/libavcodec/mss2.c
+++ b/libavcodec/mss2.c

@@ -464,9 +464,9 @@
     return 0;
 }
 
-typedef struct Rectangle {
+struct Rectangle {
     int coded, x, y, w, h;
-} Rectangle;
+};
 
 #define MAX_WMV9_RECTANGLES 20
 #define ARITH2_PADDING 2
@@ -485,7 +485,7 @@
 
     int keyframe, has_wmv9, has_mv, is_rle, is_555, ret;
 
-    Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
+    struct Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
     int used_rects = 0, i, implicit_rect = 0, av_uninit(wmv9_mask);
 
     if ((ret = init_get_bits8(&gb, buf, buf_size)) < 0)

diff --git a/libavcodec/mwsc.c b/libavcodec/mwsc.c
new file mode 100644
index 0000000..4db7642
--- /dev/null
+++ b/libavcodec/mwsc.c

@@ -0,0 +1,192 @@
+/*
+ * MatchWare Screen Capture Codec decoder
+ *
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+
+#include <zlib.h>
+
+typedef struct MWSCContext {
+    unsigned int      decomp_size;
+    uint8_t          *decomp_buf;
+    z_stream          zstream;
+    AVFrame          *prev_frame;
+} MWSCContext;
+
+static int rle_uncompress(GetByteContext *gb, PutByteContext *pb, GetByteContext *gbp,
+                          int width, int height, int stride, int pb_linesize, int gbp_linesize)
+{
+    int intra = 1, w = 0;
+
+    bytestream2_seek_p(pb, (height - 1) * pb_linesize, SEEK_SET);
+
+    while (bytestream2_get_bytes_left(gb) > 0) {
+        uint32_t fill = bytestream2_get_le24(gb);
+        unsigned run = bytestream2_get_byte(gb);
+
+        if (run == 0) {
+            run = bytestream2_get_le32(gb);
+            for (int j = 0; j < run; j++, w++) {
+                if (w == width) {
+                    w = 0;
+                    bytestream2_seek_p(pb, -(pb_linesize + stride), SEEK_CUR);
+                }
+                bytestream2_put_le24(pb, fill);
+            }
+        } else if (run == 255) {
+            int pos = bytestream2_tell_p(pb);
+
+            bytestream2_seek(gbp, pos, SEEK_SET);
+            for (int j = 0; j < fill; j++, w++) {
+                if (w == width) {
+                    w = 0;
+                    bytestream2_seek_p(pb, -(pb_linesize + stride), SEEK_CUR);
+                    bytestream2_seek(gbp, -(gbp_linesize + stride), SEEK_CUR);
+                }
+                bytestream2_put_le24(pb, bytestream2_get_le24(gbp));
+            }
+
+            intra = 0;
+        } else {
+            for (int j = 0; j < run; j++, w++) {
+                if (w == width) {
+                    w = 0;
+                    bytestream2_seek_p(pb, -(pb_linesize + stride), SEEK_CUR);
+                }
+                bytestream2_put_le24(pb, fill);
+            }
+        }
+    }
+
+    return intra;
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *got_frame,
+                        AVPacket *avpkt)
+{
+    MWSCContext *s = avctx->priv_data;
+    AVFrame *frame = data;
+    uint8_t *buf = avpkt->data;
+    int buf_size = avpkt->size;
+    GetByteContext gb;
+    GetByteContext gbp;
+    PutByteContext pb;
+    int ret;
+
+    ret = inflateReset(&s->zstream);
+    if (ret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", ret);
+        return AVERROR_EXTERNAL;
+    }
+    s->zstream.next_in   = buf;
+    s->zstream.avail_in  = buf_size;
+    s->zstream.next_out  = s->decomp_buf;
+    s->zstream.avail_out = s->decomp_size;
+    ret = inflate(&s->zstream, Z_FINISH);
+    if (ret != Z_STREAM_END) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate error: %d\n", ret);
+        return AVERROR_EXTERNAL;
+    }
+
+    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
+        return ret;
+
+    bytestream2_init(&gb, s->decomp_buf, s->zstream.total_out);
+    bytestream2_init(&gbp, s->prev_frame->data[0], avctx->height * s->prev_frame->linesize[0]);
+    bytestream2_init_writer(&pb, frame->data[0], avctx->height * frame->linesize[0]);
+
+    frame->key_frame = rle_uncompress(&gb, &pb, &gbp, avctx->width, avctx->height, avctx->width * 3,
+                                      frame->linesize[0], s->prev_frame->linesize[0]);
+
+    frame->pict_type = frame->key_frame ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+
+    av_frame_unref(s->prev_frame);
+    if ((ret = av_frame_ref(s->prev_frame, frame)) < 0)
+        return ret;
+
+    *got_frame = 1;
+
+    return avpkt->size;
+}
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    MWSCContext *s = avctx->priv_data;
+    int64_t size;
+    int zret;
+
+    avctx->pix_fmt = AV_PIX_FMT_BGR24;
+
+    size = 32LL * avctx->height * avctx->width;
+    if (size >= INT32_MAX)
+        return AVERROR_INVALIDDATA;
+    s->decomp_size = size;
+    if (!(s->decomp_buf = av_malloc(s->decomp_size)))
+        return AVERROR(ENOMEM);
+
+    s->zstream.zalloc = Z_NULL;
+    s->zstream.zfree = Z_NULL;
+    s->zstream.opaque = Z_NULL;
+    zret = inflateInit(&s->zstream);
+    if (zret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate init error: %d\n", zret);
+        return AVERROR_EXTERNAL;
+    }
+
+    s->prev_frame = av_frame_alloc();
+    if (!s->prev_frame)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static av_cold int decode_close(AVCodecContext *avctx)
+{
+    MWSCContext *s = avctx->priv_data;
+
+    av_frame_free(&s->prev_frame);
+    av_freep(&s->decomp_buf);
+    s->decomp_size = 0;
+    inflateEnd(&s->zstream);
+
+    return 0;
+}
+
+AVCodec ff_mwsc_decoder = {
+    .name             = "mwsc",
+    .long_name        = NULL_IF_CONFIG_SMALL("MatchWare Screen Capture Codec"),
+    .type             = AVMEDIA_TYPE_VIDEO,
+    .id               = AV_CODEC_ID_MWSC,
+    .priv_data_size   = sizeof(MWSCContext),
+    .init             = decode_init,
+    .close            = decode_close,
+    .decode           = decode_frame,
+    .capabilities     = AV_CODEC_CAP_DR1,
+    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
+                        FF_CODEC_CAP_INIT_CLEANUP,
+};

diff --git a/libavcodec/noise_bsf.c b/libavcodec/noise_bsf.c
index 84b9403..d79f63b 100644
--- a/libavcodec/noise_bsf.c
+++ b/libavcodec/noise_bsf.c

@@ -35,52 +35,46 @@
     unsigned int state;
 } NoiseContext;
 
-static int noise(AVBSFContext *ctx, AVPacket *out)
+static int noise(AVBSFContext *ctx, AVPacket *pkt)
 {
     NoiseContext *s = ctx->priv_data;
-    AVPacket *in;
     int amount = s->amount > 0 ? s->amount : (s->state % 10001 + 1);
     int i, ret = 0;
 
     if (amount <= 0)
         return AVERROR(EINVAL);
 
-    ret = ff_bsf_get_packet(ctx, &in);
+    ret = ff_bsf_get_packet_ref(ctx, pkt);
     if (ret < 0)
         return ret;
 
     if (s->dropamount > 0 && s->state % s->dropamount == 0) {
         s->state++;
-        av_packet_free(&in);
+        av_packet_unref(pkt);
         return AVERROR(EAGAIN);
     }
 
-    ret = av_new_packet(out, in->size);
+    ret = av_packet_make_writable(pkt);
     if (ret < 0)
         goto fail;
 
-    ret = av_packet_copy_props(out, in);
-    if (ret < 0)
-        goto fail;
-
-    memcpy(out->data, in->data, in->size);
-
-    for (i = 0; i < out->size; i++) {
-        s->state += out->data[i] + 1;
+    for (i = 0; i < pkt->size; i++) {
+        s->state += pkt->data[i] + 1;
         if (s->state % amount == 0)
-            out->data[i] = s->state;
+            pkt->data[i] = s->state;
     }
 fail:
     if (ret < 0)
-        av_packet_unref(out);
-    av_packet_free(&in);
+        av_packet_unref(pkt);
+
     return ret;
 }
 
 #define OFFSET(x) offsetof(NoiseContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_BSF_PARAM)
 static const AVOption options[] = {
-    { "amount", NULL, OFFSET(amount), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX },
-    { "dropamount", NULL, OFFSET(dropamount), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX },
+    { "amount", NULL, OFFSET(amount), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },
+    { "dropamount", NULL, OFFSET(dropamount), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },
     { NULL },
 };
 

diff --git a/libavcodec/null_bsf.c b/libavcodec/null_bsf.c
index feb7124..24d26df 100644
--- a/libavcodec/null_bsf.c
+++ b/libavcodec/null_bsf.c

@@ -24,17 +24,9 @@
 #include "avcodec.h"
 #include "bsf.h"
 
-static int null_filter(AVBSFContext *ctx, AVPacket *out)
+static int null_filter(AVBSFContext *ctx, AVPacket *pkt)
 {
-    AVPacket *in;
-    int ret;
-
-    ret = ff_bsf_get_packet(ctx, &in);
-    if (ret < 0)
-        return ret;
-    av_packet_move_ref(out, in);
-    av_packet_free(&in);
-    return 0;
+    return ff_bsf_get_packet_ref(ctx, pkt);
 }
 
 const AVBitStreamFilter ff_null_bsf = {

diff --git a/libavcodec/nuv.c b/libavcodec/nuv.c
index ad6c029..32ed658 100644
--- a/libavcodec/nuv.c
+++ b/libavcodec/nuv.c

@@ -161,6 +161,7 @@
     int orig_size      = buf_size;
     int keyframe, ret;
     int size_change = 0;
+    int minsize = 0;
     int result, init_frame = !avctx->frame_number;
     enum {
         NUV_UNCOMPRESSED  = '0',
@@ -198,6 +199,9 @@
     case NUV_RTJPEG_IN_LZO:
     case NUV_RTJPEG:
         keyframe = !buf[2];
+        if (c->width < 16 || c->height < 16) {
+            return AVERROR_INVALIDDATA;
+        }
         break;
     case NUV_COPY_LAST:
         keyframe = 0;
@@ -206,6 +210,16 @@
         keyframe = 1;
         break;
     }
+    switch (comptype) {
+    case NUV_UNCOMPRESSED:
+        minsize = c->width * c->height * 3 / 2;
+        break;
+    case NUV_RTJPEG:
+        minsize = c->width/16 * (c->height/16) * 6;
+        break;
+    }
+    if (buf_size < minsize / 4)
+        return AVERROR_INVALIDDATA;
 retry:
     // Skip the rest of the frame header.
     buf       = &buf[12];

diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
new file mode 100644
index 0000000..e779be3
--- /dev/null
+++ b/libavcodec/nvdec.c

@@ -0,0 +1,639 @@
+/*
+ * HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2016 Anton Khirnov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/common.h"
+#include "libavutil/error.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/pixfmt.h"
+
+#include "avcodec.h"
+#include "decode.h"
+#include "nvdec.h"
+#include "internal.h"
+
+typedef struct NVDECDecoder {
+    CUvideodecoder decoder;
+
+    AVBufferRef *hw_device_ref;
+    CUcontext    cuda_ctx;
+    CUstream     stream;
+
+    CudaFunctions *cudl;
+    CuvidFunctions *cvdl;
+} NVDECDecoder;
+
+typedef struct NVDECFramePool {
+    unsigned int dpb_size;
+    unsigned int nb_allocated;
+} NVDECFramePool;
+
+static int map_avcodec_id(enum AVCodecID id)
+{
+    switch (id) {
+    case AV_CODEC_ID_H264:       return cudaVideoCodec_H264;
+    case AV_CODEC_ID_HEVC:       return cudaVideoCodec_HEVC;
+    case AV_CODEC_ID_MJPEG:      return cudaVideoCodec_JPEG;
+    case AV_CODEC_ID_MPEG1VIDEO: return cudaVideoCodec_MPEG1;
+    case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2;
+    case AV_CODEC_ID_MPEG4:      return cudaVideoCodec_MPEG4;
+    case AV_CODEC_ID_VC1:        return cudaVideoCodec_VC1;
+    case AV_CODEC_ID_VP8:        return cudaVideoCodec_VP8;
+    case AV_CODEC_ID_VP9:        return cudaVideoCodec_VP9;
+    case AV_CODEC_ID_WMV3:       return cudaVideoCodec_VC1;
+    }
+    return -1;
+}
+
+static int map_chroma_format(enum AVPixelFormat pix_fmt)
+{
+    int shift_h = 0, shift_v = 0;
+
+    av_pix_fmt_get_chroma_sub_sample(pix_fmt, &shift_h, &shift_v);
+
+    if (shift_h == 1 && shift_v == 1)
+        return cudaVideoChromaFormat_420;
+    else if (shift_h == 1 && shift_v == 0)
+        return cudaVideoChromaFormat_422;
+    else if (shift_h == 0 && shift_v == 0)
+        return cudaVideoChromaFormat_444;
+
+    return -1;
+}
+
+static int nvdec_test_capabilities(NVDECDecoder *decoder,
+                                   CUVIDDECODECREATEINFO *params, void *logctx)
+{
+    CUresult err;
+    CUVIDDECODECAPS caps = { 0 };
+
+    caps.eCodecType      = params->CodecType;
+    caps.eChromaFormat   = params->ChromaFormat;
+    caps.nBitDepthMinus8 = params->bitDepthMinus8;
+
+    if (!decoder->cvdl->cuvidGetDecoderCaps) {
+        av_log(logctx, AV_LOG_WARNING, "Used Nvidia driver is too old to perform a capability check.\n");
+        av_log(logctx, AV_LOG_WARNING, "The minimum required version is "
+#if defined(_WIN32) || defined(__CYGWIN__)
+            "378.66"
+#else
+            "378.13"
+#endif
+            ". Continuing blind.\n");
+        return 0;
+    }
+
+    err = decoder->cvdl->cuvidGetDecoderCaps(&caps);
+    if (err != CUDA_SUCCESS) {
+        av_log(logctx, AV_LOG_ERROR, "Failed querying decoder capabilities\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n");
+    av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n",
+           caps.bIsSupported ? "yes" : "no", caps.nMaxMBCount);
+    av_log(logctx, AV_LOG_VERBOSE, "min_width: %d, max_width: %d\n",
+           caps.nMinWidth, caps.nMaxWidth);
+    av_log(logctx, AV_LOG_VERBOSE, "min_height: %d, max_height: %d\n",
+           caps.nMinHeight, caps.nMaxHeight);
+
+    if (!caps.bIsSupported) {
+        av_log(logctx, AV_LOG_ERROR, "Hardware is lacking required capabilities\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (params->ulWidth > caps.nMaxWidth || params->ulWidth < caps.nMinWidth) {
+        av_log(logctx, AV_LOG_ERROR, "Video width %d not within range from %d to %d\n",
+               (int)params->ulWidth, caps.nMinWidth, caps.nMaxWidth);
+        return AVERROR(EINVAL);
+    }
+
+    if (params->ulHeight > caps.nMaxHeight || params->ulHeight < caps.nMinHeight) {
+        av_log(logctx, AV_LOG_ERROR, "Video height %d not within range from %d to %d\n",
+               (int)params->ulHeight, caps.nMinHeight, caps.nMaxHeight);
+        return AVERROR(EINVAL);
+    }
+
+    if ((params->ulWidth * params->ulHeight) / 256 > caps.nMaxMBCount) {
+        av_log(logctx, AV_LOG_ERROR, "Video macroblock count %d exceeds maximum of %d\n",
+               (int)(params->ulWidth * params->ulHeight) / 256, caps.nMaxMBCount);
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+static void nvdec_decoder_free(void *opaque, uint8_t *data)
+{
+    NVDECDecoder *decoder = (NVDECDecoder*)data;
+
+    if (decoder->decoder)
+        decoder->cvdl->cuvidDestroyDecoder(decoder->decoder);
+
+    av_buffer_unref(&decoder->hw_device_ref);
+
+    cuvid_free_functions(&decoder->cvdl);
+
+    av_freep(&decoder);
+}
+
+static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
+                                CUVIDDECODECREATEINFO *params, void *logctx)
+{
+    AVHWDeviceContext  *hw_device_ctx = (AVHWDeviceContext*)hw_device_ref->data;
+    AVCUDADeviceContext *device_hwctx = hw_device_ctx->hwctx;
+
+    AVBufferRef *decoder_ref;
+    NVDECDecoder *decoder;
+
+    CUcontext dummy;
+    CUresult err;
+    int ret;
+
+    decoder = av_mallocz(sizeof(*decoder));
+    if (!decoder)
+        return AVERROR(ENOMEM);
+
+    decoder_ref = av_buffer_create((uint8_t*)decoder, sizeof(*decoder),
+                                   nvdec_decoder_free, NULL, AV_BUFFER_FLAG_READONLY);
+    if (!decoder_ref) {
+        av_freep(&decoder);
+        return AVERROR(ENOMEM);
+    }
+
+    decoder->hw_device_ref = av_buffer_ref(hw_device_ref);
+    if (!decoder->hw_device_ref) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    decoder->cuda_ctx = device_hwctx->cuda_ctx;
+    decoder->cudl = device_hwctx->internal->cuda_dl;
+    decoder->stream = device_hwctx->stream;
+
+    ret = cuvid_load_functions(&decoder->cvdl, logctx);
+    if (ret < 0) {
+        av_log(logctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
+        goto fail;
+    }
+
+    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
+    if (err != CUDA_SUCCESS) {
+        ret = AVERROR_UNKNOWN;
+        goto fail;
+    }
+
+    ret = nvdec_test_capabilities(decoder, params, logctx);
+    if (ret < 0) {
+        decoder->cudl->cuCtxPopCurrent(&dummy);
+        goto fail;
+    }
+
+    err = decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params);
+
+    decoder->cudl->cuCtxPopCurrent(&dummy);
+
+    if (err != CUDA_SUCCESS) {
+        av_log(logctx, AV_LOG_ERROR, "Error creating a NVDEC decoder: %d\n", err);
+        ret = AVERROR_UNKNOWN;
+        goto fail;
+    }
+
+    *out = decoder_ref;
+
+    return 0;
+fail:
+    av_buffer_unref(&decoder_ref);
+    return ret;
+}
+
+static AVBufferRef *nvdec_decoder_frame_alloc(void *opaque, int size)
+{
+    NVDECFramePool *pool = opaque;
+    AVBufferRef *ret;
+
+    if (pool->nb_allocated >= pool->dpb_size)
+        return NULL;
+
+    ret = av_buffer_alloc(sizeof(unsigned int));
+    if (!ret)
+        return NULL;
+
+    *(unsigned int*)ret->data = pool->nb_allocated++;
+
+    return ret;
+}
+
+int ff_nvdec_decode_uninit(AVCodecContext *avctx)
+{
+    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
+
+    av_freep(&ctx->bitstream);
+    ctx->bitstream_len       = 0;
+    ctx->bitstream_allocated = 0;
+
+    av_freep(&ctx->slice_offsets);
+    ctx->nb_slices               = 0;
+    ctx->slice_offsets_allocated = 0;
+
+    av_buffer_unref(&ctx->decoder_ref);
+    av_buffer_pool_uninit(&ctx->decoder_pool);
+
+    return 0;
+}
+
+int ff_nvdec_decode_init(AVCodecContext *avctx)
+{
+    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
+
+    NVDECFramePool      *pool;
+    AVHWFramesContext   *frames_ctx;
+    const AVPixFmtDescriptor *sw_desc;
+
+    CUVIDDECODECREATEINFO params = { 0 };
+
+    int cuvid_codec_type, cuvid_chroma_format;
+    int ret = 0;
+
+    sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+    if (!sw_desc)
+        return AVERROR_BUG;
+
+    cuvid_codec_type = map_avcodec_id(avctx->codec_id);
+    if (cuvid_codec_type < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
+        return AVERROR_BUG;
+    }
+
+    cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
+    if (cuvid_chroma_format < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
+        return AVERROR(ENOSYS);
+    }
+
+    if (!avctx->hw_frames_ctx) {
+        ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
+        if (ret < 0)
+            return ret;
+    }
+
+    frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+
+    params.ulWidth             = avctx->coded_width;
+    params.ulHeight            = avctx->coded_height;
+    params.ulTargetWidth       = avctx->coded_width;
+    params.ulTargetHeight      = avctx->coded_height;
+    params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
+    params.OutputFormat        = params.bitDepthMinus8 ?
+                                 cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
+    params.CodecType           = cuvid_codec_type;
+    params.ChromaFormat        = cuvid_chroma_format;
+    params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
+    params.ulNumOutputSurfaces = frames_ctx->initial_pool_size;
+
+    ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
+    if (ret < 0) {
+        if (params.ulNumDecodeSurfaces > 32) {
+            av_log(avctx, AV_LOG_WARNING, "Using more than 32 (%d) decode surfaces might cause nvdec to fail.\n",
+                   (int)params.ulNumDecodeSurfaces);
+            av_log(avctx, AV_LOG_WARNING, "Try lowering the amount of threads. Using %d right now.\n",
+                   avctx->thread_count);
+        }
+        return ret;
+    }
+
+    pool = av_mallocz(sizeof(*pool));
+    if (!pool) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    pool->dpb_size = frames_ctx->initial_pool_size;
+
+    ctx->decoder_pool = av_buffer_pool_init2(sizeof(int), pool,
+                                             nvdec_decoder_frame_alloc, av_free);
+    if (!ctx->decoder_pool) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    return 0;
+fail:
+    ff_nvdec_decode_uninit(avctx);
+    return ret;
+}
+
+static void nvdec_fdd_priv_free(void *priv)
+{
+    NVDECFrame *cf = priv;
+
+    if (!cf)
+        return;
+
+    av_buffer_unref(&cf->idx_ref);
+    av_buffer_unref(&cf->decoder_ref);
+
+    av_freep(&priv);
+}
+
+static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
+{
+    NVDECFrame *unmap_data = (NVDECFrame*)data;
+    NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data;
+    CUdeviceptr devptr = (CUdeviceptr)opaque;
+    CUresult err;
+    CUcontext dummy;
+
+    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
+    if (err != CUDA_SUCCESS) {
+        av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+        goto finish;
+    }
+
+    err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
+    if (err != CUDA_SUCCESS)
+        av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n");
+
+    decoder->cudl->cuCtxPopCurrent(&dummy);
+
+finish:
+    av_buffer_unref(&unmap_data->idx_ref);
+    av_buffer_unref(&unmap_data->decoder_ref);
+    av_free(unmap_data);
+}
+
+static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
+{
+    FrameDecodeData  *fdd = (FrameDecodeData*)frame->private_ref->data;
+    NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
+    NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
+
+    CUVIDPROCPARAMS vpp = { 0 };
+    NVDECFrame *unmap_data = NULL;
+
+    CUresult err;
+    CUcontext dummy;
+    CUdeviceptr devptr;
+
+    unsigned int pitch, i;
+    unsigned int offset = 0;
+    int ret = 0;
+
+    vpp.progressive_frame = 1;
+    vpp.output_stream = decoder->stream;
+
+    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;
+
+    err = decoder->cvdl->cuvidMapVideoFrame(decoder->decoder, cf->idx, &devptr,
+                                            &pitch, &vpp);
+    if (err != CUDA_SUCCESS) {
+        av_log(logctx, AV_LOG_ERROR, "Error mapping a picture with CUVID: %d\n",
+               err);
+        ret = AVERROR_UNKNOWN;
+        goto finish;
+    }
+
+    unmap_data = av_mallocz(sizeof(*unmap_data));
+    if (!unmap_data) {
+        ret = AVERROR(ENOMEM);
+        goto copy_fail;
+    }
+
+    frame->buf[1] = av_buffer_create((uint8_t *)unmap_data, sizeof(*unmap_data),
+                                     nvdec_unmap_mapped_frame, (void*)devptr,
+                                     AV_BUFFER_FLAG_READONLY);
+    if (!frame->buf[1]) {
+        ret = AVERROR(ENOMEM);
+        goto copy_fail;
+    }
+
+    unmap_data->idx = cf->idx;
+    unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
+    unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
+
+    for (i = 0; frame->linesize[i]; i++) {
+        frame->data[i] = (uint8_t*)(devptr + offset);
+        frame->linesize[i] = pitch;
+        offset += pitch * (frame->height >> (i ? 1 : 0));
+    }
+
+    goto finish;
+
+copy_fail:
+    if (!frame->buf[1]) {
+        decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
+        av_freep(&unmap_data);
+    } else {
+        av_buffer_unref(&frame->buf[1]);
+    }
+
+finish:
+    decoder->cudl->cuCtxPopCurrent(&dummy);
+    return ret;
+}
+
+int ff_nvdec_start_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
+    FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
+    NVDECFrame *cf = NULL;
+    int ret;
+
+    ctx->bitstream_len = 0;
+    ctx->nb_slices     = 0;
+
+    if (fdd->hwaccel_priv)
+        return 0;
+
+    cf = av_mallocz(sizeof(*cf));
+    if (!cf)
+        return AVERROR(ENOMEM);
+
+    cf->decoder_ref = av_buffer_ref(ctx->decoder_ref);
+    if (!cf->decoder_ref) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    cf->idx_ref = av_buffer_pool_get(ctx->decoder_pool);
+    if (!cf->idx_ref) {
+        av_log(avctx, AV_LOG_ERROR, "No decoder surfaces left\n");
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    cf->idx = *(unsigned int*)cf->idx_ref->data;
+
+    fdd->hwaccel_priv      = cf;
+    fdd->hwaccel_priv_free = nvdec_fdd_priv_free;
+    fdd->post_process      = nvdec_retrieve_data;
+
+    return 0;
+fail:
+    nvdec_fdd_priv_free(cf);
+    return ret;
+
+}
+
+int ff_nvdec_end_frame(AVCodecContext *avctx)
+{
+    NVDECContext     *ctx = avctx->internal->hwaccel_priv_data;
+    NVDECDecoder *decoder = (NVDECDecoder*)ctx->decoder_ref->data;
+    CUVIDPICPARAMS    *pp = &ctx->pic_params;
+
+    CUresult err;
+    CUcontext dummy;
+
+    int ret = 0;
+
+    pp->nBitstreamDataLen = ctx->bitstream_len;
+    pp->pBitstreamData    = ctx->bitstream;
+    pp->nNumSlices        = ctx->nb_slices;
+    pp->pSliceDataOffsets = ctx->slice_offsets;
+
+    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;
+
+    err = decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params);
+    if (err != CUDA_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Error decoding a picture with NVDEC: %d\n",
+               err);
+        ret = AVERROR_UNKNOWN;
+        goto finish;
+    }
+
+finish:
+    decoder->cudl->cuCtxPopCurrent(&dummy);
+
+    return ret;
+}
+
+int ff_nvdec_simple_end_frame(AVCodecContext *avctx)
+{
+    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
+    int ret = ff_nvdec_end_frame(avctx);
+    ctx->bitstream = NULL;
+    return ret;
+}
+
+int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
+                                 uint32_t size)
+{
+    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
+    void *tmp;
+
+    tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
+                          (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
+    if (!tmp)
+        return AVERROR(ENOMEM);
+    ctx->slice_offsets = tmp;
+
+    if (!ctx->bitstream)
+        ctx->bitstream = (uint8_t*)buffer;
+
+    ctx->slice_offsets[ctx->nb_slices] = buffer - ctx->bitstream;
+    ctx->bitstream_len += size;
+    ctx->nb_slices++;
+
+    return 0;
+}
+
+static void nvdec_free_dummy(struct AVHWFramesContext *ctx)
+{
+    av_buffer_pool_uninit(&ctx->pool);
+}
+
+static AVBufferRef *nvdec_alloc_dummy(int size)
+{
+    return av_buffer_create(NULL, 0, NULL, NULL, 0);
+}
+
+int ff_nvdec_frame_params(AVCodecContext *avctx,
+                          AVBufferRef *hw_frames_ctx,
+                          int dpb_size)
+{
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
+    const AVPixFmtDescriptor *sw_desc;
+    int cuvid_codec_type, cuvid_chroma_format;
+
+    sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+    if (!sw_desc)
+        return AVERROR_BUG;
+
+    cuvid_codec_type = map_avcodec_id(avctx->codec_id);
+    if (cuvid_codec_type < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
+        return AVERROR_BUG;
+    }
+
+    cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
+    if (cuvid_chroma_format < 0) {
+        av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
+        return AVERROR(EINVAL);
+    }
+
+    frames_ctx->format            = AV_PIX_FMT_CUDA;
+    frames_ctx->width             = (avctx->coded_width + 1) & ~1;
+    frames_ctx->height            = (avctx->coded_height + 1) & ~1;
+    frames_ctx->initial_pool_size = dpb_size;
+
+    frames_ctx->free = nvdec_free_dummy;
+    frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy);
+
+    if (!frames_ctx->pool)
+        return AVERROR(ENOMEM);
+
+    switch (sw_desc->comp[0].depth) {
+    case 8:
+        frames_ctx->sw_format = AV_PIX_FMT_NV12;
+        break;
+    case 10:
+        frames_ctx->sw_format = AV_PIX_FMT_P010;
+        break;
+    case 12:
+        frames_ctx->sw_format = AV_PIX_FMT_P016;
+        break;
+    default:
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+int ff_nvdec_get_ref_idx(AVFrame *frame)
+{
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+
+    if (!frame || !frame->private_ref)
+        return -1;
+
+    fdd = (FrameDecodeData*)frame->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+    if (!cf)
+        return -1;
+
+    return cf->idx;
+}

diff --git a/libavcodec/nvdec.h b/libavcodec/nvdec.h
new file mode 100644
index 0000000..85a0fcf
--- /dev/null
+++ b/libavcodec/nvdec.h

@@ -0,0 +1,78 @@
+/*
+ * HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2016 Anton Khirnov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_NVDEC_H
+#define AVCODEC_NVDEC_H
+
+#include "compat/cuda/dynlink_loader.h"
+
+#include <stdint.h>
+
+#include "libavutil/buffer.h"
+#include "libavutil/frame.h"
+
+#include "avcodec.h"
+
+#if defined(NVDECAPI_MAJOR_VERSION) && defined(NVDECAPI_MINOR_VERSION)
+# define NVDECAPI_CHECK_VERSION(major, minor) \
+    ((major) < NVDECAPI_MAJOR_VERSION || ((major) == NVDECAPI_MAJOR_VERSION && (minor) <= NVDECAPI_MINOR_VERSION))
+#else
+/* version macros were added in SDK 8.1 ffnvcodec */
+# define NVDECAPI_CHECK_VERSION(major, minor) \
+    ((major) < 8 || ((major) == 8 && (minor) <= 0))
+#endif
+
+typedef struct NVDECFrame {
+    unsigned int idx;
+    AVBufferRef *idx_ref;
+    AVBufferRef *decoder_ref;
+} NVDECFrame;
+
+typedef struct NVDECContext {
+    CUVIDPICPARAMS pic_params;
+
+    AVBufferPool *decoder_pool;
+
+    AVBufferRef  *decoder_ref;
+
+    uint8_t      *bitstream;
+    int           bitstream_len;
+    unsigned int  bitstream_allocated;
+
+    unsigned     *slice_offsets;
+    int           nb_slices;
+    unsigned int  slice_offsets_allocated;
+} NVDECContext;
+
+int ff_nvdec_decode_init(AVCodecContext *avctx);
+int ff_nvdec_decode_uninit(AVCodecContext *avctx);
+int ff_nvdec_start_frame(AVCodecContext *avctx, AVFrame *frame);
+int ff_nvdec_end_frame(AVCodecContext *avctx);
+int ff_nvdec_simple_end_frame(AVCodecContext *avctx);
+int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
+                                 uint32_t size);
+int ff_nvdec_frame_params(AVCodecContext *avctx,
+                          AVBufferRef *hw_frames_ctx,
+                          int dpb_size);
+int ff_nvdec_get_ref_idx(AVFrame *frame);
+
+#endif /* AVCODEC_NVDEC_H */

diff --git a/libavcodec/nvdec_h264.c b/libavcodec/nvdec_h264.c
new file mode 100644
index 0000000..25b3032
--- /dev/null
+++ b/libavcodec/nvdec_h264.c

@@ -0,0 +1,184 @@
+/*
+ * MPEG-4 Part 10 / AVC / H.264 HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2016 Anton Khirnov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "avcodec.h"
+#include "nvdec.h"
+#include "decode.h"
+#include "internal.h"
+#include "h264dec.h"
+
+static void dpb_add(const H264Context *h, CUVIDH264DPBENTRY *dst, const H264Picture *src,
+                    int frame_idx)
+{
+    FrameDecodeData *fdd = (FrameDecodeData*)src->f->private_ref->data;
+    const NVDECFrame *cf = fdd->hwaccel_priv;
+
+    dst->PicIdx             = cf ? cf->idx : -1;
+    dst->FrameIdx           = frame_idx;
+    dst->is_long_term       = src->long_ref;
+    dst->not_existing       = 0;
+    dst->used_for_reference = src->reference & 3;
+    dst->FieldOrderCnt[0]   = src->field_poc[0];
+    dst->FieldOrderCnt[1]   = src->field_poc[1];
+}
+
+static int nvdec_h264_start_frame(AVCodecContext *avctx,
+                                  const uint8_t *buffer, uint32_t size)
+{
+    const H264Context *h = avctx->priv_data;
+    const PPS *pps = h->ps.pps;
+    const SPS *sps = h->ps.sps;
+
+    NVDECContext       *ctx = avctx->internal->hwaccel_priv_data;
+    CUVIDPICPARAMS      *pp = &ctx->pic_params;
+    CUVIDH264PICPARAMS *ppc = &pp->CodecSpecific.h264;
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+
+    int i, dpb_size, ret;
+
+    ret = ff_nvdec_start_frame(avctx, h->cur_pic_ptr->f);
+    if (ret < 0)
+        return ret;
+
+    fdd = (FrameDecodeData*)h->cur_pic_ptr->f->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+
+    *pp = (CUVIDPICPARAMS) {
+        .PicWidthInMbs     = h->mb_width,
+        .FrameHeightInMbs  = h->mb_height,
+        .CurrPicIdx        = cf->idx,
+        .field_pic_flag    = FIELD_PICTURE(h),
+        .bottom_field_flag = h->picture_structure == PICT_BOTTOM_FIELD,
+        .second_field      = FIELD_PICTURE(h) && !h->first_field,
+        .ref_pic_flag      = h->nal_ref_idc != 0,
+        .intra_pic_flag    = 1,
+
+        .CodecSpecific.h264 = {
+            .log2_max_frame_num_minus4            = sps->log2_max_frame_num - 4,
+            .pic_order_cnt_type                   = sps->poc_type,
+            .log2_max_pic_order_cnt_lsb_minus4    = FFMAX(sps->log2_max_poc_lsb - 4, 0),
+            .delta_pic_order_always_zero_flag     = sps->delta_pic_order_always_zero_flag,
+            .frame_mbs_only_flag                  = sps->frame_mbs_only_flag,
+            .direct_8x8_inference_flag            = sps->direct_8x8_inference_flag,
+            .num_ref_frames                       = sps->ref_frame_count,
+            .residual_colour_transform_flag       = sps->residual_color_transform_flag,
+            .bit_depth_luma_minus8                = sps->bit_depth_luma - 8,
+            .bit_depth_chroma_minus8              = sps->bit_depth_chroma - 8,
+            .qpprime_y_zero_transform_bypass_flag = sps->transform_bypass,
+
+            .entropy_coding_mode_flag               = pps->cabac,
+            .pic_order_present_flag                 = pps->pic_order_present,
+            .num_ref_idx_l0_active_minus1           = pps->ref_count[0] - 1,
+            .num_ref_idx_l1_active_minus1           = pps->ref_count[1] - 1,
+            .weighted_pred_flag                     = pps->weighted_pred,
+            .weighted_bipred_idc                    = pps->weighted_bipred_idc,
+            .pic_init_qp_minus26                    = pps->init_qp - 26,
+            .deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present,
+            .redundant_pic_cnt_present_flag         = pps->redundant_pic_cnt_present,
+            .transform_8x8_mode_flag                = pps->transform_8x8_mode,
+            .MbaffFrameFlag                         = sps->mb_aff && !FIELD_PICTURE(h),
+            .constrained_intra_pred_flag            = pps->constrained_intra_pred,
+            .chroma_qp_index_offset                 = pps->chroma_qp_index_offset[0],
+            .second_chroma_qp_index_offset          = pps->chroma_qp_index_offset[1],
+            .ref_pic_flag                           = h->nal_ref_idc != 0,
+            .frame_num                              = h->poc.frame_num,
+            .CurrFieldOrderCnt[0]                   = h->cur_pic_ptr->field_poc[0],
+            .CurrFieldOrderCnt[1]                   = h->cur_pic_ptr->field_poc[1],
+        },
+    };
+
+    memcpy(ppc->WeightScale4x4,    pps->scaling_matrix4,    sizeof(ppc->WeightScale4x4));
+    memcpy(ppc->WeightScale8x8[0], pps->scaling_matrix8[0], sizeof(ppc->WeightScale8x8[0]));
+    memcpy(ppc->WeightScale8x8[1], pps->scaling_matrix8[3], sizeof(ppc->WeightScale8x8[0]));
+
+    dpb_size = 0;
+    for (i = 0; i < h->short_ref_count; i++)
+        dpb_add(h, &ppc->dpb[dpb_size++], h->short_ref[i], h->short_ref[i]->frame_num);
+    for (i = 0; i < 16; i++) {
+        if (h->long_ref[i])
+            dpb_add(h, &ppc->dpb[dpb_size++], h->long_ref[i], i);
+    }
+
+    for (i = dpb_size; i < FF_ARRAY_ELEMS(ppc->dpb); i++)
+        ppc->dpb[i].PicIdx = -1;
+
+    return 0;
+}
+
+static int nvdec_h264_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
+                                   uint32_t size)
+{
+    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
+    CUVIDPICPARAMS *pp = &ctx->pic_params;
+    const H264Context *h = avctx->priv_data;
+    const H264SliceContext *sl = &h->slice_ctx[0];
+    void *tmp;
+
+    tmp = av_fast_realloc(ctx->bitstream, &ctx->bitstream_allocated,
+                          ctx->bitstream_len + size + 3);
+    if (!tmp)
+        return AVERROR(ENOMEM);
+    ctx->bitstream = tmp;
+
+    tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
+                          (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
+    if (!tmp)
+        return AVERROR(ENOMEM);
+    ctx->slice_offsets = tmp;
+
+    AV_WB24(ctx->bitstream + ctx->bitstream_len, 1);
+    memcpy(ctx->bitstream + ctx->bitstream_len + 3, buffer, size);
+    ctx->slice_offsets[ctx->nb_slices] = ctx->bitstream_len ;
+    ctx->bitstream_len += size + 3;
+    ctx->nb_slices++;
+
+    if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != AV_PICTURE_TYPE_SI)
+        pp->intra_pic_flag = 0;
+
+    return 0;
+}
+
+static int nvdec_h264_frame_params(AVCodecContext *avctx,
+                                   AVBufferRef *hw_frames_ctx)
+{
+    const H264Context *h = avctx->priv_data;
+    const SPS       *sps = h->ps.sps;
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, sps->ref_frame_count + sps->num_reorder_frames);
+}
+
+const AVHWAccel ff_h264_nvdec_hwaccel = {
+    .name                 = "h264_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_H264,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_h264_start_frame,
+    .end_frame            = ff_nvdec_end_frame,
+    .decode_slice         = nvdec_h264_decode_slice,
+    .frame_params         = nvdec_h264_frame_params,
+    .init                 = ff_nvdec_decode_init,
+    .uninit               = ff_nvdec_decode_uninit,
+    .priv_data_size       = sizeof(NVDECContext),
+};

diff --git a/libavcodec/nvdec_hevc.c b/libavcodec/nvdec_hevc.c
new file mode 100644
index 0000000..e04a701
--- /dev/null
+++ b/libavcodec/nvdec_hevc.c

@@ -0,0 +1,287 @@
+/*
+ * HEVC HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2017 Anton Khirnov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "avcodec.h"
+#include "nvdec.h"
+#include "decode.h"
+#include "internal.h"
+#include "hevcdec.h"
+#include "hevc_data.h"
+
+static void dpb_add(CUVIDHEVCPICPARAMS *pp, int idx, const HEVCFrame *src)
+{
+    FrameDecodeData *fdd = (FrameDecodeData*)src->frame->private_ref->data;
+    const NVDECFrame *cf = fdd->hwaccel_priv;
+
+    pp->RefPicIdx[idx]      = cf ? cf->idx : -1;
+    pp->PicOrderCntVal[idx] = src->poc;
+    pp->IsLongTerm[idx]     = !!(src->flags & HEVC_FRAME_FLAG_LONG_REF);
+}
+
+static void fill_scaling_lists(CUVIDHEVCPICPARAMS *ppc, const HEVCContext *s)
+{
+    const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
+                            &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
+    int i, j, pos;
+
+    for (i = 0; i < 6; i++) {
+        for (j = 0; j < 16; j++) {
+            pos = 4 * ff_hevc_diag_scan4x4_y[j] + ff_hevc_diag_scan4x4_x[j];
+            ppc->ScalingList4x4[i][j] = sl->sl[0][i][pos];
+        }
+
+        for (j = 0; j < 64; j++) {
+            pos = 8 * ff_hevc_diag_scan8x8_y[j] + ff_hevc_diag_scan8x8_x[j];
+            ppc->ScalingList8x8[i][j]   = sl->sl[1][i][pos];
+            ppc->ScalingList16x16[i][j] = sl->sl[2][i][pos];
+
+            if (i < 2)
+                ppc->ScalingList32x32[i][j] = sl->sl[3][i * 3][pos];
+        }
+
+        ppc->ScalingListDCCoeff16x16[i] = sl->sl_dc[0][i];
+        if (i < 2)
+            ppc->ScalingListDCCoeff32x32[i] = sl->sl_dc[1][i * 3];
+    }
+}
+
+static int nvdec_hevc_start_frame(AVCodecContext *avctx,
+                                  const uint8_t *buffer, uint32_t size)
+{
+    const HEVCContext *s = avctx->priv_data;
+    const HEVCPPS *pps = s->ps.pps;
+    const HEVCSPS *sps = s->ps.sps;
+
+    NVDECContext       *ctx = avctx->internal->hwaccel_priv_data;
+    CUVIDPICPARAMS      *pp = &ctx->pic_params;
+    CUVIDHEVCPICPARAMS *ppc = &pp->CodecSpecific.hevc;
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+
+    int i, j, dpb_size, ret;
+
+    ret = ff_nvdec_start_frame(avctx, s->ref->frame);
+    if (ret < 0)
+        return ret;
+
+    fdd = (FrameDecodeData*)s->ref->frame->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+
+    *pp = (CUVIDPICPARAMS) {
+        .PicWidthInMbs     = sps->width  / 16,
+        .FrameHeightInMbs  = sps->height / 16,
+        .CurrPicIdx        = cf->idx,
+        .ref_pic_flag      = 1,
+        .intra_pic_flag    = IS_IRAP(s),
+
+        .CodecSpecific.hevc = {
+            .pic_width_in_luma_samples                    = sps->width,
+            .pic_height_in_luma_samples                   = sps->height,
+            .log2_min_luma_coding_block_size_minus3       = sps->log2_min_cb_size - 3,
+            .log2_diff_max_min_luma_coding_block_size     = sps->log2_diff_max_min_coding_block_size,
+            .log2_min_transform_block_size_minus2         = sps->log2_min_tb_size - 2,
+            .log2_diff_max_min_transform_block_size       = sps->log2_max_trafo_size - sps->log2_min_tb_size,
+            .pcm_enabled_flag                             = sps->pcm_enabled_flag,
+            .log2_min_pcm_luma_coding_block_size_minus3   = sps->pcm_enabled_flag ? sps->pcm.log2_min_pcm_cb_size - 3 : 0,
+            .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size,
+            .pcm_sample_bit_depth_luma_minus1             = sps->pcm_enabled_flag ? sps->pcm.bit_depth - 1 : 0,
+            .pcm_sample_bit_depth_chroma_minus1           = sps->pcm_enabled_flag ? sps->pcm.bit_depth_chroma - 1 : 0,
+#if NVDECAPI_CHECK_VERSION(8, 1)
+            .log2_max_transform_skip_block_size_minus2    = pps->log2_max_transform_skip_block_size - 2,
+            .log2_sao_offset_scale_luma                   = pps->log2_sao_offset_scale_luma,
+            .log2_sao_offset_scale_chroma                 = pps->log2_sao_offset_scale_chroma,
+            .high_precision_offsets_enabled_flag          = sps->high_precision_offsets_enabled_flag,
+#endif
+            .pcm_loop_filter_disabled_flag                = sps->pcm.loop_filter_disable_flag,
+            .strong_intra_smoothing_enabled_flag          = sps->sps_strong_intra_smoothing_enable_flag,
+            .max_transform_hierarchy_depth_intra          = sps->max_transform_hierarchy_depth_intra,
+            .max_transform_hierarchy_depth_inter          = sps->max_transform_hierarchy_depth_inter,
+            .amp_enabled_flag                             = sps->amp_enabled_flag,
+            .separate_colour_plane_flag                   = sps->separate_colour_plane_flag,
+            .log2_max_pic_order_cnt_lsb_minus4            = sps->log2_max_poc_lsb - 4,
+            .num_short_term_ref_pic_sets                  = sps->nb_st_rps,
+            .long_term_ref_pics_present_flag              = sps->long_term_ref_pics_present_flag,
+            .num_long_term_ref_pics_sps                   = sps->num_long_term_ref_pics_sps,
+            .sps_temporal_mvp_enabled_flag                = sps->sps_temporal_mvp_enabled_flag,
+            .sample_adaptive_offset_enabled_flag          = sps->sao_enabled,
+            .scaling_list_enable_flag                     = sps->scaling_list_enable_flag,
+            .IrapPicFlag                                  = IS_IRAP(s),
+            .IdrPicFlag                                   = IS_IDR(s),
+            .bit_depth_luma_minus8                        = sps->bit_depth - 8,
+            .bit_depth_chroma_minus8                      = sps->bit_depth - 8,
+
+            .dependent_slice_segments_enabled_flag        = pps->dependent_slice_segments_enabled_flag,
+            .slice_segment_header_extension_present_flag  = pps->slice_header_extension_present_flag,
+            .sign_data_hiding_enabled_flag                = pps->sign_data_hiding_flag,
+            .cu_qp_delta_enabled_flag                     = pps->cu_qp_delta_enabled_flag,
+            .diff_cu_qp_delta_depth                       = pps->diff_cu_qp_delta_depth,
+            .init_qp_minus26                              = pps->pic_init_qp_minus26,
+            .pps_cb_qp_offset                             = pps->cb_qp_offset,
+            .pps_cr_qp_offset                             = pps->cr_qp_offset,
+            .constrained_intra_pred_flag                  = pps->constrained_intra_pred_flag,
+            .weighted_pred_flag                           = pps->weighted_pred_flag,
+            .weighted_bipred_flag                         = pps->weighted_bipred_flag,
+            .transform_skip_enabled_flag                  = pps->transform_skip_enabled_flag,
+            .transquant_bypass_enabled_flag               = pps->transquant_bypass_enable_flag,
+            .entropy_coding_sync_enabled_flag             = pps->entropy_coding_sync_enabled_flag,
+            .log2_parallel_merge_level_minus2             = pps->log2_parallel_merge_level - 2,
+            .num_extra_slice_header_bits                  = pps->num_extra_slice_header_bits,
+            .loop_filter_across_tiles_enabled_flag        = pps->loop_filter_across_tiles_enabled_flag,
+            .loop_filter_across_slices_enabled_flag       = pps->seq_loop_filter_across_slices_enabled_flag,
+            .output_flag_present_flag                     = pps->output_flag_present_flag,
+            .num_ref_idx_l0_default_active_minus1         = pps->num_ref_idx_l0_default_active - 1,
+            .num_ref_idx_l1_default_active_minus1         = pps->num_ref_idx_l1_default_active - 1,
+            .lists_modification_present_flag              = pps->lists_modification_present_flag,
+            .cabac_init_present_flag                      = pps->cabac_init_present_flag,
+            .pps_slice_chroma_qp_offsets_present_flag     = pps->pic_slice_level_chroma_qp_offsets_present_flag,
+            .deblocking_filter_override_enabled_flag      = pps->deblocking_filter_override_enabled_flag,
+            .pps_deblocking_filter_disabled_flag          = pps->disable_dbf,
+            .pps_beta_offset_div2                         = pps->beta_offset / 2,
+            .pps_tc_offset_div2                           = pps->tc_offset / 2,
+            .tiles_enabled_flag                           = pps->tiles_enabled_flag,
+            .uniform_spacing_flag                         = pps->uniform_spacing_flag,
+            .num_tile_columns_minus1                      = pps->num_tile_columns - 1,
+            .num_tile_rows_minus1                         = pps->num_tile_rows - 1,
+
+            .NumBitsForShortTermRPSInSlice                = s->sh.short_term_rps ? s->sh.short_term_ref_pic_set_size : 0,
+            .NumDeltaPocsOfRefRpsIdx                      = s->sh.short_term_rps ? s->sh.short_term_rps->rps_idx_num_delta_pocs : 0,
+            .NumPocTotalCurr                              = ff_hevc_frame_nb_refs(s),
+            .NumPocStCurrBefore                           = s->rps[ST_CURR_BEF].nb_refs,
+            .NumPocStCurrAfter                            = s->rps[ST_CURR_AFT].nb_refs,
+            .NumPocLtCurr                                 = s->rps[LT_CURR].nb_refs,
+            .CurrPicOrderCntVal                           = s->ref->poc,
+        },
+    };
+
+    if (pps->num_tile_columns > FF_ARRAY_ELEMS(ppc->column_width_minus1) ||
+        pps->num_tile_rows    > FF_ARRAY_ELEMS(ppc->row_height_minus1)) {
+        av_log(avctx, AV_LOG_ERROR, "Too many tiles\n");
+        return AVERROR(ENOSYS);
+    }
+    for (i = 0; i < pps->num_tile_columns; i++)
+        ppc->column_width_minus1[i] = pps->column_width[i] - 1;
+    for (i = 0; i < pps->num_tile_rows; i++)
+        ppc->row_height_minus1[i] = pps->row_height[i] - 1;
+
+    if (s->rps[LT_CURR].nb_refs     > FF_ARRAY_ELEMS(ppc->RefPicSetLtCurr)       ||
+        s->rps[ST_CURR_BEF].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetStCurrBefore) ||
+        s->rps[ST_CURR_AFT].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetStCurrAfter)) {
+        av_log(avctx, AV_LOG_ERROR, "Too many reference frames\n");
+        return AVERROR(ENOSYS);
+    }
+
+    dpb_size = 0;
+    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
+        const HEVCFrame *ref = &s->DPB[i];
+        if (!(ref->flags & (HEVC_FRAME_FLAG_SHORT_REF | HEVC_FRAME_FLAG_LONG_REF)))
+            continue;
+        if (dpb_size >= FF_ARRAY_ELEMS(ppc->RefPicIdx)) {
+            av_log(avctx, AV_LOG_ERROR, "Too many reference frames\n");
+            return AVERROR_INVALIDDATA;
+        }
+        dpb_add(ppc, dpb_size++, ref);
+
+    }
+    for (i = dpb_size; i < FF_ARRAY_ELEMS(ppc->RefPicIdx); i++)
+        ppc->RefPicIdx[i] = -1;
+
+    for (i = 0; i < s->rps[ST_CURR_BEF].nb_refs; i++) {
+        for (j = 0; j < dpb_size; j++) {
+            if (ppc->PicOrderCntVal[j] == s->rps[ST_CURR_BEF].list[i]) {
+                ppc->RefPicSetStCurrBefore[i] = j;
+                break;
+            }
+        }
+    }
+    for (i = 0; i < s->rps[ST_CURR_AFT].nb_refs; i++) {
+        for (j = 0; j < dpb_size; j++) {
+            if (ppc->PicOrderCntVal[j] == s->rps[ST_CURR_AFT].list[i]) {
+                ppc->RefPicSetStCurrAfter[i] = j;
+                break;
+            }
+        }
+    }
+    for (i = 0; i < s->rps[LT_CURR].nb_refs; i++) {
+        for (j = 0; j < dpb_size; j++) {
+            if (ppc->PicOrderCntVal[j] == s->rps[LT_CURR].list[i]) {
+                ppc->RefPicSetLtCurr[i] = j;
+                break;
+            }
+        }
+    }
+
+    fill_scaling_lists(ppc, s);
+
+    return 0;
+}
+
+static int nvdec_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
+                                   uint32_t size)
+{
+    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
+    void *tmp;
+
+    tmp = av_fast_realloc(ctx->bitstream, &ctx->bitstream_allocated,
+                          ctx->bitstream_len + size + 3);
+    if (!tmp)
+        return AVERROR(ENOMEM);
+    ctx->bitstream = tmp;
+
+    tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
+                          (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
+    if (!tmp)
+        return AVERROR(ENOMEM);
+    ctx->slice_offsets = tmp;
+
+    AV_WB24(ctx->bitstream + ctx->bitstream_len, 1);
+    memcpy(ctx->bitstream + ctx->bitstream_len + 3, buffer, size);
+    ctx->slice_offsets[ctx->nb_slices] = ctx->bitstream_len ;
+    ctx->bitstream_len += size + 3;
+    ctx->nb_slices++;
+
+    return 0;
+}
+
+static int nvdec_hevc_frame_params(AVCodecContext *avctx,
+                                   AVBufferRef *hw_frames_ctx)
+{
+    const HEVCContext *s = avctx->priv_data;
+    const HEVCSPS *sps = s->ps.sps;
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering + 1);
+}
+
+const AVHWAccel ff_hevc_nvdec_hwaccel = {
+    .name                 = "hevc_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_HEVC,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_hevc_start_frame,
+    .end_frame            = ff_nvdec_end_frame,
+    .decode_slice         = nvdec_hevc_decode_slice,
+    .frame_params         = nvdec_hevc_frame_params,
+    .init                 = ff_nvdec_decode_init,
+    .uninit               = ff_nvdec_decode_uninit,
+    .priv_data_size       = sizeof(NVDECContext),
+};

diff --git a/libavcodec/nvdec_mjpeg.c b/libavcodec/nvdec_mjpeg.c
new file mode 100644
index 0000000..7e40424
--- /dev/null
+++ b/libavcodec/nvdec_mjpeg.c

@@ -0,0 +1,86 @@
+/*
+ * MJPEG HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2017 Philip Langdale
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "mjpegdec.h"
+#include "nvdec.h"
+#include "decode.h"
+
+static int nvdec_mjpeg_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    MJpegDecodeContext *s = avctx->priv_data;
+
+    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
+    CUVIDPICPARAMS     *pp = &ctx->pic_params;
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+    AVFrame *cur_frame = s->picture;
+
+    int ret;
+
+    ret = ff_nvdec_start_frame(avctx, cur_frame);
+    if (ret < 0)
+        return ret;
+
+    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+
+    *pp = (CUVIDPICPARAMS) {
+        .PicWidthInMbs     = (cur_frame->width  + 15) / 16,
+        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
+        .CurrPicIdx        = cf->idx,
+
+        .intra_pic_flag    = 1,
+        .ref_pic_flag      = 0,
+    };
+
+    return ff_nvdec_simple_decode_slice(avctx, buffer, size);
+}
+
+static int nvdec_mjpeg_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    return 0;
+}
+
+static int nvdec_mjpeg_frame_params(AVCodecContext *avctx,
+                                  AVBufferRef *hw_frames_ctx)
+{
+    // Only need storage for the current frame
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 1);
+}
+
+#if CONFIG_MJPEG_NVDEC_HWACCEL
+AVHWAccel ff_mjpeg_nvdec_hwaccel = {
+    .name                 = "mjpeg_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_MJPEG,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_mjpeg_start_frame,
+    .end_frame            = ff_nvdec_simple_end_frame,
+    .decode_slice         = nvdec_mjpeg_decode_slice,
+    .frame_params         = nvdec_mjpeg_frame_params,
+    .init                 = ff_nvdec_decode_init,
+    .uninit               = ff_nvdec_decode_uninit,
+    .priv_data_size       = sizeof(NVDECContext),
+};
+#endif

diff --git a/libavcodec/nvdec_mpeg12.c b/libavcodec/nvdec_mpeg12.c
new file mode 100644
index 0000000..7293d50
--- /dev/null
+++ b/libavcodec/nvdec_mpeg12.c

@@ -0,0 +1,123 @@
+/*
+ * MPEG-1/2 HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2017 Philip Langdale
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "mpegvideo.h"
+#include "nvdec.h"
+#include "decode.h"
+
+static int nvdec_mpeg12_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    MpegEncContext *s = avctx->priv_data;
+
+    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
+    CUVIDPICPARAMS     *pp = &ctx->pic_params;
+    CUVIDMPEG2PICPARAMS *ppc = &pp->CodecSpecific.mpeg2;
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+    AVFrame *cur_frame = s->current_picture.f;
+
+    int ret, i;
+
+    ret = ff_nvdec_start_frame(avctx, cur_frame);
+    if (ret < 0)
+        return ret;
+
+    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+
+    *pp = (CUVIDPICPARAMS) {
+        .PicWidthInMbs     = (cur_frame->width  + 15) / 16,
+        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
+        .CurrPicIdx        = cf->idx,
+
+        .intra_pic_flag    = s->pict_type == AV_PICTURE_TYPE_I,
+        .ref_pic_flag      = s->pict_type == AV_PICTURE_TYPE_I ||
+                             s->pict_type == AV_PICTURE_TYPE_P,
+
+        .CodecSpecific.mpeg2 = {
+            .ForwardRefIdx     = ff_nvdec_get_ref_idx(s->last_picture.f),
+            .BackwardRefIdx    = ff_nvdec_get_ref_idx(s->next_picture.f),
+
+            .picture_coding_type        = s->pict_type,
+            .full_pel_forward_vector    = s->full_pel[0],
+            .full_pel_backward_vector   = s->full_pel[1],
+            .f_code                     = { { s->mpeg_f_code[0][0],
+                                              s->mpeg_f_code[0][1] },
+                                            { s->mpeg_f_code[1][0],
+                                              s->mpeg_f_code[1][1] } },
+            .intra_dc_precision         = s->intra_dc_precision,
+            .frame_pred_frame_dct       = s->frame_pred_frame_dct,
+            .concealment_motion_vectors = s->concealment_motion_vectors,
+            .q_scale_type               = s->q_scale_type,
+            .intra_vlc_format           = s->intra_vlc_format,
+            .alternate_scan             = s->alternate_scan,
+            .top_field_first            = s->top_field_first,
+        }
+    };
+
+    for (i = 0; i < 64; ++i) {
+        ppc->QuantMatrixIntra[i] = s->intra_matrix[i];
+        ppc->QuantMatrixInter[i] = s->inter_matrix[i];
+    }
+
+    return 0;
+}
+
+static int nvdec_mpeg12_frame_params(AVCodecContext *avctx,
+                                  AVBufferRef *hw_frames_ctx)
+{
+    // Each frame can at most have one P and one B reference
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 2);
+}
+
+#if CONFIG_MPEG2_NVDEC_HWACCEL
+const AVHWAccel ff_mpeg2_nvdec_hwaccel = {
+    .name                 = "mpeg2_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_MPEG2VIDEO,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_mpeg12_start_frame,
+    .end_frame            = ff_nvdec_simple_end_frame,
+    .decode_slice         = ff_nvdec_simple_decode_slice,
+    .frame_params         = nvdec_mpeg12_frame_params,
+    .init                 = ff_nvdec_decode_init,
+    .uninit               = ff_nvdec_decode_uninit,
+    .priv_data_size       = sizeof(NVDECContext),
+};
+#endif
+
+#if CONFIG_MPEG1_NVDEC_HWACCEL
+const AVHWAccel ff_mpeg1_nvdec_hwaccel = {
+    .name                 = "mpeg1_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_MPEG1VIDEO,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_mpeg12_start_frame,
+    .end_frame            = ff_nvdec_simple_end_frame,
+    .decode_slice         = ff_nvdec_simple_decode_slice,
+    .frame_params         = nvdec_mpeg12_frame_params,
+    .init                 = ff_nvdec_decode_init,
+    .uninit               = ff_nvdec_decode_uninit,
+    .priv_data_size       = sizeof(NVDECContext),
+};
+#endif

diff --git a/libavcodec/nvdec_mpeg4.c b/libavcodec/nvdec_mpeg4.c
new file mode 100644
index 0000000..907af13
--- /dev/null
+++ b/libavcodec/nvdec_mpeg4.c

@@ -0,0 +1,121 @@
+/*
+ * MPEG-4 Part 2 HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2017 Philip Langdale
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "mpeg4video.h"
+#include "nvdec.h"
+#include "decode.h"
+
+static int nvdec_mpeg4_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    Mpeg4DecContext *m = avctx->priv_data;
+    MpegEncContext *s = &m->m;
+
+    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
+    CUVIDPICPARAMS     *pp = &ctx->pic_params;
+    CUVIDMPEG4PICPARAMS *ppc = &pp->CodecSpecific.mpeg4;
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+    AVFrame *cur_frame = s->current_picture.f;
+
+    int ret, i;
+
+    ret = ff_nvdec_start_frame(avctx, cur_frame);
+    if (ret < 0)
+        return ret;
+
+    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+
+    *pp = (CUVIDPICPARAMS) {
+        .PicWidthInMbs     = (cur_frame->width  + 15) / 16,
+        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
+        .CurrPicIdx        = cf->idx,
+
+        .intra_pic_flag    = s->pict_type == AV_PICTURE_TYPE_I,
+        .ref_pic_flag      = s->pict_type == AV_PICTURE_TYPE_I ||
+                             s->pict_type == AV_PICTURE_TYPE_P ||
+                             s->pict_type == AV_PICTURE_TYPE_S,
+
+        .CodecSpecific.mpeg4 = {
+            .ForwardRefIdx                = ff_nvdec_get_ref_idx(s->last_picture.f),
+            .BackwardRefIdx               = ff_nvdec_get_ref_idx(s->next_picture.f),
+
+            .video_object_layer_width     = s->width,
+            .video_object_layer_height    = s->height,
+            .vop_time_increment_bitcount  = m->time_increment_bits,
+            .top_field_first              = s->top_field_first,
+            .resync_marker_disable        = !m->resync_marker,
+            .quant_type                   = s->mpeg_quant,
+            .quarter_sample               = s->quarter_sample,
+            .short_video_header           = avctx->codec->id == AV_CODEC_ID_H263,
+            .divx_flags                   = s->divx_packed ? 5 : 0,
+
+            .vop_coding_type              = s->pict_type - AV_PICTURE_TYPE_I,
+            .vop_coded                    = 1,
+            .vop_rounding_type            = s->no_rounding,
+            .alternate_vertical_scan_flag = s->alternate_scan,
+            .interlaced                   = !s->progressive_sequence,
+            .vop_fcode_forward            = s->f_code,
+            .vop_fcode_backward           = s->b_code,
+            .trd                          = { s->pp_time, s->pp_field_time >> 1 },
+            .trb                          = { s->pb_time, s->pb_field_time >> 1 },
+
+            .gmc_enabled                  = s->pict_type == AV_PICTURE_TYPE_S &&
+                                            m->vol_sprite_usage == GMC_SPRITE,
+        }
+    };
+
+    for (i = 0; i < 64; ++i) {
+        ppc->QuantMatrixIntra[i] = s->intra_matrix[i];
+        ppc->QuantMatrixInter[i] = s->inter_matrix[i];
+    }
+
+    // We need to pass the full frame buffer and not just the slice
+    return ff_nvdec_simple_decode_slice(avctx, buffer, size);
+}
+
+static int nvdec_mpeg4_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    return 0;
+}
+
+static int nvdec_mpeg4_frame_params(AVCodecContext *avctx,
+                                  AVBufferRef *hw_frames_ctx)
+{
+    // Each frame can at most have one P and one B reference
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 2);
+}
+
+const AVHWAccel ff_mpeg4_nvdec_hwaccel = {
+    .name                 = "mpeg4_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_MPEG4,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_mpeg4_start_frame,
+    .end_frame            = ff_nvdec_simple_end_frame,
+    .decode_slice         = nvdec_mpeg4_decode_slice,
+    .frame_params         = nvdec_mpeg4_frame_params,
+    .init                 = ff_nvdec_decode_init,
+    .uninit               = ff_nvdec_decode_uninit,
+    .priv_data_size       = sizeof(NVDECContext),
+};

diff --git a/libavcodec/nvdec_vc1.c b/libavcodec/nvdec_vc1.c
new file mode 100644
index 0000000..7257692
--- /dev/null
+++ b/libavcodec/nvdec_vc1.c

@@ -0,0 +1,141 @@
+/*
+ * VC1 HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2017 Philip Langdale
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "nvdec.h"
+#include "decode.h"
+#include "vc1.h"
+
+static int nvdec_vc1_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    VC1Context *v = avctx->priv_data;
+    MpegEncContext *s = &v->s;
+
+    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
+    CUVIDPICPARAMS     *pp = &ctx->pic_params;
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+    AVFrame *cur_frame = s->current_picture.f;
+
+    int ret;
+
+    ret = ff_nvdec_start_frame(avctx, cur_frame);
+    if (ret < 0)
+        return ret;
+
+    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+
+    *pp = (CUVIDPICPARAMS) {
+        .PicWidthInMbs     = (cur_frame->width  + 15) / 16,
+        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
+        .CurrPicIdx        = cf->idx,
+        .field_pic_flag    = v->field_mode,
+        .bottom_field_flag = v->cur_field_type,
+        .second_field      = v->second_field,
+
+        .intra_pic_flag    = s->pict_type == AV_PICTURE_TYPE_I ||
+                             s->pict_type == AV_PICTURE_TYPE_BI,
+        .ref_pic_flag      = s->pict_type == AV_PICTURE_TYPE_I ||
+                             s->pict_type == AV_PICTURE_TYPE_P,
+
+        .CodecSpecific.vc1 = {
+            .ForwardRefIdx     = ff_nvdec_get_ref_idx(s->last_picture.f),
+            .BackwardRefIdx    = ff_nvdec_get_ref_idx(s->next_picture.f),
+            .FrameWidth        = cur_frame->width,
+            .FrameHeight       = cur_frame->height,
+
+            .intra_pic_flag    = s->pict_type == AV_PICTURE_TYPE_I ||
+                                 s->pict_type == AV_PICTURE_TYPE_BI,
+            .ref_pic_flag      = s->pict_type == AV_PICTURE_TYPE_I ||
+                                 s->pict_type == AV_PICTURE_TYPE_P,
+            .progressive_fcm   = v->fcm == 0,
+
+            .profile           = v->profile,
+            .postprocflag      = v->postprocflag,
+            .pulldown          = v->broadcast,
+            .interlace         = v->interlace,
+            .tfcntrflag        = v->tfcntrflag,
+            .finterpflag       = v->finterpflag,
+            .psf               = v->psf,
+            .multires          = v->multires,
+            .syncmarker        = v->resync_marker,
+            .rangered          = v->rangered,
+            .maxbframes        = s->max_b_frames,
+
+            .panscan_flag      = v->panscanflag,
+            .refdist_flag      = v->refdist_flag,
+            .extended_mv       = v->extended_mv,
+            .dquant            = v->dquant,
+            .vstransform       = v->vstransform,
+            .loopfilter        = v->s.loop_filter,
+            .fastuvmc          = v->fastuvmc,
+            .overlap           = v->overlap,
+            .quantizer         = v->quantizer_mode,
+            .extended_dmv      = v->extended_dmv,
+            .range_mapy_flag   = v->range_mapy_flag,
+            .range_mapy        = v->range_mapy,
+            .range_mapuv_flag  = v->range_mapuv_flag,
+            .range_mapuv       = v->range_mapuv,
+            .rangeredfrm       = v->rangeredfrm,
+        }
+    };
+
+    return 0;
+}
+
+static int nvdec_vc1_frame_params(AVCodecContext *avctx,
+                                  AVBufferRef *hw_frames_ctx)
+{
+    // Each frame can at most have one P and one B reference
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 2);
+}
+
+const AVHWAccel ff_vc1_nvdec_hwaccel = {
+    .name                 = "vc1_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_VC1,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_vc1_start_frame,
+    .end_frame            = ff_nvdec_simple_end_frame,
+    .decode_slice         = ff_nvdec_simple_decode_slice,
+    .frame_params         = nvdec_vc1_frame_params,
+    .init                 = ff_nvdec_decode_init,
+    .uninit               = ff_nvdec_decode_uninit,
+    .priv_data_size       = sizeof(NVDECContext),
+};
+
+#if CONFIG_WMV3_NVDEC_HWACCEL
+const AVHWAccel ff_wmv3_nvdec_hwaccel = {
+    .name                 = "wmv3_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_WMV3,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_vc1_start_frame,
+    .end_frame            = ff_nvdec_simple_end_frame,
+    .decode_slice         = ff_nvdec_simple_decode_slice,
+    .frame_params         = nvdec_vc1_frame_params,
+    .init                 = ff_nvdec_decode_init,
+    .uninit               = ff_nvdec_decode_uninit,
+    .priv_data_size       = sizeof(NVDECContext),
+};
+#endif

diff --git a/libavcodec/nvdec_vp8.c b/libavcodec/nvdec_vp8.c
new file mode 100644
index 0000000..7b37445
--- /dev/null
+++ b/libavcodec/nvdec_vp8.c

@@ -0,0 +1,105 @@
+/*
+ * VP8 HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2017 Philip Langdale
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "nvdec.h"
+#include "decode.h"
+#include "internal.h"
+#include "vp8.h"
+
+static unsigned char safe_get_ref_idx(VP8Frame *frame)
+{
+    return frame ? ff_nvdec_get_ref_idx(frame->tf.f) : 255;
+}
+
+static int nvdec_vp8_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    VP8Context *h = avctx->priv_data;
+
+    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
+    CUVIDPICPARAMS     *pp = &ctx->pic_params;
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+    AVFrame *cur_frame = h->framep[VP56_FRAME_CURRENT]->tf.f;
+
+    int ret;
+
+    ret = ff_nvdec_start_frame(avctx, cur_frame);
+    if (ret < 0)
+        return ret;
+
+    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+
+    *pp = (CUVIDPICPARAMS) {
+        .PicWidthInMbs     = (cur_frame->width  + 15) / 16,
+        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
+        .CurrPicIdx        = cf->idx,
+
+        .CodecSpecific.vp8 = {
+            .width                       = cur_frame->width,
+            .height                      = cur_frame->height,
+
+            .first_partition_size        = h->header_partition_size,
+
+            .LastRefIdx                  = safe_get_ref_idx(h->framep[VP56_FRAME_PREVIOUS]),
+            .GoldenRefIdx                = safe_get_ref_idx(h->framep[VP56_FRAME_GOLDEN]),
+            .AltRefIdx                   = safe_get_ref_idx(h->framep[VP56_FRAME_GOLDEN2]),
+            /*
+             * Explicit braces for anonymous inners and unnamed fields
+             * to work around limitations in ancient versions of gcc.
+             */
+            { // union
+                { // struct
+                    !h->keyframe,             // frame_type
+                    h->profile,               // version
+                    !h->invisible,            // show_frame
+                    h->segmentation.enabled ? // update_mb_segmentation_data
+                        h->segmentation.update_feature_data : 0,
+                }
+            }
+        }
+    };
+
+    return 0;
+}
+
+static int nvdec_vp8_frame_params(AVCodecContext *avctx,
+                                  AVBufferRef *hw_frames_ctx)
+{
+    // VP8 uses a fixed size pool of 3 possible reference frames
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 3);
+}
+
+AVHWAccel ff_vp8_nvdec_hwaccel = {
+    .name                 = "vp8_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_VP8,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_vp8_start_frame,
+    .end_frame            = ff_nvdec_simple_end_frame,
+    .decode_slice         = ff_nvdec_simple_decode_slice,
+    .frame_params         = nvdec_vp8_frame_params,
+    .init                 = ff_nvdec_decode_init,
+    .uninit               = ff_nvdec_decode_uninit,
+    .priv_data_size       = sizeof(NVDECContext),
+};

diff --git a/libavcodec/nvdec_vp9.c b/libavcodec/nvdec_vp9.c
new file mode 100644
index 0000000..3b665a9
--- /dev/null
+++ b/libavcodec/nvdec_vp9.c

@@ -0,0 +1,184 @@
+/*
+ * VP9 HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2016 Timo Rothenpieler
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/pixdesc.h"
+
+#include "avcodec.h"
+#include "nvdec.h"
+#include "decode.h"
+#include "internal.h"
+#include "vp9shared.h"
+
+static int nvdec_vp9_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    VP9SharedContext *h = avctx->priv_data;
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
+    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
+    CUVIDPICPARAMS     *pp = &ctx->pic_params;
+    CUVIDVP9PICPARAMS *ppc = &pp->CodecSpecific.vp9;
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+    AVFrame *cur_frame = h->frames[CUR_FRAME].tf.f;
+
+    int ret, i;
+
+    ret = ff_nvdec_start_frame(avctx, cur_frame);
+    if (ret < 0)
+        return ret;
+
+    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+
+    *pp = (CUVIDPICPARAMS) {
+        .PicWidthInMbs     = (cur_frame->width  + 15) / 16,
+        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
+        .CurrPicIdx        = cf->idx,
+
+        .CodecSpecific.vp9 = {
+            .width                    = cur_frame->width,
+            .height                   = cur_frame->height,
+
+            .LastRefIdx               = ff_nvdec_get_ref_idx(h->refs[h->h.refidx[0]].f),
+            .GoldenRefIdx             = ff_nvdec_get_ref_idx(h->refs[h->h.refidx[1]].f),
+            .AltRefIdx                = ff_nvdec_get_ref_idx(h->refs[h->h.refidx[2]].f),
+
+            .profile                  = h->h.profile,
+            .frameContextIdx          = h->h.framectxid,
+            .frameType                = !h->h.keyframe,
+            .showFrame                = !h->h.invisible,
+            .errorResilient           = h->h.errorres,
+            .frameParallelDecoding    = h->h.parallelmode,
+            .subSamplingX             = pixdesc->log2_chroma_w,
+            .subSamplingY             = pixdesc->log2_chroma_h,
+            .intraOnly                = h->h.intraonly,
+            .allow_high_precision_mv  = h->h.keyframe ? 0 : h->h.highprecisionmvs,
+            .refreshEntropyProbs      = h->h.refreshctx,
+
+            .bitDepthMinus8Luma       = pixdesc->comp[0].depth - 8,
+            .bitDepthMinus8Chroma     = pixdesc->comp[1].depth - 8,
+
+            .loopFilterLevel          = h->h.filter.level,
+            .loopFilterSharpness      = h->h.filter.sharpness,
+            .modeRefLfEnabled         = h->h.lf_delta.enabled,
+
+            .log2_tile_columns        = h->h.tiling.log2_tile_cols,
+            .log2_tile_rows           = h->h.tiling.log2_tile_rows,
+
+            .segmentEnabled           = h->h.segmentation.enabled,
+            .segmentMapUpdate         = h->h.segmentation.update_map,
+            .segmentMapTemporalUpdate = h->h.segmentation.temporal,
+            .segmentFeatureMode       = h->h.segmentation.absolute_vals,
+
+            .qpYAc                    = h->h.yac_qi,
+            .qpYDc                    = h->h.ydc_qdelta,
+            .qpChDc                   = h->h.uvdc_qdelta,
+            .qpChAc                   = h->h.uvac_qdelta,
+
+            .resetFrameContext        = h->h.resetctx,
+            .mcomp_filter_type        = h->h.filtermode ^ (h->h.filtermode <= 1),
+
+            .frameTagSize             = h->h.uncompressed_header_size,
+            .offsetToDctParts         = h->h.compressed_header_size,
+
+            .refFrameSignBias[0]      = 0,
+        }
+    };
+
+    for (i = 0; i < 2; i++)
+        ppc->mbModeLfDelta[i] = h->h.lf_delta.mode[i];
+
+    for (i = 0; i < 4; i++)
+        ppc->mbRefLfDelta[i] = h->h.lf_delta.ref[i];
+
+    for (i = 0; i < 7; i++)
+        ppc->mb_segment_tree_probs[i] = h->h.segmentation.prob[i];
+
+    for (i = 0; i < 3; i++) {
+        ppc->activeRefIdx[i] = h->h.refidx[i];
+        ppc->segment_pred_probs[i] = h->h.segmentation.pred_prob[i];
+        ppc->refFrameSignBias[i + 1] = h->h.signbias[i];
+    }
+
+    for (i = 0; i < 8; i++) {
+        ppc->segmentFeatureEnable[i][0] = h->h.segmentation.feat[i].q_enabled;
+        ppc->segmentFeatureEnable[i][1] = h->h.segmentation.feat[i].lf_enabled;
+        ppc->segmentFeatureEnable[i][2] = h->h.segmentation.feat[i].ref_enabled;
+        ppc->segmentFeatureEnable[i][3] = h->h.segmentation.feat[i].skip_enabled;
+
+        ppc->segmentFeatureData[i][0] = h->h.segmentation.feat[i].q_val;
+        ppc->segmentFeatureData[i][1] = h->h.segmentation.feat[i].lf_val;
+        ppc->segmentFeatureData[i][2] = h->h.segmentation.feat[i].ref_val;
+        ppc->segmentFeatureData[i][3] = 0;
+    }
+
+    switch (avctx->colorspace) {
+    default:
+    case AVCOL_SPC_UNSPECIFIED:
+        ppc->colorSpace = 0;
+        break;
+    case AVCOL_SPC_BT470BG:
+        ppc->colorSpace = 1;
+        break;
+    case AVCOL_SPC_BT709:
+        ppc->colorSpace = 2;
+        break;
+    case AVCOL_SPC_SMPTE170M:
+        ppc->colorSpace = 3;
+        break;
+    case AVCOL_SPC_SMPTE240M:
+        ppc->colorSpace = 4;
+        break;
+    case AVCOL_SPC_BT2020_NCL:
+        ppc->colorSpace = 5;
+        break;
+    case AVCOL_SPC_RESERVED:
+        ppc->colorSpace = 6;
+        break;
+    case AVCOL_SPC_RGB:
+        ppc->colorSpace = 7;
+        break;
+    }
+
+    return 0;
+}
+
+static int nvdec_vp9_frame_params(AVCodecContext *avctx,
+                                  AVBufferRef *hw_frames_ctx)
+{
+    // VP9 uses a fixed size pool of 8 possible reference frames
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 8);
+}
+
+const AVHWAccel ff_vp9_nvdec_hwaccel = {
+    .name                 = "vp9_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_VP9,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_vp9_start_frame,
+    .end_frame            = ff_nvdec_simple_end_frame,
+    .decode_slice         = ff_nvdec_simple_decode_slice,
+    .frame_params         = nvdec_vp9_frame_params,
+    .init                 = ff_nvdec_decode_init,
+    .uninit               = ff_nvdec_decode_uninit,
+    .priv_data_size       = sizeof(NVDECContext),
+};

diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index d36fa29..e180d7b 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c

@@ -41,14 +41,19 @@
     AV_PIX_FMT_NV12,
     AV_PIX_FMT_P010,
     AV_PIX_FMT_YUV444P,
-    AV_PIX_FMT_YUV444P16,
+    AV_PIX_FMT_P016,      // Truncated to 10bits
+    AV_PIX_FMT_YUV444P16, // Truncated to 10bits
     AV_PIX_FMT_0RGB32,
     AV_PIX_FMT_0BGR32,
     AV_PIX_FMT_CUDA,
+#if CONFIG_D3D11VA
+    AV_PIX_FMT_D3D11,
+#endif
     AV_PIX_FMT_NONE
 };
 
 #define IS_10BIT(pix_fmt)  (pix_fmt == AV_PIX_FMT_P010    || \
+                            pix_fmt == AV_PIX_FMT_P016    || \
                             pix_fmt == AV_PIX_FMT_YUV444P16)
 
 #define IS_YUV444(pix_fmt) (pix_fmt == AV_PIX_FMT_YUV444P || \
@@ -114,10 +119,18 @@
 
 static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level)
 {
-#if defined(_WIN32) || defined(__CYGWIN__)
-    const char *minver = "378.66";
+#if NVENCAPI_CHECK_VERSION(8, 1)
+# if defined(_WIN32) || defined(__CYGWIN__)
+    const char *minver = "390.77";
+# else
+    const char *minver = "390.25";
+# endif
 #else
+# if defined(_WIN32) || defined(__CYGWIN__)
+    const char *minver = "378.66";
+# else
     const char *minver = "378.13";
+# endif
 #endif
     av_log(avctx, level, "The minimum required Nvidia driver for nvenc is %s or newer\n", minver);
 }
@@ -130,11 +143,11 @@
     uint32_t nvenc_max_ver;
     int ret;
 
-    ret = cuda_load_functions(&dl_fn->cuda_dl);
+    ret = cuda_load_functions(&dl_fn->cuda_dl, avctx);
     if (ret < 0)
         return ret;
 
-    ret = nvenc_load_functions(&dl_fn->nvenc_dl);
+    ret = nvenc_load_functions(&dl_fn->nvenc_dl, avctx);
     if (ret < 0) {
         nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
         return ret;
@@ -166,6 +179,43 @@
     return 0;
 }
 
+static int nvenc_push_context(AVCodecContext *avctx)
+{
+    NvencContext *ctx            = avctx->priv_data;
+    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+    CUresult cu_res;
+
+    if (ctx->d3d11_device)
+        return 0;
+
+    cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+    if (cu_res != CUDA_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+static int nvenc_pop_context(AVCodecContext *avctx)
+{
+    NvencContext *ctx            = avctx->priv_data;
+    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+    CUresult cu_res;
+    CUcontext dummy;
+
+    if (ctx->d3d11_device)
+        return 0;
+
+    cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+    if (cu_res != CUDA_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
 static av_cold int nvenc_open_session(AVCodecContext *avctx)
 {
     NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 };
@@ -175,8 +225,13 @@
 
     params.version    = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
     params.apiVersion = NVENCAPI_VERSION;
-    params.device     = ctx->cu_context;
-    params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+    if (ctx->d3d11_device) {
+        params.device     = ctx->d3d11_device;
+        params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
+    } else {
+        params.device     = ctx->cu_context;
+        params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+    }
 
     ret = p_nvenc->nvEncOpenEncodeSessionEx(&params, &ctx->nvencoder);
     if (ret != NV_ENC_SUCCESS) {
@@ -323,6 +378,24 @@
         return AVERROR(ENOSYS);
     }
 
+#ifdef NVENC_HAVE_BFRAME_REF_MODE
+    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE);
+    if (ctx->b_ref_mode == NV_ENC_BFRAME_REF_MODE_EACH && ret != 1) {
+        av_log(avctx, AV_LOG_VERBOSE, "Each B frame as reference is not supported\n");
+        return AVERROR(ENOSYS);
+    } else if (ctx->b_ref_mode != NV_ENC_BFRAME_REF_MODE_DISABLED && ret == 0) {
+        av_log(avctx, AV_LOG_VERBOSE, "B frames as references are not supported\n");
+        return AVERROR(ENOSYS);
+    }
+#else
+    if (ctx->b_ref_mode != 0) {
+        av_log(avctx, AV_LOG_VERBOSE, "B frames as references need SDK 8.1 at build time\n");
+        return AVERROR(ENOSYS);
+    }
+#endif
+
+    ctx->support_dyn_bitrate = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);
+
     return 0;
 }
 
@@ -335,7 +408,6 @@
     int major, minor, ret;
     CUresult cu_res;
     CUdevice cu_device;
-    CUcontext dummy;
     int loglevel = AV_LOG_VERBOSE;
 
     if (ctx->device == LIST_DEVICES)
@@ -378,11 +450,8 @@
 
     ctx->cu_context = ctx->cu_context_internal;
 
-    cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
+    if ((ret = nvenc_pop_context(avctx)) < 0)
         goto fail2;
-    }
 
     if ((ret = nvenc_open_session(avctx)) < 0)
         goto fail2;
@@ -398,20 +467,14 @@
         return 0;
 
 fail3:
-    cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
+    if ((ret = nvenc_push_context(avctx)) < 0)
+        return ret;
 
     p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
     ctx->nvencoder = NULL;
 
-    cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
+    if ((ret = nvenc_pop_context(avctx)) < 0)
+        return ret;
 
 fail2:
     dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
@@ -437,23 +500,48 @@
         return AVERROR_BUG;
     }
 
-    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->hw_frames_ctx || avctx->hw_device_ctx) {
+    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11 || avctx->hw_frames_ctx || avctx->hw_device_ctx) {
         AVHWFramesContext   *frames_ctx;
         AVHWDeviceContext   *hwdev_ctx;
-        AVCUDADeviceContext *device_hwctx;
+        AVCUDADeviceContext *cuda_device_hwctx = NULL;
+#if CONFIG_D3D11VA
+        AVD3D11VADeviceContext *d3d11_device_hwctx = NULL;
+#endif
         int ret;
 
         if (avctx->hw_frames_ctx) {
             frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
-            device_hwctx = frames_ctx->device_ctx->hwctx;
+            if (frames_ctx->format == AV_PIX_FMT_CUDA)
+                cuda_device_hwctx = frames_ctx->device_ctx->hwctx;
+#if CONFIG_D3D11VA
+            else if (frames_ctx->format == AV_PIX_FMT_D3D11)
+                d3d11_device_hwctx = frames_ctx->device_ctx->hwctx;
+#endif
+            else
+                return AVERROR(EINVAL);
         } else if (avctx->hw_device_ctx) {
             hwdev_ctx = (AVHWDeviceContext*)avctx->hw_device_ctx->data;
-            device_hwctx = hwdev_ctx->hwctx;
+            if (hwdev_ctx->type == AV_HWDEVICE_TYPE_CUDA)
+                cuda_device_hwctx = hwdev_ctx->hwctx;
+#if CONFIG_D3D11VA
+            else if (hwdev_ctx->type == AV_HWDEVICE_TYPE_D3D11VA)
+                d3d11_device_hwctx = hwdev_ctx->hwctx;
+#endif
+            else
+                return AVERROR(EINVAL);
         } else {
             return AVERROR(EINVAL);
         }
 
-        ctx->cu_context = device_hwctx->cuda_ctx;
+        if (cuda_device_hwctx) {
+            ctx->cu_context = cuda_device_hwctx->cuda_ctx;
+        }
+#if CONFIG_D3D11VA
+        else if (d3d11_device_hwctx) {
+            ctx->d3d11_device = d3d11_device_hwctx->device;
+            ID3D11Device_AddRef(ctx->d3d11_device);
+        }
+#endif
 
         ret = nvenc_open_session(avctx);
         if (ret < 0)
@@ -787,7 +875,7 @@
     if (avctx->rc_buffer_size > 0) {
         ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
     } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
-        ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
+        avctx->rc_buffer_size = ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
     }
 
     if (ctx->aq) {
@@ -926,6 +1014,10 @@
     if (ctx->coder >= 0)
         h264->entropyCodingMode = ctx->coder;
 
+#ifdef NVENC_HAVE_BFRAME_REF_MODE
+    h264->useBFramesAsRef = ctx->b_ref_mode;
+#endif
+
     return 0;
 }
 
@@ -1022,6 +1114,20 @@
     return 0;
 }
 
+static void compute_dar(AVCodecContext *avctx, int *dw, int *dh) {
+    int sw, sh;
+
+    sw = avctx->width;
+    sh = avctx->height;
+
+    if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 0) {
+        sw *= avctx->sample_aspect_ratio.num;
+        sh *= avctx->sample_aspect_ratio.den;
+    }
+
+    av_reduce(dw, dh, sw, sh, 1024 * 1024);
+}
+
 static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
 {
     NvencContext *ctx = avctx->priv_data;
@@ -1031,8 +1137,6 @@
     NV_ENC_PRESET_CONFIG preset_config = { 0 };
     NVENCSTATUS nv_status = NV_ENC_SUCCESS;
     AVCPBProperties *cpb_props;
-    CUresult cu_res;
-    CUcontext dummy;
     int res = 0;
     int dw, dh;
 
@@ -1060,13 +1164,7 @@
 
     ctx->encode_config.version = NV_ENC_CONFIG_VER;
 
-    dw = avctx->width;
-    dh = avctx->height;
-    if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 0) {
-        dw*= avctx->sample_aspect_ratio.num;
-        dh*= avctx->sample_aspect_ratio.den;
-    }
-    av_reduce(&dw, &dh, dw, dh, 1024 * 1024);
+    compute_dar(avctx, &dw, &dh);
     ctx->init_encode_params.darHeight = dh;
     ctx->init_encode_params.darWidth = dw;
 
@@ -1123,19 +1221,15 @@
     if (res)
         return res;
 
-    cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
+    res = nvenc_push_context(avctx);
+    if (res < 0)
+        return res;
 
     nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
 
-    cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
+    res = nvenc_pop_context(avctx);
+    if (res < 0)
+        return res;
 
     if (nv_status != NV_ENC_SUCCESS) {
         return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
@@ -1165,6 +1259,7 @@
     case AV_PIX_FMT_NV12:
         return NV_ENC_BUFFER_FORMAT_NV12_PL;
     case AV_PIX_FMT_P010:
+    case AV_PIX_FMT_P016:
         return NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
     case AV_PIX_FMT_YUV444P:
         return NV_ENC_BUFFER_FORMAT_YUV444_PL;
@@ -1190,7 +1285,7 @@
     NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
     allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
 
-    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
         ctx->surfaces[idx].in_ref = av_frame_alloc();
         if (!ctx->surfaces[idx].in_ref)
             return AVERROR(ENOMEM);
@@ -1222,7 +1317,7 @@
     nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
     if (nv_status != NV_ENC_SUCCESS) {
         int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed");
-        if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
+        if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11)
             p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[idx].input_surface);
         av_frame_free(&ctx->surfaces[idx].in_ref);
         return err;
@@ -1239,10 +1334,7 @@
 static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
 {
     NvencContext *ctx = avctx->priv_data;
-    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
-    CUresult cu_res;
-    CUcontext dummy;
-    int i, res;
+    int i, res = 0, res2;
 
     ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces));
     if (!ctx->surfaces)
@@ -1263,31 +1355,21 @@
     if (!ctx->output_surface_ready_queue)
         return AVERROR(ENOMEM);
 
-    cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
+    res = nvenc_push_context(avctx);
+    if (res < 0)
+        return res;
 
     for (i = 0; i < ctx->nb_surfaces; i++) {
         if ((res = nvenc_alloc_surface(avctx, i)) < 0)
-        {
-            cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-            if (cu_res != CUDA_SUCCESS) {
-                av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
-                return AVERROR_EXTERNAL;
-            }
-            return res;
-        }
+            goto fail;
     }
 
-    cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
+fail:
+    res2 = nvenc_pop_context(avctx);
+    if (res2 < 0)
+        return res2;
 
-    return 0;
+    return res;
 }
 
 static av_cold int nvenc_setup_extradata(AVCodecContext *avctx)
@@ -1328,20 +1410,16 @@
     NvencContext *ctx               = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
     NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
-    CUresult cu_res;
-    CUcontext dummy;
-    int i;
+    int i, res;
 
     /* the encoder has to be flushed before it can be closed */
     if (ctx->nvencoder) {
         NV_ENC_PIC_PARAMS params        = { .version        = NV_ENC_PIC_PARAMS_VER,
                                             .encodePicFlags = NV_ENC_PIC_FLAG_EOS };
 
-        cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
-        if (cu_res != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
-            return AVERROR_EXTERNAL;
-        }
+        res = nvenc_push_context(avctx);
+        if (res < 0)
+            return res;
 
         p_nvenc->nvEncEncodePicture(ctx->nvencoder, &params);
     }
@@ -1351,7 +1429,7 @@
     av_fifo_freep(&ctx->output_surface_queue);
     av_fifo_freep(&ctx->unused_surface_queue);
 
-    if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+    if (ctx->surfaces && (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11)) {
         for (i = 0; i < ctx->nb_registered_frames; i++) {
             if (ctx->registered_frames[i].mapped)
                 p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->registered_frames[i].in_map.mappedResource);
@@ -1363,7 +1441,7 @@
 
     if (ctx->surfaces) {
         for (i = 0; i < ctx->nb_surfaces; ++i) {
-            if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
+            if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11)
                 p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface);
             av_frame_free(&ctx->surfaces[i].in_ref);
             p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->surfaces[i].output_surface);
@@ -1375,11 +1453,9 @@
     if (ctx->nvencoder) {
         p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
 
-        cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-        if (cu_res != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
-            return AVERROR_EXTERNAL;
-        }
+        res = nvenc_pop_context(avctx);
+        if (res < 0)
+            return res;
     }
     ctx->nvencoder = NULL;
 
@@ -1387,6 +1463,13 @@
         dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
     ctx->cu_context = ctx->cu_context_internal = NULL;
 
+#if CONFIG_D3D11VA
+    if (ctx->d3d11_device) {
+        ID3D11Device_Release(ctx->d3d11_device);
+        ctx->d3d11_device = NULL;
+    }
+#endif
+
     nvenc_free_functions(&dl_fn->nvenc_dl);
     cuda_free_functions(&dl_fn->cuda_dl);
 
@@ -1402,7 +1485,7 @@
     NvencContext *ctx = avctx->priv_data;
     int ret;
 
-    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
         AVHWFramesContext *frames_ctx;
         if (!avctx->hw_frames_ctx) {
             av_log(avctx, AV_LOG_ERROR,
@@ -1410,6 +1493,11 @@
             return AVERROR(EINVAL);
         }
         frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+        if (frames_ctx->format != avctx->pix_fmt) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "hw_frames_ctx must match the GPU frame type\n");
+            return AVERROR(EINVAL);
+        }
         ctx->data_pix_fmt = frames_ctx->sw_format;
     } else {
         ctx->data_pix_fmt = avctx->pix_fmt;
@@ -1493,7 +1581,7 @@
                     nv_status = p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
                     if (nv_status != NV_ENC_SUCCESS)
                         return nvenc_print_error(avctx, nv_status, "Failed unregistering unused input resource");
-                    ctx->registered_frames[i].ptr = 0;
+                    ctx->registered_frames[i].ptr = NULL;
                     ctx->registered_frames[i].regptr = NULL;
                 }
                 return i;
@@ -1518,7 +1606,9 @@
     int i, idx, ret;
 
     for (i = 0; i < ctx->nb_registered_frames; i++) {
-        if (ctx->registered_frames[i].ptr == (CUdeviceptr)frame->data[0])
+        if (avctx->pix_fmt == AV_PIX_FMT_CUDA && ctx->registered_frames[i].ptr == frame->data[0])
+            return i;
+        else if (avctx->pix_fmt == AV_PIX_FMT_D3D11 && ctx->registered_frames[i].ptr == frame->data[0] && ctx->registered_frames[i].ptr_index == (intptr_t)frame->data[1])
             return i;
     }
 
@@ -1527,12 +1617,19 @@
         return idx;
 
     reg.version            = NV_ENC_REGISTER_RESOURCE_VER;
-    reg.resourceType       = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
     reg.width              = frames_ctx->width;
     reg.height             = frames_ctx->height;
     reg.pitch              = frame->linesize[0];
     reg.resourceToRegister = frame->data[0];
 
+    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+        reg.resourceType   = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
+    }
+    else if (avctx->pix_fmt == AV_PIX_FMT_D3D11) {
+        reg.resourceType     = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
+        reg.subResourceIndex = (intptr_t)frame->data[1];
+    }
+
     reg.bufferFormat       = nvenc_map_buffer_format(frames_ctx->sw_format);
     if (reg.bufferFormat == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
         av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n",
@@ -1546,8 +1643,9 @@
         return AVERROR_UNKNOWN;
     }
 
-    ctx->registered_frames[idx].ptr    = (CUdeviceptr)frame->data[0];
-    ctx->registered_frames[idx].regptr = reg.registeredResource;
+    ctx->registered_frames[idx].ptr       = frame->data[0];
+    ctx->registered_frames[idx].ptr_index = reg.subResourceIndex;
+    ctx->registered_frames[idx].regptr    = reg.registeredResource;
     return idx;
 }
 
@@ -1561,10 +1659,10 @@
     int res;
     NVENCSTATUS nv_status;
 
-    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
         int reg_idx = nvenc_register_frame(avctx, frame);
         if (reg_idx < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Could not register an input CUDA frame\n");
+            av_log(avctx, AV_LOG_ERROR, "Could not register an input HW frame\n");
             return reg_idx;
         }
 
@@ -1614,7 +1712,8 @@
 }
 
 static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
-                                            NV_ENC_PIC_PARAMS *params)
+                                            NV_ENC_PIC_PARAMS *params,
+                                            NV_ENC_SEI_PAYLOAD *sei_data)
 {
     NvencContext *ctx = avctx->priv_data;
 
@@ -1624,12 +1723,22 @@
             ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
         params->codecPicParams.h264PicParams.sliceModeData =
             ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
+        if (sei_data) {
+            params->codecPicParams.h264PicParams.seiPayloadArray = sei_data;
+            params->codecPicParams.h264PicParams.seiPayloadArrayCnt = 1;
+        }
+
       break;
     case AV_CODEC_ID_HEVC:
         params->codecPicParams.hevcPicParams.sliceMode =
             ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode;
         params->codecPicParams.hevcPicParams.sliceModeData =
             ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
+        if (sei_data) {
+            params->codecPicParams.hevcPicParams.seiPayloadArray = sei_data;
+            params->codecPicParams.hevcPicParams.seiPayloadArrayCnt = 1;
+        }
+
         break;
     }
 }
@@ -1710,8 +1819,10 @@
     }
     slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets));
 
-    if (!slice_offsets)
+    if (!slice_offsets) {
+        res = AVERROR(ENOMEM);
         goto error;
+    }
 
     lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;
 
@@ -1739,7 +1850,7 @@
     }
 
 
-    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
         ctx->registered_frames[tmpoutsurf->reg_idx].mapped -= 1;
         if (ctx->registered_frames[tmpoutsurf->reg_idx].mapped == 0) {
             nv_status = p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->registered_frames[tmpoutsurf->reg_idx].in_map.mappedResource);
@@ -1752,7 +1863,7 @@
                 res = nvenc_print_error(avctx, nv_status, "Failed unregistering input resource");
                 goto error;
             }
-            ctx->registered_frames[tmpoutsurf->reg_idx].ptr = 0;
+            ctx->registered_frames[tmpoutsurf->reg_idx].ptr = NULL;
             ctx->registered_frames[tmpoutsurf->reg_idx].regptr = NULL;
         } else if (ctx->registered_frames[tmpoutsurf->reg_idx].mapped < 0) {
             res = AVERROR_BUG;
@@ -1830,13 +1941,105 @@
     return (nb_ready > 0) && (nb_ready + nb_pending >= ctx->async_depth);
 }
 
+static void reconfig_encoder(AVCodecContext *avctx, const AVFrame *frame)
+{
+    NvencContext *ctx = avctx->priv_data;
+    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
+    NVENCSTATUS ret;
+
+    NV_ENC_RECONFIGURE_PARAMS params = { 0 };
+    int needs_reconfig = 0;
+    int needs_encode_config = 0;
+    int reconfig_bitrate = 0, reconfig_dar = 0;
+    int dw, dh;
+
+    params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
+    params.reInitEncodeParams = ctx->init_encode_params;
+
+    compute_dar(avctx, &dw, &dh);
+    if (dw != ctx->init_encode_params.darWidth || dh != ctx->init_encode_params.darHeight) {
+        av_log(avctx, AV_LOG_VERBOSE,
+               "aspect ratio change (DAR): %d:%d -> %d:%d\n",
+               ctx->init_encode_params.darWidth,
+               ctx->init_encode_params.darHeight, dw, dh);
+
+        params.reInitEncodeParams.darHeight = dh;
+        params.reInitEncodeParams.darWidth = dw;
+
+        needs_reconfig = 1;
+        reconfig_dar = 1;
+    }
+
+    if (ctx->rc != NV_ENC_PARAMS_RC_CONSTQP && ctx->support_dyn_bitrate) {
+        if (avctx->bit_rate > 0 && params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate != avctx->bit_rate) {
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "avg bitrate change: %d -> %d\n",
+                   params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate,
+                   (uint32_t)avctx->bit_rate);
+
+            params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate = avctx->bit_rate;
+            reconfig_bitrate = 1;
+        }
+
+        if (avctx->rc_max_rate > 0 && ctx->encode_config.rcParams.maxBitRate != avctx->rc_max_rate) {
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "max bitrate change: %d -> %d\n",
+                   params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate,
+                   (uint32_t)avctx->rc_max_rate);
+
+            params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate = avctx->rc_max_rate;
+            reconfig_bitrate = 1;
+        }
+
+        if (avctx->rc_buffer_size > 0 && ctx->encode_config.rcParams.vbvBufferSize != avctx->rc_buffer_size) {
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "vbv buffer size change: %d -> %d\n",
+                   params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize,
+                   avctx->rc_buffer_size);
+
+            params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize = avctx->rc_buffer_size;
+            reconfig_bitrate = 1;
+        }
+
+        if (reconfig_bitrate) {
+            params.resetEncoder = 1;
+            params.forceIDR = 1;
+
+            needs_encode_config = 1;
+            needs_reconfig = 1;
+        }
+    }
+
+    if (!needs_encode_config)
+        params.reInitEncodeParams.encodeConfig = NULL;
+
+    if (needs_reconfig) {
+        ret = p_nvenc->nvEncReconfigureEncoder(ctx->nvencoder, &params);
+        if (ret != NV_ENC_SUCCESS) {
+            nvenc_print_error(avctx, ret, "failed to reconfigure nvenc");
+        } else {
+            if (reconfig_dar) {
+                ctx->init_encode_params.darHeight = dh;
+                ctx->init_encode_params.darWidth = dw;
+            }
+
+            if (reconfig_bitrate) {
+                ctx->encode_config.rcParams.averageBitRate = params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate;
+                ctx->encode_config.rcParams.maxBitRate = params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate;
+                ctx->encode_config.rcParams.vbvBufferSize = params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize;
+            }
+
+        }
+    }
+}
+
 int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
 {
     NVENCSTATUS nv_status;
-    CUresult cu_res;
-    CUcontext dummy;
     NvencSurface *tmp_out_surf, *in_surf;
-    int res;
+    int res, res2;
+    NV_ENC_SEI_PAYLOAD *sei_data = NULL;
+    size_t sei_size;
 
     NvencContext *ctx = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
@@ -1845,30 +2048,36 @@
     NV_ENC_PIC_PARAMS pic_params = { 0 };
     pic_params.version = NV_ENC_PIC_PARAMS_VER;
 
-    if (!ctx->cu_context || !ctx->nvencoder)
+    if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
         return AVERROR(EINVAL);
 
-    if (ctx->encoder_flushing)
-        return AVERROR_EOF;
+    if (ctx->encoder_flushing) {
+        if (avctx->internal->draining)
+            return AVERROR_EOF;
+
+        ctx->encoder_flushing = 0;
+        ctx->first_packet_output = 0;
+        ctx->initial_pts[0] = AV_NOPTS_VALUE;
+        ctx->initial_pts[1] = AV_NOPTS_VALUE;
+        av_fifo_reset(ctx->timestamp_list);
+    }
 
     if (frame) {
         in_surf = get_free_frame(ctx);
         if (!in_surf)
             return AVERROR(EAGAIN);
 
-        cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
-        if (cu_res != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
-            return AVERROR_EXTERNAL;
-        }
+        res = nvenc_push_context(avctx);
+        if (res < 0)
+            return res;
+
+        reconfig_encoder(avctx, frame);
 
         res = nvenc_upload_frame(avctx, frame, in_surf);
 
-        cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-        if (cu_res != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
-            return AVERROR_EXTERNAL;
-        }
+        res2 = nvenc_pop_context(avctx);
+        if (res2 < 0)
+            return res2;
 
         if (res)
             return res;
@@ -1898,25 +2107,34 @@
 
         pic_params.inputTimeStamp = frame->pts;
 
-        nvenc_codec_specific_pic_params(avctx, &pic_params);
+        if (ctx->a53_cc && av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC)) {
+            if (ff_alloc_a53_sei(frame, sizeof(NV_ENC_SEI_PAYLOAD), (void**)&sei_data, &sei_size) < 0) {
+                av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
+            }
+
+            if (sei_data) {
+                sei_data->payloadSize = (uint32_t)sei_size;
+                sei_data->payloadType = 4;
+                sei_data->payload = (uint8_t*)(sei_data + 1);
+            }
+        }
+
+        nvenc_codec_specific_pic_params(avctx, &pic_params, sei_data);
     } else {
         pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
         ctx->encoder_flushing = 1;
     }
 
-    cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
+    res = nvenc_push_context(avctx);
+    if (res < 0)
+        return res;
 
     nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);
+    av_free(sei_data);
 
-    cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
+    res = nvenc_pop_context(avctx);
+    if (res < 0)
+        return res;
 
     if (nv_status != NV_ENC_SUCCESS &&
         nv_status != NV_ENC_ERR_NEED_MORE_INPUT)
@@ -1945,33 +2163,26 @@
 
 int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
 {
-    CUresult cu_res;
-    CUcontext dummy;
     NvencSurface *tmp_out_surf;
-    int res;
+    int res, res2;
 
     NvencContext *ctx = avctx->priv_data;
-    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
 
-    if (!ctx->cu_context || !ctx->nvencoder)
+    if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
         return AVERROR(EINVAL);
 
     if (output_ready(avctx, ctx->encoder_flushing)) {
         av_fifo_generic_read(ctx->output_surface_ready_queue, &tmp_out_surf, sizeof(tmp_out_surf), NULL);
 
-        cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
-        if (cu_res != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
-            return AVERROR_EXTERNAL;
-        }
+        res = nvenc_push_context(avctx);
+        if (res < 0)
+            return res;
 
         res = process_output_surface(avctx, pkt, tmp_out_surf);
 
-        cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-        if (cu_res != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
-            return AVERROR_EXTERNAL;
-        }
+        res2 = nvenc_pop_context(avctx);
+        if (res2 < 0)
+            return res2;
 
         if (res)
             return res;

diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
index d8e23d0..ebb7a80 100644
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h

@@ -19,10 +19,17 @@
 #ifndef AVCODEC_NVENC_H
 #define AVCODEC_NVENC_H
 
-#include "compat/nvenc/nvEncodeAPI.h"
-
 #include "config.h"
 
+#if CONFIG_D3D11VA
+#define COBJMACROS
+#include "libavutil/hwcontext_d3d11va.h"
+#else
+typedef void ID3D11Device;
+#endif
+
+#include <ffnvcodec/nvEncodeAPI.h>
+
 #include "compat/cuda/dynlink_loader.h"
 #include "libavutil/fifo.h"
 #include "libavutil/opt.h"
@@ -33,6 +40,15 @@
 #define RC_MODE_DEPRECATED 0x800000
 #define RCD(rc_mode) ((rc_mode) | RC_MODE_DEPRECATED)
 
+#define NVENCAPI_CHECK_VERSION(major, minor) \
+    ((major) < NVENCAPI_MAJOR_VERSION || ((major) == NVENCAPI_MAJOR_VERSION && (minor) <= NVENCAPI_MINOR_VERSION))
+
+// SDK 8.1 compile time feature checks
+#if NVENCAPI_CHECK_VERSION(8, 1)
+#define NVENC_HAVE_BFRAME_REF_MODE
+#define NVENC_HAVE_QP_MAP_MODE
+#endif
+
 typedef struct NvencSurface
 {
     NV_ENC_INPUT_PTR input_surface;
@@ -106,6 +122,7 @@
     NV_ENC_CONFIG encode_config;
     CUcontext cu_context;
     CUcontext cu_context_internal;
+    ID3D11Device *d3d11_device;
 
     int nb_surfaces;
     NvencSurface *surfaces;
@@ -118,7 +135,8 @@
     int encoder_flushing;
 
     struct {
-        CUdeviceptr ptr;
+        void *ptr;
+        int ptr_index;
         NV_ENC_REGISTERED_PTR regptr;
         int mapped;
         NV_ENC_MAP_INPUT_RESOURCE in_map;
@@ -134,6 +152,8 @@
     int64_t initial_pts[2];
     int first_packet_output;
 
+    int support_dyn_bitrate;
+
     void *nvencoder;
 
     int preset;
@@ -165,6 +185,8 @@
     int cqp;
     int weighted_pred;
     int coder;
+    int b_ref_mode;
+    int a53_cc;
 } NvencContext;
 
 int ff_nvenc_encode_init(AVCodecContext *avctx);

diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c
index c3b4bac..a6623f5 100644
--- a/libavcodec/nvenc_h264.c
+++ b/libavcodec/nvenc_h264.c

@@ -126,6 +126,18 @@
     { "cavlc",        "",                                   0,                    AV_OPT_TYPE_CONST, { .i64 = NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC      }, 0, 0, VE, "coder" },
     { "ac",           "",                                   0,                    AV_OPT_TYPE_CONST, { .i64 = NV_ENC_H264_ENTROPY_CODING_MODE_CABAC      }, 0, 0, VE, "coder" },
     { "vlc",          "",                                   0,                    AV_OPT_TYPE_CONST, { .i64 = NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC      }, 0, 0, VE, "coder" },
+#ifdef NVENC_HAVE_BFRAME_REF_MODE
+    { "b_ref_mode",   "Use B frames as references",         OFFSET(b_ref_mode),   AV_OPT_TYPE_INT,   { .i64 = NV_ENC_BFRAME_REF_MODE_DISABLED }, NV_ENC_BFRAME_REF_MODE_DISABLED, NV_ENC_BFRAME_REF_MODE_MIDDLE, VE, "b_ref_mode" },
+    { "disabled",     "B frames will not be used for reference", 0,               AV_OPT_TYPE_CONST, { .i64 = NV_ENC_BFRAME_REF_MODE_DISABLED }, 0, 0, VE, "b_ref_mode" },
+    { "each",         "Each B frame will be used for reference", 0,               AV_OPT_TYPE_CONST, { .i64 = NV_ENC_BFRAME_REF_MODE_EACH }, 0, 0, VE, "b_ref_mode" },
+    { "middle",       "Only (number of B frames)/2 will be used for reference", 0,AV_OPT_TYPE_CONST, { .i64 = NV_ENC_BFRAME_REF_MODE_MIDDLE }, 0, 0, VE, "b_ref_mode" },
+#else
+    { "b_ref_mode",   "(not supported)",                    OFFSET(b_ref_mode),   AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, INT_MAX, VE, "b_ref_mode" },
+    { "disabled",     "",                                   0,                    AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0,       VE, "b_ref_mode" },
+    { "each",         "",                                   0,                    AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0,       VE, "b_ref_mode" },
+    { "middle",       "",                                   0,                    AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0,       VE, "b_ref_mode" },
+#endif
+    { "a53cc",        "Use A53 Closed Captions (if available)", OFFSET(a53_cc),   AV_OPT_TYPE_BOOL,  { .i64 = 1 }, 0, 1, VE },
     { NULL }
 };
 
@@ -171,9 +183,10 @@
     .priv_data_size = sizeof(NvencContext),
     .priv_class     = &nvenc_class,
     .defaults       = defaults,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
     .pix_fmts       = ff_nvenc_pix_fmts,
+    .wrapper_name   = "nvenc",
 };
 #endif
 
@@ -199,9 +212,10 @@
     .priv_data_size = sizeof(NvencContext),
     .priv_class     = &nvenc_h264_class,
     .defaults       = defaults,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
     .pix_fmts       = ff_nvenc_pix_fmts,
+    .wrapper_name   = "nvenc",
 };
 #endif
 
@@ -227,7 +241,8 @@
     .priv_data_size = sizeof(NvencContext),
     .priv_class     = &h264_nvenc_class,
     .defaults       = defaults,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
     .pix_fmts       = ff_nvenc_pix_fmts,
+    .wrapper_name   = "nvenc",
 };

diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c
index 89e8c3e..0df7eab 100644
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c

@@ -161,8 +161,9 @@
     .priv_class     = &nvenc_hevc_class,
     .defaults       = defaults,
     .pix_fmts       = ff_nvenc_pix_fmts,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+    .wrapper_name   = "nvenc",
 };
 
 #endif
@@ -188,6 +189,7 @@
     .priv_class     = &hevc_nvenc_class,
     .defaults       = defaults,
     .pix_fmts       = ff_nvenc_pix_fmts,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+    .wrapper_name   = "nvenc",
 };

diff --git a/libavcodec/on2avc.c b/libavcodec/on2avc.c
index 4e4be75..00e5bf5 100644
--- a/libavcodec/on2avc.c
+++ b/libavcodec/on2avc.c

@@ -1018,6 +1018,7 @@
     .decode         = on2avc_decode_frame,
     .close          = on2avc_decode_close,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };

diff --git a/libavcodec/options.c b/libavcodec/options.c
index 82e1217..41b6052 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c

@@ -30,7 +30,6 @@
 #include "libavutil/internal.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
-#include <float.h>              /* FLT_MIN, FLT_MAX */
 #include <string.h>
 
 FF_DISABLE_DEPRECATION_WARNINGS

diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 2ac37c3..099261e 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h

@@ -54,26 +54,11 @@
 {"qpel", "use 1/4-pel motion compensation", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_QPEL }, INT_MIN, INT_MAX, V|E, "flags"},
 {"loop", "use loop filter", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_LOOP_FILTER }, INT_MIN, INT_MAX, V|E, "flags"},
 {"qscale", "use fixed qscale", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_QSCALE }, INT_MIN, INT_MAX, 0, "flags"},
-#if FF_API_GMC
-{"gmc", "use gmc", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_GMC }, INT_MIN, INT_MAX, V|E, "flags"},
-#endif
-#if FF_API_MV0
-{"mv0", "always try a mb with mv=<0,0>", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_MV0 }, INT_MIN, INT_MAX, V|E, "flags"},
-#endif
-#if FF_API_INPUT_PRESERVED
-{"input_preserved", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_INPUT_PRESERVED }, INT_MIN, INT_MAX, 0, "flags"},
-#endif
 {"pass1", "use internal 2-pass ratecontrol in first  pass mode", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PASS1 }, INT_MIN, INT_MAX, 0, "flags"},
 {"pass2", "use internal 2-pass ratecontrol in second pass mode", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PASS2 }, INT_MIN, INT_MAX, 0, "flags"},
 {"gray", "only decode/encode grayscale", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_GRAY }, INT_MIN, INT_MAX, V|E|D, "flags"},
-#if FF_API_EMU_EDGE
-{"emu_edge", "do not draw edges", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_EMU_EDGE }, INT_MIN, INT_MAX, 0, "flags"},
-#endif
 {"psnr", "error[?] variables will be set during encoding", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PSNR }, INT_MIN, INT_MAX, V|E, "flags"},
 {"truncated", "Input bitstream might be randomly truncated", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_TRUNCATED }, INT_MIN, INT_MAX, V|D, "flags"},
-#if FF_API_NORMALIZE_AQP
-{"naq", "normalize adaptive quantization", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_NORMALIZE_AQP }, INT_MIN, INT_MAX, V|E, "flags"},
-#endif
 {"ildct", "use interlaced DCT", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_INTERLACED_DCT }, INT_MIN, INT_MAX, V|E, "flags"},
 {"low_delay", "force low delay", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_LOW_DELAY }, INT_MIN, INT_MAX, V|D|E, "flags"},
 {"global_header", "place global headers in extradata instead of every keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_GLOBAL_HEADER }, INT_MIN, INT_MAX, V|A|E, "flags"},
@@ -91,21 +76,6 @@
 {"export_mvs", "export motion vectors through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_EXPORT_MVS}, INT_MIN, INT_MAX, V|D, "flags2"},
 {"skip_manual", "do not skip samples and export skip information as frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_SKIP_MANUAL}, INT_MIN, INT_MAX, V|D, "flags2"},
 {"ass_ro_flush_noop", "do not reset ASS ReadOrder field on flush", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_RO_FLUSH_NOOP}, INT_MIN, INT_MAX, S|D, "flags2"},
-#if FF_API_MOTION_EST
-{"me_method", "set motion estimation method", OFFSET(me_method), AV_OPT_TYPE_INT, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method"},
-{"zero", "zero motion estimation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_ZERO }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"full", "full motion estimation (slowest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"epzs", "EPZS motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"esa", "esa motion estimation (alias for full)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"tesa", "tesa motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_TESA }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"dia", "diamond motion estimation (alias for EPZS)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"log", "log motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_LOG }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"phods", "phods motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_PHODS }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"x1", "X1 motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_X1 }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"hex", "hex motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_HEX }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"umh", "umh motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_UMH }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"iter", "iter motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_ITER }, INT_MIN, INT_MAX, V|E, "me_method" },
-#endif
 {"time_base", NULL, OFFSET(time_base), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, INT_MAX},
 {"g", "set the group of picture (GOP) size", OFFSET(gop_size), AV_OPT_TYPE_INT, {.i64 = 12 }, INT_MIN, INT_MAX, V|E},
 {"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, A|D|E},
@@ -123,9 +93,6 @@
 {"qdiff", "maximum difference between the quantizer scales (VBR)", OFFSET(max_qdiff), AV_OPT_TYPE_INT, {.i64 = 3 }, INT_MIN, INT_MAX, V|E},
 {"bf", "set maximum number of B-frames between non-B-frames", OFFSET(max_b_frames), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, -1, INT_MAX, V|E},
 {"b_qfactor", "QP factor between P- and B-frames", OFFSET(b_quant_factor), AV_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E},
-#if FF_API_RC_STRATEGY
-{"rc_strategy", "ratecontrol method", OFFSET(rc_strategy), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
-#endif
 #if FF_API_PRIVATE_OPT
 {"b_strategy", "strategy to choose between I/P/B-frames", OFFSET(b_frame_strategy), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, V|E},
 {"ps", "RTP payload size in bytes", OFFSET(rtp_payload_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
@@ -144,16 +111,10 @@
 {"codec_tag", NULL, OFFSET(codec_tag), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"bug", "work around not autodetected encoder bugs", OFFSET(workaround_bugs), AV_OPT_TYPE_FLAGS, {.i64 = FF_BUG_AUTODETECT }, INT_MIN, INT_MAX, V|D, "bug"},
 {"autodetect", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_AUTODETECT }, INT_MIN, INT_MAX, V|D, "bug"},
-#if FF_API_OLD_MSMPEG4
-{"old_msmpeg4", "some old lavc-generated MSMPEG4v3 files (no autodetection)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_OLD_MSMPEG4 }, INT_MIN, INT_MAX, V|D, "bug"},
-#endif
 {"xvid_ilace", "Xvid interlacing bug (autodetected if FOURCC == XVIX)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_XVID_ILACE }, INT_MIN, INT_MAX, V|D, "bug"},
 {"ump4", "(autodetected if FOURCC == UMP4)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_UMP4 }, INT_MIN, INT_MAX, V|D, "bug"},
 {"no_padding", "padding bug (autodetected)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_NO_PADDING }, INT_MIN, INT_MAX, V|D, "bug"},
 {"amv", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_AMV }, INT_MIN, INT_MAX, V|D, "bug"},
-#if FF_API_AC_VLC
-{"ac_vlc", "illegal VLC bug (autodetected per FOURCC)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_AC_VLC }, INT_MIN, INT_MAX, V|D, "bug"},
-#endif
 {"qpel_chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_QPEL_CHROMA }, INT_MIN, INT_MAX, V|D, "bug"},
 {"std_qpel", "old standard qpel (autodetected per FOURCC/version)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_STD_QPEL }, INT_MIN, INT_MAX, V|D, "bug"},
 {"qpel_chroma2", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_QPEL_CHROMA2 }, INT_MIN, INT_MAX, V|D, "bug"},
@@ -185,27 +146,13 @@
 #if FF_API_PRIVATE_OPT
 {"mpeg_quant", "use MPEG quantizers instead of H.263", OFFSET(mpeg_quant), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #endif
-#if FF_API_MPV_OPT
-{"qsquish", "deprecated, use encoder private options instead", OFFSET(rc_qsquish), AV_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, 0, 99, V|E},
-{"rc_qmod_amp",  "deprecated, use encoder private options instead", OFFSET(rc_qmod_amp), AV_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, -FLT_MAX, FLT_MAX, V|E},
-{"rc_qmod_freq", "deprecated, use encoder private options instead", OFFSET(rc_qmod_freq), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
-#endif
 {"rc_override_count", NULL, OFFSET(rc_override_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
-#if FF_API_MPV_OPT
-{"rc_eq", "deprecated, use encoder private options instead", OFFSET(rc_eq), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, V|E},
-#endif
 {"maxrate", "maximum bitrate (in bits/s). Used for VBV together with bufsize.", OFFSET(rc_max_rate), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, 0, INT_MAX, V|A|E},
 {"minrate", "minimum bitrate (in bits/s). Most useful in setting up a CBR encode. It is of little use otherwise.",
             OFFSET(rc_min_rate), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
 {"bufsize", "set ratecontrol buffer size (in bits)", OFFSET(rc_buffer_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, A|V|E},
-#if FF_API_MPV_OPT
-{"rc_buf_aggressivity", "deprecated, use encoder private options instead", OFFSET(rc_buffer_aggressivity), AV_OPT_TYPE_FLOAT, {.dbl = 1.0 }, -FLT_MAX, FLT_MAX, V|E},
-#endif
 {"i_qfactor", "QP factor between P- and I-frames", OFFSET(i_quant_factor), AV_OPT_TYPE_FLOAT, {.dbl = -0.8 }, -FLT_MAX, FLT_MAX, V|E},
 {"i_qoffset", "QP offset between P- and I-frames", OFFSET(i_quant_offset), AV_OPT_TYPE_FLOAT, {.dbl = 0.0 }, -FLT_MAX, FLT_MAX, V|E},
-#if FF_API_MPV_OPT
-{"rc_init_cplx", "deprecated, use encoder private options instead", OFFSET(rc_initial_cplx), AV_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, -FLT_MAX, FLT_MAX, V|E},
-#endif
 {"dct", "DCT algorithm", OFFSET(dct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E, "dct"},
 {"auto", "autoselect a good one", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_AUTO }, INT_MIN, INT_MAX, V|E, "dct"},
 {"fastint", "fast integer", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FASTINT }, INT_MIN, INT_MAX, V|E, "dct"},
@@ -225,19 +172,10 @@
 {"simplemmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"arm", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"altivec", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ALTIVEC }, INT_MIN, INT_MAX, V|E|D, "idct"},
-#if FF_API_ARCH_SH4
-{"sh4", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SH4 }, INT_MIN, INT_MAX, V|E|D, "idct"},
-#endif
 {"simplearm", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"simplearmv5te", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV5TE }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"simplearmv6", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV6 }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"simpleneon", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLENEON }, INT_MIN, INT_MAX, V|E|D, "idct"},
-#if FF_API_ARCH_ALPHA
-{"simplealpha", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEALPHA }, INT_MIN, INT_MAX, V|E|D, "idct"},
-#endif
-#if FF_API_UNUSED_MEMBERS
-{"ipp", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_IPP }, INT_MIN, INT_MAX, V|E|D, "idct"},
-#endif /* FF_API_UNUSED_MEMBERS */
 {"xvid", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"xvidmmx", "deprecated, for compatibility only", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"faani", "floating point AAN IDCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"},
@@ -269,9 +207,6 @@
 {"green_metadata", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_GREEN_MD }, INT_MIN, INT_MAX, V|D, "debug"},
 {"skip", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_SKIP }, INT_MIN, INT_MAX, V|D, "debug"},
 {"startcode", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_STARTCODE }, INT_MIN, INT_MAX, V|D, "debug"},
-#if FF_API_UNUSED_MEMBERS
-{"pts", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_PTS }, INT_MIN, INT_MAX, V|D, "debug"},
-#endif /* FF_API_UNUSED_MEMBERS */
 {"er", "error recognition", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_ER }, INT_MIN, INT_MAX, V|D, "debug"},
 {"mmco", "memory management control operations (H.264)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_MMCO }, INT_MIN, INT_MAX, V|D, "debug"},
 {"bugs", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_BUGS }, INT_MIN, INT_MAX, V|D, "debug"},
@@ -282,12 +217,6 @@
 {"buffers", "picture buffer allocations", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_BUFFERS }, INT_MIN, INT_MAX, V|D, "debug"},
 {"thread_ops", "threading operations", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_THREADS }, INT_MIN, INT_MAX, V|A|D, "debug"},
 {"nomc", "skip motion compensation", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_NOMC }, INT_MIN, INT_MAX, V|A|D, "debug"},
-#if FF_API_VISMV
-{"vismv", "visualize motion vectors (MVs) (deprecated)", OFFSET(debug_mv), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 0, INT_MAX, V|D, "debug_mv"},
-{"pf", "forward predicted MVs of P-frames", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_VIS_MV_P_FOR }, INT_MIN, INT_MAX, V|D, "debug_mv"},
-{"bf", "forward predicted MVs of B-frames", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_VIS_MV_B_FOR }, INT_MIN, INT_MAX, V|D, "debug_mv"},
-{"bb", "backward predicted MVs of B-frames", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_VIS_MV_B_BACK }, INT_MIN, INT_MAX, V|D, "debug_mv"},
-#endif
 {"cmp", "full-pel ME compare function", OFFSET(me_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"subcmp", "sub-pel ME compare function", OFFSET(me_sub_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"mbcmp", "macroblock compare function", OFFSET(mb_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
@@ -318,14 +247,7 @@
 {"msad", "sum of absolute differences, median predicted", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_MEDIAN_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"pre_dia_size", "diamond type & size for motion estimation pre-pass", OFFSET(pre_dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"subq", "sub-pel motion estimation quality", OFFSET(me_subpel_quality), AV_OPT_TYPE_INT, {.i64 = 8 }, INT_MIN, INT_MAX, V|E},
-#if FF_API_AFD
-{"dtg_active_format", NULL, OFFSET(dtg_active_format), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
-#endif
 {"me_range", "limit motion vectors range (1023 for DivX player)", OFFSET(me_range), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
-#if FF_API_QUANT_BIAS
-{"ibias", "intra quant bias", OFFSET(intra_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, V|E},
-{"pbias", "inter quant bias", OFFSET(inter_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, V|E},
-#endif
 {"global_quality", NULL, OFFSET(global_quality), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
 #if FF_API_CODER_TYPE
 {"coder", NULL, OFFSET(coder_type), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "coder"},
@@ -333,45 +255,25 @@
 {"ac", "arithmetic coder", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CODER_TYPE_AC }, INT_MIN, INT_MAX, V|E, "coder"},
 {"raw", "raw (no encoding)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CODER_TYPE_RAW }, INT_MIN, INT_MAX, V|E, "coder"},
 {"rle", "run-length coder", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CODER_TYPE_RLE }, INT_MIN, INT_MAX, V|E, "coder"},
-#if FF_API_UNUSED_MEMBERS
-{"deflate", "deflate-based coder", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CODER_TYPE_DEFLATE }, INT_MIN, INT_MAX, V|E, "coder"},
-#endif /* FF_API_UNUSED_MEMBERS */
 #endif /* FF_API_CODER_TYPE */
 #if FF_API_PRIVATE_OPT
 {"context", "context model", OFFSET(context_model), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #endif
 {"slice_flags", NULL, OFFSET(slice_flags), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
-#if FF_API_XVMC
-{"xvmc_acceleration", NULL, OFFSET(xvmc_acceleration), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
-#endif /* FF_API_XVMC */
 {"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, 2, V|E, "mbd"},
 {"simple", "use mbcmp", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_SIMPLE }, INT_MIN, INT_MAX, V|E, "mbd"},
 {"bits", "use fewest bits", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_BITS }, INT_MIN, INT_MAX, V|E, "mbd"},
 {"rd", "use best rate distortion", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_RD }, INT_MIN, INT_MAX, V|E, "mbd"},
-#if FF_API_STREAM_CODEC_TAG
-{"stream_codec_tag", NULL, OFFSET(stream_codec_tag), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
-#endif
 #if FF_API_PRIVATE_OPT
 {"sc_threshold", "scene change threshold", OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #endif
-#if FF_API_MPV_OPT
-{"lmin", "deprecated, use encoder private options instead", OFFSET(lmin), AV_OPT_TYPE_INT, {.i64 =  0 }, 0, INT_MAX, V|E},
-{"lmax", "deprecated, use encoder private options instead", OFFSET(lmax), AV_OPT_TYPE_INT, {.i64 =  0 }, 0, INT_MAX, V|E},
-#endif
 #if FF_API_PRIVATE_OPT
 {"nr", "noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #endif
 {"rc_init_occupancy", "number of bits which should be loaded into the rc buffer before decoding starts", OFFSET(rc_initial_buffer_occupancy), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"flags2", NULL, OFFSET(flags2), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT}, 0, UINT_MAX, V|A|E|D, "flags2"},
-#if FF_API_ERROR_RATE
-{"error", NULL, OFFSET(error_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
-#endif
 {"threads", "set the number of threads", OFFSET(thread_count), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, INT_MAX, V|A|E|D, "threads"},
 {"auto", "autodetect a suitable number of threads to use", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, V|E|D, "threads"},
-#if FF_API_MPV_OPT
-{"me_threshold", "motion estimation threshold", OFFSET(me_threshold), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
-{"mb_threshold", "macroblock threshold", OFFSET(mb_threshold), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
-#endif
 {"dc", "intra_dc_precision", OFFSET(intra_dc_precision), AV_OPT_TYPE_INT, {.i64 = 0 }, -8, 16, V|E},
 {"nssew", "nsse weight", OFFSET(nsse_weight), AV_OPT_TYPE_INT, {.i64 = 8 }, INT_MIN, INT_MAX, V|E},
 {"skip_top", "number of macroblock rows at the top which are skipped", OFFSET(skip_top), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|D},
@@ -398,6 +300,7 @@
 {"mpeg4_main", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_MPEG4_MAIN }, INT_MIN, INT_MAX, V|E, "profile"},
 {"mpeg4_asp",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_MPEG4_ADVANCED_SIMPLE }, INT_MIN, INT_MAX, V|E, "profile"},
 {"main10",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_HEVC_MAIN_10 }, INT_MIN, INT_MAX, V|E, "profile"},
+{"msbc",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_SBC_MSBC }, INT_MIN, INT_MAX, A|E, "profile"},
 {"level", NULL, OFFSET(level), AV_OPT_TYPE_INT, {.i64 = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"},
 {"unknown", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"},
 {"lowres", "decode at 1= 1/2, 2=1/4, 3=1/8 resolutions", OFFSET(lowres), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, V|A|D},
@@ -407,9 +310,6 @@
 {"skip_exp", "frame skip exponent", OFFSET(frame_skip_exp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"skipcmp", "frame skip compare function", OFFSET(frame_skip_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 #endif
-#if FF_API_MPV_OPT
-{"border_mask", "deprecated, use encoder private options instead", OFFSET(border_masking), AV_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, -FLT_MAX, FLT_MAX, V|E},
-#endif
 {"mblmin", "minimum macroblock Lagrange factor (VBR)", OFFSET(mb_lmin), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 2 }, 1, FF_LAMBDA_MAX, V|E},
 {"mblmax", "maximum macroblock Lagrange factor (VBR)", OFFSET(mb_lmax), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 31 }, 1, FF_LAMBDA_MAX, V|E},
 #if FF_API_PRIVATE_OPT
@@ -435,9 +335,6 @@
 {"chromaoffset", "chroma QP offset from luma", OFFSET(chromaoffset), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #endif
 {"trellis", "rate-distortion optimal quantization", OFFSET(trellis), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
-#if FF_API_UNUSED_MEMBERS
-{"sc_factor", "multiplied by qscale for each frame and added to scene_change_score", OFFSET(scenechange_factor), AV_OPT_TYPE_INT, {.i64 = 6 }, 0, INT_MAX, V|E},
-#endif /* FF_API_UNUSED_MEMBERS */
 {"mv0_threshold", NULL, OFFSET(mv0_threshold), AV_OPT_TYPE_INT, {.i64 = 256 }, 0, INT_MAX, V|E},
 #if FF_API_PRIVATE_OPT
 {"b_sensitivity", "adjust sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), AV_OPT_TYPE_INT, {.i64 = 40 }, 1, INT_MAX, V|E},
@@ -550,6 +447,7 @@
 {"do_nothing",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_DO_NOTHING},  INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 {"auto",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC},   INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 {"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
+{"ignore",      NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_IGNORE},      INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 #if FF_API_ASS_TIMING
 {"sub_text_format", "set decoded text subtitle format", OFFSET(sub_text_format), AV_OPT_TYPE_INT, {.i64 = FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS}, 0, 1, S|D, "sub_text_format"},
 #else
@@ -580,6 +478,7 @@
 {"ignore_level", "ignore level even if the codec level used is unknown or higher than the maximum supported level reported by the hardware driver", 0, AV_OPT_TYPE_CONST, { .i64 = AV_HWACCEL_FLAG_IGNORE_LEVEL }, INT_MIN, INT_MAX, V | D, "hwaccel_flags" },
 {"allow_high_depth", "allow to output YUV pixel formats with a different chroma sampling than 4:2:0 and/or other than 8 bits per component", 0, AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_HIGH_DEPTH }, INT_MIN, INT_MAX, V | D, "hwaccel_flags"},
 {"allow_profile_mismatch", "attempt to decode anyway if HW accelerated decoder's supported profiles do not exactly match the stream", 0, AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH }, INT_MIN, INT_MAX, V | D, "hwaccel_flags"},
+{"extra_hw_frames", "Number of extra hardware frames to allocate for the user", OFFSET(extra_hw_frames), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, V|D },
 {NULL},
 };
 

diff --git a/libavcodec/opus.c b/libavcodec/opus.c
index 5847e88..aa827b6 100644
--- a/libavcodec/opus.c
+++ b/libavcodec/opus.c

@@ -29,7 +29,8 @@
 #include "libavutil/error.h"
 #include "libavutil/ffmath.h"
 
-#include "opus.h"
+#include "opus_celt.h"
+#include "opustab.h"
 #include "vorbis.h"
 
 static const uint16_t opus_frame_duration[32] = {
@@ -438,3 +439,459 @@
 
     return 0;
 }
+
+void ff_celt_quant_bands(CeltFrame *f, OpusRangeCoder *rc)
+{
+    float lowband_scratch[8 * 22];
+    float norm1[2 * 8 * 100];
+    float *norm2 = norm1 + 8 * 100;
+
+    int totalbits = (f->framebits << 3) - f->anticollapse_needed;
+
+    int update_lowband = 1;
+    int lowband_offset = 0;
+
+    int i, j;
+
+    for (i = f->start_band; i < f->end_band; i++) {
+        uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 };
+        int band_offset = ff_celt_freq_bands[i] << f->size;
+        int band_size   = ff_celt_freq_range[i] << f->size;
+        float *X = f->block[0].coeffs + band_offset;
+        float *Y = (f->channels == 2) ? f->block[1].coeffs + band_offset : NULL;
+        float *norm_loc1, *norm_loc2;
+
+        int consumed = opus_rc_tell_frac(rc);
+        int effective_lowband = -1;
+        int b = 0;
+
+        /* Compute how many bits we want to allocate to this band */
+        if (i != f->start_band)
+            f->remaining -= consumed;
+        f->remaining2 = totalbits - consumed - 1;
+        if (i <= f->coded_bands - 1) {
+            int curr_balance = f->remaining / FFMIN(3, f->coded_bands-i);
+            b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[i] + curr_balance), 14);
+        }
+
+        if ((ff_celt_freq_bands[i] - ff_celt_freq_range[i] >= ff_celt_freq_bands[f->start_band] ||
+            i == f->start_band + 1) && (update_lowband || lowband_offset == 0))
+            lowband_offset = i;
+
+        if (i == f->start_band + 1) {
+            /* Special Hybrid Folding (RFC 8251 section 9). Copy the first band into
+            the second to ensure the second band never has to use the LCG. */
+            int count = (ff_celt_freq_range[i] - ff_celt_freq_range[i-1]) << f->size;
+
+            memcpy(&norm1[band_offset], &norm1[band_offset - count], count * sizeof(float));
+
+            if (f->channels == 2)
+                memcpy(&norm2[band_offset], &norm2[band_offset - count], count * sizeof(float));
+        }
+
+        /* Get a conservative estimate of the collapse_mask's for the bands we're
+           going to be folding from. */
+        if (lowband_offset != 0 && (f->spread != CELT_SPREAD_AGGRESSIVE ||
+                                    f->blocks > 1 || f->tf_change[i] < 0)) {
+            int foldstart, foldend;
+
+            /* This ensures we never repeat spectral content within one band */
+            effective_lowband = FFMAX(ff_celt_freq_bands[f->start_band],
+                                      ff_celt_freq_bands[lowband_offset] - ff_celt_freq_range[i]);
+            foldstart = lowband_offset;
+            while (ff_celt_freq_bands[--foldstart] > effective_lowband);
+            foldend = lowband_offset - 1;
+            while (++foldend < i && ff_celt_freq_bands[foldend] < effective_lowband + ff_celt_freq_range[i]);
+
+            cm[0] = cm[1] = 0;
+            for (j = foldstart; j < foldend; j++) {
+                cm[0] |= f->block[0].collapse_masks[j];
+                cm[1] |= f->block[f->channels - 1].collapse_masks[j];
+            }
+        }
+
+        if (f->dual_stereo && i == f->intensity_stereo) {
+            /* Switch off dual stereo to do intensity */
+            f->dual_stereo = 0;
+            for (j = ff_celt_freq_bands[f->start_band] << f->size; j < band_offset; j++)
+                norm1[j] = (norm1[j] + norm2[j]) / 2;
+        }
+
+        norm_loc1 = effective_lowband != -1 ? norm1 + (effective_lowband << f->size) : NULL;
+        norm_loc2 = effective_lowband != -1 ? norm2 + (effective_lowband << f->size) : NULL;
+
+        if (f->dual_stereo) {
+            cm[0] = f->pvq->quant_band(f->pvq, f, rc, i, X, NULL, band_size, b >> 1,
+                                       f->blocks, norm_loc1, f->size,
+                                       norm1 + band_offset, 0, 1.0f,
+                                       lowband_scratch, cm[0]);
+
+            cm[1] = f->pvq->quant_band(f->pvq, f, rc, i, Y, NULL, band_size, b >> 1,
+                                       f->blocks, norm_loc2, f->size,
+                                       norm2 + band_offset, 0, 1.0f,
+                                       lowband_scratch, cm[1]);
+        } else {
+            cm[0] = f->pvq->quant_band(f->pvq, f, rc, i, X,    Y, band_size, b >> 0,
+                                       f->blocks, norm_loc1, f->size,
+                                       norm1 + band_offset, 0, 1.0f,
+                                       lowband_scratch, cm[0] | cm[1]);
+            cm[1] = cm[0];
+        }
+
+        f->block[0].collapse_masks[i]               = (uint8_t)cm[0];
+        f->block[f->channels - 1].collapse_masks[i] = (uint8_t)cm[1];
+        f->remaining += f->pulses[i] + consumed;
+
+        /* Update the folding position only as long as we have 1 bit/sample depth */
+        update_lowband = (b > band_size << 3);
+    }
+}
+
+#define NORMC(bits) ((bits) << (f->channels - 1) << f->size >> 2)
+
+void ff_celt_bitalloc(CeltFrame *f, OpusRangeCoder *rc, int encode)
+{
+    int i, j, low, high, total, done, bandbits, remaining, tbits_8ths;
+    int skip_startband      = f->start_band;
+    int skip_bit            = 0;
+    int intensitystereo_bit = 0;
+    int dualstereo_bit      = 0;
+    int dynalloc            = 6;
+    int extrabits           = 0;
+
+    int boost[CELT_MAX_BANDS] = { 0 };
+    int trim_offset[CELT_MAX_BANDS];
+    int threshold[CELT_MAX_BANDS];
+    int bits1[CELT_MAX_BANDS];
+    int bits2[CELT_MAX_BANDS];
+
+    /* Spread */
+    if (opus_rc_tell(rc) + 4 <= f->framebits) {
+        if (encode)
+            ff_opus_rc_enc_cdf(rc, f->spread, ff_celt_model_spread);
+        else
+            f->spread = ff_opus_rc_dec_cdf(rc, ff_celt_model_spread);
+    } else {
+        f->spread = CELT_SPREAD_NORMAL;
+    }
+
+    /* Initialize static allocation caps */
+    for (i = 0; i < CELT_MAX_BANDS; i++)
+        f->caps[i] = NORMC((ff_celt_static_caps[f->size][f->channels - 1][i] + 64) * ff_celt_freq_range[i]);
+
+    /* Band boosts */
+    tbits_8ths = f->framebits << 3;
+    for (i = f->start_band; i < f->end_band; i++) {
+        int quanta = ff_celt_freq_range[i] << (f->channels - 1) << f->size;
+        int b_dynalloc = dynalloc;
+        int boost_amount = f->alloc_boost[i];
+        quanta = FFMIN(quanta << 3, FFMAX(6 << 3, quanta));
+
+        while (opus_rc_tell_frac(rc) + (b_dynalloc << 3) < tbits_8ths && boost[i] < f->caps[i]) {
+            int is_boost;
+            if (encode) {
+                is_boost = boost_amount--;
+                ff_opus_rc_enc_log(rc, is_boost, b_dynalloc);
+            } else {
+                is_boost = ff_opus_rc_dec_log(rc, b_dynalloc);
+            }
+
+            if (!is_boost)
+                break;
+
+            boost[i]   += quanta;
+            tbits_8ths -= quanta;
+
+            b_dynalloc = 1;
+        }
+
+        if (boost[i])
+            dynalloc = FFMAX(dynalloc - 1, 2);
+    }
+
+    /* Allocation trim */
+    if (opus_rc_tell_frac(rc) + (6 << 3) <= tbits_8ths)
+        if (encode)
+            ff_opus_rc_enc_cdf(rc, f->alloc_trim, ff_celt_model_alloc_trim);
+        else
+            f->alloc_trim = ff_opus_rc_dec_cdf(rc, ff_celt_model_alloc_trim);
+
+    /* Anti-collapse bit reservation */
+    tbits_8ths = (f->framebits << 3) - opus_rc_tell_frac(rc) - 1;
+    f->anticollapse_needed = 0;
+    if (f->transient && f->size >= 2 && tbits_8ths >= ((f->size + 2) << 3))
+        f->anticollapse_needed = 1 << 3;
+    tbits_8ths -= f->anticollapse_needed;
+
+    /* Band skip bit reservation */
+    if (tbits_8ths >= 1 << 3)
+        skip_bit = 1 << 3;
+    tbits_8ths -= skip_bit;
+
+    /* Intensity/dual stereo bit reservation */
+    if (f->channels == 2) {
+        intensitystereo_bit = ff_celt_log2_frac[f->end_band - f->start_band];
+        if (intensitystereo_bit <= tbits_8ths) {
+            tbits_8ths -= intensitystereo_bit;
+            if (tbits_8ths >= 1 << 3) {
+                dualstereo_bit = 1 << 3;
+                tbits_8ths -= 1 << 3;
+            }
+        } else {
+            intensitystereo_bit = 0;
+        }
+    }
+
+    /* Trim offsets */
+    for (i = f->start_band; i < f->end_band; i++) {
+        int trim     = f->alloc_trim - 5 - f->size;
+        int band     = ff_celt_freq_range[i] * (f->end_band - i - 1);
+        int duration = f->size + 3;
+        int scale    = duration + f->channels - 1;
+
+        /* PVQ minimum allocation threshold, below this value the band is
+         * skipped */
+        threshold[i] = FFMAX(3 * ff_celt_freq_range[i] << duration >> 4,
+                             f->channels << 3);
+
+        trim_offset[i] = trim * (band << scale) >> 6;
+
+        if (ff_celt_freq_range[i] << f->size == 1)
+            trim_offset[i] -= f->channels << 3;
+    }
+
+    /* Bisection */
+    low  = 1;
+    high = CELT_VECTORS - 1;
+    while (low <= high) {
+        int center = (low + high) >> 1;
+        done = total = 0;
+
+        for (i = f->end_band - 1; i >= f->start_band; i--) {
+            bandbits = NORMC(ff_celt_freq_range[i] * ff_celt_static_alloc[center][i]);
+
+            if (bandbits)
+                bandbits = FFMAX(bandbits + trim_offset[i], 0);
+            bandbits += boost[i];
+
+            if (bandbits >= threshold[i] || done) {
+                done = 1;
+                total += FFMIN(bandbits, f->caps[i]);
+            } else if (bandbits >= f->channels << 3) {
+                total += f->channels << 3;
+            }
+        }
+
+        if (total > tbits_8ths)
+            high = center - 1;
+        else
+            low = center + 1;
+    }
+    high = low--;
+
+    /* Bisection */
+    for (i = f->start_band; i < f->end_band; i++) {
+        bits1[i] = NORMC(ff_celt_freq_range[i] * ff_celt_static_alloc[low][i]);
+        bits2[i] = high >= CELT_VECTORS ? f->caps[i] :
+                   NORMC(ff_celt_freq_range[i] * ff_celt_static_alloc[high][i]);
+
+        if (bits1[i])
+            bits1[i] = FFMAX(bits1[i] + trim_offset[i], 0);
+        if (bits2[i])
+            bits2[i] = FFMAX(bits2[i] + trim_offset[i], 0);
+
+        if (low)
+            bits1[i] += boost[i];
+        bits2[i] += boost[i];
+
+        if (boost[i])
+            skip_startband = i;
+        bits2[i] = FFMAX(bits2[i] - bits1[i], 0);
+    }
+
+    /* Bisection */
+    low  = 0;
+    high = 1 << CELT_ALLOC_STEPS;
+    for (i = 0; i < CELT_ALLOC_STEPS; i++) {
+        int center = (low + high) >> 1;
+        done = total = 0;
+
+        for (j = f->end_band - 1; j >= f->start_band; j--) {
+            bandbits = bits1[j] + (center * bits2[j] >> CELT_ALLOC_STEPS);
+
+            if (bandbits >= threshold[j] || done) {
+                done = 1;
+                total += FFMIN(bandbits, f->caps[j]);
+            } else if (bandbits >= f->channels << 3)
+                total += f->channels << 3;
+        }
+        if (total > tbits_8ths)
+            high = center;
+        else
+            low = center;
+    }
+
+    /* Bisection */
+    done = total = 0;
+    for (i = f->end_band - 1; i >= f->start_band; i--) {
+        bandbits = bits1[i] + (low * bits2[i] >> CELT_ALLOC_STEPS);
+
+        if (bandbits >= threshold[i] || done)
+            done = 1;
+        else
+            bandbits = (bandbits >= f->channels << 3) ?
+            f->channels << 3 : 0;
+
+        bandbits     = FFMIN(bandbits, f->caps[i]);
+        f->pulses[i] = bandbits;
+        total      += bandbits;
+    }
+
+    /* Band skipping */
+    for (f->coded_bands = f->end_band; ; f->coded_bands--) {
+        int allocation;
+        j = f->coded_bands - 1;
+
+        if (j == skip_startband) {
+            /* all remaining bands are not skipped */
+            tbits_8ths += skip_bit;
+            break;
+        }
+
+        /* determine the number of bits available for coding "do not skip" markers */
+        remaining   = tbits_8ths - total;
+        bandbits    = remaining / (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
+        remaining  -= bandbits  * (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
+        allocation  = f->pulses[j] + bandbits * ff_celt_freq_range[j];
+        allocation += FFMAX(remaining - (ff_celt_freq_bands[j] - ff_celt_freq_bands[f->start_band]), 0);
+
+        /* a "do not skip" marker is only coded if the allocation is
+         * above the chosen threshold */
+        if (allocation >= FFMAX(threshold[j], (f->channels + 1) << 3)) {
+            int do_not_skip;
+            if (encode) {
+                do_not_skip = f->coded_bands <= f->skip_band_floor;
+                ff_opus_rc_enc_log(rc, do_not_skip, 1);
+            } else {
+                do_not_skip = ff_opus_rc_dec_log(rc, 1);
+            }
+
+            if (do_not_skip)
+                break;
+
+            total      += 1 << 3;
+            allocation -= 1 << 3;
+        }
+
+        /* the band is skipped, so reclaim its bits */
+        total -= f->pulses[j];
+        if (intensitystereo_bit) {
+            total -= intensitystereo_bit;
+            intensitystereo_bit = ff_celt_log2_frac[j - f->start_band];
+            total += intensitystereo_bit;
+        }
+
+        total += f->pulses[j] = (allocation >= f->channels << 3) ? f->channels << 3 : 0;
+    }
+
+    /* IS start band */
+    if (encode) {
+        if (intensitystereo_bit) {
+            f->intensity_stereo = FFMIN(f->intensity_stereo, f->coded_bands);
+            ff_opus_rc_enc_uint(rc, f->intensity_stereo, f->coded_bands + 1 - f->start_band);
+        }
+    } else {
+        f->intensity_stereo = f->dual_stereo = 0;
+        if (intensitystereo_bit)
+            f->intensity_stereo = f->start_band + ff_opus_rc_dec_uint(rc, f->coded_bands + 1 - f->start_band);
+    }
+
+    /* DS flag */
+    if (f->intensity_stereo <= f->start_band)
+        tbits_8ths += dualstereo_bit; /* no intensity stereo means no dual stereo */
+    else if (dualstereo_bit)
+        if (encode)
+            ff_opus_rc_enc_log(rc, f->dual_stereo, 1);
+        else
+            f->dual_stereo = ff_opus_rc_dec_log(rc, 1);
+
+    /* Supply the remaining bits in this frame to lower bands */
+    remaining = tbits_8ths - total;
+    bandbits  = remaining / (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
+    remaining -= bandbits * (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
+    for (i = f->start_band; i < f->coded_bands; i++) {
+        const int bits = FFMIN(remaining, ff_celt_freq_range[i]);
+        f->pulses[i] += bits + bandbits * ff_celt_freq_range[i];
+        remaining    -= bits;
+    }
+
+    /* Finally determine the allocation */
+    for (i = f->start_band; i < f->coded_bands; i++) {
+        int N = ff_celt_freq_range[i] << f->size;
+        int prev_extra = extrabits;
+        f->pulses[i] += extrabits;
+
+        if (N > 1) {
+            int dof;        /* degrees of freedom */
+            int temp;       /* dof * channels * log(dof) */
+            int fine_bits;
+            int max_bits;
+            int offset;     /* fine energy quantization offset, i.e.
+                             * extra bits assigned over the standard
+                             * totalbits/dof */
+
+            extrabits = FFMAX(f->pulses[i] - f->caps[i], 0);
+            f->pulses[i] -= extrabits;
+
+            /* intensity stereo makes use of an extra degree of freedom */
+            dof = N * f->channels + (f->channels == 2 && N > 2 && !f->dual_stereo && i < f->intensity_stereo);
+            temp = dof * (ff_celt_log_freq_range[i] + (f->size << 3));
+            offset = (temp >> 1) - dof * CELT_FINE_OFFSET;
+            if (N == 2) /* dof=2 is the only case that doesn't fit the model */
+                offset += dof << 1;
+
+            /* grant an additional bias for the first and second pulses */
+            if (f->pulses[i] + offset < 2 * (dof << 3))
+                offset += temp >> 2;
+            else if (f->pulses[i] + offset < 3 * (dof << 3))
+                offset += temp >> 3;
+
+            fine_bits = (f->pulses[i] + offset + (dof << 2)) / (dof << 3);
+            max_bits  = FFMIN((f->pulses[i] >> 3) >> (f->channels - 1), CELT_MAX_FINE_BITS);
+            max_bits  = FFMAX(max_bits, 0);
+            f->fine_bits[i] = av_clip(fine_bits, 0, max_bits);
+
+            /* If fine_bits was rounded down or capped,
+             * give priority for the final fine energy pass */
+            f->fine_priority[i] = (f->fine_bits[i] * (dof << 3) >= f->pulses[i] + offset);
+
+            /* the remaining bits are assigned to PVQ */
+            f->pulses[i] -= f->fine_bits[i] << (f->channels - 1) << 3;
+        } else {
+            /* all bits go to fine energy except for the sign bit */
+            extrabits = FFMAX(f->pulses[i] - (f->channels << 3), 0);
+            f->pulses[i] -= extrabits;
+            f->fine_bits[i] = 0;
+            f->fine_priority[i] = 1;
+        }
+
+        /* hand back a limited number of extra fine energy bits to this band */
+        if (extrabits > 0) {
+            int fineextra = FFMIN(extrabits >> (f->channels + 2),
+                                  CELT_MAX_FINE_BITS - f->fine_bits[i]);
+            f->fine_bits[i] += fineextra;
+
+            fineextra <<= f->channels + 2;
+            f->fine_priority[i] = (fineextra >= extrabits - prev_extra);
+            extrabits -= fineextra;
+        }
+    }
+    f->remaining = extrabits;
+
+    /* skipped bands dedicate all of their bits for fine energy */
+    for (; i < f->end_band; i++) {
+        f->fine_bits[i]     = f->pulses[i] >> (f->channels - 1) >> 3;
+        f->pulses[i]        = 0;
+        f->fine_priority[i] = f->fine_bits[i] < 1;
+    }
+}

diff --git a/libavcodec/opus.h b/libavcodec/opus.h
index c3cbaec..edbaab5 100644
--- a/libavcodec/opus.h
+++ b/libavcodec/opus.h

@@ -150,7 +150,9 @@
 } ChannelMap;
 
 typedef struct OpusContext {
+    AVClass *av_class;
     OpusStreamContext *streams;
+    int apply_phase_inv;
 
     /* current output buffers for each streams */
     float **out;
@@ -189,4 +191,10 @@
                               enum OpusBandwidth bandwidth, int coded_channels,
                               int duration_ms);
 
+/* Encode or decode CELT bands */
+void ff_celt_quant_bands(CeltFrame *f, OpusRangeCoder *rc);
+
+/* Encode or decode CELT bitallocation */
+void ff_celt_bitalloc(CeltFrame *f, OpusRangeCoder *rc, int encode);
+
 #endif /* AVCODEC_OPUS_H */

diff --git a/libavcodec/opus_celt.c b/libavcodec/opus_celt.c
index 84d4847..115dd8c 100644
--- a/libavcodec/opus_celt.c
+++ b/libavcodec/opus_celt.c

@@ -143,345 +143,14 @@
     }
 }
 
-static void celt_decode_allocation(CeltFrame *f, OpusRangeCoder *rc)
-{
-    // approx. maximum bit allocation for each band before boost/trim
-    int cap[CELT_MAX_BANDS];
-    int boost[CELT_MAX_BANDS];
-    int threshold[CELT_MAX_BANDS];
-    int bits1[CELT_MAX_BANDS];
-    int bits2[CELT_MAX_BANDS];
-    int trim_offset[CELT_MAX_BANDS];
-
-    int skip_start_band = f->start_band;
-    int dynalloc       = 6;
-    int alloctrim      = 5;
-    int extrabits      = 0;
-
-    int skip_bit             = 0;
-    int intensity_stereo_bit = 0;
-    int dual_stereo_bit      = 0;
-
-    int remaining, bandbits;
-    int low, high, total, done;
-    int totalbits;
-    int consumed;
-    int i, j;
-
-    consumed = opus_rc_tell(rc);
-
-    /* obtain spread flag */
-    f->spread = CELT_SPREAD_NORMAL;
-    if (consumed + 4 <= f->framebits)
-        f->spread = ff_opus_rc_dec_cdf(rc, ff_celt_model_spread);
-
-    /* generate static allocation caps */
-    for (i = 0; i < CELT_MAX_BANDS; i++) {
-        cap[i] = (ff_celt_static_caps[f->size][f->channels - 1][i] + 64)
-                 * ff_celt_freq_range[i] << (f->channels - 1) << f->size >> 2;
-    }
-
-    /* obtain band boost */
-    totalbits = f->framebits << 3; // convert to 1/8 bits
-    consumed = opus_rc_tell_frac(rc);
-    for (i = f->start_band; i < f->end_band; i++) {
-        int quanta, band_dynalloc;
-
-        boost[i] = 0;
-
-        quanta = ff_celt_freq_range[i] << (f->channels - 1) << f->size;
-        quanta = FFMIN(quanta << 3, FFMAX(6 << 3, quanta));
-        band_dynalloc = dynalloc;
-        while (consumed + (band_dynalloc<<3) < totalbits && boost[i] < cap[i]) {
-            int add = ff_opus_rc_dec_log(rc, band_dynalloc);
-            consumed = opus_rc_tell_frac(rc);
-            if (!add)
-                break;
-
-            boost[i]     += quanta;
-            totalbits    -= quanta;
-            band_dynalloc = 1;
-        }
-        /* dynalloc is more likely to occur if it's already been used for earlier bands */
-        if (boost[i])
-            dynalloc = FFMAX(2, dynalloc - 1);
-    }
-
-    /* obtain allocation trim */
-    if (consumed + (6 << 3) <= totalbits)
-        alloctrim = ff_opus_rc_dec_cdf(rc, ff_celt_model_alloc_trim);
-
-    /* anti-collapse bit reservation */
-    totalbits = (f->framebits << 3) - opus_rc_tell_frac(rc) - 1;
-    f->anticollapse_needed = 0;
-    if (f->blocks > 1 && f->size >= 2 &&
-        totalbits >= ((f->size + 2) << 3))
-        f->anticollapse_needed = 1 << 3;
-    totalbits -= f->anticollapse_needed;
-
-    /* band skip bit reservation */
-    if (totalbits >= 1 << 3)
-        skip_bit = 1 << 3;
-    totalbits -= skip_bit;
-
-    /* intensity/dual stereo bit reservation */
-    if (f->channels == 2) {
-        intensity_stereo_bit = ff_celt_log2_frac[f->end_band - f->start_band];
-        if (intensity_stereo_bit <= totalbits) {
-            totalbits -= intensity_stereo_bit;
-            if (totalbits >= 1 << 3) {
-                dual_stereo_bit = 1 << 3;
-                totalbits -= 1 << 3;
-            }
-        } else
-            intensity_stereo_bit = 0;
-    }
-
-    for (i = f->start_band; i < f->end_band; i++) {
-        int trim     = alloctrim - 5 - f->size;
-        int band     = ff_celt_freq_range[i] * (f->end_band - i - 1);
-        int duration = f->size + 3;
-        int scale    = duration + f->channels - 1;
-
-        /* PVQ minimum allocation threshold, below this value the band is
-         * skipped */
-        threshold[i] = FFMAX(3 * ff_celt_freq_range[i] << duration >> 4,
-                             f->channels << 3);
-
-        trim_offset[i] = trim * (band << scale) >> 6;
-
-        if (ff_celt_freq_range[i] << f->size == 1)
-            trim_offset[i] -= f->channels << 3;
-    }
-
-    /* bisection */
-    low  = 1;
-    high = CELT_VECTORS - 1;
-    while (low <= high) {
-        int center = (low + high) >> 1;
-        done = total = 0;
-
-        for (i = f->end_band - 1; i >= f->start_band; i--) {
-            bandbits = ff_celt_freq_range[i] * ff_celt_static_alloc[center][i]
-                       << (f->channels - 1) << f->size >> 2;
-
-            if (bandbits)
-                bandbits = FFMAX(0, bandbits + trim_offset[i]);
-            bandbits += boost[i];
-
-            if (bandbits >= threshold[i] || done) {
-                done = 1;
-                total += FFMIN(bandbits, cap[i]);
-            } else if (bandbits >= f->channels << 3)
-                total += f->channels << 3;
-        }
-
-        if (total > totalbits)
-            high = center - 1;
-        else
-            low = center + 1;
-    }
-    high = low--;
-
-    for (i = f->start_band; i < f->end_band; i++) {
-        bits1[i] = ff_celt_freq_range[i] * ff_celt_static_alloc[low][i]
-                   << (f->channels - 1) << f->size >> 2;
-        bits2[i] = high >= CELT_VECTORS ? cap[i] :
-                   ff_celt_freq_range[i] * ff_celt_static_alloc[high][i]
-                   << (f->channels - 1) << f->size >> 2;
-
-        if (bits1[i])
-            bits1[i] = FFMAX(0, bits1[i] + trim_offset[i]);
-        if (bits2[i])
-            bits2[i] = FFMAX(0, bits2[i] + trim_offset[i]);
-        if (low)
-            bits1[i] += boost[i];
-        bits2[i] += boost[i];
-
-        if (boost[i])
-            skip_start_band = i;
-        bits2[i] = FFMAX(0, bits2[i] - bits1[i]);
-    }
-
-    /* bisection */
-    low  = 0;
-    high = 1 << CELT_ALLOC_STEPS;
-    for (i = 0; i < CELT_ALLOC_STEPS; i++) {
-        int center = (low + high) >> 1;
-        done = total = 0;
-
-        for (j = f->end_band - 1; j >= f->start_band; j--) {
-            bandbits = bits1[j] + (center * bits2[j] >> CELT_ALLOC_STEPS);
-
-            if (bandbits >= threshold[j] || done) {
-                done = 1;
-                total += FFMIN(bandbits, cap[j]);
-            } else if (bandbits >= f->channels << 3)
-                total += f->channels << 3;
-        }
-        if (total > totalbits)
-            high = center;
-        else
-            low = center;
-    }
-
-    done = total = 0;
-    for (i = f->end_band - 1; i >= f->start_band; i--) {
-        bandbits = bits1[i] + (low * bits2[i] >> CELT_ALLOC_STEPS);
-
-        if (bandbits >= threshold[i] || done)
-            done = 1;
-        else
-            bandbits = (bandbits >= f->channels << 3) ?
-                       f->channels << 3 : 0;
-
-        bandbits     = FFMIN(bandbits, cap[i]);
-        f->pulses[i] = bandbits;
-        total      += bandbits;
-    }
-
-    /* band skipping */
-    for (f->coded_bands = f->end_band; ; f->coded_bands--) {
-        int allocation;
-        j = f->coded_bands - 1;
-
-        if (j == skip_start_band) {
-            /* all remaining bands are not skipped */
-            totalbits += skip_bit;
-            break;
-        }
-
-        /* determine the number of bits available for coding "do not skip" markers */
-        remaining   = totalbits - total;
-        bandbits    = remaining / (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
-        remaining  -= bandbits  * (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
-        allocation  = f->pulses[j] + bandbits * ff_celt_freq_range[j]
-                      + FFMAX(0, remaining - (ff_celt_freq_bands[j] - ff_celt_freq_bands[f->start_band]));
-
-        /* a "do not skip" marker is only coded if the allocation is
-           above the chosen threshold */
-        if (allocation >= FFMAX(threshold[j], (f->channels + 1) <<3 )) {
-            if (ff_opus_rc_dec_log(rc, 1))
-                break;
-
-            total      += 1 << 3;
-            allocation -= 1 << 3;
-        }
-
-        /* the band is skipped, so reclaim its bits */
-        total -= f->pulses[j];
-        if (intensity_stereo_bit) {
-            total -= intensity_stereo_bit;
-            intensity_stereo_bit = ff_celt_log2_frac[j - f->start_band];
-            total += intensity_stereo_bit;
-        }
-
-        total += f->pulses[j] = (allocation >= f->channels << 3) ?
-                              f->channels << 3 : 0;
-    }
-
-    /* obtain stereo flags */
-    f->intensity_stereo = 0;
-    f->dual_stereo      = 0;
-    if (intensity_stereo_bit)
-        f->intensity_stereo = f->start_band +
-                          ff_opus_rc_dec_uint(rc, f->coded_bands + 1 - f->start_band);
-    if (f->intensity_stereo <= f->start_band)
-        totalbits += dual_stereo_bit; /* no intensity stereo means no dual stereo */
-    else if (dual_stereo_bit)
-        f->dual_stereo = ff_opus_rc_dec_log(rc, 1);
-
-    /* supply the remaining bits in this frame to lower bands */
-    remaining = totalbits - total;
-    bandbits  = remaining / (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
-    remaining -= bandbits * (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
-    for (i = f->start_band; i < f->coded_bands; i++) {
-        int bits = FFMIN(remaining, ff_celt_freq_range[i]);
-
-        f->pulses[i] += bits + bandbits * ff_celt_freq_range[i];
-        remaining    -= bits;
-    }
-
-    for (i = f->start_band; i < f->coded_bands; i++) {
-        int N = ff_celt_freq_range[i] << f->size;
-        int prev_extra = extrabits;
-        f->pulses[i] += extrabits;
-
-        if (N > 1) {
-            int dof;        // degrees of freedom
-            int temp;       // dof * channels * log(dof)
-            int offset;     // fine energy quantization offset, i.e.
-                            // extra bits assigned over the standard
-                            // totalbits/dof
-            int fine_bits, max_bits;
-
-            extrabits = FFMAX(0, f->pulses[i] - cap[i]);
-            f->pulses[i] -= extrabits;
-
-            /* intensity stereo makes use of an extra degree of freedom */
-            dof = N * f->channels
-                  + (f->channels == 2 && N > 2 && !f->dual_stereo && i < f->intensity_stereo);
-            temp = dof * (ff_celt_log_freq_range[i] + (f->size<<3));
-            offset = (temp >> 1) - dof * CELT_FINE_OFFSET;
-            if (N == 2) /* dof=2 is the only case that doesn't fit the model */
-                offset += dof<<1;
-
-            /* grant an additional bias for the first and second pulses */
-            if (f->pulses[i] + offset < 2 * (dof << 3))
-                offset += temp >> 2;
-            else if (f->pulses[i] + offset < 3 * (dof << 3))
-                offset += temp >> 3;
-
-            fine_bits = (f->pulses[i] + offset + (dof << 2)) / (dof << 3);
-            max_bits  = FFMIN((f->pulses[i]>>3) >> (f->channels - 1),
-                              CELT_MAX_FINE_BITS);
-
-            max_bits  = FFMAX(max_bits, 0);
-
-            f->fine_bits[i] = av_clip(fine_bits, 0, max_bits);
-
-            /* if fine_bits was rounded down or capped,
-               give priority for the final fine energy pass */
-            f->fine_priority[i] = (f->fine_bits[i] * (dof<<3) >= f->pulses[i] + offset);
-
-            /* the remaining bits are assigned to PVQ */
-            f->pulses[i] -= f->fine_bits[i] << (f->channels - 1) << 3;
-        } else {
-            /* all bits go to fine energy except for the sign bit */
-            extrabits = FFMAX(0, f->pulses[i] - (f->channels << 3));
-            f->pulses[i] -= extrabits;
-            f->fine_bits[i] = 0;
-            f->fine_priority[i] = 1;
-        }
-
-        /* hand back a limited number of extra fine energy bits to this band */
-        if (extrabits > 0) {
-            int fineextra = FFMIN(extrabits >> (f->channels + 2),
-                                  CELT_MAX_FINE_BITS - f->fine_bits[i]);
-            f->fine_bits[i] += fineextra;
-
-            fineextra <<= f->channels + 2;
-            f->fine_priority[i] = (fineextra >= extrabits - prev_extra);
-            extrabits -= fineextra;
-        }
-    }
-    f->remaining = extrabits;
-
-    /* skipped bands dedicate all of their bits for fine energy */
-    for (; i < f->end_band; i++) {
-        f->fine_bits[i]     = f->pulses[i] >> (f->channels - 1) >> 3;
-        f->pulses[i]        = 0;
-        f->fine_priority[i] = f->fine_bits[i] < 1;
-    }
-}
-
 static void celt_denormalize(CeltFrame *f, CeltBlock *block, float *data)
 {
     int i, j;
 
     for (i = f->start_band; i < f->end_band; i++) {
         float *dst = data + (ff_celt_freq_bands[i] << f->size);
-        float norm = exp2f(block->energy[i] + ff_celt_mean_energy[i]);
+        float log_norm = block->energy[i] + ff_celt_mean_energy[i];
+        float norm = exp2f(FFMIN(log_norm, 32.0f));
 
         for (j = 0; j < ff_celt_freq_range[i] << f->size; j++)
             dst[j] *= norm;
@@ -675,98 +344,6 @@
     }
 }
 
-static void celt_decode_bands(CeltFrame *f, OpusRangeCoder *rc)
-{
-    float lowband_scratch[8 * 22];
-    float norm[2 * 8 * 100];
-
-    int totalbits = (f->framebits << 3) - f->anticollapse_needed;
-
-    int update_lowband = 1;
-    int lowband_offset = 0;
-
-    int i, j;
-
-    memset(f->block[0].coeffs, 0, sizeof(f->block[0].coeffs));
-    memset(f->block[1].coeffs, 0, sizeof(f->block[0].coeffs));
-
-    for (i = f->start_band; i < f->end_band; i++) {
-        uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 };
-        int band_offset = ff_celt_freq_bands[i] << f->size;
-        int band_size   = ff_celt_freq_range[i] << f->size;
-        float *X = f->block[0].coeffs + band_offset;
-        float *Y = (f->channels == 2) ? f->block[1].coeffs + band_offset : NULL;
-
-        int consumed = opus_rc_tell_frac(rc);
-        float *norm2 = norm + 8 * 100;
-        int effective_lowband = -1;
-        int b = 0;
-
-        /* Compute how many bits we want to allocate to this band */
-        if (i != f->start_band)
-            f->remaining -= consumed;
-        f->remaining2 = totalbits - consumed - 1;
-        if (i <= f->coded_bands - 1) {
-            int curr_balance = f->remaining / FFMIN(3, f->coded_bands-i);
-            b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[i] + curr_balance), 14);
-        }
-
-        if (ff_celt_freq_bands[i] - ff_celt_freq_range[i] >= ff_celt_freq_bands[f->start_band] &&
-            (update_lowband || lowband_offset == 0))
-            lowband_offset = i;
-
-        /* Get a conservative estimate of the collapse_mask's for the bands we're
-           going to be folding from. */
-        if (lowband_offset != 0 && (f->spread != CELT_SPREAD_AGGRESSIVE ||
-                                    f->blocks > 1 || f->tf_change[i] < 0)) {
-            int foldstart, foldend;
-
-            /* This ensures we never repeat spectral content within one band */
-            effective_lowband = FFMAX(ff_celt_freq_bands[f->start_band],
-                                      ff_celt_freq_bands[lowband_offset] - ff_celt_freq_range[i]);
-            foldstart = lowband_offset;
-            while (ff_celt_freq_bands[--foldstart] > effective_lowband);
-            foldend = lowband_offset - 1;
-            while (ff_celt_freq_bands[++foldend] < effective_lowband + ff_celt_freq_range[i]);
-
-            cm[0] = cm[1] = 0;
-            for (j = foldstart; j < foldend; j++) {
-                cm[0] |= f->block[0].collapse_masks[j];
-                cm[1] |= f->block[f->channels - 1].collapse_masks[j];
-            }
-        }
-
-        if (f->dual_stereo && i == f->intensity_stereo) {
-            /* Switch off dual stereo to do intensity */
-            f->dual_stereo = 0;
-            for (j = ff_celt_freq_bands[f->start_band] << f->size; j < band_offset; j++)
-                norm[j] = (norm[j] + norm2[j]) / 2;
-        }
-
-        if (f->dual_stereo) {
-            cm[0] = f->pvq->decode_band(f->pvq, f, rc, i, X, NULL, band_size, b / 2, f->blocks,
-                                        effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
-                                        norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]);
-
-            cm[1] = f->pvq->decode_band(f->pvq, f, rc, i, Y, NULL, band_size, b/2, f->blocks,
-                                        effective_lowband != -1 ? norm2 + (effective_lowband << f->size) : NULL, f->size,
-                                        norm2 + band_offset, 0, 1.0f, lowband_scratch, cm[1]);
-        } else {
-            cm[0] = f->pvq->decode_band(f->pvq, f, rc, i, X, Y, band_size, b, f->blocks,
-                                        effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
-                                        norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]|cm[1]);
-            cm[1] = cm[0];
-        }
-
-        f->block[0].collapse_masks[i]               = (uint8_t)cm[0];
-        f->block[f->channels - 1].collapse_masks[i] = (uint8_t)cm[1];
-        f->remaining += f->pulses[i] + consumed;
-
-        /* Update the folding position only as long as we have 1 bit/sample depth */
-        update_lowband = (b > band_size << 3);
-    }
-}
-
 int ff_celt_decode_frame(CeltFrame *f, OpusRangeCoder *rc,
                          float **output, int channels, int frame_size,
                          int start_band,  int end_band)
@@ -806,8 +383,10 @@
     if (!f->output_channels)
         f->output_channels = channels;
 
-    memset(f->block[0].collapse_masks, 0, sizeof(f->block[0].collapse_masks));
-    memset(f->block[1].collapse_masks, 0, sizeof(f->block[1].collapse_masks));
+    for (i = 0; i < f->channels; i++) {
+        memset(f->block[i].coeffs,         0, sizeof(f->block[i].coeffs));
+        memset(f->block[i].collapse_masks, 0, sizeof(f->block[i].collapse_masks));
+    }
 
     consumed = opus_rc_tell(rc);
 
@@ -842,9 +421,9 @@
 
     celt_decode_coarse_energy(f, rc);
     celt_decode_tf_changes   (f, rc);
-    celt_decode_allocation   (f, rc);
+    ff_celt_bitalloc         (f, rc, 0);
     celt_decode_fine_energy  (f, rc);
-    celt_decode_bands        (f, rc);
+    ff_celt_quant_bands      (f, rc);
 
     if (f->anticollapse_needed)
         f->anticollapse = ff_opus_rc_get_raw(rc, 1);
@@ -984,7 +563,8 @@
     av_freep(f);
 }
 
-int ff_celt_init(AVCodecContext *avctx, CeltFrame **f, int output_channels)
+int ff_celt_init(AVCodecContext *avctx, CeltFrame **f, int output_channels,
+                 int apply_phase_inv)
 {
     CeltFrame *frm;
     int i, ret;
@@ -1001,12 +581,13 @@
 
     frm->avctx           = avctx;
     frm->output_channels = output_channels;
+    frm->apply_phase_inv = apply_phase_inv;
 
     for (i = 0; i < FF_ARRAY_ELEMS(frm->imdct); i++)
         if ((ret = ff_mdct15_init(&frm->imdct[i], 1, i + 3, -1.0f/32768)) < 0)
             goto fail;
 
-    if ((ret = ff_celt_pvq_init(&frm->pvq)) < 0)
+    if ((ret = ff_celt_pvq_init(&frm->pvq, 0)) < 0)
         goto fail;
 
     frm->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);

diff --git a/libavcodec/opus_celt.h b/libavcodec/opus_celt.h
index 45d50ab..9289a18 100644
--- a/libavcodec/opus_celt.h
+++ b/libavcodec/opus_celt.h

@@ -98,6 +98,7 @@
     CeltPVQ             *pvq;
     int channels;
     int output_channels;
+    int apply_phase_inv;
 
     enum CeltBlockSize size;
     int start_band;
@@ -156,7 +157,8 @@
         X[i] *= g;
 }
 
-int ff_celt_init(AVCodecContext *avctx, CeltFrame **f, int output_channels);
+int ff_celt_init(AVCodecContext *avctx, CeltFrame **f, int output_channels,
+                 int apply_phase_inv);
 
 void ff_celt_free(CeltFrame **f);
 

diff --git a/libavcodec/opus_pvq.c b/libavcodec/opus_pvq.c
index f98b85d..0dbf141 100644
--- a/libavcodec/opus_pvq.c
+++ b/libavcodec/opus_pvq.c

@@ -486,8 +486,7 @@
                                                      int duration, float *lowband_out,
                                                      int level, float gain,
                                                      float *lowband_scratch,
-                                                     int fill, int quant,
-                                                     QUANT_FN(*rec))
+                                                     int fill, int quant)
 {
     int i;
     const uint8_t *cache;
@@ -643,6 +642,7 @@
                 }
             } else {
                 inv = (b > 2 << 3 && f->remaining2 > 2 << 3) ? ff_opus_rc_dec_log(rc, 2) : 0;
+                inv = f->apply_phase_inv ? inv : 0;
             }
             itheta = 0;
         }
@@ -699,8 +699,8 @@
             sign = 1 - 2 * sign;
             /* We use orig_fill here because we want to fold the side, but if
             itheta==16384, we'll have cleared the low bits of fill. */
-            cm = rec(pvq, f, rc, band, x2, NULL, N, mbits, blocks, lowband, duration,
-                     lowband_out, level, gain, lowband_scratch, orig_fill);
+            cm = pvq->quant_band(pvq, f, rc, band, x2, NULL, N, mbits, blocks, lowband, duration,
+                                 lowband_out, level, gain, lowband_scratch, orig_fill);
             /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
             and there's no need to worry about mixing with the other channel. */
             y2[0] = -sign * x2[1];
@@ -752,24 +752,25 @@
             if (mbits >= sbits) {
                 /* In stereo mode, we do not apply a scaling to the mid
                  * because we need the normalized mid for folding later */
-                cm = rec(pvq, f, rc, band, X, NULL, N, mbits, blocks, lowband,
-                         duration, next_lowband_out1, next_level,
-                         stereo ? 1.0f : (gain * mid), lowband_scratch, fill);
+                cm = pvq->quant_band(pvq, f, rc, band, X, NULL, N, mbits, blocks,
+                                     lowband, duration, next_lowband_out1, next_level,
+                                     stereo ? 1.0f : (gain * mid), lowband_scratch, fill);
                 rebalance = mbits - (rebalance - f->remaining2);
                 if (rebalance > 3 << 3 && itheta != 0)
                     sbits += rebalance - (3 << 3);
 
                 /* For a stereo split, the high bits of fill are always zero,
                  * so no folding will be done to the side. */
-                cmt = rec(pvq, f, rc, band, Y, NULL, N, sbits, blocks, next_lowband2,
-                          duration, NULL, next_level, gain * side, NULL,
-                          fill >> blocks);
+                cmt = pvq->quant_band(pvq, f, rc, band, Y, NULL, N, sbits, blocks,
+                                      next_lowband2, duration, NULL, next_level,
+                                      gain * side, NULL, fill >> blocks);
                 cm |= cmt << ((B0 >> 1) & (stereo - 1));
             } else {
                 /* For a stereo split, the high bits of fill are always zero,
                  * so no folding will be done to the side. */
-                cm = rec(pvq, f, rc, band, Y, NULL, N, sbits, blocks, next_lowband2,
-                         duration, NULL, next_level, gain * side, NULL, fill >> blocks);
+                cm = pvq->quant_band(pvq, f, rc, band, Y, NULL, N, sbits, blocks,
+                                     next_lowband2, duration, NULL, next_level,
+                                     gain * side, NULL, fill >> blocks);
                 cm <<= ((B0 >> 1) & (stereo - 1));
                 rebalance = sbits - (rebalance - f->remaining2);
                 if (rebalance > 3 << 3 && itheta != 16384)
@@ -777,9 +778,9 @@
 
                 /* In stereo mode, we do not apply a scaling to the mid because
                  * we need the normalized mid for folding later */
-                cm |= rec(pvq, f, rc, band, X, NULL, N, mbits, blocks, lowband, duration,
-                          next_lowband_out1, next_level, stereo ? 1.0f : (gain * mid),
-                          lowband_scratch, fill);
+                cm |= pvq->quant_band(pvq, f, rc, band, X, NULL, N, mbits, blocks,
+                                      lowband, duration, next_lowband_out1, next_level,
+                                      stereo ? 1.0f : (gain * mid), lowband_scratch, fill);
             }
         }
     } else {
@@ -873,80 +874,34 @@
     return cm;
 }
 
-
 static QUANT_FN(pvq_decode_band)
 {
+#if CONFIG_OPUS_DECODER
     return quant_band_template(pvq, f, rc, band, X, Y, N, b, blocks, lowband, duration,
-                               lowband_out, level, gain, lowband_scratch, fill, 0,
-                               pvq->decode_band);
+                               lowband_out, level, gain, lowband_scratch, fill, 0);
+#else
+    return 0;
+#endif
 }
 
 static QUANT_FN(pvq_encode_band)
 {
+#if CONFIG_OPUS_ENCODER
     return quant_band_template(pvq, f, rc, band, X, Y, N, b, blocks, lowband, duration,
-                               lowband_out, level, gain, lowband_scratch, fill, 1,
-                               pvq->encode_band);
+                               lowband_out, level, gain, lowband_scratch, fill, 1);
+#else
+    return 0;
+#endif
 }
 
-static float pvq_band_cost(CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, int band,
-                           float *bits, float lambda)
-{
-    int i, b = 0;
-    uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 };
-    const int band_size = ff_celt_freq_range[band] << f->size;
-    float buf[176 * 2], lowband_scratch[176], norm1[176], norm2[176];
-    float dist, cost, err_x = 0.0f, err_y = 0.0f;
-    float *X = buf;
-    float *X_orig = f->block[0].coeffs + (ff_celt_freq_bands[band] << f->size);
-    float *Y = (f->channels == 2) ? &buf[176] : NULL;
-    float *Y_orig = f->block[1].coeffs + (ff_celt_freq_bands[band] << f->size);
-    OPUS_RC_CHECKPOINT_SPAWN(rc);
-
-    memcpy(X, X_orig, band_size*sizeof(float));
-    if (Y)
-        memcpy(Y, Y_orig, band_size*sizeof(float));
-
-    f->remaining2 = ((f->framebits << 3) - f->anticollapse_needed) - opus_rc_tell_frac(rc) - 1;
-    if (band <= f->coded_bands - 1) {
-        int curr_balance = f->remaining / FFMIN(3, f->coded_bands - band);
-        b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[band] + curr_balance), 14);
-    }
-
-    if (f->dual_stereo) {
-        pvq->encode_band(pvq, f, rc, band, X, NULL, band_size, b / 2, f->blocks, NULL,
-                         f->size, norm1, 0, 1.0f, lowband_scratch, cm[0]);
-
-        pvq->encode_band(pvq, f, rc, band, Y, NULL, band_size, b / 2, f->blocks, NULL,
-                         f->size, norm2, 0, 1.0f, lowband_scratch, cm[1]);
-    } else {
-        pvq->encode_band(pvq, f, rc, band, X, Y, band_size, b, f->blocks, NULL, f->size,
-                         norm1, 0, 1.0f, lowband_scratch, cm[0] | cm[1]);
-    }
-
-    for (i = 0; i < band_size; i++) {
-        err_x += (X[i] - X_orig[i])*(X[i] - X_orig[i]);
-        err_y += (Y[i] - Y_orig[i])*(Y[i] - Y_orig[i]);
-    }
-
-    dist = sqrtf(err_x) + sqrtf(err_y);
-    cost = OPUS_RC_CHECKPOINT_BITS(rc)/8.0f;
-    *bits += cost;
-
-    OPUS_RC_CHECKPOINT_ROLLBACK(rc);
-
-    return lambda*dist*cost;
-}
-
-int av_cold ff_celt_pvq_init(CeltPVQ **pvq)
+int av_cold ff_celt_pvq_init(CeltPVQ **pvq, int encode)
 {
     CeltPVQ *s = av_malloc(sizeof(CeltPVQ));
     if (!s)
         return AVERROR(ENOMEM);
 
-    s->pvq_search         = ppp_pvq_search_c;
-    s->decode_band        = pvq_decode_band;
-    s->encode_band        = pvq_encode_band;
-    s->band_cost          = pvq_band_cost;
+    s->pvq_search = ppp_pvq_search_c;
+    s->quant_band = encode ? pvq_encode_band : pvq_decode_band;
 
     if (ARCH_X86)
         ff_opus_dsp_init_x86(s);

diff --git a/libavcodec/opus_pvq.h b/libavcodec/opus_pvq.h
index 9246337..e2f01a0 100644
--- a/libavcodec/opus_pvq.h
+++ b/libavcodec/opus_pvq.h

@@ -37,15 +37,12 @@
     DECLARE_ALIGNED(32, float, hadamard_tmp)[256];
 
     float (*pvq_search)(float *X, int *y, int K, int N);
-
-    QUANT_FN(*decode_band);
-    QUANT_FN(*encode_band);
-    float (*band_cost)(struct CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc,
-                       int band, float *bits, float lambda);
+    QUANT_FN(*quant_band);
 };
 
-int  ff_celt_pvq_init  (struct CeltPVQ **pvq);
 void ff_opus_dsp_init_x86(struct CeltPVQ *s);
+
+int  ff_celt_pvq_init(struct CeltPVQ **pvq, int encode);
 void ff_celt_pvq_uninit(struct CeltPVQ **pvq);
 
 #endif /* AVCODEC_OPUS_PVQ_H */

diff --git a/libavcodec/opus_silk.c b/libavcodec/opus_silk.c
index 3c9c849..2fcbf3b 100644
--- a/libavcodec/opus_silk.c
+++ b/libavcodec/opus_silk.c

@@ -185,8 +185,15 @@
         row = lpc32[k & 1];
 
         for (j = 0; j < k; j++) {
-            int x = prevrow[j] - ROUND_MULL(prevrow[k - j - 1], rc, 31);
-            row[j] = ROUND_MULL(x, gain, fbits);
+            int x = av_sat_sub32(prevrow[j], ROUND_MULL(prevrow[k - j - 1], rc, 31));
+            int64_t tmp = ROUND_MULL(x, gain, fbits);
+
+            /* per RFC 8251 section 6, if this calculation overflows, the filter
+               is considered unstable. */
+            if (tmp < INT32_MIN || tmp > INT32_MAX)
+                return 0;
+
+            row[j] = (int32_t)tmp;
         }
     }
 }
@@ -232,8 +239,10 @@
 
     /* reconstruct A(z) */
     for (k = 0; k < order>>1; k++) {
-        lpc32[k]         = -p[k + 1] - p[k] - q[k + 1] + q[k];
-        lpc32[order-k-1] = -p[k + 1] - p[k] + q[k + 1] - q[k];
+        int32_t p_tmp = p[k + 1] + p[k];
+        int32_t q_tmp = q[k + 1] - q[k];
+        lpc32[k]         = -q_tmp - p_tmp;
+        lpc32[order-k-1] =  q_tmp - p_tmp;
     }
 
     /* limit the range of the LPC coefficients to each fit within an int16_t */

diff --git a/libavcodec/opusdec.c b/libavcodec/opusdec.c
index 5a7ba9d..03086de 100644
--- a/libavcodec/opusdec.c
+++ b/libavcodec/opusdec.c

@@ -687,7 +687,7 @@
         if (ret < 0)
             goto fail;
 
-        ret = ff_celt_init(avctx, &s->celt, s->output_channels);
+        ret = ff_celt_init(avctx, &s->celt, s->output_channels, c->apply_phase_inv);
         if (ret < 0)
             goto fail;
 
@@ -712,9 +712,24 @@
     return ret;
 }
 
+#define OFFSET(x) offsetof(OpusContext, x)
+#define AD AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+static const AVOption opus_options[] = {
+    { "apply_phase_inv", "Apply intensity stereo phase inversion", OFFSET(apply_phase_inv), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, AD },
+    { NULL },
+};
+
+static const AVClass opus_class = {
+    .class_name = "Opus Decoder",
+    .item_name  = av_default_item_name,
+    .option     = opus_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_opus_decoder = {
     .name            = "opus",
     .long_name       = NULL_IF_CONFIG_SMALL("Opus"),
+    .priv_class      = &opus_class,
     .type            = AVMEDIA_TYPE_AUDIO,
     .id              = AV_CODEC_ID_OPUS,
     .priv_data_size  = sizeof(OpusContext),

diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c
index 79d20dc..578785f 100644
--- a/libavcodec/opusenc.c
+++ b/libavcodec/opusenc.c

@@ -72,7 +72,7 @@
 
 static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_needed)
 {
-    int i, tmp = 0x0, extended_toc = 0;
+    int tmp = 0x0, extended_toc = 0;
     static const int toc_cfg[][OPUS_MODE_NB][OPUS_BANDWITH_NB] = {
         /*  Silk                    Hybrid                  Celt                    Layer     */
         /*  NB  MB  WB SWB  FB      NB  MB  WB SWB  FB      NB  MB  WB SWB  FB      Bandwidth */
@@ -102,7 +102,7 @@
     tmp |= (cfg - 1)         << 3;                           /* codec configuration */
     *toc++ = tmp;
     if (extended_toc) {
-        for (i = 0; i < (s->packet.frames - 1); i++)
+        for (int i = 0; i < (s->packet.frames - 1); i++)
             *fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits);
         tmp = (*fsize_needed) << 7;                                /* vbr flag */
         tmp |= (0) << 6;                                       /* padding flag */
@@ -115,14 +115,13 @@
 
 static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f)
 {
-    int sf, ch;
     AVFrame *cur = NULL;
     const int subframesize = s->avctx->frame_size;
     int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
 
     cur = ff_bufqueue_get(&s->bufqueue);
 
-    for (ch = 0; ch < f->channels; ch++) {
+    for (int ch = 0; ch < f->channels; ch++) {
         CeltBlock *b = &f->block[ch];
         const void *input = cur->extended_data[ch];
         size_t bps = av_get_bytes_per_sample(cur->format);
@@ -131,13 +130,13 @@
 
     av_frame_free(&cur);
 
-    for (sf = 0; sf < subframes; sf++) {
+    for (int sf = 0; sf < subframes; sf++) {
         if (sf != (subframes - 1))
             cur = ff_bufqueue_get(&s->bufqueue);
         else
             cur = ff_bufqueue_peek(&s->bufqueue, 0);
 
-        for (ch = 0; ch < f->channels; ch++) {
+        for (int ch = 0; ch < f->channels; ch++) {
             CeltBlock *b = &f->block[ch];
             const void *input = cur->extended_data[ch];
             const size_t bps  = av_get_bytes_per_sample(cur->format);
@@ -156,15 +155,14 @@
 /* Apply the pre emphasis filter */
 static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
 {
-    int i, sf, ch;
     const int subframesize = s->avctx->frame_size;
     const int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
 
     /* Filter overlap */
-    for (ch = 0; ch < f->channels; ch++) {
+    for (int ch = 0; ch < f->channels; ch++) {
         CeltBlock *b = &f->block[ch];
         float m = b->emph_coeff;
-        for (i = 0; i < CELT_OVERLAP; i++) {
+        for (int i = 0; i < CELT_OVERLAP; i++) {
             float sample = b->overlap[i];
             b->overlap[i] = sample - m;
             m = sample * CELT_EMPH_COEFF;
@@ -173,11 +171,11 @@
     }
 
     /* Filter the samples but do not update the last subframe's coeff - overlap ^^^ */
-    for (sf = 0; sf < subframes; sf++) {
-        for (ch = 0; ch < f->channels; ch++) {
+    for (int sf = 0; sf < subframes; sf++) {
+        for (int ch = 0; ch < f->channels; ch++) {
             CeltBlock *b = &f->block[ch];
             float m = b->emph_coeff;
-            for (i = 0; i < subframesize; i++) {
+            for (int i = 0; i < subframesize; i++) {
                 float sample = b->samples[sf*subframesize + i];
                 b->samples[sf*subframesize + i] = sample - m;
                 m = sample * CELT_EMPH_COEFF;
@@ -191,14 +189,13 @@
 /* Create the window and do the mdct */
 static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
 {
-    int i, j, t, ch;
     float *win = s->scratch, *temp = s->scratch + 1920;
 
     if (f->transient) {
-        for (ch = 0; ch < f->channels; ch++) {
+        for (int ch = 0; ch < f->channels; ch++) {
             CeltBlock *b = &f->block[ch];
             float *src1 = b->overlap;
-            for (t = 0; t < f->blocks; t++) {
+            for (int t = 0; t < f->blocks; t++) {
                 float *src2 = &b->samples[CELT_OVERLAP*t];
                 s->dsp->vector_fmul(win, src1, ff_celt_window, 128);
                 s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
@@ -211,7 +208,7 @@
         int blk_len = OPUS_BLOCK_SIZE(f->size), wlen = OPUS_BLOCK_SIZE(f->size + 1);
         int rwin = blk_len - CELT_OVERLAP, lap_dst = (wlen - blk_len - CELT_OVERLAP) >> 1;
         memset(win, 0, wlen*sizeof(float));
-        for (ch = 0; ch < f->channels; ch++) {
+        for (int ch = 0; ch < f->channels; ch++) {
             CeltBlock *b = &f->block[ch];
 
             /* Overlap */
@@ -230,21 +227,21 @@
         }
     }
 
-    for (ch = 0; ch < f->channels; ch++) {
+    for (int ch = 0; ch < f->channels; ch++) {
         CeltBlock *block = &f->block[ch];
-        for (i = 0; i < CELT_MAX_BANDS; i++) {
+        for (int i = 0; i < CELT_MAX_BANDS; i++) {
             float ener = 0.0f;
             int band_offset = ff_celt_freq_bands[i] << f->size;
             int band_size   = ff_celt_freq_range[i] << f->size;
             float *coeffs   = &block->coeffs[band_offset];
 
-            for (j = 0; j < band_size; j++)
+            for (int j = 0; j < band_size; j++)
                 ener += coeffs[j]*coeffs[j];
 
             block->lin_energy[i] = sqrtf(ener) + FLT_EPSILON;
             ener = 1.0f/block->lin_energy[i];
 
-            for (j = 0; j < band_size; j++)
+            for (int j = 0; j < band_size; j++)
                 coeffs[j] *= ener;
 
             block->energy[i] = log2f(block->lin_energy[i]) - ff_celt_mean_energy[i];
@@ -255,14 +252,14 @@
     }
 }
 
-static void celt_enc_tf(OpusRangeCoder *rc, CeltFrame *f)
+static void celt_enc_tf(CeltFrame *f, OpusRangeCoder *rc)
 {
-    int i, tf_select = 0, diff = 0, tf_changed = 0, tf_select_needed;
+    int tf_select = 0, diff = 0, tf_changed = 0, tf_select_needed;
     int bits = f->transient ? 2 : 4;
 
     tf_select_needed = ((f->size && (opus_rc_tell(rc) + bits + 1) <= f->framebits));
 
-    for (i = f->start_band; i < f->end_band; i++) {
+    for (int i = f->start_band; i < f->end_band; i++) {
         if ((opus_rc_tell(rc) + bits + tf_select_needed) <= f->framebits) {
             const int tbit = (diff ^ 1) == f->tf_change[i];
             ff_opus_rc_enc_log(rc, tbit, bits);
@@ -278,341 +275,14 @@
         tf_select = f->tf_select;
     }
 
-    for (i = f->start_band; i < f->end_band; i++)
+    for (int i = f->start_band; i < f->end_band; i++)
         f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]];
 }
 
-void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f)
-{
-    int i, j, low, high, total, done, bandbits, remaining, tbits_8ths;
-    int skip_startband      = f->start_band;
-    int skip_bit            = 0;
-    int intensitystereo_bit = 0;
-    int dualstereo_bit      = 0;
-    int dynalloc            = 6;
-    int extrabits           = 0;
-
-    int *cap = f->caps;
-    int boost[CELT_MAX_BANDS];
-    int trim_offset[CELT_MAX_BANDS];
-    int threshold[CELT_MAX_BANDS];
-    int bits1[CELT_MAX_BANDS];
-    int bits2[CELT_MAX_BANDS];
-
-    /* Tell the spread to the decoder */
-    if (opus_rc_tell(rc) + 4 <= f->framebits)
-        ff_opus_rc_enc_cdf(rc, f->spread, ff_celt_model_spread);
-    else
-        f->spread = CELT_SPREAD_NORMAL;
-
-    /* Generate static allocation caps */
-    for (i = 0; i < CELT_MAX_BANDS; i++) {
-        cap[i] = (ff_celt_static_caps[f->size][f->channels - 1][i] + 64)
-                 * ff_celt_freq_range[i] << (f->channels - 1) << f->size >> 2;
-    }
-
-    /* Band boosts */
-    tbits_8ths = f->framebits << 3;
-    for (i = f->start_band; i < f->end_band; i++) {
-        int quanta, b_dynalloc, boost_amount = f->alloc_boost[i];
-
-        boost[i] = 0;
-
-        quanta = ff_celt_freq_range[i] << (f->channels - 1) << f->size;
-        quanta = FFMIN(quanta << 3, FFMAX(6 << 3, quanta));
-        b_dynalloc = dynalloc;
-
-        while (opus_rc_tell_frac(rc) + (b_dynalloc << 3) < tbits_8ths && boost[i] < cap[i]) {
-            int is_boost = boost_amount--;
-
-            ff_opus_rc_enc_log(rc, is_boost, b_dynalloc);
-            if (!is_boost)
-                break;
-
-            boost[i]   += quanta;
-            tbits_8ths -= quanta;
-
-            b_dynalloc = 1;
-        }
-
-        if (boost[i])
-            dynalloc = FFMAX(2, dynalloc - 1);
-    }
-
-    /* Put allocation trim */
-    if (opus_rc_tell_frac(rc) + (6 << 3) <= tbits_8ths)
-        ff_opus_rc_enc_cdf(rc, f->alloc_trim, ff_celt_model_alloc_trim);
-
-    /* Anti-collapse bit reservation */
-    tbits_8ths = (f->framebits << 3) - opus_rc_tell_frac(rc) - 1;
-    f->anticollapse_needed = 0;
-    if (f->transient && f->size >= 2 && tbits_8ths >= ((f->size + 2) << 3))
-        f->anticollapse_needed = 1 << 3;
-    tbits_8ths -= f->anticollapse_needed;
-
-    /* Band skip bit reservation */
-    if (tbits_8ths >= 1 << 3)
-        skip_bit = 1 << 3;
-    tbits_8ths -= skip_bit;
-
-    /* Intensity/dual stereo bit reservation */
-    if (f->channels == 2) {
-        intensitystereo_bit = ff_celt_log2_frac[f->end_band - f->start_band];
-        if (intensitystereo_bit <= tbits_8ths) {
-            tbits_8ths -= intensitystereo_bit;
-            if (tbits_8ths >= 1 << 3) {
-                dualstereo_bit = 1 << 3;
-                tbits_8ths -= 1 << 3;
-            }
-        } else {
-            intensitystereo_bit = 0;
-        }
-    }
-
-    /* Trim offsets */
-    for (i = f->start_band; i < f->end_band; i++) {
-        int trim     = f->alloc_trim - 5 - f->size;
-        int band     = ff_celt_freq_range[i] * (f->end_band - i - 1);
-        int duration = f->size + 3;
-        int scale    = duration + f->channels - 1;
-
-        /* PVQ minimum allocation threshold, below this value the band is
-         * skipped */
-        threshold[i] = FFMAX(3 * ff_celt_freq_range[i] << duration >> 4,
-                             f->channels << 3);
-
-        trim_offset[i] = trim * (band << scale) >> 6;
-
-        if (ff_celt_freq_range[i] << f->size == 1)
-            trim_offset[i] -= f->channels << 3;
-    }
-
-    /* Bisection */
-    low  = 1;
-    high = CELT_VECTORS - 1;
-    while (low <= high) {
-        int center = (low + high) >> 1;
-        done = total = 0;
-
-        for (i = f->end_band - 1; i >= f->start_band; i--) {
-            bandbits = ff_celt_freq_range[i] * ff_celt_static_alloc[center][i]
-                       << (f->channels - 1) << f->size >> 2;
-
-            if (bandbits)
-                bandbits = FFMAX(0, bandbits + trim_offset[i]);
-            bandbits += boost[i];
-
-            if (bandbits >= threshold[i] || done) {
-                done = 1;
-                total += FFMIN(bandbits, cap[i]);
-            } else if (bandbits >= f->channels << 3)
-                total += f->channels << 3;
-        }
-
-        if (total > tbits_8ths)
-            high = center - 1;
-        else
-            low = center + 1;
-    }
-    high = low--;
-
-    /* Bisection */
-    for (i = f->start_band; i < f->end_band; i++) {
-        bits1[i] = ff_celt_freq_range[i] * ff_celt_static_alloc[low][i]
-                   << (f->channels - 1) << f->size >> 2;
-        bits2[i] = high >= CELT_VECTORS ? cap[i] :
-                   ff_celt_freq_range[i] * ff_celt_static_alloc[high][i]
-                   << (f->channels - 1) << f->size >> 2;
-
-        if (bits1[i])
-            bits1[i] = FFMAX(0, bits1[i] + trim_offset[i]);
-        if (bits2[i])
-            bits2[i] = FFMAX(0, bits2[i] + trim_offset[i]);
-        if (low)
-            bits1[i] += boost[i];
-        bits2[i] += boost[i];
-
-        if (boost[i])
-            skip_startband = i;
-        bits2[i] = FFMAX(0, bits2[i] - bits1[i]);
-    }
-
-    /* Bisection */
-    low  = 0;
-    high = 1 << CELT_ALLOC_STEPS;
-    for (i = 0; i < CELT_ALLOC_STEPS; i++) {
-        int center = (low + high) >> 1;
-        done = total = 0;
-
-        for (j = f->end_band - 1; j >= f->start_band; j--) {
-            bandbits = bits1[j] + (center * bits2[j] >> CELT_ALLOC_STEPS);
-
-            if (bandbits >= threshold[j] || done) {
-                done = 1;
-                total += FFMIN(bandbits, cap[j]);
-            } else if (bandbits >= f->channels << 3)
-                total += f->channels << 3;
-        }
-        if (total > tbits_8ths)
-            high = center;
-        else
-            low = center;
-    }
-
-    /* Bisection */
-    done = total = 0;
-    for (i = f->end_band - 1; i >= f->start_band; i--) {
-        bandbits = bits1[i] + (low * bits2[i] >> CELT_ALLOC_STEPS);
-
-        if (bandbits >= threshold[i] || done)
-            done = 1;
-        else
-            bandbits = (bandbits >= f->channels << 3) ?
-                       f->channels << 3 : 0;
-
-        bandbits     = FFMIN(bandbits, cap[i]);
-        f->pulses[i] = bandbits;
-        total      += bandbits;
-    }
-
-    /* Band skipping */
-    for (f->coded_bands = f->end_band; ; f->coded_bands--) {
-        int allocation;
-        j = f->coded_bands - 1;
-
-        if (j == skip_startband) {
-            /* all remaining bands are not skipped */
-            tbits_8ths += skip_bit;
-            break;
-        }
-
-        /* determine the number of bits available for coding "do not skip" markers */
-        remaining   = tbits_8ths - total;
-        bandbits    = remaining / (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
-        remaining  -= bandbits  * (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
-        allocation  = f->pulses[j] + bandbits * ff_celt_freq_range[j]
-                      + FFMAX(0, remaining - (ff_celt_freq_bands[j] - ff_celt_freq_bands[f->start_band]));
-
-        /* a "do not skip" marker is only coded if the allocation is
-           above the chosen threshold */
-        if (allocation >= FFMAX(threshold[j], (f->channels + 1) << 3)) {
-            const int do_not_skip = f->coded_bands <= f->skip_band_floor;
-            ff_opus_rc_enc_log(rc, do_not_skip, 1);
-            if (do_not_skip)
-                break;
-
-            total      += 1 << 3;
-            allocation -= 1 << 3;
-        }
-
-        /* the band is skipped, so reclaim its bits */
-        total -= f->pulses[j];
-        if (intensitystereo_bit) {
-            total -= intensitystereo_bit;
-            intensitystereo_bit = ff_celt_log2_frac[j - f->start_band];
-            total += intensitystereo_bit;
-        }
-
-        total += f->pulses[j] = (allocation >= f->channels << 3) ? f->channels << 3 : 0;
-    }
-
-    /* Encode stereo flags */
-    if (intensitystereo_bit) {
-        f->intensity_stereo = FFMIN(f->intensity_stereo, f->coded_bands);
-        ff_opus_rc_enc_uint(rc, f->intensity_stereo, f->coded_bands + 1 - f->start_band);
-    }
-    if (f->intensity_stereo <= f->start_band)
-        tbits_8ths += dualstereo_bit; /* no intensity stereo means no dual stereo */
-    else if (dualstereo_bit)
-        ff_opus_rc_enc_log(rc, f->dual_stereo, 1);
-
-    /* Supply the remaining bits in this frame to lower bands */
-    remaining = tbits_8ths - total;
-    bandbits  = remaining / (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
-    remaining -= bandbits * (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
-    for (i = f->start_band; i < f->coded_bands; i++) {
-        int bits = FFMIN(remaining, ff_celt_freq_range[i]);
-
-        f->pulses[i] += bits + bandbits * ff_celt_freq_range[i];
-        remaining    -= bits;
-    }
-
-    /* Finally determine the allocation */
-    for (i = f->start_band; i < f->coded_bands; i++) {
-        int N = ff_celt_freq_range[i] << f->size;
-        int prev_extra = extrabits;
-        f->pulses[i] += extrabits;
-
-        if (N > 1) {
-            int dof;        // degrees of freedom
-            int temp;       // dof * channels * log(dof)
-            int offset;     // fine energy quantization offset, i.e.
-                            // extra bits assigned over the standard
-                            // totalbits/dof
-            int fine_bits, max_bits;
-
-            extrabits = FFMAX(0, f->pulses[i] - cap[i]);
-            f->pulses[i] -= extrabits;
-
-            /* intensity stereo makes use of an extra degree of freedom */
-            dof = N * f->channels + (f->channels == 2 && N > 2 && !f->dual_stereo && i < f->intensity_stereo);
-            temp = dof * (ff_celt_log_freq_range[i] + (f->size << 3));
-            offset = (temp >> 1) - dof * CELT_FINE_OFFSET;
-            if (N == 2) /* dof=2 is the only case that doesn't fit the model */
-                offset += dof << 1;
-
-            /* grant an additional bias for the first and second pulses */
-            if (f->pulses[i] + offset < 2 * (dof << 3))
-                offset += temp >> 2;
-            else if (f->pulses[i] + offset < 3 * (dof << 3))
-                offset += temp >> 3;
-
-            fine_bits = (f->pulses[i] + offset + (dof << 2)) / (dof << 3);
-            max_bits  = FFMIN((f->pulses[i] >> 3) >> (f->channels - 1), CELT_MAX_FINE_BITS);
-
-            max_bits  = FFMAX(max_bits, 0);
-
-            f->fine_bits[i] = av_clip(fine_bits, 0, max_bits);
-
-            /* if fine_bits was rounded down or capped,
-               give priority for the final fine energy pass */
-            f->fine_priority[i] = (f->fine_bits[i] * (dof << 3) >= f->pulses[i] + offset);
-
-            /* the remaining bits are assigned to PVQ */
-            f->pulses[i] -= f->fine_bits[i] << (f->channels - 1) << 3;
-        } else {
-            /* all bits go to fine energy except for the sign bit */
-            extrabits = FFMAX(0, f->pulses[i] - (f->channels << 3));
-            f->pulses[i] -= extrabits;
-            f->fine_bits[i] = 0;
-            f->fine_priority[i] = 1;
-        }
-
-        /* hand back a limited number of extra fine energy bits to this band */
-        if (extrabits > 0) {
-            int fineextra = FFMIN(extrabits >> (f->channels + 2),
-                                  CELT_MAX_FINE_BITS - f->fine_bits[i]);
-            f->fine_bits[i] += fineextra;
-
-            fineextra <<= f->channels + 2;
-            f->fine_priority[i] = (fineextra >= extrabits - prev_extra);
-            extrabits -= fineextra;
-        }
-    }
-    f->remaining = extrabits;
-
-    /* skipped bands dedicate all of their bits for fine energy */
-    for (; i < f->end_band; i++) {
-        f->fine_bits[i]     = f->pulses[i] >> (f->channels - 1) >> 3;
-        f->pulses[i]        = 0;
-        f->fine_priority[i] = f->fine_bits[i] < 1;
-    }
-}
-
 static void celt_enc_quant_pfilter(OpusRangeCoder *rc, CeltFrame *f)
 {
     float gain = f->pf_gain;
-    int i, txval, octave = f->pf_octave, period = f->pf_period, tapset = f->pf_tapset;
+    int txval, octave = f->pf_octave, period = f->pf_period, tapset = f->pf_tapset;
 
     ff_opus_rc_enc_log(rc, f->pfilter, 1);
     if (!f->pfilter)
@@ -636,7 +306,7 @@
     else
         tapset = 0;
     /* Finally create the coeffs */
-    for (i = 0; i < 2; i++) {
+    for (int i = 0; i < 2; i++) {
         CeltBlock *block = &f->block[i];
 
         block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
@@ -649,7 +319,6 @@
 static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
                              float last_energy[][CELT_MAX_BANDS], int intra)
 {
-    int i, ch;
     float alpha, beta, prev[2] = { 0, 0 };
     const uint8_t *pmod = ff_celt_coarse_energy_dist[f->size][intra];
 
@@ -667,8 +336,8 @@
         beta  = ff_celt_beta_coef[f->size];
     }
 
-    for (i = f->start_band; i < f->end_band; i++) {
-        for (ch = 0; ch < f->channels; ch++) {
+    for (int i = f->start_band; i < f->end_band; i++) {
+        for (int ch = 0; ch < f->channels; ch++) {
             CeltBlock *block = &f->block[ch];
             const int left = f->framebits - opus_rc_tell(rc);
             const float last = FFMAX(-9.0f, last_energy[ch][i]);
@@ -690,7 +359,7 @@
     }
 }
 
-static void celt_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
+static void celt_quant_coarse(CeltFrame *f, OpusRangeCoder *rc,
                               float last_energy[][CELT_MAX_BANDS])
 {
     uint32_t inter, intra;
@@ -710,13 +379,12 @@
     }
 }
 
-static void celt_quant_fine(OpusRangeCoder *rc, CeltFrame *f)
+static void celt_quant_fine(CeltFrame *f, OpusRangeCoder *rc)
 {
-    int i, ch;
-    for (i = f->start_band; i < f->end_band; i++) {
+    for (int i = f->start_band; i < f->end_band; i++) {
         if (!f->fine_bits[i])
             continue;
-        for (ch = 0; ch < f->channels; ch++) {
+        for (int ch = 0; ch < f->channels; ch++) {
             CeltBlock *block = &f->block[ch];
             int quant, lim = (1 << f->fine_bits[i]);
             float offset, diff = 0.5f - block->error_energy[i];
@@ -730,12 +398,11 @@
 
 static void celt_quant_final(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
 {
-    int i, ch, priority;
-    for (priority = 0; priority < 2; priority++) {
-        for (i = f->start_band; i < f->end_band && (f->framebits - opus_rc_tell(rc)) >= f->channels; i++) {
+    for (int priority = 0; priority < 2; priority++) {
+        for (int i = f->start_band; i < f->end_band && (f->framebits - opus_rc_tell(rc)) >= f->channels; i++) {
             if (f->fine_priority[i] != priority || f->fine_bits[i] >= CELT_MAX_FINE_BITS)
                 continue;
-            for (ch = 0; ch < f->channels; ch++) {
+            for (int ch = 0; ch < f->channels; ch++) {
                 CeltBlock *block = &f->block[ch];
                 const float err = block->error_energy[i];
                 const float offset = 0.5f * (1 << (14 - f->fine_bits[i] - 1)) / 16384.0f;
@@ -747,100 +414,9 @@
     }
 }
 
-static void celt_quant_bands(OpusRangeCoder *rc, CeltFrame *f)
-{
-    float lowband_scratch[8 * 22];
-    float norm[2 * 8 * 100];
-
-    int totalbits = (f->framebits << 3) - f->anticollapse_needed;
-
-    int update_lowband = 1;
-    int lowband_offset = 0;
-
-    int i, j;
-
-    for (i = f->start_band; i < f->end_band; i++) {
-        uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 };
-        int band_offset = ff_celt_freq_bands[i] << f->size;
-        int band_size   = ff_celt_freq_range[i] << f->size;
-        float *X = f->block[0].coeffs + band_offset;
-        float *Y = (f->channels == 2) ? f->block[1].coeffs + band_offset : NULL;
-
-        int consumed = opus_rc_tell_frac(rc);
-        float *norm2 = norm + 8 * 100;
-        int effective_lowband = -1;
-        int b = 0;
-
-        /* Compute how many bits we want to allocate to this band */
-        if (i != f->start_band)
-            f->remaining -= consumed;
-        f->remaining2 = totalbits - consumed - 1;
-        if (i <= f->coded_bands - 1) {
-            int curr_balance = f->remaining / FFMIN(3, f->coded_bands-i);
-            b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[i] + curr_balance), 14);
-        }
-
-        if (ff_celt_freq_bands[i] - ff_celt_freq_range[i] >= ff_celt_freq_bands[f->start_band] &&
-            (update_lowband || lowband_offset == 0))
-            lowband_offset = i;
-
-        /* Get a conservative estimate of the collapse_mask's for the bands we're
-        going to be folding from. */
-        if (lowband_offset != 0 && (f->spread != CELT_SPREAD_AGGRESSIVE ||
-                                    f->blocks > 1 || f->tf_change[i] < 0)) {
-            int foldstart, foldend;
-
-            /* This ensures we never repeat spectral content within one band */
-            effective_lowband = FFMAX(ff_celt_freq_bands[f->start_band],
-                                      ff_celt_freq_bands[lowband_offset] - ff_celt_freq_range[i]);
-            foldstart = lowband_offset;
-            while (ff_celt_freq_bands[--foldstart] > effective_lowband);
-            foldend = lowband_offset - 1;
-            while (ff_celt_freq_bands[++foldend] < effective_lowband + ff_celt_freq_range[i]);
-
-            cm[0] = cm[1] = 0;
-            for (j = foldstart; j < foldend; j++) {
-                cm[0] |= f->block[0].collapse_masks[j];
-                cm[1] |= f->block[f->channels - 1].collapse_masks[j];
-            }
-        }
-
-        if (f->dual_stereo && i == f->intensity_stereo) {
-            /* Switch off dual stereo to do intensity */
-            f->dual_stereo = 0;
-            for (j = ff_celt_freq_bands[f->start_band] << f->size; j < band_offset; j++)
-                norm[j] = (norm[j] + norm2[j]) / 2;
-        }
-
-        if (f->dual_stereo) {
-            cm[0] = f->pvq->encode_band(f->pvq, f, rc, i, X, NULL, band_size, b / 2, f->blocks,
-                                        effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
-                                        norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]);
-
-            cm[1] = f->pvq->encode_band(f->pvq, f, rc, i, Y, NULL, band_size, b / 2, f->blocks,
-                                        effective_lowband != -1 ? norm2 + (effective_lowband << f->size) : NULL, f->size,
-                                        norm2 + band_offset, 0, 1.0f, lowband_scratch, cm[1]);
-        } else {
-            cm[0] = f->pvq->encode_band(f->pvq, f, rc, i, X, Y, band_size, b, f->blocks,
-                                        effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
-                                        norm + band_offset, 0, 1.0f, lowband_scratch, cm[0] | cm[1]);
-            cm[1] = cm[0];
-        }
-
-        f->block[0].collapse_masks[i]               = (uint8_t)cm[0];
-        f->block[f->channels - 1].collapse_masks[i] = (uint8_t)cm[1];
-        f->remaining += f->pulses[i] + consumed;
-
-        /* Update the folding position only as long as we have 1 bit/sample depth */
-        update_lowband = (b > band_size << 3);
-    }
-}
-
 static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc,
                               CeltFrame *f, int index)
 {
-    int i, ch;
-
     ff_opus_rc_enc_init(rc);
 
     ff_opus_psy_celt_frame_init(&s->psyctx, f, index);
@@ -850,7 +426,7 @@
     if (f->silence) {
         if (f->framebits >= 16)
             ff_opus_rc_enc_log(rc, 1, 15); /* Silence (if using explicit singalling) */
-        for (ch = 0; ch < s->channels; ch++)
+        for (int ch = 0; ch < s->channels; ch++)
             memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
         return;
     }
@@ -883,11 +459,11 @@
         ff_opus_rc_enc_log(rc, f->transient, 3);
 
     /* Main encoding */
-    celt_quant_coarse(rc, f, s->last_quantized_energy);
-    celt_enc_tf      (rc, f);
-    ff_celt_enc_bitalloc(rc, f);
-    celt_quant_fine  (rc, f);
-    celt_quant_bands (rc, f);
+    celt_quant_coarse  (f, rc, s->last_quantized_energy);
+    celt_enc_tf        (f, rc);
+    ff_celt_bitalloc   (f, rc, 1);
+    celt_quant_fine    (f, rc);
+    ff_celt_quant_bands(f, rc);
 
     /* Anticollapse bit */
     if (f->anticollapse_needed)
@@ -896,9 +472,9 @@
     /* Final per-band energy adjustments from leftover bits */
     celt_quant_final(s, rc, f);
 
-    for (ch = 0; ch < f->channels; ch++) {
+    for (int ch = 0; ch < f->channels; ch++) {
         CeltBlock *block = &f->block[ch];
-        for (i = 0; i < CELT_MAX_BANDS; i++)
+        for (int i = 0; i < CELT_MAX_BANDS; i++)
             s->last_quantized_energy[ch][i] = block->energy[i] + block->error_energy[i];
     }
 }
@@ -912,21 +488,21 @@
 
 static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt)
 {
-    int i, offset, fsize_needed;
+    int offset, fsize_needed;
 
     /* Write toc */
     opus_gen_toc(s, avpkt->data, &offset, &fsize_needed);
 
     /* Frame sizes if needed */
     if (fsize_needed) {
-        for (i = 0; i < s->packet.frames - 1; i++) {
+        for (int i = 0; i < s->packet.frames - 1; i++) {
             offset += write_opuslacing(avpkt->data + offset,
                                        s->frame[i].framebits >> 3);
         }
     }
 
     /* Packets */
-    for (i = 0; i < s->packet.frames; i++) {
+    for (int i = 0; i < s->packet.frames; i++) {
         ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset,
                            s->frame[i].framebits >> 3);
         offset += s->frame[i].framebits >> 3;
@@ -938,7 +514,6 @@
 /* Used as overlap for the first frame and padding for the last encoded packet */
 static AVFrame *spawn_empty_frame(OpusEncContext *s)
 {
-    int i;
     AVFrame *f = av_frame_alloc();
     if (!f)
         return NULL;
@@ -949,7 +524,7 @@
         av_frame_free(&f);
         return NULL;
     }
-    for (i = 0; i < s->channels; i++) {
+    for (int i = 0; i < s->channels; i++) {
         size_t bps = av_get_bytes_per_sample(f->format);
         memset(f->extended_data[i], 0, bps*f->nb_samples);
     }
@@ -960,7 +535,7 @@
                              const AVFrame *frame, int *got_packet_ptr)
 {
     OpusEncContext *s = avctx->priv_data;
-    int i, ret, frame_size, alloc_size = 0;
+    int ret, frame_size, alloc_size = 0;
 
     if (frame) { /* Add new frame to queue */
         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
@@ -985,7 +560,7 @@
          * this should only happen at the very last flush frame. The frames
          * allocated here will be freed (because they have no other references)
          * after they get used by celt_frame_setup_input() */
-        for (i = 0; i < pad_empty; i++) {
+        for (int i = 0; i < pad_empty; i++) {
             AVFrame *empty = spawn_empty_frame(s);
             if (!empty)
                 return AVERROR(ENOMEM);
@@ -993,7 +568,7 @@
         }
     }
 
-    for (i = 0; i < s->packet.frames; i++) {
+    for (int i = 0; i < s->packet.frames; i++) {
         celt_encode_frame(s, &s->rc[i], &s->frame[i], i);
         alloc_size += s->frame[i].framebits >> 3;
     }
@@ -1026,10 +601,9 @@
 
 static av_cold int opus_encode_end(AVCodecContext *avctx)
 {
-    int i;
     OpusEncContext *s = avctx->priv_data;
 
-    for (i = 0; i < CELT_BLOCK_NB; i++)
+    for (int i = 0; i < CELT_BLOCK_NB; i++)
         ff_mdct15_uninit(&s->mdct[i]);
 
     ff_celt_pvq_uninit(&s->pvq);
@@ -1046,7 +620,7 @@
 
 static av_cold int opus_encode_init(AVCodecContext *avctx)
 {
-    int i, ch, ret, max_frames;
+    int ret, max_frames;
     OpusEncContext *s = avctx->priv_data;
 
     s->avctx = avctx;
@@ -1080,19 +654,19 @@
 
     ff_af_queue_init(avctx, &s->afq);
 
-    if ((ret = ff_celt_pvq_init(&s->pvq)) < 0)
+    if ((ret = ff_celt_pvq_init(&s->pvq, 1)) < 0)
         return ret;
 
     if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT)))
         return AVERROR(ENOMEM);
 
     /* I have no idea why a base scaling factor of 68 works, could be the twiddles */
-    for (i = 0; i < CELT_BLOCK_NB; i++)
+    for (int i = 0; i < CELT_BLOCK_NB; i++)
         if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
             return AVERROR(ENOMEM);
 
     /* Zero out previous energy (matters for inter first frame) */
-    for (ch = 0; ch < s->channels; ch++)
+    for (int ch = 0; ch < s->channels; ch++)
         memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
 
     /* Allocate an empty frame to use as overlap for the first frame of audio */
@@ -1112,11 +686,12 @@
     if (!s->rc)
         return AVERROR(ENOMEM);
 
-    for (i = 0; i < max_frames; i++) {
+    for (int i = 0; i < max_frames; i++) {
         s->frame[i].dsp = s->dsp;
         s->frame[i].avctx = s->avctx;
         s->frame[i].seed = 0;
         s->frame[i].pvq = s->pvq;
+        s->frame[i].apply_phase_inv = 1;
         s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
     }
 

diff --git a/libavcodec/opusenc.h b/libavcodec/opusenc.h
index 3273d0a..b9162eb 100644
--- a/libavcodec/opusenc.h
+++ b/libavcodec/opusenc.h

@@ -51,6 +51,4 @@
     int frames;
 } OpusPacketInfo;
 
-void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f);
-
 #endif /* AVCODEC_OPUSENC_H */

diff --git a/libavcodec/opusenc_psy.c b/libavcodec/opusenc_psy.c
index b446d41..5a50db9 100644
--- a/libavcodec/opusenc_psy.c
+++ b/libavcodec/opusenc_psy.c

@@ -25,6 +25,56 @@
 #include "mdct15.h"
 #include "libavutil/qsort.h"
 
+static float pvq_band_cost(CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, int band,
+                           float *bits, float lambda)
+{
+    int i, b = 0;
+    uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 };
+    const int band_size = ff_celt_freq_range[band] << f->size;
+    float buf[176 * 2], lowband_scratch[176], norm1[176], norm2[176];
+    float dist, cost, err_x = 0.0f, err_y = 0.0f;
+    float *X = buf;
+    float *X_orig = f->block[0].coeffs + (ff_celt_freq_bands[band] << f->size);
+    float *Y = (f->channels == 2) ? &buf[176] : NULL;
+    float *Y_orig = f->block[1].coeffs + (ff_celt_freq_bands[band] << f->size);
+    OPUS_RC_CHECKPOINT_SPAWN(rc);
+
+    memcpy(X, X_orig, band_size*sizeof(float));
+    if (Y)
+        memcpy(Y, Y_orig, band_size*sizeof(float));
+
+    f->remaining2 = ((f->framebits << 3) - f->anticollapse_needed) - opus_rc_tell_frac(rc) - 1;
+    if (band <= f->coded_bands - 1) {
+        int curr_balance = f->remaining / FFMIN(3, f->coded_bands - band);
+        b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[band] + curr_balance), 14);
+    }
+
+    if (f->dual_stereo) {
+        pvq->quant_band(pvq, f, rc, band, X, NULL, band_size, b / 2, f->blocks, NULL,
+                        f->size, norm1, 0, 1.0f, lowband_scratch, cm[0]);
+
+        pvq->quant_band(pvq, f, rc, band, Y, NULL, band_size, b / 2, f->blocks, NULL,
+                        f->size, norm2, 0, 1.0f, lowband_scratch, cm[1]);
+    } else {
+        pvq->quant_band(pvq, f, rc, band, X, Y, band_size, b, f->blocks, NULL, f->size,
+                        norm1, 0, 1.0f, lowband_scratch, cm[0] | cm[1]);
+    }
+
+    for (i = 0; i < band_size; i++) {
+        err_x += (X[i] - X_orig[i])*(X[i] - X_orig[i]);
+        if (Y)
+            err_y += (Y[i] - Y_orig[i])*(Y[i] - Y_orig[i]);
+    }
+
+    dist = sqrtf(err_x) + sqrtf(err_y);
+    cost = OPUS_RC_CHECKPOINT_BITS(rc)/8.0f;
+    *bits += cost;
+
+    OPUS_RC_CHECKPOINT_ROLLBACK(rc);
+
+    return lambda*dist*cost;
+}
+
 /* Populate metrics without taking into consideration neighbouring steps */
 static void step_collect_psy_metrics(OpusPsyContext *s, int index)
 {
@@ -69,7 +119,7 @@
 
             for (j = 0; j < range; j++) {
                 const float c_s = coeffs[j]*coeffs[j];
-                dist_dev = (avg_c_s - c_s)*(avg_c_s - c_s);
+                dist_dev += (avg_c_s - c_s)*(avg_c_s - c_s);
             }
 
             st->tone[ch][i] += sqrtf(dist_dev);
@@ -316,11 +366,11 @@
     OpusRangeCoder dump;
 
     ff_opus_rc_enc_init(&dump);
-    ff_celt_enc_bitalloc(&dump, f);
+    ff_celt_bitalloc(f, &dump, 1);
 
     for (i = 0; i < CELT_MAX_BANDS; i++) {
         float bits = 0.0f;
-        float dist = f->pvq->band_cost(f->pvq, f, &dump, i, &bits, s->lambda);
+        float dist = pvq_band_cost(f->pvq, f, &dump, i, &bits, s->lambda);
         tdist += dist;
     }
 
@@ -333,6 +383,10 @@
 {
     float td1, td2;
     f->dual_stereo = 0;
+
+    if (s->avctx->channels < 2)
+        return;
+
     bands_dist(s, f, &td1);
     f->dual_stereo = 1;
     bands_dist(s, f, &td2);
@@ -345,10 +399,12 @@
 {
     int i, best_band = CELT_MAX_BANDS - 1;
     float dist, best_dist = FLT_MAX;
-
     /* TODO: fix, make some heuristic up here using the lambda value */
     float end_band = 0;
 
+    if (s->avctx->channels < 2)
+        return;
+
     for (i = f->end_band; i >= end_band; i--) {
         f->intensity_stereo = i;
         bands_dist(s, f, &dist);
@@ -370,7 +426,6 @@
     for (cway = 0; cway < 2; cway++) {
         int mag[2];
         int base = f->transient ? 120 : 960;
-        int i;
 
         for (i = 0; i < 2; i++) {
             int c = ff_celt_tf_select[f->size][f->transient][cway][i];

diff --git a/libavcodec/parser.c b/libavcodec/parser.c
index 670680e..0a994a3 100644
--- a/libavcodec/parser.c
+++ b/libavcodec/parser.c

@@ -25,40 +25,23 @@
 #include <string.h>
 
 #include "libavutil/avassert.h"
-#include "libavutil/atomic.h"
 #include "libavutil/internal.h"
 #include "libavutil/mem.h"
 
 #include "internal.h"
 #include "parser.h"
 
-static AVCodecParser *av_first_parser = NULL;
-
-AVCodecParser *av_parser_next(const AVCodecParser *p)
-{
-    if (p)
-        return p->next;
-    else
-        return av_first_parser;
-}
-
-void av_register_codec_parser(AVCodecParser *parser)
-{
-    do {
-        parser->next = av_first_parser;
-    } while (parser->next != avpriv_atomic_ptr_cas((void * volatile *)&av_first_parser, parser->next, parser));
-}
-
 AVCodecParserContext *av_parser_init(int codec_id)
 {
     AVCodecParserContext *s = NULL;
-    AVCodecParser *parser;
+    const AVCodecParser *parser;
+    void *i = 0;
     int ret;
 
     if (codec_id == AV_CODEC_ID_NONE)
         return NULL;
 
-    for (parser = av_first_parser; parser; parser = parser->next) {
+    while ((parser = av_parser_iterate(&i))) {
         if (parser->codec_ids[0] == codec_id ||
             parser->codec_ids[1] == codec_id ||
             parser->codec_ids[2] == codec_id ||
@@ -72,7 +55,7 @@
     s = av_mallocz(sizeof(AVCodecParserContext));
     if (!s)
         goto err_out;
-    s->parser = parser;
+    s->parser = (AVCodecParser*)parser;
     s->priv_data = av_mallocz(parser->priv_data_size);
     if (!s->priv_data)
         goto err_out;

diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
new file mode 100644
index 0000000..f01cad4
--- /dev/null
+++ b/libavcodec/parsers.c

@@ -0,0 +1,108 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/thread.h"
+
+#include "avcodec.h"
+
+extern AVCodecParser ff_aac_parser;
+extern AVCodecParser ff_aac_latm_parser;
+extern AVCodecParser ff_ac3_parser;
+extern AVCodecParser ff_adx_parser;
+extern AVCodecParser ff_av1_parser;
+extern AVCodecParser ff_avs2_parser;
+extern AVCodecParser ff_bmp_parser;
+extern AVCodecParser ff_cavsvideo_parser;
+extern AVCodecParser ff_cook_parser;
+extern AVCodecParser ff_dca_parser;
+extern AVCodecParser ff_dirac_parser;
+extern AVCodecParser ff_dnxhd_parser;
+extern AVCodecParser ff_dpx_parser;
+extern AVCodecParser ff_dvaudio_parser;
+extern AVCodecParser ff_dvbsub_parser;
+extern AVCodecParser ff_dvdsub_parser;
+extern AVCodecParser ff_dvd_nav_parser;
+extern AVCodecParser ff_flac_parser;
+extern AVCodecParser ff_g729_parser;
+extern AVCodecParser ff_gsm_parser;
+extern AVCodecParser ff_h261_parser;
+extern AVCodecParser ff_h263_parser;
+extern AVCodecParser ff_h264_parser;
+extern AVCodecParser ff_hevc_parser;
+extern AVCodecParser ff_mjpeg_parser;
+extern AVCodecParser ff_mlp_parser;
+extern AVCodecParser ff_mpeg4video_parser;
+extern AVCodecParser ff_mpegaudio_parser;
+extern AVCodecParser ff_mpegvideo_parser;
+extern AVCodecParser ff_opus_parser;
+extern AVCodecParser ff_png_parser;
+extern AVCodecParser ff_pnm_parser;
+extern AVCodecParser ff_rv30_parser;
+extern AVCodecParser ff_rv40_parser;
+extern AVCodecParser ff_sbc_parser;
+extern AVCodecParser ff_sipr_parser;
+extern AVCodecParser ff_tak_parser;
+extern AVCodecParser ff_vc1_parser;
+extern AVCodecParser ff_vorbis_parser;
+extern AVCodecParser ff_vp3_parser;
+extern AVCodecParser ff_vp8_parser;
+extern AVCodecParser ff_vp9_parser;
+extern AVCodecParser ff_xma_parser;
+
+#include "libavcodec/parser_list.c"
+
+static AVOnce av_parser_next_init = AV_ONCE_INIT;
+
+static void av_parser_init_next(void)
+{
+    AVCodecParser *prev = NULL, *p;
+    int i = 0;
+    while ((p = (AVCodecParser*)parser_list[i++])) {
+        if (prev)
+            prev->next = p;
+        prev = p;
+    }
+}
+
+AVCodecParser *av_parser_next(const AVCodecParser *p)
+{
+    ff_thread_once(&av_parser_next_init, av_parser_init_next);
+
+    if (p)
+        return p->next;
+    else
+        return (AVCodecParser*)parser_list[0];
+}
+
+const AVCodecParser *av_parser_iterate(void **opaque)
+{
+    uintptr_t i = (uintptr_t)*opaque;
+    const AVCodecParser *p = parser_list[i];
+
+    if (p)
+        *opaque = (void*)(i + 1);
+
+    return p;
+}
+
+void av_register_codec_parser(AVCodecParser *parser)
+{
+    ff_thread_once(&av_parser_next_init, av_parser_init_next);
+}

diff --git a/libavcodec/pcx.c b/libavcodec/pcx.c
index 58a5e1e..4505db7 100644
--- a/libavcodec/pcx.c
+++ b/libavcodec/pcx.c

@@ -30,7 +30,7 @@
 
 #define PCX_HEADER_SIZE 128
 
-static void pcx_rle_decode(GetByteContext *gb,
+static int pcx_rle_decode(GetByteContext *gb,
                            uint8_t *dst,
                            unsigned int bytes_per_scanline,
                            int compressed)
@@ -38,6 +38,9 @@
     unsigned int i = 0;
     unsigned char run, value;
 
+    if (bytestream2_get_bytes_left(gb) < 1)
+        return AVERROR_INVALIDDATA;
+
     if (compressed) {
         while (i < bytes_per_scanline && bytestream2_get_bytes_left(gb)>0) {
             run   = 1;
@@ -52,6 +55,7 @@
     } else {
         bytestream2_get_buffer(gb, dst, bytes_per_scanline);
     }
+    return 0;
 }
 
 static void pcx_palette(GetByteContext *gb, uint32_t *dst, int pallen)
@@ -153,7 +157,9 @@
 
     if (nplanes == 3 && bits_per_pixel == 8) {
         for (y = 0; y < h; y++) {
-            pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
+            ret = pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
+            if (ret < 0)
+                goto end;
 
             for (x = 0; x < w; x++) {
                 ptr[3 * x]     = scanline[x];
@@ -174,7 +180,9 @@
         }
 
         for (y = 0; y < h; y++, ptr += stride) {
-            pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
+            ret = pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
+            if (ret < 0)
+                goto end;
             memcpy(ptr, scanline, w);
         }
 
@@ -194,7 +202,9 @@
         for (y = 0; y < h; y++) {
             init_get_bits8(&s, scanline, bytes_per_scanline);
 
-            pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
+            ret = pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
+            if (ret < 0)
+                goto end;
 
             for (x = 0; x < w; x++)
                 ptr[x] = get_bits(&s, bits_per_pixel);
@@ -204,7 +214,9 @@
         int i;
 
         for (y = 0; y < h; y++) {
-            pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
+            ret = pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
+            if (ret < 0)
+                goto end;
 
             for (x = 0; x < w; x++) {
                 int m = 0x80 >> (x & 7), v = 0;

diff --git a/libavcodec/pixlet.c b/libavcodec/pixlet.c
index a9cfe08..03a2cda 100644
--- a/libavcodec/pixlet.c
+++ b/libavcodec/pixlet.c

@@ -28,12 +28,14 @@
 #include "avcodec.h"
 #include "bytestream.h"
 #include "get_bits.h"
-#include "unary.h"
 #include "internal.h"
 #include "thread.h"
+#include "unary.h"
 
 #define NB_LEVELS 4
 
+#define PIXLET_MAGIC 0xDEADBEEF
+
 #define H 0
 #define V 1
 
@@ -47,11 +49,11 @@
     AVClass *class;
 
     GetByteContext gb;
-    GetBitContext gbit;
+    GetBitContext bc;
 
     int levels;
     int depth;
-    int h, w;
+    int w, h;
 
     int16_t *filter[2];
     int16_t *prediction;
@@ -59,38 +61,10 @@
     SubBand band[4][NB_LEVELS * 3 + 1];
 } PixletContext;
 
-static int init_decoder(AVCodecContext *avctx)
+static av_cold int pixlet_init(AVCodecContext *avctx)
 {
-    PixletContext *ctx = avctx->priv_data;
-    int i, plane;
-
-    ctx->filter[0]  = av_malloc_array(ctx->h, sizeof(int16_t));
-    ctx->filter[1]  = av_malloc_array(FFMAX(ctx->h, ctx->w) + 16, sizeof(int16_t));
-    ctx->prediction = av_malloc_array((ctx->w >> NB_LEVELS), sizeof(int16_t));
-    if (!ctx->filter[0] || !ctx->filter[1] || !ctx->prediction)
-        return AVERROR(ENOMEM);
-
-    for (plane = 0; plane < 3; plane++) {
-        unsigned shift = plane > 0;
-        unsigned w = ctx->w >> shift;
-        unsigned h = ctx->h >> shift;
-
-        ctx->band[plane][0].width  = w >> NB_LEVELS;
-        ctx->band[plane][0].height = h >> NB_LEVELS;
-        ctx->band[plane][0].size = (w >> NB_LEVELS) * (h >> NB_LEVELS);
-
-        for (i = 0; i < NB_LEVELS * 3; i++) {
-            unsigned scale = ctx->levels - (i / 3);
-
-            ctx->band[plane][i + 1].width  = w >> scale;
-            ctx->band[plane][i + 1].height = h >> scale;
-            ctx->band[plane][i + 1].size = (w >> scale) * (h >> scale);
-
-            ctx->band[plane][i + 1].x = (w >> scale) * (((i + 1) % 3) != 2);
-            ctx->band[plane][i + 1].y = (h >> scale) * (((i + 1) % 3) != 1);
-        }
-    }
-
+    avctx->pix_fmt     = AV_PIX_FMT_YUV420P16;
+    avctx->color_range = AVCOL_RANGE_JPEG;
     return 0;
 }
 
@@ -112,17 +86,46 @@
     return 0;
 }
 
-static av_cold int pixlet_init(AVCodecContext *avctx)
+static int init_decoder(AVCodecContext *avctx)
 {
-    avctx->pix_fmt = AV_PIX_FMT_YUV420P16;
-    avctx->color_range = AVCOL_RANGE_JPEG;
+    PixletContext *ctx = avctx->priv_data;
+    int i, plane;
+
+    ctx->filter[0]  = av_malloc_array(ctx->h, sizeof(int16_t));
+    ctx->filter[1]  = av_malloc_array(FFMAX(ctx->h, ctx->w) + 16, sizeof(int16_t));
+    ctx->prediction = av_malloc_array((ctx->w >> NB_LEVELS), sizeof(int16_t));
+    if (!ctx->filter[0] || !ctx->filter[1] || !ctx->prediction)
+        return AVERROR(ENOMEM);
+
+    for (plane = 0; plane < 3; plane++) {
+        unsigned shift = plane > 0;
+        unsigned w     = ctx->w >> shift;
+        unsigned h     = ctx->h >> shift;
+
+        ctx->band[plane][0].width  =  w >> NB_LEVELS;
+        ctx->band[plane][0].height =  h >> NB_LEVELS;
+        ctx->band[plane][0].size   = (w >> NB_LEVELS) * (h >> NB_LEVELS);
+
+        for (i = 0; i < NB_LEVELS * 3; i++) {
+            unsigned scale = ctx->levels - (i / 3);
+
+            ctx->band[plane][i + 1].width  =  w >> scale;
+            ctx->band[plane][i + 1].height =  h >> scale;
+            ctx->band[plane][i + 1].size   = (w >> scale) * (h >> scale);
+
+            ctx->band[plane][i + 1].x = (w >> scale) * (((i + 1) % 3) != 2);
+            ctx->band[plane][i + 1].y = (h >> scale) * (((i + 1) % 3) != 1);
+        }
+    }
+
     return 0;
 }
 
-static int read_low_coeffs(AVCodecContext *avctx, int16_t *dst, int size, int width, ptrdiff_t stride)
+static int read_low_coeffs(AVCodecContext *avctx, int16_t *dst, int size,
+                           int width, ptrdiff_t stride)
 {
     PixletContext *ctx = avctx->priv_data;
-    GetBitContext *b = &ctx->gbit;
+    GetBitContext *bc = &ctx->bc;
     unsigned cnt1, nbits, k, j = 0, i = 0;
     int64_t value, state = 3;
     int rlen, escape, flag = 0;
@@ -130,45 +133,45 @@
     while (i < size) {
         nbits = FFMIN(ff_clz((state >> 8) + 3) ^ 0x1F, 14);
 
-        cnt1 = get_unary(b, 0, 8);
+        cnt1 = get_unary(bc, 0, 8);
         if (cnt1 < 8) {
-            value = show_bits(b, nbits);
+            value = show_bits(bc, nbits);
             if (value <= 1) {
-                skip_bits(b, nbits - 1);
+                skip_bits(bc, nbits - 1);
                 escape = ((1 << nbits) - 1) * cnt1;
             } else {
-                skip_bits(b, nbits);
+                skip_bits(bc, nbits);
                 escape = value + ((1 << nbits) - 1) * cnt1 - 1;
             }
         } else {
-            escape = get_bits(b, 16);
+            escape = get_bits(bc, 16);
         }
 
-        value = -((escape + flag) & 1) | 1;
+        value    = -((escape + flag) & 1) | 1;
         dst[j++] = value * ((escape + flag + 1) >> 1);
         i++;
         if (j == width) {
-            j = 0;
+            j    = 0;
             dst += stride;
         }
         state = 120 * (escape + flag) + state - (120 * state >> 8);
-        flag = 0;
+        flag  = 0;
 
         if (state * 4ULL > 0xFF || i >= size)
             continue;
 
-        nbits = ((state + 8) >> 5) + (state ? ff_clz(state) : 32) - 24;
+        nbits  = ((state + 8) >> 5) + (state ? ff_clz(state) : 32) - 24;
         escape = av_mod_uintp2(16383, nbits);
-        cnt1 = get_unary(b, 0, 8);
+        cnt1   = get_unary(bc, 0, 8);
         if (cnt1 > 7) {
-            rlen = get_bits(b, 16);
+            rlen = get_bits(bc, 16);
         } else {
-            value = show_bits(b, nbits);
+            value = show_bits(bc, nbits);
             if (value > 1) {
-                skip_bits(b, nbits);
+                skip_bits(bc, nbits);
                 rlen = value + escape * cnt1 - 1;
             } else {
-                skip_bits(b, nbits - 1);
+                skip_bits(bc, nbits - 1);
                 rlen = escape * cnt1;
             }
         }
@@ -180,31 +183,32 @@
         for (k = 0; k < rlen; k++) {
             dst[j++] = 0;
             if (j == width) {
-                j = 0;
+                j    = 0;
                 dst += stride;
             }
         }
 
         state = 0;
-        flag = rlen < 0xFFFF ? 1 : 0;
+        flag  = rlen < 0xFFFF ? 1 : 0;
     }
 
-    align_get_bits(b);
-    return get_bits_count(b) >> 3;
+    align_get_bits(bc);
+    return get_bits_count(bc) >> 3;
 }
 
-static int read_high_coeffs(AVCodecContext *avctx, uint8_t *src, int16_t *dst, int size,
-                            int c, int a, int d,
+static int read_high_coeffs(AVCodecContext *avctx, uint8_t *src, int16_t *dst,
+                            int size, int c, int a, int d,
                             int width, ptrdiff_t stride)
 {
     PixletContext *ctx = avctx->priv_data;
-    GetBitContext *b = &ctx->gbit;
+    GetBitContext *bc = &ctx->bc;
     unsigned cnt1, shbits, rlen, nbits, length, i = 0, j = 0, k;
     int ret, escape, pfx, value, yflag, xflag, flag = 0;
     int64_t state = 3, tmp;
 
-    if ((ret = init_get_bits8(b, src, bytestream2_get_bytes_left(&ctx->gb))) < 0)
-      return ret;
+    ret = init_get_bits8(bc, src, bytestream2_get_bytes_left(&ctx->gb));
+    if (ret < 0)
+        return ret;
 
     if (a ^ (a >> 31)) {
         nbits = 33 - ff_clz(a ^ (a >> 31));
@@ -217,26 +221,24 @@
     length = 25 - nbits;
 
     while (i < size) {
-        if (state >> 8 != -3) {
+        if (state >> 8 != -3)
             value = ff_clz((state >> 8) + 3) ^ 0x1F;
-        } else {
+        else
             value = -1;
-        }
 
-        cnt1 = get_unary(b, 0, length);
-
+        cnt1 = get_unary(bc, 0, length);
         if (cnt1 >= length) {
-            cnt1 = get_bits(b, nbits);
+            cnt1 = get_bits(bc, nbits);
         } else {
             pfx = 14 + ((((uint64_t)(value - 14)) >> 32) & (value - 14));
             if (pfx < 1 || pfx > 25)
                 return AVERROR_INVALIDDATA;
             cnt1 *= (1 << pfx) - 1;
-            shbits = show_bits(b, pfx);
+            shbits = show_bits(bc, pfx);
             if (shbits <= 1) {
-                skip_bits(b, pfx - 1);
+                skip_bits(bc, pfx - 1);
             } else {
-                skip_bits(b, pfx);
+                skip_bits(bc, pfx);
                 cnt1 += shbits - 1;
             }
         }
@@ -248,14 +250,14 @@
             value = 0;
         } else {
             xflag &= 1u;
-            tmp = (int64_t)c * ((yflag + 1) >> 1) + (c >> 1);
-            value = xflag + (tmp ^ -xflag);
+            tmp    = (int64_t)c * ((yflag + 1) >> 1) + (c >> 1);
+            value  = xflag + (tmp ^ -xflag);
         }
 
         i++;
         dst[j++] = value;
         if (j == width) {
-            j = 0;
+            j    = 0;
             dst += stride;
         }
         state += (int64_t)d * (uint64_t)yflag - ((int64_t)(d * (uint64_t)state) >> 8);
@@ -265,25 +267,26 @@
         if ((uint64_t)state > 0xFF / 4 || i >= size)
             continue;
 
-        pfx = ((state + 8) >> 5) + (state ? ff_clz(state): 32) - 24;
+        pfx    = ((state + 8) >> 5) + (state ? ff_clz(state) : 32) - 24;
         escape = av_mod_uintp2(16383, pfx);
-        cnt1 = get_unary(b, 0, 8);
+        cnt1   = get_unary(bc, 0, 8);
         if (cnt1 < 8) {
             if (pfx < 1 || pfx > 25)
                 return AVERROR_INVALIDDATA;
-            value = show_bits(b, pfx);
+
+            value = show_bits(bc, pfx);
             if (value > 1) {
-                skip_bits(b, pfx);
+                skip_bits(bc, pfx);
                 rlen = value + escape * cnt1 - 1;
             } else {
-                skip_bits(b, pfx - 1);
+                skip_bits(bc, pfx - 1);
                 rlen = escape * cnt1;
             }
         } else {
-            if (get_bits1(b))
-                value = get_bits(b, 16);
+            if (get_bits1(bc))
+                value = get_bits(bc, 16);
             else
-                value = get_bits(b, 8);
+                value = get_bits(bc, 8);
 
             rlen = value + 8 * escape;
         }
@@ -295,20 +298,21 @@
         for (k = 0; k < rlen; k++) {
             dst[j++] = 0;
             if (j == width) {
-                j = 0;
+                j    = 0;
                 dst += stride;
             }
         }
 
         state = 0;
-        flag = rlen < 0xFFFF ? 1 : 0;
+        flag  = rlen < 0xFFFF ? 1 : 0;
     }
 
-    align_get_bits(b);
-    return get_bits_count(b) >> 3;
+    align_get_bits(bc);
+    return get_bits_count(bc) >> 3;
 }
 
-static int read_highpass(AVCodecContext *avctx, uint8_t *ptr, int plane, AVFrame *frame)
+static int read_highpass(AVCodecContext *avctx, uint8_t *ptr,
+                         int plane, AVFrame *frame)
 {
     PixletContext *ctx = avctx->priv_data;
     ptrdiff_t stride = frame->linesize[plane] / 2;
@@ -319,15 +323,16 @@
         int32_t b = bytestream2_get_be32(&ctx->gb);
         int32_t c = bytestream2_get_be32(&ctx->gb);
         int32_t d = bytestream2_get_be32(&ctx->gb);
-        int16_t *dest = (int16_t *)frame->data[plane] + ctx->band[plane][i + 1].x +
-                                               stride * ctx->band[plane][i + 1].y;
+        int16_t *dest = (int16_t *)frame->data[plane] +
+                        ctx->band[plane][i + 1].x +
+                        ctx->band[plane][i + 1].y * stride;
         unsigned size = ctx->band[plane][i + 1].size;
-        uint32_t magic;
+        uint32_t magic = bytestream2_get_be32(&ctx->gb);
 
-        magic = bytestream2_get_be32(&ctx->gb);
-        if (magic != 0xDEADBEEF) {
-            av_log(avctx, AV_LOG_ERROR, "wrong magic number: 0x%08"PRIX32
-                   " for plane %d, band %d\n", magic, plane, i);
+        if (magic != PIXLET_MAGIC) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "wrong magic number: 0x%08"PRIX32" for plane %d, band %d\n",
+                   magic, plane, i);
             return AVERROR_INVALIDDATA;
         }
 
@@ -338,7 +343,9 @@
                                c, (b >= FFABS(a)) ? b : a, d,
                                ctx->band[plane][i + 1].width, stride);
         if (ret < 0) {
-            av_log(avctx, AV_LOG_ERROR, "error in highpass coefficients for plane %d, band %d\n", plane, i);
+            av_log(avctx, AV_LOG_ERROR,
+                   "error in highpass coefficients for plane %d, band %d\n",
+                   plane, i);
             return ret;
         }
         bytestream2_skip(&ctx->gb, ret);
@@ -347,7 +354,8 @@
     return 0;
 }
 
-static void lowpass_prediction(int16_t *dst, int16_t *pred, int width, int height, ptrdiff_t stride)
+static void lowpass_prediction(int16_t *dst, int16_t *pred,
+                               int width, int height, ptrdiff_t stride)
 {
     int16_t val;
     int i, j;
@@ -373,8 +381,8 @@
     int64_t value;
 
     hsize = size >> 1;
-    low = tmp + 4;
-    high = &low[hsize + 8];
+    low   = tmp + 4;
+    high  = &low[hsize + 8];
 
     memcpy(low, dest, size);
     memcpy(high, dest + hsize, size);
@@ -411,29 +419,28 @@
     }
 }
 
-static void reconstruction(AVCodecContext *avctx,
-                           int16_t *dest, unsigned width, unsigned height, ptrdiff_t stride, int nb_levels,
-                           int64_t *scaling_H, int64_t *scaling_V)
+static void reconstruction(AVCodecContext *avctx, int16_t *dest,
+                           unsigned width, unsigned height, ptrdiff_t stride,
+                           int64_t *scaling_h, int64_t *scaling_v)
 {
     PixletContext *ctx = avctx->priv_data;
     unsigned scaled_width, scaled_height;
-    int64_t scale_H, scale_V;
     int16_t *ptr, *tmp;
     int i, j, k;
 
-    scaled_height = height >> nb_levels;
-    scaled_width  = width  >> nb_levels;
-    tmp = ctx->filter[0];
+    scaled_width  = width  >> NB_LEVELS;
+    scaled_height = height >> NB_LEVELS;
+    tmp           = ctx->filter[0];
 
-    for (i = 0; i < nb_levels; i++) {
+    for (i = 0; i < NB_LEVELS; i++) {
+        int64_t scale_v = scaling_v[i];
+        int64_t scale_h = scaling_h[i];
         scaled_width  <<= 1;
         scaled_height <<= 1;
-        scale_H = scaling_H[i];
-        scale_V = scaling_V[i];
 
         ptr = dest;
         for (j = 0; j < scaled_height; j++) {
-            filterfn(ptr, ctx->filter[1], scaled_width, scale_V);
+            filterfn(ptr, ctx->filter[1], scaled_width, scale_v);
             ptr += stride;
         }
 
@@ -441,10 +448,10 @@
             ptr = dest + j;
             for (k = 0; k < scaled_height; k++) {
                 tmp[k] = *ptr;
-                ptr += stride;
+                ptr   += stride;
             }
 
-            filterfn(tmp, ctx->filter[1], scaled_height, scale_H);
+            filterfn(tmp, ctx->filter[1], scaled_height, scale_h);
 
             ptr = dest + j;
             for (k = 0; k < scaled_height; k++) {
@@ -501,11 +508,12 @@
     }
 }
 
-static int decode_plane(AVCodecContext *avctx, int plane, AVPacket *avpkt, AVFrame *frame)
+static int decode_plane(AVCodecContext *avctx, int plane,
+                        AVPacket *avpkt, AVFrame *frame)
 {
     PixletContext *ctx = avctx->priv_data;
-    ptrdiff_t stride = frame->linesize[plane] / 2;
-    unsigned shift = plane > 0;
+    ptrdiff_t stride   = frame->linesize[plane] / 2;
+    unsigned shift     = plane > 0;
     int16_t *dst;
     int i, ret;
 
@@ -522,22 +530,28 @@
 
     bytestream2_skip(&ctx->gb, 4);
 
-    dst = (int16_t *)frame->data[plane];
+    dst    = (int16_t *)frame->data[plane];
     dst[0] = sign_extend(bytestream2_get_be16(&ctx->gb), 16);
 
-    if ((ret = init_get_bits8(&ctx->gbit, avpkt->data + bytestream2_tell(&ctx->gb),
-                              bytestream2_get_bytes_left(&ctx->gb))) < 0)
+    ret = init_get_bits8(&ctx->bc, avpkt->data + bytestream2_tell(&ctx->gb),
+                         bytestream2_get_bytes_left(&ctx->gb));
+    if (ret < 0)
         return ret;
 
-    ret = read_low_coeffs(avctx, dst + 1, ctx->band[plane][0].width - 1, ctx->band[plane][0].width - 1, 0);
+    ret = read_low_coeffs(avctx, dst + 1, ctx->band[plane][0].width - 1,
+                          ctx->band[plane][0].width - 1, 0);
     if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR, "error in lowpass coefficients for plane %d, top row\n", plane);
+        av_log(avctx, AV_LOG_ERROR,
+               "error in lowpass coefficients for plane %d, top row\n", plane);
         return ret;
     }
 
-    ret = read_low_coeffs(avctx, dst + stride, ctx->band[plane][0].height - 1, 1, stride);
+    ret = read_low_coeffs(avctx, dst + stride,
+                          ctx->band[plane][0].height - 1, 1, stride);
     if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR, "error in lowpass coefficients for plane %d, left column\n", plane);
+        av_log(avctx, AV_LOG_ERROR,
+               "error in lowpass coefficients for plane %d, left column\n",
+               plane);
         return ret;
     }
 
@@ -545,7 +559,8 @@
                           (ctx->band[plane][0].width - 1) * (ctx->band[plane][0].height - 1),
                           ctx->band[plane][0].width - 1, stride);
     if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR, "error in lowpass coefficients for plane %d, rest\n", plane);
+        av_log(avctx, AV_LOG_ERROR,
+               "error in lowpass coefficients for plane %d, rest\n", plane);
         return ret;
     }
 
@@ -559,11 +574,12 @@
     if (ret < 0)
         return ret;
 
-    lowpass_prediction(dst, ctx->prediction,
-                       ctx->band[plane][0].width, ctx->band[plane][0].height, stride);
+    lowpass_prediction(dst, ctx->prediction, ctx->band[plane][0].width,
+                       ctx->band[plane][0].height, stride);
 
-    reconstruction(avctx, (int16_t *)frame->data[plane], ctx->w >> shift, ctx->h >> shift,
-                   stride, NB_LEVELS, ctx->scaling[plane][H], ctx->scaling[plane][V]);
+    reconstruction(avctx, (int16_t *)frame->data[plane], ctx->w >> shift,
+                   ctx->h >> shift, stride, ctx->scaling[plane][H],
+                   ctx->scaling[plane][V]);
 
     return 0;
 }
@@ -664,14 +680,15 @@
 {
     PixletContext *ctx = avctx->priv_data;
 
-    ctx->filter[0] = NULL;
-    ctx->filter[1] = NULL;
+    ctx->filter[0]  = NULL;
+    ctx->filter[1]  = NULL;
     ctx->prediction = NULL;
-    ctx->w = ctx->h = 0;
+    ctx->w = 0;
+    ctx->h = 0;
 
     return 0;
 }
-#endif
+#endif /* HAVE_THREADS */
 
 AVCodec ff_pixlet_decoder = {
     .name             = "pixlet",

diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index 0d6612c..0114468 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c

@@ -25,6 +25,7 @@
 #include "libavutil/bprint.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/stereo3d.h"
+#include "libavutil/mastering_display_metadata.h"
 
 #include "avcodec.h"
 #include "bytestream.h"
@@ -445,7 +446,7 @@
         return AVERROR_EXTERNAL;
     zstream.next_in  = (unsigned char *)data;
     zstream.avail_in = data_end - data;
-    av_bprint_init(bp, 0, -1);
+    av_bprint_init(bp, 0, AV_BPRINT_SIZE_UNLIMITED);
 
     while (zstream.avail_in > 0) {
         av_bprint_get_buffer(bp, 2, &buf, &buf_size);
@@ -523,9 +524,9 @@
         if ((ret = decode_zbuf(&bp, data, data_end)) < 0)
             return ret;
         text_len = bp.len;
-        av_bprint_finalize(&bp, (char **)&text);
-        if (!text)
-            return AVERROR(ENOMEM);
+        ret = av_bprint_finalize(&bp, (char **)&text);
+        if (ret < 0)
+            return ret;
     } else {
         text = (uint8_t *)data;
         text_len = data_end - text;
@@ -661,10 +662,10 @@
                 s->color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
             avctx->pix_fmt = AV_PIX_FMT_YA16BE;
         } else {
-            av_log(avctx, AV_LOG_ERROR, "unsupported bit depth %d "
-                    "and color type %d\n",
-                    s->bit_depth, s->color_type);
-            return AVERROR_INVALIDDATA;
+            avpriv_report_missing_feature(avctx,
+                                          "Bit depth %d color type %d",
+                                          s->bit_depth, s->color_type);
+            return AVERROR_PATCHWELCOME;
         }
 
         if (s->has_trns && s->color_type != PNG_COLOR_TYPE_PALETTE) {
@@ -861,7 +862,9 @@
     if ((ret = decode_zbuf(&bp, s->gb.buffer, s->gb.buffer + length)) < 0)
         return ret;
 
-    av_bprint_finalize(&bp, (char **)&data);
+    ret = av_bprint_finalize(&bp, (char **)&data);
+    if (ret < 0)
+        return ret;
 
     sd = av_frame_new_side_data(f, AV_FRAME_DATA_ICC_PROFILE, bp.len);
     if (!sd) {
@@ -1165,7 +1168,7 @@
     AVDictionary **metadatap = NULL;
     uint32_t tag, length;
     int decode_next_dat = 0;
-    int ret;
+    int i, ret;
 
     for (;;) {
         length = bytestream2_get_bytes_left(&s->gb);
@@ -1287,6 +1290,42 @@
                 goto fail;
             break;
         }
+        case MKTAG('c', 'H', 'R', 'M'): {
+            AVMasteringDisplayMetadata *mdm = av_mastering_display_metadata_create_side_data(p);
+            if (!mdm) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            mdm->white_point[0] = av_make_q(bytestream2_get_be32(&s->gb), 100000);
+            mdm->white_point[1] = av_make_q(bytestream2_get_be32(&s->gb), 100000);
+
+            /* RGB Primaries */
+            for (i = 0; i < 3; i++) {
+                mdm->display_primaries[i][0] = av_make_q(bytestream2_get_be32(&s->gb), 100000);
+                mdm->display_primaries[i][1] = av_make_q(bytestream2_get_be32(&s->gb), 100000);
+            }
+
+            mdm->has_primaries = 1;
+            bytestream2_skip(&s->gb, 4); /* crc */
+            break;
+        }
+        case MKTAG('g', 'A', 'M', 'A'): {
+            AVBPrint bp;
+            char *gamma_str;
+            int num = bytestream2_get_be32(&s->gb);
+
+            av_bprint_init(&bp, 0, AV_BPRINT_SIZE_UNLIMITED);
+            av_bprintf(&bp, "%i/%i", num, 100000);
+            ret = av_bprint_finalize(&bp, &gamma_str);
+            if (ret < 0)
+                return ret;
+
+            av_dict_set(&p->metadata, "gamma", gamma_str, AV_DICT_DONT_STRDUP_VAL);
+
+            bytestream2_skip(&s->gb, 4); /* crc */
+            break;
+        }
         case MKTAG('I', 'E', 'N', 'D'):
             if (!(s->pic_state & PNG_ALLIMAGE))
                 av_log(avctx, AV_LOG_ERROR, "IEND without all image\n");

diff --git a/libavcodec/pnm_parser.c b/libavcodec/pnm_parser.c
index bdd7f2b..9bf1fdc 100644
--- a/libavcodec/pnm_parser.c
+++ b/libavcodec/pnm_parser.c

@@ -32,6 +32,7 @@
     ParseContext *pc = s->priv_data;
     PNMContext pnmctx;
     int next;
+    int skip = 0;
 
     for (; pc->overread > 0; pc->overread--) {
         pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
@@ -43,16 +44,17 @@
         pnmctx.bytestream_end   = pc->buffer + pc->index;
     } else {
         pnmctx.bytestream_start =
-        pnmctx.bytestream       = (uint8_t *) buf; /* casts avoid warnings */
-        pnmctx.bytestream_end   = (uint8_t *) buf + buf_size;
+        pnmctx.bytestream       = (uint8_t *) buf + skip; /* casts avoid warnings */
+        pnmctx.bytestream_end   = (uint8_t *) buf + buf_size - skip;
     }
     if (ff_pnm_decode_header(avctx, &pnmctx) < 0) {
         if (pnmctx.bytestream < pnmctx.bytestream_end) {
             if (pc->index) {
                 pc->index = 0;
             } else {
-                buf++;
-                buf_size--;
+                unsigned step = FFMAX(1, pnmctx.bytestream - pnmctx.bytestream_start);
+
+                skip += step;
             }
             goto retry;
         }
@@ -60,9 +62,9 @@
     } else if (pnmctx.type < 4) {
         next = END_NOT_FOUND;
     } else {
-        next = pnmctx.bytestream - pnmctx.bytestream_start
+        next = pnmctx.bytestream - pnmctx.bytestream_start + skip
                + av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
-        if (pnmctx.bytestream_start != buf)
+        if (pnmctx.bytestream_start != buf + skip)
             next -= pc->index;
         if (next > buf_size)
             next = END_NOT_FOUND;

diff --git a/libavcodec/ppc/fft_init.c b/libavcodec/ppc/fft_init.c
index 57d7c80..733e58b 100644
--- a/libavcodec/ppc/fft_init.c
+++ b/libavcodec/ppc/fft_init.c

@@ -42,7 +42,7 @@
 void ff_fft_calc_interleave_altivec(FFTContext *s, FFTComplex *z);
 #endif
 
-#if HAVE_GNU_AS && HAVE_ALTIVEC
+#if HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX)
 static void imdct_half_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
 {
     int j, k;
@@ -146,11 +146,11 @@
         p1[k]    = vec_perm(b, b, vcprm(3,2,1,0));
     }
 }
-#endif /* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
+#endif /* HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX) */
 
 av_cold void ff_fft_init_ppc(FFTContext *s)
 {
-#if HAVE_GNU_AS && HAVE_ALTIVEC
+#if HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX)
     if (!PPC_ALTIVEC(av_get_cpu_flags()))
         return;
 

diff --git a/libavcodec/ppc/h264dsp.c b/libavcodec/ppc/h264dsp.c
index e84a058..f510544 100644
--- a/libavcodec/ppc/h264dsp.c
+++ b/libavcodec/ppc/h264dsp.c

@@ -771,12 +771,12 @@
 }
 
 #define H264_WEIGHT(W) \
-static void weight_h264_pixels ## W ## _altivec(uint8_t *block, int stride, int height, \
+static void weight_h264_pixels ## W ## _altivec(uint8_t *block, ptrdiff_t stride, int height, \
                                                 int log2_denom, int weight, int offset) \
 { \
     weight_h264_W_altivec(block, stride, height, log2_denom, weight, offset, W); \
 }\
-static void biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, int stride, int height, \
+static void biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, \
                                                   int log2_denom, int weightd, int weights, int offset) \
 { \
     biweight_h264_W_altivec(dst, src, stride, height, log2_denom, weightd, weights, offset, W); \

diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c
index 4b1037d..dcae433 100644
--- a/libavcodec/ppc/hevcdsp.c
+++ b/libavcodec/ppc/hevcdsp.c

@@ -41,8 +41,9 @@
     { 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 0x16, 0x17, 0x1E, 0x1F },
 };
 
-static void transform4x4(vec_s16 src_01, vec_s16 src_23, vec_s32 res[4],
-                         const int shift, int16_t *coeffs)
+static av_always_inline void transform4x4(vec_s16 src_01, vec_s16 src_23,
+                                          vec_s32 res[4], const int shift,
+                                          int16_t *coeffs)
 {
     vec_s16 src_02, src_13;
     vec_s32 zero = vec_splat_s32(0);
@@ -67,7 +68,8 @@
     res[3] = vec_sub(e0, o0);
 }
 
-static void scale(vec_s32 res[4], vec_s16 res_packed[2], int shift)
+static av_always_inline void scale(vec_s32 res[4], vec_s16 res_packed[2],
+                                   const int shift)
 {
     int i;
     vec_u32 v_shift = vec_splat_u32(shift);

diff --git a/libavcodec/ppc/hpeldsp_altivec.c b/libavcodec/ppc/hpeldsp_altivec.c
index 4f19521..a531b6b 100644
--- a/libavcodec/ppc/hpeldsp_altivec.c
+++ b/libavcodec/ppc/hpeldsp_altivec.c

@@ -84,7 +84,7 @@
 /* next one assumes that ((line_size % 8) == 0) */
 static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
 {
-    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
+    register vector unsigned char pixelsv, blockv;
     int i;
 
    for (i = 0; i < h; i++) {

diff --git a/libavcodec/ppc/svq1enc_altivec.c b/libavcodec/ppc/svq1enc_altivec.c
index f63f086..aa66b40 100644
--- a/libavcodec/ppc/svq1enc_altivec.c
+++ b/libavcodec/ppc/svq1enc_altivec.c

@@ -31,7 +31,7 @@
 
 #if HAVE_ALTIVEC
 static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
-                                     int size)
+                                     intptr_t size)
 {
     int i, size16 = size >> 4;
     vector signed char vpix1;

diff --git a/libavcodec/profiles.c b/libavcodec/profiles.c
index 30498ef..c31399f 100644
--- a/libavcodec/profiles.c
+++ b/libavcodec/profiles.c

@@ -30,7 +30,6 @@
     { FF_PROFILE_AAC_LD,    "LD"       },
     { FF_PROFILE_AAC_ELD,   "ELD"      },
     { FF_PROFILE_AAC_MAIN,  "Main" },
-    { FF_PROFILE_AAC_LOW,   "LC"   },
     { FF_PROFILE_AAC_SSR,   "SSR"  },
     { FF_PROFILE_AAC_LTP,   "LTP"  },
     { FF_PROFILE_UNKNOWN },
@@ -140,4 +139,16 @@
     { FF_PROFILE_UNKNOWN },
 };
 
+const AVProfile ff_av1_profiles[] = {
+    { FF_PROFILE_AV1_MAIN,         "Main" },
+    { FF_PROFILE_AV1_HIGH,         "High" },
+    { FF_PROFILE_AV1_PROFESSIONAL, "Professional" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_sbc_profiles[] = {
+    { FF_PROFILE_SBC_MSBC, "mSBC" },
+    { FF_PROFILE_UNKNOWN },
+};
+
 #endif /* !CONFIG_SMALL */

diff --git a/libavcodec/profiles.h b/libavcodec/profiles.h
index eb18b40..9d7e211 100644
--- a/libavcodec/profiles.h
+++ b/libavcodec/profiles.h

@@ -31,5 +31,7 @@
 extern const AVProfile ff_mpeg4_video_profiles[];
 extern const AVProfile ff_vc1_profiles[];
 extern const AVProfile ff_vp9_profiles[];
+extern const AVProfile ff_av1_profiles[];
+extern const AVProfile ff_sbc_profiles[];
 
 #endif /* AVCODEC_PROFILES_H */

diff --git a/libavcodec/proresdec2.c b/libavcodec/proresdec2.c
index 2ebda78..d818e5d 100644
--- a/libavcodec/proresdec2.c
+++ b/libavcodec/proresdec2.c

@@ -36,6 +36,7 @@
 #include "simple_idct.h"
 #include "proresdec.h"
 #include "proresdata.h"
+#include "thread.h"
 
 static void permute(uint8_t *dst, const uint8_t *src, const uint8_t permutation[64])
 {
@@ -117,6 +118,11 @@
         avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? AV_PIX_FMT_YUV444P10 : AV_PIX_FMT_YUV422P10;
     }
 
+    avctx->color_primaries = buf[14];
+    avctx->color_trc       = buf[15];
+    avctx->colorspace      = buf[16];
+    avctx->color_range     = AVCOL_RANGE_MPEG;
+
     ptr   = buf + 20;
     flags = buf[19];
     ff_dlog(avctx, "flags %x\n", flags);
@@ -598,8 +604,9 @@
     }
     else {
         size_t mb_max_x = slice->mb_count << (mb_x_shift - 1);
-        for (size_t i = 0; i < 16; ++i)
-            for (size_t j = 0; j < mb_max_x; ++j) {
+        size_t i, j;
+        for (i = 0; i < 16; ++i)
+            for (j = 0; j < mb_max_x; ++j) {
                 *(uint16_t*)(dest_u + (i * chroma_stride) + (j << 1)) = 511;
                 *(uint16_t*)(dest_v + (i * chroma_stride) + (j << 1)) = 511;
             }
@@ -638,6 +645,7 @@
                         AVPacket *avpkt)
 {
     ProresContext *ctx = avctx->priv_data;
+    ThreadFrame tframe = { .f = data };
     AVFrame *frame = data;
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
@@ -663,7 +671,7 @@
     buf += frame_hdr_size;
     buf_size -= frame_hdr_size;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+    if ((ret = ff_thread_get_buffer(avctx, &tframe, 0)) < 0)
         return ret;
 
  decode_picture:
@@ -691,6 +699,17 @@
     return avpkt->size;
 }
 
+#if HAVE_THREADS
+static int decode_init_thread_copy(AVCodecContext *avctx)
+{
+    ProresContext *ctx = avctx->priv_data;
+
+    ctx->slices = NULL;
+
+    return 0;
+}
+#endif
+
 static av_cold int decode_close(AVCodecContext *avctx)
 {
     ProresContext *ctx = avctx->priv_data;
@@ -702,12 +721,13 @@
 
 AVCodec ff_prores_decoder = {
     .name           = "prores",
-    .long_name      = NULL_IF_CONFIG_SMALL("ProRes"),
+    .long_name      = NULL_IF_CONFIG_SMALL("ProRes (iCodec Pro)"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_PRORES,
     .priv_data_size = sizeof(ProresContext),
     .init           = decode_init,
+    .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
     .close          = decode_close,
     .decode         = decode_frame,
-    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
 };

diff --git a/libavcodec/proresdec_lgpl.c b/libavcodec/proresdec_lgpl.c
deleted file mode 100644
index bc5bdb5..0000000
--- a/libavcodec/proresdec_lgpl.c
+++ /dev/null

@@ -1,784 +0,0 @@
-/*
- * Apple ProRes compatible decoder
- *
- * Copyright (c) 2010-2011 Maxim Poliakovski
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * This is a decoder for Apple ProRes 422 SD/HQ/LT/Proxy and ProRes 4444.
- * It is used for storing and editing high definition video data in Apple's Final Cut Pro.
- *
- * @see http://wiki.multimedia.cx/index.php?title=Apple_ProRes
- */
-
-#define LONG_BITSTREAM_READER // some ProRes vlc codes require up to 28 bits to be read at once
-
-#include <stdint.h>
-
-#include "libavutil/intmath.h"
-#include "avcodec.h"
-#include "idctdsp.h"
-#include "internal.h"
-#include "proresdata.h"
-#include "proresdsp.h"
-#include "get_bits.h"
-
-typedef struct ProresThreadData {
-    const uint8_t *index;            ///< pointers to the data of this slice
-    int slice_num;
-    int x_pos, y_pos;
-    int slice_width;
-    int prev_slice_sf;               ///< scalefactor of the previous decoded slice
-    DECLARE_ALIGNED(16, int16_t, blocks)[8 * 4 * 64];
-    DECLARE_ALIGNED(16, int16_t, qmat_luma_scaled)[64];
-    DECLARE_ALIGNED(16, int16_t, qmat_chroma_scaled)[64];
-} ProresThreadData;
-
-typedef struct ProresContext {
-    ProresDSPContext dsp;
-    AVFrame    *frame;
-    ScanTable  scantable;
-    int        scantable_type;           ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced
-
-    int        frame_type;               ///< 0 = progressive, 1 = top-field first, 2 = bottom-field first
-    int        pic_format;               ///< 2 = 422, 3 = 444
-    uint8_t    qmat_luma[64];            ///< dequantization matrix for luma
-    uint8_t    qmat_chroma[64];          ///< dequantization matrix for chroma
-    int        qmat_changed;             ///< 1 - global quantization matrices changed
-    int        total_slices;            ///< total number of slices in a picture
-    ProresThreadData *slice_data;
-    int        pic_num;
-    int        chroma_factor;
-    int        mb_chroma_factor;
-    int        num_chroma_blocks;       ///< number of chrominance blocks in a macroblock
-    int        num_x_slices;
-    int        num_y_slices;
-    int        slice_width_factor;
-    int        slice_height_factor;
-    int        num_x_mbs;
-    int        num_y_mbs;
-    int        alpha_info;
-} ProresContext;
-
-
-static av_cold int decode_init(AVCodecContext *avctx)
-{
-    ProresContext *ctx = avctx->priv_data;
-
-    ctx->total_slices     = 0;
-    ctx->slice_data       = NULL;
-
-    avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
-    ff_proresdsp_init(&ctx->dsp, avctx);
-
-    ctx->scantable_type = -1;   // set scantable type to uninitialized
-    memset(ctx->qmat_luma, 4, 64);
-    memset(ctx->qmat_chroma, 4, 64);
-
-    return 0;
-}
-
-
-static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
-                               const int data_size, AVCodecContext *avctx)
-{
-    int hdr_size, version, width, height, flags;
-    const uint8_t *ptr;
-
-    hdr_size = AV_RB16(buf);
-    if (hdr_size > data_size) {
-        av_log(avctx, AV_LOG_ERROR, "frame data too small\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    version = AV_RB16(buf + 2);
-    if (version >= 2) {
-        av_log(avctx, AV_LOG_ERROR,
-               "unsupported header version: %d\n", version);
-        return AVERROR_INVALIDDATA;
-    }
-
-    width  = AV_RB16(buf + 8);
-    height = AV_RB16(buf + 10);
-    if (width != avctx->width || height != avctx->height) {
-        av_log(avctx, AV_LOG_ERROR,
-               "picture dimension changed: old: %d x %d, new: %d x %d\n",
-               avctx->width, avctx->height, width, height);
-        return AVERROR_INVALIDDATA;
-    }
-
-    ctx->frame_type = (buf[12] >> 2) & 3;
-    if (ctx->frame_type > 2) {
-        av_log(avctx, AV_LOG_ERROR,
-               "unsupported frame type: %d\n", ctx->frame_type);
-        return AVERROR_INVALIDDATA;
-    }
-
-    ctx->chroma_factor     = (buf[12] >> 6) & 3;
-    ctx->mb_chroma_factor  = ctx->chroma_factor + 2;
-    ctx->num_chroma_blocks = (1 << ctx->chroma_factor) >> 1;
-    ctx->alpha_info        = buf[17] & 0xf;
-
-    if (ctx->alpha_info > 2) {
-        av_log(avctx, AV_LOG_ERROR, "Invalid alpha mode %d\n", ctx->alpha_info);
-        return AVERROR_INVALIDDATA;
-    }
-    if (avctx->skip_alpha) ctx->alpha_info = 0;
-
-    switch (ctx->chroma_factor) {
-    case 2:
-        avctx->pix_fmt = ctx->alpha_info ? AV_PIX_FMT_YUVA422P10
-                                         : AV_PIX_FMT_YUV422P10;
-        break;
-    case 3:
-        avctx->pix_fmt = ctx->alpha_info ? AV_PIX_FMT_YUVA444P10
-                                         : AV_PIX_FMT_YUV444P10;
-        break;
-    default:
-        av_log(avctx, AV_LOG_ERROR,
-               "unsupported picture format: %d\n", ctx->pic_format);
-        return AVERROR_INVALIDDATA;
-    }
-
-    if (ctx->scantable_type != ctx->frame_type) {
-        if (!ctx->frame_type)
-            ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable,
-                              ff_prores_progressive_scan);
-        else
-            ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable,
-                              ff_prores_interlaced_scan);
-        ctx->scantable_type = ctx->frame_type;
-    }
-
-    if (ctx->frame_type) {      /* if interlaced */
-        ctx->frame->interlaced_frame = 1;
-        ctx->frame->top_field_first  = ctx->frame_type & 1;
-    } else {
-        ctx->frame->interlaced_frame = 0;
-    }
-
-    avctx->color_primaries = buf[14];
-    avctx->color_trc       = buf[15];
-    avctx->colorspace      = buf[16];
-
-    ctx->qmat_changed = 0;
-    ptr   = buf + 20;
-    flags = buf[19];
-    if (flags & 2) {
-        if (ptr - buf > hdr_size - 64) {
-            av_log(avctx, AV_LOG_ERROR, "header data too small\n");
-            return AVERROR_INVALIDDATA;
-        }
-        if (memcmp(ctx->qmat_luma, ptr, 64)) {
-            memcpy(ctx->qmat_luma, ptr, 64);
-            ctx->qmat_changed = 1;
-        }
-        ptr += 64;
-    } else {
-        memset(ctx->qmat_luma, 4, 64);
-        ctx->qmat_changed = 1;
-    }
-
-    if (flags & 1) {
-        if (ptr - buf > hdr_size - 64) {
-            av_log(avctx, AV_LOG_ERROR, "header data too small\n");
-            return -1;
-        }
-        if (memcmp(ctx->qmat_chroma, ptr, 64)) {
-            memcpy(ctx->qmat_chroma, ptr, 64);
-            ctx->qmat_changed = 1;
-        }
-    } else {
-        memset(ctx->qmat_chroma, 4, 64);
-        ctx->qmat_changed = 1;
-    }
-
-    return hdr_size;
-}
-
-
-static int decode_picture_header(ProresContext *ctx, const uint8_t *buf,
-                                 const int data_size, AVCodecContext *avctx)
-{
-    int   i, hdr_size, pic_data_size, num_slices;
-    int   slice_width_factor, slice_height_factor;
-    int   remainder, num_x_slices;
-    const uint8_t *data_ptr, *index_ptr;
-
-    hdr_size = data_size > 0 ? buf[0] >> 3 : 0;
-    if (hdr_size < 8 || hdr_size > data_size) {
-        av_log(avctx, AV_LOG_ERROR, "picture header too small\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    pic_data_size = AV_RB32(buf + 1);
-    if (pic_data_size > data_size) {
-        av_log(avctx, AV_LOG_ERROR, "picture data too small\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    slice_width_factor  = buf[7] >> 4;
-    slice_height_factor = buf[7] & 0xF;
-    if (slice_width_factor > 3 || slice_height_factor) {
-        av_log(avctx, AV_LOG_ERROR,
-               "unsupported slice dimension: %d x %d\n",
-               1 << slice_width_factor, 1 << slice_height_factor);
-        return AVERROR_INVALIDDATA;
-    }
-
-    ctx->slice_width_factor  = slice_width_factor;
-    ctx->slice_height_factor = slice_height_factor;
-
-    ctx->num_x_mbs = (avctx->width + 15) >> 4;
-    ctx->num_y_mbs = (avctx->height +
-                      (1 << (4 + ctx->frame->interlaced_frame)) - 1) >>
-                     (4 + ctx->frame->interlaced_frame);
-
-    remainder    = av_mod_uintp2(ctx->num_x_mbs, slice_width_factor);
-    num_x_slices = (ctx->num_x_mbs >> slice_width_factor) + (remainder & 1) +
-                   ((remainder >> 1) & 1) + ((remainder >> 2) & 1);
-
-    num_slices = num_x_slices * ctx->num_y_mbs;
-    if (num_slices != AV_RB16(buf + 5)) {
-        av_log(avctx, AV_LOG_ERROR, "invalid number of slices\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    if (ctx->total_slices != num_slices) {
-        av_freep(&ctx->slice_data);
-        ctx->slice_data = av_malloc_array(num_slices + 1, sizeof(ctx->slice_data[0]));
-        if (!ctx->slice_data)
-            return AVERROR(ENOMEM);
-        ctx->total_slices = num_slices;
-    }
-
-    if (hdr_size + num_slices * 2 > data_size) {
-        av_log(avctx, AV_LOG_ERROR, "slice table too small\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    /* parse slice table allowing quick access to the slice data */
-    index_ptr = buf + hdr_size;
-    data_ptr = index_ptr + num_slices * 2;
-
-    for (i = 0; i < num_slices; i++) {
-        ctx->slice_data[i].index = data_ptr;
-        ctx->slice_data[i].prev_slice_sf = 0;
-        data_ptr += AV_RB16(index_ptr + i * 2);
-    }
-    ctx->slice_data[i].index = data_ptr;
-    ctx->slice_data[i].prev_slice_sf = 0;
-
-    if (data_ptr > buf + data_size) {
-        av_log(avctx, AV_LOG_ERROR, "out of slice data\n");
-        return -1;
-    }
-
-    return pic_data_size;
-}
-
-
-/**
- * Read an unsigned rice/exp golomb codeword.
- */
-static inline int decode_vlc_codeword(GetBitContext *gb, unsigned codebook)
-{
-    unsigned int rice_order, exp_order, switch_bits;
-    unsigned int buf, code;
-    int log, prefix_len, len;
-
-    OPEN_READER(re, gb);
-    UPDATE_CACHE(re, gb);
-    buf = GET_CACHE(re, gb);
-
-    /* number of prefix bits to switch between Rice and expGolomb */
-    switch_bits = (codebook & 3) + 1;
-    rice_order  = codebook >> 5;        /* rice code order */
-    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
-
-    log = 31 - av_log2(buf); /* count prefix bits (zeroes) */
-
-    if (log < switch_bits) { /* ok, we got a rice code */
-        if (!rice_order) {
-            /* shortcut for faster decoding of rice codes without remainder */
-            code = log;
-            LAST_SKIP_BITS(re, gb, log + 1);
-        } else {
-            prefix_len = log + 1;
-            code = (log << rice_order) + NEG_USR32(buf << prefix_len, rice_order);
-            LAST_SKIP_BITS(re, gb, prefix_len + rice_order);
-        }
-    } else { /* otherwise we got a exp golomb code */
-        len  = (log << 1) - switch_bits + exp_order + 1;
-        code = NEG_USR32(buf, len) - (1 << exp_order) + (switch_bits << rice_order);
-        LAST_SKIP_BITS(re, gb, len);
-    }
-
-    CLOSE_READER(re, gb);
-
-    return code;
-}
-
-#define LSB2SIGN(x) (-((x) & 1))
-#define TOSIGNED(x) (((x) >> 1) ^ LSB2SIGN(x))
-
-/**
- * Decode DC coefficients for all blocks in a slice.
- */
-static inline void decode_dc_coeffs(GetBitContext *gb, int16_t *out,
-                                    int nblocks)
-{
-    int16_t prev_dc;
-    int     i, sign;
-    int16_t delta;
-    unsigned int code;
-
-    code   = decode_vlc_codeword(gb, FIRST_DC_CB);
-    out[0] = prev_dc = TOSIGNED(code);
-
-    out   += 64; /* move to the DC coeff of the next block */
-    delta  = 3;
-
-    for (i = 1; i < nblocks; i++, out += 64) {
-        code = decode_vlc_codeword(gb, ff_prores_dc_codebook[FFMIN(FFABS(delta), 3)]);
-
-        sign     = -(((delta >> 15) & 1) ^ (code & 1));
-        delta    = (((code + 1) >> 1) ^ sign) - sign;
-        prev_dc += delta;
-        out[0]   = prev_dc;
-    }
-}
-
-#define MAX_PADDING 16
-
-/**
- * Decode AC coefficients for all blocks in a slice.
- */
-static inline int decode_ac_coeffs(GetBitContext *gb, int16_t *out,
-                                   int blocks_per_slice,
-                                   int plane_size_factor,
-                                   const uint8_t *scan)
-{
-    int pos, block_mask, run, level, sign, run_cb_index, lev_cb_index;
-    int max_coeffs, bits_left;
-
-    /* set initial prediction values */
-    run   = 4;
-    level = 2;
-
-    max_coeffs = blocks_per_slice << 6;
-    block_mask = blocks_per_slice - 1;
-
-    for (pos = blocks_per_slice - 1; pos < max_coeffs;) {
-        run_cb_index = ff_prores_run_to_cb_index[FFMIN(run, 15)];
-        lev_cb_index = ff_prores_lev_to_cb_index[FFMIN(level, 9)];
-
-        bits_left = get_bits_left(gb);
-        if (bits_left <= 0 || (bits_left <= MAX_PADDING && !show_bits(gb, bits_left)))
-            return 0;
-
-        run = decode_vlc_codeword(gb, ff_prores_ac_codebook[run_cb_index]);
-        if (run < 0)
-            return AVERROR_INVALIDDATA;
-
-        bits_left = get_bits_left(gb);
-        if (bits_left <= 0 || (bits_left <= MAX_PADDING && !show_bits(gb, bits_left)))
-            return AVERROR_INVALIDDATA;
-
-        level = decode_vlc_codeword(gb, ff_prores_ac_codebook[lev_cb_index]) + 1;
-        if (level < 0)
-            return AVERROR_INVALIDDATA;
-
-        pos += run + 1;
-        if (pos >= max_coeffs)
-            break;
-
-        sign = get_sbits(gb, 1);
-        out[((pos & block_mask) << 6) + scan[pos >> plane_size_factor]] =
-            (level ^ sign) - sign;
-    }
-
-    return 0;
-}
-
-
-/**
- * Decode a slice plane (luma or chroma).
- */
-static int decode_slice_plane(ProresContext *ctx, ProresThreadData *td,
-                              const uint8_t *buf,
-                              int data_size, uint16_t *out_ptr,
-                              int linesize, int mbs_per_slice,
-                              int blocks_per_mb, int plane_size_factor,
-                              const int16_t *qmat, int is_chroma)
-{
-    GetBitContext gb;
-    int16_t *block_ptr;
-    int mb_num, blocks_per_slice, ret;
-
-    blocks_per_slice = mbs_per_slice * blocks_per_mb;
-
-    memset(td->blocks, 0, 8 * 4 * 64 * sizeof(*td->blocks));
-
-    init_get_bits(&gb, buf, data_size << 3);
-
-    decode_dc_coeffs(&gb, td->blocks, blocks_per_slice);
-
-    ret = decode_ac_coeffs(&gb, td->blocks, blocks_per_slice,
-                           plane_size_factor, ctx->scantable.permutated);
-    if (ret < 0)
-        return ret;
-
-    /* inverse quantization, inverse transform and output */
-    block_ptr = td->blocks;
-
-    if (!is_chroma) {
-        for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) {
-            ctx->dsp.idct_put(out_ptr,                    linesize, block_ptr, qmat);
-            block_ptr += 64;
-            if (blocks_per_mb > 2) {
-                ctx->dsp.idct_put(out_ptr + 8,            linesize, block_ptr, qmat);
-                block_ptr += 64;
-            }
-            ctx->dsp.idct_put(out_ptr + linesize * 4,     linesize, block_ptr, qmat);
-            block_ptr += 64;
-            if (blocks_per_mb > 2) {
-                ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr, qmat);
-                block_ptr += 64;
-            }
-        }
-    } else {
-        for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) {
-            ctx->dsp.idct_put(out_ptr,                    linesize, block_ptr, qmat);
-            block_ptr += 64;
-            ctx->dsp.idct_put(out_ptr + linesize * 4,     linesize, block_ptr, qmat);
-            block_ptr += 64;
-            if (blocks_per_mb > 2) {
-                ctx->dsp.idct_put(out_ptr + 8,            linesize, block_ptr, qmat);
-                block_ptr += 64;
-                ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr, qmat);
-                block_ptr += 64;
-            }
-        }
-    }
-    return 0;
-}
-
-
-static void unpack_alpha(GetBitContext *gb, uint16_t *dst, int num_coeffs,
-                         const int num_bits)
-{
-    const int mask = (1 << num_bits) - 1;
-    int i, idx, val, alpha_val;
-
-    idx       = 0;
-    alpha_val = mask;
-    do {
-        do {
-            if (get_bits1(gb))
-                val = get_bits(gb, num_bits);
-            else {
-                int sign;
-                val  = get_bits(gb, num_bits == 16 ? 7 : 4);
-                sign = val & 1;
-                val  = (val + 2) >> 1;
-                if (sign)
-                    val = -val;
-            }
-            alpha_val = (alpha_val + val) & mask;
-            if (num_bits == 16)
-                dst[idx++] = alpha_val >> 6;
-            else
-                dst[idx++] = (alpha_val << 2) | (alpha_val >> 6);
-            if (idx >= num_coeffs) {
-                break;
-            }
-        } while (get_bits1(gb));
-        val = get_bits(gb, 4);
-        if (!val)
-            val = get_bits(gb, 11);
-        if (idx + val > num_coeffs)
-            val = num_coeffs - idx;
-        if (num_bits == 16)
-            for (i = 0; i < val; i++)
-                dst[idx++] = alpha_val >> 6;
-        else
-            for (i = 0; i < val; i++)
-                dst[idx++] = (alpha_val << 2) | (alpha_val >> 6);
-    } while (idx < num_coeffs);
-}
-
-/**
- * Decode alpha slice plane.
- */
-static void decode_alpha_plane(ProresContext *ctx, ProresThreadData *td,
-                               const uint8_t *buf, int data_size,
-                               uint16_t *out_ptr, int linesize,
-                               int mbs_per_slice)
-{
-    GetBitContext gb;
-    int i;
-    uint16_t *block_ptr;
-
-    memset(td->blocks, 0, 8 * 4 * 64 * sizeof(*td->blocks));
-
-    init_get_bits(&gb, buf, data_size << 3);
-
-    if (ctx->alpha_info == 2)
-        unpack_alpha(&gb, td->blocks, mbs_per_slice * 4 * 64, 16);
-    else
-        unpack_alpha(&gb, td->blocks, mbs_per_slice * 4 * 64, 8);
-
-    block_ptr = td->blocks;
-
-    for (i = 0; i < 16; i++) {
-        memcpy(out_ptr, block_ptr, 16 * mbs_per_slice * sizeof(*out_ptr));
-        out_ptr   += linesize >> 1;
-        block_ptr += 16 * mbs_per_slice;
-    }
-}
-
-static int decode_slice(AVCodecContext *avctx, void *tdata)
-{
-    ProresThreadData *td = tdata;
-    ProresContext *ctx = avctx->priv_data;
-    int mb_x_pos  = td->x_pos;
-    int mb_y_pos  = td->y_pos;
-    int pic_num   = ctx->pic_num;
-    int slice_num = td->slice_num;
-    int mbs_per_slice = td->slice_width;
-    const uint8_t *buf;
-    uint8_t *y_data, *u_data, *v_data, *a_data;
-    AVFrame *pic = ctx->frame;
-    int i, sf, slice_width_factor;
-    int slice_data_size, hdr_size;
-    int y_data_size, u_data_size, v_data_size, a_data_size;
-    int y_linesize, u_linesize, v_linesize, a_linesize;
-    int coff[4];
-    int ret;
-
-    buf             = ctx->slice_data[slice_num].index;
-    slice_data_size = ctx->slice_data[slice_num + 1].index - buf;
-
-    slice_width_factor = av_log2(mbs_per_slice);
-
-    y_data     = pic->data[0];
-    u_data     = pic->data[1];
-    v_data     = pic->data[2];
-    a_data     = pic->data[3];
-    y_linesize = pic->linesize[0];
-    u_linesize = pic->linesize[1];
-    v_linesize = pic->linesize[2];
-    a_linesize = pic->linesize[3];
-
-    if (pic->interlaced_frame) {
-        if (!(pic_num ^ pic->top_field_first)) {
-            y_data += y_linesize;
-            u_data += u_linesize;
-            v_data += v_linesize;
-            if (a_data)
-                a_data += a_linesize;
-        }
-        y_linesize <<= 1;
-        u_linesize <<= 1;
-        v_linesize <<= 1;
-        a_linesize <<= 1;
-    }
-    y_data += (mb_y_pos << 4) * y_linesize + (mb_x_pos << 5);
-    u_data += (mb_y_pos << 4) * u_linesize + (mb_x_pos << ctx->mb_chroma_factor);
-    v_data += (mb_y_pos << 4) * v_linesize + (mb_x_pos << ctx->mb_chroma_factor);
-    if (a_data)
-        a_data += (mb_y_pos << 4) * a_linesize + (mb_x_pos << 5);
-
-    if (slice_data_size < 6) {
-        av_log(avctx, AV_LOG_ERROR, "slice data too small\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    /* parse slice header */
-    hdr_size    = buf[0] >> 3;
-    coff[0]     = hdr_size;
-    y_data_size = AV_RB16(buf + 2);
-    coff[1]     = coff[0] + y_data_size;
-    u_data_size = AV_RB16(buf + 4);
-    coff[2]     = coff[1] + u_data_size;
-    v_data_size = hdr_size > 7 ? AV_RB16(buf + 6) : slice_data_size - coff[2];
-    coff[3]     = coff[2] + v_data_size;
-    a_data_size = ctx->alpha_info ? slice_data_size - coff[3] : 0;
-
-    /* if V or alpha component size is negative that means that previous
-       component sizes are too large */
-    if (v_data_size < 0 || a_data_size < 0 || hdr_size < 6 || coff[3] > slice_data_size) {
-        av_log(avctx, AV_LOG_ERROR, "invalid data size\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    sf = av_clip(buf[1], 1, 224);
-    sf = sf > 128 ? (sf - 96) << 2 : sf;
-
-    /* scale quantization matrixes according with slice's scale factor */
-    /* TODO: this can be SIMD-optimized a lot */
-    if (ctx->qmat_changed || sf != td->prev_slice_sf) {
-        td->prev_slice_sf = sf;
-        for (i = 0; i < 64; i++) {
-            td->qmat_luma_scaled[ctx->dsp.idct_permutation[i]]   = ctx->qmat_luma[i]   * sf;
-            td->qmat_chroma_scaled[ctx->dsp.idct_permutation[i]] = ctx->qmat_chroma[i] * sf;
-        }
-    }
-
-    /* decode luma plane */
-    ret = decode_slice_plane(ctx, td, buf + coff[0], y_data_size,
-                             (uint16_t*) y_data, y_linesize,
-                             mbs_per_slice, 4, slice_width_factor + 2,
-                             td->qmat_luma_scaled, 0);
-
-    if (ret < 0)
-        return ret;
-
-    /* decode U chroma plane */
-    ret = decode_slice_plane(ctx, td, buf + coff[1], u_data_size,
-                             (uint16_t*) u_data, u_linesize,
-                             mbs_per_slice, ctx->num_chroma_blocks,
-                             slice_width_factor + ctx->chroma_factor - 1,
-                             td->qmat_chroma_scaled, 1);
-    if (ret < 0)
-        return ret;
-
-    /* decode V chroma plane */
-    ret = decode_slice_plane(ctx, td, buf + coff[2], v_data_size,
-                             (uint16_t*) v_data, v_linesize,
-                             mbs_per_slice, ctx->num_chroma_blocks,
-                             slice_width_factor + ctx->chroma_factor - 1,
-                             td->qmat_chroma_scaled, 1);
-    if (ret < 0)
-        return ret;
-
-    /* decode alpha plane if available */
-    if (a_data && a_data_size)
-        decode_alpha_plane(ctx, td, buf + coff[3], a_data_size,
-                           (uint16_t*) a_data, a_linesize,
-                           mbs_per_slice);
-
-    return 0;
-}
-
-
-static int decode_picture(ProresContext *ctx, int pic_num,
-                          AVCodecContext *avctx)
-{
-    int slice_num, slice_width, x_pos, y_pos;
-
-    slice_num = 0;
-
-    ctx->pic_num = pic_num;
-    for (y_pos = 0; y_pos < ctx->num_y_mbs; y_pos++) {
-        slice_width = 1 << ctx->slice_width_factor;
-
-        for (x_pos = 0; x_pos < ctx->num_x_mbs && slice_width;
-             x_pos += slice_width) {
-            while (ctx->num_x_mbs - x_pos < slice_width)
-                slice_width >>= 1;
-
-            ctx->slice_data[slice_num].slice_num   = slice_num;
-            ctx->slice_data[slice_num].x_pos       = x_pos;
-            ctx->slice_data[slice_num].y_pos       = y_pos;
-            ctx->slice_data[slice_num].slice_width = slice_width;
-
-            slice_num++;
-        }
-    }
-
-    return avctx->execute(avctx, decode_slice,
-                          ctx->slice_data, NULL, slice_num,
-                          sizeof(ctx->slice_data[0]));
-}
-
-
-#define MOVE_DATA_PTR(nbytes) buf += (nbytes); buf_size -= (nbytes)
-
-static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
-                        AVPacket *avpkt)
-{
-    ProresContext *ctx = avctx->priv_data;
-    const uint8_t *buf = avpkt->data;
-    int buf_size       = avpkt->size;
-    int frame_hdr_size, pic_num, pic_data_size;
-
-    ctx->frame            = data;
-    ctx->frame->pict_type = AV_PICTURE_TYPE_I;
-    ctx->frame->key_frame = 1;
-
-    /* check frame atom container */
-    if (buf_size < 28 || buf_size < AV_RB32(buf) ||
-        AV_RB32(buf + 4) != FRAME_ID) {
-        av_log(avctx, AV_LOG_ERROR, "invalid frame\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    MOVE_DATA_PTR(8);
-
-    frame_hdr_size = decode_frame_header(ctx, buf, buf_size, avctx);
-    if (frame_hdr_size < 0)
-        return AVERROR_INVALIDDATA;
-
-    MOVE_DATA_PTR(frame_hdr_size);
-
-    if (ff_get_buffer(avctx, ctx->frame, 0) < 0)
-        return -1;
-
-    for (pic_num = 0; ctx->frame->interlaced_frame - pic_num + 1; pic_num++) {
-        pic_data_size = decode_picture_header(ctx, buf, buf_size, avctx);
-        if (pic_data_size < 0)
-            return AVERROR_INVALIDDATA;
-
-        if (decode_picture(ctx, pic_num, avctx))
-            return -1;
-
-        MOVE_DATA_PTR(pic_data_size);
-    }
-
-    ctx->frame = NULL;
-    *got_frame = 1;
-
-    return avpkt->size;
-}
-
-
-static av_cold int decode_close(AVCodecContext *avctx)
-{
-    ProresContext *ctx = avctx->priv_data;
-
-    av_freep(&ctx->slice_data);
-
-    return 0;
-}
-
-
-AVCodec ff_prores_lgpl_decoder = {
-    .name           = "prores_lgpl",
-    .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_PRORES,
-    .priv_data_size = sizeof(ProresContext),
-    .init           = decode_init,
-    .close          = decode_close,
-    .decode         = decode_frame,
-    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS,
-};

diff --git a/libavcodec/proresenc_kostya.c b/libavcodec/proresenc_kostya.c
index 149dc81..81f3865 100644
--- a/libavcodec/proresenc_kostya.c
+++ b/libavcodec/proresenc_kostya.c

@@ -51,9 +51,11 @@
 
 enum {
     QUANT_MAT_PROXY = 0,
+    QUANT_MAT_PROXY_CHROMA,
     QUANT_MAT_LT,
     QUANT_MAT_STANDARD,
     QUANT_MAT_HQ,
+    QUANT_MAT_XQ_LUMA,
     QUANT_MAT_DEFAULT,
 };
 
@@ -68,6 +70,16 @@
         13, 63, 63, 63, 63, 63, 63, 63,
         63, 63, 63, 63, 63, 63, 63, 63,
     },
+    { // proxy chromas
+        4,  7,  9, 11, 13, 14, 63, 63,
+        7,  7, 11, 12, 14, 63, 63, 63,
+        9, 11, 13, 14, 63, 63, 63, 63,
+        11, 11, 13, 14, 63, 63, 63, 63,
+        11, 13, 14, 63, 63, 63, 63, 63,
+        13, 14, 63, 63, 63, 63, 63, 63,
+        13, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 63, 63
+    },
     { // LT
          4,  5,  6,  7,  9, 11, 13, 15,
          5,  5,  7,  8, 11, 13, 15, 17,
@@ -98,6 +110,16 @@
          4,  4,  4,  4,  5,  5,  6,  7,
          4,  4,  4,  4,  5,  6,  7,  7,
     },
+    { // XQ luma
+        2,  2,  2,  2,  2,  2,  2,  2,
+        2,  2,  2,  2,  2,  2,  2,  2,
+        2,  2,  2,  2,  2,  2,  2,  2,
+        2,  2,  2,  2,  2,  2,  2,  3,
+        2,  2,  2,  2,  2,  2,  3,  3,
+        2,  2,  2,  2,  2,  3,  3,  3,
+        2,  2,  2,  2,  3,  3,  3,  4,
+        2,  2,  2,  2,  3,  3,  4,  4,
+    },
     { // codec default
          4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  4,  4,  4,  4,  4,
@@ -125,6 +147,7 @@
     int         max_quant;
     int         br_tab[NUM_MB_LIMITS];
     int         quant;
+    int         quant_chroma;
 } prores_profile_info[6] = {
     {
         .full_name = "proxy",
@@ -133,6 +156,7 @@
         .max_quant = 8,
         .br_tab    = { 300, 242, 220, 194 },
         .quant     = QUANT_MAT_PROXY,
+        .quant_chroma = QUANT_MAT_PROXY_CHROMA,
     },
     {
         .full_name = "LT",
@@ -141,6 +165,7 @@
         .max_quant = 9,
         .br_tab    = { 720, 560, 490, 440 },
         .quant     = QUANT_MAT_LT,
+        .quant_chroma = QUANT_MAT_LT,
     },
     {
         .full_name = "standard",
@@ -149,6 +174,7 @@
         .max_quant = 6,
         .br_tab    = { 1050, 808, 710, 632 },
         .quant     = QUANT_MAT_STANDARD,
+        .quant_chroma = QUANT_MAT_STANDARD,
     },
     {
         .full_name = "high quality",
@@ -157,6 +183,7 @@
         .max_quant = 6,
         .br_tab    = { 1566, 1216, 1070, 950 },
         .quant     = QUANT_MAT_HQ,
+        .quant_chroma = QUANT_MAT_HQ,
     },
     {
         .full_name = "4444",
@@ -165,6 +192,7 @@
         .max_quant = 6,
         .br_tab    = { 2350, 1828, 1600, 1425 },
         .quant     = QUANT_MAT_HQ,
+        .quant_chroma = QUANT_MAT_HQ,
     },
     {
         .full_name = "4444XQ",
@@ -172,7 +200,8 @@
         .min_quant = 1,
         .max_quant = 6,
         .br_tab    = { 3525, 2742, 2400, 2137 },
-        .quant     = QUANT_MAT_HQ,
+        .quant     = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
+        .quant_chroma = QUANT_MAT_HQ,
     }
 };
 
@@ -200,8 +229,10 @@
     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
     int16_t quants[MAX_STORED_Q][64];
+    int16_t quants_chroma[MAX_STORED_Q][64];
     int16_t custom_q[64];
     const uint8_t *quant_mat;
+    const uint8_t *quant_chroma_mat;
     const uint8_t *scantable;
 
     void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
@@ -527,6 +558,7 @@
     ptrdiff_t linesize;
     int plane_factor, is_chroma;
     uint16_t *qmat;
+    uint16_t *qmat_chroma;
 
     if (ctx->pictures_per_frame == 1)
         line_add = 0;
@@ -535,12 +567,17 @@
 
     if (ctx->force_quant) {
         qmat = ctx->quants[0];
+        qmat_chroma = ctx->quants_chroma[0];
     } else if (quant < MAX_STORED_Q) {
         qmat = ctx->quants[quant];
+        qmat_chroma = ctx->quants_chroma[quant];
     } else {
         qmat = ctx->custom_q;
-        for (i = 0; i < 64; i++)
+        qmat_chroma = ctx->custom_q;
+        for (i = 0; i < 64; i++) {
             qmat[i] = ctx->quant_mat[i] * quant;
+            qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
+        }
     }
 
     for (i = 0; i < ctx->num_planes; i++) {
@@ -569,10 +606,17 @@
                            pwidth, avctx->height / ctx->pictures_per_frame,
                            ctx->blocks[0], ctx->emu_buf,
                            mbs_per_slice, num_cblocks, is_chroma);
-            sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
-                                          mbs_per_slice, ctx->blocks[0],
-                                          num_cblocks, plane_factor,
-                                          qmat);
+            if (!is_chroma) {/* luma quant */
+                sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
+                                              mbs_per_slice, ctx->blocks[0],
+                                              num_cblocks, plane_factor,
+                                              qmat);
+            } else { /* chroma plane */
+                sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
+                                              mbs_per_slice, ctx->blocks[0],
+                                              num_cblocks, plane_factor,
+                                              qmat_chroma);
+            }
         } else {
             get_alpha_data(ctx, src, linesize, xp, yp,
                            pwidth, avctx->height / ctx->pictures_per_frame,
@@ -712,10 +756,9 @@
         return dbits + 1;
 }
 
-static int estimate_alpha_plane(ProresContext *ctx, int *error,
+static int estimate_alpha_plane(ProresContext *ctx,
                                 const uint16_t *src, ptrdiff_t linesize,
-                                int mbs_per_slice, int quant,
-                                int16_t *blocks)
+                                int mbs_per_slice, int16_t *blocks)
 {
     const int abits = ctx->alpha_bits;
     const int mask  = (1 << abits) - 1;
@@ -725,7 +768,6 @@
     int run = 0;
     int bits;
 
-    *error = 0;
     cur = blocks[idx++];
     bits = est_alpha_diff(cur, prev, abits);
     prev = cur;
@@ -773,7 +815,9 @@
     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
     int overquant;
     uint16_t *qmat;
+    uint16_t *qmat_chroma;
     int linesize[4], line_add;
+    int alpha_bits = 0;
 
     if (ctx->pictures_per_frame == 1)
         line_add = 0;
@@ -819,20 +863,25 @@
         td->nodes[trellis_node + q].quant     = q;
     }
 
+    if (ctx->alpha_bits)
+        alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
+                                          mbs_per_slice, td->blocks[3]);
     // todo: maybe perform coarser quantising to fit into frame size when needed
     for (q = min_quant; q <= max_quant; q++) {
-        bits  = 0;
+        bits  = alpha_bits;
         error = 0;
-        for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
+        bits += estimate_slice_plane(ctx, &error, 0,
+                                     src, linesize[0],
+                                     mbs_per_slice,
+                                     num_cblocks[0], plane_factor[0],
+                                     ctx->quants[q], td); /* estimate luma plane */
+        for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
             bits += estimate_slice_plane(ctx, &error, i,
                                          src, linesize[i],
                                          mbs_per_slice,
                                          num_cblocks[i], plane_factor[i],
-                                         ctx->quants[q], td);
+                                         ctx->quants_chroma[q], td);
         }
-        if (ctx->alpha_bits)
-            bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
-                                         mbs_per_slice, q, td->blocks[3]);
         if (bits > 65000 * 8)
             error = SCORE_LIMIT;
 
@@ -845,25 +894,31 @@
         overquant = max_quant;
     } else {
         for (q = max_quant + 1; q < 128; q++) {
-            bits  = 0;
+            bits  = alpha_bits;
             error = 0;
             if (q < MAX_STORED_Q) {
                 qmat = ctx->quants[q];
+                qmat_chroma = ctx->quants_chroma[q];
             } else {
                 qmat = td->custom_q;
-                for (i = 0; i < 64; i++)
+                qmat_chroma = td->custom_q;
+                for (i = 0; i < 64; i++) {
                     qmat[i] = ctx->quant_mat[i] * q;
+                    qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
+                }
             }
-            for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
+            bits += estimate_slice_plane(ctx, &error, 0,
+                                         src, linesize[0],
+                                         mbs_per_slice,
+                                         num_cblocks[0], plane_factor[0],
+                                         qmat, td);/* estimate luma plane */
+            for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
                 bits += estimate_slice_plane(ctx, &error, i,
                                              src, linesize[i],
                                              mbs_per_slice,
                                              num_cblocks[i], plane_factor[i],
-                                             qmat, td);
+                                             qmat_chroma, td);
             }
-            if (ctx->alpha_bits)
-                bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
-                                             mbs_per_slice, q, td->blocks[3]);
             if (bits <= ctx->bits_per_mb * mbs_per_slice)
                 break;
         }
@@ -1198,10 +1253,13 @@
     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
     ctx->pictures_per_frame = 1 + interlaced;
 
-    if (ctx->quant_sel == -1)
+    if (ctx->quant_sel == -1) {
         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
-    else
+        ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
+    } else {
         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
+        ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
+    }
 
     if (strlen(ctx->vendor) != 4) {
         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
@@ -1226,8 +1284,10 @@
         min_quant = ctx->profile_info->min_quant;
         max_quant = ctx->profile_info->max_quant;
         for (i = min_quant; i < MAX_STORED_Q; i++) {
-            for (j = 0; j < 64; j++)
+            for (j = 0; j < 64; j++) {
                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
+                ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
+            }
         }
 
         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
@@ -1258,6 +1318,7 @@
         }
     } else {
         int ls = 0;
+        int ls_chroma = 0;
 
         if (ctx->force_quant > 64) {
             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
@@ -1266,12 +1327,14 @@
 
         for (j = 0; j < 64; j++) {
             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
+            ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
+            ls_chroma += av_log2((1 << 11)  / ctx->quants_chroma[0][j]) * 2 + 1;
         }
 
-        ctx->bits_per_mb = ls * 8;
+        ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
         if (ctx->chroma_factor == CFACTOR_Y444)
-            ctx->bits_per_mb += ls * 4;
+            ctx->bits_per_mb += ls_chroma * 4;
     }
 
     ctx->frame_size_upper_bound = (ctx->pictures_per_frame *

diff --git a/libavcodec/prosumer.c b/libavcodec/prosumer.c
new file mode 100644
index 0000000..6e98677
--- /dev/null
+++ b/libavcodec/prosumer.c

@@ -0,0 +1,379 @@
+/*
+ * Brooktree ProSumer Video decoder
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+
+typedef struct ProSumerContext {
+    GetByteContext gb;
+    PutByteContext pb;
+
+    unsigned stride;
+    unsigned size;
+    uint32_t lut[0x10000];
+    uint8_t *initial_line;
+    uint8_t *decbuffer;
+} ProSumerContext;
+
+#define PAIR(high, low) (((uint64_t)(high) << 32) | low)
+
+static int decompress(GetByteContext *gb, int size, PutByteContext *pb, const uint32_t *lut)
+{
+    int pos, idx, cnt, fill;
+    uint32_t a, b, c;
+
+    bytestream2_skip(gb, 32);
+    cnt = 4;
+    a = bytestream2_get_le32(gb);
+    idx = a >> 20;
+    b = lut[2 * idx];
+
+    while (1) {
+        if (bytestream2_get_bytes_left_p(pb) <= 0)
+            return 0;
+        if (((b & 0xFF00u) != 0x8000u) || (b & 0xFFu)) {
+            if ((b & 0xFF00u) != 0x8000u) {
+                bytestream2_put_le16(pb, b);
+            } else if (b & 0xFFu) {
+                idx = 0;
+                for (int i = 0; i < (b & 0xFFu); i++)
+                    bytestream2_put_le32(pb, 0);
+            }
+            c = b >> 16;
+            if (c & 0xFF00u) {
+                c = (((c >> 8) & 0xFFu) | (c & 0xFF00)) & 0xF00F;
+                fill = lut[2 * idx + 1];
+                if ((c & 0xFF00u) == 0x1000) {
+                    bytestream2_put_le16(pb, fill);
+                    c &= 0xFFFF00FFu;
+                } else {
+                    bytestream2_put_le32(pb, fill);
+                    c &= 0xFFFF00FFu;
+                }
+            }
+            while (c) {
+                a <<= 4;
+                cnt--;
+                if (!cnt) {
+                    if (bytestream2_get_bytes_left(gb) <= 0) {
+                        if (!a)
+                            return 0;
+                        cnt = 4;
+                    } else {
+                        pos = bytestream2_tell(gb) ^ 2;
+                        bytestream2_seek(gb, pos, SEEK_SET);
+                        AV_WN16(&a, bytestream2_peek_le16(gb));
+                        pos = pos ^ 2;
+                        bytestream2_seek(gb, pos, SEEK_SET);
+                        bytestream2_skip(gb, 2);
+                        cnt = 4;
+                    }
+                }
+                c--;
+            }
+            idx = a >> 20;
+            b = lut[2 * idx];
+            continue;
+        }
+        idx = 2;
+        while (idx) {
+            a <<= 4;
+            cnt--;
+            if (cnt) {
+                idx--;
+                continue;
+            }
+            if (bytestream2_get_bytes_left(gb) <= 0) {
+                if (a) {
+                    cnt = 4;
+                    idx--;
+                    continue;
+                }
+                return 0;
+            }
+            pos = bytestream2_tell(gb) ^ 2;
+            bytestream2_seek(gb, pos, SEEK_SET);
+            AV_WN16(&a, bytestream2_peek_le16(gb));
+            pos = pos ^ 2;
+            bytestream2_seek(gb, pos, SEEK_SET);
+            bytestream2_skip(gb, 2);
+            cnt = 4;
+            idx--;
+        }
+        b = PAIR(4, a) >> 16;
+    }
+
+    return 0;
+}
+
+static void vertical_predict(uint32_t *dst, int offset, const uint32_t *src, int stride, int height)
+{
+    dst += offset >> 2;
+
+    for (int i = 0; i < height; i++) {
+        for (int j = 0; j < stride >> 2; j++) {
+            dst[j] = (((src[j] >> 3) + (0x3F3F3F3F & dst[j])) << 3) & 0xFCFCFCFC;
+        }
+
+        dst += stride >> 2;
+        src += stride >> 2;
+    }
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data,
+                        int *got_frame, AVPacket *avpkt)
+{
+    ProSumerContext *s = avctx->priv_data;
+    AVFrame * const frame = data;
+    int ret;
+
+    if (avpkt->size <= 32)
+        return AVERROR_INVALIDDATA;
+
+    memset(s->decbuffer, 0, s->size);
+    bytestream2_init(&s->gb, avpkt->data, avpkt->size);
+    bytestream2_init_writer(&s->pb, s->decbuffer, s->size);
+
+    decompress(&s->gb, AV_RL32(avpkt->data + 28) >> 1, &s->pb, s->lut);
+    vertical_predict((uint32_t *)s->decbuffer, 0, (uint32_t *)s->initial_line, s->stride, 1);
+    vertical_predict((uint32_t *)s->decbuffer, s->stride, (uint32_t *)s->decbuffer, s->stride, avctx->height - 1);
+
+    ret = ff_get_buffer(avctx, frame, 0);
+    if (ret < 0)
+        return ret;
+
+    for (int i = avctx->height - 1; i >= 0 ; i--) {
+        uint8_t *y = &frame->data[0][i * frame->linesize[0]];
+        uint8_t *u = &frame->data[1][i * frame->linesize[1]];
+        uint8_t *v = &frame->data[2][i * frame->linesize[2]];
+        const uint8_t *src = s->decbuffer + (avctx->height - 1 - i) * s->stride;
+
+        for (int j = 0; j < avctx->width; j += 8) {
+            *(u++) = *src++;
+            *(y++) = *src++;
+            *(v++) = *src++;
+            *(y++) = *src++;
+
+            *(u++) = *src++;
+            *(y++) = *src++;
+            *(v++) = *src++;
+            *(y++) = *src++;
+
+            *(y++) = *src++;
+            *(y++) = *src++;
+            *(y++) = *src++;
+            *(y++) = *src++;
+        }
+    }
+
+    frame->pict_type = AV_PICTURE_TYPE_I;
+    frame->key_frame = 1;
+    *got_frame = 1;
+
+    return avpkt->size;
+}
+
+#define TB(i) (1 + ((i) > 10) + ((i) > 49))
+static const uint16_t table[] = {
+    0x0000, 0x100, 0x0101, 0x200, 0x0202, 0x300, 0xFFFF, 0x400, 0xFEFE, 0x500,
+    0x0001, 0x700, 0x0100, 0x800, 0x00FF, 0x900, 0xFF00, 0xA00, 0x8001, 0x600,
+    0x8002, 0xB00, 0xFCFC, 0x010, 0x0404, 0x030, 0x0002, 0xD30, 0xFEFC, 0x020,
+    0xFCFE, 0x040, 0xFEFF, 0xD20, 0x0808, 0x060, 0xFFFE, 0x050, 0x0402, 0xC00,
+    0x0204, 0xC10, 0xF8F8, 0xC30, 0x0201, 0xC40, 0x0102, 0xC60, 0x0804, 0xF30,
+    0x0408, 0xE00, 0xF8FC, 0xE10, 0xFCF8, 0xC70, 0x00FE, 0xD00, 0xFE00, 0xD40,
+    0xFF01, 0xD50, 0x01FF, 0xD60, 0x0200, 0xD70, 0xFCFF, 0xE20, 0x0104, 0xE30,
+    0xF0F0, 0xE50, 0x0401, 0xE70, 0x02FE, 0xF00, 0xFE02, 0xF10, 0xFE01, 0xF20,
+    0x01FE, 0xF40, 0xFF02, 0xF50, 0x02FF, 0xF60, 0x8003, 0xC20, 0x8004, 0x070,
+    0x8005, 0xD10, 0x8006, 0xC50, 0x8007, 0xE60, 0x8008, 0xE40, 0x8009, 0xF70,
+    0xFC02, 0x080, 0xFE04, 0x081, 0xFC00, 0x082, 0x02FC, 0x083, 0x1010, 0x084,
+    0x00FC, 0x085, 0x0004, 0x086, 0x0400, 0x087, 0xFFFC, 0x088, 0x1008, 0x089,
+    0x0810, 0x08A, 0x0802, 0x08B, 0x0208, 0x08C, 0xFEF8, 0x08D, 0xFC01, 0x08E,
+    0x04FF, 0x08F, 0xF8FE, 0x090, 0xFC04, 0x091, 0x04FC, 0x092, 0xFF04, 0x093,
+    0x01FC, 0x094, 0xF0F8, 0x095, 0xF8F0, 0x096, 0x04FE, 0x097, 0xF0FC, 0x098,
+    0x0008, 0x099, 0x08FE, 0x09A, 0x01F8, 0x09B, 0x0800, 0x09C, 0x08FC, 0x09D,
+    0xFE08, 0x09E, 0xFC08, 0x09F, 0xF800, 0x0A0, 0x0108, 0x0A1, 0xF802, 0x0A2,
+    0x0801, 0x0A3, 0x00F8, 0x0A4, 0xF804, 0x0A5, 0xF8FF, 0x0A6, 0xFFF8, 0x0A7,
+    0x04F8, 0x0A8, 0x02F8, 0x0A9, 0x1004, 0x0AA, 0x08F8, 0x0AB, 0xF808, 0x0AC,
+    0x0410, 0x0AD, 0xFF08, 0x0AE, 0x08FF, 0x0AF, 0xFCF0, 0x0B0, 0xF801, 0x0B1,
+    0xE0F0, 0x0B2, 0xF3F3, 0x0B3, 0xF0E0, 0x0B4, 0xFAFA, 0x0B5, 0xF7F7, 0x0B6,
+    0xFEF0, 0x0B7, 0xF0FE, 0x0B8, 0xE9E9, 0x0B9, 0xF9F9, 0x0BA, 0x2020, 0x0BB,
+    0xE0E0, 0x0BC, 0x02F0, 0x0BD, 0x04F0, 0x0BE, 0x2010, 0x0BF, 0xECEC, 0x0C0,
+    0xEFEF, 0x0C1, 0x1020, 0x0C2, 0xF5F5, 0x0C3, 0xF4F4, 0x0C4, 0xEDED, 0x0C5,
+    0xEAEA, 0x0C6, 0xFBFB, 0x0C7, 0x1002, 0x0C8, 0xF2F2, 0x0C9, 0xF6F6, 0x0CA,
+    0xF1F1, 0x0CB, 0xFDFD, 0x0CC, 0x0210, 0x0CD, 0x10FF, 0x0CE, 0xFDFE, 0x0CF,
+    0x10F8, 0x0D0, 0x1000, 0x0D1, 0xF001, 0x0D2, 0x1001, 0x0D3, 0x0010, 0x0D4,
+    0x10FE, 0x0D5, 0xEBEB, 0x0D6, 0xFE10, 0x0D7, 0x0110, 0x0D8, 0xF000, 0x0D9,
+    0x08F0, 0x0DA, 0x01F0, 0x0DB, 0x0303, 0x0DC, 0x00F0, 0x0DD, 0xF002, 0x0DE,
+    0x10FC, 0x0DF, 0xFC10, 0x0E0, 0xF0FF, 0x0E1, 0xEEEE, 0x0E2, 0xF004, 0x0E3,
+    0xFFF0, 0x0E4, 0xF7F8, 0x0E5, 0xF3F2, 0x0E6, 0xF9FA, 0x0E7, 0x0820, 0x0E8,
+    0x0302, 0x0E9, 0xE0F8, 0x0EA, 0x0505, 0x0EB, 0x2008, 0x0EC, 0xE8E8, 0x0ED,
+    0x0403, 0x0EE, 0xFBFC, 0x0EF, 0xFCFD, 0x0F0, 0xFBFA, 0x0F1, 0x0203, 0x0F2,
+    0xFCFB, 0x0F3, 0x0304, 0x0F4, 0xF810, 0x0F5, 0xFF10, 0x0F6, 0xF008, 0x0F7,
+    0xFEFD, 0x0F8, 0xF7F6, 0x0F9, 0xF2F1, 0x0FA, 0xF3F4, 0x0FB, 0xEDEC, 0x0FC,
+    0xF4F1, 0x0FD, 0xF5F6, 0x0FE, 0xF0F1, 0x0FF, 0xF9F8, 0xC80, 0x10F0, 0xC81,
+    0xF2F3, 0xC82, 0xF7F9, 0xC83, 0xF6F5, 0xC84, 0xF0EF, 0xC85, 0xF4F5, 0xC86,
+    0xF6F7, 0xC87, 0xFAF9, 0xC88, 0x0405, 0xC89, 0xF8F9, 0xC8A, 0xFAFB, 0xC8B,
+    0xF1F0, 0xC8C, 0xF4F3, 0xC8D, 0xF1F2, 0xC8E, 0xF8E0, 0xC8F, 0xF8F7, 0xC90,
+    0xFDFC, 0xC91, 0xF8FA, 0xC92, 0xFAF6, 0xC93, 0xEEEF, 0xC94, 0xF5F7, 0xC95,
+    0xFDFB, 0xC96, 0xF4F6, 0xC97, 0xFCFA, 0xC98, 0xECED, 0xC99, 0xF0F3, 0xC9A,
+    0xF3F1, 0xC9B, 0xECEB, 0xC9C, 0xEDEE, 0xC9D, 0xF9F7, 0xC9E, 0x0420, 0xC9F,
+    0xEBEA, 0xCA0, 0xF0F4, 0xCA1, 0xF3F5, 0xCA2, 0xFAF7, 0xCA3, 0x0301, 0xCA4,
+    0xF3F7, 0xCA5, 0xF7F3, 0xCA6, 0xEFF0, 0xCA7, 0xF9F6, 0xCA8, 0xEFEE, 0xCA9,
+    0xF4F7, 0xCAA, 0x0504, 0xCAB, 0xF5F4, 0xCAC, 0xF1F3, 0xCAD, 0xEBEE, 0xCAE,
+    0xF2F5, 0xCAF, 0xF3EF, 0xCB0, 0xF5F1, 0xCB1, 0xF9F3, 0xCB2, 0xEDF0, 0xCB3,
+    0xEEF1, 0xCB4, 0xF6F9, 0xCB5, 0xF8FB, 0xCB6, 0xF010, 0xCB7, 0xF2F6, 0xCB8,
+    0xF4ED, 0xCB9, 0xF7FB, 0xCBA, 0xF8F3, 0xCBB, 0xEDEB, 0xCBC, 0xF0F2, 0xCBD,
+    0xF2F9, 0xCBE, 0xF8F1, 0xCBF, 0xFAFC, 0xCC0, 0xFBF8, 0xCC1, 0xF6F0, 0xCC2,
+    0xFAF8, 0xCC3, 0x0103, 0xCC4, 0xF3F6, 0xCC5, 0xF4F9, 0xCC6, 0xF7F2, 0xCC7,
+    0x2004, 0xCC8, 0xF2F0, 0xCC9, 0xF4F2, 0xCCA, 0xEEED, 0xCCB, 0xFCE0, 0xCCC,
+    0xEAE9, 0xCCD, 0xEAEB, 0xCCE, 0xF6F4, 0xCCF, 0xFFFD, 0xCD0, 0xE9EA, 0xCD1,
+    0xF1F4, 0xCD2, 0xF6EF, 0xCD3, 0xF6F8, 0xCD4, 0xF8F6, 0xCD5, 0xEFF2, 0xCD6,
+    0xEFF1, 0xCD7, 0xF7F1, 0xCD8, 0xFBFD, 0xCD9, 0xFEF6, 0xCDA, 0xFFF7, 0xCDB,
+    0x0605, 0xCDC, 0xF0F5, 0xCDD, 0xF0FA, 0xCDE, 0xF1F9, 0xCDF, 0xF2FC, 0xCE0,
+    0xF7EE, 0xCE1, 0xF7F5, 0xCE2, 0xF9FC, 0xCE3, 0xFAF5, 0xCE4, 0xFBF1, 0xCE5,
+    0xF1EF, 0xCE6, 0xF1FA, 0xCE7, 0xF4F8, 0xCE8, 0xF7F0, 0xCE9, 0xF7F4, 0xCEA,
+    0xF7FC, 0xCEB, 0xF9FB, 0xCEC, 0xFAF1, 0xCED, 0xFBF9, 0xCEE, 0xFDFF, 0xCEF,
+    0xE0FC, 0xCF0, 0xEBEC, 0xCF1, 0xEDEF, 0xCF2, 0xEFED, 0xCF3, 0xF1F6, 0xCF4,
+    0xF2F7, 0xCF5, 0xF3EE, 0xCF6, 0xF3F8, 0xCF7, 0xF5F2, 0xCF8, 0xF8F2, 0xCF9,
+    0xF9F1, 0xCFA, 0xF9F2, 0xCFB, 0xFBEF, 0xCFC, 0x00FD, 0xCFD, 0xECEE, 0xCFE,
+    0xF2EF, 0xCFF, 0xF2F8, 0xD80, 0xF5F0, 0xD81, 0xF6F2, 0xD82, 0xFCF7, 0xD83,
+    0xFCF9, 0xD84, 0x0506, 0xD85, 0xEEEC, 0xD86, 0xF0F6, 0xD87, 0xF2F4, 0xD88,
+    0xF6F1, 0xD89, 0xF8F5, 0xD8A, 0xF9F4, 0xD8B, 0xFBF7, 0xD8C, 0x0503, 0xD8D,
+    0xEFEC, 0xD8E, 0xF3F0, 0xD8F, 0xF4F0, 0xD90, 0xF5F3, 0xD91, 0xF6F3, 0xD92,
+    0xF7FA, 0xD93, 0x800A, 0xD94, 0x800B, 0xD95, 0x800C, 0xD96, 0x800D, 0xD97,
+    0x800E, 0xD98, 0x800F, 0xD99, 0x8010, 0xD9A, 0x8011, 0xD9B, 0x8012, 0xD9C,
+    0x8013, 0xD9D, 0x8014, 0xD9E, 0x8015, 0xD9F, 0x8016, 0xDA0, 0x8017, 0xDA1,
+    0x8018, 0xDA2, 0x8019, 0xDA3, 0x801A, 0xDA4, 0x801B, 0xDA5, 0x801C, 0xDA6,
+    0x801D, 0xDA7, 0x801E, 0xDA8, 0x801F, 0xDA9, 0x8020, 0xDAA, 0x8021, 0xDAB,
+    0x8022, 0xDAC, 0x8023, 0xDAD, 0x8024, 0xDAE, 0x8025, 0xDAF, 0x8026, 0xDB0,
+    0x8027, 0xDB1, 0x8028, 0xDB2, 0x8029, 0xDB3, 0x802A, 0xDB4, 0x802B, 0xDB5,
+    0x802C, 0xDB6, 0x802D, 0xDB7, 0x802E, 0xDB8, 0x802F, 0xDB9, 0x80FF, 0xDBA,
+};
+
+static void fill_elements(uint32_t idx, uint32_t shift, uint32_t *e0, uint32_t *e1)
+{
+    uint32_t b, h = idx << (32 - shift);
+
+    for (int j = 0; j < 2; j++) {
+        for (int i = 0; i < 43; i++) {
+            b = 4 * TB(i);
+            if (shift >= b && ((h & (0xFFF00000u << (12 - b))) >> 20) == table[2 * i + 1]) {
+                if (table[2 * i] >> 8 == 0x80u) {
+                    return;
+                } else {
+                    *e0 = (*e0 & 0xFFFFFFu) | (((12 + b - shift)  | (0x40u<<j)) << 22);
+                    if (j == 0) {
+                        *e1 = table[2 * i];
+                        shift -= b;
+                        h <<= b;
+                    } else {
+                        *e1 |= (unsigned)table[2 * i] << 16;
+                    }
+                    break;
+                }
+            }
+        }
+    }
+}
+
+static void fill_lut(uint32_t *lut)
+{
+    for (int i = 1; i < FF_ARRAY_ELEMS(table); i += 2) {
+        uint32_t a = table[i];
+        uint32_t b = TB(i>>1);
+        uint32_t c, d;
+
+        c = (b << 16) | table[i-1];
+        d = 4 * (3 - b);
+        if (d <= 0) {
+            lut[2 * a] = c;
+            lut[2 * a + 1] = 0;
+        } else {
+            for (int j = 0; j < 1 << d; j++) {
+                uint32_t f = 0xFFFFFFFFu;
+                c &= 0xFFFFFFu;
+                if ((c & 0xFF00u) != 0x8000u)
+                    fill_elements(j, d, &c, &f);
+                lut[2 * a + 2 * j] = c;
+                lut[2 * a + 2 * j + 1] = f;
+            }
+        }
+    }
+
+    for (int i = 0; i < 32; i += 2) {
+        lut[i  ] = 0x68000;
+        lut[i+1] = 0;
+    }
+}
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    ProSumerContext *s = avctx->priv_data;
+
+    s->stride = 3LL * FFALIGN(avctx->width, 8) >> 1;
+    s->size = avctx->height * s->stride;
+
+    avctx->pix_fmt = AV_PIX_FMT_YUV411P;
+
+    s->initial_line = av_malloc(s->stride);
+    s->decbuffer = av_malloc(s->size);
+    if (!s->initial_line || !s->decbuffer)
+        return AVERROR(ENOMEM);
+    memset(s->initial_line, 0x80u, s->stride);
+
+    fill_lut(s->lut);
+
+    return 0;
+}
+
+static av_cold int decode_close(AVCodecContext *avctx)
+{
+    ProSumerContext *s = avctx->priv_data;
+
+    av_freep(&s->initial_line);
+    av_freep(&s->decbuffer);
+
+    return 0;
+}
+
+AVCodec ff_prosumer_decoder = {
+    .name           = "prosumer",
+    .long_name      = NULL_IF_CONFIG_SMALL("Brooktree ProSumer Video"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_PROSUMER,
+    .priv_data_size = sizeof(ProSumerContext),
+    .init           = decode_init,
+    .decode         = decode_frame,
+    .close          = decode_close,
+    .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
+                      FF_CODEC_CAP_INIT_CLEANUP,
+};

diff --git a/libavcodec/psd.c b/libavcodec/psd.c
index 66f2ec2..4381447 100644
--- a/libavcodec/psd.c
+++ b/libavcodec/psd.c

@@ -369,6 +369,8 @@
                 avctx->pix_fmt = AV_PIX_FMT_GRAY8;
             } else if (s->channel_depth == 16) {
                 avctx->pix_fmt = AV_PIX_FMT_GRAY16BE;
+            } else if (s->channel_depth == 32) {
+                avctx->pix_fmt = AV_PIX_FMT_GRAYF32BE;
             } else {
                 avpriv_report_missing_feature(avctx, "channel depth %d for grayscale", s->channel_depth);
                 return AVERROR_PATCHWELCOME;

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 2c702c7..36ac0ac 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c

@@ -246,7 +246,7 @@
 {
     int err = 0;
 
-    if (dst != src && (for_user || !(av_codec_get_codec_descriptor(src)->props & AV_CODEC_PROP_INTRA_ONLY))) {
+    if (dst != src && (for_user || !(src->codec_descriptor->props & AV_CODEC_PROP_INTRA_ONLY))) {
         dst->time_base = src->time_base;
         dst->framerate = src->framerate;
         dst->width     = src->width;
@@ -262,11 +262,6 @@
 
         dst->bits_per_coded_sample = src->bits_per_coded_sample;
         dst->sample_aspect_ratio   = src->sample_aspect_ratio;
-#if FF_API_AFD
-FF_DISABLE_DEPRECATION_WARNINGS
-        dst->dtg_active_format     = src->dtg_active_format;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_AFD */
 
         dst->profile = src->profile;
         dst->level   = src->level;
@@ -733,10 +728,6 @@
     FrameThreadContext *fctx;
     int i, err = 0;
 
-#if HAVE_W32THREADS
-    w32thread_init();
-#endif
-
     if (!thread_count) {
         int nb_cpus = av_cpu_count();
 #if FF_API_DEBUG_MV
@@ -895,8 +886,6 @@
 
     f->owner[0] = f->owner[1] = avctx;
 
-    ff_init_buffer_info(avctx, f->f);
-
     if (!(avctx->active_thread_type & FF_THREAD_FRAME))
         return ff_get_buffer(avctx, f->f, flags);
 
@@ -919,8 +908,7 @@
     }
 
     pthread_mutex_lock(&p->parent->buffer_mutex);
-    if (avctx->thread_safe_callbacks ||
-        avctx->get_buffer2 == avcodec_default_get_buffer2) {
+    if (THREAD_SAFE_CALLBACKS(avctx)) {
         err = ff_get_buffer(avctx, f->f, flags);
     } else {
         pthread_mutex_lock(&p->progress_mutex);
@@ -987,8 +975,7 @@
     FrameThreadContext *fctx;
     AVFrame *dst, *tmp;
     int can_direct_free = !(avctx->active_thread_type & FF_THREAD_FRAME) ||
-                          avctx->thread_safe_callbacks                   ||
-                          avctx->get_buffer2 == avcodec_default_get_buffer2;
+                          THREAD_SAFE_CALLBACKS(avctx);
 
     if (!f->f || !f->f->buf[0])
         return;

diff --git a/libavcodec/pthread_slice.c b/libavcodec/pthread_slice.c
index d659f9b..77cfe3c 100644
--- a/libavcodec/pthread_slice.c
+++ b/libavcodec/pthread_slice.c

@@ -132,10 +132,6 @@
     int thread_count = avctx->thread_count;
     static void (*mainfunc)(void *);
 
-#if HAVE_W32THREADS
-    w32thread_init();
-#endif
-
     // We cannot do this in the encoder init as the threads are created before
     if (av_codec_is_encoder(avctx->codec) &&
         avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO &&

diff --git a/libavcodec/qdmc.c b/libavcodec/qdmc.c
index 1c8952b..8f5b7b9 100644
--- a/libavcodec/qdmc.c
+++ b/libavcodec/qdmc.c

@@ -26,6 +26,7 @@
 #define BITSTREAM_READER_LE
 
 #include "libavutil/channel_layout.h"
+#include "libavutil/thread.h"
 
 #include "avcodec.h"
 #include "bytestream.h"
@@ -204,7 +205,7 @@
                            INIT_VLC_LE | INIT_VLC_USE_NEW_STATIC); \
     } while (0)
 
-static av_cold void qdmc_init_static_data(AVCodec *codec)
+static av_cold void qdmc_init_static_data(void)
 {
     int i;
 
@@ -250,10 +251,13 @@
 
 static av_cold int qdmc_decode_init(AVCodecContext *avctx)
 {
+    static AVOnce init_static_once = AV_ONCE_INIT;
     QDMCContext *s = avctx->priv_data;
-    int fft_size, fft_order, size, g, j, x;
+    int ret, fft_size, fft_order, size, g, j, x;
     GetByteContext b;
 
+    ff_thread_once(&init_static_once, qdmc_init_static_data);
+
     if (!avctx->extradata || (avctx->extradata_size < 48)) {
         av_log(avctx, AV_LOG_ERROR, "extradata missing or truncated\n");
         return AVERROR_INVALIDDATA;
@@ -334,7 +338,9 @@
         return AVERROR_INVALIDDATA;
     }
 
-    ff_fft_init(&s->fft_ctx, fft_order, 1);
+    ret = ff_fft_init(&s->fft_ctx, fft_order, 1);
+    if (ret < 0)
+        return ret;
 
     avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 
@@ -775,7 +781,6 @@
     .id               = AV_CODEC_ID_QDMC,
     .priv_data_size   = sizeof(QDMCContext),
     .init             = qdmc_decode_init,
-    .init_static_data = qdmc_init_static_data,
     .close            = qdmc_decode_close,
     .decode           = qdmc_decode_frame,
     .flush            = qdmc_flush,

diff --git a/libavcodec/qdrw.c b/libavcodec/qdrw.c
index 3a0bc6f..32ba410 100644
--- a/libavcodec/qdrw.c
+++ b/libavcodec/qdrw.c

@@ -45,14 +45,14 @@
 };
 
 static int parse_palette(AVCodecContext *avctx, GetByteContext *gbc,
-                         uint32_t *pal, int colors)
+                         uint32_t *pal, int colors, int pixmap)
 {
     int i;
 
     for (i = 0; i <= colors; i++) {
         uint8_t r, g, b;
         unsigned int idx = bytestream2_get_be16(gbc); /* color index */
-        if (idx > 255) {
+        if (idx > 255 && !pixmap) {
             av_log(avctx, AV_LOG_WARNING,
                    "Palette index out of range: %u\n", idx);
             bytestream2_skip(gbc, 6);
@@ -66,7 +66,7 @@
         bytestream2_skip(gbc, 1);
         b = bytestream2_get_byte(gbc);
         bytestream2_skip(gbc, 1);
-        pal[idx] = (0xFFU << 24) | (r << 16) | (g << 8) | b;
+        pal[pixmap ? i : idx] = (0xFFU << 24) | (r << 16) | (g << 8) | b;
     }
     return 0;
 }
@@ -335,6 +335,7 @@
     while (bytestream2_get_bytes_left(&gbc) >= 4) {
         int bppcnt, bpp;
         int rowbytes, pack_type;
+        int flags;
         int opcode = bytestream2_get_be16(&gbc);
 
         switch(opcode) {
@@ -345,7 +346,8 @@
         case PACKBITSRGN:
             av_log(avctx, AV_LOG_DEBUG, "Parsing Packbit opcode\n");
 
-            bytestream2_skip(&gbc, 30);
+            flags = bytestream2_get_be16(&gbc) & 0xC000;
+            bytestream2_skip(&gbc, 28);
             bppcnt = bytestream2_get_be16(&gbc); /* cmpCount */
             bpp    = bytestream2_get_be16(&gbc); /* cmpSize */
 
@@ -380,7 +382,7 @@
             if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
                 return ret;
 
-            ret = parse_palette(avctx, &gbc, (uint32_t *)p->data[1], colors);
+            ret = parse_palette(avctx, &gbc, (uint32_t *)p->data[1], colors, flags & 0x8000);
             if (ret < 0)
                 return ret;
             p->palette_has_changed = 1;
@@ -434,7 +436,7 @@
             av_log(avctx, AV_LOG_DEBUG, "bppcount %d bpp %d\n", bppcnt, bpp);
             if (bppcnt == 3 && bpp == 8) {
                 avctx->pix_fmt = AV_PIX_FMT_RGB24;
-            } else if (bppcnt == 3 && bpp == 5) {
+            } else if (bppcnt == 3 && bpp == 5 || bppcnt == 2 && bpp == 8) {
                 avctx->pix_fmt = AV_PIX_FMT_RGB555;
             } else if (bppcnt == 4 && bpp == 8) {
                 avctx->pix_fmt = AV_PIX_FMT_ARGB;

diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c
index 56ca9df..bb0d795 100644
--- a/libavcodec/qsv.c
+++ b/libavcodec/qsv.c

@@ -20,6 +20,7 @@
 
 #include <mfx/mfxvideo.h>
 #include <mfx/mfxplugin.h>
+#include <mfx/mfxjpeg.h>
 
 #include <stdio.h>
 #include <string.h>
@@ -30,6 +31,7 @@
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_qsv.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/avassert.h"
 
 #include "avcodec.h"
 #include "qsv_internal.h"
@@ -56,6 +58,8 @@
     case AV_CODEC_ID_VP8:
         return MFX_CODEC_VP8;
 #endif
+    case AV_CODEC_ID_MJPEG:
+        return MFX_CODEC_JPEG;
     default:
         break;
     }
@@ -192,6 +196,35 @@
     return AVERROR_BUG;
 }
 
+enum AVPictureType ff_qsv_map_pictype(int mfx_pic_type)
+{
+    enum AVPictureType type;
+    switch (mfx_pic_type & 0x7) {
+    case MFX_FRAMETYPE_I:
+        if (mfx_pic_type & MFX_FRAMETYPE_S)
+            type = AV_PICTURE_TYPE_SI;
+        else
+            type = AV_PICTURE_TYPE_I;
+        break;
+    case MFX_FRAMETYPE_B:
+        type = AV_PICTURE_TYPE_B;
+        break;
+    case MFX_FRAMETYPE_P:
+        if (mfx_pic_type & MFX_FRAMETYPE_S)
+            type = AV_PICTURE_TYPE_SP;
+        else
+            type = AV_PICTURE_TYPE_P;
+        break;
+    case MFX_FRAMETYPE_UNKNOWN:
+        type = AV_PICTURE_TYPE_NONE;
+        break;
+    default:
+        av_assert0(0);
+    }
+
+    return type;
+}
+
 static int qsv_load_plugins(mfxSession session, const char *load_plugins,
                             void *logctx)
 {
@@ -386,7 +419,7 @@
         mfxFrameInfo      *i  = &req->Info;
         mfxFrameInfo      *i1 = &frames_hwctx->surfaces[0].Info;
 
-        if (i->Width  != i1->Width  || i->Height != i1->Height ||
+        if (i->Width  > i1->Width  || i->Height > i1->Height ||
             i->FourCC != i1->FourCC || i->ChromaFormat != i1->ChromaFormat) {
             av_log(ctx->logctx, AV_LOG_ERROR, "Mismatching surface properties in an "
                    "allocation request: %dx%d %d %d vs %dx%d %d %d\n",
@@ -590,6 +623,13 @@
                                       "Error setting a HW handle");
     }
 
+    if (QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 25)) {
+        err = MFXJoinSession(parent_session, session);
+        if (err != MFX_ERR_NONE)
+            return ff_qsv_print_error(avctx, err,
+                                      "Error joining session");
+    }
+
     ret = qsv_load_plugins(session, load_plugins, avctx);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error loading plugins\n");

diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h
index c030550..394c558 100644
--- a/libavcodec/qsv_internal.h
+++ b/libavcodec/qsv_internal.h

@@ -38,6 +38,10 @@
     (MFX_VERSION_MAJOR > (MAJOR) ||         \
      MFX_VERSION_MAJOR == (MAJOR) && MFX_VERSION_MINOR >= (MINOR))
 
+#define QSV_RUNTIME_VERSION_ATLEAST(MFX_VERSION, MAJOR, MINOR) \
+    (MFX_VERSION.Major > (MAJOR)) ||                           \
+    (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR))
+
 typedef struct QSVMid {
     AVBufferRef *hw_frames_ref;
     mfxHDL handle;
@@ -51,6 +55,8 @@
     AVFrame *frame;
     mfxFrameSurface1 surface;
     mfxEncodeCtrl enc_ctrl;
+    mfxExtDecodedFrameInfo dec_info;
+    mfxExtBuffer *ext_param;
 
     int queued;
     int used;
@@ -86,6 +92,7 @@
 int ff_qsv_profile_to_mfx(enum AVCodecID codec_id, int profile);
 
 int ff_qsv_map_pixfmt(enum AVPixelFormat format, uint32_t *fourcc);
+enum AVPictureType ff_qsv_map_pictype(int mfx_pic_type);
 
 int ff_qsv_init_internal_session(AVCodecContext *avctx, mfxSession *session,
                                  const char *load_plugins);

diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
index c00817f..22e7a46 100644
--- a/libavcodec/qsvdec.c
+++ b/libavcodec/qsvdec.c

@@ -41,6 +41,19 @@
 #include "qsv_internal.h"
 #include "qsvdec.h"
 
+const AVCodecHWConfigInternal *ff_qsv_hw_configs[] = {
+    &(const AVCodecHWConfigInternal) {
+        .public = {
+            .pix_fmt     = AV_PIX_FMT_QSV,
+            .methods     = AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX |
+                           AV_CODEC_HW_CONFIG_METHOD_AD_HOC,
+            .device_type = AV_HWDEVICE_TYPE_QSV,
+        },
+        .hwaccel = NULL,
+    },
+    NULL
+};
+
 static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession session,
                             AVBufferRef *hw_frames_ref, AVBufferRef *hw_device_ref)
 {
@@ -97,6 +110,16 @@
     return 0;
 }
 
+static inline unsigned int qsv_fifo_item_size(void)
+{
+    return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*);
+}
+
+static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
+{
+    return av_fifo_size(fifo) / qsv_fifo_item_size();
+}
+
 static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
 {
     const AVPixFmtDescriptor *desc;
@@ -112,8 +135,7 @@
         return AVERROR_BUG;
 
     if (!q->async_fifo) {
-        q->async_fifo = av_fifo_alloc((1 + q->async_depth) *
-                                      (sizeof(mfxSyncPoint*) + sizeof(QSVFrame*)));
+        q->async_fifo = av_fifo_alloc(q->async_depth * qsv_fifo_item_size());
         if (!q->async_fifo)
             return AVERROR(ENOMEM);
     }
@@ -136,9 +158,6 @@
             else if (frames_hwctx->frame_type & MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET)
                 iopattern = MFX_IOPATTERN_OUT_VIDEO_MEMORY;
         }
-
-        frame_width  = frames_hwctx->surfaces[0].Info.Width;
-        frame_height = frames_hwctx->surfaces[0].Info.Height;
     }
 
     if (!iopattern)
@@ -222,6 +241,11 @@
 
         frame->surface.Data.MemId = &q->frames_ctx.mids[ret];
     }
+    frame->surface.Data.ExtParam    = &frame->ext_param;
+    frame->surface.Data.NumExtParam = 1;
+    frame->ext_param                = (mfxExtBuffer*)&frame->dec_info;
+    frame->dec_info.Header.BufferId = MFX_EXTBUFF_DECODED_FRAME_INFO;
+    frame->dec_info.Header.BufferSz = sizeof(frame->dec_info);
 
     frame->used = 1;
 
@@ -308,6 +332,8 @@
         bs.DataLength = avpkt->size;
         bs.MaxLength  = bs.DataLength;
         bs.TimeStamp  = avpkt->pts;
+        if (avctx->field_order == AV_FIELD_PROGRESSIVE)
+            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;
     }
 
     sync = av_mallocz(sizeof(*sync));
@@ -367,7 +393,7 @@
         av_freep(&sync);
     }
 
-    if (!av_fifo_space(q->async_fifo) ||
+    if ((qsv_fifo_size(q->async_fifo) >= q->async_depth) ||
         (!avpkt->size && av_fifo_size(q->async_fifo))) {
         AVFrame *src_frame;
 
@@ -406,6 +432,10 @@
             outsurf->Info.PicStruct & MFX_PICSTRUCT_FIELD_TFF;
         frame->interlaced_frame =
             !(outsurf->Info.PicStruct & MFX_PICSTRUCT_PROGRESSIVE);
+        frame->pict_type = ff_qsv_map_pictype(out_frame->dec_info.FrameType);
+        //Key frame is IDR frame is only suitable for H264. For HEVC, IRAPs are key frames.
+        if (avctx->codec_id == AV_CODEC_ID_H264)
+            frame->key_frame = !!(out_frame->dec_info.FrameType & MFX_FRAMETYPE_IDR);
 
         /* update the surface properties */
         if (avctx->pix_fmt == AV_PIX_FMT_QSV)
@@ -462,6 +492,7 @@
     uint8_t *dummy_data;
     int dummy_size;
     int ret;
+    const AVPixFmtDescriptor *desc;
 
     if (!q->avctx_internal) {
         q->avctx_internal = avcodec_alloc_context3(NULL);
@@ -486,10 +517,11 @@
                      pkt->data, pkt->size, pkt->pts, pkt->dts,
                      pkt->pos);
 
+    avctx->field_order  = q->parser->field_order;
     /* TODO: flush delayed frames on reinit */
     if (q->parser->format       != q->orig_pix_fmt    ||
-        q->parser->coded_width  != avctx->coded_width ||
-        q->parser->coded_height != avctx->coded_height) {
+        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx->coded_width, 16) ||
+        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx->coded_height, 16)) {
         enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
                                            AV_PIX_FMT_NONE,
                                            AV_PIX_FMT_NONE };
@@ -508,9 +540,8 @@
         avctx->pix_fmt      = pix_fmts[1] = qsv_format;
         avctx->width        = q->parser->width;
         avctx->height       = q->parser->height;
-        avctx->coded_width  = q->parser->coded_width;
-        avctx->coded_height = q->parser->coded_height;
-        avctx->field_order  = q->parser->field_order;
+        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);
+        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);
         avctx->level        = q->avctx_internal->level;
         avctx->profile      = q->avctx_internal->profile;
 
@@ -520,6 +551,15 @@
 
         avctx->pix_fmt = ret;
 
+        desc = av_pix_fmt_desc_get(avctx->pix_fmt);
+        if (!desc)
+            goto reinit_fail;
+
+         if (desc->comp[0].depth > 8) {
+            avctx->coded_width =  FFALIGN(q->parser->coded_width, 32);
+            avctx->coded_height = FFALIGN(q->parser->coded_height, 32);
+        }
+
         ret = qsv_decode_init(avctx, q);
         if (ret < 0)
             goto reinit_fail;

diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h
index 4e86e4b..5b7b03a 100644
--- a/libavcodec/qsvdec.h
+++ b/libavcodec/qsvdec.h

@@ -33,6 +33,7 @@
 #include "libavutil/pixfmt.h"
 
 #include "avcodec.h"
+#include "hwaccel.h"
 #include "qsv_internal.h"
 
 typedef struct QSVContext {
@@ -70,6 +71,8 @@
     int         nb_ext_buffers;
 } QSVContext;
 
+extern const AVCodecHWConfigInternal *ff_qsv_hw_configs[];
+
 int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
                         AVFrame *frame, int *got_frame, AVPacket *pkt);
 

diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
index a6b53cc..d9d2318 100644
--- a/libavcodec/qsvdec_h2645.c
+++ b/libavcodec/qsvdec_h2645.c

@@ -153,8 +153,12 @@
         }
 
         ret = ff_qsv_process_data(avctx, &s->qsv, frame, got_frame, &s->buffer_pkt);
-        if (ret < 0)
+        if (ret < 0){
+            /* Drop buffer_pkt when failed to decode the packet. Otherwise,
+               the decoder will keep decoding the failure packet. */
+            av_packet_unref(&s->buffer_pkt);
             return ret;
+        }
 
         s->buffer_pkt.size -= ret;
         s->buffer_pkt.data += ret;
@@ -171,23 +175,20 @@
     ff_qsv_decode_flush(avctx, &s->qsv);
 }
 
+#if defined(_WIN32)
+#define LOAD_PLUGIN_DEFAULT LOAD_PLUGIN_HEVC_SW
+#else
+#define LOAD_PLUGIN_DEFAULT LOAD_PLUGIN_HEVC_HW
+#endif
+
 #define OFFSET(x) offsetof(QSVH2645Context, x)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 
-#if CONFIG_HEVC_QSV_HWACCEL
-AVHWAccel ff_hevc_qsv_hwaccel = {
-    .name           = "hevc_qsv",
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_HEVC,
-    .pix_fmt        = AV_PIX_FMT_QSV,
-};
-#endif
-
 #if CONFIG_HEVC_QSV_DECODER
 static const AVOption hevc_options[] = {
-    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
 
-    { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_HEVC_SW }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VD, "load_plugin" },
+    { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_DEFAULT }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VD, "load_plugin" },
     { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_NONE },    0, 0, VD, "load_plugin" },
     { "hevc_sw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_SW }, 0, 0, VD, "load_plugin" },
     { "hevc_hw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_HW }, 0, 0, VD, "load_plugin" },
@@ -214,28 +215,21 @@
     .decode         = qsv_decode_frame,
     .flush          = qsv_decode_flush,
     .close          = qsv_decode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HYBRID,
     .priv_class     = &hevc_class,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
                                                     AV_PIX_FMT_P010,
                                                     AV_PIX_FMT_QSV,
                                                     AV_PIX_FMT_NONE },
+    .hw_configs     = ff_qsv_hw_configs,
     .bsfs           = "hevc_mp4toannexb",
-};
-#endif
-
-#if CONFIG_H264_QSV_HWACCEL
-AVHWAccel ff_h264_qsv_hwaccel = {
-    .name           = "h264_qsv",
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_H264,
-    .pix_fmt        = AV_PIX_FMT_QSV,
+    .wrapper_name   = "qsv",
 };
 #endif
 
 #if CONFIG_H264_QSV_DECODER
 static const AVOption options[] = {
-    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
     { NULL },
 };
 
@@ -256,12 +250,14 @@
     .decode         = qsv_decode_frame,
     .flush          = qsv_decode_flush,
     .close          = qsv_decode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HYBRID,
     .priv_class     = &class,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
                                                     AV_PIX_FMT_P010,
                                                     AV_PIX_FMT_QSV,
                                                     AV_PIX_FMT_NONE },
+    .hw_configs     = ff_qsv_hw_configs,
     .bsfs           = "h264_mp4toannexb",
+    .wrapper_name   = "qsv",
 };
 #endif

diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c
index b94093d..993c7a8 100644
--- a/libavcodec/qsvdec_other.c
+++ b/libavcodec/qsvdec_other.c

@@ -60,6 +60,11 @@
 {
     QSVOtherContext *s = avctx->priv_data;
 
+#if CONFIG_VP8_QSV_DECODER
+    if (avctx->codec_id == AV_CODEC_ID_VP8)
+        av_freep(&s->qsv.load_plugins);
+#endif
+
     ff_qsv_decode_close(&s->qsv);
 
     qsv_clear_buffers(s);
@@ -133,8 +138,13 @@
         }
 
         ret = ff_qsv_process_data(avctx, &s->qsv, frame, got_frame, &s->input_ref);
-        if (ret < 0)
+        if (ret < 0) {
+            /* Drop input packet when failed to decode the packet. Otherwise,
+               the decoder will keep decoding the failure packet. */
+            av_packet_unref(&s->input_ref);
+
             return ret;
+        }
 
         s->input_ref.size -= ret;
         s->input_ref.data += ret;
@@ -154,19 +164,10 @@
 #define OFFSET(x) offsetof(QSVOtherContext, x)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
-    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
     { NULL },
 };
 
-#if CONFIG_MPEG2_QSV_HWACCEL
-AVHWAccel ff_mpeg2_qsv_hwaccel = {
-    .name           = "mpeg2_qsv",
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_MPEG2VIDEO,
-    .pix_fmt        = AV_PIX_FMT_QSV,
-};
-#endif
-
 #if CONFIG_MPEG2_QSV_DECODER
 static const AVClass mpeg2_qsv_class = {
     .class_name = "mpeg2_qsv",
@@ -185,20 +186,13 @@
     .decode         = qsv_decode_frame,
     .flush          = qsv_decode_flush,
     .close          = qsv_decode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HYBRID,
     .priv_class     = &mpeg2_qsv_class,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
                                                     AV_PIX_FMT_QSV,
                                                     AV_PIX_FMT_NONE },
-};
-#endif
-
-#if CONFIG_VC1_QSV_HWACCEL
-AVHWAccel ff_vc1_qsv_hwaccel = {
-    .name           = "vc1_qsv",
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_VC1,
-    .pix_fmt        = AV_PIX_FMT_QSV,
+    .hw_configs     = ff_qsv_hw_configs,
+    .wrapper_name   = "qsv",
 };
 #endif
 
@@ -220,20 +214,13 @@
     .decode         = qsv_decode_frame,
     .flush          = qsv_decode_flush,
     .close          = qsv_decode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HYBRID,
     .priv_class     = &vc1_qsv_class,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
                                                     AV_PIX_FMT_QSV,
                                                     AV_PIX_FMT_NONE },
-};
-#endif
-
-#if CONFIG_VP8_QSV_HWACCEL
-AVHWAccel ff_vp8_qsv_hwaccel = {
-    .name           = "vp8_qsv",
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_VP8,
-    .pix_fmt        = AV_PIX_FMT_QSV,
+    .hw_configs     = ff_qsv_hw_configs,
+    .wrapper_name   = "qsv",
 };
 #endif
 
@@ -255,10 +242,12 @@
     .decode         = qsv_decode_frame,
     .flush          = qsv_decode_flush,
     .close          = qsv_decode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HYBRID,
     .priv_class     = &vp8_qsv_class,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
                                                     AV_PIX_FMT_QSV,
                                                     AV_PIX_FMT_NONE },
+    .hw_configs     = ff_qsv_hw_configs,
+    .wrapper_name   = "qsv",
 };
 #endif

diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 5eb506f..948751d 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c

@@ -85,7 +85,9 @@
     { MFX_RATECONTROL_CBR,     "CBR" },
     { MFX_RATECONTROL_VBR,     "VBR" },
     { MFX_RATECONTROL_CQP,     "CQP" },
+#if QSV_HAVE_AVBR
     { MFX_RATECONTROL_AVBR,    "AVBR" },
+#endif
 #if QSV_HAVE_LA
     { MFX_RATECONTROL_LA,      "LA" },
 #endif
@@ -156,16 +158,19 @@
 #endif
         ) {
         av_log(avctx, AV_LOG_VERBOSE,
-               "InitialDelayInKB: %"PRIu16"; TargetKbps: %"PRIu16"; MaxKbps: %"PRIu16"\n",
-               info->InitialDelayInKB, info->TargetKbps, info->MaxKbps);
+               "BufferSizeInKB: %"PRIu16"; InitialDelayInKB: %"PRIu16"; TargetKbps: %"PRIu16"; MaxKbps: %"PRIu16"\n",
+               info->BufferSizeInKB, info->InitialDelayInKB, info->TargetKbps, info->MaxKbps);
     } else if (info->RateControlMethod == MFX_RATECONTROL_CQP) {
         av_log(avctx, AV_LOG_VERBOSE, "QPI: %"PRIu16"; QPP: %"PRIu16"; QPB: %"PRIu16"\n",
                info->QPI, info->QPP, info->QPB);
-    } else if (info->RateControlMethod == MFX_RATECONTROL_AVBR) {
+    }
+#if QSV_HAVE_AVBR
+    else if (info->RateControlMethod == MFX_RATECONTROL_AVBR) {
         av_log(avctx, AV_LOG_VERBOSE,
                "TargetKbps: %"PRIu16"; Accuracy: %"PRIu16"; Convergence: %"PRIu16"\n",
                info->TargetKbps, info->Accuracy, info->Convergence);
     }
+#endif
 #if QSV_HAVE_LA
     else if (info->RateControlMethod == MFX_RATECONTROL_LA
 #if QSV_HAVE_LA_HRD
@@ -287,6 +292,12 @@
         return AVERROR(EINVAL);
     }
 
+    if (!want_qscale && avctx->global_quality > 0 && !QSV_HAVE_ICQ){
+        av_log(avctx, AV_LOG_ERROR,
+               "ICQ ratecontrol mode requested, but is not supported by this SDK version\n");
+        return AVERROR(ENOSYS);
+    }
+
     if (want_qscale) {
         rc_mode = MFX_RATECONTROL_CQP;
         rc_desc = "constant quantization parameter (CQP)";
@@ -319,10 +330,14 @@
     else if (avctx->rc_max_rate == avctx->bit_rate) {
         rc_mode = MFX_RATECONTROL_CBR;
         rc_desc = "constant bitrate (CBR)";
-    } else if (!avctx->rc_max_rate) {
+    }
+#if QSV_HAVE_AVBR
+    else if (!avctx->rc_max_rate) {
         rc_mode = MFX_RATECONTROL_AVBR;
         rc_desc = "average variable bitrate (AVBR)";
-    } else {
+    }
+#endif
+    else {
         rc_mode = MFX_RATECONTROL_VBR;
         rc_desc = "variable bitrate (VBR)";
     }
@@ -333,18 +348,95 @@
     return 0;
 }
 
-static int rc_supported(QSVEncContext *q)
+static int check_enc_param(AVCodecContext *avctx, QSVEncContext *q)
 {
     mfxVideoParam param_out = { .mfx.CodecId = q->param.mfx.CodecId };
     mfxStatus ret;
 
+#define UNMATCH(x) (param_out.mfx.x != q->param.mfx.x)
+
     ret = MFXVideoENCODE_Query(q->session, &q->param, &param_out);
-    if (ret < 0 ||
-        param_out.mfx.RateControlMethod != q->param.mfx.RateControlMethod)
+
+    if (ret < 0) {
+        if (UNMATCH(CodecId))
+            av_log(avctx, AV_LOG_ERROR, "Current codec type is unsupported\n");
+        if (UNMATCH(CodecProfile))
+            av_log(avctx, AV_LOG_ERROR, "Current profile is unsupported\n");
+        if (UNMATCH(RateControlMethod))
+            av_log(avctx, AV_LOG_ERROR, "Selected ratecontrol mode is unsupported\n");
+        if (UNMATCH(LowPower))
+              av_log(avctx, AV_LOG_ERROR, "Low power mode is unsupported\n");
+        if (UNMATCH(FrameInfo.FrameRateExtN) || UNMATCH(FrameInfo.FrameRateExtD))
+              av_log(avctx, AV_LOG_ERROR, "Current frame rate is unsupported\n");
+        if (UNMATCH(FrameInfo.PicStruct))
+              av_log(avctx, AV_LOG_ERROR, "Current picture structure is unsupported\n");
+        if (UNMATCH(FrameInfo.Width) || UNMATCH(FrameInfo.Height))
+              av_log(avctx, AV_LOG_ERROR, "Current resolution is unsupported\n");
+        if (UNMATCH(FrameInfo.FourCC))
+              av_log(avctx, AV_LOG_ERROR, "Current pixel format is unsupported\n");
         return 0;
+    }
     return 1;
 }
 
+static int init_video_param_jpeg(AVCodecContext *avctx, QSVEncContext *q)
+{
+    enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ?
+                                   avctx->sw_pix_fmt : avctx->pix_fmt;
+    const AVPixFmtDescriptor *desc;
+    int ret;
+
+    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
+    if (ret < 0)
+        return AVERROR_BUG;
+    q->param.mfx.CodecId = ret;
+
+    if (avctx->level > 0)
+        q->param.mfx.CodecLevel = avctx->level;
+    q->param.mfx.CodecProfile       = q->profile;
+
+    desc = av_pix_fmt_desc_get(sw_format);
+    if (!desc)
+        return AVERROR_BUG;
+
+    ff_qsv_map_pixfmt(sw_format, &q->param.mfx.FrameInfo.FourCC);
+
+    q->param.mfx.FrameInfo.CropX          = 0;
+    q->param.mfx.FrameInfo.CropY          = 0;
+    q->param.mfx.FrameInfo.CropW          = avctx->width;
+    q->param.mfx.FrameInfo.CropH          = avctx->height;
+    q->param.mfx.FrameInfo.AspectRatioW   = avctx->sample_aspect_ratio.num;
+    q->param.mfx.FrameInfo.AspectRatioH   = avctx->sample_aspect_ratio.den;
+    q->param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;
+    q->param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;
+    q->param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;
+    q->param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;
+
+    q->param.mfx.FrameInfo.Width  = FFALIGN(avctx->width, 16);
+    q->param.mfx.FrameInfo.Height = FFALIGN(avctx->height, 16);
+
+    if (avctx->hw_frames_ctx) {
+        AVHWFramesContext *frames_ctx    = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+        AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
+        q->param.mfx.FrameInfo.Width  = frames_hwctx->surfaces[0].Info.Width;
+        q->param.mfx.FrameInfo.Height = frames_hwctx->surfaces[0].Info.Height;
+    }
+
+    if (avctx->framerate.den > 0 && avctx->framerate.num > 0) {
+        q->param.mfx.FrameInfo.FrameRateExtN = avctx->framerate.num;
+        q->param.mfx.FrameInfo.FrameRateExtD = avctx->framerate.den;
+    } else {
+        q->param.mfx.FrameInfo.FrameRateExtN  = avctx->time_base.den;
+        q->param.mfx.FrameInfo.FrameRateExtD  = avctx->time_base.num;
+    }
+
+    q->param.mfx.Interleaved          = 1;
+    q->param.mfx.Quality              = av_clip(avctx->global_quality, 1, 100);
+    q->param.mfx.RestartInterval      = 0;
+
+    return 0;
+}
+
 static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
 {
     enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ?
@@ -361,8 +453,19 @@
     if (avctx->level > 0)
         q->param.mfx.CodecLevel = avctx->level;
 
+    if (avctx->compression_level == FF_COMPRESSION_DEFAULT) {
+        avctx->compression_level = q->preset;
+    } else if (avctx->compression_level >= 0) {
+        if (avctx->compression_level > MFX_TARGETUSAGE_BEST_SPEED) {
+            av_log(avctx, AV_LOG_WARNING, "Invalid compression level: "
+                    "valid range is 0-%d, using %d instead\n",
+                    MFX_TARGETUSAGE_BEST_SPEED, MFX_TARGETUSAGE_BEST_SPEED);
+            avctx->compression_level = MFX_TARGETUSAGE_BEST_SPEED;
+        }
+    }
+
     q->param.mfx.CodecProfile       = q->profile;
-    q->param.mfx.TargetUsage        = q->preset;
+    q->param.mfx.TargetUsage        = avctx->compression_level;
     q->param.mfx.GopPicSize         = FFMAX(0, avctx->gop_size);
     q->param.mfx.GopRefDist         = FFMAX(-1, avctx->max_b_frames) + 1;
     q->param.mfx.GopOptFlag         = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ?
@@ -437,6 +540,7 @@
 #if QSV_HAVE_VCM
     case MFX_RATECONTROL_VCM:
 #endif
+        q->param.mfx.BufferSizeInKB   = avctx->rc_buffer_size / 8000;
         q->param.mfx.InitialDelayInKB = avctx->rc_initial_buffer_occupancy / 1000;
         q->param.mfx.TargetKbps       = avctx->bit_rate / 1000;
         q->param.mfx.MaxKbps          = avctx->rc_max_rate / 1000;
@@ -449,11 +553,13 @@
         q->param.mfx.QPB = av_clip(quant * fabs(avctx->b_quant_factor) + avctx->b_quant_offset, 0, 51);
 
         break;
+#if QSV_HAVE_AVBR
     case MFX_RATECONTROL_AVBR:
         q->param.mfx.TargetKbps  = avctx->bit_rate / 1000;
         q->param.mfx.Convergence = q->avbr_convergence;
         q->param.mfx.Accuracy    = q->avbr_accuracy;
         break;
+#endif
 #if QSV_HAVE_LA
     case MFX_RATECONTROL_LA:
         q->param.mfx.TargetKbps  = avctx->bit_rate / 1000;
@@ -474,14 +580,6 @@
     if (avctx->codec_id != AV_CODEC_ID_HEVC) {
         q->extco.Header.BufferId      = MFX_EXTBUFF_CODING_OPTION;
         q->extco.Header.BufferSz      = sizeof(q->extco);
-#if FF_API_CODER_TYPE
-FF_DISABLE_DEPRECATION_WARNINGS
-        if (avctx->coder_type != 0)
-            q->cavlc = avctx->coder_type == FF_CODER_TYPE_VLC;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-        q->extco.CAVLC = q->cavlc ? MFX_CODINGOPTION_ON
-                                  : MFX_CODINGOPTION_UNKNOWN;
 
         q->extco.PicTimingSEI         = q->pic_timing_sei ?
                                         MFX_CODINGOPTION_ON : MFX_CODINGOPTION_UNKNOWN;
@@ -490,6 +588,15 @@
             q->extco.RateDistortionOpt = q->rdo > 0 ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
 
         if (avctx->codec_id == AV_CODEC_ID_H264) {
+#if FF_API_CODER_TYPE
+FF_DISABLE_DEPRECATION_WARNINGS
+            if (avctx->coder_type >= 0)
+                q->cavlc = avctx->coder_type == FF_CODER_TYPE_VLC;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+            q->extco.CAVLC = q->cavlc ? MFX_CODINGOPTION_ON
+                                      : MFX_CODINGOPTION_UNKNOWN;
+
             if (avctx->strict_std_compliance != FF_COMPLIANCE_NORMAL)
                 q->extco.NalHrdConformance = avctx->strict_std_compliance > FF_COMPLIANCE_NORMAL ?
                                              MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
@@ -499,6 +606,7 @@
             if (q->recovery_point_sei >= 0)
                 q->extco.RecoveryPointSEI = q->recovery_point_sei ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
             q->extco.MaxDecFrameBuffering = q->max_dec_frame_buffering;
+            q->extco.AUDelimiter          = q->aud ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
         }
 
         q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extco;
@@ -533,6 +641,10 @@
             q->extco2.Trellis = q->trellis;
 #endif
 
+#if QSV_HAVE_LA_DS
+            q->extco2.LookAheadDS = q->look_ahead_downsampling;
+#endif
+
 #if QSV_HAVE_BREF_TYPE
 #if FF_API_PRIVATE_OPT
 FF_DISABLE_DEPRECATION_WARNINGS
@@ -548,25 +660,67 @@
                 q->extco2.AdaptiveB = q->adaptive_b ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
 #endif
 
-            q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extco2;
-
-#if QSV_HAVE_LA_DS
-            q->extco2.LookAheadDS           = q->look_ahead_downsampling;
+#if QSV_VERSION_ATLEAST(1, 9)
+            if (avctx->qmin >= 0 && avctx->qmax >= 0 && avctx->qmin > avctx->qmax) {
+                av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are set but invalid, please make sure min <= max\n");
+                return AVERROR(EINVAL);
+            }
+            if (avctx->qmin >= 0) {
+                q->extco2.MinQPI = avctx->qmin > 51 ? 51 : avctx->qmin;
+                q->extco2.MinQPP = q->extco2.MinQPB = q->extco2.MinQPI;
+            }
+            if (avctx->qmax >= 0) {
+                q->extco2.MaxQPI = avctx->qmax > 51 ? 51 : avctx->qmax;
+                q->extco2.MaxQPP = q->extco2.MaxQPB = q->extco2.MaxQPI;
+            }
 #endif
+            q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extco2;
+        }
+#endif
+#if QSV_HAVE_MF
+        if (avctx->codec_id == AV_CODEC_ID_H264) {
+            mfxVersion    ver;
+            ret = MFXQueryVersion(q->session,&ver);
+            if (ret >= MFX_ERR_NONE && QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 25)) {
+                q->extmfp.Header.BufferId     = MFX_EXTBUFF_MULTI_FRAME_PARAM;
+                q->extmfp.Header.BufferSz     = sizeof(q->extmfp);
+
+                q->extmfp.MFMode = q->mfmode;
+                av_log(avctx,AV_LOG_VERBOSE,"MFMode:%d\n", q->extmfp.MFMode);
+                q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extmfp;
+            }
         }
 #endif
     }
 
-    if (!rc_supported(q)) {
+    if (!check_enc_param(avctx,q)) {
         av_log(avctx, AV_LOG_ERROR,
-               "Selected ratecontrol mode is not supported by the QSV "
-               "runtime. Choose a different mode.\n");
+               "some encoding parameters are not supported by the QSV "
+               "runtime. Please double check the input parameters.\n");
         return AVERROR(ENOSYS);
     }
 
     return 0;
 }
 
+static int qsv_retrieve_enc_jpeg_params(AVCodecContext *avctx, QSVEncContext *q)
+{
+    int ret = 0;
+
+    ret = MFXVideoENCODE_GetVideoParam(q->session, &q->param);
+    if (ret < 0)
+        return ff_qsv_print_error(avctx, ret,
+                                  "Error calling GetVideoParam");
+
+    q->packet_size = q->param.mfx.BufferSizeInKB * 1000;
+
+    // for qsv mjpeg the return value maybe 0 so alloc the buffer
+    if (q->packet_size == 0)
+        q->packet_size = q->param.mfx.FrameInfo.Height * q->param.mfx.FrameInfo.Width * 4;
+
+    return 0;
+}
+
 static int qsv_retrieve_enc_params(AVCodecContext *avctx, QSVEncContext *q)
 {
     AVCPBProperties *cpb_props;
@@ -648,7 +802,7 @@
     mfxFrameSurface1 *surfaces;
     int nb_surfaces, i;
 
-    nb_surfaces = qsv->nb_opaque_surfaces + q->req.NumFrameSuggested + q->async_depth;
+    nb_surfaces = qsv->nb_opaque_surfaces + q->req.NumFrameSuggested;
 
     q->opaque_alloc_buf = av_buffer_allocz(sizeof(*surfaces) * nb_surfaces);
     if (!q->opaque_alloc_buf)
@@ -719,6 +873,16 @@
     return 0;
 }
 
+static inline unsigned int qsv_fifo_item_size(void)
+{
+    return sizeof(AVPacket) + sizeof(mfxSyncPoint*) + sizeof(mfxBitstream*);
+}
+
+static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
+{
+    return av_fifo_size(fifo)/qsv_fifo_item_size();
+}
+
 int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q)
 {
     int iopattern = 0;
@@ -727,8 +891,7 @@
 
     q->param.AsyncDepth = q->async_depth;
 
-    q->async_fifo = av_fifo_alloc((1 + q->async_depth) *
-                                  (sizeof(AVPacket) + sizeof(mfxSyncPoint*) + sizeof(mfxBitstream*)));
+    q->async_fifo = av_fifo_alloc(q->async_depth * qsv_fifo_item_size());
     if (!q->async_fifo)
         return AVERROR(ENOMEM);
 
@@ -760,7 +923,15 @@
     if (ret < 0)
         return ret;
 
-    ret = init_video_param(avctx, q);
+    // in the mfxInfoMFX struct, JPEG is different from other codecs
+    switch (avctx->codec_id) {
+    case AV_CODEC_ID_MJPEG:
+        ret = init_video_param_jpeg(avctx, q);
+        break;
+    default:
+        ret = init_video_param(avctx, q);
+        break;
+    }
     if (ret < 0)
         return ret;
 
@@ -820,7 +991,14 @@
         ff_qsv_print_warning(avctx, ret,
                              "Warning in encoder initialization");
 
-    ret = qsv_retrieve_enc_params(avctx, q);
+    switch (avctx->codec_id) {
+    case AV_CODEC_ID_MJPEG:
+        ret = qsv_retrieve_enc_jpeg_params(avctx, q);
+        break;
+    default:
+        ret = qsv_retrieve_enc_params(avctx, q);
+        break;
+    }
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error retrieving encoding parameters.\n");
         return ret;
@@ -848,7 +1026,9 @@
     while (cur) {
         if (cur->used && !cur->surface.Data.Locked) {
             free_encoder_ctrl_payloads(&cur->enc_ctrl);
-            av_frame_unref(cur->frame);
+            if (cur->frame->format == AV_PIX_FMT_QSV) {
+                av_frame_unref(cur->frame);
+            }
             cur->used = 0;
         }
         cur = cur->next;
@@ -921,16 +1101,23 @@
         }
     } else {
         /* make a copy if the input is not padded as libmfx requires */
-        if (frame->height & 31 || frame->linesize[0] & (q->width_align - 1)) {
+        /* and to make allocation continious for data[0]/data[1] */
+         if ((frame->height & 31 || frame->linesize[0] & (q->width_align - 1)) ||
+            (frame->data[1] - frame->data[0] != frame->linesize[0] * FFALIGN(qf->frame->height, q->height_align))) {
             qf->frame->height = FFALIGN(frame->height, q->height_align);
             qf->frame->width  = FFALIGN(frame->width, q->width_align);
 
-            ret = ff_get_buffer(q->avctx, qf->frame, AV_GET_BUFFER_FLAG_REF);
-            if (ret < 0)
-                return ret;
+            qf->frame->format = frame->format;
+
+            if (!qf->frame->data[0]) {
+                ret = av_frame_get_buffer(qf->frame, q->width_align);
+                if (ret < 0)
+                    return ret;
+            }
 
             qf->frame->height = frame->height;
             qf->frame->width  = frame->width;
+
             ret = av_frame_copy(qf->frame, frame);
             if (ret < 0) {
                 av_frame_unref(qf->frame);
@@ -975,7 +1162,7 @@
             q->param.mfx.CodecLevel > MFX_LEVEL_AVC_41)
             av_log(avctx, AV_LOG_WARNING,
                    "Interlaced coding is supported"
-                   " at Main/High Profile Level 2.1-4.1\n");
+                   " at Main/High Profile Level 2.2-4.0\n");
     }
 }
 
@@ -984,6 +1171,10 @@
 {
     AVPacket new_pkt = { 0 };
     mfxBitstream *bs;
+#if QSV_VERSION_ATLEAST(1, 26)
+    mfxExtAVCEncodedFrameInfo *enc_info;
+    mfxExtBuffer **enc_buf;
+#endif
 
     mfxFrameSurface1 *surf = NULL;
     mfxSyncPoint *sync     = NULL;
@@ -1017,6 +1208,24 @@
     bs->Data      = new_pkt.data;
     bs->MaxLength = new_pkt.size;
 
+#if QSV_VERSION_ATLEAST(1, 26)
+    if (avctx->codec_id == AV_CODEC_ID_H264) {
+        enc_info = av_mallocz(sizeof(*enc_info));
+        if (!enc_info)
+            return AVERROR(ENOMEM);
+
+        enc_info->Header.BufferId = MFX_EXTBUFF_ENCODED_FRAME_INFO;
+        enc_info->Header.BufferSz = sizeof (*enc_info);
+        bs->NumExtParam = 1;
+        enc_buf = av_mallocz(sizeof(mfxExtBuffer *));
+        if (!enc_buf)
+            return AVERROR(ENOMEM);
+        enc_buf[0] = (mfxExtBuffer *)enc_info;
+
+        bs->ExtParam = enc_buf;
+    }
+#endif
+
     if (q->set_encode_ctrl_cb) {
         q->set_encode_ctrl_cb(avctx, frame, &qsv_frame->enc_ctrl);
     }
@@ -1024,6 +1233,12 @@
     sync = av_mallocz(sizeof(*sync));
     if (!sync) {
         av_freep(&bs);
+ #if QSV_VERSION_ATLEAST(1, 26)
+        if (avctx->codec_id == AV_CODEC_ID_H264) {
+            av_freep(&enc_info);
+            av_freep(&enc_buf);
+        }
+ #endif
         av_packet_unref(&new_pkt);
         return AVERROR(ENOMEM);
     }
@@ -1040,6 +1255,12 @@
     if (ret < 0) {
         av_packet_unref(&new_pkt);
         av_freep(&bs);
+#if QSV_VERSION_ATLEAST(1, 26)
+        if (avctx->codec_id == AV_CODEC_ID_H264) {
+            av_freep(&enc_info);
+            av_freep(&enc_buf);
+        }
+#endif
         av_freep(&sync);
         return (ret == MFX_ERR_MORE_DATA) ?
                0 : ff_qsv_print_error(avctx, ret, "Error during encoding");
@@ -1056,6 +1277,12 @@
         av_freep(&sync);
         av_packet_unref(&new_pkt);
         av_freep(&bs);
+#if QSV_VERSION_ATLEAST(1, 26)
+        if (avctx->codec_id == AV_CODEC_ID_H264) {
+            av_freep(&enc_info);
+            av_freep(&enc_buf);
+        }
+#endif
     }
 
     return 0;
@@ -1070,11 +1297,16 @@
     if (ret < 0)
         return ret;
 
-    if (!av_fifo_space(q->async_fifo) ||
+    if ((qsv_fifo_size(q->async_fifo) >= q->async_depth) ||
         (!frame && av_fifo_size(q->async_fifo))) {
         AVPacket new_pkt;
         mfxBitstream *bs;
         mfxSyncPoint *sync;
+#if QSV_VERSION_ATLEAST(1, 26)
+        mfxExtAVCEncodedFrameInfo *enc_info;
+        mfxExtBuffer **enc_buf;
+#endif
+        enum AVPictureType pict_type;
 
         av_fifo_generic_read(q->async_fifo, &new_pkt, sizeof(new_pkt), NULL);
         av_fifo_generic_read(q->async_fifo, &sync,    sizeof(sync),    NULL);
@@ -1092,17 +1324,29 @@
             bs->FrameType & MFX_FRAMETYPE_xIDR)
             new_pkt.flags |= AV_PKT_FLAG_KEY;
 
+        if (bs->FrameType & MFX_FRAMETYPE_I || bs->FrameType & MFX_FRAMETYPE_xI)
+            pict_type = AV_PICTURE_TYPE_I;
+        else if (bs->FrameType & MFX_FRAMETYPE_P || bs->FrameType & MFX_FRAMETYPE_xP)
+            pict_type = AV_PICTURE_TYPE_P;
+        else if (bs->FrameType & MFX_FRAMETYPE_B || bs->FrameType & MFX_FRAMETYPE_xB)
+            pict_type = AV_PICTURE_TYPE_B;
+
 #if FF_API_CODED_FRAME
 FF_DISABLE_DEPRECATION_WARNINGS
-        if (bs->FrameType & MFX_FRAMETYPE_I || bs->FrameType & MFX_FRAMETYPE_xI)
-            avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-        else if (bs->FrameType & MFX_FRAMETYPE_P || bs->FrameType & MFX_FRAMETYPE_xP)
-            avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
-        else if (bs->FrameType & MFX_FRAMETYPE_B || bs->FrameType & MFX_FRAMETYPE_xB)
-            avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
+        avctx->coded_frame->pict_type = pict_type;
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
+#if QSV_VERSION_ATLEAST(1, 26)
+        if (avctx->codec_id == AV_CODEC_ID_H264) {
+            enc_buf = bs->ExtParam;
+            enc_info = (mfxExtAVCEncodedFrameInfo *)(*bs->ExtParam);
+            ff_side_data_set_encoder_stats(&new_pkt,
+                enc_info->QP * FF_QP2LAMBDA, NULL, 0, pict_type);
+            av_freep(&enc_info);
+            av_freep(&enc_buf);
+        }
+#endif
         av_freep(&bs);
         av_freep(&sync);
 

diff --git a/libavcodec/qsvenc.h b/libavcodec/qsvenc.h
index 12e3444..50cc426 100644
--- a/libavcodec/qsvenc.h
+++ b/libavcodec/qsvenc.h

@@ -44,12 +44,30 @@
 #define QSV_HAVE_LA     QSV_VERSION_ATLEAST(1, 7)
 #define QSV_HAVE_LA_DS  QSV_VERSION_ATLEAST(1, 8)
 #define QSV_HAVE_LA_HRD QSV_VERSION_ATLEAST(1, 11)
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define QSV_HAVE_AVBR   QSV_VERSION_ATLEAST(1, 3)
 #define QSV_HAVE_ICQ    QSV_VERSION_ATLEAST(1, 8)
 #define QSV_HAVE_VCM    QSV_VERSION_ATLEAST(1, 8)
 #define QSV_HAVE_QVBR   QSV_VERSION_ATLEAST(1, 11)
+#define QSV_HAVE_MF     0
+#else
+#define QSV_HAVE_AVBR   0
+#define QSV_HAVE_ICQ    0
+#define QSV_HAVE_VCM    0
+#define QSV_HAVE_QVBR   0
+#define QSV_HAVE_MF     QSV_VERSION_ATLEAST(1, 25)
+#endif
+
+#if !QSV_HAVE_LA_DS
+#define MFX_LOOKAHEAD_DS_UNKNOWN 0
+#define MFX_LOOKAHEAD_DS_OFF 0
+#define MFX_LOOKAHEAD_DS_2x 0
+#define MFX_LOOKAHEAD_DS_4x 0
+#endif
 
 #define QSV_COMMON_OPTS \
-{ "async_depth", "Maximum processing parallelism", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VE },                          \
+{ "async_depth", "Maximum processing parallelism", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VE },                          \
 { "avbr_accuracy",    "Accuracy of the AVBR ratecontrol",    OFFSET(qsv.avbr_accuracy),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },                             \
 { "avbr_convergence", "Convergence of the AVBR ratecontrol", OFFSET(qsv.avbr_convergence), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },                             \
 { "preset", NULL, OFFSET(qsv.preset), AV_OPT_TYPE_INT, { .i64 = MFX_TARGETUSAGE_BALANCED }, MFX_TARGETUSAGE_BEST_QUALITY, MFX_TARGETUSAGE_BEST_SPEED,   VE, "preset" }, \
@@ -60,7 +78,6 @@
 { "slow",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_3  },            INT_MIN, INT_MAX, VE, "preset" },                                                \
 { "slower",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_2  },            INT_MIN, INT_MAX, VE, "preset" },                                                \
 { "veryslow",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_BEST_QUALITY  }, INT_MIN, INT_MAX, VE, "preset" },                                                \
-{ "vcm",      "Use the video conferencing mode ratecontrol",  OFFSET(qsv.vcm),      AV_OPT_TYPE_INT, { .i64 = 0  },  0, 1,         VE },                                \
 { "rdo",            "Enable rate distortion optimization",    OFFSET(qsv.rdo),            AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
 { "max_frame_size", "Maximum encoded frame size in bytes",    OFFSET(qsv.max_frame_size), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, UINT16_MAX, VE },                         \
 { "max_slice_size", "Maximum encoded slice size in bytes",    OFFSET(qsv.max_slice_size), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, UINT16_MAX, VE },                         \
@@ -70,7 +87,6 @@
 { "adaptive_i",     "Adaptive I-frame placement",             OFFSET(qsv.adaptive_i),     AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
 { "adaptive_b",     "Adaptive B-frame placement",             OFFSET(qsv.adaptive_b),     AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
 { "b_strategy",     "Strategy to choose between I/P/B-frames", OFFSET(qsv.b_strategy),    AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
-{ "cavlc",          "Enable CAVLC",                           OFFSET(qsv.cavlc),          AV_OPT_TYPE_INT, { .i64 = 0 },   0,          1, VE },                         \
 
 typedef int SetEncodeCtrlCB (AVCodecContext *avctx,
                              const AVFrame *frame, mfxEncodeCtrl* enc_ctrl);
@@ -93,12 +109,15 @@
 #if QSV_HAVE_CO2
     mfxExtCodingOption2 extco2;
 #endif
-
+#if QSV_HAVE_MF
+    mfxExtMultiFrameParam   extmfp;
+    mfxExtMultiFrameControl extmfc;
+#endif
     mfxExtOpaqueSurfaceAlloc opaque_alloc;
     mfxFrameSurface1       **opaque_surfaces;
     AVBufferRef             *opaque_alloc_buf;
 
-    mfxExtBuffer  *extparam_internal[2 + QSV_HAVE_CO2];
+    mfxExtBuffer  *extparam_internal[2 + QSV_HAVE_CO2 + (QSV_HAVE_MF * 2)];
     int         nb_extparam_internal;
 
     mfxExtBuffer **extparam;
@@ -123,6 +142,8 @@
     int max_frame_size;
     int max_slice_size;
 
+    int aud;
+
     int single_sei_nal_unit;
     int max_dec_frame_buffering;
     int trellis;
@@ -141,6 +162,10 @@
     int recovery_point_sei;
 
     int a53_cc;
+
+#if QSV_HAVE_MF
+    int mfmode;
+#endif
     char *load_plugins;
     SetEncodeCtrlCB *set_encode_ctrl_cb;
 } QSVEncContext;

diff --git a/libavcodec/qsvenc_h264.c b/libavcodec/qsvenc_h264.c
index 389335f..07c9d64 100644
--- a/libavcodec/qsvenc_h264.c
+++ b/libavcodec/qsvenc_h264.c

@@ -1,5 +1,5 @@
 /*
- * Intel MediaSDK QSV based H.264 enccoder
+ * Intel MediaSDK QSV based H.264 encoder
  *
  * copyright (c) 2013 Yukinori Yamazoe
  *
@@ -102,20 +102,27 @@
 static const AVOption options[] = {
     QSV_COMMON_OPTS
 
+    { "cavlc",          "Enable CAVLC",                           OFFSET(qsv.cavlc),          AV_OPT_TYPE_INT, { .i64 = 0 },   0,          1, VE },
+#if QSV_HAVE_VCM
+    { "vcm",      "Use the video conferencing mode ratecontrol",  OFFSET(qsv.vcm),      AV_OPT_TYPE_INT, { .i64 = 0  },  0, 1,         VE },
+#endif
     { "idr_interval", "Distance (in I-frames) between IDR frames", OFFSET(qsv.idr_interval), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
     { "pic_timing_sei",    "Insert picture timing SEI with pic_struct_syntax element", OFFSET(qsv.pic_timing_sei), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
     { "single_sei_nal_unit",    "Put all the SEI messages into one NALU",        OFFSET(qsv.single_sei_nal_unit),     AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },
     { "max_dec_frame_buffering", "Maximum number of frames buffered in the DPB", OFFSET(qsv.max_dec_frame_buffering), AV_OPT_TYPE_INT, { .i64 = 0 },   0, UINT16_MAX, VE },
 
 #if QSV_HAVE_LA
-    { "look_ahead",       "Use VBR algorithm with look ahead",    OFFSET(qsv.look_ahead),       AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
+    { "look_ahead",       "Use VBR algorithm with look ahead",    OFFSET(qsv.look_ahead),       AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
     { "look_ahead_depth", "Depth of look ahead in number frames", OFFSET(qsv.look_ahead_depth), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, VE },
 #endif
 #if QSV_HAVE_LA_DS
-    { "look_ahead_downsampling", NULL, OFFSET(qsv.look_ahead_downsampling), AV_OPT_TYPE_INT, { .i64 = MFX_LOOKAHEAD_DS_UNKNOWN }, MFX_LOOKAHEAD_DS_UNKNOWN, MFX_LOOKAHEAD_DS_2x, VE, "look_ahead_downsampling" },
+    { "look_ahead_downsampling", "Downscaling factor for the frames saved for the lookahead analysis", OFFSET(qsv.look_ahead_downsampling),
+                                          AV_OPT_TYPE_INT,   { .i64 = MFX_LOOKAHEAD_DS_UNKNOWN }, MFX_LOOKAHEAD_DS_UNKNOWN, MFX_LOOKAHEAD_DS_4x, VE, "look_ahead_downsampling" },
     { "unknown"                , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_LOOKAHEAD_DS_UNKNOWN }, INT_MIN, INT_MAX,     VE, "look_ahead_downsampling" },
+    { "auto"                   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_LOOKAHEAD_DS_UNKNOWN }, INT_MIN, INT_MAX,     VE, "look_ahead_downsampling" },
     { "off"                    , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_LOOKAHEAD_DS_OFF     }, INT_MIN, INT_MAX,     VE, "look_ahead_downsampling" },
     { "2x"                     , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_LOOKAHEAD_DS_2x      }, INT_MIN, INT_MAX,     VE, "look_ahead_downsampling" },
+    { "4x"                     , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_LOOKAHEAD_DS_4x      }, INT_MIN, INT_MAX,     VE, "look_ahead_downsampling" },
 #endif
 
     { "int_ref_type", "Intra refresh type",                                      OFFSET(qsv.int_ref_type),            AV_OPT_TYPE_INT, { .i64 = -1 }, -1, UINT16_MAX, VE, "int_ref_type" },
@@ -138,6 +145,15 @@
     { "high"    , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_AVC_HIGH     }, INT_MIN, INT_MAX,     VE, "profile" },
 
     { "a53cc" , "Use A53 Closed Captions (if available)", OFFSET(qsv.a53_cc), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, VE},
+
+    { "aud", "Insert the Access Unit Delimiter NAL", OFFSET(qsv.aud), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
+
+#if QSV_HAVE_MF
+    { "mfmode", "Multi-Frame Mode", OFFSET(qsv.mfmode), AV_OPT_TYPE_INT, { .i64 = MFX_MF_AUTO }, MFX_MF_DEFAULT, MFX_MF_AUTO, VE, "mfmode"},
+    { "off"    , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_MF_DISABLED }, INT_MIN, INT_MAX,     VE, "mfmode" },
+    { "auto"   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_MF_AUTO     }, INT_MIN, INT_MAX,     VE, "mfmode" },
+#endif
+
     { NULL },
 };
 
@@ -154,7 +170,11 @@
     // same as the x264 default
     { "g",         "250"   },
     { "bf",        "3"     },
-    { "coder",     "ac"    },
+    { "qmin",      "-1"    },
+    { "qmax",      "-1"    },
+#if FF_API_CODER_TYPE
+    { "coder",     "-1"    },
+#endif
 
     { "flags",     "+cgop" },
 #if FF_API_PRIVATE_OPT
@@ -172,7 +192,7 @@
     .init           = qsv_enc_init,
     .encode2        = qsv_enc_frame,
     .close          = qsv_enc_close,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
                                                     AV_PIX_FMT_P010,
                                                     AV_PIX_FMT_QSV,
@@ -180,4 +200,5 @@
     .priv_class     = &class,
     .defaults       = qsv_enc_defaults,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+    .wrapper_name   = "qsv",
 };

diff --git a/libavcodec/qsvenc_hevc.c b/libavcodec/qsvenc_hevc.c
index b0e5ace..4339b31 100644
--- a/libavcodec/qsvenc_hevc.c
+++ b/libavcodec/qsvenc_hevc.c

@@ -56,6 +56,7 @@
     PutByteContext pbc;
 
     GetBitContext gb;
+    H2645RBSP sps_rbsp = { NULL };
     H2645NAL sps_nal = { NULL };
     HEVCSPS sps = { 0 };
     HEVCVPS vps = { 0 };
@@ -69,8 +70,12 @@
         return AVERROR_UNKNOWN;
     }
 
+    av_fast_padded_malloc(&sps_rbsp.rbsp_buffer, &sps_rbsp.rbsp_buffer_alloc_size, avctx->extradata_size);
+    if (!sps_rbsp.rbsp_buffer)
+        return AVERROR(ENOMEM);
+
     /* parse the SPS */
-    ret = ff_h2645_extract_rbsp(avctx->extradata + 4, avctx->extradata_size - 4, &sps_nal, 1);
+    ret = ff_h2645_extract_rbsp(avctx->extradata + 4, avctx->extradata_size - 4, &sps_rbsp, &sps_nal, 1);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error unescaping the SPS buffer\n");
         return ret;
@@ -78,7 +83,7 @@
 
     ret = init_get_bits8(&gb, sps_nal.data, sps_nal.size);
     if (ret < 0) {
-        av_freep(&sps_nal.rbsp_buffer);
+        av_freep(&sps_rbsp.rbsp_buffer);
         return ret;
     }
 
@@ -87,13 +92,13 @@
     if (type != HEVC_NAL_SPS) {
         av_log(avctx, AV_LOG_ERROR, "Unexpected NAL type in the extradata: %d\n",
                type);
-        av_freep(&sps_nal.rbsp_buffer);
+        av_freep(&sps_rbsp.rbsp_buffer);
         return AVERROR_INVALIDDATA;
     }
     get_bits(&gb, 9);
 
     ret = ff_hevc_parse_sps(&sps, &gb, &sps_id, 0, NULL, avctx);
-    av_freep(&sps_nal.rbsp_buffer);
+    av_freep(&sps_rbsp.rbsp_buffer);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error parsing the SPS\n");
         return ret;
@@ -181,6 +186,9 @@
         }
     }
 
+    // HEVC and H264 meaning of the value is shifted by 1, make it consistent
+    q->qsv.idr_interval++;
+
     ret = ff_qsv_enc_init(avctx, &q->qsv);
     if (ret < 0)
         return ret;
@@ -209,12 +217,20 @@
     return ff_qsv_enc_close(avctx, &q->qsv);
 }
 
+#if defined(_WIN32)
+#define LOAD_PLUGIN_DEFAULT LOAD_PLUGIN_HEVC_SW
+#else
+#define LOAD_PLUGIN_DEFAULT LOAD_PLUGIN_HEVC_HW
+#endif
+
 #define OFFSET(x) offsetof(QSVHEVCEncContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
     QSV_COMMON_OPTS
 
-    { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_HEVC_SW }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VE, "load_plugin" },
+    { "idr_interval", "Distance (in I-frames) between IDR frames", OFFSET(qsv.idr_interval), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, INT_MAX, VE, "idr_interval" },
+    { "begin_only", "Output an IDR-frame only at the beginning of the stream", 0, AV_OPT_TYPE_CONST, { .i64 = -1 }, 0, 0, VE, "idr_interval" },
+    { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_DEFAULT }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VE, "load_plugin" },
     { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_NONE },    0, 0, VE, "load_plugin" },
     { "hevc_sw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_SW }, 0, 0, VE, "load_plugin" },
     { "hevc_hw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_HW }, 0, 0, VE, "load_plugin" },
@@ -261,7 +277,7 @@
     .init           = qsv_enc_init,
     .encode2        = qsv_enc_frame,
     .close          = qsv_enc_close,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
                                                     AV_PIX_FMT_P010,
                                                     AV_PIX_FMT_QSV,
@@ -269,4 +285,5 @@
     .priv_class     = &class,
     .defaults       = qsv_enc_defaults,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+    .wrapper_name   = "qsv",
 };

diff --git a/libavcodec/qsvenc_jpeg.c b/libavcodec/qsvenc_jpeg.c
new file mode 100644
index 0000000..c18fe91
--- /dev/null
+++ b/libavcodec/qsvenc_jpeg.c

@@ -0,0 +1,92 @@
+/*
+ * Intel MediaSDK QSV based MJPEG encoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <mfx/mfxvideo.h>
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "internal.h"
+#include "h264.h"
+#include "qsv.h"
+#include "qsv_internal.h"
+#include "qsvenc.h"
+
+typedef struct QSVMJPEGEncContext {
+    AVClass *class;
+    QSVEncContext qsv;
+} QSVMJPEGEncContext;
+
+static av_cold int qsv_enc_init(AVCodecContext *avctx)
+{
+    QSVMJPEGEncContext *q = avctx->priv_data;
+
+    return ff_qsv_enc_init(avctx, &q->qsv);
+}
+
+static int qsv_enc_frame(AVCodecContext *avctx, AVPacket *pkt,
+                         const AVFrame *frame, int *got_packet)
+{
+    QSVMJPEGEncContext *q = avctx->priv_data;
+
+    return ff_qsv_encode(avctx, &q->qsv, pkt, frame, got_packet);
+}
+
+static av_cold int qsv_enc_close(AVCodecContext *avctx)
+{
+    QSVMJPEGEncContext *q = avctx->priv_data;
+
+    return ff_qsv_enc_close(avctx, &q->qsv);
+}
+
+#define OFFSET(x) offsetof(QSVMJPEGEncContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { NULL },
+};
+
+static const AVClass class = {
+    .class_name = "mjpeg_qsv encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_mjpeg_qsv_encoder = {
+    .name           = "mjpeg_qsv",
+    .long_name      = NULL_IF_CONFIG_SMALL("MJPEG (Intel Quick Sync Video acceleration)"),
+    .priv_data_size = sizeof(QSVMJPEGEncContext),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MJPEG,
+    .init           = qsv_enc_init,
+    .encode2        = qsv_enc_frame,
+    .close          = qsv_enc_close,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
+                                                    AV_PIX_FMT_QSV,
+                                                    AV_PIX_FMT_NONE },
+    .priv_class     = &class,
+    .wrapper_name   = "qsv",
+};

diff --git a/libavcodec/qsvenc_mpeg2.c b/libavcodec/qsvenc_mpeg2.c
index 5b583fb..a7427d8 100644
--- a/libavcodec/qsvenc_mpeg2.c
+++ b/libavcodec/qsvenc_mpeg2.c

@@ -104,11 +104,12 @@
     .init           = qsv_enc_init,
     .encode2        = qsv_enc_frame,
     .close          = qsv_enc_close,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
                                                     AV_PIX_FMT_QSV,
                                                     AV_PIX_FMT_NONE },
     .priv_class     = &class,
     .defaults       = qsv_enc_defaults,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+    .wrapper_name   = "qsv",
 };

diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
index 1b0d201..6155b4f 100644
--- a/libavcodec/qtrle.c
+++ b/libavcodec/qtrle.c

@@ -155,6 +155,8 @@
         CHECK_PIXEL_PTR(0);
 
         while ((rle_code = (int8_t)bytestream2_get_byte(&s->g)) != -1) {
+            if (bytestream2_get_bytes_left(&s->g) < 1)
+                return;
             if (rle_code == 0) {
                 /* there's another skip code in the stream */
                 pixel_ptr += (num_pixels * (bytestream2_get_byte(&s->g) - 1));
@@ -210,6 +212,8 @@
         CHECK_PIXEL_PTR(0);
 
         while ((rle_code = (int8_t)bytestream2_get_byte(&s->g)) != -1) {
+            if (bytestream2_get_bytes_left(&s->g) < 1)
+                return;
             if (rle_code == 0) {
                 /* there's another skip code in the stream */
                 pixel_ptr += (4 * (bytestream2_get_byte(&s->g) - 1));
@@ -259,6 +263,8 @@
         CHECK_PIXEL_PTR(0);
 
         while ((rle_code = (int8_t)bytestream2_get_byte(&s->g)) != -1) {
+            if (bytestream2_get_bytes_left(&s->g) < 1)
+                return;
             if (rle_code == 0) {
                 /* there's another skip code in the stream */
                 pixel_ptr += (bytestream2_get_byte(&s->g) - 1) * 2;
@@ -303,6 +309,8 @@
         CHECK_PIXEL_PTR(0);
 
         while ((rle_code = (int8_t)bytestream2_get_byte(&s->g)) != -1) {
+            if (bytestream2_get_bytes_left(&s->g) < 1)
+                return;
             if (rle_code == 0) {
                 /* there's another skip code in the stream */
                 pixel_ptr += (bytestream2_get_byte(&s->g) - 1) * 3;
@@ -350,6 +358,8 @@
         CHECK_PIXEL_PTR(0);
 
         while ((rle_code = (int8_t)bytestream2_get_byte(&s->g)) != -1) {
+            if (bytestream2_get_bytes_left(&s->g) < 1)
+                return;
             if (rle_code == 0) {
                 /* there's another skip code in the stream */
                 pixel_ptr += (bytestream2_get_byte(&s->g) - 1) * 4;
@@ -433,12 +443,10 @@
     int ret;
 
     bytestream2_init(&s->g, avpkt->data, avpkt->size);
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
-        return ret;
 
     /* check if this frame is even supposed to change */
     if (avpkt->size < 8)
-        goto done;
+        return avpkt->size;
 
     /* start after the chunk size */
     bytestream2_seek(&s->g, 4, SEEK_SET);
@@ -449,17 +457,20 @@
     /* if a header is present, fetch additional decoding parameters */
     if (header & 0x0008) {
         if (avpkt->size < 14)
-            goto done;
+            return avpkt->size;
         start_line = bytestream2_get_be16(&s->g);
         bytestream2_skip(&s->g, 2);
         height     = bytestream2_get_be16(&s->g);
         bytestream2_skip(&s->g, 2);
         if (height > s->avctx->height - start_line)
-            goto done;
+            return avpkt->size;
     } else {
         start_line = 0;
         height     = s->avctx->height;
     }
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
+        return ret;
+
     row_ptr = s->frame->linesize[0] * start_line;
 
     switch (avctx->bits_per_coded_sample) {
@@ -520,7 +531,6 @@
         memcpy(s->frame->data[1], s->pal, AVPALETTE_SIZE);
     }
 
-done:
     if ((ret = av_frame_ref(data, s->frame)) < 0)
         return ret;
     *got_frame      = 1;

diff --git a/libavcodec/ra144.c b/libavcodec/ra144.c
index c077b7b..573703d 100644
--- a/libavcodec/ra144.c
+++ b/libavcodec/ra144.c

@@ -1516,7 +1516,7 @@
 
     if (v[0]) {
         for (i=0; i < BLOCKSIZE; i++)
-            dest[i] = (s1[i]*v[0] + s2[i]*v[1] + s3[i]*v[2]) >> 12;
+            dest[i] = ((int)(s1[i]*(unsigned)v[0]) + s2[i]*v[1] + s3[i]*v[2]) >> 12;
     } else {
         for (i=0; i < BLOCKSIZE; i++)
             dest[i] = (             s2[i]*v[1] + s3[i]*v[2]) >> 12;
@@ -1569,11 +1569,11 @@
         b = 0x1000000 / b;
         for (j=0; j <= i; j++) {
 #if CONFIG_FTRAPV
-            int a = bp2[j] - ((refl[i+1] * bp2[i-j]) >> 12);
+            int a = bp2[j] - ((int)(refl[i+1] * (unsigned)bp2[i-j]) >> 12);
             if((int)(a*(unsigned)b) != a*(int64_t)b)
                 return 1;
 #endif
-            bp1[j] = (int)((bp2[j] - ((refl[i+1] * bp2[i-j]) >> 12)) * (unsigned)b) >> 12;
+            bp1[j] = (int)((bp2[j] - ((int)(refl[i+1] * (unsigned)bp2[i-j]) >> 12)) * (unsigned)b) >> 12;
         }
 
         if ((unsigned) bp1[i] + 0x1000 > 0x1fff)

diff --git a/libavcodec/rasc.c b/libavcodec/rasc.c
new file mode 100644
index 0000000..e8e0740
--- /dev/null
+++ b/libavcodec/rasc.c

@@ -0,0 +1,812 @@
+/*
+ * RemotelyAnywhere Screen Capture decoder
+ *
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+
+#include <zlib.h>
+
+#define KBND MKTAG('K', 'B', 'N', 'D')
+#define FINT MKTAG('F', 'I', 'N', 'T')
+#define INIT MKTAG('I', 'N', 'I', 'T')
+#define BNDL MKTAG('B', 'N', 'D', 'L')
+#define KFRM MKTAG('K', 'F', 'R', 'M')
+#define DLTA MKTAG('D', 'L', 'T', 'A')
+#define MOUS MKTAG('M', 'O', 'U', 'S')
+#define MPOS MKTAG('M', 'P', 'O', 'S')
+#define MOVE MKTAG('M', 'O', 'V', 'E')
+#define EMPT MKTAG('E', 'M', 'P', 'T')
+
+typedef struct RASCContext {
+    AVClass        *class;
+    int             skip_cursor;
+    GetByteContext  gb;
+    uint8_t        *delta;
+    int             delta_size;
+    uint8_t        *cursor;
+    int             cursor_size;
+    unsigned        cursor_w;
+    unsigned        cursor_h;
+    unsigned        cursor_x;
+    unsigned        cursor_y;
+    int             stride;
+    int             bpp;
+    z_stream        zstream;
+    AVFrame        *frame;
+    AVFrame        *frame1;
+    AVFrame        *frame2;
+} RASCContext;
+
+static void clear_plane(AVCodecContext *avctx, AVFrame *frame)
+{
+    RASCContext *s = avctx->priv_data;
+    uint8_t *dst = frame->data[0];
+
+    for (int y = 0; y < avctx->height; y++) {
+        memset(dst, 0, avctx->width * s->bpp);
+        dst += frame->linesize[0];
+    }
+}
+
+static void copy_plane(AVCodecContext *avctx, AVFrame *src, AVFrame *dst)
+{
+    RASCContext *s = avctx->priv_data;
+    uint8_t *srcp = src->data[0];
+    uint8_t *dstp = dst->data[0];
+
+    for (int y = 0; y < avctx->height; y++) {
+        memcpy(dstp, srcp, s->stride);
+        srcp += src->linesize[0];
+        dstp += dst->linesize[0];
+    }
+}
+
+static int init_frames(AVCodecContext *avctx)
+{
+    RASCContext *s = avctx->priv_data;
+    int ret;
+
+    av_frame_unref(s->frame1);
+    av_frame_unref(s->frame2);
+    if ((ret = ff_get_buffer(avctx, s->frame1, 0)) < 0)
+        return ret;
+
+    if ((ret = ff_get_buffer(avctx, s->frame2, 0)) < 0)
+        return ret;
+
+    clear_plane(avctx, s->frame2);
+    clear_plane(avctx, s->frame1);
+
+    return 0;
+}
+
+static int decode_fint(AVCodecContext *avctx,
+                       AVPacket *avpkt, unsigned size)
+{
+    RASCContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    unsigned w, h, fmt;
+    int ret;
+
+    if (bytestream2_peek_le32(gb) != 0x65) {
+        if (!s->frame2->data[0] || !s->frame1->data[0])
+            return AVERROR_INVALIDDATA;
+
+        clear_plane(avctx, s->frame2);
+        clear_plane(avctx, s->frame1);
+        return 0;
+    }
+
+    bytestream2_skip(gb, 8);
+    w = bytestream2_get_le32(gb);
+    h = bytestream2_get_le32(gb);
+    bytestream2_skip(gb, 30);
+    fmt = bytestream2_get_le16(gb);
+    bytestream2_skip(gb, 24);
+
+    switch (fmt) {
+    case 8:  s->stride = FFALIGN(w, 4);
+             s->bpp    = 1;
+             fmt = AV_PIX_FMT_PAL8; break;
+    case 16: s->stride = w * 2;
+             s->bpp    = 2;
+             fmt = AV_PIX_FMT_RGB555LE; break;
+    case 32: s->stride = w * 4;
+             s->bpp    = 4;
+             fmt = AV_PIX_FMT_BGR0; break;
+    default: return AVERROR_INVALIDDATA;
+    }
+
+    ret = ff_set_dimensions(avctx, w, h);
+    if (ret < 0)
+        return ret;
+    avctx->width  = w;
+    avctx->height = h;
+    avctx->pix_fmt = fmt;
+
+    ret = init_frames(avctx);
+    if (ret < 0)
+        return ret;
+
+    if (avctx->pix_fmt == AV_PIX_FMT_PAL8) {
+        uint32_t *pal = (uint32_t *)s->frame2->data[1];
+
+        for (int i = 0; i < 256; i++)
+            pal[i] = bytestream2_get_le32(gb) | 0xFF000000u;
+    }
+
+    return 0;
+}
+
+static int decode_zlib(AVCodecContext *avctx, AVPacket *avpkt,
+                       unsigned size, unsigned uncompressed_size)
+{
+    RASCContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    int zret;
+
+    zret = inflateReset(&s->zstream);
+    if (zret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret);
+        return AVERROR_EXTERNAL;
+    }
+
+    av_fast_padded_malloc(&s->delta, &s->delta_size, uncompressed_size);
+    if (!s->delta)
+        return AVERROR(ENOMEM);
+
+    s->zstream.next_in  = avpkt->data + bytestream2_tell(gb);
+    s->zstream.avail_in = FFMIN(size, bytestream2_get_bytes_left(gb));
+
+    s->zstream.next_out  = s->delta;
+    s->zstream.avail_out = s->delta_size;
+
+    zret = inflate(&s->zstream, Z_FINISH);
+    if (zret != Z_STREAM_END) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Inflate failed with return code: %d.\n", zret);
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+static int decode_move(AVCodecContext *avctx,
+                       AVPacket *avpkt, unsigned size)
+{
+    RASCContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    GetByteContext mc;
+    unsigned pos, compression, nb_moves;
+    unsigned uncompressed_size;
+    int ret;
+
+    pos = bytestream2_tell(gb);
+    bytestream2_skip(gb, 8);
+    nb_moves = bytestream2_get_le32(gb);
+    bytestream2_skip(gb, 8);
+    compression = bytestream2_get_le32(gb);
+
+    if (nb_moves > INT32_MAX / 16)
+        return AVERROR_INVALIDDATA;
+
+    uncompressed_size = 16 * nb_moves;
+
+    if (compression == 1) {
+        ret = decode_zlib(avctx, avpkt,
+                          size - (bytestream2_tell(gb) - pos),
+                          uncompressed_size);
+        if (ret < 0)
+            return ret;
+        bytestream2_init(&mc, s->delta, uncompressed_size);
+    } else if (compression == 0) {
+        bytestream2_init(&mc, avpkt->data + bytestream2_tell(gb),
+                         bytestream2_get_bytes_left(gb));
+    } else if (compression == 2) {
+        avpriv_request_sample(avctx, "compression %d", compression);
+        return AVERROR_PATCHWELCOME;
+    } else {
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (bytestream2_get_bytes_left(&mc) < uncompressed_size)
+        return AVERROR_INVALIDDATA;
+
+    for (int i = 0; i < nb_moves; i++) {
+        int type, start_x, start_y, end_x, end_y, mov_x, mov_y;
+        uint8_t *e2, *b1, *b2;
+        int w, h;
+
+        type = bytestream2_get_le16(&mc);
+        start_x = bytestream2_get_le16(&mc);
+        start_y = bytestream2_get_le16(&mc);
+        end_x = bytestream2_get_le16(&mc);
+        end_y = bytestream2_get_le16(&mc);
+        mov_x = bytestream2_get_le16(&mc);
+        mov_y = bytestream2_get_le16(&mc);
+        bytestream2_skip(&mc, 2);
+
+        if (start_x >= avctx->width || start_y >= avctx->height ||
+            end_x >= avctx->width || end_y >= avctx->height ||
+            mov_x >= avctx->width || mov_y >= avctx->height) {
+            continue;
+        }
+
+        if (start_x >= end_x || start_y >= end_y)
+            continue;
+
+        w = end_x - start_x;
+        h = end_y - start_y;
+
+        if (mov_x + w > avctx->width || mov_y + h > avctx->height)
+            continue;
+
+        if (!s->frame2->data[0] || !s->frame1->data[0])
+            return AVERROR_INVALIDDATA;
+
+        b1 = s->frame1->data[0] + s->frame1->linesize[0] * (start_y + h - 1) + start_x * s->bpp;
+        b2 = s->frame2->data[0] + s->frame2->linesize[0] * (start_y + h - 1) + start_x * s->bpp;
+        e2 = s->frame2->data[0] + s->frame2->linesize[0] * (mov_y + h - 1) + mov_x * s->bpp;
+
+        if (type == 2) {
+            for (int j = 0; j < h; j++) {
+                memcpy(b1, b2, w * s->bpp);
+                b1 -= s->frame1->linesize[0];
+                b2 -= s->frame2->linesize[0];
+            }
+        } else if (type == 1) {
+            for (int j = 0; j < h; j++) {
+                memset(b2, 0, w * s->bpp);
+                b2 -= s->frame2->linesize[0];
+            }
+        } else if (type == 0) {
+            uint8_t *buffer;
+
+            av_fast_padded_malloc(&s->delta, &s->delta_size, w * h * s->bpp);
+            buffer = s->delta;
+            if (!buffer)
+                return AVERROR(ENOMEM);
+
+            for (int j = 0; j < h; j++) {
+                memcpy(buffer + j * w * s->bpp, e2, w * s->bpp);
+                e2 -= s->frame2->linesize[0];
+            }
+
+            for (int j = 0; j < h; j++) {
+                memcpy(b2, buffer + j * w * s->bpp, w * s->bpp);
+                b2 -= s->frame2->linesize[0];
+            }
+        } else {
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    bytestream2_skip(gb, size - (bytestream2_tell(gb) - pos));
+
+    return 0;
+}
+
+#define NEXT_LINE                        \
+    if (cx >= w * s->bpp) {              \
+        cx = 0;                          \
+        cy--;                            \
+        b1 -= s->frame1->linesize[0];    \
+        b2 -= s->frame2->linesize[0];    \
+    }                                    \
+    len--;
+
+static int decode_dlta(AVCodecContext *avctx,
+                       AVPacket *avpkt, unsigned size)
+{
+    RASCContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    GetByteContext dc;
+    unsigned uncompressed_size, pos;
+    unsigned x, y, w, h;
+    int ret, cx, cy, compression;
+    uint8_t *b1, *b2;
+
+    pos = bytestream2_tell(gb);
+    bytestream2_skip(gb, 12);
+    uncompressed_size = bytestream2_get_le32(gb);
+    x = bytestream2_get_le32(gb);
+    y = bytestream2_get_le32(gb);
+    w = bytestream2_get_le32(gb);
+    h = bytestream2_get_le32(gb);
+
+    if (x >= avctx->width || y >= avctx->height ||
+        w > avctx->width || h > avctx->height)
+        return AVERROR_INVALIDDATA;
+
+    if (x + w > avctx->width || y + h > avctx->height)
+        return AVERROR_INVALIDDATA;
+
+    bytestream2_skip(gb, 4);
+    compression = bytestream2_get_le32(gb);
+
+    if (compression == 1) {
+        ret = decode_zlib(avctx, avpkt, size, uncompressed_size);
+        if (ret < 0)
+            return ret;
+        bytestream2_init(&dc, s->delta, uncompressed_size);
+    } else if (compression == 0) {
+        if (bytestream2_get_bytes_left(gb) < uncompressed_size)
+            return AVERROR_INVALIDDATA;
+        bytestream2_init(&dc, avpkt->data + bytestream2_tell(gb),
+                         uncompressed_size);
+    } else if (compression == 2) {
+        avpriv_request_sample(avctx, "compression %d", compression);
+        return AVERROR_PATCHWELCOME;
+    } else {
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (!s->frame2->data[0] || !s->frame1->data[0])
+        return AVERROR_INVALIDDATA;
+
+    b1  = s->frame1->data[0] + s->frame1->linesize[0] * (y + h - 1) + x * s->bpp;
+    b2  = s->frame2->data[0] + s->frame2->linesize[0] * (y + h - 1) + x * s->bpp;
+    cx = 0, cy = h;
+    while (bytestream2_get_bytes_left(&dc) > 0) {
+        int type = bytestream2_get_byte(&dc);
+        int len = bytestream2_get_byte(&dc);
+        unsigned fill;
+
+        switch (type) {
+        case 1:
+            while (len > 0 && cy > 0) {
+                cx++;
+                NEXT_LINE
+            }
+            break;
+        case 2:
+            while (len > 0 && cy > 0) {
+                int v0 = b1[cx];
+                int v1 = b2[cx];
+
+                b2[cx] = v0;
+                b1[cx] = v1;
+                cx++;
+                NEXT_LINE
+            }
+            break;
+        case 3:
+            while (len > 0 && cy > 0) {
+                fill = bytestream2_get_byte(&dc);
+                b1[cx] = b2[cx];
+                b2[cx] = fill;
+                cx++;
+                NEXT_LINE
+            }
+            break;
+        case 4:
+            fill = bytestream2_get_byte(&dc);
+            while (len > 0 && cy > 0) {
+                AV_WL32(b1 + cx, AV_RL32(b2 + cx));
+                AV_WL32(b2 + cx, fill);
+                cx++;
+                NEXT_LINE
+            }
+            break;
+        case 7:
+            fill = bytestream2_get_le32(&dc);
+            while (len > 0 && cy > 0) {
+                AV_WL32(b1 + cx, AV_RL32(b2 + cx));
+                AV_WL32(b2 + cx, fill);
+                cx += 4;
+                NEXT_LINE
+            }
+            break;
+        case 10:
+            while (len > 0 && cy > 0) {
+                cx += 4;
+                NEXT_LINE
+            }
+            break;
+        case 12:
+            while (len > 0 && cy > 0) {
+                unsigned v0, v1;
+
+                v0 = AV_RL32(b2 + cx);
+                v1 = AV_RL32(b1 + cx);
+                AV_WL32(b2 + cx, v1);
+                AV_WL32(b1 + cx, v0);
+                cx += 4;
+                NEXT_LINE
+            }
+            break;
+        case 13:
+            while (len > 0 && cy > 0) {
+                fill = bytestream2_get_le32(&dc);
+                AV_WL32(b1 + cx, AV_RL32(b2 + cx));
+                AV_WL32(b2 + cx, fill);
+                cx += 4;
+                NEXT_LINE
+            }
+            break;
+        default:
+            avpriv_request_sample(avctx, "runlen %d", type);
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    bytestream2_skip(gb, size - (bytestream2_tell(gb) - pos));
+
+    return 0;
+}
+
+static int decode_kfrm(AVCodecContext *avctx,
+                       AVPacket *avpkt, unsigned size)
+{
+    RASCContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    uint8_t *dst;
+    unsigned pos;
+    int zret, ret;
+
+    pos = bytestream2_tell(gb);
+    if (bytestream2_peek_le32(gb) == 0x65) {
+        ret = decode_fint(avctx, avpkt, size);
+        if (ret < 0)
+            return ret;
+    }
+
+    if (!s->frame2->data[0])
+        return AVERROR_INVALIDDATA;
+
+    zret = inflateReset(&s->zstream);
+    if (zret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret);
+        return AVERROR_EXTERNAL;
+    }
+
+    s->zstream.next_in  = avpkt->data + bytestream2_tell(gb);
+    s->zstream.avail_in = bytestream2_get_bytes_left(gb);
+
+    dst = s->frame2->data[0] + (avctx->height - 1) * s->frame2->linesize[0];
+    for (int i = 0; i < avctx->height; i++) {
+        s->zstream.next_out  = dst;
+        s->zstream.avail_out = s->stride;
+
+        zret = inflate(&s->zstream, Z_SYNC_FLUSH);
+        if (zret != Z_OK && zret != Z_STREAM_END) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Inflate failed with return code: %d.\n", zret);
+            return AVERROR_INVALIDDATA;
+        }
+
+        dst -= s->frame2->linesize[0];
+    }
+
+    dst = s->frame1->data[0] + (avctx->height - 1) * s->frame1->linesize[0];
+    for (int i = 0; i < avctx->height; i++) {
+        s->zstream.next_out  = dst;
+        s->zstream.avail_out = s->stride;
+
+        zret = inflate(&s->zstream, Z_SYNC_FLUSH);
+        if (zret != Z_OK && zret != Z_STREAM_END) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Inflate failed with return code: %d.\n", zret);
+            return AVERROR_INVALIDDATA;
+        }
+
+        dst -= s->frame1->linesize[0];
+    }
+
+    bytestream2_skip(gb, size - (bytestream2_tell(gb) - pos));
+
+    return 0;
+}
+
+static int decode_mous(AVCodecContext *avctx,
+                       AVPacket *avpkt, unsigned size)
+{
+    RASCContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    unsigned w, h, pos, uncompressed_size;
+    int ret;
+
+    pos = bytestream2_tell(gb);
+    bytestream2_skip(gb, 8);
+    w = bytestream2_get_le32(gb);
+    h = bytestream2_get_le32(gb);
+    bytestream2_skip(gb, 12);
+    uncompressed_size = bytestream2_get_le32(gb);
+
+    if (w > avctx->width || h > avctx->height)
+        return AVERROR_INVALIDDATA;
+
+    if (uncompressed_size != 3 * w * h)
+        return AVERROR_INVALIDDATA;
+
+    av_fast_padded_malloc(&s->cursor, &s->cursor_size, uncompressed_size);
+    if (!s->cursor)
+        return AVERROR(ENOMEM);
+
+    ret = decode_zlib(avctx, avpkt,
+                      size - (bytestream2_tell(gb) - pos),
+                      uncompressed_size);
+    if (ret < 0)
+        return ret;
+    memcpy(s->cursor, s->delta, uncompressed_size);
+
+    bytestream2_skip(gb, size - (bytestream2_tell(gb) - pos));
+
+    s->cursor_w = w;
+    s->cursor_h = h;
+
+    return 0;
+}
+
+static int decode_mpos(AVCodecContext *avctx,
+                       AVPacket *avpkt, unsigned size)
+{
+    RASCContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    unsigned pos;
+
+    pos = bytestream2_tell(gb);
+    bytestream2_skip(gb, 8);
+    s->cursor_x = bytestream2_get_le32(gb);
+    s->cursor_y = bytestream2_get_le32(gb);
+
+    bytestream2_skip(gb, size - (bytestream2_tell(gb) - pos));
+
+    return 0;
+}
+
+static void draw_cursor(AVCodecContext *avctx)
+{
+    RASCContext *s = avctx->priv_data;
+    uint8_t *dst, *pal;
+
+    if (!s->cursor)
+        return;
+
+    if (s->cursor_x >= avctx->width || s->cursor_y >= avctx->height)
+        return;
+
+    if (s->cursor_x + s->cursor_w > avctx->width ||
+        s->cursor_y + s->cursor_h > avctx->height)
+        return;
+
+    if (avctx->pix_fmt == AV_PIX_FMT_PAL8) {
+        pal = s->frame->data[1];
+        for (int i = 0; i < s->cursor_h; i++) {
+            for (int j = 0; j < s->cursor_w; j++) {
+                int cr = s->cursor[3 * s->cursor_w * (s->cursor_h - i - 1) + 3 * j + 0];
+                int cg = s->cursor[3 * s->cursor_w * (s->cursor_h - i - 1) + 3 * j + 1];
+                int cb = s->cursor[3 * s->cursor_w * (s->cursor_h - i - 1) + 3 * j + 2];
+                int best = INT_MAX;
+                int index = 0;
+                int dist;
+
+                if (cr == s->cursor[0] && cg == s->cursor[1] && cb == s->cursor[2])
+                    continue;
+
+                dst = s->frame->data[0] + s->frame->linesize[0] * (s->cursor_y + i) + (s->cursor_x + j);
+                for (int k = 0; k < 256; k++) {
+                    int pr = pal[k * 4 + 0];
+                    int pg = pal[k * 4 + 1];
+                    int pb = pal[k * 4 + 2];
+
+                    dist = FFABS(cr - pr) + FFABS(cg - pg) + FFABS(cb - pb);
+                    if (dist < best) {
+                        best = dist;
+                        index = k;
+                    }
+                }
+                dst[0] = index;
+            }
+        }
+    } else if (avctx->pix_fmt == AV_PIX_FMT_RGB555LE) {
+        for (int i = 0; i < s->cursor_h; i++) {
+            for (int j = 0; j < s->cursor_w; j++) {
+                int cr = s->cursor[3 * s->cursor_w * (s->cursor_h - i - 1) + 3 * j + 0];
+                int cg = s->cursor[3 * s->cursor_w * (s->cursor_h - i - 1) + 3 * j + 1];
+                int cb = s->cursor[3 * s->cursor_w * (s->cursor_h - i - 1) + 3 * j + 2];
+
+                if (cr == s->cursor[0] && cg == s->cursor[1] && cb == s->cursor[2])
+                    continue;
+
+                cr >>= 3; cg >>=3; cb >>= 3;
+                dst = s->frame->data[0] + s->frame->linesize[0] * (s->cursor_y + i) + 2 * (s->cursor_x + j);
+                AV_WL16(dst, cr | cg << 5 | cb << 10);
+            }
+        }
+    } else if (avctx->pix_fmt == AV_PIX_FMT_BGR0) {
+        for (int i = 0; i < s->cursor_h; i++) {
+            for (int j = 0; j < s->cursor_w; j++) {
+                int cr = s->cursor[3 * s->cursor_w * (s->cursor_h - i - 1) + 3 * j + 0];
+                int cg = s->cursor[3 * s->cursor_w * (s->cursor_h - i - 1) + 3 * j + 1];
+                int cb = s->cursor[3 * s->cursor_w * (s->cursor_h - i - 1) + 3 * j + 2];
+
+                if (cr == s->cursor[0] && cg == s->cursor[1] && cb == s->cursor[2])
+                    continue;
+
+                dst = s->frame->data[0] + s->frame->linesize[0] * (s->cursor_y + i) + 4 * (s->cursor_x + j);
+                dst[0] = cb;
+                dst[1] = cg;
+                dst[2] = cr;
+            }
+        }
+    }
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *got_frame,
+                        AVPacket *avpkt)
+{
+    RASCContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    int ret, intra = 0;
+    AVFrame *frame = data;
+
+    bytestream2_init(gb, avpkt->data, avpkt->size);
+
+    if (bytestream2_peek_le32(gb) == EMPT)
+        return avpkt->size;
+
+    s->frame = frame;
+
+    while (bytestream2_get_bytes_left(gb) > 0) {
+        unsigned type, size = 0;
+
+        type = bytestream2_get_le32(gb);
+        if (type == KBND || type == BNDL) {
+            intra = type == KBND;
+            type = bytestream2_get_le32(gb);
+        }
+
+        size = bytestream2_get_le32(gb);
+        if (bytestream2_get_bytes_left(gb) < size)
+            return AVERROR_INVALIDDATA;
+
+        switch (type) {
+        case FINT:
+        case INIT:
+            ret = decode_fint(avctx, avpkt, size);
+            break;
+        case KFRM:
+            ret = decode_kfrm(avctx, avpkt, size);
+            break;
+        case DLTA:
+            ret = decode_dlta(avctx, avpkt, size);
+            break;
+        case MOVE:
+            ret = decode_move(avctx, avpkt, size);
+            break;
+        case MOUS:
+            ret = decode_mous(avctx, avpkt, size);
+            break;
+        case MPOS:
+            ret = decode_mpos(avctx, avpkt, size);
+            break;
+        default:
+            bytestream2_skip(gb, size);
+        }
+
+        if (ret < 0)
+            return ret;
+    }
+
+    if ((ret = ff_get_buffer(avctx, s->frame, 0)) < 0)
+        return ret;
+
+    if (!s->frame2->data[0] || !s->frame1->data[0])
+        return AVERROR_INVALIDDATA;
+
+    copy_plane(avctx, s->frame2, s->frame);
+    if (avctx->pix_fmt == AV_PIX_FMT_PAL8)
+        memcpy(s->frame->data[1], s->frame2->data[1], 1024);
+    if (!s->skip_cursor)
+        draw_cursor(avctx);
+
+    s->frame->key_frame = intra;
+    s->frame->pict_type = intra ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+
+    *got_frame = 1;
+
+    return avpkt->size;
+}
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    RASCContext *s = avctx->priv_data;
+    int zret;
+
+    s->zstream.zalloc = Z_NULL;
+    s->zstream.zfree = Z_NULL;
+    s->zstream.opaque = Z_NULL;
+    zret = inflateInit(&s->zstream);
+    if (zret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate init error: %d\n", zret);
+        return AVERROR_EXTERNAL;
+    }
+
+    s->frame1 = av_frame_alloc();
+    s->frame2 = av_frame_alloc();
+    if (!s->frame1 || !s->frame2)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static av_cold int decode_close(AVCodecContext *avctx)
+{
+    RASCContext *s = avctx->priv_data;
+
+    av_freep(&s->cursor);
+    s->cursor_size = 0;
+    av_freep(&s->delta);
+    s->delta_size = 0;
+    av_frame_free(&s->frame1);
+    av_frame_free(&s->frame2);
+    inflateEnd(&s->zstream);
+
+    return 0;
+}
+
+static void decode_flush(AVCodecContext *avctx)
+{
+    RASCContext *s = avctx->priv_data;
+
+    clear_plane(avctx, s->frame1);
+    clear_plane(avctx, s->frame2);
+}
+
+static const AVOption options[] = {
+{ "skip_cursor", "skip the cursor", offsetof(RASCContext, skip_cursor), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM },
+{ NULL },
+};
+
+static const AVClass rasc_decoder_class = {
+    .class_name = "rasc decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_rasc_decoder = {
+    .name             = "rasc",
+    .long_name        = NULL_IF_CONFIG_SMALL("RemotelyAnywhere Screen Capture"),
+    .type             = AVMEDIA_TYPE_VIDEO,
+    .id               = AV_CODEC_ID_RASC,
+    .priv_data_size   = sizeof(RASCContext),
+    .init             = decode_init,
+    .close            = decode_close,
+    .decode           = decode_frame,
+    .flush            = decode_flush,
+    .capabilities     = AV_CODEC_CAP_DR1,
+    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
+                        FF_CODEC_CAP_INIT_CLEANUP,
+    .priv_class       = &rasc_decoder_class,
+};

diff --git a/libavcodec/raw.c b/libavcodec/raw.c
index 8da2a97..d731c08 100644
--- a/libavcodec/raw.c
+++ b/libavcodec/raw.c

@@ -125,6 +125,8 @@
     { AV_PIX_FMT_GRAY10BE,    MKTAG(10 ,  0 , '1', 'Y') },
     { AV_PIX_FMT_GRAY12LE,    MKTAG('Y', '1',  0 , 12 ) },
     { AV_PIX_FMT_GRAY12BE,    MKTAG(12 ,  0 , '1', 'Y') },
+    { AV_PIX_FMT_GRAY14LE,    MKTAG('Y', '1',  0 , 14 ) },
+    { AV_PIX_FMT_GRAY14BE,    MKTAG(14 ,  0 , '1', 'Y') },
     { AV_PIX_FMT_GRAY16LE,    MKTAG('Y', '1',  0 , 16 ) },
     { AV_PIX_FMT_GRAY16BE,    MKTAG(16 ,  0 , '1', 'Y') },
     { AV_PIX_FMT_YUV420P9LE,  MKTAG('Y', '3', 11 ,  9 ) },

diff --git a/libavcodec/raw.h b/libavcodec/raw.h
index 24bf4cc..28a27b1 100644
--- a/libavcodec/raw.h
+++ b/libavcodec/raw.h

@@ -28,6 +28,7 @@
 #define AVCODEC_RAW_H
 
 #include "avcodec.h"
+#include "internal.h"
 #include "libavutil/internal.h"
 
 typedef struct PixelFormatTag {
@@ -41,7 +42,7 @@
 
 enum AVPixelFormat avpriv_find_pix_fmt(const PixelFormatTag *tags, unsigned int fourcc);
 
-extern av_export const PixelFormatTag avpriv_pix_fmt_bps_avi[];
-extern av_export const PixelFormatTag avpriv_pix_fmt_bps_mov[];
+extern av_export_avcodec const PixelFormatTag avpriv_pix_fmt_bps_avi[];
+extern av_export_avcodec const PixelFormatTag avpriv_pix_fmt_bps_mov[];
 
 #endif /* AVCODEC_RAW_H */

diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index 1893b26..53f5b76 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c

@@ -92,12 +92,14 @@
         return AVERROR(EINVAL);
     }
 
-    if (desc->flags & (AV_PIX_FMT_FLAG_PAL | AV_PIX_FMT_FLAG_PSEUDOPAL)) {
+    if (desc->flags & (AV_PIX_FMT_FLAG_PAL | FF_PSEUDOPAL)) {
         context->palette = av_buffer_alloc(AVPALETTE_SIZE);
         if (!context->palette)
             return AVERROR(ENOMEM);
+#if FF_API_PSEUDOPAL
         if (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
             avpriv_set_systematic_pal2((uint32_t*)context->palette->data, avctx->pix_fmt);
+#endif
         else {
             memset(context->palette->data, 0, AVPALETTE_SIZE);
             if (avctx->bits_per_coded_sample == 1)
@@ -423,7 +425,7 @@
     }
 
     if ((avctx->pix_fmt == AV_PIX_FMT_PAL8 && buf_size < context->frame_size) ||
-        (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)) {
+        (desc->flags & FF_PSEUDOPAL)) {
         frame->buf[1]  = av_buffer_ref(context->palette);
         if (!frame->buf[1]) {
             av_buffer_unref(&frame->buf[0]);

diff --git a/libavcodec/remove_extradata_bsf.c b/libavcodec/remove_extradata_bsf.c
index a54bbdb..b762079 100644
--- a/libavcodec/remove_extradata_bsf.c
+++ b/libavcodec/remove_extradata_bsf.c

@@ -38,30 +38,26 @@
     AVCodecContext *avctx;
 } RemoveExtradataContext;
 
-static int remove_extradata(AVBSFContext *ctx, AVPacket *out)
+static int remove_extradata(AVBSFContext *ctx, AVPacket *pkt)
 {
     RemoveExtradataContext *s = ctx->priv_data;
 
-    AVPacket *in;
     int ret;
 
-    ret = ff_bsf_get_packet(ctx, &in);
+    ret = ff_bsf_get_packet_ref(ctx, pkt);
     if (ret < 0)
         return ret;
 
     if (s->parser && s->parser->parser->split) {
         if (s->freq == REMOVE_FREQ_ALL ||
-            (s->freq == REMOVE_FREQ_NONKEYFRAME && !(in->flags & AV_PKT_FLAG_KEY)) ||
-            (s->freq == REMOVE_FREQ_KEYFRAME && in->flags & AV_PKT_FLAG_KEY)) {
-            int i = s->parser->parser->split(s->avctx, in->data, in->size);
-            in->data += i;
-            in->size -= i;
+            (s->freq == REMOVE_FREQ_NONKEYFRAME && !(pkt->flags & AV_PKT_FLAG_KEY)) ||
+            (s->freq == REMOVE_FREQ_KEYFRAME && pkt->flags & AV_PKT_FLAG_KEY)) {
+            int i = s->parser->parser->split(s->avctx, pkt->data, pkt->size);
+            pkt->data += i;
+            pkt->size -= i;
         }
     }
 
-    av_packet_move_ref(out, in);
-    av_packet_free(&in);
-
     return 0;
 }
 
@@ -94,12 +90,13 @@
 }
 
 #define OFFSET(x) offsetof(RemoveExtradataContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
 static const AVOption options[] = {
-    { "freq", NULL, OFFSET(freq), AV_OPT_TYPE_INT, { .i64 = REMOVE_FREQ_KEYFRAME }, REMOVE_FREQ_KEYFRAME, REMOVE_FREQ_NONKEYFRAME, 0, "freq" },
-        { "k",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE_FREQ_NONKEYFRAME }, .unit = "freq" },
-        { "keyframe", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE_FREQ_KEYFRAME }, .unit = "freq" },
-        { "e",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE_FREQ_ALL      }, .unit = "freq" },
-        { "all",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE_FREQ_ALL      }, .unit = "freq" },
+    { "freq", NULL, OFFSET(freq), AV_OPT_TYPE_INT, { .i64 = REMOVE_FREQ_KEYFRAME }, REMOVE_FREQ_KEYFRAME, REMOVE_FREQ_NONKEYFRAME, FLAGS, "freq" },
+        { "k",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE_FREQ_NONKEYFRAME }, .flags = FLAGS, .unit = "freq" },
+        { "keyframe", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE_FREQ_KEYFRAME }, .flags = FLAGS, .unit = "freq" },
+        { "e",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE_FREQ_ALL      }, .flags = FLAGS, .unit = "freq" },
+        { "all",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE_FREQ_ALL      }, .flags = FLAGS, .unit = "freq" },
     { NULL },
 };
 

diff --git a/libavcodec/resample.c b/libavcodec/resample.c
deleted file mode 100644
index 4c5eb9f..0000000
--- a/libavcodec/resample.c
+++ /dev/null

@@ -1,439 +0,0 @@
-/*
- * samplerate conversion for both audio and video
- * Copyright (c) 2000 Fabrice Bellard
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * samplerate conversion for both audio and video
- */
-
-#include <string.h>
-
-#include "avcodec.h"
-#include "audioconvert.h"
-#include "libavutil/opt.h"
-#include "libavutil/mem.h"
-#include "libavutil/samplefmt.h"
-
-#if FF_API_AVCODEC_RESAMPLE
-FF_DISABLE_DEPRECATION_WARNINGS
-
-#define MAX_CHANNELS 8
-
-struct AVResampleContext;
-
-static const char *context_to_name(void *ptr)
-{
-    return "audioresample";
-}
-
-static const AVOption options[] = {{NULL}};
-static const AVClass audioresample_context_class = {
-    "ReSampleContext", context_to_name, options, LIBAVUTIL_VERSION_INT
-};
-
-struct ReSampleContext {
-    struct AVResampleContext *resample_context;
-    short *temp[MAX_CHANNELS];
-    int temp_len;
-    float ratio;
-    /* channel convert */
-    int input_channels, output_channels, filter_channels;
-    AVAudioConvert *convert_ctx[2];
-    enum AVSampleFormat sample_fmt[2]; ///< input and output sample format
-    unsigned sample_size[2];           ///< size of one sample in sample_fmt
-    short *buffer[2];                  ///< buffers used for conversion to S16
-    unsigned buffer_size[2];           ///< sizes of allocated buffers
-};
-
-/* n1: number of samples */
-static void stereo_to_mono(short *output, short *input, int n1)
-{
-    short *p, *q;
-    int n = n1;
-
-    p = input;
-    q = output;
-    while (n >= 4) {
-        q[0] = (p[0] + p[1]) >> 1;
-        q[1] = (p[2] + p[3]) >> 1;
-        q[2] = (p[4] + p[5]) >> 1;
-        q[3] = (p[6] + p[7]) >> 1;
-        q += 4;
-        p += 8;
-        n -= 4;
-    }
-    while (n > 0) {
-        q[0] = (p[0] + p[1]) >> 1;
-        q++;
-        p += 2;
-        n--;
-    }
-}
-
-/* n1: number of samples */
-static void mono_to_stereo(short *output, short *input, int n1)
-{
-    short *p, *q;
-    int n = n1;
-    int v;
-
-    p = input;
-    q = output;
-    while (n >= 4) {
-        v = p[0]; q[0] = v; q[1] = v;
-        v = p[1]; q[2] = v; q[3] = v;
-        v = p[2]; q[4] = v; q[5] = v;
-        v = p[3]; q[6] = v; q[7] = v;
-        q += 8;
-        p += 4;
-        n -= 4;
-    }
-    while (n > 0) {
-        v = p[0]; q[0] = v; q[1] = v;
-        q += 2;
-        p += 1;
-        n--;
-    }
-}
-
-/*
-5.1 to stereo input: [fl, fr, c, lfe, rl, rr]
-- Left = front_left + rear_gain * rear_left + center_gain * center
-- Right = front_right + rear_gain * rear_right + center_gain * center
-Where rear_gain is usually around 0.5-1.0 and
-      center_gain is almost always 0.7 (-3 dB)
-*/
-static void surround_to_stereo(short **output, short *input, int channels, int samples)
-{
-    int i;
-    short l, r;
-
-    for (i = 0; i < samples; i++) {
-        int fl,fr,c,rl,rr;
-        fl = input[0];
-        fr = input[1];
-        c = input[2];
-        // lfe = input[3];
-        rl = input[4];
-        rr = input[5];
-
-        l = av_clip_int16(fl + (0.5 * rl) + (0.7 * c));
-        r = av_clip_int16(fr + (0.5 * rr) + (0.7 * c));
-
-        /* output l & r. */
-        *output[0]++ = l;
-        *output[1]++ = r;
-
-        /* increment input. */
-        input += channels;
-    }
-}
-
-static void deinterleave(short **output, short *input, int channels, int samples)
-{
-    int i, j;
-
-    for (i = 0; i < samples; i++) {
-        for (j = 0; j < channels; j++) {
-            *output[j]++ = *input++;
-        }
-    }
-}
-
-static void interleave(short *output, short **input, int channels, int samples)
-{
-    int i, j;
-
-    for (i = 0; i < samples; i++) {
-        for (j = 0; j < channels; j++) {
-            *output++ = *input[j]++;
-        }
-    }
-}
-
-static void ac3_5p1_mux(short *output, short *input1, short *input2, int n)
-{
-    int i;
-    short l, r;
-
-    for (i = 0; i < n; i++) {
-        l = *input1++;
-        r = *input2++;
-        *output++ = l;                  /* left */
-        *output++ = (l / 2) + (r / 2);  /* center */
-        *output++ = r;                  /* right */
-        *output++ = 0;                  /* left surround */
-        *output++ = 0;                  /* right surroud */
-        *output++ = 0;                  /* low freq */
-    }
-}
-
-#define SUPPORT_RESAMPLE(ch1, ch2, ch3, ch4, ch5, ch6, ch7, ch8) \
-    ch8<<7 | ch7<<6 | ch6<<5 | ch5<<4 | ch4<<3 | ch3<<2 | ch2<<1 | ch1<<0
-
-static const uint8_t supported_resampling[MAX_CHANNELS] = {
-    // output ch:    1  2  3  4  5  6  7  8
-    SUPPORT_RESAMPLE(1, 1, 0, 0, 0, 0, 0, 0), // 1 input channel
-    SUPPORT_RESAMPLE(1, 1, 0, 0, 0, 1, 0, 0), // 2 input channels
-    SUPPORT_RESAMPLE(0, 0, 1, 0, 0, 0, 0, 0), // 3 input channels
-    SUPPORT_RESAMPLE(0, 0, 0, 1, 0, 0, 0, 0), // 4 input channels
-    SUPPORT_RESAMPLE(0, 0, 0, 0, 1, 0, 0, 0), // 5 input channels
-    SUPPORT_RESAMPLE(0, 1, 0, 0, 0, 1, 0, 0), // 6 input channels
-    SUPPORT_RESAMPLE(0, 0, 0, 0, 0, 0, 1, 0), // 7 input channels
-    SUPPORT_RESAMPLE(0, 0, 0, 0, 0, 0, 0, 1), // 8 input channels
-};
-
-ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
-                                        int output_rate, int input_rate,
-                                        enum AVSampleFormat sample_fmt_out,
-                                        enum AVSampleFormat sample_fmt_in,
-                                        int filter_length, int log2_phase_count,
-                                        int linear, double cutoff)
-{
-    ReSampleContext *s;
-
-    if (input_channels > MAX_CHANNELS) {
-        av_log(NULL, AV_LOG_ERROR,
-               "Resampling with input channels greater than %d is unsupported.\n",
-               MAX_CHANNELS);
-        return NULL;
-    }
-    if (!(supported_resampling[input_channels-1] & (1<<(output_channels-1)))) {
-        int i;
-        av_log(NULL, AV_LOG_ERROR, "Unsupported audio resampling. Allowed "
-               "output channels for %d input channel%s", input_channels,
-               input_channels > 1 ? "s:" : ":");
-        for (i = 0; i < MAX_CHANNELS; i++)
-            if (supported_resampling[input_channels-1] & (1<<i))
-                av_log(NULL, AV_LOG_ERROR, " %d", i + 1);
-        av_log(NULL, AV_LOG_ERROR, "\n");
-        return NULL;
-    }
-
-    s = av_mallocz(sizeof(ReSampleContext));
-    if (!s) {
-        av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for resample context.\n");
-        return NULL;
-    }
-
-    s->ratio = (float)output_rate / (float)input_rate;
-
-    s->input_channels = input_channels;
-    s->output_channels = output_channels;
-
-    s->filter_channels = s->input_channels;
-    if (s->output_channels < s->filter_channels)
-        s->filter_channels = s->output_channels;
-
-    s->sample_fmt[0]  = sample_fmt_in;
-    s->sample_fmt[1]  = sample_fmt_out;
-    s->sample_size[0] = av_get_bytes_per_sample(s->sample_fmt[0]);
-    s->sample_size[1] = av_get_bytes_per_sample(s->sample_fmt[1]);
-
-    if (s->sample_fmt[0] != AV_SAMPLE_FMT_S16) {
-        if (!(s->convert_ctx[0] = av_audio_convert_alloc(AV_SAMPLE_FMT_S16, 1,
-                                                         s->sample_fmt[0], 1, NULL, 0))) {
-            av_log(s, AV_LOG_ERROR,
-                   "Cannot convert %s sample format to s16 sample format\n",
-                   av_get_sample_fmt_name(s->sample_fmt[0]));
-            av_free(s);
-            return NULL;
-        }
-    }
-
-    if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) {
-        if (!(s->convert_ctx[1] = av_audio_convert_alloc(s->sample_fmt[1], 1,
-                                                         AV_SAMPLE_FMT_S16, 1, NULL, 0))) {
-            av_log(s, AV_LOG_ERROR,
-                   "Cannot convert s16 sample format to %s sample format\n",
-                   av_get_sample_fmt_name(s->sample_fmt[1]));
-            av_audio_convert_free(s->convert_ctx[0]);
-            av_free(s);
-            return NULL;
-        }
-    }
-
-    s->resample_context = av_resample_init(output_rate, input_rate,
-                                           filter_length, log2_phase_count,
-                                           linear, cutoff);
-
-    *(const AVClass**)s->resample_context = &audioresample_context_class;
-
-    return s;
-}
-
-/* resample audio. 'nb_samples' is the number of input samples */
-/* XXX: optimize it ! */
-int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples)
-{
-    int i, nb_samples1;
-    short *bufin[MAX_CHANNELS];
-    short *bufout[MAX_CHANNELS];
-    short *buftmp2[MAX_CHANNELS], *buftmp3[MAX_CHANNELS];
-    short *output_bak = NULL;
-    int lenout;
-
-    if (s->sample_fmt[0] != AV_SAMPLE_FMT_S16) {
-        int istride[1] = { s->sample_size[0] };
-        int ostride[1] = { 2 };
-        const void *ibuf[1] = { input };
-        void       *obuf[1];
-        unsigned input_size = nb_samples * s->input_channels * 2;
-
-        if (!s->buffer_size[0] || s->buffer_size[0] < input_size) {
-            av_free(s->buffer[0]);
-            s->buffer_size[0] = input_size;
-            s->buffer[0] = av_malloc(s->buffer_size[0]);
-            if (!s->buffer[0]) {
-                av_log(s->resample_context, AV_LOG_ERROR, "Could not allocate buffer\n");
-                return 0;
-            }
-        }
-
-        obuf[0] = s->buffer[0];
-
-        if (av_audio_convert(s->convert_ctx[0], obuf, ostride,
-                             ibuf, istride, nb_samples * s->input_channels) < 0) {
-            av_log(s->resample_context, AV_LOG_ERROR,
-                   "Audio sample format conversion failed\n");
-            return 0;
-        }
-
-        input = s->buffer[0];
-    }
-
-    lenout= 2*s->output_channels*nb_samples * s->ratio + 16;
-
-    if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) {
-        int out_size = lenout * av_get_bytes_per_sample(s->sample_fmt[1]) *
-                       s->output_channels;
-        output_bak = output;
-
-        if (!s->buffer_size[1] || s->buffer_size[1] < out_size) {
-            av_free(s->buffer[1]);
-            s->buffer_size[1] = out_size;
-            s->buffer[1] = av_malloc(s->buffer_size[1]);
-            if (!s->buffer[1]) {
-                av_log(s->resample_context, AV_LOG_ERROR, "Could not allocate buffer\n");
-                return 0;
-            }
-        }
-
-        output = s->buffer[1];
-    }
-
-    /* XXX: move those malloc to resample init code */
-    for (i = 0; i < s->filter_channels; i++) {
-        bufin[i] = av_malloc_array((nb_samples + s->temp_len), sizeof(short));
-        bufout[i] = av_malloc_array(lenout, sizeof(short));
-
-        if (!bufin[i] || !bufout[i]) {
-            av_log(s->resample_context, AV_LOG_ERROR, "Could not allocate buffer\n");
-            nb_samples1 = 0;
-            goto fail;
-        }
-
-        memcpy(bufin[i], s->temp[i], s->temp_len * sizeof(short));
-        buftmp2[i] = bufin[i] + s->temp_len;
-    }
-
-    if (s->input_channels == 2 && s->output_channels == 1) {
-        buftmp3[0] = output;
-        stereo_to_mono(buftmp2[0], input, nb_samples);
-    } else if (s->output_channels >= 2 && s->input_channels == 1) {
-        buftmp3[0] = bufout[0];
-        memcpy(buftmp2[0], input, nb_samples * sizeof(short));
-    } else if (s->input_channels == 6 && s->output_channels ==2) {
-        buftmp3[0] = bufout[0];
-        buftmp3[1] = bufout[1];
-        surround_to_stereo(buftmp2, input, s->input_channels, nb_samples);
-    } else if (s->output_channels >= s->input_channels && s->input_channels >= 2) {
-        for (i = 0; i < s->input_channels; i++) {
-            buftmp3[i] = bufout[i];
-        }
-        deinterleave(buftmp2, input, s->input_channels, nb_samples);
-    } else {
-        buftmp3[0] = output;
-        memcpy(buftmp2[0], input, nb_samples * sizeof(short));
-    }
-
-    nb_samples += s->temp_len;
-
-    /* resample each channel */
-    nb_samples1 = 0; /* avoid warning */
-    for (i = 0; i < s->filter_channels; i++) {
-        int consumed;
-        int is_last = i + 1 == s->filter_channels;
-
-        nb_samples1 = av_resample(s->resample_context, buftmp3[i], bufin[i],
-                                  &consumed, nb_samples, lenout, is_last);
-        s->temp_len = nb_samples - consumed;
-        s->temp[i] = av_realloc_array(s->temp[i], s->temp_len, sizeof(short));
-        memcpy(s->temp[i], bufin[i] + consumed, s->temp_len * sizeof(short));
-    }
-
-    if (s->output_channels == 2 && s->input_channels == 1) {
-        mono_to_stereo(output, buftmp3[0], nb_samples1);
-    } else if (s->output_channels == 6 && s->input_channels == 2) {
-        ac3_5p1_mux(output, buftmp3[0], buftmp3[1], nb_samples1);
-    } else if ((s->output_channels == s->input_channels && s->input_channels >= 2) ||
-               (s->output_channels == 2 && s->input_channels == 6)) {
-        interleave(output, buftmp3, s->output_channels, nb_samples1);
-    }
-
-    if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) {
-        int istride[1] = { 2 };
-        int ostride[1] = { s->sample_size[1] };
-        const void *ibuf[1] = { output };
-        void       *obuf[1] = { output_bak };
-
-        if (av_audio_convert(s->convert_ctx[1], obuf, ostride,
-                             ibuf, istride, nb_samples1 * s->output_channels) < 0) {
-            av_log(s->resample_context, AV_LOG_ERROR,
-                   "Audio sample format conversion failed\n");
-            return 0;
-        }
-    }
-
-fail:
-    for (i = 0; i < s->filter_channels; i++) {
-        av_free(bufin[i]);
-        av_free(bufout[i]);
-    }
-
-    return nb_samples1;
-}
-
-void audio_resample_close(ReSampleContext *s)
-{
-    int i;
-    av_resample_close(s->resample_context);
-    for (i = 0; i < s->filter_channels; i++)
-        av_freep(&s->temp[i]);
-    av_freep(&s->buffer[0]);
-    av_freep(&s->buffer[1]);
-    av_audio_convert_free(s->convert_ctx[0]);
-    av_audio_convert_free(s->convert_ctx[1]);
-    av_free(s);
-}
-
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif

diff --git a/libavcodec/resample2.c b/libavcodec/resample2.c
deleted file mode 100644
index 56ae9f7..0000000
--- a/libavcodec/resample2.c
+++ /dev/null

@@ -1,319 +0,0 @@
-/*
- * audio resampling
- * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * audio resampling
- * @author Michael Niedermayer <michaelni@gmx.at>
- */
-
-#include "libavutil/avassert.h"
-#include "avcodec.h"
-#include "libavutil/common.h"
-
-#if FF_API_AVCODEC_RESAMPLE
-
-#ifndef CONFIG_RESAMPLE_HP
-#define FILTER_SHIFT 15
-
-typedef int16_t FELEM;
-typedef int32_t FELEM2;
-typedef int64_t FELEML;
-#define FELEM_MAX INT16_MAX
-#define FELEM_MIN INT16_MIN
-#define WINDOW_TYPE 9
-#elif !defined(CONFIG_RESAMPLE_AUDIOPHILE_KIDDY_MODE)
-#define FILTER_SHIFT 30
-
-#define FELEM int32_t
-#define FELEM2 int64_t
-#define FELEML int64_t
-#define FELEM_MAX INT32_MAX
-#define FELEM_MIN INT32_MIN
-#define WINDOW_TYPE 12
-#else
-#define FILTER_SHIFT 0
-
-typedef double FELEM;
-typedef double FELEM2;
-typedef double FELEML;
-#define WINDOW_TYPE 24
-#endif
-
-
-typedef struct AVResampleContext{
-    const AVClass *av_class;
-    FELEM *filter_bank;
-    int filter_length;
-    int ideal_dst_incr;
-    int dst_incr;
-    int index;
-    int frac;
-    int src_incr;
-    int compensation_distance;
-    int phase_shift;
-    int phase_mask;
-    int linear;
-}AVResampleContext;
-
-/**
- * 0th order modified bessel function of the first kind.
- */
-static double bessel(double x){
-    double v=1;
-    double lastv=0;
-    double t=1;
-    int i;
-
-    x= x*x/4;
-    for(i=1; v != lastv; i++){
-        lastv=v;
-        t *= x/(i*i);
-        v += t;
-    }
-    return v;
-}
-
-/**
- * Build a polyphase filterbank.
- * @param factor resampling factor
- * @param scale wanted sum of coefficients for each filter
- * @param type 0->cubic, 1->blackman nuttall windowed sinc, 2..16->kaiser windowed sinc beta=2..16
- * @return 0 on success, negative on error
- */
-static int build_filter(FELEM *filter, double factor, int tap_count, int phase_count, int scale, int type){
-    int ph, i;
-    double x, y, w;
-    double *tab = av_malloc_array(tap_count, sizeof(*tab));
-    const int center= (tap_count-1)/2;
-
-    if (!tab)
-        return AVERROR(ENOMEM);
-
-    /* if upsampling, only need to interpolate, no filter */
-    if (factor > 1.0)
-        factor = 1.0;
-
-    for(ph=0;ph<phase_count;ph++) {
-        double norm = 0;
-        for(i=0;i<tap_count;i++) {
-            x = M_PI * ((double)(i - center) - (double)ph / phase_count) * factor;
-            if (x == 0) y = 1.0;
-            else        y = sin(x) / x;
-            switch(type){
-            case 0:{
-                const float d= -0.5; //first order derivative = -0.5
-                x = fabs(((double)(i - center) - (double)ph / phase_count) * factor);
-                if(x<1.0) y= 1 - 3*x*x + 2*x*x*x + d*(            -x*x + x*x*x);
-                else      y=                       d*(-4 + 8*x - 5*x*x + x*x*x);
-                break;}
-            case 1:
-                w = 2.0*x / (factor*tap_count) + M_PI;
-                y *= 0.3635819 - 0.4891775 * cos(w) + 0.1365995 * cos(2*w) - 0.0106411 * cos(3*w);
-                break;
-            default:
-                w = 2.0*x / (factor*tap_count*M_PI);
-                y *= bessel(type*sqrt(FFMAX(1-w*w, 0)));
-                break;
-            }
-
-            tab[i] = y;
-            norm += y;
-        }
-
-        /* normalize so that an uniform color remains the same */
-        for(i=0;i<tap_count;i++) {
-#ifdef CONFIG_RESAMPLE_AUDIOPHILE_KIDDY_MODE
-            filter[ph * tap_count + i] = tab[i] / norm;
-#else
-            filter[ph * tap_count + i] = av_clip(lrintf(tab[i] * scale / norm), FELEM_MIN, FELEM_MAX);
-#endif
-        }
-    }
-#if 0
-    {
-#define LEN 1024
-        int j,k;
-        double sine[LEN + tap_count];
-        double filtered[LEN];
-        double maxff=-2, minff=2, maxsf=-2, minsf=2;
-        for(i=0; i<LEN; i++){
-            double ss=0, sf=0, ff=0;
-            for(j=0; j<LEN+tap_count; j++)
-                sine[j]= cos(i*j*M_PI/LEN);
-            for(j=0; j<LEN; j++){
-                double sum=0;
-                ph=0;
-                for(k=0; k<tap_count; k++)
-                    sum += filter[ph * tap_count + k] * sine[k+j];
-                filtered[j]= sum / (1<<FILTER_SHIFT);
-                ss+= sine[j + center] * sine[j + center];
-                ff+= filtered[j] * filtered[j];
-                sf+= sine[j + center] * filtered[j];
-            }
-            ss= sqrt(2*ss/LEN);
-            ff= sqrt(2*ff/LEN);
-            sf= 2*sf/LEN;
-            maxff= FFMAX(maxff, ff);
-            minff= FFMIN(minff, ff);
-            maxsf= FFMAX(maxsf, sf);
-            minsf= FFMIN(minsf, sf);
-            if(i%11==0){
-                av_log(NULL, AV_LOG_ERROR, "i:%4d ss:%f ff:%13.6e-%13.6e sf:%13.6e-%13.6e\n", i, ss, maxff, minff, maxsf, minsf);
-                minff=minsf= 2;
-                maxff=maxsf= -2;
-            }
-        }
-    }
-#endif
-
-    av_free(tab);
-    return 0;
-}
-
-AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_size, int phase_shift, int linear, double cutoff){
-    AVResampleContext *c= av_mallocz(sizeof(AVResampleContext));
-    double factor= FFMIN(out_rate * cutoff / in_rate, 1.0);
-    int phase_count= 1<<phase_shift;
-
-    if (!c)
-        return NULL;
-
-    c->phase_shift= phase_shift;
-    c->phase_mask= phase_count-1;
-    c->linear= linear;
-
-    c->filter_length= FFMAX((int)ceil(filter_size/factor), 1);
-    c->filter_bank= av_mallocz_array(c->filter_length, (phase_count+1)*sizeof(FELEM));
-    if (!c->filter_bank)
-        goto error;
-    if (build_filter(c->filter_bank, factor, c->filter_length, phase_count, 1<<FILTER_SHIFT, WINDOW_TYPE))
-        goto error;
-    memcpy(&c->filter_bank[c->filter_length*phase_count+1], c->filter_bank, (c->filter_length-1)*sizeof(FELEM));
-    c->filter_bank[c->filter_length*phase_count]= c->filter_bank[c->filter_length - 1];
-
-    if(!av_reduce(&c->src_incr, &c->dst_incr, out_rate, in_rate * (int64_t)phase_count, INT32_MAX/2))
-        goto error;
-    c->ideal_dst_incr= c->dst_incr;
-
-    c->index= -phase_count*((c->filter_length-1)/2);
-
-    return c;
-error:
-    av_free(c->filter_bank);
-    av_free(c);
-    return NULL;
-}
-
-void av_resample_close(AVResampleContext *c){
-    av_freep(&c->filter_bank);
-    av_freep(&c);
-}
-
-void av_resample_compensate(AVResampleContext *c, int sample_delta, int compensation_distance){
-//    sample_delta += (c->ideal_dst_incr - c->dst_incr)*(int64_t)c->compensation_distance / c->ideal_dst_incr;
-    c->compensation_distance= compensation_distance;
-    c->dst_incr = c->ideal_dst_incr - c->ideal_dst_incr * (int64_t)sample_delta / compensation_distance;
-}
-
-int av_resample(AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx){
-    int dst_index, i;
-    int index= c->index;
-    int frac= c->frac;
-    int dst_incr_frac= c->dst_incr % c->src_incr;
-    int dst_incr=      c->dst_incr / c->src_incr;
-    int compensation_distance= c->compensation_distance;
-
-  if(compensation_distance == 0 && c->filter_length == 1 && c->phase_shift==0){
-        int64_t index2= ((int64_t)index)<<32;
-        int64_t incr= (1LL<<32) * c->dst_incr / c->src_incr;
-        dst_size= FFMIN(dst_size, (src_size-1-index) * (int64_t)c->src_incr / c->dst_incr);
-
-        for(dst_index=0; dst_index < dst_size; dst_index++){
-            dst[dst_index] = src[index2>>32];
-            index2 += incr;
-        }
-        index += dst_index * dst_incr;
-        index += (frac + dst_index * (int64_t)dst_incr_frac) / c->src_incr;
-        frac   = (frac + dst_index * (int64_t)dst_incr_frac) % c->src_incr;
-  }else{
-    for(dst_index=0; dst_index < dst_size; dst_index++){
-        FELEM *filter= c->filter_bank + c->filter_length*(index & c->phase_mask);
-        int sample_index= index >> c->phase_shift;
-        FELEM2 val=0;
-
-        if(sample_index < 0){
-            for(i=0; i<c->filter_length; i++)
-                val += src[FFABS(sample_index + i) % src_size] * filter[i];
-        }else if(sample_index + c->filter_length > src_size){
-            break;
-        }else if(c->linear){
-            FELEM2 v2=0;
-            for(i=0; i<c->filter_length; i++){
-                val += src[sample_index + i] * (FELEM2)filter[i];
-                v2  += src[sample_index + i] * (FELEM2)filter[i + c->filter_length];
-            }
-            val+=(v2-val)*(FELEML)frac / c->src_incr;
-        }else{
-            for(i=0; i<c->filter_length; i++){
-                val += src[sample_index + i] * (FELEM2)filter[i];
-            }
-        }
-
-#ifdef CONFIG_RESAMPLE_AUDIOPHILE_KIDDY_MODE
-        dst[dst_index] = av_clip_int16(lrintf(val));
-#else
-        val = (val + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;
-        dst[dst_index] = (unsigned)(val + 32768) > 65535 ? (val>>31) ^ 32767 : val;
-#endif
-
-        frac += dst_incr_frac;
-        index += dst_incr;
-        if(frac >= c->src_incr){
-            frac -= c->src_incr;
-            index++;
-        }
-
-        if(dst_index + 1 == compensation_distance){
-            compensation_distance= 0;
-            dst_incr_frac= c->ideal_dst_incr % c->src_incr;
-            dst_incr=      c->ideal_dst_incr / c->src_incr;
-        }
-    }
-  }
-    *consumed= FFMAX(index, 0) >> c->phase_shift;
-    if(index>=0) index &= c->phase_mask;
-
-    if(compensation_distance){
-        compensation_distance -= dst_index;
-        av_assert2(compensation_distance > 0);
-    }
-    if(update_ctx){
-        c->frac= frac;
-        c->index= index;
-        c->dst_incr= dst_incr_frac + c->src_incr*dst_incr;
-        c->compensation_distance= compensation_distance;
-    }
-
-    return dst_index;
-}
-
-#endif

diff --git a/libavcodec/rkmppdec.c b/libavcodec/rkmppdec.c
index bdf4dc4..143d05b 100644
--- a/libavcodec/rkmppdec.c
+++ b/libavcodec/rkmppdec.c

@@ -28,6 +28,7 @@
 
 #include "avcodec.h"
 #include "decode.h"
+#include "hwaccel.h"
 #include "internal.h"
 #include "libavutil/buffer.h"
 #include "libavutil/common.h"
@@ -39,13 +40,13 @@
 
 #define RECEIVE_FRAME_TIMEOUT   100
 #define FRAMEGROUP_MAX_FRAMES   16
+#define INPUT_MAX_PACKETS       4
 
 typedef struct {
     MppCtx ctx;
     MppApi *mpi;
     MppBufferGroup frame_group;
 
-    char first_frame;
     char first_packet;
     char eos_reached;
 
@@ -327,28 +328,14 @@
     MppBuffer buffer = NULL;
     AVDRMFrameDescriptor *desc = NULL;
     AVDRMLayerDescriptor *layer = NULL;
-    int retrycount = 0;
     int mode;
     MppFrameFormat mppformat;
     uint32_t drmformat;
 
-    // on start of decoding, MPP can return -1, which is supposed to be expected
-    // this is due to some internal MPP init which is not completed, that will
-    // only happen in the first few frames queries, but should not be interpreted
-    // as an error, Therefore we need to retry a couple times when we get -1
-    // in order to let it time to complete it's init, then we sleep a bit between retries.
-retry_get_frame:
     ret = decoder->mpi->decode_get_frame(decoder->ctx, &mppframe);
-    if (ret != MPP_OK && ret != MPP_ERR_TIMEOUT && !decoder->first_frame) {
-        if (retrycount < 5) {
-            av_log(avctx, AV_LOG_DEBUG, "Failed to get a frame, retrying (code = %d, retrycount = %d)\n", ret, retrycount);
-            usleep(10000);
-            retrycount++;
-            goto retry_get_frame;
-        } else {
-            av_log(avctx, AV_LOG_ERROR, "Failed to get a frame from MPP (code = %d)\n", ret);
-            goto fail;
-        }
+    if (ret != MPP_OK && ret != MPP_ERR_TIMEOUT) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to get a frame from MPP (code = %d)\n", ret);
+        goto fail;
     }
 
     if (mppframe) {
@@ -364,7 +351,6 @@
             avctx->height = mpp_frame_get_height(mppframe);
 
             decoder->mpi->control(decoder->ctx, MPP_DEC_SET_INFO_CHANGE_READY, NULL);
-            decoder->first_frame = 1;
 
             av_buffer_unref(&decoder->frames_ref);
 
@@ -478,7 +464,6 @@
                 goto fail;
             }
 
-            decoder->first_frame = 0;
             return 0;
         } else {
             av_log(avctx, AV_LOG_ERROR, "Failed to retrieve the frame buffer, frame is dropped (code = %d)\n", ret);
@@ -514,16 +499,17 @@
     RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data;
     int ret = MPP_NOK;
     AVPacket pkt = {0};
-    RK_S32 freeslots;
+    RK_S32 usedslots, freeslots;
 
     if (!decoder->eos_reached) {
         // we get the available slots in decoder
-        ret = decoder->mpi->control(decoder->ctx, MPP_DEC_GET_FREE_PACKET_SLOT_COUNT, &freeslots);
+        ret = decoder->mpi->control(decoder->ctx, MPP_DEC_GET_STREAM_COUNT, &usedslots);
         if (ret != MPP_OK) {
-            av_log(avctx, AV_LOG_ERROR, "Failed to get decoder free slots (code = %d).\n", ret);
+            av_log(avctx, AV_LOG_ERROR, "Failed to get decoder used slots (code = %d).\n", ret);
             return ret;
         }
 
+        freeslots = INPUT_MAX_PACKETS - usedslots;
         if (freeslots > 0) {
             ret = ff_decode_get_packet(avctx, &pkt);
             if (ret < 0 && ret != AVERROR_EOF) {
@@ -540,7 +526,7 @@
         }
 
         // make sure we keep decoder full
-        if (freeslots > 1 && decoder->first_frame)
+        if (freeslots > 1)
             return AVERROR(EAGAIN);
     }
 
@@ -557,12 +543,15 @@
 
     ret = decoder->mpi->reset(decoder->ctx);
     if (ret == MPP_OK) {
-        decoder->first_frame = 1;
         decoder->first_packet = 1;
     } else
         av_log(avctx, AV_LOG_ERROR, "Failed to reset MPI (code = %d)\n", ret);
 }
 
+static const AVCodecHWConfigInternal *rkmpp_hw_configs[] = {
+    HW_CONFIG_INTERNAL(DRM_PRIME),
+    NULL
+};
 
 #define RKMPP_DEC_CLASS(NAME) \
     static const AVClass rkmpp_##NAME##_dec_class = { \
@@ -583,11 +572,12 @@
         .receive_frame  = rkmpp_receive_frame, \
         .flush          = rkmpp_flush, \
         .priv_class     = &rkmpp_##NAME##_dec_class, \
-        .capabilities   = AV_CODEC_CAP_DELAY, \
-        .caps_internal  = AV_CODEC_CAP_AVOID_PROBING, \
+        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
         .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \
                                                          AV_PIX_FMT_NONE}, \
+        .hw_configs     = rkmpp_hw_configs, \
         .bsfs           = BSFS, \
+        .wrapper_name   = "rkmpp", \
     };
 
 RKMPP_DEC(h264,  AV_CODEC_ID_H264,          "h264_mp4toannexb")

diff --git a/libavcodec/rscc.c b/libavcodec/rscc.c
index f270cd5..7921f14 100644
--- a/libavcodec/rscc.c
+++ b/libavcodec/rscc.c

@@ -85,8 +85,18 @@
 
     /* Get pixel format and the size of the pixel */
     if (avctx->codec_tag == MKTAG('I', 'S', 'C', 'C')) {
-        avctx->pix_fmt = AV_PIX_FMT_BGRA;
-        ctx->component_size = 4;
+        if (avctx->extradata && avctx->extradata_size == 4) {
+            if ((avctx->extradata[0] >> 1) & 1) {
+                avctx->pix_fmt = AV_PIX_FMT_BGRA;
+                ctx->component_size = 4;
+            } else {
+                avctx->pix_fmt = AV_PIX_FMT_BGR24;
+                ctx->component_size = 3;
+            }
+        } else {
+            avctx->pix_fmt = AV_PIX_FMT_BGRA;
+            ctx->component_size = 4;
+        }
     } else if (avctx->codec_tag == MKTAG('R', 'S', 'C', 'C')) {
         ctx->component_size = avctx->bits_per_coded_sample / 8;
         switch (avctx->bits_per_coded_sample) {
@@ -157,6 +167,12 @@
 
     /* Read number of tiles, and allocate the array */
     tiles_nb = bytestream2_get_le16(gbc);
+
+    if (tiles_nb == 0) {
+        av_log(avctx, AV_LOG_DEBUG, "no tiles\n");
+        return avpkt->size;
+    }
+
     av_fast_malloc(&ctx->tiles, &ctx->tiles_size,
                    tiles_nb * sizeof(*ctx->tiles));
     if (!ctx->tiles) {

diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index f1495fb..d171e6e 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c

@@ -1585,7 +1585,7 @@
 
     // Do no call ff_mpeg_update_thread_context on a partially initialized
     // decoder context.
-    if (!s1->linesize)
+    if (!s1->context_initialized)
         return 0;
 
     return ff_mpeg_update_thread_context(dst, src);
@@ -1733,6 +1733,8 @@
             if ((err = rv34_decoder_realloc(r)) < 0)
                 return err;
         }
+        if (faulty_b)
+            return AVERROR_INVALIDDATA;
         s->pict_type = si.type ? si.type : AV_PICTURE_TYPE_I;
         if (ff_mpv_frame_start(s, s->avctx) < 0)
             return -1;
@@ -1786,8 +1788,6 @@
                "multithreading mode (start MB is %d).\n", si.start);
         return AVERROR_INVALIDDATA;
     }
-    if (faulty_b)
-        return AVERROR_INVALIDDATA;
 
     for(i = 0; i < slice_count; i++){
         int offset  = get_slice_offset(avctx, slices_hdr, i  , slice_count, buf_size);

diff --git a/libavcodec/s302m.c b/libavcodec/s302m.c
index 4350d97..584b58e 100644
--- a/libavcodec/s302m.c
+++ b/libavcodec/s302m.c

@@ -212,10 +212,10 @@
 };
 
 static const AVClass s302m_class = {
-    "SMPTE 302M Decoder",
-    av_default_item_name,
-    s302m_options,
-    LIBAVUTIL_VERSION_INT,
+    .class_name = "SMPTE 302M Decoder",
+    .item_name  = av_default_item_name,
+    .option     = s302m_options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 AVCodec ff_s302m_decoder = {

diff --git a/libavcodec/samidec.c b/libavcodec/samidec.c
index 2620424..e32f238 100644
--- a/libavcodec/samidec.c
+++ b/libavcodec/samidec.c

@@ -48,6 +48,9 @@
     AVBPrint *dst_content = &sami->encoded_content;
     AVBPrint *dst_source = &sami->encoded_source;
 
+    if (!dupsrc)
+        return AVERROR(ENOMEM);
+
     av_bprint_clear(&sami->encoded_content);
     av_bprint_clear(&sami->content);
     av_bprint_clear(&sami->encoded_source);
@@ -135,9 +138,12 @@
     const char *ptr = avpkt->data;
     SAMIContext *sami = avctx->priv_data;
 
-    if (ptr && avpkt->size > 0 && !sami_paragraph_to_ass(avctx, ptr)) {
+    if (ptr && avpkt->size > 0) {
+        int ret = sami_paragraph_to_ass(avctx, ptr);
+        if (ret < 0)
+            return ret;
         // TODO: pass escaped sami->encoded_source.str as source
-        int ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL);
+        ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL);
         if (ret < 0)
             return ret;
     }

diff --git a/libavcodec/sbc.c b/libavcodec/sbc.c
new file mode 100644
index 0000000..b43b66e
--- /dev/null
+++ b/libavcodec/sbc.c

@@ -0,0 +1,271 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2008  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC common functions for the encoder and decoder
+ */
+
+#include "avcodec.h"
+#include "sbc.h"
+
+/* A2DP specification: Appendix B, page 69 */
+static const int sbc_offset4[4][4] = {
+    { -1, 0, 0, 0 },
+    { -2, 0, 0, 1 },
+    { -2, 0, 0, 1 },
+    { -2, 0, 0, 1 }
+};
+
+/* A2DP specification: Appendix B, page 69 */
+static const int sbc_offset8[4][8] = {
+    { -2, 0, 0, 0, 0, 0, 0, 1 },
+    { -3, 0, 0, 0, 0, 0, 1, 2 },
+    { -4, 0, 0, 0, 0, 0, 1, 2 },
+    { -4, 0, 0, 0, 0, 0, 1, 2 }
+};
+
+/*
+ * Calculates the CRC-8 of the first len bits in data
+ */
+uint8_t ff_sbc_crc8(const AVCRC *ctx, const uint8_t *data, size_t len)
+{
+    size_t byte_length = len >> 3;
+    int bit_length = len & 7;
+    uint8_t crc;
+
+    crc = av_crc(ctx, 0x0F, data, byte_length);
+
+    if (bit_length) {
+        uint8_t bits = data[byte_length];
+        while (bit_length--) {
+            int8_t mask = bits ^ crc;
+            crc = (crc << 1) ^ ((mask >> 7) & 0x1D);
+            bits <<= 1;
+        }
+    }
+
+    return crc;
+}
+
+/*
+ * Code straight from the spec to calculate the bits array
+ * Takes a pointer to the frame in question and a pointer to the bits array
+ */
+void ff_sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
+{
+    int subbands = frame->subbands;
+    uint8_t sf = frame->frequency;
+
+    if (frame->mode == MONO || frame->mode == DUAL_CHANNEL) {
+        int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice;
+        int ch, sb;
+
+        for (ch = 0; ch < frame->channels; ch++) {
+            max_bitneed = 0;
+            if (frame->allocation == SNR) {
+                for (sb = 0; sb < subbands; sb++) {
+                    bitneed[ch][sb] = frame->scale_factor[ch][sb];
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            } else {
+                for (sb = 0; sb < subbands; sb++) {
+                    if (frame->scale_factor[ch][sb] == 0)
+                        bitneed[ch][sb] = -5;
+                    else {
+                        if (subbands == 4)
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
+                        else
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
+                        if (loudness > 0)
+                            bitneed[ch][sb] = loudness / 2;
+                        else
+                            bitneed[ch][sb] = loudness;
+                    }
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            }
+
+            bitcount = 0;
+            slicecount = 0;
+            bitslice = max_bitneed + 1;
+            do {
+                bitslice--;
+                bitcount += slicecount;
+                slicecount = 0;
+                for (sb = 0; sb < subbands; sb++) {
+                    if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
+                        slicecount++;
+                    else if (bitneed[ch][sb] == bitslice + 1)
+                        slicecount += 2;
+                }
+            } while (bitcount + slicecount < frame->bitpool);
+
+            if (bitcount + slicecount == frame->bitpool) {
+                bitcount += slicecount;
+                bitslice--;
+            }
+
+            for (sb = 0; sb < subbands; sb++) {
+                if (bitneed[ch][sb] < bitslice + 2)
+                    bits[ch][sb] = 0;
+                else {
+                    bits[ch][sb] = bitneed[ch][sb] - bitslice;
+                    if (bits[ch][sb] > 16)
+                        bits[ch][sb] = 16;
+                }
+            }
+
+            for (sb = 0; bitcount < frame->bitpool &&
+                            sb < subbands; sb++) {
+                if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
+                    bits[ch][sb]++;
+                    bitcount++;
+                } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) {
+                    bits[ch][sb] = 2;
+                    bitcount += 2;
+                }
+            }
+
+            for (sb = 0; bitcount < frame->bitpool &&
+                            sb < subbands; sb++) {
+                if (bits[ch][sb] < 16) {
+                    bits[ch][sb]++;
+                    bitcount++;
+                }
+            }
+
+        }
+
+    } else if (frame->mode == STEREO || frame->mode == JOINT_STEREO) {
+        int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice;
+        int ch, sb;
+
+        max_bitneed = 0;
+        if (frame->allocation == SNR) {
+            for (ch = 0; ch < 2; ch++) {
+                for (sb = 0; sb < subbands; sb++) {
+                    bitneed[ch][sb] = frame->scale_factor[ch][sb];
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            }
+        } else {
+            for (ch = 0; ch < 2; ch++) {
+                for (sb = 0; sb < subbands; sb++) {
+                    if (frame->scale_factor[ch][sb] == 0)
+                        bitneed[ch][sb] = -5;
+                    else {
+                        if (subbands == 4)
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
+                        else
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
+                        if (loudness > 0)
+                            bitneed[ch][sb] = loudness / 2;
+                        else
+                            bitneed[ch][sb] = loudness;
+                    }
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            }
+        }
+
+        bitcount = 0;
+        slicecount = 0;
+        bitslice = max_bitneed + 1;
+        do {
+            bitslice--;
+            bitcount += slicecount;
+            slicecount = 0;
+            for (ch = 0; ch < 2; ch++) {
+                for (sb = 0; sb < subbands; sb++) {
+                    if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
+                        slicecount++;
+                    else if (bitneed[ch][sb] == bitslice + 1)
+                        slicecount += 2;
+                }
+            }
+        } while (bitcount + slicecount < frame->bitpool);
+
+        if (bitcount + slicecount == frame->bitpool) {
+            bitcount += slicecount;
+            bitslice--;
+        }
+
+        for (ch = 0; ch < 2; ch++) {
+            for (sb = 0; sb < subbands; sb++) {
+                if (bitneed[ch][sb] < bitslice + 2) {
+                    bits[ch][sb] = 0;
+                } else {
+                    bits[ch][sb] = bitneed[ch][sb] - bitslice;
+                    if (bits[ch][sb] > 16)
+                        bits[ch][sb] = 16;
+                }
+            }
+        }
+
+        ch = 0;
+        sb = 0;
+        while (bitcount < frame->bitpool) {
+            if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
+                bits[ch][sb]++;
+                bitcount++;
+            } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) {
+                bits[ch][sb] = 2;
+                bitcount += 2;
+            }
+            if (ch == 1) {
+                ch = 0;
+                sb++;
+                if (sb >= subbands)
+                    break;
+            } else
+                ch = 1;
+        }
+
+        ch = 0;
+        sb = 0;
+        while (bitcount < frame->bitpool) {
+            if (bits[ch][sb] < 16) {
+                bits[ch][sb]++;
+                bitcount++;
+            }
+            if (ch == 1) {
+                ch = 0;
+                sb++;
+                if (sb >= subbands)
+                    break;
+            } else
+                ch = 1;
+        }
+
+    }
+
+}

diff --git a/libavcodec/sbc.h b/libavcodec/sbc.h
new file mode 100644
index 0000000..de9c8d9
--- /dev/null
+++ b/libavcodec/sbc.h

@@ -0,0 +1,118 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2014  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC common definitions for the encoder and decoder
+ */
+
+#ifndef AVCODEC_SBC_H
+#define AVCODEC_SBC_H
+
+#include "avcodec.h"
+#include "libavutil/crc.h"
+
+#define MSBC_BLOCKS 15
+
+/* sampling frequency */
+#define SBC_FREQ_16000  0x00
+#define SBC_FREQ_32000  0x01
+#define SBC_FREQ_44100  0x02
+#define SBC_FREQ_48000  0x03
+
+/* blocks */
+#define SBC_BLK_4       0x00
+#define SBC_BLK_8       0x01
+#define SBC_BLK_12      0x02
+#define SBC_BLK_16      0x03
+
+/* channel mode */
+#define SBC_MODE_MONO         0x00
+#define SBC_MODE_DUAL_CHANNEL 0x01
+#define SBC_MODE_STEREO       0x02
+#define SBC_MODE_JOINT_STEREO 0x03
+
+/* allocation method */
+#define SBC_AM_LOUDNESS 0x00
+#define SBC_AM_SNR      0x01
+
+/* subbands */
+#define SBC_SB_4        0x00
+#define SBC_SB_8        0x01
+
+/* synchronisation words */
+#define SBC_SYNCWORD   0x9C
+#define MSBC_SYNCWORD  0xAD
+
+/* extra bits of precision for the synthesis filter input data */
+#define SBCDEC_FIXED_EXTRA_BITS 2
+
+/*
+ * Enforce 16 byte alignment for the data, which is supposed to be used
+ * with SIMD optimized code.
+ */
+#define SBC_ALIGN 16
+
+/* This structure contains an unpacked SBC frame.
+   Yes, there is probably quite some unused space herein */
+struct sbc_frame {
+    uint8_t frequency;
+    uint8_t blocks;
+    enum {
+        MONO         = SBC_MODE_MONO,
+        DUAL_CHANNEL = SBC_MODE_DUAL_CHANNEL,
+        STEREO       = SBC_MODE_STEREO,
+        JOINT_STEREO = SBC_MODE_JOINT_STEREO
+    } mode;
+    uint8_t channels;
+    enum {
+        LOUDNESS = SBC_AM_LOUDNESS,
+        SNR      = SBC_AM_SNR
+    } allocation;
+    uint8_t subbands;
+    uint8_t bitpool;
+    uint16_t codesize;
+
+    /* bit number x set means joint stereo has been used in subband x */
+    uint8_t joint;
+
+    /* only the lower 4 bits of every element are to be used */
+    DECLARE_ALIGNED(SBC_ALIGN, uint32_t, scale_factor)[2][8];
+
+    /* raw integer subband samples in the frame */
+    DECLARE_ALIGNED(SBC_ALIGN, int32_t, sb_sample_f)[16][2][8];
+
+    /* modified subband samples */
+    DECLARE_ALIGNED(SBC_ALIGN, int32_t, sb_sample)[16][2][8];
+
+    const AVCRC *crc_ctx;
+};
+
+uint8_t ff_sbc_crc8(const AVCRC *crc_ctx, const uint8_t *data, size_t len);
+void ff_sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]);
+
+#endif /* AVCODEC_SBC_H */

diff --git a/libavcodec/sbc_parser.c b/libavcodec/sbc_parser.c
new file mode 100644
index 0000000..f565641
--- /dev/null
+++ b/libavcodec/sbc_parser.c

@@ -0,0 +1,122 @@
+/*
+ * SBC parser
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "sbc.h"
+#include "parser.h"
+
+typedef struct SBCParseContext {
+    ParseContext pc;
+    uint8_t header[3];
+    int header_size;
+    int buffered_size;
+} SBCParseContext;
+
+static int sbc_parse_header(AVCodecParserContext *s, AVCodecContext *avctx,
+                            const uint8_t *data, size_t len)
+{
+    static const int sample_rates[4] = { 16000, 32000, 44100, 48000 };
+    int sr, blocks, mode, subbands, bitpool, channels, joint;
+    int length;
+
+    if (len < 3)
+        return -1;
+
+    if (data[0] == MSBC_SYNCWORD && data[1] == 0 && data[2] == 0) {
+        avctx->channels = 1;
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+        avctx->sample_rate = 16000;
+        avctx->frame_size = 120;
+        s->duration = avctx->frame_size;
+        return 57;
+    }
+
+    if (data[0] != SBC_SYNCWORD)
+        return -2;
+
+    sr       =   (data[1] >> 6) & 0x03;
+    blocks   = (((data[1] >> 4) & 0x03) + 1) << 2;
+    mode     =   (data[1] >> 2) & 0x03;
+    subbands = (((data[1] >> 0) & 0x01) + 1) << 2;
+    bitpool  = data[2];
+
+    channels = mode == SBC_MODE_MONO ? 1 : 2;
+    joint    = mode == SBC_MODE_JOINT_STEREO;
+
+    length = 4 + (subbands * channels) / 2
+             + ((((mode == SBC_MODE_DUAL_CHANNEL) + 1) * blocks * bitpool
+                 + (joint * subbands)) + 7) / 8;
+
+    avctx->channels = channels;
+    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    avctx->sample_rate = sample_rates[sr];
+    avctx->frame_size = subbands * blocks;
+    s->duration = avctx->frame_size;
+    return length;
+}
+
+static int sbc_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                     const uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size)
+{
+    SBCParseContext *pc = s->priv_data;
+    int next;
+
+    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
+        next = buf_size;
+    } else {
+        if (pc->header_size) {
+            memcpy(pc->header + pc->header_size, buf,
+                   sizeof(pc->header) - pc->header_size);
+            next = sbc_parse_header(s, avctx, pc->header, sizeof(pc->header))
+                 - pc->buffered_size;
+            pc->header_size = 0;
+        } else {
+            next = sbc_parse_header(s, avctx, buf, buf_size);
+            if (next >= buf_size)
+                next = -1;
+        }
+
+        if (next < 0) {
+            pc->header_size = FFMIN(sizeof(pc->header), buf_size);
+            memcpy(pc->header, buf, pc->header_size);
+            pc->buffered_size = buf_size;
+            next = END_NOT_FOUND;
+        }
+
+        if (ff_combine_frame(&pc->pc, next, &buf, &buf_size) < 0) {
+            *poutbuf      = NULL;
+            *poutbuf_size = 0;
+            return buf_size;
+        }
+    }
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+AVCodecParser ff_sbc_parser = {
+    .codec_ids      = { AV_CODEC_ID_SBC },
+    .priv_data_size = sizeof(SBCParseContext),
+    .parser_parse   = sbc_parse,
+    .parser_close   = ff_parse_close,
+};

diff --git a/libavcodec/sbcdec.c b/libavcodec/sbcdec.c
new file mode 100644
index 0000000..546b38c
--- /dev/null
+++ b/libavcodec/sbcdec.c

@@ -0,0 +1,379 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2008  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder implementation
+ */
+
+#include <stdbool.h>
+#include "avcodec.h"
+#include "internal.h"
+#include "libavutil/intreadwrite.h"
+#include "sbc.h"
+#include "sbcdec_data.h"
+
+struct sbc_decoder_state {
+    int32_t V[2][170];
+    int offset[2][16];
+};
+
+typedef struct SBCDecContext {
+    AVClass *class;
+    DECLARE_ALIGNED(SBC_ALIGN, struct sbc_frame, frame);
+    DECLARE_ALIGNED(SBC_ALIGN, struct sbc_decoder_state, dsp);
+} SBCDecContext;
+
+/*
+ * Unpacks a SBC frame at the beginning of the stream in data,
+ * which has at most len bytes into frame.
+ * Returns the length in bytes of the packed frame, or a negative
+ * value on error. The error codes are:
+ *
+ *  -1   Data stream too short
+ *  -2   Sync byte incorrect
+ *  -3   CRC8 incorrect
+ *  -4   Bitpool value out of bounds
+ */
+static int sbc_unpack_frame(const uint8_t *data, struct sbc_frame *frame,
+                            size_t len)
+{
+    unsigned int consumed;
+    /* Will copy the parts of the header that are relevant to crc
+     * calculation here */
+    uint8_t crc_header[11] = { 0 };
+    int crc_pos;
+    int32_t temp;
+
+    uint32_t audio_sample;
+    int ch, sb, blk, bit;   /* channel, subband, block and bit standard
+                               counters */
+    int bits[2][8];         /* bits distribution */
+    uint32_t levels[2][8];  /* levels derived from that */
+
+    if (len < 4)
+        return -1;
+
+    if (data[0] == MSBC_SYNCWORD) {
+        if (data[1] != 0)
+            return -2;
+        if (data[2] != 0)
+            return -2;
+
+        frame->frequency = SBC_FREQ_16000;
+        frame->blocks = MSBC_BLOCKS;
+        frame->allocation = LOUDNESS;
+        frame->mode = MONO;
+        frame->channels = 1;
+        frame->subbands = 8;
+        frame->bitpool = 26;
+    } else if (data[0] == SBC_SYNCWORD) {
+        frame->frequency  = (data[1] >> 6) & 0x03;
+        frame->blocks = 4 * ((data[1] >> 4) & 0x03) + 4;
+        frame->mode = (data[1] >> 2) & 0x03;
+        frame->channels = frame->mode == MONO ? 1 : 2;
+        frame->allocation = (data[1] >> 1) & 0x01;
+        frame->subbands = data[1] & 0x01 ? 8 : 4;
+        frame->bitpool = data[2];
+
+        if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) &&
+            frame->bitpool > 16 * frame->subbands)
+            return -4;
+
+        if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) &&
+            frame->bitpool > 32 * frame->subbands)
+            return -4;
+    } else
+        return -2;
+
+    consumed = 32;
+    crc_header[0] = data[1];
+    crc_header[1] = data[2];
+    crc_pos = 16;
+
+    if (frame->mode == JOINT_STEREO) {
+        if (len * 8 < consumed + frame->subbands)
+            return -1;
+
+        frame->joint = 0x00;
+        for (sb = 0; sb < frame->subbands - 1; sb++)
+            frame->joint |= ((data[4] >> (7 - sb)) & 0x01) << sb;
+        if (frame->subbands == 4)
+            crc_header[crc_pos / 8] = data[4] & 0xf0;
+        else
+            crc_header[crc_pos / 8] = data[4];
+
+        consumed += frame->subbands;
+        crc_pos += frame->subbands;
+    }
+
+    if (len * 8 < consumed + (4 * frame->subbands * frame->channels))
+        return -1;
+
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++) {
+            /* FIXME assert(consumed % 4 == 0); */
+            frame->scale_factor[ch][sb] =
+                (data[consumed >> 3] >> (4 - (consumed & 0x7))) & 0x0F;
+            crc_header[crc_pos >> 3] |=
+                frame->scale_factor[ch][sb] << (4 - (crc_pos & 0x7));
+
+            consumed += 4;
+            crc_pos += 4;
+        }
+    }
+
+    if (data[3] != ff_sbc_crc8(frame->crc_ctx, crc_header, crc_pos))
+        return -3;
+
+    ff_sbc_calculate_bits(frame, bits);
+
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++)
+            levels[ch][sb] = (1 << bits[ch][sb]) - 1;
+    }
+
+    for (blk = 0; blk < frame->blocks; blk++) {
+        for (ch = 0; ch < frame->channels; ch++) {
+            for (sb = 0; sb < frame->subbands; sb++) {
+                uint32_t shift;
+
+                if (levels[ch][sb] == 0) {
+                    frame->sb_sample[blk][ch][sb] = 0;
+                    continue;
+                }
+
+                shift = frame->scale_factor[ch][sb] +
+                        1 + SBCDEC_FIXED_EXTRA_BITS;
+
+                audio_sample = 0;
+                for (bit = 0; bit < bits[ch][sb]; bit++) {
+                    if (consumed > len * 8)
+                        return -1;
+
+                    if ((data[consumed >> 3] >> (7 - (consumed & 0x7))) & 0x01)
+                        audio_sample |= 1 << (bits[ch][sb] - bit - 1);
+
+                    consumed++;
+                }
+
+                frame->sb_sample[blk][ch][sb] = (int32_t)
+                    (((((uint64_t) audio_sample << 1) | 1) << shift) /
+                    levels[ch][sb]) - (1 << shift);
+            }
+        }
+    }
+
+    if (frame->mode == JOINT_STEREO) {
+        for (blk = 0; blk < frame->blocks; blk++) {
+            for (sb = 0; sb < frame->subbands; sb++) {
+                if (frame->joint & (0x01 << sb)) {
+                    temp = frame->sb_sample[blk][0][sb] +
+                           frame->sb_sample[blk][1][sb];
+                    frame->sb_sample[blk][1][sb] =
+                        frame->sb_sample[blk][0][sb] -
+                        frame->sb_sample[blk][1][sb];
+                    frame->sb_sample[blk][0][sb] = temp;
+                }
+            }
+        }
+    }
+
+    if ((consumed & 0x7) != 0)
+        consumed += 8 - (consumed & 0x7);
+
+    return consumed >> 3;
+}
+
+static inline void sbc_synthesize_four(struct sbc_decoder_state *state,
+                                       struct sbc_frame *frame,
+                                       int ch, int blk, AVFrame *output_frame)
+{
+    int i, k, idx;
+    int32_t *v = state->V[ch];
+    int *offset = state->offset[ch];
+
+    for (i = 0; i < 8; i++) {
+        /* Shifting */
+        offset[i]--;
+        if (offset[i] < 0) {
+            offset[i] = 79;
+            memcpy(v + 80, v, 9 * sizeof(*v));
+        }
+
+        /* Distribute the new matrix value to the shifted position */
+        v[offset[i]] =
+            ( ff_synmatrix4[i][0] * frame->sb_sample[blk][ch][0] +
+              ff_synmatrix4[i][1] * frame->sb_sample[blk][ch][1] +
+              ff_synmatrix4[i][2] * frame->sb_sample[blk][ch][2] +
+              ff_synmatrix4[i][3] * frame->sb_sample[blk][ch][3] ) >> 15;
+    }
+
+    /* Compute the samples */
+    for (idx = 0, i = 0; i < 4; i++, idx += 5) {
+        k = (i + 4) & 0xf;
+
+        /* Store in output, Q0 */
+        AV_WN16A(&output_frame->data[ch][blk * 8 + i * 2], av_clip_int16(
+            ( v[offset[i] + 0] * ff_sbc_proto_4_40m0[idx + 0] +
+              v[offset[k] + 1] * ff_sbc_proto_4_40m1[idx + 0] +
+              v[offset[i] + 2] * ff_sbc_proto_4_40m0[idx + 1] +
+              v[offset[k] + 3] * ff_sbc_proto_4_40m1[idx + 1] +
+              v[offset[i] + 4] * ff_sbc_proto_4_40m0[idx + 2] +
+              v[offset[k] + 5] * ff_sbc_proto_4_40m1[idx + 2] +
+              v[offset[i] + 6] * ff_sbc_proto_4_40m0[idx + 3] +
+              v[offset[k] + 7] * ff_sbc_proto_4_40m1[idx + 3] +
+              v[offset[i] + 8] * ff_sbc_proto_4_40m0[idx + 4] +
+              v[offset[k] + 9] * ff_sbc_proto_4_40m1[idx + 4] ) >> 15));
+    }
+}
+
+static inline void sbc_synthesize_eight(struct sbc_decoder_state *state,
+                                        struct sbc_frame *frame,
+                                        int ch, int blk, AVFrame *output_frame)
+{
+    int i, k, idx;
+    int32_t *v = state->V[ch];
+    int *offset = state->offset[ch];
+
+    for (i = 0; i < 16; i++) {
+        /* Shifting */
+        offset[i]--;
+        if (offset[i] < 0) {
+            offset[i] = 159;
+            memcpy(v + 160, v, 9 * sizeof(*v));
+        }
+
+        /* Distribute the new matrix value to the shifted position */
+        v[offset[i]] =
+            ( ff_synmatrix8[i][0] * frame->sb_sample[blk][ch][0] +
+              ff_synmatrix8[i][1] * frame->sb_sample[blk][ch][1] +
+              ff_synmatrix8[i][2] * frame->sb_sample[blk][ch][2] +
+              ff_synmatrix8[i][3] * frame->sb_sample[blk][ch][3] +
+              ff_synmatrix8[i][4] * frame->sb_sample[blk][ch][4] +
+              ff_synmatrix8[i][5] * frame->sb_sample[blk][ch][5] +
+              ff_synmatrix8[i][6] * frame->sb_sample[blk][ch][6] +
+              ff_synmatrix8[i][7] * frame->sb_sample[blk][ch][7] ) >> 15;
+    }
+
+    /* Compute the samples */
+    for (idx = 0, i = 0; i < 8; i++, idx += 5) {
+        k = (i + 8) & 0xf;
+
+        /* Store in output, Q0 */
+        AV_WN16A(&output_frame->data[ch][blk * 16 + i * 2], av_clip_int16(
+            ( v[offset[i] + 0] * ff_sbc_proto_8_80m0[idx + 0] +
+              v[offset[k] + 1] * ff_sbc_proto_8_80m1[idx + 0] +
+              v[offset[i] + 2] * ff_sbc_proto_8_80m0[idx + 1] +
+              v[offset[k] + 3] * ff_sbc_proto_8_80m1[idx + 1] +
+              v[offset[i] + 4] * ff_sbc_proto_8_80m0[idx + 2] +
+              v[offset[k] + 5] * ff_sbc_proto_8_80m1[idx + 2] +
+              v[offset[i] + 6] * ff_sbc_proto_8_80m0[idx + 3] +
+              v[offset[k] + 7] * ff_sbc_proto_8_80m1[idx + 3] +
+              v[offset[i] + 8] * ff_sbc_proto_8_80m0[idx + 4] +
+              v[offset[k] + 9] * ff_sbc_proto_8_80m1[idx + 4] ) >> 15));
+    }
+}
+
+static void sbc_synthesize_audio(struct sbc_decoder_state *state,
+                                 struct sbc_frame *frame, AVFrame *output_frame)
+{
+    int ch, blk;
+
+    switch (frame->subbands) {
+    case 4:
+        for (ch = 0; ch < frame->channels; ch++)
+            for (blk = 0; blk < frame->blocks; blk++)
+                sbc_synthesize_four(state, frame, ch, blk, output_frame);
+        break;
+
+    case 8:
+        for (ch = 0; ch < frame->channels; ch++)
+            for (blk = 0; blk < frame->blocks; blk++)
+                sbc_synthesize_eight(state, frame, ch, blk, output_frame);
+        break;
+    }
+}
+
+static int sbc_decode_init(AVCodecContext *avctx)
+{
+    SBCDecContext *sbc = avctx->priv_data;
+    int i, ch;
+
+    sbc->frame.crc_ctx = av_crc_get_table(AV_CRC_8_EBU);
+
+    memset(sbc->dsp.V, 0, sizeof(sbc->dsp.V));
+    for (ch = 0; ch < 2; ch++)
+        for (i = 0; i < FF_ARRAY_ELEMS(sbc->dsp.offset[0]); i++)
+            sbc->dsp.offset[ch][i] = (10 * i + 10);
+    return 0;
+}
+
+static int sbc_decode_frame(AVCodecContext *avctx,
+                            void *data, int *got_frame_ptr,
+                            AVPacket *avpkt)
+{
+    SBCDecContext *sbc = avctx->priv_data;
+    AVFrame *frame = data;
+    int ret, frame_length;
+
+    if (!sbc)
+        return AVERROR(EIO);
+
+    frame_length = sbc_unpack_frame(avpkt->data, &sbc->frame, avpkt->size);
+    if (frame_length <= 0)
+        return frame_length;
+
+    frame->channels = sbc->frame.channels;
+    frame->format = AV_SAMPLE_FMT_S16P;
+    frame->nb_samples = sbc->frame.blocks * sbc->frame.subbands;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    sbc_synthesize_audio(&sbc->dsp, &sbc->frame, frame);
+
+    *got_frame_ptr = 1;
+
+    return frame_length;
+}
+
+AVCodec ff_sbc_decoder = {
+    .name                  = "sbc",
+    .long_name             = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_SBC,
+    .priv_data_size        = sizeof(SBCDecContext),
+    .init                  = sbc_decode_init,
+    .decode                = sbc_decode_frame,
+    .capabilities          = AV_CODEC_CAP_DR1,
+    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
+                                                  AV_CH_LAYOUT_STEREO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                             AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000, 0 },
+};

diff --git a/libavcodec/sbcdec_data.c b/libavcodec/sbcdec_data.c
new file mode 100644
index 0000000..2152162
--- /dev/null
+++ b/libavcodec/sbcdec_data.c

@@ -0,0 +1,127 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder tables
+ */
+
+#include <stdint.h>
+#include "sbcdec_data.h"
+#include "sbc.h"
+
+#define SS4(val)  ((int32_t)val >> 12)
+#define SS8(val)  ((int32_t)val >> 14)
+#define SN4(val)  ((int32_t)val >> 11 + 1 + SBCDEC_FIXED_EXTRA_BITS)
+#define SN8(val)  ((int32_t)val >> 11 + 1 + SBCDEC_FIXED_EXTRA_BITS)
+
+const int32_t ff_sbc_proto_4_40m0[] = {
+    SS4(0x00000000), SS4(0xffa6982f), SS4(0xfba93848), SS4(0x0456c7b8),
+    SS4(0x005967d1), SS4(0xfffb9ac7), SS4(0xff589157), SS4(0xf9c2a8d8),
+    SS4(0x027c1434), SS4(0x0019118b), SS4(0xfff3c74c), SS4(0xff137330),
+    SS4(0xf81b8d70), SS4(0x00ec1b8b), SS4(0xfff0b71a), SS4(0xffe99b00),
+    SS4(0xfef84470), SS4(0xf6fb4370), SS4(0xffcdc351), SS4(0xffe01dc7)
+};
+
+const int32_t ff_sbc_proto_4_40m1[] = {
+    SS4(0xffe090ce), SS4(0xff2c0475), SS4(0xf694f800), SS4(0xff2c0475),
+    SS4(0xffe090ce), SS4(0xffe01dc7), SS4(0xffcdc351), SS4(0xf6fb4370),
+    SS4(0xfef84470), SS4(0xffe99b00), SS4(0xfff0b71a), SS4(0x00ec1b8b),
+    SS4(0xf81b8d70), SS4(0xff137330), SS4(0xfff3c74c), SS4(0x0019118b),
+    SS4(0x027c1434), SS4(0xf9c2a8d8), SS4(0xff589157), SS4(0xfffb9ac7)
+};
+
+const int32_t ff_sbc_proto_8_80m0[] = {
+    SS8(0x00000000), SS8(0xfe8d1970), SS8(0xee979f00), SS8(0x11686100),
+    SS8(0x0172e690), SS8(0xfff5bd1a), SS8(0xfdf1c8d4), SS8(0xeac182c0),
+    SS8(0x0d9daee0), SS8(0x00e530da), SS8(0xffe9811d), SS8(0xfd52986c),
+    SS8(0xe7054ca0), SS8(0x0a00d410), SS8(0x006c1de4), SS8(0xffdba705),
+    SS8(0xfcbc98e8), SS8(0xe3889d20), SS8(0x06af2308), SS8(0x000bb7db),
+    SS8(0xffca00ed), SS8(0xfc3fbb68), SS8(0xe071bc00), SS8(0x03bf7948),
+    SS8(0xffc4e05c), SS8(0xffb54b3b), SS8(0xfbedadc0), SS8(0xdde26200),
+    SS8(0x0142291c), SS8(0xff960e94), SS8(0xff9f3e17), SS8(0xfbd8f358),
+    SS8(0xdbf79400), SS8(0xff405e01), SS8(0xff7d4914), SS8(0xff8b1a31),
+    SS8(0xfc1417b8), SS8(0xdac7bb40), SS8(0xfdbb828c), SS8(0xff762170)
+};
+
+const int32_t ff_sbc_proto_8_80m1[] = {
+    SS8(0xff7c272c), SS8(0xfcb02620), SS8(0xda612700), SS8(0xfcb02620),
+    SS8(0xff7c272c), SS8(0xff762170), SS8(0xfdbb828c), SS8(0xdac7bb40),
+    SS8(0xfc1417b8), SS8(0xff8b1a31), SS8(0xff7d4914), SS8(0xff405e01),
+    SS8(0xdbf79400), SS8(0xfbd8f358), SS8(0xff9f3e17), SS8(0xff960e94),
+    SS8(0x0142291c), SS8(0xdde26200), SS8(0xfbedadc0), SS8(0xffb54b3b),
+    SS8(0xffc4e05c), SS8(0x03bf7948), SS8(0xe071bc00), SS8(0xfc3fbb68),
+    SS8(0xffca00ed), SS8(0x000bb7db), SS8(0x06af2308), SS8(0xe3889d20),
+    SS8(0xfcbc98e8), SS8(0xffdba705), SS8(0x006c1de4), SS8(0x0a00d410),
+    SS8(0xe7054ca0), SS8(0xfd52986c), SS8(0xffe9811d), SS8(0x00e530da),
+    SS8(0x0d9daee0), SS8(0xeac182c0), SS8(0xfdf1c8d4), SS8(0xfff5bd1a)
+};
+
+const int32_t ff_synmatrix4[8][4] = {
+    { SN4(0x05a82798), SN4(0xfa57d868), SN4(0xfa57d868), SN4(0x05a82798) },
+    { SN4(0x030fbc54), SN4(0xf89be510), SN4(0x07641af0), SN4(0xfcf043ac) },
+    { SN4(0x00000000), SN4(0x00000000), SN4(0x00000000), SN4(0x00000000) },
+    { SN4(0xfcf043ac), SN4(0x07641af0), SN4(0xf89be510), SN4(0x030fbc54) },
+    { SN4(0xfa57d868), SN4(0x05a82798), SN4(0x05a82798), SN4(0xfa57d868) },
+    { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) },
+    { SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000) },
+    { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) }
+};
+
+const int32_t ff_synmatrix8[16][8] = {
+    { SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798),
+      SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798) },
+    { SN8(0x0471ced0), SN8(0xf8275a10), SN8(0x018f8b84), SN8(0x06a6d988),
+      SN8(0xf9592678), SN8(0xfe70747c), SN8(0x07d8a5f0), SN8(0xfb8e3130) },
+    { SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac),
+      SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54) },
+    { SN8(0x018f8b84), SN8(0xfb8e3130), SN8(0x06a6d988), SN8(0xf8275a10),
+      SN8(0x07d8a5f0), SN8(0xf9592678), SN8(0x0471ced0), SN8(0xfe70747c) },
+    { SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000),
+      SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000) },
+    { SN8(0xfe70747c), SN8(0x0471ced0), SN8(0xf9592678), SN8(0x07d8a5f0),
+      SN8(0xf8275a10), SN8(0x06a6d988), SN8(0xfb8e3130), SN8(0x018f8b84) },
+    { SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54),
+      SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac) },
+    { SN8(0xfb8e3130), SN8(0x07d8a5f0), SN8(0xfe70747c), SN8(0xf9592678),
+      SN8(0x06a6d988), SN8(0x018f8b84), SN8(0xf8275a10), SN8(0x0471ced0) },
+    { SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868),
+      SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868) },
+    { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0),
+      SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) },
+    { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0),
+      SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) },
+    { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c),
+      SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) },
+    { SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000),
+      SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000) },
+    { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c),
+      SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) },
+    { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0),
+      SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) },
+    { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0),
+      SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) }
+};

diff --git a/libavcodec/sbcdec_data.h b/libavcodec/sbcdec_data.h
new file mode 100644
index 0000000..1b79d1d
--- /dev/null
+++ b/libavcodec/sbcdec_data.h

@@ -0,0 +1,44 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder tables
+ */
+
+#ifndef AVCODEC_SBCDEC_DATA_H
+#define AVCODEC_SBCDEC_DATA_H
+
+#include <stdint.h>
+
+extern const int32_t ff_sbc_proto_4_40m0[];
+extern const int32_t ff_sbc_proto_4_40m1[];
+extern const int32_t ff_sbc_proto_8_80m0[];
+extern const int32_t ff_sbc_proto_8_80m1[];
+extern const int32_t ff_synmatrix4[8][4];
+extern const int32_t ff_synmatrix8[16][8];
+
+#endif /* AVCODEC_SBCDEC_DATA_H */

diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c
new file mode 100644
index 0000000..e745595
--- /dev/null
+++ b/libavcodec/sbcdsp.c

@@ -0,0 +1,387 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC basic "building bricks"
+ */
+
+#include <stdint.h>
+#include <limits.h>
+#include <string.h>
+#include "libavutil/common.h"
+#include "libavutil/intmath.h"
+#include "libavutil/intreadwrite.h"
+#include "sbc.h"
+#include "sbcdsp.h"
+#include "sbcdsp_data.h"
+
+/*
+ * A reference C code of analysis filter with SIMD-friendly tables
+ * reordering and code layout. This code can be used to develop platform
+ * specific SIMD optimizations. Also it may be used as some kind of test
+ * for compiler autovectorization capabilities (who knows, if the compiler
+ * is very good at this stuff, hand optimized assembly may be not strictly
+ * needed for some platform).
+ *
+ * Note: It is also possible to make a simple variant of analysis filter,
+ * which needs only a single constants table without taking care about
+ * even/odd cases. This simple variant of filter can be implemented without
+ * input data permutation. The only thing that would be lost is the
+ * possibility to use pairwise SIMD multiplications. But for some simple
+ * CPU cores without SIMD extensions it can be useful. If anybody is
+ * interested in implementing such variant of a filter, sourcecode from
+ * bluez versions 4.26/4.27 can be used as a reference and the history of
+ * the changes in git repository done around that time may be worth checking.
+ */
+
+static av_always_inline void sbc_analyze_simd(const int16_t *in, int32_t *out,
+                                              const int16_t *consts,
+                                              unsigned subbands)
+{
+    int32_t t1[8];
+    int16_t t2[8];
+    int i, j, hop = 0;
+
+    /* rounding coefficient */
+    for (i = 0; i < subbands; i++)
+        t1[i] = 1 << (SBC_PROTO_FIXED_SCALE - 1);
+
+    /* low pass polyphase filter */
+    for (hop = 0; hop < 10*subbands; hop += 2*subbands)
+        for (i = 0; i < 2*subbands; i++)
+            t1[i >> 1] += in[hop + i] * consts[hop + i];
+
+    /* scaling */
+    for (i = 0; i < subbands; i++)
+        t2[i] = t1[i] >> SBC_PROTO_FIXED_SCALE;
+
+    memset(t1, 0, sizeof(t1));
+
+    /* do the cos transform */
+    for (i = 0; i < subbands/2; i++)
+        for (j = 0; j < 2*subbands; j++)
+            t1[j>>1] += t2[i * 2 + (j&1)] * consts[10*subbands + i*2*subbands + j];
+
+    for (i = 0; i < subbands; i++)
+        out[i] = t1[i] >> (SBC_COS_TABLE_FIXED_SCALE - SCALE_OUT_BITS);
+}
+
+static void sbc_analyze_4_simd(const int16_t *in, int32_t *out,
+                               const int16_t *consts)
+{
+    sbc_analyze_simd(in, out, consts, 4);
+}
+
+static void sbc_analyze_8_simd(const int16_t *in, int32_t *out,
+                               const int16_t *consts)
+{
+    sbc_analyze_simd(in, out, consts, 8);
+}
+
+static inline void sbc_analyze_4b_4s_simd(SBCDSPContext *s,
+                                          int16_t *x, int32_t *out, int out_stride)
+{
+    /* Analyze blocks */
+    s->sbc_analyze_4(x + 12, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
+    out += out_stride;
+    s->sbc_analyze_4(x + 8, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
+    out += out_stride;
+    s->sbc_analyze_4(x + 4, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
+    out += out_stride;
+    s->sbc_analyze_4(x + 0, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
+}
+
+static inline void sbc_analyze_4b_8s_simd(SBCDSPContext *s,
+                                          int16_t *x, int32_t *out, int out_stride)
+{
+    /* Analyze blocks */
+    s->sbc_analyze_8(x + 24, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
+    out += out_stride;
+    s->sbc_analyze_8(x + 16, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
+    out += out_stride;
+    s->sbc_analyze_8(x + 8, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
+    out += out_stride;
+    s->sbc_analyze_8(x + 0, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
+}
+
+static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
+                                               int16_t *x, int32_t *out,
+                                               int out_stride);
+
+static inline void sbc_analyze_1b_8s_simd_odd(SBCDSPContext *s,
+                                              int16_t *x, int32_t *out,
+                                              int out_stride)
+{
+    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
+    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_even;
+}
+
+static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
+                                               int16_t *x, int32_t *out,
+                                               int out_stride)
+{
+    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
+    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd;
+}
+
+/*
+ * Input data processing functions. The data is endian converted if needed,
+ * channels are deintrleaved and audio samples are reordered for use in
+ * SIMD-friendly analysis filter function. The results are put into "X"
+ * array, getting appended to the previous data (or it is better to say
+ * prepended, as the buffer is filled from top to bottom). Old data is
+ * discarded when neededed, but availability of (10 * nrof_subbands)
+ * contiguous samples is always guaranteed for the input to the analysis
+ * filter. This is achieved by copying a sufficient part of old data
+ * to the top of the buffer on buffer wraparound.
+ */
+
+static int sbc_enc_process_input_4s(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels)
+{
+    int c;
+
+    /* handle X buffer wraparound */
+    if (position < nsamples) {
+        for (c = 0; c < nchannels; c++)
+            memcpy(&X[c][SBC_X_BUFFER_SIZE - 40], &X[c][position],
+                            36 * sizeof(int16_t));
+        position = SBC_X_BUFFER_SIZE - 40;
+    }
+
+    /* copy/permutate audio samples */
+    for (; nsamples >= 8; nsamples -= 8, pcm += 16 * nchannels) {
+        position -= 8;
+        for (c = 0; c < nchannels; c++) {
+            int16_t *x = &X[c][position];
+            x[0] = AV_RN16(pcm + 14*nchannels + 2*c);
+            x[1] = AV_RN16(pcm +  6*nchannels + 2*c);
+            x[2] = AV_RN16(pcm + 12*nchannels + 2*c);
+            x[3] = AV_RN16(pcm +  8*nchannels + 2*c);
+            x[4] = AV_RN16(pcm +  0*nchannels + 2*c);
+            x[5] = AV_RN16(pcm +  4*nchannels + 2*c);
+            x[6] = AV_RN16(pcm +  2*nchannels + 2*c);
+            x[7] = AV_RN16(pcm + 10*nchannels + 2*c);
+        }
+    }
+
+    return position;
+}
+
+static int sbc_enc_process_input_8s(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels)
+{
+    int c;
+
+    /* handle X buffer wraparound */
+    if (position < nsamples) {
+        for (c = 0; c < nchannels; c++)
+            memcpy(&X[c][SBC_X_BUFFER_SIZE - 72], &X[c][position],
+                            72 * sizeof(int16_t));
+        position = SBC_X_BUFFER_SIZE - 72;
+    }
+
+    if (position % 16 == 8) {
+        position -= 8;
+        nsamples -= 8;
+        for (c = 0; c < nchannels; c++) {
+            int16_t *x = &X[c][position];
+            x[0] = AV_RN16(pcm + 14*nchannels + 2*c);
+            x[2] = AV_RN16(pcm + 12*nchannels + 2*c);
+            x[3] = AV_RN16(pcm +  0*nchannels + 2*c);
+            x[4] = AV_RN16(pcm + 10*nchannels + 2*c);
+            x[5] = AV_RN16(pcm +  2*nchannels + 2*c);
+            x[6] = AV_RN16(pcm +  8*nchannels + 2*c);
+            x[7] = AV_RN16(pcm +  4*nchannels + 2*c);
+            x[8] = AV_RN16(pcm +  6*nchannels + 2*c);
+        }
+        pcm += 16 * nchannels;
+    }
+
+    /* copy/permutate audio samples */
+    for (; nsamples >= 16; nsamples -= 16, pcm += 32 * nchannels) {
+        position -= 16;
+        for (c = 0; c < nchannels; c++) {
+            int16_t *x = &X[c][position];
+            x[0]  = AV_RN16(pcm + 30*nchannels + 2*c);
+            x[1]  = AV_RN16(pcm + 14*nchannels + 2*c);
+            x[2]  = AV_RN16(pcm + 28*nchannels + 2*c);
+            x[3]  = AV_RN16(pcm + 16*nchannels + 2*c);
+            x[4]  = AV_RN16(pcm + 26*nchannels + 2*c);
+            x[5]  = AV_RN16(pcm + 18*nchannels + 2*c);
+            x[6]  = AV_RN16(pcm + 24*nchannels + 2*c);
+            x[7]  = AV_RN16(pcm + 20*nchannels + 2*c);
+            x[8]  = AV_RN16(pcm + 22*nchannels + 2*c);
+            x[9]  = AV_RN16(pcm +  6*nchannels + 2*c);
+            x[10] = AV_RN16(pcm + 12*nchannels + 2*c);
+            x[11] = AV_RN16(pcm +  0*nchannels + 2*c);
+            x[12] = AV_RN16(pcm + 10*nchannels + 2*c);
+            x[13] = AV_RN16(pcm +  2*nchannels + 2*c);
+            x[14] = AV_RN16(pcm +  8*nchannels + 2*c);
+            x[15] = AV_RN16(pcm +  4*nchannels + 2*c);
+        }
+    }
+
+    if (nsamples == 8) {
+        position -= 8;
+        for (c = 0; c < nchannels; c++) {
+            int16_t *x = &X[c][position];
+            x[-7] = AV_RN16(pcm + 14*nchannels + 2*c);
+            x[1]  = AV_RN16(pcm +  6*nchannels + 2*c);
+            x[2]  = AV_RN16(pcm + 12*nchannels + 2*c);
+            x[3]  = AV_RN16(pcm +  0*nchannels + 2*c);
+            x[4]  = AV_RN16(pcm + 10*nchannels + 2*c);
+            x[5]  = AV_RN16(pcm +  2*nchannels + 2*c);
+            x[6]  = AV_RN16(pcm +  8*nchannels + 2*c);
+            x[7]  = AV_RN16(pcm +  4*nchannels + 2*c);
+        }
+    }
+
+    return position;
+}
+
+static void sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands)
+{
+    int ch, sb, blk;
+    for (ch = 0; ch < channels; ch++) {
+        for (sb = 0; sb < subbands; sb++) {
+            uint32_t x = 1 << SCALE_OUT_BITS;
+            for (blk = 0; blk < blocks; blk++) {
+                int32_t tmp = FFABS(sb_sample_f[blk][ch][sb]);
+                if (tmp != 0)
+                    x |= tmp - 1;
+            }
+            scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - ff_clz(x);
+        }
+    }
+}
+
+static int sbc_calc_scalefactors_j(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int subbands)
+{
+    int blk, joint = 0;
+    int32_t tmp0, tmp1;
+    uint32_t x, y;
+
+    /* last subband does not use joint stereo */
+    int sb = subbands - 1;
+    x = 1 << SCALE_OUT_BITS;
+    y = 1 << SCALE_OUT_BITS;
+    for (blk = 0; blk < blocks; blk++) {
+        tmp0 = FFABS(sb_sample_f[blk][0][sb]);
+        tmp1 = FFABS(sb_sample_f[blk][1][sb]);
+        if (tmp0 != 0)
+            x |= tmp0 - 1;
+        if (tmp1 != 0)
+            y |= tmp1 - 1;
+    }
+    scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - ff_clz(x);
+    scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - ff_clz(y);
+
+    /* the rest of subbands can use joint stereo */
+    while (--sb >= 0) {
+        int32_t sb_sample_j[16][2];
+        x = 1 << SCALE_OUT_BITS;
+        y = 1 << SCALE_OUT_BITS;
+        for (blk = 0; blk < blocks; blk++) {
+            tmp0 = sb_sample_f[blk][0][sb];
+            tmp1 = sb_sample_f[blk][1][sb];
+            sb_sample_j[blk][0] = (tmp0 >> 1) + (tmp1 >> 1);
+            sb_sample_j[blk][1] = (tmp0 >> 1) - (tmp1 >> 1);
+            tmp0 = FFABS(tmp0);
+            tmp1 = FFABS(tmp1);
+            if (tmp0 != 0)
+                x |= tmp0 - 1;
+            if (tmp1 != 0)
+                y |= tmp1 - 1;
+        }
+        scale_factor[0][sb] = (31 - SCALE_OUT_BITS) -
+            ff_clz(x);
+        scale_factor[1][sb] = (31 - SCALE_OUT_BITS) -
+            ff_clz(y);
+        x = 1 << SCALE_OUT_BITS;
+        y = 1 << SCALE_OUT_BITS;
+        for (blk = 0; blk < blocks; blk++) {
+            tmp0 = FFABS(sb_sample_j[blk][0]);
+            tmp1 = FFABS(sb_sample_j[blk][1]);
+            if (tmp0 != 0)
+                x |= tmp0 - 1;
+            if (tmp1 != 0)
+                y |= tmp1 - 1;
+        }
+        x = (31 - SCALE_OUT_BITS) - ff_clz(x);
+        y = (31 - SCALE_OUT_BITS) - ff_clz(y);
+
+        /* decide whether to use joint stereo for this subband */
+        if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) {
+            joint |= 1 << (subbands - 1 - sb);
+            scale_factor[0][sb] = x;
+            scale_factor[1][sb] = y;
+            for (blk = 0; blk < blocks; blk++) {
+                sb_sample_f[blk][0][sb] = sb_sample_j[blk][0];
+                sb_sample_f[blk][1][sb] = sb_sample_j[blk][1];
+            }
+        }
+    }
+
+    /* bitmask with the information about subbands using joint stereo */
+    return joint;
+}
+
+/*
+ * Detect CPU features and setup function pointers
+ */
+av_cold void ff_sbcdsp_init(SBCDSPContext *s)
+{
+    /* Default implementation for analyze functions */
+    s->sbc_analyze_4 = sbc_analyze_4_simd;
+    s->sbc_analyze_8 = sbc_analyze_8_simd;
+    s->sbc_analyze_4s = sbc_analyze_4b_4s_simd;
+    if (s->increment == 1)
+        s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd;
+    else
+        s->sbc_analyze_8s = sbc_analyze_4b_8s_simd;
+
+    /* Default implementation for input reordering / deinterleaving */
+    s->sbc_enc_process_input_4s = sbc_enc_process_input_4s;
+    s->sbc_enc_process_input_8s = sbc_enc_process_input_8s;
+
+    /* Default implementation for scale factors calculation */
+    s->sbc_calc_scalefactors = sbc_calc_scalefactors;
+    s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
+
+    if (ARCH_ARM)
+        ff_sbcdsp_init_arm(s);
+    if (ARCH_X86)
+        ff_sbcdsp_init_x86(s);
+}

diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h
new file mode 100644
index 0000000..334c058
--- /dev/null
+++ b/libavcodec/sbcdsp.h

@@ -0,0 +1,86 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC basic "building bricks"
+ */
+
+#ifndef AVCODEC_SBCDSP_H
+#define AVCODEC_SBCDSP_H
+
+#include "sbc.h"
+#include "sbcdsp_data.h"
+
+#define SCALE_OUT_BITS 15
+#define SBC_X_BUFFER_SIZE 328
+
+typedef struct sbc_dsp_context SBCDSPContext;
+
+struct sbc_dsp_context {
+    int position;
+    /* Number of consecutive blocks handled by the encoder */
+    uint8_t increment;
+    DECLARE_ALIGNED(SBC_ALIGN, int16_t, X)[2][SBC_X_BUFFER_SIZE];
+    void (*sbc_analyze_4)(const int16_t *in, int32_t *out, const int16_t *consts);
+    void (*sbc_analyze_8)(const int16_t *in, int32_t *out, const int16_t *consts);
+    /* Polyphase analysis filter for 4 subbands configuration,
+     * it handles "increment" blocks at once */
+    void (*sbc_analyze_4s)(SBCDSPContext *s,
+                           int16_t *x, int32_t *out, int out_stride);
+    /* Polyphase analysis filter for 8 subbands configuration,
+     * it handles "increment" blocks at once */
+    void (*sbc_analyze_8s)(SBCDSPContext *s,
+                           int16_t *x, int32_t *out, int out_stride);
+    /* Process input data (deinterleave, endian conversion, reordering),
+     * depending on the number of subbands and input data byte order */
+    int (*sbc_enc_process_input_4s)(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels);
+    int (*sbc_enc_process_input_8s)(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels);
+    /* Scale factors calculation */
+    void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands);
+    /* Scale factors calculation with joint stereo support */
+    int (*sbc_calc_scalefactors_j)(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int subbands);
+};
+
+/*
+ * Initialize pointers to the functions which are the basic "building bricks"
+ * of SBC codec. Best implementation is selected based on target CPU
+ * capabilities.
+ */
+void ff_sbcdsp_init(SBCDSPContext *s);
+
+void ff_sbcdsp_init_arm(SBCDSPContext *s);
+void ff_sbcdsp_init_x86(SBCDSPContext *s);
+
+#endif /* AVCODEC_SBCDSP_H */

diff --git a/libavcodec/sbcdsp_data.c b/libavcodec/sbcdsp_data.c
new file mode 100644
index 0000000..78c07c0
--- /dev/null
+++ b/libavcodec/sbcdsp_data.c

@@ -0,0 +1,329 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * miscellaneous SBC tables
+ */
+
+#include "sbcdsp_data.h"
+
+#define F_PROTO(x) ((int32_t) (((x) * 2) * ((int32_t) 1 << 15) + 0.5))
+#define F_COS(x)   ((int32_t) (((x)    ) * ((int32_t) 1 << 15) + 0.5))
+
+/*
+ * Constant tables for the use in SIMD optimized analysis filters
+ * Each table consists of two parts:
+ * 1. reordered "proto" table
+ * 2. reordered "cos" table
+ *
+ * Due to non-symmetrical reordering, separate tables for "even"
+ * and "odd" cases are needed
+ */
+
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed4_simd_even)[40 + 16] = {
+#define C0 1.0932568993
+#define C1 1.3056875580
+#define C2 1.3056875580
+#define C3 1.6772280856
+
+#define F(x) F_PROTO(x)
+     F(0.00000000E+00 * C0),  F(3.83720193E-03 * C0),
+     F(5.36548976E-04 * C1),  F(2.73370904E-03 * C1),
+     F(3.06012286E-03 * C2),  F(3.89205149E-03 * C2),
+     F(0.00000000E+00 * C3), -F(1.49188357E-03 * C3),
+     F(1.09137620E-02 * C0),  F(2.58767811E-02 * C0),
+     F(2.04385087E-02 * C1),  F(3.21939290E-02 * C1),
+     F(7.76463494E-02 * C2),  F(6.13245186E-03 * C2),
+     F(0.00000000E+00 * C3), -F(2.88757392E-02 * C3),
+     F(1.35593274E-01 * C0),  F(2.94315332E-01 * C0),
+     F(1.94987841E-01 * C1),  F(2.81828203E-01 * C1),
+    -F(1.94987841E-01 * C2),  F(2.81828203E-01 * C2),
+     F(0.00000000E+00 * C3), -F(2.46636662E-01 * C3),
+    -F(1.35593274E-01 * C0),  F(2.58767811E-02 * C0),
+    -F(7.76463494E-02 * C1),  F(6.13245186E-03 * C1),
+    -F(2.04385087E-02 * C2),  F(3.21939290E-02 * C2),
+     F(0.00000000E+00 * C3),  F(2.88217274E-02 * C3),
+    -F(1.09137620E-02 * C0),  F(3.83720193E-03 * C0),
+    -F(3.06012286E-03 * C1),  F(3.89205149E-03 * C1),
+    -F(5.36548976E-04 * C2),  F(2.73370904E-03 * C2),
+     F(0.00000000E+00 * C3), -F(1.86581691E-03 * C3),
+#undef F
+#define F(x) F_COS(x)
+     F(0.7071067812 / C0),  F(0.9238795325 / C1),
+    -F(0.7071067812 / C0),  F(0.3826834324 / C1),
+    -F(0.7071067812 / C0), -F(0.3826834324 / C1),
+     F(0.7071067812 / C0), -F(0.9238795325 / C1),
+     F(0.3826834324 / C2), -F(1.0000000000 / C3),
+    -F(0.9238795325 / C2), -F(1.0000000000 / C3),
+     F(0.9238795325 / C2), -F(1.0000000000 / C3),
+    -F(0.3826834324 / C2), -F(1.0000000000 / C3),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed4_simd_odd)[40 + 16] = {
+#define C0 1.3056875580
+#define C1 1.6772280856
+#define C2 1.0932568993
+#define C3 1.3056875580
+
+#define F(x) F_PROTO(x)
+     F(2.73370904E-03 * C0),  F(5.36548976E-04 * C0),
+    -F(1.49188357E-03 * C1),  F(0.00000000E+00 * C1),
+     F(3.83720193E-03 * C2),  F(1.09137620E-02 * C2),
+     F(3.89205149E-03 * C3),  F(3.06012286E-03 * C3),
+     F(3.21939290E-02 * C0),  F(2.04385087E-02 * C0),
+    -F(2.88757392E-02 * C1),  F(0.00000000E+00 * C1),
+     F(2.58767811E-02 * C2),  F(1.35593274E-01 * C2),
+     F(6.13245186E-03 * C3),  F(7.76463494E-02 * C3),
+     F(2.81828203E-01 * C0),  F(1.94987841E-01 * C0),
+    -F(2.46636662E-01 * C1),  F(0.00000000E+00 * C1),
+     F(2.94315332E-01 * C2), -F(1.35593274E-01 * C2),
+     F(2.81828203E-01 * C3), -F(1.94987841E-01 * C3),
+     F(6.13245186E-03 * C0), -F(7.76463494E-02 * C0),
+     F(2.88217274E-02 * C1),  F(0.00000000E+00 * C1),
+     F(2.58767811E-02 * C2), -F(1.09137620E-02 * C2),
+     F(3.21939290E-02 * C3), -F(2.04385087E-02 * C3),
+     F(3.89205149E-03 * C0), -F(3.06012286E-03 * C0),
+    -F(1.86581691E-03 * C1),  F(0.00000000E+00 * C1),
+     F(3.83720193E-03 * C2),  F(0.00000000E+00 * C2),
+     F(2.73370904E-03 * C3), -F(5.36548976E-04 * C3),
+#undef F
+#define F(x) F_COS(x)
+     F(0.9238795325 / C0), -F(1.0000000000 / C1),
+     F(0.3826834324 / C0), -F(1.0000000000 / C1),
+    -F(0.3826834324 / C0), -F(1.0000000000 / C1),
+    -F(0.9238795325 / C0), -F(1.0000000000 / C1),
+     F(0.7071067812 / C2),  F(0.3826834324 / C3),
+    -F(0.7071067812 / C2), -F(0.9238795325 / C3),
+    -F(0.7071067812 / C2),  F(0.9238795325 / C3),
+     F(0.7071067812 / C2), -F(0.3826834324 / C3),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed8_simd_even)[80 + 64] = {
+#define C0 2.7906148894
+#define C1 2.4270044280
+#define C2 2.8015616024
+#define C3 3.1710363741
+#define C4 2.5377944043
+#define C5 2.4270044280
+#define C6 2.8015616024
+#define C7 3.1710363741
+
+#define F(x) F_PROTO(x)
+     F(0.00000000E+00 * C0),  F(2.01182542E-03 * C0),
+     F(1.56575398E-04 * C1),  F(1.78371725E-03 * C1),
+     F(3.43256425E-04 * C2),  F(1.47640169E-03 * C2),
+     F(5.54620202E-04 * C3),  F(1.13992507E-03 * C3),
+    -F(8.23919506E-04 * C4),  F(0.00000000E+00 * C4),
+     F(2.10371989E-03 * C5),  F(3.49717454E-03 * C5),
+     F(1.99454554E-03 * C6),  F(1.64973098E-03 * C6),
+     F(1.61656283E-03 * C7),  F(1.78805361E-04 * C7),
+     F(5.65949473E-03 * C0),  F(1.29371806E-02 * C0),
+     F(8.02941163E-03 * C1),  F(1.53184106E-02 * C1),
+     F(1.04584443E-02 * C2),  F(1.62208471E-02 * C2),
+     F(1.27472335E-02 * C3),  F(1.59045603E-02 * C3),
+    -F(1.46525263E-02 * C4),  F(0.00000000E+00 * C4),
+     F(8.85757540E-03 * C5),  F(5.31873032E-02 * C5),
+     F(2.92408442E-03 * C6),  F(3.90751381E-02 * C6),
+    -F(4.91578024E-03 * C7),  F(2.61098752E-02 * C7),
+     F(6.79989431E-02 * C0),  F(1.46955068E-01 * C0),
+     F(8.29847578E-02 * C1),  F(1.45389847E-01 * C1),
+     F(9.75753918E-02 * C2),  F(1.40753505E-01 * C2),
+     F(1.11196689E-01 * C3),  F(1.33264415E-01 * C3),
+    -F(1.23264548E-01 * C4),  F(0.00000000E+00 * C4),
+     F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5),
+     F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6),
+     F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7),
+    -F(6.79989431E-02 * C0),  F(1.29371806E-02 * C0),
+    -F(5.31873032E-02 * C1),  F(8.85757540E-03 * C1),
+    -F(3.90751381E-02 * C2),  F(2.92408442E-03 * C2),
+    -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3),
+     F(1.46404076E-02 * C4),  F(0.00000000E+00 * C4),
+     F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5),
+     F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6),
+     F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7),
+    -F(5.65949473E-03 * C0),  F(2.01182542E-03 * C0),
+    -F(3.49717454E-03 * C1),  F(2.10371989E-03 * C1),
+    -F(1.64973098E-03 * C2),  F(1.99454554E-03 * C2),
+    -F(1.78805361E-04 * C3),  F(1.61656283E-03 * C3),
+    -F(9.02154502E-04 * C4),  F(0.00000000E+00 * C4),
+     F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5),
+     F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6),
+     F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7),
+#undef F
+#define F(x) F_COS(x)
+     F(0.7071067812 / C0),  F(0.8314696123 / C1),
+    -F(0.7071067812 / C0), -F(0.1950903220 / C1),
+    -F(0.7071067812 / C0), -F(0.9807852804 / C1),
+     F(0.7071067812 / C0), -F(0.5555702330 / C1),
+     F(0.7071067812 / C0),  F(0.5555702330 / C1),
+    -F(0.7071067812 / C0),  F(0.9807852804 / C1),
+    -F(0.7071067812 / C0),  F(0.1950903220 / C1),
+     F(0.7071067812 / C0), -F(0.8314696123 / C1),
+     F(0.9238795325 / C2),  F(0.9807852804 / C3),
+     F(0.3826834324 / C2),  F(0.8314696123 / C3),
+    -F(0.3826834324 / C2),  F(0.5555702330 / C3),
+    -F(0.9238795325 / C2),  F(0.1950903220 / C3),
+    -F(0.9238795325 / C2), -F(0.1950903220 / C3),
+    -F(0.3826834324 / C2), -F(0.5555702330 / C3),
+     F(0.3826834324 / C2), -F(0.8314696123 / C3),
+     F(0.9238795325 / C2), -F(0.9807852804 / C3),
+    -F(1.0000000000 / C4),  F(0.5555702330 / C5),
+    -F(1.0000000000 / C4), -F(0.9807852804 / C5),
+    -F(1.0000000000 / C4),  F(0.1950903220 / C5),
+    -F(1.0000000000 / C4),  F(0.8314696123 / C5),
+    -F(1.0000000000 / C4), -F(0.8314696123 / C5),
+    -F(1.0000000000 / C4), -F(0.1950903220 / C5),
+    -F(1.0000000000 / C4),  F(0.9807852804 / C5),
+    -F(1.0000000000 / C4), -F(0.5555702330 / C5),
+     F(0.3826834324 / C6),  F(0.1950903220 / C7),
+    -F(0.9238795325 / C6), -F(0.5555702330 / C7),
+     F(0.9238795325 / C6),  F(0.8314696123 / C7),
+    -F(0.3826834324 / C6), -F(0.9807852804 / C7),
+    -F(0.3826834324 / C6),  F(0.9807852804 / C7),
+     F(0.9238795325 / C6), -F(0.8314696123 / C7),
+    -F(0.9238795325 / C6),  F(0.5555702330 / C7),
+     F(0.3826834324 / C6), -F(0.1950903220 / C7),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed8_simd_odd)[80 + 64] = {
+#define C0 2.5377944043
+#define C1 2.4270044280
+#define C2 2.8015616024
+#define C3 3.1710363741
+#define C4 2.7906148894
+#define C5 2.4270044280
+#define C6 2.8015616024
+#define C7 3.1710363741
+
+#define F(x) F_PROTO(x)
+     F(0.00000000E+00 * C0), -F(8.23919506E-04 * C0),
+     F(1.56575398E-04 * C1),  F(1.78371725E-03 * C1),
+     F(3.43256425E-04 * C2),  F(1.47640169E-03 * C2),
+     F(5.54620202E-04 * C3),  F(1.13992507E-03 * C3),
+     F(2.01182542E-03 * C4),  F(5.65949473E-03 * C4),
+     F(2.10371989E-03 * C5),  F(3.49717454E-03 * C5),
+     F(1.99454554E-03 * C6),  F(1.64973098E-03 * C6),
+     F(1.61656283E-03 * C7),  F(1.78805361E-04 * C7),
+     F(0.00000000E+00 * C0), -F(1.46525263E-02 * C0),
+     F(8.02941163E-03 * C1),  F(1.53184106E-02 * C1),
+     F(1.04584443E-02 * C2),  F(1.62208471E-02 * C2),
+     F(1.27472335E-02 * C3),  F(1.59045603E-02 * C3),
+     F(1.29371806E-02 * C4),  F(6.79989431E-02 * C4),
+     F(8.85757540E-03 * C5),  F(5.31873032E-02 * C5),
+     F(2.92408442E-03 * C6),  F(3.90751381E-02 * C6),
+    -F(4.91578024E-03 * C7),  F(2.61098752E-02 * C7),
+     F(0.00000000E+00 * C0), -F(1.23264548E-01 * C0),
+     F(8.29847578E-02 * C1),  F(1.45389847E-01 * C1),
+     F(9.75753918E-02 * C2),  F(1.40753505E-01 * C2),
+     F(1.11196689E-01 * C3),  F(1.33264415E-01 * C3),
+     F(1.46955068E-01 * C4), -F(6.79989431E-02 * C4),
+     F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5),
+     F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6),
+     F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7),
+     F(0.00000000E+00 * C0),  F(1.46404076E-02 * C0),
+    -F(5.31873032E-02 * C1),  F(8.85757540E-03 * C1),
+    -F(3.90751381E-02 * C2),  F(2.92408442E-03 * C2),
+    -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3),
+     F(1.29371806E-02 * C4), -F(5.65949473E-03 * C4),
+     F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5),
+     F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6),
+     F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7),
+     F(0.00000000E+00 * C0), -F(9.02154502E-04 * C0),
+    -F(3.49717454E-03 * C1),  F(2.10371989E-03 * C1),
+    -F(1.64973098E-03 * C2),  F(1.99454554E-03 * C2),
+    -F(1.78805361E-04 * C3),  F(1.61656283E-03 * C3),
+     F(2.01182542E-03 * C4),  F(0.00000000E+00 * C4),
+     F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5),
+     F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6),
+     F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7),
+#undef F
+#define F(x) F_COS(x)
+    -F(1.0000000000 / C0),  F(0.8314696123 / C1),
+    -F(1.0000000000 / C0), -F(0.1950903220 / C1),
+    -F(1.0000000000 / C0), -F(0.9807852804 / C1),
+    -F(1.0000000000 / C0), -F(0.5555702330 / C1),
+    -F(1.0000000000 / C0),  F(0.5555702330 / C1),
+    -F(1.0000000000 / C0),  F(0.9807852804 / C1),
+    -F(1.0000000000 / C0),  F(0.1950903220 / C1),
+    -F(1.0000000000 / C0), -F(0.8314696123 / C1),
+     F(0.9238795325 / C2),  F(0.9807852804 / C3),
+     F(0.3826834324 / C2),  F(0.8314696123 / C3),
+    -F(0.3826834324 / C2),  F(0.5555702330 / C3),
+    -F(0.9238795325 / C2),  F(0.1950903220 / C3),
+    -F(0.9238795325 / C2), -F(0.1950903220 / C3),
+    -F(0.3826834324 / C2), -F(0.5555702330 / C3),
+     F(0.3826834324 / C2), -F(0.8314696123 / C3),
+     F(0.9238795325 / C2), -F(0.9807852804 / C3),
+     F(0.7071067812 / C4),  F(0.5555702330 / C5),
+    -F(0.7071067812 / C4), -F(0.9807852804 / C5),
+    -F(0.7071067812 / C4),  F(0.1950903220 / C5),
+     F(0.7071067812 / C4),  F(0.8314696123 / C5),
+     F(0.7071067812 / C4), -F(0.8314696123 / C5),
+    -F(0.7071067812 / C4), -F(0.1950903220 / C5),
+    -F(0.7071067812 / C4),  F(0.9807852804 / C5),
+     F(0.7071067812 / C4), -F(0.5555702330 / C5),
+     F(0.3826834324 / C6),  F(0.1950903220 / C7),
+    -F(0.9238795325 / C6), -F(0.5555702330 / C7),
+     F(0.9238795325 / C6),  F(0.8314696123 / C7),
+    -F(0.3826834324 / C6), -F(0.9807852804 / C7),
+    -F(0.3826834324 / C6),  F(0.9807852804 / C7),
+     F(0.9238795325 / C6), -F(0.8314696123 / C7),
+    -F(0.9238795325 / C6),  F(0.5555702330 / C7),
+     F(0.3826834324 / C6), -F(0.1950903220 / C7),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+};

diff --git a/libavcodec/sbcdsp_data.h b/libavcodec/sbcdsp_data.h
new file mode 100644
index 0000000..10fad5c
--- /dev/null
+++ b/libavcodec/sbcdsp_data.h

@@ -0,0 +1,55 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * miscellaneous SBC tables
+ */
+
+#ifndef AVCODEC_SBCDSP_DATA_H
+#define AVCODEC_SBCDSP_DATA_H
+
+#include "sbc.h"
+
+#define SBC_PROTO_FIXED_SCALE      16
+#define SBC_COS_TABLE_FIXED_SCALE  15
+
+/*
+ * Constant tables for the use in SIMD optimized analysis filters
+ * Each table consists of two parts:
+ * 1. reordered "proto" table
+ * 2. reordered "cos" table
+ *
+ * Due to non-symmetrical reordering, separate tables for "even"
+ * and "odd" cases are needed
+ */
+
+extern const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_even[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_odd[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_even[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_odd[];
+
+#endif /* AVCODEC_SBCDSP_DATA_H */

diff --git a/libavcodec/sbcenc.c b/libavcodec/sbcenc.c
new file mode 100644
index 0000000..e2929e2
--- /dev/null
+++ b/libavcodec/sbcenc.c

@@ -0,0 +1,361 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2008  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC encoder implementation
+ */
+
+#include <stdbool.h>
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "profiles.h"
+#include "put_bits.h"
+#include "sbc.h"
+#include "sbcdsp.h"
+
+typedef struct SBCEncContext {
+    AVClass *class;
+    int64_t max_delay;
+    int msbc;
+    DECLARE_ALIGNED(SBC_ALIGN, struct sbc_frame, frame);
+    DECLARE_ALIGNED(SBC_ALIGN, SBCDSPContext, dsp);
+} SBCEncContext;
+
+static int sbc_analyze_audio(SBCDSPContext *s, struct sbc_frame *frame)
+{
+    int ch, blk;
+    int16_t *x;
+
+    switch (frame->subbands) {
+    case 4:
+        for (ch = 0; ch < frame->channels; ch++) {
+            x = &s->X[ch][s->position - 4 *
+                    s->increment + frame->blocks * 4];
+            for (blk = 0; blk < frame->blocks;
+                        blk += s->increment) {
+                s->sbc_analyze_4s(
+                    s, x,
+                    frame->sb_sample_f[blk][ch],
+                    frame->sb_sample_f[blk + 1][ch] -
+                    frame->sb_sample_f[blk][ch]);
+                x -= 4 * s->increment;
+            }
+        }
+        return frame->blocks * 4;
+
+    case 8:
+        for (ch = 0; ch < frame->channels; ch++) {
+            x = &s->X[ch][s->position - 8 *
+                    s->increment + frame->blocks * 8];
+            for (blk = 0; blk < frame->blocks;
+                        blk += s->increment) {
+                s->sbc_analyze_8s(
+                    s, x,
+                    frame->sb_sample_f[blk][ch],
+                    frame->sb_sample_f[blk + 1][ch] -
+                    frame->sb_sample_f[blk][ch]);
+                x -= 8 * s->increment;
+            }
+        }
+        return frame->blocks * 8;
+
+    default:
+        return AVERROR(EIO);
+    }
+}
+
+/*
+ * Packs the SBC frame from frame into the memory in avpkt.
+ * Returns the length of the packed frame.
+ */
+static size_t sbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame,
+                             int joint, bool msbc)
+{
+    PutBitContext pb;
+
+    /* Will copy the header parts for CRC-8 calculation here */
+    uint8_t crc_header[11] = { 0 };
+    int crc_pos;
+
+    uint32_t audio_sample;
+
+    int ch, sb, blk;        /* channel, subband, block and bit counters */
+    int bits[2][8];         /* bits distribution */
+    uint32_t levels[2][8];  /* levels are derived from that */
+    uint32_t sb_sample_delta[2][8];
+
+    if (msbc) {
+        avpkt->data[0] = MSBC_SYNCWORD;
+        avpkt->data[1] = 0;
+        avpkt->data[2] = 0;
+    } else {
+        avpkt->data[0] = SBC_SYNCWORD;
+
+        avpkt->data[1]  = (frame->frequency           & 0x03) << 6;
+        avpkt->data[1] |= (((frame->blocks >> 2) - 1) & 0x03) << 4;
+        avpkt->data[1] |= (frame->mode                & 0x03) << 2;
+        avpkt->data[1] |= (frame->allocation          & 0x01) << 1;
+        avpkt->data[1] |= ((frame->subbands == 8)     & 0x01) << 0;
+
+        avpkt->data[2] = frame->bitpool;
+
+        if (frame->bitpool > frame->subbands << (4 + (frame->mode == STEREO
+                                                   || frame->mode == JOINT_STEREO)))
+            return -5;
+    }
+
+    /* Can't fill in crc yet */
+    crc_header[0] = avpkt->data[1];
+    crc_header[1] = avpkt->data[2];
+    crc_pos = 16;
+
+    init_put_bits(&pb, avpkt->data + 4, avpkt->size);
+
+    if (frame->mode == JOINT_STEREO) {
+        put_bits(&pb, frame->subbands, joint);
+        crc_header[crc_pos >> 3] = joint;
+        crc_pos += frame->subbands;
+    }
+
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++) {
+            put_bits(&pb, 4, frame->scale_factor[ch][sb] & 0x0F);
+            crc_header[crc_pos >> 3] <<= 4;
+            crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F;
+            crc_pos += 4;
+        }
+    }
+
+    /* align the last crc byte */
+    if (crc_pos % 8)
+        crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8);
+
+    avpkt->data[3] = ff_sbc_crc8(frame->crc_ctx, crc_header, crc_pos);
+
+    ff_sbc_calculate_bits(frame, bits);
+
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++) {
+            levels[ch][sb] = ((1 << bits[ch][sb]) - 1) <<
+                (32 - (frame->scale_factor[ch][sb] +
+                    SCALE_OUT_BITS + 2));
+            sb_sample_delta[ch][sb] = (uint32_t) 1 <<
+                (frame->scale_factor[ch][sb] +
+                    SCALE_OUT_BITS + 1);
+        }
+    }
+
+    for (blk = 0; blk < frame->blocks; blk++) {
+        for (ch = 0; ch < frame->channels; ch++) {
+            for (sb = 0; sb < frame->subbands; sb++) {
+
+                if (bits[ch][sb] == 0)
+                    continue;
+
+                audio_sample = ((uint64_t) levels[ch][sb] *
+                    (sb_sample_delta[ch][sb] +
+                    frame->sb_sample_f[blk][ch][sb])) >> 32;
+
+                put_bits(&pb, bits[ch][sb], audio_sample);
+            }
+        }
+    }
+
+    flush_put_bits(&pb);
+
+    return (put_bits_count(&pb) + 7) / 8;
+}
+
+static int sbc_encode_init(AVCodecContext *avctx)
+{
+    SBCEncContext *sbc = avctx->priv_data;
+    struct sbc_frame *frame = &sbc->frame;
+
+    if (avctx->profile == FF_PROFILE_SBC_MSBC)
+        sbc->msbc = 1;
+
+    if (sbc->msbc) {
+        if (avctx->channels != 1) {
+            av_log(avctx, AV_LOG_ERROR, "mSBC require mono channel.\n");
+            return AVERROR(EINVAL);
+        }
+
+        if (avctx->sample_rate != 16000) {
+            av_log(avctx, AV_LOG_ERROR, "mSBC require 16 kHz samplerate.\n");
+            return AVERROR(EINVAL);
+        }
+
+        frame->mode = SBC_MODE_MONO;
+        frame->subbands = 8;
+        frame->blocks = MSBC_BLOCKS;
+        frame->allocation = SBC_AM_LOUDNESS;
+        frame->bitpool = 26;
+
+        avctx->frame_size = 8 * MSBC_BLOCKS;
+    } else {
+        int d;
+
+        if (avctx->global_quality > 255*FF_QP2LAMBDA) {
+            av_log(avctx, AV_LOG_ERROR, "bitpool > 255 is not allowed.\n");
+            return AVERROR(EINVAL);
+        }
+
+        if (avctx->channels == 1) {
+            frame->mode = SBC_MODE_MONO;
+            if (sbc->max_delay <= 3000 || avctx->bit_rate > 270000)
+                frame->subbands = 4;
+            else
+                frame->subbands = 8;
+        } else {
+            if (avctx->bit_rate < 180000 || avctx->bit_rate > 420000)
+                frame->mode = SBC_MODE_JOINT_STEREO;
+            else
+                frame->mode = SBC_MODE_STEREO;
+            if (sbc->max_delay <= 4000 || avctx->bit_rate > 420000)
+                frame->subbands = 4;
+            else
+                frame->subbands = 8;
+        }
+        /* sbc algorithmic delay is ((blocks + 10) * subbands - 2) / sample_rate */
+        frame->blocks = av_clip(((sbc->max_delay * avctx->sample_rate + 2)
+                               / (1000000 * frame->subbands)) - 10, 4, 16) & ~3;
+
+        frame->allocation = SBC_AM_LOUDNESS;
+
+        d = frame->blocks * ((frame->mode == SBC_MODE_DUAL_CHANNEL) + 1);
+        frame->bitpool = (((avctx->bit_rate * frame->subbands * frame->blocks) / avctx->sample_rate)
+                          - 4 * frame->subbands * avctx->channels
+                          - (frame->mode == SBC_MODE_JOINT_STEREO)*frame->subbands - 32 + d/2) / d;
+        if (avctx->global_quality > 0)
+            frame->bitpool = avctx->global_quality / FF_QP2LAMBDA;
+
+        avctx->frame_size = 4*((frame->subbands >> 3) + 1) * 4*(frame->blocks >> 2);
+    }
+
+    for (int i = 0; avctx->codec->supported_samplerates[i]; i++)
+        if (avctx->sample_rate == avctx->codec->supported_samplerates[i])
+            frame->frequency = i;
+
+    frame->channels = avctx->channels;
+    frame->codesize = frame->subbands * frame->blocks * avctx->channels * 2;
+    frame->crc_ctx = av_crc_get_table(AV_CRC_8_EBU);
+
+    memset(&sbc->dsp.X, 0, sizeof(sbc->dsp.X));
+    sbc->dsp.position = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7;
+    sbc->dsp.increment = sbc->msbc ? 1 : 4;
+    ff_sbcdsp_init(&sbc->dsp);
+
+    return 0;
+}
+
+static int sbc_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                            const AVFrame *av_frame, int *got_packet_ptr)
+{
+    SBCEncContext *sbc = avctx->priv_data;
+    struct sbc_frame *frame = &sbc->frame;
+    uint8_t joint = frame->mode == SBC_MODE_JOINT_STEREO;
+    uint8_t dual  = frame->mode == SBC_MODE_DUAL_CHANNEL;
+    int ret, j = 0;
+
+    int frame_length = 4 + (4 * frame->subbands * frame->channels) / 8
+                     + ((frame->blocks * frame->bitpool * (1 + dual)
+                     + joint * frame->subbands) + 7) / 8;
+
+    /* input must be large enough to encode a complete frame */
+    if (av_frame->nb_samples * frame->channels * 2 < frame->codesize)
+        return 0;
+
+    if ((ret = ff_alloc_packet2(avctx, avpkt, frame_length, 0)) < 0)
+        return ret;
+
+    /* Select the needed input data processing function and call it */
+    if (frame->subbands == 8)
+        sbc->dsp.position = sbc->dsp.sbc_enc_process_input_8s(
+                sbc->dsp.position, av_frame->data[0], sbc->dsp.X,
+                frame->subbands * frame->blocks, frame->channels);
+    else
+        sbc->dsp.position = sbc->dsp.sbc_enc_process_input_4s(
+                sbc->dsp.position, av_frame->data[0], sbc->dsp.X,
+                frame->subbands * frame->blocks, frame->channels);
+
+    sbc_analyze_audio(&sbc->dsp, &sbc->frame);
+
+    if (frame->mode == JOINT_STEREO)
+        j = sbc->dsp.sbc_calc_scalefactors_j(frame->sb_sample_f,
+                                             frame->scale_factor,
+                                             frame->blocks,
+                                             frame->subbands);
+    else
+        sbc->dsp.sbc_calc_scalefactors(frame->sb_sample_f,
+                                       frame->scale_factor,
+                                       frame->blocks,
+                                       frame->channels,
+                                       frame->subbands);
+    emms_c();
+    sbc_pack_frame(avpkt, frame, j, sbc->msbc);
+
+    *got_packet_ptr = 1;
+    return 0;
+}
+
+#define OFFSET(x) offsetof(SBCEncContext, x)
+#define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "sbc_delay", "set maximum algorithmic latency",
+      OFFSET(max_delay), AV_OPT_TYPE_DURATION, {.i64 = 13000}, 1000,13000, AE },
+    { "msbc",      "use mSBC mode (wideband speech mono SBC)",
+      OFFSET(msbc),      AV_OPT_TYPE_BOOL,     {.i64 = 0},        0,    1, AE },
+    { NULL },
+};
+
+static const AVClass sbc_class = {
+    .class_name = "sbc encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_sbc_encoder = {
+    .name                  = "sbc",
+    .long_name             = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_SBC,
+    .priv_data_size        = sizeof(SBCEncContext),
+    .init                  = sbc_encode_init,
+    .encode2               = sbc_encode_frame,
+    .capabilities          = AV_CODEC_CAP_SMALL_LAST_FRAME,
+    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
+                                                  AV_CH_LAYOUT_STEREO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+                                                             AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000, 0 },
+    .priv_class            = &sbc_class,
+    .profiles              = NULL_IF_CONFIG_SMALL(ff_sbc_profiles),
+};

diff --git a/libavcodec/scpr.c b/libavcodec/scpr.c
index ad6073d..e41fbbe 100644
--- a/libavcodec/scpr.c
+++ b/libavcodec/scpr.c

@@ -136,7 +136,7 @@
     rc->range *= freq;
 
     while (rc->range < TOP && bytestream2_get_bytes_left(gb) > 0) {
-        unsigned byte = bytestream2_get_byte(gb);
+        unsigned byte = bytestream2_get_byteu(gb);
         rc->code = (rc->code << 8) | byte;
         rc->range <<= 8;
     }
@@ -172,7 +172,7 @@
     rc->range = rc->range * (uint64_t)(freq + cumFreq) / total_freq - (t + 1);
 
     while (rc->range < TOP && bytestream2_get_bytes_left(gb) > 0) {
-        unsigned byte = bytestream2_get_byte(gb);
+        unsigned byte = bytestream2_get_byteu(gb);
         rc->code = (rc->code << 8) | byte;
         rc->code1 <<= 8;
         rc->range <<= 8;
@@ -211,6 +211,10 @@
             break;
         c++;
     }
+
+    if (c >= maxc)
+        return AVERROR_INVALIDDATA;
+
     if ((ret = s->decode(gb, rc, cumfr, cnt_c, totfr)) < 0)
         return ret;
 
@@ -293,14 +297,41 @@
     return 0;
 }
 
+static int decode_units(SCPRContext *s, unsigned *r, unsigned *g, unsigned *b,
+                        int *cx, int *cx1)
+{
+    const int cxshift = s->cxshift;
+    int ret;
+
+    ret = decode_unit(s, &s->pixel_model[0][*cx + *cx1], 400, r);
+    if (ret < 0)
+        return ret;
+
+    *cx1 = (*cx << 6) & 0xFC0;
+    *cx = *r >> cxshift;
+    ret = decode_unit(s, &s->pixel_model[1][*cx + *cx1], 400, g);
+    if (ret < 0)
+        return ret;
+
+    *cx1 = (*cx << 6) & 0xFC0;
+    *cx = *g >> cxshift;
+    ret = decode_unit(s, &s->pixel_model[2][*cx + *cx1], 400, b);
+    if (ret < 0)
+        return ret;
+
+    *cx1 = (*cx << 6) & 0xFC0;
+    *cx = *b >> cxshift;
+
+    return 0;
+}
+
 static int decompress_i(AVCodecContext *avctx, uint32_t *dst, int linesize)
 {
     SCPRContext *s = avctx->priv_data;
     GetByteContext *gb = &s->gb;
     int cx = 0, cx1 = 0, k = 0, clr = 0;
-    int run, r, g, b, off, y = 0, x = 0, z, ret;
-    unsigned backstep = linesize - avctx->width;
-    const int cxshift = s->cxshift;
+    int run, off, y = 0, x = 0, z, ret;
+    unsigned r, g, b, backstep = linesize - avctx->width;
     unsigned lx, ly, ptype;
 
     reinit_tables(s);
@@ -308,28 +339,15 @@
     init_rangecoder(&s->rc, gb);
 
     while (k < avctx->width + 1) {
-        ret = decode_unit(s, &s->pixel_model[0][cx + cx1], 400, &r);
+        ret = decode_units(s, &r, &g, &b, &cx, &cx1);
         if (ret < 0)
             return ret;
 
-        cx1 = (cx << 6) & 0xFC0;
-        cx = r >> cxshift;
-        ret = decode_unit(s, &s->pixel_model[1][cx + cx1], 400, &g);
-        if (ret < 0)
-            return ret;
-
-        cx1 = (cx << 6) & 0xFC0;
-        cx = g >> cxshift;
-        ret = decode_unit(s, &s->pixel_model[2][cx + cx1], 400, &b);
-        if (ret < 0)
-            return ret;
-
-        cx1 = (cx << 6) & 0xFC0;
-        cx = b >> cxshift;
-
         ret = decode_value(s, s->run_model[0], 256, 400, &run);
         if (ret < 0)
             return ret;
+        if (run <= 0)
+            return AVERROR_INVALIDDATA;
 
         clr = (b << 16) + (g << 8) + r;
         k += run;
@@ -355,19 +373,7 @@
         if (ret < 0)
             return ret;
         if (ptype == 0) {
-            ret = decode_unit(s, &s->pixel_model[0][cx + cx1], 400, &r);
-            if (ret < 0)
-                return ret;
-
-            cx1 = (cx << 6) & 0xFC0;
-            cx = r >> cxshift;
-            ret = decode_unit(s, &s->pixel_model[1][cx + cx1], 400, &g);
-            if (ret < 0)
-                return ret;
-
-            cx1 = (cx << 6) & 0xFC0;
-            cx = g >> cxshift;
-            ret = decode_unit(s, &s->pixel_model[2][cx + cx1], 400, &b);
+            ret = decode_units(s, &r, &g, &b, &cx, &cx1);
             if (ret < 0)
                 return ret;
 
@@ -378,6 +384,8 @@
         ret = decode_value(s, s->run_model[ptype], 256, 400, &run);
         if (ret < 0)
             return ret;
+        if (run <= 0)
+            return AVERROR_INVALIDDATA;
 
         switch (ptype) {
         case 0:
@@ -442,13 +450,13 @@
                 }
 
                 r = odst[(ly * linesize + lx) * 4] +
-                    odst[((y * linesize + x) + off - z) * 4 + 4] -
+                    odst[((y * linesize + x) + off) * 4 + 4] -
                     odst[((y * linesize + x) + off - z) * 4];
                 g = odst[(ly * linesize + lx) * 4 + 1] +
-                    odst[((y * linesize + x) + off - z) * 4 + 5] -
+                    odst[((y * linesize + x) + off) * 4 + 5] -
                     odst[((y * linesize + x) + off - z) * 4 + 1];
                 b = odst[(ly * linesize + lx) * 4 + 2] +
-                    odst[((y * linesize + x) + off - z) * 4 + 6] -
+                    odst[((y * linesize + x) + off) * 4 + 6] -
                     odst[((y * linesize + x) + off - z) * 4 + 2];
                 clr = ((b & 0xFF) << 16) + ((g & 0xFF) << 8) + (r & 0xFF);
                 dst[y * linesize + x] = clr;
@@ -506,10 +514,9 @@
     GetByteContext *gb = &s->gb;
     int ret, temp, min, max, x, y, cx = 0, cx1 = 0;
     int backstep = linesize - avctx->width;
-    const int cxshift = s->cxshift;
 
     if (bytestream2_get_byte(gb) == 0)
-        return 0;
+        return 1;
     bytestream2_skip(gb, 1);
     init_rangecoder(&s->rc, gb);
 
@@ -522,6 +529,9 @@
         return ret;
 
     max += temp << 8;
+    if (min > max || min >= s->nbcount)
+        return AVERROR_INVALIDDATA;
+
     memset(s->blocks, 0, sizeof(*s->blocks) * s->nbcount);
 
     while (min <= max) {
@@ -531,6 +541,8 @@
         ret |= decode_value(s, s->count_model, 256, 20, &count);
         if (ret < 0)
             return ret;
+        if (count <= 0)
+            return AVERROR_INVALIDDATA;
 
         while (min < s->nbcount && count-- > 0) {
             s->blocks[min++] = fill;
@@ -577,27 +589,15 @@
                     }
                 }
             } else {
-                int run, r, g, b, z, bx = x * 16 + sx1, by = y * 16 + sy1;
-                unsigned clr, ptype = 0;
+                int run, z, bx = x * 16 + sx1, by = y * 16 + sy1;
+                unsigned r, g, b, clr, ptype = 0;
 
                 for (; by < y * 16 + sy2 && by < avctx->height;) {
                     ret = decode_value(s, s->op_model[ptype], 6, 1000, &ptype);
                     if (ret < 0)
                         return ret;
                     if (ptype == 0) {
-                        ret = decode_unit(s, &s->pixel_model[0][cx + cx1], 400, &r);
-                        if (ret < 0)
-                            return ret;
-
-                        cx1 = (cx << 6) & 0xFC0;
-                        cx = r >> cxshift;
-                        ret = decode_unit(s, &s->pixel_model[1][cx + cx1], 400, &g);
-                        if (ret < 0)
-                            return ret;
-
-                        cx1 = (cx << 6) & 0xFC0;
-                        cx = g >> cxshift;
-                        ret = decode_unit(s, &s->pixel_model[2][cx + cx1], 400, &b);
+                        ret = decode_units(s, &r, &g, &b, &cx, &cx1);
                         if (ret < 0)
                             return ret;
 
@@ -608,6 +608,8 @@
                     ret = decode_value(s, s->run_model[ptype], 256, 400, &run);
                     if (ret < 0)
                         return ret;
+                    if (run <= 0)
+                        return AVERROR_INVALIDDATA;
 
                     switch (ptype) {
                     case 0:
@@ -811,6 +813,8 @@
                            s->current_frame->linesize[0] / 4,
                            (uint32_t *)s->last_frame->data[0],
                            s->last_frame->linesize[0] / 4);
+        if (ret == 1)
+            return avpkt->size;
     } else {
         return AVERROR_PATCHWELCOME;
     }

diff --git a/libavcodec/sheervideo.c b/libavcodec/sheervideo.c
index 6f99b5c..50c3ebc 100644
--- a/libavcodec/sheervideo.c
+++ b/libavcodec/sheervideo.c

@@ -28,6 +28,7 @@
 #include "get_bits.h"
 #include "internal.h"
 #include "thread.h"
+#include "sheervideodata.h"
 
 typedef struct SheerVideoContext {
     unsigned format;
@@ -36,1076 +37,6 @@
     void (*decode_frame)(AVCodecContext *avctx, AVFrame *p, GetBitContext *gb);
 } SheerVideoContext;
 
-static const uint8_t l_r_rgb[256] = {
-     3,  3,  4,  4,  5,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  8,
-     8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10,
-    10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10,
-    10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,
-     8,  8,  8,  7,  7,  7,  7,  7,  6,  6,  6,  5,  5,  4,  4,  4,
-};
-
-static const uint8_t l_r_rgbi[256] = {
-     3,  4,  4,  4,  5,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  7,
-     8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,  8,  8,  8,  8,
-     8,  7,  7,  7,  7,  7,  7,  6,  6,  6,  6,  5,  5,  4,  4,  4,
-};
-
-static const uint8_t l_g_rgbi[256] = {
-     1,  3,  4,  5,  6,  7,  7,  8,  9,  9, 10, 10, 10, 10, 11, 11,
-    11, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
-    14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14,
-    14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12,
-    11, 11, 11, 10, 10, 10,  9,  9,  9,  8,  8,  7,  6,  5,  5,  3,
-};
-
-static const uint8_t l_g_rgb[256] = {
-     2,  2,  4,  4,  6,  7,  9,  9, 10, 11, 11, 11, 12, 12, 12, 13,
-    13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13,
-    13, 13, 12, 12, 12, 11, 11, 11, 10,  9,  9,  8,  6,  4,  3,  3,
-};
-
-static const uint8_t l_y_ybr[256] = {
-     3,  3,  4,  4,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  8,  8,
-     8,  8,  8,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10,
-    10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  8,  8,
-     8,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  5,  4,  4,  3,
-};
-
-static const uint8_t l_u_ybr[256] = {
-     1,  2,  4,  6,  9, 10, 11, 11, 12, 12, 13, 13, 13, 14, 14, 14,
-    14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15,
-    14, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10,  8,  5,  3,
-};
-
-static const uint8_t l_y_ybyr[256] = {
-     3,  3,  4,  4,  5,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  8,
-     8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10,
-    10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10,
-    10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,
-     8,  8,  8,  7,  7,  7,  7,  7,  6,  6,  6,  5,  5,  4,  4,  4,
-};
-
-static const uint8_t l_u_ybyr[256] = {
-     1,  2,  4,  6,  8,  9, 10, 10, 11, 11, 12, 12, 12, 13, 13, 14,
-    14, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14,
-    14, 14, 13, 13, 13, 12, 12, 11, 11, 10, 10,  9,  8,  7,  6,  3,
-};
-
-static const uint8_t l_y_byry[256] = {
-     3,  3,  4,  4,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  8,  8,
-     8,  8,  8,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10,
-    10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11,
-    11, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  8,  8,
-     8,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  5,  4,  4,  3,
-};
-
-static const uint8_t l_u_byry[256] = {
-     1,  2,  4,  6,  8,  9,  9, 10, 11, 11, 12, 12, 13, 13, 13, 14,
-    14, 14, 14, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 14,
-    14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10,  9,  8,  7,  6,  3,
-};
-
-static const uint8_t l_y_ybr10i[1024] = {
-     3,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,
-     7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
-     8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
-     9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,
-     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
-     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,  7,
-     7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,
-};
-
-static const uint8_t l_y_ybr10[1024] = {
-     4,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,  7,  7,  7,
-     7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
-     8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
-     9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,
-     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
-     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,
-     7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,
-};
-
-static const uint8_t l_u_ybr10i[1024] = {
-     2,  3,  4,  4,  5,  5,  6,  7,  7,  8,  8,  9,  9,  9,  9, 10,
-    10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 10, 10,
-    10, 10, 10,  9,  9,  9,  8,  8,  8,  7,  6,  5,  5,  4,  4,  3,
-};
-
-static const uint8_t l_u_ybr10[1024] = {
-     2,  3,  3,  4,  5,  5,  6,  7,  8,  9,  9, 10, 10, 10, 11, 11,
-    12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 12, 12, 12,
-    12, 11, 11, 11, 10, 10,  9,  9,  8,  8,  7,  6,  5,  4,  4,  3,
-};
-
-static const uint8_t l_r_rgbx[1024] = {
-     4,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,
-     7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,
-     8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
-     9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,
-     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,
-     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,  7,  7,  7,
-     7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,
-};
-
-static const uint8_t l_g_rgbx[1024] = {
-     3,  4,  4,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,
-     8,  8,  9,  9,  9, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12,
-    12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12,
-    12, 12, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10,  9,  9,  9,  9,
-     8,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  4,  4,  4,
-};
-
-static const uint8_t l_y_yry10[1024] = {
-     4,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,  7,  7,  7,
-     7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
-     8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
-     9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,
-     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
-     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,
-     7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,
-};
-
-static const uint8_t l_y_yry10i[1024] = {
-     3,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,
-     7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
-     8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
-     9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,
-     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
-     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,  7,
-     7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,
-};
-
-static const uint8_t l_u_yry10[1024] = {
-     2,  3,  3,  4,  5,  6,  7,  7,  8,  8,  8,  9,  9, 10, 10, 10,
-    10, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13,
-    13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13,
-    13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
-    10, 10, 10, 10,  9,  9,  9,  8,  8,  7,  7,  6,  5,  4,  4,  3,
-};
-
-static const uint8_t l_u_yry10i[1024] = {
-     2,  4,  4,  4,  5,  6,  6,  6,  7,  7,  7,  8,  8,  8,  9,  9,
-     9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11,
-    11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11,
-    11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10,  9,  9,  9,
-     9,  9,  8,  8,  8,  8,  7,  7,  7,  6,  6,  5,  5,  4,  4,  3,
-};
-
-static const uint8_t l_y_ybri[256] = {
-     3,  3,  4,  4,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  8,  8,
-     8,  8,  8,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10,
-    10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,
-     8,  8,  8,  7,  7,  7,  7,  7,  6,  6,  6,  5,  5,  5,  4,  3,
-};
-
-static const uint8_t l_u_ybri[256] = {
-     1,  3,  5,  6,  8,  8,  9, 10, 10, 11, 11, 12, 12, 13, 13, 13,
-    14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14,
-    14, 13, 13, 13, 12, 12, 11, 11, 10, 10,  9,  8,  8,  6,  5,  2,
-};
-
-static const uint8_t l_y_byryi[256] = {
-     3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  7,  7,  7,  7,  8,  8,
-     8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10,
-    10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10,
-    10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,
-     8,  8,  8,  7,  7,  7,  7,  7,  7,  6,  6,  6,  5,  4,  4,  3,
-};
-
-static const uint8_t l_u_byryi[256] = {
-     1,  3,  4,  6,  6,  7,  8,  8,  9,  9, 10, 10, 10, 11, 11, 11,
-    12, 12, 12, 12, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15,
-    15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15,
-    15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12,
-    12, 11, 11, 11, 10, 10, 10,  9,  9,  8,  8,  7,  7,  5,  4,  3,
-};
-
-static const uint8_t l_r_rgbxi[1024] = {
-     3,  4,  4,  4,  5,  5,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,
-     8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
-     9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,
-     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,
-     8,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  5,  5,  4,  4,  4,
-};
-
-static const uint8_t l_g_rgbxi[1024] = {
-     2,  3,  4,  4,  6,  6,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,
-     9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
-    11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11,
-    11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,
-     9,  8,  8,  8,  8,  7,  7,  7,  7,  7,  7,  6,  6,  4,  4,  3,
-};
-
 static void decode_ca4i(AVCodecContext *avctx, AVFrame *p, GetBitContext *gb)
 {
     SheerVideoContext *s = avctx->priv_data;

diff --git a/libavcodec/sheervideodata.h b/libavcodec/sheervideodata.h
new file mode 100644
index 0000000..3b6e2f6
--- /dev/null
+++ b/libavcodec/sheervideodata.h

@@ -0,0 +1,1097 @@
+/*
+ * BitJazz SheerVideo decoder
+ * Copyright (c) 2016 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_SHEERVIDEODATA_H
+#define AVCODEC_SHEERVIDEODATA_H
+
+#include "libavutil/common.h"
+
+static const uint8_t l_r_rgb[256] = {
+     3,  3,  4,  4,  5,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  8,
+     8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10,
+    10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10,
+    10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,
+     8,  8,  8,  7,  7,  7,  7,  7,  6,  6,  6,  5,  5,  4,  4,  4,
+};
+
+static const uint8_t l_r_rgbi[256] = {
+     3,  4,  4,  4,  5,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  7,
+     8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,  8,  8,  8,  8,
+     8,  7,  7,  7,  7,  7,  7,  6,  6,  6,  6,  5,  5,  4,  4,  4,
+};
+
+static const uint8_t l_g_rgbi[256] = {
+     1,  3,  4,  5,  6,  7,  7,  8,  9,  9, 10, 10, 10, 10, 11, 11,
+    11, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
+    14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14,
+    14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12,
+    11, 11, 11, 10, 10, 10,  9,  9,  9,  8,  8,  7,  6,  5,  5,  3,
+};
+
+static const uint8_t l_g_rgb[256] = {
+     2,  2,  4,  4,  6,  7,  9,  9, 10, 11, 11, 11, 12, 12, 12, 13,
+    13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13,
+    13, 13, 12, 12, 12, 11, 11, 11, 10,  9,  9,  8,  6,  4,  3,  3,
+};
+
+static const uint8_t l_y_ybr[256] = {
+     3,  3,  4,  4,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  8,  8,
+     8,  8,  8,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10,
+    10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  8,  8,
+     8,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  5,  4,  4,  3,
+};
+
+static const uint8_t l_u_ybr[256] = {
+     1,  2,  4,  6,  9, 10, 11, 11, 12, 12, 13, 13, 13, 14, 14, 14,
+    14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15,
+    14, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10,  8,  5,  3,
+};
+
+static const uint8_t l_y_ybyr[256] = {
+     3,  3,  4,  4,  5,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  8,
+     8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10,
+    10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10,
+    10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,
+     8,  8,  8,  7,  7,  7,  7,  7,  6,  6,  6,  5,  5,  4,  4,  4,
+};
+
+static const uint8_t l_u_ybyr[256] = {
+     1,  2,  4,  6,  8,  9, 10, 10, 11, 11, 12, 12, 12, 13, 13, 14,
+    14, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14,
+    14, 14, 13, 13, 13, 12, 12, 11, 11, 10, 10,  9,  8,  7,  6,  3,
+};
+
+static const uint8_t l_y_byry[256] = {
+     3,  3,  4,  4,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  8,  8,
+     8,  8,  8,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10,
+    10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11,
+    11, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  8,  8,
+     8,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  5,  4,  4,  3,
+};
+
+static const uint8_t l_u_byry[256] = {
+     1,  2,  4,  6,  8,  9,  9, 10, 11, 11, 12, 12, 13, 13, 13, 14,
+    14, 14, 14, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 14,
+    14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10,  9,  8,  7,  6,  3,
+};
+
+static const uint8_t l_y_ybr10i[1024] = {
+     3,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,
+     7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+     8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,  7,
+     7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,
+};
+
+static const uint8_t l_y_ybr10[1024] = {
+     4,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,  7,  7,  7,
+     7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+     8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,
+     7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,
+};
+
+static const uint8_t l_u_ybr10i[1024] = {
+     2,  3,  4,  4,  5,  5,  6,  7,  7,  8,  8,  9,  9,  9,  9, 10,
+    10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 10, 10,
+    10, 10, 10,  9,  9,  9,  8,  8,  8,  7,  6,  5,  5,  4,  4,  3,
+};
+
+static const uint8_t l_u_ybr10[1024] = {
+     2,  3,  3,  4,  5,  5,  6,  7,  8,  9,  9, 10, 10, 10, 11, 11,
+    12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 12, 12, 12,
+    12, 11, 11, 11, 10, 10,  9,  9,  8,  8,  7,  6,  5,  4,  4,  3,
+};
+
+static const uint8_t l_r_rgbx[1024] = {
+     4,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,
+     7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+     8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,
+     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,  7,  7,  7,
+     7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,
+};
+
+static const uint8_t l_g_rgbx[1024] = {
+     3,  4,  4,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,
+     8,  8,  9,  9,  9, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12,
+    12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12,
+    12, 12, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10,  9,  9,  9,  9,
+     8,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  4,  4,  4,
+};
+
+static const uint8_t l_y_yry10[1024] = {
+     4,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,  7,  7,  7,
+     7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+     8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,
+     7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,
+};
+
+static const uint8_t l_y_yry10i[1024] = {
+     3,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,
+     7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+     8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  7,  7,  7,  7,
+     7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,
+};
+
+static const uint8_t l_u_yry10[1024] = {
+     2,  3,  3,  4,  5,  6,  7,  7,  8,  8,  8,  9,  9, 10, 10, 10,
+    10, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13,
+    13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13,
+    13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
+    10, 10, 10, 10,  9,  9,  9,  8,  8,  7,  7,  6,  5,  4,  4,  3,
+};
+
+static const uint8_t l_u_yry10i[1024] = {
+     2,  4,  4,  4,  5,  6,  6,  6,  7,  7,  7,  8,  8,  8,  9,  9,
+     9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11,
+    11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11,
+    11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10,  9,  9,  9,
+     9,  9,  8,  8,  8,  8,  7,  7,  7,  6,  6,  5,  5,  4,  4,  3,
+};
+
+static const uint8_t l_y_ybri[256] = {
+     3,  3,  4,  4,  5,  5,  6,  6,  6,  7,  7,  7,  7,  7,  8,  8,
+     8,  8,  8,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10,
+    10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,
+     8,  8,  8,  7,  7,  7,  7,  7,  6,  6,  6,  5,  5,  5,  4,  3,
+};
+
+static const uint8_t l_u_ybri[256] = {
+     1,  3,  5,  6,  8,  8,  9, 10, 10, 11, 11, 12, 12, 13, 13, 13,
+    14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14,
+    14, 13, 13, 13, 12, 12, 11, 11, 10, 10,  9,  8,  8,  6,  5,  2,
+};
+
+static const uint8_t l_y_byryi[256] = {
+     3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  7,  7,  7,  7,  8,  8,
+     8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10,
+    10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10,
+    10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  9,  9,  8,  8,
+     8,  8,  8,  7,  7,  7,  7,  7,  7,  6,  6,  6,  5,  4,  4,  3,
+};
+
+static const uint8_t l_u_byryi[256] = {
+     1,  3,  4,  6,  6,  7,  8,  8,  9,  9, 10, 10, 10, 11, 11, 11,
+    12, 12, 12, 12, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15,
+    15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15,
+    15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12,
+    12, 11, 11, 11, 10, 10, 10,  9,  9,  8,  8,  7,  7,  5,  4,  3,
+};
+
+static const uint8_t l_r_rgbxi[1024] = {
+     3,  4,  4,  4,  5,  5,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,
+     8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,
+     8,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  5,  5,  4,  4,  4,
+};
+
+static const uint8_t l_g_rgbxi[1024] = {
+     2,  3,  4,  4,  6,  6,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,
+     9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
+    11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11,
+    11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,
+     9,  8,  8,  8,  8,  7,  7,  7,  7,  7,  7,  6,  6,  4,  4,  3,
+};
+
+#endif /* AVCODEC_SHEERVIDEODATA_H */

diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index 49af6be..4b45e6d 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c

@@ -177,7 +177,7 @@
             buffer[i] = 0;
     } else if (s->bitshift != 0) {
         for (i = 0; i < s->blocksize; i++)
-            buffer[i] <<= s->bitshift;
+            buffer[i] *= 1U << s->bitshift;
     }
 }
 
@@ -234,11 +234,11 @@
 
     while (bytestream2_get_le32(&gb) != MKTAG('C', 'O', 'M', 'M')) {
         len = bytestream2_get_be32(&gb);
-        bytestream2_skip(&gb, len + (len & 1));
-        if (len < 0 || bytestream2_get_bytes_left(&gb) < 18) {
+        if (len < 0 || bytestream2_get_bytes_left(&gb) < 18LL + len + (len&1)) {
             av_log(avctx, AV_LOG_ERROR, "no COMM chunk found\n");
             return AVERROR_INVALIDDATA;
         }
+        bytestream2_skip(&gb, len + (len & 1));
     }
     len = bytestream2_get_be32(&gb);
 
@@ -389,9 +389,9 @@
     for (i = 0; i < s->blocksize; i++) {
         sum = init_sum;
         for (j = 0; j < pred_order; j++)
-            sum += coeffs[j] * s->decoded[channel][i - j - 1];
+            sum += coeffs[j] * (unsigned)s->decoded[channel][i - j - 1];
         s->decoded[channel][i] = get_sr_golomb_shorten(&s->gb, residual_size) +
-                                 (sum >> qshift);
+                                 (unsigned)(sum >> qshift);
     }
 
     /* add offset to current samples */
@@ -450,9 +450,13 @@
             return AVERROR_INVALIDDATA;
         }
         s->nmean = get_uint(s, 0);
+        if (s->nmean > 32768U) {
+            av_log(s->avctx, AV_LOG_ERROR, "nmean is: %d\n", s->nmean);
+            return AVERROR_INVALIDDATA;
+        }
 
         skip_bytes = get_uint(s, NSKIPSIZE);
-        if ((unsigned)skip_bytes > get_bits_left(&s->gb)/8) {
+        if ((unsigned)skip_bytes > FFMAX(get_bits_left(&s->gb), 0)/8) {
             av_log(s->avctx, AV_LOG_ERROR, "invalid skip_bytes: %d\n", skip_bytes);
             return AVERROR_INVALIDDATA;
         }
@@ -619,6 +623,11 @@
             switch (cmd) {
             case FN_VERBATIM:
                 len = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE);
+                if (len < 0 || len > get_bits_left(&s->gb)) {
+                    av_log(avctx, AV_LOG_ERROR, "verbatim length %d invalid\n",
+                           len);
+                    return AVERROR_INVALIDDATA;
+                }
                 while (len--)
                     get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
                 break;
@@ -678,7 +687,7 @@
             else {
                 int32_t sum = (s->version < 2) ? 0 : s->nmean / 2;
                 for (i = 0; i < s->nmean; i++)
-                    sum += s->offset[channel][i];
+                    sum += (unsigned)s->offset[channel][i];
                 coffset = sum / s->nmean;
                 if (s->version >= 2)
                     coffset = s->bitshift == 0 ? coffset : coffset >> s->bitshift - 1 >> 1;
@@ -696,7 +705,7 @@
 
             /* update means with info from the current block */
             if (s->nmean > 0) {
-                int32_t sum = (s->version < 2) ? 0 : s->blocksize / 2;
+                int64_t sum = (s->version < 2) ? 0 : s->blocksize / 2;
                 for (i = 0; i < s->blocksize; i++)
                     sum += s->decoded[channel][i];
 
@@ -706,7 +715,7 @@
                 if (s->version < 2)
                     s->offset[channel][s->nmean - 1] = sum / s->blocksize;
                 else
-                    s->offset[channel][s->nmean - 1] = s->bitshift == 32 ? 0 : (sum / s->blocksize) << s->bitshift;
+                    s->offset[channel][s->nmean - 1] = s->bitshift == 32 ? 0 : (sum / s->blocksize) * (1LL << s->bitshift);
             }
 
             /* copy wrap samples for use with next block */

diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index 1d05b2f..78b29c0 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c

@@ -30,6 +30,8 @@
 #include "mathops.h"
 #include "simple_idct.h"
 
+#define IN_IDCT_DEPTH 16
+
 #define BIT_DEPTH 8
 #include "simple_idct_template.c"
 #undef BIT_DEPTH
@@ -46,6 +48,13 @@
 #define BIT_DEPTH 12
 #include "simple_idct_template.c"
 #undef BIT_DEPTH
+#undef IN_IDCT_DEPTH
+
+#define IN_IDCT_DEPTH 32
+#define BIT_DEPTH 10
+#include "simple_idct_template.c"
+#undef BIT_DEPTH
+#undef IN_IDCT_DEPTH
 
 /* 2x4x8 idct */
 
@@ -115,7 +124,7 @@
 
     /* IDCT8 on each line */
     for(i=0; i<8; i++) {
-        idctRowCondDC_8(block + i*8, 0);
+        idctRowCondDC_int16_8bit(block + i*8, 0);
     }
 
     /* IDCT4 and store */
@@ -188,7 +197,7 @@
 
     /* IDCT8 on each line */
     for(i=0; i<4; i++) {
-        idctRowCondDC_8(block + i*8, 0);
+        idctRowCondDC_int16_8bit(block + i*8, 0);
     }
 
     /* IDCT4 and store */
@@ -208,7 +217,7 @@
 
     /* IDCT8 and store */
     for(i=0; i<4; i++){
-        idctSparseColAdd_8(dest + i, line_size, block + i);
+        idctSparseColAdd_int16_8bit(dest + i, line_size, block + i);
     }
 }
 

diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h
index 2a5e1d7..39df230 100644
--- a/libavcodec/simple_idct.h
+++ b/libavcodec/simple_idct.h

@@ -31,20 +31,24 @@
 #include <stddef.h>
 #include <stdint.h>
 
-void ff_simple_idct_put_8(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
-void ff_simple_idct_add_8(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
-void ff_simple_idct_8(int16_t *block);
+void ff_simple_idct_put_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_add_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_int16_8bit(int16_t *block);
 
-void ff_simple_idct_put_10(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
-void ff_simple_idct_add_10(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
-void ff_simple_idct_10(int16_t *block);
+void ff_simple_idct_put_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_add_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_int16_10bit(int16_t *block);
 
-void ff_simple_idct_put_12(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
-void ff_simple_idct_add_12(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
-void ff_simple_idct_12(int16_t *block);
+void ff_simple_idct_put_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_add_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_int32_10bit(int16_t *block);
+
+void ff_simple_idct_put_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_add_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_int16_12bit(int16_t *block);
 
 /**
- * Special version of ff_simple_idct_10() which does dequantization
+ * Special version of ff_simple_idct_int16_10bit() which does dequantization
  * and scales by a factor of 2 more between the two IDCTs to account
  * for larger scale of input coefficients.
  */

diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index f532313..35c3132 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c

@@ -77,6 +77,10 @@
 #define ROW_SHIFT 13
 #define COL_SHIFT 18
 #define DC_SHIFT  1
+#   elif IN_IDCT_DEPTH == 32
+#define ROW_SHIFT 13
+#define COL_SHIFT 21
+#define DC_SHIFT  2
 #   else
 #define ROW_SHIFT 12
 #define COL_SHIFT 19
@@ -97,8 +101,8 @@
 #define DC_SHIFT -1
 # endif
 
-#define MUL(a, b)    ((a) * (b))
-#define MAC(a, b, c) ((a) += (b) * (c))
+#define MUL(a, b)    ((int)((SUINT)(a) * (b)))
+#define MAC(a, b, c) ((a) += (SUINT)(b) * (c))
 
 #else
 
@@ -109,11 +113,13 @@
 #ifdef EXTRA_SHIFT
 static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
 #else
-static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
+static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
 #endif
 {
     SUINT a0, a1, a2, a3, b0, b1, b2, b3;
 
+// TODO: Add DC-only support for int32_t input
+#if IN_IDCT_DEPTH == 16
 #if HAVE_FAST_64BIT
 #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
     if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
@@ -148,16 +154,17 @@
         return;
     }
 #endif
+#endif
 
-    a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
+    a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
     a1 = a0;
     a2 = a0;
     a3 = a0;
 
-    a0 += W2 * row[2];
-    a1 += W6 * row[2];
-    a2 -= W6 * row[2];
-    a3 -= W2 * row[2];
+    a0 += (SUINT)W2 * row[2];
+    a1 += (SUINT)W6 * row[2];
+    a2 -= (SUINT)W6 * row[2];
+    a3 -= (SUINT)W2 * row[2];
 
     b0 = MUL(W1, row[1]);
     MAC(b0, W3, row[3]);
@@ -168,11 +175,15 @@
     b3 = MUL(W7, row[1]);
     MAC(b3, -W5, row[3]);
 
+#if IN_IDCT_DEPTH == 32
+    if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
+#else
     if (AV_RN64A(row + 4)) {
-        a0 +=   W4*row[4] + W6*row[6];
-        a1 += - W4*row[4] - W2*row[6];
-        a2 += - W4*row[4] + W2*row[6];
-        a3 +=   W4*row[4] - W6*row[6];
+#endif
+        a0 += (SUINT)  W4*row[4] + (SUINT)W6*row[6];
+        a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6];
+        a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6];
+        a3 += (SUINT)  W4*row[4] - (SUINT)W6*row[6];
 
         MAC(b0,  W5, row[5]);
         MAC(b0,  W7, row[7]);
@@ -198,15 +209,15 @@
 }
 
 #define IDCT_COLS do {                                  \
-        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \
+        a0 = (SUINT)W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \
         a1 = a0;                                        \
         a2 = a0;                                        \
         a3 = a0;                                        \
                                                         \
-        a0 +=  W2*col[8*2];                             \
-        a1 +=  W6*col[8*2];                             \
-        a2 += -W6*col[8*2];                             \
-        a3 += -W2*col[8*2];                             \
+        a0 += (SUINT) W2*col[8*2];                             \
+        a1 += (SUINT) W6*col[8*2];                             \
+        a2 += (SUINT)-W6*col[8*2];                             \
+        a3 += (SUINT)-W2*col[8*2];                             \
                                                         \
         b0 = MUL(W1, col[8*1]);                         \
         b1 = MUL(W3, col[8*1]);                         \
@@ -219,10 +230,10 @@
         MAC(b3, -W5, col[8*3]);                         \
                                                         \
         if (col[8*4]) {                                 \
-            a0 +=  W4*col[8*4];                         \
-            a1 += -W4*col[8*4];                         \
-            a2 += -W4*col[8*4];                         \
-            a3 +=  W4*col[8*4];                         \
+            a0 += (SUINT) W4*col[8*4];                         \
+            a1 += (SUINT)-W4*col[8*4];                         \
+            a2 += (SUINT)-W4*col[8*4];                         \
+            a3 += (SUINT) W4*col[8*4];                         \
         }                                               \
                                                         \
         if (col[8*5]) {                                 \
@@ -233,10 +244,10 @@
         }                                               \
                                                         \
         if (col[8*6]) {                                 \
-            a0 +=  W6*col[8*6];                         \
-            a1 += -W2*col[8*6];                         \
-            a2 +=  W2*col[8*6];                         \
-            a3 += -W6*col[8*6];                         \
+            a0 += (SUINT) W6*col[8*6];                         \
+            a1 += (SUINT)-W2*col[8*6];                         \
+            a2 += (SUINT) W2*col[8*6];                         \
+            a3 += (SUINT)-W6*col[8*6];                         \
         }                                               \
                                                         \
         if (col[8*7]) {                                 \
@@ -250,8 +261,8 @@
 #ifdef EXTRA_SHIFT
 static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
 #else
-static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
-                                          int16_t *col)
+static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
+                                          idctin *col)
 {
     SUINT a0, a1, a2, a3, b0, b1, b2, b3;
 
@@ -274,8 +285,8 @@
     dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
 }
 
-static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
-                                          int16_t *col)
+static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
+                                          idctin *col)
 {
     int a0, a1, a2, a3, b0, b1, b2, b3;
 
@@ -298,7 +309,7 @@
     dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
 }
 
-static inline void FUNC(idctSparseCol)(int16_t *col)
+static inline void FUNC6(idctSparseCol)(idctin *col)
 #endif
 {
     int a0, a1, a2, a3, b0, b1, b2, b3;
@@ -316,7 +327,23 @@
 }
 
 #ifndef EXTRA_SHIFT
-void FUNC(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
+void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_)
+{
+    idctin *block = (idctin *)block_;
+    pixel *dest = (pixel *)dest_;
+    int i;
+
+    line_size /= sizeof(pixel);
+
+    for (i = 0; i < 8; i++)
+        FUNC6(idctRowCondDC)(block + i*8, 0);
+
+    for (i = 0; i < 8; i++)
+        FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
+}
+
+#if IN_IDCT_DEPTH == 16
+void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
 {
     pixel *dest = (pixel *)dest_;
     int i;
@@ -324,34 +351,21 @@
     line_size /= sizeof(pixel);
 
     for (i = 0; i < 8; i++)
-        FUNC(idctRowCondDC)(block + i*8, 0);
+        FUNC6(idctRowCondDC)(block + i*8, 0);
 
     for (i = 0; i < 8; i++)
-        FUNC(idctSparseColPut)(dest + i, line_size, block + i);
+        FUNC6(idctSparseColAdd)(dest + i, line_size, block + i);
 }
 
-void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
-{
-    pixel *dest = (pixel *)dest_;
-    int i;
-
-    line_size /= sizeof(pixel);
-
-    for (i = 0; i < 8; i++)
-        FUNC(idctRowCondDC)(block + i*8, 0);
-
-    for (i = 0; i < 8; i++)
-        FUNC(idctSparseColAdd)(dest + i, line_size, block + i);
-}
-
-void FUNC(ff_simple_idct)(int16_t *block)
+void FUNC6(ff_simple_idct)(int16_t *block)
 {
     int i;
 
     for (i = 0; i < 8; i++)
-        FUNC(idctRowCondDC)(block + i*8, 0);
+        FUNC6(idctRowCondDC)(block + i*8, 0);
 
     for (i = 0; i < 8; i++)
-        FUNC(idctSparseCol)(block + i);
+        FUNC6(idctSparseCol)(block + i);
 }
 #endif
+#endif

diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index 2d20be9..61e3169 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c

@@ -43,6 +43,8 @@
 #define SMKTREE_BITS 9
 #define SMK_NODE 0x80000000
 
+#define SMKTREE_DECODE_MAX_RECURSION 32
+#define SMKTREE_DECODE_BIG_MAX_RECURSION 500
 
 typedef struct SmackVContext {
     AVCodecContext *avctx;
@@ -95,10 +97,11 @@
  */
 static int smacker_decode_tree(GetBitContext *gb, HuffContext *hc, uint32_t prefix, int length)
 {
-    if(length > 32 || length > 3*SMKTREE_BITS) {
-        av_log(NULL, AV_LOG_ERROR, "length too long\n");
+    if (length > SMKTREE_DECODE_MAX_RECURSION || length > 3 * SMKTREE_BITS) {
+        av_log(NULL, AV_LOG_ERROR, "Maximum tree recursion level exceeded.\n");
         return AVERROR_INVALIDDATA;
     }
+
     if(!get_bits1(gb)){ //Leaf
         if(hc->current >= hc->length){
             av_log(NULL, AV_LOG_ERROR, "Tree size exceeded!\n");
@@ -129,12 +132,15 @@
 /**
  * Decode header tree
  */
-static int smacker_decode_bigtree(GetBitContext *gb, HuffContext *hc, DBCtx *ctx, int length)
+static int smacker_decode_bigtree(GetBitContext *gb, HuffContext *hc,
+                                  DBCtx *ctx, int length)
 {
-    if(length > 500) { // Larger length can cause segmentation faults due to too deep recursion.
-        av_log(NULL, AV_LOG_ERROR, "length too long\n");
+    // Larger length can cause segmentation faults due to too deep recursion.
+    if (length > SMKTREE_DECODE_BIG_MAX_RECURSION) {
+        av_log(NULL, AV_LOG_ERROR, "Maximum bigtree recursion level exceeded.\n");
         return AVERROR_INVALIDDATA;
     }
+
     if (hc->current + 1 >= hc->length) {
         av_log(NULL, AV_LOG_ERROR, "Tree size exceeded!\n");
         return AVERROR_INVALIDDATA;
@@ -279,8 +285,9 @@
         goto error;
     }
 
-    if (smacker_decode_bigtree(gb, &huff, &ctx, 0) < 0)
-        err = -1;
+    res = smacker_decode_bigtree(gb, &huff, &ctx, 0);
+    if (res < 0)
+        err = res;
     skip_bits1(gb);
     if(ctx.last[0] == -1) ctx.last[0] = huff.current++;
     if(ctx.last[1] == -1) ctx.last[1] = huff.current++;
@@ -600,7 +607,7 @@
 {
     if (avctx->channels < 1 || avctx->channels > 2) {
         av_log(avctx, AV_LOG_ERROR, "invalid number of channels\n");
-        return AVERROR(EINVAL);
+        return AVERROR_INVALIDDATA;
     }
     avctx->channel_layout = (avctx->channels==2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
     avctx->sample_fmt = avctx->bits_per_coded_sample == 8 ? AV_SAMPLE_FMT_U8 : AV_SAMPLE_FMT_S16;
@@ -630,7 +637,7 @@
 
     if (buf_size <= 4) {
         av_log(avctx, AV_LOG_ERROR, "packet is too small\n");
-        return AVERROR(EINVAL);
+        return AVERROR_INVALIDDATA;
     }
 
     unp_size = AV_RL32(buf);
@@ -652,18 +659,19 @@
     bits = get_bits1(&gb);
     if (stereo ^ (avctx->channels != 1)) {
         av_log(avctx, AV_LOG_ERROR, "channels mismatch\n");
-        return AVERROR(EINVAL);
+        return AVERROR_INVALIDDATA;
     }
     if (bits == (avctx->sample_fmt == AV_SAMPLE_FMT_U8)) {
         av_log(avctx, AV_LOG_ERROR, "sample format mismatch\n");
-        return AVERROR(EINVAL);
+        return AVERROR_INVALIDDATA;
     }
 
     /* get output buffer */
     frame->nb_samples = unp_size / (avctx->channels * (bits + 1));
     if (unp_size % (avctx->channels * (bits + 1))) {
-        av_log(avctx, AV_LOG_ERROR, "unp_size %d is odd\n", unp_size);
-        return AVERROR(EINVAL);
+        av_log(avctx, AV_LOG_ERROR,
+               "The buffer does not contain an integer number of samples\n");
+        return AVERROR_INVALIDDATA;
     }
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;

diff --git a/libavcodec/smc.c b/libavcodec/smc.c
index 79f9a75..3cb4834 100644
--- a/libavcodec/smc.c
+++ b/libavcodec/smc.c

@@ -438,6 +438,10 @@
     int pal_size;
     const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, &pal_size);
     int ret;
+    int total_blocks = ((s->avctx->width + 3) / 4) * ((s->avctx->height + 3) / 4);
+
+    if (total_blocks / 1024 > avpkt->size)
+        return AVERROR_INVALIDDATA;
 
     bytestream2_init(&s->gb, buf, buf_size);
 

diff --git a/libavcodec/smvjpegdec.c b/libavcodec/smvjpegdec.c
index 018e135..7ea82eb 100644
--- a/libavcodec/smvjpegdec.c
+++ b/libavcodec/smvjpegdec.c

@@ -71,7 +71,7 @@
             src_linesizes[i], h, nlines);
     }
     if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
-        desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
+        desc->flags & FF_PSEUDOPAL)
         dst_data[1] = src_data[1];
 }
 
@@ -193,7 +193,6 @@
         s->picture[1]->width         = avctx->width;
         s->picture[1]->height        = avctx->height;
         s->picture[1]->format        = avctx->pix_fmt;
-        /* ff_init_buffer_info(avctx, &s->picture[1]); */
         smv_img_pnt(s->picture[1]->data, mjpeg_data->data, mjpeg_data->linesize,
                     avctx->pix_fmt, avctx->width, avctx->height, cur_frame);
         for (i = 0; i < AV_NUM_DATA_POINTERS; i++)

diff --git a/libavcodec/snow_dwt.h b/libavcodec/snow_dwt.h
index e2d7528..ee699de 100644
--- a/libavcodec/snow_dwt.h
+++ b/libavcodec/snow_dwt.h

@@ -24,6 +24,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
+struct MpegEncContext;
+
 typedef int DWTELEM;
 typedef short IDWTELEM;
 

diff --git a/libavcodec/snowdec.c b/libavcodec/snowdec.c
index 0146a2a..59bd24e 100644
--- a/libavcodec/snowdec.c
+++ b/libavcodec/snowdec.c

@@ -208,8 +208,8 @@
                 return AVERROR_INVALIDDATA;
             }
             pred_mv(s, &mx, &my, ref, left, top, tr);
-            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
-            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
+            mx+= (unsigned)get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
+            my+= (unsigned)get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
         }
         set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
     }else{

diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
index 0d800b9..61a658f 100644
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c

@@ -22,6 +22,7 @@
 #include "libavutil/libm.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "snow_dwt.h"
@@ -52,12 +53,6 @@
         av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
         return AVERROR(EINVAL);
     }
-#if FF_API_MOTION_EST
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->me_method == ME_ITER)
-        s->motion_est = FF_ME_ITER;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
 
     s->spatial_decomposition_type= s->pred; //FIXME add decorrelator type r transform_type
 
@@ -133,7 +128,13 @@
         av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
         return AVERROR_PATCHWELCOME;
     }
-    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
+
+    ret = av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift,
+                                           &s->chroma_v_shift);
+    if (ret) {
+        av_log(avctx, AV_LOG_ERROR, "pixel format invalid or unknown\n");
+        return ret;
+    }
 
     ff_set_cmp(&s->mecc, s->mecc.me_cmp, s->avctx->me_cmp);
     ff_set_cmp(&s->mecc, s->mecc.me_sub_cmp, s->avctx->me_sub_cmp);
@@ -178,7 +179,7 @@
 static int pix_norm1(uint8_t * pix, int line_size, int w)
 {
     int s, i, j;
-    uint32_t *sq = ff_square_tab + 256;
+    const uint32_t *sq = ff_square_tab + 256;
 
     s = 0;
     for (i = 0; i < w; i++) {
@@ -1622,11 +1623,7 @@
         s->lambda = 0;
     }//else keep previous frame's qlog until after motion estimation
 
-    if (s->current_picture->data[0]
-#if FF_API_EMU_EDGE
-        && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)
-#endif
-        ) {
+    if (s->current_picture->data[0]) {
         int w = s->avctx->width;
         int h = s->avctx->height;
 
@@ -1679,11 +1676,6 @@
         s->m.b8_stride= 2*s->m.mb_width+1;
         s->m.f_code=1;
         s->m.pict_type = pic->pict_type;
-#if FF_API_MOTION_EST
-FF_DISABLE_DEPRECATION_WARNINGS
-        s->m.me_method= s->avctx->me_method;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
         s->m.motion_est= s->motion_est;
         s->m.me.scene_change_score=0;
         s->m.me.dia_size = avctx->dia_size;

diff --git a/libavcodec/speedhq.c b/libavcodec/speedhq.c
index 6d3487c..890b825 100644
--- a/libavcodec/speedhq.c
+++ b/libavcodec/speedhq.c

@@ -58,40 +58,40 @@
 
 
 /* AC codes: Very similar but not identical to MPEG-2. */
-static uint16_t speedhq_vlc[123][2] = {
-    {0x02, 2}, {0x06, 3}, {0x07, 4}, {0x1c, 5},
-    {0x1d, 5}, {0x05, 6}, {0x04, 6}, {0x7b, 7},
-    {0x7c, 7}, {0x23, 8}, {0x22, 8}, {0xfa, 8},
-    {0xfb, 8}, {0xfe, 8}, {0xff, 8}, {0x1f,14},
-    {0x1e,14}, {0x1d,14}, {0x1c,14}, {0x1b,14},
-    {0x1a,14}, {0x19,14}, {0x18,14}, {0x17,14},
-    {0x16,14}, {0x15,14}, {0x14,14}, {0x13,14},
-    {0x12,14}, {0x11,14}, {0x10,14}, {0x18,15},
-    {0x17,15}, {0x16,15}, {0x15,15}, {0x14,15},
-    {0x13,15}, {0x12,15}, {0x11,15}, {0x10,15},
-    {0x02, 3}, {0x06, 5}, {0x79, 7}, {0x27, 8},
-    {0x20, 8}, {0x16,13}, {0x15,13}, {0x1f,15},
-    {0x1e,15}, {0x1d,15}, {0x1c,15}, {0x1b,15},
-    {0x1a,15}, {0x19,15}, {0x13,16}, {0x12,16},
-    {0x11,16}, {0x10,16}, {0x18,13}, {0x17,13},
-    {0x05, 5}, {0x07, 7}, {0xfc, 8}, {0x0c,10},
-    {0x14,13}, {0x18,12}, {0x14,12}, {0x13,12},
-    {0x10,12}, {0x1a,13}, {0x19,13}, {0x07, 5},
-    {0x26, 8}, {0x1c,12}, {0x13,13}, {0x1b,12},
-    {0x06, 6}, {0xfd, 8}, {0x12,12}, {0x1d,12},
-    {0x07, 6}, {0x04, 9}, {0x12,13}, {0x06, 7},
-    {0x1e,12}, {0x14,16}, {0x04, 7}, {0x15,12},
-    {0x05, 7}, {0x11,12}, {0x78, 7}, {0x11,13},
-    {0x7a, 7}, {0x10,13}, {0x21, 8}, {0x1a,16},
-    {0x25, 8}, {0x19,16}, {0x24, 8}, {0x18,16},
-    {0x05, 9}, {0x17,16}, {0x07, 9}, {0x16,16},
-    {0x0d,10}, {0x15,16}, {0x1f,12}, {0x1a,12},
-    {0x19,12}, {0x17,12}, {0x16,12}, {0x1f,13},
-    {0x1e,13}, {0x1d,13}, {0x1c,13}, {0x1b,13},
-    {0x1f,16}, {0x1e,16}, {0x1d,16}, {0x1c,16},
-    {0x1b,16},
-    {0x01,6}, /* escape */
-    {0x06,4}, /* EOB */
+static const uint16_t speedhq_vlc[123][2] = {
+    {0x0001,  2}, {0x0003,  3}, {0x000E,  4}, {0x0007,  5},
+    {0x0017,  5}, {0x0028,  6}, {0x0008,  6}, {0x006F,  7},
+    {0x001F,  7}, {0x00C4,  8}, {0x0044,  8}, {0x005F,  8},
+    {0x00DF,  8}, {0x007F,  8}, {0x00FF,  8}, {0x3E00, 14},
+    {0x1E00, 14}, {0x2E00, 14}, {0x0E00, 14}, {0x3600, 14},
+    {0x1600, 14}, {0x2600, 14}, {0x0600, 14}, {0x3A00, 14},
+    {0x1A00, 14}, {0x2A00, 14}, {0x0A00, 14}, {0x3200, 14},
+    {0x1200, 14}, {0x2200, 14}, {0x0200, 14}, {0x0C00, 15},
+    {0x7400, 15}, {0x3400, 15}, {0x5400, 15}, {0x1400, 15},
+    {0x6400, 15}, {0x2400, 15}, {0x4400, 15}, {0x0400, 15},
+    {0x0002,  3}, {0x000C,  5}, {0x004F,  7}, {0x00E4,  8},
+    {0x0004,  8}, {0x0D00, 13}, {0x1500, 13}, {0x7C00, 15},
+    {0x3C00, 15}, {0x5C00, 15}, {0x1C00, 15}, {0x6C00, 15},
+    {0x2C00, 15}, {0x4C00, 15}, {0xC800, 16}, {0x4800, 16},
+    {0x8800, 16}, {0x0800, 16}, {0x0300, 13}, {0x1D00, 13},
+    {0x0014,  5}, {0x0070,  7}, {0x003F,  8}, {0x00C0, 10},
+    {0x0500, 13}, {0x0180, 12}, {0x0280, 12}, {0x0C80, 12},
+    {0x0080, 12}, {0x0B00, 13}, {0x1300, 13}, {0x001C,  5},
+    {0x0064,  8}, {0x0380, 12}, {0x1900, 13}, {0x0D80, 12},
+    {0x0018,  6}, {0x00BF,  8}, {0x0480, 12}, {0x0B80, 12},
+    {0x0038,  6}, {0x0040,  9}, {0x0900, 13}, {0x0030,  7},
+    {0x0780, 12}, {0x2800, 16}, {0x0010,  7}, {0x0A80, 12},
+    {0x0050,  7}, {0x0880, 12}, {0x000F,  7}, {0x1100, 13},
+    {0x002F,  7}, {0x0100, 13}, {0x0084,  8}, {0x5800, 16},
+    {0x00A4,  8}, {0x9800, 16}, {0x0024,  8}, {0x1800, 16},
+    {0x0140,  9}, {0xE800, 16}, {0x01C0,  9}, {0x6800, 16},
+    {0x02C0, 10}, {0xA800, 16}, {0x0F80, 12}, {0x0580, 12},
+    {0x0980, 12}, {0x0E80, 12}, {0x0680, 12}, {0x1F00, 13},
+    {0x0F00, 13}, {0x1700, 13}, {0x0700, 13}, {0x1B00, 13},
+    {0xF800, 16}, {0x7800, 16}, {0xB800, 16}, {0x3800, 16},
+    {0xD800, 16},
+    {0x0020,  6}, /* escape */
+    {0x0006,  4}  /* EOB */
 };
 
 static const uint8_t speedhq_level[121] = {
@@ -580,7 +580,6 @@
 {
     uint16_t ff_mpeg12_vlc_dc_lum_code_reversed[12];
     uint16_t ff_mpeg12_vlc_dc_chroma_code_reversed[12];
-    int i;
 
     /* Exactly the same as MPEG-2, except little-endian. */
     reverse_code(ff_mpeg12_vlc_dc_lum_code,
@@ -598,10 +597,6 @@
                        ff_mpeg12_vlc_dc_chroma_bits, 1, 1,
                        ff_mpeg12_vlc_dc_chroma_code_reversed, 2, 2, 514);
 
-    /* Reverse the AC VLC, because INIT_VLC_LE wants it in that order. */
-    for (i = 0; i < FF_ARRAY_ELEMS(speedhq_vlc); ++i) {
-        speedhq_vlc[i][0] = reverse(speedhq_vlc[i][0], speedhq_vlc[i][1]);
-    }
     ff_rl_init(&ff_rl_speedhq, ff_speedhq_static_rl_table_store);
     INIT_2D_VLC_RL(ff_rl_speedhq, 674, INIT_VLC_LE);
 

diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index d78ede7..80a8af1 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c

@@ -283,19 +283,6 @@
         s->m.b8_stride                     = 2 * s->m.mb_width + 1;
         s->m.f_code                        = 1;
         s->m.pict_type                     = s->pict_type;
-#if FF_API_MOTION_EST
-FF_DISABLE_DEPRECATION_WARNINGS
-        s->m.me_method                     = s->avctx->me_method;
-        if (s->motion_est == FF_ME_EPZS) {
-            if (s->avctx->me_method == ME_ZERO)
-                s->motion_est = FF_ME_ZERO;
-            else if (s->avctx->me_method == ME_EPZS)
-                s->motion_est = FF_ME_EPZS;
-            else if (s->avctx->me_method == ME_X1)
-                s->motion_est = FF_ME_XONE;
-        }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
         s->m.motion_est                    = s->motion_est;
         s->m.me.scene_change_score         = 0;
         // s->m.out_format                    = FMT_H263;

diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index a937b2f..18a4448 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c

@@ -1048,12 +1048,12 @@
         }
         memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
 
+        if (s->watermark_key) {
+            uint32_t header = AV_RL32(&s->slice_buf[1]);
+            AV_WL32(&s->slice_buf[1], header ^ s->watermark_key);
+        }
         init_get_bits(&s->gb_slice, s->slice_buf, slice_bits);
 
-        if (s->watermark_key) {
-            uint32_t header = AV_RL32(&s->gb_slice.buffer[1]);
-            AV_WL32(&s->gb_slice.buffer[1], header ^ s->watermark_key);
-        }
         if (length > 0) {
             memmove(s->slice_buf, &s->slice_buf[slice_length], length - 1);
         }
@@ -1064,16 +1064,15 @@
         av_log(s->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
         return -1;
     }
-    if (get_bits1(&s->gb_slice)) {
-        avpriv_report_missing_feature(s->avctx, "Media key encryption");
-        return AVERROR_PATCHWELCOME;
-    }
 
     s->slice_type = ff_h264_golomb_to_pict_type[slice_id];
 
     if ((header & 0x9F) == 2) {
-        i = (s->mb_num < 64) ? 5 : av_log2(s->mb_num - 1);
+        i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
         get_bits(&s->gb_slice, i);
+    } else if (get_bits1(&s->gb_slice)) {
+        avpriv_report_missing_feature(s->avctx, "Media key encryption");
+        return AVERROR_PATCHWELCOME;
     }
 
     s->slice_num      = get_bits(&s->gb_slice, 8);

diff --git a/libavcodec/tableprint_vlc.h b/libavcodec/tableprint_vlc.h
index 675251a..3004be3 100644
--- a/libavcodec/tableprint_vlc.h
+++ b/libavcodec/tableprint_vlc.h

@@ -35,6 +35,7 @@
 #define av_freep(p) while(0)
 #define AVCODEC_AVCODEC_H
 #define AVCODEC_INTERNAL_H
+#define AV_INPUT_BUFFER_PADDING_SIZE 64 // the value does not matter for this
 #include "tableprint.h"
 #include "get_bits.h"
 #include "mathtables.c"

diff --git a/libavcodec/tak.c b/libavcodec/tak.c
index d2670e0..8aa956b 100644
--- a/libavcodec/tak.c
+++ b/libavcodec/tak.c

@@ -90,7 +90,7 @@
     return 0;
 }
 
-void avpriv_tak_parse_streaminfo(GetBitContext *gb, TAKStreamInfo *s)
+void ff_tak_parse_streaminfo(TAKStreamInfo *s, GetBitContext *gb)
 {
     uint64_t channel_mask = 0;
     int frame_type, i;
@@ -125,6 +125,19 @@
     s->frame_samples = tak_get_nb_samples(s->sample_rate, frame_type);
 }
 
+int avpriv_tak_parse_streaminfo(TAKStreamInfo *s, const uint8_t *buf, int size)
+{
+    GetBitContext gb;
+    int ret = init_get_bits8(&gb, buf, size);
+
+    if (ret < 0)
+        return AVERROR_INVALIDDATA;
+
+    ff_tak_parse_streaminfo(s, &gb);
+
+    return 0;
+}
+
 int ff_tak_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb,
                                TAKStreamInfo *ti, int log_level_offset)
 {
@@ -144,7 +157,7 @@
     }
 
     if (ti->flags & TAK_FRAME_FLAG_HAS_INFO) {
-        avpriv_tak_parse_streaminfo(gb, ti);
+        ff_tak_parse_streaminfo(ti, gb);
 
         if (get_bits(gb, 6))
             skip_bits(gb, 25);

diff --git a/libavcodec/tak.h b/libavcodec/tak.h
index 4fa1239..dc45a8c 100644
--- a/libavcodec/tak.h
+++ b/libavcodec/tak.h

@@ -143,10 +143,14 @@
 
 /**
  * Parse the Streaminfo metadata block.
- * @param[in]  gb pointer to GetBitContext
  * @param[out] s  storage for parsed information
+ * @param[in]  buf   input buffer
+ * @param[in]  size  size of input buffer in bytes
+ * @return non-zero on error, 0 if OK
  */
-void avpriv_tak_parse_streaminfo(GetBitContext *gb, TAKStreamInfo *s);
+int avpriv_tak_parse_streaminfo(TAKStreamInfo *s, const uint8_t *buf, int size);
+
+void ff_tak_parse_streaminfo(TAKStreamInfo *s, GetBitContext *gb);
 
 /**
  * Validate and decode a frame header.

diff --git a/libavcodec/tests/.gitignore b/libavcodec/tests/.gitignore
index 7f9e382..73945a7 100644
--- a/libavcodec/tests/.gitignore
+++ b/libavcodec/tests/.gitignore

@@ -7,6 +7,7 @@
 /fft-fixed
 /fft-fixed32
 /golomb
+/h264_levels
 /htmlsubtitles
 /iirfilter
 /imgconvert
@@ -14,6 +15,7 @@
 /mathops
 /mjpegenc_huffman
 /motion
+/mpeg12framerate
 /options
 /rangecoder
 /snowenc

diff --git a/libavcodec/tests/codec_desc.c b/libavcodec/tests/codec_desc.c
new file mode 100644
index 0000000..c9b3497
--- /dev/null
+++ b/libavcodec/tests/codec_desc.c

@@ -0,0 +1,45 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/avcodec.h"
+
+int main(int argc, char **argv)
+{
+    const AVCodecDescriptor *old_desc = NULL, *desc;
+
+    while (desc = avcodec_descriptor_next(old_desc)) {
+        if (old_desc && old_desc->id >= desc->id) {
+            av_log(NULL, AV_LOG_FATAL, "Unsorted codec_descriptors '%s' and '%s'.\n", old_desc->name, desc->name);
+            return 1;
+        }
+
+        if (avcodec_descriptor_get(desc->id) != desc) {
+            av_log(NULL, AV_LOG_FATAL, "avcodec_descriptor_get() failed with '%s'.\n", desc->name);
+            return 1;
+        }
+
+        if (avcodec_descriptor_get_by_name(desc->name) != desc) {
+            av_log(NULL, AV_LOG_FATAL, "avcodec_descriptor_get_by_name() failed with '%s'.\n", desc->name);
+            return 1;
+        }
+
+        old_desc = desc;
+    }
+
+    return 0;
+}

diff --git a/libavcodec/tests/dct.c b/libavcodec/tests/dct.c
index b44c66f..e8fa4a3 100644
--- a/libavcodec/tests/dct.c
+++ b/libavcodec/tests/dct.c

@@ -82,9 +82,9 @@
 static const struct algo idct_tab[] = {
     { "REF-DBL",     ff_ref_idct,          FF_IDCT_PERM_NONE },
     { "INT",         ff_j_rev_dct,         FF_IDCT_PERM_LIBMPEG2 },
-    { "SIMPLE-C",    ff_simple_idct_8,     FF_IDCT_PERM_NONE },
-    { "SIMPLE-C10",  ff_simple_idct_10,    FF_IDCT_PERM_NONE },
-    { "SIMPLE-C12",  ff_simple_idct_12,    FF_IDCT_PERM_NONE, 0, 1 },
+    { "SIMPLE-C",    ff_simple_idct_int16_8bit,     FF_IDCT_PERM_NONE },
+    { "SIMPLE-C10",  ff_simple_idct_int16_10bit,    FF_IDCT_PERM_NONE },
+    { "SIMPLE-C12",  ff_simple_idct_int16_12bit,    FF_IDCT_PERM_NONE, 0, 1 },
     { "PR-C",        ff_prores_idct_wrap,  FF_IDCT_PERM_NONE, 0, 1 },
 #if CONFIG_FAANIDCT
     { "FAANI",       ff_faanidct,          FF_IDCT_PERM_NONE },

diff --git a/libavcodec/tests/h264_levels.c b/libavcodec/tests/h264_levels.c
new file mode 100644
index 0000000..794517e
--- /dev/null
+++ b/libavcodec/tests/h264_levels.c

@@ -0,0 +1,183 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavcodec/h264_levels.h"
+
+static const struct {
+    int width;
+    int height;
+    int level_idc;
+} test_sizes[] = {
+    // First level usable at some standard sizes.
+    // (From H.264 table A-6.)
+    {  176,  144, 10 }, // QCIF
+    {  352,  288, 11 }, // CIF
+    {  640,  480, 22 }, // VGA
+    {  720,  480, 22 }, // NTSC
+    {  720,  576, 22 }, // PAL
+    {  800,  600, 31 }, // SVGA
+    { 1280,  720, 31 }, // 720p
+    { 1280, 1024, 32 }, // SXGA
+    { 1920, 1080, 40 }, // 1080p
+    { 2048, 1080, 42 }, // 2Kx1080
+    { 2048, 1536, 50 }, // 4XGA
+    { 3840, 2160, 51 }, // 4K
+    { 7680, 4320, 60 }, // 8K
+
+    // Overly wide or tall sizes.
+    {    1,  256, 10 },
+    {    1,  512, 11 },
+    {    1, 1024, 21 },
+    {    1, 1808, 22 },
+    {    1, 1824, 31 },
+    {  256,    1, 10 },
+    {  512,    1, 11 },
+    { 1024,    1, 21 },
+    { 1808,    1, 22 },
+    { 1824,    1, 31 },
+    {  512, 4096, 40 },
+    {  256, 4112, 42 },
+    { 8688, 1024, 51 },
+    { 8704,  512, 60 },
+    { 16880,   1, 60 },
+    { 16896,   1,  0 },
+};
+
+static const struct {
+    int width;
+    int height;
+    int dpb_size;
+    int level_idc;
+} test_dpb[] = {
+    // First level usable for some DPB sizes.
+    // (From H.264 table A-7.)
+    {  176,  144,  4, 10 },
+    {  176,  144,  8, 11 },
+    {  176,  144, 16, 12 },
+    { 1280,  720,  1, 31 },
+    { 1280,  720,  5, 31 },
+    { 1280,  720,  9, 40 },
+    { 1280,  720, 10, 50 },
+    { 1920, 1080,  1, 40 },
+    { 1920, 1080,  5, 50 },
+    { 1920, 1080, 13, 50 },
+    { 1920, 1080, 14, 51 },
+    { 3840, 2160,  5, 51 },
+    { 3840, 2160,  6, 60 },
+    { 3840, 2160, 16, 60 },
+    { 7680, 4320,  5, 60 },
+    { 7680, 4320,  6,  0 },
+};
+
+static const struct {
+    int64_t bitrate;
+    int profile_idc;
+    int level_idc;
+} test_bitrate[] = {
+    // Values where profile affects level at a given bitrate.
+    {   2500000,  77, 21 },
+    {   2500000, 100, 20 },
+    {   2500000, 244, 13 },
+    { 100000000,  77, 50 },
+    { 100000000, 100, 50 },
+    { 100000000, 244, 41 },
+    { 999999999,  77,  0 },
+    { 999999999, 100, 62 },
+    // Check level 1b.
+    {  32 * 1200,  66, 10 },
+    {  32 * 1500, 100, 10 },
+    {  96 * 1200,  66, 10 },
+    {  96 * 1500, 100,  9 },
+    { 144 * 1200,  66, 11 },
+    { 144 * 1500, 100, 11 },
+};
+
+static const struct {
+    const char *name;
+    int profile_idc;
+    int64_t bitrate;
+    int width;
+    int height;
+    int dpb_frames;
+    int level_idc;
+} test_all[] = {
+    { "Bluray 1080p 40Mb/s", 100, 40000000, 1920, 1080, 4, 41 },
+    { "Bluray 1080p 24Mb/s", 100, 24000000, 1920, 1080, 4, 40 },
+    { "Bluray 720p 40Mb/s",  100, 40000000, 1280,  720, 6, 41 },
+    { "Bluray 720p 24Mb/s",  100, 24000000, 1280,  720, 6, 40 },
+    { "Bluray PAL 40Mb/s",   100, 40000000,  720,  576, 6, 41 },
+    { "Bluray PAL 24Mb/s",   100, 24000000,  720,  576, 6, 32 },
+    { "Bluray PAL 16Mb/s",   100, 16800000,  720,  576, 6, 31 },
+    { "Bluray PAL 12Mb/s",   100, 12000000,  720,  576, 5, 30 },
+    { "Bluray NTSC 40Mb/s",  100, 40000000,  720,  480, 6, 41 },
+    { "Bluray NTSC 24Mb/s",  100, 24000000,  720,  480, 6, 32 },
+    { "Bluray NTSC 16Mb/s",  100, 16800000,  720,  480, 6, 31 },
+    { "Bluray NTSC 12Mb/s",  100, 12000000,  720,  480, 6, 30 },
+};
+
+int main(void)
+{
+    const H264LevelDescriptor *level;
+    int i;
+
+#define CHECK(expected, format, ...) do { \
+        if (expected ? (!level || level->level_idc != expected) \
+                     : !!level) { \
+            av_log(NULL, AV_LOG_ERROR, "Incorrect level for " \
+                   format ": expected %d, got %d.\n", __VA_ARGS__, \
+                   expected, level ? level->level_idc : -1); \
+            return 1; \
+        } \
+    } while (0)
+
+    for (i = 0; i < FF_ARRAY_ELEMS(test_sizes); i++) {
+        level = ff_h264_guess_level(0, 0, test_sizes[i].width,
+                                    test_sizes[i].height, 0);
+        CHECK(test_sizes[i].level_idc, "size %dx%d",
+              test_sizes[i].width, test_sizes[i].height);
+    }
+
+    for (i = 0; i < FF_ARRAY_ELEMS(test_dpb); i++) {
+        level = ff_h264_guess_level(0, 0, test_dpb[i].width,
+                                    test_dpb[i].height,
+                                    test_dpb[i].dpb_size);
+        CHECK(test_dpb[i].level_idc, "size %dx%d dpb %d",
+              test_dpb[i].width, test_dpb[i].height,
+              test_dpb[i].dpb_size);
+    }
+
+    for (i = 0; i < FF_ARRAY_ELEMS(test_bitrate); i++) {
+        level = ff_h264_guess_level(test_bitrate[i].profile_idc,
+                                    test_bitrate[i].bitrate,
+                                    0, 0, 0);
+        CHECK(test_bitrate[i].level_idc, "bitrate %"PRId64" profile %d",
+              test_bitrate[i].bitrate, test_bitrate[i].profile_idc);
+    }
+
+    for (i = 0; i < FF_ARRAY_ELEMS(test_all); i++) {
+        level = ff_h264_guess_level(test_all[i].profile_idc,
+                                    test_all[i].bitrate,
+                                    test_all[i].width,
+                                    test_all[i].height,
+                                    test_all[i].dpb_frames);
+        CHECK(test_all[i].level_idc, "%s", test_all[i].name);
+    }
+
+    return 0;
+}

diff --git a/libavcodec/tests/imgconvert.c b/libavcodec/tests/imgconvert.c
index c598d46..aefc324 100644
--- a/libavcodec/tests/imgconvert.c
+++ b/libavcodec/tests/imgconvert.c

@@ -39,10 +39,6 @@
             skip = 0;
         }
         av_log(NULL, AV_LOG_INFO, "pix fmt %s yuv_plan:%d avg_bpp:%d\n", desc->name, is_yuv_planar(desc), av_get_padded_bits_per_pixel(desc));
-        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) != (desc->nb_components != 2 && desc->nb_components != 4)) {
-            av_log(NULL, AV_LOG_ERROR, "Alpha flag mismatch\n");
-            err = 1;
-        }
     }
     return err;
 }

diff --git a/libavcodec/tests/mpeg12framerate.c b/libavcodec/tests/mpeg12framerate.c
new file mode 100644
index 0000000..595bdb2
--- /dev/null
+++ b/libavcodec/tests/mpeg12framerate.c

@@ -0,0 +1,87 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/mpeg12.h"
+#include "libavcodec/mpeg12data.h"
+
+int main(void)
+{
+    int i;
+
+#define TEST_MATCH(frame_rate, code, ext_n, ext_d) do { \
+        AVRational fr = frame_rate; \
+        int c, n, d; \
+        ff_mpeg12_find_best_frame_rate(fr, &c, &n, &d, 0); \
+        if (c != code || n != ext_n || d != ext_d) { \
+            av_log(NULL, AV_LOG_ERROR, "Failed to match %d/%d: " \
+                   "code = %d, ext_n = %d, ext_d = %d.\n", \
+                   fr.num, fr.den, c, n, d); \
+            return 1; \
+        } \
+    } while (0)
+#define TEST_EXACT(frn, frd) do { \
+        AVRational fr = (AVRational) { frn, frd }; \
+        int c, n, d; \
+        ff_mpeg12_find_best_frame_rate(fr, &c, &n, &d, 0); \
+        if (av_cmp_q(fr, av_mul_q(ff_mpeg12_frame_rate_tab[c], \
+                                  (AVRational) { n + 1, d + 1 })) != 0) { \
+            av_log(NULL, AV_LOG_ERROR, "Failed to find exact %d/%d: " \
+                   "code = %d, ext_n = %d, ext_d = %d.\n", \
+                   fr.num, fr.den, c, n, d); \
+            return 1; \
+        } \
+    } while (0)
+
+    // Framerates in the table must be chosen exactly.
+    for (i = 1; i <= 8; i++)
+        TEST_MATCH(ff_mpeg12_frame_rate_tab[i], i, 0, 0);
+
+    // As should the same ones with small perturbations.
+    // (1/1000 used here to be smaller than half the difference
+    // between 24 and 24000/1001.)
+    for (i = 1; i <= 8; i++) {
+        TEST_MATCH(av_sub_q(ff_mpeg12_frame_rate_tab[i],
+                            (AVRational) { 1, 1000 }), i, 0, 0);
+        TEST_MATCH(av_add_q(ff_mpeg12_frame_rate_tab[i],
+                            (AVRational) { 1, 1000 }), i, 0, 0);
+    }
+
+    // Exactly constructable framerates should be exact.  Note that some
+    // values can be made in multiple ways (e.g. 12 = 24 / 2 == 60 / 5),
+    // and there is no reason to favour any particular choice.
+    TEST_EXACT(     1,    1);
+    TEST_EXACT(     2,    1);
+    TEST_EXACT(    12,    1);
+    TEST_EXACT( 15000, 1001);
+    TEST_EXACT(    15,    1);
+    TEST_EXACT(   120,    1);
+    TEST_EXACT(120000, 1001);
+    TEST_EXACT(   200,    1);
+    TEST_EXACT(   240,    1);
+
+    // Values higher than 240 (the highest representable, as 60 * 4 / 1)
+    // should be mapped to 240.
+    for (i = 240; i < 1000; i += 10)
+        TEST_MATCH(((AVRational) { i, 1 }), 8, 3, 0);
+    // Values lower than 24000/32032 (the lowest representable, as
+    // 24000/1001 * 1 / 32) should be mapped to 24000/32032.
+    for (i = 74; i > 0; i--)
+        TEST_MATCH(((AVRational) { i, 100 }), 1, 0, 31);
+
+    return 0;
+}

diff --git a/libavcodec/tests/utils.c b/libavcodec/tests/utils.c
index e2891fb..f6ba7fe 100644
--- a/libavcodec/tests/utils.c
+++ b/libavcodec/tests/utils.c

@@ -21,7 +21,6 @@
 int main(void){
     AVCodec *codec = NULL;
     int ret = 0;
-    avcodec_register_all();
 
     while (codec = av_codec_next(codec)) {
         if (av_codec_is_encoder(codec)) {

diff --git a/libavcodec/texturedsp.c b/libavcodec/texturedsp.c
index 90b1eb4..b7dd8ba 100644
--- a/libavcodec/texturedsp.c
+++ b/libavcodec/texturedsp.c

@@ -413,7 +413,7 @@
 
 static inline void rgtc_block_internal(uint8_t *dst, ptrdiff_t stride,
                                        const uint8_t *block,
-                                       const int *color_tab)
+                                       const int *color_tab, int mono, int offset, int pix_size)
 {
     uint8_t indices[16];
     int x, y;
@@ -429,14 +429,20 @@
             int i = indices[x + y * 4];
             /* Interval expansion from [-1 1] or [0 1] to [0 255]. */
             int c = color_tab[i];
-            uint32_t pixel = RGBA(c, c, c, 255U);
-            AV_WL32(dst + x * 4 + y * stride, pixel);
+
+            if (mono){
+                dst [x * pix_size + y * stride + offset] = (uint8_t)c;
+            }
+            else{
+                uint32_t pixel = RGBA(c, c, c, 255U);
+                AV_WL32(dst + x * pix_size + y * stride, pixel);
+            }
         }
     }
 }
 
 static inline void rgtc1_block_internal(uint8_t *dst, ptrdiff_t stride,
-                                        const uint8_t *block, int sign)
+                                        const uint8_t *block, int sign, int mono, int offset, int pix_size)
 {
     int color_table[8];
     int r0, r1;
@@ -472,7 +478,7 @@
         color_table[7] = 255;  /* max range */  // bit code 111
     }
 
-    rgtc_block_internal(dst, stride, block, color_table);
+    rgtc_block_internal(dst, stride, block, color_table, mono, offset, pix_size);
 }
 
 /**
@@ -486,7 +492,7 @@
  */
 static int rgtc1s_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
 {
-    rgtc1_block_internal(dst, stride, block, 1);
+    rgtc1_block_internal(dst, stride, block, 1, 0, 0, 4);
 
     return 8;
 }
@@ -502,7 +508,39 @@
  */
 static int rgtc1u_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
 {
-    rgtc1_block_internal(dst, stride, block, 0);
+    rgtc1_block_internal(dst, stride, block, 0, 0, 0, 4);
+
+    return 8;
+}
+
+/**
+ * Decompress one block of a RGTC1 texture with unsigned components
+ * and overwrite the alpha component in 'dst' (RGBA data).
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int rgtc1u_alpha_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    rgtc1_block_internal(dst, stride, block, 0, 1, 3, 4);
+
+    return 8;
+}
+
+/**
+ * Decompress one block of a RGTC1 texture with unsigned components
+ * to Gray 8.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int rgtc1u_gray_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    rgtc1_block_internal(dst, stride, block, 0, 1, 0, 1);
 
     return 8;
 }
@@ -516,8 +554,8 @@
     int x, y;
 
     /* Decompress the two channels separately and interleave them afterwards. */
-    rgtc1_block_internal(c0, 16, block, sign);
-    rgtc1_block_internal(c1, 16, block + 8, sign);
+    rgtc1_block_internal(c0, 16, block, sign, 0, 0, 4);
+    rgtc1_block_internal(c1, 16, block + 8, sign, 0, 0, 4);
 
     /* B is rebuilt exactly like a normal map. */
     for (y = 0; y < 4; y++) {
@@ -598,17 +636,19 @@
 
 av_cold void ff_texturedsp_init(TextureDSPContext *c)
 {
-    c->dxt1_block   = dxt1_block;
-    c->dxt1a_block  = dxt1a_block;
-    c->dxt2_block   = dxt2_block;
-    c->dxt3_block   = dxt3_block;
-    c->dxt4_block   = dxt4_block;
-    c->dxt5_block   = dxt5_block;
-    c->dxt5y_block  = dxt5y_block;
-    c->dxt5ys_block = dxt5ys_block;
-    c->rgtc1s_block = rgtc1s_block;
-    c->rgtc1u_block = rgtc1u_block;
-    c->rgtc2s_block = rgtc2s_block;
-    c->rgtc2u_block = rgtc2u_block;
-    c->dxn3dc_block = dxn3dc_block;
+    c->dxt1_block         = dxt1_block;
+    c->dxt1a_block        = dxt1a_block;
+    c->dxt2_block         = dxt2_block;
+    c->dxt3_block         = dxt3_block;
+    c->dxt4_block         = dxt4_block;
+    c->dxt5_block         = dxt5_block;
+    c->dxt5y_block        = dxt5y_block;
+    c->dxt5ys_block       = dxt5ys_block;
+    c->rgtc1s_block       = rgtc1s_block;
+    c->rgtc1u_block       = rgtc1u_block;
+    c->rgtc1u_gray_block  = rgtc1u_gray_block;
+    c->rgtc1u_alpha_block = rgtc1u_alpha_block;
+    c->rgtc2s_block       = rgtc2s_block;
+    c->rgtc2u_block       = rgtc2u_block;
+    c->dxn3dc_block       = dxn3dc_block;
 }

diff --git a/libavcodec/texturedsp.h b/libavcodec/texturedsp.h
index 26f3b64..90ceb2b 100644
--- a/libavcodec/texturedsp.h
+++ b/libavcodec/texturedsp.h

@@ -43,19 +43,21 @@
 #define TEXTURE_BLOCK_H 4
 
 typedef struct TextureDSPContext {
-    int (*dxt1_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*dxt1a_block) (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*dxt2_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*dxt3_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*dxt4_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*dxt5_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*dxt5y_block) (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*dxt5ys_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*rgtc1s_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*rgtc1u_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*rgtc2s_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*rgtc2u_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*dxn3dc_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt1_block)        (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt1a_block)       (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt2_block)        (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt3_block)        (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt4_block)        (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt5_block)        (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt5y_block)       (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt5ys_block)      (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc1s_block)      (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc1u_block)      (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc1u_gray_block) (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc1u_alpha_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc2s_block)      (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc2u_block)      (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxn3dc_block)      (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
 } TextureDSPContext;
 
 void ff_texturedsp_init(TextureDSPContext *c);

diff --git a/libavcodec/texturedspenc.c b/libavcodec/texturedspenc.c
index 8b28630..3d68e0c 100644
--- a/libavcodec/texturedspenc.c
+++ b/libavcodec/texturedspenc.c

@@ -647,9 +647,26 @@
     return 16;
 }
 
+/**
+ * Compress one block of RGBA pixels in a RGTC1U texture and store the
+ * resulting bytes in 'dst'. Use the alpha channel of the input image.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to compress.
+ * @return how much texture data has been written.
+ */
+static int rgtc1u_alpha_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    compress_alpha(dst, stride, block);
+
+    return 8;
+}
+
 av_cold void ff_texturedspenc_init(TextureDSPContext *c)
 {
-    c->dxt1_block   = dxt1_block;
-    c->dxt5_block   = dxt5_block;
-    c->dxt5ys_block = dxt5ys_block;
+    c->dxt1_block         = dxt1_block;
+    c->dxt5_block         = dxt5_block;
+    c->dxt5ys_block       = dxt5ys_block;
+    c->rgtc1u_alpha_block = rgtc1u_alpha_block;
 }

diff --git a/libavcodec/thread.h b/libavcodec/thread.h
index 3186193..540135f 100644
--- a/libavcodec/thread.h
+++ b/libavcodec/thread.h

@@ -29,7 +29,6 @@
 
 #include "libavutil/buffer.h"
 
-#include "config.h"
 #include "avcodec.h"
 
 typedef struct ThreadFrame {

diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index 1b332a7..b537ec0 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c

@@ -1303,6 +1303,7 @@
 
     planes = s->planar ? s->bppcount : 1;
     for (plane = 0; plane < planes; plane++) {
+        int remaining = avpkt->size;
         stride = p->linesize[plane];
         dst = p->data[plane];
         for (i = 0; i < s->height; i += s->rps) {
@@ -1318,10 +1319,11 @@
             else
                 soff = s->stripoff;
 
-            if (soff > avpkt->size || ssize > avpkt->size - soff) {
+            if (soff > avpkt->size || ssize > avpkt->size - soff || ssize > remaining) {
                 av_log(avctx, AV_LOG_ERROR, "Invalid strip size/offset\n");
                 return AVERROR_INVALIDDATA;
             }
+            remaining -= ssize;
             if ((ret = tiff_unpack_strip(s, p, dst, stride, avpkt->data + soff, ssize, i,
                                          FFMIN(s->rps, s->height - i))) < 0) {
                 if (avctx->err_recognition & AV_EF_EXPLODE)
@@ -1403,6 +1405,8 @@
     s->subsampling[1] = 1;
     s->avctx  = avctx;
     ff_lzw_decode_open(&s->lzw);
+    if (!s->lzw)
+        return AVERROR(ENOMEM);
     ff_ccitt_unpack_init();
 
     return 0;

diff --git a/libavcodec/trace_headers_bsf.c b/libavcodec/trace_headers_bsf.c
new file mode 100644
index 0000000..94a3ef7
--- /dev/null
+++ b/libavcodec/trace_headers_bsf.c

@@ -0,0 +1,117 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+
+#include "libavutil/avstring.h"
+#include "libavutil/common.h"
+#include "libavutil/log.h"
+
+#include "bsf.h"
+#include "cbs.h"
+
+
+typedef struct TraceHeadersContext {
+    CodedBitstreamContext *cbc;
+} TraceHeadersContext;
+
+
+static int trace_headers_init(AVBSFContext *bsf)
+{
+    TraceHeadersContext *ctx = bsf->priv_data;
+    int err;
+
+    err = ff_cbs_init(&ctx->cbc, bsf->par_in->codec_id, bsf);
+    if (err < 0)
+        return err;
+
+    ctx->cbc->trace_enable = 1;
+    ctx->cbc->trace_level  = AV_LOG_INFO;
+
+    if (bsf->par_in->extradata) {
+        CodedBitstreamFragment ps;
+
+        av_log(bsf, AV_LOG_INFO, "Extradata\n");
+
+        err = ff_cbs_read_extradata(ctx->cbc, &ps, bsf->par_in);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
+            return err;
+        }
+
+        ff_cbs_fragment_uninit(ctx->cbc, &ps);
+    }
+
+    return 0;
+}
+
+static void trace_headers_close(AVBSFContext *bsf)
+{
+    TraceHeadersContext *ctx = bsf->priv_data;
+
+    ff_cbs_close(&ctx->cbc);
+}
+
+static int trace_headers(AVBSFContext *bsf, AVPacket *pkt)
+{
+    TraceHeadersContext *ctx = bsf->priv_data;
+    CodedBitstreamFragment au;
+    char tmp[256] = { 0 };
+    int err;
+
+    err = ff_bsf_get_packet_ref(bsf, pkt);
+    if (err < 0)
+        return err;
+
+    if (pkt->flags & AV_PKT_FLAG_KEY)
+        av_strlcat(tmp, ", key frame", sizeof(tmp));
+    if (pkt->flags & AV_PKT_FLAG_CORRUPT)
+        av_strlcat(tmp, ", corrupt", sizeof(tmp));
+
+    if (pkt->pts != AV_NOPTS_VALUE)
+        av_strlcatf(tmp, sizeof(tmp), ", pts %"PRId64, pkt->pts);
+    else
+        av_strlcat(tmp, ", no pts", sizeof(tmp));
+    if (pkt->dts != AV_NOPTS_VALUE)
+        av_strlcatf(tmp, sizeof(tmp), ", dts %"PRId64, pkt->dts);
+    else
+        av_strlcat(tmp, ", no dts", sizeof(tmp));
+    if (pkt->duration > 0)
+        av_strlcatf(tmp, sizeof(tmp), ", duration %"PRId64, pkt->duration);
+
+    av_log(bsf, AV_LOG_INFO, "Packet: %d bytes%s.\n", pkt->size, tmp);
+
+    err = ff_cbs_read_packet(ctx->cbc, &au, pkt);
+    if (err < 0) {
+        av_packet_unref(pkt);
+        return err;
+    }
+
+    ff_cbs_fragment_uninit(ctx->cbc, &au);
+
+    return 0;
+}
+
+const AVBitStreamFilter ff_trace_headers_bsf = {
+    .name           = "trace_headers",
+    .priv_data_size = sizeof(TraceHeadersContext),
+    .init           = &trace_headers_init,
+    .close          = &trace_headers_close,
+    .filter         = &trace_headers,
+    .codec_ids      = ff_cbs_all_codec_ids,
+};

diff --git a/libavcodec/truemotion2.c b/libavcodec/truemotion2.c
index 97c38f7..58a577f 100644
--- a/libavcodec/truemotion2.c
+++ b/libavcodec/truemotion2.c

@@ -63,6 +63,7 @@
     AVFrame *pic;
 
     GetBitContext gb;
+    int error;
     BswapDSPContext bdsp;
 
     uint8_t *buffer;
@@ -376,6 +377,10 @@
             }
         }
     } else {
+        if (len < 0) {
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
         for (i = 0; i < toks; i++) {
             ctx->tokens[stream_id][i] = codes.recode[0];
             if (stream_id <= TM2_MOT && ctx->tokens[stream_id][i] >= TM2_DELTAS) {
@@ -398,6 +403,7 @@
 {
     if (ctx->tok_ptrs[type] >= ctx->tok_lens[type]) {
         av_log(ctx->avctx, AV_LOG_ERROR, "Read token from stream %i out of bounds (%i>=%i)\n", type, ctx->tok_ptrs[type], ctx->tok_lens[type]);
+        ctx->error = 1;
         return 0;
     }
     if (type <= TM2_MOT) {
@@ -449,7 +455,7 @@
 /* common operations - add deltas to 4x4 block of luma or 2x2 blocks of chroma */
 static inline void tm2_apply_deltas(TM2Context *ctx, int* Y, int stride, int *deltas, int *last)
 {
-    int ct, d;
+    unsigned ct, d;
     int i, j;
 
     for (j = 0; j < 4; j++){
@@ -809,6 +815,8 @@
             default:
                 av_log(ctx->avctx, AV_LOG_ERROR, "Skipping unknown block type %i\n", type);
             }
+            if (ctx->error)
+                return AVERROR_INVALIDDATA;
         }
     }
 
@@ -889,6 +897,8 @@
     int offset           = TM2_HEADER_SIZE;
     int i, t, ret;
 
+    l->error = 0;
+
     av_fast_padded_malloc(&l->buffer, &l->buffer_size, buf_size);
     if (!l->buffer) {
         av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer\n");

diff --git a/libavcodec/truemotion2rt.c b/libavcodec/truemotion2rt.c
index d639187..9df0b52 100644
--- a/libavcodec/truemotion2rt.c
+++ b/libavcodec/truemotion2rt.c

@@ -116,6 +116,9 @@
     if (ret < 0)
         return ret;
 
+    if (avctx->width / s->hscale * avctx->height * s->delta_size > avpkt->size * 8LL * 4)
+        return AVERROR_INVALIDDATA;
+
     ret = init_get_bits8(gb, avpkt->data + ret, avpkt->size - ret);
     if (ret < 0)
         return ret;

diff --git a/libavcodec/tscc.c b/libavcodec/tscc.c
index cb86b58..fc1ec4d 100644
--- a/libavcodec/tscc.c
+++ b/libavcodec/tscc.c

@@ -69,9 +69,19 @@
     CamtasiaContext * const c = avctx->priv_data;
     AVFrame *frame = c->frame;
     int ret;
+    int palette_has_changed = 0;
 
-    if ((ret = ff_reget_buffer(avctx, frame)) < 0)
-        return ret;
+    if (c->avctx->pix_fmt == AV_PIX_FMT_PAL8) {
+        int size;
+        const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, &size);
+
+        if (pal && size == AVPALETTE_SIZE) {
+            palette_has_changed = 1;
+            memcpy(c->pal, pal, AVPALETTE_SIZE);
+        } else if (pal) {
+            av_log(avctx, AV_LOG_ERROR, "Palette size %d is wrong\n", size);
+        }
+    }
 
     ret = inflateReset(&c->zstream);
     if (ret != Z_OK) {
@@ -84,11 +94,17 @@
     c->zstream.avail_out = c->decomp_size;
     ret = inflate(&c->zstream, Z_FINISH);
     // Z_DATA_ERROR means empty picture
+    if (ret == Z_DATA_ERROR && !palette_has_changed) {
+        return buf_size;
+    }
+
     if ((ret != Z_OK) && (ret != Z_STREAM_END) && (ret != Z_DATA_ERROR)) {
         av_log(avctx, AV_LOG_ERROR, "Inflate error: %d\n", ret);
         return AVERROR_UNKNOWN;
     }
 
+    if ((ret = ff_reget_buffer(avctx, frame)) < 0)
+        return ret;
 
     if (ret != Z_DATA_ERROR) {
         bytestream2_init(&c->gb, c->decomp_buf,
@@ -98,15 +114,7 @@
 
     /* make the palette available on the way out */
     if (c->avctx->pix_fmt == AV_PIX_FMT_PAL8) {
-        int size;
-        const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, &size);
-
-        if (pal && size == AVPALETTE_SIZE) {
-            frame->palette_has_changed = 1;
-            memcpy(c->pal, pal, AVPALETTE_SIZE);
-        } else if (pal) {
-            av_log(avctx, AV_LOG_ERROR, "Palette size %d is wrong\n", size);
-        }
+        frame->palette_has_changed = palette_has_changed;
         memcpy(frame->data[1], c->pal, AVPALETTE_SIZE);
     }
 
@@ -161,6 +169,8 @@
     }
 
     c->frame = av_frame_alloc();
+    if (!c->frame)
+        return AVERROR(ENOMEM);
 
     return 0;
 }
@@ -187,4 +197,5 @@
     .close          = decode_end,
     .decode         = decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };

diff --git a/libavcodec/ulti.c b/libavcodec/ulti.c
index 9e4c088..9318af0 100644
--- a/libavcodec/ulti.c
+++ b/libavcodec/ulti.c

@@ -62,7 +62,8 @@
     return 0;
 }
 
-static av_cold int ulti_decode_end(AVCodecContext *avctx){
+static av_cold int ulti_decode_end(AVCodecContext *avctx)
+{
     UltimotionDecodeContext *s = avctx->priv_data;
 
     av_frame_free(&s->frame);

diff --git a/libavcodec/unary.h b/libavcodec/unary.h
index 908dc93..d57f9f7 100644
--- a/libavcodec/unary.h
+++ b/libavcodec/unary.h

@@ -28,7 +28,20 @@
  * @param gb GetBitContext
  * @param[in] stop The bitstop value (unary code of 1's or 0's)
  * @param[in] len Maximum length
- * @return Unary length/index
+ * @return unary 0 based code index. This is also the length in bits of the
+ * code excluding the stop bit.
+ * (in case len=1)
+ * 1            0
+ * 0            1
+ * (in case len=2)
+ * 1            0
+ * 01           1
+ * 00           2
+ * (in case len=3)
+ * 1            0
+ * 01           1
+ * 001          2
+ * 000          3
  */
 static inline int get_unary(GetBitContext *gb, int stop, int len)
 {

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 0c47e76..285bfdb 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c

@@ -26,7 +26,6 @@
  */
 
 #include "config.h"
-#include "libavutil/atomic.h"
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
@@ -45,8 +44,8 @@
 #include "libavutil/thread.h"
 #include "avcodec.h"
 #include "decode.h"
+#include "hwaccel.h"
 #include "libavutil/opt.h"
-#include "me_cmp.h"
 #include "mpegvideo.h"
 #include "thread.h"
 #include "frame_thread_encoder.h"
@@ -56,6 +55,7 @@
 #include "version.h"
 #include <stdlib.h>
 #include <stdarg.h>
+#include <stdatomic.h>
 #include <limits.h>
 #include <float.h>
 #if CONFIG_ICONV
@@ -65,58 +65,7 @@
 #include "libavutil/ffversion.h"
 const char av_codec_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
 
-#if HAVE_PTHREADS || HAVE_W32THREADS || HAVE_OS2THREADS
-static int default_lockmgr_cb(void **arg, enum AVLockOp op)
-{
-    void * volatile * mutex = arg;
-    int err;
-
-    switch (op) {
-    case AV_LOCK_CREATE:
-        return 0;
-    case AV_LOCK_OBTAIN:
-        if (!*mutex) {
-            pthread_mutex_t *tmp = av_malloc(sizeof(pthread_mutex_t));
-            if (!tmp)
-                return AVERROR(ENOMEM);
-            if ((err = pthread_mutex_init(tmp, NULL))) {
-                av_free(tmp);
-                return AVERROR(err);
-            }
-            if (avpriv_atomic_ptr_cas(mutex, NULL, tmp)) {
-                pthread_mutex_destroy(tmp);
-                av_free(tmp);
-            }
-        }
-
-        if ((err = pthread_mutex_lock(*mutex)))
-            return AVERROR(err);
-
-        return 0;
-    case AV_LOCK_RELEASE:
-        if ((err = pthread_mutex_unlock(*mutex)))
-            return AVERROR(err);
-
-        return 0;
-    case AV_LOCK_DESTROY:
-        if (*mutex)
-            pthread_mutex_destroy(*mutex);
-        av_free(*mutex);
-        avpriv_atomic_ptr_cas(mutex, *mutex, NULL);
-        return 0;
-    }
-    return 1;
-}
-static int (*lockmgr_cb)(void **mutex, enum AVLockOp op) = default_lockmgr_cb;
-#else
-static int (*lockmgr_cb)(void **mutex, enum AVLockOp op) = NULL;
-#endif
-
-
-volatile int ff_avcodec_locked;
-static int volatile entangled_thread_counter = 0;
-static void *codec_mutex;
-static void *avformat_mutex;
+static AVMutex codec_mutex = AV_MUTEX_INITIALIZER;
 
 void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size)
 {
@@ -142,30 +91,6 @@
         memset(*p, 0, min_size + AV_INPUT_BUFFER_PADDING_SIZE);
 }
 
-/* encoder management */
-static AVCodec *first_avcodec = NULL;
-static AVCodec **last_avcodec = &first_avcodec;
-
-AVCodec *av_codec_next(const AVCodec *c)
-{
-    if (c)
-        return c->next;
-    else
-        return first_avcodec;
-}
-
-static av_cold void avcodec_init(void)
-{
-    static int initialized = 0;
-
-    if (initialized != 0)
-        return;
-    initialized = 1;
-
-    if (CONFIG_ME_CMP)
-        ff_me_cmp_init_static();
-}
-
 int av_codec_is_encoder(const AVCodec *codec)
 {
     return codec && (codec->encode_sub || codec->encode2 ||codec->send_frame);
@@ -176,38 +101,6 @@
     return codec && (codec->decode || codec->receive_frame);
 }
 
-av_cold void avcodec_register(AVCodec *codec)
-{
-    AVCodec **p;
-    avcodec_init();
-    p = last_avcodec;
-    codec->next = NULL;
-
-    while(*p || avpriv_atomic_ptr_cas((void * volatile *)p, NULL, codec))
-        p = &(*p)->next;
-    last_avcodec = &codec->next;
-
-    if (codec->init_static_data)
-        codec->init_static_data(codec);
-}
-
-#if FF_API_EMU_EDGE
-unsigned avcodec_get_edge_width(void)
-{
-    return EDGE_WIDTH;
-}
-#endif
-
-#if FF_API_SET_DIMENSIONS
-void avcodec_set_dimensions(AVCodecContext *s, int width, int height)
-{
-    int ret = ff_set_dimensions(s, width, height);
-    if (ret < 0) {
-        av_log(s, AV_LOG_WARNING, "Failed to set dimensions %d %d\n", width, height);
-    }
-}
-#endif
-
 int ff_set_dimensions(AVCodecContext *s, int width, int height)
 {
     int ret = av_image_check_size2(width, height, s->max_pixels, AV_PIX_FMT_NONE, 0, s);
@@ -419,7 +312,10 @@
 
     *width  = FFALIGN(*width, w_align);
     *height = FFALIGN(*height, h_align);
-    if (s->codec_id == AV_CODEC_ID_H264 || s->lowres) {
+    if (s->codec_id == AV_CODEC_ID_H264 || s->lowres ||
+        s->codec_id == AV_CODEC_ID_VP5  || s->codec_id == AV_CODEC_ID_VP6 ||
+        s->codec_id == AV_CODEC_ID_VP6F || s->codec_id == AV_CODEC_ID_VP6A
+    ) {
         // some of the optimized chroma MC reads one line too much
         // which is also done in mpeg decoders with lowres > 0
         *height += 2;
@@ -569,6 +465,7 @@
     return AV_PIX_FMT_NONE;
 }
 
+#if FF_API_CODEC_GET_SET
 MAKE_ACCESSORS(AVCodecContext, codec, AVRational, pkt_timebase)
 MAKE_ACCESSORS(AVCodecContext, codec, const AVCodecDescriptor *, codec_descriptor)
 MAKE_ACCESSORS(AVCodecContext, codec, int, lowres)
@@ -584,6 +481,7 @@
 {
     return codec->max_lowres;
 }
+#endif
 
 int avpriv_codec_get_cap_skip_frame_fill_param(const AVCodec *codec){
     return !!(codec->caps_internal & FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM);
@@ -612,6 +510,19 @@
     return bit_rate;
 }
 
+
+static void ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
+{
+    if (!(codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE) && codec->init)
+        ff_mutex_lock(&codec_mutex);
+}
+
+static void ff_unlock_avcodec(const AVCodec *codec)
+{
+    if (!(codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE) && codec->init)
+        ff_mutex_unlock(&codec_mutex);
+}
+
 int attribute_align_arg ff_codec_open2_recursive(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options)
 {
     int ret = 0;
@@ -651,9 +562,7 @@
     if (options)
         av_dict_copy(&tmp, *options, 0);
 
-    ret = ff_lock_avcodec(avctx, codec);
-    if (ret < 0)
-        return ret;
+    ff_lock_avcodec(avctx, codec);
 
     avctx->internal = av_mallocz(sizeof(*avctx->internal));
     if (!avctx->internal) {
@@ -765,6 +674,7 @@
         av_freep(&avctx->subtitle_header);
 
     if (avctx->channels > FF_SANE_NB_CHANNELS) {
+        av_log(avctx, AV_LOG_ERROR, "Too many channels: %d\n", avctx->channels);
         ret = AVERROR(EINVAL);
         goto free_and_end;
     }
@@ -817,6 +727,12 @@
             goto free_and_end;
     }
 
+    if (av_codec_is_decoder(avctx->codec)) {
+        ret = ff_decode_bsfs_init(avctx);
+        if (ret < 0)
+            goto free_and_end;
+    }
+
     if (HAVE_THREADS
         && !(avctx->internal->frame_thread_encoder && (avctx->active_thread_type&FF_THREAD_FRAME))) {
         ret = ff_thread_init(avctx);
@@ -833,12 +749,6 @@
         avctx->lowres = avctx->codec->max_lowres;
     }
 
-#if FF_API_VISMV
-    if (avctx->debug_mv)
-        av_log(avctx, AV_LOG_WARNING, "The 'vismv' option is deprecated, "
-               "see the codecview filter instead.\n");
-#endif
-
     if (av_codec_is_encoder(avctx->codec)) {
         int i;
 #if FF_API_CODED_FRAME
@@ -1025,11 +935,6 @@
 
     ret=0;
 
-#if FF_API_AUDIOENC_DELAY
-    if (av_codec_is_encoder(avctx->codec))
-        avctx->delay = avctx->initial_padding;
-#endif
-
     if (av_codec_is_decoder(avctx->codec)) {
         if (!avctx->bit_rate)
             avctx->bit_rate = get_bit_rate(avctx);
@@ -1133,6 +1038,7 @@
         av_packet_free(&avctx->internal->last_pkt_props);
 
         av_packet_free(&avctx->internal->ds.in_pkt);
+        ff_decode_bsfs_uninit(avctx);
 
         av_freep(&avctx->internal->pool);
     }
@@ -1226,71 +1132,6 @@
     return 0;
 }
 
-static enum AVCodecID remap_deprecated_codec_id(enum AVCodecID id)
-{
-    switch(id){
-        //This is for future deprecatec codec ids, its empty since
-        //last major bump but will fill up again over time, please don't remove it
-        default                                         : return id;
-    }
-}
-
-static AVCodec *find_encdec(enum AVCodecID id, int encoder)
-{
-    AVCodec *p, *experimental = NULL;
-    p = first_avcodec;
-    id= remap_deprecated_codec_id(id);
-    while (p) {
-        if ((encoder ? av_codec_is_encoder(p) : av_codec_is_decoder(p)) &&
-            p->id == id) {
-            if (p->capabilities & AV_CODEC_CAP_EXPERIMENTAL && !experimental) {
-                experimental = p;
-            } else
-                return p;
-        }
-        p = p->next;
-    }
-    return experimental;
-}
-
-AVCodec *avcodec_find_encoder(enum AVCodecID id)
-{
-    return find_encdec(id, 1);
-}
-
-AVCodec *avcodec_find_encoder_by_name(const char *name)
-{
-    AVCodec *p;
-    if (!name)
-        return NULL;
-    p = first_avcodec;
-    while (p) {
-        if (av_codec_is_encoder(p) && strcmp(name, p->name) == 0)
-            return p;
-        p = p->next;
-    }
-    return NULL;
-}
-
-AVCodec *avcodec_find_decoder(enum AVCodecID id)
-{
-    return find_encdec(id, 0);
-}
-
-AVCodec *avcodec_find_decoder_by_name(const char *name)
-{
-    AVCodec *p;
-    if (!name)
-        return NULL;
-    p = first_avcodec;
-    while (p) {
-        if (av_codec_is_decoder(p) && strcmp(name, p->name) == 0)
-            return p;
-        p = p->next;
-    }
-    return NULL;
-}
-
 const char *avcodec_get_name(enum AVCodecID id)
 {
     const AVCodecDescriptor *cd;
@@ -1703,6 +1544,7 @@
     case AV_CODEC_ID_GSM_MS:       return  320;
     case AV_CODEC_ID_MP1:          return  384;
     case AV_CODEC_ID_ATRAC1:       return  512;
+    case AV_CODEC_ID_ATRAC9:
     case AV_CODEC_ID_ATRAC3:       return 1024 * framecount;
     case AV_CODEC_ID_ATRAC3P:      return 2048;
     case AV_CODEC_ID_MP2:
@@ -1836,13 +1678,13 @@
                 /* calc from frame_bytes, channels, and bits_per_coded_sample */
                 switch (id) {
                 case AV_CODEC_ID_PCM_DVD:
-                    if(bps<4)
+                    if(bps<4 || frame_bytes<3)
                         return 0;
-                    return 2 * (frame_bytes / ((bps * 2 / 8) * ch));
+                    return 2 * ((frame_bytes - 3) / ((bps * 2 / 8) * ch));
                 case AV_CODEC_ID_PCM_BLURAY:
-                    if(bps<4)
+                    if(bps<4 || frame_bytes<4)
                         return 0;
-                    return frame_bytes / ((FFALIGN(ch, 2) * bps) / 8);
+                    return (frame_bytes - 4) / ((FFALIGN(ch, 2) * bps) / 8);
                 case AV_CODEC_ID_S302M:
                     return 2 * (frame_bytes / ((bps + 4) / 4)) / ch;
                 }
@@ -1911,143 +1753,34 @@
     return i;
 }
 
-#if FF_API_MISSING_SAMPLE
-FF_DISABLE_DEPRECATION_WARNINGS
-void av_log_missing_feature(void *avc, const char *feature, int want_sample)
+const AVCodecHWConfig *avcodec_get_hw_config(const AVCodec *codec, int index)
 {
-    av_log(avc, AV_LOG_WARNING, "%s is not implemented. Update your FFmpeg "
-            "version to the newest one from Git. If the problem still "
-            "occurs, it means that your file has a feature which has not "
-            "been implemented.\n", feature);
-    if(want_sample)
-        av_log_ask_for_sample(avc, NULL);
+    int i;
+    if (!codec->hw_configs || index < 0)
+        return NULL;
+    for (i = 0; i <= index; i++)
+        if (!codec->hw_configs[i])
+            return NULL;
+    return &codec->hw_configs[index]->public;
 }
 
-void av_log_ask_for_sample(void *avc, const char *msg, ...)
+#if FF_API_USER_VISIBLE_AVHWACCEL
+AVHWAccel *av_hwaccel_next(const AVHWAccel *hwaccel)
 {
-    va_list argument_list;
-
-    va_start(argument_list, msg);
-
-    if (msg)
-        av_vlog(avc, AV_LOG_WARNING, msg, argument_list);
-    av_log(avc, AV_LOG_WARNING, "If you want to help, upload a sample "
-            "of this file to ftp://upload.ffmpeg.org/incoming/ "
-            "and contact the ffmpeg-devel mailing list. (ffmpeg-devel@ffmpeg.org)\n");
-
-    va_end(argument_list);
+    return NULL;
 }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_MISSING_SAMPLE */
-
-static AVHWAccel *first_hwaccel = NULL;
-static AVHWAccel **last_hwaccel = &first_hwaccel;
 
 void av_register_hwaccel(AVHWAccel *hwaccel)
 {
-    AVHWAccel **p = last_hwaccel;
-    hwaccel->next = NULL;
-    while(*p || avpriv_atomic_ptr_cas((void * volatile *)p, NULL, hwaccel))
-        p = &(*p)->next;
-    last_hwaccel = &hwaccel->next;
 }
+#endif
 
-AVHWAccel *av_hwaccel_next(const AVHWAccel *hwaccel)
-{
-    return hwaccel ? hwaccel->next : first_hwaccel;
-}
-
+#if FF_API_LOCKMGR
 int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op))
 {
-    if (lockmgr_cb) {
-        // There is no good way to rollback a failure to destroy the
-        // mutex, so we ignore failures.
-        lockmgr_cb(&codec_mutex,    AV_LOCK_DESTROY);
-        lockmgr_cb(&avformat_mutex, AV_LOCK_DESTROY);
-        lockmgr_cb     = NULL;
-        codec_mutex    = NULL;
-        avformat_mutex = NULL;
-    }
-
-    if (cb) {
-        void *new_codec_mutex    = NULL;
-        void *new_avformat_mutex = NULL;
-        int err;
-        if (err = cb(&new_codec_mutex, AV_LOCK_CREATE)) {
-            return err > 0 ? AVERROR_UNKNOWN : err;
-        }
-        if (err = cb(&new_avformat_mutex, AV_LOCK_CREATE)) {
-            // Ignore failures to destroy the newly created mutex.
-            cb(&new_codec_mutex, AV_LOCK_DESTROY);
-            return err > 0 ? AVERROR_UNKNOWN : err;
-        }
-        lockmgr_cb     = cb;
-        codec_mutex    = new_codec_mutex;
-        avformat_mutex = new_avformat_mutex;
-    }
-
     return 0;
 }
-
-int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
-{
-    if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
-        return 0;
-
-    if (lockmgr_cb) {
-        if ((*lockmgr_cb)(&codec_mutex, AV_LOCK_OBTAIN))
-            return -1;
-    }
-
-    if (avpriv_atomic_int_add_and_fetch(&entangled_thread_counter, 1) != 1) {
-        av_log(log_ctx, AV_LOG_ERROR,
-               "Insufficient thread locking. At least %d threads are "
-               "calling avcodec_open2() at the same time right now.\n",
-               entangled_thread_counter);
-        if (!lockmgr_cb)
-            av_log(log_ctx, AV_LOG_ERROR, "No lock manager is set, please see av_lockmgr_register()\n");
-        ff_avcodec_locked = 1;
-        ff_unlock_avcodec(codec);
-        return AVERROR(EINVAL);
-    }
-    av_assert0(!ff_avcodec_locked);
-    ff_avcodec_locked = 1;
-    return 0;
-}
-
-int ff_unlock_avcodec(const AVCodec *codec)
-{
-    if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
-        return 0;
-
-    av_assert0(ff_avcodec_locked);
-    ff_avcodec_locked = 0;
-    avpriv_atomic_int_add_and_fetch(&entangled_thread_counter, -1);
-    if (lockmgr_cb) {
-        if ((*lockmgr_cb)(&codec_mutex, AV_LOCK_RELEASE))
-            return -1;
-    }
-
-    return 0;
-}
-
-int avpriv_lock_avformat(void)
-{
-    if (lockmgr_cb) {
-        if ((*lockmgr_cb)(&avformat_mutex, AV_LOCK_OBTAIN))
-            return -1;
-    }
-    return 0;
-}
-
-int avpriv_unlock_avformat(void)
-{
-    if (lockmgr_cb) {
-        if ((*lockmgr_cb)(&avformat_mutex, AV_LOCK_RELEASE))
-            return -1;
-    }
-    return 0;
-}
+#endif
 
 unsigned int avpriv_toupper4(unsigned int x)
 {

diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h
index a811785..cf0bb28 100644
--- a/libavcodec/utvideo.h
+++ b/libavcodec/utvideo.h

@@ -72,17 +72,23 @@
     LLVidDSPContext llviddsp;
     LLVidEncDSPContext llvidencdsp;
 
-    uint32_t frame_info_size, flags, frame_info;
+    uint32_t frame_info_size, flags, frame_info, offset;
     int      planes;
     int      slices;
     int      compression;
     int      interlaced;
     int      frame_pred;
     int      pro;
+    int      pack;
 
     ptrdiff_t slice_stride;
     uint8_t *slice_bits, *slice_buffer[4];
     int      slice_bits_size;
+
+    const uint8_t *packed_stream[4][256];
+    size_t packed_stream_size[4][256];
+    const uint8_t *control_stream[4][256];
+    size_t control_stream_size[4][256];
 } UtvideoContext;
 
 typedef struct HuffEntry {

diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index d888cc3..3891df3 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c

@@ -27,9 +27,11 @@
 #include <inttypes.h>
 #include <stdlib.h>
 
+#define CACHED_BITSTREAM_READER !ARCH_X86_32
 #define UNCHECKED_BITSTREAM_READER 1
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/pixdesc.h"
 #include "avcodec.h"
 #include "bswapdsp.h"
 #include "bytestream.h"
@@ -126,7 +128,7 @@
 }
 
 static int decode_plane10(UtvideoContext *c, int plane_no,
-                          uint16_t *dst, int step, ptrdiff_t stride,
+                          uint16_t *dst, ptrdiff_t stride,
                           int width, int height,
                           const uint8_t *src, const uint8_t *huff,
                           int use_pred)
@@ -152,7 +154,7 @@
 
             prev = 0x200;
             for (j = sstart; j < send; j++) {
-                for (i = 0; i < width * step; i += step) {
+                for (i = 0; i < width; i++) {
                     pix = fsym;
                     if (use_pred) {
                         prev += pix;
@@ -195,8 +197,7 @@
 
         prev = 0x200;
         for (j = sstart; j < send; j++) {
-            int ws = width * step;
-            for (i = 0; i < ws; i += step) {
+            for (i = 0; i < width; i++) {
                 pix = get_vlc2(&gb, vlc.table, VLC_BITS, 3);
                 if (pix < 0) {
                     av_log(c->avctx, AV_LOG_ERROR, "Decoding error\n");
@@ -229,8 +230,18 @@
     return AVERROR_INVALIDDATA;
 }
 
+static int compute_cmask(int plane_no, int interlaced, enum AVPixelFormat pix_fmt)
+{
+    const int is_luma = (pix_fmt == AV_PIX_FMT_YUV420P) && !plane_no;
+
+    if (interlaced)
+        return ~(1 + 2 * is_luma);
+
+    return ~is_luma;
+}
+
 static int decode_plane(UtvideoContext *c, int plane_no,
-                        uint8_t *dst, int step, ptrdiff_t stride,
+                        uint8_t *dst, ptrdiff_t stride,
                         int width, int height,
                         const uint8_t *src, int use_pred)
 {
@@ -238,8 +249,55 @@
     int sstart, send;
     VLC vlc;
     GetBitContext gb;
-    int prev, fsym;
-    const int cmask = c->interlaced ? ~(1 + 2 * (!plane_no && c->avctx->pix_fmt == AV_PIX_FMT_YUV420P)) : ~(!plane_no && c->avctx->pix_fmt == AV_PIX_FMT_YUV420P);
+    int ret, prev, fsym;
+    const int cmask = compute_cmask(plane_no, c->interlaced, c->avctx->pix_fmt);
+
+    if (c->pack) {
+        send = 0;
+        for (slice = 0; slice < c->slices; slice++) {
+            GetBitContext cbit, pbit;
+            uint8_t *dest, *p;
+
+            ret = init_get_bits8(&cbit, c->control_stream[plane_no][slice], c->control_stream_size[plane_no][slice]);
+            if (ret < 0)
+                return ret;
+
+            ret = init_get_bits8(&pbit, c->packed_stream[plane_no][slice], c->packed_stream_size[plane_no][slice]);
+            if (ret < 0)
+                return ret;
+
+            sstart = send;
+            send   = (height * (slice + 1) / c->slices) & cmask;
+            dest   = dst + sstart * stride;
+
+            if (3 * ((dst + send * stride - dest + 7)/8) > get_bits_left(&cbit))
+                return AVERROR_INVALIDDATA;
+
+            for (p = dest; p < dst + send * stride; p += 8) {
+                int bits = get_bits_le(&cbit, 3);
+
+                if (bits == 0) {
+                    *(uint64_t *) p = 0;
+                } else {
+                    uint32_t sub = 0x80 >> (8 - (bits + 1)), add;
+                    int k;
+
+                    if ((bits + 1) * 8 > get_bits_left(&pbit))
+                        return AVERROR_INVALIDDATA;
+
+                    for (k = 0; k < 8; k++) {
+
+                        p[k] = get_bits_le(&pbit, bits + 1);
+                        add = (~p[k] & sub) << (8 - bits);
+                        p[k] -= sub;
+                        p[k] += add;
+                    }
+                }
+            }
+        }
+
+        return 0;
+    }
 
     if (build_huff(src, &vlc, &fsym)) {
         av_log(c->avctx, AV_LOG_ERROR, "Cannot build Huffman codes\n");
@@ -256,7 +314,7 @@
 
             prev = 0x80;
             for (j = sstart; j < send; j++) {
-                for (i = 0; i < width * step; i += step) {
+                for (i = 0; i < width; i++) {
                     pix = fsym;
                     if (use_pred) {
                         prev += pix;
@@ -300,8 +358,7 @@
 
         prev = 0x80;
         for (j = sstart; j < send; j++) {
-            int ws = width * step;
-            for (i = 0; i < ws; i += step) {
+            for (i = 0; i < width; i++) {
                 pix = get_vlc2(&gb, vlc.table, VLC_BITS, 3);
                 if (pix < 0) {
                     av_log(c->avctx, AV_LOG_ERROR, "Decoding error\n");
@@ -365,12 +422,16 @@
         C        = bsrc[-stride];
         bsrc[0] += C;
         A        = bsrc[0];
-        for (i = 1; i < width; i++) {
+        for (i = 1; i < FFMIN(width, 16); i++) { /* scalar loop (DSP need align 16) */
             B        = bsrc[i - stride];
             bsrc[i] += mid_pred(A, B, (uint8_t)(A + B - C));
             C        = B;
             A        = bsrc[i];
         }
+        if (width > 16)
+            c->llviddsp.add_median_pred(bsrc + 16, bsrc - stride + 16,
+                                        bsrc + 16, width - 16, &A, &B);
+
         bsrc += stride;
         // the rest of lines use continuous median prediction
         for (j = 2; j < slice_height; j++) {
@@ -416,12 +477,16 @@
         C        = bsrc[-stride2];
         bsrc[0] += C;
         A        = bsrc[0];
-        for (i = 1; i < width; i++) {
+        for (i = 1; i < FFMIN(width, 16); i++) { /* scalar loop (DSP need align 16) */
             B        = bsrc[i - stride2];
             bsrc[i] += mid_pred(A, B, (uint8_t)(A + B - C));
             C        = B;
             A        = bsrc[i];
         }
+        if (width > 16)
+            c->llviddsp.add_median_pred(bsrc + 16, bsrc - stride2 + 16,
+                                        bsrc + 16, width - 16, &A, &B);
+
         c->llviddsp.add_median_pred(bsrc + stride, bsrc - stride,
                                         bsrc + stride, width, &A, &B);
         bsrc += stride2;
@@ -444,6 +509,7 @@
     uint8_t *bsrc;
     int slice_start, slice_height;
     const int cmask = ~rmode;
+    int min_width = FFMIN(width, 32);
 
     for (slice = 0; slice < slices; slice++) {
         slice_start  = ((slice * height) / slices) & cmask;
@@ -463,12 +529,14 @@
         for (j = 1; j < slice_height; j++) {
             // second line - first element has top prediction, the rest uses gradient
             bsrc[0] = (bsrc[0] + bsrc[-stride]) & 0xFF;
-            for (i = 1; i < width; i++) {
+            for (i = 1; i < min_width; i++) { /* dsp need align 32 */
                 A = bsrc[i - stride];
                 B = bsrc[i - (stride + 1)];
                 C = bsrc[i - 1];
                 bsrc[i] = (A - B + C + bsrc[i]) & 0xFF;
             }
+            if (width > 32)
+                c->llviddsp.add_gradient_pred(bsrc + 32, stride, width - 32);
             bsrc += stride;
         }
     }
@@ -483,6 +551,7 @@
     int slice_start, slice_height;
     const int cmask   = ~(rmode ? 3 : 1);
     const ptrdiff_t stride2 = stride << 1;
+    int min_width = FFMIN(width, 32);
 
     for (slice = 0; slice < slices; slice++) {
         slice_start    = ((slice * height) / slices) & cmask;
@@ -504,12 +573,15 @@
         for (j = 1; j < slice_height; j++) {
             // second line - first element has top prediction, the rest uses gradient
             bsrc[0] = (bsrc[0] + bsrc[-stride2]) & 0xFF;
-            for (i = 1; i < width; i++) {
+            for (i = 1; i < min_width; i++) { /* dsp need align 32 */
                 A = bsrc[i - stride2];
                 B = bsrc[i - (stride2 + 1)];
                 C = bsrc[i - 1];
                 bsrc[i] = (A - B + C + bsrc[i]) & 0xFF;
             }
+            if (width > 32)
+                c->llviddsp.add_gradient_pred(bsrc + 32, stride2, width - 32);
+
             A = bsrc[-stride];
             B = bsrc[-(1 + stride + stride - width)];
             C = bsrc[width - 1];
@@ -543,7 +615,58 @@
 
     /* parse plane structure to get frame flags and validate slice offsets */
     bytestream2_init(&gb, buf, buf_size);
-    if (c->pro) {
+
+    if (c->pack) {
+        const uint8_t *packed_stream;
+        const uint8_t *control_stream;
+        GetByteContext pb;
+        uint32_t nb_cbs;
+        int left;
+
+        c->frame_info = PRED_GRADIENT << 8;
+
+        if (bytestream2_get_byte(&gb) != 1)
+            return AVERROR_INVALIDDATA;
+        bytestream2_skip(&gb, 3);
+        c->offset = bytestream2_get_le32(&gb);
+
+        if (buf_size <= c->offset + 8LL)
+            return AVERROR_INVALIDDATA;
+
+        bytestream2_init(&pb, buf + 8 + c->offset, buf_size - 8 - c->offset);
+
+        nb_cbs = bytestream2_get_le32(&pb);
+        if (nb_cbs > c->offset)
+            return AVERROR_INVALIDDATA;
+
+        packed_stream = buf + 8;
+        control_stream = packed_stream + (c->offset - nb_cbs);
+        left = control_stream - packed_stream;
+
+        for (i = 0; i < c->planes; i++) {
+            for (j = 0; j < c->slices; j++) {
+                c->packed_stream[i][j] = packed_stream;
+                c->packed_stream_size[i][j] = bytestream2_get_le32(&pb);
+                if (c->packed_stream_size[i][j] > left)
+                    return AVERROR_INVALIDDATA;
+                left -= c->packed_stream_size[i][j];
+                packed_stream += c->packed_stream_size[i][j];
+            }
+        }
+
+        left = buf + buf_size - control_stream;
+
+        for (i = 0; i < c->planes; i++) {
+            for (j = 0; j < c->slices; j++) {
+                c->control_stream[i][j] = control_stream;
+                c->control_stream_size[i][j] = bytestream2_get_le32(&pb);
+                if (c->control_stream_size[i][j] > left)
+                    return AVERROR_INVALIDDATA;
+                left -= c->control_stream_size[i][j];
+                control_stream += c->control_stream_size[i][j];
+            }
+        }
+    } else if (c->pro) {
         if (bytestream2_get_bytes_left(&gb) < c->frame_info_size) {
             av_log(avctx, AV_LOG_ERROR, "Not enough data for frame information\n");
             return AVERROR_INVALIDDATA;
@@ -612,19 +735,21 @@
 
     max_slice_size += 4*avctx->width;
 
-    av_fast_malloc(&c->slice_bits, &c->slice_bits_size,
-                   max_slice_size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!c->pack) {
+        av_fast_malloc(&c->slice_bits, &c->slice_bits_size,
+                       max_slice_size + AV_INPUT_BUFFER_PADDING_SIZE);
 
-    if (!c->slice_bits) {
-        av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer\n");
-        return AVERROR(ENOMEM);
+        if (!c->slice_bits) {
+            av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer\n");
+            return AVERROR(ENOMEM);
+        }
     }
 
     switch (c->avctx->pix_fmt) {
     case AV_PIX_FMT_GBRP:
     case AV_PIX_FMT_GBRAP:
         for (i = 0; i < c->planes; i++) {
-            ret = decode_plane(c, i, frame.f->data[i], 1,
+            ret = decode_plane(c, i, frame.f->data[i],
                                frame.f->linesize[i], avctx->width,
                                avctx->height, plane_start[i],
                                c->frame_pred == PRED_LEFT);
@@ -661,7 +786,7 @@
     case AV_PIX_FMT_GBRAP10:
     case AV_PIX_FMT_GBRP10:
         for (i = 0; i < c->planes; i++) {
-            ret = decode_plane10(c, i, (uint16_t *)frame.f->data[i], 1,
+            ret = decode_plane10(c, i, (uint16_t *)frame.f->data[i],
                                  frame.f->linesize[i] / 2, avctx->width,
                                  avctx->height, plane_start[i],
                                  plane_start[i + 1] - 1024,
@@ -675,7 +800,7 @@
         break;
     case AV_PIX_FMT_YUV420P:
         for (i = 0; i < 3; i++) {
-            ret = decode_plane(c, i, frame.f->data[i], 1, frame.f->linesize[i],
+            ret = decode_plane(c, i, frame.f->data[i], frame.f->linesize[i],
                                avctx->width >> !!i, avctx->height >> !!i,
                                plane_start[i], c->frame_pred == PRED_LEFT);
             if (ret)
@@ -707,7 +832,7 @@
         break;
     case AV_PIX_FMT_YUV422P:
         for (i = 0; i < 3; i++) {
-            ret = decode_plane(c, i, frame.f->data[i], 1, frame.f->linesize[i],
+            ret = decode_plane(c, i, frame.f->data[i], frame.f->linesize[i],
                                avctx->width >> !!i, avctx->height,
                                plane_start[i], c->frame_pred == PRED_LEFT);
             if (ret)
@@ -737,7 +862,7 @@
         break;
     case AV_PIX_FMT_YUV444P:
         for (i = 0; i < 3; i++) {
-            ret = decode_plane(c, i, frame.f->data[i], 1, frame.f->linesize[i],
+            ret = decode_plane(c, i, frame.f->data[i], frame.f->linesize[i],
                                avctx->width, avctx->height,
                                plane_start[i], c->frame_pred == PRED_LEFT);
             if (ret)
@@ -767,7 +892,7 @@
         break;
     case AV_PIX_FMT_YUV422P10:
         for (i = 0; i < 3; i++) {
-            ret = decode_plane10(c, i, (uint16_t *)frame.f->data[i], 1, frame.f->linesize[i] / 2,
+            ret = decode_plane10(c, i, (uint16_t *)frame.f->data[i], frame.f->linesize[i] / 2,
                                  avctx->width >> !!i, avctx->height,
                                  plane_start[i], plane_start[i + 1] - 1024, c->frame_pred == PRED_LEFT);
             if (ret)
@@ -789,6 +914,7 @@
 static av_cold int decode_init(AVCodecContext *avctx)
 {
     UtvideoContext * const c = avctx->priv_data;
+    int h_shift, v_shift;
 
     c->avctx = avctx;
 
@@ -796,37 +922,6 @@
     ff_bswapdsp_init(&c->bdsp);
     ff_llviddsp_init(&c->llviddsp);
 
-    if (avctx->extradata_size >= 16) {
-        av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d.%d.%d\n",
-               avctx->extradata[3], avctx->extradata[2],
-               avctx->extradata[1], avctx->extradata[0]);
-        av_log(avctx, AV_LOG_DEBUG, "Original format %"PRIX32"\n",
-               AV_RB32(avctx->extradata + 4));
-        c->frame_info_size = AV_RL32(avctx->extradata + 8);
-        c->flags           = AV_RL32(avctx->extradata + 12);
-
-        if (c->frame_info_size != 4)
-            avpriv_request_sample(avctx, "Frame info not 4 bytes");
-        av_log(avctx, AV_LOG_DEBUG, "Encoding parameters %08"PRIX32"\n", c->flags);
-        c->slices      = (c->flags >> 24) + 1;
-        c->compression = c->flags & 1;
-        c->interlaced  = c->flags & 0x800;
-    } else if (avctx->extradata_size == 8) {
-        av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d.%d.%d\n",
-               avctx->extradata[3], avctx->extradata[2],
-               avctx->extradata[1], avctx->extradata[0]);
-        av_log(avctx, AV_LOG_DEBUG, "Original format %"PRIX32"\n",
-               AV_RB32(avctx->extradata + 4));
-        c->interlaced  = 0;
-        c->pro         = 1;
-        c->frame_info_size = 4;
-    } else {
-        av_log(avctx, AV_LOG_ERROR,
-               "Insufficient extradata size %d, should be at least 16\n",
-               avctx->extradata_size);
-        return AVERROR_INVALIDDATA;
-    }
-
     c->slice_bits_size = 0;
 
     switch (avctx->codec_tag) {
@@ -855,14 +950,17 @@
         break;
     case MKTAG('U', 'Q', 'Y', '2'):
         c->planes      = 3;
+        c->pro         = 1;
         avctx->pix_fmt = AV_PIX_FMT_YUV422P10;
         break;
     case MKTAG('U', 'Q', 'R', 'G'):
         c->planes      = 3;
+        c->pro         = 1;
         avctx->pix_fmt = AV_PIX_FMT_GBRP10;
         break;
     case MKTAG('U', 'Q', 'R', 'A'):
         c->planes      = 4;
+        c->pro         = 1;
         avctx->pix_fmt = AV_PIX_FMT_GBRAP10;
         break;
     case MKTAG('U', 'L', 'H', '0'):
@@ -880,12 +978,93 @@
         avctx->pix_fmt = AV_PIX_FMT_YUV444P;
         avctx->colorspace = AVCOL_SPC_BT709;
         break;
+    case MKTAG('U', 'M', 'Y', '2'):
+        c->planes      = 3;
+        c->pack        = 1;
+        avctx->pix_fmt = AV_PIX_FMT_YUV422P;
+        avctx->colorspace = AVCOL_SPC_BT470BG;
+        break;
+    case MKTAG('U', 'M', 'H', '2'):
+        c->planes      = 3;
+        c->pack        = 1;
+        avctx->pix_fmt = AV_PIX_FMT_YUV422P;
+        avctx->colorspace = AVCOL_SPC_BT709;
+        break;
+    case MKTAG('U', 'M', 'Y', '4'):
+        c->planes      = 3;
+        c->pack        = 1;
+        avctx->pix_fmt = AV_PIX_FMT_YUV444P;
+        avctx->colorspace = AVCOL_SPC_BT470BG;
+        break;
+    case MKTAG('U', 'M', 'H', '4'):
+        c->planes      = 3;
+        c->pack        = 1;
+        avctx->pix_fmt = AV_PIX_FMT_YUV444P;
+        avctx->colorspace = AVCOL_SPC_BT709;
+        break;
+    case MKTAG('U', 'M', 'R', 'G'):
+        c->planes      = 3;
+        c->pack        = 1;
+        avctx->pix_fmt = AV_PIX_FMT_GBRP;
+        break;
+    case MKTAG('U', 'M', 'R', 'A'):
+        c->planes      = 4;
+        c->pack        = 1;
+        avctx->pix_fmt = AV_PIX_FMT_GBRAP;
+        break;
     default:
         av_log(avctx, AV_LOG_ERROR, "Unknown Ut Video FOURCC provided (%08X)\n",
                avctx->codec_tag);
         return AVERROR_INVALIDDATA;
     }
 
+    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &h_shift, &v_shift);
+    if ((avctx->width  & ((1<<h_shift)-1)) ||
+        (avctx->height & ((1<<v_shift)-1))) {
+        avpriv_request_sample(avctx, "Odd dimensions");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    if (c->pack && avctx->extradata_size >= 16) {
+        av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d.%d.%d\n",
+               avctx->extradata[3], avctx->extradata[2],
+               avctx->extradata[1], avctx->extradata[0]);
+        av_log(avctx, AV_LOG_DEBUG, "Original format %"PRIX32"\n",
+               AV_RB32(avctx->extradata + 4));
+        c->compression = avctx->extradata[8];
+        if (c->compression != 2)
+            avpriv_request_sample(avctx, "Unknown compression type");
+        c->slices      = avctx->extradata[9] + 1;
+    } else if (!c->pro && avctx->extradata_size >= 16) {
+        av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d.%d.%d\n",
+               avctx->extradata[3], avctx->extradata[2],
+               avctx->extradata[1], avctx->extradata[0]);
+        av_log(avctx, AV_LOG_DEBUG, "Original format %"PRIX32"\n",
+               AV_RB32(avctx->extradata + 4));
+        c->frame_info_size = AV_RL32(avctx->extradata + 8);
+        c->flags           = AV_RL32(avctx->extradata + 12);
+
+        if (c->frame_info_size != 4)
+            avpriv_request_sample(avctx, "Frame info not 4 bytes");
+        av_log(avctx, AV_LOG_DEBUG, "Encoding parameters %08"PRIX32"\n", c->flags);
+        c->slices      = (c->flags >> 24) + 1;
+        c->compression = c->flags & 1;
+        c->interlaced  = c->flags & 0x800;
+    } else if (c->pro && avctx->extradata_size == 8) {
+        av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d.%d.%d\n",
+               avctx->extradata[3], avctx->extradata[2],
+               avctx->extradata[1], avctx->extradata[0]);
+        av_log(avctx, AV_LOG_DEBUG, "Original format %"PRIX32"\n",
+               AV_RB32(avctx->extradata + 4));
+        c->interlaced  = 0;
+        c->frame_info_size = 4;
+    } else {
+        av_log(avctx, AV_LOG_ERROR,
+               "Insufficient extradata size %d, should be at least 16\n",
+               avctx->extradata_size);
+        return AVERROR_INVALIDDATA;
+    }
+
     return 0;
 }
 

diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
index 840742c..db00e1e 100644
--- a/libavcodec/utvideoenc.c
+++ b/libavcodec/utvideoenc.c

@@ -67,12 +67,12 @@
     c->slice_stride    = FFALIGN(avctx->width, 32);
 
     switch (avctx->pix_fmt) {
-    case AV_PIX_FMT_RGB24:
+    case AV_PIX_FMT_GBRP:
         c->planes        = 3;
         avctx->codec_tag = MKTAG('U', 'L', 'R', 'G');
         original_format  = UTVIDEO_RGB;
         break;
-    case AV_PIX_FMT_RGBA:
+    case AV_PIX_FMT_GBRAP:
         c->planes        = 4;
         avctx->codec_tag = MKTAG('U', 'L', 'R', 'A');
         original_format  = UTVIDEO_RGBA;
@@ -243,53 +243,43 @@
 }
 
 static void mangle_rgb_planes(uint8_t *dst[4], ptrdiff_t dst_stride,
-                              uint8_t *src, int step, ptrdiff_t stride,
+                              uint8_t *const src[4], int planes, const int stride[4],
                               int width, int height)
 {
     int i, j;
     int k = 2 * dst_stride;
+    const uint8_t *sg = src[0];
+    const uint8_t *sb = src[1];
+    const uint8_t *sr = src[2];
+    const uint8_t *sa = src[3];
     unsigned int g;
 
     for (j = 0; j < height; j++) {
-        if (step == 3) {
-            for (i = 0; i < width * step; i += step) {
-                g         = src[i + 1];
+        if (planes == 3) {
+            for (i = 0; i < width; i++) {
+                g         = sg[i];
                 dst[0][k] = g;
                 g        += 0x80;
-                dst[1][k] = src[i + 2] - g;
-                dst[2][k] = src[i + 0] - g;
+                dst[1][k] = sb[i] - g;
+                dst[2][k] = sr[i] - g;
                 k++;
             }
         } else {
-            for (i = 0; i < width * step; i += step) {
-                g         = src[i + 1];
+            for (i = 0; i < width; i++) {
+                g         = sg[i];
                 dst[0][k] = g;
                 g        += 0x80;
-                dst[1][k] = src[i + 2] - g;
-                dst[2][k] = src[i + 0] - g;
-                dst[3][k] = src[i + 3];
+                dst[1][k] = sb[i] - g;
+                dst[2][k] = sr[i] - g;
+                dst[3][k] = sa[i];
                 k++;
             }
+            sa += stride[3];
         }
         k += dst_stride - width;
-        src += stride;
-    }
-}
-
-/* Write data to a plane with left prediction */
-static void left_predict(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
-                         int width, int height)
-{
-    int i, j;
-    uint8_t prev;
-
-    prev = 0x80; /* Set the initial value */
-    for (j = 0; j < height; j++) {
-        for (i = 0; i < width; i++) {
-            *dst++ = src[i] - prev;
-            prev   = src[i];
-        }
-        src += stride;
+        sg += stride[0];
+        sb += stride[1];
+        sr += stride[2];
     }
 }
 
@@ -429,8 +419,7 @@
         for (i = 0; i < c->slices; i++) {
             sstart = send;
             send   = height * (i + 1) / c->slices & cmask;
-            left_predict(src + sstart * stride, dst + sstart * width,
-                         stride, width, send - sstart);
+            c->llvidencdsp.sub_left_predict(dst + sstart * width, src + sstart * stride, stride, width, send - sstart);
         }
         break;
     case PRED_MEDIAN:
@@ -572,14 +561,14 @@
     }
 
     /* In case of RGB, mangle the planes to Ut Video's format */
-    if (avctx->pix_fmt == AV_PIX_FMT_RGBA || avctx->pix_fmt == AV_PIX_FMT_RGB24)
-        mangle_rgb_planes(c->slice_buffer, c->slice_stride, pic->data[0],
-                          c->planes, pic->linesize[0], width, height);
+    if (avctx->pix_fmt == AV_PIX_FMT_GBRAP || avctx->pix_fmt == AV_PIX_FMT_GBRP)
+        mangle_rgb_planes(c->slice_buffer, c->slice_stride, pic->data,
+                          c->planes, pic->linesize, width, height);
 
     /* Deal with the planes */
     switch (avctx->pix_fmt) {
-    case AV_PIX_FMT_RGB24:
-    case AV_PIX_FMT_RGBA:
+    case AV_PIX_FMT_GBRP:
+    case AV_PIX_FMT_GBRAP:
         for (i = 0; i < c->planes; i++) {
             ret = encode_plane(avctx, c->slice_buffer[i] + 2 * c->slice_stride,
                                c->slice_buffer[i], c->slice_stride, i,
@@ -690,7 +679,7 @@
     .close          = utvideo_encode_close,
     .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
-                          AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA, AV_PIX_FMT_YUV422P,
+                          AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, AV_PIX_FMT_YUV422P,
                           AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_NONE
                       },
 };

diff --git a/libavcodec/v210dec.c b/libavcodec/v210dec.c
index 9af9af6..ddc5dbe 100644
--- a/libavcodec/v210dec.c
+++ b/libavcodec/v210dec.c

@@ -27,6 +27,7 @@
 #include "libavutil/bswap.h"
 #include "libavutil/internal.h"
 #include "libavutil/mem.h"
+#include "libavutil/intreadwrite.h"
 
 #define READ_PIXELS(a, b, c)         \
     do {                             \
@@ -92,6 +93,11 @@
             return AVERROR_INVALIDDATA;
         }
     }
+    if (   avctx->codec_tag == MKTAG('C', '2', '1', '0')
+        && avpkt->size > 64
+        && AV_RN32(psrc) == AV_RN32("INFO")
+        && avpkt->size - 64 >= stride * avctx->height)
+        psrc += 64;
 
     aligned_input = !((uintptr_t)psrc & 0xf) && !(stride & 0xf);
     if (aligned_input != s->aligned_input) {
@@ -162,10 +168,10 @@
 };
 
 static const AVClass v210dec_class = {
-    "V210 Decoder",
-    av_default_item_name,
-    v210dec_options,
-    LIBAVUTIL_VERSION_INT,
+    .class_name = "V210 Decoder",
+    .item_name  = av_default_item_name,
+    .option     = v210dec_options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 AVCodec ff_v210_decoder = {

diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c
index a6afbbf..b024806 100644
--- a/libavcodec/v210enc.c
+++ b/libavcodec/v210enc.c

@@ -123,6 +123,7 @@
     int aligned_width = ((avctx->width + 47) / 48) * 48;
     int stride = aligned_width * 8 / 3;
     int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4;
+    AVFrameSideData *side_data;
     int h, w, ret;
     uint8_t *dst;
 
@@ -233,6 +234,22 @@
         }
     }
 
+    side_data = av_frame_get_side_data(pic, AV_FRAME_DATA_A53_CC);
+    if (side_data && side_data->size) {
+        uint8_t *buf = av_packet_new_side_data(pkt, AV_PKT_DATA_A53_CC, side_data->size);
+        if (!buf)
+            return AVERROR(ENOMEM);
+        memcpy(buf, side_data->data, side_data->size);
+    }
+
+    side_data = av_frame_get_side_data(pic, AV_FRAME_DATA_AFD);
+    if (side_data && side_data->size) {
+        uint8_t *buf = av_packet_new_side_data(pkt, AV_PKT_DATA_AFD, side_data->size);
+        if (!buf)
+            return AVERROR(ENOMEM);
+        memcpy(buf, side_data->data, side_data->size);
+    }
+
     pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
     return 0;

diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
index ba70c5d..aef911f 100644
--- a/libavcodec/v4l2_buffers.c
+++ b/libavcodec/v4l2_buffers.c

@@ -69,7 +69,8 @@
     int64_t v4l2_pts;
 
     /* convert pts back to encoder timebase */
-    v4l2_pts = avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + avbuf->buf.timestamp.tv_usec;
+    v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
+                        avbuf->buf.timestamp.tv_usec;
 
     return av_rescale_q(v4l2_pts, v4l2_timebase, s->avctx->time_base);
 }
@@ -207,20 +208,23 @@
     V4L2Buffer* avbuf = opaque;
     V4L2m2mContext *s = buf_to_m2mctx(avbuf);
 
-    atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel);
-    if (s->reinit) {
-        if (!atomic_load(&s->refcount))
-            sem_post(&s->refsync);
-        return;
-    }
+    if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) {
+        atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel);
 
-    if (avbuf->context->streamon) {
-        ff_v4l2_buffer_enqueue(avbuf);
-        return;
-    }
+        if (s->reinit) {
+            if (!atomic_load(&s->refcount))
+                sem_post(&s->refsync);
+        } else {
+            if (s->draining) {
+                /* no need to queue more buffers to the driver */
+                avbuf->status = V4L2BUF_AVAILABLE;
+            }
+            else if (avbuf->context->streamon)
+                ff_v4l2_buffer_enqueue(avbuf);
+        }
 
-    if (!atomic_load(&s->refcount))
-        ff_v4l2_m2m_codec_end(s->avctx);
+        av_buffer_unref(&avbuf->context_ref);
+    }
 }
 
 static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf)
@@ -236,6 +240,17 @@
     if (!*buf)
         return AVERROR(ENOMEM);
 
+    if (in->context_ref)
+        atomic_fetch_add(&in->context_refcount, 1);
+    else {
+        in->context_ref = av_buffer_ref(s->self_ref);
+        if (!in->context_ref) {
+            av_buffer_unref(buf);
+            return AVERROR(ENOMEM);
+        }
+        in->context_refcount = 1;
+    }
+
     in->status = V4L2BUF_RET_USER;
     atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed);
 

diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
index e28a4a6..7a57caf 100644
--- a/libavcodec/v4l2_buffers.h
+++ b/libavcodec/v4l2_buffers.h

@@ -24,6 +24,7 @@
 #ifndef AVCODEC_V4L2_BUFFERS_H
 #define AVCODEC_V4L2_BUFFERS_H
 
+#include <stdatomic.h>
 #include <linux/videodev2.h>
 
 #include "avcodec.h"
@@ -41,6 +42,11 @@
     /* each buffer needs to have a reference to its context */
     struct V4L2Context *context;
 
+    /* This object is refcounted per-plane, so we need to keep track
+     * of how many context-refs we are holding. */
+    AVBufferRef *context_ref;
+    atomic_uint context_refcount;
+
     /* keep track of the mmap address and mmap length */
     struct V4L2Plane_info {
         int bytesperline;
@@ -66,7 +72,7 @@
  * @param[in] buf The V4L2Buffer to get the information from
  *
  * @returns 0 in case of success, AVERROR(EINVAL) if the number of planes is incorrect,
- * AVERROR(ENOMEM) if the AVBufferRef cant be created.
+ * AVERROR(ENOMEM) if the AVBufferRef can't be created.
  */
 int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf);
 
@@ -77,7 +83,7 @@
  * @param[in] buf The V4L2Buffer to get the information from
  *
  * @returns 0 in case of success, AVERROR(EINVAL) if the number of planes is incorrect,
- * AVERROR(ENOMEM) if the AVBufferRef cant be created.
+ * AVERROR(ENOMEM) if the AVBufferRef can't be created.
  *
  */
 int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf);

diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
index 9f3b56d..efcb042 100644
--- a/libavcodec/v4l2_context.c
+++ b/libavcodec/v4l2_context.c

@@ -217,6 +217,7 @@
 {
     struct v4l2_decoder_cmd cmd = {
         .cmd = V4L2_DEC_CMD_STOP,
+        .flags = 0,
     };
     int ret;
 
@@ -234,6 +235,7 @@
 {
     struct v4l2_encoder_cmd cmd = {
         .cmd = V4L2_ENC_CMD_STOP,
+        .flags = 0,
     };
     int ret;
 
@@ -256,10 +258,26 @@
         .events =  POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */
         .fd = ctx_to_m2mctx(ctx)->fd,
     };
-    int ret;
+    int i, ret;
 
+    /* if we are draining and there are no more capture buffers queued in the driver we are done */
+    if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) {
+        for (i = 0; i < ctx->num_buffers; i++) {
+            if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER)
+                goto start;
+        }
+        ctx->done = 1;
+        return NULL;
+    }
+
+start:
     if (V4L2_TYPE_IS_OUTPUT(ctx->type))
         pfd.events =  POLLOUT | POLLWRNORM;
+    else {
+        /* no need to listen to requests for more input while draining */
+        if (ctx_to_m2mctx(ctx)->draining)
+            pfd.events =  POLLIN | POLLRDNORM | POLLPRI;
+    }
 
     for (;;) {
         ret = poll(&pfd, 1, timeout);
@@ -267,17 +285,22 @@
             break;
         if (errno == EINTR)
             continue;
-
-        /* timeout is being used to indicate last valid bufer when draining */
-        if (ctx_to_m2mctx(ctx)->draining)
-            ctx->done = 1;
-
         return NULL;
     }
 
     /* 0. handle errors */
     if (pfd.revents & POLLERR) {
-        av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
+        /* if we are trying to get free buffers but none have been queued yet
+           no need to raise a warning */
+        if (timeout == 0) {
+            for (i = 0; i < ctx->num_buffers; i++) {
+                if (ctx->buffers[i].status != V4L2BUF_AVAILABLE)
+                    av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
+            }
+        }
+        else
+            av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
+
         return NULL;
     }
 
@@ -286,7 +309,7 @@
         ret = v4l2_handle_event(ctx);
         if (ret < 0) {
             /* if re-init failed, abort */
-            ctx->done = EINVAL;
+            ctx->done = 1;
             return NULL;
         }
         if (ret) {
@@ -325,23 +348,25 @@
         ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf);
         if (ret) {
             if (errno != EAGAIN) {
-                ctx->done = errno;
+                ctx->done = 1;
                 if (errno != EPIPE)
                     av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n",
                         ctx->name, av_err2str(AVERROR(errno)));
             }
-        } else {
-            avbuf = &ctx->buffers[buf.index];
-            avbuf->status = V4L2BUF_AVAILABLE;
-            avbuf->buf = buf;
-            if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
-                memcpy(avbuf->planes, planes, sizeof(planes));
-                avbuf->buf.m.planes = avbuf->planes;
-            }
+            return NULL;
         }
+
+        avbuf = &ctx->buffers[buf.index];
+        avbuf->status = V4L2BUF_AVAILABLE;
+        avbuf->buf = buf;
+        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+            memcpy(avbuf->planes, planes, sizeof(planes));
+            avbuf->buf.m.planes = avbuf->planes;
+        }
+        return avbuf;
     }
 
-    return avbuf;
+    return NULL;
 }
 
 static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
@@ -421,9 +446,7 @@
 
     if (pixfmt != AV_PIX_FMT_NONE) {
         ret = v4l2_try_raw_format(ctx, pixfmt);
-        if (ret)
-            pixfmt = AV_PIX_FMT_NONE;
-        else
+        if (!ret)
             return 0;
     }
 
@@ -484,7 +507,7 @@
   *
   *****************************************************************************/
 
-int ff_v4l2_context_set_status(V4L2Context* ctx, int cmd)
+int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
 {
     int type = ctx->type;
     int ret;
@@ -552,14 +575,12 @@
 {
     V4L2Buffer* avbuf = NULL;
 
-    /* if we are draining, we are no longer inputing data, therefore enable a
-     * timeout so we can dequeue and flag the last valid buffer.
-     *
+    /*
      * blocks until:
      *  1. decoded frame available
      *  2. an input buffer is ready to be dequeued
      */
-    avbuf = v4l2_dequeue_v4l2buf(ctx, ctx_to_m2mctx(ctx)->draining ? 200 : -1);
+    avbuf = v4l2_dequeue_v4l2buf(ctx, -1);
     if (!avbuf) {
         if (ctx->done)
             return AVERROR_EOF;
@@ -574,14 +595,12 @@
 {
     V4L2Buffer* avbuf = NULL;
 
-    /* if we are draining, we are no longer inputing data, therefore enable a
-     * timeout so we can dequeue and flag the last valid buffer.
-     *
+    /*
      * blocks until:
      *  1. encoded packet available
      *  2. an input buffer ready to be dequeued
      */
-    avbuf = v4l2_dequeue_v4l2buf(ctx, ctx_to_m2mctx(ctx)->draining ? 200 : -1);
+    avbuf = v4l2_dequeue_v4l2buf(ctx, -1);
     if (!avbuf) {
         if (ctx->done)
             return AVERROR_EOF;

diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
index 503cc36..632f1d0 100644
--- a/libavcodec/v4l2_context.h
+++ b/libavcodec/v4l2_context.h

@@ -135,7 +135,7 @@
  *                those frames will be dropped.
  * @return 0 in case of success, a negative value representing the error otherwise.
  */
-int ff_v4l2_context_set_status(V4L2Context* ctx, int cmd);
+int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd);
 
 /**
  * Dequeues a buffer from a V4L2Context to an AVPacket.

diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
index 1d7a852..427e165 100644
--- a/libavcodec/v4l2_m2m.c
+++ b/libavcodec/v4l2_m2m.c

@@ -222,8 +222,6 @@
     }
 
     /* 5. complete reinit */
-    sem_destroy(&s->refsync);
-    sem_init(&s->refsync, 0, 0);
     s->draining = 0;
     s->reinit = 0;
 
@@ -241,24 +239,26 @@
     if (atomic_load(&s->refcount))
         while(sem_wait(&s->refsync) == -1 && errno == EINTR);
 
-    /* close the driver */
-    ff_v4l2_m2m_codec_end(s->avctx);
+    ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF);
+    if (ret) {
+        av_log(s->avctx, AV_LOG_ERROR, "output VIDIOC_STREAMOFF\n");
+        goto error;
+    }
+
+    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
+    if (ret) {
+            av_log(s->avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n");
+            goto error;
+    }
+
+    /* release and unmmap the buffers */
+    ff_v4l2_context_release(&s->output);
+    ff_v4l2_context_release(&s->capture);
 
     /* start again now that we know the stream dimensions */
     s->draining = 0;
     s->reinit = 0;
 
-    s->fd = open(s->devname, O_RDWR | O_NONBLOCK, 0);
-    if (s->fd < 0)
-        return AVERROR(errno);
-
-    ret = v4l2_prepare_contexts(s);
-    if (ret < 0)
-        goto error;
-
-    /* if a full re-init was requested - probe didn't run - we need to populate
-     * the format for each context
-     */
     ret = ff_v4l2_context_get_format(&s->output);
     if (ret) {
         av_log(log_ctx, AV_LOG_DEBUG, "v4l2 output format not supported\n");
@@ -301,19 +301,25 @@
     return 0;
 
 error:
-    if (close(s->fd) < 0) {
-        ret = AVERROR(errno);
-        av_log(log_ctx, AV_LOG_ERROR, "error closing %s (%s)\n",
-            s->devname, av_err2str(AVERROR(errno)));
-    }
-    s->fd = -1;
-
     return ret;
 }
 
+static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context)
+{
+    V4L2m2mContext *s = (V4L2m2mContext*)context;
+
+    ff_v4l2_context_release(&s->capture);
+    sem_destroy(&s->refsync);
+
+    close(s->fd);
+
+    av_free(s);
+}
+
 int ff_v4l2_m2m_codec_end(AVCodecContext *avctx)
 {
-    V4L2m2mContext* s = avctx->priv_data;
+    V4L2m2mPriv *priv = avctx->priv_data;
+    V4L2m2mContext* s = priv->context;
     int ret;
 
     ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF);
@@ -326,17 +332,8 @@
 
     ff_v4l2_context_release(&s->output);
 
-    if (atomic_load(&s->refcount))
-        av_log(avctx, AV_LOG_ERROR, "ff_v4l2m2m_codec_end leaving pending buffers\n");
-
-    ff_v4l2_context_release(&s->capture);
-    sem_destroy(&s->refsync);
-
-    /* release the hardware */
-    if (close(s->fd) < 0 )
-        av_log(avctx, AV_LOG_ERROR, "failure closing %s (%s)\n", s->devname, av_err2str(AVERROR(errno)));
-
-    s->fd = -1;
+    s->self_ref = NULL;
+    av_buffer_unref(&priv->context_ref);
 
     return 0;
 }
@@ -348,7 +345,7 @@
     char node[PATH_MAX];
     DIR *dirp;
 
-    V4L2m2mContext *s = avctx->priv_data;
+    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
     s->avctx = avctx;
 
     dirp = opendir("/dev");
@@ -381,3 +378,29 @@
 
     return v4l2_configure_contexts(s);
 }
+
+int ff_v4l2_m2m_create_context(AVCodecContext *avctx, V4L2m2mContext **s)
+{
+    V4L2m2mPriv *priv = avctx->priv_data;
+
+    *s = av_mallocz(sizeof(V4L2m2mContext));
+    if (!*s)
+        return AVERROR(ENOMEM);
+
+    priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext),
+                                         &v4l2_m2m_destroy_context, NULL, 0);
+    if (!priv->context_ref) {
+        av_freep(s);
+        return AVERROR(ENOMEM);
+    }
+
+    /* assign the context */
+    priv->context = *s;
+
+    /* populate it */
+    priv->context->capture.num_buffers = priv->num_capture_buffers;
+    priv->context->output.num_buffers  = priv->num_output_buffers;
+    priv->context->self_ref = priv->context_ref;
+
+    return 0;
+}

diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
index afa3987..452bf0d 100644
--- a/libavcodec/v4l2_m2m.h
+++ b/libavcodec/v4l2_m2m.h

@@ -38,11 +38,9 @@
 
 #define V4L_M2M_DEFAULT_OPTS \
     { "num_output_buffers", "Number of buffers in the output context",\
-        OFFSET(output.num_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 6, INT_MAX, FLAGS }
+        OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 6, INT_MAX, FLAGS }
 
-typedef struct V4L2m2mContext
-{
-    AVClass *class;
+typedef struct V4L2m2mContext {
     char devname[PATH_MAX];
     int fd;
 
@@ -50,18 +48,41 @@
     V4L2Context capture;
     V4L2Context output;
 
-    /* refcount of buffers held by the user */
-    atomic_uint refcount;
-
     /* dynamic stream reconfig */
     AVCodecContext *avctx;
     sem_t refsync;
+    atomic_uint refcount;
     int reinit;
 
     /* null frame/packet received */
     int draining;
+
+    /* Reference to self; only valid while codec is active. */
+    AVBufferRef *self_ref;
 } V4L2m2mContext;
 
+typedef struct V4L2m2mPriv
+{
+    AVClass *class;
+
+    V4L2m2mContext *context;
+    AVBufferRef    *context_ref;
+
+    int num_output_buffers;
+    int num_capture_buffers;
+} V4L2m2mPriv;
+
+/**
+ * Allocate a new context and references for a V4L2 M2M instance.
+ *
+ * @param[in] ctx The AVCodecContext instantiated by the encoder/decoder.
+ * @param[out] ctx The V4L2m2mContext.
+ *
+ * @returns 0 in success, a negative error code otherwise.
+ */
+int ff_v4l2_m2m_create_context(AVCodecContext *avctx, V4L2m2mContext **s);
+
+
 /**
  * Probes the video nodes looking for the required codec capabilities.
  *

diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
index 958cdc5..710e40e 100644
--- a/libavcodec/v4l2_m2m_dec.c
+++ b/libavcodec/v4l2_m2m_dec.c

@@ -35,7 +35,7 @@
 
 static int v4l2_try_start(AVCodecContext *avctx)
 {
-    V4L2m2mContext *s = avctx->priv_data;
+    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
     V4L2Context *const capture = &s->capture;
     V4L2Context *const output = &s->output;
     struct v4l2_selection selection;
@@ -127,7 +127,7 @@
 
 static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
 {
-    V4L2m2mContext *s = avctx->priv_data;
+    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
     V4L2Context *const capture = &s->capture;
     V4L2Context *const output = &s->output;
     AVPacket avpkt = {0};
@@ -149,21 +149,30 @@
 
     if (avpkt.size) {
         ret = v4l2_try_start(avctx);
-        if (ret)
+        if (ret) {
+            av_packet_unref(&avpkt);
             return 0;
+        }
     }
 
 dequeue:
+    av_packet_unref(&avpkt);
     return ff_v4l2_context_dequeue_frame(capture, frame);
 }
 
 static av_cold int v4l2_decode_init(AVCodecContext *avctx)
 {
-    V4L2m2mContext *s = avctx->priv_data;
-    V4L2Context *capture = &s->capture;
-    V4L2Context *output = &s->output;
+    V4L2Context *capture, *output;
+    V4L2m2mContext *s;
     int ret;
 
+    ret = ff_v4l2_m2m_create_context(avctx, &s);
+    if (ret < 0)
+        return ret;
+
+    capture = &s->capture;
+    output = &s->output;
+
     /* if these dimensions are invalid (ie, 0 or too small) an event will be raised
      * by the v4l2 driver; this event will trigger a full pipeline reconfig and
      * the proper values will be retrieved from the kernel driver.
@@ -186,13 +195,13 @@
     return v4l2_prepare_decoder(s);
 }
 
-#define OFFSET(x) offsetof(V4L2m2mContext, x)
+#define OFFSET(x) offsetof(V4L2m2mPriv, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 
 static const AVOption options[] = {
     V4L_M2M_DEFAULT_OPTS,
     { "num_capture_buffers", "Number of buffers in the capture context",
-        OFFSET(capture.num_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 20, INT_MAX, FLAGS },
+        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 20, INT_MAX, FLAGS },
     { NULL},
 };
 
@@ -209,12 +218,15 @@
     .long_name      = NULL_IF_CONFIG_SMALL("V4L2 mem2mem " LONGNAME " decoder wrapper"),\
     .type           = AVMEDIA_TYPE_VIDEO,\
     .id             = CODEC ,\
-    .priv_data_size = sizeof(V4L2m2mContext),\
+    .priv_data_size = sizeof(V4L2m2mPriv),\
     .priv_class     = &v4l2_m2m_ ## NAME ## _dec_class,\
     .init           = v4l2_decode_init,\
     .receive_frame  = v4l2_receive_frame,\
     .close          = ff_v4l2_m2m_codec_end,\
     .bsfs           = bsf_name, \
+    .capabilities   = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | \
+                      AV_CODEC_CAP_AVOID_PROBING, \
+    .wrapper_name   = "v4l2m2m", \
 };
 
 M2MDEC(h264,  "H.264", AV_CODEC_ID_H264,       "h264_mp4toannexb");

diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
index f71ce5f..636e1a9 100644
--- a/libavcodec/v4l2_m2m_enc.c
+++ b/libavcodec/v4l2_m2m_enc.c

@@ -48,7 +48,7 @@
 
 static inline void v4l2_set_ext_ctrl(V4L2m2mContext *s, unsigned int id, signed int value, const char *name)
 {
-    struct v4l2_ext_controls ctrls = { 0 };
+    struct v4l2_ext_controls ctrls = { { 0 } };
     struct v4l2_ext_control ctrl = { 0 };
 
     /* set ctrls */
@@ -68,7 +68,7 @@
 
 static inline int v4l2_get_ext_ctrl(V4L2m2mContext *s, unsigned int id, signed int *value, const char *name)
 {
-    struct v4l2_ext_controls ctrls = { 0 };
+    struct v4l2_ext_controls ctrls = { { 0 } };
     struct v4l2_ext_control ctrl = { 0 };
     int ret;
 
@@ -204,7 +204,7 @@
             v4l2_set_ext_ctrl(s, MPEG_CID(MPEG4_PROFILE), val, "mpeg4 profile");
         qmin_cid = MPEG_CID(MPEG4_MIN_QP);
         qmax_cid = MPEG_CID(MPEG4_MAX_QP);
-        if (avctx->flags & CODEC_FLAG_QPEL)
+        if (avctx->flags & AV_CODEC_FLAG_QPEL)
             v4l2_set_ext_ctrl(s, MPEG_CID(MPEG4_QPEL), 1, "qpel");
         qmin = 1;
         qmax = 31;
@@ -242,7 +242,7 @@
 
 static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
 {
-    V4L2m2mContext *s = avctx->priv_data;
+    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
     V4L2Context *const output = &s->output;
 
     return ff_v4l2_context_enqueue_frame(output, frame);
@@ -250,7 +250,7 @@
 
 static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
 {
-    V4L2m2mContext *s = avctx->priv_data;
+    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
     V4L2Context *const capture = &s->capture;
     V4L2Context *const output = &s->output;
     int ret;
@@ -280,11 +280,17 @@
 
 static av_cold int v4l2_encode_init(AVCodecContext *avctx)
 {
-    V4L2m2mContext *s = avctx->priv_data;
-    V4L2Context *capture = &s->capture;
-    V4L2Context *output = &s->output;
+    V4L2Context *capture, *output;
+    V4L2m2mContext *s;
     int ret;
 
+    ret = ff_v4l2_m2m_create_context(avctx, &s);
+    if (ret < 0)
+        return ret;
+
+    capture = &s->capture;
+    output  = &s->output;
+
     /* common settings output/capture */
     output->height = capture->height = avctx->height;
     output->width = capture->width = avctx->width;
@@ -306,13 +312,13 @@
     return v4l2_prepare_encoder(s);
 }
 
-#define OFFSET(x) offsetof(V4L2m2mContext, x)
+#define OFFSET(x) offsetof(V4L2m2mPriv, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 
 static const AVOption options[] = {
     V4L_M2M_DEFAULT_OPTS,
     { "num_capture_buffers", "Number of buffers in the capture context",
-        OFFSET(capture.num_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS },
+        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS },
     { NULL },
 };
 
@@ -329,12 +335,14 @@
     .long_name      = NULL_IF_CONFIG_SMALL("V4L2 mem2mem " LONGNAME " encoder wrapper"),\
     .type           = AVMEDIA_TYPE_VIDEO,\
     .id             = CODEC ,\
-    .priv_data_size = sizeof(V4L2m2mContext),\
+    .priv_data_size = sizeof(V4L2m2mPriv),\
     .priv_class     = &v4l2_m2m_ ## NAME ##_enc_class,\
     .init           = v4l2_encode_init,\
     .send_frame     = v4l2_send_frame,\
     .receive_packet = v4l2_receive_packet,\
     .close          = ff_v4l2_m2m_codec_end,\
+    .capabilities   = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY, \
+    .wrapper_name   = "v4l2m2m", \
 };
 
 M2MENC(mpeg4,"MPEG4", AV_CODEC_ID_MPEG4);

diff --git a/libavcodec/vaapi.h b/libavcodec/vaapi.h
index bb28455..2cf7da5 100644
--- a/libavcodec/vaapi.h
+++ b/libavcodec/vaapi.h

@@ -77,115 +77,6 @@
      * - decoding: Set by user
      */
     uint32_t context_id;
-
-#if FF_API_VAAPI_CONTEXT
-    /**
-     * VAPictureParameterBuffer ID
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    uint32_t pic_param_buf_id;
-
-    /**
-     * VAIQMatrixBuffer ID
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    uint32_t iq_matrix_buf_id;
-
-    /**
-     * VABitPlaneBuffer ID (for VC-1 decoding)
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    uint32_t bitplane_buf_id;
-
-    /**
-     * Slice parameter/data buffer IDs
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    uint32_t *slice_buf_ids;
-
-    /**
-     * Number of effective slice buffer IDs to send to the HW
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    unsigned int n_slice_buf_ids;
-
-    /**
-     * Size of pre-allocated slice_buf_ids
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    unsigned int slice_buf_ids_alloc;
-
-    /**
-     * Pointer to VASliceParameterBuffers
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    void *slice_params;
-
-    /**
-     * Size of a VASliceParameterBuffer element
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    unsigned int slice_param_size;
-
-    /**
-     * Size of pre-allocated slice_params
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    unsigned int slice_params_alloc;
-
-    /**
-     * Number of slices currently filled in
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    unsigned int slice_count;
-
-    /**
-     * Pointer to slice data buffer base
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    const uint8_t *slice_data;
-
-    /**
-     * Current size of slice data
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec
-     */
-    attribute_deprecated
-    uint32_t slice_data_size;
-#endif
 };
 
 /* @} */

diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
index 27ef338..69512e1 100644
--- a/libavcodec/vaapi_decode.c
+++ b/libavcodec/vaapi_decode.c

@@ -21,6 +21,7 @@
 #include "libavutil/pixdesc.h"
 
 #include "avcodec.h"
+#include "decode.h"
 #include "internal.h"
 #include "vaapi_decode.h"
 
@@ -199,12 +200,8 @@
         AV_VAAPI_DRIVER_QUIRK_RENDER_PARAM_BUFFERS)
         ff_vaapi_decode_destroy_buffers(avctx, pic);
 
-    pic->nb_param_buffers = 0;
-    pic->nb_slices        = 0;
-    pic->slices_allocated = 0;
-    av_freep(&pic->slice_buffers);
-
-    return 0;
+    err = 0;
+    goto exit;
 
 fail_with_picture:
     vas = vaEndPicture(ctx->hwctx->display, ctx->va_context);
@@ -215,6 +212,12 @@
 fail:
     ff_vaapi_decode_destroy_buffers(avctx, pic);
 fail_at_end:
+exit:
+    pic->nb_param_buffers = 0;
+    pic->nb_slices        = 0;
+    pic->slices_allocated = 0;
+    av_freep(&pic->slice_buffers);
+
     return err;
 }
 
@@ -232,6 +235,132 @@
 }
 
 static const struct {
+    uint32_t fourcc;
+    enum AVPixelFormat pix_fmt;
+} vaapi_format_map[] = {
+#define MAP(va, av) { VA_FOURCC_ ## va, AV_PIX_FMT_ ## av }
+    // 4:0:0
+    MAP(Y800, GRAY8),
+    // 4:2:0
+    MAP(NV12, NV12),
+    MAP(YV12, YUV420P),
+    MAP(IYUV, YUV420P),
+#ifdef VA_FOURCC_I420
+    MAP(I420, YUV420P),
+#endif
+    MAP(IMC3, YUV420P),
+    // 4:1:1
+    MAP(411P, YUV411P),
+    // 4:2:2
+    MAP(422H, YUV422P),
+#ifdef VA_FOURCC_YV16
+    MAP(YV16, YUV422P),
+#endif
+    // 4:4:0
+    MAP(422V, YUV440P),
+    // 4:4:4
+    MAP(444P, YUV444P),
+    // 4:2:0 10-bit
+#ifdef VA_FOURCC_P010
+    MAP(P010, P010),
+#endif
+#ifdef VA_FOURCC_I010
+    MAP(I010, YUV420P10),
+#endif
+#undef MAP
+};
+
+static int vaapi_decode_find_best_format(AVCodecContext *avctx,
+                                         AVHWDeviceContext *device,
+                                         VAConfigID config_id,
+                                         AVHWFramesContext *frames)
+{
+    AVVAAPIDeviceContext *hwctx = device->hwctx;
+    VAStatus vas;
+    VASurfaceAttrib *attr;
+    enum AVPixelFormat source_format, best_format, format;
+    uint32_t best_fourcc, fourcc;
+    int i, j, nb_attr;
+
+    source_format = avctx->sw_pix_fmt;
+    av_assert0(source_format != AV_PIX_FMT_NONE);
+
+    vas = vaQuerySurfaceAttributes(hwctx->display, config_id,
+                                   NULL, &nb_attr);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query surface attributes: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR(ENOSYS);
+    }
+
+    attr = av_malloc_array(nb_attr, sizeof(*attr));
+    if (!attr)
+        return AVERROR(ENOMEM);
+
+    vas = vaQuerySurfaceAttributes(hwctx->display, config_id,
+                                   attr, &nb_attr);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query surface attributes: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        av_freep(&attr);
+        return AVERROR(ENOSYS);
+    }
+
+    best_format = AV_PIX_FMT_NONE;
+
+    for (i = 0; i < nb_attr; i++) {
+        if (attr[i].type != VASurfaceAttribPixelFormat)
+            continue;
+
+        fourcc = attr[i].value.value.i;
+        for (j = 0; j < FF_ARRAY_ELEMS(vaapi_format_map); j++) {
+            if (fourcc == vaapi_format_map[j].fourcc)
+                break;
+        }
+        if (j >= FF_ARRAY_ELEMS(vaapi_format_map)) {
+            av_log(avctx, AV_LOG_DEBUG, "Ignoring unknown format %#x.\n",
+                   fourcc);
+            continue;
+        }
+        format = vaapi_format_map[j].pix_fmt;
+        av_log(avctx, AV_LOG_DEBUG, "Considering format %#x -> %s.\n",
+               fourcc, av_get_pix_fmt_name(format));
+
+        best_format = av_find_best_pix_fmt_of_2(format, best_format,
+                                                source_format, 0, NULL);
+        if (format == best_format)
+            best_fourcc = fourcc;
+    }
+
+    av_freep(&attr);
+
+    if (best_format == AV_PIX_FMT_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "No usable formats for decoding!\n");
+        return AVERROR(EINVAL);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Picked %s (%#x) as best match for %s.\n",
+           av_get_pix_fmt_name(best_format), best_fourcc,
+           av_get_pix_fmt_name(source_format));
+
+    frames->sw_format = best_format;
+    if (avctx->internal->hwaccel_priv_data) {
+        VAAPIDecodeContext    *ctx = avctx->internal->hwaccel_priv_data;
+        AVVAAPIFramesContext *avfc = frames->hwctx;
+
+        ctx->pixel_format_attribute = (VASurfaceAttrib) {
+            .type          = VASurfaceAttribPixelFormat,
+            .value.value.i = best_fourcc,
+        };
+
+        avfc->attributes    = &ctx->pixel_format_attribute;
+        avfc->nb_attributes = 1;
+    }
+
+    return 0;
+}
+
+static const struct {
     enum AVCodecID codec_id;
     int codec_profile;
     VAProfile va_profile;
@@ -252,6 +381,8 @@
     MAP(HEVC,        HEVC_MAIN,       HEVCMain    ),
     MAP(HEVC,        HEVC_MAIN_10,    HEVCMain10  ),
 #endif
+    MAP(MJPEG,       MJPEG_HUFFMAN_BASELINE_DCT,
+                                      JPEGBaseline),
     MAP(WMV3,        VC1_SIMPLE,      VC1Simple   ),
     MAP(WMV3,        VC1_MAIN,        VC1Main     ),
     MAP(WMV3,        VC1_COMPLEX,     VC1Advanced ),
@@ -260,9 +391,7 @@
     MAP(VC1,         VC1_MAIN,        VC1Main     ),
     MAP(VC1,         VC1_COMPLEX,     VC1Advanced ),
     MAP(VC1,         VC1_ADVANCED,    VC1Advanced ),
-#if VA_CHECK_VERSION(0, 35, 0)
     MAP(VP8,         UNKNOWN,       VP8Version0_3 ),
-#endif
 #if VA_CHECK_VERSION(0, 38, 0)
     MAP(VP9,         VP9_0,           VP9Profile0 ),
 #endif
@@ -272,18 +401,25 @@
 #undef MAP
 };
 
-static int vaapi_decode_make_config(AVCodecContext *avctx)
+/*
+ * Set *va_config and the frames_ref fields from the current codec parameters
+ * in avctx.
+ */
+static int vaapi_decode_make_config(AVCodecContext *avctx,
+                                    AVBufferRef *device_ref,
+                                    VAConfigID *va_config,
+                                    AVBufferRef *frames_ref)
 {
-    VAAPIDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
-
     AVVAAPIHWConfig       *hwconfig    = NULL;
     AVHWFramesConstraints *constraints = NULL;
     VAStatus vas;
     int err, i, j;
     const AVCodecDescriptor *codec_desc;
-    VAProfile profile, va_profile, *profile_list = NULL;
-    int profile_count, exact_match, alt_profile;
-    const AVPixFmtDescriptor *sw_desc, *desc;
+    VAProfile *profile_list = NULL, matched_va_profile;
+    int profile_count, exact_match, matched_ff_profile;
+
+    AVHWDeviceContext    *device = (AVHWDeviceContext*)device_ref->data;
+    AVVAAPIDeviceContext *hwctx = device->hwctx;
 
     codec_desc = avcodec_descriptor_get(avctx->codec_id);
     if (!codec_desc) {
@@ -291,7 +427,7 @@
         goto fail;
     }
 
-    profile_count = vaMaxNumProfiles(ctx->hwctx->display);
+    profile_count = vaMaxNumProfiles(hwctx->display);
     profile_list  = av_malloc_array(profile_count,
                                     sizeof(VAProfile));
     if (!profile_list) {
@@ -299,7 +435,7 @@
         goto fail;
     }
 
-    vas = vaQueryConfigProfiles(ctx->hwctx->display,
+    vas = vaQueryConfigProfiles(hwctx->display,
                                 profile_list, &profile_count);
     if (vas != VA_STATUS_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Failed to query profiles: "
@@ -308,32 +444,32 @@
         goto fail;
     }
 
-    profile = VAProfileNone;
+    matched_va_profile = VAProfileNone;
     exact_match = 0;
 
     for (i = 0; i < FF_ARRAY_ELEMS(vaapi_profile_map); i++) {
         int profile_match = 0;
         if (avctx->codec_id != vaapi_profile_map[i].codec_id)
             continue;
-        if (avctx->profile == vaapi_profile_map[i].codec_profile)
+        if (avctx->profile == vaapi_profile_map[i].codec_profile ||
+            vaapi_profile_map[i].codec_profile == FF_PROFILE_UNKNOWN)
             profile_match = 1;
-        profile = vaapi_profile_map[i].va_profile;
         for (j = 0; j < profile_count; j++) {
-            if (profile == profile_list[j]) {
+            if (vaapi_profile_map[i].va_profile == profile_list[j]) {
                 exact_match = profile_match;
                 break;
             }
         }
         if (j < profile_count) {
+            matched_va_profile = vaapi_profile_map[i].va_profile;
+            matched_ff_profile = vaapi_profile_map[i].codec_profile;
             if (exact_match)
                 break;
-            alt_profile = vaapi_profile_map[i].codec_profile;
-            va_profile = vaapi_profile_map[i].va_profile;
         }
     }
     av_freep(&profile_list);
 
-    if (profile == VAProfileNone) {
+    if (matched_va_profile == VAProfileNone) {
         av_log(avctx, AV_LOG_ERROR, "No support for codec %s "
                "profile %d.\n", codec_desc->name, avctx->profile);
         err = AVERROR(ENOSYS);
@@ -347,8 +483,7 @@
                    codec_desc->name, avctx->profile);
             av_log(avctx, AV_LOG_WARNING, "Using possibly-"
                    "incompatible profile %d instead.\n",
-                   alt_profile);
-            profile = va_profile;
+                   matched_ff_profile);
         } else {
             av_log(avctx, AV_LOG_VERBOSE, "Codec %s profile %d not "
                    "supported for hardware decode.\n",
@@ -358,12 +493,9 @@
         }
     }
 
-    ctx->va_profile    = profile;
-    ctx->va_entrypoint = VAEntrypointVLD;
-
-    vas = vaCreateConfig(ctx->hwctx->display, ctx->va_profile,
-                         ctx->va_entrypoint, NULL, 0,
-                         &ctx->va_config);
+    vas = vaCreateConfig(hwctx->display, matched_va_profile,
+                         VAEntrypointVLD, NULL, 0,
+                         va_config);
     if (vas != VA_STATUS_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Failed to create decode "
                "configuration: %d (%s).\n", vas, vaErrorStr(vas));
@@ -371,20 +503,15 @@
         goto fail;
     }
 
-    hwconfig = av_hwdevice_hwconfig_alloc(avctx->hw_device_ctx ?
-                                          avctx->hw_device_ctx :
-                                          ctx->frames->device_ref);
+    hwconfig = av_hwdevice_hwconfig_alloc(device_ref);
     if (!hwconfig) {
         err = AVERROR(ENOMEM);
         goto fail;
     }
-    hwconfig->config_id = ctx->va_config;
+    hwconfig->config_id = *va_config;
 
     constraints =
-        av_hwdevice_get_hwframe_constraints(avctx->hw_device_ctx ?
-                                            avctx->hw_device_ctx :
-                                            ctx->frames->device_ref,
-                                            hwconfig);
+        av_hwdevice_get_hwframe_constraints(device_ref, hwconfig);
     if (!constraints) {
         err = AVERROR(ENOMEM);
         goto fail;
@@ -410,48 +537,35 @@
         goto fail;
     }
 
-    // Find the first format in the list which matches the expected
-    // bit depth and subsampling.  If none are found (this can happen
-    // when 10-bit streams are decoded to 8-bit surfaces, for example)
-    // then just take the first format on the list.
-    ctx->surface_format = constraints->valid_sw_formats[0];
-    sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
-    for (i = 0; constraints->valid_sw_formats[i] != AV_PIX_FMT_NONE; i++) {
-        desc = av_pix_fmt_desc_get(constraints->valid_sw_formats[i]);
-        if (desc->nb_components != sw_desc->nb_components ||
-            desc->log2_chroma_w != sw_desc->log2_chroma_w ||
-            desc->log2_chroma_h != sw_desc->log2_chroma_h)
-            continue;
-        for (j = 0; j < desc->nb_components; j++) {
-            if (desc->comp[j].depth != sw_desc->comp[j].depth)
-                break;
-        }
-        if (j < desc->nb_components)
-            continue;
-        ctx->surface_format = constraints->valid_sw_formats[i];
-        break;
-    }
+    if (frames_ref) {
+        AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
 
-    // Start with at least four surfaces.
-    ctx->surface_count = 4;
-    // Add per-codec number of surfaces used for storing reference frames.
-    switch (avctx->codec_id) {
-    case AV_CODEC_ID_H264:
-    case AV_CODEC_ID_HEVC:
-        ctx->surface_count += 16;
-        break;
-    case AV_CODEC_ID_VP9:
-        ctx->surface_count += 8;
-        break;
-    case AV_CODEC_ID_VP8:
-        ctx->surface_count += 3;
-        break;
-    default:
-        ctx->surface_count += 2;
+        frames->format = AV_PIX_FMT_VAAPI;
+        frames->width = avctx->coded_width;
+        frames->height = avctx->coded_height;
+
+        err = vaapi_decode_find_best_format(avctx, device,
+                                            *va_config, frames);
+        if (err < 0)
+            goto fail;
+
+        frames->initial_pool_size = 1;
+        // Add per-codec number of surfaces used for storing reference frames.
+        switch (avctx->codec_id) {
+        case AV_CODEC_ID_H264:
+        case AV_CODEC_ID_HEVC:
+            frames->initial_pool_size += 16;
+            break;
+        case AV_CODEC_ID_VP9:
+            frames->initial_pool_size += 8;
+            break;
+        case AV_CODEC_ID_VP8:
+            frames->initial_pool_size += 3;
+            break;
+        default:
+            frames->initial_pool_size += 2;
+        }
     }
-    // Add an additional surface per thread is frame threading is enabled.
-    if (avctx->active_thread_type & FF_THREAD_FRAME)
-        ctx->surface_count += avctx->thread_count;
 
     av_hwframe_constraints_free(&constraints);
     av_freep(&hwconfig);
@@ -461,14 +575,38 @@
 fail:
     av_hwframe_constraints_free(&constraints);
     av_freep(&hwconfig);
-    if (ctx->va_config != VA_INVALID_ID) {
-        vaDestroyConfig(ctx->hwctx->display, ctx->va_config);
-        ctx->va_config = VA_INVALID_ID;
+    if (*va_config != VA_INVALID_ID) {
+        vaDestroyConfig(hwctx->display, *va_config);
+        *va_config = VA_INVALID_ID;
     }
     av_freep(&profile_list);
     return err;
 }
 
+int ff_vaapi_common_frame_params(AVCodecContext *avctx,
+                                 AVBufferRef *hw_frames_ctx)
+{
+    AVHWFramesContext *hw_frames = (AVHWFramesContext *)hw_frames_ctx->data;
+    AVHWDeviceContext *device_ctx = hw_frames->device_ctx;
+    AVVAAPIDeviceContext *hwctx;
+    VAConfigID va_config = VA_INVALID_ID;
+    int err;
+
+    if (device_ctx->type != AV_HWDEVICE_TYPE_VAAPI)
+        return AVERROR(EINVAL);
+    hwctx = device_ctx->hwctx;
+
+    err = vaapi_decode_make_config(avctx, hw_frames->device_ref, &va_config,
+                                   hw_frames_ctx);
+    if (err)
+        return err;
+
+    if (va_config != VA_INVALID_ID)
+        vaDestroyConfig(hwctx->display, va_config);
+
+    return 0;
+}
+
 int ff_vaapi_decode_init(AVCodecContext *avctx)
 {
     VAAPIDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
@@ -505,36 +643,8 @@
         ctx->hwctx->driver_quirks =
             AV_VAAPI_DRIVER_QUIRK_RENDER_PARAM_BUFFERS;
 
-    } else
-#endif
-    if (avctx->hw_frames_ctx) {
-        // This structure has a shorter lifetime than the enclosing
-        // AVCodecContext, so we inherit the references from there
-        // and do not need to make separate ones.
-
-        ctx->frames = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
-        ctx->hwfc   = ctx->frames->hwctx;
-        ctx->device = ctx->frames->device_ctx;
-        ctx->hwctx  = ctx->device->hwctx;
-
-    } else if (avctx->hw_device_ctx) {
-        ctx->device = (AVHWDeviceContext*)avctx->hw_device_ctx->data;
-        ctx->hwctx  = ctx->device->hwctx;
-
-        if (ctx->device->type != AV_HWDEVICE_TYPE_VAAPI) {
-            av_log(avctx, AV_LOG_ERROR, "Device supplied for VAAPI "
-                   "decoding must be a VAAPI device (not %d).\n",
-                   ctx->device->type);
-            err = AVERROR(EINVAL);
-            goto fail;
-        }
-
-    } else {
-        av_log(avctx, AV_LOG_ERROR, "A hardware device or frames context "
-               "is required for VAAPI decoding.\n");
-        err = AVERROR(EINVAL);
-        goto fail;
     }
+#endif
 
 #if FF_API_STRUCT_VAAPI_CONTEXT
     if (ctx->have_old_context) {
@@ -546,34 +656,19 @@
     } else {
 #endif
 
-    err = vaapi_decode_make_config(avctx);
-    if (err)
+    err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VAAPI);
+    if (err < 0)
         goto fail;
 
-    if (!avctx->hw_frames_ctx) {
-        avctx->hw_frames_ctx = av_hwframe_ctx_alloc(avctx->hw_device_ctx);
-        if (!avctx->hw_frames_ctx) {
-            err = AVERROR(ENOMEM);
-            goto fail;
-        }
-        ctx->frames = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+    ctx->frames = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+    ctx->hwfc   = ctx->frames->hwctx;
+    ctx->device = ctx->frames->device_ctx;
+    ctx->hwctx  = ctx->device->hwctx;
 
-        ctx->frames->format = AV_PIX_FMT_VAAPI;
-        ctx->frames->width  = avctx->coded_width;
-        ctx->frames->height = avctx->coded_height;
-
-        ctx->frames->sw_format         = ctx->surface_format;
-        ctx->frames->initial_pool_size = ctx->surface_count;
-
-        err = av_hwframe_ctx_init(avctx->hw_frames_ctx);
-        if (err < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Failed to initialise internal "
-                   "frames context: %d.\n", err);
-            goto fail;
-        }
-
-        ctx->hwfc = ctx->frames->hwctx;
-    }
+    err = vaapi_decode_make_config(avctx, ctx->frames->device_ref,
+                                   &ctx->va_config, avctx->hw_frames_ctx);
+    if (err)
+        goto fail;
 
     vas = vaCreateContext(ctx->hwctx->display, ctx->va_config,
                           avctx->coded_width, avctx->coded_height,

diff --git a/libavcodec/vaapi_decode.h b/libavcodec/vaapi_decode.h
index 550ee05..6b415dd 100644
--- a/libavcodec/vaapi_decode.h
+++ b/libavcodec/vaapi_decode.h

@@ -53,8 +53,6 @@
 } VAAPIDecodePicture;
 
 typedef struct VAAPIDecodeContext {
-    VAProfile             va_profile;
-    VAEntrypoint          va_entrypoint;
     VAConfigID            va_config;
     VAContextID           va_context;
 
@@ -74,6 +72,8 @@
 
     enum AVPixelFormat    surface_format;
     int                   surface_count;
+
+    VASurfaceAttrib       pixel_format_attribute;
 } VAAPIDecodeContext;
 
 
@@ -98,4 +98,7 @@
 int ff_vaapi_decode_init(AVCodecContext *avctx);
 int ff_vaapi_decode_uninit(AVCodecContext *avctx);
 
+int ff_vaapi_common_frame_params(AVCodecContext *avctx,
+                                 AVBufferRef *hw_frames_ctx);
+
 #endif /* AVCODEC_VAAPI_DECODE_H */

diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 590f4be..2c34cdc 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c

@@ -207,9 +207,16 @@
 
     pic->nb_param_buffers = 0;
 
-    if (pic->encode_order == 0) {
-        // Global parameter buffers are set on the first picture only.
+    if (pic->type == PICTURE_TYPE_IDR && ctx->codec->init_sequence_params) {
+        err = vaapi_encode_make_param_buffer(avctx, pic,
+                                             VAEncSequenceParameterBufferType,
+                                             ctx->codec_sequence_params,
+                                             ctx->codec->sequence_params_size);
+        if (err < 0)
+            goto fail;
+    }
 
+    if (pic->type == PICTURE_TYPE_IDR) {
         for (i = 0; i < ctx->nb_global_params; i++) {
             err = vaapi_encode_make_param_buffer(avctx, pic,
                                                  VAEncMiscParameterBufferType,
@@ -220,15 +227,6 @@
         }
     }
 
-    if (pic->type == PICTURE_TYPE_IDR && ctx->codec->init_sequence_params) {
-        err = vaapi_encode_make_param_buffer(avctx, pic,
-                                             VAEncSequenceParameterBufferType,
-                                             ctx->codec_sequence_params,
-                                             ctx->codec->sequence_params_size);
-        if (err < 0)
-            goto fail;
-    }
-
     if (ctx->codec->init_picture_params) {
         err = ctx->codec->init_picture_params(avctx, pic);
         if (err < 0) {
@@ -321,10 +319,12 @@
         }
     }
 
-    pic->slices = av_mallocz_array(pic->nb_slices, sizeof(*pic->slices));
-    if (!pic->slices) {
-        err = AVERROR(ENOMEM);
-        goto fail;
+    if (pic->nb_slices > 0) {
+        pic->slices = av_mallocz_array(pic->nb_slices, sizeof(*pic->slices));
+        if (!pic->slices) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
     }
     for (i = 0; i < pic->nb_slices; i++) {
         slice = &pic->slices[i];
@@ -671,7 +671,7 @@
         return AVERROR(ENOMEM);
 
     if (ctx->input_order == 0 || ctx->force_idr ||
-        ctx->gop_counter >= avctx->gop_size) {
+        ctx->gop_counter >= ctx->gop_size) {
         pic->type = PICTURE_TYPE_IDR;
         ctx->force_idr = 0;
         ctx->gop_counter = 1;
@@ -694,7 +694,7 @@
         // encode-after it, but not exceeding the GOP size.
 
         for (i = 0; i < ctx->b_per_p &&
-             ctx->gop_counter < avctx->gop_size; i++) {
+             ctx->gop_counter < ctx->gop_size; i++) {
             pic = vaapi_encode_alloc();
             if (!pic)
                 goto fail;
@@ -760,6 +760,8 @@
     VAAPIEncodeContext *ctx = avctx->priv_data;
     VAAPIEncodePicture *pic, *last_pic, *next;
 
+    av_assert0(!ctx->pic_start || ctx->pic_start->input_available);
+
     // Find the last picture we actually have input for.
     for (pic = ctx->pic_start; pic; pic = pic->next) {
         if (!pic->input_available)
@@ -768,8 +770,6 @@
     }
 
     if (pic) {
-        av_assert0(last_pic);
-
         if (last_pic->type == PICTURE_TYPE_B) {
             // Some fixing up is required.  Change the type of this
             // picture to P, then modify preceding B references which
@@ -958,217 +958,403 @@
     return err;
 }
 
-static av_cold int vaapi_encode_config_attributes(AVCodecContext *avctx)
+static av_cold void vaapi_encode_add_global_param(AVCodecContext *avctx,
+                                                  VAEncMiscParameterBuffer *buffer,
+                                                  size_t size)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
+
+    av_assert0(ctx->nb_global_params < MAX_GLOBAL_PARAMS);
+
+    ctx->global_params     [ctx->nb_global_params] = buffer;
+    ctx->global_params_size[ctx->nb_global_params] = size;
+
+    ++ctx->nb_global_params;
+}
+
+typedef struct VAAPIEncodeRTFormat {
+    const char *name;
+    unsigned int value;
+    int depth;
+    int nb_components;
+    int log2_chroma_w;
+    int log2_chroma_h;
+} VAAPIEncodeRTFormat;
+
+static const VAAPIEncodeRTFormat vaapi_encode_rt_formats[] = {
+    { "YUV400",    VA_RT_FORMAT_YUV400,        8, 1,      },
+    { "YUV420",    VA_RT_FORMAT_YUV420,        8, 3, 1, 1 },
+    { "YUV422",    VA_RT_FORMAT_YUV422,        8, 3, 1, 0 },
+    { "YUV444",    VA_RT_FORMAT_YUV444,        8, 3, 0, 0 },
+    { "YUV411",    VA_RT_FORMAT_YUV411,        8, 3, 2, 0 },
+#if VA_CHECK_VERSION(0, 38, 1)
+    { "YUV420_10", VA_RT_FORMAT_YUV420_10BPP, 10, 3, 1, 1 },
+#endif
+};
+
+static const VAEntrypoint vaapi_encode_entrypoints_normal[] = {
+    VAEntrypointEncSlice,
+    VAEntrypointEncPicture,
+#if VA_CHECK_VERSION(0, 39, 2)
+    VAEntrypointEncSliceLP,
+#endif
+    0
+};
+#if VA_CHECK_VERSION(0, 39, 2)
+static const VAEntrypoint vaapi_encode_entrypoints_low_power[] = {
+    VAEntrypointEncSliceLP,
+    0
+};
+#endif
+
+static av_cold int vaapi_encode_profile_entrypoint(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext      *ctx = avctx->priv_data;
+    VAProfile    *va_profiles    = NULL;
+    VAEntrypoint *va_entrypoints = NULL;
     VAStatus vas;
-    int i, n, err;
-    VAProfile    *profiles    = NULL;
-    VAEntrypoint *entrypoints = NULL;
-    VAConfigAttrib attr[] = {
-        { VAConfigAttribRTFormat         },
-        { VAConfigAttribRateControl      },
-        { VAConfigAttribEncMaxRefFrames  },
-        { VAConfigAttribEncPackedHeaders },
-    };
+    const VAEntrypoint *usable_entrypoints;
+    const VAAPIEncodeProfile *profile;
+    const AVPixFmtDescriptor *desc;
+    VAConfigAttrib rt_format_attr;
+    const VAAPIEncodeRTFormat *rt_format;
+    const char *profile_string, *entrypoint_string;
+    int i, j, n, depth, err;
+
+
+    if (ctx->low_power) {
+#if VA_CHECK_VERSION(0, 39, 2)
+        usable_entrypoints = vaapi_encode_entrypoints_low_power;
+#else
+        av_log(avctx, AV_LOG_ERROR, "Low-power encoding is not "
+               "supported with this VAAPI version.\n");
+        return AVERROR(EINVAL);
+#endif
+    } else {
+        usable_entrypoints = vaapi_encode_entrypoints_normal;
+    }
+
+    desc = av_pix_fmt_desc_get(ctx->input_frames->sw_format);
+    if (!desc) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid input pixfmt (%d).\n",
+               ctx->input_frames->sw_format);
+        return AVERROR(EINVAL);
+    }
+    depth = desc->comp[0].depth;
+    for (i = 1; i < desc->nb_components; i++) {
+        if (desc->comp[i].depth != depth) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid input pixfmt (%s).\n",
+                   desc->name);
+            return AVERROR(EINVAL);
+        }
+    }
+    av_log(avctx, AV_LOG_VERBOSE, "Input surface format is %s.\n",
+           desc->name);
 
     n = vaMaxNumProfiles(ctx->hwctx->display);
-    profiles = av_malloc_array(n, sizeof(VAProfile));
-    if (!profiles) {
+    va_profiles = av_malloc_array(n, sizeof(VAProfile));
+    if (!va_profiles) {
         err = AVERROR(ENOMEM);
         goto fail;
     }
-    vas = vaQueryConfigProfiles(ctx->hwctx->display, profiles, &n);
+    vas = vaQueryConfigProfiles(ctx->hwctx->display, va_profiles, &n);
     if (vas != VA_STATUS_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to query profiles: %d (%s).\n",
+        av_log(avctx, AV_LOG_ERROR, "Failed to query profiles: %d (%s).\n",
                vas, vaErrorStr(vas));
-        err = AVERROR(ENOSYS);
+        err = AVERROR_EXTERNAL;
         goto fail;
     }
-    for (i = 0; i < n; i++) {
-        if (profiles[i] == ctx->va_profile)
-            break;
+
+    av_assert0(ctx->codec->profiles);
+    for (i = 0; (ctx->codec->profiles[i].av_profile !=
+                 FF_PROFILE_UNKNOWN); i++) {
+        profile = &ctx->codec->profiles[i];
+        if (depth               != profile->depth ||
+            desc->nb_components != profile->nb_components)
+            continue;
+        if (desc->nb_components > 1 &&
+            (desc->log2_chroma_w != profile->log2_chroma_w ||
+             desc->log2_chroma_h != profile->log2_chroma_h))
+            continue;
+        if (avctx->profile != profile->av_profile &&
+            avctx->profile != FF_PROFILE_UNKNOWN)
+            continue;
+
+#if VA_CHECK_VERSION(1, 0, 0)
+        profile_string = vaProfileStr(profile->va_profile);
+#else
+        profile_string = "(no profile names)";
+#endif
+
+        for (j = 0; j < n; j++) {
+            if (va_profiles[j] == profile->va_profile)
+                break;
+        }
+        if (j >= n) {
+            av_log(avctx, AV_LOG_VERBOSE, "Matching profile %d is "
+                   "not supported by driver.\n", profile->va_profile);
+            continue;
+        }
+
+        ctx->profile = profile;
+        break;
     }
-    if (i >= n) {
-        av_log(ctx, AV_LOG_ERROR, "Encoding profile not found (%d).\n",
-               ctx->va_profile);
+    if (!ctx->profile) {
+        av_log(avctx, AV_LOG_ERROR, "No usable encoding profile found.\n");
         err = AVERROR(ENOSYS);
         goto fail;
     }
 
+    avctx->profile  = profile->av_profile;
+    ctx->va_profile = profile->va_profile;
+    av_log(avctx, AV_LOG_VERBOSE, "Using VAAPI profile %s (%d).\n",
+           profile_string, ctx->va_profile);
+
     n = vaMaxNumEntrypoints(ctx->hwctx->display);
-    entrypoints = av_malloc_array(n, sizeof(VAEntrypoint));
-    if (!entrypoints) {
+    va_entrypoints = av_malloc_array(n, sizeof(VAEntrypoint));
+    if (!va_entrypoints) {
         err = AVERROR(ENOMEM);
         goto fail;
     }
     vas = vaQueryConfigEntrypoints(ctx->hwctx->display, ctx->va_profile,
-                                   entrypoints, &n);
+                                   va_entrypoints, &n);
     if (vas != VA_STATUS_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to query entrypoints for "
-               "profile %u: %d (%s).\n", ctx->va_profile,
-               vas, vaErrorStr(vas));
-        err = AVERROR(ENOSYS);
+        av_log(avctx, AV_LOG_ERROR, "Failed to query entrypoints for "
+               "profile %s (%d): %d (%s).\n", profile_string,
+               ctx->va_profile, vas, vaErrorStr(vas));
+        err = AVERROR_EXTERNAL;
         goto fail;
     }
+
     for (i = 0; i < n; i++) {
-        if (entrypoints[i] == ctx->va_entrypoint)
+        for (j = 0; usable_entrypoints[j]; j++) {
+            if (va_entrypoints[i] == usable_entrypoints[j])
+                break;
+        }
+        if (usable_entrypoints[j])
             break;
     }
     if (i >= n) {
-        av_log(ctx, AV_LOG_ERROR, "Encoding entrypoint not found "
-               "(%d / %d).\n", ctx->va_profile, ctx->va_entrypoint);
+        av_log(avctx, AV_LOG_ERROR, "No usable encoding entrypoint found "
+               "for profile %s (%d).\n", profile_string, ctx->va_profile);
         err = AVERROR(ENOSYS);
         goto fail;
     }
 
-    vas = vaGetConfigAttributes(ctx->hwctx->display,
-                                ctx->va_profile, ctx->va_entrypoint,
-                                attr, FF_ARRAY_ELEMS(attr));
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to fetch config "
-               "attributes: %d (%s).\n", vas, vaErrorStr(vas));
-        return AVERROR(EINVAL);
+    ctx->va_entrypoint = va_entrypoints[i];
+#if VA_CHECK_VERSION(1, 0, 0)
+    entrypoint_string = vaEntrypointStr(ctx->va_entrypoint);
+#else
+    entrypoint_string = "(no entrypoint names)";
+#endif
+    av_log(avctx, AV_LOG_VERBOSE, "Using VAAPI entrypoint %s (%d).\n",
+           entrypoint_string, ctx->va_entrypoint);
+
+    for (i = 0; i < FF_ARRAY_ELEMS(vaapi_encode_rt_formats); i++) {
+        rt_format = &vaapi_encode_rt_formats[i];
+        if (rt_format->depth         == depth &&
+            rt_format->nb_components == profile->nb_components &&
+            rt_format->log2_chroma_w == profile->log2_chroma_w &&
+            rt_format->log2_chroma_h == profile->log2_chroma_h)
+            break;
+    }
+    if (i >= FF_ARRAY_ELEMS(vaapi_encode_rt_formats)) {
+        av_log(avctx, AV_LOG_ERROR, "No usable render target format "
+               "found for profile %s (%d) entrypoint %s (%d).\n",
+               profile_string, ctx->va_profile,
+               entrypoint_string, ctx->va_entrypoint);
+        err = AVERROR(ENOSYS);
+        goto fail;
     }
 
-    for (i = 0; i < FF_ARRAY_ELEMS(attr); i++) {
-        if (attr[i].value == VA_ATTRIB_NOT_SUPPORTED) {
-            // Unfortunately we have to treat this as "don't know" and hope
-            // for the best, because the Intel MJPEG encoder returns this
-            // for all the interesting attributes.
-            continue;
-        }
-        switch (attr[i].type) {
-        case VAConfigAttribRTFormat:
-            if (!(ctx->va_rt_format & attr[i].value)) {
-                av_log(avctx, AV_LOG_ERROR, "Surface RT format %#x "
-                       "is not supported (mask %#x).\n",
-                       ctx->va_rt_format, attr[i].value);
-                err = AVERROR(EINVAL);
-                goto fail;
-            }
-            ctx->config_attributes[ctx->nb_config_attributes++] =
-                (VAConfigAttrib) {
-                .type  = VAConfigAttribRTFormat,
-                .value = ctx->va_rt_format,
-            };
-            break;
-        case VAConfigAttribRateControl:
-            // Hack for backward compatibility: CBR was the only
-            // usable RC mode for a long time, so old drivers will
-            // only have it.  Normal default options may now choose
-            // VBR and then fail, however, so override it here with
-            // CBR if that is the only supported mode.
-            if (ctx->va_rc_mode == VA_RC_VBR &&
-                !(attr[i].value & VA_RC_VBR) &&
-                (attr[i].value & VA_RC_CBR)) {
-                av_log(avctx, AV_LOG_WARNING, "VBR rate control is "
-                       "not supported with this driver version; "
-                       "using CBR instead.\n");
-                ctx->va_rc_mode = VA_RC_CBR;
-            }
-            if (!(ctx->va_rc_mode & attr[i].value)) {
-                av_log(avctx, AV_LOG_ERROR, "Rate control mode %#x "
-                       "is not supported (mask: %#x).\n",
-                       ctx->va_rc_mode, attr[i].value);
-                err = AVERROR(EINVAL);
-                goto fail;
-            }
-            ctx->config_attributes[ctx->nb_config_attributes++] =
-                (VAConfigAttrib) {
-                .type  = VAConfigAttribRateControl,
-                .value = ctx->va_rc_mode,
-            };
-            break;
-        case VAConfigAttribEncMaxRefFrames:
-        {
-            unsigned int ref_l0 = attr[i].value & 0xffff;
-            unsigned int ref_l1 = (attr[i].value >> 16) & 0xffff;
+    rt_format_attr = (VAConfigAttrib) { VAConfigAttribRTFormat };
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile, ctx->va_entrypoint,
+                                &rt_format_attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query RT format "
+               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
 
-            if (avctx->gop_size > 1 && ref_l0 < 1) {
-                av_log(avctx, AV_LOG_ERROR, "P frames are not "
-                       "supported (%#x).\n", attr[i].value);
-                err = AVERROR(EINVAL);
-                goto fail;
-            }
-            if (avctx->max_b_frames > 0 && ref_l1 < 1) {
-                av_log(avctx, AV_LOG_ERROR, "B frames are not "
-                       "supported (%#x).\n", attr[i].value);
-                err = AVERROR(EINVAL);
-                goto fail;
-            }
-        }
-        break;
-        case VAConfigAttribEncPackedHeaders:
-            if (ctx->va_packed_headers & ~attr[i].value) {
-                // This isn't fatal, but packed headers are always
-                // preferable because they are under our control.
-                // When absent, the driver is generating them and some
-                // features may not work (e.g. VUI or SEI in H.264).
-                av_log(avctx, AV_LOG_WARNING, "Warning: some packed "
-                       "headers are not supported (want %#x, got %#x).\n",
-                       ctx->va_packed_headers, attr[i].value);
-                ctx->va_packed_headers &= attr[i].value;
-            }
-            ctx->config_attributes[ctx->nb_config_attributes++] =
-                (VAConfigAttrib) {
-                .type  = VAConfigAttribEncPackedHeaders,
-                .value = ctx->va_packed_headers,
-            };
-            break;
-        default:
-            av_assert0(0 && "Unexpected config attribute.");
-        }
+    if (rt_format_attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        av_log(avctx, AV_LOG_VERBOSE, "RT format config attribute not "
+               "supported by driver: assuming surface RT format %s "
+               "is valid.\n", rt_format->name);
+    } else if (!(rt_format_attr.value & rt_format->value)) {
+        av_log(avctx, AV_LOG_ERROR, "Surface RT format %s not supported "
+               "by driver for encoding profile %s (%d) entrypoint %s (%d).\n",
+               rt_format->name, profile_string, ctx->va_profile,
+               entrypoint_string, ctx->va_entrypoint);
+        err = AVERROR(ENOSYS);
+        goto fail;
+    } else {
+        av_log(avctx, AV_LOG_VERBOSE, "Using VAAPI render target "
+               "format %s (%#x).\n", rt_format->name, rt_format->value);
+        ctx->config_attributes[ctx->nb_config_attributes++] =
+            (VAConfigAttrib) {
+            .type  = VAConfigAttribRTFormat,
+            .value = rt_format->value,
+        };
     }
 
     err = 0;
 fail:
-    av_freep(&profiles);
-    av_freep(&entrypoints);
+    av_freep(&va_profiles);
+    av_freep(&va_entrypoints);
     return err;
 }
 
 static av_cold int vaapi_encode_init_rate_control(AVCodecContext *avctx)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
-    int rc_bits_per_second;
-    int rc_target_percentage;
-    int rc_window_size;
-    int hrd_buffer_size;
-    int hrd_initial_buffer_fullness;
+    int64_t rc_bits_per_second;
+    int     rc_target_percentage;
+    int     rc_window_size;
+    int64_t hrd_buffer_size;
+    int64_t hrd_initial_buffer_fullness;
     int fr_num, fr_den;
+    VAConfigAttrib rc_attr = { VAConfigAttribRateControl };
+    VAStatus vas;
 
-    if (avctx->bit_rate > INT32_MAX) {
-        av_log(avctx, AV_LOG_ERROR, "Target bitrate of 2^31 bps or "
-               "higher is not supported.\n");
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile, ctx->va_entrypoint,
+                                &rc_attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query rate control "
+               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (rc_attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        av_log(avctx, AV_LOG_VERBOSE, "Driver does not report any "
+               "supported rate control modes: assuming constant-quality.\n");
+        ctx->va_rc_mode = VA_RC_CQP;
+        return 0;
+    }
+    if (avctx->flags & AV_CODEC_FLAG_QSCALE ||
+        avctx->bit_rate <= 0) {
+        if (rc_attr.value & VA_RC_CQP) {
+            av_log(avctx, AV_LOG_VERBOSE, "Using constant-quality mode.\n");
+            ctx->va_rc_mode = VA_RC_CQP;
+            if (avctx->bit_rate > 0 || avctx->rc_max_rate > 0) {
+                av_log(avctx, AV_LOG_WARNING, "Bitrate target parameters "
+                       "ignored in constant-quality mode.\n");
+            }
+            return 0;
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Driver does not support "
+                   "constant-quality mode (%#x).\n", rc_attr.value);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    if (!(rc_attr.value & (VA_RC_CBR | VA_RC_VBR))) {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support any "
+               "bitrate-targetted rate control modes.\n");
         return AVERROR(EINVAL);
     }
 
     if (avctx->rc_buffer_size)
         hrd_buffer_size = avctx->rc_buffer_size;
+    else if (avctx->rc_max_rate > 0)
+        hrd_buffer_size = avctx->rc_max_rate;
     else
         hrd_buffer_size = avctx->bit_rate;
-    if (avctx->rc_initial_buffer_occupancy)
+    if (avctx->rc_initial_buffer_occupancy) {
+        if (avctx->rc_initial_buffer_occupancy > hrd_buffer_size) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid RC buffer settings: "
+                   "must have initial buffer size (%d) < "
+                   "buffer size (%"PRId64").\n",
+                   avctx->rc_initial_buffer_occupancy, hrd_buffer_size);
+            return AVERROR(EINVAL);
+        }
         hrd_initial_buffer_fullness = avctx->rc_initial_buffer_occupancy;
-    else
-        hrd_initial_buffer_fullness = hrd_buffer_size * 3 / 4;
-
-    if (ctx->va_rc_mode == VA_RC_CBR) {
-        rc_bits_per_second   = avctx->bit_rate;
-        rc_target_percentage = 100;
-        rc_window_size       = 1000;
     } else {
-        if (avctx->rc_max_rate < avctx->bit_rate) {
-            // Max rate is unset or invalid, just use the normal bitrate.
+        hrd_initial_buffer_fullness = hrd_buffer_size * 3 / 4;
+    }
+
+    if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid bitrate settings: must have "
+               "bitrate (%"PRId64") <= maxrate (%"PRId64").\n",
+               avctx->bit_rate, avctx->rc_max_rate);
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->rc_max_rate > avctx->bit_rate) {
+        if (!(rc_attr.value & VA_RC_VBR)) {
+            av_log(avctx, AV_LOG_WARNING, "Driver does not support "
+                   "VBR mode (%#x), using CBR mode instead.\n",
+                   rc_attr.value);
+            ctx->va_rc_mode = VA_RC_CBR;
+
             rc_bits_per_second   = avctx->bit_rate;
             rc_target_percentage = 100;
         } else {
+            ctx->va_rc_mode = VA_RC_VBR;
+
             rc_bits_per_second   = avctx->rc_max_rate;
-            rc_target_percentage = (avctx->bit_rate * 100) / rc_bits_per_second;
+            rc_target_percentage = (avctx->bit_rate * 100) /
+                                   avctx->rc_max_rate;
         }
-        rc_window_size = (hrd_buffer_size * 1000) / avctx->bit_rate;
+
+    } else if (avctx->rc_max_rate == avctx->bit_rate) {
+        if (!(rc_attr.value & VA_RC_CBR)) {
+            av_log(avctx, AV_LOG_WARNING, "Driver does not support "
+                   "CBR mode (%#x), using VBR mode instead.\n",
+                   rc_attr.value);
+            ctx->va_rc_mode = VA_RC_VBR;
+        } else {
+            ctx->va_rc_mode = VA_RC_CBR;
+        }
+
+        rc_bits_per_second   = avctx->bit_rate;
+        rc_target_percentage = 100;
+
+    } else {
+        if (rc_attr.value & VA_RC_VBR) {
+            ctx->va_rc_mode = VA_RC_VBR;
+
+            // We only have a target bitrate, but VAAPI requires that a
+            // maximum rate be supplied as well.  Since the user has
+            // offered no particular constraint, arbitrarily pick a
+            // maximum rate of double the target rate.
+            rc_bits_per_second   = 2 * avctx->bit_rate;
+            rc_target_percentage = 50;
+        } else {
+            ctx->va_rc_mode = VA_RC_CBR;
+
+            rc_bits_per_second   = avctx->bit_rate;
+            rc_target_percentage = 100;
+        }
     }
 
+    rc_window_size = (hrd_buffer_size * 1000) / rc_bits_per_second;
+
+    av_log(avctx, AV_LOG_VERBOSE, "RC mode: %s, %d%% of %"PRId64" bps "
+           "over %d ms.\n", ctx->va_rc_mode == VA_RC_VBR ? "VBR" : "CBR",
+           rc_target_percentage, rc_bits_per_second, rc_window_size);
+    av_log(avctx, AV_LOG_VERBOSE, "RC buffer: %"PRId64" bits, "
+           "initial fullness %"PRId64" bits.\n",
+           hrd_buffer_size, hrd_initial_buffer_fullness);
+
+    if (rc_bits_per_second          > UINT32_MAX ||
+        hrd_buffer_size             > UINT32_MAX ||
+        hrd_initial_buffer_fullness > UINT32_MAX) {
+        av_log(avctx, AV_LOG_ERROR, "RC parameters of 2^32 or "
+               "greater are not supported by VAAPI.\n");
+        return AVERROR(EINVAL);
+    }
+
+    ctx->va_bit_rate = rc_bits_per_second;
+
+    ctx->config_attributes[ctx->nb_config_attributes++] =
+        (VAConfigAttrib) {
+        .type  = VAConfigAttribRateControl,
+        .value = ctx->va_rc_mode,
+    };
+
     ctx->rc_params.misc.type = VAEncMiscParameterTypeRateControl;
     ctx->rc_params.rc = (VAEncMiscParameterRateControl) {
         .bits_per_second   = rc_bits_per_second,
@@ -1177,21 +1363,20 @@
         .initial_qp        = 0,
         .min_qp            = (avctx->qmin > 0 ? avctx->qmin : 0),
         .basic_unit_size   = 0,
+#if VA_CHECK_VERSION(1, 1, 0)
+        .max_qp            = (avctx->qmax > 0 ? avctx->qmax : 0),
+#endif
     };
-    ctx->global_params[ctx->nb_global_params] =
-        &ctx->rc_params.misc;
-    ctx->global_params_size[ctx->nb_global_params++] =
-        sizeof(ctx->rc_params);
+    vaapi_encode_add_global_param(avctx, &ctx->rc_params.misc,
+                                  sizeof(ctx->rc_params));
 
     ctx->hrd_params.misc.type = VAEncMiscParameterTypeHRD;
     ctx->hrd_params.hrd = (VAEncMiscParameterHRD) {
         .initial_buffer_fullness = hrd_initial_buffer_fullness,
         .buffer_size             = hrd_buffer_size,
     };
-    ctx->global_params[ctx->nb_global_params] =
-        &ctx->hrd_params.misc;
-    ctx->global_params_size[ctx->nb_global_params++] =
-        sizeof(ctx->hrd_params);
+    vaapi_encode_add_global_param(avctx, &ctx->hrd_params.misc,
+                                  sizeof(ctx->hrd_params));
 
     if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
         av_reduce(&fr_num, &fr_den,
@@ -1204,10 +1389,161 @@
     ctx->fr_params.fr.framerate = (unsigned int)fr_den << 16 | fr_num;
 
 #if VA_CHECK_VERSION(0, 40, 0)
-    ctx->global_params[ctx->nb_global_params] =
-        &ctx->fr_params.misc;
-    ctx->global_params_size[ctx->nb_global_params++] =
-        sizeof(ctx->fr_params);
+    vaapi_encode_add_global_param(avctx, &ctx->fr_params.misc,
+                                  sizeof(ctx->fr_params));
+#endif
+
+    return 0;
+}
+
+static av_cold int vaapi_encode_init_gop_structure(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+    VAConfigAttrib attr = { VAConfigAttribEncMaxRefFrames };
+    uint32_t ref_l0, ref_l1;
+
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                &attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query reference frames "
+               "attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        ref_l0 = ref_l1 = 0;
+    } else {
+        ref_l0 = attr.value       & 0xffff;
+        ref_l1 = attr.value >> 16 & 0xffff;
+    }
+
+    if (avctx->gop_size <= 1) {
+        av_log(avctx, AV_LOG_VERBOSE, "Using intra frames only.\n");
+        ctx->gop_size = 1;
+    } else if (ref_l0 < 1) {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support any "
+               "reference frames.\n");
+        return AVERROR(EINVAL);
+    } else if (ref_l1 < 1 || avctx->max_b_frames < 1) {
+        av_log(avctx, AV_LOG_VERBOSE, "Using intra and P-frames "
+               "(supported references: %d / %d).\n", ref_l0, ref_l1);
+        ctx->gop_size = avctx->gop_size;
+        ctx->p_per_i  = INT_MAX;
+        ctx->b_per_p  = 0;
+    } else {
+        av_log(avctx, AV_LOG_VERBOSE, "Using intra, P- and B-frames "
+               "(supported references: %d / %d).\n", ref_l0, ref_l1);
+        ctx->gop_size = avctx->gop_size;
+        ctx->p_per_i  = INT_MAX;
+        ctx->b_per_p  = avctx->max_b_frames;
+    }
+
+    return 0;
+}
+
+static av_cold int vaapi_encode_init_packed_headers(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+    VAConfigAttrib attr = { VAConfigAttribEncPackedHeaders };
+
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                &attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query packed headers "
+               "attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        if (ctx->desired_packed_headers) {
+            av_log(avctx, AV_LOG_WARNING, "Driver does not support any "
+                   "packed headers (wanted %#x).\n",
+                   ctx->desired_packed_headers);
+        } else {
+            av_log(avctx, AV_LOG_VERBOSE, "Driver does not support any "
+                   "packed headers (none wanted).\n");
+        }
+        ctx->va_packed_headers = 0;
+    } else {
+        if (ctx->desired_packed_headers & ~attr.value) {
+            av_log(avctx, AV_LOG_WARNING, "Driver does not support some "
+                   "wanted packed headers (wanted %#x, found %#x).\n",
+                   ctx->desired_packed_headers, attr.value);
+        } else {
+            av_log(avctx, AV_LOG_VERBOSE, "All wanted packed headers "
+                   "available (wanted %#x, found %#x).\n",
+                   ctx->desired_packed_headers, attr.value);
+        }
+        ctx->va_packed_headers = ctx->desired_packed_headers & attr.value;
+    }
+
+    if (ctx->va_packed_headers) {
+        ctx->config_attributes[ctx->nb_config_attributes++] =
+            (VAConfigAttrib) {
+            .type  = VAConfigAttribEncPackedHeaders,
+            .value = ctx->va_packed_headers,
+        };
+    }
+
+    if ( (ctx->desired_packed_headers & VA_ENC_PACKED_HEADER_SEQUENCE) &&
+        !(ctx->va_packed_headers      & VA_ENC_PACKED_HEADER_SEQUENCE) &&
+         (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
+        av_log(avctx, AV_LOG_WARNING, "Driver does not support packed "
+               "sequence headers, but a global header is requested.\n");
+        av_log(avctx, AV_LOG_WARNING, "No global header will be written: "
+               "this may result in a stream which is not usable for some "
+               "purposes (e.g. not muxable to some containers).\n");
+    }
+
+    return 0;
+}
+
+static av_cold int vaapi_encode_init_quality(AVCodecContext *avctx)
+{
+#if VA_CHECK_VERSION(0, 36, 0)
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+    VAConfigAttrib attr = { VAConfigAttribEncQualityRange };
+    int quality = avctx->compression_level;
+
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                &attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query quality "
+               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        if (quality != 0) {
+            av_log(avctx, AV_LOG_WARNING, "Quality attribute is not "
+                   "supported: will use default quality level.\n");
+        }
+    } else {
+        if (quality > attr.value) {
+            av_log(avctx, AV_LOG_WARNING, "Invalid quality level: "
+                   "valid range is 0-%d, using %d.\n",
+                   attr.value, attr.value);
+            quality = attr.value;
+        }
+
+        ctx->quality_params.misc.type = VAEncMiscParameterTypeQualityLevel;
+        ctx->quality_params.quality.quality_level = quality;
+
+        vaapi_encode_add_global_param(avctx, &ctx->quality_params.misc,
+                                      sizeof(ctx->quality_params));
+    }
+#else
+    av_log(avctx, AV_LOG_WARNING, "The encode quality option is "
+           "not supported with this VAAPI version.\n");
 #endif
 
     return 0;
@@ -1339,7 +1675,7 @@
     ctx->recon_frames->height    = ctx->surface_height;
     // At most three IDR/I/P frames and two runs of B frames can be in
     // flight at any one time.
-    ctx->recon_frames->initial_pool_size = 3 + 2 * avctx->max_b_frames;
+    ctx->recon_frames->initial_pool_size = 3 + 2 * ctx->b_per_p;
 
     err = av_hwframe_ctx_init(ctx->recon_frames_ref);
     if (err < 0) {
@@ -1368,17 +1704,9 @@
         return AVERROR(EINVAL);
     }
 
-    ctx->codec_options = ctx->codec_options_data;
-
     ctx->va_config  = VA_INVALID_ID;
     ctx->va_context = VA_INVALID_ID;
 
-    ctx->priv_data = av_mallocz(ctx->codec->priv_data_size);
-    if (!ctx->priv_data) {
-        err = AVERROR(ENOMEM);
-        goto fail;
-    }
-
     ctx->input_frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
     if (!ctx->input_frames_ref) {
         err = AVERROR(ENOMEM);
@@ -1394,10 +1722,28 @@
     ctx->device = (AVHWDeviceContext*)ctx->device_ref->data;
     ctx->hwctx = ctx->device->hwctx;
 
-    err = vaapi_encode_config_attributes(avctx);
+    err = vaapi_encode_profile_entrypoint(avctx);
     if (err < 0)
         goto fail;
 
+    err = vaapi_encode_init_rate_control(avctx);
+    if (err < 0)
+        goto fail;
+
+    err = vaapi_encode_init_gop_structure(avctx);
+    if (err < 0)
+        goto fail;
+
+    err = vaapi_encode_init_packed_headers(avctx);
+    if (err < 0)
+        goto fail;
+
+    if (avctx->compression_level >= 0) {
+        err = vaapi_encode_init_quality(avctx);
+        if (err < 0)
+            goto fail;
+    }
+
     vas = vaCreateConfig(ctx->hwctx->display,
                          ctx->va_profile, ctx->va_entrypoint,
                          ctx->config_attributes, ctx->nb_config_attributes,
@@ -1435,62 +1781,17 @@
         goto fail;
     }
 
-    if (ctx->va_rc_mode & ~VA_RC_CQP) {
-        err = vaapi_encode_init_rate_control(avctx);
-        if (err < 0)
-            goto fail;
-    }
-
     if (ctx->codec->configure) {
         err = ctx->codec->configure(avctx);
         if (err < 0)
             goto fail;
     }
 
-    if (avctx->compression_level >= 0) {
-#if VA_CHECK_VERSION(0, 36, 0)
-        VAConfigAttrib attr = { VAConfigAttribEncQualityRange };
-
-        vas = vaGetConfigAttributes(ctx->hwctx->display,
-                                    ctx->va_profile,
-                                    ctx->va_entrypoint,
-                                    &attr, 1);
-        if (vas != VA_STATUS_SUCCESS) {
-            av_log(avctx, AV_LOG_WARNING, "Failed to query quality "
-                   "attribute: will use default compression level.\n");
-        } else {
-            if (avctx->compression_level > attr.value) {
-                av_log(avctx, AV_LOG_WARNING, "Invalid compression "
-                       "level: valid range is 0-%d, using %d.\n",
-                       attr.value, attr.value);
-                avctx->compression_level = attr.value;
-            }
-
-            ctx->quality_params.misc.type =
-                VAEncMiscParameterTypeQualityLevel;
-            ctx->quality_params.quality.quality_level =
-                avctx->compression_level;
-
-            ctx->global_params[ctx->nb_global_params] =
-                &ctx->quality_params.misc;
-            ctx->global_params_size[ctx->nb_global_params++] =
-                sizeof(ctx->quality_params);
-        }
-#else
-        av_log(avctx, AV_LOG_WARNING, "The encode compression level "
-               "option is not supported with this VAAPI version.\n");
-#endif
-    }
-
     ctx->input_order  = 0;
-    ctx->output_delay = avctx->max_b_frames;
+    ctx->output_delay = ctx->b_per_p;
     ctx->decode_delay = 1;
     ctx->output_order = - ctx->output_delay - 1;
 
-    // Currently we never generate I frames, only IDR.
-    ctx->p_per_i = INT_MAX;
-    ctx->b_per_p = avctx->max_b_frames;
-
     if (ctx->codec->sequence_params_size > 0) {
         ctx->codec_sequence_params =
             av_mallocz(ctx->codec->sequence_params_size);
@@ -1522,7 +1823,8 @@
     ctx->issue_mode = ISSUE_MODE_MAXIMISE_THROUGHPUT;
 
     if (ctx->va_packed_headers & VA_ENC_PACKED_HEADER_SEQUENCE &&
-        ctx->codec->write_sequence_header) {
+        ctx->codec->write_sequence_header &&
+        avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
         char data[MAX_PARAM_BUFFER_SIZE];
         size_t bit_len = 8 * sizeof(data);
 
@@ -1560,6 +1862,8 @@
         vaapi_encode_free(avctx, pic);
     }
 
+    av_buffer_pool_uninit(&ctx->output_buffer_pool);
+
     if (ctx->va_context != VA_INVALID_ID) {
         vaDestroyContext(ctx->hwctx->display, ctx->va_context);
         ctx->va_context = VA_INVALID_ID;
@@ -1570,8 +1874,6 @@
         ctx->va_config = VA_INVALID_ID;
     }
 
-    av_buffer_pool_uninit(&ctx->output_buffer_pool);
-
     av_freep(&ctx->codec_sequence_params);
     av_freep(&ctx->codec_picture_params);
 
@@ -1579,7 +1881,5 @@
     av_buffer_unref(&ctx->input_frames_ref);
     av_buffer_unref(&ctx->device_ref);
 
-    av_freep(&ctx->priv_data);
-
     return 0;
 }

diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index bcb9d57..091889f 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h

@@ -23,6 +23,10 @@
 
 #include <va/va.h>
 
+#if VA_CHECK_VERSION(1, 0, 0)
+#include <va/va_str.h>
+#endif
+
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_vaapi.h"
 
@@ -86,23 +90,34 @@
     VAAPIEncodeSlice *slices;
 } VAAPIEncodePicture;
 
+typedef struct VAAPIEncodeProfile {
+    // lavc profile value (FF_PROFILE_*).
+    int       av_profile;
+    // Supported bit depth.
+    int       depth;
+    // Number of components.
+    int       nb_components;
+    // Chroma subsampling in width dimension.
+    int       log2_chroma_w;
+    // Chroma subsampling in height dimension.
+    int       log2_chroma_h;
+    // VAAPI profile value.
+    VAProfile va_profile;
+} VAAPIEncodeProfile;
+
 typedef struct VAAPIEncodeContext {
     const AVClass *class;
 
     // Codec-specific hooks.
     const struct VAAPIEncodeType *codec;
 
-    // Encoding profile (VAProfileXXX).
-    VAProfile       va_profile;
-    // Encoding entrypoint (usually VAEntryointEncSlice).
-    VAEntrypoint    va_entrypoint;
-    // Surface colour/sampling format (usually VA_RT_FORMAT_YUV420).
-    unsigned int    va_rt_format;
-    // Rate control mode.
-    unsigned int    va_rc_mode;
-    // Supported packed headers (initially the desired set, modified
-    // later to what is actually supported).
-    unsigned int    va_packed_headers;
+    // Global options.
+
+    // Use low power encoding mode.
+    int             low_power;
+
+    // Desired packed headers.
+    unsigned int    desired_packed_headers;
 
     // The required size of surfaces.  This is probably the input
     // size (AVCodecContext.width|height) aligned up to whatever
@@ -113,8 +128,19 @@
     // Everything above this point must be set before calling
     // ff_vaapi_encode_init().
 
-    // Codec-specific state.
-    void *priv_data;
+    // Chosen encoding profile details.
+    const VAAPIEncodeProfile *profile;
+
+    // Encoding profile (VAProfile*).
+    VAProfile       va_profile;
+    // Encoding entrypoint (VAEntryoint*).
+    VAEntrypoint    va_entrypoint;
+    // Rate control mode.
+    unsigned int    va_rc_mode;
+    // Bitrate for codec-specific encoder parameters.
+    unsigned int    va_bit_rate;
+    // Packed headers which will actually be sent.
+    unsigned int    va_packed_headers;
 
     // Configuration attributes to use when creating va_config.
     VAConfigAttrib  config_attributes[MAX_CONFIG_ATTRIBUTES];
@@ -199,23 +225,19 @@
     int64_t         ts_ring[MAX_REORDER_DELAY * 3];
 
     // Frame type decision.
+    int gop_size;
     int p_per_i;
     int b_per_p;
     int force_idr;
     int gop_counter;
     int p_counter;
     int end_of_stream;
-
-    // Codec-local options are allocated to follow this structure in
-    // memory (in the AVCodec definition, set priv_data_size to
-    // sizeof(VAAPIEncodeContext) + sizeof(VAAPIEncodeFooOptions)).
-    void *codec_options;
-    char codec_options_data[0];
 } VAAPIEncodeContext;
 
-
 typedef struct VAAPIEncodeType {
-    size_t priv_data_size;
+    // List of supported profiles and corresponding VAAPI profiles.
+    // (Must end with FF_PROFILE_UNKNOWN.)
+    const VAAPIEncodeProfile *profiles;
 
     // Perform any extra codec-specific configuration after the
     // codec context is initialised (set up the private data and
@@ -280,4 +302,13 @@
 int ff_vaapi_encode_init(AVCodecContext *avctx);
 int ff_vaapi_encode_close(AVCodecContext *avctx);
 
+
+#define VAAPI_ENCODE_COMMON_OPTIONS \
+    { "low_power", \
+      "Use low-power encoding mode (only available on some platforms; " \
+      "may not support all encoding features)", \
+      OFFSET(common.low_power), AV_OPT_TYPE_BOOL, \
+      { .i64 = 0 }, 0, 1, FLAGS }
+
+
 #endif /* AVCODEC_VAAPI_ENCODE_H */

diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
index efde80b..8feae0d 100644
--- a/libavcodec/vaapi_encode_h264.c
+++ b/libavcodec/vaapi_encode_h264.c

@@ -16,128 +16,50 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <string.h>
+
 #include <va/va.h>
 #include <va/va_enc_h264.h>
 
 #include "libavutil/avassert.h"
+#include "libavutil/common.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
-#include "libavutil/pixfmt.h"
 
 #include "avcodec.h"
+#include "cbs.h"
+#include "cbs_h264.h"
 #include "h264.h"
+#include "h264_levels.h"
 #include "h264_sei.h"
 #include "internal.h"
 #include "vaapi_encode.h"
-#include "vaapi_encode_h26x.h"
 
 enum {
-    SLICE_TYPE_P  = 0,
-    SLICE_TYPE_B  = 1,
-    SLICE_TYPE_I  = 2,
-    SLICE_TYPE_SP = 3,
-    SLICE_TYPE_SI = 4,
+    SEI_TIMING         = 0x01,
+    SEI_IDENTIFIER     = 0x02,
+    SEI_RECOVERY_POINT = 0x04,
 };
 
-// This structure contains all possibly-useful per-sequence syntax elements
-// which are not already contained in the various VAAPI structures.
-typedef struct VAAPIEncodeH264MiscSequenceParams {
-    unsigned int profile_idc;
-    char constraint_set0_flag;
-    char constraint_set1_flag;
-    char constraint_set2_flag;
-    char constraint_set3_flag;
-    char constraint_set4_flag;
-    char constraint_set5_flag;
-
-    char separate_colour_plane_flag;
-    char qpprime_y_zero_transform_bypass_flag;
-
-    char gaps_in_frame_num_allowed_flag;
-    char delta_pic_order_always_zero_flag;
-    char bottom_field_pic_order_in_frame_present_flag;
-
-    unsigned int num_slice_groups_minus1;
-    unsigned int slice_group_map_type;
-
-    int pic_init_qs_minus26;
-
-    char overscan_info_present_flag;
-    char overscan_appropriate_flag;
-
-    char video_signal_type_present_flag;
-    unsigned int video_format;
-    char video_full_range_flag;
-    char colour_description_present_flag;
-    unsigned int colour_primaries;
-    unsigned int transfer_characteristics;
-    unsigned int matrix_coefficients;
-
-    char chroma_loc_info_present_flag;
-    unsigned int chroma_sample_loc_type_top_field;
-    unsigned int chroma_sample_loc_type_bottom_field;
-
-    // Some timing elements are in VAEncSequenceParameterBufferH264.
-    char fixed_frame_rate_flag;
-
-    char nal_hrd_parameters_present_flag;
-    char vcl_hrd_parameters_present_flag;
-    char low_delay_hrd_flag;
-    char pic_struct_present_flag;
-
-    char motion_vectors_over_pic_boundaries_flag;
-    unsigned int max_bytes_per_pic_denom;
-    unsigned int max_bits_per_mb_denom;
-    unsigned int max_num_reorder_frames;
-    unsigned int max_dec_pic_buffering;
-
-    unsigned int cpb_cnt_minus1;
-    unsigned int bit_rate_scale;
-    unsigned int cpb_size_scale;
-    unsigned int bit_rate_value_minus1[32];
-    unsigned int cpb_size_value_minus1[32];
-    char cbr_flag[32];
-    unsigned int initial_cpb_removal_delay_length_minus1;
-    unsigned int cpb_removal_delay_length_minus1;
-    unsigned int dpb_output_delay_length_minus1;
-    unsigned int time_offset_length;
-
-    unsigned int initial_cpb_removal_delay;
-    unsigned int initial_cpb_removal_delay_offset;
-
-    unsigned int pic_struct;
-} VAAPIEncodeH264MiscSequenceParams;
-
-// This structure contains all possibly-useful per-slice syntax elements
-// which are not already contained in the various VAAPI structures.
-typedef struct VAAPIEncodeH264MiscSliceParams {
-    unsigned int nal_unit_type;
-    unsigned int nal_ref_idc;
-
-    unsigned int colour_plane_id;
-    char field_pic_flag;
-    char bottom_field_flag;
-
-    unsigned int redundant_pic_cnt;
-
-    char sp_for_switch_flag;
-    int slice_qs_delta;
-
-    char ref_pic_list_modification_flag_l0;
-    char ref_pic_list_modification_flag_l1;
-
-    char no_output_of_prior_pics_flag;
-    char long_term_reference_flag;
-    char adaptive_ref_pic_marking_mode_flag;
-} VAAPIEncodeH264MiscSliceParams;
-
-typedef struct VAAPIEncodeH264Slice {
-    VAAPIEncodeH264MiscSliceParams misc_slice_params;
-} VAAPIEncodeH264Slice;
+// Random (version 4) ISO 11578 UUID.
+static const uint8_t vaapi_encode_h264_sei_identifier_uuid[16] = {
+    0x59, 0x94, 0x8b, 0x28, 0x11, 0xec, 0x45, 0xaf,
+    0x96, 0x75, 0x19, 0xd4, 0x1f, 0xea, 0xa9, 0x4d,
+};
 
 typedef struct VAAPIEncodeH264Context {
-    VAAPIEncodeH264MiscSequenceParams misc_sequence_params;
+    VAAPIEncodeContext common;
 
+    // User options.
+    int qp;
+    int quality;
+    int coder;
+    int aud;
+    int sei;
+    int profile;
+    int level;
+
+    // Derived settings.
     int mb_width;
     int mb_height;
 
@@ -145,584 +67,112 @@
     int fixed_qp_p;
     int fixed_qp_b;
 
+    // Stream state.
+    int frame_num;
+    int pic_order_cnt;
     int next_frame_num;
     int64_t last_idr_frame;
     int64_t idr_pic_count;
 
+    int primary_pic_type;
+    int slice_type;
+
     int cpb_delay;
     int dpb_delay;
 
-    // Rate control configuration.
-    int send_timing_sei;
+    // Writer structures.
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment current_access_unit;
+
+    H264RawAUD   raw_aud;
+    H264RawSPS   raw_sps;
+    H264RawPPS   raw_pps;
+    H264RawSEI   raw_sei;
+    H264RawSlice raw_slice;
+
+    H264RawSEIBufferingPeriod      sei_buffering_period;
+    H264RawSEIPicTiming            sei_pic_timing;
+    H264RawSEIRecoveryPoint        sei_recovery_point;
+    H264RawSEIUserDataUnregistered sei_identifier;
+    char                          *sei_identifier_string;
+
+    int aud_needed;
+    int sei_needed;
+    int sei_cbr_workaround_needed;
 } VAAPIEncodeH264Context;
 
-typedef struct VAAPIEncodeH264Options {
-    int qp;
-    int quality;
-    int low_power;
-    // Entropy encoder type.
-    int coder;
-} VAAPIEncodeH264Options;
 
-
-#define vseq_var(name)     vseq->name, name
-#define vseq_field(name)   vseq->seq_fields.bits.name, name
-#define vvui_field(name)   vseq->vui_fields.bits.name, name
-#define vpic_var(name)     vpic->name, name
-#define vpic_field(name)   vpic->pic_fields.bits.name, name
-#define vslice_var(name)   vslice->name, name
-#define vslice_field(name) vslice->slice_fields.bits.name, name
-#define mseq_var(name)     mseq->name, name
-#define mslice_var(name)   mslice->name, name
-
-static void vaapi_encode_h264_write_nal_header(PutBitContext *pbc,
-                                               int nal_unit_type, int nal_ref_idc)
+static int vaapi_encode_h264_write_access_unit(AVCodecContext *avctx,
+                                               char *data, size_t *data_len,
+                                               CodedBitstreamFragment *au)
 {
-    u(1, 0, forbidden_zero_bit);
-    u(2, nal_ref_idc, nal_ref_idc);
-    u(5, nal_unit_type, nal_unit_type);
+    VAAPIEncodeH264Context *priv = avctx->priv_data;
+    int err;
+
+    err = ff_cbs_write_fragment_data(priv->cbc, au);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n");
+        return err;
+    }
+
+    if (*data_len < 8 * au->data_size - au->data_bit_padding) {
+        av_log(avctx, AV_LOG_ERROR, "Access unit too large: "
+               "%zu < %zu.\n", *data_len,
+               8 * au->data_size - au->data_bit_padding);
+        return AVERROR(ENOSPC);
+    }
+
+    memcpy(data, au->data, au->data_size);
+    *data_len = 8 * au->data_size - au->data_bit_padding;
+
+    return 0;
 }
 
-static void vaapi_encode_h264_write_trailing_rbsp(PutBitContext *pbc)
+static int vaapi_encode_h264_add_nal(AVCodecContext *avctx,
+                                     CodedBitstreamFragment *au,
+                                     void *nal_unit)
 {
-    u(1, 1, rbsp_stop_one_bit);
-    while (put_bits_count(pbc) & 7)
-        u(1, 0, rbsp_alignment_zero_bit);
-}
+    VAAPIEncodeH264Context *priv = avctx->priv_data;
+    H264RawNALUnitHeader *header = nal_unit;
+    int err;
 
-static void vaapi_encode_h264_write_vui(PutBitContext *pbc,
-                                        VAAPIEncodeContext *ctx)
-{
-    VAEncSequenceParameterBufferH264  *vseq = ctx->codec_sequence_params;
-    VAAPIEncodeH264Context            *priv = ctx->priv_data;
-    VAAPIEncodeH264MiscSequenceParams *mseq = &priv->misc_sequence_params;
-    int i;
-
-    u(1, vvui_field(aspect_ratio_info_present_flag));
-    if (vseq->vui_fields.bits.aspect_ratio_info_present_flag) {
-        u(8, vseq_var(aspect_ratio_idc));
-        if (vseq->aspect_ratio_idc == 255) {
-            u(16, vseq_var(sar_width));
-            u(16, vseq_var(sar_height));
-        }
+    err = ff_cbs_insert_unit_content(priv->cbc, au, -1,
+                                     header->nal_unit_type, nal_unit, NULL);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to add NAL unit: "
+               "type = %d.\n", header->nal_unit_type);
+        return err;
     }
 
-    u(1, mseq_var(overscan_info_present_flag));
-    if (mseq->overscan_info_present_flag)
-        u(1, mseq_var(overscan_appropriate_flag));
-
-    u(1, mseq_var(video_signal_type_present_flag));
-    if (mseq->video_signal_type_present_flag) {
-        u(3, mseq_var(video_format));
-        u(1, mseq_var(video_full_range_flag));
-        u(1, mseq_var(colour_description_present_flag));
-        if (mseq->colour_description_present_flag) {
-            u(8, mseq_var(colour_primaries));
-            u(8, mseq_var(transfer_characteristics));
-            u(8, mseq_var(matrix_coefficients));
-        }
-    }
-
-    u(1, mseq_var(chroma_loc_info_present_flag));
-    if (mseq->chroma_loc_info_present_flag) {
-        ue(mseq_var(chroma_sample_loc_type_top_field));
-        ue(mseq_var(chroma_sample_loc_type_bottom_field));
-    }
-
-    u(1, vvui_field(timing_info_present_flag));
-    if (vseq->vui_fields.bits.timing_info_present_flag) {
-        u(32, vseq_var(num_units_in_tick));
-        u(32, vseq_var(time_scale));
-        u(1, mseq_var(fixed_frame_rate_flag));
-    }
-
-    u(1, mseq_var(nal_hrd_parameters_present_flag));
-    if (mseq->nal_hrd_parameters_present_flag) {
-        ue(mseq_var(cpb_cnt_minus1));
-        u(4, mseq_var(bit_rate_scale));
-        u(4, mseq_var(cpb_size_scale));
-        for (i = 0; i <= mseq->cpb_cnt_minus1; i++) {
-            ue(mseq_var(bit_rate_value_minus1[i]));
-            ue(mseq_var(cpb_size_value_minus1[i]));
-            u(1, mseq_var(cbr_flag[i]));
-        }
-        u(5, mseq_var(initial_cpb_removal_delay_length_minus1));
-        u(5, mseq_var(cpb_removal_delay_length_minus1));
-        u(5, mseq_var(dpb_output_delay_length_minus1));
-        u(5, mseq_var(time_offset_length));
-    }
-    u(1, mseq_var(vcl_hrd_parameters_present_flag));
-    if (mseq->vcl_hrd_parameters_present_flag) {
-        av_assert0(0 && "vcl hrd parameters not supported");
-    }
-
-    if (mseq->nal_hrd_parameters_present_flag ||
-        mseq->vcl_hrd_parameters_present_flag)
-        u(1, mseq_var(low_delay_hrd_flag));
-    u(1, mseq_var(pic_struct_present_flag));
-
-    u(1, vvui_field(bitstream_restriction_flag));
-    if (vseq->vui_fields.bits.bitstream_restriction_flag) {
-        u(1, mseq_var(motion_vectors_over_pic_boundaries_flag));
-        ue(mseq_var(max_bytes_per_pic_denom));
-        ue(mseq_var(max_bits_per_mb_denom));
-        ue(vvui_field(log2_max_mv_length_horizontal));
-        ue(vvui_field(log2_max_mv_length_vertical));
-        ue(mseq_var(max_num_reorder_frames));
-        ue(mseq_var(max_dec_pic_buffering));
-    }
-}
-
-static void vaapi_encode_h264_write_sps(PutBitContext *pbc,
-                                        VAAPIEncodeContext *ctx)
-{
-    VAEncSequenceParameterBufferH264  *vseq = ctx->codec_sequence_params;
-    VAAPIEncodeH264Context            *priv = ctx->priv_data;
-    VAAPIEncodeH264MiscSequenceParams *mseq = &priv->misc_sequence_params;
-    int i;
-
-    vaapi_encode_h264_write_nal_header(pbc, H264_NAL_SPS, 3);
-
-    u(8, mseq_var(profile_idc));
-    u(1, mseq_var(constraint_set0_flag));
-    u(1, mseq_var(constraint_set1_flag));
-    u(1, mseq_var(constraint_set2_flag));
-    u(1, mseq_var(constraint_set3_flag));
-    u(1, mseq_var(constraint_set4_flag));
-    u(1, mseq_var(constraint_set5_flag));
-    u(2, 0, reserved_zero_2bits);
-
-    u(8, vseq_var(level_idc));
-
-    ue(vseq_var(seq_parameter_set_id));
-
-    if (mseq->profile_idc == 100 || mseq->profile_idc == 110 ||
-        mseq->profile_idc == 122 || mseq->profile_idc == 244 ||
-        mseq->profile_idc ==  44 || mseq->profile_idc ==  83 ||
-        mseq->profile_idc ==  86 || mseq->profile_idc == 118 ||
-        mseq->profile_idc == 128 || mseq->profile_idc == 138) {
-        ue(vseq_field(chroma_format_idc));
-
-        if (vseq->seq_fields.bits.chroma_format_idc == 3)
-            u(1, mseq_var(separate_colour_plane_flag));
-
-        ue(vseq_var(bit_depth_luma_minus8));
-        ue(vseq_var(bit_depth_chroma_minus8));
-
-        u(1, mseq_var(qpprime_y_zero_transform_bypass_flag));
-
-        u(1, vseq_field(seq_scaling_matrix_present_flag));
-        if (vseq->seq_fields.bits.seq_scaling_matrix_present_flag) {
-            av_assert0(0 && "scaling matrices not supported");
-        }
-    }
-
-    ue(vseq_field(log2_max_frame_num_minus4));
-    ue(vseq_field(pic_order_cnt_type));
-
-    if (vseq->seq_fields.bits.pic_order_cnt_type == 0) {
-        ue(vseq_field(log2_max_pic_order_cnt_lsb_minus4));
-    } else if (vseq->seq_fields.bits.pic_order_cnt_type == 1) {
-        u(1, mseq_var(delta_pic_order_always_zero_flag));
-        se(vseq_var(offset_for_non_ref_pic));
-        se(vseq_var(offset_for_top_to_bottom_field));
-        ue(vseq_var(num_ref_frames_in_pic_order_cnt_cycle));
-
-        for (i = 0; i < vseq->num_ref_frames_in_pic_order_cnt_cycle; i++)
-            se(vseq_var(offset_for_ref_frame[i]));
-    }
-
-    ue(vseq_var(max_num_ref_frames));
-    u(1, mseq_var(gaps_in_frame_num_allowed_flag));
-
-    ue(vseq->picture_width_in_mbs  - 1, pic_width_in_mbs_minus1);
-    ue(vseq->picture_height_in_mbs - 1, pic_height_in_mbs_minus1);
-
-    u(1, vseq_field(frame_mbs_only_flag));
-    if (!vseq->seq_fields.bits.frame_mbs_only_flag)
-        u(1, vseq_field(mb_adaptive_frame_field_flag));
-
-    u(1, vseq_field(direct_8x8_inference_flag));
-
-    u(1, vseq_var(frame_cropping_flag));
-    if (vseq->frame_cropping_flag) {
-        ue(vseq_var(frame_crop_left_offset));
-        ue(vseq_var(frame_crop_right_offset));
-        ue(vseq_var(frame_crop_top_offset));
-        ue(vseq_var(frame_crop_bottom_offset));
-    }
-
-    u(1, vseq_var(vui_parameters_present_flag));
-    if (vseq->vui_parameters_present_flag)
-        vaapi_encode_h264_write_vui(pbc, ctx);
-
-    vaapi_encode_h264_write_trailing_rbsp(pbc);
-}
-
-static void vaapi_encode_h264_write_pps(PutBitContext *pbc,
-                                        VAAPIEncodeContext *ctx)
-{
-    VAEncPictureParameterBufferH264   *vpic = ctx->codec_picture_params;
-    VAAPIEncodeH264Context            *priv = ctx->priv_data;
-    VAAPIEncodeH264MiscSequenceParams *mseq = &priv->misc_sequence_params;
-
-    vaapi_encode_h264_write_nal_header(pbc, H264_NAL_PPS, 3);
-
-    ue(vpic_var(pic_parameter_set_id));
-    ue(vpic_var(seq_parameter_set_id));
-
-    u(1, vpic_field(entropy_coding_mode_flag));
-    u(1, mseq_var(bottom_field_pic_order_in_frame_present_flag));
-
-    ue(mseq_var(num_slice_groups_minus1));
-    if (mseq->num_slice_groups_minus1 > 0) {
-        ue(mseq_var(slice_group_map_type));
-        av_assert0(0 && "slice groups not supported");
-    }
-
-    ue(vpic_var(num_ref_idx_l0_active_minus1));
-    ue(vpic_var(num_ref_idx_l1_active_minus1));
-
-    u(1, vpic_field(weighted_pred_flag));
-    u(2, vpic_field(weighted_bipred_idc));
-
-    se(vpic->pic_init_qp - 26, pic_init_qp_minus26);
-    se(mseq_var(pic_init_qs_minus26));
-    se(vpic_var(chroma_qp_index_offset));
-
-    u(1, vpic_field(deblocking_filter_control_present_flag));
-    u(1, vpic_field(constrained_intra_pred_flag));
-    u(1, vpic_field(redundant_pic_cnt_present_flag));
-    u(1, vpic_field(transform_8x8_mode_flag));
-
-    u(1, vpic_field(pic_scaling_matrix_present_flag));
-    if (vpic->pic_fields.bits.pic_scaling_matrix_present_flag) {
-        av_assert0(0 && "scaling matrices not supported");
-    }
-
-    se(vpic_var(second_chroma_qp_index_offset));
-
-    vaapi_encode_h264_write_trailing_rbsp(pbc);
-}
-
-static void vaapi_encode_h264_write_slice_header2(PutBitContext *pbc,
-                                                  VAAPIEncodeContext *ctx,
-                                                  VAAPIEncodePicture *pic,
-                                                  VAAPIEncodeSlice *slice)
-{
-    VAEncSequenceParameterBufferH264  *vseq = ctx->codec_sequence_params;
-    VAEncPictureParameterBufferH264   *vpic = pic->codec_picture_params;
-    VAEncSliceParameterBufferH264   *vslice = slice->codec_slice_params;
-    VAAPIEncodeH264Context            *priv = ctx->priv_data;
-    VAAPIEncodeH264MiscSequenceParams *mseq = &priv->misc_sequence_params;
-    VAAPIEncodeH264Slice            *pslice = slice->priv_data;
-    VAAPIEncodeH264MiscSliceParams  *mslice = &pslice->misc_slice_params;
-
-    vaapi_encode_h264_write_nal_header(pbc, mslice->nal_unit_type,
-                                       mslice->nal_ref_idc);
-
-    ue(vslice->macroblock_address, first_mb_in_slice);
-    ue(vslice_var(slice_type));
-    ue(vpic_var(pic_parameter_set_id));
-
-    if (mseq->separate_colour_plane_flag) {
-        u(2, mslice_var(colour_plane_id));
-    }
-
-    u(4 + vseq->seq_fields.bits.log2_max_frame_num_minus4,
-      (vpic->frame_num &
-       ((1 << (4 + vseq->seq_fields.bits.log2_max_frame_num_minus4)) - 1)),
-      frame_num);
-
-    if (!vseq->seq_fields.bits.frame_mbs_only_flag) {
-        u(1, mslice_var(field_pic_flag));
-        if (mslice->field_pic_flag)
-            u(1, mslice_var(bottom_field_flag));
-    }
-
-    if (vpic->pic_fields.bits.idr_pic_flag) {
-        ue(vslice_var(idr_pic_id));
-    }
-
-    if (vseq->seq_fields.bits.pic_order_cnt_type == 0) {
-        u(4 + vseq->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4,
-          vslice_var(pic_order_cnt_lsb));
-        if (mseq->bottom_field_pic_order_in_frame_present_flag &&
-            !mslice->field_pic_flag) {
-            se(vslice_var(delta_pic_order_cnt_bottom));
-        }
-    }
-
-    if (vseq->seq_fields.bits.pic_order_cnt_type == 1 &&
-        !vseq->seq_fields.bits.delta_pic_order_always_zero_flag) {
-        se(vslice_var(delta_pic_order_cnt[0]));
-        if (mseq->bottom_field_pic_order_in_frame_present_flag &&
-            !mslice->field_pic_flag) {
-            se(vslice_var(delta_pic_order_cnt[1]));
-        }
-    }
-
-    if (vpic->pic_fields.bits.redundant_pic_cnt_present_flag) {
-        ue(mslice_var(redundant_pic_cnt));
-    }
-
-    if (vslice->slice_type == SLICE_TYPE_B) {
-        u(1, vslice_var(direct_spatial_mv_pred_flag));
-    }
-
-    if (vslice->slice_type == SLICE_TYPE_P ||
-        vslice->slice_type == SLICE_TYPE_SP ||
-        vslice->slice_type == SLICE_TYPE_B) {
-        u(1, vslice_var(num_ref_idx_active_override_flag));
-        if (vslice->num_ref_idx_active_override_flag) {
-            ue(vslice_var(num_ref_idx_l0_active_minus1));
-            if (vslice->slice_type == SLICE_TYPE_B)
-                ue(vslice_var(num_ref_idx_l1_active_minus1));
-        }
-    }
-
-    if (mslice->nal_unit_type == 20 || mslice->nal_unit_type == 21) {
-        av_assert0(0 && "no MVC support");
-    } else {
-        if (vslice->slice_type % 5 != 2 && vslice->slice_type % 5 != 4) {
-            u(1, mslice_var(ref_pic_list_modification_flag_l0));
-            if (mslice->ref_pic_list_modification_flag_l0) {
-                av_assert0(0 && "ref pic list modification");
-            }
-        }
-        if (vslice->slice_type % 5 == 1) {
-            u(1, mslice_var(ref_pic_list_modification_flag_l1));
-            if (mslice->ref_pic_list_modification_flag_l1) {
-                av_assert0(0 && "ref pic list modification");
-            }
-        }
-    }
-
-    if ((vpic->pic_fields.bits.weighted_pred_flag &&
-         (vslice->slice_type == SLICE_TYPE_P ||
-          vslice->slice_type == SLICE_TYPE_SP)) ||
-        (vpic->pic_fields.bits.weighted_bipred_idc == 1 &&
-         vslice->slice_type == SLICE_TYPE_B)) {
-        av_assert0(0 && "prediction weights not supported");
-    }
-
-    av_assert0(mslice->nal_ref_idc > 0 ==
-               vpic->pic_fields.bits.reference_pic_flag);
-    if (mslice->nal_ref_idc != 0) {
-        if (vpic->pic_fields.bits.idr_pic_flag) {
-            u(1, mslice_var(no_output_of_prior_pics_flag));
-            u(1, mslice_var(long_term_reference_flag));
-        } else {
-            u(1, mslice_var(adaptive_ref_pic_marking_mode_flag));
-            if (mslice->adaptive_ref_pic_marking_mode_flag) {
-                av_assert0(0 && "MMCOs not supported");
-            }
-        }
-    }
-
-    if (vpic->pic_fields.bits.entropy_coding_mode_flag &&
-        vslice->slice_type != SLICE_TYPE_I &&
-        vslice->slice_type != SLICE_TYPE_SI) {
-        ue(vslice_var(cabac_init_idc));
-    }
-
-    se(vslice_var(slice_qp_delta));
-    if (vslice->slice_type == SLICE_TYPE_SP ||
-        vslice->slice_type == SLICE_TYPE_SI) {
-        if (vslice->slice_type == SLICE_TYPE_SP)
-            u(1, mslice_var(sp_for_switch_flag));
-        se(mslice_var(slice_qs_delta));
-    }
-
-    if (vpic->pic_fields.bits.deblocking_filter_control_present_flag) {
-        ue(vslice_var(disable_deblocking_filter_idc));
-        if (vslice->disable_deblocking_filter_idc != 1) {
-            se(vslice_var(slice_alpha_c0_offset_div2));
-            se(vslice_var(slice_beta_offset_div2));
-        }
-    }
-
-    if (mseq->num_slice_groups_minus1 > 0 &&
-        mseq->slice_group_map_type >= 3 && mseq->slice_group_map_type <= 5) {
-        av_assert0(0 && "slice groups not supported");
-    }
-
-    // No alignment - this need not be a byte boundary.
-}
-
-static void vaapi_encode_h264_write_buffering_period(PutBitContext *pbc,
-                                                     VAAPIEncodeContext *ctx,
-                                                     VAAPIEncodePicture *pic)
-{
-    VAAPIEncodeH264Context            *priv = ctx->priv_data;
-    VAAPIEncodeH264MiscSequenceParams *mseq = &priv->misc_sequence_params;
-    VAEncPictureParameterBufferH264   *vpic = pic->codec_picture_params;
-    int i;
-
-    ue(vpic_var(seq_parameter_set_id));
-
-    if (mseq->nal_hrd_parameters_present_flag) {
-        for (i = 0; i <= mseq->cpb_cnt_minus1; i++) {
-            u(mseq->initial_cpb_removal_delay_length_minus1 + 1,
-              mseq_var(initial_cpb_removal_delay));
-            u(mseq->initial_cpb_removal_delay_length_minus1 + 1,
-              mseq_var(initial_cpb_removal_delay_offset));
-        }
-    }
-    if (mseq->vcl_hrd_parameters_present_flag) {
-        av_assert0(0 && "vcl hrd parameters not supported");
-    }
-}
-
-static void vaapi_encode_h264_write_pic_timing(PutBitContext *pbc,
-                                               VAAPIEncodeContext *ctx,
-                                               VAAPIEncodePicture *pic)
-{
-    VAEncSequenceParameterBufferH264  *vseq = ctx->codec_sequence_params;
-    VAAPIEncodeH264Context            *priv = ctx->priv_data;
-    VAAPIEncodeH264MiscSequenceParams *mseq = &priv->misc_sequence_params;
-    int i, num_clock_ts;
-
-    if (mseq->nal_hrd_parameters_present_flag ||
-        mseq->vcl_hrd_parameters_present_flag) {
-        u(mseq->cpb_removal_delay_length_minus1 + 1,
-          2 * vseq->num_units_in_tick * priv->cpb_delay,
-          cpb_removal_delay);
-        u(mseq->dpb_output_delay_length_minus1 + 1,
-          2 * vseq->num_units_in_tick * priv->dpb_delay,
-          dpb_output_delay);
-    }
-    if (mseq->pic_struct_present_flag) {
-        u(4, mseq_var(pic_struct));
-        num_clock_ts = (mseq->pic_struct <= 2 ? 1 :
-                        mseq->pic_struct <= 4 ? 2 :
-                        mseq->pic_struct <= 8 ? 3 : 0);
-        for (i = 0; i < num_clock_ts; i++) {
-            u(1, 0, clock_timestamp_flag[i]);
-            // No full timestamp information.
-        }
-    }
-}
-
-static void vaapi_encode_h264_write_identifier(PutBitContext *pbc,
-                                               VAAPIEncodeContext *ctx,
-                                               VAAPIEncodePicture *pic)
-{
-    const char *lavc   = LIBAVCODEC_IDENT;
-    const char *vaapi  = VA_VERSION_S;
-    const char *driver = vaQueryVendorString(ctx->hwctx->display);
-    char tmp[256];
-    int i;
-
-    // Random (version 4) ISO 11578 UUID.
-    uint8_t uuid[16] = {
-        0x59, 0x94, 0x8b, 0x28, 0x11, 0xec, 0x45, 0xaf,
-        0x96, 0x75, 0x19, 0xd4, 0x1f, 0xea, 0xa9, 0x4d,
-    };
-
-    for (i = 0; i < 16; i++)
-        u(8, uuid[i], uuid_iso_iec_11578);
-
-    snprintf(tmp, sizeof(tmp), "%s / VAAPI %s / %s", lavc, vaapi, driver);
-    for (i = 0; i < sizeof(tmp) && tmp[i]; i++)
-        u(8, tmp[i], user_data_payload_byte);
-}
-
-static void vaapi_encode_h264_write_sei(PutBitContext *pbc,
-                                        VAAPIEncodeContext *ctx,
-                                        VAAPIEncodePicture *pic)
-{
-    VAAPIEncodeH264Context *priv = ctx->priv_data;
-    PutBitContext payload_bits;
-    char payload[256];
-    int payload_type, payload_size, i;
-    void (*write_payload)(PutBitContext *pbc,
-                          VAAPIEncodeContext *ctx,
-                          VAAPIEncodePicture *pic) = NULL;
-
-    vaapi_encode_h264_write_nal_header(pbc, H264_NAL_SEI, 0);
-
-    for (payload_type = 0; payload_type < 64; payload_type++) {
-        switch (payload_type) {
-        case H264_SEI_TYPE_BUFFERING_PERIOD:
-            if (!priv->send_timing_sei ||
-                pic->type != PICTURE_TYPE_IDR)
-                continue;
-            write_payload = &vaapi_encode_h264_write_buffering_period;
-            break;
-        case H264_SEI_TYPE_PIC_TIMING:
-            if (!priv->send_timing_sei)
-                continue;
-            write_payload = &vaapi_encode_h264_write_pic_timing;
-            break;
-        case H264_SEI_TYPE_USER_DATA_UNREGISTERED:
-            if (pic->encode_order != 0)
-                continue;
-            write_payload = &vaapi_encode_h264_write_identifier;
-            break;
-        default:
-            continue;
-        }
-
-        init_put_bits(&payload_bits, payload, sizeof(payload));
-        write_payload(&payload_bits, ctx, pic);
-        if (put_bits_count(&payload_bits) & 7) {
-            write_u(&payload_bits, 1, 1, bit_equal_to_one);
-            while (put_bits_count(&payload_bits) & 7)
-                write_u(&payload_bits, 1, 0, bit_equal_to_zero);
-        }
-        payload_size = put_bits_count(&payload_bits) / 8;
-        flush_put_bits(&payload_bits);
-
-        u(8, payload_type, last_payload_type_byte);
-        u(8, payload_size, last_payload_size_byte);
-        for (i = 0; i < payload_size; i++)
-            u(8, payload[i] & 0xff, sei_payload);
-    }
-
-    vaapi_encode_h264_write_trailing_rbsp(pbc);
+    return 0;
 }
 
 static int vaapi_encode_h264_write_sequence_header(AVCodecContext *avctx,
                                                    char *data, size_t *data_len)
 {
-    VAAPIEncodeContext *ctx = avctx->priv_data;
-    PutBitContext pbc;
-    char tmp[256];
+    VAAPIEncodeH264Context *priv = avctx->priv_data;
+    CodedBitstreamFragment   *au = &priv->current_access_unit;
     int err;
-    size_t nal_len, bit_len, bit_pos, next_len;
 
-    bit_len = *data_len;
-    bit_pos = 0;
+    if (priv->aud_needed) {
+        err = vaapi_encode_h264_add_nal(avctx, au, &priv->raw_aud);
+        if (err < 0)
+            goto fail;
+        priv->aud_needed = 0;
+    }
 
-    init_put_bits(&pbc, tmp, sizeof(tmp));
-    vaapi_encode_h264_write_sps(&pbc, ctx);
-    nal_len = put_bits_count(&pbc);
-    flush_put_bits(&pbc);
-
-    next_len = bit_len - bit_pos;
-    err = ff_vaapi_encode_h26x_nal_unit_to_byte_stream(data + bit_pos / 8,
-                                                       &next_len,
-                                                       tmp, nal_len);
+    err = vaapi_encode_h264_add_nal(avctx, au, &priv->raw_sps);
     if (err < 0)
-        return err;
-    bit_pos += next_len;
+        goto fail;
 
-    init_put_bits(&pbc, tmp, sizeof(tmp));
-    vaapi_encode_h264_write_pps(&pbc, ctx);
-    nal_len = put_bits_count(&pbc);
-    flush_put_bits(&pbc);
-
-    next_len = bit_len - bit_pos;
-    err = ff_vaapi_encode_h26x_nal_unit_to_byte_stream(data + bit_pos / 8,
-                                                       &next_len,
-                                                       tmp, nal_len);
+    err = vaapi_encode_h264_add_nal(avctx, au, &priv->raw_pps);
     if (err < 0)
-        return err;
-    bit_pos += next_len;
+        goto fail;
 
-    *data_len = bit_pos;
-    return 0;
+    err = vaapi_encode_h264_write_access_unit(avctx, data, data_len, au);
+fail:
+    ff_cbs_fragment_uninit(priv->cbc, au);
+    return err;
 }
 
 static int vaapi_encode_h264_write_slice_header(AVCodecContext *avctx,
@@ -730,18 +180,25 @@
                                                 VAAPIEncodeSlice *slice,
                                                 char *data, size_t *data_len)
 {
-    VAAPIEncodeContext *ctx = avctx->priv_data;
-    PutBitContext pbc;
-    char tmp[256];
-    size_t header_len;
+    VAAPIEncodeH264Context *priv = avctx->priv_data;
+    CodedBitstreamFragment   *au = &priv->current_access_unit;
+    int err;
 
-    init_put_bits(&pbc, tmp, sizeof(tmp));
-    vaapi_encode_h264_write_slice_header2(&pbc, ctx, pic, slice);
-    header_len = put_bits_count(&pbc);
-    flush_put_bits(&pbc);
+    if (priv->aud_needed) {
+        err = vaapi_encode_h264_add_nal(avctx, au, &priv->raw_aud);
+        if (err < 0)
+            goto fail;
+        priv->aud_needed = 0;
+    }
 
-    return ff_vaapi_encode_h26x_nal_unit_to_byte_stream(data, data_len,
-                                                        tmp, header_len);
+    err = vaapi_encode_h264_add_nal(avctx, au, &priv->raw_slice);
+    if (err < 0)
+        goto fail;
+
+    err = vaapi_encode_h264_write_access_unit(avctx, data, data_len, au);
+fail:
+    ff_cbs_fragment_uninit(priv->cbc, au);
+    return err;
 }
 
 static int vaapi_encode_h264_write_extra_header(AVCodecContext *avctx,
@@ -749,253 +206,531 @@
                                                 int index, int *type,
                                                 char *data, size_t *data_len)
 {
-    VAAPIEncodeContext *ctx = avctx->priv_data;
-    PutBitContext pbc;
-    char tmp[256];
-    size_t header_len;
+    VAAPIEncodeH264Context *priv = avctx->priv_data;
+    CodedBitstreamFragment   *au = &priv->current_access_unit;
+    int err, i;
 
-    if (index == 0 && ctx->va_rc_mode == VA_RC_CBR) {
+    if (priv->sei_needed) {
+        H264RawSEI *sei = &priv->raw_sei;
+
+        if (priv->aud_needed) {
+            err = vaapi_encode_h264_add_nal(avctx, au, &priv->raw_aud);
+            if (err < 0)
+                goto fail;
+            priv->aud_needed = 0;
+        }
+
+        *sei = (H264RawSEI) {
+            .nal_unit_header = {
+                .nal_unit_type = H264_NAL_SEI,
+            },
+        };
+
+        i = 0;
+
+        if (priv->sei_needed & SEI_IDENTIFIER) {
+            sei->payload[i].payload_type = H264_SEI_TYPE_USER_DATA_UNREGISTERED;
+            sei->payload[i].payload.user_data_unregistered = priv->sei_identifier;
+            ++i;
+        }
+        if (priv->sei_needed & SEI_TIMING) {
+            if (pic->type == PICTURE_TYPE_IDR) {
+                sei->payload[i].payload_type = H264_SEI_TYPE_BUFFERING_PERIOD;
+                sei->payload[i].payload.buffering_period = priv->sei_buffering_period;
+                ++i;
+            }
+            sei->payload[i].payload_type = H264_SEI_TYPE_PIC_TIMING;
+            sei->payload[i].payload.pic_timing = priv->sei_pic_timing;
+            ++i;
+        }
+        if (priv->sei_needed & SEI_RECOVERY_POINT) {
+            sei->payload[i].payload_type = H264_SEI_TYPE_RECOVERY_POINT;
+            sei->payload[i].payload.recovery_point = priv->sei_recovery_point;
+            ++i;
+        }
+
+        sei->payload_count = i;
+        av_assert0(sei->payload_count > 0);
+
+        err = vaapi_encode_h264_add_nal(avctx, au, sei);
+        if (err < 0)
+            goto fail;
+        priv->sei_needed = 0;
+
+        err = vaapi_encode_h264_write_access_unit(avctx, data, data_len, au);
+        if (err < 0)
+            goto fail;
+
+        ff_cbs_fragment_uninit(priv->cbc, au);
+
+        *type = VAEncPackedHeaderRawData;
+        return 0;
+
+#if !CONFIG_VAAPI_1
+    } else if (priv->sei_cbr_workaround_needed) {
+        // Insert a zero-length header using the old SEI type.  This is
+        // required to avoid triggering broken behaviour on Intel platforms
+        // in CBR mode where an invalid SEI message is generated by the
+        // driver and inserted into the stream.
+        *data_len = 0;
         *type = VAEncPackedHeaderH264_SEI;
-
-        init_put_bits(&pbc, tmp, sizeof(tmp));
-        vaapi_encode_h264_write_sei(&pbc, ctx, pic);
-        header_len = put_bits_count(&pbc);
-        flush_put_bits(&pbc);
-
-        return ff_vaapi_encode_h26x_nal_unit_to_byte_stream(data, data_len,
-                                                            tmp, header_len);
+        priv->sei_cbr_workaround_needed = 0;
+        return 0;
+#endif
 
     } else {
         return AVERROR_EOF;
     }
+
+fail:
+    ff_cbs_fragment_uninit(priv->cbc, au);
+    return err;
 }
 
 static int vaapi_encode_h264_init_sequence_params(AVCodecContext *avctx)
 {
-    VAAPIEncodeContext                 *ctx = avctx->priv_data;
-    VAEncSequenceParameterBufferH264  *vseq = ctx->codec_sequence_params;
-    VAEncPictureParameterBufferH264   *vpic = ctx->codec_picture_params;
-    VAAPIEncodeH264Context            *priv = ctx->priv_data;
-    VAAPIEncodeH264MiscSequenceParams *mseq = &priv->misc_sequence_params;
-    VAAPIEncodeH264Options             *opt =
-        (VAAPIEncodeH264Options*)ctx->codec_options_data;
-    int i;
+    VAAPIEncodeContext                *ctx = avctx->priv_data;
+    VAAPIEncodeH264Context           *priv = avctx->priv_data;
+    H264RawSPS                        *sps = &priv->raw_sps;
+    H264RawPPS                        *pps = &priv->raw_pps;
+    VAEncSequenceParameterBufferH264 *vseq = ctx->codec_sequence_params;
+    VAEncPictureParameterBufferH264  *vpic = ctx->codec_picture_params;
+    int dpb_frames;
 
-    {
-        vseq->seq_parameter_set_id = 0;
+    memset(&priv->current_access_unit, 0,
+           sizeof(priv->current_access_unit));
 
-        vseq->level_idc = avctx->level;
+    memset(sps, 0, sizeof(*sps));
+    memset(pps, 0, sizeof(*pps));
 
-        vseq->max_num_ref_frames = 1 + (avctx->max_b_frames > 0);
+    sps->nal_unit_header.nal_ref_idc   = 3;
+    sps->nal_unit_header.nal_unit_type = H264_NAL_SPS;
 
-        vseq->picture_width_in_mbs  = priv->mb_width;
-        vseq->picture_height_in_mbs = priv->mb_height;
+    sps->profile_idc = avctx->profile & 0xff;
 
-        vseq->seq_fields.bits.chroma_format_idc = 1;
-        vseq->seq_fields.bits.frame_mbs_only_flag = 1;
-        vseq->seq_fields.bits.direct_8x8_inference_flag = 1;
-        vseq->seq_fields.bits.log2_max_frame_num_minus4 = 4;
-        vseq->seq_fields.bits.pic_order_cnt_type = 0;
-        vseq->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 =
-            av_clip(av_log2(avctx->max_b_frames + 1) - 2, 0, 12);
+    if (avctx->profile == FF_PROFILE_H264_CONSTRAINED_BASELINE ||
+        avctx->profile == FF_PROFILE_H264_MAIN)
+        sps->constraint_set1_flag = 1;
 
-        if (avctx->width  != ctx->surface_width ||
-            avctx->height != ctx->surface_height) {
-            vseq->frame_cropping_flag = 1;
+    if (avctx->profile == FF_PROFILE_H264_HIGH)
+        sps->constraint_set3_flag = ctx->gop_size == 1;
 
-            vseq->frame_crop_left_offset   = 0;
-            vseq->frame_crop_right_offset  =
-                (ctx->surface_width - avctx->width) / 2;
-            vseq->frame_crop_top_offset    = 0;
-            vseq->frame_crop_bottom_offset =
-                (ctx->surface_height - avctx->height) / 2;
+    if (avctx->profile == FF_PROFILE_H264_MAIN ||
+        avctx->profile == FF_PROFILE_H264_HIGH) {
+        sps->constraint_set4_flag = 1;
+        sps->constraint_set5_flag = ctx->b_per_p == 0;
+    }
+
+    if (ctx->gop_size == 1)
+        dpb_frames = 0;
+    else
+        dpb_frames = 1 + (ctx->b_per_p > 0);
+
+    if (avctx->level != FF_LEVEL_UNKNOWN) {
+        sps->level_idc = avctx->level;
+    } else {
+        const H264LevelDescriptor *level;
+
+        level = ff_h264_guess_level(sps->profile_idc,
+                                    avctx->bit_rate,
+                                    priv->mb_width  * 16,
+                                    priv->mb_height * 16,
+                                    dpb_frames);
+        if (level) {
+            av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name);
+            if (level->constraint_set3_flag)
+                sps->constraint_set3_flag = 1;
+            sps->level_idc = level->level_idc;
         } else {
-            vseq->frame_cropping_flag = 0;
+            av_log(avctx, AV_LOG_WARNING, "Stream will not conform "
+                   "to any level: using level 6.2.\n");
+            sps->level_idc = 62;
         }
+    }
 
-        vseq->vui_parameters_present_flag = 1;
-        if (avctx->sample_aspect_ratio.num != 0) {
-            vseq->vui_fields.bits.aspect_ratio_info_present_flag = 1;
-            // There is a large enum of these which we could support
-            // individually rather than using the generic X/Y form?
-            if (avctx->sample_aspect_ratio.num ==
-                avctx->sample_aspect_ratio.den) {
-                vseq->aspect_ratio_idc = 1;
-            } else {
-                vseq->aspect_ratio_idc = 255; // Extended SAR.
-                vseq->sar_width  = avctx->sample_aspect_ratio.num;
-                vseq->sar_height = avctx->sample_aspect_ratio.den;
+    sps->seq_parameter_set_id = 0;
+    sps->chroma_format_idc    = 1;
+
+    sps->log2_max_frame_num_minus4 = 4;
+    sps->pic_order_cnt_type        = 0;
+    sps->log2_max_pic_order_cnt_lsb_minus4 =
+        av_clip(av_log2(ctx->b_per_p + 1) - 2, 0, 12);
+
+    sps->max_num_ref_frames = dpb_frames;
+
+    sps->pic_width_in_mbs_minus1        = priv->mb_width  - 1;
+    sps->pic_height_in_map_units_minus1 = priv->mb_height - 1;
+
+    sps->frame_mbs_only_flag = 1;
+    sps->direct_8x8_inference_flag = 1;
+
+    if (avctx->width  != 16 * priv->mb_width ||
+        avctx->height != 16 * priv->mb_height) {
+        sps->frame_cropping_flag = 1;
+
+        sps->frame_crop_left_offset   = 0;
+        sps->frame_crop_right_offset  =
+            (16 * priv->mb_width - avctx->width) / 2;
+        sps->frame_crop_top_offset    = 0;
+        sps->frame_crop_bottom_offset =
+            (16 * priv->mb_height - avctx->height) / 2;
+    } else {
+        sps->frame_cropping_flag = 0;
+    }
+
+    sps->vui_parameters_present_flag = 1;
+
+    if (avctx->sample_aspect_ratio.num != 0 &&
+        avctx->sample_aspect_ratio.den != 0) {
+        static const AVRational sar_idc[] = {
+            {   0,  0 },
+            {   1,  1 }, {  12, 11 }, {  10, 11 }, {  16, 11 },
+            {  40, 33 }, {  24, 11 }, {  20, 11 }, {  32, 11 },
+            {  80, 33 }, {  18, 11 }, {  15, 11 }, {  64, 33 },
+            { 160, 99 }, {   4,  3 }, {   3,  2 }, {   2,  1 },
+        };
+        int i;
+        for (i = 0; i < FF_ARRAY_ELEMS(sar_idc); i++) {
+            if (avctx->sample_aspect_ratio.num == sar_idc[i].num &&
+                avctx->sample_aspect_ratio.den == sar_idc[i].den) {
+                sps->vui.aspect_ratio_idc = i;
+                break;
             }
         }
+        if (i >= FF_ARRAY_ELEMS(sar_idc)) {
+            sps->vui.aspect_ratio_idc = 255;
+            sps->vui.sar_width  = avctx->sample_aspect_ratio.num;
+            sps->vui.sar_height = avctx->sample_aspect_ratio.den;
+        }
+        sps->vui.aspect_ratio_info_present_flag = 1;
+    }
+
+    if (avctx->color_range     != AVCOL_RANGE_UNSPECIFIED ||
+        avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
+        avctx->color_trc       != AVCOL_TRC_UNSPECIFIED ||
+        avctx->colorspace      != AVCOL_SPC_UNSPECIFIED) {
+        sps->vui.video_signal_type_present_flag = 1;
+        sps->vui.video_format      = 5; // Unspecified.
+        sps->vui.video_full_range_flag =
+            avctx->color_range == AVCOL_RANGE_JPEG;
+
         if (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
             avctx->color_trc       != AVCOL_TRC_UNSPECIFIED ||
             avctx->colorspace      != AVCOL_SPC_UNSPECIFIED) {
-            mseq->video_signal_type_present_flag = 1;
-            mseq->video_format             = 5; // Unspecified.
-            mseq->video_full_range_flag    = 0;
-            mseq->colour_description_present_flag = 1;
-            // These enums are derived from the standard and hence
-            // we can just use the values directly.
-            mseq->colour_primaries         = avctx->color_primaries;
-            mseq->transfer_characteristics = avctx->color_trc;
-            mseq->matrix_coefficients      = avctx->colorspace;
+            sps->vui.colour_description_present_flag = 1;
+            sps->vui.colour_primaries         = avctx->color_primaries;
+            sps->vui.transfer_characteristics = avctx->color_trc;
+            sps->vui.matrix_coefficients      = avctx->colorspace;
         }
-
-        vseq->vui_fields.bits.bitstream_restriction_flag = 1;
-        mseq->motion_vectors_over_pic_boundaries_flag = 1;
-        mseq->max_bytes_per_pic_denom = 0;
-        mseq->max_bits_per_mb_denom   = 0;
-        vseq->vui_fields.bits.log2_max_mv_length_horizontal = 16;
-        vseq->vui_fields.bits.log2_max_mv_length_vertical   = 16;
-
-        mseq->max_num_reorder_frames = (avctx->max_b_frames > 0);
-        mseq->max_dec_pic_buffering  = vseq->max_num_ref_frames;
-
-        vseq->bits_per_second = avctx->bit_rate;
-
-        vseq->vui_fields.bits.timing_info_present_flag = 1;
-        if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
-            vseq->num_units_in_tick = avctx->framerate.den;
-            vseq->time_scale        = 2 * avctx->framerate.num;
-            mseq->fixed_frame_rate_flag = 1;
-        } else {
-            vseq->num_units_in_tick = avctx->time_base.num;
-            vseq->time_scale        = 2 * avctx->time_base.den;
-            mseq->fixed_frame_rate_flag = 0;
-        }
-
-        if (ctx->va_rc_mode == VA_RC_CBR) {
-            priv->send_timing_sei = 1;
-            mseq->nal_hrd_parameters_present_flag = 1;
-
-            mseq->cpb_cnt_minus1 = 0;
-
-            // Try to scale these to a sensible range so that the
-            // golomb encode of the value is not overlong.
-            mseq->bit_rate_scale =
-                av_clip_uintp2(av_log2(avctx->bit_rate) - 15 - 6, 4);
-            mseq->bit_rate_value_minus1[0] =
-                (avctx->bit_rate >> mseq->bit_rate_scale + 6) - 1;
-
-            mseq->cpb_size_scale =
-                av_clip_uintp2(av_log2(ctx->hrd_params.hrd.buffer_size) - 15 - 4, 4);
-            mseq->cpb_size_value_minus1[0] =
-                (ctx->hrd_params.hrd.buffer_size >> mseq->cpb_size_scale + 4) - 1;
-
-            // CBR mode isn't actually available here, despite naming.
-            mseq->cbr_flag[0] = 0;
-
-            mseq->initial_cpb_removal_delay_length_minus1 = 23;
-            mseq->cpb_removal_delay_length_minus1         = 23;
-            mseq->dpb_output_delay_length_minus1          = 7;
-            mseq->time_offset_length = 0;
-
-            // This calculation can easily overflow 32 bits.
-            mseq->initial_cpb_removal_delay = 90000 *
-                (uint64_t)ctx->hrd_params.hrd.initial_buffer_fullness /
-                ctx->hrd_params.hrd.buffer_size;
-
-            mseq->initial_cpb_removal_delay_offset = 0;
-        } else {
-            priv->send_timing_sei = 0;
-            mseq->nal_hrd_parameters_present_flag = 0;
-        }
-
-        vseq->intra_period     = avctx->gop_size;
-        vseq->intra_idr_period = avctx->gop_size;
-        vseq->ip_period        = ctx->b_per_p + 1;
+    } else {
+        sps->vui.video_format             = 5;
+        sps->vui.video_full_range_flag    = 0;
+        sps->vui.colour_primaries         = avctx->color_primaries;
+        sps->vui.transfer_characteristics = avctx->color_trc;
+        sps->vui.matrix_coefficients      = avctx->colorspace;
     }
 
-    {
-        vpic->CurrPic.picture_id = VA_INVALID_ID;
-        vpic->CurrPic.flags      = VA_PICTURE_H264_INVALID;
-
-        for (i = 0; i < FF_ARRAY_ELEMS(vpic->ReferenceFrames); i++) {
-            vpic->ReferenceFrames[i].picture_id = VA_INVALID_ID;
-            vpic->ReferenceFrames[i].flags      = VA_PICTURE_H264_INVALID;
-        }
-
-        vpic->coded_buf = VA_INVALID_ID;
-
-        vpic->pic_parameter_set_id = 0;
-        vpic->seq_parameter_set_id = 0;
-
-        vpic->num_ref_idx_l0_active_minus1 = 0;
-        vpic->num_ref_idx_l1_active_minus1 = 0;
-
-        vpic->pic_fields.bits.entropy_coding_mode_flag =
-            opt->coder ? ((avctx->profile & 0xff) != 66) : 0;
-        vpic->pic_fields.bits.weighted_pred_flag = 0;
-        vpic->pic_fields.bits.weighted_bipred_idc = 0;
-        vpic->pic_fields.bits.transform_8x8_mode_flag =
-            ((avctx->profile & 0xff) >= 100);
-
-        vpic->pic_init_qp = priv->fixed_qp_idr;
+    if (avctx->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED) {
+        sps->vui.chroma_loc_info_present_flag = 1;
+        sps->vui.chroma_sample_loc_type_top_field    =
+        sps->vui.chroma_sample_loc_type_bottom_field =
+            avctx->chroma_sample_location - 1;
     }
 
-    {
-        mseq->profile_idc = avctx->profile & 0xff;
-
-        if (avctx->profile & FF_PROFILE_H264_CONSTRAINED)
-            mseq->constraint_set1_flag = 1;
-        if (avctx->profile & FF_PROFILE_H264_INTRA)
-            mseq->constraint_set3_flag = 1;
+    sps->vui.timing_info_present_flag = 1;
+    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
+        sps->vui.num_units_in_tick = avctx->framerate.den;
+        sps->vui.time_scale        = 2 * avctx->framerate.num;
+        sps->vui.fixed_frame_rate_flag = 1;
+    } else {
+        sps->vui.num_units_in_tick = avctx->time_base.num;
+        sps->vui.time_scale        = 2 * avctx->time_base.den;
+        sps->vui.fixed_frame_rate_flag = 0;
     }
 
+    if (priv->sei & SEI_TIMING) {
+        H264RawHRD *hrd = &sps->vui.nal_hrd_parameters;
+        H264RawSEIBufferingPeriod *bp = &priv->sei_buffering_period;
+
+        sps->vui.nal_hrd_parameters_present_flag = 1;
+
+        hrd->cpb_cnt_minus1 = 0;
+
+        // Try to scale these to a sensible range so that the
+        // golomb encode of the value is not overlong.
+        hrd->bit_rate_scale =
+            av_clip_uintp2(av_log2(ctx->va_bit_rate) - 15 - 6, 4);
+        hrd->bit_rate_value_minus1[0] =
+            (ctx->va_bit_rate >> hrd->bit_rate_scale + 6) - 1;
+
+        hrd->cpb_size_scale =
+            av_clip_uintp2(av_log2(ctx->hrd_params.hrd.buffer_size) - 15 - 4, 4);
+        hrd->cpb_size_value_minus1[0] =
+            (ctx->hrd_params.hrd.buffer_size >> hrd->cpb_size_scale + 4) - 1;
+
+        // CBR mode as defined for the HRD cannot be achieved without filler
+        // data, so this flag cannot be set even with VAAPI CBR modes.
+        hrd->cbr_flag[0] = 0;
+
+        hrd->initial_cpb_removal_delay_length_minus1 = 23;
+        hrd->cpb_removal_delay_length_minus1         = 23;
+        hrd->dpb_output_delay_length_minus1          = 7;
+        hrd->time_offset_length                      = 0;
+
+        bp->seq_parameter_set_id = sps->seq_parameter_set_id;
+
+        // This calculation can easily overflow 32 bits.
+        bp->nal.initial_cpb_removal_delay[0] = 90000 *
+            (uint64_t)ctx->hrd_params.hrd.initial_buffer_fullness /
+            ctx->hrd_params.hrd.buffer_size;
+        bp->nal.initial_cpb_removal_delay_offset[0] = 0;
+    } else {
+        sps->vui.nal_hrd_parameters_present_flag = 0;
+        sps->vui.low_delay_hrd_flag = 1 - sps->vui.fixed_frame_rate_flag;
+    }
+
+    sps->vui.bitstream_restriction_flag    = 1;
+    sps->vui.motion_vectors_over_pic_boundaries_flag = 1;
+    sps->vui.log2_max_mv_length_horizontal = 15;
+    sps->vui.log2_max_mv_length_vertical   = 15;
+    sps->vui.max_num_reorder_frames        = (ctx->b_per_p > 0);
+    sps->vui.max_dec_frame_buffering       = sps->max_num_ref_frames;
+
+    pps->nal_unit_header.nal_ref_idc = 3;
+    pps->nal_unit_header.nal_unit_type = H264_NAL_PPS;
+
+    pps->pic_parameter_set_id = 0;
+    pps->seq_parameter_set_id = 0;
+
+    pps->entropy_coding_mode_flag =
+        !(sps->profile_idc == FF_PROFILE_H264_BASELINE ||
+          sps->profile_idc == FF_PROFILE_H264_EXTENDED ||
+          sps->profile_idc == FF_PROFILE_H264_CAVLC_444);
+    if (!priv->coder && pps->entropy_coding_mode_flag)
+        pps->entropy_coding_mode_flag = 0;
+
+    pps->num_ref_idx_l0_default_active_minus1 = 0;
+    pps->num_ref_idx_l1_default_active_minus1 = 0;
+
+    pps->pic_init_qp_minus26 = priv->fixed_qp_idr - 26;
+
+    if (sps->profile_idc == FF_PROFILE_H264_BASELINE ||
+        sps->profile_idc == FF_PROFILE_H264_EXTENDED ||
+        sps->profile_idc == FF_PROFILE_H264_MAIN) {
+        pps->more_rbsp_data = 0;
+    } else {
+        pps->more_rbsp_data = 1;
+
+        pps->transform_8x8_mode_flag = 1;
+    }
+
+    *vseq = (VAEncSequenceParameterBufferH264) {
+        .seq_parameter_set_id = sps->seq_parameter_set_id,
+        .level_idc        = sps->level_idc,
+        .intra_period     = ctx->gop_size,
+        .intra_idr_period = ctx->gop_size,
+        .ip_period        = ctx->b_per_p + 1,
+
+        .bits_per_second       = ctx->va_bit_rate,
+        .max_num_ref_frames    = sps->max_num_ref_frames,
+        .picture_width_in_mbs  = sps->pic_width_in_mbs_minus1 + 1,
+        .picture_height_in_mbs = sps->pic_height_in_map_units_minus1 + 1,
+
+        .seq_fields.bits = {
+            .chroma_format_idc                 = sps->chroma_format_idc,
+            .frame_mbs_only_flag               = sps->frame_mbs_only_flag,
+            .mb_adaptive_frame_field_flag      = sps->mb_adaptive_frame_field_flag,
+            .seq_scaling_matrix_present_flag   = sps->seq_scaling_matrix_present_flag,
+            .direct_8x8_inference_flag         = sps->direct_8x8_inference_flag,
+            .log2_max_frame_num_minus4         = sps->log2_max_frame_num_minus4,
+            .pic_order_cnt_type                = sps->pic_order_cnt_type,
+            .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4,
+            .delta_pic_order_always_zero_flag  = sps->delta_pic_order_always_zero_flag,
+        },
+
+        .bit_depth_luma_minus8   = sps->bit_depth_luma_minus8,
+        .bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8,
+
+        .frame_cropping_flag      = sps->frame_cropping_flag,
+        .frame_crop_left_offset   = sps->frame_crop_left_offset,
+        .frame_crop_right_offset  = sps->frame_crop_right_offset,
+        .frame_crop_top_offset    = sps->frame_crop_top_offset,
+        .frame_crop_bottom_offset = sps->frame_crop_bottom_offset,
+
+        .vui_parameters_present_flag = sps->vui_parameters_present_flag,
+
+        .vui_fields.bits = {
+            .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag,
+            .timing_info_present_flag       = sps->vui.timing_info_present_flag,
+            .bitstream_restriction_flag     = sps->vui.bitstream_restriction_flag,
+            .log2_max_mv_length_horizontal  = sps->vui.log2_max_mv_length_horizontal,
+            .log2_max_mv_length_vertical    = sps->vui.log2_max_mv_length_vertical,
+        },
+
+        .aspect_ratio_idc  = sps->vui.aspect_ratio_idc,
+        .sar_width         = sps->vui.sar_width,
+        .sar_height        = sps->vui.sar_height,
+        .num_units_in_tick = sps->vui.num_units_in_tick,
+        .time_scale        = sps->vui.time_scale,
+    };
+
+    *vpic = (VAEncPictureParameterBufferH264) {
+        .CurrPic = {
+            .picture_id = VA_INVALID_ID,
+            .flags      = VA_PICTURE_H264_INVALID,
+        },
+
+        .coded_buf = VA_INVALID_ID,
+
+        .pic_parameter_set_id = pps->pic_parameter_set_id,
+        .seq_parameter_set_id = pps->seq_parameter_set_id,
+
+        .pic_init_qp                  = pps->pic_init_qp_minus26 + 26,
+        .num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1,
+        .num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1,
+
+        .chroma_qp_index_offset        = pps->chroma_qp_index_offset,
+        .second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset,
+
+        .pic_fields.bits = {
+            .entropy_coding_mode_flag        = pps->entropy_coding_mode_flag,
+            .weighted_pred_flag              = pps->weighted_pred_flag,
+            .weighted_bipred_idc             = pps->weighted_bipred_idc,
+            .constrained_intra_pred_flag     = pps->constrained_intra_pred_flag,
+            .transform_8x8_mode_flag         = pps->transform_8x8_mode_flag,
+            .deblocking_filter_control_present_flag =
+                pps->deblocking_filter_control_present_flag,
+            .redundant_pic_cnt_present_flag  = pps->redundant_pic_cnt_present_flag,
+            .pic_order_present_flag          =
+                pps->bottom_field_pic_order_in_frame_present_flag,
+            .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag,
+        },
+    };
+
     return 0;
 }
 
 static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
                                                  VAAPIEncodePicture *pic)
 {
-    VAAPIEncodeContext                *ctx = avctx->priv_data;
-    VAEncSequenceParameterBufferH264 *vseq = ctx->codec_sequence_params;
-    VAEncPictureParameterBufferH264  *vpic = pic->codec_picture_params;
-    VAAPIEncodeH264Context           *priv = ctx->priv_data;
+    VAAPIEncodeContext               *ctx = avctx->priv_data;
+    VAAPIEncodeH264Context          *priv = avctx->priv_data;
+    H264RawSPS                       *sps = &priv->raw_sps;
+    VAEncPictureParameterBufferH264 *vpic = pic->codec_picture_params;
     int i;
 
+    memset(&priv->current_access_unit, 0,
+           sizeof(priv->current_access_unit));
+
     if (pic->type == PICTURE_TYPE_IDR) {
         av_assert0(pic->display_order == pic->encode_order);
-        vpic->frame_num = 0;
+        priv->frame_num      = 0;
         priv->next_frame_num = 1;
-        priv->cpb_delay = 0;
+        priv->cpb_delay      = 0;
         priv->last_idr_frame = pic->display_order;
+        ++priv->idr_pic_count;
+
+        priv->slice_type       = 7;
+        priv->primary_pic_type = 0;
     } else {
-        vpic->frame_num = priv->next_frame_num;
+        priv->frame_num      = priv->next_frame_num;
+
         if (pic->type != PICTURE_TYPE_B) {
-            // nal_ref_idc != 0
-            ++priv->next_frame_num;
+            // Reference picture, so frame_num advances.
+            priv->next_frame_num = (priv->frame_num + 1) &
+                ((1 << (4 + sps->log2_max_frame_num_minus4)) - 1);
         }
         ++priv->cpb_delay;
+
+        if (pic->type == PICTURE_TYPE_I) {
+            priv->slice_type       = 7;
+            priv->primary_pic_type = 0;
+        } else if (pic->type == PICTURE_TYPE_P) {
+            priv->slice_type       = 5;
+            priv->primary_pic_type = 1;
+        } else {
+            priv->slice_type       = 6;
+            priv->primary_pic_type = 2;
+        }
     }
-    priv->dpb_delay = pic->display_order - pic->encode_order + 1;
+    priv->pic_order_cnt = pic->display_order - priv->last_idr_frame;
+    priv->dpb_delay     = pic->display_order - pic->encode_order + 1;
 
-    vpic->frame_num = vpic->frame_num &
-        ((1 << (4 + vseq->seq_fields.bits.log2_max_frame_num_minus4)) - 1);
+    if (priv->aud) {
+        priv->aud_needed = 1;
+        priv->raw_aud = (H264RawAUD) {
+            .nal_unit_header = {
+                .nal_unit_type = H264_NAL_AUD,
+            },
+            .primary_pic_type  = priv->primary_pic_type,
+        };
+    } else {
+        priv->aud_needed = 0;
+    }
 
-    vpic->CurrPic.picture_id          = pic->recon_surface;
-    vpic->CurrPic.frame_idx           = vpic->frame_num;
-    vpic->CurrPic.flags               = 0;
-    vpic->CurrPic.TopFieldOrderCnt    = pic->display_order - priv->last_idr_frame;
-    vpic->CurrPic.BottomFieldOrderCnt = pic->display_order - priv->last_idr_frame;
+    priv->sei_needed = 0;
+
+    if (priv->sei & SEI_IDENTIFIER && pic->encode_order == 0)
+        priv->sei_needed |= SEI_IDENTIFIER;
+#if !CONFIG_VAAPI_1
+    if (ctx->va_rc_mode == VA_RC_CBR)
+        priv->sei_cbr_workaround_needed = 1;
+#endif
+
+    if (priv->sei & SEI_TIMING) {
+        priv->sei_pic_timing = (H264RawSEIPicTiming) {
+            .cpb_removal_delay = 2 * priv->cpb_delay,
+            .dpb_output_delay  = 2 * priv->dpb_delay,
+        };
+
+        priv->sei_needed |= SEI_TIMING;
+    }
+
+    if (priv->sei & SEI_RECOVERY_POINT && pic->type == PICTURE_TYPE_I) {
+        priv->sei_recovery_point = (H264RawSEIRecoveryPoint) {
+            .recovery_frame_cnt = 0,
+            .exact_match_flag   = 1,
+            .broken_link_flag   = ctx->b_per_p > 0,
+        };
+
+        priv->sei_needed |= SEI_RECOVERY_POINT;
+    }
+
+    vpic->CurrPic = (VAPictureH264) {
+        .picture_id          = pic->recon_surface,
+        .frame_idx           = priv->frame_num,
+        .flags               = 0,
+        .TopFieldOrderCnt    = priv->pic_order_cnt,
+        .BottomFieldOrderCnt = priv->pic_order_cnt,
+    };
 
     for (i = 0; i < pic->nb_refs; i++) {
         VAAPIEncodePicture *ref = pic->refs[i];
+        unsigned int frame_num = (ref->encode_order - priv->last_idr_frame) &
+            ((1 << (4 + sps->log2_max_frame_num_minus4)) - 1);
+        unsigned int pic_order_cnt = ref->display_order - priv->last_idr_frame;
+
         av_assert0(ref && ref->encode_order < pic->encode_order);
-        vpic->ReferenceFrames[i].picture_id = ref->recon_surface;
-        vpic->ReferenceFrames[i].frame_idx  = ref->encode_order;
-        vpic->ReferenceFrames[i].flags = VA_PICTURE_H264_SHORT_TERM_REFERENCE;
-        vpic->ReferenceFrames[i].TopFieldOrderCnt    = ref->display_order - priv->last_idr_frame;
-        vpic->ReferenceFrames[i].BottomFieldOrderCnt = ref->display_order - priv->last_idr_frame;
+        vpic->ReferenceFrames[i] = (VAPictureH264) {
+            .picture_id          = ref->recon_surface,
+            .frame_idx           = frame_num,
+            .flags               = VA_PICTURE_H264_SHORT_TERM_REFERENCE,
+            .TopFieldOrderCnt    = pic_order_cnt,
+            .BottomFieldOrderCnt = pic_order_cnt,
+        };
     }
     for (; i < FF_ARRAY_ELEMS(vpic->ReferenceFrames); i++) {
-        vpic->ReferenceFrames[i].picture_id = VA_INVALID_ID;
-        vpic->ReferenceFrames[i].flags = VA_PICTURE_H264_INVALID;
+        vpic->ReferenceFrames[i] = (VAPictureH264) {
+            .picture_id = VA_INVALID_ID,
+            .flags      = VA_PICTURE_H264_INVALID,
+        };
     }
 
     vpic->coded_buf = pic->output_buffer;
 
-    vpic->pic_fields.bits.idr_pic_flag = (pic->type == PICTURE_TYPE_IDR);
+    vpic->frame_num = priv->frame_num;
+
+    vpic->pic_fields.bits.idr_pic_flag       = (pic->type == PICTURE_TYPE_IDR);
     vpic->pic_fields.bits.reference_pic_flag = (pic->type != PICTURE_TYPE_B);
 
     pic->nb_slices = 1;
@@ -1007,58 +742,56 @@
                                                VAAPIEncodePicture *pic,
                                                VAAPIEncodeSlice *slice)
 {
-    VAAPIEncodeContext                 *ctx = avctx->priv_data;
-    VAEncSequenceParameterBufferH264  *vseq = ctx->codec_sequence_params;
-    VAEncPictureParameterBufferH264   *vpic = pic->codec_picture_params;
-    VAEncSliceParameterBufferH264   *vslice = slice->codec_slice_params;
-    VAAPIEncodeH264Context            *priv = ctx->priv_data;
-    VAAPIEncodeH264Slice            *pslice;
-    VAAPIEncodeH264MiscSliceParams  *mslice;
+    VAAPIEncodeH264Context          *priv = avctx->priv_data;
+    H264RawSPS                       *sps = &priv->raw_sps;
+    H264RawPPS                       *pps = &priv->raw_pps;
+    H264RawSliceHeader                *sh = &priv->raw_slice.header;
+    VAEncPictureParameterBufferH264 *vpic = pic->codec_picture_params;
+    VAEncSliceParameterBufferH264 *vslice = slice->codec_slice_params;
     int i;
 
-    slice->priv_data = av_mallocz(sizeof(*pslice));
-    if (!slice->priv_data)
-        return AVERROR(ENOMEM);
-    pslice = slice->priv_data;
-    mslice = &pslice->misc_slice_params;
-
-    if (pic->type == PICTURE_TYPE_IDR)
-        mslice->nal_unit_type = H264_NAL_IDR_SLICE;
-    else
-        mslice->nal_unit_type = H264_NAL_SLICE;
-
-    switch (pic->type) {
-    case PICTURE_TYPE_IDR:
-        vslice->slice_type  = SLICE_TYPE_I;
-        mslice->nal_ref_idc = 3;
-        break;
-    case PICTURE_TYPE_I:
-        vslice->slice_type  = SLICE_TYPE_I;
-        mslice->nal_ref_idc = 2;
-        break;
-    case PICTURE_TYPE_P:
-        vslice->slice_type  = SLICE_TYPE_P;
-        mslice->nal_ref_idc = 1;
-        break;
-    case PICTURE_TYPE_B:
-        vslice->slice_type  = SLICE_TYPE_B;
-        mslice->nal_ref_idc = 0;
-        break;
-    default:
-        av_assert0(0 && "invalid picture type");
+    if (pic->type == PICTURE_TYPE_IDR) {
+        sh->nal_unit_header.nal_unit_type = H264_NAL_IDR_SLICE;
+        sh->nal_unit_header.nal_ref_idc   = 3;
+    } else {
+        sh->nal_unit_header.nal_unit_type = H264_NAL_SLICE;
+        sh->nal_unit_header.nal_ref_idc   = pic->type != PICTURE_TYPE_B;
     }
 
     // Only one slice per frame.
-    vslice->macroblock_address = 0;
-    vslice->num_macroblocks = priv->mb_width * priv->mb_height;
+    sh->first_mb_in_slice = 0;
+    sh->slice_type        = priv->slice_type;
+
+    sh->pic_parameter_set_id = pps->pic_parameter_set_id;
+
+    sh->frame_num  = priv->frame_num;
+    sh->idr_pic_id = priv->idr_pic_count;
+
+    sh->pic_order_cnt_lsb = priv->pic_order_cnt &
+        ((1 << (4 + sps->log2_max_pic_order_cnt_lsb_minus4)) - 1);
+
+    sh->direct_spatial_mv_pred_flag = 1;
+
+    if (pic->type == PICTURE_TYPE_B)
+        sh->slice_qp_delta = priv->fixed_qp_b - (pps->pic_init_qp_minus26 + 26);
+    else if (pic->type == PICTURE_TYPE_P)
+        sh->slice_qp_delta = priv->fixed_qp_p - (pps->pic_init_qp_minus26 + 26);
+    else
+        sh->slice_qp_delta = priv->fixed_qp_idr - (pps->pic_init_qp_minus26 + 26);
+
+
+    vslice->macroblock_address = sh->first_mb_in_slice;
+    vslice->num_macroblocks    = priv->mb_width * priv->mb_height;
 
     vslice->macroblock_info = VA_INVALID_ID;
 
-    vslice->pic_parameter_set_id = vpic->pic_parameter_set_id;
-    vslice->idr_pic_id = priv->idr_pic_count++;
+    vslice->slice_type           = sh->slice_type % 5;
+    vslice->pic_parameter_set_id = sh->pic_parameter_set_id;
+    vslice->idr_pic_id           = sh->idr_pic_id;
 
-    vslice->pic_order_cnt_lsb = (pic->display_order - priv->last_idr_frame) &
-        ((1 << (4 + vseq->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4)) - 1);
+    vslice->pic_order_cnt_lsb = sh->pic_order_cnt_lsb;
+
+    vslice->direct_spatial_mv_pred_flag = sh->direct_spatial_mv_pred_flag;
 
     for (i = 0; i < FF_ARRAY_ELEMS(vslice->RefPicList0); i++) {
         vslice->RefPicList0[i].picture_id = VA_INVALID_ID;
@@ -1072,26 +805,15 @@
         // Backward reference for P- or B-frame.
         av_assert0(pic->type == PICTURE_TYPE_P ||
                    pic->type == PICTURE_TYPE_B);
-
-        vslice->num_ref_idx_l0_active_minus1 = 0;
         vslice->RefPicList0[0] = vpic->ReferenceFrames[0];
     }
     if (pic->nb_refs >= 2) {
         // Forward reference for B-frame.
         av_assert0(pic->type == PICTURE_TYPE_B);
-
-        vslice->num_ref_idx_l1_active_minus1 = 0;
         vslice->RefPicList1[0] = vpic->ReferenceFrames[1];
     }
 
-    if (pic->type == PICTURE_TYPE_B)
-        vslice->slice_qp_delta = priv->fixed_qp_b - vpic->pic_init_qp;
-    else if (pic->type == PICTURE_TYPE_P)
-        vslice->slice_qp_delta = priv->fixed_qp_p - vpic->pic_init_qp;
-    else
-        vslice->slice_qp_delta = priv->fixed_qp_idr - vpic->pic_init_qp;
-
-    vslice->direct_spatial_mv_pred_flag = 1;
+    vslice->slice_qp_delta = sh->slice_qp_delta;
 
     return 0;
 }
@@ -1099,14 +821,18 @@
 static av_cold int vaapi_encode_h264_configure(AVCodecContext *avctx)
 {
     VAAPIEncodeContext      *ctx = avctx->priv_data;
-    VAAPIEncodeH264Context *priv = ctx->priv_data;
-    VAAPIEncodeH264Options  *opt = ctx->codec_options;
+    VAAPIEncodeH264Context *priv = avctx->priv_data;
+    int err;
+
+    err = ff_cbs_init(&priv->cbc, AV_CODEC_ID_H264, avctx);
+    if (err < 0)
+        return err;
 
     priv->mb_width  = FFALIGN(avctx->width,  16) / 16;
     priv->mb_height = FFALIGN(avctx->height, 16) / 16;
 
     if (ctx->va_rc_mode == VA_RC_CQP) {
-        priv->fixed_qp_p = opt->qp;
+        priv->fixed_qp_p = priv->qp;
         if (avctx->i_quant_factor > 0.0)
             priv->fixed_qp_idr = (int)((priv->fixed_qp_p * avctx->i_quant_factor +
                                         avctx->i_quant_offset) + 0.5);
@@ -1118,6 +844,8 @@
         else
             priv->fixed_qp_b = priv->fixed_qp_p;
 
+        priv->sei &= ~SEI_TIMING;
+
         av_log(avctx, AV_LOG_DEBUG, "Using fixed QP = "
                "%d / %d / %d for IDR- / P- / B-frames.\n",
                priv->fixed_qp_idr, priv->fixed_qp_p, priv->fixed_qp_b);
@@ -1129,22 +857,51 @@
         priv->fixed_qp_p   = 26;
         priv->fixed_qp_b   = 26;
 
-        av_log(avctx, AV_LOG_DEBUG, "Using %s-bitrate = %"PRId64" bps.\n",
-               ctx->va_rc_mode == VA_RC_CBR ? "constant" : "variable",
-               avctx->bit_rate);
-
     } else {
         av_assert0(0 && "Invalid RC mode.");
     }
 
-    if (avctx->compression_level == FF_COMPRESSION_DEFAULT)
-        avctx->compression_level = opt->quality;
+    if (priv->sei & SEI_IDENTIFIER) {
+        const char *lavc  = LIBAVCODEC_IDENT;
+        const char *vaapi = VA_VERSION_S;
+        const char *driver;
+        int len;
+
+        memcpy(priv->sei_identifier.uuid_iso_iec_11578,
+               vaapi_encode_h264_sei_identifier_uuid,
+               sizeof(priv->sei_identifier.uuid_iso_iec_11578));
+
+        driver = vaQueryVendorString(ctx->hwctx->display);
+        if (!driver)
+            driver = "unknown driver";
+
+        len = snprintf(NULL, 0, "%s / VAAPI %s / %s", lavc, vaapi, driver);
+        if (len >= 0) {
+            priv->sei_identifier_string = av_malloc(len + 1);
+            if (!priv->sei_identifier_string)
+                return AVERROR(ENOMEM);
+
+            snprintf(priv->sei_identifier_string, len + 1,
+                     "%s / VAAPI %s / %s", lavc, vaapi, driver);
+
+            priv->sei_identifier.data        = priv->sei_identifier_string;
+            priv->sei_identifier.data_length = len + 1;
+        }
+    }
 
     return 0;
 }
 
+static const VAAPIEncodeProfile vaapi_encode_h264_profiles[] = {
+    { FF_PROFILE_H264_HIGH, 8, 3, 1, 1, VAProfileH264High },
+    { FF_PROFILE_H264_MAIN, 8, 3, 1, 1, VAProfileH264Main },
+    { FF_PROFILE_H264_CONSTRAINED_BASELINE,
+                            8, 3, 1, 1, VAProfileH264ConstrainedBaseline },
+    { FF_PROFILE_UNKNOWN }
+};
+
 static const VAAPIEncodeType vaapi_encode_type_h264 = {
-    .priv_data_size        = sizeof(VAAPIEncodeH264Context),
+    .profiles              = vaapi_encode_h264_profiles,
 
     .configure             = &vaapi_encode_h264_configure,
 
@@ -1168,36 +925,29 @@
 
 static av_cold int vaapi_encode_h264_init(AVCodecContext *avctx)
 {
-    VAAPIEncodeContext     *ctx = avctx->priv_data;
-    VAAPIEncodeH264Options *opt =
-        (VAAPIEncodeH264Options*)ctx->codec_options_data;
+    VAAPIEncodeContext      *ctx = avctx->priv_data;
+    VAAPIEncodeH264Context *priv = avctx->priv_data;
 
     ctx->codec = &vaapi_encode_type_h264;
 
+    if (avctx->profile == FF_PROFILE_UNKNOWN)
+        avctx->profile = priv->profile;
+    if (avctx->level == FF_LEVEL_UNKNOWN)
+        avctx->level = priv->level;
+    if (avctx->compression_level == FF_COMPRESSION_DEFAULT)
+        avctx->compression_level = priv->quality;
+
+    // Reject unsupported profiles.
     switch (avctx->profile) {
     case FF_PROFILE_H264_BASELINE:
         av_log(avctx, AV_LOG_WARNING, "H.264 baseline profile is not "
                "supported, using constrained baseline profile instead.\n");
         avctx->profile = FF_PROFILE_H264_CONSTRAINED_BASELINE;
-    case FF_PROFILE_H264_CONSTRAINED_BASELINE:
-        ctx->va_profile = VAProfileH264ConstrainedBaseline;
-        if (avctx->max_b_frames != 0) {
-            avctx->max_b_frames = 0;
-            av_log(avctx, AV_LOG_WARNING, "H.264 constrained baseline profile "
-                   "doesn't support encoding with B frames, disabling them.\n");
-        }
-        break;
-    case FF_PROFILE_H264_MAIN:
-        ctx->va_profile = VAProfileH264Main;
         break;
     case FF_PROFILE_H264_EXTENDED:
         av_log(avctx, AV_LOG_ERROR, "H.264 extended profile "
                "is not supported.\n");
         return AVERROR_PATCHWELCOME;
-    case FF_PROFILE_UNKNOWN:
-    case FF_PROFILE_H264_HIGH:
-        ctx->va_profile = VAProfileH264High;
-        break;
     case FF_PROFILE_H264_HIGH_10:
     case FF_PROFILE_H264_HIGH_10_INTRA:
         av_log(avctx, AV_LOG_ERROR, "H.264 10-bit profiles "
@@ -1212,35 +962,15 @@
         av_log(avctx, AV_LOG_ERROR, "H.264 non-4:2:0 profiles "
                "are not supported.\n");
         return AVERROR_PATCHWELCOME;
-    default:
-        av_log(avctx, AV_LOG_ERROR, "Unknown H.264 profile %d.\n",
-               avctx->profile);
-        return AVERROR(EINVAL);
-    }
-    if (opt->low_power) {
-#if VA_CHECK_VERSION(0, 39, 2)
-        ctx->va_entrypoint = VAEntrypointEncSliceLP;
-#else
-        av_log(avctx, AV_LOG_ERROR, "Low-power encoding is not "
-               "supported with this VAAPI version.\n");
-        return AVERROR(EINVAL);
-#endif
-    } else {
-        ctx->va_entrypoint = VAEntrypointEncSlice;
     }
 
-    // Only 8-bit encode is supported.
-    ctx->va_rt_format = VA_RT_FORMAT_YUV420;
+    if (avctx->level != FF_LEVEL_UNKNOWN && avctx->level & ~0xff) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid level %d: must fit "
+               "in 8-bit unsigned integer.\n", avctx->level);
+        return AVERROR(EINVAL);
+    }
 
-    if (avctx->bit_rate > 0) {
-        if (avctx->rc_max_rate == avctx->bit_rate)
-            ctx->va_rc_mode = VA_RC_CBR;
-        else
-            ctx->va_rc_mode = VA_RC_VBR;
-    } else
-        ctx->va_rc_mode = VA_RC_CQP;
-
-    ctx->va_packed_headers =
+    ctx->desired_packed_headers =
         VA_ENC_PACKED_HEADER_SEQUENCE | // SPS and PPS.
         VA_ENC_PACKED_HEADER_SLICE    | // Slice headers.
         VA_ENC_PACKED_HEADER_MISC;      // SEI.
@@ -1251,29 +981,91 @@
     return ff_vaapi_encode_init(avctx);
 }
 
-#define OFFSET(x) (offsetof(VAAPIEncodeContext, codec_options_data) + \
-                   offsetof(VAAPIEncodeH264Options, x))
+static av_cold int vaapi_encode_h264_close(AVCodecContext *avctx)
+{
+    VAAPIEncodeH264Context *priv = avctx->priv_data;
+
+    ff_cbs_close(&priv->cbc);
+    av_freep(&priv->sei_identifier_string);
+
+    return ff_vaapi_encode_close(avctx);
+}
+
+#define OFFSET(x) offsetof(VAAPIEncodeH264Context, x)
 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 static const AVOption vaapi_encode_h264_options[] = {
+    VAAPI_ENCODE_COMMON_OPTIONS,
+
     { "qp", "Constant QP (for P-frames; scaled by qfactor/qoffset for I/B)",
       OFFSET(qp), AV_OPT_TYPE_INT, { .i64 = 20 }, 0, 52, FLAGS },
     { "quality", "Set encode quality (trades off against speed, higher is faster)",
-      OFFSET(quality), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8, FLAGS },
-    { "low_power", "Use low-power encoding mode (experimental: only supported "
-      "on some platforms, does not support all features)",
-      OFFSET(low_power), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS },
+      OFFSET(quality), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS },
     { "coder", "Entropy coder type",
       OFFSET(coder), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, FLAGS, "coder" },
         { "cavlc", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS, "coder" },
         { "cabac", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, FLAGS, "coder" },
         { "vlc",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS, "coder" },
         { "ac",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, FLAGS, "coder" },
+
+    { "aud", "Include AUD",
+      OFFSET(aud), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+
+    { "sei", "Set SEI to include",
+      OFFSET(sei), AV_OPT_TYPE_FLAGS,
+      { .i64 = SEI_IDENTIFIER | SEI_TIMING | SEI_RECOVERY_POINT },
+      0, INT_MAX, FLAGS, "sei" },
+    { "identifier", "Include encoder version identifier",
+      0, AV_OPT_TYPE_CONST, { .i64 = SEI_IDENTIFIER },
+      INT_MIN, INT_MAX, FLAGS, "sei" },
+    { "timing", "Include timing parameters (buffering_period and pic_timing)",
+      0, AV_OPT_TYPE_CONST, { .i64 = SEI_TIMING },
+      INT_MIN, INT_MAX, FLAGS, "sei" },
+    { "recovery_point", "Include recovery points where appropriate",
+      0, AV_OPT_TYPE_CONST, { .i64 = SEI_RECOVERY_POINT },
+      INT_MIN, INT_MAX, FLAGS, "sei" },
+
+    { "profile", "Set profile (profile_idc and constraint_set*_flag)",
+      OFFSET(profile), AV_OPT_TYPE_INT,
+      { .i64 = FF_PROFILE_UNKNOWN }, FF_PROFILE_UNKNOWN, 0xffff, FLAGS, "profile" },
+
+#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \
+      { .i64 = value }, 0, 0, FLAGS, "profile"
+    { PROFILE("constrained_baseline", FF_PROFILE_H264_CONSTRAINED_BASELINE) },
+    { PROFILE("main",                 FF_PROFILE_H264_MAIN) },
+    { PROFILE("high",                 FF_PROFILE_H264_HIGH) },
+#undef PROFILE
+
+    { "level", "Set level (level_idc)",
+      OFFSET(level), AV_OPT_TYPE_INT,
+      { .i64 = FF_LEVEL_UNKNOWN }, FF_LEVEL_UNKNOWN, 0xff, FLAGS, "level" },
+
+#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
+      { .i64 = value }, 0, 0, FLAGS, "level"
+    { LEVEL("1",   10) },
+    { LEVEL("1.1", 11) },
+    { LEVEL("1.2", 12) },
+    { LEVEL("1.3", 13) },
+    { LEVEL("2",   20) },
+    { LEVEL("2.1", 21) },
+    { LEVEL("2.2", 22) },
+    { LEVEL("3",   30) },
+    { LEVEL("3.1", 31) },
+    { LEVEL("3.2", 32) },
+    { LEVEL("4",   40) },
+    { LEVEL("4.1", 41) },
+    { LEVEL("4.2", 42) },
+    { LEVEL("5",   50) },
+    { LEVEL("5.1", 51) },
+    { LEVEL("5.2", 52) },
+    { LEVEL("6",   60) },
+    { LEVEL("6.1", 61) },
+    { LEVEL("6.2", 62) },
+#undef LEVEL
+
     { NULL },
 };
 
 static const AVCodecDefault vaapi_encode_h264_defaults[] = {
-    { "profile",        "100" },
-    { "level",          "51"  },
     { "b",              "0"   },
     { "bf",             "2"   },
     { "g",              "120" },
@@ -1281,7 +1073,8 @@
     { "i_qoffset",      "0"   },
     { "b_qfactor",      "6/5" },
     { "b_qoffset",      "0"   },
-    { "qmin",           "0"   },
+    { "qmin",           "-1"  },
+    { "qmax",           "-1"  },
     { NULL },
 };
 
@@ -1297,16 +1090,16 @@
     .long_name      = NULL_IF_CONFIG_SMALL("H.264/AVC (VAAPI)"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
-    .priv_data_size = (sizeof(VAAPIEncodeContext) +
-                       sizeof(VAAPIEncodeH264Options)),
+    .priv_data_size = sizeof(VAAPIEncodeH264Context),
     .init           = &vaapi_encode_h264_init,
     .encode2        = &ff_vaapi_encode2,
-    .close          = &ff_vaapi_encode_close,
+    .close          = &vaapi_encode_h264_close,
     .priv_class     = &vaapi_encode_h264_class,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
     .defaults       = vaapi_encode_h264_defaults,
     .pix_fmts = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_VAAPI,
         AV_PIX_FMT_NONE,
     },
+    .wrapper_name   = "vaapi",
 };

diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
index 971458d..10312fb 100644
--- a/libavcodec/vaapi_encode_h265.c
+++ b/libavcodec/vaapi_encode_h265.c

@@ -16,163 +16,44 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <string.h>
+
 #include <va/va.h>
 #include <va/va_enc_hevc.h>
 
 #include "libavutil/avassert.h"
-#include "libavutil/internal.h"
+#include "libavutil/common.h"
+#include "libavutil/pixdesc.h"
 #include "libavutil/opt.h"
-#include "libavutil/pixfmt.h"
+#include "libavutil/mastering_display_metadata.h"
 
 #include "avcodec.h"
+#include "cbs.h"
+#include "cbs_h265.h"
+#include "h265_profile_level.h"
 #include "hevc.h"
+#include "hevc_sei.h"
 #include "internal.h"
 #include "put_bits.h"
 #include "vaapi_encode.h"
-#include "vaapi_encode_h26x.h"
 
-
-#define MAX_ST_REF_PIC_SETS  32
-#define MAX_DPB_PICS         16
-#define MAX_LAYERS            1
-
-
-typedef struct VAAPIEncodeH265STRPS {
-    char inter_ref_pic_set_prediction_flag;
-
-    unsigned int num_negative_pics;
-    unsigned int num_positive_pics;
-
-    unsigned int delta_poc_s0_minus1[MAX_DPB_PICS];
-    char used_by_curr_pic_s0_flag[MAX_DPB_PICS];
-
-    unsigned int delta_poc_s1_minus1[MAX_DPB_PICS];
-    char used_by_curr_pic_s1_flag[MAX_DPB_PICS];
-} VAAPIEncodeH265STRPS;
-
-// This structure contains all possibly-useful per-sequence syntax elements
-// which are not already contained in the various VAAPI structures.
-typedef struct VAAPIEncodeH265MiscSequenceParams {
-
-    // Parameter set IDs.
-    unsigned int video_parameter_set_id;
-    unsigned int seq_parameter_set_id;
-
-    // Layering.
-    unsigned int vps_max_layers_minus1;
-    unsigned int vps_max_sub_layers_minus1;
-    char vps_temporal_id_nesting_flag;
-    unsigned int vps_max_layer_id;
-    unsigned int vps_num_layer_sets_minus1;
-    unsigned int sps_max_sub_layers_minus1;
-    char sps_temporal_id_nesting_flag;
-    char layer_id_included_flag[MAX_LAYERS][64];
-
-    // Profile/tier/level parameters.
-    char general_profile_compatibility_flag[32];
-    char general_progressive_source_flag;
-    char general_interlaced_source_flag;
-    char general_non_packed_constraint_flag;
-    char general_frame_only_constraint_flag;
-    char general_inbld_flag;
-
-    // Decode/display ordering parameters.
-    unsigned int log2_max_pic_order_cnt_lsb_minus4;
-    char vps_sub_layer_ordering_info_present_flag;
-    unsigned int vps_max_dec_pic_buffering_minus1[MAX_LAYERS];
-    unsigned int vps_max_num_reorder_pics[MAX_LAYERS];
-    unsigned int vps_max_latency_increase_plus1[MAX_LAYERS];
-    char sps_sub_layer_ordering_info_present_flag;
-    unsigned int sps_max_dec_pic_buffering_minus1[MAX_LAYERS];
-    unsigned int sps_max_num_reorder_pics[MAX_LAYERS];
-    unsigned int sps_max_latency_increase_plus1[MAX_LAYERS];
-
-    // Timing information.
-    char vps_timing_info_present_flag;
-    unsigned int vps_num_units_in_tick;
-    unsigned int vps_time_scale;
-    char vps_poc_proportional_to_timing_flag;
-    unsigned int vps_num_ticks_poc_diff_minus1;
-
-    // Cropping information.
-    char conformance_window_flag;
-    unsigned int conf_win_left_offset;
-    unsigned int conf_win_right_offset;
-    unsigned int conf_win_top_offset;
-    unsigned int conf_win_bottom_offset;
-
-    // Short-term reference picture sets.
-    unsigned int num_short_term_ref_pic_sets;
-    VAAPIEncodeH265STRPS st_ref_pic_set[MAX_ST_REF_PIC_SETS];
-
-    // Long-term reference pictures.
-    char long_term_ref_pics_present_flag;
-    unsigned int num_long_term_ref_pics_sps;
-    struct {
-        unsigned int lt_ref_pic_poc_lsb_sps;
-        char used_by_curr_pic_lt_sps_flag;
-    } lt_ref_pic;
-
-    // Deblocking filter control.
-    char deblocking_filter_control_present_flag;
-    char deblocking_filter_override_enabled_flag;
-    char pps_deblocking_filter_disabled_flag;
-    int pps_beta_offset_div2;
-    int pps_tc_offset_div2;
-
-    // Video Usability Information.
-    char vui_parameters_present_flag;
-    char aspect_ratio_info_present_flag;
-    unsigned int aspect_ratio_idc;
-    unsigned int sar_width;
-    unsigned int sar_height;
-    char video_signal_type_present_flag;
-    unsigned int video_format;
-    char video_full_range_flag;
-    char colour_description_present_flag;
-    unsigned int colour_primaries;
-    unsigned int transfer_characteristics;
-    unsigned int matrix_coeffs;
-
-    // Oddments.
-    char uniform_spacing_flag;
-    char output_flag_present_flag;
-    char cabac_init_present_flag;
-    unsigned int num_extra_slice_header_bits;
-    char lists_modification_present_flag;
-    char pps_slice_chroma_qp_offsets_present_flag;
-    char pps_slice_chroma_offset_list_enabled_flag;
-} VAAPIEncodeH265MiscSequenceParams;
-
-// This structure contains all possibly-useful per-slice syntax elements
-// which are not already contained in the various VAAPI structures.
-typedef struct VAAPIEncodeH265MiscSliceParams {
-    // Slice segments.
-    char first_slice_segment_in_pic_flag;
-
-    // Short-term reference picture sets.
-    char short_term_ref_pic_set_sps_flag;
-    unsigned int short_term_ref_pic_idx;
-    VAAPIEncodeH265STRPS st_ref_pic_set;
-
-    // Deblocking filter.
-    char deblocking_filter_override_flag;
-
-    // Oddments.
-    char slice_reserved_flag[8];
-    char no_output_of_prior_pics_flag;
-    char pic_output_flag;
-} VAAPIEncodeH265MiscSliceParams;
-
-typedef struct VAAPIEncodeH265Slice {
-    VAAPIEncodeH265MiscSliceParams misc_slice_params;
-
-    int64_t pic_order_cnt;
-} VAAPIEncodeH265Slice;
+enum {
+    SEI_MASTERING_DISPLAY       = 0x08,
+    SEI_CONTENT_LIGHT_LEVEL     = 0x10,
+};
 
 typedef struct VAAPIEncodeH265Context {
-    VAAPIEncodeH265MiscSequenceParams misc_sequence_params;
+    VAAPIEncodeContext common;
 
+    // User options.
+    int qp;
+    int aud;
+    int profile;
+    int tier;
+    int level;
+    int sei;
+
+    // Derived settings.
     unsigned int ctu_width;
     unsigned int ctu_height;
 
@@ -180,582 +61,107 @@
     int fixed_qp_p;
     int fixed_qp_b;
 
+    // Stream state.
     int64_t last_idr_frame;
+    int pic_order_cnt;
 
-    // Rate control configuration.
-    struct {
-        VAEncMiscParameterBuffer misc;
-        VAEncMiscParameterRateControl rc;
-    } rc_params;
-    struct {
-        VAEncMiscParameterBuffer misc;
-        VAEncMiscParameterHRD hrd;
-    } hrd_params;
+    int slice_nal_unit;
+    int slice_type;
+    int pic_type;
+
+    // Writer structures.
+    H265RawAUD   raw_aud;
+    H265RawVPS   raw_vps;
+    H265RawSPS   raw_sps;
+    H265RawPPS   raw_pps;
+    H265RawSEI   raw_sei;
+    H265RawSlice raw_slice;
+
+    H265RawSEIMasteringDisplayColourVolume sei_mastering_display;
+    H265RawSEIContentLightLevelInfo        sei_content_light_level;
+
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment current_access_unit;
+    int aud_needed;
+    int sei_needed;
 } VAAPIEncodeH265Context;
 
-typedef struct VAAPIEncodeH265Options {
-    int qp;
-} VAAPIEncodeH265Options;
 
-
-#define vseq_var(name)     vseq->name, name
-#define vseq_field(name)   vseq->seq_fields.bits.name, name
-#define vpic_var(name)     vpic->name, name
-#define vpic_field(name)   vpic->pic_fields.bits.name, name
-#define vslice_var(name)   vslice->name, name
-#define vslice_field(name) vslice->slice_fields.bits.name, name
-#define mseq_var(name)     mseq->name, name
-#define mslice_var(name)   mslice->name, name
-#define mstrps_var(name)   mstrps->name, name
-
-static void vaapi_encode_h265_write_nal_unit_header(PutBitContext *pbc,
-                                                    int nal_unit_type)
+static int vaapi_encode_h265_write_access_unit(AVCodecContext *avctx,
+                                               char *data, size_t *data_len,
+                                               CodedBitstreamFragment *au)
 {
-    u(1, 0, forbidden_zero_bit);
-    u(6, nal_unit_type, nal_unit_type);
-    u(6, 0, nuh_layer_id);
-    u(3, 1, nuh_temporal_id_plus1);
+    VAAPIEncodeH265Context *priv = avctx->priv_data;
+    int err;
+
+    err = ff_cbs_write_fragment_data(priv->cbc, au);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n");
+        return err;
+    }
+
+    if (*data_len < 8 * au->data_size - au->data_bit_padding) {
+        av_log(avctx, AV_LOG_ERROR, "Access unit too large: "
+               "%zu < %zu.\n", *data_len,
+               8 * au->data_size - au->data_bit_padding);
+        return AVERROR(ENOSPC);
+    }
+
+    memcpy(data, au->data, au->data_size);
+    *data_len = 8 * au->data_size - au->data_bit_padding;
+
+    return 0;
 }
 
-static void vaapi_encode_h265_write_rbsp_trailing_bits(PutBitContext *pbc)
+static int vaapi_encode_h265_add_nal(AVCodecContext *avctx,
+                                     CodedBitstreamFragment *au,
+                                     void *nal_unit)
 {
-    u(1, 1, rbsp_stop_one_bit);
-    while (put_bits_count(pbc) & 7)
-        u(1, 0, rbsp_alignment_zero_bit);
-}
+    VAAPIEncodeH265Context *priv = avctx->priv_data;
+    H265RawNALUnitHeader *header = nal_unit;
+    int err;
 
-static void vaapi_encode_h265_write_profile_tier_level(PutBitContext *pbc,
-                                                       VAAPIEncodeContext *ctx)
-{
-    VAEncSequenceParameterBufferHEVC  *vseq = ctx->codec_sequence_params;
-    VAAPIEncodeH265Context            *priv = ctx->priv_data;
-    VAAPIEncodeH265MiscSequenceParams *mseq = &priv->misc_sequence_params;
-    int j;
-
-    if (1) {
-        u(2, 0, general_profile_space);
-        u(1, vseq_var(general_tier_flag));
-        u(5, vseq_var(general_profile_idc));
-
-        for (j = 0; j < 32; j++) {
-            u(1, mseq_var(general_profile_compatibility_flag[j]));
-        }
-
-        u(1, mseq_var(general_progressive_source_flag));
-        u(1, mseq_var(general_interlaced_source_flag));
-        u(1, mseq_var(general_non_packed_constraint_flag));
-        u(1, mseq_var(general_frame_only_constraint_flag));
-
-        if (0) {
-            // Not main profile.
-            // Lots of extra constraint flags.
-        } else {
-            // put_bits only handles up to 31 bits.
-            u(23, 0, general_reserved_zero_43bits);
-            u(20, 0, general_reserved_zero_43bits);
-        }
-
-        if (vseq->general_profile_idc >= 1 && vseq->general_profile_idc <= 5) {
-            u(1, mseq_var(general_inbld_flag));
-        } else {
-            u(1, 0, general_reserved_zero_bit);
-        }
+    err = ff_cbs_insert_unit_content(priv->cbc, au, -1,
+                                     header->nal_unit_type, nal_unit, NULL);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to add NAL unit: "
+               "type = %d.\n", header->nal_unit_type);
+        return err;
     }
 
-    u(8, vseq_var(general_level_idc));
-
-    // No sublayers.
-}
-
-static void vaapi_encode_h265_write_vps(PutBitContext *pbc,
-                                        VAAPIEncodeContext *ctx)
-{
-    VAAPIEncodeH265Context            *priv = ctx->priv_data;
-    VAAPIEncodeH265MiscSequenceParams *mseq = &priv->misc_sequence_params;
-    int i, j;
-
-    vaapi_encode_h265_write_nal_unit_header(pbc, HEVC_NAL_VPS);
-
-    u(4, mseq->video_parameter_set_id, vps_video_parameter_set_id);
-
-    u(1, 1, vps_base_layer_internal_flag);
-    u(1, 1, vps_base_layer_available_flag);
-    u(6, mseq_var(vps_max_layers_minus1));
-    u(3, mseq_var(vps_max_sub_layers_minus1));
-    u(1, mseq_var(vps_temporal_id_nesting_flag));
-
-    u(16, 0xffff, vps_reserved_0xffff_16bits);
-
-    vaapi_encode_h265_write_profile_tier_level(pbc, ctx);
-
-    u(1, mseq_var(vps_sub_layer_ordering_info_present_flag));
-    for (i = (mseq->vps_sub_layer_ordering_info_present_flag ?
-              0 : mseq->vps_max_sub_layers_minus1);
-         i <= mseq->vps_max_sub_layers_minus1; i++) {
-        ue(mseq_var(vps_max_dec_pic_buffering_minus1[i]));
-        ue(mseq_var(vps_max_num_reorder_pics[i]));
-        ue(mseq_var(vps_max_latency_increase_plus1[i]));
-    }
-
-    u(6, mseq_var(vps_max_layer_id));
-    ue(mseq_var(vps_num_layer_sets_minus1));
-    for (i = 1; i <= mseq->vps_num_layer_sets_minus1; i++) {
-        for (j = 0; j < mseq->vps_max_layer_id; j++)
-            u(1, mseq_var(layer_id_included_flag[i][j]));
-    }
-
-    u(1, mseq_var(vps_timing_info_present_flag));
-    if (mseq->vps_timing_info_present_flag) {
-        u(1, 0, put_bits_hack_zero_bit);
-        u(31, mseq_var(vps_num_units_in_tick));
-        u(1, 0, put_bits_hack_zero_bit);
-        u(31, mseq_var(vps_time_scale));
-        u(1, mseq_var(vps_poc_proportional_to_timing_flag));
-        if (mseq->vps_poc_proportional_to_timing_flag) {
-            ue(mseq_var(vps_num_ticks_poc_diff_minus1));
-        }
-        ue(0, vps_num_hrd_parameters);
-    }
-
-    u(1, 0, vps_extension_flag);
-
-    vaapi_encode_h265_write_rbsp_trailing_bits(pbc);
-}
-
-static void vaapi_encode_h265_write_st_ref_pic_set(PutBitContext *pbc,
-                                                   int st_rps_idx,
-                                                   VAAPIEncodeH265STRPS *mstrps)
-{
-    int i;
-
-    if (st_rps_idx != 0)
-       u(1, mstrps_var(inter_ref_pic_set_prediction_flag));
-
-    if (mstrps->inter_ref_pic_set_prediction_flag) {
-        av_assert0(0 && "inter ref pic set prediction not supported");
-    } else {
-        ue(mstrps_var(num_negative_pics));
-        ue(mstrps_var(num_positive_pics));
-
-        for (i = 0; i < mstrps->num_negative_pics; i++) {
-            ue(mstrps_var(delta_poc_s0_minus1[i]));
-            u(1, mstrps_var(used_by_curr_pic_s0_flag[i]));
-        }
-        for (i = 0; i < mstrps->num_positive_pics; i++) {
-            ue(mstrps_var(delta_poc_s1_minus1[i]));
-            u(1, mstrps_var(used_by_curr_pic_s1_flag[i]));
-        }
-    }
-}
-
-static void vaapi_encode_h265_write_vui_parameters(PutBitContext *pbc,
-                                                   VAAPIEncodeContext *ctx)
-{
-    VAAPIEncodeH265Context            *priv = ctx->priv_data;
-    VAAPIEncodeH265MiscSequenceParams *mseq = &priv->misc_sequence_params;
-
-    u(1, mseq_var(aspect_ratio_info_present_flag));
-    if (mseq->aspect_ratio_info_present_flag) {
-        u(8, mseq_var(aspect_ratio_idc));
-        if (mseq->aspect_ratio_idc == 255) {
-            u(16, mseq_var(sar_width));
-            u(16, mseq_var(sar_height));
-        }
-    }
-
-    u(1, 0, overscan_info_present_flag);
-
-    u(1, mseq_var(video_signal_type_present_flag));
-    if (mseq->video_signal_type_present_flag) {
-        u(3, mseq_var(video_format));
-        u(1, mseq_var(video_full_range_flag));
-        u(1, mseq_var(colour_description_present_flag));
-        if (mseq->colour_description_present_flag) {
-            u(8, mseq_var(colour_primaries));
-            u(8, mseq_var(transfer_characteristics));
-            u(8, mseq_var(matrix_coeffs));
-        }
-    }
-
-    u(1, 0, chroma_loc_info_present_flag);
-    u(1, 0, neutral_chroma_indication_flag);
-    u(1, 0, field_seq_flag);
-    u(1, 0, frame_field_info_present_flag);
-    u(1, 0, default_display_window_flag);
-    u(1, 0, vui_timing_info_present_flag);
-    u(1, 0, bitstream_restriction_flag_flag);
-}
-
-static void vaapi_encode_h265_write_sps(PutBitContext *pbc,
-                                        VAAPIEncodeContext *ctx)
-{
-    VAEncSequenceParameterBufferHEVC  *vseq = ctx->codec_sequence_params;
-    VAAPIEncodeH265Context            *priv = ctx->priv_data;
-    VAAPIEncodeH265MiscSequenceParams *mseq = &priv->misc_sequence_params;
-    int i;
-
-    vaapi_encode_h265_write_nal_unit_header(pbc, HEVC_NAL_SPS);
-
-    u(4, mseq->video_parameter_set_id, sps_video_parameter_set_id);
-
-    u(3, mseq_var(sps_max_sub_layers_minus1));
-    u(1, mseq_var(sps_temporal_id_nesting_flag));
-
-    vaapi_encode_h265_write_profile_tier_level(pbc, ctx);
-
-    ue(mseq->seq_parameter_set_id, sps_seq_parameter_set_id);
-    ue(vseq_field(chroma_format_idc));
-    if (vseq->seq_fields.bits.chroma_format_idc == 3)
-        u(1, 0, separate_colour_plane_flag);
-
-    ue(vseq_var(pic_width_in_luma_samples));
-    ue(vseq_var(pic_height_in_luma_samples));
-
-    u(1, mseq_var(conformance_window_flag));
-    if (mseq->conformance_window_flag) {
-        ue(mseq_var(conf_win_left_offset));
-        ue(mseq_var(conf_win_right_offset));
-        ue(mseq_var(conf_win_top_offset));
-        ue(mseq_var(conf_win_bottom_offset));
-    }
-
-    ue(vseq_field(bit_depth_luma_minus8));
-    ue(vseq_field(bit_depth_chroma_minus8));
-
-    ue(mseq_var(log2_max_pic_order_cnt_lsb_minus4));
-
-    u(1, mseq_var(sps_sub_layer_ordering_info_present_flag));
-    for (i = (mseq->sps_sub_layer_ordering_info_present_flag ?
-              0 : mseq->sps_max_sub_layers_minus1);
-         i <= mseq->sps_max_sub_layers_minus1; i++) {
-        ue(mseq_var(sps_max_dec_pic_buffering_minus1[i]));
-        ue(mseq_var(sps_max_num_reorder_pics[i]));
-        ue(mseq_var(sps_max_latency_increase_plus1[i]));
-    }
-
-    ue(vseq_var(log2_min_luma_coding_block_size_minus3));
-    ue(vseq_var(log2_diff_max_min_luma_coding_block_size));
-    ue(vseq_var(log2_min_transform_block_size_minus2));
-    ue(vseq_var(log2_diff_max_min_transform_block_size));
-    ue(vseq_var(max_transform_hierarchy_depth_inter));
-    ue(vseq_var(max_transform_hierarchy_depth_intra));
-
-    u(1, vseq_field(scaling_list_enabled_flag));
-    if (vseq->seq_fields.bits.scaling_list_enabled_flag) {
-        u(1, 0, sps_scaling_list_data_present_flag);
-    }
-
-    u(1, vseq_field(amp_enabled_flag));
-    u(1, vseq_field(sample_adaptive_offset_enabled_flag));
-
-    u(1, vseq_field(pcm_enabled_flag));
-    if (vseq->seq_fields.bits.pcm_enabled_flag) {
-        u(4, vseq_var(pcm_sample_bit_depth_luma_minus1));
-        u(4, vseq_var(pcm_sample_bit_depth_chroma_minus1));
-        ue(vseq_var(log2_min_pcm_luma_coding_block_size_minus3));
-        ue(vseq->log2_max_pcm_luma_coding_block_size_minus3 -
-           vseq->log2_min_pcm_luma_coding_block_size_minus3,
-           log2_diff_max_min_pcm_luma_coding_block_size);
-        u(1, vseq_field(pcm_loop_filter_disabled_flag));
-    }
-
-    ue(mseq_var(num_short_term_ref_pic_sets));
-    for (i = 0; i < mseq->num_short_term_ref_pic_sets; i++)
-        vaapi_encode_h265_write_st_ref_pic_set(pbc, i,
-                                               &mseq->st_ref_pic_set[i]);
-
-    u(1, mseq_var(long_term_ref_pics_present_flag));
-    if (mseq->long_term_ref_pics_present_flag) {
-        ue(0, num_long_term_ref_pics_sps);
-    }
-
-    u(1, vseq_field(sps_temporal_mvp_enabled_flag));
-    u(1, vseq_field(strong_intra_smoothing_enabled_flag));
-
-    u(1, mseq_var(vui_parameters_present_flag));
-    if (mseq->vui_parameters_present_flag) {
-        vaapi_encode_h265_write_vui_parameters(pbc, ctx);
-    }
-
-    u(1, 0, sps_extension_present_flag);
-
-    vaapi_encode_h265_write_rbsp_trailing_bits(pbc);
-}
-
-static void vaapi_encode_h265_write_pps(PutBitContext *pbc,
-                                        VAAPIEncodeContext *ctx)
-{
-    VAEncPictureParameterBufferHEVC   *vpic = ctx->codec_picture_params;
-    VAAPIEncodeH265Context            *priv = ctx->priv_data;
-    VAAPIEncodeH265MiscSequenceParams *mseq = &priv->misc_sequence_params;
-    int i;
-
-    vaapi_encode_h265_write_nal_unit_header(pbc, HEVC_NAL_PPS);
-
-    ue(vpic->slice_pic_parameter_set_id, pps_pic_parameter_set_id);
-    ue(mseq->seq_parameter_set_id, pps_seq_parameter_set_id);
-
-    u(1, vpic_field(dependent_slice_segments_enabled_flag));
-    u(1, mseq_var(output_flag_present_flag));
-    u(3, mseq_var(num_extra_slice_header_bits));
-    u(1, vpic_field(sign_data_hiding_enabled_flag));
-    u(1, mseq_var(cabac_init_present_flag));
-
-    ue(vpic_var(num_ref_idx_l0_default_active_minus1));
-    ue(vpic_var(num_ref_idx_l1_default_active_minus1));
-
-    se(vpic->pic_init_qp - 26, init_qp_minus26);
-
-    u(1, vpic_field(constrained_intra_pred_flag));
-    u(1, vpic_field(transform_skip_enabled_flag));
-
-    u(1, vpic_field(cu_qp_delta_enabled_flag));
-    if (vpic->pic_fields.bits.cu_qp_delta_enabled_flag)
-        ue(vpic_var(diff_cu_qp_delta_depth));
-
-    se(vpic_var(pps_cb_qp_offset));
-    se(vpic_var(pps_cr_qp_offset));
-
-    u(1, mseq_var(pps_slice_chroma_qp_offsets_present_flag));
-    u(1, vpic_field(weighted_pred_flag));
-    u(1, vpic_field(weighted_bipred_flag));
-    u(1, vpic_field(transquant_bypass_enabled_flag));
-    u(1, vpic_field(tiles_enabled_flag));
-    u(1, vpic_field(entropy_coding_sync_enabled_flag));
-
-    if (vpic->pic_fields.bits.tiles_enabled_flag) {
-        ue(vpic_var(num_tile_columns_minus1));
-        ue(vpic_var(num_tile_rows_minus1));
-        u(1, mseq_var(uniform_spacing_flag));
-        if (!mseq->uniform_spacing_flag) {
-            for (i = 0; i < vpic->num_tile_columns_minus1; i++)
-                ue(vpic_var(column_width_minus1[i]));
-            for (i = 0; i < vpic->num_tile_rows_minus1; i++)
-                ue(vpic_var(row_height_minus1[i]));
-        }
-        u(1, vpic_field(loop_filter_across_tiles_enabled_flag));
-    }
-
-    u(1, vpic_field(pps_loop_filter_across_slices_enabled_flag));
-    u(1, mseq_var(deblocking_filter_control_present_flag));
-    if (mseq->deblocking_filter_control_present_flag) {
-        u(1, mseq_var(deblocking_filter_override_enabled_flag));
-        u(1, mseq_var(pps_deblocking_filter_disabled_flag));
-        if (!mseq->pps_deblocking_filter_disabled_flag) {
-            se(mseq_var(pps_beta_offset_div2));
-            se(mseq_var(pps_tc_offset_div2));
-        }
-    }
-
-    u(1, 0, pps_scaling_list_data_present_flag);
-    // No scaling list data.
-
-    u(1, mseq_var(lists_modification_present_flag));
-    ue(vpic_var(log2_parallel_merge_level_minus2));
-    u(1, 0, slice_segment_header_extension_present_flag);
-    u(1, 0, pps_extension_present_flag);
-
-    vaapi_encode_h265_write_rbsp_trailing_bits(pbc);
-}
-
-static void vaapi_encode_h265_write_slice_header2(PutBitContext *pbc,
-                                                  VAAPIEncodeContext *ctx,
-                                                  VAAPIEncodePicture *pic,
-                                                  VAAPIEncodeSlice *slice)
-{
-    VAEncSequenceParameterBufferHEVC  *vseq = ctx->codec_sequence_params;
-    VAEncPictureParameterBufferHEVC   *vpic = pic->codec_picture_params;
-    VAEncSliceParameterBufferHEVC   *vslice = slice->codec_slice_params;
-    VAAPIEncodeH265Context            *priv = ctx->priv_data;
-    VAAPIEncodeH265MiscSequenceParams *mseq = &priv->misc_sequence_params;
-    VAAPIEncodeH265Slice            *pslice = slice->priv_data;
-    VAAPIEncodeH265MiscSliceParams  *mslice = &pslice->misc_slice_params;
-    int i;
-
-    vaapi_encode_h265_write_nal_unit_header(pbc, vpic->nal_unit_type);
-
-    u(1, mslice_var(first_slice_segment_in_pic_flag));
-    if (vpic->nal_unit_type >= HEVC_NAL_BLA_W_LP &&
-       vpic->nal_unit_type <= 23)
-        u(1, mslice_var(no_output_of_prior_pics_flag));
-
-    ue(vslice_var(slice_pic_parameter_set_id));
-
-    if (!mslice->first_slice_segment_in_pic_flag) {
-        if (vpic->pic_fields.bits.dependent_slice_segments_enabled_flag)
-            u(1, vslice_field(dependent_slice_segment_flag));
-        u(av_log2((priv->ctu_width * priv->ctu_height) - 1) + 1,
-          vslice_var(slice_segment_address));
-    }
-    if (!vslice->slice_fields.bits.dependent_slice_segment_flag) {
-        for (i = 0; i < mseq->num_extra_slice_header_bits; i++)
-            u(1, mslice_var(slice_reserved_flag[i]));
-
-        ue(vslice_var(slice_type));
-        if (mseq->output_flag_present_flag)
-            u(1, 1, pic_output_flag);
-        if (vseq->seq_fields.bits.separate_colour_plane_flag)
-            u(2, vslice_field(colour_plane_id));
-        if (vpic->nal_unit_type != HEVC_NAL_IDR_W_RADL &&
-           vpic->nal_unit_type != HEVC_NAL_IDR_N_LP) {
-            u(4 + mseq->log2_max_pic_order_cnt_lsb_minus4,
-              (pslice->pic_order_cnt &
-               ((1 << (mseq->log2_max_pic_order_cnt_lsb_minus4 + 4)) - 1)),
-              slice_pic_order_cnt_lsb);
-
-            u(1, mslice_var(short_term_ref_pic_set_sps_flag));
-            if (!mslice->short_term_ref_pic_set_sps_flag) {
-                vaapi_encode_h265_write_st_ref_pic_set(pbc, mseq->num_short_term_ref_pic_sets,
-                                                       &mslice->st_ref_pic_set);
-            } else if (mseq->num_short_term_ref_pic_sets > 1) {
-                u(av_log2(mseq->num_short_term_ref_pic_sets - 1) + 1,
-                  mslice_var(short_term_ref_pic_idx));
-            }
-
-            if (mseq->long_term_ref_pics_present_flag) {
-                av_assert0(0);
-            }
-        }
-
-        if (vseq->seq_fields.bits.sps_temporal_mvp_enabled_flag) {
-            u(1, vslice_field(slice_temporal_mvp_enabled_flag));
-        }
-
-        if (vseq->seq_fields.bits.sample_adaptive_offset_enabled_flag) {
-            u(1, vslice_field(slice_sao_luma_flag));
-            if (!vseq->seq_fields.bits.separate_colour_plane_flag &&
-                vseq->seq_fields.bits.chroma_format_idc != 0) {
-                u(1, vslice_field(slice_sao_chroma_flag));
-            }
-        }
-
-        if (vslice->slice_type == HEVC_SLICE_P || vslice->slice_type == HEVC_SLICE_B) {
-            u(1, vslice_field(num_ref_idx_active_override_flag));
-            if (vslice->slice_fields.bits.num_ref_idx_active_override_flag) {
-                ue(vslice_var(num_ref_idx_l0_active_minus1));
-                if (vslice->slice_type == HEVC_SLICE_B) {
-                    ue(vslice_var(num_ref_idx_l1_active_minus1));
-                }
-            }
-
-            if (mseq->lists_modification_present_flag) {
-                av_assert0(0);
-                // ref_pic_lists_modification()
-            }
-            if (vslice->slice_type == HEVC_SLICE_B) {
-                u(1, vslice_field(mvd_l1_zero_flag));
-            }
-            if (mseq->cabac_init_present_flag) {
-                u(1, vslice_field(cabac_init_flag));
-            }
-            if (vslice->slice_fields.bits.slice_temporal_mvp_enabled_flag) {
-                if (vslice->slice_type == HEVC_SLICE_B)
-                    u(1, vslice_field(collocated_from_l0_flag));
-                ue(vpic->collocated_ref_pic_index, collocated_ref_idx);
-            }
-            if ((vpic->pic_fields.bits.weighted_pred_flag &&
-                 vslice->slice_type == HEVC_SLICE_P) ||
-                (vpic->pic_fields.bits.weighted_bipred_flag &&
-                 vslice->slice_type == HEVC_SLICE_B)) {
-                av_assert0(0);
-                // pred_weight_table()
-            }
-            ue(5 - vslice->max_num_merge_cand, five_minus_max_num_merge_cand);
-        }
-
-        se(vslice_var(slice_qp_delta));
-        if (mseq->pps_slice_chroma_qp_offsets_present_flag) {
-            se(vslice_var(slice_cb_qp_offset));
-            se(vslice_var(slice_cr_qp_offset));
-        }
-        if (mseq->pps_slice_chroma_offset_list_enabled_flag) {
-            u(1, 0, cu_chroma_qp_offset_enabled_flag);
-        }
-        if (mseq->deblocking_filter_override_enabled_flag) {
-            u(1, mslice_var(deblocking_filter_override_flag));
-        }
-        if (mslice->deblocking_filter_override_flag) {
-            u(1, vslice_field(slice_deblocking_filter_disabled_flag));
-            if (!vslice->slice_fields.bits.slice_deblocking_filter_disabled_flag) {
-                se(vslice_var(slice_beta_offset_div2));
-                se(vslice_var(slice_tc_offset_div2));
-            }
-        }
-        if (vpic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag &&
-            (vslice->slice_fields.bits.slice_sao_luma_flag ||
-             vslice->slice_fields.bits.slice_sao_chroma_flag ||
-             vslice->slice_fields.bits.slice_deblocking_filter_disabled_flag)) {
-            u(1, vslice_field(slice_loop_filter_across_slices_enabled_flag));
-        }
-
-        if (vpic->pic_fields.bits.tiles_enabled_flag ||
-            vpic->pic_fields.bits.entropy_coding_sync_enabled_flag) {
-            // num_entry_point_offsets
-        }
-
-        if (0) {
-            // slice_segment_header_extension_length
-        }
-    }
-
-    u(1, 1, alignment_bit_equal_to_one);
-    while (put_bits_count(pbc) & 7)
-        u(1, 0, alignment_bit_equal_to_zero);
+    return 0;
 }
 
 static int vaapi_encode_h265_write_sequence_header(AVCodecContext *avctx,
                                                    char *data, size_t *data_len)
 {
-    VAAPIEncodeContext *ctx = avctx->priv_data;
-    PutBitContext pbc;
-    char tmp[256];
+    VAAPIEncodeH265Context *priv = avctx->priv_data;
+    CodedBitstreamFragment   *au = &priv->current_access_unit;
     int err;
-    size_t nal_len, bit_len, bit_pos, next_len;
 
-    bit_len = *data_len;
-    bit_pos = 0;
+    if (priv->aud_needed) {
+        err = vaapi_encode_h265_add_nal(avctx, au, &priv->raw_aud);
+        if (err < 0)
+            goto fail;
+        priv->aud_needed = 0;
+    }
 
-    init_put_bits(&pbc, tmp, sizeof(tmp));
-    vaapi_encode_h265_write_vps(&pbc, ctx);
-    nal_len = put_bits_count(&pbc);
-    flush_put_bits(&pbc);
-
-    next_len = bit_len - bit_pos;
-    err = ff_vaapi_encode_h26x_nal_unit_to_byte_stream(data + bit_pos / 8,
-                                                       &next_len,
-                                                       tmp, nal_len);
+    err = vaapi_encode_h265_add_nal(avctx, au, &priv->raw_vps);
     if (err < 0)
-        return err;
-    bit_pos += next_len;
+        goto fail;
 
-    init_put_bits(&pbc, tmp, sizeof(tmp));
-    vaapi_encode_h265_write_sps(&pbc, ctx);
-    nal_len = put_bits_count(&pbc);
-    flush_put_bits(&pbc);
-
-    next_len = bit_len - bit_pos;
-    err = ff_vaapi_encode_h26x_nal_unit_to_byte_stream(data + bit_pos / 8,
-                                                       &next_len,
-                                                       tmp, nal_len);
+    err = vaapi_encode_h265_add_nal(avctx, au, &priv->raw_sps);
     if (err < 0)
-        return err;
-    bit_pos += next_len;
+        goto fail;
 
-    init_put_bits(&pbc, tmp, sizeof(tmp));
-    vaapi_encode_h265_write_pps(&pbc, ctx);
-    nal_len = put_bits_count(&pbc);
-    flush_put_bits(&pbc);
-
-    next_len = bit_len - bit_pos;
-    err = ff_vaapi_encode_h26x_nal_unit_to_byte_stream(data + bit_pos / 8,
-                                                       &next_len,
-                                                       tmp, nal_len);
+    err = vaapi_encode_h265_add_nal(avctx, au, &priv->raw_pps);
     if (err < 0)
-        return err;
-    bit_pos += next_len;
+        goto fail;
 
-    *data_len = bit_pos;
-    return 0;
+    err = vaapi_encode_h265_write_access_unit(avctx, data, data_len, au);
+fail:
+    ff_cbs_fragment_uninit(priv->cbc, au);
+    return err;
 }
 
 static int vaapi_encode_h265_write_slice_header(AVCodecContext *avctx,
@@ -763,261 +169,681 @@
                                                 VAAPIEncodeSlice *slice,
                                                 char *data, size_t *data_len)
 {
-    VAAPIEncodeContext *ctx = avctx->priv_data;
-    PutBitContext pbc;
-    char tmp[256];
-    size_t header_len;
+    VAAPIEncodeH265Context *priv = avctx->priv_data;
+    CodedBitstreamFragment   *au = &priv->current_access_unit;
+    int err;
 
-    init_put_bits(&pbc, tmp, sizeof(tmp));
-    vaapi_encode_h265_write_slice_header2(&pbc, ctx, pic, slice);
-    header_len = put_bits_count(&pbc);
-    flush_put_bits(&pbc);
+    if (priv->aud_needed) {
+        err = vaapi_encode_h265_add_nal(avctx, au, &priv->raw_aud);
+        if (err < 0)
+            goto fail;
+        priv->aud_needed = 0;
+    }
 
-    return ff_vaapi_encode_h26x_nal_unit_to_byte_stream(data, data_len,
-                                                        tmp, header_len);
+    err = vaapi_encode_h265_add_nal(avctx, au, &priv->raw_slice);
+    if (err < 0)
+        goto fail;
+
+    err = vaapi_encode_h265_write_access_unit(avctx, data, data_len, au);
+fail:
+    ff_cbs_fragment_uninit(priv->cbc, au);
+    return err;
+}
+
+static int vaapi_encode_h265_write_extra_header(AVCodecContext *avctx,
+                                                VAAPIEncodePicture *pic,
+                                                int index, int *type,
+                                                char *data, size_t *data_len)
+{
+    VAAPIEncodeH265Context *priv = avctx->priv_data;
+    CodedBitstreamFragment   *au = &priv->current_access_unit;
+    int err, i;
+
+    if (priv->sei_needed) {
+        H265RawSEI *sei = &priv->raw_sei;
+
+        if (priv->aud_needed) {
+            err = vaapi_encode_h265_add_nal(avctx, au, &priv->aud);
+            if (err < 0)
+                goto fail;
+            priv->aud_needed = 0;
+        }
+
+        *sei = (H265RawSEI) {
+            .nal_unit_header = {
+                .nal_unit_type         = HEVC_NAL_SEI_PREFIX,
+                .nuh_layer_id          = 0,
+                .nuh_temporal_id_plus1 = 1,
+            },
+        };
+
+        i = 0;
+
+        if (priv->sei_needed & SEI_MASTERING_DISPLAY) {
+            sei->payload[i].payload_type = HEVC_SEI_TYPE_MASTERING_DISPLAY_INFO;
+            sei->payload[i].payload.mastering_display = priv->sei_mastering_display;
+            ++i;
+        }
+
+        if (priv->sei_needed & SEI_CONTENT_LIGHT_LEVEL) {
+            sei->payload[i].payload_type = HEVC_SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO;
+            sei->payload[i].payload.content_light_level = priv->sei_content_light_level;
+            ++i;
+        }
+
+        sei->payload_count = i;
+        av_assert0(sei->payload_count > 0);
+
+        err = vaapi_encode_h265_add_nal(avctx, au, sei);
+        if (err < 0)
+            goto fail;
+        priv->sei_needed = 0;
+
+        err = vaapi_encode_h265_write_access_unit(avctx, data, data_len, au);
+        if (err < 0)
+            goto fail;
+
+        ff_cbs_fragment_uninit(priv->cbc, au);
+
+        *type = VAEncPackedHeaderRawData;
+        return 0;
+    } else {
+        return AVERROR_EOF;
+    }
+
+fail:
+    ff_cbs_fragment_uninit(priv->cbc, au);
+    return err;
 }
 
 static int vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
 {
-    VAAPIEncodeContext                 *ctx = avctx->priv_data;
-    VAEncSequenceParameterBufferHEVC  *vseq = ctx->codec_sequence_params;
-    VAEncPictureParameterBufferHEVC   *vpic = ctx->codec_picture_params;
-    VAAPIEncodeH265Context            *priv = ctx->priv_data;
-    VAAPIEncodeH265MiscSequenceParams *mseq = &priv->misc_sequence_params;
+    VAAPIEncodeContext                *ctx = avctx->priv_data;
+    VAAPIEncodeH265Context           *priv = avctx->priv_data;
+    H265RawVPS                        *vps = &priv->raw_vps;
+    H265RawSPS                        *sps = &priv->raw_sps;
+    H265RawPPS                        *pps = &priv->raw_pps;
+    H265RawProfileTierLevel           *ptl = &vps->profile_tier_level;
+    H265RawVUI                        *vui = &sps->vui;
+    VAEncSequenceParameterBufferHEVC *vseq = ctx->codec_sequence_params;
+    VAEncPictureParameterBufferHEVC  *vpic = ctx->codec_picture_params;
+    const AVPixFmtDescriptor *desc;
+    int chroma_format, bit_depth;
     int i;
 
-    {
-        // general_profile_space == 0.
-        vseq->general_profile_idc = 1; // Main profile (ctx->codec_profile?)
-        vseq->general_tier_flag = 0;
+    memset(&priv->current_access_unit, 0,
+           sizeof(priv->current_access_unit));
 
-        vseq->general_level_idc = avctx->level * 3;
+    memset(vps, 0, sizeof(*vps));
+    memset(sps, 0, sizeof(*sps));
+    memset(pps, 0, sizeof(*pps));
 
-        vseq->intra_period = 0;
-        vseq->intra_idr_period = 0;
-        vseq->ip_period = 0;
 
-        vseq->pic_width_in_luma_samples  = ctx->surface_width;
-        vseq->pic_height_in_luma_samples = ctx->surface_height;
-
-        vseq->seq_fields.bits.chroma_format_idc = 1; // 4:2:0.
-        vseq->seq_fields.bits.separate_colour_plane_flag = 0;
-        vseq->seq_fields.bits.bit_depth_luma_minus8 =
-            avctx->profile == FF_PROFILE_HEVC_MAIN_10 ? 2 : 0;
-        vseq->seq_fields.bits.bit_depth_chroma_minus8 =
-            avctx->profile == FF_PROFILE_HEVC_MAIN_10 ? 2 : 0;
-        // Other misc flags all zero.
-
-        // These have to come from the capabilities of the encoder.  We have
-        // no way to query it, so just hardcode ones which worked for me...
-        // CTB size from 8x8 to 32x32.
-        vseq->log2_min_luma_coding_block_size_minus3 = 0;
-        vseq->log2_diff_max_min_luma_coding_block_size = 2;
-        // Transform size from 4x4 to 32x32.
-        vseq->log2_min_transform_block_size_minus2 = 0;
-        vseq->log2_diff_max_min_transform_block_size = 3;
-        // Full transform hierarchy allowed (2-5).
-        vseq->max_transform_hierarchy_depth_inter = 3;
-        vseq->max_transform_hierarchy_depth_intra = 3;
-
-        vseq->vui_parameters_present_flag = 0;
-
-        vseq->bits_per_second = avctx->bit_rate;
-        if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
-            vseq->vui_num_units_in_tick = avctx->framerate.den;
-            vseq->vui_time_scale        = avctx->framerate.num;
+    desc = av_pix_fmt_desc_get(priv->common.input_frames->sw_format);
+    av_assert0(desc);
+    if (desc->nb_components == 1) {
+        chroma_format = 0;
+    } else {
+        if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1) {
+            chroma_format = 1;
+        } else if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 0) {
+            chroma_format = 2;
+        } else if (desc->log2_chroma_w == 0 && desc->log2_chroma_h == 0) {
+            chroma_format = 3;
         } else {
-            vseq->vui_num_units_in_tick = avctx->time_base.num;
-            vseq->vui_time_scale        = avctx->time_base.den;
+            av_log(avctx, AV_LOG_ERROR, "Chroma format of input pixel format "
+                   "%s is not supported.\n", desc->name);
+            return AVERROR(EINVAL);
         }
+    }
+    bit_depth = desc->comp[0].depth;
 
-        vseq->intra_period     = avctx->gop_size;
-        vseq->intra_idr_period = avctx->gop_size;
-        vseq->ip_period        = ctx->b_per_p + 1;
+
+    // VPS
+
+    vps->nal_unit_header = (H265RawNALUnitHeader) {
+        .nal_unit_type         = HEVC_NAL_VPS,
+        .nuh_layer_id          = 0,
+        .nuh_temporal_id_plus1 = 1,
+    };
+
+    vps->vps_video_parameter_set_id = 0;
+
+    vps->vps_base_layer_internal_flag  = 1;
+    vps->vps_base_layer_available_flag = 1;
+    vps->vps_max_layers_minus1         = 0;
+    vps->vps_max_sub_layers_minus1     = 0;
+    vps->vps_temporal_id_nesting_flag  = 1;
+
+    ptl->general_profile_space = 0;
+    ptl->general_profile_idc   = avctx->profile;
+    ptl->general_tier_flag     = priv->tier;
+
+    if (chroma_format == 1) {
+        ptl->general_profile_compatibility_flag[1] = bit_depth ==  8;
+        ptl->general_profile_compatibility_flag[2] = bit_depth <= 10;
+    }
+    ptl->general_profile_compatibility_flag[4] = 1;
+
+    ptl->general_progressive_source_flag    = 1;
+    ptl->general_interlaced_source_flag     = 0;
+    ptl->general_non_packed_constraint_flag = 1;
+    ptl->general_frame_only_constraint_flag = 1;
+
+    ptl->general_max_12bit_constraint_flag = bit_depth <= 12;
+    ptl->general_max_10bit_constraint_flag = bit_depth <= 10;
+    ptl->general_max_8bit_constraint_flag  = bit_depth ==  8;
+
+    ptl->general_max_422chroma_constraint_flag  = chroma_format <= 2;
+    ptl->general_max_420chroma_constraint_flag  = chroma_format <= 1;
+    ptl->general_max_monochrome_constraint_flag = chroma_format == 0;
+
+    ptl->general_intra_constraint_flag = ctx->gop_size == 1;
+
+    ptl->general_lower_bit_rate_constraint_flag = 1;
+
+    if (avctx->level != FF_LEVEL_UNKNOWN) {
+        ptl->general_level_idc = avctx->level;
+    } else {
+        const H265LevelDescriptor *level;
+
+        level = ff_h265_guess_level(ptl, avctx->bit_rate,
+                                    ctx->surface_width, ctx->surface_height,
+                                    1, 1, 1, (ctx->b_per_p > 0) + 1);
+        if (level) {
+            av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name);
+            ptl->general_level_idc = level->level_idc;
+        } else {
+            av_log(avctx, AV_LOG_VERBOSE, "Stream will not conform to "
+                   "any normal level; using level 8.5.\n");
+            ptl->general_level_idc = 255;
+            // The tier flag must be set in level 8.5.
+            ptl->general_tier_flag = 1;
+        }
     }
 
-    {
-        vpic->decoded_curr_pic.picture_id = VA_INVALID_ID;
-        vpic->decoded_curr_pic.flags      = VA_PICTURE_HEVC_INVALID;
+    vps->vps_sub_layer_ordering_info_present_flag = 0;
+    vps->vps_max_dec_pic_buffering_minus1[0]      = (ctx->b_per_p > 0) + 1;
+    vps->vps_max_num_reorder_pics[0]              = (ctx->b_per_p > 0);
+    vps->vps_max_latency_increase_plus1[0]        = 0;
 
-        for (i = 0; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++) {
-            vpic->reference_frames[i].picture_id = VA_INVALID_ID;
-            vpic->reference_frames[i].flags      = VA_PICTURE_HEVC_INVALID;
-        }
+    vps->vps_max_layer_id             = 0;
+    vps->vps_num_layer_sets_minus1    = 0;
+    vps->layer_id_included_flag[0][0] = 1;
 
-        vpic->collocated_ref_pic_index = 0xff;
+    vps->vps_timing_info_present_flag = 1;
+    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
+        vps->vps_num_units_in_tick  = avctx->framerate.den;
+        vps->vps_time_scale         = avctx->framerate.num;
+        vps->vps_poc_proportional_to_timing_flag = 1;
+        vps->vps_num_ticks_poc_diff_one_minus1   = 0;
+    } else {
+        vps->vps_num_units_in_tick  = avctx->time_base.num;
+        vps->vps_time_scale         = avctx->time_base.den;
+        vps->vps_poc_proportional_to_timing_flag = 0;
+    }
+    vps->vps_num_hrd_parameters = 0;
 
-        vpic->last_picture = 0;
 
-        vpic->pic_init_qp = priv->fixed_qp_idr;
+    // SPS
 
-        vpic->diff_cu_qp_delta_depth = 0;
-        vpic->pps_cb_qp_offset = 0;
-        vpic->pps_cr_qp_offset = 0;
+    sps->nal_unit_header = (H265RawNALUnitHeader) {
+        .nal_unit_type         = HEVC_NAL_SPS,
+        .nuh_layer_id          = 0,
+        .nuh_temporal_id_plus1 = 1,
+    };
 
-        // tiles_enabled_flag == 0, so ignore num_tile_(rows|columns)_minus1.
+    sps->sps_video_parameter_set_id = vps->vps_video_parameter_set_id;
 
-        vpic->log2_parallel_merge_level_minus2 = 0;
+    sps->sps_max_sub_layers_minus1    = vps->vps_max_sub_layers_minus1;
+    sps->sps_temporal_id_nesting_flag = vps->vps_temporal_id_nesting_flag;
 
-        // No limit on size.
-        vpic->ctu_max_bitsize_allowed = 0;
+    sps->profile_tier_level = vps->profile_tier_level;
 
-        vpic->num_ref_idx_l0_default_active_minus1 = 0;
-        vpic->num_ref_idx_l1_default_active_minus1 = 0;
+    sps->sps_seq_parameter_set_id = 0;
 
-        vpic->slice_pic_parameter_set_id = 0;
+    sps->chroma_format_idc          = chroma_format;
+    sps->separate_colour_plane_flag = 0;
 
-        vpic->pic_fields.bits.screen_content_flag = 0;
-        vpic->pic_fields.bits.enable_gpu_weighted_prediction = 0;
+    sps->pic_width_in_luma_samples  = ctx->surface_width;
+    sps->pic_height_in_luma_samples = ctx->surface_height;
 
-        // Per-CU QP changes are required for non-constant-QP modes.
-        vpic->pic_fields.bits.cu_qp_delta_enabled_flag =
-            ctx->va_rc_mode != VA_RC_CQP;
+    if (avctx->width  != ctx->surface_width ||
+        avctx->height != ctx->surface_height) {
+        sps->conformance_window_flag = 1;
+        sps->conf_win_left_offset   = 0;
+        sps->conf_win_right_offset  =
+            (ctx->surface_width - avctx->width) / 2;
+        sps->conf_win_top_offset    = 0;
+        sps->conf_win_bottom_offset =
+            (ctx->surface_height - avctx->height) / 2;
+    } else {
+        sps->conformance_window_flag = 0;
     }
 
-    {
-        mseq->video_parameter_set_id = 5;
-        mseq->seq_parameter_set_id = 5;
+    sps->bit_depth_luma_minus8   = bit_depth - 8;
+    sps->bit_depth_chroma_minus8 = bit_depth - 8;
 
-        mseq->vps_max_layers_minus1 = 0;
-        mseq->vps_max_sub_layers_minus1 = 0;
-        mseq->vps_temporal_id_nesting_flag = 1;
-        mseq->sps_max_sub_layers_minus1 = 0;
-        mseq->sps_temporal_id_nesting_flag = 1;
+    sps->log2_max_pic_order_cnt_lsb_minus4 = 8;
 
-        for (i = 0; i < 32; i++) {
-            mseq->general_profile_compatibility_flag[i] =
-                (i == vseq->general_profile_idc);
-        }
+    sps->sps_sub_layer_ordering_info_present_flag =
+        vps->vps_sub_layer_ordering_info_present_flag;
+    for (i = 0; i <= sps->sps_max_sub_layers_minus1; i++) {
+        sps->sps_max_dec_pic_buffering_minus1[i] =
+            vps->vps_max_dec_pic_buffering_minus1[i];
+        sps->sps_max_num_reorder_pics[i] =
+            vps->vps_max_num_reorder_pics[i];
+        sps->sps_max_latency_increase_plus1[i] =
+            vps->vps_max_latency_increase_plus1[i];
+    }
 
-        mseq->general_progressive_source_flag    = 1;
-        mseq->general_interlaced_source_flag     = 0;
-        mseq->general_non_packed_constraint_flag = 0;
-        mseq->general_frame_only_constraint_flag = 1;
-        mseq->general_inbld_flag = 0;
+    // These have to come from the capabilities of the encoder.  We have no
+    // way to query them, so just hardcode parameters which work on the Intel
+    // driver.
+    // CTB size from 8x8 to 32x32.
+    sps->log2_min_luma_coding_block_size_minus3   = 0;
+    sps->log2_diff_max_min_luma_coding_block_size = 2;
+    // Transform size from 4x4 to 32x32.
+    sps->log2_min_luma_transform_block_size_minus2   = 0;
+    sps->log2_diff_max_min_luma_transform_block_size = 3;
+    // Full transform hierarchy allowed (2-5).
+    sps->max_transform_hierarchy_depth_inter = 3;
+    sps->max_transform_hierarchy_depth_intra = 3;
+    // AMP works.
+    sps->amp_enabled_flag = 1;
+    // SAO and temporal MVP do not work.
+    sps->sample_adaptive_offset_enabled_flag = 0;
+    sps->sps_temporal_mvp_enabled_flag       = 0;
 
-        mseq->log2_max_pic_order_cnt_lsb_minus4 = 8;
-        mseq->vps_sub_layer_ordering_info_present_flag = 0;
-        mseq->vps_max_dec_pic_buffering_minus1[0] = (avctx->max_b_frames > 0) + 1;
-        mseq->vps_max_num_reorder_pics[0]         = (avctx->max_b_frames > 0);
-        mseq->vps_max_latency_increase_plus1[0]   = 0;
-        mseq->sps_sub_layer_ordering_info_present_flag = 0;
-        mseq->sps_max_dec_pic_buffering_minus1[0] = (avctx->max_b_frames > 0) + 1;
-        mseq->sps_max_num_reorder_pics[0]         = (avctx->max_b_frames > 0);
-        mseq->sps_max_latency_increase_plus1[0]   = 0;
+    sps->pcm_enabled_flag = 0;
 
-        mseq->vps_timing_info_present_flag = 1;
-        mseq->vps_num_units_in_tick = avctx->time_base.num;
-        mseq->vps_time_scale        = avctx->time_base.den;
-        mseq->vps_poc_proportional_to_timing_flag = 1;
-        mseq->vps_num_ticks_poc_diff_minus1 = 0;
+    // STRPSs should ideally be here rather than defined individually in
+    // each slice, but the structure isn't completely fixed so for now
+    // don't bother.
+    sps->num_short_term_ref_pic_sets     = 0;
+    sps->long_term_ref_pics_present_flag = 0;
 
-        if (avctx->width  != ctx->surface_width ||
-            avctx->height != ctx->surface_height) {
-            mseq->conformance_window_flag = 1;
-            mseq->conf_win_left_offset   = 0;
-            mseq->conf_win_right_offset  =
-                (ctx->surface_width - avctx->width) / 2;
-            mseq->conf_win_top_offset    = 0;
-            mseq->conf_win_bottom_offset =
-                (ctx->surface_height - avctx->height) / 2;
-        } else {
-            mseq->conformance_window_flag = 0;
-        }
+    sps->vui_parameters_present_flag = 1;
 
-        mseq->num_short_term_ref_pic_sets = 0;
-        // STRPSs should ideally be here rather than repeated in each slice.
-
-        mseq->vui_parameters_present_flag = 1;
-        if (avctx->sample_aspect_ratio.num != 0) {
-            mseq->aspect_ratio_info_present_flag = 1;
-            if (avctx->sample_aspect_ratio.num ==
-                avctx->sample_aspect_ratio.den) {
-                mseq->aspect_ratio_idc = 1;
-            } else {
-                mseq->aspect_ratio_idc = 255; // Extended SAR.
-                mseq->sar_width  = avctx->sample_aspect_ratio.num;
-                mseq->sar_height = avctx->sample_aspect_ratio.den;
+    if (avctx->sample_aspect_ratio.num != 0 &&
+        avctx->sample_aspect_ratio.den != 0) {
+        static const AVRational sar_idc[] = {
+            {   0,  0 },
+            {   1,  1 }, {  12, 11 }, {  10, 11 }, {  16, 11 },
+            {  40, 33 }, {  24, 11 }, {  20, 11 }, {  32, 11 },
+            {  80, 33 }, {  18, 11 }, {  15, 11 }, {  64, 33 },
+            { 160, 99 }, {   4,  3 }, {   3,  2 }, {   2,  1 },
+        };
+        int i;
+        for (i = 0; i < FF_ARRAY_ELEMS(sar_idc); i++) {
+            if (avctx->sample_aspect_ratio.num == sar_idc[i].num &&
+                avctx->sample_aspect_ratio.den == sar_idc[i].den) {
+                vui->aspect_ratio_idc = i;
+                break;
             }
         }
-        if (1) {
-            // Should this be conditional on some of these being set?
-            mseq->video_signal_type_present_flag = 1;
-            mseq->video_format = 5; // Unspecified.
-            mseq->video_full_range_flag = 0;
-            mseq->colour_description_present_flag = 1;
-            mseq->colour_primaries = avctx->color_primaries;
-            mseq->transfer_characteristics = avctx->color_trc;
-            mseq->matrix_coeffs = avctx->colorspace;
+        if (i >= FF_ARRAY_ELEMS(sar_idc)) {
+            vui->aspect_ratio_idc = 255;
+            vui->sar_width  = avctx->sample_aspect_ratio.num;
+            vui->sar_height = avctx->sample_aspect_ratio.den;
         }
+        vui->aspect_ratio_info_present_flag = 1;
     }
 
+    if (avctx->color_range     != AVCOL_RANGE_UNSPECIFIED ||
+        avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
+        avctx->color_trc       != AVCOL_TRC_UNSPECIFIED ||
+        avctx->colorspace      != AVCOL_SPC_UNSPECIFIED) {
+        vui->video_signal_type_present_flag = 1;
+        vui->video_format      = 5; // Unspecified.
+        vui->video_full_range_flag =
+            avctx->color_range == AVCOL_RANGE_JPEG;
+
+        if (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
+            avctx->color_trc       != AVCOL_TRC_UNSPECIFIED ||
+            avctx->colorspace      != AVCOL_SPC_UNSPECIFIED) {
+            vui->colour_description_present_flag = 1;
+            vui->colour_primaries         = avctx->color_primaries;
+            vui->transfer_characteristics = avctx->color_trc;
+            vui->matrix_coefficients      = avctx->colorspace;
+        }
+    } else {
+        vui->video_format             = 5;
+        vui->video_full_range_flag    = 0;
+        vui->colour_primaries         = avctx->color_primaries;
+        vui->transfer_characteristics = avctx->color_trc;
+        vui->matrix_coefficients      = avctx->colorspace;
+    }
+
+    if (avctx->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED) {
+        vui->chroma_loc_info_present_flag = 1;
+        vui->chroma_sample_loc_type_top_field    =
+        vui->chroma_sample_loc_type_bottom_field =
+            avctx->chroma_sample_location - 1;
+    }
+
+    vui->vui_timing_info_present_flag        = 1;
+    vui->vui_num_units_in_tick               = vps->vps_num_units_in_tick;
+    vui->vui_time_scale                      = vps->vps_time_scale;
+    vui->vui_poc_proportional_to_timing_flag = vps->vps_poc_proportional_to_timing_flag;
+    vui->vui_num_ticks_poc_diff_one_minus1   = vps->vps_num_ticks_poc_diff_one_minus1;
+    vui->vui_hrd_parameters_present_flag     = 0;
+
+    vui->bitstream_restriction_flag    = 1;
+    vui->motion_vectors_over_pic_boundaries_flag = 1;
+    vui->restricted_ref_pic_lists_flag = 1;
+    vui->max_bytes_per_pic_denom       = 0;
+    vui->max_bits_per_min_cu_denom     = 0;
+    vui->log2_max_mv_length_horizontal = 15;
+    vui->log2_max_mv_length_vertical   = 15;
+
+
+    // PPS
+
+    pps->nal_unit_header = (H265RawNALUnitHeader) {
+        .nal_unit_type         = HEVC_NAL_PPS,
+        .nuh_layer_id          = 0,
+        .nuh_temporal_id_plus1 = 1,
+    };
+
+    pps->pps_pic_parameter_set_id = 0;
+    pps->pps_seq_parameter_set_id = sps->sps_seq_parameter_set_id;
+
+    pps->num_ref_idx_l0_default_active_minus1 = 0;
+    pps->num_ref_idx_l1_default_active_minus1 = 0;
+
+    pps->init_qp_minus26 = priv->fixed_qp_idr - 26;
+
+    pps->cu_qp_delta_enabled_flag = (ctx->va_rc_mode != VA_RC_CQP);
+    pps->diff_cu_qp_delta_depth   = 0;
+
+    pps->pps_loop_filter_across_slices_enabled_flag = 1;
+
+
+    // Fill VAAPI parameter buffers.
+
+    *vseq = (VAEncSequenceParameterBufferHEVC) {
+        .general_profile_idc = vps->profile_tier_level.general_profile_idc,
+        .general_level_idc   = vps->profile_tier_level.general_level_idc,
+        .general_tier_flag   = vps->profile_tier_level.general_tier_flag,
+
+        .intra_period     = ctx->gop_size,
+        .intra_idr_period = ctx->gop_size,
+        .ip_period        = ctx->b_per_p + 1,
+        .bits_per_second  = ctx->va_bit_rate,
+
+        .pic_width_in_luma_samples  = sps->pic_width_in_luma_samples,
+        .pic_height_in_luma_samples = sps->pic_height_in_luma_samples,
+
+        .seq_fields.bits = {
+            .chroma_format_idc             = sps->chroma_format_idc,
+            .separate_colour_plane_flag    = sps->separate_colour_plane_flag,
+            .bit_depth_luma_minus8         = sps->bit_depth_luma_minus8,
+            .bit_depth_chroma_minus8       = sps->bit_depth_chroma_minus8,
+            .scaling_list_enabled_flag     = sps->scaling_list_enabled_flag,
+            .strong_intra_smoothing_enabled_flag =
+                sps->strong_intra_smoothing_enabled_flag,
+            .amp_enabled_flag              = sps->amp_enabled_flag,
+            .sample_adaptive_offset_enabled_flag =
+                sps->sample_adaptive_offset_enabled_flag,
+            .pcm_enabled_flag              = sps->pcm_enabled_flag,
+            .pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled_flag,
+            .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag,
+        },
+
+        .log2_min_luma_coding_block_size_minus3 =
+            sps->log2_min_luma_coding_block_size_minus3,
+        .log2_diff_max_min_luma_coding_block_size =
+            sps->log2_diff_max_min_luma_coding_block_size,
+        .log2_min_transform_block_size_minus2 =
+            sps->log2_min_luma_transform_block_size_minus2,
+        .log2_diff_max_min_transform_block_size =
+            sps->log2_diff_max_min_luma_transform_block_size,
+        .max_transform_hierarchy_depth_inter =
+            sps->max_transform_hierarchy_depth_inter,
+        .max_transform_hierarchy_depth_intra =
+            sps->max_transform_hierarchy_depth_intra,
+
+        .pcm_sample_bit_depth_luma_minus1 =
+            sps->pcm_sample_bit_depth_luma_minus1,
+        .pcm_sample_bit_depth_chroma_minus1 =
+            sps->pcm_sample_bit_depth_chroma_minus1,
+        .log2_min_pcm_luma_coding_block_size_minus3 =
+            sps->log2_min_pcm_luma_coding_block_size_minus3,
+        .log2_max_pcm_luma_coding_block_size_minus3 =
+            sps->log2_min_pcm_luma_coding_block_size_minus3 +
+            sps->log2_diff_max_min_pcm_luma_coding_block_size,
+
+        .vui_parameters_present_flag = 0,
+    };
+
+    *vpic = (VAEncPictureParameterBufferHEVC) {
+        .decoded_curr_pic = {
+            .picture_id = VA_INVALID_ID,
+            .flags      = VA_PICTURE_HEVC_INVALID,
+        },
+
+        .coded_buf = VA_INVALID_ID,
+
+        .collocated_ref_pic_index = 0xff,
+
+        .last_picture = 0,
+
+        .pic_init_qp            = pps->init_qp_minus26 + 26,
+        .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth,
+        .pps_cb_qp_offset       = pps->pps_cb_qp_offset,
+        .pps_cr_qp_offset       = pps->pps_cr_qp_offset,
+
+        .num_tile_columns_minus1 = pps->num_tile_columns_minus1,
+        .num_tile_rows_minus1    = pps->num_tile_rows_minus1,
+
+        .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2,
+        .ctu_max_bitsize_allowed          = 0,
+
+        .num_ref_idx_l0_default_active_minus1 =
+            pps->num_ref_idx_l0_default_active_minus1,
+        .num_ref_idx_l1_default_active_minus1 =
+            pps->num_ref_idx_l1_default_active_minus1,
+
+        .slice_pic_parameter_set_id = pps->pps_pic_parameter_set_id,
+
+        .pic_fields.bits = {
+            .sign_data_hiding_enabled_flag  = pps->sign_data_hiding_enabled_flag,
+            .constrained_intra_pred_flag    = pps->constrained_intra_pred_flag,
+            .transform_skip_enabled_flag    = pps->transform_skip_enabled_flag,
+            .cu_qp_delta_enabled_flag       = pps->cu_qp_delta_enabled_flag,
+            .weighted_pred_flag             = pps->weighted_pred_flag,
+            .weighted_bipred_flag           = pps->weighted_bipred_flag,
+            .transquant_bypass_enabled_flag = pps->transquant_bypass_enabled_flag,
+            .tiles_enabled_flag             = pps->tiles_enabled_flag,
+            .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag,
+            .loop_filter_across_tiles_enabled_flag =
+                pps->loop_filter_across_tiles_enabled_flag,
+            .scaling_list_data_present_flag = (sps->sps_scaling_list_data_present_flag |
+                                               pps->pps_scaling_list_data_present_flag),
+            .screen_content_flag            = 0,
+            .enable_gpu_weighted_prediction = 0,
+            .no_output_of_prior_pics_flag   = 0,
+        },
+    };
+
     return 0;
 }
 
 static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
                                                  VAAPIEncodePicture *pic)
 {
-    VAAPIEncodeContext               *ctx = avctx->priv_data;
+    VAAPIEncodeH265Context          *priv = avctx->priv_data;
     VAEncPictureParameterBufferHEVC *vpic = pic->codec_picture_params;
-    VAAPIEncodeH265Context          *priv = ctx->priv_data;
     int i;
 
     if (pic->type == PICTURE_TYPE_IDR) {
         av_assert0(pic->display_order == pic->encode_order);
+
         priv->last_idr_frame = pic->display_order;
+
+        priv->slice_nal_unit = HEVC_NAL_IDR_W_RADL;
+        priv->slice_type     = HEVC_SLICE_I;
+        priv->pic_type       = 0;
     } else {
         av_assert0(pic->encode_order > priv->last_idr_frame);
-        // Display order need not be if we have RA[SD]L pictures, though.
+
+        if (pic->type == PICTURE_TYPE_I) {
+            priv->slice_nal_unit = HEVC_NAL_CRA_NUT;
+            priv->slice_type     = HEVC_SLICE_I;
+            priv->pic_type       = 0;
+        } else if (pic->type == PICTURE_TYPE_P) {
+            av_assert0(pic->refs[0]);
+            priv->slice_nal_unit = HEVC_NAL_TRAIL_R;
+            priv->slice_type     = HEVC_SLICE_P;
+            priv->pic_type       = 1;
+        } else {
+            av_assert0(pic->refs[0] && pic->refs[1]);
+            if (pic->refs[1]->type == PICTURE_TYPE_I)
+                priv->slice_nal_unit = HEVC_NAL_RASL_N;
+            else
+                priv->slice_nal_unit = HEVC_NAL_TRAIL_N;
+            priv->slice_type = HEVC_SLICE_B;
+            priv->pic_type   = 2;
+        }
+    }
+    priv->pic_order_cnt = pic->display_order - priv->last_idr_frame;
+
+    if (priv->aud) {
+        priv->aud_needed = 1;
+        priv->raw_aud = (H265RawAUD) {
+            .nal_unit_header = {
+                .nal_unit_type         = HEVC_NAL_AUD,
+                .nuh_layer_id          = 0,
+                .nuh_temporal_id_plus1 = 1,
+            },
+            .pic_type = priv->pic_type,
+        };
+    } else {
+        priv->aud_needed = 0;
     }
 
-    vpic->decoded_curr_pic.picture_id    = pic->recon_surface;
-    vpic->decoded_curr_pic.pic_order_cnt =
-        pic->display_order - priv->last_idr_frame;
-    vpic->decoded_curr_pic.flags         = 0;
+    priv->sei_needed = 0;
+
+    // Only look for the metadata on I/IDR frame on the output. We
+    // may force an IDR frame on the output where the medadata gets
+    // changed on the input frame.
+    if ((priv->sei & SEI_MASTERING_DISPLAY) &&
+        (pic->type == PICTURE_TYPE_I || pic->type == PICTURE_TYPE_IDR)) {
+        AVFrameSideData *sd =
+            av_frame_get_side_data(pic->input_image,
+                                   AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
+
+        if (sd) {
+            AVMasteringDisplayMetadata *mdm =
+                (AVMasteringDisplayMetadata *)sd->data;
+
+            // SEI is needed when both the primaries and luminance are set
+            if (mdm->has_primaries && mdm->has_luminance) {
+                H265RawSEIMasteringDisplayColourVolume *mdcv =
+                    &priv->sei_mastering_display;
+                const int mapping[3] = {1, 2, 0};
+                const int chroma_den = 50000;
+                const int luma_den   = 10000;
+
+                for (i = 0; i < 3; i++) {
+                    const int j = mapping[i];
+                    mdcv->display_primaries_x[i] =
+                        FFMIN(lrint(chroma_den *
+                                    av_q2d(mdm->display_primaries[j][0])),
+                              chroma_den);
+                    mdcv->display_primaries_y[i] =
+                        FFMIN(lrint(chroma_den *
+                                    av_q2d(mdm->display_primaries[j][1])),
+                              chroma_den);
+                }
+
+                mdcv->white_point_x =
+                    FFMIN(lrint(chroma_den * av_q2d(mdm->white_point[0])),
+                          chroma_den);
+                mdcv->white_point_y =
+                    FFMIN(lrint(chroma_den * av_q2d(mdm->white_point[1])),
+                          chroma_den);
+
+                mdcv->max_display_mastering_luminance =
+                    lrint(luma_den * av_q2d(mdm->max_luminance));
+                mdcv->min_display_mastering_luminance =
+                    FFMIN(lrint(luma_den * av_q2d(mdm->min_luminance)),
+                          mdcv->max_display_mastering_luminance);
+
+                priv->sei_needed |= SEI_MASTERING_DISPLAY;
+            }
+        }
+    }
+
+    if ((priv->sei & SEI_CONTENT_LIGHT_LEVEL) &&
+        (pic->type == PICTURE_TYPE_I || pic->type == PICTURE_TYPE_IDR)) {
+        AVFrameSideData *sd =
+            av_frame_get_side_data(pic->input_image,
+                                   AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
+
+        if (sd) {
+            AVContentLightMetadata *clm =
+                (AVContentLightMetadata *)sd->data;
+            H265RawSEIContentLightLevelInfo *clli =
+                &priv->sei_content_light_level;
+
+            clli->max_content_light_level     = FFMIN(clm->MaxCLL,  65535);
+            clli->max_pic_average_light_level = FFMIN(clm->MaxFALL, 65535);
+
+            priv->sei_needed |= SEI_CONTENT_LIGHT_LEVEL;
+        }
+    }
+
+    vpic->decoded_curr_pic = (VAPictureHEVC) {
+        .picture_id    = pic->recon_surface,
+        .pic_order_cnt = priv->pic_order_cnt,
+        .flags         = 0,
+    };
 
     for (i = 0; i < pic->nb_refs; i++) {
         VAAPIEncodePicture *ref = pic->refs[i];
-        av_assert0(ref);
-        vpic->reference_frames[i].picture_id    = ref->recon_surface;
-        vpic->reference_frames[i].pic_order_cnt =
-            ref->display_order - priv->last_idr_frame;
-        vpic->reference_frames[i].flags =
-            (ref->display_order < pic->display_order ?
-             VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE : 0) |
-            (ref->display_order > pic->display_order ?
-             VA_PICTURE_HEVC_RPS_ST_CURR_AFTER  : 0);
+        av_assert0(ref && ref->encode_order < pic->encode_order);
+
+        vpic->reference_frames[i] = (VAPictureHEVC) {
+            .picture_id    = ref->recon_surface,
+            .pic_order_cnt = ref->display_order - priv->last_idr_frame,
+            .flags = (ref->display_order < pic->display_order ?
+                      VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE : 0) |
+                     (ref->display_order > pic->display_order ?
+                      VA_PICTURE_HEVC_RPS_ST_CURR_AFTER  : 0),
+        };
     }
     for (; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++) {
-        vpic->reference_frames[i].picture_id = VA_INVALID_ID;
-        vpic->reference_frames[i].flags      = VA_PICTURE_HEVC_INVALID;
+        vpic->reference_frames[i] = (VAPictureHEVC) {
+            .picture_id = VA_INVALID_ID,
+            .flags      = VA_PICTURE_HEVC_INVALID,
+        };
     }
 
     vpic->coded_buf = pic->output_buffer;
 
+    vpic->nal_unit_type = priv->slice_nal_unit;
+
     switch (pic->type) {
     case PICTURE_TYPE_IDR:
-        vpic->nal_unit_type = HEVC_NAL_IDR_W_RADL;
-        vpic->pic_fields.bits.idr_pic_flag = 1;
-        vpic->pic_fields.bits.coding_type  = 1;
+        vpic->pic_fields.bits.idr_pic_flag       = 1;
+        vpic->pic_fields.bits.coding_type        = 1;
         vpic->pic_fields.bits.reference_pic_flag = 1;
         break;
     case PICTURE_TYPE_I:
-        vpic->nal_unit_type = HEVC_NAL_TRAIL_R;
-        vpic->pic_fields.bits.idr_pic_flag = 0;
-        vpic->pic_fields.bits.coding_type  = 1;
+        vpic->pic_fields.bits.idr_pic_flag       = 0;
+        vpic->pic_fields.bits.coding_type        = 1;
         vpic->pic_fields.bits.reference_pic_flag = 1;
         break;
     case PICTURE_TYPE_P:
-        vpic->nal_unit_type = HEVC_NAL_TRAIL_R;
-        vpic->pic_fields.bits.idr_pic_flag = 0;
-        vpic->pic_fields.bits.coding_type  = 2;
+        vpic->pic_fields.bits.idr_pic_flag       = 0;
+        vpic->pic_fields.bits.coding_type        = 2;
         vpic->pic_fields.bits.reference_pic_flag = 1;
         break;
     case PICTURE_TYPE_B:
-        vpic->nal_unit_type = HEVC_NAL_TRAIL_R;
-        vpic->pic_fields.bits.idr_pic_flag = 0;
-        vpic->pic_fields.bits.coding_type  = 3;
+        vpic->pic_fields.bits.idr_pic_flag       = 0;
+        vpic->pic_fields.bits.coding_type        = 3;
         vpic->pic_fields.bits.reference_pic_flag = 0;
         break;
     default:
@@ -1034,41 +860,153 @@
                                                VAAPIEncodeSlice *slice)
 {
     VAAPIEncodeContext                *ctx = avctx->priv_data;
+    VAAPIEncodeH265Context           *priv = avctx->priv_data;
+    const H265RawSPS                  *sps = &priv->raw_sps;
+    const H265RawPPS                  *pps = &priv->raw_pps;
+    H265RawSliceHeader                 *sh = &priv->raw_slice.header;
     VAEncPictureParameterBufferHEVC  *vpic = pic->codec_picture_params;
     VAEncSliceParameterBufferHEVC  *vslice = slice->codec_slice_params;
-    VAAPIEncodeH265Context           *priv = ctx->priv_data;
-    VAAPIEncodeH265Slice           *pslice;
-    VAAPIEncodeH265MiscSliceParams *mslice;
     int i;
 
-    slice->priv_data = av_mallocz(sizeof(*pslice));
-    if (!slice->priv_data)
-        return AVERROR(ENOMEM);
-    pslice = slice->priv_data;
-    mslice = &pslice->misc_slice_params;
+    sh->nal_unit_header = (H265RawNALUnitHeader) {
+        .nal_unit_type         = priv->slice_nal_unit,
+        .nuh_layer_id          = 0,
+        .nuh_temporal_id_plus1 = 1,
+    };
+
+    sh->slice_pic_parameter_set_id      = pps->pps_pic_parameter_set_id;
 
     // Currently we only support one slice per frame.
-    vslice->slice_segment_address = 0;
-    vslice->num_ctu_in_slice = priv->ctu_width * priv->ctu_height;
+    sh->first_slice_segment_in_pic_flag = 1;
+    sh->slice_segment_address           = 0;
 
-    switch (pic->type) {
-    case PICTURE_TYPE_IDR:
-    case PICTURE_TYPE_I:
-        vslice->slice_type = HEVC_SLICE_I;
-        break;
-    case PICTURE_TYPE_P:
-        vslice->slice_type = HEVC_SLICE_P;
-        break;
-    case PICTURE_TYPE_B:
-        vslice->slice_type = HEVC_SLICE_B;
-        break;
-    default:
-        av_assert0(0 && "invalid picture type");
+    sh->slice_type = priv->slice_type;
+
+    sh->slice_pic_order_cnt_lsb = priv->pic_order_cnt &
+        (1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4)) - 1;
+
+    if (pic->type != PICTURE_TYPE_IDR) {
+        H265RawSTRefPicSet *rps;
+        VAAPIEncodePicture *st;
+        int used;
+
+        sh->short_term_ref_pic_set_sps_flag = 0;
+
+        rps = &sh->short_term_ref_pic_set;
+        memset(rps, 0, sizeof(*rps));
+
+        for (st = ctx->pic_start; st; st = st->next) {
+            if (st->encode_order >= pic->encode_order) {
+                // Not yet in DPB.
+                continue;
+            }
+            used = 0;
+            for (i = 0; i < pic->nb_refs; i++) {
+                if (pic->refs[i] == st)
+                    used = 1;
+            }
+            if (!used) {
+                // Usually each picture always uses all of the others in the
+                // DPB as references.  The one case we have to treat here is
+                // a non-IDR IRAP picture, which may need to hold unused
+                // references across itself to be used for the decoding of
+                // following RASL pictures.  This looks for such an RASL
+                // picture, and keeps the reference if there is one.
+                VAAPIEncodePicture *rp;
+                for (rp = ctx->pic_start; rp; rp = rp->next) {
+                    if (rp->encode_order < pic->encode_order)
+                        continue;
+                    if (rp->type != PICTURE_TYPE_B)
+                        continue;
+                    if (rp->refs[0] == st && rp->refs[1] == pic)
+                        break;
+                }
+                if (!rp)
+                    continue;
+            }
+            // This only works for one instance of each (delta_poc_sN_minus1
+            // is relative to the previous frame in the list, not relative to
+            // the current frame directly).
+            if (st->display_order < pic->display_order) {
+                rps->delta_poc_s0_minus1[rps->num_negative_pics] =
+                    pic->display_order - st->display_order - 1;
+                rps->used_by_curr_pic_s0_flag[rps->num_negative_pics] = used;
+                ++rps->num_negative_pics;
+            } else {
+                rps->delta_poc_s1_minus1[rps->num_positive_pics] =
+                    st->display_order - pic->display_order - 1;
+                rps->used_by_curr_pic_s1_flag[rps->num_positive_pics] = used;
+                ++rps->num_positive_pics;
+            }
+        }
+
+        sh->num_long_term_sps  = 0;
+        sh->num_long_term_pics = 0;
+
+        sh->slice_temporal_mvp_enabled_flag =
+            sps->sps_temporal_mvp_enabled_flag;
+        if (sh->slice_temporal_mvp_enabled_flag) {
+            sh->collocated_from_l0_flag = sh->slice_type == HEVC_SLICE_B;
+            sh->collocated_ref_idx      = 0;
+        }
+
+        sh->num_ref_idx_active_override_flag = 0;
+        sh->num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
+        sh->num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
     }
 
-    vslice->slice_pic_parameter_set_id = vpic->slice_pic_parameter_set_id;
+    sh->slice_sao_luma_flag = sh->slice_sao_chroma_flag =
+        sps->sample_adaptive_offset_enabled_flag;
 
-    pslice->pic_order_cnt = pic->display_order - priv->last_idr_frame;
+    if (pic->type == PICTURE_TYPE_B)
+        sh->slice_qp_delta = priv->fixed_qp_b - (pps->init_qp_minus26 + 26);
+    else if (pic->type == PICTURE_TYPE_P)
+        sh->slice_qp_delta = priv->fixed_qp_p - (pps->init_qp_minus26 + 26);
+    else
+        sh->slice_qp_delta = priv->fixed_qp_idr - (pps->init_qp_minus26 + 26);
+
+
+    *vslice = (VAEncSliceParameterBufferHEVC) {
+        .slice_segment_address = sh->slice_segment_address,
+        .num_ctu_in_slice      = priv->ctu_width * priv->ctu_height,
+
+        .slice_type                 = sh->slice_type,
+        .slice_pic_parameter_set_id = sh->slice_pic_parameter_set_id,
+
+        .num_ref_idx_l0_active_minus1 = sh->num_ref_idx_l0_active_minus1,
+        .num_ref_idx_l1_active_minus1 = sh->num_ref_idx_l1_active_minus1,
+
+        .luma_log2_weight_denom         = sh->luma_log2_weight_denom,
+        .delta_chroma_log2_weight_denom = sh->delta_chroma_log2_weight_denom,
+
+        .max_num_merge_cand = 5 - sh->five_minus_max_num_merge_cand,
+
+        .slice_qp_delta     = sh->slice_qp_delta,
+        .slice_cb_qp_offset = sh->slice_cb_qp_offset,
+        .slice_cr_qp_offset = sh->slice_cr_qp_offset,
+
+        .slice_beta_offset_div2 = sh->slice_beta_offset_div2,
+        .slice_tc_offset_div2   = sh->slice_tc_offset_div2,
+
+        .slice_fields.bits = {
+            .last_slice_of_pic_flag       = 1,
+            .dependent_slice_segment_flag = sh->dependent_slice_segment_flag,
+            .colour_plane_id              = sh->colour_plane_id,
+            .slice_temporal_mvp_enabled_flag =
+                sh->slice_temporal_mvp_enabled_flag,
+            .slice_sao_luma_flag          = sh->slice_sao_luma_flag,
+            .slice_sao_chroma_flag        = sh->slice_sao_chroma_flag,
+            .num_ref_idx_active_override_flag =
+                sh->num_ref_idx_active_override_flag,
+            .mvd_l1_zero_flag             = sh->mvd_l1_zero_flag,
+            .cabac_init_flag              = sh->cabac_init_flag,
+            .slice_deblocking_filter_disabled_flag =
+                sh->slice_deblocking_filter_disabled_flag,
+            .slice_loop_filter_across_slices_enabled_flag =
+                sh->slice_loop_filter_across_slices_enabled_flag,
+            .collocated_from_l0_flag      = sh->collocated_from_l0_flag,
+        },
+    };
 
     for (i = 0; i < FF_ARRAY_ELEMS(vslice->ref_pic_list0); i++) {
         vslice->ref_pic_list0[i].picture_id = VA_INVALID_ID;
@@ -1082,84 +1020,26 @@
         // Backward reference for P- or B-frame.
         av_assert0(pic->type == PICTURE_TYPE_P ||
                    pic->type == PICTURE_TYPE_B);
-
-        vslice->num_ref_idx_l0_active_minus1 = 0;
         vslice->ref_pic_list0[0] = vpic->reference_frames[0];
     }
     if (pic->nb_refs >= 2) {
         // Forward reference for B-frame.
         av_assert0(pic->type == PICTURE_TYPE_B);
-
-        vslice->num_ref_idx_l1_active_minus1 = 0;
         vslice->ref_pic_list1[0] = vpic->reference_frames[1];
     }
 
-    vslice->max_num_merge_cand = 5;
-
-    if (pic->type == PICTURE_TYPE_B)
-        vslice->slice_qp_delta = priv->fixed_qp_b  - vpic->pic_init_qp;
-    else if (pic->type == PICTURE_TYPE_P)
-        vslice->slice_qp_delta = priv->fixed_qp_p - vpic->pic_init_qp;
-    else
-        vslice->slice_qp_delta = priv->fixed_qp_idr - vpic->pic_init_qp;
-
-    vslice->slice_fields.bits.last_slice_of_pic_flag = 1;
-
-    mslice->first_slice_segment_in_pic_flag = 1;
-
-    if (pic->type == PICTURE_TYPE_IDR) {
-        // No reference pictures.
-    } else if (0) {
-        mslice->short_term_ref_pic_set_sps_flag = 1;
-        mslice->short_term_ref_pic_idx = 0;
-    } else {
-        VAAPIEncodePicture *st;
-        int used;
-
-        mslice->short_term_ref_pic_set_sps_flag = 0;
-        mslice->st_ref_pic_set.inter_ref_pic_set_prediction_flag = 0;
-
-        for (st = ctx->pic_start; st; st = st->next) {
-            if (st->encode_order >= pic->encode_order) {
-                // Not yet in DPB.
-                continue;
-            }
-            used = 0;
-            for (i = 0; i < pic->nb_refs; i++) {
-                if (pic->refs[i] == st)
-                    used = 1;
-            }
-            if (!used) {
-                // Currently true, but need not be.
-                continue;
-            }
-            // This only works for one instance of each (delta_poc_sN_minus1
-            // is relative to the previous frame in the list, not relative to
-            // the current frame directly).
-            if (st->display_order < pic->display_order) {
-                i = mslice->st_ref_pic_set.num_negative_pics;
-                mslice->st_ref_pic_set.delta_poc_s0_minus1[i] =
-                    pic->display_order - st->display_order - 1;
-                mslice->st_ref_pic_set.used_by_curr_pic_s0_flag[i] = used;
-                ++mslice->st_ref_pic_set.num_negative_pics;
-            } else {
-                i = mslice->st_ref_pic_set.num_positive_pics;
-                mslice->st_ref_pic_set.delta_poc_s1_minus1[i] =
-                    st->display_order - pic->display_order - 1;
-                mslice->st_ref_pic_set.used_by_curr_pic_s1_flag[i] = used;
-                ++mslice->st_ref_pic_set.num_positive_pics;
-            }
-        }
-    }
-
     return 0;
 }
 
 static av_cold int vaapi_encode_h265_configure(AVCodecContext *avctx)
 {
     VAAPIEncodeContext      *ctx = avctx->priv_data;
-    VAAPIEncodeH265Context *priv = ctx->priv_data;
-    VAAPIEncodeH265Options  *opt = ctx->codec_options;
+    VAAPIEncodeH265Context *priv = avctx->priv_data;
+    int err;
+
+    err = ff_cbs_init(&priv->cbc, AV_CODEC_ID_HEVC, avctx);
+    if (err < 0)
+        return err;
 
     priv->ctu_width     = FFALIGN(ctx->surface_width,  32) / 32;
     priv->ctu_height    = FFALIGN(ctx->surface_height, 32) / 32;
@@ -1169,7 +1049,7 @@
            ctx->surface_height, priv->ctu_width, priv->ctu_height);
 
     if (ctx->va_rc_mode == VA_RC_CQP) {
-        priv->fixed_qp_p = opt->qp;
+        priv->fixed_qp_p = priv->qp;
         if (avctx->i_quant_factor > 0.0)
             priv->fixed_qp_idr = (int)((priv->fixed_qp_p * avctx->i_quant_factor +
                                         avctx->i_quant_offset) + 0.5);
@@ -1192,10 +1072,6 @@
         priv->fixed_qp_p   = 30;
         priv->fixed_qp_b   = 30;
 
-        av_log(avctx, AV_LOG_DEBUG, "Using %s-bitrate = %"PRId64" bps.\n",
-               ctx->va_rc_mode == VA_RC_CBR ? "constant" : "variable",
-               avctx->bit_rate);
-
     } else {
         av_assert0(0 && "Invalid RC mode.");
     }
@@ -1203,8 +1079,18 @@
     return 0;
 }
 
+static const VAAPIEncodeProfile vaapi_encode_h265_profiles[] = {
+    { FF_PROFILE_HEVC_MAIN,     8, 3, 1, 1, VAProfileHEVCMain       },
+    { FF_PROFILE_HEVC_REXT,     8, 3, 1, 1, VAProfileHEVCMain       },
+#if VA_CHECK_VERSION(0, 37, 0)
+    { FF_PROFILE_HEVC_MAIN_10, 10, 3, 1, 1, VAProfileHEVCMain10     },
+    { FF_PROFILE_HEVC_REXT,    10, 3, 1, 1, VAProfileHEVCMain10     },
+#endif
+    { FF_PROFILE_UNKNOWN }
+};
+
 static const VAAPIEncodeType vaapi_encode_type_h265 = {
-    .priv_data_size        = sizeof(VAAPIEncodeH265Context),
+    .profiles              = vaapi_encode_h265_profiles,
 
     .configure             = &vaapi_encode_h265_configure,
 
@@ -1222,48 +1108,32 @@
 
     .slice_header_type     = VAEncPackedHeaderHEVC_Slice,
     .write_slice_header    = &vaapi_encode_h265_write_slice_header,
+
+    .write_extra_header    = &vaapi_encode_h265_write_extra_header,
 };
 
 static av_cold int vaapi_encode_h265_init(AVCodecContext *avctx)
 {
-    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAAPIEncodeContext      *ctx = avctx->priv_data;
+    VAAPIEncodeH265Context *priv = avctx->priv_data;
 
     ctx->codec = &vaapi_encode_type_h265;
 
-    switch (avctx->profile) {
-    case FF_PROFILE_HEVC_MAIN:
-    case FF_PROFILE_UNKNOWN:
-        ctx->va_profile = VAProfileHEVCMain;
-        ctx->va_rt_format = VA_RT_FORMAT_YUV420;
-        break;
-    case FF_PROFILE_HEVC_MAIN_10:
-#ifdef VA_RT_FORMAT_YUV420_10BPP
-        ctx->va_profile = VAProfileHEVCMain10;
-        ctx->va_rt_format = VA_RT_FORMAT_YUV420_10BPP;
-        break;
-#else
-        av_log(avctx, AV_LOG_ERROR, "10-bit encoding is not "
-               "supported with this VAAPI version.\n");
-        return AVERROR(ENOSYS);
-#endif
-    default:
-        av_log(avctx, AV_LOG_ERROR, "Unknown H.265 profile %d.\n",
-               avctx->profile);
+    if (avctx->profile == FF_PROFILE_UNKNOWN)
+        avctx->profile = priv->profile;
+    if (avctx->level == FF_LEVEL_UNKNOWN)
+        avctx->level = priv->level;
+
+    if (avctx->level != FF_LEVEL_UNKNOWN && avctx->level & ~0xff) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid level %d: must fit "
+               "in 8-bit unsigned integer.\n", avctx->level);
         return AVERROR(EINVAL);
     }
-    ctx->va_entrypoint = VAEntrypointEncSlice;
 
-    if (avctx->bit_rate > 0) {
-        if (avctx->rc_max_rate == avctx->bit_rate)
-            ctx->va_rc_mode = VA_RC_CBR;
-        else
-            ctx->va_rc_mode = VA_RC_VBR;
-    } else
-        ctx->va_rc_mode = VA_RC_CQP;
-
-    ctx->va_packed_headers =
+    ctx->desired_packed_headers =
         VA_ENC_PACKED_HEADER_SEQUENCE | // VPS, SPS and PPS.
-        VA_ENC_PACKED_HEADER_SLICE;     // Slice headers.
+        VA_ENC_PACKED_HEADER_SLICE    | // Slice headers.
+        VA_ENC_PACKED_HEADER_MISC;      // SEI
 
     ctx->surface_width  = FFALIGN(avctx->width,  16);
     ctx->surface_height = FFALIGN(avctx->height, 16);
@@ -1271,18 +1141,81 @@
     return ff_vaapi_encode_init(avctx);
 }
 
-#define OFFSET(x) (offsetof(VAAPIEncodeContext, codec_options_data) + \
-                   offsetof(VAAPIEncodeH265Options, x))
+static av_cold int vaapi_encode_h265_close(AVCodecContext *avctx)
+{
+    VAAPIEncodeH265Context *priv = avctx->priv_data;
+
+    ff_cbs_close(&priv->cbc);
+
+    return ff_vaapi_encode_close(avctx);
+}
+
+#define OFFSET(x) offsetof(VAAPIEncodeH265Context, x)
 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 static const AVOption vaapi_encode_h265_options[] = {
+    VAAPI_ENCODE_COMMON_OPTIONS,
+
     { "qp", "Constant QP (for P-frames; scaled by qfactor/qoffset for I/B)",
       OFFSET(qp), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, 52, FLAGS },
+
+    { "aud", "Include AUD",
+      OFFSET(aud), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+
+    { "profile", "Set profile (general_profile_idc)",
+      OFFSET(profile), AV_OPT_TYPE_INT,
+      { .i64 = FF_PROFILE_UNKNOWN }, FF_PROFILE_UNKNOWN, 0xff, FLAGS, "profile" },
+
+#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \
+      { .i64 = value }, 0, 0, FLAGS, "profile"
+    { PROFILE("main",               FF_PROFILE_HEVC_MAIN) },
+    { PROFILE("main10",             FF_PROFILE_HEVC_MAIN_10) },
+    { PROFILE("rext",               FF_PROFILE_HEVC_REXT) },
+#undef PROFILE
+
+    { "tier", "Set tier (general_tier_flag)",
+      OFFSET(tier), AV_OPT_TYPE_INT,
+      { .i64 = 0 }, 0, 1, FLAGS, "tier" },
+    { "main", NULL, 0, AV_OPT_TYPE_CONST,
+      { .i64 = 0 }, 0, 0, FLAGS, "tier" },
+    { "high", NULL, 0, AV_OPT_TYPE_CONST,
+      { .i64 = 1 }, 0, 0, FLAGS, "tier" },
+
+    { "level", "Set level (general_level_idc)",
+      OFFSET(level), AV_OPT_TYPE_INT,
+      { .i64 = FF_LEVEL_UNKNOWN }, FF_LEVEL_UNKNOWN, 0xff, FLAGS, "level" },
+
+#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
+      { .i64 = value }, 0, 0, FLAGS, "level"
+    { LEVEL("1",    30) },
+    { LEVEL("2",    60) },
+    { LEVEL("2.1",  63) },
+    { LEVEL("3",    90) },
+    { LEVEL("3.1",  93) },
+    { LEVEL("4",   120) },
+    { LEVEL("4.1", 123) },
+    { LEVEL("5",   150) },
+    { LEVEL("5.1", 153) },
+    { LEVEL("5.2", 156) },
+    { LEVEL("6",   180) },
+    { LEVEL("6.1", 183) },
+    { LEVEL("6.2", 186) },
+#undef LEVEL
+
+    { "sei", "Set SEI to include",
+      OFFSET(sei), AV_OPT_TYPE_FLAGS,
+      { .i64 = SEI_MASTERING_DISPLAY | SEI_CONTENT_LIGHT_LEVEL },
+      0, INT_MAX, FLAGS, "sei" },
+    { "hdr",
+      "Include HDR metadata for mastering display colour volume "
+      "and content light level information",
+      0, AV_OPT_TYPE_CONST,
+      { .i64 = SEI_MASTERING_DISPLAY | SEI_CONTENT_LIGHT_LEVEL },
+      INT_MIN, INT_MAX, FLAGS, "sei" },
+
     { NULL },
 };
 
 static const AVCodecDefault vaapi_encode_h265_defaults[] = {
-    { "profile",        "1"   },
-    { "level",          "51"  },
     { "b",              "0"   },
     { "bf",             "2"   },
     { "g",              "120" },
@@ -1290,6 +1223,8 @@
     { "i_qoffset",      "0"   },
     { "b_qfactor",      "6/5" },
     { "b_qoffset",      "0"   },
+    { "qmin",           "-1"  },
+    { "qmax",           "-1"  },
     { NULL },
 };
 
@@ -1305,16 +1240,16 @@
     .long_name      = NULL_IF_CONFIG_SMALL("H.265/HEVC (VAAPI)"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_HEVC,
-    .priv_data_size = (sizeof(VAAPIEncodeContext) +
-                       sizeof(VAAPIEncodeH265Options)),
+    .priv_data_size = sizeof(VAAPIEncodeH265Context),
     .init           = &vaapi_encode_h265_init,
     .encode2        = &ff_vaapi_encode2,
-    .close          = &ff_vaapi_encode_close,
+    .close          = &vaapi_encode_h265_close,
     .priv_class     = &vaapi_encode_h265_class,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
     .defaults       = vaapi_encode_h265_defaults,
     .pix_fmts = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_VAAPI,
         AV_PIX_FMT_NONE,
     },
+    .wrapper_name   = "vaapi",
 };

diff --git a/libavcodec/vaapi_encode_h26x.c b/libavcodec/vaapi_encode_h26x.c
deleted file mode 100644
index d806f9b..0000000
--- a/libavcodec/vaapi_encode_h26x.c
+++ /dev/null

@@ -1,68 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "vaapi_encode_h26x.h"
-
-int ff_vaapi_encode_h26x_nal_unit_to_byte_stream(uint8_t *dst, size_t *dst_bit_len,
-                                                 uint8_t *src, size_t src_bit_len)
-{
-    size_t dp, sp;
-    int zero_run = 0;
-    size_t dst_len = *dst_bit_len / 8;
-    size_t src_len = (src_bit_len + 7) / 8;
-    int trailing_zeroes = src_len * 8 - src_bit_len;
-
-    if (dst_len < src_len + 4) {
-        // Definitely doesn't fit.
-        goto fail;
-    }
-
-    // Start code.
-    dst[0] = dst[1] = dst[2] = 0;
-    dst[3] = 1;
-    dp = 4;
-
-    for (sp = 0; sp < src_len; sp++) {
-        if (dp >= dst_len)
-            goto fail;
-        if (zero_run < 2) {
-            if (src[sp] == 0)
-                ++zero_run;
-            else
-                zero_run = 0;
-        } else {
-            if ((src[sp] & ~3) == 0) {
-                // emulation_prevention_three_byte
-                dst[dp++] = 3;
-                if (dp >= dst_len)
-                    goto fail;
-            }
-            zero_run = src[sp] == 0;
-        }
-        dst[dp++] = src[sp];
-    }
-
-    *dst_bit_len = 8 * dp - trailing_zeroes;
-    return 0;
-
-fail:
-    *dst_bit_len = 0;
-    return AVERROR(ENOSPC);
-}

diff --git a/libavcodec/vaapi_encode_h26x.h b/libavcodec/vaapi_encode_h26x.h
deleted file mode 100644
index f8c6e13..0000000
--- a/libavcodec/vaapi_encode_h26x.h
+++ /dev/null

@@ -1,45 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_VAAPI_ENCODE_H26X_H
-#define AVCODEC_VAAPI_ENCODE_H26X_H
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include "golomb.h"
-#include "put_bits.h"
-
-
-// Debug code may be interested in the name of the syntax element being
-// for tracing purposes.  Here, it is just discarded.
-
-#define write_u(pbc, width, value, name) put_bits(pbc, width, value)
-#define write_ue(pbc, value, name)       set_ue_golomb(pbc, value)
-#define write_se(pbc, value, name)       set_se_golomb(pbc, value)
-
-#define u(width, ...) write_u(pbc, width, __VA_ARGS__)
-#define ue(...)       write_ue(pbc, __VA_ARGS__)
-#define se(...)       write_se(pbc, __VA_ARGS__)
-
-
-// Copy from src to dst, applying emulation prevention.
-int ff_vaapi_encode_h26x_nal_unit_to_byte_stream(uint8_t *dst, size_t *dst_len,
-                                                 uint8_t *src, size_t src_len);
-
-#endif /* AVCODEC_VAAPI_ENCODE_H26X_H */

diff --git a/libavcodec/vaapi_encode_mjpeg.c b/libavcodec/vaapi_encode_mjpeg.c
index 2cbf792..fe8439c 100644
--- a/libavcodec/vaapi_encode_mjpeg.c
+++ b/libavcodec/vaapi_encode_mjpeg.c

@@ -23,9 +23,12 @@
 #include "libavutil/common.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
-#include "libavutil/pixfmt.h"
+#include "libavutil/pixdesc.h"
 
 #include "avcodec.h"
+#include "bytestream.h"
+#include "cbs.h"
+#include "cbs_jpeg.h"
 #include "internal.h"
 #include "jpegtables.h"
 #include "mjpeg.h"
@@ -56,185 +59,91 @@
 };
 
 typedef struct VAAPIEncodeMJPEGContext {
+    VAAPIEncodeContext common;
+
+    // User options.
+    int jfif;
+    int huffman;
+
+    // Derived settings.
     int quality;
-    int component_subsample_h[3];
-    int component_subsample_v[3];
+    uint8_t jfif_data[14];
 
-    VAQMatrixBufferJPEG quant_tables;
-    VAHuffmanTableBufferJPEGBaseline huffman_tables;
+    // Writer structures.
+    JPEGRawFrameHeader     frame_header;
+    JPEGRawScan            scan;
+    JPEGRawApplicationData jfif_header;
+    JPEGRawQuantisationTableSpecification quant_tables;
+    JPEGRawHuffmanTableSpecification      huffman_tables;
+
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment current_fragment;
 } VAAPIEncodeMJPEGContext;
 
-static av_cold void vaapi_encode_mjpeg_copy_huffman(unsigned char *dst_lengths,
-                                                    unsigned char *dst_values,
-                                                    const unsigned char *src_lengths,
-                                                    const unsigned char *src_values)
-{
-    int i, mt;
-
-    ++src_lengths;
-
-    mt = 0;
-    for (i = 0; i < 16; i++)
-        mt += (dst_lengths[i] = src_lengths[i]);
-
-    for (i = 0; i < mt; i++)
-        dst_values[i] = src_values[i];
-}
-
-static av_cold void vaapi_encode_mjpeg_init_tables(AVCodecContext *avctx)
-{
-    VAAPIEncodeContext                *ctx = avctx->priv_data;
-    VAAPIEncodeMJPEGContext          *priv = ctx->priv_data;
-    VAQMatrixBufferJPEG             *quant = &priv->quant_tables;
-    VAHuffmanTableBufferJPEGBaseline *huff = &priv->huffman_tables;
-    int i;
-
-    quant->load_lum_quantiser_matrix = 1;
-    quant->load_chroma_quantiser_matrix = 1;
-
-    for (i = 0; i < 64; i++) {
-        quant->lum_quantiser_matrix[i] =
-            vaapi_encode_mjpeg_quant_luminance[i];
-        quant->chroma_quantiser_matrix[i] =
-            vaapi_encode_mjpeg_quant_chrominance[i];
-    }
-
-    huff->load_huffman_table[0] = 1;
-    vaapi_encode_mjpeg_copy_huffman(huff->huffman_table[0].num_dc_codes,
-                                    huff->huffman_table[0].dc_values,
-                                    avpriv_mjpeg_bits_dc_luminance,
-                                    avpriv_mjpeg_val_dc);
-    vaapi_encode_mjpeg_copy_huffman(huff->huffman_table[0].num_ac_codes,
-                                    huff->huffman_table[0].ac_values,
-                                    avpriv_mjpeg_bits_ac_luminance,
-                                    avpriv_mjpeg_val_ac_luminance);
-    memset(huff->huffman_table[0].pad, 0, sizeof(huff->huffman_table[0].pad));
-
-    huff->load_huffman_table[1] = 1;
-    vaapi_encode_mjpeg_copy_huffman(huff->huffman_table[1].num_dc_codes,
-                                    huff->huffman_table[1].dc_values,
-                                    avpriv_mjpeg_bits_dc_chrominance,
-                                    avpriv_mjpeg_val_dc);
-    vaapi_encode_mjpeg_copy_huffman(huff->huffman_table[1].num_ac_codes,
-                                    huff->huffman_table[1].ac_values,
-                                    avpriv_mjpeg_bits_ac_chrominance,
-                                    avpriv_mjpeg_val_ac_chrominance);
-    memset(huff->huffman_table[1].pad, 0, sizeof(huff->huffman_table[1].pad));
-}
-
-static void vaapi_encode_mjpeg_write_marker(PutBitContext *pbc, int marker)
-{
-    put_bits(pbc, 8, 0xff);
-    put_bits(pbc, 8, marker);
-}
-
 static int vaapi_encode_mjpeg_write_image_header(AVCodecContext *avctx,
                                                  VAAPIEncodePicture *pic,
                                                  VAAPIEncodeSlice *slice,
                                                  char *data, size_t *data_len)
 {
-    VAAPIEncodeContext               *ctx = avctx->priv_data;
-    VAEncPictureParameterBufferJPEG *vpic = pic->codec_picture_params;
-    VAEncSliceParameterBufferJPEG *vslice = slice->codec_slice_params;
-    VAAPIEncodeMJPEGContext         *priv = ctx->priv_data;
-    PutBitContext pbc;
-    int t, i, quant_scale;
+    VAAPIEncodeMJPEGContext *priv = avctx->priv_data;
+    CodedBitstreamFragment  *frag = &priv->current_fragment;
+    int err;
 
-    init_put_bits(&pbc, data, *data_len);
-
-    vaapi_encode_mjpeg_write_marker(&pbc, SOI);
-
-    // Quantisation table coefficients are scaled for quality by the driver,
-    // so we also need to do it ourselves here so that headers match.
-    if (priv->quality < 50)
-        quant_scale = 5000 / priv->quality;
-    else
-        quant_scale = 200 - 2 * priv->quality;
-
-    for (t = 0; t < 2; t++) {
-        int q;
-
-        vaapi_encode_mjpeg_write_marker(&pbc, DQT);
-
-        put_bits(&pbc, 16, 3 + 64); // Lq
-        put_bits(&pbc, 4, 0); // Pq
-        put_bits(&pbc, 4, t); // Tq
-
-        for (i = 0; i < 64; i++) {
-            q = i[t ? priv->quant_tables.chroma_quantiser_matrix
-                    : priv->quant_tables.lum_quantiser_matrix];
-            q = (q * quant_scale) / 100;
-            if (q < 1)   q = 1;
-            if (q > 255) q = 255;
-            put_bits(&pbc, 8, q);
-        }
+    if (priv->jfif) {
+        err = ff_cbs_insert_unit_content(priv->cbc, frag, -1,
+                                         JPEG_MARKER_APPN + 0,
+                                         &priv->jfif_header, NULL);
+        if (err < 0)
+            goto fail;
     }
 
-    vaapi_encode_mjpeg_write_marker(&pbc, SOF0);
+    err = ff_cbs_insert_unit_content(priv->cbc, frag, -1,
+                                     JPEG_MARKER_DQT,
+                                     &priv->quant_tables, NULL);
+    if (err < 0)
+        goto fail;
 
-    put_bits(&pbc, 16, 8 + 3 * vpic->num_components); // Lf
-    put_bits(&pbc, 8,  vpic->sample_bit_depth); // P
-    put_bits(&pbc, 16, vpic->picture_height);   // Y
-    put_bits(&pbc, 16, vpic->picture_width);    // X
-    put_bits(&pbc, 8,  vpic->num_components);   // Nf
+    err = ff_cbs_insert_unit_content(priv->cbc, frag, -1,
+                                     JPEG_MARKER_SOF0,
+                                     &priv->frame_header, NULL);
+    if (err < 0)
+        goto fail;
 
-    for (i = 0; i < vpic->num_components; i++) {
-        put_bits(&pbc, 8, vpic->component_id[i]); // Ci
-        put_bits(&pbc, 4, priv->component_subsample_h[i]); // Hi
-        put_bits(&pbc, 4, priv->component_subsample_v[i]); // Vi
-        put_bits(&pbc, 8, vpic->quantiser_table_selector[i]); // Tqi
+    if (priv->huffman) {
+        err = ff_cbs_insert_unit_content(priv->cbc, frag, -1,
+                                         JPEG_MARKER_DHT,
+                                         &priv->huffman_tables, NULL);
+        if (err < 0)
+            goto fail;
     }
 
-    for (t = 0; t < 4; t++) {
-        int mt;
-        unsigned char *lengths, *values;
+    err = ff_cbs_insert_unit_content(priv->cbc, frag, -1,
+                                     JPEG_MARKER_SOS,
+                                     &priv->scan, NULL);
+    if (err < 0)
+        goto fail;
 
-        vaapi_encode_mjpeg_write_marker(&pbc, DHT);
-
-        if ((t & 1) == 0) {
-            lengths = priv->huffman_tables.huffman_table[t / 2].num_dc_codes;
-            values  = priv->huffman_tables.huffman_table[t / 2].dc_values;
-        } else {
-            lengths = priv->huffman_tables.huffman_table[t / 2].num_ac_codes;
-            values  = priv->huffman_tables.huffman_table[t / 2].ac_values;
-        }
-
-        mt = 0;
-        for (i = 0; i < 16; i++)
-            mt += lengths[i];
-
-        put_bits(&pbc, 16, 2 + 17 + mt); // Lh
-        put_bits(&pbc, 4, t & 1); // Tc
-        put_bits(&pbc, 4, t / 2); // Th
-
-        for (i = 0; i < 16; i++)
-            put_bits(&pbc, 8, lengths[i]);
-        for (i = 0; i < mt; i++)
-            put_bits(&pbc, 8, values[i]);
+    err = ff_cbs_write_fragment_data(priv->cbc, frag);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to write image header.\n");
+        goto fail;
     }
 
-    vaapi_encode_mjpeg_write_marker(&pbc, SOS);
-
-    av_assert0(vpic->num_components == vslice->num_components);
-
-    put_bits(&pbc, 16, 6 + 2 * vslice->num_components); // Ls
-    put_bits(&pbc, 8,  vslice->num_components); // Ns
-
-    for (i = 0; i < vslice->num_components; i++) {
-        put_bits(&pbc, 8, vslice->components[i].component_selector); // Csj
-        put_bits(&pbc, 4, vslice->components[i].dc_table_selector);  // Tdj
-        put_bits(&pbc, 4, vslice->components[i].ac_table_selector);  // Taj
+    if (*data_len < 8 * frag->data_size) {
+        av_log(avctx, AV_LOG_ERROR, "Image header too large: "
+               "%zu < %zu.\n", *data_len, 8 * frag->data_size);
+        err = AVERROR(ENOSPC);
+        goto fail;
     }
 
-    put_bits(&pbc, 8, 0); // Ss
-    put_bits(&pbc, 8, 63); // Se
-    put_bits(&pbc, 4, 0); // Ah
-    put_bits(&pbc, 4, 0); // Al
+    // Remove the EOI at the end of the fragment.
+    memcpy(data, frag->data, frag->data_size - 2);
+    *data_len = 8 * (frag->data_size - 2);
 
-    *data_len = put_bits_count(&pbc);
-    flush_put_bits(&pbc);
-
-    return 0;
+    err = 0;
+fail:
+    ff_cbs_fragment_uninit(priv->cbc, frag);
+    return err;
 }
 
 static int vaapi_encode_mjpeg_write_extra_buffer(AVCodecContext *avctx,
@@ -242,24 +151,67 @@
                                                  int index, int *type,
                                                  char *data, size_t *data_len)
 {
-    VAAPIEncodeContext       *ctx = avctx->priv_data;
-    VAAPIEncodeMJPEGContext *priv = ctx->priv_data;
+    VAAPIEncodeMJPEGContext *priv = avctx->priv_data;
+    int t, i, k;
 
     if (index == 0) {
         // Write quantisation tables.
-        if (*data_len < sizeof(priv->quant_tables))
-            return AVERROR(EINVAL);
-        *type = VAQMatrixBufferType;
-        memcpy(data, &priv->quant_tables,
-               *data_len = sizeof(priv->quant_tables));
+        JPEGRawFrameHeader                     *fh = &priv->frame_header;
+        JPEGRawQuantisationTableSpecification *dqt = &priv->quant_tables;
+        VAQMatrixBufferJPEG *quant;
+
+        if (*data_len < sizeof(*quant))
+            return AVERROR(ENOSPC);
+        *type     = VAQMatrixBufferType;
+        *data_len = sizeof(*quant);
+
+        quant = (VAQMatrixBufferJPEG*)data;
+        memset(quant, 0, sizeof(*quant));
+
+        quant->load_lum_quantiser_matrix = 1;
+        for (i = 0; i < 64; i++)
+            quant->lum_quantiser_matrix[i] = dqt->table[fh->Tq[0]].Q[i];
+
+        if (fh->Nf > 1) {
+            quant->load_chroma_quantiser_matrix = 1;
+            for (i = 0; i < 64; i++)
+                quant->chroma_quantiser_matrix[i] =
+                    dqt->table[fh->Tq[1]].Q[i];
+        }
 
     } else if (index == 1) {
         // Write huffman tables.
-        if (*data_len < sizeof(priv->huffman_tables))
-            return AVERROR(EINVAL);
-        *type = VAHuffmanTableBufferType;
-        memcpy(data, &priv->huffman_tables,
-               *data_len = sizeof(priv->huffman_tables));
+        JPEGRawScanHeader                 *sh = &priv->scan.header;
+        JPEGRawHuffmanTableSpecification *dht = &priv->huffman_tables;
+        VAHuffmanTableBufferJPEGBaseline *huff;
+
+        if (*data_len < sizeof(*huff))
+            return AVERROR(ENOSPC);
+        *type     = VAHuffmanTableBufferType;
+        *data_len = sizeof(*huff);
+
+        huff = (VAHuffmanTableBufferJPEGBaseline*)data;
+        memset(huff, 0, sizeof(*huff));
+
+        for (t = 0; t < 1 + (sh->Ns > 1); t++) {
+            const JPEGRawHuffmanTable *ht;
+
+            huff->load_huffman_table[t] = 1;
+
+            ht = &dht->table[2 * t];
+            for (i = k = 0; i < 16; i++)
+                k += (huff->huffman_table[t].num_dc_codes[i] = ht->L[i]);
+            av_assert0(k <= sizeof(huff->huffman_table[t].dc_values));
+            for (i = 0; i < k; i++)
+                huff->huffman_table[t].dc_values[i] = ht->V[i];
+
+            ht = &dht->table[2 * t + 1];
+            for (i = k = 0; i < 16; i++)
+                k += (huff->huffman_table[t].num_ac_codes[i] = ht->L[i]);
+            av_assert0(k <= sizeof(huff->huffman_table[t].ac_values));
+            for (i = 0; i < k; i++)
+                huff->huffman_table[t].ac_values[i] = ht->V[i];
+        }
 
     } else {
         return AVERROR_EOF;
@@ -270,43 +222,185 @@
 static int vaapi_encode_mjpeg_init_picture_params(AVCodecContext *avctx,
                                                   VAAPIEncodePicture *pic)
 {
-    VAAPIEncodeContext               *ctx = avctx->priv_data;
+    VAAPIEncodeMJPEGContext         *priv = avctx->priv_data;
+    JPEGRawFrameHeader                *fh = &priv->frame_header;
+    JPEGRawScanHeader                 *sh = &priv->scan.header;
     VAEncPictureParameterBufferJPEG *vpic = pic->codec_picture_params;
-    VAAPIEncodeMJPEGContext         *priv = ctx->priv_data;
+    const AVPixFmtDescriptor *desc;
+    const uint8_t *components;
+    int t, i, quant_scale, len;
 
-    vpic->reconstructed_picture = pic->recon_surface;
-    vpic->coded_buf = pic->output_buffer;
+    desc = av_pix_fmt_desc_get(priv->common.input_frames->sw_format);
+    av_assert0(desc);
+    if (desc->flags & AV_PIX_FMT_FLAG_RGB)
+        components = (uint8_t[3]) { 'R', 'G', 'B' };
+    else
+        components = (uint8_t[3]) {  1,   2,   3  };
 
-    vpic->picture_width  = avctx->width;
-    vpic->picture_height = avctx->height;
+    // Frame header.
 
-    vpic->pic_flags.bits.profile      = 0;
-    vpic->pic_flags.bits.progressive  = 0;
-    vpic->pic_flags.bits.huffman      = 1;
-    vpic->pic_flags.bits.interleaved  = 0;
-    vpic->pic_flags.bits.differential = 0;
+    fh->P  = 8;
+    fh->Y  = avctx->height;
+    fh->X  = avctx->width;
+    fh->Nf = desc->nb_components;
 
-    vpic->sample_bit_depth = 8;
-    vpic->num_scan = 1;
+    for (i = 0; i < fh->Nf; i++) {
+        fh->C[i] = components[i];
+        fh->H[i] = 1 + (i == 0 ? desc->log2_chroma_w : 0);
+        fh->V[i] = 1 + (i == 0 ? desc->log2_chroma_h : 0);
 
-    vpic->num_components = 3;
+        fh->Tq[i] = !!i;
+    }
 
-    vpic->component_id[0] = 1;
-    vpic->component_id[1] = 2;
-    vpic->component_id[2] = 3;
+    fh->Lf = 8 + 3 * fh->Nf;
 
-    priv->component_subsample_h[0] = 2;
-    priv->component_subsample_v[0] = 2;
-    priv->component_subsample_h[1] = 1;
-    priv->component_subsample_v[1] = 1;
-    priv->component_subsample_h[2] = 1;
-    priv->component_subsample_v[2] = 1;
+    // JFIF header.
+    if (priv->jfif) {
+        JPEGRawApplicationData *app = &priv->jfif_header;
+        AVRational sar = pic->input_image->sample_aspect_ratio;
+        int sar_w, sar_h;
+        PutByteContext pbc;
 
-    vpic->quantiser_table_selector[0] = 0;
-    vpic->quantiser_table_selector[1] = 1;
-    vpic->quantiser_table_selector[2] = 1;
+        bytestream2_init_writer(&pbc, priv->jfif_data,
+                                sizeof(priv->jfif_data));
 
-    vpic->quality = priv->quality;
+        bytestream2_put_buffer(&pbc, "JFIF", 5);
+        bytestream2_put_be16(&pbc, 0x0102);
+        bytestream2_put_byte(&pbc, 0);
+
+        av_reduce(&sar_w, &sar_h, sar.num, sar.den, 65535);
+        if (sar_w && sar_h) {
+            bytestream2_put_be16(&pbc, sar_w);
+            bytestream2_put_be16(&pbc, sar_h);
+        } else {
+            bytestream2_put_be16(&pbc, 1);
+            bytestream2_put_be16(&pbc, 1);
+        }
+
+        bytestream2_put_byte(&pbc, 0);
+        bytestream2_put_byte(&pbc, 0);
+
+        av_assert0(bytestream2_get_bytes_left_p(&pbc) == 0);
+
+        app->Lp     = 2 + sizeof(priv->jfif_data);
+        app->Ap     = priv->jfif_data;
+        app->Ap_ref = NULL;
+    }
+
+    // Quantisation tables.
+
+    if (priv->quality < 50)
+        quant_scale = 5000 / priv->quality;
+    else
+        quant_scale = 200 - 2 * priv->quality;
+
+    len = 2;
+
+    for (t = 0; t < 1 + (fh->Nf > 1); t++) {
+        JPEGRawQuantisationTable *quant = &priv->quant_tables.table[t];
+        const uint8_t *data = t == 0 ?
+            vaapi_encode_mjpeg_quant_luminance :
+            vaapi_encode_mjpeg_quant_chrominance;
+
+        quant->Pq = 0;
+        quant->Tq = t;
+        for (i = 0; i < 64; i++)
+            quant->Q[i] = av_clip(data[i] * quant_scale / 100, 1, 255);
+
+        len += 65;
+    }
+
+    priv->quant_tables.Lq = len;
+
+    // Huffman tables.
+
+    len = 2;
+
+    for (t = 0; t < 2 + 2 * (fh->Nf > 1); t++) {
+        JPEGRawHuffmanTable *huff = &priv->huffman_tables.table[t];
+        const uint8_t *lengths, *values;
+        int k;
+
+        switch (t) {
+        case 0:
+            lengths = avpriv_mjpeg_bits_dc_luminance + 1;
+            values  = avpriv_mjpeg_val_dc;
+            break;
+        case 1:
+            lengths = avpriv_mjpeg_bits_ac_luminance + 1;
+            values  = avpriv_mjpeg_val_ac_luminance;
+            break;
+        case 2:
+            lengths = avpriv_mjpeg_bits_dc_chrominance + 1;
+            values  = avpriv_mjpeg_val_dc;
+            break;
+        case 3:
+            lengths = avpriv_mjpeg_bits_ac_chrominance + 1;
+            values  = avpriv_mjpeg_val_ac_chrominance;
+            break;
+        }
+
+        huff->Tc = t % 2;
+        huff->Th = t / 2;
+
+        for (i = k = 0; i < 16; i++)
+            k += (huff->L[i] = lengths[i]);
+
+        for (i = 0; i < k; i++)
+            huff->V[i] = values[i];
+
+        len += 17 + k;
+    }
+
+    priv->huffman_tables.Lh = len;
+
+    // Scan header.
+
+    sh->Ns = fh->Nf;
+
+    for (i = 0; i < fh->Nf; i++) {
+        sh->Cs[i] = fh->C[i];
+        sh->Td[i] = i > 0;
+        sh->Ta[i] = i > 0;
+    }
+
+    sh->Ss = 0;
+    sh->Se = 63;
+    sh->Ah = 0;
+    sh->Al = 0;
+
+    sh->Ls = 6 + 2 * sh->Ns;
+
+
+    *vpic = (VAEncPictureParameterBufferJPEG) {
+        .reconstructed_picture = pic->recon_surface,
+        .coded_buf             = pic->output_buffer,
+
+        .picture_width  = fh->X,
+        .picture_height = fh->Y,
+
+        .pic_flags.bits = {
+            .profile      = 0,
+            .progressive  = 0,
+            .huffman      = 1,
+            .interleaved  = 0,
+            .differential = 0,
+        },
+
+        .sample_bit_depth = fh->P,
+        .num_scan         = 1,
+        .num_components   = fh->Nf,
+
+        // The driver modifies the provided quantisation tables according
+        // to this quality value; the middle value of 50 makes that the
+        // identity so that they are used unchanged.
+        .quality = 50,
+    };
+
+    for (i = 0; i < fh->Nf; i++) {
+        vpic->component_id[i]             = fh->C[i];
+        vpic->quantiser_table_selector[i] = fh->Tq[i];
+    }
 
     pic->nb_slices = 1;
 
@@ -317,17 +411,20 @@
                                                 VAAPIEncodePicture *pic,
                                                 VAAPIEncodeSlice *slice)
 {
-    VAEncPictureParameterBufferJPEG *vpic = pic->codec_picture_params;
+    VAAPIEncodeMJPEGContext         *priv = avctx->priv_data;
+    JPEGRawScanHeader                 *sh = &priv->scan.header;
     VAEncSliceParameterBufferJPEG *vslice = slice->codec_slice_params;
     int i;
 
-    vslice->restart_interval = 0;
+    *vslice = (VAEncSliceParameterBufferJPEG) {
+        .restart_interval = 0,
+        .num_components   = sh->Ns,
+    };
 
-    vslice->num_components = vpic->num_components;
-    for (i = 0; i < vslice->num_components; i++) {
-        vslice->components[i].component_selector = i + 1;
-        vslice->components[i].dc_table_selector = (i > 0);
-        vslice->components[i].ac_table_selector = (i > 0);
+    for (i = 0; i < sh->Ns; i++) {
+        vslice->components[i].component_selector = sh->Cs[i];
+        vslice->components[i].dc_table_selector  = sh->Td[i];
+        vslice->components[i].ac_table_selector  = sh->Ta[i];
     }
 
     return 0;
@@ -336,7 +433,8 @@
 static av_cold int vaapi_encode_mjpeg_configure(AVCodecContext *avctx)
 {
     VAAPIEncodeContext       *ctx = avctx->priv_data;
-    VAAPIEncodeMJPEGContext *priv = ctx->priv_data;
+    VAAPIEncodeMJPEGContext *priv = avctx->priv_data;
+    int err;
 
     priv->quality = avctx->global_quality;
     if (priv->quality < 1 || priv->quality > 100) {
@@ -356,13 +454,27 @@
         ctx->va_packed_headers |=  VA_ENC_PACKED_HEADER_SLICE;
     }
 
-    vaapi_encode_mjpeg_init_tables(avctx);
+    err = ff_cbs_init(&priv->cbc, AV_CODEC_ID_MJPEG, avctx);
+    if (err < 0)
+        return err;
 
     return 0;
 }
 
+static const VAAPIEncodeProfile vaapi_encode_mjpeg_profiles[] = {
+    { FF_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT,
+            8, 1, 0, 0, VAProfileJPEGBaseline },
+    { FF_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT,
+            8, 3, 1, 1, VAProfileJPEGBaseline },
+    { FF_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT,
+            8, 3, 1, 0, VAProfileJPEGBaseline },
+    { FF_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT,
+            8, 3, 0, 0, VAProfileJPEGBaseline },
+    { FF_PROFILE_UNKNOWN }
+};
+
 static const VAAPIEncodeType vaapi_encode_type_mjpeg = {
-    .priv_data_size        = sizeof(VAAPIEncodeMJPEGContext),
+    .profiles              = vaapi_encode_mjpeg_profiles,
 
     .configure             = &vaapi_encode_mjpeg_configure,
 
@@ -384,15 +496,8 @@
 
     ctx->codec = &vaapi_encode_type_mjpeg;
 
-    ctx->va_profile    = VAProfileJPEGBaseline;
-    ctx->va_entrypoint = VAEntrypointEncPicture;
-
-    ctx->va_rt_format = VA_RT_FORMAT_YUV420;
-
-    ctx->va_rc_mode = VA_RC_CQP;
-
     // The JPEG image header - see note above.
-    ctx->va_packed_headers =
+    ctx->desired_packed_headers =
         VA_ENC_PACKED_HEADER_RAW_DATA;
 
     ctx->surface_width  = FFALIGN(avctx->width,  8);
@@ -401,14 +506,41 @@
     return ff_vaapi_encode_init(avctx);
 }
 
+static av_cold int vaapi_encode_mjpeg_close(AVCodecContext *avctx)
+{
+    VAAPIEncodeMJPEGContext *priv = avctx->priv_data;
+
+    ff_cbs_close(&priv->cbc);
+
+    return ff_vaapi_encode_close(avctx);
+}
+
+#define OFFSET(x) offsetof(VAAPIEncodeMJPEGContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+static const AVOption vaapi_encode_mjpeg_options[] = {
+    VAAPI_ENCODE_COMMON_OPTIONS,
+
+    { "jfif", "Include JFIF header",
+      OFFSET(jfif), AV_OPT_TYPE_BOOL,
+      { .i64 = 0 }, 0, 1, FLAGS },
+    { "huffman", "Include huffman tables",
+      OFFSET(huffman), AV_OPT_TYPE_BOOL,
+      { .i64 = 1 }, 0, 1, FLAGS },
+
+    { NULL },
+};
+
 static const AVCodecDefault vaapi_encode_mjpeg_defaults[] = {
     { "global_quality", "80" },
+    { "b",              "0"  },
+    { "g",              "1"  },
     { NULL },
 };
 
 static const AVClass vaapi_encode_mjpeg_class = {
     .class_name = "mjpeg_vaapi",
     .item_name  = av_default_item_name,
+    .option     = vaapi_encode_mjpeg_options,
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
@@ -417,14 +549,17 @@
     .long_name      = NULL_IF_CONFIG_SMALL("MJPEG (VAAPI)"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MJPEG,
-    .priv_data_size = sizeof(VAAPIEncodeContext),
+    .priv_data_size = sizeof(VAAPIEncodeMJPEGContext),
     .init           = &vaapi_encode_mjpeg_init,
     .encode2        = &ff_vaapi_encode2,
-    .close          = &ff_vaapi_encode_close,
+    .close          = &vaapi_encode_mjpeg_close,
     .priv_class     = &vaapi_encode_mjpeg_class,
+    .capabilities   = AV_CODEC_CAP_HARDWARE |
+                      AV_CODEC_CAP_INTRA_ONLY,
     .defaults       = vaapi_encode_mjpeg_defaults,
     .pix_fmts = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_VAAPI,
         AV_PIX_FMT_NONE,
     },
+    .wrapper_name   = "vaapi",
 };

diff --git a/libavcodec/vaapi_encode_mpeg2.c b/libavcodec/vaapi_encode_mpeg2.c
index dc91888..1377eeb 100644
--- a/libavcodec/vaapi_encode_mpeg2.c
+++ b/libavcodec/vaapi_encode_mpeg2.c

@@ -20,18 +20,21 @@
 #include <va/va_enc_mpeg2.h>
 
 #include "libavutil/avassert.h"
-#include "libavutil/common.h"
-#include "libavutil/internal.h"
-#include "libavutil/opt.h"
-#include "libavutil/pixfmt.h"
 
 #include "avcodec.h"
-#include "internal.h"
-#include "mpegvideo.h"
-#include "put_bits.h"
+#include "cbs.h"
+#include "cbs_mpeg2.h"
+#include "mpeg12.h"
 #include "vaapi_encode.h"
 
 typedef struct VAAPIEncodeMPEG2Context {
+    VAAPIEncodeContext common;
+
+    // User options.
+    int profile;
+    int level;
+
+    // Derived settings.
     int mb_width;
     int mb_height;
 
@@ -39,79 +42,103 @@
     int quant_p;
     int quant_b;
 
-    int64_t last_i_frame;
-
     unsigned int bit_rate;
     unsigned int vbv_buffer_size;
+
+    AVRational frame_rate;
+
+    unsigned int f_code_horizontal;
+    unsigned int f_code_vertical;
+
+    // Stream state.
+    int64_t last_i_frame;
+
+    // Writer structures.
+    MPEG2RawSequenceHeader sequence_header;
+    MPEG2RawExtensionData  sequence_extension;
+    MPEG2RawExtensionData  sequence_display_extension;
+    MPEG2RawGroupOfPicturesHeader gop_header;
+    MPEG2RawPictureHeader  picture_header;
+    MPEG2RawExtensionData  picture_coding_extension;
+
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment current_fragment;
 } VAAPIEncodeMPEG2Context;
 
 
-#define vseq_var(name)      vseq->name, name
-#define vseqext_field(name) vseq->sequence_extension.bits.name, name
-#define vgop_field(name)    vseq->gop_header.bits.name, name
-#define vpic_var(name)      vpic->name, name
-#define vpcext_field(name)  vpic->picture_coding_extension.bits.name, name
-#define vcomp_field(name)   vpic->composite_display.bits.name, name
+static int vaapi_encode_mpeg2_write_fragment(AVCodecContext *avctx,
+                                             char *data, size_t *data_len,
+                                             CodedBitstreamFragment *frag)
+{
+    VAAPIEncodeMPEG2Context *priv = avctx->priv_data;
+    int err;
 
-#define u2(width, value, name) put_bits(&pbc, width, value)
-#define u(width, ...) u2(width, __VA_ARGS__)
+    err = ff_cbs_write_fragment_data(priv->cbc, frag);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n");
+        return err;
+    }
+
+    if (*data_len < 8 * frag->data_size - frag->data_bit_padding) {
+        av_log(avctx, AV_LOG_ERROR, "Access unit too large: "
+               "%zu < %zu.\n", *data_len,
+               8 * frag->data_size - frag->data_bit_padding);
+        return AVERROR(ENOSPC);
+    }
+
+    memcpy(data, frag->data, frag->data_size);
+    *data_len = 8 * frag->data_size - frag->data_bit_padding;
+
+    return 0;
+}
+
+static int vaapi_encode_mpeg2_add_header(AVCodecContext *avctx,
+                                         CodedBitstreamFragment *frag,
+                                         int type, void *header)
+{
+    VAAPIEncodeMPEG2Context *priv = avctx->priv_data;
+    int err;
+
+    err = ff_cbs_insert_unit_content(priv->cbc, frag, -1, type, header, NULL);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to add header: "
+               "type = %d.\n", type);
+        return err;
+    }
+
+    return 0;
+}
 
 static int vaapi_encode_mpeg2_write_sequence_header(AVCodecContext *avctx,
                                                     char *data, size_t *data_len)
 {
-    VAAPIEncodeContext                 *ctx = avctx->priv_data;
-    VAEncSequenceParameterBufferMPEG2 *vseq = ctx->codec_sequence_params;
-    VAAPIEncodeMPEG2Context           *priv = ctx->priv_data;
-    PutBitContext pbc;
+    VAAPIEncodeMPEG2Context *priv = avctx->priv_data;
+    CodedBitstreamFragment  *frag = &priv->current_fragment;
+    int err;
 
-    init_put_bits(&pbc, data, 8 * *data_len);
+    err = vaapi_encode_mpeg2_add_header(avctx, frag, MPEG2_START_SEQUENCE_HEADER,
+                                        &priv->sequence_header);
+    if (err < 0)
+        goto fail;
 
-    u(32, SEQ_START_CODE, sequence_header_code);
+    err = vaapi_encode_mpeg2_add_header(avctx, frag, MPEG2_START_EXTENSION,
+                                        &priv->sequence_extension);
+    if (err < 0)
+        goto fail;
 
-    u(12, vseq->picture_width,  horizontal_size_value);
-    u(12, vseq->picture_height, vertical_size_value);
-    u(4, vseq_var(aspect_ratio_information));
-    u(4, 8, frame_rate_code);
-    u(18, priv->bit_rate & 0x3fff, bit_rate_value);
-    u(1, 1, marker_bit);
-    u(10, priv->vbv_buffer_size & 0x3ff, vbv_buffer_size_value);
-    u(1, 0, constrained_parameters_flag);
-    u(1, 0, load_intra_quantiser_matrix);
-    // intra_quantiser_matrix[64]
-    u(1, 0, load_non_intra_quantiser_matrix);
-    // non_intra_quantiser_matrix[64]
+    err = vaapi_encode_mpeg2_add_header(avctx, frag, MPEG2_START_EXTENSION,
+                                        &priv->sequence_display_extension);
+    if (err < 0)
+        goto fail;
 
-    while (put_bits_count(&pbc) % 8)
-        u(1, 0, zero_bit);
+    err = vaapi_encode_mpeg2_add_header(avctx, frag, MPEG2_START_GROUP,
+                                        &priv->gop_header);
+    if (err < 0)
+        goto fail;
 
-    u(32, EXT_START_CODE, extension_start_code);
-    u(4, 1, extension_start_code_identifier);
-    u(8, vseqext_field(profile_and_level_indication));
-    u(1, vseqext_field(progressive_sequence));
-    u(2, vseqext_field(chroma_format));
-    u(2, 0, horizontal_size_extension);
-    u(2, 0, vertical_size_extension);
-    u(12, priv->bit_rate >> 18, bit_rate_extension);
-    u(1, 1, marker_bit);
-    u(8, priv->vbv_buffer_size >> 10, vbv_buffer_size_extension);
-    u(1, vseqext_field(low_delay));
-    u(2, vseqext_field(frame_rate_extension_n));
-    u(2, vseqext_field(frame_rate_extension_d));
-
-    while (put_bits_count(&pbc) % 8)
-        u(1, 0, zero_bit);
-
-    u(32, GOP_START_CODE, group_start_code);
-    u(25, vgop_field(time_code));
-    u(1, vgop_field(closed_gop));
-    u(1, vgop_field(broken_link));
-
-    while (put_bits_count(&pbc) % 8)
-        u(1, 0, zero_bit);
-
-    *data_len = put_bits_count(&pbc);
-    flush_put_bits(&pbc);
-
+    err = vaapi_encode_mpeg2_write_fragment(avctx, data, data_len, frag);
+fail:
+    ff_cbs_fragment_uninit(priv->cbc, frag);
     return 0;
 }
 
@@ -119,139 +146,274 @@
                                                    VAAPIEncodePicture *pic,
                                                    char *data, size_t *data_len)
 {
-    VAEncPictureParameterBufferMPEG2 *vpic = pic->codec_picture_params;
-    int picture_coding_type;
-    PutBitContext pbc;
+    VAAPIEncodeMPEG2Context *priv = avctx->priv_data;
+    CodedBitstreamFragment  *frag = &priv->current_fragment;
+    int err;
 
-    init_put_bits(&pbc, data, 8 * *data_len);
+    err = vaapi_encode_mpeg2_add_header(avctx, frag, MPEG2_START_PICTURE,
+                                        &priv->picture_header);
+    if (err < 0)
+        goto fail;
 
-    u(32, PICTURE_START_CODE, picture_start_code);
-    u(10, vpic_var(temporal_reference));
+    err = vaapi_encode_mpeg2_add_header(avctx, frag, MPEG2_START_EXTENSION,
+                                        &priv->picture_coding_extension);
+    if (err < 0)
+        goto fail;
 
-    switch (vpic->picture_type) {
-    case VAEncPictureTypeIntra:
-        picture_coding_type = AV_PICTURE_TYPE_I;
-        break;
-    case VAEncPictureTypePredictive:
-        picture_coding_type = AV_PICTURE_TYPE_P;
-        break;
-    case VAEncPictureTypeBidirectional:
-        picture_coding_type = AV_PICTURE_TYPE_B;
-        break;
-    default:
-        av_assert0(0 && "invalid picture_coding_type");
-    }
-    u(3, picture_coding_type, picture_coding_type);
-    u(16, 0xffff, vbv_delay);
-    if (picture_coding_type == 2 || picture_coding_type == 3) {
-        u(1, 0, full_pel_forward_vector);
-        u(3, 7, forward_f_code);
-    }
-    if (picture_coding_type == 3) {
-        u(1, 0, full_pel_backward_vector);
-        u(3, 7, backward_f_code);
-    }
-    u(1, 0, extra_bit_picture);
-
-    while (put_bits_count(&pbc) % 8)
-        u(1, 0, zero_bit);
-
-    u(32, EXT_START_CODE, extension_start_code);
-    u(4, 8, extension_start_code_identifier);
-    u(4, vpic_var(f_code[0][0]));
-    u(4, vpic_var(f_code[0][1]));
-    u(4, vpic_var(f_code[1][0]));
-    u(4, vpic_var(f_code[1][1]));
-    u(2, vpcext_field(intra_dc_precision));
-    u(2, vpcext_field(picture_structure));
-    u(1, vpcext_field(top_field_first));
-    u(1, vpcext_field(frame_pred_frame_dct));
-    u(1, vpcext_field(concealment_motion_vectors));
-    u(1, vpcext_field(q_scale_type));
-    u(1, vpcext_field(intra_vlc_format));
-    u(1, vpcext_field(alternate_scan));
-    u(1, vpcext_field(repeat_first_field));
-    u(1, 1, chroma_420_type);
-    u(1, vpcext_field(progressive_frame));
-    u(1, vpcext_field(composite_display_flag));
-    if (vpic->picture_coding_extension.bits.composite_display_flag) {
-        u(1, vcomp_field(v_axis));
-        u(3, vcomp_field(field_sequence));
-        u(1, vcomp_field(sub_carrier));
-        u(7, vcomp_field(burst_amplitude));
-        u(8, vcomp_field(sub_carrier_phase));
-    }
-
-    while (put_bits_count(&pbc) % 8)
-        u(1, 0, zero_bit);
-
-    *data_len = put_bits_count(&pbc);
-    flush_put_bits(&pbc);
-
+    err = vaapi_encode_mpeg2_write_fragment(avctx, data, data_len, frag);
+fail:
+    ff_cbs_fragment_uninit(priv->cbc, frag);
     return 0;
 }
 
 static int vaapi_encode_mpeg2_init_sequence_params(AVCodecContext *avctx)
 {
     VAAPIEncodeContext                 *ctx = avctx->priv_data;
+    VAAPIEncodeMPEG2Context           *priv = avctx->priv_data;
+    MPEG2RawSequenceHeader              *sh = &priv->sequence_header;
+    MPEG2RawSequenceExtension           *se = &priv->sequence_extension.data.sequence;
+    MPEG2RawSequenceDisplayExtension   *sde = &priv->sequence_display_extension.data.sequence_display;
+    MPEG2RawGroupOfPicturesHeader     *goph = &priv->gop_header;
+    MPEG2RawPictureHeader               *ph = &priv->picture_header;
+    MPEG2RawPictureCodingExtension     *pce = &priv->picture_coding_extension.data.picture_coding;
     VAEncSequenceParameterBufferMPEG2 *vseq = ctx->codec_sequence_params;
     VAEncPictureParameterBufferMPEG2  *vpic = ctx->codec_picture_params;
-    VAAPIEncodeMPEG2Context           *priv = ctx->priv_data;
+    int code, ext_n, ext_d;
 
-    vseq->intra_period   = avctx->gop_size;
-    vseq->ip_period      = ctx->b_per_p + 1;
+    memset(sh,   0, sizeof(*sh));
+    memset(se,   0, sizeof(*se));
+    memset(sde,  0, sizeof(*sde));
+    memset(goph, 0, sizeof(*goph));
+    memset(ph,   0, sizeof(*ph));
+    memset(pce,  0, sizeof(*pce));
 
-    vseq->picture_width  = avctx->width;
-    vseq->picture_height = avctx->height;
 
-    vseq->bits_per_second = avctx->bit_rate;
+    if (ctx->va_bit_rate > 0) {
+        priv->bit_rate = (ctx->va_bit_rate + 399) / 400;
+    } else {
+        // Unknown (not a bitrate-targetting mode), so just use the
+        // highest value.
+        priv->bit_rate = 0x3fffffff;
+    }
+    if (avctx->rc_buffer_size > 0) {
+        priv->vbv_buffer_size = (avctx->rc_buffer_size + (1 << 14) - 1) >> 14;
+    } else {
+        // Unknown, so guess a value from the bitrate.
+        priv->vbv_buffer_size = priv->bit_rate >> 14;
+    }
+
+    switch (avctx->level) {
+    case 4: // High.
+    case 6: // High 1440.
+        priv->f_code_horizontal = 9;
+        priv->f_code_vertical   = 5;
+        break;
+    case 8: // Main.
+        priv->f_code_horizontal = 8;
+        priv->f_code_vertical   = 5;
+        break;
+    case 10: // Low.
+    default:
+        priv->f_code_horizontal = 7;
+        priv->f_code_vertical   = 4;
+        break;
+    }
+
+
+    // Sequence header
+
+    sh->sequence_header_code = MPEG2_START_SEQUENCE_HEADER;
+
+    sh->horizontal_size_value = avctx->width  & 0xfff;
+    sh->vertical_size_value   = avctx->height & 0xfff;
+
+    if (avctx->sample_aspect_ratio.num != 0 &&
+        avctx->sample_aspect_ratio.den != 0) {
+        AVRational dar = av_div_q(avctx->sample_aspect_ratio,
+                                  (AVRational) { avctx->width, avctx->height });
+
+        if (av_cmp_q(avctx->sample_aspect_ratio, (AVRational) { 1, 1 }) == 0) {
+            sh->aspect_ratio_information = 1;
+        } else if (av_cmp_q(dar, (AVRational) { 3, 4 }) == 0) {
+            sh->aspect_ratio_information = 2;
+        } else if (av_cmp_q(dar, (AVRational) { 9, 16 }) == 0) {
+            sh->aspect_ratio_information = 3;
+        } else if (av_cmp_q(dar, (AVRational) { 100, 221 }) == 0) {
+            sh->aspect_ratio_information = 4;
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "Sample aspect ratio %d:%d is not "
+                   "representable, signalling square pixels instead.\n",
+                   avctx->sample_aspect_ratio.num,
+                   avctx->sample_aspect_ratio.den);
+            sh->aspect_ratio_information = 1;
+        }
+    } else {
+        // Unknown - assume square pixels.
+        sh->aspect_ratio_information = 1;
+    }
+
     if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
-        vseq->frame_rate = (float)avctx->framerate.num / avctx->framerate.den;
+        priv->frame_rate = avctx->framerate;
     else
-        vseq->frame_rate = (float)avctx->time_base.den / avctx->time_base.num;
+        priv->frame_rate = av_inv_q(avctx->time_base);
+    ff_mpeg12_find_best_frame_rate(priv->frame_rate,
+                                   &code, &ext_n, &ext_d, 0);
+    sh->frame_rate_code = code;
 
-    vseq->aspect_ratio_information = 1;
-    vseq->vbv_buffer_size = avctx->rc_buffer_size / (16 * 1024);
+    sh->bit_rate_value        = priv->bit_rate & 0x3ffff;
+    sh->vbv_buffer_size_value = priv->vbv_buffer_size & 0x3ff;
 
-    vseq->sequence_extension.bits.profile_and_level_indication =
-        avctx->profile << 4 | avctx->level;
-    vseq->sequence_extension.bits.progressive_sequence   = 1;
-    vseq->sequence_extension.bits.chroma_format          = 1;
-    vseq->sequence_extension.bits.low_delay              = 0;
-    vseq->sequence_extension.bits.frame_rate_extension_n = 0;
-    vseq->sequence_extension.bits.frame_rate_extension_d = 0;
+    sh->constrained_parameters_flag     = 0;
+    sh->load_intra_quantiser_matrix     = 0;
+    sh->load_non_intra_quantiser_matrix = 0;
 
-    vseq->new_gop_header              = 0;
-    vseq->gop_header.bits.time_code   = 0;
-    vseq->gop_header.bits.closed_gop  = 1;
-    vseq->gop_header.bits.broken_link = 0;
 
-    vpic->forward_reference_picture  = VA_INVALID_ID;
-    vpic->backward_reference_picture = VA_INVALID_ID;
-    vpic->reconstructed_picture      = VA_INVALID_ID;
+    // Sequence extension
 
-    vpic->coded_buf = VA_INVALID_ID;
+    priv->sequence_extension.extension_start_code = MPEG2_START_EXTENSION;
+    priv->sequence_extension.extension_start_code_identifier =
+        MPEG2_EXTENSION_SEQUENCE;
 
-    vpic->temporal_reference = 0;
-    vpic->f_code[0][0] = 15;
-    vpic->f_code[0][1] = 15;
-    vpic->f_code[1][0] = 15;
-    vpic->f_code[1][1] = 15;
+    se->profile_and_level_indication = avctx->profile << 4 | avctx->level;
+    se->progressive_sequence = 1;
+    se->chroma_format        = 1;
 
-    vpic->picture_coding_extension.bits.intra_dc_precision     = 0;
-    vpic->picture_coding_extension.bits.picture_structure      = 3;
-    vpic->picture_coding_extension.bits.top_field_first        = 0;
-    vpic->picture_coding_extension.bits.frame_pred_frame_dct   = 1;
-    vpic->picture_coding_extension.bits.concealment_motion_vectors = 0;
-    vpic->picture_coding_extension.bits.q_scale_type           = 0;
-    vpic->picture_coding_extension.bits.intra_vlc_format       = 0;
-    vpic->picture_coding_extension.bits.alternate_scan         = 0;
-    vpic->picture_coding_extension.bits.repeat_first_field     = 0;
-    vpic->picture_coding_extension.bits.progressive_frame      = 1;
-    vpic->picture_coding_extension.bits.composite_display_flag = 0;
+    se->horizontal_size_extension = avctx->width  >> 12;
+    se->vertical_size_extension   = avctx->height >> 12;
 
-    priv->bit_rate = (avctx->bit_rate + 399) / 400;
-    priv->vbv_buffer_size = avctx->rc_buffer_size / (16 * 1024);
+    se->bit_rate_extension        = priv->bit_rate >> 18;
+    se->vbv_buffer_size_extension = priv->vbv_buffer_size >> 10;
+    se->low_delay                 = ctx->b_per_p == 0;
+
+    se->frame_rate_extension_n = ext_n;
+    se->frame_rate_extension_d = ext_d;
+
+
+    // Sequence display extension
+
+    priv->sequence_display_extension.extension_start_code =
+        MPEG2_START_EXTENSION;
+    priv->sequence_display_extension.extension_start_code_identifier =
+        MPEG2_EXTENSION_SEQUENCE_DISPLAY;
+
+    sde->video_format = 5;
+    if (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
+        avctx->color_trc       != AVCOL_TRC_UNSPECIFIED ||
+        avctx->colorspace      != AVCOL_SPC_UNSPECIFIED) {
+        sde->colour_description       = 1;
+        sde->colour_primaries         = avctx->color_primaries;
+        sde->transfer_characteristics = avctx->color_trc;
+        sde->matrix_coefficients      = avctx->colorspace;
+    } else {
+        sde->colour_description = 0;
+    }
+
+    sde->display_horizontal_size = avctx->width;
+    sde->display_vertical_size   = avctx->height;
+
+
+    // GOP header
+
+    goph->group_start_code = MPEG2_START_GROUP;
+
+    goph->time_code   = 0;
+    goph->closed_gop  = 1;
+    goph->broken_link = 0;
+
+
+    // Defaults for picture header
+
+    ph->picture_start_code = MPEG2_START_PICTURE;
+
+    ph->vbv_delay = 0xffff; // Not currently calculated.
+
+    ph->full_pel_forward_vector  = 0;
+    ph->forward_f_code           = 7;
+    ph->full_pel_backward_vector = 0;
+    ph->forward_f_code           = 7;
+
+
+    // Defaults for picture coding extension
+
+    priv->picture_coding_extension.extension_start_code =
+        MPEG2_START_EXTENSION;
+    priv->picture_coding_extension.extension_start_code_identifier =
+        MPEG2_EXTENSION_PICTURE_CODING;
+
+    pce->intra_dc_precision         = 0;
+    pce->picture_structure          = 3;
+    pce->top_field_first            = 0;
+    pce->frame_pred_frame_dct       = 1;
+    pce->concealment_motion_vectors = 0;
+    pce->q_scale_type               = 0;
+    pce->intra_vlc_format           = 0;
+    pce->alternate_scan             = 0;
+    pce->repeat_first_field         = 0;
+    pce->progressive_frame          = 1;
+    pce->composite_display_flag     = 0;
+
+
+
+    *vseq = (VAEncSequenceParameterBufferMPEG2) {
+        .intra_period = ctx->gop_size,
+        .ip_period    = ctx->b_per_p + 1,
+
+        .picture_width  = avctx->width,
+        .picture_height = avctx->height,
+
+        .bits_per_second          = ctx->va_bit_rate,
+        .frame_rate               = av_q2d(priv->frame_rate),
+        .aspect_ratio_information = sh->aspect_ratio_information,
+        .vbv_buffer_size          = priv->vbv_buffer_size,
+
+        .sequence_extension.bits = {
+            .profile_and_level_indication = se->profile_and_level_indication,
+            .progressive_sequence         = se->progressive_sequence,
+            .chroma_format                = se->chroma_format,
+            .low_delay                    = se->low_delay,
+            .frame_rate_extension_n       = se->frame_rate_extension_n,
+            .frame_rate_extension_d       = se->frame_rate_extension_d,
+        },
+
+        .new_gop_header = 1,
+        .gop_header.bits = {
+            .time_code   = goph->time_code,
+            .closed_gop  = goph->closed_gop,
+            .broken_link = goph->broken_link,
+        },
+    };
+
+    *vpic = (VAEncPictureParameterBufferMPEG2) {
+        .forward_reference_picture  = VA_INVALID_ID,
+        .backward_reference_picture = VA_INVALID_ID,
+        .reconstructed_picture      = VA_INVALID_ID,
+        .coded_buf                  = VA_INVALID_ID,
+
+        .vbv_delay = 0xffff,
+        .f_code    = { { 15, 15 }, { 15, 15 } },
+
+        .picture_coding_extension.bits = {
+            .intra_dc_precision         = pce->intra_dc_precision,
+            .picture_structure          = pce->picture_structure,
+            .top_field_first            = pce->top_field_first,
+            .frame_pred_frame_dct       = pce->frame_pred_frame_dct,
+            .concealment_motion_vectors = pce->concealment_motion_vectors,
+            .q_scale_type               = pce->q_scale_type,
+            .intra_vlc_format           = pce->intra_vlc_format,
+            .alternate_scan             = pce->alternate_scan,
+            .repeat_first_field         = pce->repeat_first_field,
+            .progressive_frame          = pce->progressive_frame,
+            .composite_display_flag     = pce->composite_display_flag,
+        },
+
+        .composite_display.bits = {
+            .v_axis            = pce->v_axis,
+            .field_sequence    = pce->field_sequence,
+            .sub_carrier       = pce->sub_carrier,
+            .burst_amplitude   = pce->burst_amplitude,
+            .sub_carrier_phase = pce->sub_carrier_phase,
+        },
+    };
 
     return 0;
 }
@@ -259,57 +421,61 @@
 static int vaapi_encode_mpeg2_init_picture_params(AVCodecContext *avctx,
                                                  VAAPIEncodePicture *pic)
 {
-    VAAPIEncodeContext                *ctx = avctx->priv_data;
+    VAAPIEncodeMPEG2Context          *priv = avctx->priv_data;
+    MPEG2RawPictureHeader              *ph = &priv->picture_header;
+    MPEG2RawPictureCodingExtension    *pce = &priv->picture_coding_extension.data.picture_coding;
     VAEncPictureParameterBufferMPEG2 *vpic = pic->codec_picture_params;
-    VAAPIEncodeMPEG2Context          *priv = ctx->priv_data;
-    int fch, fcv;
 
-    switch (avctx->level) {
-    case 4: // High.
-    case 6: // High 1440.
-        fch = 9;
-        fcv = 5;
-        break;
-    case 8: // Main.
-        fch = 8;
-        fcv = 5;
-        break;
-    case 10: // Low.
-    default:
-        fch = 7;
-        fcv = 4;
-        break;
+    if (pic->type == PICTURE_TYPE_IDR || pic->type == PICTURE_TYPE_I) {
+        ph->temporal_reference  = 0;
+        ph->picture_coding_type = 1;
+        priv->last_i_frame = pic->display_order;
+    } else {
+        ph->temporal_reference = pic->display_order - priv->last_i_frame;
+        ph->picture_coding_type = pic->type == PICTURE_TYPE_B ? 3 : 2;
     }
 
+    if (pic->type == PICTURE_TYPE_P || pic->type == PICTURE_TYPE_B) {
+        pce->f_code[0][0] = priv->f_code_horizontal;
+        pce->f_code[0][1] = priv->f_code_vertical;
+    } else {
+        pce->f_code[0][0] = 15;
+        pce->f_code[0][1] = 15;
+    }
+    if (pic->type == PICTURE_TYPE_B) {
+        pce->f_code[1][0] = priv->f_code_horizontal;
+        pce->f_code[1][1] = priv->f_code_vertical;
+    } else {
+        pce->f_code[1][0] = 15;
+        pce->f_code[1][1] = 15;
+    }
+
+    vpic->reconstructed_picture = pic->recon_surface;
+    vpic->coded_buf             = pic->output_buffer;
+
     switch (pic->type) {
     case PICTURE_TYPE_IDR:
     case PICTURE_TYPE_I:
         vpic->picture_type = VAEncPictureTypeIntra;
-        priv->last_i_frame = pic->display_order;
         break;
     case PICTURE_TYPE_P:
         vpic->picture_type = VAEncPictureTypePredictive;
         vpic->forward_reference_picture = pic->refs[0]->recon_surface;
-        vpic->f_code[0][0] = fch;
-        vpic->f_code[0][1] = fcv;
         break;
     case PICTURE_TYPE_B:
         vpic->picture_type = VAEncPictureTypeBidirectional;
         vpic->forward_reference_picture  = pic->refs[0]->recon_surface;
         vpic->backward_reference_picture = pic->refs[1]->recon_surface;
-        vpic->f_code[0][0] = fch;
-        vpic->f_code[0][1] = fcv;
-        vpic->f_code[1][0] = fch;
-        vpic->f_code[1][1] = fcv;
         break;
     default:
         av_assert0(0 && "invalid picture type");
     }
 
-    vpic->reconstructed_picture = pic->recon_surface;
-    vpic->coded_buf = pic->output_buffer;
-
-    vpic->temporal_reference = pic->display_order - priv->last_i_frame;
+    vpic->temporal_reference = ph->temporal_reference;
+    vpic->f_code[0][0]       = pce->f_code[0][0];
+    vpic->f_code[0][1]       = pce->f_code[0][1];
+    vpic->f_code[1][0]       = pce->f_code[1][0];
+    vpic->f_code[1][1]       = pce->f_code[1][1];
 
     pic->nb_slices = priv->mb_height;
 
@@ -320,9 +486,8 @@
                                                VAAPIEncodePicture *pic,
                                                VAAPIEncodeSlice *slice)
 {
-    VAAPIEncodeContext                  *ctx = avctx->priv_data;
+    VAAPIEncodeMPEG2Context            *priv = avctx->priv_data;
     VAEncSliceParameterBufferMPEG2   *vslice = slice->codec_slice_params;
-    VAAPIEncodeMPEG2Context            *priv = ctx->priv_data;
     int qp;
 
     vslice->macroblock_address = priv->mb_width * slice->index;
@@ -353,7 +518,12 @@
 static av_cold int vaapi_encode_mpeg2_configure(AVCodecContext *avctx)
 {
     VAAPIEncodeContext       *ctx = avctx->priv_data;
-    VAAPIEncodeMPEG2Context *priv = ctx->priv_data;
+    VAAPIEncodeMPEG2Context *priv = avctx->priv_data;
+    int err;
+
+    err = ff_cbs_init(&priv->cbc, AV_CODEC_ID_MPEG2VIDEO, avctx);
+    if (err < 0)
+        return err;
 
     priv->mb_width  = FFALIGN(avctx->width,  16) / 16;
     priv->mb_height = FFALIGN(avctx->height, 16) / 16;
@@ -386,8 +556,14 @@
     return 0;
 }
 
+static const VAAPIEncodeProfile vaapi_encode_mpeg2_profiles[] = {
+    { FF_PROFILE_MPEG2_MAIN,   8, 3, 1, 1, VAProfileMPEG2Main   },
+    { FF_PROFILE_MPEG2_SIMPLE, 8, 3, 1, 1, VAProfileMPEG2Simple },
+    { FF_PROFILE_UNKNOWN }
+};
+
 static const VAAPIEncodeType vaapi_encode_type_mpeg2 = {
-    .priv_data_size        = sizeof(VAAPIEncodeMPEG2Context),
+    .profiles              = vaapi_encode_mpeg2_profiles,
 
     .configure             = &vaapi_encode_mpeg2_configure,
 
@@ -409,29 +585,38 @@
 
 static av_cold int vaapi_encode_mpeg2_init(AVCodecContext *avctx)
 {
-    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAAPIEncodeContext       *ctx = avctx->priv_data;
+    VAAPIEncodeMPEG2Context *priv = avctx->priv_data;
 
     ctx->codec = &vaapi_encode_type_mpeg2;
 
-    switch (avctx->profile) {
-    case FF_PROFILE_MPEG2_SIMPLE:
-        ctx->va_profile = VAProfileMPEG2Simple;
-        break;
-    case FF_PROFILE_MPEG2_MAIN:
-        ctx->va_profile = VAProfileMPEG2Main;
+    if (avctx->profile == FF_PROFILE_UNKNOWN)
+        avctx->profile = priv->profile;
+    if (avctx->level == FF_LEVEL_UNKNOWN)
+        avctx->level = priv->level;
+
+    // Reject unknown levels (these are required to set f_code for
+    // motion vector encoding).
+    switch (avctx->level) {
+    case 4: // High
+    case 6: // High 1440
+    case 8: // Main
+    case 10: // Low
         break;
     default:
-        av_log(avctx, AV_LOG_ERROR, "Unknown MPEG-2 profile %d.\n",
-               avctx->profile);
+        av_log(avctx, AV_LOG_ERROR, "Unknown MPEG-2 level %d.\n",
+               avctx->level);
         return AVERROR(EINVAL);
     }
 
-    ctx->va_entrypoint = VAEntrypointEncSlice;
-    ctx->va_rt_format  = VA_RT_FORMAT_YUV420;
-    ctx->va_rc_mode    = VA_RC_CQP;
+    if (avctx->height % 4096 == 0 || avctx->width % 4096 == 0) {
+        av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support picture "
+               "height or width divisible by 4096.\n");
+        return AVERROR(EINVAL);
+    }
 
-    ctx->va_packed_headers = VA_ENC_PACKED_HEADER_SEQUENCE |
-                             VA_ENC_PACKED_HEADER_PICTURE;
+    ctx->desired_packed_headers = VA_ENC_PACKED_HEADER_SEQUENCE |
+                                  VA_ENC_PACKED_HEADER_PICTURE;
 
     ctx->surface_width  = FFALIGN(avctx->width,  16);
     ctx->surface_height = FFALIGN(avctx->height, 16);
@@ -439,9 +624,47 @@
     return ff_vaapi_encode_init(avctx);
 }
 
+static av_cold int vaapi_encode_mpeg2_close(AVCodecContext *avctx)
+{
+    VAAPIEncodeMPEG2Context *priv = avctx->priv_data;
+
+    ff_cbs_close(&priv->cbc);
+
+    return ff_vaapi_encode_close(avctx);
+}
+
+#define OFFSET(x) offsetof(VAAPIEncodeMPEG2Context, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+static const AVOption vaapi_encode_mpeg2_options[] = {
+    VAAPI_ENCODE_COMMON_OPTIONS,
+
+    { "profile", "Set profile (in profile_and_level_indication)",
+      OFFSET(profile), AV_OPT_TYPE_INT,
+      { .i64 = FF_PROFILE_UNKNOWN }, FF_PROFILE_UNKNOWN, 7, FLAGS, "profile" },
+
+#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \
+      { .i64 = value }, 0, 0, FLAGS, "profile"
+    { PROFILE("simple", FF_PROFILE_MPEG2_SIMPLE) },
+    { PROFILE("main",   FF_PROFILE_MPEG2_MAIN)   },
+#undef PROFILE
+
+    { "level", "Set level (in profile_and_level_indication)",
+      OFFSET(level), AV_OPT_TYPE_INT,
+      { .i64 = 4 }, 0, 15, FLAGS, "level" },
+
+#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
+      { .i64 = value }, 0, 0, FLAGS, "level"
+    { LEVEL("low",       10) },
+    { LEVEL("main",       8) },
+    { LEVEL("high_1440",  6) },
+    { LEVEL("high",       4) },
+#undef LEVEL
+
+    { NULL },
+};
+
 static const AVCodecDefault vaapi_encode_mpeg2_defaults[] = {
-    { "profile",        "4"   },
-    { "level",          "4"   },
+    { "b",              "0"   },
     { "bf",             "1"   },
     { "g",              "120" },
     { "i_qfactor",      "1"   },
@@ -449,22 +672,33 @@
     { "b_qfactor",      "6/5" },
     { "b_qoffset",      "0"   },
     { "global_quality", "10"  },
+    { "qmin",           "-1"  },
+    { "qmax",           "-1"  },
     { NULL },
 };
 
+static const AVClass vaapi_encode_mpeg2_class = {
+    .class_name = "mpeg2_vaapi",
+    .item_name  = av_default_item_name,
+    .option     = vaapi_encode_mpeg2_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_mpeg2_vaapi_encoder = {
     .name           = "mpeg2_vaapi",
     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-2 (VAAPI)"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG2VIDEO,
-    .priv_data_size = sizeof(VAAPIEncodeContext),
+    .priv_data_size = sizeof(VAAPIEncodeMPEG2Context),
     .init           = &vaapi_encode_mpeg2_init,
     .encode2        = &ff_vaapi_encode2,
-    .close          = &ff_vaapi_encode_close,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .close          = &vaapi_encode_mpeg2_close,
+    .priv_class     = &vaapi_encode_mpeg2_class,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
     .defaults       = vaapi_encode_mpeg2_defaults,
     .pix_fmts = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_VAAPI,
         AV_PIX_FMT_NONE,
     },
+    .wrapper_name   = "vaapi",
 };

diff --git a/libavcodec/vaapi_encode_vp8.c b/libavcodec/vaapi_encode_vp8.c
index 423f748..697b465 100644
--- a/libavcodec/vaapi_encode_vp8.c
+++ b/libavcodec/vaapi_encode_vp8.c

@@ -32,15 +32,17 @@
 
 
 typedef struct VAAPIEncodeVP8Context {
+    VAAPIEncodeContext common;
+
+    // User options.
+    int loop_filter_level;
+    int loop_filter_sharpness;
+
+    // Derived settings.
     int q_index_i;
     int q_index_p;
 } VAAPIEncodeVP8Context;
 
-typedef struct VAAPIEncodeVP8Options {
-    int loop_filter_level;
-    int loop_filter_sharpness;
-} VAAPIEncodeVP8Options;
-
 
 #define vseq_var(name)     vseq->name, name
 #define vseq_field(name)   vseq->seq_fields.bits.name, name
@@ -63,8 +65,8 @@
     vseq->kf_auto = 0;
 
     if (!(ctx->va_rc_mode & VA_RC_CQP)) {
-        vseq->bits_per_second = avctx->bit_rate;
-        vseq->intra_period    = avctx->gop_size;
+        vseq->bits_per_second = ctx->va_bit_rate;
+        vseq->intra_period    = ctx->gop_size;
     }
 
     return 0;
@@ -73,9 +75,8 @@
 static int vaapi_encode_vp8_init_picture_params(AVCodecContext *avctx,
                                                 VAAPIEncodePicture *pic)
 {
-    VAAPIEncodeContext              *ctx = avctx->priv_data;
+    VAAPIEncodeVP8Context          *priv = avctx->priv_data;
     VAEncPictureParameterBufferVP8 *vpic = pic->codec_picture_params;
-    VAAPIEncodeVP8Options           *opt = ctx->codec_options;
     int i;
 
     vpic->reconstructed_frame = pic->recon_surface;
@@ -116,8 +117,8 @@
     vpic->pic_flags.bits.version = 0;
     vpic->pic_flags.bits.loop_filter_type = 0;
     for (i = 0; i < 4; i++)
-        vpic->loop_filter_level[i] = opt->loop_filter_level;
-    vpic->sharpness_level = opt->loop_filter_sharpness;
+        vpic->loop_filter_level[i] = priv->loop_filter_level;
+    vpic->sharpness_level = priv->loop_filter_sharpness;
 
     vpic->clamp_qindex_low  = 0;
     vpic->clamp_qindex_high = 127;
@@ -130,8 +131,7 @@
                                               int index, int *type,
                                               char *data, size_t *data_len)
 {
-    VAAPIEncodeContext     *ctx = avctx->priv_data;
-    VAAPIEncodeVP8Context *priv = ctx->priv_data;
+    VAAPIEncodeVP8Context *priv = avctx->priv_data;
     VAQMatrixBufferVP8 quant;
     int i, q;
 
@@ -143,6 +143,8 @@
     *type     = VAQMatrixBufferType;
     *data_len = sizeof(quant);
 
+    memset(&quant, 0, sizeof(quant));
+
     if (pic->type == PICTURE_TYPE_P)
         q = priv->q_index_p;
     else
@@ -159,8 +161,7 @@
 
 static av_cold int vaapi_encode_vp8_configure(AVCodecContext *avctx)
 {
-    VAAPIEncodeContext     *ctx = avctx->priv_data;
-    VAAPIEncodeVP8Context *priv = ctx->priv_data;
+    VAAPIEncodeVP8Context *priv = avctx->priv_data;
 
     priv->q_index_p = av_clip(avctx->global_quality, 0, VP8_MAX_QUANT);
     if (avctx->i_quant_factor > 0.0)
@@ -174,10 +175,15 @@
     return 0;
 }
 
-static const VAAPIEncodeType vaapi_encode_type_vp8 = {
-    .configure             = &vaapi_encode_vp8_configure,
+static const VAAPIEncodeProfile vaapi_encode_vp8_profiles[] = {
+    { 0 /* VP8 has no profiles */, 8, 3, 1, 1, VAProfileVP8Version0_3 },
+    { FF_PROFILE_UNKNOWN }
+};
 
-    .priv_data_size        = sizeof(VAAPIEncodeVP8Context),
+static const VAAPIEncodeType vaapi_encode_type_vp8 = {
+    .profiles              = vaapi_encode_vp8_profiles,
+
+    .configure             = &vaapi_encode_vp8_configure,
 
     .sequence_params_size  = sizeof(VAEncSequenceParameterBufferVP8),
     .init_sequence_params  = &vaapi_encode_vp8_init_sequence_params,
@@ -192,30 +198,12 @@
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
 
-    if (avctx->max_b_frames > 0) {
-        av_log(avctx, AV_LOG_ERROR, "B-frames are not supported.\n");
-        return AVERROR_PATCHWELCOME;
-    }
-
     ctx->codec = &vaapi_encode_type_vp8;
 
-    ctx->va_profile    = VAProfileVP8Version0_3;
-    ctx->va_entrypoint = VAEntrypointEncSlice;
-    ctx->va_rt_format  = VA_RT_FORMAT_YUV420;
-
-    if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
-        ctx->va_rc_mode = VA_RC_CQP;
-    } else if (avctx->bit_rate > 0) {
-        if (avctx->rc_max_rate == avctx->bit_rate)
-            ctx->va_rc_mode = VA_RC_CBR;
-        else
-            ctx->va_rc_mode = VA_RC_VBR;
-    } else {
-        ctx->va_rc_mode = VA_RC_CQP;
-    }
-
-    // Packed headers are not currently supported.
-    ctx->va_packed_headers = 0;
+    // No packed headers are currently desired.  VP8 has no metadata
+    // which would be useful to write, and no existing driver supports
+    // adding them anyway.
+    ctx->desired_packed_headers = 0;
 
     ctx->surface_width  = FFALIGN(avctx->width,  16);
     ctx->surface_height = FFALIGN(avctx->height, 16);
@@ -223,10 +211,10 @@
     return ff_vaapi_encode_init(avctx);
 }
 
-#define OFFSET(x) (offsetof(VAAPIEncodeContext, codec_options_data) + \
-                   offsetof(VAAPIEncodeVP8Options, x))
+#define OFFSET(x) offsetof(VAAPIEncodeVP8Context, x)
 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 static const AVOption vaapi_encode_vp8_options[] = {
+    VAAPI_ENCODE_COMMON_OPTIONS,
     { "loop_filter_level", "Loop filter level",
       OFFSET(loop_filter_level), AV_OPT_TYPE_INT, { .i64 = 16 }, 0, 63, FLAGS },
     { "loop_filter_sharpness", "Loop filter sharpness",
@@ -239,6 +227,8 @@
     { "bf",             "0"   },
     { "g",              "120" },
     { "global_quality", "40"  },
+    { "qmin",           "-1"  },
+    { "qmax",           "-1"  },
     { NULL },
 };
 
@@ -254,16 +244,16 @@
     .long_name      = NULL_IF_CONFIG_SMALL("VP8 (VAAPI)"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VP8,
-    .priv_data_size = (sizeof(VAAPIEncodeContext) +
-                       sizeof(VAAPIEncodeVP8Options)),
+    .priv_data_size = sizeof(VAAPIEncodeVP8Context),
     .init           = &vaapi_encode_vp8_init,
     .encode2        = &ff_vaapi_encode2,
     .close          = &ff_vaapi_encode_close,
     .priv_class     = &vaapi_encode_vp8_class,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
     .defaults       = vaapi_encode_vp8_defaults,
     .pix_fmts = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_VAAPI,
         AV_PIX_FMT_NONE,
     },
+    .wrapper_name   = "vaapi",
 };

diff --git a/libavcodec/vaapi_encode_vp9.c b/libavcodec/vaapi_encode_vp9.c
index a987d1c..39bc868 100644
--- a/libavcodec/vaapi_encode_vp9.c
+++ b/libavcodec/vaapi_encode_vp9.c

@@ -33,21 +33,25 @@
 
 
 typedef struct VAAPIEncodeVP9Context {
+    VAAPIEncodeContext common;
+
+    // User options.
+    int loop_filter_level;
+    int loop_filter_sharpness;
+
+    // Derived settings.
     int q_idx_idr;
     int q_idx_p;
     int q_idx_b;
 
+    // Stream state.
+
     // Reference direction for B-like frames:
     // 0 - most recent P/IDR frame is last.
     // 1 - most recent P frame is golden.
     int last_ref_dir;
 } VAAPIEncodeVP9Context;
 
-typedef struct VAAPIEncodeVP9Options {
-    int loop_filter_level;
-    int loop_filter_sharpness;
-} VAAPIEncodeVP9Options;
-
 
 #define vseq_var(name)     vseq->name, name
 #define vseq_field(name)   vseq->seq_fields.bits.name, name
@@ -67,8 +71,8 @@
     vseq->kf_auto = 0;
 
     if (!(ctx->va_rc_mode & VA_RC_CQP)) {
-        vseq->bits_per_second = avctx->bit_rate;
-        vseq->intra_period    = avctx->gop_size;
+        vseq->bits_per_second = ctx->va_bit_rate;
+        vseq->intra_period    = ctx->gop_size;
     }
 
     vpic->frame_width_src  = avctx->width;
@@ -83,9 +87,8 @@
                                                 VAAPIEncodePicture *pic)
 {
     VAAPIEncodeContext              *ctx = avctx->priv_data;
+    VAAPIEncodeVP9Context          *priv = avctx->priv_data;
     VAEncPictureParameterBufferVP9 *vpic = pic->codec_picture_params;
-    VAAPIEncodeVP9Context          *priv = ctx->priv_data;
-    VAAPIEncodeVP9Options           *opt = ctx->codec_options;
     int i;
 
     vpic->reconstructed_frame = pic->recon_surface;
@@ -100,7 +103,7 @@
         break;
     case PICTURE_TYPE_P:
         av_assert0(pic->nb_refs == 1);
-        if (avctx->max_b_frames > 0) {
+        if (ctx->b_per_p > 0) {
             if (priv->last_ref_dir) {
                 vpic->ref_flags.bits.ref_frame_ctrl_l0  = 2;
                 vpic->ref_flags.bits.ref_gf_idx         = 1;
@@ -169,10 +172,10 @@
     vpic->chroma_ac_qindex_delta = 0;
     vpic->chroma_dc_qindex_delta = 0;
 
-    vpic->filter_level    = opt->loop_filter_level;
-    vpic->sharpness_level = opt->loop_filter_sharpness;
+    vpic->filter_level    = priv->loop_filter_level;
+    vpic->sharpness_level = priv->loop_filter_sharpness;
 
-    if (avctx->max_b_frames > 0 && pic->type == PICTURE_TYPE_P)
+    if (ctx->b_per_p > 0 && pic->type == PICTURE_TYPE_P)
         priv->last_ref_dir = !priv->last_ref_dir;
 
     return 0;
@@ -180,8 +183,7 @@
 
 static av_cold int vaapi_encode_vp9_configure(AVCodecContext *avctx)
 {
-    VAAPIEncodeContext     *ctx = avctx->priv_data;
-    VAAPIEncodeVP9Context *priv = ctx->priv_data;
+    VAAPIEncodeVP9Context *priv = avctx->priv_data;
 
     priv->q_idx_p = av_clip(avctx->global_quality, 0, VP9_MAX_QUANT);
     if (avctx->i_quant_factor > 0.0)
@@ -202,10 +204,16 @@
     return 0;
 }
 
-static const VAAPIEncodeType vaapi_encode_type_vp9 = {
-    .configure             = &vaapi_encode_vp9_configure,
+static const VAAPIEncodeProfile vaapi_encode_vp9_profiles[] = {
+    { FF_PROFILE_VP9_0,  8, 3, 1, 1, VAProfileVP9Profile0 },
+    { FF_PROFILE_VP9_2, 10, 3, 1, 1, VAProfileVP9Profile2 },
+    { FF_PROFILE_UNKNOWN }
+};
 
-    .priv_data_size        = sizeof(VAAPIEncodeVP9Context),
+static const VAAPIEncodeType vaapi_encode_type_vp9 = {
+    .profiles              = vaapi_encode_vp9_profiles,
+
+    .configure             = &vaapi_encode_vp9_configure,
 
     .sequence_params_size  = sizeof(VAEncSequenceParameterBufferVP9),
     .init_sequence_params  = &vaapi_encode_vp9_init_sequence_params,
@@ -220,44 +228,10 @@
 
     ctx->codec = &vaapi_encode_type_vp9;
 
-    switch (avctx->profile) {
-    case FF_PROFILE_VP9_0:
-    case FF_PROFILE_UNKNOWN:
-        ctx->va_profile = VAProfileVP9Profile0;
-        ctx->va_rt_format = VA_RT_FORMAT_YUV420;
-        break;
-    case FF_PROFILE_VP9_1:
-        av_log(avctx, AV_LOG_ERROR, "VP9 profile 1 is not "
-               "supported.\n");
-        return AVERROR_PATCHWELCOME;
-    case FF_PROFILE_VP9_2:
-        ctx->va_profile = VAProfileVP9Profile2;
-        ctx->va_rt_format = VA_RT_FORMAT_YUV420_10BPP;
-        break;
-    case FF_PROFILE_VP9_3:
-        av_log(avctx, AV_LOG_ERROR, "VP9 profile 3 is not "
-               "supported.\n");
-        return AVERROR_PATCHWELCOME;
-    default:
-        av_log(avctx, AV_LOG_ERROR, "Unknown VP9 profile %d.\n",
-               avctx->profile);
-        return AVERROR(EINVAL);
-    }
-    ctx->va_entrypoint = VAEntrypointEncSlice;
-
-    if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
-        ctx->va_rc_mode = VA_RC_CQP;
-    } else if (avctx->bit_rate > 0) {
-        if (avctx->bit_rate == avctx->rc_max_rate)
-            ctx->va_rc_mode = VA_RC_CBR;
-        else
-            ctx->va_rc_mode = VA_RC_VBR;
-    } else {
-        ctx->va_rc_mode = VA_RC_CQP;
-    }
-
-    // Packed headers are not currently supported.
-    ctx->va_packed_headers = 0;
+    // No packed headers are currently desired.  They could be written,
+    // but there isn't any reason to do so - the one usable driver (i965)
+    // can write its own headers and there is no metadata to include.
+    ctx->desired_packed_headers = 0;
 
     // Surfaces must be aligned to superblock boundaries.
     ctx->surface_width  = FFALIGN(avctx->width,  64);
@@ -266,10 +240,10 @@
     return ff_vaapi_encode_init(avctx);
 }
 
-#define OFFSET(x) (offsetof(VAAPIEncodeContext, codec_options_data) + \
-                   offsetof(VAAPIEncodeVP9Options, x))
+#define OFFSET(x) offsetof(VAAPIEncodeVP9Context, x)
 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 static const AVOption vaapi_encode_vp9_options[] = {
+    VAAPI_ENCODE_COMMON_OPTIONS,
     { "loop_filter_level", "Loop filter level",
       OFFSET(loop_filter_level), AV_OPT_TYPE_INT, { .i64 = 16 }, 0, 63, FLAGS },
     { "loop_filter_sharpness", "Loop filter sharpness",
@@ -278,11 +252,12 @@
 };
 
 static const AVCodecDefault vaapi_encode_vp9_defaults[] = {
-    { "profile",        "0"   },
     { "b",              "0"   },
     { "bf",             "0"   },
     { "g",              "250" },
     { "global_quality", "100" },
+    { "qmin",           "-1"  },
+    { "qmax",           "-1"  },
     { NULL },
 };
 
@@ -298,16 +273,16 @@
     .long_name      = NULL_IF_CONFIG_SMALL("VP9 (VAAPI)"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VP9,
-    .priv_data_size = (sizeof(VAAPIEncodeContext) +
-                       sizeof(VAAPIEncodeVP9Options)),
+    .priv_data_size = sizeof(VAAPIEncodeVP9Context),
     .init           = &vaapi_encode_vp9_init,
     .encode2        = &ff_vaapi_encode2,
     .close          = &ff_vaapi_encode_close,
     .priv_class     = &vaapi_encode_vp9_class,
-    .capabilities   = AV_CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
     .defaults       = vaapi_encode_vp9_defaults,
     .pix_fmts = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_VAAPI,
         AV_PIX_FMT_NONE,
     },
+    .wrapper_name   = "vaapi",
 };

diff --git a/libavcodec/vaapi_h264.c b/libavcodec/vaapi_h264.c
index 30e7026..5854587 100644
--- a/libavcodec/vaapi_h264.c
+++ b/libavcodec/vaapi_h264.c

@@ -388,7 +388,7 @@
     return 0;
 }
 
-AVHWAccel ff_h264_vaapi_hwaccel = {
+const AVHWAccel ff_h264_vaapi_hwaccel = {
     .name                 = "h264_vaapi",
     .type                 = AVMEDIA_TYPE_VIDEO,
     .id                   = AV_CODEC_ID_H264,
@@ -399,6 +399,7 @@
     .frame_priv_data_size = sizeof(VAAPIDecodePicture),
     .init                 = &ff_vaapi_decode_init,
     .uninit               = &ff_vaapi_decode_uninit,
+    .frame_params         = &ff_vaapi_common_frame_params,
     .priv_data_size       = sizeof(VAAPIDecodeContext),
     .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
 };

diff --git a/libavcodec/vaapi_hevc.c b/libavcodec/vaapi_hevc.c
index 69b8e47..19aabcd 100644
--- a/libavcodec/vaapi_hevc.c
+++ b/libavcodec/vaapi_hevc.c

@@ -423,7 +423,7 @@
     return 0;
 }
 
-AVHWAccel ff_hevc_vaapi_hwaccel = {
+const AVHWAccel ff_hevc_vaapi_hwaccel = {
     .name                 = "hevc_vaapi",
     .type                 = AVMEDIA_TYPE_VIDEO,
     .id                   = AV_CODEC_ID_HEVC,
@@ -434,6 +434,7 @@
     .frame_priv_data_size = sizeof(VAAPIDecodePictureHEVC),
     .init                 = ff_vaapi_decode_init,
     .uninit               = ff_vaapi_decode_uninit,
+    .frame_params         = ff_vaapi_common_frame_params,
     .priv_data_size       = sizeof(VAAPIDecodeContext),
     .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
 };

diff --git a/libavcodec/vaapi_mjpeg.c b/libavcodec/vaapi_mjpeg.c
new file mode 100644
index 0000000..14e0206
--- /dev/null
+++ b/libavcodec/vaapi_mjpeg.c

@@ -0,0 +1,159 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <va/va.h>
+#include <va/va_dec_jpeg.h>
+
+#include "hwaccel.h"
+#include "vaapi_decode.h"
+#include "mjpegdec.h"
+
+static int vaapi_mjpeg_start_frame(AVCodecContext          *avctx,
+                                   av_unused const uint8_t *buffer,
+                                   av_unused uint32_t       size)
+{
+    const MJpegDecodeContext *s = avctx->priv_data;
+    VAAPIDecodePicture *pic = s->hwaccel_picture_private;
+    VAPictureParameterBufferJPEGBaseline pp;
+    int err, i;
+
+    pic->output_surface = ff_vaapi_get_surface_id(s->picture_ptr);
+
+    pp = (VAPictureParameterBufferJPEGBaseline) {
+        .picture_width  = avctx->width,
+        .picture_height = avctx->height,
+
+        .num_components = s->nb_components,
+    };
+
+    for (i = 0; i < s->nb_components; i++) {
+        pp.components[i].component_id             = s->component_id[i];
+        pp.components[i].h_sampling_factor        = s->h_count[i];
+        pp.components[i].v_sampling_factor        = s->v_count[i];
+        pp.components[i].quantiser_table_selector = s->quant_index[i];
+    }
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, pic,
+                                            VAPictureParameterBufferType,
+                                            &pp, sizeof(pp));
+    if (err < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    ff_vaapi_decode_cancel(avctx, pic);
+    return err;
+}
+
+static int vaapi_mjpeg_end_frame(AVCodecContext *avctx)
+{
+    const MJpegDecodeContext *s = avctx->priv_data;
+    VAAPIDecodePicture *pic = s->hwaccel_picture_private;
+
+    return ff_vaapi_decode_issue(avctx, pic);
+}
+
+static int vaapi_mjpeg_decode_slice(AVCodecContext *avctx,
+                                    const uint8_t  *buffer,
+                                    uint32_t        size)
+{
+    const MJpegDecodeContext *s = avctx->priv_data;
+    VAAPIDecodePicture *pic = s->hwaccel_picture_private;
+    VAHuffmanTableBufferJPEGBaseline huff;
+    VAIQMatrixBufferJPEGBaseline quant;
+    VASliceParameterBufferJPEGBaseline sp;
+    int err, i, j;
+
+    memset(&huff, 0, sizeof(huff));
+    for (i = 0; i < 2; i++) {
+        huff.load_huffman_table[i] = 1;
+        for (j = 0; j < 16; j++)
+            huff.huffman_table[i].num_dc_codes[j] = s->raw_huffman_lengths[0][i][j];
+        for (j = 0; j < 12; j++)
+            huff.huffman_table[i].dc_values[j] = s->raw_huffman_values[0][i][j];
+        for (j = 0; j < 16; j++)
+            huff.huffman_table[i].num_ac_codes[j] = s->raw_huffman_lengths[1][i][j];
+        for (j = 0; j < 162; j++)
+            huff.huffman_table[i].ac_values[j] = s->raw_huffman_values[1][i][j];
+    }
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, pic,
+                                            VAHuffmanTableBufferType,
+                                            &huff, sizeof(huff));
+    if (err < 0)
+        goto fail;
+
+    memset(&quant, 0, sizeof(quant));
+    for (i = 0; i < 4; i++) {
+        quant.load_quantiser_table[i] = 1;
+        for (j = 0; j < 64; j++)
+            quant.quantiser_table[i][j] = s->quant_matrixes[i][j];
+    }
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, pic,
+                                            VAIQMatrixBufferType,
+                                            &quant, sizeof(quant));
+    if (err < 0)
+        goto fail;
+
+    sp = (VASliceParameterBufferJPEGBaseline) {
+        .slice_data_size   = size,
+        .slice_data_offset = 0,
+        .slice_data_flag   = VA_SLICE_DATA_FLAG_ALL,
+
+        .slice_horizontal_position = 0,
+        .slice_vertical_position   = 0,
+
+        .restart_interval          = s->restart_interval,
+        .num_mcus                  = s->mb_width * s->mb_height,
+    };
+
+    sp.num_components = s->nb_components;
+    for (i = 0; i < s->nb_components; i++) {
+        sp.components[i].component_selector = s->component_id[s->comp_index[i]];
+        sp.components[i].dc_table_selector  = s->dc_index[i];
+        sp.components[i].ac_table_selector  = s->ac_index[i];
+    }
+
+    err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &sp, sizeof(sp), buffer, size);
+    if (err)
+        goto fail;
+
+    return 0;
+
+fail:
+    ff_vaapi_decode_cancel(avctx, pic);
+    return err;
+}
+
+const AVHWAccel ff_mjpeg_vaapi_hwaccel = {
+    .name                 = "mjpeg_vaapi",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_MJPEG,
+    .pix_fmt              = AV_PIX_FMT_VAAPI,
+    .start_frame          = &vaapi_mjpeg_start_frame,
+    .end_frame            = &vaapi_mjpeg_end_frame,
+    .decode_slice         = &vaapi_mjpeg_decode_slice,
+    .frame_priv_data_size = sizeof(VAAPIDecodePicture),
+    .init                 = &ff_vaapi_decode_init,
+    .uninit               = &ff_vaapi_decode_uninit,
+    .frame_params         = &ff_vaapi_common_frame_params,
+    .priv_data_size       = sizeof(VAAPIDecodeContext),
+    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
+};

diff --git a/libavcodec/vaapi_mpeg2.c b/libavcodec/vaapi_mpeg2.c
index 0d197c9..aaed434 100644
--- a/libavcodec/vaapi_mpeg2.c
+++ b/libavcodec/vaapi_mpeg2.c

@@ -172,7 +172,7 @@
     return 0;
 }
 
-AVHWAccel ff_mpeg2_vaapi_hwaccel = {
+const AVHWAccel ff_mpeg2_vaapi_hwaccel = {
     .name                 = "mpeg2_vaapi",
     .type                 = AVMEDIA_TYPE_VIDEO,
     .id                   = AV_CODEC_ID_MPEG2VIDEO,
@@ -183,6 +183,7 @@
     .frame_priv_data_size = sizeof(VAAPIDecodePicture),
     .init                 = &ff_vaapi_decode_init,
     .uninit               = &ff_vaapi_decode_uninit,
+    .frame_params         = &ff_vaapi_common_frame_params,
     .priv_data_size       = sizeof(VAAPIDecodeContext),
     .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
 };

diff --git a/libavcodec/vaapi_mpeg4.c b/libavcodec/vaapi_mpeg4.c
index f8c5ddf..11860ff 100644
--- a/libavcodec/vaapi_mpeg4.c
+++ b/libavcodec/vaapi_mpeg4.c

@@ -178,7 +178,7 @@
 }
 
 #if CONFIG_MPEG4_VAAPI_HWACCEL
-AVHWAccel ff_mpeg4_vaapi_hwaccel = {
+const AVHWAccel ff_mpeg4_vaapi_hwaccel = {
     .name                 = "mpeg4_vaapi",
     .type                 = AVMEDIA_TYPE_VIDEO,
     .id                   = AV_CODEC_ID_MPEG4,
@@ -189,13 +189,14 @@
     .frame_priv_data_size = sizeof(VAAPIDecodePicture),
     .init                 = &ff_vaapi_decode_init,
     .uninit               = &ff_vaapi_decode_uninit,
+    .frame_params         = &ff_vaapi_common_frame_params,
     .priv_data_size       = sizeof(VAAPIDecodeContext),
     .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
 };
 #endif
 
 #if CONFIG_H263_VAAPI_HWACCEL
-AVHWAccel ff_h263_vaapi_hwaccel = {
+const AVHWAccel ff_h263_vaapi_hwaccel = {
     .name                 = "h263_vaapi",
     .type                 = AVMEDIA_TYPE_VIDEO,
     .id                   = AV_CODEC_ID_H263,
@@ -206,6 +207,7 @@
     .frame_priv_data_size = sizeof(VAAPIDecodePicture),
     .init                 = &ff_vaapi_decode_init,
     .uninit               = &ff_vaapi_decode_uninit,
+    .frame_params         = &ff_vaapi_common_frame_params,
     .priv_data_size       = sizeof(VAAPIDecodeContext),
     .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
 };

diff --git a/libavcodec/vaapi_vc1.c b/libavcodec/vaapi_vc1.c
index 30c9ed3..921ca63 100644
--- a/libavcodec/vaapi_vc1.c
+++ b/libavcodec/vaapi_vc1.c

@@ -44,7 +44,8 @@
 {
     if (v->mv_type_is_raw)
         return 0;
-    return v->s.pict_type == AV_PICTURE_TYPE_P &&
+    return v->fcm == PROGRESSIVE &&
+           (v->s.pict_type == AV_PICTURE_TYPE_P && !v->p_frame_skipped) &&
            (v->mv_mode == MV_PMODE_MIXED_MV ||
             (v->mv_mode == MV_PMODE_INTENSITY_COMP &&
              v->mv_mode2 == MV_PMODE_MIXED_MV));
@@ -55,8 +56,9 @@
 {
     if (v->skip_is_raw)
         return 0;
-    return v->s.pict_type == AV_PICTURE_TYPE_P ||
-           (v->s.pict_type == AV_PICTURE_TYPE_B && !v->bi_type);
+    return (v->fcm == PROGRESSIVE || v->fcm == ILACE_FRAME) &&
+           ((v->s.pict_type == AV_PICTURE_TYPE_P && !v->p_frame_skipped) ||
+            (v->s.pict_type == AV_PICTURE_TYPE_B && !v->bi_type));
 }
 
 /** Check whether the DIRECTMB bitplane is present */
@@ -64,7 +66,8 @@
 {
     if (v->dmb_is_raw)
         return 0;
-    return v->s.pict_type == AV_PICTURE_TYPE_B && !v->bi_type;
+    return (v->fcm == PROGRESSIVE || v->fcm == ILACE_FRAME) &&
+           (v->s.pict_type == AV_PICTURE_TYPE_B && !v->bi_type);
 }
 
 /** Check whether the ACPRED bitplane is present */
@@ -89,6 +92,25 @@
            v->condover == CONDOVER_SELECT;
 }
 
+/** Check whether the FIELDTX bitplane is present */
+static inline int vc1_has_FIELDTX_bitplane(const VC1Context *v)
+{
+    if (v->fieldtx_is_raw)
+        return 0;
+    return v->fcm == ILACE_FRAME &&
+           (v->s.pict_type == AV_PICTURE_TYPE_I ||
+            (v->s.pict_type == AV_PICTURE_TYPE_B && v->bi_type));
+}
+
+/** Check whether the FORWARDMB bitplane is present */
+static inline int vc1_has_FORWARDMB_bitplane(const VC1Context *v)
+{
+    if (v->fmb_is_raw)
+        return 0;
+    return v->fcm == ILACE_FIELD &&
+           (v->s.pict_type == AV_PICTURE_TYPE_B && !v->bi_type);
+}
+
 /** Reconstruct bitstream PTYPE (7.1.1.4, index into Table-35) */
 static int vc1_get_PTYPE(const VC1Context *v)
 {
@@ -101,11 +123,24 @@
     return 0;
 }
 
+/** Reconstruct bitstream FPTYPE (9.1.1.42, index into Table-105) */
+static int vc1_get_FPTYPE(const VC1Context *v)
+{
+    const MpegEncContext *s = &v->s;
+    switch (s->pict_type) {
+    case AV_PICTURE_TYPE_I: return 0;
+    case AV_PICTURE_TYPE_P: return 3;
+    case AV_PICTURE_TYPE_B: return v->bi_type ? 7 : 4;
+    }
+    return 0;
+}
+
 /** Reconstruct bitstream MVMODE (7.1.1.32) */
 static inline VAMvModeVC1 vc1_get_MVMODE(const VC1Context *v)
 {
-    if (v->s.pict_type == AV_PICTURE_TYPE_P ||
-        (v->s.pict_type == AV_PICTURE_TYPE_B && !v->bi_type))
+    if ((v->fcm == PROGRESSIVE || v->fcm == ILACE_FIELD) &&
+        ((v->s.pict_type == AV_PICTURE_TYPE_P && !v->p_frame_skipped) ||
+         (v->s.pict_type == AV_PICTURE_TYPE_B && !v->bi_type)))
         return get_VAMvModeVC1(v->mv_mode);
     return 0;
 }
@@ -113,11 +148,78 @@
 /** Reconstruct bitstream MVMODE2 (7.1.1.33) */
 static inline VAMvModeVC1 vc1_get_MVMODE2(const VC1Context *v)
 {
-    if (v->s.pict_type == AV_PICTURE_TYPE_P && v->mv_mode == MV_PMODE_INTENSITY_COMP)
+    if ((v->fcm == PROGRESSIVE || v->fcm == ILACE_FIELD) &&
+        (v->s.pict_type == AV_PICTURE_TYPE_P && !v->p_frame_skipped) &&
+        v->mv_mode == MV_PMODE_INTENSITY_COMP)
         return get_VAMvModeVC1(v->mv_mode2);
     return 0;
 }
 
+av_unused static inline int vc1_get_INTCOMPFIELD(const VC1Context *v)
+{
+    if ((v->s.pict_type == AV_PICTURE_TYPE_P && !v->p_frame_skipped) &&
+        v->fcm == ILACE_FIELD &&
+        v->mv_mode == MV_PMODE_INTENSITY_COMP)
+        switch (v->intcompfield) {
+        case 1: return 1;
+        case 2: return 2;
+        case 3: return 0;
+        }
+    return 0;
+}
+
+static inline int vc1_get_LUMSCALE(const VC1Context *v)
+{
+    if (v->s.pict_type == AV_PICTURE_TYPE_P && !v->p_frame_skipped) {
+        if ((v->fcm == PROGRESSIVE && v->mv_mode == MV_PMODE_INTENSITY_COMP) ||
+            (v->fcm == ILACE_FRAME && v->intcomp))
+            return v->lumscale;
+        else if (v->fcm == ILACE_FIELD && v->mv_mode == MV_PMODE_INTENSITY_COMP)
+            switch (v->intcompfield) {
+            case 1: return v->lumscale;
+            case 2: return v->lumscale2;
+            case 3: return v->lumscale;
+        }
+    }
+    return 0;
+}
+
+static inline int vc1_get_LUMSHIFT(const VC1Context *v)
+{
+    if (v->s.pict_type == AV_PICTURE_TYPE_P && !v->p_frame_skipped) {
+        if ((v->fcm == PROGRESSIVE && v->mv_mode == MV_PMODE_INTENSITY_COMP) ||
+            (v->fcm == ILACE_FRAME && v->intcomp))
+            return v->lumshift;
+        else if (v->fcm == ILACE_FIELD && v->mv_mode == MV_PMODE_INTENSITY_COMP)
+            switch (v->intcompfield) {
+            case 1: return v->lumshift;
+            case 2: return v->lumshift2;
+            case 3: return v->lumshift;
+        }
+    }
+    return 0;
+}
+
+av_unused static inline int vc1_get_LUMSCALE2(const VC1Context *v)
+{
+    if ((v->s.pict_type == AV_PICTURE_TYPE_P && !v->p_frame_skipped) &&
+        v->fcm == ILACE_FIELD &&
+        v->mv_mode == MV_PMODE_INTENSITY_COMP &&
+        v->intcompfield == 3)
+        return v->lumscale2;
+    return 0;
+}
+
+av_unused static inline int vc1_get_LUMSHIFT2(const VC1Context *v)
+{
+    if ((v->s.pict_type == AV_PICTURE_TYPE_P && !v->p_frame_skipped) &&
+        v->fcm == ILACE_FIELD &&
+        v->mv_mode == MV_PMODE_INTENSITY_COMP &&
+        v->intcompfield == 3)
+        return v->lumshift2;
+    return 0;
+}
+
 /** Reconstruct bitstream TTFRM (7.1.1.41, Table-53) */
 static inline int vc1_get_TTFRM(const VC1Context *v)
 {
@@ -189,27 +291,32 @@
             .chroma                        = v->range_mapuv,
         },
         .b_picture_fraction                = v->bfraction_lut_index,
-        .cbp_table                         = v->cbpcy_vlc ? v->cbpcy_vlc - ff_vc1_cbpcy_p_vlc : 0,
-        .mb_mode_table                     = 0, /* XXX: interlaced frame */
+        .cbp_table                         = (v->fcm == PROGRESSIVE ? v->cbptab : v->icbptab),
+        .mb_mode_table                     = v->mbmodetab,
         .range_reduction_frame             = v->rangeredfrm,
         .rounding_control                  = v->rnd,
         .post_processing                   = v->postproc,
         .picture_resolution_index          = v->respic,
-        .luma_scale                        = v->lumscale,
-        .luma_shift                        = v->lumshift,
         .picture_fields.bits = {
-            .picture_type                  = vc1_get_PTYPE(v),
+            .picture_type                  = (v->fcm == ILACE_FIELD ? vc1_get_FPTYPE(v) : vc1_get_PTYPE(v)),
             .frame_coding_mode             = v->fcm,
             .top_field_first               = v->tff,
-            .is_first_field                = v->fcm == 0, /* XXX: interlaced frame */
-            .intensity_compensation        = v->mv_mode == MV_PMODE_INTENSITY_COMP,
+            .is_first_field                = !v->second_field,
+            .intensity_compensation        = v->intcomp,
         },
+        .luma_scale                        = vc1_get_LUMSCALE(v),
+        .luma_shift                        = vc1_get_LUMSHIFT(v),
+#if VA_CHECK_VERSION(1, 1, 0)
+        .luma_scale2                       = vc1_get_LUMSCALE2(v),
+        .luma_shift2                       = vc1_get_LUMSHIFT2(v),
+        .intensity_compensation_field      = vc1_get_INTCOMPFIELD(v),
+#endif
         .raw_coding.flags = {
             .mv_type_mb                    = v->mv_type_is_raw,
             .direct_mb                     = v->dmb_is_raw,
             .skip_mb                       = v->skip_is_raw,
-            .field_tx                      = 0, /* XXX: interlaced frame */
-            .forward_mb                    = 0, /* XXX: interlaced frame */
+            .field_tx                      = v->fieldtx_is_raw,
+            .forward_mb                    = v->fmb_is_raw,
             .ac_pred                       = v->acpred_is_raw,
             .overflags                     = v->overflg_is_raw,
         },
@@ -217,28 +324,28 @@
             .bp_mv_type_mb                 = vc1_has_MVTYPEMB_bitplane(v),
             .bp_direct_mb                  = vc1_has_DIRECTMB_bitplane(v),
             .bp_skip_mb                    = vc1_has_SKIPMB_bitplane(v),
-            .bp_field_tx                   = 0, /* XXX: interlaced frame */
-            .bp_forward_mb                 = 0, /* XXX: interlaced frame */
+            .bp_field_tx                   = vc1_has_FIELDTX_bitplane(v),
+            .bp_forward_mb                 = vc1_has_FORWARDMB_bitplane(v),
             .bp_ac_pred                    = vc1_has_ACPRED_bitplane(v),
             .bp_overflags                  = vc1_has_OVERFLAGS_bitplane(v),
         },
         .reference_fields.bits = {
             .reference_distance_flag       = v->refdist_flag,
-            .reference_distance            = 0, /* XXX: interlaced frame */
-            .num_reference_pictures        = 0, /* XXX: interlaced frame */
-            .reference_field_pic_indicator = 0, /* XXX: interlaced frame */
+            .reference_distance            = v->refdist,
+            .num_reference_pictures        = v->numref,
+            .reference_field_pic_indicator = v->reffield,
         },
         .mv_fields.bits = {
             .mv_mode                       = vc1_get_MVMODE(v),
             .mv_mode2                      = vc1_get_MVMODE2(v),
-            .mv_table                      = s->mv_table_index,
-            .two_mv_block_pattern_table    = 0, /* XXX: interlaced frame */
-            .four_mv_switch                = 0, /* XXX: interlaced frame */
-            .four_mv_block_pattern_table   = 0, /* XXX: interlaced frame */
+            .mv_table                      = (v->fcm == PROGRESSIVE ? s->mv_table_index : v->imvtab),
+            .two_mv_block_pattern_table    = v->twomvbptab,
+            .four_mv_switch                = v->fourmvswitch,
+            .four_mv_block_pattern_table   = v->fourmvbptab,
             .extended_mv_flag              = v->extended_mv,
             .extended_mv_range             = v->mvrange,
             .extended_dmv_flag             = v->extended_dmv,
-            .extended_dmv_range            = 0, /* XXX: interlaced frame */
+            .extended_dmv_range            = v->dmvrange,
         },
         .pic_quantizer_fields.bits = {
             .dquant                        = v->dquant,
@@ -278,7 +385,7 @@
     if (err)
         goto fail;
 
-    if (pic_param.bitplane_present.value) {
+    if (pic_param.bitplane_present.value & 0x7f) {
         uint8_t *bitplane;
         const uint8_t *ff_bp[3];
         int x, y, n;
@@ -298,14 +405,14 @@
             break;
         case AV_PICTURE_TYPE_B:
             if (!v->bi_type) {
-                ff_bp[0] = pic_param.bitplane_present.flags.bp_direct_mb ? v->direct_mb_plane : NULL;
-                ff_bp[1] = pic_param.bitplane_present.flags.bp_skip_mb   ? s->mbskip_table    : NULL;
-                ff_bp[2] = NULL; /* XXX: interlaced frame (FORWARD plane) */
+                ff_bp[0] = pic_param.bitplane_present.flags.bp_direct_mb  ? v->direct_mb_plane  : NULL;
+                ff_bp[1] = pic_param.bitplane_present.flags.bp_skip_mb    ? s->mbskip_table     : NULL;
+                ff_bp[2] = pic_param.bitplane_present.flags.bp_forward_mb ? v->forward_mb_plane : NULL;
                 break;
             }
             /* fall-through (BI-type) */
         case AV_PICTURE_TYPE_I:
-            ff_bp[0] = NULL; /* XXX: interlaced frame (FIELDTX plane) */
+            ff_bp[0] = pic_param.bitplane_present.flags.bp_field_tx   ? v->fieldtx_plane      : NULL;
             ff_bp[1] = pic_param.bitplane_present.flags.bp_ac_pred    ? v->acpred_plane       : NULL;
             ff_bp[2] = pic_param.bitplane_present.flags.bp_overflags  ? v->over_flags_plane   : NULL;
             break;
@@ -360,6 +467,7 @@
     const MpegEncContext *s = &v->s;
     VAAPIDecodePicture *pic = s->current_picture_ptr->hwaccel_picture_private;
     VASliceParameterBufferVC1 slice_param;
+    int mb_height;
     int err;
 
     /* Current bit buffer is beyond any marker for VC-1, so skip it */
@@ -368,12 +476,17 @@
         size -= 4;
     }
 
+    if (v->fcm == ILACE_FIELD)
+        mb_height = avctx->coded_height + 31 >> 5;
+    else
+        mb_height = avctx->coded_height + 15 >> 4;
+
     slice_param = (VASliceParameterBufferVC1) {
         .slice_data_size         = size,
         .slice_data_offset       = 0,
         .slice_data_flag         = VA_SLICE_DATA_FLAG_ALL,
         .macroblock_offset       = get_bits_count(&s->gb),
-        .slice_vertical_position = s->mb_y,
+        .slice_vertical_position = s->mb_y % mb_height,
     };
 
     err = ff_vaapi_decode_make_slice_buffer(avctx, pic,
@@ -388,7 +501,7 @@
 }
 
 #if CONFIG_WMV3_VAAPI_HWACCEL
-AVHWAccel ff_wmv3_vaapi_hwaccel = {
+const AVHWAccel ff_wmv3_vaapi_hwaccel = {
     .name                 = "wmv3_vaapi",
     .type                 = AVMEDIA_TYPE_VIDEO,
     .id                   = AV_CODEC_ID_WMV3,
@@ -399,12 +512,13 @@
     .frame_priv_data_size = sizeof(VAAPIDecodePicture),
     .init                 = &ff_vaapi_decode_init,
     .uninit               = &ff_vaapi_decode_uninit,
+    .frame_params         = &ff_vaapi_common_frame_params,
     .priv_data_size       = sizeof(VAAPIDecodeContext),
     .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
 };
 #endif
 
-AVHWAccel ff_vc1_vaapi_hwaccel = {
+const AVHWAccel ff_vc1_vaapi_hwaccel = {
     .name                 = "vc1_vaapi",
     .type                 = AVMEDIA_TYPE_VIDEO,
     .id                   = AV_CODEC_ID_VC1,
@@ -415,6 +529,7 @@
     .frame_priv_data_size = sizeof(VAAPIDecodePicture),
     .init                 = &ff_vaapi_decode_init,
     .uninit               = &ff_vaapi_decode_uninit,
+    .frame_params         = &ff_vaapi_common_frame_params,
     .priv_data_size       = sizeof(VAAPIDecodeContext),
     .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
 };

diff --git a/libavcodec/vaapi_vp8.c b/libavcodec/vaapi_vp8.c
new file mode 100644
index 0000000..2426b30
--- /dev/null
+++ b/libavcodec/vaapi_vp8.c

@@ -0,0 +1,237 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <va/va.h>
+#include <va/va_dec_vp8.h>
+
+#include "hwaccel.h"
+#include "vaapi_decode.h"
+#include "vp8.h"
+
+static VASurfaceID vaapi_vp8_surface_id(VP8Frame *vf)
+{
+    if (vf)
+        return ff_vaapi_get_surface_id(vf->tf.f);
+    else
+        return VA_INVALID_SURFACE;
+}
+
+static int vaapi_vp8_start_frame(AVCodecContext          *avctx,
+                                 av_unused const uint8_t *buffer,
+                                 av_unused uint32_t       size)
+{
+    const VP8Context *s = avctx->priv_data;
+    VAAPIDecodePicture *pic = s->framep[VP56_FRAME_CURRENT]->hwaccel_picture_private;
+    VAPictureParameterBufferVP8 pp;
+    VAProbabilityDataBufferVP8 prob;
+    VAIQMatrixBufferVP8 quant;
+    int err, i, j, k;
+
+    pic->output_surface = vaapi_vp8_surface_id(s->framep[VP56_FRAME_CURRENT]);
+
+    pp = (VAPictureParameterBufferVP8) {
+        .frame_width                     = avctx->width,
+        .frame_height                    = avctx->height,
+
+        .last_ref_frame                  = vaapi_vp8_surface_id(s->framep[VP56_FRAME_PREVIOUS]),
+        .golden_ref_frame                = vaapi_vp8_surface_id(s->framep[VP56_FRAME_GOLDEN]),
+        .alt_ref_frame                   = vaapi_vp8_surface_id(s->framep[VP56_FRAME_GOLDEN2]),
+        .out_of_loop_frame               = VA_INVALID_SURFACE,
+
+        .pic_fields.bits = {
+            .key_frame                   = !s->keyframe,
+            .version                     = s->profile,
+
+            .segmentation_enabled        = s->segmentation.enabled,
+            .update_mb_segmentation_map  = s->segmentation.update_map,
+            .update_segment_feature_data = s->segmentation.update_feature_data,
+
+            .filter_type                 = s->filter.simple,
+            .sharpness_level             = s->filter.sharpness,
+
+            .loop_filter_adj_enable      = s->lf_delta.enabled,
+            .mode_ref_lf_delta_update    = s->lf_delta.update,
+
+            .sign_bias_golden            = s->sign_bias[VP56_FRAME_GOLDEN],
+            .sign_bias_alternate         = s->sign_bias[VP56_FRAME_GOLDEN2],
+
+            .mb_no_coeff_skip            = s->mbskip_enabled,
+            .loop_filter_disable         = s->filter.level == 0,
+        },
+
+        .prob_skip_false                 = s->prob->mbskip,
+        .prob_intra                      = s->prob->intra,
+        .prob_last                       = s->prob->last,
+        .prob_gf                         = s->prob->golden,
+    };
+
+    for (i = 0; i < 3; i++)
+        pp.mb_segment_tree_probs[i] = s->prob->segmentid[i];
+
+    for (i = 0; i < 4; i++) {
+        if (s->segmentation.enabled) {
+            pp.loop_filter_level[i] = s->segmentation.filter_level[i];
+            if (!s->segmentation.absolute_vals)
+                pp.loop_filter_level[i] += s->filter.level;
+        } else {
+            pp.loop_filter_level[i] = s->filter.level;
+        }
+        pp.loop_filter_level[i] = av_clip_uintp2(pp.loop_filter_level[i], 6);
+    }
+
+    for (i = 0; i < 4; i++) {
+        pp.loop_filter_deltas_ref_frame[i] = s->lf_delta.ref[i];
+        pp.loop_filter_deltas_mode[i] = s->lf_delta.mode[i + 4];
+    }
+
+    if (s->keyframe) {
+        static const uint8_t keyframe_y_mode_probs[4] = {
+            145, 156, 163, 128
+        };
+        static const uint8_t keyframe_uv_mode_probs[3] = {
+            142, 114, 183
+        };
+        memcpy(pp.y_mode_probs,  keyframe_y_mode_probs,  4);
+        memcpy(pp.uv_mode_probs, keyframe_uv_mode_probs, 3);
+    } else {
+        for (i = 0; i < 4; i++)
+            pp.y_mode_probs[i] = s->prob->pred16x16[i];
+        for (i = 0; i < 3; i++)
+            pp.uv_mode_probs[i] = s->prob->pred8x8c[i];
+    }
+    for (i = 0; i < 2; i++)
+        for (j = 0; j < 19; j++)
+            pp.mv_probs[i][j] = s->prob->mvc[i][j];
+
+    pp.bool_coder_ctx.range = s->coder_state_at_header_end.range;
+    pp.bool_coder_ctx.value = s->coder_state_at_header_end.value;
+    pp.bool_coder_ctx.count = s->coder_state_at_header_end.bit_count;
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, pic,
+                                            VAPictureParameterBufferType,
+                                            &pp, sizeof(pp));
+    if (err < 0)
+        goto fail;
+
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 8; j++) {
+            static const int coeff_bands_inverse[8] = {
+                0, 1, 2, 3, 5, 6, 4, 15
+            };
+            int coeff_pos = coeff_bands_inverse[j];
+
+            for (k = 0; k < 3; k++) {
+                memcpy(prob.dct_coeff_probs[i][j][k],
+                       s->prob->token[i][coeff_pos][k], 11);
+            }
+        }
+    }
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, pic,
+                                            VAProbabilityBufferType,
+                                            &prob, sizeof(prob));
+    if (err < 0)
+        goto fail;
+
+    for (i = 0; i < 4; i++) {
+        int base_qi = s->segmentation.base_quant[i];
+        if (!s->segmentation.absolute_vals)
+            base_qi += s->quant.yac_qi;
+
+        quant.quantization_index[i][0] = av_clip_uintp2(base_qi,                       7);
+        quant.quantization_index[i][1] = av_clip_uintp2(base_qi + s->quant.ydc_delta,  7);
+        quant.quantization_index[i][2] = av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7);
+        quant.quantization_index[i][3] = av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7);
+        quant.quantization_index[i][4] = av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7);
+        quant.quantization_index[i][5] = av_clip_uintp2(base_qi + s->quant.uvac_delta, 7);
+    }
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, pic,
+                                            VAIQMatrixBufferType,
+                                            &quant, sizeof(quant));
+    if (err < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    ff_vaapi_decode_cancel(avctx, pic);
+    return err;
+}
+
+static int vaapi_vp8_end_frame(AVCodecContext *avctx)
+{
+    const VP8Context *s = avctx->priv_data;
+    VAAPIDecodePicture *pic = s->framep[VP56_FRAME_CURRENT]->hwaccel_picture_private;
+
+    return ff_vaapi_decode_issue(avctx, pic);
+}
+
+static int vaapi_vp8_decode_slice(AVCodecContext *avctx,
+                                  const uint8_t  *buffer,
+                                  uint32_t        size)
+{
+    const VP8Context *s = avctx->priv_data;
+    VAAPIDecodePicture *pic = s->framep[VP56_FRAME_CURRENT]->hwaccel_picture_private;
+    VASliceParameterBufferVP8 sp;
+    int err, i;
+
+    unsigned int header_size = 3 + 7 * s->keyframe;
+    const uint8_t *data = buffer + header_size;
+    unsigned int data_size = size - header_size;
+
+    sp = (VASliceParameterBufferVP8) {
+        .slice_data_size   = data_size,
+        .slice_data_offset = 0,
+        .slice_data_flag   = VA_SLICE_DATA_FLAG_ALL,
+
+        .macroblock_offset = (8 * (s->coder_state_at_header_end.input - data) -
+                              s->coder_state_at_header_end.bit_count - 8),
+        .num_of_partitions = s->num_coeff_partitions + 1,
+    };
+
+    sp.partition_size[0] = s->header_partition_size - ((sp.macroblock_offset + 7) / 8);
+    for (i = 0; i < 8; i++)
+        sp.partition_size[i+1] = s->coeff_partition_size[i];
+
+    err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &sp, sizeof(sp), data, data_size);
+    if (err)
+        goto fail;
+
+    return 0;
+
+fail:
+    ff_vaapi_decode_cancel(avctx, pic);
+    return err;
+}
+
+const AVHWAccel ff_vp8_vaapi_hwaccel = {
+    .name                 = "vp8_vaapi",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_VP8,
+    .pix_fmt              = AV_PIX_FMT_VAAPI,
+    .start_frame          = &vaapi_vp8_start_frame,
+    .end_frame            = &vaapi_vp8_end_frame,
+    .decode_slice         = &vaapi_vp8_decode_slice,
+    .frame_priv_data_size = sizeof(VAAPIDecodePicture),
+    .init                 = &ff_vaapi_decode_init,
+    .uninit               = &ff_vaapi_decode_uninit,
+    .frame_params         = &ff_vaapi_common_frame_params,
+    .priv_data_size       = sizeof(VAAPIDecodeContext),
+    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
+};

diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
index d8ece75..f384ba7 100644
--- a/libavcodec/vaapi_vp9.c
+++ b/libavcodec/vaapi_vp9.c

@@ -168,7 +168,7 @@
     return 0;
 }
 
-AVHWAccel ff_vp9_vaapi_hwaccel = {
+const AVHWAccel ff_vp9_vaapi_hwaccel = {
     .name                 = "vp9_vaapi",
     .type                 = AVMEDIA_TYPE_VIDEO,
     .id                   = AV_CODEC_ID_VP9,
@@ -179,6 +179,7 @@
     .frame_priv_data_size = sizeof(VAAPIDecodePicture),
     .init                 = ff_vaapi_decode_init,
     .uninit               = ff_vaapi_decode_uninit,
+    .frame_params         = ff_vaapi_common_frame_params,
     .priv_data_size       = sizeof(VAAPIDecodeContext),
     .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
 };

diff --git a/libavcodec/vb.c b/libavcodec/vb.c
index 021657f..c6dd6fb 100644
--- a/libavcodec/vb.c
+++ b/libavcodec/vb.c

@@ -107,6 +107,10 @@
     blk2   = 0;
     for (blk = 0; blk < blocks; blk++) {
         if (!(blk & 3)) {
+            if (bytestream2_get_bytes_left(&g) < 1) {
+                av_log(c->avctx, AV_LOG_ERROR, "Insufficient data\n");
+                return AVERROR_INVALIDDATA;
+            }
             blocktypes = bytestream2_get_byte(&g);
         }
         switch (blocktypes & 0xC0) {

diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index 48a2cc1..3581d87 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c

@@ -314,11 +314,11 @@
     v->multires        = get_bits1(gb);
     v->res_fasttx      = get_bits1(gb);
     if (!v->res_fasttx) {
-        v->vc1dsp.vc1_inv_trans_8x8    = ff_simple_idct_8;
+        v->vc1dsp.vc1_inv_trans_8x8    = ff_simple_idct_int16_8bit;
         v->vc1dsp.vc1_inv_trans_8x4    = ff_simple_idct84_add;
         v->vc1dsp.vc1_inv_trans_4x8    = ff_simple_idct48_add;
         v->vc1dsp.vc1_inv_trans_4x4    = ff_simple_idct44_add;
-        v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add_8;
+        v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add_int16_8bit;
         v->vc1dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add;
         v->vc1dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add;
         v->vc1dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add;
@@ -629,7 +629,7 @@
     int pqindex, lowquant, status;
 
     v->field_mode = 0;
-    v->fcm = 0;
+    v->fcm = PROGRESSIVE;
     if (v->finterpflag)
         v->interpfrm = get_bits1(gb);
     if (!v->s.avctx->codec)
@@ -766,7 +766,8 @@
 
         /* Hopefully this is correct for P-frames */
         v->s.mv_table_index = get_bits(gb, 2); //but using ff_vc1_ tables
-        v->cbpcy_vlc = &ff_vc1_cbpcy_p_vlc[get_bits(gb, 2)];
+        v->cbptab = get_bits(gb, 2);
+        v->cbpcy_vlc = &ff_vc1_cbpcy_p_vlc[v->cbptab];
 
         if (v->dquant) {
             av_log(v->s.avctx, AV_LOG_DEBUG, "VOP DQuant info\n");
@@ -804,7 +805,8 @@
                "Imode: %i, Invert: %i\n", status>>1, status&1);
 
         v->s.mv_table_index = get_bits(gb, 2);
-        v->cbpcy_vlc        = &ff_vc1_cbpcy_p_vlc[get_bits(gb, 2)];
+        v->cbptab           = get_bits(gb, 2);
+        v->cbpcy_vlc        = &ff_vc1_cbpcy_p_vlc[v->cbptab];
 
         if (v->dquant) {
             av_log(v->s.avctx, AV_LOG_DEBUG, "VOP DQuant info\n");
@@ -845,7 +847,6 @@
 {
     int pqindex, lowquant;
     int status;
-    int mbmodetab, imvtab, icbptab, twomvbptab, fourmvbptab; /* useful only for debugging */
     int field_mode, fcm;
 
     v->numref          = 0;
@@ -990,6 +991,7 @@
         v->pquantizer = 1;
         break;
     }
+    v->dquantfrm = 0;
     if (v->postprocflag)
         v->postproc = get_bits(gb, 2);
 
@@ -1008,7 +1010,8 @@
                 return -1;
             av_log(v->s.avctx, AV_LOG_DEBUG, "FIELDTX plane encoding: "
                    "Imode: %i, Invert: %i\n", status>>1, status&1);
-        }
+        } else
+            v->fieldtx_is_raw = 0;
         status = bitplane_decoding(v->acpred_plane, &v->acpred_is_raw, v);
         if (status < 0)
             return -1;
@@ -1054,23 +1057,25 @@
                     v->last_use_ic = 1;
                 }
                 status = bitplane_decoding(v->s.mbskip_table, &v->skip_is_raw, v);
+                if (status < 0)
+                    return -1;
                 av_log(v->s.avctx, AV_LOG_DEBUG, "SKIPMB plane encoding: "
                        "Imode: %i, Invert: %i\n", status>>1, status&1);
-                mbmodetab = get_bits(gb, 2);
+                v->mbmodetab = get_bits(gb, 2);
                 if (v->fourmvswitch)
-                    v->mbmode_vlc = &ff_vc1_intfr_4mv_mbmode_vlc[mbmodetab];
+                    v->mbmode_vlc = &ff_vc1_intfr_4mv_mbmode_vlc[v->mbmodetab];
                 else
-                    v->mbmode_vlc = &ff_vc1_intfr_non4mv_mbmode_vlc[mbmodetab];
-                imvtab         = get_bits(gb, 2);
-                v->imv_vlc     = &ff_vc1_1ref_mvdata_vlc[imvtab];
+                    v->mbmode_vlc = &ff_vc1_intfr_non4mv_mbmode_vlc[v->mbmodetab];
+                v->imvtab      = get_bits(gb, 2);
+                v->imv_vlc     = &ff_vc1_1ref_mvdata_vlc[v->imvtab];
                 // interlaced p-picture cbpcy range is [1, 63]
-                icbptab        = get_bits(gb, 3);
-                v->cbpcy_vlc   = &ff_vc1_icbpcy_vlc[icbptab];
-                twomvbptab     = get_bits(gb, 2);
-                v->twomvbp_vlc = &ff_vc1_2mv_block_pattern_vlc[twomvbptab];
+                v->icbptab     = get_bits(gb, 3);
+                v->cbpcy_vlc   = &ff_vc1_icbpcy_vlc[v->icbptab];
+                v->twomvbptab     = get_bits(gb, 2);
+                v->twomvbp_vlc = &ff_vc1_2mv_block_pattern_vlc[v->twomvbptab];
                 if (v->fourmvswitch) {
-                    fourmvbptab     = get_bits(gb, 2);
-                    v->fourmvbp_vlc = &ff_vc1_4mv_block_pattern_vlc[fourmvbptab];
+                    v->fourmvbptab     = get_bits(gb, 2);
+                    v->fourmvbp_vlc = &ff_vc1_4mv_block_pattern_vlc[v->fourmvbptab];
                 }
             }
         }
@@ -1154,27 +1159,28 @@
 
             /* Hopefully this is correct for P-frames */
             v->s.mv_table_index = get_bits(gb, 2); //but using ff_vc1_ tables
-            v->cbpcy_vlc        = &ff_vc1_cbpcy_p_vlc[get_bits(gb, 2)];
+            v->cbptab           = get_bits(gb, 2);
+            v->cbpcy_vlc        = &ff_vc1_cbpcy_p_vlc[v->cbptab];
         } else if (v->fcm == ILACE_FRAME) { // frame interlaced
             v->qs_last          = v->s.quarter_sample;
             v->s.quarter_sample = 1;
             v->s.mspel          = 1;
         } else {    // field interlaced
-            mbmodetab = get_bits(gb, 3);
-            imvtab = get_bits(gb, 2 + v->numref);
+            v->mbmodetab = get_bits(gb, 3);
+            v->imvtab = get_bits(gb, 2 + v->numref);
             if (!v->numref)
-                v->imv_vlc = &ff_vc1_1ref_mvdata_vlc[imvtab];
+                v->imv_vlc = &ff_vc1_1ref_mvdata_vlc[v->imvtab];
             else
-                v->imv_vlc = &ff_vc1_2ref_mvdata_vlc[imvtab];
-            icbptab = get_bits(gb, 3);
-            v->cbpcy_vlc = &ff_vc1_icbpcy_vlc[icbptab];
+                v->imv_vlc = &ff_vc1_2ref_mvdata_vlc[v->imvtab];
+            v->icbptab = get_bits(gb, 3);
+            v->cbpcy_vlc = &ff_vc1_icbpcy_vlc[v->icbptab];
             if ((v->mv_mode == MV_PMODE_INTENSITY_COMP &&
                 v->mv_mode2 == MV_PMODE_MIXED_MV) || v->mv_mode == MV_PMODE_MIXED_MV) {
-                fourmvbptab     = get_bits(gb, 2);
-                v->fourmvbp_vlc = &ff_vc1_4mv_block_pattern_vlc[fourmvbptab];
-                v->mbmode_vlc = &ff_vc1_if_mmv_mbmode_vlc[mbmodetab];
+                v->fourmvbptab     = get_bits(gb, 2);
+                v->fourmvbp_vlc = &ff_vc1_4mv_block_pattern_vlc[v->fourmvbptab];
+                v->mbmode_vlc = &ff_vc1_if_mmv_mbmode_vlc[v->mbmodetab];
             } else {
-                v->mbmode_vlc = &ff_vc1_if_1mv_mbmode_vlc[mbmodetab];
+                v->mbmode_vlc = &ff_vc1_if_1mv_mbmode_vlc[v->mbmodetab];
             }
         }
         if (v->dquant) {
@@ -1222,24 +1228,24 @@
             v->mv_mode          = ff_vc1_mv_pmode_table2[lowquant][mvmode];
             v->qs_last          = v->s.quarter_sample;
             v->s.quarter_sample = (v->mv_mode == MV_PMODE_1MV || v->mv_mode == MV_PMODE_MIXED_MV);
-            v->s.mspel          = !(v->mv_mode == MV_PMODE_1MV_HPEL_BILIN || v->mv_mode == MV_PMODE_1MV_HPEL);
+            v->s.mspel          = (v->mv_mode != MV_PMODE_1MV_HPEL_BILIN);
             status = bitplane_decoding(v->forward_mb_plane, &v->fmb_is_raw, v);
             if (status < 0)
                 return -1;
             av_log(v->s.avctx, AV_LOG_DEBUG, "MB Forward Type plane encoding: "
                    "Imode: %i, Invert: %i\n", status>>1, status&1);
-            mbmodetab = get_bits(gb, 3);
+            v->mbmodetab = get_bits(gb, 3);
             if (v->mv_mode == MV_PMODE_MIXED_MV)
-                v->mbmode_vlc = &ff_vc1_if_mmv_mbmode_vlc[mbmodetab];
+                v->mbmode_vlc = &ff_vc1_if_mmv_mbmode_vlc[v->mbmodetab];
             else
-                v->mbmode_vlc = &ff_vc1_if_1mv_mbmode_vlc[mbmodetab];
-            imvtab       = get_bits(gb, 3);
-            v->imv_vlc   = &ff_vc1_2ref_mvdata_vlc[imvtab];
-            icbptab      = get_bits(gb, 3);
-            v->cbpcy_vlc = &ff_vc1_icbpcy_vlc[icbptab];
+                v->mbmode_vlc = &ff_vc1_if_1mv_mbmode_vlc[v->mbmodetab];
+            v->imvtab     = get_bits(gb, 3);
+            v->imv_vlc   = &ff_vc1_2ref_mvdata_vlc[v->imvtab];
+            v->icbptab   = get_bits(gb, 3);
+            v->cbpcy_vlc = &ff_vc1_icbpcy_vlc[v->icbptab];
             if (v->mv_mode == MV_PMODE_MIXED_MV) {
-                fourmvbptab     = get_bits(gb, 2);
-                v->fourmvbp_vlc = &ff_vc1_4mv_block_pattern_vlc[fourmvbptab];
+                v->fourmvbptab     = get_bits(gb, 2);
+                v->fourmvbp_vlc = &ff_vc1_4mv_block_pattern_vlc[v->fourmvbptab];
             }
             v->numref = 1; // interlaced field B pictures are always 2-ref
         } else if (v->fcm == ILACE_FRAME) {
@@ -1263,17 +1269,17 @@
                 return -1;
             av_log(v->s.avctx, AV_LOG_DEBUG, "MB Skip plane encoding: "
                    "Imode: %i, Invert: %i\n", status>>1, status&1);
-            mbmodetab       = get_bits(gb, 2);
-            v->mbmode_vlc   = &ff_vc1_intfr_non4mv_mbmode_vlc[mbmodetab];
-            imvtab          = get_bits(gb, 2);
-            v->imv_vlc      = &ff_vc1_1ref_mvdata_vlc[imvtab];
+            v->mbmodetab       = get_bits(gb, 2);
+            v->mbmode_vlc   = &ff_vc1_intfr_non4mv_mbmode_vlc[v->mbmodetab];
+            v->imvtab       = get_bits(gb, 2);
+            v->imv_vlc      = &ff_vc1_1ref_mvdata_vlc[v->imvtab];
             // interlaced p/b-picture cbpcy range is [1, 63]
-            icbptab         = get_bits(gb, 3);
-            v->cbpcy_vlc    = &ff_vc1_icbpcy_vlc[icbptab];
-            twomvbptab      = get_bits(gb, 2);
-            v->twomvbp_vlc  = &ff_vc1_2mv_block_pattern_vlc[twomvbptab];
-            fourmvbptab     = get_bits(gb, 2);
-            v->fourmvbp_vlc = &ff_vc1_4mv_block_pattern_vlc[fourmvbptab];
+            v->icbptab      = get_bits(gb, 3);
+            v->cbpcy_vlc    = &ff_vc1_icbpcy_vlc[v->icbptab];
+            v->twomvbptab      = get_bits(gb, 2);
+            v->twomvbp_vlc  = &ff_vc1_2mv_block_pattern_vlc[v->twomvbptab];
+            v->fourmvbptab     = get_bits(gb, 2);
+            v->fourmvbp_vlc = &ff_vc1_4mv_block_pattern_vlc[v->fourmvbptab];
         } else {
             v->mv_mode          = get_bits1(gb) ? MV_PMODE_1MV : MV_PMODE_1MV_HPEL_BILIN;
             v->qs_last          = v->s.quarter_sample;
@@ -1290,7 +1296,8 @@
             av_log(v->s.avctx, AV_LOG_DEBUG, "MB Skip plane encoding: "
                    "Imode: %i, Invert: %i\n", status>>1, status&1);
             v->s.mv_table_index = get_bits(gb, 2);
-            v->cbpcy_vlc = &ff_vc1_cbpcy_p_vlc[get_bits(gb, 2)];
+            v->cbptab = get_bits(gb, 2);
+            v->cbpcy_vlc = &ff_vc1_cbpcy_p_vlc[v->cbptab];
         }
 
         if (v->dquant) {

diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
index 556906d..69f6ca9 100644
--- a/libavcodec/vc1.h
+++ b/libavcodec/vc1.h

@@ -296,6 +296,7 @@
     uint8_t (*curr_luty)[256]  ,(*curr_lutuv)[256];
     int last_use_ic, *curr_use_ic, next_use_ic, aux_use_ic;
     int rnd;                        ///< rounding control
+    int cbptab;
 
     /** Frame decoding info for S/M profiles only */
     //@{
@@ -367,6 +368,11 @@
     int frfd, brfd;         ///< reference frame distance (forward or backward)
     int first_pic_header_flag;
     int pic_header_flag;
+    int mbmodetab;
+    int icbptab;
+    int imvtab;
+    int twomvbptab;
+    int fourmvbptab;
 
     /** Frame decoding info for sprite modes */
     //@{
@@ -416,10 +422,12 @@
 int  ff_vc1_decode_end(AVCodecContext *avctx);
 void ff_vc1_decode_blocks(VC1Context *v);
 
-void ff_vc1_loop_filter_iblk(VC1Context *v, int pq);
-void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq);
-void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v);
-void ff_vc1_apply_p_loop_filter(VC1Context *v);
+void ff_vc1_i_overlap_filter(VC1Context *v);
+void ff_vc1_p_overlap_filter(VC1Context *v);
+void ff_vc1_i_loop_filter(VC1Context *v);
+void ff_vc1_p_loop_filter(VC1Context *v);
+void ff_vc1_p_intfr_loop_filter(VC1Context *v);
+void ff_vc1_b_intfi_loop_filter(VC1Context *v);
 
 void ff_vc1_mc_1mv(VC1Context *v, int dir);
 void ff_vc1_mc_4mv_luma(VC1Context *v, int n, int dir, int avg);

diff --git a/libavcodec/vc1_block.c b/libavcodec/vc1_block.c
index f9f26f7..86320db 100644
--- a/libavcodec/vc1_block.c
+++ b/libavcodec/vc1_block.c

@@ -45,6 +45,9 @@
     {  0,  1,  3,  7, 15, 31, 63, 127, 255 },
 };
 
+// mapping table for internal block representation
+static const int block_map[6] = {0, 2, 1, 3, 4, 5};
+
 /***********************************************************************/
 /**
  * @name VC-1 Bitplane decoding
@@ -66,76 +69,97 @@
 
 /** @} */ //Bitplane group
 
-static void vc1_put_signed_blocks_clamped(VC1Context *v)
+static void vc1_put_blocks_clamped(VC1Context *v, int put_signed)
 {
     MpegEncContext *s = &v->s;
-    int topleft_mb_pos, top_mb_pos;
-    int stride_y, fieldtx = 0;
-    int v_dist;
+    uint8_t *dest;
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int fieldtx = 0;
+    int i;
 
-    /* The put pixels loop is always one MB row behind the decoding loop,
-     * because we can only put pixels when overlap filtering is done, and
-     * for filtering of the bottom edge of a MB, we need the next MB row
-     * present as well.
-     * Within the row, the put pixels loop is also one MB col behind the
-     * decoding loop. The reason for this is again, because for filtering
-     * of the right MB edge, we need the next MB present. */
-    if (!s->first_slice_line) {
+    /* The put pixels loop is one MB row and one MB column behind the decoding
+     * loop because we can only put pixels when overlap filtering is done. For
+     * interlaced frame pictures, however, the put pixels loop is only one
+     * column behind the decoding loop as interlaced frame pictures only need
+     * horizontal overlap filtering. */
+    if (!s->first_slice_line && v->fcm != ILACE_FRAME) {
         if (s->mb_x) {
-            topleft_mb_pos = (s->mb_y - 1) * s->mb_stride + s->mb_x - 1;
-            if (v->fcm == ILACE_FRAME)
-                fieldtx = v->fieldtx_plane[topleft_mb_pos];
-            stride_y       = s->linesize << fieldtx;
-            v_dist         = (16 - fieldtx) >> (fieldtx == 0);
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][0],
-                                              s->dest[0] - 16 * s->linesize - 16,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][1],
-                                              s->dest[0] - 16 * s->linesize - 8,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][2],
-                                              s->dest[0] - v_dist * s->linesize - 16,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][3],
-                                              s->dest[0] - v_dist * s->linesize - 8,
-                                              stride_y);
-            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][4],
-                                              s->dest[1] - 8 * s->uvlinesize - 8,
-                                              s->uvlinesize);
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][5],
-                                              s->dest[2] - 8 * s->uvlinesize - 8,
-                                              s->uvlinesize);
+            for (i = 0; i < block_count; i++) {
+                if (i > 3 ? v->mb_type[0][s->block_index[i] - s->block_wrap[i] - 1] :
+                            v->mb_type[0][s->block_index[i] - 2 * s->block_wrap[i] - 2]) {
+                    dest = s->dest[0] + ((i & 2) - 4) * 4 * s->linesize + ((i & 1) - 2) * 8;
+                    if (put_signed)
+                        s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][block_map[i]],
+                                                          i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
+                                                          i > 3 ? s->uvlinesize : s->linesize);
+                    else
+                        s->idsp.put_pixels_clamped(v->block[v->topleft_blk_idx][block_map[i]],
+                                                   i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
+                                                   i > 3 ? s->uvlinesize : s->linesize);
+                }
             }
         }
-        if (s->mb_x == s->mb_width - 1) {
-            top_mb_pos = (s->mb_y - 1) * s->mb_stride + s->mb_x;
-            if (v->fcm == ILACE_FRAME)
-                fieldtx = v->fieldtx_plane[top_mb_pos];
-            stride_y   = s->linesize << fieldtx;
-            v_dist     = fieldtx ? 15 : 8;
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][0],
-                                              s->dest[0] - 16 * s->linesize,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][1],
-                                              s->dest[0] - 16 * s->linesize + 8,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][2],
-                                              s->dest[0] - v_dist * s->linesize,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][3],
-                                              s->dest[0] - v_dist * s->linesize + 8,
-                                              stride_y);
-            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][4],
-                                              s->dest[1] - 8 * s->uvlinesize,
-                                              s->uvlinesize);
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][5],
-                                              s->dest[2] - 8 * s->uvlinesize,
-                                              s->uvlinesize);
+        if (s->mb_x == v->end_mb_x - 1) {
+            for (i = 0; i < block_count; i++) {
+                if (i > 3 ? v->mb_type[0][s->block_index[i] - s->block_wrap[i]] :
+                            v->mb_type[0][s->block_index[i] - 2 * s->block_wrap[i]]) {
+                    dest = s->dest[0] + ((i & 2) - 4) * 4 * s->linesize + (i & 1) * 8;
+                    if (put_signed)
+                        s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][block_map[i]],
+                                                          i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
+                                                          i > 3 ? s->uvlinesize : s->linesize);
+                    else
+                        s->idsp.put_pixels_clamped(v->block[v->top_blk_idx][block_map[i]],
+                                                   i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
+                                                   i > 3 ? s->uvlinesize : s->linesize);
+                }
             }
         }
     }
+    if (s->mb_y == s->end_mb_y - 1 || v->fcm == ILACE_FRAME) {
+        if (s->mb_x) {
+            if (v->fcm == ILACE_FRAME)
+                fieldtx = v->fieldtx_plane[s->mb_y * s->mb_stride + s->mb_x - 1];
+            for (i = 0; i < block_count; i++) {
+                if (i > 3 ? v->mb_type[0][s->block_index[i] - 1] :
+                            v->mb_type[0][s->block_index[i] - 2]) {
+                    if (fieldtx)
+                        dest = s->dest[0] + ((i & 2) >> 1) * s->linesize + ((i & 1) - 2) * 8;
+                    else
+                        dest = s->dest[0] + (i & 2) * 4 * s->linesize + ((i & 1) - 2) * 8;
+                    if (put_signed)
+                        s->idsp.put_signed_pixels_clamped(v->block[v->left_blk_idx][block_map[i]],
+                                                          i > 3 ? s->dest[i - 3] - 8 : dest,
+                                                          i > 3 ? s->uvlinesize : s->linesize << fieldtx);
+                    else
+                        s->idsp.put_pixels_clamped(v->block[v->left_blk_idx][block_map[i]],
+                                                   i > 3 ? s->dest[i - 3] - 8 : dest,
+                                                   i > 3 ? s->uvlinesize : s->linesize << fieldtx);
+                }
+            }
+        }
+        if (s->mb_x == v->end_mb_x - 1) {
+            if (v->fcm == ILACE_FRAME)
+                fieldtx = v->fieldtx_plane[s->mb_y * s->mb_stride + s->mb_x];
+            for (i = 0; i < block_count; i++) {
+                if (v->mb_type[0][s->block_index[i]]) {
+                    if (fieldtx)
+                        dest = s->dest[0] + ((i & 2) >> 1) * s->linesize + (i & 1) * 8;
+                    else
+                        dest = s->dest[0] + (i & 2) * 4 * s->linesize + (i & 1) * 8;
+                    if (put_signed)
+                        s->idsp.put_signed_pixels_clamped(v->block[v->cur_blk_idx][block_map[i]],
+                                                          i > 3 ? s->dest[i - 3] : dest,
+                                                          i > 3 ? s->uvlinesize : s->linesize << fieldtx);
+                    else
+                        s->idsp.put_pixels_clamped(v->block[v->cur_blk_idx][block_map[i]],
+                                                   i > 3 ? s->dest[i - 3] : dest,
+                                                   i > 3 ? s->uvlinesize : s->linesize << fieldtx);
+                }
+            }
+        }
+    }
+}
 
 #define inc_blk_idx(idx) do { \
         idx++; \
@@ -143,12 +167,6 @@
             idx = 0; \
     } while (0)
 
-    inc_blk_idx(v->topleft_blk_idx);
-    inc_blk_idx(v->top_blk_idx);
-    inc_blk_idx(v->left_blk_idx);
-    inc_blk_idx(v->cur_blk_idx);
-}
-
 /***********************************************************************/
 /**
  * @name VC-1 Block-level functions
@@ -165,13 +183,13 @@
         int edges = 0;                                         \
         if (v->dqprofile == DQPROFILE_ALL_MBS) {               \
             if (v->dqbilevel) {                                \
-                mquant = (get_bits1(gb)) ? v->altpq : v->pq;   \
+                mquant = (get_bits1(gb)) ? -v->altpq : v->pq;  \
             } else {                                           \
                 mqdiff = get_bits(gb, 3);                      \
                 if (mqdiff != 7)                               \
-                    mquant = v->pq + mqdiff;                   \
+                    mquant = -v->pq - mqdiff;                  \
                 else                                           \
-                    mquant = get_bits(gb, 5);                  \
+                    mquant = -get_bits(gb, 5);                 \
             }                                                  \
         }                                                      \
         if (v->dqprofile == DQPROFILE_SINGLE_EDGE)             \
@@ -181,14 +199,15 @@
         else if (v->dqprofile == DQPROFILE_FOUR_EDGES)         \
             edges = 15;                                        \
         if ((edges&1) && !s->mb_x)                             \
-            mquant = v->altpq;                                 \
-        if ((edges&2) && s->first_slice_line)                  \
-            mquant = v->altpq;                                 \
+            mquant = -v->altpq;                                \
+        if ((edges&2) && !s->mb_y)                             \
+            mquant = -v->altpq;                                \
         if ((edges&4) && s->mb_x == (s->mb_width - 1))         \
-            mquant = v->altpq;                                 \
-        if ((edges&8) && s->mb_y == (s->mb_height - 1))        \
-            mquant = v->altpq;                                 \
-        if (!mquant || mquant > 31) {                          \
+            mquant = -v->altpq;                                \
+        if ((edges&8) &&                                       \
+            s->mb_y == ((s->mb_height >> v->field_mode) - 1))  \
+            mquant = -v->altpq;                                \
+        if (!mquant || mquant > 31 || mquant < -31) {                          \
             av_log(v->s.avctx, AV_LOG_ERROR,                   \
                    "Overriding invalid mquant %d\n", mquant);  \
             mquant = 1;                                        \
@@ -393,7 +412,7 @@
     int dqscale_index;
 
     /* scale predictors if needed */
-    q1 = s->current_picture.qscale_table[mb_pos];
+    q1 = FFABS(s->current_picture.qscale_table[mb_pos]);
     dqscale_index = s->y_dc_scale_table[q1] - 1;
     if (dqscale_index < 0)
         return 0;
@@ -409,12 +428,12 @@
     a = dc_val[ - wrap];
 
     if (c_avail && (n != 1 && n != 3)) {
-        q2 = s->current_picture.qscale_table[mb_pos - 1];
+        q2 = FFABS(s->current_picture.qscale_table[mb_pos - 1]);
         if (q2 && q2 != q1)
             c = (c * s->y_dc_scale_table[q2] * ff_vc1_dqscale[dqscale_index] + 0x20000) >> 18;
     }
     if (a_avail && (n != 2 && n != 3)) {
-        q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
+        q2 = FFABS(s->current_picture.qscale_table[mb_pos - s->mb_stride]);
         if (q2 && q2 != q1)
             a = (a * s->y_dc_scale_table[q2] * ff_vc1_dqscale[dqscale_index] + 0x20000) >> 18;
     }
@@ -424,7 +443,7 @@
             off--;
         if (n != 2)
             off -= s->mb_stride;
-        q2 = s->current_picture.qscale_table[off];
+        q2 = FFABS(s->current_picture.qscale_table[off]);
         if (q2 && q2 != q1)
             b = (b * s->y_dc_scale_table[q2] * ff_vc1_dqscale[dqscale_index] + 0x20000) >> 18;
     }
@@ -594,7 +613,7 @@
         scale = s->c_dc_scale;
     block[0] = dcdiff * scale;
 
-    ac_val  = s->ac_val[0][0] + s->block_index[n] * 16;
+    ac_val  = s->ac_val[0][s->block_index[n]];
     ac_val2 = ac_val;
     if (dc_pred_dir) // left
         ac_val -= 16;
@@ -705,6 +724,7 @@
     int scale;
     int q1, q2 = 0;
     int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+    int quant = FFABS(mquant);
 
     /* Get DC differential */
     if (n < 4) {
@@ -717,7 +737,7 @@
         return -1;
     }
     if (dcdiff) {
-        const int m = (mquant == 1 || mquant == 2) ? 3 - mquant : 0;
+        const int m = (quant == 1 || quant == 2) ? 3 - quant : 0;
         if (dcdiff == 119 /* ESC index value */) {
             dcdiff = get_bits(gb, 8 + m);
         } else {
@@ -729,7 +749,7 @@
     }
 
     /* Prediction */
-    dcdiff += ff_vc1_pred_dc(&v->s, v->overlap, mquant, n, v->a_avail, v->c_avail, &dc_val, &dc_pred_dir);
+    dcdiff += ff_vc1_pred_dc(&v->s, v->overlap, quant, n, v->a_avail, v->c_avail, &dc_val, &dc_pred_dir);
     *dc_val = dcdiff;
 
     /* Store the quantized DC coeff, used for prediction */
@@ -743,9 +763,9 @@
     if (!a_avail && !c_avail)
         use_pred = 0;
 
-    scale = mquant * 2 + ((mquant == v->pq) ? v->halfpq : 0);
+    scale = quant * 2 + ((mquant < 0) ? 0 : v->halfpq);
 
-    ac_val  = s->ac_val[0][0] + s->block_index[n] * 16;
+    ac_val  = s->ac_val[0][s->block_index[n]];
     ac_val2 = ac_val;
     if (dc_pred_dir) // left
         ac_val -= 16;
@@ -809,11 +829,12 @@
                 ac_val += 8;
             }
             /* scale predictors if needed*/
+            q1 = FFABS(q1) * 2 + ((q1 < 0) ? 0 : v->halfpq) - 1;
+            if (q1 < 1)
+                return AVERROR_INVALIDDATA;
+            if (q2)
+                q2 = FFABS(q2) * 2 + ((q2 < 0) ? 0 : v->halfpq) - 1;
             if (q2 && q1 != q2) {
-                q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
-                if (q1 < 1)
-                    return AVERROR_INVALIDDATA;
-                q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
                 for (k = 1; k < 8; k++)
                     block[k << sh] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
             } else {
@@ -832,7 +853,7 @@
             if (block[k]) {
                 block[k] *= scale;
                 if (!v->pquantizer)
-                    block[k] += (block[k] < 0) ? -mquant : mquant;
+                    block[k] += (block[k] < 0) ? -quant : quant;
             }
 
     } else { // no AC coeffs
@@ -851,18 +872,19 @@
                 ac_val2 += 8;
             }
             memcpy(ac_val2, ac_val, 8 * 2);
+            q1 = FFABS(q1) * 2 + ((q1 < 0) ? 0 : v->halfpq) - 1;
+            if (q1 < 1)
+                return AVERROR_INVALIDDATA;
+            if (q2)
+                q2 = FFABS(q2) * 2 + ((q2 < 0) ? 0 : v->halfpq) - 1;
             if (q2 && q1 != q2) {
-                q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
-                q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
-                if (q1 < 1)
-                    return AVERROR_INVALIDDATA;
                 for (k = 1; k < 8; k++)
                     ac_val2[k] = (ac_val2[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
             }
             for (k = 1; k < 8; k++) {
                 block[k << sh] = ac_val2[k] * scale;
                 if (!v->pquantizer && block[k << sh])
-                    block[k << sh] += (block[k << sh] < 0) ? -mquant : mquant;
+                    block[k << sh] += (block[k << sh] < 0) ? -quant : quant;
             }
         }
     }
@@ -895,15 +917,16 @@
     int use_pred = s->ac_pred;
     int scale;
     int q1, q2 = 0;
+    int quant = FFABS(mquant);
 
     s->bdsp.clear_block(block);
 
     /* XXX: Guard against dumb values of mquant */
-    mquant = av_clip_uintp2(mquant, 5);
+    quant = av_clip_uintp2(quant, 5);
 
     /* Set DC scale - y and c use the same */
-    s->y_dc_scale = s->y_dc_scale_table[mquant];
-    s->c_dc_scale = s->c_dc_scale_table[mquant];
+    s->y_dc_scale = s->y_dc_scale_table[quant];
+    s->c_dc_scale = s->c_dc_scale_table[quant];
 
     /* Get DC differential */
     if (n < 4) {
@@ -916,7 +939,7 @@
         return -1;
     }
     if (dcdiff) {
-        const int m = (mquant == 1 || mquant == 2) ? 3 - mquant : 0;
+        const int m = (quant == 1 || quant == 2) ? 3 - quant : 0;
         if (dcdiff == 119 /* ESC index value */) {
             dcdiff = get_bits(gb, 8 + m);
         } else {
@@ -928,7 +951,7 @@
     }
 
     /* Prediction */
-    dcdiff += ff_vc1_pred_dc(&v->s, v->overlap, mquant, n, a_avail, c_avail, &dc_val, &dc_pred_dir);
+    dcdiff += ff_vc1_pred_dc(&v->s, v->overlap, quant, n, a_avail, c_avail, &dc_val, &dc_pred_dir);
     *dc_val = dcdiff;
 
     /* Store the quantized DC coeff, used for prediction */
@@ -946,10 +969,10 @@
     if (!a_avail) dc_pred_dir = 1;
     if (!c_avail) dc_pred_dir = 0;
     if (!a_avail && !c_avail) use_pred = 0;
-    ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
+    ac_val = s->ac_val[0][s->block_index[n]];
     ac_val2 = ac_val;
 
-    scale = mquant * 2 + v->halfpq;
+    scale = quant * 2 + ((mquant < 0) ? 0 : v->halfpq);
 
     if (dc_pred_dir) //left
         ac_val -= 16;
@@ -993,12 +1016,12 @@
         /* apply AC prediction if needed */
         if (use_pred) {
             /* scale predictors if needed*/
+            q1 = FFABS(q1) * 2 + ((q1 < 0) ? 0 : v->halfpq) - 1;
+            if (q1 < 1)
+                return AVERROR_INVALIDDATA;
+            if (q2)
+                q2 = FFABS(q2) * 2 + ((q2 < 0) ? 0 : v->halfpq) - 1;
             if (q2 && q1 != q2) {
-                q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
-                q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
-
-                if (q1 < 1)
-                    return AVERROR_INVALIDDATA;
                 if (dc_pred_dir) { // left
                     for (k = 1; k < 8; k++)
                         block[k << v->left_blk_sh] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
@@ -1027,7 +1050,7 @@
             if (block[k]) {
                 block[k] *= scale;
                 if (!v->pquantizer)
-                    block[k] += (block[k] < 0) ? -mquant : mquant;
+                    block[k] += (block[k] < 0) ? -quant : quant;
             }
 
         if (use_pred) i = 63;
@@ -1038,11 +1061,12 @@
         if (dc_pred_dir) { // left
             if (use_pred) {
                 memcpy(ac_val2, ac_val, 8 * 2);
+                q1 = FFABS(q1) * 2 + ((q1 < 0) ? 0 : v->halfpq) - 1;
+                if (q1 < 1)
+                    return AVERROR_INVALIDDATA;
+                if (q2)
+                    q2 = FFABS(q2) * 2 + ((q2 < 0) ? 0 : v->halfpq) - 1;
                 if (q2 && q1 != q2) {
-                    q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
-                    q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
-                    if (q1 < 1)
-                        return AVERROR_INVALIDDATA;
                     for (k = 1; k < 8; k++)
                         ac_val2[k] = (ac_val2[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
                 }
@@ -1050,11 +1074,12 @@
         } else { // top
             if (use_pred) {
                 memcpy(ac_val2 + 8, ac_val + 8, 8 * 2);
+                q1 = FFABS(q1) * 2 + ((q1 < 0) ? 0 : v->halfpq) - 1;
+                if (q1 < 1)
+                    return AVERROR_INVALIDDATA;
+                if (q2)
+                    q2 = FFABS(q2) * 2 + ((q2 < 0) ? 0 : v->halfpq) - 1;
                 if (q2 && q1 != q2) {
-                    q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
-                    q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
-                    if (q1 < 1)
-                        return AVERROR_INVALIDDATA;
                     for (k = 1; k < 8; k++)
                         ac_val2[k + 8] = (ac_val2[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
                 }
@@ -1067,13 +1092,13 @@
                 for (k = 1; k < 8; k++) {
                     block[k << v->left_blk_sh] = ac_val2[k] * scale;
                     if (!v->pquantizer && block[k << v->left_blk_sh])
-                        block[k << v->left_blk_sh] += (block[k << v->left_blk_sh] < 0) ? -mquant : mquant;
+                        block[k << v->left_blk_sh] += (block[k << v->left_blk_sh] < 0) ? -quant : quant;
                 }
             } else { // top
                 for (k = 1; k < 8; k++) {
                     block[k << v->top_blk_sh] = ac_val2[k + 8] * scale;
                     if (!v->pquantizer && block[k << v->top_blk_sh])
-                        block[k << v->top_blk_sh] += (block[k << v->top_blk_sh] < 0) ? -mquant : mquant;
+                        block[k << v->top_blk_sh] += (block[k << v->top_blk_sh] < 0) ? -quant : quant;
                 }
             }
             i = 63;
@@ -1098,6 +1123,7 @@
     int scale, off, idx, last, skip, value;
     int ttblk = ttmb & 7;
     int pat = 0;
+    int quant = FFABS(mquant);
 
     s->bdsp.clear_block(block);
 
@@ -1118,7 +1144,7 @@
         if (ttblk == TT_4X8_RIGHT || ttblk == TT_4X8_LEFT)
             ttblk = TT_4X8;
     }
-    scale = 2 * mquant + ((v->pq == mquant) ? v->halfpq : 0);
+    scale = quant * 2 + ((mquant < 0) ? 0 : v->halfpq);
 
     // convert transforms like 8X4_TOP to generic TT and SUBBLKPAT
     if (ttblk == TT_8X4_TOP || ttblk == TT_8X4_BOTTOM) {
@@ -1145,7 +1171,7 @@
                 idx = v->zzi_8x8[i++];
             block[idx] = value * scale;
             if (!v->pquantizer)
-                block[idx] += (block[idx] < 0) ? -mquant : mquant;
+                block[idx] += (block[idx] < 0) ? -quant : quant;
         }
         if (!skip_block) {
             if (i == 1)
@@ -1173,7 +1199,7 @@
                     idx = ff_vc1_adv_interlaced_4x4_zz[i++];
                 block[idx + off] = value * scale;
                 if (!v->pquantizer)
-                    block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant;
+                    block[idx + off] += (block[idx + off] < 0) ? -quant : quant;
             }
             if (!(subblkpat & (1 << (3 - j))) && !skip_block) {
                 if (i == 1)
@@ -1200,7 +1226,7 @@
                     idx = ff_vc1_adv_interlaced_8x4_zz[i++] + off;
                 block[idx] = value * scale;
                 if (!v->pquantizer)
-                    block[idx] += (block[idx] < 0) ? -mquant : mquant;
+                    block[idx] += (block[idx] < 0) ? -quant : quant;
             }
             if (!(subblkpat & (1 << (1 - j))) && !skip_block) {
                 if (i == 1)
@@ -1227,7 +1253,7 @@
                     idx = ff_vc1_adv_interlaced_4x8_zz[i++] + off;
                 block[idx] = value * scale;
                 if (!v->pquantizer)
-                    block[idx] += (block[idx] < 0) ? -mquant : mquant;
+                    block[idx] += (block[idx] < 0) ? -quant : quant;
             }
             if (!(subblkpat & (1 << (1 - j))) && !skip_block) {
                 if (i == 1)
@@ -1325,28 +1351,18 @@
                     if (i == 1 || i == 3 || s->mb_x)
                         v->c_avail = v->mb_type[0][s->block_index[i] - 1];
 
-                    vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+                    vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, val, mquant,
                                            (i & 4) ? v->codingset2 : v->codingset);
                     if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                         continue;
-                    v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
+                    v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][block_map[i]]);
                     if (v->rangeredfrm)
                         for (j = 0; j < 64; j++)
-                            s->block[i][j] <<= 1;
-                    s->idsp.put_signed_pixels_clamped(s->block[i],
-                                                      s->dest[dst_idx] + off,
-                                                      i & 4 ? s->uvlinesize
-                                                            : s->linesize);
-                    if (v->pq >= 9 && v->overlap) {
-                        if (v->c_avail)
-                            v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
-                        if (v->a_avail)
-                            v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
-                    }
+                            v->block[v->cur_blk_idx][block_map[i]][j] <<= 1;
                     block_cbp   |= 0xF << (i << 2);
                     block_intra |= 1 << i;
                 } else if (val) {
-                    pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block,
+                    pat = vc1_decode_p_block(v, v->block[v->cur_blk_idx][block_map[i]], i, mquant, ttmb, first_block,
                                              s->dest[dst_idx] + off, (i & 4) ? s->uvlinesize : s->linesize,
                                              CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), &block_tt);
                     block_cbp |= pat << (i << 2);
@@ -1436,28 +1452,18 @@
                     if (i == 1 || i == 3 || s->mb_x)
                         v->c_avail = v->mb_type[0][s->block_index[i] - 1];
 
-                    vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant,
+                    vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, is_coded[i], mquant,
                                            (i & 4) ? v->codingset2 : v->codingset);
                     if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                         continue;
-                    v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
+                    v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][block_map[i]]);
                     if (v->rangeredfrm)
                         for (j = 0; j < 64; j++)
-                            s->block[i][j] <<= 1;
-                    s->idsp.put_signed_pixels_clamped(s->block[i],
-                                                      s->dest[dst_idx] + off,
-                                                      (i & 4) ? s->uvlinesize
-                                                              : s->linesize);
-                    if (v->pq >= 9 && v->overlap) {
-                        if (v->c_avail)
-                            v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
-                        if (v->a_avail)
-                            v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
-                    }
+                            v->block[v->cur_blk_idx][block_map[i]][j] <<= 1;
                     block_cbp   |= 0xF << (i << 2);
                     block_intra |= 1 << i;
                 } else if (is_coded[i]) {
-                    pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+                    pat = vc1_decode_p_block(v, v->block[v->cur_blk_idx][block_map[i]], i, mquant, ttmb,
                                              first_block, s->dest[dst_idx] + off,
                                              (i & 4) ? s->uvlinesize : s->linesize,
                                              CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY),
@@ -1484,6 +1490,10 @@
         }
     }
 end:
+    if (v->overlap && v->pq >= 9)
+        ff_vc1_p_overlap_filter(v);
+    vc1_put_blocks_clamped(v, 1);
+
     v->cbp[s->mb_x]      = block_cbp;
     v->ttblk[s->mb_x]    = block_tt;
     v->is_intra[s->mb_x] = block_intra;
@@ -1511,7 +1521,7 @@
     int skipped, fourmv = 0, twomv = 0;
     int block_cbp = 0, pat, block_tt = 0;
     int idx_mbmode = 0, mvbp;
-    int stride_y, fieldtx;
+    int fieldtx;
 
     mquant = v->pq; /* Lossy initialization */
 
@@ -1570,8 +1580,8 @@
             GET_MQUANT();
             s->current_picture.qscale_table[mb_pos] = mquant;
             /* Set DC scale - y and c use the same (not sure if necessary here) */
-            s->y_dc_scale = s->y_dc_scale_table[mquant];
-            s->c_dc_scale = s->c_dc_scale_table[mquant];
+            s->y_dc_scale = s->y_dc_scale_table[FFABS(mquant)];
+            s->c_dc_scale = s->c_dc_scale_table[FFABS(mquant)];
             dst_idx = 0;
             for (i = 0; i < 6; i++) {
                 v->a_avail = v->c_avail          = 0;
@@ -1584,22 +1594,16 @@
                 if (i == 1 || i == 3 || s->mb_x)
                     v->c_avail = v->mb_type[0][s->block_index[i] - 1];
 
-                vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+                vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, val, mquant,
                                        (i & 4) ? v->codingset2 : v->codingset);
                 if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                     continue;
-                v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
-                if (i < 4) {
-                    stride_y = s->linesize << fieldtx;
+                v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][block_map[i]]);
+                if (i < 4)
                     off = (fieldtx) ? ((i & 1) * 8) + ((i & 2) >> 1) * s->linesize : (i & 1) * 8 + 4 * (i & 2) * s->linesize;
-                } else {
-                    stride_y = s->uvlinesize;
+                else
                     off = 0;
-                }
-                s->idsp.put_signed_pixels_clamped(s->block[i],
-                                                  s->dest[dst_idx] + off,
-                                                  stride_y);
-                //TODO: loop filter
+                block_cbp |= 0xf << (i << 2);
             }
 
         } else { // inter MB
@@ -1670,7 +1674,7 @@
                 else
                     off = (i & 4) ? 0 : ((i & 1) * 8 + ((i > 1) * s->linesize));
                 if (val) {
-                    pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+                    pat = vc1_decode_p_block(v, v->block[v->cur_blk_idx][block_map[i]], i, mquant, ttmb,
                                              first_block, s->dest[dst_idx] + off,
                                              (i & 4) ? s->uvlinesize : (s->linesize << fieldtx),
                                              CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), &block_tt);
@@ -1695,9 +1699,15 @@
         v->blk_mv_type[s->block_index[3]] = 0;
         ff_vc1_pred_mv_intfr(v, 0, 0, 0, 1, v->range_x, v->range_y, v->mb_type[0], 0);
         ff_vc1_mc_1mv(v, 0);
+        v->fieldtx_plane[mb_pos] = 0;
     }
-    if (s->mb_x == s->mb_width - 1)
-        memmove(v->is_intra_base, v->is_intra, sizeof(v->is_intra_base[0])*s->mb_stride);
+    if (v->overlap && v->pq >= 9)
+        ff_vc1_p_overlap_filter(v);
+    vc1_put_blocks_clamped(v, 1);
+
+    v->cbp[s->mb_x]      = block_cbp;
+    v->ttblk[s->mb_x]    = block_tt;
+
     return 0;
 }
 
@@ -1732,8 +1742,8 @@
         GET_MQUANT();
         s->current_picture.qscale_table[mb_pos] = mquant;
         /* Set DC scale - y and c use the same (not sure if necessary here) */
-        s->y_dc_scale = s->y_dc_scale_table[mquant];
-        s->c_dc_scale = s->c_dc_scale_table[mquant];
+        s->y_dc_scale = s->y_dc_scale_table[FFABS(mquant)];
+        s->c_dc_scale = s->c_dc_scale_table[FFABS(mquant)];
         v->s.ac_pred  = v->acpred_plane[mb_pos] = get_bits1(gb);
         mb_has_coeffs = idx_mbmode & 1;
         if (mb_has_coeffs)
@@ -1750,17 +1760,13 @@
             if (i == 1 || i == 3 || s->mb_x)
                 v->c_avail = v->mb_type[0][s->block_index[i] - 1];
 
-            vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+            vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, val, mquant,
                                    (i & 4) ? v->codingset2 : v->codingset);
             if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                 continue;
-            v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
+            v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][block_map[i]]);
             off  = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
-            s->idsp.put_signed_pixels_clamped(s->block[i],
-                                              s->dest[dst_idx] + off,
-                                              (i & 4) ? s->uvlinesize
-                                                      : s->linesize);
-            // TODO: loop filter
+            block_cbp |= 0xf << (i << 2);
         }
     } else {
         s->mb_intra = v->is_intra[s->mb_x] = 0;
@@ -1803,7 +1809,7 @@
             val = ((cbp >> (5 - i)) & 1);
             off = (i & 4) ? 0 : (i & 1) * 8 + (i & 2) * 4 * s->linesize;
             if (val) {
-                pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+                pat = vc1_decode_p_block(v, v->block[v->cur_blk_idx][block_map[i]], i, mquant, ttmb,
                                          first_block, s->dest[dst_idx] + off,
                                          (i & 4) ? s->uvlinesize : s->linesize,
                                          CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY),
@@ -1815,8 +1821,13 @@
             }
         }
     }
-    if (s->mb_x == s->mb_width - 1)
-        memmove(v->is_intra_base, v->is_intra, sizeof(v->is_intra_base[0]) * s->mb_stride);
+    if (v->overlap && v->pq >= 9)
+        ff_vc1_p_overlap_filter(v);
+    vc1_put_blocks_clamped(v, 1);
+
+    v->cbp[s->mb_x]      = block_cbp;
+    v->ttblk[s->mb_x]    = block_tt;
+
     return 0;
 }
 
@@ -1993,6 +2004,7 @@
     int fwd;
     int dmv_x[2], dmv_y[2], pred_flag[2];
     int bmvtype = BMV_TYPE_BACKWARD;
+    int block_cbp = 0, pat, block_tt = 0;
     int idx_mbmode;
 
     mquant      = v->pq; /* Lossy initialization */
@@ -2008,8 +2020,8 @@
         GET_MQUANT();
         s->current_picture.qscale_table[mb_pos] = mquant;
         /* Set DC scale - y and c use the same (not sure if necessary here) */
-        s->y_dc_scale = s->y_dc_scale_table[mquant];
-        s->c_dc_scale = s->c_dc_scale_table[mquant];
+        s->y_dc_scale = s->y_dc_scale_table[FFABS(mquant)];
+        s->c_dc_scale = s->c_dc_scale_table[FFABS(mquant)];
         v->s.ac_pred  = v->acpred_plane[mb_pos] = get_bits1(gb);
         mb_has_coeffs = idx_mbmode & 1;
         if (mb_has_coeffs)
@@ -2039,7 +2051,6 @@
                                               s->dest[dst_idx] + off,
                                               (i & 4) ? s->uvlinesize
                                                       : s->linesize);
-            // TODO: yet to perform loop filter
         }
     } else {
         s->mb_intra = v->is_intra[s->mb_x] = 0;
@@ -2123,16 +2134,19 @@
             val = ((cbp >> (5 - i)) & 1);
             off = (i & 4) ? 0 : (i & 1) * 8 + (i & 2) * 4 * s->linesize;
             if (val) {
-                vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
-                                   first_block, s->dest[dst_idx] + off,
-                                   (i & 4) ? s->uvlinesize : s->linesize,
-                                   CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), NULL);
+                pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+                                         first_block, s->dest[dst_idx] + off,
+                                         (i & 4) ? s->uvlinesize : s->linesize,
+                                         CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), &block_tt);
+                block_cbp |= pat << (i << 2);
                 if (!v->ttmbf && ttmb < 8)
                     ttmb = -1;
                 first_block = 0;
             }
         }
     }
+    v->cbp[s->mb_x]      = block_cbp;
+    v->ttblk[s->mb_x]    = block_tt;
 }
 
 /** Decode one B-frame MB (in interlaced frame B picture)
@@ -2182,41 +2196,6 @@
         }
     }
 
-    if (v->dmb_is_raw)
-        direct = get_bits1(gb);
-    else
-        direct = v->direct_mb_plane[mb_pos];
-
-    if (direct) {
-        if (s->next_picture_ptr->field_picture)
-            av_log(s->avctx, AV_LOG_WARNING, "Mixed frame/field direct mode not supported\n");
-        s->mv[0][0][0] = s->current_picture.motion_val[0][s->block_index[0]][0] = scale_mv(s->next_picture.motion_val[1][s->block_index[0]][0], v->bfraction, 0, s->quarter_sample);
-        s->mv[0][0][1] = s->current_picture.motion_val[0][s->block_index[0]][1] = scale_mv(s->next_picture.motion_val[1][s->block_index[0]][1], v->bfraction, 0, s->quarter_sample);
-        s->mv[1][0][0] = s->current_picture.motion_val[1][s->block_index[0]][0] = scale_mv(s->next_picture.motion_val[1][s->block_index[0]][0], v->bfraction, 1, s->quarter_sample);
-        s->mv[1][0][1] = s->current_picture.motion_val[1][s->block_index[0]][1] = scale_mv(s->next_picture.motion_val[1][s->block_index[0]][1], v->bfraction, 1, s->quarter_sample);
-
-        if (twomv) {
-            s->mv[0][2][0] = s->current_picture.motion_val[0][s->block_index[2]][0] = scale_mv(s->next_picture.motion_val[1][s->block_index[2]][0], v->bfraction, 0, s->quarter_sample);
-            s->mv[0][2][1] = s->current_picture.motion_val[0][s->block_index[2]][1] = scale_mv(s->next_picture.motion_val[1][s->block_index[2]][1], v->bfraction, 0, s->quarter_sample);
-            s->mv[1][2][0] = s->current_picture.motion_val[1][s->block_index[2]][0] = scale_mv(s->next_picture.motion_val[1][s->block_index[2]][0], v->bfraction, 1, s->quarter_sample);
-            s->mv[1][2][1] = s->current_picture.motion_val[1][s->block_index[2]][1] = scale_mv(s->next_picture.motion_val[1][s->block_index[2]][1], v->bfraction, 1, s->quarter_sample);
-
-            for (i = 1; i < 4; i += 2) {
-                s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0] = s->mv[0][i-1][0];
-                s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1] = s->mv[0][i-1][1];
-                s->mv[1][i][0] = s->current_picture.motion_val[1][s->block_index[i]][0] = s->mv[1][i-1][0];
-                s->mv[1][i][1] = s->current_picture.motion_val[1][s->block_index[i]][1] = s->mv[1][i-1][1];
-            }
-        } else {
-            for (i = 1; i < 4; i++) {
-                s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0] = s->mv[0][0][0];
-                s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1] = s->mv[0][0][1];
-                s->mv[1][i][0] = s->current_picture.motion_val[1][s->block_index[i]][0] = s->mv[1][0][0];
-                s->mv[1][i][1] = s->current_picture.motion_val[1][s->block_index[i]][1] = s->mv[1][0][1];
-            }
-        }
-    }
-
     if (ff_vc1_mbmode_intfrp[0][idx_mbmode][0] == MV_PMODE_INTFR_INTRA) { // intra MB
         for (i = 0; i < 4; i++) {
             s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0] = 0;
@@ -2235,8 +2214,8 @@
         GET_MQUANT();
         s->current_picture.qscale_table[mb_pos] = mquant;
         /* Set DC scale - y and c use the same (not sure if necessary here) */
-        s->y_dc_scale = s->y_dc_scale_table[mquant];
-        s->c_dc_scale = s->c_dc_scale_table[mquant];
+        s->y_dc_scale = s->y_dc_scale_table[FFABS(mquant)];
+        s->c_dc_scale = s->c_dc_scale_table[FFABS(mquant)];
         dst_idx = 0;
         for (i = 0; i < 6; i++) {
             v->a_avail = v->c_avail          = 0;
@@ -2267,6 +2246,42 @@
         }
     } else {
         s->mb_intra = v->is_intra[s->mb_x] = 0;
+
+        if (v->dmb_is_raw)
+            direct = get_bits1(gb);
+        else
+            direct = v->direct_mb_plane[mb_pos];
+
+        if (direct) {
+            if (s->next_picture_ptr->field_picture)
+                av_log(s->avctx, AV_LOG_WARNING, "Mixed frame/field direct mode not supported\n");
+            s->mv[0][0][0] = s->current_picture.motion_val[0][s->block_index[0]][0] = scale_mv(s->next_picture.motion_val[1][s->block_index[0]][0], v->bfraction, 0, s->quarter_sample);
+            s->mv[0][0][1] = s->current_picture.motion_val[0][s->block_index[0]][1] = scale_mv(s->next_picture.motion_val[1][s->block_index[0]][1], v->bfraction, 0, s->quarter_sample);
+            s->mv[1][0][0] = s->current_picture.motion_val[1][s->block_index[0]][0] = scale_mv(s->next_picture.motion_val[1][s->block_index[0]][0], v->bfraction, 1, s->quarter_sample);
+            s->mv[1][0][1] = s->current_picture.motion_val[1][s->block_index[0]][1] = scale_mv(s->next_picture.motion_val[1][s->block_index[0]][1], v->bfraction, 1, s->quarter_sample);
+
+            if (twomv) {
+                s->mv[0][2][0] = s->current_picture.motion_val[0][s->block_index[2]][0] = scale_mv(s->next_picture.motion_val[1][s->block_index[2]][0], v->bfraction, 0, s->quarter_sample);
+                s->mv[0][2][1] = s->current_picture.motion_val[0][s->block_index[2]][1] = scale_mv(s->next_picture.motion_val[1][s->block_index[2]][1], v->bfraction, 0, s->quarter_sample);
+                s->mv[1][2][0] = s->current_picture.motion_val[1][s->block_index[2]][0] = scale_mv(s->next_picture.motion_val[1][s->block_index[2]][0], v->bfraction, 1, s->quarter_sample);
+                s->mv[1][2][1] = s->current_picture.motion_val[1][s->block_index[2]][1] = scale_mv(s->next_picture.motion_val[1][s->block_index[2]][1], v->bfraction, 1, s->quarter_sample);
+
+                for (i = 1; i < 4; i += 2) {
+                    s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0] = s->mv[0][i-1][0];
+                    s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1] = s->mv[0][i-1][1];
+                    s->mv[1][i][0] = s->current_picture.motion_val[1][s->block_index[i]][0] = s->mv[1][i-1][0];
+                    s->mv[1][i][1] = s->current_picture.motion_val[1][s->block_index[i]][1] = s->mv[1][i-1][1];
+                }
+            } else {
+                for (i = 1; i < 4; i++) {
+                    s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0] = s->mv[0][0][0];
+                    s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1] = s->mv[0][0][1];
+                    s->mv[1][i][0] = s->current_picture.motion_val[1][s->block_index[i]][0] = s->mv[1][0][0];
+                    s->mv[1][i][1] = s->current_picture.motion_val[1][s->block_index[i]][1] = s->mv[1][0][1];
+                }
+            }
+        }
+
         if (!direct) {
             if (skipped || !s->mb_intra) {
                 bmvtype = decode012(gb);
@@ -2473,12 +2488,12 @@
             if (direct || bmvtype == BMV_TYPE_INTERPOLATED) {
                 ff_vc1_interp_mc(v);
             }
+            v->fieldtx_plane[mb_pos] = 0;
         }
     }
-    if (s->mb_x == s->mb_width - 1)
-        memmove(v->is_intra_base, v->is_intra, sizeof(v->is_intra_base[0]) * s->mb_stride);
     v->cbp[s->mb_x]      = block_cbp;
     v->ttblk[s->mb_x]    = block_tt;
+
     return 0;
 }
 
@@ -2525,30 +2540,27 @@
     s->mb_x = s->mb_y = 0;
     s->mb_intra         = 1;
     s->first_slice_line = 1;
-    for (s->mb_y = 0; s->mb_y < s->end_mb_y; s->mb_y++) {
+    for (s->mb_y = s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
         s->mb_x = 0;
         init_block_index(v);
         for (; s->mb_x < v->end_mb_x; s->mb_x++) {
-            uint8_t *dst[6];
             ff_update_block_index(s);
-            dst[0] = s->dest[0];
-            dst[1] = dst[0] + 8;
-            dst[2] = s->dest[0] + s->linesize * 8;
-            dst[3] = dst[2] + 8;
-            dst[4] = s->dest[1];
-            dst[5] = s->dest[2];
-            s->bdsp.clear_blocks(s->block[0]);
+            s->bdsp.clear_blocks(v->block[v->cur_blk_idx][0]);
             mb_pos = s->mb_x + s->mb_y * s->mb_width;
             s->current_picture.mb_type[mb_pos]                     = MB_TYPE_INTRA;
             s->current_picture.qscale_table[mb_pos]                = v->pq;
-            s->current_picture.motion_val[1][s->block_index[0]][0] = 0;
-            s->current_picture.motion_val[1][s->block_index[0]][1] = 0;
+            for (int i = 0; i < 4; i++) {
+                s->current_picture.motion_val[1][s->block_index[i]][0] = 0;
+                s->current_picture.motion_val[1][s->block_index[i]][1] = 0;
+            }
 
             // do actual MB decoding and displaying
             cbp = get_vlc2(&v->s.gb, ff_msmp4_mb_i_vlc.table, MB_INTRA_VLC_BITS, 2);
             v->s.ac_pred = get_bits1(&v->s.gb);
 
             for (k = 0; k < 6; k++) {
+                v->mb_type[0][s->block_index[k]] = 1;
+
                 val = ((cbp >> (5 - k)) & 1);
 
                 if (k < 4) {
@@ -2558,52 +2570,30 @@
                 }
                 cbp |= val << (5 - k);
 
-                vc1_decode_i_block(v, s->block[k], k, val, (k < 4) ? v->codingset : v->codingset2);
+                vc1_decode_i_block(v, v->block[v->cur_blk_idx][block_map[k]], k, val, (k < 4) ? v->codingset : v->codingset2);
 
                 if (CONFIG_GRAY && k > 3 && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                     continue;
-                v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
-                if (v->pq >= 9 && v->overlap) {
-                    if (v->rangeredfrm)
-                        for (j = 0; j < 64; j++)
-                            s->block[k][j] <<= 1;
-                    s->idsp.put_signed_pixels_clamped(s->block[k], dst[k],
-                                                      k & 4 ? s->uvlinesize
-                                                            : s->linesize);
-                } else {
-                    if (v->rangeredfrm)
-                        for (j = 0; j < 64; j++)
-                            s->block[k][j] = (s->block[k][j] - 64) << 1;
-                    s->idsp.put_pixels_clamped(s->block[k], dst[k],
-                                               k & 4 ? s->uvlinesize
-                                                     : s->linesize);
-                }
+                v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][block_map[k]]);
             }
 
-            if (v->pq >= 9 && v->overlap) {
-                if (s->mb_x) {
-                    v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);
-                    v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
-                    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
-                        v->vc1dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
-                        v->vc1dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
-                    }
-                }
-                v->vc1dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
-                v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
-                if (!s->first_slice_line) {
-                    v->vc1dsp.vc1_v_overlap(s->dest[0], s->linesize);
-                    v->vc1dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
-                    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
-                        v->vc1dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
-                        v->vc1dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
-                    }
-                }
-                v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
-                v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
+            if (v->overlap && v->pq >= 9) {
+                ff_vc1_i_overlap_filter(v);
+                if (v->rangeredfrm)
+                    for (k = 0; k < 6; k++)
+                        for (j = 0; j < 64; j++)
+                            v->block[v->cur_blk_idx][block_map[k]][j] <<= 1;
+                vc1_put_blocks_clamped(v, 1);
+            } else {
+                if (v->rangeredfrm)
+                    for (k = 0; k < 6; k++)
+                        for (j = 0; j < 64; j++)
+                            v->block[v->cur_blk_idx][block_map[k]][j] = (v->block[v->cur_blk_idx][block_map[k]][j] - 64) << 1;
+                vc1_put_blocks_clamped(v, 0);
             }
+
             if (v->s.loop_filter)
-                ff_vc1_loop_filter_iblk(v, v->pq);
+                ff_vc1_i_loop_filter(v);
 
             if (get_bits_count(&s->gb) > v->bits) {
                 ff_er_add_slice(&s->er, 0, 0, s->mb_x, s->mb_y, ER_MB_ERROR);
@@ -2611,6 +2601,11 @@
                        get_bits_count(&s->gb), v->bits);
                 return;
             }
+
+            v->topleft_blk_idx = (v->topleft_blk_idx + 1) % (v->end_mb_x + 2);
+            v->top_blk_idx = (v->top_blk_idx + 1) % (v->end_mb_x + 2);
+            v->left_blk_idx = (v->left_blk_idx + 1) % (v->end_mb_x + 2);
+            v->cur_blk_idx = (v->cur_blk_idx + 1) % (v->end_mb_x + 2);
         }
         if (!v->s.loop_filter)
             ff_mpeg_draw_horiz_band(s, s->mb_y * 16, 16);
@@ -2636,7 +2631,7 @@
     int cbp, val;
     uint8_t *coded_val;
     int mb_pos;
-    int mquant = v->pq;
+    int mquant;
     int mqdiff;
     GetBitContext *gb = &s->gb;
 
@@ -2680,13 +2675,15 @@
         s->mb_x = 0;
         init_block_index(v);
         for (;s->mb_x < s->mb_width; s->mb_x++) {
-            int16_t (*block)[64] = v->block[v->cur_blk_idx];
+            mquant = v->pq;
             ff_update_block_index(s);
-            s->bdsp.clear_blocks(block[0]);
+            s->bdsp.clear_blocks(v->block[v->cur_blk_idx][0]);
             mb_pos = s->mb_x + s->mb_y * s->mb_stride;
             s->current_picture.mb_type[mb_pos + v->mb_off]                         = MB_TYPE_INTRA;
-            s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0;
-            s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1] = 0;
+            for (int i = 0; i < 4; i++) {
+                s->current_picture.motion_val[1][s->block_index[i] + v->blocks_off][0] = 0;
+                s->current_picture.motion_val[1][s->block_index[i] + v->blocks_off][1] = 0;
+            }
 
             // do actual MB decoding and displaying
             if (v->fieldtx_is_raw)
@@ -2704,10 +2701,12 @@
 
             s->current_picture.qscale_table[mb_pos] = mquant;
             /* Set DC scale - y and c use the same */
-            s->y_dc_scale = s->y_dc_scale_table[mquant];
-            s->c_dc_scale = s->c_dc_scale_table[mquant];
+            s->y_dc_scale = s->y_dc_scale_table[FFABS(mquant)];
+            s->c_dc_scale = s->c_dc_scale_table[FFABS(mquant)];
 
             for (k = 0; k < 6; k++) {
+                v->mb_type[0][s->block_index[k]] = 1;
+
                 val = ((cbp >> (5 - k)) & 1);
 
                 if (k < 4) {
@@ -2720,18 +2719,19 @@
                 v->a_avail = !s->first_slice_line || (k == 2 || k == 3);
                 v->c_avail = !!s->mb_x || (k == 1 || k == 3);
 
-                vc1_decode_i_block_adv(v, block[k], k, val,
+                vc1_decode_i_block_adv(v, v->block[v->cur_blk_idx][block_map[k]], k, val,
                                        (k < 4) ? v->codingset : v->codingset2, mquant);
 
                 if (CONFIG_GRAY && k > 3 && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                     continue;
-                v->vc1dsp.vc1_inv_trans_8x8(block[k]);
+                v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][block_map[k]]);
             }
 
-            ff_vc1_smooth_overlap_filter_iblk(v);
-            vc1_put_signed_blocks_clamped(v);
+            if (v->overlap && (v->pq >= 9 || v->condover != CONDOVER_NONE))
+                ff_vc1_i_overlap_filter(v);
+            vc1_put_blocks_clamped(v, 1);
             if (v->s.loop_filter)
-                ff_vc1_loop_filter_iblk_delayed(v, v->pq);
+                ff_vc1_i_loop_filter(v);
 
             if (get_bits_count(&s->gb) > v->bits) {
                 // TODO: may need modification to handle slice coding
@@ -2740,6 +2740,10 @@
                        get_bits_count(&s->gb), v->bits);
                 return;
             }
+            inc_blk_idx(v->topleft_blk_idx);
+            inc_blk_idx(v->top_blk_idx);
+            inc_blk_idx(v->left_blk_idx);
+            inc_blk_idx(v->cur_blk_idx);
         }
         if (!v->s.loop_filter)
             ff_mpeg_draw_horiz_band(s, s->mb_y * 16, 16);
@@ -2748,15 +2752,6 @@
         s->first_slice_line = 0;
     }
 
-    /* raw bottom MB row */
-    s->mb_x = 0;
-    init_block_index(v);
-    for (; s->mb_x < s->mb_width; s->mb_x++) {
-        ff_update_block_index(s);
-        vc1_put_signed_blocks_clamped(v);
-        if (v->s.loop_filter)
-            ff_vc1_loop_filter_iblk_delayed(v, v->pq);
-    }
     if (v->s.loop_filter)
         ff_mpeg_draw_horiz_band(s, (s->end_mb_y - 1) * 16, 16);
     ff_er_add_slice(&s->er, 0, s->start_mb_y << v->field_mode, s->mb_width - 1,
@@ -2793,23 +2788,28 @@
         break;
     }
 
-    apply_loop_filter   = s->loop_filter && !(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY) &&
-                          v->fcm == PROGRESSIVE;
+    apply_loop_filter   = s->loop_filter && !(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY);
     s->first_slice_line = 1;
-    memset(v->cbp_base, 0, sizeof(v->cbp_base[0])*2*s->mb_stride);
+    memset(v->cbp_base, 0, sizeof(v->cbp_base[0]) * 3 * s->mb_stride);
     for (s->mb_y = s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
         s->mb_x = 0;
         init_block_index(v);
         for (; s->mb_x < s->mb_width; s->mb_x++) {
             ff_update_block_index(s);
 
-            if (v->fcm == ILACE_FIELD)
+            if (v->fcm == ILACE_FIELD) {
                 vc1_decode_p_mb_intfi(v);
-            else if (v->fcm == ILACE_FRAME)
+                if (apply_loop_filter)
+                    ff_vc1_p_loop_filter(v);
+            } else if (v->fcm == ILACE_FRAME) {
                 vc1_decode_p_mb_intfr(v);
-            else vc1_decode_p_mb(v);
-            if (s->mb_y != s->start_mb_y && apply_loop_filter)
-                ff_vc1_apply_p_loop_filter(v);
+                if (apply_loop_filter)
+                    ff_vc1_p_intfr_loop_filter(v);
+            } else {
+                vc1_decode_p_mb(v);
+                if (apply_loop_filter)
+                    ff_vc1_p_loop_filter(v);
+            }
             if (get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
                 // TODO: may need modification to handle slice coding
                 ff_er_add_slice(&s->er, 0, s->start_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR);
@@ -2817,23 +2817,27 @@
                        get_bits_count(&s->gb), v->bits, s->mb_x, s->mb_y);
                 return;
             }
+            inc_blk_idx(v->topleft_blk_idx);
+            inc_blk_idx(v->top_blk_idx);
+            inc_blk_idx(v->left_blk_idx);
+            inc_blk_idx(v->cur_blk_idx);
         }
-        memmove(v->cbp_base,      v->cbp,      sizeof(v->cbp_base[0])      * s->mb_stride);
-        memmove(v->ttblk_base,    v->ttblk,    sizeof(v->ttblk_base[0])    * s->mb_stride);
-        memmove(v->is_intra_base, v->is_intra, sizeof(v->is_intra_base[0]) * s->mb_stride);
-        memmove(v->luma_mv_base,  v->luma_mv,  sizeof(v->luma_mv_base[0])  * s->mb_stride);
+        memmove(v->cbp_base,
+                v->cbp - s->mb_stride,
+                sizeof(v->cbp_base[0]) * 2 * s->mb_stride);
+        memmove(v->ttblk_base,
+                v->ttblk - s->mb_stride,
+                sizeof(v->ttblk_base[0]) * 2 * s->mb_stride);
+        memmove(v->is_intra_base,
+                v->is_intra - s->mb_stride,
+                sizeof(v->is_intra_base[0]) * 2 * s->mb_stride);
+        memmove(v->luma_mv_base,
+                v->luma_mv - s->mb_stride,
+                sizeof(v->luma_mv_base[0]) * 2 * s->mb_stride);
         if (s->mb_y != s->start_mb_y)
             ff_mpeg_draw_horiz_band(s, (s->mb_y - 1) * 16, 16);
         s->first_slice_line = 0;
     }
-    if (apply_loop_filter) {
-        s->mb_x = 0;
-        init_block_index(v);
-        for (; s->mb_x < s->mb_width; s->mb_x++) {
-            ff_update_block_index(s);
-            ff_vc1_apply_p_loop_filter(v);
-        }
-    }
     if (s->end_mb_y >= s->start_mb_y)
         ff_mpeg_draw_horiz_band(s, (s->end_mb_y - 1) * 16, 16);
     ff_er_add_slice(&s->er, 0, s->start_mb_y << v->field_mode, s->mb_width - 1,
@@ -2876,12 +2880,19 @@
         for (; s->mb_x < s->mb_width; s->mb_x++) {
             ff_update_block_index(s);
 
-            if (v->fcm == ILACE_FIELD)
+            if (v->fcm == ILACE_FIELD) {
                 vc1_decode_b_mb_intfi(v);
-            else if (v->fcm == ILACE_FRAME)
+                if (v->s.loop_filter)
+                    ff_vc1_b_intfi_loop_filter(v);
+            } else if (v->fcm == ILACE_FRAME) {
                 vc1_decode_b_mb_intfr(v);
-            else
+                if (v->s.loop_filter)
+                    ff_vc1_p_intfr_loop_filter(v);
+            } else {
                 vc1_decode_b_mb(v);
+                if (v->s.loop_filter)
+                    ff_vc1_i_loop_filter(v);
+            }
             if (get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
                 // TODO: may need modification to handle slice coding
                 ff_er_add_slice(&s->er, 0, s->start_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR);
@@ -2889,9 +2900,16 @@
                        get_bits_count(&s->gb), v->bits, s->mb_x, s->mb_y);
                 return;
             }
-            if (v->s.loop_filter)
-                ff_vc1_loop_filter_iblk(v, v->pq);
         }
+        memmove(v->cbp_base,
+                v->cbp - s->mb_stride,
+                sizeof(v->cbp_base[0]) * 2 * s->mb_stride);
+        memmove(v->ttblk_base,
+                v->ttblk - s->mb_stride,
+                sizeof(v->ttblk_base[0]) * 2 * s->mb_stride);
+        memmove(v->is_intra_base,
+                v->is_intra - s->mb_stride,
+                sizeof(v->is_intra_base[0]) * 2 * s->mb_stride);
         if (!v->s.loop_filter)
             ff_mpeg_draw_horiz_band(s, s->mb_y * 16, 16);
         else if (s->mb_y)

diff --git a/libavcodec/vc1_loopfilter.c b/libavcodec/vc1_loopfilter.c
index 025776b..0f990cc 100644
--- a/libavcodec/vc1_loopfilter.c
+++ b/libavcodec/vc1_loopfilter.c

@@ -31,329 +31,1211 @@
 #include "vc1.h"
 #include "vc1dsp.h"
 
-void ff_vc1_loop_filter_iblk(VC1Context *v, int pq)
+static av_always_inline void vc1_h_overlap_filter(VC1Context *v, int16_t (*left_block)[64],
+                                                  int16_t (*right_block)[64], int left_fieldtx,
+                                                  int right_fieldtx, int block_num)
 {
-    MpegEncContext *s = &v->s;
-    int j;
-    if (!s->first_slice_line) {
-        v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
-        if (s->mb_x)
-            v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
-        v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
-        if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
-        for (j = 0; j < 2; j++) {
-            v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq);
-            if (s->mb_x)
-                v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
-        }
-    }
-    v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq);
+    switch (block_num) {
+    case 0:
+        v->vc1dsp.vc1_h_s_overlap(left_block[2],
+                                  right_block[0],
+                                  left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8,
+                                  left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8,
+                                  left_fieldtx || right_fieldtx ? 0 : 1);
+        break;
 
-    if (s->mb_y == s->end_mb_y - 1) {
-        if (s->mb_x) {
-            v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
-            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
-            v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
-            v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
-            }
-        }
-        v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
+    case 1:
+        v->vc1dsp.vc1_h_s_overlap(right_block[0],
+                                  right_block[2],
+                                  8,
+                                  8,
+                                  right_fieldtx ? 0 : 1);
+        break;
+
+    case 2:
+        v->vc1dsp.vc1_h_s_overlap(!left_fieldtx && right_fieldtx ? left_block[2] + 8 : left_block[3],
+                                  left_fieldtx && !right_fieldtx ? right_block[0] + 8 : right_block[1],
+                                  left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8,
+                                  left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8,
+                                  left_fieldtx || right_fieldtx ? 2 : 1);
+        break;
+
+    case 3:
+        v->vc1dsp.vc1_h_s_overlap(right_block[1],
+                                  right_block[3],
+                                  8,
+                                  8,
+                                  right_fieldtx ? 2 : 1);
+        break;
+
+    case 4:
+    case 5:
+        v->vc1dsp.vc1_h_s_overlap(left_block[block_num], right_block[block_num], 8, 8, 1);
+        break;
     }
 }
 
-void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
+static av_always_inline void vc1_v_overlap_filter(VC1Context *v, int16_t (*top_block)[64],
+                                                  int16_t (*bottom_block)[64], int block_num)
 {
-    MpegEncContext *s = &v->s;
-    int j;
+    switch (block_num) {
+    case 0:
+        v->vc1dsp.vc1_v_s_overlap(top_block[1], bottom_block[0]);
+        break;
 
-    /* The loopfilter runs 1 row and 1 column behind the overlap filter, which
-     * means it runs two rows/cols behind the decoding loop. */
-    if (!s->first_slice_line) {
-        if (s->mb_x) {
-            if (s->mb_y >= s->start_mb_y + 2) {
-                v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
+    case 1:
+        v->vc1dsp.vc1_v_s_overlap(top_block[3], bottom_block[2]);
+        break;
 
-                if (s->mb_x >= 2)
-                    v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq);
-                v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq);
-                if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
-                for (j = 0; j < 2; j++) {
-                    v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
-                    if (s->mb_x >= 2) {
-                        v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize - 8, s->uvlinesize, pq);
-                    }
-                }
-            }
-            v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize - 16, s->linesize, pq);
-        }
+    case 2:
+        v->vc1dsp.vc1_v_s_overlap(bottom_block[0], bottom_block[1]);
+        break;
 
-        if (s->mb_x == s->mb_width - 1) {
-            if (s->mb_y >= s->start_mb_y + 2) {
-                v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
+    case 3:
+        v->vc1dsp.vc1_v_s_overlap(bottom_block[2], bottom_block[3]);
+        break;
 
-                if (s->mb_x)
-                    v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq);
-                v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq);
-                if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
-                for (j = 0; j < 2; j++) {
-                    v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
-                    if (s->mb_x >= 2) {
-                        v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize, s->uvlinesize, pq);
-                    }
-                }
-            }
-            v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize, s->linesize, pq);
-        }
-
-        if (s->mb_y == s->end_mb_y) {
-            if (s->mb_x) {
-                if (s->mb_x >= 2)
-                    v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
-                v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq);
-                if (s->mb_x >= 2 && (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))) {
-                    for (j = 0; j < 2; j++) {
-                        v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
-                    }
-                }
-            }
-
-            if (s->mb_x == s->mb_width - 1) {
-                if (s->mb_x)
-                    v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
-                v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
-                if (s->mb_x && (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))) {
-                    for (j = 0; j < 2; j++) {
-                        v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
-                    }
-                }
-            }
-        }
+    case 4:
+    case 5:
+        v->vc1dsp.vc1_v_s_overlap(top_block[block_num], bottom_block[block_num]);
+        break;
     }
 }
 
-void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v)
+void ff_vc1_i_overlap_filter(VC1Context *v)
 {
     MpegEncContext *s = &v->s;
-    int mb_pos;
+    int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64];
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+    int i;
 
-    if (v->condover == CONDOVER_NONE)
-        return;
-
-    mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+    topleft_blk = v->block[v->topleft_blk_idx];
+    top_blk = v->block[v->top_blk_idx];
+    left_blk = v->block[v->left_blk_idx];
+    cur_blk = v->block[v->cur_blk_idx];
 
     /* Within a MB, the horizontal overlap always runs before the vertical.
-     * To accomplish that, we run the H on left and internal borders of the
-     * currently decoded MB. Then, we wait for the next overlap iteration
-     * to do H overlap on the right edge of this MB, before moving over and
-     * running the V overlap. Therefore, the V overlap makes us trail by one
-     * MB col and the H overlap filter makes us trail by one MB row. This
-     * is reflected in the time at which we run the put_pixels loop. */
-    if (v->condover == CONDOVER_ALL || v->pq >= 9 || v->over_flags_plane[mb_pos]) {
-        if (s->mb_x && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
-                        v->over_flags_plane[mb_pos - 1])) {
-            v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][1],
-                                      v->block[v->cur_blk_idx][0]);
-            v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3],
-                                      v->block[v->cur_blk_idx][2]);
-            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
-                v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4],
-                                          v->block[v->cur_blk_idx][4]);
-                v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5],
-                                          v->block[v->cur_blk_idx][5]);
-            }
-        }
-        v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][0],
-                                  v->block[v->cur_blk_idx][1]);
-        v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][2],
-                                  v->block[v->cur_blk_idx][3]);
+     * To accomplish that, we run the H on the left and internal vertical
+     * borders of the currently decoded MB. Then, we wait for the next overlap
+     * iteration to do H overlap on the right edge of this MB, before moving
+     * over and running the V overlap on the top and internal horizontal
+     * borders. Therefore, the H overlap trails by one MB col and the
+     * V overlap trails by one MB row. This is reflected in the time at which
+     * we run the put_pixels loop, i.e. delayed by one row and one column. */
+    for (i = 0; i < block_count; i++) {
+        if (s->mb_x == 0 && (i & 5) != 1)
+            continue;
 
-        if (s->mb_x == s->mb_width - 1) {
-            if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
-                                         v->over_flags_plane[mb_pos - s->mb_stride])) {
-                v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][2],
-                                          v->block[v->cur_blk_idx][0]);
-                v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3],
-                                          v->block[v->cur_blk_idx][1]);
-                if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
-                    v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4],
-                                              v->block[v->cur_blk_idx][4]);
-                    v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5],
-                                              v->block[v->cur_blk_idx][5]);
-                }
-            }
-            v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][0],
-                                      v->block[v->cur_blk_idx][2]);
-            v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][1],
-                                      v->block[v->cur_blk_idx][3]);
-        }
+        if (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
+                           (v->condover == CONDOVER_ALL ||
+                            (v->over_flags_plane[mb_pos] &&
+                             ((i & 5) == 1 || v->over_flags_plane[mb_pos - 1])))))
+            vc1_h_overlap_filter(v,
+                                 s->mb_x ? left_blk : cur_blk, cur_blk,
+                                 v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1],
+                                 v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos],
+                                 i);
     }
-    if (s->mb_x && (v->condover == CONDOVER_ALL || v->over_flags_plane[mb_pos - 1])) {
-        if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
-                                     v->over_flags_plane[mb_pos - s->mb_stride - 1])) {
-            v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][2],
-                                      v->block[v->left_blk_idx][0]);
-            v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3],
-                                      v->block[v->left_blk_idx][1]);
-            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
-                v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4],
-                                          v->block[v->left_blk_idx][4]);
-                v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5],
-                                          v->block[v->left_blk_idx][5]);
-            }
+
+    if (v->fcm != ILACE_FRAME)
+        for (i = 0; i < block_count; i++) {
+            if (s->first_slice_line && !(i & 2))
+                continue;
+
+            if (s->mb_x &&
+                (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
+                                (v->condover == CONDOVER_ALL ||
+                                 (v->over_flags_plane[mb_pos - 1] &&
+                                  ((i & 2) || v->over_flags_plane[mb_pos - 1 - s->mb_stride]))))))
+                vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
+            if (s->mb_x == s->mb_width - 1 &&
+                (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
+                                (v->condover == CONDOVER_ALL ||
+                                 (v->over_flags_plane[mb_pos] &&
+                                  ((i & 2) || v->over_flags_plane[mb_pos - s->mb_stride]))))))
+                vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
         }
-        v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][0],
-                                  v->block[v->left_blk_idx][2]);
-        v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][1],
-                                  v->block[v->left_blk_idx][3]);
-    }
 }
 
-static av_always_inline void vc1_apply_p_v_loop_filter(VC1Context *v, int block_num)
-{
-    MpegEncContext *s  = &v->s;
-    int mb_cbp         = v->cbp[s->mb_x - s->mb_stride],
-        block_cbp      = mb_cbp      >> (block_num * 4), bottom_cbp,
-        mb_is_intra    = v->is_intra[s->mb_x - s->mb_stride],
-        block_is_intra = mb_is_intra >> block_num, bottom_is_intra;
-    int idx, linesize  = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
-    uint8_t *dst;
-
-    if (block_num > 3) {
-        dst      = s->dest[block_num - 3];
-    } else {
-        dst      = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 8) * linesize;
-    }
-    if (s->mb_y != s->end_mb_y || block_num < 2) {
-        int16_t (*mv)[2];
-        int mv_stride;
-
-        if (block_num > 3) {
-            bottom_cbp      = v->cbp[s->mb_x]      >> (block_num * 4);
-            bottom_is_intra = v->is_intra[s->mb_x] >> block_num;
-            mv              = &v->luma_mv[s->mb_x - s->mb_stride];
-            mv_stride       = s->mb_stride;
-        } else {
-            bottom_cbp      = (block_num < 2) ? (mb_cbp               >> ((block_num + 2) * 4))
-                                              : (v->cbp[s->mb_x]      >> ((block_num - 2) * 4));
-            bottom_is_intra = (block_num < 2) ? (mb_is_intra          >> (block_num + 2))
-                                              : (v->is_intra[s->mb_x] >> (block_num - 2));
-            mv_stride       = s->b8_stride;
-            mv              = &s->current_picture.motion_val[0][s->block_index[block_num] - 2 * mv_stride];
-        }
-
-        if (bottom_is_intra & 1 || block_is_intra & 1 ||
-            mv[0][0] != mv[mv_stride][0] || mv[0][1] != mv[mv_stride][1]) {
-            v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
-        } else {
-            idx = ((bottom_cbp >> 2) | block_cbp) & 3;
-            if (idx == 3) {
-                v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
-            } else if (idx) {
-                if (idx == 1)
-                    v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
-                else
-                    v->vc1dsp.vc1_v_loop_filter4(dst,     linesize, v->pq);
-            }
-        }
-    }
-
-    dst -= 4 * linesize;
-    ttblk = (v->ttblk[s->mb_x - s->mb_stride] >> (block_num * 4)) & 0xF;
-    if (ttblk == TT_4X4 || ttblk == TT_8X4) {
-        idx = (block_cbp | (block_cbp >> 2)) & 3;
-        if (idx == 3) {
-            v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
-        } else if (idx) {
-            if (idx == 1)
-                v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
-            else
-                v->vc1dsp.vc1_v_loop_filter4(dst,     linesize, v->pq);
-        }
-    }
-}
-
-static av_always_inline void vc1_apply_p_h_loop_filter(VC1Context *v, int block_num)
-{
-    MpegEncContext *s  = &v->s;
-    int mb_cbp         = v->cbp[s->mb_x - 1 - s->mb_stride],
-        block_cbp      = mb_cbp      >> (block_num * 4), right_cbp,
-        mb_is_intra    = v->is_intra[s->mb_x - 1 - s->mb_stride],
-        block_is_intra = mb_is_intra >> block_num, right_is_intra;
-    int idx, linesize  = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
-    uint8_t *dst;
-
-    if (block_num > 3) {
-        dst = s->dest[block_num - 3] - 8 * linesize;
-    } else {
-        dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 16) * linesize - 8;
-    }
-
-    if (s->mb_x != s->mb_width || !(block_num & 5)) {
-        int16_t (*mv)[2];
-
-        if (block_num > 3) {
-            right_cbp      = v->cbp[s->mb_x - s->mb_stride] >> (block_num * 4);
-            right_is_intra = v->is_intra[s->mb_x - s->mb_stride] >> block_num;
-            mv             = &v->luma_mv[s->mb_x - s->mb_stride - 1];
-        } else {
-            right_cbp      = (block_num & 1) ? (v->cbp[s->mb_x - s->mb_stride]      >> ((block_num - 1) * 4))
-                                             : (mb_cbp                              >> ((block_num + 1) * 4));
-            right_is_intra = (block_num & 1) ? (v->is_intra[s->mb_x - s->mb_stride] >> (block_num - 1))
-                                             : (mb_is_intra                         >> (block_num + 1));
-            mv             = &s->current_picture.motion_val[0][s->block_index[block_num] - s->b8_stride * 2 - 2];
-        }
-        if (block_is_intra & 1 || right_is_intra & 1 || mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1]) {
-            v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
-        } else {
-            idx = ((right_cbp >> 1) | block_cbp) & 5; // FIXME check
-            if (idx == 5) {
-                v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
-            } else if (idx) {
-                if (idx == 1)
-                    v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize, linesize, v->pq);
-                else
-                    v->vc1dsp.vc1_h_loop_filter4(dst,                linesize, v->pq);
-            }
-        }
-    }
-
-    dst -= 4;
-    ttblk = (v->ttblk[s->mb_x - s->mb_stride - 1] >> (block_num * 4)) & 0xf;
-    if (ttblk == TT_4X4 || ttblk == TT_4X8) {
-        idx = (block_cbp | (block_cbp >> 1)) & 5;
-        if (idx == 5) {
-            v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
-        } else if (idx) {
-            if (idx == 1)
-                v->vc1dsp.vc1_h_loop_filter4(dst + linesize * 4, linesize, v->pq);
-            else
-                v->vc1dsp.vc1_h_loop_filter4(dst,                linesize, v->pq);
-        }
-    }
-}
-
-void ff_vc1_apply_p_loop_filter(VC1Context *v)
+void ff_vc1_p_overlap_filter(VC1Context *v)
 {
     MpegEncContext *s = &v->s;
-    int i;
+    int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64];
     int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+    int i;
+
+    topleft_blk = v->block[v->topleft_blk_idx];
+    top_blk = v->block[v->top_blk_idx];
+    left_blk = v->block[v->left_blk_idx];
+    cur_blk = v->block[v->cur_blk_idx];
 
     for (i = 0; i < block_count; i++) {
-        vc1_apply_p_v_loop_filter(v, i);
+        if (s->mb_x == 0 && (i & 5) != 1)
+            continue;
+
+        if (v->mb_type[0][s->block_index[i]] && v->mb_type[0][s->block_index[i] - 1])
+            vc1_h_overlap_filter(v,
+                                 s->mb_x ? left_blk : cur_blk, cur_blk,
+                                 v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1],
+                                 v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos],
+                                 i);
     }
 
-    /* V always precedes H, therefore we run H one MB before V;
-     * at the end of a row, we catch up to complete the row */
-    if (s->mb_x) {
+    if (v->fcm != ILACE_FRAME)
         for (i = 0; i < block_count; i++) {
-            vc1_apply_p_h_loop_filter(v, i);
+            if (s->first_slice_line && !(i & 2))
+                continue;
+
+            if (s->mb_x && v->mb_type[0][s->block_index[i] - 2 + (i > 3)] &&
+                v->mb_type[0][s->block_index[i] - s->block_wrap[i] - 2 + (i > 3)])
+                vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
+            if (s->mb_x == s->mb_width - 1)
+                if (v->mb_type[0][s->block_index[i]] &&
+                    v->mb_type[0][s->block_index[i] - s->block_wrap[i]])
+                    vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
+        }
+}
+
+#define LEFT_EDGE   (1 << 0)
+#define RIGHT_EDGE  (1 << 1)
+#define TOP_EDGE    (1 << 2)
+#define BOTTOM_EDGE (1 << 3)
+
+static av_always_inline void vc1_i_h_loop_filter(VC1Context *v, uint8_t *dest,
+                                                 uint32_t flags, int block_num)
+{
+    MpegEncContext *s  = &v->s;
+    int pq = v->pq;
+    uint8_t *dst;
+
+    if (block_num & 2)
+        return;
+
+    if (!(flags & LEFT_EDGE) || (block_num & 5) == 1) {
+        if (block_num > 3)
+            dst = dest;
+        else
+            dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+        if (v->fcm == ILACE_FRAME)
+            if (block_num > 3) {
+                v->vc1dsp.vc1_h_loop_filter4(dst, 2 * s->uvlinesize, pq);
+                v->vc1dsp.vc1_h_loop_filter4(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
+            } else {
+                v->vc1dsp.vc1_h_loop_filter8(dst, 2 * s->linesize, pq);
+                v->vc1dsp.vc1_h_loop_filter8(dst + s->linesize, 2 * s->linesize, pq);
+            }
+        else
+            if (block_num > 3)
+                v->vc1dsp.vc1_h_loop_filter8(dst, s->uvlinesize, pq);
+            else
+                v->vc1dsp.vc1_h_loop_filter16(dst, s->linesize, pq);
+    }
+}
+
+static av_always_inline void vc1_i_v_loop_filter(VC1Context *v, uint8_t *dest,
+                                                 uint32_t flags, uint8_t fieldtx,
+                                                 int block_num)
+{
+    MpegEncContext *s  = &v->s;
+    int pq = v->pq;
+    uint8_t *dst;
+
+    if ((block_num & 5) == 1)
+        return;
+
+    if (!(flags & TOP_EDGE) || block_num & 2) {
+        if (block_num > 3)
+            dst = dest;
+        else
+            dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+        if (v->fcm == ILACE_FRAME) {
+            if (block_num > 3) {
+                v->vc1dsp.vc1_v_loop_filter8(dst, 2 * s->uvlinesize, pq);
+                v->vc1dsp.vc1_v_loop_filter8(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
+            } else if (block_num < 2 || !fieldtx) {
+                v->vc1dsp.vc1_v_loop_filter16(dst, 2 * s->linesize, pq);
+                v->vc1dsp.vc1_v_loop_filter16(dst + s->linesize, 2 * s->linesize, pq);
+            }
+        } else
+            if (block_num > 3)
+                v->vc1dsp.vc1_v_loop_filter8(dst, s->uvlinesize, pq);
+            else
+                v->vc1dsp.vc1_v_loop_filter16(dst, s->linesize, pq);
+    }
+}
+
+void ff_vc1_i_loop_filter(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+    uint8_t *dest, fieldtx;
+    uint32_t flags = 0;
+    int i;
+
+    /* Within a MB, the vertical loop filter always runs before the horizontal.
+     * To accomplish that, we run the V loop filter on top and internal
+     * horizontal borders of the last overlap filtered MB. Then, we wait for
+     * the loop filter iteration on the next row to do V loop filter on the
+     * bottom edge of this MB, before moving over and running the H loop
+     * filter on the left and internal vertical borders. Therefore, the loop
+     * filter trails by one row and one column relative to the overlap filter
+     * and two rows and two columns relative to the decoding loop. */
+    if (!s->first_slice_line) {
+        dest = s->dest[0] - 16 * s->linesize - 16;
+        flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
+        if (s->mb_x) {
+            fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
+            for (i = 0; i < block_count; i++)
+                vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, fieldtx, i);
+        }
+        if (s->mb_x == v->end_mb_x - 1) {
+            dest += 16;
+            fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
+            for (i = 0; i < block_count; i++)
+                vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, flags, fieldtx, i);
+        }
+    }
+    if (s->mb_y == s->end_mb_y - 1) {
+        dest = s->dest[0] - 16;
+        flags = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
+        if (s->mb_x) {
+            fieldtx = v->fieldtx_plane[mb_pos - 1];
+            for (i = 0; i < block_count; i++)
+                vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, fieldtx, i);
+        }
+        if (s->mb_x == v->end_mb_x - 1) {
+            dest += 16;
+            fieldtx = v->fieldtx_plane[mb_pos];
+            for (i = 0; i < block_count; i++)
+                vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, flags, fieldtx, i);
+        }
+    }
+
+    if (s->mb_y >= s->start_mb_y + 2) {
+        dest = s->dest[0] - 32 * s->linesize - 16;
+        if (s->mb_x) {
+            flags = s->mb_x == 1 ? LEFT_EDGE : 0;
+            for (i = 0; i < block_count; i++)
+                vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest, flags, i);
+        }
+        if (s->mb_x == v->end_mb_x - 1) {
+            dest += 16;
+            flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
+            for (i = 0; i < block_count; i++)
+                vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest, flags, i);
+        }
+    }
+    if (s->mb_y == s->end_mb_y - 1) {
+        if (s->mb_y >= s->start_mb_y + 1) {
+            dest = s->dest[0] - 16 * s->linesize - 16;
+            if (s->mb_x) {
+                flags = s->mb_x == 1 ? LEFT_EDGE : 0;
+                for (i = 0; i < block_count; i++)
+                    vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, i);
+            }
+            if (s->mb_x == v->end_mb_x - 1) {
+                flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
+                dest += 16;
+                for (i = 0; i < block_count; i++)
+                    vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, flags, i);
+            }
+        }
+        dest = s->dest[0] - 16;
+        if (s->mb_x) {
+            flags = s->mb_x == 1 ? LEFT_EDGE : 0;
+            for (i = 0; i < block_count; i++)
+                vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, i);
+        }
+        if (s->mb_x == v->end_mb_x - 1) {
+            dest += 16;
+            flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
+            for (i = 0; i < block_count; i++)
+                vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, flags, i);
+        }
+    }
+}
+
+static av_always_inline void vc1_p_h_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
+                                                 uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
+                                                 int *ttblk, uint32_t flags, int block_num)
+{
+    MpegEncContext *s  = &v->s;
+    int pq = v->pq;
+    uint32_t left_cbp = cbp[0] >> (block_num * 4), right_cbp;
+    uint8_t left_is_intra, right_is_intra;
+    int tt;
+    int idx, linesize  = block_num > 3 ? s->uvlinesize : s->linesize;
+    uint8_t *dst;
+
+    if (block_num > 3)
+        dst = dest;
+    else
+        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+    if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
+        left_is_intra = is_intra[0] & (1 << block_num);
+
+        if (block_num > 3) {
+            right_is_intra = is_intra[1] & (1 << block_num);
+            right_cbp = cbp[1] >> (block_num * 4);
+        } else if (block_num & 1) {
+            right_is_intra = is_intra[1] & (1 << block_num - 1);
+            right_cbp = cbp[1] >> ((block_num - 1) * 4);
+        } else {
+            right_is_intra = is_intra[0] & (1 << block_num + 1);
+            right_cbp = cbp[0] >> ((block_num + 1) * 4);
+        }
+
+        if (left_is_intra || right_is_intra ||
+            mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1] ||
+            (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[1]))
+            v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
+        else {
+            idx = (left_cbp | (right_cbp >> 1)) & 5;
+            if (idx & 1)
+                v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 8, linesize, pq);
+            if (idx & 4)
+                v->vc1dsp.vc1_h_loop_filter4(dst + 8, linesize, pq);
+        }
+    }
+
+    tt = ttblk[0] >> (block_num * 4) & 0xf;
+    if (tt == TT_4X4 || tt == TT_4X8) {
+        if (left_cbp & 3)
+            v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
+        if (left_cbp & 12)
+            v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
+    }
+}
+
+static av_always_inline void vc1_p_v_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
+                                                 uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
+                                                 int *ttblk, uint32_t flags, int block_num)
+{
+    MpegEncContext *s  = &v->s;
+    int pq = v->pq;
+    uint32_t top_cbp = cbp[0] >> (block_num * 4), bottom_cbp;
+    uint8_t top_is_intra, bottom_is_intra;
+    int tt;
+    int idx, linesize  = block_num > 3 ? s->uvlinesize : s->linesize;
+    uint8_t *dst;
+
+    if (block_num > 3)
+        dst = dest;
+    else
+        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+    if(!(flags & BOTTOM_EDGE) || block_num < 2) {
+        top_is_intra = is_intra[0] & (1 << block_num);
+
+        if (block_num > 3) {
+            bottom_is_intra = is_intra[s->mb_stride] & (1 << block_num);
+            bottom_cbp = cbp[s->mb_stride] >> (block_num * 4);
+        } else if (block_num < 2) {
+            bottom_is_intra = is_intra[0] & (1 << block_num + 2);
+            bottom_cbp = cbp[0] >> ((block_num + 2) * 4);
+        } else {
+            bottom_is_intra = is_intra[s->mb_stride] & (1 << block_num - 2);
+            bottom_cbp = cbp[s->mb_stride] >> ((block_num - 2) * 4);
+        }
+
+        if (top_is_intra || bottom_is_intra ||
+            mv[0][0] != mv[block_num > 3 ? s->mb_stride : s->b8_stride][0] ||
+            mv[0][1] != mv[block_num > 3 ? s->mb_stride : s->b8_stride][1] ||
+            (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[block_num > 3 ? s->mb_stride : s->b8_stride]))
+            v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
+        else {
+            idx = (top_cbp | (bottom_cbp >> 2)) & 3;
+            if (idx & 1)
+                v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize + 4, linesize, pq);
+            if (idx & 2)
+                v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize, linesize, pq);
+        }
+    }
+
+    tt = ttblk[0] >> (block_num * 4) & 0xf;
+    if (tt == TT_4X4 || tt == TT_8X4) {
+        if (top_cbp & 5)
+            v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
+        if (top_cbp & 10)
+            v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
+    }
+}
+
+void ff_vc1_p_loop_filter(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    uint8_t *dest;
+    uint32_t *cbp;
+    uint8_t *is_intra;
+    int16_t (*uvmv)[2];
+    int *ttblk;
+    uint32_t flags;
+    int i;
+
+    /* Within a MB, the vertical loop filter always runs before the horizontal.
+     * To accomplish that, we run the V loop filter on all applicable
+     * horizontal borders of the MB above the last overlap filtered MB. Then,
+     * we wait for the next loop filter iteration to do H loop filter on all
+     * applicable vertical borders of this MB. Therefore, the loop filter
+     * trails by one row and one column relative to the overlap filter and two
+     * rows and two columns relative to the decoding loop. */
+    if (s->mb_y >= s->start_mb_y + 2) {
+        if (s->mb_x) {
+            dest = s->dest[0] - 32 * s->linesize - 16;
+            cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
+            is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
+            uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
+            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
+            flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
+            for (i = 0; i < block_count; i++)
+                vc1_p_v_loop_filter(v,
+                                    i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
+                                    cbp,
+                                    is_intra,
+                                    i > 3 ? uvmv :
+                                            &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
+                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
+                                            &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
+                                    ttblk,
+                                    flags,
+                                    i);
         }
         if (s->mb_x == s->mb_width - 1) {
-            s->mb_x++;
-            ff_update_block_index(s);
-            for (i = 0; i < block_count; i++) {
-                vc1_apply_p_h_loop_filter(v, i);
+            dest = s->dest[0] - 32 * s->linesize;
+            cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
+            is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
+            uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
+            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
+            flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
+            for (i = 0; i < block_count; i++)
+                vc1_p_v_loop_filter(v,
+                                    i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
+                                    cbp,
+                                    is_intra,
+                                    i > 3 ? uvmv :
+                                            &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
+                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
+                                            &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
+                                    ttblk,
+                                    flags,
+                                    i);
+        }
+    }
+    if (s->mb_y == s->end_mb_y - 1) {
+        if (s->mb_x) {
+            if (s->mb_y >= s->start_mb_y + 1) {
+                dest = s->dest[0] - 16 * s->linesize - 16;
+                cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
+                is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
+                uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
+                ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
+                flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
+                for (i = 0; i < block_count; i++)
+                    vc1_p_v_loop_filter(v,
+                                        i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
+                                        cbp,
+                                        is_intra,
+                                        i > 3 ? uvmv :
+                                                &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
+                                        i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
+                                                &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
+                                        ttblk,
+                                        flags,
+                                        i);
             }
+            dest = s->dest[0] - 16;
+            cbp = &v->cbp[s->mb_x - 1];
+            is_intra = &v->is_intra[s->mb_x - 1];
+            uvmv = &v->luma_mv[s->mb_x - 1];
+            ttblk = &v->ttblk[s->mb_x - 1];
+            flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
+            for (i = 0; i < block_count; i++)
+                vc1_p_v_loop_filter(v,
+                                    i > 3 ? s->dest[i - 3] - 8 : dest,
+                                    cbp,
+                                    is_intra,
+                                    i > 3 ? uvmv :
+                                            &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
+                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
+                                            &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
+                                    ttblk,
+                                    flags,
+                                    i);
+        }
+        if (s->mb_x == s->mb_width - 1) {
+            if (s->mb_y >= s->start_mb_y + 1) {
+                dest = s->dest[0] - 16 * s->linesize;
+                cbp = &v->cbp[s->mb_x - s->mb_stride];
+                is_intra = &v->is_intra[s->mb_x - s->mb_stride];
+                uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
+                ttblk = &v->ttblk[s->mb_x - s->mb_stride];
+                flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
+                for (i = 0; i < block_count; i++)
+                    vc1_p_v_loop_filter(v,
+                                        i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
+                                        cbp,
+                                        is_intra,
+                                        i > 3 ? uvmv :
+                                                &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
+                                        i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
+                                                &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
+                                        ttblk,
+                                        flags,
+                                        i);
+            }
+            dest = s->dest[0];
+            cbp = &v->cbp[s->mb_x];
+            is_intra = &v->is_intra[s->mb_x];
+            uvmv = &v->luma_mv[s->mb_x];
+            ttblk = &v->ttblk[s->mb_x];
+            flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
+            for (i = 0; i < block_count; i++)
+                vc1_p_v_loop_filter(v,
+                                    i > 3 ? s->dest[i - 3] : dest,
+                                    cbp,
+                                    is_intra,
+                                    i > 3 ? uvmv :
+                                            &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
+                                    i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
+                                            &v->mv_f[0][s->block_index[i] + v->blocks_off],
+                                    ttblk,
+                                    flags,
+                                    i);
+        }
+    }
+
+    if (s->mb_y >= s->start_mb_y + 2) {
+        if (s->mb_x >= 2) {
+            dest = s->dest[0] - 32 * s->linesize - 32;
+            cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 2];
+            is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 2];
+            uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 2];
+            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 2];
+            flags = s->mb_x == 2 ? LEFT_EDGE : 0;
+            for (i = 0; i < block_count; i++)
+                vc1_p_h_loop_filter(v,
+                                    i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 16 : dest,
+                                    cbp,
+                                    is_intra,
+                                    i > 3 ? uvmv :
+                                            &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
+                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 2 + v->mb_off] :
+                                            &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
+                                    ttblk,
+                                    flags,
+                                    i);
+        }
+        if (s->mb_x == s->mb_width - 1) {
+            if (s->mb_x >= 1) {
+                dest = s->dest[0] - 32 * s->linesize - 16;
+                cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
+                is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
+                uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
+                ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
+                flags = s->mb_x == 1 ? LEFT_EDGE : 0;
+                for (i = 0; i < block_count; i++)
+                        vc1_p_h_loop_filter(v,
+                                            i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
+                                            cbp,
+                                            is_intra,
+                                            i > 3 ? uvmv :
+                                                    &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
+                                            i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
+                                                    &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
+                                            ttblk,
+                                            flags,
+                                            i);
+            }
+            dest = s->dest[0] - 32 * s->linesize;
+            cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
+            is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
+            uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
+            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
+            flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
+            for (i = 0; i < block_count; i++)
+                vc1_p_h_loop_filter(v,
+                                    i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
+                                    cbp,
+                                    is_intra,
+                                    i > 3 ? uvmv :
+                                            &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
+                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
+                                            &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
+                                    ttblk,
+                                    flags,
+                                    i);
+        }
+    }
+    if (s->mb_y == s->end_mb_y - 1) {
+        if (s->mb_y >= s->start_mb_y + 1) {
+            if (s->mb_x >= 2) {
+                dest = s->dest[0] - 16 * s->linesize - 32;
+                cbp = &v->cbp[s->mb_x - s->mb_stride - 2];
+                is_intra = &v->is_intra[s->mb_x - s->mb_stride - 2];
+                uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 2];
+                ttblk = &v->ttblk[s->mb_x - s->mb_stride - 2];
+                flags = s->mb_x == 2 ? LEFT_EDGE : 0;
+                for (i = 0; i < block_count; i++)
+                    vc1_p_h_loop_filter(v,
+                                        i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 16 : dest,
+                                        cbp,
+                                        is_intra,
+                                        i > 3 ? uvmv :
+                                                &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
+                                        i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 2 + v->mb_off] :
+                                                &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
+                                        ttblk,
+                                        flags,
+                                        i);
+            }
+            if (s->mb_x == s->mb_width - 1) {
+                if (s->mb_x >= 1) {
+                    dest = s->dest[0] - 16 * s->linesize - 16;
+                    cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
+                    is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
+                    uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
+                    ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
+                    flags = s->mb_x == 1 ? LEFT_EDGE : 0;
+                    for (i = 0; i < block_count; i++)
+                            vc1_p_h_loop_filter(v,
+                                                i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
+                                                cbp,
+                                                is_intra,
+                                                i > 3 ? uvmv :
+                                                        &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
+                                                i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
+                                                        &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
+                                                ttblk,
+                                                flags,
+                                                i);
+                }
+                dest = s->dest[0] - 16 * s->linesize;
+                cbp = &v->cbp[s->mb_x - s->mb_stride];
+                is_intra = &v->is_intra[s->mb_x - s->mb_stride];
+                uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
+                ttblk = &v->ttblk[s->mb_x - s->mb_stride];
+                flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
+                for (i = 0; i < block_count; i++)
+                    vc1_p_h_loop_filter(v,
+                                        i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
+                                        cbp,
+                                        is_intra,
+                                        i > 3 ? uvmv :
+                                                &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
+                                        i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
+                                                &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
+                                        ttblk,
+                                        flags,
+                                        i);
+            }
+        }
+        if (s->mb_x >= 2) {
+            dest = s->dest[0] - 32;
+            cbp = &v->cbp[s->mb_x - 2];
+            is_intra = &v->is_intra[s->mb_x - 2];
+            uvmv = &v->luma_mv[s->mb_x - 2];
+            ttblk = &v->ttblk[s->mb_x - 2];
+            flags = s->mb_x == 2 ? LEFT_EDGE : 0;
+            for (i = 0; i < block_count; i++)
+                vc1_p_h_loop_filter(v,
+                                    i > 3 ? s->dest[i - 3] - 16 : dest,
+                                    cbp,
+                                    is_intra,
+                                    i > 3 ? uvmv :
+                                            &s->current_picture.motion_val[0][s->block_index[i] - 4 + v->blocks_off],
+                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 2 + v->mb_off] :
+                                            &v->mv_f[0][s->block_index[i] - 4 + v->blocks_off],
+                                    ttblk,
+                                    flags,
+                                    i);
+        }
+        if (s->mb_x == s->mb_width - 1) {
+            if (s->mb_x >= 1) {
+                dest = s->dest[0] - 16;
+                cbp = &v->cbp[s->mb_x - 1];
+                is_intra = &v->is_intra[s->mb_x - 1];
+                uvmv = &v->luma_mv[s->mb_x - 1];
+                ttblk = &v->ttblk[s->mb_x - 1];
+                flags = s->mb_x == 1 ? LEFT_EDGE : 0;
+                for (i = 0; i < block_count; i++)
+                    vc1_p_h_loop_filter(v,
+                                        i > 3 ? s->dest[i - 3] - 8 : dest,
+                                        cbp,
+                                        is_intra,
+                                        i > 3 ? uvmv :
+                                                &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
+                                        i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
+                                                &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
+                                        ttblk,
+                                        flags,
+                                        i);
+            }
+            dest = s->dest[0];
+            cbp = &v->cbp[s->mb_x];
+            is_intra = &v->is_intra[s->mb_x];
+            uvmv = &v->luma_mv[s->mb_x];
+            ttblk = &v->ttblk[s->mb_x];
+            flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
+            for (i = 0; i < block_count; i++)
+                vc1_p_h_loop_filter(v,
+                                    i > 3 ? s->dest[i - 3] : dest,
+                                    cbp,
+                                    is_intra,
+                                    i > 3 ? uvmv :
+                                            &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
+                                    i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
+                                            &v->mv_f[0][s->block_index[i] + v->blocks_off],
+                                    ttblk,
+                                    flags,
+                                    i);
+        }
+    }
+}
+
+static av_always_inline void vc1_p_h_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
+                                                       uint32_t flags, uint8_t fieldtx, int block_num)
+{
+    MpegEncContext *s  = &v->s;
+    int pq = v->pq;
+    int tt;
+    int linesize  = block_num > 3 ? s->uvlinesize : s->linesize;
+    uint8_t *dst;
+
+    if (block_num > 3)
+        dst = dest;
+    else
+        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+    tt = ttblk[0] >> (block_num * 4) & 0xf;
+    if (block_num < 4) {
+        if (fieldtx) {
+            if (block_num < 2) {
+                if (tt == TT_4X4 || tt == TT_4X8)
+                    v->vc1dsp.vc1_h_loop_filter8(dst + 4, 2 * linesize, pq);
+                if (!(flags & RIGHT_EDGE) || block_num == 0)
+                    v->vc1dsp.vc1_h_loop_filter8(dst + 8, 2 * linesize, pq);
+            } else {
+                if (tt == TT_4X4 || tt == TT_4X8)
+                    v->vc1dsp.vc1_h_loop_filter8(dst - 7 * linesize + 4, 2 * linesize, pq);
+                if (!(flags & RIGHT_EDGE) || block_num == 2)
+                    v->vc1dsp.vc1_h_loop_filter8(dst - 7 * linesize + 8, 2 * linesize, pq);
+            }
+        } else {
+            if(tt == TT_4X4 || tt == TT_4X8) {
+                v->vc1dsp.vc1_h_loop_filter4(dst + 4, 2 * linesize, pq);
+                v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, 2 * linesize, pq);
+            }
+            if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
+                v->vc1dsp.vc1_h_loop_filter4(dst + 8, 2 * linesize, pq);
+                v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, 2 * linesize, pq);
+            }
+        }
+    } else {
+        if (tt == TT_4X4 || tt == TT_4X8) {
+            v->vc1dsp.vc1_h_loop_filter4(dst + 4, 2 * linesize, pq);
+            v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, 2 * linesize, pq);
+        }
+        if (!(flags & RIGHT_EDGE)) {
+            v->vc1dsp.vc1_h_loop_filter4(dst + 8, 2 * linesize, pq);
+            v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, 2 * linesize, pq);
+        }
+    }
+}
+
+static av_always_inline void vc1_p_v_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
+                                                       uint32_t flags, uint8_t fieldtx, int block_num)
+{
+    MpegEncContext *s  = &v->s;
+    int pq = v->pq;
+    int tt;
+    int linesize  = block_num > 3 ? s->uvlinesize : s->linesize;
+    uint8_t *dst;
+
+    if (block_num > 3)
+        dst = dest;
+    else
+        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+    tt = ttblk[0] >> (block_num * 4) & 0xf;
+    if (block_num < 4) {
+        if (fieldtx) {
+            if (block_num < 2) {
+                if (tt == TT_4X4 || tt == TT_8X4)
+                    v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
+                if (!(flags & BOTTOM_EDGE))
+                    v->vc1dsp.vc1_v_loop_filter8(dst + 16 * linesize, 2 * linesize, pq);
+            } else {
+                if (tt == TT_4X4 || tt == TT_8X4)
+                    v->vc1dsp.vc1_v_loop_filter8(dst + linesize, 2 * linesize, pq);
+                if (!(flags & BOTTOM_EDGE))
+                    v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
+            }
+        } else {
+            if (block_num < 2) {
+                if (!(flags & TOP_EDGE) && (tt == TT_4X4 || tt == TT_8X4)) {
+                    v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
+                    v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
+                }
+                v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
+                v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
+            } else if (!(flags & BOTTOM_EDGE)) {
+                if (tt == TT_4X4 || tt == TT_8X4) {
+                    v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
+                    v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
+                }
+                v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
+                v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
+            }
+        }
+    } else {
+        if (!(flags & BOTTOM_EDGE)) {
+            if (!(flags & TOP_EDGE) && (tt == TT_4X4 || tt == TT_8X4)) {
+                v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
+                v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
+            }
+                v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
+                v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
+        }
+    }
+}
+
+void ff_vc1_p_intfr_loop_filter(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+    uint8_t *dest;
+    int *ttblk;
+    uint32_t flags;
+    uint8_t fieldtx;
+    int i;
+
+    /* Within a MB, the vertical loop filter always runs before the horizontal.
+     * To accomplish that, we run the V loop filter on all applicable
+     * horizontal borders of the MB above the last overlap filtered MB. Then,
+     * we wait for the loop filter iteration on the next row and next column to
+     * do H loop filter on all applicable vertical borders of this MB.
+     * Therefore, the loop filter trails by two rows and one column relative to
+     * the overlap filter and two rows and two columns relative to the decoding
+     * loop. */
+    if (s->mb_x) {
+        if (s->mb_y >= s->start_mb_y + 1) {
+            dest = s->dest[0] - 16 * s->linesize - 16;
+            ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
+            flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
+            fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
+            for (i = 0; i < block_count; i++)
+                vc1_p_v_intfr_loop_filter(v,
+                                          i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
+                                          ttblk,
+                                          flags,
+                                          fieldtx,
+                                          i);
+        }
+    }
+    if (s->mb_x == s->mb_width - 1) {
+        if (s->mb_y >= s->start_mb_y + 1) {
+            dest = s->dest[0] - 16 * s->linesize;
+            ttblk = &v->ttblk[s->mb_x - s->mb_stride];
+            flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
+            fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
+            for (i = 0; i < block_count; i++)
+                vc1_p_v_intfr_loop_filter(v,
+                                          i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
+                                          ttblk,
+                                          flags,
+                                          fieldtx,
+                                          i);
+        }
+    }
+    if (s->mb_y == s->end_mb_y - 1) {
+        if (s->mb_x) {
+            dest = s->dest[0] - 16;
+            ttblk = &v->ttblk[s->mb_x - 1];
+            flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
+            fieldtx = v->fieldtx_plane[mb_pos - 1];
+            for (i = 0; i < block_count; i++)
+                vc1_p_v_intfr_loop_filter(v,
+                                          i > 3 ? s->dest[i - 3] - 8 : dest,
+                                          ttblk,
+                                          flags,
+                                          fieldtx,
+                                          i);
+        }
+        if (s->mb_x == s->mb_width - 1) {
+            dest = s->dest[0];
+            ttblk = &v->ttblk[s->mb_x];
+            flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
+            fieldtx = v->fieldtx_plane[mb_pos];
+            for (i = 0; i < block_count; i++)
+                vc1_p_v_intfr_loop_filter(v,
+                                          i > 3 ? s->dest[i - 3] : dest,
+                                          ttblk,
+                                          flags,
+                                          fieldtx,
+                                          i);
+        }
+    }
+
+    if (s->mb_y >= s->start_mb_y + 2) {
+        if (s->mb_x >= 2) {
+            dest = s->dest[0] - 32 * s->linesize - 32;
+            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 2];
+            flags = s->mb_x == 2 ? LEFT_EDGE : 0;
+            fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride - 2];
+            for (i = 0; i < block_count; i++)
+                vc1_p_h_intfr_loop_filter(v,
+                                          i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 16 : dest,
+                                          ttblk,
+                                          flags,
+                                          fieldtx,
+                                          i);
+        }
+        if (s->mb_x == s->mb_width - 1) {
+            if (s->mb_x >= 1) {
+                dest = s->dest[0] - 32 * s->linesize - 16;
+                ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
+                flags = s->mb_x == 1 ? LEFT_EDGE : 0;
+                fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride - 1];
+                for (i = 0; i < block_count; i++)
+                    vc1_p_h_intfr_loop_filter(v,
+                                              i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
+                                              ttblk,
+                                              flags,
+                                              fieldtx,
+                                              i);
+            }
+            dest = s->dest[0] - 32 * s->linesize;
+            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
+            flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
+            fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride];
+            for (i = 0; i < block_count; i++)
+                vc1_p_h_intfr_loop_filter(v,
+                                          i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
+                                          ttblk,
+                                          flags,
+                                          fieldtx,
+                                          i);
+        }
+    }
+    if (s->mb_y == s->end_mb_y - 1) {
+        if (s->mb_y >= s->start_mb_y + 1) {
+            if (s->mb_x >= 2) {
+                dest = s->dest[0] - 16 * s->linesize - 32;
+                ttblk = &v->ttblk[s->mb_x - s->mb_stride - 2];
+                flags = s->mb_x == 2 ? LEFT_EDGE : 0;
+                fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 2];
+                for (i = 0; i < block_count; i++)
+                    vc1_p_h_intfr_loop_filter(v,
+                                              i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 16 : dest,
+                                              ttblk,
+                                              flags,
+                                              fieldtx,
+                                              i);
+            }
+            if (s->mb_x == s->mb_width - 1) {
+                if (s->mb_x >= 1) {
+                    dest = s->dest[0] - 16 * s->linesize - 16;
+                    ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
+                    flags = s->mb_x == 1 ? LEFT_EDGE : 0;
+                    fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
+                    for (i = 0; i < block_count; i++)
+                        vc1_p_h_intfr_loop_filter(v,
+                                                  i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
+                                                  ttblk,
+                                                  flags,
+                                                  fieldtx,
+                                                  i);
+                }
+                dest = s->dest[0] - 16 * s->linesize;
+                ttblk = &v->ttblk[s->mb_x - s->mb_stride];
+                flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
+                fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
+                for (i = 0; i < block_count; i++)
+                    vc1_p_h_intfr_loop_filter(v,
+                                              i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
+                                              ttblk,
+                                              flags,
+                                              fieldtx,
+                                              i);
+            }
+        }
+        if (s->mb_x >= 2) {
+            dest = s->dest[0] - 32;
+            ttblk = &v->ttblk[s->mb_x - 2];
+            flags = s->mb_x == 2 ? LEFT_EDGE : 0;
+            fieldtx = v->fieldtx_plane[mb_pos - 2];
+            for (i = 0; i < block_count; i++)
+                vc1_p_h_intfr_loop_filter(v,
+                                          i > 3 ? s->dest[i - 3] - 16 : dest,
+                                          ttblk,
+                                          flags,
+                                          fieldtx,
+                                          i);
+        }
+        if (s->mb_x == s->mb_width - 1) {
+            if (s->mb_x >= 1) {
+                dest = s->dest[0] - 16;
+                ttblk = &v->ttblk[s->mb_x - 1];
+                flags = s->mb_x == 1 ? LEFT_EDGE : 0;
+                fieldtx = v->fieldtx_plane[mb_pos - 1];
+                for (i = 0; i < block_count; i++)
+                    vc1_p_h_intfr_loop_filter(v,
+                                              i > 3 ? s->dest[i - 3] - 8 : dest,
+                                              ttblk,
+                                              flags,
+                                              fieldtx,
+                                              i);
+            }
+            dest = s->dest[0];
+            ttblk = &v->ttblk[s->mb_x];
+            flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
+            fieldtx = v->fieldtx_plane[mb_pos];
+            for (i = 0; i < block_count; i++)
+                vc1_p_h_intfr_loop_filter(v,
+                                          i > 3 ? s->dest[i - 3] : dest,
+                                          ttblk,
+                                          flags,
+                                          fieldtx,
+                                          i);
+        }
+    }
+}
+
+static av_always_inline void vc1_b_h_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
+                                                       int *ttblk, uint32_t flags, int block_num)
+{
+    MpegEncContext *s  = &v->s;
+    int pq = v->pq;
+    uint8_t *dst;
+    uint32_t block_cbp = cbp[0] >> (block_num * 4);
+    int tt;
+    int idx, linesize  = block_num > 3 ? s->uvlinesize : s->linesize;
+
+    if (block_num > 3)
+        dst = dest;
+    else
+        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+    if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
+        if (block_num > 3)
+            v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
+        else
+            v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
+    }
+
+    tt = ttblk[0] >> (block_num * 4) & 0xf;
+    if (tt == TT_4X4 || tt == TT_4X8) {
+        idx = (block_cbp | (block_cbp >> 1)) & 5;
+        if (idx & 1)
+            v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
+        if (idx & 4)
+            v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
+    }
+}
+
+static av_always_inline void vc1_b_v_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
+                                                       int *ttblk, uint32_t flags, int block_num)
+{
+    MpegEncContext *s  = &v->s;
+    int pq = v->pq;
+    uint8_t *dst;
+    uint32_t block_cbp = cbp[0] >> (block_num * 4);
+    int tt;
+    int idx, linesize  = block_num > 3 ? s->uvlinesize : s->linesize;
+
+    if (block_num > 3)
+        dst = dest;
+    else
+        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+    if(!(flags & BOTTOM_EDGE) || block_num < 2)
+        v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
+
+    tt = ttblk[0] >> (block_num * 4) & 0xf;
+    if (tt == TT_4X4 || tt == TT_8X4) {
+        idx = (block_cbp | (block_cbp >> 2)) & 3;
+        if (idx & 1)
+            v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
+        if (idx & 2)
+            v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
+    }
+}
+
+void ff_vc1_b_intfi_loop_filter(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    uint8_t *dest;
+    uint32_t *cbp;
+    int *ttblk;
+    uint32_t flags = 0;
+    int i;
+
+    /* Within a MB, the vertical loop filter always runs before the horizontal.
+     * To accomplish that, we run the V loop filter on all applicable
+     * horizontal borders of the MB above the currently decoded MB. Then,
+     * we wait for the next loop filter iteration to do H loop filter on all
+     * applicable vertical borders of this MB. Therefore, the loop filter
+     * trails by one row and one column relative to the decoding loop. */
+    if (!s->first_slice_line) {
+        dest = s->dest[0] - 16 * s->linesize;
+        cbp = &v->cbp[s->mb_x - s->mb_stride];
+        ttblk = &v->ttblk[s->mb_x - s->mb_stride];
+        flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
+        for (i = 0; i < block_count; i++)
+            vc1_b_v_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, cbp, ttblk, flags, i);
+    }
+    if (s->mb_y == s->end_mb_y - 1) {
+        dest = s->dest[0];
+        cbp = &v->cbp[s->mb_x];
+        ttblk = &v->ttblk[s->mb_x];
+        flags = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
+        for (i = 0; i < block_count; i++)
+            vc1_b_v_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, cbp, ttblk, flags, i);
+    }
+
+    if (!s->first_slice_line) {
+        dest = s->dest[0] - 16 * s->linesize - 16;
+        cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
+        ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
+        if (s->mb_x) {
+            flags = s->mb_x == 1 ? LEFT_EDGE : 0;
+            for (i = 0; i < block_count; i++)
+                vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, cbp, ttblk, flags, i);
+        }
+        if (s->mb_x == s->mb_width - 1) {
+            dest += 16;
+            cbp++;
+            ttblk++;
+            flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
+            for (i = 0; i < block_count; i++)
+                vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, cbp, ttblk, flags, i);
+        }
+    }
+    if (s->mb_y == s->end_mb_y - 1) {
+        dest = s->dest[0] - 16;
+        cbp = &v->cbp[s->mb_x - 1];
+        ttblk = &v->ttblk[s->mb_x - 1];
+        if (s->mb_x) {
+            flags = s->mb_x == 1 ? LEFT_EDGE : 0;
+            for (i = 0; i < block_count; i++)
+                vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, cbp, ttblk, flags, i);
+        }
+        if (s->mb_x == s->mb_width - 1) {
+            dest += 16;
+            cbp++;
+            ttblk++;
+            flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
+            for (i = 0; i < block_count; i++)
+                vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, cbp, ttblk, flags, i);
         }
     }
 }

diff --git a/libavcodec/vc1_mc.c b/libavcodec/vc1_mc.c
index 75c74ca..1b8d879 100644
--- a/libavcodec/vc1_mc.c
+++ b/libavcodec/vc1_mc.c

@@ -179,12 +179,17 @@
     int i;
     uint8_t (*luty)[256], (*lutuv)[256];
     int use_ic;
+    int interlace;
+    int linesize, uvlinesize;
 
     if ((!v->field_mode ||
          (v->ref_field_type[dir] == 1 && v->cur_field_type == 1)) &&
         !v->s.last_picture.f->data[0])
         return;
 
+    linesize = s->current_picture_ptr->f->linesize[0];
+    uvlinesize = s->current_picture_ptr->f->linesize[1];
+
     mx = s->mv[dir][0][0];
     my = s->mv[dir][0][1];
 
@@ -220,6 +225,7 @@
             luty  = v->curr_luty;
             lutuv = v->curr_lutuv;
             use_ic = *v->curr_use_ic;
+            interlace = 1;
         } else {
             srcY = s->last_picture.f->data[0];
             srcU = s->last_picture.f->data[1];
@@ -227,6 +233,7 @@
             luty  = v->last_luty;
             lutuv = v->last_lutuv;
             use_ic = v->last_use_ic;
+            interlace = s->last_picture.f->interlaced_frame;
         }
     } else {
         srcY = s->next_picture.f->data[0];
@@ -235,6 +242,7 @@
         luty  = v->next_luty;
         lutuv = v->next_lutuv;
         use_ic = v->next_use_ic;
+        interlace = s->next_picture.f->interlaced_frame;
     }
 
     if (!srcY || !srcU) {
@@ -254,9 +262,14 @@
         uvsrc_y = av_clip(uvsrc_y,  -8, s->mb_height *  8);
     } else {
         src_x   = av_clip(  src_x, -17, s->avctx->coded_width);
-        src_y   = av_clip(  src_y, -18, s->avctx->coded_height + 1);
         uvsrc_x = av_clip(uvsrc_x,  -8, s->avctx->coded_width  >> 1);
-        uvsrc_y = av_clip(uvsrc_y,  -8, s->avctx->coded_height >> 1);
+        if (v->fcm == ILACE_FRAME) {
+            src_y = av_clip(src_y, -18 + (src_y & 1), s->avctx->coded_height + (src_y & 1));
+            uvsrc_y = av_clip(uvsrc_y, -8 + (uvsrc_y & 1), (s->avctx->coded_height >> 1) + (uvsrc_y & 1));
+        } else {
+            src_y = av_clip(src_y, -18, s->avctx->coded_height + 1);
+            uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);
+        }
     }
 
     srcY += src_y   * s->linesize   + src_x;
@@ -264,9 +277,9 @@
     srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
 
     if (v->field_mode && v->ref_field_type[dir]) {
-        srcY += s->current_picture_ptr->f->linesize[0];
-        srcU += s->current_picture_ptr->f->linesize[1];
-        srcV += s->current_picture_ptr->f->linesize[2];
+        srcY += linesize;
+        srcU += uvlinesize;
+        srcV += uvlinesize;
     }
 
     /* for grayscale we should not try to read from unknown area */
@@ -284,22 +297,106 @@
         const int k = 17 + s->mspel * 2;
 
         srcY -= s->mspel * (1 + s->linesize);
-        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcY,
-                                 s->linesize, s->linesize,
-                                 k, k,
-                                 src_x - s->mspel, src_y - s->mspel,
-                                 s->h_edge_pos, v_edge_pos);
+        if (interlace) {
+            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer,
+                                     srcY,
+                                     linesize << 1,
+                                     linesize << 1,
+                                     k,
+                                     v->field_mode ? k : k + 1 >> 1,
+                                     src_x - s->mspel,
+                                     src_y - s->mspel >> !v->field_mode,
+                                     s->h_edge_pos,
+                                     s->v_edge_pos >> 1);
+            if (!v->field_mode)
+                s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + linesize,
+                                         srcY + linesize,
+                                         linesize << 1,
+                                         linesize << 1,
+                                         k,
+                                         k >> 1,
+                                         src_x - s->mspel,
+                                         src_y - s->mspel + 1 >> 1,
+                                         s->h_edge_pos,
+                                         s->v_edge_pos >> 1);
+        } else
+            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer,
+                                     srcY,
+                                     linesize,
+                                     linesize,
+                                     k,
+                                     v->field_mode ? (k << 1) - 1 : k,
+                                     src_x - s->mspel,
+                                     v->field_mode ? 2 * (src_y - s->mspel) + v->ref_field_type[dir] :
+                                                     src_y - s->mspel,
+                                     s->h_edge_pos,
+                                     s->v_edge_pos);
         srcY = s->sc.edge_emu_buffer;
-        s->vdsp.emulated_edge_mc(ubuf, srcU,
-                                 s->uvlinesize, s->uvlinesize,
-                                 8 + 1, 8 + 1,
-                                 uvsrc_x, uvsrc_y,
-                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
-        s->vdsp.emulated_edge_mc(vbuf, srcV,
-                                 s->uvlinesize, s->uvlinesize,
-                                 8 + 1, 8 + 1,
-                                 uvsrc_x, uvsrc_y,
-                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
+        if (interlace) {
+            s->vdsp.emulated_edge_mc(ubuf,
+                                     srcU,
+                                     uvlinesize << 1,
+                                     uvlinesize << 1,
+                                     9,
+                                     v->field_mode ? 9 : 5,
+                                     uvsrc_x,
+                                     uvsrc_y >> !v->field_mode,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 2);
+            s->vdsp.emulated_edge_mc(vbuf,
+                                     srcV,
+                                     uvlinesize << 1,
+                                     uvlinesize << 1,
+                                     9,
+                                     v->field_mode ? 9 : 5,
+                                     uvsrc_x,
+                                     uvsrc_y >> !v->field_mode,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 2);
+            if (!v->field_mode) {
+                s->vdsp.emulated_edge_mc(ubuf + uvlinesize,
+                                         srcU + uvlinesize,
+                                         uvlinesize << 1,
+                                         uvlinesize << 1,
+                                         9,
+                                         4,
+                                         uvsrc_x,
+                                         uvsrc_y + 1 >> 1,
+                                         s->h_edge_pos >> 1,
+                                         s->v_edge_pos >> 2);
+                s->vdsp.emulated_edge_mc(vbuf + uvlinesize,
+                                         srcV + uvlinesize,
+                                         uvlinesize << 1,
+                                         uvlinesize << 1,
+                                         9,
+                                         4,
+                                         uvsrc_x,
+                                         uvsrc_y + 1 >> 1,
+                                         s->h_edge_pos >> 1,
+                                         s->v_edge_pos >> 2);
+            }
+        } else {
+            s->vdsp.emulated_edge_mc(ubuf,
+                                     srcU,
+                                     uvlinesize,
+                                     uvlinesize,
+                                     9,
+                                     v->field_mode ? 17 : 9,
+                                     uvsrc_x,
+                                     v->field_mode ? 2 * uvsrc_y + v->ref_field_type[dir] : uvsrc_y,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 1);
+            s->vdsp.emulated_edge_mc(vbuf,
+                                     srcV,
+                                     uvlinesize,
+                                     uvlinesize,
+                                     9,
+                                     v->field_mode ? 17 : 9,
+                                     uvsrc_x,
+                                     v->field_mode ? 2 * uvsrc_y + v->ref_field_type[dir] : uvsrc_y,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 1);
+        }
         srcU = ubuf;
         srcV = vbuf;
         /* if we deal with range reduction we need to scale source blocks */
@@ -344,6 +441,10 @@
         v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
         v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
     }
+    if (v->field_mode) {
+        v->mv_f[dir][s->block_index[4] + v->mb_off] = v->cur_field_type != v->ref_field_type[dir];
+        v->mv_f[dir][s->block_index[5] + v->mb_off] = v->cur_field_type != v->ref_field_type[dir];
+    }
 }
 
 /** Do motion compensation for 4-MV macroblock - luminance block
@@ -358,12 +459,16 @@
     int v_edge_pos = s->v_edge_pos >> v->field_mode;
     uint8_t (*luty)[256];
     int use_ic;
+    int interlace;
+    int linesize;
 
     if ((!v->field_mode ||
          (v->ref_field_type[dir] == 1 && v->cur_field_type == 1)) &&
         !v->s.last_picture.f->data[0])
         return;
 
+    linesize = s->current_picture_ptr->f->linesize[0];
+
     mx = s->mv[dir][n][0];
     my = s->mv[dir][n][1];
 
@@ -372,15 +477,18 @@
             srcY = s->current_picture.f->data[0];
             luty = v->curr_luty;
             use_ic = *v->curr_use_ic;
+            interlace = 1;
         } else {
             srcY = s->last_picture.f->data[0];
             luty = v->last_luty;
             use_ic = v->last_use_ic;
+            interlace = s->last_picture.f->interlaced_frame;
         }
     } else {
         srcY = s->next_picture.f->data[0];
         luty = v->next_luty;
         use_ic = v->next_use_ic;
+        interlace = s->next_picture.f->interlaced_frame;
     }
 
     if (!srcY) {
@@ -439,26 +547,16 @@
         src_y = av_clip(src_y, -16, s->mb_height * 16);
     } else {
         src_x = av_clip(src_x, -17, s->avctx->coded_width);
-        if (v->fcm == ILACE_FRAME) {
-            if (src_y & 1)
-                src_y = av_clip(src_y, -17, s->avctx->coded_height + 1);
-            else
-                src_y = av_clip(src_y, -18, s->avctx->coded_height);
-        } else {
+        if (v->fcm == ILACE_FRAME)
+            src_y = av_clip(src_y, -18 + (src_y & 1), s->avctx->coded_height + (src_y & 1));
+        else
             src_y = av_clip(src_y, -18, s->avctx->coded_height + 1);
-        }
     }
 
     srcY += src_y * s->linesize + src_x;
     if (v->field_mode && v->ref_field_type[dir])
-        srcY += s->current_picture_ptr->f->linesize[0];
+        srcY += linesize;
 
-    if (fieldmv) {
-        if (!(src_y & 1))
-            v_edge_pos--;
-        else
-            src_y -= (src_y < 4);
-    }
     if (v->rangeredfrm || use_ic
         || s->h_edge_pos < 13 || v_edge_pos < 23
         || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx & 3) - 8 - s->mspel * 2
@@ -467,11 +565,40 @@
 
         srcY -= s->mspel * (1 + (s->linesize << fieldmv));
         /* check emulate edge stride and offset */
-        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcY,
-                                 s->linesize, s->linesize,
-                                 k, k << fieldmv,
-                                 src_x - s->mspel, src_y - (s->mspel << fieldmv),
-                                 s->h_edge_pos, v_edge_pos);
+        if (interlace) {
+            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer,
+                                     srcY,
+                                     linesize << 1,
+                                     linesize << 1,
+                                     k,
+                                     v->field_mode ? k : (k << fieldmv) + 1 >> 1,
+                                     src_x - s->mspel,
+                                     src_y - (s->mspel << fieldmv) >> !v->field_mode,
+                                     s->h_edge_pos,
+                                     s->v_edge_pos >> 1);
+            if (!v->field_mode && !fieldmv)
+                s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + linesize,
+                                         srcY + linesize,
+                                         linesize << 1,
+                                         linesize << 1,
+                                         k,
+                                         k >> 1,
+                                         src_x - s->mspel,
+                                         src_y - s->mspel + 1 >> 1,
+                                         s->h_edge_pos,
+                                         s->v_edge_pos >> 1);
+        } else
+            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer,
+                                     srcY,
+                                     linesize,
+                                     linesize,
+                                     k,
+                                     v->field_mode ? (k << 1) - 1 : k << fieldmv,
+                                     src_x - s->mspel,
+                                     v->field_mode ? 2 * (src_y - s->mspel) + v->ref_field_type[dir] :
+                                                     src_y - (s->mspel << fieldmv),
+                                     s->h_edge_pos,
+                                     s->v_edge_pos);
         srcY = s->sc.edge_emu_buffer;
         /* if we deal with range reduction we need to scale source blocks */
         if (v->rangeredfrm) {
@@ -515,6 +642,8 @@
     int v_edge_pos = s->v_edge_pos >> v->field_mode;
     uint8_t (*lutuv)[256];
     int use_ic;
+    int interlace;
+    int uvlinesize;
 
     if (!v->field_mode && !v->s.last_picture.f->data[0])
         return;
@@ -539,6 +668,9 @@
         return;
     s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = tx;
     s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1] = ty;
+
+    uvlinesize = s->current_picture_ptr->f->linesize[1];
+
     uvmx = (tx + ((tx & 3) == 3)) >> 1;
     uvmy = (ty + ((ty & 3) == 3)) >> 1;
 
@@ -570,17 +702,20 @@
             srcV = s->current_picture.f->data[2];
             lutuv = v->curr_lutuv;
             use_ic = *v->curr_use_ic;
+            interlace = 1;
         } else {
             srcU = s->last_picture.f->data[1];
             srcV = s->last_picture.f->data[2];
             lutuv = v->last_lutuv;
             use_ic = v->last_use_ic;
+            interlace = s->last_picture.f->interlaced_frame;
         }
     } else {
         srcU = s->next_picture.f->data[1];
         srcV = s->next_picture.f->data[2];
         lutuv = v->next_lutuv;
         use_ic = v->next_use_ic;
+        interlace = s->next_picture.f->interlaced_frame;
     }
 
     if (!srcU) {
@@ -593,8 +728,8 @@
 
     if (v->field_mode) {
         if (chroma_ref_type) {
-            srcU += s->current_picture_ptr->f->linesize[1];
-            srcV += s->current_picture_ptr->f->linesize[2];
+            srcU += uvlinesize;
+            srcV += uvlinesize;
         }
     }
 
@@ -602,14 +737,71 @@
         || s->h_edge_pos < 18 || v_edge_pos < 18
         || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 9
         || (unsigned)uvsrc_y > (v_edge_pos    >> 1) - 9) {
-        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcU,
-                                 s->uvlinesize, s->uvlinesize,
-                                 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
-                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
-        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16, srcV,
-                                 s->uvlinesize, s->uvlinesize,
-                                 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
-                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
+        if (interlace) {
+            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer,
+                                     srcU,
+                                     uvlinesize << 1,
+                                     uvlinesize << 1,
+                                     9,
+                                     v->field_mode ? 9 : 5,
+                                     uvsrc_x,
+                                     uvsrc_y >> !v->field_mode,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 2);
+            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16,
+                                     srcV,
+                                     uvlinesize << 1,
+                                     uvlinesize << 1,
+                                     9,
+                                     v->field_mode ? 9 : 5,
+                                     uvsrc_x,
+                                     uvsrc_y >> !v->field_mode,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 2);
+            if (!v->field_mode) {
+                s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + uvlinesize,
+                                         srcU + uvlinesize,
+                                         uvlinesize << 1,
+                                         uvlinesize << 1,
+                                         9,
+                                         4,
+                                         uvsrc_x,
+                                         uvsrc_y + 1 >> 1,
+                                         s->h_edge_pos >> 1,
+                                         s->v_edge_pos >> 2);
+                s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16 + uvlinesize,
+                                         srcV + uvlinesize,
+                                         uvlinesize << 1,
+                                         uvlinesize << 1,
+                                         9,
+                                         4,
+                                         uvsrc_x,
+                                         uvsrc_y + 1 >> 1,
+                                         s->h_edge_pos >> 1,
+                                         s->v_edge_pos >> 2);
+            }
+        } else {
+            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer,
+                                     srcU,
+                                     uvlinesize,
+                                     uvlinesize,
+                                     9,
+                                     v->field_mode ? 17 : 9,
+                                     uvsrc_x,
+                                     v->field_mode ? 2 * uvsrc_y + chroma_ref_type : uvsrc_y,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 1);
+            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16,
+                                     srcV,
+                                     uvlinesize,
+                                     uvlinesize,
+                                     9,
+                                     v->field_mode ? 17 : 9,
+                                     uvsrc_x,
+                                     v->field_mode ? 2 * uvsrc_y + chroma_ref_type : uvsrc_y,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 1);
+        }
         srcU = s->sc.edge_emu_buffer;
         srcV = s->sc.edge_emu_buffer + 16;
 
@@ -636,6 +828,10 @@
         v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
         v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
     }
+    if (v->field_mode) {
+        v->mv_f[dir][s->block_index[4] + v->mb_off] = v->cur_field_type != chroma_ref_type;
+        v->mv_f[dir][s->block_index[5] + v->mb_off] = v->cur_field_type != chroma_ref_type;
+    }
 }
 
 /** Do motion compensation for 4-MV interlaced frame chroma macroblock (both U and V)
@@ -653,11 +849,15 @@
     int v_dist = fieldmv ? 1 : 4; // vertical offset for lower sub-blocks
     int v_edge_pos = s->v_edge_pos >> 1;
     int use_ic;
+    int interlace;
+    int uvlinesize;
     uint8_t (*lutuv)[256];
 
     if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
         return;
 
+    uvlinesize = s->current_picture_ptr->f->linesize[1];
+
     for (i = 0; i < 4; i++) {
         int d = i < 2 ? dir: dir2;
         tx = s->mv[d][i][0];
@@ -675,17 +875,22 @@
         uvsrc_y = s->mb_y * 8 + ((i & 2) ? v_dist : 0) + (uvmy_field[i] >> 2);
         // FIXME: implement proper pull-back (see vc1cropmv.c, vc1CROPMV_ChromaPullBack())
         uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width  >> 1);
-        uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);
+        if (v->fcm == ILACE_FRAME)
+            uvsrc_y = av_clip(uvsrc_y, -8 + (uvsrc_y & 1), (s->avctx->coded_height >> 1) + (uvsrc_y & 1));
+        else
+            uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);
         if (i < 2 ? dir : dir2) {
             srcU = s->next_picture.f->data[1];
             srcV = s->next_picture.f->data[2];
             lutuv  = v->next_lutuv;
             use_ic = v->next_use_ic;
+            interlace = s->next_picture.f->interlaced_frame;
         } else {
             srcU = s->last_picture.f->data[1];
             srcV = s->last_picture.f->data[2];
             lutuv  = v->last_lutuv;
             use_ic = v->last_use_ic;
+            interlace = s->last_picture.f->interlaced_frame;
         }
         if (!srcU)
             return;
@@ -694,24 +899,75 @@
         uvmx_field[i] = (uvmx_field[i] & 3) << 1;
         uvmy_field[i] = (uvmy_field[i] & 3) << 1;
 
-        if (fieldmv) {
-            if (!(uvsrc_y & 1))
-                v_edge_pos = (s->v_edge_pos >> 1) - 1;
-            else
-                uvsrc_y -= (uvsrc_y < 2);
-        }
         if (use_ic
             || s->h_edge_pos < 10 || v_edge_pos < (5 << fieldmv)
             || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 5
             || (unsigned)uvsrc_y > v_edge_pos - (5 << fieldmv)) {
-            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcU,
-                                     s->uvlinesize, s->uvlinesize,
-                                     5, (5 << fieldmv), uvsrc_x, uvsrc_y,
-                                     s->h_edge_pos >> 1, v_edge_pos);
-            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16, srcV,
-                                     s->uvlinesize, s->uvlinesize,
-                                     5, (5 << fieldmv), uvsrc_x, uvsrc_y,
-                                     s->h_edge_pos >> 1, v_edge_pos);
+            if (interlace) {
+                s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer,
+                                         srcU,
+                                         uvlinesize << 1,
+                                         uvlinesize << 1,
+                                         5,
+                                         (5 << fieldmv) + 1 >> 1,
+                                         uvsrc_x,
+                                         uvsrc_y >> 1,
+                                         s->h_edge_pos >> 1,
+                                         s->v_edge_pos >> 2);
+                s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16,
+                                         srcV,
+                                         uvlinesize << 1,
+                                         uvlinesize << 1,
+                                         5,
+                                         (5 << fieldmv) + 1 >> 1,
+                                         uvsrc_x,
+                                         uvsrc_y >> 1,
+                                         s->h_edge_pos >> 1,
+                                         s->v_edge_pos >> 2);
+                if (!fieldmv) {
+                    s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + uvlinesize,
+                                             srcU + uvlinesize,
+                                             uvlinesize << 1,
+                                             uvlinesize << 1,
+                                             5,
+                                             2,
+                                             uvsrc_x,
+                                             uvsrc_y + 1 >> 1,
+                                             s->h_edge_pos >> 1,
+                                             s->v_edge_pos >> 2);
+                    s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16 + uvlinesize,
+                                             srcV + uvlinesize,
+                                             uvlinesize << 1,
+                                             uvlinesize << 1,
+                                             5,
+                                             2,
+                                             uvsrc_x,
+                                             uvsrc_y + 1 >> 1,
+                                             s->h_edge_pos >> 1,
+                                             s->v_edge_pos >> 2);
+                }
+            } else {
+                s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer,
+                                         srcU,
+                                         uvlinesize,
+                                         uvlinesize,
+                                         5,
+                                         5 << fieldmv,
+                                         uvsrc_x,
+                                         uvsrc_y,
+                                         s->h_edge_pos >> 1,
+                                         s->v_edge_pos >> 1);
+                s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + 16,
+                                         srcV,
+                                         uvlinesize,
+                                         uvlinesize,
+                                         5,
+                                         5 << fieldmv,
+                                         uvsrc_x,
+                                         uvsrc_y,
+                                         s->h_edge_pos >> 1,
+                                         s->v_edge_pos >> 1);
+            }
             srcU = s->sc.edge_emu_buffer;
             srcV = s->sc.edge_emu_buffer + 16;
 
@@ -753,10 +1009,15 @@
     int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
     int v_edge_pos = s->v_edge_pos >> v->field_mode;
     int use_ic = v->next_use_ic;
+    int interlace;
+    int linesize, uvlinesize;
 
     if (!v->field_mode && !v->s.next_picture.f->data[0])
         return;
 
+    linesize = s->current_picture_ptr->f->linesize[0];
+    uvlinesize = s->current_picture_ptr->f->linesize[1];
+
     mx   = s->mv[1][0][0];
     my   = s->mv[1][0][1];
     uvmx = (mx + ((mx & 3) == 3)) >> 1;
@@ -773,6 +1034,8 @@
     srcU = s->next_picture.f->data[1];
     srcV = s->next_picture.f->data[2];
 
+    interlace = s->next_picture.f->interlaced_frame;
+
     src_x   = s->mb_x * 16 + (mx   >> 2);
     src_y   = s->mb_y * 16 + (my   >> 2);
     uvsrc_x = s->mb_x *  8 + (uvmx >> 2);
@@ -785,9 +1048,14 @@
         uvsrc_y = av_clip(uvsrc_y,  -8, s->mb_height *  8);
     } else {
         src_x   = av_clip(  src_x, -17, s->avctx->coded_width);
-        src_y   = av_clip(  src_y, -18, s->avctx->coded_height + 1);
         uvsrc_x = av_clip(uvsrc_x,  -8, s->avctx->coded_width  >> 1);
-        uvsrc_y = av_clip(uvsrc_y,  -8, s->avctx->coded_height >> 1);
+        if (v->fcm == ILACE_FRAME) {
+            src_y = av_clip(src_y, -18 + (src_y & 1), s->avctx->coded_height + (src_y & 1));
+            uvsrc_y = av_clip(uvsrc_y, -8 + (uvsrc_y & 1), (s->avctx->coded_height >> 1) + (uvsrc_y & 1));
+        } else {
+            src_y = av_clip(src_y, -18, s->avctx->coded_height + 1);
+            uvsrc_y = av_clip(uvsrc_y,  -8, s->avctx->coded_height >> 1);
+        }
     }
 
     srcY += src_y   * s->linesize   + src_x;
@@ -795,9 +1063,9 @@
     srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
 
     if (v->field_mode && v->ref_field_type[1]) {
-        srcY += s->current_picture_ptr->f->linesize[0];
-        srcU += s->current_picture_ptr->f->linesize[1];
-        srcV += s->current_picture_ptr->f->linesize[2];
+        srcY += linesize;
+        srcU += uvlinesize;
+        srcV += uvlinesize;
     }
 
     /* for grayscale we should not try to read from unknown area */
@@ -814,22 +1082,106 @@
         const int k = 17 + s->mspel * 2;
 
         srcY -= s->mspel * (1 + s->linesize);
-        s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, srcY,
-                                 s->linesize, s->linesize,
-                                 k, k,
-                                 src_x - s->mspel, src_y - s->mspel,
-                                 s->h_edge_pos, v_edge_pos);
+        if (interlace) {
+            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer,
+                                     srcY,
+                                     linesize << 1,
+                                     linesize << 1,
+                                     k,
+                                     v->field_mode ? k : (k + 1 >> 1),
+                                     src_x - s->mspel,
+                                     src_y - s->mspel >> !v->field_mode,
+                                     s->h_edge_pos,
+                                     s->v_edge_pos >> 1);
+            if (!v->field_mode)
+                s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer + linesize,
+                                         srcY + linesize,
+                                         linesize << 1,
+                                         linesize << 1,
+                                         k,
+                                         k >> 1,
+                                         src_x - s->mspel,
+                                         src_y - s->mspel + 1 >> 1,
+                                         s->h_edge_pos,
+                                         s->v_edge_pos >> 1);
+        } else
+            s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer,
+                                     srcY,
+                                     linesize,
+                                     linesize,
+                                     k,
+                                     v->field_mode ? (k << 1) - 1 : k,
+                                     src_x - s->mspel,
+                                     v->field_mode ? 2 * (src_y - s->mspel) + v->ref_field_type[1] :
+                                                     src_y - s->mspel,
+                                     s->h_edge_pos,
+                                     s->v_edge_pos);
         srcY = s->sc.edge_emu_buffer;
-        s->vdsp.emulated_edge_mc(ubuf, srcU,
-                                 s->uvlinesize, s->uvlinesize,
-                                 8 + 1, 8 + 1,
-                                 uvsrc_x, uvsrc_y,
-                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
-        s->vdsp.emulated_edge_mc(vbuf, srcV,
-                                 s->uvlinesize, s->uvlinesize,
-                                 8 + 1, 8 + 1,
-                                 uvsrc_x, uvsrc_y,
-                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
+        if (interlace) {
+            s->vdsp.emulated_edge_mc(ubuf,
+                                     srcU,
+                                     uvlinesize << 1,
+                                     uvlinesize << 1,
+                                     9,
+                                     v->field_mode ? 9 : 5,
+                                     uvsrc_x,
+                                     uvsrc_y >> !v->field_mode,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 2);
+            s->vdsp.emulated_edge_mc(vbuf,
+                                     srcV,
+                                     uvlinesize << 1,
+                                     uvlinesize << 1,
+                                     9,
+                                     v->field_mode ? 9 : 5,
+                                     uvsrc_x,
+                                     uvsrc_y >> !v->field_mode,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 2);
+            if (!v->field_mode) {
+                s->vdsp.emulated_edge_mc(ubuf + uvlinesize,
+                                         srcU + uvlinesize,
+                                         uvlinesize << 1,
+                                         uvlinesize << 1,
+                                         9,
+                                         4,
+                                         uvsrc_x,
+                                         uvsrc_y + 1 >> 1,
+                                         s->h_edge_pos >> 1,
+                                         s->v_edge_pos >> 2);
+                s->vdsp.emulated_edge_mc(vbuf + uvlinesize,
+                                         srcV + uvlinesize,
+                                         uvlinesize << 1,
+                                         uvlinesize << 1,
+                                         9,
+                                         4,
+                                         uvsrc_x,
+                                         uvsrc_y + 1 >> 1,
+                                         s->h_edge_pos >> 1,
+                                         s->v_edge_pos >> 2);
+            }
+        } else {
+            s->vdsp.emulated_edge_mc(ubuf,
+                                     srcU,
+                                     uvlinesize,
+                                     uvlinesize,
+                                     9,
+                                     v->field_mode ? 17 : 9,
+                                     uvsrc_x,
+                                     v->field_mode ? 2 * uvsrc_y + v->ref_field_type[1] : uvsrc_y,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 1);
+            s->vdsp.emulated_edge_mc(vbuf,
+                                     srcV,
+                                     uvlinesize,
+                                     uvlinesize,
+                                     9,
+                                     v->field_mode ? 17 : 9,
+                                     uvsrc_x,
+                                     v->field_mode ? 2 * uvsrc_y + v->ref_field_type[1] : uvsrc_y,
+                                     s->h_edge_pos >> 1,
+                                     s->v_edge_pos >> 1);
+        }
         srcU = ubuf;
         srcV = vbuf;
         /* if we deal with range reduction we need to scale source blocks */

diff --git a/libavcodec/vc1_pred.c b/libavcodec/vc1_pred.c
index 54712f6..de736ec 100644
--- a/libavcodec/vc1_pred.c
+++ b/libavcodec/vc1_pred.c

@@ -254,7 +254,7 @@
             v->luma_mv[s->mb_x][0] = v->luma_mv[s->mb_x][1] = 0;
             s->current_picture.motion_val[1][xy + 1 + v->blocks_off][0]        = 0;
             s->current_picture.motion_val[1][xy + 1 + v->blocks_off][1]        = 0;
-            s->current_picture.motion_val[1][xy + wrap][0]                     = 0;
+            s->current_picture.motion_val[1][xy + wrap + v->blocks_off][0]     = 0;
             s->current_picture.motion_val[1][xy + wrap + v->blocks_off][1]     = 0;
             s->current_picture.motion_val[1][xy + wrap + 1 + v->blocks_off][0] = 0;
             s->current_picture.motion_val[1][xy + wrap + 1 + v->blocks_off][1] = 0;
@@ -341,6 +341,8 @@
     } else
         opposite = 0;
     if (opposite) {
+        v->mv_f[dir][xy + v->blocks_off] = 1;
+        v->ref_field_type[dir] = !v->cur_field_type;
         if (a_valid && !a_f) {
             field_predA[0] = scaleforopp(v, field_predA[0], 0, dir);
             field_predA[1] = scaleforopp(v, field_predA[1], 1, dir);
@@ -353,9 +355,9 @@
             field_predC[0] = scaleforopp(v, field_predC[0], 0, dir);
             field_predC[1] = scaleforopp(v, field_predC[1], 1, dir);
         }
-        v->mv_f[dir][xy + v->blocks_off] = 1;
-        v->ref_field_type[dir] = !v->cur_field_type;
     } else {
+        v->mv_f[dir][xy + v->blocks_off] = 0;
+        v->ref_field_type[dir] = v->cur_field_type;
         if (a_valid && a_f) {
             field_predA[0] = scaleforsame(v, n, field_predA[0], 0, dir);
             field_predA[1] = scaleforsame(v, n, field_predA[1], 1, dir);
@@ -368,8 +370,6 @@
             field_predC[0] = scaleforsame(v, n, field_predC[0], 0, dir);
             field_predC[1] = scaleforsame(v, n, field_predC[1], 1, dir);
         }
-        v->mv_f[dir][xy + v->blocks_off] = 0;
-        v->ref_field_type[dir] = v->cur_field_type;
     }
 
     if (a_valid) {

diff --git a/libavcodec/vc1data.c b/libavcodec/vc1data.c
index fc9ba6d..19f1cad 100644
--- a/libavcodec/vc1data.c
+++ b/libavcodec/vc1data.c

@@ -61,7 +61,7 @@
         { MV_PMODE_INTFR_1MV      , 1, 0, 1 },
         { MV_PMODE_INTFR_2MV_FIELD, 0, 0, 1 },
         { MV_PMODE_INTFR_2MV_FIELD, 1, 0, 1 },
-        { MV_PMODE_INTFR_2MV_FIELD, 0, 0, 0 },
+        { MV_PMODE_INTFR_2MV_FIELD, 1, 0, 0 },
         { MV_PMODE_INTFR_INTRA    , 0, 0, 0 }
     },
     {
@@ -73,13 +73,13 @@
         { MV_PMODE_INTFR_1MV      , 1, 0, 1 },
         { MV_PMODE_INTFR_2MV_FIELD, 0, 0, 1 },
         { MV_PMODE_INTFR_2MV_FIELD, 1, 0, 1 },
-        { MV_PMODE_INTFR_2MV_FIELD, 0, 0, 0 },
+        { MV_PMODE_INTFR_2MV_FIELD, 1, 0, 0 },
         { MV_PMODE_INTFR_4MV      , 0, 0, 1 },
         { MV_PMODE_INTFR_4MV      , 1, 0, 1 },
         { MV_PMODE_INTFR_4MV      , 0, 0, 0 },
         { MV_PMODE_INTFR_4MV_FIELD, 0, 0, 1 },
         { MV_PMODE_INTFR_4MV_FIELD, 1, 0, 1 },
-        { MV_PMODE_INTFR_4MV_FIELD, 0, 0, 0 },
+        { MV_PMODE_INTFR_4MV_FIELD, 1, 0, 0 },
         { MV_PMODE_INTFR_INTRA    , 0, 0, 0 }
     }
 };
@@ -1090,7 +1090,7 @@
      0x1F08,  0x1E1E,  0x1D42,  0x1C72, 0x1BAD, 0x1AF3, 0x1A42, 0x199A,
      0x18FA,  0x1862,  0x17D0,  0x1746, 0x16C1, 0x1643, 0x15CA, 0x1555,
      0x14E6,  0x147B,  0x1414,  0x13B1, 0x1352, 0x12F7, 0x129E, 0x1249,
-     0x11F7,  0x11A8,  0x115B,  0x1111, 0x10C9, 0x1084, 0x1000
+     0x11F7,  0x11A8,  0x115B,  0x1111, 0x10C9, 0x1084, 0x1041
 };
 
 /* P Interlaced field picture MV predictor scaling values (Table 114) */

diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 16c601e..9519864 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c

@@ -29,6 +29,7 @@
 #include "avcodec.h"
 #include "blockdsp.h"
 #include "get_bits.h"
+#include "hwaccel.h"
 #include "internal.h"
 #include "mpeg_er.h"
 #include "mpegvideo.h"
@@ -37,7 +38,6 @@
 #include "profiles.h"
 #include "vc1.h"
 #include "vc1data.h"
-#include "vdpau_compat.h"
 #include "libavutil/avassert.h"
 
 
@@ -340,22 +340,22 @@
 
     v->n_allocated_blks = s->mb_width + 2;
     v->block            = av_malloc(sizeof(*v->block) * v->n_allocated_blks);
-    v->cbp_base         = av_malloc(sizeof(v->cbp_base[0]) * 2 * s->mb_stride);
+    v->cbp_base         = av_malloc(sizeof(v->cbp_base[0]) * 3 * s->mb_stride);
     if (!v->block || !v->cbp_base)
         goto error;
-    v->cbp              = v->cbp_base + s->mb_stride;
-    v->ttblk_base       = av_malloc(sizeof(v->ttblk_base[0]) * 2 * s->mb_stride);
+    v->cbp              = v->cbp_base + 2 * s->mb_stride;
+    v->ttblk_base       = av_malloc(sizeof(v->ttblk_base[0]) * 3 * s->mb_stride);
     if (!v->ttblk_base)
         goto error;
-    v->ttblk            = v->ttblk_base + s->mb_stride;
-    v->is_intra_base    = av_mallocz(sizeof(v->is_intra_base[0]) * 2 * s->mb_stride);
+    v->ttblk            = v->ttblk_base + 2 * s->mb_stride;
+    v->is_intra_base    = av_mallocz(sizeof(v->is_intra_base[0]) * 3 * s->mb_stride);
     if (!v->is_intra_base)
         goto error;
-    v->is_intra         = v->is_intra_base + s->mb_stride;
-    v->luma_mv_base     = av_mallocz(sizeof(v->luma_mv_base[0]) * 2 * s->mb_stride);
+    v->is_intra         = v->is_intra_base + 2 * s->mb_stride;
+    v->luma_mv_base     = av_mallocz(sizeof(v->luma_mv_base[0]) * 3 * s->mb_stride);
     if (!v->luma_mv_base)
         goto error;
-    v->luma_mv          = v->luma_mv_base + s->mb_stride;
+    v->luma_mv          = v->luma_mv_base + 2 * s->mb_stride;
 
     /* allocate block type info in that way so it could be used with s->block_index[] */
     v->mb_type_base = av_malloc(s->b8_stride * (mb_height * 2 + 1) + s->mb_stride * (mb_height + 1) * 2);
@@ -657,15 +657,6 @@
         return buf_size;
     }
 
-#if FF_API_CAP_VDPAU
-    if (s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU) {
-        if (v->profile < PROFILE_ADVANCED)
-            avctx->pix_fmt = AV_PIX_FMT_VDPAU_WMV3;
-        else
-            avctx->pix_fmt = AV_PIX_FMT_VDPAU_VC1;
-    }
-#endif
-
     //for advanced profile we may need to parse and unescape data
     if (avctx->codec_id == AV_CODEC_ID_VC1 || avctx->codec_id == AV_CODEC_ID_VC1IMAGE) {
         int buf_size2 = 0;
@@ -684,21 +675,13 @@
                 if (size <= 0) continue;
                 switch (AV_RB32(start)) {
                 case VC1_CODE_FRAME:
-                    if (avctx->hwaccel
-#if FF_API_CAP_VDPAU
-                        || s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU
-#endif
-                        )
+                    if (avctx->hwaccel)
                         buf_start = start;
                     buf_size2 = vc1_unescape_buffer(start + 4, size, buf2);
                     break;
                 case VC1_CODE_FIELD: {
                     int buf_size3;
-                    if (avctx->hwaccel
-#if FF_API_CAP_VDPAU
-                        || s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU
-#endif
-                        )
+                    if (avctx->hwaccel)
                         buf_start_second_field = start;
                     tmp = av_realloc_array(slices, sizeof(*slices), (n_slices+1));
                     if (!tmp) {
@@ -715,9 +698,7 @@
                                                     slices[n_slices].buf);
                     init_get_bits(&slices[n_slices].gb, slices[n_slices].buf,
                                   buf_size3 << 3);
-                    /* assuming that the field marker is at the exact middle,
-                       hope it's correct */
-                    slices[n_slices].mby_start = s->mb_height + 1 >> 1;
+                    slices[n_slices].mby_start = avctx->coded_height + 31 >> 5;
                     slices[n_slices].rawbuf = start;
                     slices[n_slices].raw_size = size + 4;
                     n_slices1 = n_slices - 1; // index of the last slice of the first field
@@ -764,11 +745,7 @@
                 ret = AVERROR_INVALIDDATA;
                 goto err;
             } else { // found field marker, unescape second field
-                if (avctx->hwaccel
-#if FF_API_CAP_VDPAU
-                    || s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU
-#endif
-                    )
+                if (avctx->hwaccel)
                     buf_start_second_field = divider;
                 tmp = av_realloc_array(slices, sizeof(*slices), (n_slices+1));
                 if (!tmp) {
@@ -917,17 +894,6 @@
     s->me.qpel_put = s->qdsp.put_qpel_pixels_tab;
     s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab;
 
-#if FF_API_CAP_VDPAU
-    if ((CONFIG_VC1_VDPAU_DECODER)
-        &&s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU) {
-        if (v->field_mode && buf_start_second_field) {
-            ff_vdpau_vc1_decode_picture(s, buf_start, buf_start_second_field - buf_start);
-            ff_vdpau_vc1_decode_picture(s, buf_start_second_field, (buf + buf_size) - buf_start_second_field);
-        } else {
-            ff_vdpau_vc1_decode_picture(s, buf_start, (buf + buf_size) - buf_start);
-        }
-    } else
-#endif
     if (avctx->hwaccel) {
         s->mb_y = 0;
         if (v->field_mode && buf_start_second_field) {
@@ -935,13 +901,41 @@
             s->picture_structure = PICT_BOTTOM_FIELD - v->tff;
             if ((ret = avctx->hwaccel->start_frame(avctx, buf_start, buf_start_second_field - buf_start)) < 0)
                 goto err;
-            if ((ret = avctx->hwaccel->decode_slice(avctx, buf_start, buf_start_second_field - buf_start)) < 0)
-                goto err;
+
+            if (n_slices1 == -1) {
+                // no slices, decode the field as-is
+                if ((ret = avctx->hwaccel->decode_slice(avctx, buf_start, buf_start_second_field - buf_start)) < 0)
+                    goto err;
+            } else {
+                if ((ret = avctx->hwaccel->decode_slice(avctx, buf_start, slices[0].rawbuf - buf_start)) < 0)
+                    goto err;
+
+                for (i = 0 ; i < n_slices1 + 1; i++) {
+                    s->gb = slices[i].gb;
+                    s->mb_y = slices[i].mby_start;
+
+                    v->pic_header_flag = get_bits1(&s->gb);
+                    if (v->pic_header_flag) {
+                        if (ff_vc1_parse_frame_header_adv(v, &s->gb) < 0) {
+                            av_log(v->s.avctx, AV_LOG_ERROR, "Slice header damaged\n");
+                            ret = AVERROR_INVALIDDATA;
+                            if (avctx->err_recognition & AV_EF_EXPLODE)
+                                goto err;
+                            continue;
+                        }
+                    }
+
+                    if ((ret = avctx->hwaccel->decode_slice(avctx, slices[i].rawbuf, slices[i].raw_size)) < 0)
+                        goto err;
+                }
+            }
+
             if ((ret = avctx->hwaccel->end_frame(avctx)) < 0)
                 goto err;
 
             // decode second field
             s->gb = slices[n_slices1 + 1].gb;
+            s->mb_y = slices[n_slices1 + 1].mby_start;
             s->picture_structure = PICT_TOP_FIELD + v->tff;
             v->second_field = 1;
             v->pic_header_flag = 0;
@@ -954,8 +948,35 @@
 
             if ((ret = avctx->hwaccel->start_frame(avctx, buf_start_second_field, (buf + buf_size) - buf_start_second_field)) < 0)
                 goto err;
-            if ((ret = avctx->hwaccel->decode_slice(avctx, buf_start_second_field, (buf + buf_size) - buf_start_second_field)) < 0)
-                goto err;
+
+            if (n_slices - n_slices1 == 2) {
+                // no slices, decode the field as-is
+                if ((ret = avctx->hwaccel->decode_slice(avctx, buf_start_second_field, (buf + buf_size) - buf_start_second_field)) < 0)
+                    goto err;
+            } else {
+                if ((ret = avctx->hwaccel->decode_slice(avctx, buf_start_second_field, slices[n_slices1 + 2].rawbuf - buf_start_second_field)) < 0)
+                    goto err;
+
+                for (i = n_slices1 + 2; i < n_slices; i++) {
+                    s->gb = slices[i].gb;
+                    s->mb_y = slices[i].mby_start;
+
+                    v->pic_header_flag = get_bits1(&s->gb);
+                    if (v->pic_header_flag) {
+                        if (ff_vc1_parse_frame_header_adv(v, &s->gb) < 0) {
+                            av_log(v->s.avctx, AV_LOG_ERROR, "Slice header damaged\n");
+                            ret = AVERROR_INVALIDDATA;
+                            if (avctx->err_recognition & AV_EF_EXPLODE)
+                                goto err;
+                            continue;
+                        }
+                    }
+
+                    if ((ret = avctx->hwaccel->decode_slice(avctx, slices[i].rawbuf, slices[i].raw_size)) < 0)
+                        goto err;
+                }
+            }
+
             if ((ret = avctx->hwaccel->end_frame(avctx)) < 0)
                 goto err;
         } else {
@@ -1061,13 +1082,15 @@
                     av_log(v->s.avctx, AV_LOG_ERROR, "first field slice count too large\n");
                     continue;
                 }
-                s->end_mb_y = (i <= n_slices1 + 1) ? mb_height : FFMIN(mb_height, slices[i].mby_start % mb_height);
+                s->end_mb_y = (i == n_slices1 + 1) ? mb_height : FFMIN(mb_height, slices[i].mby_start % mb_height);
             }
             if (s->end_mb_y <= s->start_mb_y) {
                 av_log(v->s.avctx, AV_LOG_ERROR, "end mb y %d %d invalid\n", s->end_mb_y, s->start_mb_y);
                 continue;
             }
-            if (!v->p_frame_skipped && s->pict_type != AV_PICTURE_TYPE_I && !v->cbpcy_vlc) {
+            if (((s->pict_type == AV_PICTURE_TYPE_P && !v->p_frame_skipped) ||
+                 (s->pict_type == AV_PICTURE_TYPE_B && !v->bi_type)) &&
+                !v->cbpcy_vlc) {
                 av_log(v->s.avctx, AV_LOG_ERROR, "missing cbpcy_vlc\n");
                 continue;
             }
@@ -1152,6 +1175,9 @@
     AV_PIX_FMT_D3D11VA_VLD,
     AV_PIX_FMT_D3D11,
 #endif
+#if CONFIG_VC1_NVDEC_HWACCEL
+    AV_PIX_FMT_CUDA,
+#endif
 #if CONFIG_VC1_VAAPI_HWACCEL
     AV_PIX_FMT_VAAPI,
 #endif
@@ -1174,6 +1200,27 @@
     .flush          = ff_mpeg_flush,
     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .pix_fmts       = vc1_hwaccel_pixfmt_list_420,
+    .hw_configs     = (const AVCodecHWConfigInternal*[]) {
+#if CONFIG_VC1_DXVA2_HWACCEL
+                        HWACCEL_DXVA2(vc1),
+#endif
+#if CONFIG_VC1_D3D11VA_HWACCEL
+                        HWACCEL_D3D11VA(vc1),
+#endif
+#if CONFIG_VC1_D3D11VA2_HWACCEL
+                        HWACCEL_D3D11VA2(vc1),
+#endif
+#if CONFIG_VC1_NVDEC_HWACCEL
+                        HWACCEL_NVDEC(vc1),
+#endif
+#if CONFIG_VC1_VAAPI_HWACCEL
+                        HWACCEL_VAAPI(vc1),
+#endif
+#if CONFIG_VC1_VDPAU_HWACCEL
+                        HWACCEL_VDPAU(vc1),
+#endif
+                        NULL
+                    },
     .profiles       = NULL_IF_CONFIG_SMALL(ff_vc1_profiles)
 };
 
@@ -1190,38 +1237,27 @@
     .flush          = ff_mpeg_flush,
     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .pix_fmts       = vc1_hwaccel_pixfmt_list_420,
-    .profiles       = NULL_IF_CONFIG_SMALL(ff_vc1_profiles)
-};
+    .hw_configs     = (const AVCodecHWConfigInternal*[]) {
+#if CONFIG_WMV3_DXVA2_HWACCEL
+                        HWACCEL_DXVA2(wmv3),
 #endif
-
-#if CONFIG_WMV3_VDPAU_DECODER && FF_API_VDPAU
-AVCodec ff_wmv3_vdpau_decoder = {
-    .name           = "wmv3_vdpau",
-    .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 9 VDPAU"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_WMV3,
-    .priv_data_size = sizeof(VC1Context),
-    .init           = vc1_decode_init,
-    .close          = ff_vc1_decode_end,
-    .decode         = vc1_decode_frame,
-    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HWACCEL_VDPAU,
-    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_VDPAU_WMV3, AV_PIX_FMT_NONE },
-    .profiles       = NULL_IF_CONFIG_SMALL(ff_vc1_profiles)
-};
+#if CONFIG_WMV3_D3D11VA_HWACCEL
+                        HWACCEL_D3D11VA(wmv3),
 #endif
-
-#if CONFIG_VC1_VDPAU_DECODER && FF_API_VDPAU
-AVCodec ff_vc1_vdpau_decoder = {
-    .name           = "vc1_vdpau",
-    .long_name      = NULL_IF_CONFIG_SMALL("SMPTE VC-1 VDPAU"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_VC1,
-    .priv_data_size = sizeof(VC1Context),
-    .init           = vc1_decode_init,
-    .close          = ff_vc1_decode_end,
-    .decode         = vc1_decode_frame,
-    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HWACCEL_VDPAU,
-    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_VDPAU_VC1, AV_PIX_FMT_NONE },
+#if CONFIG_WMV3_D3D11VA2_HWACCEL
+                        HWACCEL_D3D11VA2(wmv3),
+#endif
+#if CONFIG_WMV3_NVDEC_HWACCEL
+                        HWACCEL_NVDEC(wmv3),
+#endif
+#if CONFIG_WMV3_VAAPI_HWACCEL
+                        HWACCEL_VAAPI(wmv3),
+#endif
+#if CONFIG_WMV3_VDPAU_HWACCEL
+                        HWACCEL_VDPAU(wmv3),
+#endif
+                        NULL
+                    },
     .profiles       = NULL_IF_CONFIG_SMALL(ff_vc1_profiles)
 };
 #endif

diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index 9239a4a..778b811 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c

@@ -107,12 +107,13 @@
     }
 }
 
-static void vc1_h_s_overlap_c(int16_t *left, int16_t *right)
+static void vc1_h_s_overlap_c(int16_t *left, int16_t *right, int left_stride, int right_stride, int flags)
 {
     int i;
     int a, b, c, d;
     int d1, d2;
-    int rnd1 = 4, rnd2 = 3;
+    int rnd1 = flags & 2 ? 3 : 4;
+    int rnd2 = 7 - rnd1;
     for (i = 0; i < 8; i++) {
         a  = left[6];
         b  = left[7];
@@ -126,10 +127,12 @@
         right[0] = ((c << 3) + d2 + rnd1) >> 3;
         right[1] = ((d << 3) + d1 + rnd2) >> 3;
 
-        right += 8;
-        left  += 8;
-        rnd2   = 7 - rnd2;
-        rnd1   = 7 - rnd1;
+        right += right_stride;
+        left  += left_stride;
+        if (flags & 1) {
+            rnd2   = 7 - rnd2;
+            rnd1   = 7 - rnd1;
+        }
     }
 }
 

diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h
index 16b3528..75db62b 100644
--- a/libavcodec/vc1dsp.h
+++ b/libavcodec/vc1dsp.h

@@ -45,7 +45,7 @@
     void (*vc1_v_overlap)(uint8_t *src, int stride);
     void (*vc1_h_overlap)(uint8_t *src, int stride);
     void (*vc1_v_s_overlap)(int16_t *top,  int16_t *bottom);
-    void (*vc1_h_s_overlap)(int16_t *left, int16_t *right);
+    void (*vc1_h_s_overlap)(int16_t *left, int16_t *right, int left_stride, int right_stride, int flags);
     void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
     void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
     void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);

diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c
index cdc7d42..d0101e0 100644
--- a/libavcodec/vc2enc.c
+++ b/libavcodec/vc2enc.c

@@ -29,10 +29,6 @@
 #include "vc2enc_dwt.h"
 #include "diractab.h"
 
-/* Total range is -COEF_LUT_TAB to +COEFF_LUT_TAB, but total tab size is half
- * (COEF_LUT_TAB*DIRAC_MAX_QUANT_INDEX), as the sign is appended during encoding */
-#define COEF_LUT_TAB 2048
-
 /* The limited size resolution of each slice forces us to do this */
 #define SSIZE_ROUND(b) (FFALIGN((b), s->size_scaler) + 4 + s->prefix_bytes)
 
@@ -152,9 +148,8 @@
     uint8_t quant[MAX_DWT_LEVELS][4];
     int custom_quant_matrix;
 
-    /* Coefficient LUT */
-    uint32_t *coef_lut_val;
-    uint8_t  *coef_lut_len;
+    /* Division LUT */
+    uint32_t qmagic_lut[116][2];
 
     int num_x; /* #slices horizontally */
     int num_y; /* #slices vertically */
@@ -164,6 +159,7 @@
     int chroma_y_shift;
 
     /* Rate control stuff */
+    int frame_max_bytes;
     int slice_max_bytes;
     int slice_min_bytes;
     int q_ceil;
@@ -228,37 +224,6 @@
     return ff_log2(topbit)*2 + 1;
 }
 
-static av_always_inline void get_vc2_ue_uint(int val, uint8_t *nbits,
-                                             uint32_t *eval)
-{
-    int i;
-    int pbits = 0, bits = 0, topbit = 1, maxval = 1;
-
-    if (!val++) {
-        *nbits = 1;
-        *eval = 1;
-        return;
-    }
-
-    while (val > maxval) {
-        topbit <<= 1;
-        maxval <<= 1;
-        maxval |=  1;
-    }
-
-    bits = ff_log2(topbit);
-
-    for (i = 0; i < bits; i++) {
-        topbit >>= 1;
-        pbits <<= 2;
-        if (val & topbit)
-            pbits |= 0x1;
-    }
-
-    *nbits = bits*2 + 1;
-    *eval = (pbits << 1) | 1;
-}
-
 /* VC-2 10.4 - parse_info() */
 static void encode_parse_info(VC2EncContext *s, enum DiracParseCodes pcode)
 {
@@ -556,7 +521,7 @@
     encode_wavelet_transform(s);
 }
 
-#define QUANT(c, qf) (((c) << 2)/(qf))
+#define QUANT(c, mul, add, shift) (((mul) * (c) + (add)) >> (shift))
 
 /* VC-2 13.5.5.2 - slice_band() */
 static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy,
@@ -569,24 +534,17 @@
     const int top    = b->height * (sy+0) / s->num_y;
     const int bottom = b->height * (sy+1) / s->num_y;
 
-    const int qfactor = ff_dirac_qscale_tab[quant];
-    const uint8_t  *len_lut = &s->coef_lut_len[quant*COEF_LUT_TAB];
-    const uint32_t *val_lut = &s->coef_lut_val[quant*COEF_LUT_TAB];
-
     dwtcoef *coeff = b->buf + top * b->stride;
+    const uint64_t q_m = ((uint64_t)(s->qmagic_lut[quant][0])) << 2;
+    const uint64_t q_a = s->qmagic_lut[quant][1];
+    const int q_s = av_log2(ff_dirac_qscale_tab[quant]) + 32;
 
     for (y = top; y < bottom; y++) {
         for (x = left; x < right; x++) {
-            const int neg = coeff[x] < 0;
-            uint32_t c_abs = FFABS(coeff[x]);
-            if (c_abs < COEF_LUT_TAB) {
-                put_bits(pb, len_lut[c_abs], val_lut[c_abs] | neg);
-            } else {
-                c_abs = QUANT(c_abs, qfactor);
-                put_vc2_ue_uint(pb, c_abs);
-                if (c_abs)
-                    put_bits(pb, 1, neg);
-            }
+            uint32_t c_abs = QUANT(FFABS(coeff[x]), q_m, q_a, q_s);
+            put_vc2_ue_uint(pb, c_abs);
+            if (c_abs)
+                put_bits(pb, 1, coeff[x] < 0);
         }
         coeff += b->stride;
     }
@@ -618,8 +576,9 @@
                 SubBand *b = &s->plane[p].band[level][orientation];
 
                 const int q_idx = quants[level][orientation];
-                const uint8_t *len_lut = &s->coef_lut_len[q_idx*COEF_LUT_TAB];
-                const int qfactor = ff_dirac_qscale_tab[q_idx];
+                const uint64_t q_m = ((uint64_t)s->qmagic_lut[q_idx][0]) << 2;
+                const uint64_t q_a = s->qmagic_lut[q_idx][1];
+                const int q_s = av_log2(ff_dirac_qscale_tab[q_idx]) + 32;
 
                 const int left   = b->width  * slice->x    / s->num_x;
                 const int right  = b->width  *(slice->x+1) / s->num_x;
@@ -630,14 +589,9 @@
 
                 for (y = top; y < bottom; y++) {
                     for (x = left; x < right; x++) {
-                        uint32_t c_abs = FFABS(buf[x]);
-                        if (c_abs < COEF_LUT_TAB) {
-                            bits += len_lut[c_abs];
-                        } else {
-                            c_abs = QUANT(c_abs, qfactor);
-                            bits += count_vc2_ue_uint(c_abs);
-                            bits += !!c_abs;
-                        }
+                        uint32_t c_abs = QUANT(FFABS(buf[x]), q_m, q_a, q_s);
+                        bits += count_vc2_ue_uint(c_abs);
+                        bits += !!c_abs;
                     }
                     buf += b->stride;
                 }
@@ -715,7 +669,7 @@
 
     for (i = 0; i < s->num_x*s->num_y; i++) {
         SliceArgs *args = &enc_args[i];
-        bytes_left += s->slice_max_bytes - args->bytes;
+        bytes_left += args->bytes;
         for (j = 0; j < slice_redist_range; j++) {
             if (args->bytes > bytes_top[j]) {
                 bytes_top[j] = args->bytes;
@@ -725,8 +679,10 @@
         }
     }
 
+    bytes_left = s->frame_max_bytes - bytes_left;
+
     /* Second pass - distribute leftover bytes */
-    while (1) {
+    while (bytes_left > 0) {
         int distributed = 0;
         for (i = 0; i < slice_redist_range; i++) {
             SliceArgs *args;
@@ -994,13 +950,13 @@
                                       const AVFrame *frame, int *got_packet)
 {
     int ret = 0;
-    int sig_size = 256;
+    int slice_ceil, sig_size = 256;
     VC2EncContext *s = avctx->priv_data;
     const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT;
     const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT;
     const int aux_data_size = bitexact ? sizeof("Lavc") : sizeof(LIBAVCODEC_IDENT);
     const int header_size = 100 + aux_data_size;
-    int64_t max_frame_bytes, r_bitrate = avctx->bit_rate >> (s->interlaced);
+    int64_t r_bitrate = avctx->bit_rate >> (s->interlaced);
 
     s->avctx = avctx;
     s->size_scaler = 2;
@@ -1009,18 +965,21 @@
     s->next_parse_offset = 0;
 
     /* Rate control */
-    max_frame_bytes = (av_rescale(r_bitrate, s->avctx->time_base.num,
-                                  s->avctx->time_base.den) >> 3) - header_size;
-    s->slice_max_bytes = av_rescale(max_frame_bytes, 1, s->num_x*s->num_y);
+    s->frame_max_bytes = (av_rescale(r_bitrate, s->avctx->time_base.num,
+                                     s->avctx->time_base.den) >> 3) - header_size;
+    s->slice_max_bytes = slice_ceil = av_rescale(s->frame_max_bytes, 1, s->num_x*s->num_y);
 
     /* Find an appropriate size scaler */
     while (sig_size > 255) {
         int r_size = SSIZE_ROUND(s->slice_max_bytes);
+        if (r_size > slice_ceil) {
+            s->slice_max_bytes -= r_size - slice_ceil;
+            r_size = SSIZE_ROUND(s->slice_max_bytes);
+        }
         sig_size = r_size/s->size_scaler; /* Signalled slize size */
         s->size_scaler <<= 1;
     }
 
-    s->slice_max_bytes = SSIZE_ROUND(s->slice_max_bytes);
     s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f);
 
     ret = encode_frame(s, avpkt, frame, aux_data, header_size, s->interlaced);
@@ -1053,8 +1012,6 @@
     }
 
     av_freep(&s->slice_args);
-    av_freep(&s->coef_lut_len);
-    av_freep(&s->coef_lut_val);
 
     return 0;
 }
@@ -1063,7 +1020,7 @@
 {
     Plane *p;
     SubBand *b;
-    int i, j, level, o, shift;
+    int i, level, o, shift, ret;
     const AVPixFmtDescriptor *fmt = av_pix_fmt_desc_get(avctx->pix_fmt);
     const int depth = fmt->comp[0].depth;
     VC2EncContext *s = avctx->priv_data;
@@ -1138,7 +1095,9 @@
     }
 
     /* Chroma subsampling */
-    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
+    ret = av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
+    if (ret)
+        return ret;
 
     /* Bit depth and color range index */
     if (depth == 8 && avctx->color_range == AVCOL_RANGE_JPEG) {
@@ -1203,27 +1162,20 @@
     if (!s->slice_args)
         goto alloc_fail;
 
-    /* Lookup tables */
-    s->coef_lut_len = av_malloc(COEF_LUT_TAB*(s->q_ceil+1)*sizeof(*s->coef_lut_len));
-    if (!s->coef_lut_len)
-        goto alloc_fail;
-
-    s->coef_lut_val = av_malloc(COEF_LUT_TAB*(s->q_ceil+1)*sizeof(*s->coef_lut_val));
-    if (!s->coef_lut_val)
-        goto alloc_fail;
-
-    for (i = 0; i < s->q_ceil; i++) {
-        uint8_t  *len_lut = &s->coef_lut_len[i*COEF_LUT_TAB];
-        uint32_t *val_lut = &s->coef_lut_val[i*COEF_LUT_TAB];
-        for (j = 0; j < COEF_LUT_TAB; j++) {
-            get_vc2_ue_uint(QUANT(j, ff_dirac_qscale_tab[i]),
-                            &len_lut[j], &val_lut[j]);
-            if (len_lut[j] != 1) {
-                len_lut[j] += 1;
-                val_lut[j] <<= 1;
-            } else {
-                val_lut[j] = 1;
-            }
+    for (i = 0; i < 116; i++) {
+        const uint64_t qf = ff_dirac_qscale_tab[i];
+        const uint32_t m = av_log2(qf);
+        const uint32_t t = (1ULL << (m + 32)) / qf;
+        const uint32_t r = (t*qf + qf) & UINT32_MAX;
+        if (!(qf & (qf - 1))) {
+            s->qmagic_lut[i][0] = 0xFFFFFFFF;
+            s->qmagic_lut[i][1] = 0xFFFFFFFF;
+        } else if (r <= 1 << m) {
+            s->qmagic_lut[i][0] = t + 1;
+            s->qmagic_lut[i][1] = 0;
+        } else {
+            s->qmagic_lut[i][0] = t;
+            s->qmagic_lut[i][1] = t;
         }
     }
 

diff --git a/libavcodec/vda.c b/libavcodec/vda.c
deleted file mode 100644
index 819ae03..0000000
--- a/libavcodec/vda.c
+++ /dev/null

@@ -1,84 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-
-#include "libavutil/mem.h"
-
-#include "vda.h"
-#include "vda_vt_internal.h"
-
-#if CONFIG_H264_VDA_HWACCEL
-AVVDAContext *av_vda_alloc_context(void)
-{
-    AVVDAContext *ret = av_mallocz(sizeof(*ret));
-
-    if (ret) {
-        ret->output_callback = ff_vda_output_callback;
-        ret->cv_pix_fmt_type = kCVPixelFormatType_422YpCbCr8;
-    }
-
-    return ret;
-}
-
-int av_vda_default_init(AVCodecContext *avctx)
-{
-    return av_vda_default_init2(avctx, NULL);
-}
-
-int av_vda_default_init2(AVCodecContext *avctx, AVVDAContext *vdactx)
-{
-    avctx->hwaccel_context = vdactx ?: av_vda_alloc_context();
-    if (!avctx->hwaccel_context)
-        return AVERROR(ENOMEM);
-    return ff_vda_default_init(avctx);
-}
-
-void av_vda_default_free(AVCodecContext *avctx)
-{
-    ff_vda_default_free(avctx);
-    av_freep(&avctx->hwaccel_context);
-}
-
-void ff_vda_default_free(AVCodecContext *avctx)
-{
-    AVVDAContext *vda = avctx->hwaccel_context;
-    if (vda && vda->decoder)
-        VDADecoderDestroy(vda->decoder);
-}
-
-#else
-AVVDAContext *av_vda_alloc_context(void)
-{
-    return NULL;
-}
-
-int av_vda_default_init(AVCodecContext *avctx)
-{
-    return AVERROR(ENOSYS);
-}
-
-int av_vda_default_init2(AVCodecContext *avctx, AVVDAContext *vdactx)
-{
-    return AVERROR(ENOSYS);
-}
-
-void av_vda_default_free(AVCodecContext *ctx)
-{
-}
-#endif

diff --git a/libavcodec/vda.h b/libavcodec/vda.h
deleted file mode 100644
index bde14e3..0000000
--- a/libavcodec/vda.h
+++ /dev/null

@@ -1,230 +0,0 @@
-/*
- * VDA HW acceleration
- *
- * copyright (c) 2011 Sebastien Zwickert
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_VDA_H
-#define AVCODEC_VDA_H
-
-/**
- * @file
- * @ingroup lavc_codec_hwaccel_vda
- * Public libavcodec VDA header.
- */
-
-#include "libavcodec/avcodec.h"
-
-#include <stdint.h>
-
-// emmintrin.h is unable to compile with -std=c99 -Werror=missing-prototypes
-// http://openradar.appspot.com/8026390
-#undef __GNUC_STDC_INLINE__
-
-#define Picture QuickdrawPicture
-#include <VideoDecodeAcceleration/VDADecoder.h>
-#undef Picture
-
-#include "libavcodec/version.h"
-
-// extra flags not defined in VDADecoder.h
-enum {
-    kVDADecodeInfo_Asynchronous = 1UL << 0,
-    kVDADecodeInfo_FrameDropped = 1UL << 1
-};
-
-/**
- * @defgroup lavc_codec_hwaccel_vda VDA
- * @ingroup lavc_codec_hwaccel
- *
- * @{
- */
-
-/**
- * This structure is used to provide the necessary configurations and data
- * to the VDA FFmpeg HWAccel implementation.
- *
- * The application must make it available as AVCodecContext.hwaccel_context.
- */
-struct vda_context {
-    /**
-     * VDA decoder object.
-     *
-     * - encoding: unused
-     * - decoding: Set/Unset by libavcodec.
-     */
-    VDADecoder          decoder;
-
-    /**
-     * The Core Video pixel buffer that contains the current image data.
-     *
-     * encoding: unused
-     * decoding: Set by libavcodec. Unset by user.
-     */
-    CVPixelBufferRef    cv_buffer;
-
-    /**
-     * Use the hardware decoder in synchronous mode.
-     *
-     * encoding: unused
-     * decoding: Set by user.
-     */
-    int                 use_sync_decoding;
-
-    /**
-     * The frame width.
-     *
-     * - encoding: unused
-     * - decoding: Set/Unset by user.
-     */
-    int                 width;
-
-    /**
-     * The frame height.
-     *
-     * - encoding: unused
-     * - decoding: Set/Unset by user.
-     */
-    int                 height;
-
-    /**
-     * The frame format.
-     *
-     * - encoding: unused
-     * - decoding: Set/Unset by user.
-     */
-    int                 format;
-
-    /**
-     * The pixel format for output image buffers.
-     *
-     * - encoding: unused
-     * - decoding: Set/Unset by user.
-     */
-    OSType              cv_pix_fmt_type;
-
-    /**
-     * unused
-     */
-    uint8_t             *priv_bitstream;
-
-    /**
-     * unused
-     */
-    int                 priv_bitstream_size;
-
-    /**
-     * unused
-     */
-    int                 priv_allocated_size;
-
-    /**
-     * Use av_buffer to manage buffer.
-     * When the flag is set, the CVPixelBuffers returned by the decoder will
-     * be released automatically, so you have to retain them if necessary.
-     * Not setting this flag may cause memory leak.
-     *
-     * encoding: unused
-     * decoding: Set by user.
-     */
-    int                 use_ref_buffer;
-};
-
-/** Create the video decoder. */
-int ff_vda_create_decoder(struct vda_context *vda_ctx,
-                          uint8_t *extradata,
-                          int extradata_size);
-
-/** Destroy the video decoder. */
-int ff_vda_destroy_decoder(struct vda_context *vda_ctx);
-
-/**
- * This struct holds all the information that needs to be passed
- * between the caller and libavcodec for initializing VDA decoding.
- * Its size is not a part of the public ABI, it must be allocated with
- * av_vda_alloc_context() and freed with av_free().
- */
-typedef struct AVVDAContext {
-    /**
-     * VDA decoder object. Created and freed by the caller.
-     */
-    VDADecoder decoder;
-
-    /**
-     * The output callback that must be passed to VDADecoderCreate.
-     * Set by av_vda_alloc_context().
-     */
-    VDADecoderOutputCallback output_callback;
-
-    /**
-     * CVPixelBuffer Format Type that VDA will use for decoded frames; set by
-     * the caller.
-     */
-    OSType cv_pix_fmt_type;
-} AVVDAContext;
-
-/**
- * Allocate and initialize a VDA context.
- *
- * This function should be called from the get_format() callback when the caller
- * selects the AV_PIX_FMT_VDA format. The caller must then create the decoder
- * object (using the output callback provided by libavcodec) that will be used
- * for VDA-accelerated decoding.
- *
- * When decoding with VDA is finished, the caller must destroy the decoder
- * object and free the VDA context using av_free().
- *
- * @return the newly allocated context or NULL on failure
- */
-AVVDAContext *av_vda_alloc_context(void);
-
-/**
- * This is a convenience function that creates and sets up the VDA context using
- * an internal implementation.
- *
- * @param avctx the corresponding codec context
- *
- * @return >= 0 on success, a negative AVERROR code on failure
- */
-int av_vda_default_init(AVCodecContext *avctx);
-
-/**
- * This is a convenience function that creates and sets up the VDA context using
- * an internal implementation.
- *
- * @param avctx the corresponding codec context
- * @param vdactx the VDA context to use
- *
- * @return >= 0 on success, a negative AVERROR code on failure
- */
-int av_vda_default_init2(AVCodecContext *avctx, AVVDAContext *vdactx);
-
-/**
- * This function must be called to free the VDA context initialized with
- * av_vda_default_init().
- *
- * @param avctx the corresponding codec context
- */
-void av_vda_default_free(AVCodecContext *avctx);
-
-/**
- * @}
- */
-
-#endif /* AVCODEC_VDA_H */

diff --git a/libavcodec/vda_h264.c b/libavcodec/vda_h264.c
deleted file mode 100644
index 7b88ec7..0000000
--- a/libavcodec/vda_h264.c
+++ /dev/null

@@ -1,425 +0,0 @@
-/*
- * VDA H264 HW acceleration.
- *
- * copyright (c) 2011 Sebastien Zwickert
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <CoreFoundation/CFDictionary.h>
-#include <CoreFoundation/CFNumber.h>
-#include <CoreFoundation/CFData.h>
-
-#include "vda.h"
-#include "libavutil/avutil.h"
-#include "h264dec.h"
-
-struct vda_buffer {
-    CVPixelBufferRef cv_buffer;
-};
-#include "internal.h"
-#include "vda_vt_internal.h"
-
-/* Decoder callback that adds the vda frame to the queue in display order. */
-static void vda_decoder_callback(void *vda_hw_ctx,
-                                 CFDictionaryRef user_info,
-                                 OSStatus status,
-                                 uint32_t infoFlags,
-                                 CVImageBufferRef image_buffer)
-{
-    struct vda_context *vda_ctx = vda_hw_ctx;
-
-    if (infoFlags & kVDADecodeInfo_FrameDropped)
-        vda_ctx->cv_buffer = NULL;
-
-    if (!image_buffer)
-        return;
-
-    if (vda_ctx->cv_pix_fmt_type != CVPixelBufferGetPixelFormatType(image_buffer))
-        return;
-
-    vda_ctx->cv_buffer = CVPixelBufferRetain(image_buffer);
-}
-
-static int vda_sync_decode(VTContext *ctx, struct vda_context *vda_ctx)
-{
-    OSStatus status;
-    CFDataRef coded_frame;
-    uint32_t flush_flags = 1 << 0; ///< kVDADecoderFlush_emitFrames
-
-    coded_frame = CFDataCreate(kCFAllocatorDefault,
-                               ctx->bitstream,
-                               ctx->bitstream_size);
-
-    status = VDADecoderDecode(vda_ctx->decoder, 0, coded_frame, NULL);
-
-    if (kVDADecoderNoErr == status)
-        status = VDADecoderFlush(vda_ctx->decoder, flush_flags);
-
-    CFRelease(coded_frame);
-
-    return status;
-}
-
-
-static int vda_old_h264_start_frame(AVCodecContext *avctx,
-                                av_unused const uint8_t *buffer,
-                                av_unused uint32_t size)
-{
-    VTContext *vda = avctx->internal->hwaccel_priv_data;
-    struct vda_context *vda_ctx = avctx->hwaccel_context;
-
-    if (!vda_ctx->decoder)
-        return -1;
-
-    vda->bitstream_size = 0;
-
-    return 0;
-}
-
-static int vda_old_h264_decode_slice(AVCodecContext *avctx,
-                                 const uint8_t *buffer,
-                                 uint32_t size)
-{
-    VTContext *vda              = avctx->internal->hwaccel_priv_data;
-    struct vda_context *vda_ctx = avctx->hwaccel_context;
-    void *tmp;
-
-    if (!vda_ctx->decoder)
-        return -1;
-
-    tmp = av_fast_realloc(vda->bitstream,
-                          &vda->allocated_size,
-                          vda->bitstream_size + size + 4);
-    if (!tmp)
-        return AVERROR(ENOMEM);
-
-    vda->bitstream = tmp;
-
-    AV_WB32(vda->bitstream + vda->bitstream_size, size);
-    memcpy(vda->bitstream + vda->bitstream_size + 4, buffer, size);
-
-    vda->bitstream_size += size + 4;
-
-    return 0;
-}
-
-static void vda_h264_release_buffer(void *opaque, uint8_t *data)
-{
-    struct vda_buffer *context = opaque;
-    CVPixelBufferRelease(context->cv_buffer);
-    av_free(context);
-}
-
-static int vda_old_h264_end_frame(AVCodecContext *avctx)
-{
-    H264Context *h                      = avctx->priv_data;
-    VTContext *vda                      = avctx->internal->hwaccel_priv_data;
-    struct vda_context *vda_ctx         = avctx->hwaccel_context;
-    AVFrame *frame                      = h->cur_pic_ptr->f;
-    struct vda_buffer *context;
-    AVBufferRef *buffer;
-    int status;
-
-    if (!vda_ctx->decoder || !vda->bitstream)
-        return -1;
-
-    status = vda_sync_decode(vda, vda_ctx);
-    frame->data[3] = (void*)vda_ctx->cv_buffer;
-
-    if (status)
-        av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", status);
-
-    if (!vda_ctx->use_ref_buffer || status)
-        return status;
-
-    context = av_mallocz(sizeof(*context));
-    buffer = av_buffer_create(NULL, 0, vda_h264_release_buffer, context, 0);
-    if (!context || !buffer) {
-        CVPixelBufferRelease(vda_ctx->cv_buffer);
-        av_free(context);
-        return -1;
-    }
-
-    context->cv_buffer = vda_ctx->cv_buffer;
-    frame->buf[3] = buffer;
-
-    return status;
-}
-
-int ff_vda_create_decoder(struct vda_context *vda_ctx,
-                          uint8_t *extradata,
-                          int extradata_size)
-{
-    OSStatus status;
-    CFNumberRef height;
-    CFNumberRef width;
-    CFNumberRef format;
-    CFDataRef avc_data;
-    CFMutableDictionaryRef config_info;
-    CFMutableDictionaryRef buffer_attributes;
-    CFMutableDictionaryRef io_surface_properties;
-    CFNumberRef cv_pix_fmt;
-
-    vda_ctx->priv_bitstream = NULL;
-    vda_ctx->priv_allocated_size = 0;
-
-    /* Each VCL NAL in the bitstream sent to the decoder
-     * is preceded by a 4 bytes length header.
-     * Change the avcC atom header if needed, to signal headers of 4 bytes. */
-    if (extradata_size >= 4 && (extradata[4] & 0x03) != 0x03) {
-        uint8_t *rw_extradata;
-
-        if (!(rw_extradata = av_malloc(extradata_size)))
-            return AVERROR(ENOMEM);
-
-        memcpy(rw_extradata, extradata, extradata_size);
-
-        rw_extradata[4] |= 0x03;
-
-        avc_data = CFDataCreate(kCFAllocatorDefault, rw_extradata, extradata_size);
-
-        av_freep(&rw_extradata);
-    } else {
-        avc_data = CFDataCreate(kCFAllocatorDefault, extradata, extradata_size);
-    }
-
-    config_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
-                                            4,
-                                            &kCFTypeDictionaryKeyCallBacks,
-                                            &kCFTypeDictionaryValueCallBacks);
-
-    height   = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &vda_ctx->height);
-    width    = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &vda_ctx->width);
-    format   = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &vda_ctx->format);
-
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_Height, height);
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_Width, width);
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_SourceFormat, format);
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_avcCData, avc_data);
-
-    buffer_attributes = CFDictionaryCreateMutable(kCFAllocatorDefault,
-                                                  2,
-                                                  &kCFTypeDictionaryKeyCallBacks,
-                                                  &kCFTypeDictionaryValueCallBacks);
-    io_surface_properties = CFDictionaryCreateMutable(kCFAllocatorDefault,
-                                                      0,
-                                                      &kCFTypeDictionaryKeyCallBacks,
-                                                      &kCFTypeDictionaryValueCallBacks);
-    cv_pix_fmt  = CFNumberCreate(kCFAllocatorDefault,
-                                 kCFNumberSInt32Type,
-                                 &vda_ctx->cv_pix_fmt_type);
-    CFDictionarySetValue(buffer_attributes,
-                         kCVPixelBufferPixelFormatTypeKey,
-                         cv_pix_fmt);
-    CFDictionarySetValue(buffer_attributes,
-                         kCVPixelBufferIOSurfacePropertiesKey,
-                         io_surface_properties);
-
-    status = VDADecoderCreate(config_info,
-                              buffer_attributes,
-                              (VDADecoderOutputCallback *)vda_decoder_callback,
-                              vda_ctx,
-                              &vda_ctx->decoder);
-
-    CFRelease(height);
-    CFRelease(width);
-    CFRelease(format);
-    CFRelease(avc_data);
-    CFRelease(config_info);
-    CFRelease(io_surface_properties);
-    CFRelease(cv_pix_fmt);
-    CFRelease(buffer_attributes);
-
-    return status;
-}
-
-int ff_vda_destroy_decoder(struct vda_context *vda_ctx)
-{
-    OSStatus status = kVDADecoderNoErr;
-
-    if (vda_ctx->decoder)
-        status = VDADecoderDestroy(vda_ctx->decoder);
-
-    return status;
-}
-
-AVHWAccel ff_h264_vda_old_hwaccel = {
-    .name           = "h264_vda",
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_H264,
-    .pix_fmt        = AV_PIX_FMT_VDA_VLD,
-    .start_frame    = vda_old_h264_start_frame,
-    .decode_slice   = vda_old_h264_decode_slice,
-    .end_frame      = vda_old_h264_end_frame,
-    .uninit         = ff_videotoolbox_uninit,
-    .priv_data_size = sizeof(VTContext),
-};
-
-void ff_vda_output_callback(void *opaque,
-                            CFDictionaryRef user_info,
-                            OSStatus status,
-                            uint32_t infoFlags,
-                            CVImageBufferRef image_buffer)
-{
-    AVCodecContext *ctx = opaque;
-    VTContext *vda = ctx->internal->hwaccel_priv_data;
-
-
-    if (vda->frame) {
-        CVPixelBufferRelease(vda->frame);
-        vda->frame = NULL;
-    }
-
-    if (!image_buffer)
-        return;
-
-    vda->frame = CVPixelBufferRetain(image_buffer);
-}
-
-static int vda_h264_end_frame(AVCodecContext *avctx)
-{
-    H264Context *h        = avctx->priv_data;
-    VTContext *vda        = avctx->internal->hwaccel_priv_data;
-    AVVDAContext *vda_ctx = avctx->hwaccel_context;
-    AVFrame *frame        = h->cur_pic_ptr->f;
-    uint32_t flush_flags  = 1 << 0; ///< kVDADecoderFlush_emitFrames
-    CFDataRef coded_frame;
-    OSStatus status;
-
-    if (!vda->bitstream_size)
-        return AVERROR_INVALIDDATA;
-
-
-    coded_frame = CFDataCreate(kCFAllocatorDefault,
-                               vda->bitstream,
-                               vda->bitstream_size);
-
-    status = VDADecoderDecode(vda_ctx->decoder, 0, coded_frame, NULL);
-
-    if (status == kVDADecoderNoErr)
-        status = VDADecoderFlush(vda_ctx->decoder, flush_flags);
-
-    CFRelease(coded_frame);
-
-    if (!vda->frame)
-        return AVERROR_UNKNOWN;
-
-    if (status != kVDADecoderNoErr) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", status);
-        return AVERROR_UNKNOWN;
-    }
-
-    return ff_videotoolbox_buffer_create(vda, frame);
-}
-
-int ff_vda_default_init(AVCodecContext *avctx)
-{
-    AVVDAContext *vda_ctx = avctx->hwaccel_context;
-    OSStatus status = kVDADecoderNoErr;
-    CFNumberRef height;
-    CFNumberRef width;
-    CFNumberRef format;
-    CFDataRef avc_data;
-    CFMutableDictionaryRef config_info;
-    CFMutableDictionaryRef buffer_attributes;
-    CFMutableDictionaryRef io_surface_properties;
-    CFNumberRef cv_pix_fmt;
-    int32_t fmt = 'avc1', pix_fmt = vda_ctx->cv_pix_fmt_type;
-
-    // kCVPixelFormatType_420YpCbCr8Planar;
-
-    avc_data = ff_videotoolbox_avcc_extradata_create(avctx);
-
-    config_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
-                                            4,
-                                            &kCFTypeDictionaryKeyCallBacks,
-                                            &kCFTypeDictionaryValueCallBacks);
-
-    height = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &avctx->height);
-    width  = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &avctx->width);
-    format = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &fmt);
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_Height, height);
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_Width, width);
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_avcCData, avc_data);
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_SourceFormat, format);
-
-    buffer_attributes = CFDictionaryCreateMutable(kCFAllocatorDefault,
-                                                  2,
-                                                  &kCFTypeDictionaryKeyCallBacks,
-                                                  &kCFTypeDictionaryValueCallBacks);
-    io_surface_properties = CFDictionaryCreateMutable(kCFAllocatorDefault,
-                                                      0,
-                                                      &kCFTypeDictionaryKeyCallBacks,
-                                                      &kCFTypeDictionaryValueCallBacks);
-    cv_pix_fmt      = CFNumberCreate(kCFAllocatorDefault,
-                                     kCFNumberSInt32Type,
-                                     &pix_fmt);
-
-    CFDictionarySetValue(buffer_attributes,
-                         kCVPixelBufferPixelFormatTypeKey,
-                         cv_pix_fmt);
-    CFDictionarySetValue(buffer_attributes,
-                         kCVPixelBufferIOSurfacePropertiesKey,
-                         io_surface_properties);
-
-    status = VDADecoderCreate(config_info,
-                              buffer_attributes,
-                              (VDADecoderOutputCallback *)ff_vda_output_callback,
-                              avctx,
-                              &vda_ctx->decoder);
-
-    CFRelease(format);
-    CFRelease(height);
-    CFRelease(width);
-    CFRelease(avc_data);
-    CFRelease(config_info);
-    CFRelease(cv_pix_fmt);
-    CFRelease(io_surface_properties);
-    CFRelease(buffer_attributes);
-
-    if (status != kVDADecoderNoErr) {
-        av_log(avctx, AV_LOG_ERROR, "Cannot initialize VDA %d\n", status);
-    }
-
-    switch (status) {
-    case kVDADecoderHardwareNotSupportedErr:
-    case kVDADecoderFormatNotSupportedErr:
-        return AVERROR(ENOSYS);
-    case kVDADecoderConfigurationError:
-        return AVERROR(EINVAL);
-    case kVDADecoderDecoderFailedErr:
-        return AVERROR_INVALIDDATA;
-    case kVDADecoderNoErr:
-        return 0;
-    default:
-        return AVERROR_UNKNOWN;
-    }
-}
-
-AVHWAccel ff_h264_vda_hwaccel = {
-    .name           = "h264_vda",
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_H264,
-    .pix_fmt        = AV_PIX_FMT_VDA,
-    .alloc_frame    = ff_videotoolbox_alloc_frame,
-    .start_frame    = ff_videotoolbox_h264_start_frame,
-    .decode_slice   = ff_videotoolbox_h264_decode_slice,
-    .end_frame      = vda_h264_end_frame,
-    .uninit         = ff_videotoolbox_uninit,
-    .priv_data_size = sizeof(VTContext),
-};

diff --git a/libavcodec/vda_h264_dec.c b/libavcodec/vda_h264_dec.c
deleted file mode 100644
index 972bd6b..0000000
--- a/libavcodec/vda_h264_dec.c
+++ /dev/null

@@ -1,263 +0,0 @@
-/*
- * Copyright (c) 2012, Xidorn Quan
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * H.264 decoder via VDA
- * @author Xidorn Quan <quanxunzhen@gmail.com>
- */
-
-#include <string.h>
-#include <CoreFoundation/CoreFoundation.h>
-
-#include "vda.h"
-#include "h264dec.h"
-#include "avcodec.h"
-
-#ifndef kCFCoreFoundationVersionNumber10_7
-#define kCFCoreFoundationVersionNumber10_7      635.00
-#endif
-
-extern AVCodec ff_h264_decoder, ff_h264_vda_decoder;
-
-static const enum AVPixelFormat vda_pixfmts_prior_10_7[] = {
-    AV_PIX_FMT_UYVY422,
-    AV_PIX_FMT_YUV420P,
-    AV_PIX_FMT_NONE
-};
-
-static const enum AVPixelFormat vda_pixfmts[] = {
-    AV_PIX_FMT_UYVY422,
-    AV_PIX_FMT_YUYV422,
-    AV_PIX_FMT_NV12,
-    AV_PIX_FMT_YUV420P,
-    AV_PIX_FMT_NONE
-};
-
-typedef struct {
-    H264Context h264ctx;
-    int h264_initialized;
-    struct vda_context vda_ctx;
-    enum AVPixelFormat pix_fmt;
-
-    /* for backing-up fields set by user.
-     * we have to gain full control of such fields here */
-    void *hwaccel_context;
-    enum AVPixelFormat (*get_format)(struct AVCodecContext *s, const enum AVPixelFormat * fmt);
-    int (*get_buffer2)(struct AVCodecContext *s, AVFrame *frame, int flags);
-} VDADecoderContext;
-
-static enum AVPixelFormat get_format(struct AVCodecContext *avctx,
-        const enum AVPixelFormat *fmt)
-{
-    return AV_PIX_FMT_VDA_VLD;
-}
-
-typedef struct {
-    CVPixelBufferRef cv_buffer;
-} VDABufferContext;
-
-static void release_buffer(void *opaque, uint8_t *data)
-{
-    VDABufferContext *context = opaque;
-    CVPixelBufferUnlockBaseAddress(context->cv_buffer, 0);
-    CVPixelBufferRelease(context->cv_buffer);
-    av_free(context);
-}
-
-static int get_buffer2(AVCodecContext *avctx, AVFrame *pic, int flag)
-{
-    VDABufferContext *context = av_mallocz(sizeof(VDABufferContext));
-    AVBufferRef *buffer = av_buffer_create(NULL, 0, release_buffer, context, 0);
-    if (!context || !buffer) {
-        av_free(context);
-        return AVERROR(ENOMEM);
-    }
-
-    pic->buf[0] = buffer;
-    pic->data[0] = (void *)1;
-    return 0;
-}
-
-static inline void set_context(AVCodecContext *avctx)
-{
-    VDADecoderContext *ctx = avctx->priv_data;
-    ctx->hwaccel_context = avctx->hwaccel_context;
-    avctx->hwaccel_context = &ctx->vda_ctx;
-    ctx->get_format = avctx->get_format;
-    avctx->get_format = get_format;
-    ctx->get_buffer2 = avctx->get_buffer2;
-    avctx->get_buffer2 = get_buffer2;
-}
-
-static inline void restore_context(AVCodecContext *avctx)
-{
-    VDADecoderContext *ctx = avctx->priv_data;
-    avctx->hwaccel_context = ctx->hwaccel_context;
-    avctx->get_format = ctx->get_format;
-    avctx->get_buffer2 = ctx->get_buffer2;
-}
-
-static int vdadec_decode(AVCodecContext *avctx,
-        void *data, int *got_frame, AVPacket *avpkt)
-{
-    VDADecoderContext *ctx = avctx->priv_data;
-    AVFrame *pic = data;
-    int ret;
-
-    set_context(avctx);
-    ret = ff_h264_decoder.decode(avctx, data, got_frame, avpkt);
-    restore_context(avctx);
-    if (*got_frame) {
-        AVBufferRef *buffer = pic->buf[0];
-        VDABufferContext *context = av_buffer_get_opaque(buffer);
-        CVPixelBufferRef cv_buffer = (CVPixelBufferRef)pic->data[3];
-
-        CVPixelBufferRetain(cv_buffer);
-        CVPixelBufferLockBaseAddress(cv_buffer, 0);
-        context->cv_buffer = cv_buffer;
-        pic->format = ctx->pix_fmt;
-        if (CVPixelBufferIsPlanar(cv_buffer)) {
-            int i, count = CVPixelBufferGetPlaneCount(cv_buffer);
-            av_assert0(count < 4);
-            for (i = 0; i < count; i++) {
-                pic->data[i] = CVPixelBufferGetBaseAddressOfPlane(cv_buffer, i);
-                pic->linesize[i] = CVPixelBufferGetBytesPerRowOfPlane(cv_buffer, i);
-            }
-        } else {
-            pic->data[0] = CVPixelBufferGetBaseAddress(cv_buffer);
-            pic->linesize[0] = CVPixelBufferGetBytesPerRow(cv_buffer);
-        }
-    }
-    avctx->pix_fmt = ctx->pix_fmt;
-
-    return ret;
-}
-
-static av_cold int vdadec_close(AVCodecContext *avctx)
-{
-    VDADecoderContext *ctx = avctx->priv_data;
-    /* release buffers and decoder */
-    ff_vda_destroy_decoder(&ctx->vda_ctx);
-    /* close H.264 decoder */
-    if (ctx->h264_initialized) {
-        set_context(avctx);
-        ff_h264_decoder.close(avctx);
-        restore_context(avctx);
-    }
-    return 0;
-}
-
-static av_cold int vdadec_init(AVCodecContext *avctx)
-{
-    VDADecoderContext *ctx = avctx->priv_data;
-    struct vda_context *vda_ctx = &ctx->vda_ctx;
-    OSStatus status;
-    int ret, i;
-
-    ctx->h264_initialized = 0;
-
-    /* init pix_fmts of codec */
-    if (!ff_h264_vda_decoder.pix_fmts) {
-        if (kCFCoreFoundationVersionNumber < kCFCoreFoundationVersionNumber10_7)
-            ff_h264_vda_decoder.pix_fmts = vda_pixfmts_prior_10_7;
-        else
-            ff_h264_vda_decoder.pix_fmts = vda_pixfmts;
-    }
-
-    /* init vda */
-    memset(vda_ctx, 0, sizeof(struct vda_context));
-    vda_ctx->width = avctx->width;
-    vda_ctx->height = avctx->height;
-    vda_ctx->format = 'avc1';
-    vda_ctx->use_sync_decoding = 1;
-    vda_ctx->use_ref_buffer = 1;
-    ctx->pix_fmt = avctx->get_format(avctx, avctx->codec->pix_fmts);
-    switch (ctx->pix_fmt) {
-    case AV_PIX_FMT_UYVY422:
-        vda_ctx->cv_pix_fmt_type = '2vuy';
-        break;
-    case AV_PIX_FMT_YUYV422:
-        vda_ctx->cv_pix_fmt_type = 'yuvs';
-        break;
-    case AV_PIX_FMT_NV12:
-        vda_ctx->cv_pix_fmt_type = '420v';
-        break;
-    case AV_PIX_FMT_YUV420P:
-        vda_ctx->cv_pix_fmt_type = 'y420';
-        break;
-    default:
-        av_log(avctx, AV_LOG_ERROR, "Unsupported pixel format: %d\n", avctx->pix_fmt);
-        goto failed;
-    }
-    status = ff_vda_create_decoder(vda_ctx,
-                                   avctx->extradata, avctx->extradata_size);
-    if (status != kVDADecoderNoErr) {
-        av_log(avctx, AV_LOG_ERROR,
-                "Failed to init VDA decoder: %d.\n", status);
-        goto failed;
-    }
-
-    /* init H.264 decoder */
-    set_context(avctx);
-    ret = ff_h264_decoder.init(avctx);
-    restore_context(avctx);
-    if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to open H.264 decoder.\n");
-        goto failed;
-    }
-    ctx->h264_initialized = 1;
-
-    for (i = 0; i < MAX_SPS_COUNT; i++) {
-        const SPS *sps = ctx->h264ctx.ps.sps_list[i] ? (const SPS*)ctx->h264ctx.ps.sps_list[i]->data : NULL;
-        if (sps && (sps->bit_depth_luma != 8 ||
-                sps->chroma_format_idc == 2 ||
-                sps->chroma_format_idc == 3)) {
-            av_log(avctx, AV_LOG_ERROR, "Format is not supported.\n");
-            goto failed;
-        }
-    }
-
-    return 0;
-
-failed:
-    vdadec_close(avctx);
-    return -1;
-}
-
-static void vdadec_flush(AVCodecContext *avctx)
-{
-    set_context(avctx);
-    ff_h264_decoder.flush(avctx);
-    restore_context(avctx);
-}
-
-AVCodec ff_h264_vda_decoder = {
-    .name           = "h264_vda",
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_H264,
-    .priv_data_size = sizeof(VDADecoderContext),
-    .init           = vdadec_init,
-    .close          = vdadec_close,
-    .decode         = vdadec_decode,
-    .capabilities   = AV_CODEC_CAP_DELAY,
-    .flush          = vdadec_flush,
-    .long_name      = NULL_IF_CONFIG_SMALL("H.264 (VDA acceleration)"),
-};

diff --git a/libavcodec/vdpau.c b/libavcodec/vdpau.c
index 4cc51cb..1b2ec98 100644
--- a/libavcodec/vdpau.c
+++ b/libavcodec/vdpau.c

@@ -24,23 +24,15 @@
 #include <limits.h>
 
 #include "avcodec.h"
+#include "decode.h"
 #include "internal.h"
 #include "h264dec.h"
 #include "vc1.h"
 #include "vdpau.h"
-#include "vdpau_compat.h"
 #include "vdpau_internal.h"
 
 // XXX: at the time of adding this ifdefery, av_assert* wasn't use outside.
 // When dropping it, make sure other av_assert* were not added since then.
-#if FF_API_BUFS_VDPAU
-#include "libavutil/avassert.h"
-#endif
-
-#if FF_API_VDPAU
-#undef NDEBUG
-#include <assert.h>
-#endif
 
 /**
  * @addtogroup VDPAU_Decoding
@@ -119,6 +111,25 @@
     return 0;
 }
 
+int ff_vdpau_common_frame_params(AVCodecContext *avctx,
+                                 AVBufferRef *hw_frames_ctx)
+{
+    AVHWFramesContext *hw_frames = (AVHWFramesContext*)hw_frames_ctx->data;
+    VdpChromaType type;
+    uint32_t width;
+    uint32_t height;
+
+    if (av_vdpau_get_surface_parameters(avctx, &type, &width, &height))
+        return AVERROR(EINVAL);
+
+    hw_frames->format    = AV_PIX_FMT_VDPAU;
+    hw_frames->sw_format = avctx->sw_pix_fmt;
+    hw_frames->width     = width;
+    hw_frames->height    = height;
+
+    return 0;
+}
+
 int ff_vdpau_common_init(AVCodecContext *avctx, VdpDecoderProfile profile,
                          int level)
 {
@@ -136,6 +147,7 @@
     VdpChromaType type;
     uint32_t width;
     uint32_t height;
+    int ret;
 
     vdctx->width            = UINT32_MAX;
     vdctx->height           = UINT32_MAX;
@@ -163,41 +175,14 @@
             type != VDP_CHROMA_TYPE_420)
             return AVERROR(ENOSYS);
     } else {
-        AVHWFramesContext *frames_ctx = NULL;
+        AVHWFramesContext *frames_ctx;
         AVVDPAUDeviceContext *dev_ctx;
 
-        // We assume the hw_frames_ctx always survives until ff_vdpau_common_uninit
-        // is called. This holds true as the user is not allowed to touch
-        // hw_device_ctx, or hw_frames_ctx after get_format (and ff_get_format
-        // itself also uninits before unreffing hw_frames_ctx).
-        if (avctx->hw_frames_ctx) {
-            frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
-        } else if (avctx->hw_device_ctx) {
-            int ret;
+        ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VDPAU);
+        if (ret < 0)
+            return ret;
 
-            avctx->hw_frames_ctx = av_hwframe_ctx_alloc(avctx->hw_device_ctx);
-            if (!avctx->hw_frames_ctx)
-                return AVERROR(ENOMEM);
-
-            frames_ctx            = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
-            frames_ctx->format    = AV_PIX_FMT_VDPAU;
-            frames_ctx->sw_format = avctx->sw_pix_fmt;
-            frames_ctx->width     = avctx->coded_width;
-            frames_ctx->height    = avctx->coded_height;
-
-            ret = av_hwframe_ctx_init(avctx->hw_frames_ctx);
-            if (ret < 0) {
-                av_buffer_unref(&avctx->hw_frames_ctx);
-                return ret;
-            }
-        }
-
-        if (!frames_ctx) {
-            av_log(avctx, AV_LOG_ERROR, "A hardware frames context is "
-                   "required for VDPAU decoding.\n");
-            return AVERROR(EINVAL);
-        }
-
+        frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
         dev_ctx = frames_ctx->device_ctx->hwctx;
 
         vdctx->device           = dev_ctx->device;
@@ -353,18 +338,6 @@
     if (val < 0)
         return val;
 
-#if FF_API_BUFS_VDPAU
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (hwctx) {
-    av_assert0(sizeof(hwctx->info) <= sizeof(pic_ctx->info));
-    memcpy(&hwctx->info, &pic_ctx->info, sizeof(hwctx->info));
-    hwctx->bitstream_buffers = pic_ctx->bitstream_buffers;
-    hwctx->bitstream_buffers_used = pic_ctx->bitstream_buffers_used;
-    hwctx->bitstream_buffers_allocated = pic_ctx->bitstream_buffers_allocated;
-    }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     if (hwctx && !hwctx->render && hwctx->render2) {
         status = hwctx->render2(avctx, frame, (void *)&pic_ctx->info,
                                 pic_ctx->bitstream_buffers_used, pic_ctx->bitstream_buffers);
@@ -375,16 +348,6 @@
 
     av_freep(&pic_ctx->bitstream_buffers);
 
-#if FF_API_BUFS_VDPAU
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (hwctx) {
-    hwctx->bitstream_buffers = NULL;
-    hwctx->bitstream_buffers_used = 0;
-    hwctx->bitstream_buffers_allocated = 0;
-    }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     return vdpau_error(status);
 }
 
@@ -426,345 +389,6 @@
     return 0;
 }
 
-/* Obsolete non-hwaccel VDPAU support below... */
-
-#if FF_API_VDPAU
-void ff_vdpau_add_data_chunk(uint8_t *data, const uint8_t *buf, int buf_size)
-{
-    struct vdpau_render_state *render = (struct vdpau_render_state*)data;
-    assert(render);
-
-    render->bitstream_buffers= av_fast_realloc(
-        render->bitstream_buffers,
-        &render->bitstream_buffers_allocated,
-        sizeof(*render->bitstream_buffers)*(render->bitstream_buffers_used + 1)
-    );
-
-    render->bitstream_buffers[render->bitstream_buffers_used].struct_version  = VDP_BITSTREAM_BUFFER_VERSION;
-    render->bitstream_buffers[render->bitstream_buffers_used].bitstream       = buf;
-    render->bitstream_buffers[render->bitstream_buffers_used].bitstream_bytes = buf_size;
-    render->bitstream_buffers_used++;
-}
-
-#if CONFIG_H264_VDPAU_DECODER
-void ff_vdpau_h264_set_reference_frames(H264Context *h)
-{
-    struct vdpau_render_state *render, *render_ref;
-    VdpReferenceFrameH264 *rf, *rf2;
-    H264Picture *pic;
-    int i, list, pic_frame_idx;
-
-    render = (struct vdpau_render_state *)h->cur_pic_ptr->f->data[0];
-    assert(render);
-
-    rf = &render->info.h264.referenceFrames[0];
-#define H264_RF_COUNT FF_ARRAY_ELEMS(render->info.h264.referenceFrames)
-
-    for (list = 0; list < 2; ++list) {
-        H264Picture **lp = list ? h->long_ref : h->short_ref;
-        int ls = list ? 16 : h->short_ref_count;
-
-        for (i = 0; i < ls; ++i) {
-            pic = lp[i];
-            if (!pic || !pic->reference)
-                continue;
-            pic_frame_idx = pic->long_ref ? pic->pic_id : pic->frame_num;
-
-            render_ref = (struct vdpau_render_state *)pic->f->data[0];
-            assert(render_ref);
-
-            rf2 = &render->info.h264.referenceFrames[0];
-            while (rf2 != rf) {
-                if (
-                    (rf2->surface == render_ref->surface)
-                    && (rf2->is_long_term == pic->long_ref)
-                    && (rf2->frame_idx == pic_frame_idx)
-                )
-                    break;
-                ++rf2;
-            }
-            if (rf2 != rf) {
-                rf2->top_is_reference    |= (pic->reference & PICT_TOP_FIELD)    ? VDP_TRUE : VDP_FALSE;
-                rf2->bottom_is_reference |= (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
-                continue;
-            }
-
-            if (rf >= &render->info.h264.referenceFrames[H264_RF_COUNT])
-                continue;
-
-            rf->surface             = render_ref->surface;
-            rf->is_long_term        = pic->long_ref;
-            rf->top_is_reference    = (pic->reference & PICT_TOP_FIELD)    ? VDP_TRUE : VDP_FALSE;
-            rf->bottom_is_reference = (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
-            rf->field_order_cnt[0]  = pic->field_poc[0];
-            rf->field_order_cnt[1]  = pic->field_poc[1];
-            rf->frame_idx           = pic_frame_idx;
-
-            ++rf;
-        }
-    }
-
-    for (; rf < &render->info.h264.referenceFrames[H264_RF_COUNT]; ++rf) {
-        rf->surface             = VDP_INVALID_HANDLE;
-        rf->is_long_term        = 0;
-        rf->top_is_reference    = 0;
-        rf->bottom_is_reference = 0;
-        rf->field_order_cnt[0]  = 0;
-        rf->field_order_cnt[1]  = 0;
-        rf->frame_idx           = 0;
-    }
-}
-
-void ff_vdpau_h264_picture_start(H264Context *h)
-{
-    struct vdpau_render_state *render;
-    int i;
-
-    render = (struct vdpau_render_state *)h->cur_pic_ptr->f->data[0];
-    assert(render);
-
-    for (i = 0; i < 2; ++i) {
-        int foc = h->cur_pic_ptr->field_poc[i];
-        if (foc == INT_MAX)
-            foc = 0;
-        render->info.h264.field_order_cnt[i] = foc;
-    }
-
-    render->info.h264.frame_num = h->poc.frame_num;
-}
-
-void ff_vdpau_h264_picture_complete(H264Context *h)
-{
-    struct vdpau_render_state *render;
-
-    render = (struct vdpau_render_state *)h->cur_pic_ptr->f->data[0];
-    assert(render);
-
-    render->info.h264.slice_count = h->current_slice;
-    if (render->info.h264.slice_count < 1)
-        return;
-
-    render->info.h264.is_reference                           = (h->cur_pic_ptr->reference & 3) ? VDP_TRUE : VDP_FALSE;
-    render->info.h264.field_pic_flag                         = h->picture_structure != PICT_FRAME;
-    render->info.h264.bottom_field_flag                      = h->picture_structure == PICT_BOTTOM_FIELD;
-    render->info.h264.num_ref_frames                         = h->ps.sps->ref_frame_count;
-    render->info.h264.mb_adaptive_frame_field_flag           = h->ps.sps->mb_aff && !render->info.h264.field_pic_flag;
-    render->info.h264.constrained_intra_pred_flag            = h->ps.pps->constrained_intra_pred;
-    render->info.h264.weighted_pred_flag                     = h->ps.pps->weighted_pred;
-    render->info.h264.weighted_bipred_idc                    = h->ps.pps->weighted_bipred_idc;
-    render->info.h264.frame_mbs_only_flag                    = h->ps.sps->frame_mbs_only_flag;
-    render->info.h264.transform_8x8_mode_flag                = h->ps.pps->transform_8x8_mode;
-    render->info.h264.chroma_qp_index_offset                 = h->ps.pps->chroma_qp_index_offset[0];
-    render->info.h264.second_chroma_qp_index_offset          = h->ps.pps->chroma_qp_index_offset[1];
-    render->info.h264.pic_init_qp_minus26                    = h->ps.pps->init_qp - 26;
-    render->info.h264.num_ref_idx_l0_active_minus1           = h->ps.pps->ref_count[0] - 1;
-    render->info.h264.num_ref_idx_l1_active_minus1           = h->ps.pps->ref_count[1] - 1;
-    render->info.h264.log2_max_frame_num_minus4              = h->ps.sps->log2_max_frame_num - 4;
-    render->info.h264.pic_order_cnt_type                     = h->ps.sps->poc_type;
-    render->info.h264.log2_max_pic_order_cnt_lsb_minus4      = h->ps.sps->poc_type ? 0 : h->ps.sps->log2_max_poc_lsb - 4;
-    render->info.h264.delta_pic_order_always_zero_flag       = h->ps.sps->delta_pic_order_always_zero_flag;
-    render->info.h264.direct_8x8_inference_flag              = h->ps.sps->direct_8x8_inference_flag;
-    render->info.h264.entropy_coding_mode_flag               = h->ps.pps->cabac;
-    render->info.h264.pic_order_present_flag                 = h->ps.pps->pic_order_present;
-    render->info.h264.deblocking_filter_control_present_flag = h->ps.pps->deblocking_filter_parameters_present;
-    render->info.h264.redundant_pic_cnt_present_flag         = h->ps.pps->redundant_pic_cnt_present;
-    memcpy(render->info.h264.scaling_lists_4x4, h->ps.pps->scaling_matrix4, sizeof(render->info.h264.scaling_lists_4x4));
-    memcpy(render->info.h264.scaling_lists_8x8[0], h->ps.pps->scaling_matrix8[0], sizeof(render->info.h264.scaling_lists_8x8[0]));
-    memcpy(render->info.h264.scaling_lists_8x8[1], h->ps.pps->scaling_matrix8[3], sizeof(render->info.h264.scaling_lists_8x8[0]));
-
-    ff_h264_draw_horiz_band(h, &h->slice_ctx[0], 0, h->avctx->height);
-    render->bitstream_buffers_used = 0;
-}
-#endif /* CONFIG_H264_VDPAU_DECODER */
-
-#if CONFIG_MPEG_VDPAU_DECODER || CONFIG_MPEG1_VDPAU_DECODER
-void ff_vdpau_mpeg_picture_complete(MpegEncContext *s, const uint8_t *buf,
-                                    int buf_size, int slice_count)
-{
-    struct vdpau_render_state *render, *last, *next;
-    int i;
-
-    if (!s->current_picture_ptr) return;
-
-    render = (struct vdpau_render_state *)s->current_picture_ptr->f->data[0];
-    assert(render);
-
-    /* fill VdpPictureInfoMPEG1Or2 struct */
-    render->info.mpeg.picture_structure          = s->picture_structure;
-    render->info.mpeg.picture_coding_type        = s->pict_type;
-    render->info.mpeg.intra_dc_precision         = s->intra_dc_precision;
-    render->info.mpeg.frame_pred_frame_dct       = s->frame_pred_frame_dct;
-    render->info.mpeg.concealment_motion_vectors = s->concealment_motion_vectors;
-    render->info.mpeg.intra_vlc_format           = s->intra_vlc_format;
-    render->info.mpeg.alternate_scan             = s->alternate_scan;
-    render->info.mpeg.q_scale_type               = s->q_scale_type;
-    render->info.mpeg.top_field_first            = s->top_field_first;
-    render->info.mpeg.full_pel_forward_vector    = s->full_pel[0]; // MPEG-1 only.  Set 0 for MPEG-2
-    render->info.mpeg.full_pel_backward_vector   = s->full_pel[1]; // MPEG-1 only.  Set 0 for MPEG-2
-    render->info.mpeg.f_code[0][0]               = s->mpeg_f_code[0][0]; // For MPEG-1 fill both horiz. & vert.
-    render->info.mpeg.f_code[0][1]               = s->mpeg_f_code[0][1];
-    render->info.mpeg.f_code[1][0]               = s->mpeg_f_code[1][0];
-    render->info.mpeg.f_code[1][1]               = s->mpeg_f_code[1][1];
-    for (i = 0; i < 64; ++i) {
-        render->info.mpeg.intra_quantizer_matrix[i]     = s->intra_matrix[i];
-        render->info.mpeg.non_intra_quantizer_matrix[i] = s->inter_matrix[i];
-    }
-
-    render->info.mpeg.forward_reference          = VDP_INVALID_HANDLE;
-    render->info.mpeg.backward_reference         = VDP_INVALID_HANDLE;
-
-    switch(s->pict_type){
-    case  AV_PICTURE_TYPE_B:
-        next = (struct vdpau_render_state *)s->next_picture.f->data[0];
-        assert(next);
-        render->info.mpeg.backward_reference     = next->surface;
-        // no return here, going to set forward prediction
-    case  AV_PICTURE_TYPE_P:
-        last = (struct vdpau_render_state *)s->last_picture.f->data[0];
-        if (!last) // FIXME: Does this test make sense?
-            last = render; // predict second field from the first
-        render->info.mpeg.forward_reference      = last->surface;
-    }
-
-    ff_vdpau_add_data_chunk(s->current_picture_ptr->f->data[0], buf, buf_size);
-
-    render->info.mpeg.slice_count                = slice_count;
-
-    if (slice_count)
-        ff_mpeg_draw_horiz_band(s, 0, s->avctx->height);
-    render->bitstream_buffers_used               = 0;
-}
-#endif /* CONFIG_MPEG_VDPAU_DECODER || CONFIG_MPEG1_VDPAU_DECODER */
-
-#if CONFIG_VC1_VDPAU_DECODER
-void ff_vdpau_vc1_decode_picture(MpegEncContext *s, const uint8_t *buf,
-                                 int buf_size)
-{
-    VC1Context *v = s->avctx->priv_data;
-    struct vdpau_render_state *render, *last, *next;
-
-    render = (struct vdpau_render_state *)s->current_picture.f->data[0];
-    assert(render);
-
-    /*  fill LvPictureInfoVC1 struct */
-    render->info.vc1.frame_coding_mode  = v->fcm ? v->fcm + 1 : 0;
-    render->info.vc1.postprocflag       = v->postprocflag;
-    render->info.vc1.pulldown           = v->broadcast;
-    render->info.vc1.interlace          = v->interlace;
-    render->info.vc1.tfcntrflag         = v->tfcntrflag;
-    render->info.vc1.finterpflag        = v->finterpflag;
-    render->info.vc1.psf                = v->psf;
-    render->info.vc1.dquant             = v->dquant;
-    render->info.vc1.panscan_flag       = v->panscanflag;
-    render->info.vc1.refdist_flag       = v->refdist_flag;
-    render->info.vc1.quantizer          = v->quantizer_mode;
-    render->info.vc1.extended_mv        = v->extended_mv;
-    render->info.vc1.extended_dmv       = v->extended_dmv;
-    render->info.vc1.overlap            = v->overlap;
-    render->info.vc1.vstransform        = v->vstransform;
-    render->info.vc1.loopfilter         = v->s.loop_filter;
-    render->info.vc1.fastuvmc           = v->fastuvmc;
-    render->info.vc1.range_mapy_flag    = v->range_mapy_flag;
-    render->info.vc1.range_mapy         = v->range_mapy;
-    render->info.vc1.range_mapuv_flag   = v->range_mapuv_flag;
-    render->info.vc1.range_mapuv        = v->range_mapuv;
-    /* Specific to simple/main profile only */
-    render->info.vc1.multires           = v->multires;
-    render->info.vc1.syncmarker         = v->resync_marker;
-    render->info.vc1.rangered           = v->rangered | (v->rangeredfrm << 1);
-    render->info.vc1.maxbframes         = v->s.max_b_frames;
-
-    render->info.vc1.deblockEnable      = v->postprocflag & 1;
-    render->info.vc1.pquant             = v->pq;
-
-    render->info.vc1.forward_reference  = VDP_INVALID_HANDLE;
-    render->info.vc1.backward_reference = VDP_INVALID_HANDLE;
-
-    if (v->bi_type)
-        render->info.vc1.picture_type = 4;
-    else
-        render->info.vc1.picture_type = s->pict_type - 1 + s->pict_type / 3;
-
-    switch(s->pict_type){
-    case  AV_PICTURE_TYPE_B:
-        next = (struct vdpau_render_state *)s->next_picture.f->data[0];
-        assert(next);
-        render->info.vc1.backward_reference = next->surface;
-        // no break here, going to set forward prediction
-    case  AV_PICTURE_TYPE_P:
-        last = (struct vdpau_render_state *)s->last_picture.f->data[0];
-        if (!last) // FIXME: Does this test make sense?
-            last = render; // predict second field from the first
-        render->info.vc1.forward_reference = last->surface;
-    }
-
-    ff_vdpau_add_data_chunk(s->current_picture_ptr->f->data[0], buf, buf_size);
-
-    render->info.vc1.slice_count          = 1;
-
-    ff_mpeg_draw_horiz_band(s, 0, s->avctx->height);
-    render->bitstream_buffers_used        = 0;
-}
-#endif /* (CONFIG_VC1_VDPAU_DECODER */
-
-#if CONFIG_MPEG4_VDPAU_DECODER
-void ff_vdpau_mpeg4_decode_picture(Mpeg4DecContext *ctx, const uint8_t *buf,
-                                   int buf_size)
-{
-    MpegEncContext *s = &ctx->m;
-    struct vdpau_render_state *render, *last, *next;
-    int i;
-
-    if (!s->current_picture_ptr) return;
-
-    render = (struct vdpau_render_state *)s->current_picture_ptr->f->data[0];
-    assert(render);
-
-    /* fill VdpPictureInfoMPEG4Part2 struct */
-    render->info.mpeg4.trd[0]                            = s->pp_time;
-    render->info.mpeg4.trb[0]                            = s->pb_time;
-    render->info.mpeg4.trd[1]                            = s->pp_field_time >> 1;
-    render->info.mpeg4.trb[1]                            = s->pb_field_time >> 1;
-    render->info.mpeg4.vop_time_increment_resolution     = s->avctx->time_base.den;
-    render->info.mpeg4.vop_coding_type                   = 0;
-    render->info.mpeg4.vop_fcode_forward                 = s->f_code;
-    render->info.mpeg4.vop_fcode_backward                = s->b_code;
-    render->info.mpeg4.resync_marker_disable             = !ctx->resync_marker;
-    render->info.mpeg4.interlaced                        = !s->progressive_sequence;
-    render->info.mpeg4.quant_type                        = s->mpeg_quant;
-    render->info.mpeg4.quarter_sample                    = s->quarter_sample;
-    render->info.mpeg4.short_video_header                = s->avctx->codec->id == AV_CODEC_ID_H263;
-    render->info.mpeg4.rounding_control                  = s->no_rounding;
-    render->info.mpeg4.alternate_vertical_scan_flag      = s->alternate_scan;
-    render->info.mpeg4.top_field_first                   = s->top_field_first;
-    for (i = 0; i < 64; ++i) {
-        render->info.mpeg4.intra_quantizer_matrix[i]     = s->intra_matrix[i];
-        render->info.mpeg4.non_intra_quantizer_matrix[i] = s->inter_matrix[i];
-    }
-    render->info.mpeg4.forward_reference                 = VDP_INVALID_HANDLE;
-    render->info.mpeg4.backward_reference                = VDP_INVALID_HANDLE;
-
-    switch (s->pict_type) {
-    case AV_PICTURE_TYPE_B:
-        next = (struct vdpau_render_state *)s->next_picture.f->data[0];
-        assert(next);
-        render->info.mpeg4.backward_reference     = next->surface;
-        render->info.mpeg4.vop_coding_type        = 2;
-        // no break here, going to set forward prediction
-    case AV_PICTURE_TYPE_P:
-        last = (struct vdpau_render_state *)s->last_picture.f->data[0];
-        assert(last);
-        render->info.mpeg4.forward_reference      = last->surface;
-    }
-
-    ff_vdpau_add_data_chunk(s->current_picture_ptr->f->data[0], buf, buf_size);
-
-    ff_mpeg_draw_horiz_band(s, 0, s->avctx->height);
-    render->bitstream_buffers_used = 0;
-}
-#endif /* CONFIG_MPEG4_VDPAU_DECODER */
-#endif /* FF_API_VDPAU */
-
 #if FF_API_VDPAU_PROFILE
 int av_vdpau_get_profile(AVCodecContext *avctx, VdpDecoderProfile *profile)
 {

diff --git a/libavcodec/vdpau.h b/libavcodec/vdpau.h
index 855d387..4d99943 100644
--- a/libavcodec/vdpau.h
+++ b/libavcodec/vdpau.h

@@ -57,15 +57,6 @@
 #include "avcodec.h"
 #include "version.h"
 
-#if FF_API_BUFS_VDPAU
-union AVVDPAUPictureInfo {
-    VdpPictureInfoH264        h264;
-    VdpPictureInfoMPEG1Or2    mpeg;
-    VdpPictureInfoVC1          vc1;
-    VdpPictureInfoMPEG4Part2 mpeg4;
-};
-#endif
-
 struct AVCodecContext;
 struct AVFrame;
 
@@ -102,40 +93,6 @@
      */
     VdpDecoderRender *render;
 
-#if FF_API_BUFS_VDPAU
-    /**
-     * VDPAU picture information
-     *
-     * Set by libavcodec.
-     */
-    attribute_deprecated
-    union AVVDPAUPictureInfo info;
-
-    /**
-     * Allocated size of the bitstream_buffers table.
-     *
-     * Set by libavcodec.
-     */
-    attribute_deprecated
-    int bitstream_buffers_allocated;
-
-    /**
-     * Useful bitstream buffers in the bitstream buffers table.
-     *
-     * Set by libavcodec.
-     */
-    attribute_deprecated
-    int bitstream_buffers_used;
-
-   /**
-     * Table of bitstream buffers.
-     * The user is responsible for freeing this buffer using av_freep().
-     *
-     * Set by libavcodec.
-     */
-    attribute_deprecated
-    VdpBitstreamBuffer *bitstream_buffers;
-#endif
     AVVDPAU_Render2 render2;
 } AVVDPAUContext;
 
@@ -214,40 +171,6 @@
 int av_vdpau_get_profile(AVCodecContext *avctx, VdpDecoderProfile *profile);
 #endif
 
-#if FF_API_CAP_VDPAU
-/** @brief The videoSurface is used for rendering. */
-#define FF_VDPAU_STATE_USED_FOR_RENDER 1
-
-/**
- * @brief The videoSurface is needed for reference/prediction.
- * The codec manipulates this.
- */
-#define FF_VDPAU_STATE_USED_FOR_REFERENCE 2
-
-/**
- * @brief This structure is used as a callback between the FFmpeg
- * decoder (vd_) and presentation (vo_) module.
- * This is used for defining a video frame containing surface,
- * picture parameter, bitstream information etc which are passed
- * between the FFmpeg decoder and its clients.
- */
-struct vdpau_render_state {
-    VdpVideoSurface surface; ///< Used as rendered surface, never changed.
-
-    int state; ///< Holds FF_VDPAU_STATE_* values.
-
-    /** picture parameter information for all supported codecs */
-    union AVVDPAUPictureInfo info;
-
-    /** Describe size/location of the compressed video data.
-        Set to 0 when freeing bitstream_buffers. */
-    int bitstream_buffers_allocated;
-    int bitstream_buffers_used;
-    /** The user is responsible for freeing this buffer using av_freep(). */
-    VdpBitstreamBuffer *bitstream_buffers;
-};
-#endif
-
 /* @}*/
 
 #endif /* AVCODEC_VDPAU_H */

diff --git a/libavcodec/vdpau_compat.h b/libavcodec/vdpau_compat.h
deleted file mode 100644
index 768acce..0000000
--- a/libavcodec/vdpau_compat.h
+++ /dev/null

@@ -1,48 +0,0 @@
-/*
- * Video Decode and Presentation API for UNIX (VDPAU) is used for
- * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
- *
- * Copyright (C) 2008 NVIDIA
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_VDPAU_COMPAT_H
-#define AVCODEC_VDPAU_COMPAT_H
-
-#include <stdint.h>
-
-#include "h264dec.h"
-#include "mpeg4video.h"
-
-void ff_vdpau_add_data_chunk(uint8_t *data, const uint8_t *buf,
-                             int buf_size);
-
-void ff_vdpau_mpeg_picture_complete(MpegEncContext *s, const uint8_t *buf,
-                                    int buf_size, int slice_count);
-
-void ff_vdpau_h264_picture_start(H264Context *h);
-void ff_vdpau_h264_set_reference_frames(H264Context *h);
-void ff_vdpau_h264_picture_complete(H264Context *h);
-
-void ff_vdpau_vc1_decode_picture(MpegEncContext *s, const uint8_t *buf,
-                                 int buf_size);
-
-void ff_vdpau_mpeg4_decode_picture(Mpeg4DecContext *s, const uint8_t *buf,
-                                   int buf_size);
-
-#endif /* AVCODEC_VDPAU_COMPAT_H */

diff --git a/libavcodec/vdpau_h264.c b/libavcodec/vdpau_h264.c
index be6ba71..2a260f7 100644
--- a/libavcodec/vdpau_h264.c
+++ b/libavcodec/vdpau_h264.c

@@ -262,7 +262,7 @@
     return ff_vdpau_common_init(avctx, profile, level);
 }
 
-AVHWAccel ff_h264_vdpau_hwaccel = {
+const AVHWAccel ff_h264_vdpau_hwaccel = {
     .name           = "h264_vdpau",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
@@ -273,6 +273,7 @@
     .frame_priv_data_size = sizeof(struct vdpau_picture_context),
     .init           = vdpau_h264_init,
     .uninit         = ff_vdpau_common_uninit,
+    .frame_params   = ff_vdpau_common_frame_params,
     .priv_data_size = sizeof(VDPAUContext),
     .caps_internal  = HWACCEL_CAP_ASYNC_SAFE,
 };

diff --git a/libavcodec/vdpau_hevc.c b/libavcodec/vdpau_hevc.c
index fcdf4b2..421135b 100644
--- a/libavcodec/vdpau_hevc.c
+++ b/libavcodec/vdpau_hevc.c

@@ -413,7 +413,7 @@
     return ff_vdpau_common_init(avctx, profile, level);
 }
 
-AVHWAccel ff_hevc_vdpau_hwaccel = {
+const AVHWAccel ff_hevc_vdpau_hwaccel = {
     .name           = "hevc_vdpau",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_HEVC,
@@ -424,6 +424,7 @@
     .frame_priv_data_size = sizeof(struct vdpau_picture_context),
     .init           = vdpau_hevc_init,
     .uninit         = ff_vdpau_common_uninit,
+    .frame_params   = ff_vdpau_common_frame_params,
     .priv_data_size = sizeof(VDPAUContext),
     .caps_internal  = HWACCEL_CAP_ASYNC_SAFE,
 };

diff --git a/libavcodec/vdpau_internal.h b/libavcodec/vdpau_internal.h
index 30d01af..4d63e50 100644
--- a/libavcodec/vdpau_internal.h
+++ b/libavcodec/vdpau_internal.h

@@ -119,5 +119,7 @@
 int ff_vdpau_mpeg_end_frame(AVCodecContext *avctx);
 int ff_vdpau_add_buffer(struct vdpau_picture_context *pic, const uint8_t *buf,
                         uint32_t buf_size);
+int ff_vdpau_common_frame_params(AVCodecContext *avctx,
+                                 AVBufferRef *hw_frames_ctx);
 
 #endif /* AVCODEC_VDPAU_INTERNAL_H */

diff --git a/libavcodec/vdpau_mpeg12.c b/libavcodec/vdpau_mpeg12.c
index b657007..d286e7e 100644
--- a/libavcodec/vdpau_mpeg12.c
+++ b/libavcodec/vdpau_mpeg12.c

@@ -103,7 +103,7 @@
                                 VDP_DECODER_LEVEL_MPEG1_NA);
 }
 
-AVHWAccel ff_mpeg1_vdpau_hwaccel = {
+const AVHWAccel ff_mpeg1_vdpau_hwaccel = {
     .name           = "mpeg1_vdpau",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG1VIDEO,
@@ -138,7 +138,7 @@
     return ff_vdpau_common_init(avctx, profile, VDP_DECODER_LEVEL_MPEG2_HL);
 }
 
-AVHWAccel ff_mpeg2_vdpau_hwaccel = {
+const AVHWAccel ff_mpeg2_vdpau_hwaccel = {
     .name           = "mpeg2_vdpau",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG2VIDEO,
@@ -149,6 +149,7 @@
     .frame_priv_data_size = sizeof(struct vdpau_picture_context),
     .init           = vdpau_mpeg2_init,
     .uninit         = ff_vdpau_common_uninit,
+    .frame_params   = ff_vdpau_common_frame_params,
     .priv_data_size = sizeof(VDPAUContext),
     .caps_internal  = HWACCEL_CAP_ASYNC_SAFE,
 };

diff --git a/libavcodec/vdpau_mpeg4.c b/libavcodec/vdpau_mpeg4.c
index bbdd843..96f8302 100644
--- a/libavcodec/vdpau_mpeg4.c
+++ b/libavcodec/vdpau_mpeg4.c

@@ -110,7 +110,7 @@
     return ff_vdpau_common_init(avctx, profile, avctx->level);
 }
 
-AVHWAccel ff_mpeg4_vdpau_hwaccel = {
+const AVHWAccel ff_mpeg4_vdpau_hwaccel = {
     .name           = "mpeg4_vdpau",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG4,
@@ -121,6 +121,7 @@
     .frame_priv_data_size = sizeof(struct vdpau_picture_context),
     .init           = vdpau_mpeg4_init,
     .uninit         = ff_vdpau_common_uninit,
+    .frame_params   = ff_vdpau_common_frame_params,
     .priv_data_size = sizeof(VDPAUContext),
     .caps_internal  = HWACCEL_CAP_ASYNC_SAFE,
 };

diff --git a/libavcodec/vdpau_vc1.c b/libavcodec/vdpau_vc1.c
index 665a233..671baf9 100644
--- a/libavcodec/vdpau_vc1.c
+++ b/libavcodec/vdpau_vc1.c

@@ -136,7 +136,7 @@
 }
 
 #if CONFIG_WMV3_VDPAU_HWACCEL
-AVHWAccel ff_wmv3_vdpau_hwaccel = {
+const AVHWAccel ff_wmv3_vdpau_hwaccel = {
     .name           = "wm3_vdpau",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_WMV3,
@@ -147,12 +147,13 @@
     .frame_priv_data_size = sizeof(struct vdpau_picture_context),
     .init           = vdpau_vc1_init,
     .uninit         = ff_vdpau_common_uninit,
+    .frame_params   = ff_vdpau_common_frame_params,
     .priv_data_size = sizeof(VDPAUContext),
     .caps_internal  = HWACCEL_CAP_ASYNC_SAFE,
 };
 #endif
 
-AVHWAccel ff_vc1_vdpau_hwaccel = {
+const AVHWAccel ff_vc1_vdpau_hwaccel = {
     .name           = "vc1_vdpau",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VC1,
@@ -163,6 +164,7 @@
     .frame_priv_data_size = sizeof(struct vdpau_picture_context),
     .init           = vdpau_vc1_init,
     .uninit         = ff_vdpau_common_uninit,
+    .frame_params   = ff_vdpau_common_frame_params,
     .priv_data_size = sizeof(VDPAUContext),
     .caps_internal  = HWACCEL_CAP_ASYNC_SAFE,
 };

diff --git a/libavcodec/version.h b/libavcodec/version.h
index 10d9ac4..7e51585 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h

@@ -27,8 +27,8 @@
 
 #include "libavutil/version.h"
 
-#define LIBAVCODEC_VERSION_MAJOR  57
-#define LIBAVCODEC_VERSION_MINOR 107
+#define LIBAVCODEC_VERSION_MAJOR  58
+#define LIBAVCODEC_VERSION_MINOR  33
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
@@ -51,136 +51,18 @@
  * at once through the bump. This improves the git bisect-ability of the change.
  */
 
-#ifndef FF_API_VIMA_DECODER
-#define FF_API_VIMA_DECODER     (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_AUDIO_CONVERT
-#define FF_API_AUDIO_CONVERT     (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_AVCODEC_RESAMPLE
-#define FF_API_AVCODEC_RESAMPLE  FF_API_AUDIO_CONVERT
-#endif
-#ifndef FF_API_MISSING_SAMPLE
-#define FF_API_MISSING_SAMPLE    (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
 #ifndef FF_API_LOWRES
-#define FF_API_LOWRES            (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_CAP_VDPAU
-#define FF_API_CAP_VDPAU         (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_BUFS_VDPAU
-#define FF_API_BUFS_VDPAU        (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_VOXWARE
-#define FF_API_VOXWARE           (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_SET_DIMENSIONS
-#define FF_API_SET_DIMENSIONS    (LIBAVCODEC_VERSION_MAJOR < 58)
+#define FF_API_LOWRES            (LIBAVCODEC_VERSION_MAJOR < 59)
 #endif
 #ifndef FF_API_DEBUG_MV
 #define FF_API_DEBUG_MV          (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
-#ifndef FF_API_AC_VLC
-#define FF_API_AC_VLC            (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_OLD_MSMPEG4
-#define FF_API_OLD_MSMPEG4       (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_ASPECT_EXTENDED
-#define FF_API_ASPECT_EXTENDED   (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_ARCH_ALPHA
-#define FF_API_ARCH_ALPHA        (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_XVMC
-#define FF_API_XVMC              (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_ERROR_RATE
-#define FF_API_ERROR_RATE        (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_QSCALE_TYPE
-#define FF_API_QSCALE_TYPE       (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_MB_TYPE
-#define FF_API_MB_TYPE           (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_MAX_BFRAMES
-#define FF_API_MAX_BFRAMES       (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_NEG_LINESIZES
-#define FF_API_NEG_LINESIZES     (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_EMU_EDGE
-#define FF_API_EMU_EDGE          (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_ARCH_SH4
-#define FF_API_ARCH_SH4          (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_ARCH_SPARC
-#define FF_API_ARCH_SPARC        (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_UNUSED_MEMBERS
-#define FF_API_UNUSED_MEMBERS    (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_IDCT_XVIDMMX
-#define FF_API_IDCT_XVIDMMX      (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_INPUT_PRESERVED
-#define FF_API_INPUT_PRESERVED   (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_NORMALIZE_AQP
-#define FF_API_NORMALIZE_AQP     (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_GMC
-#define FF_API_GMC               (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_MV0
-#define FF_API_MV0               (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_CODEC_NAME
-#define FF_API_CODEC_NAME        (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_AFD
-#define FF_API_AFD               (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_VISMV
-/* XXX: don't forget to drop the -vismv documentation */
-#define FF_API_VISMV             (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_AUDIOENC_DELAY
-#define FF_API_AUDIOENC_DELAY    (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_VAAPI_CONTEXT
-#define FF_API_VAAPI_CONTEXT     (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_MERGE_SD
-#define FF_API_MERGE_SD          (LIBAVCODEC_VERSION_MAJOR < 58)
-#endif
 #ifndef FF_API_AVCTX_TIMEBASE
 #define FF_API_AVCTX_TIMEBASE    (LIBAVCODEC_VERSION_MAJOR < 59)
 #endif
-#ifndef FF_API_MPV_OPT
-#define FF_API_MPV_OPT           (LIBAVCODEC_VERSION_MAJOR < 59)
-#endif
-#ifndef FF_API_STREAM_CODEC_TAG
-#define FF_API_STREAM_CODEC_TAG  (LIBAVCODEC_VERSION_MAJOR < 59)
-#endif
-#ifndef FF_API_QUANT_BIAS
-#define FF_API_QUANT_BIAS        (LIBAVCODEC_VERSION_MAJOR < 59)
-#endif
-#ifndef FF_API_RC_STRATEGY
-#define FF_API_RC_STRATEGY       (LIBAVCODEC_VERSION_MAJOR < 59)
-#endif
 #ifndef FF_API_CODED_FRAME
 #define FF_API_CODED_FRAME       (LIBAVCODEC_VERSION_MAJOR < 59)
 #endif
-#ifndef FF_API_MOTION_EST
-#define FF_API_MOTION_EST        (LIBAVCODEC_VERSION_MAJOR < 59)
-#endif
-#ifndef FF_API_WITHOUT_PREFIX
-#define FF_API_WITHOUT_PREFIX    (LIBAVCODEC_VERSION_MAJOR < 59)
-#endif
 #ifndef FF_API_SIDEDATA_ONLY_PKT
 #define FF_API_SIDEDATA_ONLY_PKT (LIBAVCODEC_VERSION_MAJOR < 59)
 #endif
@@ -238,6 +120,18 @@
 #ifndef FF_API_GETCHROMA
 #define FF_API_GETCHROMA         (LIBAVCODEC_VERSION_MAJOR < 59)
 #endif
+#ifndef FF_API_CODEC_GET_SET
+#define FF_API_CODEC_GET_SET     (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_USER_VISIBLE_AVHWACCEL
+#define FF_API_USER_VISIBLE_AVHWACCEL (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_LOCKMGR
+#define FF_API_LOCKMGR (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_NEXT
+#define FF_API_NEXT              (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
 
 
 #endif /* AVCODEC_VERSION_H */

diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
index ec8b6d8..ac45e23 100644
--- a/libavcodec/videotoolbox.c
+++ b/libavcodec/videotoolbox.c

@@ -21,16 +21,13 @@
  */
 
 #include "config.h"
-#if CONFIG_VIDEOTOOLBOX
-#  include "videotoolbox.h"
-#  include "libavutil/hwcontext_videotoolbox.h"
-#else
-#  include "vda.h"
-#endif
-#include "vda_vt_internal.h"
+#include "videotoolbox.h"
+#include "libavutil/hwcontext_videotoolbox.h"
+#include "vt_internal.h"
 #include "libavutil/avutil.h"
 #include "libavutil/hwcontext.h"
 #include "bytestream.h"
+#include "decode.h"
 #include "h264dec.h"
 #include "hevcdec.h"
 #include "mpegvideo.h"
@@ -39,6 +36,9 @@
 #ifndef kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder
 #  define kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder CFSTR("RequireHardwareAcceleratedVideoDecoder")
 #endif
+#ifndef kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder
+#  define kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder CFSTR("EnableHardwareAcceleratedVideoDecoder")
+#endif
 
 #if !HAVE_KCMVIDEOCODECTYPE_HEVC
 enum { kCMVideoCodecType_HEVC = 'hvc1' };
@@ -46,10 +46,18 @@
 
 #define VIDEOTOOLBOX_ESDS_EXTRADATA_PADDING  12
 
+typedef struct VTHWFrame {
+    CVPixelBufferRef pixbuf;
+    AVBufferRef *hw_frames_ctx;
+} VTHWFrame;
+
 static void videotoolbox_buffer_release(void *opaque, uint8_t *data)
 {
-    CVPixelBufferRef cv_buffer = (CVImageBufferRef)data;
-    CVPixelBufferRelease(cv_buffer);
+    VTHWFrame *ref = (VTHWFrame *)data;
+    av_buffer_unref(&ref->hw_frames_ctx);
+    CVPixelBufferRelease(ref->pixbuf);
+
+    av_free(data);
 }
 
 static int videotoolbox_buffer_copy(VTContext *vtctx,
@@ -72,15 +80,54 @@
     return 0;
 }
 
+static int videotoolbox_postproc_frame(void *avctx, AVFrame *frame)
+{
+    VTHWFrame *ref = (VTHWFrame *)frame->buf[0]->data;
+
+    if (!ref->pixbuf) {
+        av_log(avctx, AV_LOG_ERROR, "No frame decoded?\n");
+        av_frame_unref(frame);
+        return AVERROR_EXTERNAL;
+    }
+
+    frame->data[3] = (uint8_t*)ref->pixbuf;
+
+    if (ref->hw_frames_ctx) {
+        av_buffer_unref(&frame->hw_frames_ctx);
+        frame->hw_frames_ctx = av_buffer_ref(ref->hw_frames_ctx);
+        if (!frame->hw_frames_ctx)
+            return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
 int ff_videotoolbox_alloc_frame(AVCodecContext *avctx, AVFrame *frame)
 {
+    size_t      size = sizeof(VTHWFrame);
+    uint8_t    *data = NULL;
+    AVBufferRef *buf = NULL;
+    int ret = ff_attach_decode_data(frame);
+    FrameDecodeData *fdd;
+    if (ret < 0)
+        return ret;
+
+    data = av_mallocz(size);
+    if (!data)
+        return AVERROR(ENOMEM);
+    buf = av_buffer_create(data, size, videotoolbox_buffer_release, NULL, 0);
+    if (!buf) {
+        av_freep(&data);
+        return AVERROR(ENOMEM);
+    }
+    frame->buf[0] = buf;
+
+    fdd = (FrameDecodeData*)frame->private_ref->data;
+    fdd->post_process = videotoolbox_postproc_frame;
+
     frame->width  = avctx->width;
     frame->height = avctx->height;
     frame->format = avctx->pix_fmt;
-    frame->buf[0] = av_buffer_alloc(1);
-
-    if (!frame->buf[0])
-        return AVERROR(ENOMEM);
 
     return 0;
 }
@@ -89,7 +136,8 @@
 
 CFDataRef ff_videotoolbox_avcc_extradata_create(AVCodecContext *avctx)
 {
-    H264Context *h     = avctx->priv_data;
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+    H264Context *h = avctx->priv_data;
     CFDataRef data = NULL;
     uint8_t *p;
     int vt_extradata_size = 6 + 2 + h->ps.sps->data_size + 3 + h->ps.pps->data_size;
@@ -115,6 +163,11 @@
     p += 3 + h->ps.pps->data_size;
     av_assert0(p - vt_extradata == vt_extradata_size);
 
+    // save sps header (profile/level) used to create decoder session,
+    // so we can detect changes and recreate it.
+    if (vtctx)
+        memcpy(vtctx->sps, h->ps.sps->data + 1, 3);
+
     data = CFDataCreate(kCFAllocatorDefault, vt_extradata, vt_extradata_size);
     av_free(vt_extradata);
     return data;
@@ -135,7 +188,7 @@
     int vt_extradata_size = 23 + 5 + vps->data_size + 5 + sps->data_size + 3;
     uint8_t *vt_extradata;
 
-    for (i = 0; i < MAX_PPS_COUNT; i++) {
+    for (i = 0; i < HEVC_MAX_PPS_COUNT; i++) {
         if (h->ps.pps_list[i]) {
             const HEVCPPS *pps = (const HEVCPPS *)h->ps.pps_list[i]->data;
             vt_extradata_size += 2 + pps->data_size;
@@ -262,7 +315,7 @@
              HEVC_NAL_PPS & 0x3f);
     AV_WB16(p + 1, num_pps);
     p += 3;
-    for (i = 0; i < MAX_PPS_COUNT; i++) {
+    for (i = 0; i < HEVC_MAX_PPS_COUNT; i++) {
         if (h->ps.pps_list[i]) {
             const HEVCPPS *pps = (const HEVCPPS *)h->ps.pps_list[i]->data;
             AV_WB16(p, pps->data_size);
@@ -278,33 +331,12 @@
     return data;
 }
 
-int ff_videotoolbox_buffer_create(VTContext *vtctx, AVFrame *frame)
-{
-    av_buffer_unref(&frame->buf[0]);
-
-    frame->buf[0] = av_buffer_create((uint8_t*)vtctx->frame,
-                                     sizeof(vtctx->frame),
-                                     videotoolbox_buffer_release,
-                                     NULL,
-                                     AV_BUFFER_FLAG_READONLY);
-    if (!frame->buf[0]) {
-        return AVERROR(ENOMEM);
-    }
-
-    frame->data[3] = (uint8_t*)vtctx->frame;
-    vtctx->frame = NULL;
-
-    return 0;
-}
-
 int ff_videotoolbox_h264_start_frame(AVCodecContext *avctx,
                                      const uint8_t *buffer,
                                      uint32_t size)
 {
     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
-    H264Context *h  = avctx->priv_data;
-
-    vtctx->bitstream_size = 0;
+    H264Context *h = avctx->priv_data;
 
     if (h->is_avc == 1) {
         return videotoolbox_buffer_copy(vtctx, buffer, size);
@@ -313,16 +345,35 @@
     return 0;
 }
 
-int ff_videotoolbox_h264_decode_slice(AVCodecContext *avctx,
-                                      const uint8_t *buffer,
-                                      uint32_t size)
+static int videotoolbox_h264_decode_params(AVCodecContext *avctx,
+                                           int type,
+                                           const uint8_t *buffer,
+                                           uint32_t size)
 {
     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
-    H264Context *h  = avctx->priv_data;
-    void *tmp;
+    H264Context *h = avctx->priv_data;
 
-    if (h->is_avc == 1)
-        return 0;
+    // save sps header (profile/level) used to create decoder session
+    if (!vtctx->sps[0])
+        memcpy(vtctx->sps, h->ps.sps->data + 1, 3);
+
+    if (type == H264_NAL_SPS) {
+        if (size > 4 && memcmp(vtctx->sps, buffer + 1, 3) != 0) {
+            vtctx->reconfig_needed = true;
+            memcpy(vtctx->sps, buffer + 1, 3);
+        }
+    }
+
+    // pass-through SPS/PPS changes to the decoder
+    return ff_videotoolbox_h264_decode_slice(avctx, buffer, size);
+}
+
+static int videotoolbox_common_decode_slice(AVCodecContext *avctx,
+                                            const uint8_t *buffer,
+                                            uint32_t size)
+{
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+    void *tmp;
 
     tmp = av_fast_realloc(vtctx->bitstream,
                           &vtctx->allocated_size,
@@ -340,6 +391,18 @@
     return 0;
 }
 
+int ff_videotoolbox_h264_decode_slice(AVCodecContext *avctx,
+                                      const uint8_t *buffer,
+                                      uint32_t size)
+{
+    H264Context *h = avctx->priv_data;
+
+    if (h->is_avc == 1)
+        return 0;
+
+    return videotoolbox_common_decode_slice(avctx, buffer, size);
+}
+
 int ff_videotoolbox_uninit(AVCodecContext *avctx)
 {
     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
@@ -376,11 +439,21 @@
     int width = CVPixelBufferGetWidth(pixbuf);
     int height = CVPixelBufferGetHeight(pixbuf);
     AVHWFramesContext *cached_frames;
+    VTHWFrame *ref;
     int ret;
 
-    ret = ff_videotoolbox_buffer_create(vtctx, frame);
-    if (ret < 0)
-        return ret;
+    if (!frame->buf[0] || frame->data[3]) {
+        av_log(avctx, AV_LOG_ERROR, "videotoolbox: invalid state\n");
+        av_frame_unref(frame);
+        return AVERROR_EXTERNAL;
+    }
+
+    ref = (VTHWFrame *)frame->buf[0]->data;
+
+    if (ref->pixbuf)
+        CVPixelBufferRelease(ref->pixbuf);
+    ref->pixbuf = vtctx->frame;
+    vtctx->frame = NULL;
 
     // Old API code path.
     if (!vtctx->cached_hw_frames_ctx)
@@ -412,9 +485,9 @@
         vtctx->cached_hw_frames_ctx = hw_frames_ctx;
     }
 
-    av_buffer_unref(&frame->hw_frames_ctx);
-    frame->hw_frames_ctx = av_buffer_ref(vtctx->cached_hw_frames_ctx);
-    if (!frame->hw_frames_ctx)
+    av_buffer_unref(&ref->hw_frames_ctx);
+    ref->hw_frames_ctx = av_buffer_ref(vtctx->cached_hw_frames_ctx);
+    if (!ref->hw_frames_ctx)
         return AVERROR(ENOMEM);
 
     return 0;
@@ -578,123 +651,25 @@
     return status;
 }
 
-static int videotoolbox_common_end_frame(AVCodecContext *avctx, AVFrame *frame)
+static CMVideoFormatDescriptionRef videotoolbox_format_desc_create(CMVideoCodecType codec_type,
+                                                                   CFDictionaryRef decoder_spec,
+                                                                   int width,
+                                                                   int height)
 {
-    int status;
-    AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
-    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+    CMFormatDescriptionRef cm_fmt_desc;
+    OSStatus status;
 
-    if (!videotoolbox->session || !vtctx->bitstream)
-        return AVERROR_INVALIDDATA;
+    status = CMVideoFormatDescriptionCreate(kCFAllocatorDefault,
+                                            codec_type,
+                                            width,
+                                            height,
+                                            decoder_spec, // Dictionary of extension
+                                            &cm_fmt_desc);
 
-    status = videotoolbox_session_decode_frame(avctx);
+    if (status)
+        return NULL;
 
-    if (status) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", status);
-        return AVERROR_UNKNOWN;
-    }
-
-    if (!vtctx->frame)
-        return AVERROR_UNKNOWN;
-
-    return videotoolbox_buffer_create(avctx, frame);
-}
-
-static int videotoolbox_h264_end_frame(AVCodecContext *avctx)
-{
-    H264Context *h = avctx->priv_data;
-    AVFrame *frame = h->cur_pic_ptr->f;
-
-    return videotoolbox_common_end_frame(avctx, frame);
-}
-
-static int videotoolbox_hevc_end_frame(AVCodecContext *avctx)
-{
-    HEVCContext *h = avctx->priv_data;
-    AVFrame *frame = h->ref->frame;
-    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
-    int ret;
-
-    ret = videotoolbox_common_end_frame(avctx, frame);
-    vtctx->bitstream_size = 0;
-    return ret;
-}
-
-static int videotoolbox_mpeg_start_frame(AVCodecContext *avctx,
-                                         const uint8_t *buffer,
-                                         uint32_t size)
-{
-    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
-
-    return videotoolbox_buffer_copy(vtctx, buffer, size);
-}
-
-static int videotoolbox_mpeg_decode_slice(AVCodecContext *avctx,
-                                          const uint8_t *buffer,
-                                          uint32_t size)
-{
-    return 0;
-}
-
-static int videotoolbox_mpeg_end_frame(AVCodecContext *avctx)
-{
-    MpegEncContext *s = avctx->priv_data;
-    AVFrame *frame = s->current_picture_ptr->f;
-
-    return videotoolbox_common_end_frame(avctx, frame);
-}
-
-static CFDictionaryRef videotoolbox_decoder_config_create(CMVideoCodecType codec_type,
-                                                          AVCodecContext *avctx)
-{
-    CFMutableDictionaryRef config_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
-                                                                   0,
-                                                                   &kCFTypeDictionaryKeyCallBacks,
-                                                                   &kCFTypeDictionaryValueCallBacks);
-
-    CFDictionarySetValue(config_info,
-                         kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder,
-                         kCFBooleanTrue);
-
-    if (avctx->extradata_size) {
-        CFMutableDictionaryRef avc_info;
-        CFDataRef data = NULL;
-
-        avc_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
-                                             1,
-                                             &kCFTypeDictionaryKeyCallBacks,
-                                             &kCFTypeDictionaryValueCallBacks);
-
-        switch (codec_type) {
-        case kCMVideoCodecType_MPEG4Video :
-            data = videotoolbox_esds_extradata_create(avctx);
-            if (data)
-                CFDictionarySetValue(avc_info, CFSTR("esds"), data);
-            break;
-        case kCMVideoCodecType_H264 :
-            data = ff_videotoolbox_avcc_extradata_create(avctx);
-            if (data)
-                CFDictionarySetValue(avc_info, CFSTR("avcC"), data);
-            break;
-        case kCMVideoCodecType_HEVC :
-            data = ff_videotoolbox_hvcc_extradata_create(avctx);
-            if (data)
-                CFDictionarySetValue(avc_info, CFSTR("hvcC"), data);
-            break;
-        default:
-            break;
-        }
-
-        CFDictionarySetValue(config_info,
-                kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms,
-                avc_info);
-
-        if (data)
-            CFRelease(data);
-
-        CFRelease(avc_info);
-    }
-    return config_info;
+    return cm_fmt_desc;
 }
 
 static CFDictionaryRef videotoolbox_buffer_attributes_create(int width,
@@ -739,28 +714,61 @@
     return buffer_attributes;
 }
 
-static CMVideoFormatDescriptionRef videotoolbox_format_desc_create(CMVideoCodecType codec_type,
-                                                                   CFDictionaryRef decoder_spec,
-                                                                   int width,
-                                                                   int height)
+static CFDictionaryRef videotoolbox_decoder_config_create(CMVideoCodecType codec_type,
+                                                          AVCodecContext *avctx)
 {
-    CMFormatDescriptionRef cm_fmt_desc;
-    OSStatus status;
+    CFMutableDictionaryRef config_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
+                                                                   0,
+                                                                   &kCFTypeDictionaryKeyCallBacks,
+                                                                   &kCFTypeDictionaryValueCallBacks);
 
-    status = CMVideoFormatDescriptionCreate(kCFAllocatorDefault,
-                                            codec_type,
-                                            width,
-                                            height,
-                                            decoder_spec, // Dictionary of extension
-                                            &cm_fmt_desc);
+    CFDictionarySetValue(config_info,
+                         codec_type == kCMVideoCodecType_HEVC ?
+                            kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder :
+                            kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder,
+                         kCFBooleanTrue);
 
-    if (status)
-        return NULL;
+    CFMutableDictionaryRef avc_info;
+    CFDataRef data = NULL;
 
-    return cm_fmt_desc;
+    avc_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
+                                         1,
+                                         &kCFTypeDictionaryKeyCallBacks,
+                                         &kCFTypeDictionaryValueCallBacks);
+
+    switch (codec_type) {
+    case kCMVideoCodecType_MPEG4Video :
+        if (avctx->extradata_size)
+            data = videotoolbox_esds_extradata_create(avctx);
+        if (data)
+            CFDictionarySetValue(avc_info, CFSTR("esds"), data);
+        break;
+    case kCMVideoCodecType_H264 :
+        data = ff_videotoolbox_avcc_extradata_create(avctx);
+        if (data)
+            CFDictionarySetValue(avc_info, CFSTR("avcC"), data);
+        break;
+    case kCMVideoCodecType_HEVC :
+        data = ff_videotoolbox_hvcc_extradata_create(avctx);
+        if (data)
+            CFDictionarySetValue(avc_info, CFSTR("hvcC"), data);
+        break;
+    default:
+        break;
+    }
+
+    CFDictionarySetValue(config_info,
+            kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms,
+            avc_info);
+
+    if (data)
+        CFRelease(data);
+
+    CFRelease(avc_info);
+    return config_info;
 }
 
-static int videotoolbox_default_init(AVCodecContext *avctx)
+static int videotoolbox_start(AVCodecContext *avctx)
 {
     AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
     OSStatus status;
@@ -798,6 +806,11 @@
 
     decoder_spec = videotoolbox_decoder_config_create(videotoolbox->cm_codec_type, avctx);
 
+    if (!decoder_spec) {
+        av_log(avctx, AV_LOG_ERROR, "decoder specification creation failed\n");
+        return -1;
+    }
+
     videotoolbox->cm_fmt_desc = videotoolbox_format_desc_create(videotoolbox->cm_codec_type,
                                                                 decoder_spec,
                                                                 avctx->width,
@@ -836,33 +849,153 @@
     case kVTVideoDecoderUnsupportedDataFormatErr:
         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox does not support this format.\n");
         return AVERROR(ENOSYS);
+    case kVTCouldNotFindVideoDecoderErr:
+        av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox decoder for this format not found.\n");
+        return AVERROR(ENOSYS);
     case kVTVideoDecoderMalfunctionErr:
         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox malfunction.\n");
         return AVERROR(EINVAL);
-    case kVTVideoDecoderBadDataErr :
+    case kVTVideoDecoderBadDataErr:
         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox reported invalid data.\n");
         return AVERROR_INVALIDDATA;
     case 0:
         return 0;
     default:
-        av_log(avctx, AV_LOG_VERBOSE, "Unknown VideoToolbox session creation error %u\n", (unsigned)status);
+        av_log(avctx, AV_LOG_VERBOSE, "Unknown VideoToolbox session creation error %d\n", (int)status);
         return AVERROR_UNKNOWN;
     }
 }
 
-static void videotoolbox_default_free(AVCodecContext *avctx)
+static void videotoolbox_stop(AVCodecContext *avctx)
 {
     AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
+    if (!videotoolbox)
+        return;
 
-    if (videotoolbox) {
-        if (videotoolbox->cm_fmt_desc)
-            CFRelease(videotoolbox->cm_fmt_desc);
+    if (videotoolbox->cm_fmt_desc) {
+        CFRelease(videotoolbox->cm_fmt_desc);
+        videotoolbox->cm_fmt_desc = NULL;
+    }
 
-        if (videotoolbox->session) {
-            VTDecompressionSessionInvalidate(videotoolbox->session);
-            CFRelease(videotoolbox->session);
+    if (videotoolbox->session) {
+        VTDecompressionSessionInvalidate(videotoolbox->session);
+        CFRelease(videotoolbox->session);
+        videotoolbox->session = NULL;
+    }
+}
+
+static const char *videotoolbox_error_string(OSStatus status)
+{
+    switch (status) {
+        case kVTVideoDecoderBadDataErr:
+            return "bad data";
+        case kVTVideoDecoderMalfunctionErr:
+            return "decoder malfunction";
+        case kVTInvalidSessionErr:
+            return "invalid session";
+    }
+    return "unknown";
+}
+
+static int videotoolbox_common_end_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    OSStatus status;
+    AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+
+    if (vtctx->reconfig_needed == true) {
+        vtctx->reconfig_needed = false;
+        av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox decoder needs reconfig, restarting..\n");
+        videotoolbox_stop(avctx);
+        if (videotoolbox_start(avctx) != 0) {
+            return AVERROR_EXTERNAL;
         }
     }
+
+    if (!videotoolbox->session || !vtctx->bitstream || !vtctx->bitstream_size)
+        return AVERROR_INVALIDDATA;
+
+    status = videotoolbox_session_decode_frame(avctx);
+    if (status != noErr) {
+        if (status == kVTVideoDecoderMalfunctionErr || status == kVTInvalidSessionErr)
+            vtctx->reconfig_needed = true;
+        av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%s, %d)\n", videotoolbox_error_string(status), (int)status);
+        return AVERROR_UNKNOWN;
+    }
+
+    if (!vtctx->frame) {
+        vtctx->reconfig_needed = true;
+        return AVERROR_UNKNOWN;
+    }
+
+    return videotoolbox_buffer_create(avctx, frame);
+}
+
+static int videotoolbox_h264_end_frame(AVCodecContext *avctx)
+{
+    H264Context *h = avctx->priv_data;
+    AVFrame *frame = h->cur_pic_ptr->f;
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+    int ret = videotoolbox_common_end_frame(avctx, frame);
+    vtctx->bitstream_size = 0;
+    return ret;
+}
+
+static int videotoolbox_hevc_start_frame(AVCodecContext *avctx,
+                                         const uint8_t *buffer,
+                                         uint32_t size)
+{
+    return 0;
+}
+
+static int videotoolbox_hevc_decode_slice(AVCodecContext *avctx,
+                                          const uint8_t *buffer,
+                                          uint32_t size)
+{
+    return videotoolbox_common_decode_slice(avctx, buffer, size);
+}
+
+
+static int videotoolbox_hevc_decode_params(AVCodecContext *avctx,
+                                           int type,
+                                           const uint8_t *buffer,
+                                           uint32_t size)
+{
+    return videotoolbox_common_decode_slice(avctx, buffer, size);
+}
+
+static int videotoolbox_hevc_end_frame(AVCodecContext *avctx)
+{
+    HEVCContext *h = avctx->priv_data;
+    AVFrame *frame = h->ref->frame;
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+    int ret = videotoolbox_common_end_frame(avctx, frame);
+    vtctx->bitstream_size = 0;
+    return ret;
+}
+
+static int videotoolbox_mpeg_start_frame(AVCodecContext *avctx,
+                                         const uint8_t *buffer,
+                                         uint32_t size)
+{
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+
+    return videotoolbox_buffer_copy(vtctx, buffer, size);
+}
+
+static int videotoolbox_mpeg_decode_slice(AVCodecContext *avctx,
+                                          const uint8_t *buffer,
+                                          uint32_t size)
+{
+    return 0;
+}
+
+static int videotoolbox_mpeg_end_frame(AVCodecContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+    AVFrame *frame = s->current_picture_ptr->f;
+
+    return videotoolbox_common_end_frame(avctx, frame);
 }
 
 static int videotoolbox_uninit(AVCodecContext *avctx)
@@ -874,7 +1007,7 @@
     ff_videotoolbox_uninit(avctx);
 
     if (vtctx->vt_ctx)
-        videotoolbox_default_free(avctx);
+        videotoolbox_stop(avctx);
 
     av_buffer_unref(&vtctx->cached_hw_frames_ctx);
     av_freep(&vtctx->vt_ctx);
@@ -940,7 +1073,7 @@
         goto fail;
     }
 
-    err = videotoolbox_default_init(avctx);
+    err = videotoolbox_start(avctx);
     if (err < 0)
         goto fail;
 
@@ -951,7 +1084,20 @@
     return err;
 }
 
-AVHWAccel ff_h263_videotoolbox_hwaccel = {
+static int videotoolbox_frame_params(AVCodecContext *avctx,
+                                     AVBufferRef *hw_frames_ctx)
+{
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
+
+    frames_ctx->format            = AV_PIX_FMT_VIDEOTOOLBOX;
+    frames_ctx->width             = avctx->coded_width;
+    frames_ctx->height            = avctx->coded_height;
+    frames_ctx->sw_format         = AV_PIX_FMT_NV12;
+
+    return 0;
+}
+
+const AVHWAccel ff_h263_videotoolbox_hwaccel = {
     .name           = "h263_videotoolbox",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H263,
@@ -960,26 +1106,29 @@
     .start_frame    = videotoolbox_mpeg_start_frame,
     .decode_slice   = videotoolbox_mpeg_decode_slice,
     .end_frame      = videotoolbox_mpeg_end_frame,
+    .frame_params   = videotoolbox_frame_params,
     .init           = videotoolbox_common_init,
     .uninit         = videotoolbox_uninit,
     .priv_data_size = sizeof(VTContext),
 };
 
-AVHWAccel ff_hevc_videotoolbox_hwaccel = {
+const AVHWAccel ff_hevc_videotoolbox_hwaccel = {
     .name           = "hevc_videotoolbox",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_HEVC,
     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
     .alloc_frame    = ff_videotoolbox_alloc_frame,
-    .start_frame    = ff_videotoolbox_h264_start_frame,
-    .decode_slice   = ff_videotoolbox_h264_decode_slice,
+    .start_frame    = videotoolbox_hevc_start_frame,
+    .decode_slice   = videotoolbox_hevc_decode_slice,
+    .decode_params  = videotoolbox_hevc_decode_params,
     .end_frame      = videotoolbox_hevc_end_frame,
+    .frame_params   = videotoolbox_frame_params,
     .init           = videotoolbox_common_init,
     .uninit         = ff_videotoolbox_uninit,
     .priv_data_size = sizeof(VTContext),
 };
 
-AVHWAccel ff_h264_videotoolbox_hwaccel = {
+const AVHWAccel ff_h264_videotoolbox_hwaccel = {
     .name           = "h264_videotoolbox",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
@@ -987,13 +1136,15 @@
     .alloc_frame    = ff_videotoolbox_alloc_frame,
     .start_frame    = ff_videotoolbox_h264_start_frame,
     .decode_slice   = ff_videotoolbox_h264_decode_slice,
+    .decode_params  = videotoolbox_h264_decode_params,
     .end_frame      = videotoolbox_h264_end_frame,
+    .frame_params   = videotoolbox_frame_params,
     .init           = videotoolbox_common_init,
     .uninit         = videotoolbox_uninit,
     .priv_data_size = sizeof(VTContext),
 };
 
-AVHWAccel ff_mpeg1_videotoolbox_hwaccel = {
+const AVHWAccel ff_mpeg1_videotoolbox_hwaccel = {
     .name           = "mpeg1_videotoolbox",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG1VIDEO,
@@ -1002,12 +1153,13 @@
     .start_frame    = videotoolbox_mpeg_start_frame,
     .decode_slice   = videotoolbox_mpeg_decode_slice,
     .end_frame      = videotoolbox_mpeg_end_frame,
+    .frame_params   = videotoolbox_frame_params,
     .init           = videotoolbox_common_init,
     .uninit         = videotoolbox_uninit,
     .priv_data_size = sizeof(VTContext),
 };
 
-AVHWAccel ff_mpeg2_videotoolbox_hwaccel = {
+const AVHWAccel ff_mpeg2_videotoolbox_hwaccel = {
     .name           = "mpeg2_videotoolbox",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG2VIDEO,
@@ -1016,12 +1168,13 @@
     .start_frame    = videotoolbox_mpeg_start_frame,
     .decode_slice   = videotoolbox_mpeg_decode_slice,
     .end_frame      = videotoolbox_mpeg_end_frame,
+    .frame_params   = videotoolbox_frame_params,
     .init           = videotoolbox_common_init,
     .uninit         = videotoolbox_uninit,
     .priv_data_size = sizeof(VTContext),
 };
 
-AVHWAccel ff_mpeg4_videotoolbox_hwaccel = {
+const AVHWAccel ff_mpeg4_videotoolbox_hwaccel = {
     .name           = "mpeg4_videotoolbox",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG4,
@@ -1030,6 +1183,7 @@
     .start_frame    = videotoolbox_mpeg_start_frame,
     .decode_slice   = videotoolbox_mpeg_decode_slice,
     .end_frame      = videotoolbox_mpeg_end_frame,
+    .frame_params   = videotoolbox_frame_params,
     .init           = videotoolbox_common_init,
     .uninit         = videotoolbox_uninit,
     .priv_data_size = sizeof(VTContext),
@@ -1057,13 +1211,13 @@
     avctx->hwaccel_context = vtctx ?: av_videotoolbox_alloc_context();
     if (!avctx->hwaccel_context)
         return AVERROR(ENOMEM);
-    return videotoolbox_default_init(avctx);
+    return videotoolbox_start(avctx);
 }
 
 void av_videotoolbox_default_free(AVCodecContext *avctx)
 {
 
-    videotoolbox_default_free(avctx);
+    videotoolbox_stop(avctx);
     av_freep(&avctx->hwaccel_context);
 }
 #endif /* CONFIG_VIDEOTOOLBOX */

diff --git a/libavcodec/videotoolboxenc.c b/libavcodec/videotoolboxenc.c
index eba6cc6..50aba2d 100644
--- a/libavcodec/videotoolboxenc.c
+++ b/libavcodec/videotoolboxenc.c

@@ -35,6 +35,17 @@
 #include "h264_sei.h"
 #include <dlfcn.h>
 
+#if !HAVE_KCMVIDEOCODECTYPE_HEVC
+enum { kCMVideoCodecType_HEVC = 'hvc1' };
+#endif
+
+typedef OSStatus (*getParameterSetAtIndex)(CMFormatDescriptionRef videoDesc,
+                                           size_t parameterSetIndex,
+                                           const uint8_t **parameterSetPointerOut,
+                                           size_t *parameterSetSizeOut,
+                                           size_t *parameterSetCountOut,
+                                           int *NALUnitHeaderLengthOut);
+
 //These symbols may not be present
 static struct{
     CFStringRef kCVImageBufferColorPrimaries_ITU_R_2020;
@@ -65,10 +76,15 @@
     CFStringRef kVTProfileLevel_H264_High_5_2;
     CFStringRef kVTProfileLevel_H264_High_AutoLevel;
 
+    CFStringRef kVTProfileLevel_HEVC_Main_AutoLevel;
+    CFStringRef kVTProfileLevel_HEVC_Main10_AutoLevel;
+
     CFStringRef kVTCompressionPropertyKey_RealTime;
 
     CFStringRef kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder;
     CFStringRef kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder;
+
+    getParameterSetAtIndex CMVideoFormatDescriptionGetHEVCParameterSetAtIndex;
 } compat_keys;
 
 #define GET_SYM(symbol, defaultVal)                                     \
@@ -83,6 +99,12 @@
 static pthread_once_t once_ctrl = PTHREAD_ONCE_INIT;
 
 static void loadVTEncSymbols(){
+    compat_keys.CMVideoFormatDescriptionGetHEVCParameterSetAtIndex =
+        (getParameterSetAtIndex)dlsym(
+            RTLD_DEFAULT,
+            "CMVideoFormatDescriptionGetHEVCParameterSetAtIndex"
+        );
+
     GET_SYM(kCVImageBufferColorPrimaries_ITU_R_2020,   "ITU_R_2020");
     GET_SYM(kCVImageBufferTransferFunction_ITU_R_2020, "ITU_R_2020");
     GET_SYM(kCVImageBufferYCbCrMatrix_ITU_R_2020,      "ITU_R_2020");
@@ -111,6 +133,9 @@
     GET_SYM(kVTProfileLevel_H264_High_5_2,           "H264_High_5_2");
     GET_SYM(kVTProfileLevel_H264_High_AutoLevel,     "H264_High_AutoLevel");
 
+    GET_SYM(kVTProfileLevel_HEVC_Main_AutoLevel,     "HEVC_Main_AutoLevel");
+    GET_SYM(kVTProfileLevel_HEVC_Main10_AutoLevel,   "HEVC_Main10_AutoLevel");
+
     GET_SYM(kVTCompressionPropertyKey_RealTime, "RealTime");
 
     GET_SYM(kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
@@ -133,6 +158,13 @@
     VT_CABAC
 } VTH264Entropy;
 
+typedef enum VT_HEVCProfile {
+    HEVC_PROF_AUTO,
+    HEVC_PROF_MAIN,
+    HEVC_PROF_MAIN10,
+    HEVC_PROF_COUNT
+} VT_HEVCProfile;
+
 static const uint8_t start_code[] = { 0, 0, 0, 1 };
 
 typedef struct ExtraSEI {
@@ -149,10 +181,12 @@
 
 typedef struct VTEncContext {
     AVClass *class;
+    enum AVCodecID codec_id;
     VTCompressionSessionRef session;
     CFStringRef ycbcr_matrix;
     CFStringRef color_primaries;
     CFStringRef transfer_function;
+    getParameterSetAtIndex get_param_set_func;
 
     pthread_mutex_t lock;
     pthread_cond_t  cv_sample_sent;
@@ -348,6 +382,7 @@
 {
     switch (id) {
     case AV_CODEC_ID_H264: return kCMVideoCodecType_H264;
+    case AV_CODEC_ID_HEVC: return kCMVideoCodecType_HEVC;
     default:               return 0;
     }
 }
@@ -365,17 +400,18 @@
     CMVideoFormatDescriptionRef vid_fmt,
     size_t                      *size)
 {
+    VTEncContext *vtctx = avctx->priv_data;
     size_t total_size = 0;
     size_t ps_count;
     int is_count_bad = 0;
     size_t i;
     int status;
-    status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
-                                                                0,
-                                                                NULL,
-                                                                NULL,
-                                                                &ps_count,
-                                                                NULL);
+    status = vtctx->get_param_set_func(vid_fmt,
+                                       0,
+                                       NULL,
+                                       NULL,
+                                       &ps_count,
+                                       NULL);
     if (status) {
         is_count_bad = 1;
         ps_count     = 0;
@@ -385,12 +421,12 @@
     for (i = 0; i < ps_count || is_count_bad; i++) {
         const uint8_t *ps;
         size_t ps_size;
-        status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
-                                                                    i,
-                                                                    &ps,
-                                                                    &ps_size,
-                                                                    NULL,
-                                                                    NULL);
+        status = vtctx->get_param_set_func(vid_fmt,
+                                           i,
+                                           &ps,
+                                           &ps_size,
+                                           NULL,
+                                           NULL);
         if (status) {
             /*
              * When ps_count is invalid, status != 0 ends the loop normally
@@ -419,18 +455,19 @@
     uint8_t                     *dst,
     size_t                      dst_size)
 {
+    VTEncContext *vtctx = avctx->priv_data;
     size_t ps_count;
     int is_count_bad = 0;
     int status;
     size_t offset = 0;
     size_t i;
 
-    status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
-                                                                0,
-                                                                NULL,
-                                                                NULL,
-                                                                &ps_count,
-                                                                NULL);
+    status = vtctx->get_param_set_func(vid_fmt,
+                                       0,
+                                       NULL,
+                                       NULL,
+                                       &ps_count,
+                                       NULL);
     if (status) {
         is_count_bad = 1;
         ps_count     = 0;
@@ -443,12 +480,12 @@
         size_t ps_size;
         size_t next_offset;
 
-        status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
-                                                                    i,
-                                                                    &ps,
-                                                                    &ps_size,
-                                                                    NULL,
-                                                                    NULL);
+        status = vtctx->get_param_set_func(vid_fmt,
+                                           i,
+                                           &ps,
+                                           &ps_size,
+                                           NULL,
+                                           NULL);
         if (status) {
             if (i > 0 && is_count_bad) status = 0;
 
@@ -548,6 +585,7 @@
     CMSampleBufferRef sample_buffer,
     size_t            *size)
 {
+    VTEncContext *vtctx = avctx->priv_data;
     CMVideoFormatDescriptionRef vid_fmt;
     int isize;
     int status;
@@ -558,12 +596,12 @@
         return AVERROR_EXTERNAL;
     }
 
-    status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
-                                                                0,
-                                                                NULL,
-                                                                NULL,
-                                                                NULL,
-                                                                &isize);
+    status = vtctx->get_param_set_func(vid_fmt,
+                                       0,
+                                       NULL,
+                                       NULL,
+                                       NULL,
+                                       &isize);
     if (status) {
         av_log(avctx, AV_LOG_ERROR, "Error getting length code size: %d\n", status);
         return AVERROR_EXTERNAL;
@@ -579,8 +617,8 @@
  * If profile_level_val is NULL and this method returns true, don't specify the
  * profile/level to the encoder.
  */
-static bool get_vt_profile_level(AVCodecContext *avctx,
-                                 CFStringRef    *profile_level_val)
+static bool get_vt_h264_profile_level(AVCodecContext *avctx,
+                                      CFStringRef    *profile_level_val)
 {
     VTEncContext *vtctx = avctx->priv_data;
     int64_t profile = vtctx->profile;
@@ -670,6 +708,41 @@
     return true;
 }
 
+/*
+ * Returns true on success.
+ *
+ * If profile_level_val is NULL and this method returns true, don't specify the
+ * profile/level to the encoder.
+ */
+static bool get_vt_hevc_profile_level(AVCodecContext *avctx,
+                                      CFStringRef    *profile_level_val)
+{
+    VTEncContext *vtctx = avctx->priv_data;
+    int64_t profile = vtctx->profile;
+
+    *profile_level_val = NULL;
+
+    switch (profile) {
+        case HEVC_PROF_AUTO:
+            return true;
+        case HEVC_PROF_MAIN:
+            *profile_level_val =
+                compat_keys.kVTProfileLevel_HEVC_Main_AutoLevel;
+            break;
+        case HEVC_PROF_MAIN10:
+            *profile_level_val =
+                compat_keys.kVTProfileLevel_HEVC_Main10_AutoLevel;
+            break;
+    }
+
+    if (!*profile_level_val) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
+        return false;
+    }
+
+    return true;
+}
+
 static int get_cv_pixel_format(AVCodecContext* avctx,
                                enum AVPixelFormat fmt,
                                enum AVColorRange range,
@@ -944,52 +1017,58 @@
         return AVERROR_EXTERNAL;
     }
 
-    bytes_per_second_value = max_rate >> 3;
-    bytes_per_second = CFNumberCreate(kCFAllocatorDefault,
-                                      kCFNumberSInt64Type,
-                                      &bytes_per_second_value);
-    if (!bytes_per_second) {
-        return AVERROR(ENOMEM);
-    }
-    one_second_value = 1;
-    one_second = CFNumberCreate(kCFAllocatorDefault,
-                                kCFNumberSInt64Type,
-                                &one_second_value);
-    if (!one_second) {
-        CFRelease(bytes_per_second);
-        return AVERROR(ENOMEM);
-    }
-    nums[0] = bytes_per_second;
-    nums[1] = one_second;
-    data_rate_limits = CFArrayCreate(kCFAllocatorDefault,
-                                     nums,
-                                     2,
-                                     &kCFTypeArrayCallBacks);
+    if (vtctx->codec_id == AV_CODEC_ID_H264 && max_rate > 0) {
+        // kVTCompressionPropertyKey_DataRateLimits is not available for HEVC
+        bytes_per_second_value = max_rate >> 3;
+        bytes_per_second = CFNumberCreate(kCFAllocatorDefault,
+                                          kCFNumberSInt64Type,
+                                          &bytes_per_second_value);
+        if (!bytes_per_second) {
+            return AVERROR(ENOMEM);
+        }
+        one_second_value = 1;
+        one_second = CFNumberCreate(kCFAllocatorDefault,
+                                    kCFNumberSInt64Type,
+                                    &one_second_value);
+        if (!one_second) {
+            CFRelease(bytes_per_second);
+            return AVERROR(ENOMEM);
+        }
+        nums[0] = (void *)bytes_per_second;
+        nums[1] = (void *)one_second;
+        data_rate_limits = CFArrayCreate(kCFAllocatorDefault,
+                                         (const void **)nums,
+                                         2,
+                                         &kCFTypeArrayCallBacks);
 
-    if (!data_rate_limits) {
+        if (!data_rate_limits) {
+            CFRelease(bytes_per_second);
+            CFRelease(one_second);
+            return AVERROR(ENOMEM);
+        }
+        status = VTSessionSetProperty(vtctx->session,
+                                      kVTCompressionPropertyKey_DataRateLimits,
+                                      data_rate_limits);
+
         CFRelease(bytes_per_second);
         CFRelease(one_second);
-        return AVERROR(ENOMEM);
-    }
-    status = VTSessionSetProperty(vtctx->session,
-                                  kVTCompressionPropertyKey_DataRateLimits,
-                                  data_rate_limits);
+        CFRelease(data_rate_limits);
 
-    CFRelease(bytes_per_second);
-    CFRelease(one_second);
-    CFRelease(data_rate_limits);
-
-    if (status) {
-        av_log(avctx, AV_LOG_ERROR, "Error setting max bitrate property: %d\n", status);
-        return AVERROR_EXTERNAL;
-    }
-
-    if (profile_level) {
-        status = VTSessionSetProperty(vtctx->session,
-                                      kVTCompressionPropertyKey_ProfileLevel,
-                                      profile_level);
         if (status) {
-            av_log(avctx, AV_LOG_ERROR, "Error setting profile/level property: %d\n", status);
+            av_log(avctx, AV_LOG_ERROR, "Error setting max bitrate property: %d\n", status);
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    if (vtctx->codec_id == AV_CODEC_ID_H264) {
+        // kVTCompressionPropertyKey_ProfileLevel is not available for HEVC
+        if (profile_level) {
+            status = VTSessionSetProperty(vtctx->session,
+                                        kVTCompressionPropertyKey_ProfileLevel,
+                                        profile_level);
+            if (status) {
+                av_log(avctx, AV_LOG_ERROR, "Error setting profile/level property: %d\n", status);
+            }
         }
     }
 
@@ -1186,40 +1265,45 @@
     return 0;
 }
 
-static av_cold int vtenc_init(AVCodecContext *avctx)
+static int vtenc_configure_encoder(AVCodecContext *avctx)
 {
     CFMutableDictionaryRef enc_info;
     CFMutableDictionaryRef pixel_buffer_info;
     CMVideoCodecType       codec_type;
     VTEncContext           *vtctx = avctx->priv_data;
     CFStringRef            profile_level;
-    CFBooleanRef           has_b_frames_cfbool;
     CFNumberRef            gamma_level = NULL;
     int                    status;
 
-    pthread_once(&once_ctrl, loadVTEncSymbols);
-
     codec_type = get_cm_codec_type(avctx->codec_id);
     if (!codec_type) {
         av_log(avctx, AV_LOG_ERROR, "Error: no mapping for AVCodecID %d\n", avctx->codec_id);
         return AVERROR(EINVAL);
     }
 
-    vtctx->has_b_frames = avctx->max_b_frames > 0;
-    if(vtctx->has_b_frames && vtctx->profile == H264_PROF_BASELINE){
-        av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
-        vtctx->has_b_frames = false;
+    vtctx->codec_id = avctx->codec_id;
+
+    if (vtctx->codec_id == AV_CODEC_ID_H264) {
+        vtctx->get_param_set_func = CMVideoFormatDescriptionGetH264ParameterSetAtIndex;
+
+        vtctx->has_b_frames = avctx->max_b_frames > 0;
+        if(vtctx->has_b_frames && vtctx->profile == H264_PROF_BASELINE){
+            av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
+            vtctx->has_b_frames = false;
+        }
+
+        if (vtctx->entropy == VT_CABAC && vtctx->profile == H264_PROF_BASELINE) {
+            av_log(avctx, AV_LOG_WARNING, "CABAC entropy requires 'main' or 'high' profile, but baseline was requested. Encode will not use CABAC entropy.\n");
+            vtctx->entropy = VT_ENTROPY_NOT_SET;
+        }
+
+        if (!get_vt_h264_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
+    } else {
+        vtctx->get_param_set_func = compat_keys.CMVideoFormatDescriptionGetHEVCParameterSetAtIndex;
+        if (!vtctx->get_param_set_func) return AVERROR(EINVAL);
+        if (!get_vt_hevc_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
     }
 
-    if (vtctx->entropy == VT_CABAC && vtctx->profile == H264_PROF_BASELINE) {
-        av_log(avctx, AV_LOG_WARNING, "CABAC entropy requires 'main' or 'high' profile, but baseline was requested. Encode will not use CABAC entropy.\n");
-        vtctx->entropy = VT_ENTROPY_NOT_SET;
-    }
-
-    if (!get_vt_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
-
-    vtctx->session = NULL;
-
     enc_info = CFDictionaryCreateMutable(
         kCFAllocatorDefault,
         20,
@@ -1249,8 +1333,6 @@
         pixel_buffer_info = NULL;
     }
 
-    pthread_mutex_init(&vtctx->lock, NULL);
-    pthread_cond_init(&vtctx->cv_sample_sent, NULL);
     vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0;
 
     get_cv_transfer_function(avctx, &vtctx->transfer_function, &gamma_level);
@@ -1277,8 +1359,32 @@
                                   pixel_buffer_info,
                                   &vtctx->session);
 
-    if (status < 0)
-        goto init_cleanup;
+init_cleanup:
+    if (gamma_level)
+        CFRelease(gamma_level);
+
+    if (pixel_buffer_info)
+        CFRelease(pixel_buffer_info);
+
+    CFRelease(enc_info);
+
+    return status;
+}
+
+static av_cold int vtenc_init(AVCodecContext *avctx)
+{
+    VTEncContext    *vtctx = avctx->priv_data;
+    CFBooleanRef    has_b_frames_cfbool;
+    int             status;
+
+    pthread_once(&once_ctrl, loadVTEncSymbols);
+
+    pthread_mutex_init(&vtctx->lock, NULL);
+    pthread_cond_init(&vtctx->cv_sample_sent, NULL);
+
+    vtctx->session = NULL;
+    status = vtenc_configure_encoder(avctx);
+    if (status) return status;
 
     status = VTSessionCopyProperty(vtctx->session,
                                    kVTCompressionPropertyKey_AllowFrameReordering,
@@ -1292,16 +1398,7 @@
     }
     avctx->has_b_frames = vtctx->has_b_frames;
 
-init_cleanup:
-    if (gamma_level)
-        CFRelease(gamma_level);
-
-    if (pixel_buffer_info)
-        CFRelease(pixel_buffer_info);
-
-    CFRelease(enc_info);
-
-    return status;
+    return 0;
 }
 
 static void vtenc_get_frame_info(CMSampleBufferRef buffer, bool *is_key_frame)
@@ -1853,8 +1950,6 @@
                    "Color range not set for %s. Using MPEG range.\n",
                    av_get_pix_fmt_name(av_format));
         }
-
-        av_log(avctx, AV_LOG_WARNING, "");
     }
 
     switch (av_format) {
@@ -2083,8 +2178,27 @@
 #if TARGET_OS_IPHONE
     pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
     if (!pix_buf_pool) {
-        av_log(avctx, AV_LOG_ERROR, "Could not get pixel buffer pool.\n");
-        return AVERROR_EXTERNAL;
+        /* On iOS, the VT session is invalidated when the APP switches from
+         * foreground to background and vice versa. Fetch the actual error code
+         * of the VT session to detect that case and restart the VT session
+         * accordingly. */
+        OSStatus vtstatus;
+
+        vtstatus = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
+        if (vtstatus == kVTInvalidSessionErr) {
+            CFRelease(vtctx->session);
+            vtctx->session = NULL;
+            status = vtenc_configure_encoder(avctx);
+            if (status == 0)
+                pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
+        }
+        if (!pix_buf_pool) {
+            av_log(avctx, AV_LOG_ERROR, "Could not get pixel buffer pool.\n");
+            return AVERROR_EXTERNAL;
+        }
+        else
+            av_log(avctx, AV_LOG_WARNING, "VT session restarted because of a "
+                   "kVTInvalidSessionErr error.\n");
     }
 
     status = CVPixelBufferPoolCreatePixelBuffer(NULL,
@@ -2389,13 +2503,14 @@
 {
     VTEncContext *vtctx = avctx->priv_data;
 
+    pthread_cond_destroy(&vtctx->cv_sample_sent);
+    pthread_mutex_destroy(&vtctx->lock);
+
     if(!vtctx->session) return 0;
 
     VTCompressionSessionCompleteFrames(vtctx->session,
                                        kCMTimeIndefinite);
     clear_frame_queue(vtctx);
-    pthread_cond_destroy(&vtctx->cv_sample_sent);
-    pthread_mutex_destroy(&vtctx->lock);
     CFRelease(vtctx->session);
     vtctx->session = NULL;
 
@@ -2424,9 +2539,19 @@
     AV_PIX_FMT_NONE
 };
 
-#define OFFSET(x) offsetof(VTEncContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
-static const AVOption options[] = {
+#define COMMON_OPTIONS \
+    { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL, \
+        { .i64 = 0 }, 0, 1, VE }, \
+    { "realtime", "Hint that encoding should happen in real-time if not faster (e.g. capturing from camera).", \
+        OFFSET(realtime), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, \
+    { "frames_before", "Other frames will come before the frames in this session. This helps smooth concatenation issues.", \
+        OFFSET(frames_before), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, \
+    { "frames_after", "Other frames will come after the frames in this session. This helps smooth concatenation issues.", \
+        OFFSET(frames_after), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+
+#define OFFSET(x) offsetof(VTEncContext, x)
+static const AVOption h264_options[] = {
     { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = H264_PROF_AUTO }, H264_PROF_AUTO, H264_PROF_COUNT, VE, "profile" },
     { "baseline", "Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_BASELINE }, INT_MIN, INT_MAX, VE, "profile" },
     { "main",     "Main Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_MAIN     }, INT_MIN, INT_MAX, VE, "profile" },
@@ -2444,32 +2569,22 @@
     { "5.1", "Level 5.1", 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, INT_MIN, INT_MAX, VE, "level" },
     { "5.2", "Level 5.2", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, INT_MIN, INT_MAX, VE, "level" },
 
-    { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL,
-        { .i64 = 0 }, 0, 1, VE },
-
     { "coder", "Entropy coding", OFFSET(entropy), AV_OPT_TYPE_INT, { .i64 = VT_ENTROPY_NOT_SET }, VT_ENTROPY_NOT_SET, VT_CABAC, VE, "coder" },
     { "cavlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
     { "vlc",   "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
     { "cabac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
     { "ac",    "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
 
-    { "realtime", "Hint that encoding should happen in real-time if not faster (e.g. capturing from camera).",
-        OFFSET(realtime), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
-
-    { "frames_before", "Other frames will come before the frames in this session. This helps smooth concatenation issues.",
-        OFFSET(frames_before), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
-    { "frames_after", "Other frames will come after the frames in this session. This helps smooth concatenation issues.",
-        OFFSET(frames_after), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
-
     { "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, VE },
 
+    COMMON_OPTIONS
     { NULL },
 };
 
 static const AVClass h264_videotoolbox_class = {
     .class_name = "h264_videotoolbox",
     .item_name  = av_default_item_name,
-    .option     = options,
+    .option     = h264_options,
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
@@ -2488,3 +2603,36 @@
     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
                         FF_CODEC_CAP_INIT_CLEANUP,
 };
+
+static const AVOption hevc_options[] = {
+    { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = HEVC_PROF_AUTO }, HEVC_PROF_AUTO, HEVC_PROF_COUNT, VE, "profile" },
+    { "main",     "Main Profile",     0, AV_OPT_TYPE_CONST, { .i64 = HEVC_PROF_MAIN   }, INT_MIN, INT_MAX, VE, "profile" },
+    { "main10",   "Main10 Profile",   0, AV_OPT_TYPE_CONST, { .i64 = HEVC_PROF_MAIN10 }, INT_MIN, INT_MAX, VE, "profile" },
+
+    COMMON_OPTIONS
+    { NULL },
+};
+
+static const AVClass hevc_videotoolbox_class = {
+    .class_name = "hevc_videotoolbox",
+    .item_name  = av_default_item_name,
+    .option     = hevc_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_hevc_videotoolbox_encoder = {
+    .name             = "hevc_videotoolbox",
+    .long_name        = NULL_IF_CONFIG_SMALL("VideoToolbox H.265 Encoder"),
+    .type             = AVMEDIA_TYPE_VIDEO,
+    .id               = AV_CODEC_ID_HEVC,
+    .priv_data_size   = sizeof(VTEncContext),
+    .pix_fmts         = pix_fmts,
+    .init             = vtenc_init,
+    .encode2          = vtenc_frame,
+    .close            = vtenc_close,
+    .capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
+    .priv_class       = &hevc_videotoolbox_class,
+    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
+                        FF_CODEC_CAP_INIT_CLEANUP,
+    .wrapper_name     = "videotoolbox",
+};

diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c
index aabd9bb..cca2aa7 100644
--- a/libavcodec/vorbis.c
+++ b/libavcodec/vorbis.c

@@ -58,7 +58,7 @@
     uint32_t exit_at_level[33] = { 404 };
     unsigned i, j, p, code;
 
-    for (p = 0; (bits[p] == 0) && (p < num); ++p)
+    for (p = 0; (p < num) && (bits[p] == 0); ++p)
         ;
     if (p == num)
         return 0;
@@ -71,7 +71,7 @@
 
     ++p;
 
-    for (i = p; (bits[i] == 0) && (i < num); ++i)
+    for (i = p; (i < num) && (bits[i] == 0); ++i)
         ;
     if (i == num)
         return 0;

diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index 2a4f482..00e9cd8 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c

@@ -1862,6 +1862,7 @@
     .decode          = vorbis_decode_frame,
     .flush           = vorbis_decode_flush,
     .capabilities    = AV_CODEC_CAP_DR1,
+    .caps_internal   = FF_CODEC_CAP_INIT_CLEANUP,
     .channel_layouts = ff_vorbis_channel_layouts,
     .sample_fmts     = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                        AV_SAMPLE_FMT_NONE },

diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c
index a4ecd8f..18a679f 100644
--- a/libavcodec/vorbisenc.c
+++ b/libavcodec/vorbisenc.c

@@ -1093,9 +1093,13 @@
     PutBitContext pb;
 
     if (frame) {
+        AVFrame *clone;
         if ((ret = ff_af_queue_add(&venc->afq, frame)) < 0)
             return ret;
-        ff_bufqueue_add(avctx, &venc->bufqueue, av_frame_clone(frame));
+        clone = av_frame_clone(frame);
+        if (!clone)
+            return AVERROR(ENOMEM);
+        ff_bufqueue_add(avctx, &venc->bufqueue, clone);
     } else
         if (!venc->afq.remaining_samples)
             return 0;

diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index e9bb7dc..0e6da89 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c

@@ -451,6 +451,7 @@
     int i, j;
     int current_fragment;
     int plane;
+    int plane0_num_coded_frags = 0;
 
     if (s->keyframe) {
         memset(s->superblock_coding, SB_FULLY_CODED, s->superblock_count);
@@ -544,6 +545,9 @@
         int num_coded_frags = 0;
 
         for (i = sb_start; i < sb_end && get_bits_left(gb) > 0; i++) {
+            if (s->keyframe == 0 && get_bits_left(gb) < plane0_num_coded_frags >> 2) {
+                return AVERROR_INVALIDDATA;
+            }
             /* iterate through all 16 fragments in a superblock */
             for (j = 0; j < 16; j++) {
                 /* if the fragment is in bounds, check its coding status */
@@ -576,6 +580,8 @@
                 }
             }
         }
+        if (!plane)
+            plane0_num_coded_frags = num_coded_frags;
         s->total_num_coded_frags += num_coded_frags;
         for (i = 0; i < 64; i++)
             s->num_coded_frags[plane][i] = num_coded_frags;
@@ -1766,7 +1772,9 @@
     for (i = 0; i < 3; i++)
         s->qps[i] = -1;
 
-    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
+    ret = av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
+    if (ret)
+        return ret;
 
     s->y_superblock_width  = (s->width  + 31) / 32;
     s->y_superblock_height = (s->height + 31) / 32;

diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 7841a9d..a06692c 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c

@@ -27,6 +27,7 @@
 #include "libavutil/imgutils.h"
 
 #include "avcodec.h"
+#include "hwaccel.h"
 #include "internal.h"
 #include "mathops.h"
 #include "rectangle.h"
@@ -72,16 +73,30 @@
     if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                     ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
         return ret;
-    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
-        ff_thread_release_buffer(s->avctx, &f->tf);
-        return AVERROR(ENOMEM);
+    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
+        goto fail;
+    if (s->avctx->hwaccel) {
+        const AVHWAccel *hwaccel = s->avctx->hwaccel;
+        if (hwaccel->frame_priv_data_size) {
+            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+            if (!f->hwaccel_priv_buf)
+                goto fail;
+            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
+        }
     }
     return 0;
+
+fail:
+    av_buffer_unref(&f->seg_map);
+    ff_thread_release_buffer(s->avctx, &f->tf);
+    return AVERROR(ENOMEM);
 }
 
 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
 {
     av_buffer_unref(&f->seg_map);
+    av_buffer_unref(&f->hwaccel_priv_buf);
+    f->hwaccel_picture_private = NULL;
     ff_thread_release_buffer(s->avctx, &f->tf);
 }
 
@@ -99,6 +114,12 @@
         vp8_release_frame(s, dst);
         return AVERROR(ENOMEM);
     }
+    if (src->hwaccel_picture_private) {
+        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
+        if (!dst->hwaccel_priv_buf)
+            return AVERROR(ENOMEM);
+        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
+    }
 
     return 0;
 }
@@ -140,12 +161,28 @@
         av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
         abort();
     }
-    if (frame->tf.f->data[0])
+    if (frame->tf.f->buf[0])
         vp8_release_frame(s, frame);
 
     return frame;
 }
 
+static enum AVPixelFormat get_pixel_format(VP8Context *s)
+{
+    enum AVPixelFormat pix_fmts[] = {
+#if CONFIG_VP8_VAAPI_HWACCEL
+        AV_PIX_FMT_VAAPI,
+#endif
+#if CONFIG_VP8_NVDEC_HWACCEL
+        AV_PIX_FMT_CUDA,
+#endif
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_NONE,
+    };
+
+    return ff_get_format(s->avctx, pix_fmts);
+}
+
 static av_always_inline
 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
 {
@@ -161,6 +198,13 @@
             return ret;
     }
 
+    if (!s->actually_webp && !is_vp7) {
+        s->pix_fmt = get_pixel_format(s);
+        if (s->pix_fmt < 0)
+            return AVERROR(EINVAL);
+        avctx->pix_fmt = s->pix_fmt;
+    }
+
     s->mb_width  = (s->avctx->coded_width  + 15) / 16;
     s->mb_height = (s->avctx->coded_height + 15) / 16;
 
@@ -218,8 +262,9 @@
     int i;
 
     s->segmentation.update_map = vp8_rac_get(c);
+    s->segmentation.update_feature_data = vp8_rac_get(c);
 
-    if (vp8_rac_get(c)) { // update segment feature data
+    if (s->segmentation.update_feature_data) {
         s->segmentation.absolute_vals = vp8_rac_get(c);
 
         for (i = 0; i < 4; i++)
@@ -274,6 +319,7 @@
         int size = AV_RL24(sizes + 3 * i);
         if (buf_size - size < 0)
             return -1;
+        s->coeff_partition_size[i] = size;
 
         ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
         if (ret < 0)
@@ -281,7 +327,11 @@
         buf      += size;
         buf_size -= size;
     }
-    return ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
+
+    s->coeff_partition_size[i] = buf_size;
+    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
+
+    return 0;
 }
 
 static void vp7_get_quants(VP8Context *s)
@@ -308,28 +358,28 @@
     VP56RangeCoder *c = &s->c;
     int i, base_qi;
 
-    int yac_qi     = vp8_rac_get_uint(c, 7);
-    int ydc_delta  = vp8_rac_get_sint(c, 4);
-    int y2dc_delta = vp8_rac_get_sint(c, 4);
-    int y2ac_delta = vp8_rac_get_sint(c, 4);
-    int uvdc_delta = vp8_rac_get_sint(c, 4);
-    int uvac_delta = vp8_rac_get_sint(c, 4);
+    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
+    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
+    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
+    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
+    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
+    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
 
     for (i = 0; i < 4; i++) {
         if (s->segmentation.enabled) {
             base_qi = s->segmentation.base_quant[i];
             if (!s->segmentation.absolute_vals)
-                base_qi += yac_qi;
+                base_qi += s->quant.yac_qi;
         } else
-            base_qi = yac_qi;
+            base_qi = s->quant.yac_qi;
 
-        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
+        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
         s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
-        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
+        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
         /* 101581>>16 is equivalent to 155/100 */
-        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
-        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
-        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
+        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
+        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
+        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
 
         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
@@ -442,9 +492,11 @@
 {
     int i, j;
     for (j = 0; j < height; j++) {
+        const uint8_t *src2 = src + j * src_linesize;
+        uint8_t *dst2 = dst + j * dst_linesize;
         for (i = 0; i < width; i++) {
-            uint8_t y = src[j * src_linesize + i];
-            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
+            uint8_t y = src2[i];
+            dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
         }
     }
 }
@@ -455,6 +507,9 @@
     int beta  = (int8_t) vp8_rac_get_uint(c, 8);
     int ret;
 
+    if (c->end <= c->buffer && c->bits >= 0)
+        return AVERROR_INVALIDDATA;
+
     if (!s->keyframe && (alpha || beta)) {
         int width  = s->mb_width * 16;
         int height = s->mb_height * 16;
@@ -606,6 +661,8 @@
             s->fade_present = vp8_rac_get(c);
     }
 
+    if (c->end <= c->buffer && c->bits >= 0)
+        return AVERROR_INVALIDDATA;
     /* E. Fading information for previous frame */
     if (s->fade_present && vp8_rac_get(c)) {
         if ((ret = vp7_fade_frame(s ,c)) < 0)
@@ -661,6 +718,8 @@
     buf      += 3;
     buf_size -= 3;
 
+    s->header_partition_size = header_size;
+
     if (s->profile > 3)
         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
 
@@ -726,9 +785,11 @@
     s->filter.level     = vp8_rac_get_uint(c, 6);
     s->filter.sharpness = vp8_rac_get_uint(c, 3);
 
-    if ((s->lf_delta.enabled = vp8_rac_get(c)))
-        if (vp8_rac_get(c))
+    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
+        s->lf_delta.update = vp8_rac_get(c);
+        if (s->lf_delta.update)
             update_lf_deltas(s);
+    }
 
     if (setup_partitions(s, buf, buf_size)) {
         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
@@ -768,6 +829,13 @@
         vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
     }
 
+    // Record the entropy coder state here so that hwaccels can use it.
+    s->c.code_word = vp56_rac_renorm(&s->c);
+    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
+    s->coder_state_at_header_end.range     = s->c.high;
+    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
+    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
+
     return 0;
 }
 
@@ -2540,7 +2608,6 @@
     return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
 }
 
-
 static av_always_inline
 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                       AVPacket *avpkt, int is_vp7)
@@ -2550,8 +2617,6 @@
     enum AVDiscard skip_thresh;
     VP8Frame *av_uninit(curframe), *prev_frame;
 
-    av_assert0(avctx->pix_fmt == AV_PIX_FMT_YUVA420P || avctx->pix_fmt == AV_PIX_FMT_YUV420P);
-
     if (is_vp7)
         ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
     else
@@ -2560,6 +2625,17 @@
     if (ret < 0)
         goto err;
 
+    if (s->actually_webp) {
+        // avctx->pix_fmt already set in caller.
+    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
+        s->pix_fmt = get_pixel_format(s);
+        if (s->pix_fmt < 0) {
+            ret = AVERROR(EINVAL);
+            goto err;
+        }
+        avctx->pix_fmt = s->pix_fmt;
+    }
+
     prev_frame = s->framep[VP56_FRAME_CURRENT];
 
     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
@@ -2578,7 +2654,7 @@
 
     // release no longer referenced frames
     for (i = 0; i < 5; i++)
-        if (s->frames[i].tf.f->data[0] &&
+        if (s->frames[i].tf.f->buf[0] &&
             &s->frames[i] != prev_frame &&
             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
@@ -2631,56 +2707,70 @@
 
     s->next_framep[VP56_FRAME_CURRENT] = curframe;
 
-    if (avctx->codec->update_thread_context)
-        ff_thread_finish_setup(avctx);
+    ff_thread_finish_setup(avctx);
 
-    s->linesize   = curframe->tf.f->linesize[0];
-    s->uvlinesize = curframe->tf.f->linesize[1];
+    if (avctx->hwaccel) {
+        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
+        if (ret < 0)
+            goto err;
 
-    memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
-    /* Zero macroblock structures for top/top-left prediction
-     * from outside the frame. */
-    if (!s->mb_layout)
-        memset(s->macroblocks + s->mb_height * 2 - 1, 0,
-               (s->mb_width + 1) * sizeof(*s->macroblocks));
-    if (!s->mb_layout && s->keyframe)
-        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
+        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
+        if (ret < 0)
+            goto err;
 
-    memset(s->ref_count, 0, sizeof(s->ref_count));
+        ret = avctx->hwaccel->end_frame(avctx);
+        if (ret < 0)
+            goto err;
 
-    if (s->mb_layout == 1) {
-        // Make sure the previous frame has read its segmentation map,
-        // if we re-use the same map.
-        if (prev_frame && s->segmentation.enabled &&
-            !s->segmentation.update_map)
-            ff_thread_await_progress(&prev_frame->tf, 1, 0);
-        if (is_vp7)
-            vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
+    } else {
+        s->linesize   = curframe->tf.f->linesize[0];
+        s->uvlinesize = curframe->tf.f->linesize[1];
+
+        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
+        /* Zero macroblock structures for top/top-left prediction
+         * from outside the frame. */
+        if (!s->mb_layout)
+            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
+                   (s->mb_width + 1) * sizeof(*s->macroblocks));
+        if (!s->mb_layout && s->keyframe)
+            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
+
+        memset(s->ref_count, 0, sizeof(s->ref_count));
+
+        if (s->mb_layout == 1) {
+            // Make sure the previous frame has read its segmentation map,
+            // if we re-use the same map.
+            if (prev_frame && s->segmentation.enabled &&
+                !s->segmentation.update_map)
+                ff_thread_await_progress(&prev_frame->tf, 1, 0);
+            if (is_vp7)
+                vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
+            else
+                vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
+        }
+
+        if (avctx->active_thread_type == FF_THREAD_FRAME)
+            num_jobs = 1;
         else
-            vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
+            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
+        s->num_jobs   = num_jobs;
+        s->curframe   = curframe;
+        s->prev_frame = prev_frame;
+        s->mv_bounds.mv_min.y   = -MARGIN;
+        s->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
+        for (i = 0; i < MAX_THREADS; i++) {
+            VP8ThreadData *td = &s->thread_data[i];
+            atomic_init(&td->thread_mb_pos, 0);
+            atomic_init(&td->wait_mb_pos, INT_MAX);
+        }
+        if (is_vp7)
+            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
+                            num_jobs);
+        else
+            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
+                            num_jobs);
     }
 
-    if (avctx->active_thread_type == FF_THREAD_FRAME)
-        num_jobs = 1;
-    else
-        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
-    s->num_jobs   = num_jobs;
-    s->curframe   = curframe;
-    s->prev_frame = prev_frame;
-    s->mv_bounds.mv_min.y   = -MARGIN;
-    s->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
-    for (i = 0; i < MAX_THREADS; i++) {
-        VP8ThreadData *td = &s->thread_data[i];
-        atomic_init(&td->thread_mb_pos, 0);
-        atomic_init(&td->wait_mb_pos, INT_MAX);
-    }
-    if (is_vp7)
-        avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
-                        num_jobs);
-    else
-        avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
-                        num_jobs);
-
     ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
 
@@ -2750,6 +2840,7 @@
 
     s->avctx = avctx;
     s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
+    s->pix_fmt = AV_PIX_FMT_NONE;
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
     avctx->internal->allocate_progress = 1;
 
@@ -2823,13 +2914,14 @@
         s->mb_height = s_src->mb_height;
     }
 
+    s->pix_fmt      = s_src->pix_fmt;
     s->prob[0]      = s_src->prob[!s_src->update_probabilities];
     s->segmentation = s_src->segmentation;
     s->lf_delta     = s_src->lf_delta;
     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
 
     for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
-        if (s_src->frames[i].tf.f->data[0]) {
+        if (s_src->frames[i].tf.f->buf[0]) {
             int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
             if (ret < 0)
                 return ret;
@@ -2876,5 +2968,14 @@
     .flush                 = vp8_decode_flush,
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
+    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
+#if CONFIG_VP8_VAAPI_HWACCEL
+                               HWACCEL_VAAPI(vp8),
+#endif
+#if CONFIG_VP8_NVDEC_HWACCEL
+                               HWACCEL_NVDEC(vp8),
+#endif
+                               NULL
+                           },
 };
 #endif /* CONFIG_VP7_DECODER */

diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 8263997..70d21e3 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h

@@ -138,12 +138,18 @@
 typedef struct VP8Frame {
     ThreadFrame tf;
     AVBufferRef *seg_map;
+
+    AVBufferRef *hwaccel_priv_buf;
+    void *hwaccel_picture_private;
 } VP8Frame;
 
 #define MAX_THREADS 8
 typedef struct VP8Context {
     VP8ThreadData *thread_data;
     AVCodecContext *avctx;
+    enum AVPixelFormat pix_fmt;
+    int actually_webp;
+
     VP8Frame *framep[4];
     VP8Frame *next_framep[4];
     VP8Frame *curframe;
@@ -172,6 +178,7 @@
         uint8_t enabled;
         uint8_t absolute_vals;
         uint8_t update_map;
+        uint8_t update_feature_data;
         int8_t base_quant[4];
         int8_t filter_level[4];     ///< base loop filter level
     } segmentation;
@@ -199,8 +206,19 @@
         int16_t chroma_qmul[2];
     } qmat[4];
 
+    // Raw quantisation values, which may be needed by hwaccel decode.
+    struct {
+        int yac_qi;
+        int ydc_delta;
+        int y2dc_delta;
+        int y2ac_delta;
+        int uvdc_delta;
+        int uvac_delta;
+    } quant;
+
     struct {
         uint8_t enabled;    ///< whether each mb can have a different strength based on mode/ref
+        uint8_t update;
 
         /**
          * filter strength adjustment for the following macroblock modes:
@@ -228,6 +246,20 @@
 
     VP56RangeCoder c;   ///< header context, includes mb modes and motion vectors
 
+    /* This contains the entropy coder state at the end of the header
+     * block, in the form specified by the standard.  For use by
+     * hwaccels, so that a hardware decoder has the information to
+     * start decoding at the macroblock layer.
+     */
+    struct {
+        const uint8_t *input;
+        uint32_t range;
+        uint32_t value;
+        int bit_count;
+    } coder_state_at_header_end;
+
+    int header_partition_size;
+
     /**
      * These are all of the updatable probabilities for binary decisions.
      * They are only implicitly reset on keyframes, making it quite likely
@@ -265,6 +297,7 @@
      */
     int num_coeff_partitions;
     VP56RangeCoder coeff_partition[8];
+    int coeff_partition_size[8];
     VideoDSPContext vdsp;
     VP8DSPContext vp8dsp;
     H264PredContext hpc;

diff --git a/libavcodec/vp8_parser.c b/libavcodec/vp8_parser.c
index 609f507..7ce35e7 100644
--- a/libavcodec/vp8_parser.c
+++ b/libavcodec/vp8_parser.c

@@ -28,6 +28,9 @@
     unsigned int frame_type;
     unsigned int profile;
 
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+
     if (buf_size < 3)
         return buf_size;
 
@@ -67,8 +70,6 @@
         s->coded_height = FFALIGN(height, 16);
     }
 
-    *poutbuf      = buf;
-    *poutbuf_size = buf_size;
     return buf_size;
 }
 

diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index f2cf194..b1178c9 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c

@@ -23,6 +23,7 @@
 
 #include "avcodec.h"
 #include "get_bits.h"
+#include "hwaccel.h"
 #include "internal.h"
 #include "profiles.h"
 #include "thread.h"
@@ -169,7 +170,10 @@
 
 static int update_size(AVCodecContext *avctx, int w, int h)
 {
-#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL * 2 + CONFIG_VP9_VAAPI_HWACCEL)
+#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
+                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
+                     CONFIG_VP9_NVDEC_HWACCEL + \
+                     CONFIG_VP9_VAAPI_HWACCEL)
     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
     VP9Context *s = avctx->priv_data;
     uint8_t *p;
@@ -184,6 +188,7 @@
 
         switch (s->pix_fmt) {
         case AV_PIX_FMT_YUV420P:
+        case AV_PIX_FMT_YUV420P10:
 #if CONFIG_VP9_DXVA2_HWACCEL
             *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
 #endif
@@ -191,12 +196,17 @@
             *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
             *fmtp++ = AV_PIX_FMT_D3D11;
 #endif
+#if CONFIG_VP9_NVDEC_HWACCEL
+            *fmtp++ = AV_PIX_FMT_CUDA;
+#endif
 #if CONFIG_VP9_VAAPI_HWACCEL
             *fmtp++ = AV_PIX_FMT_VAAPI;
 #endif
             break;
-        case AV_PIX_FMT_YUV420P10:
         case AV_PIX_FMT_YUV420P12:
+#if CONFIG_VP9_NVDEC_HWACCEL
+            *fmtp++ = AV_PIX_FMT_CUDA;
+#endif
 #if CONFIG_VP9_VAAPI_HWACCEL
             *fmtp++ = AV_PIX_FMT_VAAPI;
 #endif
@@ -1787,4 +1797,23 @@
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
     .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
+    .bsfs                  = "vp9_superframe_split",
+    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
+#if CONFIG_VP9_DXVA2_HWACCEL
+                               HWACCEL_DXVA2(vp9),
+#endif
+#if CONFIG_VP9_D3D11VA_HWACCEL
+                               HWACCEL_D3D11VA(vp9),
+#endif
+#if CONFIG_VP9_D3D11VA2_HWACCEL
+                               HWACCEL_D3D11VA2(vp9),
+#endif
+#if CONFIG_VP9_NVDEC_HWACCEL
+                               HWACCEL_NVDEC(vp9),
+#endif
+#if CONFIG_VP9_VAAPI_HWACCEL
+                               HWACCEL_VAAPI(vp9),
+#endif
+                               NULL
+                           },
 };

diff --git a/libavcodec/vp9_metadata_bsf.c b/libavcodec/vp9_metadata_bsf.c
new file mode 100644
index 0000000..be010ed
--- /dev/null
+++ b/libavcodec/vp9_metadata_bsf.c

@@ -0,0 +1,162 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "bsf.h"
+#include "cbs.h"
+#include "cbs_vp9.h"
+
+typedef struct VP9MetadataContext {
+    const AVClass *class;
+
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment fragment;
+
+    int color_space;
+    int color_range;
+
+    int color_range_rgb_warned;
+} VP9MetadataContext;
+
+
+static int vp9_metadata_filter(AVBSFContext *bsf, AVPacket *out)
+{
+    VP9MetadataContext *ctx = bsf->priv_data;
+    AVPacket *in = NULL;
+    CodedBitstreamFragment *frag = &ctx->fragment;
+    int err, i;
+
+    err = ff_bsf_get_packet(bsf, &in);
+    if (err < 0)
+        return err;
+
+    err = ff_cbs_read_packet(ctx->cbc, frag, in);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
+        goto fail;
+    }
+
+    for (i = 0; i < frag->nb_units; i++) {
+        VP9RawFrame *frame = frag->units[i].content;
+        VP9RawFrameHeader *header = &frame->header;
+
+        if (ctx->color_space >= 0) {
+            header->color_space = ctx->color_space;
+        }
+        if (ctx->color_range >= 0) {
+            if (ctx->color_range == 0 &&
+                header->color_space == VP9_CS_RGB &&
+                !ctx->color_range_rgb_warned) {
+                av_log(bsf, AV_LOG_WARNING, "Warning: color_range cannot "
+                       "be set to limited in RGB streams.\n");
+                ctx->color_range_rgb_warned = 1;
+            } else {
+                header->color_range = ctx->color_range;
+            }
+        }
+    }
+
+    err = ff_cbs_write_packet(ctx->cbc, out, frag);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
+        goto fail;
+    }
+
+    err = av_packet_copy_props(out, in);
+    if (err < 0)
+        goto fail;
+
+    err = 0;
+fail:
+    ff_cbs_fragment_uninit(ctx->cbc, frag);
+
+    if (err < 0)
+        av_packet_unref(out);
+    av_packet_free(&in);
+
+    return err;
+}
+
+static int vp9_metadata_init(AVBSFContext *bsf)
+{
+    VP9MetadataContext *ctx = bsf->priv_data;
+
+    return ff_cbs_init(&ctx->cbc, AV_CODEC_ID_VP9, bsf);
+}
+
+static void vp9_metadata_close(AVBSFContext *bsf)
+{
+    VP9MetadataContext *ctx = bsf->priv_data;
+    ff_cbs_close(&ctx->cbc);
+}
+
+#define OFFSET(x) offsetof(VP9MetadataContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
+static const AVOption vp9_metadata_options[] = {
+    { "color_space", "Set colour space (section 7.2.2)",
+        OFFSET(color_space), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, VP9_CS_RGB, FLAGS, "cs" },
+    { "unknown",  "Unknown/unspecified",  0, AV_OPT_TYPE_CONST,
+        { .i64 = VP9_CS_UNKNOWN   }, .flags = FLAGS, .unit = "cs" },
+    { "bt601",    "ITU-R BT.601-7",       0, AV_OPT_TYPE_CONST,
+        { .i64 = VP9_CS_BT_601    }, .flags = FLAGS, .unit = "cs" },
+    { "bt709",    "ITU-R BT.709-6",       0, AV_OPT_TYPE_CONST,
+        { .i64 = VP9_CS_BT_709    }, .flags = FLAGS, .unit = "cs" },
+    { "smpte170", "SMPTE-170",            0, AV_OPT_TYPE_CONST,
+        { .i64 = VP9_CS_SMPTE_170 }, .flags = FLAGS, .unit = "cs" },
+    { "smpte240", "SMPTE-240",            0, AV_OPT_TYPE_CONST,
+        { .i64 = VP9_CS_SMPTE_240 }, .flags = FLAGS, .unit = "cs" },
+    { "bt2020",   "ITU-R BT.2020-2",      0, AV_OPT_TYPE_CONST,
+        { .i64 = VP9_CS_BT_2020   }, .flags = FLAGS, .unit = "cs" },
+    { "rgb",      "sRGB / IEC 61966-2-1", 0, AV_OPT_TYPE_CONST,
+        { .i64 = VP9_CS_RGB       }, .flags = FLAGS, .unit = "cs" },
+
+    { "color_range", "Set colour range (section 7.2.2)",
+        OFFSET(color_range), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 1, FLAGS, "cr" },
+    { "tv", "TV (limited) range", 0, AV_OPT_TYPE_CONST,
+        { .i64 = 0 }, .flags = FLAGS, .unit = "cr" },
+    { "pc", "PC (full) range",    0, AV_OPT_TYPE_CONST,
+        { .i64 = 1 }, .flags = FLAGS, .unit = "cr" },
+
+    { NULL }
+};
+
+static const AVClass vp9_metadata_class = {
+    .class_name = "vp9_metadata_bsf",
+    .item_name  = av_default_item_name,
+    .option     = vp9_metadata_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const enum AVCodecID vp9_metadata_codec_ids[] = {
+    AV_CODEC_ID_VP9, AV_CODEC_ID_NONE,
+};
+
+const AVBitStreamFilter ff_vp9_metadata_bsf = {
+    .name           = "vp9_metadata",
+    .priv_data_size = sizeof(VP9MetadataContext),
+    .priv_class     = &vp9_metadata_class,
+    .init           = &vp9_metadata_init,
+    .close          = &vp9_metadata_close,
+    .filter         = &vp9_metadata_filter,
+    .codec_ids      = vp9_metadata_codec_ids,
+};

diff --git a/libavcodec/vp9_parser.c b/libavcodec/vp9_parser.c
index 9900e7a..9531f34 100644
--- a/libavcodec/vp9_parser.c
+++ b/libavcodec/vp9_parser.c

@@ -25,21 +25,19 @@
 #include "libavcodec/get_bits.h"
 #include "parser.h"
 
-typedef struct VP9ParseContext {
-    int n_frames; // 1-8
-    int size[8];
-    int marker_size;
-    int64_t pts;
-} VP9ParseContext;
-
-static int parse_frame(AVCodecParserContext *ctx, const uint8_t *buf, int size)
+static int parse(AVCodecParserContext *ctx,
+                 AVCodecContext *avctx,
+                 const uint8_t **out_data, int *out_size,
+                 const uint8_t *data, int size)
 {
-    VP9ParseContext *s = ctx->priv_data;
     GetBitContext gb;
-    int res, profile, keyframe, invisible;
+    int res, profile, keyframe;
 
-    if ((res = init_get_bits8(&gb, buf, size)) < 0)
-        return res;
+    *out_data = data;
+    *out_size = size;
+
+    if ((res = init_get_bits8(&gb, data, size)) < 0)
+        return size; // parsers can't return errors
     get_bits(&gb, 2); // frame marker
     profile  = get_bits1(&gb);
     profile |= get_bits1(&gb) << 1;
@@ -47,10 +45,8 @@
 
     if (get_bits1(&gb)) {
         keyframe = 0;
-        invisible = 0;
     } else {
         keyframe  = !get_bits1(&gb);
-        invisible = !get_bits1(&gb);
     }
 
     if (!keyframe) {
@@ -61,113 +57,10 @@
         ctx->key_frame = 1;
     }
 
-    if (!invisible) {
-        if (ctx->pts == AV_NOPTS_VALUE)
-            ctx->pts = s->pts;
-        s->pts = AV_NOPTS_VALUE;
-    } else if (ctx->pts != AV_NOPTS_VALUE) {
-        s->pts = ctx->pts;
-        ctx->pts = AV_NOPTS_VALUE;
-    }
-
-    return 0;
-}
-
-static int parse(AVCodecParserContext *ctx,
-                 AVCodecContext *avctx,
-                 const uint8_t **out_data, int *out_size,
-                 const uint8_t *data, int size)
-{
-    VP9ParseContext *s = ctx->priv_data;
-    int full_size = size;
-    int marker;
-
-    if (size <= 0) {
-        *out_size = 0;
-        *out_data = data;
-
-        return 0;
-    }
-
-    if (s->n_frames > 0) {
-        int i;
-        int size_sum = 0;
-
-        for (i = 0; i < s->n_frames ;i++)
-            size_sum += s->size[i];
-        size_sum += s->marker_size;
-
-        if (size_sum != size) {
-            av_log(avctx, AV_LOG_ERROR, "Inconsistent input frame sizes %d %d\n",
-                   size_sum, size);
-            s->n_frames = 0;
-        }
-    }
-
-    if (s->n_frames > 0) {
-        *out_data = data;
-        *out_size = s->size[--s->n_frames];
-        parse_frame(ctx, *out_data, *out_size);
-
-        return s->n_frames > 0 ? *out_size : size /* i.e. include idx tail */;
-    }
-
-    marker = data[size - 1];
-    if ((marker & 0xe0) == 0xc0) {
-        int nbytes = 1 + ((marker >> 3) & 0x3);
-        int n_frames = 1 + (marker & 0x7), idx_sz = 2 + n_frames * nbytes;
-
-        if (size >= idx_sz && data[size - idx_sz] == marker) {
-            const uint8_t *idx = data + size + 1 - idx_sz;
-            int first = 1;
-
-            switch (nbytes) {
-#define case_n(a, rd) \
-            case a: \
-                while (n_frames--) { \
-                    unsigned sz = rd; \
-                    idx += a; \
-                    if (sz == 0 || sz > size) { \
-                        s->n_frames = 0; \
-                        *out_size = size; \
-                        *out_data = data; \
-                        av_log(avctx, AV_LOG_ERROR, \
-                               "Invalid superframe packet size: %u frame size: %d\n", \
-                               sz, size); \
-                        return full_size; \
-                    } \
-                    if (first) { \
-                        first = 0; \
-                        *out_data = data; \
-                        *out_size = sz; \
-                        s->n_frames = n_frames; \
-                    } else { \
-                        s->size[n_frames] = sz; \
-                    } \
-                    data += sz; \
-                    size -= sz; \
-                } \
-                s->marker_size = size; \
-                parse_frame(ctx, *out_data, *out_size); \
-                return s->n_frames > 0 ? *out_size : full_size
-
-                case_n(1, *idx);
-                case_n(2, AV_RL16(idx));
-                case_n(3, AV_RL24(idx));
-                case_n(4, AV_RL32(idx));
-            }
-        }
-    }
-
-    *out_data = data;
-    *out_size = size;
-    parse_frame(ctx, data, size);
-
     return size;
 }
 
 AVCodecParser ff_vp9_parser = {
     .codec_ids      = { AV_CODEC_ID_VP9 },
-    .priv_data_size = sizeof(VP9ParseContext),
     .parser_parse   = parse,
 };

diff --git a/libavcodec/vp9_superframe_bsf.c b/libavcodec/vp9_superframe_bsf.c
index b686adb..ea67507 100644
--- a/libavcodec/vp9_superframe_bsf.c
+++ b/libavcodec/vp9_superframe_bsf.c

@@ -27,20 +27,17 @@
 #define MAX_CACHE 8
 typedef struct VP9BSFContext {
     int n_cache;
-    struct CachedBuf {
-        uint8_t *data;
-        int size;
-    } cache[MAX_CACHE];
+    AVPacket *cache[MAX_CACHE];
 } VP9BSFContext;
 
-static void stats(const struct CachedBuf *in, int n_in,
+static void stats(AVPacket * const *in, int n_in,
                   unsigned *_max, unsigned *_sum)
 {
     int n;
     unsigned max = 0, sum = 0;
 
     for (n = 0; n < n_in; n++) {
-        unsigned sz = in[n].size;
+        unsigned sz = in[n]->size;
 
         if (sz > max)
             max = sz;
@@ -51,7 +48,7 @@
     *_sum = sum;
 }
 
-static int merge_superframe(const struct CachedBuf *in, int n_in, AVPacket *out)
+static int merge_superframe(AVPacket * const *in, int n_in, AVPacket *out)
 {
     unsigned max, sum, mag, marker, n, sz;
     uint8_t *ptr;
@@ -66,30 +63,32 @@
         return res;
     ptr = out->data;
     for (n = 0; n < n_in; n++) {
-        memcpy(ptr, in[n].data, in[n].size);
-        ptr += in[n].size;
+        memcpy(ptr, in[n]->data, in[n]->size);
+        ptr += in[n]->size;
     }
 
 #define wloop(mag, wr) \
-    for (n = 0; n < n_in; n++) { \
-        wr; \
-        ptr += mag + 1; \
-    }
+    do { \
+        for (n = 0; n < n_in; n++) { \
+            wr; \
+            ptr += mag + 1; \
+        } \
+    } while (0)
 
     // write superframe with marker 110[mag:2][nframes:3]
     *ptr++ = marker;
     switch (mag) {
     case 0:
-        wloop(mag, *ptr = in[n].size);
+        wloop(mag, *ptr = in[n]->size);
         break;
     case 1:
-        wloop(mag, AV_WL16(ptr, in[n].size));
+        wloop(mag, AV_WL16(ptr, in[n]->size));
         break;
     case 2:
-        wloop(mag, AV_WL24(ptr, in[n].size));
+        wloop(mag, AV_WL24(ptr, in[n]->size));
         break;
     case 3:
-        wloop(mag, AV_WL32(ptr, in[n].size));
+        wloop(mag, AV_WL32(ptr, in[n]->size));
         break;
     }
     *ptr++ = marker;
@@ -135,7 +134,7 @@
     if (uses_superframe_syntax && s->n_cache > 0) {
         av_log(ctx, AV_LOG_ERROR,
                "Mixing of superframe syntax and naked VP9 frames not supported");
-        res = AVERROR_INVALIDDATA;
+        res = AVERROR(ENOSYS);
         goto done;
     } else if ((!invisible || uses_superframe_syntax) && !s->n_cache) {
         // passthrough
@@ -148,33 +147,26 @@
         goto done;
     }
 
-    s->cache[s->n_cache].size = in->size;
-    if (invisible && !uses_superframe_syntax) {
-        s->cache[s->n_cache].data = av_malloc(in->size);
-        if (!s->cache[s->n_cache].data) {
-            res = AVERROR(ENOMEM);
-            goto done;
-        }
-        memcpy(s->cache[s->n_cache++].data, in->data, in->size);
+    av_packet_move_ref(s->cache[s->n_cache++], in);
+
+    if (invisible) {
         res = AVERROR(EAGAIN);
         goto done;
     }
     av_assert0(s->n_cache > 0);
 
-    s->cache[s->n_cache].data = in->data;
-
     // build superframe
-    if ((res = merge_superframe(s->cache, s->n_cache + 1, out)) < 0)
+    if ((res = merge_superframe(s->cache, s->n_cache, out)) < 0)
+        goto done;
+
+    res = av_packet_copy_props(out, s->cache[s->n_cache - 1]);
+    if (res < 0)
         goto done;
 
     for (n = 0; n < s->n_cache; n++)
-        av_freep(&s->cache[n].data);
+        av_packet_unref(s->cache[n]);
     s->n_cache = 0;
 
-    res = av_packet_copy_props(out, in);
-    if (res < 0)
-        goto done;
-
 done:
     if (res < 0)
         av_packet_unref(out);
@@ -182,14 +174,40 @@
     return res;
 }
 
+static int vp9_superframe_init(AVBSFContext *ctx)
+{
+    VP9BSFContext *s = ctx->priv_data;
+    int n;
+
+    // alloc cache packets
+    for (n = 0; n < MAX_CACHE; n++) {
+        s->cache[n] = av_packet_alloc();
+        if (!s->cache[n])
+            return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+static void vp9_superframe_flush(AVBSFContext *ctx)
+{
+    VP9BSFContext *s = ctx->priv_data;
+    int n;
+
+    // unref cached data
+    for (n = 0; n < s->n_cache; n++)
+        av_packet_unref(s->cache[n]);
+    s->n_cache = 0;
+}
+
 static void vp9_superframe_close(AVBSFContext *ctx)
 {
     VP9BSFContext *s = ctx->priv_data;
     int n;
 
     // free cached data
-    for (n = 0; n < s->n_cache; n++)
-        av_freep(&s->cache[n].data);
+    for (n = 0; n < MAX_CACHE; n++)
+        av_packet_free(&s->cache[n]);
 }
 
 static const enum AVCodecID codec_ids[] = {
@@ -200,6 +218,8 @@
     .name           = "vp9_superframe",
     .priv_data_size = sizeof(VP9BSFContext),
     .filter         = vp9_superframe_filter,
+    .init           = vp9_superframe_init,
+    .flush          = vp9_superframe_flush,
     .close          = vp9_superframe_close,
     .codec_ids      = codec_ids,
 };

diff --git a/libavcodec/vp9_superframe_split_bsf.c b/libavcodec/vp9_superframe_split_bsf.c
index 0d2523e..13e85c3 100644
--- a/libavcodec/vp9_superframe_split_bsf.c
+++ b/libavcodec/vp9_superframe_split_bsf.c

@@ -43,10 +43,10 @@
     VP9SFSplitContext *s = ctx->priv_data;
     AVPacket *in;
     int i, j, ret, marker;
-    int is_superframe = !!s->buffer_pkt;
+    int is_superframe = !!s->buffer_pkt->data;
 
-    if (!s->buffer_pkt) {
-        ret = ff_bsf_get_packet(ctx, &s->buffer_pkt);
+    if (!s->buffer_pkt->data) {
+        ret = ff_bsf_get_packet_ref(ctx, s->buffer_pkt);
         if (ret < 0)
             return ret;
         in = s->buffer_pkt;
@@ -101,7 +101,7 @@
         s->next_frame++;
 
         if (s->next_frame >= s->nb_frames)
-            av_packet_free(&s->buffer_pkt);
+            av_packet_unref(s->buffer_pkt);
 
         ret = init_get_bits8(&gb, out->data, out->size);
         if (ret < 0)
@@ -122,15 +122,33 @@
 
     } else {
         av_packet_move_ref(out, s->buffer_pkt);
-        av_packet_free(&s->buffer_pkt);
     }
 
     return 0;
 fail:
-    av_packet_free(&s->buffer_pkt);
+    if (ret < 0)
+        av_packet_unref(out);
+    av_packet_unref(s->buffer_pkt);
     return ret;
 }
 
+static int vp9_superframe_split_init(AVBSFContext *ctx)
+{
+    VP9SFSplitContext *s = ctx->priv_data;
+
+    s->buffer_pkt = av_packet_alloc();
+    if (!s->buffer_pkt)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static void vp9_superframe_split_flush(AVBSFContext *ctx)
+{
+    VP9SFSplitContext *s = ctx->priv_data;
+    av_packet_unref(s->buffer_pkt);
+}
+
 static void vp9_superframe_split_uninit(AVBSFContext *ctx)
 {
     VP9SFSplitContext *s = ctx->priv_data;
@@ -140,6 +158,8 @@
 const AVBitStreamFilter ff_vp9_superframe_split_bsf = {
     .name = "vp9_superframe_split",
     .priv_data_size = sizeof(VP9SFSplitContext),
+    .init           = vp9_superframe_split_init,
+    .flush          = vp9_superframe_split_flush,
     .close          = vp9_superframe_split_uninit,
     .filter         = vp9_superframe_split_filter,
     .codec_ids      = (const enum AVCodecID []){ AV_CODEC_ID_VP9, AV_CODEC_ID_NONE },

diff --git a/libavcodec/vda_vt_internal.h b/libavcodec/vt_internal.h
similarity index 79%
rename from libavcodec/vda_vt_internal.h
rename to libavcodec/vt_internal.h
index 326a60a..fb64735 100644
--- a/libavcodec/vda_vt_internal.h
+++ b/libavcodec/vt_internal.h

@@ -16,17 +16,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_VDA_VT_INTERNAL_H
-#define AVCODEC_VDA_VT_INTERNAL_H
-
-void ff_vda_output_callback(void *vda_hw_ctx,
-                            CFDictionaryRef user_info,
-                            OSStatus status,
-                            uint32_t infoFlags,
-                            CVImageBufferRef image_buffer);
-
-int ff_vda_default_init(AVCodecContext *avctx);
-void ff_vda_default_free(AVCodecContext *avctx);
+#ifndef AVCODEC_VT_INTERNAL_H
+#define AVCODEC_VT_INTERNAL_H
 
 typedef struct VTContext {
     // The current bitstream buffer.
@@ -47,11 +38,14 @@
     // Non-NULL if the new hwaccel API is used. This is only a separate struct
     // to ease compatibility with the old API.
     struct AVVideotoolboxContext *vt_ctx;
+
+    // Current H264 parameters (used to trigger decoder restart on SPS changes).
+    uint8_t                     sps[3];
+    bool                        reconfig_needed;
 } VTContext;
 
 int ff_videotoolbox_alloc_frame(AVCodecContext *avctx, AVFrame *frame);
 int ff_videotoolbox_uninit(AVCodecContext *avctx);
-int ff_videotoolbox_buffer_create(VTContext *vtctx, AVFrame *frame);
 int ff_videotoolbox_h264_start_frame(AVCodecContext *avctx,
                                      const uint8_t *buffer,
                                      uint32_t size);
@@ -60,4 +54,5 @@
                                       uint32_t size);
 CFDataRef ff_videotoolbox_avcc_extradata_create(AVCodecContext *avctx);
 CFDataRef ff_videotoolbox_hvcc_extradata_create(AVCodecContext *avctx);
-#endif /* AVCODEC_VDA_VT_INTERNAL_H */
+
+#endif /* AVCODEC_VT_INTERNAL_H */

diff --git a/libavcodec/wavpack.c b/libavcodec/wavpack.c
index 0e40b29..8306ec0 100644
--- a/libavcodec/wavpack.c
+++ b/libavcodec/wavpack.c

@@ -85,7 +85,7 @@
 
 #define LEVEL_DECAY(a)  (((a) + 0x80) >> 8)
 
-static av_always_inline int get_tail(GetBitContext *gb, int k)
+static av_always_inline unsigned get_tail(GetBitContext *gb, int k)
 {
     int p, e, res;
 
@@ -452,7 +452,7 @@
                 if (type != AV_SAMPLE_FMT_S16P)
                     R2 = R + ((s->decorr[i].weightB * (int64_t)L2 + 512) >> 10);
                 else
-                    R2 = R + ((int)(s->decorr[i].weightB * (unsigned)L2 + 512) >> 10);
+                    R2 = R + (unsigned)((int)(s->decorr[i].weightB * (unsigned)L2 + 512) >> 10);
                 UPDATE_WEIGHT_CLIP(s->decorr[i].weightB, s->decorr[i].delta, L2, R);
                 R                        = R2;
                 s->decorr[i].samplesA[0] = R;

diff --git a/libavcodec/wavpack.h b/libavcodec/wavpack.h
index 3ae601f..6caad03 100644
--- a/libavcodec/wavpack.h
+++ b/libavcodec/wavpack.h

@@ -99,8 +99,8 @@
 
 // macros for manipulating median values
 #define GET_MED(n) ((c->median[n] >> 4) + 1)
-#define DEC_MED(n) c->median[n] -= ((c->median[n] + (128 >> (n)) - 2) / (128 >> (n))) * 2U
-#define INC_MED(n) c->median[n] += ((c->median[n] + (128 >> (n))    ) / (128 >> (n))) * 5U
+#define DEC_MED(n) c->median[n] -= ((int)(c->median[n] + (128U >> (n)) - 2) / (128 >> (n))) * 2U
+#define INC_MED(n) c->median[n] += ((int)(c->median[n] + (128U >> (n))    ) / (128 >> (n))) * 5U
 
 // macros for applying weight
 #define UPDATE_WEIGHT_CLIP(weight, delta, samples, in) \

diff --git a/libavcodec/wcmv.c b/libavcodec/wcmv.c
new file mode 100644
index 0000000..ebd5ef6
--- /dev/null
+++ b/libavcodec/wcmv.c

@@ -0,0 +1,264 @@
+/*
+ * WinCAM Motion Video decoder
+ *
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libavutil/imgutils.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+
+#include <zlib.h>
+
+typedef struct WCMVContext {
+    int         bpp;
+    z_stream    zstream;
+    AVFrame    *prev_frame;
+    uint8_t     block_data[65536*8];
+} WCMVContext;
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *got_frame,
+                        AVPacket *avpkt)
+{
+    WCMVContext *s = avctx->priv_data;
+    AVFrame *frame = data;
+    int skip, blocks, zret, ret, intra = 0, bpp = s->bpp;
+    GetByteContext gb;
+    uint8_t *dst;
+
+    ret = inflateReset(&s->zstream);
+    if (ret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", ret);
+        return AVERROR_EXTERNAL;
+    }
+
+    bytestream2_init(&gb, avpkt->data, avpkt->size);
+
+    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
+        return ret;
+
+    if (s->prev_frame->data[0]) {
+        ret = av_frame_copy(frame, s->prev_frame);
+        if (ret < 0)
+            return ret;
+    } else {
+        ptrdiff_t linesize[4] = { frame->linesize[0], 0, 0, 0 };
+        av_image_fill_black(frame->data, linesize, avctx->pix_fmt, 0,
+                            avctx->width, avctx->height);
+    }
+
+    blocks = bytestream2_get_le16(&gb);
+    if (blocks > 5) {
+        GetByteContext bgb;
+        int x = 0, size;
+
+        if (blocks * 8 >= 0xFFFF)
+            size = bytestream2_get_le24(&gb);
+        else if (blocks * 8 >= 0xFF)
+            size = bytestream2_get_le16(&gb);
+        else
+            size = bytestream2_get_byte(&gb);
+
+        skip = bytestream2_tell(&gb);
+        if (size > avpkt->size - skip)
+            return AVERROR_INVALIDDATA;
+
+        s->zstream.next_in  = avpkt->data + skip;
+        s->zstream.avail_in = size;
+        s->zstream.next_out  = s->block_data;
+        s->zstream.avail_out = sizeof(s->block_data);
+
+        zret = inflate(&s->zstream, Z_FINISH);
+        if (zret != Z_STREAM_END) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Inflate failed with return code: %d.\n", zret);
+            return AVERROR_INVALIDDATA;
+        }
+
+        ret = inflateReset(&s->zstream);
+        if (ret != Z_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", ret);
+            return AVERROR_EXTERNAL;
+        }
+
+        bytestream2_skip(&gb, size);
+        bytestream2_init(&bgb, s->block_data, blocks * 8);
+
+        for (int i = 0; i < blocks; i++) {
+            int w, h;
+
+            bytestream2_skip(&bgb, 4);
+            w = bytestream2_get_le16(&bgb);
+            h = bytestream2_get_le16(&bgb);
+            if (x + bpp * (int64_t)w * h > INT_MAX)
+                return AVERROR_INVALIDDATA;
+            x += bpp * w * h;
+        }
+
+        if (x >= 0xFFFF)
+            bytestream2_skip(&gb, 3);
+        else if (x >= 0xFF)
+            bytestream2_skip(&gb, 2);
+        else
+            bytestream2_skip(&gb, 1);
+
+        skip = bytestream2_tell(&gb);
+
+        s->zstream.next_in  = avpkt->data + skip;
+        s->zstream.avail_in = avpkt->size - skip;
+
+        bytestream2_init(&gb, s->block_data, blocks * 8);
+    } else if (blocks) {
+        int x = 0;
+
+        bytestream2_seek(&gb, 2, SEEK_SET);
+
+        for (int i = 0; i < blocks; i++) {
+            int w, h;
+
+            bytestream2_skip(&gb, 4);
+            w = bytestream2_get_le16(&gb);
+            h = bytestream2_get_le16(&gb);
+            if (x + bpp * (int64_t)w * h > INT_MAX)
+                return AVERROR_INVALIDDATA;
+            x += bpp * w * h;
+        }
+
+        if (x >= 0xFFFF)
+            bytestream2_skip(&gb, 3);
+        else if (x >= 0xFF)
+            bytestream2_skip(&gb, 2);
+        else
+            bytestream2_skip(&gb, 1);
+
+        skip = bytestream2_tell(&gb);
+
+        s->zstream.next_in  = avpkt->data + skip;
+        s->zstream.avail_in = avpkt->size - skip;
+
+        bytestream2_seek(&gb, 2, SEEK_SET);
+    }
+
+    for (int block = 0; block < blocks; block++) {
+        int x, y, w, h;
+
+        x = bytestream2_get_le16(&gb);
+        y = bytestream2_get_le16(&gb);
+        w = bytestream2_get_le16(&gb);
+        h = bytestream2_get_le16(&gb);
+
+        if (blocks == 1 && x == 0 && y == 0 && w == avctx->width && h == avctx->height)
+            intra = 1;
+
+        if (x + w > avctx->width || y + h > avctx->height)
+            return AVERROR_INVALIDDATA;
+
+        if (w > avctx->width || h > avctx->height)
+            return AVERROR_INVALIDDATA;
+
+        dst = frame->data[0] + (avctx->height - y - 1) * frame->linesize[0] + x * bpp;
+        for (int i = 0; i < h; i++) {
+            s->zstream.next_out  = dst;
+            s->zstream.avail_out = w * bpp;
+
+            zret = inflate(&s->zstream, Z_SYNC_FLUSH);
+            if (zret != Z_OK && zret != Z_STREAM_END) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "Inflate failed with return code: %d.\n", zret);
+                return AVERROR_INVALIDDATA;
+            }
+
+            dst -= frame->linesize[0];
+        }
+    }
+
+    frame->key_frame = intra;
+    frame->pict_type = intra ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+
+    av_frame_unref(s->prev_frame);
+    if ((ret = av_frame_ref(s->prev_frame, frame)) < 0)
+        return ret;
+
+    *got_frame = 1;
+
+    return avpkt->size;
+}
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    WCMVContext *s = avctx->priv_data;
+    int zret;
+
+    switch (avctx->bits_per_coded_sample) {
+    case 16: avctx->pix_fmt = AV_PIX_FMT_RGB565LE; break;
+    case 24: avctx->pix_fmt = AV_PIX_FMT_BGR24;  break;
+    case 32: avctx->pix_fmt = AV_PIX_FMT_BGRA;   break;
+    default: av_log(avctx, AV_LOG_ERROR, "Unsupported bits_per_coded_sample: %d\n",
+                    avctx->bits_per_coded_sample);
+             return AVERROR_PATCHWELCOME;
+    }
+
+    s->bpp = avctx->bits_per_coded_sample >> 3;
+
+    s->zstream.zalloc = Z_NULL;
+    s->zstream.zfree = Z_NULL;
+    s->zstream.opaque = Z_NULL;
+    zret = inflateInit(&s->zstream);
+    if (zret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate init error: %d\n", zret);
+        return AVERROR_EXTERNAL;
+    }
+
+    s->prev_frame = av_frame_alloc();
+    if (!s->prev_frame)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static av_cold int decode_close(AVCodecContext *avctx)
+{
+    WCMVContext *s = avctx->priv_data;
+
+    av_frame_free(&s->prev_frame);
+    inflateEnd(&s->zstream);
+
+    return 0;
+}
+
+AVCodec ff_wcmv_decoder = {
+    .name             = "wcmv",
+    .long_name        = NULL_IF_CONFIG_SMALL("WinCAM Motion Video"),
+    .type             = AVMEDIA_TYPE_VIDEO,
+    .id               = AV_CODEC_ID_WCMV,
+    .priv_data_size   = sizeof(WCMVContext),
+    .init             = decode_init,
+    .close            = decode_close,
+    .decode           = decode_frame,
+    .capabilities     = AV_CODEC_CAP_DR1,
+    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
+                        FF_CODEC_CAP_INIT_CLEANUP,
+};

diff --git a/libavcodec/webp.c b/libavcodec/webp.c
index efa864a..077bb06 100644
--- a/libavcodec/webp.c
+++ b/libavcodec/webp.c

@@ -1335,6 +1335,7 @@
     if (!s->initialized) {
         ff_vp8_decode_init(avctx);
         s->initialized = 1;
+        s->v.actually_webp = 1;
     }
     avctx->pix_fmt = s->has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P;
     s->lossless = 0;
@@ -1504,7 +1505,7 @@
             }
 
             bytestream2_seek(&exif_gb, ifd_offset, SEEK_SET);
-            if (avpriv_exif_decode_ifd(avctx, &exif_gb, le, 0, &exif_metadata) < 0) {
+            if (ff_exif_decode_ifd(avctx, &exif_gb, le, 0, &exif_metadata) < 0) {
                 av_log(avctx, AV_LOG_ERROR, "error decoding Exif data\n");
                 goto exif_end;
             }

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index 133a3e9..eb1db61 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c

@@ -1148,6 +1148,7 @@
     if (len <= 0 || buflen > s->max_frame_size) {
         avpriv_request_sample(s->avctx, "Too small input buffer");
         s->packet_loss = 1;
+        s->num_saved_bits = 0;
         return;
     }
 
@@ -1255,7 +1256,9 @@
             (frame_size = show_bits(gb, s->log2_frame_size)) &&
             frame_size <= remaining_bits(s, gb)) {
             save_bits(s, gb, frame_size, 0);
-            s->packet_done = !decode_frame(s);
+
+            if (!s->packet_loss)
+                s->packet_done = !decode_frame(s);
         } else if (!s->len_prefix
                    && s->num_saved_bits > get_bits_count(&s->gb)) {
             /* when the frames do not have a length prefix, we don't know the

diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c
index 77a49c9..9439bfa 100644
--- a/libavcodec/wmaprodec.c
+++ b/libavcodec/wmaprodec.c

@@ -107,8 +107,8 @@
 #define MAX_BANDS      29                                    ///< max number of scale factor bands
 #define MAX_FRAMESIZE  32768                                 ///< maximum compressed frame size
 #define XMA_MAX_STREAMS         8
-#define XMA_MAX_CHANNELS        8
 #define XMA_MAX_CHANNELS_STREAM 2
+#define XMA_MAX_CHANNELS        (XMA_MAX_STREAMS * XMA_MAX_CHANNELS_STREAM)
 
 #define WMAPRO_BLOCK_MIN_BITS  6                                           ///< log2 of min block size
 #define WMAPRO_BLOCK_MAX_BITS 13                                           ///< log2 of max block size

diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 2ec4499..444e303 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c

@@ -30,6 +30,7 @@
 #include "libavutil/channel_layout.h"
 #include "libavutil/float_dsp.h"
 #include "libavutil/mem.h"
+#include "libavutil/thread.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "get_bits.h"
@@ -310,7 +311,7 @@
     return 0;
 }
 
-static av_cold void wmavoice_init_static_data(AVCodec *codec)
+static av_cold void wmavoice_init_static_data(void)
 {
     static const uint8_t bits[] = {
          2,  2,  2,  4,  4,  4,
@@ -365,9 +366,12 @@
  */
 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
 {
+    static AVOnce init_static_once = AV_ONCE_INIT;
     int n, flags, pitch_range, lsp16_flag;
     WMAVoiceContext *s = ctx->priv_data;
 
+    ff_thread_once(&init_static_once, wmavoice_init_static_data);
+
     /**
      * Extradata layout:
      * - byte  0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
@@ -1756,6 +1760,10 @@
             stabilize_lsps(lsps[n], s->lsps);
     }
 
+    /* synth_superframe can run multiple times per packet
+     * free potential previous frame */
+    av_frame_unref(frame);
+
     /* get output buffer */
     frame->nb_samples = MAX_SFRAMESIZE;
     if ((res = ff_get_buffer(ctx, frame, 0)) < 0)
@@ -1987,7 +1995,6 @@
     .id               = AV_CODEC_ID_WMAVOICE,
     .priv_data_size   = sizeof(WMAVoiceContext),
     .init             = wmavoice_decode_init,
-    .init_static_data = wmavoice_init_static_data,
     .close            = wmavoice_decode_end,
     .decode           = wmavoice_decode_packet,
     .capabilities     = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,

diff --git a/libavcodec/wmv2dec.c b/libavcodec/wmv2dec.c
index 225e30a..4f97d92 100644
--- a/libavcodec/wmv2dec.c
+++ b/libavcodec/wmv2dec.c

@@ -141,6 +141,21 @@
     if (s->qscale <= 0)
         return AVERROR_INVALIDDATA;
 
+    if (s->pict_type != AV_PICTURE_TYPE_I && show_bits(&s->gb, 1)) {
+        GetBitContext gb = s->gb;
+        int skip_type = get_bits(&gb, 2);
+        int run = skip_type == SKIP_TYPE_COL ? s->mb_width : s->mb_height;
+
+        while (run > 0) {
+            int block = FFMIN(run, 25);
+            if (get_bits(&gb, block) + 1 != 1<<block)
+                break;
+            run -= block;
+        }
+        if (!run)
+            return FRAME_SKIPPED;
+    }
+
     return 0;
 }
 
@@ -474,10 +489,6 @@
     Wmv2Context *const w = avctx->priv_data;
     int ret;
 
-#if FF_API_EMU_EDGE
-    avctx->flags |= CODEC_FLAG_EMU_EDGE;
-#endif
-
     if ((ret = ff_msmpeg4_decode_init(avctx)) < 0)
         return ret;
 

diff --git a/libavcodec/wrapped_avframe.c b/libavcodec/wrapped_avframe.c
index 5f88a66..85ff32d 100644
--- a/libavcodec/wrapped_avframe.c
+++ b/libavcodec/wrapped_avframe.c

@@ -25,6 +25,7 @@
  */
 
 #include "avcodec.h"
+#include "decode.h"
 #include "internal.h"
 
 #include "libavutil/internal.h"
@@ -98,6 +99,12 @@
 
     av_frame_move_ref(out, in);
 
+    err = ff_attach_decode_data(out);
+    if (err < 0) {
+        av_frame_unref(out);
+        return err;
+    }
+
     *got_frame = 1;
     return 0;
 }

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index a805cd3..2350c8b 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile

@@ -63,6 +63,7 @@
 OBJS-$(CONFIG_PRORES_DECODER)          += x86/proresdsp_init.o
 OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += x86/proresdsp_init.o
 OBJS-$(CONFIG_RV40_DECODER)            += x86/rv40dsp_init.o
+OBJS-$(CONFIG_SBC_ENCODER)             += x86/sbcdsp_init.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc_init.o
 OBJS-$(CONFIG_TAK_DECODER)             += x86/takdsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp_init.o
@@ -172,6 +173,7 @@
 X86ASM-OBJS-$(CONFIG_PRORES_DECODER)   += x86/proresdsp.o
 X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
 X86ASM-OBJS-$(CONFIG_RV40_DECODER)     += x86/rv40dsp.o
+X86ASM-OBJS-$(CONFIG_SBC_ENCODER)      += x86/sbcdsp.o
 X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER)     += x86/svq1enc.o
 X86ASM-OBJS-$(CONFIG_TAK_DECODER)      += x86/takdsp.o
 X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER)   += x86/mlpdsp.o

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 3973808..de395e5 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm

@@ -62,7 +62,7 @@
 ; %1 = number of xmm registers used
 ; %2 = number of inline load/process/store loops per asm loop
 ; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
-; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
+; %4 = CLIPD function takes min/max as float instead of int (SSE2 version)
 ; %5 = suffix
 %macro VECTOR_CLIP_INT32 4-5
 cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
@@ -118,14 +118,11 @@
 %endmacro
 
 INIT_MMX mmx
-%define CLIPD CLIPD_MMX
 VECTOR_CLIP_INT32 0, 1, 0, 0
 INIT_XMM sse2
 VECTOR_CLIP_INT32 6, 1, 0, 0, _int
-%define CLIPD CLIPD_SSE2
 VECTOR_CLIP_INT32 6, 2, 0, 1
 INIT_XMM sse4
-%define CLIPD CLIPD_SSE41
 %ifdef m8
 VECTOR_CLIP_INT32 11, 1, 1, 0
 %else

diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index 56d8083..31c6c48 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm

@@ -35,14 +35,18 @@
     mov      r3d, r2d
     sar      r2d, 3
     jz       .left4_%1
+%if cpuflag(avx2)
+    sar      r2d, 1
+    jz       .left8_%1
+%endif
 .loop8_%1:
     mov%1    m0, [r1 +  0]
-    mov%1    m1, [r1 + 16]
-%if cpuflag(ssse3)
+    mov%1    m1, [r1 + mmsize]
+%if cpuflag(ssse3)||cpuflag(avx2)
     pshufb   m0, m2
     pshufb   m1, m2
     mov%1    [r0 +  0], m0
-    mov%1    [r0 + 16], m1
+    mov%1    [r0 + mmsize], m1
 %else
     pshuflw  m0, m0, 10110001b
     pshuflw  m1, m1, 10110001b
@@ -59,18 +63,29 @@
     mov%1    [r0 +  0], m2
     mov%1    [r0 + 16], m3
 %endif
-    add      r0, 32
-    add      r1, 32
+    add      r0, mmsize*2
+    add      r1, mmsize*2
     dec      r2d
     jnz      .loop8_%1
+%if cpuflag(avx2)
+.left8_%1:
+    mov      r2d, r3d
+    test     r3d, 8
+    jz       .left4_%1
+    mov%1    m0, [r1]
+    pshufb   m0, m2
+    mov%1    [r0 +  0], m0
+    add r1, mmsize
+    add r0, mmsize
+%endif
 .left4_%1:
     mov      r2d, r3d
     test     r3d, 4
     jz       .left
-    mov%1    m0, [r1]
+    mov%1    xm0, [r1]
 %if cpuflag(ssse3)
-    pshufb   m0, m2
-    mov%1    [r0], m0
+    pshufb   xm0, xm2
+    mov%1    [r0], xm0
 %else
     pshuflw  m0, m0, 10110001b
     pshufhw  m0, m0, 10110001b
@@ -86,16 +101,16 @@
 
 ; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w);
 %macro BSWAP32_BUF 0
-%if cpuflag(ssse3)
+%if cpuflag(ssse3)||cpuflag(avx2)
 cglobal bswap32_buf, 3,4,3
     mov      r3, r1
-    mova     m2, [pb_bswap32]
+    VBROADCASTI128  m2, [pb_bswap32]
 %else
 cglobal bswap32_buf, 3,4,5
     mov      r3, r1
 %endif
     or       r3, r0
-    test     r3, 15
+    test     r3, mmsize - 1
     jz       .start_align
     BSWAP_LOOPS  u
     jmp      .left
@@ -105,9 +120,9 @@
 %if cpuflag(ssse3)
     test     r2d, 2
     jz       .left1
-    movq     m0, [r1]
-    pshufb   m0, m2
-    movq     [r0], m0
+    movq     xm0, [r1]
+    pshufb   xm0, xm2
+    movq     [r0], xm0
     add      r1, 8
     add      r0, 8
 .left1:
@@ -137,3 +152,8 @@
 
 INIT_XMM ssse3
 BSWAP32_BUF
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+BSWAP32_BUF
+%endif

diff --git a/libavcodec/x86/bswapdsp_init.c b/libavcodec/x86/bswapdsp_init.c
index c042e56..877bab1 100644
--- a/libavcodec/x86/bswapdsp_init.c
+++ b/libavcodec/x86/bswapdsp_init.c

@@ -25,6 +25,7 @@
 
 void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w);
 void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w);
+void ff_bswap32_buf_avx2(uint32_t *dst, const uint32_t *src, int w);
 
 av_cold void ff_bswapdsp_init_x86(BswapDSPContext *c)
 {
@@ -34,4 +35,6 @@
         c->bswap_buf = ff_bswap32_buf_sse2;
     if (EXTERNAL_SSSE3(cpu_flags))
         c->bswap_buf = ff_bswap32_buf_ssse3;
+    if (EXTERNAL_AVX2_FAST(cpu_flags))
+        c->bswap_buf = ff_bswap32_buf_avx2;
 }

diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index 11002ee..4bfb78c 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c

@@ -26,23 +26,23 @@
                                                     0x0001000100010001ULL, 0x0001000100010001ULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)    = { 0x0002000200020002ULL, 0x0002000200020002ULL,
                                                     0x0002000200020002ULL, 0x0002000200020002ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_3)    = { 0x0003000300030003ULL, 0x0003000300030003ULL };
-DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_4)    = { 0x0004000400040004ULL, 0x0004000400040004ULL,
+DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_3)    = { 0x0003000300030003ULL, 0x0003000300030003ULL };
+DECLARE_ASM_ALIGNED(32, const ymm_reg,  ff_pw_4)    = { 0x0004000400040004ULL, 0x0004000400040004ULL,
                                                     0x0004000400040004ULL, 0x0004000400040004ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_5)    = { 0x0005000500050005ULL, 0x0005000500050005ULL };
+DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_5)    = { 0x0005000500050005ULL, 0x0005000500050005ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8)    = { 0x0008000800080008ULL, 0x0008000800080008ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_9)    = { 0x0009000900090009ULL, 0x0009000900090009ULL };
+DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_9)    = { 0x0009000900090009ULL, 0x0009000900090009ULL };
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_15)   =   0x000F000F000F000FULL;
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_16)   = { 0x0010001000100010ULL, 0x0010001000100010ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_17)   = { 0x0011001100110011ULL, 0x0011001100110011ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_18)   = { 0x0012001200120012ULL, 0x0012001200120012ULL };
+DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_18)   = { 0x0012001200120012ULL, 0x0012001200120012ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_20)   = { 0x0014001400140014ULL, 0x0014001400140014ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_32)   = { 0x0020002000200020ULL, 0x0020002000200020ULL };
-DECLARE_ALIGNED(8,  const uint64_t, ff_pw_42)   =   0x002A002A002A002AULL;
-DECLARE_ALIGNED(8,  const uint64_t, ff_pw_53)   =   0x0035003500350035ULL;
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_64)   = { 0x0040004000400040ULL, 0x0040004000400040ULL };
-DECLARE_ALIGNED(8,  const uint64_t, ff_pw_96)   =   0x0060006000600060ULL;
-DECLARE_ALIGNED(8,  const uint64_t, ff_pw_128)  =   0x0080008000800080ULL;
+DECLARE_ASM_ALIGNED(8,  const uint64_t, ff_pw_42)   =   0x002A002A002A002AULL;
+DECLARE_ASM_ALIGNED(8,  const uint64_t, ff_pw_53)   =   0x0035003500350035ULL;
+DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_64)   = { 0x0040004000400040ULL, 0x0040004000400040ULL };
+DECLARE_ASM_ALIGNED(8,  const uint64_t, ff_pw_96)   =   0x0060006000600060ULL;
+DECLARE_ASM_ALIGNED(8,  const uint64_t, ff_pw_128)  =   0x0080008000800080ULL;
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_255)  = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
                                                     0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_256)  = { 0x0100010001000100ULL, 0x0100010001000100ULL,
@@ -74,7 +74,8 @@
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_3)    = { 0x0303030303030303ULL, 0x0303030303030303ULL,
                                                     0x0303030303030303ULL, 0x0303030303030303ULL };
 DECLARE_ALIGNED(32, const xmm_reg,  ff_pb_15)   = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_80)   = { 0x8080808080808080ULL, 0x8080808080808080ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_80)   = { 0x8080808080808080ULL, 0x8080808080808080ULL,
+                                                    0x8080808080808080ULL, 0x8080808080808080ULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_FE)   = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL,
                                                     0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL };
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_FC)   =   0xFCFCFCFCFCFCFCFCULL;

diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
index bbb0ef8..85da38b 100644
--- a/libavcodec/x86/constants.h
+++ b/libavcodec/x86/constants.h

@@ -57,7 +57,7 @@
 extern const ymm_reg  ff_pb_1;
 extern const ymm_reg  ff_pb_2;
 extern const ymm_reg  ff_pb_3;
-extern const xmm_reg  ff_pb_80;
+extern const ymm_reg  ff_pb_80;
 extern const ymm_reg  ff_pb_FE;
 extern const uint64_t ff_pb_FC;
 

diff --git a/libavcodec/x86/dct32.asm b/libavcodec/x86/dct32.asm
index 4e657b5..21e2f21 100644
--- a/libavcodec/x86/dct32.asm
+++ b/libavcodec/x86/dct32.asm

@@ -23,7 +23,8 @@
 
 SECTION_RODATA 32
 
-align 32
+ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
+
 ps_cos_vec: dd   0.500603,  0.505471,  0.515447,  0.531043
             dd   0.553104,  0.582935,  0.622504,  0.674808
             dd -10.190008, -3.407609, -2.057781, -1.484165
@@ -38,9 +39,6 @@
             dd   1.000000,  0.707107,  1.000000, -0.707107
             dd   0.707107,  0.707107,  0.707107,  0.707107
 
-align 32
-ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
-
 %macro BUTTERFLY 4
     subps  %4, %1, %2
     addps  %2, %2, %1

diff --git a/libavcodec/x86/exrdsp.asm b/libavcodec/x86/exrdsp.asm
index 23c9397..3bf240c 100644
--- a/libavcodec/x86/exrdsp.asm
+++ b/libavcodec/x86/exrdsp.asm

@@ -73,11 +73,7 @@
 
 %macro PREDICTOR 0
 cglobal predictor, 2,2,5, src, size
-%if mmsize == 32
-    vbroadcasti128   m0, [pb_80]
-%else
-    mova            xm0, [pb_80]
-%endif
+    mova             m0, [pb_80]
     mova            xm1, [pb_15]
     mova            xm2, xm0
     add            srcq, sizeq

diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index cdbfd66..a671e8f 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm

@@ -191,6 +191,23 @@
     addps    %2, %2, %5       ; {i0,i1,i2,i3}
 %endmacro
 
+%macro INTERL 5
+%if cpuflag(avx)
+    vunpckhps      %3, %2, %1
+    vunpcklps      %2, %2, %1
+    vextractf128   %4(%5), %2, 0
+    vextractf128  %4 %+ H(%5), %3, 0
+    vextractf128   %4(%5 + 1), %2, 1
+    vextractf128  %4 %+ H(%5 + 1), %3, 1
+%elif cpuflag(sse) || cpuflag(3dnow)
+    mova     %3, %2
+    unpcklps %2, %1
+    unpckhps %3, %1
+    mova  %4(%5), %2
+    mova  %4(%5+1), %3
+%endif
+%endmacro
+
 ; scheduled for cpu-bound sizes
 %macro PASS_SMALL 3 ; (to load m4-m7), wre, wim
 IF%1 mova    m4, Z(4)
@@ -541,17 +558,6 @@
 INIT_YMM avx
 
 %if HAVE_AVX_EXTERNAL
-%macro INTERL_AVX 5
-    vunpckhps      %3, %2, %1
-    vunpcklps      %2, %2, %1
-    vextractf128   %4(%5), %2, 0
-    vextractf128  %4 %+ H(%5), %3, 0
-    vextractf128   %4(%5 + 1), %2, 1
-    vextractf128  %4 %+ H(%5 + 1), %3, 1
-%endmacro
-
-%define INTERL INTERL_AVX
-
 DECL_PASS pass_avx, PASS_BIG 1
 DECL_PASS pass_interleave_avx, PASS_BIG 0
 
@@ -566,16 +572,6 @@
 
 INIT_XMM sse
 
-%macro INTERL_SSE 5
-    mova     %3, %2
-    unpcklps %2, %1
-    unpckhps %3, %1
-    mova  %4(%5), %2
-    mova  %4(%5+1), %3
-%endmacro
-
-%define INTERL INTERL_SSE
-
 DECL_PASS pass_sse, PASS_BIG 1
 DECL_PASS pass_interleave_sse, PASS_BIG 0
 
@@ -861,16 +857,30 @@
 %endmacro
 
 %macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
+%if cpuflag(sse)
     mulps      m6, %3, [%5+%1]
     mulps      m7, %2, [%5+%1]
     mulps      %2, %2, [%6+%1]
     mulps      %3, %3, [%6+%1]
     subps      %2, %2, m6
     addps      %3, %3, m7
+%elif cpuflag(3dnow)
+    mova       m6, [%1+%2*2]
+    mova       %3, [%1+%2*2+8]
+    mova       %4, m6
+    mova       m7, %3
+    pfmul      m6, [%5+%2]
+    pfmul      %3, [%6+%2]
+    pfmul      %4, [%6+%2]
+    pfmul      m7, [%5+%2]
+    pfsub      %3, m6
+    pfadd      %4, m7
+%endif
 %endmacro
 
-%macro POSROTATESHUF_AVX 5 ;j, k, z+n8, tcos+n8, tsin+n8
+%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8
 .post:
+%if cpuflag(avx)
     vmovaps      ymm1,   [%3+%1*2]
     vmovaps      ymm0,   [%3+%1*2+0x20]
     vmovaps      ymm3,   [%3+%2*2]
@@ -899,10 +909,7 @@
     sub      %2,   0x20
     add      %1,   0x20
     jl       .post
-%endmacro
-
-%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8
-.post:
+%elif cpuflag(sse)
     movaps   xmm1, [%3+%1*2]
     movaps   xmm0, [%3+%1*2+0x10]
     CMUL     %1,   xmm0, xmm1, %3, %4, %5
@@ -924,25 +931,9 @@
     sub      %2,   0x10
     add      %1,   0x10
     jl       .post
-%endmacro
-
-%macro CMUL_3DNOW 6
-    mova       m6, [%1+%2*2]
-    mova       %3, [%1+%2*2+8]
-    mova       %4, m6
-    mova       m7, %3
-    pfmul      m6, [%5+%2]
-    pfmul      %3, [%6+%2]
-    pfmul      %4, [%6+%2]
-    pfmul      m7, [%5+%2]
-    pfsub      %3, m6
-    pfadd      %4, m7
-%endmacro
-
-%macro POSROTATESHUF_3DNOW 5 ;j, k, z+n8, tcos+n8, tsin+n8
-.post:
-    CMUL_3DNOW %3, %1, m0, m1, %4, %5
-    CMUL_3DNOW %3, %2, m2, m3, %4, %5
+%elif cpuflag(3dnow)
+    CMUL  %3, %1, m0, m1, %4, %5
+    CMUL  %3, %2, m2, m3, %4, %5
     movd  [%3+%1*2+ 0], m0
     movd  [%3+%2*2+12], m1
     movd  [%3+%2*2+ 0], m2
@@ -958,9 +949,10 @@
     sub        %2, 8
     add        %1, 8
     jl         .post
+%endif
 %endmacro
 
-%macro DECL_IMDCT 1
+%macro DECL_IMDCT 0
 cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *input
 %if ARCH_X86_64
 %define rrevtab r7
@@ -1066,7 +1058,7 @@
     neg  r0
     mov  r1, -mmsize
     sub  r1, r0
-    %1 r0, r1, r6, rtcos, rtsin
+    POSROTATESHUF r0, r1, r6, rtcos, rtsin
 %if ARCH_X86_64 == 0
     add esp, 12
 %endif
@@ -1076,18 +1068,18 @@
     RET
 %endmacro
 
-DECL_IMDCT POSROTATESHUF
+DECL_IMDCT
 
 %if ARCH_X86_32
 INIT_MMX 3dnow
-DECL_IMDCT POSROTATESHUF_3DNOW
+DECL_IMDCT
 
 INIT_MMX 3dnowext
-DECL_IMDCT POSROTATESHUF_3DNOW
+DECL_IMDCT
 %endif
 
 INIT_YMM avx
 
 %if HAVE_AVX_EXTERNAL
-DECL_IMDCT POSROTATESHUF_AVX
+DECL_IMDCT
 %endif

diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index ea91e1a..c54f9f1 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm

@@ -995,7 +995,30 @@
     SWAP %1, %4, %3
 %endmacro
 
-%macro DEQUANT_MMX 3
+%macro DEQUANT 1-3
+%if cpuflag(sse2)
+    movd      xmm4, t3d
+    movq      xmm5, [pw_1]
+    pshufd    xmm4, xmm4, 0
+    movq2dq   xmm0, m0
+    movq2dq   xmm1, m1
+    movq2dq   xmm2, m2
+    movq2dq   xmm3, m3
+    punpcklwd xmm0, xmm5
+    punpcklwd xmm1, xmm5
+    punpcklwd xmm2, xmm5
+    punpcklwd xmm3, xmm5
+    pmaddwd   xmm0, xmm4
+    pmaddwd   xmm1, xmm4
+    pmaddwd   xmm2, xmm4
+    pmaddwd   xmm3, xmm4
+    psrad     xmm0, %1
+    psrad     xmm1, %1
+    psrad     xmm2, %1
+    psrad     xmm3, %1
+    packssdw  xmm0, xmm1
+    packssdw  xmm2, xmm3
+%else
     mova        m7, [pw_1]
     mova        m4, %1
     punpcklwd   %1, m7
@@ -1015,6 +1038,7 @@
     psrad       m5, %3
     packssdw    %1, m4
     packssdw    %2, m5
+%endif
 %endmacro
 
 %macro STORE_WORDS 5-9
@@ -1053,35 +1077,15 @@
 
 %macro DEQUANT_STORE 1
 %if cpuflag(sse2)
-    movd      xmm4, t3d
-    movq      xmm5, [pw_1]
-    pshufd    xmm4, xmm4, 0
-    movq2dq   xmm0, m0
-    movq2dq   xmm1, m1
-    movq2dq   xmm2, m2
-    movq2dq   xmm3, m3
-    punpcklwd xmm0, xmm5
-    punpcklwd xmm1, xmm5
-    punpcklwd xmm2, xmm5
-    punpcklwd xmm3, xmm5
-    pmaddwd   xmm0, xmm4
-    pmaddwd   xmm1, xmm4
-    pmaddwd   xmm2, xmm4
-    pmaddwd   xmm3, xmm4
-    psrad     xmm0, %1
-    psrad     xmm1, %1
-    psrad     xmm2, %1
-    psrad     xmm3, %1
-    packssdw  xmm0, xmm1
-    packssdw  xmm2, xmm3
+    DEQUANT     %1
     STORE_WORDS xmm0,  0,  1,  4,  5,  2,  3,  6,  7
     STORE_WORDS xmm2,  8,  9, 12, 13, 10, 11, 14, 15
 %else
-    DEQUANT_MMX m0, m1, %1
+    DEQUANT     m0, m1, %1
     STORE_WORDS m0,  0,  1,  4,  5
     STORE_WORDS m1,  2,  3,  6,  7
 
-    DEQUANT_MMX m2, m3, %1
+    DEQUANT     m2, m3, %1
     STORE_WORDS m2,  8,  9, 12, 13
     STORE_WORDS m3, 10, 11, 14, 15
 %endif
@@ -1140,7 +1144,11 @@
 INIT_MMX sse2
 IDCT_DC_DEQUANT 7
 
-; %unmacro STORE_DIFFx2 8 ; remove macro from x86util.asm but yasm doesn't have this yet
+%ifdef __NASM_VER__
+%if __NASM_MAJOR__ >= 2 && __NASM_MINOR__ >= 4
+%unmacro STORE_DIFFx2 8 ; remove macro from x86util.asm but yasm doesn't have this yet
+%endif
+%endif
 %macro STORE_DIFFx2 8 ; add1, add2, reg1, reg2, zero, shift, source, stride
     movd       %3, [%7]
     movd       %4, [%7+%8]

diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index 888a28a..756adfe 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm

@@ -198,7 +198,7 @@
 ;******************************************************************************
 
 %define MAX_PB_SIZE  64
-%define PADDING_SIZE 32 ; AV_INPUT_BUFFER_PADDING_SIZE
+%define PADDING_SIZE 64 ; AV_INPUT_BUFFER_PADDING_SIZE
 %define EDGE_SRCSTRIDE 2 * MAX_PB_SIZE + PADDING_SIZE
 
 %macro HEVC_SAO_EDGE_FILTER_INIT 0

diff --git a/libavcodec/x86/hevc_sao_10bit.asm b/libavcodec/x86/hevc_sao_10bit.asm
index f81e2d5..b30583d 100644
--- a/libavcodec/x86/hevc_sao_10bit.asm
+++ b/libavcodec/x86/hevc_sao_10bit.asm

@@ -190,7 +190,7 @@
 ;******************************************************************************
 
 %define MAX_PB_SIZE  64
-%define PADDING_SIZE 32 ; AV_INPUT_BUFFER_PADDING_SIZE
+%define PADDING_SIZE 64 ; AV_INPUT_BUFFER_PADDING_SIZE
 %define EDGE_SRCSTRIDE 2 * MAX_PB_SIZE + PADDING_SIZE
 
 %macro PMINUW 4

diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index 0d8cae3..a1231f1 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm

@@ -24,77 +24,39 @@
 
 SECTION .text
 
+%include "libavcodec/x86/huffyuvdsp_template.asm"
 
-%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
-    movd    m4, maskd
-    SPLATW  m4, m4
-    add     wd, wd
-    test    wq, 2*mmsize - 1
-    jz %%.tomainloop
-    push  tmpq
-%%.wordloop:
-    sub     wq, 2
-%ifidn %2, add
-    mov   tmpw, [srcq+wq]
-    add   tmpw, [dstq+wq]
-%else
-    mov   tmpw, [src1q+wq]
-    sub   tmpw, [src2q+wq]
-%endif
-    and   tmpw, maskw
-    mov     [dstq+wq], tmpw
-    test    wq, 2*mmsize - 1
-    jnz %%.wordloop
-    pop   tmpq
-%%.tomainloop:
-%ifidn %2, add
-    add     srcq, wq
-%else
-    add     src1q, wq
-    add     src2q, wq
-%endif
-    add     dstq, wq
-    neg     wq
-    jz      %%.end
-%%.loop:
-%ifidn %2, add
-    mov%1   m0, [srcq+wq]
-    mov%1   m1, [dstq+wq]
-    mov%1   m2, [srcq+wq+mmsize]
-    mov%1   m3, [dstq+wq+mmsize]
-%else
-    mov%1   m0, [src1q+wq]
-    mov%1   m1, [src2q+wq]
-    mov%1   m2, [src1q+wq+mmsize]
-    mov%1   m3, [src2q+wq+mmsize]
-%endif
-    p%2w    m0, m1
-    p%2w    m2, m3
-    pand    m0, m4
-    pand    m2, m4
-    mov%1   [dstq+wq]       , m0
-    mov%1   [dstq+wq+mmsize], m2
-    add     wq, 2*mmsize
-    jl %%.loop
-%%.end:
-    RET
-%endmacro
+;------------------------------------------------------------------------------
+; void (*add_int16)(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+;------------------------------------------------------------------------------
 
-%if ARCH_X86_32
-INIT_MMX mmx
+%macro ADD_INT16 0
 cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
-    INT16_LOOP a, add
-%endif
-
-INIT_XMM sse2
-cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
+%if mmsize > 8
     test srcq, mmsize-1
     jnz .unaligned
     test dstq, mmsize-1
     jnz .unaligned
+%endif
     INT16_LOOP a, add
+%if mmsize > 8
 .unaligned:
     INT16_LOOP u, add
+%endif
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+ADD_INT16
+%endif
+
+INIT_XMM sse2
+ADD_INT16
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+ADD_INT16
+%endif
 
 ; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
 ;                               intptr_t w, uint8_t *left)

diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c
index 26cf621..eb10de3 100644
--- a/libavcodec/x86/huffyuvdsp_init.c
+++ b/libavcodec/x86/huffyuvdsp_init.c

@@ -28,6 +28,8 @@
 
 void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+void ff_add_int16_avx2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+
 void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
                                      intptr_t w, uint8_t *left);
 void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,
@@ -52,4 +54,8 @@
         c->add_int16 = ff_add_int16_sse2;
         c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2;
     }
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        c->add_int16 = ff_add_int16_avx2;
+    }
 }

diff --git a/libavcodec/x86/huffyuvdsp_template.asm b/libavcodec/x86/huffyuvdsp_template.asm
new file mode 100644
index 0000000..89721f4
--- /dev/null
+++ b/libavcodec/x86/huffyuvdsp_template.asm

@@ -0,0 +1,76 @@
+;******************************************************************************
+;* SIMD-optimized HuffYUV functions
+;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2014 Christophe Gisquet
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
+    movd    xm4, maskd
+    SPLATW  m4, xm4
+    add     wd, wd
+    test    wq, 2*mmsize - 1
+    jz %%.tomainloop
+    push  tmpq
+%%.wordloop:
+    sub     wq, 2
+%ifidn %2, add
+    mov   tmpw, [srcq+wq]
+    add   tmpw, [dstq+wq]
+%else
+    mov   tmpw, [src1q+wq]
+    sub   tmpw, [src2q+wq]
+%endif
+    and   tmpw, maskw
+    mov     [dstq+wq], tmpw
+    test    wq, 2*mmsize - 1
+    jnz %%.wordloop
+    pop   tmpq
+%%.tomainloop:
+%ifidn %2, add
+    add     srcq, wq
+%else
+    add     src1q, wq
+    add     src2q, wq
+%endif
+    add     dstq, wq
+    neg     wq
+    jz      %%.end
+%%.loop:
+%ifidn %2, add
+    mov%1   m0, [srcq+wq]
+    mov%1   m1, [dstq+wq]
+    mov%1   m2, [srcq+wq+mmsize]
+    mov%1   m3, [dstq+wq+mmsize]
+%else
+    mov%1   m0, [src1q+wq]
+    mov%1   m1, [src2q+wq]
+    mov%1   m2, [src1q+wq+mmsize]
+    mov%1   m3, [src2q+wq+mmsize]
+%endif
+    p%2w    m0, m1
+    p%2w    m2, m3
+    pand    m0, m4
+    pand    m2, m4
+    mov%1   [dstq+wq]       , m0
+    mov%1   [dstq+wq+mmsize], m2
+    add     wq, 2*mmsize
+    jl %%.loop
+%%.end:
+    RET
+%endmacro

diff --git a/libavcodec/x86/huffyuvencdsp.asm b/libavcodec/x86/huffyuvencdsp.asm
index eeef81a..d994fd0 100644
--- a/libavcodec/x86/huffyuvencdsp.asm
+++ b/libavcodec/x86/huffyuvencdsp.asm

@@ -27,80 +27,42 @@
 
 SECTION .text
 
+%include "libavcodec/x86/huffyuvdsp_template.asm"
+
+;------------------------------------------------------------------------------
 ; void ff_diff_int16(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
 ;                    unsigned mask, int w);
-%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
-    movd    m4, maskd
-    SPLATW  m4, m4
-    add     wd, wd
-    test    wq, 2*mmsize - 1
-    jz %%.tomainloop
-    push  tmpq
-%%.wordloop:
-    sub     wq, 2
-%ifidn %2, add
-    mov   tmpw, [srcq+wq]
-    add   tmpw, [dstq+wq]
-%else
-    mov   tmpw, [src1q+wq]
-    sub   tmpw, [src2q+wq]
-%endif
-    and   tmpw, maskw
-    mov     [dstq+wq], tmpw
-    test    wq, 2*mmsize - 1
-    jnz %%.wordloop
-    pop   tmpq
-%%.tomainloop:
-%ifidn %2, add
-    add     srcq, wq
-%else
-    add     src1q, wq
-    add     src2q, wq
-%endif
-    add     dstq, wq
-    neg     wq
-    jz      %%.end
-%%.loop:
-%ifidn %2, add
-    mov%1   m0, [srcq+wq]
-    mov%1   m1, [dstq+wq]
-    mov%1   m2, [srcq+wq+mmsize]
-    mov%1   m3, [dstq+wq+mmsize]
-%else
-    mov%1   m0, [src1q+wq]
-    mov%1   m1, [src2q+wq]
-    mov%1   m2, [src1q+wq+mmsize]
-    mov%1   m3, [src2q+wq+mmsize]
-%endif
-    p%2w    m0, m1
-    p%2w    m2, m3
-    pand    m0, m4
-    pand    m2, m4
-    mov%1   [dstq+wq]       , m0
-    mov%1   [dstq+wq+mmsize], m2
-    add     wq, 2*mmsize
-    jl %%.loop
-%%.end:
-    RET
-%endmacro
+;------------------------------------------------------------------------------
 
-%if ARCH_X86_32
-INIT_MMX mmx
+%macro DIFF_INT16 0
 cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
-    INT16_LOOP a, sub
-%endif
-
-INIT_XMM sse2
-cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
+%if mmsize > 8
     test src1q, mmsize-1
     jnz .unaligned
     test src2q, mmsize-1
     jnz .unaligned
     test dstq, mmsize-1
     jnz .unaligned
+%endif
     INT16_LOOP a, sub
+%if mmsize > 8
 .unaligned:
     INT16_LOOP u, sub
+%endif
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+DIFF_INT16
+%endif
+
+INIT_XMM sse2
+DIFF_INT16
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+DIFF_INT16
+%endif
 
 INIT_MMX mmxext
 cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top

diff --git a/libavcodec/x86/huffyuvencdsp_init.c b/libavcodec/x86/huffyuvencdsp_init.c
index f66bc8c..6c6e068 100644
--- a/libavcodec/x86/huffyuvencdsp_init.c
+++ b/libavcodec/x86/huffyuvencdsp_init.c

@@ -32,6 +32,8 @@
                         unsigned mask, int w);
 void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
                         unsigned mask, int w);
+void ff_diff_int16_avx2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
+                        unsigned mask, int w);
 void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
                                           unsigned mask, int w, int *left, int *left_top);
 
@@ -51,4 +53,8 @@
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->diff_int16 = ff_diff_int16_sse2;
     }
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        c->diff_int16 = ff_diff_int16_avx2;
+    }
 }

diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index 162560d..9103b92 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c

@@ -123,6 +123,7 @@
         }
 
         if (avctx->bits_per_raw_sample == 10 &&
+            avctx->codec_id != AV_CODEC_ID_MPEG4 &&
             (avctx->idct_algo == FF_IDCT_AUTO ||
              avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
              avctx->idct_algo == FF_IDCT_SIMPLE)) {

diff --git a/libavcodec/x86/imdct36.asm b/libavcodec/x86/imdct36.asm
index 960eabd..b386ab9 100644
--- a/libavcodec/x86/imdct36.asm
+++ b/libavcodec/x86/imdct36.asm

@@ -23,7 +23,6 @@
 
 SECTION_RODATA
 
-align 16
 ps_mask:  dd 0, ~0, ~0, ~0
 ps_mask2: dd 0, ~0,  0, ~0
 ps_mask3: dd 0,  0,  0, ~0

diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm
index 56b5fbd..61dfdd4 100644
--- a/libavcodec/x86/jpeg2000dsp.asm
+++ b/libavcodec/x86/jpeg2000dsp.asm

@@ -74,6 +74,19 @@
     movaps   m1, [src1q+csizeq]
     movaps   m2, [src2q+csizeq]
 
+%if cpuflag(fma4) || cpuflag(fma3)
+%if cpuflag(fma4)
+    fnmaddps  m5, m1, ICT1, m0
+    fmaddps   m4, m2, ICT0, m0
+%else ; fma3
+    movaps    m5, m1
+    movaps    m4, m2
+    fnmaddps  m5, m5, ICT1, m0
+    fmaddps   m4, m4, ICT0, m0
+%endif
+    fmaddps   m0, m1, ICT3, m0
+    fnmaddps  m5, m2, ICT2, m5
+%else ; non FMA
 %if cpuflag(avx)
     mulps    m5, m1, ICT1
     mulps    m4, m2, ICT0
@@ -93,6 +106,7 @@
     addps    m4, m4, m0
     addps    m0, m0, m1
     subps    m5, m5, m2
+%endif
 
     movaps   [src0q+csizeq], m4
     movaps   [src2q+csizeq], m0
@@ -106,6 +120,12 @@
 ICT_FLOAT 10
 INIT_YMM avx
 ICT_FLOAT 9
+%if HAVE_FMA4_EXTERNAL
+INIT_XMM fma4
+ICT_FLOAT 9
+%endif
+INIT_YMM fma3
+ICT_FLOAT 9
 
 ;***************************************************************************
 ; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize)

diff --git a/libavcodec/x86/jpeg2000dsp_init.c b/libavcodec/x86/jpeg2000dsp_init.c
index baa8138..7310a1d 100644
--- a/libavcodec/x86/jpeg2000dsp_init.c
+++ b/libavcodec/x86/jpeg2000dsp_init.c

@@ -26,6 +26,8 @@
 
 void ff_ict_float_sse(void *src0, void *src1, void *src2, int csize);
 void ff_ict_float_avx(void *src0, void *src1, void *src2, int csize);
+void ff_ict_float_fma3(void *src0, void *src1, void *src2, int csize);
+void ff_ict_float_fma4(void *src0, void *src1, void *src2, int csize);
 void ff_rct_int_sse2 (void *src0, void *src1, void *src2, int csize);
 void ff_rct_int_avx2 (void *src0, void *src1, void *src2, int csize);
 
@@ -44,6 +46,14 @@
         c->mct_decode[FF_DWT97] = ff_ict_float_avx;
     }
 
+    if (EXTERNAL_FMA4(cpu_flags)) {
+        c->mct_decode[FF_DWT97] = ff_ict_float_fma4;
+    }
+
+    if (EXTERNAL_FMA3_FAST(cpu_flags)) {
+        c->mct_decode[FF_DWT97] = ff_ict_float_fma3;
+    }
+
     if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
     }

diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index 443fe02..0a1b709 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm

@@ -2,6 +2,7 @@
 ;* SIMD lossless video DSP utils
 ;* Copyright (c) 2008 Loren Merritt
 ;* Copyright (c) 2014 Michael Niedermayer
+;* Copyright (c) 2017 Jokyo Images
 ;*
 ;* This file is part of FFmpeg.
 ;*
@@ -36,9 +37,11 @@
 
 SECTION .text
 
+;------------------------------------------------------------------------------
 ; void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
 ;                                const uint8_t *diff, int w,
 ;                                int *left, int *left_top)
+;------------------------------------------------------------------------------
 %macro MEDIAN_PRED 0
 cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top
     movu    m0, [topq]
@@ -112,44 +115,59 @@
     add     dstq, wq
     neg     wq
 %%.loop:
+    pshufb  xm0, xm5
 %if %2
     mova    m1, [srcq+wq]
 %else
     movu    m1, [srcq+wq]
 %endif
-    mova    m2, m1
-    psllw   m1, 8
+    psllw   m2, m1, 8
     paddb   m1, m2
-    mova    m2, m1
-    pshufb  m1, m3
+    pshufb  m2, m1, m3
     paddb   m1, m2
-    pshufb  m0, m5
-    mova    m2, m1
-    pshufb  m1, m4
+    pshufb  m2, m1, m4
     paddb   m1, m2
-%if mmsize == 16
-    mova    m2, m1
-    pshufb  m1, m6
+%if mmsize >= 16
+    pshufb  m2, m1, m6
     paddb   m1, m2
 %endif
-    paddb   m0, m1
+    paddb   xm0, xm1
 %if %1
-    mova    [dstq+wq], m0
+    mova    [dstq+wq], xm0
 %else
-    movq    [dstq+wq], m0
-    movhps  [dstq+wq+8], m0
+    movq    [dstq+wq], xm0
+    movhps  [dstq+wq+8], xm0
+%endif
+
+%if mmsize == 32
+    vextracti128    xm2, m1, 1 ; get second lane of the ymm
+    pshufb          xm0, xm5   ; set alls val to last val of the first lane
+    paddb           xm0, xm2
+;store val
+%if %1
+    mova    [dstq+wq+16], xm0
+%else;
+    movq    [dstq+wq+16], xm0
+    movhps  [dstq+wq+16+8], xm0
+%endif
 %endif
     add     wq, mmsize
     jl %%.loop
+%if mmsize == 32
+    movzx   eax, byte [dstq - 1]
+%else;
     mov     eax, mmsize-1
     sub     eax, wd
     movd    m1, eax
     pshufb  m0, m1
     movd    eax, m0
+%endif
     RET
 %endmacro
 
+;------------------------------------------------------------------------------
 ; int ff_add_left_pred(uint8_t *dst, const uint8_t *src, int w, int left)
+;------------------------------------------------------------------------------
 INIT_MMX ssse3
 cglobal add_left_pred, 3,3,7, dst, src, w, left
 .skip_prologue:
@@ -160,24 +178,36 @@
     psllq   m0, 56
     ADD_LEFT_LOOP 1, 1
 
-INIT_XMM ssse3
+%macro ADD_LEFT_PRED_UNALIGNED 0
 cglobal add_left_pred_unaligned, 3,3,7, dst, src, w, left
-    mova    m5, [pb_15]
-    mova    m6, [pb_zzzzzzzz77777777]
-    mova    m4, [pb_zzzz3333zzzzbbbb]
-    mova    m3, [pb_zz11zz55zz99zzdd]
-    movd    m0, leftm
-    pslldq  m0, 15
-    test    srcq, 15
+    mova    xm5, [pb_15]
+    VBROADCASTI128    m6, [pb_zzzzzzzz77777777]
+    VBROADCASTI128    m4, [pb_zzzz3333zzzzbbbb]
+    VBROADCASTI128    m3, [pb_zz11zz55zz99zzdd]
+    movd    xm0, leftm
+    pslldq  xm0, 15
+    test    srcq, mmsize - 1
     jnz .src_unaligned
-    test    dstq, 15
+    test    dstq, mmsize - 1
     jnz .dst_unaligned
     ADD_LEFT_LOOP 1, 1
 .dst_unaligned:
     ADD_LEFT_LOOP 0, 1
 .src_unaligned:
     ADD_LEFT_LOOP 0, 0
+%endmacro
 
+INIT_XMM ssse3
+ADD_LEFT_PRED_UNALIGNED
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+ADD_LEFT_PRED_UNALIGNED
+%endif
+
+;------------------------------------------------------------------------------
+; void ff_add_bytes(uint8_t *dst, uint8_t *src, ptrdiff_t w);
+;------------------------------------------------------------------------------
 %macro ADD_BYTES 0
 cglobal add_bytes, 3,4,2, dst, src, w, size
     mov  sizeq, wq
@@ -217,6 +247,11 @@
 INIT_XMM sse2
 ADD_BYTES
 
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+ADD_BYTES
+%endif
+
 %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
     add     wd, wd
     add     srcq, wq
@@ -258,7 +293,9 @@
     RET
 %endmacro
 
+;---------------------------------------------------------------------------------------------
 ; int add_left_pred_int16(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int left)
+;---------------------------------------------------------------------------------------------
 INIT_MMX ssse3
 cglobal add_left_pred_int16, 4,4,8, dst, src, mask, w, left
 .skip_prologue:
@@ -270,8 +307,8 @@
     SPLATW  m7 ,m7
     ADD_HFYU_LEFT_LOOP_INT16 a, a
 
-INIT_XMM sse4
-cglobal add_left_pred_int16, 4,4,8, dst, src, mask, w, left
+INIT_XMM ssse3
+cglobal add_left_pred_int16_unaligned, 4,4,8, dst, src, mask, w, left
     mova    m5, [pb_ef]
     mova    m4, [pb_zzzzzzzz67676767]
     mova    m3, [pb_zzzz2323zzzzabab]
@@ -288,3 +325,82 @@
     ADD_HFYU_LEFT_LOOP_INT16 u, a
 .src_unaligned:
     ADD_HFYU_LEFT_LOOP_INT16 u, u
+
+
+;---------------------------------------------------------------------------------------------
+; void add_gradient_pred(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width)
+;---------------------------------------------------------------------------------------------
+%macro ADD_GRADIENT_PRED 0
+cglobal add_gradient_pred, 3,4,5, src, stride, width, tmp
+    mova         xm0, [pb_15]
+
+;load src - 1 in xm1
+    movd         xm1, [srcq-1]
+%if cpuflag(avx2)
+    vpbroadcastb xm1, xm1
+%else
+    pxor         xm2, xm2
+    pshufb       xm1, xm2
+%endif
+
+    add    srcq, widthq
+    neg  widthq
+    neg strideq
+
+.loop:
+    lea    tmpq, [srcq + strideq]
+    mova     m2, [tmpq + widthq] ; A = src[x-stride]
+    movu     m3, [tmpq + widthq - 1] ; B = src[x - (stride + 1)]
+    mova     m4, [srcq + widthq] ; current val (src[x])
+
+    psubb    m2, m3; A - B
+
+; prefix sum A-B
+    pslldq   m3, m2, 1
+    paddb    m2, m3
+    pslldq   m3, m2, 2
+    paddb    m2, m3
+    pslldq   m3, m2, 4
+    paddb    m2, m3
+    pslldq   m3, m2, 8
+    paddb    m2, m3
+
+; prefix sum current val
+    pslldq   m3, m4, 1
+    paddb    m4, m3
+    pslldq   m3, m4, 2
+    paddb    m4, m3
+    pslldq   m3, m4, 4
+    paddb    m4, m3
+    pslldq   m3, m4, 8
+    paddb    m4, m3
+
+; last sum
+    paddb                    m2, m4 ; current + (A - B)
+
+    paddb                   xm1, xm2 ; += C
+    mova        [srcq + widthq], xm1 ; store
+
+    pshufb                  xm1, xm0 ; put last val in all val of xm1
+
+%if mmsize == 32
+    vextracti128            xm2, m2, 1 ; get second lane of the ymm
+    paddb                   xm1, xm2; += C
+
+    mova   [srcq + widthq + 16], xm1 ; store
+    pshufb                  xm1, xm0 ; put last val in all val of m1
+%endif
+
+    add         widthq, mmsize
+    jl .loop
+    RET
+
+%endmacro
+
+INIT_XMM ssse3
+ADD_GRADIENT_PRED
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+ADD_GRADIENT_PRED
+%endif

diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
index 21bbd12..6d71f14 100644
--- a/libavcodec/x86/lossless_videodsp_init.c
+++ b/libavcodec/x86/lossless_videodsp_init.c

@@ -25,6 +25,7 @@
 
 void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t w);
 void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
+void ff_add_bytes_avx2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
 
 void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
                                const uint8_t *diff, ptrdiff_t w,
@@ -37,9 +38,14 @@
                             ptrdiff_t w, int left);
 int  ff_add_left_pred_unaligned_ssse3(uint8_t *dst, const uint8_t *src,
                                       ptrdiff_t w, int left);
+int  ff_add_left_pred_unaligned_avx2(uint8_t *dst, const uint8_t *src,
+                                     ptrdiff_t w, int left);
 
 int ff_add_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, ptrdiff_t w, unsigned acc);
-int ff_add_left_pred_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, ptrdiff_t w, unsigned acc);
+int ff_add_left_pred_int16_unaligned_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, ptrdiff_t w, unsigned acc);
+
+void ff_add_gradient_pred_ssse3(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width);
+void ff_add_gradient_pred_avx2(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width);
 
 #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
 static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top,
@@ -106,13 +112,17 @@
     if (EXTERNAL_SSSE3(cpu_flags)) {
         c->add_left_pred = ff_add_left_pred_ssse3;
         c->add_left_pred_int16 = ff_add_left_pred_int16_ssse3;
+        c->add_gradient_pred   = ff_add_gradient_pred_ssse3;
     }
 
     if (EXTERNAL_SSSE3_FAST(cpu_flags)) {
         c->add_left_pred = ff_add_left_pred_unaligned_ssse3;
+        c->add_left_pred_int16 = ff_add_left_pred_int16_unaligned_ssse3;
     }
 
-    if (EXTERNAL_SSE4(cpu_flags)) {
-        c->add_left_pred_int16 = ff_add_left_pred_int16_sse4;
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        c->add_bytes       = ff_add_bytes_avx2;
+        c->add_left_pred   = ff_add_left_pred_unaligned_avx2;
+        c->add_gradient_pred = ff_add_gradient_pred_avx2;
     }
 }

diff --git a/libavcodec/x86/lossless_videoencdsp.asm b/libavcodec/x86/lossless_videoencdsp.asm
index 4d79eee..fb1204f 100644
--- a/libavcodec/x86/lossless_videoencdsp.asm
+++ b/libavcodec/x86/lossless_videoencdsp.asm

@@ -25,6 +25,8 @@
 
 %include "libavutil/x86/x86util.asm"
 
+cextern pb_80
+
 SECTION .text
 
 ; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
@@ -149,3 +151,44 @@
     DIFF_BYTES_BODY    u, u
 %undef i
 %endif
+
+
+;--------------------------------------------------------------------------------------------------
+;void sub_left_predict(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ptrdiff_t width, int height)
+;--------------------------------------------------------------------------------------------------
+
+INIT_XMM avx
+cglobal sub_left_predict, 5,6,5, dst, src, stride, width, height, x
+    mova             m1, [pb_80] ; prev initial
+    add            dstq, widthq
+    add            srcq, widthq
+    lea              xd, [widthq-1]
+    neg          widthq
+    and              xd, 15
+    pinsrb           m4, m1, xd, 15
+    mov              xq, widthq
+
+    .loop:
+        movu                     m0, [srcq + widthq]
+        palignr                  m2, m0, m1, 15
+        movu                     m1, [srcq + widthq + 16]
+        palignr                  m3, m1, m0, 15
+        psubb                    m2, m0, m2
+        psubb                    m3, m1, m3
+        movu        [dstq + widthq], m2
+        movu   [dstq + widthq + 16], m3
+        add                  widthq, 2 * 16
+        jl .loop
+
+    add   srcq, strideq
+    sub   dstq, xq ; dst + width
+    test    xd, 16
+    jz .mod32
+    mova    m1, m0
+
+.mod32:
+    pshufb    m1, m4
+    mov   widthq, xq
+    dec  heightd
+    jg .loop
+    RET

diff --git a/libavcodec/x86/lossless_videoencdsp_init.c b/libavcodec/x86/lossless_videoencdsp_init.c
index fc728c9..40407ad 100644
--- a/libavcodec/x86/lossless_videoencdsp_init.c
+++ b/libavcodec/x86/lossless_videoencdsp_init.c

@@ -36,6 +36,9 @@
 void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                         intptr_t w);
 
+void ff_sub_left_predict_avx(uint8_t *dst, uint8_t *src,
+                            ptrdiff_t stride, ptrdiff_t width, int height);
+
 #if HAVE_INLINE_ASM
 
 static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
@@ -98,6 +101,10 @@
         c->diff_bytes = ff_diff_bytes_sse2;
     }
 
+    if (EXTERNAL_AVX(cpu_flags)) {
+        c->sub_left_predict = ff_sub_left_predict_avx;
+    }
+
     if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         c->diff_bytes = ff_diff_bytes_avx2;
     }

diff --git a/libavcodec/x86/mdct15.asm b/libavcodec/x86/mdct15.asm
index 0309112..2a2cdbd 100644
--- a/libavcodec/x86/mdct15.asm
+++ b/libavcodec/x86/mdct15.asm

@@ -76,7 +76,7 @@
     addps       m%3,  m%3,  m0          ; Finally offset with DCs
 %endmacro
 
-%macro BUTTERFLIES_DC 2 ; %1 - exptab_offset, %2 - out
+%macro BUTTERFLIES_DC 1 ; %1 - exptab_offset
     mulps xm0,  xm9, [exptabq + %1 + 16*0]
     mulps xm1, xm10, [exptabq + %1 + 16*1]
 
@@ -86,10 +86,10 @@
     addps   xm0,  xm1
     addps   xm0,  xm8
 
-    movsd [%2q], xm0
+    movsd [outq], xm0
 %endmacro
 
-%macro BUTTERFLIES_AC 2 ; exptab, exptab_offset, src1, src2, src3, out (uses m0-m3)
+%macro BUTTERFLIES_AC 1 ; %1 - exptab_offset
     mulps  m0, m12, [exptabq + 64*0 + 0*mmsize + %1]
     mulps  m1, m12, [exptabq + 64*0 + 1*mmsize + %1]
     mulps  m2, m13, [exptabq + 64*1 + 0*mmsize + %1]
@@ -104,15 +104,14 @@
 
     vextractf128 xm1, m0, 1
 
-    movlps [%2q + strideq*1], xm0
-    movhps [%2q + strideq*2], xm0
-    movlps [%2q +  stride3q], xm1
-    movhps [%2q + strideq*4], xm1
+    movlps [outq + strideq*1], xm0
+    movhps [outq + strideq*2], xm0
+    movlps [outq +  stride3q], xm1
+    movhps [outq + strideq*4], xm1
 %endmacro
 
 INIT_YMM avx
-cglobal fft15, 4, 6, 14, out, in, exptab, stride, stride3, stride5
-%define out0q inq
+cglobal fft15, 4, 5, 14, out, in, exptab, stride, stride5
     shl strideq, 3
 
     movaps xm5, [exptabq + 480 + 16*0]
@@ -123,22 +122,20 @@
     FFT5  8,  xm9, 12
     FFT5 16, xm10, 13
 
+%define stride3q inq
     lea stride3q, [strideq + strideq*2]
     lea stride5q, [strideq + strideq*4]
 
-    mov out0q, outq
+    BUTTERFLIES_DC (8*6 + 4*0)*2*4
+    BUTTERFLIES_AC (8*0 + 0*0)*2*4
 
-    BUTTERFLIES_DC (8*6 + 4*0)*2*4, out0
-    lea outq, [out0q + stride5q*1]
-    BUTTERFLIES_DC (8*6 + 4*1)*2*4, out
-    lea outq, [out0q + stride5q*2]
-    BUTTERFLIES_DC (8*6 + 4*2)*2*4, out
+    add outq, stride5q
+    BUTTERFLIES_DC (8*6 + 4*1)*2*4
+    BUTTERFLIES_AC (8*2 + 0*0)*2*4
 
-    BUTTERFLIES_AC (8*0)*2*4, out0
-    lea outq, [out0q + stride5q*1]
-    BUTTERFLIES_AC (8*2)*2*4, out
-    lea outq, [out0q + stride5q*2]
-    BUTTERFLIES_AC (8*4)*2*4, out
+    add outq, stride5q
+    BUTTERFLIES_DC (8*6 + 4*2)*2*4
+    BUTTERFLIES_AC (8*4 + 0*0)*2*4
 
     RET
 

diff --git a/libavcodec/x86/mdct15_init.c b/libavcodec/x86/mdct15_init.c
index 45b91b7..444801d 100644
--- a/libavcodec/x86/mdct15_init.c
+++ b/libavcodec/x86/mdct15_init.c

@@ -33,32 +33,35 @@
 static void perm_twiddles(MDCT15Context *s)
 {
     int k;
-    FFTComplex exp_5point[4];
+    FFTComplex tmp[30];
 
-    FFTComplex tmp[21], tmp2[30];
-    memcpy(tmp, s->exptab, sizeof(FFTComplex)*21);
+    /* 5-point FFT twiddles */
+    s->exptab[60].re = s->exptab[60].im = s->exptab[19].re;
+    s->exptab[61].re = s->exptab[61].im = s->exptab[19].im;
+    s->exptab[62].re = s->exptab[62].im = s->exptab[20].re;
+    s->exptab[63].re = s->exptab[63].im = s->exptab[20].im;
 
     /* 15-point FFT twiddles */
     for (k = 0; k < 5; k++) {
-        tmp2[6*k + 0] = tmp[k +  0];
-        tmp2[6*k + 2] = tmp[k +  5];
-        tmp2[6*k + 4] = tmp[k + 10];
+        tmp[6*k + 0] = s->exptab[k +  0];
+        tmp[6*k + 2] = s->exptab[k +  5];
+        tmp[6*k + 4] = s->exptab[k + 10];
 
-        tmp2[6*k + 1] = tmp[2 * (k + 0)];
-        tmp2[6*k + 3] = tmp[2 * (k + 5)];
-        tmp2[6*k + 5] = tmp[2 *  k + 5 ];
+        tmp[6*k + 1] = s->exptab[2 * (k + 0)];
+        tmp[6*k + 3] = s->exptab[2 * (k + 5)];
+        tmp[6*k + 5] = s->exptab[2 *  k + 5 ];
     }
 
     for (k = 0; k < 6; k++) {
         FFTComplex ac_exp[] = {
-            { tmp2[6*1 + k].re,  tmp2[6*1 + k].re },
-            { tmp2[6*2 + k].re,  tmp2[6*2 + k].re },
-            { tmp2[6*3 + k].re,  tmp2[6*3 + k].re },
-            { tmp2[6*4 + k].re,  tmp2[6*4 + k].re },
-            { tmp2[6*1 + k].im, -tmp2[6*1 + k].im },
-            { tmp2[6*2 + k].im, -tmp2[6*2 + k].im },
-            { tmp2[6*3 + k].im, -tmp2[6*3 + k].im },
-            { tmp2[6*4 + k].im, -tmp2[6*4 + k].im },
+            { tmp[6*1 + k].re,  tmp[6*1 + k].re },
+            { tmp[6*2 + k].re,  tmp[6*2 + k].re },
+            { tmp[6*3 + k].re,  tmp[6*3 + k].re },
+            { tmp[6*4 + k].re,  tmp[6*4 + k].re },
+            { tmp[6*1 + k].im, -tmp[6*1 + k].im },
+            { tmp[6*2 + k].im, -tmp[6*2 + k].im },
+            { tmp[6*3 + k].im, -tmp[6*3 + k].im },
+            { tmp[6*4 + k].im, -tmp[6*4 + k].im },
         };
         memcpy(s->exptab + 8*k, ac_exp, 8*sizeof(FFTComplex));
     }
@@ -66,21 +69,13 @@
     /* Specialcase when k = 0 */
     for (k = 0; k < 3; k++) {
         FFTComplex dc_exp[] = {
-            { tmp2[2*k + 0].re, -tmp2[2*k + 0].im },
-            { tmp2[2*k + 0].im,  tmp2[2*k + 0].re },
-            { tmp2[2*k + 1].re, -tmp2[2*k + 1].im },
-            { tmp2[2*k + 1].im,  tmp2[2*k + 1].re },
+            { tmp[2*k + 0].re, -tmp[2*k + 0].im },
+            { tmp[2*k + 0].im,  tmp[2*k + 0].re },
+            { tmp[2*k + 1].re, -tmp[2*k + 1].im },
+            { tmp[2*k + 1].im,  tmp[2*k + 1].re },
         };
         memcpy(s->exptab + 8*6 + 4*k, dc_exp, 4*sizeof(FFTComplex));
     }
-
-    /* 5-point FFT twiddles */
-    exp_5point[0].re = exp_5point[0].im = tmp[19].re;
-    exp_5point[1].re = exp_5point[1].im = tmp[19].im;
-    exp_5point[2].re = exp_5point[2].im = tmp[20].re;
-    exp_5point[3].re = exp_5point[3].im = tmp[20].im;
-
-    memcpy(s->exptab + 8*6 + 4*3, exp_5point, 4*sizeof(FFTComplex));
 }
 
 av_cold void ff_mdct15_init_x86(MDCT15Context *s)

diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
index d0c3af0..bcad1ae 100644
--- a/libavcodec/x86/rv40dsp.asm
+++ b/libavcodec/x86/rv40dsp.asm

@@ -25,7 +25,6 @@
 
 SECTION_RODATA
 
-align 16
 pw_1024:   times 8 dw 1 << (16 - 6) ; pw_1024
 
 sixtap_filter_hb_m:  times 8 db   1, -5

diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm
new file mode 100644
index 0000000..d68d3a9
--- /dev/null
+++ b/libavcodec/x86/sbcdsp.asm

@@ -0,0 +1,168 @@
+;******************************************************************************
+;* SIMD optimized SBC encoder DSP functions
+;*
+;* Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+;* Copyright (C) 2008-2010  Nokia Corporation
+;* Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+;* Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+;* Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+scale_mask: times 2 dd 0x8000    ; 1 << (SBC_PROTO_FIXED_SCALE - 1)
+
+SECTION .text
+
+%macro NIDN 3
+%ifnidn %2, %3
+    %1            %2, %3
+%endif
+%endmacro
+
+%macro ANALYZE_MAC 9 ; out1, out2, in1, in2, tmp1, tmp2, add1, add2, offset
+    NIDN movq,    %5, %3
+    NIDN movq,    %6, %4
+    pmaddwd       %5, [constsq+%9]
+    pmaddwd       %6, [constsq+%9+8]
+    NIDN paddd,   %1, %7
+    NIDN paddd,   %2, %8
+%endmacro
+
+%macro ANALYZE_MAC_IN 7 ; out1, out2, tmp1, tmp2, add1, add2, offset
+    ANALYZE_MAC   %1, %2, [inq+%7], [inq+%7+8], %3, %4, %5, %6, %7
+%endmacro
+
+%macro ANALYZE_MAC_REG 7 ; out1, out2, in, tmp1, tmp2, offset, pack
+%ifidn %7, pack
+    psrad         %3, 16    ; SBC_PROTO_FIXED_SCALE
+    packssdw      %3, %3
+%endif
+    ANALYZE_MAC   %1, %2, %3, %3, %4, %5, %4, %5, %6
+%endmacro
+
+;*******************************************************************
+;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
+;*******************************************************************
+INIT_MMX mmx
+cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
+    ANALYZE_MAC_IN   m0, m1, m0, m1, [scale_mask], [scale_mask], 0
+    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 16
+    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 32
+    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 48
+    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 64
+
+    ANALYZE_MAC_REG  m0, m2, m0, m0, m2, 80, pack
+    ANALYZE_MAC_REG  m0, m2, m1, m1, m3, 96, pack
+
+    movq          [outq  ], m0
+    movq          [outq+8], m2
+
+    RET
+
+
+;*******************************************************************
+;void ff_sbc_analyze_8(const int16_t *in, int32_t *out, const int16_t *consts);
+;*******************************************************************
+INIT_MMX mmx
+cglobal sbc_analyze_8, 3, 3, 4, in, out, consts
+    ANALYZE_MAC_IN   m0, m1, m0, m1, [scale_mask], [scale_mask],  0
+    ANALYZE_MAC_IN   m2, m3, m2, m3, [scale_mask], [scale_mask], 16
+    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5,  32
+    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7,  48
+    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5,  64
+    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7,  80
+    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5,  96
+    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7, 112
+    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5, 128
+    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7, 144
+
+    ANALYZE_MAC_REG  m4, m5, m0, m4, m5, 160, pack
+    ANALYZE_MAC_REG  m4, m5, m1, m6, m7, 192, pack
+    ANALYZE_MAC_REG  m4, m5, m2, m6, m7, 224, pack
+    ANALYZE_MAC_REG  m4, m5, m3, m6, m7, 256, pack
+
+    movq          [outq  ], m4
+    movq          [outq+8], m5
+
+    ANALYZE_MAC_REG  m0, m5, m0, m0, m5, 176, no
+    ANALYZE_MAC_REG  m0, m5, m1, m1, m7, 208, no
+    ANALYZE_MAC_REG  m0, m5, m2, m2, m7, 240, no
+    ANALYZE_MAC_REG  m0, m5, m3, m3, m7, 272, no
+
+    movq          [outq+16], m0
+    movq          [outq+24], m5
+
+    RET
+
+
+;*******************************************************************
+;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
+;                              uint32_t scale_factor[2][8],
+;                              int blocks, int channels, int subbands)
+;*******************************************************************
+INIT_MMX mmx
+cglobal sbc_calc_scalefactors, 5, 7, 4, sb_sample_f, scale_factor, blocks, channels, subbands, ptr, blk
+    ; subbands = 4 * subbands * channels
+    movq          m3, [scale_mask]
+    shl           subbandsd, 2
+    cmp           channelsd, 2
+    jl            .loop_1
+    shl           subbandsd, 1
+
+.loop_1:
+    sub           subbandsq, 8
+    lea           ptrq, [sb_sample_fq + subbandsq]
+
+    ; blk = (blocks - 1) * 64;
+    lea           blkq, [blocksq - 1]
+    shl           blkd, 6
+
+    movq          m0, m3
+.loop_2:
+    movq          m1, [ptrq+blkq]
+    pxor          m2, m2
+    pcmpgtd       m1, m2
+    paddd         m1, [ptrq+blkq]
+    pcmpgtd       m2, m1
+    pxor          m1, m2
+
+    por           m0, m1
+
+    sub           blkq, 64
+    jns           .loop_2
+
+    movd          blkd, m0
+    psrlq         m0,   32
+    bsr           blkd, blkd
+    sub           blkd, 15    ; SCALE_OUT_BITS
+    mov           [scale_factorq + subbandsq], blkd
+
+    movd          blkd, m0
+    bsr           blkd, blkd
+    sub           blkd, 15    ; SCALE_OUT_BITS
+    mov           [scale_factorq + subbandsq + 4], blkd
+
+    cmp           subbandsq, 0
+    jg            .loop_1
+
+    emms
+    RET

diff --git a/libavcodec/x86/sbcdsp_init.c b/libavcodec/x86/sbcdsp_init.c
new file mode 100644
index 0000000..86effec
--- /dev/null
+++ b/libavcodec/x86/sbcdsp_init.c

@@ -0,0 +1,51 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC MMX optimization for some basic "building bricks"
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/sbcdsp.h"
+
+void ff_sbc_analyze_4_mmx(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_analyze_8_mmx(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_calc_scalefactors_mmx(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands);
+
+av_cold void ff_sbcdsp_init_x86(SBCDSPContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_MMX(cpu_flags)) {
+        s->sbc_analyze_4 = ff_sbc_analyze_4_mmx;
+        s->sbc_analyze_8 = ff_sbc_analyze_8_mmx;
+        s->sbc_calc_scalefactors = ff_sbc_calc_scalefactors_mmx;
+    }
+}

diff --git a/libavcodec/x86/utvideodsp.asm b/libavcodec/x86/utvideodsp.asm
index e44c1ea..b799c44 100644
--- a/libavcodec/x86/utvideodsp.asm
+++ b/libavcodec/x86/utvideodsp.asm

@@ -1,6 +1,7 @@
 ;******************************************************************************
 ;* SIMD-optimized UTVideo functions
 ;* Copyright (c) 2017 Paul B Mahol
+;* Copyright (c) 2017 Jokyo Images
 ;*
 ;* This file is part of FFmpeg.
 ;*
@@ -23,17 +24,18 @@
 
 SECTION_RODATA
 
-pb_128:  times 16 db 128
-pw_512:  times 8  dw 512
-pw_1023: times 8  dw 1023
+cextern pb_80
+cextern pw_512
+cextern pw_1023
 
 SECTION .text
 
-INIT_XMM sse2
-
+;-------------------------------------------------------------------------------------------
 ; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
 ;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
 ;                         int width, int height)
+;-------------------------------------------------------------------------------------------
+%macro RESTORE_RGB_PLANES 0
 cglobal restore_rgb_planes, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
     movsxdifnidn wq, wd
     add      src_rq, wq
@@ -46,7 +48,7 @@
 %define wq r6m
 %define hd r7mp
 %endif
-    mova         m3, [pb_128]
+    mova         m3, [pb_80]
 .nextrow:
     mov          xq, wq
 
@@ -68,7 +70,22 @@
     sub        hd, 1
     jg .nextrow
     REP_RET
+%endmacro
 
+INIT_XMM sse2
+RESTORE_RGB_PLANES
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+RESTORE_RGB_PLANES
+%endif
+
+;-------------------------------------------------------------------------------------------
+; void restore_rgb_planes10(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
+;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
+;                         int width, int height)
+;-------------------------------------------------------------------------------------------
+%macro RESTORE_RGB_PLANES10 0
 cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
     shl          wd, 1
     shl linesize_rq, 1
@@ -109,3 +126,12 @@
     sub        hd, 1
     jg .nextrow
     REP_RET
+%endmacro
+
+INIT_XMM sse2
+RESTORE_RGB_PLANES10
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+RESTORE_RGB_PLANES10
+%endif

diff --git a/libavcodec/x86/utvideodsp_init.c b/libavcodec/x86/utvideodsp_init.c
index f8b2a9b..2b436c6 100644
--- a/libavcodec/x86/utvideodsp_init.c
+++ b/libavcodec/x86/utvideodsp_init.c

@@ -28,9 +28,16 @@
 void ff_restore_rgb_planes_sse2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
                                 ptrdiff_t linesize_r, ptrdiff_t linesize_g,
                                 ptrdiff_t linesize_b, int width, int height);
+void ff_restore_rgb_planes_avx2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
+                                ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+                                ptrdiff_t linesize_b, int width, int height);
+
 void ff_restore_rgb_planes10_sse2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
                                   ptrdiff_t linesize_r, ptrdiff_t linesize_g,
                                   ptrdiff_t linesize_b, int width, int height);
+void ff_restore_rgb_planes10_avx2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
+                                  ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+                                  ptrdiff_t linesize_b, int width, int height);
 
 av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
 {
@@ -40,4 +47,8 @@
         c->restore_rgb_planes   = ff_restore_rgb_planes_sse2;
         c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2;
     }
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        c->restore_rgb_planes   = ff_restore_rgb_planes_avx2;
+        c->restore_rgb_planes10 = ff_restore_rgb_planes10_avx2;
+    }
 }

diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index e303b80..75de569 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm

@@ -664,6 +664,37 @@
 FILTER_V 8
 
 %macro FILTER_BILINEAR 1
+%if cpuflag(ssse3)
+cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
+    shl      myd, 4
+%ifdef PIC
+    lea  picregq, [bilinear_filter_vb_m]
+%endif
+    pxor      m4, m4
+    mova      m3, [bilinear_filter_vb+myq-16]
+.nextrow:
+    movh      m0, [srcq+srcstrideq*0]
+    movh      m1, [srcq+srcstrideq*1]
+    movh      m2, [srcq+srcstrideq*2]
+    punpcklbw m0, m1
+    punpcklbw m1, m2
+    pmaddubsw m0, m3
+    pmaddubsw m1, m3
+    psraw     m0, 2
+    psraw     m1, 2
+    pavgw     m0, m4
+    pavgw     m1, m4
+%if mmsize==8
+    packuswb  m0, m0
+    packuswb  m1, m1
+    movh   [dstq+dststrideq*0], m0
+    movh   [dstq+dststrideq*1], m1
+%else
+    packuswb  m0, m1
+    movh   [dstq+dststrideq*0], m0
+    movhps [dstq+dststrideq*1], m0
+%endif
+%else ; cpuflag(ssse3)
 cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
     shl      myd, 4
 %ifdef PIC
@@ -701,6 +732,7 @@
     movh   [dstq+dststrideq*0], m0
     movhps [dstq+dststrideq*1], m0
 %endif
+%endif ; cpuflag(ssse3)
 
     lea     dstq, [dstq+dststrideq*2]
     lea     srcq, [srcq+srcstrideq*2]
@@ -708,6 +740,37 @@
     jg .nextrow
     REP_RET
 
+%if cpuflag(ssse3)
+cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
+    shl      mxd, 4
+%ifdef PIC
+    lea  picregq, [bilinear_filter_vb_m]
+%endif
+    pxor      m4, m4
+    mova      m2, [filter_h2_shuf]
+    mova      m3, [bilinear_filter_vb+mxq-16]
+.nextrow:
+    movu      m0, [srcq+srcstrideq*0]
+    movu      m1, [srcq+srcstrideq*1]
+    pshufb    m0, m2
+    pshufb    m1, m2
+    pmaddubsw m0, m3
+    pmaddubsw m1, m3
+    psraw     m0, 2
+    psraw     m1, 2
+    pavgw     m0, m4
+    pavgw     m1, m4
+%if mmsize==8
+    packuswb  m0, m0
+    packuswb  m1, m1
+    movh   [dstq+dststrideq*0], m0
+    movh   [dstq+dststrideq*1], m1
+%else
+    packuswb  m0, m1
+    movh   [dstq+dststrideq*0], m0
+    movhps [dstq+dststrideq*1], m0
+%endif
+%else ; cpuflag(ssse3)
 cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
     shl      mxd, 4
 %ifdef PIC
@@ -746,6 +809,7 @@
     movh   [dstq+dststrideq*0], m0
     movhps [dstq+dststrideq*1], m0
 %endif
+%endif ; cpuflag(ssse3)
 
     lea     dstq, [dstq+dststrideq*2]
     lea     srcq, [srcq+srcstrideq*2]
@@ -758,85 +822,10 @@
 FILTER_BILINEAR 4
 INIT_XMM sse2
 FILTER_BILINEAR 8
-
-%macro FILTER_BILINEAR_SSSE3 1
-cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
-    shl      myd, 4
-%ifdef PIC
-    lea  picregq, [bilinear_filter_vb_m]
-%endif
-    pxor      m4, m4
-    mova      m3, [bilinear_filter_vb+myq-16]
-.nextrow:
-    movh      m0, [srcq+srcstrideq*0]
-    movh      m1, [srcq+srcstrideq*1]
-    movh      m2, [srcq+srcstrideq*2]
-    punpcklbw m0, m1
-    punpcklbw m1, m2
-    pmaddubsw m0, m3
-    pmaddubsw m1, m3
-    psraw     m0, 2
-    psraw     m1, 2
-    pavgw     m0, m4
-    pavgw     m1, m4
-%if mmsize==8
-    packuswb  m0, m0
-    packuswb  m1, m1
-    movh   [dstq+dststrideq*0], m0
-    movh   [dstq+dststrideq*1], m1
-%else
-    packuswb  m0, m1
-    movh   [dstq+dststrideq*0], m0
-    movhps [dstq+dststrideq*1], m0
-%endif
-
-    lea     dstq, [dstq+dststrideq*2]
-    lea     srcq, [srcq+srcstrideq*2]
-    sub  heightd, 2
-    jg .nextrow
-    REP_RET
-
-cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
-    shl      mxd, 4
-%ifdef PIC
-    lea  picregq, [bilinear_filter_vb_m]
-%endif
-    pxor      m4, m4
-    mova      m2, [filter_h2_shuf]
-    mova      m3, [bilinear_filter_vb+mxq-16]
-.nextrow:
-    movu      m0, [srcq+srcstrideq*0]
-    movu      m1, [srcq+srcstrideq*1]
-    pshufb    m0, m2
-    pshufb    m1, m2
-    pmaddubsw m0, m3
-    pmaddubsw m1, m3
-    psraw     m0, 2
-    psraw     m1, 2
-    pavgw     m0, m4
-    pavgw     m1, m4
-%if mmsize==8
-    packuswb  m0, m0
-    packuswb  m1, m1
-    movh   [dstq+dststrideq*0], m0
-    movh   [dstq+dststrideq*1], m1
-%else
-    packuswb  m0, m1
-    movh   [dstq+dststrideq*0], m0
-    movhps [dstq+dststrideq*1], m0
-%endif
-
-    lea     dstq, [dstq+dststrideq*2]
-    lea     srcq, [srcq+srcstrideq*2]
-    sub  heightd, 2
-    jg .nextrow
-    REP_RET
-%endmacro
-
 INIT_MMX ssse3
-FILTER_BILINEAR_SSSE3 4
+FILTER_BILINEAR 4
 INIT_XMM ssse3
-FILTER_BILINEAR_SSSE3 8
+FILTER_BILINEAR 8
 
 INIT_MMX mmx
 cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height

diff --git a/libavcodec/xwddec.c b/libavcodec/xwddec.c
index 8b0845f..8c4358f 100644
--- a/libavcodec/xwddec.c
+++ b/libavcodec/xwddec.c

@@ -39,6 +39,7 @@
     uint32_t pixformat, pixdepth, bunit, bitorder, bpad;
     uint32_t rgb[3];
     uint8_t *ptr;
+    int width, height;
     GetByteContext gb;
 
     if (buf_size < XWD_HEADER_SIZE)
@@ -60,8 +61,8 @@
 
     pixformat     = bytestream2_get_be32u(&gb);
     pixdepth      = bytestream2_get_be32u(&gb);
-    avctx->width  = bytestream2_get_be32u(&gb);
-    avctx->height = bytestream2_get_be32u(&gb);
+    width         = bytestream2_get_be32u(&gb);
+    height        = bytestream2_get_be32u(&gb);
     xoffset       = bytestream2_get_be32u(&gb);
     be            = bytestream2_get_be32u(&gb);
     bunit         = bytestream2_get_be32u(&gb);
@@ -77,6 +78,9 @@
     ncolors       = bytestream2_get_be32u(&gb);
     bytestream2_skipu(&gb, header_size - (XWD_HEADER_SIZE - 20));
 
+    if ((ret = ff_set_dimensions(avctx, width, height)) < 0)
+        return ret;
+
     av_log(avctx, AV_LOG_DEBUG,
            "pixformat %"PRIu32", pixdepth %"PRIu32", bunit %"PRIu32", bitorder %"PRIu32", bpad %"PRIu32"\n",
            pixformat, pixdepth, bunit, bitorder, bpad);
@@ -227,7 +231,7 @@
             blue   = bytestream2_get_byteu(&gb);
             bytestream2_skipu(&gb, 3); // skip bitmask flag and padding
 
-            dst[i] = red << 16 | green << 8 | blue;
+            dst[i] = 0xFFU << 24 | red << 16 | green << 8 | blue;
         }
     }
 

diff --git a/libavcodec/xwdenc.c b/libavcodec/xwdenc.c
index 43bca89..81cca6c 100644
--- a/libavcodec/xwdenc.c
+++ b/libavcodec/xwdenc.c

@@ -41,6 +41,7 @@
     int i, out_size, ret;
     uint8_t *ptr, *buf;
     AVFrame * const p = (AVFrame *)pict;
+    uint32_t pal[256];
 
     pixdepth = av_get_bits_per_pixel(desc);
     if (desc->flags & AV_PIX_FMT_FLAG_BE)
@@ -180,11 +181,17 @@
     bytestream_put_be32(&buf, 0);             // window border width
     bytestream_put_buffer(&buf, WINDOW_NAME, WINDOW_NAME_SIZE);
 
+    if (pix_fmt == AV_PIX_FMT_PAL8) {
+        memcpy(pal, p->data[1], sizeof(pal));
+    } else {
+        avpriv_set_systematic_pal2(pal, pix_fmt);
+    }
+
     for (i = 0; i < ncolors; i++) {
         uint32_t val;
         uint8_t red, green, blue;
 
-        val   = AV_RN32A(p->data[1] + i * 4);
+        val   = pal[i];
         red   = (val >> 16) & 0xFF;
         green = (val >>  8) & 0xFF;
         blue  =  val        & 0xFF;

diff --git a/libavcodec/zmbv.c b/libavcodec/zmbv.c
index f91d2e3..79e0892 100644
--- a/libavcodec/zmbv.c
+++ b/libavcodec/zmbv.c

@@ -57,6 +57,7 @@
     AVCodecContext *avctx;
 
     int bpp;
+    int alloc_bpp;
     unsigned int decomp_size;
     uint8_t* decomp_buf;
     uint8_t pal[768];
@@ -408,6 +409,7 @@
     int zret = Z_OK; // Zlib return code
     int len = buf_size;
     int hi_ver, lo_ver, ret;
+    int expected_size;
 
     /* parse header */
     if (len < 1)
@@ -494,16 +496,29 @@
             return AVERROR_UNKNOWN;
         }
 
-        c->cur  = av_realloc_f(c->cur, avctx->width * avctx->height,  (c->bpp / 8));
-        c->prev = av_realloc_f(c->prev, avctx->width * avctx->height,  (c->bpp / 8));
+        if (c->alloc_bpp < c->bpp) {
+            c->cur  = av_realloc_f(c->cur, avctx->width * avctx->height,  (c->bpp / 8));
+            c->prev = av_realloc_f(c->prev, avctx->width * avctx->height,  (c->bpp / 8));
+            c->alloc_bpp = c->bpp;
+        }
         c->bx = (c->width + c->bw - 1) / c->bw;
         c->by = (c->height+ c->bh - 1) / c->bh;
-        if (!c->cur || !c->prev)
+        if (!c->cur || !c->prev) {
+            c->alloc_bpp = 0;
             return AVERROR(ENOMEM);
+        }
         memset(c->cur, 0, avctx->width * avctx->height * (c->bpp / 8));
         memset(c->prev, 0, avctx->width * avctx->height * (c->bpp / 8));
         c->decode_intra= decode_intra;
     }
+    if (c->flags & ZMBV_KEYFRAME) {
+        expected_size = avctx->width * avctx->height * (c->bpp / 8);
+    } else {
+        expected_size = (c->bx * c->by * 2 + 3) & ~3;
+    }
+    if (avctx->pix_fmt == AV_PIX_FMT_PAL8 &&
+        (c->flags & (ZMBV_DELTAPAL | ZMBV_KEYFRAME)))
+        expected_size += 768;
 
     if (!c->decode_intra) {
         av_log(avctx, AV_LOG_ERROR, "Error! Got no format or no keyframe!\n");
@@ -519,6 +534,7 @@
             return AVERROR_INVALIDDATA;
         }
         memcpy(c->decomp_buf, buf, len);
+        c->decomp_len = len;
     } else { // ZLIB-compressed data
         c->zstream.total_in = c->zstream.total_out = 0;
         c->zstream.next_in = (uint8_t*)buf;
@@ -532,6 +548,11 @@
         }
         c->decomp_len = c->zstream.total_out;
     }
+    if (expected_size > c->decomp_len ||
+        (c->flags & ZMBV_KEYFRAME) && expected_size < c->decomp_len) {
+        av_log(avctx, AV_LOG_ERROR, "decompressed size %d is incorrect, expected %d\n", c->decomp_len, expected_size);
+        return AVERROR_INVALIDDATA;
+    }
     if (c->flags & ZMBV_KEYFRAME) {
         frame->key_frame = 1;
         frame->pict_type = AV_PICTURE_TYPE_I;
@@ -599,12 +620,11 @@
     c->decomp_size = (avctx->width + 255) * 4 * (avctx->height + 64);
 
     /* Allocate decompression buffer */
-    if (c->decomp_size) {
-        if (!(c->decomp_buf = av_mallocz(c->decomp_size))) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Can't allocate decompression buffer.\n");
-            return AVERROR(ENOMEM);
-        }
+    c->decomp_buf = av_mallocz(c->decomp_size);
+    if (!c->decomp_buf) {
+        av_log(avctx, AV_LOG_ERROR,
+                "Can't allocate decompression buffer.\n");
+        return AVERROR(ENOMEM);
     }
 
     c->zstream.zalloc = Z_NULL;
@@ -642,4 +662,5 @@
     .close          = decode_end,
     .decode         = decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };

diff --git a/libavdevice/.gitignore b/libavdevice/.gitignore
new file mode 100644
index 0000000..08ac3eb
--- /dev/null
+++ b/libavdevice/.gitignore

@@ -0,0 +1,2 @@
+/indev_list.c
+/outdev_list.c

diff --git a/libavdevice/Makefile b/libavdevice/Makefile
index 8228d62..f11a6f2 100644
--- a/libavdevice/Makefile
+++ b/libavdevice/Makefile

@@ -14,6 +14,7 @@
 # input/output devices
 OBJS-$(CONFIG_ALSA_INDEV)                += alsa_dec.o alsa.o timefilter.o
 OBJS-$(CONFIG_ALSA_OUTDEV)               += alsa_enc.o alsa.o
+OBJS-$(CONFIG_ANDROID_CAMERA_INDEV)      += android_camera.o
 OBJS-$(CONFIG_AVFOUNDATION_INDEV)        += avfoundation.o
 OBJS-$(CONFIG_BKTR_INDEV)                += bktr.o
 OBJS-$(CONFIG_CACA_OUTDEV)               += caca.o

diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c
index b767b6a..adde749 100644
--- a/libavdevice/alldevices.c
+++ b/libavdevice/alldevices.c

@@ -20,59 +20,51 @@
 
 #include "config.h"
 #include "libavutil/thread.h"
+#include "libavformat/internal.h"
 #include "avdevice.h"
 
-#define REGISTER_OUTDEV(X, x)                                           \
-    {                                                                   \
-        extern AVOutputFormat ff_##x##_muxer;                           \
-        if (CONFIG_##X##_OUTDEV)                                        \
-            av_register_output_format(&ff_##x##_muxer);                 \
-    }
+/* devices */
+extern AVInputFormat  ff_alsa_demuxer;
+extern AVOutputFormat ff_alsa_muxer;
+extern AVInputFormat  ff_android_camera_demuxer;
+extern AVInputFormat  ff_avfoundation_demuxer;
+extern AVInputFormat  ff_bktr_demuxer;
+extern AVOutputFormat ff_caca_muxer;
+extern AVInputFormat  ff_decklink_demuxer;
+extern AVOutputFormat ff_decklink_muxer;
+extern AVInputFormat  ff_libndi_newtek_demuxer;
+extern AVOutputFormat ff_libndi_newtek_muxer;
+extern AVInputFormat  ff_dshow_demuxer;
+extern AVInputFormat  ff_fbdev_demuxer;
+extern AVOutputFormat ff_fbdev_muxer;
+extern AVInputFormat  ff_gdigrab_demuxer;
+extern AVInputFormat  ff_iec61883_demuxer;
+extern AVInputFormat  ff_jack_demuxer;
+extern AVInputFormat  ff_kmsgrab_demuxer;
+extern AVInputFormat  ff_lavfi_demuxer;
+extern AVInputFormat  ff_openal_demuxer;
+extern AVOutputFormat ff_opengl_muxer;
+extern AVInputFormat  ff_oss_demuxer;
+extern AVOutputFormat ff_oss_muxer;
+extern AVInputFormat  ff_pulse_demuxer;
+extern AVOutputFormat ff_pulse_muxer;
+extern AVOutputFormat ff_sdl2_muxer;
+extern AVInputFormat  ff_sndio_demuxer;
+extern AVOutputFormat ff_sndio_muxer;
+extern AVInputFormat  ff_v4l2_demuxer;
+extern AVOutputFormat ff_v4l2_muxer;
+extern AVInputFormat  ff_vfwcap_demuxer;
+extern AVInputFormat  ff_xcbgrab_demuxer;
+extern AVOutputFormat ff_xv_muxer;
 
-#define REGISTER_INDEV(X, x)                                            \
-    {                                                                   \
-        extern AVInputFormat ff_##x##_demuxer;                          \
-        if (CONFIG_##X##_INDEV)                                         \
-            av_register_input_format(&ff_##x##_demuxer);                \
-    }
+/* external libraries */
+extern AVInputFormat  ff_libcdio_demuxer;
+extern AVInputFormat  ff_libdc1394_demuxer;
 
-#define REGISTER_INOUTDEV(X, x) REGISTER_OUTDEV(X, x); REGISTER_INDEV(X, x)
-
-static void register_all(void)
-{
-    /* devices */
-    REGISTER_INOUTDEV(ALSA,             alsa);
-    REGISTER_INDEV   (AVFOUNDATION,     avfoundation);
-    REGISTER_INDEV   (BKTR,             bktr);
-    REGISTER_OUTDEV  (CACA,             caca);
-    REGISTER_INOUTDEV(DECKLINK,         decklink);
-    REGISTER_INOUTDEV(LIBNDI_NEWTEK,    libndi_newtek);
-    REGISTER_INDEV   (DSHOW,            dshow);
-    REGISTER_INOUTDEV(FBDEV,            fbdev);
-    REGISTER_INDEV   (GDIGRAB,          gdigrab);
-    REGISTER_INDEV   (IEC61883,         iec61883);
-    REGISTER_INDEV   (JACK,             jack);
-    REGISTER_INDEV   (KMSGRAB,          kmsgrab);
-    REGISTER_INDEV   (LAVFI,            lavfi);
-    REGISTER_INDEV   (OPENAL,           openal);
-    REGISTER_OUTDEV  (OPENGL,           opengl);
-    REGISTER_INOUTDEV(OSS,              oss);
-    REGISTER_INOUTDEV(PULSE,            pulse);
-    REGISTER_OUTDEV  (SDL2,             sdl2);
-    REGISTER_INOUTDEV(SNDIO,            sndio);
-    REGISTER_INOUTDEV(V4L2,             v4l2);
-    REGISTER_INDEV   (VFWCAP,           vfwcap);
-    REGISTER_INDEV   (XCBGRAB,          xcbgrab);
-    REGISTER_OUTDEV  (XV,               xv);
-
-    /* external libraries */
-    REGISTER_INDEV   (LIBCDIO,          libcdio);
-    REGISTER_INDEV   (LIBDC1394,        libdc1394);
-}
+#include "libavdevice/outdev_list.c"
+#include "libavdevice/indev_list.c"
 
 void avdevice_register_all(void)
 {
-    static AVOnce control = AV_ONCE_INIT;
-
-    ff_thread_once(&control, register_all);
+    avpriv_register_devices(outdev_list, indev_list);
 }

diff --git a/libavdevice/alsa.c b/libavdevice/alsa.c
index 1bbff30..1b21beb 100644
--- a/libavdevice/alsa.c
+++ b/libavdevice/alsa.c

@@ -177,8 +177,8 @@
     snd_pcm_uframes_t buffer_size, period_size;
     uint64_t layout = ctx->streams[0]->codecpar->channel_layout;
 
-    if (ctx->filename[0] == 0) audio_device = "default";
-    else                       audio_device = ctx->filename;
+    if (ctx->url[0] == 0) audio_device = "default";
+    else                  audio_device = ctx->url;
 
     if (*codec_id == AV_CODEC_ID_NONE)
         *codec_id = DEFAULT_CODEC_ID;

diff --git a/libavdevice/alsa.h b/libavdevice/alsa.h
index cd41d96..1ed8c82 100644
--- a/libavdevice/alsa.h
+++ b/libavdevice/alsa.h

@@ -43,7 +43,7 @@
 
 typedef void (*ff_reorder_func)(const void *, void *, int);
 
-#define ALSA_BUFFER_SIZE_MAX 65536
+#define ALSA_BUFFER_SIZE_MAX 131072
 
 typedef struct AlsaData {
     AVClass *class;

diff --git a/libavdevice/android_camera.c b/libavdevice/android_camera.c
new file mode 100644
index 0000000..4a956a7
--- /dev/null
+++ b/libavdevice/android_camera.c

@@ -0,0 +1,871 @@
+/*
+ * Android camera input device
+ *
+ * Copyright (C) 2017 Felix Matouschek
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <camera/NdkCameraDevice.h>
+#include <camera/NdkCameraManager.h>
+#include <media/NdkImage.h>
+#include <media/NdkImageReader.h>
+
+#include "libavformat/avformat.h"
+#include "libavformat/internal.h"
+#include "libavutil/avstring.h"
+#include "libavutil/display.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixfmt.h"
+#include "libavutil/threadmessage.h"
+#include "libavutil/time.h"
+
+#include "version.h"
+
+/* This image format is available on all Android devices
+ * supporting the Camera2 API */
+#define IMAGE_FORMAT_ANDROID AIMAGE_FORMAT_YUV_420_888
+
+#define MAX_BUF_COUNT 2
+#define VIDEO_STREAM_INDEX 0
+#define VIDEO_TIMEBASE_ANDROID 1000000000
+
+#define RETURN_CASE(x) case x: return AV_STRINGIFY(x);
+#define RETURN_DEFAULT(x) default: return AV_STRINGIFY(x);
+
+typedef struct AndroidCameraCtx {
+    const AVClass *class;
+
+    int requested_width;
+    int requested_height;
+    AVRational framerate;
+    int camera_index;
+    int input_queue_size;
+
+    uint8_t lens_facing;
+    int32_t sensor_orientation;
+    int width;
+    int height;
+    int32_t framerate_range[2];
+    int image_format;
+
+    ACameraManager *camera_mgr;
+    char *camera_id;
+    ACameraMetadata *camera_metadata;
+    ACameraDevice *camera_dev;
+    ACameraDevice_StateCallbacks camera_state_callbacks;
+    AImageReader *image_reader;
+    AImageReader_ImageListener image_listener;
+    ANativeWindow *image_reader_window;
+    ACaptureSessionOutputContainer *capture_session_output_container;
+    ACaptureSessionOutput *capture_session_output;
+    ACameraOutputTarget *camera_output_target;
+    ACaptureRequest *capture_request;
+    ACameraCaptureSession_stateCallbacks capture_session_state_callbacks;
+    ACameraCaptureSession *capture_session;
+
+    AVThreadMessageQueue *input_queue;
+    atomic_int exit;
+    atomic_int got_image_format;
+} AndroidCameraCtx;
+
+static const char *camera_status_string(camera_status_t val)
+{
+    switch(val) {
+        RETURN_CASE(ACAMERA_OK)
+        RETURN_CASE(ACAMERA_ERROR_UNKNOWN)
+        RETURN_CASE(ACAMERA_ERROR_INVALID_PARAMETER)
+        RETURN_CASE(ACAMERA_ERROR_CAMERA_DISCONNECTED)
+        RETURN_CASE(ACAMERA_ERROR_NOT_ENOUGH_MEMORY)
+        RETURN_CASE(ACAMERA_ERROR_METADATA_NOT_FOUND)
+        RETURN_CASE(ACAMERA_ERROR_CAMERA_DEVICE)
+        RETURN_CASE(ACAMERA_ERROR_CAMERA_SERVICE)
+        RETURN_CASE(ACAMERA_ERROR_SESSION_CLOSED)
+        RETURN_CASE(ACAMERA_ERROR_INVALID_OPERATION)
+        RETURN_CASE(ACAMERA_ERROR_STREAM_CONFIGURE_FAIL)
+        RETURN_CASE(ACAMERA_ERROR_CAMERA_IN_USE)
+        RETURN_CASE(ACAMERA_ERROR_MAX_CAMERA_IN_USE)
+        RETURN_CASE(ACAMERA_ERROR_CAMERA_DISABLED)
+        RETURN_CASE(ACAMERA_ERROR_PERMISSION_DENIED)
+        RETURN_DEFAULT(ACAMERA_ERROR_UNKNOWN)
+    }
+}
+
+static const char *media_status_string(media_status_t val)
+{
+    switch(val) {
+        RETURN_CASE(AMEDIA_OK)
+        RETURN_CASE(AMEDIA_ERROR_UNKNOWN)
+        RETURN_CASE(AMEDIA_ERROR_MALFORMED)
+        RETURN_CASE(AMEDIA_ERROR_UNSUPPORTED)
+        RETURN_CASE(AMEDIA_ERROR_INVALID_OBJECT)
+        RETURN_CASE(AMEDIA_ERROR_INVALID_PARAMETER)
+        RETURN_CASE(AMEDIA_ERROR_INVALID_OPERATION)
+        RETURN_CASE(AMEDIA_DRM_NOT_PROVISIONED)
+        RETURN_CASE(AMEDIA_DRM_RESOURCE_BUSY)
+        RETURN_CASE(AMEDIA_DRM_DEVICE_REVOKED)
+        RETURN_CASE(AMEDIA_DRM_SHORT_BUFFER)
+        RETURN_CASE(AMEDIA_DRM_SESSION_NOT_OPENED)
+        RETURN_CASE(AMEDIA_DRM_TAMPER_DETECTED)
+        RETURN_CASE(AMEDIA_DRM_VERIFY_FAILED)
+        RETURN_CASE(AMEDIA_DRM_NEED_KEY)
+        RETURN_CASE(AMEDIA_DRM_LICENSE_EXPIRED)
+        RETURN_CASE(AMEDIA_IMGREADER_NO_BUFFER_AVAILABLE)
+        RETURN_CASE(AMEDIA_IMGREADER_MAX_IMAGES_ACQUIRED)
+        RETURN_CASE(AMEDIA_IMGREADER_CANNOT_LOCK_IMAGE)
+        RETURN_CASE(AMEDIA_IMGREADER_CANNOT_UNLOCK_IMAGE)
+        RETURN_CASE(AMEDIA_IMGREADER_IMAGE_NOT_LOCKED)
+        RETURN_DEFAULT(AMEDIA_ERROR_UNKNOWN)
+    }
+}
+
+static const char *error_state_callback_string(int val)
+{
+    switch(val) {
+        RETURN_CASE(ERROR_CAMERA_IN_USE)
+        RETURN_CASE(ERROR_MAX_CAMERAS_IN_USE)
+        RETURN_CASE(ERROR_CAMERA_DISABLED)
+        RETURN_CASE(ERROR_CAMERA_DEVICE)
+        RETURN_CASE(ERROR_CAMERA_SERVICE)
+        default:
+            return "ERROR_CAMERA_UNKNOWN";
+    }
+}
+
+static void camera_dev_disconnected(void *context, ACameraDevice *device)
+{
+    AVFormatContext *avctx = context;
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    atomic_store(&ctx->exit, 1);
+    av_log(avctx, AV_LOG_ERROR, "Camera with id %s disconnected.\n",
+           ACameraDevice_getId(device));
+}
+
+static void camera_dev_error(void *context, ACameraDevice *device, int error)
+{
+    AVFormatContext *avctx = context;
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    atomic_store(&ctx->exit, 1);
+    av_log(avctx, AV_LOG_ERROR, "Error %s on camera with id %s.\n",
+           error_state_callback_string(error), ACameraDevice_getId(device));
+}
+
+static int open_camera(AVFormatContext *avctx)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    camera_status_t ret;
+    ACameraIdList *camera_ids;
+
+    ret = ACameraManager_getCameraIdList(ctx->camera_mgr, &camera_ids);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to get camera id list, error: %s.\n",
+               camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (ctx->camera_index < camera_ids->numCameras) {
+        ctx->camera_id = av_strdup(camera_ids->cameraIds[ctx->camera_index]);
+        if (!ctx->camera_id) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory for camera_id.\n");
+            return AVERROR(ENOMEM);
+        }
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "No camera with index %d available.\n",
+               ctx->camera_index);
+        return AVERROR(ENXIO);
+    }
+
+    ACameraManager_deleteCameraIdList(camera_ids);
+
+    ret = ACameraManager_getCameraCharacteristics(ctx->camera_mgr,
+            ctx->camera_id, &ctx->camera_metadata);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to get metadata for camera with id %s, error: %s.\n",
+               ctx->camera_id, camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ctx->camera_state_callbacks.context = avctx;
+    ctx->camera_state_callbacks.onDisconnected = camera_dev_disconnected;
+    ctx->camera_state_callbacks.onError = camera_dev_error;
+
+    ret = ACameraManager_openCamera(ctx->camera_mgr, ctx->camera_id,
+                                    &ctx->camera_state_callbacks, &ctx->camera_dev);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to open camera with id %s, error: %s.\n",
+               ctx->camera_id, camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+static void get_sensor_orientation(AVFormatContext *avctx)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    ACameraMetadata_const_entry lens_facing;
+    ACameraMetadata_const_entry sensor_orientation;
+
+    ACameraMetadata_getConstEntry(ctx->camera_metadata,
+                                  ACAMERA_LENS_FACING, &lens_facing);
+    ACameraMetadata_getConstEntry(ctx->camera_metadata,
+                                  ACAMERA_SENSOR_ORIENTATION, &sensor_orientation);
+
+    ctx->lens_facing = lens_facing.data.u8[0];
+    ctx->sensor_orientation = sensor_orientation.data.i32[0];
+}
+
+static void match_video_size(AVFormatContext *avctx)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    ACameraMetadata_const_entry available_configs;
+    int found = 0;
+
+    ACameraMetadata_getConstEntry(ctx->camera_metadata,
+                                  ACAMERA_SCALER_AVAILABLE_STREAM_CONFIGURATIONS,
+                                  &available_configs);
+
+    for (int i = 0; i < available_configs.count; i++) {
+        int32_t input = available_configs.data.i32[i * 4 + 3];
+        int32_t format = available_configs.data.i32[i * 4 + 0];
+
+        if (input) {
+            continue;
+        }
+
+        if (format == IMAGE_FORMAT_ANDROID) {
+            int32_t width = available_configs.data.i32[i * 4 + 1];
+            int32_t height = available_configs.data.i32[i * 4 + 2];
+
+            //Same ratio
+            if ((ctx->requested_width == width && ctx->requested_height == height) ||
+                    (ctx->requested_width == height && ctx->requested_height == width)) {
+                ctx->width = width;
+                ctx->height = height;
+                found = 1;
+                break;
+            }
+        }
+    }
+
+    if (!found || ctx->width == 0 || ctx->height == 0) {
+        ctx->width = available_configs.data.i32[1];
+        ctx->height = available_configs.data.i32[2];
+
+        av_log(avctx, AV_LOG_WARNING,
+               "Requested video_size %dx%d not available, falling back to %dx%d\n",
+               ctx->requested_width, ctx->requested_height, ctx->width, ctx->height);
+    }
+
+    return;
+}
+
+static void match_framerate(AVFormatContext *avctx)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    ACameraMetadata_const_entry available_framerates;
+    int found = 0;
+    int current_best_match = -1;
+    int requested_framerate = av_q2d(ctx->framerate);
+
+    ACameraMetadata_getConstEntry(ctx->camera_metadata,
+                                  ACAMERA_CONTROL_AE_AVAILABLE_TARGET_FPS_RANGES,
+                                  &available_framerates);
+
+    for (int i = 0; i < available_framerates.count; i++) {
+        int32_t min = available_framerates.data.i32[i * 2 + 0];
+        int32_t max = available_framerates.data.i32[i * 2 + 1];
+
+        if (requested_framerate == max) {
+            if (min == max) {
+                ctx->framerate_range[0] = min;
+                ctx->framerate_range[1] = max;
+                found = 1;
+                break;
+            } else if (current_best_match >= 0) {
+                int32_t current_best_match_min = available_framerates.data.i32[current_best_match * 2 + 0];
+                if (min > current_best_match_min) {
+                    current_best_match = i;
+                }
+            } else {
+                current_best_match = i;
+            }
+        }
+    }
+
+    if (!found) {
+        if (current_best_match >= 0) {
+            ctx->framerate_range[0] = available_framerates.data.i32[current_best_match * 2 + 0];
+            ctx->framerate_range[1] = available_framerates.data.i32[current_best_match * 2 + 1];
+
+        } else {
+            ctx->framerate_range[0] = available_framerates.data.i32[0];
+            ctx->framerate_range[1] = available_framerates.data.i32[1];
+        }
+
+        av_log(avctx, AV_LOG_WARNING,
+               "Requested framerate %d not available, falling back to min: %d and max: %d fps\n",
+               requested_framerate, ctx->framerate_range[0], ctx->framerate_range[1]);
+    }
+
+    return;
+}
+
+static int get_image_format(AVFormatContext *avctx, AImage *image)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    int32_t image_pixelstrides[2];
+    uint8_t *image_plane_data[2];
+    int plane_data_length[2];
+
+    for (int i = 0; i < 2; i++) {
+        AImage_getPlanePixelStride(image, i + 1, &image_pixelstrides[i]);
+        AImage_getPlaneData(image, i + 1, &image_plane_data[i], &plane_data_length[i]);
+    }
+
+    if (image_pixelstrides[0] != image_pixelstrides[1]) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Pixel strides of U and V plane should have been the same.\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    switch (image_pixelstrides[0]) {
+        case 1:
+            ctx->image_format = AV_PIX_FMT_YUV420P;
+            break;
+        case 2:
+            if (image_plane_data[0] < image_plane_data[1]) {
+                ctx->image_format = AV_PIX_FMT_NV12;
+            } else {
+                ctx->image_format = AV_PIX_FMT_NV21;
+            }
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR,
+                   "Unknown pixel stride %d of U and V plane, cannot determine camera image format.\n",
+                   image_pixelstrides[0]);
+            return AVERROR(ENOSYS);
+    }
+
+    return 0;
+}
+
+static void image_available(void *context, AImageReader *reader)
+{
+    AVFormatContext *avctx = context;
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    media_status_t media_status;
+    int ret = 0;
+
+    AImage *image;
+    int64_t image_timestamp;
+    int32_t image_linestrides[4];
+    uint8_t *image_plane_data[4];
+    int plane_data_length[4];
+
+    AVPacket pkt;
+    int pkt_buffer_size = 0;
+
+    media_status = AImageReader_acquireLatestImage(reader, &image);
+    if (media_status != AMEDIA_OK) {
+        if (media_status == AMEDIA_IMGREADER_NO_BUFFER_AVAILABLE) {
+            av_log(avctx, AV_LOG_WARNING,
+                   "An image reader frame was discarded");
+        } else {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Failed to acquire latest image from image reader, error: %s.\n",
+                   media_status_string(media_status));
+            ret = AVERROR_EXTERNAL;
+        }
+        goto error;
+    }
+
+    // Silently drop frames when exit is set
+    if (atomic_load(&ctx->exit)) {
+        goto error;
+    }
+
+    // Determine actual image format
+    if (!atomic_load(&ctx->got_image_format)) {
+        ret = get_image_format(avctx, image);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Could not get image format of camera.\n");
+            goto error;
+        } else {
+            atomic_store(&ctx->got_image_format, 1);
+        }
+    }
+
+    pkt_buffer_size = av_image_get_buffer_size(ctx->image_format, ctx->width, ctx->height, 32);
+    AImage_getTimestamp(image, &image_timestamp);
+
+    AImage_getPlaneRowStride(image, 0, &image_linestrides[0]);
+    AImage_getPlaneData(image, 0, &image_plane_data[0], &plane_data_length[0]);
+
+    switch (ctx->image_format) {
+        case AV_PIX_FMT_YUV420P:
+            AImage_getPlaneRowStride(image, 1, &image_linestrides[1]);
+            AImage_getPlaneData(image, 1, &image_plane_data[1], &plane_data_length[1]);
+            AImage_getPlaneRowStride(image, 2, &image_linestrides[2]);
+            AImage_getPlaneData(image, 2, &image_plane_data[2], &plane_data_length[2]);
+            break;
+        case AV_PIX_FMT_NV12:
+            AImage_getPlaneRowStride(image, 1, &image_linestrides[1]);
+            AImage_getPlaneData(image, 1, &image_plane_data[1], &plane_data_length[1]);
+            break;
+        case AV_PIX_FMT_NV21:
+            AImage_getPlaneRowStride(image, 2, &image_linestrides[1]);
+            AImage_getPlaneData(image, 2, &image_plane_data[1], &plane_data_length[1]);
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unsupported camera image format.\n");
+            ret = AVERROR(ENOSYS);
+            goto error;
+    }
+
+    ret = av_new_packet(&pkt, pkt_buffer_size);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to create new av packet, error: %s.\n", av_err2str(ret));
+        goto error;
+    }
+
+    pkt.stream_index = VIDEO_STREAM_INDEX;
+    pkt.pts = image_timestamp;
+    av_image_copy_to_buffer(pkt.data, pkt_buffer_size,
+                            (const uint8_t * const *) image_plane_data,
+                            image_linestrides, ctx->image_format,
+                            ctx->width, ctx->height, 32);
+
+    ret = av_thread_message_queue_send(ctx->input_queue, &pkt, AV_THREAD_MESSAGE_NONBLOCK);
+
+error:
+    if (ret < 0) {
+        if (ret != AVERROR(EAGAIN)) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Error while processing new image, error: %s.\n", av_err2str(ret));
+            av_thread_message_queue_set_err_recv(ctx->input_queue, ret);
+            atomic_store(&ctx->exit, 1);
+        } else {
+            av_log(avctx, AV_LOG_WARNING,
+                   "Input queue was full, dropping frame, consider raising the input_queue_size option (current value: %d)\n",
+                   ctx->input_queue_size);
+        }
+        if (pkt_buffer_size) {
+            av_packet_unref(&pkt);
+        }
+    }
+
+    AImage_delete(image);
+
+    return;
+}
+
+static int create_image_reader(AVFormatContext *avctx)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    media_status_t ret;
+
+    ret = AImageReader_new(ctx->width, ctx->height, IMAGE_FORMAT_ANDROID,
+                           MAX_BUF_COUNT, &ctx->image_reader);
+    if (ret != AMEDIA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to create image reader, error: %s.\n", media_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ctx->image_listener.context = avctx;
+    ctx->image_listener.onImageAvailable = image_available;
+
+    ret = AImageReader_setImageListener(ctx->image_reader, &ctx->image_listener);
+    if (ret != AMEDIA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to set image listener on image reader, error: %s.\n",
+               media_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = AImageReader_getWindow(ctx->image_reader, &ctx->image_reader_window);
+    if (ret != AMEDIA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Could not get image reader window, error: %s.\n",
+               media_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+static void capture_session_closed(void *context, ACameraCaptureSession *session)
+{
+    av_log(context, AV_LOG_INFO, "Android camera capture session was closed.\n");
+}
+
+static void capture_session_ready(void *context, ACameraCaptureSession *session)
+{
+    av_log(context, AV_LOG_INFO, "Android camera capture session is ready.\n");
+}
+
+static void capture_session_active(void *context, ACameraCaptureSession *session)
+{
+    av_log(context, AV_LOG_INFO, "Android camera capture session is active.\n");
+}
+
+static int create_capture_session(AVFormatContext *avctx)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    camera_status_t ret;
+
+    ret = ACaptureSessionOutputContainer_create(&ctx->capture_session_output_container);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to create capture session output container, error: %s.\n",
+               camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ANativeWindow_acquire(ctx->image_reader_window);
+
+    ret = ACaptureSessionOutput_create(ctx->image_reader_window, &ctx->capture_session_output);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to create capture session container, error: %s.\n",
+               camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = ACaptureSessionOutputContainer_add(ctx->capture_session_output_container,
+                                             ctx->capture_session_output);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to add output to output container, error: %s.\n",
+               camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = ACameraOutputTarget_create(ctx->image_reader_window, &ctx->camera_output_target);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to create camera output target, error: %s.\n",
+               camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = ACameraDevice_createCaptureRequest(ctx->camera_dev, TEMPLATE_RECORD, &ctx->capture_request);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to create capture request, error: %s.\n",
+               camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = ACaptureRequest_setEntry_i32(ctx->capture_request, ACAMERA_CONTROL_AE_TARGET_FPS_RANGE,
+                                       2, ctx->framerate_range);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to set target fps range in capture request, error: %s.\n",
+               camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = ACaptureRequest_addTarget(ctx->capture_request, ctx->camera_output_target);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to add capture request capture request, error: %s.\n",
+               camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ctx->capture_session_state_callbacks.context = avctx;
+    ctx->capture_session_state_callbacks.onClosed = capture_session_closed;
+    ctx->capture_session_state_callbacks.onReady = capture_session_ready;
+    ctx->capture_session_state_callbacks.onActive = capture_session_active;
+
+    ret = ACameraDevice_createCaptureSession(ctx->camera_dev, ctx->capture_session_output_container,
+                                             &ctx->capture_session_state_callbacks, &ctx->capture_session);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to create capture session, error: %s.\n",
+               camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = ACameraCaptureSession_setRepeatingRequest(ctx->capture_session, NULL, 1, &ctx->capture_request, NULL);
+    if (ret != ACAMERA_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to set repeating request on capture session, error: %s.\n",
+               camera_status_string(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+static int wait_for_image_format(AVFormatContext *avctx)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+
+    while (!atomic_load(&ctx->got_image_format) && !atomic_load(&ctx->exit)) {
+        //Wait until first frame arrived and actual image format was determined
+        usleep(1000);
+    }
+
+    return atomic_load(&ctx->got_image_format);
+}
+
+static int add_display_matrix(AVFormatContext *avctx, AVStream *st)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    uint8_t *side_data;
+    int32_t display_matrix[9];
+
+    av_display_rotation_set(display_matrix, ctx->sensor_orientation);
+
+    if (ctx->lens_facing == ACAMERA_LENS_FACING_FRONT) {
+        av_display_matrix_flip(display_matrix, 1, 0);
+    }
+
+    side_data = av_stream_new_side_data(st,
+            AV_PKT_DATA_DISPLAYMATRIX, sizeof(display_matrix));
+
+    if (!side_data) {
+        return AVERROR(ENOMEM);
+    }
+
+    memcpy(side_data, display_matrix, sizeof(display_matrix));
+
+    return 0;
+}
+
+static int add_video_stream(AVFormatContext *avctx)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    AVStream *st;
+    AVCodecParameters *codecpar;
+
+    st = avformat_new_stream(avctx, NULL);
+    if (!st) {
+        return AVERROR(ENOMEM);
+    }
+
+    st->id = VIDEO_STREAM_INDEX;
+    st->avg_frame_rate = (AVRational) { ctx->framerate_range[1], 1 };
+    st->r_frame_rate = (AVRational) { ctx->framerate_range[1], 1 };
+
+    if (!wait_for_image_format(avctx)) {
+        return AVERROR_EXTERNAL;
+    }
+
+    codecpar = st->codecpar;
+    codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+    codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
+    codecpar->format = ctx->image_format;
+    codecpar->width = ctx->width;
+    codecpar->height = ctx->height;
+
+    avpriv_set_pts_info(st, 64, 1, VIDEO_TIMEBASE_ANDROID);
+
+    return add_display_matrix(avctx, st);
+}
+
+static int android_camera_read_close(AVFormatContext *avctx)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+
+    atomic_store(&ctx->exit, 1);
+
+    if (ctx->capture_session) {
+        ACameraCaptureSession_stopRepeating(ctx->capture_session);
+        // Following warning is emitted, after capture session closed callback is received:
+        // ACameraCaptureSession: Device is closed but session 0 is not notified
+        // Seems to be a bug in Android, we can ignore this
+        ACameraCaptureSession_close(ctx->capture_session);
+        ctx->capture_session = NULL;
+    }
+
+    if (ctx->capture_request) {
+        ACaptureRequest_removeTarget(ctx->capture_request, ctx->camera_output_target);
+        ACaptureRequest_free(ctx->capture_request);
+        ctx->capture_request = NULL;
+    }
+
+    if (ctx->camera_output_target) {
+        ACameraOutputTarget_free(ctx->camera_output_target);
+        ctx->camera_output_target = NULL;
+    }
+
+    if (ctx->capture_session_output) {
+        ACaptureSessionOutputContainer_remove(ctx->capture_session_output_container,
+                ctx->capture_session_output);
+        ACaptureSessionOutput_free(ctx->capture_session_output);
+        ctx->capture_session_output = NULL;
+    }
+
+    if (ctx->image_reader_window) {
+        ANativeWindow_release(ctx->image_reader_window);
+        ctx->image_reader_window = NULL;
+    }
+
+    if (ctx->capture_session_output_container) {
+        ACaptureSessionOutputContainer_free(ctx->capture_session_output_container);
+        ctx->capture_session_output_container = NULL;
+    }
+
+    if (ctx->camera_dev) {
+        ACameraDevice_close(ctx->camera_dev);
+        ctx->camera_dev = NULL;
+    }
+
+    if (ctx->image_reader) {
+        AImageReader_delete(ctx->image_reader);
+        ctx->image_reader = NULL;
+    }
+
+    if (ctx->camera_metadata) {
+        ACameraMetadata_free(ctx->camera_metadata);
+        ctx->camera_metadata = NULL;
+    }
+
+    av_freep(&ctx->camera_id);
+
+    if (ctx->camera_mgr) {
+        ACameraManager_delete(ctx->camera_mgr);
+        ctx->camera_mgr = NULL;
+    }
+
+    if (ctx->input_queue) {
+        AVPacket pkt;
+        av_thread_message_queue_set_err_send(ctx->input_queue, AVERROR_EOF);
+        while (av_thread_message_queue_recv(ctx->input_queue, &pkt, AV_THREAD_MESSAGE_NONBLOCK) >= 0) {
+            av_packet_unref(&pkt);
+        }
+        av_thread_message_queue_free(&ctx->input_queue);
+    }
+
+    return 0;
+}
+
+static int android_camera_read_header(AVFormatContext *avctx)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    int ret;
+
+    atomic_init(&ctx->got_image_format, 0);
+    atomic_init(&ctx->exit, 0);
+
+    ret = av_thread_message_queue_alloc(&ctx->input_queue, ctx->input_queue_size, sizeof(AVPacket));
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to allocate input queue, error: %s.\n", av_err2str(ret));
+        goto error;
+    }
+
+    ctx->camera_mgr = ACameraManager_create();
+    if (!ctx->camera_mgr) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create Android camera manager.\n");
+        ret = AVERROR_EXTERNAL;
+        goto error;
+    }
+
+    ret = open_camera(avctx);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to open camera.\n");
+        goto error;
+    }
+
+    get_sensor_orientation(avctx);
+    match_video_size(avctx);
+    match_framerate(avctx);
+
+    ret = create_image_reader(avctx);
+    if (ret < 0) {
+        goto error;
+    }
+
+    ret = create_capture_session(avctx);
+    if (ret < 0) {
+        goto error;
+    }
+
+    ret = add_video_stream(avctx);
+
+error:
+    if (ret < 0) {
+        android_camera_read_close(avctx);
+        av_log(avctx, AV_LOG_ERROR, "Failed to open android_camera.\n");
+    }
+
+    return ret;
+}
+
+static int android_camera_read_packet(AVFormatContext *avctx, AVPacket *pkt)
+{
+    AndroidCameraCtx *ctx = avctx->priv_data;
+    int ret;
+
+    if (!atomic_load(&ctx->exit)) {
+        ret = av_thread_message_queue_recv(ctx->input_queue, pkt,
+                avctx->flags & AVFMT_FLAG_NONBLOCK ? AV_THREAD_MESSAGE_NONBLOCK : 0);
+    } else {
+        ret = AVERROR_EOF;
+    }
+
+    if (ret < 0) {
+        return ret;
+    } else {
+        return pkt->size;
+    }
+}
+
+#define OFFSET(x) offsetof(AndroidCameraCtx, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+    { "video_size", "set video size given as a string such as 640x480 or hd720", OFFSET(requested_width), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, DEC },
+    { "framerate", "set video frame rate", OFFSET(framerate), AV_OPT_TYPE_VIDEO_RATE, {.str = "30"}, 0, INT_MAX, DEC },
+    { "camera_index", "set index of camera to use", OFFSET(camera_index), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, DEC },
+    { "input_queue_size", "set maximum number of frames to buffer", OFFSET(input_queue_size), AV_OPT_TYPE_INT, {.i64 = 5}, 0, INT_MAX, DEC },
+    { NULL },
+};
+
+static const AVClass android_camera_class = {
+    .class_name = "android_camera indev",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_INPUT,
+};
+
+AVInputFormat ff_android_camera_demuxer = {
+    .name           = "android_camera",
+    .long_name      = NULL_IF_CONFIG_SMALL("Android camera input device"),
+    .priv_data_size = sizeof(AndroidCameraCtx),
+    .read_header    = android_camera_read_header,
+    .read_packet    = android_camera_read_packet,
+    .read_close     = android_camera_read_close,
+    .flags          = AVFMT_NOFILE,
+    .priv_class     = &android_camera_class,
+};

diff --git a/libavdevice/avdevice.c b/libavdevice/avdevice.c
index 01c4692..72e1b67 100644
--- a/libavdevice/avdevice.c
+++ b/libavdevice/avdevice.c

@@ -135,9 +135,9 @@
 int avdevice_dev_to_app_control_message(struct AVFormatContext *s, enum AVDevToAppMessageType type,
                                         void *data, size_t data_size)
 {
-    if (!av_format_get_control_message_cb(s))
+    if (!s->control_message_cb)
         return AVERROR(ENOSYS);
-    return av_format_get_control_message_cb(s)(s, type, data, data_size);
+    return s->control_message_cb(s, type, data, data_size);
 }
 
 int avdevice_capabilities_create(AVDeviceCapabilitiesQuery **caps, AVFormatContext *s,

diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index e2ddf47..98552ac 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m

@@ -35,6 +35,7 @@
 #include "libavutil/internal.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/time.h"
+#include "libavutil/imgutils.h"
 #include "avdevice.h"
 
 static const int avf_time_base = 1000000;
@@ -259,7 +260,7 @@
 static void parse_device_name(AVFormatContext *s)
 {
     AVFContext *ctx = (AVFContext*)s->priv_data;
-    char *tmp = av_strdup(s->filename);
+    char *tmp = av_strdup(s->url);
     char *save;
 
     if (tmp[0] != ':') {
@@ -892,6 +893,49 @@
     return AVERROR(EIO);
 }
 
+static int copy_cvpixelbuffer(AVFormatContext *s,
+                               CVPixelBufferRef image_buffer,
+                               AVPacket *pkt)
+{
+    AVFContext *ctx = s->priv_data;
+    int src_linesize[4];
+    const uint8_t *src_data[4];
+    int width  = CVPixelBufferGetWidth(image_buffer);
+    int height = CVPixelBufferGetHeight(image_buffer);
+    int status;
+
+    memset(src_linesize, 0, sizeof(src_linesize));
+    memset(src_data, 0, sizeof(src_data));
+
+    status = CVPixelBufferLockBaseAddress(image_buffer, 0);
+    if (status != kCVReturnSuccess) {
+        av_log(s, AV_LOG_ERROR, "Could not lock base address: %d\n", status);
+        return AVERROR_EXTERNAL;
+    }
+
+    if (CVPixelBufferIsPlanar(image_buffer)) {
+        size_t plane_count = CVPixelBufferGetPlaneCount(image_buffer);
+        int i;
+        for(i = 0; i < plane_count; i++){
+            src_linesize[i] = CVPixelBufferGetBytesPerRowOfPlane(image_buffer, i);
+            src_data[i] = CVPixelBufferGetBaseAddressOfPlane(image_buffer, i);
+        }
+    } else {
+        src_linesize[0] = CVPixelBufferGetBytesPerRow(image_buffer);
+        src_data[0] = CVPixelBufferGetBaseAddress(image_buffer);
+    }
+
+    status = av_image_copy_to_buffer(pkt->data, pkt->size,
+                                     src_data, src_linesize,
+                                     ctx->pixel_format, width, height, 1);
+
+
+
+    CVPixelBufferUnlockBaseAddress(image_buffer, 0);
+
+    return status;
+}
+
 static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     AVFContext* ctx = (AVFContext*)s->priv_data;
@@ -903,7 +947,7 @@
         image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
 
         if (ctx->current_frame != nil) {
-            void *data;
+            int status;
             if (av_new_packet(pkt, (int)CVPixelBufferGetDataSize(image_buffer)) < 0) {
                 return AVERROR(EIO);
             }
@@ -919,14 +963,12 @@
             pkt->stream_index  = ctx->video_stream_index;
             pkt->flags        |= AV_PKT_FLAG_KEY;
 
-            CVPixelBufferLockBaseAddress(image_buffer, 0);
-
-            data = CVPixelBufferGetBaseAddress(image_buffer);
-            memcpy(pkt->data, data, pkt->size);
-
-            CVPixelBufferUnlockBaseAddress(image_buffer, 0);
+            status = copy_cvpixelbuffer(s, image_buffer, pkt);
             CFRelease(ctx->current_frame);
             ctx->current_frame = nil;
+
+            if (status < 0)
+                return status;
         } else if (ctx->current_audio_frame != nil) {
             CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
             int block_buffer_size         = CMBlockBufferGetDataLength(block_buffer);

diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index 418247d..993cc19 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c

@@ -294,7 +294,7 @@
     st->codecpar->height = s->height;
     st->avg_frame_rate = framerate;
 
-    if (bktr_init(s1->filename, s->width, s->height, s->standard,
+    if (bktr_init(s1->url, s->width, s->height, s->standard,
                   &s->video_fd, &s->tuner_fd, -1, 0.0) < 0) {
         ret = AVERROR(EIO);
         goto out;

diff --git a/libavdevice/caca.c b/libavdevice/caca.c
index 93cc0ff..47de824 100644
--- a/libavdevice/caca.c
+++ b/libavdevice/caca.c

@@ -178,7 +178,7 @@
     }
 
     if (!c->window_title)
-        c->window_title = av_strdup(s->filename);
+        c->window_title = av_strdup(s->url);
     caca_set_display_title(c->display, c->window_title);
     caca_set_display_time(c->display, av_rescale_q(1, st->codec->time_base, AV_TIME_BASE_Q));
 

diff --git a/libavdevice/decklink_common.cpp b/libavdevice/decklink_common.cpp
index 2bd63ac..b88d6c6 100644
--- a/libavdevice/decklink_common.cpp
+++ b/libavdevice/decklink_common.cpp

@@ -29,7 +29,18 @@
 #ifdef _WIN32
 #include <DeckLinkAPI_i.c>
 #else
+/* The file provided by the SDK is known to be missing prototypes, which doesn't
+   cause issues with GCC since the warning doesn't apply to C++ files.  However
+   Clang does complain (and warnings are treated as errors), so suppress the
+   warning just for this one file */
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#endif
 #include <DeckLinkAPIDispatch.cpp>
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
 #endif
 
 extern "C" {
@@ -42,68 +53,49 @@
 
 #include "decklink_common.h"
 
-#ifdef _WIN32
-IDeckLinkIterator *CreateDeckLinkIteratorInstance(void)
+static IDeckLinkIterator *decklink_create_iterator(AVFormatContext *avctx)
 {
     IDeckLinkIterator *iter;
 
+#ifdef _WIN32
     if (CoInitialize(NULL) < 0) {
-        av_log(NULL, AV_LOG_ERROR, "COM initialization failed.\n");
+        av_log(avctx, AV_LOG_ERROR, "COM initialization failed.\n");
         return NULL;
     }
 
     if (CoCreateInstance(CLSID_CDeckLinkIterator, NULL, CLSCTX_ALL,
                          IID_IDeckLinkIterator, (void**) &iter) != S_OK) {
-        av_log(NULL, AV_LOG_ERROR, "DeckLink drivers not installed.\n");
-        return NULL;
+        iter = NULL;
     }
+#else
+    iter = CreateDeckLinkIteratorInstance();
+#endif
+    if (!iter)
+        av_log(avctx, AV_LOG_ERROR, "Could not create DeckLink iterator. "
+                                    "Make sure you have DeckLink drivers " BLACKMAGIC_DECKLINK_API_VERSION_STRING " or newer installed.\n");
 
     return iter;
 }
-#endif
 
-#ifdef _WIN32
-static char *dup_wchar_to_utf8(wchar_t *w)
+int decklink_get_attr_string(IDeckLink *dl, BMDDeckLinkAttributeID cfg_id, const char **s)
 {
-    char *s = NULL;
-    int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
-    s = (char *) av_malloc(l);
-    if (s)
-        WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
-    return s;
-}
-#define DECKLINK_STR    OLECHAR *
-#define DECKLINK_STRDUP dup_wchar_to_utf8
-#define DECKLINK_FREE(s) SysFreeString(s)
-#define DECKLINK_BOOL BOOL
-#elif defined(__APPLE__)
-static char *dup_cfstring_to_utf8(CFStringRef w)
-{
-    char s[256];
-    CFStringGetCString(w, s, 255, kCFStringEncodingUTF8);
-    return av_strdup(s);
-}
-#define DECKLINK_STR    const __CFString *
-#define DECKLINK_STRDUP dup_cfstring_to_utf8
-#define DECKLINK_FREE(s) CFRelease(s)
-#define DECKLINK_BOOL bool
-#else
-#define DECKLINK_STR    const char *
-#define DECKLINK_STRDUP av_strdup
-/* free() is needed for a string returned by the DeckLink SDL. */
-#define DECKLINK_FREE(s) free((void *) s)
-#define DECKLINK_BOOL bool
-#endif
-
-HRESULT ff_decklink_get_display_name(IDeckLink *This, const char **displayName)
-{
-    DECKLINK_STR tmpDisplayName;
-    HRESULT hr = This->GetDisplayName(&tmpDisplayName);
-    if (hr != S_OK)
-        return hr;
-    *displayName = DECKLINK_STRDUP(tmpDisplayName);
-    DECKLINK_FREE(tmpDisplayName);
-    return hr;
+    DECKLINK_STR tmp;
+    HRESULT hr;
+    IDeckLinkAttributes *attr;
+    *s = NULL;
+    if (dl->QueryInterface(IID_IDeckLinkAttributes, (void **)&attr) != S_OK)
+        return AVERROR_EXTERNAL;
+    hr = attr->GetString(cfg_id, &tmp);
+    attr->Release();
+    if (hr == S_OK) {
+        *s = DECKLINK_STRDUP(tmp);
+        DECKLINK_FREE(tmp);
+        if (!*s)
+            return AVERROR(ENOMEM);
+    } else if (hr == E_FAIL) {
+        return AVERROR_EXTERNAL;
+    }
+    return 0;
 }
 
 static int decklink_select_input(AVFormatContext *avctx, BMDDeckLinkConfigurationID cfg_id)
@@ -148,23 +140,12 @@
     return false;
 }
 
-int ff_decklink_set_format(AVFormatContext *avctx,
-                               int width, int height,
-                               int tb_num, int tb_den,
-                               enum AVFieldOrder field_order,
-                               decklink_direction_t direction, int num)
-{
+int ff_decklink_set_configs(AVFormatContext *avctx,
+                            decklink_direction_t direction) {
     struct decklink_cctx *cctx = (struct decklink_cctx *)avctx->priv_data;
     struct decklink_ctx *ctx = (struct decklink_ctx *)cctx->ctx;
-    BMDDisplayModeSupport support;
-    IDeckLinkDisplayModeIterator *itermode;
-    IDeckLinkDisplayMode *mode;
-    int i = 1;
     HRESULT res;
 
-    av_log(avctx, AV_LOG_DEBUG, "Trying to find mode for frame size %dx%d, frame timing %d/%d, field order %d, direction %d, mode number %d, format code %s\n",
-        width, height, tb_num, tb_den, field_order, direction, num, (cctx->format_code) ? cctx->format_code : "(unset)");
-
     if (ctx->duplex_mode) {
         DECKLINK_BOOL duplex_supported = false;
 
@@ -181,7 +162,6 @@
             av_log(avctx, AV_LOG_WARNING, "Unable to set duplex mode, because it is not supported.\n");
         }
     }
-
     if (direction == DIRECTION_IN) {
         int ret;
         ret = decklink_select_input(avctx, bmdDeckLinkConfigAudioInputConnection);
@@ -190,6 +170,28 @@
         ret = decklink_select_input(avctx, bmdDeckLinkConfigVideoInputConnection);
         if (ret < 0)
             return ret;
+    }
+    return 0;
+}
+
+int ff_decklink_set_format(AVFormatContext *avctx,
+                               int width, int height,
+                               int tb_num, int tb_den,
+                               enum AVFieldOrder field_order,
+                               decklink_direction_t direction, int num)
+{
+    struct decklink_cctx *cctx = (struct decklink_cctx *)avctx->priv_data;
+    struct decklink_ctx *ctx = (struct decklink_ctx *)cctx->ctx;
+    BMDDisplayModeSupport support;
+    IDeckLinkDisplayModeIterator *itermode;
+    IDeckLinkDisplayMode *mode;
+    int i = 1;
+    HRESULT res;
+
+    av_log(avctx, AV_LOG_DEBUG, "Trying to find mode for frame size %dx%d, frame timing %d/%d, field order %d, direction %d, mode number %d, format code %s\n",
+        width, height, tb_num, tb_den, field_order, direction, num, (cctx->format_code) ? cctx->format_code : "(unset)");
+
+    if (direction == DIRECTION_IN) {
         res = ctx->dli->GetDisplayModeIterator (&itermode);
     } else {
         res = ctx->dlo->GetDisplayModeIterator (&itermode);
@@ -247,10 +249,18 @@
                                            &support, NULL) != S_OK)
             return -1;
     } else {
-        if (ctx->dlo->DoesSupportVideoMode(ctx->bmd_mode, bmdFormat8BitYUV,
-                                           bmdVideoOutputFlagDefault,
-                                           &support, NULL) != S_OK)
-        return -1;
+        if (!ctx->supports_vanc || ctx->dlo->DoesSupportVideoMode(ctx->bmd_mode, ctx->raw_format,
+                                                                  bmdVideoOutputVANC,
+                                                                  &support, NULL) != S_OK) {
+            /* Try without VANC enabled */
+            if (ctx->dlo->DoesSupportVideoMode(ctx->bmd_mode, ctx->raw_format,
+                                               bmdVideoOutputFlagDefault,
+                                               &support, NULL) != S_OK) {
+                return -1;
+            }
+            ctx->supports_vanc = 0;
+        }
+
     }
     if (support == bmdDisplayModeSupported)
         return 0;
@@ -267,22 +277,26 @@
                              int show_inputs, int show_outputs)
 {
     IDeckLink *dl = NULL;
-    IDeckLinkIterator *iter = CreateDeckLinkIteratorInstance();
+    IDeckLinkIterator *iter = decklink_create_iterator(avctx);
     int ret = 0;
 
-    if (!iter) {
-        av_log(avctx, AV_LOG_ERROR, "Could not create DeckLink iterator\n");
+    if (!iter)
         return AVERROR(EIO);
-    }
 
     while (ret == 0 && iter->Next(&dl) == S_OK) {
         IDeckLinkOutput *output_config;
         IDeckLinkInput *input_config;
-        const char *displayName;
+        const char *display_name = NULL;
+        const char *unique_name = NULL;
         AVDeviceInfo *new_device = NULL;
         int add = 0;
 
-        ff_decklink_get_display_name(dl, &displayName);
+        ret = decklink_get_attr_string(dl, BMDDeckLinkDisplayName, &display_name);
+        if (ret < 0)
+            goto next;
+        ret = decklink_get_attr_string(dl, BMDDeckLinkDeviceHandle, &unique_name);
+        if (ret < 0)
+            goto next;
 
         if (show_outputs) {
             if (dl->QueryInterface(IID_IDeckLinkOutput, (void **)&output_config) == S_OK) {
@@ -304,21 +318,14 @@
                 ret = AVERROR(ENOMEM);
                 goto next;
             }
-            new_device->device_name = av_strdup(displayName);
-            if (!new_device->device_name) {
-                ret = AVERROR(ENOMEM);
-                goto next;
-            }
 
-            new_device->device_description = av_strdup(displayName);
-            if (!new_device->device_description) {
-                av_freep(&new_device->device_name);
-                ret = AVERROR(ENOMEM);
-                goto next;
-            }
+            new_device->device_name = av_strdup(unique_name ? unique_name : display_name);
+            new_device->device_description = av_strdup(display_name);
 
-            if ((ret = av_dynarray_add_nofree(&device_list->devices,
-                                              &device_list->nb_devices, new_device)) < 0) {
+            if (!new_device->device_name ||
+                !new_device->device_description ||
+                av_dynarray_add_nofree(&device_list->devices, &device_list->nb_devices, new_device) < 0) {
+                ret = AVERROR(ENOMEM);
                 av_freep(&new_device->device_name);
                 av_freep(&new_device->device_description);
                 av_freep(&new_device);
@@ -327,7 +334,8 @@
         }
 
     next:
-        av_freep(&displayName);
+        av_freep(&display_name);
+        av_freep(&unique_name);
         dl->Release();
     }
     iter->Release();
@@ -352,7 +360,7 @@
         av_log(avctx, AV_LOG_INFO, "Blackmagic DeckLink %s devices:\n",
                show_inputs ? "input" : "output");
         for (int i = 0; i < device_list->nb_devices; i++) {
-            av_log(avctx, AV_LOG_INFO, "\t'%s'\n", device_list->devices[i]->device_name);
+            av_log(avctx, AV_LOG_INFO, "\t'%s'\n", device_list->devices[i]->device_description);
         }
     }
     avdevice_free_list_devices(&device_list);
@@ -386,7 +394,7 @@
     }
 
     av_log(avctx, AV_LOG_INFO, "Supported formats for '%s':\n\tformat_code\tdescription",
-               avctx->filename);
+               avctx->url);
     while (itermode->Next(&mode) == S_OK) {
         BMDTimeValue tb_num, tb_den;
         mode->GetFrameRate(&tb_num, &tb_den);
@@ -431,21 +439,23 @@
     struct decklink_cctx *cctx = (struct decklink_cctx *)avctx->priv_data;
     struct decklink_ctx *ctx = (struct decklink_ctx *)cctx->ctx;
     IDeckLink *dl = NULL;
-    IDeckLinkIterator *iter = CreateDeckLinkIteratorInstance();
-    if (!iter) {
-        av_log(avctx, AV_LOG_ERROR, "Could not create DeckLink iterator\n");
+    IDeckLinkIterator *iter = decklink_create_iterator(avctx);
+    if (!iter)
         return AVERROR_EXTERNAL;
-    }
 
     while (iter->Next(&dl) == S_OK) {
-        const char *displayName;
-        ff_decklink_get_display_name(dl, &displayName);
-        if (!strcmp(name, displayName)) {
-            av_free((void *)displayName);
+        const char *display_name = NULL;
+        const char *unique_name = NULL;
+        decklink_get_attr_string(dl, BMDDeckLinkDisplayName, &display_name);
+        decklink_get_attr_string(dl, BMDDeckLinkDeviceHandle, &unique_name);
+        if (display_name && !strcmp(name, display_name) || unique_name && !strcmp(name, unique_name)) {
+            av_free((void *)unique_name);
+            av_free((void *)display_name);
             ctx->dl = dl;
             break;
         }
-        av_free((void *)displayName);
+        av_free((void *)display_name);
+        av_free((void *)unique_name);
         dl->Release();
     }
     iter->Release();

diff --git a/libavdevice/decklink_common.h b/libavdevice/decklink_common.h
index 6b2525f..d2fc3f7 100644
--- a/libavdevice/decklink_common.h
+++ b/libavdevice/decklink_common.h

@@ -27,6 +27,45 @@
 
 #include "libavutil/thread.h"
 #include "decklink_common_c.h"
+#if CONFIG_LIBKLVANC
+#include "libklvanc/vanc.h"
+#endif
+
+#ifdef _WIN32
+#define DECKLINK_BOOL BOOL
+#else
+#define DECKLINK_BOOL bool
+#endif
+
+#ifdef _WIN32
+static char *dup_wchar_to_utf8(wchar_t *w)
+{
+    char *s = NULL;
+    int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
+    s = (char *) av_malloc(l);
+    if (s)
+        WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
+    return s;
+}
+#define DECKLINK_STR    OLECHAR *
+#define DECKLINK_STRDUP dup_wchar_to_utf8
+#define DECKLINK_FREE(s) SysFreeString(s)
+#elif defined(__APPLE__)
+static char *dup_cfstring_to_utf8(CFStringRef w)
+{
+    char s[256];
+    CFStringGetCString(w, s, 255, kCFStringEncodingUTF8);
+    return av_strdup(s);
+}
+#define DECKLINK_STR    const __CFString *
+#define DECKLINK_STRDUP dup_cfstring_to_utf8
+#define DECKLINK_FREE(s) CFRelease(s)
+#else
+#define DECKLINK_STR    const char *
+#define DECKLINK_STRDUP av_strdup
+/* free() is needed for a string returned by the DeckLink SDL. */
+#define DECKLINK_FREE(s) free((void *) s)
+#endif
 
 class decklink_output_callback;
 class decklink_input_callback;
@@ -50,7 +89,6 @@
     IDeckLinkConfiguration *cfg;
     IDeckLinkAttributes *attr;
     decklink_output_callback *output_callback;
-    decklink_input_callback *input_callback;
 
     /* DeckLink mode information */
     BMDTimeValue bmd_tb_den;
@@ -58,9 +96,11 @@
     BMDDisplayMode bmd_mode;
     BMDVideoConnection video_input;
     BMDAudioConnection audio_input;
+    BMDTimecodeFormat tc_format;
     int bmd_width;
     int bmd_height;
     int bmd_field_dominance;
+    int supports_vanc;
 
     /* Capture buffer queue */
     AVPacketQueue queue;
@@ -78,6 +118,7 @@
     AVStream *audio_st;
     AVStream *video_st;
     AVStream *teletext_st;
+    uint16_t cdp_sequence_num;
 
     /* Options */
     int list_devices;
@@ -88,6 +129,7 @@
     DecklinkPtsSource audio_pts_source;
     DecklinkPtsSource video_pts_source;
     int draw_bars;
+    BMDPixelFormat raw_format;
 
     int frames_preroll;
     int frames_buffer;
@@ -95,8 +137,14 @@
     pthread_mutex_t mutex;
     pthread_cond_t cond;
     int frames_buffer_available_spots;
+    int autodetect;
+
+#if CONFIG_LIBKLVANC
+    struct klvanc_context_s *vanc_ctx;
+#endif
 
     int channels;
+    int audio_depth;
 };
 
 typedef enum { DIRECTION_IN, DIRECTION_OUT} decklink_direction_t;
@@ -132,7 +180,18 @@
     bmdVideoConnectionSVideo,
 };
 
-HRESULT ff_decklink_get_display_name(IDeckLink *This, const char **displayName);
+static const BMDTimecodeFormat decklink_timecode_format_map[] = {
+    (BMDTimecodeFormat)0,
+    bmdTimecodeRP188VITC1,
+    bmdTimecodeRP188VITC2,
+    bmdTimecodeRP188LTC,
+    bmdTimecodeRP188Any,
+    bmdTimecodeVITC,
+    bmdTimecodeVITCField2,
+    bmdTimecodeSerial,
+};
+
+int ff_decklink_set_configs(AVFormatContext *avctx, decklink_direction_t direction);
 int ff_decklink_set_format(AVFormatContext *avctx, int width, int height, int tb_num, int tb_den, enum AVFieldOrder field_order, decklink_direction_t direction = DIRECTION_OUT, int num = 0);
 int ff_decklink_set_format(AVFormatContext *avctx, decklink_direction_t direction, int num);
 int ff_decklink_list_devices(AVFormatContext *avctx, struct AVDeviceInfoList *device_list, int show_inputs, int show_outputs);

diff --git a/libavdevice/decklink_common_c.h b/libavdevice/decklink_common_c.h
index 5616ab3..8e3bbeb 100644
--- a/libavdevice/decklink_common_c.h
+++ b/libavdevice/decklink_common_c.h

@@ -28,6 +28,8 @@
     PTS_SRC_VIDEO     = 2,
     PTS_SRC_REFERENCE = 3,
     PTS_SRC_WALLCLOCK = 4,
+    PTS_SRC_ABS_WALLCLOCK = 5,
+    PTS_SRC_NB
 } DecklinkPtsSource;
 
 struct decklink_cctx {
@@ -42,15 +44,19 @@
     double preroll;
     int v210;
     int audio_channels;
+    int audio_depth;
     int duplex_mode;
     DecklinkPtsSource audio_pts_source;
     DecklinkPtsSource video_pts_source;
     int audio_input;
     int video_input;
+    int tc_format;
     int draw_bars;
     char *format_code;
     int raw_format;
     int64_t queue_size;
+    int copyts;
+    int64_t timestamp_align;
 };
 
 #endif /* AVDEVICE_DECKLINK_COMMON_C_H */

diff --git a/libavdevice/decklink_dec.cpp b/libavdevice/decklink_dec.cpp
index d9ac01a..deb8f78 100644
--- a/libavdevice/decklink_dec.cpp
+++ b/libavdevice/decklink_dec.cpp

@@ -21,6 +21,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <atomic>
+using std::atomic;
+
 /* Include internal.h first to avoid conflict between winsock.h (used by
  * DeckLink headers) and winsock2.h (used by libavformat) in MSVC++ builds */
 extern "C" {
@@ -36,6 +39,7 @@
 #include "libavutil/avutil.h"
 #include "libavutil/common.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
 #include "libavutil/time.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/reverse.h"
@@ -49,6 +53,7 @@
 #include "decklink_dec.h"
 
 #define MAX_WIDTH_VANC 1920
+const BMDDisplayMode AUTODETECT_DEFAULT_MODE = bmdModeNTSC;
 
 typedef struct VANCLineNumber {
     BMDDisplayMode mode;
@@ -96,6 +101,52 @@
     {bmdModeUnknown, 0, -1, -1, -1}
 };
 
+class decklink_allocator : public IDeckLinkMemoryAllocator
+{
+public:
+        decklink_allocator(): _refs(1) { }
+        virtual ~decklink_allocator() { }
+
+        // IDeckLinkMemoryAllocator methods
+        virtual HRESULT STDMETHODCALLTYPE AllocateBuffer(unsigned int bufferSize, void* *allocatedBuffer)
+        {
+            void *buf = av_malloc(bufferSize + AV_INPUT_BUFFER_PADDING_SIZE);
+            if (!buf)
+                return E_OUTOFMEMORY;
+            *allocatedBuffer = buf;
+            return S_OK;
+        }
+        virtual HRESULT STDMETHODCALLTYPE ReleaseBuffer(void* buffer)
+        {
+            av_free(buffer);
+            return S_OK;
+        }
+        virtual HRESULT STDMETHODCALLTYPE Commit() { return S_OK; }
+        virtual HRESULT STDMETHODCALLTYPE Decommit() { return S_OK; }
+
+        // IUnknown methods
+        virtual HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, LPVOID *ppv) { return E_NOINTERFACE; }
+        virtual ULONG   STDMETHODCALLTYPE AddRef(void) { return ++_refs; }
+        virtual ULONG   STDMETHODCALLTYPE Release(void)
+        {
+            int ret = --_refs;
+            if (!ret)
+                delete this;
+            return ret;
+        }
+
+private:
+        std::atomic<int>  _refs;
+};
+
+extern "C" {
+static void decklink_object_free(void *opaque, uint8_t *data)
+{
+    IUnknown *obj = (class IUnknown *)opaque;
+    obj->Release();
+}
+}
+
 static int get_vanc_line_idx(BMDDisplayMode mode)
 {
     unsigned int i;
@@ -147,6 +198,17 @@
     }
 }
 
+static void unpack_v210(uint16_t *dst, const uint8_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width * 2 / 3; i++) {
+        *dst++ =  src[0]       + ((src[1] & 3)  << 8);
+        *dst++ = (src[1] >> 2) + ((src[2] & 15) << 6);
+        *dst++ = (src[2] >> 4) + ((src[3] & 63) << 4);
+        src += 4;
+    }
+}
+
 static uint8_t calc_parity_and_line_offset(int line)
 {
     uint8_t ret = (line < 313) << 5;
@@ -262,8 +324,8 @@
     return tgt;
 }
 
-uint8_t *vanc_to_cc(AVFormatContext *avctx, uint16_t *buf, size_t words,
-    unsigned &cc_count)
+static uint8_t *vanc_to_cc(AVFormatContext *avctx, uint16_t *buf, size_t words,
+                           unsigned &cc_count)
 {
     size_t i, len = (buf[5] & 0xff) + 6 + 1;
     uint8_t cdp_sum, rate;
@@ -352,8 +414,8 @@
     return cc;
 }
 
-uint8_t *get_metadata(AVFormatContext *avctx, uint16_t *buf, size_t width,
-                      uint8_t *tgt, size_t tgt_size, AVPacket *pkt)
+static uint8_t *get_metadata(AVFormatContext *avctx, uint16_t *buf, size_t width,
+                             uint8_t *tgt, size_t tgt_size, AVPacket *pkt)
 {
     decklink_cctx *cctx = (struct decklink_cctx *) avctx->priv_data;
     uint16_t *max_buf = buf + width;
@@ -451,24 +513,25 @@
 static int avpacket_queue_put(AVPacketQueue *q, AVPacket *pkt)
 {
     AVPacketList *pkt1;
-    int ret;
 
     // Drop Packet if queue size is > maximum queue size
     if (avpacket_queue_size(q) > (uint64_t)q->max_q_size) {
+        av_packet_unref(pkt);
         av_log(q->avctx, AV_LOG_WARNING,  "Decklink input buffer overrun!\n");
         return -1;
     }
+    /* ensure the packet is reference counted */
+    if (av_packet_make_refcounted(pkt) < 0) {
+        av_packet_unref(pkt);
+        return -1;
+    }
 
-    pkt1 = (AVPacketList *)av_mallocz(sizeof(AVPacketList));
+    pkt1 = (AVPacketList *)av_malloc(sizeof(AVPacketList));
     if (!pkt1) {
+        av_packet_unref(pkt);
         return -1;
     }
-    ret = av_packet_ref(&pkt1->pkt, pkt);
-    av_packet_unref(pkt);
-    if (ret < 0) {
-        av_free(pkt1);
-        return -1;
-    }
+    av_packet_move_ref(&pkt1->pkt, pkt);
     pkt1->next = NULL;
 
     pthread_mutex_lock(&q->mutex);
@@ -533,8 +596,7 @@
         virtual HRESULT STDMETHODCALLTYPE VideoInputFrameArrived(IDeckLinkVideoInputFrame*, IDeckLinkAudioInputPacket*);
 
 private:
-        ULONG           m_refCount;
-        pthread_mutex_t m_mutex;
+        std::atomic<int>  _refs;
         AVFormatContext *avctx;
         decklink_ctx    *ctx;
         int no_video;
@@ -542,49 +604,39 @@
         int64_t initial_audio_pts;
 };
 
-decklink_input_callback::decklink_input_callback(AVFormatContext *_avctx) : m_refCount(0)
+decklink_input_callback::decklink_input_callback(AVFormatContext *_avctx) : _refs(1)
 {
     avctx = _avctx;
     decklink_cctx       *cctx = (struct decklink_cctx *)avctx->priv_data;
     ctx = (struct decklink_ctx *)cctx->ctx;
     no_video = 0;
     initial_audio_pts = initial_video_pts = AV_NOPTS_VALUE;
-    pthread_mutex_init(&m_mutex, NULL);
 }
 
 decklink_input_callback::~decklink_input_callback()
 {
-    pthread_mutex_destroy(&m_mutex);
 }
 
 ULONG decklink_input_callback::AddRef(void)
 {
-    pthread_mutex_lock(&m_mutex);
-    m_refCount++;
-    pthread_mutex_unlock(&m_mutex);
-
-    return (ULONG)m_refCount;
+    return ++_refs;
 }
 
 ULONG decklink_input_callback::Release(void)
 {
-    pthread_mutex_lock(&m_mutex);
-    m_refCount--;
-    pthread_mutex_unlock(&m_mutex);
-
-    if (m_refCount == 0) {
+    int ret = --_refs;
+    if (!ret)
         delete this;
-        return 0;
-    }
-
-    return (ULONG)m_refCount;
+    return ret;
 }
 
 static int64_t get_pkt_pts(IDeckLinkVideoInputFrame *videoFrame,
                            IDeckLinkAudioInputPacket *audioFrame,
                            int64_t wallclock,
+                           int64_t abs_wallclock,
                            DecklinkPtsSource pts_src,
-                           AVRational time_base, int64_t *initial_pts)
+                           AVRational time_base, int64_t *initial_pts,
+                           int copyts)
 {
     int64_t pts = AV_NOPTS_VALUE;
     BMDTimeValue bmd_pts;
@@ -604,23 +656,30 @@
                 res = videoFrame->GetHardwareReferenceTimestamp(time_base.den, &bmd_pts, &bmd_duration);
             break;
         case PTS_SRC_WALLCLOCK:
+            /* fall through */
+        case PTS_SRC_ABS_WALLCLOCK:
         {
             /* MSVC does not support compound literals like AV_TIME_BASE_Q
              * in C++ code (compiler error C4576) */
             AVRational timebase;
             timebase.num = 1;
             timebase.den = AV_TIME_BASE;
-            pts = av_rescale_q(wallclock, timebase, time_base);
+            if (pts_src == PTS_SRC_WALLCLOCK)
+                pts = av_rescale_q(wallclock, timebase, time_base);
+            else
+                pts = av_rescale_q(abs_wallclock, timebase, time_base);
             break;
         }
     }
     if (res == S_OK)
         pts = bmd_pts / time_base.num;
 
-    if (pts != AV_NOPTS_VALUE && *initial_pts == AV_NOPTS_VALUE)
-        *initial_pts = pts;
-    if (*initial_pts != AV_NOPTS_VALUE)
-        pts -= *initial_pts;
+    if (!copyts) {
+        if (pts != AV_NOPTS_VALUE && *initial_pts == AV_NOPTS_VALUE)
+            *initial_pts = pts;
+        if (*initial_pts != AV_NOPTS_VALUE)
+            pts -= *initial_pts;
+    }
 
     return pts;
 }
@@ -632,11 +691,33 @@
     void *audioFrameBytes;
     BMDTimeValue frameTime;
     BMDTimeValue frameDuration;
-    int64_t wallclock = 0;
+    int64_t wallclock = 0, abs_wallclock = 0;
+    struct decklink_cctx *cctx = (struct decklink_cctx *) avctx->priv_data;
+
+    if (ctx->autodetect) {
+        if (videoFrame && !(videoFrame->GetFlags() & bmdFrameHasNoInputSource) &&
+            ctx->bmd_mode == bmdModeUnknown)
+        {
+            ctx->bmd_mode = AUTODETECT_DEFAULT_MODE;
+        }
+        return S_OK;
+    }
+
+    // Drop the frames till system's timestamp aligns with the configured value.
+    if (0 == ctx->frameCount && cctx->timestamp_align) {
+        AVRational remainder = av_make_q(av_gettime() % cctx->timestamp_align, 1000000);
+        AVRational frame_duration = av_inv_q(ctx->video_st->r_frame_rate);
+        if (av_cmp_q(remainder, frame_duration) > 0) {
+            ++ctx->dropped;
+            return S_OK;
+        }
+    }
 
     ctx->frameCount++;
     if (ctx->audio_pts_source == PTS_SRC_WALLCLOCK || ctx->video_pts_source == PTS_SRC_WALLCLOCK)
         wallclock = av_gettime_relative();
+    if (ctx->audio_pts_source == PTS_SRC_ABS_WALLCLOCK || ctx->video_pts_source == PTS_SRC_ABS_WALLCLOCK)
+        abs_wallclock = av_gettime();
 
     // Handle Video Frame
     if (videoFrame) {
@@ -681,9 +762,38 @@
                         "- Frames dropped %u\n", ctx->frameCount, ++ctx->dropped);
             }
             no_video = 0;
+
+            // Handle Timecode (if requested)
+            if (ctx->tc_format) {
+                IDeckLinkTimecode *timecode;
+                if (videoFrame->GetTimecode(ctx->tc_format, &timecode) == S_OK) {
+                    const char *tc = NULL;
+                    DECKLINK_STR decklink_tc;
+                    if (timecode->GetString(&decklink_tc) == S_OK) {
+                        tc = DECKLINK_STRDUP(decklink_tc);
+                        DECKLINK_FREE(decklink_tc);
+                    }
+                    timecode->Release();
+                    if (tc) {
+                        AVDictionary* metadata_dict = NULL;
+                        int metadata_len;
+                        uint8_t* packed_metadata;
+                        if (av_dict_set(&metadata_dict, "timecode", tc, AV_DICT_DONT_STRDUP_VAL) >= 0) {
+                            packed_metadata = av_packet_pack_dictionary(metadata_dict, &metadata_len);
+                            av_dict_free(&metadata_dict);
+                            if (packed_metadata) {
+                                if (av_packet_add_side_data(&pkt, AV_PKT_DATA_STRINGS_METADATA, packed_metadata, metadata_len) < 0)
+                                    av_freep(&packed_metadata);
+                            }
+                        }
+                    }
+                } else {
+                    av_log(avctx, AV_LOG_DEBUG, "Unable to find timecode.\n");
+                }
+            }
         }
 
-        pkt.pts = get_pkt_pts(videoFrame, audioFrame, wallclock, ctx->video_pts_source, ctx->video_st->time_base, &initial_video_pts);
+        pkt.pts = get_pkt_pts(videoFrame, audioFrame, wallclock, abs_wallclock, ctx->video_pts_source, ctx->video_st->time_base, &initial_video_pts, cctx->copyts);
         pkt.dts = pkt.pts;
 
         pkt.duration = frameDuration;
@@ -729,9 +839,15 @@
                     for (i = vanc_line_numbers[idx].vanc_start; i <= vanc_line_numbers[idx].vanc_end; i++) {
                         uint8_t *buf;
                         if (vanc->GetBufferForVerticalBlankingLine(i, (void**)&buf) == S_OK) {
-                            uint16_t luma_vanc[MAX_WIDTH_VANC];
-                            extract_luma_from_v210(luma_vanc, buf, videoFrame->GetWidth());
-                            txt_buf = get_metadata(avctx, luma_vanc, videoFrame->GetWidth(),
+                            uint16_t vanc[MAX_WIDTH_VANC];
+                            size_t vanc_size = videoFrame->GetWidth();
+                            if (ctx->bmd_mode == bmdModeNTSC && videoFrame->GetWidth() * 2 <= MAX_WIDTH_VANC) {
+                                vanc_size = vanc_size * 2;
+                                unpack_v210(vanc, buf, videoFrame->GetWidth());
+                            } else {
+                                extract_luma_from_v210(vanc, buf, videoFrame->GetWidth());
+                            }
+                            txt_buf = get_metadata(avctx, vanc, vanc_size,
                                                    txt_buf, sizeof(txt_buf0) - (txt_buf - txt_buf0), &pkt);
                         }
                         if (i == vanc_line_numbers[idx].field0_vanc_end)
@@ -759,6 +875,10 @@
             }
         }
 
+        pkt.buf = av_buffer_create(pkt.data, pkt.size, decklink_object_free, videoFrame, 0);
+        if (pkt.buf)
+            videoFrame->AddRef();
+
         if (avpacket_queue_put(&ctx->queue, &pkt) < 0) {
             ++ctx->dropped;
         }
@@ -771,10 +891,10 @@
         av_init_packet(&pkt);
 
         //hack among hacks
-        pkt.size = audioFrame->GetSampleFrameCount() * ctx->audio_st->codecpar->channels * (16 / 8);
+        pkt.size = audioFrame->GetSampleFrameCount() * ctx->audio_st->codecpar->channels * (ctx->audio_depth / 8);
         audioFrame->GetBytes(&audioFrameBytes);
         audioFrame->GetPacketTime(&audio_pts, ctx->audio_st->time_base.den);
-        pkt.pts = get_pkt_pts(videoFrame, audioFrame, wallclock, ctx->audio_pts_source, ctx->audio_st->time_base, &initial_audio_pts);
+        pkt.pts = get_pkt_pts(videoFrame, audioFrame, wallclock, abs_wallclock, ctx->audio_pts_source, ctx->audio_st->time_base, &initial_audio_pts, cctx->copyts);
         pkt.dts = pkt.pts;
 
         //fprintf(stderr,"Audio Frame size %d ts %d\n", pkt.size, pkt.pts);
@@ -794,17 +914,56 @@
     BMDVideoInputFormatChangedEvents events, IDeckLinkDisplayMode *mode,
     BMDDetectedVideoInputFormatFlags)
 {
+    ctx->bmd_mode = mode->GetDisplayMode();
     return S_OK;
 }
 
-static HRESULT decklink_start_input(AVFormatContext *avctx)
-{
-    struct decklink_cctx *cctx = (struct decklink_cctx *)avctx->priv_data;
+static int decklink_autodetect(struct decklink_cctx *cctx) {
     struct decklink_ctx *ctx = (struct decklink_ctx *)cctx->ctx;
+    DECKLINK_BOOL autodetect_supported = false;
+    int i;
 
-    ctx->input_callback = new decklink_input_callback(avctx);
-    ctx->dli->SetCallback(ctx->input_callback);
-    return ctx->dli->StartStreams();
+    if (ctx->attr->GetFlag(BMDDeckLinkSupportsInputFormatDetection, &autodetect_supported) != S_OK)
+        return -1;
+    if (autodetect_supported == false)
+        return -1;
+
+    ctx->autodetect = 1;
+    ctx->bmd_mode  = bmdModeUnknown;
+    if (ctx->dli->EnableVideoInput(AUTODETECT_DEFAULT_MODE,
+                                   bmdFormat8BitYUV,
+                                   bmdVideoInputEnableFormatDetection) != S_OK) {
+        return -1;
+    }
+
+    if (ctx->dli->StartStreams() != S_OK) {
+        return -1;
+    }
+
+    // 1 second timeout
+    for (i = 0; i < 10; i++) {
+        av_usleep(100000);
+        /* Sometimes VideoInputFrameArrived is called without the
+         * bmdFrameHasNoInputSource flag before VideoInputFormatChanged.
+         * So don't break for bmd_mode == AUTODETECT_DEFAULT_MODE. */
+        if (ctx->bmd_mode != bmdModeUnknown &&
+            ctx->bmd_mode != AUTODETECT_DEFAULT_MODE)
+            break;
+    }
+
+    ctx->dli->PauseStreams();
+    ctx->dli->FlushStreams();
+    ctx->autodetect = 0;
+    if (ctx->bmd_mode != bmdModeUnknown) {
+        cctx->format_code = (char *)av_mallocz(5);
+        if (!cctx->format_code)
+            return -1;
+        AV_WB32(cctx->format_code, ctx->bmd_mode);
+        return 0;
+    } else {
+        return -1;
+    }
+
 }
 
 extern "C" {
@@ -832,6 +991,8 @@
 {
     struct decklink_cctx *cctx = (struct decklink_cctx *)avctx->priv_data;
     struct decklink_ctx *ctx;
+    class decklink_allocator *allocator;
+    class decklink_input_callback *input_callback;
     AVStream *st;
     HRESULT result;
     char fname[1024];
@@ -847,6 +1008,8 @@
     ctx->teletext_lines = cctx->teletext_lines;
     ctx->preroll      = cctx->preroll;
     ctx->duplex_mode  = cctx->duplex_mode;
+    if (cctx->tc_format > 0 && (unsigned int)cctx->tc_format < FF_ARRAY_ELEMS(decklink_timecode_format_map))
+        ctx->tc_format = decklink_timecode_format_map[cctx->tc_format];
     if (cctx->video_input > 0 && (unsigned int)cctx->video_input < FF_ARRAY_ELEMS(decklink_video_connection_map))
         ctx->video_input = decklink_video_connection_map[cctx->video_input];
     if (cctx->audio_input > 0 && (unsigned int)cctx->audio_input < FF_ARRAY_ELEMS(decklink_audio_connection_map))
@@ -854,6 +1017,7 @@
     ctx->audio_pts_source = cctx->audio_pts_source;
     ctx->video_pts_source = cctx->video_pts_source;
     ctx->draw_bars = cctx->draw_bars;
+    ctx->audio_depth = cctx->audio_depth;
     cctx->ctx = ctx;
 
     /* Check audio channel option for valid values: 2, 8 or 16 */
@@ -867,6 +1031,16 @@
             return AVERROR(EINVAL);
     }
 
+    /* Check audio bit depth option for valid values: 16 or 32 */
+    switch (cctx->audio_depth) {
+        case 16:
+        case 32:
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Value for audio bit depth option must be either 16 or 32\n");
+            return AVERROR(EINVAL);
+    }
+
     /* List available devices. */
     if (ctx->list_devices) {
         ff_decklink_list_devices_legacy(avctx, 1, 0);
@@ -878,7 +1052,7 @@
         cctx->raw_format = MKBETAG('v','2','1','0');
     }
 
-    strcpy (fname, avctx->filename);
+    av_strlcpy(fname, avctx->url, sizeof(fname));
     tmp=strchr (fname, '@');
     if (tmp != NULL) {
         av_log(avctx, AV_LOG_WARNING, "The @mode syntax is deprecated and will be removed. Please use the -format_code option.\n");
@@ -893,7 +1067,7 @@
     /* Get input device. */
     if (ctx->dl->QueryInterface(IID_IDeckLinkInput, (void **) &ctx->dli) != S_OK) {
         av_log(avctx, AV_LOG_ERROR, "Could not open input device from '%s'\n",
-               avctx->filename);
+               avctx->url);
         ret = AVERROR(EIO);
         goto error;
     }
@@ -905,13 +1079,41 @@
         goto error;
     }
 
-    if (mode_num > 0 || cctx->format_code) {
-        if (ff_decklink_set_format(avctx, DIRECTION_IN, mode_num) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Could not set mode number %d or format code %s for %s\n",
-                mode_num, (cctx->format_code) ? cctx->format_code : "(unset)", fname);
+    if (ff_decklink_set_configs(avctx, DIRECTION_IN) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Could not set input configuration\n");
+        ret = AVERROR(EIO);
+        goto error;
+    }
+
+    input_callback = new decklink_input_callback(avctx);
+    ret = (ctx->dli->SetCallback(input_callback) == S_OK ? 0 : AVERROR_EXTERNAL);
+    input_callback->Release();
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot set input callback\n");
+        goto error;
+    }
+
+    allocator = new decklink_allocator();
+    ret = (ctx->dli->SetVideoInputFrameMemoryAllocator(allocator) == S_OK ? 0 : AVERROR_EXTERNAL);
+    allocator->Release();
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot set custom memory allocator\n");
+        goto error;
+    }
+
+    if (mode_num == 0 && !cctx->format_code) {
+        if (decklink_autodetect(cctx) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Cannot Autodetect input stream or No signal\n");
             ret = AVERROR(EIO);
             goto error;
         }
+        av_log(avctx, AV_LOG_INFO, "Autodetected the input mode\n");
+    }
+    if (ff_decklink_set_format(avctx, DIRECTION_IN, mode_num) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Could not set mode number %d or format code %s for %s\n",
+            mode_num, (cctx->format_code) ? cctx->format_code : "(unset)", fname);
+        ret = AVERROR(EIO);
+        goto error;
     }
 
 #if !CONFIG_LIBZVBI
@@ -930,7 +1132,7 @@
         goto error;
     }
     st->codecpar->codec_type  = AVMEDIA_TYPE_AUDIO;
-    st->codecpar->codec_id    = AV_CODEC_ID_PCM_S16LE;
+    st->codecpar->codec_id    = cctx->audio_depth == 32 ? AV_CODEC_ID_PCM_S32LE : AV_CODEC_ID_PCM_S16LE;
     st->codecpar->sample_rate = bmdAudioSampleRate48kHz;
     st->codecpar->channels    = cctx->audio_channels;
     avpriv_set_pts_info(st, 64, 1, 1000000);  /* 64 bits pts in us */
@@ -948,7 +1150,7 @@
 
     st->time_base.den      = ctx->bmd_tb_den;
     st->time_base.num      = ctx->bmd_tb_num;
-    av_stream_set_r_frame_rate(st, av_make_q(st->time_base.den, st->time_base.num));
+    st->r_frame_rate       = av_make_q(st->time_base.den, st->time_base.num);
 
     switch((BMDPixelFormat)cctx->raw_format) {
     case bmdFormat8BitYUV:
@@ -965,7 +1167,7 @@
         break;
     case bmdFormat8BitARGB:
         st->codecpar->codec_id    = AV_CODEC_ID_RAWVIDEO;
-        st->codecpar->codec_tag   = avcodec_pix_fmt_to_codec_tag((enum AVPixelFormat)st->codecpar->format);;
+        st->codecpar->codec_tag   = avcodec_pix_fmt_to_codec_tag((enum AVPixelFormat)st->codecpar->format);
         st->codecpar->format      = AV_PIX_FMT_0RGB;
         st->codecpar->bit_rate    = av_rescale(ctx->bmd_width * ctx->bmd_height * 32, st->time_base.den, st->time_base.num);
         break;
@@ -1021,7 +1223,7 @@
     }
 
     av_log(avctx, AV_LOG_VERBOSE, "Using %d input audio channels\n", ctx->audio_st->codecpar->channels);
-    result = ctx->dli->EnableAudioInput(bmdAudioSampleRate48kHz, bmdAudioSampleType16bitInteger, ctx->audio_st->codecpar->channels);
+    result = ctx->dli->EnableAudioInput(bmdAudioSampleRate48kHz, cctx->audio_depth == 32 ? bmdAudioSampleType32bitInteger : bmdAudioSampleType16bitInteger, ctx->audio_st->codecpar->channels);
 
     if (result != S_OK) {
         av_log(avctx, AV_LOG_ERROR, "Cannot enable audio input\n");
@@ -1041,7 +1243,7 @@
 
     avpacket_queue_init (avctx, &ctx->queue);
 
-    if (decklink_start_input (avctx) != S_OK) {
+    if (ctx->dli->StartStreams() != S_OK) {
         av_log(avctx, AV_LOG_ERROR, "Cannot start input stream\n");
         ret = AVERROR(EIO);
         goto error;
@@ -1061,6 +1263,15 @@
 
     avpacket_queue_get(&ctx->queue, pkt, 1);
 
+    if (ctx->tc_format && !(av_dict_get(ctx->video_st->metadata, "timecode", NULL, 0))) {
+        int size;
+        const uint8_t *side_metadata = av_packet_get_side_data(pkt, AV_PKT_DATA_STRINGS_METADATA, &size);
+        if (side_metadata) {
+           if (av_packet_unpack_dictionary(side_metadata, size, &ctx->video_st->metadata) < 0)
+               av_log(avctx, AV_LOG_ERROR, "Unable to set timecode\n");
+        }
+    }
+
     return 0;
 }
 

diff --git a/libavdevice/decklink_dec_c.c b/libavdevice/decklink_dec_c.c
index 1127d23..91d2839 100644
--- a/libavdevice/decklink_dec_c.c
+++ b/libavdevice/decklink_dec_c.c

@@ -48,6 +48,15 @@
     { "unset",         NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 0}, 0, 0,    DEC, "duplex_mode"},
     { "half",          NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 1}, 0, 0,    DEC, "duplex_mode"},
     { "full",          NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 2}, 0, 0,    DEC, "duplex_mode"},
+    { "timecode_format", "timecode format",           OFFSET(tc_format),  AV_OPT_TYPE_INT,   { .i64 = 0}, 0, 7,    DEC, "tc_format"},
+    { "none",          NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 0}, 0, 0,    DEC, "tc_format"},
+    { "rp188vitc",     NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 1}, 0, 0,    DEC, "tc_format"},
+    { "rp188vitc2",    NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 2}, 0, 0,    DEC, "tc_format"},
+    { "rp188ltc",      NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 3}, 0, 0,    DEC, "tc_format"},
+    { "rp188any",      NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 4}, 0, 0,    DEC, "tc_format"},
+    { "vitc",          NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 5}, 0, 0,    DEC, "tc_format"},
+    { "vitc2",         NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 6}, 0, 0,    DEC, "tc_format"},
+    { "serial",        NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 7}, 0, 0,    DEC, "tc_format"},
     { "video_input",  "video input",              OFFSET(video_input),    AV_OPT_TYPE_INT,   { .i64 = 0}, 0, 6,    DEC, "video_input"},
     { "unset",         NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 0}, 0, 0,    DEC, "video_input"},
     { "sdi",           NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 1}, 0, 0,    DEC, "video_input"},
@@ -64,14 +73,18 @@
     { "analog_xlr",    NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 4}, 0, 0,    DEC, "audio_input"},
     { "analog_rca",    NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 5}, 0, 0,    DEC, "audio_input"},
     { "microphone",    NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = 6}, 0, 0,    DEC, "audio_input"},
-    { "audio_pts",     "audio pts source",   OFFSET(audio_pts_source),    AV_OPT_TYPE_INT,   { .i64 = PTS_SRC_AUDIO    }, 1, 4, DEC, "pts_source"},
-    { "video_pts",     "video pts source",   OFFSET(video_pts_source),    AV_OPT_TYPE_INT,   { .i64 = PTS_SRC_VIDEO    }, 1, 4, DEC, "pts_source"},
+    { "audio_pts",     "audio pts source",   OFFSET(audio_pts_source),    AV_OPT_TYPE_INT,   { .i64 = PTS_SRC_AUDIO    }, 1, PTS_SRC_NB-1, DEC, "pts_source"},
+    { "video_pts",     "video pts source",   OFFSET(video_pts_source),    AV_OPT_TYPE_INT,   { .i64 = PTS_SRC_VIDEO    }, 1, PTS_SRC_NB-1, DEC, "pts_source"},
     { "audio",         NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = PTS_SRC_AUDIO    }, 0, 0, DEC, "pts_source"},
     { "video",         NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = PTS_SRC_VIDEO    }, 0, 0, DEC, "pts_source"},
     { "reference",     NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = PTS_SRC_REFERENCE}, 0, 0, DEC, "pts_source"},
     { "wallclock",     NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = PTS_SRC_WALLCLOCK}, 0, 0, DEC, "pts_source"},
+    { "abs_wallclock", NULL,                                          0,  AV_OPT_TYPE_CONST, { .i64 = PTS_SRC_ABS_WALLCLOCK}, 0, 0, DEC, "pts_source"},
     { "draw_bars",     "draw bars on signal loss" , OFFSET(draw_bars),    AV_OPT_TYPE_BOOL,  { .i64 = 1}, 0, 1, DEC },
     { "queue_size",    "input queue buffer size",   OFFSET(queue_size),   AV_OPT_TYPE_INT64, { .i64 = (1024 * 1024 * 1024)}, 0, INT64_MAX, DEC },
+    { "audio_depth",   "audio bitdepth (16 or 32)", OFFSET(audio_depth),  AV_OPT_TYPE_INT,   { .i64 = 16}, 16, 32, DEC },
+    { "decklink_copyts", "copy timestamps, do not remove the initial offset", OFFSET(copyts), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, DEC },
+    { "timestamp_align", "capture start time alignment (in seconds)", OFFSET(timestamp_align), AV_OPT_TYPE_DURATION, { .i64 = 0 }, 0, INT_MAX, DEC },
     { NULL },
 };
 

diff --git a/libavdevice/decklink_enc.cpp b/libavdevice/decklink_enc.cpp
index 81df563..8b621d0 100644
--- a/libavdevice/decklink_enc.cpp
+++ b/libavdevice/decklink_enc.cpp

@@ -38,17 +38,20 @@
 
 #include "decklink_common.h"
 #include "decklink_enc.h"
-
+#if CONFIG_LIBKLVANC
+#include "libklvanc/vanc.h"
+#include "libklvanc/vanc-lines.h"
+#include "libklvanc/pixels.h"
+#endif
 
 /* DeckLink callback class declaration */
 class decklink_frame : public IDeckLinkVideoFrame
 {
 public:
     decklink_frame(struct decklink_ctx *ctx, AVFrame *avframe, AVCodecID codec_id, int height, int width) :
-        _ctx(ctx), _avframe(avframe), _avpacket(NULL), _codec_id(codec_id), _height(height), _width(width),  _refs(1) { }
+        _ctx(ctx), _avframe(avframe), _avpacket(NULL), _codec_id(codec_id), _ancillary(NULL), _height(height), _width(width),  _refs(1) { }
     decklink_frame(struct decklink_ctx *ctx, AVPacket *avpacket, AVCodecID codec_id, int height, int width) :
-        _ctx(ctx), _avframe(NULL), _avpacket(avpacket), _codec_id(codec_id), _height(height), _width(width), _refs(1) { }
-
+        _ctx(ctx), _avframe(NULL), _avpacket(avpacket), _codec_id(codec_id), _ancillary(NULL), _height(height), _width(width), _refs(1) { }
     virtual long           STDMETHODCALLTYPE GetWidth      (void)          { return _width; }
     virtual long           STDMETHODCALLTYPE GetHeight     (void)          { return _height; }
     virtual long           STDMETHODCALLTYPE GetRowBytes   (void)
@@ -87,8 +90,24 @@
     }
 
     virtual HRESULT STDMETHODCALLTYPE GetTimecode     (BMDTimecodeFormat format, IDeckLinkTimecode **timecode) { return S_FALSE; }
-    virtual HRESULT STDMETHODCALLTYPE GetAncillaryData(IDeckLinkVideoFrameAncillary **ancillary)               { return S_FALSE; }
-
+    virtual HRESULT STDMETHODCALLTYPE GetAncillaryData(IDeckLinkVideoFrameAncillary **ancillary)
+    {
+        *ancillary = _ancillary;
+        if (_ancillary) {
+            _ancillary->AddRef();
+            return S_OK;
+        } else {
+            return S_FALSE;
+        }
+    }
+    virtual HRESULT STDMETHODCALLTYPE SetAncillaryData(IDeckLinkVideoFrameAncillary *ancillary)
+    {
+        if (_ancillary)
+            _ancillary->Release();
+        _ancillary = ancillary;
+        _ancillary->AddRef();
+        return S_OK;
+    }
     virtual HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, LPVOID *ppv) { return E_NOINTERFACE; }
     virtual ULONG   STDMETHODCALLTYPE AddRef(void)                            { return ++_refs; }
     virtual ULONG   STDMETHODCALLTYPE Release(void)
@@ -97,6 +116,8 @@
         if (!ret) {
             av_frame_free(&_avframe);
             av_packet_free(&_avpacket);
+            if (_ancillary)
+                _ancillary->Release();
             delete this;
         }
         return ret;
@@ -106,6 +127,7 @@
     AVFrame *_avframe;
     AVPacket *_avpacket;
     AVCodecID _codec_id;
+    IDeckLinkVideoFrameAncillary *_ancillary;
     int _height;
     int _width;
 
@@ -156,12 +178,19 @@
                    " Only AV_PIX_FMT_UYVY422 is supported.\n");
             return -1;
         }
+        ctx->raw_format = bmdFormat8BitYUV;
     } else if (c->codec_id != AV_CODEC_ID_V210) {
         av_log(avctx, AV_LOG_ERROR, "Unsupported codec type!"
                " Only V210 and wrapped frame with AV_PIX_FMT_UYVY422 are supported.\n");
         return -1;
+    } else {
+        ctx->raw_format = bmdFormat10BitYUV;
     }
 
+    if (ff_decklink_set_configs(avctx, DIRECTION_OUT) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Could not set output configuration\n");
+        return -1;
+    }
     if (ff_decklink_set_format(avctx, c->width, c->height,
                             st->time_base.num, st->time_base.den, c->field_order)) {
         av_log(avctx, AV_LOG_ERROR, "Unsupported video size, framerate or field order!"
@@ -169,7 +198,7 @@
         return -1;
     }
     if (ctx->dlo->EnableVideoOutput(ctx->bmd_mode,
-                                    bmdVideoOutputFlagDefault) != S_OK) {
+                                    ctx->supports_vanc ? bmdVideoOutputVANC : bmdVideoOutputFlagDefault) != S_OK) {
         av_log(avctx, AV_LOG_ERROR, "Could not enable video output!\n");
         return -1;
     }
@@ -189,6 +218,9 @@
     pthread_cond_init(&ctx->cond, NULL);
     ctx->frames_buffer_available_spots = ctx->frames_buffer;
 
+    av_log(avctx, AV_LOG_DEBUG, "output: %s, preroll: %d, frames buffer size: %d\n",
+           avctx->url, ctx->frames_preroll, ctx->frames_buffer);
+
     /* The device expects the framerate to be fixed. */
     avpriv_set_pts_info(st, 64, st->time_base.num, st->time_base.den);
 
@@ -260,11 +292,139 @@
     pthread_mutex_destroy(&ctx->mutex);
     pthread_cond_destroy(&ctx->cond);
 
+#if CONFIG_LIBKLVANC
+    klvanc_context_destroy(ctx->vanc_ctx);
+#endif
+
     av_freep(&cctx->ctx);
 
     return 0;
 }
 
+#if CONFIG_LIBKLVANC
+static void construct_cc(AVFormatContext *avctx, struct decklink_ctx *ctx,
+                         AVPacket *pkt, struct klvanc_line_set_s *vanc_lines)
+{
+    struct klvanc_packet_eia_708b_s *cdp;
+    uint16_t *cdp_words;
+    uint16_t len;
+    uint8_t cc_count;
+    int size, ret, i;
+
+    const uint8_t *data = av_packet_get_side_data(pkt, AV_PKT_DATA_A53_CC, &size);
+    if (!data)
+        return;
+
+    cc_count = size / 3;
+
+    ret = klvanc_create_eia708_cdp(&cdp);
+    if (ret)
+        return;
+
+    ret = klvanc_set_framerate_EIA_708B(cdp, ctx->bmd_tb_num, ctx->bmd_tb_den);
+    if (ret) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid framerate specified: %lld/%lld\n",
+               ctx->bmd_tb_num, ctx->bmd_tb_den);
+        klvanc_destroy_eia708_cdp(cdp);
+        return;
+    }
+
+    if (cc_count > KLVANC_MAX_CC_COUNT) {
+        av_log(avctx, AV_LOG_ERROR, "Illegal cc_count received: %d\n", cc_count);
+        cc_count = KLVANC_MAX_CC_COUNT;
+    }
+
+    /* CC data */
+    cdp->header.ccdata_present = 1;
+    cdp->header.caption_service_active = 1;
+    cdp->ccdata.cc_count = cc_count;
+    for (i = 0; i < cc_count; i++) {
+        if (data [3*i] & 0x04)
+            cdp->ccdata.cc[i].cc_valid = 1;
+        cdp->ccdata.cc[i].cc_type = data[3*i] & 0x03;
+        cdp->ccdata.cc[i].cc_data[0] = data[3*i+1];
+        cdp->ccdata.cc[i].cc_data[1] = data[3*i+2];
+    }
+
+    klvanc_finalize_EIA_708B(cdp, ctx->cdp_sequence_num++);
+    ret = klvanc_convert_EIA_708B_to_words(cdp, &cdp_words, &len);
+    klvanc_destroy_eia708_cdp(cdp);
+    if (ret != 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed converting 708 packet to words\n");
+        return;
+    }
+
+    ret = klvanc_line_insert(ctx->vanc_ctx, vanc_lines, cdp_words, len, 11, 0);
+    free(cdp_words);
+    if (ret != 0) {
+        av_log(avctx, AV_LOG_ERROR, "VANC line insertion failed\n");
+        return;
+    }
+}
+
+static int decklink_construct_vanc(AVFormatContext *avctx, struct decklink_ctx *ctx,
+                                   AVPacket *pkt, decklink_frame *frame)
+{
+    struct klvanc_line_set_s vanc_lines = { 0 };
+    int ret = 0, i;
+
+    if (!ctx->supports_vanc)
+        return 0;
+
+    construct_cc(avctx, ctx, pkt, &vanc_lines);
+
+    IDeckLinkVideoFrameAncillary *vanc;
+    int result = ctx->dlo->CreateAncillaryData(bmdFormat10BitYUV, &vanc);
+    if (result != S_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create vanc\n");
+        ret = AVERROR(EIO);
+        goto done;
+    }
+
+    /* Now that we've got all the VANC lines in a nice orderly manner, generate the
+       final VANC sections for the Decklink output */
+    for (i = 0; i < vanc_lines.num_lines; i++) {
+        struct klvanc_line_s *line = vanc_lines.lines[i];
+        int real_line;
+        void *buf;
+
+        if (!line)
+            break;
+
+        /* FIXME: include hack for certain Decklink cards which mis-represent
+           line numbers for pSF frames */
+        real_line = line->line_number;
+
+        result = vanc->GetBufferForVerticalBlankingLine(real_line, &buf);
+        if (result != S_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to get VANC line %d: %d", real_line, result);
+            continue;
+        }
+
+        /* Generate the full line taking into account all VANC packets on that line */
+        result = klvanc_generate_vanc_line_v210(ctx->vanc_ctx, line, (uint8_t *) buf,
+                                                ctx->bmd_width);
+        if (result) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to generate VANC line\n");
+            continue;
+        }
+    }
+
+    result = frame->SetAncillaryData(vanc);
+    vanc->Release();
+    if (result != S_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to set vanc: %d", result);
+        ret = AVERROR(EIO);
+    }
+
+done:
+    for (i = 0; i < vanc_lines.num_lines; i++)
+        klvanc_line_free(vanc_lines.lines[i]);
+
+    return ret;
+}
+#endif
+
 static int decklink_write_video_packet(AVFormatContext *avctx, AVPacket *pkt)
 {
     struct decklink_cctx *cctx = (struct decklink_cctx *)avctx->priv_data;
@@ -299,6 +459,11 @@
         }
 
         frame = new decklink_frame(ctx, avpacket, st->codecpar->codec_id, ctx->bmd_height, ctx->bmd_width);
+
+#if CONFIG_LIBKLVANC
+        if (decklink_construct_vanc(avctx, ctx, pkt, frame))
+            av_log(avctx, AV_LOG_ERROR, "Failed to construct VANC\n");
+#endif
     }
 
     if (!frame) {
@@ -317,7 +482,7 @@
     pthread_mutex_unlock(&ctx->mutex);
 
     /* Schedule frame for playback. */
-    hr = ctx->dlo->ScheduleVideoFrame((struct IDeckLinkVideoFrame *) frame,
+    hr = ctx->dlo->ScheduleVideoFrame((class IDeckLinkVideoFrame *) frame,
                                       pkt->pts * ctx->bmd_tb_num,
                                       ctx->bmd_tb_num, ctx->bmd_tb_den);
     /* Pass ownership to DeckLink, or release on failure */
@@ -388,7 +553,15 @@
     ctx->list_devices = cctx->list_devices;
     ctx->list_formats = cctx->list_formats;
     ctx->preroll      = cctx->preroll;
+    ctx->duplex_mode  = cctx->duplex_mode;
     cctx->ctx = ctx;
+#if CONFIG_LIBKLVANC
+    if (klvanc_context_create(&ctx->vanc_ctx) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot create VANC library context\n");
+        return AVERROR(ENOMEM);
+    }
+    ctx->supports_vanc = 1;
+#endif
 
     /* List available devices and exit. */
     if (ctx->list_devices) {
@@ -396,14 +569,14 @@
         return AVERROR_EXIT;
     }
 
-    ret = ff_decklink_init_device(avctx, avctx->filename);
+    ret = ff_decklink_init_device(avctx, avctx->url);
     if (ret < 0)
         return ret;
 
     /* Get output device. */
     if (ctx->dl->QueryInterface(IID_IDeckLinkOutput, (void **) &ctx->dlo) != S_OK) {
         av_log(avctx, AV_LOG_ERROR, "Could not open output device from '%s'\n",
-               avctx->filename);
+               avctx->url);
         ret = AVERROR(EIO);
         goto error;
     }

diff --git a/libavdevice/decklink_enc_c.c b/libavdevice/decklink_enc_c.c
index 360535c..6169078 100644
--- a/libavdevice/decklink_enc_c.c
+++ b/libavdevice/decklink_enc_c.c

@@ -31,6 +31,10 @@
     { "list_devices", "list available devices"  , OFFSET(list_devices), AV_OPT_TYPE_INT   , { .i64 = 0   }, 0, 1, ENC },
     { "list_formats", "list supported formats"  , OFFSET(list_formats), AV_OPT_TYPE_INT   , { .i64 = 0   }, 0, 1, ENC },
     { "preroll"     , "video preroll in seconds", OFFSET(preroll     ), AV_OPT_TYPE_DOUBLE, { .dbl = 0.5 }, 0, 5, ENC },
+    { "duplex_mode" , "duplex mode"             , OFFSET(duplex_mode ), AV_OPT_TYPE_INT   , { .i64 = 0   }, 0, 2, ENC, "duplex_mode"},
+    { "unset"       ,  NULL                     , 0                   , AV_OPT_TYPE_CONST , { .i64 = 0   }, 0, 0, ENC, "duplex_mode"},
+    { "half"        ,  NULL                     , 0                   , AV_OPT_TYPE_CONST , { .i64 = 1   }, 0, 0, ENC, "duplex_mode"},
+    { "full"        ,  NULL                     , 0                   , AV_OPT_TYPE_CONST , { .i64 = 2   }, 0, 0, ENC, "duplex_mode"},
     { NULL },
 };
 

diff --git a/libavdevice/dshow.c b/libavdevice/dshow.c
index f2453e6..2548158 100644
--- a/libavdevice/dshow.c
+++ b/libavdevice/dshow.c

@@ -996,10 +996,14 @@
             par->codec_id = AV_CODEC_ID_RAWVIDEO;
             if (bih->biCompression == BI_RGB || bih->biCompression == BI_BITFIELDS) {
                 par->bits_per_coded_sample = bih->biBitCount;
-                par->extradata = av_malloc(9 + AV_INPUT_BUFFER_PADDING_SIZE);
-                if (par->extradata) {
-                    par->extradata_size = 9;
-                    memcpy(par->extradata, "BottomUp", 9);
+                if (par->height < 0) {
+                    par->height *= -1;
+                } else {
+                    par->extradata = av_malloc(9 + AV_INPUT_BUFFER_PADDING_SIZE);
+                    if (par->extradata) {
+                        par->extradata_size = 9;
+                        memcpy(par->extradata, "BottomUp", 9);
+                    }
                 }
             }
         }
@@ -1033,7 +1037,7 @@
 {
     struct dshow_ctx *ctx = avctx->priv_data;
     char **device_name = ctx->device_name;
-    char *name = av_strdup(avctx->filename);
+    char *name = av_strdup(avctx->url);
     char *tmp = name;
     int ret = 1;
     char *type;

diff --git a/libavdevice/fbdev_dec.c b/libavdevice/fbdev_dec.c
index d9c75df..6a51816 100644
--- a/libavdevice/fbdev_dec.c
+++ b/libavdevice/fbdev_dec.c

@@ -78,8 +78,8 @@
     if (avctx->flags & AVFMT_FLAG_NONBLOCK)
         flags |= O_NONBLOCK;
 
-    if (avctx->filename[0])
-        device = avctx->filename;
+    if (avctx->url[0])
+        device = avctx->url;
     else
         device = ff_fbdev_default_device();
 

diff --git a/libavdevice/fbdev_enc.c b/libavdevice/fbdev_enc.c
index b4e5f84..4191596 100644
--- a/libavdevice/fbdev_enc.c
+++ b/libavdevice/fbdev_enc.c

@@ -53,8 +53,8 @@
         return AVERROR(EINVAL);
     }
 
-    if (h->filename[0])
-        device = h->filename;
+    if (h->url[0])
+        device = h->url;
     else
         device = ff_fbdev_default_device();
 

diff --git a/libavdevice/gdigrab.c b/libavdevice/gdigrab.c
index 87f5012..ab08c11 100644
--- a/libavdevice/gdigrab.c
+++ b/libavdevice/gdigrab.c

@@ -230,12 +230,14 @@
     HBITMAP hbmp   = NULL;
     void *buffer   = NULL;
 
-    const char *filename = s1->filename;
+    const char *filename = s1->url;
     const char *name     = NULL;
     AVStream   *st       = NULL;
 
     int bpp;
+    int horzres;
     int vertres;
+    int desktophorzres;
     int desktopvertres;
     RECT virtual_rect;
     RECT clip_rect;
@@ -279,11 +281,13 @@
         GetClientRect(hwnd, &virtual_rect);
     } else {
         /* desktop -- get the right height and width for scaling DPI */
+        horzres = GetDeviceCaps(source_hdc, HORZRES);
         vertres = GetDeviceCaps(source_hdc, VERTRES);
+        desktophorzres = GetDeviceCaps(source_hdc, DESKTOPHORZRES);
         desktopvertres = GetDeviceCaps(source_hdc, DESKTOPVERTRES);
         virtual_rect.left = GetSystemMetrics(SM_XVIRTUALSCREEN);
         virtual_rect.top = GetSystemMetrics(SM_YVIRTUALSCREEN);
-        virtual_rect.right = (virtual_rect.left + GetSystemMetrics(SM_CXVIRTUALSCREEN)) * desktopvertres / vertres;
+        virtual_rect.right = (virtual_rect.left + GetSystemMetrics(SM_CXVIRTUALSCREEN)) * desktophorzres / horzres;
         virtual_rect.bottom = (virtual_rect.top + GetSystemMetrics(SM_CYVIRTUALSCREEN)) * desktopvertres / vertres;
     }
 
@@ -447,7 +451,9 @@
         POINT pos;
         RECT clip_rect = gdigrab->clip_rect;
         HWND hwnd = gdigrab->hwnd;
+        int horzres = GetDeviceCaps(gdigrab->source_hdc, HORZRES);
         int vertres = GetDeviceCaps(gdigrab->source_hdc, VERTRES);
+        int desktophorzres = GetDeviceCaps(gdigrab->source_hdc, DESKTOPHORZRES);
         int desktopvertres = GetDeviceCaps(gdigrab->source_hdc, DESKTOPVERTRES);
         info.hbmMask = NULL;
         info.hbmColor = NULL;
@@ -483,7 +489,7 @@
         }
 
         //that would keep the correct location of mouse with hidpi screens
-        pos.x = pos.x * desktopvertres / vertres;
+        pos.x = pos.x * desktophorzres / horzres;
         pos.y = pos.y * desktopvertres / vertres;
 
         av_log(s1, AV_LOG_DEBUG, "Cursor pos (%li,%li) -> (%li,%li)\n",

diff --git a/libavdevice/iec61883.c b/libavdevice/iec61883.c
index 721dca3..dcf7553 100644
--- a/libavdevice/iec61883.c
+++ b/libavdevice/iec61883.c

@@ -118,7 +118,7 @@
         goto exit;
     }
 
-    packet->buf = av_malloc(length);
+    packet->buf = av_malloc(length + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!packet->buf) {
         av_free(packet);
         ret = -1;
@@ -127,6 +127,7 @@
     packet->len = length;
 
     memcpy(packet->buf, data, length);
+    memset(packet->buf + length, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
     if (dv->queue_first) {
         dv->queue_last->next = packet;
@@ -200,13 +201,21 @@
     size = avpriv_dv_produce_packet(dv->dv_demux, pkt,
                                     packet->buf, packet->len, -1);
     dv->queue_first = packet->next;
+    if (size < 0)
+        av_free(packet->buf);
     av_free(packet);
     dv->packets--;
 
-    if (size > 0)
-        return size;
+    if (size < 0)
+        return -1;
 
-    return -1;
+    if (av_packet_from_data(pkt, pkt->data, pkt->size) < 0) {
+        av_freep(&pkt->data);
+        av_packet_unref(pkt);
+        return -1;
+    }
+
+    return size;
 }
 
 static int iec61883_parse_queue_hdv(struct iec61883_data *dv, AVPacket *pkt)
@@ -259,14 +268,14 @@
         goto fail;
     }
 
-    inport = strtol(context->filename, &endptr, 10);
-    if (endptr != context->filename && *endptr == '\0') {
+    inport = strtol(context->url, &endptr, 10);
+    if (endptr != context->url && *endptr == '\0') {
         av_log(context, AV_LOG_INFO, "Selecting IEEE1394 port: %d\n", inport);
         j = inport;
         nb_ports = inport + 1;
-    } else if (strcmp(context->filename, "auto")) {
+    } else if (strcmp(context->url, "auto")) {
         av_log(context, AV_LOG_ERROR, "Invalid input \"%s\", you should specify "
-               "\"auto\" for auto-detection, or the port number.\n", context->filename);
+               "\"auto\" for auto-detection, or the port number.\n", context->url);
         goto fail;
     }
 
@@ -454,6 +463,7 @@
     } else {
         iec61883_dv_fb_stop(dv->iec61883_dv);
         iec61883_dv_fb_close(dv->iec61883_dv);
+        av_freep(&dv->dv_demux);
     }
     while (dv->queue_first) {
         DVPacket *packet = dv->queue_first;

diff --git a/libavdevice/jack.c b/libavdevice/jack.c
index 076078c..34f1c6d 100644
--- a/libavdevice/jack.c
+++ b/libavdevice/jack.c

@@ -94,13 +94,9 @@
 
     /* Copy and interleave audio data from the JACK buffer into the packet */
     for (i = 0; i < self->nports; i++) {
-    #if HAVE_JACK_PORT_GET_LATENCY_RANGE
         jack_latency_range_t range;
         jack_port_get_latency_range(self->ports[i], JackCaptureLatency, &range);
         latency += range.max;
-    #else
-        latency += jack_port_get_total_latency(self->client, self->ports[i]);
-    #endif
         buffer = jack_port_get_buffer(self->ports[i], self->buffer_size);
         for (j = 0; j < self->buffer_size; j++)
             pkt_data[j * self->nports + i] = buffer[j];
@@ -154,8 +150,8 @@
     jack_status_t status;
     int i, test;
 
-    /* Register as a JACK client, using the context filename as client name. */
-    self->client = jack_client_open(context->filename, JackNullOption, &status);
+    /* Register as a JACK client, using the context url as client name. */
+    self->client = jack_client_open(context->url, JackNullOption, &status);
     if (!self->client) {
         av_log(context, AV_LOG_ERROR, "Unable to register as a JACK client\n");
         return AVERROR(EIO);
@@ -178,7 +174,7 @@
                                             JackPortIsInput, 0);
         if (!self->ports[i]) {
             av_log(context, AV_LOG_ERROR, "Unable to register port %s:%s\n",
-                   context->filename, str);
+                   context->url, str);
             jack_client_close(self->client);
             return AVERROR(EIO);
         }

diff --git a/libavdevice/kmsgrab.c b/libavdevice/kmsgrab.c
index 6a6de09..d0de774 100644
--- a/libavdevice/kmsgrab.c
+++ b/libavdevice/kmsgrab.c

@@ -451,6 +451,7 @@
     .item_name  = av_default_item_name,
     .option     = options,
     .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_INPUT,
 };
 
 AVInputFormat ff_kmsgrab_demuxer = {

diff --git a/libavdevice/lavfi.c b/libavdevice/lavfi.c
index ede391f..ca8f05f 100644
--- a/libavdevice/lavfi.c
+++ b/libavdevice/lavfi.c

@@ -38,7 +38,6 @@
 #include "libavutil/parseutils.h"
 #include "libavutil/pixdesc.h"
 #include "libavfilter/avfilter.h"
-#include "libavfilter/avfiltergraph.h"
 #include "libavfilter/buffersink.h"
 #include "libavformat/avio_internal.h"
 #include "libavformat/internal.h"
@@ -121,7 +120,7 @@
 {
     LavfiContext *lavfi = avctx->priv_data;
     AVFilterInOut *input_links = NULL, *output_links = NULL, *inout;
-    AVFilter *buffersink, *abuffersink;
+    const AVFilter *buffersink, *abuffersink;
     int *pix_fmts = create_all_formats(AV_PIX_FMT_NB);
     enum AVMediaType type;
     int ret = 0, i, n;
@@ -131,8 +130,6 @@
     if (!pix_fmts)
         FAIL(AVERROR(ENOMEM));
 
-    avfilter_register_all();
-
     buffersink = avfilter_get_by_name("buffersink");
     abuffersink = avfilter_get_by_name("abuffersink");
 
@@ -167,7 +164,7 @@
     }
 
     if (!lavfi->graph_str)
-        lavfi->graph_str = av_strdup(avctx->filename);
+        lavfi->graph_str = av_strdup(avctx->url);
 
     /* parse the graph, create a stream for each open output */
     if (!(lavfi->graph = avfilter_graph_alloc()))

diff --git a/libavdevice/libcdio.c b/libavdevice/libcdio.c
index f6d4fce..a4c9f52 100644
--- a/libavdevice/libcdio.c
+++ b/libavdevice/libcdio.c

@@ -60,9 +60,9 @@
 
     if (!(st = avformat_new_stream(ctx, NULL)))
         return AVERROR(ENOMEM);
-    s->drive = cdio_cddap_identify(ctx->filename, CDDA_MESSAGE_LOGIT, &err);
+    s->drive = cdio_cddap_identify(ctx->url, CDDA_MESSAGE_LOGIT, &err);
     if (!s->drive) {
-        av_log(ctx, AV_LOG_ERROR, "Could not open drive %s.\n", ctx->filename);
+        av_log(ctx, AV_LOG_ERROR, "Could not open drive %s.\n", ctx->url);
         return AVERROR(EINVAL);
     }
     if (err) {
@@ -70,7 +70,7 @@
         free(err);
     }
     if ((ret = cdio_cddap_open(s->drive)) < 0 || !s->drive->opened) {
-        av_log(ctx, AV_LOG_ERROR, "Could not open disk in drive %s.\n", ctx->filename);
+        av_log(ctx, AV_LOG_ERROR, "Could not open disk in drive %s.\n", ctx->url);
         return AVERROR(EINVAL);
     }
 

diff --git a/libavdevice/libndi_newtek_dec.c b/libavdevice/libndi_newtek_dec.c
index 8cbcd9a..4fb7197 100644
--- a/libavdevice/libndi_newtek_dec.c
+++ b/libavdevice/libndi_newtek_dec.c

@@ -149,7 +149,7 @@
     }
 
     /* Find available sources. */
-    ret = ndi_find_sources(avctx, avctx->filename, &recv_create_desc.source_to_connect_to);
+    ret = ndi_find_sources(avctx, avctx->url, &recv_create_desc.source_to_connect_to);
     if (ctx->find_sources) {
         return AVERROR_EXIT;
     }
@@ -189,7 +189,7 @@
     }
 
     st->time_base                   = NDI_TIME_BASE_Q;
-    av_stream_set_r_frame_rate(st, av_make_q(v->frame_rate_N, v->frame_rate_D));
+    st->r_frame_rate                = av_make_q(v->frame_rate_N, v->frame_rate_D);
 
     tmp = av_mul_q(av_d2q(v->picture_aspect_ratio, INT_MAX), (AVRational){v->yres, v->xres});
     av_reduce(&st->sample_aspect_ratio.num, &st->sample_aspect_ratio.den, tmp.num, tmp.den, 1000);

diff --git a/libavdevice/libndi_newtek_enc.c b/libavdevice/libndi_newtek_enc.c
index 6ca6f41..f3603f5 100644
--- a/libavdevice/libndi_newtek_enc.c
+++ b/libavdevice/libndi_newtek_enc.c

@@ -233,7 +233,7 @@
     int ret = 0;
     unsigned int n;
     struct NDIContext *ctx = avctx->priv_data;
-    const NDIlib_send_create_t ndi_send_desc = { .p_ndi_name = avctx->filename,
+    const NDIlib_send_create_t ndi_send_desc = { .p_ndi_name = avctx->url,
         .p_groups = NULL, .clock_video = ctx->clock_video, .clock_audio = ctx->clock_audio };
 
     if (!NDIlib_initialize()) {
@@ -260,7 +260,7 @@
 
     ctx->ndi_send = NDIlib_send_create(&ndi_send_desc);
     if (!ctx->ndi_send) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to create NDI output %s\n", avctx->filename);
+        av_log(avctx, AV_LOG_ERROR, "Failed to create NDI output %s\n", avctx->url);
         ret = AVERROR_EXTERNAL;
     }
 

diff --git a/libavdevice/openal-dec.c b/libavdevice/openal-dec.c
index 6eb0efe..c19048e 100644
--- a/libavdevice/openal-dec.c
+++ b/libavdevice/openal-dec.c

@@ -139,7 +139,7 @@
 
     /* Open device for capture */
     ad->device =
-        alcCaptureOpenDevice(ctx->filename[0] ? ctx->filename : NULL,
+        alcCaptureOpenDevice(ctx->url[0] ? ctx->url : NULL,
                              ad->sample_rate,
                              ad->sample_format,
                              ad->sample_rate); /* Maximum 1 second of sample data to be read at once */

diff --git a/libavdevice/opengl_enc.c b/libavdevice/opengl_enc.c
index bb6787c..54c7e61 100644
--- a/libavdevice/opengl_enc.c
+++ b/libavdevice/opengl_enc.c

@@ -1070,7 +1070,7 @@
         opengl->window_height = opengl->height;
 
     if (!opengl->window_title && !opengl->no_window)
-        opengl->window_title = av_strdup(h->filename);
+        opengl->window_title = av_strdup(h->url);
 
     if ((ret = opengl_create_window(h)))
         goto fail;

diff --git a/libavdevice/oss.c b/libavdevice/oss.c
index d741128..d92cde3 100644
--- a/libavdevice/oss.c
+++ b/libavdevice/oss.c

@@ -23,17 +23,12 @@
 
 #include <string.h>
 
-#if HAVE_SOUNDCARD_H
-#include <soundcard.h>
-#else
-#include <sys/soundcard.h>
-#endif
-
 #if HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 #include <fcntl.h>
 #include <sys/ioctl.h>
+#include <sys/soundcard.h>
 
 #include "libavutil/log.h"
 

diff --git a/libavdevice/oss_dec.c b/libavdevice/oss_dec.c
index 9f748f2..d0dc327 100644
--- a/libavdevice/oss_dec.c
+++ b/libavdevice/oss_dec.c

@@ -23,17 +23,12 @@
 
 #include <stdint.h>
 
-#if HAVE_SOUNDCARD_H
-#include <soundcard.h>
-#else
-#include <sys/soundcard.h>
-#endif
-
 #if HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 #include <fcntl.h>
 #include <sys/ioctl.h>
+#include <sys/soundcard.h>
 
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
@@ -57,7 +52,7 @@
         return AVERROR(ENOMEM);
     }
 
-    ret = ff_oss_audio_open(s1, 0, s1->filename);
+    ret = ff_oss_audio_open(s1, 0, s1->url);
     if (ret < 0) {
         return AVERROR(EIO);
     }

diff --git a/libavdevice/oss_enc.c b/libavdevice/oss_enc.c
index 2268b4c..e3172af 100644
--- a/libavdevice/oss_enc.c
+++ b/libavdevice/oss_enc.c

@@ -21,17 +21,12 @@
 
 #include "config.h"
 
-#if HAVE_SOUNDCARD_H
-#include <soundcard.h>
-#else
-#include <sys/soundcard.h>
-#endif
-
 #if HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 #include <fcntl.h>
 #include <sys/ioctl.h>
+#include <sys/soundcard.h>
 
 #include "libavutil/internal.h"
 
@@ -51,7 +46,7 @@
     st = s1->streams[0];
     s->sample_rate = st->codecpar->sample_rate;
     s->channels = st->codecpar->channels;
-    ret = ff_oss_audio_open(s1, 1, s1->filename);
+    ret = ff_oss_audio_open(s1, 1, s1->url);
     if (ret < 0) {
         return AVERROR(EIO);
     } else {

diff --git a/libavdevice/pulse_audio_dec.c b/libavdevice/pulse_audio_dec.c
index 95a1d6e..042fe76 100644
--- a/libavdevice/pulse_audio_dec.c
+++ b/libavdevice/pulse_audio_dec.c

@@ -148,6 +148,9 @@
                                 pd->channels };
 
     pa_buffer_attr attr = { -1 };
+    pa_channel_map cmap;
+
+    pa_channel_map_init_extend(&cmap, pd->channels, PA_CHANNEL_MAP_WAVEEX);
 
     st = avformat_new_stream(s, NULL);
 
@@ -158,8 +161,8 @@
 
     attr.fragsize = pd->fragment_size;
 
-    if (s->filename[0] != '\0' && strcmp(s->filename, "default"))
-        device = s->filename;
+    if (s->url[0] != '\0' && strcmp(s->url, "default"))
+        device = s->url;
 
     if (!(pd->mainloop = pa_threaded_mainloop_new())) {
         pulse_close(s);
@@ -202,7 +205,7 @@
         pa_threaded_mainloop_wait(pd->mainloop);
     }
 
-    if (!(pd->stream = pa_stream_new(pd->context, pd->stream_name, &ss, NULL))) {
+    if (!(pd->stream = pa_stream_new(pd->context, pd->stream_name, &ss, &cmap))) {
         ret = AVERROR(pa_context_errno(pd->context));
         goto unlock_and_fail;
     }

diff --git a/libavdevice/pulse_audio_enc.c b/libavdevice/pulse_audio_enc.c
index 0efcf0f..d430b77 100644
--- a/libavdevice/pulse_audio_enc.c
+++ b/libavdevice/pulse_audio_enc.c

@@ -459,8 +459,8 @@
     st = h->streams[0];
 
     if (!stream_name) {
-        if (h->filename[0])
-            stream_name = h->filename;
+        if (h->url[0])
+            stream_name = h->url;
         else
             stream_name = "Playback";
     }

diff --git a/libavdevice/sdl2.c b/libavdevice/sdl2.c
index 5d9e91e..da51430 100644
--- a/libavdevice/sdl2.c
+++ b/libavdevice/sdl2.c

@@ -42,6 +42,7 @@
     int window_width, window_height;  /**< size of the window */
     int window_fullscreen;
     int window_borderless;
+    int enable_quit_action;
 
     SDL_Texture *texture;
     int texture_fmt;
@@ -165,7 +166,7 @@
     int flags  = 0;
 
     if (!sdl->window_title)
-        sdl->window_title = av_strdup(s->filename);
+        sdl->window_title = av_strdup(s->url);
 
     if (SDL_WasInit(SDL_INIT_VIDEO)) {
         av_log(s, AV_LOG_WARNING,
@@ -206,9 +207,7 @@
         }
     }
 
-    sdl->window_width = sdl->texture_rect.w = codecpar->width;
-    sdl->window_height = sdl->texture_rect.h = codecpar->height;
-    sdl->texture_rect.x = sdl->texture_rect.y = 0;
+    compute_texture_rect(s);
 
     if (SDL_CreateWindowAndRenderer(sdl->window_width, sdl->window_height,
                                     flags, &sdl->window, &sdl->renderer) != 0){
@@ -219,7 +218,7 @@
     SDL_SetWindowTitle(sdl->window, sdl->window_title);
 
     sdl->texture = SDL_CreateTexture(sdl->renderer, sdl->texture_fmt, SDL_TEXTUREACCESS_STREAMING,
-                                     sdl->window_width, sdl->window_height);
+                                     codecpar->width, codecpar->height);
 
     if (!sdl->texture) {
         av_log(sdl, AV_LOG_ERROR, "Unable to set create mode: %s\n", SDL_GetError());
@@ -279,7 +278,7 @@
         }
     }
 
-    if (quit) {
+    if (quit && sdl->enable_quit_action) {
         sdl2_write_trailer(s);
         return AVERROR(EIO);
     }
@@ -340,6 +339,7 @@
     { "window_size",       "set SDL window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
     { "window_fullscreen", "set SDL window fullscreen",  OFFSET(window_fullscreen), AV_OPT_TYPE_BOOL,  { .i64 = 0 },    0, 1, AV_OPT_FLAG_ENCODING_PARAM },
     { "window_borderless", "set SDL window border off",  OFFSET(window_borderless), AV_OPT_TYPE_BOOL,  { .i64 = 0 },    0, 1, AV_OPT_FLAG_ENCODING_PARAM },
+    { "window_enable_quit", "set if quit action is available", OFFSET(enable_quit_action), AV_OPT_TYPE_INT, {.i64=1},   0, 1, AV_OPT_FLAG_ENCODING_PARAM },
     { NULL },
 };
 

diff --git a/libavdevice/sndio_dec.c b/libavdevice/sndio_dec.c
index 2d13232..ebb485a 100644
--- a/libavdevice/sndio_dec.c
+++ b/libavdevice/sndio_dec.c

@@ -41,7 +41,7 @@
     if (!st)
         return AVERROR(ENOMEM);
 
-    ret = ff_sndio_open(s1, 0, s1->filename);
+    ret = ff_sndio_open(s1, 0, s1->url);
     if (ret < 0)
         return ret;
 

diff --git a/libavdevice/sndio_enc.c b/libavdevice/sndio_enc.c
index 47f500d..f6dd290 100644
--- a/libavdevice/sndio_enc.c
+++ b/libavdevice/sndio_enc.c

@@ -38,7 +38,7 @@
     s->sample_rate = st->codecpar->sample_rate;
     s->channels    = st->codecpar->channels;
 
-    ret = ff_sndio_open(s1, 1, s1->filename);
+    ret = ff_sndio_open(s1, 1, s1->url);
 
     return ret;
 }

diff --git a/libavdevice/v4l2-common.c b/libavdevice/v4l2-common.c
index 196c09b..2d6bfac 100644
--- a/libavdevice/v4l2-common.c
+++ b/libavdevice/v4l2-common.c

@@ -34,6 +34,12 @@
     { AV_PIX_FMT_RGB565BE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB565X },
     { AV_PIX_FMT_BGR24,   AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_BGR24   },
     { AV_PIX_FMT_RGB24,   AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB24   },
+#ifdef V4L2_PIX_FMT_XBGR32
+    { AV_PIX_FMT_BGR0,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_XBGR32  },
+    { AV_PIX_FMT_0RGB,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_XRGB32  },
+    { AV_PIX_FMT_BGRA,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_ABGR32  },
+    { AV_PIX_FMT_ARGB,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_ARGB32  },
+#endif
     { AV_PIX_FMT_BGR0,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_BGR32   },
     { AV_PIX_FMT_0RGB,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB32   },
     { AV_PIX_FMT_GRAY8,   AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_GREY    },

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index f087bad..10a0ff0 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c

@@ -106,7 +106,7 @@
     int index;
 };
 
-static int device_open(AVFormatContext *ctx)
+static int device_open(AVFormatContext *ctx, const char* device_path)
 {
     struct video_data *s = ctx->priv_data;
     struct v4l2_capability cap;
@@ -147,11 +147,11 @@
         flags |= O_NONBLOCK;
     }
 
-    fd = v4l2_open(ctx->filename, flags, 0);
+    fd = v4l2_open(device_path, flags, 0);
     if (fd < 0) {
         err = AVERROR(errno);
         av_log(ctx, AV_LOG_ERROR, "Cannot open video device %s: %s\n",
-               ctx->filename, av_err2str(err));
+               device_path, av_err2str(err));
         return err;
     }
 
@@ -840,7 +840,7 @@
         v4l2_log_file = fopen("/dev/null", "w");
 #endif
 
-    s->fd = device_open(ctx);
+    s->fd = device_open(ctx, ctx->url);
     if (s->fd < 0)
         return s->fd;
 
@@ -1042,11 +1042,13 @@
         return ret;
     }
     while ((entry = readdir(dir))) {
+        char device_name[256];
+
         if (!v4l2_is_v4l_dev(entry->d_name))
             continue;
 
-        snprintf(ctx->filename, sizeof(ctx->filename), "/dev/%s", entry->d_name);
-        if ((s->fd = device_open(ctx)) < 0)
+        snprintf(device_name, sizeof(device_name), "/dev/%s", entry->d_name);
+        if ((s->fd = device_open(ctx, device_name)) < 0)
             continue;
 
         if (v4l2_ioctl(s->fd, VIDIOC_QUERYCAP, &cap) < 0) {
@@ -1060,7 +1062,7 @@
             ret = AVERROR(ENOMEM);
             goto fail;
         }
-        device->device_name = av_strdup(ctx->filename);
+        device->device_name = av_strdup(device_name);
         device->device_description = av_strdup(cap.card);
         if (!device->device_name || !device->device_description) {
             ret = AVERROR(ENOMEM);

diff --git a/libavdevice/v4l2enc.c b/libavdevice/v4l2enc.c
index faf6e07..1c36f81 100644
--- a/libavdevice/v4l2enc.c
+++ b/libavdevice/v4l2enc.c

@@ -39,10 +39,10 @@
     if (s1->flags & AVFMT_FLAG_NONBLOCK)
         flags |= O_NONBLOCK;
 
-    s->fd = open(s1->filename, flags);
+    s->fd = open(s1->url, flags);
     if (s->fd < 0) {
         res = AVERROR(errno);
-        av_log(s1, AV_LOG_ERROR, "Unable to open V4L2 device '%s'\n", s1->filename);
+        av_log(s1, AV_LOG_ERROR, "Unable to open V4L2 device '%s'\n", s1->url);
         return res;
     }
 
@@ -106,7 +106,7 @@
 };
 
 AVOutputFormat ff_v4l2_muxer = {
-    .name           = "v4l2",
+    .name           = "video4linux2,v4l2",
     .long_name      = NULL_IF_CONFIG_SMALL("Video4Linux2 output device"),
     .priv_data_size = sizeof(V4L2Context),
     .audio_codec    = AV_CODEC_ID_NONE,

diff --git a/libavdevice/version.h b/libavdevice/version.h
index 9d90087..e6ee009 100644
--- a/libavdevice/version.h
+++ b/libavdevice/version.h

@@ -27,9 +27,9 @@
 
 #include "libavutil/version.h"
 
-#define LIBAVDEVICE_VERSION_MAJOR  57
-#define LIBAVDEVICE_VERSION_MINOR  10
-#define LIBAVDEVICE_VERSION_MICRO 100
+#define LIBAVDEVICE_VERSION_MAJOR  58
+#define LIBAVDEVICE_VERSION_MINOR   4
+#define LIBAVDEVICE_VERSION_MICRO 105
 
 #define LIBAVDEVICE_VERSION_INT AV_VERSION_INT(LIBAVDEVICE_VERSION_MAJOR, \
                                                LIBAVDEVICE_VERSION_MINOR, \

diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c
index f03d38a..e2ab276 100644
--- a/libavdevice/vfwcap.c
+++ b/libavdevice/vfwcap.c

@@ -256,7 +256,7 @@
     int ret;
     AVRational framerate_q;
 
-    if (!strcmp(s->filename, "list")) {
+    if (!strcmp(s->url, "list")) {
         for (devnum = 0; devnum <= 9; devnum++) {
             char driver_name[256];
             char driver_ver[256];
@@ -279,7 +279,7 @@
     }
 
     /* If atoi fails, devnum==0 and the default device is used */
-    devnum = atoi(s->filename);
+    devnum = atoi(s->url);
 
     ret = SendMessage(ctx->hwnd, WM_CAP_DRIVER_CONNECT, devnum, 0);
     if(!ret) {
@@ -328,11 +328,14 @@
     }
 
     if (ctx->video_size) {
-        ret = av_parse_video_size(&bi->bmiHeader.biWidth, &bi->bmiHeader.biHeight, ctx->video_size);
+        int w, h;
+        ret = av_parse_video_size(&w, &h, ctx->video_size);
         if (ret < 0) {
             av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n");
             goto fail;
         }
+        bi->bmiHeader.biWidth  = w;
+        bi->bmiHeader.biHeight = h;
     }
 
     if (0) {

diff --git a/libavdevice/xcbgrab.c b/libavdevice/xcbgrab.c
index 1968fe0..6d142ab 100644
--- a/libavdevice/xcbgrab.c
+++ b/libavdevice/xcbgrab.c

@@ -629,14 +629,14 @@
     XCBGrabContext *c = s->priv_data;
     int screen_num, ret;
     const xcb_setup_t *setup;
-    char *display_name = av_strdup(s->filename);
+    char *display_name = av_strdup(s->url);
 
     if (!display_name)
         return AVERROR(ENOMEM);
 
-    if (!sscanf(s->filename, "%[^+]+%d,%d", display_name, &c->x, &c->y)) {
+    if (!sscanf(s->url, "%[^+]+%d,%d", display_name, &c->x, &c->y)) {
         *display_name = 0;
-        sscanf(s->filename, "+%d,%d", &c->x, &c->y);
+        sscanf(s->url, "+%d,%d", &c->x, &c->y);
     }
 
     c->conn = xcb_connect(display_name[0] ? display_name : NULL, &screen_num);
@@ -644,7 +644,7 @@
 
     if ((ret = xcb_connection_has_error(c->conn))) {
         av_log(s, AV_LOG_ERROR, "Cannot open display %s, error %d.\n",
-               s->filename[0] ? s->filename : "default", ret);
+               s->url[0] ? s->url : "default", ret);
         return AVERROR(EIO);
     }
 

diff --git a/libavdevice/xv.c b/libavdevice/xv.c
index 185de75..c3ed2e4 100644
--- a/libavdevice/xv.c
+++ b/libavdevice/xv.c

@@ -151,7 +151,7 @@
                                          xv->window_width, xv->window_height,
                                          0, 0, 0);
         if (!xv->window_title) {
-            if (!(xv->window_title = av_strdup(s->filename))) {
+            if (!(xv->window_title = av_strdup(s->url))) {
                 ret = AVERROR(ENOMEM);
                 goto fail;
             }

diff --git a/libavfilter/.gitignore b/libavfilter/.gitignore
new file mode 100644
index 0000000..26bddeb
--- /dev/null
+++ b/libavfilter/.gitignore

@@ -0,0 +1 @@
+/filter_list.c

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index d2f0495..62cc2f5 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile

@@ -2,7 +2,6 @@
 DESC = FFmpeg audio/video filtering library
 
 HEADERS = avfilter.h                                                    \
-          avfiltergraph.h                                               \
           buffersink.h                                                  \
           buffersrc.h                                                   \
           version.h                                                     \
@@ -20,27 +19,40 @@
        framequeue.o                                                     \
        graphdump.o                                                      \
        graphparser.o                                                    \
-       opencl_allkernels.o                                              \
        transform.o                                                      \
        video.o                                                          \
 
 OBJS-$(HAVE_THREADS)                         += pthread.o
 
+# subsystems
+OBJS-$(CONFIG_QSVVPP)                        += qsvvpp.o
+DNN-OBJS-$(CONFIG_LIBTENSORFLOW)             += dnn_backend_tf.o
+OBJS-$(CONFIG_DNN)                           += dnn_interface.o dnn_backend_native.o $(DNN-OBJS-yes)
+
 # audio filters
 OBJS-$(CONFIG_ABENCH_FILTER)                 += f_bench.o
 OBJS-$(CONFIG_ACOMPRESSOR_FILTER)            += af_sidechaincompress.o
+OBJS-$(CONFIG_ACONTRAST_FILTER)              += af_acontrast.o
 OBJS-$(CONFIG_ACOPY_FILTER)                  += af_acopy.o
 OBJS-$(CONFIG_ACROSSFADE_FILTER)             += af_afade.o
+OBJS-$(CONFIG_ACROSSOVER_FILTER)             += af_acrossover.o
 OBJS-$(CONFIG_ACRUSHER_FILTER)               += af_acrusher.o
+OBJS-$(CONFIG_ACUE_FILTER)                   += f_cue.o
+OBJS-$(CONFIG_ADECLICK_FILTER)               += af_adeclick.o
+OBJS-$(CONFIG_ADECLIP_FILTER)                += af_adeclick.o
 OBJS-$(CONFIG_ADELAY_FILTER)                 += af_adelay.o
+OBJS-$(CONFIG_ADERIVATIVE_FILTER)            += af_aderivative.o
 OBJS-$(CONFIG_AECHO_FILTER)                  += af_aecho.o
 OBJS-$(CONFIG_AEMPHASIS_FILTER)              += af_aemphasis.o
 OBJS-$(CONFIG_AEVAL_FILTER)                  += aeval.o
 OBJS-$(CONFIG_AFADE_FILTER)                  += af_afade.o
+OBJS-$(CONFIG_AFFTDN_FILTER)                 += af_afftdn.o
 OBJS-$(CONFIG_AFFTFILT_FILTER)               += af_afftfilt.o
 OBJS-$(CONFIG_AFIR_FILTER)                   += af_afir.o
 OBJS-$(CONFIG_AFORMAT_FILTER)                += af_aformat.o
 OBJS-$(CONFIG_AGATE_FILTER)                  += af_agate.o
+OBJS-$(CONFIG_AIIR_FILTER)                   += af_aiir.o
+OBJS-$(CONFIG_AINTEGRAL_FILTER)              += af_aderivative.o
 OBJS-$(CONFIG_AINTERLEAVE_FILTER)            += f_interleave.o
 OBJS-$(CONFIG_ALIMITER_FILTER)               += af_alimiter.o
 OBJS-$(CONFIG_ALLPASS_FILTER)                += af_biquads.o
@@ -48,6 +60,7 @@
 OBJS-$(CONFIG_AMERGE_FILTER)                 += af_amerge.o
 OBJS-$(CONFIG_AMETADATA_FILTER)              += f_metadata.o
 OBJS-$(CONFIG_AMIX_FILTER)                   += af_amix.o
+OBJS-$(CONFIG_AMULTIPLY_FILTER)              += af_amultiply.o
 OBJS-$(CONFIG_ANEQUALIZER_FILTER)            += af_anequalizer.o
 OBJS-$(CONFIG_ANULL_FILTER)                  += af_anull.o
 OBJS-$(CONFIG_APAD_FILTER)                   += af_apad.o
@@ -84,6 +97,7 @@
 OBJS-$(CONFIG_CROSSFEED_FILTER)              += af_crossfeed.o
 OBJS-$(CONFIG_CRYSTALIZER_FILTER)            += af_crystalizer.o
 OBJS-$(CONFIG_DCSHIFT_FILTER)                += af_dcshift.o
+OBJS-$(CONFIG_DRMETER_FILTER)                += af_drmeter.o
 OBJS-$(CONFIG_DYNAUDNORM_FILTER)             += af_dynaudnorm.o
 OBJS-$(CONFIG_EARWAX_FILTER)                 += af_earwax.o
 OBJS-$(CONFIG_EBUR128_FILTER)                += f_ebur128.o
@@ -95,10 +109,14 @@
 OBJS-$(CONFIG_HDCD_FILTER)                   += af_hdcd.o
 OBJS-$(CONFIG_HEADPHONE_FILTER)              += af_headphone.o
 OBJS-$(CONFIG_HIGHPASS_FILTER)               += af_biquads.o
+OBJS-$(CONFIG_HIGHSHELF_FILTER)              += af_biquads.o
 OBJS-$(CONFIG_JOIN_FILTER)                   += af_join.o
 OBJS-$(CONFIG_LADSPA_FILTER)                 += af_ladspa.o
 OBJS-$(CONFIG_LOUDNORM_FILTER)               += af_loudnorm.o ebur128.o
 OBJS-$(CONFIG_LOWPASS_FILTER)                += af_biquads.o
+OBJS-$(CONFIG_LOWSHELF_FILTER)               += af_biquads.o
+OBJS-$(CONFIG_LV2_FILTER)                    += af_lv2.o
+OBJS-$(CONFIG_MCOMPAND_FILTER)               += af_mcompand.o
 OBJS-$(CONFIG_PAN_FILTER)                    += af_pan.o
 OBJS-$(CONFIG_REPLAYGAIN_FILTER)             += af_replaygain.o
 OBJS-$(CONFIG_RESAMPLE_FILTER)               += af_resample.o
@@ -122,6 +140,7 @@
 OBJS-$(CONFIG_ANOISESRC_FILTER)              += asrc_anoisesrc.o
 OBJS-$(CONFIG_ANULLSRC_FILTER)               += asrc_anullsrc.o
 OBJS-$(CONFIG_FLITE_FILTER)                  += asrc_flite.o
+OBJS-$(CONFIG_HILBERT_FILTER)                += asrc_hilbert.o
 OBJS-$(CONFIG_SINE_FILTER)                   += asrc_sine.o
 
 OBJS-$(CONFIG_ANULLSINK_FILTER)              += asink_anullsink.o
@@ -129,16 +148,22 @@
 # video filters
 OBJS-$(CONFIG_ALPHAEXTRACT_FILTER)           += vf_extractplanes.o
 OBJS-$(CONFIG_ALPHAMERGE_FILTER)             += vf_alphamerge.o
+OBJS-$(CONFIG_AMPLIFY_FILTER)                += vf_amplify.o
 OBJS-$(CONFIG_ASS_FILTER)                    += vf_subtitles.o
 OBJS-$(CONFIG_ATADENOISE_FILTER)             += vf_atadenoise.o
 OBJS-$(CONFIG_AVGBLUR_FILTER)                += vf_avgblur.o
+OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER)         += vf_avgblur_opencl.o opencl.o \
+                                                opencl/avgblur.o boxblur.o
 OBJS-$(CONFIG_BBOX_FILTER)                   += bbox.o vf_bbox.o
 OBJS-$(CONFIG_BENCH_FILTER)                  += f_bench.o
 OBJS-$(CONFIG_BITPLANENOISE_FILTER)          += vf_bitplanenoise.o
 OBJS-$(CONFIG_BLACKDETECT_FILTER)            += vf_blackdetect.o
 OBJS-$(CONFIG_BLACKFRAME_FILTER)             += vf_blackframe.o
 OBJS-$(CONFIG_BLEND_FILTER)                  += vf_blend.o framesync.o
-OBJS-$(CONFIG_BOXBLUR_FILTER)                += vf_boxblur.o
+OBJS-$(CONFIG_BM3D_FILTER)                   += vf_bm3d.o
+OBJS-$(CONFIG_BOXBLUR_FILTER)                += vf_boxblur.o boxblur.o
+OBJS-$(CONFIG_BOXBLUR_OPENCL_FILTER)         += vf_avgblur_opencl.o opencl.o \
+                                                opencl/avgblur.o boxblur.o
 OBJS-$(CONFIG_BWDIF_FILTER)                  += vf_bwdif.o
 OBJS-$(CONFIG_CHROMAKEY_FILTER)              += vf_chromakey.o
 OBJS-$(CONFIG_CIESCOPE_FILTER)               += vf_ciescope.o
@@ -148,29 +173,37 @@
 OBJS-$(CONFIG_COLORKEY_FILTER)               += vf_colorkey.o
 OBJS-$(CONFIG_COLORLEVELS_FILTER)            += vf_colorlevels.o
 OBJS-$(CONFIG_COLORMATRIX_FILTER)            += vf_colormatrix.o
-OBJS-$(CONFIG_COLORSPACE_FILTER)             += vf_colorspace.o colorspacedsp.o
+OBJS-$(CONFIG_COLORSPACE_FILTER)             += vf_colorspace.o colorspace.o colorspacedsp.o
 OBJS-$(CONFIG_CONVOLUTION_FILTER)            += vf_convolution.o
+OBJS-$(CONFIG_CONVOLUTION_OPENCL_FILTER)     += vf_convolution_opencl.o opencl.o \
+                                                opencl/convolution.o
 OBJS-$(CONFIG_CONVOLVE_FILTER)               += vf_convolve.o framesync.o
 OBJS-$(CONFIG_COPY_FILTER)                   += vf_copy.o
 OBJS-$(CONFIG_COREIMAGE_FILTER)              += vf_coreimage.o
 OBJS-$(CONFIG_COVER_RECT_FILTER)             += vf_cover_rect.o lavfutils.o
 OBJS-$(CONFIG_CROP_FILTER)                   += vf_crop.o
 OBJS-$(CONFIG_CROPDETECT_FILTER)             += vf_cropdetect.o
+OBJS-$(CONFIG_CUE_FILTER)                    += f_cue.o
 OBJS-$(CONFIG_CURVES_FILTER)                 += vf_curves.o
 OBJS-$(CONFIG_DATASCOPE_FILTER)              += vf_datascope.o
 OBJS-$(CONFIG_DCTDNOIZ_FILTER)               += vf_dctdnoiz.o
 OBJS-$(CONFIG_DEBAND_FILTER)                 += vf_deband.o
+OBJS-$(CONFIG_DEBLOCK_FILTER)                += vf_deblock.o
 OBJS-$(CONFIG_DECIMATE_FILTER)               += vf_decimate.o
+OBJS-$(CONFIG_DECONVOLVE_FILTER)             += vf_convolve.o framesync.o
 OBJS-$(CONFIG_DEFLATE_FILTER)                += vf_neighbor.o
 OBJS-$(CONFIG_DEFLICKER_FILTER)              += vf_deflicker.o
 OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER)        += vf_deinterlace_qsv.o
-OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER)      += vf_deinterlace_vaapi.o
+OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER)      += vf_deinterlace_vaapi.o vaapi_vpp.o
 OBJS-$(CONFIG_DEJUDDER_FILTER)               += vf_dejudder.o
 OBJS-$(CONFIG_DELOGO_FILTER)                 += vf_delogo.o
+OBJS-$(CONFIG_DENOISE_VAAPI_FILTER)          += vf_misc_vaapi.o vaapi_vpp.o
 OBJS-$(CONFIG_DESHAKE_FILTER)                += vf_deshake.o
 OBJS-$(CONFIG_DESPILL_FILTER)                += vf_despill.o
 OBJS-$(CONFIG_DETELECINE_FILTER)             += vf_detelecine.o
 OBJS-$(CONFIG_DILATION_FILTER)               += vf_neighbor.o
+OBJS-$(CONFIG_DILATION_OPENCL_FILTER)        += vf_neighbor_opencl.o opencl.o \
+                                                opencl/neighbor.o
 OBJS-$(CONFIG_DISPLACE_FILTER)               += vf_displace.o framesync.o
 OBJS-$(CONFIG_DOUBLEWEAVE_FILTER)            += vf_weave.o
 OBJS-$(CONFIG_DRAWBOX_FILTER)                += vf_drawbox.o
@@ -179,15 +212,20 @@
 OBJS-$(CONFIG_DRAWTEXT_FILTER)               += vf_drawtext.o
 OBJS-$(CONFIG_EDGEDETECT_FILTER)             += vf_edgedetect.o
 OBJS-$(CONFIG_ELBG_FILTER)                   += vf_elbg.o
+OBJS-$(CONFIG_ENTROPY_FILTER)                += vf_entropy.o
 OBJS-$(CONFIG_EQ_FILTER)                     += vf_eq.o
 OBJS-$(CONFIG_EROSION_FILTER)                += vf_neighbor.o
+OBJS-$(CONFIG_EROSION_OPENCL_FILTER)         += vf_neighbor_opencl.o opencl.o \
+                                                opencl/neighbor.o
 OBJS-$(CONFIG_EXTRACTPLANES_FILTER)          += vf_extractplanes.o
 OBJS-$(CONFIG_FADE_FILTER)                   += vf_fade.o
+OBJS-$(CONFIG_FFTDNOIZ_FILTER)               += vf_fftdnoiz.o
 OBJS-$(CONFIG_FFTFILT_FILTER)                += vf_fftfilt.o
 OBJS-$(CONFIG_FIELD_FILTER)                  += vf_field.o
 OBJS-$(CONFIG_FIELDHINT_FILTER)              += vf_fieldhint.o
 OBJS-$(CONFIG_FIELDMATCH_FILTER)             += vf_fieldmatch.o
 OBJS-$(CONFIG_FIELDORDER_FILTER)             += vf_fieldorder.o
+OBJS-$(CONFIG_FILLBORDERS_FILTER)            += vf_fillborders.o
 OBJS-$(CONFIG_FIND_RECT_FILTER)              += vf_find_rect.o lavfutils.o
 OBJS-$(CONFIG_FLOODFILL_FILTER)              += vf_floodfill.o
 OBJS-$(CONFIG_FORMAT_FILTER)                 += vf_format.o
@@ -200,6 +238,7 @@
 OBJS-$(CONFIG_GBLUR_FILTER)                  += vf_gblur.o
 OBJS-$(CONFIG_GEQ_FILTER)                    += vf_geq.o
 OBJS-$(CONFIG_GRADFUN_FILTER)                += vf_gradfun.o
+OBJS-$(CONFIG_GREYEDGE_FILTER)               += vf_colorconstancy.o
 OBJS-$(CONFIG_HALDCLUT_FILTER)               += vf_lut3d.o framesync.o
 OBJS-$(CONFIG_HFLIP_FILTER)                  += vf_hflip.o
 OBJS-$(CONFIG_HISTEQ_FILTER)                 += vf_histeq.o
@@ -216,14 +255,16 @@
 OBJS-$(CONFIG_IDET_FILTER)                   += vf_idet.o
 OBJS-$(CONFIG_IL_FILTER)                     += vf_il.o
 OBJS-$(CONFIG_INFLATE_FILTER)                += vf_neighbor.o
-OBJS-$(CONFIG_INTERLACE_FILTER)              += vf_interlace.o
+OBJS-$(CONFIG_INTERLACE_FILTER)              += vf_tinterlace.o
 OBJS-$(CONFIG_INTERLEAVE_FILTER)             += f_interleave.o
 OBJS-$(CONFIG_KERNDEINT_FILTER)              += vf_kerndeint.o
 OBJS-$(CONFIG_LENSCORRECTION_FILTER)         += vf_lenscorrection.o
+OBJS-$(CONFIG_LENSFUN_FILTER)                += vf_lensfun.o
 OBJS-$(CONFIG_LIBVMAF_FILTER)                += vf_libvmaf.o framesync.o
 OBJS-$(CONFIG_LIMITER_FILTER)                += vf_limiter.o
 OBJS-$(CONFIG_LOOP_FILTER)                   += f_loop.o
 OBJS-$(CONFIG_LUMAKEY_FILTER)                += vf_lumakey.o
+OBJS-$(CONFIG_LUT1D_FILTER)                  += vf_lut3d.o
 OBJS-$(CONFIG_LUT_FILTER)                    += vf_lut.o
 OBJS-$(CONFIG_LUT2_FILTER)                   += vf_lut2.o framesync.o
 OBJS-$(CONFIG_LUT3D_FILTER)                  += vf_lut3d.o
@@ -237,18 +278,22 @@
 OBJS-$(CONFIG_METADATA_FILTER)               += f_metadata.o
 OBJS-$(CONFIG_MIDEQUALIZER_FILTER)           += vf_midequalizer.o framesync.o
 OBJS-$(CONFIG_MINTERPOLATE_FILTER)           += vf_minterpolate.o motion_estimation.o
+OBJS-$(CONFIG_MIX_FILTER)                    += vf_mix.o
 OBJS-$(CONFIG_MPDECIMATE_FILTER)             += vf_mpdecimate.o
 OBJS-$(CONFIG_NEGATE_FILTER)                 += vf_lut.o
 OBJS-$(CONFIG_NLMEANS_FILTER)                += vf_nlmeans.o
 OBJS-$(CONFIG_NNEDI_FILTER)                  += vf_nnedi.o
 OBJS-$(CONFIG_NOFORMAT_FILTER)               += vf_format.o
 OBJS-$(CONFIG_NOISE_FILTER)                  += vf_noise.o
+OBJS-$(CONFIG_NORMALIZE_FILTER)              += vf_normalize.o
 OBJS-$(CONFIG_NULL_FILTER)                   += vf_null.o
 OBJS-$(CONFIG_OCR_FILTER)                    += vf_ocr.o
 OBJS-$(CONFIG_OCV_FILTER)                    += vf_libopencv.o
-OBJS-$(CONFIG_OPENCL)                        += deshake_opencl.o unsharp_opencl.o
 OBJS-$(CONFIG_OSCILLOSCOPE_FILTER)           += vf_datascope.o
 OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o framesync.o
+OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER)         += vf_overlay_opencl.o opencl.o \
+                                                opencl/overlay.o framesync.o
+OBJS-$(CONFIG_OVERLAY_QSV_FILTER)            += vf_overlay_qsv.o framesync.o
 OBJS-$(CONFIG_OWDENOISE_FILTER)              += vf_owdenoise.o
 OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o
 OBJS-$(CONFIG_PALETTEGEN_FILTER)             += vf_palettegen.o
@@ -262,6 +307,10 @@
 OBJS-$(CONFIG_PP7_FILTER)                    += vf_pp7.o
 OBJS-$(CONFIG_PREMULTIPLY_FILTER)            += vf_premultiply.o framesync.o
 OBJS-$(CONFIG_PREWITT_FILTER)                += vf_convolution.o
+OBJS-$(CONFIG_PREWITT_OPENCL_FILTER)         += vf_convolution_opencl.o opencl.o \
+                                                opencl/convolution.o
+OBJS-$(CONFIG_PROCAMP_VAAPI_FILTER)          += vf_procamp_vaapi.o vaapi_vpp.o
+OBJS-$(CONFIG_PROGRAM_OPENCL_FILTER)         += vf_program_opencl.o opencl.o framesync.o
 OBJS-$(CONFIG_PSEUDOCOLOR_FILTER)            += vf_pseudocolor.o
 OBJS-$(CONFIG_PSNR_FILTER)                   += vf_psnr.o framesync.o
 OBJS-$(CONFIG_PULLUP_FILTER)                 += vf_pullup.o
@@ -276,13 +325,15 @@
 OBJS-$(CONFIG_REPEATFIELDS_FILTER)           += vf_repeatfields.o
 OBJS-$(CONFIG_REVERSE_FILTER)                += f_reverse.o
 OBJS-$(CONFIG_ROBERTS_FILTER)                += vf_convolution.o
+OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER)         += vf_convolution_opencl.o opencl.o \
+                                                opencl/convolution.o
 OBJS-$(CONFIG_ROTATE_FILTER)                 += vf_rotate.o
 OBJS-$(CONFIG_SAB_FILTER)                    += vf_sab.o
 OBJS-$(CONFIG_SCALE_FILTER)                  += vf_scale.o scale.o
 OBJS-$(CONFIG_SCALE_CUDA_FILTER)             += vf_scale_cuda.o vf_scale_cuda.ptx.o
 OBJS-$(CONFIG_SCALE_NPP_FILTER)              += vf_scale_npp.o scale.o
 OBJS-$(CONFIG_SCALE_QSV_FILTER)              += vf_scale_qsv.o
-OBJS-$(CONFIG_SCALE_VAAPI_FILTER)            += vf_scale_vaapi.o scale.o
+OBJS-$(CONFIG_SCALE_VAAPI_FILTER)            += vf_scale_vaapi.o scale.o vaapi_vpp.o
 OBJS-$(CONFIG_SCALE2REF_FILTER)              += vf_scale.o scale.o
 OBJS-$(CONFIG_SELECT_FILTER)                 += f_select.o
 OBJS-$(CONFIG_SELECTIVECOLOR_FILTER)         += vf_selectivecolor.o
@@ -291,8 +342,10 @@
 OBJS-$(CONFIG_SETDAR_FILTER)                 += vf_aspect.o
 OBJS-$(CONFIG_SETFIELD_FILTER)               += vf_setfield.o
 OBJS-$(CONFIG_SETPTS_FILTER)                 += setpts.o
+OBJS-$(CONFIG_SETRANGE_FILTER)               += vf_setparams.o
 OBJS-$(CONFIG_SETSAR_FILTER)                 += vf_aspect.o
 OBJS-$(CONFIG_SETTB_FILTER)                  += settb.o
+OBJS-$(CONFIG_SHARPNESS_VAAPI_FILTER)        += vf_misc_vaapi.o vaapi_vpp.o
 OBJS-$(CONFIG_SHOWINFO_FILTER)               += vf_showinfo.o
 OBJS-$(CONFIG_SHOWPALETTE_FILTER)            += vf_showpalette.o
 OBJS-$(CONFIG_SHUFFLEFRAMES_FILTER)          += vf_shuffleframes.o
@@ -302,8 +355,11 @@
 OBJS-$(CONFIG_SIGNATURE_FILTER)              += vf_signature.o
 OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
+OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
+                                                opencl/convolution.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o
+OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
 OBJS-$(CONFIG_SSIM_FILTER)                   += vf_ssim.o framesync.o
 OBJS-$(CONFIG_STEREO3D_FILTER)               += vf_stereo3d.o
 OBJS-$(CONFIG_STREAMSELECT_FILTER)           += f_streamselect.o framesync.o
@@ -319,19 +375,27 @@
 OBJS-$(CONFIG_TILE_FILTER)                   += vf_tile.o
 OBJS-$(CONFIG_TINTERLACE_FILTER)             += vf_tinterlace.o
 OBJS-$(CONFIG_TLUT2_FILTER)                  += vf_lut2.o framesync.o
-OBJS-$(CONFIG_TONEMAP_FILTER)                += vf_tonemap.o
+OBJS-$(CONFIG_TMIX_FILTER)                   += vf_mix.o framesync.o
+OBJS-$(CONFIG_TONEMAP_FILTER)                += vf_tonemap.o colorspace.o
+OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER)         += vf_tonemap_opencl.o colorspace.o opencl.o \
+                                                opencl/tonemap.o opencl/colorspace_common.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
+OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER)          += vf_transpose_npp.o
 OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o framesync.o
 OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
+OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER)         += vf_unsharp_opencl.o opencl.o \
+                                                opencl/unsharp.o
 OBJS-$(CONFIG_USPP_FILTER)                   += vf_uspp.o
 OBJS-$(CONFIG_VAGUEDENOISER_FILTER)          += vf_vaguedenoiser.o
 OBJS-$(CONFIG_VECTORSCOPE_FILTER)            += vf_vectorscope.o
 OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
+OBJS-$(CONFIG_VFRDET_FILTER)                 += vf_vfrdet.o
 OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
 OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
 OBJS-$(CONFIG_VIGNETTE_FILTER)               += vf_vignette.o
 OBJS-$(CONFIG_VMAFMOTION_FILTER)             += vf_vmafmotion.o framesync.o
+OBJS-$(CONFIG_VPP_QSV_FILTER)                += vf_vpp_qsv.o
 OBJS-$(CONFIG_VSTACK_FILTER)                 += vf_stack.o framesync.o
 OBJS-$(CONFIG_W3FDIF_FILTER)                 += vf_w3fdif.o
 OBJS-$(CONFIG_WAVEFORM_FILTER)               += vf_waveform.o
@@ -353,6 +417,9 @@
 OBJS-$(CONFIG_MANDELBROT_FILTER)             += vsrc_mandelbrot.o
 OBJS-$(CONFIG_MPTESTSRC_FILTER)              += vsrc_mptestsrc.o
 OBJS-$(CONFIG_NULLSRC_FILTER)                += vsrc_testsrc.o
+OBJS-$(CONFIG_OPENCLSRC_FILTER)              += vf_program_opencl.o opencl.o
+OBJS-$(CONFIG_PAL75BARS_FILTER)              += vsrc_testsrc.o
+OBJS-$(CONFIG_PAL100BARS_FILTER)             += vsrc_testsrc.o
 OBJS-$(CONFIG_RGBTESTSRC_FILTER)             += vsrc_testsrc.o
 OBJS-$(CONFIG_SMPTEBARS_FILTER)              += vsrc_testsrc.o
 OBJS-$(CONFIG_SMPTEHDBARS_FILTER)            += vsrc_testsrc.o
@@ -386,10 +453,13 @@
 SLIBOBJS-$(HAVE_GNU_WINDRES)                 += avfilterres.o
 
 SKIPHEADERS-$(CONFIG_LIBVIDSTAB)             += vidstabutils.h
-SKIPHEADERS-$(CONFIG_OPENCL)                 += opencl_internal.h deshake_opencl_kernel.h unsharp_opencl_kernel.h
 
 OBJS-$(CONFIG_SHARED)                        += log2_tab.o
 
+SKIPHEADERS-$(CONFIG_QSVVPP)                 += qsvvpp.h
+SKIPHEADERS-$(CONFIG_OPENCL)                 += opencl.h
+SKIPHEADERS-$(CONFIG_VAAPI)                  += vaapi_vpp.h
+
 TOOLS     = graph2dot
 TESTPROGS = drawutils filtfmts formats integral
 
@@ -397,3 +467,9 @@
 
 clean::
 	$(RM) $(CLEANSUFFIXES:%=libavfilter/libmpcodecs/%)
+
+OPENCL = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavfilter/opencl/*.cl))
+.SECONDARY: $(OPENCL:.cl=.c)
+libavfilter/opencl/%.c: TAG = OPENCL
+libavfilter/opencl/%.c: $(SRC_PATH)/libavfilter/opencl/%.cl
+	$(M)$(SRC_PATH)/tools/cl2c $< $@

diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile
new file mode 100644
index 0000000..b58daa3
--- /dev/null
+++ b/libavfilter/aarch64/Makefile

@@ -0,0 +1,3 @@
+OBJS-$(CONFIG_NLMEANS_FILTER)                += aarch64/vf_nlmeans_init.o
+
+NEON-OBJS-$(CONFIG_NLMEANS_FILTER)           += aarch64/vf_nlmeans_neon.o

diff --git a/libavfilter/aarch64/vf_nlmeans_init.c b/libavfilter/aarch64/vf_nlmeans_init.c
new file mode 100644
index 0000000..a1edefb
--- /dev/null
+++ b/libavfilter/aarch64/vf_nlmeans_init.c

@@ -0,0 +1,33 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/cpu.h"
+#include "libavfilter/vf_nlmeans.h"
+
+void ff_compute_safe_ssd_integral_image_neon(uint32_t *dst, ptrdiff_t dst_linesize_32,
+                                             const uint8_t *s1, ptrdiff_t linesize1,
+                                             const uint8_t *s2, ptrdiff_t linesize2,
+                                             int w, int h);
+
+av_cold void ff_nlmeans_init_aarch64(NLMeansDSPContext *dsp)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags))
+        dsp->compute_safe_ssd_integral_image = ff_compute_safe_ssd_integral_image_neon;
+}

diff --git a/libavfilter/aarch64/vf_nlmeans_neon.S b/libavfilter/aarch64/vf_nlmeans_neon.S
new file mode 100644
index 0000000..e69b0dd
--- /dev/null
+++ b/libavfilter/aarch64/vf_nlmeans_neon.S

@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2018 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// acc_sum_store(ABCD) = {X+A, X+A+B, X+A+B+C, X+A+B+C+D}
+.macro acc_sum_store x, xb
+        dup             v24.4S, v24.S[3]                                // ...X -> XXXX
+        ext             v25.16B, v26.16B, \xb, #12                      // ext(0000,ABCD,12)=0ABC
+        add             v24.4S, v24.4S, \x                              // XXXX+ABCD={X+A,X+B,X+C,X+D}
+        add             v24.4S, v24.4S, v25.4S                          // {X+A,X+B+A,X+C+B,X+D+C}       (+0ABC)
+        ext             v25.16B, v26.16B, v25.16B, #12                  // ext(0000,0ABC,12)=00AB
+        add             v24.4S, v24.4S, v25.4S                          // {X+A,X+B+A,X+C+B+A,X+D+C+B}   (+00AB)
+        ext             v25.16B, v26.16B, v25.16B, #12                  // ext(0000,00AB,12)=000A
+        add             v24.4S, v24.4S, v25.4S                          // {X+A,X+B+A,X+C+B+A,X+D+C+B+A} (+000A)
+        st1             {v24.4S}, [x0], #16                             // write 4x32-bit final values
+.endm
+
+function ff_compute_safe_ssd_integral_image_neon, export=1
+        movi            v26.4S, #0                                      // used as zero for the "rotations" in acc_sum_store
+        sub             x3, x3, w6, UXTW                                // s1 padding (s1_linesize - w)
+        sub             x5, x5, w6, UXTW                                // s2 padding (s2_linesize - w)
+        sub             x9, x0, w1, UXTW #2                             // dst_top
+        sub             x1, x1, w6, UXTW                                // dst padding (dst_linesize_32 - w)
+        lsl             x1, x1, #2                                      // dst padding expressed in bytes
+1:      mov             w10, w6                                         // width copy for each line
+        sub             x0, x0, #16                                     // beginning of the dst line minus 4 sums
+        sub             x8, x9, #4                                      // dst_top-1
+        ld1             {v24.4S}, [x0], #16                             // load ...X (contextual last sums)
+2:      ld1             {v0.16B}, [x2], #16                             // s1[x + 0..15]
+        ld1             {v1.16B}, [x4], #16                             // s2[x + 0..15]
+        ld1             {v16.4S,v17.4S}, [x8], #32                      // dst_top[x + 0..7 - 1]
+        usubl           v2.8H, v0.8B,  v1.8B                            // d[x + 0..7]  = s1[x + 0..7]  - s2[x + 0..7]
+        usubl2          v3.8H, v0.16B, v1.16B                           // d[x + 8..15] = s1[x + 8..15] - s2[x + 8..15]
+        ld1             {v18.4S,v19.4S}, [x8], #32                      // dst_top[x + 8..15 - 1]
+        smull           v4.4S, v2.4H, v2.4H                             // d[x + 0..3]^2
+        smull2          v5.4S, v2.8H, v2.8H                             // d[x + 4..7]^2
+        ld1             {v20.4S,v21.4S}, [x9], #32                      // dst_top[x + 0..7]
+        smull           v6.4S, v3.4H, v3.4H                             // d[x + 8..11]^2
+        smull2          v7.4S, v3.8H, v3.8H                             // d[x + 12..15]^2
+        ld1             {v22.4S,v23.4S}, [x9], #32                      // dst_top[x + 8..15]
+        sub             v0.4S, v20.4S, v16.4S                           // dst_top[x + 0..3] - dst_top[x + 0..3 - 1]
+        sub             v1.4S, v21.4S, v17.4S                           // dst_top[x + 4..7] - dst_top[x + 4..7 - 1]
+        add             v0.4S, v0.4S, v4.4S                             // + d[x + 0..3]^2
+        add             v1.4S, v1.4S, v5.4S                             // + d[x + 4..7]^2
+        sub             v2.4S, v22.4S, v18.4S                           // dst_top[x +  8..11] - dst_top[x +  8..11 - 1]
+        sub             v3.4S, v23.4S, v19.4S                           // dst_top[x + 12..15] - dst_top[x + 12..15 - 1]
+        add             v2.4S, v2.4S, v6.4S                             // + d[x +  8..11]^2
+        add             v3.4S, v3.4S, v7.4S                             // + d[x + 12..15]^2
+        acc_sum_store   v0.4S, v0.16B                                   // accumulate and store dst[ 0..3]
+        acc_sum_store   v1.4S, v1.16B                                   // accumulate and store dst[ 4..7]
+        acc_sum_store   v2.4S, v2.16B                                   // accumulate and store dst[ 8..11]
+        acc_sum_store   v3.4S, v3.16B                                   // accumulate and store dst[12..15]
+        subs            w10, w10, #16                                   // width dec
+        b.ne            2b                                              // loop til next line
+        add             x2, x2, x3                                      // skip to next line (s1)
+        add             x4, x4, x5                                      // skip to next line (s2)
+        add             x0, x0, x1                                      // skip to next line (dst)
+        add             x9, x9, x1                                      // skip to next line (dst_top)
+        subs            w7, w7, #1                                      // height dec
+        b.ne            1b
+        ret
+endfunc

diff --git a/libavfilter/af_acontrast.c b/libavfilter/af_acontrast.c
new file mode 100644
index 0000000..e080531
--- /dev/null
+++ b/libavfilter/af_acontrast.c

@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2008 Rob Sykes
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+
+typedef struct AudioContrastContext {
+    const AVClass *class;
+    float contrast;
+    void (*filter)(void **dst, const void **src,
+                   int nb_samples, int channels, float contrast);
+} AudioContrastContext;
+
+#define OFFSET(x) offsetof(AudioContrastContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption acontrast_options[] = {
+    { "contrast", "set contrast", OFFSET(contrast), AV_OPT_TYPE_FLOAT, {.dbl=33}, 0, 100, A },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(acontrast);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats = NULL;
+    AVFilterChannelLayouts *layouts = NULL;
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
+        AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+static void filter_flt(void **d, const void **s,
+                       int nb_samples, int channels,
+                       float contrast)
+{
+    const float *src = s[0];
+    float *dst = d[0];
+    int n, c;
+
+    for (n = 0; n < nb_samples; n++) {
+        for (c = 0; c < channels; c++) {
+            float d = src[c] * M_PI_2;
+
+            dst[c] = sinf(d + contrast * sinf(d * 4));
+        }
+
+        dst += c;
+        src += c;
+    }
+}
+
+static void filter_dbl(void **d, const void **s,
+                       int nb_samples, int channels,
+                       float contrast)
+{
+    const double *src = s[0];
+    double *dst = d[0];
+    int n, c;
+
+    for (n = 0; n < nb_samples; n++) {
+        for (c = 0; c < channels; c++) {
+            double d = src[c] * M_PI_2;
+
+            dst[c] = sin(d + contrast * sin(d * 4));
+        }
+
+        dst += c;
+        src += c;
+    }
+}
+
+static void filter_fltp(void **d, const void **s,
+                        int nb_samples, int channels,
+                        float contrast)
+{
+    int n, c;
+
+    for (c = 0; c < channels; c++) {
+        const float *src = s[c];
+        float *dst = d[c];
+
+        for (n = 0; n < nb_samples; n++) {
+            float d = src[n] * M_PI_2;
+
+            dst[n] = sinf(d + contrast * sinf(d * 4));
+        }
+    }
+}
+
+static void filter_dblp(void **d, const void **s,
+                        int nb_samples, int channels,
+                        float contrast)
+{
+    int n, c;
+
+    for (c = 0; c < channels; c++) {
+        const double *src = s[c];
+        double *dst = d[c];
+
+        for (n = 0; n < nb_samples; n++) {
+            double d = src[n] * M_PI_2;
+
+            dst[n] = sin(d + contrast * sin(d * 4));
+        }
+    }
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioContrastContext *s    = ctx->priv;
+
+    switch (inlink->format) {
+    case AV_SAMPLE_FMT_FLT:  s->filter = filter_flt;  break;
+    case AV_SAMPLE_FMT_DBL:  s->filter = filter_dbl;  break;
+    case AV_SAMPLE_FMT_FLTP: s->filter = filter_fltp; break;
+    case AV_SAMPLE_FMT_DBLP: s->filter = filter_dblp; break;
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AudioContrastContext *s = ctx->priv;
+    AVFrame *out;
+
+    if (av_frame_is_writable(in)) {
+        out = in;
+    } else {
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    s->filter((void **)out->extended_data, (const void **)in->extended_data,
+              in->nb_samples, in->channels, s->contrast / 750);
+
+    if (out != in)
+        av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_acontrast = {
+    .name           = "acontrast",
+    .description    = NULL_IF_CONFIG_SMALL("Simple audio dynamic range compression/expansion filter."),
+    .query_formats  = query_formats,
+    .priv_size      = sizeof(AudioContrastContext),
+    .priv_class     = &acontrast_class,
+    .inputs         = inputs,
+    .outputs        = outputs,
+};

diff --git a/libavfilter/af_acrossover.c b/libavfilter/af_acrossover.c
new file mode 100644
index 0000000..9acf3f1
--- /dev/null
+++ b/libavfilter/af_acrossover.c

@@ -0,0 +1,343 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Crossover filter
+ *
+ * Split an audio stream into several bands.
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/avstring.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+
+#define MAX_SPLITS 16
+#define MAX_BANDS MAX_SPLITS + 1
+
+typedef struct BiquadContext {
+    double a0, a1, a2;
+    double b1, b2;
+    double i1, i2;
+    double o1, o2;
+} BiquadContext;
+
+typedef struct CrossoverChannel {
+    BiquadContext lp[MAX_BANDS][4];
+    BiquadContext hp[MAX_BANDS][4];
+} CrossoverChannel;
+
+typedef struct AudioCrossoverContext {
+    const AVClass *class;
+
+    char *splits_str;
+    int order;
+
+    int filter_count;
+    int nb_splits;
+    float *splits;
+
+    CrossoverChannel *xover;
+} AudioCrossoverContext;
+
+#define OFFSET(x) offsetof(AudioCrossoverContext, x)
+#define AF AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption acrossover_options[] = {
+    { "split", "set split frequencies", OFFSET(splits_str), AV_OPT_TYPE_STRING, {.str="500"}, 0, 0, AF },
+    { "order", "set order",             OFFSET(order),      AV_OPT_TYPE_INT,    {.i64=1},     0, 2, AF, "m" },
+    { "2nd",   "2nd order",             0,                  AV_OPT_TYPE_CONST,  {.i64=0},     0, 0, AF, "m" },
+    { "4th",   "4th order",             0,                  AV_OPT_TYPE_CONST,  {.i64=1},     0, 0, AF, "m" },
+    { "8th",   "8th order",             0,                  AV_OPT_TYPE_CONST,  {.i64=2},     0, 0, AF, "m" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(acrossover);
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    AudioCrossoverContext *s = ctx->priv;
+    char *p, *arg, *saveptr = NULL;
+    int i, ret = 0;
+
+    s->splits = av_calloc(MAX_SPLITS, sizeof(*s->splits));
+    if (!s->splits)
+        return AVERROR(ENOMEM);
+
+    p = s->splits_str;
+    for (i = 0; i < MAX_SPLITS; i++) {
+        float freq;
+
+        if (!(arg = av_strtok(p, " |", &saveptr)))
+            break;
+
+        p = NULL;
+
+        ret = sscanf(arg, "%f", &freq);
+
+        if (freq <= 0) {
+            av_log(ctx, AV_LOG_ERROR, "Frequency %f must be positive number.\n", freq);
+            return AVERROR(EINVAL);
+        }
+
+        if (i > 0 && freq <= s->splits[i-1]) {
+            av_log(ctx, AV_LOG_ERROR, "Frequency %f must be in increasing order.\n", freq);
+            return AVERROR(EINVAL);
+        }
+
+        s->splits[i] = freq;
+    }
+
+    s->nb_splits = i;
+
+    for (i = 0; i <= s->nb_splits; i++) {
+        AVFilterPad pad  = { 0 };
+        char *name;
+
+        pad.type = AVMEDIA_TYPE_AUDIO;
+        name = av_asprintf("out%d", ctx->nb_outputs);
+        if (!name)
+            return AVERROR(ENOMEM);
+        pad.name = name;
+
+        if ((ret = ff_insert_outpad(ctx, i, &pad)) < 0) {
+            av_freep(&pad.name);
+            return ret;
+        }
+    }
+
+    return ret;
+}
+
+static void set_lp(BiquadContext *b, float fc, float q, float sr)
+{
+    double omega = (2.0 * M_PI * fc / sr);
+    double sn = sin(omega);
+    double cs = cos(omega);
+    double alpha = (sn / (2 * q));
+    double inv = (1.0 / (1.0 + alpha));
+
+    b->a2 = b->a0 = (inv * (1.0 - cs) * 0.5);
+    b->a1 = b->a0 + b->a0;
+    b->b1 = -2. * cs * inv;
+    b->b2 = (1. - alpha) * inv;
+}
+
+static void set_hp(BiquadContext *b, float fc, float q, float sr)
+{
+    double omega = 2 * M_PI * fc / sr;
+    double sn = sin(omega);
+    double cs = cos(omega);
+    double alpha = sn / (2 * q);
+    double inv = 1.0 / (1.0 + alpha);
+
+    b->a0 = inv * (1. + cs) / 2.;
+    b->a1 = -2. * b->a0;
+    b->a2 = b->a0;
+    b->b1 = -2. * cs * inv;
+    b->b2 = (1. - alpha) * inv;
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioCrossoverContext *s = ctx->priv;
+    int ch, band, sample_rate = inlink->sample_rate;
+    double q;
+
+    s->xover = av_calloc(inlink->channels, sizeof(*s->xover));
+    if (!s->xover)
+        return AVERROR(ENOMEM);
+
+    switch (s->order) {
+    case 0:
+        q = 0.5;
+        s->filter_count = 1;
+        break;
+    case 1:
+        q = M_SQRT1_2;
+        s->filter_count = 2;
+        break;
+    case 2:
+        q = 0.54;
+        s->filter_count = 4;
+        break;
+    }
+
+    for (ch = 0; ch < inlink->channels; ch++) {
+        for (band = 0; band <= s->nb_splits; band++) {
+            set_lp(&s->xover[ch].lp[band][0], s->splits[band], q, sample_rate);
+            set_hp(&s->xover[ch].hp[band][0], s->splits[band], q, sample_rate);
+
+            if (s->order > 1) {
+                set_lp(&s->xover[ch].lp[band][1], s->splits[band], 1.34, sample_rate);
+                set_hp(&s->xover[ch].hp[band][1], s->splits[band], 1.34, sample_rate);
+                set_lp(&s->xover[ch].lp[band][2], s->splits[band],    q, sample_rate);
+                set_hp(&s->xover[ch].hp[band][2], s->splits[band],    q, sample_rate);
+                set_lp(&s->xover[ch].lp[band][3], s->splits[band], 1.34, sample_rate);
+                set_hp(&s->xover[ch].hp[band][3], s->splits[band], 1.34, sample_rate);
+            } else {
+                set_lp(&s->xover[ch].lp[band][1], s->splits[band], q, sample_rate);
+                set_hp(&s->xover[ch].hp[band][1], s->splits[band], q, sample_rate);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats;
+    AVFilterChannelLayouts *layouts;
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+static double biquad_process(BiquadContext *b, double in)
+{
+    double out = in * b->a0 + b->i1 * b->a1 + b->i2 * b->a2 - b->o1 * b->b1 - b->o2 * b->b2;
+
+    b->i2 = b->i1;
+    b->o2 = b->o1;
+    b->i1 = in;
+    b->o1 = out;
+
+    return out;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioCrossoverContext *s = ctx->priv;
+    AVFrame *frames[MAX_BANDS] = { NULL };
+    int i, f, ch, band, ret = 0;
+
+    for (i = 0; i < ctx->nb_outputs; i++) {
+        frames[i] = ff_get_audio_buffer(ctx->outputs[i], in->nb_samples);
+
+        if (!frames[i]) {
+            ret = AVERROR(ENOMEM);
+            break;
+        }
+
+        frames[i]->pts = in->pts;
+    }
+
+    if (ret < 0)
+        goto fail;
+
+    for (ch = 0; ch < inlink->channels; ch++) {
+        const double *src = (const double *)in->extended_data[ch];
+        CrossoverChannel *xover = &s->xover[ch];
+
+        for (band = 0; band < ctx->nb_outputs; band++) {
+            double *dst = (double *)frames[band]->extended_data[ch];
+
+            for (i = 0; i < in->nb_samples; i++) {
+                dst[i] = src[i];
+
+                for (f = 0; f < s->filter_count; f++) {
+                    if (band + 1 < ctx->nb_outputs) {
+                        BiquadContext *lp = &xover->lp[band][f];
+                        dst[i] = biquad_process(lp, dst[i]);
+                    }
+
+                    if (band - 1 >= 0) {
+                        BiquadContext *hp = &xover->hp[band - 1][f];
+                        dst[i] = biquad_process(hp, dst[i]);
+                    }
+                }
+            }
+        }
+    }
+
+    for (i = 0; i < ctx->nb_outputs; i++) {
+        ret = ff_filter_frame(ctx->outputs[i], frames[i]);
+        if (ret < 0)
+            break;
+    }
+
+fail:
+    av_frame_free(&in);
+
+    return ret;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioCrossoverContext *s = ctx->priv;
+    int i;
+
+    av_freep(&s->splits);
+
+    for (i = 0; i < ctx->nb_outputs; i++)
+        av_freep(&ctx->output_pads[i].name);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_acrossover = {
+    .name           = "acrossover",
+    .description    = NULL_IF_CONFIG_SMALL("Split audio into per-bands streams."),
+    .priv_size      = sizeof(AudioCrossoverContext),
+    .priv_class     = &acrossover_class,
+    .init           = init,
+    .uninit         = uninit,
+    .query_formats  = query_formats,
+    .inputs         = inputs,
+    .outputs        = NULL,
+    .flags          = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
+};

diff --git a/libavfilter/af_adeclick.c b/libavfilter/af_adeclick.c
new file mode 100644
index 0000000..bf0b7cb
--- /dev/null
+++ b/libavfilter/af_adeclick.c

@@ -0,0 +1,753 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/audio_fifo.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+
+typedef struct DeclickChannel {
+    double *auxiliary;
+    double *detection;
+    double *acoefficients;
+    double *acorrelation;
+    double *tmp;
+    double *interpolated;
+    double *matrix;
+    int matrix_size;
+    double *vector;
+    int vector_size;
+    double *y;
+    int y_size;
+    uint8_t *click;
+    int *index;
+    unsigned *histogram;
+    int histogram_size;
+} DeclickChannel;
+
+typedef struct AudioDeclickContext {
+    const AVClass *class;
+
+    double w;
+    double overlap;
+    double threshold;
+    double ar;
+    double burst;
+    int method;
+    int nb_hbins;
+
+    int is_declip;
+    int ar_order;
+    int nb_burst_samples;
+    int window_size;
+    int hop_size;
+    int overlap_skip;
+
+    AVFrame *in;
+    AVFrame *out;
+    AVFrame *buffer;
+    AVFrame *is;
+
+    DeclickChannel *chan;
+
+    int64_t pts;
+    int nb_channels;
+    uint64_t nb_samples;
+    uint64_t detected_errors;
+    int samples_left;
+
+    AVAudioFifo *fifo;
+    double *window_func_lut;
+
+    int (*detector)(struct AudioDeclickContext *s, DeclickChannel *c,
+                    double sigmae, double *detection,
+                    double *acoefficients, uint8_t *click, int *index,
+                    const double *src, double *dst);
+} AudioDeclickContext;
+
+#define OFFSET(x) offsetof(AudioDeclickContext, x)
+#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption adeclick_options[] = {
+    { "w", "set window size",          OFFSET(w),         AV_OPT_TYPE_DOUBLE, {.dbl=55}, 10,  100, AF },
+    { "o", "set window overlap",       OFFSET(overlap),   AV_OPT_TYPE_DOUBLE, {.dbl=75}, 50,   95, AF },
+    { "a", "set autoregression order", OFFSET(ar),        AV_OPT_TYPE_DOUBLE, {.dbl=2},   0,   25, AF },
+    { "t", "set threshold",            OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl=2},   1,  100, AF },
+    { "b", "set burst fusion",         OFFSET(burst),     AV_OPT_TYPE_DOUBLE, {.dbl=2},   0,   10, AF },
+    { "m", "set overlap method",       OFFSET(method),    AV_OPT_TYPE_INT,    {.i64=0},   0,    1, AF, "m" },
+    { "a", "overlap-add",              0,                 AV_OPT_TYPE_CONST,  {.i64=0},   0,    0, AF, "m" },
+    { "s", "overlap-save",             0,                 AV_OPT_TYPE_CONST,  {.i64=1},   0,    0, AF, "m" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(adeclick);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats = NULL;
+    AVFilterChannelLayouts *layouts = NULL;
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioDeclickContext *s = ctx->priv;
+    int i;
+
+    s->pts = AV_NOPTS_VALUE;
+    s->window_size = inlink->sample_rate * s->w / 1000.;
+    if (s->window_size < 100)
+        return AVERROR(EINVAL);
+    s->ar_order = FFMAX(s->window_size * s->ar / 100., 1);
+    s->nb_burst_samples = s->window_size * s->burst / 1000.;
+    s->hop_size = s->window_size * (1. - (s->overlap / 100.));
+    if (s->hop_size < 1)
+        return AVERROR(EINVAL);
+
+    s->window_func_lut = av_calloc(s->window_size, sizeof(*s->window_func_lut));
+    if (!s->window_func_lut)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < s->window_size; i++)
+        s->window_func_lut[i] = sin(M_PI * i / s->window_size) *
+                                (1. - (s->overlap / 100.)) * M_PI_2;
+
+    av_frame_free(&s->in);
+    av_frame_free(&s->out);
+    av_frame_free(&s->buffer);
+    av_frame_free(&s->is);
+    s->in = ff_get_audio_buffer(inlink, s->window_size);
+    s->out = ff_get_audio_buffer(inlink, s->window_size);
+    s->buffer = ff_get_audio_buffer(inlink, s->window_size * 2);
+    s->is = ff_get_audio_buffer(inlink, s->window_size);
+    if (!s->in || !s->out || !s->buffer || !s->is)
+        return AVERROR(ENOMEM);
+
+    s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->window_size);
+    if (!s->fifo)
+        return AVERROR(ENOMEM);
+    s->overlap_skip = s->method ? (s->window_size - s->hop_size) / 2 : 0;
+    if (s->overlap_skip > 0) {
+        av_audio_fifo_write(s->fifo, (void **)s->in->extended_data,
+                            s->overlap_skip);
+    }
+
+    s->nb_channels = inlink->channels;
+    s->chan = av_calloc(inlink->channels, sizeof(*s->chan));
+    if (!s->chan)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < inlink->channels; i++) {
+        DeclickChannel *c = &s->chan[i];
+
+        c->detection = av_calloc(s->window_size, sizeof(*c->detection));
+        c->auxiliary = av_calloc(s->ar_order + 1, sizeof(*c->auxiliary));
+        c->acoefficients = av_calloc(s->ar_order + 1, sizeof(*c->acoefficients));
+        c->acorrelation = av_calloc(s->ar_order + 1, sizeof(*c->acorrelation));
+        c->tmp = av_calloc(s->ar_order, sizeof(*c->tmp));
+        c->click = av_calloc(s->window_size, sizeof(*c->click));
+        c->index = av_calloc(s->window_size, sizeof(*c->index));
+        c->interpolated = av_calloc(s->window_size, sizeof(*c->interpolated));
+        if (!c->auxiliary || !c->acoefficients || !c->detection || !c->click ||
+            !c->index || !c->interpolated || !c->acorrelation || !c->tmp)
+            return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+static void autocorrelation(const double *input, int order, int size,
+                            double *output, double scale)
+{
+    int i, j;
+
+    for (i = 0; i <= order; i++) {
+        double value = 0.;
+
+        for (j = i; j < size; j++)
+            value += input[j] * input[j - i];
+
+        output[i] = value * scale;
+    }
+}
+
+static double autoregression(const double *samples, int ar_order,
+                             int nb_samples, double *k, double *r, double *a)
+{
+    double alpha;
+    int i, j;
+
+    memset(a, 0, ar_order * sizeof(*a));
+
+    autocorrelation(samples, ar_order, nb_samples, r, 1. / nb_samples);
+
+    /* Levinson-Durbin algorithm */
+    k[0] = a[0] = -r[1] / r[0];
+    alpha = r[0] * (1. - k[0] * k[0]);
+    for (i = 1; i < ar_order; i++) {
+        double epsilon = 0.;
+
+        for (j = 0; j < i; j++)
+            epsilon += a[j] * r[i - j];
+        epsilon += r[i + 1];
+
+        k[i] = -epsilon / alpha;
+        alpha *= (1. - k[i] * k[i]);
+        for (j = i - 1; j >= 0; j--)
+            k[j] = a[j] + k[i] * a[i - j - 1];
+        for (j = 0; j <= i; j++)
+            a[j] = k[j];
+    }
+
+    k[0] = 1.;
+    for (i = 1; i <= ar_order; i++)
+        k[i] = a[i - 1];
+
+    return sqrt(alpha);
+}
+
+static int isfinite_array(double *samples, int nb_samples)
+{
+    int i;
+
+    for (i = 0; i < nb_samples; i++)
+        if (!isfinite(samples[i]))
+            return 0;
+
+    return 1;
+}
+
+static int find_index(int *index, int value, int size)
+{
+    int i, start, end;
+
+    if ((value < index[0]) || (value > index[size - 1]))
+        return 1;
+
+    i = start = 0;
+    end = size - 1;
+
+    while (start <= end) {
+        i = (end + start) / 2;
+        if (index[i] == value)
+            return 0;
+        if (value < index[i])
+            end = i - 1;
+        if (value > index[i])
+            start = i + 1;
+    }
+
+    return 1;
+}
+
+static int factorization(double *matrix, int n)
+{
+    int i, j, k;
+
+    for (i = 0; i < n; i++) {
+        const int in = i * n;
+        double value;
+
+        value = matrix[in + i];
+        for (j = 0; j < i; j++)
+            value -= matrix[j * n + j] * matrix[in + j] * matrix[in + j];
+
+        if (value == 0.) {
+            return -1;
+        }
+
+        matrix[in + i] = value;
+        for (j = i + 1; j < n; j++) {
+            const int jn = j * n;
+            double x;
+
+            x = matrix[jn + i];
+            for (k = 0; k < i; k++)
+                x -= matrix[k * n + k] * matrix[in + k] * matrix[jn + k];
+            matrix[jn + i] = x / matrix[in + i];
+        }
+    }
+
+    return 0;
+}
+
+static int do_interpolation(DeclickChannel *c, double *matrix,
+                            double *vector, int n, double *out)
+{
+    int i, j, ret;
+    double *y;
+
+    ret = factorization(matrix, n);
+    if (ret < 0)
+        return ret;
+
+    av_fast_malloc(&c->y, &c->y_size, n * sizeof(*c->y));
+    y = c->y;
+    if (!y)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < n; i++) {
+        const int in = i * n;
+        double value;
+
+        value = vector[i];
+        for (j = 0; j < i; j++)
+            value -= matrix[in + j] * y[j];
+        y[i] = value;
+    }
+
+    for (i = n - 1; i >= 0; i--) {
+        out[i] = y[i] / matrix[i * n + i];
+        for (j = i + 1; j < n; j++)
+            out[i] -= matrix[j * n + i] * out[j];
+    }
+
+    return 0;
+}
+
+static int interpolation(DeclickChannel *c, const double *src, int ar_order,
+                         double *acoefficients, int *index, int nb_errors,
+                         double *auxiliary, double *interpolated)
+{
+    double *vector, *matrix;
+    int i, j;
+
+    av_fast_malloc(&c->matrix, &c->matrix_size, nb_errors * nb_errors * sizeof(*c->matrix));
+    matrix = c->matrix;
+    if (!matrix)
+        return AVERROR(ENOMEM);
+
+    av_fast_malloc(&c->vector, &c->vector_size, nb_errors * sizeof(*c->vector));
+    vector = c->vector;
+    if (!vector)
+        return AVERROR(ENOMEM);
+
+    autocorrelation(acoefficients, ar_order, ar_order + 1, auxiliary, 1.);
+
+    for (i = 0; i < nb_errors; i++) {
+        const int im = i * nb_errors;
+
+        for (j = i; j < nb_errors; j++) {
+            if (abs(index[j] - index[i]) <= ar_order) {
+                matrix[j * nb_errors + i] = matrix[im + j] = auxiliary[abs(index[j] - index[i])];
+            } else {
+                matrix[j * nb_errors + i] = matrix[im + j] = 0;
+            }
+        }
+    }
+
+    for (i = 0; i < nb_errors; i++) {
+        double value = 0.;
+
+        for (j = -ar_order; j <= ar_order; j++)
+            if (find_index(index, index[i] - j, nb_errors))
+                value -= src[index[i] - j] * auxiliary[abs(j)];
+
+        vector[i] = value;
+    }
+
+    return do_interpolation(c, matrix, vector, nb_errors, interpolated);
+}
+
+static int detect_clips(AudioDeclickContext *s, DeclickChannel *c,
+                        double unused0,
+                        double *unused1, double *unused2,
+                        uint8_t *clip, int *index,
+                        const double *src, double *dst)
+{
+    const double threshold = s->threshold;
+    double max_amplitude = 0;
+    unsigned *histogram;
+    int i, nb_clips = 0;
+
+    av_fast_malloc(&c->histogram, &c->histogram_size, s->nb_hbins * sizeof(*c->histogram));
+    if (!c->histogram)
+        return AVERROR(ENOMEM);
+    histogram = c->histogram;
+    memset(histogram, 0, sizeof(*histogram) * s->nb_hbins);
+
+    for (i = 0; i < s->window_size; i++) {
+        const unsigned index = fmin(fabs(src[i]), 1) * (s->nb_hbins - 1);
+
+        histogram[index]++;
+        dst[i] = src[i];
+        clip[i] = 0;
+    }
+
+    for (i = s->nb_hbins - 1; i > 1; i--) {
+        if (histogram[i]) {
+            if (histogram[i] / (double)FFMAX(histogram[i - 1], 1) > threshold) {
+                max_amplitude = i / (double)s->nb_hbins;
+            }
+            break;
+        }
+    }
+
+    if (max_amplitude > 0.) {
+        for (i = 0; i < s->window_size; i++) {
+            clip[i] = fabs(src[i]) >= max_amplitude;
+        }
+    }
+
+    memset(clip, 0, s->ar_order * sizeof(*clip));
+    memset(clip + (s->window_size - s->ar_order), 0, s->ar_order * sizeof(*clip));
+
+    for (i = s->ar_order; i < s->window_size - s->ar_order; i++)
+        if (clip[i])
+            index[nb_clips++] = i;
+
+    return nb_clips;
+}
+
+static int detect_clicks(AudioDeclickContext *s, DeclickChannel *c,
+                         double sigmae,
+                         double *detection, double *acoefficients,
+                         uint8_t *click, int *index,
+                         const double *src, double *dst)
+{
+    const double threshold = s->threshold;
+    int i, j, nb_clicks = 0, prev = -1;
+
+    memset(detection, 0, s->window_size * sizeof(*detection));
+
+    for (i = s->ar_order; i < s->window_size; i++) {
+        for (j = 0; j <= s->ar_order; j++) {
+            detection[i] += acoefficients[j] * src[i - j];
+        }
+    }
+
+    for (i = 0; i < s->window_size; i++) {
+        click[i] = fabs(detection[i]) > sigmae * threshold;
+        dst[i] = src[i];
+    }
+
+    for (i = 0; i < s->window_size; i++) {
+        if (!click[i])
+            continue;
+
+        if (prev >= 0 && (i > prev + 1) && (i <= s->nb_burst_samples + prev))
+            for (j = prev + 1; j < i; j++)
+                click[j] = 1;
+        prev = i;
+    }
+
+    memset(click, 0, s->ar_order * sizeof(*click));
+    memset(click + (s->window_size - s->ar_order), 0, s->ar_order * sizeof(*click));
+
+    for (i = s->ar_order; i < s->window_size - s->ar_order; i++)
+        if (click[i])
+            index[nb_clicks++] = i;
+
+    return nb_clicks;
+}
+
+typedef struct ThreadData {
+    AVFrame *out;
+} ThreadData;
+
+static int filter_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)
+{
+    AudioDeclickContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *out = td->out;
+    const double *src = (const double *)s->in->extended_data[ch];
+    double *is = (double *)s->is->extended_data[ch];
+    double *dst = (double *)s->out->extended_data[ch];
+    double *ptr = (double *)out->extended_data[ch];
+    double *buf = (double *)s->buffer->extended_data[ch];
+    const double *w = s->window_func_lut;
+    DeclickChannel *c = &s->chan[ch];
+    double sigmae;
+    int j, ret;
+
+    sigmae = autoregression(src, s->ar_order, s->window_size, c->acoefficients, c->acorrelation, c->tmp);
+
+    if (isfinite_array(c->acoefficients, s->ar_order + 1)) {
+        double *interpolated = c->interpolated;
+        int *index = c->index;
+        int nb_errors;
+
+        nb_errors = s->detector(s, c, sigmae, c->detection, c->acoefficients,
+                                c->click, index, src, dst);
+        if (nb_errors > 0) {
+            ret = interpolation(c, src, s->ar_order, c->acoefficients, index,
+                                nb_errors, c->auxiliary, interpolated);
+            if (ret < 0)
+                return ret;
+
+            for (j = 0; j < nb_errors; j++) {
+                dst[index[j]] = interpolated[j];
+                is[index[j]] = 1;
+            }
+        }
+    } else {
+        memcpy(dst, src, s->window_size * sizeof(*dst));
+    }
+
+    if (s->method == 0) {
+        for (j = 0; j < s->window_size; j++)
+            buf[j] += dst[j] * w[j];
+    } else {
+        const int skip = s->overlap_skip;
+
+        for (j = 0; j < s->hop_size; j++)
+            buf[j] = dst[skip + j];
+    }
+    for (j = 0; j < s->hop_size; j++)
+        ptr[j] = buf[j];
+
+    memmove(buf, buf + s->hop_size, (s->window_size * 2 - s->hop_size) * sizeof(*buf));
+    memmove(is, is + s->hop_size, (s->window_size - s->hop_size) * sizeof(*is));
+    memset(buf + s->window_size * 2 - s->hop_size, 0, s->hop_size * sizeof(*buf));
+    memset(is + s->window_size - s->hop_size, 0, s->hop_size * sizeof(*is));
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AudioDeclickContext *s = ctx->priv;
+    AVFrame *out = NULL;
+    int ret = 0;
+
+    if (s->pts == AV_NOPTS_VALUE)
+        s->pts = in->pts;
+
+    ret = av_audio_fifo_write(s->fifo, (void **)in->extended_data,
+                              in->nb_samples);
+    av_frame_free(&in);
+
+    while (av_audio_fifo_size(s->fifo) >= s->window_size) {
+        int j, ch, detected_errors = 0;
+        ThreadData td;
+
+        out = ff_get_audio_buffer(outlink, s->hop_size);
+        if (!out)
+            return AVERROR(ENOMEM);
+
+        ret = av_audio_fifo_peek(s->fifo, (void **)s->in->extended_data,
+                                 s->window_size);
+        if (ret < 0)
+            break;
+
+        td.out = out;
+        ret = ctx->internal->execute(ctx, filter_channel, &td, NULL, inlink->channels);
+        if (ret < 0)
+            goto fail;
+
+        for (ch = 0; ch < s->in->channels; ch++) {
+            double *is = (double *)s->is->extended_data[ch];
+
+            for (j = 0; j < s->hop_size; j++) {
+                if (is[j])
+                    detected_errors++;
+            }
+        }
+
+        av_audio_fifo_drain(s->fifo, s->hop_size);
+
+        if (s->samples_left > 0)
+            out->nb_samples = FFMIN(s->hop_size, s->samples_left);
+
+        out->pts = s->pts;
+        s->pts += s->hop_size;
+
+        s->detected_errors += detected_errors;
+        s->nb_samples += out->nb_samples * inlink->channels;
+
+        ret = ff_filter_frame(outlink, out);
+        if (ret < 0)
+            break;
+
+        if (s->samples_left > 0) {
+            s->samples_left -= s->hop_size;
+            if (s->samples_left <= 0)
+                av_audio_fifo_drain(s->fifo, av_audio_fifo_size(s->fifo));
+        }
+    }
+
+fail:
+    if (ret < 0)
+        av_frame_free(&out);
+    return ret;
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioDeclickContext *s = ctx->priv;
+    int ret = 0;
+
+    ret = ff_request_frame(ctx->inputs[0]);
+
+    if (ret == AVERROR_EOF && av_audio_fifo_size(s->fifo) > 0) {
+        if (!s->samples_left)
+            s->samples_left = av_audio_fifo_size(s->fifo) - s->overlap_skip;
+
+        if (s->samples_left > 0) {
+            AVFrame *in = ff_get_audio_buffer(outlink, s->window_size - s->samples_left);
+            if (!in)
+                return AVERROR(ENOMEM);
+            ret = filter_frame(ctx->inputs[0], in);
+        }
+    }
+
+    return ret;
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    AudioDeclickContext *s = ctx->priv;
+
+    s->is_declip = !strcmp(ctx->filter->name, "adeclip");
+    if (s->is_declip) {
+        s->detector = detect_clips;
+    } else {
+        s->detector = detect_clicks;
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioDeclickContext *s = ctx->priv;
+    int i;
+
+    av_log(ctx, AV_LOG_INFO, "Detected %s in %"PRId64" of %"PRId64" samples (%g%%).\n",
+           s->is_declip ? "clips" : "clicks", s->detected_errors,
+           s->nb_samples, 100. * s->detected_errors / s->nb_samples);
+
+    av_audio_fifo_free(s->fifo);
+    av_freep(&s->window_func_lut);
+    av_frame_free(&s->in);
+    av_frame_free(&s->out);
+    av_frame_free(&s->buffer);
+    av_frame_free(&s->is);
+
+    if (s->chan) {
+        for (i = 0; i < s->nb_channels; i++) {
+            DeclickChannel *c = &s->chan[i];
+
+            av_freep(&c->detection);
+            av_freep(&c->auxiliary);
+            av_freep(&c->acoefficients);
+            av_freep(&c->acorrelation);
+            av_freep(&c->tmp);
+            av_freep(&c->click);
+            av_freep(&c->index);
+            av_freep(&c->interpolated);
+            av_freep(&c->matrix);
+            c->matrix_size = 0;
+            av_freep(&c->histogram);
+            c->histogram_size = 0;
+            av_freep(&c->vector);
+            c->vector_size = 0;
+            av_freep(&c->y);
+            c->y_size = 0;
+        }
+    }
+    av_freep(&s->chan);
+    s->nb_channels = 0;
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .request_frame = request_frame,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_adeclick = {
+    .name          = "adeclick",
+    .description   = NULL_IF_CONFIG_SMALL("Remove impulsive noise from input audio."),
+    .query_formats = query_formats,
+    .priv_size     = sizeof(AudioDeclickContext),
+    .priv_class    = &adeclick_class,
+    .init          = init,
+    .uninit        = uninit,
+    .inputs        = inputs,
+    .outputs       = outputs,
+    .flags         = AVFILTER_FLAG_SLICE_THREADS,
+};
+
+static const AVOption adeclip_options[] = {
+    { "w", "set window size",          OFFSET(w),              AV_OPT_TYPE_DOUBLE, {.dbl=55},     10,  100, AF },
+    { "o", "set window overlap",       OFFSET(overlap),        AV_OPT_TYPE_DOUBLE, {.dbl=75},     50,   95, AF },
+    { "a", "set autoregression order", OFFSET(ar),             AV_OPT_TYPE_DOUBLE, {.dbl=8},       0,   25, AF },
+    { "t", "set threshold",            OFFSET(threshold),      AV_OPT_TYPE_DOUBLE, {.dbl=10},      1,  100, AF },
+    { "n", "set histogram size",       OFFSET(nb_hbins),       AV_OPT_TYPE_INT,    {.i64=1000},  100, 9999, AF },
+    { "m", "set overlap method",       OFFSET(method),         AV_OPT_TYPE_INT,    {.i64=0},       0,    1, AF, "m" },
+    { "a", "overlap-add",              0,                      AV_OPT_TYPE_CONST,  {.i64=0},       0,    0, AF, "m" },
+    { "s", "overlap-save",             0,                      AV_OPT_TYPE_CONST,  {.i64=1},       0,    0, AF, "m" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(adeclip);
+
+AVFilter ff_af_adeclip = {
+    .name          = "adeclip",
+    .description   = NULL_IF_CONFIG_SMALL("Remove clipping from input audio."),
+    .query_formats = query_formats,
+    .priv_size     = sizeof(AudioDeclickContext),
+    .priv_class    = &adeclip_class,
+    .init          = init,
+    .uninit        = uninit,
+    .inputs        = inputs,
+    .outputs       = outputs,
+    .flags         = AVFILTER_FLAG_SLICE_THREADS,
+};

diff --git a/libavfilter/af_adelay.c b/libavfilter/af_adelay.c
index 983f089..d6d81ba 100644
--- a/libavfilter/af_adelay.c
+++ b/libavfilter/af_adelay.c

@@ -192,7 +192,7 @@
     if (ctx->is_disabled || !s->delays)
         return ff_filter_frame(ctx->outputs[0], frame);
 
-    out_frame = ff_get_audio_buffer(inlink, frame->nb_samples);
+    out_frame = ff_get_audio_buffer(ctx->outputs[0], frame->nb_samples);
     if (!out_frame) {
         av_frame_free(&frame);
         return AVERROR(ENOMEM);

diff --git a/libavfilter/af_aderivative.c b/libavfilter/af_aderivative.c
new file mode 100644
index 0000000..a591515
--- /dev/null
+++ b/libavfilter/af_aderivative.c

@@ -0,0 +1,207 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+
+typedef struct ADerivativeContext {
+    const AVClass *class;
+    AVFrame *prev;
+    void (*filter)(void **dst, void **prv, const void **src,
+                   int nb_samples, int channels);
+} ADerivativeContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats = NULL;
+    AVFilterChannelLayouts *layouts = NULL;
+    static const enum AVSampleFormat derivative_sample_fmts[] = {
+        AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLTP,
+        AV_SAMPLE_FMT_S32P, AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    static const enum AVSampleFormat integral_sample_fmts[] = {
+        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+
+    formats = ff_make_format_list(strcmp(ctx->filter->name, "aintegral") ?
+                                  derivative_sample_fmts : integral_sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+#define DERIVATIVE(name, type)                                          \
+static void aderivative_## name ##p(void **d, void **p, const void **s, \
+                                    int nb_samples, int channels)       \
+{                                                                       \
+    int n, c;                                                           \
+                                                                        \
+    for (c = 0; c < channels; c++) {                                    \
+        const type *src = s[c];                                         \
+        type *dst = d[c];                                               \
+        type *prv = p[c];                                               \
+                                                                        \
+        for (n = 0; n < nb_samples; n++) {                              \
+            const type current = src[n];                                \
+                                                                        \
+            dst[n] = current - prv[0];                                  \
+            prv[0] = current;                                           \
+        }                                                               \
+    }                                                                   \
+}
+
+DERIVATIVE(flt, float)
+DERIVATIVE(dbl, double)
+DERIVATIVE(s16, int16_t)
+DERIVATIVE(s32, int32_t)
+
+#define INTEGRAL(name, type)                                          \
+static void aintegral_## name ##p(void **d, void **p, const void **s, \
+                                  int nb_samples, int channels)       \
+{                                                                     \
+    int n, c;                                                         \
+                                                                      \
+    for (c = 0; c < channels; c++) {                                  \
+        const type *src = s[c];                                       \
+        type *dst = d[c];                                             \
+        type *prv = p[c];                                             \
+                                                                      \
+        for (n = 0; n < nb_samples; n++) {                            \
+            const type current = src[n];                              \
+                                                                      \
+            dst[n] = current + prv[0];                                \
+            prv[0] = dst[n];                                          \
+        }                                                             \
+    }                                                                 \
+}
+
+INTEGRAL(flt, float)
+INTEGRAL(dbl, double)
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    ADerivativeContext *s = ctx->priv;
+
+    switch (inlink->format) {
+    case AV_SAMPLE_FMT_FLTP: s->filter = aderivative_fltp; break;
+    case AV_SAMPLE_FMT_DBLP: s->filter = aderivative_dblp; break;
+    case AV_SAMPLE_FMT_S32P: s->filter = aderivative_s32p; break;
+    case AV_SAMPLE_FMT_S16P: s->filter = aderivative_s16p; break;
+    }
+
+    if (strcmp(ctx->filter->name, "aintegral"))
+        return 0;
+
+    switch (inlink->format) {
+    case AV_SAMPLE_FMT_FLTP: s->filter = aintegral_fltp; break;
+    case AV_SAMPLE_FMT_DBLP: s->filter = aintegral_dblp; break;
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    ADerivativeContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out = ff_get_audio_buffer(outlink, in->nb_samples);
+
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    av_frame_copy_props(out, in);
+
+    if (!s->prev) {
+        s->prev = ff_get_audio_buffer(inlink, 1);
+        if (!s->prev) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    s->filter((void **)out->extended_data, (void **)s->prev->extended_data, (const void **)in->extended_data,
+              in->nb_samples, in->channels);
+
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    ADerivativeContext *s = ctx->priv;
+
+    av_frame_free(&s->prev);
+}
+
+static const AVFilterPad aderivative_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad aderivative_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_aderivative = {
+    .name          = "aderivative",
+    .description   = NULL_IF_CONFIG_SMALL("Compute derivative of input audio."),
+    .query_formats = query_formats,
+    .priv_size     = sizeof(ADerivativeContext),
+    .uninit        = uninit,
+    .inputs        = aderivative_inputs,
+    .outputs       = aderivative_outputs,
+};
+
+AVFilter ff_af_aintegral = {
+    .name          = "aintegral",
+    .description   = NULL_IF_CONFIG_SMALL("Compute integral of input audio."),
+    .query_formats = query_formats,
+    .priv_size     = sizeof(ADerivativeContext),
+    .uninit        = uninit,
+    .inputs        = aderivative_inputs,
+    .outputs       = aderivative_outputs,
+};

diff --git a/libavfilter/af_aecho.c b/libavfilter/af_aecho.c
index cfaea3d..b9ac18d 100644
--- a/libavfilter/af_aecho.c
+++ b/libavfilter/af_aecho.c

@@ -279,7 +279,7 @@
     if (av_frame_is_writable(frame)) {
         out_frame = frame;
     } else {
-        out_frame = ff_get_audio_buffer(inlink, frame->nb_samples);
+        out_frame = ff_get_audio_buffer(ctx->outputs[0], frame->nb_samples);
         if (!out_frame) {
             av_frame_free(&frame);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_aemphasis.c b/libavfilter/af_aemphasis.c
index a5b8e30..e1fa93a 100644
--- a/libavfilter/af_aemphasis.c
+++ b/libavfilter/af_aemphasis.c

@@ -96,7 +96,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_afade.c b/libavfilter/af_afade.c
index 7ad124e..9aab644 100644
--- a/libavfilter/af_afade.c
+++ b/libavfilter/af_afade.c

@@ -23,10 +23,14 @@
  * fade audio filter
  */
 
+#define FF_INTERNAL_FIELDS 1
+#include "framequeue.h"
+
 #include "libavutil/audio_fifo.h"
 #include "libavutil/opt.h"
 #include "audio.h"
 #include "avfilter.h"
+#include "filters.h"
 #include "internal.h"
 
 typedef struct AudioFadeContext {
@@ -39,6 +43,7 @@
     int64_t start_time;
     int overlap;
     int cf0_eof;
+    int prev_size;
     int crossfade_is_over;
     AVAudioFifo *fifo[2];
     int64_t pts;
@@ -52,7 +57,7 @@
                               int curve0, int curve1);
 } AudioFadeContext;
 
-enum CurveType { TRI, QSIN, ESIN, HSIN, LOG, IPAR, QUA, CUB, SQU, CBR, PAR, EXP, IQSIN, IHSIN, DESE, DESI, NB_CURVES };
+enum CurveType { TRI, QSIN, ESIN, HSIN, LOG, IPAR, QUA, CUB, SQU, CBR, PAR, EXP, IQSIN, IHSIN, DESE, DESI, LOSI, NB_CURVES };
 
 #define OFFSET(x) offsetof(AudioFadeContext, x)
 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
@@ -146,6 +151,14 @@
     case DESI:
         gain = gain <= 0.5 ? CUBE(2 * gain) / 2: 1 - CUBE(2 * (1 - gain)) / 2;
         break;
+    case LOSI: {
+                   const double a = 1. / (1. - 0.787) - 1;
+                   double A = 1. / (1.0 + exp(0 -((gain-0.5) * a * 2.0)));
+                   double B = 1. / (1.0 + exp(a));
+                   double C = 1. / (1.0 + exp(0-a));
+                   gain = (A - B) / (C - B);
+               }
+        break;
     }
 
     return gain;
@@ -252,6 +265,7 @@
     { "ihsin",        "inverted half of sine wave",                  0,                    AV_OPT_TYPE_CONST,  {.i64 = IHSIN}, 0, 0, FLAGS, "curve" },
     { "dese",         "double-exponential seat",                     0,                    AV_OPT_TYPE_CONST,  {.i64 = DESE }, 0, 0, FLAGS, "curve" },
     { "desi",         "double-exponential sigmoid",                  0,                    AV_OPT_TYPE_CONST,  {.i64 = DESI }, 0, 0, FLAGS, "curve" },
+    { "losi",         "logistic sigmoid",                            0,                    AV_OPT_TYPE_CONST,  {.i64 = LOSI }, 0, 0, FLAGS, "curve" },
     { NULL }
 };
 
@@ -282,7 +296,7 @@
     if (av_frame_is_writable(buf)) {
         out_buf = buf;
     } else {
-        out_buf = ff_get_audio_buffer(inlink, nb_samples);
+        out_buf = ff_get_audio_buffer(outlink, nb_samples);
         if (!out_buf)
             return AVERROR(ENOMEM);
         av_frame_copy_props(out_buf, buf);
@@ -349,8 +363,8 @@
 static const AVOption acrossfade_options[] = {
     { "nb_samples",   "set number of samples for cross fade duration", OFFSET(nb_samples),   AV_OPT_TYPE_INT,    {.i64 = 44100}, 1, INT32_MAX/10, FLAGS },
     { "ns",           "set number of samples for cross fade duration", OFFSET(nb_samples),   AV_OPT_TYPE_INT,    {.i64 = 44100}, 1, INT32_MAX/10, FLAGS },
-    { "duration",     "set cross fade duration",                       OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, 60, FLAGS },
-    { "d",            "set cross fade duration",                       OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, 60, FLAGS },
+    { "duration",     "set cross fade duration",                       OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, 60000000, FLAGS },
+    { "d",            "set cross fade duration",                       OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, 60000000, FLAGS },
     { "overlap",      "overlap 1st stream end with 2nd stream start",  OFFSET(overlap),      AV_OPT_TYPE_BOOL,   {.i64 = 1    }, 0,  1, FLAGS },
     { "o",            "overlap 1st stream end with 2nd stream start",  OFFSET(overlap),      AV_OPT_TYPE_BOOL,   {.i64 = 1    }, 0,  1, FLAGS },
     { "curve1",       "set fade curve type for 1st stream",            OFFSET(curve),        AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
@@ -371,6 +385,7 @@
     {     "ihsin",    "inverted half of sine wave",                    0,                    AV_OPT_TYPE_CONST,  {.i64 = IHSIN}, 0, 0, FLAGS, "curve" },
     {     "dese",     "double-exponential seat",                       0,                    AV_OPT_TYPE_CONST,  {.i64 = DESE }, 0, 0, FLAGS, "curve" },
     {     "desi",     "double-exponential sigmoid",                    0,                    AV_OPT_TYPE_CONST,  {.i64 = DESI }, 0, 0, FLAGS, "curve" },
+    {     "losi",     "logistic sigmoid",                              0,                    AV_OPT_TYPE_CONST,  {.i64 = LOSI }, 0, 0, FLAGS, "curve" },
     { "curve2",       "set fade curve type for 2nd stream",            OFFSET(curve2),       AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
     { "c2",           "set fade curve type for 2nd stream",            OFFSET(curve2),       AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
     { NULL }
@@ -428,157 +443,129 @@
 CROSSFADE(s16, int16_t)
 CROSSFADE(s32, int32_t)
 
-static int acrossfade_filter_frame(AVFilterLink *inlink, AVFrame *in)
+static int activate(AVFilterContext *ctx)
 {
-    AVFilterContext *ctx  = inlink->dst;
     AudioFadeContext *s   = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
-    AVFrame *out, *cf[2] = { NULL };
-    int ret = 0, nb_samples;
+    AVFrame *in = NULL, *out, *cf[2] = { NULL };
+    int ret = 0, nb_samples, status;
+    int64_t pts;
+
+    FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, ctx);
 
     if (s->crossfade_is_over) {
+        ret = ff_inlink_consume_frame(ctx->inputs[1], &in);
+        if (ret < 0) {
+            return ret;
+        } else if (ff_inlink_acknowledge_status(ctx->inputs[1], &status, &pts)) {
+            ff_outlink_set_status(ctx->outputs[0], status, pts);
+            return 0;
+        } else {
+            if (ff_outlink_frame_wanted(ctx->outputs[0]) && !in) {
+                ff_inlink_request_frame(ctx->inputs[1]);
+                return 0;
+            }
+        }
         in->pts = s->pts;
         s->pts += av_rescale_q(in->nb_samples,
             (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
         return ff_filter_frame(outlink, in);
-    } else if (inlink == ctx->inputs[0]) {
-        av_audio_fifo_write(s->fifo[0], (void **)in->extended_data, in->nb_samples);
+    }
 
-        nb_samples = av_audio_fifo_size(s->fifo[0]) - s->nb_samples;
+    if (ff_framequeue_queued_samples(&ctx->inputs[0]->fifo) > s->nb_samples) {
+        nb_samples = ff_framequeue_queued_samples(&ctx->inputs[0]->fifo) - s->nb_samples;
         if (nb_samples > 0) {
-            out = ff_get_audio_buffer(outlink, nb_samples);
-            if (!out) {
-                ret = AVERROR(ENOMEM);
-                goto fail;
+            ret = ff_inlink_consume_samples(ctx->inputs[0], nb_samples, nb_samples, &in);
+            if (ret < 0) {
+                return ret;
             }
-            av_audio_fifo_read(s->fifo[0], (void **)out->extended_data, nb_samples);
-            out->pts = s->pts;
-            s->pts += av_rescale_q(nb_samples,
-                (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
-            ret = ff_filter_frame(outlink, out);
         }
-    } else if (av_audio_fifo_size(s->fifo[1]) < s->nb_samples) {
-        if (!s->overlap && av_audio_fifo_size(s->fifo[0]) > 0) {
-            nb_samples = av_audio_fifo_size(s->fifo[0]);
-
-            cf[0] = ff_get_audio_buffer(outlink, nb_samples);
-            out = ff_get_audio_buffer(outlink, nb_samples);
-            if (!out || !cf[0]) {
-                ret = AVERROR(ENOMEM);
-                goto fail;
-            }
-            av_audio_fifo_read(s->fifo[0], (void **)cf[0]->extended_data, nb_samples);
-
-            s->fade_samples(out->extended_data, cf[0]->extended_data, nb_samples,
-                            outlink->channels, -1, nb_samples - 1, nb_samples, s->curve);
-            out->pts = s->pts;
-            s->pts += av_rescale_q(nb_samples,
-                (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
-            ret = ff_filter_frame(outlink, out);
-            if (ret < 0)
-                goto fail;
-        }
-
-        av_audio_fifo_write(s->fifo[1], (void **)in->extended_data, in->nb_samples);
-    } else if (av_audio_fifo_size(s->fifo[1]) >= s->nb_samples) {
-        av_audio_fifo_write(s->fifo[1], (void **)in->extended_data, in->nb_samples);
-
+        in->pts = s->pts;
+        s->pts += av_rescale_q(in->nb_samples,
+            (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+        return ff_filter_frame(outlink, in);
+    } else if (ff_framequeue_queued_samples(&ctx->inputs[1]->fifo) >= s->nb_samples) {
         if (s->overlap) {
-            cf[0] = ff_get_audio_buffer(outlink, s->nb_samples);
-            cf[1] = ff_get_audio_buffer(outlink, s->nb_samples);
             out = ff_get_audio_buffer(outlink, s->nb_samples);
-            if (!out || !cf[0] || !cf[1]) {
+            if (!out)
+                return AVERROR(ENOMEM);
+
+            ret = ff_inlink_consume_samples(ctx->inputs[0], s->nb_samples, s->nb_samples, &cf[0]);
+            if (ret < 0) {
                 av_frame_free(&out);
-                ret = AVERROR(ENOMEM);
-                goto fail;
+                return ret;
             }
 
-            av_audio_fifo_read(s->fifo[0], (void **)cf[0]->extended_data, s->nb_samples);
-            av_audio_fifo_read(s->fifo[1], (void **)cf[1]->extended_data, s->nb_samples);
+            ret = ff_inlink_consume_samples(ctx->inputs[1], s->nb_samples, s->nb_samples, &cf[1]);
+            if (ret < 0) {
+                av_frame_free(&out);
+                return ret;
+            }
 
             s->crossfade_samples(out->extended_data, cf[0]->extended_data,
                                  cf[1]->extended_data,
-                                 s->nb_samples, in->channels,
+                                 s->nb_samples, out->channels,
                                  s->curve, s->curve2);
             out->pts = s->pts;
             s->pts += av_rescale_q(s->nb_samples,
                 (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
-            ret = ff_filter_frame(outlink, out);
-            if (ret < 0)
-                goto fail;
+            s->crossfade_is_over = 1;
+            av_frame_free(&cf[0]);
+            av_frame_free(&cf[1]);
+            return ff_filter_frame(outlink, out);
         } else {
             out = ff_get_audio_buffer(outlink, s->nb_samples);
-            cf[1] = ff_get_audio_buffer(outlink, s->nb_samples);
-            if (!out || !cf[1]) {
-                ret = AVERROR(ENOMEM);
+            if (!out)
+                return AVERROR(ENOMEM);
+
+            ret = ff_inlink_consume_samples(ctx->inputs[0], s->nb_samples, s->nb_samples, &cf[0]);
+            if (ret < 0) {
                 av_frame_free(&out);
-                goto fail;
+                return ret;
             }
 
-            av_audio_fifo_read(s->fifo[1], (void **)cf[1]->extended_data, s->nb_samples);
+            s->fade_samples(out->extended_data, cf[0]->extended_data, s->nb_samples,
+                            outlink->channels, -1, s->nb_samples - 1, s->nb_samples, s->curve);
+            out->pts = s->pts;
+            s->pts += av_rescale_q(s->nb_samples,
+                (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+            av_frame_free(&cf[0]);
+            ret = ff_filter_frame(outlink, out);
+            if (ret < 0)
+                return ret;
+
+            out = ff_get_audio_buffer(outlink, s->nb_samples);
+            if (!out)
+                return AVERROR(ENOMEM);
+
+            ret = ff_inlink_consume_samples(ctx->inputs[1], s->nb_samples, s->nb_samples, &cf[1]);
+            if (ret < 0) {
+                av_frame_free(&out);
+                return ret;
+            }
 
             s->fade_samples(out->extended_data, cf[1]->extended_data, s->nb_samples,
                             outlink->channels, 1, 0, s->nb_samples, s->curve2);
             out->pts = s->pts;
             s->pts += av_rescale_q(s->nb_samples,
                 (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
-            ret = ff_filter_frame(outlink, out);
-            if (ret < 0)
-                goto fail;
+            s->crossfade_is_over = 1;
+            av_frame_free(&cf[1]);
+            return ff_filter_frame(outlink, out);
         }
-
-        nb_samples = av_audio_fifo_size(s->fifo[1]);
-        if (nb_samples > 0) {
-            out = ff_get_audio_buffer(outlink, nb_samples);
-            if (!out) {
-                ret = AVERROR(ENOMEM);
-                goto fail;
-            }
-
-            av_audio_fifo_read(s->fifo[1], (void **)out->extended_data, nb_samples);
-            out->pts = s->pts;
-            s->pts += av_rescale_q(nb_samples,
-                (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
-            ret = ff_filter_frame(outlink, out);
-        }
-        s->crossfade_is_over = 1;
-    }
-
-fail:
-    av_frame_free(&in);
-    av_frame_free(&cf[0]);
-    av_frame_free(&cf[1]);
-    return ret;
-}
-
-static int acrossfade_request_frame(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    AudioFadeContext *s = ctx->priv;
-    int ret = 0;
-
-    if (!s->cf0_eof) {
-        AVFilterLink *cf0 = ctx->inputs[0];
-        ret = ff_request_frame(cf0);
-        if (ret < 0 && ret != AVERROR_EOF)
-            return ret;
-        if (ret == AVERROR_EOF) {
+    } else if (ff_outlink_frame_wanted(ctx->outputs[0])) {
+        if (!s->cf0_eof && ctx->inputs[0]->status_in) {
             s->cf0_eof = 1;
-            ret = 0;
         }
-    } else {
-        AVFilterLink *cf1 = ctx->inputs[1];
-        int nb_samples = av_audio_fifo_size(s->fifo[1]);
-
-        ret = ff_request_frame(cf1);
-        if (ret == AVERROR_EOF && nb_samples > 0) {
-            AVFrame *out = ff_get_audio_buffer(outlink, nb_samples);
-            if (!out)
-                return AVERROR(ENOMEM);
-
-            av_audio_fifo_read(s->fifo[1], (void **)out->extended_data, nb_samples);
-            ret = ff_filter_frame(outlink, out);
+        if (ctx->inputs[1]->status_in) {
+            ff_outlink_set_status(ctx->outputs[0], AVERROR_EOF, AV_NOPTS_VALUE);
+            return 0;
         }
+        if (!s->cf0_eof)
+            ff_inlink_request_frame(ctx->inputs[0]);
+        else
+            ff_inlink_request_frame(ctx->inputs[1]);
+        return 0;
     }
 
     return ret;
@@ -615,32 +602,17 @@
 
     config_output(outlink);
 
-    s->fifo[0] = av_audio_fifo_alloc(outlink->format, outlink->channels, s->nb_samples);
-    s->fifo[1] = av_audio_fifo_alloc(outlink->format, outlink->channels, s->nb_samples);
-    if (!s->fifo[0] || !s->fifo[1])
-        return AVERROR(ENOMEM);
-
     return 0;
 }
 
-static av_cold void uninit(AVFilterContext *ctx)
-{
-    AudioFadeContext *s = ctx->priv;
-
-    av_audio_fifo_free(s->fifo[0]);
-    av_audio_fifo_free(s->fifo[1]);
-}
-
 static const AVFilterPad avfilter_af_acrossfade_inputs[] = {
     {
         .name         = "crossfade0",
         .type         = AVMEDIA_TYPE_AUDIO,
-        .filter_frame = acrossfade_filter_frame,
     },
     {
         .name         = "crossfade1",
         .type         = AVMEDIA_TYPE_AUDIO,
-        .filter_frame = acrossfade_filter_frame,
     },
     { NULL }
 };
@@ -649,7 +621,6 @@
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_AUDIO,
-        .request_frame = acrossfade_request_frame,
         .config_props  = acrossfade_config_output,
     },
     { NULL }
@@ -660,7 +631,7 @@
     .description   = NULL_IF_CONFIG_SMALL("Cross fade two input audio streams."),
     .query_formats = query_formats,
     .priv_size     = sizeof(AudioFadeContext),
-    .uninit        = uninit,
+    .activate      = activate,
     .priv_class    = &acrossfade_class,
     .inputs        = avfilter_af_acrossfade_inputs,
     .outputs       = avfilter_af_acrossfade_outputs,

diff --git a/libavfilter/af_afftdn.c b/libavfilter/af_afftdn.c
new file mode 100644
index 0000000..fbcb0f1
--- /dev/null
+++ b/libavfilter/af_afftdn.c

@@ -0,0 +1,1422 @@
+/*
+ * Copyright (c) 2018 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <float.h>
+
+#include "libavutil/audio_fifo.h"
+#include "libavutil/avstring.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
+#include "libavcodec/avfft.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+
+#define C       (M_LN10 * 0.1)
+#define RATIO    0.98
+#define RRATIO  (1.0 - RATIO)
+
+enum OutModes {
+    IN_MODE,
+    OUT_MODE,
+    NOISE_MODE,
+    NB_MODES
+};
+
+enum NoiseType {
+    WHITE_NOISE,
+    VINYL_NOISE,
+    SHELLAC_NOISE,
+    CUSTOM_NOISE,
+    NB_NOISE
+};
+
+typedef struct DeNoiseChannel {
+    int         band_noise[15];
+    double      noise_band_auto_var[15];
+    double      noise_band_sample[15];
+    double     *amt;
+    double     *band_amt;
+    double     *band_excit;
+    double     *gain;
+    double     *prior;
+    double     *prior_band_excit;
+    double     *clean_data;
+    double     *noisy_data;
+    double     *out_samples;
+    double     *spread_function;
+    double     *abs_var;
+    double     *rel_var;
+    double     *min_abs_var;
+    FFTComplex *fft_data;
+    FFTContext *fft, *ifft;
+
+    double      noise_band_norm[15];
+    double      noise_band_avr[15];
+    double      noise_band_avi[15];
+    double      noise_band_var[15];
+
+    double      sfm_threshold;
+    double      sfm_alpha;
+    double      sfm_results[3];
+    int         sfm_fail_flags[512];
+    int         sfm_fail_total;
+} DeNoiseChannel;
+
+typedef struct AudioFFTDeNoiseContext {
+    const AVClass *class;
+
+    float   noise_reduction;
+    float   noise_floor;
+    int     noise_type;
+    char   *band_noise_str;
+    float   residual_floor;
+    int     track_noise;
+    int     track_residual;
+    int     output_mode;
+
+    float   last_residual_floor;
+    float   last_noise_floor;
+    float   last_noise_reduction;
+    float   last_noise_balance;
+    int64_t block_count;
+
+    int64_t pts;
+    int     channels;
+    int     sample_noise;
+    int     sample_noise_start;
+    int     sample_noise_end;
+    float   sample_rate;
+    int     buffer_length;
+    int     fft_length;
+    int     fft_length2;
+    int     bin_count;
+    int     window_length;
+    int     sample_advance;
+    int     number_of_bands;
+
+    int     band_centre[15];
+
+    int    *bin2band;
+    double *window;
+    double *band_alpha;
+    double *band_beta;
+
+    DeNoiseChannel *dnch;
+
+    double  max_gain;
+    double  max_var;
+    double  gain_scale;
+    double  window_weight;
+    double  floor;
+    double  sample_floor;
+    double  auto_floor;
+
+    int     noise_band_edge[17];
+    int     noise_band_count;
+    double  matrix_a[25];
+    double  vector_b[5];
+    double  matrix_b[75];
+    double  matrix_c[75];
+
+    AVAudioFifo *fifo;
+} AudioFFTDeNoiseContext;
+
+#define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption afftdn_options[] = {
+    { "nr", "set the noise reduction",    OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,  {.dbl = 12},          .01, 97, A },
+    { "nf", "set the noise floor",        OFFSET(noise_floor),     AV_OPT_TYPE_FLOAT,  {.dbl =-50},          -80,-20, A },
+    { "nt", "set the noise type",         OFFSET(noise_type),      AV_OPT_TYPE_INT,    {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, A, "type" },
+    {  "w", "white noise",                0,                       AV_OPT_TYPE_CONST,  {.i64 = WHITE_NOISE},   0,  0, A, "type" },
+    {  "v", "vinyl noise",                0,                       AV_OPT_TYPE_CONST,  {.i64 = VINYL_NOISE},   0,  0, A, "type" },
+    {  "s", "shellac noise",              0,                       AV_OPT_TYPE_CONST,  {.i64 = SHELLAC_NOISE}, 0,  0, A, "type" },
+    {  "c", "custom noise",               0,                       AV_OPT_TYPE_CONST,  {.i64 = CUSTOM_NOISE},  0,  0, A, "type" },
+    { "bn", "set the custom bands noise", OFFSET(band_noise_str),  AV_OPT_TYPE_STRING, {.str = 0},             0,  0, A },
+    { "rf", "set the residual floor",     OFFSET(residual_floor),  AV_OPT_TYPE_FLOAT,  {.dbl =-38},          -80,-20, A },
+    { "tn", "track noise",                OFFSET(track_noise),     AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, A },
+    { "tr", "track residual",             OFFSET(track_residual),  AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, A },
+    { "om", "set output mode",            OFFSET(output_mode),     AV_OPT_TYPE_INT,    {.i64 = OUT_MODE},      0,  NB_MODES-1, A, "mode" },
+    {  "i", "input",                      0,                       AV_OPT_TYPE_CONST,  {.i64 = IN_MODE},       0,  0, A, "mode" },
+    {  "o", "output",                     0,                       AV_OPT_TYPE_CONST,  {.i64 = OUT_MODE},      0,  0, A, "mode" },
+    {  "n", "noise",                      0,                       AV_OPT_TYPE_CONST,  {.i64 = NOISE_MODE},    0,  0, A, "mode" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(afftdn);
+
+static int get_band_noise(AudioFFTDeNoiseContext *s,
+                          int band, double a,
+                          double b, double c)
+{
+    double d1, d2, d3;
+
+    d1 = a / s->band_centre[band];
+    d1 = 10.0 * log(1.0 + d1 * d1) / M_LN10;
+    d2 = b / s->band_centre[band];
+    d2 = 10.0 * log(1.0 + d2 * d2) / M_LN10;
+    d3 = s->band_centre[band] / c;
+    d3 = 10.0 * log(1.0 + d3 * d3) / M_LN10;
+
+    return lrint(-d1 + d2 - d3);
+}
+
+static void factor(double *array, int size)
+{
+    for (int i = 0; i < size - 1; i++) {
+        for (int j = i + 1; j < size; j++) {
+            double d = array[j + i * size] / array[i + i * size];
+
+            array[j + i * size] = d;
+            for (int k = i + 1; k < size; k++) {
+                array[j + k * size] -= d * array[i + k * size];
+            }
+        }
+    }
+}
+
+static void solve(double *matrix, double *vector, int size)
+{
+    for (int i = 0; i < size - 1; i++) {
+        for (int j = i + 1; j < size; j++) {
+            double d = matrix[j + i * size];
+            vector[j] -= d * vector[i];
+        }
+    }
+
+    vector[size - 1] /= matrix[size * size - 1];
+
+    for (int i = size - 2; i >= 0; i--) {
+        double d = vector[i];
+        for (int j = i + 1; j < size; j++)
+            d -= matrix[i + j * size] * vector[j];
+        vector[i] = d / matrix[i + i * size];
+    }
+}
+
+static int process_get_band_noise(AudioFFTDeNoiseContext *s,
+                                  DeNoiseChannel *dnch,
+                                  int band)
+{
+    double product, sum, f;
+    int i = 0;
+
+    if (band < 15)
+        return dnch->band_noise[band];
+
+    for (int j = 0; j < 5; j++) {
+        sum = 0.0;
+        for (int k = 0; k < 15; k++)
+            sum += s->matrix_b[i++] * dnch->band_noise[k];
+        s->vector_b[j] = sum;
+    }
+
+    solve(s->matrix_a, s->vector_b, 5);
+    f = (0.5 * s->sample_rate) / s->band_centre[14];
+    f = 15.0 + log(f / 1.5) / log(1.5);
+    sum = 0.0;
+    product = 1.0;
+    for (int j = 0; j < 5; j++) {
+        sum += product * s->vector_b[j];
+        product *= f;
+    }
+
+    return lrint(sum);
+}
+
+static void calculate_sfm(AudioFFTDeNoiseContext *s,
+                          DeNoiseChannel *dnch,
+                          int start, int end)
+{
+    double d1 = 0.0, d2 = 1.0;
+    int i = 0, j = 0;
+
+    for (int k = start; k < end; k++) {
+        if (dnch->noisy_data[k] > s->sample_floor) {
+            j++;
+            d1 += dnch->noisy_data[k];
+            d2 *= dnch->noisy_data[k];
+            if (d2 > 1.0E100) {
+                d2 *= 1.0E-100;
+                i++;
+            } else if (d2 < 1.0E-100) {
+                d2 *= 1.0E100;
+                i--;
+            }
+        }
+    }
+    if (j > 1) {
+        d1 /= j;
+        dnch->sfm_results[0] = d1;
+        d2 = log(d2) + 230.2585 * i;
+        d2 /= j;
+        d1 = log(d1);
+        dnch->sfm_results[1] = d1;
+        dnch->sfm_results[2] = d1 - d2;
+    } else {
+        dnch->sfm_results[0] = s->auto_floor;
+        dnch->sfm_results[1] = dnch->sfm_threshold;
+        dnch->sfm_results[2] = dnch->sfm_threshold;
+    }
+}
+
+static double limit_gain(double a, double b)
+{
+    if (a > 1.0)
+        return (b * a - 1.0) / (b + a - 2.0);
+    if (a < 1.0)
+        return (b * a - 2.0 * a + 1.0) / (b - a);
+    return 1.0;
+}
+
+static void process_frame(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
+                          FFTComplex *fft_data,
+                          double *prior, double *prior_band_excit, int track_noise)
+{
+    double d1, d2, d3, gain;
+    int n, i1;
+
+    d1 = fft_data[0].re * fft_data[0].re;
+    dnch->noisy_data[0] = d1;
+    d2 = d1 / dnch->abs_var[0];
+    d3 = RATIO * prior[0] + RRATIO * fmax(d2 - 1.0, 0.0);
+    gain = d3 / (1.0 + d3);
+    gain *= (gain + M_PI_4 / fmax(d2, 1.0E-6));
+    prior[0] = (d2 * gain);
+    dnch->clean_data[0] = (d1 * gain);
+    gain = sqrt(gain);
+    dnch->gain[0] = gain;
+    n = 0;
+    for (int i = 1; i < s->fft_length2; i++) {
+        d1 = fft_data[i].re * fft_data[i].re + fft_data[i].im * fft_data[i].im;
+        if (d1 > s->sample_floor)
+            n = i;
+
+        dnch->noisy_data[i] = d1;
+        d2 = d1 / dnch->abs_var[i];
+        d3 = RATIO * prior[i] + RRATIO * fmax(d2 - 1.0, 0.0);
+        gain = d3 / (1.0 + d3);
+        gain *= (gain + M_PI_4 / fmax(d2, 1.0E-6));
+        prior[i] = d2 * gain;
+        dnch->clean_data[i] = d1 * gain;
+        gain = sqrt(gain);
+        dnch->gain[i] = gain;
+    }
+    d1 = fft_data[0].im * fft_data[0].im;
+    if (d1 > s->sample_floor)
+        n = s->fft_length2;
+
+    dnch->noisy_data[s->fft_length2] = d1;
+    d2 = d1 / dnch->abs_var[s->fft_length2];
+    d3 = RATIO * prior[s->fft_length2] + RRATIO * fmax(d2 - 1.0, 0.0);
+    gain = d3 / (1.0 + d3);
+    gain *= gain + M_PI_4 / fmax(d2, 1.0E-6);
+    prior[s->fft_length2] = d2 * gain;
+    dnch->clean_data[s->fft_length2] = d1 * gain;
+    gain = sqrt(gain);
+    dnch->gain[s->fft_length2] = gain;
+    if (n > s->fft_length2 - 2) {
+        n = s->bin_count;
+        i1 = s->noise_band_count;
+    } else {
+        i1 = 0;
+        for (int i = 0; i <= s->noise_band_count; i++) {
+            if (n > 1.1 * s->noise_band_edge[i]) {
+                i1 = i;
+            }
+        }
+    }
+
+    if (track_noise && (i1 > s->noise_band_count / 2)) {
+        int j = FFMIN(n, s->noise_band_edge[i1]);
+        int m = 3, k;
+
+        for (k = i1 - 1; k >= 0; k--) {
+            int i = s->noise_band_edge[k];
+            calculate_sfm(s, dnch, i, j);
+            dnch->noise_band_sample[k] = dnch->sfm_results[0];
+            if (dnch->sfm_results[2] + 0.013 * m * fmax(0.0, dnch->sfm_results[1] - 20.53) >= dnch->sfm_threshold) {
+                break;
+            }
+            j = i;
+            m++;
+        }
+
+        if (k < i1 - 1) {
+            double sum = 0.0, min, max;
+            int i;
+
+            for (i = i1 - 1; i > k; i--) {
+                min = log(dnch->noise_band_sample[i] / dnch->noise_band_auto_var[i]);
+                sum += min;
+            }
+
+            i = i1 - k - 1;
+            if (i < 5) {
+                min = 3.0E-4 * i * i;
+            } else {
+                min = 3.0E-4 * (8 * i - 16);
+            }
+            if (i < 3) {
+                max = 2.0E-4 * i * i;
+            } else {
+                max = 2.0E-4 * (4 * i - 4);
+            }
+
+            if (s->track_residual) {
+                if (s->last_noise_floor > s->last_residual_floor + 9) {
+                    min *= 0.5;
+                    max *= 0.75;
+                } else if (s->last_noise_floor > s->last_residual_floor + 6) {
+                    min *= 0.4;
+                    max *= 1.0;
+                } else if (s->last_noise_floor > s->last_residual_floor + 4) {
+                    min *= 0.3;
+                    max *= 1.3;
+                } else if (s->last_noise_floor > s->last_residual_floor + 2) {
+                    min *= 0.2;
+                    max *= 1.6;
+                } else if (s->last_noise_floor > s->last_residual_floor) {
+                    min *= 0.1;
+                    max *= 2.0;
+                } else {
+                    min = 0.0;
+                    max *= 2.5;
+                }
+            }
+
+            sum = av_clipd(sum, -min, max);
+            sum = exp(sum);
+            for (int i = 0; i < 15; i++)
+                dnch->noise_band_auto_var[i] *= sum;
+        } else if (dnch->sfm_results[2] >= dnch->sfm_threshold) {
+            dnch->sfm_fail_flags[s->block_count & 0x1FF] = 1;
+            dnch->sfm_fail_total += 1;
+        }
+    }
+
+    for (int i = 0; i < s->number_of_bands; i++) {
+        dnch->band_excit[i] = 0.0;
+        dnch->band_amt[i] = 0.0;
+    }
+
+    for (int i = 0; i < s->bin_count; i++) {
+        dnch->band_excit[s->bin2band[i]] += dnch->clean_data[i];
+    }
+
+    for (int i = 0; i < s->number_of_bands; i++) {
+        dnch->band_excit[i] = fmax(dnch->band_excit[i],
+                                s->band_alpha[i] * dnch->band_excit[i] +
+                                s->band_beta[i] * prior_band_excit[i]);
+        prior_band_excit[i] = dnch->band_excit[i];
+    }
+
+    for (int j = 0, i = 0; j < s->number_of_bands; j++) {
+        for (int k = 0; k < s->number_of_bands; k++) {
+            dnch->band_amt[j] += dnch->spread_function[i++] * dnch->band_excit[k];
+        }
+    }
+
+    for (int i = 0; i < s->bin_count; i++)
+        dnch->amt[i] = dnch->band_amt[s->bin2band[i]];
+
+    if (dnch->amt[0] > dnch->abs_var[0]) {
+        dnch->gain[0] = 1.0;
+    } else if (dnch->amt[0] > dnch->min_abs_var[0]) {
+        double limit = sqrt(dnch->abs_var[0] / dnch->amt[0]);
+        dnch->gain[0] = limit_gain(dnch->gain[0], limit);
+    } else {
+        dnch->gain[0] = limit_gain(dnch->gain[0], s->max_gain);
+    }
+    if (dnch->amt[s->fft_length2] > dnch->abs_var[s->fft_length2]) {
+        dnch->gain[s->fft_length2] = 1.0;
+    } else if (dnch->amt[s->fft_length2] > dnch->min_abs_var[s->fft_length2]) {
+        double limit = sqrt(dnch->abs_var[s->fft_length2] / dnch->amt[s->fft_length2]);
+        dnch->gain[s->fft_length2] = limit_gain(dnch->gain[s->fft_length2], limit);
+    } else {
+        dnch->gain[s->fft_length2] = limit_gain(dnch->gain[s->fft_length2], s->max_gain);
+    }
+
+    for (int i = 1; i < s->fft_length2; i++) {
+        if (dnch->amt[i] > dnch->abs_var[i]) {
+            dnch->gain[i] = 1.0;
+        } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
+            double limit = sqrt(dnch->abs_var[i] / dnch->amt[i]);
+            dnch->gain[i] = limit_gain(dnch->gain[i], limit);
+        } else {
+            dnch->gain[i] = limit_gain(dnch->gain[i], s->max_gain);
+        }
+    }
+
+    gain = dnch->gain[0];
+    dnch->clean_data[0] = (gain * gain * dnch->noisy_data[0]);
+    fft_data[0].re *= gain;
+    gain = dnch->gain[s->fft_length2];
+    dnch->clean_data[s->fft_length2] = (gain * gain * dnch->noisy_data[s->fft_length2]);
+    fft_data[0].im *= gain;
+    for (int i = 1; i < s->fft_length2; i++) {
+        gain = dnch->gain[i];
+        dnch->clean_data[i] = (gain * gain * dnch->noisy_data[i]);
+        fft_data[i].re *= gain;
+        fft_data[i].im *= gain;
+    }
+}
+
+static double freq2bark(double x)
+{
+    double d = x / 7500.0;
+
+    return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
+}
+
+static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
+{
+    if (band == -1)
+        return lrint(s->band_centre[0] / 1.5);
+
+    return s->band_centre[band];
+}
+
+static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
+{
+    int i;
+
+    if (band == 15) {
+        i = lrint(s->band_centre[14] * 1.224745);
+    } else {
+        i = lrint(s->band_centre[band] / 1.224745);
+    }
+
+    return FFMIN(i, s->sample_rate / 2);
+}
+
+static void set_band_parameters(AudioFFTDeNoiseContext *s,
+                                DeNoiseChannel *dnch)
+{
+    double band_noise, d2, d3, d4, d5;
+    int i = 0, j = 0, k = 0;
+
+    d5 = 0.0;
+    band_noise = process_get_band_noise(s, dnch, 0);
+    for (int m = j; m <= s->fft_length2; m++) {
+        if (m == j) {
+            i = j;
+            d5 = band_noise;
+            if (k == 15) {
+                j = s->bin_count;
+            } else {
+                j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
+            }
+            d2 = j - i;
+            band_noise = process_get_band_noise(s, dnch, k);
+            k++;
+        }
+        d3 = (j - m) / d2;
+        d4 = (m - i) / d2;
+        dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
+    }
+    dnch->rel_var[s->fft_length2] = exp(band_noise * C);
+
+    for (i = 0; i < 15; i++)
+        dnch->noise_band_auto_var[i] = s->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
+
+    for (i = 0; i <= s->fft_length2; i++) {
+        dnch->abs_var[i] = fmax(s->max_var * dnch->rel_var[i], 1.0);
+        dnch->min_abs_var[i] = s->gain_scale * dnch->abs_var[i];
+    }
+}
+
+static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
+{
+    DeNoiseChannel *dnch = &s->dnch[ch];
+    char *p, *arg, *saveptr = NULL;
+    int i, ret, band_noise[15] = { 0 };
+
+    if (!s->band_noise_str)
+        return;
+
+    p = av_strdup(s->band_noise_str);
+    if (!p)
+        return;
+
+    for (i = 0; i < 15; i++) {
+        if (!(arg = av_strtok(p, "| ", &saveptr)))
+            break;
+
+        p = NULL;
+
+        ret = sscanf(arg, "%d", &band_noise[i]);
+        if (ret != 1) {
+            av_log(s, AV_LOG_ERROR, "Custom band noise must be integer.\n");
+            break;
+        }
+
+        band_noise[i] = av_clip(band_noise[i], -24, 24);
+    }
+
+    av_free(p);
+    memcpy(dnch->band_noise, band_noise, sizeof(band_noise));
+}
+
+static void set_parameters(AudioFFTDeNoiseContext *s)
+{
+    if (s->last_noise_floor != s->noise_floor)
+        s->last_noise_floor = s->noise_floor;
+
+    if (s->track_residual)
+        s->last_noise_floor = fmaxf(s->last_noise_floor, s->residual_floor);
+
+    s->max_var = s->floor * exp((100.0 + s->last_noise_floor) * C);
+
+    if (s->track_residual) {
+        s->last_residual_floor = s->residual_floor;
+        s->last_noise_reduction = fmax(s->last_noise_floor - s->last_residual_floor, 0);
+        s->max_gain = exp(s->last_noise_reduction * (0.5 * C));
+    } else if (s->noise_reduction != s->last_noise_reduction) {
+        s->last_noise_reduction = s->noise_reduction;
+        s->last_residual_floor = av_clipf(s->last_noise_floor - s->last_noise_reduction, -80, -20);
+        s->max_gain = exp(s->last_noise_reduction * (0.5 * C));
+    }
+
+    s->gain_scale = 1.0 / (s->max_gain * s->max_gain);
+
+    for (int ch = 0; ch < s->channels; ch++) {
+        DeNoiseChannel *dnch = &s->dnch[ch];
+
+        set_band_parameters(s, dnch);
+    }
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioFFTDeNoiseContext *s = ctx->priv;
+    double wscale, sar, sum, sdiv;
+    int i, j, k, m, n;
+
+    s->dnch = av_calloc(inlink->channels, sizeof(*s->dnch));
+    if (!s->dnch)
+        return AVERROR(ENOMEM);
+
+    s->pts = AV_NOPTS_VALUE;
+    s->channels = inlink->channels;
+    s->sample_rate = inlink->sample_rate;
+    s->sample_advance = s->sample_rate / 80;
+    s->window_length = 3 * s->sample_advance;
+    s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
+    s->fft_length = s->fft_length2 * 2;
+    s->buffer_length = s->fft_length * 2;
+    s->bin_count = s->fft_length2 + 1;
+
+    s->band_centre[0] = 80;
+    for (i = 1; i < 15; i++) {
+        s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0);
+        if (s->band_centre[i] < 1000) {
+            s->band_centre[i] = 10 * (s->band_centre[i] / 10);
+        } else if (s->band_centre[i] < 5000) {
+            s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50);
+        } else if (s->band_centre[i] < 15000) {
+            s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100);
+        } else {
+            s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000);
+        }
+    }
+
+    for (j = 0; j < 5; j++) {
+        for (k = 0; k < 5; k++) {
+            s->matrix_a[j + k * 5] = 0.0;
+            for (m = 0; m < 15; m++)
+                s->matrix_a[j + k * 5] += pow(m, j + k);
+        }
+    }
+
+    factor(s->matrix_a, 5);
+
+    i = 0;
+    for (j = 0; j < 5; j++)
+        for (k = 0; k < 15; k++)
+            s->matrix_b[i++] = pow(k, j);
+
+    i = 0;
+    for (j = 0; j < 15; j++)
+        for (k = 0; k < 5; k++)
+            s->matrix_c[i++] = pow(j, k);
+
+    s->window = av_calloc(s->window_length, sizeof(*s->window));
+    s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
+    if (!s->window || !s->bin2band)
+        return AVERROR(ENOMEM);
+
+    sdiv = s->sample_rate / 17640.0;
+    for (i = 0; i <= s->fft_length2; i++)
+        s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));
+
+    s->number_of_bands = s->bin2band[s->fft_length2] + 1;
+
+    s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
+    s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
+    if (!s->band_alpha || !s->band_beta)
+        return AVERROR(ENOMEM);
+
+    for (int ch = 0; ch < inlink->channels; ch++) {
+        DeNoiseChannel *dnch = &s->dnch[ch];
+
+        switch (s->noise_type) {
+        case WHITE_NOISE:
+            for (i = 0; i < 15; i++)
+                dnch->band_noise[i] = 0;
+            break;
+        case VINYL_NOISE:
+            for (i = 0; i < 15; i++)
+                dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0) + FFMAX(i - 7, 0);
+            break;
+        case SHELLAC_NOISE:
+            for (i = 0; i < 15; i++)
+                dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10) + FFMAX(i - 12, -5);
+            break;
+        case CUSTOM_NOISE:
+            read_custom_noise(s, ch);
+            break;
+        default:
+            return AVERROR_BUG;
+        }
+
+
+        dnch->sfm_threshold = 0.8;
+        dnch->sfm_alpha = 0.05;
+        for (i = 0; i < 512; i++)
+            dnch->sfm_fail_flags[i] = 0;
+
+        dnch->sfm_fail_total = 0;
+        j = FFMAX((int)(10.0 * (1.3 - dnch->sfm_threshold)), 1);
+
+        for (i = 0; i < 512; i += j) {
+            dnch->sfm_fail_flags[i] = 1;
+            dnch->sfm_fail_total += 1;
+        }
+
+        dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
+        dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
+        dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
+        dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
+        dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
+        dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
+        dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
+        dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
+        dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
+        dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
+        dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
+        dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
+        dnch->fft_data = av_calloc(s->fft_length2 + 1, sizeof(*dnch->fft_data));
+        dnch->fft  = av_fft_init(av_log2(s->fft_length2), 0);
+        dnch->ifft = av_fft_init(av_log2(s->fft_length2), 1);
+        dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
+                                          sizeof(*dnch->spread_function));
+
+        if (!dnch->amt ||
+            !dnch->band_amt ||
+            !dnch->band_excit ||
+            !dnch->gain ||
+            !dnch->prior ||
+            !dnch->prior_band_excit ||
+            !dnch->clean_data ||
+            !dnch->noisy_data ||
+            !dnch->out_samples ||
+            !dnch->fft_data ||
+            !dnch->abs_var ||
+            !dnch->rel_var ||
+            !dnch->min_abs_var ||
+            !dnch->spread_function ||
+            !dnch->fft ||
+            !dnch->ifft)
+            return AVERROR(ENOMEM);
+    }
+
+    for (int ch = 0; ch < inlink->channels; ch++) {
+        DeNoiseChannel *dnch = &s->dnch[ch];
+        double *prior_band_excit = dnch->prior_band_excit;
+        double *prior = dnch->prior;
+        double min, max;
+        double p1, p2;
+
+        p1 = pow(0.1, 2.5 / sdiv);
+        p2 = pow(0.1, 1.0 / sdiv);
+        j = 0;
+        for (m = 0; m < s->number_of_bands; m++) {
+            for (n = 0; n < s->number_of_bands; n++) {
+                if (n < m) {
+                    dnch->spread_function[j++] = pow(p2, m - n);
+                } else if (n > m) {
+                    dnch->spread_function[j++] = pow(p1, n - m);
+                } else {
+                    dnch->spread_function[j++] = 1.0;
+                }
+            }
+        }
+
+        for (m = 0; m < s->number_of_bands; m++) {
+            dnch->band_excit[m] = 0.0;
+            prior_band_excit[m] = 0.0;
+        }
+
+        for (m = 0; m <= s->fft_length2; m++)
+            dnch->band_excit[s->bin2band[m]] += 1.0;
+
+        j = 0;
+        for (m = 0; m < s->number_of_bands; m++) {
+            for (n = 0; n < s->number_of_bands; n++)
+                prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
+        }
+
+        min = pow(0.1, 2.5);
+        max = pow(0.1, 1.0);
+        for (int i = 0; i < s->number_of_bands; i++) {
+            if (i < lrint(12.0 * sdiv)) {
+                dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
+            } else {
+                dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
+            }
+            dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
+        }
+
+        for (int i = 0; i <= s->fft_length2; i++)
+            prior[i] = RRATIO;
+        for (int i = 0; i < s->buffer_length; i++)
+            dnch->out_samples[i] = 0;
+
+        j = 0;
+        for (int i = 0; i < s->number_of_bands; i++)
+            for (int k = 0; k < s->number_of_bands; k++)
+                dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
+    }
+
+    j = 0;
+    sar = s->sample_advance / s->sample_rate;
+    for (int i = 0; i <= s->fft_length2; i++) {
+        if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
+            double d6 = (i - 1) * s->sample_rate / s->fft_length;
+            double d7 = fmin(0.008 + 2.2 / d6, 0.03);
+            s->band_alpha[j] = exp(-sar / d7);
+            s->band_beta[j] = 1.0 - s->band_alpha[j];
+            j = s->bin2band[i];
+        }
+    }
+
+    wscale = sqrt(16.0 / (9.0 * s->fft_length));
+    sum = 0.0;
+    for (int i = 0; i < s->window_length; i++) {
+        double d10 = sin(i * M_PI / s->window_length);
+        d10 *= wscale * d10;
+        s->window[i] = d10;
+        sum += d10 * d10;
+    }
+
+    s->window_weight = 0.5 * sum;
+    s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
+    s->sample_floor = s->floor * exp(4.144600506562284);
+    s->auto_floor = s->floor * exp(6.907667510937141);
+
+    set_parameters(s);
+
+    s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
+    i = 0;
+    for (int j = 1; j < 16; j++) {
+        s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
+        if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1]))
+            i++;
+        s->noise_band_edge[16] = i;
+    }
+    s->noise_band_count = s->noise_band_edge[16];
+
+    s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->fft_length);
+    if (!s->fifo)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static void preprocess(FFTComplex *in, int len)
+{
+    double d1, d2, d3, d4, d5, d6, d7, d8, d9, d10;
+    int n, i, k;
+
+    d5 = 2.0 * M_PI / len;
+    d8 = sin(0.5 * d5);
+    d8 = -2.0 * d8 * d8;
+    d7 = sin(d5);
+    d9 = 1.0 + d8;
+    d6 = d7;
+    n = len / 2;
+
+    for (i = 1; i < len / 4; i++) {
+        k = n - i;
+        d2 = 0.5 * (in[i].re + in[k].re);
+        d1 = 0.5 * (in[i].im - in[k].im);
+        d4 = 0.5 * (in[i].im + in[k].im);
+        d3 = 0.5 * (in[k].re - in[i].re);
+        in[i].re = d2 + d9 * d4 + d6 * d3;
+        in[i].im = d1 + d9 * d3 - d6 * d4;
+        in[k].re = d2 - d9 * d4 - d6 * d3;
+        in[k].im = -d1 + d9 * d3 - d6 * d4;
+        d10 = d9;
+        d9 += d9 * d8 - d6 * d7;
+        d6 += d6 * d8 + d10 * d7;
+    }
+
+    d2 = in[0].re;
+    in[0].re = d2 + in[0].im;
+    in[0].im = d2 - in[0].im;
+}
+
+static void postprocess(FFTComplex *in, int len)
+{
+    double d1, d2, d3, d4, d5, d6, d7, d8, d9, d10;
+    int n, i, k;
+
+    d5 = 2.0 * M_PI / len;
+    d8 = sin(0.5 * d5);
+    d8 = -2.0 * d8 * d8;
+    d7 = sin(d5);
+    d9 = 1.0 + d8;
+    d6 = d7;
+    n = len / 2;
+    for (i = 1; i < len / 4; i++) {
+        k = n - i;
+        d2 = 0.5 * (in[i].re + in[k].re);
+        d1 = 0.5 * (in[i].im - in[k].im);
+        d4 = 0.5 * (in[i].re - in[k].re);
+        d3 = 0.5 * (in[i].im + in[k].im);
+        in[i].re = d2 - d9 * d3 - d6 * d4;
+        in[i].im = d1 + d9 * d4 - d6 * d3;
+        in[k].re = d2 + d9 * d3 + d6 * d4;
+        in[k].im = -d1 + d9 * d4 - d6 * d3;
+        d10 = d9;
+        d9 += d9 * d8 - d6 * d7;
+        d6 += d6 * d8 + d10 * d7;
+    }
+    d2 = in[0].re;
+    in[0].re = 0.5 * (d2 + in[0].im);
+    in[0].im = 0.5 * (d2 - in[0].im);
+}
+
+static void init_sample_noise(DeNoiseChannel *dnch)
+{
+    for (int i = 0; i < 15; i++) {
+        dnch->noise_band_norm[i] = 0.0;
+        dnch->noise_band_avr[i] = 0.0;
+        dnch->noise_band_avi[i] = 0.0;
+        dnch->noise_band_var[i] = 0.0;
+    }
+}
+
+static void sample_noise_block(AudioFFTDeNoiseContext *s,
+                               DeNoiseChannel *dnch,
+                               AVFrame *in, int ch)
+{
+    float *src = (float *)in->extended_data[ch];
+    double mag2, var = 0.0, avr = 0.0, avi = 0.0;
+    int edge, j, k, n, edgemax;
+
+    for (int i = 0; i < s->window_length; i++) {
+        dnch->fft_data[i].re = s->window[i] * src[i] * (1LL << 24);
+        dnch->fft_data[i].im = 0.0;
+    }
+
+    for (int i = s->window_length; i < s->fft_length2; i++) {
+        dnch->fft_data[i].re = 0.0;
+        dnch->fft_data[i].im = 0.0;
+    }
+
+    av_fft_permute(dnch->fft, dnch->fft_data);
+    av_fft_calc(dnch->fft, dnch->fft_data);
+
+    preprocess(dnch->fft_data, s->fft_length);
+
+    edge = s->noise_band_edge[0];
+    j = edge;
+    k = 0;
+    n = j;
+    edgemax = fmin(s->fft_length2, s->noise_band_edge[15]);
+    dnch->fft_data[s->fft_length2].re = dnch->fft_data[0].im;
+    dnch->fft_data[0].im = 0.0;
+    dnch->fft_data[s->fft_length2].im = 0.0;
+
+    for (int i = j; i <= edgemax; i++) {
+        if ((i == j) && (i < edgemax)) {
+            if (j > edge) {
+                dnch->noise_band_norm[k - 1] += j - edge;
+                dnch->noise_band_avr[k - 1] += avr;
+                dnch->noise_band_avi[k - 1] += avi;
+                dnch->noise_band_var[k - 1] += var;
+            }
+            k++;
+            edge = j;
+            j = s->noise_band_edge[k];
+            if (k == 15) {
+                j++;
+            }
+            var = 0.0;
+            avr = 0.0;
+            avi = 0.0;
+        }
+        avr += dnch->fft_data[n].re;
+        avi += dnch->fft_data[n].im;
+        mag2 = dnch->fft_data[n].re * dnch->fft_data[n].re +
+               dnch->fft_data[n].im * dnch->fft_data[n].im;
+
+        mag2 = fmax(mag2, s->sample_floor);
+
+        dnch->noisy_data[i] = mag2;
+        var += mag2;
+        n++;
+    }
+
+    dnch->noise_band_norm[k - 1] += j - edge;
+    dnch->noise_band_avr[k - 1] += avr;
+    dnch->noise_band_avi[k - 1] += avi;
+    dnch->noise_band_var[k - 1] += var;
+}
+
+static void finish_sample_noise(AudioFFTDeNoiseContext *s,
+                                DeNoiseChannel *dnch,
+                                double *sample_noise)
+{
+    for (int i = 0; i < s->noise_band_count; i++) {
+        dnch->noise_band_avr[i] /= dnch->noise_band_norm[i];
+        dnch->noise_band_avi[i] /= dnch->noise_band_norm[i];
+        dnch->noise_band_var[i] /= dnch->noise_band_norm[i];
+        dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] +
+                                   dnch->noise_band_avi[i] * dnch->noise_band_avi[i];
+        dnch->noise_band_auto_var[i] = dnch->noise_band_var[i];
+        sample_noise[i] = (1.0 / C) * log(dnch->noise_band_var[i] / s->floor) - 100.0;
+    }
+    if (s->noise_band_count < 15) {
+        for (int i = s->noise_band_count; i < 15; i++)
+            sample_noise[i] = sample_noise[i - 1];
+    }
+}
+
+static void set_noise_profile(AudioFFTDeNoiseContext *s,
+                              DeNoiseChannel *dnch,
+                              double *sample_noise,
+                              int new_profile)
+{
+    int new_band_noise[15];
+    double temp[15];
+    double sum = 0.0, d1;
+    float new_noise_floor;
+    int i, n;
+
+    for (int m = 0; m < 15; m++)
+        temp[m] = sample_noise[m];
+
+    if (new_profile) {
+        i = 0;
+        for (int m = 0; m < 5; m++) {
+            sum = 0.0;
+            for (n = 0; n < 15; n++)
+                sum += s->matrix_b[i++] * temp[n];
+            s->vector_b[m] = sum;
+        }
+        solve(s->matrix_a, s->vector_b, 5);
+        i = 0;
+        for (int m = 0; m < 15; m++) {
+            sum = 0.0;
+            for (n = 0; n < 5; n++)
+                sum += s->matrix_c[i++] * s->vector_b[n];
+            temp[m] = sum;
+        }
+    }
+
+    sum = 0.0;
+    for (int m = 0; m < 15; m++)
+        sum += temp[m];
+
+    d1 = (int)(sum / 15.0 - 0.5);
+    if (!new_profile)
+        i = lrint(temp[7] - d1);
+
+    for (d1 -= dnch->band_noise[7] - i; d1 > -20.0; d1 -= 1.0)
+        ;
+
+    for (int m = 0; m < 15; m++)
+        temp[m] -= d1;
+
+    new_noise_floor = d1 + 2.5;
+
+    if (new_profile) {
+        av_log(s, AV_LOG_INFO, "bn=");
+        for (int m = 0; m < 15; m++) {
+            new_band_noise[m] = lrint(temp[m]);
+            new_band_noise[m] = av_clip(new_band_noise[m], -24, 24);
+            av_log(s, AV_LOG_INFO, "%d ", new_band_noise[m]);
+        }
+        av_log(s, AV_LOG_INFO, "\n");
+        memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise));
+    }
+
+    if (s->track_noise)
+        s->noise_floor = new_noise_floor;
+}
+
+typedef struct ThreadData {
+    AVFrame *in;
+} ThreadData;
+
+static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    AudioFFTDeNoiseContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    const int start = (in->channels * jobnr) / nb_jobs;
+    const int end = (in->channels * (jobnr+1)) / nb_jobs;
+
+    for (int ch = start; ch < end; ch++) {
+        DeNoiseChannel *dnch = &s->dnch[ch];
+        const float *src = (const float *)in->extended_data[ch];
+        double *dst = dnch->out_samples;
+
+        if (s->track_noise) {
+            int i = s->block_count & 0x1FF;
+
+            if (dnch->sfm_fail_flags[i])
+                dnch->sfm_fail_total--;
+            dnch->sfm_fail_flags[i] = 0;
+            dnch->sfm_threshold *= 1.0 - dnch->sfm_alpha;
+            dnch->sfm_threshold += dnch->sfm_alpha * (0.5 + (1.0 / 640) * dnch->sfm_fail_total);
+        }
+
+        for (int m = 0; m < s->window_length; m++) {
+            dnch->fft_data[m].re = s->window[m] * src[m] * (1LL << 24);
+            dnch->fft_data[m].im = 0;
+        }
+
+        for (int m = s->window_length; m < s->fft_length2; m++) {
+            dnch->fft_data[m].re = 0;
+            dnch->fft_data[m].im = 0;
+        }
+
+        av_fft_permute(dnch->fft, dnch->fft_data);
+        av_fft_calc(dnch->fft, dnch->fft_data);
+
+        preprocess(dnch->fft_data, s->fft_length);
+        process_frame(s, dnch, dnch->fft_data,
+                      dnch->prior,
+                      dnch->prior_band_excit,
+                      s->track_noise);
+        postprocess(dnch->fft_data, s->fft_length);
+
+        av_fft_permute(dnch->ifft, dnch->fft_data);
+        av_fft_calc(dnch->ifft, dnch->fft_data);
+
+        for (int m = 0; m < s->window_length; m++)
+            dst[m] += s->window[m] * dnch->fft_data[m].re / (1LL << 24);
+    }
+
+    return 0;
+}
+
+static void get_auto_noise_levels(AudioFFTDeNoiseContext *s,
+                                  DeNoiseChannel *dnch,
+                                  double *levels)
+{
+    if (s->noise_band_count > 0) {
+        for (int i = 0; i < s->noise_band_count; i++) {
+            levels[i] = (1.0 / C) * log(dnch->noise_band_auto_var[i] / s->floor) - 100.0;
+        }
+        if (s->noise_band_count < 15) {
+            for (int i = s->noise_band_count; i < 15; i++)
+                levels[i] = levels[i - 1];
+        }
+    } else {
+        for (int i = 0; i < 15; i++) {
+            levels[i] = -100.0;
+        }
+    }
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AudioFFTDeNoiseContext *s = ctx->priv;
+    AVFrame *out = NULL, *in = NULL;
+    ThreadData td;
+    int ret = 0;
+
+    if (s->pts == AV_NOPTS_VALUE)
+        s->pts = frame->pts;
+
+    ret = av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
+    av_frame_free(&frame);
+    if (ret < 0)
+        return ret;
+
+    while (av_audio_fifo_size(s->fifo) >= s->window_length) {
+        if (!in) {
+            in = ff_get_audio_buffer(outlink, s->window_length);
+            if (!in)
+                return AVERROR(ENOMEM);
+        }
+
+        ret = av_audio_fifo_peek(s->fifo, (void **)in->extended_data, s->window_length);
+        if (ret < 0)
+            break;
+
+        if (s->track_noise) {
+            for (int ch = 0; ch < inlink->channels; ch++) {
+                DeNoiseChannel *dnch = &s->dnch[ch];
+                double levels[15];
+
+                get_auto_noise_levels(s, dnch, levels);
+                set_noise_profile(s, dnch, levels, 0);
+            }
+
+            if (s->noise_floor != s->last_noise_floor)
+                set_parameters(s);
+        }
+
+        if (s->sample_noise_start) {
+            for (int ch = 0; ch < inlink->channels; ch++) {
+                DeNoiseChannel *dnch = &s->dnch[ch];
+
+                init_sample_noise(dnch);
+            }
+            s->sample_noise_start = 0;
+            s->sample_noise = 1;
+        }
+
+        if (s->sample_noise) {
+            for (int ch = 0; ch < inlink->channels; ch++) {
+                DeNoiseChannel *dnch = &s->dnch[ch];
+
+                sample_noise_block(s, dnch, in, ch);
+            }
+        }
+
+        if (s->sample_noise_end) {
+            for (int ch = 0; ch < inlink->channels; ch++) {
+                DeNoiseChannel *dnch = &s->dnch[ch];
+                double sample_noise[15];
+
+                finish_sample_noise(s, dnch, sample_noise);
+                set_noise_profile(s, dnch, sample_noise, 1);
+                set_band_parameters(s, dnch);
+            }
+            s->sample_noise = 0;
+            s->sample_noise_end = 0;
+        }
+
+        s->block_count++;
+        td.in = in;
+        ctx->internal->execute(ctx, filter_channel, &td, NULL,
+                               FFMIN(outlink->channels, ff_filter_get_nb_threads(ctx)));
+
+        out = ff_get_audio_buffer(outlink, s->sample_advance);
+        if (!out) {
+            ret = AVERROR(ENOMEM);
+            break;
+        }
+
+        for (int ch = 0; ch < inlink->channels; ch++) {
+            DeNoiseChannel *dnch = &s->dnch[ch];
+            double *src = dnch->out_samples;
+            float *orig = (float *)in->extended_data[ch];
+            float *dst = (float *)out->extended_data[ch];
+
+            switch (s->output_mode) {
+            case IN_MODE:
+                for (int m = 0; m < s->sample_advance; m++)
+                    dst[m] = orig[m];
+                break;
+            case OUT_MODE:
+                for (int m = 0; m < s->sample_advance; m++)
+                    dst[m] = src[m];
+                break;
+            case NOISE_MODE:
+                for (int m = 0; m < s->sample_advance; m++)
+                    dst[m] = orig[m] - src[m];
+                break;
+            default:
+                return AVERROR_BUG;
+            }
+            memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
+            memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
+        }
+
+        av_audio_fifo_drain(s->fifo, s->sample_advance);
+
+        out->pts = s->pts;
+        ret = ff_filter_frame(outlink, out);
+        if (ret < 0)
+            break;
+        s->pts += s->sample_advance;
+    }
+    av_frame_free(&in);
+
+    return ret;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioFFTDeNoiseContext *s = ctx->priv;
+
+    av_freep(&s->window);
+    av_freep(&s->bin2band);
+    av_freep(&s->band_alpha);
+    av_freep(&s->band_beta);
+
+    if (s->dnch) {
+        for (int ch = 0; ch < s->channels; ch++) {
+            DeNoiseChannel *dnch = &s->dnch[ch];
+            av_freep(&dnch->amt);
+            av_freep(&dnch->band_amt);
+            av_freep(&dnch->band_excit);
+            av_freep(&dnch->gain);
+            av_freep(&dnch->prior);
+            av_freep(&dnch->prior_band_excit);
+            av_freep(&dnch->clean_data);
+            av_freep(&dnch->noisy_data);
+            av_freep(&dnch->out_samples);
+            av_freep(&dnch->spread_function);
+            av_freep(&dnch->abs_var);
+            av_freep(&dnch->rel_var);
+            av_freep(&dnch->min_abs_var);
+            av_freep(&dnch->fft_data);
+            av_fft_end(dnch->fft);
+            dnch->fft = NULL;
+            av_fft_end(dnch->ifft);
+            dnch->ifft = NULL;
+        }
+        av_freep(&s->dnch);
+    }
+
+    av_audio_fifo_free(s->fifo);
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats = NULL;
+    AVFilterChannelLayouts *layouts = NULL;
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLTP,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
+                           char *res, int res_len, int flags)
+{
+    AudioFFTDeNoiseContext *s = ctx->priv;
+    int need_reset = 0;
+
+    if (!strcmp(cmd, "sample_noise") ||
+        !strcmp(cmd, "sn")) {
+        if (!strcmp(args, "start")) {
+            s->sample_noise_start = 1;
+            s->sample_noise_end = 0;
+        } else if (!strcmp(args, "end")) {
+            s->sample_noise_start = 0;
+            s->sample_noise_end = 1;
+        }
+    } else if (!strcmp(cmd, "nr") ||
+               !strcmp(cmd, "noise_reduction")) {
+        float nr;
+
+        if (sscanf(args, "%f", &nr) == 1) {
+            s->noise_reduction = av_clipf(nr, 0.01, 97);
+            need_reset = 1;
+        }
+    } else if (!strcmp(cmd, "nf") ||
+               !strcmp(cmd, "noise_floor")) {
+        float nf;
+
+        if (sscanf(args, "%f", &nf) == 1) {
+            s->noise_floor = av_clipf(nf, -80, -20);
+            need_reset = 1;
+        }
+    } else if (!strcmp(cmd, "output_mode") ||
+               !strcmp(cmd, "om")) {
+        if (!strcmp(args, "i")) {
+            s->output_mode = IN_MODE;
+        } else if (!strcmp(args, "o")) {
+            s->output_mode = OUT_MODE;
+        } else if (!strcmp(args, "n")) {
+            s->output_mode = NOISE_MODE;
+        }
+    }
+
+    if (need_reset)
+        set_parameters(s);
+
+    return 0;
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_afftdn = {
+    .name            = "afftdn",
+    .description     = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
+    .query_formats   = query_formats,
+    .priv_size       = sizeof(AudioFFTDeNoiseContext),
+    .priv_class      = &afftdn_class,
+    .uninit          = uninit,
+    .inputs          = inputs,
+    .outputs         = outputs,
+    .process_command = process_command,
+    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC |
+                       AVFILTER_FLAG_SLICE_THREADS,
+};

diff --git a/libavfilter/af_afftfilt.c b/libavfilter/af_afftfilt.c
index 52755a1..7f28e1f 100644
--- a/libavfilter/af_afftfilt.c
+++ b/libavfilter/af_afftfilt.c

@@ -197,8 +197,10 @@
     int ch, n, ret, i, j, k;
     int start = s->start, end = s->end;
 
-    av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
+    ret = av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
     av_frame_free(&frame);
+    if (ret < 0)
+        return ret;
 
     while (av_audio_fifo_size(s->fifo) >= window_size) {
         if (!in) {
@@ -316,7 +318,7 @@
     }
 
     av_frame_free(&in);
-    return ret;
+    return ret < 0 ? ret : 0;
 }
 
 static int query_formats(AVFilterContext *ctx)

diff --git a/libavfilter/af_afir.c b/libavfilter/af_afir.c
index c4443fd..244da3a 100644
--- a/libavfilter/af_afir.c
+++ b/libavfilter/af_afir.c

@@ -23,14 +23,18 @@
  * An arbitrary audio FIR filter
  */
 
-#include "libavutil/audio_fifo.h"
+#include <float.h>
+
 #include "libavutil/common.h"
 #include "libavutil/float_dsp.h"
+#include "libavutil/intreadwrite.h"
 #include "libavutil/opt.h"
+#include "libavutil/xga_font_data.h"
 #include "libavcodec/avfft.h"
 
 #include "audio.h"
 #include "avfilter.h"
+#include "filters.h"
 #include "formats.h"
 #include "internal.h"
 #include "af_afir.h"
@@ -105,20 +109,20 @@
 
     if (out) {
         float *ptr = (float *)out->extended_data[ch];
-        s->fdsp->vector_fmul_scalar(ptr, dst, s->gain * s->wet_gain, FFALIGN(out->nb_samples, 4));
+        s->fdsp->vector_fmul_scalar(ptr, dst, s->wet_gain, FFALIGN(out->nb_samples, 4));
         emms_c();
     }
 
     return 0;
 }
 
-static int fir_frame(AudioFIRContext *s, AVFilterLink *outlink)
+static int fir_frame(AudioFIRContext *s, AVFrame *in, AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     AVFrame *out = NULL;
     int ret;
 
-    s->nb_samples = FFMIN(s->part_size, av_audio_fifo_size(s->fifo[0]));
+    s->nb_samples = in->nb_samples;
 
     if (!s->want_skip) {
         out = ff_get_audio_buffer(outlink, s->nb_samples);
@@ -126,20 +130,13 @@
             return AVERROR(ENOMEM);
     }
 
-    s->in[0] = ff_get_audio_buffer(ctx->inputs[0], s->nb_samples);
-    if (!s->in[0]) {
-        av_frame_free(&out);
-        return AVERROR(ENOMEM);
-    }
-
-    av_audio_fifo_peek(s->fifo[0], (void **)s->in[0]->extended_data, s->nb_samples);
-
+    if (s->pts == AV_NOPTS_VALUE)
+        s->pts = in->pts;
+    s->in[0] = in;
     ctx->internal->execute(ctx, fir_channel, out, NULL, outlink->channels);
 
     s->part_index = (s->part_index + 1) % s->nb_partitions;
 
-    av_audio_fifo_drain(s->fifo[0], s->nb_samples);
-
     if (!s->want_skip) {
         out->pts = s->pts;
         if (s->pts != AV_NOPTS_VALUE)
@@ -150,7 +147,7 @@
     if (s->index == 3)
         s->index = 0;
 
-    av_frame_free(&s->in[0]);
+    av_frame_free(&in);
 
     if (s->want_skip == 1) {
         s->want_skip = 0;
@@ -162,13 +159,129 @@
     return ret;
 }
 
+static void drawtext(AVFrame *pic, int x, int y, const char *txt, uint32_t color)
+{
+    const uint8_t *font;
+    int font_height;
+    int i;
+
+    font = avpriv_cga_font, font_height = 8;
+
+    for (i = 0; txt[i]; i++) {
+        int char_y, mask;
+
+        uint8_t *p = pic->data[0] + y * pic->linesize[0] + (x + i * 8) * 4;
+        for (char_y = 0; char_y < font_height; char_y++) {
+            for (mask = 0x80; mask; mask >>= 1) {
+                if (font[txt[i] * font_height + char_y] & mask)
+                    AV_WL32(p, color);
+                p += 4;
+            }
+            p += pic->linesize[0] - 8 * 4;
+        }
+    }
+}
+
+static void draw_line(AVFrame *out, int x0, int y0, int x1, int y1, uint32_t color)
+{
+    int dx = FFABS(x1-x0);
+    int dy = FFABS(y1-y0), sy = y0 < y1 ? 1 : -1;
+    int err = (dx>dy ? dx : -dy) / 2, e2;
+
+    for (;;) {
+        AV_WL32(out->data[0] + y0 * out->linesize[0] + x0 * 4, color);
+
+        if (x0 == x1 && y0 == y1)
+            break;
+
+        e2 = err;
+
+        if (e2 >-dx) {
+            err -= dy;
+            x0--;
+        }
+
+        if (e2 < dy) {
+            err += dx;
+            y0 += sy;
+        }
+    }
+}
+
+static void draw_response(AVFilterContext *ctx, AVFrame *out)
+{
+    AudioFIRContext *s = ctx->priv;
+    float *mag, *phase, min = FLT_MAX, max = FLT_MIN;
+    int prev_ymag = -1, prev_yphase = -1;
+    char text[32];
+    int channel, i, x;
+
+    memset(out->data[0], 0, s->h * out->linesize[0]);
+
+    phase = av_malloc_array(s->w, sizeof(*phase));
+    mag = av_malloc_array(s->w, sizeof(*mag));
+    if (!mag || !phase)
+        goto end;
+
+    channel = av_clip(s->ir_channel, 0, s->in[1]->channels - 1);
+    for (i = 0; i < s->w; i++) {
+        const float *src = (const float *)s->in[1]->extended_data[channel];
+        double w = i * M_PI / (s->w - 1);
+        double real = 0.;
+        double imag = 0.;
+
+        for (x = 0; x < s->nb_taps; x++) {
+            real += cos(-x * w) * src[x];
+            imag += sin(-x * w) * src[x];
+        }
+
+        mag[i] = hypot(real, imag);
+        phase[i] = atan2(imag, real);
+        min = fminf(min, mag[i]);
+        max = fmaxf(max, mag[i]);
+    }
+
+    for (i = 0; i < s->w; i++) {
+        int ymag = mag[i] / max * (s->h - 1);
+        int yphase = (0.5 * (1. + phase[i] / M_PI)) * (s->h - 1);
+
+        ymag = s->h - 1 - av_clip(ymag, 0, s->h - 1);
+        yphase = s->h - 1 - av_clip(yphase, 0, s->h - 1);
+
+        if (prev_ymag < 0)
+            prev_ymag = ymag;
+        if (prev_yphase < 0)
+            prev_yphase = yphase;
+
+        draw_line(out, i,   ymag, FFMAX(i - 1, 0),   prev_ymag, 0xFFFF00FF);
+        draw_line(out, i, yphase, FFMAX(i - 1, 0), prev_yphase, 0xFF00FF00);
+
+        prev_ymag   = ymag;
+        prev_yphase = yphase;
+    }
+
+    if (s->w > 400 && s->h > 100) {
+        drawtext(out, 2, 2, "Max Magnitude:", 0xDDDDDDDD);
+        snprintf(text, sizeof(text), "%.2f", max);
+        drawtext(out, 15 * 8 + 2, 2, text, 0xDDDDDDDD);
+
+        drawtext(out, 2, 12, "Min Magnitude:", 0xDDDDDDDD);
+        snprintf(text, sizeof(text), "%.2f", min);
+        drawtext(out, 15 * 8 + 2, 12, text, 0xDDDDDDDD);
+    }
+
+end:
+    av_free(phase);
+    av_free(mag);
+}
+
 static int convert_coeffs(AVFilterContext *ctx)
 {
     AudioFIRContext *s = ctx->priv;
-    int i, ch, n, N;
+    int ret, i, ch, n, N;
     float power = 0;
 
-    s->nb_taps = av_audio_fifo_size(s->fifo[1]);
+    s->nb_taps = ff_inlink_queued_samples(ctx->inputs[1]);
     if (s->nb_taps <= 0)
         return AVERROR(EINVAL);
 
@@ -207,23 +320,69 @@
             return AVERROR(ENOMEM);
     }
 
-    s->in[1] = ff_get_audio_buffer(ctx->inputs[1], s->nb_taps);
-    if (!s->in[1])
-        return AVERROR(ENOMEM);
-
     s->buffer = ff_get_audio_buffer(ctx->inputs[0], s->part_size * 3);
     if (!s->buffer)
         return AVERROR(ENOMEM);
 
-    av_audio_fifo_read(s->fifo[1], (void **)s->in[1]->extended_data, s->nb_taps);
+    ret = ff_inlink_consume_samples(ctx->inputs[1], s->nb_taps, s->nb_taps, &s->in[1]);
+    if (ret < 0)
+        return ret;
+    if (ret == 0)
+        return AVERROR_BUG;
+
+    if (s->response)
+        draw_response(ctx, s->video);
+
+    s->gain = 1;
+
+    switch (s->gtype) {
+    case -1:
+        /* nothinkg to do */
+        break;
+    case 0:
+        for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
+            float *time = (float *)s->in[1]->extended_data[!s->one2many * ch];
+
+            for (i = 0; i < s->nb_taps; i++)
+                power += FFABS(time[i]);
+        }
+        s->gain = ctx->inputs[1]->channels / power;
+        break;
+    case 1:
+        for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
+            float *time = (float *)s->in[1]->extended_data[!s->one2many * ch];
+
+            for (i = 0; i < s->nb_taps; i++)
+                power += time[i];
+        }
+        s->gain = ctx->inputs[1]->channels / power;
+        break;
+    case 2:
+        for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
+            float *time = (float *)s->in[1]->extended_data[!s->one2many * ch];
+
+            for (i = 0; i < s->nb_taps; i++)
+                power += time[i] * time[i];
+        }
+        s->gain = sqrtf(ch / power);
+        break;
+    default:
+        return AVERROR_BUG;
+    }
+
+    s->gain = FFMIN(s->gain * s->ir_gain, 1.f);
+    av_log(ctx, AV_LOG_DEBUG, "power %f, gain %f\n", power, s->gain);
+    for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
+        float *time = (float *)s->in[1]->extended_data[!s->one2many * ch];
+
+        s->fdsp->vector_fmul_scalar(time, time, s->gain, FFALIGN(s->nb_taps, 4));
+    }
 
     for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
         float *time = (float *)s->in[1]->extended_data[!s->one2many * ch];
         float *block = s->block[ch];
         FFTComplex *coeff = s->coeff[ch];
 
-        power += s->fdsp->scalarproduct_float(time, time, s->nb_taps);
-
         for (i = FFMAX(1, s->length * s->nb_taps); i < s->nb_taps; i++)
             time[i] = 0;
 
@@ -252,7 +411,6 @@
     }
 
     av_frame_free(&s->in[1]);
-    s->gain = s->again ? 1.f / sqrtf(power / ctx->inputs[1]->channels) : 1.f;
     av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", s->nb_taps);
     av_log(ctx, AV_LOG_DEBUG, "nb_partitions: %d\n", s->nb_partitions);
     av_log(ctx, AV_LOG_DEBUG, "partition size: %d\n", s->part_size);
@@ -263,18 +421,14 @@
     return 0;
 }
 
-static int read_ir(AVFilterLink *link, AVFrame *frame)
+static int check_ir(AVFilterLink *link, AVFrame *frame)
 {
     AVFilterContext *ctx = link->dst;
     AudioFIRContext *s = ctx->priv;
     int nb_taps, max_nb_taps;
 
-    av_audio_fifo_write(s->fifo[1], (void **)frame->extended_data,
-                        frame->nb_samples);
-    av_frame_free(&frame);
-
-    nb_taps = av_audio_fifo_size(s->fifo[1]);
-    max_nb_taps = MAX_IR_DURATION * ctx->outputs[0]->sample_rate;
+    nb_taps = ff_inlink_queued_samples(link);
+    max_nb_taps = s->max_ir_len * ctx->outputs[0]->sample_rate;
     if (nb_taps > max_nb_taps) {
         av_log(ctx, AV_LOG_ERROR, "Too big number of coefficients: %d > %d.\n", nb_taps, max_nb_taps);
         return AVERROR(EINVAL);
@@ -283,19 +437,33 @@
     return 0;
 }
 
-static int filter_frame(AVFilterLink *link, AVFrame *frame)
+static int activate(AVFilterContext *ctx)
 {
-    AVFilterContext *ctx = link->dst;
     AudioFIRContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
-    int ret = 0;
+    AVFrame *in = NULL;
+    int ret, status;
+    int64_t pts;
 
-    av_audio_fifo_write(s->fifo[0], (void **)frame->extended_data,
-                        frame->nb_samples);
-    if (s->pts == AV_NOPTS_VALUE)
-        s->pts = frame->pts;
+    FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
+    if (s->response)
+        FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[1], ctx);
+    if (!s->eof_coeffs) {
+        AVFrame *ir = NULL;
 
-    av_frame_free(&frame);
+        ret = check_ir(ctx->inputs[1], ir);
+        if (ret < 0)
+            return ret;
+
+        if (ff_outlink_get_status(ctx->inputs[1]) == AVERROR_EOF)
+            s->eof_coeffs = 1;
+
+        if (!s->eof_coeffs) {
+            if (ff_outlink_frame_wanted(ctx->outputs[0]))
+                ff_inlink_request_frame(ctx->inputs[1]);
+            return 0;
+        }
+    }
 
     if (!s->have_coeffs && s->eof_coeffs) {
         ret = convert_coeffs(ctx);
@@ -303,70 +471,98 @@
             return ret;
     }
 
-    if (s->have_coeffs) {
-        while (av_audio_fifo_size(s->fifo[0]) >= s->part_size) {
-            ret = fir_frame(s, outlink);
-            if (ret < 0)
-                break;
-        }
+    if (s->need_padding) {
+        in = ff_get_audio_buffer(outlink, s->part_size);
+        if (!in)
+            return AVERROR(ENOMEM);
+        s->need_padding = 0;
+        ret = 1;
+    } else {
+        ret = ff_inlink_consume_samples(ctx->inputs[0], s->part_size, s->part_size, &in);
     }
-    return ret;
-}
 
-static int request_frame(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    AudioFIRContext *s = ctx->priv;
-    int ret;
+    if (ret > 0) {
+        ret = fir_frame(s, in, outlink);
+        if (ret < 0)
+            return ret;
+    }
 
-    if (!s->eof_coeffs) {
-        ret = ff_request_frame(ctx->inputs[1]);
-        if (ret == AVERROR_EOF) {
-            s->eof_coeffs = 1;
-            ret = 0;
-        }
+    if (ret < 0)
         return ret;
-    }
-    ret = ff_request_frame(ctx->inputs[0]);
-    if (ret == AVERROR_EOF && s->have_coeffs) {
-        if (s->need_padding) {
-            AVFrame *silence = ff_get_audio_buffer(outlink, s->part_size);
 
-            if (!silence)
-                return AVERROR(ENOMEM);
-            av_audio_fifo_write(s->fifo[0], (void **)silence->extended_data,
-                        silence->nb_samples);
-            av_frame_free(&silence);
-            s->need_padding = 0;
-        }
-
-        while (av_audio_fifo_size(s->fifo[0]) > 0) {
-            ret = fir_frame(s, outlink);
+    if (s->response && s->have_coeffs) {
+        if (ff_outlink_frame_wanted(ctx->outputs[1])) {
+            s->video->pts = s->pts;
+            ret = ff_filter_frame(ctx->outputs[1], av_frame_clone(s->video));
             if (ret < 0)
                 return ret;
         }
-        ret = AVERROR_EOF;
     }
-    return ret;
+
+    if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts)) {
+        if (status == AVERROR_EOF) {
+            ff_outlink_set_status(ctx->outputs[0], status, pts);
+            if (s->response)
+                ff_outlink_set_status(ctx->outputs[1], status, pts);
+            return 0;
+        }
+    }
+
+    if (ff_outlink_frame_wanted(ctx->outputs[0])) {
+        ff_inlink_request_frame(ctx->inputs[0]);
+        return 0;
+    }
+
+    if (s->response && ff_outlink_frame_wanted(ctx->outputs[1])) {
+        ff_inlink_request_frame(ctx->inputs[0]);
+        return 0;
+    }
+
+    return 0;
 }
 
 static int query_formats(AVFilterContext *ctx)
 {
+    AudioFIRContext *s = ctx->priv;
     AVFilterFormats *formats;
     AVFilterChannelLayouts *layouts;
     static const enum AVSampleFormat sample_fmts[] = {
         AV_SAMPLE_FMT_FLTP,
         AV_SAMPLE_FMT_NONE
     };
-    int ret, i;
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_RGB0,
+        AV_PIX_FMT_NONE
+    };
+    int ret;
+
+    if (s->response) {
+        AVFilterLink *videolink = ctx->outputs[1];
+        formats = ff_make_format_list(pix_fmts);
+        if ((ret = ff_formats_ref(formats, &videolink->in_formats)) < 0)
+            return ret;
+    }
 
     layouts = ff_all_channel_counts();
-    if ((ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts)) < 0)
-        return ret;
+    if (!layouts)
+        return AVERROR(ENOMEM);
 
-    for (i = 0; i < 2; i++) {
-        layouts = ff_all_channel_counts();
-        if ((ret = ff_channel_layouts_ref(layouts, &ctx->inputs[i]->out_channel_layouts)) < 0)
+    if (s->ir_format) {
+        ret = ff_set_common_channel_layouts(ctx, layouts);
+        if (ret < 0)
+            return ret;
+    } else {
+        AVFilterChannelLayouts *mono = NULL;
+
+        ret = ff_add_channel_layout(&mono, AV_CH_LAYOUT_MONO);
+        if (ret)
+            return ret;
+
+        if ((ret = ff_channel_layouts_ref(layouts, &ctx->inputs[0]->out_channel_layouts)) < 0)
+            return ret;
+        if ((ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts)) < 0)
+            return ret;
+        if ((ret = ff_channel_layouts_ref(mono, &ctx->inputs[1]->out_channel_layouts)) < 0)
             return ret;
     }
 
@@ -383,25 +579,12 @@
     AVFilterContext *ctx = outlink->src;
     AudioFIRContext *s = ctx->priv;
 
-    if (ctx->inputs[0]->channels != ctx->inputs[1]->channels &&
-        ctx->inputs[1]->channels != 1) {
-        av_log(ctx, AV_LOG_ERROR,
-               "Second input must have same number of channels as first input or "
-               "exactly 1 channel.\n");
-        return AVERROR(EINVAL);
-    }
-
     s->one2many = ctx->inputs[1]->channels == 1;
     outlink->sample_rate = ctx->inputs[0]->sample_rate;
     outlink->time_base   = ctx->inputs[0]->time_base;
     outlink->channel_layout = ctx->inputs[0]->channel_layout;
     outlink->channels = ctx->inputs[0]->channels;
 
-    s->fifo[0] = av_audio_fifo_alloc(ctx->inputs[0]->format, ctx->inputs[0]->channels, 1024);
-    s->fifo[1] = av_audio_fifo_alloc(ctx->inputs[1]->format, ctx->inputs[1]->channels, 1024);
-    if (!s->fifo[0] || !s->fifo[1])
-        return AVERROR(ENOMEM);
-
     s->sum = av_calloc(outlink->channels, sizeof(*s->sum));
     s->coeff = av_calloc(ctx->inputs[1]->channels, sizeof(*s->coeff));
     s->block = av_calloc(ctx->inputs[0]->channels, sizeof(*s->block));
@@ -459,19 +642,71 @@
     }
     av_freep(&s->irdft);
 
-    av_frame_free(&s->in[0]);
     av_frame_free(&s->in[1]);
     av_frame_free(&s->buffer);
 
-    av_audio_fifo_free(s->fifo[0]);
-    av_audio_fifo_free(s->fifo[1]);
-
     av_freep(&s->fdsp);
+
+    for (int i = 0; i < ctx->nb_outputs; i++)
+        av_freep(&ctx->output_pads[i].name);
+    av_frame_free(&s->video);
+}
+
+static int config_video(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioFIRContext *s = ctx->priv;
+
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+    outlink->w = s->w;
+    outlink->h = s->h;
+
+    av_frame_free(&s->video);
+    s->video = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!s->video)
+        return AVERROR(ENOMEM);
+
+    return 0;
 }
 
 static av_cold int init(AVFilterContext *ctx)
 {
     AudioFIRContext *s = ctx->priv;
+    AVFilterPad pad, vpad;
+    int ret;
+
+    pad = (AVFilterPad){
+        .name          = av_strdup("default"),
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .config_props  = config_output,
+    };
+
+    if (!pad.name)
+        return AVERROR(ENOMEM);
+
+    if (s->response) {
+        vpad = (AVFilterPad){
+            .name         = av_strdup("filter_response"),
+            .type         = AVMEDIA_TYPE_VIDEO,
+            .config_props = config_video,
+        };
+        if (!vpad.name)
+            return AVERROR(ENOMEM);
+    }
+
+    ret = ff_insert_outpad(ctx, 0, &pad);
+    if (ret < 0) {
+        av_freep(&pad.name);
+        return ret;
+    }
+
+    if (s->response) {
+        ret = ff_insert_outpad(ctx, 1, &vpad);
+        if (ret < 0) {
+            av_freep(&vpad.name);
+            return ret;
+        }
+    }
 
     s->fcmul_add = fcmul_add_c;
 
@@ -489,33 +724,34 @@
     {
         .name           = "main",
         .type           = AVMEDIA_TYPE_AUDIO,
-        .filter_frame   = filter_frame,
     },{
         .name           = "ir",
         .type           = AVMEDIA_TYPE_AUDIO,
-        .filter_frame   = read_ir,
-    },
-    { NULL }
-};
-
-static const AVFilterPad afir_outputs[] = {
-    {
-        .name          = "default",
-        .type          = AVMEDIA_TYPE_AUDIO,
-        .config_props  = config_output,
-        .request_frame = request_frame,
     },
     { NULL }
 };
 
 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+#define VF AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 #define OFFSET(x) offsetof(AudioFIRContext, x)
 
 static const AVOption afir_options[] = {
-    { "dry",    "set dry gain",     OFFSET(dry_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF },
-    { "wet",    "set wet gain",     OFFSET(wet_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF },
-    { "length", "set IR length",    OFFSET(length),   AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF },
-    { "again",  "enable auto gain", OFFSET(again),    AV_OPT_TYPE_BOOL,  {.i64=1}, 0, 1, AF },
+    { "dry",    "set dry gain",      OFFSET(dry_gain),   AV_OPT_TYPE_FLOAT, {.dbl=1},    0, 10, AF },
+    { "wet",    "set wet gain",      OFFSET(wet_gain),   AV_OPT_TYPE_FLOAT, {.dbl=1},    0, 10, AF },
+    { "length", "set IR length",     OFFSET(length),     AV_OPT_TYPE_FLOAT, {.dbl=1},    0,  1, AF },
+    { "gtype",  "set IR auto gain type",OFFSET(gtype),   AV_OPT_TYPE_INT,   {.i64=0},   -1,  2, AF, "gtype" },
+    {  "none",  "without auto gain", 0,                  AV_OPT_TYPE_CONST, {.i64=-1},   0,  0, AF, "gtype" },
+    {  "peak",  "peak gain",         0,                  AV_OPT_TYPE_CONST, {.i64=0},    0,  0, AF, "gtype" },
+    {  "dc",    "DC gain",           0,                  AV_OPT_TYPE_CONST, {.i64=1},    0,  0, AF, "gtype" },
+    {  "gn",    "gain to noise",     0,                  AV_OPT_TYPE_CONST, {.i64=2},    0,  0, AF, "gtype" },
+    { "irgain", "set IR gain",       OFFSET(ir_gain),    AV_OPT_TYPE_FLOAT, {.dbl=1},    0,  1, AF },
+    { "irfmt",  "set IR format",     OFFSET(ir_format),  AV_OPT_TYPE_INT,   {.i64=1},    0,  1, AF, "irfmt" },
+    {  "mono",  "single channel",    0,                  AV_OPT_TYPE_CONST, {.i64=0},    0,  0, AF, "irfmt" },
+    {  "input", "same as input",     0,                  AV_OPT_TYPE_CONST, {.i64=1},    0,  0, AF, "irfmt" },
+    { "maxir",  "set max IR length", OFFSET(max_ir_len), AV_OPT_TYPE_FLOAT, {.dbl=30}, 0.1, 60, AF },
+    { "response", "show IR frequency response", OFFSET(response), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, VF },
+    { "channel", "set IR channel to display frequency response", OFFSET(ir_channel), AV_OPT_TYPE_INT, {.i64=0}, 0, 1024, VF },
+    { "size",   "set video size",    OFFSET(w),          AV_OPT_TYPE_IMAGE_SIZE, {.str = "hd720"}, 0, 0, VF },
     { NULL }
 };
 
@@ -528,8 +764,9 @@
     .priv_class    = &afir_class,
     .query_formats = query_formats,
     .init          = init,
+    .activate      = activate,
     .uninit        = uninit,
     .inputs        = afir_inputs,
-    .outputs       = afir_outputs,
-    .flags         = AVFILTER_FLAG_SLICE_THREADS,
+    .flags         = AVFILTER_FLAG_DYNAMIC_OUTPUTS |
+                     AVFILTER_FLAG_SLICE_THREADS,
 };

diff --git a/libavfilter/af_afir.h b/libavfilter/af_afir.h
index aaeb665..7d4f32e 100644
--- a/libavfilter/af_afir.h
+++ b/libavfilter/af_afir.h

@@ -32,15 +32,19 @@
 #include "formats.h"
 #include "internal.h"
 
-#define MAX_IR_DURATION 30
-
 typedef struct AudioFIRContext {
     const AVClass *class;
 
     float wet_gain;
     float dry_gain;
     float length;
-    int again;
+    int gtype;
+    float ir_gain;
+    int ir_format;
+    float max_ir_len;
+    int response;
+    int w, h;
+    int ir_channel;
 
     float gain;
 
@@ -67,9 +71,10 @@
     float **block;
     FFTComplex **coeff;
 
-    AVAudioFifo *fifo[2];
+    AVAudioFifo *fifo;
     AVFrame *in[2];
     AVFrame *buffer;
+    AVFrame *video;
     int64_t pts;
     int index;
 

diff --git a/libavfilter/af_agate.c b/libavfilter/af_agate.c
index 20905cc..ba96863 100644
--- a/libavfilter/af_agate.c
+++ b/libavfilter/af_agate.c

@@ -214,7 +214,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);
@@ -323,14 +323,13 @@
     }
     FF_FILTER_FORWARD_STATUS(ctx->inputs[0], ctx->outputs[0]);
     FF_FILTER_FORWARD_STATUS(ctx->inputs[1], ctx->outputs[0]);
-    /* TODO reindent */
-        if (ff_outlink_frame_wanted(ctx->outputs[0])) {
-            if (!av_audio_fifo_size(s->fifo[0]))
-                ff_inlink_request_frame(ctx->inputs[0]);
-            if (!av_audio_fifo_size(s->fifo[1]))
-                ff_inlink_request_frame(ctx->inputs[1]);
-        }
-        return 0;
+    if (ff_outlink_frame_wanted(ctx->outputs[0])) {
+        if (!av_audio_fifo_size(s->fifo[0]))
+            ff_inlink_request_frame(ctx->inputs[0]);
+        if (!av_audio_fifo_size(s->fifo[1]))
+            ff_inlink_request_frame(ctx->inputs[1]);
+    }
+    return 0;
 }
 
 static int scquery_formats(AVFilterContext *ctx)

diff --git a/libavfilter/af_aiir.c b/libavfilter/af_aiir.c
new file mode 100644
index 0000000..845d542
--- /dev/null
+++ b/libavfilter/af_aiir.c

@@ -0,0 +1,1089 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavutil/xga_font_data.h"
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+
+typedef struct ThreadData {
+    AVFrame *in, *out;
+} ThreadData;
+
+typedef struct Pair {
+    int a, b;
+} Pair;
+
+typedef struct BiquadContext {
+    double a0, a1, a2;
+    double b0, b1, b2;
+    double i1, i2;
+    double o1, o2;
+} BiquadContext;
+
+typedef struct IIRChannel {
+    int nb_ab[2];
+    double *ab[2];
+    double g;
+    double *cache[2];
+    BiquadContext *biquads;
+    int clippings;
+} IIRChannel;
+
+typedef struct AudioIIRContext {
+    const AVClass *class;
+    char *a_str, *b_str, *g_str;
+    double dry_gain, wet_gain;
+    int format;
+    int process;
+    int precision;
+    int response;
+    int w, h;
+    int ir_channel;
+
+    AVFrame *video;
+
+    IIRChannel *iir;
+    int channels;
+    enum AVSampleFormat sample_format;
+
+    int (*iir_channel)(AVFilterContext *ctx, void *arg, int ch, int nb_jobs);
+} AudioIIRContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AudioIIRContext *s = ctx->priv;
+    AVFilterFormats *formats;
+    AVFilterChannelLayouts *layouts;
+    enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_RGB0,
+        AV_PIX_FMT_NONE
+    };
+    int ret;
+
+    if (s->response) {
+        AVFilterLink *videolink = ctx->outputs[1];
+
+        formats = ff_make_format_list(pix_fmts);
+        if ((ret = ff_formats_ref(formats, &videolink->in_formats)) < 0)
+            return ret;
+    }
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    sample_fmts[0] = s->sample_format;
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+#define IIR_CH(name, type, min, max, need_clipping)                     \
+static int iir_ch_## name(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)  \
+{                                                                       \
+    AudioIIRContext *s = ctx->priv;                                     \
+    const double ig = s->dry_gain;                                      \
+    const double og = s->wet_gain;                                      \
+    ThreadData *td = arg;                                               \
+    AVFrame *in = td->in, *out = td->out;                               \
+    const type *src = (const type *)in->extended_data[ch];              \
+    double *ic = (double *)s->iir[ch].cache[0];                         \
+    double *oc = (double *)s->iir[ch].cache[1];                         \
+    const int nb_a = s->iir[ch].nb_ab[0];                               \
+    const int nb_b = s->iir[ch].nb_ab[1];                               \
+    const double *a = s->iir[ch].ab[0];                                 \
+    const double *b = s->iir[ch].ab[1];                                 \
+    int *clippings = &s->iir[ch].clippings;                             \
+    type *dst = (type *)out->extended_data[ch];                         \
+    int n;                                                              \
+                                                                        \
+    for (n = 0; n < in->nb_samples; n++) {                              \
+        double sample = 0.;                                             \
+        int x;                                                          \
+                                                                        \
+        memmove(&ic[1], &ic[0], (nb_b - 1) * sizeof(*ic));              \
+        memmove(&oc[1], &oc[0], (nb_a - 1) * sizeof(*oc));              \
+        ic[0] = src[n] * ig;                                            \
+        for (x = 0; x < nb_b; x++)                                      \
+            sample += b[x] * ic[x];                                     \
+                                                                        \
+        for (x = 1; x < nb_a; x++)                                      \
+            sample -= a[x] * oc[x];                                     \
+                                                                        \
+        oc[0] = sample;                                                 \
+        sample *= og;                                                   \
+        if (need_clipping && sample < min) {                            \
+            (*clippings)++;                                             \
+            dst[n] = min;                                               \
+        } else if (need_clipping && sample > max) {                     \
+            (*clippings)++;                                             \
+            dst[n] = max;                                               \
+        } else {                                                        \
+            dst[n] = sample;                                            \
+        }                                                               \
+    }                                                                   \
+                                                                        \
+    return 0;                                                           \
+}
+
+IIR_CH(s16p, int16_t, INT16_MIN, INT16_MAX, 1)
+IIR_CH(s32p, int32_t, INT32_MIN, INT32_MAX, 1)
+IIR_CH(fltp, float,         -1.,        1., 0)
+IIR_CH(dblp, double,        -1.,        1., 0)
+
+#define SERIAL_IIR_CH(name, type, min, max, need_clipping)                  \
+static int iir_ch_serial_## name(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)  \
+{                                                                       \
+    AudioIIRContext *s = ctx->priv;                                     \
+    const double ig = s->dry_gain;                                      \
+    const double og = s->wet_gain;                                      \
+    ThreadData *td = arg;                                               \
+    AVFrame *in = td->in, *out = td->out;                               \
+    const type *src = (const type *)in->extended_data[ch];              \
+    type *dst = (type *)out->extended_data[ch];                         \
+    IIRChannel *iir = &s->iir[ch];                                      \
+    int *clippings = &iir->clippings;                                   \
+    int nb_biquads = (FFMAX(iir->nb_ab[0], iir->nb_ab[1]) + 1) / 2;     \
+    int n, i;                                                           \
+                                                                        \
+    for (i = 0; i < nb_biquads; i++) {                                  \
+        const double a1 = -iir->biquads[i].a1;                          \
+        const double a2 = -iir->biquads[i].a2;                          \
+        const double b0 = iir->biquads[i].b0;                           \
+        const double b1 = iir->biquads[i].b1;                           \
+        const double b2 = iir->biquads[i].b2;                           \
+        double i1 = iir->biquads[i].i1;                                 \
+        double i2 = iir->biquads[i].i2;                                 \
+        double o1 = iir->biquads[i].o1;                                 \
+        double o2 = iir->biquads[i].o2;                                 \
+                                                                        \
+        for (n = 0; n < in->nb_samples; n++) {                          \
+            double sample = ig * (i ? dst[n] : src[n]);                 \
+            double o0 = sample * b0 + i1 * b1 + i2 * b2 + o1 * a1 + o2 * a2; \
+                                                                        \
+            i2 = i1;                                                    \
+            i1 = src[n];                                                \
+            o2 = o1;                                                    \
+            o1 = o0;                                                    \
+            o0 *= og;                                                   \
+                                                                        \
+            if (need_clipping && o0 < min) {                            \
+                (*clippings)++;                                         \
+                dst[n] = min;                                           \
+            } else if (need_clipping && o0 > max) {                     \
+                (*clippings)++;                                         \
+                dst[n] = max;                                           \
+            } else {                                                    \
+                dst[n] = o0;                                            \
+            }                                                           \
+        }                                                               \
+        iir->biquads[i].i1 = i1;                                        \
+        iir->biquads[i].i2 = i2;                                        \
+        iir->biquads[i].o1 = o1;                                        \
+        iir->biquads[i].o2 = o2;                                        \
+    }                                                                   \
+                                                                        \
+    return 0;                                                           \
+}
+
+SERIAL_IIR_CH(s16p, int16_t, INT16_MIN, INT16_MAX, 1)
+SERIAL_IIR_CH(s32p, int32_t, INT32_MIN, INT32_MAX, 1)
+SERIAL_IIR_CH(fltp, float,         -1.,        1., 0)
+SERIAL_IIR_CH(dblp, double,        -1.,        1., 0)
+
+static void count_coefficients(char *item_str, int *nb_items)
+{
+    char *p;
+
+    if (!item_str)
+        return;
+
+    *nb_items = 1;
+    for (p = item_str; *p && *p != '|'; p++) {
+        if (*p == ' ')
+            (*nb_items)++;
+    }
+}
+
+static int read_gains(AVFilterContext *ctx, char *item_str, int nb_items)
+{
+    AudioIIRContext *s = ctx->priv;
+    char *p, *arg, *old_str, *prev_arg = NULL, *saveptr = NULL;
+    int i;
+
+    p = old_str = av_strdup(item_str);
+    if (!p)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < nb_items; i++) {
+        if (!(arg = av_strtok(p, "|", &saveptr)))
+            arg = prev_arg;
+
+        if (!arg) {
+            av_freep(&old_str);
+            return AVERROR(EINVAL);
+        }
+
+        p = NULL;
+        if (sscanf(arg, "%lf", &s->iir[i].g) != 1) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid gains supplied: %s\n", arg);
+            av_freep(&old_str);
+            return AVERROR(EINVAL);
+        }
+
+        prev_arg = arg;
+    }
+
+    av_freep(&old_str);
+
+    return 0;
+}
+
+static int read_tf_coefficients(AVFilterContext *ctx, char *item_str, int nb_items, double *dst)
+{
+    char *p, *arg, *old_str, *saveptr = NULL;
+    int i;
+
+    p = old_str = av_strdup(item_str);
+    if (!p)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < nb_items; i++) {
+        if (!(arg = av_strtok(p, " ", &saveptr)))
+            break;
+
+        p = NULL;
+        if (sscanf(arg, "%lf", &dst[i]) != 1) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid coefficients supplied: %s\n", arg);
+            av_freep(&old_str);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    av_freep(&old_str);
+
+    return 0;
+}
+
+static int read_zp_coefficients(AVFilterContext *ctx, char *item_str, int nb_items, double *dst, const char *format)
+{
+    char *p, *arg, *old_str, *saveptr = NULL;
+    int i;
+
+    p = old_str = av_strdup(item_str);
+    if (!p)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < nb_items; i++) {
+        if (!(arg = av_strtok(p, " ", &saveptr)))
+            break;
+
+        p = NULL;
+        if (sscanf(arg, format, &dst[i*2], &dst[i*2+1]) != 2) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid coefficients supplied: %s\n", arg);
+            av_freep(&old_str);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    av_freep(&old_str);
+
+    return 0;
+}
+
+static const char *format[] = { "%lf", "%lf %lfi", "%lf %lfr", "%lf %lfd" };
+
+static int read_channels(AVFilterContext *ctx, int channels, uint8_t *item_str, int ab)
+{
+    AudioIIRContext *s = ctx->priv;
+    char *p, *arg, *old_str, *prev_arg = NULL, *saveptr = NULL;
+    int i, ret;
+
+    p = old_str = av_strdup(item_str);
+    if (!p)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < channels; i++) {
+        IIRChannel *iir = &s->iir[i];
+
+        if (!(arg = av_strtok(p, "|", &saveptr)))
+            arg = prev_arg;
+
+        if (!arg) {
+            av_freep(&old_str);
+            return AVERROR(EINVAL);
+        }
+
+        count_coefficients(arg, &iir->nb_ab[ab]);
+
+        p = NULL;
+        iir->cache[ab] = av_calloc(iir->nb_ab[ab] + 1, sizeof(double));
+        iir->ab[ab] = av_calloc(iir->nb_ab[ab] * (!!s->format + 1), sizeof(double));
+        if (!iir->ab[ab] || !iir->cache[ab]) {
+            av_freep(&old_str);
+            return AVERROR(ENOMEM);
+        }
+
+        if (s->format) {
+            ret = read_zp_coefficients(ctx, arg, iir->nb_ab[ab], iir->ab[ab], format[s->format]);
+        } else {
+            ret = read_tf_coefficients(ctx, arg, iir->nb_ab[ab], iir->ab[ab]);
+        }
+        if (ret < 0) {
+            av_freep(&old_str);
+            return ret;
+        }
+        prev_arg = arg;
+    }
+
+    av_freep(&old_str);
+
+    return 0;
+}
+
+static void multiply(double wre, double wim, int npz, double *coeffs)
+{
+    double nwre = -wre, nwim = -wim;
+    double cre, cim;
+    int i;
+
+    for (i = npz; i >= 1; i--) {
+        cre = coeffs[2 * i + 0];
+        cim = coeffs[2 * i + 1];
+
+        coeffs[2 * i + 0] = (nwre * cre - nwim * cim) + coeffs[2 * (i - 1) + 0];
+        coeffs[2 * i + 1] = (nwre * cim + nwim * cre) + coeffs[2 * (i - 1) + 1];
+    }
+
+    cre = coeffs[0];
+    cim = coeffs[1];
+    coeffs[0] = nwre * cre - nwim * cim;
+    coeffs[1] = nwre * cim + nwim * cre;
+}
+
+static int expand(AVFilterContext *ctx, double *pz, int nb, double *coeffs)
+{
+    int i;
+
+    coeffs[0] = 1.0;
+    coeffs[1] = 0.0;
+
+    for (i = 0; i < nb; i++) {
+        coeffs[2 * (i + 1)    ] = 0.0;
+        coeffs[2 * (i + 1) + 1] = 0.0;
+    }
+
+    for (i = 0; i < nb; i++)
+        multiply(pz[2 * i], pz[2 * i + 1], nb, coeffs);
+
+    for (i = 0; i < nb + 1; i++) {
+        if (fabs(coeffs[2 * i + 1]) > FLT_EPSILON) {
+            av_log(ctx, AV_LOG_ERROR, "coeff: %f of z^%d is not real; poles/zeros are not complex conjugates.\n",
+                   coeffs[2 * i + 1], i);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    return 0;
+}
+
+static int convert_zp2tf(AVFilterContext *ctx, int channels)
+{
+    AudioIIRContext *s = ctx->priv;
+    int ch, i, j, ret = 0;
+
+    for (ch = 0; ch < channels; ch++) {
+        IIRChannel *iir = &s->iir[ch];
+        double *topc, *botc;
+
+        topc = av_calloc((iir->nb_ab[0] + 1) * 2, sizeof(*topc));
+        botc = av_calloc((iir->nb_ab[1] + 1) * 2, sizeof(*botc));
+        if (!topc || !botc) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        ret = expand(ctx, iir->ab[0], iir->nb_ab[0], botc);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        ret = expand(ctx, iir->ab[1], iir->nb_ab[1], topc);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        for (j = 0, i = iir->nb_ab[1]; i >= 0; j++, i--) {
+            iir->ab[1][j] = topc[2 * i];
+        }
+        iir->nb_ab[1]++;
+
+        for (j = 0, i = iir->nb_ab[0]; i >= 0; j++, i--) {
+            iir->ab[0][j] = botc[2 * i];
+        }
+        iir->nb_ab[0]++;
+
+fail:
+        av_free(topc);
+        av_free(botc);
+        if (ret < 0)
+            break;
+    }
+
+    return ret;
+}
+
+static int decompose_zp2biquads(AVFilterContext *ctx, int channels)
+{
+    AudioIIRContext *s = ctx->priv;
+    int ch, ret;
+
+    for (ch = 0; ch < channels; ch++) {
+        IIRChannel *iir = &s->iir[ch];
+        int nb_biquads = (FFMAX(iir->nb_ab[0], iir->nb_ab[1]) + 1) / 2;
+        int current_biquad = 0;
+
+        iir->biquads = av_calloc(nb_biquads, sizeof(BiquadContext));
+        if (!iir->biquads)
+            return AVERROR(ENOMEM);
+
+        while (nb_biquads--) {
+            Pair outmost_pole = { -1, -1 };
+            Pair nearest_zero = { -1, -1 };
+            double zeros[4] = { 0 };
+            double poles[4] = { 0 };
+            double b[6] = { 0 };
+            double a[6] = { 0 };
+            double min_distance = DBL_MAX;
+            double max_mag = 0;
+            int i;
+
+            for (i = 0; i < iir->nb_ab[0]; i++) {
+                double mag;
+
+                if (isnan(iir->ab[0][2 * i]) || isnan(iir->ab[0][2 * i + 1]))
+                    continue;
+                mag = hypot(iir->ab[0][2 * i], iir->ab[0][2 * i + 1]);
+
+                if (mag > max_mag) {
+                    max_mag = mag;
+                    outmost_pole.a = i;
+                }
+            }
+
+            for (i = 0; i < iir->nb_ab[1]; i++) {
+                if (isnan(iir->ab[0][2 * i]) || isnan(iir->ab[0][2 * i + 1]))
+                    continue;
+
+                if (iir->ab[0][2 * i    ] ==  iir->ab[0][2 * outmost_pole.a    ] &&
+                    iir->ab[0][2 * i + 1] == -iir->ab[0][2 * outmost_pole.a + 1]) {
+                    outmost_pole.b = i;
+                    break;
+                }
+            }
+
+            av_log(ctx, AV_LOG_VERBOSE, "outmost_pole is %d.%d\n", outmost_pole.a, outmost_pole.b);
+
+            if (outmost_pole.a < 0 || outmost_pole.b < 0)
+                return AVERROR(EINVAL);
+
+            for (i = 0; i < iir->nb_ab[1]; i++) {
+                double distance;
+
+                if (isnan(iir->ab[1][2 * i]) || isnan(iir->ab[1][2 * i + 1]))
+                    continue;
+                distance = hypot(iir->ab[0][2 * outmost_pole.a    ] - iir->ab[1][2 * i    ],
+                                 iir->ab[0][2 * outmost_pole.a + 1] - iir->ab[1][2 * i + 1]);
+
+                if (distance < min_distance) {
+                    min_distance = distance;
+                    nearest_zero.a = i;
+                }
+            }
+
+            for (i = 0; i < iir->nb_ab[1]; i++) {
+                if (isnan(iir->ab[1][2 * i]) || isnan(iir->ab[1][2 * i + 1]))
+                    continue;
+
+                if (iir->ab[1][2 * i    ] ==  iir->ab[1][2 * nearest_zero.a    ] &&
+                    iir->ab[1][2 * i + 1] == -iir->ab[1][2 * nearest_zero.a + 1]) {
+                    nearest_zero.b = i;
+                    break;
+                }
+            }
+
+            av_log(ctx, AV_LOG_VERBOSE, "nearest_zero is %d.%d\n", nearest_zero.a, nearest_zero.b);
+
+            if (nearest_zero.a < 0 || nearest_zero.b < 0)
+                return AVERROR(EINVAL);
+
+            poles[0] = iir->ab[0][2 * outmost_pole.a    ];
+            poles[1] = iir->ab[0][2 * outmost_pole.a + 1];
+
+            zeros[0] = iir->ab[1][2 * nearest_zero.a    ];
+            zeros[1] = iir->ab[1][2 * nearest_zero.a + 1];
+
+            if (nearest_zero.a == nearest_zero.b && outmost_pole.a == outmost_pole.b) {
+                zeros[2] = 0;
+                zeros[3] = 0;
+
+                poles[2] = 0;
+                poles[3] = 0;
+            } else {
+                poles[2] = iir->ab[0][2 * outmost_pole.b    ];
+                poles[3] = iir->ab[0][2 * outmost_pole.b + 1];
+
+                zeros[2] = iir->ab[1][2 * nearest_zero.b    ];
+                zeros[3] = iir->ab[1][2 * nearest_zero.b + 1];
+            }
+
+            ret = expand(ctx, zeros, 2, b);
+            if (ret < 0)
+                return ret;
+
+            ret = expand(ctx, poles, 2, a);
+            if (ret < 0)
+                return ret;
+
+            iir->ab[0][2 * outmost_pole.a] = iir->ab[0][2 * outmost_pole.a + 1] = NAN;
+            iir->ab[0][2 * outmost_pole.b] = iir->ab[0][2 * outmost_pole.b + 1] = NAN;
+            iir->ab[1][2 * nearest_zero.a] = iir->ab[1][2 * nearest_zero.a + 1] = NAN;
+            iir->ab[1][2 * nearest_zero.b] = iir->ab[1][2 * nearest_zero.b + 1] = NAN;
+
+            iir->biquads[current_biquad].a0 = 1.0;
+            iir->biquads[current_biquad].a1 = a[2] / a[4];
+            iir->biquads[current_biquad].a2 = a[0] / a[4];
+            iir->biquads[current_biquad].b0 = b[4] / a[4] * (current_biquad ? 1.0 : iir->g);
+            iir->biquads[current_biquad].b1 = b[2] / a[4] * (current_biquad ? 1.0 : iir->g);
+            iir->biquads[current_biquad].b2 = b[0] / a[4] * (current_biquad ? 1.0 : iir->g);
+
+            av_log(ctx, AV_LOG_VERBOSE, "a=%f %f %f:b=%f %f %f\n",
+                   iir->biquads[current_biquad].a0,
+                   iir->biquads[current_biquad].a1,
+                   iir->biquads[current_biquad].a2,
+                   iir->biquads[current_biquad].b0,
+                   iir->biquads[current_biquad].b1,
+                   iir->biquads[current_biquad].b2);
+
+            current_biquad++;
+        }
+    }
+
+    return 0;
+}
+
+static void convert_pr2zp(AVFilterContext *ctx, int channels)
+{
+    AudioIIRContext *s = ctx->priv;
+    int ch;
+
+    for (ch = 0; ch < channels; ch++) {
+        IIRChannel *iir = &s->iir[ch];
+        int n;
+
+        for (n = 0; n < iir->nb_ab[0]; n++) {
+            double r = iir->ab[0][2*n];
+            double angle = iir->ab[0][2*n+1];
+
+            iir->ab[0][2*n]   = r * cos(angle);
+            iir->ab[0][2*n+1] = r * sin(angle);
+        }
+
+        for (n = 0; n < iir->nb_ab[1]; n++) {
+            double r = iir->ab[1][2*n];
+            double angle = iir->ab[1][2*n+1];
+
+            iir->ab[1][2*n]   = r * cos(angle);
+            iir->ab[1][2*n+1] = r * sin(angle);
+        }
+    }
+}
+
+static void convert_pd2zp(AVFilterContext *ctx, int channels)
+{
+    AudioIIRContext *s = ctx->priv;
+    int ch;
+
+    for (ch = 0; ch < channels; ch++) {
+        IIRChannel *iir = &s->iir[ch];
+        int n;
+
+        for (n = 0; n < iir->nb_ab[0]; n++) {
+            double r = iir->ab[0][2*n];
+            double angle = M_PI*iir->ab[0][2*n+1]/180.;
+
+            iir->ab[0][2*n]   = r * cos(angle);
+            iir->ab[0][2*n+1] = r * sin(angle);
+        }
+
+        for (n = 0; n < iir->nb_ab[1]; n++) {
+            double r = iir->ab[1][2*n];
+            double angle = M_PI*iir->ab[1][2*n+1]/180.;
+
+            iir->ab[1][2*n]   = r * cos(angle);
+            iir->ab[1][2*n+1] = r * sin(angle);
+        }
+    }
+}
+
+static void drawtext(AVFrame *pic, int x, int y, const char *txt, uint32_t color)
+{
+    const uint8_t *font;
+    int font_height;
+    int i;
+
+    font = avpriv_cga_font, font_height = 8;
+
+    for (i = 0; txt[i]; i++) {
+        int char_y, mask;
+
+        uint8_t *p = pic->data[0] + y * pic->linesize[0] + (x + i * 8) * 4;
+        for (char_y = 0; char_y < font_height; char_y++) {
+            for (mask = 0x80; mask; mask >>= 1) {
+                if (font[txt[i] * font_height + char_y] & mask)
+                    AV_WL32(p, color);
+                p += 4;
+            }
+            p += pic->linesize[0] - 8 * 4;
+        }
+    }
+}
+
+static void draw_line(AVFrame *out, int x0, int y0, int x1, int y1, uint32_t color)
+{
+    int dx = FFABS(x1-x0);
+    int dy = FFABS(y1-y0), sy = y0 < y1 ? 1 : -1;
+    int err = (dx>dy ? dx : -dy) / 2, e2;
+
+    for (;;) {
+        AV_WL32(out->data[0] + y0 * out->linesize[0] + x0 * 4, color);
+
+        if (x0 == x1 && y0 == y1)
+            break;
+
+        e2 = err;
+
+        if (e2 >-dx) {
+            err -= dy;
+            x0--;
+        }
+
+        if (e2 < dy) {
+            err += dx;
+            y0 += sy;
+        }
+    }
+}
+
+static void draw_response(AVFilterContext *ctx, AVFrame *out)
+{
+    AudioIIRContext *s = ctx->priv;
+    float *mag, *phase, min = FLT_MAX, max = FLT_MIN;
+    int prev_ymag = -1, prev_yphase = -1;
+    char text[32];
+    int ch, i, x;
+
+    memset(out->data[0], 0, s->h * out->linesize[0]);
+
+    phase = av_malloc_array(s->w, sizeof(*phase));
+    mag = av_malloc_array(s->w, sizeof(*mag));
+    if (!mag || !phase)
+        goto end;
+
+    ch = av_clip(s->ir_channel, 0, s->channels - 1);
+    for (i = 0; i < s->w; i++) {
+        const double *b = s->iir[ch].ab[0];
+        const double *a = s->iir[ch].ab[1];
+        double w = i * M_PI / (s->w - 1);
+        double realz, realp;
+        double imagz, imagp;
+        double real, imag, div;
+
+        if (s->format == 0) {
+            realz = 0., realp = 0.;
+            imagz = 0., imagp = 0.;
+            for (x = 0; x < s->iir[ch].nb_ab[1]; x++) {
+                realz += cos(-x * w) * a[x];
+                imagz += sin(-x * w) * a[x];
+            }
+
+            for (x = 0; x < s->iir[ch].nb_ab[0]; x++) {
+                realp += cos(-x * w) * b[x];
+                imagp += sin(-x * w) * b[x];
+            }
+
+            div = realp * realp + imagp * imagp;
+            real = (realz * realp + imagz * imagp) / div;
+            imag = (imagz * realp - imagp * realz) / div;
+        } else {
+            real = 1;
+            imag = 0;
+            for (x = 0; x < s->iir[ch].nb_ab[1]; x++) {
+                double ore, oim, re, im;
+
+                re = cos(w) - a[2 * x];
+                im = sin(w) - a[2 * x + 1];
+
+                ore = real;
+                oim = imag;
+
+                real = ore * re - oim * im;
+                imag = ore * im + oim * re;
+            }
+
+            for (x = 0; x < s->iir[ch].nb_ab[0]; x++) {
+                double ore, oim, re, im;
+
+                re = cos(w) - b[2 * x];
+                im = sin(w) - b[2 * x + 1];
+
+                ore = real;
+                oim = imag;
+                div = re * re + im * im;
+
+                real = (ore * re + oim * im) / div;
+                imag = (oim * re - ore * im) / div;
+            }
+        }
+
+        mag[i] = s->iir[ch].g * hypot(real, imag);
+        phase[i] = atan2(imag, real);
+        min = fminf(min, mag[i]);
+        max = fmaxf(max, mag[i]);
+    }
+
+    for (i = 0; i < s->w; i++) {
+        int ymag = mag[i] / max * (s->h - 1);
+        int yphase = (0.5 * (1. + phase[i] / M_PI)) * (s->h - 1);
+
+        ymag = s->h - 1 - av_clip(ymag, 0, s->h - 1);
+        yphase = s->h - 1 - av_clip(yphase, 0, s->h - 1);
+
+        if (prev_ymag < 0)
+            prev_ymag = ymag;
+        if (prev_yphase < 0)
+            prev_yphase = yphase;
+
+        draw_line(out, i,   ymag, FFMAX(i - 1, 0),   prev_ymag, 0xFFFF00FF);
+        draw_line(out, i, yphase, FFMAX(i - 1, 0), prev_yphase, 0xFF00FF00);
+
+        prev_ymag   = ymag;
+        prev_yphase = yphase;
+    }
+
+    if (s->w > 400 && s->h > 100) {
+        drawtext(out, 2, 2, "Max Magnitude:", 0xDDDDDDDD);
+        snprintf(text, sizeof(text), "%.2f", max);
+        drawtext(out, 15 * 8 + 2, 2, text, 0xDDDDDDDD);
+
+        drawtext(out, 2, 12, "Min Magnitude:", 0xDDDDDDDD);
+        snprintf(text, sizeof(text), "%.2f", min);
+        drawtext(out, 15 * 8 + 2, 12, text, 0xDDDDDDDD);
+    }
+
+end:
+    av_free(phase);
+    av_free(mag);
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioIIRContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    int ch, ret, i;
+
+    s->channels = inlink->channels;
+    s->iir = av_calloc(s->channels, sizeof(*s->iir));
+    if (!s->iir)
+        return AVERROR(ENOMEM);
+
+    ret = read_gains(ctx, s->g_str, inlink->channels);
+    if (ret < 0)
+        return ret;
+
+    ret = read_channels(ctx, inlink->channels, s->a_str, 0);
+    if (ret < 0)
+        return ret;
+
+    ret = read_channels(ctx, inlink->channels, s->b_str, 1);
+    if (ret < 0)
+        return ret;
+
+    if (s->format == 2) {
+        convert_pr2zp(ctx, inlink->channels);
+    } else if (s->format == 3) {
+        convert_pd2zp(ctx, inlink->channels);
+    }
+
+    av_frame_free(&s->video);
+    if (s->response) {
+        s->video = ff_get_video_buffer(ctx->outputs[1], s->w, s->h);
+        if (!s->video)
+            return AVERROR(ENOMEM);
+
+        draw_response(ctx, s->video);
+    }
+
+    if (s->format == 0)
+        av_log(ctx, AV_LOG_WARNING, "tf coefficients format is not recommended for too high number of zeros/poles.\n");
+
+    if (s->format > 0 && s->process == 0) {
+        av_log(ctx, AV_LOG_WARNING, "Direct processsing is not recommended for zp coefficients format.\n");
+
+        ret = convert_zp2tf(ctx, inlink->channels);
+        if (ret < 0)
+            return ret;
+    } else if (s->format == 0 && s->process == 1) {
+        av_log(ctx, AV_LOG_ERROR, "Serial cascading is not implemented for transfer function.\n");
+        return AVERROR_PATCHWELCOME;
+    } else if (s->format > 0 && s->process == 1) {
+        if (inlink->format == AV_SAMPLE_FMT_S16P)
+            av_log(ctx, AV_LOG_WARNING, "Serial cascading is not recommended for i16 precision.\n");
+
+        ret = decompose_zp2biquads(ctx, inlink->channels);
+        if (ret < 0)
+            return ret;
+    }
+
+    for (ch = 0; s->format == 0 && ch < inlink->channels; ch++) {
+        IIRChannel *iir = &s->iir[ch];
+
+        for (i = 1; i < iir->nb_ab[0]; i++) {
+            iir->ab[0][i] /= iir->ab[0][0];
+        }
+
+        for (i = 0; i < iir->nb_ab[1]; i++) {
+            iir->ab[1][i] *= iir->g / iir->ab[0][0];
+        }
+    }
+
+    switch (inlink->format) {
+    case AV_SAMPLE_FMT_DBLP: s->iir_channel = s->process == 1 ? iir_ch_serial_dblp : iir_ch_dblp; break;
+    case AV_SAMPLE_FMT_FLTP: s->iir_channel = s->process == 1 ? iir_ch_serial_fltp : iir_ch_fltp; break;
+    case AV_SAMPLE_FMT_S32P: s->iir_channel = s->process == 1 ? iir_ch_serial_s32p : iir_ch_s32p; break;
+    case AV_SAMPLE_FMT_S16P: s->iir_channel = s->process == 1 ? iir_ch_serial_s16p : iir_ch_s16p; break;
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioIIRContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    ThreadData td;
+    AVFrame *out;
+    int ch, ret;
+
+    if (av_frame_is_writable(in)) {
+        out = in;
+    } else {
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    td.in  = in;
+    td.out = out;
+    ctx->internal->execute(ctx, s->iir_channel, &td, NULL, outlink->channels);
+
+    for (ch = 0; ch < outlink->channels; ch++) {
+        if (s->iir[ch].clippings > 0)
+            av_log(ctx, AV_LOG_WARNING, "Channel %d clipping %d times. Please reduce gain.\n",
+                   ch, s->iir[ch].clippings);
+        s->iir[ch].clippings = 0;
+    }
+
+    if (in != out)
+        av_frame_free(&in);
+
+    if (s->response) {
+        AVFilterLink *outlink = ctx->outputs[1];
+
+        s->video->pts = out->pts;
+        ret = ff_filter_frame(outlink, av_frame_clone(s->video));
+        if (ret < 0)
+            return ret;
+    }
+
+    return ff_filter_frame(outlink, out);
+}
+
+static int config_video(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioIIRContext *s = ctx->priv;
+
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+    outlink->w = s->w;
+    outlink->h = s->h;
+
+    return 0;
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    AudioIIRContext *s = ctx->priv;
+    AVFilterPad pad, vpad;
+    int ret;
+
+    if (!s->a_str || !s->b_str || !s->g_str) {
+        av_log(ctx, AV_LOG_ERROR, "Valid coefficients are mandatory.\n");
+        return AVERROR(EINVAL);
+    }
+
+    switch (s->precision) {
+    case 0: s->sample_format = AV_SAMPLE_FMT_DBLP; break;
+    case 1: s->sample_format = AV_SAMPLE_FMT_FLTP; break;
+    case 2: s->sample_format = AV_SAMPLE_FMT_S32P; break;
+    case 3: s->sample_format = AV_SAMPLE_FMT_S16P; break;
+    default: return AVERROR_BUG;
+    }
+
+    pad = (AVFilterPad){
+        .name         = av_strdup("default"),
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_output,
+    };
+
+    if (!pad.name)
+        return AVERROR(ENOMEM);
+
+    if (s->response) {
+        vpad = (AVFilterPad){
+            .name         = av_strdup("filter_response"),
+            .type         = AVMEDIA_TYPE_VIDEO,
+            .config_props = config_video,
+        };
+        if (!vpad.name)
+            return AVERROR(ENOMEM);
+    }
+
+    ret = ff_insert_outpad(ctx, 0, &pad);
+    if (ret < 0)
+        return ret;
+
+    if (s->response) {
+        ret = ff_insert_outpad(ctx, 1, &vpad);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioIIRContext *s = ctx->priv;
+    int ch;
+
+    if (s->iir) {
+        for (ch = 0; ch < s->channels; ch++) {
+            IIRChannel *iir = &s->iir[ch];
+            av_freep(&iir->ab[0]);
+            av_freep(&iir->ab[1]);
+            av_freep(&iir->cache[0]);
+            av_freep(&iir->cache[1]);
+            av_freep(&iir->biquads);
+        }
+    }
+    av_freep(&s->iir);
+
+    av_freep(&ctx->output_pads[0].name);
+    if (s->response)
+        av_freep(&ctx->output_pads[1].name);
+    av_frame_free(&s->video);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+#define OFFSET(x) offsetof(AudioIIRContext, x)
+#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+#define VF AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption aiir_options[] = {
+    { "z", "set B/numerator/zeros coefficients",   OFFSET(b_str),    AV_OPT_TYPE_STRING, {.str="1+0i 1-0i"}, 0, 0, AF },
+    { "p", "set A/denominator/poles coefficients", OFFSET(a_str),    AV_OPT_TYPE_STRING, {.str="1+0i 1-0i"}, 0, 0, AF },
+    { "k", "set channels gains",                   OFFSET(g_str),    AV_OPT_TYPE_STRING, {.str="1|1"}, 0, 0, AF },
+    { "dry", "set dry gain",                       OFFSET(dry_gain), AV_OPT_TYPE_DOUBLE, {.dbl=1},     0, 1, AF },
+    { "wet", "set wet gain",                       OFFSET(wet_gain), AV_OPT_TYPE_DOUBLE, {.dbl=1},     0, 1, AF },
+    { "f", "set coefficients format",              OFFSET(format),   AV_OPT_TYPE_INT,    {.i64=1},     0, 3, AF, "format" },
+    { "tf", "transfer function",                   0,                AV_OPT_TYPE_CONST,  {.i64=0},     0, 0, AF, "format" },
+    { "zp", "Z-plane zeros/poles",                 0,                AV_OPT_TYPE_CONST,  {.i64=1},     0, 0, AF, "format" },
+    { "pr", "Z-plane zeros/poles (polar radians)", 0,                AV_OPT_TYPE_CONST,  {.i64=2},     0, 0, AF, "format" },
+    { "pd", "Z-plane zeros/poles (polar degrees)", 0,                AV_OPT_TYPE_CONST,  {.i64=3},     0, 0, AF, "format" },
+    { "r", "set kind of processing",               OFFSET(process),  AV_OPT_TYPE_INT,    {.i64=1},     0, 1, AF, "process" },
+    { "d", "direct",                               0,                AV_OPT_TYPE_CONST,  {.i64=0},     0, 0, AF, "process" },
+    { "s", "serial cascading",                     0,                AV_OPT_TYPE_CONST,  {.i64=1},     0, 0, AF, "process" },
+    { "e", "set precision",                        OFFSET(precision),AV_OPT_TYPE_INT,    {.i64=0},     0, 3, AF, "precision" },
+    { "dbl", "double-precision floating-point",    0,                AV_OPT_TYPE_CONST,  {.i64=0},     0, 0, AF, "precision" },
+    { "flt", "single-precision floating-point",    0,                AV_OPT_TYPE_CONST,  {.i64=1},     0, 0, AF, "precision" },
+    { "i32", "32-bit integers",                    0,                AV_OPT_TYPE_CONST,  {.i64=2},     0, 0, AF, "precision" },
+    { "i16", "16-bit integers",                    0,                AV_OPT_TYPE_CONST,  {.i64=3},     0, 0, AF, "precision" },
+    { "response", "show IR frequency response",    OFFSET(response), AV_OPT_TYPE_BOOL,   {.i64=0},     0, 1, VF },
+    { "channel", "set IR channel to display frequency response", OFFSET(ir_channel), AV_OPT_TYPE_INT, {.i64=0}, 0, 1024, VF },
+    { "size",   "set video size",                  OFFSET(w),        AV_OPT_TYPE_IMAGE_SIZE, {.str = "hd720"}, 0, 0, VF },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(aiir);
+
+AVFilter ff_af_aiir = {
+    .name          = "aiir",
+    .description   = NULL_IF_CONFIG_SMALL("Apply Infinite Impulse Response filter with supplied coefficients."),
+    .priv_size     = sizeof(AudioIIRContext),
+    .priv_class    = &aiir_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = inputs,
+    .flags         = AVFILTER_FLAG_DYNAMIC_OUTPUTS |
+                     AVFILTER_FLAG_SLICE_THREADS,
+};

diff --git a/libavfilter/af_alimiter.c b/libavfilter/af_alimiter.c
index 46211a7..c41e955 100644
--- a/libavfilter/af_alimiter.c
+++ b/libavfilter/af_alimiter.c

@@ -135,7 +135,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);
@@ -327,6 +327,11 @@
     s->buffer_size = inlink->sample_rate * s->attack * inlink->channels;
     s->buffer_size -= s->buffer_size % inlink->channels;
 
+    if (s->buffer_size <= 0) {
+        av_log(ctx, AV_LOG_ERROR, "Attack is too small.\n");
+        return AVERROR(EINVAL);
+    }
+
     return 0;
 }
 

diff --git a/libavfilter/af_amerge.c b/libavfilter/af_amerge.c
index 09c660e..3961c90 100644
--- a/libavfilter/af_amerge.c
+++ b/libavfilter/af_amerge.c

@@ -31,8 +31,8 @@
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
 #include "avfilter.h"
+#include "filters.h"
 #include "audio.h"
-#include "bufferqueue.h"
 #include "internal.h"
 
 #define SWR_CH_MAX 64
@@ -43,10 +43,7 @@
     int route[SWR_CH_MAX]; /**< channels routing, see copy_samples */
     int bps;
     struct amerge_input {
-        struct FFBufQueue queue;
         int nb_ch;         /**< number of channels for the input */
-        int nb_samples;
-        int pos;
     } *in;
 } AMergeContext;
 
@@ -67,8 +64,6 @@
     int i;
 
     for (i = 0; i < s->nb_inputs; i++) {
-        if (s->in)
-            ff_bufqueue_discard_all(&s->in[i].queue);
         if (ctx->input_pads)
             av_freep(&ctx->input_pads[i].name);
     }
@@ -171,7 +166,7 @@
     outlink->sample_rate = ctx->inputs[0]->sample_rate;
     outlink->time_base   = ctx->inputs[0]->time_base;
 
-    av_bprint_init(&bp, 0, 1);
+    av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
     for (i = 0; i < s->nb_inputs; i++) {
         av_bprintf(&bp, "%sin%d:", i ? " + " : "", i);
         av_bprint_channel_layout(&bp, -1, ctx->inputs[i]->channel_layout);
@@ -183,21 +178,6 @@
     return 0;
 }
 
-static int request_frame(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    AMergeContext *s = ctx->priv;
-    int i, ret;
-
-    for (i = 0; i < s->nb_inputs; i++)
-        if (!s->in[i].nb_samples ||
-            /* detect EOF immediately */
-            (ctx->inputs[i]->status_in && !ctx->inputs[i]->status_out))
-            if ((ret = ff_request_frame(ctx->inputs[i])) < 0)
-                return ret;
-    return 0;
-}
-
 /**
  * Copy samples from several input streams to one output stream.
  * @param nb_inputs number of inputs
@@ -235,88 +215,101 @@
     }
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
+static void free_frames(int nb_inputs, AVFrame **input_frames)
 {
-    AVFilterContext *ctx = inlink->dst;
-    AMergeContext *s = ctx->priv;
-    AVFilterLink *const outlink = ctx->outputs[0];
-    int input_number;
-    int nb_samples, ns, i;
-    AVFrame *outbuf, *inbuf[SWR_CH_MAX];
-    uint8_t *ins[SWR_CH_MAX], *outs;
+    int i;
+    for (i = 0; i < nb_inputs; i++)
+        av_frame_free(&input_frames[i]);
+}
 
-    for (input_number = 0; input_number < s->nb_inputs; input_number++)
-        if (inlink == ctx->inputs[input_number])
-            break;
-    av_assert1(input_number < s->nb_inputs);
-    if (ff_bufqueue_is_full(&s->in[input_number].queue)) {
-        av_frame_free(&insamples);
-        return AVERROR(ENOMEM);
+static int try_push_frame(AVFilterContext *ctx, int nb_samples)
+{
+    AMergeContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    int i, ret;
+    AVFrame *outbuf, *inbuf[SWR_CH_MAX] = { NULL };
+    uint8_t *outs, *ins[SWR_CH_MAX];
+
+    for (i = 0; i < ctx->nb_inputs; i++) {
+        ret = ff_inlink_consume_samples(ctx->inputs[i], nb_samples, nb_samples, &inbuf[i]);
+        if (ret < 0) {
+            free_frames(i, inbuf);
+            return ret;
+        }
+        ins[i] = inbuf[i]->data[0];
     }
-    ff_bufqueue_add(ctx, &s->in[input_number].queue, av_frame_clone(insamples));
-    s->in[input_number].nb_samples += insamples->nb_samples;
-    av_frame_free(&insamples);
-    nb_samples = s->in[0].nb_samples;
-    for (i = 1; i < s->nb_inputs; i++)
-        nb_samples = FFMIN(nb_samples, s->in[i].nb_samples);
-    if (!nb_samples)
-        return 0;
 
     outbuf = ff_get_audio_buffer(ctx->outputs[0], nb_samples);
-    if (!outbuf)
+    if (!outbuf) {
+        free_frames(s->nb_inputs, inbuf);
         return AVERROR(ENOMEM);
-    outs = outbuf->data[0];
-    for (i = 0; i < s->nb_inputs; i++) {
-        inbuf[i] = ff_bufqueue_peek(&s->in[i].queue, 0);
-        ins[i] = inbuf[i]->data[0] +
-                 s->in[i].pos * s->in[i].nb_ch * s->bps;
     }
-    av_frame_copy_props(outbuf, inbuf[0]);
-    outbuf->pts = inbuf[0]->pts == AV_NOPTS_VALUE ? AV_NOPTS_VALUE :
-                  inbuf[0]->pts +
-                  av_rescale_q(s->in[0].pos,
-                               av_make_q(1, ctx->inputs[0]->sample_rate),
-                               ctx->outputs[0]->time_base);
+
+    outs = outbuf->data[0];
+    outbuf->pts = inbuf[0]->pts;
 
     outbuf->nb_samples     = nb_samples;
     outbuf->channel_layout = outlink->channel_layout;
     outbuf->channels       = outlink->channels;
 
     while (nb_samples) {
-        ns = nb_samples;
-        for (i = 0; i < s->nb_inputs; i++)
-            ns = FFMIN(ns, inbuf[i]->nb_samples - s->in[i].pos);
         /* Unroll the most common sample formats: speed +~350% for the loop,
            +~13% overall (including two common decoders) */
         switch (s->bps) {
             case 1:
-                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, ns, 1);
+                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, nb_samples, 1);
                 break;
             case 2:
-                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, ns, 2);
+                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, nb_samples, 2);
                 break;
             case 4:
-                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, ns, 4);
+                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, nb_samples, 4);
                 break;
             default:
-                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, ns, s->bps);
+                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, nb_samples, s->bps);
                 break;
         }
 
-        nb_samples -= ns;
-        for (i = 0; i < s->nb_inputs; i++) {
-            s->in[i].nb_samples -= ns;
-            s->in[i].pos += ns;
-            if (s->in[i].pos == inbuf[i]->nb_samples) {
-                s->in[i].pos = 0;
-                av_frame_free(&inbuf[i]);
-                ff_bufqueue_get(&s->in[i].queue);
-                inbuf[i] = ff_bufqueue_peek(&s->in[i].queue, 0);
-                ins[i] = inbuf[i] ? inbuf[i]->data[0] : NULL;
-            }
+        nb_samples = 0;
+    }
+
+    free_frames(s->nb_inputs, inbuf);
+    return ff_filter_frame(ctx->outputs[0], outbuf);
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    int i, status;
+    int ret, nb_samples;
+    int64_t pts;
+
+    FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
+
+    nb_samples = ff_framequeue_queued_samples(&ctx->inputs[0]->fifo);
+    for (i = 1; i < ctx->nb_inputs && nb_samples > 0; i++) {
+        nb_samples = FFMIN(ff_framequeue_queued_samples(&ctx->inputs[i]->fifo), nb_samples);
+    }
+
+    if (nb_samples) {
+        ret = try_push_frame(ctx, nb_samples);
+        if (ret < 0)
+            return ret;
+    }
+
+    for (i = 0; i < ctx->nb_inputs; i++) {
+        if (ff_framequeue_queued_samples(&ctx->inputs[i]->fifo))
+            continue;
+
+        if (ff_inlink_acknowledge_status(ctx->inputs[i], &status, &pts)) {
+            ff_outlink_set_status(ctx->outputs[0], status, pts);
+            return 0;
+        } else if (ff_outlink_frame_wanted(ctx->outputs[0])) {
+            ff_inlink_request_frame(ctx->inputs[i]);
+            return 0;
         }
     }
-    return ff_filter_frame(ctx->outputs[0], outbuf);
+
+    return 0;
 }
 
 static av_cold int init(AVFilterContext *ctx)
@@ -332,7 +325,6 @@
         AVFilterPad pad = {
             .name             = name,
             .type             = AVMEDIA_TYPE_AUDIO,
-            .filter_frame     = filter_frame,
         };
         if (!name)
             return AVERROR(ENOMEM);
@@ -349,7 +341,6 @@
         .name          = "default",
         .type          = AVMEDIA_TYPE_AUDIO,
         .config_props  = config_output,
-        .request_frame = request_frame,
     },
     { NULL }
 };
@@ -362,6 +353,7 @@
     .init          = init,
     .uninit        = uninit,
     .query_formats = query_formats,
+    .activate      = activate,
     .inputs        = NULL,
     .outputs       = amerge_outputs,
     .priv_class    = &amerge_class,

diff --git a/libavfilter/af_amix.c b/libavfilter/af_amix.c
index 09848e5..ec2556f 100644
--- a/libavfilter/af_amix.c
+++ b/libavfilter/af_amix.c

@@ -162,6 +162,7 @@
     int active_inputs;          /**< number of input currently active */
     int duration_mode;          /**< mode for determining duration */
     float dropout_transition;   /**< transition time when an input drops out */
+    char *weights_str;          /**< string for custom weights for every input */
 
     int nb_channels;            /**< number of channels */
     int sample_rate;            /**< sample rate */
@@ -169,7 +170,9 @@
     AVAudioFifo **fifos;        /**< audio fifo for each input */
     uint8_t *input_state;       /**< current state of each input */
     float *input_scale;         /**< mixing scale factor for each input */
-    float scale_norm;           /**< normalization factor for all inputs */
+    float *weights;             /**< custom weights for every input */
+    float weight_sum;           /**< sum of custom weights for every input */
+    float *scale_norm;          /**< normalization factor for every input */
     int64_t next_pts;           /**< calculated pts for next output frame */
     FrameList *frame_list;      /**< list of frame info for the first input */
 } MixContext;
@@ -188,6 +191,8 @@
     { "dropout_transition", "Transition time, in seconds, for volume "
                             "renormalization when an input stream ends.",
             OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A|F },
+    { "weights", "Set weight for each input.",
+            OFFSET(weights_str), AV_OPT_TYPE_STRING, {.str="1 1"}, 0, 0, A|F },
     { NULL }
 };
 
@@ -202,16 +207,26 @@
  */
 static void calculate_scales(MixContext *s, int nb_samples)
 {
+    float weight_sum = 0.f;
     int i;
 
-    if (s->scale_norm > s->active_inputs) {
-        s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
-        s->scale_norm = FFMAX(s->scale_norm, s->active_inputs);
+    for (i = 0; i < s->nb_inputs; i++)
+        if (s->input_state[i] & INPUT_ON)
+            weight_sum += s->weights[i];
+
+    for (i = 0; i < s->nb_inputs; i++) {
+        if (s->input_state[i] & INPUT_ON) {
+            if (s->scale_norm[i] > weight_sum / s->weights[i]) {
+                s->scale_norm[i] -= ((s->weight_sum / s->weights[i]) / s->nb_inputs) *
+                                    nb_samples / (s->dropout_transition * s->sample_rate);
+                s->scale_norm[i] = FFMAX(s->scale_norm[i], weight_sum / s->weights[i]);
+            }
+        }
     }
 
     for (i = 0; i < s->nb_inputs; i++) {
         if (s->input_state[i] & INPUT_ON)
-            s->input_scale[i] = 1.0f / s->scale_norm;
+            s->input_scale[i] = 1.0f / s->scale_norm[i];
         else
             s->input_scale[i] = 0.0f;
     }
@@ -251,9 +266,11 @@
     s->active_inputs = s->nb_inputs;
 
     s->input_scale = av_mallocz_array(s->nb_inputs, sizeof(*s->input_scale));
-    if (!s->input_scale)
+    s->scale_norm  = av_mallocz_array(s->nb_inputs, sizeof(*s->scale_norm));
+    if (!s->input_scale || !s->scale_norm)
         return AVERROR(ENOMEM);
-    s->scale_norm = s->active_inputs;
+    for (i = 0; i < s->nb_inputs; i++)
+        s->scale_norm[i] = s->weight_sum / s->weights[i];
     calculate_scales(s, 0);
 
     av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
@@ -408,6 +425,8 @@
     AVFrame *buf = NULL;
     int i, ret;
 
+    FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, ctx);
+
     for (i = 0; i < s->nb_inputs; i++) {
         AVFilterLink *inlink = ctx->inputs[i];
 
@@ -487,15 +506,15 @@
 static av_cold int init(AVFilterContext *ctx)
 {
     MixContext *s = ctx->priv;
+    char *p, *arg, *saveptr = NULL;
+    float last_weight = 1.f;
     int i, ret;
 
     for (i = 0; i < s->nb_inputs; i++) {
-        char name[32];
         AVFilterPad pad = { 0 };
 
-        snprintf(name, sizeof(name), "input%d", i);
         pad.type           = AVMEDIA_TYPE_AUDIO;
-        pad.name           = av_strdup(name);
+        pad.name           = av_asprintf("input%d", i);
         if (!pad.name)
             return AVERROR(ENOMEM);
 
@@ -509,6 +528,26 @@
     if (!s->fdsp)
         return AVERROR(ENOMEM);
 
+    s->weights = av_mallocz_array(s->nb_inputs, sizeof(*s->weights));
+    if (!s->weights)
+        return AVERROR(ENOMEM);
+
+    p = s->weights_str;
+    for (i = 0; i < s->nb_inputs; i++) {
+        if (!(arg = av_strtok(p, " ", &saveptr)))
+            break;
+
+        p = NULL;
+        sscanf(arg, "%f", &last_weight);
+        s->weights[i] = last_weight;
+        s->weight_sum += last_weight;
+    }
+
+    for (; i < s->nb_inputs; i++) {
+        s->weights[i] = last_weight;
+        s->weight_sum += last_weight;
+    }
+
     return 0;
 }
 
@@ -526,6 +565,8 @@
     av_freep(&s->frame_list);
     av_freep(&s->input_state);
     av_freep(&s->input_scale);
+    av_freep(&s->scale_norm);
+    av_freep(&s->weights);
     av_freep(&s->fdsp);
 
     for (i = 0; i < ctx->nb_inputs; i++)

diff --git a/libavfilter/af_amultiply.c b/libavfilter/af_amultiply.c
new file mode 100644
index 0000000..a742f6a
--- /dev/null
+++ b/libavfilter/af_amultiply.c

@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/common.h"
+#include "libavutil/float_dsp.h"
+#include "libavutil/opt.h"
+
+#define FF_INTERNAL_FIELDS 1
+#include "framequeue.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "filters.h"
+#include "internal.h"
+
+typedef struct AudioMultiplyContext {
+    const AVClass *class;
+
+    AVFrame *frames[2];
+    int64_t pts;
+    int planes;
+    int channels;
+    int samples_align;
+
+    AVFloatDSPContext *fdsp;
+} AudioMultiplyContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats;
+    AVFilterChannelLayouts *layouts;
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
+        AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    AudioMultiplyContext *s = ctx->priv;
+    int i, ret, status;
+    int nb_samples;
+    int64_t pts;
+
+    FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
+
+    nb_samples = FFMIN(ff_framequeue_queued_samples(&ctx->inputs[0]->fifo),
+                       ff_framequeue_queued_samples(&ctx->inputs[1]->fifo));
+    for (i = 0; i < ctx->nb_inputs && nb_samples > 0; i++) {
+        if (s->frames[i])
+            continue;
+
+        if (ff_inlink_check_available_samples(ctx->inputs[i], nb_samples) > 0) {
+            ret = ff_inlink_consume_samples(ctx->inputs[i], nb_samples, nb_samples, &s->frames[i]);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    if (nb_samples > 0 && s->frames[0] && s->frames[1]) {
+        AVFrame *out;
+        int plane_samples;
+
+        if (av_sample_fmt_is_planar(ctx->inputs[0]->format))
+            plane_samples = FFALIGN(nb_samples, s->samples_align);
+        else
+            plane_samples = FFALIGN(nb_samples * s->channels, s->samples_align);
+
+        out = ff_get_audio_buffer(ctx->outputs[0], nb_samples);
+        if (!out)
+            return AVERROR(ENOMEM);
+
+        out->pts = s->pts;
+        s->pts += nb_samples;
+
+        if (av_get_packed_sample_fmt(ctx->inputs[0]->format) == AV_SAMPLE_FMT_FLT) {
+            for (i = 0; i < s->planes; i++) {
+                s->fdsp->vector_fmul((float *)out->extended_data[i],
+                                     (const float *)s->frames[0]->extended_data[i],
+                                     (const float *)s->frames[1]->extended_data[i],
+                                     plane_samples);
+            }
+        } else {
+            for (i = 0; i < s->planes; i++) {
+                s->fdsp->vector_dmul((double *)out->extended_data[i],
+                                     (const double *)s->frames[0]->extended_data[i],
+                                     (const double *)s->frames[1]->extended_data[i],
+                                     plane_samples);
+            }
+        }
+        emms_c();
+
+        av_frame_free(&s->frames[0]);
+        av_frame_free(&s->frames[1]);
+
+        ret = ff_filter_frame(ctx->outputs[0], out);
+        if (ret < 0)
+            return ret;
+    }
+
+    if (!nb_samples) {
+        for (i = 0; i < 2; i++) {
+            if (ff_inlink_acknowledge_status(ctx->inputs[i], &status, &pts)) {
+                ff_outlink_set_status(ctx->outputs[0], status, pts);
+                return 0;
+            }
+        }
+    }
+
+    if (ff_outlink_frame_wanted(ctx->outputs[0])) {
+        for (i = 0; i < 2; i++) {
+            if (ff_framequeue_queued_samples(&ctx->inputs[i]->fifo) > 0)
+                continue;
+            ff_inlink_request_frame(ctx->inputs[i]);
+            return 0;
+        }
+    }
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioMultiplyContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+
+    s->channels = inlink->channels;
+    s->planes = av_sample_fmt_is_planar(inlink->format) ? inlink->channels : 1;
+    s->samples_align = 16;
+
+    return 0;
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    AudioMultiplyContext *s = ctx->priv;
+
+    s->fdsp = avpriv_float_dsp_alloc(0);
+    if (!s->fdsp)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioMultiplyContext *s = ctx->priv;
+    av_freep(&s->fdsp);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name = "multiply0",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    {
+        .name = "multiply1",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_amultiply = {
+    .name           = "amultiply",
+    .description    = NULL_IF_CONFIG_SMALL("Multiply two audio streams."),
+    .priv_size      = sizeof(AudioMultiplyContext),
+    .init           = init,
+    .uninit         = uninit,
+    .activate       = activate,
+    .query_formats  = query_formats,
+    .inputs         = inputs,
+    .outputs        = outputs,
+};

diff --git a/libavfilter/af_anequalizer.c b/libavfilter/af_anequalizer.c
index 2403460..03d939f 100644
--- a/libavfilter/af_anequalizer.c
+++ b/libavfilter/af_anequalizer.c

@@ -189,6 +189,7 @@
 {
     AudioNEqualizerContext *s = ctx->priv;
     AVFilterPad pad, vpad;
+    int ret;
 
     pad = (AVFilterPad){
         .name         = av_strdup("out0"),
@@ -208,10 +209,19 @@
             return AVERROR(ENOMEM);
     }
 
-    ff_insert_outpad(ctx, 0, &pad);
+    ret = ff_insert_outpad(ctx, 0, &pad);
+    if (ret < 0) {
+        av_freep(&pad.name);
+        return ret;
+    }
 
-    if (s->draw_curves)
-        ff_insert_outpad(ctx, 1, &vpad);
+    if (s->draw_curves) {
+        ret = ff_insert_outpad(ctx, 1, &vpad);
+        if (ret < 0) {
+            av_freep(&vpad.name);
+            return ret;
+        }
+    }
 
     return 0;
 }
@@ -259,9 +269,8 @@
 {
     AudioNEqualizerContext *s = ctx->priv;
 
-    av_freep(&ctx->output_pads[0].name);
-    if (s->draw_curves)
-        av_freep(&ctx->output_pads[1].name);
+    for (int i = 0; i < ctx->nb_outputs; i++)
+        av_freep(&ctx->output_pads[i].name);
     av_frame_free(&s->video);
     av_freep(&s->filters);
     s->nb_filters = 0;

diff --git a/libavfilter/af_aphaser.c b/libavfilter/af_aphaser.c
index dcffc21..bf46cc8 100644
--- a/libavfilter/af_aphaser.c
+++ b/libavfilter/af_aphaser.c

@@ -247,7 +247,7 @@
     if (av_frame_is_writable(inbuf)) {
         outbuf = inbuf;
     } else {
-        outbuf = ff_get_audio_buffer(inlink, inbuf->nb_samples);
+        outbuf = ff_get_audio_buffer(outlink, inbuf->nb_samples);
         if (!outbuf) {
             av_frame_free(&inbuf);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_asetnsamples.c b/libavfilter/af_asetnsamples.c
index 3c2f66b..e8daec8 100644
--- a/libavfilter/af_asetnsamples.c
+++ b/libavfilter/af_asetnsamples.c

@@ -24,20 +24,18 @@
  * Filter that changes number of samples on single output operation
  */
 
-#include "libavutil/audio_fifo.h"
 #include "libavutil/avassert.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
 #include "avfilter.h"
 #include "audio.h"
+#include "filters.h"
 #include "internal.h"
 #include "formats.h"
 
 typedef struct ASNSContext {
     const AVClass *class;
     int nb_out_samples;  ///< how many samples to output
-    AVAudioFifo *fifo;   ///< samples are queued here
-    int64_t next_out_pts;
     int pad;
 } ASNSContext;
 
@@ -54,131 +52,57 @@
 
 AVFILTER_DEFINE_CLASS(asetnsamples);
 
-static av_cold int init(AVFilterContext *ctx)
+static int activate(AVFilterContext *ctx)
 {
-    ASNSContext *asns = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    ASNSContext *s = ctx->priv;
+    AVFrame *frame = NULL, *pad_frame;
+    int ret;
 
-    asns->next_out_pts = AV_NOPTS_VALUE;
-    av_log(ctx, AV_LOG_VERBOSE, "nb_out_samples:%d pad:%d\n", asns->nb_out_samples, asns->pad);
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
 
-    return 0;
-}
-
-static av_cold void uninit(AVFilterContext *ctx)
-{
-    ASNSContext *asns = ctx->priv;
-    av_audio_fifo_free(asns->fifo);
-}
-
-static int config_props_output(AVFilterLink *outlink)
-{
-    ASNSContext *asns = outlink->src->priv;
-
-    asns->fifo = av_audio_fifo_alloc(outlink->format, outlink->channels, asns->nb_out_samples);
-    if (!asns->fifo)
-        return AVERROR(ENOMEM);
-
-    return 0;
-}
-
-static int push_samples(AVFilterLink *outlink)
-{
-    ASNSContext *asns = outlink->src->priv;
-    AVFrame *outsamples = NULL;
-    int ret, nb_out_samples, nb_pad_samples;
-
-    if (asns->pad) {
-        nb_out_samples = av_audio_fifo_size(asns->fifo) ? asns->nb_out_samples : 0;
-        nb_pad_samples = nb_out_samples - FFMIN(nb_out_samples, av_audio_fifo_size(asns->fifo));
-    } else {
-        nb_out_samples = FFMIN(asns->nb_out_samples, av_audio_fifo_size(asns->fifo));
-        nb_pad_samples = 0;
-    }
-
-    if (!nb_out_samples)
-        return 0;
-
-    outsamples = ff_get_audio_buffer(outlink, nb_out_samples);
-    if (!outsamples)
-        return AVERROR(ENOMEM);
-
-    av_audio_fifo_read(asns->fifo,
-                       (void **)outsamples->extended_data, nb_out_samples);
-
-    if (nb_pad_samples)
-        av_samples_set_silence(outsamples->extended_data, nb_out_samples - nb_pad_samples,
-                               nb_pad_samples, outlink->channels,
-                               outlink->format);
-    outsamples->nb_samples     = nb_out_samples;
-    outsamples->channel_layout = outlink->channel_layout;
-    outsamples->sample_rate    = outlink->sample_rate;
-    outsamples->pts = asns->next_out_pts;
-
-    if (asns->next_out_pts != AV_NOPTS_VALUE)
-        asns->next_out_pts += av_rescale_q(nb_out_samples, (AVRational){1, outlink->sample_rate}, outlink->time_base);
-
-    ret = ff_filter_frame(outlink, outsamples);
+    ret = ff_inlink_consume_samples(inlink, s->nb_out_samples, s->nb_out_samples, &frame);
     if (ret < 0)
         return ret;
-    return nb_out_samples;
-}
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
-{
-    AVFilterContext *ctx = inlink->dst;
-    ASNSContext *asns = ctx->priv;
-    AVFilterLink *outlink = ctx->outputs[0];
-    int ret;
-    int nb_samples = insamples->nb_samples;
+    if (ret > 0) {
+        if ((!s->pad || (s->pad && frame->nb_samples == s->nb_out_samples)))
+            return ff_filter_frame(outlink, frame);
 
-    if (av_audio_fifo_space(asns->fifo) < nb_samples) {
-        av_log(ctx, AV_LOG_DEBUG, "No space for %d samples, stretching audio fifo\n", nb_samples);
-        ret = av_audio_fifo_realloc(asns->fifo, av_audio_fifo_size(asns->fifo) + nb_samples);
-        if (ret < 0) {
-            av_log(ctx, AV_LOG_ERROR,
-                   "Stretching audio fifo failed, discarded %d samples\n", nb_samples);
-            return -1;
+        pad_frame = ff_get_audio_buffer(outlink, s->nb_out_samples);
+        if (!pad_frame) {
+            av_frame_free(&frame);
+            return AVERROR(ENOMEM);
         }
-    }
-    av_audio_fifo_write(asns->fifo, (void **)insamples->extended_data, nb_samples);
-    if (asns->next_out_pts == AV_NOPTS_VALUE)
-        asns->next_out_pts = insamples->pts;
-    av_frame_free(&insamples);
 
-    while (av_audio_fifo_size(asns->fifo) >= asns->nb_out_samples)
-        push_samples(outlink);
-    return 0;
-}
-
-static int request_frame(AVFilterLink *outlink)
-{
-    AVFilterLink *inlink = outlink->src->inputs[0];
-    int ret;
-
-    ret = ff_request_frame(inlink);
-    if (ret == AVERROR_EOF) {
-        ret = push_samples(outlink);
-        return ret < 0 ? ret : ret > 0 ? 0 : AVERROR_EOF;
+        av_samples_copy(pad_frame->extended_data, frame->extended_data,
+                        0, 0, frame->nb_samples, frame->channels, frame->format);
+        av_samples_set_silence(pad_frame->extended_data, frame->nb_samples,
+                               s->nb_out_samples - frame->nb_samples, frame->channels,
+                               frame->format);
+        av_frame_free(&frame);
+        return ff_filter_frame(outlink, pad_frame);
     }
 
-    return ret;
+    FF_FILTER_FORWARD_STATUS(inlink, outlink);
+    FF_FILTER_FORWARD_WANTED(outlink, inlink);
+
+    return FFERROR_NOT_READY;
 }
 
 static const AVFilterPad asetnsamples_inputs[] = {
     {
-        .name         = "default",
-        .type         = AVMEDIA_TYPE_AUDIO,
-        .filter_frame = filter_frame,
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
     },
     { NULL }
 };
 
 static const AVFilterPad asetnsamples_outputs[] = {
     {
-        .name          = "default",
-        .type          = AVMEDIA_TYPE_AUDIO,
-        .request_frame = request_frame,
-        .config_props  = config_props_output,
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
     },
     { NULL }
 };
@@ -188,8 +112,7 @@
     .description = NULL_IF_CONFIG_SMALL("Set the number of samples for each output audio frames."),
     .priv_size   = sizeof(ASNSContext),
     .priv_class  = &asetnsamples_class,
-    .init        = init,
-    .uninit      = uninit,
     .inputs      = asetnsamples_inputs,
     .outputs     = asetnsamples_outputs,
+    .activate    = activate,
 };

diff --git a/libavfilter/af_astats.c b/libavfilter/af_astats.c
index 2922da9..a91cfdc 100644
--- a/libavfilter/af_astats.c
+++ b/libavfilter/af_astats.c

@@ -28,6 +28,7 @@
 
 typedef struct ChannelStats {
     double last;
+    double last_non_zero;
     double min_non_zero;
     double sigma_x, sigma_x2;
     double avg_sigma_x2, min_sigma_x2, max_sigma_x2;
@@ -40,6 +41,7 @@
     double diff1_sum_x2;
     uint64_t mask, imask;
     uint64_t min_count, max_count;
+    uint64_t zero_runs;
     uint64_t nb_samples;
 } ChannelStats;
 
@@ -127,6 +129,7 @@
         p->imask = 0xFFFFFFFFFFFFFFFF;
         p->min_count = 0;
         p->max_count = 0;
+        p->zero_runs = 0;
         p->nb_samples = 0;
     }
 }
@@ -196,6 +199,11 @@
         p->max_runs += p->max_run * p->max_run;
     }
 
+    if (d != 0) {
+        p->zero_runs += FFSIGN(d) != FFSIGN(p->last_non_zero);
+        p->last_non_zero = d;
+    }
+
     p->sigma_x += nd;
     p->sigma_x2 += nd * nd;
     p->avg_sigma_x2 = p->avg_sigma_x2 * s->mult + (1.0 - s->mult) * nd * nd;
@@ -292,6 +300,8 @@
         set_meta(metadata, c + 1, "Bit_depth", "%f", depth.num);
         set_meta(metadata, c + 1, "Bit_depth2", "%f", depth.den);
         set_meta(metadata, c + 1, "Dynamic_range", "%f", LINEAR_TO_DB(2 * FFMAX(FFABS(p->min), FFABS(p->max))/ p->min_non_zero));
+        set_meta(metadata, c + 1, "Zero_crossings", "%f", p->zero_runs);
+        set_meta(metadata, c + 1, "Zero_crossings_rate", "%f", p->zero_runs/(double)p->nb_samples);
     }
 
     set_meta(metadata, 0, "Overall.DC_offset", "%f", max_sigma_x / (nb_samples / s->nb_channels));
@@ -486,6 +496,8 @@
         bit_depth(s, p->mask, p->imask, &depth);
         av_log(ctx, AV_LOG_INFO, "Bit depth: %u/%u\n", depth.num, depth.den);
         av_log(ctx, AV_LOG_INFO, "Dynamic range: %f\n", LINEAR_TO_DB(2 * FFMAX(FFABS(p->min), FFABS(p->max))/ p->min_non_zero));
+        av_log(ctx, AV_LOG_INFO, "Zero crossings: %"PRId64"\n", p->zero_runs);
+        av_log(ctx, AV_LOG_INFO, "Zero crossings rate: %f\n", p->zero_runs/(double)p->nb_samples);
     }
 
     av_log(ctx, AV_LOG_INFO, "Overall\n");

diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
index 8b214bc..bfdad7d 100644
--- a/libavfilter/af_atempo.c
+++ b/libavfilter/af_atempo.c

@@ -149,11 +149,16 @@
     uint64_t nsamples_out;
 } ATempoContext;
 
+#define YAE_ATEMPO_MIN 0.5
+#define YAE_ATEMPO_MAX 100.0
+
 #define OFFSET(x) offsetof(ATempoContext, x)
 
 static const AVOption atempo_options[] = {
     { "tempo", "set tempo scale factor",
-      OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 0.5, 2.0,
+      OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 },
+      YAE_ATEMPO_MIN,
+      YAE_ATEMPO_MAX,
       AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM },
     { NULL }
 };
@@ -331,9 +336,9 @@
         return AVERROR(EINVAL);
     }
 
-    if (tempo < 0.5 || tempo > 2.0) {
-        av_log(ctx, AV_LOG_ERROR, "Tempo value %f exceeds [0.5, 2.0] range\n",
-               tempo);
+    if (tempo < YAE_ATEMPO_MIN || tempo > YAE_ATEMPO_MAX) {
+        av_log(ctx, AV_LOG_ERROR, "Tempo value %f exceeds [%f, %f] range\n",
+               tempo, YAE_ATEMPO_MIN, YAE_ATEMPO_MAX);
         return AVERROR(EINVAL);
     }
 
@@ -439,8 +444,8 @@
         return 0;
     }
 
-    // samples are not expected to be skipped:
-    av_assert0(read_size <= atempo->ring);
+    // samples are not expected to be skipped, unless tempo is greater than 2:
+    av_assert0(read_size <= atempo->ring || atempo->tempo > 2.0);
 
     while (atempo->position[0] < stop_here && src < src_end) {
         int src_samples = (src_end - src) / atempo->stride;

diff --git a/libavfilter/af_biquads.c b/libavfilter/af_biquads.c
index c4f619a..ae5e1c6 100644
--- a/libavfilter/af_biquads.c
+++ b/libavfilter/af_biquads.c

@@ -73,12 +73,13 @@
     equalizer,
     bass,
     treble,
-    band,
     bandpass,
     bandreject,
     allpass,
     highpass,
     lowpass,
+    lowshelf,
+    highshelf,
 };
 
 enum WidthType {
@@ -87,11 +88,14 @@
     OCTAVE,
     QFACTOR,
     SLOPE,
+    KHERTZ,
+    NB_WTYPE,
 };
 
 typedef struct ChanCache {
     double i1, i2;
     double o1, o2;
+    int clippings;
 } ChanCache;
 
 typedef struct BiquadsContext {
@@ -111,12 +115,11 @@
     double b0, b1, b2;
 
     ChanCache *cache;
-    int clippings;
     int block_align;
 
     void (*filter)(struct BiquadsContext *s, const void *ibuf, void *obuf, int len,
                    double *i1, double *i2, double *o1, double *o2,
-                   double b0, double b1, double b2, double a1, double a2);
+                   double b0, double b1, double b2, double a1, double a2, int *clippings);
 } BiquadsContext;
 
 static av_cold int init(AVFilterContext *ctx)
@@ -173,7 +176,7 @@
                             double *in1, double *in2,                         \
                             double *out1, double *out2,                       \
                             double b0, double b1, double b2,                  \
-                            double a1, double a2)                             \
+                            double a1, double a2, int *clippings)             \
 {                                                                             \
     const type *ibuf = input;                                                 \
     type *obuf = output;                                                      \
@@ -189,10 +192,10 @@
         o2 = i2 * b2 + i1 * b1 + ibuf[i] * b0 + o2 * a2 + o1 * a1;            \
         i2 = ibuf[i];                                                         \
         if (need_clipping && o2 < min) {                                      \
-            s->clippings++;                                                   \
+            (*clippings)++;                                                   \
             obuf[i] = min;                                                    \
         } else if (need_clipping && o2 > max) {                               \
-            s->clippings++;                                                   \
+            (*clippings)++;                                                   \
             obuf[i] = max;                                                    \
         } else {                                                              \
             obuf[i] = o2;                                                     \
@@ -201,10 +204,10 @@
         o1 = i1 * b2 + i2 * b1 + ibuf[i] * b0 + o1 * a2 + o2 * a1;            \
         i1 = ibuf[i];                                                         \
         if (need_clipping && o1 < min) {                                      \
-            s->clippings++;                                                   \
+            (*clippings)++;                                                   \
             obuf[i] = min;                                                    \
         } else if (need_clipping && o1 > max) {                               \
-            s->clippings++;                                                   \
+            (*clippings)++;                                                   \
             obuf[i] = max;                                                    \
         } else {                                                              \
             obuf[i] = o1;                                                     \
@@ -217,10 +220,10 @@
         o2 = o1;                                                              \
         o1 = o0;                                                              \
         if (need_clipping && o0 < min) {                                      \
-            s->clippings++;                                                   \
+            (*clippings)++;                                                   \
             obuf[i] = min;                                                    \
         } else if (need_clipping && o0 > max) {                               \
-            s->clippings++;                                                   \
+            (*clippings)++;                                                   \
             obuf[i] = max;                                                    \
         } else {                                                              \
             obuf[i] = o0;                                                     \
@@ -237,14 +240,14 @@
 BIQUAD_FILTER(flt, float,   -1., 1., 0)
 BIQUAD_FILTER(dbl, double,  -1., 1., 0)
 
-static int config_output(AVFilterLink *outlink)
+static int config_filter(AVFilterLink *outlink, int reset)
 {
     AVFilterContext *ctx    = outlink->src;
     BiquadsContext *s       = ctx->priv;
     AVFilterLink *inlink    = ctx->inputs[0];
     double A = exp(s->gain / 40 * log(10.));
     double w0 = 2 * M_PI * s->frequency / inlink->sample_rate;
-    double alpha;
+    double alpha, beta;
 
     if (w0 > M_PI) {
         av_log(ctx, AV_LOG_ERROR,
@@ -260,6 +263,9 @@
     case HERTZ:
         alpha = sin(w0) / (2 * s->frequency / s->width);
         break;
+    case KHERTZ:
+        alpha = sin(w0) / (2 * s->frequency / (s->width * 1000));
+        break;
     case OCTAVE:
         alpha = sin(w0) * sinh(log(2.) / 2 * s->width * w0 / sin(w0));
         break;
@@ -273,6 +279,8 @@
         av_assert0(0);
     }
 
+    beta = 2 * sqrt(A);
+
     switch (s->filter_type) {
     case biquad:
         break;
@@ -285,20 +293,24 @@
         s->b2 =   1 - alpha * A;
         break;
     case bass:
-        s->a0 =          (A + 1) + (A - 1) * cos(w0) + 2 * sqrt(A) * alpha;
+        beta = sqrt((A * A + 1) - (A - 1) * (A - 1));
+    case lowshelf:
+        s->a0 =          (A + 1) + (A - 1) * cos(w0) + beta * alpha;
         s->a1 =    -2 * ((A - 1) + (A + 1) * cos(w0));
-        s->a2 =          (A + 1) + (A - 1) * cos(w0) - 2 * sqrt(A) * alpha;
-        s->b0 =     A * ((A + 1) - (A - 1) * cos(w0) + 2 * sqrt(A) * alpha);
+        s->a2 =          (A + 1) + (A - 1) * cos(w0) - beta * alpha;
+        s->b0 =     A * ((A + 1) - (A - 1) * cos(w0) + beta * alpha);
         s->b1 = 2 * A * ((A - 1) - (A + 1) * cos(w0));
-        s->b2 =     A * ((A + 1) - (A - 1) * cos(w0) - 2 * sqrt(A) * alpha);
+        s->b2 =     A * ((A + 1) - (A - 1) * cos(w0) - beta * alpha);
         break;
     case treble:
-        s->a0 =          (A + 1) - (A - 1) * cos(w0) + 2 * sqrt(A) * alpha;
+        beta = sqrt((A * A + 1) - (A - 1) * (A - 1));
+    case highshelf:
+        s->a0 =          (A + 1) - (A - 1) * cos(w0) + beta * alpha;
         s->a1 =     2 * ((A - 1) - (A + 1) * cos(w0));
-        s->a2 =          (A + 1) - (A - 1) * cos(w0) - 2 * sqrt(A) * alpha;
-        s->b0 =     A * ((A + 1) + (A - 1) * cos(w0) + 2 * sqrt(A) * alpha);
+        s->a2 =          (A + 1) - (A - 1) * cos(w0) - beta * alpha;
+        s->b0 =     A * ((A + 1) + (A - 1) * cos(w0) + beta * alpha);
         s->b1 =-2 * A * ((A - 1) + (A + 1) * cos(w0));
-        s->b2 =     A * ((A + 1) + (A - 1) * cos(w0) - 2 * sqrt(A) * alpha);
+        s->b2 =     A * ((A + 1) + (A - 1) * cos(w0) - beta * alpha);
         break;
     case bandpass:
         if (s->csg) {
@@ -371,16 +383,20 @@
         av_assert0(0);
     }
 
+    av_log(ctx, AV_LOG_VERBOSE, "a=%f %f %f:b=%f %f %f\n", s->a0, s->a1, s->a2, s->b0, s->b1, s->b2);
+
     s->a1 /= s->a0;
     s->a2 /= s->a0;
     s->b0 /= s->a0;
     s->b1 /= s->a0;
     s->b2 /= s->a0;
+    s->a0 /= s->a0;
 
     s->cache = av_realloc_f(s->cache, sizeof(ChanCache), inlink->channels);
     if (!s->cache)
         return AVERROR(ENOMEM);
-    memset(s->cache, 0, sizeof(ChanCache) * inlink->channels);
+    if (reset)
+        memset(s->cache, 0, sizeof(ChanCache) * inlink->channels);
 
     switch (inlink->format) {
     case AV_SAMPLE_FMT_S16P: s->filter = biquad_s16; break;
@@ -395,19 +411,55 @@
     return 0;
 }
 
+static int config_output(AVFilterLink *outlink)
+{
+    return config_filter(outlink, 1);
+}
+
+typedef struct ThreadData {
+    AVFrame *in, *out;
+} ThreadData;
+
+static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    AVFilterLink *inlink = ctx->inputs[0];
+    ThreadData *td = arg;
+    AVFrame *buf = td->in;
+    AVFrame *out_buf = td->out;
+    BiquadsContext *s = ctx->priv;
+    const int start = (buf->channels * jobnr) / nb_jobs;
+    const int end = (buf->channels * (jobnr+1)) / nb_jobs;
+    int ch;
+
+    for (ch = start; ch < end; ch++) {
+        if (!((av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels))) {
+            if (buf != out_buf)
+                memcpy(out_buf->extended_data[ch], buf->extended_data[ch],
+                       buf->nb_samples * s->block_align);
+            continue;
+        }
+
+        s->filter(s, buf->extended_data[ch], out_buf->extended_data[ch], buf->nb_samples,
+                  &s->cache[ch].i1, &s->cache[ch].i2, &s->cache[ch].o1, &s->cache[ch].o2,
+                  s->b0, s->b1, s->b2, s->a1, s->a2, &s->cache[ch].clippings);
+    }
+
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
 {
     AVFilterContext  *ctx = inlink->dst;
     BiquadsContext *s     = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out_buf;
-    int nb_samples = buf->nb_samples;
+    ThreadData td;
     int ch;
 
     if (av_frame_is_writable(buf)) {
         out_buf = buf;
     } else {
-        out_buf = ff_get_audio_buffer(inlink, nb_samples);
+        out_buf = ff_get_audio_buffer(outlink, buf->nb_samples);
         if (!out_buf) {
             av_frame_free(&buf);
             return AVERROR(ENOMEM);
@@ -415,22 +467,16 @@
         av_frame_copy_props(out_buf, buf);
     }
 
-    for (ch = 0; ch < buf->channels; ch++) {
-        if (!((av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels))) {
-            if (buf != out_buf)
-                memcpy(out_buf->extended_data[ch], buf->extended_data[ch], nb_samples * s->block_align);
-            continue;
-        }
-        s->filter(s, buf->extended_data[ch],
-                  out_buf->extended_data[ch], nb_samples,
-                  &s->cache[ch].i1, &s->cache[ch].i2,
-                  &s->cache[ch].o1, &s->cache[ch].o2,
-                  s->b0, s->b1, s->b2, s->a1, s->a2);
-    }
+    td.in = buf;
+    td.out = out_buf;
+    ctx->internal->execute(ctx, filter_channel, &td, NULL, FFMIN(outlink->channels, ff_filter_get_nb_threads(ctx)));
 
-    if (s->clippings > 0)
-        av_log(ctx, AV_LOG_WARNING, "clipping %d times. Please reduce gain.\n", s->clippings);
-    s->clippings = 0;
+    for (ch = 0; ch < outlink->channels; ch++) {
+        if (s->cache[ch].clippings > 0)
+            av_log(ctx, AV_LOG_WARNING, "Channel %d clipping %d times. Please reduce gain.\n",
+                   ch, s->cache[ch].clippings);
+        s->cache[ch].clippings = 0;
+    }
 
     if (buf != out_buf)
         av_frame_free(&buf);
@@ -438,6 +484,125 @@
     return ff_filter_frame(outlink, out_buf);
 }
 
+static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
+                           char *res, int res_len, int flags)
+{
+    BiquadsContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+
+    if ((!strcmp(cmd, "frequency") || !strcmp(cmd, "f")) &&
+        (s->filter_type == equalizer ||
+         s->filter_type == lowshelf  ||
+         s->filter_type == highshelf ||
+         s->filter_type == bass      ||
+         s->filter_type == treble    ||
+         s->filter_type == bandpass  ||
+         s->filter_type == bandreject||
+         s->filter_type == lowpass   ||
+         s->filter_type == highpass  ||
+         s->filter_type == allpass)) {
+        double freq;
+
+        if (sscanf(args, "%lf", &freq) != 1) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid frequency value.\n");
+            return AVERROR(EINVAL);
+        }
+
+        s->frequency = freq;
+    } else if ((!strcmp(cmd, "gain") || !strcmp(cmd, "g")) &&
+        (s->filter_type == equalizer ||
+         s->filter_type == lowshelf  ||
+         s->filter_type == highshelf ||
+         s->filter_type == bass      ||
+         s->filter_type == treble)) {
+        double gain;
+
+        if (sscanf(args, "%lf", &gain) != 1) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid gain value.\n");
+            return AVERROR(EINVAL);
+        }
+
+        s->gain = gain;
+    } else if ((!strcmp(cmd, "width") || !strcmp(cmd, "w")) &&
+        (s->filter_type == equalizer ||
+         s->filter_type == lowshelf  ||
+         s->filter_type == highshelf ||
+         s->filter_type == bass      ||
+         s->filter_type == treble    ||
+         s->filter_type == bandpass  ||
+         s->filter_type == bandreject||
+         s->filter_type == lowpass   ||
+         s->filter_type == highpass  ||
+         s->filter_type == allpass)) {
+        double width;
+
+        if (sscanf(args, "%lf", &width) != 1) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid width value.\n");
+            return AVERROR(EINVAL);
+        }
+
+        s->width = width;
+    } else if ((!strcmp(cmd, "width_type") || !strcmp(cmd, "t")) &&
+        (s->filter_type == equalizer ||
+         s->filter_type == lowshelf  ||
+         s->filter_type == highshelf ||
+         s->filter_type == bass      ||
+         s->filter_type == treble    ||
+         s->filter_type == bandpass  ||
+         s->filter_type == bandreject||
+         s->filter_type == lowpass   ||
+         s->filter_type == highpass  ||
+         s->filter_type == allpass)) {
+        char width_type;
+
+        if (sscanf(args, "%c", &width_type) != 1) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid width_type value.\n");
+            return AVERROR(EINVAL);
+        }
+
+        switch (width_type) {
+        case 'h': width_type = HERTZ;   break;
+        case 'q': width_type = QFACTOR; break;
+        case 'o': width_type = OCTAVE;  break;
+        case 's': width_type = SLOPE;   break;
+        case 'k': width_type = KHERTZ;  break;
+        default:
+            av_log(ctx, AV_LOG_ERROR, "Invalid width_type value: %c\n", width_type);
+            return AVERROR(EINVAL);
+        }
+
+        s->width_type = width_type;
+    } else if ((!strcmp(cmd, "a0") ||
+                !strcmp(cmd, "a1") ||
+                !strcmp(cmd, "a2") ||
+                !strcmp(cmd, "b0") ||
+                !strcmp(cmd, "b1") ||
+                !strcmp(cmd, "b2")) &&
+               s->filter_type == biquad) {
+        double value;
+
+        if (sscanf(args, "%lf", &value) != 1) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid biquad value.\n");
+            return AVERROR(EINVAL);
+        }
+
+        if (!strcmp(cmd, "a0"))
+            s->a0 = value;
+        else if (!strcmp(cmd, "a1"))
+            s->a1 = value;
+        else if (!strcmp(cmd, "a2"))
+            s->a2 = value;
+        else if (!strcmp(cmd, "b0"))
+            s->b0 = value;
+        else if (!strcmp(cmd, "b1"))
+            s->b1 = value;
+        else if (!strcmp(cmd, "b2"))
+            s->b2 = value;
+    }
+
+    return config_filter(outlink, 0);
+}
+
 static av_cold void uninit(AVFilterContext *ctx)
 {
     BiquadsContext *s = ctx->priv;
@@ -486,20 +651,23 @@
     .inputs        = inputs,                             \
     .outputs       = outputs,                            \
     .priv_class    = &name_##_class,                     \
+    .process_command = process_command,                  \
+    .flags         = AVFILTER_FLAG_SLICE_THREADS,        \
 }
 
 #if CONFIG_EQUALIZER_FILTER
 static const AVOption equalizer_options[] = {
     {"frequency", "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, 999999, FLAGS},
     {"f",         "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, 999999, FLAGS},
-    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
-    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
+    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
     {"h", "Hz", 0, AV_OPT_TYPE_CONST, {.i64=HERTZ}, 0, 0, FLAGS, "width_type"},
     {"q", "Q-Factor", 0, AV_OPT_TYPE_CONST, {.i64=QFACTOR}, 0, 0, FLAGS, "width_type"},
     {"o", "octave", 0, AV_OPT_TYPE_CONST, {.i64=OCTAVE}, 0, 0, FLAGS, "width_type"},
     {"s", "slope", 0, AV_OPT_TYPE_CONST, {.i64=SLOPE}, 0, 0, FLAGS, "width_type"},
-    {"width", "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 999, FLAGS},
-    {"w",     "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 999, FLAGS},
+    {"k", "kHz", 0, AV_OPT_TYPE_CONST, {.i64=KHERTZ}, 0, 0, FLAGS, "width_type"},
+    {"width", "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 99999, FLAGS},
+    {"w",     "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 99999, FLAGS},
     {"gain", "set gain", OFFSET(gain), AV_OPT_TYPE_DOUBLE, {.dbl=0}, -900, 900, FLAGS},
     {"g",    "set gain", OFFSET(gain), AV_OPT_TYPE_DOUBLE, {.dbl=0}, -900, 900, FLAGS},
     {"channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
@@ -513,12 +681,13 @@
 static const AVOption bass_options[] = {
     {"frequency", "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=100}, 0, 999999, FLAGS},
     {"f",         "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=100}, 0, 999999, FLAGS},
-    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
-    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
+    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
     {"h", "Hz", 0, AV_OPT_TYPE_CONST, {.i64=HERTZ}, 0, 0, FLAGS, "width_type"},
     {"q", "Q-Factor", 0, AV_OPT_TYPE_CONST, {.i64=QFACTOR}, 0, 0, FLAGS, "width_type"},
     {"o", "octave", 0, AV_OPT_TYPE_CONST, {.i64=OCTAVE}, 0, 0, FLAGS, "width_type"},
     {"s", "slope", 0, AV_OPT_TYPE_CONST, {.i64=SLOPE}, 0, 0, FLAGS, "width_type"},
+    {"k", "kHz", 0, AV_OPT_TYPE_CONST, {.i64=KHERTZ}, 0, 0, FLAGS, "width_type"},
     {"width", "set shelf transition steep", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
     {"w",     "set shelf transition steep", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
     {"gain", "set gain", OFFSET(gain), AV_OPT_TYPE_DOUBLE, {.dbl=0}, -900, 900, FLAGS},
@@ -534,12 +703,13 @@
 static const AVOption treble_options[] = {
     {"frequency", "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
     {"f",         "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
-    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
-    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
+    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
     {"h", "Hz", 0, AV_OPT_TYPE_CONST, {.i64=HERTZ}, 0, 0, FLAGS, "width_type"},
     {"q", "Q-Factor", 0, AV_OPT_TYPE_CONST, {.i64=QFACTOR}, 0, 0, FLAGS, "width_type"},
     {"o", "octave", 0, AV_OPT_TYPE_CONST, {.i64=OCTAVE}, 0, 0, FLAGS, "width_type"},
     {"s", "slope", 0, AV_OPT_TYPE_CONST, {.i64=SLOPE}, 0, 0, FLAGS, "width_type"},
+    {"k", "kHz", 0, AV_OPT_TYPE_CONST, {.i64=KHERTZ}, 0, 0, FLAGS, "width_type"},
     {"width", "set shelf transition steep", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
     {"w",     "set shelf transition steep", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
     {"gain", "set gain", OFFSET(gain), AV_OPT_TYPE_DOUBLE, {.dbl=0}, -900, 900, FLAGS},
@@ -555,14 +725,15 @@
 static const AVOption bandpass_options[] = {
     {"frequency", "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
     {"f",         "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
-    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
-    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
+    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
     {"h", "Hz", 0, AV_OPT_TYPE_CONST, {.i64=HERTZ}, 0, 0, FLAGS, "width_type"},
     {"q", "Q-Factor", 0, AV_OPT_TYPE_CONST, {.i64=QFACTOR}, 0, 0, FLAGS, "width_type"},
     {"o", "octave", 0, AV_OPT_TYPE_CONST, {.i64=OCTAVE}, 0, 0, FLAGS, "width_type"},
     {"s", "slope", 0, AV_OPT_TYPE_CONST, {.i64=SLOPE}, 0, 0, FLAGS, "width_type"},
-    {"width", "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 999, FLAGS},
-    {"w",     "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 999, FLAGS},
+    {"k", "kHz", 0, AV_OPT_TYPE_CONST, {.i64=KHERTZ}, 0, 0, FLAGS, "width_type"},
+    {"width", "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
+    {"w",     "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
     {"csg",   "use constant skirt gain", OFFSET(csg), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
     {"channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
     {"c",        "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
@@ -575,14 +746,15 @@
 static const AVOption bandreject_options[] = {
     {"frequency", "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
     {"f",         "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
-    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
-    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
+    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
     {"h", "Hz", 0, AV_OPT_TYPE_CONST, {.i64=HERTZ}, 0, 0, FLAGS, "width_type"},
     {"q", "Q-Factor", 0, AV_OPT_TYPE_CONST, {.i64=QFACTOR}, 0, 0, FLAGS, "width_type"},
     {"o", "octave", 0, AV_OPT_TYPE_CONST, {.i64=OCTAVE}, 0, 0, FLAGS, "width_type"},
     {"s", "slope", 0, AV_OPT_TYPE_CONST, {.i64=SLOPE}, 0, 0, FLAGS, "width_type"},
-    {"width", "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 999, FLAGS},
-    {"w",     "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 999, FLAGS},
+    {"k", "kHz", 0, AV_OPT_TYPE_CONST, {.i64=KHERTZ}, 0, 0, FLAGS, "width_type"},
+    {"width", "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
+    {"w",     "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
     {"channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
     {"c",        "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
     {NULL}
@@ -594,12 +766,13 @@
 static const AVOption lowpass_options[] = {
     {"frequency", "set frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=500}, 0, 999999, FLAGS},
     {"f",         "set frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=500}, 0, 999999, FLAGS},
-    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
-    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
+    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
     {"h", "Hz", 0, AV_OPT_TYPE_CONST, {.i64=HERTZ}, 0, 0, FLAGS, "width_type"},
     {"q", "Q-Factor", 0, AV_OPT_TYPE_CONST, {.i64=QFACTOR}, 0, 0, FLAGS, "width_type"},
     {"o", "octave", 0, AV_OPT_TYPE_CONST, {.i64=OCTAVE}, 0, 0, FLAGS, "width_type"},
     {"s", "slope", 0, AV_OPT_TYPE_CONST, {.i64=SLOPE}, 0, 0, FLAGS, "width_type"},
+    {"k", "kHz", 0, AV_OPT_TYPE_CONST, {.i64=KHERTZ}, 0, 0, FLAGS, "width_type"},
     {"width", "set width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.707}, 0, 99999, FLAGS},
     {"w",     "set width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.707}, 0, 99999, FLAGS},
     {"poles", "set number of poles", OFFSET(poles), AV_OPT_TYPE_INT, {.i64=2}, 1, 2, FLAGS},
@@ -615,12 +788,13 @@
 static const AVOption highpass_options[] = {
     {"frequency", "set frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
     {"f",         "set frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
-    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
-    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, SLOPE, FLAGS, "width_type"},
+    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
     {"h", "Hz", 0, AV_OPT_TYPE_CONST, {.i64=HERTZ}, 0, 0, FLAGS, "width_type"},
     {"q", "Q-Factor", 0, AV_OPT_TYPE_CONST, {.i64=QFACTOR}, 0, 0, FLAGS, "width_type"},
     {"o", "octave", 0, AV_OPT_TYPE_CONST, {.i64=OCTAVE}, 0, 0, FLAGS, "width_type"},
     {"s", "slope", 0, AV_OPT_TYPE_CONST, {.i64=SLOPE}, 0, 0, FLAGS, "width_type"},
+    {"k", "kHz", 0, AV_OPT_TYPE_CONST, {.i64=KHERTZ}, 0, 0, FLAGS, "width_type"},
     {"width", "set width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.707}, 0, 99999, FLAGS},
     {"w",     "set width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.707}, 0, 99999, FLAGS},
     {"poles", "set number of poles", OFFSET(poles), AV_OPT_TYPE_INT, {.i64=2}, 1, 2, FLAGS},
@@ -636,12 +810,13 @@
 static const AVOption allpass_options[] = {
     {"frequency", "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
     {"f",         "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
-    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=HERTZ}, HERTZ, SLOPE, FLAGS, "width_type"},
-    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=HERTZ}, HERTZ, SLOPE, FLAGS, "width_type"},
+    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=HERTZ}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=HERTZ}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
     {"h", "Hz", 0, AV_OPT_TYPE_CONST, {.i64=HERTZ}, 0, 0, FLAGS, "width_type"},
     {"q", "Q-Factor", 0, AV_OPT_TYPE_CONST, {.i64=QFACTOR}, 0, 0, FLAGS, "width_type"},
     {"o", "octave", 0, AV_OPT_TYPE_CONST, {.i64=OCTAVE}, 0, 0, FLAGS, "width_type"},
     {"s", "slope", 0, AV_OPT_TYPE_CONST, {.i64=SLOPE}, 0, 0, FLAGS, "width_type"},
+    {"k", "kHz", 0, AV_OPT_TYPE_CONST, {.i64=KHERTZ}, 0, 0, FLAGS, "width_type"},
     {"width", "set filter-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=707.1}, 0, 99999, FLAGS},
     {"w",     "set filter-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=707.1}, 0, 99999, FLAGS},
     {"channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
@@ -651,14 +826,58 @@
 
 DEFINE_BIQUAD_FILTER(allpass, "Apply a two-pole all-pass filter.");
 #endif  /* CONFIG_ALLPASS_FILTER */
+#if CONFIG_LOWSHELF_FILTER
+static const AVOption lowshelf_options[] = {
+    {"frequency", "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=100}, 0, 999999, FLAGS},
+    {"f",         "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=100}, 0, 999999, FLAGS},
+    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"h", "Hz", 0, AV_OPT_TYPE_CONST, {.i64=HERTZ}, 0, 0, FLAGS, "width_type"},
+    {"q", "Q-Factor", 0, AV_OPT_TYPE_CONST, {.i64=QFACTOR}, 0, 0, FLAGS, "width_type"},
+    {"o", "octave", 0, AV_OPT_TYPE_CONST, {.i64=OCTAVE}, 0, 0, FLAGS, "width_type"},
+    {"s", "slope", 0, AV_OPT_TYPE_CONST, {.i64=SLOPE}, 0, 0, FLAGS, "width_type"},
+    {"k", "kHz", 0, AV_OPT_TYPE_CONST, {.i64=KHERTZ}, 0, 0, FLAGS, "width_type"},
+    {"width", "set shelf transition steep", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
+    {"w",     "set shelf transition steep", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
+    {"gain", "set gain", OFFSET(gain), AV_OPT_TYPE_DOUBLE, {.dbl=0}, -900, 900, FLAGS},
+    {"g",    "set gain", OFFSET(gain), AV_OPT_TYPE_DOUBLE, {.dbl=0}, -900, 900, FLAGS},
+    {"channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
+    {"c",        "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
+    {NULL}
+};
+
+DEFINE_BIQUAD_FILTER(lowshelf, "Apply a low shelf filter.");
+#endif  /* CONFIG_LOWSHELF_FILTER */
+#if CONFIG_HIGHSHELF_FILTER
+static const AVOption highshelf_options[] = {
+    {"frequency", "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
+    {"f",         "set central frequency", OFFSET(frequency), AV_OPT_TYPE_DOUBLE, {.dbl=3000}, 0, 999999, FLAGS},
+    {"width_type", "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"t",          "set filter-width type", OFFSET(width_type), AV_OPT_TYPE_INT, {.i64=QFACTOR}, HERTZ, NB_WTYPE-1, FLAGS, "width_type"},
+    {"h", "Hz", 0, AV_OPT_TYPE_CONST, {.i64=HERTZ}, 0, 0, FLAGS, "width_type"},
+    {"q", "Q-Factor", 0, AV_OPT_TYPE_CONST, {.i64=QFACTOR}, 0, 0, FLAGS, "width_type"},
+    {"o", "octave", 0, AV_OPT_TYPE_CONST, {.i64=OCTAVE}, 0, 0, FLAGS, "width_type"},
+    {"s", "slope", 0, AV_OPT_TYPE_CONST, {.i64=SLOPE}, 0, 0, FLAGS, "width_type"},
+    {"k", "kHz", 0, AV_OPT_TYPE_CONST, {.i64=KHERTZ}, 0, 0, FLAGS, "width_type"},
+    {"width", "set shelf transition steep", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
+    {"w",     "set shelf transition steep", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 99999, FLAGS},
+    {"gain", "set gain", OFFSET(gain), AV_OPT_TYPE_DOUBLE, {.dbl=0}, -900, 900, FLAGS},
+    {"g",    "set gain", OFFSET(gain), AV_OPT_TYPE_DOUBLE, {.dbl=0}, -900, 900, FLAGS},
+    {"channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
+    {"c",        "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
+    {NULL}
+};
+
+DEFINE_BIQUAD_FILTER(highshelf, "Apply a high shelf filter.");
+#endif  /* CONFIG_HIGHSHELF_FILTER */
 #if CONFIG_BIQUAD_FILTER
 static const AVOption biquad_options[] = {
-    {"a0", NULL, OFFSET(a0), AV_OPT_TYPE_DOUBLE, {.dbl=1}, INT16_MIN, INT16_MAX, FLAGS},
-    {"a1", NULL, OFFSET(a1), AV_OPT_TYPE_DOUBLE, {.dbl=1}, INT16_MIN, INT16_MAX, FLAGS},
-    {"a2", NULL, OFFSET(a2), AV_OPT_TYPE_DOUBLE, {.dbl=1}, INT16_MIN, INT16_MAX, FLAGS},
-    {"b0", NULL, OFFSET(b0), AV_OPT_TYPE_DOUBLE, {.dbl=1}, INT16_MIN, INT16_MAX, FLAGS},
-    {"b1", NULL, OFFSET(b1), AV_OPT_TYPE_DOUBLE, {.dbl=1}, INT16_MIN, INT16_MAX, FLAGS},
-    {"b2", NULL, OFFSET(b2), AV_OPT_TYPE_DOUBLE, {.dbl=1}, INT16_MIN, INT16_MAX, FLAGS},
+    {"a0", NULL, OFFSET(a0), AV_OPT_TYPE_DOUBLE, {.dbl=1}, INT32_MIN, INT32_MAX, FLAGS},
+    {"a1", NULL, OFFSET(a1), AV_OPT_TYPE_DOUBLE, {.dbl=0}, INT32_MIN, INT32_MAX, FLAGS},
+    {"a2", NULL, OFFSET(a2), AV_OPT_TYPE_DOUBLE, {.dbl=0}, INT32_MIN, INT32_MAX, FLAGS},
+    {"b0", NULL, OFFSET(b0), AV_OPT_TYPE_DOUBLE, {.dbl=0}, INT32_MIN, INT32_MAX, FLAGS},
+    {"b1", NULL, OFFSET(b1), AV_OPT_TYPE_DOUBLE, {.dbl=0}, INT32_MIN, INT32_MAX, FLAGS},
+    {"b2", NULL, OFFSET(b2), AV_OPT_TYPE_DOUBLE, {.dbl=0}, INT32_MIN, INT32_MAX, FLAGS},
     {"channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
     {"c",        "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS},
     {NULL}

diff --git a/libavfilter/af_bs2b.c b/libavfilter/af_bs2b.c
index b7cfd38..c01b983 100644
--- a/libavfilter/af_bs2b.c
+++ b/libavfilter/af_bs2b.c

@@ -32,6 +32,8 @@
 #include "formats.h"
 #include "internal.h"
 
+typedef void (*filter_func)(t_bs2bdp bs2bdp, uint8_t *sample, int n);
+
 typedef struct Bs2bContext {
     const AVClass *class;
 
@@ -41,7 +43,7 @@
 
     t_bs2bdp bs2bp;
 
-    void (*filter)(t_bs2bdp bs2bdp, uint8_t *sample, int n);
+    filter_func filter;
 } Bs2bContext;
 
 #define OFFSET(x) offsetof(Bs2bContext, x)
@@ -133,7 +135,7 @@
     if (av_frame_is_writable(frame)) {
         out_frame = frame;
     } else {
-        out_frame = ff_get_audio_buffer(inlink, frame->nb_samples);
+        out_frame = ff_get_audio_buffer(outlink, frame->nb_samples);
         if (!out_frame) {
             av_frame_free(&frame);
             return AVERROR(ENOMEM);
@@ -165,19 +167,19 @@
 
     switch (inlink->format) {
     case AV_SAMPLE_FMT_U8:
-        bs2b->filter = bs2b_cross_feed_u8;
+        bs2b->filter = (filter_func) bs2b_cross_feed_u8;
         break;
     case AV_SAMPLE_FMT_S16:
-        bs2b->filter = (void*)bs2b_cross_feed_s16;
+        bs2b->filter = (filter_func) bs2b_cross_feed_s16;
         break;
     case AV_SAMPLE_FMT_S32:
-        bs2b->filter = (void*)bs2b_cross_feed_s32;
+        bs2b->filter = (filter_func) bs2b_cross_feed_s32;
         break;
     case AV_SAMPLE_FMT_FLT:
-        bs2b->filter = (void*)bs2b_cross_feed_f;
+        bs2b->filter = (filter_func) bs2b_cross_feed_f;
         break;
     case AV_SAMPLE_FMT_DBL:
-        bs2b->filter = (void*)bs2b_cross_feed_d;
+        bs2b->filter = (filter_func) bs2b_cross_feed_d;
         break;
     default:
         return AVERROR_BUG;

diff --git a/libavfilter/af_channelmap.c b/libavfilter/af_channelmap.c
index 7c2be95..285d76a 100644
--- a/libavfilter/af_channelmap.c
+++ b/libavfilter/af_channelmap.c

@@ -149,13 +149,6 @@
             else
                 mode = MAP_PAIR_STR_STR;
         }
-#if FF_API_OLD_FILTER_OPTS
-        if (strchr(mapping, ',')) {
-            av_log(ctx, AV_LOG_WARNING, "This syntax is deprecated, use "
-                   "'|' to separate the mappings.\n");
-            separator = ',';
-        }
-#endif
     }
 
     if (mode != MAP_NONE) {

diff --git a/libavfilter/af_channelsplit.c b/libavfilter/af_channelsplit.c
index 8c6b00f..821bb73 100644
--- a/libavfilter/af_channelsplit.c
+++ b/libavfilter/af_channelsplit.c

@@ -38,6 +38,9 @@
 
     uint64_t channel_layout;
     char    *channel_layout_str;
+    char    *channels_str;
+
+    int      map[64];
 } ChannelSplitContext;
 
 #define OFFSET(x) offsetof(ChannelSplitContext, x)
@@ -45,6 +48,7 @@
 #define F AV_OPT_FLAG_FILTERING_PARAM
 static const AVOption channelsplit_options[] = {
     { "channel_layout", "Input channel layout.", OFFSET(channel_layout_str), AV_OPT_TYPE_STRING, { .str = "stereo" }, .flags = A|F },
+    { "channels",        "Channels to extract.", OFFSET(channels_str),       AV_OPT_TYPE_STRING, { .str = "all" },    .flags = A|F },
     { NULL }
 };
 
@@ -53,8 +57,9 @@
 static av_cold int init(AVFilterContext *ctx)
 {
     ChannelSplitContext *s = ctx->priv;
+    uint64_t channel_layout;
     int nb_channels;
-    int ret = 0, i;
+    int all = 0, ret = 0, i;
 
     if (!(s->channel_layout = av_get_channel_layout(s->channel_layout_str))) {
         av_log(ctx, AV_LOG_ERROR, "Error parsing channel layout '%s'.\n",
@@ -63,14 +68,35 @@
         goto fail;
     }
 
-    nb_channels = av_get_channel_layout_nb_channels(s->channel_layout);
+
+    if (!strcmp(s->channels_str, "all")) {
+        nb_channels = av_get_channel_layout_nb_channels(s->channel_layout);
+        channel_layout = s->channel_layout;
+        all = 1;
+    } else {
+        if ((ret = av_get_extended_channel_layout(s->channels_str, &channel_layout, &nb_channels)) < 0)
+            return ret;
+    }
+
     for (i = 0; i < nb_channels; i++) {
-        uint64_t channel = av_channel_layout_extract_channel(s->channel_layout, i);
+        uint64_t channel = av_channel_layout_extract_channel(channel_layout, i);
         AVFilterPad pad  = { 0 };
 
         pad.type = AVMEDIA_TYPE_AUDIO;
         pad.name = av_get_channel_name(channel);
 
+        if (all) {
+            s->map[i] = i;
+        } else {
+            if ((ret = av_get_channel_layout_channel_index(s->channel_layout, channel)) < 0) {
+                av_log(ctx, AV_LOG_ERROR, "Channel name '%s' not present in channel layout '%s'.\n",
+                       av_get_channel_name(channel), s->channel_layout_str);
+                return ret;
+            }
+
+            s->map[i] = ret;
+        }
+
         if ((ret = ff_insert_outpad(ctx, i, &pad)) < 0) {
             return ret;
         }
@@ -96,7 +122,7 @@
 
     for (i = 0; i < ctx->nb_outputs; i++) {
         AVFilterChannelLayouts *out_layouts = NULL;
-        uint64_t channel = av_channel_layout_extract_channel(s->channel_layout, i);
+        uint64_t channel = av_channel_layout_extract_channel(s->channel_layout, s->map[i]);
 
         if ((ret = ff_add_channel_layout(&out_layouts, channel)) < 0 ||
             (ret = ff_channel_layouts_ref(out_layouts, &ctx->outputs[i]->in_channel_layouts)) < 0)
@@ -109,6 +135,7 @@
 static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
 {
     AVFilterContext *ctx = inlink->dst;
+    ChannelSplitContext *s = ctx->priv;
     int i, ret = 0;
 
     for (i = 0; i < ctx->nb_outputs; i++) {
@@ -119,9 +146,9 @@
             break;
         }
 
-        buf_out->data[0] = buf_out->extended_data[0] = buf_out->extended_data[i];
+        buf_out->data[0] = buf_out->extended_data[0] = buf_out->extended_data[s->map[i]];
         buf_out->channel_layout =
-            av_channel_layout_extract_channel(buf->channel_layout, i);
+            av_channel_layout_extract_channel(buf->channel_layout, s->map[i]);
         buf_out->channels = 1;
 
         ret = ff_filter_frame(ctx->outputs[i], buf_out);

diff --git a/libavfilter/af_chorus.c b/libavfilter/af_chorus.c
index 87c8290..29c47ab 100644
--- a/libavfilter/af_chorus.c
+++ b/libavfilter/af_chorus.c

@@ -247,7 +247,7 @@
     if (av_frame_is_writable(frame)) {
         out_frame = frame;
     } else {
-        out_frame = ff_get_audio_buffer(inlink, frame->nb_samples);
+        out_frame = ff_get_audio_buffer(ctx->outputs[0], frame->nb_samples);
         if (!out_frame) {
             av_frame_free(&frame);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_compand.c b/libavfilter/af_compand.c
index 8589b1e..c138f0b 100644
--- a/libavfilter/af_compand.c
+++ b/libavfilter/af_compand.c

@@ -185,7 +185,7 @@
     if (av_frame_is_writable(frame)) {
         out_frame = frame;
     } else {
-        out_frame = ff_get_audio_buffer(inlink, nb_samples);
+        out_frame = ff_get_audio_buffer(ctx->outputs[0], nb_samples);
         if (!out_frame) {
             av_frame_free(&frame);
             return AVERROR(ENOMEM);
@@ -249,7 +249,7 @@
 
             if (count >= s->delay_samples) {
                 if (!out_frame) {
-                    out_frame = ff_get_audio_buffer(inlink, nb_samples - i);
+                    out_frame = ff_get_audio_buffer(ctx->outputs[0], nb_samples - i);
                     if (!out_frame) {
                         av_frame_free(&frame);
                         return AVERROR(ENOMEM);

diff --git a/libavfilter/af_compensationdelay.c b/libavfilter/af_compensationdelay.c
index d5a3484..05285cd 100644
--- a/libavfilter/af_compensationdelay.c
+++ b/libavfilter/af_compensationdelay.c

@@ -131,7 +131,7 @@
     AVFrame *out;
     int n, ch;
 
-    out = ff_get_audio_buffer(inlink, in->nb_samples);
+    out = ff_get_audio_buffer(ctx->outputs[0], in->nb_samples);
     if (!out) {
         av_frame_free(&in);
         return AVERROR(ENOMEM);

diff --git a/libavfilter/af_crossfeed.c b/libavfilter/af_crossfeed.c
index d3def92..a0af280 100644
--- a/libavfilter/af_crossfeed.c
+++ b/libavfilter/af_crossfeed.c

@@ -99,7 +99,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_crystalizer.c b/libavfilter/af_crystalizer.c
index dec30aa..5b27e1f 100644
--- a/libavfilter/af_crystalizer.c
+++ b/libavfilter/af_crystalizer.c

@@ -173,7 +173,7 @@
 static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
-    CrystalizerContext *s    = ctx->priv;
+    CrystalizerContext *s = ctx->priv;
 
     switch (inlink->format) {
     case AV_SAMPLE_FMT_FLT:  s->filter = filter_flt;  break;
@@ -203,7 +203,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_drmeter.c b/libavfilter/af_drmeter.c
new file mode 100644
index 0000000..ecccb65
--- /dev/null
+++ b/libavfilter/af_drmeter.c

@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <float.h>
+
+#include "libavutil/ffmath.h"
+#include "libavutil/opt.h"
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+
+typedef struct ChannelStats {
+    uint64_t nb_samples;
+    uint64_t blknum;
+    float peak;
+    float sum;
+    uint32_t peaks[10001];
+    uint32_t rms[10001];
+} ChannelStats;
+
+typedef struct DRMeterContext {
+    const AVClass *class;
+    ChannelStats *chstats;
+    int nb_channels;
+    uint64_t tc_samples;
+    double time_constant;
+} DRMeterContext;
+
+#define OFFSET(x) offsetof(DRMeterContext, x)
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption drmeter_options[] = {
+    { "length", "set the window length", OFFSET(time_constant), AV_OPT_TYPE_DOUBLE, {.dbl=3}, .01, 10, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(drmeter);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats;
+    AVFilterChannelLayouts *layouts;
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    DRMeterContext *s = outlink->src->priv;
+
+    s->chstats = av_calloc(sizeof(*s->chstats), outlink->channels);
+    if (!s->chstats)
+        return AVERROR(ENOMEM);
+    s->nb_channels = outlink->channels;
+    s->tc_samples = s->time_constant * outlink->sample_rate + .5;
+
+    return 0;
+}
+
+static void finish_block(ChannelStats *p)
+{
+    int peak_bin, rms_bin;
+    float peak, rms;
+
+    rms = sqrt(2 * p->sum / p->nb_samples);
+    peak = p->peak;
+    rms_bin = av_clip(rms * 10000, 0, 10000);
+    peak_bin = av_clip(peak * 10000, 0, 10000);
+    p->rms[rms_bin]++;
+    p->peaks[peak_bin]++;
+
+    p->peak = 0;
+    p->sum = 0;
+    p->nb_samples = 0;
+    p->blknum++;
+}
+
+static void update_stat(DRMeterContext *s, ChannelStats *p, float sample)
+{
+    if (p->nb_samples >= s->tc_samples) {
+        finish_block(p);
+    }
+
+    p->peak = FFMAX(FFABS(sample), p->peak);
+    p->sum += sample * sample;
+    p->nb_samples++;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
+{
+    DRMeterContext *s = inlink->dst->priv;
+    const int channels = s->nb_channels;
+    int i, c;
+
+    switch (inlink->format) {
+    case AV_SAMPLE_FMT_FLTP:
+        for (c = 0; c < channels; c++) {
+            ChannelStats *p = &s->chstats[c];
+            const float *src = (const float *)buf->extended_data[c];
+
+            for (i = 0; i < buf->nb_samples; i++, src++)
+                update_stat(s, p, *src);
+        }
+        break;
+    case AV_SAMPLE_FMT_FLT: {
+        const float *src = (const float *)buf->extended_data[0];
+
+        for (i = 0; i < buf->nb_samples; i++) {
+            for (c = 0; c < channels; c++, src++)
+                update_stat(s, &s->chstats[c], *src);
+        }}
+        break;
+    }
+
+    return ff_filter_frame(inlink->dst->outputs[0], buf);
+}
+
+#define SQR(a) ((a)*(a))
+
+static void print_stats(AVFilterContext *ctx)
+{
+    DRMeterContext *s = ctx->priv;
+    float dr = 0;
+    int ch;
+
+    for (ch = 0; ch < s->nb_channels; ch++) {
+        ChannelStats *p = &s->chstats[ch];
+        float chdr, secondpeak, rmssum = 0;
+        int i, j, first = 0;
+
+        finish_block(p);
+
+        for (i = 0; i <= 10000; i++) {
+            if (p->peaks[10000 - i]) {
+                if (first)
+                    break;
+                first = 1;
+            }
+        }
+
+        secondpeak = (10000 - i) / 10000.;
+
+        for (i = 10000, j = 0; i >= 0 && j < 0.2 * p->blknum; i--) {
+            if (p->rms[i]) {
+                rmssum += SQR(i / 10000.) * p->rms[i];
+                j += p->rms[i];
+            }
+        }
+
+        chdr = 20 * log10(secondpeak / sqrt(rmssum / (0.2 * p->blknum)));
+        dr += chdr;
+        av_log(ctx, AV_LOG_INFO, "Channel %d: DR: %.1f\n", ch + 1, chdr);
+    }
+
+    av_log(ctx, AV_LOG_INFO, "Overall DR: %.1f\n", dr / s->nb_channels);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    DRMeterContext *s = ctx->priv;
+
+    if (s->nb_channels)
+        print_stats(ctx);
+    av_freep(&s->chstats);
+}
+
+static const AVFilterPad drmeter_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad drmeter_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_drmeter = {
+    .name          = "drmeter",
+    .description   = NULL_IF_CONFIG_SMALL("Measure audio dynamic range."),
+    .query_formats = query_formats,
+    .priv_size     = sizeof(DRMeterContext),
+    .priv_class    = &drmeter_class,
+    .uninit        = uninit,
+    .inputs        = drmeter_inputs,
+    .outputs       = drmeter_outputs,
+};

diff --git a/libavfilter/af_earwax.c b/libavfilter/af_earwax.c
index 7b880c8..cdd2b4f 100644
--- a/libavfilter/af_earwax.c
+++ b/libavfilter/af_earwax.c

@@ -115,7 +115,7 @@
 {
     AVFilterLink *outlink = inlink->dst->outputs[0];
     int16_t *taps, *endin, *in, *out;
-    AVFrame *outsamples = ff_get_audio_buffer(inlink, insamples->nb_samples);
+    AVFrame *outsamples = ff_get_audio_buffer(outlink, insamples->nb_samples);
     int len;
 
     if (!outsamples) {

diff --git a/libavfilter/af_extrastereo.c b/libavfilter/af_extrastereo.c
index a746006..13c6f47 100644
--- a/libavfilter/af_extrastereo.c
+++ b/libavfilter/af_extrastereo.c

@@ -71,7 +71,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);
@@ -90,9 +90,12 @@
         right   = average + mult * (right - average);
 
         if (s->clip) {
-            dst[n * 2    ] = av_clipf(left,  -1, 1);
-            dst[n * 2 + 1] = av_clipf(right, -1, 1);
+            left  = av_clipf(left,  -1, 1);
+            right = av_clipf(right, -1, 1);
         }
+
+        dst[n * 2    ] = left;
+        dst[n * 2 + 1] = right;
     }
 
     if (out != in)

diff --git a/libavfilter/af_flanger.c b/libavfilter/af_flanger.c
index a92367c..b7497a1 100644
--- a/libavfilter/af_flanger.c
+++ b/libavfilter/af_flanger.c

@@ -148,7 +148,7 @@
     if (av_frame_is_writable(frame)) {
         out_frame = frame;
     } else {
-        out_frame = ff_get_audio_buffer(inlink, frame->nb_samples);
+        out_frame = ff_get_audio_buffer(ctx->outputs[0], frame->nb_samples);
         if (!out_frame) {
             av_frame_free(&frame);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_haas.c b/libavfilter/af_haas.c
index 691c251..0cfc93a 100644
--- a/libavfilter/af_haas.c
+++ b/libavfilter/af_haas.c

@@ -144,7 +144,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_headphone.c b/libavfilter/af_headphone.c
index 3b76d1d..760b97b 100644
--- a/libavfilter/af_headphone.c
+++ b/libavfilter/af_headphone.c

@@ -20,7 +20,6 @@
 
 #include <math.h>
 
-#include "libavutil/audio_fifo.h"
 #include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/float_dsp.h"
@@ -29,12 +28,16 @@
 #include "libavcodec/avfft.h"
 
 #include "avfilter.h"
+#include "filters.h"
 #include "internal.h"
 #include "audio.h"
 
 #define TIME_DOMAIN      0
 #define FREQUENCY_DOMAIN 1
 
+#define HRIR_STEREO 0
+#define HRIR_MULTI  1
+
 typedef struct HeadphoneContext {
     const AVClass *class;
 
@@ -45,7 +48,6 @@
 
     int have_hrirs;
     int eof_hrirs;
-    int64_t pts;
 
     int ir_len;
 
@@ -64,6 +66,7 @@
     int buffer_length;
     int n_fft;
     int size;
+    int hrir_fmt;
 
     int *delay[2];
     float *data_ir[2];
@@ -75,7 +78,6 @@
 
     AVFloatDSPContext *fdsp;
     struct headphone_inputs {
-        AVAudioFifo *fifo;
         AVFrame     *frame;
         int          ir_len;
         int          delay_l;
@@ -130,14 +132,18 @@
         char buf[8];
 
         p = NULL;
-        if (parse_channel_name(s, s->nb_inputs - 1, &arg, &out_ch_id, buf)) {
+        if (parse_channel_name(s, s->nb_irs, &arg, &out_ch_id, buf)) {
             av_log(ctx, AV_LOG_WARNING, "Failed to parse \'%s\' as channel name.\n", buf);
             continue;
         }
-        s->mapping[s->nb_inputs - 1] = out_ch_id;
-        s->nb_inputs++;
+        s->mapping[s->nb_irs] = out_ch_id;
+        s->nb_irs++;
     }
-    s->nb_irs = s->nb_inputs - 1;
+
+    if (s->hrir_fmt == HRIR_MULTI)
+        s->nb_inputs = 2;
+    else
+        s->nb_inputs = s->nb_irs + 1;
 
     av_free(args);
 }
@@ -320,21 +326,13 @@
     return 0;
 }
 
-static int read_ir(AVFilterLink *inlink, AVFrame *frame)
+static int check_ir(AVFilterLink *inlink, int input_number)
 {
     AVFilterContext *ctx = inlink->dst;
     HeadphoneContext *s = ctx->priv;
-    int ir_len, max_ir_len, input_number;
+    int ir_len, max_ir_len;
 
-    for (input_number = 0; input_number < s->nb_inputs; input_number++)
-        if (inlink == ctx->inputs[input_number])
-            break;
-
-    av_audio_fifo_write(s->in[input_number].fifo, (void **)frame->extended_data,
-                        frame->nb_samples);
-    av_frame_free(&frame);
-
-    ir_len = av_audio_fifo_size(s->in[input_number].fifo);
+    ir_len = ff_inlink_queued_samples(inlink);
     max_ir_len = 65536;
     if (ir_len > max_ir_len) {
         av_log(ctx, AV_LOG_ERROR, "Too big length of IRs: %d > %d.\n", ir_len, max_ir_len);
@@ -346,22 +344,19 @@
     return 0;
 }
 
-static int headphone_frame(HeadphoneContext *s, AVFilterLink *outlink)
+static int headphone_frame(HeadphoneContext *s, AVFrame *in, AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    AVFrame *in = s->in[0].frame;
     int n_clippings[2] = { 0 };
     ThreadData td;
     AVFrame *out;
 
-    av_audio_fifo_read(s->in[0].fifo, (void **)in->extended_data, s->size);
-
     out = ff_get_audio_buffer(outlink, in->nb_samples);
-    if (!out)
+    if (!out) {
+        av_frame_free(&in);
         return AVERROR(ENOMEM);
-    out->pts = s->pts;
-    if (s->pts != AV_NOPTS_VALUE)
-        s->pts += av_rescale_q(out->nb_samples, (AVRational){1, outlink->sample_rate}, outlink->time_base);
+    }
+    out->pts = in->pts;
 
     td.in = in; td.out = out; td.write = s->write;
     td.delay = s->delay; td.ir = s->data_ir; td.n_clippings = n_clippings;
@@ -380,6 +375,7 @@
                n_clippings[0] + n_clippings[1], out->nb_samples * 2);
     }
 
+    av_frame_free(&in);
     return ff_filter_frame(outlink, out);
 }
 
@@ -398,10 +394,10 @@
     float *data_ir_r = NULL;
     int offset = 0, ret = 0;
     int n_fft;
-    int i, j;
+    int i, j, k;
 
     s->buffer_length = 1 << (32 - ff_clz(s->ir_len));
-    s->n_fft = n_fft = 1 << (32 - ff_clz(s->ir_len + inlink->sample_rate));
+    s->n_fft = n_fft = 1 << (32 - ff_clz(s->ir_len + s->size));
 
     if (s->type == FREQUENCY_DOMAIN) {
         fft_in_l = av_calloc(n_fft, sizeof(*fft_in_l));
@@ -429,8 +425,8 @@
 
     s->data_ir[0] = av_calloc(FFALIGN(s->ir_len, 16), sizeof(float) * s->nb_irs);
     s->data_ir[1] = av_calloc(FFALIGN(s->ir_len, 16), sizeof(float) * s->nb_irs);
-    s->delay[0] = av_malloc_array(s->nb_irs, sizeof(float));
-    s->delay[1] = av_malloc_array(s->nb_irs, sizeof(float));
+    s->delay[0] = av_calloc(s->nb_irs, sizeof(float));
+    s->delay[1] = av_calloc(s->nb_irs, sizeof(float));
 
     if (s->type == TIME_DOMAIN) {
         s->ringbuffer[0] = av_calloc(s->buffer_length, sizeof(float) * nb_input_channels);
@@ -438,8 +434,8 @@
     } else {
         s->ringbuffer[0] = av_calloc(s->buffer_length, sizeof(float));
         s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float));
-        s->temp_fft[0] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
-        s->temp_fft[1] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
+        s->temp_fft[0] = av_calloc(s->n_fft, sizeof(FFTComplex));
+        s->temp_fft[1] = av_calloc(s->n_fft, sizeof(FFTComplex));
         if (!s->temp_fft[0] || !s->temp_fft[1]) {
             ret = AVERROR(ENOMEM);
             goto fail;
@@ -452,19 +448,6 @@
         goto fail;
     }
 
-    s->in[0].frame = ff_get_audio_buffer(ctx->inputs[0], s->size);
-    if (!s->in[0].frame) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-    for (i = 0; i < s->nb_irs; i++) {
-        s->in[i + 1].frame = ff_get_audio_buffer(ctx->inputs[i + 1], s->ir_len);
-        if (!s->in[i + 1].frame) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
-    }
-
     if (s->type == TIME_DOMAIN) {
         s->temp_src[0] = av_calloc(FFALIGN(ir_len, 16), sizeof(float));
         s->temp_src[1] = av_calloc(FFALIGN(ir_len, 16), sizeof(float));
@@ -476,68 +459,119 @@
             goto fail;
         }
     } else {
-        data_hrtf_l = av_malloc_array(n_fft, sizeof(*data_hrtf_l) * nb_irs);
-        data_hrtf_r = av_malloc_array(n_fft, sizeof(*data_hrtf_r) * nb_irs);
+        data_hrtf_l = av_calloc(n_fft, sizeof(*data_hrtf_l) * nb_irs);
+        data_hrtf_r = av_calloc(n_fft, sizeof(*data_hrtf_r) * nb_irs);
         if (!data_hrtf_r || !data_hrtf_l) {
             ret = AVERROR(ENOMEM);
             goto fail;
         }
     }
 
-    for (i = 0; i < s->nb_irs; i++) {
+    for (i = 0; i < s->nb_inputs - 1; i++) {
         int len = s->in[i + 1].ir_len;
         int delay_l = s->in[i + 1].delay_l;
         int delay_r = s->in[i + 1].delay_r;
-        int idx = -1;
         float *ptr;
 
-        for (j = 0; j < inlink->channels; j++) {
-            if (s->mapping[i] < 0) {
-                continue;
-            }
-
-            if ((av_channel_layout_extract_channel(inlink->channel_layout, j)) == (1LL << s->mapping[i])) {
-                idx = j;
-                break;
-            }
-        }
-        if (idx == -1)
-            continue;
-
-        av_audio_fifo_read(s->in[i + 1].fifo, (void **)s->in[i + 1].frame->extended_data, len);
+        ret = ff_inlink_consume_samples(ctx->inputs[i + 1], len, len, &s->in[i + 1].frame);
+        if (ret < 0)
+            return ret;
         ptr = (float *)s->in[i + 1].frame->extended_data[0];
 
-        if (s->type == TIME_DOMAIN) {
-            offset = idx * FFALIGN(len, 16);
-            for (j = 0; j < len; j++) {
-                data_ir_l[offset + j] = ptr[len * 2 - j * 2 - 2] * gain_lin;
-                data_ir_r[offset + j] = ptr[len * 2 - j * 2 - 1] * gain_lin;
+        if (s->hrir_fmt == HRIR_STEREO) {
+            int idx = -1;
+
+            for (j = 0; j < inlink->channels; j++) {
+                if (s->mapping[i] < 0) {
+                    continue;
+                }
+
+                if ((av_channel_layout_extract_channel(inlink->channel_layout, j)) == (1LL << s->mapping[i])) {
+                    idx = i;
+                    break;
+                }
+            }
+
+            if (idx == -1)
+                continue;
+            if (s->type == TIME_DOMAIN) {
+                offset = idx * FFALIGN(len, 16);
+                for (j = 0; j < len; j++) {
+                    data_ir_l[offset + j] = ptr[len * 2 - j * 2 - 2] * gain_lin;
+                    data_ir_r[offset + j] = ptr[len * 2 - j * 2 - 1] * gain_lin;
+                }
+            } else {
+                memset(fft_in_l, 0, n_fft * sizeof(*fft_in_l));
+                memset(fft_in_r, 0, n_fft * sizeof(*fft_in_r));
+
+                offset = idx * n_fft;
+                for (j = 0; j < len; j++) {
+                    fft_in_l[delay_l + j].re = ptr[j * 2    ] * gain_lin;
+                    fft_in_r[delay_r + j].re = ptr[j * 2 + 1] * gain_lin;
+                }
+
+                av_fft_permute(s->fft[0], fft_in_l);
+                av_fft_calc(s->fft[0], fft_in_l);
+                memcpy(data_hrtf_l + offset, fft_in_l, n_fft * sizeof(*fft_in_l));
+                av_fft_permute(s->fft[0], fft_in_r);
+                av_fft_calc(s->fft[0], fft_in_r);
+                memcpy(data_hrtf_r + offset, fft_in_r, n_fft * sizeof(*fft_in_r));
             }
         } else {
-            memset(fft_in_l, 0, n_fft * sizeof(*fft_in_l));
-            memset(fft_in_r, 0, n_fft * sizeof(*fft_in_r));
+            int I, N = ctx->inputs[1]->channels;
 
-            offset = idx * n_fft;
-            for (j = 0; j < len; j++) {
-                fft_in_l[delay_l + j].re = ptr[j * 2    ] * gain_lin;
-                fft_in_r[delay_r + j].re = ptr[j * 2 + 1] * gain_lin;
+            for (k = 0; k < N / 2; k++) {
+                int idx = -1;
+
+                for (j = 0; j < inlink->channels; j++) {
+                    if (s->mapping[k] < 0) {
+                        continue;
+                    }
+
+                    if ((av_channel_layout_extract_channel(inlink->channel_layout, j)) == (1LL << s->mapping[k])) {
+                        idx = k;
+                        break;
+                    }
+                }
+                if (idx == -1)
+                    continue;
+
+                I = idx * 2;
+                if (s->type == TIME_DOMAIN) {
+                    offset = idx * FFALIGN(len, 16);
+                    for (j = 0; j < len; j++) {
+                        data_ir_l[offset + j] = ptr[len * N - j * N - N + I    ] * gain_lin;
+                        data_ir_r[offset + j] = ptr[len * N - j * N - N + I + 1] * gain_lin;
+                    }
+                } else {
+                    memset(fft_in_l, 0, n_fft * sizeof(*fft_in_l));
+                    memset(fft_in_r, 0, n_fft * sizeof(*fft_in_r));
+
+                    offset = idx * n_fft;
+                    for (j = 0; j < len; j++) {
+                        fft_in_l[delay_l + j].re = ptr[j * N + I    ] * gain_lin;
+                        fft_in_r[delay_r + j].re = ptr[j * N + I + 1] * gain_lin;
+                    }
+
+                    av_fft_permute(s->fft[0], fft_in_l);
+                    av_fft_calc(s->fft[0], fft_in_l);
+                    memcpy(data_hrtf_l + offset, fft_in_l, n_fft * sizeof(*fft_in_l));
+                    av_fft_permute(s->fft[0], fft_in_r);
+                    av_fft_calc(s->fft[0], fft_in_r);
+                    memcpy(data_hrtf_r + offset, fft_in_r, n_fft * sizeof(*fft_in_r));
+                }
             }
-
-            av_fft_permute(s->fft[0], fft_in_l);
-            av_fft_calc(s->fft[0], fft_in_l);
-            memcpy(data_hrtf_l + offset, fft_in_l, n_fft * sizeof(*fft_in_l));
-            av_fft_permute(s->fft[0], fft_in_r);
-            av_fft_calc(s->fft[0], fft_in_r);
-            memcpy(data_hrtf_r + offset, fft_in_r, n_fft * sizeof(*fft_in_r));
         }
+
+        av_frame_free(&s->in[i + 1].frame);
     }
 
     if (s->type == TIME_DOMAIN) {
         memcpy(s->data_ir[0], data_ir_l, sizeof(float) * nb_irs * FFALIGN(ir_len, 16));
         memcpy(s->data_ir[1], data_ir_r, sizeof(float) * nb_irs * FFALIGN(ir_len, 16));
     } else {
-        s->data_hrtf[0] = av_malloc_array(n_fft * s->nb_irs, sizeof(FFTComplex));
-        s->data_hrtf[1] = av_malloc_array(n_fft * s->nb_irs, sizeof(FFTComplex));
+        s->data_hrtf[0] = av_calloc(n_fft * s->nb_irs, sizeof(FFTComplex));
+        s->data_hrtf[1] = av_calloc(n_fft * s->nb_irs, sizeof(FFTComplex));
         if (!s->data_hrtf[0] || !s->data_hrtf[1]) {
             ret = AVERROR(ENOMEM);
             goto fail;
@@ -565,19 +599,47 @@
     return ret;
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+static int activate(AVFilterContext *ctx)
 {
-    AVFilterContext *ctx = inlink->dst;
     HeadphoneContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
     AVFilterLink *outlink = ctx->outputs[0];
-    int ret = 0;
+    AVFrame *in = NULL;
+    int i, ret;
 
-    av_audio_fifo_write(s->in[0].fifo, (void **)in->extended_data,
-                        in->nb_samples);
-    if (s->pts == AV_NOPTS_VALUE)
-        s->pts = in->pts;
+    FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
+    if (!s->eof_hrirs) {
+        for (i = 1; i < s->nb_inputs; i++) {
+            if (s->in[i].eof)
+                continue;
 
-    av_frame_free(&in);
+            if ((ret = check_ir(ctx->inputs[i], i)) < 0)
+                return ret;
+
+            if (!s->in[i].eof) {
+                if (ff_outlink_get_status(ctx->inputs[i]) == AVERROR_EOF)
+                    s->in[i].eof = 1;
+            }
+        }
+
+        for (i = 1; i < s->nb_inputs; i++) {
+            if (!s->in[i].eof)
+                break;
+        }
+
+        if (i != s->nb_inputs) {
+            if (ff_outlink_frame_wanted(ctx->outputs[0])) {
+                for (i = 1; i < s->nb_inputs; i++) {
+                    if (!s->in[i].eof)
+                        ff_inlink_request_frame(ctx->inputs[i]);
+                }
+            }
+
+            return 0;
+        } else {
+            s->eof_hrirs = 1;
+        }
+    }
 
     if (!s->have_hrirs && s->eof_hrirs) {
         ret = convert_coeffs(ctx, inlink);
@@ -585,14 +647,20 @@
             return ret;
     }
 
-    if (s->have_hrirs) {
-        while (av_audio_fifo_size(s->in[0].fifo) >= s->size) {
-            ret = headphone_frame(s, outlink);
-            if (ret < 0)
-                break;
-        }
+    if ((ret = ff_inlink_consume_samples(ctx->inputs[0], s->size, s->size, &in)) > 0) {
+        ret = headphone_frame(s, in, outlink);
+        if (ret < 0)
+            return ret;
     }
-    return ret;
+
+    if (ret < 0)
+        return ret;
+
+    FF_FILTER_FORWARD_STATUS(ctx->inputs[0], ctx->outputs[0]);
+    if (ff_outlink_frame_wanted(ctx->outputs[0]))
+        ff_inlink_request_frame(ctx->inputs[0]);
+
+    return 0;
 }
 
 static int query_formats(AVFilterContext *ctx)
@@ -600,6 +668,8 @@
     struct HeadphoneContext *s = ctx->priv;
     AVFilterFormats *formats = NULL;
     AVFilterChannelLayouts *layouts = NULL;
+    AVFilterChannelLayouts *stereo_layout = NULL;
+    AVFilterChannelLayouts *hrir_layouts = NULL;
     int ret, i;
 
     ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
@@ -617,18 +687,26 @@
     if (ret)
         return ret;
 
-    layouts = NULL;
-    ret = ff_add_channel_layout(&layouts, AV_CH_LAYOUT_STEREO);
+    ret = ff_add_channel_layout(&stereo_layout, AV_CH_LAYOUT_STEREO);
     if (ret)
         return ret;
 
-    for (i = 1; i < s->nb_inputs; i++) {
-        ret = ff_channel_layouts_ref(layouts, &ctx->inputs[i]->out_channel_layouts);
+    if (s->hrir_fmt == HRIR_MULTI) {
+        hrir_layouts = ff_all_channel_counts();
+        if (!hrir_layouts)
+            ret = AVERROR(ENOMEM);
+        ret = ff_channel_layouts_ref(hrir_layouts, &ctx->inputs[1]->out_channel_layouts);
         if (ret)
             return ret;
+    } else {
+        for (i = 1; i < s->nb_inputs; i++) {
+            ret = ff_channel_layouts_ref(stereo_layout, &ctx->inputs[i]->out_channel_layouts);
+            if (ret)
+                return ret;
+        }
     }
 
-    ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts);
+    ret = ff_channel_layouts_ref(stereo_layout, &ctx->outputs[0]->in_channel_layouts);
     if (ret)
         return ret;
 
@@ -643,14 +721,8 @@
     AVFilterContext *ctx = inlink->dst;
     HeadphoneContext *s = ctx->priv;
 
-    if (s->type == FREQUENCY_DOMAIN) {
-        inlink->partial_buf_size =
-        inlink->min_samples =
-        inlink->max_samples = inlink->sample_rate;
-    }
-
     if (s->nb_irs < inlink->channels) {
-        av_log(ctx, AV_LOG_ERROR, "Number of inputs must be >= %d.\n", inlink->channels + 1);
+        av_log(ctx, AV_LOG_ERROR, "Number of HRIRs must be >= %d.\n", inlink->channels);
         return AVERROR(EINVAL);
     }
 
@@ -666,7 +738,6 @@
         .name         = "in0",
         .type         = AVMEDIA_TYPE_AUDIO,
         .config_props = config_input,
-        .filter_frame = filter_frame,
     };
     if ((ret = ff_insert_inpad(ctx, 0, &pad)) < 0)
         return ret;
@@ -687,7 +758,6 @@
         AVFilterPad pad = {
             .name         = name,
             .type         = AVMEDIA_TYPE_AUDIO,
-            .filter_frame = read_ir,
         };
         if (!name)
             return AVERROR(ENOMEM);
@@ -700,7 +770,6 @@
     s->fdsp = avpriv_float_dsp_alloc(0);
     if (!s->fdsp)
         return AVERROR(ENOMEM);
-    s->pts = AV_NOPTS_VALUE;
 
     return 0;
 }
@@ -710,45 +779,21 @@
     AVFilterContext *ctx = outlink->src;
     HeadphoneContext *s = ctx->priv;
     AVFilterLink *inlink = ctx->inputs[0];
-    int i;
 
-    if (s->type == TIME_DOMAIN)
-        s->size = 1024;
-    else
-        s->size = inlink->sample_rate;
+    if (s->hrir_fmt == HRIR_MULTI) {
+        AVFilterLink *hrir_link = ctx->inputs[1];
 
-    for (i = 0; i < s->nb_inputs; i++) {
-        s->in[i].fifo = av_audio_fifo_alloc(ctx->inputs[i]->format, ctx->inputs[i]->channels, 1024);
-        if (!s->in[i].fifo)
-            return AVERROR(ENOMEM);
+        if (hrir_link->channels < inlink->channels * 2) {
+            av_log(ctx, AV_LOG_ERROR, "Number of channels in HRIR stream must be >= %d.\n", inlink->channels * 2);
+            return AVERROR(EINVAL);
+        }
     }
+
     s->gain_lfe = expf((s->gain - 3 * inlink->channels - 6 + s->lfe_gain) / 20 * M_LN10);
 
     return 0;
 }
 
-static int request_frame(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    HeadphoneContext *s = ctx->priv;
-    int i, ret;
-
-    for (i = 1; !s->eof_hrirs && i < s->nb_inputs; i++) {
-        if (!s->in[i].eof) {
-            ret = ff_request_frame(ctx->inputs[i]);
-            if (ret == AVERROR_EOF) {
-                s->in[i].eof = 1;
-                ret = 0;
-            }
-            return ret;
-        } else {
-            if (i == s->nb_inputs - 1)
-                s->eof_hrirs = 1;
-        }
-    }
-    return ff_request_frame(ctx->inputs[0]);
-}
-
 static av_cold void uninit(AVFilterContext *ctx)
 {
     HeadphoneContext *s = ctx->priv;
@@ -773,8 +818,6 @@
     av_freep(&s->fdsp);
 
     for (i = 0; i < s->nb_inputs; i++) {
-        av_frame_free(&s->in[i].frame);
-        av_audio_fifo_free(s->in[i].fifo);
         if (ctx->input_pads && i)
             av_freep(&ctx->input_pads[i].name);
     }
@@ -791,6 +834,10 @@
     { "type",      "set processing",                     OFFSET(type),     AV_OPT_TYPE_INT,    {.i64=1},       0,   1, .flags = FLAGS, "type" },
     { "time",      "time domain",                        0,                AV_OPT_TYPE_CONST,  {.i64=0},       0,   0, .flags = FLAGS, "type" },
     { "freq",      "frequency domain",                   0,                AV_OPT_TYPE_CONST,  {.i64=1},       0,   0, .flags = FLAGS, "type" },
+    { "size",      "set frame size",                     OFFSET(size),     AV_OPT_TYPE_INT,    {.i64=1024},1024,96000, .flags = FLAGS },
+    { "hrir",      "set hrir format",                    OFFSET(hrir_fmt), AV_OPT_TYPE_INT,    {.i64=HRIR_STEREO}, 0, 1, .flags = FLAGS, "hrir" },
+    { "stereo",    "hrir files have exactly 2 channels", 0,                AV_OPT_TYPE_CONST,  {.i64=HRIR_STEREO}, 0, 0, .flags = FLAGS, "hrir" },
+    { "multich",   "single multichannel hrir file",      0,                AV_OPT_TYPE_CONST,  {.i64=HRIR_MULTI},  0, 0, .flags = FLAGS, "hrir" },
     { NULL }
 };
 
@@ -801,7 +848,6 @@
         .name          = "default",
         .type          = AVMEDIA_TYPE_AUDIO,
         .config_props  = config_output,
-        .request_frame = request_frame,
     },
     { NULL }
 };
@@ -814,6 +860,7 @@
     .init          = init,
     .uninit        = uninit,
     .query_formats = query_formats,
+    .activate      = activate,
     .inputs        = NULL,
     .outputs       = outputs,
     .flags         = AVFILTER_FLAG_SLICE_THREADS | AVFILTER_FLAG_DYNAMIC_INPUTS,

diff --git a/libavfilter/af_join.c b/libavfilter/af_join.c
index f8af0a1..930c9e4 100644
--- a/libavfilter/af_join.c
+++ b/libavfilter/af_join.c

@@ -32,6 +32,7 @@
 #include "audio.h"
 #include "avfilter.h"
 #include "formats.h"
+#include "filters.h"
 #include "internal.h"
 
 typedef struct ChannelMap {
@@ -78,48 +79,12 @@
 
 AVFILTER_DEFINE_CLASS(join);
 
-static int try_push_frame(AVFilterContext *ctx);
-
-static int filter_frame(AVFilterLink *link, AVFrame *frame)
-{
-    AVFilterContext *ctx = link->dst;
-    JoinContext       *s = ctx->priv;
-    int i, j;
-
-    for (i = 0; i < ctx->nb_inputs; i++)
-        if (link == ctx->inputs[i])
-            break;
-    av_assert0(i < ctx->nb_inputs);
-    av_assert0(!s->input_frames[i]);
-    s->input_frames[i] = frame;
-
-    /* request the same number of samples on all inputs */
-    /* FIXME that means a frame arriving asynchronously on a different input
-       will not have the requested number of samples */
-    if (i == 0) {
-        int nb_samples = s->input_frames[0]->nb_samples;
-
-        for (j = 1; !i && j < ctx->nb_inputs; j++)
-            ctx->inputs[j]->request_samples = nb_samples;
-    }
-
-    return try_push_frame(ctx);
-}
-
 static int parse_maps(AVFilterContext *ctx)
 {
     JoinContext *s = ctx->priv;
     char separator = '|';
     char *cur      = s->map;
 
-#if FF_API_OLD_FILTER_OPTS
-    if (cur && strchr(cur, ',')) {
-        av_log(ctx, AV_LOG_WARNING, "This syntax is deprecated, use '|' to "
-               "separate the mappings.\n");
-        separator = ',';
-    }
-#endif
-
     while (cur && *cur) {
         char *sep, *next, *p;
         uint64_t in_channel = 0, out_channel = 0;
@@ -228,9 +193,6 @@
         pad.name           = av_strdup(name);
         if (!pad.name)
             return AVERROR(ENOMEM);
-        pad.filter_frame   = filter_frame;
-
-        pad.needs_fifo = 1;
 
         if ((ret = ff_insert_inpad(ctx, i, &pad)) < 0) {
             av_freep(&pad.name);
@@ -398,21 +360,6 @@
     return ret;
 }
 
-static int join_request_frame(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    JoinContext *s       = ctx->priv;
-    int i;
-
-    /* get a frame on each input */
-    for (i = 0; i < ctx->nb_inputs; i++) {
-        AVFilterLink *inlink = ctx->inputs[i];
-        if (!s->input_frames[i])
-            return ff_request_frame(inlink);
-    }
-    return 0;
-}
-
 static int try_push_frame(AVFilterContext *ctx)
 {
     AVFilterLink *outlink = ctx->outputs[0];
@@ -428,6 +375,8 @@
             return 0;
         nb_samples = FFMIN(nb_samples, s->input_frames[i]->nb_samples);
     }
+    if (!nb_samples)
+        return 0;
 
     /* setup the output frame */
     frame = av_frame_alloc();
@@ -516,12 +465,63 @@
     return ret;
 }
 
+static int activate(AVFilterContext *ctx)
+{
+    JoinContext *s = ctx->priv;
+    int i, ret, status;
+    int nb_samples = 0;
+    int64_t pts;
+
+    FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
+
+    if (!s->input_frames[0]) {
+        ret = ff_inlink_consume_frame(ctx->inputs[0], &s->input_frames[0]);
+        if (ret < 0) {
+            return ret;
+        } else if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts)) {
+            ff_outlink_set_status(ctx->outputs[0], status, pts);
+            return 0;
+        } else {
+            if (ff_outlink_frame_wanted(ctx->outputs[0]) && !s->input_frames[0]) {
+                ff_inlink_request_frame(ctx->inputs[0]);
+                return 0;
+            }
+        }
+        if (!s->input_frames[0]) {
+            return 0;
+        }
+    }
+
+    nb_samples = s->input_frames[0]->nb_samples;
+
+    for (i = 1; i < ctx->nb_inputs && nb_samples > 0; i++) {
+        if (s->input_frames[i])
+            continue;
+
+        if (ff_inlink_check_available_samples(ctx->inputs[i], nb_samples) > 0) {
+            ret = ff_inlink_consume_samples(ctx->inputs[i], nb_samples, nb_samples, &s->input_frames[i]);
+            if (ret < 0) {
+                return ret;
+            } else if (ff_inlink_acknowledge_status(ctx->inputs[i], &status, &pts)) {
+                ff_outlink_set_status(ctx->outputs[0], status, pts);
+                return 0;
+            }
+        } else {
+            if (ff_outlink_frame_wanted(ctx->outputs[0])) {
+                ff_inlink_request_frame(ctx->inputs[i]);
+                return 0;
+            }
+        }
+    }
+
+    return try_push_frame(ctx);
+}
+
 static const AVFilterPad avfilter_af_join_outputs[] = {
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_AUDIO,
         .config_props  = join_config_output,
-        .request_frame = join_request_frame,
     },
     { NULL }
 };
@@ -534,6 +534,7 @@
     .priv_class     = &join_class,
     .init           = join_init,
     .uninit         = join_uninit,
+    .activate       = activate,
     .query_formats  = join_query_formats,
     .inputs         = NULL,
     .outputs        = avfilter_af_join_outputs,

diff --git a/libavfilter/af_ladspa.c b/libavfilter/af_ladspa.c
index 5532dac..3be26bc 100644
--- a/libavfilter/af_ladspa.c
+++ b/libavfilter/af_ladspa.c

@@ -318,8 +318,6 @@
 
         ret = 0;
     } else {
-        LADSPAContext *s = ctx->priv;
-
         outlink->sample_rate = s->sample_rate;
         outlink->time_base   = (AVRational){1, s->sample_rate};
 

diff --git a/libavfilter/af_loudnorm.c b/libavfilter/af_loudnorm.c
index e3e815e..314b25f 100644
--- a/libavfilter/af_loudnorm.c
+++ b/libavfilter/af_loudnorm.c

@@ -423,7 +423,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);
@@ -431,6 +431,9 @@
         av_frame_copy_props(out, in);
     }
 
+    if (s->pts == AV_NOPTS_VALUE)
+        s->pts = in->pts;
+
     out->pts = s->pts;
     src = (const double *)in->data[0];
     dst = (double *)out->data[0];
@@ -763,7 +766,7 @@
         inlink->partial_buf_size = frame_size(inlink->sample_rate, 3000);
     }
 
-    s->pts =
+    s->pts = AV_NOPTS_VALUE;
     s->buf_index =
     s->prev_buf_index =
     s->limiter_buf_index = 0;

diff --git a/libavfilter/af_lv2.c b/libavfilter/af_lv2.c
new file mode 100644
index 0000000..8a0a6fd
--- /dev/null
+++ b/libavfilter/af_lv2.c

@@ -0,0 +1,602 @@
+/*
+ * Copyright (c) 2017 Paul B Mahol
+ * Copyright (c) 2007-2016 David Robillard <http://drobilla.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * LV2 wrapper
+ */
+
+#include <lilv/lilv.h>
+#include <lv2/lv2plug.in/ns/ext/atom/atom.h>
+#include <lv2/lv2plug.in/ns/ext/buf-size/buf-size.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+
+typedef struct URITable {
+    char    **uris;
+    size_t    n_uris;
+} URITable;
+
+typedef struct LV2Context {
+    const AVClass *class;
+    char *plugin_uri;
+    char *options;
+
+    unsigned nb_inputs;
+    unsigned nb_inputcontrols;
+    unsigned nb_outputs;
+
+    int sample_rate;
+    int nb_samples;
+    int64_t pts;
+    int64_t duration;
+
+    LilvWorld         *world;
+    const LilvPlugin  *plugin;
+    uint32_t           nb_ports;
+    float             *values;
+    URITable           uri_table;
+    LV2_URID_Map       map;
+    LV2_Feature        map_feature;
+    LV2_URID_Unmap     unmap;
+    LV2_Feature        unmap_feature;
+    LV2_Atom_Sequence  seq_in[2];
+    LV2_Atom_Sequence *seq_out;
+    const LV2_Feature *features[5];
+
+    float *mins;
+    float *maxes;
+    float *controls;
+
+    LilvInstance *instance;
+
+    LilvNode  *atom_AtomPort;
+    LilvNode  *atom_Sequence;
+    LilvNode  *lv2_AudioPort;
+    LilvNode  *lv2_CVPort;
+    LilvNode  *lv2_ControlPort;
+    LilvNode  *lv2_Optional;
+    LilvNode  *lv2_InputPort;
+    LilvNode  *lv2_OutputPort;
+    LilvNode  *urid_map;
+    LilvNode  *powerOf2BlockLength;
+    LilvNode  *fixedBlockLength;
+    LilvNode  *boundedBlockLength;
+} LV2Context;
+
+#define OFFSET(x) offsetof(LV2Context, x)
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption lv2_options[] = {
+    { "plugin", "set plugin uri", OFFSET(plugin_uri), AV_OPT_TYPE_STRING, .flags = FLAGS },
+    { "p",      "set plugin uri", OFFSET(plugin_uri), AV_OPT_TYPE_STRING, .flags = FLAGS },
+    { "controls", "set plugin options", OFFSET(options), AV_OPT_TYPE_STRING, .flags = FLAGS },
+    { "c",        "set plugin options", OFFSET(options), AV_OPT_TYPE_STRING, .flags = FLAGS },
+    { "sample_rate", "set sample rate", OFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64=44100}, 1, INT32_MAX, FLAGS },
+    { "s",           "set sample rate", OFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64=44100}, 1, INT32_MAX, FLAGS },
+    { "nb_samples", "set the number of samples per requested frame", OFFSET(nb_samples), AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS },
+    { "n",          "set the number of samples per requested frame", OFFSET(nb_samples), AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS },
+    { "duration", "set audio duration", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64=-1}, -1, INT64_MAX, FLAGS },
+    { "d",        "set audio duration", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64=-1}, -1, INT64_MAX, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(lv2);
+
+static void uri_table_init(URITable *table)
+{
+    table->uris   = NULL;
+    table->n_uris = 0;
+}
+
+static void uri_table_destroy(URITable *table)
+{
+    int i;
+
+    for (i = 0; i < table->n_uris; i++) {
+        av_freep(&table->uris[i]);
+    }
+
+    av_freep(&table->uris);
+}
+
+static LV2_URID uri_table_map(LV2_URID_Map_Handle handle, const char *uri)
+{
+    URITable *table = (URITable*)handle;
+    const size_t len = strlen(uri);
+    size_t i;
+    char **tmp;
+
+    for (i = 0; i < table->n_uris; i++) {
+        if (!strcmp(table->uris[i], uri)) {
+            return i + 1;
+        }
+    }
+
+    tmp = av_calloc(table->n_uris + 1, sizeof(char*));
+    if (!tmp)
+        return table->n_uris;
+    memcpy(tmp, table->uris, table->n_uris * sizeof(char**));
+
+    av_free(table->uris);
+    table->uris = tmp;
+    table->uris[table->n_uris] = av_malloc(len + 1);
+    if (!table->uris[table->n_uris])
+        return table->n_uris;
+
+    memcpy(table->uris[table->n_uris], uri, len + 1);
+    table->n_uris++;
+
+    return table->n_uris;
+}
+
+static const char *uri_table_unmap(LV2_URID_Map_Handle handle, LV2_URID urid)
+{
+    URITable *table = (URITable*)handle;
+
+    if (urid > 0 && urid <= table->n_uris) {
+        return table->uris[urid - 1];
+    }
+
+    return NULL;
+}
+
+static void connect_ports(LV2Context *s, AVFrame *in, AVFrame *out)
+{
+    int ich = 0, och = 0, i;
+
+    for (i = 0; i < s->nb_ports; i++) {
+        const LilvPort *port = lilv_plugin_get_port_by_index(s->plugin, i);
+
+        if (lilv_port_is_a(s->plugin, port, s->lv2_AudioPort) ||
+            lilv_port_is_a(s->plugin, port, s->lv2_CVPort)) {
+            if (lilv_port_is_a(s->plugin, port, s->lv2_InputPort)) {
+                lilv_instance_connect_port(s->instance, i, in->extended_data[ich++]);
+            } else if (lilv_port_is_a(s->plugin, port, s->lv2_OutputPort)) {
+                lilv_instance_connect_port(s->instance, i, out->extended_data[och++]);
+            } else {
+                av_log(s, AV_LOG_WARNING, "port %d neither input nor output, skipping\n", i);
+            }
+        } else if (lilv_port_is_a(s->plugin, port, s->atom_AtomPort)) {
+            if (lilv_port_is_a(s->plugin, port, s->lv2_InputPort)) {
+                lilv_instance_connect_port(s->instance, i, &s->seq_in);
+            } else {
+                lilv_instance_connect_port(s->instance, i, s->seq_out);
+            }
+        } else if (lilv_port_is_a(s->plugin, port, s->lv2_ControlPort)) {
+            lilv_instance_connect_port(s->instance, i, &s->controls[i]);
+        }
+    }
+
+    s->seq_in[0].atom.size = sizeof(LV2_Atom_Sequence_Body);
+    s->seq_in[0].atom.type = uri_table_map(&s->uri_table, LV2_ATOM__Sequence);
+    s->seq_out->atom.size  = 9624;
+    s->seq_out->atom.type  = uri_table_map(&s->uri_table, LV2_ATOM__Chunk);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    LV2Context *s = ctx->priv;
+    AVFrame *out;
+
+    if (!s->nb_outputs ||
+        (av_frame_is_writable(in) && s->nb_inputs == s->nb_outputs)) {
+        out = in;
+    } else {
+        out = ff_get_audio_buffer(ctx->outputs[0], in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    connect_ports(s, in, out);
+
+    lilv_instance_run(s->instance, in->nb_samples);
+
+    if (out != in)
+        av_frame_free(&in);
+
+    return ff_filter_frame(ctx->outputs[0], out);
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    LV2Context *s = ctx->priv;
+    AVFrame *out;
+    int64_t t;
+
+    if (ctx->nb_inputs)
+        return ff_request_frame(ctx->inputs[0]);
+
+    t = av_rescale(s->pts, AV_TIME_BASE, s->sample_rate);
+    if (s->duration >= 0 && t >= s->duration)
+        return AVERROR_EOF;
+
+    out = ff_get_audio_buffer(outlink, s->nb_samples);
+    if (!out)
+        return AVERROR(ENOMEM);
+
+    connect_ports(s, out, out);
+
+    lilv_instance_run(s->instance, out->nb_samples);
+
+    out->sample_rate = s->sample_rate;
+    out->pts         = s->pts;
+    s->pts          += s->nb_samples;
+
+    return ff_filter_frame(outlink, out);
+}
+
+static const LV2_Feature buf_size_features[3] = {
+    { LV2_BUF_SIZE__powerOf2BlockLength, NULL },
+    { LV2_BUF_SIZE__fixedBlockLength,    NULL },
+    { LV2_BUF_SIZE__boundedBlockLength,  NULL },
+};
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    LV2Context *s = ctx->priv;
+    char *p, *arg, *saveptr = NULL;
+    int i, sample_rate;
+
+    uri_table_init(&s->uri_table);
+    s->map.handle = &s->uri_table;
+    s->map.map = uri_table_map;
+    s->map_feature.URI = LV2_URID_MAP_URI;
+    s->map_feature.data = &s->map;
+    s->unmap.handle = &s->uri_table;
+    s->unmap.unmap  = uri_table_unmap;
+    s->unmap_feature.URI = LV2_URID_UNMAP_URI;
+    s->unmap_feature.data = &s->unmap;
+    s->features[0] = &s->map_feature;
+    s->features[1] = &s->unmap_feature;
+    s->features[2] = &buf_size_features[0];
+    s->features[3] = &buf_size_features[1];
+    s->features[4] = &buf_size_features[2];
+
+    if (ctx->nb_inputs) {
+        AVFilterLink *inlink = ctx->inputs[0];
+
+        outlink->format      = inlink->format;
+        outlink->sample_rate = sample_rate = inlink->sample_rate;
+        if (s->nb_inputs == s->nb_outputs) {
+            outlink->channel_layout = inlink->channel_layout;
+            outlink->channels = inlink->channels;
+        }
+
+    } else {
+        outlink->sample_rate = sample_rate = s->sample_rate;
+        outlink->time_base   = (AVRational){1, s->sample_rate};
+    }
+
+    s->instance = lilv_plugin_instantiate(s->plugin, sample_rate, s->features);
+    if (!s->instance) {
+        av_log(s, AV_LOG_ERROR, "Failed to instantiate <%s>\n", lilv_node_as_uri(lilv_plugin_get_uri(s->plugin)));
+        return AVERROR(EINVAL);
+    }
+
+    s->mins     = av_calloc(s->nb_ports, sizeof(float));
+    s->maxes    = av_calloc(s->nb_ports, sizeof(float));
+    s->controls = av_calloc(s->nb_ports, sizeof(float));
+
+    if (!s->mins || !s->maxes || !s->controls)
+        return AVERROR(ENOMEM);
+
+    lilv_plugin_get_port_ranges_float(s->plugin, s->mins, s->maxes, s->controls);
+    s->seq_out = av_malloc(sizeof(LV2_Atom_Sequence) + 9624);
+    if (!s->seq_out)
+        return AVERROR(ENOMEM);
+
+    if (s->options && !strcmp(s->options, "help")) {
+        if (!s->nb_inputcontrols) {
+            av_log(ctx, AV_LOG_INFO,
+                   "The '%s' plugin does not have any input controls.\n",
+                   s->plugin_uri);
+        } else {
+            av_log(ctx, AV_LOG_INFO,
+                   "The '%s' plugin has the following input controls:\n",
+                   s->plugin_uri);
+            for (i = 0; i < s->nb_ports; i++) {
+                const LilvPort *port = lilv_plugin_get_port_by_index(s->plugin, i);
+                const LilvNode *symbol = lilv_port_get_symbol(s->plugin, port);
+                LilvNode *name = lilv_port_get_name(s->plugin, port);
+
+                if (lilv_port_is_a(s->plugin, port, s->lv2_InputPort) &&
+                    lilv_port_is_a(s->plugin, port, s->lv2_ControlPort)) {
+                    av_log(ctx, AV_LOG_INFO, "%s\t\t<float> (from %f to %f) (default %f)\t\t%s\n",
+                           lilv_node_as_string(symbol), s->mins[i], s->maxes[i], s->controls[i],
+                           lilv_node_as_string(name));
+                }
+
+                lilv_node_free(name);
+            }
+        }
+        return AVERROR_EXIT;
+    }
+
+    p = s->options;
+    while (s->options) {
+        const LilvPort *port;
+        LilvNode *sym;
+        float val;
+        char *str, *vstr;
+        int index;
+
+        if (!(arg = av_strtok(p, " |", &saveptr)))
+            break;
+        p = NULL;
+
+        vstr = strstr(arg, "=");
+        if (vstr == NULL) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid syntax.\n");
+            return AVERROR(EINVAL);
+        }
+
+        vstr[0] = 0;
+        str  = arg;
+        val  = atof(vstr+1);
+        sym  = lilv_new_string(s->world, str);
+        port = lilv_plugin_get_port_by_symbol(s->plugin, sym);
+        lilv_node_free(sym);
+        if (!port) {
+            av_log(s, AV_LOG_WARNING, "Unknown option: <%s>\n", str);
+        } else {
+            index = lilv_port_get_index(s->plugin, port);
+            s->controls[index] = val;
+        }
+    }
+
+    if (s->nb_inputs &&
+        (lilv_plugin_has_feature(s->plugin, s->powerOf2BlockLength) ||
+         lilv_plugin_has_feature(s->plugin, s->fixedBlockLength) ||
+         lilv_plugin_has_feature(s->plugin, s->boundedBlockLength))) {
+        AVFilterLink *inlink = ctx->inputs[0];
+
+        inlink->partial_buf_size = inlink->min_samples = inlink->max_samples = 4096;
+    }
+
+    return 0;
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    LV2Context *s = ctx->priv;
+    const LilvPlugins *plugins;
+    const LilvPlugin *plugin;
+    AVFilterPad pad = { NULL };
+    LilvNode *uri;
+    int i;
+
+    s->world = lilv_world_new();
+    if (!s->world)
+        return AVERROR(ENOMEM);
+
+    uri = lilv_new_uri(s->world, s->plugin_uri);
+    if (!uri) {
+        av_log(s, AV_LOG_ERROR, "Invalid plugin URI <%s>\n", s->plugin_uri);
+        return AVERROR(EINVAL);
+    }
+
+    lilv_world_load_all(s->world);
+    plugins = lilv_world_get_all_plugins(s->world);
+    plugin  = lilv_plugins_get_by_uri(plugins, uri);
+    lilv_node_free(uri);
+
+    if (!plugin) {
+        av_log(s, AV_LOG_ERROR, "Plugin <%s> not found\n", s->plugin_uri);
+        return AVERROR(EINVAL);
+    }
+
+    s->plugin = plugin;
+    s->nb_ports = lilv_plugin_get_num_ports(s->plugin);
+
+    s->lv2_InputPort       = lilv_new_uri(s->world, LV2_CORE__InputPort);
+    s->lv2_OutputPort      = lilv_new_uri(s->world, LV2_CORE__OutputPort);
+    s->lv2_AudioPort       = lilv_new_uri(s->world, LV2_CORE__AudioPort);
+    s->lv2_ControlPort     = lilv_new_uri(s->world, LV2_CORE__ControlPort);
+    s->lv2_Optional        = lilv_new_uri(s->world, LV2_CORE__connectionOptional);
+    s->atom_AtomPort       = lilv_new_uri(s->world, LV2_ATOM__AtomPort);
+    s->atom_Sequence       = lilv_new_uri(s->world, LV2_ATOM__Sequence);
+    s->urid_map            = lilv_new_uri(s->world, LV2_URID__map);
+    s->powerOf2BlockLength = lilv_new_uri(s->world, LV2_BUF_SIZE__powerOf2BlockLength);
+    s->fixedBlockLength    = lilv_new_uri(s->world, LV2_BUF_SIZE__fixedBlockLength);
+    s->boundedBlockLength  = lilv_new_uri(s->world, LV2_BUF_SIZE__boundedBlockLength);
+
+    for (i = 0; i < s->nb_ports; i++) {
+        const LilvPort *lport = lilv_plugin_get_port_by_index(s->plugin, i);
+        int is_input = 0;
+        int is_optional = 0;
+
+        is_optional = lilv_port_has_property(s->plugin, lport, s->lv2_Optional);
+
+        if (lilv_port_is_a(s->plugin, lport, s->lv2_InputPort)) {
+            is_input = 1;
+        } else if (!lilv_port_is_a(s->plugin, lport, s->lv2_OutputPort) && !is_optional) {
+            return AVERROR(EINVAL);
+        }
+
+        if (lilv_port_is_a(s->plugin, lport, s->lv2_ControlPort)) {
+            if (is_input) {
+                s->nb_inputcontrols++;
+            }
+        } else if (lilv_port_is_a(s->plugin, lport, s->lv2_AudioPort)) {
+            if (is_input) {
+                s->nb_inputs++;
+            } else {
+                s->nb_outputs++;
+            }
+        }
+    }
+
+    pad.type = AVMEDIA_TYPE_AUDIO;
+
+    if (s->nb_inputs) {
+        pad.name = av_asprintf("in0:%s:%u", s->plugin_uri, s->nb_inputs);
+        if (!pad.name)
+            return AVERROR(ENOMEM);
+
+        pad.filter_frame = filter_frame;
+        if (ff_insert_inpad(ctx, ctx->nb_inputs, &pad) < 0) {
+            av_freep(&pad.name);
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    LV2Context *s = ctx->priv;
+    AVFilterFormats *formats;
+    AVFilterChannelLayouts *layouts;
+    AVFilterLink *outlink = ctx->outputs[0];
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
+    int ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    if (s->nb_inputs) {
+        formats = ff_all_samplerates();
+        if (!formats)
+            return AVERROR(ENOMEM);
+
+        ret = ff_set_common_samplerates(ctx, formats);
+        if (ret < 0)
+            return ret;
+    } else {
+        int sample_rates[] = { s->sample_rate, -1 };
+
+        ret = ff_set_common_samplerates(ctx, ff_make_format_list(sample_rates));
+        if (ret < 0)
+            return ret;
+    }
+
+    if (s->nb_inputs == 2 && s->nb_outputs == 2) {
+        layouts = NULL;
+        ret = ff_add_channel_layout(&layouts, AV_CH_LAYOUT_STEREO);
+        if (ret < 0)
+            return ret;
+        ret = ff_set_common_channel_layouts(ctx, layouts);
+        if (ret < 0)
+            return ret;
+    } else {
+        if (s->nb_inputs >= 1) {
+            AVFilterLink *inlink = ctx->inputs[0];
+            uint64_t inlayout = FF_COUNT2LAYOUT(s->nb_inputs);
+
+            layouts = NULL;
+            ret = ff_add_channel_layout(&layouts, inlayout);
+            if (ret < 0)
+                return ret;
+            ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
+            if (ret < 0)
+                return ret;
+
+            if (!s->nb_outputs) {
+                ret = ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts);
+                if (ret < 0)
+                    return ret;
+            }
+        }
+
+        if (s->nb_outputs >= 1) {
+            uint64_t outlayout = FF_COUNT2LAYOUT(s->nb_outputs);
+
+            layouts = NULL;
+            ret = ff_add_channel_layout(&layouts, outlayout);
+            if (ret < 0)
+                return ret;
+            ret = ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    LV2Context *s = ctx->priv;
+
+    lilv_node_free(s->powerOf2BlockLength);
+    lilv_node_free(s->fixedBlockLength);
+    lilv_node_free(s->boundedBlockLength);
+    lilv_node_free(s->urid_map);
+    lilv_node_free(s->atom_Sequence);
+    lilv_node_free(s->atom_AtomPort);
+    lilv_node_free(s->lv2_Optional);
+    lilv_node_free(s->lv2_ControlPort);
+    lilv_node_free(s->lv2_AudioPort);
+    lilv_node_free(s->lv2_OutputPort);
+    lilv_node_free(s->lv2_InputPort);
+    uri_table_destroy(&s->uri_table);
+    lilv_instance_free(s->instance);
+    lilv_world_free(s->world);
+    av_freep(&s->mins);
+    av_freep(&s->maxes);
+    av_freep(&s->controls);
+    av_freep(&s->seq_out);
+
+    if (ctx->nb_inputs)
+        av_freep(&ctx->input_pads[0].name);
+}
+
+static const AVFilterPad lv2_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .config_props  = config_output,
+        .request_frame = request_frame,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_lv2 = {
+    .name          = "lv2",
+    .description   = NULL_IF_CONFIG_SMALL("Apply LV2 effect."),
+    .priv_size     = sizeof(LV2Context),
+    .priv_class    = &lv2_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = 0,
+    .outputs       = lv2_outputs,
+    .flags         = AVFILTER_FLAG_DYNAMIC_INPUTS,
+};

diff --git a/libavfilter/af_mcompand.c b/libavfilter/af_mcompand.c
new file mode 100644
index 0000000..f142573
--- /dev/null
+++ b/libavfilter/af_mcompand.c

@@ -0,0 +1,689 @@
+/*
+ * COpyright (c) 2002 Daniel Pouzzner
+ * Copyright (c) 1999 Chris Bagwell
+ * Copyright (c) 1999 Nick Bailey
+ * Copyright (c) 2007 Rob Sykes <robs@users.sourceforge.net>
+ * Copyright (c) 2013 Paul B Mahol
+ * Copyright (c) 2014 Andrew Kelley
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio multiband compand filter
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/ffmath.h"
+#include "libavutil/opt.h"
+#include "libavutil/samplefmt.h"
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+
+typedef struct CompandSegment {
+    double x, y;
+    double a, b;
+} CompandSegment;
+
+typedef struct CompandT {
+    CompandSegment *segments;
+    int nb_segments;
+    double in_min_lin;
+    double out_min_lin;
+    double curve_dB;
+    double gain_dB;
+} CompandT;
+
+#define N 4
+
+typedef struct PrevCrossover {
+    double in;
+    double out_low;
+    double out_high;
+} PrevCrossover[N * 2];
+
+typedef struct Crossover {
+  PrevCrossover *previous;
+  size_t         pos;
+  double         coefs[3 *(N+1)];
+} Crossover;
+
+typedef struct CompBand {
+    CompandT transfer_fn;
+    double *attack_rate;
+    double *decay_rate;
+    double *volume;
+    double delay;
+    double topfreq;
+    Crossover filter;
+    AVFrame *delay_buf;
+    size_t delay_size;
+    ptrdiff_t delay_buf_ptr;
+    size_t delay_buf_cnt;
+} CompBand;
+
+typedef struct MCompandContext {
+    const AVClass *class;
+
+    char *args;
+
+    int nb_bands;
+    CompBand *bands;
+    AVFrame *band_buf1, *band_buf2, *band_buf3;
+    int band_samples;
+    size_t delay_buf_size;
+} MCompandContext;
+
+#define OFFSET(x) offsetof(MCompandContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption mcompand_options[] = {
+    { "args", "set parameters for each band", OFFSET(args), AV_OPT_TYPE_STRING, { .str = "0.005,0.1 6 -47/-40,-34/-34,-17/-33 100 | 0.003,0.05 6 -47/-40,-34/-34,-17/-33 400 | 0.000625,0.0125 6 -47/-40,-34/-34,-15/-33 1600 | 0.0001,0.025 6 -47/-40,-34/-34,-31/-31,-0/-30 6400 | 0,0.025 6 -38/-31,-28/-28,-0/-25 22000" }, 0, 0, A },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(mcompand);
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    MCompandContext *s = ctx->priv;
+    int i;
+
+    av_frame_free(&s->band_buf1);
+    av_frame_free(&s->band_buf2);
+    av_frame_free(&s->band_buf3);
+
+    if (s->bands) {
+        for (i = 0; i < s->nb_bands; i++) {
+            av_freep(&s->bands[i].attack_rate);
+            av_freep(&s->bands[i].decay_rate);
+            av_freep(&s->bands[i].volume);
+            av_freep(&s->bands[i].transfer_fn.segments);
+            av_freep(&s->bands[i].filter.previous);
+            av_frame_free(&s->bands[i].delay_buf);
+        }
+    }
+    av_freep(&s->bands);
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterChannelLayouts *layouts;
+    AVFilterFormats *formats;
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+static void count_items(char *item_str, int *nb_items, char delimiter)
+{
+    char *p;
+
+    *nb_items = 1;
+    for (p = item_str; *p; p++) {
+        if (*p == delimiter)
+            (*nb_items)++;
+    }
+}
+
+static void update_volume(CompBand *cb, double in, int ch)
+{
+    double delta = in - cb->volume[ch];
+
+    if (delta > 0.0)
+        cb->volume[ch] += delta * cb->attack_rate[ch];
+    else
+        cb->volume[ch] += delta * cb->decay_rate[ch];
+}
+
+static double get_volume(CompandT *s, double in_lin)
+{
+    CompandSegment *cs;
+    double in_log, out_log;
+    int i;
+
+    if (in_lin <= s->in_min_lin)
+        return s->out_min_lin;
+
+    in_log = log(in_lin);
+
+    for (i = 1; i < s->nb_segments; i++)
+        if (in_log <= s->segments[i].x)
+            break;
+    cs = &s->segments[i - 1];
+    in_log -= cs->x;
+    out_log = cs->y + in_log * (cs->a * in_log + cs->b);
+
+    return exp(out_log);
+}
+
+static int parse_points(char *points, int nb_points, double radius,
+                        CompandT *s, AVFilterContext *ctx)
+{
+    int new_nb_items, num;
+    char *saveptr = NULL;
+    char *p = points;
+    int i;
+
+#define S(x) s->segments[2 * ((x) + 1)]
+    for (i = 0, new_nb_items = 0; i < nb_points; i++) {
+        char *tstr = av_strtok(p, ",", &saveptr);
+        p = NULL;
+        if (!tstr || sscanf(tstr, "%lf/%lf", &S(i).x, &S(i).y) != 2) {
+            av_log(ctx, AV_LOG_ERROR,
+                    "Invalid and/or missing input/output value.\n");
+            return AVERROR(EINVAL);
+        }
+        if (i && S(i - 1).x > S(i).x) {
+            av_log(ctx, AV_LOG_ERROR,
+                    "Transfer function input values must be increasing.\n");
+            return AVERROR(EINVAL);
+        }
+        S(i).y -= S(i).x;
+        av_log(ctx, AV_LOG_DEBUG, "%d: x=%f y=%f\n", i, S(i).x, S(i).y);
+        new_nb_items++;
+    }
+    num = new_nb_items;
+
+    /* Add 0,0 if necessary */
+    if (num == 0 || S(num - 1).x)
+        num++;
+
+#undef S
+#define S(x) s->segments[2 * (x)]
+    /* Add a tail off segment at the start */
+    S(0).x = S(1).x - 2 * s->curve_dB;
+    S(0).y = S(1).y;
+    num++;
+
+    /* Join adjacent colinear segments */
+    for (i = 2; i < num; i++) {
+        double g1 = (S(i - 1).y - S(i - 2).y) * (S(i - 0).x - S(i - 1).x);
+        double g2 = (S(i - 0).y - S(i - 1).y) * (S(i - 1).x - S(i - 2).x);
+        int j;
+
+        if (fabs(g1 - g2))
+            continue;
+        num--;
+        for (j = --i; j < num; j++)
+            S(j) = S(j + 1);
+    }
+
+    for (i = 0; i < s->nb_segments; i += 2) {
+        s->segments[i].y += s->gain_dB;
+        s->segments[i].x *= M_LN10 / 20;
+        s->segments[i].y *= M_LN10 / 20;
+    }
+
+#define L(x) s->segments[i - (x)]
+    for (i = 4; i < s->nb_segments; i += 2) {
+        double x, y, cx, cy, in1, in2, out1, out2, theta, len, r;
+
+        L(4).a = 0;
+        L(4).b = (L(2).y - L(4).y) / (L(2).x - L(4).x);
+
+        L(2).a = 0;
+        L(2).b = (L(0).y - L(2).y) / (L(0).x - L(2).x);
+
+        theta = atan2(L(2).y - L(4).y, L(2).x - L(4).x);
+        len = hypot(L(2).x - L(4).x, L(2).y - L(4).y);
+        r = FFMIN(radius, len);
+        L(3).x = L(2).x - r * cos(theta);
+        L(3).y = L(2).y - r * sin(theta);
+
+        theta = atan2(L(0).y - L(2).y, L(0).x - L(2).x);
+        len = hypot(L(0).x - L(2).x, L(0).y - L(2).y);
+        r = FFMIN(radius, len / 2);
+        x = L(2).x + r * cos(theta);
+        y = L(2).y + r * sin(theta);
+
+        cx = (L(3).x + L(2).x + x) / 3;
+        cy = (L(3).y + L(2).y + y) / 3;
+
+        L(2).x = x;
+        L(2).y = y;
+
+        in1  = cx - L(3).x;
+        out1 = cy - L(3).y;
+        in2  = L(2).x - L(3).x;
+        out2 = L(2).y - L(3).y;
+        L(3).a = (out2 / in2 - out1 / in1) / (in2 - in1);
+        L(3).b = out1 / in1 - L(3).a * in1;
+    }
+    L(3).x = 0;
+    L(3).y = L(2).y;
+
+    s->in_min_lin  = exp(s->segments[1].x);
+    s->out_min_lin = exp(s->segments[1].y);
+
+    return 0;
+}
+
+static void square_quadratic(double const *x, double *y)
+{
+    y[0] = x[0] * x[0];
+    y[1] = 2 * x[0] * x[1];
+    y[2] = 2 * x[0] * x[2] + x[1] * x[1];
+    y[3] = 2 * x[1] * x[2];
+    y[4] = x[2] * x[2];
+}
+
+static int crossover_setup(AVFilterLink *outlink, Crossover *p, double frequency)
+{
+    double w0 = 2 * M_PI * frequency / outlink->sample_rate;
+    double Q = sqrt(.5), alpha = sin(w0) / (2*Q);
+    double x[9], norm;
+    int i;
+
+    if (w0 > M_PI)
+        return AVERROR(EINVAL);
+
+    x[0] =  (1 - cos(w0))/2;           /* Cf. filter_LPF in biquads.c */
+    x[1] =   1 - cos(w0);
+    x[2] =  (1 - cos(w0))/2;
+    x[3] =  (1 + cos(w0))/2;           /* Cf. filter_HPF in biquads.c */
+    x[4] = -(1 + cos(w0));
+    x[5] =  (1 + cos(w0))/2;
+    x[6] =   1 + alpha;
+    x[7] =  -2*cos(w0);
+    x[8] =   1 - alpha;
+
+    for (norm = x[6], i = 0; i < 9; ++i)
+        x[i] /= norm;
+
+    square_quadratic(x    , p->coefs);
+    square_quadratic(x + 3, p->coefs + 5);
+    square_quadratic(x + 6, p->coefs + 10);
+
+    p->previous = av_calloc(outlink->channels, sizeof(*p->previous));
+    if (!p->previous)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx  = outlink->src;
+    MCompandContext *s    = ctx->priv;
+    int ret, ch, i, k, new_nb_items, nb_bands;
+    char *p = s->args, *saveptr = NULL;
+    int max_delay_size = 0;
+
+    count_items(s->args, &nb_bands, '|');
+    s->nb_bands = FFMAX(1, nb_bands);
+
+    s->bands = av_calloc(nb_bands, sizeof(*s->bands));
+    if (!s->bands)
+        return AVERROR(ENOMEM);
+
+    for (i = 0, new_nb_items = 0; i < nb_bands; i++) {
+        int nb_points, nb_attacks, nb_items = 0;
+        char *tstr2, *tstr = av_strtok(p, "|", &saveptr);
+        char *p2, *p3, *saveptr2 = NULL, *saveptr3 = NULL;
+        double radius;
+
+        if (!tstr) {
+            uninit(ctx);
+            return AVERROR(EINVAL);
+        }
+        p = NULL;
+
+        p2 = tstr;
+        count_items(tstr, &nb_items, ' ');
+        tstr2 = av_strtok(p2, " ", &saveptr2);
+        if (!tstr2) {
+            av_log(ctx, AV_LOG_ERROR, "at least one attacks/decays rate is mandatory\n");
+            uninit(ctx);
+            return AVERROR(EINVAL);
+        }
+        p2 = NULL;
+        p3 = tstr2;
+
+        count_items(tstr2, &nb_attacks, ',');
+        if (!nb_attacks || nb_attacks & 1) {
+            av_log(ctx, AV_LOG_ERROR, "number of attacks rate plus decays rate must be even\n");
+            uninit(ctx);
+            return AVERROR(EINVAL);
+        }
+
+        s->bands[i].attack_rate = av_calloc(outlink->channels, sizeof(double));
+        s->bands[i].decay_rate = av_calloc(outlink->channels, sizeof(double));
+        s->bands[i].volume = av_calloc(outlink->channels, sizeof(double));
+        for (k = 0; k < FFMIN(nb_attacks / 2, outlink->channels); k++) {
+            char *tstr3 = av_strtok(p3, ",", &saveptr3);
+
+            p3 = NULL;
+            sscanf(tstr3, "%lf", &s->bands[i].attack_rate[k]);
+            tstr3 = av_strtok(p3, ",", &saveptr3);
+            sscanf(tstr3, "%lf", &s->bands[i].decay_rate[k]);
+
+            if (s->bands[i].attack_rate[k] > 1.0 / outlink->sample_rate) {
+                s->bands[i].attack_rate[k] = 1.0 - exp(-1.0 / (outlink->sample_rate * s->bands[i].attack_rate[k]));
+            } else {
+                s->bands[i].attack_rate[k] = 1.0;
+            }
+
+            if (s->bands[i].decay_rate[k] > 1.0 / outlink->sample_rate) {
+                s->bands[i].decay_rate[k] = 1.0 - exp(-1.0 / (outlink->sample_rate * s->bands[i].decay_rate[k]));
+            } else {
+                s->bands[i].decay_rate[k] = 1.0;
+            }
+        }
+
+        for (ch = k; ch < outlink->channels; ch++) {
+            s->bands[i].attack_rate[ch] = s->bands[i].attack_rate[k - 1];
+            s->bands[i].decay_rate[ch]  = s->bands[i].decay_rate[k - 1];
+        }
+
+        tstr2 = av_strtok(p2, " ", &saveptr2);
+        if (!tstr2) {
+            av_log(ctx, AV_LOG_ERROR, "transfer function curve in dB must be set\n");
+            uninit(ctx);
+            return AVERROR(EINVAL);
+        }
+        sscanf(tstr2, "%lf", &s->bands[i].transfer_fn.curve_dB);
+
+        radius = s->bands[i].transfer_fn.curve_dB * M_LN10 / 20.0;
+
+        tstr2 = av_strtok(p2, " ", &saveptr2);
+        if (!tstr2) {
+            av_log(ctx, AV_LOG_ERROR, "transfer points missing\n");
+            uninit(ctx);
+            return AVERROR(EINVAL);
+        }
+
+        count_items(tstr2, &nb_points, ',');
+        s->bands[i].transfer_fn.nb_segments = (nb_points + 4) * 2;
+        s->bands[i].transfer_fn.segments = av_calloc(s->bands[i].transfer_fn.nb_segments,
+                                                     sizeof(CompandSegment));
+        if (!s->bands[i].transfer_fn.segments) {
+            uninit(ctx);
+            return AVERROR(ENOMEM);
+        }
+
+        ret = parse_points(tstr2, nb_points, radius, &s->bands[i].transfer_fn, ctx);
+        if (ret < 0) {
+            av_log(ctx, AV_LOG_ERROR, "transfer points parsing failed\n");
+            uninit(ctx);
+            return ret;
+        }
+
+        tstr2 = av_strtok(p2, " ", &saveptr2);
+        if (!tstr2) {
+            av_log(ctx, AV_LOG_ERROR, "crossover_frequency is missing\n");
+            uninit(ctx);
+            return AVERROR(EINVAL);
+        }
+
+        new_nb_items += sscanf(tstr2, "%lf", &s->bands[i].topfreq) == 1;
+        if (s->bands[i].topfreq < 0 || s->bands[i].topfreq >= outlink->sample_rate / 2) {
+            av_log(ctx, AV_LOG_ERROR, "crossover_frequency: %f, should be >=0 and lower than half of sample rate: %d.\n", s->bands[i].topfreq, outlink->sample_rate / 2);
+            uninit(ctx);
+            return AVERROR(EINVAL);
+        }
+
+        if (s->bands[i].topfreq != 0) {
+            ret = crossover_setup(outlink, &s->bands[i].filter, s->bands[i].topfreq);
+            if (ret < 0) {
+                uninit(ctx);
+                return ret;
+            }
+        }
+
+        tstr2 = av_strtok(p2, " ", &saveptr2);
+        if (tstr2) {
+            sscanf(tstr2, "%lf", &s->bands[i].delay);
+            max_delay_size = FFMAX(max_delay_size, s->bands[i].delay * outlink->sample_rate);
+
+            tstr2 = av_strtok(p2, " ", &saveptr2);
+            if (tstr2) {
+                double initial_volume;
+
+                sscanf(tstr2, "%lf", &initial_volume);
+                initial_volume = pow(10.0, initial_volume / 20);
+
+                for (k = 0; k < outlink->channels; k++) {
+                    s->bands[i].volume[k] = initial_volume;
+                }
+
+                tstr2 = av_strtok(p2, " ", &saveptr2);
+                if (tstr2) {
+                    sscanf(tstr2, "%lf", &s->bands[i].transfer_fn.gain_dB);
+                }
+            }
+        }
+    }
+    s->nb_bands = new_nb_items;
+
+    for (i = 0; max_delay_size > 0 && i < s->nb_bands; i++) {
+        s->bands[i].delay_buf = ff_get_audio_buffer(outlink, max_delay_size);
+        if (!s->bands[i].delay_buf)
+            return AVERROR(ENOMEM);
+    }
+    s->delay_buf_size = max_delay_size;
+
+    return 0;
+}
+
+#define CONVOLVE _ _ _ _
+
+static void crossover(int ch, Crossover *p,
+                      double *ibuf, double *obuf_low,
+                      double *obuf_high, size_t len)
+{
+    double out_low, out_high;
+
+    while (len--) {
+        p->pos = p->pos ? p->pos - 1 : N - 1;
+#define _ out_low += p->coefs[j] * p->previous[ch][p->pos + j].in \
+            - p->coefs[2*N+2 + j] * p->previous[ch][p->pos + j].out_low, j++;
+        {
+            int j = 1;
+            out_low = p->coefs[0] * *ibuf;
+            CONVOLVE
+            *obuf_low++ = out_low;
+        }
+#undef _
+#define _ out_high += p->coefs[j+N+1] * p->previous[ch][p->pos + j].in \
+            - p->coefs[2*N+2 + j] * p->previous[ch][p->pos + j].out_high, j++;
+        {
+            int j = 1;
+            out_high = p->coefs[N+1] * *ibuf;
+            CONVOLVE
+            *obuf_high++ = out_high;
+        }
+        p->previous[ch][p->pos + N].in = p->previous[ch][p->pos].in = *ibuf++;
+        p->previous[ch][p->pos + N].out_low = p->previous[ch][p->pos].out_low = out_low;
+        p->previous[ch][p->pos + N].out_high = p->previous[ch][p->pos].out_high = out_high;
+    }
+}
+
+static int mcompand_channel(MCompandContext *c, CompBand *l, double *ibuf, double *obuf, int len, int ch)
+{
+    int i;
+
+    for (i = 0; i < len; i++) {
+        double level_in_lin, level_out_lin, checkbuf;
+        /* Maintain the volume fields by simulating a leaky pump circuit */
+        update_volume(l, fabs(ibuf[i]), ch);
+
+        /* Volume memory is updated: perform compand */
+        level_in_lin = l->volume[ch];
+        level_out_lin = get_volume(&l->transfer_fn, level_in_lin);
+
+        if (c->delay_buf_size <= 0) {
+            checkbuf = ibuf[i] * level_out_lin;
+            obuf[i] = checkbuf;
+        } else {
+            double *delay_buf = (double *)l->delay_buf->extended_data[ch];
+
+            /* FIXME: note that this lookahead algorithm is really lame:
+               the response to a peak is released before the peak
+               arrives. */
+
+            /* because volume application delays differ band to band, but
+               total delay doesn't, the volume is applied in an iteration
+               preceding that in which the sample goes to obuf, except in
+               the band(s) with the longest vol app delay.
+
+               the offset between delay_buf_ptr and the sample to apply
+               vol to, is a constant equal to the difference between this
+               band's delay and the longest delay of all the bands. */
+
+            if (l->delay_buf_cnt >= l->delay_size) {
+                checkbuf =
+                    delay_buf[(l->delay_buf_ptr +
+                               c->delay_buf_size -
+                               l->delay_size) % c->delay_buf_size] * level_out_lin;
+                delay_buf[(l->delay_buf_ptr + c->delay_buf_size -
+                           l->delay_size) % c->delay_buf_size] = checkbuf;
+            }
+            if (l->delay_buf_cnt >= c->delay_buf_size) {
+                obuf[i] = delay_buf[l->delay_buf_ptr];
+            } else {
+                l->delay_buf_cnt++;
+            }
+            delay_buf[l->delay_buf_ptr++] = ibuf[i];
+            l->delay_buf_ptr %= c->delay_buf_size;
+        }
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext  *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    MCompandContext *s    = ctx->priv;
+    AVFrame *out, *abuf, *bbuf, *cbuf;
+    int ch, band, i;
+
+    out = ff_get_audio_buffer(outlink, in->nb_samples);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+
+    if (s->band_samples < in->nb_samples) {
+        av_frame_free(&s->band_buf1);
+        av_frame_free(&s->band_buf2);
+        av_frame_free(&s->band_buf3);
+
+        s->band_buf1 = ff_get_audio_buffer(outlink, in->nb_samples);
+        s->band_buf2 = ff_get_audio_buffer(outlink, in->nb_samples);
+        s->band_buf3 = ff_get_audio_buffer(outlink, in->nb_samples);
+        s->band_samples = in->nb_samples;
+    }
+
+    for (ch = 0; ch < outlink->channels; ch++) {
+        double *a, *dst = (double *)out->extended_data[ch];
+
+        for (band = 0, abuf = in, bbuf = s->band_buf2, cbuf = s->band_buf1; band < s->nb_bands; band++) {
+            CompBand *b = &s->bands[band];
+
+            if (b->topfreq) {
+                crossover(ch, &b->filter, (double *)abuf->extended_data[ch],
+                          (double *)bbuf->extended_data[ch], (double *)cbuf->extended_data[ch], in->nb_samples);
+            } else {
+                bbuf = abuf;
+                abuf = cbuf;
+            }
+
+            if (abuf == in)
+                abuf = s->band_buf3;
+            mcompand_channel(s, b, (double *)bbuf->extended_data[ch], (double *)abuf->extended_data[ch], out->nb_samples, ch);
+            a = (double *)abuf->extended_data[ch];
+            for (i = 0; i < out->nb_samples; i++) {
+                dst[i] += a[i];
+            }
+
+            FFSWAP(AVFrame *, abuf, cbuf);
+        }
+    }
+
+    out->pts = in->pts;
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    int ret;
+
+    ret = ff_request_frame(ctx->inputs[0]);
+
+    return ret;
+}
+
+static const AVFilterPad mcompand_inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_AUDIO,
+        .filter_frame   = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad mcompand_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .request_frame = request_frame,
+        .config_props  = config_output,
+    },
+    { NULL }
+};
+
+
+AVFilter ff_af_mcompand = {
+    .name           = "mcompand",
+    .description    = NULL_IF_CONFIG_SMALL(
+            "Multiband Compress or expand audio dynamic range."),
+    .query_formats  = query_formats,
+    .priv_size      = sizeof(MCompandContext),
+    .priv_class     = &mcompand_class,
+    .uninit         = uninit,
+    .inputs         = mcompand_inputs,
+    .outputs        = mcompand_outputs,
+};

diff --git a/libavfilter/af_pan.c b/libavfilter/af_pan.c
index d8a63a7..34e522c 100644
--- a/libavfilter/af_pan.c
+++ b/libavfilter/af_pan.c

@@ -104,6 +104,7 @@
     char *arg, *arg0, *tokenizer, *args = av_strdup(pan->args);
     int out_ch_id, in_ch_id, len, named, ret, sign = 1;
     int nb_in_channels[2] = { 0, 0 }; // number of unnamed and named input channels
+    int used_out_ch[MAX_CHANNELS] = {0};
     double gain;
 
     if (!pan->args) {
@@ -127,6 +128,7 @@
 
     /* parse channel specifications */
     while ((arg = arg0 = av_strtok(NULL, "|", &tokenizer))) {
+        int used_in_ch[MAX_CHANNELS] = {0};
         /* channel name */
         if (parse_channel_name(&arg, &out_ch_id, &named)) {
             av_log(ctx, AV_LOG_ERROR,
@@ -153,6 +155,13 @@
             ret = AVERROR(EINVAL);
             goto fail;
         }
+        if (used_out_ch[out_ch_id]) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Can not reference out channel %d twice\n", out_ch_id);
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+        used_out_ch[out_ch_id] = 1;
         skip_spaces(&arg);
         if (*arg == '=') {
             arg++;
@@ -184,6 +193,13 @@
                 ret = AVERROR(EINVAL);
                 goto fail;
             }
+            if (used_in_ch[in_ch_id]) {
+                av_log(ctx, AV_LOG_ERROR,
+                       "Can not reference in channel %d twice\n", in_ch_id);
+                ret = AVERROR(EINVAL);
+                goto fail;
+            }
+            used_in_ch[in_ch_id] = 1;
             pan->gain[out_ch_id][in_ch_id] = sign * gain;
             skip_spaces(&arg);
             if (!*arg)

diff --git a/libavfilter/af_replaygain.c b/libavfilter/af_replaygain.c
index c8f6f96..9761734 100644
--- a/libavfilter/af_replaygain.c
+++ b/libavfilter/af_replaygain.c

@@ -554,7 +554,7 @@
     uint32_t level;
     AVFrame *out;
 
-    out = ff_get_audio_buffer(inlink, in->nb_samples);
+    out = ff_get_audio_buffer(outlink, in->nb_samples);
     if (!out) {
         av_frame_free(&in);
         return AVERROR(ENOMEM);

diff --git a/libavfilter/af_rubberband.c b/libavfilter/af_rubberband.c
index ded2544..ea6f4ff 100644
--- a/libavfilter/af_rubberband.c
+++ b/libavfilter/af_rubberband.c

@@ -128,7 +128,7 @@
 
     nb_samples = rubberband_available(s->rbs);
     if (nb_samples > 0) {
-        out = ff_get_audio_buffer(inlink, nb_samples);
+        out = ff_get_audio_buffer(outlink, nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);
@@ -187,7 +187,7 @@
             nb_samples = rubberband_available(s->rbs);
 
             if (nb_samples > 0) {
-                out = ff_get_audio_buffer(inlink, nb_samples);
+                out = ff_get_audio_buffer(outlink, nb_samples);
                 if (!out)
                     return AVERROR(ENOMEM);
                 out->pts = av_rescale_q(s->nb_samples_out,

diff --git a/libavfilter/af_sidechaincompress.c b/libavfilter/af_sidechaincompress.c
index 55bed43..888049e 100644
--- a/libavfilter/af_sidechaincompress.c
+++ b/libavfilter/af_sidechaincompress.c

@@ -241,14 +241,13 @@
     }
     FF_FILTER_FORWARD_STATUS(ctx->inputs[0], ctx->outputs[0]);
     FF_FILTER_FORWARD_STATUS(ctx->inputs[1], ctx->outputs[0]);
-    /* TODO reindent */
-        if (ff_outlink_frame_wanted(ctx->outputs[0])) {
-            if (!av_audio_fifo_size(s->fifo[0]))
-                ff_inlink_request_frame(ctx->inputs[0]);
-            if (!av_audio_fifo_size(s->fifo[1]))
-                ff_inlink_request_frame(ctx->inputs[1]);
-        }
-        return 0;
+    if (ff_outlink_frame_wanted(ctx->outputs[0])) {
+        if (!av_audio_fifo_size(s->fifo[0]))
+            ff_inlink_request_frame(ctx->inputs[0]);
+        if (!av_audio_fifo_size(s->fifo[1]))
+            ff_inlink_request_frame(ctx->inputs[1]);
+    }
+    return 0;
 }
 
 static int query_formats(AVFilterContext *ctx)
@@ -368,7 +367,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_silencedetect.c b/libavfilter/af_silencedetect.c
index b048d63..3a71f39 100644
--- a/libavfilter/af_silencedetect.c
+++ b/libavfilter/af_silencedetect.c

@@ -36,9 +36,14 @@
     const AVClass *class;
     double noise;               ///< noise amplitude ratio
     double duration;            ///< minimum duration of silence until notification
-    int64_t nb_null_samples;    ///< current number of continuous zero samples
-    int64_t start;              ///< if silence is detected, this value contains the time of the first zero sample
+    int mono;                   ///< mono mode : check each channel separately (default = check when ALL channels are silent)
+    int channels;               ///< number of channels
+    int independent_channels;   ///< number of entries in following arrays (always 1 in mono mode)
+    int64_t *nb_null_samples;   ///< (array) current number of continuous zero samples
+    int64_t *start;             ///< (array) if silence is detected, this value contains the time of the first zero sample (default/unset = INT64_MIN)
+    int64_t frame_end;          ///< pts of the end of the current frame (used to compute duration of silence at EOS)
     int last_sample_rate;       ///< last sample rate to check for sample rate changes
+    AVRational time_base;       ///< time_base
 
     void (*silencedetect)(struct SilenceDetectContext *s, AVFrame *insamples,
                           int nb_samples, int64_t nb_samples_notify,
@@ -52,44 +57,62 @@
     { "noise",     "set noise tolerance",              OFFSET(noise),     AV_OPT_TYPE_DOUBLE, {.dbl=0.001},          0, DBL_MAX,  FLAGS },
     { "d",         "set minimum duration in seconds",  OFFSET(duration),  AV_OPT_TYPE_DOUBLE, {.dbl=2.},             0, 24*60*60, FLAGS },
     { "duration",  "set minimum duration in seconds",  OFFSET(duration),  AV_OPT_TYPE_DOUBLE, {.dbl=2.},             0, 24*60*60, FLAGS },
+    { "mono",      "check each channel separately",    OFFSET(mono),      AV_OPT_TYPE_BOOL,   {.i64=0.},             0, 1,        FLAGS },
+    { "m",         "check each channel separately",    OFFSET(mono),      AV_OPT_TYPE_BOOL,   {.i64=0.},             0, 1,        FLAGS },
     { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(silencedetect);
 
-static char *get_metadata_val(AVFrame *insamples, const char *key)
+static void set_meta(AVFrame *insamples, int channel, const char *key, char *value)
 {
-    AVDictionaryEntry *e = av_dict_get(insamples->metadata, key, NULL, 0);
-    return e && e->value ? e->value : NULL;
-}
+    char key2[128];
 
+    if (channel)
+        snprintf(key2, sizeof(key2), "lavfi.%s.%d", key, channel);
+    else
+        snprintf(key2, sizeof(key2), "lavfi.%s", key);
+    av_dict_set(&insamples->metadata, key2, value, 0);
+}
 static av_always_inline void update(SilenceDetectContext *s, AVFrame *insamples,
-                                    int is_silence, int64_t nb_samples_notify,
+                                    int is_silence, int current_sample, int64_t nb_samples_notify,
                                     AVRational time_base)
 {
+    int channel = current_sample % s->independent_channels;
     if (is_silence) {
-        if (!s->start) {
-            s->nb_null_samples++;
-            if (s->nb_null_samples >= nb_samples_notify) {
-                s->start = insamples->pts - (int64_t)(s->duration / av_q2d(time_base) + .5);
-                av_dict_set(&insamples->metadata, "lavfi.silence_start",
-                            av_ts2timestr(s->start, &time_base), 0);
+        if (s->start[channel] == INT64_MIN) {
+            s->nb_null_samples[channel]++;
+            if (s->nb_null_samples[channel] >= nb_samples_notify) {
+                s->start[channel] = insamples->pts + av_rescale_q(current_sample / s->channels + 1 - nb_samples_notify * s->independent_channels / s->channels,
+                        (AVRational){ 1, s->last_sample_rate }, time_base);
+                set_meta(insamples, s->mono ? channel + 1 : 0, "silence_start",
+                        av_ts2timestr(s->start[channel], &time_base));
+                if (s->mono)
+                    av_log(s, AV_LOG_INFO, "channel: %d | ", channel);
                 av_log(s, AV_LOG_INFO, "silence_start: %s\n",
-                       get_metadata_val(insamples, "lavfi.silence_start"));
+                        av_ts2timestr(s->start[channel], &time_base));
             }
         }
     } else {
-        if (s->start) {
-            av_dict_set(&insamples->metadata, "lavfi.silence_end",
-                        av_ts2timestr(insamples->pts, &time_base), 0);
-            av_dict_set(&insamples->metadata, "lavfi.silence_duration",
-                        av_ts2timestr(insamples->pts - s->start, &time_base), 0);
-            av_log(s, AV_LOG_INFO,
-                   "silence_end: %s | silence_duration: %s\n",
-                   get_metadata_val(insamples, "lavfi.silence_end"),
-                   get_metadata_val(insamples, "lavfi.silence_duration"));
+        if (s->start[channel] > INT64_MIN) {
+            int64_t end_pts = insamples ? insamples->pts + av_rescale_q(current_sample / s->channels,
+                    (AVRational){ 1, s->last_sample_rate }, time_base)
+                    : s->frame_end;
+            int64_t duration_ts = end_pts - s->start[channel];
+            if (insamples) {
+                set_meta(insamples, s->mono ? channel + 1 : 0, "silence_end",
+                        av_ts2timestr(end_pts, &time_base));
+                set_meta(insamples, s->mono ? channel + 1 : 0, "silence_duration",
+                        av_ts2timestr(duration_ts, &time_base));
+            }
+            if (s->mono)
+                av_log(s, AV_LOG_INFO, "channel: %d | ", channel);
+            av_log(s, AV_LOG_INFO, "silence_end: %s | silence_duration: %s\n",
+                    av_ts2timestr(end_pts, &time_base),
+                    av_ts2timestr(duration_ts, &time_base));
         }
-        s->nb_null_samples = s->start = 0;
+        s->nb_null_samples[channel] = 0;
+        s->start[channel] = INT64_MIN;
     }
 }
 
@@ -103,7 +126,7 @@
     int i;                                                                       \
                                                                                  \
     for (i = 0; i < nb_samples; i++, p++)                                        \
-        update(s, insamples, *p < noise && *p > -noise,                          \
+        update(s, insamples, *p < noise && *p > -noise, i,                       \
                nb_samples_notify, time_base);                                    \
 }
 
@@ -116,6 +139,18 @@
 {
     AVFilterContext *ctx = inlink->dst;
     SilenceDetectContext *s = ctx->priv;
+    int c;
+
+    s->channels = inlink->channels;
+    s->independent_channels = s->mono ? s->channels : 1;
+    s->nb_null_samples = av_mallocz_array(sizeof(*s->nb_null_samples), s->independent_channels);
+    if (!s->nb_null_samples)
+        return AVERROR(ENOMEM);
+    s->start = av_malloc_array(sizeof(*s->start), s->independent_channels);
+    if (!s->start)
+        return AVERROR(ENOMEM);
+    for (c = 0; c < s->independent_channels; c++)
+        s->start[c] = INT64_MIN;
 
     switch (inlink->format) {
     case AV_SAMPLE_FMT_DBL: s->silencedetect = silencedetect_dbl; break;
@@ -139,12 +174,18 @@
     const int nb_channels           = inlink->channels;
     const int srate                 = inlink->sample_rate;
     const int nb_samples            = insamples->nb_samples     * nb_channels;
-    const int64_t nb_samples_notify = srate * s->duration * nb_channels;
+    const int64_t nb_samples_notify = srate * s->duration * (s->mono ? 1 : nb_channels);
+    int c;
 
     // scale number of null samples to the new sample rate
     if (s->last_sample_rate && s->last_sample_rate != srate)
-        s->nb_null_samples = srate * s->nb_null_samples / s->last_sample_rate;
+        for (c = 0; c < s->independent_channels; c++) {
+            s->nb_null_samples[c] = srate * s->nb_null_samples[c] / s->last_sample_rate;
+        }
     s->last_sample_rate = srate;
+    s->time_base = inlink->time_base;
+    s->frame_end = insamples->pts + av_rescale_q(insamples->nb_samples,
+            (AVRational){ 1, s->last_sample_rate }, inlink->time_base);
 
     // TODO: document metadata
     s->silencedetect(s, insamples, nb_samples, nb_samples_notify,
@@ -186,6 +227,18 @@
     return ff_set_common_samplerates(ctx, formats);
 }
 
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SilenceDetectContext *s = ctx->priv;
+    int c;
+
+    for (c = 0; c < s->independent_channels; c++)
+        if (s->start[c] > INT64_MIN)
+            update(s, NULL, 0, c, 0, s->time_base);
+    av_freep(&s->nb_null_samples);
+    av_freep(&s->start);
+}
+
 static const AVFilterPad silencedetect_inputs[] = {
     {
         .name         = "default",
@@ -209,6 +262,7 @@
     .description   = NULL_IF_CONFIG_SMALL("Detect silence."),
     .priv_size     = sizeof(SilenceDetectContext),
     .query_formats = query_formats,
+    .uninit        = uninit,
     .inputs        = silencedetect_inputs,
     .outputs       = silencedetect_outputs,
     .priv_class    = &silencedetect_class,

diff --git a/libavfilter/af_silenceremove.c b/libavfilter/af_silenceremove.c
index af50463..335f55b 100644
--- a/libavfilter/af_silenceremove.c
+++ b/libavfilter/af_silenceremove.c

@@ -30,6 +30,16 @@
 #include "avfilter.h"
 #include "internal.h"
 
+enum SilenceDetect {
+    D_PEAK,
+    D_RMS,
+};
+
+enum ThresholdMode {
+    T_ANY,
+    T_ALL,
+};
+
 enum SilenceMode {
     SILENCE_TRIM,
     SILENCE_TRIM_FLUSH,
@@ -45,20 +55,34 @@
 
     int start_periods;
     int64_t start_duration;
+    int64_t start_duration_opt;
     double start_threshold;
+    int64_t start_silence;
+    int64_t start_silence_opt;
+    int start_mode;
 
     int stop_periods;
     int64_t stop_duration;
+    int64_t stop_duration_opt;
     double stop_threshold;
+    int64_t stop_silence;
+    int64_t stop_silence_opt;
+    int stop_mode;
 
     double *start_holdoff;
+    double *start_silence_hold;
     size_t start_holdoff_offset;
     size_t start_holdoff_end;
+    size_t start_silence_offset;
+    size_t start_silence_end;
     int    start_found_periods;
 
     double *stop_holdoff;
+    double *stop_silence_hold;
     size_t stop_holdoff_offset;
     size_t stop_holdoff_end;
+    size_t stop_silence_offset;
+    size_t stop_silence_end;
     int    stop_found_periods;
 
     double window_ratio;
@@ -68,7 +92,6 @@
     int window_size;
     double sum;
 
-    int leave_silence;
     int restart;
     int64_t next_pts;
 
@@ -78,19 +101,25 @@
 } SilenceRemoveContext;
 
 #define OFFSET(x) offsetof(SilenceRemoveContext, x)
-#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
+#define AF AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
+
 static const AVOption silenceremove_options[] = {
-    { "start_periods",   NULL, OFFSET(start_periods),   AV_OPT_TYPE_INT,      {.i64=0},     0,    9000, FLAGS },
-    { "start_duration",  NULL, OFFSET(start_duration),  AV_OPT_TYPE_DURATION, {.i64=0},     0,    9000, FLAGS },
-    { "start_threshold", NULL, OFFSET(start_threshold), AV_OPT_TYPE_DOUBLE,   {.dbl=0},     0, DBL_MAX, FLAGS },
-    { "stop_periods",    NULL, OFFSET(stop_periods),    AV_OPT_TYPE_INT,      {.i64=0}, -9000,    9000, FLAGS },
-    { "stop_duration",   NULL, OFFSET(stop_duration),   AV_OPT_TYPE_DURATION, {.i64=0},     0,    9000, FLAGS },
-    { "stop_threshold",  NULL, OFFSET(stop_threshold),  AV_OPT_TYPE_DOUBLE,   {.dbl=0},     0, DBL_MAX, FLAGS },
-    { "leave_silence",   NULL, OFFSET(leave_silence),   AV_OPT_TYPE_BOOL,     {.i64=0},     0,       1, FLAGS },
-    { "detection",       NULL, OFFSET(detection),       AV_OPT_TYPE_INT,      {.i64=1},     0,       1, FLAGS, "detection" },
-    {   "peak",          0,    0,                       AV_OPT_TYPE_CONST,    {.i64=0},     0,       0, FLAGS, "detection" },
-    {   "rms",           0,    0,                       AV_OPT_TYPE_CONST,    {.i64=1},     0,       0, FLAGS, "detection" },
-    { "window",          NULL, OFFSET(window_ratio),    AV_OPT_TYPE_DOUBLE,   {.dbl=0.02},  0,      10, FLAGS },
+    { "start_periods",   NULL,                                                 OFFSET(start_periods),       AV_OPT_TYPE_INT,      {.i64=0},     0,      9000, AF },
+    { "start_duration",  "set start duration of non-silence part",             OFFSET(start_duration_opt),  AV_OPT_TYPE_DURATION, {.i64=0},     0, INT32_MAX, AF },
+    { "start_threshold", "set threshold for start silence detection",          OFFSET(start_threshold),     AV_OPT_TYPE_DOUBLE,   {.dbl=0},     0,   DBL_MAX, AF },
+    { "start_silence",   "set start duration of silence part to keep",         OFFSET(start_silence_opt),   AV_OPT_TYPE_DURATION, {.i64=0},     0, INT32_MAX, AF },
+    { "start_mode",      "set which channel will trigger trimming from start", OFFSET(start_mode),          AV_OPT_TYPE_INT,      {.i64=T_ANY}, T_ANY, T_ALL, AF, "mode" },
+    {   "any",           0,                                                    0,                           AV_OPT_TYPE_CONST,    {.i64=T_ANY}, 0,         0, AF, "mode" },
+    {   "all",           0,                                                    0,                           AV_OPT_TYPE_CONST,    {.i64=T_ALL}, 0,         0, AF, "mode" },
+    { "stop_periods",    NULL,                                                 OFFSET(stop_periods),        AV_OPT_TYPE_INT,      {.i64=0}, -9000,      9000, AF },
+    { "stop_duration",   "set stop duration of non-silence part",              OFFSET(stop_duration_opt),   AV_OPT_TYPE_DURATION, {.i64=0},     0, INT32_MAX, AF },
+    { "stop_threshold",  "set threshold for stop silence detection",           OFFSET(stop_threshold),      AV_OPT_TYPE_DOUBLE,   {.dbl=0},     0,   DBL_MAX, AF },
+    { "stop_silence",    "set stop duration of silence part to keep",          OFFSET(stop_silence_opt),    AV_OPT_TYPE_DURATION, {.i64=0},     0, INT32_MAX, AF },
+    { "stop_mode",       "set which channel will trigger trimming from end",   OFFSET(stop_mode),           AV_OPT_TYPE_INT,      {.i64=T_ANY}, T_ANY, T_ALL, AF, "mode" },
+    { "detection",       "set how silence is detected",                        OFFSET(detection),           AV_OPT_TYPE_INT,      {.i64=D_RMS}, D_PEAK,D_RMS, AF, "detection" },
+    {   "peak",          "use absolute values of samples",                     0,                           AV_OPT_TYPE_CONST,    {.i64=D_PEAK},0,         0, AF, "detection" },
+    {   "rms",           "use squared values of samples",                      0,                           AV_OPT_TYPE_CONST,    {.i64=D_RMS}, 0,         0, AF, "detection" },
+    { "window",          "set duration of window in seconds",                  OFFSET(window_ratio),        AV_OPT_TYPE_DOUBLE,   {.dbl=0.02},  0,        10, AF },
     { NULL }
 };
 
@@ -150,15 +179,15 @@
     }
 
     switch (s->detection) {
-    case 0:
+    case D_PEAK:
         s->update = update_peak;
         s->compute = compute_peak;
         break;
-    case 1:
+    case D_RMS:
         s->update = update_rms;
         s->compute = compute_rms;
         break;
-    };
+    }
 
     return 0;
 }
@@ -184,9 +213,13 @@
 
     clear_window(s);
 
-    s->start_duration = av_rescale(s->start_duration, inlink->sample_rate,
+    s->start_duration = av_rescale(s->start_duration_opt, inlink->sample_rate,
                                    AV_TIME_BASE);
-    s->stop_duration  = av_rescale(s->stop_duration, inlink->sample_rate,
+    s->start_silence  = av_rescale(s->start_silence_opt, inlink->sample_rate,
+                                   AV_TIME_BASE);
+    s->stop_duration  = av_rescale(s->stop_duration_opt, inlink->sample_rate,
+                                   AV_TIME_BASE);
+    s->stop_silence   = av_rescale(s->stop_silence_opt, inlink->sample_rate,
                                    AV_TIME_BASE);
 
     s->start_holdoff = av_malloc_array(FFMAX(s->start_duration, 1),
@@ -195,6 +228,12 @@
     if (!s->start_holdoff)
         return AVERROR(ENOMEM);
 
+    s->start_silence_hold = av_malloc_array(FFMAX(s->start_silence, 1),
+                                            sizeof(*s->start_silence_hold) *
+                                            inlink->channels);
+    if (!s->start_silence_hold)
+        return AVERROR(ENOMEM);
+
     s->start_holdoff_offset = 0;
     s->start_holdoff_end    = 0;
     s->start_found_periods  = 0;
@@ -205,6 +244,12 @@
     if (!s->stop_holdoff)
         return AVERROR(ENOMEM);
 
+    s->stop_silence_hold = av_malloc_array(FFMAX(s->stop_silence, 1),
+                                           sizeof(*s->stop_silence_hold) *
+                                           inlink->channels);
+    if (!s->stop_silence_hold)
+        return AVERROR(ENOMEM);
+
     s->stop_holdoff_offset = 0;
     s->stop_holdoff_end    = 0;
     s->stop_found_periods  = 0;
@@ -219,8 +264,10 @@
 
 static void flush(SilenceRemoveContext *s,
                   AVFrame *out, AVFilterLink *outlink,
-                  int *nb_samples_written, int *ret)
+                  int *nb_samples_written, int *ret, int flush_silence)
 {
+    AVFrame *silence;
+
     if (*nb_samples_written) {
         out->nb_samples = *nb_samples_written / outlink->channels;
 
@@ -230,10 +277,43 @@
                                     outlink->time_base);
 
         *ret = ff_filter_frame(outlink, out);
+        if (*ret < 0)
+            return;
         *nb_samples_written = 0;
     } else {
         av_frame_free(&out);
     }
+
+    if (s->stop_silence_end <= 0 || !flush_silence)
+        return;
+
+    silence = ff_get_audio_buffer(outlink, s->stop_silence_end / outlink->channels);
+    if (!silence) {
+        *ret = AVERROR(ENOMEM);
+        return;
+    }
+
+    if (s->stop_silence_offset < s->stop_silence_end) {
+        memcpy(silence->data[0],
+               &s->stop_silence_hold[s->stop_silence_offset],
+               (s->stop_silence_end - s->stop_silence_offset) * sizeof(double));
+    }
+
+    if (s->stop_silence_offset > 0) {
+        memcpy(silence->data[0] + (s->stop_silence_end - s->stop_silence_offset) * sizeof(double),
+               &s->stop_silence_hold[0],
+               s->stop_silence_offset * sizeof(double));
+    }
+
+    s->stop_silence_offset = 0;
+    s->stop_silence_end = 0;
+
+    silence->pts = s->next_pts;
+    s->next_pts += av_rescale_q(silence->nb_samples,
+                                (AVRational){1, outlink->sample_rate},
+                                outlink->time_base);
+
+    *ret = ff_filter_frame(outlink, silence);
 }
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
@@ -251,24 +331,31 @@
     switch (s->mode) {
     case SILENCE_TRIM:
 silence_trim:
-        nbs = in->nb_samples - nb_samples_read / inlink->channels;
+        nbs = in->nb_samples - nb_samples_read / outlink->channels;
         if (!nbs)
             break;
 
         for (i = 0; i < nbs; i++) {
-            threshold = 0;
-            for (j = 0; j < inlink->channels; j++) {
-                threshold |= s->compute(s, ibuf[j]) > s->start_threshold;
+            if (s->start_mode == T_ANY) {
+                threshold = 0;
+                for (j = 0; j < outlink->channels; j++) {
+                    threshold |= s->compute(s, ibuf[j]) > s->start_threshold;
+                }
+            } else {
+                threshold = 1;
+                for (j = 0; j < outlink->channels; j++) {
+                    threshold &= s->compute(s, ibuf[j]) > s->start_threshold;
+                }
             }
 
             if (threshold) {
-                for (j = 0; j < inlink->channels; j++) {
+                for (j = 0; j < outlink->channels; j++) {
                     s->update(s, *ibuf);
                     s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
                 }
-                nb_samples_read += inlink->channels;
+                nb_samples_read += outlink->channels;
 
-                if (s->start_holdoff_end >= s->start_duration * inlink->channels) {
+                if (s->start_holdoff_end >= s->start_duration * outlink->channels) {
                     if (++s->start_found_periods >= s->start_periods) {
                         s->mode = SILENCE_TRIM_FLUSH;
                         goto silence_trim_flush;
@@ -276,15 +363,25 @@
 
                     s->start_holdoff_offset = 0;
                     s->start_holdoff_end = 0;
+                    s->start_silence_offset = 0;
+                    s->start_silence_end = 0;
                 }
             } else {
                 s->start_holdoff_end = 0;
 
-                for (j = 0; j < inlink->channels; j++)
+                for (j = 0; j < outlink->channels; j++) {
                     s->update(s, ibuf[j]);
+                    if (s->start_silence) {
+                        s->start_silence_hold[s->start_silence_offset++] = ibuf[j];
+                        s->start_silence_end = FFMIN(s->start_silence_end + 1, outlink->channels * s->start_silence);
+                        if (s->start_silence_offset >= outlink->channels * s->start_silence) {
+                            s->start_silence_offset = 0;
+                        }
+                    }
+                }
 
-                ibuf += inlink->channels;
-                nb_samples_read += inlink->channels;
+                ibuf += outlink->channels;
+                nb_samples_read += outlink->channels;
             }
         }
         break;
@@ -292,17 +389,32 @@
     case SILENCE_TRIM_FLUSH:
 silence_trim_flush:
         nbs  = s->start_holdoff_end - s->start_holdoff_offset;
-        nbs -= nbs % inlink->channels;
+        nbs -= nbs % outlink->channels;
         if (!nbs)
             break;
 
-        out = ff_get_audio_buffer(inlink, nbs / inlink->channels);
+        out = ff_get_audio_buffer(outlink, nbs / outlink->channels + s->start_silence_end / outlink->channels);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);
         }
 
-        memcpy(out->data[0], &s->start_holdoff[s->start_holdoff_offset],
+        if (s->start_silence_end > 0) {
+            if (s->start_silence_offset < s->start_silence_end) {
+                memcpy(out->data[0],
+                       &s->start_silence_hold[s->start_silence_offset],
+                       (s->start_silence_end - s->start_silence_offset) * sizeof(double));
+            }
+
+            if (s->start_silence_offset > 0) {
+                memcpy(out->data[0] + (s->start_silence_end - s->start_silence_offset) * sizeof(double),
+                       &s->start_silence_hold[0],
+                       s->start_silence_offset * sizeof(double));
+            }
+        }
+
+        memcpy(out->data[0] + s->start_silence_end * sizeof(double),
+               &s->start_holdoff[s->start_holdoff_offset],
                nbs * sizeof(double));
 
         out->pts = s->next_pts;
@@ -317,6 +429,8 @@
         if (s->start_holdoff_offset == s->start_holdoff_end) {
             s->start_holdoff_offset = 0;
             s->start_holdoff_end = 0;
+            s->start_silence_offset = 0;
+            s->start_silence_end = 0;
             s->mode = SILENCE_COPY;
             goto silence_copy;
         }
@@ -324,11 +438,11 @@
 
     case SILENCE_COPY:
 silence_copy:
-        nbs = in->nb_samples - nb_samples_read / inlink->channels;
+        nbs = in->nb_samples - nb_samples_read / outlink->channels;
         if (!nbs)
             break;
 
-        out = ff_get_audio_buffer(inlink, nbs);
+        out = ff_get_audio_buffer(outlink, nbs);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);
@@ -337,62 +451,75 @@
 
         if (s->stop_periods) {
             for (i = 0; i < nbs; i++) {
-                threshold = 1;
-                for (j = 0; j < inlink->channels; j++)
-                    threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
+                if (s->stop_mode == T_ANY) {
+                    threshold = 0;
+                    for (j = 0; j < outlink->channels; j++) {
+                        threshold |= s->compute(s, ibuf[j]) > s->stop_threshold;
+                    }
+                } else {
+                    threshold = 1;
+                    for (j = 0; j < outlink->channels; j++) {
+                        threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
+                    }
+                }
 
-                if (threshold && s->stop_holdoff_end && !s->leave_silence) {
+                if (threshold && s->stop_holdoff_end && !s->stop_silence) {
                     s->mode = SILENCE_COPY_FLUSH;
-                    flush(s, out, outlink, &nb_samples_written, &ret);
+                    flush(s, out, outlink, &nb_samples_written, &ret, 0);
                     goto silence_copy_flush;
                 } else if (threshold) {
-                    for (j = 0; j < inlink->channels; j++) {
+                    for (j = 0; j < outlink->channels; j++) {
                         s->update(s, *ibuf);
                         *obuf++ = *ibuf++;
                     }
-                    nb_samples_read    += inlink->channels;
-                    nb_samples_written += inlink->channels;
+                    nb_samples_read    += outlink->channels;
+                    nb_samples_written += outlink->channels;
                 } else if (!threshold) {
-                    for (j = 0; j < inlink->channels; j++) {
+                    for (j = 0; j < outlink->channels; j++) {
                         s->update(s, *ibuf);
-                        if (s->leave_silence) {
-                            *obuf++ = *ibuf;
-                            nb_samples_written++;
+                        if (s->stop_silence) {
+                            s->stop_silence_hold[s->stop_silence_offset++] = *ibuf;
+                            s->stop_silence_end = FFMIN(s->stop_silence_end + 1, outlink->channels * s->stop_silence);
+                            if (s->stop_silence_offset >= outlink->channels * s->stop_silence) {
+                                s->stop_silence_offset = 0;
+                            }
                         }
 
                         s->stop_holdoff[s->stop_holdoff_end++] = *ibuf++;
                     }
-                    nb_samples_read += inlink->channels;
+                    nb_samples_read += outlink->channels;
 
-                    if (s->stop_holdoff_end >= s->stop_duration * inlink->channels) {
+                    if (s->stop_holdoff_end >= s->stop_duration * outlink->channels) {
                         if (++s->stop_found_periods >= s->stop_periods) {
                             s->stop_holdoff_offset = 0;
                             s->stop_holdoff_end = 0;
 
                             if (!s->restart) {
                                 s->mode = SILENCE_STOP;
-                                flush(s, out, outlink, &nb_samples_written, &ret);
+                                flush(s, out, outlink, &nb_samples_written, &ret, 1);
                                 goto silence_stop;
                             } else {
                                 s->stop_found_periods = 0;
                                 s->start_found_periods = 0;
                                 s->start_holdoff_offset = 0;
                                 s->start_holdoff_end = 0;
+                                s->start_silence_offset = 0;
+                                s->start_silence_end = 0;
                                 clear_window(s);
                                 s->mode = SILENCE_TRIM;
-                                flush(s, out, outlink, &nb_samples_written, &ret);
+                                flush(s, out, outlink, &nb_samples_written, &ret, 1);
                                 goto silence_trim;
                             }
                         }
                         s->mode = SILENCE_COPY_FLUSH;
-                        flush(s, out, outlink, &nb_samples_written, &ret);
+                        flush(s, out, outlink, &nb_samples_written, &ret, 0);
                         goto silence_copy_flush;
                     }
                 }
             }
-            flush(s, out, outlink, &nb_samples_written, &ret);
+            flush(s, out, outlink, &nb_samples_written, &ret, 0);
         } else {
-            memcpy(obuf, ibuf, sizeof(double) * nbs * inlink->channels);
+            memcpy(obuf, ibuf, sizeof(double) * nbs * outlink->channels);
 
             out->pts = s->next_pts;
             s->next_pts += av_rescale_q(out->nb_samples,
@@ -406,11 +533,11 @@
     case SILENCE_COPY_FLUSH:
 silence_copy_flush:
         nbs  = s->stop_holdoff_end - s->stop_holdoff_offset;
-        nbs -= nbs % inlink->channels;
+        nbs -= nbs % outlink->channels;
         if (!nbs)
             break;
 
-        out = ff_get_audio_buffer(inlink, nbs / inlink->channels);
+        out = ff_get_audio_buffer(outlink, nbs / outlink->channels);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);
@@ -430,6 +557,8 @@
         if (s->stop_holdoff_offset == s->stop_holdoff_end) {
             s->stop_holdoff_offset = 0;
             s->stop_holdoff_end = 0;
+            s->stop_silence_offset = 0;
+            s->stop_silence_end = 0;
             s->mode = SILENCE_COPY;
             goto silence_copy;
         }
@@ -469,7 +598,7 @@
                                         (AVRational){1, outlink->sample_rate},
                                         outlink->time_base);
 
-            ret = ff_filter_frame(ctx->inputs[0], frame);
+            ret = ff_filter_frame(outlink, frame);
         }
         s->mode = SILENCE_STOP;
     }
@@ -510,7 +639,9 @@
     SilenceRemoveContext *s = ctx->priv;
 
     av_freep(&s->start_holdoff);
+    av_freep(&s->start_silence_hold);
     av_freep(&s->stop_holdoff);
+    av_freep(&s->stop_silence_hold);
     av_freep(&s->window);
 }
 

diff --git a/libavfilter/af_stereotools.c b/libavfilter/af_stereotools.c
index a5e0b42..7e52978 100644
--- a/libavfilter/af_stereotools.c
+++ b/libavfilter/af_stereotools.c

@@ -166,7 +166,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_stereowiden.c b/libavfilter/af_stereowiden.c
index 24146ff..ef16fce 100644
--- a/libavfilter/af_stereowiden.c
+++ b/libavfilter/af_stereowiden.c

@@ -98,7 +98,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_surround.c b/libavfilter/af_surround.c
index c712237..f29afec 100644
--- a/libavfilter/af_surround.c
+++ b/libavfilter/af_surround.c

@@ -90,6 +90,15 @@
                       float mag_total,
                       float x, float y,
                       int n);
+    void (*upmix_5_0)(AVFilterContext *ctx,
+                      float c_re, float c_im,
+                      float mag_totall, float mag_totalr,
+                      float fl_phase, float fr_phase,
+                      float bl_phase, float br_phase,
+                      float sl_phase, float sr_phase,
+                      float xl, float yl,
+                      float xr, float yr,
+                      int n);
     void (*upmix_5_1)(AVFilterContext *ctx,
                       float c_re, float c_im,
                       float lfe_re, float lfe_im,
@@ -764,6 +773,66 @@
     dstrs[2 * n + 1] = rs_mag * sinf(r_phase);
 }
 
+static void upmix_7_1_5_0_side(AVFilterContext *ctx,
+                               float c_re, float c_im,
+                               float mag_totall, float mag_totalr,
+                               float fl_phase, float fr_phase,
+                               float bl_phase, float br_phase,
+                               float sl_phase, float sr_phase,
+                               float xl, float yl,
+                               float xr, float yr,
+                               int n)
+{
+    float fl_mag, fr_mag, ls_mag, rs_mag, lb_mag, rb_mag;
+    float *dstc, *dstl, *dstr, *dstls, *dstrs, *dstlb, *dstrb, *dstlfe;
+    float lfe_mag, c_phase, mag_total = (mag_totall + mag_totalr) * 0.5;
+    AudioSurroundContext *s = ctx->priv;
+
+    dstl  = (float *)s->output->extended_data[0];
+    dstr  = (float *)s->output->extended_data[1];
+    dstc  = (float *)s->output->extended_data[2];
+    dstlfe = (float *)s->output->extended_data[3];
+    dstlb = (float *)s->output->extended_data[4];
+    dstrb = (float *)s->output->extended_data[5];
+    dstls = (float *)s->output->extended_data[6];
+    dstrs = (float *)s->output->extended_data[7];
+
+    c_phase = atan2f(c_im, c_re);
+
+    get_lfe(s->output_lfe, n, s->lowcut, s->highcut, &lfe_mag, &mag_total);
+
+    fl_mag = sqrtf(.5f * (xl + 1.f)) * ((yl + 1.f) * .5f) * mag_totall;
+    fr_mag = sqrtf(.5f * (xr + 1.f)) * ((yr + 1.f) * .5f) * mag_totalr;
+    lb_mag = sqrtf(.5f * (-xl + 1.f)) * ((yl + 1.f) * .5f) * mag_totall;
+    rb_mag = sqrtf(.5f * (-xr + 1.f)) * ((yr + 1.f) * .5f) * mag_totalr;
+    ls_mag = sqrtf(1.f - fabsf(xl)) * ((yl + 1.f) * .5f) * mag_totall;
+    rs_mag = sqrtf(1.f - fabsf(xr)) * ((yr + 1.f) * .5f) * mag_totalr;
+
+    dstl[2 * n    ] = fl_mag * cosf(fl_phase);
+    dstl[2 * n + 1] = fl_mag * sinf(fl_phase);
+
+    dstr[2 * n    ] = fr_mag * cosf(fr_phase);
+    dstr[2 * n + 1] = fr_mag * sinf(fr_phase);
+
+    dstc[2 * n    ] = c_re;
+    dstc[2 * n + 1] = c_im;
+
+    dstlfe[2 * n    ] = lfe_mag * cosf(c_phase);
+    dstlfe[2 * n + 1] = lfe_mag * sinf(c_phase);
+
+    dstlb[2 * n    ] = lb_mag * cosf(bl_phase);
+    dstlb[2 * n + 1] = lb_mag * sinf(bl_phase);
+
+    dstrb[2 * n    ] = rb_mag * cosf(br_phase);
+    dstrb[2 * n + 1] = rb_mag * sinf(br_phase);
+
+    dstls[2 * n    ] = ls_mag * cosf(sl_phase);
+    dstls[2 * n + 1] = ls_mag * sinf(sl_phase);
+
+    dstrs[2 * n    ] = rs_mag * cosf(sr_phase);
+    dstrs[2 * n + 1] = rs_mag * sinf(sr_phase);
+}
+
 static void upmix_7_1_5_1(AVFilterContext *ctx,
                           float c_re, float c_im,
                           float lfe_re, float lfe_im,
@@ -918,6 +987,118 @@
     }
 }
 
+static void filter_5_0_side(AVFilterContext *ctx)
+{
+    AudioSurroundContext *s = ctx->priv;
+    float *srcl, *srcr, *srcc, *srcsl, *srcsr;
+    int n;
+
+    srcl = (float *)s->input->extended_data[0];
+    srcr = (float *)s->input->extended_data[1];
+    srcc = (float *)s->input->extended_data[2];
+    srcsl = (float *)s->input->extended_data[3];
+    srcsr = (float *)s->input->extended_data[4];
+
+    for (n = 0; n < s->buf_size; n++) {
+        float fl_re = srcl[2 * n], fr_re = srcr[2 * n];
+        float fl_im = srcl[2 * n + 1], fr_im = srcr[2 * n + 1];
+        float c_re = srcc[2 * n], c_im = srcc[2 * n + 1];
+        float sl_re = srcsl[2 * n], sl_im = srcsl[2 * n + 1];
+        float sr_re = srcsr[2 * n], sr_im = srcsr[2 * n + 1];
+        float fl_mag = hypotf(fl_re, fl_im);
+        float fr_mag = hypotf(fr_re, fr_im);
+        float fl_phase = atan2f(fl_im, fl_re);
+        float fr_phase = atan2f(fr_im, fr_re);
+        float sl_mag = hypotf(sl_re, sl_im);
+        float sr_mag = hypotf(sr_re, sr_im);
+        float sl_phase = atan2f(sl_im, sl_re);
+        float sr_phase = atan2f(sr_im, sr_re);
+        float phase_difl = fabsf(fl_phase - sl_phase);
+        float phase_difr = fabsf(fr_phase - sr_phase);
+        float mag_difl = (fl_mag - sl_mag) / (fl_mag + sl_mag);
+        float mag_difr = (fr_mag - sr_mag) / (fr_mag + sr_mag);
+        float mag_totall = hypotf(fl_mag, sl_mag);
+        float mag_totalr = hypotf(fr_mag, sr_mag);
+        float bl_phase = atan2f(fl_im + sl_im, fl_re + sl_re);
+        float br_phase = atan2f(fr_im + sr_im, fr_re + sr_re);
+        float xl, yl;
+        float xr, yr;
+
+        if (phase_difl > M_PI)
+            phase_difl = 2 * M_PI - phase_difl;
+
+        if (phase_difr > M_PI)
+            phase_difr = 2 * M_PI - phase_difr;
+
+        stereo_position(mag_difl, phase_difl, &xl, &yl);
+        stereo_position(mag_difr, phase_difr, &xr, &yr);
+
+        s->upmix_5_0(ctx, c_re, c_im,
+                     mag_totall, mag_totalr,
+                     fl_phase, fr_phase,
+                     bl_phase, br_phase,
+                     sl_phase, sr_phase,
+                     xl, yl, xr, yr, n);
+    }
+}
+
+static void filter_5_1_side(AVFilterContext *ctx)
+{
+    AudioSurroundContext *s = ctx->priv;
+    float *srcl, *srcr, *srcc, *srclfe, *srcsl, *srcsr;
+    int n;
+
+    srcl = (float *)s->input->extended_data[0];
+    srcr = (float *)s->input->extended_data[1];
+    srcc = (float *)s->input->extended_data[2];
+    srclfe = (float *)s->input->extended_data[3];
+    srcsl = (float *)s->input->extended_data[4];
+    srcsr = (float *)s->input->extended_data[5];
+
+    for (n = 0; n < s->buf_size; n++) {
+        float fl_re = srcl[2 * n], fr_re = srcr[2 * n];
+        float fl_im = srcl[2 * n + 1], fr_im = srcr[2 * n + 1];
+        float c_re = srcc[2 * n], c_im = srcc[2 * n + 1];
+        float lfe_re = srclfe[2 * n], lfe_im = srclfe[2 * n + 1];
+        float sl_re = srcsl[2 * n], sl_im = srcsl[2 * n + 1];
+        float sr_re = srcsr[2 * n], sr_im = srcsr[2 * n + 1];
+        float fl_mag = hypotf(fl_re, fl_im);
+        float fr_mag = hypotf(fr_re, fr_im);
+        float fl_phase = atan2f(fl_im, fl_re);
+        float fr_phase = atan2f(fr_im, fr_re);
+        float sl_mag = hypotf(sl_re, sl_im);
+        float sr_mag = hypotf(sr_re, sr_im);
+        float sl_phase = atan2f(sl_im, sl_re);
+        float sr_phase = atan2f(sr_im, sr_re);
+        float phase_difl = fabsf(fl_phase - sl_phase);
+        float phase_difr = fabsf(fr_phase - sr_phase);
+        float mag_difl = (fl_mag - sl_mag) / (fl_mag + sl_mag);
+        float mag_difr = (fr_mag - sr_mag) / (fr_mag + sr_mag);
+        float mag_totall = hypotf(fl_mag, sl_mag);
+        float mag_totalr = hypotf(fr_mag, sr_mag);
+        float bl_phase = atan2f(fl_im + sl_im, fl_re + sl_re);
+        float br_phase = atan2f(fr_im + sr_im, fr_re + sr_re);
+        float xl, yl;
+        float xr, yr;
+
+        if (phase_difl > M_PI)
+            phase_difl = 2 * M_PI - phase_difl;
+
+        if (phase_difr > M_PI)
+            phase_difr = 2 * M_PI - phase_difr;
+
+        stereo_position(mag_difl, phase_difl, &xl, &yl);
+        stereo_position(mag_difr, phase_difr, &xr, &yr);
+
+        s->upmix_5_1(ctx, c_re, c_im, lfe_re, lfe_im,
+                     mag_totall, mag_totalr,
+                     fl_phase, fr_phase,
+                     bl_phase, br_phase,
+                     sl_phase, sr_phase,
+                     xl, yl, xr, yr, n);
+    }
+}
+
 static void filter_5_1_back(AVFilterContext *ctx)
 {
     AudioSurroundContext *s = ctx->priv;
@@ -1063,6 +1244,26 @@
             goto fail;
         }
         break;
+    case AV_CH_LAYOUT_5POINT0:
+        s->filter = filter_5_0_side;
+        switch (s->out_channel_layout) {
+        case AV_CH_LAYOUT_7POINT1:
+            s->upmix_5_0 = upmix_7_1_5_0_side;
+            break;
+        default:
+            goto fail;
+        }
+        break;
+    case AV_CH_LAYOUT_5POINT1:
+        s->filter = filter_5_1_side;
+        switch (s->out_channel_layout) {
+        case AV_CH_LAYOUT_7POINT1:
+            s->upmix_5_1 = upmix_7_1_5_1;
+            break;
+        default:
+            goto fail;
+        }
+        break;
     case AV_CH_LAYOUT_5POINT1_BACK:
         s->filter = filter_5_1_back;
         switch (s->out_channel_layout) {
@@ -1149,18 +1350,19 @@
     AVFilterContext *ctx = inlink->dst;
     AVFilterLink *outlink = ctx->outputs[0];
     AudioSurroundContext *s = ctx->priv;
+    int ret;
 
-    av_audio_fifo_write(s->fifo, (void **)in->extended_data,
-                        in->nb_samples);
-
-    if (s->pts == AV_NOPTS_VALUE)
+    ret = av_audio_fifo_write(s->fifo, (void **)in->extended_data,
+                              in->nb_samples);
+    if (ret >= 0 && s->pts == AV_NOPTS_VALUE)
         s->pts = in->pts;
 
     av_frame_free(&in);
+    if (ret < 0)
+        return ret;
 
     while (av_audio_fifo_size(s->fifo) >= s->buf_size) {
         AVFrame *out;
-        int ret;
 
         ret = av_audio_fifo_peek(s->fifo, (void **)s->input->extended_data, s->buf_size);
         if (ret < 0)
@@ -1188,6 +1390,27 @@
     return 0;
 }
 
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioSurroundContext *s = ctx->priv;
+    int ret = 0;
+
+    ret = ff_request_frame(ctx->inputs[0]);
+
+    if (ret == AVERROR_EOF && av_audio_fifo_size(s->fifo) > 0 && av_audio_fifo_size(s->fifo) < s->buf_size) {
+        AVFrame *in;
+
+        in = ff_get_audio_buffer(outlink, s->buf_size - av_audio_fifo_size(s->fifo));
+        if (!in)
+            return AVERROR(ENOMEM);
+        ret = filter_frame(ctx->inputs[0], in);
+        av_audio_fifo_drain(s->fifo, s->buf_size);
+    }
+
+    return ret;
+}
+
 static av_cold void uninit(AVFilterContext *ctx)
 {
     AudioSurroundContext *s = ctx->priv;
@@ -1243,9 +1466,10 @@
 
 static const AVFilterPad outputs[] = {
     {
-        .name         = "default",
-        .type         = AVMEDIA_TYPE_AUDIO,
-        .config_props = config_output,
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .request_frame = request_frame,
+        .config_props  = config_output,
     },
     { NULL }
 };

diff --git a/libavfilter/af_tremolo.c b/libavfilter/af_tremolo.c
index 572e9e3..8cbc798 100644
--- a/libavfilter/af_tremolo.c
+++ b/libavfilter/af_tremolo.c

@@ -57,7 +57,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_vibrato.c b/libavfilter/af_vibrato.c
index c7691f2..22bbab6 100644
--- a/libavfilter/af_vibrato.c
+++ b/libavfilter/af_vibrato.c

@@ -63,7 +63,7 @@
     if (av_frame_is_writable(in)) {
         out = in;
     } else {
-        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
         if (!out) {
             av_frame_free(&in);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/af_volume.c b/libavfilter/af_volume.c
index 3d76f12..b106ed8 100644
--- a/libavfilter/af_volume.c
+++ b/libavfilter/af_volume.c

@@ -410,7 +410,7 @@
             && (vol->precision != PRECISION_FIXED || vol->volume_i > 0)) {
         out_buf = buf;
     } else {
-        out_buf = ff_get_audio_buffer(inlink, nb_samples);
+        out_buf = ff_get_audio_buffer(outlink, nb_samples);
         if (!out_buf) {
             av_frame_free(&buf);
             return AVERROR(ENOMEM);

diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 9b672a7..5e72803 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c

@@ -22,391 +22,488 @@
 #include "libavutil/thread.h"
 #include "avfilter.h"
 #include "config.h"
-#include "opencl_allkernels.h"
+
+extern AVFilter ff_af_abench;
+extern AVFilter ff_af_acompressor;
+extern AVFilter ff_af_acontrast;
+extern AVFilter ff_af_acopy;
+extern AVFilter ff_af_acue;
+extern AVFilter ff_af_acrossfade;
+extern AVFilter ff_af_acrossover;
+extern AVFilter ff_af_acrusher;
+extern AVFilter ff_af_adeclick;
+extern AVFilter ff_af_adeclip;
+extern AVFilter ff_af_adelay;
+extern AVFilter ff_af_aderivative;
+extern AVFilter ff_af_aecho;
+extern AVFilter ff_af_aemphasis;
+extern AVFilter ff_af_aeval;
+extern AVFilter ff_af_afade;
+extern AVFilter ff_af_afftdn;
+extern AVFilter ff_af_afftfilt;
+extern AVFilter ff_af_afir;
+extern AVFilter ff_af_aformat;
+extern AVFilter ff_af_agate;
+extern AVFilter ff_af_aiir;
+extern AVFilter ff_af_aintegral;
+extern AVFilter ff_af_ainterleave;
+extern AVFilter ff_af_alimiter;
+extern AVFilter ff_af_allpass;
+extern AVFilter ff_af_aloop;
+extern AVFilter ff_af_amerge;
+extern AVFilter ff_af_ametadata;
+extern AVFilter ff_af_amix;
+extern AVFilter ff_af_amultiply;
+extern AVFilter ff_af_anequalizer;
+extern AVFilter ff_af_anull;
+extern AVFilter ff_af_apad;
+extern AVFilter ff_af_aperms;
+extern AVFilter ff_af_aphaser;
+extern AVFilter ff_af_apulsator;
+extern AVFilter ff_af_arealtime;
+extern AVFilter ff_af_aresample;
+extern AVFilter ff_af_areverse;
+extern AVFilter ff_af_aselect;
+extern AVFilter ff_af_asendcmd;
+extern AVFilter ff_af_asetnsamples;
+extern AVFilter ff_af_asetpts;
+extern AVFilter ff_af_asetrate;
+extern AVFilter ff_af_asettb;
+extern AVFilter ff_af_ashowinfo;
+extern AVFilter ff_af_asidedata;
+extern AVFilter ff_af_asplit;
+extern AVFilter ff_af_astats;
+extern AVFilter ff_af_astreamselect;
+extern AVFilter ff_af_atempo;
+extern AVFilter ff_af_atrim;
+extern AVFilter ff_af_azmq;
+extern AVFilter ff_af_bandpass;
+extern AVFilter ff_af_bandreject;
+extern AVFilter ff_af_bass;
+extern AVFilter ff_af_biquad;
+extern AVFilter ff_af_bs2b;
+extern AVFilter ff_af_channelmap;
+extern AVFilter ff_af_channelsplit;
+extern AVFilter ff_af_chorus;
+extern AVFilter ff_af_compand;
+extern AVFilter ff_af_compensationdelay;
+extern AVFilter ff_af_crossfeed;
+extern AVFilter ff_af_crystalizer;
+extern AVFilter ff_af_dcshift;
+extern AVFilter ff_af_drmeter;
+extern AVFilter ff_af_dynaudnorm;
+extern AVFilter ff_af_earwax;
+extern AVFilter ff_af_ebur128;
+extern AVFilter ff_af_equalizer;
+extern AVFilter ff_af_extrastereo;
+extern AVFilter ff_af_firequalizer;
+extern AVFilter ff_af_flanger;
+extern AVFilter ff_af_haas;
+extern AVFilter ff_af_hdcd;
+extern AVFilter ff_af_headphone;
+extern AVFilter ff_af_highpass;
+extern AVFilter ff_af_highshelf;
+extern AVFilter ff_af_join;
+extern AVFilter ff_af_ladspa;
+extern AVFilter ff_af_loudnorm;
+extern AVFilter ff_af_lowpass;
+extern AVFilter ff_af_lowshelf;
+extern AVFilter ff_af_lv2;
+extern AVFilter ff_af_mcompand;
+extern AVFilter ff_af_pan;
+extern AVFilter ff_af_replaygain;
+extern AVFilter ff_af_resample;
+extern AVFilter ff_af_rubberband;
+extern AVFilter ff_af_sidechaincompress;
+extern AVFilter ff_af_sidechaingate;
+extern AVFilter ff_af_silencedetect;
+extern AVFilter ff_af_silenceremove;
+extern AVFilter ff_af_sofalizer;
+extern AVFilter ff_af_stereotools;
+extern AVFilter ff_af_stereowiden;
+extern AVFilter ff_af_superequalizer;
+extern AVFilter ff_af_surround;
+extern AVFilter ff_af_treble;
+extern AVFilter ff_af_tremolo;
+extern AVFilter ff_af_vibrato;
+extern AVFilter ff_af_volume;
+extern AVFilter ff_af_volumedetect;
+
+extern AVFilter ff_asrc_aevalsrc;
+extern AVFilter ff_asrc_anoisesrc;
+extern AVFilter ff_asrc_anullsrc;
+extern AVFilter ff_asrc_flite;
+extern AVFilter ff_asrc_hilbert;
+extern AVFilter ff_asrc_sine;
+
+extern AVFilter ff_asink_anullsink;
+
+extern AVFilter ff_vf_alphaextract;
+extern AVFilter ff_vf_alphamerge;
+extern AVFilter ff_vf_amplify;
+extern AVFilter ff_vf_ass;
+extern AVFilter ff_vf_atadenoise;
+extern AVFilter ff_vf_avgblur;
+extern AVFilter ff_vf_avgblur_opencl;
+extern AVFilter ff_vf_bbox;
+extern AVFilter ff_vf_bench;
+extern AVFilter ff_vf_bitplanenoise;
+extern AVFilter ff_vf_blackdetect;
+extern AVFilter ff_vf_blackframe;
+extern AVFilter ff_vf_blend;
+extern AVFilter ff_vf_bm3d;
+extern AVFilter ff_vf_boxblur;
+extern AVFilter ff_vf_boxblur_opencl;
+extern AVFilter ff_vf_bwdif;
+extern AVFilter ff_vf_chromakey;
+extern AVFilter ff_vf_ciescope;
+extern AVFilter ff_vf_codecview;
+extern AVFilter ff_vf_colorbalance;
+extern AVFilter ff_vf_colorchannelmixer;
+extern AVFilter ff_vf_colorkey;
+extern AVFilter ff_vf_colorlevels;
+extern AVFilter ff_vf_colormatrix;
+extern AVFilter ff_vf_colorspace;
+extern AVFilter ff_vf_convolution;
+extern AVFilter ff_vf_convolution_opencl;
+extern AVFilter ff_vf_convolve;
+extern AVFilter ff_vf_copy;
+extern AVFilter ff_vf_coreimage;
+extern AVFilter ff_vf_cover_rect;
+extern AVFilter ff_vf_crop;
+extern AVFilter ff_vf_cropdetect;
+extern AVFilter ff_vf_cue;
+extern AVFilter ff_vf_curves;
+extern AVFilter ff_vf_datascope;
+extern AVFilter ff_vf_dctdnoiz;
+extern AVFilter ff_vf_deband;
+extern AVFilter ff_vf_deblock;
+extern AVFilter ff_vf_decimate;
+extern AVFilter ff_vf_deconvolve;
+extern AVFilter ff_vf_deflate;
+extern AVFilter ff_vf_deflicker;
+extern AVFilter ff_vf_deinterlace_qsv;
+extern AVFilter ff_vf_deinterlace_vaapi;
+extern AVFilter ff_vf_dejudder;
+extern AVFilter ff_vf_delogo;
+extern AVFilter ff_vf_denoise_vaapi;
+extern AVFilter ff_vf_deshake;
+extern AVFilter ff_vf_despill;
+extern AVFilter ff_vf_detelecine;
+extern AVFilter ff_vf_dilation;
+extern AVFilter ff_vf_dilation_opencl;
+extern AVFilter ff_vf_displace;
+extern AVFilter ff_vf_doubleweave;
+extern AVFilter ff_vf_drawbox;
+extern AVFilter ff_vf_drawgraph;
+extern AVFilter ff_vf_drawgrid;
+extern AVFilter ff_vf_drawtext;
+extern AVFilter ff_vf_edgedetect;
+extern AVFilter ff_vf_elbg;
+extern AVFilter ff_vf_entropy;
+extern AVFilter ff_vf_eq;
+extern AVFilter ff_vf_erosion;
+extern AVFilter ff_vf_erosion_opencl;
+extern AVFilter ff_vf_extractplanes;
+extern AVFilter ff_vf_fade;
+extern AVFilter ff_vf_fftdnoiz;
+extern AVFilter ff_vf_fftfilt;
+extern AVFilter ff_vf_field;
+extern AVFilter ff_vf_fieldhint;
+extern AVFilter ff_vf_fieldmatch;
+extern AVFilter ff_vf_fieldorder;
+extern AVFilter ff_vf_fillborders;
+extern AVFilter ff_vf_find_rect;
+extern AVFilter ff_vf_floodfill;
+extern AVFilter ff_vf_format;
+extern AVFilter ff_vf_fps;
+extern AVFilter ff_vf_framepack;
+extern AVFilter ff_vf_framerate;
+extern AVFilter ff_vf_framestep;
+extern AVFilter ff_vf_frei0r;
+extern AVFilter ff_vf_fspp;
+extern AVFilter ff_vf_gblur;
+extern AVFilter ff_vf_geq;
+extern AVFilter ff_vf_gradfun;
+extern AVFilter ff_vf_greyedge;
+extern AVFilter ff_vf_haldclut;
+extern AVFilter ff_vf_hflip;
+extern AVFilter ff_vf_histeq;
+extern AVFilter ff_vf_histogram;
+extern AVFilter ff_vf_hqdn3d;
+extern AVFilter ff_vf_hqx;
+extern AVFilter ff_vf_hstack;
+extern AVFilter ff_vf_hue;
+extern AVFilter ff_vf_hwdownload;
+extern AVFilter ff_vf_hwmap;
+extern AVFilter ff_vf_hwupload;
+extern AVFilter ff_vf_hwupload_cuda;
+extern AVFilter ff_vf_hysteresis;
+extern AVFilter ff_vf_idet;
+extern AVFilter ff_vf_il;
+extern AVFilter ff_vf_inflate;
+extern AVFilter ff_vf_interlace;
+extern AVFilter ff_vf_interleave;
+extern AVFilter ff_vf_kerndeint;
+extern AVFilter ff_vf_lenscorrection;
+extern AVFilter ff_vf_lensfun;
+extern AVFilter ff_vf_libvmaf;
+extern AVFilter ff_vf_limiter;
+extern AVFilter ff_vf_loop;
+extern AVFilter ff_vf_lumakey;
+extern AVFilter ff_vf_lut;
+extern AVFilter ff_vf_lut1d;
+extern AVFilter ff_vf_lut2;
+extern AVFilter ff_vf_lut3d;
+extern AVFilter ff_vf_lutrgb;
+extern AVFilter ff_vf_lutyuv;
+extern AVFilter ff_vf_maskedclamp;
+extern AVFilter ff_vf_maskedmerge;
+extern AVFilter ff_vf_mcdeint;
+extern AVFilter ff_vf_mergeplanes;
+extern AVFilter ff_vf_mestimate;
+extern AVFilter ff_vf_metadata;
+extern AVFilter ff_vf_midequalizer;
+extern AVFilter ff_vf_minterpolate;
+extern AVFilter ff_vf_mix;
+extern AVFilter ff_vf_mpdecimate;
+extern AVFilter ff_vf_negate;
+extern AVFilter ff_vf_nlmeans;
+extern AVFilter ff_vf_nnedi;
+extern AVFilter ff_vf_noformat;
+extern AVFilter ff_vf_noise;
+extern AVFilter ff_vf_normalize;
+extern AVFilter ff_vf_null;
+extern AVFilter ff_vf_ocr;
+extern AVFilter ff_vf_ocv;
+extern AVFilter ff_vf_oscilloscope;
+extern AVFilter ff_vf_overlay;
+extern AVFilter ff_vf_overlay_opencl;
+extern AVFilter ff_vf_overlay_qsv;
+extern AVFilter ff_vf_owdenoise;
+extern AVFilter ff_vf_pad;
+extern AVFilter ff_vf_palettegen;
+extern AVFilter ff_vf_paletteuse;
+extern AVFilter ff_vf_perms;
+extern AVFilter ff_vf_perspective;
+extern AVFilter ff_vf_phase;
+extern AVFilter ff_vf_pixdesctest;
+extern AVFilter ff_vf_pixscope;
+extern AVFilter ff_vf_pp;
+extern AVFilter ff_vf_pp7;
+extern AVFilter ff_vf_premultiply;
+extern AVFilter ff_vf_prewitt;
+extern AVFilter ff_vf_prewitt_opencl;
+extern AVFilter ff_vf_procamp_vaapi;
+extern AVFilter ff_vf_program_opencl;
+extern AVFilter ff_vf_pseudocolor;
+extern AVFilter ff_vf_psnr;
+extern AVFilter ff_vf_pullup;
+extern AVFilter ff_vf_qp;
+extern AVFilter ff_vf_random;
+extern AVFilter ff_vf_readeia608;
+extern AVFilter ff_vf_readvitc;
+extern AVFilter ff_vf_realtime;
+extern AVFilter ff_vf_remap;
+extern AVFilter ff_vf_removegrain;
+extern AVFilter ff_vf_removelogo;
+extern AVFilter ff_vf_repeatfields;
+extern AVFilter ff_vf_reverse;
+extern AVFilter ff_vf_roberts;
+extern AVFilter ff_vf_roberts_opencl;
+extern AVFilter ff_vf_rotate;
+extern AVFilter ff_vf_sab;
+extern AVFilter ff_vf_scale;
+extern AVFilter ff_vf_scale_cuda;
+extern AVFilter ff_vf_scale_npp;
+extern AVFilter ff_vf_scale_qsv;
+extern AVFilter ff_vf_scale_vaapi;
+extern AVFilter ff_vf_scale2ref;
+extern AVFilter ff_vf_select;
+extern AVFilter ff_vf_selectivecolor;
+extern AVFilter ff_vf_sendcmd;
+extern AVFilter ff_vf_separatefields;
+extern AVFilter ff_vf_setdar;
+extern AVFilter ff_vf_setfield;
+extern AVFilter ff_vf_setpts;
+extern AVFilter ff_vf_setrange;
+extern AVFilter ff_vf_setsar;
+extern AVFilter ff_vf_settb;
+extern AVFilter ff_vf_sharpness_vaapi;
+extern AVFilter ff_vf_showinfo;
+extern AVFilter ff_vf_showpalette;
+extern AVFilter ff_vf_shuffleframes;
+extern AVFilter ff_vf_shuffleplanes;
+extern AVFilter ff_vf_sidedata;
+extern AVFilter ff_vf_signalstats;
+extern AVFilter ff_vf_signature;
+extern AVFilter ff_vf_smartblur;
+extern AVFilter ff_vf_sobel;
+extern AVFilter ff_vf_sobel_opencl;
+extern AVFilter ff_vf_split;
+extern AVFilter ff_vf_spp;
+extern AVFilter ff_vf_sr;
+extern AVFilter ff_vf_ssim;
+extern AVFilter ff_vf_stereo3d;
+extern AVFilter ff_vf_streamselect;
+extern AVFilter ff_vf_subtitles;
+extern AVFilter ff_vf_super2xsai;
+extern AVFilter ff_vf_swaprect;
+extern AVFilter ff_vf_swapuv;
+extern AVFilter ff_vf_tblend;
+extern AVFilter ff_vf_telecine;
+extern AVFilter ff_vf_threshold;
+extern AVFilter ff_vf_thumbnail;
+extern AVFilter ff_vf_thumbnail_cuda;
+extern AVFilter ff_vf_tile;
+extern AVFilter ff_vf_tinterlace;
+extern AVFilter ff_vf_tlut2;
+extern AVFilter ff_vf_tmix;
+extern AVFilter ff_vf_tonemap;
+extern AVFilter ff_vf_tonemap_opencl;
+extern AVFilter ff_vf_transpose;
+extern AVFilter ff_vf_transpose_npp;
+extern AVFilter ff_vf_trim;
+extern AVFilter ff_vf_unpremultiply;
+extern AVFilter ff_vf_unsharp;
+extern AVFilter ff_vf_unsharp_opencl;
+extern AVFilter ff_vf_uspp;
+extern AVFilter ff_vf_vaguedenoiser;
+extern AVFilter ff_vf_vectorscope;
+extern AVFilter ff_vf_vflip;
+extern AVFilter ff_vf_vfrdet;
+extern AVFilter ff_vf_vidstabdetect;
+extern AVFilter ff_vf_vidstabtransform;
+extern AVFilter ff_vf_vignette;
+extern AVFilter ff_vf_vmafmotion;
+extern AVFilter ff_vf_vpp_qsv;
+extern AVFilter ff_vf_vstack;
+extern AVFilter ff_vf_w3fdif;
+extern AVFilter ff_vf_waveform;
+extern AVFilter ff_vf_weave;
+extern AVFilter ff_vf_xbr;
+extern AVFilter ff_vf_yadif;
+extern AVFilter ff_vf_zmq;
+extern AVFilter ff_vf_zoompan;
+extern AVFilter ff_vf_zscale;
+
+extern AVFilter ff_vsrc_allrgb;
+extern AVFilter ff_vsrc_allyuv;
+extern AVFilter ff_vsrc_cellauto;
+extern AVFilter ff_vsrc_color;
+extern AVFilter ff_vsrc_coreimagesrc;
+extern AVFilter ff_vsrc_frei0r_src;
+extern AVFilter ff_vsrc_haldclutsrc;
+extern AVFilter ff_vsrc_life;
+extern AVFilter ff_vsrc_mandelbrot;
+extern AVFilter ff_vsrc_mptestsrc;
+extern AVFilter ff_vsrc_nullsrc;
+extern AVFilter ff_vsrc_openclsrc;
+extern AVFilter ff_vsrc_pal75bars;
+extern AVFilter ff_vsrc_pal100bars;
+extern AVFilter ff_vsrc_rgbtestsrc;
+extern AVFilter ff_vsrc_smptebars;
+extern AVFilter ff_vsrc_smptehdbars;
+extern AVFilter ff_vsrc_testsrc;
+extern AVFilter ff_vsrc_testsrc2;
+extern AVFilter ff_vsrc_yuvtestsrc;
+
+extern AVFilter ff_vsink_nullsink;
+
+/* multimedia filters */
+extern AVFilter ff_avf_abitscope;
+extern AVFilter ff_avf_adrawgraph;
+extern AVFilter ff_avf_ahistogram;
+extern AVFilter ff_avf_aphasemeter;
+extern AVFilter ff_avf_avectorscope;
+extern AVFilter ff_avf_concat;
+extern AVFilter ff_avf_showcqt;
+extern AVFilter ff_avf_showfreqs;
+extern AVFilter ff_avf_showspectrum;
+extern AVFilter ff_avf_showspectrumpic;
+extern AVFilter ff_avf_showvolume;
+extern AVFilter ff_avf_showwaves;
+extern AVFilter ff_avf_showwavespic;
+extern AVFilter ff_vaf_spectrumsynth;
+
+/* multimedia sources */
+extern AVFilter ff_avsrc_amovie;
+extern AVFilter ff_avsrc_movie;
+
+/* those filters are part of public or internal API,
+ * they are formatted to not be found by the grep
+ * as they are manually added again (due to their 'names'
+ * being the same while having different 'types'). */
+extern  AVFilter ff_asrc_abuffer;
+extern  AVFilter ff_vsrc_buffer;
+extern  AVFilter ff_asink_abuffer;
+extern  AVFilter ff_vsink_buffer;
+extern AVFilter ff_af_afifo;
+extern AVFilter ff_vf_fifo;
+
+#include "libavfilter/filter_list.c"
 
 
-#define REGISTER_FILTER(X, x, y)                                        \
-    {                                                                   \
-        extern AVFilter ff_##y##_##x;                                   \
-        if (CONFIG_##X##_FILTER)                                        \
-            avfilter_register(&ff_##y##_##x);                           \
-    }
-
-#define REGISTER_FILTER_UNCONDITIONAL(x)                                \
-    {                                                                   \
-        extern AVFilter ff_##x;                                         \
-        avfilter_register(&ff_##x);                                     \
-    }
-
-static void register_all(void)
+const AVFilter *av_filter_iterate(void **opaque)
 {
-    REGISTER_FILTER(ABENCH,         abench,         af);
-    REGISTER_FILTER(ACOMPRESSOR,    acompressor,    af);
-    REGISTER_FILTER(ACOPY,          acopy,          af);
-    REGISTER_FILTER(ACROSSFADE,     acrossfade,     af);
-    REGISTER_FILTER(ACRUSHER,       acrusher,       af);
-    REGISTER_FILTER(ADELAY,         adelay,         af);
-    REGISTER_FILTER(AECHO,          aecho,          af);
-    REGISTER_FILTER(AEMPHASIS,      aemphasis,      af);
-    REGISTER_FILTER(AEVAL,          aeval,          af);
-    REGISTER_FILTER(AFADE,          afade,          af);
-    REGISTER_FILTER(AFFTFILT,       afftfilt,       af);
-    REGISTER_FILTER(AFIR,           afir,           af);
-    REGISTER_FILTER(AFORMAT,        aformat,        af);
-    REGISTER_FILTER(AGATE,          agate,          af);
-    REGISTER_FILTER(AINTERLEAVE,    ainterleave,    af);
-    REGISTER_FILTER(ALIMITER,       alimiter,       af);
-    REGISTER_FILTER(ALLPASS,        allpass,        af);
-    REGISTER_FILTER(ALOOP,          aloop,          af);
-    REGISTER_FILTER(AMERGE,         amerge,         af);
-    REGISTER_FILTER(AMETADATA,      ametadata,      af);
-    REGISTER_FILTER(AMIX,           amix,           af);
-    REGISTER_FILTER(ANEQUALIZER,    anequalizer,    af);
-    REGISTER_FILTER(ANULL,          anull,          af);
-    REGISTER_FILTER(APAD,           apad,           af);
-    REGISTER_FILTER(APERMS,         aperms,         af);
-    REGISTER_FILTER(APHASER,        aphaser,        af);
-    REGISTER_FILTER(APULSATOR,      apulsator,      af);
-    REGISTER_FILTER(AREALTIME,      arealtime,      af);
-    REGISTER_FILTER(ARESAMPLE,      aresample,      af);
-    REGISTER_FILTER(AREVERSE,       areverse,       af);
-    REGISTER_FILTER(ASELECT,        aselect,        af);
-    REGISTER_FILTER(ASENDCMD,       asendcmd,       af);
-    REGISTER_FILTER(ASETNSAMPLES,   asetnsamples,   af);
-    REGISTER_FILTER(ASETPTS,        asetpts,        af);
-    REGISTER_FILTER(ASETRATE,       asetrate,       af);
-    REGISTER_FILTER(ASETTB,         asettb,         af);
-    REGISTER_FILTER(ASHOWINFO,      ashowinfo,      af);
-    REGISTER_FILTER(ASIDEDATA,      asidedata,      af);
-    REGISTER_FILTER(ASPLIT,         asplit,         af);
-    REGISTER_FILTER(ASTATS,         astats,         af);
-    REGISTER_FILTER(ASTREAMSELECT,  astreamselect,  af);
-    REGISTER_FILTER(ATEMPO,         atempo,         af);
-    REGISTER_FILTER(ATRIM,          atrim,          af);
-    REGISTER_FILTER(AZMQ,           azmq,           af);
-    REGISTER_FILTER(BANDPASS,       bandpass,       af);
-    REGISTER_FILTER(BANDREJECT,     bandreject,     af);
-    REGISTER_FILTER(BASS,           bass,           af);
-    REGISTER_FILTER(BIQUAD,         biquad,         af);
-    REGISTER_FILTER(BS2B,           bs2b,           af);
-    REGISTER_FILTER(CHANNELMAP,     channelmap,     af);
-    REGISTER_FILTER(CHANNELSPLIT,   channelsplit,   af);
-    REGISTER_FILTER(CHORUS,         chorus,         af);
-    REGISTER_FILTER(COMPAND,        compand,        af);
-    REGISTER_FILTER(COMPENSATIONDELAY, compensationdelay, af);
-    REGISTER_FILTER(CROSSFEED,      crossfeed,      af);
-    REGISTER_FILTER(CRYSTALIZER,    crystalizer,    af);
-    REGISTER_FILTER(DCSHIFT,        dcshift,        af);
-    REGISTER_FILTER(DYNAUDNORM,     dynaudnorm,     af);
-    REGISTER_FILTER(EARWAX,         earwax,         af);
-    REGISTER_FILTER(EBUR128,        ebur128,        af);
-    REGISTER_FILTER(EQUALIZER,      equalizer,      af);
-    REGISTER_FILTER(EXTRASTEREO,    extrastereo,    af);
-    REGISTER_FILTER(FIREQUALIZER,   firequalizer,   af);
-    REGISTER_FILTER(FLANGER,        flanger,        af);
-    REGISTER_FILTER(HAAS,           haas,           af);
-    REGISTER_FILTER(HDCD,           hdcd,           af);
-    REGISTER_FILTER(HEADPHONE,      headphone,      af);
-    REGISTER_FILTER(HIGHPASS,       highpass,       af);
-    REGISTER_FILTER(JOIN,           join,           af);
-    REGISTER_FILTER(LADSPA,         ladspa,         af);
-    REGISTER_FILTER(LOUDNORM,       loudnorm,       af);
-    REGISTER_FILTER(LOWPASS,        lowpass,        af);
-    REGISTER_FILTER(PAN,            pan,            af);
-    REGISTER_FILTER(REPLAYGAIN,     replaygain,     af);
-    REGISTER_FILTER(RESAMPLE,       resample,       af);
-    REGISTER_FILTER(RUBBERBAND,     rubberband,     af);
-    REGISTER_FILTER(SIDECHAINCOMPRESS, sidechaincompress, af);
-    REGISTER_FILTER(SIDECHAINGATE,  sidechaingate,  af);
-    REGISTER_FILTER(SILENCEDETECT,  silencedetect,  af);
-    REGISTER_FILTER(SILENCEREMOVE,  silenceremove,  af);
-    REGISTER_FILTER(SOFALIZER,      sofalizer,      af);
-    REGISTER_FILTER(STEREOTOOLS,    stereotools,    af);
-    REGISTER_FILTER(STEREOWIDEN,    stereowiden,    af);
-    REGISTER_FILTER(SUPEREQUALIZER, superequalizer, af);
-    REGISTER_FILTER(SURROUND,       surround,       af);
-    REGISTER_FILTER(TREBLE,         treble,         af);
-    REGISTER_FILTER(TREMOLO,        tremolo,        af);
-    REGISTER_FILTER(VIBRATO,        vibrato,        af);
-    REGISTER_FILTER(VOLUME,         volume,         af);
-    REGISTER_FILTER(VOLUMEDETECT,   volumedetect,   af);
+    uintptr_t i = (uintptr_t)*opaque;
+    const AVFilter *f = filter_list[i];
 
-    REGISTER_FILTER(AEVALSRC,       aevalsrc,       asrc);
-    REGISTER_FILTER(ANOISESRC,      anoisesrc,      asrc);
-    REGISTER_FILTER(ANULLSRC,       anullsrc,       asrc);
-    REGISTER_FILTER(FLITE,          flite,          asrc);
-    REGISTER_FILTER(SINE,           sine,           asrc);
+    if (f)
+        *opaque = (void*)(i + 1);
 
-    REGISTER_FILTER(ANULLSINK,      anullsink,      asink);
+    return f;
+}
 
-    REGISTER_FILTER(ALPHAEXTRACT,   alphaextract,   vf);
-    REGISTER_FILTER(ALPHAMERGE,     alphamerge,     vf);
-    REGISTER_FILTER(ASS,            ass,            vf);
-    REGISTER_FILTER(ATADENOISE,     atadenoise,     vf);
-    REGISTER_FILTER(AVGBLUR,        avgblur,        vf);
-    REGISTER_FILTER(BBOX,           bbox,           vf);
-    REGISTER_FILTER(BENCH,          bench,          vf);
-    REGISTER_FILTER(BITPLANENOISE,  bitplanenoise,  vf);
-    REGISTER_FILTER(BLACKDETECT,    blackdetect,    vf);
-    REGISTER_FILTER(BLACKFRAME,     blackframe,     vf);
-    REGISTER_FILTER(BLEND,          blend,          vf);
-    REGISTER_FILTER(BOXBLUR,        boxblur,        vf);
-    REGISTER_FILTER(BWDIF,          bwdif,          vf);
-    REGISTER_FILTER(CHROMAKEY,      chromakey,      vf);
-    REGISTER_FILTER(CIESCOPE,       ciescope,       vf);
-    REGISTER_FILTER(CODECVIEW,      codecview,      vf);
-    REGISTER_FILTER(COLORBALANCE,   colorbalance,   vf);
-    REGISTER_FILTER(COLORCHANNELMIXER, colorchannelmixer, vf);
-    REGISTER_FILTER(COLORKEY,       colorkey,       vf);
-    REGISTER_FILTER(COLORLEVELS,    colorlevels,    vf);
-    REGISTER_FILTER(COLORMATRIX,    colormatrix,    vf);
-    REGISTER_FILTER(COLORSPACE,     colorspace,     vf);
-    REGISTER_FILTER(CONVOLUTION,    convolution,    vf);
-    REGISTER_FILTER(CONVOLVE,       convolve,       vf);
-    REGISTER_FILTER(COPY,           copy,           vf);
-    REGISTER_FILTER(COREIMAGE,      coreimage,      vf);
-    REGISTER_FILTER(COVER_RECT,     cover_rect,     vf);
-    REGISTER_FILTER(CROP,           crop,           vf);
-    REGISTER_FILTER(CROPDETECT,     cropdetect,     vf);
-    REGISTER_FILTER(CURVES,         curves,         vf);
-    REGISTER_FILTER(DATASCOPE,      datascope,      vf);
-    REGISTER_FILTER(DCTDNOIZ,       dctdnoiz,       vf);
-    REGISTER_FILTER(DEBAND,         deband,         vf);
-    REGISTER_FILTER(DECIMATE,       decimate,       vf);
-    REGISTER_FILTER(DEFLATE,        deflate,        vf);
-    REGISTER_FILTER(DEFLICKER,      deflicker,      vf);
-    REGISTER_FILTER(DEINTERLACE_QSV,deinterlace_qsv,vf);
-    REGISTER_FILTER(DEINTERLACE_VAAPI, deinterlace_vaapi, vf);
-    REGISTER_FILTER(DEJUDDER,       dejudder,       vf);
-    REGISTER_FILTER(DELOGO,         delogo,         vf);
-    REGISTER_FILTER(DESHAKE,        deshake,        vf);
-    REGISTER_FILTER(DESPILL,        despill,        vf);
-    REGISTER_FILTER(DETELECINE,     detelecine,     vf);
-    REGISTER_FILTER(DILATION,       dilation,       vf);
-    REGISTER_FILTER(DISPLACE,       displace,       vf);
-    REGISTER_FILTER(DOUBLEWEAVE,    doubleweave,    vf);
-    REGISTER_FILTER(DRAWBOX,        drawbox,        vf);
-    REGISTER_FILTER(DRAWGRAPH,      drawgraph,      vf);
-    REGISTER_FILTER(DRAWGRID,       drawgrid,       vf);
-    REGISTER_FILTER(DRAWTEXT,       drawtext,       vf);
-    REGISTER_FILTER(EDGEDETECT,     edgedetect,     vf);
-    REGISTER_FILTER(ELBG,           elbg,           vf);
-    REGISTER_FILTER(EQ,             eq,             vf);
-    REGISTER_FILTER(EROSION,        erosion,        vf);
-    REGISTER_FILTER(EXTRACTPLANES,  extractplanes,  vf);
-    REGISTER_FILTER(FADE,           fade,           vf);
-    REGISTER_FILTER(FFTFILT,        fftfilt,        vf);
-    REGISTER_FILTER(FIELD,          field,          vf);
-    REGISTER_FILTER(FIELDHINT,      fieldhint,      vf);
-    REGISTER_FILTER(FIELDMATCH,     fieldmatch,     vf);
-    REGISTER_FILTER(FIELDORDER,     fieldorder,     vf);
-    REGISTER_FILTER(FIND_RECT,      find_rect,      vf);
-    REGISTER_FILTER(FLOODFILL,      floodfill,      vf);
-    REGISTER_FILTER(FORMAT,         format,         vf);
-    REGISTER_FILTER(FPS,            fps,            vf);
-    REGISTER_FILTER(FRAMEPACK,      framepack,      vf);
-    REGISTER_FILTER(FRAMERATE,      framerate,      vf);
-    REGISTER_FILTER(FRAMESTEP,      framestep,      vf);
-    REGISTER_FILTER(FREI0R,         frei0r,         vf);
-    REGISTER_FILTER(FSPP,           fspp,           vf);
-    REGISTER_FILTER(GBLUR,          gblur,          vf);
-    REGISTER_FILTER(GEQ,            geq,            vf);
-    REGISTER_FILTER(GRADFUN,        gradfun,        vf);
-    REGISTER_FILTER(HALDCLUT,       haldclut,       vf);
-    REGISTER_FILTER(HFLIP,          hflip,          vf);
-    REGISTER_FILTER(HISTEQ,         histeq,         vf);
-    REGISTER_FILTER(HISTOGRAM,      histogram,      vf);
-    REGISTER_FILTER(HQDN3D,         hqdn3d,         vf);
-    REGISTER_FILTER(HQX,            hqx,            vf);
-    REGISTER_FILTER(HSTACK,         hstack,         vf);
-    REGISTER_FILTER(HUE,            hue,            vf);
-    REGISTER_FILTER(HWDOWNLOAD,     hwdownload,     vf);
-    REGISTER_FILTER(HWMAP,          hwmap,          vf);
-    REGISTER_FILTER(HWUPLOAD,       hwupload,       vf);
-    REGISTER_FILTER(HWUPLOAD_CUDA,  hwupload_cuda,  vf);
-    REGISTER_FILTER(HYSTERESIS,     hysteresis,     vf);
-    REGISTER_FILTER(IDET,           idet,           vf);
-    REGISTER_FILTER(IL,             il,             vf);
-    REGISTER_FILTER(INFLATE,        inflate,        vf);
-    REGISTER_FILTER(INTERLACE,      interlace,      vf);
-    REGISTER_FILTER(INTERLEAVE,     interleave,     vf);
-    REGISTER_FILTER(KERNDEINT,      kerndeint,      vf);
-    REGISTER_FILTER(LENSCORRECTION, lenscorrection, vf);
-    REGISTER_FILTER(LIBVMAF,        libvmaf,        vf);
-    REGISTER_FILTER(LIMITER,        limiter,        vf);
-    REGISTER_FILTER(LOOP,           loop,           vf);
-    REGISTER_FILTER(LUMAKEY,        lumakey,        vf);
-    REGISTER_FILTER(LUT,            lut,            vf);
-    REGISTER_FILTER(LUT2,           lut2,           vf);
-    REGISTER_FILTER(LUT3D,          lut3d,          vf);
-    REGISTER_FILTER(LUTRGB,         lutrgb,         vf);
-    REGISTER_FILTER(LUTYUV,         lutyuv,         vf);
-    REGISTER_FILTER(MASKEDCLAMP,    maskedclamp,    vf);
-    REGISTER_FILTER(MASKEDMERGE,    maskedmerge,    vf);
-    REGISTER_FILTER(MCDEINT,        mcdeint,        vf);
-    REGISTER_FILTER(MERGEPLANES,    mergeplanes,    vf);
-    REGISTER_FILTER(MESTIMATE,      mestimate,      vf);
-    REGISTER_FILTER(METADATA,       metadata,       vf);
-    REGISTER_FILTER(MIDEQUALIZER,   midequalizer,   vf);
-    REGISTER_FILTER(MINTERPOLATE,   minterpolate,   vf);
-    REGISTER_FILTER(MPDECIMATE,     mpdecimate,     vf);
-    REGISTER_FILTER(NEGATE,         negate,         vf);
-    REGISTER_FILTER(NLMEANS,        nlmeans,        vf);
-    REGISTER_FILTER(NNEDI,          nnedi,          vf);
-    REGISTER_FILTER(NOFORMAT,       noformat,       vf);
-    REGISTER_FILTER(NOISE,          noise,          vf);
-    REGISTER_FILTER(NULL,           null,           vf);
-    REGISTER_FILTER(OCR,            ocr,            vf);
-    REGISTER_FILTER(OCV,            ocv,            vf);
-    REGISTER_FILTER(OSCILLOSCOPE,   oscilloscope,   vf);
-    REGISTER_FILTER(OVERLAY,        overlay,        vf);
-    REGISTER_FILTER(OWDENOISE,      owdenoise,      vf);
-    REGISTER_FILTER(PAD,            pad,            vf);
-    REGISTER_FILTER(PALETTEGEN,     palettegen,     vf);
-    REGISTER_FILTER(PALETTEUSE,     paletteuse,     vf);
-    REGISTER_FILTER(PERMS,          perms,          vf);
-    REGISTER_FILTER(PERSPECTIVE,    perspective,    vf);
-    REGISTER_FILTER(PHASE,          phase,          vf);
-    REGISTER_FILTER(PIXDESCTEST,    pixdesctest,    vf);
-    REGISTER_FILTER(PIXSCOPE,       pixscope,       vf);
-    REGISTER_FILTER(PP,             pp,             vf);
-    REGISTER_FILTER(PP7,            pp7,            vf);
-    REGISTER_FILTER(PREMULTIPLY,    premultiply,    vf);
-    REGISTER_FILTER(PREWITT,        prewitt,        vf);
-    REGISTER_FILTER(PSEUDOCOLOR,    pseudocolor,    vf);
-    REGISTER_FILTER(PSNR,           psnr,           vf);
-    REGISTER_FILTER(PULLUP,         pullup,         vf);
-    REGISTER_FILTER(QP,             qp,             vf);
-    REGISTER_FILTER(RANDOM,         random,         vf);
-    REGISTER_FILTER(READEIA608,     readeia608,     vf);
-    REGISTER_FILTER(READVITC,       readvitc,       vf);
-    REGISTER_FILTER(REALTIME,       realtime,       vf);
-    REGISTER_FILTER(REMAP,          remap,          vf);
-    REGISTER_FILTER(REMOVEGRAIN,    removegrain,    vf);
-    REGISTER_FILTER(REMOVELOGO,     removelogo,     vf);
-    REGISTER_FILTER(REPEATFIELDS,   repeatfields,   vf);
-    REGISTER_FILTER(REVERSE,        reverse,        vf);
-    REGISTER_FILTER(ROBERTS,        roberts,        vf);
-    REGISTER_FILTER(ROTATE,         rotate,         vf);
-    REGISTER_FILTER(SAB,            sab,            vf);
-    REGISTER_FILTER(SCALE,          scale,          vf);
-    REGISTER_FILTER(SCALE_CUDA,     scale_cuda,     vf);
-    REGISTER_FILTER(SCALE_NPP,      scale_npp,      vf);
-    REGISTER_FILTER(SCALE_QSV,      scale_qsv,      vf);
-    REGISTER_FILTER(SCALE_VAAPI,    scale_vaapi,    vf);
-    REGISTER_FILTER(SCALE2REF,      scale2ref,      vf);
-    REGISTER_FILTER(SELECT,         select,         vf);
-    REGISTER_FILTER(SELECTIVECOLOR, selectivecolor, vf);
-    REGISTER_FILTER(SENDCMD,        sendcmd,        vf);
-    REGISTER_FILTER(SEPARATEFIELDS, separatefields, vf);
-    REGISTER_FILTER(SETDAR,         setdar,         vf);
-    REGISTER_FILTER(SETFIELD,       setfield,       vf);
-    REGISTER_FILTER(SETPTS,         setpts,         vf);
-    REGISTER_FILTER(SETSAR,         setsar,         vf);
-    REGISTER_FILTER(SETTB,          settb,          vf);
-    REGISTER_FILTER(SHOWINFO,       showinfo,       vf);
-    REGISTER_FILTER(SHOWPALETTE,    showpalette,    vf);
-    REGISTER_FILTER(SHUFFLEFRAMES,  shuffleframes,  vf);
-    REGISTER_FILTER(SHUFFLEPLANES,  shuffleplanes,  vf);
-    REGISTER_FILTER(SIDEDATA,       sidedata,       vf);
-    REGISTER_FILTER(SIGNALSTATS,    signalstats,    vf);
-    REGISTER_FILTER(SIGNATURE,      signature,      vf);
-    REGISTER_FILTER(SMARTBLUR,      smartblur,      vf);
-    REGISTER_FILTER(SOBEL,          sobel,          vf);
-    REGISTER_FILTER(SPLIT,          split,          vf);
-    REGISTER_FILTER(SPP,            spp,            vf);
-    REGISTER_FILTER(SSIM,           ssim,           vf);
-    REGISTER_FILTER(STEREO3D,       stereo3d,       vf);
-    REGISTER_FILTER(STREAMSELECT,   streamselect,   vf);
-    REGISTER_FILTER(SUBTITLES,      subtitles,      vf);
-    REGISTER_FILTER(SUPER2XSAI,     super2xsai,     vf);
-    REGISTER_FILTER(SWAPRECT,       swaprect,       vf);
-    REGISTER_FILTER(SWAPUV,         swapuv,         vf);
-    REGISTER_FILTER(TBLEND,         tblend,         vf);
-    REGISTER_FILTER(TELECINE,       telecine,       vf);
-    REGISTER_FILTER(THRESHOLD,      threshold,      vf);
-    REGISTER_FILTER(THUMBNAIL,      thumbnail,      vf);
-    REGISTER_FILTER(THUMBNAIL_CUDA, thumbnail_cuda, vf);
-    REGISTER_FILTER(TILE,           tile,           vf);
-    REGISTER_FILTER(TINTERLACE,     tinterlace,     vf);
-    REGISTER_FILTER(TLUT2,          tlut2,          vf);
-    REGISTER_FILTER(TONEMAP,        tonemap,        vf);
-    REGISTER_FILTER(TRANSPOSE,      transpose,      vf);
-    REGISTER_FILTER(TRIM,           trim,           vf);
-    REGISTER_FILTER(UNPREMULTIPLY,  unpremultiply,  vf);
-    REGISTER_FILTER(UNSHARP,        unsharp,        vf);
-    REGISTER_FILTER(USPP,           uspp,           vf);
-    REGISTER_FILTER(VAGUEDENOISER,  vaguedenoiser,  vf);
-    REGISTER_FILTER(VECTORSCOPE,    vectorscope,    vf);
-    REGISTER_FILTER(VFLIP,          vflip,          vf);
-    REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
-    REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
-    REGISTER_FILTER(VIGNETTE,       vignette,       vf);
-    REGISTER_FILTER(VMAFMOTION,     vmafmotion,     vf);
-    REGISTER_FILTER(VSTACK,         vstack,         vf);
-    REGISTER_FILTER(W3FDIF,         w3fdif,         vf);
-    REGISTER_FILTER(WAVEFORM,       waveform,       vf);
-    REGISTER_FILTER(WEAVE,          weave,          vf);
-    REGISTER_FILTER(XBR,            xbr,            vf);
-    REGISTER_FILTER(YADIF,          yadif,          vf);
-    REGISTER_FILTER(ZMQ,            zmq,            vf);
-    REGISTER_FILTER(ZOOMPAN,        zoompan,        vf);
-    REGISTER_FILTER(ZSCALE,         zscale,         vf);
+const AVFilter *avfilter_get_by_name(const char *name)
+{
+    const AVFilter *f = NULL;
+    void *opaque = 0;
 
-    REGISTER_FILTER(ALLRGB,         allrgb,         vsrc);
-    REGISTER_FILTER(ALLYUV,         allyuv,         vsrc);
-    REGISTER_FILTER(CELLAUTO,       cellauto,       vsrc);
-    REGISTER_FILTER(COLOR,          color,          vsrc);
-    REGISTER_FILTER(COREIMAGESRC,   coreimagesrc,   vsrc);
-    REGISTER_FILTER(FREI0R,         frei0r_src,     vsrc);
-    REGISTER_FILTER(HALDCLUTSRC,    haldclutsrc,    vsrc);
-    REGISTER_FILTER(LIFE,           life,           vsrc);
-    REGISTER_FILTER(MANDELBROT,     mandelbrot,     vsrc);
-    REGISTER_FILTER(MPTESTSRC,      mptestsrc,      vsrc);
-    REGISTER_FILTER(NULLSRC,        nullsrc,        vsrc);
-    REGISTER_FILTER(RGBTESTSRC,     rgbtestsrc,     vsrc);
-    REGISTER_FILTER(SMPTEBARS,      smptebars,      vsrc);
-    REGISTER_FILTER(SMPTEHDBARS,    smptehdbars,    vsrc);
-    REGISTER_FILTER(TESTSRC,        testsrc,        vsrc);
-    REGISTER_FILTER(TESTSRC2,       testsrc2,       vsrc);
-    REGISTER_FILTER(YUVTESTSRC,     yuvtestsrc,     vsrc);
+    if (!name)
+        return NULL;
 
-    REGISTER_FILTER(NULLSINK,       nullsink,       vsink);
+    while ((f = av_filter_iterate(&opaque)))
+        if (!strcmp(f->name, name))
+            return (AVFilter *)f;
 
-    /* multimedia filters */
-    REGISTER_FILTER(ABITSCOPE,      abitscope,      avf);
-    REGISTER_FILTER(ADRAWGRAPH,     adrawgraph,     avf);
-    REGISTER_FILTER(AHISTOGRAM,     ahistogram,     avf);
-    REGISTER_FILTER(APHASEMETER,    aphasemeter,    avf);
-    REGISTER_FILTER(AVECTORSCOPE,   avectorscope,   avf);
-    REGISTER_FILTER(CONCAT,         concat,         avf);
-    REGISTER_FILTER(SHOWCQT,        showcqt,        avf);
-    REGISTER_FILTER(SHOWFREQS,      showfreqs,      avf);
-    REGISTER_FILTER(SHOWSPECTRUM,   showspectrum,   avf);
-    REGISTER_FILTER(SHOWSPECTRUMPIC, showspectrumpic, avf);
-    REGISTER_FILTER(SHOWVOLUME,     showvolume,     avf);
-    REGISTER_FILTER(SHOWWAVES,      showwaves,      avf);
-    REGISTER_FILTER(SHOWWAVESPIC,   showwavespic,   avf);
-    REGISTER_FILTER(SPECTRUMSYNTH,  spectrumsynth,  vaf);
+    return NULL;
+}
 
-    /* multimedia sources */
-    REGISTER_FILTER(AMOVIE,         amovie,         avsrc);
-    REGISTER_FILTER(MOVIE,          movie,          avsrc);
 
-    /* those filters are part of public or internal API => registered
-     * unconditionally */
-    REGISTER_FILTER_UNCONDITIONAL(asrc_abuffer);
-    REGISTER_FILTER_UNCONDITIONAL(vsrc_buffer);
-    REGISTER_FILTER_UNCONDITIONAL(asink_abuffer);
-    REGISTER_FILTER_UNCONDITIONAL(vsink_buffer);
-    REGISTER_FILTER_UNCONDITIONAL(af_afifo);
-    REGISTER_FILTER_UNCONDITIONAL(vf_fifo);
-    ff_opencl_register_filter_kernel_code_all();
+#if FF_API_NEXT
+FF_DISABLE_DEPRECATION_WARNINGS
+static AVOnce av_filter_next_init = AV_ONCE_INIT;
+
+static void av_filter_init_next(void)
+{
+    AVFilter *prev = NULL, *p;
+    void *i = 0;
+    while ((p = (AVFilter*)av_filter_iterate(&i))) {
+        if (prev)
+            prev->next = p;
+        prev = p;
+    }
 }
 
 void avfilter_register_all(void)
 {
-    static AVOnce control = AV_ONCE_INIT;
-
-    ff_thread_once(&control, register_all);
+    ff_thread_once(&av_filter_next_init, av_filter_init_next);
 }
+
+int avfilter_register(AVFilter *filter)
+{
+    ff_thread_once(&av_filter_next_init, av_filter_init_next);
+
+    return 0;
+}
+
+const AVFilter *avfilter_next(const AVFilter *prev)
+{
+    ff_thread_once(&av_filter_next_init, av_filter_init_next);
+
+    return prev ? prev->next : filter_list[0];
+}
+
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif

diff --git a/libavfilter/asrc_hilbert.c b/libavfilter/asrc_hilbert.c
new file mode 100644
index 0000000..a3a3952
--- /dev/null
+++ b/libavfilter/asrc_hilbert.c

@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "window_func.h"
+
+typedef struct HilbertContext {
+    const AVClass *class;
+
+    int sample_rate;
+    int nb_taps;
+    int nb_samples;
+    int win_func;
+
+    float *taps;
+    int64_t pts;
+} HilbertContext;
+
+#define OFFSET(x) offsetof(HilbertContext, x)
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption hilbert_options[] = {
+    { "sample_rate", "set sample rate",    OFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64=44100},  1, INT_MAX,    FLAGS },
+    { "r",           "set sample rate",    OFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64=44100},  1, INT_MAX,    FLAGS },
+    { "taps",        "set number of taps", OFFSET(nb_taps),     AV_OPT_TYPE_INT, {.i64=22051}, 11, UINT16_MAX, FLAGS },
+    { "t",           "set number of taps", OFFSET(nb_taps),     AV_OPT_TYPE_INT, {.i64=22051}, 11, UINT16_MAX, FLAGS },
+    { "nb_samples",  "set the number of samples per requested frame", OFFSET(nb_samples), AV_OPT_TYPE_INT, {.i64 = 1024}, 1, INT_MAX, FLAGS },
+    { "n",           "set the number of samples per requested frame", OFFSET(nb_samples), AV_OPT_TYPE_INT, {.i64 = 1024}, 1, INT_MAX, FLAGS },
+    { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64=WFUNC_BLACKMAN}, 0, NB_WFUNC-1, FLAGS, "win_func" },
+    { "w",        "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64=WFUNC_BLACKMAN}, 0, NB_WFUNC-1, FLAGS, "win_func" },
+        { "rect",     "Rectangular",      0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT},     0, 0, FLAGS, "win_func" },
+        { "bartlett", "Bartlett",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, FLAGS, "win_func" },
+        { "hanning",  "Hanning",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, FLAGS, "win_func" },
+        { "hamming",  "Hamming",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING},  0, 0, FLAGS, "win_func" },
+        { "blackman", "Blackman",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BLACKMAN}, 0, 0, FLAGS, "win_func" },
+        { "welch",    "Welch",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_WELCH},    0, 0, FLAGS, "win_func" },
+        { "flattop",  "Flat-top",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_FLATTOP},  0, 0, FLAGS, "win_func" },
+        { "bharris",  "Blackman-Harris",  0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHARRIS},  0, 0, FLAGS, "win_func" },
+        { "bnuttall", "Blackman-Nuttall", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BNUTTALL}, 0, 0, FLAGS, "win_func" },
+        { "bhann",    "Bartlett-Hann",    0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHANN},    0, 0, FLAGS, "win_func" },
+        { "sine",     "Sine",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE},     0, 0, FLAGS, "win_func" },
+        { "nuttall",  "Nuttall",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_NUTTALL},  0, 0, FLAGS, "win_func" },
+        { "lanczos",  "Lanczos",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_LANCZOS},  0, 0, FLAGS, "win_func" },
+        { "gauss",    "Gauss",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_GAUSS},    0, 0, FLAGS, "win_func" },
+        { "tukey",    "Tukey",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_TUKEY},    0, 0, FLAGS, "win_func" },
+        { "dolph",    "Dolph-Chebyshev",  0, AV_OPT_TYPE_CONST, {.i64=WFUNC_DOLPH},    0, 0, FLAGS, "win_func" },
+        { "cauchy",   "Cauchy",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_CAUCHY},   0, 0, FLAGS, "win_func" },
+        { "parzen",   "Parzen",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_PARZEN},   0, 0, FLAGS, "win_func" },
+        { "poisson",  "Poisson",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_POISSON},  0, 0, FLAGS, "win_func" },
+    {NULL}
+};
+
+AVFILTER_DEFINE_CLASS(hilbert);
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    HilbertContext *s = ctx->priv;
+
+    if (!(s->nb_taps & 1)) {
+        av_log(s, AV_LOG_ERROR, "Number of taps %d must be odd length.\n", s->nb_taps);
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    HilbertContext *s = ctx->priv;
+
+    av_freep(&s->taps);
+}
+
+static av_cold int query_formats(AVFilterContext *ctx)
+{
+    HilbertContext *s = ctx->priv;
+    static const int64_t chlayouts[] = { AV_CH_LAYOUT_MONO, -1 };
+    int sample_rates[] = { s->sample_rate, -1 };
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLT,
+        AV_SAMPLE_FMT_NONE
+    };
+
+    AVFilterFormats *formats;
+    AVFilterChannelLayouts *layouts;
+    int ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats (ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    layouts = avfilter_make_format64_list(chlayouts);
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_make_format_list(sample_rates);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+static av_cold int config_props(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    HilbertContext *s = ctx->priv;
+    float overlap;
+    int i;
+
+    s->taps = av_malloc_array(s->nb_taps, sizeof(*s->taps));
+    if (!s->taps)
+        return AVERROR(ENOMEM);
+
+    generate_window_func(s->taps, s->nb_taps, s->win_func, &overlap);
+
+    for (i = 0; i < s->nb_taps; i++) {
+        int k = -(s->nb_taps / 2) + i;
+
+        if (k & 1) {
+            float pk = M_PI * k;
+
+            s->taps[i] *= (1.f - cosf(pk)) / pk;
+        } else {
+            s->taps[i] = 0.f;
+        }
+    }
+
+    s->pts = 0;
+
+    return 0;
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    HilbertContext *s = ctx->priv;
+    AVFrame *frame;
+    int nb_samples;
+
+    nb_samples = FFMIN(s->nb_samples, s->nb_taps - s->pts);
+    if (!nb_samples)
+        return AVERROR_EOF;
+
+    if (!(frame = ff_get_audio_buffer(outlink, nb_samples)))
+        return AVERROR(ENOMEM);
+
+    memcpy(frame->data[0], s->taps + s->pts, nb_samples * sizeof(float));
+
+    frame->pts = s->pts;
+    s->pts    += nb_samples;
+    return ff_filter_frame(outlink, frame);
+}
+
+static const AVFilterPad hilbert_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .request_frame = request_frame,
+        .config_props  = config_props,
+    },
+    { NULL }
+};
+
+AVFilter ff_asrc_hilbert = {
+    .name          = "hilbert",
+    .description   = NULL_IF_CONFIG_SMALL("Generate a Hilbert transform FIR coefficients."),
+    .query_formats = query_formats,
+    .init          = init,
+    .uninit        = uninit,
+    .priv_size     = sizeof(HilbertContext),
+    .inputs        = NULL,
+    .outputs       = hilbert_outputs,
+    .priv_class    = &hilbert_class,
+};

diff --git a/libavfilter/avf_aphasemeter.c b/libavfilter/avf_aphasemeter.c
index 8cdee94..ed83705 100644
--- a/libavfilter/avf_aphasemeter.c
+++ b/libavfilter/avf_aphasemeter.c

@@ -233,6 +233,7 @@
 {
     AudioPhaseMeterContext *s = ctx->priv;
     AVFilterPad pad;
+    int ret;
 
     pad = (AVFilterPad){
         .name         = av_strdup("out0"),
@@ -240,7 +241,11 @@
     };
     if (!pad.name)
         return AVERROR(ENOMEM);
-    ff_insert_outpad(ctx, 0, &pad);
+    ret = ff_insert_outpad(ctx, 0, &pad);
+    if (ret < 0) {
+        av_freep(&pad.name);
+        return ret;
+    }
 
     if (s->do_video) {
         pad = (AVFilterPad){
@@ -250,7 +255,11 @@
         };
         if (!pad.name)
             return AVERROR(ENOMEM);
-        ff_insert_outpad(ctx, 1, &pad);
+        ret = ff_insert_outpad(ctx, 1, &pad);
+        if (ret < 0) {
+            av_freep(&pad.name);
+            return ret;
+        }
     }
 
     return 0;

diff --git a/libavfilter/avf_avectorscope.c b/libavfilter/avf_avectorscope.c
index c8ff069..af9f1da 100644
--- a/libavfilter/avf_avectorscope.c
+++ b/libavfilter/avf_avectorscope.c

@@ -65,6 +65,8 @@
     int contrast[4];
     int fade[4];
     double zoom;
+    int swap;
+    int mirror;
     unsigned prev_x, prev_y;
     AVRational frame_rate;
 } AudioVectorScopeContext;
@@ -99,6 +101,12 @@
     { "sqrt",  "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT}, 0, 0, FLAGS, "scale" },
     { "cbrt",  "cube root",   0, AV_OPT_TYPE_CONST, {.i64=CBRT}, 0, 0, FLAGS, "scale" },
     { "log",   "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=LOG},  0, 0, FLAGS, "scale" },
+    { "swap", "swap x axis with y axis", OFFSET(swap), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
+    { "mirror", "mirror axis", OFFSET(mirror), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS, "mirror" },
+    { "none",  "no mirror", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "mirror" },
+    { "x",  "mirror x",     0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "mirror" },
+    { "y",  "mirror y",     0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "mirror" },
+    { "xy", "mirror both",  0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "mirror" },
     { NULL }
 };
 
@@ -316,6 +324,15 @@
             break;
         }
 
+        if (s->mirror & 1)
+            src[0] = -src[0];
+
+        if (s->mirror & 2)
+            src[1] = -src[1];
+
+        if (s->swap)
+            FFSWAP(float, src[0], src[1]);
+
         if (s->mode == LISSAJOUS) {
             x = ((src[1] - src[0]) * zoom / 2 + 1) * hw;
             y = (1.0 - (src[0] + src[1]) * zoom / 2) * hh;

diff --git a/libavfilter/avf_concat.c b/libavfilter/avf_concat.c
index 6198a33..1d0c2de 100644
--- a/libavfilter/avf_concat.c
+++ b/libavfilter/avf_concat.c

@@ -28,8 +28,7 @@
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
 #include "avfilter.h"
-#define FF_BUFQUEUE_SIZE 256
-#include "bufferqueue.h"
+#include "filters.h"
 #include "internal.h"
 #include "video.h"
 #include "audio.h"
@@ -48,7 +47,6 @@
         int64_t pts;
         int64_t nb_frames;
         unsigned eof;
-        struct FFBufQueue queue;
     } *in;
 } ConcatContext;
 
@@ -185,24 +183,6 @@
     return ff_filter_frame(outlink, buf);
 }
 
-static int process_frame(AVFilterLink *inlink, AVFrame *buf)
-{
-    AVFilterContext *ctx  = inlink->dst;
-    ConcatContext *cat    = ctx->priv;
-    unsigned in_no = FF_INLINK_IDX(inlink);
-
-    if (in_no < cat->cur_idx) {
-        av_log(ctx, AV_LOG_ERROR, "Frame after EOF on input %s\n",
-               ctx->input_pads[in_no].name);
-        av_frame_free(&buf);
-    } else if (in_no >= cat->cur_idx + ctx->nb_outputs) {
-        ff_bufqueue_add(ctx, &cat->in[in_no].queue, buf);
-    } else {
-        return push_frame(ctx, in_no, buf);
-    }
-    return 0;
-}
-
 static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h)
 {
     AVFilterContext *ctx = inlink->dst;
@@ -221,11 +201,6 @@
     return ff_get_audio_buffer(outlink, nb_samples);
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
-{
-    return process_frame(inlink, buf);
-}
-
 static void close_input(AVFilterContext *ctx, unsigned in_no)
 {
     ConcatContext *cat = ctx->priv;
@@ -306,57 +281,10 @@
             if (ret < 0)
                 return ret;
         }
-        /* flush queued buffers */
-        /* possible enhancement: flush in PTS order */
-        str_max = cat->cur_idx + ctx->nb_outputs;
-        for (str = cat->cur_idx; str < str_max; str++) {
-            while (cat->in[str].queue.available) {
-                ret = push_frame(ctx, str, ff_bufqueue_get(&cat->in[str].queue));
-                if (ret < 0)
-                    return ret;
-            }
-        }
     }
     return 0;
 }
 
-static int request_frame(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    ConcatContext *cat   = ctx->priv;
-    unsigned out_no = FF_OUTLINK_IDX(outlink);
-    unsigned in_no  = out_no + cat->cur_idx;
-    unsigned str, str_max;
-    int ret;
-
-    while (1) {
-        if (in_no >= ctx->nb_inputs)
-            return AVERROR_EOF;
-        if (!cat->in[in_no].eof) {
-            ret = ff_request_frame(ctx->inputs[in_no]);
-            if (ret != AVERROR_EOF)
-                return ret;
-            close_input(ctx, in_no);
-        }
-        /* cycle on all inputs to finish the segment */
-        /* possible enhancement: request in PTS order */
-        str_max = cat->cur_idx + ctx->nb_outputs - 1;
-        for (str = cat->cur_idx; cat->nb_in_active;
-             str = str == str_max ? cat->cur_idx : str + 1) {
-            if (cat->in[str].eof)
-                continue;
-            ret = ff_request_frame(ctx->inputs[str]);
-            if (ret != AVERROR_EOF)
-                return ret;
-            close_input(ctx, str);
-        }
-        ret = flush_segment(ctx);
-        if (ret < 0)
-            return ret;
-        in_no += ctx->nb_outputs;
-    }
-}
-
 static av_cold int init(AVFilterContext *ctx)
 {
     ConcatContext *cat = ctx->priv;
@@ -371,7 +299,6 @@
                     .type             = type,
                     .get_video_buffer = get_video_buffer,
                     .get_audio_buffer = get_audio_buffer,
-                    .filter_frame     = filter_frame,
                 };
                 pad.name = av_asprintf("in%d:%c%d", seg, "va"[type], str);
                 if ((ret = ff_insert_inpad(ctx, ctx->nb_inputs, &pad)) < 0) {
@@ -387,7 +314,6 @@
             AVFilterPad pad = {
                 .type          = type,
                 .config_props  = config_output,
-                .request_frame = request_frame,
             };
             pad.name = av_asprintf("out:%c%d", "va"[type], str);
             if ((ret = ff_insert_outpad(ctx, ctx->nb_outputs, &pad)) < 0) {
@@ -409,24 +335,112 @@
     ConcatContext *cat = ctx->priv;
     unsigned i;
 
-    for (i = 0; i < ctx->nb_inputs; i++) {
+    for (i = 0; i < ctx->nb_inputs; i++)
         av_freep(&ctx->input_pads[i].name);
-        ff_bufqueue_discard_all(&cat->in[i].queue);
-    }
     for (i = 0; i < ctx->nb_outputs; i++)
         av_freep(&ctx->output_pads[i].name);
     av_freep(&cat->in);
 }
 
+static int activate(AVFilterContext *ctx)
+{
+    ConcatContext *cat = ctx->priv;
+    AVFrame *frame;
+    unsigned i, j;
+    int ret, status;
+    int64_t pts;
+
+    /* Forward status back */
+    for (i = 0; i < ctx->nb_outputs; i++) {
+        status = ff_outlink_get_status(ctx->outputs[i]);
+        if (!status)
+            continue;
+        for (j = i; j < ctx->nb_inputs; j += ctx->nb_outputs) {
+            if (!cat->in[j].eof) {
+                cat->in[j].eof = 1;
+                ff_inlink_set_status(ctx->inputs[j], status);
+                return 0;
+            }
+        }
+
+    }
+
+    /* Forward available frames */
+    if (cat->cur_idx < ctx->nb_inputs) {
+        for (i = 0; i < ctx->nb_outputs; i++) {
+            ret = ff_inlink_consume_frame(ctx->inputs[cat->cur_idx + i], &frame);
+            if (ret < 0)
+                return ret;
+            if (ret) {
+                ff_filter_set_ready(ctx, 10);
+                return push_frame(ctx, cat->cur_idx + i, frame);
+            }
+        }
+    }
+
+    /* Forward status change */
+    if (cat->cur_idx < ctx->nb_inputs) {
+        for (i = 0; i < ctx->nb_outputs; i++) {
+            ret = ff_inlink_acknowledge_status(ctx->inputs[cat->cur_idx + i], &status, &pts);
+            /* TODO use pts */
+            if (ret > 0) {
+                close_input(ctx, cat->cur_idx + i);
+                if (cat->cur_idx + ctx->nb_outputs >= ctx->nb_inputs) {
+                    ff_outlink_set_status(ctx->outputs[i], status, pts);
+                }
+                if (!cat->nb_in_active) {
+                    ret = flush_segment(ctx);
+                    if (ret < 0)
+                        return ret;
+                }
+                ff_filter_set_ready(ctx, 10);
+                return 0;
+            }
+        }
+    }
+
+    ret = FFERROR_NOT_READY;
+    for (i = 0; i < ctx->nb_outputs; i++) {
+        if (ff_outlink_frame_wanted(ctx->outputs[i])) {
+            if (cat->in[cat->cur_idx + i].eof) {
+                for (j = 0; j < ctx->nb_outputs; j++)
+                    if (!cat->in[cat->cur_idx + j].eof)
+                        ff_inlink_request_frame(ctx->inputs[cat->cur_idx + j]);
+                return 0;
+            } else {
+                ff_inlink_request_frame(ctx->inputs[cat->cur_idx + i]);
+                ret = 0;
+            }
+        }
+    }
+
+    return ret;
+}
+
+static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
+                           char *res, int res_len, int flags)
+{
+    int ret = AVERROR(ENOSYS);
+
+    if (!strcmp(cmd, "next")) {
+        av_log(ctx, AV_LOG_VERBOSE, "Command received: next\n");
+        return flush_segment(ctx);
+    }
+
+    return ret;
+}
+
 AVFilter ff_avf_concat = {
     .name          = "concat",
     .description   = NULL_IF_CONFIG_SMALL("Concatenate audio and video streams."),
     .init          = init,
     .uninit        = uninit,
     .query_formats = query_formats,
+    .activate      = activate,
     .priv_size     = sizeof(ConcatContext),
     .inputs        = NULL,
     .outputs       = NULL,
     .priv_class    = &concat_class,
     .flags         = AVFILTER_FLAG_DYNAMIC_INPUTS | AVFILTER_FLAG_DYNAMIC_OUTPUTS,
+    .process_command = process_command,
 };

diff --git a/libavfilter/avf_showspectrum.c b/libavfilter/avf_showspectrum.c
index 956f62f..41693a0 100644
--- a/libavfilter/avf_showspectrum.c
+++ b/libavfilter/avf_showspectrum.c

@@ -34,23 +34,28 @@
 #include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
 #include "libavutil/xga_font_data.h"
 #include "audio.h"
 #include "video.h"
 #include "avfilter.h"
+#include "filters.h"
 #include "internal.h"
 #include "window_func.h"
 
 enum DisplayMode  { COMBINED, SEPARATE, NB_MODES };
 enum DataMode     { D_MAGNITUDE, D_PHASE, NB_DMODES };
 enum DisplayScale { LINEAR, SQRT, CBRT, LOG, FOURTHRT, FIFTHRT, NB_SCALES };
-enum ColorMode    { CHANNEL, INTENSITY, RAINBOW, MORELAND, NEBULAE, FIRE, FIERY, FRUIT, COOL, NB_CLMODES };
+enum ColorMode    { CHANNEL, INTENSITY, RAINBOW, MORELAND, NEBULAE, FIRE, FIERY, FRUIT, COOL, MAGMA, GREEN, NB_CLMODES };
 enum SlideMode    { REPLACE, SCROLL, FULLFRAME, RSCROLL, NB_SLIDES };
 enum Orientation  { VERTICAL, HORIZONTAL, NB_ORIENTATIONS };
 
 typedef struct ShowSpectrumContext {
     const AVClass *class;
     int w, h;
+    char *rate_str;
+    AVRational auto_frame_rate;
+    AVRational frame_rate;
     AVFrame *outpicref;
     int nb_display_channels;
     int orientation;
@@ -62,24 +67,31 @@
     int scale;
     float saturation;           ///< color saturation multiplier
     float rotation;             ///< color rotation
+    int start, stop;            ///< zoom mode
     int data;
     int xpos;                   ///< x position (current column)
     FFTContext **fft;           ///< Fast Fourier Transform context
+    FFTContext **ifft;          ///< Inverse Fast Fourier Transform context
     int fft_bits;               ///< number of bits (FFT window size = 1<<fft_bits)
     FFTComplex **fft_data;      ///< bins holder for each (displayed) channels
+    FFTComplex **fft_scratch;   ///< scratch buffers
     float *window_func_lut;     ///< Window function LUT
     float **magnitudes;
     float **phases;
     int win_func;
     int win_size;
+    int buf_size;
     double win_scale;
     float overlap;
     float gain;
+    int consumed;
     int hop_size;
     float *combine_buffer;      ///< color combining buffer (3 * h items)
     float **color_buffer;       ///< color buffer (3 * h * ch items)
     AVAudioFifo *fifo;
     int64_t pts;
+    int64_t old_pts;
+    int old_len;
     int single_pic;
     int legend;
     int start_x, start_y;
@@ -109,6 +121,8 @@
         { "fiery",     "fiery based coloring",            0, AV_OPT_TYPE_CONST, {.i64=FIERY},     0, 0, FLAGS, "color" },
         { "fruit",     "fruit based coloring",            0, AV_OPT_TYPE_CONST, {.i64=FRUIT},     0, 0, FLAGS, "color" },
         { "cool",      "cool based coloring",             0, AV_OPT_TYPE_CONST, {.i64=COOL},      0, 0, FLAGS, "color" },
+        { "magma",     "magma based coloring",            0, AV_OPT_TYPE_CONST, {.i64=MAGMA},     0, 0, FLAGS, "color" },
+        { "green",     "green based coloring",            0, AV_OPT_TYPE_CONST, {.i64=GREEN},     0, 0, FLAGS, "color" },
     { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=SQRT}, LINEAR, NB_SCALES-1, FLAGS, "scale" },
         { "lin",  "linear",      0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
         { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT},   0, 0, FLAGS, "scale" },
@@ -147,6 +161,10 @@
         { "magnitude", NULL, 0, AV_OPT_TYPE_CONST, {.i64=D_MAGNITUDE}, 0, 0, FLAGS, "data" },
         { "phase",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=D_PHASE},     0, 0, FLAGS, "data" },
     { "rotation", "color rotation", OFFSET(rotation), AV_OPT_TYPE_FLOAT, {.dbl = 0}, -1, 1, FLAGS },
+    { "start", "start frequency", OFFSET(start), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT32_MAX, FLAGS },
+    { "stop",  "stop frequency",  OFFSET(stop),  AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT32_MAX, FLAGS },
+    { "fps",   "set video rate",  OFFSET(rate_str), AV_OPT_TYPE_STRING, {.str = "auto"}, 0, 0, FLAGS },
+    { "legend", "draw legend", OFFSET(legend), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
     { NULL }
 };
 
@@ -222,6 +240,19 @@
     {    0,                  0,                  0,                   0 },
     {  .15,                  0,                 .5,                 -.5 },
     {    1,                  1,                -.5,                  .5 }},
+    [MAGMA] = {
+    {    0,                  0,                  0,                   0 },
+    { 0.10,            23/256.,     (175-128)/256.,      (120-128)/256. },
+    { 0.23,            43/256.,     (158-128)/256.,      (144-128)/256. },
+    { 0.35,            85/256.,     (138-128)/256.,      (179-128)/256. },
+    { 0.48,            96/256.,     (128-128)/256.,      (189-128)/256. },
+    { 0.64,           128/256.,     (103-128)/256.,      (214-128)/256. },
+    { 0.78,           167/256.,      (85-128)/256.,      (174-128)/256. },
+    {    1,           205/256.,      (80-128)/256.,      (152-128)/256. }},
+    [GREEN] = {
+    {    0,                  0,                  0,                   0 },
+    {  .75,                 .5,                  0,                 -.5 },
+    {    1,                  1,                  0,                   0 }},
 };
 
 static av_cold void uninit(AVFilterContext *ctx)
@@ -235,11 +266,21 @@
             av_fft_end(s->fft[i]);
     }
     av_freep(&s->fft);
+    if (s->ifft) {
+        for (i = 0; i < s->nb_display_channels; i++)
+            av_fft_end(s->ifft[i]);
+    }
+    av_freep(&s->ifft);
     if (s->fft_data) {
         for (i = 0; i < s->nb_display_channels; i++)
             av_freep(&s->fft_data[i]);
     }
     av_freep(&s->fft_data);
+    if (s->fft_scratch) {
+        for (i = 0; i < s->nb_display_channels; i++)
+            av_freep(&s->fft_scratch[i]);
+    }
+    av_freep(&s->fft_scratch);
     if (s->color_buffer) {
         for (i = 0; i < s->nb_display_channels; i++)
             av_freep(&s->color_buffer[i]);
@@ -291,180 +332,10 @@
     return 0;
 }
 
-static int config_output(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    AVFilterLink *inlink = ctx->inputs[0];
-    ShowSpectrumContext *s = ctx->priv;
-    int i, fft_bits, h, w;
-    float overlap;
-
-    s->pts = AV_NOPTS_VALUE;
-
-    if (!strcmp(ctx->filter->name, "showspectrumpic"))
-        s->single_pic = 1;
-
-    outlink->w = s->w;
-    outlink->h = s->h;
-    outlink->sample_aspect_ratio = (AVRational){1,1};
-
-    if (s->legend) {
-        s->start_x = log10(inlink->sample_rate) * 25;
-        s->start_y = 64;
-        outlink->w += s->start_x * 2;
-        outlink->h += s->start_y * 2;
-    }
-
-    h = (s->mode == COMBINED || s->orientation == HORIZONTAL) ? s->h : s->h / inlink->channels;
-    w = (s->mode == COMBINED || s->orientation == VERTICAL)   ? s->w : s->w / inlink->channels;
-    s->channel_height = h;
-    s->channel_width  = w;
-
-    if (s->orientation == VERTICAL) {
-        /* FFT window size (precision) according to the requested output frame height */
-        for (fft_bits = 1; 1 << fft_bits < 2 * h; fft_bits++);
-    } else {
-        /* FFT window size (precision) according to the requested output frame width */
-        for (fft_bits = 1; 1 << fft_bits < 2 * w; fft_bits++);
-    }
-    s->win_size = 1 << fft_bits;
-
-    if (!s->fft) {
-        s->fft = av_calloc(inlink->channels, sizeof(*s->fft));
-        if (!s->fft)
-            return AVERROR(ENOMEM);
-    }
-
-    /* (re-)configuration if the video output changed (or first init) */
-    if (fft_bits != s->fft_bits) {
-        AVFrame *outpicref;
-
-        s->fft_bits = fft_bits;
-
-        /* FFT buffers: x2 for each (display) channel buffer.
-         * Note: we use free and malloc instead of a realloc-like function to
-         * make sure the buffer is aligned in memory for the FFT functions. */
-        for (i = 0; i < s->nb_display_channels; i++) {
-            av_fft_end(s->fft[i]);
-            av_freep(&s->fft_data[i]);
-        }
-        av_freep(&s->fft_data);
-
-        s->nb_display_channels = inlink->channels;
-        for (i = 0; i < s->nb_display_channels; i++) {
-            s->fft[i] = av_fft_init(fft_bits, 0);
-            if (!s->fft[i]) {
-                av_log(ctx, AV_LOG_ERROR, "Unable to create FFT context. "
-                       "The window size might be too high.\n");
-                return AVERROR(EINVAL);
-            }
-        }
-
-        s->magnitudes = av_calloc(s->nb_display_channels, sizeof(*s->magnitudes));
-        if (!s->magnitudes)
-            return AVERROR(ENOMEM);
-        for (i = 0; i < s->nb_display_channels; i++) {
-            s->magnitudes[i] = av_calloc(s->orientation == VERTICAL ? s->h : s->w, sizeof(**s->magnitudes));
-            if (!s->magnitudes[i])
-                return AVERROR(ENOMEM);
-        }
-
-        s->phases = av_calloc(s->nb_display_channels, sizeof(*s->phases));
-        if (!s->phases)
-            return AVERROR(ENOMEM);
-        for (i = 0; i < s->nb_display_channels; i++) {
-            s->phases[i] = av_calloc(s->orientation == VERTICAL ? s->h : s->w, sizeof(**s->phases));
-            if (!s->phases[i])
-                return AVERROR(ENOMEM);
-        }
-
-        av_freep(&s->color_buffer);
-        s->color_buffer = av_calloc(s->nb_display_channels, sizeof(*s->color_buffer));
-        if (!s->color_buffer)
-            return AVERROR(ENOMEM);
-        for (i = 0; i < s->nb_display_channels; i++) {
-            s->color_buffer[i] = av_calloc(s->orientation == VERTICAL ? s->h * 3 : s->w * 3, sizeof(**s->color_buffer));
-            if (!s->color_buffer[i])
-                return AVERROR(ENOMEM);
-        }
-
-        s->fft_data = av_calloc(s->nb_display_channels, sizeof(*s->fft_data));
-        if (!s->fft_data)
-            return AVERROR(ENOMEM);
-        for (i = 0; i < s->nb_display_channels; i++) {
-            s->fft_data[i] = av_calloc(s->win_size, sizeof(**s->fft_data));
-            if (!s->fft_data[i])
-                return AVERROR(ENOMEM);
-        }
-
-        /* pre-calc windowing function */
-        s->window_func_lut =
-            av_realloc_f(s->window_func_lut, s->win_size,
-                         sizeof(*s->window_func_lut));
-        if (!s->window_func_lut)
-            return AVERROR(ENOMEM);
-        generate_window_func(s->window_func_lut, s->win_size, s->win_func, &overlap);
-        if (s->overlap == 1)
-            s->overlap = overlap;
-        s->hop_size = (1. - s->overlap) * s->win_size;
-        if (s->hop_size < 1) {
-            av_log(ctx, AV_LOG_ERROR, "overlap %f too big\n", s->overlap);
-            return AVERROR(EINVAL);
-        }
-
-        for (s->win_scale = 0, i = 0; i < s->win_size; i++) {
-            s->win_scale += s->window_func_lut[i] * s->window_func_lut[i];
-        }
-        s->win_scale = 1. / sqrt(s->win_scale);
-
-        /* prepare the initial picref buffer (black frame) */
-        av_frame_free(&s->outpicref);
-        s->outpicref = outpicref =
-            ff_get_video_buffer(outlink, outlink->w, outlink->h);
-        if (!outpicref)
-            return AVERROR(ENOMEM);
-        outpicref->sample_aspect_ratio = (AVRational){1,1};
-        for (i = 0; i < outlink->h; i++) {
-            memset(outpicref->data[0] + i * outpicref->linesize[0],   0, outlink->w);
-            memset(outpicref->data[1] + i * outpicref->linesize[1], 128, outlink->w);
-            memset(outpicref->data[2] + i * outpicref->linesize[2], 128, outlink->w);
-        }
-        outpicref->color_range = AVCOL_RANGE_JPEG;
-    }
-
-    if ((s->orientation == VERTICAL   && s->xpos >= s->w) ||
-        (s->orientation == HORIZONTAL && s->xpos >= s->h))
-        s->xpos = 0;
-
-    outlink->frame_rate = av_make_q(inlink->sample_rate, s->win_size * (1.-s->overlap));
-    if (s->orientation == VERTICAL && s->sliding == FULLFRAME)
-        outlink->frame_rate.den *= s->w;
-    if (s->orientation == HORIZONTAL && s->sliding == FULLFRAME)
-        outlink->frame_rate.den *= s->h;
-
-    if (s->orientation == VERTICAL) {
-        s->combine_buffer =
-            av_realloc_f(s->combine_buffer, s->h * 3,
-                         sizeof(*s->combine_buffer));
-    } else {
-        s->combine_buffer =
-            av_realloc_f(s->combine_buffer, s->w * 3,
-                         sizeof(*s->combine_buffer));
-    }
-
-    av_log(ctx, AV_LOG_VERBOSE, "s:%dx%d FFT window size:%d\n",
-           s->w, s->h, s->win_size);
-
-    av_audio_fifo_free(s->fifo);
-    s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->win_size);
-    if (!s->fifo)
-        return AVERROR(ENOMEM);
-    return 0;
-}
-
 static int run_channel_fft(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
     ShowSpectrumContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
     const float *window_func_lut = s->window_func_lut;
     AVFrame *fin = arg;
     const int ch = jobnr;
@@ -478,70 +349,120 @@
         s->fft_data[ch][n].im = 0;
     }
 
-    /* run FFT on each samples set */
-    av_fft_permute(s->fft[ch], s->fft_data[ch]);
-    av_fft_calc(s->fft[ch], s->fft_data[ch]);
+    if (s->stop) {
+        double theta, phi, psi, a, b, S, c;
+        FFTComplex *g = s->fft_data[ch];
+        FFTComplex *h = s->fft_scratch[ch];
+        int L = s->buf_size;
+        int N = s->win_size;
+        int M = s->win_size / 2;
 
-    return 0;
-}
+        phi = 2.0 * M_PI * (s->stop - s->start) / (double)inlink->sample_rate / (M - 1);
+        theta = 2.0 * M_PI * s->start / (double)inlink->sample_rate;
 
-#define RE(y, ch) s->fft_data[ch][y].re
-#define IM(y, ch) s->fft_data[ch][y].im
-#define MAGNITUDE(y, ch) hypot(RE(y, ch), IM(y, ch))
-#define PHASE(y, ch) atan2(IM(y, ch), RE(y, ch))
+        for (int n = 0; n < M; n++) {
+            h[n].re = cos(n * n / 2.0 * phi);
+            h[n].im = sin(n * n / 2.0 * phi);
+        }
 
-static int calc_channel_magnitudes(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
-    ShowSpectrumContext *s = ctx->priv;
-    const double w = s->win_scale * (s->scale == LOG ? s->win_scale : 1);
-    int y, h = s->orientation == VERTICAL ? s->h : s->w;
-    const float f = s->gain * w;
-    const int ch = jobnr;
-    float *magnitudes = s->magnitudes[ch];
+        for (int n = M; n < L; n++) {
+            h[n].re = 0.0;
+            h[n].im = 0.0;
+        }
 
-    for (y = 0; y < h; y++)
-        magnitudes[y] = MAGNITUDE(y, ch) * f;
+        for (int n = L - N; n < L; n++) {
+            h[n].re = cos((L - n) * (L - n) / 2.0 * phi);
+            h[n].im = sin((L - n) * (L - n) / 2.0 * phi);
+        }
 
-    return 0;
-}
+        for (int n = 0; n < N; n++) {
+            g[n].re = s->fft_data[ch][n].re;
+            g[n].im = s->fft_data[ch][n].im;
+        }
 
-static int calc_channel_phases(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
-    ShowSpectrumContext *s = ctx->priv;
-    const int h = s->orientation == VERTICAL ? s->h : s->w;
-    const int ch = jobnr;
-    float *phases = s->phases[ch];
-    int y;
+        for (int n = N; n < L; n++) {
+            g[n].re = 0.;
+            g[n].im = 0.;
+        }
 
-    for (y = 0; y < h; y++)
-        phases[y] = (PHASE(y, ch) / M_PI + 1) / 2;
+        for (int n = 0; n < N; n++) {
+            psi = n * theta + n * n / 2.0 * phi;
+            c =  cos(psi);
+            S = -sin(psi);
+            a = c * g[n].re - S * g[n].im;
+            b = S * g[n].re + c * g[n].im;
+            g[n].re = a;
+            g[n].im = b;
+        }
 
-    return 0;
-}
+        av_fft_permute(s->fft[ch], h);
+        av_fft_calc(s->fft[ch], h);
 
-static void acalc_magnitudes(ShowSpectrumContext *s)
-{
-    const double w = s->win_scale * (s->scale == LOG ? s->win_scale : 1);
-    int ch, y, h = s->orientation == VERTICAL ? s->h : s->w;
-    const float f = s->gain * w;
+        av_fft_permute(s->fft[ch], g);
+        av_fft_calc(s->fft[ch], g);
 
-    for (ch = 0; ch < s->nb_display_channels; ch++) {
-        float *magnitudes = s->magnitudes[ch];
+        for (int n = 0; n < L; n++) {
+            c = g[n].re;
+            S = g[n].im;
+            a = c * h[n].re - S * h[n].im;
+            b = S * h[n].re + c * h[n].im;
 
-        for (y = 0; y < h; y++)
-            magnitudes[y] += MAGNITUDE(y, ch) * f;
+            g[n].re = a / L;
+            g[n].im = b / L;
+        }
+
+        av_fft_permute(s->ifft[ch], g);
+        av_fft_calc(s->ifft[ch], g);
+
+        for (int k = 0; k < M; k++) {
+            psi = k * k / 2.0 * phi;
+            c =  cos(psi);
+            S = -sin(psi);
+            a = c * g[k].re - S * g[k].im;
+            b = S * g[k].re + c * g[k].im;
+            s->fft_data[ch][k].re = a;
+            s->fft_data[ch][k].im = b;
+        }
+    } else {
+        /* run FFT on each samples set */
+        av_fft_permute(s->fft[ch], s->fft_data[ch]);
+        av_fft_calc(s->fft[ch], s->fft_data[ch]);
     }
+
+    return 0;
 }
 
-static void scale_magnitudes(ShowSpectrumContext *s, float scale)
+static void drawtext(AVFrame *pic, int x, int y, const char *txt, int o)
 {
-    int ch, y, h = s->orientation == VERTICAL ? s->h : s->w;
+    const uint8_t *font;
+    int font_height;
+    int i;
 
-    for (ch = 0; ch < s->nb_display_channels; ch++) {
-        float *magnitudes = s->magnitudes[ch];
+    font = avpriv_cga_font,   font_height =  8;
 
-        for (y = 0; y < h; y++)
-            magnitudes[y] *= scale;
+    for (i = 0; txt[i]; i++) {
+        int char_y, mask;
+
+        if (o) {
+            for (char_y = font_height - 1; char_y >= 0; char_y--) {
+                uint8_t *p = pic->data[0] + (y + i * 10) * pic->linesize[0] + x;
+                for (mask = 0x80; mask; mask >>= 1) {
+                    if (font[txt[i] * font_height + font_height - 1 - char_y] & mask)
+                        p[char_y] = ~p[char_y];
+                    p += pic->linesize[0];
+                }
+            }
+        } else {
+            uint8_t *p = pic->data[0] + y*pic->linesize[0] + (x + i*8);
+            for (char_y = 0; char_y < font_height; char_y++) {
+                for (mask = 0x80; mask; mask >>= 1) {
+                    if (font[txt[i] * font_height + char_y] & mask)
+                        *p = ~(*p);
+                    p++;
+                }
+                p += pic->linesize[0] - 8;
+            }
+        }
     }
 }
 
@@ -560,6 +481,8 @@
         case FIERY:
         case FRUIT:
         case COOL:
+        case GREEN:
+        case MAGMA:
         case INTENSITY:
             *uf = *yf;
             *vf = *yf;
@@ -645,6 +568,486 @@
     }
 }
 
+static char *get_time(AVFilterContext *ctx, float seconds, int x)
+{
+    char *units;
+
+    if (x == 0)
+        units = av_asprintf("0");
+    else if (log10(seconds) > 6)
+        units = av_asprintf("%.2fh", seconds / (60 * 60));
+    else if (log10(seconds) > 3)
+        units = av_asprintf("%.2fm", seconds / 60);
+    else
+        units = av_asprintf("%.2fs", seconds);
+    return units;
+}
+
+static int draw_legend(AVFilterContext *ctx, int samples)
+{
+    ShowSpectrumContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    int ch, y, x = 0, sz = s->orientation == VERTICAL ? s->w : s->h;
+    int multi = (s->mode == SEPARATE && s->color_mode == CHANNEL);
+    float spp = samples / (float)sz;
+    char *text;
+    uint8_t *dst;
+    char chlayout_str[128];
+
+    av_get_channel_layout_string(chlayout_str, sizeof(chlayout_str), inlink->channels,
+                                 inlink->channel_layout);
+
+    text = av_asprintf("%d Hz | %s", inlink->sample_rate, chlayout_str);
+
+    drawtext(s->outpicref, 2, outlink->h - 10, "CREATED BY LIBAVFILTER", 0);
+    drawtext(s->outpicref, outlink->w - 2 - strlen(text) * 10, outlink->h - 10, text, 0);
+    if (s->stop) {
+        char *text = av_asprintf("Zoom: %d Hz - %d Hz", s->start, s->stop);
+        drawtext(s->outpicref, outlink->w - 2 - strlen(text) * 10, 3, text, 0);
+        av_freep(&text);
+    }
+
+    av_freep(&text);
+
+    dst = s->outpicref->data[0] + (s->start_y - 1) * s->outpicref->linesize[0] + s->start_x - 1;
+    for (x = 0; x < s->w + 1; x++)
+        dst[x] = 200;
+    dst = s->outpicref->data[0] + (s->start_y + s->h) * s->outpicref->linesize[0] + s->start_x - 1;
+    for (x = 0; x < s->w + 1; x++)
+        dst[x] = 200;
+    for (y = 0; y < s->h + 2; y++) {
+        dst = s->outpicref->data[0] + (y + s->start_y - 1) * s->outpicref->linesize[0];
+        dst[s->start_x - 1] = 200;
+        dst[s->start_x + s->w] = 200;
+    }
+    if (s->orientation == VERTICAL) {
+        int h = s->mode == SEPARATE ? s->h / s->nb_display_channels : s->h;
+        int hh = s->mode == SEPARATE ? -(s->h % s->nb_display_channels) + 1 : 1;
+        for (ch = 0; ch < (s->mode == SEPARATE ? s->nb_display_channels : 1); ch++) {
+            for (y = 0; y < h; y += 20) {
+                dst = s->outpicref->data[0] + (s->start_y + h * (ch + 1) - y - hh) * s->outpicref->linesize[0];
+                dst[s->start_x - 2] = 200;
+                dst[s->start_x + s->w + 1] = 200;
+            }
+            for (y = 0; y < h; y += 40) {
+                dst = s->outpicref->data[0] + (s->start_y + h * (ch + 1) - y - hh) * s->outpicref->linesize[0];
+                dst[s->start_x - 3] = 200;
+                dst[s->start_x + s->w + 2] = 200;
+            }
+            dst = s->outpicref->data[0] + (s->start_y - 2) * s->outpicref->linesize[0] + s->start_x;
+            for (x = 0; x < s->w; x+=40)
+                dst[x] = 200;
+            dst = s->outpicref->data[0] + (s->start_y - 3) * s->outpicref->linesize[0] + s->start_x;
+            for (x = 0; x < s->w; x+=80)
+                dst[x] = 200;
+            dst = s->outpicref->data[0] + (s->h + s->start_y + 1) * s->outpicref->linesize[0] + s->start_x;
+            for (x = 0; x < s->w; x+=40) {
+                dst[x] = 200;
+            }
+            dst = s->outpicref->data[0] + (s->h + s->start_y + 2) * s->outpicref->linesize[0] + s->start_x;
+            for (x = 0; x < s->w; x+=80) {
+                dst[x] = 200;
+            }
+            for (y = 0; y < h; y += 40) {
+                float range = s->stop ? s->stop - s->start : inlink->sample_rate / 2;
+                float hertz = s->start + y * range / (float)(1 << (int)ceil(log2(h)));
+                char *units;
+
+                if (hertz == 0)
+                    units = av_asprintf("DC");
+                else
+                    units = av_asprintf("%.2f", hertz);
+                if (!units)
+                    return AVERROR(ENOMEM);
+
+                drawtext(s->outpicref, s->start_x - 8 * strlen(units) - 4, h * (ch + 1) + s->start_y - y - 4 - hh, units, 0);
+                av_free(units);
+            }
+        }
+
+        for (x = 0; x < s->w && s->single_pic; x+=80) {
+            float seconds = x * spp / inlink->sample_rate;
+            char *units = get_time(ctx, seconds, x);
+
+            drawtext(s->outpicref, s->start_x + x - 4 * strlen(units), s->h + s->start_y + 6, units, 0);
+            drawtext(s->outpicref, s->start_x + x - 4 * strlen(units), s->start_y - 12, units, 0);
+            av_free(units);
+        }
+
+        drawtext(s->outpicref, outlink->w / 2 - 4 * 4, outlink->h - s->start_y / 2, "TIME", 0);
+        drawtext(s->outpicref, s->start_x / 7, outlink->h / 2 - 14 * 4, "FREQUENCY (Hz)", 1);
+    } else {
+        int w = s->mode == SEPARATE ? s->w / s->nb_display_channels : s->w;
+        for (y = 0; y < s->h; y += 20) {
+            dst = s->outpicref->data[0] + (s->start_y + y) * s->outpicref->linesize[0];
+            dst[s->start_x - 2] = 200;
+            dst[s->start_x + s->w + 1] = 200;
+        }
+        for (y = 0; y < s->h; y += 40) {
+            dst = s->outpicref->data[0] + (s->start_y + y) * s->outpicref->linesize[0];
+            dst[s->start_x - 3] = 200;
+            dst[s->start_x + s->w + 2] = 200;
+        }
+        for (ch = 0; ch < (s->mode == SEPARATE ? s->nb_display_channels : 1); ch++) {
+            dst = s->outpicref->data[0] + (s->start_y - 2) * s->outpicref->linesize[0] + s->start_x + w * ch;
+            for (x = 0; x < w; x+=40)
+                dst[x] = 200;
+            dst = s->outpicref->data[0] + (s->start_y - 3) * s->outpicref->linesize[0] + s->start_x + w * ch;
+            for (x = 0; x < w; x+=80)
+                dst[x] = 200;
+            dst = s->outpicref->data[0] + (s->h + s->start_y + 1) * s->outpicref->linesize[0] + s->start_x + w * ch;
+            for (x = 0; x < w; x+=40) {
+                dst[x] = 200;
+            }
+            dst = s->outpicref->data[0] + (s->h + s->start_y + 2) * s->outpicref->linesize[0] + s->start_x + w * ch;
+            for (x = 0; x < w; x+=80) {
+                dst[x] = 200;
+            }
+            for (x = 0; x < w - 79; x += 80) {
+                float range = s->stop ? s->stop - s->start : inlink->sample_rate / 2;
+                float hertz = s->start + x * range / (float)(1 << (int)ceil(log2(w)));
+                char *units;
+
+                if (hertz == 0)
+                    units = av_asprintf("DC");
+                else
+                    units = av_asprintf("%.2f", hertz);
+                if (!units)
+                    return AVERROR(ENOMEM);
+
+                drawtext(s->outpicref, s->start_x - 4 * strlen(units) + x + w * ch, s->start_y - 12, units, 0);
+                drawtext(s->outpicref, s->start_x - 4 * strlen(units) + x + w * ch, s->h + s->start_y + 6, units, 0);
+                av_free(units);
+            }
+        }
+        for (y = 0; y < s->h && s->single_pic; y+=40) {
+            float seconds = y * spp / inlink->sample_rate;
+            char *units = get_time(ctx, seconds, x);
+
+            drawtext(s->outpicref, s->start_x - 8 * strlen(units) - 4, s->start_y + y - 4, units, 0);
+            av_free(units);
+        }
+        drawtext(s->outpicref, s->start_x / 7, outlink->h / 2 - 4 * 4, "TIME", 1);
+        drawtext(s->outpicref, outlink->w / 2 - 14 * 4, outlink->h - s->start_y / 2, "FREQUENCY (Hz)", 0);
+    }
+
+    for (ch = 0; ch < (multi ? s->nb_display_channels : 1); ch++) {
+        int h = multi ? s->h / s->nb_display_channels : s->h;
+
+        for (y = 0; y < h; y++) {
+            float out[3] = { 0., 127.5, 127.5};
+            int chn;
+
+            for (chn = 0; chn < (s->mode == SEPARATE ? 1 : s->nb_display_channels); chn++) {
+                float yf, uf, vf;
+                int channel = (multi) ? s->nb_display_channels - ch - 1 : chn;
+                float lout[3];
+
+                color_range(s, channel, &yf, &uf, &vf);
+                pick_color(s, yf, uf, vf, y / (float)h, lout);
+                out[0] += lout[0];
+                out[1] += lout[1];
+                out[2] += lout[2];
+            }
+            memset(s->outpicref->data[0]+(s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[0] + s->w + s->start_x + 20, av_clip_uint8(out[0]), 10);
+            memset(s->outpicref->data[1]+(s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[1] + s->w + s->start_x + 20, av_clip_uint8(out[1]), 10);
+            memset(s->outpicref->data[2]+(s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[2] + s->w + s->start_x + 20, av_clip_uint8(out[2]), 10);
+        }
+
+        for (y = 0; ch == 0 && y < h; y += h / 10) {
+            float value = 120.0 * log10(1. - y / (float)h);
+            char *text;
+
+            if (value < -120)
+                break;
+            text = av_asprintf("%.0f dB", value);
+            if (!text)
+                continue;
+            drawtext(s->outpicref, s->w + s->start_x + 35, s->start_y + y - 5, text, 0);
+            av_free(text);
+        }
+    }
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    ShowSpectrumContext *s = ctx->priv;
+    int i, fft_bits, h, w;
+    float overlap;
+
+    s->stop = FFMIN(s->stop, inlink->sample_rate / 2);
+    if (s->stop && s->stop <= s->start) {
+        av_log(ctx, AV_LOG_ERROR, "Stop frequency should be greater than start.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (!strcmp(ctx->filter->name, "showspectrumpic"))
+        s->single_pic = 1;
+
+    outlink->w = s->w;
+    outlink->h = s->h;
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+
+    if (s->legend) {
+        s->start_x = (log10(inlink->sample_rate) + 1) * 25;
+        s->start_y = 64;
+        outlink->w += s->start_x * 2;
+        outlink->h += s->start_y * 2;
+    }
+
+    h = (s->mode == COMBINED || s->orientation == HORIZONTAL) ? s->h : s->h / inlink->channels;
+    w = (s->mode == COMBINED || s->orientation == VERTICAL)   ? s->w : s->w / inlink->channels;
+    s->channel_height = h;
+    s->channel_width  = w;
+
+    if (s->orientation == VERTICAL) {
+        /* FFT window size (precision) according to the requested output frame height */
+        for (fft_bits = 1; 1 << fft_bits < 2 * h; fft_bits++);
+    } else {
+        /* FFT window size (precision) according to the requested output frame width */
+        for (fft_bits = 1; 1 << fft_bits < 2 * w; fft_bits++);
+    }
+
+    s->win_size = 1 << fft_bits;
+    s->buf_size = s->win_size << !!s->stop;
+
+    if (!s->fft) {
+        s->fft = av_calloc(inlink->channels, sizeof(*s->fft));
+        if (!s->fft)
+            return AVERROR(ENOMEM);
+    }
+
+    if (s->stop) {
+        if (!s->ifft) {
+            s->ifft = av_calloc(inlink->channels, sizeof(*s->ifft));
+            if (!s->ifft)
+                return AVERROR(ENOMEM);
+        }
+    }
+
+    /* (re-)configuration if the video output changed (or first init) */
+    if (fft_bits != s->fft_bits) {
+        AVFrame *outpicref;
+
+        s->fft_bits = fft_bits;
+
+        /* FFT buffers: x2 for each (display) channel buffer.
+         * Note: we use free and malloc instead of a realloc-like function to
+         * make sure the buffer is aligned in memory for the FFT functions. */
+        for (i = 0; i < s->nb_display_channels; i++) {
+            if (s->stop) {
+                av_fft_end(s->ifft[i]);
+                av_freep(&s->fft_scratch[i]);
+            }
+            av_fft_end(s->fft[i]);
+            av_freep(&s->fft_data[i]);
+        }
+        av_freep(&s->fft_data);
+
+        s->nb_display_channels = inlink->channels;
+        for (i = 0; i < s->nb_display_channels; i++) {
+            s->fft[i] = av_fft_init(fft_bits + !!s->stop, 0);
+            if (s->stop) {
+                s->ifft[i] = av_fft_init(fft_bits + !!s->stop, 1);
+                if (!s->ifft[i]) {
+                    av_log(ctx, AV_LOG_ERROR, "Unable to create Inverse FFT context. "
+                           "The window size might be too high.\n");
+                    return AVERROR(EINVAL);
+                }
+            }
+            if (!s->fft[i]) {
+                av_log(ctx, AV_LOG_ERROR, "Unable to create FFT context. "
+                       "The window size might be too high.\n");
+                return AVERROR(EINVAL);
+            }
+        }
+
+        s->magnitudes = av_calloc(s->nb_display_channels, sizeof(*s->magnitudes));
+        if (!s->magnitudes)
+            return AVERROR(ENOMEM);
+        for (i = 0; i < s->nb_display_channels; i++) {
+            s->magnitudes[i] = av_calloc(s->orientation == VERTICAL ? s->h : s->w, sizeof(**s->magnitudes));
+            if (!s->magnitudes[i])
+                return AVERROR(ENOMEM);
+        }
+
+        s->phases = av_calloc(s->nb_display_channels, sizeof(*s->phases));
+        if (!s->phases)
+            return AVERROR(ENOMEM);
+        for (i = 0; i < s->nb_display_channels; i++) {
+            s->phases[i] = av_calloc(s->orientation == VERTICAL ? s->h : s->w, sizeof(**s->phases));
+            if (!s->phases[i])
+                return AVERROR(ENOMEM);
+        }
+
+        av_freep(&s->color_buffer);
+        s->color_buffer = av_calloc(s->nb_display_channels, sizeof(*s->color_buffer));
+        if (!s->color_buffer)
+            return AVERROR(ENOMEM);
+        for (i = 0; i < s->nb_display_channels; i++) {
+            s->color_buffer[i] = av_calloc(s->orientation == VERTICAL ? s->h * 3 : s->w * 3, sizeof(**s->color_buffer));
+            if (!s->color_buffer[i])
+                return AVERROR(ENOMEM);
+        }
+
+        s->fft_data = av_calloc(s->nb_display_channels, sizeof(*s->fft_data));
+        if (!s->fft_data)
+            return AVERROR(ENOMEM);
+        s->fft_scratch = av_calloc(s->nb_display_channels, sizeof(*s->fft_scratch));
+        if (!s->fft_scratch)
+            return AVERROR(ENOMEM);
+        for (i = 0; i < s->nb_display_channels; i++) {
+            s->fft_data[i] = av_calloc(s->buf_size, sizeof(**s->fft_data));
+            if (!s->fft_data[i])
+                return AVERROR(ENOMEM);
+
+            s->fft_scratch[i] = av_calloc(s->buf_size, sizeof(**s->fft_scratch));
+            if (!s->fft_scratch[i])
+                return AVERROR(ENOMEM);
+        }
+
+        /* pre-calc windowing function */
+        s->window_func_lut =
+            av_realloc_f(s->window_func_lut, s->win_size,
+                         sizeof(*s->window_func_lut));
+        if (!s->window_func_lut)
+            return AVERROR(ENOMEM);
+        generate_window_func(s->window_func_lut, s->win_size, s->win_func, &overlap);
+        if (s->overlap == 1)
+            s->overlap = overlap;
+        s->hop_size = (1. - s->overlap) * s->win_size;
+        if (s->hop_size < 1) {
+            av_log(ctx, AV_LOG_ERROR, "overlap %f too big\n", s->overlap);
+            return AVERROR(EINVAL);
+        }
+
+        for (s->win_scale = 0, i = 0; i < s->win_size; i++) {
+            s->win_scale += s->window_func_lut[i] * s->window_func_lut[i];
+        }
+        s->win_scale = 1. / sqrt(s->win_scale);
+
+        /* prepare the initial picref buffer (black frame) */
+        av_frame_free(&s->outpicref);
+        s->outpicref = outpicref =
+            ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!outpicref)
+            return AVERROR(ENOMEM);
+        outpicref->sample_aspect_ratio = (AVRational){1,1};
+        for (i = 0; i < outlink->h; i++) {
+            memset(outpicref->data[0] + i * outpicref->linesize[0],   0, outlink->w);
+            memset(outpicref->data[1] + i * outpicref->linesize[1], 128, outlink->w);
+            memset(outpicref->data[2] + i * outpicref->linesize[2], 128, outlink->w);
+        }
+        outpicref->color_range = AVCOL_RANGE_JPEG;
+
+        if (!s->single_pic && s->legend)
+            draw_legend(ctx, 0);
+    }
+
+    if ((s->orientation == VERTICAL   && s->xpos >= s->w) ||
+        (s->orientation == HORIZONTAL && s->xpos >= s->h))
+        s->xpos = 0;
+
+    s->auto_frame_rate = av_make_q(inlink->sample_rate, s->hop_size);
+    if (s->orientation == VERTICAL && s->sliding == FULLFRAME)
+        s->auto_frame_rate.den *= s->w;
+    if (s->orientation == HORIZONTAL && s->sliding == FULLFRAME)
+        s->auto_frame_rate.den *= s->h;
+    if (!s->single_pic && strcmp(s->rate_str, "auto")) {
+        int ret = av_parse_video_rate(&s->frame_rate, s->rate_str);
+        if (ret < 0)
+            return ret;
+    } else {
+        s->frame_rate = s->auto_frame_rate;
+    }
+    outlink->frame_rate = s->frame_rate;
+    outlink->time_base = av_inv_q(outlink->frame_rate);
+
+    if (s->orientation == VERTICAL) {
+        s->combine_buffer =
+            av_realloc_f(s->combine_buffer, s->h * 3,
+                         sizeof(*s->combine_buffer));
+    } else {
+        s->combine_buffer =
+            av_realloc_f(s->combine_buffer, s->w * 3,
+                         sizeof(*s->combine_buffer));
+    }
+
+    av_log(ctx, AV_LOG_VERBOSE, "s:%dx%d FFT window size:%d\n",
+           s->w, s->h, s->win_size);
+
+    av_audio_fifo_free(s->fifo);
+    s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->win_size);
+    if (!s->fifo)
+        return AVERROR(ENOMEM);
+    return 0;
+}
+
+#define RE(y, ch) s->fft_data[ch][y].re
+#define IM(y, ch) s->fft_data[ch][y].im
+#define MAGNITUDE(y, ch) hypot(RE(y, ch), IM(y, ch))
+#define PHASE(y, ch) atan2(IM(y, ch), RE(y, ch))
+
+static int calc_channel_magnitudes(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ShowSpectrumContext *s = ctx->priv;
+    const double w = s->win_scale * (s->scale == LOG ? s->win_scale : 1);
+    int y, h = s->orientation == VERTICAL ? s->h : s->w;
+    const float f = s->gain * w;
+    const int ch = jobnr;
+    float *magnitudes = s->magnitudes[ch];
+
+    for (y = 0; y < h; y++)
+        magnitudes[y] = MAGNITUDE(y, ch) * f;
+
+    return 0;
+}
+
+static int calc_channel_phases(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ShowSpectrumContext *s = ctx->priv;
+    const int h = s->orientation == VERTICAL ? s->h : s->w;
+    const int ch = jobnr;
+    float *phases = s->phases[ch];
+    int y;
+
+    for (y = 0; y < h; y++)
+        phases[y] = (PHASE(y, ch) / M_PI + 1) / 2;
+
+    return 0;
+}
+
+static void acalc_magnitudes(ShowSpectrumContext *s)
+{
+    const double w = s->win_scale * (s->scale == LOG ? s->win_scale : 1);
+    int ch, y, h = s->orientation == VERTICAL ? s->h : s->w;
+    const float f = s->gain * w;
+
+    for (ch = 0; ch < s->nb_display_channels; ch++) {
+        float *magnitudes = s->magnitudes[ch];
+
+        for (y = 0; y < h; y++)
+            magnitudes[y] += MAGNITUDE(y, ch) * f;
+    }
+}
+
+static void scale_magnitudes(ShowSpectrumContext *s, float scale)
+{
+    int ch, y, h = s->orientation == VERTICAL ? s->h : s->w;
+
+    for (ch = 0; ch < s->nb_display_channels; ch++) {
+        float *magnitudes = s->magnitudes[ch];
+
+        for (y = 0; y < h; y++)
+            magnitudes[y] *= scale;
+    }
+}
+
 static void clear_combine_buffer(ShowSpectrumContext *s, int size)
 {
     int y;
@@ -744,8 +1147,8 @@
         if (s->sliding == SCROLL) {
             for (plane = 0; plane < 3; plane++) {
                 for (y = 0; y < s->h; y++) {
-                    uint8_t *p = outpicref->data[plane] +
-                                 y * outpicref->linesize[plane];
+                    uint8_t *p = outpicref->data[plane] + s->start_x +
+                                 (y + s->start_y) * outpicref->linesize[plane];
                     memmove(p, p + 1, s->w - 1);
                 }
             }
@@ -753,8 +1156,8 @@
         } else if (s->sliding == RSCROLL) {
             for (plane = 0; plane < 3; plane++) {
                 for (y = 0; y < s->h; y++) {
-                    uint8_t *p = outpicref->data[plane] +
-                                 y * outpicref->linesize[plane];
+                    uint8_t *p = outpicref->data[plane] + s->start_x +
+                                 (y + s->start_y) * outpicref->linesize[plane];
                     memmove(p + 1, p, s->w - 1);
                 }
             }
@@ -773,8 +1176,8 @@
         if (s->sliding == SCROLL) {
             for (plane = 0; plane < 3; plane++) {
                 for (y = 1; y < s->h; y++) {
-                    memmove(outpicref->data[plane] + (y-1) * outpicref->linesize[plane],
-                            outpicref->data[plane] + (y  ) * outpicref->linesize[plane],
+                    memmove(outpicref->data[plane] + (y-1 + s->start_y) * outpicref->linesize[plane] + s->start_x,
+                            outpicref->data[plane] + (y   + s->start_y) * outpicref->linesize[plane] + s->start_x,
                             s->w);
                 }
             }
@@ -782,8 +1185,8 @@
         } else if (s->sliding == RSCROLL) {
             for (plane = 0; plane < 3; plane++) {
                 for (y = s->h - 1; y >= 1; y--) {
-                    memmove(outpicref->data[plane] + (y  ) * outpicref->linesize[plane],
-                            outpicref->data[plane] + (y-1) * outpicref->linesize[plane],
+                    memmove(outpicref->data[plane] + (y   + s->start_y) * outpicref->linesize[plane] + s->start_x,
+                            outpicref->data[plane] + (y-1 + s->start_y) * outpicref->linesize[plane] + s->start_x,
                             s->w);
                 }
             }
@@ -800,7 +1203,7 @@
     }
 
     if (s->sliding != FULLFRAME || s->xpos == 0)
-        outpicref->pts = insamples->pts;
+        outpicref->pts = av_rescale_q(insamples->pts, inlink->time_base, outlink->time_base);
 
     s->xpos++;
     if (s->orientation == VERTICAL && s->xpos >= s->w)
@@ -808,71 +1211,83 @@
     if (s->orientation == HORIZONTAL && s->xpos >= s->h)
         s->xpos = 0;
     if (!s->single_pic && (s->sliding != FULLFRAME || s->xpos == 0)) {
-        ret = ff_filter_frame(outlink, av_frame_clone(s->outpicref));
-        if (ret < 0)
-            return ret;
+        if (s->old_pts < outpicref->pts) {
+            if (s->legend) {
+                char *units = get_time(ctx, insamples->pts /(float)inlink->sample_rate, x);
+
+                if (s->orientation == VERTICAL) {
+                    for (y = 0; y < 10; y++) {
+                        memset(s->outpicref->data[0] + outlink->w / 2 - 4 * s->old_len +
+                               (outlink->h - s->start_y / 2 - 20 + y) * s->outpicref->linesize[0], 0, 10 * s->old_len);
+                    }
+                    drawtext(s->outpicref,
+                             outlink->w / 2 - 4 * strlen(units),
+                             outlink->h - s->start_y / 2 - 20,
+                             units, 0);
+                } else  {
+                    for (y = 0; y < 10 * s->old_len; y++) {
+                        memset(s->outpicref->data[0] + s->start_x / 7 + 20 +
+                               (outlink->h / 2 - 4 * s->old_len + y) * s->outpicref->linesize[0], 0, 10);
+                    }
+                    drawtext(s->outpicref,
+                             s->start_x / 7 + 20,
+                             outlink->h / 2 - 4 * strlen(units),
+                             units, 1);
+                }
+                s->old_len = strlen(units);
+                av_free(units);
+            }
+            s->old_pts = outpicref->pts;
+            ret = ff_filter_frame(outlink, av_frame_clone(s->outpicref));
+            if (ret < 0)
+                return ret;
+            return 0;
+        }
     }
 
-    return s->win_size;
+    return 1;
 }
 
 #if CONFIG_SHOWSPECTRUM_FILTER
 
-static int request_frame(AVFilterLink *outlink)
+static int activate(AVFilterContext *ctx)
 {
-    ShowSpectrumContext *s = outlink->src->priv;
-    AVFilterLink *inlink = outlink->src->inputs[0];
-    unsigned i;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    ShowSpectrumContext *s = ctx->priv;
     int ret;
 
-    ret = ff_request_frame(inlink);
-    if (ret == AVERROR_EOF && s->sliding == FULLFRAME && s->xpos > 0 &&
-        s->outpicref) {
-        if (s->orientation == VERTICAL) {
-            for (i = 0; i < outlink->h; i++) {
-                memset(s->outpicref->data[0] + i * s->outpicref->linesize[0] + s->xpos,   0, outlink->w - s->xpos);
-                memset(s->outpicref->data[1] + i * s->outpicref->linesize[1] + s->xpos, 128, outlink->w - s->xpos);
-                memset(s->outpicref->data[2] + i * s->outpicref->linesize[2] + s->xpos, 128, outlink->w - s->xpos);
-            }
-        } else {
-            for (i = s->xpos; i < outlink->h; i++) {
-                memset(s->outpicref->data[0] + i * s->outpicref->linesize[0],   0, outlink->w);
-                memset(s->outpicref->data[1] + i * s->outpicref->linesize[1], 128, outlink->w);
-                memset(s->outpicref->data[2] + i * s->outpicref->linesize[2], 128, outlink->w);
-            }
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    if (av_audio_fifo_size(s->fifo) < s->win_size) {
+        AVFrame *frame = NULL;
+
+        ret = ff_inlink_consume_frame(inlink, &frame);
+        if (ret < 0)
+            return ret;
+        if (ret > 0) {
+            s->pts = frame->pts;
+            s->consumed = 0;
+
+            av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
+            av_frame_free(&frame);
         }
-        ret = ff_filter_frame(outlink, s->outpicref);
-        s->outpicref = NULL;
     }
 
-    return ret;
-}
+    if (s->outpicref && av_audio_fifo_size(s->fifo) >= s->win_size) {
+        AVFrame *fin = ff_get_audio_buffer(inlink, s->win_size);
+        if (!fin)
+            return AVERROR(ENOMEM);
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
-{
-    AVFilterContext *ctx = inlink->dst;
-    ShowSpectrumContext *s = ctx->priv;
-    AVFrame *fin = NULL;
-    int ret = 0, consumed = 0;
-
-    if (s->pts == AV_NOPTS_VALUE)
-        s->pts = insamples->pts - av_audio_fifo_size(s->fifo);
-
-    av_audio_fifo_write(s->fifo, (void **)insamples->extended_data, insamples->nb_samples);
-    av_frame_free(&insamples);
-    while (av_audio_fifo_size(s->fifo) >= s->win_size) {
-        fin = ff_get_audio_buffer(inlink, s->win_size);
-        if (!fin) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
+        fin->pts = s->pts + s->consumed;
+        s->consumed += s->hop_size;
+        ret = av_audio_fifo_peek(s->fifo, (void **)fin->extended_data,
+                                 FFMIN(s->win_size, av_audio_fifo_size(s->fifo)));
+        if (ret < 0) {
+            av_frame_free(&fin);
+            return ret;
         }
 
-        fin->pts = s->pts + consumed;
-        consumed += s->hop_size;
-        ret = av_audio_fifo_peek(s->fifo, (void **)fin->extended_data, s->win_size);
-        if (ret < 0)
-            goto fail;
-
         av_assert0(fin->nb_samples == s->win_size);
 
         ctx->internal->execute(ctx, run_channel_fft, fin, NULL, s->nb_display_channels);
@@ -884,23 +1299,56 @@
             ctx->internal->execute(ctx, calc_channel_phases, NULL, NULL, s->nb_display_channels);
 
         ret = plot_spectrum_column(inlink, fin);
+
         av_frame_free(&fin);
         av_audio_fifo_drain(s->fifo, s->hop_size);
-        if (ret < 0)
-            goto fail;
+        if (ret <= 0)
+            return ret;
     }
 
-fail:
-    s->pts = AV_NOPTS_VALUE;
-    av_frame_free(&fin);
-    return ret;
+    if (ff_outlink_get_status(inlink) == AVERROR_EOF &&
+        s->sliding == FULLFRAME &&
+        s->xpos > 0 && s->outpicref) {
+        int64_t pts;
+
+        if (s->orientation == VERTICAL) {
+            for (int i = 0; i < outlink->h; i++) {
+                memset(s->outpicref->data[0] + i * s->outpicref->linesize[0] + s->xpos,   0, outlink->w - s->xpos);
+                memset(s->outpicref->data[1] + i * s->outpicref->linesize[1] + s->xpos, 128, outlink->w - s->xpos);
+                memset(s->outpicref->data[2] + i * s->outpicref->linesize[2] + s->xpos, 128, outlink->w - s->xpos);
+            }
+        } else {
+            for (int i = s->xpos; i < outlink->h; i++) {
+                memset(s->outpicref->data[0] + i * s->outpicref->linesize[0],   0, outlink->w);
+                memset(s->outpicref->data[1] + i * s->outpicref->linesize[1], 128, outlink->w);
+                memset(s->outpicref->data[2] + i * s->outpicref->linesize[2], 128, outlink->w);
+            }
+        }
+        s->outpicref->pts += s->consumed;
+        pts = s->outpicref->pts;
+        ret = ff_filter_frame(outlink, s->outpicref);
+        s->outpicref = NULL;
+        ff_outlink_set_status(outlink, AVERROR_EOF, pts);
+        return 0;
+    }
+
+    FF_FILTER_FORWARD_STATUS(inlink, outlink);
+    if (ff_outlink_frame_wanted(outlink) && av_audio_fifo_size(s->fifo) < s->win_size) {
+        ff_inlink_request_frame(inlink);
+        return 0;
+    }
+
+    if (av_audio_fifo_size(s->fifo) >= s->win_size) {
+        ff_filter_set_ready(ctx, 10);
+        return 0;
+    }
+    return FFERROR_NOT_READY;
 }
 
 static const AVFilterPad showspectrum_inputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_AUDIO,
-        .filter_frame = filter_frame,
     },
     { NULL }
 };
@@ -910,7 +1358,6 @@
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
         .config_props  = config_output,
-        .request_frame = request_frame,
     },
     { NULL }
 };
@@ -923,6 +1370,7 @@
     .priv_size     = sizeof(ShowSpectrumContext),
     .inputs        = showspectrum_inputs,
     .outputs       = showspectrum_outputs,
+    .activate      = activate,
     .priv_class    = &showspectrum_class,
     .flags         = AVFILTER_FLAG_SLICE_THREADS,
 };
@@ -946,6 +1394,8 @@
         { "fiery",     "fiery based coloring",            0, AV_OPT_TYPE_CONST, {.i64=FIERY},     0, 0, FLAGS, "color" },
         { "fruit",     "fruit based coloring",            0, AV_OPT_TYPE_CONST, {.i64=FRUIT},     0, 0, FLAGS, "color" },
         { "cool",      "cool based coloring",             0, AV_OPT_TYPE_CONST, {.i64=COOL},      0, 0, FLAGS, "color" },
+        { "magma",     "magma based coloring",            0, AV_OPT_TYPE_CONST, {.i64=MAGMA},     0, 0, FLAGS, "color" },
+        { "green",     "green based coloring",            0, AV_OPT_TYPE_CONST, {.i64=GREEN},     0, 0, FLAGS, "color" },
     { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=LOG}, 0, NB_SCALES-1, FLAGS, "scale" },
         { "lin",  "linear",      0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
         { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT},   0, 0, FLAGS, "scale" },
@@ -981,45 +1431,13 @@
     { "gain", "set scale gain", OFFSET(gain), AV_OPT_TYPE_FLOAT, {.dbl = 1}, 0, 128, FLAGS },
     { "legend", "draw legend", OFFSET(legend), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
     { "rotation", "color rotation", OFFSET(rotation), AV_OPT_TYPE_FLOAT, {.dbl = 0}, -1, 1, FLAGS },
+    { "start", "start frequency", OFFSET(start), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT32_MAX, FLAGS },
+    { "stop",  "stop frequency",  OFFSET(stop),  AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT32_MAX, FLAGS },
     { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(showspectrumpic);
 
-static void drawtext(AVFrame *pic, int x, int y, const char *txt, int o)
-{
-    const uint8_t *font;
-    int font_height;
-    int i;
-
-    font = avpriv_cga_font,   font_height =  8;
-
-    for (i = 0; txt[i]; i++) {
-        int char_y, mask;
-
-        if (o) {
-            for (char_y = font_height - 1; char_y >= 0; char_y--) {
-                uint8_t *p = pic->data[0] + (y + i * 10) * pic->linesize[0] + x;
-                for (mask = 0x80; mask; mask >>= 1) {
-                    if (font[txt[i] * font_height + font_height - 1 - char_y] & mask)
-                        p[char_y] = ~p[char_y];
-                    p += pic->linesize[0];
-                }
-            }
-        } else {
-            uint8_t *p = pic->data[0] + y*pic->linesize[0] + (x + i*8);
-            for (char_y = 0; char_y < font_height; char_y++) {
-                for (mask = 0x80; mask; mask >>= 1) {
-                    if (font[txt[i] * font_height + char_y] & mask)
-                        *p = ~(*p);
-                    p++;
-                }
-                p += pic->linesize[0] - 8;
-            }
-        }
-    }
-}
-
 static int showspectrumpic_request_frame(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
@@ -1031,7 +1449,7 @@
     samples = av_audio_fifo_size(s->fifo);
     if (ret == AVERROR_EOF && s->outpicref && samples > 0) {
         int consumed = 0;
-        int y, x = 0, sz = s->orientation == VERTICAL ? s->w : s->h;
+        int x = 0, sz = s->orientation == VERTICAL ? s->w : s->h;
         int ch, spf, spb;
         AVFrame *fin;
 
@@ -1079,191 +1497,8 @@
         av_frame_free(&fin);
         s->outpicref->pts = 0;
 
-        if (s->legend) {
-            int multi = (s->mode == SEPARATE && s->color_mode == CHANNEL);
-            float spp = samples / (float)sz;
-            uint8_t *dst;
-
-            drawtext(s->outpicref, 2, outlink->h - 10, "CREATED BY LIBAVFILTER", 0);
-
-            dst = s->outpicref->data[0] + (s->start_y - 1) * s->outpicref->linesize[0] + s->start_x - 1;
-            for (x = 0; x < s->w + 1; x++)
-                dst[x] = 200;
-            dst = s->outpicref->data[0] + (s->start_y + s->h) * s->outpicref->linesize[0] + s->start_x - 1;
-            for (x = 0; x < s->w + 1; x++)
-                dst[x] = 200;
-            for (y = 0; y < s->h + 2; y++) {
-                dst = s->outpicref->data[0] + (y + s->start_y - 1) * s->outpicref->linesize[0];
-                dst[s->start_x - 1] = 200;
-                dst[s->start_x + s->w] = 200;
-            }
-            if (s->orientation == VERTICAL) {
-                int h = s->mode == SEPARATE ? s->h / s->nb_display_channels : s->h;
-                for (ch = 0; ch < (s->mode == SEPARATE ? s->nb_display_channels : 1); ch++) {
-                    for (y = 0; y < h; y += 20) {
-                        dst = s->outpicref->data[0] + (s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[0];
-                        dst[s->start_x - 2] = 200;
-                        dst[s->start_x + s->w + 1] = 200;
-                    }
-                    for (y = 0; y < h; y += 40) {
-                        dst = s->outpicref->data[0] + (s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[0];
-                        dst[s->start_x - 3] = 200;
-                        dst[s->start_x + s->w + 2] = 200;
-                    }
-                    dst = s->outpicref->data[0] + (s->start_y - 2) * s->outpicref->linesize[0] + s->start_x;
-                    for (x = 0; x < s->w; x+=40)
-                        dst[x] = 200;
-                    dst = s->outpicref->data[0] + (s->start_y - 3) * s->outpicref->linesize[0] + s->start_x;
-                    for (x = 0; x < s->w; x+=80)
-                        dst[x] = 200;
-                    dst = s->outpicref->data[0] + (s->h + s->start_y + 1) * s->outpicref->linesize[0] + s->start_x;
-                    for (x = 0; x < s->w; x+=40) {
-                        dst[x] = 200;
-                    }
-                    dst = s->outpicref->data[0] + (s->h + s->start_y + 2) * s->outpicref->linesize[0] + s->start_x;
-                    for (x = 0; x < s->w; x+=80) {
-                        dst[x] = 200;
-                    }
-                    for (y = 0; y < h; y += 40) {
-                        float hertz = y * (inlink->sample_rate / 2) / (float)(1 << (int)ceil(log2(h)));
-                        char *units;
-
-                        if (hertz == 0)
-                            units = av_asprintf("DC");
-                        else
-                            units = av_asprintf("%.2f", hertz);
-                        if (!units)
-                            return AVERROR(ENOMEM);
-
-                        drawtext(s->outpicref, s->start_x - 8 * strlen(units) - 4, h * (ch + 1) + s->start_y - y - 4, units, 0);
-                        av_free(units);
-                    }
-                }
-
-                for (x = 0; x < s->w; x+=80) {
-                    float seconds = x * spp / inlink->sample_rate;
-                    char *units;
-
-                    if (x == 0)
-                        units = av_asprintf("0");
-                    else if (log10(seconds) > 6)
-                        units = av_asprintf("%.2fh", seconds / (60 * 60));
-                    else if (log10(seconds) > 3)
-                        units = av_asprintf("%.2fm", seconds / 60);
-                    else
-                        units = av_asprintf("%.2fs", seconds);
-                    if (!units)
-                        return AVERROR(ENOMEM);
-
-                    drawtext(s->outpicref, s->start_x + x - 4 * strlen(units), s->h + s->start_y + 6, units, 0);
-                    drawtext(s->outpicref, s->start_x + x - 4 * strlen(units), s->start_y - 12, units, 0);
-                    av_free(units);
-                }
-
-                drawtext(s->outpicref, outlink->w / 2 - 4 * 4, outlink->h - s->start_y / 2, "TIME", 0);
-                drawtext(s->outpicref, s->start_x / 7, outlink->h / 2 - 14 * 4, "FREQUENCY (Hz)", 1);
-            } else {
-                int w = s->mode == SEPARATE ? s->w / s->nb_display_channels : s->w;
-                for (y = 0; y < s->h; y += 20) {
-                    dst = s->outpicref->data[0] + (s->start_y + y) * s->outpicref->linesize[0];
-                    dst[s->start_x - 2] = 200;
-                    dst[s->start_x + s->w + 1] = 200;
-                }
-                for (y = 0; y < s->h; y += 40) {
-                    dst = s->outpicref->data[0] + (s->start_y + y) * s->outpicref->linesize[0];
-                    dst[s->start_x - 3] = 200;
-                    dst[s->start_x + s->w + 2] = 200;
-                }
-                for (ch = 0; ch < (s->mode == SEPARATE ? s->nb_display_channels : 1); ch++) {
-                    dst = s->outpicref->data[0] + (s->start_y - 2) * s->outpicref->linesize[0] + s->start_x + w * ch;
-                    for (x = 0; x < w; x+=40)
-                        dst[x] = 200;
-                    dst = s->outpicref->data[0] + (s->start_y - 3) * s->outpicref->linesize[0] + s->start_x + w * ch;
-                    for (x = 0; x < w; x+=80)
-                        dst[x] = 200;
-                    dst = s->outpicref->data[0] + (s->h + s->start_y + 1) * s->outpicref->linesize[0] + s->start_x + w * ch;
-                    for (x = 0; x < w; x+=40) {
-                        dst[x] = 200;
-                    }
-                    dst = s->outpicref->data[0] + (s->h + s->start_y + 2) * s->outpicref->linesize[0] + s->start_x + w * ch;
-                    for (x = 0; x < w; x+=80) {
-                        dst[x] = 200;
-                    }
-                    for (x = 0; x < w; x += 80) {
-                        float hertz = x * (inlink->sample_rate / 2) / (float)(1 << (int)ceil(log2(w)));
-                        char *units;
-
-                        if (hertz == 0)
-                            units = av_asprintf("DC");
-                        else
-                            units = av_asprintf("%.2f", hertz);
-                        if (!units)
-                            return AVERROR(ENOMEM);
-
-                        drawtext(s->outpicref, s->start_x - 4 * strlen(units) + x + w * ch, s->start_y - 12, units, 0);
-                        drawtext(s->outpicref, s->start_x - 4 * strlen(units) + x + w * ch, s->h + s->start_y + 6, units, 0);
-                        av_free(units);
-                    }
-                }
-                for (y = 0; y < s->h; y+=40) {
-                    float seconds = y * spp / inlink->sample_rate;
-                    char *units;
-
-                    if (x == 0)
-                        units = av_asprintf("0");
-                    else if (log10(seconds) > 6)
-                        units = av_asprintf("%.2fh", seconds / (60 * 60));
-                    else if (log10(seconds) > 3)
-                        units = av_asprintf("%.2fm", seconds / 60);
-                    else
-                        units = av_asprintf("%.2fs", seconds);
-                    if (!units)
-                        return AVERROR(ENOMEM);
-
-                    drawtext(s->outpicref, s->start_x - 8 * strlen(units) - 4, s->start_y + y - 4, units, 0);
-                    av_free(units);
-                }
-                drawtext(s->outpicref, s->start_x / 7, outlink->h / 2 - 4 * 4, "TIME", 1);
-                drawtext(s->outpicref, outlink->w / 2 - 14 * 4, outlink->h - s->start_y / 2, "FREQUENCY (Hz)", 0);
-            }
-
-            for (ch = 0; ch < (multi ? s->nb_display_channels : 1); ch++) {
-                int h = multi ? s->h / s->nb_display_channels : s->h;
-
-                for (y = 0; y < h; y++) {
-                    float out[3] = { 0., 127.5, 127.5};
-                    int chn;
-
-                    for (chn = 0; chn < (s->mode == SEPARATE ? 1 : s->nb_display_channels); chn++) {
-                        float yf, uf, vf;
-                        int channel = (multi) ? s->nb_display_channels - ch - 1 : chn;
-                        float lout[3];
-
-                        color_range(s, channel, &yf, &uf, &vf);
-                        pick_color(s, yf, uf, vf, y / (float)h, lout);
-                        out[0] += lout[0];
-                        out[1] += lout[1];
-                        out[2] += lout[2];
-                    }
-                    memset(s->outpicref->data[0]+(s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[0] + s->w + s->start_x + 20, av_clip_uint8(out[0]), 10);
-                    memset(s->outpicref->data[1]+(s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[1] + s->w + s->start_x + 20, av_clip_uint8(out[1]), 10);
-                    memset(s->outpicref->data[2]+(s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[2] + s->w + s->start_x + 20, av_clip_uint8(out[2]), 10);
-                }
-
-                for (y = 0; ch == 0 && y < h; y += h / 10) {
-                    float value = 120.0 * log10(1. - y / (float)h);
-                    char *text;
-
-                    if (value < -120)
-                        break;
-                    text = av_asprintf("%.0f dB", value);
-                    if (!text)
-                        continue;
-                    drawtext(s->outpicref, s->w + s->start_x + 35, s->start_y + y - 5, text, 0);
-                    av_free(text);
-                }
-            }
-        }
+        if (s->legend)
+            draw_legend(ctx, samples);
 
         ret = ff_filter_frame(outlink, s->outpicref);
         s->outpicref = NULL;

diff --git a/libavfilter/avf_showvolume.c b/libavfilter/avf_showvolume.c
index 897e570..6b553c4 100644
--- a/libavfilter/avf_showvolume.c
+++ b/libavfilter/avf_showvolume.c

@@ -33,6 +33,7 @@
 
 static const char *const var_names[] = {   "VOLUME",   "CHANNEL",   "PEAK",        NULL };
 enum                                   { VAR_VOLUME, VAR_CHANNEL, VAR_PEAK, VAR_VARS_NB };
+enum DisplayScale   { LINEAR, LOG, NB_DISPLAY_SCALE };
 
 typedef struct ShowVolumeContext {
     const AVClass *class;
@@ -43,6 +44,8 @@
     char *color;
     int orientation;
     int step;
+    float bgopacity;
+    int mode;
 
     AVFrame *out;
     AVExpr *c_expr;
@@ -50,6 +53,17 @@
     int draw_volume;
     double *values;
     uint32_t *color_lut;
+    float *max;
+    float rms_factor;
+    int display_scale;
+
+    double draw_persistent_duration; /* in second */
+    uint8_t persistant_max_rgba[4];
+    int persistent_max_frames; /* number of frames to check max value */
+    float *max_persistent; /* max value for draw_persistent_max for each channel */
+    int *nb_frames_max_display; /* number of frame for each channel, for displaying the max value */
+
+    void (*meter)(float *src, int nb_samples, float *max, float factor);
 } ShowVolumeContext;
 
 #define OFFSET(x) offsetof(ShowVolumeContext, x)
@@ -61,14 +75,23 @@
     { "b", "set border width",   OFFSET(b), AV_OPT_TYPE_INT, {.i64=1}, 0, 5, FLAGS },
     { "w", "set channel width",  OFFSET(w), AV_OPT_TYPE_INT, {.i64=400}, 80, 8192, FLAGS },
     { "h", "set channel height", OFFSET(h), AV_OPT_TYPE_INT, {.i64=20}, 1, 900, FLAGS },
-    { "f", "set fade",           OFFSET(f), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.001, 1, FLAGS },
+    { "f", "set fade",           OFFSET(f), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0, 1, FLAGS },
     { "c", "set volume color expression", OFFSET(color), AV_OPT_TYPE_STRING, {.str="PEAK*255+floor((1-PEAK)*255)*256+0xff000000"}, 0, 0, FLAGS },
     { "t", "display channel names", OFFSET(draw_text), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
     { "v", "display volume value", OFFSET(draw_volume), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
+    { "dm", "duration for max value display", OFFSET(draw_persistent_duration), AV_OPT_TYPE_DOUBLE, {.dbl=0.}, 0, 9000, FLAGS},
+    { "dmc","set color of the max value line", OFFSET(persistant_max_rgba), AV_OPT_TYPE_COLOR, {.str = "orange"}, CHAR_MIN, CHAR_MAX, FLAGS },
     { "o", "set orientation", OFFSET(orientation), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "orientation" },
     {   "h", "horizontal", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "orientation" },
     {   "v", "vertical",   0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "orientation" },
     { "s", "set step size", OFFSET(step), AV_OPT_TYPE_INT, {.i64=0}, 0, 5, FLAGS },
+    { "p", "set background opacity", OFFSET(bgopacity), AV_OPT_TYPE_FLOAT, {.dbl=0}, 0, 1, FLAGS },
+    { "m", "set mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "mode" },
+    {   "p", "peak", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "mode" },
+    {   "r", "rms",  0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "mode" },
+    { "ds", "set display scale", OFFSET(display_scale), AV_OPT_TYPE_INT, {.i64=LINEAR}, LINEAR, NB_DISPLAY_SCALE - 1, FLAGS, "display_scale" },
+    {   "lin", "linear", 0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "display_scale" },
+    {   "log", "log",  0, AV_OPT_TYPE_CONST, {.i64=LOG}, 0, 0, FLAGS, "display_scale" },
     { NULL }
 };
 
@@ -118,6 +141,23 @@
     return 0;
 }
 
+static void find_peak(float *src, int nb_samples, float *peak, float factor)
+{
+    int i;
+
+    *peak = 0;
+    for (i = 0; i < nb_samples; i++)
+        *peak = FFMAX(*peak, FFABS(src[i]));
+}
+
+static void find_rms(float *src, int nb_samples, float *rms, float factor)
+{
+    int i;
+
+    for (i = 0; i < nb_samples; i++)
+        *rms += factor * (src[i] * src[i] - *rms);
+}
+
 static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
@@ -136,6 +176,23 @@
     if (!s->color_lut)
         return AVERROR(ENOMEM);
 
+    s->max = av_calloc(inlink->channels, sizeof(*s->max));
+    if (!s->max)
+        return AVERROR(ENOMEM);
+
+    s->rms_factor = 10000. / inlink->sample_rate;
+
+    switch (s->mode) {
+    case 0: s->meter = find_peak; break;
+    case 1: s->meter = find_rms;  break;
+    default: return AVERROR_BUG;
+    }
+
+    if (s->draw_persistent_duration > 0.) {
+        s->persistent_max_frames = (int) FFMAX(av_q2d(s->frame_rate) * s->draw_persistent_duration, 1.);
+        s->max_persistent = av_calloc(inlink->channels * s->persistent_max_frames, sizeof(*s->max_persistent));
+        s->nb_frames_max_display = av_calloc(inlink->channels * s->persistent_max_frames, sizeof(*s->nb_frames_max_display));
+    }
     return 0;
 }
 
@@ -183,7 +240,7 @@
     for (i = 0; txt[i]; i++) {
         int char_y, mask;
 
-        if (o) {
+        if (o) { /* vertical orientation */
             for (char_y = font_height - 1; char_y >= 0; char_y--) {
                 uint8_t *p = pic->data[0] + (y + i * 10) * pic->linesize[0] + x * 4;
                 for (mask = 0x80; mask; mask >>= 1) {
@@ -192,7 +249,7 @@
                     p += pic->linesize[0];
                 }
             }
-        } else {
+        } else { /* horizontal orientation */
             uint8_t *p = pic->data[0] + y * pic->linesize[0] + (x + i * 8) * 4;
             for (char_y = 0; char_y < font_height; char_y++) {
                 for (mask = 0x80; mask; mask >>= 1) {
@@ -206,13 +263,67 @@
     }
 }
 
+static void clear_picture(ShowVolumeContext *s, AVFilterLink *outlink)
+{
+    int i, j;
+    const uint32_t bg = (uint32_t)(s->bgopacity * 255) << 24;
+
+    for (i = 0; i < outlink->h; i++) {
+        uint32_t *dst = (uint32_t *)(s->out->data[0] + i * s->out->linesize[0]);
+        for (j = 0; j < outlink->w; j++)
+            AV_WN32A(dst + j, bg);
+    }
+}
+
+static inline int calc_max_draw(ShowVolumeContext *s, AVFilterLink *outlink, float max)
+{
+    float max_val;
+    if (s->display_scale == LINEAR) {
+        max_val = max;
+    } else { /* log */
+        max_val = av_clipf(0.21 * log10(max) + 1, 0, 1);
+    }
+    if (s->orientation) { /* vertical */
+        return outlink->h - outlink->h * max_val;
+    } else { /* horizontal */
+        return s->w * max_val;
+    }
+}
+
+static inline void calc_persistent_max(ShowVolumeContext *s, float max, int channel)
+{
+    /* update max value for persistent max display */
+    if ((max >= s->max_persistent[channel]) || (s->nb_frames_max_display[channel] >= s->persistent_max_frames)) { /* update max value for display */
+        s->max_persistent[channel] = max;
+        s->nb_frames_max_display[channel] = 0;
+    } else {
+        s->nb_frames_max_display[channel] += 1; /* incremente display frame count */
+    }
+}
+
+static inline void draw_max_line(ShowVolumeContext *s, int max_draw, int channel)
+{
+    int k;
+    if (s->orientation) { /* vertical */
+        uint8_t *dst = s->out->data[0] + max_draw * s->out->linesize[0] + channel * (s->b + s->h) * 4;
+        for (k = 0; k < s->h; k++) {
+            memcpy(dst + k * 4, s->persistant_max_rgba, sizeof(s->persistant_max_rgba));
+        }
+    } else { /* horizontal */
+        for (k = 0; k < s->h; k++) {
+            uint8_t *dst = s->out->data[0] + (channel * s->h + channel * s->b + k) * s->out->linesize[0];
+            memcpy(dst + max_draw * 4, s->persistant_max_rgba, sizeof(s->persistant_max_rgba));
+        }
+    }
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
 {
     AVFilterContext *ctx = inlink->dst;
     AVFilterLink *outlink = ctx->outputs[0];
     ShowVolumeContext *s = ctx->priv;
     const int step = s->step;
-    int c, i, j, k;
+    int c, j, k, max_draw;
     AVFrame *out;
 
     if (!s->out || s->out->width  != outlink->w ||
@@ -223,35 +334,40 @@
             av_frame_free(&insamples);
             return AVERROR(ENOMEM);
         }
-
-        for (i = 0; i < outlink->h; i++)
-            memset(s->out->data[0] + i * s->out->linesize[0], 0, outlink->w * 4);
+        clear_picture(s, outlink);
     }
     s->out->pts = insamples->pts;
 
-    for (j = 0; j < outlink->h; j++) {
-        uint8_t *dst = s->out->data[0] + j * s->out->linesize[0];
-        for (k = 0; k < outlink->w; k++) {
-            dst[k * 4 + 0] = FFMAX(dst[k * 4 + 0] * s->f, 0);
-            dst[k * 4 + 1] = FFMAX(dst[k * 4 + 1] * s->f, 0);
-            dst[k * 4 + 2] = FFMAX(dst[k * 4 + 2] * s->f, 0);
-            dst[k * 4 + 3] = FFMAX(dst[k * 4 + 3] * s->f, 0);
+    if ((s->f < 1.) && (s->f > 0.)) {
+        for (j = 0; j < outlink->h; j++) {
+            uint8_t *dst = s->out->data[0] + j * s->out->linesize[0];
+            const uint32_t alpha = s->bgopacity * 255;
+
+            for (k = 0; k < outlink->w; k++) {
+                dst[k * 4 + 0] = FFMAX(dst[k * 4 + 0] * s->f, 0);
+                dst[k * 4 + 1] = FFMAX(dst[k * 4 + 1] * s->f, 0);
+                dst[k * 4 + 2] = FFMAX(dst[k * 4 + 2] * s->f, 0);
+                dst[k * 4 + 3] = FFMAX(dst[k * 4 + 3] * s->f, alpha);
+            }
         }
+    } else if (s->f == 0.) {
+        clear_picture(s, outlink);
     }
 
-    if (s->orientation) {
+    if (s->orientation) { /* vertical */
         for (c = 0; c < inlink->channels; c++) {
             float *src = (float *)insamples->extended_data[c];
             uint32_t *lut = s->color_lut + s->w * c;
-            float max = 0;
+            float max;
 
-            for (i = 0; i < insamples->nb_samples; i++)
-                max = FFMAX(max, src[i]);
+            s->meter(src, insamples->nb_samples, &s->max[c], s->rms_factor);
+            max = s->max[c];
 
             s->values[c * VAR_VARS_NB + VAR_VOLUME] = 20.0 * log10(max);
             max = av_clipf(max, 0, 1);
+            max_draw = calc_max_draw(s, outlink, max);
 
-            for (j = outlink->h - outlink->h * max; j < s->w; j++) {
+            for (j = max_draw; j < s->w; j++) {
                 uint8_t *dst = s->out->data[0] + j * s->out->linesize[0] + c * (s->b + s->h) * 4;
                 for (k = 0; k < s->h; k++) {
                     AV_WN32A(&dst[k * 4], lut[s->w - j - 1]);
@@ -266,23 +382,30 @@
                     continue;
                 drawtext(s->out, c * (s->h + s->b) + (s->h - 10) / 2, outlink->h - 35, channel_name, 1);
             }
+
+            if (s->draw_persistent_duration > 0.) {
+                calc_persistent_max(s, max, c);
+                max_draw = FFMAX(0, calc_max_draw(s, outlink, s->max_persistent[c]) - 1);
+                draw_max_line(s, max_draw, c);
+            }
         }
-    } else {
+    } else { /* horizontal */
         for (c = 0; c < inlink->channels; c++) {
             float *src = (float *)insamples->extended_data[c];
             uint32_t *lut = s->color_lut + s->w * c;
-            float max = 0;
+            float max;
 
-            for (i = 0; i < insamples->nb_samples; i++)
-                max = FFMAX(max, src[i]);
+            s->meter(src, insamples->nb_samples, &s->max[c], s->rms_factor);
+            max = s->max[c];
 
             s->values[c * VAR_VARS_NB + VAR_VOLUME] = 20.0 * log10(max);
             max = av_clipf(max, 0, 1);
+            max_draw = calc_max_draw(s, outlink, max);
 
             for (j = 0; j < s->h; j++) {
                 uint8_t *dst = s->out->data[0] + (c * s->h + c * s->b + j) * s->out->linesize[0];
 
-                for (k = 0; k < s->w * max; k++) {
+                for (k = 0; k < max_draw; k++) {
                     AV_WN32A(dst + k * 4, lut[k]);
                     if (k & step)
                         k += step;
@@ -295,6 +418,12 @@
                     continue;
                 drawtext(s->out, 2, c * (s->h + s->b) + (s->h - 8) / 2, channel_name, 0);
             }
+
+            if (s->draw_persistent_duration > 0.) {
+                calc_persistent_max(s, max, c);
+                max_draw = FFMAX(0, calc_max_draw(s, outlink, s->max_persistent[c]) - 1);
+                draw_max_line(s, max_draw, c);
+            }
         }
     }
 
@@ -304,18 +433,16 @@
         return AVERROR(ENOMEM);
     av_frame_make_writable(out);
 
-    for (c = 0; c < inlink->channels && s->draw_volume; c++) {
+    /* draw volume level */
+    for (c = 0; c < inlink->channels && s->h >= 8 && s->draw_volume; c++) {
         char buf[16];
-        if (s->orientation) {
-            if (s->h >= 8) {
-                snprintf(buf, sizeof(buf), "%.2f", s->values[c * VAR_VARS_NB + VAR_VOLUME]);
-                drawtext(out, c * (s->h + s->b) + (s->h - 8) / 2, 2, buf, 1);
-            }
-        } else {
-            if (s->h >= 8) {
-                snprintf(buf, sizeof(buf), "%.2f", s->values[c * VAR_VARS_NB + VAR_VOLUME]);
-                drawtext(out, FFMAX(0, s->w - 8 * (int)strlen(buf)), c * (s->h + s->b) + (s->h - 8) / 2, buf, 0);
-            }
+
+        if (s->orientation) { /* vertical */
+            snprintf(buf, sizeof(buf), "%.2f", s->values[c * VAR_VARS_NB + VAR_VOLUME]);
+            drawtext(out, c * (s->h + s->b) + (s->h - 8) / 2, 2, buf, 1);
+        } else { /* horizontal */
+            snprintf(buf, sizeof(buf), "%.2f", s->values[c * VAR_VARS_NB + VAR_VOLUME]);
+            drawtext(out, FFMAX(0, s->w - 8 * (int)strlen(buf)), c * (s->h + s->b) + (s->h - 8) / 2, buf, 0);
         }
     }
 
@@ -330,6 +457,7 @@
     av_expr_free(s->c_expr);
     av_freep(&s->values);
     av_freep(&s->color_lut);
+    av_freep(&s->max);
 }
 
 static const AVFilterPad showvolume_inputs[] = {

diff --git a/libavfilter/avf_showwaves.c b/libavfilter/avf_showwaves.c
index 0866967..bb7f4ea 100644
--- a/libavfilter/avf_showwaves.c
+++ b/libavfilter/avf_showwaves.c

@@ -50,6 +50,12 @@
     SCALE_NB,
 };
 
+enum ShowWavesDrawMode {
+    DRAW_SCALE,
+    DRAW_FULL,
+    DRAW_NB,
+};
+
 struct frame_node {
     AVFrame *frame;
     struct frame_node *next;
@@ -68,6 +74,7 @@
     int sample_count_mod;
     int mode;                   ///< ShowWavesMode
     int scale;                  ///< ShowWavesScale
+    int draw_mode;              ///< ShowWavesDrawMode
     int split_channels;
     uint8_t *fg;
 
@@ -104,6 +111,9 @@
         { "log", "logarithmic",    0, AV_OPT_TYPE_CONST, {.i64=SCALE_LOG}, .flags=FLAGS, .unit="scale"},
         { "sqrt", "square root",   0, AV_OPT_TYPE_CONST, {.i64=SCALE_SQRT}, .flags=FLAGS, .unit="scale"},
         { "cbrt", "cubic root",    0, AV_OPT_TYPE_CONST, {.i64=SCALE_CBRT}, .flags=FLAGS, .unit="scale"},
+    { "draw", "set draw mode", OFFSET(draw_mode), AV_OPT_TYPE_INT, {.i64 = DRAW_SCALE}, 0, DRAW_NB-1, FLAGS, .unit="draw" },
+        { "scale", "scale pixel values for each drawn sample", 0, AV_OPT_TYPE_CONST, {.i64=DRAW_SCALE}, .flags=FLAGS, .unit="draw"},
+        { "full",  "draw every pixel for sample directly",     0, AV_OPT_TYPE_CONST, {.i64=DRAW_FULL},  .flags=FLAGS, .unit="draw"},
     { NULL }
 };
 
@@ -202,9 +212,9 @@
     return cbrt(FFABS(sample)) * height / cbrt(INT16_MAX);
 }
 
-static void draw_sample_point_rgba(uint8_t *buf, int height, int linesize,
-                                   int16_t *prev_y,
-                                   const uint8_t color[4], int h)
+static void draw_sample_point_rgba_scale(uint8_t *buf, int height, int linesize,
+                                         int16_t *prev_y,
+                                         const uint8_t color[4], int h)
 {
     if (h >= 0 && h < height) {
         buf[h * linesize + 0] += color[0];
@@ -214,9 +224,21 @@
     }
 }
 
-static void draw_sample_line_rgba(uint8_t *buf, int height, int linesize,
-                                  int16_t *prev_y,
-                                  const uint8_t color[4], int h)
+static void draw_sample_point_rgba_full(uint8_t *buf, int height, int linesize,
+                                   int16_t *prev_y,
+                                   const uint8_t color[4], int h)
+{
+    if (h >= 0 && h < height) {
+        buf[h * linesize + 0] = color[0];
+        buf[h * linesize + 1] = color[1];
+        buf[h * linesize + 2] = color[2];
+        buf[h * linesize + 3] = color[3];
+    }
+}
+
+static void draw_sample_line_rgba_scale(uint8_t *buf, int height, int linesize,
+                                        int16_t *prev_y,
+                                        const uint8_t color[4], int h)
 {
     int k;
     int start   = height/2;
@@ -231,9 +253,26 @@
     }
 }
 
-static void draw_sample_p2p_rgba(uint8_t *buf, int height, int linesize,
-                                 int16_t *prev_y,
-                                 const uint8_t color[4], int h)
+static void draw_sample_line_rgba_full(uint8_t *buf, int height, int linesize,
+                                       int16_t *prev_y,
+                                       const uint8_t color[4], int h)
+{
+    int k;
+    int start   = height/2;
+    int end     = av_clip(h, 0, height-1);
+    if (start > end)
+        FFSWAP(int16_t, start, end);
+    for (k = start; k < end; k++) {
+        buf[k * linesize + 0] = color[0];
+        buf[k * linesize + 1] = color[1];
+        buf[k * linesize + 2] = color[2];
+        buf[k * linesize + 3] = color[3];
+    }
+}
+
+static void draw_sample_p2p_rgba_scale(uint8_t *buf, int height, int linesize,
+                                       int16_t *prev_y,
+                                       const uint8_t color[4], int h)
 {
     int k;
     if (h >= 0 && h < height) {
@@ -257,9 +296,35 @@
     *prev_y = h;
 }
 
-static void draw_sample_cline_rgba(uint8_t *buf, int height, int linesize,
-                                   int16_t *prev_y,
-                                   const uint8_t color[4], int h)
+static void draw_sample_p2p_rgba_full(uint8_t *buf, int height, int linesize,
+                                      int16_t *prev_y,
+                                      const uint8_t color[4], int h)
+{
+    int k;
+    if (h >= 0 && h < height) {
+        buf[h * linesize + 0] = color[0];
+        buf[h * linesize + 1] = color[1];
+        buf[h * linesize + 2] = color[2];
+        buf[h * linesize + 3] = color[3];
+        if (*prev_y && h != *prev_y) {
+            int start = *prev_y;
+            int end = av_clip(h, 0, height-1);
+            if (start > end)
+                FFSWAP(int16_t, start, end);
+            for (k = start + 1; k < end; k++) {
+                buf[k * linesize + 0] = color[0];
+                buf[k * linesize + 1] = color[1];
+                buf[k * linesize + 2] = color[2];
+                buf[k * linesize + 3] = color[3];
+            }
+        }
+    }
+    *prev_y = h;
+}
+
+static void draw_sample_cline_rgba_scale(uint8_t *buf, int height, int linesize,
+                                         int16_t *prev_y,
+                                         const uint8_t color[4], int h)
 {
     int k;
     const int start = (height - h) / 2;
@@ -271,6 +336,20 @@
         buf[k * linesize + 3] += color[3];
     }
 }
+ static void draw_sample_cline_rgba_full(uint8_t *buf, int height, int linesize,
+                                         int16_t *prev_y,
+                                         const uint8_t color[4], int h)
+{
+    int k;
+    const int start = (height - h) / 2;
+    const int end   = start + h;
+    for (k = start; k < end; k++) {
+        buf[k * linesize + 0] = color[0];
+        buf[k * linesize + 1] = color[1];
+        buf[k * linesize + 2] = color[2];
+        buf[k * linesize + 3] = color[3];
+    }
+}
 
 static void draw_sample_point_gray(uint8_t *buf, int height, int linesize,
                                    int16_t *prev_y,
@@ -368,10 +447,10 @@
         break;
     case AV_PIX_FMT_RGBA:
         switch (showwaves->mode) {
-        case MODE_POINT:         showwaves->draw_sample = draw_sample_point_rgba; break;
-        case MODE_LINE:          showwaves->draw_sample = draw_sample_line_rgba;  break;
-        case MODE_P2P:           showwaves->draw_sample = draw_sample_p2p_rgba;   break;
-        case MODE_CENTERED_LINE: showwaves->draw_sample = draw_sample_cline_rgba; break;
+        case MODE_POINT:         showwaves->draw_sample = showwaves->draw_mode == DRAW_SCALE ? draw_sample_point_rgba_scale : draw_sample_point_rgba_full; break;
+        case MODE_LINE:          showwaves->draw_sample = showwaves->draw_mode == DRAW_SCALE ? draw_sample_line_rgba_scale  : draw_sample_line_rgba_full;  break;
+        case MODE_P2P:           showwaves->draw_sample = showwaves->draw_mode == DRAW_SCALE ? draw_sample_p2p_rgba_scale   : draw_sample_p2p_rgba_full;   break;
+        case MODE_CENTERED_LINE: showwaves->draw_sample = showwaves->draw_mode == DRAW_SCALE ? draw_sample_cline_rgba_scale : draw_sample_cline_rgba_full; break;
         default:
             return AVERROR_BUG;
         }
@@ -430,8 +509,12 @@
     if (!colors)
         return AVERROR(ENOMEM);
 
-    /* multiplication factor, pre-computed to avoid in-loop divisions */
-    x = 255 / ((showwaves->split_channels ? 1 : nb_channels) * showwaves->n);
+    if (showwaves->draw_mode == DRAW_SCALE) {
+        /* multiplication factor, pre-computed to avoid in-loop divisions */
+        x = 255 / ((showwaves->split_channels ? 1 : nb_channels) * showwaves->n);
+    } else {
+        x = 255;
+    }
     if (outlink->format == AV_PIX_FMT_RGBA) {
         uint8_t fg[4] = { 0xff, 0xff, 0xff, 0xff };
 

diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index f0f849b..93e866b 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c

@@ -19,7 +19,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/atomic.h"
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/buffer.h"
@@ -33,6 +32,7 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/rational.h"
 #include "libavutil/samplefmt.h"
+#include "libavutil/thread.h"
 
 #define FF_INTERNAL_FIELDS 1
 #include "framequeue.h"
@@ -183,10 +183,12 @@
     av_freep(link);
 }
 
+#if FF_API_FILTER_GET_SET
 int avfilter_link_get_channels(AVFilterLink *link)
 {
     return link->channels;
 }
+#endif
 
 void ff_filter_set_ready(AVFilterContext *filter, unsigned priority)
 {
@@ -573,58 +575,6 @@
     return AVERROR(ENOSYS);
 }
 
-static AVFilter *first_filter;
-static AVFilter **last_filter = &first_filter;
-
-#if !FF_API_NOCONST_GET_NAME
-const
-#endif
-AVFilter *avfilter_get_by_name(const char *name)
-{
-    const AVFilter *f = NULL;
-
-    if (!name)
-        return NULL;
-
-    while ((f = avfilter_next(f)))
-        if (!strcmp(f->name, name))
-            return (AVFilter *)f;
-
-    return NULL;
-}
-
-int avfilter_register(AVFilter *filter)
-{
-    AVFilter **f = last_filter;
-
-    /* the filter must select generic or internal exclusively */
-    av_assert0((filter->flags & AVFILTER_FLAG_SUPPORT_TIMELINE) != AVFILTER_FLAG_SUPPORT_TIMELINE);
-
-    filter->next = NULL;
-
-    while(*f || avpriv_atomic_ptr_cas((void * volatile *)f, NULL, filter))
-        f = &(*f)->next;
-    last_filter = &filter->next;
-
-    return 0;
-}
-
-const AVFilter *avfilter_next(const AVFilter *prev)
-{
-    return prev ? prev->next : first_filter;
-}
-
-#if FF_API_OLD_FILTER_REGISTER
-AVFilter **av_filter_next(AVFilter **filter)
-{
-    return filter ? &(*filter)->next : &first_filter;
-}
-
-void avfilter_uninit(void)
-{
-}
-#endif
-
 int avfilter_pad_count(const AVFilterPad *pads)
 {
     int count;
@@ -653,10 +603,11 @@
 
 static const AVClass *filter_child_class_next(const AVClass *prev)
 {
+    void *opaque = NULL;
     const AVFilter *f = NULL;
 
     /* find the filter that corresponds to prev */
-    while (prev && (f = avfilter_next(f)))
+    while (prev && (f = av_filter_iterate(&opaque)))
         if (f->priv_class == prev)
             break;
 
@@ -665,7 +616,7 @@
         return NULL;
 
     /* find next filter with specific options */
-    while ((f = avfilter_next(f)))
+    while ((f = av_filter_iterate(&opaque)))
         if (f->priv_class)
             return f->priv_class;
 
@@ -677,10 +628,12 @@
 static const AVOption avfilter_options[] = {
     { "thread_type", "Allowed thread types", OFFSET(thread_type), AV_OPT_TYPE_FLAGS,
         { .i64 = AVFILTER_THREAD_SLICE }, 0, INT_MAX, FLAGS, "thread_type" },
-        { "slice", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AVFILTER_THREAD_SLICE }, .unit = "thread_type" },
+        { "slice", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AVFILTER_THREAD_SLICE }, .flags = FLAGS, .unit = "thread_type" },
     { "enable", "set enable expression", OFFSET(enable_str), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
     { "threads", "Allowed number of threads", OFFSET(nb_threads), AV_OPT_TYPE_INT,
         { .i64 = 0 }, 0, INT_MAX, FLAGS },
+    { "extra_hw_frames", "Number of extra hardware frames to allocate for the user",
+        OFFSET(extra_hw_frames), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS },
     { NULL },
 };
 
@@ -783,14 +736,6 @@
     return NULL;
 }
 
-#if FF_API_AVFILTER_OPEN
-int avfilter_open(AVFilterContext **filter_ctx, AVFilter *filter, const char *inst_name)
-{
-    *filter_ctx = ff_filter_alloc(filter, inst_name);
-    return *filter_ctx ? 0 : AVERROR(ENOMEM);
-}
-#endif
-
 static void free_link(AVFilterLink *link)
 {
     if (!link)
@@ -939,13 +884,6 @@
     return count;
 }
 
-#if FF_API_AVFILTER_INIT_FILTER
-int avfilter_init_filter(AVFilterContext *filter, const char *args, void *opaque)
-{
-    return avfilter_init_str(filter, args);
-}
-#endif
-
 int avfilter_init_dict(AVFilterContext *ctx, AVDictionary **options)
 {
     int ret = 0;
@@ -996,7 +934,7 @@
             return AVERROR(EINVAL);
         }
 
-#if FF_API_OLD_FILTER_OPTS || FF_API_OLD_FILTER_OPTS_ERROR
+#if FF_API_OLD_FILTER_OPTS_ERROR
             if (   !strcmp(filter->filter->name, "format")     ||
                    !strcmp(filter->filter->name, "noformat")   ||
                    !strcmp(filter->filter->name, "frei0r")     ||
@@ -1056,14 +994,6 @@
             while ((p = strchr(p, ':')))
                 *p++ = '|';
 
-#if FF_API_OLD_FILTER_OPTS
-            if (deprecated)
-                av_log(filter, AV_LOG_WARNING, "This syntax is deprecated. Use "
-                       "'|' to separate the list items.\n");
-
-            av_log(filter, AV_LOG_DEBUG, "compat: called with args=[%s]\n", copy);
-            ret = process_options(filter, &options, copy);
-#else
             if (deprecated) {
                 av_log(filter, AV_LOG_ERROR, "This syntax is deprecated. Use "
                        "'|' to separate the list items ('%s' instead of '%s')\n",
@@ -1072,7 +1002,6 @@
             } else {
                 ret = process_options(filter, &options, copy);
             }
-#endif
             av_freep(&copy);
 
             if (ret < 0)
@@ -1441,7 +1370,7 @@
      and request_frame() to acknowledge status changes), to run once more
      and check if enough input was present for several frames.
 
-   Exemples of scenarios to consider:
+   Examples of scenarios to consider:
 
    - buffersrc: activate if frame_wanted_out to notify the application;
      activate when the application adds a frame to push it immediately.
@@ -1467,7 +1396,7 @@
    - If an input has frames in fifo and frame_wanted_out == 0, dequeue a
      frame and call filter_frame().
 
-     Ratinale: filter frames as soon as possible instead of leaving them
+     Rationale: filter frames as soon as possible instead of leaving them
      queued; frame_wanted_out < 0 is not possible since the old API does not
      set it nor provides any similar feedback; frame_wanted_out > 0 happens
      when min_samples > 0 and there are not enough samples queued.
@@ -1519,11 +1448,21 @@
     return 1;
 }
 
+size_t ff_inlink_queued_frames(AVFilterLink *link)
+{
+    return ff_framequeue_queued_frames(&link->fifo);
+}
+
 int ff_inlink_check_available_frame(AVFilterLink *link)
 {
     return ff_framequeue_queued_frames(&link->fifo) > 0;
 }
 
+int ff_inlink_queued_samples(AVFilterLink *link)
+{
+    return ff_framequeue_queued_samples(&link->fifo);
+}
+
 int ff_inlink_check_available_samples(AVFilterLink *link, unsigned min)
 {
     uint64_t samples = ff_framequeue_queued_samples(&link->fifo);
@@ -1570,7 +1509,7 @@
         return 0;
     if (link->status_in)
         min = FFMIN(min, ff_framequeue_queued_samples(&link->fifo));
-    ret = take_samples(link, min, link->max_samples, &frame);
+    ret = take_samples(link, min, max, &frame);
     if (ret < 0)
         return ret;
     consume_update(link, frame);
@@ -1578,6 +1517,11 @@
     return 1;
 }
 
+AVFrame *ff_inlink_peek_frame(AVFilterLink *link, size_t idx)
+{
+    return ff_framequeue_peek(&link->fifo, idx);
+}
+
 int ff_inlink_make_frame_writable(AVFilterLink *link, AVFrame **rframe)
 {
     AVFrame *frame = *rframe;
@@ -1692,3 +1636,24 @@
 {
     return &avfilter_class;
 }
+
+int ff_filter_init_hw_frames(AVFilterContext *avctx, AVFilterLink *link,
+                             int default_pool_size)
+{
+    AVHWFramesContext *frames;
+
+    // Must already be set by caller.
+    av_assert0(link->hw_frames_ctx);
+
+    frames = (AVHWFramesContext*)link->hw_frames_ctx->data;
+
+    if (frames->initial_pool_size == 0) {
+        // Dynamic allocation is necessarily supported.
+    } else if (avctx->extra_hw_frames >= 0) {
+        frames->initial_pool_size += avctx->extra_hw_frames;
+    } else {
+        frames->initial_pool_size = default_pool_size;
+    }
+
+    return 0;
+}

diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 73a723d..9d70e71 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h

@@ -406,6 +406,22 @@
      * a higher value suggests a more urgent activation.
      */
     unsigned ready;
+
+    /**
+     * Sets the number of extra hardware frames which the filter will
+     * allocate on its output links for use in following filters or by
+     * the caller.
+     *
+     * Some hardware filters require all frames that they will use for
+     * output to be defined in advance before filtering starts.  For such
+     * filters, any hardware frame pools used for output must therefore be
+     * of fixed size.  The extra frames set here are on top of any number
+     * that the filter needs internally in order to operate normally.
+     *
+     * This field must be set before the graph containing this filter is
+     * configured.
+     */
+    int extra_hw_frames;
 };
 
 /**
@@ -647,10 +663,14 @@
  */
 void avfilter_link_free(AVFilterLink **link);
 
+#if FF_API_FILTER_GET_SET
 /**
  * Get the number of channels of a link.
+ * @deprecated Use av_buffersink_get_channels()
  */
+attribute_deprecated
 int avfilter_link_get_channels(AVFilterLink *link);
+#endif
 
 /**
  * Set the closed field of a link.
@@ -677,14 +697,21 @@
  */
 int avfilter_process_command(AVFilterContext *filter, const char *cmd, const char *arg, char *res, int res_len, int flags);
 
-/** Initialize the filter system. Register all builtin filters. */
-void avfilter_register_all(void);
+/**
+ * Iterate over all registered filters.
+ *
+ * @param opaque a pointer where libavfilter will store the iteration state. Must
+ *               point to NULL to start the iteration.
+ *
+ * @return the next registered filter or NULL when the iteration is
+ *         finished
+ */
+const AVFilter *av_filter_iterate(void **opaque);
 
-#if FF_API_OLD_FILTER_REGISTER
-/** Uninitialize the filter system. Unregister all filters. */
+#if FF_API_NEXT
+/** Initialize the filter system. Register all builtin filters. */
 attribute_deprecated
-void avfilter_uninit(void);
-#endif
+void avfilter_register_all(void);
 
 /**
  * Register a filter. This is only needed if you plan to use
@@ -696,69 +723,27 @@
  * @return 0 if the registration was successful, a negative value
  * otherwise
  */
+attribute_deprecated
 int avfilter_register(AVFilter *filter);
 
 /**
+ * Iterate over all registered filters.
+ * @return If prev is non-NULL, next registered filter after prev or NULL if
+ * prev is the last filter. If prev is NULL, return the first registered filter.
+ */
+attribute_deprecated
+const AVFilter *avfilter_next(const AVFilter *prev);
+#endif
+
+/**
  * Get a filter definition matching the given name.
  *
  * @param name the filter name to find
  * @return     the filter definition, if any matching one is registered.
  *             NULL if none found.
  */
-#if !FF_API_NOCONST_GET_NAME
-const
-#endif
-AVFilter *avfilter_get_by_name(const char *name);
+const AVFilter *avfilter_get_by_name(const char *name);
 
-/**
- * Iterate over all registered filters.
- * @return If prev is non-NULL, next registered filter after prev or NULL if
- * prev is the last filter. If prev is NULL, return the first registered filter.
- */
-const AVFilter *avfilter_next(const AVFilter *prev);
-
-#if FF_API_OLD_FILTER_REGISTER
-/**
- * If filter is NULL, returns a pointer to the first registered filter pointer,
- * if filter is non-NULL, returns the next pointer after filter.
- * If the returned pointer points to NULL, the last registered filter
- * was already reached.
- * @deprecated use avfilter_next()
- */
-attribute_deprecated
-AVFilter **av_filter_next(AVFilter **filter);
-#endif
-
-#if FF_API_AVFILTER_OPEN
-/**
- * Create a filter instance.
- *
- * @param filter_ctx put here a pointer to the created filter context
- * on success, NULL on failure
- * @param filter    the filter to create an instance of
- * @param inst_name Name to give to the new instance. Can be NULL for none.
- * @return >= 0 in case of success, a negative error code otherwise
- * @deprecated use avfilter_graph_alloc_filter() instead
- */
-attribute_deprecated
-int avfilter_open(AVFilterContext **filter_ctx, AVFilter *filter, const char *inst_name);
-#endif
-
-
-#if FF_API_AVFILTER_INIT_FILTER
-/**
- * Initialize a filter.
- *
- * @param filter the filter to initialize
- * @param args   A string of parameters to use when initializing the filter.
- *               The format and meaning of this string varies by filter.
- * @param opaque Any extra non-string data needed by the filter. The meaning
- *               of this parameter varies by filter.
- * @return       zero on success
- */
-attribute_deprecated
-int avfilter_init_filter(AVFilterContext *filter, const char *args, void *opaque);
-#endif
 
 /**
  * Initialize a filter with the supplied parameters.
@@ -959,20 +944,6 @@
  */
 AVFilterContext *avfilter_graph_get_filter(AVFilterGraph *graph, const char *name);
 
-#if FF_API_AVFILTER_OPEN
-/**
- * Add an existing filter instance to a filter graph.
- *
- * @param graphctx  the filter graph
- * @param filter the filter to be added
- *
- * @deprecated use avfilter_graph_alloc_filter() to allocate a filter in a
- * filter graph
- */
-attribute_deprecated
-int avfilter_graph_add_filter(AVFilterGraph *graphctx, AVFilterContext *filter);
-#endif
-
 /**
  * Create and add a filter instance into an existing graph.
  * The filter instance is created from the filter filt and inited

diff --git a/libavfilter/avfiltergraph.c b/libavfilter/avfiltergraph.c
index 4304c06..a149f8f 100644
--- a/libavfilter/avfiltergraph.c
+++ b/libavfilter/avfiltergraph.c

@@ -28,6 +28,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/bprint.h"
 #include "libavutil/channel_layout.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -42,17 +43,19 @@
 #include "thread.h"
 
 #define OFFSET(x) offsetof(AVFilterGraph, x)
-#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+#define F AV_OPT_FLAG_FILTERING_PARAM
+#define V AV_OPT_FLAG_VIDEO_PARAM
+#define A AV_OPT_FLAG_AUDIO_PARAM
 static const AVOption filtergraph_options[] = {
     { "thread_type", "Allowed thread types", OFFSET(thread_type), AV_OPT_TYPE_FLAGS,
-        { .i64 = AVFILTER_THREAD_SLICE }, 0, INT_MAX, FLAGS, "thread_type" },
-        { "slice", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AVFILTER_THREAD_SLICE }, .flags = FLAGS, .unit = "thread_type" },
+        { .i64 = AVFILTER_THREAD_SLICE }, 0, INT_MAX, F|V|A, "thread_type" },
+        { "slice", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AVFILTER_THREAD_SLICE }, .flags = F|V|A, .unit = "thread_type" },
     { "threads",     "Maximum number of threads", OFFSET(nb_threads),
-        AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, INT_MAX, FLAGS },
+        AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, INT_MAX, F|V|A },
     {"scale_sws_opts"       , "default scale filter options"        , OFFSET(scale_sws_opts)        ,
-        AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS },
+        AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, F|V },
     {"aresample_swr_opts"   , "default aresample filter options"    , OFFSET(aresample_swr_opts)    ,
-        AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS },
+        AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, F|A },
     { NULL },
 };
 
@@ -136,23 +139,6 @@
     av_freep(graph);
 }
 
-#if FF_API_AVFILTER_OPEN
-int avfilter_graph_add_filter(AVFilterGraph *graph, AVFilterContext *filter)
-{
-    AVFilterContext **filters = av_realloc(graph->filters,
-                                           sizeof(*filters) * (graph->nb_filters + 1));
-    if (!filters)
-        return AVERROR(ENOMEM);
-
-    graph->filters = filters;
-    graph->filters[graph->nb_filters++] = filter;
-
-    filter->graph = graph;
-
-    return 0;
-}
-#endif
-
 int avfilter_graph_create_filter(AVFilterContext **filt_ctx, const AVFilter *filt,
                                  const char *name, const char *args, void *opaque,
                                  AVFilterGraph *graph_ctx)
@@ -280,6 +266,27 @@
     return 0;
 }
 
+static int graph_check_links(AVFilterGraph *graph, AVClass *log_ctx)
+{
+    AVFilterContext *f;
+    AVFilterLink *l;
+    unsigned i, j;
+    int ret;
+
+    for (i = 0; i < graph->nb_filters; i++) {
+        f = graph->filters[i];
+        for (j = 0; j < f->nb_outputs; j++) {
+            l = f->outputs[j];
+            if (l->type == AVMEDIA_TYPE_VIDEO) {
+                ret = av_image_check_size2(l->w, l->h, INT64_MAX, l->format, 0, f);
+                if (ret < 0)
+                    return ret;
+            }
+        }
+    }
+    return 0;
+}
+
 AVFilterContext *avfilter_graph_get_filter(AVFilterGraph *graph, const char *name)
 {
     int i;
@@ -509,9 +516,8 @@
 
             if (convert_needed) {
                 AVFilterContext *convert;
-                AVFilter *filter;
+                const AVFilter *filter;
                 AVFilterLink *inlink, *outlink;
-                char scale_args[256];
                 char inst_name[30];
 
                 if (graph->disable_auto_convert) {
@@ -548,10 +554,6 @@
 
                     snprintf(inst_name, sizeof(inst_name), "auto_resampler_%d",
                              resampler_count++);
-                    scale_args[0] = '\0';
-                    if (graph->aresample_swr_opts)
-                        snprintf(scale_args, sizeof(scale_args), "%s",
-                                 graph->aresample_swr_opts);
                     if ((ret = avfilter_graph_create_filter(&convert, filter,
                                                             inst_name, graph->aresample_swr_opts,
                                                             NULL, graph)) < 0)
@@ -677,6 +679,7 @@
 
     if (link->type == AVMEDIA_TYPE_VIDEO) {
         if(ref && ref->type == AVMEDIA_TYPE_VIDEO){
+            //FIXME: This should check for AV_PIX_FMT_FLAG_ALPHA after PAL8 pixel format without alpha is implemented
             int has_alpha= av_pix_fmt_desc_get(ref->format)->nb_components % 2 == 0;
             enum AVPixelFormat best= AV_PIX_FMT_NONE;
             int i;
@@ -1235,7 +1238,7 @@
         for (j = 0; j < f->nb_inputs; j++) {
             AVFilterLink *link = f->inputs[j];
             AVFilterContext *fifo_ctx;
-            AVFilter *fifo;
+            const AVFilter *fifo;
             char name[32];
 
             if (!link->dstpad->needs_fifo)
@@ -1273,6 +1276,8 @@
         return ret;
     if ((ret = graph_config_links(graphctx, log_ctx)))
         return ret;
+    if ((ret = graph_check_links(graphctx, log_ctx)))
+        return ret;
     if ((ret = graph_config_pointers(graphctx, log_ctx)))
         return ret;
 

diff --git a/libavfilter/boxblur.c b/libavfilter/boxblur.c
new file mode 100644
index 0000000..4534b45
--- /dev/null
+++ b/libavfilter/boxblur.c

@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2011 Stefano Sabatini
+ * Copyright (c) 2018 Danil Iashchenko
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "boxblur.h"
+
+static const char *const var_names[] = {
+    "w",
+    "h",
+    "cw",
+    "ch",
+    "hsub",
+    "vsub",
+    NULL
+};
+
+enum var_name {
+    VAR_W,
+    VAR_H,
+    VAR_CW,
+    VAR_CH,
+    VAR_HSUB,
+    VAR_VSUB,
+    VARS_NB
+};
+
+
+int ff_boxblur_eval_filter_params(AVFilterLink *inlink,
+                                  FilterParam *luma_param,
+                                  FilterParam *chroma_param,
+                                  FilterParam *alpha_param)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    AVFilterContext *ctx = inlink->dst;
+    int w = inlink->w, h = inlink->h;
+    int cw, ch;
+    double var_values[VARS_NB], res;
+    char *expr;
+    int ret;
+
+    if (!luma_param->radius_expr) {
+        av_log(ctx, AV_LOG_ERROR, "Luma radius expression is not set.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* fill missing params */
+    if (!chroma_param->radius_expr) {
+        chroma_param->radius_expr = av_strdup(luma_param->radius_expr);
+        if (!chroma_param->radius_expr)
+            return AVERROR(ENOMEM);
+    }
+    if (chroma_param->power < 0)
+        chroma_param->power = luma_param->power;
+
+    if (!alpha_param->radius_expr) {
+        alpha_param->radius_expr = av_strdup(luma_param->radius_expr);
+        if (!alpha_param->radius_expr)
+            return AVERROR(ENOMEM);
+    }
+    if (alpha_param->power < 0)
+        alpha_param->power = luma_param->power;
+
+    var_values[VAR_W]       = inlink->w;
+    var_values[VAR_H]       = inlink->h;
+    var_values[VAR_CW] = cw = w>>(desc->log2_chroma_w);
+    var_values[VAR_CH] = ch = h>>(desc->log2_chroma_h);
+    var_values[VAR_HSUB]    = 1<<(desc->log2_chroma_w);
+    var_values[VAR_VSUB]    = 1<<(desc->log2_chroma_h);
+
+#define EVAL_RADIUS_EXPR(comp)                                          \
+    expr = comp->radius_expr;                                           \
+    ret = av_expr_parse_and_eval(&res, expr, var_names, var_values,     \
+                                 NULL, NULL, NULL, NULL, NULL, 0, ctx); \
+    comp->radius = res;                                                 \
+    if (ret < 0) {                                                      \
+        av_log(NULL, AV_LOG_ERROR,                                      \
+               "Error when evaluating " #comp " radius expression '%s'\n", expr); \
+        return ret;                                                     \
+    }
+
+    EVAL_RADIUS_EXPR(luma_param);
+    EVAL_RADIUS_EXPR(chroma_param);
+    EVAL_RADIUS_EXPR(alpha_param);
+
+    av_log(ctx, AV_LOG_VERBOSE,
+           "luma_radius:%d luma_power:%d "
+           "chroma_radius:%d chroma_power:%d "
+           "alpha_radius:%d alpha_power:%d "
+           "w:%d chroma_w:%d h:%d chroma_h:%d\n",
+           luma_param  ->radius, luma_param  ->power,
+           chroma_param->radius, chroma_param->power,
+           alpha_param ->radius, alpha_param ->power,
+           w, cw, h, ch);
+
+
+#define CHECK_RADIUS_VAL(w_, h_, comp)                                  \
+    if (comp->radius < 0 ||                                   \
+        2*comp->radius > FFMIN(w_, h_)) {                     \
+        av_log(ctx, AV_LOG_ERROR,                                       \
+               "Invalid " #comp " radius value %d, must be >= 0 and <= %d\n", \
+               comp->radius, FFMIN(w_, h_)/2);                \
+        return AVERROR(EINVAL);                                         \
+    }
+    CHECK_RADIUS_VAL(w,  h,  luma_param);
+    CHECK_RADIUS_VAL(cw, ch, chroma_param);
+    CHECK_RADIUS_VAL(w,  h,  alpha_param);
+
+    return 0;
+}

diff --git a/libavfilter/boxblur.h b/libavfilter/boxblur.h
new file mode 100644
index 0000000..5694722
--- /dev/null
+++ b/libavfilter/boxblur.h

@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2011 Stefano Sabatini
+ * Copyright (c) 2018 Danil Iashchenko
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_BOXBLUR_H
+#define AVFILTER_BOXBLUR_H
+
+#include "libavutil/eval.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/mem.h"
+
+#include "avfilter.h"
+
+typedef struct FilterParam {
+    int radius;
+    int power;
+    char *radius_expr;
+} FilterParam;
+
+#define Y 0
+#define U 1
+#define V 2
+#define A 3
+
+int ff_boxblur_eval_filter_params(AVFilterLink *inlink,
+                                  FilterParam *luma_param,
+                                  FilterParam *chroma_param,
+                                  FilterParam *alpha_param);
+
+#endif // AVFILTER_BOXBLUR_H

diff --git a/libavfilter/buffersink.h b/libavfilter/buffersink.h
index 21d6bb5..3c846bb 100644
--- a/libavfilter/buffersink.h
+++ b/libavfilter/buffersink.h

@@ -151,7 +151,7 @@
  *              the end of stream, when it can contain less than nb_samples.
  *
  * @return The return codes have the same meaning as for
- *         av_buffersink_get_samples().
+ *         av_buffersink_get_frame().
  *
  * @warning do not mix this function with av_buffersink_get_frame(). Use only one or
  * the other with a single sink, not both.

diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c
index ad5aedd..cd56f8c 100644
--- a/libavfilter/buffersrc.c
+++ b/libavfilter/buffersrc.c

@@ -304,14 +304,6 @@
     { "video_size",    NULL,                     OFFSET(w),                AV_OPT_TYPE_IMAGE_SIZE,                .flags = V },
     { "height",        NULL,                     OFFSET(h),                AV_OPT_TYPE_INT,      { .i64 = 0 }, 0, INT_MAX, V },
     { "pix_fmt",       NULL,                     OFFSET(pix_fmt),          AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, .min = AV_PIX_FMT_NONE, .max = INT_MAX, .flags = V },
-#if FF_API_OLD_FILTER_OPTS
-    /* those 4 are for compatibility with the old option passing system where each filter
-     * did its own parsing */
-    { "time_base_num", "deprecated, do not use", OFFSET(time_base.num),    AV_OPT_TYPE_INT,      { .i64 = 0 }, 0, INT_MAX, V },
-    { "time_base_den", "deprecated, do not use", OFFSET(time_base.den),    AV_OPT_TYPE_INT,      { .i64 = 0 }, 0, INT_MAX, V },
-    { "sar_num",       "deprecated, do not use", OFFSET(pixel_aspect.num), AV_OPT_TYPE_INT,      { .i64 = 0 }, 0, INT_MAX, V },
-    { "sar_den",       "deprecated, do not use", OFFSET(pixel_aspect.den), AV_OPT_TYPE_INT,      { .i64 = 0 }, 0, INT_MAX, V },
-#endif
     { "sar",           "sample aspect ratio",    OFFSET(pixel_aspect),     AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, 0, DBL_MAX, V },
     { "pixel_aspect",  "sample aspect ratio",    OFFSET(pixel_aspect),     AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, 0, DBL_MAX, V },
     { "time_base",     NULL,                     OFFSET(time_base),        AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, 0, DBL_MAX, V },

diff --git a/libavfilter/buffersrc.h b/libavfilter/buffersrc.h
index 0652113..08fbd18 100644
--- a/libavfilter/buffersrc.h
+++ b/libavfilter/buffersrc.h

@@ -106,7 +106,7 @@
     AVBufferRef *hw_frames_ctx;
 
     /**
-     * Audio only, the audio sampling rate in samples per secon.
+     * Audio only, the audio sampling rate in samples per second.
      */
     int sample_rate;
 

diff --git a/libavfilter/colorspace.c b/libavfilter/colorspace.c
new file mode 100644
index 0000000..c668221
--- /dev/null
+++ b/libavfilter/colorspace.c

@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/frame.h"
+#include "libavutil/mastering_display_metadata.h"
+#include "libavutil/pixdesc.h"
+
+#include "colorspace.h"
+
+
+void ff_matrix_invert_3x3(const double in[3][3], double out[3][3])
+{
+    double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
+           m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
+           m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
+    int i, j;
+    double det;
+
+    out[0][0] =  (m11 * m22 - m21 * m12);
+    out[0][1] = -(m01 * m22 - m21 * m02);
+    out[0][2] =  (m01 * m12 - m11 * m02);
+    out[1][0] = -(m10 * m22 - m20 * m12);
+    out[1][1] =  (m00 * m22 - m20 * m02);
+    out[1][2] = -(m00 * m12 - m10 * m02);
+    out[2][0] =  (m10 * m21 - m20 * m11);
+    out[2][1] = -(m00 * m21 - m20 * m01);
+    out[2][2] =  (m00 * m11 - m10 * m01);
+
+    det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2];
+    det = 1.0 / det;
+
+    for (i = 0; i < 3; i++) {
+        for (j = 0; j < 3; j++)
+            out[i][j] *= det;
+    }
+}
+
+void ff_matrix_mul_3x3(double dst[3][3],
+               const double src1[3][3], const double src2[3][3])
+{
+    int m, n;
+
+    for (m = 0; m < 3; m++)
+        for (n = 0; n < 3; n++)
+            dst[m][n] = src2[m][0] * src1[0][n] +
+                        src2[m][1] * src1[1][n] +
+                        src2[m][2] * src1[2][n];
+}
+/*
+ * see e.g. http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
+ */
+void ff_fill_rgb2xyz_table(const struct PrimaryCoefficients *coeffs,
+                           const struct WhitepointCoefficients *wp,
+                           double rgb2xyz[3][3])
+{
+    double i[3][3], sr, sg, sb, zw;
+
+    rgb2xyz[0][0] = coeffs->xr / coeffs->yr;
+    rgb2xyz[0][1] = coeffs->xg / coeffs->yg;
+    rgb2xyz[0][2] = coeffs->xb / coeffs->yb;
+    rgb2xyz[1][0] = rgb2xyz[1][1] = rgb2xyz[1][2] = 1.0;
+    rgb2xyz[2][0] = (1.0 - coeffs->xr - coeffs->yr) / coeffs->yr;
+    rgb2xyz[2][1] = (1.0 - coeffs->xg - coeffs->yg) / coeffs->yg;
+    rgb2xyz[2][2] = (1.0 - coeffs->xb - coeffs->yb) / coeffs->yb;
+    ff_matrix_invert_3x3(rgb2xyz, i);
+    zw = 1.0 - wp->xw - wp->yw;
+    sr = i[0][0] * wp->xw + i[0][1] * wp->yw + i[0][2] * zw;
+    sg = i[1][0] * wp->xw + i[1][1] * wp->yw + i[1][2] * zw;
+    sb = i[2][0] * wp->xw + i[2][1] * wp->yw + i[2][2] * zw;
+    rgb2xyz[0][0] *= sr;
+    rgb2xyz[0][1] *= sg;
+    rgb2xyz[0][2] *= sb;
+    rgb2xyz[1][0] *= sr;
+    rgb2xyz[1][1] *= sg;
+    rgb2xyz[1][2] *= sb;
+    rgb2xyz[2][0] *= sr;
+    rgb2xyz[2][1] *= sg;
+    rgb2xyz[2][2] *= sb;
+}
+
+double ff_determine_signal_peak(AVFrame *in)
+{
+    AVFrameSideData *sd = av_frame_get_side_data(in, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
+    double peak = 0;
+
+    if (sd) {
+        AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data;
+        peak = clm->MaxCLL / REFERENCE_WHITE;
+    }
+
+    sd = av_frame_get_side_data(in, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
+    if (!peak && sd) {
+        AVMasteringDisplayMetadata *metadata = (AVMasteringDisplayMetadata *)sd->data;
+        if (metadata->has_luminance)
+            peak = av_q2d(metadata->max_luminance) / REFERENCE_WHITE;
+    }
+
+    // For untagged source, use peak of 10000 if SMPTE ST.2084
+    // otherwise assume HLG with reference display peak 1000.
+    if (!peak)
+        peak = in->color_trc == AVCOL_TRC_SMPTE2084 ? 100.0f : 10.0f;
+
+    return peak;
+}
+
+void ff_update_hdr_metadata(AVFrame *in, double peak)
+{
+    AVFrameSideData *sd = av_frame_get_side_data(in, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
+
+    if (sd) {
+        AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data;
+        clm->MaxCLL = (unsigned)(peak * REFERENCE_WHITE);
+    }
+
+    sd = av_frame_get_side_data(in, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
+    if (sd) {
+        AVMasteringDisplayMetadata *metadata = (AVMasteringDisplayMetadata *)sd->data;
+        if (metadata->has_luminance)
+            metadata->max_luminance = av_d2q(peak * REFERENCE_WHITE, 10000);
+    }
+}

diff --git a/libavfilter/colorspace.h b/libavfilter/colorspace.h
new file mode 100644
index 0000000..9366818
--- /dev/null
+++ b/libavfilter/colorspace.h

@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_COLORSPACE_H
+#define AVFILTER_COLORSPACE_H
+
+#include "libavutil/common.h"
+#include "libavutil/frame.h"
+
+#define REFERENCE_WHITE 100.0f
+
+struct LumaCoefficients {
+    double cr, cg, cb;
+};
+
+struct PrimaryCoefficients {
+    double xr, yr, xg, yg, xb, yb;
+};
+
+struct WhitepointCoefficients {
+    double xw, yw;
+};
+
+void ff_matrix_invert_3x3(const double in[3][3], double out[3][3]);
+void ff_matrix_mul_3x3(double dst[3][3],
+               const double src1[3][3], const double src2[3][3]);
+void ff_fill_rgb2xyz_table(const struct PrimaryCoefficients *coeffs,
+                           const struct WhitepointCoefficients *wp,
+                           double rgb2xyz[3][3]);
+
+double ff_determine_signal_peak(AVFrame *in);
+void ff_update_hdr_metadata(AVFrame *in, double peak);
+
+#endif

diff --git a/libavfilter/deshake.h b/libavfilter/deshake.h
index 5e25bb3..406cbab 100644
--- a/libavfilter/deshake.h
+++ b/libavfilter/deshake.h

@@ -26,9 +26,6 @@
 #include "avfilter.h"
 #include "transform.h"
 #include "libavutil/pixelutils.h"
-#if CONFIG_OPENCL
-#include "libavutil/opencl.h"
-#endif
 
 
 enum SearchMethod {
@@ -53,24 +50,6 @@
     double zoom;          ///< Zoom percentage
 } Transform;
 
-#if CONFIG_OPENCL
-
-typedef struct DeshakeOpenclContext {
-    cl_command_queue command_queue;
-    cl_program program;
-    cl_kernel kernel_luma;
-    cl_kernel kernel_chroma;
-    int in_plane_size[8];
-    int out_plane_size[8];
-    int plane_num;
-    cl_mem cl_inbuf;
-    size_t cl_inbuf_size;
-    cl_mem cl_outbuf;
-    size_t cl_outbuf_size;
-} DeshakeOpenclContext;
-
-#endif
-
 #define MAX_R 64
 
 typedef struct DeshakeContext {
@@ -96,9 +75,6 @@
     int cy;
     char *filename;            ///< Motion search detailed log filename
     int opencl;
-#if CONFIG_OPENCL
-    DeshakeOpenclContext opencl_ctx;
-#endif
     int (* transform)(AVFilterContext *ctx, int width, int height, int cw, int ch,
                       const float *matrix_y, const float *matrix_uv, enum InterpolateMethod interpolate,
                       enum FillMethod fill, AVFrame *in, AVFrame *out);

diff --git a/libavfilter/deshake_opencl.c b/libavfilter/deshake_opencl.c
deleted file mode 100644
index 877ec1d..0000000
--- a/libavfilter/deshake_opencl.c
+++ /dev/null

@@ -1,198 +0,0 @@
-/*
- * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
- * Copyright (C) 2013 Lenny Wang
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * transform input video
- */
-
-#include "libavutil/common.h"
-#include "libavutil/dict.h"
-#include "libavutil/pixdesc.h"
-#include "deshake_opencl.h"
-#include "libavutil/opencl_internal.h"
-
-#define PLANE_NUM 3
-#define ROUND_TO_16(a) (((((a) - 1)/16)+1)*16)
-
-int ff_opencl_transform(AVFilterContext *ctx,
-                        int width, int height, int cw, int ch,
-                        const float *matrix_y, const float *matrix_uv,
-                        enum InterpolateMethod interpolate,
-                        enum FillMethod fill, AVFrame *in, AVFrame *out)
-{
-    int ret = 0;
-    cl_int status;
-    DeshakeContext *deshake = ctx->priv;
-    float4 packed_matrix_lu = {matrix_y[0], matrix_y[1], matrix_y[2], matrix_y[5]};
-    float4 packed_matrix_ch = {matrix_uv[0], matrix_uv[1], matrix_uv[2], matrix_uv[5]};
-    size_t global_worksize_lu[2] = {(size_t)ROUND_TO_16(width), (size_t)ROUND_TO_16(height)};
-    size_t global_worksize_ch[2] = {(size_t)ROUND_TO_16(cw), (size_t)(2*ROUND_TO_16(ch))};
-    size_t local_worksize[2] = {16, 16};
-    FFOpenclParam param_lu = {0};
-    FFOpenclParam param_ch = {0};
-    param_lu.ctx = param_ch.ctx = ctx;
-    param_lu.kernel = deshake->opencl_ctx.kernel_luma;
-    param_ch.kernel = deshake->opencl_ctx.kernel_chroma;
-
-    if ((unsigned int)interpolate > INTERPOLATE_BIQUADRATIC) {
-        av_log(ctx, AV_LOG_ERROR, "Selected interpolate method is invalid\n");
-        return AVERROR(EINVAL);
-    }
-    ret = avpriv_opencl_set_parameter(&param_lu,
-                                  FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_inbuf),
-                                  FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_outbuf),
-                                  FF_OPENCL_PARAM_INFO(packed_matrix_lu),
-                                  FF_OPENCL_PARAM_INFO(interpolate),
-                                  FF_OPENCL_PARAM_INFO(fill),
-                                  FF_OPENCL_PARAM_INFO(in->linesize[0]),
-                                  FF_OPENCL_PARAM_INFO(out->linesize[0]),
-                                  FF_OPENCL_PARAM_INFO(height),
-                                  FF_OPENCL_PARAM_INFO(width),
-                                  NULL);
-    if (ret < 0)
-        return ret;
-    ret = avpriv_opencl_set_parameter(&param_ch,
-                                  FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_inbuf),
-                                  FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_outbuf),
-                                  FF_OPENCL_PARAM_INFO(packed_matrix_ch),
-                                  FF_OPENCL_PARAM_INFO(interpolate),
-                                  FF_OPENCL_PARAM_INFO(fill),
-                                  FF_OPENCL_PARAM_INFO(in->linesize[0]),
-                                  FF_OPENCL_PARAM_INFO(out->linesize[0]),
-                                  FF_OPENCL_PARAM_INFO(in->linesize[1]),
-                                  FF_OPENCL_PARAM_INFO(out->linesize[1]),
-                                  FF_OPENCL_PARAM_INFO(height),
-                                  FF_OPENCL_PARAM_INFO(width),
-                                  FF_OPENCL_PARAM_INFO(ch),
-                                  FF_OPENCL_PARAM_INFO(cw),
-                                  NULL);
-    if (ret < 0)
-        return ret;
-    status = clEnqueueNDRangeKernel(deshake->opencl_ctx.command_queue,
-                                    deshake->opencl_ctx.kernel_luma, 2, NULL,
-                                    global_worksize_lu, local_worksize, 0, NULL, NULL);
-    status |= clEnqueueNDRangeKernel(deshake->opencl_ctx.command_queue,
-                                    deshake->opencl_ctx.kernel_chroma, 2, NULL,
-                                    global_worksize_ch, local_worksize, 0, NULL, NULL);
-    if (status != CL_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
-        return AVERROR_EXTERNAL;
-    }
-    ret = av_opencl_buffer_read_image(out->data, deshake->opencl_ctx.out_plane_size,
-                                      deshake->opencl_ctx.plane_num, deshake->opencl_ctx.cl_outbuf,
-                                      deshake->opencl_ctx.cl_outbuf_size);
-    if (ret < 0)
-        return ret;
-    return ret;
-}
-
-int ff_opencl_deshake_init(AVFilterContext *ctx)
-{
-    int ret = 0;
-    DeshakeContext *deshake = ctx->priv;
-    ret = av_opencl_init(NULL);
-    if (ret < 0)
-        return ret;
-    deshake->opencl_ctx.plane_num = PLANE_NUM;
-    deshake->opencl_ctx.command_queue = av_opencl_get_command_queue();
-    if (!deshake->opencl_ctx.command_queue) {
-        av_log(ctx, AV_LOG_ERROR, "Unable to get OpenCL command queue in filter 'deshake'\n");
-        return AVERROR(EINVAL);
-    }
-    deshake->opencl_ctx.program = av_opencl_compile("avfilter_transform", NULL);
-    if (!deshake->opencl_ctx.program) {
-        av_log(ctx, AV_LOG_ERROR, "OpenCL failed to compile program 'avfilter_transform'\n");
-        return AVERROR(EINVAL);
-    }
-    if (!deshake->opencl_ctx.kernel_luma) {
-        deshake->opencl_ctx.kernel_luma = clCreateKernel(deshake->opencl_ctx.program,
-                                                         "avfilter_transform_luma", &ret);
-        if (ret != CL_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'avfilter_transform_luma'\n");
-            return AVERROR(EINVAL);
-        }
-    }
-    if (!deshake->opencl_ctx.kernel_chroma) {
-        deshake->opencl_ctx.kernel_chroma = clCreateKernel(deshake->opencl_ctx.program,
-                                                           "avfilter_transform_chroma", &ret);
-        if (ret != CL_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'avfilter_transform_chroma'\n");
-            return AVERROR(EINVAL);
-        }
-    }
-    return ret;
-}
-
-void ff_opencl_deshake_uninit(AVFilterContext *ctx)
-{
-    DeshakeContext *deshake = ctx->priv;
-    av_opencl_buffer_release(&deshake->opencl_ctx.cl_inbuf);
-    av_opencl_buffer_release(&deshake->opencl_ctx.cl_outbuf);
-    clReleaseKernel(deshake->opencl_ctx.kernel_luma);
-    clReleaseKernel(deshake->opencl_ctx.kernel_chroma);
-    clReleaseProgram(deshake->opencl_ctx.program);
-    deshake->opencl_ctx.command_queue = NULL;
-    av_opencl_uninit();
-}
-
-int ff_opencl_deshake_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
-{
-    int ret = 0;
-    AVFilterLink *link = ctx->inputs[0];
-    DeshakeContext *deshake = ctx->priv;
-    const int hshift = av_pix_fmt_desc_get(link->format)->log2_chroma_h;
-    int chroma_height = AV_CEIL_RSHIFT(link->h, hshift);
-
-    if ((!deshake->opencl_ctx.cl_inbuf) || (!deshake->opencl_ctx.cl_outbuf)) {
-        deshake->opencl_ctx.in_plane_size[0]  = (in->linesize[0] * in->height);
-        deshake->opencl_ctx.in_plane_size[1]  = (in->linesize[1] * chroma_height);
-        deshake->opencl_ctx.in_plane_size[2]  = (in->linesize[2] * chroma_height);
-        deshake->opencl_ctx.out_plane_size[0] = (out->linesize[0] * out->height);
-        deshake->opencl_ctx.out_plane_size[1] = (out->linesize[1] * chroma_height);
-        deshake->opencl_ctx.out_plane_size[2] = (out->linesize[2] * chroma_height);
-        deshake->opencl_ctx.cl_inbuf_size  = deshake->opencl_ctx.in_plane_size[0] +
-                                             deshake->opencl_ctx.in_plane_size[1] +
-                                             deshake->opencl_ctx.in_plane_size[2];
-        deshake->opencl_ctx.cl_outbuf_size = deshake->opencl_ctx.out_plane_size[0] +
-                                             deshake->opencl_ctx.out_plane_size[1] +
-                                             deshake->opencl_ctx.out_plane_size[2];
-        if (!deshake->opencl_ctx.cl_inbuf) {
-            ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_inbuf,
-                                            deshake->opencl_ctx.cl_inbuf_size,
-                                            CL_MEM_READ_ONLY, NULL);
-            if (ret < 0)
-                return ret;
-        }
-        if (!deshake->opencl_ctx.cl_outbuf) {
-            ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_outbuf,
-                                            deshake->opencl_ctx.cl_outbuf_size,
-                                            CL_MEM_READ_WRITE, NULL);
-            if (ret < 0)
-                return ret;
-        }
-    }
-    ret = av_opencl_buffer_write_image(deshake->opencl_ctx.cl_inbuf,
-                                 deshake->opencl_ctx.cl_inbuf_size,
-                                 0, in->data,deshake->opencl_ctx.in_plane_size,
-                                 deshake->opencl_ctx.plane_num);
-    return ret;
-}

diff --git a/libavfilter/deshake_opencl.h b/libavfilter/deshake_opencl.h
deleted file mode 100644
index f3d96dc..0000000
--- a/libavfilter/deshake_opencl.h
+++ /dev/null

@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVFILTER_DESHAKE_OPENCL_H
-#define AVFILTER_DESHAKE_OPENCL_H
-
-#include "deshake.h"
-
-typedef struct float4 {
-    float x;
-    float y;
-    float z;
-    float w;
-} float4;
-
-int ff_opencl_deshake_init(AVFilterContext *ctx);
-
-void ff_opencl_deshake_uninit(AVFilterContext *ctx);
-
-int ff_opencl_deshake_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFrame *out);
-
-int ff_opencl_transform(AVFilterContext *ctx,
-                        int width, int height, int cw, int ch,
-                        const float *matrix_y, const float *matrix_uv,
-                        enum InterpolateMethod interpolate,
-                        enum FillMethod fill, AVFrame *in, AVFrame *out);
-
-#endif /* AVFILTER_DESHAKE_OPENCL_H */

diff --git a/libavfilter/deshake_opencl_kernel.h b/libavfilter/deshake_opencl_kernel.h
deleted file mode 100644
index dd45d6f..0000000
--- a/libavfilter/deshake_opencl_kernel.h
+++ /dev/null

@@ -1,225 +0,0 @@
-/*
- * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
- * Copyright (C) 2013 Lenny Wang
- *
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVFILTER_DESHAKE_OPENCL_KERNEL_H
-#define AVFILTER_DESHAKE_OPENCL_KERNEL_H
-
-#include "libavutil/opencl.h"
-
-const char *ff_kernel_deshake_opencl = AV_OPENCL_KERNEL(
-inline unsigned char pixel(global const unsigned char *src, int x, int y,
-                           int w, int h,int stride, unsigned char def)
-{
-    return (x < 0 || y < 0 || x >= w || y >= h) ? def : src[x + y * stride];
-}
-
-unsigned char interpolate_nearest(float x, float y, global const unsigned char *src,
-                                  int width, int height, int stride, unsigned char def)
-{
-    return pixel(src, (int)(x + 0.5f), (int)(y + 0.5f), width, height, stride, def);
-}
-
-unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src,
-                                   int width, int height, int stride, unsigned char def)
-{
-    int x_c, x_f, y_c, y_f;
-    int v1, v2, v3, v4;
-    x_f = (int)x;
-    y_f = (int)y;
-    x_c = x_f + 1;
-    y_c = y_f + 1;
-
-    if (x_f < -1 || x_f > width || y_f < -1 || y_f > height) {
-        return def;
-    } else {
-        v4 = pixel(src, x_f, y_f, width, height, stride, def);
-        v2 = pixel(src, x_c, y_f, width, height, stride, def);
-        v3 = pixel(src, x_f, y_c, width, height, stride, def);
-        v1 = pixel(src, x_c, y_c, width, height, stride, def);
-        return (v1*(x - x_f)*(y - y_f) + v2*((x - x_f)*(y_c - y)) +
-                v3*(x_c - x)*(y - y_f) + v4*((x_c - x)*(y_c - y)));
-    }
-}
-
-unsigned char interpolate_biquadratic(float x, float y, global const unsigned char *src,
-                                      int width, int height, int stride, unsigned char def)
-{
-    int     x_c, x_f, y_c, y_f;
-    unsigned char v1,  v2,  v3,  v4;
-    float   f1,  f2,  f3,  f4;
-    x_f = (int)x;
-    y_f = (int)y;
-    x_c = x_f + 1;
-    y_c = y_f + 1;
-
-    if (x_f < - 1 || x_f > width || y_f < -1 || y_f > height)
-        return def;
-    else {
-        v4 = pixel(src, x_f, y_f, width, height, stride, def);
-        v2 = pixel(src, x_c, y_f, width, height, stride, def);
-        v3 = pixel(src, x_f, y_c, width, height, stride, def);
-        v1 = pixel(src, x_c, y_c, width, height, stride, def);
-
-        f1 = 1 - sqrt((x_c - x) * (y_c - y));
-        f2 = 1 - sqrt((x_c - x) * (y - y_f));
-        f3 = 1 - sqrt((x - x_f) * (y_c - y));
-        f4 = 1 - sqrt((x - x_f) * (y - y_f));
-        return (v1 * f1 + v2 * f2 + v3 * f3 + v4 * f4) / (f1 + f2 + f3 + f4);
-    }
-}
-
-inline const float clipf(float a, float amin, float amax)
-{
-    if      (a < amin) return amin;
-    else if (a > amax) return amax;
-    else               return a;
-}
-
-inline int mirror(int v, int m)
-{
-    while ((unsigned)v > (unsigned)m) {
-        v = -v;
-        if (v < 0)
-            v += 2 * m;
-    }
-    return v;
-}
-
-kernel void avfilter_transform_luma(global unsigned char *src,
-                                    global unsigned char *dst,
-                                    float4 matrix,
-                                    int interpolate,
-                                    int fill,
-                                    int src_stride_lu,
-                                    int dst_stride_lu,
-                                    int height,
-                                    int width)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-    int idx_dst = y * dst_stride_lu + x;
-    unsigned char def = 0;
-    float x_s = x * matrix.x + y * matrix.y + matrix.z;
-    float y_s = x * (-matrix.y) + y * matrix.x + matrix.w;
-
-    if (x < width && y < height) {
-        switch (fill) {
-            case 0: //FILL_BLANK
-                def = 0;
-                break;
-            case 1: //FILL_ORIGINAL
-                def = src[y*src_stride_lu + x];
-                break;
-            case 2: //FILL_CLAMP
-                y_s = clipf(y_s, 0, height - 1);
-                x_s = clipf(x_s, 0, width - 1);
-                def = src[(int)y_s * src_stride_lu + (int)x_s];
-                break;
-            case 3: //FILL_MIRROR
-                y_s = mirror(y_s, height - 1);
-                x_s = mirror(x_s, width - 1);
-                def = src[(int)y_s * src_stride_lu + (int)x_s];
-                break;
-        }
-        switch (interpolate) {
-            case 0: //INTERPOLATE_NEAREST
-                dst[idx_dst] = interpolate_nearest(x_s, y_s, src, width, height, src_stride_lu, def);
-                break;
-            case 1: //INTERPOLATE_BILINEAR
-                dst[idx_dst] = interpolate_bilinear(x_s, y_s, src, width, height, src_stride_lu, def);
-                break;
-            case 2: //INTERPOLATE_BIQUADRATIC
-                dst[idx_dst] = interpolate_biquadratic(x_s, y_s, src, width, height, src_stride_lu, def);
-                break;
-            default:
-                return;
-        }
-    }
-}
-
-kernel void avfilter_transform_chroma(global unsigned char *src,
-                                      global unsigned char *dst,
-                                      float4 matrix,
-                                      int interpolate,
-                                      int fill,
-                                      int src_stride_lu,
-                                      int dst_stride_lu,
-                                      int src_stride_ch,
-                                      int dst_stride_ch,
-                                      int height,
-                                      int width,
-                                      int ch,
-                                      int cw)
-{
-
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-    int pad_ch = get_global_size(1)>>1;
-    global unsigned char *dst_u = dst + height * dst_stride_lu;
-    global unsigned char *src_u = src + height * src_stride_lu;
-    global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
-    global unsigned char *src_v = src_u + ch * src_stride_ch;
-    src = y < pad_ch ? src_u : src_v;
-    dst = y < pad_ch ? dst_u : dst_v;
-    y = select(y - pad_ch, y, y < pad_ch);
-    float x_s = x * matrix.x + y * matrix.y + matrix.z;
-    float y_s = x * (-matrix.y) + y * matrix.x + matrix.w;
-    int idx_dst = y * dst_stride_ch + x;
-    unsigned char def;
-
-    if (x < cw && y < ch) {
-        switch (fill) {
-            case 0: //FILL_BLANK
-                def = 0;
-                break;
-            case 1: //FILL_ORIGINAL
-                def = src[y*src_stride_ch + x];
-                break;
-            case 2: //FILL_CLAMP
-                y_s = clipf(y_s, 0, ch - 1);
-                x_s = clipf(x_s, 0, cw - 1);
-                def = src[(int)y_s * src_stride_ch + (int)x_s];
-                break;
-            case 3: //FILL_MIRROR
-                y_s = mirror(y_s, ch - 1);
-                x_s = mirror(x_s, cw - 1);
-                def = src[(int)y_s * src_stride_ch + (int)x_s];
-                break;
-        }
-        switch (interpolate) {
-            case 0: //INTERPOLATE_NEAREST
-                dst[idx_dst] = interpolate_nearest(x_s, y_s, src, cw, ch, src_stride_ch, def);
-                break;
-            case 1: //INTERPOLATE_BILINEAR
-                dst[idx_dst] = interpolate_bilinear(x_s, y_s, src, cw, ch, src_stride_ch, def);
-                break;
-            case 2: //INTERPOLATE_BIQUADRATIC
-                dst[idx_dst] = interpolate_biquadratic(x_s, y_s, src, cw, ch, src_stride_ch, def);
-                break;
-            default:
-                return;
-        }
-    }
-}
-);
-
-#endif /* AVFILTER_DESHAKE_OPENCL_KERNEL_H */

diff --git a/libavfilter/dnn_backend_native.c b/libavfilter/dnn_backend_native.c
new file mode 100644
index 0000000..70d857f
--- /dev/null
+++ b/libavfilter/dnn_backend_native.c

@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2018 Sergey Lavrushkin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN native backend implementation.
+ */
+
+#include "dnn_backend_native.h"
+
+static DNNReturnType set_input_output_native(void *model, DNNData *input, DNNData *output)
+{
+    ConvolutionalNetwork *network = (ConvolutionalNetwork *)model;
+    InputParams *input_params;
+    ConvolutionalParams *conv_params;
+    DepthToSpaceParams *depth_to_space_params;
+    int cur_width, cur_height, cur_channels;
+    int32_t layer;
+
+    if (network->layers_num <= 0 || network->layers[0].type != INPUT){
+        return DNN_ERROR;
+    }
+    else{
+        input_params = (InputParams *)network->layers[0].params;
+        input_params->width = cur_width = input->width;
+        input_params->height = cur_height = input->height;
+        input_params->channels = cur_channels = input->channels;
+        if (input->data){
+            av_freep(&input->data);
+        }
+        network->layers[0].output = input->data = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
+        if (!network->layers[0].output){
+            return DNN_ERROR;
+        }
+    }
+
+    for (layer = 1; layer < network->layers_num; ++layer){
+        switch (network->layers[layer].type){
+        case CONV:
+            conv_params = (ConvolutionalParams *)network->layers[layer].params;
+            if (conv_params->input_num != cur_channels){
+                return DNN_ERROR;
+            }
+            cur_channels = conv_params->output_num;
+            break;
+        case DEPTH_TO_SPACE:
+            depth_to_space_params = (DepthToSpaceParams *)network->layers[layer].params;
+            if (cur_channels % (depth_to_space_params->block_size * depth_to_space_params->block_size) != 0){
+                return DNN_ERROR;
+            }
+            cur_channels = cur_channels / (depth_to_space_params->block_size * depth_to_space_params->block_size);
+            cur_height *= depth_to_space_params->block_size;
+            cur_width *= depth_to_space_params->block_size;
+            break;
+        default:
+            return DNN_ERROR;
+        }
+        if (network->layers[layer].output){
+            av_freep(&network->layers[layer].output);
+        }
+        network->layers[layer].output = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
+        if (!network->layers[layer].output){
+            return DNN_ERROR;
+        }
+    }
+
+    output->data = network->layers[network->layers_num - 1].output;
+    output->height = cur_height;
+    output->width = cur_width;
+    output->channels = cur_channels;
+
+    return DNN_SUCCESS;
+}
+
+// Loads model and its parameters that are stored in a binary file with following structure:
+// layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
+// For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
+// For DEPTH_TO_SPACE layer: block_size
+DNNModel *ff_dnn_load_model_native(const char *model_filename)
+{
+    DNNModel *model = NULL;
+    ConvolutionalNetwork *network = NULL;
+    AVIOContext *model_file_context;
+    int file_size, dnn_size, kernel_size, i;
+    int32_t layer;
+    DNNLayerType layer_type;
+    ConvolutionalParams *conv_params;
+    DepthToSpaceParams *depth_to_space_params;
+
+    model = av_malloc(sizeof(DNNModel));
+    if (!model){
+        return NULL;
+    }
+
+    if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
+        av_freep(&model);
+        return NULL;
+    }
+    file_size = avio_size(model_file_context);
+
+    network = av_malloc(sizeof(ConvolutionalNetwork));
+    if (!network){
+        avio_closep(&model_file_context);
+        av_freep(&model);
+        return NULL;
+    }
+    model->model = (void *)network;
+
+    network->layers_num = 1 + (int32_t)avio_rl32(model_file_context);
+    dnn_size = 4;
+
+    network->layers = av_malloc(network->layers_num * sizeof(Layer));
+    if (!network->layers){
+        av_freep(&network);
+        avio_closep(&model_file_context);
+        av_freep(&model);
+        return NULL;
+    }
+
+    for (layer = 0; layer < network->layers_num; ++layer){
+        network->layers[layer].output = NULL;
+        network->layers[layer].params = NULL;
+    }
+    network->layers[0].type = INPUT;
+    network->layers[0].params = av_malloc(sizeof(InputParams));
+    if (!network->layers[0].params){
+        avio_closep(&model_file_context);
+        ff_dnn_free_model_native(&model);
+        return NULL;
+    }
+
+    for (layer = 1; layer < network->layers_num; ++layer){
+        layer_type = (int32_t)avio_rl32(model_file_context);
+        dnn_size += 4;
+        switch (layer_type){
+        case CONV:
+            conv_params = av_malloc(sizeof(ConvolutionalParams));
+            if (!conv_params){
+                avio_closep(&model_file_context);
+                ff_dnn_free_model_native(&model);
+                return NULL;
+            }
+            conv_params->activation = (int32_t)avio_rl32(model_file_context);
+            conv_params->input_num = (int32_t)avio_rl32(model_file_context);
+            conv_params->output_num = (int32_t)avio_rl32(model_file_context);
+            conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
+            kernel_size = conv_params->input_num * conv_params->output_num *
+                          conv_params->kernel_size * conv_params->kernel_size;
+            dnn_size += 16 + (kernel_size + conv_params->output_num << 2);
+            if (dnn_size > file_size || conv_params->input_num <= 0 ||
+                conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
+                avio_closep(&model_file_context);
+                ff_dnn_free_model_native(&model);
+                return NULL;
+            }
+            conv_params->kernel = av_malloc(kernel_size * sizeof(float));
+            conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
+            if (!conv_params->kernel || !conv_params->biases){
+                avio_closep(&model_file_context);
+                ff_dnn_free_model_native(&model);
+                return NULL;
+            }
+            for (i = 0; i < kernel_size; ++i){
+                conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
+            }
+            for (i = 0; i < conv_params->output_num; ++i){
+                conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
+            }
+            network->layers[layer].type = CONV;
+            network->layers[layer].params = conv_params;
+            break;
+        case DEPTH_TO_SPACE:
+            depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
+            if (!depth_to_space_params){
+                avio_closep(&model_file_context);
+                ff_dnn_free_model_native(&model);
+                return NULL;
+            }
+            depth_to_space_params->block_size = (int32_t)avio_rl32(model_file_context);
+            dnn_size += 4;
+            network->layers[layer].type = DEPTH_TO_SPACE;
+            network->layers[layer].params = depth_to_space_params;
+            break;
+        default:
+            avio_closep(&model_file_context);
+            ff_dnn_free_model_native(&model);
+            return NULL;
+        }
+    }
+
+    avio_closep(&model_file_context);
+
+    if (dnn_size != file_size){
+        ff_dnn_free_model_native(&model);
+        return NULL;
+    }
+
+    model->set_input_output = &set_input_output_native;
+
+    return model;
+}
+
+#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
+
+static void convolve(const float *input, float *output, const ConvolutionalParams *conv_params, int width, int height)
+{
+    int y, x, n_filter, ch, kernel_y, kernel_x;
+    int radius = conv_params->kernel_size >> 1;
+    int src_linesize = width * conv_params->input_num;
+    int filter_linesize = conv_params->kernel_size * conv_params->input_num;
+    int filter_size = conv_params->kernel_size * filter_linesize;
+
+    for (y = 0; y < height; ++y){
+        for (x = 0; x < width; ++x){
+            for (n_filter = 0; n_filter < conv_params->output_num; ++n_filter){
+                output[n_filter] = conv_params->biases[n_filter];
+                for (ch = 0; ch < conv_params->input_num; ++ch){
+                    for (kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y){
+                        for (kernel_x = 0; kernel_x < conv_params->kernel_size; ++kernel_x){
+                            output[n_filter] += input[CLAMP_TO_EDGE(y + kernel_y - radius, height) * src_linesize +
+                                                      CLAMP_TO_EDGE(x + kernel_x - radius, width) * conv_params->input_num + ch] *
+                                                conv_params->kernel[n_filter * filter_size + kernel_y * filter_linesize +
+                                                                    kernel_x * conv_params->input_num + ch];
+                        }
+                    }
+                }
+                switch (conv_params->activation){
+                case RELU:
+                    output[n_filter] = FFMAX(output[n_filter], 0.0);
+                    break;
+                case TANH:
+                    output[n_filter] = 2.0f  / (1.0f + exp(-2.0f * output[n_filter])) - 1.0f;
+                    break;
+                case SIGMOID:
+                    output[n_filter] = 1.0f / (1.0f + exp(-output[n_filter]));
+                }
+            }
+            output += conv_params->output_num;
+        }
+    }
+}
+
+static void depth_to_space(const float *input, float *output, int block_size, int width, int height, int channels)
+{
+    int y, x, by, bx, ch;
+    int new_channels = channels / (block_size * block_size);
+    int output_linesize = width * channels;
+    int by_linesize = output_linesize / block_size;
+    int x_linesize = new_channels * block_size;
+
+    for (y = 0; y < height; ++y){
+        for (x = 0; x < width; ++x){
+            for (by = 0; by < block_size; ++by){
+                for (bx = 0; bx < block_size; ++bx){
+                    for (ch = 0; ch < new_channels; ++ch){
+                        output[by * by_linesize + x * x_linesize + bx * new_channels + ch] = input[ch];
+                    }
+                    input += new_channels;
+                }
+            }
+        }
+        output += output_linesize;
+    }
+}
+
+DNNReturnType ff_dnn_execute_model_native(const DNNModel *model)
+{
+    ConvolutionalNetwork *network = (ConvolutionalNetwork *)model->model;
+    int cur_width, cur_height, cur_channels;
+    int32_t layer;
+    InputParams *input_params;
+    ConvolutionalParams *conv_params;
+    DepthToSpaceParams *depth_to_space_params;
+
+    if (network->layers_num <= 0 || network->layers[0].type != INPUT || !network->layers[0].output){
+        return DNN_ERROR;
+    }
+    else{
+        input_params = (InputParams *)network->layers[0].params;
+        cur_width = input_params->width;
+        cur_height = input_params->height;
+        cur_channels = input_params->channels;
+    }
+
+    for (layer = 1; layer < network->layers_num; ++layer){
+        if (!network->layers[layer].output){
+            return DNN_ERROR;
+        }
+        switch (network->layers[layer].type){
+        case CONV:
+            conv_params = (ConvolutionalParams *)network->layers[layer].params;
+            convolve(network->layers[layer - 1].output, network->layers[layer].output, conv_params, cur_width, cur_height);
+            cur_channels = conv_params->output_num;
+            break;
+        case DEPTH_TO_SPACE:
+            depth_to_space_params = (DepthToSpaceParams *)network->layers[layer].params;
+            depth_to_space(network->layers[layer - 1].output, network->layers[layer].output,
+                           depth_to_space_params->block_size, cur_width, cur_height, cur_channels);
+            cur_height *= depth_to_space_params->block_size;
+            cur_width *= depth_to_space_params->block_size;
+            cur_channels /= depth_to_space_params->block_size * depth_to_space_params->block_size;
+            break;
+        case INPUT:
+            return DNN_ERROR;
+        }
+    }
+
+    return DNN_SUCCESS;
+}
+
+void ff_dnn_free_model_native(DNNModel **model)
+{
+    ConvolutionalNetwork *network;
+    ConvolutionalParams *conv_params;
+    int32_t layer;
+
+    if (*model)
+    {
+        network = (ConvolutionalNetwork *)(*model)->model;
+        for (layer = 0; layer < network->layers_num; ++layer){
+            av_freep(&network->layers[layer].output);
+            if (network->layers[layer].type == CONV){
+                conv_params = (ConvolutionalParams *)network->layers[layer].params;
+                av_freep(&conv_params->kernel);
+                av_freep(&conv_params->biases);
+            }
+            av_freep(&network->layers[layer].params);
+        }
+        av_freep(&network->layers);
+        av_freep(&network);
+        av_freep(model);
+    }
+}

diff --git a/libavfilter/dnn_backend_native.h b/libavfilter/dnn_backend_native.h
new file mode 100644
index 0000000..51d4cac
--- /dev/null
+++ b/libavfilter/dnn_backend_native.h

@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Sergey Lavrushkin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN inference functions interface for native backend.
+ */
+
+
+#ifndef AVFILTER_DNN_BACKEND_NATIVE_H
+#define AVFILTER_DNN_BACKEND_NATIVE_H
+
+#include "dnn_interface.h"
+#include "libavformat/avio.h"
+
+typedef enum {INPUT, CONV, DEPTH_TO_SPACE} DNNLayerType;
+
+typedef enum {RELU, TANH, SIGMOID} DNNActivationFunc;
+
+typedef struct Layer{
+    DNNLayerType type;
+    float *output;
+    void *params;
+} Layer;
+
+typedef struct ConvolutionalParams{
+    int32_t input_num, output_num, kernel_size;
+    DNNActivationFunc activation;
+    float *kernel;
+    float *biases;
+} ConvolutionalParams;
+
+typedef struct InputParams{
+    int height, width, channels;
+} InputParams;
+
+typedef struct DepthToSpaceParams{
+    int block_size;
+} DepthToSpaceParams;
+
+// Represents simple feed-forward convolutional network.
+typedef struct ConvolutionalNetwork{
+    Layer *layers;
+    int32_t layers_num;
+} ConvolutionalNetwork;
+
+DNNModel *ff_dnn_load_model_native(const char *model_filename);
+
+DNNReturnType ff_dnn_execute_model_native(const DNNModel *model);
+
+void ff_dnn_free_model_native(DNNModel **model);
+
+#endif

diff --git a/libavfilter/dnn_backend_tf.c b/libavfilter/dnn_backend_tf.c
new file mode 100644
index 0000000..5bc7f06
--- /dev/null
+++ b/libavfilter/dnn_backend_tf.c

@@ -0,0 +1,558 @@
+/*
+ * Copyright (c) 2018 Sergey Lavrushkin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN tensorflow backend implementation.
+ */
+
+#include "dnn_backend_tf.h"
+#include "dnn_backend_native.h"
+#include "libavformat/avio.h"
+
+#include <tensorflow/c/c_api.h>
+
+typedef struct TFModel{
+    TF_Graph *graph;
+    TF_Session *session;
+    TF_Status *status;
+    TF_Output input, output;
+    TF_Tensor *input_tensor;
+    DNNData *output_data;
+} TFModel;
+
+static void free_buffer(void *data, size_t length)
+{
+    av_freep(&data);
+}
+
+static TF_Buffer *read_graph(const char *model_filename)
+{
+    TF_Buffer *graph_buf;
+    unsigned char *graph_data = NULL;
+    AVIOContext *model_file_context;
+    long size, bytes_read;
+
+    if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
+        return NULL;
+    }
+
+    size = avio_size(model_file_context);
+
+    graph_data = av_malloc(size);
+    if (!graph_data){
+        avio_closep(&model_file_context);
+        return NULL;
+    }
+    bytes_read = avio_read(model_file_context, graph_data, size);
+    avio_closep(&model_file_context);
+    if (bytes_read != size){
+        av_freep(&graph_data);
+        return NULL;
+    }
+
+    graph_buf = TF_NewBuffer();
+    graph_buf->data = (void *)graph_data;
+    graph_buf->length = size;
+    graph_buf->data_deallocator = free_buffer;
+
+    return graph_buf;
+}
+
+static DNNReturnType set_input_output_tf(void *model, DNNData *input, DNNData *output)
+{
+    TFModel *tf_model = (TFModel *)model;
+    int64_t input_dims[] = {1, input->height, input->width, input->channels};
+    TF_SessionOptions *sess_opts;
+    const TF_Operation *init_op = TF_GraphOperationByName(tf_model->graph, "init");
+    TF_Tensor *output_tensor;
+
+    // Input operation should be named 'x'
+    tf_model->input.oper = TF_GraphOperationByName(tf_model->graph, "x");
+    if (!tf_model->input.oper){
+        return DNN_ERROR;
+    }
+    tf_model->input.index = 0;
+    if (tf_model->input_tensor){
+        TF_DeleteTensor(tf_model->input_tensor);
+    }
+    tf_model->input_tensor = TF_AllocateTensor(TF_FLOAT, input_dims, 4,
+                                               input_dims[1] * input_dims[2] * input_dims[3] * sizeof(float));
+    if (!tf_model->input_tensor){
+        return DNN_ERROR;
+    }
+    input->data = (float *)TF_TensorData(tf_model->input_tensor);
+
+    // Output operation should be named 'y'
+    tf_model->output.oper = TF_GraphOperationByName(tf_model->graph, "y");
+    if (!tf_model->output.oper){
+        return DNN_ERROR;
+    }
+    tf_model->output.index = 0;
+
+    if (tf_model->session){
+        TF_CloseSession(tf_model->session, tf_model->status);
+        TF_DeleteSession(tf_model->session, tf_model->status);
+    }
+
+    sess_opts = TF_NewSessionOptions();
+    tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
+    TF_DeleteSessionOptions(sess_opts);
+    if (TF_GetCode(tf_model->status) != TF_OK)
+    {
+        return DNN_ERROR;
+    }
+
+    // Run initialization operation with name "init" if it is present in graph
+    if (init_op){
+        TF_SessionRun(tf_model->session, NULL,
+                      NULL, NULL, 0,
+                      NULL, NULL, 0,
+                      &init_op, 1, NULL, tf_model->status);
+        if (TF_GetCode(tf_model->status) != TF_OK)
+        {
+            return DNN_ERROR;
+        }
+    }
+
+    // Execute network to get output height, width and number of channels
+    TF_SessionRun(tf_model->session, NULL,
+                  &tf_model->input, &tf_model->input_tensor, 1,
+                  &tf_model->output, &output_tensor, 1,
+                  NULL, 0, NULL, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+    else{
+        output->height = TF_Dim(output_tensor, 1);
+        output->width = TF_Dim(output_tensor, 2);
+        output->channels = TF_Dim(output_tensor, 3);
+        output->data = av_malloc(output->height * output->width * output->channels * sizeof(float));
+        if (!output->data){
+            return DNN_ERROR;
+        }
+        tf_model->output_data = output;
+        TF_DeleteTensor(output_tensor);
+    }
+
+    return DNN_SUCCESS;
+}
+
+static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
+{
+    TF_Buffer *graph_def;
+    TF_ImportGraphDefOptions *graph_opts;
+
+    graph_def = read_graph(model_filename);
+    if (!graph_def){
+        return DNN_ERROR;
+    }
+    tf_model->graph = TF_NewGraph();
+    tf_model->status = TF_NewStatus();
+    graph_opts = TF_NewImportGraphDefOptions();
+    TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
+    TF_DeleteImportGraphDefOptions(graph_opts);
+    TF_DeleteBuffer(graph_def);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        TF_DeleteGraph(tf_model->graph);
+        TF_DeleteStatus(tf_model->status);
+        return DNN_ERROR;
+    }
+
+    return DNN_SUCCESS;
+}
+
+#define NAME_BUFFER_SIZE 256
+
+static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
+                                    ConvolutionalParams* params, const int layer)
+{
+    TF_Operation *op;
+    TF_OperationDescription *op_desc;
+    TF_Output input;
+    int64_t strides[] = {1, 1, 1, 1};
+    TF_Tensor *tensor;
+    int64_t dims[4];
+    int dims_len;
+    char name_buffer[NAME_BUFFER_SIZE];
+    int32_t size;
+
+    size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
+    input.index = 0;
+
+    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
+    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
+    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
+    dims[0] = params->output_num;
+    dims[1] = params->kernel_size;
+    dims[2] = params->kernel_size;
+    dims[3] = params->input_num;
+    dims_len = 4;
+    tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
+    memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
+    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+    op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+
+    snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
+    op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
+    input.oper = op;
+    TF_AddInput(op_desc, input);
+    input.oper = transpose_op;
+    TF_AddInput(op_desc, input);
+    TF_SetAttrType(op_desc, "T", TF_FLOAT);
+    TF_SetAttrType(op_desc, "Tperm", TF_INT32);
+    op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+
+    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
+    op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
+    input.oper = *cur_op;
+    TF_AddInput(op_desc, input);
+    input.oper = op;
+    TF_AddInput(op_desc, input);
+    TF_SetAttrType(op_desc, "T", TF_FLOAT);
+    TF_SetAttrIntList(op_desc, "strides", strides, 4);
+    TF_SetAttrString(op_desc, "padding", "VALID", 5);
+    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+
+    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
+    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
+    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
+    dims[0] = params->output_num;
+    dims_len = 1;
+    tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
+    memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
+    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+    op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+
+    snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
+    op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
+    input.oper = *cur_op;
+    TF_AddInput(op_desc, input);
+    input.oper = op;
+    TF_AddInput(op_desc, input);
+    TF_SetAttrType(op_desc, "T", TF_FLOAT);
+    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+
+    snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
+    switch (params->activation){
+    case RELU:
+        op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
+        break;
+    case TANH:
+        op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
+        break;
+    case SIGMOID:
+        op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
+        break;
+    default:
+        return DNN_ERROR;
+    }
+    input.oper = *cur_op;
+    TF_AddInput(op_desc, input);
+    TF_SetAttrType(op_desc, "T", TF_FLOAT);
+    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+
+    return DNN_SUCCESS;
+}
+
+static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
+                                              DepthToSpaceParams *params, const int layer)
+{
+    TF_OperationDescription *op_desc;
+    TF_Output input;
+    char name_buffer[NAME_BUFFER_SIZE];
+
+    snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer);
+    op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
+    input.oper = *cur_op;
+    input.index = 0;
+    TF_AddInput(op_desc, input);
+    TF_SetAttrType(op_desc, "T", TF_FLOAT);
+    TF_SetAttrInt(op_desc, "block_size", params->block_size);
+    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+
+    return DNN_SUCCESS;
+}
+
+static int calculate_pad(const ConvolutionalNetwork *conv_network)
+{
+    ConvolutionalParams *params;
+    int32_t layer;
+    int pad = 0;
+
+    for (layer = 0; layer < conv_network->layers_num; ++layer){
+        if (conv_network->layers[layer].type == CONV){
+            params = (ConvolutionalParams *)conv_network->layers[layer].params;
+            pad += params->kernel_size >> 1;
+        }
+    }
+
+    return pad;
+}
+
+static DNNReturnType add_pad_op(TFModel *tf_model, TF_Operation **cur_op, const int32_t pad)
+{
+    TF_Operation *op;
+    TF_Tensor *tensor;
+    TF_OperationDescription *op_desc;
+    TF_Output input;
+    int32_t *pads;
+    int64_t pads_shape[] = {4, 2};
+
+    input.index = 0;
+
+    op_desc = TF_NewOperation(tf_model->graph, "Const", "pads");
+    TF_SetAttrType(op_desc, "dtype", TF_INT32);
+    tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
+    pads = (int32_t *)TF_TensorData(tensor);
+    pads[0] = 0;   pads[1] = 0;
+    pads[2] = pad; pads[3] = pad;
+    pads[4] = pad; pads[5] = pad;
+    pads[6] = 0;   pads[7] = 0;
+    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+    op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+
+    op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
+    input.oper = *cur_op;
+    TF_AddInput(op_desc, input);
+    input.oper = op;
+    TF_AddInput(op_desc, input);
+    TF_SetAttrType(op_desc, "T", TF_FLOAT);
+    TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
+    TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
+    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+
+    return DNN_SUCCESS;
+}
+
+static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
+{
+    int32_t layer;
+    TF_OperationDescription *op_desc;
+    TF_Operation *op;
+    TF_Operation *transpose_op;
+    TF_Tensor *tensor;
+    TF_Output input;
+    int32_t *transpose_perm;
+    int64_t transpose_perm_shape[] = {4};
+    int64_t input_shape[] = {1, -1, -1, -1};
+    int32_t pad;
+    DNNReturnType layer_add_res;
+    DNNModel *native_model = NULL;
+    ConvolutionalNetwork *conv_network;
+
+    native_model = ff_dnn_load_model_native(model_filename);
+    if (!native_model){
+        return DNN_ERROR;
+    }
+
+    conv_network = (ConvolutionalNetwork *)native_model->model;
+    pad = calculate_pad(conv_network);
+    tf_model->graph = TF_NewGraph();
+    tf_model->status = TF_NewStatus();
+
+#define CLEANUP_ON_ERROR(tf_model) \
+    { \
+        TF_DeleteGraph(tf_model->graph); \
+        TF_DeleteStatus(tf_model->status); \
+        return DNN_ERROR; \
+    }
+
+    op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
+    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
+    TF_SetAttrShape(op_desc, "shape", input_shape, 4);
+    op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        CLEANUP_ON_ERROR(tf_model);
+    }
+
+    if (add_pad_op(tf_model, &op, pad) != DNN_SUCCESS){
+        CLEANUP_ON_ERROR(tf_model);
+    }
+
+    op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
+    TF_SetAttrType(op_desc, "dtype", TF_INT32);
+    tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
+    transpose_perm = (int32_t *)TF_TensorData(tensor);
+    transpose_perm[0] = 1;
+    transpose_perm[1] = 2;
+    transpose_perm[2] = 3;
+    transpose_perm[3] = 0;
+    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        CLEANUP_ON_ERROR(tf_model);
+    }
+    transpose_op = TF_FinishOperation(op_desc, tf_model->status);
+
+    for (layer = 0; layer < conv_network->layers_num; ++layer){
+        switch (conv_network->layers[layer].type){
+        case INPUT:
+            break;
+        case CONV:
+            layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
+                                           (ConvolutionalParams *)conv_network->layers[layer].params, layer);
+            break;
+        case DEPTH_TO_SPACE:
+            layer_add_res = add_depth_to_space_layer(tf_model, &op,
+                                                     (DepthToSpaceParams *)conv_network->layers[layer].params, layer);
+            break;
+        default:
+            CLEANUP_ON_ERROR(tf_model);
+        }
+
+        if (layer_add_res != DNN_SUCCESS){
+            CLEANUP_ON_ERROR(tf_model);
+        }
+    }
+
+    op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
+    input.oper = op;
+    TF_AddInput(op_desc, input);
+    TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        CLEANUP_ON_ERROR(tf_model);
+    }
+
+    ff_dnn_free_model_native(&native_model);
+
+    return DNN_SUCCESS;
+}
+
+DNNModel *ff_dnn_load_model_tf(const char *model_filename)
+{
+    DNNModel *model = NULL;
+    TFModel *tf_model = NULL;
+
+    model = av_malloc(sizeof(DNNModel));
+    if (!model){
+        return NULL;
+    }
+
+    tf_model = av_malloc(sizeof(TFModel));
+    if (!tf_model){
+        av_freep(&model);
+        return NULL;
+    }
+    tf_model->session = NULL;
+    tf_model->input_tensor = NULL;
+    tf_model->output_data = NULL;
+
+    if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
+        if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
+            av_freep(&tf_model);
+            av_freep(&model);
+
+            return NULL;
+        }
+    }
+
+    model->model = (void *)tf_model;
+    model->set_input_output = &set_input_output_tf;
+
+    return model;
+}
+
+
+
+DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model)
+{
+    TFModel *tf_model = (TFModel *)model->model;
+    TF_Tensor *output_tensor;
+
+    TF_SessionRun(tf_model->session, NULL,
+                  &tf_model->input, &tf_model->input_tensor, 1,
+                  &tf_model->output, &output_tensor, 1,
+                  NULL, 0, NULL, tf_model->status);
+
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+    else{
+        memcpy(tf_model->output_data->data, TF_TensorData(output_tensor),
+               tf_model->output_data->height * tf_model->output_data->width *
+               tf_model->output_data->channels * sizeof(float));
+        TF_DeleteTensor(output_tensor);
+
+        return DNN_SUCCESS;
+    }
+}
+
+void ff_dnn_free_model_tf(DNNModel **model)
+{
+    TFModel *tf_model;
+
+    if (*model){
+        tf_model = (TFModel *)(*model)->model;
+        if (tf_model->graph){
+            TF_DeleteGraph(tf_model->graph);
+        }
+        if (tf_model->session){
+            TF_CloseSession(tf_model->session, tf_model->status);
+            TF_DeleteSession(tf_model->session, tf_model->status);
+        }
+        if (tf_model->status){
+            TF_DeleteStatus(tf_model->status);
+        }
+        if (tf_model->input_tensor){
+            TF_DeleteTensor(tf_model->input_tensor);
+        }
+        if (tf_model->output_data){
+            av_freep(&tf_model->output_data->data);
+        }
+        av_freep(&tf_model);
+        av_freep(model);
+    }
+}

diff --git a/libavfilter/avfiltergraph.h b/libavfilter/dnn_backend_tf.h
similarity index 66%
rename from libavfilter/avfiltergraph.h
rename to libavfilter/dnn_backend_tf.h
index b31d581..7ba84f4 100644
--- a/libavfilter/avfiltergraph.h
+++ b/libavfilter/dnn_backend_tf.h

@@ -1,6 +1,5 @@
 /*
- * Filter graphs
- * copyright (c) 2007 Bobby Bingham
+ * Copyright (c) 2018 Sergey Lavrushkin
  *
  * This file is part of FFmpeg.
  *
@@ -19,10 +18,21 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVFILTER_AVFILTERGRAPH_H
-#define AVFILTER_AVFILTERGRAPH_H
+/**
+ * @file
+ * DNN inference functions interface for TensorFlow backend.
+ */
 
-#include "avfilter.h"
-#include "libavutil/log.h"
 
-#endif /* AVFILTER_AVFILTERGRAPH_H */
+#ifndef AVFILTER_DNN_BACKEND_TF_H
+#define AVFILTER_DNN_BACKEND_TF_H
+
+#include "dnn_interface.h"
+
+DNNModel *ff_dnn_load_model_tf(const char *model_filename);
+
+DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model);
+
+void ff_dnn_free_model_tf(DNNModel **model);
+
+#endif

diff --git a/libavfilter/dnn_interface.c b/libavfilter/dnn_interface.c
new file mode 100644
index 0000000..86fc283
--- /dev/null
+++ b/libavfilter/dnn_interface.c

@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Sergey Lavrushkin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Implements DNN module initialization with specified backend.
+ */
+
+#include "dnn_interface.h"
+#include "dnn_backend_native.h"
+#include "dnn_backend_tf.h"
+#include "libavutil/mem.h"
+
+DNNModule *ff_get_dnn_module(DNNBackendType backend_type)
+{
+    DNNModule *dnn_module;
+
+    dnn_module = av_malloc(sizeof(DNNModule));
+    if(!dnn_module){
+        return NULL;
+    }
+
+    switch(backend_type){
+    case DNN_NATIVE:
+        dnn_module->load_model = &ff_dnn_load_model_native;
+        dnn_module->execute_model = &ff_dnn_execute_model_native;
+        dnn_module->free_model = &ff_dnn_free_model_native;
+        break;
+    case DNN_TF:
+    #if (CONFIG_LIBTENSORFLOW == 1)
+        dnn_module->load_model = &ff_dnn_load_model_tf;
+        dnn_module->execute_model = &ff_dnn_execute_model_tf;
+        dnn_module->free_model = &ff_dnn_free_model_tf;
+    #else
+        av_freep(&dnn_module);
+        return NULL;
+    #endif
+        break;
+    default:
+        av_log(NULL, AV_LOG_ERROR, "Module backend_type is not native or tensorflow\n");
+        av_freep(&dnn_module);
+        return NULL;
+    }
+
+    return dnn_module;
+}

diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
new file mode 100644
index 0000000..e367343
--- /dev/null
+++ b/libavfilter/dnn_interface.h

@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Sergey Lavrushkin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN inference engine interface.
+ */
+
+#ifndef AVFILTER_DNN_INTERFACE_H
+#define AVFILTER_DNN_INTERFACE_H
+
+typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType;
+
+typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType;
+
+typedef struct DNNData{
+    float *data;
+    int width, height, channels;
+} DNNData;
+
+typedef struct DNNModel{
+    // Stores model that can be different for different backends.
+    void *model;
+    // Sets model input and output, while allocating additional memory for intermediate calculations.
+    // Should be called at least once before model execution.
+    DNNReturnType (*set_input_output)(void *model, DNNData *input, DNNData *output);
+} DNNModel;
+
+// Stores pointers to functions for loading, executing, freeing DNN models for one of the backends.
+typedef struct DNNModule{
+    // Loads model and parameters from given file. Returns NULL if it is not possible.
+    DNNModel *(*load_model)(const char *model_filename);
+    // Executes model with specified input and output. Returns DNN_ERROR otherwise.
+    DNNReturnType (*execute_model)(const DNNModel *model);
+    // Frees memory allocated for model.
+    void (*free_model)(DNNModel **model);
+} DNNModule;
+
+// Initializes DNNModule depending on chosen backend.
+DNNModule *ff_get_dnn_module(DNNBackendType backend_type);
+
+#endif

diff --git a/libavfilter/drawutils.c b/libavfilter/drawutils.c
index 77ab86b..5f4cb54 100644
--- a/libavfilter/drawutils.c
+++ b/libavfilter/drawutils.c

@@ -181,13 +181,17 @@
     const AVComponentDescriptor *c;
     unsigned i, nb_planes = 0;
     int pixelstep[MAX_PLANES] = { 0 };
+    int full_range = 0;
 
     if (!desc || !desc->name)
         return AVERROR(EINVAL);
-    if (desc->flags & ~(AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PSEUDOPAL | AV_PIX_FMT_FLAG_ALPHA))
+    if (desc->flags & ~(AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB | FF_PSEUDOPAL | AV_PIX_FMT_FLAG_ALPHA))
         return AVERROR(ENOSYS);
-    if (format == AV_PIX_FMT_P010LE || format == AV_PIX_FMT_P010BE)
+    if (format == AV_PIX_FMT_P010LE || format == AV_PIX_FMT_P010BE || format == AV_PIX_FMT_P016LE || format == AV_PIX_FMT_P016BE)
         return AVERROR(ENOSYS);
+    if (format == AV_PIX_FMT_YUVJ420P || format == AV_PIX_FMT_YUVJ422P || format == AV_PIX_FMT_YUVJ444P ||
+        format == AV_PIX_FMT_YUVJ411P || format == AV_PIX_FMT_YUVJ440P)
+        full_range = 1;
     for (i = 0; i < desc->nb_components; i++) {
         c = &desc->comp[i];
         /* for now, only 8-16 bits formats */
@@ -214,6 +218,7 @@
     draw->format    = format;
     draw->nb_planes = nb_planes;
     draw->flags     = flags;
+    draw->full_range = full_range;
     memcpy(draw->pixelstep, pixelstep, sizeof(draw->pixelstep));
     draw->hsub[1] = draw->hsub[2] = draw->hsub_max = desc->log2_chroma_w;
     draw->vsub[1] = draw->vsub[2] = draw->vsub_max = desc->log2_chroma_h;
@@ -249,9 +254,9 @@
     } else if (draw->nb_planes >= 2) {
         /* assume YUV */
         const AVPixFmtDescriptor *desc = draw->desc;
-        color->comp[desc->comp[0].plane].u8[desc->comp[0].offset] = RGB_TO_Y_CCIR(rgba[0], rgba[1], rgba[2]);
-        color->comp[desc->comp[1].plane].u8[desc->comp[1].offset] = RGB_TO_U_CCIR(rgba[0], rgba[1], rgba[2], 0);
-        color->comp[desc->comp[2].plane].u8[desc->comp[2].offset] = RGB_TO_V_CCIR(rgba[0], rgba[1], rgba[2], 0);
+        color->comp[desc->comp[0].plane].u8[desc->comp[0].offset] = draw->full_range ? RGB_TO_Y_JPEG(rgba[0], rgba[1], rgba[2]) : RGB_TO_Y_CCIR(rgba[0], rgba[1], rgba[2]);
+        color->comp[desc->comp[1].plane].u8[desc->comp[1].offset] = draw->full_range ? RGB_TO_U_JPEG(rgba[0], rgba[1], rgba[2]) : RGB_TO_U_CCIR(rgba[0], rgba[1], rgba[2], 0);
+        color->comp[desc->comp[2].plane].u8[desc->comp[2].offset] = draw->full_range ? RGB_TO_V_JPEG(rgba[0], rgba[1], rgba[2]) : RGB_TO_V_CCIR(rgba[0], rgba[1], rgba[2], 0);
         color->comp[3].u8[0] = rgba[3];
 #define EXPAND(compn) \
         if (desc->comp[compn].depth > 8) \
@@ -266,7 +271,8 @@
                draw->format == AV_PIX_FMT_GRAY16LE || draw->format == AV_PIX_FMT_YA16LE ||
                draw->format == AV_PIX_FMT_GRAY9LE  ||
                draw->format == AV_PIX_FMT_GRAY10LE ||
-               draw->format == AV_PIX_FMT_GRAY12LE) {
+               draw->format == AV_PIX_FMT_GRAY12LE ||
+               draw->format == AV_PIX_FMT_GRAY14LE) {
         const AVPixFmtDescriptor *desc = draw->desc;
         color->comp[0].u8[0] = RGB_TO_Y_CCIR(rgba[0], rgba[1], rgba[2]);
         EXPAND(0);

diff --git a/libavfilter/drawutils.h b/libavfilter/drawutils.h
index cf53635..b999d70 100644
--- a/libavfilter/drawutils.h
+++ b/libavfilter/drawutils.h

@@ -55,6 +55,7 @@
     uint8_t vsub[MAX_PLANES];  /*< vertical subsampling */
     uint8_t hsub_max;
     uint8_t vsub_max;
+    int full_range;
     unsigned flags;
 } FFDrawContext;
 

diff --git a/libavfilter/f_cue.c b/libavfilter/f_cue.c
new file mode 100644
index 0000000..b48dfc9
--- /dev/null
+++ b/libavfilter/f_cue.c

@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2018 Marton Balint
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/time.h"
+#include "avfilter.h"
+#include "filters.h"
+#include "internal.h"
+
+typedef struct CueContext {
+    const AVClass *class;
+    int64_t first_pts;
+    int64_t cue;
+    int64_t preroll;
+    int64_t buffer;
+    int status;
+} CueContext;
+
+static int activate(AVFilterContext *ctx)
+{
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    CueContext *s = ctx->priv;
+
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    if (ff_inlink_queued_frames(inlink)) {
+        AVFrame *frame = ff_inlink_peek_frame(inlink, 0);
+        int64_t pts = av_rescale_q(frame->pts, inlink->time_base, AV_TIME_BASE_Q);
+
+        if (!s->status) {
+            s->first_pts = pts;
+            s->status++;
+        }
+        if (s->status == 1) {
+            if (pts - s->first_pts < s->preroll) {
+                int ret = ff_inlink_consume_frame(inlink, &frame);
+                if (ret < 0)
+                    return ret;
+                return ff_filter_frame(outlink, frame);
+            }
+            s->first_pts = pts;
+            s->status++;
+        }
+        if (s->status == 2) {
+            frame = ff_inlink_peek_frame(inlink, ff_inlink_queued_frames(inlink) - 1);
+            pts = av_rescale_q(frame->pts, inlink->time_base, AV_TIME_BASE_Q);
+            if (!(pts - s->first_pts < s->buffer && (av_gettime() - s->cue) < 0))
+                s->status++;
+        }
+        if (s->status == 3) {
+            int64_t diff;
+            while ((diff = (av_gettime() - s->cue)) < 0)
+                av_usleep(av_clip(-diff / 2, 100, 1000000));
+            s->status++;
+        }
+        if (s->status == 4) {
+            int ret = ff_inlink_consume_frame(inlink, &frame);
+            if (ret < 0)
+                return ret;
+            return ff_filter_frame(outlink, frame);
+        }
+    }
+
+    FF_FILTER_FORWARD_STATUS(inlink, outlink);
+    FF_FILTER_FORWARD_WANTED(outlink, inlink);
+
+    return FFERROR_NOT_READY;
+}
+
+#define OFFSET(x) offsetof(CueContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
+static const AVOption options[] = {
+    { "cue", "cue unix timestamp in microseconds", OFFSET(cue), AV_OPT_TYPE_INT64, { .i64 = 0 }, 0, INT64_MAX, FLAGS },
+    { "preroll", "preroll duration in seconds", OFFSET(preroll), AV_OPT_TYPE_DURATION, { .i64 = 0 }, 0, INT64_MAX, FLAGS },
+    { "buffer", "buffer duration in seconds", OFFSET(buffer), AV_OPT_TYPE_DURATION, { .i64 = 0 }, 0, INT64_MAX, FLAGS },
+    { NULL }
+};
+
+#if CONFIG_CUE_FILTER
+#define cue_options options
+AVFILTER_DEFINE_CLASS(cue);
+
+static const AVFilterPad cue_inputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+static const AVFilterPad cue_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_cue = {
+    .name        = "cue",
+    .description = NULL_IF_CONFIG_SMALL("Delay filtering to match a cue."),
+    .priv_size   = sizeof(CueContext),
+    .priv_class  = &cue_class,
+    .inputs      = cue_inputs,
+    .outputs     = cue_outputs,
+    .activate    = activate,
+};
+#endif /* CONFIG_CUE_FILTER */
+
+#if CONFIG_ACUE_FILTER
+#define acue_options options
+AVFILTER_DEFINE_CLASS(acue);
+
+static const AVFilterPad acue_inputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+static const AVFilterPad acue_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_acue = {
+    .name        = "acue",
+    .description = NULL_IF_CONFIG_SMALL("Delay filtering to match a cue."),
+    .priv_size   = sizeof(CueContext),
+    .priv_class  = &acue_class,
+    .inputs      = acue_inputs,
+    .outputs     = acue_outputs,
+    .activate    = activate,
+};
+#endif /* CONFIG_ACUE_FILTER */

diff --git a/libavfilter/f_drawgraph.c b/libavfilter/f_drawgraph.c
index 8be9b9f..f49d5b8 100644
--- a/libavfilter/f_drawgraph.c
+++ b/libavfilter/f_drawgraph.c

@@ -43,7 +43,7 @@
     AVFrame       *out;
     int           x;
     int           prev_y[4];
-    int           first;
+    int           first[4];
     float         *values[4];
     int           values_size[4];
     int           nb_values;
@@ -102,7 +102,7 @@
         }
     }
 
-    s->first = 1;
+    s->first[0] = s->first[1] = s->first[2] = s->first[3] = 1;
 
     if (s->slide == 4) {
         s->values[0] = av_fast_realloc(NULL, &s->values_size[0], 2000);
@@ -282,8 +282,8 @@
             draw_dot(fg, x, y, out);
             break;
         case 2:
-            if (s->first) {
-                s->first = 0;
+            if (s->first[i]) {
+                s->first[i] = 0;
                 s->prev_y[i] = y;
             }
 
@@ -366,8 +366,8 @@
                     draw_dot(fg, x, y, out);
                     break;
                 case 2:
-                    if (s->first) {
-                        s->first = 0;
+                    if (s->first[i]) {
+                        s->first[i] = 0;
                         s->prev_y[i] = y;
                     }
 

diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 1e8b90f..e03adc9 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c

@@ -114,6 +114,8 @@
     int meter;                      ///< select a EBU mode between +9 and +18
     int scale_range;                ///< the range of LU values according to the meter
     int y_zero_lu;                  ///< the y value (pixel position) for 0 LU
+    int y_opt_max;                  ///< the y value (pixel position) for 1 LU
+    int y_opt_min;                  ///< the y value (pixel position) for -1 LU
     int *y_line_ref;                ///< y reference values for drawing the LU lines in the graph and the gauge
 
     /* audio */
@@ -142,6 +144,9 @@
     int metadata;                   ///< whether or not to inject loudness results in frames
     int dual_mono;                  ///< whether or not to treat single channel input files as dual-mono
     double pan_law;                 ///< pan law value used to calculate dual-mono measurements
+    int target;                     ///< target level in LUFS used to set relative zero LU in visualization
+    int gauge_type;                 ///< whether gauge shows momentary or short
+    int scale;                      ///< display scale type of statistics
 } EBUR128Context;
 
 enum {
@@ -150,6 +155,16 @@
     PEAK_MODE_TRUE_PEAKS    = 1<<2,
 };
 
+enum {
+    GAUGE_TYPE_MOMENTARY = 0,
+    GAUGE_TYPE_SHORTTERM = 1,
+};
+
+enum {
+    SCALE_TYPE_ABSOLUTE = 0,
+    SCALE_TYPE_RELATIVE = 1,
+};
+
 #define OFFSET(x) offsetof(EBUR128Context, x)
 #define A AV_OPT_FLAG_AUDIO_PARAM
 #define V AV_OPT_FLAG_VIDEO_PARAM
@@ -168,28 +183,48 @@
         { "true",   "enable true-peak mode",   0, AV_OPT_TYPE_CONST, {.i64 = PEAK_MODE_TRUE_PEAKS},    INT_MIN, INT_MAX, A|F, "mode" },
     { "dualmono", "treat mono input files as dual-mono", OFFSET(dual_mono), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, A|F },
     { "panlaw", "set a specific pan law for dual-mono files", OFFSET(pan_law), AV_OPT_TYPE_DOUBLE, {.dbl = -3.01029995663978}, -10.0, 0.0, A|F },
+    { "target", "set a specific target level in LUFS (-23 to 0)", OFFSET(target), AV_OPT_TYPE_INT, {.i64 = -23}, -23, 0, V|F },
+    { "gauge", "set gauge display type", OFFSET(gauge_type), AV_OPT_TYPE_INT, {.i64 = 0 }, GAUGE_TYPE_MOMENTARY, GAUGE_TYPE_SHORTTERM, V|F, "gaugetype" },
+        { "momentary",   "display momentary value",   0, AV_OPT_TYPE_CONST, {.i64 = GAUGE_TYPE_MOMENTARY}, INT_MIN, INT_MAX, V|F, "gaugetype" },
+        { "m",           "display momentary value",   0, AV_OPT_TYPE_CONST, {.i64 = GAUGE_TYPE_MOMENTARY}, INT_MIN, INT_MAX, V|F, "gaugetype" },
+        { "shortterm",   "display short-term value",  0, AV_OPT_TYPE_CONST, {.i64 = GAUGE_TYPE_SHORTTERM}, INT_MIN, INT_MAX, V|F, "gaugetype" },
+        { "s",           "display short-term value",  0, AV_OPT_TYPE_CONST, {.i64 = GAUGE_TYPE_SHORTTERM}, INT_MIN, INT_MAX, V|F, "gaugetype" },
+    { "scale", "sets display method for the stats", OFFSET(scale), AV_OPT_TYPE_INT, {.i64 = 0}, SCALE_TYPE_ABSOLUTE, SCALE_TYPE_RELATIVE, V|F, "scaletype" },
+        { "absolute",   "display absolute values (LUFS)",          0, AV_OPT_TYPE_CONST, {.i64 = SCALE_TYPE_ABSOLUTE}, INT_MIN, INT_MAX, V|F, "scaletype" },
+        { "LUFS",       "display absolute values (LUFS)",          0, AV_OPT_TYPE_CONST, {.i64 = SCALE_TYPE_ABSOLUTE}, INT_MIN, INT_MAX, V|F, "scaletype" },
+        { "relative",   "display values relative to target (LU)",  0, AV_OPT_TYPE_CONST, {.i64 = SCALE_TYPE_RELATIVE}, INT_MIN, INT_MAX, V|F, "scaletype" },
+        { "LU",         "display values relative to target (LU)",  0, AV_OPT_TYPE_CONST, {.i64 = SCALE_TYPE_RELATIVE}, INT_MIN, INT_MAX, V|F, "scaletype" },
     { NULL },
 };
 
 AVFILTER_DEFINE_CLASS(ebur128);
 
 static const uint8_t graph_colors[] = {
-    0xdd, 0x66, 0x66,   // value above 0LU non reached
-    0x66, 0x66, 0xdd,   // value below 0LU non reached
-    0x96, 0x33, 0x33,   // value above 0LU reached
-    0x33, 0x33, 0x96,   // value below 0LU reached
-    0xdd, 0x96, 0x96,   // value above 0LU line non reached
-    0x96, 0x96, 0xdd,   // value below 0LU line non reached
-    0xdd, 0x33, 0x33,   // value above 0LU line reached
-    0x33, 0x33, 0xdd,   // value below 0LU line reached
+    0xdd, 0x66, 0x66,   // value above 1LU non reached below -1LU (impossible)
+    0x66, 0x66, 0xdd,   // value below 1LU non reached below -1LU
+    0x96, 0x33, 0x33,   // value above 1LU reached below -1LU (impossible)
+    0x33, 0x33, 0x96,   // value below 1LU reached below -1LU
+    0xdd, 0x96, 0x96,   // value above 1LU line non reached below -1LU (impossible)
+    0x96, 0x96, 0xdd,   // value below 1LU line non reached below -1LU
+    0xdd, 0x33, 0x33,   // value above 1LU line reached below -1LU (impossible)
+    0x33, 0x33, 0xdd,   // value below 1LU line reached below -1LU
+    0xdd, 0x66, 0x66,   // value above 1LU non reached above -1LU
+    0x66, 0xdd, 0x66,   // value below 1LU non reached above -1LU
+    0x96, 0x33, 0x33,   // value above 1LU reached above -1LU
+    0x33, 0x96, 0x33,   // value below 1LU reached above -1LU
+    0xdd, 0x96, 0x96,   // value above 1LU line non reached above -1LU
+    0x96, 0xdd, 0x96,   // value below 1LU line non reached above -1LU
+    0xdd, 0x33, 0x33,   // value above 1LU line reached above -1LU
+    0x33, 0xdd, 0x33,   // value below 1LU line reached above -1LU
 };
 
 static const uint8_t *get_graph_color(const EBUR128Context *ebur128, int v, int y)
 {
-    const int below0  = y > ebur128->y_zero_lu;
+    const int above_opt_max = y > ebur128->y_opt_max;
+    const int below_opt_min = y < ebur128->y_opt_min;
     const int reached = y >= v;
     const int line    = ebur128->y_line_ref[y] || y == ebur128->y_zero_lu;
-    const int colorid = 4*line + 2*reached + below0;
+    const int colorid = 8*below_opt_min+ 4*line + 2*reached + above_opt_max;
     return graph_colors + 3*colorid;
 }
 
@@ -323,6 +358,8 @@
 
     /* draw graph */
     ebur128->y_zero_lu = lu_to_y(ebur128, 0);
+    ebur128->y_opt_max = lu_to_y(ebur128, 1);
+    ebur128->y_opt_min = lu_to_y(ebur128, -1);
     p = outpicref->data[0] + ebur128->graph.y * outpicref->linesize[0]
                            + ebur128->graph.x * 3;
     for (y = 0; y < ebur128->graph.h; y++) {
@@ -459,6 +496,7 @@
 {
     EBUR128Context *ebur128 = ctx->priv;
     AVFilterPad pad;
+    int ret;
 
     if (ebur128->loglevel != AV_LOG_INFO &&
         ebur128->loglevel != AV_LOG_VERBOSE) {
@@ -495,7 +533,11 @@
         };
         if (!pad.name)
             return AVERROR(ENOMEM);
-        ff_insert_outpad(ctx, 0, &pad);
+        ret = ff_insert_outpad(ctx, 0, &pad);
+        if (ret < 0) {
+            av_freep(&pad.name);
+            return ret;
+        }
     }
     pad = (AVFilterPad){
         .name         = av_asprintf("out%d", ebur128->do_video),
@@ -504,7 +546,11 @@
     };
     if (!pad.name)
         return AVERROR(ENOMEM);
-    ff_insert_outpad(ctx, ebur128->do_video, &pad);
+    ret = ff_insert_outpad(ctx, ebur128->do_video, &pad);
+    if (ret < 0) {
+        av_freep(&pad.name);
+        return ret;
+    }
 
     /* summary */
     av_log(ctx, AV_LOG_VERBOSE, "EBU +%d scale\n", ebur128->meter);
@@ -724,15 +770,22 @@
                 loudness_3000 -= ebur128->pan_law;
             }
 
-#define LOG_FMT "M:%6.1f S:%6.1f     I:%6.1f LUFS     LRA:%6.1f LU"
+#define LOG_FMT "TARGET:%d LUFS    M:%6.1f S:%6.1f     I:%6.1f %s       LRA:%6.1f LU"
 
             /* push one video frame */
             if (ebur128->do_video) {
                 int x, y, ret;
                 uint8_t *p;
+                double gauge_value;
 
-                const int y_loudness_lu_graph = lu_to_y(ebur128, loudness_3000 + 23);
-                const int y_loudness_lu_gauge = lu_to_y(ebur128, loudness_400  + 23);
+                if (ebur128->gauge_type == GAUGE_TYPE_MOMENTARY) {
+                    gauge_value = loudness_400 - ebur128->target;
+                } else {
+                    gauge_value = loudness_3000 - ebur128->target;
+                }
+
+                const int y_loudness_lu_graph = lu_to_y(ebur128, loudness_3000 - ebur128->target);
+                const int y_loudness_lu_gauge = lu_to_y(ebur128, gauge_value);
 
                 /* draw the graph using the short-term loudness */
                 p = pic->data[0] + ebur128->graph.y*pic->linesize[0] + ebur128->graph.x*3;
@@ -744,7 +797,7 @@
                     p += pic->linesize[0];
                 }
 
-                /* draw the gauge using the momentary loudness */
+                /* draw the gauge using either momentary or short-term loudness */
                 p = pic->data[0] + ebur128->gauge.y*pic->linesize[0] + ebur128->gauge.x*3;
                 for (y = 0; y < ebur128->gauge.h; y++) {
                     const uint8_t *c = get_graph_color(ebur128, y_loudness_lu_gauge, y);
@@ -755,10 +808,17 @@
                 }
 
                 /* draw textual info */
-                drawtext(pic, PAD, PAD - PAD/2, FONT16, font_colors,
-                         LOG_FMT "     ", // padding to erase trailing characters
-                         loudness_400, loudness_3000,
-                         ebur128->integrated_loudness, ebur128->loudness_range);
+                if (ebur128->scale == SCALE_TYPE_ABSOLUTE) {
+                    drawtext(pic, PAD, PAD - PAD/2, FONT16, font_colors,
+                             LOG_FMT "     ", // padding to erase trailing characters
+                             ebur128->target, loudness_400, loudness_3000,
+                             ebur128->integrated_loudness, "LUFS", ebur128->loudness_range);
+                } else {
+                    drawtext(pic, PAD, PAD - PAD/2, FONT16, font_colors,
+                             LOG_FMT "     ", // padding to erase trailing characters
+                             ebur128->target, loudness_400-ebur128->target, loudness_3000-ebur128->target,
+                             ebur128->integrated_loudness-ebur128->target, "LU", ebur128->loudness_range);
+                }
 
                 /* set pts and push frame */
                 pic->pts = pts;
@@ -798,10 +858,17 @@
                 SET_META_PEAK(true,   TRUE);
             }
 
-            av_log(ctx, ebur128->loglevel, "t: %-10s " LOG_FMT,
-                   av_ts2timestr(pts, &outlink->time_base),
-                   loudness_400, loudness_3000,
-                   ebur128->integrated_loudness, ebur128->loudness_range);
+            if (ebur128->scale == SCALE_TYPE_ABSOLUTE) {
+                av_log(ctx, ebur128->loglevel, "t: %-10s " LOG_FMT,
+                       av_ts2timestr(pts, &outlink->time_base),
+                       ebur128->target, loudness_400, loudness_3000,
+                       ebur128->integrated_loudness, "LUFS", ebur128->loudness_range);
+            } else {
+                av_log(ctx, ebur128->loglevel, "t: %-10s " LOG_FMT,
+                       av_ts2timestr(pts, &outlink->time_base),
+                       ebur128->target, loudness_400-ebur128->target, loudness_3000-ebur128->target,
+                       ebur128->integrated_loudness-ebur128->target, "LU", ebur128->loudness_range);
+            }
 
 #define PRINT_PEAKS(str, sp, ptype) do {                            \
     if (ebur128->peak_mode & PEAK_MODE_ ## ptype ## _PEAKS) {       \

diff --git a/libavfilter/f_reverse.c b/libavfilter/f_reverse.c
index 5bf71b3..5f27927 100644
--- a/libavfilter/f_reverse.c
+++ b/libavfilter/f_reverse.c

@@ -154,7 +154,7 @@
     if (ret < 0)
         return ret;
 
-    ret = ff_set_common_formats(ctx, ff_planar_sample_fmts());
+    ret = ff_set_common_formats(ctx, ff_all_formats(AVMEDIA_TYPE_AUDIO));
     if (ret < 0)
         return ret;
 
@@ -164,11 +164,92 @@
     return ff_set_common_samplerates(ctx, formats);
 }
 
+static void reverse_samples_planar(AVFrame *out)
+{
+    for (int p = 0; p < out->channels; p++) {
+        switch (out->format) {
+        case AV_SAMPLE_FMT_U8P: {
+            uint8_t *dst = (uint8_t *)out->extended_data[p];
+            for (int i = 0, j = out->nb_samples - 1; i < j; i++, j--)
+                FFSWAP(uint8_t, dst[i], dst[j]);
+        }
+            break;
+        case AV_SAMPLE_FMT_S16P: {
+            int16_t *dst = (int16_t *)out->extended_data[p];
+            for (int i = 0, j = out->nb_samples - 1; i < j; i++, j--)
+                FFSWAP(int16_t, dst[i], dst[j]);
+        }
+            break;
+        case AV_SAMPLE_FMT_S32P: {
+            int32_t *dst = (int32_t *)out->extended_data[p];
+            for (int i = 0, j = out->nb_samples - 1; i < j; i++, j--)
+                FFSWAP(int32_t, dst[i], dst[j]);
+        }
+            break;
+        case AV_SAMPLE_FMT_FLTP: {
+            float *dst = (float *)out->extended_data[p];
+            for (int i = 0, j = out->nb_samples - 1; i < j; i++, j--)
+                FFSWAP(float, dst[i], dst[j]);
+        }
+            break;
+        case AV_SAMPLE_FMT_DBLP: {
+            double *dst = (double *)out->extended_data[p];
+            for (int i = 0, j = out->nb_samples - 1; i < j; i++, j--)
+                FFSWAP(double, dst[i], dst[j]);
+        }
+            break;
+        }
+    }
+}
+
+static void reverse_samples_packed(AVFrame *out)
+{
+    const int channels = out->channels;
+
+    switch (out->format) {
+    case AV_SAMPLE_FMT_U8: {
+        uint8_t *dst = (uint8_t *)out->extended_data[0];
+        for (int i = 0, j = out->nb_samples - 1; i < j; i++, j--)
+            for (int p = 0; p < channels; p++)
+                FFSWAP(uint8_t, dst[i * channels + p], dst[j * channels + p]);
+    }
+        break;
+    case AV_SAMPLE_FMT_S16: {
+        int16_t *dst = (int16_t *)out->extended_data[0];
+        for (int i = 0, j = out->nb_samples - 1; i < j; i++, j--)
+            for (int p = 0; p < channels; p++)
+                FFSWAP(int16_t, dst[i * channels + p], dst[j * channels + p]);
+    }
+        break;
+    case AV_SAMPLE_FMT_S32: {
+        int32_t *dst = (int32_t *)out->extended_data[0];
+        for (int i = 0, j = out->nb_samples - 1; i < j; i++, j--)
+            for (int p = 0; p < channels; p++)
+                FFSWAP(int32_t, dst[i * channels + p], dst[j * channels + p]);
+    }
+        break;
+    case AV_SAMPLE_FMT_FLT: {
+        float *dst = (float *)out->extended_data[0];
+        for (int i = 0, j = out->nb_samples - 1; i < j; i++, j--)
+            for (int p = 0; p < channels; p++)
+                FFSWAP(float, dst[i * channels + p], dst[j * channels + p]);
+    }
+        break;
+    case AV_SAMPLE_FMT_DBL: {
+        double *dst = (double *)out->extended_data[0];
+        for (int i = 0, j = out->nb_samples - 1; i < j; i++, j--)
+            for (int p = 0; p < channels; p++)
+                FFSWAP(double, dst[i * channels + p], dst[j * channels + p]);
+    }
+        break;
+    }
+}
+
 static int areverse_request_frame(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     ReverseContext *s = ctx->priv;
-    int ret, p, i, j;
+    int ret;
 
     ret = ff_request_frame(ctx->inputs[0]);
 
@@ -176,41 +257,10 @@
         AVFrame *out = s->frames[s->nb_frames - 1];
         out->pts     = s->pts[s->flush_idx++];
 
-        for (p = 0; p < outlink->channels; p++) {
-            switch (outlink->format) {
-            case AV_SAMPLE_FMT_U8P: {
-                uint8_t *dst = (uint8_t *)out->extended_data[p];
-                for (i = 0, j = out->nb_samples - 1; i < j; i++, j--)
-                    FFSWAP(uint8_t, dst[i], dst[j]);
-            }
-                break;
-            case AV_SAMPLE_FMT_S16P: {
-                int16_t *dst = (int16_t *)out->extended_data[p];
-                for (i = 0, j = out->nb_samples - 1; i < j; i++, j--)
-                    FFSWAP(int16_t, dst[i], dst[j]);
-            }
-                break;
-            case AV_SAMPLE_FMT_S32P: {
-                int32_t *dst = (int32_t *)out->extended_data[p];
-                for (i = 0, j = out->nb_samples - 1; i < j; i++, j--)
-                    FFSWAP(int32_t, dst[i], dst[j]);
-            }
-                break;
-            case AV_SAMPLE_FMT_FLTP: {
-                float *dst = (float *)out->extended_data[p];
-                for (i = 0, j = out->nb_samples - 1; i < j; i++, j--)
-                    FFSWAP(float, dst[i], dst[j]);
-            }
-                break;
-            case AV_SAMPLE_FMT_DBLP: {
-                double *dst = (double *)out->extended_data[p];
-                for (i = 0, j = out->nb_samples - 1; i < j; i++, j--)
-                    FFSWAP(double, dst[i], dst[j]);
-            }
-                break;
-            }
-        }
-
+        if (av_sample_fmt_is_planar(out->format))
+            reverse_samples_planar(out);
+        else
+            reverse_samples_packed(out);
         ret = ff_filter_frame(outlink, out);
         s->nb_frames--;
     }

diff --git a/libavfilter/filters.h b/libavfilter/filters.h
index 4e2652e..1157755 100644
--- a/libavfilter/filters.h
+++ b/libavfilter/filters.h

@@ -61,11 +61,24 @@
 int ff_inlink_evaluate_timeline_at_frame(AVFilterLink *link, const AVFrame *frame);
 
 /**
+ * Get the number of frames available on the link.
+ * @return the number of frames available in the link fifo.
+ */
+size_t ff_inlink_queued_frames(AVFilterLink *link);
+
+/**
  * Test if a frame is available on the link.
  * @return  >0 if a frame is available
  */
 int ff_inlink_check_available_frame(AVFilterLink *link);
 
+
+/***
+  * Get the number of samples available on the link.
+  * @return the numer of samples available on the link.
+  */
+int ff_inlink_queued_samples(AVFilterLink *link);
+
 /**
  * Test if enough samples are available on the link.
  * @return  >0 if enough samples are available
@@ -103,6 +116,13 @@
                             AVFrame **rframe);
 
 /**
+ * Access a frame in the link fifo without consuming it.
+ * The first frame is numbered 0; the designated frame must exist.
+ * @return the frame at idx position in the link fifo.
+ */
+AVFrame *ff_inlink_peek_frame(AVFilterLink *link, size_t idx);
+
+/**
  * Make sure a frame is writable.
  * This is similar to av_frame_make_writable() except it uses the link's
  * buffer allocation callback, and therefore allows direct rendering.

diff --git a/libavfilter/formats.c b/libavfilter/formats.c
index 20a2c89..31ee445 100644
--- a/libavfilter/formats.c
+++ b/libavfilter/formats.c

@@ -662,20 +662,12 @@
 int ff_parse_channel_layout(int64_t *ret, int *nret, const char *arg,
                             void *log_ctx)
 {
-    char *tail;
     int64_t chlayout;
     int nb_channels;
 
     if (av_get_extended_channel_layout(arg, &chlayout, &nb_channels) < 0) {
-        /* [TEMPORARY 2016-12 -> 2017-12]*/
-        nb_channels = strtol(arg, &tail, 10);
-        if (!errno && *tail == 'c' && *(tail + 1) == '\0' && nb_channels > 0 && nb_channels < 64) {
-            chlayout = 0;
-            av_log(log_ctx, AV_LOG_WARNING, "Deprecated channel count specification '%s'. This will stop working in releases made in 2018 and after.\n", arg);
-        } else {
-            av_log(log_ctx, AV_LOG_ERROR, "Invalid channel layout '%s'\n", arg);
-            return AVERROR(EINVAL);
-        }
+        av_log(log_ctx, AV_LOG_ERROR, "Invalid channel layout '%s'\n", arg);
+        return AVERROR(EINVAL);
     }
     if (!chlayout && !nret) {
         av_log(log_ctx, AV_LOG_ERROR, "Unknown channel layout '%s' is not supported.\n", arg);

diff --git a/libavfilter/framepool.c b/libavfilter/framepool.c
index 42c0e58..3b178ce 100644
--- a/libavfilter/framepool.c
+++ b/libavfilter/framepool.c

@@ -71,7 +71,7 @@
     pool->format = format;
     pool->align = align;
 
-    if ((ret = av_image_check_size(width, height, 0, NULL)) < 0) {
+    if ((ret = av_image_check_size2(width, height, INT64_MAX, format, 0, NULL)) < 0) {
         goto fail;
     }
 
@@ -103,7 +103,7 @@
     }
 
     if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
-        desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) {
+        desc->flags & FF_PSEUDOPAL) {
         pool->pools[1] = av_buffer_pool_init(AVPALETTE_SIZE, alloc);
         if (!pool->pools[1])
             goto fail;
@@ -227,7 +227,7 @@
         }
 
         if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
-            desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) {
+            desc->flags & FF_PSEUDOPAL) {
             enum AVPixelFormat format =
                 pool->format == AV_PIX_FMT_PAL8 ? AV_PIX_FMT_BGR8 : pool->format;
 

diff --git a/libavfilter/framerate.h b/libavfilter/framerate.h
new file mode 100644
index 0000000..a42d5af
--- /dev/null
+++ b/libavfilter/framerate.h

@@ -0,0 +1,74 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_FRAMERATE_H
+#define AVFILTER_FRAMERATE_H
+
+#include "libavutil/pixelutils.h"
+#include "avfilter.h"
+
+#define BLEND_FUNC_PARAMS const uint8_t *src1, ptrdiff_t src1_linesize, \
+                          const uint8_t *src2, ptrdiff_t src2_linesize, \
+                          uint8_t *dst, ptrdiff_t dst_linesize, \
+                          ptrdiff_t width, ptrdiff_t height, \
+                          int factor1, int factor2, int half
+
+#define BLEND_FACTOR_DEPTH8   7
+#define BLEND_FACTOR_DEPTH16 15
+
+typedef void (*blend_func)(BLEND_FUNC_PARAMS);
+
+typedef struct FrameRateContext {
+    const AVClass *class;
+    // parameters
+    AVRational dest_frame_rate;         ///< output frames per second
+    int flags;                          ///< flags affecting frame rate conversion algorithm
+    double scene_score;                 ///< score that denotes a scene change has happened
+    int interp_start;                   ///< start of range to apply linear interpolation
+    int interp_end;                     ///< end of range to apply linear interpolation
+
+    int line_size[4];                   ///< bytes of pixel data per line for each plane
+    int vsub;
+
+    AVRational srce_time_base;          ///< timebase of source
+    AVRational dest_time_base;          ///< timebase of destination
+
+    av_pixelutils_sad_fn sad;           ///< Sum of the absolute difference function (scene detect only)
+    double prev_mafd;                   ///< previous MAFD                           (scene detect only)
+
+    int blend_factor_max;
+    int bitdepth;
+    AVFrame *work;
+
+    AVFrame *f0;                        ///< last frame
+    AVFrame *f1;                        ///< current frame
+    int64_t pts0;                       ///< last frame pts in dest_time_base
+    int64_t pts1;                       ///< current frame pts in dest_time_base
+    int64_t delta;                      ///< pts1 to pts0 delta
+    double score;                       ///< scene change score (f0 to f1)
+    int flush;                          ///< 1 if the filter is being flushed
+    int64_t start_pts;                  ///< pts of the first output frame
+    int64_t n;                          ///< output frame counter
+
+    blend_func blend;
+} FrameRateContext;
+
+void ff_framerate_init(FrameRateContext *s);
+void ff_framerate_init_x86(FrameRateContext *s);
+
+#endif /* AVFILTER_FRAMERATE_H */

diff --git a/libavfilter/framesync.c b/libavfilter/framesync.c
index 82d7157..22d3f09 100644
--- a/libavfilter/framesync.c
+++ b/libavfilter/framesync.c

@@ -61,7 +61,7 @@
 
 static int consume_from_fifos(FFFrameSync *fs);
 
-const AVClass *framesync_get_class(void)
+const AVClass *ff_framesync_get_class(void)
 {
     return &framesync_class;
 }
@@ -406,7 +406,7 @@
     ret = ff_inlink_make_frame_writable(fs->parent->inputs[0], f0);
     if (ret < 0) {
         av_frame_free(f0);
-        av_frame_free(f1);
+        *f1 = NULL;
         return ret;
     }
     return 0;

diff --git a/libavfilter/framesync.h b/libavfilter/framesync.h
index 9fdc4d1..37743cc 100644
--- a/libavfilter/framesync.h
+++ b/libavfilter/framesync.h

@@ -211,7 +211,7 @@
 /**
  * Get the class for the framesync object.
  */
-const AVClass *framesync_get_class(void);
+const AVClass *ff_framesync_get_class(void);
 
 /**
  * Pre-initialize a frame sync structure.
@@ -286,6 +286,9 @@
  * @param f0  used to return the main frame
  * @param f1  used to return the second frame, or NULL if disabled
  * @return  >=0 for success or AVERROR code
+ * @note  The frame returned in f0 belongs to the caller (get = 1 in
+ * ff_framesync_get_frame()) while the frame returned in f1 is still owned
+ * by the framesync structure.
  */
 int ff_framesync_dualinput_get(FFFrameSync *fs, AVFrame **f0, AVFrame **f1);
 
@@ -301,11 +304,11 @@
     return 0; \
 } \
 static const AVClass *name##_child_class_next(const AVClass *prev) { \
-    return prev ? NULL : framesync_get_class(); \
+    return prev ? NULL : ff_framesync_get_class(); \
 } \
 static void *name##_child_next(void *obj, void *prev) { \
     context *s = obj; \
-    s->fs.class = framesync_get_class(); /* FIXME */ \
+    s->fs.class = ff_framesync_get_class(); /* FIXME */ \
     return prev ? NULL : &s->field; \
 } \
 static const AVClass name##_class = { \

diff --git a/libavfilter/graphdump.c b/libavfilter/graphdump.c
index 7377719..8bc7b16 100644
--- a/libavfilter/graphdump.c
+++ b/libavfilter/graphdump.c

@@ -156,7 +156,7 @@
     AVBPrint buf;
     char *dump;
 
-    av_bprint_init(&buf, 0, 0);
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_COUNT_ONLY);
     avfilter_graph_dump_to_buf(&buf, graph);
     av_bprint_init(&buf, buf.len + 1, buf.len + 1);
     avfilter_graph_dump_to_buf(&buf, graph);

diff --git a/libavfilter/graphparser.c b/libavfilter/graphparser.c
index 1405926..d92b536 100644
--- a/libavfilter/graphparser.c
+++ b/libavfilter/graphparser.c

@@ -96,7 +96,7 @@
 static int create_filter(AVFilterContext **filt_ctx, AVFilterGraph *ctx, int index,
                          const char *name, const char *args, void *log_ctx)
 {
-    AVFilter *filt;
+    const AVFilter *filt;
     char name2[30];
     const char *inst_name = NULL, *filt_name = NULL;
     char *tmp_args = NULL;

diff --git a/libavfilter/hflip.h b/libavfilter/hflip.h
new file mode 100644
index 0000000..204090d
--- /dev/null
+++ b/libavfilter/hflip.h

@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2007 Benoit Fouet
+ * Copyright (c) 2010 Stefano Sabatini
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_HFLIP_H
+#define AVFILTER_HFLIP_H
+
+#include "avfilter.h"
+
+typedef struct FlipContext {
+    const AVClass *class;
+    int max_step[4];    ///< max pixel step for each plane, expressed as a number of bytes
+    int planewidth[4];  ///< width of each plane
+    int planeheight[4]; ///< height of each plane
+
+    void (*flip_line[4])(const uint8_t *src, uint8_t *dst, int w);
+} FlipContext;
+
+int ff_hflip_init(FlipContext *s, int step[4], int nb_planes);
+void ff_hflip_init_x86(FlipContext *s, int step[4], int nb_planes);
+
+#endif /* AVFILTER_HFLIP_H */

diff --git a/libavfilter/internal.h b/libavfilter/internal.h
index f9679ed..498bd33 100644
--- a/libavfilter/internal.h
+++ b/libavfilter/internal.h

@@ -411,4 +411,20 @@
  */
 int ff_filter_get_nb_threads(AVFilterContext *ctx);
 
+/**
+ * Perform any additional setup required for hardware frames.
+ *
+ * link->hw_frames_ctx must be set before calling this function.
+ * Inside link->hw_frames_ctx, the fields format, sw_format, width and
+ * height must be set.  If dynamically allocated pools are not supported,
+ * then initial_pool_size must also be set, to the minimum hardware frame
+ * pool size necessary for the filter to work (taking into account any
+ * frames which need to stored for use in operations as appropriate).  If
+ * default_pool_size is nonzero, then it will be used as the pool size if
+ * no other modification takes place (this can be used to preserve
+ * compatibility).
+ */
+int ff_filter_init_hw_frames(AVFilterContext *avctx, AVFilterLink *link,
+                             int default_pool_size);
+
 #endif /* AVFILTER_INTERNAL_H */

diff --git a/libavfilter/lavfutils.c b/libavfilter/lavfutils.c
index b6319cf..db4b69b 100644
--- a/libavfilter/lavfutils.c
+++ b/libavfilter/lavfutils.c

@@ -37,8 +37,6 @@
 
     av_init_packet(&pkt);
 
-    av_register_all();
-
     iformat = av_find_input_format("image2pipe");
     if ((ret = avformat_open_input(&format_ctx, filename, iformat, NULL)) < 0) {
         av_log(log_ctx, AV_LOG_ERROR,

diff --git a/libavfilter/opencl.c b/libavfilter/opencl.c
new file mode 100644
index 0000000..ac5eec6
--- /dev/null
+++ b/libavfilter/opencl.c

@@ -0,0 +1,339 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "libavutil/mem.h"
+#include "libavutil/pixdesc.h"
+
+#include "formats.h"
+#include "opencl.h"
+
+int ff_opencl_filter_query_formats(AVFilterContext *avctx)
+{
+    const static enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_OPENCL,
+        AV_PIX_FMT_NONE,
+    };
+    AVFilterFormats *formats;
+
+    formats = ff_make_format_list(pix_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_formats(avctx, formats);
+}
+
+static int opencl_filter_set_device(AVFilterContext *avctx,
+                                    AVBufferRef *device)
+{
+    OpenCLFilterContext *ctx = avctx->priv;
+
+    av_buffer_unref(&ctx->device_ref);
+
+    ctx->device_ref = av_buffer_ref(device);
+    if (!ctx->device_ref)
+        return AVERROR(ENOMEM);
+
+    ctx->device = (AVHWDeviceContext*)ctx->device_ref->data;
+    ctx->hwctx  = ctx->device->hwctx;
+
+    return 0;
+}
+
+int ff_opencl_filter_config_input(AVFilterLink *inlink)
+{
+    AVFilterContext   *avctx = inlink->dst;
+    OpenCLFilterContext *ctx = avctx->priv;
+    AVHWFramesContext *input_frames;
+    int err;
+
+    if (!inlink->hw_frames_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "OpenCL filtering requires a "
+               "hardware frames context on the input.\n");
+        return AVERROR(EINVAL);
+    }
+
+    // Extract the device and default output format from the first input.
+    if (avctx->inputs[0] != inlink)
+        return 0;
+
+    input_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
+    if (input_frames->format != AV_PIX_FMT_OPENCL)
+        return AVERROR(EINVAL);
+
+    err = opencl_filter_set_device(avctx, input_frames->device_ref);
+    if (err < 0)
+        return err;
+
+    // Default output parameters match input parameters.
+    if (ctx->output_format == AV_PIX_FMT_NONE)
+        ctx->output_format = input_frames->sw_format;
+    if (!ctx->output_width)
+        ctx->output_width  = inlink->w;
+    if (!ctx->output_height)
+        ctx->output_height = inlink->h;
+
+    return 0;
+}
+
+int ff_opencl_filter_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext   *avctx = outlink->src;
+    OpenCLFilterContext *ctx = avctx->priv;
+    AVBufferRef       *output_frames_ref = NULL;
+    AVHWFramesContext *output_frames;
+    int err;
+
+    av_buffer_unref(&outlink->hw_frames_ctx);
+
+    if (!ctx->device_ref) {
+        if (!avctx->hw_device_ctx) {
+            av_log(avctx, AV_LOG_ERROR, "OpenCL filtering requires an "
+                   "OpenCL device.\n");
+            return AVERROR(EINVAL);
+        }
+
+        err = opencl_filter_set_device(avctx, avctx->hw_device_ctx);
+        if (err < 0)
+            return err;
+    }
+
+    output_frames_ref = av_hwframe_ctx_alloc(ctx->device_ref);
+    if (!output_frames_ref) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    output_frames = (AVHWFramesContext*)output_frames_ref->data;
+
+    output_frames->format    = AV_PIX_FMT_OPENCL;
+    output_frames->sw_format = ctx->output_format;
+    output_frames->width     = ctx->output_width;
+    output_frames->height    = ctx->output_height;
+
+    err = av_hwframe_ctx_init(output_frames_ref);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialise output "
+               "frames: %d.\n", err);
+        goto fail;
+    }
+
+    outlink->hw_frames_ctx = output_frames_ref;
+    outlink->w = ctx->output_width;
+    outlink->h = ctx->output_height;
+
+    return 0;
+fail:
+    av_buffer_unref(&output_frames_ref);
+    return err;
+}
+
+int ff_opencl_filter_init(AVFilterContext *avctx)
+{
+    OpenCLFilterContext *ctx = avctx->priv;
+
+    ctx->output_format = AV_PIX_FMT_NONE;
+
+    return 0;
+}
+
+void ff_opencl_filter_uninit(AVFilterContext *avctx)
+{
+    OpenCLFilterContext *ctx = avctx->priv;
+    cl_int cle;
+
+    if (ctx->program) {
+        cle = clReleaseProgram(ctx->program);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "program: %d.\n", cle);
+    }
+
+    av_buffer_unref(&ctx->device_ref);
+}
+
+int ff_opencl_filter_load_program(AVFilterContext *avctx,
+                                  const char **program_source_array,
+                                  int nb_strings)
+{
+    OpenCLFilterContext *ctx = avctx->priv;
+    cl_int cle;
+
+    ctx->program = clCreateProgramWithSource(ctx->hwctx->context, nb_strings,
+                                             program_source_array,
+                                             NULL, &cle);
+    if (!ctx->program) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create program: %d.\n", cle);
+        return AVERROR(EIO);
+    }
+
+    cle = clBuildProgram(ctx->program, 1, &ctx->hwctx->device_id,
+                         NULL, NULL, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to build program: %d.\n", cle);
+
+        if (cle == CL_BUILD_PROGRAM_FAILURE) {
+            char *log;
+            size_t log_length;
+
+            clGetProgramBuildInfo(ctx->program, ctx->hwctx->device_id,
+                                  CL_PROGRAM_BUILD_LOG, 0, NULL, &log_length);
+
+            log = av_malloc(log_length);
+            if (log) {
+                cle = clGetProgramBuildInfo(ctx->program,
+                                            ctx->hwctx->device_id,
+                                            CL_PROGRAM_BUILD_LOG,
+                                            log_length, log, NULL);
+                if (cle == CL_SUCCESS)
+                    av_log(avctx, AV_LOG_ERROR, "Build log:\n%s\n", log);
+            }
+
+            av_free(log);
+        }
+
+        clReleaseProgram(ctx->program);
+        ctx->program = NULL;
+        return AVERROR(EIO);
+    }
+
+    return 0;
+}
+
+int ff_opencl_filter_load_program_from_file(AVFilterContext *avctx,
+                                            const char *filename)
+{
+    FILE *file;
+    char *src = NULL;
+    size_t pos, len, rb;
+    const char *src_const;
+    int err;
+
+    file = fopen(filename, "r");
+    if (!file) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to open program "
+               "source file \"%s\".\n", filename);
+        return AVERROR(ENOENT);
+    }
+
+    len = 1 << 16;
+    pos = 0;
+
+    err = av_reallocp(&src, len);
+    if (err < 0)
+        goto fail;
+
+    err = snprintf(src, len, "#line 1 \"%s\"\n", filename);
+    if (err < 0) {
+        err = AVERROR(errno);
+        goto fail;
+    }
+    if (err > len / 2) {
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+    pos = err;
+
+    while (1) {
+        rb = fread(src + pos, 1, len - pos - 1, file);
+        if (rb == 0 && ferror(file)) {
+            err = AVERROR(EIO);
+            goto fail;
+        }
+        pos += rb;
+        if (pos < len)
+            break;
+        len <<= 1;
+        err = av_reallocp(&src, len);
+        if (err < 0)
+            goto fail;
+    }
+    src[pos] = 0;
+
+    src_const = src;
+
+    err = ff_opencl_filter_load_program(avctx, &src_const, 1);
+fail:
+    fclose(file);
+    av_freep(&src);
+    return err;
+}
+
+int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx,
+                                          size_t *work_size,
+                                          AVFrame *frame, int plane,
+                                          int block_alignment)
+{
+    cl_mem image;
+    cl_mem_object_type type;
+    size_t width, height;
+    cl_int cle;
+
+    if (frame->format != AV_PIX_FMT_OPENCL) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid frame format %s, "
+               "opencl required.\n", av_get_pix_fmt_name(frame->format));
+        return AVERROR(EINVAL);
+    }
+
+    image = (cl_mem)frame->data[plane];
+    if (!image) {
+        av_log(avctx, AV_LOG_ERROR, "Plane %d required but not set.\n",
+               plane);
+        return AVERROR(EINVAL);
+    }
+
+    cle = clGetMemObjectInfo(image, CL_MEM_TYPE, sizeof(type),
+                             &type, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query object type of "
+               "plane %d: %d.\n", plane, cle);
+        return AVERROR_UNKNOWN;
+    }
+    if (type != CL_MEM_OBJECT_IMAGE2D) {
+        av_log(avctx, AV_LOG_ERROR, "Plane %d is not a 2D image.\n",
+               plane);
+        return AVERROR(EINVAL);
+    }
+
+    cle = clGetImageInfo(image, CL_IMAGE_WIDTH,  sizeof(size_t),
+                         &width, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d width: %d.\n",
+               plane, cle);
+        return AVERROR_UNKNOWN;
+    }
+
+    cle = clGetImageInfo(image, CL_IMAGE_HEIGHT, sizeof(size_t),
+                         &height, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d height: %d.\n",
+               plane, cle);
+        return AVERROR_UNKNOWN;
+    }
+
+    if (block_alignment) {
+        width  = FFALIGN(width,  block_alignment);
+        height = FFALIGN(height, block_alignment);
+    }
+
+    work_size[0] = width;
+    work_size[1] = height;
+
+    return 0;
+}

diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h
new file mode 100644
index 0000000..1b7f117
--- /dev/null
+++ b/libavfilter/opencl.h

@@ -0,0 +1,128 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_OPENCL_H
+#define AVFILTER_OPENCL_H
+
+// The intended target is OpenCL 1.2, so disable warnings for APIs
+// deprecated after that.  This primarily applies to clCreateCommandQueue(),
+// we can't use the replacement clCreateCommandQueueWithProperties() because
+// it was introduced in OpenCL 2.0.
+#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
+
+#include "libavutil/buffer.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_opencl.h"
+#include "libavutil/pixfmt.h"
+
+#include "avfilter.h"
+
+typedef struct OpenCLFilterContext {
+    const AVClass     *class;
+
+    AVBufferRef       *device_ref;
+    AVHWDeviceContext *device;
+    AVOpenCLDeviceContext *hwctx;
+
+    cl_program         program;
+
+    enum AVPixelFormat output_format;
+    int                output_width;
+    int                output_height;
+} OpenCLFilterContext;
+
+
+/**
+ * set argument to specific Kernel.
+ * This macro relies on usage of local label "fail" and variables:
+ * avctx, cle and err.
+ */
+#define CL_SET_KERNEL_ARG(kernel, arg_num, type, arg)          \
+    cle = clSetKernelArg(kernel, arg_num, sizeof(type), arg);  \
+    if (cle != CL_SUCCESS) {                                   \
+        av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "    \
+               "argument %d: error %d.\n", arg_num, cle);      \
+        err = AVERROR(EIO);                                    \
+        goto fail;                                             \
+    }
+
+/**
+ * A helper macro to handle OpenCL errors. It will assign errcode to
+ * variable err, log error msg, and jump to fail label on error.
+ */
+#define CL_FAIL_ON_ERROR(errcode, ...) do {                    \
+        if (cle != CL_SUCCESS) {                               \
+            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__);          \
+            err = errcode;                                     \
+            goto fail;                                         \
+        }                                                      \
+    } while(0)
+
+/**
+ * Return that all inputs and outputs support only AV_PIX_FMT_OPENCL.
+ */
+int ff_opencl_filter_query_formats(AVFilterContext *avctx);
+
+/**
+ * Check that the input link contains a suitable hardware frames
+ * context and extract the device from it.
+ */
+int ff_opencl_filter_config_input(AVFilterLink *inlink);
+
+/**
+ * Create a suitable hardware frames context for the output.
+ */
+int ff_opencl_filter_config_output(AVFilterLink *outlink);
+
+/**
+ * Initialise an OpenCL filter context.
+ */
+int ff_opencl_filter_init(AVFilterContext *avctx);
+
+/**
+ * Uninitialise an OpenCL filter context.
+ */
+void ff_opencl_filter_uninit(AVFilterContext *avctx);
+
+/**
+ * Load a new OpenCL program from strings in memory.
+ *
+ * Creates a new program and compiles it for the current device.
+ * Will log any build errors if compilation fails.
+ */
+int ff_opencl_filter_load_program(AVFilterContext *avctx,
+                                  const char **program_source_array,
+                                  int nb_strings);
+
+/**
+ * Load a new OpenCL program from a file.
+ *
+ * Same as ff_opencl_filter_load_program(), but from a file.
+ */
+int ff_opencl_filter_load_program_from_file(AVFilterContext *avctx,
+                                            const char *filename);
+
+/**
+ * Find the work size needed needed for a given plane of an image.
+ */
+int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx,
+                                          size_t *work_size,
+                                          AVFrame *frame, int plane,
+                                          int block_alignment);
+
+#endif /* AVFILTER_OPENCL_H */

diff --git a/libavfilter/opencl/.gitignore b/libavfilter/opencl/.gitignore
new file mode 100644
index 0000000..064a8d8
--- /dev/null
+++ b/libavfilter/opencl/.gitignore

@@ -0,0 +1 @@
+*.c

diff --git a/libavfilter/opencl/avgblur.cl b/libavfilter/opencl/avgblur.cl
new file mode 100644
index 0000000..6a8d70d
--- /dev/null
+++ b/libavfilter/opencl/avgblur.cl

@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Dylan Fernando
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+__kernel void avgblur_horiz(__write_only image2d_t dst,
+                            __read_only  image2d_t src,
+                            int rad)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+    int2 size = (int2)(get_global_size(0), get_global_size(1));
+
+    int count = 0;
+    float4 acc = (float4)(0,0,0,0);
+
+    for (int xx = max(0, loc.x - rad); xx < min(loc.x + rad + 1, size.x); xx++) {
+        count++;
+        acc += read_imagef(src, sampler, (int2)(xx, loc.y));
+    }
+
+    write_imagef(dst, loc, acc / count);
+}
+
+__kernel void avgblur_vert(__write_only image2d_t dst,
+                           __read_only  image2d_t src,
+                           int radv)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+    int2 size = (int2)(get_global_size(0), get_global_size(1));
+
+    int count = 0;
+    float4 acc = (float4)(0,0,0,0);
+
+    for (int yy = max(0, loc.y - radv); yy < min(loc.y + radv + 1, size.y); yy++) {
+        count++;
+        acc += read_imagef(src, sampler, (int2)(loc.x, yy));
+    }
+
+    write_imagef(dst, loc, acc / count);
+}

diff --git a/libavfilter/opencl/colorspace_common.cl b/libavfilter/opencl/colorspace_common.cl
new file mode 100644
index 0000000..94a4dd0
--- /dev/null
+++ b/libavfilter/opencl/colorspace_common.cl

@@ -0,0 +1,220 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define ST2084_MAX_LUMINANCE 10000.0f
+#define REFERENCE_WHITE 100.0f
+
+#if chroma_loc == 1
+    #define chroma_sample(a,b,c,d) (((a) + (c)) * 0.5f)
+#elif chroma_loc == 3
+    #define chroma_sample(a,b,c,d) (a)
+#elif chroma_loc == 4
+    #define chroma_sample(a,b,c,d) (((a) + (b)) * 0.5f)
+#elif chroma_loc == 5
+    #define chroma_sample(a,b,c,d) (c)
+#elif chroma_loc == 6
+    #define chroma_sample(a,b,c,d) (((c) + (d)) * 0.5f)
+#else
+    #define chroma_sample(a,b,c,d) (((a) + (b) + (c) + (d)) * 0.25f)
+#endif
+
+constant const float ST2084_M1 = 0.1593017578125f;
+constant const float ST2084_M2 = 78.84375f;
+constant const float ST2084_C1 = 0.8359375f;
+constant const float ST2084_C2 = 18.8515625f;
+constant const float ST2084_C3 = 18.6875f;
+
+__constant float yuv2rgb_bt2020[] = {
+    1.0f, 0.0f, 1.4746f,
+    1.0f, -0.16455f, -0.57135f,
+    1.0f, 1.8814f, 0.0f
+};
+
+__constant float yuv2rgb_bt709[] = {
+    1.0f, 0.0f, 1.5748f,
+    1.0f, -0.18732f, -0.46812f,
+    1.0f, 1.8556f, 0.0f
+};
+
+__constant float rgb2yuv_bt709[] = {
+    0.2126f, 0.7152f, 0.0722f,
+    -0.11457f, -0.38543f, 0.5f,
+    0.5f, -0.45415f, -0.04585f
+};
+
+__constant float rgb2yuv_bt2020[] ={
+    0.2627f, 0.678f, 0.0593f,
+    -0.1396f, -0.36037f, 0.5f,
+    0.5f, -0.4598f, -0.0402f,
+};
+
+
+float get_luma_dst(float3 c) {
+    return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z;
+}
+
+float get_luma_src(float3 c) {
+    return luma_src.x * c.x + luma_src.y * c.y + luma_src.z * c.z;
+}
+
+float3 get_chroma_sample(float3 a, float3 b, float3 c, float3 d) {
+    return chroma_sample(a, b, c, d);
+}
+
+float eotf_st2084(float x) {
+    float p = powr(x, 1.0f / ST2084_M2);
+    float a = max(p -ST2084_C1, 0.0f);
+    float b = max(ST2084_C2 - ST2084_C3 * p, 1e-6f);
+    float c  = powr(a / b, 1.0f / ST2084_M1);
+    return x > 0.0f ? c * ST2084_MAX_LUMINANCE / REFERENCE_WHITE : 0.0f;
+}
+
+__constant const float HLG_A = 0.17883277f;
+__constant const float HLG_B = 0.28466892f;
+__constant const float HLG_C = 0.55991073f;
+
+// linearizer for HLG
+float inverse_oetf_hlg(float x) {
+    float a = 4.0f * x * x;
+    float b = exp((x - HLG_C) / HLG_A) + HLG_B;
+    return x < 0.5f ? a : b;
+}
+
+// delinearizer for HLG
+float oetf_hlg(float x) {
+    float a = 0.5f * sqrt(x);
+    float b = HLG_A * log(x - HLG_B) + HLG_C;
+    return x <= 1.0f ? a : b;
+}
+
+float3 ootf_hlg(float3 c, float peak) {
+    float luma = get_luma_src(c);
+    float gamma =  1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f);
+    gamma = max(1.0f, gamma);
+    float factor = peak * powr(luma, gamma - 1.0f) / powr(12.0f, gamma);
+    return c * factor;
+}
+
+float3 inverse_ootf_hlg(float3 c, float peak) {
+    float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f);
+    c *=  powr(12.0f, gamma) / peak;
+    c /= powr(get_luma_dst(c), (gamma - 1.0f) / gamma);
+    return c;
+}
+
+float inverse_eotf_bt1886(float c) {
+    return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f);
+}
+
+float oetf_bt709(float c) {
+    c = c < 0.0f ? 0.0f : c;
+    float r1 = 4.5f * c;
+    float r2 = 1.099f * powr(c, 0.45f) - 0.099f;
+    return c < 0.018f ? r1 : r2;
+}
+float inverse_oetf_bt709(float c) {
+    float r1 = c / 4.5f;
+    float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f);
+    return c < 0.081f ? r1 : r2;
+}
+
+float3 yuv2rgb(float y, float u, float v) {
+#ifdef FULL_RANGE_IN
+    u -= 0.5f; v -= 0.5f;
+#else
+    y = (y * 255.0f -  16.0f) / 219.0f;
+    u = (u * 255.0f - 128.0f) / 224.0f;
+    v = (v * 255.0f - 128.0f) / 224.0f;
+#endif
+    float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2];
+    float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5];
+    float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8];
+    return (float3)(r, g, b);
+}
+
+float3 yuv2lrgb(float3 yuv) {
+    float3 rgb = yuv2rgb(yuv.x, yuv.y, yuv.z);
+    float r = linearize(rgb.x);
+    float g = linearize(rgb.y);
+    float b = linearize(rgb.z);
+    return (float3)(r, g, b);
+}
+
+float3 rgb2yuv(float r, float g, float b) {
+    float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2];
+    float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5];
+    float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8];
+#ifdef FULL_RANGE_OUT
+    u += 0.5f; v += 0.5f;
+#else
+    y = (219.0f * y + 16.0f) / 255.0f;
+    u = (224.0f * u + 128.0f) / 255.0f;
+    v = (224.0f * v + 128.0f) / 255.0f;
+#endif
+    return (float3)(y, u, v);
+}
+
+float rgb2y(float r, float g, float b) {
+    float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2];
+    y = (219.0f * y + 16.0f) / 255.0f;
+    return y;
+}
+
+float3 lrgb2yuv(float3 c) {
+    float r = delinearize(c.x);
+    float g = delinearize(c.y);
+    float b = delinearize(c.z);
+
+    return rgb2yuv(r, g, b);
+}
+
+float lrgb2y(float3 c) {
+    float r = delinearize(c.x);
+    float g = delinearize(c.y);
+    float b = delinearize(c.z);
+
+    return rgb2y(r, g, b);
+}
+
+float3 lrgb2lrgb(float3 c) {
+#ifdef RGB2RGB_PASSTHROUGH
+    return c;
+#else
+    float r = c.x, g = c.y, b = c.z;
+    float rr = rgb2rgb[0] * r + rgb2rgb[1] * g + rgb2rgb[2] * b;
+    float gg = rgb2rgb[3] * r + rgb2rgb[4] * g + rgb2rgb[5] * b;
+    float bb = rgb2rgb[6] * r + rgb2rgb[7] * g + rgb2rgb[8] * b;
+    return (float3)(rr, gg, bb);
+#endif
+}
+
+float3 ootf(float3 c, float peak) {
+#ifdef ootf_impl
+    return ootf_impl(c, peak);
+#else
+    return c;
+#endif
+}
+
+float3 inverse_ootf(float3 c, float peak) {
+#ifdef inverse_ootf_impl
+    return inverse_ootf_impl(c, peak);
+#else
+    return c;
+#endif
+}

diff --git a/libavfilter/opencl/convolution.cl b/libavfilter/opencl/convolution.cl
new file mode 100644
index 0000000..815c779
--- /dev/null
+++ b/libavfilter/opencl/convolution.cl

@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2018 Danil Iashchenko
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+__kernel void convolution_global(__write_only image2d_t dst,
+                                 __read_only  image2d_t src,
+                                 int coef_matrix_dim,
+                                 __constant float *coef_matrix,
+                                 float div,
+                                 float bias)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+
+    const int half_matrix_dim = (coef_matrix_dim / 2);
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+    float4 convPix = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+
+    for (int conv_i = -half_matrix_dim; conv_i <= half_matrix_dim; conv_i++) {
+        for (int conv_j = -half_matrix_dim; conv_j <= half_matrix_dim; conv_j++) {
+            float4 px = read_imagef(src, sampler, loc + (int2)(conv_j, conv_i));
+            convPix += px * coef_matrix[(conv_i + half_matrix_dim) * coef_matrix_dim +
+                                        (conv_j + half_matrix_dim)];
+        }
+     }
+     float4 dstPix = convPix * div + bias;
+     write_imagef(dst, loc, dstPix);
+}
+
+
+__kernel void sobel_global(__write_only image2d_t dst,
+                           __read_only  image2d_t src,
+                             float div,
+                             float bias)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 0,-1)) * -2 +
+                  read_imagef(src, sampler, loc + (int2)( 1,-1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 0, 1)) *  2 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 1)) *  1;
+
+    float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 0)) * -2 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 1,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 0)) *  2 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 1)) *  1;
+
+    float4 dstPix = hypot(sum1, sum2) * div + bias;
+    write_imagef(dst, loc, dstPix);
+}
+
+__kernel void prewitt_global(__write_only image2d_t dst,
+                             __read_only  image2d_t src,
+                             float div,
+                             float bias)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 0,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 1,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 0, 1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 1)) * -1;
+
+    float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 0)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 1,-1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 0)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 1)) * -1;
+
+    float4 dstPix = hypot(sum1, sum2) * div + bias;
+    write_imagef(dst, loc, dstPix);
+}
+
+__kernel void roberts_global(__write_only image2d_t dst,
+                             __read_only  image2d_t src,
+                             float div,
+                             float bias)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 0,-1)) * -1;
+
+
+    float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1, 0)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 0, 0)) *  1;
+
+
+    float4 dstPix = hypot(sum1, sum2) * div + bias;
+    write_imagef(dst, loc, dstPix);
+}

diff --git a/libavfilter/opencl/neighbor.cl b/libavfilter/opencl/neighbor.cl
new file mode 100644
index 0000000..e619af3
--- /dev/null
+++ b/libavfilter/opencl/neighbor.cl

@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Danil Iashchenko
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+__kernel void erosion_global(__write_only image2d_t dst,
+                             __read_only  image2d_t src,
+                             float threshold,
+                             __constant int *coord)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 px = read_imagef(src, sampler, loc);
+    float limit = px.x - threshold;
+    if (limit < 0) {
+        limit = 0;
+    }
+
+    for (int i = -1; i <= 1; i++) {
+        for (int j = -1; j <= 1; j++) {
+            if (coord[(j + 1) * 3 + (i + 1)] == 1) {
+                float4 cur = read_imagef(src, sampler, loc + (int2)(i, j));
+                if (cur.x < px.x) {
+                    px = cur;
+                }
+            }
+        }
+    }
+    if (limit > px.x) {
+        px = (float4)(limit);
+    }
+    write_imagef(dst, loc, px);
+}
+
+
+__kernel void dilation_global(__write_only image2d_t dst,
+                              __read_only  image2d_t src,
+                              float threshold,
+                              __constant int *coord)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 px = read_imagef(src, sampler, loc);
+    float limit = px.x + threshold;
+    if (limit > 1) {
+        limit = 1;
+    }
+
+    for (int i = -1; i <= 1; i++) {
+        for (int j = -1; j <= 1; j++) {
+            if (coord[(j + 1) * 3 + (i + 1)] == 1) {
+                float4 cur = read_imagef(src, sampler, loc + (int2)(i, j));
+                if (cur.x > px.x) {
+                    px = cur;
+                }
+            }
+        }
+    }
+    if (limit < px.x) {
+        px = (float4)(limit);
+    }
+    write_imagef(dst, loc, px);
+}

diff --git a/libavfilter/opencl/overlay.cl b/libavfilter/opencl/overlay.cl
new file mode 100644
index 0000000..8c783d0
--- /dev/null
+++ b/libavfilter/opencl/overlay.cl

@@ -0,0 +1,104 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+__kernel void overlay_no_alpha(__write_only image2d_t dst,
+                               __read_only  image2d_t main,
+                               __read_only  image2d_t overlay,
+                               int x_position,
+                               int y_position)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+
+    int2 overlay_size = get_image_dim(overlay);
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    if (loc.x <  x_position ||
+        loc.y <  y_position ||
+        loc.x >= overlay_size.x + x_position ||
+        loc.y >= overlay_size.y + y_position) {
+        float4 val = read_imagef(main, sampler, loc);
+        write_imagef(dst, loc, val);
+    } else {
+        int2 loc_overlay = (int2)(x_position, y_position);
+        float4 val       = read_imagef(overlay, sampler, loc - loc_overlay);
+        write_imagef(dst, loc, val);
+    }
+}
+
+__kernel void overlay_internal_alpha(__write_only image2d_t dst,
+                                     __read_only  image2d_t main,
+                                     __read_only  image2d_t overlay,
+                                     int x_position,
+                                     int y_position)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+
+    int2 overlay_size = get_image_dim(overlay);
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    if (loc.x <  x_position ||
+        loc.y <  y_position ||
+        loc.x >= overlay_size.x + x_position ||
+        loc.y >= overlay_size.y + y_position) {
+        float4 val = read_imagef(main, sampler, loc);
+        write_imagef(dst, loc, val);
+    } else {
+        int2 loc_overlay  = (int2)(x_position, y_position);
+        float4 in_main    = read_imagef(main,    sampler, loc);
+        float4 in_overlay = read_imagef(overlay, sampler, loc - loc_overlay);
+        float4 val        = in_overlay * in_overlay.w + in_main * (1.0f - in_overlay.w);
+        write_imagef(dst, loc, val);
+    }
+}
+
+__kernel void overlay_external_alpha(__write_only image2d_t dst,
+                                     __read_only  image2d_t main,
+                                     __read_only  image2d_t overlay,
+                                     __read_only  image2d_t alpha,
+                                     int x_position,
+                                     int y_position,
+                                     int alpha_adj_x,
+                                     int alpha_adj_y)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+
+    int2 overlay_size = get_image_dim(overlay);
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    if (loc.x <  x_position ||
+        loc.y <  y_position ||
+        loc.x >= overlay_size.x + x_position ||
+        loc.y >= overlay_size.y + y_position) {
+        float4 val = read_imagef(main, sampler, loc);
+        write_imagef(dst, loc, val);
+    } else {
+        int2 loc_overlay  = (int2)(x_position, y_position);
+        float4 in_main    = read_imagef(main,    sampler, loc);
+        float4 in_overlay = read_imagef(overlay, sampler, loc - loc_overlay);
+
+        int2 loc_alpha    = (int2)(loc.x * alpha_adj_x,
+                                   loc.y * alpha_adj_y) - loc_overlay;
+        float4 in_alpha   = read_imagef(alpha,   sampler, loc_alpha);
+
+        float4 val = in_overlay * in_alpha.x + in_main * (1.0f - in_alpha.x);
+        write_imagef(dst, loc, val);
+    }
+}

diff --git a/libavfilter/opencl/tonemap.cl b/libavfilter/opencl/tonemap.cl
new file mode 100644
index 0000000..9448ba4
--- /dev/null
+++ b/libavfilter/opencl/tonemap.cl

@@ -0,0 +1,272 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define REFERENCE_WHITE 100.0f
+extern float3 lrgb2yuv(float3);
+extern float  lrgb2y(float3);
+extern float3 yuv2lrgb(float3);
+extern float3 lrgb2lrgb(float3);
+extern float  get_luma_src(float3);
+extern float  get_luma_dst(float3);
+extern float3 ootf(float3 c, float peak);
+extern float3 inverse_ootf(float3 c, float peak);
+extern float3 get_chroma_sample(float3, float3, float3, float3);
+
+struct detection_result {
+    float peak;
+    float average;
+};
+
+float hable_f(float in) {
+    float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f;
+    return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f;
+}
+
+float direct(float s, float peak) {
+    return s;
+}
+
+float linear(float s, float peak) {
+    return s * tone_param / peak;
+}
+
+float gamma(float s, float peak) {
+    float p = s > 0.05f ? s /peak : 0.05f / peak;
+    float v = powr(p, 1.0f / tone_param);
+    return s > 0.05f ? v : (s * v /0.05f);
+}
+
+float clip(float s, float peak) {
+    return clamp(s * tone_param, 0.0f, 1.0f);
+}
+
+float reinhard(float s, float peak) {
+    return s / (s + tone_param) * (peak + tone_param) / peak;
+}
+
+float hable(float s, float peak) {
+    return hable_f(s)/hable_f(peak);
+}
+
+float mobius(float s, float peak) {
+    float j = tone_param;
+    float a, b;
+
+    if (s <= j)
+        return s;
+
+    a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak);
+    b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f);
+
+    return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b);
+}
+
+// detect peak/average signal of a frame, the algorithm was ported from:
+// libplacebo (https://github.com/haasn/libplacebo)
+struct detection_result
+detect_peak_avg(global uint *util_buf, __local uint *sum_wg,
+            float signal, float peak) {
+// layout of the util buffer
+//
+// Name:             : Size (units of 4-bytes)
+// average buffer    : detection_frames + 1
+// peak buffer       : detection_frames + 1
+// workgroup counter : 1
+// total of peak     : 1
+// total of average  : 1
+// frame index       : 1
+// frame number      : 1
+    global uint *avg_buf = util_buf;
+    global uint *peak_buf = avg_buf + DETECTION_FRAMES + 1;
+    global uint *counter_wg_p = peak_buf + DETECTION_FRAMES + 1;
+    global uint *max_total_p = counter_wg_p + 1;
+    global uint *avg_total_p = max_total_p + 1;
+    global uint *frame_idx_p = avg_total_p + 1;
+    global uint *scene_frame_num_p = frame_idx_p + 1;
+
+    uint frame_idx = *frame_idx_p;
+    uint scene_frame_num = *scene_frame_num_p;
+
+    size_t lidx = get_local_id(0);
+    size_t lidy = get_local_id(1);
+    size_t lsizex = get_local_size(0);
+    size_t lsizey = get_local_size(1);
+    uint num_wg = get_num_groups(0) * get_num_groups(1);
+    size_t group_idx = get_group_id(0);
+    size_t group_idy = get_group_id(1);
+    struct detection_result r = {peak, sdr_avg};
+    if (lidx == 0 && lidy == 0)
+        *sum_wg = 0;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // update workgroup sum
+    atomic_add(sum_wg, (uint)(signal * REFERENCE_WHITE));
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // update frame peak/avg using work-group-average.
+    if (lidx == 0 && lidy == 0) {
+        uint avg_wg = *sum_wg / (lsizex * lsizey);
+        atomic_max(&peak_buf[frame_idx], avg_wg);
+        atomic_add(&avg_buf[frame_idx], avg_wg);
+    }
+
+    if (scene_frame_num > 0) {
+        float peak = (float)*max_total_p / (REFERENCE_WHITE * scene_frame_num);
+        float avg = (float)*avg_total_p / (REFERENCE_WHITE * scene_frame_num);
+        r.peak = max(1.0f, peak);
+        r.average = max(0.25f, avg);
+    }
+
+    if (lidx == 0 && lidy == 0 && atomic_add(counter_wg_p, 1) == num_wg - 1) {
+        *counter_wg_p = 0;
+        avg_buf[frame_idx] /= num_wg;
+
+        if (scene_threshold > 0.0f) {
+            uint cur_max = peak_buf[frame_idx];
+            uint cur_avg = avg_buf[frame_idx];
+            int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p;
+
+            if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) {
+                for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
+                  avg_buf[i] = 0;
+                for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
+                  peak_buf[i] = 0;
+                *avg_total_p = *max_total_p = 0;
+                *scene_frame_num_p = 0;
+                avg_buf[frame_idx] = cur_avg;
+                peak_buf[frame_idx] = cur_max;
+            }
+        }
+        uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1);
+        // add current frame, subtract next frame
+        *max_total_p += peak_buf[frame_idx] - peak_buf[next];
+        *avg_total_p += avg_buf[frame_idx] - avg_buf[next];
+        // reset next frame
+        peak_buf[next] = avg_buf[next] = 0;
+        *frame_idx_p = next;
+        *scene_frame_num_p = min(*scene_frame_num_p + 1,
+                                 (uint)DETECTION_FRAMES);
+    }
+    return r;
+}
+
+float3 map_one_pixel_rgb(float3 rgb, float peak, float average) {
+    float sig = max(max(rgb.x, max(rgb.y, rgb.z)), 1e-6f);
+
+    // Rescale the variables in order to bring it into a representation where
+    // 1.0 represents the dst_peak. This is because all of the tone mapping
+    // algorithms are defined in such a way that they map to the range [0.0, 1.0].
+    if (target_peak > 1.0f) {
+        sig *= 1.0f / target_peak;
+        peak *= 1.0f / target_peak;
+    }
+
+    float sig_old = sig;
+
+    // Scale the signal to compensate for differences in the average brightness
+    float slope = min(1.0f, sdr_avg / average);
+    sig *= slope;
+    peak *= slope;
+
+    // Desaturate the color using a coefficient dependent on the signal level
+    if (desat_param > 0.0f) {
+        float luma = get_luma_dst(rgb);
+        float coeff = max(sig - 0.18f, 1e-6f) / max(sig, 1e-6f);
+        coeff = native_powr(coeff, 10.0f / desat_param);
+        rgb = mix(rgb, (float3)luma, (float3)coeff);
+        sig = mix(sig, luma * slope, coeff);
+    }
+
+    sig = TONE_FUNC(sig, peak);
+
+    sig = min(sig, 1.0f);
+    rgb *= (sig/sig_old);
+    return rgb;
+}
+// map from source space YUV to destination space RGB
+float3 map_to_dst_space_from_yuv(float3 yuv, float peak) {
+    float3 c = yuv2lrgb(yuv);
+    c = ootf(c, peak);
+    c = lrgb2lrgb(c);
+    return c;
+}
+
+__kernel void tonemap(__write_only image2d_t dst1,
+                      __read_only  image2d_t src1,
+                      __write_only image2d_t dst2,
+                      __read_only  image2d_t src2,
+                      global uint *util_buf,
+                      float peak
+                      )
+{
+    __local uint sum_wg;
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+    int xi = get_global_id(0);
+    int yi = get_global_id(1);
+    // each work item process four pixels
+    int x = 2 * xi;
+    int y = 2 * yi;
+
+    float y0 = read_imagef(src1, sampler, (int2)(x,     y)).x;
+    float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x;
+    float y2 = read_imagef(src1, sampler, (int2)(x,     y + 1)).x;
+    float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x;
+    float2 uv = read_imagef(src2, sampler, (int2)(xi,     yi)).xy;
+
+    float3 c0 = map_to_dst_space_from_yuv((float3)(y0, uv.x, uv.y), peak);
+    float3 c1 = map_to_dst_space_from_yuv((float3)(y1, uv.x, uv.y), peak);
+    float3 c2 = map_to_dst_space_from_yuv((float3)(y2, uv.x, uv.y), peak);
+    float3 c3 = map_to_dst_space_from_yuv((float3)(y3, uv.x, uv.y), peak);
+
+    float sig0 = max(c0.x, max(c0.y, c0.z));
+    float sig1 = max(c1.x, max(c1.y, c1.z));
+    float sig2 = max(c2.x, max(c2.y, c2.z));
+    float sig3 = max(c3.x, max(c3.y, c3.z));
+    float sig = max(sig0, max(sig1, max(sig2, sig3)));
+
+    struct detection_result r = detect_peak_avg(util_buf, &sum_wg, sig, peak);
+
+    float3 c0_old = c0, c1_old = c1, c2_old = c2;
+    c0 = map_one_pixel_rgb(c0, r.peak, r.average);
+    c1 = map_one_pixel_rgb(c1, r.peak, r.average);
+    c2 = map_one_pixel_rgb(c2, r.peak, r.average);
+    c3 = map_one_pixel_rgb(c3, r.peak, r.average);
+
+    c0 = inverse_ootf(c0, target_peak);
+    c1 = inverse_ootf(c1, target_peak);
+    c2 = inverse_ootf(c2, target_peak);
+    c3 = inverse_ootf(c3, target_peak);
+
+    y0 = lrgb2y(c0);
+    y1 = lrgb2y(c1);
+    y2 = lrgb2y(c2);
+    y3 = lrgb2y(c3);
+    float3 chroma_c = get_chroma_sample(c0, c1, c2, c3);
+    float3 chroma = lrgb2yuv(chroma_c);
+
+    if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) {
+        write_imagef(dst1, (int2)(x, y), (float4)(y0, 0.0f, 0.0f, 1.0f));
+        write_imagef(dst1, (int2)(x+1, y), (float4)(y1, 0.0f, 0.0f, 1.0f));
+        write_imagef(dst1, (int2)(x, y+1), (float4)(y2, 0.0f, 0.0f, 1.0f));
+        write_imagef(dst1, (int2)(x+1, y+1), (float4)(y3, 0.0f, 0.0f, 1.0f));
+        write_imagef(dst2, (int2)(xi, yi),
+                     (float4)(chroma.y, chroma.z, 0.0f, 1.0f));
+    }
+}

diff --git a/libavfilter/opencl/unsharp.cl b/libavfilter/opencl/unsharp.cl
new file mode 100644
index 0000000..e629834
--- /dev/null
+++ b/libavfilter/opencl/unsharp.cl

@@ -0,0 +1,99 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+__kernel void unsharp_global(__write_only image2d_t dst,
+                             __read_only  image2d_t src,
+                             int size_x,
+                             int size_y,
+                             float amount,
+                             __constant float *coef_matrix)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+    int2 loc    = (int2)(get_global_id(0), get_global_id(1));
+    int2 centre = (int2)(size_x / 2, size_y / 2);
+
+    float4 val = read_imagef(src, sampler, loc);
+    float4 sum = 0.0f;
+    int x, y;
+
+    for (y = 0; y < size_y; y++) {
+        for (x = 0; x < size_x; x++) {
+            int2 pos = loc + (int2)(x, y) - centre;
+            sum += coef_matrix[y * size_x + x] *
+                read_imagef(src, sampler, pos);
+        }
+    }
+
+    write_imagef(dst, loc, val + (val - sum) * amount);
+}
+
+__kernel void unsharp_local(__write_only image2d_t dst,
+                            __read_only  image2d_t src,
+                            int size_x,
+                            int size_y,
+                            float amount,
+                            __constant float *coef_x,
+                            __constant float *coef_y)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE |
+                               CLK_FILTER_NEAREST);
+    int2 block = (int2)(get_group_id(0), get_group_id(1)) * 16;
+    int2 pos   = (int2)(get_local_id(0), get_local_id(1));
+
+    __local float4 tmp[32][32];
+
+    int rad_x = size_x / 2;
+    int rad_y = size_y / 2;
+    int x, y;
+
+    for (y = 0; y <= 1; y++) {
+        for (x = 0; x <= 1; x++) {
+            tmp[pos.y + 16 * y][pos.x + 16 * x] =
+                read_imagef(src, sampler, block + pos + (int2)(16 * x - 8, 16 * y - 8));
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float4 val = tmp[pos.y + 8][pos.x + 8];
+
+    float4 horiz[2];
+    for (y = 0; y <= 1; y++) {
+        horiz[y] = 0.0f;
+        for (x = 0; x < size_x; x++)
+            horiz[y] += coef_x[x] * tmp[pos.y + y * 16][pos.x + 8 + x - rad_x];
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    for (y = 0; y <= 1; y++) {
+        tmp[pos.y + y * 16][pos.x + 8] = horiz[y];
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float4 sum = 0.0f;
+    for (y = 0; y < size_y; y++)
+        sum += coef_y[y] * tmp[pos.y + 8 + y - rad_y][pos.x + 8];
+
+    if (block.x + pos.x < get_image_width(dst) &&
+        block.y + pos.y < get_image_height(dst))
+        write_imagef(dst, block + pos, val + (val - sum) * amount);
+}

diff --git a/libavfilter/opencl_allkernels.c b/libavfilter/opencl_allkernels.c
deleted file mode 100644
index 6d80fa8..0000000
--- a/libavfilter/opencl_allkernels.c
+++ /dev/null

@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "opencl_allkernels.h"
-#if CONFIG_OPENCL
-#include "libavutil/opencl.h"
-#include "deshake_opencl_kernel.h"
-#include "unsharp_opencl_kernel.h"
-#endif
-
-#define OPENCL_REGISTER_KERNEL_CODE(X, x)                                              \
-    {                                                                                  \
-        if (CONFIG_##X##_FILTER) {                                                     \
-            av_opencl_register_kernel_code(ff_kernel_##x##_opencl);                    \
-        }                                                                              \
-    }
-
-void ff_opencl_register_filter_kernel_code_all(void)
-{
- #if CONFIG_OPENCL
-   OPENCL_REGISTER_KERNEL_CODE(DESHAKE,     deshake);
-   OPENCL_REGISTER_KERNEL_CODE(UNSHARP,     unsharp);
- #endif
-}

diff --git a/libavfilter/opencl_allkernels.h b/libavfilter/opencl_allkernels.h
deleted file mode 100644
index 57b650d..0000000
--- a/libavfilter/opencl_allkernels.h
+++ /dev/null

@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVFILTER_OPENCL_ALLKERNELS_H
-#define AVFILTER_OPENCL_ALLKERNELS_H
-
-#include "avfilter.h"
-#include "config.h"
-
-void ff_opencl_register_filter_kernel_code_all(void);
-
-#endif /* AVFILTER_OPENCL_ALLKERNELS_H */

diff --git a/libavfilter/avfiltergraph.h b/libavfilter/opencl_source.h
similarity index 63%
copy from libavfilter/avfiltergraph.h
copy to libavfilter/opencl_source.h
index b31d581..2f67d89 100644
--- a/libavfilter/avfiltergraph.h
+++ b/libavfilter/opencl_source.h

@@ -1,7 +1,4 @@
 /*
- * Filter graphs
- * copyright (c) 2007 Bobby Bingham
- *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -19,10 +16,15 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVFILTER_AVFILTERGRAPH_H
-#define AVFILTER_AVFILTERGRAPH_H
+#ifndef AVFILTER_OPENCL_SOURCE_H
+#define AVFILTER_OPENCL_SOURCE_H
 
-#include "avfilter.h"
-#include "libavutil/log.h"
+extern const char *ff_opencl_source_avgblur;
+extern const char *ff_opencl_source_colorspace_common;
+extern const char *ff_opencl_source_convolution;
+extern const char *ff_opencl_source_neighbor;
+extern const char *ff_opencl_source_overlay;
+extern const char *ff_opencl_source_tonemap;
+extern const char *ff_opencl_source_unsharp;
 
-#endif /* AVFILTER_AVFILTERGRAPH_H */
+#endif /* AVFILTER_OPENCL_SOURCE_H */

diff --git a/libavfilter/pthread.c b/libavfilter/pthread.c
index 567dd4c..7e37c73 100644
--- a/libavfilter/pthread.c
+++ b/libavfilter/pthread.c

@@ -85,10 +85,6 @@
 {
     int ret;
 
-#if HAVE_W32THREADS
-    w32thread_init();
-#endif
-
     if (graph->nb_threads == 1) {
         graph->thread_type = 0;
         return 0;

diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
new file mode 100644
index 0000000..06efdf5
--- /dev/null
+++ b/libavfilter/qsvvpp.c

@@ -0,0 +1,738 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Intel Quick Sync Video VPP base function
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_qsv.h"
+#include "libavutil/time.h"
+#include "libavutil/pixdesc.h"
+
+#include "internal.h"
+#include "qsvvpp.h"
+#include "video.h"
+
+#define IS_VIDEO_MEMORY(mode)  (mode & (MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET | \
+                                        MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET))
+#define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME)
+#define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY)
+
+typedef struct QSVFrame {
+    AVFrame          *frame;
+    mfxFrameSurface1 *surface;
+    mfxFrameSurface1  surface_internal;  /* for system memory */
+    struct QSVFrame  *next;
+} QSVFrame;
+
+/* abstract struct for all QSV filters */
+struct QSVVPPContext {
+    mfxSession          session;
+    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* callback */
+    enum AVPixelFormat  out_sw_format;   /* Real output format */
+    mfxVideoParam       vpp_param;
+    mfxFrameInfo       *frame_infos;     /* frame info for each input */
+
+    /* members related to the input/output surface */
+    int                 in_mem_mode;
+    int                 out_mem_mode;
+    QSVFrame           *in_frame_list;
+    QSVFrame           *out_frame_list;
+    int                 nb_surface_ptrs_in;
+    int                 nb_surface_ptrs_out;
+    mfxFrameSurface1  **surface_ptrs_in;
+    mfxFrameSurface1  **surface_ptrs_out;
+
+    /* MFXVPP extern parameters */
+    mfxExtOpaqueSurfaceAlloc opaque_alloc;
+    mfxExtBuffer      **ext_buffers;
+    int                 nb_ext_buffers;
+};
+
+static const mfxHandleType handle_types[] = {
+    MFX_HANDLE_VA_DISPLAY,
+    MFX_HANDLE_D3D9_DEVICE_MANAGER,
+    MFX_HANDLE_D3D11_DEVICE,
+};
+
+static const AVRational default_tb = { 1, 90000 };
+
+/* functions for frameAlloc */
+static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req,
+                             mfxFrameAllocResponse *resp)
+{
+    QSVVPPContext *s = pthis;
+    int i;
+
+    if (!(req->Type & MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET) ||
+        !(req->Type & (MFX_MEMTYPE_FROM_VPPIN | MFX_MEMTYPE_FROM_VPPOUT)) ||
+        !(req->Type & MFX_MEMTYPE_EXTERNAL_FRAME))
+        return MFX_ERR_UNSUPPORTED;
+
+    if (req->Type & MFX_MEMTYPE_FROM_VPPIN) {
+        resp->mids = av_mallocz(s->nb_surface_ptrs_in * sizeof(*resp->mids));
+        if (!resp->mids)
+            return AVERROR(ENOMEM);
+
+        for (i = 0; i < s->nb_surface_ptrs_in; i++)
+            resp->mids[i] = s->surface_ptrs_in[i]->Data.MemId;
+
+        resp->NumFrameActual = s->nb_surface_ptrs_in;
+    } else {
+        resp->mids = av_mallocz(s->nb_surface_ptrs_out * sizeof(*resp->mids));
+        if (!resp->mids)
+            return AVERROR(ENOMEM);
+
+        for (i = 0; i < s->nb_surface_ptrs_out; i++)
+            resp->mids[i] = s->surface_ptrs_out[i]->Data.MemId;
+
+        resp->NumFrameActual = s->nb_surface_ptrs_out;
+    }
+
+    return MFX_ERR_NONE;
+}
+
+static mfxStatus frame_free(mfxHDL pthis, mfxFrameAllocResponse *resp)
+{
+    av_freep(&resp->mids);
+    return MFX_ERR_NONE;
+}
+
+static mfxStatus frame_lock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr)
+{
+    return MFX_ERR_UNSUPPORTED;
+}
+
+static mfxStatus frame_unlock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr)
+{
+    return MFX_ERR_UNSUPPORTED;
+}
+
+static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl)
+{
+    *hdl = mid;
+    return MFX_ERR_NONE;
+}
+
+static int pix_fmt_to_mfx_fourcc(int format)
+{
+    switch (format) {
+    case AV_PIX_FMT_YUV420P:
+        return MFX_FOURCC_YV12;
+    case AV_PIX_FMT_NV12:
+        return MFX_FOURCC_NV12;
+    case AV_PIX_FMT_YUYV422:
+        return MFX_FOURCC_YUY2;
+    case AV_PIX_FMT_BGRA:
+        return MFX_FOURCC_RGB4;
+    }
+
+    return MFX_FOURCC_NV12;
+}
+
+static int map_frame_to_surface(AVFrame *frame, mfxFrameSurface1 *surface)
+{
+    switch (frame->format) {
+    case AV_PIX_FMT_NV12:
+        surface->Data.Y  = frame->data[0];
+        surface->Data.UV = frame->data[1];
+        break;
+    case AV_PIX_FMT_YUV420P:
+        surface->Data.Y = frame->data[0];
+        surface->Data.U = frame->data[1];
+        surface->Data.V = frame->data[2];
+        break;
+    case AV_PIX_FMT_YUYV422:
+        surface->Data.Y = frame->data[0];
+        surface->Data.U = frame->data[0] + 1;
+        surface->Data.V = frame->data[0] + 3;
+        break;
+    case AV_PIX_FMT_RGB32:
+        surface->Data.B = frame->data[0];
+        surface->Data.G = frame->data[0] + 1;
+        surface->Data.R = frame->data[0] + 2;
+        surface->Data.A = frame->data[0] + 3;
+        break;
+    default:
+        return MFX_ERR_UNSUPPORTED;
+    }
+    surface->Data.Pitch = frame->linesize[0];
+
+    return 0;
+}
+
+/* fill the surface info */
+static int fill_frameinfo_by_link(mfxFrameInfo *frameinfo, AVFilterLink *link)
+{
+    enum AVPixelFormat        pix_fmt;
+    AVHWFramesContext        *frames_ctx;
+    AVQSVFramesContext       *frames_hwctx;
+    const AVPixFmtDescriptor *desc;
+
+    if (link->format == AV_PIX_FMT_QSV) {
+        if (!link->hw_frames_ctx)
+            return AVERROR(EINVAL);
+
+        frames_ctx   = (AVHWFramesContext *)link->hw_frames_ctx->data;
+        frames_hwctx = frames_ctx->hwctx;
+        *frameinfo   = frames_hwctx->surfaces[0].Info;
+    } else {
+        pix_fmt = link->format;
+        desc = av_pix_fmt_desc_get(pix_fmt);
+        if (!desc)
+            return AVERROR_BUG;
+
+        frameinfo->CropX          = 0;
+        frameinfo->CropY          = 0;
+        frameinfo->Width          = FFALIGN(link->w, 32);
+        frameinfo->Height         = FFALIGN(link->h, 32);
+        frameinfo->PicStruct      = MFX_PICSTRUCT_PROGRESSIVE;
+        frameinfo->FourCC         = pix_fmt_to_mfx_fourcc(pix_fmt);
+        frameinfo->BitDepthLuma   = desc->comp[0].depth;
+        frameinfo->BitDepthChroma = desc->comp[0].depth;
+        frameinfo->Shift          = desc->comp[0].depth > 8;
+        if (desc->log2_chroma_w && desc->log2_chroma_h)
+            frameinfo->ChromaFormat = MFX_CHROMAFORMAT_YUV420;
+        else if (desc->log2_chroma_w)
+            frameinfo->ChromaFormat = MFX_CHROMAFORMAT_YUV422;
+        else
+            frameinfo->ChromaFormat = MFX_CHROMAFORMAT_YUV444;
+    }
+
+    frameinfo->CropW          = link->w;
+    frameinfo->CropH          = link->h;
+    frameinfo->FrameRateExtN  = link->frame_rate.num;
+    frameinfo->FrameRateExtD  = link->frame_rate.den;
+    frameinfo->AspectRatioW   = link->sample_aspect_ratio.num ? link->sample_aspect_ratio.num : 1;
+    frameinfo->AspectRatioH   = link->sample_aspect_ratio.den ? link->sample_aspect_ratio.den : 1;
+
+    return 0;
+}
+
+static void clear_unused_frames(QSVFrame *list)
+{
+    while (list) {
+        if (list->surface && !list->surface->Data.Locked) {
+            list->surface = NULL;
+            av_frame_free(&list->frame);
+        }
+        list = list->next;
+    }
+}
+
+static void clear_frame_list(QSVFrame **list)
+{
+    while (*list) {
+        QSVFrame *frame;
+
+        frame = *list;
+        *list = (*list)->next;
+        av_frame_free(&frame->frame);
+        av_freep(&frame);
+    }
+}
+
+static QSVFrame *get_free_frame(QSVFrame **list)
+{
+    QSVFrame *out = *list;
+
+    for (; out; out = out->next) {
+        if (!out->surface)
+            break;
+    }
+
+    if (!out) {
+        out = av_mallocz(sizeof(*out));
+        if (!out) {
+            av_log(NULL, AV_LOG_ERROR, "Can't alloc new output frame.\n");
+            return NULL;
+        }
+        out->next  = *list;
+        *list      = out;
+    }
+
+    return out;
+}
+
+/* get the input surface */
+static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picref)
+{
+    QSVFrame        *qsv_frame;
+    AVFilterContext *ctx = inlink->dst;
+
+    clear_unused_frames(s->in_frame_list);
+
+    qsv_frame = get_free_frame(&s->in_frame_list);
+    if (!qsv_frame)
+        return NULL;
+
+    /* Turn AVFrame into mfxFrameSurface1.
+     * For video/opaque memory mode, pix_fmt is AV_PIX_FMT_QSV, and
+     * mfxFrameSurface1 is stored in AVFrame->data[3];
+     * for system memory mode, raw video data is stored in
+     * AVFrame, we should map it into mfxFrameSurface1.
+     */
+    if (!IS_SYSTEM_MEMORY(s->in_mem_mode)) {
+        if (picref->format != AV_PIX_FMT_QSV) {
+            av_log(ctx, AV_LOG_ERROR, "QSVVPP gets a wrong frame.\n");
+            return NULL;
+        }
+        qsv_frame->frame   = av_frame_clone(picref);
+        qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3];
+    } else {
+        /* make a copy if the input is not padded as libmfx requires */
+        if (picref->height & 31 || picref->linesize[0] & 31) {
+            qsv_frame->frame = ff_get_video_buffer(inlink,
+                                                   FFALIGN(inlink->w, 32),
+                                                   FFALIGN(inlink->h, 32));
+            if (!qsv_frame->frame)
+                return NULL;
+
+            qsv_frame->frame->width   = picref->width;
+            qsv_frame->frame->height  = picref->height;
+
+            if (av_frame_copy(qsv_frame->frame, picref) < 0) {
+                av_frame_free(&qsv_frame->frame);
+                return NULL;
+            }
+
+            av_frame_copy_props(qsv_frame->frame, picref);
+            av_frame_free(&picref);
+        } else
+            qsv_frame->frame = av_frame_clone(picref);
+
+        if (map_frame_to_surface(qsv_frame->frame,
+                                &qsv_frame->surface_internal) < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n");
+            return NULL;
+        }
+        qsv_frame->surface = &qsv_frame->surface_internal;
+    }
+
+    qsv_frame->surface->Info           = s->frame_infos[FF_INLINK_IDX(inlink)];
+    qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
+                                                      inlink->time_base, default_tb);
+
+    qsv_frame->surface->Info.PicStruct =
+            !qsv_frame->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE :
+            (qsv_frame->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF :
+                                                 MFX_PICSTRUCT_FIELD_BFF);
+    if (qsv_frame->frame->repeat_pict == 1)
+        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
+    else if (qsv_frame->frame->repeat_pict == 2)
+        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
+    else if (qsv_frame->frame->repeat_pict == 4)
+        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
+
+    return qsv_frame;
+}
+
+/* get the output surface */
+static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    QSVFrame        *out_frame;
+    int              ret;
+
+    clear_unused_frames(s->out_frame_list);
+
+    out_frame = get_free_frame(&s->out_frame_list);
+    if (!out_frame)
+        return NULL;
+
+    /* For video memory, get a hw frame;
+     * For system memory, get a sw frame and map it into a mfx_surface. */
+    if (!IS_SYSTEM_MEMORY(s->out_mem_mode)) {
+        out_frame->frame = av_frame_alloc();
+        if (!out_frame->frame)
+            return NULL;
+
+        ret = av_hwframe_get_buffer(outlink->hw_frames_ctx, out_frame->frame, 0);
+        if (ret < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Can't allocate a surface.\n");
+            return NULL;
+        }
+
+        out_frame->surface = (mfxFrameSurface1 *)out_frame->frame->data[3];
+    } else {
+        /* Get a frame with aligned dimensions.
+         * Libmfx need system memory being 128x64 aligned */
+        out_frame->frame = ff_get_video_buffer(outlink,
+                                               FFALIGN(outlink->w, 128),
+                                               FFALIGN(outlink->h, 64));
+        if (!out_frame->frame)
+            return NULL;
+
+        out_frame->frame->width  = outlink->w;
+        out_frame->frame->height = outlink->h;
+
+        ret = map_frame_to_surface(out_frame->frame,
+                                  &out_frame->surface_internal);
+        if (ret < 0)
+            return NULL;
+
+        out_frame->surface = &out_frame->surface_internal;
+    }
+
+    out_frame->surface->Info = s->vpp_param.vpp.Out;
+
+    return out_frame;
+}
+
+/* create the QSV session */
+static int init_vpp_session(AVFilterContext *avctx, QSVVPPContext *s)
+{
+    AVFilterLink                 *inlink = avctx->inputs[0];
+    AVFilterLink                *outlink = avctx->outputs[0];
+    AVQSVFramesContext  *in_frames_hwctx = NULL;
+    AVQSVFramesContext *out_frames_hwctx = NULL;
+
+    AVBufferRef *device_ref;
+    AVHWDeviceContext *device_ctx;
+    AVQSVDeviceContext *device_hwctx;
+    mfxHDL handle;
+    mfxHandleType handle_type;
+    mfxVersion ver;
+    mfxIMPL impl;
+    int ret, i;
+
+    if (inlink->hw_frames_ctx) {
+        AVHWFramesContext *frames_ctx = (AVHWFramesContext *)inlink->hw_frames_ctx->data;
+
+        device_ref      = frames_ctx->device_ref;
+        in_frames_hwctx = frames_ctx->hwctx;
+
+        s->in_mem_mode = in_frames_hwctx->frame_type;
+
+        s->surface_ptrs_in = av_mallocz_array(in_frames_hwctx->nb_surfaces,
+                                              sizeof(*s->surface_ptrs_in));
+        if (!s->surface_ptrs_in)
+            return AVERROR(ENOMEM);
+
+        for (i = 0; i < in_frames_hwctx->nb_surfaces; i++)
+            s->surface_ptrs_in[i] = in_frames_hwctx->surfaces + i;
+
+        s->nb_surface_ptrs_in = in_frames_hwctx->nb_surfaces;
+    } else if (avctx->hw_device_ctx) {
+        device_ref     = avctx->hw_device_ctx;
+        s->in_mem_mode = MFX_MEMTYPE_SYSTEM_MEMORY;
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "No hw context provided.\n");
+        return AVERROR(EINVAL);
+    }
+
+    device_ctx   = (AVHWDeviceContext *)device_ref->data;
+    device_hwctx = device_ctx->hwctx;
+
+    if (outlink->format == AV_PIX_FMT_QSV) {
+        AVHWFramesContext *out_frames_ctx;
+        AVBufferRef *out_frames_ref = av_hwframe_ctx_alloc(device_ref);
+        if (!out_frames_ref)
+            return AVERROR(ENOMEM);
+
+        s->out_mem_mode = IS_OPAQUE_MEMORY(s->in_mem_mode) ?
+                          MFX_MEMTYPE_OPAQUE_FRAME :
+                          MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET;
+
+        out_frames_ctx   = (AVHWFramesContext *)out_frames_ref->data;
+        out_frames_hwctx = out_frames_ctx->hwctx;
+
+        out_frames_ctx->format            = AV_PIX_FMT_QSV;
+        out_frames_ctx->width             = FFALIGN(outlink->w, 32);
+        out_frames_ctx->height            = FFALIGN(outlink->h, 32);
+        out_frames_ctx->sw_format         = s->out_sw_format;
+        out_frames_ctx->initial_pool_size = 64;
+        out_frames_hwctx->frame_type      = s->out_mem_mode;
+
+        ret = av_hwframe_ctx_init(out_frames_ref);
+        if (ret < 0) {
+            av_buffer_unref(&out_frames_ref);
+            av_log(avctx, AV_LOG_ERROR, "Error creating frames_ctx for output pad.\n");
+            return ret;
+        }
+
+        s->surface_ptrs_out = av_mallocz_array(out_frames_hwctx->nb_surfaces,
+                                               sizeof(*s->surface_ptrs_out));
+        if (!s->surface_ptrs_out) {
+            av_buffer_unref(&out_frames_ref);
+            return AVERROR(ENOMEM);
+        }
+
+        for (i = 0; i < out_frames_hwctx->nb_surfaces; i++)
+            s->surface_ptrs_out[i] = out_frames_hwctx->surfaces + i;
+        s->nb_surface_ptrs_out = out_frames_hwctx->nb_surfaces;
+
+        av_buffer_unref(&outlink->hw_frames_ctx);
+        outlink->hw_frames_ctx = out_frames_ref;
+    } else
+        s->out_mem_mode = MFX_MEMTYPE_SYSTEM_MEMORY;
+
+    /* extract the properties of the "master" session given to us */
+    ret = MFXQueryIMPL(device_hwctx->session, &impl);
+    if (ret == MFX_ERR_NONE)
+        ret = MFXQueryVersion(device_hwctx->session, &ver);
+    if (ret != MFX_ERR_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "Error querying the session attributes\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    for (i = 0; i < FF_ARRAY_ELEMS(handle_types); i++) {
+        ret = MFXVideoCORE_GetHandle(device_hwctx->session, handle_types[i], &handle);
+        if (ret == MFX_ERR_NONE) {
+            handle_type = handle_types[i];
+            break;
+        }
+    }
+
+    if (ret != MFX_ERR_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "Error getting the session handle\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    /* create a "slave" session with those same properties, to be used for vpp */
+    ret = MFXInit(impl, &ver, &s->session);
+    if (ret != MFX_ERR_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "Error initializing a session for scaling\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    if (handle) {
+        ret = MFXVideoCORE_SetHandle(s->session, handle_type, handle);
+        if (ret != MFX_ERR_NONE)
+            return AVERROR_UNKNOWN;
+    }
+
+    if (QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 25)) {
+        ret = MFXJoinSession(device_hwctx->session, s->session);
+        if (ret != MFX_ERR_NONE)
+            return AVERROR_UNKNOWN;
+    }
+
+    if (IS_OPAQUE_MEMORY(s->in_mem_mode) || IS_OPAQUE_MEMORY(s->out_mem_mode)) {
+        s->opaque_alloc.In.Surfaces   = s->surface_ptrs_in;
+        s->opaque_alloc.In.NumSurface = s->nb_surface_ptrs_in;
+        s->opaque_alloc.In.Type       = s->in_mem_mode;
+
+        s->opaque_alloc.Out.Surfaces   = s->surface_ptrs_out;
+        s->opaque_alloc.Out.NumSurface = s->nb_surface_ptrs_out;
+        s->opaque_alloc.Out.Type       = s->out_mem_mode;
+
+        s->opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION;
+        s->opaque_alloc.Header.BufferSz = sizeof(s->opaque_alloc);
+    } else if (IS_VIDEO_MEMORY(s->in_mem_mode) || IS_VIDEO_MEMORY(s->out_mem_mode)) {
+        mfxFrameAllocator frame_allocator = {
+            .pthis  = s,
+            .Alloc  = frame_alloc,
+            .Lock   = frame_lock,
+            .Unlock = frame_unlock,
+            .GetHDL = frame_get_hdl,
+            .Free   = frame_free,
+        };
+
+        ret = MFXVideoCORE_SetFrameAllocator(s->session, &frame_allocator);
+        if (ret != MFX_ERR_NONE)
+            return AVERROR_UNKNOWN;
+    }
+
+    return 0;
+}
+
+int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param)
+{
+    int i;
+    int ret;
+    QSVVPPContext *s;
+
+    s = av_mallocz(sizeof(*s));
+    if (!s)
+        return AVERROR(ENOMEM);
+
+    s->filter_frame  = param->filter_frame;
+    if (!s->filter_frame)
+        s->filter_frame = ff_filter_frame;
+    s->out_sw_format = param->out_sw_format;
+
+    /* create the vpp session */
+    ret = init_vpp_session(avctx, s);
+    if (ret < 0)
+        goto failed;
+
+    s->frame_infos = av_mallocz_array(avctx->nb_inputs, sizeof(*s->frame_infos));
+    if (!s->frame_infos) {
+        ret = AVERROR(ENOMEM);
+        goto failed;
+    }
+
+    /* Init each input's information */
+    for (i = 0; i < avctx->nb_inputs; i++) {
+        ret = fill_frameinfo_by_link(&s->frame_infos[i], avctx->inputs[i]);
+        if (ret < 0)
+            goto failed;
+    }
+
+    /* Update input's frame info according to crop */
+    for (i = 0; i < param->num_crop; i++) {
+        QSVVPPCrop *crop = param->crop + i;
+        if (crop->in_idx > avctx->nb_inputs) {
+            ret = AVERROR(EINVAL);
+            goto failed;
+        }
+        s->frame_infos[crop->in_idx].CropX = crop->x;
+        s->frame_infos[crop->in_idx].CropY = crop->y;
+        s->frame_infos[crop->in_idx].CropW = crop->w;
+        s->frame_infos[crop->in_idx].CropH = crop->h;
+    }
+
+    s->vpp_param.vpp.In = s->frame_infos[0];
+
+    ret = fill_frameinfo_by_link(&s->vpp_param.vpp.Out, avctx->outputs[0]);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Fail to get frame info from link.\n");
+        goto failed;
+    }
+
+    if (IS_OPAQUE_MEMORY(s->in_mem_mode) || IS_OPAQUE_MEMORY(s->out_mem_mode)) {
+        s->nb_ext_buffers = param->num_ext_buf + 1;
+        s->ext_buffers = av_mallocz_array(s->nb_ext_buffers, sizeof(*s->ext_buffers));
+        if (!s->ext_buffers) {
+            ret = AVERROR(ENOMEM);
+            goto failed;
+        }
+
+        s->ext_buffers[0] = (mfxExtBuffer *)&s->opaque_alloc;
+        for (i = 1; i < param->num_ext_buf; i++)
+            s->ext_buffers[i]    = param->ext_buf[i - 1];
+        s->vpp_param.ExtParam    = s->ext_buffers;
+        s->vpp_param.NumExtParam = s->nb_ext_buffers;
+    } else {
+        s->vpp_param.NumExtParam = param->num_ext_buf;
+        s->vpp_param.ExtParam    = param->ext_buf;
+    }
+
+    s->vpp_param.AsyncDepth = 1;
+
+    if (IS_SYSTEM_MEMORY(s->in_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY;
+    else if (IS_VIDEO_MEMORY(s->in_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_VIDEO_MEMORY;
+    else if (IS_OPAQUE_MEMORY(s->in_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_OPAQUE_MEMORY;
+
+    if (IS_SYSTEM_MEMORY(s->out_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_OUT_SYSTEM_MEMORY;
+    else if (IS_VIDEO_MEMORY(s->out_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_OUT_VIDEO_MEMORY;
+    else if (IS_OPAQUE_MEMORY(s->out_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_OUT_OPAQUE_MEMORY;
+
+    ret = MFXVideoVPP_Init(s->session, &s->vpp_param);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create a qsvvpp, ret = %d.\n", ret);
+        goto failed;
+    }
+
+    *vpp = s;
+    return 0;
+
+failed:
+    ff_qsvvpp_free(&s);
+
+    return ret;
+}
+
+int ff_qsvvpp_free(QSVVPPContext **vpp)
+{
+    QSVVPPContext *s = *vpp;
+
+    if (!s)
+        return 0;
+
+    if (s->session) {
+        MFXVideoVPP_Close(s->session);
+        MFXClose(s->session);
+    }
+
+    /* release all the resources */
+    clear_frame_list(&s->in_frame_list);
+    clear_frame_list(&s->out_frame_list);
+    av_freep(&s->surface_ptrs_in);
+    av_freep(&s->surface_ptrs_out);
+    av_freep(&s->ext_buffers);
+    av_freep(&s->frame_infos);
+    av_freep(vpp);
+
+    return 0;
+}
+
+int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picref)
+{
+    AVFilterContext  *ctx     = inlink->dst;
+    AVFilterLink     *outlink = ctx->outputs[0];
+    mfxSyncPoint      sync;
+    QSVFrame         *in_frame, *out_frame;
+    int               ret, filter_ret;
+
+    in_frame = submit_frame(s, inlink, picref);
+    if (!in_frame) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on input[%d]\n",
+               FF_INLINK_IDX(inlink));
+        return AVERROR(ENOMEM);
+    }
+
+    do {
+        out_frame = query_frame(s, outlink);
+        if (!out_frame) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to query an output frame.\n");
+            return AVERROR(ENOMEM);
+        }
+
+        do {
+            ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame->surface,
+                                               out_frame->surface, NULL, &sync);
+            if (ret == MFX_WRN_DEVICE_BUSY)
+                av_usleep(500);
+        } while (ret == MFX_WRN_DEVICE_BUSY);
+
+        if (ret < 0 && ret != MFX_ERR_MORE_SURFACE) {
+            /* Ignore more_data error */
+            if (ret == MFX_ERR_MORE_DATA)
+                ret = AVERROR(EAGAIN);
+            break;
+        }
+
+        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
+            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
+
+        out_frame->frame->pts = av_rescale_q(out_frame->surface->Data.TimeStamp,
+                                             default_tb, outlink->time_base);
+
+        filter_ret = s->filter_frame(outlink, out_frame->frame);
+        if (filter_ret < 0) {
+            av_frame_free(&out_frame->frame);
+            ret = filter_ret;
+            break;
+        }
+        out_frame->frame = NULL;
+    } while(ret == MFX_ERR_MORE_SURFACE);
+
+    return ret;
+}

diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
new file mode 100644
index 0000000..ff02b64
--- /dev/null
+++ b/libavfilter/qsvvpp.h

@@ -0,0 +1,74 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Intel Quick Sync Video VPP base function
+ */
+
+#ifndef AVFILTER_QSVVPP_H
+#define AVFILTER_QSVVPP_H
+
+#include <mfx/mfxvideo.h>
+
+#include "avfilter.h"
+
+#define FF_INLINK_IDX(link)  ((int)((link)->dstpad - (link)->dst->input_pads))
+#define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src->output_pads))
+
+#define QSV_VERSION_ATLEAST(MAJOR, MINOR)   \
+    (MFX_VERSION_MAJOR > (MAJOR) ||         \
+     MFX_VERSION_MAJOR == (MAJOR) && MFX_VERSION_MINOR >= (MINOR))
+
+#define QSV_RUNTIME_VERSION_ATLEAST(MFX_VERSION, MAJOR, MINOR) \
+    (MFX_VERSION.Major > (MAJOR)) ||                           \
+    (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR))
+
+typedef struct QSVVPPContext QSVVPPContext;
+
+typedef struct QSVVPPCrop {
+    int in_idx;        ///< Input index
+    int x, y, w, h;    ///< Crop rectangle
+} QSVVPPCrop;
+
+typedef struct QSVVPPParam {
+    /* default is ff_filter_frame */
+    int (*filter_frame)(AVFilterLink *outlink, AVFrame *frame);
+
+    /* To fill with MFX enhanced filter configurations */
+    int num_ext_buf;
+    mfxExtBuffer **ext_buf;
+
+    /* Real output format */
+    enum AVPixelFormat out_sw_format;
+
+    /* Crop information for each input, if needed */
+    int num_crop;
+    QSVVPPCrop *crop;
+} QSVVPPParam;
+
+/* create and initialize the QSV session */
+int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param);
+
+/* release the resources (eg.surfaces) */
+int ff_qsvvpp_free(QSVVPPContext **vpp);
+
+/* vpp filter frame and call the cb if needed */
+int ff_qsvvpp_filter_frame(QSVVPPContext *vpp, AVFilterLink *inlink, AVFrame *frame);
+
+#endif /* AVFILTER_QSVVPP_H */

diff --git a/libavfilter/setpts.c b/libavfilter/setpts.c
index 4505498..800ba6a 100644
--- a/libavfilter/setpts.c
+++ b/libavfilter/setpts.c

@@ -57,6 +57,7 @@
     "RTCSTART",    ///< wallclock (RTC) time at the start of the movie in micro seconds
     "S",           //   Number of samples in the current frame
     "SR",          //   Audio sample rate
+    "FR",          ///< defined only for constant frame-rate video
     NULL
 };
 
@@ -81,6 +82,7 @@
     VAR_RTCSTART,
     VAR_S,
     VAR_SR,
+    VAR_FR,
     VAR_VARS_NB
 };
 
@@ -127,7 +129,8 @@
     setpts->var_values[VAR_SAMPLE_RATE] =
         setpts->type == AVMEDIA_TYPE_AUDIO ? inlink->sample_rate : NAN;
 
-    setpts->var_values[VAR_FRAME_RATE] = inlink->frame_rate.num &&
+    setpts->var_values[VAR_FRAME_RATE] =
+    setpts->var_values[VAR_FR] =         inlink->frame_rate.num &&
                                          inlink->frame_rate.den ?
                                             av_q2d(inlink->frame_rate) : NAN;
 

diff --git a/libavfilter/src_movie.c b/libavfilter/src_movie.c
index 258ba50..bcabfcc 100644
--- a/libavfilter/src_movie.c
+++ b/libavfilter/src_movie.c

@@ -239,8 +239,6 @@
         return AVERROR_PATCHWELCOME;
     }
 
-    av_register_all();
-
     // Try to find the movie format (container)
     iformat = movie->format_name ? av_find_input_format(movie->format_name) : NULL;
 

diff --git a/libavfilter/tests/filtfmts.c b/libavfilter/tests/filtfmts.c
index 199d74d..317df86 100644
--- a/libavfilter/tests/filtfmts.c
+++ b/libavfilter/tests/filtfmts.c

@@ -73,7 +73,7 @@
 
 int main(int argc, char **argv)
 {
-    AVFilter *filter;
+    const AVFilter *filter;
     AVFilterContext *filter_ctx;
     AVFilterGraph *graph_ctx;
     const char *filter_name;
@@ -97,8 +97,6 @@
     if (!graph_ctx)
         return 1;
 
-    avfilter_register_all();
-
     /* get a corresponding filter and open it */
     if (!(filter = avfilter_get_by_name(filter_name))) {
         fprintf(stderr, "Unrecognized filter with name '%s'\n", filter_name);
@@ -140,9 +138,9 @@
     }
 
     if (filter->query_formats)
-        filter->query_formats(filter_ctx);
+        ret = filter->query_formats(filter_ctx);
     else
-        ff_default_query_formats(filter_ctx);
+        ret = ff_default_query_formats(filter_ctx);
 
     print_formats(filter_ctx);
 

diff --git a/libavfilter/tests/integral.c b/libavfilter/tests/integral.c
index 049fefa..2a8e8ff 100644
--- a/libavfilter/tests/integral.c
+++ b/libavfilter/tests/integral.c

@@ -57,6 +57,10 @@
     uint32_t *ii_start  = ii  + ii_lz_32 + 1; // skip top 0-line and left 0-column
     uint32_t *ii_start2 = ii2 + ii_lz_32 + 1; // skip top 0-line and left 0-column
 
+    NLMeansDSPContext dsp = {0};
+
+    ff_nlmeans_init(&dsp);
+
     if (!ii || !ii2)
         return -1;
 
@@ -64,7 +68,7 @@
         for (xoff = -e; xoff <= e; xoff++) {
             printf("xoff=%d yoff=%d\n", xoff, yoff);
 
-            compute_ssd_integral_image(ii_start, ii_lz_32,
+            compute_ssd_integral_image(&dsp, ii_start, ii_lz_32,
                                        src, lz, xoff, yoff, e, w, h);
             display_integral(ii_start, ii_w, ii_h, ii_lz_32);
 

diff --git a/libavfilter/threshold.h b/libavfilter/threshold.h
new file mode 100644
index 0000000..775a9f9
--- /dev/null
+++ b/libavfilter/threshold.h

@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_THRESHOLD_H
+#define AVFILTER_THRESHOLD_H
+
+#include "avfilter.h"
+#include "framesync.h"
+
+typedef struct ThresholdContext {
+    const AVClass *class;
+
+    int depth;
+    int planes;
+    int bpc;
+
+    int nb_planes;
+    int width[4], height[4];
+
+    void (*threshold)(const uint8_t *in, const uint8_t *threshold,
+                      const uint8_t *min, const uint8_t *max,
+                      uint8_t *out,
+                      ptrdiff_t ilinesize, ptrdiff_t tlinesize,
+                      ptrdiff_t flinesize, ptrdiff_t slinesize,
+                      ptrdiff_t olinesize,
+                      int w, int h);
+
+    AVFrame *frames[4];
+    FFFrameSync fs;
+} ThresholdContext;
+
+void ff_threshold_init(ThresholdContext *s);
+void ff_threshold_init_x86(ThresholdContext *s);
+
+#endif /* AVFILTER_THRESHOLD_H */

diff --git a/libavfilter/unsharp.h b/libavfilter/unsharp.h
index 340a6a0..caff986 100644
--- a/libavfilter/unsharp.h
+++ b/libavfilter/unsharp.h

@@ -24,38 +24,10 @@
 
 #include "config.h"
 #include "avfilter.h"
-#if CONFIG_OPENCL
-#include "libavutil/opencl.h"
-#endif
 
 #define MIN_MATRIX_SIZE 3
 #define MAX_MATRIX_SIZE 63
 
-#if CONFIG_OPENCL
-
-typedef struct UnsharpOpenclContext {
-    cl_command_queue command_queue;
-    cl_program program;
-    cl_kernel kernel_default;
-    cl_kernel kernel_luma;
-    cl_kernel kernel_chroma;
-    cl_mem cl_luma_mask;
-    cl_mem cl_chroma_mask;
-    cl_mem cl_luma_mask_x;
-    cl_mem cl_chroma_mask_x;
-    cl_mem cl_luma_mask_y;
-    cl_mem cl_chroma_mask_y;
-    int in_plane_size[8];
-    int out_plane_size[8];
-    int plane_num;
-    cl_mem cl_inbuf;
-    size_t cl_inbuf_size;
-    cl_mem cl_outbuf;
-    size_t cl_outbuf_size;
-    int use_fast_kernels;
-} UnsharpOpenclContext;
-
-#endif
 
 typedef struct UnsharpFilterParam {
     int msize_x;                             ///< matrix width
@@ -76,9 +48,6 @@
     UnsharpFilterParam chroma; ///< chroma parameters (width, height, amount)
     int hsub, vsub;
     int opencl;
-#if CONFIG_OPENCL
-    UnsharpOpenclContext opencl_ctx;
-#endif
     int (* apply_unsharp)(AVFilterContext *ctx, AVFrame *in, AVFrame *out);
 } UnsharpContext;
 

diff --git a/libavfilter/unsharp_opencl.c b/libavfilter/unsharp_opencl.c
deleted file mode 100644
index 1545455..0000000
--- a/libavfilter/unsharp_opencl.c
+++ /dev/null

@@ -1,422 +0,0 @@
-/*
- * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
- * Copyright (C) 2013 Lenny Wang
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * unsharp input video
- */
-
-#include "unsharp_opencl.h"
-#include "libavutil/common.h"
-#include "libavutil/opencl_internal.h"
-
-#define PLANE_NUM 3
-#define ROUND_TO_16(a) (((((a) - 1)/16)+1)*16)
-
-static inline void add_mask_counter(uint32_t *dst, uint32_t *counter1, uint32_t *counter2, int len)
-{
-    int i;
-    for (i = 0; i < len; i++) {
-        dst[i] = counter1[i] + counter2[i];
-    }
-}
-
-static int compute_mask(int step, uint32_t *mask)
-{
-    int i, z, ret = 0;
-    int counter_size = sizeof(uint32_t) * (2 * step + 1);
-    uint32_t *temp1_counter, *temp2_counter, **counter = NULL;
-    temp1_counter = av_mallocz(counter_size);
-    if (!temp1_counter) {
-        ret = AVERROR(ENOMEM);
-        goto end;
-    }
-    temp2_counter = av_mallocz(counter_size);
-    if (!temp2_counter) {
-        ret = AVERROR(ENOMEM);
-        goto end;
-    }
-    counter = av_mallocz_array(2 * step + 1, sizeof(uint32_t *));
-    if (!counter) {
-        ret = AVERROR(ENOMEM);
-        goto end;
-    }
-    for (i = 0; i < 2 * step + 1; i++) {
-        counter[i] = av_mallocz(counter_size);
-        if (!counter[i]) {
-            ret = AVERROR(ENOMEM);
-            goto end;
-        }
-    }
-    for (i = 0; i < 2 * step + 1; i++) {
-        memset(temp1_counter, 0, counter_size);
-        temp1_counter[i] = 1;
-        for (z = 0; z < step * 2; z += 2) {
-            add_mask_counter(temp2_counter, counter[z], temp1_counter, step * 2);
-            memcpy(counter[z], temp1_counter, counter_size);
-            add_mask_counter(temp1_counter, counter[z + 1], temp2_counter, step * 2);
-            memcpy(counter[z + 1], temp2_counter, counter_size);
-        }
-    }
-    memcpy(mask, temp1_counter, counter_size);
-end:
-    av_freep(&temp1_counter);
-    av_freep(&temp2_counter);
-    for (i = 0; counter && i < 2 * step + 1; i++) {
-        av_freep(&counter[i]);
-    }
-    av_freep(&counter);
-    return ret;
-}
-
-static int copy_separable_masks(cl_mem cl_mask_x, cl_mem cl_mask_y, int step_x, int step_y)
-{
-    int ret = 0;
-    uint32_t *mask_x, *mask_y;
-    size_t size_mask_x = sizeof(uint32_t) * (2 * step_x + 1);
-    size_t size_mask_y = sizeof(uint32_t) * (2 * step_y + 1);
-    mask_x = av_mallocz_array(2 * step_x + 1, sizeof(uint32_t));
-    if (!mask_x) {
-        ret = AVERROR(ENOMEM);
-        goto end;
-    }
-    mask_y = av_mallocz_array(2 * step_y + 1, sizeof(uint32_t));
-    if (!mask_y) {
-        ret = AVERROR(ENOMEM);
-        goto end;
-    }
-
-    ret = compute_mask(step_x, mask_x);
-    if (ret < 0)
-        goto end;
-    ret = compute_mask(step_y, mask_y);
-    if (ret < 0)
-        goto end;
-
-    ret = av_opencl_buffer_write(cl_mask_x, (uint8_t *)mask_x, size_mask_x);
-    ret = av_opencl_buffer_write(cl_mask_y, (uint8_t *)mask_y, size_mask_y);
-end:
-    av_freep(&mask_x);
-    av_freep(&mask_y);
-
-    return ret;
-}
-
-static int generate_mask(AVFilterContext *ctx)
-{
-    cl_mem masks[4];
-    cl_mem mask_matrix[2];
-    int i, ret = 0, step_x[2], step_y[2];
-
-    UnsharpContext *unsharp = ctx->priv;
-    mask_matrix[0] = unsharp->opencl_ctx.cl_luma_mask;
-    mask_matrix[1] = unsharp->opencl_ctx.cl_chroma_mask;
-    masks[0] = unsharp->opencl_ctx.cl_luma_mask_x;
-    masks[1] = unsharp->opencl_ctx.cl_luma_mask_y;
-    masks[2] = unsharp->opencl_ctx.cl_chroma_mask_x;
-    masks[3] = unsharp->opencl_ctx.cl_chroma_mask_y;
-    step_x[0] = unsharp->luma.steps_x;
-    step_x[1] = unsharp->chroma.steps_x;
-    step_y[0] = unsharp->luma.steps_y;
-    step_y[1] = unsharp->chroma.steps_y;
-
-    /* use default kernel if any matrix dim larger than 8 due to limited local mem size */
-    if (step_x[0]>8 || step_x[1]>8 || step_y[0]>8 || step_y[1]>8)
-        unsharp->opencl_ctx.use_fast_kernels = 0;
-    else
-        unsharp->opencl_ctx.use_fast_kernels = 1;
-
-    if (!masks[0] || !masks[1] || !masks[2] || !masks[3]) {
-        av_log(ctx, AV_LOG_ERROR, "Luma mask and chroma mask should not be NULL\n");
-        return AVERROR(EINVAL);
-    }
-    if (!mask_matrix[0] || !mask_matrix[1]) {
-        av_log(ctx, AV_LOG_ERROR, "Luma mask and chroma mask should not be NULL\n");
-        return AVERROR(EINVAL);
-    }
-    for (i = 0; i < 2; i++) {
-        ret = copy_separable_masks(masks[2*i], masks[2*i+1], step_x[i], step_y[i]);
-        if (ret < 0)
-            return ret;
-    }
-    return ret;
-}
-
-int ff_opencl_apply_unsharp(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
-{
-    int ret;
-    AVFilterLink *link = ctx->inputs[0];
-    UnsharpContext *unsharp = ctx->priv;
-    cl_int status;
-    FFOpenclParam kernel1 = {0};
-    FFOpenclParam kernel2 = {0};
-    int width = link->w;
-    int height = link->h;
-    int cw = AV_CEIL_RSHIFT(link->w, unsharp->hsub);
-    int ch = AV_CEIL_RSHIFT(link->h, unsharp->vsub);
-    size_t globalWorkSize1d = width * height + 2 * ch * cw;
-    size_t globalWorkSize2dLuma[2];
-    size_t globalWorkSize2dChroma[2];
-    size_t localWorkSize2d[2] = {16, 16};
-
-    if (unsharp->opencl_ctx.use_fast_kernels) {
-        globalWorkSize2dLuma[0] = (size_t)ROUND_TO_16(width);
-        globalWorkSize2dLuma[1] = (size_t)ROUND_TO_16(height);
-        globalWorkSize2dChroma[0] = (size_t)ROUND_TO_16(cw);
-        globalWorkSize2dChroma[1] = (size_t)(2*ROUND_TO_16(ch));
-
-        kernel1.ctx = ctx;
-        kernel1.kernel = unsharp->opencl_ctx.kernel_luma;
-        ret = avpriv_opencl_set_parameter(&kernel1,
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask_x),
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask_y),
-                                      FF_OPENCL_PARAM_INFO(unsharp->luma.amount),
-                                      FF_OPENCL_PARAM_INFO(unsharp->luma.scalebits),
-                                      FF_OPENCL_PARAM_INFO(unsharp->luma.halfscale),
-                                      FF_OPENCL_PARAM_INFO(in->linesize[0]),
-                                      FF_OPENCL_PARAM_INFO(out->linesize[0]),
-                                      FF_OPENCL_PARAM_INFO(width),
-                                      FF_OPENCL_PARAM_INFO(height),
-                                      NULL);
-        if (ret < 0)
-            return ret;
-
-        kernel2.ctx = ctx;
-        kernel2.kernel = unsharp->opencl_ctx.kernel_chroma;
-        ret = avpriv_opencl_set_parameter(&kernel2,
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask_x),
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask_y),
-                                      FF_OPENCL_PARAM_INFO(unsharp->chroma.amount),
-                                      FF_OPENCL_PARAM_INFO(unsharp->chroma.scalebits),
-                                      FF_OPENCL_PARAM_INFO(unsharp->chroma.halfscale),
-                                      FF_OPENCL_PARAM_INFO(in->linesize[0]),
-                                      FF_OPENCL_PARAM_INFO(in->linesize[1]),
-                                      FF_OPENCL_PARAM_INFO(out->linesize[0]),
-                                      FF_OPENCL_PARAM_INFO(out->linesize[1]),
-                                      FF_OPENCL_PARAM_INFO(link->w),
-                                      FF_OPENCL_PARAM_INFO(link->h),
-                                      FF_OPENCL_PARAM_INFO(cw),
-                                      FF_OPENCL_PARAM_INFO(ch),
-                                      NULL);
-        if (ret < 0)
-            return ret;
-        status = clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
-                                        unsharp->opencl_ctx.kernel_luma, 2, NULL,
-                                        globalWorkSize2dLuma, localWorkSize2d, 0, NULL, NULL);
-        status |=clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
-                                        unsharp->opencl_ctx.kernel_chroma, 2, NULL,
-                                        globalWorkSize2dChroma, localWorkSize2d, 0, NULL, NULL);
-        if (status != CL_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
-            return AVERROR_EXTERNAL;
-        }
-    } else {    /* use default kernel */
-        kernel1.ctx = ctx;
-        kernel1.kernel = unsharp->opencl_ctx.kernel_default;
-
-        ret = avpriv_opencl_set_parameter(&kernel1,
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask),
-                                      FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask),
-                                      FF_OPENCL_PARAM_INFO(unsharp->luma.amount),
-                                      FF_OPENCL_PARAM_INFO(unsharp->chroma.amount),
-                                      FF_OPENCL_PARAM_INFO(unsharp->luma.steps_x),
-                                      FF_OPENCL_PARAM_INFO(unsharp->luma.steps_y),
-                                      FF_OPENCL_PARAM_INFO(unsharp->chroma.steps_x),
-                                      FF_OPENCL_PARAM_INFO(unsharp->chroma.steps_y),
-                                      FF_OPENCL_PARAM_INFO(unsharp->luma.scalebits),
-                                      FF_OPENCL_PARAM_INFO(unsharp->chroma.scalebits),
-                                      FF_OPENCL_PARAM_INFO(unsharp->luma.halfscale),
-                                      FF_OPENCL_PARAM_INFO(unsharp->chroma.halfscale),
-                                      FF_OPENCL_PARAM_INFO(in->linesize[0]),
-                                      FF_OPENCL_PARAM_INFO(in->linesize[1]),
-                                      FF_OPENCL_PARAM_INFO(out->linesize[0]),
-                                      FF_OPENCL_PARAM_INFO(out->linesize[1]),
-                                      FF_OPENCL_PARAM_INFO(link->h),
-                                      FF_OPENCL_PARAM_INFO(link->w),
-                                      FF_OPENCL_PARAM_INFO(ch),
-                                      FF_OPENCL_PARAM_INFO(cw),
-                                      NULL);
-        if (ret < 0)
-            return ret;
-        status = clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
-                                        unsharp->opencl_ctx.kernel_default, 1, NULL,
-                                        &globalWorkSize1d, NULL, 0, NULL, NULL);
-        if (status != CL_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
-            return AVERROR_EXTERNAL;
-        }
-    }
-    //blocking map is suffficient, no need for clFinish
-    //clFinish(unsharp->opencl_ctx.command_queue);
-
-    return av_opencl_buffer_read_image(out->data, unsharp->opencl_ctx.out_plane_size,
-                                       unsharp->opencl_ctx.plane_num, unsharp->opencl_ctx.cl_outbuf,
-                                       unsharp->opencl_ctx.cl_outbuf_size);
-}
-
-int ff_opencl_unsharp_init(AVFilterContext *ctx)
-{
-    int ret = 0;
-    char build_opts[96];
-    UnsharpContext *unsharp = ctx->priv;
-    ret = av_opencl_init(NULL);
-    if (ret < 0)
-        return ret;
-    ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask,
-                                  sizeof(uint32_t) * (2 * unsharp->luma.steps_x + 1) * (2 * unsharp->luma.steps_y + 1),
-                                  CL_MEM_READ_ONLY, NULL);
-    if (ret < 0)
-        return ret;
-    ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask,
-                                  sizeof(uint32_t) * (2 * unsharp->chroma.steps_x + 1) * (2 * unsharp->chroma.steps_y + 1),
-                                  CL_MEM_READ_ONLY, NULL);
-    // separable filters
-    if (ret < 0)
-        return ret;
-    ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask_x,
-                                  sizeof(uint32_t) * (2 * unsharp->luma.steps_x + 1),
-                                  CL_MEM_READ_ONLY, NULL);
-    if (ret < 0)
-        return ret;
-    ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask_y,
-                                  sizeof(uint32_t) * (2 * unsharp->luma.steps_y + 1),
-                                  CL_MEM_READ_ONLY, NULL);
-    if (ret < 0)
-        return ret;
-    ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask_x,
-                                  sizeof(uint32_t) * (2 * unsharp->chroma.steps_x + 1),
-                                  CL_MEM_READ_ONLY, NULL);
-    if (ret < 0)
-        return ret;
-    ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask_y,
-                                  sizeof(uint32_t) * (2 * unsharp->chroma.steps_y + 1),
-                                  CL_MEM_READ_ONLY, NULL);
-    if (ret < 0)
-        return ret;
-    ret = generate_mask(ctx);
-    if (ret < 0)
-        return ret;
-    unsharp->opencl_ctx.plane_num = PLANE_NUM;
-    unsharp->opencl_ctx.command_queue = av_opencl_get_command_queue();
-    if (!unsharp->opencl_ctx.command_queue) {
-        av_log(ctx, AV_LOG_ERROR, "Unable to get OpenCL command queue in filter 'unsharp'\n");
-        return AVERROR(EINVAL);
-    }
-    snprintf(build_opts, 96, "-D LU_RADIUS_X=%d -D LU_RADIUS_Y=%d -D CH_RADIUS_X=%d -D CH_RADIUS_Y=%d",
-            2*unsharp->luma.steps_x+1, 2*unsharp->luma.steps_y+1, 2*unsharp->chroma.steps_x+1, 2*unsharp->chroma.steps_y+1);
-    unsharp->opencl_ctx.program = av_opencl_compile("unsharp", build_opts);
-    if (!unsharp->opencl_ctx.program) {
-        av_log(ctx, AV_LOG_ERROR, "OpenCL failed to compile program 'unsharp'\n");
-        return AVERROR(EINVAL);
-    }
-    if (unsharp->opencl_ctx.use_fast_kernels) {
-        if (!unsharp->opencl_ctx.kernel_luma) {
-            unsharp->opencl_ctx.kernel_luma = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_luma", &ret);
-            if (ret != CL_SUCCESS) {
-                av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_luma'\n");
-                return ret;
-            }
-        }
-        if (!unsharp->opencl_ctx.kernel_chroma) {
-            unsharp->opencl_ctx.kernel_chroma = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_chroma", &ret);
-            if (ret < 0) {
-                av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_chroma'\n");
-                return ret;
-            }
-        }
-    }
-    else {
-        if (!unsharp->opencl_ctx.kernel_default) {
-            unsharp->opencl_ctx.kernel_default = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_default", &ret);
-            if (ret < 0) {
-                av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_default'\n");
-                return ret;
-            }
-        }
-    }
-    return ret;
-}
-
-void ff_opencl_unsharp_uninit(AVFilterContext *ctx)
-{
-    UnsharpContext *unsharp = ctx->priv;
-    av_opencl_buffer_release(&unsharp->opencl_ctx.cl_inbuf);
-    av_opencl_buffer_release(&unsharp->opencl_ctx.cl_outbuf);
-    av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask);
-    av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask);
-    av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask_x);
-    av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask_x);
-    av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask_y);
-    av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask_y);
-    clReleaseKernel(unsharp->opencl_ctx.kernel_default);
-    clReleaseKernel(unsharp->opencl_ctx.kernel_luma);
-    clReleaseKernel(unsharp->opencl_ctx.kernel_chroma);
-    clReleaseProgram(unsharp->opencl_ctx.program);
-    unsharp->opencl_ctx.command_queue = NULL;
-    av_opencl_uninit();
-}
-
-int ff_opencl_unsharp_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
-{
-    int ret = 0;
-    AVFilterLink *link = ctx->inputs[0];
-    UnsharpContext *unsharp = ctx->priv;
-    int ch = AV_CEIL_RSHIFT(link->h, unsharp->vsub);
-
-    if ((!unsharp->opencl_ctx.cl_inbuf) || (!unsharp->opencl_ctx.cl_outbuf)) {
-        unsharp->opencl_ctx.in_plane_size[0]  = (in->linesize[0] * in->height);
-        unsharp->opencl_ctx.in_plane_size[1]  = (in->linesize[1] * ch);
-        unsharp->opencl_ctx.in_plane_size[2]  = (in->linesize[2] * ch);
-        unsharp->opencl_ctx.out_plane_size[0] = (out->linesize[0] * out->height);
-        unsharp->opencl_ctx.out_plane_size[1] = (out->linesize[1] * ch);
-        unsharp->opencl_ctx.out_plane_size[2] = (out->linesize[2] * ch);
-        unsharp->opencl_ctx.cl_inbuf_size  = unsharp->opencl_ctx.in_plane_size[0] +
-                                             unsharp->opencl_ctx.in_plane_size[1] +
-                                             unsharp->opencl_ctx.in_plane_size[2];
-        unsharp->opencl_ctx.cl_outbuf_size = unsharp->opencl_ctx.out_plane_size[0] +
-                                             unsharp->opencl_ctx.out_plane_size[1] +
-                                             unsharp->opencl_ctx.out_plane_size[2];
-        if (!unsharp->opencl_ctx.cl_inbuf) {
-            ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_inbuf,
-                                          unsharp->opencl_ctx.cl_inbuf_size,
-                                          CL_MEM_READ_ONLY, NULL);
-            if (ret < 0)
-                return ret;
-        }
-        if (!unsharp->opencl_ctx.cl_outbuf) {
-            ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_outbuf,
-                                          unsharp->opencl_ctx.cl_outbuf_size,
-                                          CL_MEM_READ_WRITE, NULL);
-            if (ret < 0)
-                return ret;
-        }
-    }
-    return av_opencl_buffer_write_image(unsharp->opencl_ctx.cl_inbuf,
-                                        unsharp->opencl_ctx.cl_inbuf_size,
-                                        0, in->data, unsharp->opencl_ctx.in_plane_size,
-                                        unsharp->opencl_ctx.plane_num);
-}

diff --git a/libavfilter/unsharp_opencl.h b/libavfilter/unsharp_opencl.h
deleted file mode 100644
index 3aefab6..0000000
--- a/libavfilter/unsharp_opencl.h
+++ /dev/null

@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVFILTER_UNSHARP_OPENCL_H
-#define AVFILTER_UNSHARP_OPENCL_H
-
-#include "unsharp.h"
-
-int ff_opencl_unsharp_init(AVFilterContext *ctx);
-
-void ff_opencl_unsharp_uninit(AVFilterContext *ctx);
-
-int ff_opencl_unsharp_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFrame *out);
-
-int ff_opencl_apply_unsharp(AVFilterContext *ctx, AVFrame *in, AVFrame *out);
-
-#endif /* AVFILTER_UNSHARP_OPENCL_H */

diff --git a/libavfilter/unsharp_opencl_kernel.h b/libavfilter/unsharp_opencl_kernel.h
deleted file mode 100644
index 307d0f1..0000000
--- a/libavfilter/unsharp_opencl_kernel.h
+++ /dev/null

@@ -1,342 +0,0 @@
-/*
- * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
- * Copyright (C) 2013 Lenny Wang
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVFILTER_UNSHARP_OPENCL_KERNEL_H
-#define AVFILTER_UNSHARP_OPENCL_KERNEL_H
-
-#include "libavutil/opencl.h"
-
-const char *ff_kernel_unsharp_opencl = AV_OPENCL_KERNEL(
-inline unsigned char clip_uint8(int a)
-{
-    if (a & (~0xFF))
-        return (-a)>>31;
-    else
-        return a;
-}
-
-kernel void unsharp_luma(
-                    global unsigned char *src,
-                    global unsigned char *dst,
-                    global int *mask_x,
-                    global int *mask_y,
-                    int amount,
-                    int scalebits,
-                    int halfscale,
-                    int src_stride,
-                    int dst_stride,
-                    int width,
-                    int height)
-{
-    int2 threadIdx, blockIdx, globalIdx;
-    threadIdx.x = get_local_id(0);
-    threadIdx.y = get_local_id(1);
-    blockIdx.x = get_group_id(0);
-    blockIdx.y = get_group_id(1);
-    globalIdx.x = get_global_id(0);
-    globalIdx.y = get_global_id(1);
-
-    if (!amount) {
-        if (globalIdx.x < width && globalIdx.y < height)
-            dst[globalIdx.x + globalIdx.y*dst_stride] = src[globalIdx.x + globalIdx.y*src_stride];
-        return;
-    }
-
-    local unsigned int l[32][32];
-    local unsigned int lcx[LU_RADIUS_X];
-    local unsigned int lcy[LU_RADIUS_Y];
-    int indexIx, indexIy, i, j;
-
-    //load up tile: actual workspace + halo of 8 points in x and y \n
-    for(i = 0; i <= 1; i++) {
-        indexIy = -8 + (blockIdx.y + i) * 16 + threadIdx.y;
-        indexIy = indexIy < 0 ? 0 : indexIy;
-        indexIy = indexIy >= height ? height - 1: indexIy;
-        for(j = 0; j <= 1; j++) {
-            indexIx = -8 + (blockIdx.x + j) * 16 + threadIdx.x;
-            indexIx = indexIx < 0 ? 0 : indexIx;
-            indexIx = indexIx >= width ? width - 1: indexIx;
-            l[i*16 + threadIdx.y][j*16 + threadIdx.x] = src[indexIy*src_stride + indexIx];
-        }
-    }
-
-    int indexL = threadIdx.y*16 + threadIdx.x;
-    if (indexL < LU_RADIUS_X)
-        lcx[indexL] = mask_x[indexL];
-    if (indexL < LU_RADIUS_Y)
-        lcy[indexL] = mask_y[indexL];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    //needed for unsharp mask application in the end \n
-    int orig_value = (int)l[threadIdx.y + 8][threadIdx.x + 8];
-
-    int idx, idy, maskIndex;
-    int temp[2] = {0};
-    int steps_x = (LU_RADIUS_X-1)/2;
-    int steps_y = (LU_RADIUS_Y-1)/2;
-
-    // compute the actual workspace + left&right halos \n
-      \n#pragma unroll\n
-    for (j = 0; j <=1; j++) {
-      //extra work to cover left and right halos \n
-      idx = 16*j + threadIdx.x;
-      \n#pragma unroll\n
-        for (i = -steps_y; i <= steps_y; i++) {
-          idy = 8 + i + threadIdx.y;
-          maskIndex = (i + steps_y);
-          temp[j] += (int)l[idy][idx] * lcy[maskIndex];
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    //save results from the vertical filter in local memory \n
-    idy = 8 + threadIdx.y;
-      \n#pragma unroll\n
-    for (j = 0; j <=1; j++) {
-      idx = 16*j + threadIdx.x;
-      l[idy][idx] = temp[j];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    //compute results with the horizontal filter \n
-    int sum = 0;
-    idy = 8 + threadIdx.y;
-    \n#pragma unroll\n
-      for (j = -steps_x; j <= steps_x; j++) {
-        idx = 8 + j + threadIdx.x;
-        maskIndex = j + steps_x;
-        sum += (int)l[idy][idx] * lcx[maskIndex];
-      }
-
-    int res = orig_value + (((orig_value - (int)((sum + halfscale) >> scalebits)) * amount) >> 16);
-
-    if (globalIdx.x < width && globalIdx.y < height)
-        dst[globalIdx.x + globalIdx.y*dst_stride] = clip_uint8(res);
-}
-
-kernel void unsharp_chroma(
-                    global unsigned char *src_y,
-                    global unsigned char *dst_y,
-                    global int *mask_x,
-                    global int *mask_y,
-                    int amount,
-                    int scalebits,
-                    int halfscale,
-                    int src_stride_lu,
-                    int src_stride_ch,
-                    int dst_stride_lu,
-                    int dst_stride_ch,
-                    int width,
-                    int height,
-                    int cw,
-                    int ch)
-{
-    global unsigned char *dst_u = dst_y + height * dst_stride_lu;
-    global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
-    global unsigned char *src_u = src_y + height * src_stride_lu;
-    global unsigned char *src_v = src_u + ch * src_stride_ch;
-    int2 threadIdx, blockIdx, globalIdx;
-    threadIdx.x = get_local_id(0);
-    threadIdx.y = get_local_id(1);
-    blockIdx.x = get_group_id(0);
-    blockIdx.y = get_group_id(1);
-    globalIdx.x = get_global_id(0);
-    globalIdx.y = get_global_id(1);
-    int padch = get_global_size(1)/2;
-    global unsigned char *src = globalIdx.y>=padch ? src_v : src_u;
-    global unsigned char *dst = globalIdx.y>=padch ? dst_v : dst_u;
-
-    blockIdx.y = globalIdx.y>=padch ? blockIdx.y - get_num_groups(1)/2 : blockIdx.y;
-    globalIdx.y = globalIdx.y>=padch ? globalIdx.y - padch : globalIdx.y;
-
-    if (!amount) {
-        if (globalIdx.x < cw && globalIdx.y < ch)
-            dst[globalIdx.x + globalIdx.y*dst_stride_ch] = src[globalIdx.x + globalIdx.y*src_stride_ch];
-        return;
-    }
-
-    local unsigned int l[32][32];
-    local unsigned int lcx[CH_RADIUS_X];
-    local unsigned int lcy[CH_RADIUS_Y];
-    int indexIx, indexIy, i, j;
-    for(i = 0; i <= 1; i++) {
-        indexIy = -8 + (blockIdx.y + i) * 16 + threadIdx.y;
-        indexIy = indexIy < 0 ? 0 : indexIy;
-        indexIy = indexIy >= ch ? ch - 1: indexIy;
-        for(j = 0; j <= 1; j++) {
-            indexIx = -8 + (blockIdx.x + j) * 16 + threadIdx.x;
-            indexIx = indexIx < 0 ? 0 : indexIx;
-            indexIx = indexIx >= cw ? cw - 1: indexIx;
-            l[i*16 + threadIdx.y][j*16 + threadIdx.x] = src[indexIy * src_stride_ch + indexIx];
-        }
-    }
-
-    int indexL = threadIdx.y*16 + threadIdx.x;
-    if (indexL < CH_RADIUS_X)
-        lcx[indexL] = mask_x[indexL];
-    if (indexL < CH_RADIUS_Y)
-        lcy[indexL] = mask_y[indexL];
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    int orig_value = (int)l[threadIdx.y + 8][threadIdx.x + 8];
-
-    int idx, idy, maskIndex;
-    int steps_x = CH_RADIUS_X/2;
-    int steps_y = CH_RADIUS_Y/2;
-    int temp[2] = {0,0};
-
-    \n#pragma unroll\n
-      for (j = 0; j <= 1; j++) {
-        idx = 16*j + threadIdx.x;
-        \n#pragma unroll\n
-          for (i = -steps_y; i <= steps_y; i++) {
-            idy = 8 + i + threadIdx.y;
-            maskIndex = i + steps_y;
-            temp[j] += (int)l[idy][idx] * lcy[maskIndex];
-          }
-      }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    idy = 8 + threadIdx.y;
-    \n#pragma unroll\n
-    for (j = 0; j <= 1; j++) {
-      idx = 16*j + threadIdx.x;
-      l[idy][idx] = temp[j];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    //compute results with the horizontal filter \n
-    int sum = 0;
-    idy = 8 + threadIdx.y;
-    \n#pragma unroll\n
-      for (j = -steps_x; j <= steps_x; j++) {
-        idx = 8 + j + threadIdx.x;
-        maskIndex = j + steps_x;
-        sum += (int)l[idy][idx] * lcx[maskIndex];
-      }
-
-    int res = orig_value + (((orig_value - (int)((sum + halfscale) >> scalebits)) * amount) >> 16);
-
-    if (globalIdx.x < cw && globalIdx.y < ch)
-        dst[globalIdx.x + globalIdx.y*dst_stride_ch] = clip_uint8(res);
-}
-
-kernel void unsharp_default(global  unsigned char *src,
-                    global  unsigned char *dst,
-                    const global  unsigned int *mask_lu,
-                    const global  unsigned int *mask_ch,
-                    int amount_lu,
-                    int amount_ch,
-                    int step_x_lu,
-                    int step_y_lu,
-                    int step_x_ch,
-                    int step_y_ch,
-                    int scalebits_lu,
-                    int scalebits_ch,
-                    int halfscale_lu,
-                    int halfscale_ch,
-                    int src_stride_lu,
-                    int src_stride_ch,
-                    int dst_stride_lu,
-                    int dst_stride_ch,
-                    int height,
-                    int width,
-                    int ch,
-                    int cw)
-{
-    global unsigned char *dst_y = dst;
-    global unsigned char *dst_u = dst_y + height * dst_stride_lu;
-    global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
-
-    global unsigned char *src_y = src;
-    global unsigned char *src_u = src_y + height * src_stride_lu;
-    global unsigned char *src_v = src_u + ch * src_stride_ch;
-
-    global unsigned char *temp_dst;
-    global unsigned char *temp_src;
-    const global unsigned int *temp_mask;
-    int global_id = get_global_id(0);
-    int i, j, x, y, temp_src_stride, temp_dst_stride, temp_height, temp_width, temp_steps_x, temp_steps_y,
-        temp_amount, temp_scalebits, temp_halfscale, sum, idx_x, idx_y, temp, res;
-    if (global_id < width * height) {
-        y = global_id / width;
-        x = global_id % width;
-        temp_dst = dst_y;
-        temp_src = src_y;
-        temp_src_stride = src_stride_lu;
-        temp_dst_stride = dst_stride_lu;
-        temp_height = height;
-        temp_width = width;
-        temp_steps_x = step_x_lu;
-        temp_steps_y = step_y_lu;
-        temp_mask = mask_lu;
-        temp_amount = amount_lu;
-        temp_scalebits = scalebits_lu;
-        temp_halfscale = halfscale_lu;
-    } else if ((global_id >= width * height) && (global_id < width * height + ch * cw)) {
-        y = (global_id - width * height) / cw;
-        x = (global_id - width * height) % cw;
-        temp_dst = dst_u;
-        temp_src = src_u;
-        temp_src_stride = src_stride_ch;
-        temp_dst_stride = dst_stride_ch;
-        temp_height = ch;
-        temp_width = cw;
-        temp_steps_x = step_x_ch;
-        temp_steps_y = step_y_ch;
-        temp_mask = mask_ch;
-        temp_amount = amount_ch;
-        temp_scalebits = scalebits_ch;
-        temp_halfscale = halfscale_ch;
-    } else {
-        y = (global_id - width * height - ch * cw) / cw;
-        x = (global_id - width * height - ch * cw) % cw;
-        temp_dst = dst_v;
-        temp_src = src_v;
-        temp_src_stride = src_stride_ch;
-        temp_dst_stride = dst_stride_ch;
-        temp_height = ch;
-        temp_width = cw;
-        temp_steps_x = step_x_ch;
-        temp_steps_y = step_y_ch;
-        temp_mask = mask_ch;
-        temp_amount = amount_ch;
-        temp_scalebits = scalebits_ch;
-        temp_halfscale = halfscale_ch;
-    }
-    if (temp_amount) {
-        sum = 0;
-        for (j = 0; j <= 2 * temp_steps_y; j++) {
-            idx_y = (y - temp_steps_y + j) <= 0 ? 0 : (y - temp_steps_y + j) >= temp_height ? temp_height-1 : y - temp_steps_y + j;
-            for (i = 0; i <= 2 * temp_steps_x; i++) {
-                idx_x = (x - temp_steps_x + i) <= 0 ? 0 : (x - temp_steps_x + i) >= temp_width ? temp_width-1 : x - temp_steps_x + i;
-                sum += temp_mask[i + j * (2 * temp_steps_x + 1)] * temp_src[idx_x + idx_y * temp_src_stride];
-            }
-        }
-        temp = (int)temp_src[x + y * temp_src_stride];
-        res = temp + (((temp - (int)((sum + temp_halfscale) >> temp_scalebits)) * temp_amount) >> 16);
-        temp_dst[x + y * temp_dst_stride] = clip_uint8(res);
-    } else {
-        temp_dst[x + y * temp_dst_stride] = temp_src[x + y * temp_src_stride];
-    }
-}
-);
-
-#endif /* AVFILTER_UNSHARP_OPENCL_KERNEL_H */

diff --git a/libavfilter/vaapi_vpp.c b/libavfilter/vaapi_vpp.c
new file mode 100644
index 0000000..c5bbc3b
--- /dev/null
+++ b/libavfilter/vaapi_vpp.c

@@ -0,0 +1,373 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/pixdesc.h"
+#include "formats.h"
+#include "internal.h"
+#include "vaapi_vpp.h"
+
+int ff_vaapi_vpp_query_formats(AVFilterContext *avctx)
+{
+    enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_VAAPI, AV_PIX_FMT_NONE,
+    };
+    int err;
+
+    if ((err = ff_formats_ref(ff_make_format_list(pix_fmts),
+                              &avctx->inputs[0]->out_formats)) < 0)
+        return err;
+    if ((err = ff_formats_ref(ff_make_format_list(pix_fmts),
+                              &avctx->outputs[0]->in_formats)) < 0)
+        return err;
+
+    return 0;
+}
+
+void ff_vaapi_vpp_pipeline_uninit(AVFilterContext *avctx)
+{
+    VAAPIVPPContext *ctx   = avctx->priv;
+    int i;
+    for (i = 0; i < ctx->nb_filter_buffers; i++) {
+        if (ctx->filter_buffers[i] != VA_INVALID_ID) {
+            vaDestroyBuffer(ctx->hwctx->display, ctx->filter_buffers[i]);
+            ctx->filter_buffers[i] = VA_INVALID_ID;
+        }
+    }
+    ctx->nb_filter_buffers = 0;
+
+    if (ctx->va_context != VA_INVALID_ID) {
+        vaDestroyContext(ctx->hwctx->display, ctx->va_context);
+        ctx->va_context = VA_INVALID_ID;
+    }
+
+    if (ctx->va_config != VA_INVALID_ID) {
+        vaDestroyConfig(ctx->hwctx->display, ctx->va_config);
+        ctx->va_config = VA_INVALID_ID;
+    }
+
+    av_buffer_unref(&ctx->device_ref);
+    ctx->hwctx = NULL;
+}
+
+int ff_vaapi_vpp_config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *avctx = inlink->dst;
+    VAAPIVPPContext *ctx   = avctx->priv;
+
+    if (ctx->pipeline_uninit)
+        ctx->pipeline_uninit(avctx);
+
+    if (!inlink->hw_frames_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is "
+               "required to associate the processing device.\n");
+        return AVERROR(EINVAL);
+    }
+
+    ctx->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
+    if (!ctx->input_frames_ref) {
+        av_log(avctx, AV_LOG_ERROR, "A input frames reference create "
+               "failed.\n");
+        return AVERROR(ENOMEM);
+    }
+    ctx->input_frames = (AVHWFramesContext*)ctx->input_frames_ref->data;
+
+    return 0;
+}
+
+int ff_vaapi_vpp_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    VAAPIVPPContext *ctx   = avctx->priv;
+    AVVAAPIHWConfig *hwconfig = NULL;
+    AVHWFramesConstraints *constraints = NULL;
+    AVHWFramesContext *output_frames;
+    AVVAAPIFramesContext *va_frames;
+    VAStatus vas;
+    int err, i;
+
+    if (ctx->pipeline_uninit)
+        ctx->pipeline_uninit(avctx);
+
+    if (!ctx->output_width)
+        ctx->output_width  = avctx->inputs[0]->w;
+    if (!ctx->output_height)
+        ctx->output_height = avctx->inputs[0]->h;
+
+    av_assert0(ctx->input_frames);
+    ctx->device_ref = av_buffer_ref(ctx->input_frames->device_ref);
+    if (!ctx->device_ref) {
+        av_log(avctx, AV_LOG_ERROR, "A device reference create "
+               "failed.\n");
+        return AVERROR(ENOMEM);
+    }
+    ctx->hwctx = ((AVHWDeviceContext*)ctx->device_ref->data)->hwctx;
+
+    av_assert0(ctx->va_config == VA_INVALID_ID);
+    vas = vaCreateConfig(ctx->hwctx->display, VAProfileNone,
+                         VAEntrypointVideoProc, NULL, 0, &ctx->va_config);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create processing pipeline "
+               "config: %d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    hwconfig = av_hwdevice_hwconfig_alloc(ctx->device_ref);
+    if (!hwconfig) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    hwconfig->config_id = ctx->va_config;
+
+    constraints = av_hwdevice_get_hwframe_constraints(ctx->device_ref,
+                                                      hwconfig);
+    if (!constraints) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    if (ctx->output_format == AV_PIX_FMT_NONE)
+        ctx->output_format = ctx->input_frames->sw_format;
+    if (constraints->valid_sw_formats) {
+        for (i = 0; constraints->valid_sw_formats[i] != AV_PIX_FMT_NONE; i++) {
+            if (ctx->output_format == constraints->valid_sw_formats[i])
+                break;
+        }
+        if (constraints->valid_sw_formats[i] == AV_PIX_FMT_NONE) {
+            av_log(avctx, AV_LOG_ERROR, "Hardware does not support output "
+                   "format %s.\n", av_get_pix_fmt_name(ctx->output_format));
+            err = AVERROR(EINVAL);
+            goto fail;
+        }
+    }
+
+    if (ctx->output_width  < constraints->min_width  ||
+        ctx->output_height < constraints->min_height ||
+        ctx->output_width  > constraints->max_width  ||
+        ctx->output_height > constraints->max_height) {
+        av_log(avctx, AV_LOG_ERROR, "Hardware does not support scaling to "
+               "size %dx%d (constraints: width %d-%d height %d-%d).\n",
+               ctx->output_width, ctx->output_height,
+               constraints->min_width,  constraints->max_width,
+               constraints->min_height, constraints->max_height);
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    outlink->hw_frames_ctx = av_hwframe_ctx_alloc(ctx->device_ref);
+    if (!outlink->hw_frames_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create HW frame context "
+               "for output.\n");
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    output_frames = (AVHWFramesContext*)outlink->hw_frames_ctx->data;
+
+    output_frames->format    = AV_PIX_FMT_VAAPI;
+    output_frames->sw_format = ctx->output_format;
+    output_frames->width     = ctx->output_width;
+    output_frames->height    = ctx->output_height;
+
+    output_frames->initial_pool_size = 4;
+
+    err = ff_filter_init_hw_frames(avctx, outlink, 10);
+    if (err < 0)
+        goto fail;
+
+    err = av_hwframe_ctx_init(outlink->hw_frames_ctx);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialise VAAPI frame "
+               "context for output: %d\n", err);
+        goto fail;
+    }
+
+    va_frames = output_frames->hwctx;
+
+    av_assert0(ctx->va_context == VA_INVALID_ID);
+    vas = vaCreateContext(ctx->hwctx->display, ctx->va_config,
+                          ctx->output_width, ctx->output_height,
+                          VA_PROGRESSIVE,
+                          va_frames->surface_ids, va_frames->nb_surfaces,
+                          &ctx->va_context);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create processing pipeline "
+               "context: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR(EIO);
+    }
+
+    outlink->w = ctx->output_width;
+    outlink->h = ctx->output_height;
+
+    if (ctx->build_filter_params) {
+        err = ctx->build_filter_params(avctx);
+        if (err < 0)
+            goto fail;
+    }
+
+    av_freep(&hwconfig);
+    av_hwframe_constraints_free(&constraints);
+    return 0;
+
+fail:
+    av_buffer_unref(&outlink->hw_frames_ctx);
+    av_freep(&hwconfig);
+    av_hwframe_constraints_free(&constraints);
+    return err;
+}
+
+int ff_vaapi_vpp_colour_standard(enum AVColorSpace av_cs)
+{
+    switch(av_cs) {
+#define CS(av, va) case AVCOL_SPC_ ## av: return VAProcColorStandard ## va;
+        CS(BT709,     BT709);
+        CS(BT470BG,   BT601);
+        CS(SMPTE170M, SMPTE170M);
+        CS(SMPTE240M, SMPTE240M);
+#undef CS
+    default:
+        return VAProcColorStandardNone;
+    }
+}
+
+int ff_vaapi_vpp_make_param_buffers(AVFilterContext *avctx,
+                                    int type,
+                                    const void *data,
+                                    size_t size,
+                                    int count)
+{
+    VAStatus vas;
+    VABufferID buffer;
+    VAAPIVPPContext *ctx   = avctx->priv;
+
+    av_assert0(ctx->nb_filter_buffers + 1 <= VAProcFilterCount);
+
+    vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
+                         type, size, count, (void*)data, &buffer);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create parameter "
+               "buffer (type %d): %d (%s).\n",
+               type, vas, vaErrorStr(vas));
+        return AVERROR(EIO);
+    }
+
+    ctx->filter_buffers[ctx->nb_filter_buffers++] = buffer;
+
+    av_log(avctx, AV_LOG_DEBUG, "Param buffer (type %d, %zu bytes, count %d) "
+           "is %#x.\n", type, size, count, buffer);
+    return 0;
+}
+
+
+int ff_vaapi_vpp_render_picture(AVFilterContext *avctx,
+                                VAProcPipelineParameterBuffer *params,
+                                VASurfaceID output_surface)
+{
+    VABufferID params_id;
+    VAStatus vas;
+    int err = 0;
+    VAAPIVPPContext *ctx   = avctx->priv;
+
+    vas = vaBeginPicture(ctx->hwctx->display,
+                         ctx->va_context, output_surface);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to attach new picture: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
+                         VAProcPipelineParameterBufferType,
+                         sizeof(*params), 1, params, &params_id);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create parameter buffer: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        goto fail_after_begin;
+    }
+    av_log(avctx, AV_LOG_DEBUG, "Pipeline parameter buffer is %#x.\n",
+           params_id);
+
+    vas = vaRenderPicture(ctx->hwctx->display, ctx->va_context,
+                          &params_id, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to render parameter buffer: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        goto fail_after_begin;
+    }
+
+    vas = vaEndPicture(ctx->hwctx->display, ctx->va_context);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to start picture processing: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        goto fail_after_render;
+    }
+
+    if (CONFIG_VAAPI_1 || ctx->hwctx->driver_quirks &
+        AV_VAAPI_DRIVER_QUIRK_RENDER_PARAM_BUFFERS) {
+        vas = vaDestroyBuffer(ctx->hwctx->display, params_id);
+        if (vas != VA_STATUS_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to free parameter buffer: "
+                   "%d (%s).\n", vas, vaErrorStr(vas));
+            // And ignore.
+        }
+    }
+
+    return 0;
+
+    // We want to make sure that if vaBeginPicture has been called, we also
+    // call vaRenderPicture and vaEndPicture.  These calls may well fail or
+    // do something else nasty, but once we're in this failure case there
+    // isn't much else we can do.
+fail_after_begin:
+    vaRenderPicture(ctx->hwctx->display, ctx->va_context, &params_id, 1);
+fail_after_render:
+    vaEndPicture(ctx->hwctx->display, ctx->va_context);
+fail:
+    return err;
+}
+
+void ff_vaapi_vpp_ctx_init(AVFilterContext *avctx)
+{
+    int i;
+    VAAPIVPPContext *ctx   = avctx->priv;
+
+    ctx->va_config  = VA_INVALID_ID;
+    ctx->va_context = VA_INVALID_ID;
+    ctx->valid_ids  = 1;
+
+    for (i = 0; i < VAProcFilterCount; i++)
+        ctx->filter_buffers[i] = VA_INVALID_ID;
+    ctx->nb_filter_buffers = 0;
+}
+
+void ff_vaapi_vpp_ctx_uninit(AVFilterContext *avctx)
+{
+    VAAPIVPPContext *ctx   = avctx->priv;
+    if (ctx->valid_ids && ctx->pipeline_uninit)
+        ctx->pipeline_uninit(avctx);
+
+    av_buffer_unref(&ctx->input_frames_ref);
+    av_buffer_unref(&ctx->device_ref);
+}

diff --git a/libavfilter/vaapi_vpp.h b/libavfilter/vaapi_vpp.h
new file mode 100644
index 0000000..0bc3101
--- /dev/null
+++ b/libavfilter/vaapi_vpp.h

@@ -0,0 +1,79 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VAAPI_VPP_H
+#define AVFILTER_VAAPI_VPP_H
+
+#include <va/va.h>
+#include <va/va_vpp.h>
+
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_vaapi.h"
+
+#include "avfilter.h"
+
+typedef struct VAAPIVPPContext {
+    const AVClass *class;
+
+    AVVAAPIDeviceContext *hwctx;
+    AVBufferRef *device_ref;
+
+    int valid_ids;
+    VAConfigID  va_config;
+    VAContextID va_context;
+
+    AVBufferRef       *input_frames_ref;
+    AVHWFramesContext *input_frames;
+
+    enum AVPixelFormat output_format;
+    int output_width;   // computed width
+    int output_height;  // computed height
+
+    VABufferID         filter_buffers[VAProcFilterCount];
+    int                nb_filter_buffers;
+
+    int (*build_filter_params)(AVFilterContext *avctx);
+
+    void (*pipeline_uninit)(AVFilterContext *avctx);
+} VAAPIVPPContext;
+
+void ff_vaapi_vpp_ctx_init(AVFilterContext *avctx);
+
+void ff_vaapi_vpp_ctx_uninit(AVFilterContext *avctx);
+
+int ff_vaapi_vpp_query_formats(AVFilterContext *avctx);
+
+void ff_vaapi_vpp_pipeline_uninit(AVFilterContext *avctx);
+
+int ff_vaapi_vpp_config_input(AVFilterLink *inlink);
+
+int ff_vaapi_vpp_config_output(AVFilterLink *outlink);
+
+int ff_vaapi_vpp_colour_standard(enum AVColorSpace av_cs);
+
+int ff_vaapi_vpp_make_param_buffers(AVFilterContext *avctx,
+                                    int type,
+                                    const void *data,
+                                    size_t size,
+                                    int count);
+
+int ff_vaapi_vpp_render_picture(AVFilterContext *avctx,
+                                VAProcPipelineParameterBuffer *params,
+                                VASurfaceID output_surface);
+
+#endif /* AVFILTER_VAAPI_VPP_H */

diff --git a/libavfilter/version.h b/libavfilter/version.h
index 3e67ad3..30e961b 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h

@@ -29,9 +29,9 @@
 
 #include "libavutil/version.h"
 
-#define LIBAVFILTER_VERSION_MAJOR   6
-#define LIBAVFILTER_VERSION_MINOR 107
-#define LIBAVFILTER_VERSION_MICRO 100
+#define LIBAVFILTER_VERSION_MAJOR   7
+#define LIBAVFILTER_VERSION_MINOR  33
+#define LIBAVFILTER_VERSION_MICRO 101
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                                LIBAVFILTER_VERSION_MINOR, \
@@ -49,26 +49,17 @@
  * the public API and may change, break or disappear at any time.
  */
 
-#ifndef FF_API_OLD_FILTER_OPTS
-#define FF_API_OLD_FILTER_OPTS              (LIBAVFILTER_VERSION_MAJOR < 7)
-#endif
 #ifndef FF_API_OLD_FILTER_OPTS_ERROR
-#define FF_API_OLD_FILTER_OPTS_ERROR        (LIBAVFILTER_VERSION_MAJOR < 7)
-#endif
-#ifndef FF_API_AVFILTER_OPEN
-#define FF_API_AVFILTER_OPEN                (LIBAVFILTER_VERSION_MAJOR < 7)
-#endif
-#ifndef FF_API_AVFILTER_INIT_FILTER
-#define FF_API_AVFILTER_INIT_FILTER         (LIBAVFILTER_VERSION_MAJOR < 7)
-#endif
-#ifndef FF_API_OLD_FILTER_REGISTER
-#define FF_API_OLD_FILTER_REGISTER          (LIBAVFILTER_VERSION_MAJOR < 7)
-#endif
-#ifndef FF_API_NOCONST_GET_NAME
-#define FF_API_NOCONST_GET_NAME             (LIBAVFILTER_VERSION_MAJOR < 7)
+#define FF_API_OLD_FILTER_OPTS_ERROR        (LIBAVFILTER_VERSION_MAJOR < 8)
 #endif
 #ifndef FF_API_LAVR_OPTS
-#define FF_API_LAVR_OPTS                    (LIBAVFILTER_VERSION_MAJOR < 7)
+#define FF_API_LAVR_OPTS                    (LIBAVFILTER_VERSION_MAJOR < 8)
+#endif
+#ifndef FF_API_FILTER_GET_SET
+#define FF_API_FILTER_GET_SET               (LIBAVFILTER_VERSION_MAJOR < 8)
+#endif
+#ifndef FF_API_NEXT
+#define FF_API_NEXT                         (LIBAVFILTER_VERSION_MAJOR < 8)
 #endif
 
 #endif /* AVFILTER_VERSION_H */

diff --git a/libavfilter/vf_amplify.c b/libavfilter/vf_amplify.c
new file mode 100644
index 0000000..08243ad
--- /dev/null
+++ b/libavfilter/vf_amplify.c

@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct AmplifyContext {
+    const AVClass *class;
+    const AVPixFmtDescriptor *desc;
+    int radius;
+    float factor;
+    float threshold;
+    int planes;
+
+    int llimit;
+    int hlimit;
+    int nb_inputs;
+    int nb_frames;
+
+    int depth;
+    int nb_planes;
+    int linesize[4];
+    int height[4];
+
+    AVFrame **frames;
+} AmplifyContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pixel_fmts[] = {
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9,
+        AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14,
+        AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV440P10,
+        AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV420P12,
+        AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV444P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV420P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, formats);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    AmplifyContext *s = ctx->priv;
+
+    s->nb_inputs = s->radius * 2 + 1;
+
+    s->frames = av_calloc(s->nb_inputs, sizeof(*s->frames));
+    if (!s->frames)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+typedef struct ThreadData {
+    AVFrame **in, *out;
+} ThreadData;
+
+static int amplify_frame(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    AmplifyContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame **in = td->in;
+    AVFrame *out = td->out;
+    const int radius = s->radius;
+    const int nb_inputs = s->nb_inputs;
+    const float threshold = s->threshold;
+    const float factor = s->factor;
+    const int llimit = s->llimit;
+    const int hlimit = s->hlimit;
+    const int depth = s->depth;
+    int i, p, x, y;
+
+    if (s->depth <= 8) {
+        for (p = 0; p < s->nb_planes; p++) {
+            const int slice_start = (s->height[p] * jobnr) / nb_jobs;
+            const int slice_end = (s->height[p] * (jobnr+1)) / nb_jobs;
+            uint8_t *dst = out->data[p] + slice_start * out->linesize[p];
+
+            if (!((1 << p) & s->planes)) {
+                av_image_copy_plane(dst, out->linesize[p],
+                                    in[radius]->data[p] + slice_start * in[radius]->linesize[p],
+                                    in[radius]->linesize[p],
+                                    s->linesize[p], slice_end - slice_start);
+                continue;
+            }
+
+            for (y = slice_start; y < slice_end; y++) {
+                for (x = 0; x < s->linesize[p]; x++) {
+                    int src = in[radius]->data[p][y * in[radius]->linesize[p] + x];
+                    float diff, avg;
+                    int sum = 0;
+
+                    for (i = 0; i < nb_inputs; i++) {
+                        sum += in[i]->data[p][y * in[i]->linesize[p] + x];
+                    }
+
+                    avg = sum / (float)nb_inputs;
+                    diff = src - avg;
+                    if (fabsf(diff) < threshold) {
+                        int amp;
+                        if (diff < 0) {
+                            amp = -FFMIN(FFABS(diff * factor), llimit);
+                        } else {
+                            amp = FFMIN(FFABS(diff * factor), hlimit);
+                        }
+                        dst[x] = av_clip_uint8(src + amp);
+                    } else {
+                        dst[x] = src;
+                    }
+                }
+
+                dst += out->linesize[p];
+            }
+        }
+    } else {
+        for (p = 0; p < s->nb_planes; p++) {
+            const int slice_start = (s->height[p] * jobnr) / nb_jobs;
+            const int slice_end = (s->height[p] * (jobnr+1)) / nb_jobs;
+            uint16_t *dst = (uint16_t *)(out->data[p] + slice_start * out->linesize[p]);
+
+            if (!((1 << p) & s->planes)) {
+                av_image_copy_plane((uint8_t *)dst, out->linesize[p],
+                                    in[radius]->data[p] + slice_start * in[radius]->linesize[p],
+                                    in[radius]->linesize[p],
+                                    s->linesize[p], slice_end - slice_start);
+                continue;
+            }
+
+            for (y = slice_start; y < slice_end; y++) {
+                for (x = 0; x < s->linesize[p] / 2; x++) {
+                    int src = AV_RN16(in[radius]->data[p] + y * in[radius]->linesize[p] + x * 2);
+                    float diff, avg;
+                    int sum = 0;
+
+                    for (i = 0; i < nb_inputs; i++) {
+                        sum += AV_RN16(in[i]->data[p] + y * in[i]->linesize[p] + x * 2);
+                    }
+
+                    avg = sum / (float)nb_inputs;
+                    diff = src - avg;
+
+                    if (fabsf(diff) < threshold) {
+                        int amp;
+                        if (diff < 0) {
+                            amp = -FFMIN(FFABS(diff * factor), llimit);
+                        } else {
+                            amp = FFMIN(FFABS(diff * factor), hlimit);
+                        }
+                        dst[x] = av_clip_uintp2_c(src + amp, depth);
+                    } else {
+                        dst[x] = src;
+                    }
+                }
+
+                dst += out->linesize[p] / 2;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AmplifyContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    int ret;
+
+    s->desc = av_pix_fmt_desc_get(outlink->format);
+    if (!s->desc)
+        return AVERROR_BUG;
+    s->nb_planes = av_pix_fmt_count_planes(outlink->format);
+    s->depth = s->desc->comp[0].depth;
+
+    if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
+        return ret;
+
+    s->height[1] = s->height[2] = AV_CEIL_RSHIFT(inlink->h, s->desc->log2_chroma_h);
+    s->height[0] = s->height[3] = inlink->h;
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AmplifyContext *s = ctx->priv;
+    int i;
+
+    if (s->frames) {
+        for (i = 0; i < s->nb_frames; i++)
+           av_frame_free(&s->frames[i]);
+    }
+    av_freep(&s->frames);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AmplifyContext *s = ctx->priv;
+    ThreadData td;
+    AVFrame *out;
+
+    if (s->nb_frames < s->nb_inputs) {
+        s->frames[s->nb_frames] = in;
+        s->nb_frames++;
+        return 0;
+    } else {
+        av_frame_free(&s->frames[0]);
+        memmove(&s->frames[0], &s->frames[1], sizeof(*s->frames) * (s->nb_inputs - 1));
+        s->frames[s->nb_inputs - 1] = in;
+    }
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out)
+        return AVERROR(ENOMEM);
+    out->pts = s->frames[0]->pts;
+
+    td.out = out;
+    td.in = s->frames;
+    ctx->internal->execute(ctx, amplify_frame, &td, NULL, FFMIN(s->height[1], ff_filter_get_nb_threads(ctx)));
+
+    return ff_filter_frame(outlink, out);
+}
+
+#define OFFSET(x) offsetof(AmplifyContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption amplify_options[] = {
+    { "radius", "set radius", OFFSET(radius), AV_OPT_TYPE_INT, {.i64=2}, 1, 63, .flags = FLAGS },
+    { "factor", "set factor", OFFSET(factor), AV_OPT_TYPE_FLOAT, {.dbl=2}, 0, UINT16_MAX, .flags = FLAGS },
+    { "threshold", "set threshold", OFFSET(threshold), AV_OPT_TYPE_FLOAT, {.dbl=10}, 0, UINT16_MAX, .flags = FLAGS },
+    { "low", "set low limit for amplification", OFFSET(llimit), AV_OPT_TYPE_INT, {.i64=UINT16_MAX}, 0, UINT16_MAX, .flags = FLAGS },
+    { "high", "set high limit for amplification", OFFSET(hlimit), AV_OPT_TYPE_INT, {.i64=UINT16_MAX}, 0, UINT16_MAX, .flags = FLAGS },
+    { "planes", "set what planes to filter", OFFSET(planes), AV_OPT_TYPE_FLAGS, {.i64=7},    0, 15,  FLAGS },
+    { NULL },
+};
+
+static const AVFilterPad inputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .filter_frame  = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+    },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(amplify);
+
+AVFilter ff_vf_amplify = {
+    .name          = "amplify",
+    .description   = NULL_IF_CONFIG_SMALL("Amplify changes between successive video frames."),
+    .priv_size     = sizeof(AmplifyContext),
+    .priv_class    = &amplify_class,
+    .query_formats = query_formats,
+    .outputs       = outputs,
+    .inputs        = inputs,
+    .init          = init,
+    .uninit        = uninit,
+    .flags         = AVFILTER_FLAG_SLICE_THREADS,
+};

diff --git a/libavfilter/vf_aspect.c b/libavfilter/vf_aspect.c
index bf30824..c042698 100644
--- a/libavfilter/vf_aspect.c
+++ b/libavfilter/vf_aspect.c

@@ -61,35 +61,9 @@
     AVRational dar;
     AVRational sar;
     int max;
-#if FF_API_OLD_FILTER_OPTS
-    float aspect_den;
-#endif
     char *ratio_expr;
 } AspectContext;
 
-static av_cold int init(AVFilterContext *ctx)
-{
-#if FF_API_OLD_FILTER_OPTS
-    AspectContext *s = ctx->priv;
-    int ret;
-
-    if (s->ratio_expr && s->aspect_den > 0) {
-        double num;
-        av_log(ctx, AV_LOG_WARNING,
-               "num:den syntax is deprecated, please use num/den or named options instead\n");
-        ret = av_expr_parse_and_eval(&num, s->ratio_expr, NULL, NULL,
-                                     NULL, NULL, NULL, NULL, NULL, 0, ctx);
-        if (ret < 0) {
-            av_log(ctx, AV_LOG_ERROR, "Unable to parse ratio numerator \"%s\"\n", s->ratio_expr);
-            return AVERROR(EINVAL);
-        }
-        s->sar = s->dar = av_d2q(num / s->aspect_den, s->max);
-    }
-#endif
-
-    return 0;
-}
-
 static int filter_frame(AVFilterLink *link, AVFrame *frame)
 {
     AspectContext *s = link->dst->priv;
@@ -151,38 +125,34 @@
 
 #if CONFIG_SETDAR_FILTER
 
-static int setdar_config_props(AVFilterLink *inlink)
+static int setdar_config_props(AVFilterLink *outlink)
 {
-    AspectContext *s = inlink->dst->priv;
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AspectContext *s = ctx->priv;
     AVRational dar;
     AVRational old_dar;
     AVRational old_sar = inlink->sample_aspect_ratio;
     int ret;
 
-#if FF_API_OLD_FILTER_OPTS
-    if (!(s->ratio_expr && s->aspect_den > 0)) {
-#endif
     if ((ret = get_aspect_ratio(inlink, &s->dar)))
         return ret;
-#if FF_API_OLD_FILTER_OPTS
-    }
-#endif
 
     if (s->dar.num && s->dar.den) {
         av_reduce(&s->sar.num, &s->sar.den,
                    s->dar.num * inlink->h,
                    s->dar.den * inlink->w, INT_MAX);
-        inlink->sample_aspect_ratio = s->sar;
+        outlink->sample_aspect_ratio = s->sar;
         dar = s->dar;
     } else {
-        inlink->sample_aspect_ratio = (AVRational){ 1, 1 };
+        outlink->sample_aspect_ratio = (AVRational){ 1, 1 };
         dar = (AVRational){ inlink->w, inlink->h };
     }
 
     compute_dar(&old_dar, old_sar, inlink->w, inlink->h);
-    av_log(inlink->dst, AV_LOG_VERBOSE, "w:%d h:%d dar:%d/%d sar:%d/%d -> dar:%d/%d sar:%d/%d\n",
+    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d dar:%d/%d sar:%d/%d -> dar:%d/%d sar:%d/%d\n",
            inlink->w, inlink->h, old_dar.num, old_dar.den, old_sar.num, old_sar.den,
-           dar.num, dar.den, inlink->sample_aspect_ratio.num, inlink->sample_aspect_ratio.den);
+           dar.num, dar.den, outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den);
 
     return 0;
 }
@@ -191,9 +161,6 @@
     { "dar",   "set display aspect ratio", OFFSET(ratio_expr), AV_OPT_TYPE_STRING, { .str = "0" }, .flags = FLAGS },
     { "ratio", "set display aspect ratio", OFFSET(ratio_expr), AV_OPT_TYPE_STRING, { .str = "0" }, .flags = FLAGS },
     { "r",     "set display aspect ratio", OFFSET(ratio_expr), AV_OPT_TYPE_STRING, { .str = "0" }, .flags = FLAGS },
-#if FF_API_OLD_FILTER_OPTS
-    { "dar_den", NULL, OFFSET(aspect_den), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, 0, FLT_MAX, FLAGS },
-#endif
     { "max",   "set max value for nominator or denominator in the ratio", OFFSET(max), AV_OPT_TYPE_INT, {.i64=100}, 1, INT_MAX, FLAGS },
     { NULL }
 };
@@ -204,7 +171,6 @@
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
-        .config_props = setdar_config_props,
         .filter_frame = filter_frame,
     },
     { NULL }
@@ -214,6 +180,7 @@
     {
         .name = "default",
         .type = AVMEDIA_TYPE_VIDEO,
+        .config_props = setdar_config_props,
     },
     { NULL }
 };
@@ -221,7 +188,6 @@
 AVFilter ff_vf_setdar = {
     .name        = "setdar",
     .description = NULL_IF_CONFIG_SMALL("Set the frame display aspect ratio."),
-    .init        = init,
     .priv_size   = sizeof(AspectContext),
     .priv_class  = &setdar_class,
     .inputs      = avfilter_vf_setdar_inputs,
@@ -232,29 +198,25 @@
 
 #if CONFIG_SETSAR_FILTER
 
-static int setsar_config_props(AVFilterLink *inlink)
+static int setsar_config_props(AVFilterLink *outlink)
 {
-    AspectContext *s = inlink->dst->priv;
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AspectContext *s = ctx->priv;
     AVRational old_sar = inlink->sample_aspect_ratio;
     AVRational old_dar, dar;
     int ret;
 
-#if FF_API_OLD_FILTER_OPTS
-    if (!(s->ratio_expr && s->aspect_den > 0)) {
-#endif
     if ((ret = get_aspect_ratio(inlink, &s->sar)))
         return ret;
-#if FF_API_OLD_FILTER_OPTS
-    }
-#endif
 
-    inlink->sample_aspect_ratio = s->sar;
+    outlink->sample_aspect_ratio = s->sar;
 
     compute_dar(&old_dar, old_sar, inlink->w, inlink->h);
     compute_dar(&dar, s->sar, inlink->w, inlink->h);
-    av_log(inlink->dst, AV_LOG_VERBOSE, "w:%d h:%d sar:%d/%d dar:%d/%d -> sar:%d/%d dar:%d/%d\n",
+    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d sar:%d/%d dar:%d/%d -> sar:%d/%d dar:%d/%d\n",
            inlink->w, inlink->h, old_sar.num, old_sar.den, old_dar.num, old_dar.den,
-           inlink->sample_aspect_ratio.num, inlink->sample_aspect_ratio.den, dar.num, dar.den);
+           outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den, dar.num, dar.den);
 
     return 0;
 }
@@ -263,9 +225,6 @@
     { "sar",   "set sample (pixel) aspect ratio", OFFSET(ratio_expr), AV_OPT_TYPE_STRING, { .str = "0" }, .flags = FLAGS },
     { "ratio", "set sample (pixel) aspect ratio", OFFSET(ratio_expr), AV_OPT_TYPE_STRING, { .str = "0" }, .flags = FLAGS },
     { "r",     "set sample (pixel) aspect ratio", OFFSET(ratio_expr), AV_OPT_TYPE_STRING, { .str = "0" }, .flags = FLAGS },
-#if FF_API_OLD_FILTER_OPTS
-    { "sar_den", NULL, OFFSET(aspect_den), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, 0, FLT_MAX, FLAGS },
-#endif
     { "max",   "set max value for nominator or denominator in the ratio", OFFSET(max), AV_OPT_TYPE_INT, {.i64=100}, 1, INT_MAX, FLAGS },
     { NULL }
 };
@@ -276,7 +235,6 @@
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
-        .config_props = setsar_config_props,
         .filter_frame = filter_frame,
     },
     { NULL }
@@ -286,6 +244,7 @@
     {
         .name = "default",
         .type = AVMEDIA_TYPE_VIDEO,
+        .config_props = setsar_config_props,
     },
     { NULL }
 };
@@ -293,7 +252,6 @@
 AVFilter ff_vf_setsar = {
     .name        = "setsar",
     .description = NULL_IF_CONFIG_SMALL("Set the pixel sample aspect ratio."),
-    .init        = init,
     .priv_size   = sizeof(AspectContext),
     .priv_class  = &setsar_class,
     .inputs      = avfilter_vf_setsar_inputs,

diff --git a/libavfilter/vf_atadenoise.c b/libavfilter/vf_atadenoise.c
index 03b772c..b7d958b 100644
--- a/libavfilter/vf_atadenoise.c
+++ b/libavfilter/vf_atadenoise.c

@@ -83,6 +83,7 @@
         AV_PIX_FMT_GRAY9,
         AV_PIX_FMT_GRAY10,
         AV_PIX_FMT_GRAY12,
+        AV_PIX_FMT_GRAY14,
         AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
@@ -112,8 +113,8 @@
     ATADenoiseContext *s = ctx->priv;
 
     if (!(s->size & 1)) {
-        av_log(ctx, AV_LOG_ERROR, "size %d is invalid. Must be an odd value.\n", s->size);
-        return AVERROR(EINVAL);
+        av_log(ctx, AV_LOG_WARNING, "size %d is invalid. Must be an odd value, setting it to %d.\n", s->size, s->size|1);
+        s->size |= 1;
     }
     s->mid = s->size / 2 + 1;
 

diff --git a/libavfilter/vf_avgblur.c b/libavfilter/vf_avgblur.c
index afd4a6a..c7b8842 100644
--- a/libavfilter/vf_avgblur.c
+++ b/libavfilter/vf_avgblur.c

@@ -242,7 +242,7 @@
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
         AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
 

diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c
new file mode 100644
index 0000000..f0e5f01
--- /dev/null
+++ b/libavfilter/vf_avgblur_opencl.c

@@ -0,0 +1,395 @@
+/*
+ * Copyright (c) 2018 Dylan Fernando
+ * Copyright (c) 2018 Danil Iashchenko
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+#include "boxblur.h"
+
+typedef struct AverageBlurOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    int              initialised;
+    cl_kernel        kernel_horiz;
+    cl_kernel        kernel_vert;
+    cl_command_queue command_queue;
+
+    int radiusH;
+    int radiusV;
+    int planes;
+
+    FilterParam luma_param;
+    FilterParam chroma_param;
+    FilterParam alpha_param;
+    int radius[4];
+    int power[4];
+
+} AverageBlurOpenCLContext;
+
+
+static int avgblur_opencl_init(AVFilterContext *avctx)
+{
+    AverageBlurOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    int err;
+
+    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_avgblur, 1);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+                     "command queue %d.\n", cle);
+
+    ctx->kernel_horiz = clCreateKernel(ctx->ocf.program,"avgblur_horiz", &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create horizontal "
+                     "kernel %d.\n", cle);
+
+    ctx->kernel_vert = clCreateKernel(ctx->ocf.program,"avgblur_vert", &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create vertical "
+                     "kernel %d.\n", cle);
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel_horiz)
+        clReleaseKernel(ctx->kernel_horiz);
+    if (ctx->kernel_vert)
+        clReleaseKernel(ctx->kernel_vert);
+    return err;
+}
+
+
+static int avgblur_opencl_make_filter_params(AVFilterLink *inlink)
+{
+    AVFilterContext    *ctx = inlink->dst;
+    AverageBlurOpenCLContext *s = ctx->priv;
+    int i;
+
+    if (s->radiusV <= 0) {
+        s->radiusV = s->radiusH;
+    }
+
+    for (i = 0; i < 4; i++) {
+        s->power[i] = 1;
+    }
+    return 0;
+}
+
+
+static int boxblur_opencl_make_filter_params(AVFilterLink *inlink)
+{
+    AVFilterContext    *ctx = inlink->dst;
+    AverageBlurOpenCLContext *s = ctx->priv;
+    int err, i;
+
+    err = ff_boxblur_eval_filter_params(inlink,
+                                        &s->luma_param,
+                                        &s->chroma_param,
+                                        &s->alpha_param);
+
+    if (err != 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to evaluate "
+               "filter params: %d.\n", err);
+        return err;
+    }
+
+    s->radius[Y] = s->luma_param.radius;
+    s->radius[U] = s->radius[V] = s->chroma_param.radius;
+    s->radius[A] = s->alpha_param.radius;
+
+    s->power[Y] = s->luma_param.power;
+    s->power[U] = s->power[V] = s->chroma_param.power;
+    s->power[A] = s->alpha_param.power;
+
+    for (i = 0; i < 4; i++) {
+        if (s->power[i] == 0) {
+            s->power[i] = 1;
+            s->radius[i] = 0;
+        }
+    }
+
+    return 0;
+}
+
+
+static int avgblur_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext    *avctx = inlink->dst;
+    AVFilterLink     *outlink = avctx->outputs[0];
+    AverageBlurOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    AVFrame *intermediate = NULL;
+    cl_int cle;
+    size_t global_work[2];
+    cl_mem src, dst, inter;
+    int err, p, radius_x, radius_y, i;
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+
+    if (!ctx->initialised) {
+        err = avgblur_opencl_init(avctx);
+        if (err < 0)
+            goto fail;
+
+        if (!strcmp(avctx->filter->name, "avgblur_opencl")) {
+            err = avgblur_opencl_make_filter_params(inlink);
+            if (err < 0)
+                goto fail;
+        } else if (!strcmp(avctx->filter->name, "boxblur_opencl")) {
+            err = boxblur_opencl_make_filter_params(inlink);
+            if (err < 0)
+                goto fail;
+        }
+
+    }
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    intermediate = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!intermediate) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+        src = (cl_mem) input->data[p];
+        dst = (cl_mem) output->data[p];
+        inter = (cl_mem)intermediate->data[p];
+
+        if (!dst)
+            break;
+
+        radius_x = ctx->radiusH;
+        radius_y = ctx->radiusV;
+
+        if (!(ctx->planes & (1 << p))) {
+            radius_x = 0;
+            radius_y = 0;
+        }
+
+        for (i = 0; i < ctx->power[p]; i++) {
+            CL_SET_KERNEL_ARG(ctx->kernel_horiz, 0, cl_mem, &inter);
+            CL_SET_KERNEL_ARG(ctx->kernel_horiz, 1, cl_mem, i == 0 ? &src : &dst);
+            if (!strcmp(avctx->filter->name, "avgblur_opencl")) {
+                CL_SET_KERNEL_ARG(ctx->kernel_horiz, 2, cl_int, &radius_x);
+            } else if (!strcmp(avctx->filter->name, "boxblur_opencl")) {
+                CL_SET_KERNEL_ARG(ctx->kernel_horiz, 2, cl_int, &ctx->radius[p]);
+            }
+
+            err = ff_opencl_filter_work_size_from_image(avctx, global_work,
+                                                        i == 0 ? intermediate : output, p, 0);
+            if (err < 0)
+                goto fail;
+
+            av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+                   "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+                   p, global_work[0], global_work[1]);
+
+            cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_horiz, 2, NULL,
+                                         global_work, NULL,
+                                         0, NULL, NULL);
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue horizontal "
+                             "kernel: %d.\n", cle);
+
+            err = ff_opencl_filter_work_size_from_image(avctx, global_work,
+                                                        i == 0 ? output : intermediate, p, 0);
+
+            CL_SET_KERNEL_ARG(ctx->kernel_vert, 0, cl_mem, &dst);
+            CL_SET_KERNEL_ARG(ctx->kernel_vert, 1, cl_mem, &inter);
+
+            if (!strcmp(avctx->filter->name, "avgblur_opencl")) {
+                CL_SET_KERNEL_ARG(ctx->kernel_vert, 2, cl_int, &radius_y);
+            } else if (!strcmp(avctx->filter->name, "boxblur_opencl")) {
+                CL_SET_KERNEL_ARG(ctx->kernel_vert, 2, cl_int, &ctx->radius[p]);
+            }
+
+            cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_vert, 2, NULL,
+                                         global_work, NULL,
+                                         0, NULL, NULL);
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue vertical "
+                             "kernel: %d.\n", cle);
+        }
+    }
+
+    cle = clFinish(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    av_frame_free(&input);
+    av_frame_free(&intermediate);
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    av_frame_free(&intermediate);
+    return err;
+}
+
+
+static av_cold void avgblur_opencl_uninit(AVFilterContext *avctx)
+{
+    AverageBlurOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+
+    if (ctx->kernel_horiz) {
+        cle = clReleaseKernel(ctx->kernel_horiz);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->kernel_vert) {
+        cle = clReleaseKernel(ctx->kernel_vert);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+
+static const AVFilterPad avgblur_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &avgblur_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+
+static const AVFilterPad avgblur_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_output,
+    },
+    { NULL }
+};
+
+
+#define OFFSET(x) offsetof(AverageBlurOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+
+#if CONFIG_AVGBLUR_OPENCL_FILTER
+
+static const AVOption avgblur_opencl_options[] = {
+    { "sizeX",  "set horizontal size",  OFFSET(radiusH), AV_OPT_TYPE_INT, {.i64=1},   1, 1024, FLAGS },
+    { "planes", "set planes to filter", OFFSET(planes),  AV_OPT_TYPE_INT, {.i64=0xF}, 0,  0xF, FLAGS },
+    { "sizeY",  "set vertical size",    OFFSET(radiusV), AV_OPT_TYPE_INT, {.i64=0},   0, 1024, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(avgblur_opencl);
+
+
+AVFilter ff_vf_avgblur_opencl = {
+    .name           = "avgblur_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply average blur filter"),
+    .priv_size      = sizeof(AverageBlurOpenCLContext),
+    .priv_class     = &avgblur_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &avgblur_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = avgblur_opencl_inputs,
+    .outputs        = avgblur_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_AVGBLUR_OPENCL_FILTER */
+
+
+#if CONFIG_BOXBLUR_OPENCL_FILTER
+
+static const AVOption boxblur_opencl_options[] = {
+    { "luma_radius", "Radius of the luma blurring box", OFFSET(luma_param.radius_expr), AV_OPT_TYPE_STRING, {.str="2"}, .flags = FLAGS },
+    { "lr",          "Radius of the luma blurring box", OFFSET(luma_param.radius_expr), AV_OPT_TYPE_STRING, {.str="2"}, .flags = FLAGS },
+    { "luma_power",  "How many times should the boxblur be applied to luma",  OFFSET(luma_param.power), AV_OPT_TYPE_INT, {.i64=2}, 0, INT_MAX, .flags = FLAGS },
+    { "lp",          "How many times should the boxblur be applied to luma",  OFFSET(luma_param.power), AV_OPT_TYPE_INT, {.i64=2}, 0, INT_MAX, .flags = FLAGS },
+
+    { "chroma_radius", "Radius of the chroma blurring box", OFFSET(chroma_param.radius_expr), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
+    { "cr",            "Radius of the chroma blurring box", OFFSET(chroma_param.radius_expr), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
+    { "chroma_power",  "How many times should the boxblur be applied to chroma",  OFFSET(chroma_param.power), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX, .flags = FLAGS },
+    { "cp",            "How many times should the boxblur be applied to chroma",  OFFSET(chroma_param.power), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX, .flags = FLAGS },
+
+    { "alpha_radius", "Radius of the alpha blurring box", OFFSET(alpha_param.radius_expr), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
+    { "ar",           "Radius of the alpha blurring box", OFFSET(alpha_param.radius_expr), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
+    { "alpha_power",  "How many times should the boxblur be applied to alpha",  OFFSET(alpha_param.power), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX, .flags = FLAGS },
+    { "ap",           "How many times should the boxblur be applied to alpha",  OFFSET(alpha_param.power), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX, .flags = FLAGS },
+
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(boxblur_opencl);
+
+AVFilter ff_vf_boxblur_opencl = {
+    .name           = "boxblur_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply boxblur filter to input video"),
+    .priv_size      = sizeof(AverageBlurOpenCLContext),
+    .priv_class     = &boxblur_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &avgblur_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = avgblur_opencl_inputs,
+    .outputs        = avgblur_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_BOXBLUR_OPENCL_FILTER */

diff --git a/libavfilter/vf_bitplanenoise.c b/libavfilter/vf_bitplanenoise.c
index dd6864b..4ec3a22 100644
--- a/libavfilter/vf_bitplanenoise.c
+++ b/libavfilter/vf_bitplanenoise.c

@@ -63,7 +63,7 @@
         AV_PIX_FMT_YUV444P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV420P16,
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
 

diff --git a/libavfilter/vf_blend.c b/libavfilter/vf_blend.c
index 054c0d5..e83a0db 100644
--- a/libavfilter/vf_blend.c
+++ b/libavfilter/vf_blend.c

@@ -524,19 +524,12 @@
             av_log(ctx, AV_LOG_ERROR, "inputs must be of same pixel format\n");
             return AVERROR(EINVAL);
         }
-        if (toplink->w                       != bottomlink->w ||
-            toplink->h                       != bottomlink->h ||
-            toplink->sample_aspect_ratio.num != bottomlink->sample_aspect_ratio.num ||
-            toplink->sample_aspect_ratio.den != bottomlink->sample_aspect_ratio.den) {
+        if (toplink->w != bottomlink->w || toplink->h != bottomlink->h) {
             av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
-                   "(size %dx%d, SAR %d:%d) do not match the corresponding "
-                   "second input link %s parameters (%dx%d, SAR %d:%d)\n",
+                   "(size %dx%d) do not match the corresponding "
+                   "second input link %s parameters (size %dx%d)\n",
                    ctx->input_pads[TOP].name, toplink->w, toplink->h,
-                   toplink->sample_aspect_ratio.num,
-                   toplink->sample_aspect_ratio.den,
-                   ctx->input_pads[BOTTOM].name, bottomlink->w, bottomlink->h,
-                   bottomlink->sample_aspect_ratio.num,
-                   bottomlink->sample_aspect_ratio.den);
+                   ctx->input_pads[BOTTOM].name, bottomlink->w, bottomlink->h);
             return AVERROR(EINVAL);
         }
     }
@@ -633,11 +626,17 @@
 
 static int tblend_filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
-    BlendContext *s = inlink->dst->priv;
-    AVFilterLink *outlink = inlink->dst->outputs[0];
+    AVFilterContext *ctx = inlink->dst;
+    BlendContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
 
     if (s->prev_frame) {
-        AVFrame *out = blend_frame(inlink->dst, frame, s->prev_frame);
+        AVFrame *out;
+
+        if (ctx->is_disabled)
+            out = av_frame_clone(frame);
+        else
+            out = blend_frame(ctx, frame, s->prev_frame);
         av_frame_free(&s->prev_frame);
         s->prev_frame = frame;
         return ff_filter_frame(outlink, out);
@@ -681,7 +680,7 @@
     .uninit        = uninit,
     .inputs        = tblend_inputs,
     .outputs       = tblend_outputs,
-    .flags         = AVFILTER_FLAG_SLICE_THREADS,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
 };
 
 #endif

diff --git a/libavfilter/vf_bm3d.c b/libavfilter/vf_bm3d.c
new file mode 100644
index 0000000..75c3567
--- /dev/null
+++ b/libavfilter/vf_bm3d.c

@@ -0,0 +1,1077 @@
+/*
+ * Copyright (c) 2015-2016 mawen1250
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * @todo
+ * - non-power of 2 DCT
+ * - opponent color space
+ * - temporal support
+ */
+
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavcodec/avfft.h"
+#include "avfilter.h"
+#include "filters.h"
+#include "formats.h"
+#include "framesync.h"
+#include "internal.h"
+#include "video.h"
+
+#define MAX_NB_THREADS 32
+
+enum FilterModes {
+    BASIC,
+    FINAL,
+    NB_MODES,
+};
+
+typedef struct ThreadData {
+    const uint8_t *src;
+    int src_linesize;
+    const uint8_t *ref;
+    int ref_linesize;
+    int plane;
+} ThreadData;
+
+typedef struct PosCode {
+    int x, y;
+} PosCode;
+
+typedef struct PosPairCode {
+    double score;
+    int x, y;
+} PosPairCode;
+
+typedef struct SliceContext {
+    DCTContext *gdctf, *gdcti;
+    DCTContext *dctf, *dcti;
+    FFTSample *bufferh;
+    FFTSample *bufferv;
+    FFTSample *bufferz;
+    FFTSample *buffer;
+    FFTSample *rbufferh;
+    FFTSample *rbufferv;
+    FFTSample *rbufferz;
+    FFTSample *rbuffer;
+    float *num, *den;
+    PosPairCode match_blocks[256];
+    int nb_match_blocks;
+    PosCode *search_positions;
+} SliceContext;
+
+typedef struct BM3DContext {
+    const AVClass *class;
+
+    float sigma;
+    int block_size;
+    int block_step;
+    int group_size;
+    int bm_range;
+    int bm_step;
+    float th_mse;
+    float hard_threshold;
+    int mode;
+    int ref;
+    int planes;
+
+    int depth;
+    int max;
+    int nb_planes;
+    int planewidth[4];
+    int planeheight[4];
+    int group_bits;
+    int pgroup_size;
+
+    SliceContext slices[MAX_NB_THREADS];
+
+    FFFrameSync fs;
+    int nb_threads;
+
+    void (*get_block_row)(const uint8_t *srcp, int src_linesize,
+                          int y, int x, int block_size, float *dst);
+    double (*do_block_ssd)(struct BM3DContext *s, PosCode *pos,
+                           const uint8_t *src, int src_stride,
+                           int r_y, int r_x);
+    void (*do_output)(struct BM3DContext *s, uint8_t *dst, int dst_linesize,
+                      int plane, int nb_jobs);
+    void (*block_filtering)(struct BM3DContext *s,
+                            const uint8_t *src, int src_linesize,
+                            const uint8_t *ref, int ref_linesize,
+                            int y, int x, int plane, int jobnr);
+} BM3DContext;
+
+#define OFFSET(x) offsetof(BM3DContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+static const AVOption bm3d_options[] = {
+    { "sigma",  "set denoising strength",
+        OFFSET(sigma),          AV_OPT_TYPE_FLOAT, {.dbl=1},     0,      99999.9, FLAGS },
+    { "block",  "set log2(size) of local patch",
+        OFFSET(block_size),     AV_OPT_TYPE_INT,   {.i64=4},     4,            6, FLAGS },
+    { "bstep",  "set sliding step for processing blocks",
+        OFFSET(block_step),     AV_OPT_TYPE_INT,   {.i64=4},     1,           64, FLAGS },
+    { "group",  "set maximal number of similar blocks",
+        OFFSET(group_size),     AV_OPT_TYPE_INT,   {.i64=1},     1,          256, FLAGS },
+    { "range",  "set block matching range",
+        OFFSET(bm_range),       AV_OPT_TYPE_INT,   {.i64=9},     1,    INT32_MAX, FLAGS },
+    { "mstep",  "set step for block matching",
+        OFFSET(bm_step),        AV_OPT_TYPE_INT,   {.i64=1},     1,           64, FLAGS },
+    { "thmse",  "set threshold of mean square error for block matching",
+        OFFSET(th_mse),         AV_OPT_TYPE_FLOAT, {.dbl=0},     0,    INT32_MAX, FLAGS },
+    { "hdthr",  "set hard threshold for 3D transfer domain",
+        OFFSET(hard_threshold), AV_OPT_TYPE_FLOAT, {.dbl=2.7},   0,    INT32_MAX, FLAGS },
+    { "estim",  "set filtering estimation mode",
+        OFFSET(mode),           AV_OPT_TYPE_INT,   {.i64=BASIC}, 0,   NB_MODES-1, FLAGS, "mode" },
+    { "basic",  "basic estimate",
+        0,                      AV_OPT_TYPE_CONST, {.i64=BASIC}, 0,            0, FLAGS, "mode" },
+    { "final",  "final estimate",
+        0,                      AV_OPT_TYPE_CONST, {.i64=FINAL}, 0,            0, FLAGS, "mode" },
+    { "ref",    "have reference stream",
+        OFFSET(ref),            AV_OPT_TYPE_INT,    {.i64=0},    0,            1, FLAGS },
+    { "planes", "set planes to filter",
+        OFFSET(planes),         AV_OPT_TYPE_INT,   {.i64=7},     0,           15, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(bm3d);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_GRAY9,   AV_PIX_FMT_GRAY10,
+        AV_PIX_FMT_GRAY12,  AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV440P10,
+        AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV420P12,
+        AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV444P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV420P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static int do_search_boundary(int pos, int plane_boundary, int search_range, int search_step)
+{
+    int search_boundary;
+
+    search_range = search_range / search_step * search_step;
+
+    if (pos == plane_boundary) {
+        search_boundary = plane_boundary;
+    } else if (pos > plane_boundary) {
+        search_boundary = pos - search_range;
+
+        while (search_boundary < plane_boundary) {
+            search_boundary += search_step;
+        }
+    } else {
+        search_boundary = pos + search_range;
+
+        while (search_boundary > plane_boundary) {
+            search_boundary -= search_step;
+        }
+    }
+
+    return search_boundary;
+}
+
+static int search_boundary(int plane_boundary, int search_range, int search_step, int vertical, int y, int x)
+{
+    return do_search_boundary(vertical ? y : x, plane_boundary, search_range, search_step);
+}
+
+static int cmp_scores(const void *a, const void *b)
+{
+    const struct PosPairCode *pair1 = a;
+    const struct PosPairCode *pair2 = b;
+    return FFDIFFSIGN(pair1->score, pair2->score);
+}
+
+static double do_block_ssd(BM3DContext *s, PosCode *pos, const uint8_t *src, int src_stride, int r_y, int r_x)
+{
+    const uint8_t *srcp = src + pos->y * src_stride + pos->x;
+    const uint8_t *refp = src + r_y * src_stride + r_x;
+    const int block_size = s->block_size;
+    double dist = 0.;
+    int x, y;
+
+    for (y = 0; y < block_size; y++) {
+        for (x = 0; x < block_size; x++) {
+            double temp = refp[x] - srcp[x];
+            dist += temp * temp;
+        }
+
+        srcp += src_stride;
+        refp += src_stride;
+    }
+
+    return dist;
+}
+
+static double do_block_ssd16(BM3DContext *s, PosCode *pos, const uint8_t *src, int src_stride, int r_y, int r_x)
+{
+    const uint16_t *srcp = (uint16_t *)src + pos->y * src_stride / 2 + pos->x;
+    const uint16_t *refp = (uint16_t *)src + r_y * src_stride / 2 + r_x;
+    const int block_size = s->block_size;
+    double dist = 0.;
+    int x, y;
+
+    for (y = 0; y < block_size; y++) {
+        for (x = 0; x < block_size; x++) {
+            double temp = refp[x] - srcp[x];
+            dist += temp * temp;
+        }
+
+        srcp += src_stride / 2;
+        refp += src_stride / 2;
+    }
+
+    return dist;
+}
+
+static void do_block_matching_multi(BM3DContext *s, const uint8_t *src, int src_stride, int src_range,
+                                    const PosCode *search_pos, int search_size, float th_mse,
+                                    int r_y, int r_x, int plane, int jobnr)
+{
+    SliceContext *sc = &s->slices[jobnr];
+    double MSE2SSE = s->group_size * s->block_size * s->block_size * src_range * src_range / (s->max * s->max);
+    double distMul = 1. / MSE2SSE;
+    double th_sse = th_mse * MSE2SSE;
+    int i, index = sc->nb_match_blocks;
+
+    for (i = 0; i < search_size; i++) {
+        PosCode pos = search_pos[i];
+        double dist;
+
+        dist = s->do_block_ssd(s, &pos, src, src_stride, r_y, r_x);
+
+        // Only match similar blocks but not identical blocks
+        if (dist <= th_sse && dist != 0) {
+            const double score = dist * distMul;
+
+            if (index >= s->group_size && score >= sc->match_blocks[index - 1].score) {
+                continue;
+            }
+
+            if (index >= s->group_size)
+                index = s->group_size - 1;
+
+            sc->match_blocks[index].score = score;
+            sc->match_blocks[index].y = pos.y;
+            sc->match_blocks[index].x = pos.x;
+            index++;
+            qsort(sc->match_blocks, index, sizeof(PosPairCode), cmp_scores);
+        }
+    }
+
+    sc->nb_match_blocks = index;
+}
+
+static void block_matching_multi(BM3DContext *s, const uint8_t *ref, int ref_linesize, int y, int x,
+                                 int exclude_cur_pos, int plane, int jobnr)
+{
+    SliceContext *sc = &s->slices[jobnr];
+    const int width = s->planewidth[plane];
+    const int height = s->planeheight[plane];
+    const int block_size = s->block_size;
+    const int step = s->bm_step;
+    const int range = s->bm_range / step * step;
+    int l = search_boundary(0, range, step, 0, y, x);
+    int r = search_boundary(width - block_size, range, step, 0, y, x);
+    int t = search_boundary(0, range, step, 1, y, x);
+    int b = search_boundary(height - block_size, range, step, 1, y, x);
+    int j, i, index = 0;
+
+    for (j = t; j <= b; j += step) {
+        for (i = l; i <= r; i += step) {
+            PosCode pos;
+
+            if (exclude_cur_pos > 0 && j == y && i == x) {
+                continue;
+            }
+
+            pos.y = j;
+            pos.x = i;
+            sc->search_positions[index++] = pos;
+        }
+    }
+
+    if (exclude_cur_pos == 1) {
+        sc->match_blocks[0].score = 0;
+        sc->match_blocks[0].y = y;
+        sc->match_blocks[0].x = x;
+        sc->nb_match_blocks = 1;
+    }
+
+    do_block_matching_multi(s, ref, ref_linesize, s->bm_range,
+                            sc->search_positions, index, s->th_mse, y, x, plane, jobnr);
+}
+
+static void block_matching(BM3DContext *s, const uint8_t *ref, int ref_linesize,
+                           int j, int i, int plane, int jobnr)
+{
+    SliceContext *sc = &s->slices[jobnr];
+
+    if (s->group_size == 1 || s->th_mse <= 0.f) {
+        sc->match_blocks[0].score = 1;
+        sc->match_blocks[0].x = i;
+        sc->match_blocks[0].y = j;
+        sc->nb_match_blocks = 1;
+        return;
+    }
+
+    sc->nb_match_blocks = 0;
+    block_matching_multi(s, ref, ref_linesize, j, i, 1, plane, jobnr);
+}
+
+static void get_block_row(const uint8_t *srcp, int src_linesize,
+                          int y, int x, int block_size, float *dst)
+{
+    const uint8_t *src = srcp + y * src_linesize + x;
+    int j;
+
+    for (j = 0; j < block_size; j++) {
+        dst[j] = src[j];
+    }
+}
+
+static void get_block_row16(const uint8_t *srcp, int src_linesize,
+                            int y, int x, int block_size, float *dst)
+{
+    const uint16_t *src = (uint16_t *)srcp + y * src_linesize / 2 + x;
+    int j;
+
+    for (j = 0; j < block_size; j++) {
+        dst[j] = src[j];
+    }
+}
+
+static void basic_block_filtering(BM3DContext *s, const uint8_t *src, int src_linesize,
+                                  const uint8_t *ref, int ref_linesize,
+                                  int y, int x, int plane, int jobnr)
+{
+    SliceContext *sc = &s->slices[jobnr];
+    const int buffer_linesize = s->block_size * s->block_size;
+    const int nb_match_blocks = sc->nb_match_blocks;
+    const int block_size = s->block_size;
+    const int width = s->planewidth[plane];
+    const int pgroup_size = s->pgroup_size;
+    const int group_size = s->group_size;
+    float *buffer = sc->buffer;
+    float *bufferh = sc->bufferh;
+    float *bufferv = sc->bufferv;
+    float *bufferz = sc->bufferz;
+    float threshold[4];
+    float den_weight, num_weight;
+    int retained = 0;
+    int i, j, k;
+
+    for (k = 0; k < nb_match_blocks; k++) {
+        const int y = sc->match_blocks[k].y;
+        const int x = sc->match_blocks[k].x;
+
+        for (i = 0; i < block_size; i++) {
+            s->get_block_row(src, src_linesize, y + i, x, block_size, bufferh + block_size * i);
+            av_dct_calc(sc->dctf, bufferh + block_size * i);
+        }
+
+        for (i = 0; i < block_size; i++) {
+            for (j = 0; j < block_size; j++) {
+                bufferv[i * block_size + j] = bufferh[j * block_size + i];
+            }
+            av_dct_calc(sc->dctf, bufferv + i * block_size);
+        }
+
+        for (i = 0; i < block_size; i++) {
+            memcpy(buffer + k * buffer_linesize + i * block_size,
+                   bufferv + i * block_size, block_size * 4);
+        }
+    }
+
+    for (i = 0; i < block_size; i++) {
+        for (j = 0; j < block_size; j++) {
+            for (k = 0; k < nb_match_blocks; k++)
+                bufferz[k] = buffer[buffer_linesize * k + i * block_size + j];
+            if (group_size > 1)
+                av_dct_calc(sc->gdctf, bufferz);
+            bufferz += pgroup_size;
+        }
+    }
+
+    threshold[0] = s->hard_threshold * s->sigma;
+    threshold[1] = threshold[0] * sqrtf(2.f);
+    threshold[2] = threshold[0] * 2.f;
+    threshold[3] = threshold[0] * sqrtf(8.f);
+    bufferz = sc->bufferz;
+
+    for (i = 0; i < block_size; i++) {
+        for (j = 0; j < block_size; j++) {
+            for (k = 0; k < nb_match_blocks; k++) {
+                const float thresh = threshold[(j == 0) + (i == 0) + (k == 0)];
+
+                if (bufferz[k] > thresh || bufferz[k] < -thresh) {
+                    retained++;
+                } else {
+                    bufferz[k] = 0;
+                }
+            }
+            bufferz += pgroup_size;
+        }
+    }
+
+    bufferz = sc->bufferz;
+    buffer = sc->buffer;
+    for (i = 0; i < block_size; i++) {
+        for (j = 0; j < block_size; j++) {
+            if (group_size > 1)
+                av_dct_calc(sc->gdcti, bufferz);
+            for (k = 0; k < nb_match_blocks; k++) {
+                buffer[buffer_linesize * k + i * block_size + j] = bufferz[k];
+            }
+            bufferz += pgroup_size;
+        }
+    }
+
+    den_weight = retained < 1 ? 1.f : 1.f / retained;
+    num_weight = den_weight;
+
+    buffer = sc->buffer;
+    for (k = 0; k < nb_match_blocks; k++) {
+        float *num = sc->num + y * width + x;
+        float *den = sc->den + y * width + x;
+
+        for (i = 0; i < block_size; i++) {
+            memcpy(bufferv + i * block_size,
+                   buffer + k * buffer_linesize + i * block_size,
+                   block_size * 4);
+        }
+
+        for (i = 0; i < block_size; i++) {
+            av_dct_calc(sc->dcti, bufferv + block_size * i);
+            for (j = 0; j < block_size; j++) {
+                bufferh[j * block_size + i] = bufferv[i * block_size + j];
+            }
+        }
+
+        for (i = 0; i < block_size; i++) {
+            av_dct_calc(sc->dcti, bufferh + block_size * i);
+            for (j = 0; j < block_size; j++) {
+                num[j] += bufferh[i * block_size + j] * num_weight;
+                den[j] += den_weight;
+            }
+            num += width;
+            den += width;
+        }
+    }
+}
+
+static void final_block_filtering(BM3DContext *s, const uint8_t *src, int src_linesize,
+                                  const uint8_t *ref, int ref_linesize,
+                                  int y, int x, int plane, int jobnr)
+{
+    SliceContext *sc = &s->slices[jobnr];
+    const int buffer_linesize = s->block_size * s->block_size;
+    const int nb_match_blocks = sc->nb_match_blocks;
+    const int block_size = s->block_size;
+    const int width = s->planewidth[plane];
+    const int pgroup_size = s->pgroup_size;
+    const int group_size = s->group_size;
+    const float sigma_sqr = s->sigma * s->sigma;
+    float *buffer = sc->buffer;
+    float *bufferh = sc->bufferh;
+    float *bufferv = sc->bufferv;
+    float *bufferz = sc->bufferz;
+    float *rbuffer = sc->rbuffer;
+    float *rbufferh = sc->rbufferh;
+    float *rbufferv = sc->rbufferv;
+    float *rbufferz = sc->rbufferz;
+    float den_weight, num_weight;
+    float l2_wiener = 0;
+    int i, j, k;
+
+    for (k = 0; k < nb_match_blocks; k++) {
+        const int y = sc->match_blocks[k].y;
+        const int x = sc->match_blocks[k].x;
+
+        for (i = 0; i < block_size; i++) {
+            s->get_block_row(src, src_linesize, y + i, x, block_size, bufferh + block_size * i);
+            s->get_block_row(ref, ref_linesize, y + i, x, block_size, rbufferh + block_size * i);
+            av_dct_calc(sc->dctf, bufferh + block_size * i);
+            av_dct_calc(sc->dctf, rbufferh + block_size * i);
+        }
+
+        for (i = 0; i < block_size; i++) {
+            for (j = 0; j < block_size; j++) {
+                bufferv[i * block_size + j] = bufferh[j * block_size + i];
+                rbufferv[i * block_size + j] = rbufferh[j * block_size + i];
+            }
+            av_dct_calc(sc->dctf, bufferv + i * block_size);
+            av_dct_calc(sc->dctf, rbufferv + i * block_size);
+        }
+
+        for (i = 0; i < block_size; i++) {
+            memcpy(buffer + k * buffer_linesize + i * block_size,
+                   bufferv + i * block_size, block_size * 4);
+            memcpy(rbuffer + k * buffer_linesize + i * block_size,
+                   rbufferv + i * block_size, block_size * 4);
+        }
+    }
+
+    for (i = 0; i < block_size; i++) {
+        for (j = 0; j < block_size; j++) {
+            for (k = 0; k < nb_match_blocks; k++) {
+                bufferz[k] = buffer[buffer_linesize * k + i * block_size + j];
+                rbufferz[k] = rbuffer[buffer_linesize * k + i * block_size + j];
+            }
+            if (group_size > 1) {
+                av_dct_calc(sc->gdctf, bufferz);
+                av_dct_calc(sc->gdctf, rbufferz);
+            }
+            bufferz += pgroup_size;
+            rbufferz += pgroup_size;
+        }
+    }
+
+    bufferz = sc->bufferz;
+    rbufferz = sc->rbufferz;
+
+    for (i = 0; i < block_size; i++) {
+        for (j = 0; j < block_size; j++) {
+            for (k = 0; k < nb_match_blocks; k++) {
+                const float ref_sqr = rbufferz[k] * rbufferz[k];
+                float wiener_coef = ref_sqr / (ref_sqr + sigma_sqr);
+
+                if (isnan(wiener_coef))
+                   wiener_coef = 1;
+                bufferz[k] *= wiener_coef;
+                l2_wiener += wiener_coef * wiener_coef;
+            }
+            bufferz += pgroup_size;
+            rbufferz += pgroup_size;
+        }
+    }
+
+    bufferz = sc->bufferz;
+    buffer = sc->buffer;
+    for (i = 0; i < block_size; i++) {
+        for (j = 0; j < block_size; j++) {
+            if (group_size > 1)
+                av_dct_calc(sc->gdcti, bufferz);
+            for (k = 0; k < nb_match_blocks; k++) {
+                buffer[buffer_linesize * k + i * block_size + j] = bufferz[k];
+            }
+            bufferz += pgroup_size;
+        }
+    }
+
+    l2_wiener = FFMAX(l2_wiener, 1e-15f);
+    den_weight = 1.f / l2_wiener;
+    num_weight = den_weight;
+
+    for (k = 0; k < nb_match_blocks; k++) {
+        float *num = sc->num + y * width + x;
+        float *den = sc->den + y * width + x;
+
+        for (i = 0; i < block_size; i++) {
+            memcpy(bufferv + i * block_size,
+                   buffer + k * buffer_linesize + i * block_size,
+                   block_size * 4);
+        }
+
+        for (i = 0; i < block_size; i++) {
+            av_dct_calc(sc->dcti, bufferv + block_size * i);
+            for (j = 0; j < block_size; j++) {
+                bufferh[j * block_size + i] = bufferv[i * block_size + j];
+            }
+        }
+
+        for (i = 0; i < block_size; i++) {
+            av_dct_calc(sc->dcti, bufferh + block_size * i);
+            for (j = 0; j < block_size; j++) {
+                num[j] += bufferh[i * block_size + j] * num_weight;
+                den[j] += den_weight;
+            }
+            num += width;
+            den += width;
+        }
+    }
+}
+
+static void do_output(BM3DContext *s, uint8_t *dst, int dst_linesize,
+                      int plane, int nb_jobs)
+{
+    const int height = s->planeheight[plane];
+    const int width = s->planewidth[plane];
+    int i, j, k;
+
+    for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+            uint8_t *dstp = dst + i * dst_linesize;
+            float sum_den = 0.f;
+            float sum_num = 0.f;
+
+            for (k = 0; k < nb_jobs; k++) {
+                SliceContext *sc = &s->slices[k];
+                float num = sc->num[i * width + j];
+                float den = sc->den[i * width + j];
+
+                sum_num += num;
+                sum_den += den;
+            }
+
+            dstp[j] = av_clip_uint8(sum_num / sum_den);
+        }
+    }
+}
+
+static void do_output16(BM3DContext *s, uint8_t *dst, int dst_linesize,
+                        int plane, int nb_jobs)
+{
+    const int height = s->planeheight[plane];
+    const int width = s->planewidth[plane];
+    const int depth = s->depth;
+    int i, j, k;
+
+    for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+            uint16_t *dstp = (uint16_t *)dst + i * dst_linesize / 2;
+            float sum_den = 0.f;
+            float sum_num = 0.f;
+
+            for (k = 0; k < nb_jobs; k++) {
+                SliceContext *sc = &s->slices[k];
+                float num = sc->num[i * width + j];
+                float den = sc->den[i * width + j];
+
+                sum_num += num;
+                sum_den += den;
+            }
+
+            dstp[j] = av_clip_uintp2_c(sum_num / sum_den, depth);
+        }
+    }
+}
+
+static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    BM3DContext *s = ctx->priv;
+    SliceContext *sc = &s->slices[jobnr];
+    const int block_step = s->block_step;
+    ThreadData *td = arg;
+    const uint8_t *src = td->src;
+    const uint8_t *ref = td->ref;
+    const int src_linesize = td->src_linesize;
+    const int ref_linesize = td->ref_linesize;
+    const int plane = td->plane;
+    const int width = s->planewidth[plane];
+    const int height = s->planeheight[plane];
+    const int block_pos_bottom = height - s->block_size;
+    const int block_pos_right  = width - s->block_size;
+    const int slice_start = (((height + block_step - 1) / block_step) * jobnr / nb_jobs) * block_step;
+    const int slice_end = (jobnr == nb_jobs - 1) ? block_pos_bottom + block_step :
+                          (((height + block_step - 1) / block_step) * (jobnr + 1) / nb_jobs) * block_step;
+    int i, j;
+
+    memset(sc->num, 0, width * height * sizeof(FFTSample));
+    memset(sc->den, 0, width * height * sizeof(FFTSample));
+
+    for (j = slice_start; j < slice_end; j += block_step) {
+        if (j > block_pos_bottom) {
+            j = block_pos_bottom;
+        }
+
+        for (i = 0; i < block_pos_right + block_step; i += block_step) {
+            if (i > block_pos_right) {
+                i = block_pos_right;
+            }
+
+            block_matching(s, ref, ref_linesize, j, i, plane, jobnr);
+
+            s->block_filtering(s, src, src_linesize,
+                               ref, ref_linesize, j, i, plane, jobnr);
+        }
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterContext *ctx, AVFrame **out, AVFrame *in, AVFrame *ref)
+{
+    BM3DContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    int p;
+
+    *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!*out)
+        return AVERROR(ENOMEM);
+    av_frame_copy_props(*out, in);
+
+    for (p = 0; p < s->nb_planes; p++) {
+        const int nb_jobs = FFMIN(s->nb_threads, s->planeheight[p] / s->block_step);
+        ThreadData td;
+
+        if (!((1 << p) & s->planes) || ctx->is_disabled) {
+            av_image_copy_plane((*out)->data[p], (*out)->linesize[p],
+                                in->data[p], in->linesize[p],
+                                s->planewidth[p], s->planeheight[p]);
+            continue;
+        }
+
+        td.src = in->data[p];
+        td.src_linesize = in->linesize[p];
+        td.ref = ref->data[p];
+        td.ref_linesize = ref->linesize[p];
+        td.plane = p;
+        ctx->internal->execute(ctx, filter_slice, &td, NULL, nb_jobs);
+
+        s->do_output(s, (*out)->data[p], (*out)->linesize[p], p, nb_jobs);
+    }
+
+    return 0;
+}
+
+#define SQR(x) ((x) * (x))
+
+static int config_input(AVFilterLink *inlink)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    AVFilterContext *ctx = inlink->dst;
+    BM3DContext *s = ctx->priv;
+    int i, group_bits;
+
+    s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), MAX_NB_THREADS);
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+    s->depth = desc->comp[0].depth;
+    s->max = (1 << s->depth) - 1;
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
+
+    for (group_bits = 4; 1 << group_bits < s->group_size; group_bits++);
+    s->group_bits = group_bits;
+    s->pgroup_size = 1 << group_bits;
+
+    for (i = 0; i < s->nb_threads; i++) {
+        SliceContext *sc = &s->slices[i];
+
+        sc->num = av_calloc(s->planewidth[0] * s->planeheight[0], sizeof(FFTSample));
+        sc->den = av_calloc(s->planewidth[0] * s->planeheight[0], sizeof(FFTSample));
+        if (!sc->num || !sc->den)
+            return AVERROR(ENOMEM);
+
+        sc->dctf = av_dct_init(av_log2(s->block_size), DCT_II);
+        sc->dcti = av_dct_init(av_log2(s->block_size), DCT_III);
+        if (!sc->dctf || !sc->dcti)
+            return AVERROR(ENOMEM);
+
+        if (s->group_bits > 1) {
+            sc->gdctf = av_dct_init(s->group_bits, DCT_II);
+            sc->gdcti = av_dct_init(s->group_bits, DCT_III);
+            if (!sc->gdctf || !sc->gdcti)
+                return AVERROR(ENOMEM);
+        }
+
+        sc->buffer = av_calloc(s->block_size * s->block_size * s->pgroup_size, sizeof(*sc->buffer));
+        sc->bufferz = av_calloc(s->block_size * s->block_size * s->pgroup_size, sizeof(*sc->bufferz));
+        sc->bufferh = av_calloc(s->block_size * s->block_size, sizeof(*sc->bufferh));
+        sc->bufferv = av_calloc(s->block_size * s->block_size, sizeof(*sc->bufferv));
+        if (!sc->bufferh || !sc->bufferv || !sc->buffer || !sc->bufferz)
+            return AVERROR(ENOMEM);
+
+        if (s->mode == FINAL) {
+            sc->rbuffer = av_calloc(s->block_size * s->block_size * s->pgroup_size, sizeof(*sc->rbuffer));
+            sc->rbufferz = av_calloc(s->block_size * s->block_size * s->pgroup_size, sizeof(*sc->rbufferz));
+            sc->rbufferh = av_calloc(s->block_size * s->block_size, sizeof(*sc->rbufferh));
+            sc->rbufferv = av_calloc(s->block_size * s->block_size, sizeof(*sc->rbufferv));
+            if (!sc->rbufferh || !sc->rbufferv || !sc->rbuffer || !sc->rbufferz)
+                return AVERROR(ENOMEM);
+        }
+
+        sc->search_positions = av_calloc(SQR(2 * s->bm_range / s->bm_step + 1), sizeof(*sc->search_positions));
+        if (!sc->search_positions)
+            return AVERROR(ENOMEM);
+    }
+
+    s->do_output = do_output;
+    s->do_block_ssd = do_block_ssd;
+    s->get_block_row = get_block_row;
+
+    if (s->depth > 8) {
+        s->do_output = do_output16;
+        s->do_block_ssd = do_block_ssd16;
+        s->get_block_row = get_block_row16;
+    }
+
+    return 0;
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    BM3DContext *s = ctx->priv;
+
+    if (!s->ref) {
+        AVFrame *frame = NULL;
+        AVFrame *out = NULL;
+        int ret, status;
+        int64_t pts;
+
+        if ((ret = ff_inlink_consume_frame(ctx->inputs[0], &frame)) > 0) {
+            ret = filter_frame(ctx, &out, frame, frame);
+            av_frame_free(&frame);
+            if (ret < 0)
+                return ret;
+            ret = ff_filter_frame(ctx->outputs[0], out);
+        }
+        if (ret < 0) {
+            return ret;
+        } else if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts)) {
+            ff_outlink_set_status(ctx->outputs[0], status, pts);
+            return 0;
+        } else {
+            if (ff_outlink_frame_wanted(ctx->outputs[0]))
+                ff_inlink_request_frame(ctx->inputs[0]);
+            return 0;
+        }
+    } else {
+        return ff_framesync_activate(&s->fs);
+    }
+}
+
+static int process_frame(FFFrameSync *fs)
+{
+    AVFilterContext *ctx = fs->parent;
+    BM3DContext *s = fs->opaque;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out = NULL, *src, *ref;
+    int ret;
+
+    if ((ret = ff_framesync_get_frame(&s->fs, 0, &src, 0)) < 0 ||
+        (ret = ff_framesync_get_frame(&s->fs, 1, &ref, 0)) < 0)
+        return ret;
+
+    if ((ret = filter_frame(ctx, &out, src, ref)) < 0)
+        return ret;
+
+    out->pts = av_rescale_q(src->pts, s->fs.time_base, outlink->time_base);
+
+    return ff_filter_frame(outlink, out);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    BM3DContext *s = ctx->priv;
+    AVFilterPad pad = { 0 };
+    int ret;
+
+    if (s->mode == BASIC) {
+        if (s->th_mse == 0.f)
+            s->th_mse = 400.f + s->sigma * 80.f;
+        s->block_filtering = basic_block_filtering;
+    } else if (s->mode == FINAL) {
+        if (!s->ref) {
+            av_log(ctx, AV_LOG_WARNING, "Reference stream is mandatory in final estimation mode.\n");
+            s->ref = 1;
+        }
+        if (s->th_mse == 0.f)
+            s->th_mse = 200.f + s->sigma * 10.f;
+
+        s->block_filtering = final_block_filtering;
+    } else {
+        return AVERROR_BUG;
+    }
+
+    s->block_size = 1 << s->block_size;
+
+    if (s->block_step > s->block_size) {
+        av_log(ctx, AV_LOG_WARNING, "bstep: %d can't be bigger than block size. Changing to %d.\n",
+               s->block_step, s->block_size);
+        s->block_step = s->block_size;
+    }
+    if (s->bm_step > s->bm_range) {
+        av_log(ctx, AV_LOG_WARNING, "mstep: %d can't be bigger than block matching range. Changing to %d.\n",
+               s->bm_step, s->bm_range);
+        s->bm_step = s->bm_range;
+    }
+
+    pad.type         = AVMEDIA_TYPE_VIDEO;
+    pad.name         = av_strdup("source");
+    pad.config_props = config_input;
+    if (!pad.name)
+        return AVERROR(ENOMEM);
+
+    if ((ret = ff_insert_inpad(ctx, 0, &pad)) < 0) {
+        av_freep(&pad.name);
+        return ret;
+    }
+
+    if (s->ref) {
+        pad.type         = AVMEDIA_TYPE_VIDEO;
+        pad.name         = av_strdup("reference");
+        pad.config_props = NULL;
+        if (!pad.name)
+            return AVERROR(ENOMEM);
+
+        if ((ret = ff_insert_inpad(ctx, 1, &pad)) < 0) {
+            av_freep(&pad.name);
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    BM3DContext *s = ctx->priv;
+    AVFilterLink *src = ctx->inputs[0];
+    AVFilterLink *ref;
+    FFFrameSyncIn *in;
+    int ret;
+
+    if (s->ref) {
+        ref = ctx->inputs[1];
+
+        if (src->format != ref->format) {
+            av_log(ctx, AV_LOG_ERROR, "inputs must be of same pixel format\n");
+            return AVERROR(EINVAL);
+        }
+        if (src->w                       != ref->w ||
+            src->h                       != ref->h) {
+            av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
+                   "(size %dx%d) do not match the corresponding "
+                   "second input link %s parameters (%dx%d) ",
+                   ctx->input_pads[0].name, src->w, src->h,
+                   ctx->input_pads[1].name, ref->w, ref->h);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    outlink->w = src->w;
+    outlink->h = src->h;
+    outlink->time_base = src->time_base;
+    outlink->sample_aspect_ratio = src->sample_aspect_ratio;
+    outlink->frame_rate = src->frame_rate;
+
+    if (!s->ref)
+        return 0;
+
+    if ((ret = ff_framesync_init(&s->fs, ctx, 2)) < 0)
+        return ret;
+
+    in = s->fs.in;
+    in[0].time_base = src->time_base;
+    in[1].time_base = ref->time_base;
+    in[0].sync   = 1;
+    in[0].before = EXT_STOP;
+    in[0].after  = EXT_STOP;
+    in[1].sync   = 1;
+    in[1].before = EXT_STOP;
+    in[1].after  = EXT_STOP;
+    s->fs.opaque   = s;
+    s->fs.on_event = process_frame;
+
+    return ff_framesync_configure(&s->fs);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    BM3DContext *s = ctx->priv;
+    int i;
+
+    for (i = 0; i < ctx->nb_inputs; i++)
+        av_freep(&ctx->input_pads[i].name);
+
+    if (s->ref)
+        ff_framesync_uninit(&s->fs);
+
+    for (i = 0; i < s->nb_threads; i++) {
+        SliceContext *sc = &s->slices[i];
+
+        av_freep(&sc->num);
+        av_freep(&sc->den);
+
+        av_dct_end(sc->gdctf);
+        av_dct_end(sc->gdcti);
+        av_dct_end(sc->dctf);
+        av_dct_end(sc->dcti);
+
+        av_freep(&sc->buffer);
+        av_freep(&sc->bufferh);
+        av_freep(&sc->bufferv);
+        av_freep(&sc->bufferz);
+        av_freep(&sc->rbuffer);
+        av_freep(&sc->rbufferh);
+        av_freep(&sc->rbufferv);
+        av_freep(&sc->rbufferz);
+
+        av_freep(&sc->search_positions);
+    }
+}
+
+static const AVFilterPad bm3d_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_bm3d = {
+    .name          = "bm3d",
+    .description   = NULL_IF_CONFIG_SMALL("Block-Matching 3D denoiser."),
+    .priv_size     = sizeof(BM3DContext),
+    .init          = init,
+    .uninit        = uninit,
+    .activate      = activate,
+    .query_formats = query_formats,
+    .inputs        = NULL,
+    .outputs       = bm3d_outputs,
+    .priv_class    = &bm3d_class,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
+                     AVFILTER_FLAG_DYNAMIC_INPUTS |
+                     AVFILTER_FLAG_SLICE_THREADS,
+};

diff --git a/libavfilter/vf_colorbalance.c b/libavfilter/vf_colorbalance.c
index f996524..fd003fd 100644
--- a/libavfilter/vf_colorbalance.c
+++ b/libavfilter/vf_colorbalance.c

@@ -31,6 +31,10 @@
 #define B 2
 #define A 3
 
+typedef struct ThreadData {
+    AVFrame *in, *out;
+} ThreadData;
+
 typedef struct Range {
     double shadows;
     double midtones;
@@ -43,10 +47,12 @@
     Range magenta_green;
     Range yellow_blue;
 
-    uint8_t lut[3][256];
+    uint16_t lut[3][65536];
 
     uint8_t rgba_map[4];
     int step;
+
+    int (*apply_lut)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
 } ColorBalanceContext;
 
 #define OFFSET(x) offsetof(ColorBalanceContext, x)
@@ -74,6 +80,14 @@
         AV_PIX_FMT_ABGR,  AV_PIX_FMT_ARGB,
         AV_PIX_FMT_0BGR,  AV_PIX_FMT_0RGB,
         AV_PIX_FMT_RGB0,  AV_PIX_FMT_BGR0,
+        AV_PIX_FMT_RGB48,  AV_PIX_FMT_BGR48,
+        AV_PIX_FMT_RGBA64, AV_PIX_FMT_BGRA64,
+        AV_PIX_FMT_GBRP,   AV_PIX_FMT_GBRAP,
+        AV_PIX_FMT_GBRP9,
+        AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12,
+        AV_PIX_FMT_GBRP14,
+        AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16,
         AV_PIX_FMT_NONE
     };
     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
@@ -82,46 +96,218 @@
     return ff_set_common_formats(ctx, fmts_list);
 }
 
+static int apply_lut8_p(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ColorBalanceContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const int slice_start = (out->height * jobnr) / nb_jobs;
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
+    const uint8_t *srcg = in->data[0] + slice_start * in->linesize[0];
+    const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1];
+    const uint8_t *srcr = in->data[2] + slice_start * in->linesize[2];
+    const uint8_t *srca = in->data[3] + slice_start * in->linesize[3];
+    uint8_t *dstg = out->data[0] + slice_start * out->linesize[0];
+    uint8_t *dstb = out->data[1] + slice_start * out->linesize[1];
+    uint8_t *dstr = out->data[2] + slice_start * out->linesize[2];
+    uint8_t *dsta = out->data[3] + slice_start * out->linesize[3];
+    int i, j;
+
+    for (i = slice_start; i < slice_end; i++) {
+        for (j = 0; j < out->width; j++) {
+            dstg[j] = s->lut[G][srcg[j]];
+            dstb[j] = s->lut[B][srcb[j]];
+            dstr[j] = s->lut[R][srcr[j]];
+            if (in != out && out->linesize[3])
+                dsta[j] = srca[j];
+        }
+
+        srcg += in->linesize[0];
+        srcb += in->linesize[1];
+        srcr += in->linesize[2];
+        srca += in->linesize[3];
+        dstg += out->linesize[0];
+        dstb += out->linesize[1];
+        dstr += out->linesize[2];
+        dsta += out->linesize[3];
+    }
+
+    return 0;
+}
+
+static int apply_lut16_p(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ColorBalanceContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const int slice_start = (out->height * jobnr) / nb_jobs;
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
+    const uint16_t *srcg = (const uint16_t *)in->data[0] + slice_start * in->linesize[0] / 2;
+    const uint16_t *srcb = (const uint16_t *)in->data[1] + slice_start * in->linesize[1] / 2;
+    const uint16_t *srcr = (const uint16_t *)in->data[2] + slice_start * in->linesize[2] / 2;
+    const uint16_t *srca = (const uint16_t *)in->data[3] + slice_start * in->linesize[3] / 2;
+    uint16_t *dstg = (uint16_t *)out->data[0] + slice_start * out->linesize[0] / 2;
+    uint16_t *dstb = (uint16_t *)out->data[1] + slice_start * out->linesize[1] / 2;
+    uint16_t *dstr = (uint16_t *)out->data[2] + slice_start * out->linesize[2] / 2;
+    uint16_t *dsta = (uint16_t *)out->data[3] + slice_start * out->linesize[3] / 2;
+    int i, j;
+
+    for (i = slice_start; i < slice_end; i++) {
+        for (j = 0; j < out->width; j++) {
+            dstg[j] = s->lut[G][srcg[j]];
+            dstb[j] = s->lut[B][srcb[j]];
+            dstr[j] = s->lut[R][srcr[j]];
+            if (in != out && out->linesize[3])
+                dsta[j] = srca[j];
+        }
+
+        srcg += in->linesize[0] / 2;
+        srcb += in->linesize[1] / 2;
+        srcr += in->linesize[2] / 2;
+        srca += in->linesize[3] / 2;
+        dstg += out->linesize[0] / 2;
+        dstb += out->linesize[1] / 2;
+        dstr += out->linesize[2] / 2;
+        dsta += out->linesize[3] / 2;
+    }
+
+    return 0;
+}
+
+static int apply_lut8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ColorBalanceContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    AVFilterLink *outlink = ctx->outputs[0];
+    const int slice_start = (out->height * jobnr) / nb_jobs;
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
+    const uint8_t *srcrow = in->data[0] + slice_start * in->linesize[0];
+    const uint8_t roffset = s->rgba_map[R];
+    const uint8_t goffset = s->rgba_map[G];
+    const uint8_t boffset = s->rgba_map[B];
+    const uint8_t aoffset = s->rgba_map[A];
+    const int step = s->step;
+    uint8_t *dstrow;
+    int i, j;
+
+    dstrow = out->data[0] + slice_start * out->linesize[0];
+    for (i = slice_start; i < slice_end; i++) {
+        const uint8_t *src = srcrow;
+        uint8_t *dst = dstrow;
+
+        for (j = 0; j < outlink->w * step; j += step) {
+            dst[j + roffset] = s->lut[R][src[j + roffset]];
+            dst[j + goffset] = s->lut[G][src[j + goffset]];
+            dst[j + boffset] = s->lut[B][src[j + boffset]];
+            if (in != out && step == 4)
+                dst[j + aoffset] = src[j + aoffset];
+        }
+
+        srcrow += in->linesize[0];
+        dstrow += out->linesize[0];
+    }
+
+    return 0;
+}
+
+static int apply_lut16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ColorBalanceContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    AVFilterLink *outlink = ctx->outputs[0];
+    const int slice_start = (out->height * jobnr) / nb_jobs;
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
+    const uint16_t *srcrow = (const uint16_t *)in->data[0] + slice_start * in->linesize[0] / 2;
+    const uint8_t roffset = s->rgba_map[R];
+    const uint8_t goffset = s->rgba_map[G];
+    const uint8_t boffset = s->rgba_map[B];
+    const uint8_t aoffset = s->rgba_map[A];
+    const int step = s->step / 2;
+    uint16_t *dstrow;
+    int i, j;
+
+    dstrow = (uint16_t *)out->data[0] + slice_start * out->linesize[0] / 2;
+    for (i = slice_start; i < slice_end; i++) {
+        const uint16_t *src = srcrow;
+        uint16_t *dst = dstrow;
+
+        for (j = 0; j < outlink->w * step; j += step) {
+            dst[j + roffset] = s->lut[R][src[j + roffset]];
+            dst[j + goffset] = s->lut[G][src[j + goffset]];
+            dst[j + boffset] = s->lut[B][src[j + boffset]];
+            if (in != out && step == 4)
+                dst[j + aoffset] = src[j + aoffset];
+        }
+
+        srcrow += in->linesize[0] / 2;
+        dstrow += out->linesize[0] / 2;
+    }
+
+    return 0;
+}
+
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     ColorBalanceContext *s = ctx->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(outlink->format);
+    const int depth = desc->comp[0].depth;
+    const int max = 1 << depth;
+    const int planar = av_pix_fmt_count_planes(outlink->format) > 1;
     double *shadows, *midtones, *highlights, *buffer;
     int i, r, g, b;
 
-    buffer = av_malloc(256 * 3 * sizeof(*buffer));
+    if (max == 256 && planar) {
+        s->apply_lut = apply_lut8_p;
+    } else if (planar) {
+        s->apply_lut = apply_lut16_p;
+    } else if (max == 256) {
+        s->apply_lut = apply_lut8;
+    } else {
+        s->apply_lut = apply_lut16;
+    }
+
+    buffer = av_malloc(max * 3 * sizeof(*buffer));
     if (!buffer)
         return AVERROR(ENOMEM);
 
-    shadows    = buffer + 256 * 0;
-    midtones   = buffer + 256 * 1;
-    highlights = buffer + 256 * 2;
+    shadows    = buffer + max * 0;
+    midtones   = buffer + max * 1;
+    highlights = buffer + max * 2;
 
-    for (i = 0; i < 256; i++) {
-        double low = av_clipd((i - 85.0) / -64.0 + 0.5, 0, 1) * 178.5;
-        double mid = av_clipd((i - 85.0) /  64.0 + 0.5, 0, 1) *
-                     av_clipd((i + 85.0 - 255.0) / -64.0 + 0.5, 0, 1) * 178.5;
+    for (i = 0; i < max; i++) {
+        const double L = 0.333 * (max - 1);
+        const double M = 0.7 * (max - 1);
+        const double H = 1 * (max - 1);
+        double low = av_clipd((i - L) / (-max * 0.25) + 0.5, 0, 1) * M;
+        double mid = av_clipd((i - L) / ( max * 0.25) + 0.5, 0, 1) *
+                     av_clipd((i + L - H) / (-max * 0.25) + 0.5, 0, 1) * M;
 
         shadows[i] = low;
         midtones[i] = mid;
-        highlights[255 - i] = low;
+        highlights[max - i - 1] = low;
     }
 
-    for (i = 0; i < 256; i++) {
+    for (i = 0; i < max; i++) {
         r = g = b = i;
 
-        r = av_clip_uint8(r + s->cyan_red.shadows         * shadows[r]);
-        r = av_clip_uint8(r + s->cyan_red.midtones        * midtones[r]);
-        r = av_clip_uint8(r + s->cyan_red.highlights      * highlights[r]);
+        r = av_clip_uintp2_c(r + s->cyan_red.shadows         * shadows[r],    depth);
+        r = av_clip_uintp2_c(r + s->cyan_red.midtones        * midtones[r],   depth);
+        r = av_clip_uintp2_c(r + s->cyan_red.highlights      * highlights[r], depth);
 
-        g = av_clip_uint8(g + s->magenta_green.shadows    * shadows[g]);
-        g = av_clip_uint8(g + s->magenta_green.midtones   * midtones[g]);
-        g = av_clip_uint8(g + s->magenta_green.highlights * highlights[g]);
+        g = av_clip_uintp2_c(g + s->magenta_green.shadows    * shadows[g],    depth);
+        g = av_clip_uintp2_c(g + s->magenta_green.midtones   * midtones[g],   depth);
+        g = av_clip_uintp2_c(g + s->magenta_green.highlights * highlights[g], depth);
 
-        b = av_clip_uint8(b + s->yellow_blue.shadows      * shadows[b]);
-        b = av_clip_uint8(b + s->yellow_blue.midtones     * midtones[b]);
-        b = av_clip_uint8(b + s->yellow_blue.highlights   * highlights[b]);
+        b = av_clip_uintp2_c(b + s->yellow_blue.shadows      * shadows[b],    depth);
+        b = av_clip_uintp2_c(b + s->yellow_blue.midtones     * midtones[b],   depth);
+        b = av_clip_uintp2_c(b + s->yellow_blue.highlights   * highlights[b], depth);
 
         s->lut[R][i] = r;
         s->lut[G][i] = g;
@@ -141,15 +327,8 @@
     AVFilterContext *ctx = inlink->dst;
     ColorBalanceContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
-    const uint8_t roffset = s->rgba_map[R];
-    const uint8_t goffset = s->rgba_map[G];
-    const uint8_t boffset = s->rgba_map[B];
-    const uint8_t aoffset = s->rgba_map[A];
-    const int step = s->step;
-    const uint8_t *srcrow = in->data[0];
-    uint8_t *dstrow;
+    ThreadData td;
     AVFrame *out;
-    int i, j;
 
     if (av_frame_is_writable(in)) {
         out = in;
@@ -162,26 +341,13 @@
         av_frame_copy_props(out, in);
     }
 
-    dstrow = out->data[0];
-    for (i = 0; i < outlink->h; i++) {
-        const uint8_t *src = srcrow;
-        uint8_t *dst = dstrow;
-
-        for (j = 0; j < outlink->w * step; j += step) {
-            dst[j + roffset] = s->lut[R][src[j + roffset]];
-            dst[j + goffset] = s->lut[G][src[j + goffset]];
-            dst[j + boffset] = s->lut[B][src[j + boffset]];
-            if (in != out && step == 4)
-                dst[j + aoffset] = src[j + aoffset];
-        }
-
-        srcrow += in->linesize[0];
-        dstrow += out->linesize[0];
-    }
+    td.in = in;
+    td.out = out;
+    ctx->internal->execute(ctx, s->apply_lut, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
 
     if (in != out)
         av_frame_free(&in);
-    return ff_filter_frame(ctx->outputs[0], out);
+    return ff_filter_frame(outlink, out);
 }
 
 static const AVFilterPad colorbalance_inputs[] = {
@@ -210,5 +376,5 @@
     .query_formats = query_formats,
     .inputs        = colorbalance_inputs,
     .outputs       = colorbalance_outputs,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };

diff --git a/libavfilter/vf_colorchannelmixer.c b/libavfilter/vf_colorchannelmixer.c
index 2e068fa..3a9cd37 100644
--- a/libavfilter/vf_colorchannelmixer.c
+++ b/libavfilter/vf_colorchannelmixer.c

@@ -19,6 +19,7 @@
  */
 
 #include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
 #include "avfilter.h"
 #include "drawutils.h"
 #include "formats.h"
@@ -30,6 +31,10 @@
 #define B 2
 #define A 3
 
+typedef struct ThreadData {
+    AVFrame *in, *out;
+} ThreadData;
+
 typedef struct ColorChannelMixerContext {
     const AVClass *class;
     double rr, rg, rb, ra;
@@ -42,6 +47,8 @@
     int *buffer;
 
     uint8_t rgba_map[4];
+
+    int (*filter_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
 } ColorChannelMixerContext;
 
 #define OFFSET(x) offsetof(ColorChannelMixerContext, x)
@@ -78,6 +85,12 @@
         AV_PIX_FMT_RGB0,   AV_PIX_FMT_BGR0,
         AV_PIX_FMT_RGB48,  AV_PIX_FMT_BGR48,
         AV_PIX_FMT_RGBA64, AV_PIX_FMT_BGRA64,
+        AV_PIX_FMT_GBRP,   AV_PIX_FMT_GBRAP,
+        AV_PIX_FMT_GBRP9,
+        AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12,
+        AV_PIX_FMT_GBRP14,
+        AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16,
         AV_PIX_FMT_NONE
     };
 
@@ -87,25 +100,319 @@
     return ff_set_common_formats(ctx, fmts_list);
 }
 
+static av_always_inline int filter_slice_rgba_planar(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs,
+                                                     int have_alpha)
+{
+    ColorChannelMixerContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const int slice_start = (out->height * jobnr) / nb_jobs;
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
+    const uint8_t *srcg = in->data[0] + slice_start * in->linesize[0];
+    const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1];
+    const uint8_t *srcr = in->data[2] + slice_start * in->linesize[2];
+    const uint8_t *srca = in->data[3] + slice_start * in->linesize[3];
+    uint8_t *dstg = out->data[0] + slice_start * out->linesize[0];
+    uint8_t *dstb = out->data[1] + slice_start * out->linesize[1];
+    uint8_t *dstr = out->data[2] + slice_start * out->linesize[2];
+    uint8_t *dsta = out->data[3] + slice_start * out->linesize[3];
+    int i, j;
+
+    for (i = slice_start; i < slice_end; i++) {
+        for (j = 0; j < out->width; j++) {
+            const uint8_t rin = srcr[j];
+            const uint8_t gin = srcg[j];
+            const uint8_t bin = srcb[j];
+            const uint8_t ain = srca[j];
+
+            dstr[j] = av_clip_uint8(s->lut[R][R][rin] +
+                                    s->lut[R][G][gin] +
+                                    s->lut[R][B][bin] +
+                                    (have_alpha == 1 ? s->lut[R][A][ain] : 0));
+            dstg[j] = av_clip_uint8(s->lut[G][R][rin] +
+                                    s->lut[G][G][gin] +
+                                    s->lut[G][B][bin] +
+                                    (have_alpha == 1 ? s->lut[G][A][ain] : 0));
+            dstb[j] = av_clip_uint8(s->lut[B][R][rin] +
+                                    s->lut[B][G][gin] +
+                                    s->lut[B][B][bin] +
+                                    (have_alpha == 1 ? s->lut[B][A][ain] : 0));
+            if (have_alpha == 1) {
+                dsta[j] = av_clip_uint8(s->lut[A][R][rin] +
+                                        s->lut[A][G][gin] +
+                                        s->lut[A][B][bin] +
+                                        s->lut[A][A][ain]);
+            }
+        }
+
+        srcg += in->linesize[0];
+        srcb += in->linesize[1];
+        srcr += in->linesize[2];
+        srca += in->linesize[3];
+        dstg += out->linesize[0];
+        dstb += out->linesize[1];
+        dstr += out->linesize[2];
+        dsta += out->linesize[3];
+    }
+
+    return 0;
+}
+
+static av_always_inline int filter_slice_rgba16_planar(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs,
+                                                       int have_alpha, int depth)
+{
+    ColorChannelMixerContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const int slice_start = (out->height * jobnr) / nb_jobs;
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
+    const uint16_t *srcg = (const uint16_t *)(in->data[0] + slice_start * in->linesize[0]);
+    const uint16_t *srcb = (const uint16_t *)(in->data[1] + slice_start * in->linesize[1]);
+    const uint16_t *srcr = (const uint16_t *)(in->data[2] + slice_start * in->linesize[2]);
+    const uint16_t *srca = (const uint16_t *)(in->data[3] + slice_start * in->linesize[3]);
+    uint16_t *dstg = (uint16_t *)(out->data[0] + slice_start * out->linesize[0]);
+    uint16_t *dstb = (uint16_t *)(out->data[1] + slice_start * out->linesize[1]);
+    uint16_t *dstr = (uint16_t *)(out->data[2] + slice_start * out->linesize[2]);
+    uint16_t *dsta = (uint16_t *)(out->data[3] + slice_start * out->linesize[3]);
+    int i, j;
+
+    for (i = slice_start; i < slice_end; i++) {
+        for (j = 0; j < out->width; j++) {
+            const uint16_t rin = srcr[j];
+            const uint16_t gin = srcg[j];
+            const uint16_t bin = srcb[j];
+            const uint16_t ain = srca[j];
+
+            dstr[j] = av_clip_uintp2(s->lut[R][R][rin] +
+                                     s->lut[R][G][gin] +
+                                     s->lut[R][B][bin] +
+                                     (have_alpha == 1 ? s->lut[R][A][ain] : 0), depth);
+            dstg[j] = av_clip_uintp2(s->lut[G][R][rin] +
+                                     s->lut[G][G][gin] +
+                                     s->lut[G][B][bin] +
+                                     (have_alpha == 1 ? s->lut[G][A][ain] : 0), depth);
+            dstb[j] = av_clip_uintp2(s->lut[B][R][rin] +
+                                     s->lut[B][G][gin] +
+                                     s->lut[B][B][bin] +
+                                     (have_alpha == 1 ? s->lut[B][A][ain] : 0), depth);
+            if (have_alpha == 1) {
+                dsta[j] = av_clip_uintp2(s->lut[A][R][rin] +
+                                         s->lut[A][G][gin] +
+                                         s->lut[A][B][bin] +
+                                         s->lut[A][A][ain], depth);
+            }
+        }
+
+        srcg += in->linesize[0] / 2;
+        srcb += in->linesize[1] / 2;
+        srcr += in->linesize[2] / 2;
+        srca += in->linesize[3] / 2;
+        dstg += out->linesize[0] / 2;
+        dstb += out->linesize[1] / 2;
+        dstr += out->linesize[2] / 2;
+        dsta += out->linesize[3] / 2;
+    }
+
+    return 0;
+}
+
+static int filter_slice_gbrp(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba_planar(ctx, arg, jobnr, nb_jobs, 0);
+}
+
+static int filter_slice_gbrap(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba_planar(ctx, arg, jobnr, nb_jobs, 1);
+}
+
+static int filter_slice_gbrp9(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba16_planar(ctx, arg, jobnr, nb_jobs, 0, 9);
+}
+
+static int filter_slice_gbrp10(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba16_planar(ctx, arg, jobnr, nb_jobs, 0, 10);
+}
+
+static int filter_slice_gbrap10(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba16_planar(ctx, arg, jobnr, nb_jobs, 1, 10);
+}
+
+static int filter_slice_gbrp12(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba16_planar(ctx, arg, jobnr, nb_jobs, 0, 12);
+}
+
+static int filter_slice_gbrap12(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba16_planar(ctx, arg, jobnr, nb_jobs, 1, 12);
+}
+
+static int filter_slice_gbrp14(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba16_planar(ctx, arg, jobnr, nb_jobs, 0, 14);
+}
+
+static int filter_slice_gbrp16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba16_planar(ctx, arg, jobnr, nb_jobs, 0, 16);
+}
+
+static int filter_slice_gbrap16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba16_planar(ctx, arg, jobnr, nb_jobs, 1, 16);
+}
+
+static av_always_inline int filter_slice_rgba_packed(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs,
+                                                     int have_alpha, int step)
+{
+    ColorChannelMixerContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const int slice_start = (out->height * jobnr) / nb_jobs;
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
+    const uint8_t roffset = s->rgba_map[R];
+    const uint8_t goffset = s->rgba_map[G];
+    const uint8_t boffset = s->rgba_map[B];
+    const uint8_t aoffset = s->rgba_map[A];
+    const uint8_t *srcrow = in->data[0] + slice_start * in->linesize[0];
+    uint8_t *dstrow = out->data[0] + slice_start * out->linesize[0];
+    int i, j;
+
+    for (i = slice_start; i < slice_end; i++) {
+        const uint8_t *src = srcrow;
+        uint8_t *dst = dstrow;
+
+        for (j = 0; j < out->width * step; j += step) {
+            const uint8_t rin = src[j + roffset];
+            const uint8_t gin = src[j + goffset];
+            const uint8_t bin = src[j + boffset];
+            const uint8_t ain = src[j + aoffset];
+
+            dst[j + roffset] = av_clip_uint8(s->lut[R][R][rin] +
+                                             s->lut[R][G][gin] +
+                                             s->lut[R][B][bin] +
+                                             (have_alpha == 1 ? s->lut[R][A][ain] : 0));
+            dst[j + goffset] = av_clip_uint8(s->lut[G][R][rin] +
+                                             s->lut[G][G][gin] +
+                                             s->lut[G][B][bin] +
+                                             (have_alpha == 1 ? s->lut[G][A][ain] : 0));
+            dst[j + boffset] = av_clip_uint8(s->lut[B][R][rin] +
+                                             s->lut[B][G][gin] +
+                                             s->lut[B][B][bin] +
+                                             (have_alpha == 1 ? s->lut[B][A][ain] : 0));
+            if (have_alpha == 1) {
+                dst[j + aoffset] = av_clip_uint8(s->lut[A][R][rin] +
+                                                 s->lut[A][G][gin] +
+                                                 s->lut[A][B][bin] +
+                                                 s->lut[A][A][ain]);
+            } else if (have_alpha == -1 && in != out)
+                dst[j + aoffset] = 0;
+        }
+
+        srcrow += in->linesize[0];
+        dstrow += out->linesize[0];
+    }
+
+    return 0;
+}
+
+static av_always_inline int filter_slice_rgba16_packed(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs,
+                                                       int have_alpha, int step)
+{
+    ColorChannelMixerContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const int slice_start = (out->height * jobnr) / nb_jobs;
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
+    const uint8_t roffset = s->rgba_map[R];
+    const uint8_t goffset = s->rgba_map[G];
+    const uint8_t boffset = s->rgba_map[B];
+    const uint8_t aoffset = s->rgba_map[A];
+    const uint8_t *srcrow = in->data[0] + slice_start * in->linesize[0];
+    uint8_t *dstrow = out->data[0] + slice_start * out->linesize[0];
+    int i, j;
+
+    for (i = slice_start; i < slice_end; i++) {
+        const uint16_t *src = (const uint16_t *)srcrow;
+        uint16_t *dst = (uint16_t *)dstrow;
+
+        for (j = 0; j < out->width * step; j += step) {
+            const uint16_t rin = src[j + roffset];
+            const uint16_t gin = src[j + goffset];
+            const uint16_t bin = src[j + boffset];
+            const uint16_t ain = src[j + aoffset];
+
+            dst[j + roffset] = av_clip_uint16(s->lut[R][R][rin] +
+                                              s->lut[R][G][gin] +
+                                              s->lut[R][B][bin] +
+                                              (have_alpha == 1 ? s->lut[R][A][ain] : 0));
+            dst[j + goffset] = av_clip_uint16(s->lut[G][R][rin] +
+                                              s->lut[G][G][gin] +
+                                              s->lut[G][B][bin] +
+                                              (have_alpha == 1 ? s->lut[G][A][ain] : 0));
+            dst[j + boffset] = av_clip_uint16(s->lut[B][R][rin] +
+                                              s->lut[B][G][gin] +
+                                              s->lut[B][B][bin] +
+                                              (have_alpha == 1 ? s->lut[B][A][ain] : 0));
+            if (have_alpha == 1) {
+                dst[j + aoffset] = av_clip_uint16(s->lut[A][R][rin] +
+                                                  s->lut[A][G][gin] +
+                                                  s->lut[A][B][bin] +
+                                                  s->lut[A][A][ain]);
+            }
+        }
+
+        srcrow += in->linesize[0];
+        dstrow += out->linesize[0];
+    }
+
+    return 0;
+}
+
+static int filter_slice_rgba64(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba16_packed(ctx, arg, jobnr, nb_jobs, 1, 4);
+}
+
+static int filter_slice_rgb48(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba16_packed(ctx, arg, jobnr, nb_jobs, 0, 3);
+}
+
+static int filter_slice_rgba(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba_packed(ctx, arg, jobnr, nb_jobs, 1, 4);
+}
+
+static int filter_slice_rgb24(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba_packed(ctx, arg, jobnr, nb_jobs, 0, 3);
+}
+
+static int filter_slice_rgb0(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    return filter_slice_rgba_packed(ctx, arg, jobnr, nb_jobs, -1, 4);
+}
+
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     ColorChannelMixerContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(outlink->format);
+    const int depth = desc->comp[0].depth;
     int i, j, size, *buffer;
 
     ff_fill_rgba_map(s->rgba_map, outlink->format);
 
-    switch (outlink->format) {
-    case AV_PIX_FMT_RGB48:
-    case AV_PIX_FMT_BGR48:
-    case AV_PIX_FMT_RGBA64:
-    case AV_PIX_FMT_BGRA64:
-        size = 65536;
-        break;
-    default:
-        size = 256;
-    }
-
+    size = 1 << depth;
     s->buffer = buffer = av_malloc(16 * size * sizeof(*s->buffer));
     if (!s->buffer)
         return AVERROR(ENOMEM);
@@ -136,6 +443,63 @@
         s->lut[A][A][i] = lrint(i * s->aa);
     }
 
+    switch (outlink->format) {
+    case AV_PIX_FMT_BGR24:
+    case AV_PIX_FMT_RGB24:
+        s->filter_slice = filter_slice_rgb24;
+        break;
+    case AV_PIX_FMT_0BGR:
+    case AV_PIX_FMT_0RGB:
+    case AV_PIX_FMT_BGR0:
+    case AV_PIX_FMT_RGB0:
+        s->filter_slice = filter_slice_rgb0;
+        break;
+    case AV_PIX_FMT_ABGR:
+    case AV_PIX_FMT_ARGB:
+    case AV_PIX_FMT_BGRA:
+    case AV_PIX_FMT_RGBA:
+        s->filter_slice = filter_slice_rgba;
+        break;
+    case AV_PIX_FMT_BGR48:
+    case AV_PIX_FMT_RGB48:
+        s->filter_slice = filter_slice_rgb48;
+        break;
+    case AV_PIX_FMT_BGRA64:
+    case AV_PIX_FMT_RGBA64:
+        s->filter_slice = filter_slice_rgba64;
+        break;
+    case AV_PIX_FMT_GBRP:
+        s->filter_slice = filter_slice_gbrp;
+        break;
+    case AV_PIX_FMT_GBRAP:
+        s->filter_slice = filter_slice_gbrap;
+        break;
+    case AV_PIX_FMT_GBRP9:
+        s->filter_slice = filter_slice_gbrp9;
+        break;
+    case AV_PIX_FMT_GBRP10:
+        s->filter_slice = filter_slice_gbrp10;
+        break;
+    case AV_PIX_FMT_GBRAP10:
+        s->filter_slice = filter_slice_gbrap10;
+        break;
+    case AV_PIX_FMT_GBRP12:
+        s->filter_slice = filter_slice_gbrp12;
+        break;
+    case AV_PIX_FMT_GBRAP12:
+        s->filter_slice = filter_slice_gbrap12;
+        break;
+    case AV_PIX_FMT_GBRP14:
+        s->filter_slice = filter_slice_gbrp14;
+        break;
+    case AV_PIX_FMT_GBRP16:
+        s->filter_slice = filter_slice_gbrp16;
+        break;
+    case AV_PIX_FMT_GBRAP16:
+        s->filter_slice = filter_slice_gbrap16;
+        break;
+    }
+
     return 0;
 }
 
@@ -144,14 +508,8 @@
     AVFilterContext *ctx = inlink->dst;
     ColorChannelMixerContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
-    const uint8_t roffset = s->rgba_map[R];
-    const uint8_t goffset = s->rgba_map[G];
-    const uint8_t boffset = s->rgba_map[B];
-    const uint8_t aoffset = s->rgba_map[A];
-    const uint8_t *srcrow = in->data[0];
-    uint8_t *dstrow;
+    ThreadData td;
     AVFrame *out;
-    int i, j;
 
     if (av_frame_is_writable(in)) {
         out = in;
@@ -164,164 +522,13 @@
         av_frame_copy_props(out, in);
     }
 
-    dstrow = out->data[0];
-    switch (outlink->format) {
-    case AV_PIX_FMT_BGR24:
-    case AV_PIX_FMT_RGB24:
-        for (i = 0; i < outlink->h; i++) {
-            const uint8_t *src = srcrow;
-            uint8_t *dst = dstrow;
-
-            for (j = 0; j < outlink->w * 3; j += 3) {
-                const uint8_t rin = src[j + roffset];
-                const uint8_t gin = src[j + goffset];
-                const uint8_t bin = src[j + boffset];
-
-                dst[j + roffset] = av_clip_uint8(s->lut[R][R][rin] +
-                                                 s->lut[R][G][gin] +
-                                                 s->lut[R][B][bin]);
-                dst[j + goffset] = av_clip_uint8(s->lut[G][R][rin] +
-                                                 s->lut[G][G][gin] +
-                                                 s->lut[G][B][bin]);
-                dst[j + boffset] = av_clip_uint8(s->lut[B][R][rin] +
-                                                 s->lut[B][G][gin] +
-                                                 s->lut[B][B][bin]);
-            }
-
-            srcrow += in->linesize[0];
-            dstrow += out->linesize[0];
-        }
-        break;
-    case AV_PIX_FMT_0BGR:
-    case AV_PIX_FMT_0RGB:
-    case AV_PIX_FMT_BGR0:
-    case AV_PIX_FMT_RGB0:
-        for (i = 0; i < outlink->h; i++) {
-            const uint8_t *src = srcrow;
-            uint8_t *dst = dstrow;
-
-            for (j = 0; j < outlink->w * 4; j += 4) {
-                const uint8_t rin = src[j + roffset];
-                const uint8_t gin = src[j + goffset];
-                const uint8_t bin = src[j + boffset];
-
-                dst[j + roffset] = av_clip_uint8(s->lut[R][R][rin] +
-                                                 s->lut[R][G][gin] +
-                                                 s->lut[R][B][bin]);
-                dst[j + goffset] = av_clip_uint8(s->lut[G][R][rin] +
-                                                 s->lut[G][G][gin] +
-                                                 s->lut[G][B][bin]);
-                dst[j + boffset] = av_clip_uint8(s->lut[B][R][rin] +
-                                                 s->lut[B][G][gin] +
-                                                 s->lut[B][B][bin]);
-                if (in != out)
-                    dst[j + aoffset] = 0;
-            }
-
-            srcrow += in->linesize[0];
-            dstrow += out->linesize[0];
-        }
-        break;
-    case AV_PIX_FMT_ABGR:
-    case AV_PIX_FMT_ARGB:
-    case AV_PIX_FMT_BGRA:
-    case AV_PIX_FMT_RGBA:
-        for (i = 0; i < outlink->h; i++) {
-            const uint8_t *src = srcrow;
-            uint8_t *dst = dstrow;
-
-            for (j = 0; j < outlink->w * 4; j += 4) {
-                const uint8_t rin = src[j + roffset];
-                const uint8_t gin = src[j + goffset];
-                const uint8_t bin = src[j + boffset];
-                const uint8_t ain = src[j + aoffset];
-
-                dst[j + roffset] = av_clip_uint8(s->lut[R][R][rin] +
-                                                 s->lut[R][G][gin] +
-                                                 s->lut[R][B][bin] +
-                                                 s->lut[R][A][ain]);
-                dst[j + goffset] = av_clip_uint8(s->lut[G][R][rin] +
-                                                 s->lut[G][G][gin] +
-                                                 s->lut[G][B][bin] +
-                                                 s->lut[G][A][ain]);
-                dst[j + boffset] = av_clip_uint8(s->lut[B][R][rin] +
-                                                 s->lut[B][G][gin] +
-                                                 s->lut[B][B][bin] +
-                                                 s->lut[B][A][ain]);
-                dst[j + aoffset] = av_clip_uint8(s->lut[A][R][rin] +
-                                                 s->lut[A][G][gin] +
-                                                 s->lut[A][B][bin] +
-                                                 s->lut[A][A][ain]);
-            }
-
-            srcrow += in->linesize[0];
-            dstrow += out->linesize[0];
-        }
-        break;
-    case AV_PIX_FMT_BGR48:
-    case AV_PIX_FMT_RGB48:
-        for (i = 0; i < outlink->h; i++) {
-            const uint16_t *src = (const uint16_t *)srcrow;
-            uint16_t *dst = (uint16_t *)dstrow;
-
-            for (j = 0; j < outlink->w * 3; j += 3) {
-                const uint16_t rin = src[j + roffset];
-                const uint16_t gin = src[j + goffset];
-                const uint16_t bin = src[j + boffset];
-
-                dst[j + roffset] = av_clip_uint16(s->lut[R][R][rin] +
-                                                  s->lut[R][G][gin] +
-                                                  s->lut[R][B][bin]);
-                dst[j + goffset] = av_clip_uint16(s->lut[G][R][rin] +
-                                                  s->lut[G][G][gin] +
-                                                  s->lut[G][B][bin]);
-                dst[j + boffset] = av_clip_uint16(s->lut[B][R][rin] +
-                                                  s->lut[B][G][gin] +
-                                                  s->lut[B][B][bin]);
-            }
-
-            srcrow += in->linesize[0];
-            dstrow += out->linesize[0];
-        }
-        break;
-    case AV_PIX_FMT_BGRA64:
-    case AV_PIX_FMT_RGBA64:
-        for (i = 0; i < outlink->h; i++) {
-            const uint16_t *src = (const uint16_t *)srcrow;
-            uint16_t *dst = (uint16_t *)dstrow;
-
-            for (j = 0; j < outlink->w * 4; j += 4) {
-                const uint16_t rin = src[j + roffset];
-                const uint16_t gin = src[j + goffset];
-                const uint16_t bin = src[j + boffset];
-                const uint16_t ain = src[j + aoffset];
-
-                dst[j + roffset] = av_clip_uint16(s->lut[R][R][rin] +
-                                                  s->lut[R][G][gin] +
-                                                  s->lut[R][B][bin] +
-                                                  s->lut[R][A][ain]);
-                dst[j + goffset] = av_clip_uint16(s->lut[G][R][rin] +
-                                                  s->lut[G][G][gin] +
-                                                  s->lut[G][B][bin] +
-                                                  s->lut[G][A][ain]);
-                dst[j + boffset] = av_clip_uint16(s->lut[B][R][rin] +
-                                                  s->lut[B][G][gin] +
-                                                  s->lut[B][B][bin] +
-                                                  s->lut[B][A][ain]);
-                dst[j + aoffset] = av_clip_uint16(s->lut[A][R][rin] +
-                                                  s->lut[A][G][gin] +
-                                                  s->lut[A][B][bin] +
-                                                  s->lut[A][A][ain]);
-            }
-
-            srcrow += in->linesize[0];
-            dstrow += out->linesize[0];
-        }
-    }
+    td.in = in;
+    td.out = out;
+    ctx->internal->execute(ctx, s->filter_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
 
     if (in != out)
         av_frame_free(&in);
-    return ff_filter_frame(ctx->outputs[0], out);
+    return ff_filter_frame(outlink, out);
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
@@ -358,5 +565,5 @@
     .query_formats = query_formats,
     .inputs        = colorchannelmixer_inputs,
     .outputs       = colorchannelmixer_outputs,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };

diff --git a/libavfilter/vf_colorconstancy.c b/libavfilter/vf_colorconstancy.c
new file mode 100644
index 0000000..e3bb39e
--- /dev/null
+++ b/libavfilter/vf_colorconstancy.c

@@ -0,0 +1,758 @@
+/*
+ * Copyright (c) 2018 Mina Sami
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Color Constancy filter
+ *
+ * @see http://colorconstancy.com/
+ *
+ * @cite
+ * J. van de Weijer, Th. Gevers, A. Gijsenij "Edge-Based Color Constancy".
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+#include <math.h>
+
+#define GREY_EDGE "greyedge"
+
+#define SQRT3 1.73205080757
+
+#define NUM_PLANES    3
+#define MAX_DIFF_ORD  2
+#define MAX_META_DATA 4
+#define MAX_DATA      4
+
+#define INDEX_TEMP 0
+#define INDEX_DX   1
+#define INDEX_DY   2
+#define INDEX_DXY  3
+#define INDEX_NORM INDEX_DX
+#define INDEX_SRC  0
+#define INDEX_DST  1
+#define INDEX_ORD  2
+#define INDEX_DIR  3
+#define DIR_X 0
+#define DIR_Y 1
+
+/**
+ * Used for passing data between threads.
+ */
+typedef struct ThreadData {
+    AVFrame *in, *out;
+    int meta_data[MAX_META_DATA];
+    double  *data[MAX_DATA][NUM_PLANES];
+} ThreadData;
+
+/**
+ * Common struct for all algorithms contexts.
+ */
+typedef struct ColorConstancyContext {
+    const AVClass *class;
+
+    int difford;
+    int minknorm; /**< @minknorm = 0 : getMax instead */
+    double sigma;
+
+    int nb_threads;
+    int planeheight[4];
+    int planewidth[4];
+
+    int filtersize;
+    double *gauss[MAX_DIFF_ORD+1];
+
+    double white[NUM_PLANES];
+} ColorConstancyContext;
+
+#define OFFSET(x) offsetof(ColorConstancyContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+#define GINDX(s, i) ( (i) - ((s) >> 2) )
+
+/**
+ * Sets gauss filters used for calculating gauss derivatives. Filter size
+ * depends on sigma which is a user option hence we calculate these
+ * filters each time. Also each higher order depends on lower ones. Sigma
+ * can be zero only at difford = 0, then we only convert data to double
+ * instead.
+ *
+ * @param ctx the filter context.
+ *
+ * @return 0 in case of success, a negative value corresponding to an
+ * AVERROR code in case of failure.
+ */
+static int set_gauss(AVFilterContext *ctx)
+{
+    ColorConstancyContext *s = ctx->priv;
+    int filtersize = s->filtersize;
+    int difford    = s->difford;
+    double sigma   = s->sigma;
+    double sum1, sum2;
+    int i;
+
+    for (i = 0; i <= difford; ++i) {
+        s->gauss[i] = av_mallocz_array(filtersize, sizeof(*s->gauss[i]));
+        if (!s->gauss[i]) {
+            for (; i >= 0; --i) {
+                av_freep(&s->gauss[i]);
+            }
+            av_log(ctx, AV_LOG_ERROR, "Out of memory while allocating gauss buffers.\n");
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    // Order 0
+    av_log(ctx, AV_LOG_TRACE, "Setting 0-d gauss with filtersize = %d.\n", filtersize);
+    sum1 = 0.0;
+    if (!sigma) {
+        s->gauss[0][0] = 1; // Copying data to double instead of convolution
+    } else {
+        for (i = 0; i < filtersize; ++i) {
+            s->gauss[0][i] = exp(- pow(GINDX(filtersize, i), 2.) / (2 * sigma * sigma)) / ( sqrt(2 * M_PI) * sigma );
+            sum1 += s->gauss[0][i];
+        }
+        for (i = 0; i < filtersize; ++i) {
+            s->gauss[0][i] /= sum1;
+        }
+    }
+    // Order 1
+    if (difford > 0) {
+        av_log(ctx, AV_LOG_TRACE, "Setting 1-d gauss with filtersize = %d.\n", filtersize);
+        sum1 = 0.0;
+        for (i = 0; i < filtersize; ++i) {
+            s->gauss[1][i] = - (GINDX(filtersize, i) / pow(sigma, 2)) * s->gauss[0][i];
+            sum1 += s->gauss[1][i] * GINDX(filtersize, i);
+        }
+
+        for (i = 0; i < filtersize; ++i) {
+            s->gauss[1][i] /= sum1;
+        }
+
+        // Order 2
+        if (difford > 1) {
+            av_log(ctx, AV_LOG_TRACE, "Setting 2-d gauss with filtersize = %d.\n", filtersize);
+            sum1 = 0.0;
+            for (i = 0; i < filtersize; ++i) {
+                s->gauss[2][i] = ( pow(GINDX(filtersize, i), 2) / pow(sigma, 4) - 1/pow(sigma, 2) )
+                                 * s->gauss[0][i];
+                sum1 += s->gauss[2][i];
+            }
+
+            sum2 = 0.0;
+            for (i = 0; i < filtersize; ++i) {
+                s->gauss[2][i] -= sum1 / (filtersize);
+                sum2 += (0.5 * GINDX(filtersize, i) * GINDX(filtersize, i) * s->gauss[2][i]);
+            }
+            for (i = 0; i < filtersize ; ++i) {
+                s->gauss[2][i] /= sum2;
+            }
+        }
+    }
+    return 0;
+}
+
+/**
+ * Frees up buffers used by grey edge for storing derivatives final
+ * and intermidiate results. Number of buffers and number of planes
+ * for last buffer are given so it can be safely called at allocation
+ * failure instances.
+ *
+ * @param td holds the buffers.
+ * @param nb_buff number of buffers to be freed.
+ * @param nb_planes number of planes for last buffer to be freed.
+ */
+static void cleanup_derivative_buffers(ThreadData *td, int nb_buff, int nb_planes)
+{
+    int b, p;
+
+    for (b = 0; b < nb_buff; ++b) {
+        for (p = 0; p < NUM_PLANES; ++p) {
+            av_freep(&td->data[b][p]);
+        }
+    }
+    // Final buffer may not be fully allocated at fail cases
+    for (p = 0; p < nb_planes; ++p) {
+        av_freep(&td->data[b][p]);
+    }
+}
+
+/**
+ * Allocates buffers used by grey edge for storing derivatives final
+ * and intermidiate results.
+ *
+ * @param ctx the filter context.
+ * @param td holds the buffers.
+ *
+ * @return 0 in case of success, a negative value corresponding to an
+ * AVERROR code in case of failure.
+ */
+static int setup_derivative_buffers(AVFilterContext* ctx, ThreadData *td)
+{
+    ColorConstancyContext *s = ctx->priv;
+    int nb_buff = s->difford + 1;
+    int b, p;
+
+    av_log(ctx, AV_LOG_TRACE, "Allocating %d buffer(s) for grey edge.\n", nb_buff);
+    for (b = 0; b <= nb_buff; ++b) { // We need difford + 1 buffers
+        for (p = 0; p < NUM_PLANES; ++p) {
+            td->data[b][p] = av_mallocz_array(s->planeheight[p] * s->planewidth[p], sizeof(*td->data[b][p]));
+            if (!td->data[b][p]) {
+                cleanup_derivative_buffers(td, b + 1, p);
+                av_log(ctx, AV_LOG_ERROR, "Out of memory while allocating derivatives buffers.\n");
+                return AVERROR(ENOMEM);
+            }
+        }
+    }
+    return 0;
+}
+
+#define CLAMP(x, mx) av_clip((x), 0, (mx-1))
+#define INDX2D(r, c, w) ( (r) * (w) + (c) )
+#define GAUSS(s, sr, sc, sls, sh, sw, g) ( (s)[ INDX2D(CLAMP((sr), (sh)), CLAMP((sc), (sw)), (sls)) ] * (g) )
+
+/**
+ * Slice calculation of gaussian derivatives. Applies 1-D gaussian derivative filter
+ * either horizontally or vertically according to meta data given in thread data.
+ * When convoluting horizontally source is always the in frame withing thread data
+ * while when convoluting vertically source is a buffer.
+ *
+ * @param ctx the filter context.
+ * @param arg data to be passed between threads.
+ * @param jobnr current job nubmer.
+ * @param nb_jobs total number of jobs.
+ *
+ * @return 0.
+ */
+static int slice_get_derivative(AVFilterContext* ctx, void* arg, int jobnr, int nb_jobs)
+{
+    ColorConstancyContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    const int ord = td->meta_data[INDEX_ORD];
+    const int dir = td->meta_data[INDEX_DIR];
+    const int src_index  = td->meta_data[INDEX_SRC];
+    const int dst_index  = td->meta_data[INDEX_DST];
+    const int filtersize = s->filtersize;
+    const double *gauss  = s->gauss[ord];
+    int plane;
+
+    for (plane = 0; plane < NUM_PLANES; ++plane) {
+        const int height      = s->planeheight[plane];
+        const int width       = s->planewidth[plane];
+        const int in_linesize = in->linesize[plane];
+        double *dst = td->data[dst_index][plane];
+        int slice_start, slice_end;
+        int r, c, g;
+
+        if (dir == DIR_X) {
+            /** Applying gauss horizontally along each row */
+            const uint8_t *src = in->data[plane];
+            slice_start = (height * jobnr      ) / nb_jobs;
+            slice_end   = (height * (jobnr + 1)) / nb_jobs;
+
+            for (r = slice_start; r < slice_end; ++r) {
+                for (c = 0; c < width; ++c) {
+                    dst[INDX2D(r, c, width)] = 0;
+                    for (g = 0; g < filtersize; ++g) {
+                        dst[INDX2D(r, c, width)] += GAUSS(src, r,                        c + GINDX(filtersize, g),
+                                                          in_linesize, height, width, gauss[GINDX(filtersize, g)]);
+                    }
+                }
+            }
+        } else {
+            /** Applying gauss vertically along each column */
+            const double *src = td->data[src_index][plane];
+            slice_start = (width * jobnr      ) / nb_jobs;
+            slice_end   = (width * (jobnr + 1)) / nb_jobs;
+
+            for (c = slice_start; c < slice_end; ++c) {
+                for (r = 0; r < height; ++r) {
+                    dst[INDX2D(r, c, width)] = 0;
+                    for (g = 0; g < filtersize; ++g) {
+                        dst[INDX2D(r, c, width)] += GAUSS(src, r + GINDX(filtersize, g), c,
+                                                          width, height, width, gauss[GINDX(filtersize, g)]);
+                    }
+                }
+            }
+        }
+
+    }
+    return 0;
+}
+
+/**
+ * Slice Frobius normalization of gaussian derivatives. Only called for difford values of
+ * 1 or 2.
+ *
+ * @param ctx the filter context.
+ * @param arg data to be passed between threads.
+ * @param jobnr current job nubmer.
+ * @param nb_jobs total number of jobs.
+ *
+ * @return 0.
+ */
+static int slice_normalize(AVFilterContext* ctx, void* arg, int jobnr, int nb_jobs)
+{
+    ColorConstancyContext *s = ctx->priv;
+    ThreadData *td = arg;
+    const int difford = s->difford;
+    int plane;
+
+    for (plane = 0; plane < NUM_PLANES; ++plane) {
+        const int height = s->planeheight[plane];
+        const int width  = s->planewidth[plane];
+        const int64_t numpixels = width * (int64_t)height;
+        const int slice_start   = (numpixels * jobnr    ) / nb_jobs;
+        const int slice_end     = (numpixels * (jobnr+1)) / nb_jobs;
+        const double *dx = td->data[INDEX_DX][plane];
+        const double *dy = td->data[INDEX_DY][plane];
+        double *norm = td->data[INDEX_NORM][plane];
+        int i;
+
+        if (difford == 1) {
+            for (i = slice_start; i < slice_end; ++i) {
+                norm[i] = sqrt( pow(dx[i], 2) + pow(dy[i], 2));
+            }
+        } else {
+            const double *dxy = td->data[INDEX_DXY][plane];
+            for (i = slice_start; i < slice_end; ++i) {
+                norm[i] = sqrt( pow(dx[i], 2) + 4 * pow(dxy[i], 2) + pow(dy[i], 2) );
+            }
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Utility function for setting up differentiation data/metadata.
+ *
+ * @param ctx the filter context.
+ * @param td to be used for passing data between threads.
+ * @param ord ord of differentiation.
+ * @param dir direction of differentiation.
+ * @param src index of source used for differentiation.
+ * @param dst index destination used for saving differentiation result.
+ * @param dim maximum dimension in current direction.
+ * @param nb_threads number of threads to use.
+ */
+static void av_always_inline
+get_deriv(AVFilterContext *ctx, ThreadData *td, int ord, int dir,
+          int src, int dst, int dim, int nb_threads) {
+    td->meta_data[INDEX_ORD] = ord;
+    td->meta_data[INDEX_DIR] = dir;
+    td->meta_data[INDEX_SRC] = src;
+    td->meta_data[INDEX_DST] = dst;
+    ctx->internal->execute(ctx, slice_get_derivative, td, NULL, FFMIN(dim, nb_threads));
+}
+
+/**
+ * Main control function for calculating gaussian derivatives.
+ *
+ * @param ctx the filter context.
+ * @param td holds the buffers used for storing results.
+ *
+ * @return 0 in case of success, a negative value corresponding to an
+ * AVERROR code in case of failure.
+ */
+static int get_derivative(AVFilterContext *ctx, ThreadData *td)
+{
+    ColorConstancyContext *s = ctx->priv;
+    int nb_threads = s->nb_threads;
+    int height = s->planeheight[1];
+    int width  = s->planewidth[1];
+
+    switch(s->difford) {
+    case 0:
+        if (!s->sigma) { // Only copy once
+            get_deriv(ctx, td, 0, DIR_X, 0         , INDEX_NORM, height, nb_threads);
+        } else {
+            get_deriv(ctx, td, 0, DIR_X, 0,          INDEX_TEMP, height, nb_threads);
+            get_deriv(ctx, td, 0, DIR_Y, INDEX_TEMP, INDEX_NORM, width , nb_threads);
+            // save to INDEX_NORM because this will not be normalied and
+            // end gry edge filter expects result to be found in INDEX_NORM
+        }
+        return 0;
+
+    case 1:
+        get_deriv(ctx, td, 1, DIR_X, 0,          INDEX_TEMP, height, nb_threads);
+        get_deriv(ctx, td, 0, DIR_Y, INDEX_TEMP, INDEX_DX,   width , nb_threads);
+
+        get_deriv(ctx, td, 0, DIR_X, 0,          INDEX_TEMP, height, nb_threads);
+        get_deriv(ctx, td, 1, DIR_Y, INDEX_TEMP, INDEX_DY,   width , nb_threads);
+        return 0;
+
+    case 2:
+        get_deriv(ctx, td, 2, DIR_X, 0,          INDEX_TEMP, height, nb_threads);
+        get_deriv(ctx, td, 0, DIR_Y, INDEX_TEMP, INDEX_DX,   width , nb_threads);
+
+        get_deriv(ctx, td, 0, DIR_X, 0,          INDEX_TEMP, height, nb_threads);
+        get_deriv(ctx, td, 2, DIR_Y, INDEX_TEMP, INDEX_DY,   width , nb_threads);
+
+        get_deriv(ctx, td, 1, DIR_X, 0,          INDEX_TEMP, height, nb_threads);
+        get_deriv(ctx, td, 1, DIR_Y, INDEX_TEMP, INDEX_DXY,  width , nb_threads);
+        return 0;
+
+    default:
+        av_log(ctx, AV_LOG_ERROR, "Unsupported difford value: %d.\n", s->difford);
+        return AVERROR(EINVAL);
+    }
+
+}
+
+/**
+ * Slice function for grey edge algorithm that does partial summing/maximizing
+ * of gaussian derivatives.
+ *
+ * @param ctx the filter context.
+ * @param arg data to be passed between threads.
+ * @param jobnr current job nubmer.
+ * @param nb_jobs total number of jobs.
+ *
+ * @return 0.
+ */
+static int filter_slice_grey_edge(AVFilterContext* ctx, void* arg, int jobnr, int nb_jobs)
+{
+    ColorConstancyContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in    = td->in;
+    int minknorm   = s->minknorm;
+    const uint8_t thresh = 255;
+    int plane;
+
+    for (plane = 0; plane < NUM_PLANES; ++plane) {
+        const int height        = s->planeheight[plane];
+        const int width         = s->planewidth[plane];
+        const int in_linesize   = in->linesize[plane];
+        const int slice_start   = (height * jobnr) / nb_jobs;
+        const int slice_end     = (height * (jobnr+1)) / nb_jobs;
+        const uint8_t *img_data = in->data[plane];
+        const double *src       = td->data[INDEX_NORM][plane];
+        double *dst             = td->data[INDEX_DST][plane];
+        int r, c;
+
+        dst[jobnr] = 0;
+        if (!minknorm) {
+            for (r = slice_start; r < slice_end; ++r) {
+                for (c = 0; c < width; ++c) {
+                    dst[jobnr] = FFMAX( dst[jobnr], fabs(src[INDX2D(r, c, width)])
+                                        * (img_data[INDX2D(r, c, in_linesize)] < thresh) );
+                }
+            }
+        } else {
+            for (r = slice_start; r < slice_end; ++r) {
+                for (c = 0; c < width; ++c) {
+                    dst[jobnr] += ( pow( fabs(src[INDX2D(r, c, width)] / 255.), minknorm)
+                                    * (img_data[INDX2D(r, c, in_linesize)] < thresh) );
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/**
+ * Main control function for grey edge algorithm.
+ *
+ * @param ctx the filter context.
+ * @param in frame to perfrom grey edge on.
+ *
+ * @return 0 in case of success, a negative value corresponding to an
+ * AVERROR code in case of failure.
+ */
+static int filter_grey_edge(AVFilterContext *ctx, AVFrame *in)
+{
+    ColorConstancyContext *s = ctx->priv;
+    ThreadData td;
+    int minknorm  = s->minknorm;
+    int difford   = s->difford;
+    double *white = s->white;
+    int nb_jobs   = FFMIN3(s->planeheight[1], s->planewidth[1], s->nb_threads);
+    int plane, job, ret;
+
+    td.in = in;
+    ret = setup_derivative_buffers(ctx, &td);
+    if (ret) {
+        return ret;
+    }
+    get_derivative(ctx, &td);
+    if (difford > 0) {
+        ctx->internal->execute(ctx, slice_normalize, &td, NULL, nb_jobs);
+    }
+
+    ctx->internal->execute(ctx, filter_slice_grey_edge, &td, NULL, nb_jobs);
+    if (!minknorm) {
+        for (plane = 0; plane < NUM_PLANES; ++plane) {
+            white[plane] = 0; // All values are absolute
+            for (job = 0; job < nb_jobs; ++job) {
+                white[plane] = FFMAX(white[plane] , td.data[INDEX_DST][plane][job]);
+            }
+        }
+    } else {
+        for (plane = 0; plane < NUM_PLANES; ++plane) {
+            white[plane] = 0;
+            for (job = 0; job < nb_jobs; ++job) {
+                white[plane] += td.data[INDEX_DST][plane][job];
+            }
+            white[plane] = pow(white[plane], 1./minknorm);
+        }
+    }
+
+    cleanup_derivative_buffers(&td, difford + 1, NUM_PLANES);
+    return 0;
+}
+
+/**
+ * Normalizes estimated illumination since only illumination vector
+ * direction is required for color constancy.
+ *
+ * @param light the estimated illumination to be normalized in place
+ */
+static void normalize_light(double *light)
+{
+    double abs_val = pow( pow(light[0], 2.0) + pow(light[1], 2.0) + pow(light[2], 2.0), 0.5);
+    int plane;
+
+    // TODO: check if setting to 1.0 when estimated = 0.0 is the best thing to do
+
+    if (!abs_val) {
+        for (plane = 0; plane < NUM_PLANES; ++plane) {
+            light[plane] = 1.0;
+        }
+    } else {
+        for (plane = 0; plane < NUM_PLANES; ++plane) {
+            light[plane] = (light[plane] / abs_val);
+            if (!light[plane]) { // to avoid division by zero when correcting
+                light[plane] = 1.0;
+            }
+        }
+    }
+}
+
+/**
+ * Redirects to corresponding algorithm estimation function and performs normalization
+ * after estimation.
+ *
+ * @param ctx the filter context.
+ * @param in frame to perfrom estimation on.
+ *
+ * @return 0 in case of success, a negative value corresponding to an
+ * AVERROR code in case of failure.
+ */
+static int illumination_estimation(AVFilterContext *ctx, AVFrame *in)
+{
+    ColorConstancyContext *s = ctx->priv;
+    int ret;
+
+    ret = filter_grey_edge(ctx, in);
+
+    av_log(ctx, AV_LOG_DEBUG, "Estimated illumination= %f %f %f\n",
+           s->white[0], s->white[1], s->white[2]);
+    normalize_light(s->white);
+    av_log(ctx, AV_LOG_DEBUG, "Estimated illumination after normalization= %f %f %f\n",
+           s->white[0], s->white[1], s->white[2]);
+
+    return ret;
+}
+
+/**
+ * Performs simple correction via diagonal transformation model.
+ *
+ * @param ctx the filter context.
+ * @param arg data to be passed between threads.
+ * @param jobnr current job nubmer.
+ * @param nb_jobs total number of jobs.
+ *
+ * @return 0.
+ */
+static int diagonal_transformation(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ColorConstancyContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    int plane;
+
+    for (plane = 0; plane < NUM_PLANES; ++plane) {
+        const int height = s->planeheight[plane];
+        const int width  = s->planewidth[plane];
+        const int64_t numpixels = width * (int64_t)height;
+        const int slice_start   = (numpixels * jobnr) / nb_jobs;
+        const int slice_end     = (numpixels * (jobnr+1)) / nb_jobs;
+        const uint8_t *src = in->data[plane];
+        uint8_t *dst       = out->data[plane];
+        double temp;
+        unsigned i;
+
+        for (i = slice_start; i < slice_end; ++i) {
+            temp = src[i] / (s->white[plane] * SQRT3);
+            dst[i] = av_clip_uint8((int)(temp + 0.5));
+        }
+    }
+    return 0;
+}
+
+/**
+ * Main control function for correcting scene illumination based on
+ * estimated illumination.
+ *
+ * @param ctx the filter context.
+ * @param in holds frame to correct
+ * @param out holds corrected frame
+ */
+static void chromatic_adaptation(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
+{
+    ColorConstancyContext *s = ctx->priv;
+    ThreadData td;
+    int nb_jobs = FFMIN3(s->planeheight[1], s->planewidth[1], s->nb_threads);
+
+    td.in  = in;
+    td.out = out;
+    ctx->internal->execute(ctx, diagonal_transformation, &td, NULL, nb_jobs);
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        // TODO: support more formats
+        // FIXME: error when saving to .jpg
+        AV_PIX_FMT_GBRP,
+        AV_PIX_FMT_NONE
+    };
+
+    return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
+}
+
+static int config_props(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    ColorConstancyContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const double break_off_sigma = 3.0;
+    double sigma = s->sigma;
+    int ret;
+
+    if (!floor(break_off_sigma * sigma + 0.5) && s->difford) {
+        av_log(ctx, AV_LOG_ERROR, "floor(%f * sigma) must be > 0 when difford > 0.\n", break_off_sigma);
+        return AVERROR(EINVAL);
+    }
+
+    s->filtersize = 2 * floor(break_off_sigma * sigma + 0.5) + 1;
+    if (ret=set_gauss(ctx)) {
+        return ret;
+    }
+
+    s->nb_threads = ff_filter_get_nb_threads(ctx);
+    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+    int ret;
+
+    ret = illumination_estimation(ctx, in);
+    if (ret) {
+        return ret;
+    }
+
+    if (av_frame_is_writable(in)) {
+        out = in;
+    } else {
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_log(ctx, AV_LOG_ERROR, "Out of memory while allocating output video buffer.\n");
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+    chromatic_adaptation(ctx, in, out);
+
+    return ff_filter_frame(outlink, out);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    ColorConstancyContext *s = ctx->priv;
+    int difford = s->difford;
+    int i;
+
+    for (i = 0; i <= difford; ++i) {
+        av_freep(&s->gauss[i]);
+    }
+}
+
+static const AVFilterPad colorconstancy_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_props,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad colorconstancy_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+#if CONFIG_GREYEDGE_FILTER
+
+static const AVOption greyedge_options[] = {
+    { "difford",  "set differentiation order", OFFSET(difford),  AV_OPT_TYPE_INT,    {.i64=1}, 0,   2,      FLAGS },
+    { "minknorm", "set Minkowski norm",        OFFSET(minknorm), AV_OPT_TYPE_INT,    {.i64=1}, 0,   20,     FLAGS },
+    { "sigma",    "set sigma",                 OFFSET(sigma),    AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.0, 1024.0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(greyedge);
+
+AVFilter ff_vf_greyedge = {
+    .name          = GREY_EDGE,
+    .description   = NULL_IF_CONFIG_SMALL("Estimates scene illumination by grey edge assumption."),
+    .priv_size     = sizeof(ColorConstancyContext),
+    .priv_class    = &greyedge_class,
+    .query_formats = query_formats,
+    .uninit        = uninit,
+    .inputs        = colorconstancy_inputs,
+    .outputs       = colorconstancy_outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+};
+
+#endif /* CONFIG_GREY_EDGE_FILTER */

diff --git a/libavfilter/vf_colorspace.c b/libavfilter/vf_colorspace.c
index 71ea08a..f8d1ecd 100644
--- a/libavfilter/vf_colorspace.c
+++ b/libavfilter/vf_colorspace.c

@@ -33,6 +33,7 @@
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
+#include "colorspace.h"
 
 enum DitherMode {
     DITHER_NONE,
@@ -110,21 +111,13 @@
 
 struct ColorPrimaries {
     enum Whitepoint wp;
-    double xr, yr, xg, yg, xb, yb;
+    struct PrimaryCoefficients coeff;
 };
 
 struct TransferCharacteristics {
     double alpha, beta, gamma, delta;
 };
 
-struct LumaCoefficients {
-    double cr, cg, cb;
-};
-
-struct WhitepointCoefficients {
-    double xw, yw;
-};
-
 typedef struct ColorSpaceContext {
     const AVClass *class;
 
@@ -286,57 +279,30 @@
 };
 
 static const struct ColorPrimaries color_primaries[AVCOL_PRI_NB] = {
-    [AVCOL_PRI_BT709]     = { WP_D65, 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 },
-    [AVCOL_PRI_BT470M]    = { WP_C,   0.670, 0.330, 0.210, 0.710, 0.140, 0.080 },
-    [AVCOL_PRI_BT470BG]   = { WP_D65, 0.640, 0.330, 0.290, 0.600, 0.150, 0.060,},
-    [AVCOL_PRI_SMPTE170M] = { WP_D65, 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 },
-    [AVCOL_PRI_SMPTE240M] = { WP_D65, 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 },
-    [AVCOL_PRI_SMPTE428]  = { WP_E,   0.735, 0.265, 0.274, 0.718, 0.167, 0.009 },
-    [AVCOL_PRI_SMPTE431]  = { WP_DCI, 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 },
-    [AVCOL_PRI_SMPTE432]  = { WP_D65, 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 },
-    [AVCOL_PRI_FILM]      = { WP_C,   0.681, 0.319, 0.243, 0.692, 0.145, 0.049 },
-    [AVCOL_PRI_BT2020]    = { WP_D65, 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 },
-    [AVCOL_PRI_JEDEC_P22] = { WP_D65, 0.630, 0.340, 0.295, 0.605, 0.155, 0.077 },
+    [AVCOL_PRI_BT709]     = { WP_D65, { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 } },
+    [AVCOL_PRI_BT470M]    = { WP_C,   { 0.670, 0.330, 0.210, 0.710, 0.140, 0.080 } },
+    [AVCOL_PRI_BT470BG]   = { WP_D65, { 0.640, 0.330, 0.290, 0.600, 0.150, 0.060 } },
+    [AVCOL_PRI_SMPTE170M] = { WP_D65, { 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 } },
+    [AVCOL_PRI_SMPTE240M] = { WP_D65, { 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 } },
+    [AVCOL_PRI_SMPTE428]  = { WP_E,   { 0.735, 0.265, 0.274, 0.718, 0.167, 0.009 } },
+    [AVCOL_PRI_SMPTE431]  = { WP_DCI, { 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 } },
+    [AVCOL_PRI_SMPTE432]  = { WP_D65, { 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 } },
+    [AVCOL_PRI_FILM]      = { WP_C,   { 0.681, 0.319, 0.243, 0.692, 0.145, 0.049 } },
+    [AVCOL_PRI_BT2020]    = { WP_D65, { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 } },
+    [AVCOL_PRI_JEDEC_P22] = { WP_D65, { 0.630, 0.340, 0.295, 0.605, 0.155, 0.077 } },
 };
 
 static const struct ColorPrimaries *get_color_primaries(enum AVColorPrimaries prm)
 {
-    const struct ColorPrimaries *coeffs;
+    const struct ColorPrimaries *p;
 
     if (prm >= AVCOL_PRI_NB)
         return NULL;
-    coeffs = &color_primaries[prm];
-    if (!coeffs->xr)
+    p = &color_primaries[prm];
+    if (!p->coeff.xr)
         return NULL;
 
-    return coeffs;
-}
-
-static void invert_matrix3x3(const double in[3][3], double out[3][3])
-{
-    double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
-           m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
-           m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
-    int i, j;
-    double det;
-
-    out[0][0] =  (m11 * m22 - m21 * m12);
-    out[0][1] = -(m01 * m22 - m21 * m02);
-    out[0][2] =  (m01 * m12 - m11 * m02);
-    out[1][0] = -(m10 * m22 - m20 * m12);
-    out[1][1] =  (m00 * m22 - m20 * m02);
-    out[1][2] = -(m00 * m12 - m10 * m02);
-    out[2][0] =  (m10 * m21 - m20 * m11);
-    out[2][1] = -(m00 * m21 - m20 * m01);
-    out[2][2] =  (m00 * m11 - m10 * m01);
-
-    det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2];
-    det = 1.0 / det;
-
-    for (i = 0; i < 3; i++) {
-        for (j = 0; j < 3; j++)
-            out[i][j] *= det;
-    }
+    return p;
 }
 
 static int fill_gamma_table(ColorSpaceContext *s)
@@ -380,49 +346,6 @@
 }
 
 /*
- * see e.g. http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
- */
-static void fill_rgb2xyz_table(const struct ColorPrimaries *coeffs,
-                               double rgb2xyz[3][3])
-{
-    const struct WhitepointCoefficients *wp = &whitepoint_coefficients[coeffs->wp];
-    double i[3][3], sr, sg, sb, zw;
-
-    rgb2xyz[0][0] = coeffs->xr / coeffs->yr;
-    rgb2xyz[0][1] = coeffs->xg / coeffs->yg;
-    rgb2xyz[0][2] = coeffs->xb / coeffs->yb;
-    rgb2xyz[1][0] = rgb2xyz[1][1] = rgb2xyz[1][2] = 1.0;
-    rgb2xyz[2][0] = (1.0 - coeffs->xr - coeffs->yr) / coeffs->yr;
-    rgb2xyz[2][1] = (1.0 - coeffs->xg - coeffs->yg) / coeffs->yg;
-    rgb2xyz[2][2] = (1.0 - coeffs->xb - coeffs->yb) / coeffs->yb;
-    invert_matrix3x3(rgb2xyz, i);
-    zw = 1.0 - wp->xw - wp->yw;
-    sr = i[0][0] * wp->xw + i[0][1] * wp->yw + i[0][2] * zw;
-    sg = i[1][0] * wp->xw + i[1][1] * wp->yw + i[1][2] * zw;
-    sb = i[2][0] * wp->xw + i[2][1] * wp->yw + i[2][2] * zw;
-    rgb2xyz[0][0] *= sr;
-    rgb2xyz[0][1] *= sg;
-    rgb2xyz[0][2] *= sb;
-    rgb2xyz[1][0] *= sr;
-    rgb2xyz[1][1] *= sg;
-    rgb2xyz[1][2] *= sb;
-    rgb2xyz[2][0] *= sr;
-    rgb2xyz[2][1] *= sg;
-    rgb2xyz[2][2] *= sb;
-}
-
-static void mul3x3(double dst[3][3], const double src1[3][3], const double src2[3][3])
-{
-    int m, n;
-
-    for (m = 0; m < 3; m++)
-        for (n = 0; n < 3; n++)
-            dst[m][n] = src2[m][0] * src1[0][n] +
-                        src2[m][1] * src1[1][n] +
-                        src2[m][2] * src1[2][n];
-}
-
-/*
  * See http://www.brucelindbloom.com/index.html?Eqn_ChromAdapt.html
  * This function uses the Bradford mechanism.
  */
@@ -448,7 +371,7 @@
     double mai[3][3], fac[3][3], tmp[3][3];
     double rs, gs, bs, rd, gd, bd;
 
-    invert_matrix3x3(ma, mai);
+    ff_matrix_invert_3x3(ma, mai);
     rs = ma[0][0] * wp_src->xw + ma[0][1] * wp_src->yw + ma[0][2] * zw_src;
     gs = ma[1][0] * wp_src->xw + ma[1][1] * wp_src->yw + ma[1][2] * zw_src;
     bs = ma[2][0] * wp_src->xw + ma[2][1] * wp_src->yw + ma[2][2] * zw_src;
@@ -459,8 +382,8 @@
     fac[1][1] = gd / gs;
     fac[2][2] = bd / bs;
     fac[0][1] = fac[0][2] = fac[1][0] = fac[1][2] = fac[2][0] = fac[2][1] = 0.0;
-    mul3x3(tmp, ma, fac);
-    mul3x3(out, tmp, mai);
+    ff_matrix_mul_3x3(tmp, ma, fac);
+    ff_matrix_mul_3x3(out, tmp, mai);
 }
 
 static void apply_lut(int16_t *buf[3], ptrdiff_t stride,
@@ -517,7 +440,7 @@
         s->yuv2yuv(out_data, td->out_linesize, in_data, td->in_linesize, w, h,
                    s->yuv2yuv_coeffs, s->yuv_offset);
     } else {
-        // FIXME maybe (for caching effciency) do pipeline per-line instead of
+        // FIXME maybe (for caching efficiency) do pipeline per-line instead of
         // full buffer per function? (Or, since yuv2rgb requires 2 lines: per
         // 2 lines, for yuv420.)
         /*
@@ -661,20 +584,23 @@
                                            sizeof(*s->in_primaries));
         if (!s->lrgb2lrgb_passthrough) {
             double rgb2xyz[3][3], xyz2rgb[3][3], rgb2rgb[3][3];
+            const struct WhitepointCoefficients *wp_out, *wp_in;
 
-            fill_rgb2xyz_table(s->out_primaries, rgb2xyz);
-            invert_matrix3x3(rgb2xyz, xyz2rgb);
-            fill_rgb2xyz_table(s->in_primaries, rgb2xyz);
+            wp_out = &whitepoint_coefficients[s->out_primaries->wp];
+            wp_in = &whitepoint_coefficients[s->in_primaries->wp];
+            ff_fill_rgb2xyz_table(&s->out_primaries->coeff, wp_out, rgb2xyz);
+            ff_matrix_invert_3x3(rgb2xyz, xyz2rgb);
+            ff_fill_rgb2xyz_table(&s->in_primaries->coeff, wp_in, rgb2xyz);
             if (s->out_primaries->wp != s->in_primaries->wp &&
                 s->wp_adapt != WP_ADAPT_IDENTITY) {
                 double wpconv[3][3], tmp[3][3];
 
                 fill_whitepoint_conv_table(wpconv, s->wp_adapt, s->in_primaries->wp,
                                            s->out_primaries->wp);
-                mul3x3(tmp, rgb2xyz, wpconv);
-                mul3x3(rgb2rgb, tmp, xyz2rgb);
+                ff_matrix_mul_3x3(tmp, rgb2xyz, wpconv);
+                ff_matrix_mul_3x3(rgb2rgb, tmp, xyz2rgb);
             } else {
-                mul3x3(rgb2rgb, rgb2xyz, xyz2rgb);
+                ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb);
             }
             for (m = 0; m < 3; m++)
                 for (n = 0; n < 3; n++) {
@@ -799,7 +725,7 @@
             for (n = 0; n < 8; n++)
                 s->yuv_offset[0][n] = off;
             fill_rgb2yuv_table(s->in_lumacoef, rgb2yuv);
-            invert_matrix3x3(rgb2yuv, yuv2rgb);
+            ff_matrix_invert_3x3(rgb2yuv, yuv2rgb);
             bits = 1 << (in_desc->comp[0].depth - 1);
             for (n = 0; n < 3; n++) {
                 for (in_rng = s->in_y_rng, m = 0; m < 3; m++, in_rng = s->in_uv_rng) {
@@ -855,7 +781,7 @@
             double yuv2yuv[3][3];
             int in_rng, out_rng;
 
-            mul3x3(yuv2yuv, yuv2rgb, rgb2yuv);
+            ff_matrix_mul_3x3(yuv2yuv, yuv2rgb, rgb2yuv);
             for (out_rng = s->out_y_rng, m = 0; m < 3; m++, out_rng = s->out_uv_rng) {
                 for (in_rng = s->in_y_rng, n = 0; n < 3; n++, in_rng = s->in_uv_rng) {
                     s->yuv2yuv_coeffs[m][n][0] =

diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c
index 6020319..421c169 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c

@@ -21,6 +21,7 @@
 
 #include "libavutil/avstring.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
@@ -28,31 +29,42 @@
 #include "internal.h"
 #include "video.h"
 
+enum MatrixMode {
+    MATRIX_SQUARE,
+    MATRIX_ROW,
+    MATRIX_COLUMN,
+    MATRIX_NBMODES,
+};
+
 typedef struct ConvolutionContext {
     const AVClass *class;
 
     char *matrix_str[4];
     float rdiv[4];
     float bias[4];
+    int mode[4];
     float scale;
     float delta;
     int planes;
 
     int size[4];
     int depth;
+    int max;
     int bpc;
-    int bstride;
-    uint8_t *buffer;
-    uint8_t **bptrs;
     int nb_planes;
     int nb_threads;
     int planewidth[4];
     int planeheight[4];
-    int matrix[4][25];
+    int matrix[4][49];
     int matrix_length[4];
     int copy[4];
 
-    int (*filter[4])(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
+    void (*setup[4])(int radius, const uint8_t *c[], const uint8_t *src, int stride,
+                     int x, int width, int y, int height, int bpc);
+    void (*filter[4])(uint8_t *dst, int width,
+                      float rdiv, float bias, const int *const matrix,
+                      const uint8_t *c[], int peak, int radius,
+                      int dstride, int stride);
 } ConvolutionContext;
 
 #define OFFSET(x) offsetof(ConvolutionContext, x)
@@ -63,14 +75,21 @@
     { "1m", "set matrix for 2nd plane", OFFSET(matrix_str[1]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
     { "2m", "set matrix for 3rd plane", OFFSET(matrix_str[2]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
     { "3m", "set matrix for 4th plane", OFFSET(matrix_str[3]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
-    { "0rdiv", "set rdiv for 1st plane", OFFSET(rdiv[0]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
-    { "1rdiv", "set rdiv for 2nd plane", OFFSET(rdiv[1]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
-    { "2rdiv", "set rdiv for 3rd plane", OFFSET(rdiv[2]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
-    { "3rdiv", "set rdiv for 4th plane", OFFSET(rdiv[3]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "0rdiv", "set rdiv for 1st plane", OFFSET(rdiv[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "1rdiv", "set rdiv for 2nd plane", OFFSET(rdiv[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "2rdiv", "set rdiv for 3rd plane", OFFSET(rdiv[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "3rdiv", "set rdiv for 4th plane", OFFSET(rdiv[3]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
     { "0bias", "set bias for 1st plane", OFFSET(bias[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
     { "1bias", "set bias for 2nd plane", OFFSET(bias[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
     { "2bias", "set bias for 3rd plane", OFFSET(bias[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
     { "3bias", "set bias for 4th plane", OFFSET(bias[3]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "0mode", "set matrix mode for 1st plane", OFFSET(mode[0]), AV_OPT_TYPE_INT, {.i64=MATRIX_SQUARE}, 0, MATRIX_NBMODES-1, FLAGS, "mode" },
+    { "1mode", "set matrix mode for 2nd plane", OFFSET(mode[1]), AV_OPT_TYPE_INT, {.i64=MATRIX_SQUARE}, 0, MATRIX_NBMODES-1, FLAGS, "mode" },
+    { "2mode", "set matrix mode for 3rd plane", OFFSET(mode[2]), AV_OPT_TYPE_INT, {.i64=MATRIX_SQUARE}, 0, MATRIX_NBMODES-1, FLAGS, "mode" },
+    { "3mode", "set matrix mode for 4th plane", OFFSET(mode[3]), AV_OPT_TYPE_INT, {.i64=MATRIX_SQUARE}, 0, MATRIX_NBMODES-1, FLAGS, "mode" },
+    { "square", "square matrix",     0, AV_OPT_TYPE_CONST, {.i64=MATRIX_SQUARE}, 0, 0, FLAGS, "mode" },
+    { "row",    "single row matrix", 0, AV_OPT_TYPE_CONST, {.i64=MATRIX_ROW}   , 0, 0, FLAGS, "mode" },
+    { "column", "single column matrix", 0, AV_OPT_TYPE_CONST, {.i64=MATRIX_COLUMN}, 0, 0, FLAGS, "mode" },
     { NULL }
 };
 
@@ -86,6 +105,14 @@
                                 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0};
 
+static const int same7x7[49] = {0, 0, 0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0,
+                                0, 0, 0, 1, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0};
+
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat pix_fmts[] = {
@@ -105,601 +132,460 @@
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
         AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
 
     return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
 }
 
-static inline void line_copy8(uint8_t *line, const uint8_t *srcp, int width, int mergin)
-{
-    int i;
-
-    memcpy(line, srcp, width);
-
-    for (i = mergin; i > 0; i--) {
-        line[-i] = line[i];
-        line[width - 1 + i] = line[width - 1 - i];
-    }
-}
-
-static inline void line_copy16(uint16_t *line, const uint16_t *srcp, int width, int mergin)
-{
-    int i;
-
-    memcpy(line, srcp, width * 2);
-
-    for (i = mergin; i > 0; i--) {
-        line[-i] = line[i];
-        line[width - 1 + i] = line[width - 1 - i];
-    }
-}
-
 typedef struct ThreadData {
     AVFrame *in, *out;
-    int plane;
 } ThreadData;
 
-static int filter16_prewitt(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+static void filter16_prewitt(uint8_t *dstp, int width,
+                             float scale, float delta, const int *const matrix,
+                             const uint8_t *c[], int peak, int radius,
+                             int dstride, int stride)
 {
-    ConvolutionContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    const int plane = td->plane;
-    const int peak = (1 << s->depth) - 1;
-    const int stride = in->linesize[plane] / 2;
-    const int bstride = s->bstride;
-    const int height = s->planeheight[plane];
-    const int width  = s->planewidth[plane];
-    const int slice_start = (height * jobnr) / nb_jobs;
-    const int slice_end = (height * (jobnr+1)) / nb_jobs;
-    const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride;
-    uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2);
-    const float scale = s->scale;
-    const float delta = s->delta;
-    uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 16;
-    uint16_t *p1 = p0 + bstride;
-    uint16_t *p2 = p1 + bstride;
-    uint16_t *orig = p0, *end = p2;
-    int y, x;
+    uint16_t *dst = (uint16_t *)dstp;
+    int x;
 
-    line_copy16(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1);
-    line_copy16(p1, src, width, 1);
+    for (x = 0; x < width; x++) {
+        int suma = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[1][2 * x]) * -1 + AV_RN16A(&c[2][2 * x]) * -1 +
+                   AV_RN16A(&c[6][2 * x]) *  1 + AV_RN16A(&c[7][2 * x]) *  1 + AV_RN16A(&c[8][2 * x]) *  1;
+        int sumb = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[2][2 * x]) *  1 + AV_RN16A(&c[3][2 * x]) * -1 +
+                   AV_RN16A(&c[5][2 * x]) *  1 + AV_RN16A(&c[6][2 * x]) * -1 + AV_RN16A(&c[8][2 * x]) *  1;
 
-    for (y = slice_start; y < slice_end; y++) {
-        src += stride * (y < height - 1 ? 1 : -1);
-        line_copy16(p2, src, width, 1);
-
-        for (x = 0; x < width; x++) {
-            int suma = p0[x - 1] * -1 +
-                       p0[x] *     -1 +
-                       p0[x + 1] * -1 +
-                       p2[x - 1] *  1 +
-                       p2[x] *      1 +
-                       p2[x + 1] *  1;
-            int sumb = p0[x - 1] * -1 +
-                       p0[x + 1] *  1 +
-                       p1[x - 1] * -1 +
-                       p1[x + 1] *  1 +
-                       p2[x - 1] * -1 +
-                       p2[x + 1] *  1;
-
-            dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak);
-        }
-
-        p0 = p1;
-        p1 = p2;
-        p2 = (p2 == end) ? orig: p2 + bstride;
-        dst += out->linesize[plane] / 2;
+        dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak);
     }
-
-    return 0;
 }
 
-static int filter16_roberts(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+static void filter16_roberts(uint8_t *dstp, int width,
+                             float scale, float delta, const int *const matrix,
+                             const uint8_t *c[], int peak, int radius,
+                             int dstride, int stride)
 {
-    ConvolutionContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    const int plane = td->plane;
-    const int peak = (1 << s->depth) - 1;
-    const int stride = in->linesize[plane] / 2;
-    const int bstride = s->bstride;
-    const int height = s->planeheight[plane];
-    const int width  = s->planewidth[plane];
-    const int slice_start = (height * jobnr) / nb_jobs;
-    const int slice_end = (height * (jobnr+1)) / nb_jobs;
-    const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride;
-    uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2);
-    const float scale = s->scale;
-    const float delta = s->delta;
-    uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 16;
-    uint16_t *p1 = p0 + bstride;
-    uint16_t *p2 = p1 + bstride;
-    uint16_t *orig = p0, *end = p2;
-    int y, x;
+    uint16_t *dst = (uint16_t *)dstp;
+    int x;
 
-    line_copy16(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1);
-    line_copy16(p1, src, width, 1);
+    for (x = 0; x < width; x++) {
+        int suma = AV_RN16A(&c[0][2 * x]) *  1 + AV_RN16A(&c[1][2 * x]) * -1;
+        int sumb = AV_RN16A(&c[4][2 * x]) *  1 + AV_RN16A(&c[3][2 * x]) * -1;
 
-    for (y = slice_start; y < slice_end; y++) {
-        src += stride * (y < height - 1 ? 1 : -1);
-        line_copy16(p2, src, width, 1);
-
-        for (x = 0; x < width; x++) {
-            int suma = p0[x - 1] *  1 +
-                       p1[x    ] * -1;
-            int sumb = p0[x    ] *  1 +
-                       p1[x - 1] * -1;
-
-            dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak);
-        }
-
-        p0 = p1;
-        p1 = p2;
-        p2 = (p2 == end) ? orig: p2 + bstride;
-        dst += out->linesize[plane] / 2;
+        dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak);
     }
-
-    return 0;
 }
 
-static int filter16_sobel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+static void filter16_sobel(uint8_t *dstp, int width,
+                           float scale, float delta, const int *const matrix,
+                           const uint8_t *c[], int peak, int radius,
+                           int dstride, int stride)
 {
-    ConvolutionContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    const int plane = td->plane;
-    const int peak = (1 << s->depth) - 1;
-    const int stride = in->linesize[plane] / 2;
-    const int bstride = s->bstride;
-    const int height = s->planeheight[plane];
-    const int width  = s->planewidth[plane];
-    const int slice_start = (height * jobnr) / nb_jobs;
-    const int slice_end = (height * (jobnr+1)) / nb_jobs;
-    const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride;
-    uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2);
-    const float scale = s->scale;
-    const float delta = s->delta;
-    uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 16;
-    uint16_t *p1 = p0 + bstride;
-    uint16_t *p2 = p1 + bstride;
-    uint16_t *orig = p0, *end = p2;
-    int y, x;
+    uint16_t *dst = (uint16_t *)dstp;
+    int x;
 
-    line_copy16(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1);
-    line_copy16(p1, src, width, 1);
+    for (x = 0; x < width; x++) {
+        int suma = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[1][2 * x]) * -2 + AV_RN16A(&c[2][2 * x]) * -1 +
+                   AV_RN16A(&c[6][2 * x]) *  1 + AV_RN16A(&c[7][2 * x]) *  2 + AV_RN16A(&c[8][2 * x]) *  1;
+        int sumb = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[2][2 * x]) *  1 + AV_RN16A(&c[3][2 * x]) * -2 +
+                   AV_RN16A(&c[5][2 * x]) *  2 + AV_RN16A(&c[6][2 * x]) * -1 + AV_RN16A(&c[8][2 * x]) *  1;
 
-    for (y = slice_start; y < slice_end; y++) {
-        src += stride * (y < height - 1 ? 1 : -1);
-        line_copy16(p2, src, width, 1);
-
-        for (x = 0; x < width; x++) {
-            int suma = p0[x - 1] * -1 +
-                       p0[x] *     -2 +
-                       p0[x + 1] * -1 +
-                       p2[x - 1] *  1 +
-                       p2[x] *      2 +
-                       p2[x + 1] *  1;
-            int sumb = p0[x - 1] * -1 +
-                       p0[x + 1] *  1 +
-                       p1[x - 1] * -2 +
-                       p1[x + 1] *  2 +
-                       p2[x - 1] * -1 +
-                       p2[x + 1] *  1;
-
-            dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak);
-        }
-
-        p0 = p1;
-        p1 = p2;
-        p2 = (p2 == end) ? orig: p2 + bstride;
-        dst += out->linesize[plane] / 2;
+        dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak);
     }
-
-    return 0;
 }
 
-static int filter_prewitt(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+static void filter_prewitt(uint8_t *dst, int width,
+                           float scale, float delta, const int *const matrix,
+                           const uint8_t *c[], int peak, int radius,
+                           int dstride, int stride)
 {
-    ConvolutionContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    const int plane = td->plane;
-    const int stride = in->linesize[plane];
-    const int bstride = s->bstride;
-    const int height = s->planeheight[plane];
-    const int width  = s->planewidth[plane];
-    const int slice_start = (height * jobnr) / nb_jobs;
-    const int slice_end = (height * (jobnr+1)) / nb_jobs;
-    const uint8_t *src = in->data[plane] + slice_start * stride;
-    uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane];
-    const float scale = s->scale;
-    const float delta = s->delta;
-    uint8_t *p0 = s->bptrs[jobnr] + 16;
-    uint8_t *p1 = p0 + bstride;
-    uint8_t *p2 = p1 + bstride;
-    uint8_t *orig = p0, *end = p2;
-    int y, x;
+    const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2];
+    const uint8_t *c3 = c[3], *c5 = c[5];
+    const uint8_t *c6 = c[6], *c7 = c[7], *c8 = c[8];
+    int x;
 
-    line_copy8(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1);
-    line_copy8(p1, src, width, 1);
+    for (x = 0; x < width; x++) {
+        int suma = c0[x] * -1 + c1[x] * -1 + c2[x] * -1 +
+                   c6[x] *  1 + c7[x] *  1 + c8[x] *  1;
+        int sumb = c0[x] * -1 + c2[x] *  1 + c3[x] * -1 +
+                   c5[x] *  1 + c6[x] * -1 + c8[x] *  1;
 
-    for (y = slice_start; y < slice_end; y++) {
-        src += stride * (y < height - 1 ? 1 : -1);
-        line_copy8(p2, src, width, 1);
-
-        for (x = 0; x < width; x++) {
-            int suma = p0[x - 1] * -1 +
-                       p0[x] *     -1 +
-                       p0[x + 1] * -1 +
-                       p2[x - 1] *  1 +
-                       p2[x] *      1 +
-                       p2[x + 1] *  1;
-            int sumb = p0[x - 1] * -1 +
-                       p0[x + 1] *  1 +
-                       p1[x - 1] * -1 +
-                       p1[x + 1] *  1 +
-                       p2[x - 1] * -1 +
-                       p2[x + 1] *  1;
-
-            dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta);
-        }
-
-        p0 = p1;
-        p1 = p2;
-        p2 = (p2 == end) ? orig: p2 + bstride;
-        dst += out->linesize[plane];
+        dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta);
     }
-
-    return 0;
 }
 
-static int filter_roberts(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+static void filter_roberts(uint8_t *dst, int width,
+                           float scale, float delta, const int *const matrix,
+                           const uint8_t *c[], int peak, int radius,
+                           int dstride, int stride)
 {
-    ConvolutionContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    const int plane = td->plane;
-    const int stride = in->linesize[plane];
-    const int bstride = s->bstride;
-    const int height = s->planeheight[plane];
-    const int width  = s->planewidth[plane];
-    const int slice_start = (height * jobnr) / nb_jobs;
-    const int slice_end = (height * (jobnr+1)) / nb_jobs;
-    const uint8_t *src = in->data[plane] + slice_start * stride;
-    uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane];
-    const float scale = s->scale;
-    const float delta = s->delta;
-    uint8_t *p0 = s->bptrs[jobnr] + 16;
-    uint8_t *p1 = p0 + bstride;
-    uint8_t *p2 = p1 + bstride;
-    uint8_t *orig = p0, *end = p2;
-    int y, x;
+    int x;
 
-    line_copy8(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1);
-    line_copy8(p1, src, width, 1);
+    for (x = 0; x < width; x++) {
+        int suma = c[0][x] *  1 + c[1][x] * -1;
+        int sumb = c[4][x] *  1 + c[3][x] * -1;
 
-    for (y = slice_start; y < slice_end; y++) {
-        src += stride * (y < height - 1 ? 1 : -1);
-        line_copy8(p2, src, width, 1);
-
-        for (x = 0; x < width; x++) {
-            int suma = p0[x - 1] *  1 +
-                       p1[x    ] * -1;
-            int sumb = p0[x    ] *  1 +
-                       p1[x - 1] * -1;
-
-            dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta);
-        }
-
-        p0 = p1;
-        p1 = p2;
-        p2 = (p2 == end) ? orig: p2 + bstride;
-        dst += out->linesize[plane];
+        dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta);
     }
-
-    return 0;
 }
 
-static int filter_sobel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+static void filter_sobel(uint8_t *dst, int width,
+                         float scale, float delta, const int *const matrix,
+                         const uint8_t *c[], int peak, int radius,
+                         int dstride, int stride)
 {
-    ConvolutionContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    const int plane = td->plane;
-    const int stride = in->linesize[plane];
-    const int bstride = s->bstride;
-    const int height = s->planeheight[plane];
-    const int width  = s->planewidth[plane];
-    const int slice_start = (height * jobnr) / nb_jobs;
-    const int slice_end = (height * (jobnr+1)) / nb_jobs;
-    const uint8_t *src = in->data[plane] + slice_start * stride;
-    uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane];
-    const float scale = s->scale;
-    const float delta = s->delta;
-    uint8_t *p0 = s->bptrs[jobnr] + 16;
-    uint8_t *p1 = p0 + bstride;
-    uint8_t *p2 = p1 + bstride;
-    uint8_t *orig = p0, *end = p2;
-    int y, x;
+    const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2];
+    const uint8_t *c3 = c[3], *c5 = c[5];
+    const uint8_t *c6 = c[6], *c7 = c[7], *c8 = c[8];
+    int x;
 
-    line_copy8(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1);
-    line_copy8(p1, src, width, 1);
+    for (x = 0; x < width; x++) {
+        int suma = c0[x] * -1 + c1[x] * -2 + c2[x] * -1 +
+                   c6[x] *  1 + c7[x] *  2 + c8[x] *  1;
+        int sumb = c0[x] * -1 + c2[x] *  1 + c3[x] * -2 +
+                   c5[x] *  2 + c6[x] * -1 + c8[x] *  1;
 
-    for (y = slice_start; y < slice_end; y++) {
-        src += stride * (y < height - 1 ? 1 : -1);
-        line_copy8(p2, src, width, 1);
-
-        for (x = 0; x < width; x++) {
-            int suma = p0[x - 1] * -1 +
-                       p0[x] *     -2 +
-                       p0[x + 1] * -1 +
-                       p2[x - 1] *  1 +
-                       p2[x] *      2 +
-                       p2[x + 1] *  1;
-            int sumb = p0[x - 1] * -1 +
-                       p0[x + 1] *  1 +
-                       p1[x - 1] * -2 +
-                       p1[x + 1] *  2 +
-                       p2[x - 1] * -1 +
-                       p2[x + 1] *  1;
-
-            dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta);
-        }
-
-        p0 = p1;
-        p1 = p2;
-        p2 = (p2 == end) ? orig: p2 + bstride;
-        dst += out->linesize[plane];
+        dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta);
     }
-
-    return 0;
 }
 
-static int filter16_3x3(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+static void filter16_3x3(uint8_t *dstp, int width,
+                         float rdiv, float bias, const int *const matrix,
+                         const uint8_t *c[], int peak, int radius,
+                         int dstride, int stride)
 {
-    ConvolutionContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    const int plane = td->plane;
-    const int peak = (1 << s->depth) - 1;
-    const int stride = in->linesize[plane] / 2;
-    const int bstride = s->bstride;
-    const int height = s->planeheight[plane];
-    const int width  = s->planewidth[plane];
-    const int slice_start = (height * jobnr) / nb_jobs;
-    const int slice_end = (height * (jobnr+1)) / nb_jobs;
-    const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride;
-    uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2);
-    uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 16;
-    uint16_t *p1 = p0 + bstride;
-    uint16_t *p2 = p1 + bstride;
-    uint16_t *orig = p0, *end = p2;
-    const int *matrix = s->matrix[plane];
-    const float rdiv = s->rdiv[plane];
-    const float bias = s->bias[plane];
-    int y, x;
+    uint16_t *dst = (uint16_t *)dstp;
+    int x;
 
-    line_copy16(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1);
-    line_copy16(p1, src, width, 1);
-
-    for (y = slice_start; y < slice_end; y++) {
-        src += stride * (y < height - 1 ? 1 : -1);
-        line_copy16(p2, src, width, 1);
-
-        for (x = 0; x < width; x++) {
-            int sum = p0[x - 1] * matrix[0] +
-                      p0[x] *     matrix[1] +
-                      p0[x + 1] * matrix[2] +
-                      p1[x - 1] * matrix[3] +
-                      p1[x] *     matrix[4] +
-                      p1[x + 1] * matrix[5] +
-                      p2[x - 1] * matrix[6] +
-                      p2[x] *     matrix[7] +
-                      p2[x + 1] * matrix[8];
-            sum = (int)(sum * rdiv + bias + 0.5f);
-            dst[x] = av_clip(sum, 0, peak);
-        }
-
-        p0 = p1;
-        p1 = p2;
-        p2 = (p2 == end) ? orig: p2 + bstride;
-        dst += out->linesize[plane] / 2;
+    for (x = 0; x < width; x++) {
+        int sum = AV_RN16A(&c[0][2 * x]) * matrix[0] +
+                  AV_RN16A(&c[1][2 * x]) * matrix[1] +
+                  AV_RN16A(&c[2][2 * x]) * matrix[2] +
+                  AV_RN16A(&c[3][2 * x]) * matrix[3] +
+                  AV_RN16A(&c[4][2 * x]) * matrix[4] +
+                  AV_RN16A(&c[5][2 * x]) * matrix[5] +
+                  AV_RN16A(&c[6][2 * x]) * matrix[6] +
+                  AV_RN16A(&c[7][2 * x]) * matrix[7] +
+                  AV_RN16A(&c[8][2 * x]) * matrix[8];
+        sum = (int)(sum * rdiv + bias + 0.5f);
+        dst[x] = av_clip(sum, 0, peak);
     }
-
-    return 0;
 }
 
-static int filter16_5x5(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+static void filter16_5x5(uint8_t *dstp, int width,
+                         float rdiv, float bias, const int *const matrix,
+                         const uint8_t *c[], int peak, int radius,
+                         int dstride, int stride)
+{
+    uint16_t *dst = (uint16_t *)dstp;
+    int x;
+
+    for (x = 0; x < width; x++) {
+        int i, sum = 0;
+
+        for (i = 0; i < 25; i++)
+            sum += AV_RN16A(&c[i][2 * x]) * matrix[i];
+
+        sum = (int)(sum * rdiv + bias + 0.5f);
+        dst[x] = av_clip(sum, 0, peak);
+    }
+}
+
+static void filter16_7x7(uint8_t *dstp, int width,
+                         float rdiv, float bias, const int *const matrix,
+                         const uint8_t *c[], int peak, int radius,
+                         int dstride, int stride)
+{
+    uint16_t *dst = (uint16_t *)dstp;
+    int x;
+
+    for (x = 0; x < width; x++) {
+        int i, sum = 0;
+
+        for (i = 0; i < 49; i++)
+            sum += AV_RN16A(&c[i][2 * x]) * matrix[i];
+
+        sum = (int)(sum * rdiv + bias + 0.5f);
+        dst[x] = av_clip(sum, 0, peak);
+    }
+}
+
+static void filter16_row(uint8_t *dstp, int width,
+                         float rdiv, float bias, const int *const matrix,
+                         const uint8_t *c[], int peak, int radius,
+                         int dstride, int stride)
+{
+    uint16_t *dst = (uint16_t *)dstp;
+    int x;
+
+    for (x = 0; x < width; x++) {
+        int i, sum = 0;
+
+        for (i = 0; i < 2 * radius + 1; i++)
+            sum += AV_RN16A(&c[i][2 * x]) * matrix[i];
+
+        sum = (int)(sum * rdiv + bias + 0.5f);
+        dst[x] = av_clip(sum, 0, peak);
+    }
+}
+
+static void filter16_column(uint8_t *dstp, int height,
+                            float rdiv, float bias, const int *const matrix,
+                            const uint8_t *c[], int peak, int radius,
+                            int dstride, int stride)
+{
+    uint16_t *dst = (uint16_t *)dstp;
+    int y;
+
+    for (y = 0; y < height; y++) {
+        int i, sum = 0;
+
+        for (i = 0; i < 2 * radius + 1; i++)
+            sum += AV_RN16A(&c[i][0 + y * stride]) * matrix[i];
+
+        sum = (int)(sum * rdiv + bias + 0.5f);
+        dst[0] = av_clip(sum, 0, peak);
+        dst += dstride / 2;
+    }
+}
+
+static void filter_7x7(uint8_t *dst, int width,
+                       float rdiv, float bias, const int *const matrix,
+                       const uint8_t *c[], int peak, int radius,
+                       int dstride, int stride)
+{
+    int x;
+
+    for (x = 0; x < width; x++) {
+        int i, sum = 0;
+
+        for (i = 0; i < 49; i++)
+            sum += c[i][x] * matrix[i];
+
+        sum = (int)(sum * rdiv + bias + 0.5f);
+        dst[x] = av_clip_uint8(sum);
+    }
+}
+
+static void filter_5x5(uint8_t *dst, int width,
+                       float rdiv, float bias, const int *const matrix,
+                       const uint8_t *c[], int peak, int radius,
+                       int dstride, int stride)
+{
+    int x;
+
+    for (x = 0; x < width; x++) {
+        int i, sum = 0;
+
+        for (i = 0; i < 25; i++)
+            sum += c[i][x] * matrix[i];
+
+        sum = (int)(sum * rdiv + bias + 0.5f);
+        dst[x] = av_clip_uint8(sum);
+    }
+}
+
+static void filter_3x3(uint8_t *dst, int width,
+                       float rdiv, float bias, const int *const matrix,
+                       const uint8_t *c[], int peak, int radius,
+                       int dstride, int stride)
+{
+    const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2];
+    const uint8_t *c3 = c[3], *c4 = c[4], *c5 = c[5];
+    const uint8_t *c6 = c[6], *c7 = c[7], *c8 = c[8];
+    int x;
+
+    for (x = 0; x < width; x++) {
+        int sum = c0[x] * matrix[0] + c1[x] * matrix[1] + c2[x] * matrix[2] +
+                  c3[x] * matrix[3] + c4[x] * matrix[4] + c5[x] * matrix[5] +
+                  c6[x] * matrix[6] + c7[x] * matrix[7] + c8[x] * matrix[8];
+        sum = (int)(sum * rdiv + bias + 0.5f);
+        dst[x] = av_clip_uint8(sum);
+    }
+}
+
+static void filter_row(uint8_t *dst, int width,
+                       float rdiv, float bias, const int *const matrix,
+                       const uint8_t *c[], int peak, int radius,
+                       int dstride, int stride)
+{
+    int x;
+
+    for (x = 0; x < width; x++) {
+        int i, sum = 0;
+
+        for (i = 0; i < 2 * radius + 1; i++)
+            sum += c[i][x] * matrix[i];
+
+        sum = (int)(sum * rdiv + bias + 0.5f);
+        dst[x] = av_clip_uint8(sum);
+    }
+}
+
+static void filter_column(uint8_t *dst, int height,
+                          float rdiv, float bias, const int *const matrix,
+                          const uint8_t *c[], int peak, int radius,
+                          int dstride, int stride)
+{
+    int y;
+
+    for (y = 0; y < height; y++) {
+        int i, sum = 0;
+
+        for (i = 0; i < 2 * radius + 1; i++)
+            sum += c[i][0 + y * stride] * matrix[i];
+
+        sum = (int)(sum * rdiv + bias + 0.5f);
+        dst[0] = av_clip_uint8(sum);
+        dst += dstride;
+    }
+}
+
+static void setup_3x3(int radius, const uint8_t *c[], const uint8_t *src, int stride,
+                      int x, int w, int y, int h, int bpc)
+{
+    int i;
+
+    for (i = 0; i < 9; i++) {
+        int xoff = FFABS(x + ((i % 3) - 1));
+        int yoff = FFABS(y + (i / 3) - 1);
+
+        xoff = xoff >= w ? 2 * w - 1 - xoff : xoff;
+        yoff = yoff >= h ? 2 * h - 1 - yoff : yoff;
+
+        c[i] = src + xoff * bpc + yoff * stride;
+    }
+}
+
+static void setup_5x5(int radius, const uint8_t *c[], const uint8_t *src, int stride,
+                      int x, int w, int y, int h, int bpc)
+{
+    int i;
+
+    for (i = 0; i < 25; i++) {
+        int xoff = FFABS(x + ((i % 5) - 2));
+        int yoff = FFABS(y + (i / 5) - 2);
+
+        xoff = xoff >= w ? 2 * w - 1 - xoff : xoff;
+        yoff = yoff >= h ? 2 * h - 1 - yoff : yoff;
+
+        c[i] = src + xoff * bpc + yoff * stride;
+    }
+}
+
+static void setup_7x7(int radius, const uint8_t *c[], const uint8_t *src, int stride,
+                      int x, int w, int y, int h, int bpc)
+{
+    int i;
+
+    for (i = 0; i < 49; i++) {
+        int xoff = FFABS(x + ((i % 7) - 3));
+        int yoff = FFABS(y + (i / 7) - 3);
+
+        xoff = xoff >= w ? 2 * w - 1 - xoff : xoff;
+        yoff = yoff >= h ? 2 * h - 1 - yoff : yoff;
+
+        c[i] = src + xoff * bpc + yoff * stride;
+    }
+}
+
+static void setup_row(int radius, const uint8_t *c[], const uint8_t *src, int stride,
+                      int x, int w, int y, int h, int bpc)
+{
+    int i;
+
+    for (i = 0; i < radius * 2 + 1; i++) {
+        int xoff = FFABS(x + i - radius);
+
+        xoff = xoff >= w ? 2 * w - 1 - xoff : xoff;
+
+        c[i] = src + xoff * bpc + y * stride;
+    }
+}
+
+static void setup_column(int radius, const uint8_t *c[], const uint8_t *src, int stride,
+                         int x, int w, int y, int h, int bpc)
+{
+    int i;
+
+    for (i = 0; i < radius * 2 + 1; i++) {
+        int xoff = FFABS(x + i - radius);
+
+        xoff = xoff >= h ? 2 * h - 1 - xoff : xoff;
+
+        c[i] = src + y * bpc + xoff * stride;
+    }
+}
+
+static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
     ConvolutionContext *s = ctx->priv;
     ThreadData *td = arg;
     AVFrame *in = td->in;
     AVFrame *out = td->out;
-    const int plane = td->plane;
-    const int peak = (1 << s->depth) - 1;
-    const int stride = in->linesize[plane] / 2;
-    const int bstride = s->bstride;
-    const int height = s->planeheight[plane];
-    const int width  = s->planewidth[plane];
-    const int slice_start = (height * jobnr) / nb_jobs;
-    const int slice_end = (height * (jobnr+1)) / nb_jobs;
-    const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride;
-    uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2);
-    uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 16;
-    uint16_t *p1 = p0 + bstride;
-    uint16_t *p2 = p1 + bstride;
-    uint16_t *p3 = p2 + bstride;
-    uint16_t *p4 = p3 + bstride;
-    uint16_t *orig = p0, *end = p4;
-    const int *matrix = s->matrix[plane];
-    float rdiv = s->rdiv[plane];
-    float bias = s->bias[plane];
-    int y, x, i;
+    int plane;
 
-    line_copy16(p0, src + 2 * stride * (slice_start < 2 ? 1 : -1), width, 2);
-    line_copy16(p1, src + stride * (slice_start == 0 ? 1 : -1), width, 2);
-    line_copy16(p2, src, width, 2);
-    src += stride;
-    line_copy16(p3, src, width, 2);
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        const int mode = s->mode[plane];
+        const int bpc = s->bpc;
+        const int radius = s->size[plane] / 2;
+        const int height = s->planeheight[plane];
+        const int width  = s->planewidth[plane];
+        const int stride = in->linesize[plane];
+        const int dstride = out->linesize[plane];
+        const int sizeh = mode == MATRIX_COLUMN ? width : height;
+        const int sizew = mode == MATRIX_COLUMN ? height : width;
+        const int slice_start = (sizeh * jobnr) / nb_jobs;
+        const int slice_end = (sizeh * (jobnr+1)) / nb_jobs;
+        const float rdiv = s->rdiv[plane];
+        const float bias = s->bias[plane];
+        const uint8_t *src = in->data[plane];
+        const int dst_pos = slice_start * (mode == MATRIX_COLUMN ? bpc : dstride);
+        uint8_t *dst = out->data[plane] + dst_pos;
+        const int *matrix = s->matrix[plane];
+        const uint8_t *c[49];
+        int y, x;
 
-    for (y = slice_start; y < slice_end; y++) {
-        uint16_t *array[] = {
-            p0 - 2, p0 - 1, p0, p0 + 1, p0 + 2,
-            p1 - 2, p1 - 1, p1, p1 + 1, p1 + 2,
-            p2 - 2, p2 - 1, p2, p2 + 1, p2 + 2,
-            p3 - 2, p3 - 1, p3, p3 + 1, p3 + 2,
-            p4 - 2, p4 - 1, p4, p4 + 1, p4 + 2
-        };
+        if (s->copy[plane]) {
+            if (mode == MATRIX_COLUMN)
+                av_image_copy_plane(dst, dstride, src + slice_start * bpc, stride,
+                                    (slice_end - slice_start) * bpc, height);
+            else
+                av_image_copy_plane(dst, dstride, src + slice_start * stride, stride,
+                                    width * bpc, slice_end - slice_start);
+            continue;
+        }
 
-        src += stride * (y < height - 2 ? 1 : -1);
-        line_copy16(p4, src, width, 2);
+        for (y = slice_start; y < slice_end; y++) {
+            const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : radius * bpc;
+            const int yoff = mode == MATRIX_COLUMN ? radius * stride : 0;
 
-        for (x = 0; x < width; x++) {
-            int sum = 0;
+            for (x = 0; x < radius; x++) {
+                const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : x * bpc;
+                const int yoff = mode == MATRIX_COLUMN ? x * stride : 0;
 
-            for (i = 0; i < 25; i++) {
-                sum += *(array[i] + x) * matrix[i];
+                s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
+                s->filter[plane](dst + yoff + xoff, 1, rdiv,
+                                 bias, matrix, c, s->max, radius,
+                                 dstride, stride);
             }
-            sum = (int)(sum * rdiv + bias + 0.5f);
-            dst[x] = av_clip(sum, 0, peak);
-        }
+            s->setup[plane](radius, c, src, stride, radius, width, y, height, bpc);
+            s->filter[plane](dst + yoff + xoff, sizew - 2 * radius,
+                             rdiv, bias, matrix, c, s->max, radius,
+                             dstride, stride);
+            for (x = sizew - radius; x < sizew; x++) {
+                const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : x * bpc;
+                const int yoff = mode == MATRIX_COLUMN ? x * stride : 0;
 
-        p0 = p1;
-        p1 = p2;
-        p2 = p3;
-        p3 = p4;
-        p4 = (p4 == end) ? orig: p4 + bstride;
-        dst += out->linesize[plane] / 2;
-    }
-
-    return 0;
-}
-
-static int filter_3x3(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
-    ConvolutionContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    const int plane = td->plane;
-    const int stride = in->linesize[plane];
-    const int bstride = s->bstride;
-    const int height = s->planeheight[plane];
-    const int width  = s->planewidth[plane];
-    const int slice_start = (height * jobnr) / nb_jobs;
-    const int slice_end = (height * (jobnr+1)) / nb_jobs;
-    const uint8_t *src = in->data[plane] + slice_start * stride;
-    uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane];
-    uint8_t *p0 = s->bptrs[jobnr] + 16;
-    uint8_t *p1 = p0 + bstride;
-    uint8_t *p2 = p1 + bstride;
-    uint8_t *orig = p0, *end = p2;
-    const int *matrix = s->matrix[plane];
-    const float rdiv = s->rdiv[plane];
-    const float bias = s->bias[plane];
-    int y, x;
-
-    line_copy8(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1);
-    line_copy8(p1, src, width, 1);
-
-    for (y = slice_start; y < slice_end; y++) {
-        src += stride * (y < height - 1 ? 1 : -1);
-        line_copy8(p2, src, width, 1);
-
-        for (x = 0; x < width; x++) {
-            int sum = p0[x - 1] * matrix[0] +
-                      p0[x] *     matrix[1] +
-                      p0[x + 1] * matrix[2] +
-                      p1[x - 1] * matrix[3] +
-                      p1[x] *     matrix[4] +
-                      p1[x + 1] * matrix[5] +
-                      p2[x - 1] * matrix[6] +
-                      p2[x] *     matrix[7] +
-                      p2[x + 1] * matrix[8];
-            sum = (int)(sum * rdiv + bias + 0.5f);
-            dst[x] = av_clip_uint8(sum);
-        }
-
-        p0 = p1;
-        p1 = p2;
-        p2 = (p2 == end) ? orig: p2 + bstride;
-        dst += out->linesize[plane];
-    }
-
-    return 0;
-}
-
-static int filter_5x5(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
-    ConvolutionContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    const int plane = td->plane;
-    const int stride = in->linesize[plane];
-    const int bstride = s->bstride;
-    const int height = s->planeheight[plane];
-    const int width  = s->planewidth[plane];
-    const int slice_start = (height * jobnr) / nb_jobs;
-    const int slice_end = (height * (jobnr+1)) / nb_jobs;
-    const uint8_t *src = in->data[plane] + slice_start * stride;
-    uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane];
-    uint8_t *p0 = s->bptrs[jobnr] + 16;
-    uint8_t *p1 = p0 + bstride;
-    uint8_t *p2 = p1 + bstride;
-    uint8_t *p3 = p2 + bstride;
-    uint8_t *p4 = p3 + bstride;
-    uint8_t *orig = p0, *end = p4;
-    const int *matrix = s->matrix[plane];
-    float rdiv = s->rdiv[plane];
-    float bias = s->bias[plane];
-    int y, x, i;
-
-    line_copy8(p0, src + 2 * stride * (slice_start < 2 ? 1 : -1), width, 2);
-    line_copy8(p1, src + stride * (slice_start == 0 ? 1 : -1), width, 2);
-    line_copy8(p2, src, width, 2);
-    src += stride;
-    line_copy8(p3, src, width, 2);
-
-
-    for (y = slice_start; y < slice_end; y++) {
-        uint8_t *array[] = {
-            p0 - 2, p0 - 1, p0, p0 + 1, p0 + 2,
-            p1 - 2, p1 - 1, p1, p1 + 1, p1 + 2,
-            p2 - 2, p2 - 1, p2, p2 + 1, p2 + 2,
-            p3 - 2, p3 - 1, p3, p3 + 1, p3 + 2,
-            p4 - 2, p4 - 1, p4, p4 + 1, p4 + 2
-        };
-
-        src += stride * (y < height - 2 ? 1 : -1);
-        line_copy8(p4, src, width, 2);
-
-        for (x = 0; x < width; x++) {
-            int sum = 0;
-
-            for (i = 0; i < 25; i++) {
-                sum += *(array[i] + x) * matrix[i];
+                s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
+                s->filter[plane](dst + yoff + xoff, 1, rdiv,
+                                 bias, matrix, c, s->max, radius,
+                                 dstride, stride);
             }
-            sum = (int)(sum * rdiv + bias + 0.5f);
-            dst[x] = av_clip_uint8(sum);
+            if (mode != MATRIX_COLUMN)
+                dst += dstride;
         }
-
-        p0 = p1;
-        p1 = p2;
-        p2 = p3;
-        p3 = p4;
-        p4 = (p4 == end) ? orig: p4 + bstride;
-        dst += out->linesize[plane];
     }
 
     return 0;
@@ -713,6 +599,7 @@
     int p;
 
     s->depth = desc->comp[0].depth;
+    s->max = (1 << s->depth) - 1;
 
     s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
     s->planewidth[0] = s->planewidth[3] = inlink->w;
@@ -721,27 +608,21 @@
 
     s->nb_planes = av_pix_fmt_count_planes(inlink->format);
     s->nb_threads = ff_filter_get_nb_threads(ctx);
-    s->bptrs = av_calloc(s->nb_threads, sizeof(*s->bptrs));
-    if (!s->bptrs)
-        return AVERROR(ENOMEM);
-
-    s->bstride = s->planewidth[0] + 32;
     s->bpc = (s->depth + 7) / 8;
-    s->buffer = av_malloc_array(5 * s->bstride * s->nb_threads, s->bpc);
-    if (!s->buffer)
-        return AVERROR(ENOMEM);
-
-    for (p = 0; p < s->nb_threads; p++) {
-        s->bptrs[p] = s->buffer + 5 * s->bstride * s->bpc * p;
-    }
 
     if (!strcmp(ctx->filter->name, "convolution")) {
         if (s->depth > 8) {
             for (p = 0; p < s->nb_planes; p++) {
-                if (s->size[p] == 3)
+                if (s->mode[p] == MATRIX_ROW)
+                    s->filter[p] = filter16_row;
+                else if (s->mode[p] == MATRIX_COLUMN)
+                    s->filter[p] = filter16_column;
+                else if (s->size[p] == 3)
                     s->filter[p] = filter16_3x3;
                 else if (s->size[p] == 5)
                     s->filter[p] = filter16_5x5;
+                else if (s->size[p] == 7)
+                    s->filter[p] = filter16_7x7;
             }
         }
     } else if (!strcmp(ctx->filter->name, "prewitt")) {
@@ -767,7 +648,7 @@
     ConvolutionContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out;
-    int plane;
+    ThreadData td;
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out) {
@@ -776,22 +657,9 @@
     }
     av_frame_copy_props(out, in);
 
-    for (plane = 0; plane < s->nb_planes; plane++) {
-        ThreadData td;
-
-        if (s->copy[plane]) {
-            av_image_copy_plane(out->data[plane], out->linesize[plane],
-                                in->data[plane], in->linesize[plane],
-                                s->planewidth[plane] * s->bpc,
-                                s->planeheight[plane]);
-            continue;
-        }
-
-        td.in = in;
-        td.out = out;
-        td.plane = plane;
-        ctx->internal->execute(ctx, s->filter[plane], &td, NULL, FFMIN(s->planeheight[plane], s->nb_threads));
-    }
+    td.in = in;
+    td.out = out;
+    ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN3(s->planeheight[1], s->planewidth[1], s->nb_threads));
 
     av_frame_free(&in);
     return ff_filter_frame(outlink, out);
@@ -806,32 +674,63 @@
         for (i = 0; i < 4; i++) {
             int *matrix = (int *)s->matrix[i];
             char *p, *arg, *saveptr = NULL;
+            float sum = 0;
 
             p = s->matrix_str[i];
-            while (s->matrix_length[i] < 25) {
+            while (s->matrix_length[i] < 49) {
                 if (!(arg = av_strtok(p, " ", &saveptr)))
                     break;
 
                 p = NULL;
                 sscanf(arg, "%d", &matrix[s->matrix_length[i]]);
+                sum += matrix[s->matrix_length[i]];
                 s->matrix_length[i]++;
             }
 
-            if (s->matrix_length[i] == 9) {
+            if (!(s->matrix_length[i] & 1)) {
+                av_log(ctx, AV_LOG_ERROR, "number of matrix elements must be odd\n");
+                return AVERROR(EINVAL);
+            }
+            if (s->mode[i] == MATRIX_ROW) {
+                s->filter[i] = filter_row;
+                s->setup[i] = setup_row;
+                s->size[i] = s->matrix_length[i];
+            } else if (s->mode[i] == MATRIX_COLUMN) {
+                s->filter[i] = filter_column;
+                s->setup[i] = setup_column;
+                s->size[i] = s->matrix_length[i];
+            } else if (s->matrix_length[i] == 9) {
                 s->size[i] = 3;
                 if (!memcmp(matrix, same3x3, sizeof(same3x3)))
                     s->copy[i] = 1;
                 else
                     s->filter[i] = filter_3x3;
+                s->setup[i] = setup_3x3;
             } else if (s->matrix_length[i] == 25) {
                 s->size[i] = 5;
                 if (!memcmp(matrix, same5x5, sizeof(same5x5)))
                     s->copy[i] = 1;
                 else
                     s->filter[i] = filter_5x5;
+                s->setup[i] = setup_5x5;
+            } else if (s->matrix_length[i] == 49) {
+                s->size[i] = 7;
+                if (!memcmp(matrix, same7x7, sizeof(same7x7)))
+                    s->copy[i] = 1;
+                else
+                    s->filter[i] = filter_7x7;
+                s->setup[i] = setup_7x7;
             } else {
                 return AVERROR(EINVAL);
             }
+
+            if (sum == 0)
+                sum = 1;
+            if (s->rdiv[i] == 0)
+                s->rdiv[i] = 1. / sum;
+
+            if (s->copy[i] && (s->rdiv[i] != 1. || s->bias[i] != 0.))
+                s->copy[i] = 0;
         }
     } else if (!strcmp(ctx->filter->name, "prewitt")) {
         for (i = 0; i < 4; i++) {
@@ -839,6 +738,10 @@
                 s->filter[i] = filter_prewitt;
             else
                 s->copy[i] = 1;
+            s->size[i] = 3;
+            s->setup[i] = setup_3x3;
+            s->rdiv[i] = s->scale;
+            s->bias[i] = s->delta;
         }
     } else if (!strcmp(ctx->filter->name, "roberts")) {
         for (i = 0; i < 4; i++) {
@@ -846,6 +749,10 @@
                 s->filter[i] = filter_roberts;
             else
                 s->copy[i] = 1;
+            s->size[i] = 3;
+            s->setup[i] = setup_3x3;
+            s->rdiv[i] = s->scale;
+            s->bias[i] = s->delta;
         }
     } else if (!strcmp(ctx->filter->name, "sobel")) {
         for (i = 0; i < 4; i++) {
@@ -853,20 +760,16 @@
                 s->filter[i] = filter_sobel;
             else
                 s->copy[i] = 1;
+            s->size[i] = 3;
+            s->setup[i] = setup_3x3;
+            s->rdiv[i] = s->scale;
+            s->bias[i] = s->delta;
         }
     }
 
     return 0;
 }
 
-static av_cold void uninit(AVFilterContext *ctx)
-{
-    ConvolutionContext *s = ctx->priv;
-
-    av_freep(&s->bptrs);
-    av_freep(&s->buffer);
-}
-
 static const AVFilterPad convolution_inputs[] = {
     {
         .name         = "default",
@@ -893,7 +796,6 @@
     .priv_size     = sizeof(ConvolutionContext),
     .priv_class    = &convolution_class,
     .init          = init,
-    .uninit        = uninit,
     .query_formats = query_formats,
     .inputs        = convolution_inputs,
     .outputs       = convolution_outputs,
@@ -919,7 +821,6 @@
     .priv_size     = sizeof(ConvolutionContext),
     .priv_class    = &prewitt_class,
     .init          = init,
-    .uninit        = uninit,
     .query_formats = query_formats,
     .inputs        = convolution_inputs,
     .outputs       = convolution_outputs,
@@ -945,7 +846,6 @@
     .priv_size     = sizeof(ConvolutionContext),
     .priv_class    = &sobel_class,
     .init          = init,
-    .uninit        = uninit,
     .query_formats = query_formats,
     .inputs        = convolution_inputs,
     .outputs       = convolution_outputs,
@@ -971,7 +871,6 @@
     .priv_size     = sizeof(ConvolutionContext),
     .priv_class    = &roberts_class,
     .init          = init,
-    .uninit        = uninit,
     .query_formats = query_formats,
     .inputs        = convolution_inputs,
     .outputs       = convolution_outputs,

diff --git a/libavfilter/vf_convolution_opencl.c b/libavfilter/vf_convolution_opencl.c
new file mode 100644
index 0000000..00246b2
--- /dev/null
+++ b/libavfilter/vf_convolution_opencl.c

@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2018 Danil Iashchenko
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+
+typedef struct ConvolutionOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    int              initialised;
+    cl_kernel        kernel;
+    cl_command_queue command_queue;
+
+    char *matrix_str[4];
+
+    cl_mem matrix[4];
+    cl_int matrix_sizes[4];
+    cl_int dims[4];
+    cl_float rdivs[4];
+    cl_float biases[4];
+
+    cl_int planes;
+    cl_float scale;
+    cl_float delta;
+
+} ConvolutionOpenCLContext;
+
+static int convolution_opencl_init(AVFilterContext *avctx)
+{
+    ConvolutionOpenCLContext *ctx = avctx->priv;
+    const char *kernel_name;
+    cl_int cle;
+    int err;
+
+    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_convolution, 1);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+                     "command queue %d.\n", cle);
+
+    if (!strcmp(avctx->filter->name, "convolution_opencl")) {
+        kernel_name = "convolution_global";
+    } else if (!strcmp(avctx->filter->name, "sobel_opencl")) {
+        kernel_name = "sobel_global";
+    } else if (!strcmp(avctx->filter->name, "prewitt_opencl")){
+        kernel_name = "prewitt_global";
+    } else if (!strcmp(avctx->filter->name, "roberts_opencl")){
+        kernel_name = "roberts_global";
+    }
+    ctx->kernel = clCreateKernel(ctx->ocf.program, kernel_name, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                     "kernel %d.\n", cle);
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel)
+        clReleaseKernel(ctx->kernel);
+    return err;
+}
+
+
+
+static int convolution_opencl_make_filter_params(AVFilterContext *avctx)
+{
+    ConvolutionOpenCLContext *ctx = avctx->priv;
+    float *matrix = NULL;
+    size_t matrix_bytes;
+    cl_mem buffer;
+    cl_int cle;
+    int i, j;
+    int sscanf_err;
+    char *p, *arg, *saveptr = NULL;
+    float input_matrix[4][49];
+
+    for (i = 0; i < 4; i++) {
+        ctx->biases[i] = ctx->biases[i] / 255.0;
+    }
+
+    for (i = 0; i < 4; i++) {
+        p = ctx->matrix_str[i];
+        while (ctx->matrix_sizes[i] < 49) {
+            arg = av_strtok(p, " ", &saveptr);
+            if (!arg) {
+                break;
+            }
+            p = NULL;
+            sscanf_err = sscanf(arg, "%f", &input_matrix[i][ctx->matrix_sizes[i]]);
+            if (sscanf_err != 1) {
+                av_log(ctx, AV_LOG_ERROR, "Matrix is sequence of 9, 25 or 49 signed numbers\n");
+                return AVERROR(EINVAL);
+            }
+            ctx->matrix_sizes[i]++;
+        }
+        if (ctx->matrix_sizes[i] == 9) {
+            ctx->dims[i] = 3;
+        } else if (ctx->matrix_sizes[i] == 25) {
+            ctx->dims[i] = 5;
+        } else if (ctx->matrix_sizes[i] == 49) {
+            ctx->dims[i] = 7;
+        } else {
+            av_log(ctx, AV_LOG_ERROR, "Invalid matrix size:%d\n", ctx->matrix_sizes[i]);
+            return AVERROR(EINVAL);
+        }
+
+    }
+
+    for (j = 0; j < 4; j++) {
+        matrix_bytes = sizeof(float)*ctx->matrix_sizes[j];
+        matrix = av_malloc(matrix_bytes);
+        if (!matrix) {
+            av_freep(&matrix);
+            return AVERROR(ENOMEM);
+        }
+
+        for (i = 0; i < ctx->matrix_sizes[j]; i++)
+            matrix[i] = input_matrix[j][i];
+
+        buffer = clCreateBuffer(ctx->ocf.hwctx->context,
+                                CL_MEM_READ_ONLY |
+                                CL_MEM_COPY_HOST_PTR |
+                                CL_MEM_HOST_NO_ACCESS,
+                                matrix_bytes, matrix, &cle);
+        if (!buffer) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to create matrix buffer: "
+                   "%d.\n", cle);
+            av_freep(&matrix);
+            return AVERROR(EIO);
+        }
+        ctx->matrix[j] = buffer;
+        av_freep(&matrix);
+    }
+
+    return 0;
+}
+
+static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext *avctx = inlink->dst;
+    AVFilterLink *outlink = avctx->outputs[0];
+    ConvolutionOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    cl_int cle;
+    size_t global_work[2];
+    cl_mem src, dst;
+    int err, p;
+    size_t origin[3] = {0, 0, 0};
+    size_t region[3] = {0, 0, 1};
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+
+    if (!ctx->initialised) {
+        err = convolution_opencl_init(avctx);
+        if (err < 0)
+            goto fail;
+
+        if (!strcmp(avctx->filter->name, "convolution_opencl")) {
+            err = convolution_opencl_make_filter_params(avctx);
+            if (err < 0)
+                goto fail;
+        } else {
+            ctx->delta /= 255.0;
+        }
+
+    }
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+        src = (cl_mem) input->data[p];
+        dst = (cl_mem)output->data[p];
+
+        if (!dst)
+            break;
+
+        if (!strcmp(avctx->filter->name, "convolution_opencl")) {
+            CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem,   &dst);
+            CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem,   &src);
+            CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int,   &ctx->dims[p]);
+            CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem,   &ctx->matrix[p]);
+            CL_SET_KERNEL_ARG(ctx->kernel, 4, cl_float, &ctx->rdivs[p]);
+            CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_float, &ctx->biases[p]);
+
+            err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
+            if (err < 0)
+                goto fail;
+
+            av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+                   "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+                   p, global_work[0], global_work[1]);
+
+            cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                         global_work, NULL,
+                                         0, NULL, NULL);
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue "
+                             "kernel: %d.\n", cle);
+        } else {
+            if (!(ctx->planes & (1 << p))) {
+                err = ff_opencl_filter_work_size_from_image(avctx, region, output, p, 0);
+                if (err < 0)
+                    goto fail;
+
+                cle = clEnqueueCopyImage(ctx->command_queue, src, dst,
+                                         origin, origin, region, 0, NULL, NULL);
+                CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to copy plane %d: %d.\n",
+                                 p, cle);
+            } else {
+                CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem,   &dst);
+                CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem,   &src);
+                CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_float, &ctx->scale);
+                CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_float, &ctx->delta);
+
+                err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
+                if (err < 0)
+                    goto fail;
+
+                av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+                       "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+                       p, global_work[0], global_work[1]);
+
+                cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                                         global_work, NULL,
+                                                         0, NULL, NULL);
+                CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue "
+                                 "kernel: %d.\n", cle);
+            }
+        }
+    }
+
+    cle = clFinish(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    av_frame_free(&input);
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    return err;
+}
+
+static av_cold void convolution_opencl_uninit(AVFilterContext *avctx)
+{
+    ConvolutionOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        clReleaseMemObject(ctx->matrix[i]);
+    }
+
+    if (ctx->kernel) {
+        cle = clReleaseKernel(ctx->kernel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+static const AVFilterPad convolution_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &convolution_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad convolution_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_output,
+    },
+    { NULL }
+};
+
+#define OFFSET(x) offsetof(ConvolutionOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+
+#if CONFIG_CONVOLUTION_OPENCL_FILTER
+
+static const AVOption convolution_opencl_options[] = {
+    { "0m", "set matrix for 2nd plane", OFFSET(matrix_str[0]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "1m", "set matrix for 2nd plane", OFFSET(matrix_str[1]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "2m", "set matrix for 3rd plane", OFFSET(matrix_str[2]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "3m", "set matrix for 4th plane", OFFSET(matrix_str[3]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "0rdiv", "set rdiv for 1nd plane", OFFSET(rdivs[0]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "1rdiv", "set rdiv for 2nd plane", OFFSET(rdivs[1]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "2rdiv", "set rdiv for 3rd plane", OFFSET(rdivs[2]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "3rdiv", "set rdiv for 4th plane", OFFSET(rdivs[3]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "0bias", "set bias for 1st plane", OFFSET(biases[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "1bias", "set bias for 2nd plane", OFFSET(biases[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "2bias", "set bias for 3rd plane", OFFSET(biases[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "3bias", "set bias for 4th plane", OFFSET(biases[3]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(convolution_opencl);
+
+AVFilter ff_vf_convolution_opencl = {
+    .name           = "convolution_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply convolution mask to input video"),
+    .priv_size      = sizeof(ConvolutionOpenCLContext),
+    .priv_class     = &convolution_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &convolution_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = convolution_opencl_inputs,
+    .outputs        = convolution_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_CONVOLUTION_OPENCL_FILTER */
+
+#if CONFIG_SOBEL_OPENCL_FILTER
+
+static const AVOption sobel_opencl_options[] = {
+    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,  {.i64=15}, 0, 15, FLAGS},
+    { "scale",  "set scale",            OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0,  65535, FLAGS},
+    { "delta",  "set delta",            OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS},
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(sobel_opencl);
+
+AVFilter ff_vf_sobel_opencl = {
+    .name           = "sobel_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply sobel operator"),
+    .priv_size      = sizeof(ConvolutionOpenCLContext),
+    .priv_class     = &sobel_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &convolution_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = convolution_opencl_inputs,
+    .outputs        = convolution_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_SOBEL_OPENCL_FILTER */
+
+#if CONFIG_PREWITT_OPENCL_FILTER
+
+static const AVOption prewitt_opencl_options[] = {
+    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,  {.i64=15}, 0, 15, FLAGS},
+    { "scale",  "set scale",            OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0,  65535, FLAGS},
+    { "delta",  "set delta",            OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS},
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(prewitt_opencl);
+
+AVFilter ff_vf_prewitt_opencl = {
+    .name           = "prewitt_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply prewitt operator"),
+    .priv_size      = sizeof(ConvolutionOpenCLContext),
+    .priv_class     = &prewitt_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &convolution_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = convolution_opencl_inputs,
+    .outputs        = convolution_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_PREWITT_OPENCL_FILTER */
+
+#if CONFIG_ROBERTS_OPENCL_FILTER
+
+static const AVOption roberts_opencl_options[] = {
+    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,  {.i64=15}, 0, 15, FLAGS},
+    { "scale",  "set scale",            OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0,  65535, FLAGS},
+    { "delta",  "set delta",            OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS},
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(roberts_opencl);
+
+AVFilter ff_vf_roberts_opencl = {
+    .name           = "roberts_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply roberts operator"),
+    .priv_size      = sizeof(ConvolutionOpenCLContext),
+    .priv_class     = &roberts_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &convolution_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = convolution_opencl_inputs,
+    .outputs        = convolution_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_ROBERTS_OPENCL_FILTER */

diff --git a/libavfilter/vf_convolve.c b/libavfilter/vf_convolve.c
index e00a44d..024eb68 100644
--- a/libavfilter/vf_convolve.c
+++ b/libavfilter/vf_convolve.c

@@ -18,6 +18,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <float.h>
+
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -29,12 +31,14 @@
 #include "internal.h"
 #include "video.h"
 
+#define MAX_THREADS 16
+
 typedef struct ConvolveContext {
     const AVClass *class;
     FFFrameSync fs;
 
-    FFTContext *fft[4];
-    FFTContext *ifft[4];
+    FFTContext *fft[4][MAX_THREADS];
+    FFTContext *ifft[4][MAX_THREADS];
 
     int fft_bits[4];
     int fft_len[4];
@@ -49,8 +53,11 @@
     int depth;
     int planes;
     int impulse;
+    float noise;
     int nb_planes;
     int got_impulse[4];
+
+    int (*filter)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
 } ConvolveContext;
 
 #define OFFSET(x) offsetof(ConvolveContext, x)
@@ -61,11 +68,10 @@
     { "impulse", "when to process impulses",                OFFSET(impulse),  AV_OPT_TYPE_INT,   {.i64=1}, 0,  1, FLAGS, "impulse" },
     {   "first", "process only first impulse, ignore rest", 0,                AV_OPT_TYPE_CONST, {.i64=0}, 0,  0, FLAGS, "impulse" },
     {   "all",   "process all impulses",                    0,                AV_OPT_TYPE_CONST, {.i64=1}, 0,  0, FLAGS, "impulse" },
+    { "noise",   "set noise",                               OFFSET(noise),    AV_OPT_TYPE_FLOAT, {.dbl=0.0000001}, 0,  1, FLAGS },
     { NULL },
 };
 
-FRAMESYNC_DEFINE_CLASS(convolve, ConvolveContext, fs);
-
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat pixel_fmts_fftfilt[] = {
@@ -85,7 +91,7 @@
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
         AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
 
@@ -112,7 +118,7 @@
     for (i = 0; i < s->nb_planes; i++) {
         int w = s->planewidth[i];
         int h = s->planeheight[i];
-        int n = FFMAX(w, h) * 10/9;
+        int n = FFMAX(w, h);
 
         for (fft_bits = 1; 1 << fft_bits < n; fft_bits++);
 
@@ -152,108 +158,290 @@
     return 0;
 }
 
-static void fft_horizontal(ConvolveContext *s, FFTComplex *fft_hdata,
-                           AVFrame *in, int w, int h, int n, int plane, float scale)
+typedef struct ThreadData {
+    FFTComplex *hdata, *vdata;
+    int plane, n;
+} ThreadData;
+
+static int fft_horizontal(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    int y, x;
+    ConvolveContext *s = ctx->priv;
+    ThreadData *td = arg;
+    FFTComplex *hdata = td->hdata;
+    const int plane = td->plane;
+    const int n = td->n;
+    int start = (n * jobnr) / nb_jobs;
+    int end = (n * (jobnr+1)) / nb_jobs;
+    int y;
 
-    for (y = 0; y < h; y++) {
-        if (s->depth == 8) {
-            const uint8_t *src = in->data[plane] + in->linesize[plane] * y;
-
-            for (x = 0; x < w; x++) {
-                fft_hdata[y * n + x].re = src[x] * scale;
-                fft_hdata[y * n + x].im = 0;
-            }
-        } else {
-            const uint16_t *src = (const uint16_t *)(in->data[plane] + in->linesize[plane] * y);
-
-            for (x = 0; x < w; x++) {
-                fft_hdata[y * n + x].re = src[x] * scale;
-                fft_hdata[y * n + x].im = 0;
-            }
-        }
-        for (; x < n; x++) {
-            fft_hdata[y * n + x].re = 0;
-            fft_hdata[y * n + x].im = 0;
-        }
+    for (y = start; y < end; y++) {
+        av_fft_permute(s->fft[plane][jobnr], hdata + y * n);
+        av_fft_calc(s->fft[plane][jobnr], hdata + y * n);
     }
 
-    for (; y < n; y++) {
-        for (x = 0; x < n; x++) {
-            fft_hdata[y * n + x].re = 0;
-            fft_hdata[y * n + x].im = 0;
-        }
-    }
-
-    for (y = 0; y < n; y++) {
-        av_fft_permute(s->fft[plane], fft_hdata + y * n);
-        av_fft_calc(s->fft[plane], fft_hdata + y * n);
-    }
+    return 0;
 }
 
-static void fft_vertical(ConvolveContext *s, FFTComplex *fft_hdata, FFTComplex *fft_vdata,
-                         int n, int plane)
+static void get_input(ConvolveContext *s, FFTComplex *fft_hdata,
+                      AVFrame *in, int w, int h, int n, int plane, float scale)
 {
+    const int iw = (n - w) / 2, ih = (n - h) / 2;
     int y, x;
 
-    for (y = 0; y < n; y++) {
-        for (x = 0; x < n; x++) {
-            fft_vdata[y * n + x].re = fft_hdata[x * n + y].re;
-            fft_vdata[y * n + x].im = fft_hdata[x * n + y].im;
-        }
-        for (; x < n; x++) {
-            fft_vdata[y * n + x].re = 0;
-            fft_vdata[y * n + x].im = 0;
-        }
-        av_fft_permute(s->fft[plane], fft_vdata + y * n);
-        av_fft_calc(s->fft[plane], fft_vdata + y * n);
-    }
-}
-
-static void ifft_vertical(ConvolveContext *s, int n, int plane)
-{
-    int y, x;
-
-    for (y = 0; y < n; y++) {
-        av_fft_permute(s->ifft[plane], s->fft_vdata[plane] + y * n);
-        av_fft_calc(s->ifft[plane], s->fft_vdata[plane] + y * n);
-        for (x = 0; x < n; x++) {
-            s->fft_hdata[plane][x * n + y].re = s->fft_vdata[plane][y * n + x].re;
-            s->fft_hdata[plane][x * n + y].im = s->fft_vdata[plane][y * n + x].im;
-        }
-    }
-}
-
-static void ifft_horizontal(ConvolveContext *s, AVFrame *out,
-                            int w, int h, int n, int plane)
-{
-    const float scale = 1.f / (n * n);
-    const int max = (1 << s->depth) - 1;
-    const int oh = h / 2;
-    const int ow = w / 2;
-    int y, x;
-
-    for (y = 0; y < n; y++) {
-        av_fft_permute(s->ifft[plane], s->fft_hdata[plane] + y * n);
-        av_fft_calc(s->ifft[plane], s->fft_hdata[plane] + y * n);
-    }
-
     if (s->depth == 8) {
         for (y = 0; y < h; y++) {
-            uint8_t *dst = out->data[plane] + y * out->linesize[plane];
-            for (x = 0; x < w; x++)
-                dst[x] = av_clip_uint8(s->fft_hdata[plane][(y+oh) * n + x+ow].re * scale);
+            const uint8_t *src = in->data[plane] + in->linesize[plane] * y;
+
+            for (x = 0; x < w; x++) {
+                fft_hdata[(y + ih) * n + iw + x].re = src[x] * scale;
+                fft_hdata[(y + ih) * n + iw + x].im = 0;
+            }
+
+            for (x = 0; x < iw; x++) {
+                fft_hdata[(y + ih) * n + x].re = fft_hdata[(y + ih) * n + iw].re;
+                fft_hdata[(y + ih) * n + x].im = 0;
+            }
+
+            for (x = n - iw; x < n; x++) {
+                fft_hdata[(y + ih) * n + x].re = fft_hdata[(y + ih) * n + n - iw - 1].re;
+                fft_hdata[(y + ih) * n + x].im = 0;
+            }
+        }
+
+        for (y = 0; y < ih; y++) {
+            for (x = 0; x < n; x++) {
+                fft_hdata[y * n + x].re = fft_hdata[ih * n + x].re;
+                fft_hdata[y * n + x].im = 0;
+            }
+        }
+
+        for (y = n - ih; y < n; y++) {
+            for (x = 0; x < n; x++) {
+                fft_hdata[y * n + x].re = fft_hdata[(n - ih - 1) * n + x].re;
+                fft_hdata[y * n + x].im = 0;
+            }
         }
     } else {
         for (y = 0; y < h; y++) {
-            uint16_t *dst = (uint16_t *)(out->data[plane] + y * out->linesize[plane]);
-            for (x = 0; x < w; x++)
-                dst[x] = av_clip(s->fft_hdata[plane][(y+oh) * n + x+ow].re * scale, 0, max);
+            const uint16_t *src = (const uint16_t *)(in->data[plane] + in->linesize[plane] * y);
+
+            for (x = 0; x < w; x++) {
+                fft_hdata[(y + ih) * n + iw + x].re = src[x] * scale;
+                fft_hdata[(y + ih) * n + iw + x].im = 0;
+            }
+
+            for (x = 0; x < iw; x++) {
+                fft_hdata[(y + ih) * n + x].re = fft_hdata[(y + ih) * n + iw].re;
+                fft_hdata[(y + ih) * n + x].im = 0;
+            }
+
+            for (x = n - iw; x < n; x++) {
+                fft_hdata[(y + ih) * n + x].re = fft_hdata[(y + ih) * n + n - iw - 1].re;
+                fft_hdata[(y + ih) * n + x].im = 0;
+            }
+        }
+
+        for (y = 0; y < ih; y++) {
+            for (x = 0; x < n; x++) {
+                fft_hdata[y * n + x].re = fft_hdata[ih * n + x].re;
+                fft_hdata[y * n + x].im = 0;
+            }
+        }
+
+        for (y = n - ih; y < n; y++) {
+            for (x = 0; x < n; x++) {
+                fft_hdata[y * n + x].re = fft_hdata[(n - ih - 1) * n + x].re;
+                fft_hdata[y * n + x].im = 0;
+            }
         }
     }
 }
 
+static int fft_vertical(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ConvolveContext *s = ctx->priv;
+    ThreadData *td = arg;
+    FFTComplex *hdata = td->hdata;
+    FFTComplex *vdata = td->vdata;
+    const int plane = td->plane;
+    const int n = td->n;
+    int start = (n * jobnr) / nb_jobs;
+    int end = (n * (jobnr+1)) / nb_jobs;
+    int y, x;
+
+    for (y = start; y < end; y++) {
+        for (x = 0; x < n; x++) {
+            vdata[y * n + x].re = hdata[x * n + y].re;
+            vdata[y * n + x].im = hdata[x * n + y].im;
+        }
+
+        av_fft_permute(s->fft[plane][jobnr], vdata + y * n);
+        av_fft_calc(s->fft[plane][jobnr], vdata + y * n);
+    }
+
+    return 0;
+}
+
+static int ifft_vertical(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ConvolveContext *s = ctx->priv;
+    ThreadData *td = arg;
+    FFTComplex *hdata = td->hdata;
+    FFTComplex *vdata = td->vdata;
+    const int plane = td->plane;
+    const int n = td->n;
+    int start = (n * jobnr) / nb_jobs;
+    int end = (n * (jobnr+1)) / nb_jobs;
+    int y, x;
+
+    for (y = start; y < end; y++) {
+        av_fft_permute(s->ifft[plane][jobnr], vdata + y * n);
+        av_fft_calc(s->ifft[plane][jobnr], vdata + y * n);
+
+        for (x = 0; x < n; x++) {
+            hdata[x * n + y].re = vdata[y * n + x].re;
+            hdata[x * n + y].im = vdata[y * n + x].im;
+        }
+    }
+
+    return 0;
+}
+
+static int ifft_horizontal(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ConvolveContext *s = ctx->priv;
+    ThreadData *td = arg;
+    FFTComplex *hdata = td->hdata;
+    const int plane = td->plane;
+    const int n = td->n;
+    int start = (n * jobnr) / nb_jobs;
+    int end = (n * (jobnr+1)) / nb_jobs;
+    int y;
+
+    for (y = start; y < end; y++) {
+        av_fft_permute(s->ifft[plane][jobnr], hdata + y * n);
+        av_fft_calc(s->ifft[plane][jobnr], hdata + y * n);
+    }
+
+    return 0;
+}
+
+static void get_output(ConvolveContext *s, FFTComplex *input, AVFrame *out,
+                       int w, int h, int n, int plane, float scale)
+{
+    const int max = (1 << s->depth) - 1;
+    const int hh = h / 2;
+    const int hw = w / 2;
+    int y, x;
+
+    if (s->depth == 8) {
+        for (y = 0; y < hh; y++) {
+            uint8_t *dst = out->data[plane] + (y + hh) * out->linesize[plane] + hw;
+            for (x = 0; x < hw; x++)
+                dst[x] = av_clip_uint8(input[y * n + x].re * scale);
+        }
+        for (y = 0; y < hh; y++) {
+            uint8_t *dst = out->data[plane] + (y + hh) * out->linesize[plane];
+            for (x = 0; x < hw; x++)
+                dst[x] = av_clip_uint8(input[y * n + n - hw + x].re * scale);
+        }
+        for (y = 0; y < hh; y++) {
+            uint8_t *dst = out->data[plane] + y * out->linesize[plane] + hw;
+            for (x = 0; x < hw; x++)
+                dst[x] = av_clip_uint8(input[(n - hh + y) * n + x].re * scale);
+        }
+        for (y = 0; y < hh; y++) {
+            uint8_t *dst = out->data[plane] + y * out->linesize[plane];
+            for (x = 0; x < hw; x++)
+                dst[x] = av_clip_uint8(input[(n - hh + y) * n + n - hw + x].re * scale);
+        }
+    } else {
+        for (y = 0; y < hh; y++) {
+            uint16_t *dst = (uint16_t *)(out->data[plane] + (y + hh) * out->linesize[plane] + hw * 2);
+            for (x = 0; x < hw; x++)
+                dst[x] = av_clip(input[y * n + x].re * scale, 0, max);
+        }
+        for (y = 0; y < hh; y++) {
+            uint16_t *dst = (uint16_t *)(out->data[plane] + (y + hh) * out->linesize[plane]);
+            for (x = 0; x < hw; x++)
+                dst[x] = av_clip(input[y * n + n - hw + x].re * scale, 0, max);
+        }
+        for (y = 0; y < hh; y++) {
+            uint16_t *dst = (uint16_t *)(out->data[plane] + y * out->linesize[plane] + hw * 2);
+            for (x = 0; x < hw; x++)
+                dst[x] = av_clip(input[(n - hh + y) * n + x].re * scale, 0, max);
+        }
+        for (y = 0; y < hh; y++) {
+            uint16_t *dst = (uint16_t *)(out->data[plane] + y * out->linesize[plane]);
+            for (x = 0; x < hw; x++)
+                dst[x] = av_clip(input[(n - hh + y) * n + n - hw + x].re * scale, 0, max);
+        }
+    }
+}
+
+static int complex_multiply(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ConvolveContext *s = ctx->priv;
+    ThreadData *td = arg;
+    FFTComplex *input = td->hdata;
+    FFTComplex *filter = td->vdata;
+    const float noise = s->noise;
+    const int n = td->n;
+    int start = (n * jobnr) / nb_jobs;
+    int end = (n * (jobnr+1)) / nb_jobs;
+    int y, x;
+
+    for (y = start; y < end; y++) {
+        int yn = y * n;
+
+        for (x = 0; x < n; x++) {
+            FFTSample re, im, ire, iim;
+
+            re = input[yn + x].re;
+            im = input[yn + x].im;
+            ire = filter[yn + x].re + noise;
+            iim = filter[yn + x].im;
+
+            input[yn + x].re = ire * re - iim * im;
+            input[yn + x].im = iim * re + ire * im;
+        }
+    }
+
+    return 0;
+}
+
+static int complex_divide(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ConvolveContext *s = ctx->priv;
+    ThreadData *td = arg;
+    FFTComplex *input = td->hdata;
+    FFTComplex *filter = td->vdata;
+    const float noise = s->noise;
+    const int n = td->n;
+    int start = (n * jobnr) / nb_jobs;
+    int end = (n * (jobnr+1)) / nb_jobs;
+    int y, x;
+
+    for (y = start; y < end; y++) {
+        int yn = y * n;
+
+        for (x = 0; x < n; x++) {
+            FFTSample re, im, ire, iim, div;
+
+            re = input[yn + x].re;
+            im = input[yn + x].im;
+            ire = filter[yn + x].re;
+            iim = filter[yn + x].im;
+            div = ire * ire + iim * iim + noise;
+
+            input[yn + x].re = (ire * re + iim * im) / div;
+            input[yn + x].im = (ire * im - iim * re) / div;
+        }
+    }
+
+    return 0;
+}
+
 static int do_convolve(FFFrameSync *fs)
 {
     AVFilterContext *ctx = fs->parent;
@@ -269,18 +457,26 @@
         return ff_filter_frame(outlink, mainpic);
 
     for (plane = 0; plane < s->nb_planes; plane++) {
+        FFTComplex *filter = s->fft_vdata_impulse[plane];
+        FFTComplex *input = s->fft_vdata[plane];
         const int n = s->fft_len[plane];
         const int w = s->planewidth[plane];
         const int h = s->planeheight[plane];
         float total = 0;
+        ThreadData td;
 
         if (!(s->planes & (1 << plane))) {
             continue;
         }
 
-        fft_horizontal(s, s->fft_hdata[plane], mainpic, w, h, n, plane, 1.f);
-        fft_vertical(s, s->fft_hdata[plane], s->fft_vdata[plane],
-                     n, plane);
+        td.plane = plane, td.n = n;
+        get_input(s, s->fft_hdata[plane], mainpic, w, h, n, plane, 1.f);
+
+        td.hdata = s->fft_hdata[plane];
+        td.vdata = s->fft_vdata[plane];
+
+        ctx->internal->execute(ctx, fft_horizontal, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
+        ctx->internal->execute(ctx, fft_vertical, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
 
         if ((!s->impulse && !s->got_impulse[plane]) || s->impulse) {
             if (s->depth == 8) {
@@ -300,29 +496,29 @@
             }
             total = FFMAX(1, total);
 
-            fft_horizontal(s, s->fft_hdata_impulse[plane], impulsepic, w, h, n, plane, 1 / total);
-            fft_vertical(s, s->fft_hdata_impulse[plane], s->fft_vdata_impulse[plane],
-                         n, plane);
+            get_input(s, s->fft_hdata_impulse[plane], impulsepic, w, h, n, plane, 1.f / total);
+
+            td.hdata = s->fft_hdata_impulse[plane];
+            td.vdata = s->fft_vdata_impulse[plane];
+
+            ctx->internal->execute(ctx, fft_horizontal, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
+            ctx->internal->execute(ctx, fft_vertical, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
 
             s->got_impulse[plane] = 1;
         }
 
-        for (y = 0; y < n; y++) {
-            for (x = 0; x < n; x++) {
-                FFTSample re, im, ire, iim;
+        td.hdata = input;
+        td.vdata = filter;
 
-                re = s->fft_vdata[plane][y*n + x].re;
-                im = s->fft_vdata[plane][y*n + x].im;
-                ire = s->fft_vdata_impulse[plane][y*n + x].re;
-                iim = s->fft_vdata_impulse[plane][y*n + x].im;
+        ctx->internal->execute(ctx, s->filter, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
 
-                s->fft_vdata[plane][y*n + x].re = ire * re - iim * im;
-                s->fft_vdata[plane][y*n + x].im = iim * re + ire * im;
-            }
-        }
+        td.hdata = s->fft_hdata[plane];
+        td.vdata = s->fft_vdata[plane];
 
-        ifft_vertical(s, n, plane);
-        ifft_horizontal(s, mainpic, w, h, n, plane);
+        ctx->internal->execute(ctx, ifft_vertical, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
+        ctx->internal->execute(ctx, ifft_horizontal, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
+
+        get_output(s, s->fft_hdata[plane], mainpic, w, h, n, plane, 1.f / (n * n));
     }
 
     return ff_filter_frame(outlink, mainpic);
@@ -333,7 +529,7 @@
     AVFilterContext *ctx = outlink->src;
     ConvolveContext *s = ctx->priv;
     AVFilterLink *mainlink = ctx->inputs[0];
-    int ret, i;
+    int ret, i, j;
 
     s->fs.on_event = do_convolve;
     ret = ff_framesync_init_dualinput(&s->fs, ctx);
@@ -349,10 +545,12 @@
         return ret;
 
     for (i = 0; i < s->nb_planes; i++) {
-        s->fft[i]  = av_fft_init(s->fft_bits[i], 0);
-        s->ifft[i] = av_fft_init(s->fft_bits[i], 1);
-        if (!s->fft[i] || !s->ifft[i])
-            return AVERROR(ENOMEM);
+        for (j = 0; j < MAX_THREADS; j++) {
+            s->fft[i][j]  = av_fft_init(s->fft_bits[i], 0);
+            s->ifft[i][j] = av_fft_init(s->fft_bits[i], 1);
+            if (!s->fft[i][j] || !s->ifft[i][j])
+                return AVERROR(ENOMEM);
+        }
     }
 
     return 0;
@@ -364,18 +562,36 @@
     return ff_framesync_activate(&s->fs);
 }
 
+static av_cold int init(AVFilterContext *ctx)
+{
+    ConvolveContext *s = ctx->priv;
+
+    if (!strcmp(ctx->filter->name, "convolve")) {
+        s->filter = complex_multiply;
+    } else if (!strcmp(ctx->filter->name, "deconvolve")) {
+        s->filter = complex_divide;
+    } else {
+        return AVERROR_BUG;
+    }
+
+    return 0;
+}
+
 static av_cold void uninit(AVFilterContext *ctx)
 {
     ConvolveContext *s = ctx->priv;
-    int i;
+    int i, j;
 
     for (i = 0; i < 4; i++) {
         av_freep(&s->fft_hdata[i]);
         av_freep(&s->fft_vdata[i]);
         av_freep(&s->fft_hdata_impulse[i]);
         av_freep(&s->fft_vdata_impulse[i]);
-        av_fft_end(s->fft[i]);
-        av_fft_end(s->ifft[i]);
+
+        for (j = 0; j < MAX_THREADS; j++) {
+            av_fft_end(s->fft[i][j]);
+            av_fft_end(s->ifft[i][j]);
+        }
     }
 
     ff_framesync_uninit(&s->fs);
@@ -403,10 +619,15 @@
     { NULL }
 };
 
+#if CONFIG_CONVOLVE_FILTER
+
+FRAMESYNC_DEFINE_CLASS(convolve, ConvolveContext, fs);
+
 AVFilter ff_vf_convolve = {
     .name          = "convolve",
     .description   = NULL_IF_CONFIG_SMALL("Convolve first video stream with second video stream."),
     .preinit       = convolve_framesync_preinit,
+    .init          = init,
     .uninit        = uninit,
     .query_formats = query_formats,
     .activate      = activate,
@@ -414,5 +635,37 @@
     .priv_class    = &convolve_class,
     .inputs        = convolve_inputs,
     .outputs       = convolve_outputs,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
 };
+
+#endif /* CONFIG_CONVOLVE_FILTER */
+
+#if CONFIG_DECONVOLVE_FILTER
+
+static const AVOption deconvolve_options[] = {
+    { "planes",  "set planes to deconvolve",                OFFSET(planes),   AV_OPT_TYPE_INT,   {.i64=7}, 0, 15, FLAGS },
+    { "impulse", "when to process impulses",                OFFSET(impulse),  AV_OPT_TYPE_INT,   {.i64=1}, 0,  1, FLAGS, "impulse" },
+    {   "first", "process only first impulse, ignore rest", 0,                AV_OPT_TYPE_CONST, {.i64=0}, 0,  0, FLAGS, "impulse" },
+    {   "all",   "process all impulses",                    0,                AV_OPT_TYPE_CONST, {.i64=1}, 0,  0, FLAGS, "impulse" },
+    { "noise",   "set noise",                               OFFSET(noise),    AV_OPT_TYPE_FLOAT, {.dbl=0.0000001}, 0,  1, FLAGS },
+    { NULL },
+};
+
+FRAMESYNC_DEFINE_CLASS(deconvolve, ConvolveContext, fs);
+
+AVFilter ff_vf_deconvolve = {
+    .name          = "deconvolve",
+    .description   = NULL_IF_CONFIG_SMALL("Deconvolve first video stream with second video stream."),
+    .preinit       = deconvolve_framesync_preinit,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .activate      = activate,
+    .priv_size     = sizeof(ConvolveContext),
+    .priv_class    = &deconvolve_class,
+    .inputs        = convolve_inputs,
+    .outputs       = convolve_outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
+};
+
+#endif /* CONFIG_DECONVOLVE_FILTER */

diff --git a/libavfilter/vf_crop.c b/libavfilter/vf_crop.c
index 7c31c16..84be4c7 100644
--- a/libavfilter/vf_crop.c
+++ b/libavfilter/vf_crop.c

@@ -262,6 +262,7 @@
         NAN : frame->pkt_pos;
     s->var_values[VAR_X] = av_expr_eval(s->x_pexpr, s->var_values, NULL);
     s->var_values[VAR_Y] = av_expr_eval(s->y_pexpr, s->var_values, NULL);
+    /* It is necessary if x is expressed from y  */
     s->var_values[VAR_X] = av_expr_eval(s->x_pexpr, s->var_values, NULL);
 
     normalize_double(&s->x, s->var_values[VAR_X]);
@@ -287,7 +288,7 @@
     frame->data[0] += s->y * frame->linesize[0];
     frame->data[0] += s->x * s->max_step[0];
 
-    if (!(desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)) {
+    if (!(desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & FF_PSEUDOPAL)) {
         for (i = 1; i < 3; i ++) {
             if (frame->data[i]) {
                 frame->data[i] += (s->y >> s->vsub) * frame->linesize[i];

diff --git a/libavfilter/vf_curves.c b/libavfilter/vf_curves.c
index 19ab789..883cc1c 100644
--- a/libavfilter/vf_curves.c
+++ b/libavfilter/vf_curves.c

@@ -70,6 +70,9 @@
     int step;
     char *plot_filename;
     int is_16bit;
+    int depth;
+
+    int (*filter_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
 } CurvesContext;
 
 typedef struct ThreadData {
@@ -209,7 +212,7 @@
  * @see http://people.math.sfu.ca/~stockie/teaching/macm316/notes/splines.pdf
  */
 
-#define CLIP(v) (nbits == 8 ? av_clip_uint8(v) : av_clip_uint16(v))
+#define CLIP(v) (nbits == 8 ? av_clip_uint8(v) : av_clip_uintp2_c(v, nbits))
 
 static inline int interpolate(void *log_ctx, uint16_t *y,
                               const struct keypoint *points, int nbits)
@@ -341,6 +344,10 @@
 }
 
 DECLARE_INTERPOLATE_FUNC(8)
+DECLARE_INTERPOLATE_FUNC(9)
+DECLARE_INTERPOLATE_FUNC(10)
+DECLARE_INTERPOLATE_FUNC(12)
+DECLARE_INTERPOLATE_FUNC(14)
 DECLARE_INTERPOLATE_FUNC(16)
 
 static int parse_psfile(AVFilterContext *ctx, const char *fname)
@@ -512,6 +519,12 @@
         AV_PIX_FMT_RGB0,   AV_PIX_FMT_BGR0,
         AV_PIX_FMT_RGB48,  AV_PIX_FMT_BGR48,
         AV_PIX_FMT_RGBA64, AV_PIX_FMT_BGRA64,
+        AV_PIX_FMT_GBRP,   AV_PIX_FMT_GBRAP,
+        AV_PIX_FMT_GBRP9,
+        AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12,
+        AV_PIX_FMT_GBRP14,
+        AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16,
         AV_PIX_FMT_NONE
     };
     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
@@ -520,66 +533,7 @@
     return ff_set_common_formats(ctx, fmts_list);
 }
 
-static int config_input(AVFilterLink *inlink)
-{
-    int i, j, ret;
-    AVFilterContext *ctx = inlink->dst;
-    CurvesContext *curves = ctx->priv;
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
-    char **pts = curves->comp_points_str;
-    struct keypoint *comp_points[NB_COMP + 1] = {0};
-
-    ff_fill_rgba_map(curves->rgba_map, inlink->format);
-    curves->is_16bit = desc->comp[0].depth > 8;
-    curves->lut_size = curves->is_16bit ? 1<<16 : 1<<8;
-    curves->step = av_get_padded_bits_per_pixel(desc) >> (3 + curves->is_16bit);
-
-    for (i = 0; i < NB_COMP + 1; i++) {
-        curves->graph[i] = av_mallocz_array(curves->lut_size, sizeof(*curves->graph[0]));
-        if (!curves->graph[i])
-            return AVERROR(ENOMEM);
-        ret = parse_points_str(ctx, comp_points + i, curves->comp_points_str[i], curves->lut_size);
-        if (ret < 0)
-            return ret;
-        if (curves->is_16bit) ret = interpolate16(ctx, curves->graph[i], comp_points[i]);
-        else                  ret = interpolate8(ctx, curves->graph[i], comp_points[i]);
-        if (ret < 0)
-            return ret;
-    }
-
-    if (pts[NB_COMP]) {
-        for (i = 0; i < NB_COMP; i++)
-            for (j = 0; j < curves->lut_size; j++)
-                curves->graph[i][j] = curves->graph[NB_COMP][curves->graph[i][j]];
-    }
-
-    if (av_log_get_level() >= AV_LOG_VERBOSE) {
-        for (i = 0; i < NB_COMP; i++) {
-            const struct keypoint *point = comp_points[i];
-            av_log(ctx, AV_LOG_VERBOSE, "#%d points:", i);
-            while (point) {
-                av_log(ctx, AV_LOG_VERBOSE, " (%f;%f)", point->x, point->y);
-                point = point->next;
-            }
-        }
-    }
-
-    if (curves->plot_filename)
-        dump_curves(curves->plot_filename, curves->graph, comp_points, curves->lut_size);
-
-    for (i = 0; i < NB_COMP + 1; i++) {
-        struct keypoint *point = comp_points[i];
-        while (point) {
-            struct keypoint *next = point->next;
-            av_free(point);
-            point = next;
-        }
-    }
-
-    return 0;
-}
-
-static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+static int filter_slice_packed(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
     int x, y;
     const CurvesContext *curves = ctx->priv;
@@ -627,9 +581,143 @@
     return 0;
 }
 
+static int filter_slice_planar(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    int x, y;
+    const CurvesContext *curves = ctx->priv;
+    const ThreadData *td = arg;
+    const AVFrame *in  = td->in;
+    const AVFrame *out = td->out;
+    const int direct = out == in;
+    const int step = curves->step;
+    const uint8_t r = curves->rgba_map[R];
+    const uint8_t g = curves->rgba_map[G];
+    const uint8_t b = curves->rgba_map[B];
+    const uint8_t a = curves->rgba_map[A];
+    const int slice_start = (in->height *  jobnr   ) / nb_jobs;
+    const int slice_end   = (in->height * (jobnr+1)) / nb_jobs;
+
+    if (curves->is_16bit) {
+        for (y = slice_start; y < slice_end; y++) {
+            uint16_t       *dstrp = (      uint16_t *)(out->data[r] + y * out->linesize[r]);
+            uint16_t       *dstgp = (      uint16_t *)(out->data[g] + y * out->linesize[g]);
+            uint16_t       *dstbp = (      uint16_t *)(out->data[b] + y * out->linesize[b]);
+            uint16_t       *dstap = (      uint16_t *)(out->data[a] + y * out->linesize[a]);
+            const uint16_t *srcrp = (const uint16_t *)(in ->data[r] + y *  in->linesize[r]);
+            const uint16_t *srcgp = (const uint16_t *)(in ->data[g] + y *  in->linesize[g]);
+            const uint16_t *srcbp = (const uint16_t *)(in ->data[b] + y *  in->linesize[b]);
+            const uint16_t *srcap = (const uint16_t *)(in ->data[a] + y *  in->linesize[a]);
+
+            for (x = 0; x < in->width; x++) {
+                dstrp[x] = curves->graph[R][srcrp[x]];
+                dstgp[x] = curves->graph[G][srcgp[x]];
+                dstbp[x] = curves->graph[B][srcbp[x]];
+                if (!direct && step == 4)
+                    dstap[x] = srcap[x];
+            }
+        }
+    } else {
+        uint8_t       *dstr = out->data[r] + slice_start * out->linesize[r];
+        uint8_t       *dstg = out->data[g] + slice_start * out->linesize[g];
+        uint8_t       *dstb = out->data[b] + slice_start * out->linesize[b];
+        uint8_t       *dsta = out->data[a] + slice_start * out->linesize[a];
+        const uint8_t *srcr =  in->data[r] + slice_start *  in->linesize[r];
+        const uint8_t *srcg =  in->data[g] + slice_start *  in->linesize[g];
+        const uint8_t *srcb =  in->data[b] + slice_start *  in->linesize[b];
+        const uint8_t *srca =  in->data[a] + slice_start *  in->linesize[a];
+
+        for (y = slice_start; y < slice_end; y++) {
+            for (x = 0; x < in->width; x++) {
+                dstr[x] = curves->graph[R][srcr[x]];
+                dstg[x] = curves->graph[G][srcg[x]];
+                dstb[x] = curves->graph[B][srcb[x]];
+                if (!direct && step == 4)
+                    dsta[x] = srca[x];
+            }
+            dstr += out->linesize[r];
+            dstg += out->linesize[g];
+            dstb += out->linesize[b];
+            dsta += out->linesize[a];
+            srcr += in ->linesize[r];
+            srcg += in ->linesize[g];
+            srcb += in ->linesize[b];
+            srca += in ->linesize[a];
+        }
+    }
+    return 0;
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    int i, j, ret;
+    AVFilterContext *ctx = inlink->dst;
+    CurvesContext *curves = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    char **pts = curves->comp_points_str;
+    struct keypoint *comp_points[NB_COMP + 1] = {0};
+
+    ff_fill_rgba_map(curves->rgba_map, inlink->format);
+    curves->is_16bit = desc->comp[0].depth > 8;
+    curves->depth = desc->comp[0].depth;
+    curves->lut_size = 1 << curves->depth;
+    curves->step = av_get_padded_bits_per_pixel(desc) >> (3 + curves->is_16bit);
+    curves->filter_slice = desc->flags & AV_PIX_FMT_FLAG_PLANAR ? filter_slice_planar : filter_slice_packed;
+
+    for (i = 0; i < NB_COMP + 1; i++) {
+        curves->graph[i] = av_mallocz_array(curves->lut_size, sizeof(*curves->graph[0]));
+        if (!curves->graph[i])
+            return AVERROR(ENOMEM);
+        ret = parse_points_str(ctx, comp_points + i, curves->comp_points_str[i], curves->lut_size);
+        if (ret < 0)
+            return ret;
+        switch (curves->depth) {
+        case  8: ret = interpolate8 (ctx, curves->graph[i], comp_points[i]); break;
+        case  9: ret = interpolate9 (ctx, curves->graph[i], comp_points[i]); break;
+        case 10: ret = interpolate10(ctx, curves->graph[i], comp_points[i]); break;
+        case 12: ret = interpolate12(ctx, curves->graph[i], comp_points[i]); break;
+        case 14: ret = interpolate14(ctx, curves->graph[i], comp_points[i]); break;
+        case 16: ret = interpolate16(ctx, curves->graph[i], comp_points[i]); break;
+        }
+        if (ret < 0)
+            return ret;
+    }
+
+    if (pts[NB_COMP]) {
+        for (i = 0; i < NB_COMP; i++)
+            for (j = 0; j < curves->lut_size; j++)
+                curves->graph[i][j] = curves->graph[NB_COMP][curves->graph[i][j]];
+    }
+
+    if (av_log_get_level() >= AV_LOG_VERBOSE) {
+        for (i = 0; i < NB_COMP; i++) {
+            const struct keypoint *point = comp_points[i];
+            av_log(ctx, AV_LOG_VERBOSE, "#%d points:", i);
+            while (point) {
+                av_log(ctx, AV_LOG_VERBOSE, " (%f;%f)", point->x, point->y);
+                point = point->next;
+            }
+        }
+    }
+
+    if (curves->plot_filename)
+        dump_curves(curves->plot_filename, curves->graph, comp_points, curves->lut_size);
+
+    for (i = 0; i < NB_COMP + 1; i++) {
+        struct keypoint *point = comp_points[i];
+        while (point) {
+            struct keypoint *next = point->next;
+            av_free(point);
+            point = next;
+        }
+    }
+
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
+    CurvesContext *curves = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out;
     ThreadData td;
@@ -647,7 +735,7 @@
 
     td.in  = in;
     td.out = out;
-    ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
+    ctx->internal->execute(ctx, curves->filter_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
 
     if (out != in)
         av_frame_free(&in);

diff --git a/libavfilter/vf_datascope.c b/libavfilter/vf_datascope.c
index 4676635..6bcc18e 100644
--- a/libavfilter/vf_datascope.c
+++ b/libavfilter/vf_datascope.c

@@ -506,7 +506,7 @@
     }
 
     s->ww = 300;
-    s->wh = 300 * 1.6180;
+    s->wh = 300 * 1.6;
     s->x = s->xpos * (inlink->w - 1);
     s->y = s->ypos * (inlink->h - 1);
     if (s->x + s->w >= inlink->w || s->y + s->h >= inlink->h) {

diff --git a/libavfilter/vf_deblock.c b/libavfilter/vf_deblock.c
new file mode 100644
index 0000000..62e3248d
--- /dev/null
+++ b/libavfilter/vf_deblock.c

@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Based on paper: A Simple and Efficient Deblocking Algorithm for Low Bit-Rate Video Coding.
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+enum FilterType { WEAK, STRONG, NB_FILTER };
+
+typedef struct DeblockContext {
+    const AVClass *class;
+    const AVPixFmtDescriptor *desc;
+    int filter;
+    int block;
+    int planes;
+    float alpha;
+    float beta;
+    float gamma;
+    float delta;
+
+    int ath;
+    int bth;
+    int gth;
+    int dth;
+    int max;
+    int depth;
+    int bpc;
+    int nb_planes;
+    int planewidth[4];
+    int planeheight[4];
+
+    void (*deblockh)(uint8_t *dst, ptrdiff_t dst_linesize, int block,
+                     int ath, int bth, int gth, int dth, int max);
+    void (*deblockv)(uint8_t *dst, ptrdiff_t dst_linesize, int block,
+                     int ath, int bth, int gth, int dth, int max);
+} DeblockContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pixel_fmts[] = {
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P,
+        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, formats);
+}
+
+#define WEAK_HFILTER(name, type, ldiv)                                              \
+static void deblockh##name##_weak(uint8_t *dstp, ptrdiff_t dst_linesize, int block, \
+                                  int ath, int bth, int gth, int dth, int max)      \
+{                                                                                   \
+    type *dst;                                                                      \
+    int x;                                                                          \
+                                                                                    \
+    dst = (type *)dstp;                                                             \
+    dst_linesize /= ldiv;                                                           \
+                                                                                    \
+    for (x = 0; x < block; x++) {                                                   \
+        int delta = dst[x] - dst[x - dst_linesize];                                 \
+        int A, B, C, D, a, b, c, d;                                                 \
+                                                                                    \
+        if (FFABS(delta) >= ath ||                                                  \
+            FFABS(dst[x - 1 * dst_linesize] - dst[x - 2 * dst_linesize]) >= bth ||  \
+            FFABS(dst[x + 0 * dst_linesize] - dst[x + 1 * dst_linesize]) >= gth)    \
+            continue;                                                               \
+                                                                                    \
+        A = dst[x - 2 * dst_linesize];                                              \
+        B = dst[x - 1 * dst_linesize];                                              \
+        C = dst[x + 0 * dst_linesize];                                              \
+        D = dst[x + 1 * dst_linesize];                                              \
+                                                                                    \
+        a = A + delta / 8;                                                          \
+        b = B + delta / 2;                                                          \
+        c = C - delta / 2;                                                          \
+        d = D - delta / 8;                                                          \
+                                                                                    \
+        dst[x - 2 * dst_linesize] = av_clip(a, 0, max);                             \
+        dst[x - 1 * dst_linesize] = av_clip(b, 0, max);                             \
+        dst[x + 0 * dst_linesize] = av_clip(c, 0, max);                             \
+        dst[x + 1 * dst_linesize] = av_clip(d, 0, max);                             \
+    }                                                                               \
+}
+
+WEAK_HFILTER(8, uint8_t, 1)
+WEAK_HFILTER(16, uint16_t, 2)
+
+#define WEAK_VFILTER(name, type, ldiv)                                              \
+static void deblockv##name##_weak(uint8_t *dstp, ptrdiff_t dst_linesize, int block, \
+                                  int ath, int bth, int gth, int dth, int max)      \
+{                                                                                   \
+    type *dst;                                                                      \
+    int y;                                                                          \
+                                                                                    \
+    dst = (type *)dstp;                                                             \
+    dst_linesize /= ldiv;                                                           \
+                                                                                    \
+    for (y = 0; y < block; y++) {                                                   \
+        int delta = dst[0] - dst[-1];                                               \
+        int A, B, C, D, a, b, c, d;                                                 \
+                                                                                    \
+        if (FFABS(delta) >= ath ||                                                  \
+            FFABS(dst[-1] - dst[-2]) >= bth ||                                      \
+            FFABS(dst[0] - dst[1]) >= gth)                                          \
+            continue;                                                               \
+                                                                                    \
+        A = dst[-2];                                                                \
+        B = dst[-1];                                                                \
+        C = dst[+0];                                                                \
+        D = dst[+1];                                                                \
+                                                                                    \
+        a = A + delta / 8;                                                          \
+        b = B + delta / 2;                                                          \
+        c = C - delta / 2;                                                          \
+        d = D - delta / 8;                                                          \
+                                                                                    \
+        dst[-2] = av_clip(a, 0, max);                                               \
+        dst[-1] = av_clip(b, 0, max);                                               \
+        dst[+0] = av_clip(c, 0, max);                                               \
+        dst[+1] = av_clip(d, 0, max);                                               \
+                                                                                    \
+        dst += dst_linesize;                                                        \
+    }                                                                               \
+}
+
+WEAK_VFILTER(8, uint8_t, 1)
+WEAK_VFILTER(16, uint16_t, 2)
+
+#define STRONG_HFILTER(name, type, ldiv)                                           \
+static void deblockh##name##_strong(uint8_t *dstp, ptrdiff_t dst_linesize, int block,\
+                                    int ath, int bth, int gth, int dth, int max)   \
+{                                                                                  \
+    type *dst;                                                                     \
+    int x;                                                                         \
+                                                                                   \
+    dst = (type *)dstp;                                                            \
+    dst_linesize /= ldiv;                                                          \
+                                                                                   \
+    for (x = 0; x < block; x++) {                                                  \
+        int A, B, C, D, E, F, a, b, c, d, e, f;                                    \
+        int delta = dst[x] - dst[x - dst_linesize];                                \
+                                                                                   \
+        if (FFABS(delta) >= ath ||                                                 \
+            FFABS(dst[x - 1 * dst_linesize] - dst[x - 2 * dst_linesize]) >= bth || \
+            FFABS(dst[x + 1 * dst_linesize] - dst[x + 2 * dst_linesize]) >= gth || \
+            FFABS(dst[x + 0 * dst_linesize] - dst[x + 1 * dst_linesize]) >= dth)   \
+            continue;                                                              \
+                                                                                   \
+        A = dst[x - 3 * dst_linesize];                                             \
+        B = dst[x - 2 * dst_linesize];                                             \
+        C = dst[x - 1 * dst_linesize];                                             \
+        D = dst[x + 0 * dst_linesize];                                             \
+        E = dst[x + 1 * dst_linesize];                                             \
+        F = dst[x + 2 * dst_linesize];                                             \
+                                                                                   \
+        a = A + delta / 8;                                                         \
+        b = B + delta / 4;                                                         \
+        c = C + delta / 2;                                                         \
+        d = D - delta / 2;                                                         \
+        e = E - delta / 4;                                                         \
+        f = F - delta / 8;                                                         \
+                                                                                   \
+        dst[x - 3 * dst_linesize] = av_clip(a, 0, max);                            \
+        dst[x - 2 * dst_linesize] = av_clip(b, 0, max);                            \
+        dst[x - 1 * dst_linesize] = av_clip(c, 0, max);                            \
+        dst[x + 0 * dst_linesize] = av_clip(d, 0, max);                            \
+        dst[x + 1 * dst_linesize] = av_clip(e, 0, max);                            \
+        dst[x + 2 * dst_linesize] = av_clip(f, 0, max);                            \
+    }                                                                              \
+}
+
+STRONG_HFILTER(8, uint8_t, 1)
+STRONG_HFILTER(16, uint16_t, 2)
+
+#define STRONG_VFILTER(name, type, ldiv)                                           \
+static void deblockv##name##_strong(uint8_t *dstp, ptrdiff_t dst_linesize, int block,\
+                                    int ath, int bth, int gth, int dth, int max)   \
+{                                                                                  \
+    type *dst;                                                                     \
+    int y;                                                                         \
+                                                                                   \
+    dst = (type *)dstp;                                                            \
+    dst_linesize /= ldiv;                                                          \
+                                                                                   \
+    for (y = 0; y < block; y++) {                                                  \
+        int A, B, C, D, E, F, a, b, c, d, e, f;                                    \
+        int delta = dst[0] - dst[-1];                                              \
+                                                                                   \
+        if (FFABS(delta) >= ath ||                                                 \
+            FFABS(dst[-1] - dst[-2]) >= bth ||                                     \
+            FFABS(dst[+1] - dst[+2]) >= gth ||                                     \
+            FFABS(dst[+0] - dst[+1]) >= dth)                                       \
+            continue;                                                              \
+                                                                                   \
+        A = dst[-3];                                                               \
+        B = dst[-2];                                                               \
+        C = dst[-1];                                                               \
+        D = dst[+0];                                                               \
+        E = dst[+1];                                                               \
+        F = dst[+2];                                                               \
+                                                                                   \
+        a = A + delta / 8;                                                         \
+        b = B + delta / 4;                                                         \
+        c = C + delta / 2;                                                         \
+        d = D - delta / 2;                                                         \
+        e = E - delta / 4;                                                         \
+        f = F - delta / 8;                                                         \
+                                                                                   \
+        dst[-3] = av_clip(a, 0, max);                                              \
+        dst[-2] = av_clip(b, 0, max);                                              \
+        dst[-1] = av_clip(c, 0, max);                                              \
+        dst[+0] = av_clip(d, 0, max);                                              \
+        dst[+1] = av_clip(e, 0, max);                                              \
+        dst[+2] = av_clip(f, 0, max);                                              \
+                                                                                   \
+        dst += dst_linesize;                                                       \
+    }                                                                              \
+}
+
+STRONG_VFILTER(8, uint8_t, 1)
+STRONG_VFILTER(16, uint16_t, 2)
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    DeblockContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+
+    s->desc = av_pix_fmt_desc_get(outlink->format);
+    if (!s->desc)
+        return AVERROR_BUG;
+    s->nb_planes = av_pix_fmt_count_planes(outlink->format);
+    s->depth = s->desc->comp[0].depth;
+    s->bpc = (s->depth + 7) / 8;
+    s->max = (1 << s->depth) - 1;
+    s->ath = s->alpha * s->max;
+    s->bth = s->beta  * s->max;
+    s->gth = s->gamma * s->max;
+    s->dth = s->delta * s->max;
+
+    if (s->depth <= 8 && s->filter == WEAK) {
+        s->deblockh = deblockh8_weak;
+        s->deblockv = deblockv8_weak;
+    } else if (s->depth >= 8 && s->filter == WEAK) {
+        s->deblockh = deblockh16_weak;
+        s->deblockv = deblockv16_weak;
+    }
+    if (s->depth <= 8 && s->filter == STRONG) {
+        s->deblockh = deblockh8_strong;
+        s->deblockv = deblockv8_strong;
+    } else if (s->depth >= 8 && s->filter == STRONG) {
+        s->deblockh = deblockh16_strong;
+        s->deblockv = deblockv16_strong;
+    }
+
+    s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, s->desc->log2_chroma_w);
+    s->planewidth[0] = s->planewidth[3] = inlink->w;
+
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, s->desc->log2_chroma_h);
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    DeblockContext *s = ctx->priv;
+    const int block = s->block;
+    AVFrame *out;
+    int plane, x, y;
+
+    if (av_frame_is_writable(in)) {
+        out = in;
+    } else {
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        const int width = s->planewidth[plane];
+        const int height = s->planeheight[plane];
+        const uint8_t *src = (const uint8_t *)in->data[plane];
+        uint8_t *dst = (uint8_t *)out->data[plane];
+
+        if (in != out)
+            av_image_copy_plane(dst, out->linesize[plane],
+                                src, in->linesize[plane],
+                                width * s->bpc, height);
+
+        if (!((1 << plane) & s->planes))
+            continue;
+
+        for (x = block; x < width; x += block)
+            s->deblockv(dst + x * s->bpc, out->linesize[plane],
+                        FFMIN(block, height), s->ath, s->bth, s->gth, s->dth, s->max);
+
+        for (y = block; y < height; y += block) {
+            dst += out->linesize[plane] * block;
+
+            s->deblockh(dst, out->linesize[plane],
+                        FFMIN(block, width),
+                        s->ath, s->bth, s->gth, s->dth, s->max);
+
+            for (x = block; x < width; x += block) {
+                s->deblockh(dst + x * s->bpc, out->linesize[plane],
+                            FFMIN(block, width - x),
+                            s->ath, s->bth, s->gth, s->dth, s->max);
+                s->deblockv(dst + x * s->bpc, out->linesize[plane],
+                            FFMIN(block, height - y),
+                            s->ath, s->bth, s->gth, s->dth, s->max);
+            }
+        }
+    }
+
+    if (in != out)
+        av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+#define OFFSET(x) offsetof(DeblockContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption deblock_options[] = {
+    { "filter",    "set type of filter",          OFFSET(filter),    AV_OPT_TYPE_INT,   {.i64=STRONG},0, 1,  FLAGS, "filter" },
+    { "weak",      0,                             0,                 AV_OPT_TYPE_CONST, {.i64=WEAK},  0, 0,  FLAGS, "filter" },
+    { "strong",    0,                             0,                 AV_OPT_TYPE_CONST, {.i64=STRONG},0, 0,  FLAGS, "filter" },
+    { "block",     "set size of block",           OFFSET(block),     AV_OPT_TYPE_INT,   {.i64=8},    4, 512, FLAGS },
+    { "alpha",     "set 1st detection threshold", OFFSET(alpha),     AV_OPT_TYPE_FLOAT, {.dbl=.098}, 0,  1,  FLAGS },
+    { "beta",      "set 2nd detection threshold", OFFSET(beta),      AV_OPT_TYPE_FLOAT, {.dbl=.05},  0,  1,  FLAGS },
+    { "gamma",     "set 3rd detection threshold", OFFSET(gamma),     AV_OPT_TYPE_FLOAT, {.dbl=.05},  0,  1,  FLAGS },
+    { "delta",     "set 4th detection threshold", OFFSET(delta),     AV_OPT_TYPE_FLOAT, {.dbl=.05},  0,  1,  FLAGS },
+    { "planes",    "set planes to filter",        OFFSET(planes),    AV_OPT_TYPE_INT,   {.i64=15},   0, 15,  FLAGS },
+    { NULL },
+};
+
+static const AVFilterPad inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_VIDEO,
+        .filter_frame   = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+    },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(deblock);
+
+AVFilter ff_vf_deblock = {
+    .name          = "deblock",
+    .description   = NULL_IF_CONFIG_SMALL("Deblock video."),
+    .priv_size     = sizeof(DeblockContext),
+    .priv_class    = &deblock_class,
+    .query_formats = query_formats,
+    .inputs        = inputs,
+    .outputs       = outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+};

diff --git a/libavfilter/vf_deflicker.c b/libavfilter/vf_deflicker.c
index 863a350..b038b97 100644
--- a/libavfilter/vf_deflicker.c
+++ b/libavfilter/vf_deflicker.c

@@ -95,7 +95,7 @@
 {
     static const enum AVPixelFormat pixel_fmts[] = {
         AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10,
-        AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
         AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,

diff --git a/libavfilter/vf_deinterlace_qsv.c b/libavfilter/vf_deinterlace_qsv.c
index 2810bff..d6b02e9 100644
--- a/libavfilter/vf_deinterlace_qsv.c
+++ b/libavfilter/vf_deinterlace_qsv.c

@@ -35,6 +35,7 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/time.h"
+#include "libavfilter/qsvvpp.h"
 
 #include "avfilter.h"
 #include "formats.h"
@@ -68,13 +69,18 @@
     int             nb_surface_ptrs;
 
     mfxExtOpaqueSurfaceAlloc opaque_alloc;
-    mfxExtBuffer            *ext_buffers[1];
+    mfxExtVPPDeinterlacing   deint_conf;
+    mfxExtBuffer            *ext_buffers[2];
+    int                      num_ext_buffers;
 
     QSVFrame *work_frames;
 
     int64_t last_pts;
 
     int eof;
+
+    /* option for Deinterlacing algorithm to be used */
+    int mode;
 } QSVDeintContext;
 
 static void qsvdeint_uninit(AVFilterContext *ctx)
@@ -196,6 +202,11 @@
         }
     }
 
+    if (err != MFX_ERR_NONE) {
+        av_log(ctx, AV_LOG_ERROR, "Error getting the session handle\n");
+        return AVERROR_UNKNOWN;
+    }
+
     /* create a "slave" session with those same properties, to be used for
      * actual deinterlacing */
     err = MFXInit(impl, &ver, &s->session);
@@ -210,8 +221,20 @@
             return AVERROR_UNKNOWN;
     }
 
+    if (QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 25)) {
+        err = MFXJoinSession(device_hwctx->session, s->session);
+        if (err != MFX_ERR_NONE)
+            return AVERROR_UNKNOWN;
+    }
+
     memset(&par, 0, sizeof(par));
 
+    s->deint_conf.Header.BufferId = MFX_EXTBUFF_VPP_DEINTERLACING;
+    s->deint_conf.Header.BufferSz = sizeof(s->deint_conf);
+    s->deint_conf.Mode = s->mode;
+
+    s->ext_buffers[s->num_ext_buffers++] = (mfxExtBuffer *)&s->deint_conf;
+
     if (opaque) {
         s->surface_ptrs = av_mallocz_array(hw_frames_hwctx->nb_surfaces,
                                            sizeof(*s->surface_ptrs));
@@ -230,10 +253,7 @@
         s->opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION;
         s->opaque_alloc.Header.BufferSz = sizeof(s->opaque_alloc);
 
-        s->ext_buffers[0] = (mfxExtBuffer*)&s->opaque_alloc;
-
-        par.ExtParam    = s->ext_buffers;
-        par.NumExtParam = FF_ARRAY_ELEMS(s->ext_buffers);
+        s->ext_buffers[s->num_ext_buffers++] = (mfxExtBuffer *)&s->opaque_alloc;
 
         par.IOPattern = MFX_IOPATTERN_IN_OPAQUE_MEMORY | MFX_IOPATTERN_OUT_OPAQUE_MEMORY;
     } else {
@@ -261,6 +281,9 @@
         par.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY;
     }
 
+    par.ExtParam    = s->ext_buffers;
+    par.NumExtParam = s->num_ext_buffers;
+
     par.AsyncDepth = 1;    // TODO async
 
     par.vpp.In = hw_frames_hwctx->surfaces[0].Info;
@@ -529,6 +552,9 @@
 #define OFFSET(x) offsetof(QSVDeintContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 static const AVOption options[] = {
+    { "mode", "set deinterlace mode", OFFSET(mode),   AV_OPT_TYPE_INT, {.i64 = MFX_DEINTERLACING_ADVANCED}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"},
+    { "bob",   "bob algorithm",                  0, AV_OPT_TYPE_CONST,      {.i64 = MFX_DEINTERLACING_BOB}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"},
+    { "advanced", "Motion adaptive algorithm",   0, AV_OPT_TYPE_CONST, {.i64 = MFX_DEINTERLACING_ADVANCED}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"},
     { NULL },
 };
 

diff --git a/libavfilter/vf_deinterlace_vaapi.c b/libavfilter/vf_deinterlace_vaapi.c
index a38da5d..f7a262d 100644
--- a/libavfilter/vf_deinterlace_vaapi.c
+++ b/libavfilter/vf_deinterlace_vaapi.c

@@ -18,13 +18,8 @@
 
 #include <string.h>
 
-#include <va/va.h>
-#include <va/va_vpp.h>
-
 #include "libavutil/avassert.h"
 #include "libavutil/common.h"
-#include "libavutil/hwcontext.h"
-#include "libavutil/hwcontext_vaapi.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -33,31 +28,17 @@
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
+#include "vaapi_vpp.h"
 
 #define MAX_REFERENCES 8
 
 typedef struct DeintVAAPIContext {
-    const AVClass     *class;
-
-    AVVAAPIDeviceContext *hwctx;
-    AVBufferRef       *device_ref;
+    VAAPIVPPContext vpp_ctx; // must be the first field
 
     int                mode;
     int                field_rate;
     int                auto_enable;
 
-    int                valid_ids;
-    VAConfigID         va_config;
-    VAContextID        va_context;
-
-    AVBufferRef       *input_frames_ref;
-    AVHWFramesContext *input_frames;
-
-    AVBufferRef       *output_frames_ref;
-    AVHWFramesContext *output_frames;
-    int                output_height;
-    int                output_width;
-
     VAProcFilterCapDeinterlacing
                        deint_caps[VAProcDeinterlacingCount];
     int             nb_deint_caps;
@@ -67,8 +48,6 @@
     int                queue_count;
     AVFrame           *frame_queue[MAX_REFERENCES];
     int                extra_delay_for_timestamps;
-
-    VABufferID         filter_buffer;
 } DeintVAAPIContext;
 
 static const char *deint_vaapi_mode_name(int mode)
@@ -85,82 +64,29 @@
     }
 }
 
-static int deint_vaapi_query_formats(AVFilterContext *avctx)
+static void deint_vaapi_pipeline_uninit(AVFilterContext *avctx)
 {
-    enum AVPixelFormat pix_fmts[] = {
-        AV_PIX_FMT_VAAPI, AV_PIX_FMT_NONE,
-    };
-    int err;
-
-    if ((err = ff_formats_ref(ff_make_format_list(pix_fmts),
-                              &avctx->inputs[0]->out_formats)) < 0)
-        return err;
-    if ((err = ff_formats_ref(ff_make_format_list(pix_fmts),
-                              &avctx->outputs[0]->in_formats)) < 0)
-        return err;
-
-    return 0;
-}
-
-static int deint_vaapi_pipeline_uninit(AVFilterContext *avctx)
-{
-    DeintVAAPIContext *ctx = avctx->priv;
+    DeintVAAPIContext *ctx   = avctx->priv;
     int i;
 
     for (i = 0; i < ctx->queue_count; i++)
         av_frame_free(&ctx->frame_queue[i]);
     ctx->queue_count = 0;
 
-    if (ctx->filter_buffer != VA_INVALID_ID) {
-        vaDestroyBuffer(ctx->hwctx->display, ctx->filter_buffer);
-        ctx->filter_buffer = VA_INVALID_ID;
-    }
-
-    if (ctx->va_context != VA_INVALID_ID) {
-        vaDestroyContext(ctx->hwctx->display, ctx->va_context);
-        ctx->va_context = VA_INVALID_ID;
-    }
-
-    if (ctx->va_config != VA_INVALID_ID) {
-        vaDestroyConfig(ctx->hwctx->display, ctx->va_config);
-        ctx->va_config = VA_INVALID_ID;
-    }
-
-    av_buffer_unref(&ctx->device_ref);
-    ctx->hwctx = NULL;
-
-    return 0;
-}
-
-static int deint_vaapi_config_input(AVFilterLink *inlink)
-{
-    AVFilterContext   *avctx = inlink->dst;
-    DeintVAAPIContext *ctx = avctx->priv;
-
-    deint_vaapi_pipeline_uninit(avctx);
-
-    if (!inlink->hw_frames_ctx) {
-        av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is "
-               "required to associate the processing device.\n");
-        return AVERROR(EINVAL);
-    }
-
-    ctx->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
-    ctx->input_frames = (AVHWFramesContext*)ctx->input_frames_ref->data;
-
-    return 0;
+    ff_vaapi_vpp_pipeline_uninit(avctx);
 }
 
 static int deint_vaapi_build_filter_params(AVFilterContext *avctx)
 {
-    DeintVAAPIContext *ctx = avctx->priv;
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+    DeintVAAPIContext *ctx   = avctx->priv;
     VAStatus vas;
     VAProcFilterParameterBufferDeinterlacing params;
     int i;
 
     ctx->nb_deint_caps = VAProcDeinterlacingCount;
-    vas = vaQueryVideoProcFilterCaps(ctx->hwctx->display,
-                                     ctx->va_context,
+    vas = vaQueryVideoProcFilterCaps(vpp_ctx->hwctx->display,
+                                     vpp_ctx->va_context,
                                      VAProcFilterDeinterlacing,
                                      &ctx->deint_caps,
                                      &ctx->nb_deint_caps);
@@ -194,20 +120,17 @@
     params.algorithm = ctx->mode;
     params.flags     = 0;
 
-    av_assert0(ctx->filter_buffer == VA_INVALID_ID);
-    vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
-                         VAProcFilterParameterBufferType,
-                         sizeof(params), 1, &params,
-                         &ctx->filter_buffer);
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to create deinterlace "
-               "parameter buffer: %d (%s).\n", vas, vaErrorStr(vas));
-        return AVERROR(EIO);
-    }
+    vas = ff_vaapi_vpp_make_param_buffers(avctx,
+                                          VAProcFilterParameterBufferType,
+                                          &params,
+                                          sizeof(params),
+                                          1);
+    if (vas)
+        return vas;
 
-    vas = vaQueryVideoProcPipelineCaps(ctx->hwctx->display,
-                                       ctx->va_context,
-                                       &ctx->filter_buffer, 1,
+    vas = vaQueryVideoProcPipelineCaps(vpp_ctx->hwctx->display,
+                                       vpp_ctx->va_context,
+                                       &vpp_ctx->filter_buffers[0], 1,
                                        &ctx->pipeline_caps);
     if (vas != VA_STATUS_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Failed to query pipeline "
@@ -234,159 +157,35 @@
 
 static int deint_vaapi_config_output(AVFilterLink *outlink)
 {
-    AVFilterContext    *avctx = outlink->src;
-    AVFilterLink      *inlink = avctx->inputs[0];
-    DeintVAAPIContext    *ctx = avctx->priv;
-    AVVAAPIHWConfig *hwconfig = NULL;
-    AVHWFramesConstraints *constraints = NULL;
-    AVVAAPIFramesContext *va_frames;
-    VAStatus vas;
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    AVFilterContext *avctx = outlink->src;
+    DeintVAAPIContext *ctx = avctx->priv;
     int err;
 
-    deint_vaapi_pipeline_uninit(avctx);
-
-    av_assert0(ctx->input_frames);
-    ctx->device_ref = av_buffer_ref(ctx->input_frames->device_ref);
-    ctx->hwctx = ((AVHWDeviceContext*)ctx->device_ref->data)->hwctx;
-
-    ctx->output_width  = ctx->input_frames->width;
-    ctx->output_height = ctx->input_frames->height;
-
-    av_assert0(ctx->va_config == VA_INVALID_ID);
-    vas = vaCreateConfig(ctx->hwctx->display, VAProfileNone,
-                         VAEntrypointVideoProc, 0, 0, &ctx->va_config);
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to create processing pipeline "
-               "config: %d (%s).\n", vas, vaErrorStr(vas));
-        err = AVERROR(EIO);
-        goto fail;
-    }
-
-    hwconfig = av_hwdevice_hwconfig_alloc(ctx->device_ref);
-    if (!hwconfig) {
-        err = AVERROR(ENOMEM);
-        goto fail;
-    }
-    hwconfig->config_id = ctx->va_config;
-
-    constraints = av_hwdevice_get_hwframe_constraints(ctx->device_ref,
-                                                      hwconfig);
-    if (!constraints) {
-        err = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    if (ctx->output_width  < constraints->min_width  ||
-        ctx->output_height < constraints->min_height ||
-        ctx->output_width  > constraints->max_width  ||
-        ctx->output_height > constraints->max_height) {
-        av_log(avctx, AV_LOG_ERROR, "Hardware does not support "
-               "deinterlacing to size %dx%d "
-               "(constraints: width %d-%d height %d-%d).\n",
-               ctx->output_width, ctx->output_height,
-               constraints->min_width,  constraints->max_width,
-               constraints->min_height, constraints->max_height);
-        err = AVERROR(EINVAL);
-        goto fail;
-    }
-
-    ctx->output_frames_ref = av_hwframe_ctx_alloc(ctx->device_ref);
-    if (!ctx->output_frames_ref) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to create HW frame context "
-               "for output.\n");
-        err = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    ctx->output_frames = (AVHWFramesContext*)ctx->output_frames_ref->data;
-
-    ctx->output_frames->format    = AV_PIX_FMT_VAAPI;
-    ctx->output_frames->sw_format = ctx->input_frames->sw_format;
-    ctx->output_frames->width     = ctx->output_width;
-    ctx->output_frames->height    = ctx->output_height;
-
-    // The number of output frames we need is determined by what follows
-    // the filter.  If it's an encoder with complex frame reference
-    // structures then this could be very high.
-    ctx->output_frames->initial_pool_size = 10;
-
-    err = av_hwframe_ctx_init(ctx->output_frames_ref);
-    if (err < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to initialise VAAPI frame "
-               "context for output: %d\n", err);
-        goto fail;
-    }
-
-    va_frames = ctx->output_frames->hwctx;
-
-    av_assert0(ctx->va_context == VA_INVALID_ID);
-    vas = vaCreateContext(ctx->hwctx->display, ctx->va_config,
-                          ctx->output_width, ctx->output_height, 0,
-                          va_frames->surface_ids, va_frames->nb_surfaces,
-                          &ctx->va_context);
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to create processing pipeline "
-               "context: %d (%s).\n", vas, vaErrorStr(vas));
-        err = AVERROR(EIO);
-        goto fail;
-    }
-
-    err = deint_vaapi_build_filter_params(avctx);
+    err = ff_vaapi_vpp_config_output(outlink);
     if (err < 0)
-        goto fail;
-
-    outlink->w = inlink->w;
-    outlink->h = inlink->h;
-
+        return err;
     outlink->time_base  = av_mul_q(inlink->time_base,
                                    (AVRational) { 1, ctx->field_rate });
     outlink->frame_rate = av_mul_q(inlink->frame_rate,
                                    (AVRational) { ctx->field_rate, 1 });
 
-    outlink->hw_frames_ctx = av_buffer_ref(ctx->output_frames_ref);
-    if (!outlink->hw_frames_ctx) {
-        err = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    av_freep(&hwconfig);
-    av_hwframe_constraints_free(&constraints);
     return 0;
-
-fail:
-    av_buffer_unref(&ctx->output_frames_ref);
-    av_freep(&hwconfig);
-    av_hwframe_constraints_free(&constraints);
-    return err;
-}
-
-static int vaapi_proc_colour_standard(enum AVColorSpace av_cs)
-{
-    switch(av_cs) {
-#define CS(av, va) case AVCOL_SPC_ ## av: return VAProcColorStandard ## va;
-        CS(BT709,     BT709);
-        CS(BT470BG,   BT470BG);
-        CS(SMPTE170M, SMPTE170M);
-        CS(SMPTE240M, SMPTE240M);
-#undef CS
-    default:
-        return VAProcColorStandardNone;
-    }
 }
 
 static int deint_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame)
 {
     AVFilterContext   *avctx = inlink->dst;
     AVFilterLink    *outlink = avctx->outputs[0];
-    DeintVAAPIContext *ctx = avctx->priv;
-    AVFrame *output_frame = NULL;
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+    DeintVAAPIContext *ctx   = avctx->priv;
+    AVFrame *output_frame    = NULL;
     VASurfaceID input_surface, output_surface;
     VASurfaceID backward_references[MAX_REFERENCES];
     VASurfaceID forward_references[MAX_REFERENCES];
     VAProcPipelineParameterBuffer params;
     VAProcFilterParameterBufferDeinterlacing *filter_params;
     VARectangle input_region;
-    VABufferID params_id;
     VAStatus vas;
     void *filter_params_addr = NULL;
     int err, i, field, current_frame_index;
@@ -431,8 +230,8 @@
     av_log(avctx, AV_LOG_DEBUG, "\n");
 
     for (field = 0; field < ctx->field_rate; field++) {
-        output_frame = ff_get_video_buffer(outlink, ctx->output_width,
-                                           ctx->output_height);
+        output_frame = ff_get_video_buffer(outlink, vpp_ctx->output_width,
+                                           vpp_ctx->output_height);
         if (!output_frame) {
             err = AVERROR(ENOMEM);
             goto fail;
@@ -454,7 +253,7 @@
         params.surface = input_surface;
         params.surface_region = &input_region;
         params.surface_color_standard =
-            vaapi_proc_colour_standard(input_frame->colorspace);
+            ff_vaapi_vpp_colour_standard(input_frame->colorspace);
 
         params.output_region = NULL;
         params.output_background_color = 0xff000000;
@@ -464,7 +263,7 @@
         params.filter_flags   = VA_FRAME_PICTURE;
 
         if (!ctx->auto_enable || input_frame->interlaced_frame) {
-            vas = vaMapBuffer(ctx->hwctx->display, ctx->filter_buffer,
+            vas = vaMapBuffer(vpp_ctx->hwctx->display, vpp_ctx->filter_buffers[0],
                               &filter_params_addr);
             if (vas != VA_STATUS_SUCCESS) {
                 av_log(avctx, AV_LOG_ERROR, "Failed to map filter parameter "
@@ -481,12 +280,12 @@
                 filter_params->flags |= field ? 0 : VA_DEINTERLACING_BOTTOM_FIELD;
             }
             filter_params_addr = NULL;
-            vas = vaUnmapBuffer(ctx->hwctx->display, ctx->filter_buffer);
+            vas = vaUnmapBuffer(vpp_ctx->hwctx->display, vpp_ctx->filter_buffers[0]);
             if (vas != VA_STATUS_SUCCESS)
                 av_log(avctx, AV_LOG_ERROR, "Failed to unmap filter parameter "
                        "buffer: %d (%s).\n", vas, vaErrorStr(vas));
 
-            params.filters     = &ctx->filter_buffer;
+            params.filters     = &vpp_ctx->filter_buffers[0];
             params.num_filters = 1;
 
             params.forward_references = forward_references;
@@ -501,53 +300,9 @@
             params.num_filters = 0;
         }
 
-        vas = vaBeginPicture(ctx->hwctx->display,
-                             ctx->va_context, output_surface);
-        if (vas != VA_STATUS_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "Failed to attach new picture: "
-                   "%d (%s).\n", vas, vaErrorStr(vas));
-            err = AVERROR(EIO);
+        err = ff_vaapi_vpp_render_picture(avctx, &params, output_surface);
+        if (err < 0)
             goto fail;
-        }
-
-        vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
-                             VAProcPipelineParameterBufferType,
-                             sizeof(params), 1, &params, &params_id);
-        if (vas != VA_STATUS_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "Failed to create parameter buffer: "
-                   "%d (%s).\n", vas, vaErrorStr(vas));
-            err = AVERROR(EIO);
-            goto fail_after_begin;
-        }
-        av_log(avctx, AV_LOG_DEBUG, "Pipeline parameter buffer is %#x.\n",
-               params_id);
-
-        vas = vaRenderPicture(ctx->hwctx->display, ctx->va_context,
-                              &params_id, 1);
-        if (vas != VA_STATUS_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "Failed to render parameter buffer: "
-                   "%d (%s).\n", vas, vaErrorStr(vas));
-            err = AVERROR(EIO);
-            goto fail_after_begin;
-        }
-
-        vas = vaEndPicture(ctx->hwctx->display, ctx->va_context);
-        if (vas != VA_STATUS_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "Failed to start picture processing: "
-                   "%d (%s).\n", vas, vaErrorStr(vas));
-            err = AVERROR(EIO);
-            goto fail_after_render;
-        }
-
-        if (CONFIG_VAAPI_1 || ctx->hwctx->driver_quirks &
-            AV_VAAPI_DRIVER_QUIRK_RENDER_PARAM_BUFFERS) {
-            vas = vaDestroyBuffer(ctx->hwctx->display, params_id);
-            if (vas != VA_STATUS_SUCCESS) {
-                av_log(avctx, AV_LOG_ERROR, "Failed to free parameter buffer: "
-                       "%d (%s).\n", vas, vaErrorStr(vas));
-                // And ignore.
-            }
-        }
 
         err = av_frame_copy_props(output_frame, input_frame);
         if (err < 0)
@@ -573,41 +328,25 @@
 
     return err;
 
-fail_after_begin:
-    vaRenderPicture(ctx->hwctx->display, ctx->va_context, &params_id, 1);
-fail_after_render:
-    vaEndPicture(ctx->hwctx->display, ctx->va_context);
 fail:
     if (filter_params_addr)
-        vaUnmapBuffer(ctx->hwctx->display, ctx->filter_buffer);
+        vaUnmapBuffer(vpp_ctx->hwctx->display, vpp_ctx->filter_buffers[0]);
     av_frame_free(&output_frame);
     return err;
 }
 
 static av_cold int deint_vaapi_init(AVFilterContext *avctx)
 {
-    DeintVAAPIContext *ctx = avctx->priv;
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
 
-    ctx->va_config     = VA_INVALID_ID;
-    ctx->va_context    = VA_INVALID_ID;
-    ctx->filter_buffer = VA_INVALID_ID;
-    ctx->valid_ids = 1;
+    ff_vaapi_vpp_ctx_init(avctx);
+    vpp_ctx->pipeline_uninit     = deint_vaapi_pipeline_uninit;
+    vpp_ctx->build_filter_params = deint_vaapi_build_filter_params;
+    vpp_ctx->output_format       = AV_PIX_FMT_NONE;
 
     return 0;
 }
 
-static av_cold void deint_vaapi_uninit(AVFilterContext *avctx)
-{
-    DeintVAAPIContext *ctx = avctx->priv;
-
-    if (ctx->valid_ids)
-        deint_vaapi_pipeline_uninit(avctx);
-
-    av_buffer_unref(&ctx->input_frames_ref);
-    av_buffer_unref(&ctx->output_frames_ref);
-    av_buffer_unref(&ctx->device_ref);
-}
-
 #define OFFSET(x) offsetof(DeintVAAPIContext, x)
 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
 static const AVOption deint_vaapi_options[] = {
@@ -650,7 +389,7 @@
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
         .filter_frame = &deint_vaapi_filter_frame,
-        .config_props = &deint_vaapi_config_input,
+        .config_props = &ff_vaapi_vpp_config_input,
     },
     { NULL }
 };
@@ -669,8 +408,8 @@
     .description    = NULL_IF_CONFIG_SMALL("Deinterlacing of VAAPI surfaces"),
     .priv_size      = sizeof(DeintVAAPIContext),
     .init           = &deint_vaapi_init,
-    .uninit         = &deint_vaapi_uninit,
-    .query_formats  = &deint_vaapi_query_formats,
+    .uninit         = &ff_vaapi_vpp_ctx_uninit,
+    .query_formats  = &ff_vaapi_vpp_query_formats,
     .inputs         = deint_vaapi_inputs,
     .outputs        = deint_vaapi_outputs,
     .priv_class     = &deint_vaapi_class,

diff --git a/libavfilter/vf_deshake.c b/libavfilter/vf_deshake.c
index 64b48c6..c8480e7 100644
--- a/libavfilter/vf_deshake.c
+++ b/libavfilter/vf_deshake.c

@@ -60,7 +60,6 @@
 #include "libavutil/qsort.h"
 
 #include "deshake.h"
-#include "deshake_opencl.h"
 
 #define OFFSET(x) offsetof(DeshakeContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
@@ -83,7 +82,7 @@
         { "exhaustive", "exhaustive search",      0, AV_OPT_TYPE_CONST, {.i64=EXHAUSTIVE},       INT_MIN, INT_MAX, FLAGS, "smode" },
         { "less",       "less exhaustive search", 0, AV_OPT_TYPE_CONST, {.i64=SMART_EXHAUSTIVE}, INT_MIN, INT_MAX, FLAGS, "smode" },
     { "filename", "set motion search detailed log file name", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
-    { "opencl", "use OpenCL filtering capabilities", OFFSET(opencl), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, .flags = FLAGS },
+    { "opencl", "ignored",                              OFFSET(opencl), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, .flags = FLAGS },
     { NULL }
 };
 
@@ -197,7 +196,7 @@
     for (i = 0; i <= blocksize * 2; i++) {
         // We use a width of 16 here to match the sad function
         for (j = 0; j <= 15; j++) {
-            pos = (y - i) * stride + (x - j);
+            pos = (y + i) * stride + (x + j);
             if (src[pos] < lowest)
                 lowest = src[pos];
             else if (src[pos] > highest) {
@@ -341,13 +340,8 @@
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    int ret;
     DeshakeContext *deshake = ctx->priv;
 
-    deshake->sad = av_pixelutils_get_sad_fn(4, 4, 1, deshake); // 16x16, 2nd source unaligned
-    if (!deshake->sad)
-        return AVERROR(EINVAL);
-
     deshake->refcount = 20; // XXX: add to options?
     deshake->blocksize /= 2;
     deshake->blocksize = av_clip(deshake->blocksize, 4, 128);
@@ -369,17 +363,7 @@
         deshake->cx &= ~15;
     }
     deshake->transform = deshake_transform_c;
-    if (!CONFIG_OPENCL && deshake->opencl) {
-        av_log(ctx, AV_LOG_ERROR, "OpenCL support was not enabled in this build, cannot be selected\n");
-        return AVERROR(EINVAL);
-    }
 
-    if (CONFIG_OPENCL && deshake->opencl) {
-        deshake->transform = ff_opencl_transform;
-        ret = ff_opencl_deshake_init(ctx);
-        if (ret < 0)
-            return ret;
-    }
     av_log(ctx, AV_LOG_VERBOSE, "cx: %d, cy: %d, cw: %d, ch: %d, rx: %d, ry: %d, edge: %d blocksize: %d contrast: %d search: %d\n",
            deshake->cx, deshake->cy, deshake->cw, deshake->ch,
            deshake->rx, deshake->ry, deshake->edge, deshake->blocksize * 2, deshake->contrast, deshake->search);
@@ -416,9 +400,6 @@
 static av_cold void uninit(AVFilterContext *ctx)
 {
     DeshakeContext *deshake = ctx->priv;
-    if (CONFIG_OPENCL && deshake->opencl) {
-        ff_opencl_deshake_uninit(ctx);
-    }
     av_frame_free(&deshake->ref);
     av_freep(&deshake->angles);
     deshake->angles_size = 0;
@@ -439,6 +420,7 @@
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
     const int chroma_width  = AV_CEIL_RSHIFT(link->w, desc->log2_chroma_w);
     const int chroma_height = AV_CEIL_RSHIFT(link->h, desc->log2_chroma_h);
+    int aligned;
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out) {
@@ -447,11 +429,10 @@
     }
     av_frame_copy_props(out, in);
 
-    if (CONFIG_OPENCL && deshake->opencl) {
-        ret = ff_opencl_deshake_process_inout_buf(link->dst,in, out);
-        if (ret < 0)
-            goto fail;
-    }
+    aligned = !((intptr_t)in->data[0] & 15 | in->linesize[0] & 15);
+    deshake->sad = av_pixelutils_get_sad_fn(4, 4, aligned, deshake); // 16x16, 2nd source unaligned
+    if (!deshake->sad)
+        return AVERROR(EINVAL);
 
     if (deshake->cx < 0 || deshake->cy < 0 || deshake->cw < 0 || deshake->ch < 0) {
         // Find the most likely global motion for the current frame

diff --git a/libavfilter/vf_drawbox.c b/libavfilter/vf_drawbox.c
index 88bb9ae..c9cb63d 100644
--- a/libavfilter/vf_drawbox.c
+++ b/libavfilter/vf_drawbox.c

@@ -47,7 +47,7 @@
     "h",              ///< height of the rendered box
     "w",              ///< width  of the rendered box
     "t",
-    "max",
+    "fill",
     NULL
 };
 
@@ -80,6 +80,7 @@
     char *w_expr, *h_expr; ///< expression for width and height
     char *t_expr;          ///< expression for thickness
     int have_alpha;
+    int replace;
 } DrawBoxContext;
 
 static const int NUM_EXPR_EVALS = 5;
@@ -213,7 +214,7 @@
     int plane, x, y, xb = s->x, yb = s->y;
     unsigned char *row[4];
 
-    if (s->have_alpha) {
+    if (s->have_alpha && s->replace) {
         for (y = FFMAX(yb, 0); y < frame->height && y < (yb + s->h); y++) {
             row[0] = frame->data[0] + y * frame->linesize[0];
             row[3] = frame->data[3] + y * frame->linesize[3];
@@ -286,6 +287,7 @@
     { "c",         "set color of the box",                         OFFSET(color_str), AV_OPT_TYPE_STRING, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGS },
     { "thickness", "set the box thickness",                        OFFSET(t_expr),    AV_OPT_TYPE_STRING, { .str="3" },       CHAR_MIN, CHAR_MAX, FLAGS },
     { "t",         "set the box thickness",                        OFFSET(t_expr),    AV_OPT_TYPE_STRING, { .str="3" },       CHAR_MIN, CHAR_MAX, FLAGS },
+    { "replace",   "replace color & alpha",                        OFFSET(replace),   AV_OPT_TYPE_BOOL,   { .i64=0 },         0,        1,        FLAGS },
     { NULL }
 };
 
@@ -354,7 +356,7 @@
     int plane, x, y;
     uint8_t *row[4];
 
-    if (drawgrid->have_alpha) {
+    if (drawgrid->have_alpha && drawgrid->replace) {
         for (y = 0; y < frame->height; y++) {
             row[0] = frame->data[0] + y * frame->linesize[0];
             row[3] = frame->data[3] + y * frame->linesize[3];
@@ -418,6 +420,7 @@
     { "c",         "set color of the grid",   OFFSET(color_str), AV_OPT_TYPE_STRING, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGS },
     { "thickness", "set grid line thickness", OFFSET(t_expr),    AV_OPT_TYPE_STRING, {.str="1"},         CHAR_MIN, CHAR_MAX, FLAGS },
     { "t",         "set grid line thickness", OFFSET(t_expr),    AV_OPT_TYPE_STRING, {.str="1"},         CHAR_MIN, CHAR_MAX, FLAGS },
+    { "replace",   "replace color & alpha",   OFFSET(replace),   AV_OPT_TYPE_BOOL,   { .i64=0 },         0,        1,        FLAGS },
     { NULL }
 };
 

diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c
index f615144..cca2cbc 100644
--- a/libavfilter/vf_drawtext.c
+++ b/libavfilter/vf_drawtext.c

@@ -238,7 +238,7 @@
     {"rate",            "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},           0,  INT_MAX, FLAGS},
     {"reload",     "reload text file for each frame",                       OFFSET(reload),     AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
     { "alpha",       "apply alpha while rendering", OFFSET(a_expr),      AV_OPT_TYPE_STRING, { .str = "1"     },          .flags = FLAGS },
-    {"fix_bounds", "check and fix text coords to avoid clipping", OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS},
+    {"fix_bounds", "check and fix text coords to avoid clipping", OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
     {"start_number", "start frame number for n/frame_num variable", OFFSET(start_number), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
 
 #if CONFIG_LIBFRIBIDI
@@ -916,6 +916,14 @@
                 sign = '-';
                 ms = -ms;
             }
+            if (argc >= 3) {
+                if (!strcmp(argv[2], "24HH")) {
+                    ms %= 24 * 60 * 60 * 1000;
+                } else {
+                    av_log(ctx, AV_LOG_ERROR, "Invalid argument '%s'\n", argv[2]);
+                    return AVERROR(EINVAL);
+                }
+            }
             av_bprintf(bp, "%c%02d:%02d:%02d.%03d", sign,
                        (int)(ms / (60 * 60 * 1000)),
                        (int)(ms / (60 * 1000)) % 60,
@@ -1390,6 +1398,7 @@
 
     s->x = s->var_values[VAR_X] = av_expr_eval(s->x_pexpr, s->var_values, &s->prng);
     s->y = s->var_values[VAR_Y] = av_expr_eval(s->y_pexpr, s->var_values, &s->prng);
+    /* It is necessary if x is expressed from y  */
     s->x = s->var_values[VAR_X] = av_expr_eval(s->x_pexpr, s->var_values, &s->prng);
 
     update_alpha(s);
@@ -1398,8 +1407,34 @@
     update_color_with_alpha(s, &bordercolor, s->bordercolor);
     update_color_with_alpha(s, &boxcolor   , s->boxcolor   );
 
-    box_w = FFMIN(width - 1 , max_text_line_w);
-    box_h = FFMIN(height - 1, y + s->max_glyph_h);
+    box_w = max_text_line_w;
+    box_h = y + s->max_glyph_h;
+
+    if (s->fix_bounds) {
+
+        /* calculate footprint of text effects */
+        int boxoffset     = s->draw_box ? FFMAX(s->boxborderw, 0) : 0;
+        int borderoffset  = s->borderw  ? FFMAX(s->borderw, 0) : 0;
+
+        int offsetleft = FFMAX3(boxoffset, borderoffset,
+                                (s->shadowx < 0 ? FFABS(s->shadowx) : 0));
+        int offsettop = FFMAX3(boxoffset, borderoffset,
+                                (s->shadowy < 0 ? FFABS(s->shadowy) : 0));
+
+        int offsetright = FFMAX3(boxoffset, borderoffset,
+                                 (s->shadowx > 0 ? s->shadowx : 0));
+        int offsetbottom = FFMAX3(boxoffset, borderoffset,
+                                  (s->shadowy > 0 ? s->shadowy : 0));
+
+
+        if (s->x - offsetleft < 0) s->x = offsetleft;
+        if (s->y - offsettop < 0)  s->y = offsettop;
+
+        if (s->x + box_w + offsetright > width)
+            s->x = FFMAX(width - box_w - offsetright, 0);
+        if (s->y + box_h + offsetbottom > height)
+            s->y = FFMAX(height - box_h - offsetbottom, 0);
+    }
 
     /* draw box */
     if (s->draw_box)

diff --git a/libavfilter/vf_edgedetect.c b/libavfilter/vf_edgedetect.c
index 173f9fe..a0ddcbb 100644
--- a/libavfilter/vf_edgedetect.c
+++ b/libavfilter/vf_edgedetect.c

@@ -26,15 +26,25 @@
  */
 
 #include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "avfilter.h"
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
 
+#define PLANE_R 0x4
+#define PLANE_G 0x1
+#define PLANE_B 0x2
+#define PLANE_Y 0x1
+#define PLANE_U 0x2
+#define PLANE_V 0x4
+#define PLANE_A 0x8
+
 enum FilterMode {
     MODE_WIRES,
     MODE_COLORMIX,
+    MODE_CANNY,
     NB_MODE
 };
 
@@ -42,11 +52,13 @@
     uint8_t  *tmpbuf;
     uint16_t *gradients;
     char     *directions;
+    int      width, height;
 };
 
 typedef struct EdgeDetectContext {
     const AVClass *class;
     struct plane_info planes[3];
+    int filter_planes;
     int nb_planes;
     double   low, high;
     uint8_t  low_u8, high_u8;
@@ -61,6 +73,14 @@
     { "mode", "set mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=MODE_WIRES}, 0, NB_MODE-1, FLAGS, "mode" },
         { "wires",    "white/gray wires on black",  0, AV_OPT_TYPE_CONST, {.i64=MODE_WIRES},    INT_MIN, INT_MAX, FLAGS, "mode" },
         { "colormix", "mix colors",                 0, AV_OPT_TYPE_CONST, {.i64=MODE_COLORMIX}, INT_MIN, INT_MAX, FLAGS, "mode" },
+        { "canny",    "detect edges on planes",     0, AV_OPT_TYPE_CONST, {.i64=MODE_CANNY},    INT_MIN, INT_MAX, FLAGS, "mode" },
+    { "planes", "set planes to filter",  OFFSET(filter_planes), AV_OPT_TYPE_FLAGS, {.i64=7}, 1, 0x7, FLAGS, "flags" },
+        { "y", "filter luma plane",  0, AV_OPT_TYPE_CONST, {.i64=PLANE_Y}, 0, 0, FLAGS, "flags" },
+        { "u", "filter u plane",     0, AV_OPT_TYPE_CONST, {.i64=PLANE_U}, 0, 0, FLAGS, "flags" },
+        { "v", "filter v plane",     0, AV_OPT_TYPE_CONST, {.i64=PLANE_V}, 0, 0, FLAGS, "flags" },
+        { "r", "filter red plane",   0, AV_OPT_TYPE_CONST, {.i64=PLANE_R}, 0, 0, FLAGS, "flags" },
+        { "g", "filter green plane", 0, AV_OPT_TYPE_CONST, {.i64=PLANE_G}, 0, 0, FLAGS, "flags" },
+        { "b", "filter blue plane",  0, AV_OPT_TYPE_CONST, {.i64=PLANE_B}, 0, 0, FLAGS, "flags" },
     { NULL }
 };
 
@@ -79,6 +99,7 @@
 {
     const EdgeDetectContext *edgedetect = ctx->priv;
     static const enum AVPixelFormat wires_pix_fmts[] = {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE};
+    static const enum AVPixelFormat canny_pix_fmts[] = {AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE};
     static const enum AVPixelFormat colormix_pix_fmts[] = {AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE};
     AVFilterFormats *fmts_list;
     const enum AVPixelFormat *pix_fmts = NULL;
@@ -87,6 +108,8 @@
         pix_fmts = wires_pix_fmts;
     } else if (edgedetect->mode == MODE_COLORMIX) {
         pix_fmts = colormix_pix_fmts;
+    } else if (edgedetect->mode == MODE_CANNY) {
+        pix_fmts = canny_pix_fmts;
     } else {
         av_assert0(0);
     }
@@ -101,14 +124,19 @@
     int p;
     AVFilterContext *ctx = inlink->dst;
     EdgeDetectContext *edgedetect = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 
     edgedetect->nb_planes = inlink->format == AV_PIX_FMT_GRAY8 ? 1 : 3;
     for (p = 0; p < edgedetect->nb_planes; p++) {
         struct plane_info *plane = &edgedetect->planes[p];
+        int vsub = p ? desc->log2_chroma_h : 0;
+        int hsub = p ? desc->log2_chroma_w : 0;
 
-        plane->tmpbuf     = av_malloc(inlink->w * inlink->h);
-        plane->gradients  = av_calloc(inlink->w * inlink->h, sizeof(*plane->gradients));
-        plane->directions = av_malloc(inlink->w * inlink->h);
+        plane->width      = AV_CEIL_RSHIFT(inlink->w, hsub);
+        plane->height     = AV_CEIL_RSHIFT(inlink->h, vsub);
+        plane->tmpbuf     = av_malloc(plane->width * plane->height);
+        plane->gradients  = av_calloc(plane->width * plane->height, sizeof(*plane->gradients));
+        plane->directions = av_malloc(plane->width * plane->height);
         if (!plane->tmpbuf || !plane->gradients || !plane->directions)
             return AVERROR(ENOMEM);
     }
@@ -316,34 +344,44 @@
         uint8_t  *tmpbuf     = plane->tmpbuf;
         uint16_t *gradients  = plane->gradients;
         int8_t   *directions = plane->directions;
+        const int width      = plane->width;
+        const int height     = plane->height;
+
+        if (!((1 << p) & edgedetect->filter_planes)) {
+            if (!direct)
+                av_image_copy_plane(out->data[p], out->linesize[p],
+                                    in->data[p], in->linesize[p],
+                                    width, height);
+            continue;
+        }
 
         /* gaussian filter to reduce noise  */
-        gaussian_blur(ctx, inlink->w, inlink->h,
-                      tmpbuf,      inlink->w,
+        gaussian_blur(ctx, width, height,
+                      tmpbuf,      width,
                       in->data[p], in->linesize[p]);
 
         /* compute the 16-bits gradients and directions for the next step */
-        sobel(inlink->w, inlink->h,
-              gradients, inlink->w,
-              directions,inlink->w,
-              tmpbuf,    inlink->w);
+        sobel(width, height,
+              gradients, width,
+              directions,width,
+              tmpbuf,    width);
 
         /* non_maximum_suppression() will actually keep & clip what's necessary and
          * ignore the rest, so we need a clean output buffer */
-        memset(tmpbuf, 0, inlink->w * inlink->h);
-        non_maximum_suppression(inlink->w, inlink->h,
-                                tmpbuf,    inlink->w,
-                                directions,inlink->w,
-                                gradients, inlink->w);
+        memset(tmpbuf, 0, width * height);
+        non_maximum_suppression(width, height,
+                                tmpbuf,    width,
+                                directions,width,
+                                gradients, width);
 
         /* keep high values, or low values surrounded by high values */
         double_threshold(edgedetect->low_u8, edgedetect->high_u8,
-                         inlink->w, inlink->h,
+                         width, height,
                          out->data[p], out->linesize[p],
-                         tmpbuf,       inlink->w);
+                         tmpbuf,       width);
 
         if (edgedetect->mode == MODE_COLORMIX) {
-            color_mix(inlink->w, inlink->h,
+            color_mix(width, height,
                       out->data[p], out->linesize[p],
                       in->data[p], in->linesize[p]);
         }

diff --git a/libavfilter/vf_entropy.c b/libavfilter/vf_entropy.c
new file mode 100644
index 0000000..c7361c8
--- /dev/null
+++ b/libavfilter/vf_entropy.c

@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct EntropyContext {
+    const AVClass *class;
+
+    int mode;
+
+    int nb_planes;
+    int planeheight[4];
+    int planewidth[4];
+    int depth;
+    int is_rgb;
+    uint8_t rgba_map[4];
+    char planenames[4];
+    int64_t *histogram;
+} EntropyContext;
+
+#define OFFSET(x) offsetof(EntropyContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+static const AVOption entropy_options[] = {
+    { "mode", "set kind of histogram entropy measurement",  OFFSET(mode), AV_OPT_TYPE_INT,   {.i64=0}, 0, 1, FLAGS, "mode" },
+    { "normal", NULL,                                       0,            AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "mode" },
+    { "diff",   NULL,                                       0,            AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "mode" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(entropy);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pixfmts[] = {
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV440P,
+        AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUV444P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV420P9,
+        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_YUV440P10,
+        AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV420P12,
+        AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV444P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV420P14,
+        AV_PIX_FMT_YUV444P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV420P16,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *formats = ff_make_format_list(pixfmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, formats);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    AVFilterContext *ctx = inlink->dst;
+    EntropyContext *s = ctx->priv;
+
+    s->nb_planes = desc->nb_components;
+
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
+
+    s->depth = desc->comp[0].depth;
+    s->is_rgb = ff_fill_rgba_map(s->rgba_map, inlink->format) >= 0;
+
+    s->planenames[0] = s->is_rgb ? 'R' : 'Y';
+    s->planenames[1] = s->is_rgb ? 'G' : 'U';
+    s->planenames[2] = s->is_rgb ? 'B' : 'V';
+    s->planenames[3] = 'A';
+
+    s->histogram = av_malloc_array(1 << s->depth, sizeof(*s->histogram));
+    if (!s->histogram)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    EntropyContext *s = ctx->priv;
+    int plane, y, x;
+
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        int cidx = s->is_rgb ? s->rgba_map[plane] : plane;
+        const uint8_t *src8 = in->data[plane];
+        const uint16_t *src16 = (const uint16_t *)in->data[plane];
+        float total = s->planewidth[plane] * s->planeheight[plane];
+        float entropy = 0;
+        char metabuf[128];
+        char key[128];
+
+        memset(s->histogram, 0, (1 << s->depth) * sizeof(*s->histogram));
+
+        if (s->depth <= 8) {
+            for (y = 0; y < s->planeheight[plane]; y++) {
+                for (x = 0; x < s->planewidth[plane]; x++) {
+                    s->histogram[src8[x]]++;
+                }
+
+                src8 += in->linesize[plane];
+            }
+        } else {
+            for (y = 0; y < s->planeheight[plane]; y++) {
+                for (x = 0; x < s->planewidth[plane]; x++) {
+                    s->histogram[src16[x]]++;
+                }
+
+                src16 += in->linesize[plane] / 2;
+            }
+        }
+
+        for (y = 0; y < 1 << s->depth; y++) {
+            if (s->mode == 0) {
+                if (s->histogram[y]) {
+                    float p = s->histogram[y] / total;
+                    entropy += -log2(p) * p;
+                }
+            } else if (s->mode == 1) {
+                if (y && (s->histogram[y] - s->histogram[y - 1]) != 0) {
+                    float p = FFABS(s->histogram[y] - s->histogram[y - 1]) / total;
+                    entropy += -log2(p) * p;
+                }
+            }
+        }
+
+        snprintf(key, sizeof(key), "lavfi.entropy.entropy.%s.%c", s->mode ? "diff" : "normal", s->planenames[cidx]);
+        snprintf(metabuf, sizeof(metabuf), "%f", entropy);
+        av_dict_set(&in->metadata, key, metabuf, 0);
+        snprintf(key, sizeof(key), "lavfi.entropy.normalized_entropy.%s.%c", s->mode ? "diff" : "normal", s->planenames[cidx]);
+        snprintf(metabuf, sizeof(metabuf), "%f", entropy / log2(1 << s->depth));
+        av_dict_set(&in->metadata, key, metabuf, 0);
+    }
+
+    return ff_filter_frame(outlink, in);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    EntropyContext *s = ctx->priv;
+
+    av_freep(&s->histogram);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_VIDEO,
+        .filter_frame   = filter_frame,
+        .config_props   = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_entropy = {
+    .name           = "entropy",
+    .description    = NULL_IF_CONFIG_SMALL("Measure video frames entropy."),
+    .priv_size      = sizeof(EntropyContext),
+    .uninit         = uninit,
+    .query_formats  = query_formats,
+    .inputs         = inputs,
+    .outputs        = outputs,
+    .priv_class     = &entropy_class,
+    .flags          = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+};

diff --git a/libavfilter/vf_extractplanes.c b/libavfilter/vf_extractplanes.c
index fc676a2..c1c8e69 100644
--- a/libavfilter/vf_extractplanes.c
+++ b/libavfilter/vf_extractplanes.c

@@ -64,98 +64,67 @@
 
 AVFILTER_DEFINE_CLASS(extractplanes);
 
+#define EIGHTBIT_FORMATS                           \
+        AV_PIX_FMT_YUV410P,                        \
+        AV_PIX_FMT_YUV411P,                        \
+        AV_PIX_FMT_YUV440P,                        \
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P,   \
+        AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA422P,   \
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,  \
+        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,  \
+        AV_PIX_FMT_YUVJ411P,                       \
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA444P,   \
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY8A,       \
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,        \
+        AV_PIX_FMT_RGBA, AV_PIX_FMT_BGRA,          \
+        AV_PIX_FMT_ARGB, AV_PIX_FMT_ABGR,          \
+        AV_PIX_FMT_RGB0, AV_PIX_FMT_BGR0,          \
+        AV_PIX_FMT_0RGB, AV_PIX_FMT_0BGR,          \
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP
+
+#define HIGHDEPTH_FORMATS(suf)                                 \
+        AV_PIX_FMT_YA16##suf, AV_PIX_FMT_GRAY16##suf,          \
+        AV_PIX_FMT_YUV420P16##suf, AV_PIX_FMT_YUVA420P16##suf, \
+        AV_PIX_FMT_YUV422P16##suf, AV_PIX_FMT_YUVA422P16##suf, \
+        AV_PIX_FMT_YUV444P16##suf, AV_PIX_FMT_YUVA444P16##suf, \
+        AV_PIX_FMT_RGB48##suf, AV_PIX_FMT_BGR48##suf,          \
+        AV_PIX_FMT_RGBA64##suf, AV_PIX_FMT_BGRA64##suf,        \
+        AV_PIX_FMT_GBRP16##suf, AV_PIX_FMT_GBRAP16##suf,       \
+        AV_PIX_FMT_YUV420P10##suf,                             \
+        AV_PIX_FMT_YUV422P10##suf,                             \
+        AV_PIX_FMT_YUV444P10##suf,                             \
+        AV_PIX_FMT_YUV440P10##suf,                             \
+        AV_PIX_FMT_YUVA420P10##suf,                            \
+        AV_PIX_FMT_YUVA422P10##suf,                            \
+        AV_PIX_FMT_YUVA444P10##suf,                            \
+        AV_PIX_FMT_YUV420P12##suf,                             \
+        AV_PIX_FMT_YUV422P12##suf,                             \
+        AV_PIX_FMT_YUV444P12##suf,                             \
+        AV_PIX_FMT_YUV440P12##suf,                             \
+        AV_PIX_FMT_GBRP10##suf, AV_PIX_FMT_GBRAP10##suf,       \
+        AV_PIX_FMT_GBRP12##suf, AV_PIX_FMT_GBRAP12##suf,       \
+        AV_PIX_FMT_YUV420P9##suf,                              \
+        AV_PIX_FMT_YUV422P9##suf,                              \
+        AV_PIX_FMT_YUV444P9##suf,                              \
+        AV_PIX_FMT_YUVA420P9##suf,                             \
+        AV_PIX_FMT_YUVA422P9##suf,                             \
+        AV_PIX_FMT_YUVA444P9##suf,                             \
+        AV_PIX_FMT_GBRP9##suf,                                 \
+        AV_PIX_FMT_GBRP14##suf,                                \
+        AV_PIX_FMT_YUV420P14##suf,                             \
+        AV_PIX_FMT_YUV422P14##suf,                             \
+        AV_PIX_FMT_YUV444P14##suf
+
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat in_pixfmts_le[] = {
-        AV_PIX_FMT_YUV410P,
-        AV_PIX_FMT_YUV411P,
-        AV_PIX_FMT_YUV440P,
-        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P,
-        AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA422P,
-        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
-        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
-        AV_PIX_FMT_YUVJ411P,
-        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA444P,
-        AV_PIX_FMT_YUV420P16LE, AV_PIX_FMT_YUVA420P16LE,
-        AV_PIX_FMT_YUV422P16LE, AV_PIX_FMT_YUVA422P16LE,
-        AV_PIX_FMT_YUV444P16LE, AV_PIX_FMT_YUVA444P16LE,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY8A,
-        AV_PIX_FMT_YA16LE, AV_PIX_FMT_GRAY16LE,
-        AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
-        AV_PIX_FMT_RGBA, AV_PIX_FMT_BGRA,
-        AV_PIX_FMT_ARGB, AV_PIX_FMT_ABGR,
-        AV_PIX_FMT_RGB0, AV_PIX_FMT_BGR0,
-        AV_PIX_FMT_0RGB, AV_PIX_FMT_0BGR,
-        AV_PIX_FMT_RGB48LE, AV_PIX_FMT_BGR48LE,
-        AV_PIX_FMT_RGBA64LE, AV_PIX_FMT_BGRA64LE,
-        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
-        AV_PIX_FMT_GBRP16LE, AV_PIX_FMT_GBRAP16LE,
-        AV_PIX_FMT_YUV420P10LE,
-        AV_PIX_FMT_YUV422P10LE,
-        AV_PIX_FMT_YUV444P10LE,
-        AV_PIX_FMT_YUV440P10LE,
-        AV_PIX_FMT_YUVA420P10LE,
-        AV_PIX_FMT_YUVA422P10LE,
-        AV_PIX_FMT_YUVA444P10LE,
-        AV_PIX_FMT_YUV420P12LE,
-        AV_PIX_FMT_YUV422P12LE,
-        AV_PIX_FMT_YUV444P12LE,
-        AV_PIX_FMT_YUV440P12LE,
-        AV_PIX_FMT_GBRP10LE, AV_PIX_FMT_GBRAP10LE,
-        AV_PIX_FMT_GBRP12LE, AV_PIX_FMT_GBRAP12LE,
-        AV_PIX_FMT_YUV420P9LE,
-        AV_PIX_FMT_YUV422P9LE,
-        AV_PIX_FMT_YUV444P9LE,
-        AV_PIX_FMT_YUVA420P9LE,
-        AV_PIX_FMT_YUVA422P9LE,
-        AV_PIX_FMT_YUVA444P9LE,
-        AV_PIX_FMT_GBRP9LE,
+        EIGHTBIT_FORMATS,
+        HIGHDEPTH_FORMATS(LE),
         AV_PIX_FMT_NONE,
     };
     static const enum AVPixelFormat in_pixfmts_be[] = {
-        AV_PIX_FMT_YUV410P,
-        AV_PIX_FMT_YUV411P,
-        AV_PIX_FMT_YUV440P,
-        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P,
-        AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA422P,
-        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
-        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
-        AV_PIX_FMT_YUVJ411P,
-        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA444P,
-        AV_PIX_FMT_YUV420P16BE, AV_PIX_FMT_YUVA420P16BE,
-        AV_PIX_FMT_YUV422P16BE, AV_PIX_FMT_YUVA422P16BE,
-        AV_PIX_FMT_YUV444P16BE, AV_PIX_FMT_YUVA444P16BE,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY8A,
-        AV_PIX_FMT_YA16BE, AV_PIX_FMT_GRAY16BE,
-        AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
-        AV_PIX_FMT_RGBA, AV_PIX_FMT_BGRA,
-        AV_PIX_FMT_ARGB, AV_PIX_FMT_ABGR,
-        AV_PIX_FMT_RGB0, AV_PIX_FMT_BGR0,
-        AV_PIX_FMT_0RGB, AV_PIX_FMT_0BGR,
-        AV_PIX_FMT_RGB48BE, AV_PIX_FMT_BGR48BE,
-        AV_PIX_FMT_RGBA64BE, AV_PIX_FMT_BGRA64BE,
-        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
-        AV_PIX_FMT_GBRP16BE, AV_PIX_FMT_GBRAP16BE,
-        AV_PIX_FMT_YUV420P10BE,
-        AV_PIX_FMT_YUV422P10BE,
-        AV_PIX_FMT_YUV444P10BE,
-        AV_PIX_FMT_YUV440P10BE,
-        AV_PIX_FMT_YUVA420P10BE,
-        AV_PIX_FMT_YUVA422P10BE,
-        AV_PIX_FMT_YUVA444P10BE,
-        AV_PIX_FMT_YUV420P12BE,
-        AV_PIX_FMT_YUV422P12BE,
-        AV_PIX_FMT_YUV444P12BE,
-        AV_PIX_FMT_YUV440P12BE,
-        AV_PIX_FMT_GBRP10BE, AV_PIX_FMT_GBRAP10BE,
-        AV_PIX_FMT_GBRP12BE, AV_PIX_FMT_GBRAP12BE,
-        AV_PIX_FMT_YUV420P9BE,
-        AV_PIX_FMT_YUV422P9BE,
-        AV_PIX_FMT_YUV444P9BE,
-        AV_PIX_FMT_YUVA420P9BE,
-        AV_PIX_FMT_YUVA422P9BE,
-        AV_PIX_FMT_YUVA444P9BE,
-        AV_PIX_FMT_GBRP9BE,
+        EIGHTBIT_FORMATS,
+        HIGHDEPTH_FORMATS(BE),
         AV_PIX_FMT_NONE,
     };
     static const enum AVPixelFormat out8_pixfmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE };
@@ -165,6 +134,8 @@
     static const enum AVPixelFormat out10be_pixfmts[] = { AV_PIX_FMT_GRAY10BE, AV_PIX_FMT_NONE };
     static const enum AVPixelFormat out12le_pixfmts[] = { AV_PIX_FMT_GRAY12LE, AV_PIX_FMT_NONE };
     static const enum AVPixelFormat out12be_pixfmts[] = { AV_PIX_FMT_GRAY12BE, AV_PIX_FMT_NONE };
+    static const enum AVPixelFormat out14le_pixfmts[] = { AV_PIX_FMT_GRAY14LE, AV_PIX_FMT_NONE };
+    static const enum AVPixelFormat out14be_pixfmts[] = { AV_PIX_FMT_GRAY14BE, AV_PIX_FMT_NONE };
     static const enum AVPixelFormat out16le_pixfmts[] = { AV_PIX_FMT_GRAY16LE, AV_PIX_FMT_NONE };
     static const enum AVPixelFormat out16be_pixfmts[] = { AV_PIX_FMT_GRAY16BE, AV_PIX_FMT_NONE };
     const enum AVPixelFormat *out_pixfmts, *in_pixfmts;
@@ -212,6 +183,10 @@
         out_pixfmts = out12le_pixfmts;
     else if (be && depth == 12)
         out_pixfmts = out12be_pixfmts;
+    else if (!be && depth == 14)
+        out_pixfmts = out14le_pixfmts;
+    else if (be && depth == 14)
+        out_pixfmts = out14be_pixfmts;
     else if (be)
         out_pixfmts = out16be_pixfmts;
     else

diff --git a/libavfilter/vf_fftdnoiz.c b/libavfilter/vf_fftdnoiz.c
new file mode 100644
index 0000000..7ee7dbc
--- /dev/null
+++ b/libavfilter/vf_fftdnoiz.c

@@ -0,0 +1,694 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "internal.h"
+#include "libavcodec/avfft.h"
+
+enum BufferTypes {
+    CURRENT,
+    PREV,
+    NEXT,
+    BSIZE
+};
+
+typedef struct PlaneContext {
+    int planewidth, planeheight;
+    int nox, noy;
+    int b;
+    int o;
+    float n;
+
+    float *buffer[BSIZE];
+    FFTComplex *hdata, *vdata;
+    int data_linesize;
+    int buffer_linesize;
+
+    FFTContext *fft, *ifft;
+} PlaneContext;
+
+typedef struct FFTdnoizContext {
+    const AVClass *class;
+
+    float sigma;
+    float amount;
+    int   block_bits;
+    float overlap;
+    int   nb_prev;
+    int   nb_next;
+    int   planesf;
+
+    AVFrame *prev, *cur, *next;
+
+    int depth;
+    int nb_planes;
+    PlaneContext planes[4];
+
+    void (*import_row)(FFTComplex *dst, uint8_t *src, int rw);
+    void (*export_row)(FFTComplex *src, uint8_t *dst, int rw, float scale, int depth);
+} FFTdnoizContext;
+
+#define OFFSET(x) offsetof(FFTdnoizContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+static const AVOption fftdnoiz_options[] = {
+    { "sigma",   "set denoise strength",
+        OFFSET(sigma),      AV_OPT_TYPE_FLOAT, {.dbl=1},        0,  30, .flags = FLAGS },
+    { "amount",  "set amount of denoising",
+        OFFSET(amount),     AV_OPT_TYPE_FLOAT, {.dbl=1},     0.01,   1, .flags = FLAGS },
+    { "block",   "set block log2(size)",
+        OFFSET(block_bits), AV_OPT_TYPE_INT,   {.i64=4},        3,   6, .flags = FLAGS },
+    { "overlap", "set block overlap",
+        OFFSET(overlap),    AV_OPT_TYPE_FLOAT, {.dbl=0.5},    0.2, 0.8, .flags = FLAGS },
+    { "prev",    "set number of previous frames for temporal denoising",
+        OFFSET(nb_prev),    AV_OPT_TYPE_INT,   {.i64=0},        0,   1, .flags = FLAGS },
+    { "next",    "set number of next frames for temporal denoising",
+        OFFSET(nb_next),    AV_OPT_TYPE_INT,   {.i64=0},        0,   1, .flags = FLAGS },
+    { "planes",  "set planes to filter",
+        OFFSET(planesf),    AV_OPT_TYPE_INT,   {.i64=7},        0,  15, .flags = FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(fftdnoiz);
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    FFTdnoizContext *s = ctx->priv;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        PlaneContext *p = &s->planes[i];
+
+        p->fft  = av_fft_init(s->block_bits, 0);
+        p->ifft = av_fft_init(s->block_bits, 1);
+        if (!p->fft || !p->ifft)
+            return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9,
+        AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12,
+        AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV440P10,
+        AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV420P12,
+        AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV444P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV420P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+typedef struct ThreadData {
+    float *src, *dst;
+} ThreadData;
+
+static void import_row8(FFTComplex *dst, uint8_t *src, int rw)
+{
+    int j;
+
+    for (j = 0; j < rw; j++) {
+        dst[j].re = src[j];
+        dst[j].im = 0;
+    }
+}
+
+static void export_row8(FFTComplex *src, uint8_t *dst, int rw, float scale, int depth)
+{
+    int j;
+
+    for (j = 0; j < rw; j++)
+        dst[j] = av_clip_uint8(src[j].re * scale);
+}
+
+static void import_row16(FFTComplex *dst, uint8_t *srcp, int rw)
+{
+    uint16_t *src = (uint16_t *)srcp;
+    int j;
+
+    for (j = 0; j < rw; j++) {
+        dst[j].re = src[j];
+        dst[j].im = 0;
+    }
+}
+
+static void export_row16(FFTComplex *src, uint8_t *dstp, int rw, float scale, int depth)
+{
+    uint16_t *dst = (uint16_t *)dstp;
+    int j;
+
+    for (j = 0; j < rw; j++)
+        dst[j] = av_clip_uintp2_c(src[j].re * scale, depth);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    const AVPixFmtDescriptor *desc;
+    FFTdnoizContext *s = ctx->priv;
+    int i;
+
+    desc = av_pix_fmt_desc_get(inlink->format);
+    s->depth = desc->comp[0].depth;
+
+    if (s->depth <= 8) {
+        s->import_row = import_row8;
+        s->export_row = export_row8;
+    } else {
+        s->import_row = import_row16;
+        s->export_row = export_row16;
+        s->sigma *= 1 << (s->depth - 8) * (1 + s->nb_prev + s->nb_next);
+    }
+
+    s->planes[1].planewidth = s->planes[2].planewidth = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planes[0].planewidth = s->planes[3].planewidth = inlink->w;
+    s->planes[1].planeheight = s->planes[2].planeheight = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planes[0].planeheight = s->planes[3].planeheight = inlink->h;
+
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+
+    for (i = 0; i < s->nb_planes; i++) {
+        PlaneContext *p = &s->planes[i];
+        int size;
+
+        p->b = 1 << s->block_bits;
+        p->n = 1.f / (p->b * p->b);
+        p->o = p->b * s->overlap;
+        size = p->b - p->o;
+        p->nox = (p->planewidth  + (size - 1)) / size;
+        p->noy = (p->planeheight + (size - 1)) / size;
+
+        av_log(ctx, AV_LOG_DEBUG, "nox:%d noy:%d size:%d\n", p->nox, p->noy, size);
+
+        p->buffer_linesize = p->b * p->nox * sizeof(FFTComplex);
+        p->buffer[CURRENT] = av_calloc(p->b * p->noy, p->buffer_linesize);
+        if (!p->buffer[CURRENT])
+            return AVERROR(ENOMEM);
+        if (s->nb_prev > 0) {
+            p->buffer[PREV] = av_calloc(p->b * p->noy, p->buffer_linesize);
+            if (!p->buffer[PREV])
+                return AVERROR(ENOMEM);
+        }
+        if (s->nb_next > 0) {
+            p->buffer[NEXT] = av_calloc(p->b * p->noy, p->buffer_linesize);
+            if (!p->buffer[NEXT])
+                return AVERROR(ENOMEM);
+        }
+        p->data_linesize = 2 * p->b * sizeof(float);
+        p->hdata = av_calloc(p->b, p->data_linesize);
+        p->vdata = av_calloc(p->b, p->data_linesize);
+        if (!p->hdata || !p->vdata)
+            return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+static void import_plane(FFTdnoizContext *s,
+                         uint8_t *srcp, int src_linesize,
+                         float *buffer, int buffer_linesize, int plane)
+{
+    PlaneContext *p = &s->planes[plane];
+    const int width = p->planewidth;
+    const int height = p->planeheight;
+    const int block = p->b;
+    const int overlap = p->o;
+    const int size = block - overlap;
+    const int nox = p->nox;
+    const int noy = p->noy;
+    const int bpp = (s->depth + 7) / 8;
+    const int data_linesize = p->data_linesize / sizeof(FFTComplex);
+    FFTComplex *hdata = p->hdata;
+    FFTComplex *vdata = p->vdata;
+    int x, y, i, j;
+
+    buffer_linesize /= sizeof(float);
+    for (y = 0; y < noy; y++) {
+        for (x = 0; x < nox; x++) {
+            const int rh = FFMIN(block, height - y * size);
+            const int rw = FFMIN(block, width  - x * size);
+            uint8_t *src = srcp + src_linesize * y * size + x * size * bpp;
+            float *bdst = buffer + buffer_linesize * y * block + x * block * 2;
+            FFTComplex *ssrc, *dst = hdata;
+
+            for (i = 0; i < rh; i++) {
+                s->import_row(dst, src, rw);
+                for (j = rw; j < block; j++) {
+                    dst[j].re = dst[block - j - 1].re;
+                    dst[j].im = 0;
+                }
+                av_fft_permute(p->fft, dst);
+                av_fft_calc(p->fft, dst);
+
+                src += src_linesize;
+                dst += data_linesize;
+            }
+
+            dst = hdata;
+            for (; i < block; i++) {
+                for (j = 0; j < block; j++) {
+                    dst[j].re = dst[(block - i - 1) * data_linesize + j].re;
+                    dst[j].im = dst[(block - i - 1) * data_linesize + j].im;
+                }
+            }
+
+            ssrc = hdata;
+            dst = vdata;
+            for (i = 0; i < block; i++) {
+                for (j = 0; j < block; j++)
+                    dst[j] = ssrc[j * data_linesize + i];
+                av_fft_permute(p->fft, dst);
+                av_fft_calc(p->fft, dst);
+                memcpy(bdst, dst, block * sizeof(FFTComplex));
+
+                dst += data_linesize;
+                bdst += buffer_linesize;
+            }
+        }
+    }
+}
+
+static void export_plane(FFTdnoizContext *s,
+                         uint8_t *dstp, int dst_linesize,
+                         float *buffer, int buffer_linesize, int plane)
+{
+    PlaneContext *p = &s->planes[plane];
+    const int depth = s->depth;
+    const int bpp = (depth + 7) / 8;
+    const int width = p->planewidth;
+    const int height = p->planeheight;
+    const int block = p->b;
+    const int overlap = p->o;
+    const int hoverlap = overlap / 2;
+    const int size = block - overlap;
+    const int nox = p->nox;
+    const int noy = p->noy;
+    const int data_linesize = p->data_linesize / sizeof(FFTComplex);
+    const float scale = 1.f / (block * block);
+    FFTComplex *hdata = p->hdata;
+    FFTComplex *vdata = p->vdata;
+    int x, y, i, j;
+
+    buffer_linesize /= sizeof(float);
+    for (y = 0; y < noy; y++) {
+        for (x = 0; x < nox; x++) {
+            const int woff = x == 0 ? 0 : hoverlap;
+            const int hoff = y == 0 ? 0 : hoverlap;
+            const int rw = x == 0 ? block : FFMIN(size, width  - x * size - woff);
+            const int rh = y == 0 ? block : FFMIN(size, height - y * size - hoff);
+            float *bsrc = buffer + buffer_linesize * y * block + x * block * 2;
+            uint8_t *dst = dstp + dst_linesize * (y * size + hoff) + (x * size + woff) * bpp;
+            FFTComplex *hdst, *ddst = vdata;
+
+            hdst = hdata;
+            for (i = 0; i < block; i++) {
+                memcpy(ddst, bsrc, block * sizeof(FFTComplex));
+                av_fft_permute(p->ifft, ddst);
+                av_fft_calc(p->ifft, ddst);
+                for (j = 0; j < block; j++) {
+                    hdst[j * data_linesize + i] = ddst[j];
+                }
+
+                ddst += data_linesize;
+                bsrc += buffer_linesize;
+            }
+
+            hdst = hdata + hoff * data_linesize;
+            for (i = 0; i < rh; i++) {
+                av_fft_permute(p->ifft, hdst);
+                av_fft_calc(p->ifft, hdst);
+                s->export_row(hdst + woff, dst, rw, scale, depth);
+
+                hdst += data_linesize;
+                dst += dst_linesize;
+            }
+        }
+    }
+}
+
+static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float *nbuffer)
+{
+    PlaneContext *p = &s->planes[plane];
+    const int block = p->b;
+    const int nox = p->nox;
+    const int noy = p->noy;
+    const int buffer_linesize = p->buffer_linesize / sizeof(float);
+    const float sigma = s->sigma * s->sigma * block * block;
+    const float limit = 1.f - s->amount;
+    float *cbuffer = p->buffer[CURRENT];
+    const float cfactor = sqrtf(3.f) * 0.5f;
+    const float scale = 1.f / 3.f;
+    int y, x, i, j;
+
+    for (y = 0; y < noy; y++) {
+        for (x = 0; x < nox; x++) {
+            float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
+            float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
+            float *nbuff = nbuffer + buffer_linesize * y * block + x * block * 2;
+
+            for (i = 0; i < block; i++) {
+                for (j = 0; j < block; j++) {
+                    float sumr, sumi, difr, difi, mpr, mpi, mnr, mni;
+                    float factor, power, sumpnr, sumpni;
+
+                    sumpnr = pbuff[2 * j    ] + nbuff[2 * j    ];
+                    sumpni = pbuff[2 * j + 1] + nbuff[2 * j + 1];
+                    sumr = cbuff[2 * j    ] + sumpnr;
+                    sumi = cbuff[2 * j + 1] + sumpni;
+                    difr = cfactor * (nbuff[2 * j    ] - pbuff[2 * j    ]);
+                    difi = cfactor * (pbuff[2 * j + 1] - nbuff[2 * j + 1]);
+                    mpr = cbuff[2 * j    ] - 0.5f * sumpnr + difi;
+                    mnr = mpr - difi - difi;
+                    mpi = cbuff[2 * j + 1] - 0.5f * sumpni + difr;
+                    mni = mpi - difr - difr;
+                    power = sumr * sumr + sumi * sumi + 1e-15f;
+                    factor = FFMAX((power - sigma) / power, limit);
+                    sumr *= factor;
+                    sumi *= factor;
+                    power = mpr * mpr + mpi * mpi + 1e-15f;
+                    factor = FFMAX((power - sigma) / power, limit);
+                    mpr *= factor;
+                    mpi *= factor;
+                    power = mnr * mnr + mni * mni + 1e-15f;
+                    factor = FFMAX((power - sigma) / power, limit);
+                    mnr *= factor;
+                    mni *= factor;
+                    cbuff[2 * j    ] = (sumr + mpr + mnr) * scale;
+                    cbuff[2 * j + 1] = (sumi + mpi + mni) * scale;
+
+                }
+
+                cbuff += buffer_linesize;
+                pbuff += buffer_linesize;
+                nbuff += buffer_linesize;
+            }
+        }
+    }
+}
+
+static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
+{
+    PlaneContext *p = &s->planes[plane];
+    const int block = p->b;
+    const int nox = p->nox;
+    const int noy = p->noy;
+    const int buffer_linesize = p->buffer_linesize / sizeof(float);
+    const float sigma = s->sigma * s->sigma * block * block;
+    const float limit = 1.f - s->amount;
+    float *cbuffer = p->buffer[CURRENT];
+    int y, x, i, j;
+
+    for (y = 0; y < noy; y++) {
+        for (x = 0; x < nox; x++) {
+            float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
+            float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
+
+            for (i = 0; i < block; i++) {
+                for (j = 0; j < block; j++) {
+                    float factor, power, re, im, pre, pim;
+                    float sumr, sumi, difr, difi;
+
+                    re = cbuff[j * 2    ];
+                    pre = pbuff[j * 2    ];
+                    im = cbuff[j * 2 + 1];
+                    pim = pbuff[j * 2 + 1];
+
+                    sumr = re + pre;
+                    sumi = im + pim;
+                    difr = re - pre;
+                    difi = im - pim;
+
+                    power = sumr * sumr + sumi * sumi + 1e-15f;
+                    factor = FFMAX(limit, (power - sigma) / power);
+                    sumr *= factor;
+                    sumi *= factor;
+                    power = difr * difr + difi * difi + 1e-15f;
+                    factor = FFMAX(limit, (power - sigma) / power);
+                    difr *= factor;
+                    difi *= factor;
+
+                    cbuff[j * 2    ] = (sumr + difr) * 0.5f;
+                    cbuff[j * 2 + 1] = (sumi + difi) * 0.5f;
+                }
+
+                cbuff += buffer_linesize;
+                pbuff += buffer_linesize;
+            }
+        }
+    }
+}
+
+static void filter_plane2d(FFTdnoizContext *s, int plane)
+{
+    PlaneContext *p = &s->planes[plane];
+    const int block = p->b;
+    const int nox = p->nox;
+    const int noy = p->noy;
+    const int buffer_linesize = p->buffer_linesize / 4;
+    const float sigma = s->sigma * s->sigma * block * block;
+    const float limit = 1.f - s->amount;
+    float *buffer = p->buffer[CURRENT];
+    int y, x, i, j;
+
+    for (y = 0; y < noy; y++) {
+        for (x = 0; x < nox; x++) {
+            float *buff = buffer + buffer_linesize * y * block + x * block * 2;
+
+            for (i = 0; i < block; i++) {
+                for (j = 0; j < block; j++) {
+                    float factor, power, re, im;
+
+                    re = buff[j * 2    ];
+                    im = buff[j * 2 + 1];
+                    power = re * re + im * im + 1e-15f;
+                    factor = FFMAX(limit, (power - sigma) / power);
+                    buff[j * 2    ] *= factor;
+                    buff[j * 2 + 1] *= factor;
+                }
+
+                buff += buffer_linesize;
+            }
+        }
+    }
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    FFTdnoizContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    int direct, plane;
+    AVFrame *out;
+
+    if (s->nb_next > 0 && s->nb_prev > 0) {
+        av_frame_free(&s->prev);
+        s->prev = s->cur;
+        s->cur = s->next;
+        s->next = in;
+
+        if (!s->prev && s->cur) {
+            s->prev = av_frame_clone(s->cur);
+            if (!s->prev)
+                return AVERROR(ENOMEM);
+        }
+        if (!s->cur)
+            return 0;
+    } else if (s->nb_next > 0) {
+        av_frame_free(&s->cur);
+        s->cur = s->next;
+        s->next = in;
+
+        if (!s->cur)
+            return 0;
+    } else if (s->nb_prev > 0) {
+        av_frame_free(&s->prev);
+        s->prev = s->cur;
+        s->cur = in;
+
+        if (!s->prev)
+            s->prev = av_frame_clone(s->cur);
+        if (!s->prev)
+            return AVERROR(ENOMEM);
+    } else {
+        s->cur = in;
+    }
+
+    if (av_frame_is_writable(in) && s->nb_next == 0 && s->nb_prev == 0) {
+        direct = 1;
+        out = in;
+    } else {
+        direct = 0;
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out)
+            return AVERROR(ENOMEM);
+        av_frame_copy_props(out, s->cur);
+    }
+
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        PlaneContext *p = &s->planes[plane];
+
+        if (!((1 << plane) & s->planesf) || ctx->is_disabled) {
+            if (!direct)
+                av_image_copy_plane(out->data[plane], out->linesize[plane],
+                                    s->cur->data[plane], s->cur->linesize[plane],
+                                    p->planewidth, p->planeheight);
+            continue;
+        }
+
+        if (s->next) {
+            import_plane(s, s->next->data[plane], s->next->linesize[plane],
+                         p->buffer[NEXT], p->buffer_linesize, plane);
+        }
+
+        if (s->prev) {
+            import_plane(s, s->prev->data[plane], s->prev->linesize[plane],
+                         p->buffer[PREV], p->buffer_linesize, plane);
+        }
+
+        import_plane(s, s->cur->data[plane], s->cur->linesize[plane],
+                     p->buffer[CURRENT], p->buffer_linesize, plane);
+
+        if (s->next && s->prev) {
+            filter_plane3d2(s, plane, p->buffer[PREV], p->buffer[NEXT]);
+        } else if (s->next) {
+            filter_plane3d1(s, plane, p->buffer[NEXT]);
+        } else  if (s->prev) {
+            filter_plane3d1(s, plane, p->buffer[PREV]);
+        } else {
+            filter_plane2d(s, plane);
+        }
+
+        export_plane(s, out->data[plane], out->linesize[plane],
+                     p->buffer[CURRENT], p->buffer_linesize, plane);
+    }
+
+    if (s->nb_next == 0 && s->nb_prev == 0) {
+        if (direct) {
+            s->cur = NULL;
+        } else {
+            av_frame_free(&s->cur);
+        }
+    }
+    return ff_filter_frame(outlink, out);
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    FFTdnoizContext *s = ctx->priv;
+    int ret = 0;
+
+    ret = ff_request_frame(ctx->inputs[0]);
+
+    if (ret == AVERROR_EOF && (s->nb_next > 0)) {
+        AVFrame *buf;
+
+        if (s->next && s->nb_next > 0)
+            buf = av_frame_clone(s->next);
+        else if (s->cur)
+            buf = av_frame_clone(s->cur);
+        else
+            buf = av_frame_clone(s->prev);
+        if (!buf)
+            return AVERROR(ENOMEM);
+
+        ret = filter_frame(ctx->inputs[0], buf);
+        if (ret < 0)
+            return ret;
+        ret = AVERROR_EOF;
+    }
+
+    return ret;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    FFTdnoizContext *s = ctx->priv;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        PlaneContext *p = &s->planes[i];
+
+        av_freep(&p->hdata);
+        av_freep(&p->vdata);
+        av_freep(&p->buffer[PREV]);
+        av_freep(&p->buffer[CURRENT]);
+        av_freep(&p->buffer[NEXT]);
+        av_fft_end(p->fft);
+        av_fft_end(p->ifft);
+    }
+
+    av_frame_free(&s->prev);
+    av_frame_free(&s->cur);
+    av_frame_free(&s->next);
+}
+
+static const AVFilterPad fftdnoiz_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad fftdnoiz_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .request_frame = request_frame,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_fftdnoiz = {
+    .name          = "fftdnoiz",
+    .description   = NULL_IF_CONFIG_SMALL("Denoise frames using 3D FFT."),
+    .priv_size     = sizeof(FFTdnoizContext),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = fftdnoiz_inputs,
+    .outputs       = fftdnoiz_outputs,
+    .priv_class    = &fftdnoiz_class,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+};

diff --git a/libavfilter/vf_fftfilt.c b/libavfilter/vf_fftfilt.c
index 7f60ca1..af44b1e 100644
--- a/libavfilter/vf_fftfilt.c
+++ b/libavfilter/vf_fftfilt.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Arwa Arif <arwaarif1994@gmail.com>
+ * Copyright (c) 2017 Paul B Mahol
  *
  * This file is part of FFmpeg.
  *
@@ -64,6 +65,8 @@
     AVExpr *weight_expr[MAX_PLANES];
     double *weight[MAX_PLANES];
 
+    void (*rdft_horizontal)(struct FFTFILTContext *s, AVFrame *in, int w, int h, int plane);
+    void (*irdft_horizontal)(struct FFTFILTContext *s, AVFrame *out, int w, int h, int plane);
 } FFTFILTContext;
 
 static const char *const var_names[] = {   "X",   "Y",   "W",   "H",   "N", NULL        };
@@ -111,7 +114,7 @@
 }
 
 /*Horizontal pass - RDFT*/
-static void rdft_horizontal(FFTFILTContext *s, AVFrame *in, int w, int h, int plane)
+static void rdft_horizontal8(FFTFILTContext *s, AVFrame *in, int w, int h, int plane)
 {
     int i, j;
 
@@ -126,6 +129,23 @@
         av_rdft_calc(s->hrdft[plane], s->rdft_hdata[plane] + i * s->rdft_hlen[plane]);
 }
 
+static void rdft_horizontal16(FFTFILTContext *s, AVFrame *in, int w, int h, int plane)
+{
+    const uint16_t *src = (const uint16_t *)in->data[plane];
+    int linesize = in->linesize[plane] / 2;
+    int i, j;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j++)
+            s->rdft_hdata[plane][i * s->rdft_hlen[plane] + j] = *(src + linesize * i + j);
+
+        copy_rev(s->rdft_hdata[plane] + i * s->rdft_hlen[plane], w, s->rdft_hlen[plane]);
+    }
+
+    for (i = 0; i < h; i++)
+        av_rdft_calc(s->hrdft[plane], s->rdft_hdata[plane] + i * s->rdft_hlen[plane]);
+}
+
 /*Vertical pass - RDFT*/
 static void rdft_vertical(FFTFILTContext *s, int h, int plane)
 {
@@ -156,7 +176,7 @@
 }
 
 /*Horizontal pass - IRDFT*/
-static void irdft_horizontal(FFTFILTContext *s, AVFrame *out, int w, int h, int plane)
+static void irdft_horizontal8(FFTFILTContext *s, AVFrame *out, int w, int h, int plane)
 {
     int i, j;
 
@@ -171,6 +191,24 @@
                                                                           s->rdft_vlen[plane]), 0, 255);
 }
 
+static void irdft_horizontal16(FFTFILTContext *s, AVFrame *out, int w, int h, int plane)
+{
+    uint16_t *dst = (uint16_t *)out->data[plane];
+    int linesize = out->linesize[plane] / 2;
+    int max = (1 << s->depth) - 1;
+    int i, j;
+
+    for (i = 0; i < h; i++)
+        av_rdft_calc(s->ihrdft[plane], s->rdft_hdata[plane] + i * s->rdft_hlen[plane]);
+
+    for (i = 0; i < h; i++)
+        for (j = 0; j < w; j++)
+            *(dst + linesize * i + j) = av_clip(s->rdft_hdata[plane][i
+                                                *s->rdft_hlen[plane] + j] * 4 /
+                                                (s->rdft_hlen[plane] *
+                                                s->rdft_vlen[plane]), 0, max);
+}
+
 static av_cold int initialize(AVFilterContext *ctx)
 {
     FFTFILTContext *s = ctx->priv;
@@ -276,6 +314,16 @@
         if (s->eval_mode == EVAL_MODE_INIT)
             do_eval(s, inlink, plane);
     }
+
+    if (s->depth <= 8) {
+        s->rdft_horizontal = rdft_horizontal8;
+        s->irdft_horizontal = irdft_horizontal8;
+    } else if (s->depth > 8) {
+        s->rdft_horizontal = rdft_horizontal16;
+        s->irdft_horizontal = irdft_horizontal16;
+    } else {
+        return AVERROR_BUG;
+    }
     return 0;
 }
 
@@ -302,7 +350,7 @@
         if (s->eval_mode == EVAL_MODE_FRAME)
             do_eval(s, inlink, plane);
 
-        rdft_horizontal(s, in, w, h, plane);
+        s->rdft_horizontal(s, in, w, h, plane);
         rdft_vertical(s, h, plane);
 
         /*Change user defined parameters*/
@@ -314,7 +362,7 @@
         s->rdft_vdata[plane][0] += s->rdft_hlen[plane] * s->rdft_vlen[plane] * s->dc[plane];
 
         irdft_vertical(s, h, plane);
-        irdft_horizontal(s, out, w, h, plane);
+        s->irdft_horizontal(s, out, w, h, plane);
     }
 
     av_frame_free(&in);
@@ -344,6 +392,15 @@
         AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P,
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P,
         AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV420P14,
+        AV_PIX_FMT_YUV420P16,
+        AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV422P10,
+        AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV422P14,
+        AV_PIX_FMT_YUV422P16,
+        AV_PIX_FMT_YUV444P9, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV444P14,
+        AV_PIX_FMT_YUV444P16,
         AV_PIX_FMT_NONE
     };
 

diff --git a/libavfilter/vf_fieldmatch.c b/libavfilter/vf_fieldmatch.c
index 3694f26..5a73eb4 100644
--- a/libavfilter/vf_fieldmatch.c
+++ b/libavfilter/vf_fieldmatch.c

@@ -37,6 +37,7 @@
 #include "libavutil/opt.h"
 #include "libavutil/timestamp.h"
 #include "avfilter.h"
+#include "filters.h"
 #include "internal.h"
 
 #define INPUT_MAIN     0
@@ -79,6 +80,7 @@
     AVFrame *prv2, *src2, *nxt2;    ///< sliding window of the optional second stream
     int got_frame[2];               ///< frame request flag for each input stream
     int hsub, vsub;                 ///< chroma subsampling values
+    int bpc;                        ///< bytes per component
     uint32_t eof;                   ///< bitmask for end of stream
     int64_t lastscdiff;
     int64_t lastn;
@@ -503,9 +505,9 @@
         int prvf_linesize, nxtf_linesize;
         const int width  = get_width (fm, src, plane);
         const int height = get_height(fm, src, plane);
-        const int y0a = fm->y0 >> (plane != 0);
-        const int y1a = fm->y1 >> (plane != 0);
-        const int startx = (plane == 0 ? 8 : 4);
+        const int y0a = fm->y0 >> (plane ? fm->vsub : 0);
+        const int y1a = fm->y1 >> (plane ? fm->vsub : 0);
+        const int startx = (plane == 0 ? 8 : 8 >> fm->hsub);
         const int stopx  = width - startx;
         const uint8_t *srcpf, *srcf, *srcnf;
         const uint8_t *prvpf, *prvnf, *nxtpf, *nxtnf;
@@ -613,7 +615,7 @@
         const int nb_copy_fields = (plane_h >> 1) + (field ? 0 : (plane_h & 1));
         av_image_copy_plane(dst->data[plane] + field*dst->linesize[plane], dst->linesize[plane] << 1,
                             src->data[plane] + field*src->linesize[plane], src->linesize[plane] << 1,
-                            get_width(fm, src, plane), nb_copy_fields);
+                            get_width(fm, src, plane) * fm->bpc, nb_copy_fields);
     }
 }
 
@@ -697,9 +699,11 @@
         av_assert0(prv && src && nxt);                          \
 } while (0)
     if (FF_INLINK_IDX(inlink) == INPUT_MAIN) {
+        av_assert0(fm->got_frame[INPUT_MAIN] == 0);
         SLIDING_FRAME_WINDOW(fm->prv, fm->src, fm->nxt);
         fm->got_frame[INPUT_MAIN] = 1;
     } else {
+        av_assert0(fm->got_frame[INPUT_CLEANSRC] == 0);
         SLIDING_FRAME_WINDOW(fm->prv2, fm->src2, fm->nxt2);
         fm->got_frame[INPUT_CLEANSRC] = 1;
     }
@@ -818,50 +822,99 @@
     return ff_filter_frame(outlink, dst);
 }
 
-static int request_inlink(AVFilterContext *ctx, int lid)
+static int activate(AVFilterContext *ctx)
 {
-    int ret = 0;
     FieldMatchContext *fm = ctx->priv;
+    AVFrame *frame = NULL;
+    int ret = 0, status;
+    int64_t pts;
 
-    if (!fm->got_frame[lid]) {
-        AVFilterLink *inlink = ctx->inputs[lid];
-        ret = ff_request_frame(inlink);
-        if (ret == AVERROR_EOF) { // flushing
-            fm->eof |= 1 << lid;
-            ret = filter_frame(inlink, NULL);
-        }
+    if ((fm->got_frame[INPUT_MAIN] == 0) &&
+        (ret = ff_inlink_consume_frame(ctx->inputs[INPUT_MAIN], &frame)) > 0) {
+        ret = filter_frame(ctx->inputs[INPUT_MAIN], frame);
+        if (ret < 0)
+            return ret;
     }
-    return ret;
-}
-
-static int request_frame(AVFilterLink *outlink)
-{
-    int ret;
-    AVFilterContext *ctx = outlink->src;
-    FieldMatchContext *fm = ctx->priv;
-    const uint32_t eof_mask = 1<<INPUT_MAIN | fm->ppsrc<<INPUT_CLEANSRC;
-
-    if ((fm->eof & eof_mask) == eof_mask) // flush done?
-        return AVERROR_EOF;
-    if ((ret = request_inlink(ctx, INPUT_MAIN)) < 0)
+    if (ret < 0)
         return ret;
-    if (fm->ppsrc && (ret = request_inlink(ctx, INPUT_CLEANSRC)) < 0)
+    if (fm->ppsrc &&
+        (fm->got_frame[INPUT_CLEANSRC] == 0) &&
+        (ret = ff_inlink_consume_frame(ctx->inputs[INPUT_CLEANSRC], &frame)) > 0) {
+        ret = filter_frame(ctx->inputs[INPUT_CLEANSRC], frame);
+        if (ret < 0)
+            return ret;
+    }
+    if (ret < 0) {
         return ret;
-    return 0;
+    } else if (ff_inlink_acknowledge_status(ctx->inputs[INPUT_MAIN], &status, &pts)) {
+        if (status == AVERROR_EOF) { // flushing
+            fm->eof |= 1 << INPUT_MAIN;
+            ret = filter_frame(ctx->inputs[INPUT_MAIN], NULL);
+        }
+        ff_outlink_set_status(ctx->outputs[0], status, pts);
+        return ret;
+    } else if (fm->ppsrc && ff_inlink_acknowledge_status(ctx->inputs[INPUT_CLEANSRC], &status, &pts)) {
+        if (status == AVERROR_EOF) { // flushing
+            fm->eof |= 1 << INPUT_CLEANSRC;
+            ret = filter_frame(ctx->inputs[INPUT_CLEANSRC], NULL);
+        }
+        ff_outlink_set_status(ctx->outputs[0], status, pts);
+        return ret;
+    } else {
+        if (ff_outlink_frame_wanted(ctx->outputs[0])) {
+            if (fm->got_frame[INPUT_MAIN] == 0)
+                ff_inlink_request_frame(ctx->inputs[INPUT_MAIN]);
+            if (fm->ppsrc && (fm->got_frame[INPUT_CLEANSRC] == 0))
+                ff_inlink_request_frame(ctx->inputs[INPUT_CLEANSRC]);
+        }
+        return 0;
+    }
 }
 
 static int query_formats(AVFilterContext *ctx)
 {
-    // TODO: second input source can support >8bit depth
+    FieldMatchContext *fm = ctx->priv;
+
     static const enum AVPixelFormat pix_fmts[] = {
         AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV420P,
         AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV410P,
         AV_PIX_FMT_NONE
     };
+    static const enum AVPixelFormat unproc_pix_fmts[] = {
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV440P10,
+        AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV420P12,
+        AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV444P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV420P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_NONE
+    };
+    int ret;
+
     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
     if (!fmts_list)
         return AVERROR(ENOMEM);
-    return ff_set_common_formats(ctx, fmts_list);
+    if (!fm->ppsrc) {
+        return ff_set_common_formats(ctx, fmts_list);
+    }
+
+    if ((ret = ff_formats_ref(fmts_list, &ctx->inputs[INPUT_MAIN]->out_formats)) < 0)
+        return ret;
+    fmts_list = ff_make_format_list(unproc_pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    if ((ret = ff_formats_ref(fmts_list, &ctx->outputs[0]->in_formats)) < 0)
+        return ret;
+    if ((ret = ff_formats_ref(fmts_list, &ctx->inputs[INPUT_CLEANSRC]->out_formats)) < 0)
+        return ret;
+    return 0;
 }
 
 static int config_input(AVFilterLink *inlink)
@@ -901,7 +954,6 @@
     AVFilterPad pad = {
         .name         = av_strdup("main"),
         .type         = AVMEDIA_TYPE_VIDEO,
-        .filter_frame = filter_frame,
         .config_props = config_input,
     };
     int ret;
@@ -947,7 +999,12 @@
         av_frame_free(&fm->prv);
     if (fm->nxt != fm->src)
         av_frame_free(&fm->nxt);
+    if (fm->prv2 != fm->src2)
+        av_frame_free(&fm->prv2);
+    if (fm->nxt2 != fm->src2)
+        av_frame_free(&fm->nxt2);
     av_frame_free(&fm->src);
+    av_frame_free(&fm->src2);
     av_freep(&fm->map_data[0]);
     av_freep(&fm->cmask_data[0]);
     av_freep(&fm->tbuffer);
@@ -959,10 +1016,12 @@
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx  = outlink->src;
-    const FieldMatchContext *fm = ctx->priv;
+    FieldMatchContext *fm = ctx->priv;
     const AVFilterLink *inlink =
         ctx->inputs[fm->ppsrc ? INPUT_CLEANSRC : INPUT_MAIN];
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 
+    fm->bpc = (desc->comp[0].depth + 7) / 8;
     outlink->time_base = inlink->time_base;
     outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
     outlink->frame_rate = inlink->frame_rate;
@@ -975,7 +1034,6 @@
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
-        .request_frame = request_frame,
         .config_props  = config_output,
     },
     { NULL }
@@ -987,6 +1045,7 @@
     .query_formats  = query_formats,
     .priv_size      = sizeof(FieldMatchContext),
     .init           = fieldmatch_init,
+    .activate       = activate,
     .uninit         = fieldmatch_uninit,
     .inputs         = NULL,
     .outputs        = fieldmatch_outputs,

diff --git a/libavfilter/vf_fillborders.c b/libavfilter/vf_fillborders.c
new file mode 100644
index 0000000..1344587
--- /dev/null
+++ b/libavfilter/vf_fillborders.c

@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/colorspace.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+enum { Y, U, V, A };
+enum { R, G, B };
+
+enum FillMode { FM_SMEAR, FM_MIRROR, FM_FIXED, FM_NB_MODES };
+
+typedef struct Borders {
+    int left, right, top, bottom;
+} Borders;
+
+typedef struct FillBordersContext {
+    const AVClass *class;
+    int left, right, top, bottom;
+    int mode;
+
+    int nb_planes;
+    int depth;
+    Borders borders[4];
+    int planewidth[4];
+    int planeheight[4];
+    uint8_t fill[4];
+    uint8_t yuv_color[4];
+    uint8_t rgba_color[4];
+
+    void (*fillborders)(struct FillBordersContext *s, AVFrame *frame);
+} FillBordersContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P,
+        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static void smear_borders8(FillBordersContext *s, AVFrame *frame)
+{
+    int p, y;
+
+    for (p = 0; p < s->nb_planes; p++) {
+        uint8_t *ptr = frame->data[p];
+        int linesize = frame->linesize[p];
+
+        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+            memset(ptr + y * linesize,
+                   *(ptr + y * linesize + s->borders[p].left),
+                   s->borders[p].left);
+            memset(ptr + y * linesize + s->planewidth[p] - s->borders[p].right,
+                   *(ptr + y * linesize + s->planewidth[p] - s->borders[p].right - 1),
+                   s->borders[p].right);
+        }
+
+        for (y = 0; y < s->borders[p].top; y++) {
+            memcpy(ptr + y * linesize,
+                   ptr + s->borders[p].top * linesize, s->planewidth[p]);
+        }
+
+        for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) {
+            memcpy(ptr + y * linesize,
+                   ptr + (s->planeheight[p] - s->borders[p].bottom - 1) * linesize,
+                   s->planewidth[p]);
+        }
+    }
+}
+
+static void smear_borders16(FillBordersContext *s, AVFrame *frame)
+{
+    int p, y, x;
+
+    for (p = 0; p < s->nb_planes; p++) {
+        uint16_t *ptr = (uint16_t *)frame->data[p];
+        int linesize = frame->linesize[p] / 2;
+
+        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+            for (x = 0; x < s->borders[p].left; x++) {
+                ptr[y * linesize + x] =  *(ptr + y * linesize + s->borders[p].left);
+            }
+
+            for (x = 0; x < s->borders[p].right; x++) {
+                ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] =
+                   *(ptr + y * linesize + s->planewidth[p] - s->borders[p].right - 1);
+            }
+        }
+
+        for (y = 0; y < s->borders[p].top; y++) {
+            memcpy(ptr + y * linesize,
+                   ptr + s->borders[p].top * linesize, s->planewidth[p] * 2);
+        }
+
+        for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) {
+            memcpy(ptr + y * linesize,
+                   ptr + (s->planeheight[p] - s->borders[p].bottom - 1) * linesize,
+                   s->planewidth[p] * 2);
+        }
+    }
+}
+
+static void mirror_borders8(FillBordersContext *s, AVFrame *frame)
+{
+    int p, y, x;
+
+    for (p = 0; p < s->nb_planes; p++) {
+        uint8_t *ptr = frame->data[p];
+        int linesize = frame->linesize[p];
+
+        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+            for (x = 0; x < s->borders[p].left; x++) {
+                ptr[y * linesize + x] = ptr[y * linesize + s->borders[p].left * 2 - 1 - x];
+            }
+
+            for (x = 0; x < s->borders[p].right; x++) {
+                ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] =
+                    ptr[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x];
+            }
+        }
+
+        for (y = 0; y < s->borders[p].top; y++) {
+            memcpy(ptr + y * linesize,
+                   ptr + (s->borders[p].top * 2 - 1 - y) * linesize,
+                   s->planewidth[p]);
+        }
+
+        for (y = 0; y < s->borders[p].bottom; y++) {
+            memcpy(ptr + (s->planeheight[p] - s->borders[p].bottom + y) * linesize,
+                   ptr + (s->planeheight[p] - s->borders[p].bottom - 1 - y) * linesize,
+                   s->planewidth[p]);
+        }
+    }
+}
+
+static void mirror_borders16(FillBordersContext *s, AVFrame *frame)
+{
+    int p, y, x;
+
+    for (p = 0; p < s->nb_planes; p++) {
+        uint16_t *ptr = (uint16_t *)frame->data[p];
+        int linesize = frame->linesize[p] / 2;
+
+        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+            for (x = 0; x < s->borders[p].left; x++) {
+                ptr[y * linesize + x] = ptr[y * linesize + s->borders[p].left * 2 - 1 - x];
+            }
+
+            for (x = 0; x < s->borders[p].right; x++) {
+                ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] =
+                    ptr[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x];
+            }
+        }
+
+        for (y = 0; y < s->borders[p].top; y++) {
+            memcpy(ptr + y * linesize,
+                   ptr + (s->borders[p].top * 2 - 1 - y) * linesize,
+                   s->planewidth[p] * 2);
+        }
+
+        for (y = 0; y < s->borders[p].bottom; y++) {
+            memcpy(ptr + (s->planeheight[p] - s->borders[p].bottom + y) * linesize,
+                   ptr + (s->planeheight[p] - s->borders[p].bottom - 1 - y) * linesize,
+                   s->planewidth[p] * 2);
+        }
+    }
+}
+
+static void fixed_borders8(FillBordersContext *s, AVFrame *frame)
+{
+    int p, y;
+
+    for (p = 0; p < s->nb_planes; p++) {
+        uint8_t *ptr = frame->data[p];
+        uint8_t fill = s->fill[p];
+        int linesize = frame->linesize[p];
+
+        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+            memset(ptr + y * linesize, fill, s->borders[p].left);
+            memset(ptr + y * linesize + s->planewidth[p] - s->borders[p].right, fill,
+                   s->borders[p].right);
+        }
+
+        for (y = 0; y < s->borders[p].top; y++) {
+            memset(ptr + y * linesize, fill, s->planewidth[p]);
+        }
+
+        for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) {
+            memset(ptr + y * linesize, fill, s->planewidth[p]);
+        }
+    }
+}
+
+static void fixed_borders16(FillBordersContext *s, AVFrame *frame)
+{
+    int p, y, x;
+
+    for (p = 0; p < s->nb_planes; p++) {
+        uint16_t *ptr = (uint16_t *)frame->data[p];
+        uint16_t fill = s->fill[p] << (s->depth - 8);
+        int linesize = frame->linesize[p] / 2;
+
+        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+            for (x = 0; x < s->borders[p].left; x++) {
+                ptr[y * linesize + x] = fill;
+            }
+
+            for (x = 0; x < s->borders[p].right; x++) {
+                ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] = fill;
+            }
+        }
+
+        for (y = 0; y < s->borders[p].top; y++) {
+            for (x = 0; x < s->planewidth[p]; x++) {
+                ptr[y * linesize + x] = fill;
+            }
+        }
+
+        for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) {
+            for (x = 0; x < s->planewidth[p]; x++) {
+                ptr[y * linesize + x] = fill;
+            }
+        }
+    }
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    FillBordersContext *s = inlink->dst->priv;
+
+    s->fillborders(s, frame);
+
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    FillBordersContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+
+    s->nb_planes = desc->nb_components;
+    s->depth = desc->comp[0].depth;
+
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
+
+    s->borders[0].left   = s->borders[3].left = s->left;
+    s->borders[0].right  = s->borders[3].right = s->right;
+    s->borders[0].top    = s->borders[3].top = s->top;
+    s->borders[0].bottom = s->borders[3].bottom = s->bottom;
+
+    s->borders[1].left   = s->left >> desc->log2_chroma_w;
+    s->borders[1].right  = s->right >> desc->log2_chroma_w;
+    s->borders[1].top    = s->top >> desc->log2_chroma_h;
+    s->borders[1].bottom = s->bottom >> desc->log2_chroma_h;
+
+    s->borders[2].left   = s->left >> desc->log2_chroma_w;
+    s->borders[2].right  = s->right >> desc->log2_chroma_w;
+    s->borders[2].top    = s->top >> desc->log2_chroma_h;
+    s->borders[2].bottom = s->bottom >> desc->log2_chroma_h;
+
+    if (inlink->w < s->left + s->right ||
+        inlink->w <= s->left ||
+        inlink->w <= s->right ||
+        inlink->h < s->top + s->bottom ||
+        inlink->h <= s->top ||
+        inlink->h <= s->bottom ||
+        inlink->w < s->left * 2 ||
+        inlink->w < s->right * 2 ||
+        inlink->h < s->top * 2 ||
+        inlink->h < s->bottom * 2) {
+        av_log(ctx, AV_LOG_ERROR, "Borders are bigger than input frame size.\n");
+        return AVERROR(EINVAL);
+    }
+
+    switch (s->mode) {
+    case FM_SMEAR:  s->fillborders = s->depth <= 8 ? smear_borders8  : smear_borders16;  break;
+    case FM_MIRROR: s->fillborders = s->depth <= 8 ? mirror_borders8 : mirror_borders16; break;
+    case FM_FIXED:  s->fillborders = s->depth <= 8 ? fixed_borders8  : fixed_borders16;  break;
+    }
+
+    s->yuv_color[Y] = RGB_TO_Y_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B]);
+    s->yuv_color[U] = RGB_TO_U_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0);
+    s->yuv_color[V] = RGB_TO_V_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0);
+    s->yuv_color[A] = s->rgba_color[A];
+
+    if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
+        uint8_t rgba_map[4];
+        int i;
+
+        ff_fill_rgba_map(rgba_map, inlink->format);
+        for (i = 0; i < 4; i++)
+            s->fill[rgba_map[i]] = s->rgba_color[i];
+    } else {
+        memcpy(s->fill, s->yuv_color, sizeof(s->yuv_color));
+    }
+
+    return 0;
+}
+
+#define OFFSET(x) offsetof(FillBordersContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption fillborders_options[] = {
+    { "left",   "set the left fill border",   OFFSET(left),   AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX,    FLAGS },
+    { "right",  "set the right fill border",  OFFSET(right),  AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX,    FLAGS },
+    { "top",    "set the top fill border",    OFFSET(top),    AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX,    FLAGS },
+    { "bottom", "set the bottom fill border", OFFSET(bottom), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX,    FLAGS },
+    { "mode",   "set the fill borders mode",  OFFSET(mode),   AV_OPT_TYPE_INT, {.i64=FM_SMEAR}, 0, FM_NB_MODES-1, FLAGS, "mode" },
+        { "smear",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=FM_SMEAR},  0, 0, FLAGS, "mode" },
+        { "mirror", NULL, 0, AV_OPT_TYPE_CONST, {.i64=FM_MIRROR}, 0, 0, FLAGS, "mode" },
+        { "fixed",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=FM_FIXED},  0, 0, FLAGS, "mode" },
+    { "color",  "set the color for the fixed mode", OFFSET(rgba_color), AV_OPT_TYPE_COLOR, {.str = "black"}, .flags = FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(fillborders);
+
+static const AVFilterPad fillborders_inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_VIDEO,
+        .config_props   = config_input,
+        .filter_frame   = filter_frame,
+        .needs_writable = 1,
+    },
+    { NULL }
+};
+
+static const AVFilterPad fillborders_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_fillborders = {
+    .name          = "fillborders",
+    .description   = NULL_IF_CONFIG_SMALL("Fill borders of the input video."),
+    .priv_size     = sizeof(FillBordersContext),
+    .priv_class    = &fillborders_class,
+    .query_formats = query_formats,
+    .inputs        = fillborders_inputs,
+    .outputs       = fillborders_outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+};

diff --git a/libavfilter/vf_fps.c b/libavfilter/vf_fps.c
index dbafd2c..9167a00 100644
--- a/libavfilter/vf_fps.c
+++ b/libavfilter/vf_fps.c

@@ -2,6 +2,7 @@
  * Copyright 2007 Bobby Bingham
  * Copyright 2012 Robert Nagy <ronag89 gmail com>
  * Copyright 2012 Anton Khirnov <anton khirnov net>
+ * Copyright 2018 Calvin Walton <calvin.walton@kepstin.ca>
  *
  * This file is part of FFmpeg.
  *
@@ -28,17 +29,12 @@
 #include <float.h>
 #include <stdint.h>
 
-#include "libavutil/common.h"
-#include "libavutil/fifo.h"
+#include "libavutil/avassert.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
-#include "libavutil/parseutils.h"
-
-#define FF_INTERNAL_FIELDS 1
-#include "framequeue.h"
 #include "avfilter.h"
+#include "filters.h"
 #include "internal.h"
-#include "video.h"
 
 enum EOFAction {
     EOF_ACTION_ROUND,
@@ -49,18 +45,27 @@
 typedef struct FPSContext {
     const AVClass *class;
 
-    AVFifoBuffer *fifo;     ///< store frames until we get two successive timestamps
-
-    /* timestamps in input timebase */
-    int64_t first_pts;      ///< pts of the first frame that arrived on this filter
-
     double start_time;      ///< pts, in seconds, of the expected first frame
 
     AVRational framerate;   ///< target framerate
     int rounding;           ///< AVRounding method for timestamps
     int eof_action;         ///< action performed for last frame in FIFO
 
+    /* Set during outlink configuration */
+    int64_t  in_pts_off;    ///< input frame pts offset for start_time handling
+    int64_t  out_pts_off;   ///< output frame pts offset for start_time handling
+
+    /* Runtime state */
+    int      status;        ///< buffered input status
+    int64_t  status_pts;    ///< buffered input status timestamp
+
+    AVFrame *frames[2];     ///< buffered frames
+    int      frames_count;  ///< number of buffered frames
+
+    int64_t  next_pts;      ///< pts of the next frame to output
+
     /* statistics */
+    int cur_frame_out;         ///< number of times current frame has been output
     int frames_in;             ///< number of frames on input
     int frames_out;            ///< number of frames on output
     int dup;                   ///< number of frames duplicated
@@ -91,231 +96,238 @@
 {
     FPSContext *s = ctx->priv;
 
-    if (!(s->fifo = av_fifo_alloc_array(2, sizeof(AVFrame*))))
-        return AVERROR(ENOMEM);
-
-    s->first_pts    = AV_NOPTS_VALUE;
+    s->status_pts   = AV_NOPTS_VALUE;
+    s->next_pts     = AV_NOPTS_VALUE;
 
     av_log(ctx, AV_LOG_VERBOSE, "fps=%d/%d\n", s->framerate.num, s->framerate.den);
     return 0;
 }
 
-static void flush_fifo(AVFifoBuffer *fifo)
+/* Remove the first frame from the buffer, returning it */
+static AVFrame *shift_frame(AVFilterContext *ctx, FPSContext *s)
 {
-    while (av_fifo_size(fifo)) {
-        AVFrame *tmp;
-        av_fifo_generic_read(fifo, &tmp, sizeof(tmp), NULL);
-        av_frame_free(&tmp);
+    AVFrame *frame;
+
+    /* Must only be called when there are frames in the buffer */
+    av_assert1(s->frames_count > 0);
+
+    frame = s->frames[0];
+    s->frames[0] = s->frames[1];
+    s->frames[1] = NULL;
+    s->frames_count--;
+
+    /* Update statistics counters */
+    s->frames_out += s->cur_frame_out;
+    if (s->cur_frame_out > 1) {
+        av_log(ctx, AV_LOG_DEBUG, "Duplicated frame with pts %"PRId64" %d times\n",
+               frame->pts, s->cur_frame_out - 1);
+        s->dup += s->cur_frame_out - 1;
+    } else if (s->cur_frame_out == 0) {
+        av_log(ctx, AV_LOG_DEBUG, "Dropping frame with pts %"PRId64"\n",
+               frame->pts);
+        s->drop++;
     }
+    s->cur_frame_out = 0;
+
+    return frame;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
     FPSContext *s = ctx->priv;
-    if (s->fifo) {
-        s->drop += av_fifo_size(s->fifo) / sizeof(AVFrame*);
-        flush_fifo(s->fifo);
-        av_fifo_freep(&s->fifo);
+
+    AVFrame *frame;
+
+    while (s->frames_count > 0) {
+        frame = shift_frame(ctx, s);
+        av_frame_free(&frame);
     }
 
     av_log(ctx, AV_LOG_VERBOSE, "%d frames in, %d frames out; %d frames dropped, "
            "%d frames duplicated.\n", s->frames_in, s->frames_out, s->drop, s->dup);
 }
 
-static int config_props(AVFilterLink* link)
+static int config_props(AVFilterLink* outlink)
 {
-    FPSContext   *s = link->src->priv;
+    AVFilterContext *ctx    = outlink->src;
+    AVFilterLink    *inlink = ctx->inputs[0];
+    FPSContext      *s      = ctx->priv;
 
-    link->time_base = av_inv_q(s->framerate);
-    link->frame_rate= s->framerate;
-    link->w         = link->src->inputs[0]->w;
-    link->h         = link->src->inputs[0]->h;
+    outlink->time_base  = av_inv_q(s->framerate);
+    outlink->frame_rate = s->framerate;
 
-    return 0;
-}
-
-static int request_frame(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    FPSContext        *s = ctx->priv;
-    int ret;
-
-    ret = ff_request_frame(ctx->inputs[0]);
-
-    /* flush the fifo */
-    if (ret == AVERROR_EOF && av_fifo_size(s->fifo)) {
-        int i;
-        for (i = 0; av_fifo_size(s->fifo); i++) {
-            AVFrame *buf;
-
-            av_fifo_generic_read(s->fifo, &buf, sizeof(buf), NULL);
-            if (av_fifo_size(s->fifo)) {
-                buf->pts = av_rescale_q(s->first_pts, ctx->inputs[0]->time_base,
-                                        outlink->time_base) + s->frames_out;
-
-                if ((ret = ff_filter_frame(outlink, buf)) < 0)
-                    return ret;
-
-                s->frames_out++;
-            } else {
-                /* This is the last frame, we may have to duplicate it to match
-                 * the last frame duration */
-                int j;
-                int eof_rounding = (s->eof_action == EOF_ACTION_PASS) ? AV_ROUND_UP : s->rounding;
-                int delta = av_rescale_q_rnd(ctx->inputs[0]->current_pts - s->first_pts,
-                                             ctx->inputs[0]->time_base,
-                                             outlink->time_base, eof_rounding) - s->frames_out;
-                av_log(ctx, AV_LOG_DEBUG, "EOF frames_out:%d delta:%d\n", s->frames_out, delta);
-                /* if the delta is equal to 1, it means we just need to output
-                 * the last frame. Greater than 1 means we will need duplicate
-                 * delta-1 frames */
-                if (delta > 0 ) {
-                    for (j = 0; j < delta; j++) {
-                        AVFrame *dup = av_frame_clone(buf);
-
-                        av_log(ctx, AV_LOG_DEBUG, "Duplicating frame.\n");
-                        dup->pts = av_rescale_q(s->first_pts, ctx->inputs[0]->time_base,
-                                                outlink->time_base) + s->frames_out;
-
-                        if ((ret = ff_filter_frame(outlink, dup)) < 0)
-                            return ret;
-
-                        s->frames_out++;
-                        if (j > 0) s->dup++;
-                    }
-                    av_frame_free(&buf);
-                } else {
-                    /* for delta less or equal to 0, we should drop the frame,
-                     * otherwise, we will have one or more extra frames */
-                    av_frame_free(&buf);
-                    s->drop++;
-                }
-            }
+    /* Calculate the input and output pts offsets for start_time */
+    if (s->start_time != DBL_MAX && s->start_time != AV_NOPTS_VALUE) {
+        double first_pts = s->start_time * AV_TIME_BASE;
+        if (first_pts < INT64_MIN || first_pts > INT64_MAX) {
+            av_log(ctx, AV_LOG_ERROR, "Start time %f cannot be represented in internal time base\n",
+                   s->start_time);
+            return AVERROR(EINVAL);
         }
-        return 0;
+        s->in_pts_off  = av_rescale_q_rnd(first_pts, AV_TIME_BASE_Q, inlink->time_base,
+                                          s->rounding | AV_ROUND_PASS_MINMAX);
+        s->out_pts_off = av_rescale_q_rnd(first_pts, AV_TIME_BASE_Q, outlink->time_base,
+                                          s->rounding | AV_ROUND_PASS_MINMAX);
+        s->next_pts = s->out_pts_off;
+        av_log(ctx, AV_LOG_VERBOSE, "Set first pts to (in:%"PRId64" out:%"PRId64") from start time %f\n",
+               s->in_pts_off, s->out_pts_off, s->start_time);
     }
 
-    return ret;
-}
-
-static int write_to_fifo(AVFifoBuffer *fifo, AVFrame *buf)
-{
-    int ret;
-
-    if (!av_fifo_space(fifo) &&
-        (ret = av_fifo_realloc2(fifo, 2*av_fifo_size(fifo)))) {
-        av_frame_free(&buf);
-        return ret;
-    }
-
-    av_fifo_generic_write(fifo, &buf, sizeof(buf), NULL);
     return 0;
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
+/* Read a frame from the input and save it in the buffer */
+static int read_frame(AVFilterContext *ctx, FPSContext *s, AVFilterLink *inlink, AVFilterLink *outlink)
 {
-    AVFilterContext    *ctx = inlink->dst;
-    FPSContext           *s = ctx->priv;
-    AVFilterLink   *outlink = ctx->outputs[0];
-    int64_t delta;
-    int i, ret;
+    AVFrame *frame;
+    int ret;
+    int64_t in_pts;
 
+    /* Must only be called when we have buffer room available */
+    av_assert1(s->frames_count < 2);
+
+    ret = ff_inlink_consume_frame(inlink, &frame);
+    /* Caller must have run ff_inlink_check_available_frame first */
+    av_assert1(ret);
+    if (ret < 0)
+        return ret;
+
+    /* Convert frame pts to output timebase.
+     * The dance with offsets is required to match the rounding behaviour of the
+     * previous version of the fps filter when using the start_time option. */
+    in_pts = frame->pts;
+    frame->pts = s->out_pts_off + av_rescale_q_rnd(in_pts - s->in_pts_off,
+                                                   inlink->time_base, outlink->time_base,
+                                                   s->rounding | AV_ROUND_PASS_MINMAX);
+
+    av_log(ctx, AV_LOG_DEBUG, "Read frame with in pts %"PRId64", out pts %"PRId64"\n",
+           in_pts, frame->pts);
+
+    s->frames[s->frames_count++] = frame;
     s->frames_in++;
-    /* discard frames until we get the first timestamp */
-    if (s->first_pts == AV_NOPTS_VALUE) {
-        if (buf->pts != AV_NOPTS_VALUE) {
-            ret = write_to_fifo(s->fifo, buf);
-            if (ret < 0)
-                return ret;
 
-            if (s->start_time != DBL_MAX && s->start_time != AV_NOPTS_VALUE) {
-                double first_pts = s->start_time * AV_TIME_BASE;
-                first_pts = FFMIN(FFMAX(first_pts, INT64_MIN), INT64_MAX);
-                s->first_pts = av_rescale_q(first_pts, AV_TIME_BASE_Q,
-                                                     inlink->time_base);
-                av_log(ctx, AV_LOG_VERBOSE, "Set first pts to (in:%"PRId64" out:%"PRId64")\n",
-                       s->first_pts, av_rescale_q(first_pts, AV_TIME_BASE_Q,
-                                                  outlink->time_base));
-            } else {
-                s->first_pts = buf->pts;
-            }
+    return 1;
+}
+
+/* Write a frame to the output */
+static int write_frame(AVFilterContext *ctx, FPSContext *s, AVFilterLink *outlink, int *again)
+{
+    AVFrame *frame;
+
+    av_assert1(s->frames_count == 2 || (s->status && s->frames_count == 1));
+
+    /* We haven't yet determined the pts of the first frame */
+    if (s->next_pts == AV_NOPTS_VALUE) {
+        if (s->frames[0]->pts != AV_NOPTS_VALUE) {
+            s->next_pts = s->frames[0]->pts;
+            av_log(ctx, AV_LOG_VERBOSE, "Set first pts to %"PRId64"\n", s->next_pts);
         } else {
             av_log(ctx, AV_LOG_WARNING, "Discarding initial frame(s) with no "
                    "timestamp.\n");
-            av_frame_free(&buf);
-            s->drop++;
+            frame = shift_frame(ctx, s);
+            av_frame_free(&frame);
+            *again = 1;
+            return 0;
         }
+    }
+
+    /* There are two conditions where we want to drop a frame:
+     * - If we have two buffered frames and the second frame is acceptable
+     *   as the next output frame, then drop the first buffered frame.
+     * - If we have status (EOF) set, drop frames when we hit the
+     *   status timestamp. */
+    if ((s->frames_count == 2 && s->frames[1]->pts <= s->next_pts) ||
+        (s->status            && s->status_pts     <= s->next_pts)) {
+
+        frame = shift_frame(ctx, s);
+        av_frame_free(&frame);
+        *again = 1;
         return 0;
+
+    /* Output a copy of the first buffered frame */
+    } else {
+        frame = av_frame_clone(s->frames[0]);
+        if (!frame)
+            return AVERROR(ENOMEM);
+        frame->pts = s->next_pts++;
+
+        av_log(ctx, AV_LOG_DEBUG, "Writing frame with pts %"PRId64" to pts %"PRId64"\n",
+               s->frames[0]->pts, frame->pts);
+        s->cur_frame_out++;
+
+        return ff_filter_frame(outlink, frame);
+    }
+}
+
+/* Convert status_pts to outlink timebase */
+static void update_eof_pts(AVFilterContext *ctx, FPSContext *s, AVFilterLink *inlink, AVFilterLink *outlink, int64_t status_pts)
+{
+    int eof_rounding = (s->eof_action == EOF_ACTION_PASS) ? AV_ROUND_UP : s->rounding;
+    s->status_pts = av_rescale_q_rnd(status_pts, inlink->time_base, outlink->time_base,
+                                     eof_rounding | AV_ROUND_PASS_MINMAX);
+
+    av_log(ctx, AV_LOG_DEBUG, "EOF is at pts %"PRId64"\n", s->status_pts);
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    FPSContext   *s       = ctx->priv;
+    AVFilterLink *inlink  = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+
+    int ret;
+    int again = 0;
+    int64_t status_pts;
+
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    /* No buffered status: normal operation */
+    if (!s->status) {
+
+        /* Read available input frames if we have room */
+        while (s->frames_count < 2 && ff_inlink_check_available_frame(inlink)) {
+            ret = read_frame(ctx, s, inlink, outlink);
+            if (ret < 0)
+                return ret;
+        }
+
+        /* We do not yet have enough frames to produce output */
+        if (s->frames_count < 2) {
+            /* Check if we've hit EOF (or otherwise that an error status is set) */
+            ret = ff_inlink_acknowledge_status(inlink, &s->status, &status_pts);
+            if (ret > 0)
+                update_eof_pts(ctx, s, inlink, outlink, status_pts);
+
+            if (!ret) {
+                /* If someone wants us to output, we'd better ask for more input */
+                FF_FILTER_FORWARD_WANTED(outlink, inlink);
+                return 0;
+            }
+        }
     }
 
-    /* now wait for the next timestamp */
-    if (buf->pts == AV_NOPTS_VALUE || av_fifo_size(s->fifo) <= 0) {
-        return write_to_fifo(s->fifo, buf);
-    }
-
-    /* number of output frames */
-    delta = av_rescale_q_rnd(buf->pts - s->first_pts, inlink->time_base,
-                             outlink->time_base, s->rounding) - s->frames_out ;
-
-    if (delta < 1) {
-        /* drop everything buffered except the last */
-        int drop = av_fifo_size(s->fifo)/sizeof(AVFrame*);
-
-        av_log(ctx, AV_LOG_DEBUG, "Dropping %d frame(s).\n", drop);
-        s->drop += drop;
-
-        flush_fifo(s->fifo);
-        ret = write_to_fifo(s->fifo, buf);
-
+    /* Buffered frames are available, so generate an output frame */
+    if (s->frames_count > 0) {
+        ret = write_frame(ctx, s, outlink, &again);
+        /* Couldn't generate a frame, so schedule us to perform another step */
+        if (again)
+            ff_filter_set_ready(ctx, 100);
         return ret;
     }
 
-    /* can output >= 1 frames */
-    for (i = 0; i < delta; i++) {
-        AVFrame *buf_out;
-        av_fifo_generic_read(s->fifo, &buf_out, sizeof(buf_out), NULL);
-
-        /* duplicate the frame if needed */
-        if (!av_fifo_size(s->fifo) && i < delta - 1) {
-            AVFrame *dup = av_frame_clone(buf_out);
-
-            av_log(ctx, AV_LOG_DEBUG, "Duplicating frame.\n");
-            if (dup)
-                ret = write_to_fifo(s->fifo, dup);
-            else
-                ret = AVERROR(ENOMEM);
-
-            if (ret < 0) {
-                av_frame_free(&buf_out);
-                av_frame_free(&buf);
-                return ret;
-            }
-
-            s->dup++;
-        }
-
-        buf_out->pts = av_rescale_q(s->first_pts, inlink->time_base,
-                                    outlink->time_base) + s->frames_out;
-
-        if ((ret = ff_filter_frame(outlink, buf_out)) < 0) {
-            av_frame_free(&buf);
-            return ret;
-        }
-
-        s->frames_out++;
+    /* No frames left, so forward the status */
+    if (s->status && s->frames_count == 0) {
+        ff_outlink_set_status(outlink, s->status, s->next_pts);
+        return 0;
     }
-    flush_fifo(s->fifo);
 
-    ret = write_to_fifo(s->fifo, buf);
-
-    return ret;
+    return FFERROR_NOT_READY;
 }
 
 static const AVFilterPad avfilter_vf_fps_inputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
-        .filter_frame = filter_frame,
     },
     { NULL }
 };
@@ -324,8 +336,7 @@
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
-        .request_frame = request_frame,
-        .config_props  = config_props
+        .config_props  = config_props,
     },
     { NULL }
 };
@@ -337,6 +348,7 @@
     .uninit      = uninit,
     .priv_size   = sizeof(FPSContext),
     .priv_class  = &fps_class,
+    .activate    = activate,
     .inputs      = avfilter_vf_fps_inputs,
     .outputs     = avfilter_vf_fps_outputs,
 };

diff --git a/libavfilter/vf_framepack.c b/libavfilter/vf_framepack.c
index a5cd954..12a2996 100644
--- a/libavfilter/vf_framepack.c
+++ b/libavfilter/vf_framepack.c

@@ -324,6 +324,8 @@
             if (!stereo)
                 return AVERROR(ENOMEM);
             stereo->type = s->format;
+            stereo->view = i == LEFT ? AV_STEREO3D_VIEW_LEFT
+                                     : AV_STEREO3D_VIEW_RIGHT;
 
             // filter the frame and immediately relinquish its pointer
             ret = ff_filter_frame(outlink, s->input_views[i]);

diff --git a/libavfilter/vf_framerate.c b/libavfilter/vf_framerate.c
index dc8b05f..fb65381 100644
--- a/libavfilter/vf_framerate.c
+++ b/libavfilter/vf_framerate.c

@@ -38,47 +38,8 @@
 #include "avfilter.h"
 #include "internal.h"
 #include "video.h"
-
-#define N_SRCE 3
-
-typedef struct FrameRateContext {
-    const AVClass *class;
-    // parameters
-    AVRational dest_frame_rate;         ///< output frames per second
-    int flags;                          ///< flags affecting frame rate conversion algorithm
-    double scene_score;                 ///< score that denotes a scene change has happened
-    int interp_start;                   ///< start of range to apply linear interpolation
-    int interp_end;                     ///< end of range to apply linear interpolation
-
-    int line_size[4];                   ///< bytes of pixel data per line for each plane
-    int vsub;
-
-    int frst, next, prev, crnt, last;
-    int pending_srce_frames;            ///< how many input frames are still waiting to be processed
-    int flush;                          ///< are we flushing final frames
-    int pending_end_frame;              ///< flag indicating we are waiting to call filter_frame()
-
-    AVRational srce_time_base;          ///< timebase of source
-
-    AVRational dest_time_base;          ///< timebase of destination
-    int32_t dest_frame_num;
-    int64_t last_dest_frame_pts;        ///< pts of the last frame output
-    int64_t average_srce_pts_dest_delta;///< average input pts delta converted from input rate to output rate
-    int64_t average_dest_pts_delta;     ///< calculated average output pts delta
-
-    av_pixelutils_sad_fn sad;           ///< Sum of the absolute difference function (scene detect only)
-    double prev_mafd;                   ///< previous MAFD                           (scene detect only)
-
-    AVFrame *srce[N_SRCE];              ///< buffered source frames
-    int64_t srce_pts_dest[N_SRCE];      ///< pts for source frames scaled to output timebase
-    int64_t pts;                        ///< pts of frame we are working on
-
-    int (*blend_frames)(AVFilterContext *ctx, float interpolate,
-                        AVFrame *copy_src1, AVFrame *copy_src2);
-    int max;
-    int bitdepth;
-    AVFrame *work;
-} FrameRateContext;
+#include "filters.h"
+#include "framerate.h"
 
 #define OFFSET(x) offsetof(FrameRateContext, x)
 #define V AV_OPT_FLAG_VIDEO_PARAM
@@ -90,7 +51,7 @@
 
     {"interp_start",        "point to start linear interpolation",    OFFSET(interp_start),    AV_OPT_TYPE_INT,      {.i64=15},                 0,       255,     V|F },
     {"interp_end",          "point to end linear interpolation",      OFFSET(interp_end),      AV_OPT_TYPE_INT,      {.i64=240},                0,       255,     V|F },
-    {"scene",               "scene change level",                     OFFSET(scene_score),     AV_OPT_TYPE_DOUBLE,   {.dbl=7.0},                0,       INT_MAX, V|F },
+    {"scene",               "scene change level",                     OFFSET(scene_score),     AV_OPT_TYPE_DOUBLE,   {.dbl=8.2},                0,       INT_MAX, V|F },
 
     {"flags",               "set flags",                              OFFSET(flags),           AV_OPT_TYPE_FLAGS,    {.i64=1},                  0,       INT_MAX, V|F, "flags" },
     {"scene_change_detect", "enable scene change detection",          0,                       AV_OPT_TYPE_CONST,    {.i64=FRAMERATE_FLAG_SCD}, INT_MIN, INT_MAX, V|F, "flags" },
@@ -101,25 +62,6 @@
 
 AVFILTER_DEFINE_CLASS(framerate);
 
-static void next_source(AVFilterContext *ctx)
-{
-    FrameRateContext *s = ctx->priv;
-    int i;
-
-    ff_dlog(ctx,  "next_source()\n");
-
-    if (s->srce[s->last] && s->srce[s->last] != s->srce[s->last-1]) {
-        ff_dlog(ctx, "next_source() unlink %d\n", s->last);
-        av_frame_free(&s->srce[s->last]);
-    }
-    for (i = s->last; i > s->frst; i--) {
-        ff_dlog(ctx, "next_source() copy %d to %d\n", i - 1, i);
-        s->srce[i] = s->srce[i - 1];
-    }
-    ff_dlog(ctx, "next_source() make %d null\n", s->frst);
-    s->srce[s->frst] = NULL;
-}
-
 static av_always_inline int64_t sad_8x8_16(const uint16_t *src1, ptrdiff_t stride1,
                                            const uint16_t *src2, ptrdiff_t stride2)
 {
@@ -135,41 +77,35 @@
     return sum;
 }
 
-static double get_scene_score16(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next)
+static int64_t scene_sad16(FrameRateContext *s, const uint16_t *p1, int p1_linesize, const uint16_t* p2, int p2_linesize, const int width, const int height)
 {
-    FrameRateContext *s = ctx->priv;
-    double ret = 0;
-
-    ff_dlog(ctx, "get_scene_score16()\n");
-
-    if (crnt &&
-        crnt->height == next->height &&
-        crnt->width  == next->width) {
-        int x, y;
-        int64_t sad;
-        double mafd, diff;
-        const uint16_t *p1 = (const uint16_t *)crnt->data[0];
-        const uint16_t *p2 = (const uint16_t *)next->data[0];
-        const int p1_linesize = crnt->linesize[0] / 2;
-        const int p2_linesize = next->linesize[0] / 2;
-
-        ff_dlog(ctx, "get_scene_score16() process\n");
-
-        for (sad = y = 0; y < crnt->height; y += 8) {
-            for (x = 0; x < p1_linesize; x += 8) {
-                sad += sad_8x8_16(p1 + y * p1_linesize + x,
-                                  p1_linesize,
-                                  p2 + y * p2_linesize + x,
-                                  p2_linesize);
-            }
+    int64_t sad;
+    int x, y;
+    for (sad = y = 0; y < height - 7; y += 8) {
+        for (x = 0; x < width - 7; x += 8) {
+            sad += sad_8x8_16(p1 + y * p1_linesize + x,
+                              p1_linesize,
+                              p2 + y * p2_linesize + x,
+                              p2_linesize);
         }
-        mafd = sad / (crnt->height * crnt->width * 3);
-        diff = fabs(mafd - s->prev_mafd);
-        ret  = av_clipf(FFMIN(mafd, diff), 0, 100.0);
-        s->prev_mafd = mafd;
     }
-    ff_dlog(ctx, "get_scene_score16() result is:%f\n", ret);
-    return ret;
+    return sad;
+}
+
+static int64_t scene_sad8(FrameRateContext *s, uint8_t *p1, int p1_linesize, uint8_t* p2, int p2_linesize, const int width, const int height)
+{
+    int64_t sad;
+    int x, y;
+    for (sad = y = 0; y < height - 7; y += 8) {
+        for (x = 0; x < width - 7; x += 8) {
+            sad += s->sad(p1 + y * p1_linesize + x,
+                          p1_linesize,
+                          p2 + y * p2_linesize + x,
+                          p2_linesize);
+        }
+    }
+    emms_c();
+    return sad;
 }
 
 static double get_scene_score(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next)
@@ -179,358 +115,159 @@
 
     ff_dlog(ctx, "get_scene_score()\n");
 
-    if (crnt &&
-        crnt->height == next->height &&
+    if (crnt->height == next->height &&
         crnt->width  == next->width) {
-        int x, y;
         int64_t sad;
         double mafd, diff;
-        uint8_t *p1 = crnt->data[0];
-        uint8_t *p2 = next->data[0];
-        const int p1_linesize = crnt->linesize[0];
-        const int p2_linesize = next->linesize[0];
 
         ff_dlog(ctx, "get_scene_score() process\n");
+        if (s->bitdepth == 8)
+            sad = scene_sad8(s, crnt->data[0], crnt->linesize[0], next->data[0], next->linesize[0], crnt->width, crnt->height);
+        else
+            sad = scene_sad16(s, (const uint16_t*)crnt->data[0], crnt->linesize[0] / 2, (const uint16_t*)next->data[0], next->linesize[0] / 2, crnt->width, crnt->height);
 
-        for (sad = y = 0; y < crnt->height; y += 8) {
-            for (x = 0; x < p1_linesize; x += 8) {
-                sad += s->sad(p1 + y * p1_linesize + x,
-                              p1_linesize,
-                              p2 + y * p2_linesize + x,
-                              p2_linesize);
-            }
-        }
-        emms_c();
-        mafd = sad / (crnt->height * crnt->width * 3);
+        mafd = (double)sad * 100.0 / FFMAX(1, (crnt->height & ~7) * (crnt->width & ~7)) / (1 << s->bitdepth);
         diff = fabs(mafd - s->prev_mafd);
         ret  = av_clipf(FFMIN(mafd, diff), 0, 100.0);
         s->prev_mafd = mafd;
     }
-        ff_dlog(ctx, "get_scene_score() result is:%f\n", ret);
+    ff_dlog(ctx, "get_scene_score() result is:%f\n", ret);
     return ret;
 }
 
-static int blend_frames16(AVFilterContext *ctx, float interpolate,
-                          AVFrame *copy_src1, AVFrame *copy_src2)
+typedef struct ThreadData {
+    AVFrame *copy_src1, *copy_src2;
+    uint16_t src1_factor, src2_factor;
+} ThreadData;
+
+static int filter_slice(AVFilterContext *ctx, void *arg, int job, int nb_jobs)
+{
+    FrameRateContext *s = ctx->priv;
+    ThreadData *td = arg;
+    uint16_t src1_factor = td->src1_factor;
+    uint16_t src2_factor = td->src2_factor;
+    int plane;
+
+    for (plane = 0; plane < 4 && td->copy_src1->data[plane] && td->copy_src2->data[plane]; plane++) {
+        int cpy_line_width = s->line_size[plane];
+        uint8_t *cpy_src1_data = td->copy_src1->data[plane];
+        int cpy_src1_line_size = td->copy_src1->linesize[plane];
+        uint8_t *cpy_src2_data = td->copy_src2->data[plane];
+        int cpy_src2_line_size = td->copy_src2->linesize[plane];
+        int cpy_src_h = (plane > 0 && plane < 3) ? (td->copy_src1->height >> s->vsub) : (td->copy_src1->height);
+        uint8_t *cpy_dst_data = s->work->data[plane];
+        int cpy_dst_line_size = s->work->linesize[plane];
+        const int start = (cpy_src_h *  job   ) / nb_jobs;
+        const int end   = (cpy_src_h * (job+1)) / nb_jobs;
+        cpy_src1_data += start * cpy_src1_line_size;
+        cpy_src2_data += start * cpy_src2_line_size;
+        cpy_dst_data += start * cpy_dst_line_size;
+
+        s->blend(cpy_src1_data, cpy_src1_line_size,
+                 cpy_src2_data, cpy_src2_line_size,
+                 cpy_dst_data,  cpy_dst_line_size,
+                 cpy_line_width, end - start,
+                 src1_factor, src2_factor, s->blend_factor_max >> 1);
+    }
+
+    return 0;
+}
+
+static int blend_frames(AVFilterContext *ctx, int interpolate)
 {
     FrameRateContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     double interpolate_scene_score = 0;
 
-    if ((s->flags & FRAMERATE_FLAG_SCD) && copy_src2) {
-        interpolate_scene_score = get_scene_score16(ctx, copy_src1, copy_src2);
-        ff_dlog(ctx, "blend_frames16() interpolate scene score:%f\n", interpolate_scene_score);
-    }
-    // decide if the shot-change detection allows us to blend two frames
-    if (interpolate_scene_score < s->scene_score && copy_src2) {
-        uint16_t src2_factor = fabsf(interpolate) * (1 << (s->bitdepth - 8));
-        uint16_t src1_factor = s->max - src2_factor;
-        const int half = s->max / 2;
-        const int uv = (s->max + 1) * half;
-        const int shift = s->bitdepth;
-        int plane, line, pixel;
-
-        // get work-space for output frame
-        s->work = ff_get_video_buffer(outlink, outlink->w, outlink->h);
-        if (!s->work)
-            return AVERROR(ENOMEM);
-
-        av_frame_copy_props(s->work, s->srce[s->crnt]);
-
-        ff_dlog(ctx, "blend_frames16() INTERPOLATE to create work frame\n");
-        for (plane = 0; plane < 4 && copy_src1->data[plane] && copy_src2->data[plane]; plane++) {
-            int cpy_line_width = s->line_size[plane];
-            const uint16_t *cpy_src1_data = (const uint16_t *)copy_src1->data[plane];
-            int cpy_src1_line_size = copy_src1->linesize[plane] / 2;
-            const uint16_t *cpy_src2_data = (const uint16_t *)copy_src2->data[plane];
-            int cpy_src2_line_size = copy_src2->linesize[plane] / 2;
-            int cpy_src_h = (plane > 0 && plane < 3) ? (copy_src1->height >> s->vsub) : (copy_src1->height);
-            uint16_t *cpy_dst_data = (uint16_t *)s->work->data[plane];
-            int cpy_dst_line_size = s->work->linesize[plane] / 2;
-
-            if (plane <1 || plane >2) {
-                // luma or alpha
-                for (line = 0; line < cpy_src_h; line++) {
-                    for (pixel = 0; pixel < cpy_line_width; pixel++)
-                        cpy_dst_data[pixel] = ((cpy_src1_data[pixel] * src1_factor) + (cpy_src2_data[pixel] * src2_factor) + half) >> shift;
-                    cpy_src1_data += cpy_src1_line_size;
-                    cpy_src2_data += cpy_src2_line_size;
-                    cpy_dst_data += cpy_dst_line_size;
-                }
-            } else {
-                // chroma
-                for (line = 0; line < cpy_src_h; line++) {
-                    for (pixel = 0; pixel < cpy_line_width; pixel++) {
-                        cpy_dst_data[pixel] = (((cpy_src1_data[pixel] - half) * src1_factor) + ((cpy_src2_data[pixel] - half) * src2_factor) + uv) >> shift;
-                    }
-                    cpy_src1_data += cpy_src1_line_size;
-                    cpy_src2_data += cpy_src2_line_size;
-                    cpy_dst_data += cpy_dst_line_size;
-                }
-            }
-        }
-        return 1;
-    }
-    return 0;
-}
-
-static int blend_frames8(AVFilterContext *ctx, float interpolate,
-                         AVFrame *copy_src1, AVFrame *copy_src2)
-{
-    FrameRateContext *s = ctx->priv;
-    AVFilterLink *outlink = ctx->outputs[0];
-    double interpolate_scene_score = 0;
-
-    if ((s->flags & FRAMERATE_FLAG_SCD) && copy_src2) {
-        interpolate_scene_score = get_scene_score(ctx, copy_src1, copy_src2);
-        ff_dlog(ctx, "blend_frames8() interpolate scene score:%f\n", interpolate_scene_score);
-    }
-    // decide if the shot-change detection allows us to blend two frames
-    if (interpolate_scene_score < s->scene_score && copy_src2) {
-        uint16_t src2_factor = fabsf(interpolate);
-        uint16_t src1_factor = 256 - src2_factor;
-        int plane, line, pixel;
-
-        // get work-space for output frame
-        s->work = ff_get_video_buffer(outlink, outlink->w, outlink->h);
-        if (!s->work)
-            return AVERROR(ENOMEM);
-
-        av_frame_copy_props(s->work, s->srce[s->crnt]);
-
-        ff_dlog(ctx, "blend_frames8() INTERPOLATE to create work frame\n");
-        for (plane = 0; plane < 4 && copy_src1->data[plane] && copy_src2->data[plane]; plane++) {
-            int cpy_line_width = s->line_size[plane];
-            uint8_t *cpy_src1_data = copy_src1->data[plane];
-            int cpy_src1_line_size = copy_src1->linesize[plane];
-            uint8_t *cpy_src2_data = copy_src2->data[plane];
-            int cpy_src2_line_size = copy_src2->linesize[plane];
-            int cpy_src_h = (plane > 0 && plane < 3) ? (copy_src1->height >> s->vsub) : (copy_src1->height);
-            uint8_t *cpy_dst_data = s->work->data[plane];
-            int cpy_dst_line_size = s->work->linesize[plane];
-            if (plane <1 || plane >2) {
-                // luma or alpha
-                for (line = 0; line < cpy_src_h; line++) {
-                    for (pixel = 0; pixel < cpy_line_width; pixel++) {
-                        // integer version of (src1 * src1_factor) + (src2 + src2_factor) + 0.5
-                        // 0.5 is for rounding
-                        // 128 is the integer representation of 0.5 << 8
-                        cpy_dst_data[pixel] = ((cpy_src1_data[pixel] * src1_factor) + (cpy_src2_data[pixel] * src2_factor) + 128) >> 8;
-                    }
-                    cpy_src1_data += cpy_src1_line_size;
-                    cpy_src2_data += cpy_src2_line_size;
-                    cpy_dst_data += cpy_dst_line_size;
-                }
-            } else {
-                // chroma
-                for (line = 0; line < cpy_src_h; line++) {
-                    for (pixel = 0; pixel < cpy_line_width; pixel++) {
-                        // as above
-                        // because U and V are based around 128 we have to subtract 128 from the components.
-                        // 32896 is the integer representation of 128.5 << 8
-                        cpy_dst_data[pixel] = (((cpy_src1_data[pixel] - 128) * src1_factor) + ((cpy_src2_data[pixel] - 128) * src2_factor) + 32896) >> 8;
-                    }
-                    cpy_src1_data += cpy_src1_line_size;
-                    cpy_src2_data += cpy_src2_line_size;
-                    cpy_dst_data += cpy_dst_line_size;
-                }
-            }
-        }
-        return 1;
-    }
-    return 0;
-}
-
-static int process_work_frame(AVFilterContext *ctx, int stop)
-{
-    FrameRateContext *s = ctx->priv;
-    int64_t work_next_pts;
-    AVFrame *copy_src1;
-    float interpolate;
-
-    ff_dlog(ctx, "process_work_frame()\n");
-
-    ff_dlog(ctx, "process_work_frame() pending_input_frames %d\n", s->pending_srce_frames);
-
-    if (s->srce[s->prev]) ff_dlog(ctx, "process_work_frame() srce prev pts:%"PRId64"\n", s->srce[s->prev]->pts);
-    if (s->srce[s->crnt]) ff_dlog(ctx, "process_work_frame() srce crnt pts:%"PRId64"\n", s->srce[s->crnt]->pts);
-    if (s->srce[s->next]) ff_dlog(ctx, "process_work_frame() srce next pts:%"PRId64"\n", s->srce[s->next]->pts);
-
-    if (!s->srce[s->crnt]) {
-        // the filter cannot do anything
-        ff_dlog(ctx, "process_work_frame() no current frame cached: move on to next frame, do not output a frame\n");
-        next_source(ctx);
-        return 0;
-    }
-
-    work_next_pts = s->pts + s->average_dest_pts_delta;
-
-    ff_dlog(ctx, "process_work_frame() work crnt pts:%"PRId64"\n", s->pts);
-    ff_dlog(ctx, "process_work_frame() work next pts:%"PRId64"\n", work_next_pts);
-    if (s->srce[s->prev])
-        ff_dlog(ctx, "process_work_frame() srce prev pts:%"PRId64" at dest time base:%u/%u\n",
-            s->srce_pts_dest[s->prev], s->dest_time_base.num, s->dest_time_base.den);
-    if (s->srce[s->crnt])
-        ff_dlog(ctx, "process_work_frame() srce crnt pts:%"PRId64" at dest time base:%u/%u\n",
-            s->srce_pts_dest[s->crnt], s->dest_time_base.num, s->dest_time_base.den);
-    if (s->srce[s->next])
-        ff_dlog(ctx, "process_work_frame() srce next pts:%"PRId64" at dest time base:%u/%u\n",
-            s->srce_pts_dest[s->next], s->dest_time_base.num, s->dest_time_base.den);
-
-    av_assert0(s->srce[s->next]);
-
-    // should filter be skipping input frame (output frame rate is lower than input frame rate)
-    if (!s->flush && s->pts >= s->srce_pts_dest[s->next]) {
-        ff_dlog(ctx, "process_work_frame() work crnt pts >= srce next pts: SKIP FRAME, move on to next frame, do not output a frame\n");
-        next_source(ctx);
-        s->pending_srce_frames--;
-        return 0;
-    }
-
-    // calculate interpolation
-    interpolate = ((s->pts - s->srce_pts_dest[s->crnt]) * 256.0 / s->average_srce_pts_dest_delta);
-    ff_dlog(ctx, "process_work_frame() interpolate:%f/256\n", interpolate);
-    copy_src1 = s->srce[s->crnt];
-    if (interpolate > s->interp_end) {
-        ff_dlog(ctx, "process_work_frame() source is:NEXT\n");
-        copy_src1 = s->srce[s->next];
-    }
-    if (s->srce[s->prev] && interpolate < -s->interp_end) {
-        ff_dlog(ctx, "process_work_frame() source is:PREV\n");
-        copy_src1 = s->srce[s->prev];
-    }
-
-    // decide whether to blend two frames
-    if ((interpolate >= s->interp_start && interpolate <= s->interp_end) || (interpolate <= -s->interp_start && interpolate >= -s->interp_end)) {
-        AVFrame *copy_src2;
-
-        if (interpolate > 0) {
-            ff_dlog(ctx, "process_work_frame() interpolate source is:NEXT\n");
-            copy_src2 = s->srce[s->next];
-        } else {
-            ff_dlog(ctx, "process_work_frame() interpolate source is:PREV\n");
-            copy_src2 = s->srce[s->prev];
-        }
-        if (s->blend_frames(ctx, interpolate, copy_src1, copy_src2))
-            goto copy_done;
+    if ((s->flags & FRAMERATE_FLAG_SCD)) {
+        if (s->score >= 0.0)
+            interpolate_scene_score = s->score;
         else
-            ff_dlog(ctx, "process_work_frame() CUT - DON'T INTERPOLATE\n");
+            interpolate_scene_score = s->score = get_scene_score(ctx, s->f0, s->f1);
+        ff_dlog(ctx, "blend_frames() interpolate scene score:%f\n", interpolate_scene_score);
+    }
+    // decide if the shot-change detection allows us to blend two frames
+    if (interpolate_scene_score < s->scene_score) {
+        ThreadData td;
+        td.copy_src1 = s->f0;
+        td.copy_src2 = s->f1;
+        td.src2_factor = interpolate;
+        td.src1_factor = s->blend_factor_max - td.src2_factor;
+
+        // get work-space for output frame
+        s->work = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!s->work)
+            return AVERROR(ENOMEM);
+
+        av_frame_copy_props(s->work, s->f0);
+
+        ff_dlog(ctx, "blend_frames() INTERPOLATE to create work frame\n");
+        ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(FFMAX(1, outlink->h >> 2), ff_filter_get_nb_threads(ctx)));
+        return 1;
+    }
+    return 0;
+}
+
+static int process_work_frame(AVFilterContext *ctx)
+{
+    FrameRateContext *s = ctx->priv;
+    int64_t work_pts;
+    int64_t interpolate, interpolate8;
+    int ret;
+
+    if (!s->f1)
+        return 0;
+    if (!s->f0 && !s->flush)
+        return 0;
+
+    work_pts = s->start_pts + av_rescale_q(s->n, av_inv_q(s->dest_frame_rate), s->dest_time_base);
+
+    if (work_pts >= s->pts1 && !s->flush)
+        return 0;
+
+    if (!s->f0) {
+        s->work = av_frame_clone(s->f1);
+    } else {
+        if (work_pts >= s->pts1 + s->delta && s->flush)
+            return 0;
+
+        interpolate = av_rescale(work_pts - s->pts0, s->blend_factor_max, s->delta);
+        interpolate8 = av_rescale(work_pts - s->pts0, 256, s->delta);
+        ff_dlog(ctx, "process_work_frame() interpolate: %"PRId64"/256\n", interpolate8);
+        if (interpolate >= s->blend_factor_max || interpolate8 > s->interp_end) {
+            s->work = av_frame_clone(s->f1);
+        } else if (interpolate <= 0 || interpolate8 < s->interp_start) {
+            s->work = av_frame_clone(s->f0);
+        } else {
+            ret = blend_frames(ctx, interpolate);
+            if (ret < 0)
+                return ret;
+            if (ret == 0)
+                s->work = av_frame_clone(interpolate > (s->blend_factor_max >> 1) ? s->f1 : s->f0);
+        }
     }
 
-    ff_dlog(ctx, "process_work_frame() COPY to the work frame\n");
-    // copy the frame we decided is our base source
-    s->work = av_frame_clone(copy_src1);
     if (!s->work)
         return AVERROR(ENOMEM);
 
-copy_done:
-    s->work->pts = s->pts;
-
-    // should filter be re-using input frame (output frame rate is higher than input frame rate)
-    if (!s->flush && (work_next_pts + s->average_dest_pts_delta) < (s->srce_pts_dest[s->crnt] + s->average_srce_pts_dest_delta)) {
-        ff_dlog(ctx, "process_work_frame() REPEAT FRAME\n");
-    } else {
-        ff_dlog(ctx, "process_work_frame() CONSUME FRAME, move to next frame\n");
-        s->pending_srce_frames--;
-        next_source(ctx);
-    }
-    ff_dlog(ctx, "process_work_frame() output a frame\n");
-    s->dest_frame_num++;
-    if (stop)
-        s->pending_end_frame = 0;
-    s->last_dest_frame_pts = s->work->pts;
+    s->work->pts = work_pts;
+    s->n++;
 
     return 1;
 }
 
-static void set_srce_frame_dest_pts(AVFilterContext *ctx)
-{
-    FrameRateContext *s = ctx->priv;
-
-    ff_dlog(ctx, "set_srce_frame_output_pts()\n");
-
-    // scale the input pts from the timebase difference between input and output
-    if (s->srce[s->prev])
-        s->srce_pts_dest[s->prev] = av_rescale_q(s->srce[s->prev]->pts, s->srce_time_base, s->dest_time_base);
-    if (s->srce[s->crnt])
-        s->srce_pts_dest[s->crnt] = av_rescale_q(s->srce[s->crnt]->pts, s->srce_time_base, s->dest_time_base);
-    if (s->srce[s->next])
-        s->srce_pts_dest[s->next] = av_rescale_q(s->srce[s->next]->pts, s->srce_time_base, s->dest_time_base);
-}
-
-static void set_work_frame_pts(AVFilterContext *ctx)
-{
-    FrameRateContext *s = ctx->priv;
-    int64_t pts, average_srce_pts_delta = 0;
-
-    ff_dlog(ctx, "set_work_frame_pts()\n");
-
-    av_assert0(s->srce[s->next]);
-    av_assert0(s->srce[s->crnt]);
-
-    ff_dlog(ctx, "set_work_frame_pts() srce crnt pts:%"PRId64"\n", s->srce[s->crnt]->pts);
-    ff_dlog(ctx, "set_work_frame_pts() srce next pts:%"PRId64"\n", s->srce[s->next]->pts);
-    if (s->srce[s->prev])
-        ff_dlog(ctx, "set_work_frame_pts() srce prev pts:%"PRId64"\n", s->srce[s->prev]->pts);
-
-    average_srce_pts_delta = s->average_srce_pts_dest_delta;
-    ff_dlog(ctx, "set_work_frame_pts() initial average srce pts:%"PRId64"\n", average_srce_pts_delta);
-
-    set_srce_frame_dest_pts(ctx);
-
-    // calculate the PTS delta
-    if ((pts = (s->srce_pts_dest[s->next] - s->srce_pts_dest[s->crnt]))) {
-        average_srce_pts_delta = average_srce_pts_delta?((average_srce_pts_delta+pts)>>1):pts;
-    } else if (s->srce[s->prev] && (pts = (s->srce_pts_dest[s->crnt] - s->srce_pts_dest[s->prev]))) {
-        average_srce_pts_delta = average_srce_pts_delta?((average_srce_pts_delta+pts)>>1):pts;
-    }
-
-    s->average_srce_pts_dest_delta = average_srce_pts_delta;
-    ff_dlog(ctx, "set_work_frame_pts() average srce pts:%"PRId64"\n", average_srce_pts_delta);
-    ff_dlog(ctx, "set_work_frame_pts() average srce pts:%"PRId64" at dest time base:%u/%u\n",
-            s->average_srce_pts_dest_delta, s->dest_time_base.num, s->dest_time_base.den);
-
-    if (ctx->inputs[0] && !s->average_dest_pts_delta) {
-        int64_t d = av_q2d(av_inv_q(av_mul_q(s->dest_time_base, s->dest_frame_rate)));
-        s->average_dest_pts_delta = d;
-        ff_dlog(ctx, "set_work_frame_pts() average dest pts delta:%"PRId64"\n", s->average_dest_pts_delta);
-    }
-
-    if (!s->dest_frame_num) {
-        s->pts = s->last_dest_frame_pts = s->srce_pts_dest[s->crnt];
-    } else {
-        s->pts = s->last_dest_frame_pts + s->average_dest_pts_delta;
-    }
-
-    ff_dlog(ctx, "set_work_frame_pts() calculated pts:%"PRId64" at dest time base:%u/%u\n",
-            s->pts, s->dest_time_base.num, s->dest_time_base.den);
-}
-
 static av_cold int init(AVFilterContext *ctx)
 {
     FrameRateContext *s = ctx->priv;
-
-    s->dest_frame_num = 0;
-
-    s->crnt = (N_SRCE)>>1;
-    s->last = N_SRCE - 1;
-
-    s->next = s->crnt - 1;
-    s->prev = s->crnt + 1;
-
+    s->start_pts = AV_NOPTS_VALUE;
     return 0;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
     FrameRateContext *s = ctx->priv;
-    int i;
-
-    for (i = s->frst; i < s->last; i++) {
-        if (s->srce[i] && (s->srce[i] != s->srce[i + 1]))
-            av_frame_free(&s->srce[i]);
-    }
-    av_frame_free(&s->srce[s->last]);
+    av_frame_free(&s->f0);
+    av_frame_free(&s->f1);
 }
 
 static int query_formats(AVFilterContext *ctx)
@@ -554,6 +291,50 @@
     return ff_set_common_formats(ctx, fmts_list);
 }
 
+static void blend_frames_c(BLEND_FUNC_PARAMS)
+{
+    int line, pixel;
+    for (line = 0; line < height; line++) {
+        for (pixel = 0; pixel < width; pixel++)
+            dst[pixel] = ((src1[pixel] * factor1) + (src2[pixel] * factor2) + half) >> BLEND_FACTOR_DEPTH8;
+        src1 += src1_linesize;
+        src2 += src2_linesize;
+        dst  += dst_linesize;
+    }
+}
+
+static void blend_frames16_c(BLEND_FUNC_PARAMS)
+{
+    int line, pixel;
+    uint16_t *dstw = (uint16_t *)dst;
+    uint16_t *src1w = (uint16_t *)src1;
+    uint16_t *src2w = (uint16_t *)src2;
+    width /= 2;
+    src1_linesize /= 2;
+    src2_linesize /= 2;
+    dst_linesize /= 2;
+    for (line = 0; line < height; line++) {
+        for (pixel = 0; pixel < width; pixel++)
+            dstw[pixel] = ((src1w[pixel] * factor1) + (src2w[pixel] * factor2) + half) >> BLEND_FACTOR_DEPTH16;
+        src1w += src1_linesize;
+        src2w += src2_linesize;
+        dstw  += dst_linesize;
+    }
+}
+
+void ff_framerate_init(FrameRateContext *s)
+{
+    if (s->bitdepth == 8) {
+        s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH8;
+        s->blend = blend_frames_c;
+    } else {
+        s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH16;
+        s->blend = blend_frames16_c;
+    }
+    if (ARCH_X86)
+        ff_framerate_init_x86(s);
+}
+
 static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
@@ -575,42 +356,86 @@
 
     s->srce_time_base = inlink->time_base;
 
-    if (s->bitdepth == 8)
-        s->blend_frames = blend_frames8;
-    else
-        s->blend_frames = blend_frames16;
-    s->max = 1 << (s->bitdepth);
+    ff_framerate_init(s);
 
     return 0;
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
+static int activate(AVFilterContext *ctx)
 {
-    int ret;
-    AVFilterContext *ctx = inlink->dst;
+    int ret, status;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
     FrameRateContext *s = ctx->priv;
+    AVFrame *inpicref;
+    int64_t pts;
 
-    // we have one new frame
-    s->pending_srce_frames++;
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
 
-    if (inpicref->interlaced_frame)
-        av_log(ctx, AV_LOG_WARNING, "Interlaced frame found - the output will not be correct.\n");
-
-    // store the pointer to the new frame
-    av_frame_free(&s->srce[s->frst]);
-    s->srce[s->frst] = inpicref;
-
-    if (!s->pending_end_frame && s->srce[s->crnt]) {
-        set_work_frame_pts(ctx);
-        s->pending_end_frame = 1;
-    } else {
-        set_srce_frame_dest_pts(ctx);
-    }
-
-    ret = process_work_frame(ctx, 1);
+retry:
+    ret = process_work_frame(ctx);
     if (ret < 0)
         return ret;
-    return ret ? ff_filter_frame(ctx->outputs[0], s->work) : 0;
+    else if (ret == 1)
+        return ff_filter_frame(outlink, s->work);
+
+    ret = ff_inlink_consume_frame(inlink, &inpicref);
+    if (ret < 0)
+        return ret;
+
+    if (inpicref) {
+        if (inpicref->interlaced_frame)
+            av_log(ctx, AV_LOG_WARNING, "Interlaced frame found - the output will not be correct.\n");
+
+        if (inpicref->pts == AV_NOPTS_VALUE) {
+            av_log(ctx, AV_LOG_WARNING, "Ignoring frame without PTS.\n");
+            av_frame_free(&inpicref);
+        }
+    }
+
+    if (inpicref) {
+        pts = av_rescale_q(inpicref->pts, s->srce_time_base, s->dest_time_base);
+
+        if (s->f1 && pts == s->pts1) {
+            av_log(ctx, AV_LOG_WARNING, "Ignoring frame with same PTS.\n");
+            av_frame_free(&inpicref);
+        }
+    }
+
+    if (inpicref) {
+        av_frame_free(&s->f0);
+        s->f0 = s->f1;
+        s->pts0 = s->pts1;
+        s->f1 = inpicref;
+        s->pts1 = pts;
+        s->delta = s->pts1 - s->pts0;
+        s->score = -1.0;
+
+        if (s->delta < 0) {
+            av_log(ctx, AV_LOG_WARNING, "PTS discontinuity.\n");
+            s->start_pts = s->pts1;
+            s->n = 0;
+            av_frame_free(&s->f0);
+        }
+
+        if (s->start_pts == AV_NOPTS_VALUE)
+            s->start_pts = s->pts1;
+
+        goto retry;
+    }
+
+    if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+        if (!s->flush) {
+            s->flush = 1;
+            goto retry;
+        }
+        ff_outlink_set_status(outlink, status, pts);
+        return 0;
+    }
+
+    FF_FILTER_FORWARD_WANTED(outlink, inlink);
+
+    return FFERROR_NOT_READY;
 }
 
 static int config_output(AVFilterLink *outlink)
@@ -658,62 +483,11 @@
     return 0;
 }
 
-static int request_frame(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    FrameRateContext *s = ctx->priv;
-    int ret, i;
-
-    ff_dlog(ctx, "request_frame()\n");
-
-    // if there is no "next" frame AND we are not in flush then get one from our input filter
-    if (!s->srce[s->frst] && !s->flush)
-        goto request;
-
-    ff_dlog(ctx, "request_frame() REPEAT or FLUSH\n");
-
-    if (s->pending_srce_frames <= 0) {
-        ff_dlog(ctx, "request_frame() nothing else to do, return:EOF\n");
-        return AVERROR_EOF;
-    }
-
-    // otherwise, make brand-new frame and pass to our output filter
-    ff_dlog(ctx, "request_frame() FLUSH\n");
-
-    // back fill at end of file when source has no more frames
-    for (i = s->last; i > s->frst; i--) {
-        if (!s->srce[i - 1] && s->srce[i]) {
-            ff_dlog(ctx, "request_frame() copy:%d to:%d\n", i, i - 1);
-            s->srce[i - 1] = s->srce[i];
-        }
-    }
-
-    set_work_frame_pts(ctx);
-    ret = process_work_frame(ctx, 0);
-    if (ret < 0)
-        return ret;
-    if (ret)
-        return ff_filter_frame(ctx->outputs[0], s->work);
-
-request:
-    ff_dlog(ctx, "request_frame() call source's request_frame()\n");
-    ret = ff_request_frame(ctx->inputs[0]);
-    if (ret < 0 && (ret != AVERROR_EOF)) {
-        ff_dlog(ctx, "request_frame() source's request_frame() returned error:%d\n", ret);
-        return ret;
-    } else if (ret == AVERROR_EOF) {
-        s->flush = 1;
-    }
-    ff_dlog(ctx, "request_frame() source's request_frame() returned:%d\n", ret);
-    return 0;
-}
-
 static const AVFilterPad framerate_inputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
         .config_props = config_input,
-        .filter_frame = filter_frame,
     },
     { NULL }
 };
@@ -722,7 +496,6 @@
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
-        .request_frame = request_frame,
         .config_props  = config_output,
     },
     { NULL }
@@ -738,4 +511,6 @@
     .query_formats = query_formats,
     .inputs        = framerate_inputs,
     .outputs       = framerate_outputs,
+    .flags         = AVFILTER_FLAG_SLICE_THREADS,
+    .activate      = activate,
 };

diff --git a/libavfilter/vf_frei0r.c b/libavfilter/vf_frei0r.c
index 8aeac08..c775ed1 100644
--- a/libavfilter/vf_frei0r.c
+++ b/libavfilter/vf_frei0r.c

@@ -93,6 +93,7 @@
         double d;
         f0r_param_color_t col;
         f0r_param_position_t pos;
+        f0r_param_string *str;
     } val;
     char *tail;
     uint8_t rgba[4];
@@ -124,6 +125,10 @@
         if (sscanf(param, "%lf/%lf", &val.pos.x, &val.pos.y) != 2)
             goto fail;
         break;
+
+    case F0R_PARAM_STRING:
+        val.str = param;
+        break;
     }
 
     s->set_param_value(s->instance, &val, index);

diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index fd901e2..f77a3ff 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c

@@ -202,7 +202,7 @@
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
         AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
 

diff --git a/libavfilter/vf_hflip.c b/libavfilter/vf_hflip.c
index cf20c19..b77afc7 100644
--- a/libavfilter/vf_hflip.c
+++ b/libavfilter/vf_hflip.c

@@ -29,6 +29,7 @@
 #include "libavutil/opt.h"
 #include "avfilter.h"
 #include "formats.h"
+#include "hflip.h"
 #include "internal.h"
 #include "video.h"
 #include "libavutil/pixdesc.h"
@@ -36,13 +37,6 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
 
-typedef struct FlipContext {
-    const AVClass *class;
-    int max_step[4];    ///< max pixel step for each plane, expressed as a number of bytes
-    int planewidth[4];  ///< width of each plane
-    int planeheight[4]; ///< height of each plane
-} FlipContext;
-
 static const AVOption hflip_options[] = {
     { NULL }
 };
@@ -67,12 +61,77 @@
     return ff_set_common_formats(ctx, pix_fmts);
 }
 
+static void hflip_byte_c(const uint8_t *src, uint8_t *dst, int w)
+{
+    int j;
+
+    for (j = 0; j < w; j++)
+        dst[j] = src[-j];
+}
+
+static void hflip_short_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+    const uint16_t *src = (const uint16_t *)ssrc;
+    uint16_t *dst = (uint16_t *)ddst;
+    int j;
+
+    for (j = 0; j < w; j++)
+        dst[j] = src[-j];
+}
+
+static void hflip_dword_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+    const uint32_t *src = (const uint32_t *)ssrc;
+    uint32_t *dst = (uint32_t *)ddst;
+    int j;
+
+    for (j = 0; j < w; j++)
+        dst[j] = src[-j];
+}
+
+static void hflip_b24_c(const uint8_t *src, uint8_t *dst, int w)
+{
+    const uint8_t *in  = src;
+    uint8_t *out = dst;
+    int j;
+
+    for (j = 0; j < w; j++, out += 3, in -= 3) {
+        int32_t v = AV_RB24(in);
+
+        AV_WB24(out, v);
+    }
+}
+
+static void hflip_b48_c(const uint8_t *src, uint8_t *dst, int w)
+{
+    const uint8_t *in  = src;
+    uint8_t *out = dst;
+    int j;
+
+    for (j = 0; j < w; j++, out += 6, in -= 6) {
+        int64_t v = AV_RB48(in);
+
+        AV_WB48(out, v);
+    }
+}
+
+static void hflip_qword_c(const uint8_t *ssrc, uint8_t *ddst, int w)
+{
+    const uint64_t *src = (const uint64_t *)ssrc;
+    uint64_t *dst = (uint64_t *)ddst;
+    int j;
+
+    for (j = 0; j < w; j++)
+        dst[j] = src[-j];
+}
+
 static int config_props(AVFilterLink *inlink)
 {
     FlipContext *s = inlink->dst->priv;
     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
     const int hsub = pix_desc->log2_chroma_w;
     const int vsub = pix_desc->log2_chroma_h;
+    int nb_planes;
 
     av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc);
     s->planewidth[0]  = s->planewidth[3]  = inlink->w;
@@ -80,6 +139,30 @@
     s->planeheight[0] = s->planeheight[3] = inlink->h;
     s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
 
+    nb_planes = av_pix_fmt_count_planes(inlink->format);
+
+    return ff_hflip_init(s, s->max_step, nb_planes);
+}
+
+int ff_hflip_init(FlipContext *s, int step[4], int nb_planes)
+{
+    int i;
+
+    for (i = 0; i < nb_planes; i++) {
+        switch (step[i]) {
+        case 1: s->flip_line[i] = hflip_byte_c;  break;
+        case 2: s->flip_line[i] = hflip_short_c; break;
+        case 3: s->flip_line[i] = hflip_b24_c;   break;
+        case 4: s->flip_line[i] = hflip_dword_c; break;
+        case 6: s->flip_line[i] = hflip_b48_c;   break;
+        case 8: s->flip_line[i] = hflip_qword_c; break;
+        default:
+            return AVERROR_BUG;
+        }
+    }
+    if (ARCH_X86)
+        ff_hflip_init_x86(s, step, nb_planes);
+
     return 0;
 }
 
@@ -94,7 +177,7 @@
     AVFrame *in = td->in;
     AVFrame *out = td->out;
     uint8_t *inrow, *outrow;
-    int i, j, plane, step;
+    int i, plane, step;
 
     for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; plane++) {
         const int width  = s->planewidth[plane];
@@ -107,45 +190,7 @@
         outrow = out->data[plane] + start * out->linesize[plane];
         inrow  = in ->data[plane] + start * in->linesize[plane] + (width - 1) * step;
         for (i = start; i < end; i++) {
-            switch (step) {
-            case 1:
-                for (j = 0; j < width; j++)
-                    outrow[j] = inrow[-j];
-            break;
-
-            case 2:
-            {
-                uint16_t *outrow16 = (uint16_t *)outrow;
-                uint16_t * inrow16 = (uint16_t *) inrow;
-                for (j = 0; j < width; j++)
-                    outrow16[j] = inrow16[-j];
-            }
-            break;
-
-            case 3:
-            {
-                uint8_t *in  =  inrow;
-                uint8_t *out = outrow;
-                for (j = 0; j < width; j++, out += 3, in -= 3) {
-                    int32_t v = AV_RB24(in);
-                    AV_WB24(out, v);
-                }
-            }
-            break;
-
-            case 4:
-            {
-                uint32_t *outrow32 = (uint32_t *)outrow;
-                uint32_t * inrow32 = (uint32_t *) inrow;
-                for (j = 0; j < width; j++)
-                    outrow32[j] = inrow32[-j];
-            }
-            break;
-
-            default:
-                for (j = 0; j < width; j++)
-                    memcpy(outrow + j*step, inrow - j*step, step);
-            }
+            s->flip_line[plane](inrow, outrow, width);
 
             inrow  += in ->linesize[plane];
             outrow += out->linesize[plane];

diff --git a/libavfilter/vf_hue.c b/libavfilter/vf_hue.c
index 45a5a1a..323333b 100644
--- a/libavfilter/vf_hue.c
+++ b/libavfilter/vf_hue.c

@@ -80,6 +80,9 @@
     uint8_t  lut_l[256];
     uint8_t  lut_u[256][256];
     uint8_t  lut_v[256][256];
+    uint16_t  lut_l16[65536];
+    uint16_t  lut_u10[1024][1024];
+    uint16_t  lut_v10[1024][1024];
 } HueContext;
 
 #define OFFSET(x) offsetof(HueContext, x)
@@ -117,6 +120,9 @@
     for (i = 0; i < 256; i++) {
         h->lut_l[i] = av_clip_uint8(i + b * 25.5);
     }
+    for (i = 0; i < 65536; i++) {
+        h->lut_l16[i] = av_clip_uintp2(i + b * 102.4, 10);
+    }
 }
 
 static inline void create_chrominance_lut(HueContext *h, const int32_t c,
@@ -148,6 +154,25 @@
             h->lut_v[i][j] = av_clip_uint8(new_v);
         }
     }
+    for (i = 0; i < 1024; i++) {
+        for (j = 0; j < 1024; j++) {
+            u = i - 512;
+            v = j - 512;
+            /*
+             * Apply the rotation of the vector : (c * u) - (s * v)
+             *                                    (s * u) + (c * v)
+             * De-normalize the components (without forgetting to scale 512
+             * by << 16)
+             * Finally scale back the result by >> 16
+             */
+            new_u = ((c * u) - (s * v) + (1 << 15) + (512 << 16)) >> 16;
+            new_v = ((s * u) + (c * v) + (1 << 15) + (512 << 16)) >> 16;
+
+            /* Prevent a potential overflow */
+            h->lut_u10[i][j] = av_clip_uintp2(new_u, 10);
+            h->lut_v10[i][j] = av_clip_uintp2(new_v, 10);
+        }
+    }
 }
 
 static int set_expr(AVExpr **pexpr_ptr, char **expr_ptr,
@@ -231,6 +256,11 @@
         AV_PIX_FMT_YUV410P,      AV_PIX_FMT_YUV440P,
         AV_PIX_FMT_YUVA444P,     AV_PIX_FMT_YUVA422P,
         AV_PIX_FMT_YUVA420P,
+        AV_PIX_FMT_YUV444P10,      AV_PIX_FMT_YUV422P10,
+        AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_YUV440P10,
+        AV_PIX_FMT_YUVA444P10,     AV_PIX_FMT_YUVA422P10,
+        AV_PIX_FMT_YUVA420P10,
         AV_PIX_FMT_NONE
     };
     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
@@ -271,6 +301,22 @@
     }
 }
 
+static void apply_luma_lut10(HueContext *s,
+                             uint16_t *ldst, const int dst_linesize,
+                             uint16_t *lsrc, const int src_linesize,
+                             int w, int h)
+{
+    int i;
+
+    while (h--) {
+        for (i = 0; i < w; i++)
+            ldst[i] = s->lut_l16[lsrc[i]];
+
+        lsrc += src_linesize;
+        ldst += dst_linesize;
+    }
+}
+
 static void apply_lut(HueContext *s,
                       uint8_t *udst, uint8_t *vdst, const int dst_linesize,
                       uint8_t *usrc, uint8_t *vsrc, const int src_linesize,
@@ -294,6 +340,29 @@
     }
 }
 
+static void apply_lut10(HueContext *s,
+                      uint16_t *udst, uint16_t *vdst, const int dst_linesize,
+                      uint16_t *usrc, uint16_t *vsrc, const int src_linesize,
+                      int w, int h)
+{
+    int i;
+
+    while (h--) {
+        for (i = 0; i < w; i++) {
+            const int u = av_clip_uintp2(usrc[i], 10);
+            const int v = av_clip_uintp2(vsrc[i], 10);
+
+            udst[i] = s->lut_u10[u][v];
+            vdst[i] = s->lut_v10[u][v];
+        }
+
+        usrc += src_linesize;
+        vsrc += src_linesize;
+        udst += dst_linesize;
+        vdst += dst_linesize;
+    }
+}
+
 #define TS2D(ts) ((ts) == AV_NOPTS_VALUE ? NAN : (double)(ts))
 #define TS2T(ts, tb) ((ts) == AV_NOPTS_VALUE ? NAN : (double)(ts) * av_q2d(tb))
 
@@ -305,6 +374,8 @@
     const int32_t old_hue_sin = hue->hue_sin, old_hue_cos = hue->hue_cos;
     const float old_brightness = hue->brightness;
     int direct = 0;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const int bps = desc->comp[0].depth > 8 ? 2 : 1;
 
     if (av_frame_is_writable(inpic)) {
         direct = 1;
@@ -367,21 +438,31 @@
     if (!direct) {
         if (!hue->brightness)
             av_image_copy_plane(outpic->data[0], outpic->linesize[0],
-                                inpic->data[0],  inpic->linesize[0],
-                                inlink->w, inlink->h);
+                                inpic->data[0],   inpic->linesize[0],
+                                inlink->w * bps, inlink->h);
         if (inpic->data[3])
             av_image_copy_plane(outpic->data[3], outpic->linesize[3],
-                                inpic->data[3],  inpic->linesize[3],
-                                inlink->w, inlink->h);
+                                inpic->data[3],   inpic->linesize[3],
+                                inlink->w * bps, inlink->h);
     }
 
-    apply_lut(hue, outpic->data[1], outpic->data[2], outpic->linesize[1],
-              inpic->data[1],  inpic->data[2],  inpic->linesize[1],
-              AV_CEIL_RSHIFT(inlink->w, hue->hsub),
-              AV_CEIL_RSHIFT(inlink->h, hue->vsub));
-    if (hue->brightness)
-        apply_luma_lut(hue, outpic->data[0], outpic->linesize[0],
-                       inpic->data[0], inpic->linesize[0], inlink->w, inlink->h);
+    if (bps > 1) {
+        apply_lut10(hue, (uint16_t*)outpic->data[1], (uint16_t*)outpic->data[2], outpic->linesize[1]/2,
+                         (uint16_t*) inpic->data[1], (uint16_t*) inpic->data[2],  inpic->linesize[1]/2,
+                    AV_CEIL_RSHIFT(inlink->w, hue->hsub),
+                    AV_CEIL_RSHIFT(inlink->h, hue->vsub));
+        if (hue->brightness)
+            apply_luma_lut10(hue, (uint16_t*)outpic->data[0], outpic->linesize[0]/2,
+                                  (uint16_t*) inpic->data[0],  inpic->linesize[0]/2, inlink->w, inlink->h);
+    } else {
+        apply_lut(hue, outpic->data[1], outpic->data[2], outpic->linesize[1],
+                       inpic->data[1],   inpic->data[2],  inpic->linesize[1],
+                  AV_CEIL_RSHIFT(inlink->w, hue->hsub),
+                  AV_CEIL_RSHIFT(inlink->h, hue->vsub));
+        if (hue->brightness)
+            apply_luma_lut(hue, outpic->data[0], outpic->linesize[0],
+                                inpic->data[0],  inpic->linesize[0], inlink->w, inlink->h);
+    }
 
     if (!direct)
         av_frame_free(&inpic);

diff --git a/libavfilter/vf_hwmap.c b/libavfilter/vf_hwmap.c
index 8277241..290559a 100644
--- a/libavfilter/vf_hwmap.c
+++ b/libavfilter/vf_hwmap.c

@@ -114,7 +114,8 @@
             err = av_hwframe_ctx_create_derived(&ctx->hwframes_ref,
                                                 outlink->format,
                                                 device,
-                                                inlink->hw_frames_ctx, 0);
+                                                inlink->hw_frames_ctx,
+                                                ctx->mode);
             if (err < 0) {
                 av_log(avctx, AV_LOG_ERROR, "Failed to create derived "
                        "frames context: %d.\n", err);
@@ -142,7 +143,9 @@
             frames->sw_format = hwfc->sw_format;
             frames->width     = hwfc->width;
             frames->height    = hwfc->height;
-            frames->initial_pool_size = 64;
+
+            if (avctx->extra_hw_frames >= 0)
+                frames->initial_pool_size = 2 + avctx->extra_hw_frames;
 
             err = av_hwframe_ctx_init(ctx->hwframes_ref);
             if (err < 0) {
@@ -222,6 +225,9 @@
         hwfc->width     = inlink->w;
         hwfc->height    = inlink->h;
 
+        if (avctx->extra_hw_frames >= 0)
+            hwfc->initial_pool_size = 2 + avctx->extra_hw_frames;
+
         err = av_hwframe_ctx_init(ctx->hwframes_ref);
         if (err < 0) {
             av_log(avctx, AV_LOG_ERROR, "Failed to create frame "

diff --git a/libavfilter/vf_hwupload.c b/libavfilter/vf_hwupload.c
index 157686b..50bc7e1 100644
--- a/libavfilter/vf_hwupload.c
+++ b/libavfilter/vf_hwupload.c

@@ -131,6 +131,9 @@
     ctx->hwframes->width     = inlink->w;
     ctx->hwframes->height    = inlink->h;
 
+    if (avctx->extra_hw_frames >= 0)
+        ctx->hwframes->initial_pool_size = 2 + avctx->extra_hw_frames;
+
     err = av_hwframe_ctx_init(ctx->hwframes_ref);
     if (err < 0)
         goto fail;

diff --git a/libavfilter/vf_hwupload_cuda.c b/libavfilter/vf_hwupload_cuda.c
index 063f028..4d83e6c 100644
--- a/libavfilter/vf_hwupload_cuda.c
+++ b/libavfilter/vf_hwupload_cuda.c

@@ -59,6 +59,7 @@
     static const enum AVPixelFormat input_pix_fmts[] = {
         AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P,
         AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32,
         AV_PIX_FMT_NONE,
     };
     static const enum AVPixelFormat output_pix_fmts[] = {

diff --git a/libavfilter/vf_hysteresis.c b/libavfilter/vf_hysteresis.c
index a788e1b..ecb917f 100644
--- a/libavfilter/vf_hysteresis.c
+++ b/libavfilter/vf_hysteresis.c

@@ -33,6 +33,7 @@
 
 typedef struct HysteresisContext {
     const AVClass *class;
+    FFFrameSync fs;
 
     int planes;
     int threshold;
@@ -40,7 +41,6 @@
     int width[4], height[4];
     int nb_planes;
     int depth;
-    FFFrameSync fs;
 
     uint8_t *map;
     uint32_t *xy;
@@ -58,8 +58,6 @@
     { NULL }
 };
 
-AVFILTER_DEFINE_CLASS(hysteresis);
-
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat pix_fmts[] = {
@@ -79,7 +77,7 @@
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
         AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
 
@@ -301,20 +299,13 @@
         av_log(ctx, AV_LOG_ERROR, "inputs must be of same pixel format\n");
         return AVERROR(EINVAL);
     }
-    if (base->w                       != alt->w ||
-        base->h                       != alt->h ||
-        base->sample_aspect_ratio.num != alt->sample_aspect_ratio.num ||
-        base->sample_aspect_ratio.den != alt->sample_aspect_ratio.den) {
+    if (base->w != alt->w || base->h != alt->h) {
         av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
-               "(size %dx%d, SAR %d:%d) do not match the corresponding "
-               "second input link %s parameters (%dx%d, SAR %d:%d)\n",
+               "(size %dx%d) do not match the corresponding "
+               "second input link %s parameters (size %dx%d)\n",
                ctx->input_pads[0].name, base->w, base->h,
-               base->sample_aspect_ratio.num,
-               base->sample_aspect_ratio.den,
                ctx->input_pads[1].name,
-               alt->w, alt->h,
-               alt->sample_aspect_ratio.num,
-               alt->sample_aspect_ratio.den);
+               alt->w, alt->h);
         return AVERROR(EINVAL);
     }
 
@@ -357,6 +348,8 @@
     av_freep(&s->xy);
 }
 
+FRAMESYNC_DEFINE_CLASS(hysteresis, HysteresisContext, fs);
+
 static const AVFilterPad hysteresis_inputs[] = {
     {
         .name         = "base",
@@ -382,6 +375,7 @@
 AVFilter ff_vf_hysteresis = {
     .name          = "hysteresis",
     .description   = NULL_IF_CONFIG_SMALL("Grow first stream into second stream by connecting components."),
+    .preinit       = hysteresis_framesync_preinit,
     .priv_size     = sizeof(HysteresisContext),
     .uninit        = uninit,
     .query_formats = query_formats,

diff --git a/libavfilter/vf_idet.c b/libavfilter/vf_idet.c
index 14f031a..02ae2ed 100644
--- a/libavfilter/vf_idet.c
+++ b/libavfilter/vf_idet.c

@@ -392,6 +392,8 @@
         AV_PIX_FMT_YUV422P16,
         AV_PIX_FMT_YUV444P16,
         AV_PIX_FMT_YUVA420P,
+        AV_PIX_FMT_YUVA422P,
+        AV_PIX_FMT_YUVA444P,
         AV_PIX_FMT_NONE
     };
     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);

diff --git a/libavfilter/vf_lensfun.c b/libavfilter/vf_lensfun.c
new file mode 100644
index 0000000..fd14210
--- /dev/null
+++ b/libavfilter/vf_lensfun.c

@@ -0,0 +1,548 @@
+/*
+ * Copyright (C) 2007 by Andrew Zabolotny (author of lensfun, from which this filter derives from)
+ * Copyright (C) 2018 Stephen Seo
+ *
+ * This file is part of FFmpeg.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file
+ * Lensfun filter, applies lens correction with parameters from the lensfun database
+ *
+ * @see https://lensfun.sourceforge.net/
+ */
+
+#include <float.h>
+#include <math.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libswscale/swscale.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+#include <lensfun.h>
+
+#define LANCZOS_RESOLUTION 256
+
+enum Mode {
+    VIGNETTING = 0x1,
+    GEOMETRY_DISTORTION = 0x2,
+    SUBPIXEL_DISTORTION = 0x4
+};
+
+enum InterpolationType {
+    NEAREST,
+    LINEAR,
+    LANCZOS
+};
+
+typedef struct VignettingThreadData {
+    int width, height;
+    uint8_t *data_in;
+    int linesize_in;
+    int pixel_composition;
+    lfModifier *modifier;
+} VignettingThreadData;
+
+typedef struct DistortionCorrectionThreadData {
+    int width, height;
+    const float *distortion_coords;
+    const uint8_t *data_in;
+    uint8_t *data_out;
+    int linesize_in, linesize_out;
+    const float *interpolation;
+    int mode;
+    int interpolation_type;
+} DistortionCorrectionThreadData;
+
+typedef struct LensfunContext {
+    const AVClass *class;
+    const char *make, *model, *lens_model;
+    int mode;
+    float focal_length;
+    float aperture;
+    float focus_distance;
+    int target_geometry;
+    int reverse;
+    int interpolation_type;
+
+    float *distortion_coords;
+    float *interpolation;
+
+    lfLens *lens;
+    lfCamera *camera;
+    lfModifier *modifier;
+} LensfunContext;
+
+#define OFFSET(x) offsetof(LensfunContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+static const AVOption lensfun_options[] = {
+    { "make", "set camera maker", OFFSET(make), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { "model", "set camera model", OFFSET(model), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { "lens_model", "set lens model", OFFSET(lens_model), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { "mode", "set mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=GEOMETRY_DISTORTION}, 0, VIGNETTING | GEOMETRY_DISTORTION | SUBPIXEL_DISTORTION, FLAGS, "mode" },
+        { "vignetting", "fix lens vignetting", 0, AV_OPT_TYPE_CONST, {.i64=VIGNETTING}, 0, 0, FLAGS, "mode" },
+        { "geometry", "correct geometry distortion", 0, AV_OPT_TYPE_CONST, {.i64=GEOMETRY_DISTORTION}, 0, 0, FLAGS, "mode" },
+        { "subpixel", "fix chromatic aberrations", 0, AV_OPT_TYPE_CONST, {.i64=SUBPIXEL_DISTORTION}, 0, 0, FLAGS, "mode" },
+        { "vig_geo", "fix lens vignetting and correct geometry distortion", 0, AV_OPT_TYPE_CONST, {.i64=VIGNETTING | GEOMETRY_DISTORTION}, 0, 0, FLAGS, "mode" },
+        { "vig_subpixel", "fix lens vignetting and chromatic aberrations", 0, AV_OPT_TYPE_CONST, {.i64=VIGNETTING | SUBPIXEL_DISTORTION}, 0, 0, FLAGS, "mode" },
+        { "distortion", "correct geometry distortion and chromatic aberrations", 0, AV_OPT_TYPE_CONST, {.i64=GEOMETRY_DISTORTION | SUBPIXEL_DISTORTION}, 0, 0, FLAGS, "mode" },
+        { "all", NULL, 0, AV_OPT_TYPE_CONST, {.i64=VIGNETTING | GEOMETRY_DISTORTION | SUBPIXEL_DISTORTION}, 0, 0, FLAGS, "mode" },
+    { "focal_length", "focal length of video (zoom; constant for the duration of the use of this filter)", OFFSET(focal_length), AV_OPT_TYPE_FLOAT, {.dbl=18}, 0.0, DBL_MAX, FLAGS },
+    { "aperture", "aperture (constant for the duration of the use of this filter)", OFFSET(aperture), AV_OPT_TYPE_FLOAT, {.dbl=3.5}, 0.0, DBL_MAX, FLAGS },
+    { "focus_distance", "focus distance (constant for the duration of the use of this filter)", OFFSET(focus_distance), AV_OPT_TYPE_FLOAT, {.dbl=1000.0f}, 0.0, DBL_MAX, FLAGS },
+    { "target_geometry", "target geometry of the lens correction (only when geometry correction is enabled)", OFFSET(target_geometry), AV_OPT_TYPE_INT, {.i64=LF_RECTILINEAR}, 0, INT_MAX, FLAGS, "lens_geometry" },
+        { "rectilinear", "rectilinear lens (default)", 0, AV_OPT_TYPE_CONST, {.i64=LF_RECTILINEAR}, 0, 0, FLAGS, "lens_geometry" },
+        { "fisheye", "fisheye lens", 0, AV_OPT_TYPE_CONST, {.i64=LF_FISHEYE}, 0, 0, FLAGS, "lens_geometry" },
+        { "panoramic", "panoramic (cylindrical)", 0, AV_OPT_TYPE_CONST, {.i64=LF_PANORAMIC}, 0, 0, FLAGS, "lens_geometry" },
+        { "equirectangular", "equirectangular", 0, AV_OPT_TYPE_CONST, {.i64=LF_EQUIRECTANGULAR}, 0, 0, FLAGS, "lens_geometry" },
+        { "fisheye_orthographic", "orthographic fisheye", 0, AV_OPT_TYPE_CONST, {.i64=LF_FISHEYE_ORTHOGRAPHIC}, 0, 0, FLAGS, "lens_geometry" },
+        { "fisheye_stereographic", "stereographic fisheye", 0, AV_OPT_TYPE_CONST, {.i64=LF_FISHEYE_STEREOGRAPHIC}, 0, 0, FLAGS, "lens_geometry" },
+        { "fisheye_equisolid", "equisolid fisheye", 0, AV_OPT_TYPE_CONST, {.i64=LF_FISHEYE_EQUISOLID}, 0, 0, FLAGS, "lens_geometry" },
+        { "fisheye_thoby", "fisheye as measured by thoby", 0, AV_OPT_TYPE_CONST, {.i64=LF_FISHEYE_THOBY}, 0, 0, FLAGS, "lens_geometry" },
+    { "reverse", "Does reverse correction (regular image to lens distorted)", OFFSET(reverse), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
+    { "interpolation", "Type of interpolation", OFFSET(interpolation_type), AV_OPT_TYPE_INT, {.i64=LINEAR}, 0, LANCZOS, FLAGS, "interpolation" },
+        { "nearest", NULL, 0, AV_OPT_TYPE_CONST, {.i64=NEAREST}, 0, 0, FLAGS, "interpolation" },
+        { "linear", NULL, 0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "interpolation" },
+        { "lanczos", NULL, 0, AV_OPT_TYPE_CONST, {.i64=LANCZOS}, 0, 0, FLAGS, "interpolation" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(lensfun);
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    LensfunContext *lensfun = ctx->priv;
+    lfDatabase *db;
+    const lfCamera **cameras;
+    const lfLens **lenses;
+
+    if (!lensfun->make) {
+        av_log(ctx, AV_LOG_FATAL, "Option \"make\" not specified\n");
+        return AVERROR(EINVAL);
+    } else if (!lensfun->model) {
+        av_log(ctx, AV_LOG_FATAL, "Option \"model\" not specified\n");
+        return AVERROR(EINVAL);
+    } else if (!lensfun->lens_model) {
+        av_log(ctx, AV_LOG_FATAL, "Option \"lens_model\" not specified\n");
+        return AVERROR(EINVAL);
+    }
+
+    lensfun->lens = lf_lens_new();
+    lensfun->camera = lf_camera_new();
+
+    db = lf_db_new();
+    if (lf_db_load(db) != LF_NO_ERROR) {
+        lf_db_destroy(db);
+        av_log(ctx, AV_LOG_FATAL, "Failed to load lensfun database\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    cameras = lf_db_find_cameras(db, lensfun->make, lensfun->model);
+    if (cameras && *cameras) {
+        lf_camera_copy(lensfun->camera, *cameras);
+        av_log(ctx, AV_LOG_INFO, "Using camera %s\n", lensfun->camera->Model);
+    } else {
+        lf_free(cameras);
+        lf_db_destroy(db);
+        av_log(ctx, AV_LOG_FATAL, "Failed to find camera in lensfun database\n");
+        return AVERROR_INVALIDDATA;
+    }
+    lf_free(cameras);
+
+    lenses = lf_db_find_lenses_hd(db, lensfun->camera, NULL, lensfun->lens_model, 0);
+    if (lenses && *lenses) {
+        lf_lens_copy(lensfun->lens, *lenses);
+        av_log(ctx, AV_LOG_INFO, "Using lens %s\n", lensfun->lens->Model);
+    } else {
+        lf_free(lenses);
+        lf_db_destroy(db);
+        av_log(ctx, AV_LOG_FATAL, "Failed to find lens in lensfun database\n");
+        return AVERROR_INVALIDDATA;
+    }
+    lf_free(lenses);
+
+    lf_db_destroy(db);
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    // Some of the functions provided by lensfun require pixels in RGB format
+    static const enum AVPixelFormat fmts[] = {AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE};
+    AVFilterFormats *fmts_list = ff_make_format_list(fmts);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static float lanczos_kernel(float x)
+{
+    if (x == 0.0f) {
+        return 1.0f;
+    } else if (x > -2.0f && x < 2.0f) {
+        return (2.0f * sin(M_PI * x) * sin(M_PI / 2.0f * x)) / (M_PI * M_PI * x * x);
+    } else {
+        return 0.0f;
+    }
+}
+
+static int config_props(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    LensfunContext *lensfun = ctx->priv;
+    int index;
+    float a;
+    int lensfun_mode = 0;
+
+    if (!lensfun->modifier) {
+        if (lensfun->camera && lensfun->lens) {
+            lensfun->modifier = lf_modifier_new(lensfun->lens,
+                                                lensfun->camera->CropFactor,
+                                                inlink->w,
+                                                inlink->h);
+            if (lensfun->mode & VIGNETTING)
+                lensfun_mode |= LF_MODIFY_VIGNETTING;
+            if (lensfun->mode & GEOMETRY_DISTORTION)
+                lensfun_mode |= LF_MODIFY_DISTORTION | LF_MODIFY_GEOMETRY | LF_MODIFY_SCALE;
+            if (lensfun->mode & SUBPIXEL_DISTORTION)
+                lensfun_mode |= LF_MODIFY_TCA;
+            lf_modifier_initialize(lensfun->modifier,
+                                   lensfun->lens,
+                                   LF_PF_U8,
+                                   lensfun->focal_length,
+                                   lensfun->aperture,
+                                   lensfun->focus_distance,
+                                   0.0,
+                                   lensfun->target_geometry,
+                                   lensfun_mode,
+                                   lensfun->reverse);
+        } else {
+            // lensfun->camera and lensfun->lens should have been initialized
+            return AVERROR_BUG;
+        }
+    }
+
+    if (!lensfun->distortion_coords) {
+        if (lensfun->mode & SUBPIXEL_DISTORTION) {
+            lensfun->distortion_coords = av_malloc(sizeof(float) * inlink->w * inlink->h * 2 * 3);
+            if (!lensfun->distortion_coords)
+                return AVERROR(ENOMEM);
+            if (lensfun->mode & GEOMETRY_DISTORTION) {
+                // apply both geometry and subpixel distortion
+                lf_modifier_apply_subpixel_geometry_distortion(lensfun->modifier,
+                                                               0, 0,
+                                                               inlink->w, inlink->h,
+                                                               lensfun->distortion_coords);
+            } else {
+                // apply only subpixel distortion
+                lf_modifier_apply_subpixel_distortion(lensfun->modifier,
+                                                      0, 0,
+                                                      inlink->w, inlink->h,
+                                                      lensfun->distortion_coords);
+            }
+        } else if (lensfun->mode & GEOMETRY_DISTORTION) {
+            lensfun->distortion_coords = av_malloc(sizeof(float) * inlink->w * inlink->h * 2);
+            if (!lensfun->distortion_coords)
+                return AVERROR(ENOMEM);
+            // apply only geometry distortion
+            lf_modifier_apply_geometry_distortion(lensfun->modifier,
+                                                  0, 0,
+                                                  inlink->w, inlink->h,
+                                                  lensfun->distortion_coords);
+        }
+    }
+
+    if (!lensfun->interpolation)
+        if (lensfun->interpolation_type == LANCZOS) {
+            lensfun->interpolation = av_malloc(sizeof(float) * 4 * LANCZOS_RESOLUTION);
+            if (!lensfun->interpolation)
+                return AVERROR(ENOMEM);
+            for (index = 0; index < 4 * LANCZOS_RESOLUTION; ++index) {
+                if (index == 0) {
+                    lensfun->interpolation[index] = 1.0f;
+                } else {
+                    a = sqrtf((float)index / LANCZOS_RESOLUTION);
+                    lensfun->interpolation[index] = lanczos_kernel(a);
+                }
+            }
+        }
+
+    return 0;
+}
+
+static int vignetting_filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    const VignettingThreadData *thread_data = arg;
+    const int slice_start = thread_data->height *  jobnr      / nb_jobs;
+    const int slice_end   = thread_data->height * (jobnr + 1) / nb_jobs;
+
+    lf_modifier_apply_color_modification(thread_data->modifier,
+                                         thread_data->data_in + slice_start * thread_data->linesize_in,
+                                         0,
+                                         slice_start,
+                                         thread_data->width,
+                                         slice_end - slice_start,
+                                         thread_data->pixel_composition,
+                                         thread_data->linesize_in);
+
+    return 0;
+}
+
+static float square(float x)
+{
+    return x * x;
+}
+
+static int distortion_correction_filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    const DistortionCorrectionThreadData *thread_data = arg;
+    const int slice_start = thread_data->height *  jobnr      / nb_jobs;
+    const int slice_end   = thread_data->height * (jobnr + 1) / nb_jobs;
+
+    int x, y, i, j, rgb_index;
+    float interpolated, new_x, new_y, d, norm;
+    int new_x_int, new_y_int;
+    for (y = slice_start; y < slice_end; ++y)
+        for (x = 0; x < thread_data->width; ++x)
+            for (rgb_index = 0; rgb_index < 3; ++rgb_index) {
+                if (thread_data->mode & SUBPIXEL_DISTORTION) {
+                    // subpixel (and possibly geometry) distortion correction was applied, correct distortion
+                    switch(thread_data->interpolation_type) {
+                    case NEAREST:
+                        new_x_int = thread_data->distortion_coords[x * 2 * 3 + y * thread_data->width * 2 * 3 + rgb_index * 2]     + 0.5f;
+                        new_y_int = thread_data->distortion_coords[x * 2 * 3 + y * thread_data->width * 2 * 3 + rgb_index * 2 + 1] + 0.5f;
+                        if (new_x_int < 0 || new_x_int >= thread_data->width || new_y_int < 0 || new_y_int >= thread_data->height) {
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] = 0;
+                        } else {
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] = thread_data->data_in[new_x_int * 3 + rgb_index + new_y_int * thread_data->linesize_in];
+                        }
+                        break;
+                    case LINEAR:
+                        interpolated = 0.0f;
+                        new_x = thread_data->distortion_coords[x * 2 * 3 + y * thread_data->width * 2 * 3 + rgb_index * 2];
+                        new_x_int = new_x;
+                        new_y = thread_data->distortion_coords[x * 2 * 3 + y * thread_data->width * 2 * 3 + rgb_index * 2 + 1];
+                        new_y_int = new_y;
+                        if (new_x_int < 0 || new_x_int + 1 >= thread_data->width || new_y_int < 0 || new_y_int + 1 >= thread_data->height) {
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] = 0;
+                        } else {
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] =
+                                  thread_data->data_in[ new_x_int      * 3 + rgb_index +  new_y_int      * thread_data->linesize_in] * (new_x_int + 1 - new_x) * (new_y_int + 1 - new_y)
+                                + thread_data->data_in[(new_x_int + 1) * 3 + rgb_index +  new_y_int      * thread_data->linesize_in] * (new_x - new_x_int) * (new_y_int + 1 - new_y)
+                                + thread_data->data_in[ new_x_int      * 3 + rgb_index + (new_y_int + 1) * thread_data->linesize_in] * (new_x_int + 1 - new_x) * (new_y - new_y_int)
+                                + thread_data->data_in[(new_x_int + 1) * 3 + rgb_index + (new_y_int + 1) * thread_data->linesize_in] * (new_x - new_x_int) * (new_y - new_y_int);
+                        }
+                        break;
+                    case LANCZOS:
+                        interpolated = 0.0f;
+                        norm = 0.0f;
+                        new_x = thread_data->distortion_coords[x * 2 * 3 + y * thread_data->width * 2 * 3 + rgb_index * 2];
+                        new_x_int = new_x;
+                        new_y = thread_data->distortion_coords[x * 2 * 3 + y * thread_data->width * 2 * 3 + rgb_index * 2 + 1];
+                        new_y_int = new_y;
+                        for (j = 0; j < 4; ++j)
+                            for (i = 0; i < 4; ++i) {
+                                if (new_x_int + i - 2 < 0 || new_x_int + i - 2 >= thread_data->width || new_y_int + j - 2 < 0 || new_y_int + j - 2 >= thread_data->height)
+                                    continue;
+                                d = square(new_x - (new_x_int + i - 2)) * square(new_y - (new_y_int + j - 2));
+                                if (d >= 4.0f)
+                                    continue;
+                                d = thread_data->interpolation[(int)(d * LANCZOS_RESOLUTION)];
+                                norm += d;
+                                interpolated += thread_data->data_in[(new_x_int + i - 2) * 3 + rgb_index + (new_y_int + j - 2) * thread_data->linesize_in] * d;
+                            }
+                        if (norm == 0.0f) {
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] = 0;
+                        } else {
+                            interpolated /= norm;
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] = interpolated < 0.0f ? 0.0f : interpolated > 255.0f ? 255.0f : interpolated;
+                        }
+                        break;
+                    }
+                } else if (thread_data->mode & GEOMETRY_DISTORTION) {
+                    // geometry distortion correction was applied, correct distortion
+                    switch(thread_data->interpolation_type) {
+                    case NEAREST:
+                        new_x_int = thread_data->distortion_coords[x * 2 + y * thread_data->width * 2]     + 0.5f;
+                        new_y_int = thread_data->distortion_coords[x * 2 + y * thread_data->width * 2 + 1] + 0.5f;
+                        if (new_x_int < 0 || new_x_int >= thread_data->width || new_y_int < 0 || new_y_int >= thread_data->height) {
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] = 0;
+                        } else {
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] = thread_data->data_in[new_x_int * 3 + rgb_index + new_y_int * thread_data->linesize_in];
+                        }
+                        break;
+                    case LINEAR:
+                        interpolated = 0.0f;
+                        new_x = thread_data->distortion_coords[x * 2 + y * thread_data->width * 2];
+                        new_x_int = new_x;
+                        new_y = thread_data->distortion_coords[x * 2 + y * thread_data->width * 2 + 1];
+                        new_y_int = new_y;
+                        if (new_x_int < 0 || new_x_int + 1 >= thread_data->width || new_y_int < 0 || new_y_int + 1 >= thread_data->height) {
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] = 0;
+                        } else {
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] =
+                                  thread_data->data_in[ new_x_int      * 3 + rgb_index +  new_y_int      * thread_data->linesize_in] * (new_x_int + 1 - new_x) * (new_y_int + 1 - new_y)
+                                + thread_data->data_in[(new_x_int + 1) * 3 + rgb_index +  new_y_int      * thread_data->linesize_in] * (new_x - new_x_int) * (new_y_int + 1 - new_y)
+                                + thread_data->data_in[ new_x_int      * 3 + rgb_index + (new_y_int + 1) * thread_data->linesize_in] * (new_x_int + 1 - new_x) * (new_y - new_y_int)
+                                + thread_data->data_in[(new_x_int + 1) * 3 + rgb_index + (new_y_int + 1) * thread_data->linesize_in] * (new_x - new_x_int) * (new_y - new_y_int);
+                        }
+                        break;
+                    case LANCZOS:
+                        interpolated = 0.0f;
+                        norm = 0.0f;
+                        new_x = thread_data->distortion_coords[x * 2     + y * thread_data->width * 2];
+                        new_x_int = new_x;
+                        new_y = thread_data->distortion_coords[x * 2 + 1 + y * thread_data->width * 2];
+                        new_y_int = new_y;
+                        for (j = 0; j < 4; ++j)
+                            for (i = 0; i < 4; ++i) {
+                                if (new_x_int + i - 2 < 0 || new_x_int + i - 2 >= thread_data->width || new_y_int + j - 2 < 0 || new_y_int + j - 2 >= thread_data->height)
+                                    continue;
+                                d = square(new_x - (new_x_int + i - 2)) * square(new_y - (new_y_int + j - 2));
+                                if (d >= 4.0f)
+                                    continue;
+                                d = thread_data->interpolation[(int)(d * LANCZOS_RESOLUTION)];
+                                norm += d;
+                                interpolated += thread_data->data_in[(new_x_int + i - 2) * 3 + rgb_index + (new_y_int + j - 2) * thread_data->linesize_in] * d;
+                            }
+                        if (norm == 0.0f) {
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] = 0;
+                        } else {
+                            interpolated /= norm;
+                            thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] = interpolated < 0.0f ? 0.0f : interpolated > 255.0f ? 255.0f : interpolated;
+                        }
+                        break;
+                    }
+                } else {
+                    // no distortion correction was applied
+                    thread_data->data_out[x * 3 + rgb_index + y * thread_data->linesize_out] = thread_data->data_in[x * 3 + rgb_index + y * thread_data->linesize_in];
+                }
+            }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    LensfunContext *lensfun = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+    VignettingThreadData vignetting_thread_data;
+    DistortionCorrectionThreadData distortion_correction_thread_data;
+
+    if (lensfun->mode & VIGNETTING) {
+        av_frame_make_writable(in);
+
+        vignetting_thread_data = (VignettingThreadData) {
+            .width = inlink->w,
+            .height = inlink->h,
+            .data_in = in->data[0],
+            .linesize_in = in->linesize[0],
+            .pixel_composition = LF_CR_3(RED, GREEN, BLUE),
+            .modifier = lensfun->modifier
+        };
+
+        ctx->internal->execute(ctx,
+                               vignetting_filter_slice,
+                               &vignetting_thread_data,
+                               NULL,
+                               FFMIN(outlink->h, ctx->graph->nb_threads));
+    }
+
+    if (lensfun->mode & (GEOMETRY_DISTORTION | SUBPIXEL_DISTORTION)) {
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+
+        distortion_correction_thread_data = (DistortionCorrectionThreadData) {
+            .width = inlink->w,
+            .height = inlink->h,
+            .distortion_coords = lensfun->distortion_coords,
+            .data_in = in->data[0],
+            .data_out = out->data[0],
+            .linesize_in = in->linesize[0],
+            .linesize_out = out->linesize[0],
+            .interpolation = lensfun->interpolation,
+            .mode = lensfun->mode,
+            .interpolation_type = lensfun->interpolation_type
+        };
+
+        ctx->internal->execute(ctx,
+                               distortion_correction_filter_slice,
+                               &distortion_correction_thread_data,
+                               NULL,
+                               FFMIN(outlink->h, ctx->graph->nb_threads));
+
+        av_frame_free(&in);
+        return ff_filter_frame(outlink, out);
+    } else {
+        return ff_filter_frame(outlink, in);
+    }
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    LensfunContext *lensfun = ctx->priv;
+
+    if (lensfun->camera)
+        lf_camera_destroy(lensfun->camera);
+    if (lensfun->lens)
+        lf_lens_destroy(lensfun->lens);
+    if (lensfun->modifier)
+        lf_modifier_destroy(lensfun->modifier);
+    if (lensfun->distortion_coords)
+        av_free(lensfun->distortion_coords);
+    if (lensfun->interpolation)
+        av_free(lensfun->interpolation);
+}
+
+static const AVFilterPad lensfun_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_props,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad lensfun_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_lensfun = {
+    .name          = "lensfun",
+    .description   = NULL_IF_CONFIG_SMALL("Apply correction to an image based on info derived from the lensfun database."),
+    .priv_size     = sizeof(LensfunContext),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = lensfun_inputs,
+    .outputs       = lensfun_outputs,
+    .priv_class    = &lensfun_class,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+};

diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c
index 2c3a9f3..249e50c 100644
--- a/libavfilter/vf_libvmaf.c
+++ b/libavfilter/vf_libvmaf.c

@@ -40,10 +40,10 @@
     const AVClass *class;
     FFFrameSync fs;
     const AVPixFmtDescriptor *desc;
-    char *format;
     int width;
     int height;
     double vmaf_score;
+    int vmaf_thread_created;
     pthread_t vmaf_thread;
     pthread_mutex_t lock;
     pthread_cond_t cond;
@@ -62,6 +62,10 @@
     int ssim;
     int ms_ssim;
     char *pool;
+    int n_threads;
+    int n_subsample;
+    int enable_conf_interval;
+    int error;
 } LIBVMAFContext;
 
 #define OFFSET(x) offsetof(LIBVMAFContext, x)
@@ -77,6 +81,9 @@
     {"ssim",  "Enables computing ssim along with vmaf.",                                OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
     {"ms_ssim",  "Enables computing ms-ssim along with vmaf.",                          OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
     {"pool",  "Set the pool method to be used for computing vmaf.",                     OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
+    {"n_threads", "Set number of threads to be used when computing vmaf.",              OFFSET(n_threads), AV_OPT_TYPE_INT, {.i64=0}, 0, UINT_MAX, FLAGS},
+    {"n_subsample", "Set interval for frame subsampling used when computing vmaf.",     OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=1}, 1, UINT_MAX, FLAGS},
+    {"enable_conf_interval",  "Enables confidence interval.",                           OFFSET(enable_conf_interval), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
     { NULL }
 };
 
@@ -130,6 +137,8 @@
     \
     ret = !s->frame_set;                                                        \
     \
+    av_frame_unref(s->gref);                                                    \
+    av_frame_unref(s->gmain);                                                   \
     s->frame_set = 0;                                                           \
     \
     pthread_cond_signal(&s->cond);                                              \
@@ -149,6 +158,7 @@
 {
     int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
                       int stride, void *ctx);
+    char *format;
 
     if (s->desc->comp[0].depth <= 8) {
         read_frame = read_frame_8bit;
@@ -156,48 +166,66 @@
         read_frame = read_frame_10bit;
     }
 
-    s->vmaf_score = compute_vmaf(s->format, s->width, s->height, read_frame, s,
-                                 s->model_path, s->log_path, s->log_fmt, 0, 0,
-                                 s->enable_transform, s->phone_model, s->psnr,
-                                 s->ssim, s->ms_ssim, s->pool);
+    format = (char *) s->desc->name;
+
+    s->error = compute_vmaf(&s->vmaf_score, format, s->width, s->height,
+                            read_frame, s, s->model_path, s->log_path,
+                            s->log_fmt, 0, 0, s->enable_transform,
+                            s->phone_model, s->psnr, s->ssim,
+                            s->ms_ssim, s->pool,
+                            s->n_threads, s->n_subsample, s->enable_conf_interval);
 }
 
 static void *call_vmaf(void *ctx)
 {
     LIBVMAFContext *s = (LIBVMAFContext *) ctx;
     compute_vmaf_score(s);
-    av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
+    if (!s->error) {
+        av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
+    } else {
+        pthread_mutex_lock(&s->lock);
+        pthread_cond_signal(&s->cond);
+        pthread_mutex_unlock(&s->lock);
+    }
     pthread_exit(NULL);
+    return NULL;
 }
 
 static int do_vmaf(FFFrameSync *fs)
 {
     AVFilterContext *ctx = fs->parent;
     LIBVMAFContext *s = ctx->priv;
-    AVFrame *main, *ref;
+    AVFrame *master, *ref;
     int ret;
 
-    ret = ff_framesync_dualinput_get(fs, &main, &ref);
+    ret = ff_framesync_dualinput_get(fs, &master, &ref);
     if (ret < 0)
         return ret;
     if (!ref)
-        return ff_filter_frame(ctx->outputs[0], main);
+        return ff_filter_frame(ctx->outputs[0], master);
 
     pthread_mutex_lock(&s->lock);
 
-    while (s->frame_set != 0) {
+    while (s->frame_set && !s->error) {
         pthread_cond_wait(&s->cond, &s->lock);
     }
 
+    if (s->error) {
+        av_log(ctx, AV_LOG_ERROR,
+               "libvmaf encountered an error, check log for details\n");
+        pthread_mutex_unlock(&s->lock);
+        return AVERROR(EINVAL);
+    }
+
     av_frame_ref(s->gref, ref);
-    av_frame_ref(s->gmain, main);
+    av_frame_ref(s->gmain, master);
 
     s->frame_set = 1;
 
     pthread_cond_signal(&s->cond);
     pthread_mutex_unlock(&s->lock);
 
-    return ff_filter_frame(ctx->outputs[0], main);
+    return ff_filter_frame(ctx->outputs[0], master);
 }
 
 static av_cold int init(AVFilterContext *ctx)
@@ -206,7 +234,9 @@
 
     s->gref = av_frame_alloc();
     s->gmain = av_frame_alloc();
+    s->error = 0;
 
+    s->vmaf_thread_created = 0;
     pthread_mutex_init(&s->lock, NULL);
     pthread_cond_init (&s->cond, NULL);
 
@@ -254,11 +284,11 @@
         av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
         return AVERROR(EINVAL);
     }
+    s->vmaf_thread_created = 1;
 
     return 0;
 }
 
-
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
@@ -297,7 +327,11 @@
     pthread_cond_signal(&s->cond);
     pthread_mutex_unlock(&s->lock);
 
-    pthread_join(s->vmaf_thread, NULL);
+    if (s->vmaf_thread_created)
+    {
+        pthread_join(s->vmaf_thread, NULL);
+        s->vmaf_thread_created = 0;
+    }
 
     av_frame_free(&s->gref);
     av_frame_free(&s->gmain);

diff --git a/libavfilter/vf_limiter.c b/libavfilter/vf_limiter.c
index 9c62b11..bb7f1d3 100644
--- a/libavfilter/vf_limiter.c
+++ b/libavfilter/vf_limiter.c

@@ -28,6 +28,11 @@
 #include "limiter.h"
 #include "video.h"
 
+typedef struct ThreadData {
+    AVFrame *in;
+    AVFrame *out;
+} ThreadData;
+
 typedef struct LimiterContext {
     const AVClass *class;
     int min;
@@ -81,7 +86,7 @@
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
         AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
 
@@ -161,13 +166,46 @@
     return 0;
 }
 
+static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    LimiterContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    int p;
+
+    for (p = 0; p < s->nb_planes; p++) {
+        const int h = s->height[p];
+        const int slice_start = (h * jobnr) / nb_jobs;
+        const int slice_end = (h * (jobnr+1)) / nb_jobs;
+
+        if (!((1 << p) & s->planes)) {
+            if (out != in)
+                av_image_copy_plane(out->data[p] + slice_start * out->linesize[p],
+                                    out->linesize[p],
+                                    in->data[p] + slice_start * in->linesize[p],
+                                    in->linesize[p],
+                                    s->linesize[p], slice_end - slice_start);
+            continue;
+        }
+
+        s->dsp.limiter(in->data[p] + slice_start * in->linesize[p],
+                       out->data[p] + slice_start * out->linesize[p],
+                       in->linesize[p], out->linesize[p],
+                       s->width[p], slice_end - slice_start,
+                       s->min, s->max);
+    }
+
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
     LimiterContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
+    ThreadData td;
     AVFrame *out;
-    int p;
 
     if (av_frame_is_writable(in)) {
         out = in;
@@ -180,20 +218,10 @@
         av_frame_copy_props(out, in);
     }
 
-    for (p = 0; p < s->nb_planes; p++) {
-        if (!((1 << p) & s->planes)) {
-            if (out != in)
-                av_image_copy_plane(out->data[p], out->linesize[p], in->data[p], in->linesize[p],
-                                    s->linesize[p], s->height[p]);
-            continue;
-        }
-
-        s->dsp.limiter(in->data[p], out->data[p],
-                       in->linesize[p], out->linesize[p],
-                       s->width[p], s->height[p],
-                       s->min, s->max);
-    }
-
+    td.out = out;
+    td.in = in;
+    ctx->internal->execute(ctx, filter_slice, &td, NULL,
+                           FFMIN(s->height[2], ff_filter_get_nb_threads(ctx)));
     if (out != in)
         av_frame_free(&in);
 
@@ -227,5 +255,5 @@
     .query_formats = query_formats,
     .inputs        = inputs,
     .outputs       = outputs,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };

diff --git a/libavfilter/vf_lut.c b/libavfilter/vf_lut.c
index 11c039e..c815ddc 100644
--- a/libavfilter/vf_lut.c
+++ b/libavfilter/vf_lut.c

@@ -135,9 +135,13 @@
     AV_PIX_FMT_GBRP16LE,     AV_PIX_FMT_GBRAP12LE,    \
     AV_PIX_FMT_GBRAP16LE
 
+#define GRAY_FORMATS                            \
+    AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9LE, AV_PIX_FMT_GRAY10LE, \
+    AV_PIX_FMT_GRAY12LE, AV_PIX_FMT_GRAY14LE, AV_PIX_FMT_GRAY16LE
+
 static const enum AVPixelFormat yuv_pix_fmts[] = { YUV_FORMATS, AV_PIX_FMT_NONE };
 static const enum AVPixelFormat rgb_pix_fmts[] = { RGB_FORMATS, AV_PIX_FMT_NONE };
-static const enum AVPixelFormat all_pix_fmts[] = { RGB_FORMATS, YUV_FORMATS, AV_PIX_FMT_NONE };
+static const enum AVPixelFormat all_pix_fmts[] = { RGB_FORMATS, YUV_FORMATS, GRAY_FORMATS, AV_PIX_FMT_NONE };
 
 static int query_formats(AVFilterContext *ctx)
 {

diff --git a/libavfilter/vf_lut2.c b/libavfilter/vf_lut2.c
index 585d121..66c481e 100644
--- a/libavfilter/vf_lut2.c
+++ b/libavfilter/vf_lut2.c

@@ -304,20 +304,13 @@
         av_log(ctx, AV_LOG_ERROR, "inputs must be of same pixel format\n");
         return AVERROR(EINVAL);
     }
-    if (srcx->w                       != srcy->w ||
-        srcx->h                       != srcy->h ||
-        srcx->sample_aspect_ratio.num != srcy->sample_aspect_ratio.num ||
-        srcx->sample_aspect_ratio.den != srcy->sample_aspect_ratio.den) {
+    if (srcx->w != srcy->w || srcx->h != srcy->h) {
         av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
-               "(size %dx%d, SAR %d:%d) do not match the corresponding "
-               "second input link %s parameters (%dx%d, SAR %d:%d)\n",
+               "(size %dx%d) do not match the corresponding "
+               "second input link %s parameters (size %dx%d)\n",
                ctx->input_pads[0].name, srcx->w, srcx->h,
-               srcx->sample_aspect_ratio.num,
-               srcx->sample_aspect_ratio.den,
                ctx->input_pads[1].name,
-               srcy->w, srcy->h,
-               srcy->sample_aspect_ratio.num,
-               srcy->sample_aspect_ratio.den);
+               srcy->w, srcy->h);
         return AVERROR(EINVAL);
     }
 
@@ -408,18 +401,26 @@
 
 static int tlut2_filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
-    LUT2Context *s = inlink->dst->priv;
-    AVFilterLink *outlink = inlink->dst->outputs[0];
+    AVFilterContext *ctx = inlink->dst;
+    LUT2Context *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
 
     if (s->prev_frame) {
-        AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
-        if (!out) {
-            av_frame_free(&s->prev_frame);
-            s->prev_frame = frame;
-            return AVERROR(ENOMEM);
+        AVFrame *out;
+
+        if (ctx->is_disabled) {
+            out = av_frame_clone(frame);
+        } else {
+            out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+            if (!out) {
+                av_frame_free(&s->prev_frame);
+                s->prev_frame = frame;
+                return AVERROR(ENOMEM);
+            }
+
+            av_frame_copy_props(out, frame);
+            s->lut2(s, out, frame, s->prev_frame);
         }
-        av_frame_copy_props(out, frame);
-        s->lut2(s, out, frame, s->prev_frame);
         av_frame_free(&s->prev_frame);
         s->prev_frame = frame;
         return ff_filter_frame(outlink, out);
@@ -461,6 +462,7 @@
     .uninit        = uninit,
     .inputs        = tlut2_inputs,
     .outputs       = tlut2_outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
 };
 
 #endif

diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index c9b7224..4d985c5 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2013 Clément Bœsch
+ * Copyright (c) 2018 Paul B Mahol
  *
  * This file is part of FFmpeg.
  *
@@ -68,7 +69,8 @@
 #if CONFIG_HALDCLUT_FILTER
     uint8_t clut_rgba_map[4];
     int clut_step;
-    int clut_is16bit;
+    int clut_bits;
+    int clut_planar;
     int clut_width;
     FFFrameSync fs;
 #endif
@@ -198,6 +200,83 @@
     return c;
 }
 
+#define DEFINE_INTERP_FUNC_PLANAR(name, nbits, depth)                                                  \
+static int interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
+{                                                                                                      \
+    int x, y;                                                                                          \
+    const LUT3DContext *lut3d = ctx->priv;                                                             \
+    const ThreadData *td = arg;                                                                        \
+    const AVFrame *in  = td->in;                                                                       \
+    const AVFrame *out = td->out;                                                                      \
+    const int direct = out == in;                                                                      \
+    const int slice_start = (in->height *  jobnr   ) / nb_jobs;                                        \
+    const int slice_end   = (in->height * (jobnr+1)) / nb_jobs;                                        \
+    uint8_t *grow = out->data[0] + slice_start * out->linesize[0];                                     \
+    uint8_t *brow = out->data[1] + slice_start * out->linesize[1];                                     \
+    uint8_t *rrow = out->data[2] + slice_start * out->linesize[2];                                     \
+    uint8_t *arow = out->data[3] + slice_start * out->linesize[3];                                     \
+    const uint8_t *srcgrow = in->data[0] + slice_start * in->linesize[0];                              \
+    const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1];                              \
+    const uint8_t *srcrrow = in->data[2] + slice_start * in->linesize[2];                              \
+    const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3];                              \
+    const float scale = (1. / ((1<<depth) - 1)) * (lut3d->lutsize - 1);                                \
+                                                                                                       \
+    for (y = slice_start; y < slice_end; y++) {                                                        \
+        uint##nbits##_t *dstg = (uint##nbits##_t *)grow;                                               \
+        uint##nbits##_t *dstb = (uint##nbits##_t *)brow;                                               \
+        uint##nbits##_t *dstr = (uint##nbits##_t *)rrow;                                               \
+        uint##nbits##_t *dsta = (uint##nbits##_t *)arow;                                               \
+        const uint##nbits##_t *srcg = (const uint##nbits##_t *)srcgrow;                                \
+        const uint##nbits##_t *srcb = (const uint##nbits##_t *)srcbrow;                                \
+        const uint##nbits##_t *srcr = (const uint##nbits##_t *)srcrrow;                                \
+        const uint##nbits##_t *srca = (const uint##nbits##_t *)srcarow;                                \
+        for (x = 0; x < in->width; x++) {                                                              \
+            const struct rgbvec scaled_rgb = {srcr[x] * scale,                                         \
+                                              srcg[x] * scale,                                         \
+                                              srcb[x] * scale};                                        \
+            struct rgbvec vec = interp_##name(lut3d, &scaled_rgb);                                     \
+            dstr[x] = av_clip_uintp2(vec.r * (float)((1<<depth) - 1), depth);                          \
+            dstg[x] = av_clip_uintp2(vec.g * (float)((1<<depth) - 1), depth);                          \
+            dstb[x] = av_clip_uintp2(vec.b * (float)((1<<depth) - 1), depth);                          \
+            if (!direct && in->linesize[3])                                                            \
+                dsta[x] = srca[x];                                                                     \
+        }                                                                                              \
+        grow += out->linesize[0];                                                                      \
+        brow += out->linesize[1];                                                                      \
+        rrow += out->linesize[2];                                                                      \
+        arow += out->linesize[3];                                                                      \
+        srcgrow += in->linesize[0];                                                                    \
+        srcbrow += in->linesize[1];                                                                    \
+        srcrrow += in->linesize[2];                                                                    \
+        srcarow += in->linesize[3];                                                                    \
+    }                                                                                                  \
+    return 0;                                                                                          \
+}
+
+DEFINE_INTERP_FUNC_PLANAR(nearest,     8, 8)
+DEFINE_INTERP_FUNC_PLANAR(trilinear,   8, 8)
+DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 8, 8)
+
+DEFINE_INTERP_FUNC_PLANAR(nearest,     16, 9)
+DEFINE_INTERP_FUNC_PLANAR(trilinear,   16, 9)
+DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 9)
+
+DEFINE_INTERP_FUNC_PLANAR(nearest,     16, 10)
+DEFINE_INTERP_FUNC_PLANAR(trilinear,   16, 10)
+DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 10)
+
+DEFINE_INTERP_FUNC_PLANAR(nearest,     16, 12)
+DEFINE_INTERP_FUNC_PLANAR(trilinear,   16, 12)
+DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 12)
+
+DEFINE_INTERP_FUNC_PLANAR(nearest,     16, 14)
+DEFINE_INTERP_FUNC_PLANAR(trilinear,   16, 14)
+DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 14)
+
+DEFINE_INTERP_FUNC_PLANAR(nearest,     16, 16)
+DEFINE_INTERP_FUNC_PLANAR(trilinear,   16, 16)
+DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 16)
+
 #define DEFINE_INTERP_FUNC(name, nbits)                                                             \
 static int interp_##nbits##_##name(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)         \
 {                                                                                                   \
@@ -470,6 +549,12 @@
         AV_PIX_FMT_RGB0,   AV_PIX_FMT_BGR0,
         AV_PIX_FMT_RGB48,  AV_PIX_FMT_BGR48,
         AV_PIX_FMT_RGBA64, AV_PIX_FMT_BGRA64,
+        AV_PIX_FMT_GBRP,   AV_PIX_FMT_GBRAP,
+        AV_PIX_FMT_GBRP9,
+        AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12,
+        AV_PIX_FMT_GBRP14,
+        AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16,
         AV_PIX_FMT_NONE
     };
     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
@@ -480,24 +565,49 @@
 
 static int config_input(AVFilterLink *inlink)
 {
-    int is16bit = 0;
+    int depth, is16bit = 0, planar = 0;
     LUT3DContext *lut3d = inlink->dst->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 
+    depth = desc->comp[0].depth;
+
     switch (inlink->format) {
     case AV_PIX_FMT_RGB48:
     case AV_PIX_FMT_BGR48:
     case AV_PIX_FMT_RGBA64:
     case AV_PIX_FMT_BGRA64:
         is16bit = 1;
+        break;
+    case AV_PIX_FMT_GBRP9:
+    case AV_PIX_FMT_GBRP10:
+    case AV_PIX_FMT_GBRP12:
+    case AV_PIX_FMT_GBRP14:
+    case AV_PIX_FMT_GBRP16:
+    case AV_PIX_FMT_GBRAP10:
+    case AV_PIX_FMT_GBRAP12:
+    case AV_PIX_FMT_GBRAP16:
+        is16bit = 1;
+    case AV_PIX_FMT_GBRP:
+    case AV_PIX_FMT_GBRAP:
+        planar = 1;
+        break;
     }
 
     ff_fill_rgba_map(lut3d->rgba_map, inlink->format);
     lut3d->step = av_get_padded_bits_per_pixel(desc) >> (3 + is16bit);
 
-#define SET_FUNC(name) do {                             \
-    if (is16bit) lut3d->interp = interp_16_##name;      \
-    else         lut3d->interp = interp_8_##name;       \
+#define SET_FUNC(name) do {                                     \
+    if (planar) {                                               \
+        switch (depth) {                                        \
+        case  8: lut3d->interp = interp_8_##name##_p8;   break; \
+        case  9: lut3d->interp = interp_16_##name##_p9;  break; \
+        case 10: lut3d->interp = interp_16_##name##_p10; break; \
+        case 12: lut3d->interp = interp_16_##name##_p12; break; \
+        case 14: lut3d->interp = interp_16_##name##_p14; break; \
+        case 16: lut3d->interp = interp_16_##name##_p16; break; \
+        }                                                       \
+    } else if (is16bit) { lut3d->interp = interp_16_##name;     \
+    } else {       lut3d->interp = interp_8_##name; }           \
 } while (0)
 
     switch (lut3d->interpolation) {
@@ -640,7 +750,7 @@
 
 #if CONFIG_HALDCLUT_FILTER
 
-static void update_clut(LUT3DContext *lut3d, const AVFrame *frame)
+static void update_clut_packed(LUT3DContext *lut3d, const AVFrame *frame)
 {
     const uint8_t *data = frame->data[0];
     const int linesize  = frame->linesize[0];
@@ -670,10 +780,57 @@
     }                                                                   \
 } while (0)
 
-    if (!lut3d->clut_is16bit) LOAD_CLUT(8);
-    else                      LOAD_CLUT(16);
+    switch (lut3d->clut_bits) {
+    case  8: LOAD_CLUT(8);  break;
+    case 16: LOAD_CLUT(16); break;
+    }
 }
 
+static void update_clut_planar(LUT3DContext *lut3d, const AVFrame *frame)
+{
+    const uint8_t *datag = frame->data[0];
+    const uint8_t *datab = frame->data[1];
+    const uint8_t *datar = frame->data[2];
+    const int glinesize  = frame->linesize[0];
+    const int blinesize  = frame->linesize[1];
+    const int rlinesize  = frame->linesize[2];
+    const int w = lut3d->clut_width;
+    const int level = lut3d->lutsize;
+
+#define LOAD_CLUT_PLANAR(nbits, depth) do {                             \
+    int i, j, k, x = 0, y = 0;                                          \
+                                                                        \
+    for (k = 0; k < level; k++) {                                       \
+        for (j = 0; j < level; j++) {                                   \
+            for (i = 0; i < level; i++) {                               \
+                const uint##nbits##_t *gsrc = (const uint##nbits##_t *) \
+                    (datag + y*glinesize);                              \
+                const uint##nbits##_t *bsrc = (const uint##nbits##_t *) \
+                    (datab + y*blinesize);                              \
+                const uint##nbits##_t *rsrc = (const uint##nbits##_t *) \
+                    (datar + y*rlinesize);                              \
+                struct rgbvec *vec = &lut3d->lut[i][j][k];              \
+                vec->r = gsrc[x] / (float)((1<<(depth)) - 1);           \
+                vec->g = bsrc[x] / (float)((1<<(depth)) - 1);           \
+                vec->b = rsrc[x] / (float)((1<<(depth)) - 1);           \
+                if (++x == w) {                                         \
+                    x = 0;                                              \
+                    y++;                                                \
+                }                                                       \
+            }                                                           \
+        }                                                               \
+    }                                                                   \
+} while (0)
+
+    switch (lut3d->clut_bits) {
+    case  8: LOAD_CLUT_PLANAR(8, 8);   break;
+    case  9: LOAD_CLUT_PLANAR(16, 9);  break;
+    case 10: LOAD_CLUT_PLANAR(16, 10); break;
+    case 12: LOAD_CLUT_PLANAR(16, 12); break;
+    case 14: LOAD_CLUT_PLANAR(16, 14); break;
+    case 16: LOAD_CLUT_PLANAR(16, 16); break;
+    }
+}
 
 static int config_output(AVFilterLink *outlink)
 {
@@ -707,14 +864,8 @@
 
     av_assert0(desc);
 
-    lut3d->clut_is16bit = 0;
-    switch (inlink->format) {
-    case AV_PIX_FMT_RGB48:
-    case AV_PIX_FMT_BGR48:
-    case AV_PIX_FMT_RGBA64:
-    case AV_PIX_FMT_BGRA64:
-        lut3d->clut_is16bit = 1;
-    }
+    lut3d->clut_bits = desc->comp[0].depth;
+    lut3d->clut_planar = av_pix_fmt_count_planes(inlink->format) > 1;
 
     lut3d->clut_step = av_get_padded_bits_per_pixel(desc) >> 3;
     ff_fill_rgba_map(lut3d->clut_rgba_map, inlink->format);
@@ -751,6 +902,7 @@
 static int update_apply_clut(FFFrameSync *fs)
 {
     AVFilterContext *ctx = fs->parent;
+    LUT3DContext *lut3d = ctx->priv;
     AVFilterLink *inlink = ctx->inputs[0];
     AVFrame *master, *second, *out;
     int ret;
@@ -760,7 +912,10 @@
         return ret;
     if (!second)
         return ff_filter_frame(ctx->outputs[0], master);
-    update_clut(ctx->priv, second);
+    if (lut3d->clut_planar)
+        update_clut_planar(ctx->priv, second);
+    else
+        update_clut_packed(ctx->priv, second);
     out = apply_lut(inlink, master);
     return ff_filter_frame(ctx->outputs[0], out);
 }
@@ -821,3 +976,450 @@
     .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
 };
 #endif
+
+#if CONFIG_LUT1D_FILTER
+
+enum interp_1d_mode {
+    INTERPOLATE_1D_NEAREST,
+    INTERPOLATE_1D_LINEAR,
+    INTERPOLATE_1D_CUBIC,
+    NB_INTERP_1D_MODE
+};
+
+#define MAX_1D_LEVEL 65536
+
+typedef struct LUT1DContext {
+    const AVClass *class;
+    char *file;
+    int interpolation;          ///<interp_1d_mode
+    uint8_t rgba_map[4];
+    int step;
+    float lut[3][MAX_1D_LEVEL];
+    int lutsize;
+    avfilter_action_func *interp;
+} LUT1DContext;
+
+#undef OFFSET
+#define OFFSET(x) offsetof(LUT1DContext, x)
+
+static void set_identity_matrix_1d(LUT1DContext *lut1d, int size)
+{
+    const float c = 1. / (size - 1);
+    int i;
+
+    lut1d->lutsize = size;
+    for (i = 0; i < size; i++) {
+        lut1d->lut[0][i] = i * c;
+        lut1d->lut[1][i] = i * c;
+        lut1d->lut[2][i] = i * c;
+    }
+}
+
+static int parse_cube_1d(AVFilterContext *ctx, FILE *f)
+{
+    LUT1DContext *lut1d = ctx->priv;
+    char line[MAX_LINE_SIZE];
+    float min[3] = {0.0, 0.0, 0.0};
+    float max[3] = {1.0, 1.0, 1.0};
+
+    while (fgets(line, sizeof(line), f)) {
+        if (!strncmp(line, "LUT_1D_SIZE ", 12)) {
+            const int size = strtol(line + 12, NULL, 0);
+            int i;
+
+            if (size < 2 || size > MAX_1D_LEVEL) {
+                av_log(ctx, AV_LOG_ERROR, "Too large or invalid 1D LUT size\n");
+                return AVERROR(EINVAL);
+            }
+            lut1d->lutsize = size;
+            for (i = 0; i < size; i++) {
+                do {
+try_again:
+                    NEXT_LINE(0);
+                    if (!strncmp(line, "DOMAIN_", 7)) {
+                        float *vals = NULL;
+                        if      (!strncmp(line + 7, "MIN ", 4)) vals = min;
+                        else if (!strncmp(line + 7, "MAX ", 4)) vals = max;
+                        if (!vals)
+                            return AVERROR_INVALIDDATA;
+                        sscanf(line + 11, "%f %f %f", vals, vals + 1, vals + 2);
+                        av_log(ctx, AV_LOG_DEBUG, "min: %f %f %f | max: %f %f %f\n",
+                               min[0], min[1], min[2], max[0], max[1], max[2]);
+                        goto try_again;
+                    } else if (!strncmp(line, "LUT_1D_INPUT_RANGE ", 19)) {
+                        sscanf(line + 19, "%f %f", min, max);
+                        min[1] = min[2] = min[0];
+                        max[1] = max[2] = max[0];
+                        goto try_again;
+                    }
+                } while (skip_line(line));
+                if (sscanf(line, "%f %f %f", &lut1d->lut[0][i], &lut1d->lut[1][i], &lut1d->lut[2][i]) != 3)
+                    return AVERROR_INVALIDDATA;
+                lut1d->lut[0][i] *= max[0] - min[0];
+                lut1d->lut[1][i] *= max[1] - min[1];
+                lut1d->lut[2][i] *= max[2] - min[2];
+            }
+            break;
+        }
+    }
+    return 0;
+}
+
+static const AVOption lut1d_options[] = {
+    { "file", "set 1D LUT file name", OFFSET(file), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
+    { "interp", "select interpolation mode", OFFSET(interpolation),    AV_OPT_TYPE_INT, {.i64=INTERPOLATE_1D_LINEAR}, 0, NB_INTERP_1D_MODE-1, FLAGS, "interp_mode" },
+        { "nearest", "use values from the nearest defined points", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_NEAREST},   INT_MIN, INT_MAX, FLAGS, "interp_mode" },
+        { "linear",  "use values from the linear interpolation",   0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_LINEAR},    INT_MIN, INT_MAX, FLAGS, "interp_mode" },
+        { "cubic",   "use values from the cubic interpolation",    0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_CUBIC},     INT_MIN, INT_MAX, FLAGS, "interp_mode" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(lut1d);
+
+static inline float interp_1d_nearest(const LUT1DContext *lut1d,
+                                      int idx, const float s)
+{
+    return lut1d->lut[idx][NEAR(s)];
+}
+
+#define NEXT1D(x) (FFMIN((int)(x) + 1, lut1d->lutsize - 1))
+
+static inline float interp_1d_linear(const LUT1DContext *lut1d,
+                                     int idx, const float s)
+{
+    const int prev = PREV(s);
+    const int next = NEXT1D(s);
+    const float d = s - prev;
+    const float p = lut1d->lut[idx][prev];
+    const float n = lut1d->lut[idx][next];
+
+    return lerpf(p, n, d);
+}
+
+static inline float interp_1d_cubic(const LUT1DContext *lut1d,
+                                    int idx, const float s)
+{
+    const int prev = PREV(s);
+    const int next = NEXT1D(s);
+    const float mu = s - prev;
+    float a0, a1, a2, a3, mu2;
+
+    float y0 = lut1d->lut[idx][FFMAX(prev - 1, 0)];
+    float y1 = lut1d->lut[idx][prev];
+    float y2 = lut1d->lut[idx][next];
+    float y3 = lut1d->lut[idx][FFMIN(next + 1, lut1d->lutsize - 1)];
+
+
+    mu2 = mu * mu;
+    a0 = y3 - y2 - y0 + y1;
+    a1 = y0 - y1 - a0;
+    a2 = y2 - y0;
+    a3 = y1;
+
+    return a0 * mu * mu2 + a1 * mu2 + a2 * mu + a3;
+}
+
+#define DEFINE_INTERP_FUNC_PLANAR_1D(name, nbits, depth)                     \
+static int interp_1d_##nbits##_##name##_p##depth(AVFilterContext *ctx,       \
+                                                 void *arg, int jobnr,       \
+                                                 int nb_jobs)                \
+{                                                                            \
+    int x, y;                                                                \
+    const LUT1DContext *lut1d = ctx->priv;                                   \
+    const ThreadData *td = arg;                                              \
+    const AVFrame *in  = td->in;                                             \
+    const AVFrame *out = td->out;                                            \
+    const int direct = out == in;                                            \
+    const int slice_start = (in->height *  jobnr   ) / nb_jobs;              \
+    const int slice_end   = (in->height * (jobnr+1)) / nb_jobs;              \
+    uint8_t *grow = out->data[0] + slice_start * out->linesize[0];           \
+    uint8_t *brow = out->data[1] + slice_start * out->linesize[1];           \
+    uint8_t *rrow = out->data[2] + slice_start * out->linesize[2];           \
+    uint8_t *arow = out->data[3] + slice_start * out->linesize[3];           \
+    const uint8_t *srcgrow = in->data[0] + slice_start * in->linesize[0];    \
+    const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1];    \
+    const uint8_t *srcrrow = in->data[2] + slice_start * in->linesize[2];    \
+    const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3];    \
+    const float factor = (1 << depth) - 1;                                   \
+    const float scale = (1. / factor) * (lut1d->lutsize - 1);                \
+                                                                             \
+    for (y = slice_start; y < slice_end; y++) {                              \
+        uint##nbits##_t *dstg = (uint##nbits##_t *)grow;                     \
+        uint##nbits##_t *dstb = (uint##nbits##_t *)brow;                     \
+        uint##nbits##_t *dstr = (uint##nbits##_t *)rrow;                     \
+        uint##nbits##_t *dsta = (uint##nbits##_t *)arow;                     \
+        const uint##nbits##_t *srcg = (const uint##nbits##_t *)srcgrow;      \
+        const uint##nbits##_t *srcb = (const uint##nbits##_t *)srcbrow;      \
+        const uint##nbits##_t *srcr = (const uint##nbits##_t *)srcrrow;      \
+        const uint##nbits##_t *srca = (const uint##nbits##_t *)srcarow;      \
+        for (x = 0; x < in->width; x++) {                                    \
+            float r = srcr[x] * scale;                                       \
+            float g = srcg[x] * scale;                                       \
+            float b = srcb[x] * scale;                                       \
+            r = interp_1d_##name(lut1d, 0, r);                               \
+            g = interp_1d_##name(lut1d, 1, g);                               \
+            b = interp_1d_##name(lut1d, 2, b);                               \
+            dstr[x] = av_clip_uintp2(r * factor, depth);                     \
+            dstg[x] = av_clip_uintp2(g * factor, depth);                     \
+            dstb[x] = av_clip_uintp2(b * factor, depth);                     \
+            if (!direct && in->linesize[3])                                  \
+                dsta[x] = srca[x];                                           \
+        }                                                                    \
+        grow += out->linesize[0];                                            \
+        brow += out->linesize[1];                                            \
+        rrow += out->linesize[2];                                            \
+        arow += out->linesize[3];                                            \
+        srcgrow += in->linesize[0];                                          \
+        srcbrow += in->linesize[1];                                          \
+        srcrrow += in->linesize[2];                                          \
+        srcarow += in->linesize[3];                                          \
+    }                                                                        \
+    return 0;                                                                \
+}
+
+DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     8, 8)
+DEFINE_INTERP_FUNC_PLANAR_1D(linear,      8, 8)
+DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       8, 8)
+
+DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     16, 9)
+DEFINE_INTERP_FUNC_PLANAR_1D(linear,      16, 9)
+DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       16, 9)
+
+DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     16, 10)
+DEFINE_INTERP_FUNC_PLANAR_1D(linear,      16, 10)
+DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       16, 10)
+
+DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     16, 12)
+DEFINE_INTERP_FUNC_PLANAR_1D(linear,      16, 12)
+DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       16, 12)
+
+DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     16, 14)
+DEFINE_INTERP_FUNC_PLANAR_1D(linear,      16, 14)
+DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       16, 14)
+
+DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     16, 16)
+DEFINE_INTERP_FUNC_PLANAR_1D(linear,      16, 16)
+DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       16, 16)
+
+#define DEFINE_INTERP_FUNC_1D(name, nbits)                                   \
+static int interp_1d_##nbits##_##name(AVFilterContext *ctx, void *arg,       \
+                                      int jobnr, int nb_jobs)                \
+{                                                                            \
+    int x, y;                                                                \
+    const LUT1DContext *lut1d = ctx->priv;                                   \
+    const ThreadData *td = arg;                                              \
+    const AVFrame *in  = td->in;                                             \
+    const AVFrame *out = td->out;                                            \
+    const int direct = out == in;                                            \
+    const int step = lut1d->step;                                            \
+    const uint8_t r = lut1d->rgba_map[R];                                    \
+    const uint8_t g = lut1d->rgba_map[G];                                    \
+    const uint8_t b = lut1d->rgba_map[B];                                    \
+    const uint8_t a = lut1d->rgba_map[A];                                    \
+    const int slice_start = (in->height *  jobnr   ) / nb_jobs;              \
+    const int slice_end   = (in->height * (jobnr+1)) / nb_jobs;              \
+    uint8_t       *dstrow = out->data[0] + slice_start * out->linesize[0];   \
+    const uint8_t *srcrow = in ->data[0] + slice_start * in ->linesize[0];   \
+    const float factor = (1 << nbits) - 1;                                   \
+    const float scale = (1. / factor) * (lut1d->lutsize - 1);                \
+                                                                             \
+    for (y = slice_start; y < slice_end; y++) {                              \
+        uint##nbits##_t *dst = (uint##nbits##_t *)dstrow;                    \
+        const uint##nbits##_t *src = (const uint##nbits##_t *)srcrow;        \
+        for (x = 0; x < in->width * step; x += step) {                       \
+            float rr = src[x + r] * scale;                                   \
+            float gg = src[x + g] * scale;                                   \
+            float bb = src[x + b] * scale;                                   \
+            rr = interp_1d_##name(lut1d, 0, rr);                             \
+            gg = interp_1d_##name(lut1d, 1, gg);                             \
+            bb = interp_1d_##name(lut1d, 2, bb);                             \
+            dst[x + r] = av_clip_uint##nbits(rr * factor);                   \
+            dst[x + g] = av_clip_uint##nbits(gg * factor);                   \
+            dst[x + b] = av_clip_uint##nbits(bb * factor);                   \
+            if (!direct && step == 4)                                        \
+                dst[x + a] = src[x + a];                                     \
+        }                                                                    \
+        dstrow += out->linesize[0];                                          \
+        srcrow += in ->linesize[0];                                          \
+    }                                                                        \
+    return 0;                                                                \
+}
+
+DEFINE_INTERP_FUNC_1D(nearest,     8)
+DEFINE_INTERP_FUNC_1D(linear,      8)
+DEFINE_INTERP_FUNC_1D(cubic,       8)
+
+DEFINE_INTERP_FUNC_1D(nearest,     16)
+DEFINE_INTERP_FUNC_1D(linear,      16)
+DEFINE_INTERP_FUNC_1D(cubic,       16)
+
+static int config_input_1d(AVFilterLink *inlink)
+{
+    int depth, is16bit = 0, planar = 0;
+    LUT1DContext *lut1d = inlink->dst->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+
+    depth = desc->comp[0].depth;
+
+    switch (inlink->format) {
+    case AV_PIX_FMT_RGB48:
+    case AV_PIX_FMT_BGR48:
+    case AV_PIX_FMT_RGBA64:
+    case AV_PIX_FMT_BGRA64:
+        is16bit = 1;
+        break;
+    case AV_PIX_FMT_GBRP9:
+    case AV_PIX_FMT_GBRP10:
+    case AV_PIX_FMT_GBRP12:
+    case AV_PIX_FMT_GBRP14:
+    case AV_PIX_FMT_GBRP16:
+    case AV_PIX_FMT_GBRAP10:
+    case AV_PIX_FMT_GBRAP12:
+    case AV_PIX_FMT_GBRAP16:
+        is16bit = 1;
+    case AV_PIX_FMT_GBRP:
+    case AV_PIX_FMT_GBRAP:
+        planar = 1;
+        break;
+    }
+
+    ff_fill_rgba_map(lut1d->rgba_map, inlink->format);
+    lut1d->step = av_get_padded_bits_per_pixel(desc) >> (3 + is16bit);
+
+#define SET_FUNC_1D(name) do {                                     \
+    if (planar) {                                                  \
+        switch (depth) {                                           \
+        case  8: lut1d->interp = interp_1d_8_##name##_p8;   break; \
+        case  9: lut1d->interp = interp_1d_16_##name##_p9;  break; \
+        case 10: lut1d->interp = interp_1d_16_##name##_p10; break; \
+        case 12: lut1d->interp = interp_1d_16_##name##_p12; break; \
+        case 14: lut1d->interp = interp_1d_16_##name##_p14; break; \
+        case 16: lut1d->interp = interp_1d_16_##name##_p16; break; \
+        }                                                          \
+    } else if (is16bit) { lut1d->interp = interp_1d_16_##name;     \
+    } else {              lut1d->interp = interp_1d_8_##name; }    \
+} while (0)
+
+    switch (lut1d->interpolation) {
+    case INTERPOLATE_1D_NEAREST:     SET_FUNC_1D(nearest);  break;
+    case INTERPOLATE_1D_LINEAR:      SET_FUNC_1D(linear);   break;
+    case INTERPOLATE_1D_CUBIC:       SET_FUNC_1D(cubic);    break;
+    default:
+        av_assert0(0);
+    }
+
+    return 0;
+}
+
+static av_cold int lut1d_init(AVFilterContext *ctx)
+{
+    int ret;
+    FILE *f;
+    const char *ext;
+    LUT1DContext *lut1d = ctx->priv;
+
+    if (!lut1d->file) {
+        set_identity_matrix_1d(lut1d, 32);
+        return 0;
+    }
+
+    f = fopen(lut1d->file, "r");
+    if (!f) {
+        ret = AVERROR(errno);
+        av_log(ctx, AV_LOG_ERROR, "%s: %s\n", lut1d->file, av_err2str(ret));
+        return ret;
+    }
+
+    ext = strrchr(lut1d->file, '.');
+    if (!ext) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to guess the format from the extension\n");
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
+    ext++;
+
+    if (!av_strcasecmp(ext, "cube") || !av_strcasecmp(ext, "1dlut")) {
+        ret = parse_cube_1d(ctx, f);
+    } else {
+        av_log(ctx, AV_LOG_ERROR, "Unrecognized '.%s' file type\n", ext);
+        ret = AVERROR(EINVAL);
+    }
+
+    if (!ret && !lut1d->lutsize) {
+        av_log(ctx, AV_LOG_ERROR, "1D LUT is empty\n");
+        ret = AVERROR_INVALIDDATA;
+    }
+
+end:
+    fclose(f);
+    return ret;
+}
+
+static AVFrame *apply_1d_lut(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    LUT1DContext *lut1d = ctx->priv;
+    AVFilterLink *outlink = inlink->dst->outputs[0];
+    AVFrame *out;
+    ThreadData td;
+
+    if (av_frame_is_writable(in)) {
+        out = in;
+    } else {
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&in);
+            return NULL;
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    td.in  = in;
+    td.out = out;
+    ctx->internal->execute(ctx, lut1d->interp, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
+
+    if (out != in)
+        av_frame_free(&in);
+
+    return out;
+}
+
+static int filter_frame_1d(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterLink *outlink = inlink->dst->outputs[0];
+    AVFrame *out = apply_1d_lut(inlink, in);
+    if (!out)
+        return AVERROR(ENOMEM);
+    return ff_filter_frame(outlink, out);
+}
+
+static const AVFilterPad lut1d_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame_1d,
+        .config_props = config_input_1d,
+    },
+    { NULL }
+};
+
+static const AVFilterPad lut1d_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_lut1d = {
+    .name          = "lut1d",
+    .description   = NULL_IF_CONFIG_SMALL("Adjust colors using a 1D LUT."),
+    .priv_size     = sizeof(LUT1DContext),
+    .init          = lut1d_init,
+    .query_formats = query_formats,
+    .inputs        = lut1d_inputs,
+    .outputs       = lut1d_outputs,
+    .priv_class    = &lut1d_class,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+};
+#endif

diff --git a/libavfilter/vf_maskedclamp.c b/libavfilter/vf_maskedclamp.c
index 67a979f..8a06737 100644
--- a/libavfilter/vf_maskedclamp.c
+++ b/libavfilter/vf_maskedclamp.c

@@ -30,6 +30,10 @@
 #define OFFSET(x) offsetof(MaskedClampContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 
+typedef struct ThreadData {
+    AVFrame *b, *o, *m, *d;
+} ThreadData;
+
 typedef struct MaskedClampContext {
     const AVClass *class;
 
@@ -43,11 +47,7 @@
     int depth;
     FFFrameSync fs;
 
-    void (*maskedclamp)(const uint8_t *bsrc, const uint8_t *osrc,
-                        const uint8_t *msrc, uint8_t *dst,
-                        ptrdiff_t blinesize, ptrdiff_t darklinesize,
-                        ptrdiff_t brightlinesize, ptrdiff_t destlinesize,
-                        int w, int h, int undershoot, int overshoot);
+    int (*maskedclamp)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
 } MaskedClampContext;
 
 static const AVOption maskedclamp_options[] = {
@@ -78,7 +78,7 @@
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
         AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
 
@@ -103,92 +103,122 @@
         if (!out)
             return AVERROR(ENOMEM);
     } else {
-        int p;
+        ThreadData td;
 
         out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
         if (!out)
             return AVERROR(ENOMEM);
         av_frame_copy_props(out, base);
 
-        for (p = 0; p < s->nb_planes; p++) {
-            if (!((1 << p) & s->planes)) {
-                av_image_copy_plane(out->data[p], out->linesize[p], base->data[p], base->linesize[p],
-                                    s->linesize[p], s->height[p]);
-                continue;
-            }
+        td.b = base;
+        td.o = dark;
+        td.m = bright;
+        td.d = out;
 
-            s->maskedclamp(base->data[p], dark->data[p],
-                           bright->data[p], out->data[p],
-                           base->linesize[p], dark->linesize[p],
-                           bright->linesize[p], out->linesize[p],
-                           s->width[p], s->height[p],
-                           s->undershoot, s->overshoot);
-        }
+        ctx->internal->execute(ctx, s->maskedclamp, &td, NULL, FFMIN(s->height[0],
+                                                                     ff_filter_get_nb_threads(ctx)));
     }
     out->pts = av_rescale_q(s->fs.pts, s->fs.time_base, outlink->time_base);
 
     return ff_filter_frame(outlink, out);
 }
 
-static void maskedclamp8(const uint8_t *bsrc, const uint8_t *darksrc,
-                         const uint8_t *brightsrc, uint8_t *dst,
-                         ptrdiff_t blinesize, ptrdiff_t darklinesize,
-                         ptrdiff_t brightlinesize, ptrdiff_t dlinesize,
-                         int w, int h,
-                         int undershoot, int overshoot)
+static int maskedclamp8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    int x, y;
+    MaskedClampContext *s = ctx->priv;
+    ThreadData *td = arg;
+    int p;
 
-    for (y = 0; y < h; y++) {
-        for (x = 0; x < w; x++) {
-            if (bsrc[x] < darksrc[x] - undershoot)
-                dst[x] = darksrc[x] - undershoot;
-            else if (bsrc[x] > brightsrc[x] + overshoot)
-                dst[x] = brightsrc[x] + overshoot;
-            else
-                dst[x] = bsrc[x];
+    for (p = 0; p < s->nb_planes; p++) {
+        const ptrdiff_t blinesize = td->b->linesize[p];
+        const ptrdiff_t brightlinesize = td->m->linesize[p];
+        const ptrdiff_t darklinesize = td->o->linesize[p];
+        const ptrdiff_t dlinesize = td->d->linesize[p];
+        const int w = s->width[p];
+        const int h = s->height[p];
+        const int slice_start = (h * jobnr) / nb_jobs;
+        const int slice_end = (h * (jobnr+1)) / nb_jobs;
+        const uint8_t *bsrc = td->b->data[p] + slice_start * blinesize;
+        const uint8_t *darksrc = td->o->data[p] + slice_start * darklinesize;
+        const uint8_t *brightsrc = td->m->data[p] + slice_start * brightlinesize;
+        uint8_t *dst = td->d->data[p] + slice_start * dlinesize;
+        const int undershoot = s->undershoot;
+        const int overshoot = s->overshoot;
+        int x, y;
+
+        if (!((1 << p) & s->planes)) {
+            av_image_copy_plane(dst, dlinesize, bsrc, blinesize,
+                                s->linesize[p], slice_end - slice_start);
+            continue;
         }
 
-        dst  += dlinesize;
-        bsrc += blinesize;
-        darksrc += darklinesize;
-        brightsrc += brightlinesize;
+        for (y = slice_start; y < slice_end; y++) {
+            for (x = 0; x < w; x++) {
+                if (bsrc[x] < darksrc[x] - undershoot)
+                    dst[x] = darksrc[x] - undershoot;
+                else if (bsrc[x] > brightsrc[x] + overshoot)
+                    dst[x] = brightsrc[x] + overshoot;
+                else
+                    dst[x] = bsrc[x];
+            }
+
+            dst  += dlinesize;
+            bsrc += blinesize;
+            darksrc += darklinesize;
+            brightsrc += brightlinesize;
+        }
     }
+
+    return 0;
 }
 
-static void maskedclamp16(const uint8_t *bbsrc, const uint8_t *oosrc,
-                          const uint8_t *mmsrc, uint8_t *ddst,
-                          ptrdiff_t blinesize, ptrdiff_t darklinesize,
-                          ptrdiff_t brightlinesize, ptrdiff_t dlinesize,
-                          int w, int h,
-                          int undershoot, int overshoot)
+static int maskedclamp16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    const uint16_t *bsrc = (const uint16_t *)bbsrc;
-    const uint16_t *darksrc = (const uint16_t *)oosrc;
-    const uint16_t *brightsrc = (const uint16_t *)mmsrc;
-    uint16_t *dst = (uint16_t *)ddst;
-    int x, y;
+    MaskedClampContext *s = ctx->priv;
+    ThreadData *td = arg;
+    int p;
 
-    dlinesize /= 2;
-    blinesize /= 2;
-    darklinesize /= 2;
-    brightlinesize /= 2;
+    for (p = 0; p < s->nb_planes; p++) {
+        const ptrdiff_t blinesize = td->b->linesize[p] / 2;
+        const ptrdiff_t brightlinesize = td->m->linesize[p] / 2;
+        const ptrdiff_t darklinesize = td->o->linesize[p] / 2;
+        const ptrdiff_t dlinesize = td->d->linesize[p] / 2;
+        const int w = s->width[p];
+        const int h = s->height[p];
+        const int slice_start = (h * jobnr) / nb_jobs;
+        const int slice_end = (h * (jobnr+1)) / nb_jobs;
+        const uint16_t *bsrc = (const uint16_t *)td->b->data[p] + slice_start * blinesize;
+        const uint16_t *darksrc = (const uint16_t *)td->o->data[p] + slice_start * darklinesize;
+        const uint16_t *brightsrc = (const uint16_t *)td->m->data[p] + slice_start * brightlinesize;
+        uint16_t *dst = (uint16_t *)td->d->data[p] + slice_start * dlinesize;
+        const int undershoot = s->undershoot;
+        const int overshoot = s->overshoot;
+        int x, y;
 
-    for (y = 0; y < h; y++) {
-        for (x = 0; x < w; x++) {
-            if (bsrc[x] < darksrc[x] - undershoot)
-                dst[x] = darksrc[x] - undershoot;
-            else if (bsrc[x] > brightsrc[x] + overshoot)
-                dst[x] = brightsrc[x] + overshoot;
-            else
-                dst[x] = bsrc[x];
+        if (!((1 << p) & s->planes)) {
+            av_image_copy_plane((uint8_t *)dst, dlinesize, (const uint8_t *)bsrc, blinesize,
+                                s->linesize[p], slice_end - slice_start);
+            continue;
         }
 
-        dst  += dlinesize;
-        bsrc += blinesize;
-        darksrc += darklinesize;
-        brightsrc += brightlinesize;
+        for (y = slice_start; y < slice_end; y++) {
+            for (x = 0; x < w; x++) {
+                if (bsrc[x] < darksrc[x] - undershoot)
+                    dst[x] = darksrc[x] - undershoot;
+                else if (bsrc[x] > brightsrc[x] + overshoot)
+                    dst[x] = brightsrc[x] + overshoot;
+                else
+                    dst[x] = bsrc[x];
+            }
+
+            dst  += dlinesize;
+            bsrc += blinesize;
+            darksrc += darklinesize;
+            brightsrc += brightlinesize;
+        }
     }
+
+    return 0;
 }
 
 static int config_input(AVFilterLink *inlink)
@@ -235,27 +265,15 @@
         av_log(ctx, AV_LOG_ERROR, "inputs must be of same pixel format\n");
         return AVERROR(EINVAL);
     }
-    if (base->w                       != dark->w ||
-        base->h                       != dark->h ||
-        base->sample_aspect_ratio.num != dark->sample_aspect_ratio.num ||
-        base->sample_aspect_ratio.den != dark->sample_aspect_ratio.den ||
-        base->w                       != bright->w ||
-        base->h                       != bright->h ||
-        base->sample_aspect_ratio.num != bright->sample_aspect_ratio.num ||
-        base->sample_aspect_ratio.den != bright->sample_aspect_ratio.den) {
+    if (base->w != dark->w   || base->h != dark->h ||
+        base->w != bright->w || base->h != bright->h) {
         av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
-               "(size %dx%d, SAR %d:%d) do not match the corresponding "
-               "second input link %s parameters (%dx%d, SAR %d:%d) "
-               "and/or third input link %s parameters (%dx%d, SAR %d:%d)\n",
+               "(size %dx%d) do not match the corresponding "
+               "second input link %s parameters (%dx%d) "
+               "and/or third input link %s parameters (size %dx%d)\n",
                ctx->input_pads[0].name, base->w, base->h,
-               base->sample_aspect_ratio.num,
-               base->sample_aspect_ratio.den,
                ctx->input_pads[1].name, dark->w, dark->h,
-               dark->sample_aspect_ratio.num,
-               dark->sample_aspect_ratio.den,
-               ctx->input_pads[2].name, bright->w, bright->h,
-               bright->sample_aspect_ratio.num,
-               bright->sample_aspect_ratio.den);
+               ctx->input_pads[2].name, bright->w, bright->h);
         return AVERROR(EINVAL);
     }
 
@@ -336,5 +354,5 @@
     .inputs        = maskedclamp_inputs,
     .outputs       = maskedclamp_outputs,
     .priv_class    = &maskedclamp_class,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
 };

diff --git a/libavfilter/vf_maskedmerge.c b/libavfilter/vf_maskedmerge.c
index d31b926..1a02503 100644
--- a/libavfilter/vf_maskedmerge.c
+++ b/libavfilter/vf_maskedmerge.c

@@ -56,19 +56,62 @@
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
         AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
 
     return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
 }
 
+typedef struct ThreadData {
+    AVFrame *base, *overlay, *mask;
+    AVFrame *out;
+} ThreadData;
+
+static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    MaskedMergeContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *base = td->base;
+    AVFrame *overlay = td->overlay;
+    AVFrame *mask = td->mask;
+    AVFrame *out = td->out;
+    int p;
+
+    for (p = 0; p < s->nb_planes; p++) {
+        const int h = s->height[p];
+        const int slice_start = (h * jobnr) / nb_jobs;
+        const int slice_end = (h * (jobnr+1)) / nb_jobs;
+
+        if (!((1 << p) & s->planes)) {
+            av_image_copy_plane(out->data[p] + slice_start * out->linesize[p],
+                                out->linesize[p],
+                                base->data[p] + slice_start * base->linesize[p],
+                                base->linesize[p],
+                                s->linesize[p], slice_end - slice_start);
+            continue;
+        }
+
+        s->maskedmerge(base->data[p] + slice_start * base->linesize[p],
+                       overlay->data[p] + slice_start * overlay->linesize[p],
+                       mask->data[p] + slice_start * mask->linesize[p],
+                       out->data[p] + slice_start * out->linesize[p],
+                       base->linesize[p], overlay->linesize[p],
+                       mask->linesize[p], out->linesize[p],
+                       s->width[p], slice_end - slice_start,
+                       s->half, s->depth);
+    }
+
+    return 0;
+}
+
 static int process_frame(FFFrameSync *fs)
 {
     AVFilterContext *ctx = fs->parent;
     MaskedMergeContext *s = fs->opaque;
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out, *base, *overlay, *mask;
+    ThreadData td;
     int ret;
 
     if ((ret = ff_framesync_get_frame(&s->fs, 0, &base,    0)) < 0 ||
@@ -81,27 +124,17 @@
         if (!out)
             return AVERROR(ENOMEM);
     } else {
-        int p;
-
         out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
         if (!out)
             return AVERROR(ENOMEM);
         av_frame_copy_props(out, base);
 
-        for (p = 0; p < s->nb_planes; p++) {
-            if (!((1 << p) & s->planes)) {
-                av_image_copy_plane(out->data[p], out->linesize[p], base->data[p], base->linesize[p],
-                                    s->linesize[p], s->height[p]);
-                continue;
-            }
-
-            s->maskedmerge(base->data[p], overlay->data[p],
-                           mask->data[p], out->data[p],
-                           base->linesize[p], overlay->linesize[p],
-                           mask->linesize[p], out->linesize[p],
-                           s->width[p], s->height[p],
-                           s->half, s->depth);
-        }
+        td.out = out;
+        td.base = base;
+        td.overlay = overlay;
+        td.mask = mask;
+        ctx->internal->execute(ctx, filter_slice, &td, NULL,
+                               FFMIN(s->height[2], ff_filter_get_nb_threads(ctx)));
     }
     out->pts = av_rescale_q(base->pts, s->fs.time_base, outlink->time_base);
 
@@ -199,27 +232,15 @@
         av_log(ctx, AV_LOG_ERROR, "inputs must be of same pixel format\n");
         return AVERROR(EINVAL);
     }
-    if (base->w                       != overlay->w ||
-        base->h                       != overlay->h ||
-        base->sample_aspect_ratio.num != overlay->sample_aspect_ratio.num ||
-        base->sample_aspect_ratio.den != overlay->sample_aspect_ratio.den ||
-        base->w                       != mask->w ||
-        base->h                       != mask->h ||
-        base->sample_aspect_ratio.num != mask->sample_aspect_ratio.num ||
-        base->sample_aspect_ratio.den != mask->sample_aspect_ratio.den) {
+    if (base->w != overlay->w || base->h != overlay->h ||
+        base->w != mask->w    || base->h != mask->h) {
         av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
-               "(size %dx%d, SAR %d:%d) do not match the corresponding "
-               "second input link %s parameters (%dx%d, SAR %d:%d) "
-               "and/or third input link %s parameters (%dx%d, SAR %d:%d)\n",
+               "(size %dx%d) do not match the corresponding "
+               "second input link %s parameters (size %dx%d) "
+               "and/or third input link %s parameters (size %dx%d)\n",
                ctx->input_pads[0].name, base->w, base->h,
-               base->sample_aspect_ratio.num,
-               base->sample_aspect_ratio.den,
                ctx->input_pads[1].name, overlay->w, overlay->h,
-               overlay->sample_aspect_ratio.num,
-               overlay->sample_aspect_ratio.den,
-               ctx->input_pads[2].name, mask->w, mask->h,
-               mask->sample_aspect_ratio.num,
-               mask->sample_aspect_ratio.den);
+               ctx->input_pads[2].name, mask->w, mask->h);
         return AVERROR(EINVAL);
     }
 
@@ -303,5 +324,5 @@
     .inputs        = maskedmerge_inputs,
     .outputs       = maskedmerge_outputs,
     .priv_class    = &maskedmerge_class,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
 };

diff --git a/libavfilter/vf_midequalizer.c b/libavfilter/vf_midequalizer.c
index c03814a..87d1e7c 100644
--- a/libavfilter/vf_midequalizer.c
+++ b/libavfilter/vf_midequalizer.c

@@ -66,7 +66,7 @@
         AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
         AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14,
         AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
         AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
         AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12,

diff --git a/libavfilter/vf_minterpolate.c b/libavfilter/vf_minterpolate.c
index 6c5c264..c6a5e63 100644
--- a/libavfilter/vf_minterpolate.c
+++ b/libavfilter/vf_minterpolate.c

@@ -145,12 +145,18 @@
     struct Block *subs;
 } Block;
 
-typedef struct Pixel {
+typedef struct PixelMVS {
     int16_t mvs[NB_PIXEL_MVS][2];
+} PixelMVS;
+
+typedef struct PixelWeights {
     uint32_t weights[NB_PIXEL_MVS];
+} PixelWeights;
+
+typedef struct PixelRefs {
     int8_t refs[NB_PIXEL_MVS];
     int nb;
-} Pixel;
+} PixelRefs;
 
 typedef struct Frame {
     AVFrame *avf;
@@ -172,7 +178,9 @@
     Frame frames[NB_FRAMES];
     Cluster clusters[NB_CLUSTERS];
     Block *int_blocks;
-    Pixel *pixels;
+    PixelMVS *pixel_mvs;
+    PixelWeights *pixel_weights;
+    PixelRefs *pixel_refs;
     int (*mv_table[3])[2][2];
     int64_t out_pts;
     int b_width, b_height, b_count;
@@ -331,7 +339,7 @@
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
     const int height = inlink->h;
     const int width  = inlink->w;
-    int i;
+    int i, ret = 0;
 
     mi_ctx->log2_chroma_h = desc->log2_chroma_h;
     mi_ctx->log2_chroma_w = desc->log2_chroma_w;
@@ -353,8 +361,13 @@
     }
 
     if (mi_ctx->mi_mode == MI_MODE_MCI) {
-        if (!(mi_ctx->pixels = av_mallocz_array(width * height, sizeof(Pixel))))
-            return AVERROR(ENOMEM);
+        mi_ctx->pixel_mvs = av_mallocz_array(width * height, sizeof(PixelMVS));
+        mi_ctx->pixel_weights = av_mallocz_array(width * height, sizeof(PixelWeights));
+        mi_ctx->pixel_refs = av_mallocz_array(width * height, sizeof(PixelRefs));
+        if (!mi_ctx->pixel_mvs || !mi_ctx->pixel_weights || !mi_ctx->pixel_refs) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
 
         if (mi_ctx->me_mode == ME_MODE_BILAT)
             if (!(mi_ctx->int_blocks = av_mallocz_array(mi_ctx->b_count, sizeof(Block))))
@@ -383,6 +396,13 @@
         me_ctx->get_cost = &get_sbad_ob;
 
     return 0;
+fail:
+    for (i = 0; i < NB_FRAMES; i++)
+        av_freep(&mi_ctx->frames[i].blocks);
+    av_freep(&mi_ctx->pixel_mvs);
+    av_freep(&mi_ctx->pixel_weights);
+    av_freep(&mi_ctx->pixel_refs);
+    return ret;
 }
 
 static int config_output(AVFilterLink *outlink)
@@ -833,18 +853,18 @@
 
 #define ADD_PIXELS(b_weight, mv_x, mv_y)\
     do {\
-        if (!b_weight || pixel->nb + 1 >= NB_PIXEL_MVS)\
+        if (!b_weight || pixel_refs->nb + 1 >= NB_PIXEL_MVS)\
             continue;\
-        pixel->refs[pixel->nb] = 1;\
-        pixel->weights[pixel->nb] = b_weight * (ALPHA_MAX - alpha);\
-        pixel->mvs[pixel->nb][0] = av_clip((mv_x * alpha) / ALPHA_MAX, x_min, x_max);\
-        pixel->mvs[pixel->nb][1] = av_clip((mv_y * alpha) / ALPHA_MAX, y_min, y_max);\
-        pixel->nb++;\
-        pixel->refs[pixel->nb] = 2;\
-        pixel->weights[pixel->nb] = b_weight * alpha;\
-        pixel->mvs[pixel->nb][0] = av_clip(-mv_x * (ALPHA_MAX - alpha) / ALPHA_MAX, x_min, x_max);\
-        pixel->mvs[pixel->nb][1] = av_clip(-mv_y * (ALPHA_MAX - alpha) / ALPHA_MAX, y_min, y_max);\
-        pixel->nb++;\
+        pixel_refs->refs[pixel_refs->nb] = 1;\
+        pixel_weights->weights[pixel_refs->nb] = b_weight * (ALPHA_MAX - alpha);\
+        pixel_mvs->mvs[pixel_refs->nb][0] = av_clip((mv_x * alpha) / ALPHA_MAX, x_min, x_max);\
+        pixel_mvs->mvs[pixel_refs->nb][1] = av_clip((mv_y * alpha) / ALPHA_MAX, y_min, y_max);\
+        pixel_refs->nb++;\
+        pixel_refs->refs[pixel_refs->nb] = 2;\
+        pixel_weights->weights[pixel_refs->nb] = b_weight * alpha;\
+        pixel_mvs->mvs[pixel_refs->nb][0] = av_clip(-mv_x * (ALPHA_MAX - alpha) / ALPHA_MAX, x_min, x_max);\
+        pixel_mvs->mvs[pixel_refs->nb][1] = av_clip(-mv_y * (ALPHA_MAX - alpha) / ALPHA_MAX, y_min, y_max);\
+        pixel_refs->nb++;\
     } while(0)
 
 static void bidirectional_obmc(MIContext *mi_ctx, int alpha)
@@ -856,7 +876,7 @@
 
     for (y = 0; y < height; y++)
         for (x = 0; x < width; x++)
-            mi_ctx->pixels[x + y * width].nb = 0;
+            mi_ctx->pixel_refs[x + y * width].nb = 0;
 
     for (dir = 0; dir < 2; dir++)
         for (mb_y = 0; mb_y < mi_ctx->b_height; mb_y++)
@@ -887,7 +907,9 @@
                         int x_min = -x;
                         int x_max = width - x - 1;
                         int obmc_weight = obmc_tab_linear[4 - mi_ctx->log2_mb_size][(x - start_x) + ((y - start_y) << (mi_ctx->log2_mb_size + 1))];
-                        Pixel *pixel = &mi_ctx->pixels[x + y * width];
+                        PixelMVS *pixel_mvs = &mi_ctx->pixel_mvs[x + y * width];
+                        PixelWeights *pixel_weights = &mi_ctx->pixel_weights[x + y * width];
+                        PixelRefs *pixel_refs = &mi_ctx->pixel_refs[x + y * width];
 
                         ADD_PIXELS(obmc_weight, mv_x, mv_y);
                     }
@@ -909,36 +931,38 @@
                 int x_mv, y_mv;
                 int weight_sum = 0;
                 int i, val = 0;
-                Pixel *pixel = &mi_ctx->pixels[x + y * avf_out->width];
+                PixelMVS *pixel_mvs = &mi_ctx->pixel_mvs[x + y * avf_out->width];
+                PixelWeights *pixel_weights = &mi_ctx->pixel_weights[x + y * avf_out->width];
+                PixelRefs *pixel_refs = &mi_ctx->pixel_refs[x + y * avf_out->width];
 
-                for (i = 0; i < pixel->nb; i++)
-                    weight_sum += pixel->weights[i];
+                for (i = 0; i < pixel_refs->nb; i++)
+                    weight_sum += pixel_weights->weights[i];
 
-                if (!weight_sum || !pixel->nb) {
-                    pixel->weights[0] = ALPHA_MAX - alpha;
-                    pixel->refs[0] = 1;
-                    pixel->mvs[0][0] = 0;
-                    pixel->mvs[0][1] = 0;
-                    pixel->weights[1] = alpha;
-                    pixel->refs[1] = 2;
-                    pixel->mvs[1][0] = 0;
-                    pixel->mvs[1][1] = 0;
-                    pixel->nb = 2;
+                if (!weight_sum || !pixel_refs->nb) {
+                    pixel_weights->weights[0] = ALPHA_MAX - alpha;
+                    pixel_refs->refs[0] = 1;
+                    pixel_mvs->mvs[0][0] = 0;
+                    pixel_mvs->mvs[0][1] = 0;
+                    pixel_weights->weights[1] = alpha;
+                    pixel_refs->refs[1] = 2;
+                    pixel_mvs->mvs[1][0] = 0;
+                    pixel_mvs->mvs[1][1] = 0;
+                    pixel_refs->nb = 2;
 
                     weight_sum = ALPHA_MAX;
                 }
 
-                for (i = 0; i < pixel->nb; i++) {
-                    Frame *frame = &mi_ctx->frames[pixel->refs[i]];
+                for (i = 0; i < pixel_refs->nb; i++) {
+                    Frame *frame = &mi_ctx->frames[pixel_refs->refs[i]];
                     if (chroma) {
-                        x_mv = (x >> mi_ctx->log2_chroma_w) + pixel->mvs[i][0] / (1 << mi_ctx->log2_chroma_w);
-                        y_mv = (y >> mi_ctx->log2_chroma_h) + pixel->mvs[i][1] / (1 << mi_ctx->log2_chroma_h);
+                        x_mv = (x >> mi_ctx->log2_chroma_w) + pixel_mvs->mvs[i][0] / (1 << mi_ctx->log2_chroma_w);
+                        y_mv = (y >> mi_ctx->log2_chroma_h) + pixel_mvs->mvs[i][1] / (1 << mi_ctx->log2_chroma_h);
                     } else {
-                        x_mv = x + pixel->mvs[i][0];
-                        y_mv = y + pixel->mvs[i][1];
+                        x_mv = x + pixel_mvs->mvs[i][0];
+                        y_mv = y + pixel_mvs->mvs[i][1];
                     }
 
-                    val += pixel->weights[i] * frame->avf->data[plane][x_mv + y_mv * frame->avf->linesize[plane]];
+                    val += pixel_weights->weights[i] * frame->avf->data[plane][x_mv + y_mv * frame->avf->linesize[plane]];
                 }
 
                 val = ROUNDED_DIV(val, weight_sum);
@@ -979,7 +1003,9 @@
                     for (x = start_x; x < end_x; x++) {
                         int x_min = -x;
                         int x_max = width - x - 1;
-                        Pixel *pixel = &mi_ctx->pixels[x + y * width];
+                        PixelMVS *pixel_mvs = &mi_ctx->pixel_mvs[x + y * width];
+                        PixelWeights *pixel_weights = &mi_ctx->pixel_weights[x + y * width];
+                        PixelRefs *pixel_refs = &mi_ctx->pixel_refs[x + y * width];
 
                         ADD_PIXELS(PX_WEIGHT_MAX, mv_x, mv_y);
                     }
@@ -1028,7 +1054,9 @@
             int x_min = -x;
             int x_max = width - x - 1;
             int obmc_weight = obmc_tab_linear[4 - mi_ctx->log2_mb_size][(x - start_x) + ((y - start_y) << (mi_ctx->log2_mb_size + 1))];
-            Pixel *pixel = &mi_ctx->pixels[x + y * width];
+            PixelMVS *pixel_mvs = &mi_ctx->pixel_mvs[x + y * width];
+            PixelWeights *pixel_weights = &mi_ctx->pixel_weights[x + y * width];
+            PixelRefs *pixel_refs = &mi_ctx->pixel_refs[x + y * width];
 
             if (mi_ctx->mc_mode == MC_MODE_AOBMC) {
                 nb_x = (((x - start_x) >> (mi_ctx->log2_mb_size - 1)) * 2 - 3) / 2;
@@ -1094,8 +1122,8 @@
                 for (y = 0; y < height; y++) {
                     for (x = 0; x < width; x++) {
                         avf_out->data[plane][x + y * avf_out->linesize[plane]] =
-                                          alpha  * mi_ctx->frames[2].avf->data[plane][x + y * mi_ctx->frames[2].avf->linesize[plane]] +
-                            ((ALPHA_MAX - alpha) * mi_ctx->frames[1].avf->data[plane][x + y * mi_ctx->frames[1].avf->linesize[plane]] + 512) >> 10;
+                            (alpha  * mi_ctx->frames[2].avf->data[plane][x + y * mi_ctx->frames[2].avf->linesize[plane]] +
+                             (ALPHA_MAX - alpha) * mi_ctx->frames[1].avf->data[plane][x + y * mi_ctx->frames[1].avf->linesize[plane]] + 512) >> 10;
                     }
                 }
             }
@@ -1112,7 +1140,7 @@
 
                 for (y = 0; y < mi_ctx->frames[0].avf->height; y++)
                     for (x = 0; x < mi_ctx->frames[0].avf->width; x++)
-                        mi_ctx->pixels[x + y * mi_ctx->frames[0].avf->width].nb = 0;
+                        mi_ctx->pixel_refs[x + y * mi_ctx->frames[0].avf->width].nb = 0;
 
                 for (mb_y = 0; mb_y < mi_ctx->b_height; mb_y++)
                     for (mb_x = 0; mb_x < mi_ctx->b_width; mb_x++) {
@@ -1195,7 +1223,9 @@
     MIContext *mi_ctx = ctx->priv;
     int i, m;
 
-    av_freep(&mi_ctx->pixels);
+    av_freep(&mi_ctx->pixel_mvs);
+    av_freep(&mi_ctx->pixel_weights);
+    av_freep(&mi_ctx->pixel_refs);
     if (mi_ctx->int_blocks)
         for (m = 0; m < mi_ctx->b_count; m++)
             free_blocks(&mi_ctx->int_blocks[m], 0);

diff --git a/libavfilter/vf_misc_vaapi.c b/libavfilter/vf_misc_vaapi.c
new file mode 100644
index 0000000..30b808a
--- /dev/null
+++ b/libavfilter/vf_misc_vaapi.c

@@ -0,0 +1,289 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "vaapi_vpp.h"
+
+// Denoise min/max/default Values
+#define DENOISE_MIN            0
+#define DENOISE_MAX            64
+#define DENOISE_DEFAULT        0
+
+// Sharpness min/max/default values
+#define SHARPNESS_MIN          0
+#define SHARPNESS_MAX          64
+#define SHARPNESS_DEFAULT      44
+
+typedef struct DenoiseVAAPIContext {
+    VAAPIVPPContext vpp_ctx; // must be the first field
+
+    int denoise;         // enable denoise algo.
+} DenoiseVAAPIContext;
+
+typedef struct SharpnessVAAPIContext {
+    VAAPIVPPContext vpp_ctx; // must be the first field
+
+    int sharpness;       // enable sharpness.
+} SharpnessVAAPIContext;
+
+static float map(int x, int in_min, int in_max, float out_min, float out_max)
+{
+    double slope, output;
+
+    slope = 1.0 * (out_max - out_min) / (in_max - in_min);
+    output = out_min + slope * (x - in_min);
+
+    return (float)output;
+}
+
+static int denoise_vaapi_build_filter_params(AVFilterContext *avctx)
+{
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+    DenoiseVAAPIContext *ctx = avctx->priv;
+
+    VAProcFilterCap caps;
+
+    VAStatus vas;
+    uint32_t num_caps = 1;
+
+    VAProcFilterParameterBuffer denoise;
+
+    vas = vaQueryVideoProcFilterCaps(vpp_ctx->hwctx->display, vpp_ctx->va_context,
+                                     VAProcFilterNoiseReduction,
+                                     &caps, &num_caps);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query denoise caps "
+               "context: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR(EIO);
+    }
+
+    denoise.type  = VAProcFilterNoiseReduction;
+    denoise.value =  map(ctx->denoise, DENOISE_MIN, DENOISE_MAX,
+                         caps.range.min_value,
+                         caps.range.max_value);
+    ff_vaapi_vpp_make_param_buffers(avctx, VAProcFilterParameterBufferType,
+                                    &denoise, sizeof(denoise), 1);
+
+    return 0;
+}
+
+static int sharpness_vaapi_build_filter_params(AVFilterContext *avctx)
+{
+    VAAPIVPPContext *vpp_ctx   = avctx->priv;
+    SharpnessVAAPIContext *ctx = avctx->priv;
+
+    VAProcFilterCap caps;
+
+    VAStatus vas;
+    uint32_t num_caps = 1;
+
+    VAProcFilterParameterBuffer sharpness;
+
+    vas = vaQueryVideoProcFilterCaps(vpp_ctx->hwctx->display, vpp_ctx->va_context,
+                                     VAProcFilterSharpening,
+                                     &caps, &num_caps);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query sharpness caps "
+               "context: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR(EIO);
+    }
+
+    sharpness.type  = VAProcFilterSharpening;
+    sharpness.value = map(ctx->sharpness,
+                          SHARPNESS_MIN, SHARPNESS_MAX,
+                          caps.range.min_value,
+                          caps.range.max_value);
+    ff_vaapi_vpp_make_param_buffers(avctx,
+                                    VAProcFilterParameterBufferType,
+                                    &sharpness, sizeof(sharpness), 1);
+
+    return 0;
+}
+
+static int misc_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame)
+{
+    AVFilterContext *avctx   = inlink->dst;
+    AVFilterLink *outlink    = avctx->outputs[0];
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+    AVFrame *output_frame    = NULL;
+    VASurfaceID input_surface, output_surface;
+    VARectangle input_region;
+
+    VAProcPipelineParameterBuffer params;
+    int err;
+
+    av_log(avctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input_frame->format),
+           input_frame->width, input_frame->height, input_frame->pts);
+
+    if (vpp_ctx->va_context == VA_INVALID_ID)
+        return AVERROR(EINVAL);
+
+    input_surface = (VASurfaceID)(uintptr_t)input_frame->data[3];
+    av_log(avctx, AV_LOG_DEBUG, "Using surface %#x for misc vpp input.\n",
+           input_surface);
+
+    output_frame = ff_get_video_buffer(outlink, vpp_ctx->output_width,
+                                       vpp_ctx->output_height);
+    if (!output_frame) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    output_surface = (VASurfaceID)(uintptr_t)output_frame->data[3];
+    av_log(avctx, AV_LOG_DEBUG, "Using surface %#x for misc vpp output.\n",
+           output_surface);
+    memset(&params, 0, sizeof(params));
+    input_region = (VARectangle) {
+        .x      = 0,
+        .y      = 0,
+        .width  = input_frame->width,
+        .height = input_frame->height,
+    };
+
+    if (vpp_ctx->nb_filter_buffers) {
+        params.filters     = &vpp_ctx->filter_buffers[0];
+        params.num_filters = vpp_ctx->nb_filter_buffers;
+    }
+    params.surface = input_surface;
+    params.surface_region = &input_region;
+    params.surface_color_standard =
+        ff_vaapi_vpp_colour_standard(input_frame->colorspace);
+
+    params.output_region = NULL;
+    params.output_background_color = 0xff000000;
+    params.output_color_standard = params.surface_color_standard;
+
+    params.pipeline_flags = 0;
+    params.filter_flags = VA_FRAME_PICTURE;
+
+    err = ff_vaapi_vpp_render_picture(avctx, &params, output_surface);
+    if (err < 0)
+        goto fail;
+
+    err = av_frame_copy_props(output_frame, input_frame);
+    if (err < 0)
+        goto fail;
+    av_frame_free(&input_frame);
+
+    av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output_frame->format),
+           output_frame->width, output_frame->height, output_frame->pts);
+
+    return ff_filter_frame(outlink, output_frame);
+
+fail:
+    av_frame_free(&input_frame);
+    av_frame_free(&output_frame);
+    return err;
+}
+
+static av_cold int denoise_vaapi_init(AVFilterContext *avctx)
+{
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+
+    ff_vaapi_vpp_ctx_init(avctx);
+    vpp_ctx->pipeline_uninit     = ff_vaapi_vpp_pipeline_uninit;
+    vpp_ctx->build_filter_params = denoise_vaapi_build_filter_params;
+    vpp_ctx->output_format       = AV_PIX_FMT_NONE;
+
+    return 0;
+}
+
+static av_cold int sharpness_vaapi_init(AVFilterContext *avctx)
+{
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+
+    ff_vaapi_vpp_ctx_init(avctx);
+    vpp_ctx->pipeline_uninit     = ff_vaapi_vpp_pipeline_uninit;
+    vpp_ctx->build_filter_params = sharpness_vaapi_build_filter_params;
+    vpp_ctx->output_format       = AV_PIX_FMT_NONE;
+
+    return 0;
+}
+
+#define DOFFSET(x) offsetof(DenoiseVAAPIContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+static const AVOption denoise_vaapi_options[] = {
+    { "denoise", "denoise level",
+      DOFFSET(denoise), AV_OPT_TYPE_INT, { .i64 = DENOISE_DEFAULT }, DENOISE_MIN, DENOISE_MAX, .flags = FLAGS },
+    { NULL },
+};
+
+#define SOFFSET(x) offsetof(SharpnessVAAPIContext, x)
+static const AVOption sharpness_vaapi_options[] = {
+    { "sharpness", "sharpness level",
+      SOFFSET(sharpness), AV_OPT_TYPE_INT, { .i64 = SHARPNESS_DEFAULT }, SHARPNESS_MIN, SHARPNESS_MAX, .flags = FLAGS },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(denoise_vaapi);
+AVFILTER_DEFINE_CLASS(sharpness_vaapi);
+
+static const AVFilterPad misc_vaapi_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &misc_vaapi_filter_frame,
+        .config_props = &ff_vaapi_vpp_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad misc_vaapi_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_vaapi_vpp_config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_denoise_vaapi = {
+    .name          = "denoise_vaapi",
+    .description   = NULL_IF_CONFIG_SMALL("VAAPI VPP for de-noise"),
+    .priv_size     = sizeof(DenoiseVAAPIContext),
+    .init          = &denoise_vaapi_init,
+    .uninit        = &ff_vaapi_vpp_ctx_uninit,
+    .query_formats = &ff_vaapi_vpp_query_formats,
+    .inputs        = misc_vaapi_inputs,
+    .outputs       = misc_vaapi_outputs,
+    .priv_class    = &denoise_vaapi_class,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+AVFilter ff_vf_sharpness_vaapi = {
+    .name          = "sharpness_vaapi",
+    .description   = NULL_IF_CONFIG_SMALL("VAAPI VPP for sharpness"),
+    .priv_size     = sizeof(SharpnessVAAPIContext),
+    .init          = &sharpness_vaapi_init,
+    .uninit        = &ff_vaapi_vpp_ctx_uninit,
+    .query_formats = &ff_vaapi_vpp_query_formats,
+    .inputs        = misc_vaapi_inputs,
+    .outputs       = misc_vaapi_outputs,
+    .priv_class    = &sharpness_vaapi_class,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};

diff --git a/libavfilter/vf_mix.c b/libavfilter/vf_mix.c
new file mode 100644
index 0000000..d0cc7cb
--- /dev/null
+++ b/libavfilter/vf_mix.c

@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "framesync.h"
+#include "video.h"
+
+typedef struct MixContext {
+    const AVClass *class;
+    const AVPixFmtDescriptor *desc;
+    char *weights_str;
+    int nb_inputs;
+    int duration;
+    float *weights;
+    float scale;
+    float wfactor;
+
+    int tmix;
+    int nb_frames;
+
+    int depth;
+    int max;
+    int nb_planes;
+    int linesize[4];
+    int height[4];
+
+    AVFrame **frames;
+    FFFrameSync fs;
+} MixContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *pix_fmts = NULL;
+    int fmt, ret;
+
+    for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
+        if (!(desc->flags & AV_PIX_FMT_FLAG_PAL ||
+              desc->flags & AV_PIX_FMT_FLAG_HWACCEL ||
+              desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) &&
+            (ret = ff_add_format(&pix_fmts, fmt)) < 0)
+            return ret;
+    }
+
+    return ff_set_common_formats(ctx, pix_fmts);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    MixContext *s = ctx->priv;
+    char *p, *arg, *saveptr = NULL;
+    int i, ret, last = 0;
+
+    s->tmix = !strcmp(ctx->filter->name, "tmix");
+
+    s->frames = av_calloc(s->nb_inputs, sizeof(*s->frames));
+    if (!s->frames)
+        return AVERROR(ENOMEM);
+
+    s->weights = av_calloc(s->nb_inputs, sizeof(*s->weights));
+    if (!s->weights)
+        return AVERROR(ENOMEM);
+
+    if (!s->tmix) {
+        for (i = 0; i < s->nb_inputs; i++) {
+            AVFilterPad pad = { 0 };
+
+            pad.type = AVMEDIA_TYPE_VIDEO;
+            pad.name = av_asprintf("input%d", i);
+            if (!pad.name)
+                return AVERROR(ENOMEM);
+
+            if ((ret = ff_insert_inpad(ctx, i, &pad)) < 0) {
+                av_freep(&pad.name);
+                return ret;
+            }
+        }
+    }
+
+    p = s->weights_str;
+    for (i = 0; i < s->nb_inputs; i++) {
+        if (!(arg = av_strtok(p, " ", &saveptr)))
+            break;
+
+        p = NULL;
+        sscanf(arg, "%f", &s->weights[i]);
+        s->wfactor += s->weights[i];
+        last = i;
+    }
+    for (; i < s->nb_inputs; i++) {
+        s->weights[i] = s->weights[last];
+        s->wfactor += s->weights[i];
+    }
+    if (s->scale == 0) {
+        s->wfactor = 1 / s->wfactor;
+    } else {
+        s->wfactor = s->scale;
+    }
+
+    return 0;
+}
+
+typedef struct ThreadData {
+    AVFrame **in, *out;
+} ThreadData;
+
+static int mix_frames(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    MixContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame **in = td->in;
+    AVFrame *out = td->out;
+    int i, p, x, y;
+
+    if (s->depth <= 8) {
+        for (p = 0; p < s->nb_planes; p++) {
+            const int slice_start = (s->height[p] * jobnr) / nb_jobs;
+            const int slice_end = (s->height[p] * (jobnr+1)) / nb_jobs;
+            uint8_t *dst = out->data[p] + slice_start * out->linesize[p];
+
+            for (y = slice_start; y < slice_end; y++) {
+                for (x = 0; x < s->linesize[p]; x++) {
+                    int val = 0;
+
+                    for (i = 0; i < s->nb_inputs; i++) {
+                        uint8_t src = in[i]->data[p][y * in[i]->linesize[p] + x];
+
+                        val += src * s->weights[i];
+                    }
+
+                    dst[x] = av_clip_uint8(val * s->wfactor);
+                }
+
+                dst += out->linesize[p];
+            }
+        }
+    } else {
+        for (p = 0; p < s->nb_planes; p++) {
+            const int slice_start = (s->height[p] * jobnr) / nb_jobs;
+            const int slice_end = (s->height[p] * (jobnr+1)) / nb_jobs;
+            uint16_t *dst = (uint16_t *)(out->data[p] + slice_start * out->linesize[p]);
+
+            for (y = slice_start; y < slice_end; y++) {
+                for (x = 0; x < s->linesize[p] / 2; x++) {
+                    int val = 0;
+
+                    for (i = 0; i < s->nb_inputs; i++) {
+                        uint16_t src = AV_RN16(in[i]->data[p] + y * in[i]->linesize[p] + x * 2);
+
+                        val += src * s->weights[i];
+                    }
+
+                    dst[x] = av_clip(val * s->wfactor, 0, s->max);
+                }
+
+                dst += out->linesize[p] / 2;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int process_frame(FFFrameSync *fs)
+{
+    AVFilterContext *ctx = fs->parent;
+    AVFilterLink *outlink = ctx->outputs[0];
+    MixContext *s = fs->opaque;
+    AVFrame **in = s->frames;
+    AVFrame *out;
+    ThreadData td;
+    int i, ret;
+
+    for (i = 0; i < s->nb_inputs; i++) {
+        if ((ret = ff_framesync_get_frame(&s->fs, i, &in[i], 0)) < 0)
+            return ret;
+    }
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out)
+        return AVERROR(ENOMEM);
+    out->pts = av_rescale_q(s->fs.pts, s->fs.time_base, outlink->time_base);
+
+    td.in = in;
+    td.out = out;
+    ctx->internal->execute(ctx, mix_frames, &td, NULL, FFMIN(s->height[0], ff_filter_get_nb_threads(ctx)));
+
+    return ff_filter_frame(outlink, out);
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    MixContext *s = ctx->priv;
+    AVRational time_base = ctx->inputs[0]->time_base;
+    AVRational frame_rate = ctx->inputs[0]->frame_rate;
+    AVFilterLink *inlink = ctx->inputs[0];
+    int height = ctx->inputs[0]->h;
+    int width = ctx->inputs[0]->w;
+    FFFrameSyncIn *in;
+    int i, ret;
+
+    if (!s->tmix) {
+        for (i = 1; i < s->nb_inputs; i++) {
+            if (ctx->inputs[i]->h != height || ctx->inputs[i]->w != width) {
+                av_log(ctx, AV_LOG_ERROR, "Input %d size (%dx%d) does not match input %d size (%dx%d).\n", i, ctx->inputs[i]->w, ctx->inputs[i]->h, 0, width, height);
+                return AVERROR(EINVAL);
+            }
+        }
+    }
+
+    s->desc = av_pix_fmt_desc_get(outlink->format);
+    if (!s->desc)
+        return AVERROR_BUG;
+    s->nb_planes = av_pix_fmt_count_planes(outlink->format);
+    s->depth = s->desc->comp[0].depth;
+    s->max = (1 << s->depth) - 1;
+
+    if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
+        return ret;
+
+    s->height[1] = s->height[2] = AV_CEIL_RSHIFT(inlink->h, s->desc->log2_chroma_h);
+    s->height[0] = s->height[3] = inlink->h;
+
+    if (s->tmix)
+        return 0;
+
+    outlink->w          = width;
+    outlink->h          = height;
+    outlink->time_base  = time_base;
+    outlink->frame_rate = frame_rate;
+
+    if ((ret = ff_framesync_init(&s->fs, ctx, s->nb_inputs)) < 0)
+        return ret;
+
+    in = s->fs.in;
+    s->fs.opaque = s;
+    s->fs.on_event = process_frame;
+
+    for (i = 0; i < s->nb_inputs; i++) {
+        AVFilterLink *inlink = ctx->inputs[i];
+
+        in[i].time_base = inlink->time_base;
+        in[i].sync   = 1;
+        in[i].before = EXT_STOP;
+        in[i].after  = (s->duration == 1 || (s->duration == 2 && i == 0)) ? EXT_STOP : EXT_INFINITY;
+    }
+
+    return ff_framesync_configure(&s->fs);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    MixContext *s = ctx->priv;
+    int i;
+
+    ff_framesync_uninit(&s->fs);
+    av_freep(&s->weights);
+
+    if (!s->tmix) {
+        for (i = 0; i < ctx->nb_inputs; i++)
+            av_freep(&ctx->input_pads[i].name);
+    } else {
+        for (i = 0; i < s->nb_frames; i++)
+            av_frame_free(&s->frames[i]);
+    }
+    av_freep(&s->frames);
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    MixContext *s = ctx->priv;
+    return ff_framesync_activate(&s->fs);
+}
+
+#define OFFSET(x) offsetof(MixContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption mix_options[] = {
+    { "inputs", "set number of inputs", OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64=2}, 2, INT_MAX, .flags = FLAGS },
+    { "weights", "set weight for each input", OFFSET(weights_str), AV_OPT_TYPE_STRING, {.str="1 1"}, 0, 0, .flags = FLAGS },
+    { "scale", "set scale", OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=0}, 0, INT16_MAX, .flags = FLAGS },
+    { "duration", "how to determine end of stream", OFFSET(duration), AV_OPT_TYPE_INT, {.i64=0}, 0, 2, .flags = FLAGS, "duration" },
+        { "longest",  "Duration of longest input",  0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "duration" },
+        { "shortest", "Duration of shortest input", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "duration" },
+        { "first",    "Duration of first input",    0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "duration" },
+    { NULL },
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+    },
+    { NULL }
+};
+
+#if CONFIG_MIX_FILTER
+AVFILTER_DEFINE_CLASS(mix);
+
+AVFilter ff_vf_mix = {
+    .name          = "mix",
+    .description   = NULL_IF_CONFIG_SMALL("Mix video inputs."),
+    .priv_size     = sizeof(MixContext),
+    .priv_class    = &mix_class,
+    .query_formats = query_formats,
+    .outputs       = outputs,
+    .init          = init,
+    .uninit        = uninit,
+    .activate      = activate,
+    .flags         = AVFILTER_FLAG_DYNAMIC_INPUTS | AVFILTER_FLAG_SLICE_THREADS,
+};
+
+#endif /* CONFIG_MIX_FILTER */
+
+#if CONFIG_TMIX_FILTER
+static int tmix_filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    MixContext *s = ctx->priv;
+    ThreadData td;
+    AVFrame *out;
+
+    if (s->nb_frames < s->nb_inputs) {
+        s->frames[s->nb_frames] = in;
+        s->nb_frames++;
+        return 0;
+    } else {
+        av_frame_free(&s->frames[0]);
+        memmove(&s->frames[0], &s->frames[1], sizeof(*s->frames) * (s->nb_inputs - 1));
+        s->frames[s->nb_inputs - 1] = in;
+    }
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out)
+        return AVERROR(ENOMEM);
+    out->pts = s->frames[0]->pts;
+
+    td.out = out;
+    td.in = s->frames;
+    ctx->internal->execute(ctx, mix_frames, &td, NULL, FFMIN(s->height[0], ff_filter_get_nb_threads(ctx)));
+
+    return ff_filter_frame(outlink, out);
+}
+
+static const AVOption tmix_options[] = {
+    { "frames", "set number of successive frames to mix", OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64=3}, 2, 128, .flags = FLAGS },
+    { "weights", "set weight for each frame", OFFSET(weights_str), AV_OPT_TYPE_STRING, {.str="1 1 1"}, 0, 0, .flags = FLAGS },
+    { "scale", "set scale", OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=0}, 0, INT16_MAX, .flags = FLAGS },
+    { NULL },
+};
+
+static const AVFilterPad inputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .filter_frame  = tmix_filter_frame,
+    },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(tmix);
+
+AVFilter ff_vf_tmix = {
+    .name          = "tmix",
+    .description   = NULL_IF_CONFIG_SMALL("Mix successive video frames."),
+    .priv_size     = sizeof(MixContext),
+    .priv_class    = &tmix_class,
+    .query_formats = query_formats,
+    .outputs       = outputs,
+    .inputs        = inputs,
+    .init          = init,
+    .uninit        = uninit,
+    .flags         = AVFILTER_FLAG_SLICE_THREADS,
+};
+
+#endif /* CONFIG_TMIX_FILTER */

diff --git a/libavfilter/vf_neighbor.c b/libavfilter/vf_neighbor.c
index de4a12f..2db1e5e 100644
--- a/libavfilter/vf_neighbor.c
+++ b/libavfilter/vf_neighbor.c

@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/opt.h"
 #include "avfilter.h"
@@ -27,6 +28,10 @@
 #include "internal.h"
 #include "video.h"
 
+typedef struct ThreadData {
+    AVFrame *in, *out;
+} ThreadData;
+
 typedef struct NContext {
     const AVClass *class;
     int planeheight[4];
@@ -34,7 +39,9 @@
     int nb_planes;
     int threshold[4];
     int coordinates;
-    uint8_t *buffer;
+
+    int depth;
+    int bpc;
 
     void (*filter)(uint8_t *dst, const uint8_t *p1, int width,
                    int threshold, const uint8_t *coordinates[], int coord);
@@ -43,34 +50,29 @@
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat pix_fmts[] = {
-        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA420P,
-        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ422P,AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ411P,
-        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
-        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P,
+        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_NONE
     };
 
     return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
 }
 
-static av_cold void uninit(AVFilterContext *ctx)
-{
-    NContext *s = ctx->priv;
-
-    av_freep(&s->buffer);
-}
-
-static inline void line_copy8(uint8_t *line, const uint8_t *srcp, int width, int mergin)
-{
-    int i;
-
-    memcpy(line, srcp, width);
-
-    for (i = mergin; i > 0; i--) {
-        line[-i] = line[i];
-        line[width - 1 + i] = line[width - 1 - i];
-    }
-}
-
 static void erosion(uint8_t *dst, const uint8_t *p1, int width,
                     int threshold, const uint8_t *coordinates[], int coord)
 {
@@ -91,6 +93,27 @@
     }
 }
 
+static void erosion16(uint8_t *dstp, const uint8_t *p1, int width,
+                      int threshold, const uint8_t *coordinates[], int coord)
+{
+    uint16_t *dst = (uint16_t *)dstp;
+    int x, i;
+
+    for (x = 0; x < width; x++) {
+        int min = AV_RN16A(&p1[2 * x]);
+        int limit = FFMAX(min - threshold, 0);
+
+        for (i = 0; i < 8; i++) {
+            if (coord & (1 << i)) {
+                min = FFMIN(min, AV_RN16A(coordinates[i] + x * 2));
+            }
+            min = FFMAX(min, limit);
+        }
+
+        dst[x] = min;
+    }
+}
+
 static void dilation(uint8_t *dst, const uint8_t *p1, int width,
                      int threshold, const uint8_t *coordinates[], int coord)
 {
@@ -111,6 +134,27 @@
     }
 }
 
+static void dilation16(uint8_t *dstp, const uint8_t *p1, int width,
+                       int threshold, const uint8_t *coordinates[], int coord)
+{
+    uint16_t *dst = (uint16_t *)dstp;
+    int x, i;
+
+    for (x = 0; x < width; x++) {
+        int max = AV_RN16A(&p1[x * 2]);
+        int limit = FFMIN(max + threshold, 255);
+
+        for (i = 0; i < 8; i++) {
+            if (coord & (1 << i)) {
+                max = FFMAX(max, AV_RN16A(coordinates[i] + x * 2));
+            }
+            max = FFMIN(max, limit);
+        }
+
+        dst[x] = max;
+    }
+}
+
 static void deflate(uint8_t *dst, const uint8_t *p1, int width,
                     int threshold, const uint8_t *coordinates[], int coord)
 {
@@ -126,6 +170,22 @@
     }
 }
 
+static void deflate16(uint8_t *dstp, const uint8_t *p1, int width,
+                      int threshold, const uint8_t *coordinates[], int coord)
+{
+    uint16_t *dst = (uint16_t *)dstp;
+    int x, i;
+
+    for (x = 0; x < width; x++) {
+        int sum = 0;
+        int limit = FFMAX(AV_RN16A(&p1[2 * x]) - threshold, 0);
+
+        for (i = 0; i < 8; sum += AV_RN16A(coordinates[i++] + x * 2));
+
+        dst[x] = FFMAX(FFMIN(sum / 8, p1[x]), limit);
+    }
+}
+
 static void inflate(uint8_t *dst, const uint8_t *p1, int width,
                     int threshold, const uint8_t *coordinates[], int coord)
 {
@@ -141,32 +201,98 @@
     }
 }
 
+static void inflate16(uint8_t *dstp, const uint8_t *p1, int width,
+                      int threshold, const uint8_t *coordinates[], int coord)
+{
+    uint16_t *dst = (uint16_t *)dstp;
+    int x, i;
+
+    for (x = 0; x < width; x++) {
+        int sum = 0;
+        int limit = FFMIN(AV_RN16A(&p1[2 * x]) + threshold, 255);
+
+        for (i = 0; i < 8; sum += AV_RN16A(coordinates[i++] + x * 2));
+
+        dst[x] = FFMIN(FFMAX(sum / 8, p1[x]), limit);
+    }
+}
+
 static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
     NContext *s = ctx->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
-    int ret;
 
-    if ((ret = av_image_fill_linesizes(s->planewidth, inlink->format, inlink->w)) < 0)
-        return ret;
+    s->depth = desc->comp[0].depth;
+    s->bpc = (s->depth + 7) / 8;
 
+    s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planewidth[0] = s->planewidth[3] = inlink->w;
     s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
     s->planeheight[0] = s->planeheight[3] = inlink->h;
 
     s->nb_planes = av_pix_fmt_count_planes(inlink->format);
-    s->buffer = av_malloc(3 * (s->planewidth[0] + 32));
-    if (!s->buffer)
-        return AVERROR(ENOMEM);
 
     if (!strcmp(ctx->filter->name, "erosion"))
-        s->filter = erosion;
+        s->filter = s->depth > 8 ? erosion16 : erosion;
     else if (!strcmp(ctx->filter->name, "dilation"))
-        s->filter = dilation;
+        s->filter = s->depth > 8 ? dilation16 : dilation;
     else if (!strcmp(ctx->filter->name, "deflate"))
-        s->filter = deflate;
+        s->filter = s->depth > 8 ? deflate16 : deflate;
     else if (!strcmp(ctx->filter->name, "inflate"))
-        s->filter = inflate;
+        s->filter = s->depth > 8 ? inflate16 : inflate;
+
+    return 0;
+}
+
+static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    NContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *out = td->out;
+    AVFrame *in = td->in;
+    int plane, y;
+
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        const int bpc = s->bpc;
+        const int threshold = s->threshold[plane];
+        const int stride = in->linesize[plane];
+        const int dstride = out->linesize[plane];
+        const int height = s->planeheight[plane];
+        const int width  = s->planewidth[plane];
+        const int slice_start = (height * jobnr) / nb_jobs;
+        const int slice_end = (height * (jobnr+1)) / nb_jobs;
+        const uint8_t *src = (const uint8_t *)in->data[plane] + slice_start * stride;
+        uint8_t *dst = out->data[plane] + slice_start * dstride;
+
+        if (!threshold) {
+            av_image_copy_plane(dst, dstride, src, stride, width * bpc, slice_end - slice_start);
+            continue;
+        }
+
+        for (y = slice_start; y < slice_end; y++) {
+            const int nh = y > 0;
+            const int ph = y < height - 1;
+            const uint8_t *coordinates[] = { src - nh * stride, src + 1 * bpc - nh * stride, src + 2 * bpc - nh * stride,
+                                             src,                                            src + 2 * bpc,
+                                             src + ph * stride, src + 1 * bpc + ph * stride, src + 2 * bpc + ph * stride};
+
+            const uint8_t *coordinateslb[] = { src + 1 * bpc - nh * stride, src - nh * stride, src + 1 * bpc - nh * stride,
+                                               src + 1 * bpc,                                  src + 1 * bpc,
+                                               src + 1 * bpc + ph * stride, src + ph * stride, src + 1 * bpc + ph * stride};
+
+            const uint8_t *coordinatesrb[] = { src + (width - 2) * bpc - nh * stride, src + (width - 1) * bpc - nh * stride, src + (width - 2) * bpc - nh * stride,
+                                               src + (width - 2) * bpc,                                                      src + (width - 2) * bpc,
+                                               src + (width - 2) * bpc + ph * stride, src + (width - 1) * bpc + ph * stride, src + (width - 2) * bpc + ph * stride};
+
+            s->filter(dst,                     src,                     1,         threshold, coordinateslb, s->coordinates);
+            s->filter(dst          + 1  * bpc, src          + 1  * bpc, width - 2, threshold, coordinates,   s->coordinates);
+            s->filter(dst + (width - 1) * bpc, src + (width - 1) * bpc, 1,         threshold, coordinatesrb, s->coordinates);
+
+            src += stride;
+            dst += dstride;
+        }
+    }
 
     return 0;
 }
@@ -176,8 +302,8 @@
     AVFilterContext *ctx = inlink->dst;
     AVFilterLink *outlink = ctx->outputs[0];
     NContext *s = ctx->priv;
+    ThreadData td;
     AVFrame *out;
-    int plane, y;
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out) {
@@ -186,43 +312,9 @@
     }
     av_frame_copy_props(out, in);
 
-    for (plane = 0; plane < s->nb_planes; plane++) {
-        const int threshold = s->threshold[plane];
-
-        if (threshold) {
-            const uint8_t *src = in->data[plane];
-            uint8_t *dst = out->data[plane];
-            int stride = in->linesize[plane];
-            int height = s->planeheight[plane];
-            int width  = s->planewidth[plane];
-            uint8_t *p0 = s->buffer + 16;
-            uint8_t *p1 = p0 + s->planewidth[0];
-            uint8_t *p2 = p1 + s->planewidth[0];
-            uint8_t *orig = p0, *end = p2;
-
-            line_copy8(p0, src + stride, width, 1);
-            line_copy8(p1, src, width, 1);
-
-            for (y = 0; y < height; y++) {
-                const uint8_t *coordinates[] = { p0 - 1, p0, p0 + 1,
-                                                 p1 - 1,     p1 + 1,
-                                                 p2 - 1, p2, p2 + 1};
-                src += stride * (y < height - 1 ? 1 : -1);
-                line_copy8(p2, src, width, 1);
-
-                s->filter(dst, p1, width, threshold, coordinates, s->coordinates);
-
-                p0 = p1;
-                p1 = p2;
-                p2 = (p2 == end) ? orig: p2 + s->planewidth[0];
-                dst += out->linesize[plane];
-            }
-        } else {
-            av_image_copy_plane(out->data[plane], out->linesize[plane],
-                                in->data[plane], in->linesize[plane],
-                                s->planewidth[plane], s->planeheight[plane]);
-        }
-    }
+    td.in = in;
+    td.out = out;
+    ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(s->planeheight[1], ff_filter_get_nb_threads(ctx)));
 
     av_frame_free(&in);
     return ff_filter_frame(outlink, out);
@@ -257,11 +349,11 @@
     .description   = NULL_IF_CONFIG_SMALL(description_),     \
     .priv_size     = sizeof(NContext),                       \
     .priv_class    = &name_##_class,                         \
-    .uninit        = uninit,                                 \
     .query_formats = query_formats,                          \
     .inputs        = neighbor_inputs,                        \
     .outputs       = neighbor_outputs,                       \
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, \
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC| \
+                     AVFILTER_FLAG_SLICE_THREADS,            \
 }
 
 #if CONFIG_EROSION_FILTER

diff --git a/libavfilter/vf_neighbor_opencl.c b/libavfilter/vf_neighbor_opencl.c
new file mode 100644
index 0000000..fb40132
--- /dev/null
+++ b/libavfilter/vf_neighbor_opencl.c

@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 2018 Danil Iashchenko
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+
+typedef struct NeighborOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    int              initialised;
+    cl_kernel        kernel;
+    cl_command_queue command_queue;
+
+    char *matrix_str[4];
+
+    cl_float threshold[4];
+    cl_int coordinates;
+    cl_mem coord;
+
+} NeighborOpenCLContext;
+
+static int neighbor_opencl_init(AVFilterContext *avctx)
+{
+    NeighborOpenCLContext *ctx = avctx->priv;
+    const char *kernel_name;
+    cl_int cle;
+    int err;
+
+    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_neighbor, 1);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+                     "command queue %d.\n", cle);
+
+    if (!strcmp(avctx->filter->name, "erosion_opencl")){
+        kernel_name = "erosion_global";
+    } else if (!strcmp(avctx->filter->name, "dilation_opencl")){
+        kernel_name = "dilation_global";
+    }
+    ctx->kernel = clCreateKernel(ctx->ocf.program, kernel_name, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                     "kernel %d.\n", cle);
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel)
+        clReleaseKernel(ctx->kernel);
+    return err;
+}
+
+static int neighbor_opencl_make_filter_params(AVFilterContext *avctx)
+{
+    NeighborOpenCLContext *ctx = avctx->priv;
+    cl_int matrix[9];
+    cl_mem buffer;
+    cl_int cle;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        ctx->threshold[i] /= 255.0;
+    }
+
+    matrix[4] = 0;
+    for (i = 0; i < 8; i++) {
+        if (ctx->coordinates & (1 << i)) {
+            matrix[i > 3 ? i + 1: i] = 1;
+        }
+    }
+    buffer = clCreateBuffer(ctx->ocf.hwctx->context,
+                            CL_MEM_READ_ONLY |
+                            CL_MEM_COPY_HOST_PTR |
+                            CL_MEM_HOST_NO_ACCESS,
+                            9 * sizeof(cl_int), matrix, &cle);
+    if (!buffer) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create matrix buffer: "
+               "%d.\n", cle);
+        return AVERROR(EIO);
+    }
+    ctx->coord = buffer;
+
+    return 0;
+}
+
+
+static int neighbor_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext *avctx = inlink->dst;
+    AVFilterLink *outlink = avctx->outputs[0];
+    NeighborOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    cl_int cle;
+    size_t global_work[2];
+    cl_mem src, dst;
+    int err, p;
+    size_t origin[3] = {0, 0, 0};
+    size_t region[3] = {0, 0, 1};
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+
+    if (!ctx->initialised) {
+        err = neighbor_opencl_init(avctx);
+        if (err < 0)
+            goto fail;
+
+        err = neighbor_opencl_make_filter_params(avctx);
+        if (err < 0)
+            goto fail;
+
+    }
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+        src = (cl_mem) input->data[p];
+        dst = (cl_mem)output->data[p];
+
+        if (!dst)
+            break;
+
+        if (ctx->threshold[p] == 0) {
+            err = ff_opencl_filter_work_size_from_image(avctx, region, output, p, 0);
+            if (err < 0)
+                goto fail;
+
+            cle = clEnqueueCopyImage(ctx->command_queue, src, dst,
+                                     origin, origin, region, 0, NULL, NULL);
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to copy plane %d: %d.\n",
+                             p, cle);
+        } else {
+            CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem,   &dst);
+            CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem,   &src);
+            CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_float, &ctx->threshold[p]);
+            CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem,   &ctx->coord);
+
+            err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
+            if (err < 0)
+                goto fail;
+
+            av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+                   "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+                   p, global_work[0], global_work[1]);
+
+            cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                         global_work, NULL,
+                                         0, NULL, NULL);
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue "
+                             "kernel: %d.\n", cle);
+        }
+    }
+
+    cle = clFinish(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    av_frame_free(&input);
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    return err;
+}
+
+static av_cold void neighbor_opencl_uninit(AVFilterContext *avctx)
+{
+    NeighborOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+
+    clReleaseMemObject(ctx->coord);
+
+    if (ctx->kernel) {
+        cle = clReleaseKernel(ctx->kernel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+static const AVFilterPad neighbor_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &neighbor_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad neighbor_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_output,
+    },
+    { NULL }
+};
+
+#define OFFSET(x) offsetof(NeighborOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+
+#if CONFIG_EROSION_OPENCL_FILTER
+
+static const AVOption erosion_opencl_options[] = {
+    { "threshold0",  "set threshold for 1st plane",   OFFSET(threshold[0]),   AV_OPT_TYPE_FLOAT, {.dbl=65535.0}, 0.0, 65535, FLAGS },
+    { "threshold1",  "set threshold for 2nd plane",   OFFSET(threshold[1]),   AV_OPT_TYPE_FLOAT, {.dbl=65535.0}, 0.0, 65535, FLAGS },
+    { "threshold2",  "set threshold for 3rd plane",   OFFSET(threshold[2]),   AV_OPT_TYPE_FLOAT, {.dbl=65535.0}, 0.0, 65535, FLAGS },
+    { "threshold3",  "set threshold for 4th plane",   OFFSET(threshold[3]),   AV_OPT_TYPE_FLOAT, {.dbl=65535.0}, 0.0, 65535, FLAGS },
+    { "coordinates", "set coordinates",               OFFSET(coordinates),    AV_OPT_TYPE_INT,   {.i64=255},     0,   255,   FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(erosion_opencl);
+
+AVFilter ff_vf_erosion_opencl = {
+    .name           = "erosion_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply erosion effect"),
+    .priv_size      = sizeof(NeighborOpenCLContext),
+    .priv_class     = &erosion_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &neighbor_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = neighbor_opencl_inputs,
+    .outputs        = neighbor_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_EROSION_OPENCL_FILTER */
+
+#if CONFIG_DILATION_OPENCL_FILTER
+
+static const AVOption dilation_opencl_options[] = {
+    { "threshold0",  "set threshold for 1st plane",   OFFSET(threshold[0]),   AV_OPT_TYPE_FLOAT, {.dbl=65535.0}, 0.0, 65535, FLAGS },
+    { "threshold1",  "set threshold for 2nd plane",   OFFSET(threshold[1]),   AV_OPT_TYPE_FLOAT, {.dbl=65535.0}, 0.0, 65535, FLAGS },
+    { "threshold2",  "set threshold for 3rd plane",   OFFSET(threshold[2]),   AV_OPT_TYPE_FLOAT, {.dbl=65535.0}, 0.0, 65535, FLAGS },
+    { "threshold3",  "set threshold for 4th plane",   OFFSET(threshold[3]),   AV_OPT_TYPE_FLOAT, {.dbl=65535.0}, 0.0, 65535, FLAGS },
+    { "coordinates", "set coordinates",               OFFSET(coordinates),    AV_OPT_TYPE_INT,   {.i64=255},     0,   255,   FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(dilation_opencl);
+
+AVFilter ff_vf_dilation_opencl = {
+    .name           = "dilation_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply dilation effect"),
+    .priv_size      = sizeof(NeighborOpenCLContext),
+    .priv_class     = &dilation_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &neighbor_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = neighbor_opencl_inputs,
+    .outputs        = neighbor_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_DILATION_OPENCL_FILTER */

diff --git a/libavfilter/vf_nlmeans.c b/libavfilter/vf_nlmeans.c
index e4952e1..82e779c 100644
--- a/libavfilter/vf_nlmeans.c
+++ b/libavfilter/vf_nlmeans.c

@@ -20,8 +20,6 @@
 
 /**
  * @todo
- * - SIMD for compute_safe_ssd_integral_image
- * - SIMD for final weighted averaging
  * - better automatic defaults? see "Parameters" @ http://www.ipol.im/pub/art/2011/bcm_nlm/
  * - temporal support (probably doesn't need any displacement according to
  *   "Denoising image sequences does not require motion estimation")
@@ -37,11 +35,12 @@
 #include "avfilter.h"
 #include "formats.h"
 #include "internal.h"
+#include "vf_nlmeans.h"
 #include "video.h"
 
 struct weighted_avg {
-    double total_weight;
-    double sum;
+    float total_weight;
+    float sum;
 };
 
 #define WEIGHT_LUT_NBITS 9
@@ -60,12 +59,13 @@
     uint32_t *ii_orig;                          // integral image
     uint32_t *ii;                               // integral image starting after the 0-line and 0-column
     int ii_w, ii_h;                             // width and height of the integral image
-    int ii_lz_32;                               // linesize in 32-bit units of the integral image
+    ptrdiff_t ii_lz_32;                         // linesize in 32-bit units of the integral image
     struct weighted_avg *wa;                    // weighted average of every pixel
-    int wa_linesize;                            // linesize for wa in struct size unit
-    double weight_lut[WEIGHT_LUT_SIZE];         // lookup table mapping (scaled) patch differences to their associated weights
-    double pdiff_lut_scale;                     // scale factor for patch differences before looking into the LUT
-    int max_meaningful_diff;                    // maximum difference considered (if the patch difference is too high we ignore the pixel)
+    ptrdiff_t wa_linesize;                      // linesize for wa in struct size unit
+    float weight_lut[WEIGHT_LUT_SIZE];          // lookup table mapping (scaled) patch differences to their associated weights
+    float pdiff_lut_scale;                      // scale factor for patch differences before looking into the LUT
+    uint32_t max_meaningful_diff;               // maximum difference considered (if the patch difference is too high we ignore the pixel)
+    NLMeansDSPContext dsp;
 } NLMeansContext;
 
 #define OFFSET(x) offsetof(NLMeansContext, x)
@@ -100,44 +100,6 @@
     return ff_set_common_formats(ctx, fmts_list);
 }
 
-/*
- * M is a discrete map where every entry contains the sum of all the entries
- * in the rectangle from the top-left origin of M to its coordinate. In the
- * following schema, "i" contains the sum of the whole map:
- *
- * M = +----------+-----------------+----+
- *     |          |                 |    |
- *     |          |                 |    |
- *     |         a|                b|   c|
- *     +----------+-----------------+----+
- *     |          |                 |    |
- *     |          |                 |    |
- *     |          |        X        |    |
- *     |          |                 |    |
- *     |         d|                e|   f|
- *     +----------+-----------------+----+
- *     |          |                 |    |
- *     |         g|                h|   i|
- *     +----------+-----------------+----+
- *
- * The sum of the X box can be calculated with:
- *    X = e-d-b+a
- *
- * See https://en.wikipedia.org/wiki/Summed_area_table
- *
- * The compute*_ssd functions compute the integral image M where every entry
- * contains the sum of the squared difference of every corresponding pixels of
- * two input planes of the same size as M.
- */
-static inline int get_integral_patch_value(const uint32_t *ii, int ii_lz_32, int x, int y, int p)
-{
-    const int e = ii[(y + p    ) * ii_lz_32 + (x + p    )];
-    const int d = ii[(y + p    ) * ii_lz_32 + (x - p - 1)];
-    const int b = ii[(y - p - 1) * ii_lz_32 + (x + p    )];
-    const int a = ii[(y - p - 1) * ii_lz_32 + (x - p - 1)];
-    return e - d - b + a;
-}
-
 /**
  * Compute squared difference of the safe area (the zone where s1 and s2
  * overlap). It is likely the largest integral zone, so it is interesting to do
@@ -145,29 +107,39 @@
  * function, we do not need any clipping here.
  *
  * The line above dst and the column to its left are always readable.
- *
- * This C version computes the SSD integral image using a scalar accumulator,
- * while for SIMD implementation it is likely more interesting to use the
- * two-loops algorithm variant.
  */
-static void compute_safe_ssd_integral_image_c(uint32_t *dst, int dst_linesize_32,
-                                              const uint8_t *s1, int linesize1,
-                                              const uint8_t *s2, int linesize2,
+static void compute_safe_ssd_integral_image_c(uint32_t *dst, ptrdiff_t dst_linesize_32,
+                                              const uint8_t *s1, ptrdiff_t linesize1,
+                                              const uint8_t *s2, ptrdiff_t linesize2,
                                               int w, int h)
 {
     int x, y;
+    const uint32_t *dst_top = dst - dst_linesize_32;
+
+    /* SIMD-friendly assumptions allowed here */
+    av_assert2(!(w & 0xf) && w >= 16 && h >= 1);
 
     for (y = 0; y < h; y++) {
-        uint32_t acc = dst[-1] - dst[-dst_linesize_32 - 1];
+        for (x = 0; x < w; x += 4) {
+            const int d0 = s1[x    ] - s2[x    ];
+            const int d1 = s1[x + 1] - s2[x + 1];
+            const int d2 = s1[x + 2] - s2[x + 2];
+            const int d3 = s1[x + 3] - s2[x + 3];
 
-        for (x = 0; x < w; x++) {
-            const int d  = s1[x] - s2[x];
-            acc += d * d;
-            dst[x] = dst[-dst_linesize_32 + x] + acc;
+            dst[x    ] = dst_top[x    ] - dst_top[x - 1] + d0*d0;
+            dst[x + 1] = dst_top[x + 1] - dst_top[x    ] + d1*d1;
+            dst[x + 2] = dst_top[x + 2] - dst_top[x + 1] + d2*d2;
+            dst[x + 3] = dst_top[x + 3] - dst_top[x + 2] + d3*d3;
+
+            dst[x    ] += dst[x - 1];
+            dst[x + 1] += dst[x    ];
+            dst[x + 2] += dst[x + 1];
+            dst[x + 3] += dst[x + 2];
         }
         s1  += linesize1;
         s2  += linesize2;
         dst += dst_linesize_32;
+        dst_top += dst_linesize_32;
     }
 }
 
@@ -195,9 +167,9 @@
  * @param w                 width to compute
  * @param h                 height to compute
  */
-static inline void compute_unsafe_ssd_integral_image(uint32_t *dst, int dst_linesize_32,
+static inline void compute_unsafe_ssd_integral_image(uint32_t *dst, ptrdiff_t dst_linesize_32,
                                                      int startx, int starty,
-                                                     const uint8_t *src, int linesize,
+                                                     const uint8_t *src, ptrdiff_t linesize,
                                                      int offx, int offy, int r, int sw, int sh,
                                                      int w, int h)
 {
@@ -237,8 +209,9 @@
  * @param h                 source height
  * @param e                 research padding edge
  */
-static void compute_ssd_integral_image(uint32_t *ii, int ii_linesize_32,
-                                       const uint8_t *src, int linesize, int offx, int offy,
+static void compute_ssd_integral_image(const NLMeansDSPContext *dsp,
+                                       uint32_t *ii, ptrdiff_t ii_linesize_32,
+                                       const uint8_t *src, ptrdiff_t linesize, int offx, int offy,
                                        int e, int w, int h)
 {
     // ii has a surrounding padding of thickness "e"
@@ -257,9 +230,16 @@
     // to compare the 2 sources pixels
     const int startx_safe = FFMAX(s1x, s2x);
     const int starty_safe = FFMAX(s1y, s2y);
-    const int endx_safe   = FFMIN(s1x + w, s2x + w);
+    const int u_endx_safe = FFMIN(s1x + w, s2x + w); // unaligned
     const int endy_safe   = FFMIN(s1y + h, s2y + h);
 
+    // deduce the safe area width and height
+    const int safe_pw = (u_endx_safe - startx_safe) & ~0xf;
+    const int safe_ph = endy_safe - starty_safe;
+
+    // adjusted end x position of the safe area after width of the safe area gets aligned
+    const int endx_safe = startx_safe + safe_pw;
+
     // top part where only one of s1 and s2 is still readable, or none at all
     compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
                                       0, 0,
@@ -273,24 +253,25 @@
                                       0, starty_safe,
                                       src, linesize,
                                       offx, offy, e, w, h,
-                                      startx_safe, endy_safe - starty_safe);
+                                      startx_safe, safe_ph);
 
     // main and safe part of the integral
     av_assert1(startx_safe - s1x >= 0); av_assert1(startx_safe - s1x < w);
     av_assert1(starty_safe - s1y >= 0); av_assert1(starty_safe - s1y < h);
     av_assert1(startx_safe - s2x >= 0); av_assert1(startx_safe - s2x < w);
     av_assert1(starty_safe - s2y >= 0); av_assert1(starty_safe - s2y < h);
-    compute_safe_ssd_integral_image_c(ii + starty_safe*ii_linesize_32 + startx_safe, ii_linesize_32,
-                                      src + (starty_safe - s1y) * linesize + (startx_safe - s1x), linesize,
-                                      src + (starty_safe - s2y) * linesize + (startx_safe - s2x), linesize,
-                                      endx_safe - startx_safe, endy_safe - starty_safe);
+    if (safe_pw && safe_ph)
+        dsp->compute_safe_ssd_integral_image(ii + starty_safe*ii_linesize_32 + startx_safe, ii_linesize_32,
+                                             src + (starty_safe - s1y) * linesize + (startx_safe - s1x), linesize,
+                                             src + (starty_safe - s2y) * linesize + (startx_safe - s2x), linesize,
+                                             safe_pw, safe_ph);
 
     // right part of the integral
     compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
                                       endx_safe, starty_safe,
                                       src, linesize,
                                       offx, offy, e, w, h,
-                                      ii_w - endx_safe, endy_safe - starty_safe);
+                                      ii_w - endx_safe, safe_ph);
 
     // bottom part where only one of s1 and s2 is still readable, or none at all
     compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
@@ -308,8 +289,8 @@
     const int e = FFMAX(s->research_hsize, s->research_hsize_uv)
                 + FFMAX(s->patch_hsize,    s->patch_hsize_uv);
 
-    s->chroma_w = FF_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
-    s->chroma_h = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->chroma_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->chroma_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
     s->nb_planes = av_pix_fmt_count_planes(inlink->format);
 
     /* Allocate the integral image with extra edges of thickness "e"
@@ -356,7 +337,7 @@
 
 struct thread_data {
     const uint8_t *src;
-    int src_linesize;
+    ptrdiff_t src_linesize;
     int startx, starty;
     int endx, endy;
     const uint32_t *ii_start;
@@ -368,34 +349,93 @@
     int x, y;
     NLMeansContext *s = ctx->priv;
     const struct thread_data *td = arg;
-    const uint8_t *src = td->src;
-    const int src_linesize = td->src_linesize;
+    const ptrdiff_t src_linesize = td->src_linesize;
     const int process_h = td->endy - td->starty;
     const int slice_start = (process_h *  jobnr   ) / nb_jobs;
     const int slice_end   = (process_h * (jobnr+1)) / nb_jobs;
     const int starty = td->starty + slice_start;
     const int endy   = td->starty + slice_end;
+    const int p = td->p;
+    const uint32_t *ii = td->ii_start + (starty - p - 1) * s->ii_lz_32 - p - 1;
+    const int dist_b = 2*p + 1;
+    const int dist_d = dist_b * s->ii_lz_32;
+    const int dist_e = dist_d + dist_b;
 
     for (y = starty; y < endy; y++) {
+        const uint8_t *src = td->src + y*src_linesize;
+        struct weighted_avg *wa = s->wa + y*s->wa_linesize;
         for (x = td->startx; x < td->endx; x++) {
-            const int patch_diff_sq = get_integral_patch_value(td->ii_start, s->ii_lz_32, x, y, td->p);
+            /*
+             * M is a discrete map where every entry contains the sum of all the entries
+             * in the rectangle from the top-left origin of M to its coordinate. In the
+             * following schema, "i" contains the sum of the whole map:
+             *
+             * M = +----------+-----------------+----+
+             *     |          |                 |    |
+             *     |          |                 |    |
+             *     |         a|                b|   c|
+             *     +----------+-----------------+----+
+             *     |          |                 |    |
+             *     |          |                 |    |
+             *     |          |        X        |    |
+             *     |          |                 |    |
+             *     |         d|                e|   f|
+             *     +----------+-----------------+----+
+             *     |          |                 |    |
+             *     |         g|                h|   i|
+             *     +----------+-----------------+----+
+             *
+             * The sum of the X box can be calculated with:
+             *    X = e-d-b+a
+             *
+             * See https://en.wikipedia.org/wiki/Summed_area_table
+             *
+             * The compute*_ssd functions compute the integral image M where every entry
+             * contains the sum of the squared difference of every corresponding pixels of
+             * two input planes of the same size as M.
+             */
+            const uint32_t a = ii[x];
+            const uint32_t b = ii[x + dist_b];
+            const uint32_t d = ii[x + dist_d];
+            const uint32_t e = ii[x + dist_e];
+            const uint32_t patch_diff_sq = e - d - b + a;
+
             if (patch_diff_sq < s->max_meaningful_diff) {
-                struct weighted_avg *wa = &s->wa[y*s->wa_linesize + x];
-                const int weight_lut_idx = patch_diff_sq * s->pdiff_lut_scale;
-                const double weight = s->weight_lut[weight_lut_idx]; // exp(-patch_diff_sq * s->pdiff_scale)
-                wa->total_weight += weight;
-                wa->sum += weight * src[y*src_linesize + x];
+                const unsigned weight_lut_idx = patch_diff_sq * s->pdiff_lut_scale;
+                const float weight = s->weight_lut[weight_lut_idx]; // exp(-patch_diff_sq * s->pdiff_scale)
+                wa[x].total_weight += weight;
+                wa[x].sum += weight * src[x];
             }
         }
+        ii += s->ii_lz_32;
     }
     return 0;
 }
 
-static int nlmeans_plane(AVFilterContext *ctx, int w, int h, int p, int r,
-                         uint8_t *dst, int dst_linesize,
-                         const uint8_t *src, int src_linesize)
+static void weight_averages(uint8_t *dst, ptrdiff_t dst_linesize,
+                            const uint8_t *src, ptrdiff_t src_linesize,
+                            struct weighted_avg *wa, ptrdiff_t wa_linesize,
+                            int w, int h)
 {
     int x, y;
+
+    for (y = 0; y < h; y++) {
+        for (x = 0; x < w; x++) {
+            // Also weight the centered pixel
+            wa[x].total_weight += 1.f;
+            wa[x].sum += 1.f * src[x];
+            dst[x] = av_clip_uint8(wa[x].sum / wa[x].total_weight);
+        }
+        dst += dst_linesize;
+        src += src_linesize;
+        wa += wa_linesize;
+    }
+}
+
+static int nlmeans_plane(AVFilterContext *ctx, int w, int h, int p, int r,
+                         uint8_t *dst, ptrdiff_t dst_linesize,
+                         const uint8_t *src, ptrdiff_t src_linesize)
+{
     int offx, offy;
     NLMeansContext *s = ctx->priv;
     /* patches center points cover the whole research window so the patches
@@ -420,7 +460,7 @@
                     .p            = p,
                 };
 
-                compute_ssd_integral_image(s->ii, s->ii_lz_32,
+                compute_ssd_integral_image(&s->dsp, s->ii, s->ii_lz_32,
                                            src, src_linesize,
                                            offx, offy, e, w, h);
                 ctx->internal->execute(ctx, nlmeans_slice, &td, NULL,
@@ -428,17 +468,10 @@
             }
         }
     }
-    for (y = 0; y < h; y++) {
-        for (x = 0; x < w; x++) {
-            struct weighted_avg *wa = &s->wa[y*s->wa_linesize + x];
 
-            // Also weight the centered pixel
-            wa->total_weight += 1.0;
-            wa->sum += 1.0 * src[y*src_linesize + x];
+    weight_averages(dst, dst_linesize, src, src_linesize,
+                    s->wa, s->wa_linesize, w, h);
 
-            dst[y*dst_linesize + x] = av_clip_uint8(wa->sum / wa->total_weight);
-        }
-    }
     return 0;
 }
 
@@ -478,6 +511,14 @@
     }                                                           \
 } while (0)
 
+void ff_nlmeans_init(NLMeansDSPContext *dsp)
+{
+    dsp->compute_safe_ssd_integral_image = compute_safe_ssd_integral_image_c;
+
+    if (ARCH_AARCH64)
+        ff_nlmeans_init_aarch64(dsp);
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
     int i;
@@ -509,6 +550,8 @@
            s->research_size, s->research_size, s->research_size_uv, s->research_size_uv,
            s->patch_size,    s->patch_size,    s->patch_size_uv,    s->patch_size_uv);
 
+    ff_nlmeans_init(&s->dsp);
+
     return 0;
 }
 

diff --git a/libavfilter/vf_nlmeans.h b/libavfilter/vf_nlmeans.h
new file mode 100644
index 0000000..0a9aab2
--- /dev/null
+++ b/libavfilter/vf_nlmeans.h

@@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_NLMEANS_H
+#define AVFILTER_NLMEANS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct NLMeansDSPContext {
+    void (*compute_safe_ssd_integral_image)(uint32_t *dst, ptrdiff_t dst_linesize_32,
+                                            const uint8_t *s1, ptrdiff_t linesize1,
+                                            const uint8_t *s2, ptrdiff_t linesize2,
+                                            int w, int h);
+} NLMeansDSPContext;
+
+void ff_nlmeans_init(NLMeansDSPContext *dsp);
+void ff_nlmeans_init_aarch64(NLMeansDSPContext *dsp);
+
+#endif /* AVFILTER_NLMEANS_H */

diff --git a/libavfilter/vf_normalize.c b/libavfilter/vf_normalize.c
new file mode 100644
index 0000000..5c1fe98
--- /dev/null
+++ b/libavfilter/vf_normalize.c

@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2017 Richard Ling
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Normalize RGB video (aka histogram stretching, contrast stretching).
+ * See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
+ *
+ * For each channel of each frame, the filter computes the input range and maps
+ * it linearly to the user-specified output range. The output range defaults
+ * to the full dynamic range from pure black to pure white.
+ *
+ * Naively maximising the dynamic range of each frame of video in isolation
+ * may cause flickering (rapid changes in brightness of static objects in the
+ * scene) when small dark or bright objects enter or leave the scene. This
+ * filter can apply temporal smoothing to the input range to reduce flickering.
+ * Temporal smoothing is similar to the auto-exposure (automatic gain control)
+ * on a video camera, which performs the same function; and, like a video
+ * camera, it may cause a period of over- or under-exposure of the video.
+ *
+ * The filter can normalize the R,G,B channels independently, which may cause
+ * color shifting, or link them together as a single channel, which prevents
+ * color shifting. More precisely, linked normalization preserves hue (as it's
+ * defined in HSV/HSL color spaces) while independent normalization does not.
+ * Independent normalization can be used to remove color casts, such as the
+ * blue cast from underwater video, restoring more natural colors. The filter
+ * can also combine independent and linked normalization in any ratio.
+ *
+ * Finally the overall strength of the filter can be adjusted, from no effect
+ * to full normalization.
+ *
+ * The 5 AVOptions are:
+ *   blackpt,   Colors which define the output range. The minimum input value
+ *   whitept    is mapped to the blackpt. The maximum input value is mapped to
+ *              the whitept. The defaults are black and white respectively.
+ *              Specifying white for blackpt and black for whitept will give
+ *              color-inverted, normalized video. Shades of grey can be used
+ *              to reduce the dynamic range (contrast). Specifying saturated
+ *              colors here can create some interesting effects.
+ *
+ *   smoothing  The amount of temporal smoothing, expressed in frames (>=0).
+ *              the minimum and maximum input values of each channel are
+ *              smoothed using a rolling average over the current frame and
+ *              that many previous frames of video.  Defaults to 0 (no temporal
+ *              smoothing).
+ *
+ *   independence
+ *              Controls the ratio of independent (color shifting) channel
+ *              normalization to linked (color preserving) normalization. 0.0
+ *              is fully linked, 1.0 is fully independent. Defaults to fully
+ *              independent.
+ *
+ *   strength   Overall strength of the filter. 1.0 is full strength. 0.0 is
+ *              a rather expensive no-op. Values in between can give a gentle
+ *              boost to low-contrast video without creating an artificial
+ *              over-processed look. The default is full strength.
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct NormalizeContext {
+    const AVClass *class;
+
+    // Storage for the corresponding AVOptions
+    uint8_t blackpt[4];
+    uint8_t whitept[4];
+    int smoothing;
+    float independence;
+    float strength;
+
+    int co[4];          // Offsets to R,G,B,A bytes respectively in each pixel
+    int num_components; // Number of components in the pixel format
+    int history_len;    // Number of frames to average; based on smoothing factor
+    int frame_num;      // Increments on each frame, starting from 0.
+
+    // Per-extremum, per-channel history, for temporal smoothing.
+    struct {
+        uint8_t *history;       // History entries.
+        uint32_t history_sum;   // Sum of history entries.
+    } min[3], max[3];           // Min and max for each channel in {R,G,B}.
+    uint8_t *history_mem;       // Single allocation for above history entries
+
+} NormalizeContext;
+
+#define OFFSET(x) offsetof(NormalizeContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption normalize_options[] = {
+    { "blackpt",  "output color to which darkest input color is mapped",  OFFSET(blackpt), AV_OPT_TYPE_COLOR, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "whitept",  "output color to which brightest input color is mapped",  OFFSET(whitept), AV_OPT_TYPE_COLOR, { .str = "white" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "smoothing",  "amount of temporal smoothing of the input range, to reduce flicker", OFFSET(smoothing), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX/8, FLAGS },
+    { "independence", "proportion of independent to linked channel normalization", OFFSET(independence), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
+    { "strength", "strength of filter, from no effect to full normalization", OFFSET(strength), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(normalize);
+
+// This function is the main guts of the filter. Normalizes the input frame
+// into the output frame. The frames are known to have the same dimensions
+// and pixel format.
+static void normalize(NormalizeContext *s, AVFrame *in, AVFrame *out)
+{
+    // Per-extremum, per-channel local variables.
+    struct {
+        uint8_t in;     // Original input byte value for this frame.
+        float smoothed; // Smoothed input value [0,255].
+        float out;      // Output value [0,255].
+    } min[3], max[3];   // Min and max for each channel in {R,G,B}.
+
+    float rgb_min_smoothed; // Min input range for linked normalization
+    float rgb_max_smoothed; // Max input range for linked normalization
+    uint8_t lut[3][256];    // Lookup table
+    int x, y, c;
+
+    // First, scan the input frame to find, for each channel, the minimum
+    // (min.in) and maximum (max.in) values present in the channel.
+    for (c = 0; c < 3; c++)
+        min[c].in = max[c].in = in->data[0][s->co[c]];
+    for (y = 0; y < in->height; y++) {
+        uint8_t *inp = in->data[0] + y * in->linesize[0];
+        uint8_t *outp = out->data[0] + y * out->linesize[0];
+        for (x = 0; x < in->width; x++) {
+            for (c = 0; c < 3; c++) {
+                min[c].in = FFMIN(min[c].in, inp[s->co[c]]);
+                max[c].in = FFMAX(max[c].in, inp[s->co[c]]);
+            }
+            inp += s->num_components;
+            outp += s->num_components;
+        }
+    }
+
+    // Next, for each channel, push min.in and max.in into their respective
+    // histories, to determine the min.smoothed and max.smoothed for this frame.
+    {
+        int history_idx = s->frame_num % s->history_len;
+        // Assume the history is not yet full; num_history_vals is the number
+        // of frames received so far including the current frame.
+        int num_history_vals = s->frame_num + 1;
+        if (s->frame_num >= s->history_len) {
+            //The history is full; drop oldest value and cap num_history_vals.
+            for (c = 0; c < 3; c++) {
+                s->min[c].history_sum -= s->min[c].history[history_idx];
+                s->max[c].history_sum -= s->max[c].history[history_idx];
+            }
+            num_history_vals = s->history_len;
+        }
+        // For each extremum, update history_sum and calculate smoothed value
+        // as the rolling average of the history entries.
+        for (c = 0; c < 3; c++) {
+            s->min[c].history_sum += (s->min[c].history[history_idx] = min[c].in);
+            min[c].smoothed = s->min[c].history_sum / (float)num_history_vals;
+            s->max[c].history_sum += (s->max[c].history[history_idx] = max[c].in);
+            max[c].smoothed = s->max[c].history_sum / (float)num_history_vals;
+        }
+    }
+
+    // Determine the input range for linked normalization. This is simply the
+    // minimum of the per-channel minimums, and the maximum of the per-channel
+    // maximums.
+    rgb_min_smoothed = FFMIN3(min[0].smoothed, min[1].smoothed, min[2].smoothed);
+    rgb_max_smoothed = FFMAX3(max[0].smoothed, max[1].smoothed, max[2].smoothed);
+
+    // Now, process each channel to determine the input and output range and
+    // build the lookup tables.
+    for (c = 0; c < 3; c++) {
+        int in_val;
+        // Adjust the input range for this channel [min.smoothed,max.smoothed]
+        // by mixing in the correct proportion of the linked normalization
+        // input range [rgb_min_smoothed,rgb_max_smoothed].
+        min[c].smoothed = (min[c].smoothed  *         s->independence)
+                        + (rgb_min_smoothed * (1.0f - s->independence));
+        max[c].smoothed = (max[c].smoothed  *         s->independence)
+                        + (rgb_max_smoothed * (1.0f - s->independence));
+
+        // Calculate the output range [min.out,max.out] as a ratio of the full-
+        // strength output range [blackpt,whitept] and the original input range
+        // [min.in,max.in], based on the user-specified filter strength.
+        min[c].out = (s->blackpt[c] *         s->strength)
+                   + (min[c].in     * (1.0f - s->strength));
+        max[c].out = (s->whitept[c] *         s->strength)
+                   + (max[c].in     * (1.0f - s->strength));
+
+        // Now, build a lookup table which linearly maps the adjusted input range
+        // [min.smoothed,max.smoothed] to the output range [min.out,max.out].
+        // Perform the linear interpolation for each x:
+        //     lut[x] = (int)(float(x - min.smoothed) * scale + max.out + 0.5)
+        // where scale = (max.out - min.out) / (max.smoothed - min.smoothed)
+        if (min[c].smoothed == max[c].smoothed) {
+            // There is no dynamic range to expand. No mapping for this channel.
+            for (in_val = min[c].in; in_val <= max[c].in; in_val++)
+                lut[c][in_val] = min[c].out;
+        } else {
+            // We must set lookup values for all values in the original input
+            // range [min.in,max.in]. Since the original input range may be
+            // larger than [min.smoothed,max.smoothed], some output values may
+            // fall outside the [0,255] dynamic range. We need to clamp them.
+            float scale = (max[c].out - min[c].out) / (max[c].smoothed - min[c].smoothed);
+            for (in_val = min[c].in; in_val <= max[c].in; in_val++) {
+                int out_val = (in_val - min[c].smoothed) * scale + min[c].out + 0.5f;
+                out_val = FFMAX(out_val, 0);
+                out_val = FFMIN(out_val, 255);
+                lut[c][in_val] = out_val;
+            }
+        }
+    }
+
+    // Finally, process the pixels of the input frame using the lookup tables.
+    for (y = 0; y < in->height; y++) {
+        uint8_t *inp = in->data[0] + y * in->linesize[0];
+        uint8_t *outp = out->data[0] + y * out->linesize[0];
+        for (x = 0; x < in->width; x++) {
+            for (c = 0; c < 3; c++)
+                outp[s->co[c]] = lut[c][inp[s->co[c]]];
+            if (s->num_components == 4)
+                // Copy alpha as-is.
+                outp[s->co[3]] = inp[s->co[3]];
+            inp += s->num_components;
+            outp += s->num_components;
+        }
+    }
+
+    s->frame_num++;
+}
+
+// Now we define all the functions accessible from the ff_vf_normalize class,
+// which is ffmpeg's interface to our filter.  See doc/filter_design.txt and
+// doc/writing_filters.txt for descriptions of what these interface functions
+// are expected to do.
+
+// Set the pixel formats that our filter supports. We should be able to process
+// any 8-bit RGB formats. 16-bit support might be useful one day.
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pixel_fmts[] = {
+        AV_PIX_FMT_RGB24,
+        AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_ARGB,
+        AV_PIX_FMT_RGBA,
+        AV_PIX_FMT_ABGR,
+        AV_PIX_FMT_BGRA,
+        AV_PIX_FMT_0RGB,
+        AV_PIX_FMT_RGB0,
+        AV_PIX_FMT_0BGR,
+        AV_PIX_FMT_BGR0,
+        AV_PIX_FMT_NONE
+    };
+    // According to filter_design.txt, using ff_set_common_formats() this way
+    // ensures the pixel formats of the input and output will be the same. That
+    // saves a bit of effort possibly needing to handle format conversions.
+    AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, formats);
+}
+
+// At this point we know the pixel format used for both input and output.  We
+// can also access the frame rate of the input video and allocate some memory
+// appropriately
+static int config_input(AVFilterLink *inlink)
+{
+    NormalizeContext *s = inlink->dst->priv;
+    // Store offsets to R,G,B,A bytes respectively in each pixel
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    int c;
+
+    for (c = 0; c < 4; ++c)
+        s->co[c] = desc->comp[c].offset;
+    s->num_components = desc->nb_components;
+    // Convert smoothing value to history_len (a count of frames to average,
+    // must be at least 1).  Currently this is a direct assignment, but the
+    // smoothing value was originally envisaged as a number of seconds.  In
+    // future it would be nice to set history_len using a number of seconds,
+    // but VFR video is currently an obstacle to doing so.
+    s->history_len = s->smoothing + 1;
+    // Allocate the history buffers -- there are 6 -- one for each extrema.
+    // s->smoothing is limited to INT_MAX/8, so that (s->history_len * 6)
+    // can't overflow on 32bit causing a too-small allocation.
+    s->history_mem = av_malloc(s->history_len * 6);
+    if (s->history_mem == NULL)
+        return AVERROR(ENOMEM);
+
+    for (c = 0; c < 3; c++) {
+        s->min[c].history = s->history_mem + (c*2)   * s->history_len;
+        s->max[c].history = s->history_mem + (c*2+1) * s->history_len;
+    }
+    return 0;
+}
+
+// Free any memory allocations here
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    NormalizeContext *s = ctx->priv;
+
+    av_freep(&s->history_mem);
+}
+
+// This function is pretty much standard from doc/writing_filters.txt.  It
+// tries to do in-place filtering where possible, only allocating a new output
+// frame when absolutely necessary.
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    NormalizeContext *s = ctx->priv;
+    AVFrame *out;
+    // Set 'direct' if we can modify the input frame in-place.  Otherwise we
+    // need to retrieve a new frame from the output link.
+    int direct = av_frame_is_writable(in) && !ctx->is_disabled;
+
+    if (direct) {
+        out = in;
+    } else {
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    // Now we've got the input and output frames (which may be the same frame)
+    // perform the filtering with our custom function.
+    normalize(s, in, out);
+
+    if (ctx->is_disabled) {
+        av_frame_free(&out);
+        return ff_filter_frame(outlink, in);
+    }
+
+    if (!direct)
+        av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_normalize = {
+    .name          = "normalize",
+    .description   = NULL_IF_CONFIG_SMALL("Normalize RGB video."),
+    .priv_size     = sizeof(NormalizeContext),
+    .priv_class    = &normalize_class,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = inputs,
+    .outputs       = outputs,
+};

diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
index e003982..abfff49 100644
--- a/libavfilter/vf_ocr.c
+++ b/libavfilter/vf_ocr.c

@@ -90,9 +90,7 @@
     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
     if (!fmts_list)
         return AVERROR(ENOMEM);
-    ff_set_common_formats(ctx, fmts_list);
-
-    return 0;
+    return ff_set_common_formats(ctx, fmts_list);
 }
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)

diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index 5bf3d66..ba25893 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c

@@ -39,6 +39,11 @@
 #include "drawutils.h"
 #include "framesync.h"
 #include "video.h"
+#include "vf_overlay.h"
+
+typedef struct ThreadData {
+    AVFrame *dst, *src;
+} ThreadData;
 
 static const char *const var_names[] = {
     "main_w",    "W", ///< width  of the main    video
@@ -55,21 +60,6 @@
     NULL
 };
 
-enum var_name {
-    VAR_MAIN_W,    VAR_MW,
-    VAR_MAIN_H,    VAR_MH,
-    VAR_OVERLAY_W, VAR_OW,
-    VAR_OVERLAY_H, VAR_OH,
-    VAR_HSUB,
-    VAR_VSUB,
-    VAR_X,
-    VAR_Y,
-    VAR_N,
-    VAR_POS,
-    VAR_T,
-    VAR_VARS_NB
-};
-
 #define MAIN    0
 #define OVERLAY 1
 
@@ -88,44 +78,6 @@
     EVAL_MODE_NB
 };
 
-enum OverlayFormat {
-    OVERLAY_FORMAT_YUV420,
-    OVERLAY_FORMAT_YUV422,
-    OVERLAY_FORMAT_YUV444,
-    OVERLAY_FORMAT_RGB,
-    OVERLAY_FORMAT_GBRP,
-    OVERLAY_FORMAT_AUTO,
-    OVERLAY_FORMAT_NB
-};
-
-typedef struct OverlayContext {
-    const AVClass *class;
-    int x, y;                   ///< position of overlaid picture
-
-    uint8_t main_is_packed_rgb;
-    uint8_t main_rgba_map[4];
-    uint8_t main_has_alpha;
-    uint8_t overlay_is_packed_rgb;
-    uint8_t overlay_rgba_map[4];
-    uint8_t overlay_has_alpha;
-    int format;                 ///< OverlayFormat
-    int eval_mode;              ///< EvalMode
-
-    FFFrameSync fs;
-
-    int main_pix_step[4];       ///< steps per pixel for each plane of the main output
-    int overlay_pix_step[4];    ///< steps per pixel for each plane of the overlay
-    int hsub, vsub;             ///< chroma subsampling values
-    const AVPixFmtDescriptor *main_desc; ///< format descriptor for main input
-
-    double var_values[VAR_VARS_NB];
-    char *x_expr, *y_expr;
-
-    AVExpr *x_pexpr, *y_pexpr;
-
-    void (*blend_image)(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y);
-} OverlayContext;
-
 static av_cold void uninit(AVFilterContext *ctx)
 {
     OverlayContext *s = ctx->priv;
@@ -148,6 +100,7 @@
 
     s->var_values[VAR_X] = av_expr_eval(s->x_pexpr, s->var_values, NULL);
     s->var_values[VAR_Y] = av_expr_eval(s->y_pexpr, s->var_values, NULL);
+    /* It is necessary if x is expressed from y  */
     s->var_values[VAR_X] = av_expr_eval(s->x_pexpr, s->var_values, NULL);
     s->x = normalize_xy(s->var_values[VAR_X], s->hsub);
     s->y = normalize_xy(s->var_values[VAR_Y], s->vsub);
@@ -401,9 +354,10 @@
  * Blend image in src to destination buffer dst at position (x, y).
  */
 
-static void blend_image_packed_rgb(AVFilterContext *ctx,
+static av_always_inline void blend_slice_packed_rgb(AVFilterContext *ctx,
                                    AVFrame *dst, const AVFrame *src,
-                                   int main_has_alpha, int x, int y)
+                                   int main_has_alpha, int x, int y,
+                                   int is_straight, int jobnr, int nb_jobs)
 {
     OverlayContext *s = ctx->priv;
     int i, imax, j, jmax;
@@ -422,13 +376,19 @@
     const int sb = s->overlay_rgba_map[B];
     const int sa = s->overlay_rgba_map[A];
     const int sstep = s->overlay_pix_step[0];
+    int slice_start, slice_end;
     uint8_t *S, *sp, *d, *dp;
 
     i = FFMAX(-y, 0);
-    sp = src->data[0] + i     * src->linesize[0];
-    dp = dst->data[0] + (y+i) * dst->linesize[0];
+    imax = FFMIN(-y + dst_h, src_h);
 
-    for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) {
+    slice_start = (imax * jobnr) / nb_jobs;
+    slice_end = (imax * (jobnr+1)) / nb_jobs;
+
+    sp = src->data[0] + (i + slice_start)     * src->linesize[0];
+    dp = dst->data[0] + (y + i + slice_start) * dst->linesize[0];
+
+    for (i = i + slice_start; i < slice_end; i++) {
         j = FFMAX(-x, 0);
         S = sp + j     * sstep;
         d = dp + (x+j) * dstep;
@@ -454,9 +414,12 @@
             default:
                 // main_value = main_value * (1 - alpha) + overlay_value * alpha
                 // since alpha is in the range 0-255, the result must divided by 255
-                d[dr] = FAST_DIV255(d[dr] * (255 - alpha) + S[sr] * alpha);
-                d[dg] = FAST_DIV255(d[dg] * (255 - alpha) + S[sg] * alpha);
-                d[db] = FAST_DIV255(d[db] * (255 - alpha) + S[sb] * alpha);
+                d[dr] = is_straight ? FAST_DIV255(d[dr] * (255 - alpha) + S[sr] * alpha) :
+                        FFMIN(FAST_DIV255(d[dr] * (255 - alpha)) + S[sr], 255);
+                d[dg] = is_straight ? FAST_DIV255(d[dg] * (255 - alpha) + S[sg] * alpha) :
+                        FFMIN(FAST_DIV255(d[dg] * (255 - alpha)) + S[sg], 255);
+                d[db] = is_straight ? FAST_DIV255(d[db] * (255 - alpha) + S[sb] * alpha) :
+                        FFMIN(FAST_DIV255(d[db] * (255 - alpha)) + S[sb], 255);
             }
             if (main_has_alpha) {
                 switch (alpha) {
@@ -487,8 +450,13 @@
                                          int main_has_alpha,
                                          int dst_plane,
                                          int dst_offset,
-                                         int dst_step)
+                                         int dst_step,
+                                         int straight,
+                                         int yuv,
+                                         int jobnr,
+                                         int nb_jobs)
 {
+    OverlayContext *octx = ctx->priv;
     int src_wp = AV_CEIL_RSHIFT(src_w, hsub);
     int src_hp = AV_CEIL_RSHIFT(src_h, vsub);
     int dst_wp = AV_CEIL_RSHIFT(dst_w, hsub);
@@ -497,23 +465,39 @@
     int xp = x>>hsub;
     uint8_t *s, *sp, *d, *dp, *dap, *a, *da, *ap;
     int jmax, j, k, kmax;
+    int slice_start, slice_end;
 
     j = FFMAX(-yp, 0);
-    sp = src->data[i] + j         * src->linesize[i];
-    dp = dst->data[dst_plane]
-                      + (yp+j)    * dst->linesize[dst_plane]
-                      + dst_offset;
-    ap = src->data[3] + (j<<vsub) * src->linesize[3];
-    dap = dst->data[3] + ((yp+j) << vsub) * dst->linesize[3];
+    jmax = FFMIN(-yp + dst_hp, src_hp);
 
-    for (jmax = FFMIN(-yp + dst_hp, src_hp); j < jmax; j++) {
+    slice_start = (jmax * jobnr) / nb_jobs;
+    slice_end = (jmax * (jobnr+1)) / nb_jobs;
+
+    sp = src->data[i] + slice_start * src->linesize[i];
+    dp = dst->data[dst_plane]
+                      + (yp + slice_start) * dst->linesize[dst_plane]
+                      + dst_offset;
+    ap = src->data[3] + (slice_start << vsub) * src->linesize[3];
+    dap = dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3];
+
+    for (j = j + slice_start; j < slice_end; j++) {
         k = FFMAX(-xp, 0);
         d = dp + (xp+k) * dst_step;
         s = sp + k;
         a = ap + (k<<hsub);
         da = dap + ((xp+k) << hsub);
+        kmax = FFMIN(-xp + dst_wp, src_wp);
 
-        for (kmax = FFMIN(-xp + dst_wp, src_wp); k < kmax; k++) {
+        if (((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) {
+            int c = octx->blend_row[i](d, da, s, a, kmax - k, src->linesize[3]);
+
+            s += c;
+            d += dst_step * c;
+            da += (1 << hsub) * c;
+            a += (1 << hsub) * c;
+            k += c;
+        }
+        for (; k < kmax; k++) {
             int alpha_v, alpha_h, alpha;
 
             // average alpha for color components, improve quality
@@ -546,7 +530,14 @@
                     alpha_d = da[0];
                 alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);
             }
-            *d = FAST_DIV255(*d * (255 - alpha) + *s * alpha);
+            if (straight) {
+                *d = FAST_DIV255(*d * (255 - alpha) + *s * alpha);
+            } else {
+                if (i && yuv)
+                    *d = av_clip(FAST_DIV255((*d - 128) * (255 - alpha)) + *s - 128, -128, 128) + 128;
+                else
+                    *d = FFMIN(FAST_DIV255(*d * (255 - alpha)) + *s, 255);
+            }
             s++;
             d += dst_step;
             da += 1 << hsub;
@@ -562,17 +553,23 @@
 static inline void alpha_composite(const AVFrame *src, const AVFrame *dst,
                                    int src_w, int src_h,
                                    int dst_w, int dst_h,
-                                   int x, int y)
+                                   int x, int y,
+                                   int jobnr, int nb_jobs)
 {
     uint8_t alpha;          ///< the amount of overlay to blend on to main
     uint8_t *s, *sa, *d, *da;
     int i, imax, j, jmax;
+    int slice_start, slice_end;
+
+    imax = FFMIN(-y + dst_h, src_h);
+    slice_start = (imax * jobnr) / nb_jobs;
+    slice_end = ((imax * (jobnr+1)) / nb_jobs);
 
     i = FFMAX(-y, 0);
-    sa = src->data[3] + i     * src->linesize[3];
-    da = dst->data[3] + (y+i) * dst->linesize[3];
+    sa = src->data[3] + (i + slice_start) * src->linesize[3];
+    da = dst->data[3] + (y + i + slice_start) * dst->linesize[3];
 
-    for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) {
+    for (i = i + slice_start; i < slice_end; i++) {
         j = FFMAX(-x, 0);
         s = sa + j;
         d = da + x+j;
@@ -601,11 +598,13 @@
     }
 }
 
-static av_always_inline void blend_image_yuv(AVFilterContext *ctx,
+static av_always_inline void blend_slice_yuv(AVFilterContext *ctx,
                                              AVFrame *dst, const AVFrame *src,
                                              int hsub, int vsub,
                                              int main_has_alpha,
-                                             int x, int y)
+                                             int x, int y,
+                                             int is_straight,
+                                             int jobnr, int nb_jobs)
 {
     OverlayContext *s = ctx->priv;
     const int src_w = src->width;
@@ -614,21 +613,27 @@
     const int dst_h = dst->height;
 
     blend_plane(ctx, dst, src, src_w, src_h, dst_w, dst_h, 0, 0,       0, x, y, main_has_alpha,
-                s->main_desc->comp[0].plane, s->main_desc->comp[0].offset, s->main_desc->comp[0].step);
+                s->main_desc->comp[0].plane, s->main_desc->comp[0].offset, s->main_desc->comp[0].step, is_straight, 1,
+                jobnr, nb_jobs);
     blend_plane(ctx, dst, src, src_w, src_h, dst_w, dst_h, 1, hsub, vsub, x, y, main_has_alpha,
-                s->main_desc->comp[1].plane, s->main_desc->comp[1].offset, s->main_desc->comp[1].step);
+                s->main_desc->comp[1].plane, s->main_desc->comp[1].offset, s->main_desc->comp[1].step, is_straight, 1,
+                jobnr, nb_jobs);
     blend_plane(ctx, dst, src, src_w, src_h, dst_w, dst_h, 2, hsub, vsub, x, y, main_has_alpha,
-                s->main_desc->comp[2].plane, s->main_desc->comp[2].offset, s->main_desc->comp[2].step);
+                s->main_desc->comp[2].plane, s->main_desc->comp[2].offset, s->main_desc->comp[2].step, is_straight, 1,
+                jobnr, nb_jobs);
 
     if (main_has_alpha)
-        alpha_composite(src, dst, src_w, src_h, dst_w, dst_h, x, y);
+        alpha_composite(src, dst, src_w, src_h, dst_w, dst_h, x, y, jobnr, nb_jobs);
 }
 
-static av_always_inline void blend_image_planar_rgb(AVFilterContext *ctx,
+static av_always_inline void blend_slice_planar_rgb(AVFilterContext *ctx,
                                                     AVFrame *dst, const AVFrame *src,
                                                     int hsub, int vsub,
                                                     int main_has_alpha,
-                                                    int x, int y)
+                                                    int x, int y,
+                                                    int is_straight,
+                                                    int jobnr,
+                                                    int nb_jobs)
 {
     OverlayContext *s = ctx->priv;
     const int src_w = src->width;
@@ -637,64 +642,177 @@
     const int dst_h = dst->height;
 
     blend_plane(ctx, dst, src, src_w, src_h, dst_w, dst_h, 0, 0,       0, x, y, main_has_alpha,
-                s->main_desc->comp[1].plane, s->main_desc->comp[1].offset, s->main_desc->comp[1].step);
+                s->main_desc->comp[1].plane, s->main_desc->comp[1].offset, s->main_desc->comp[1].step, is_straight, 0,
+                jobnr, nb_jobs);
     blend_plane(ctx, dst, src, src_w, src_h, dst_w, dst_h, 1, hsub, vsub, x, y, main_has_alpha,
-                s->main_desc->comp[2].plane, s->main_desc->comp[2].offset, s->main_desc->comp[2].step);
+                s->main_desc->comp[2].plane, s->main_desc->comp[2].offset, s->main_desc->comp[2].step, is_straight, 0,
+                jobnr, nb_jobs);
     blend_plane(ctx, dst, src, src_w, src_h, dst_w, dst_h, 2, hsub, vsub, x, y, main_has_alpha,
-                s->main_desc->comp[0].plane, s->main_desc->comp[0].offset, s->main_desc->comp[0].step);
+                s->main_desc->comp[0].plane, s->main_desc->comp[0].offset, s->main_desc->comp[0].step, is_straight, 0,
+                jobnr, nb_jobs);
 
     if (main_has_alpha)
-        alpha_composite(src, dst, src_w, src_h, dst_w, dst_h, x, y);
+        alpha_composite(src, dst, src_w, src_h, dst_w, dst_h, x, y, jobnr, nb_jobs);
 }
 
-static void blend_image_yuv420(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+static int blend_slice_yuv420(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    blend_image_yuv(ctx, dst, src, 1, 1, 0, x, y);
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 1, 1, 0, s->x, s->y, 1, jobnr, nb_jobs);
+    return 0;
 }
 
-static void blend_image_yuva420(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+static int blend_slice_yuva420(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    blend_image_yuv(ctx, dst, src, 1, 1, 1, x, y);
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 1, 1, 1, s->x, s->y, 1, jobnr, nb_jobs);
+    return 0;
 }
 
-static void blend_image_yuv422(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+static int blend_slice_yuv422(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    blend_image_yuv(ctx, dst, src, 1, 0, 0, x, y);
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 1, 0, 0, s->x, s->y, 1, jobnr, nb_jobs);
+    return 0;
 }
 
-static void blend_image_yuva422(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+static int blend_slice_yuva422(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    blend_image_yuv(ctx, dst, src, 1, 0, 1, x, y);
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 1, 0, 1, s->x, s->y, 1, jobnr, nb_jobs);
+    return 0;
 }
 
-static void blend_image_yuv444(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+static int blend_slice_yuv444(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    blend_image_yuv(ctx, dst, src, 0, 0, 0, x, y);
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 0, 0, 0, s->x, s->y, 1, jobnr, nb_jobs);
+    return 0;
 }
 
-static void blend_image_yuva444(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+static int blend_slice_yuva444(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    blend_image_yuv(ctx, dst, src, 0, 0, 1, x, y);
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 0, 0, 1, s->x, s->y, 1, jobnr, nb_jobs);
+    return 0;
 }
 
-static void blend_image_gbrp(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+static int blend_slice_gbrp(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    blend_image_planar_rgb(ctx, dst, src, 0, 0, 0, x, y);
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_planar_rgb(ctx, td->dst, td->src, 0, 0, 0, s->x, s->y, 1, jobnr, nb_jobs);
+    return 0;
 }
 
-static void blend_image_gbrap(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+static int blend_slice_gbrap(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    blend_image_planar_rgb(ctx, dst, src, 0, 0, 1, x, y);
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_planar_rgb(ctx, td->dst, td->src, 0, 0, 1, s->x, s->y, 1, jobnr, nb_jobs);
+    return 0;
 }
 
-static void blend_image_rgb(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+static int blend_slice_yuv420_pm(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    blend_image_packed_rgb(ctx, dst, src, 0, x, y);
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 1, 1, 0, s->x, s->y, 0, jobnr, nb_jobs);
+    return 0;
 }
 
-static void blend_image_rgba(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+static int blend_slice_yuva420_pm(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
-    blend_image_packed_rgb(ctx, dst, src, 1, x, y);
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 1, 1, 1, s->x, s->y, 0, jobnr, nb_jobs);
+    return 0;
+}
+
+static int blend_slice_yuv422_pm(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 1, 0, 0, s->x, s->y, 0, jobnr, nb_jobs);
+    return 0;
+}
+
+static int blend_slice_yuva422_pm(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 1, 0, 1, s->x, s->y, 0, jobnr, nb_jobs);
+    return 0;
+}
+
+static int blend_slice_yuv444_pm(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 0, 0, 0, s->x, s->y, 0, jobnr, nb_jobs);
+    return 0;
+}
+
+static int blend_slice_yuva444_pm(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_yuv(ctx, td->dst, td->src, 0, 0, 1, s->x, s->y, 0, jobnr, nb_jobs);
+    return 0;
+}
+
+static int blend_slice_gbrp_pm(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_planar_rgb(ctx, td->dst, td->src, 0, 0, 0, s->x, s->y, 0, jobnr, nb_jobs);
+    return 0;
+}
+
+static int blend_slice_gbrap_pm(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_planar_rgb(ctx, td->dst, td->src, 0, 0, 1, s->x, s->y, 0, jobnr, nb_jobs);
+    return 0;
+}
+
+static int blend_slice_rgb(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_packed_rgb(ctx, td->dst, td->src, 0, s->x, s->y, 1, jobnr, nb_jobs);
+    return 0;
+}
+
+static int blend_slice_rgba(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_packed_rgb(ctx, td->dst, td->src, 1, s->x, s->y, 1, jobnr, nb_jobs);
+    return 0;
+}
+
+static int blend_slice_rgb_pm(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_packed_rgb(ctx, td->dst, td->src, 0, s->x, s->y, 0, jobnr, nb_jobs);
+    return 0;
+}
+
+static int blend_slice_rgba_pm(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    OverlayContext *s = ctx->priv;
+    ThreadData *td = arg;
+    blend_slice_packed_rgb(ctx, td->dst, td->src, 1, s->x, s->y, 0, jobnr, nb_jobs);
+    return 0;
 }
 
 static int config_input_main(AVFilterLink *inlink)
@@ -714,39 +832,39 @@
     s->main_has_alpha = ff_fmt_is_in(inlink->format, alpha_pix_fmts);
     switch (s->format) {
     case OVERLAY_FORMAT_YUV420:
-        s->blend_image = s->main_has_alpha ? blend_image_yuva420 : blend_image_yuv420;
+        s->blend_slice = s->main_has_alpha ? blend_slice_yuva420 : blend_slice_yuv420;
         break;
     case OVERLAY_FORMAT_YUV422:
-        s->blend_image = s->main_has_alpha ? blend_image_yuva422 : blend_image_yuv422;
+        s->blend_slice = s->main_has_alpha ? blend_slice_yuva422 : blend_slice_yuv422;
         break;
     case OVERLAY_FORMAT_YUV444:
-        s->blend_image = s->main_has_alpha ? blend_image_yuva444 : blend_image_yuv444;
+        s->blend_slice = s->main_has_alpha ? blend_slice_yuva444 : blend_slice_yuv444;
         break;
     case OVERLAY_FORMAT_RGB:
-        s->blend_image = s->main_has_alpha ? blend_image_rgba : blend_image_rgb;
+        s->blend_slice = s->main_has_alpha ? blend_slice_rgba : blend_slice_rgb;
         break;
     case OVERLAY_FORMAT_GBRP:
-        s->blend_image = s->main_has_alpha ? blend_image_gbrap : blend_image_gbrp;
+        s->blend_slice = s->main_has_alpha ? blend_slice_gbrap : blend_slice_gbrp;
         break;
     case OVERLAY_FORMAT_AUTO:
         switch (inlink->format) {
         case AV_PIX_FMT_YUVA420P:
-            s->blend_image = blend_image_yuva420;
+            s->blend_slice = blend_slice_yuva420;
             break;
         case AV_PIX_FMT_YUVA422P:
-            s->blend_image = blend_image_yuva422;
+            s->blend_slice = blend_slice_yuva422;
             break;
         case AV_PIX_FMT_YUVA444P:
-            s->blend_image = blend_image_yuva444;
+            s->blend_slice = blend_slice_yuva444;
             break;
         case AV_PIX_FMT_ARGB:
         case AV_PIX_FMT_RGBA:
         case AV_PIX_FMT_BGRA:
         case AV_PIX_FMT_ABGR:
-            s->blend_image = blend_image_rgba;
+            s->blend_slice = blend_slice_rgba;
             break;
         case AV_PIX_FMT_GBRAP:
-            s->blend_image = blend_image_gbrap;
+            s->blend_slice = blend_slice_gbrap;
             break;
         default:
             av_assert0(0);
@@ -754,6 +872,58 @@
         }
         break;
     }
+
+    if (!s->alpha_format)
+        goto end;
+
+    switch (s->format) {
+    case OVERLAY_FORMAT_YUV420:
+        s->blend_slice = s->main_has_alpha ? blend_slice_yuva420_pm : blend_slice_yuv420_pm;
+        break;
+    case OVERLAY_FORMAT_YUV422:
+        s->blend_slice = s->main_has_alpha ? blend_slice_yuva422_pm : blend_slice_yuv422_pm;
+        break;
+    case OVERLAY_FORMAT_YUV444:
+        s->blend_slice = s->main_has_alpha ? blend_slice_yuva444_pm : blend_slice_yuv444_pm;
+        break;
+    case OVERLAY_FORMAT_RGB:
+        s->blend_slice = s->main_has_alpha ? blend_slice_rgba_pm : blend_slice_rgb_pm;
+        break;
+    case OVERLAY_FORMAT_GBRP:
+        s->blend_slice = s->main_has_alpha ? blend_slice_gbrap_pm : blend_slice_gbrp_pm;
+        break;
+    case OVERLAY_FORMAT_AUTO:
+        switch (inlink->format) {
+        case AV_PIX_FMT_YUVA420P:
+            s->blend_slice = blend_slice_yuva420_pm;
+            break;
+        case AV_PIX_FMT_YUVA422P:
+            s->blend_slice = blend_slice_yuva422_pm;
+            break;
+        case AV_PIX_FMT_YUVA444P:
+            s->blend_slice = blend_slice_yuva444_pm;
+            break;
+        case AV_PIX_FMT_ARGB:
+        case AV_PIX_FMT_RGBA:
+        case AV_PIX_FMT_BGRA:
+        case AV_PIX_FMT_ABGR:
+            s->blend_slice = blend_slice_rgba_pm;
+            break;
+        case AV_PIX_FMT_GBRAP:
+            s->blend_slice = blend_slice_gbrap_pm;
+            break;
+        default:
+            av_assert0(0);
+            break;
+        }
+        break;
+    }
+
+end:
+    if (ARCH_X86)
+        ff_overlay_init_x86(s, s->format, inlink->format,
+                            s->alpha_format, s->main_has_alpha);
+
     return 0;
 }
 
@@ -792,8 +962,14 @@
     }
 
     if (s->x < mainpic->width  && s->x + second->width  >= 0 ||
-        s->y < mainpic->height && s->y + second->height >= 0)
-        s->blend_image(ctx, mainpic, second, s->x, s->y);
+        s->y < mainpic->height && s->y + second->height >= 0) {
+        ThreadData td;
+
+        td.dst = mainpic;
+        td.src = second;
+        ctx->internal->execute(ctx, s->blend_slice, &td, NULL, FFMIN(FFMIN(mainpic->height - s->y, second->height),
+                                                                     ff_filter_get_nb_threads(ctx)));
+    }
     return ff_filter_frame(ctx->outputs[0], mainpic);
 }
 
@@ -835,6 +1011,9 @@
         { "gbrp",   "", 0, AV_OPT_TYPE_CONST, {.i64=OVERLAY_FORMAT_GBRP},   .flags = FLAGS, .unit = "format" },
         { "auto",   "", 0, AV_OPT_TYPE_CONST, {.i64=OVERLAY_FORMAT_AUTO},   .flags = FLAGS, .unit = "format" },
     { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(fs.opt_repeatlast), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
+    { "alpha", "alpha format", OFFSET(alpha_format), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "alpha_format" },
+        { "straight",      "", 0, AV_OPT_TYPE_CONST, {.i64=0}, .flags = FLAGS, .unit = "alpha_format" },
+        { "premultiplied", "", 0, AV_OPT_TYPE_CONST, {.i64=1}, .flags = FLAGS, .unit = "alpha_format" },
     { NULL }
 };
 
@@ -876,5 +1055,6 @@
     .process_command = process_command,
     .inputs        = avfilter_vf_overlay_inputs,
     .outputs       = avfilter_vf_overlay_outputs,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
+                     AVFILTER_FLAG_SLICE_THREADS,
 };

diff --git a/libavfilter/vf_overlay.h b/libavfilter/vf_overlay.h
new file mode 100644
index 0000000..98b06ea
--- /dev/null
+++ b/libavfilter/vf_overlay.h

@@ -0,0 +1,86 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_OVERLAY_H
+#define AVFILTER_OVERLAY_H
+
+#include "libavutil/eval.h"
+#include "libavutil/pixdesc.h"
+#include "framesync.h"
+#include "avfilter.h"
+
+enum var_name {
+    VAR_MAIN_W,    VAR_MW,
+    VAR_MAIN_H,    VAR_MH,
+    VAR_OVERLAY_W, VAR_OW,
+    VAR_OVERLAY_H, VAR_OH,
+    VAR_HSUB,
+    VAR_VSUB,
+    VAR_X,
+    VAR_Y,
+    VAR_N,
+    VAR_POS,
+    VAR_T,
+    VAR_VARS_NB
+};
+
+enum OverlayFormat {
+    OVERLAY_FORMAT_YUV420,
+    OVERLAY_FORMAT_YUV422,
+    OVERLAY_FORMAT_YUV444,
+    OVERLAY_FORMAT_RGB,
+    OVERLAY_FORMAT_GBRP,
+    OVERLAY_FORMAT_AUTO,
+    OVERLAY_FORMAT_NB
+};
+
+typedef struct OverlayContext {
+    const AVClass *class;
+    int x, y;                   ///< position of overlaid picture
+
+    uint8_t main_is_packed_rgb;
+    uint8_t main_rgba_map[4];
+    uint8_t main_has_alpha;
+    uint8_t overlay_is_packed_rgb;
+    uint8_t overlay_rgba_map[4];
+    uint8_t overlay_has_alpha;
+    int format;                 ///< OverlayFormat
+    int alpha_format;
+    int eval_mode;              ///< EvalMode
+
+    FFFrameSync fs;
+
+    int main_pix_step[4];       ///< steps per pixel for each plane of the main output
+    int overlay_pix_step[4];    ///< steps per pixel for each plane of the overlay
+    int hsub, vsub;             ///< chroma subsampling values
+    const AVPixFmtDescriptor *main_desc; ///< format descriptor for main input
+
+    double var_values[VAR_VARS_NB];
+    char *x_expr, *y_expr;
+
+    AVExpr *x_pexpr, *y_pexpr;
+
+    int (*blend_row[4])(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a, int w,
+                        ptrdiff_t alinesize);
+    int (*blend_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
+} OverlayContext;
+
+void ff_overlay_init_x86(OverlayContext *s, int format, int pix_format,
+                         int alpha_format, int main_has_alpha);
+
+#endif /* AVFILTER_OVERLAY_H */

diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c
new file mode 100644
index 0000000..e9c8532
--- /dev/null
+++ b/libavfilter/vf_overlay_opencl.c

@@ -0,0 +1,328 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/log.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "framesync.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+
+typedef struct OverlayOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    int              initialised;
+    cl_kernel        kernel;
+    cl_command_queue command_queue;
+
+    FFFrameSync      fs;
+
+    int              nb_planes;
+    int              x_subsample;
+    int              y_subsample;
+    int              alpha_separate;
+
+    int              x_position;
+    int              y_position;
+} OverlayOpenCLContext;
+
+static int overlay_opencl_load(AVFilterContext *avctx,
+                               enum AVPixelFormat main_format,
+                               enum AVPixelFormat overlay_format)
+{
+    OverlayOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    const char *source = ff_opencl_source_overlay;
+    const char *kernel;
+    const AVPixFmtDescriptor *main_desc, *overlay_desc;
+    int err, i, main_planes, overlay_planes;
+
+    main_desc    = av_pix_fmt_desc_get(main_format);
+    overlay_desc = av_pix_fmt_desc_get(overlay_format);
+
+    main_planes = overlay_planes = 0;
+    for (i = 0; i < main_desc->nb_components; i++)
+        main_planes = FFMAX(main_planes,
+                            main_desc->comp[i].plane + 1);
+    for (i = 0; i < overlay_desc->nb_components; i++)
+        overlay_planes = FFMAX(overlay_planes,
+                               overlay_desc->comp[i].plane + 1);
+
+    ctx->nb_planes = main_planes;
+    ctx->x_subsample = 1 << main_desc->log2_chroma_w;
+    ctx->y_subsample = 1 << main_desc->log2_chroma_h;
+
+    if (ctx->x_position % ctx->x_subsample ||
+        ctx->y_position % ctx->y_subsample) {
+        av_log(avctx, AV_LOG_WARNING, "Warning: overlay position (%d, %d) "
+               "does not match subsampling (%d, %d).\n",
+               ctx->x_position, ctx->y_position,
+               ctx->x_subsample, ctx->y_subsample);
+    }
+
+    if (main_planes == overlay_planes) {
+        if (main_desc->nb_components == overlay_desc->nb_components)
+            kernel = "overlay_no_alpha";
+        else
+            kernel = "overlay_internal_alpha";
+        ctx->alpha_separate = 0;
+    } else {
+        kernel = "overlay_external_alpha";
+        ctx->alpha_separate = 1;
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel);
+
+    err = ff_opencl_filter_load_program(avctx, &source, 1);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+                     "command queue %d.\n", cle);
+
+    ctx->kernel = clCreateKernel(ctx->ocf.program, kernel, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel)
+        clReleaseKernel(ctx->kernel);
+    return err;
+}
+
+static int overlay_opencl_blend(FFFrameSync *fs)
+{
+    AVFilterContext    *avctx = fs->parent;
+    AVFilterLink     *outlink = avctx->outputs[0];
+    OverlayOpenCLContext *ctx = avctx->priv;
+    AVFrame *input_main, *input_overlay;
+    AVFrame *output;
+    cl_mem mem;
+    cl_int cle, x, y;
+    size_t global_work[2];
+    int kernel_arg = 0;
+    int err, plane;
+
+    err = ff_framesync_get_frame(fs, 0, &input_main, 0);
+    if (err < 0)
+        return err;
+    err = ff_framesync_get_frame(fs, 1, &input_overlay, 0);
+    if (err < 0)
+        return err;
+
+    if (!ctx->initialised) {
+        AVHWFramesContext *main_fc =
+            (AVHWFramesContext*)input_main->hw_frames_ctx->data;
+        AVHWFramesContext *overlay_fc =
+            (AVHWFramesContext*)input_overlay->hw_frames_ctx->data;
+
+        err = overlay_opencl_load(avctx, main_fc->sw_format,
+                                  overlay_fc->sw_format);
+        if (err < 0)
+            return err;
+    }
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (plane = 0; plane < ctx->nb_planes; plane++) {
+        kernel_arg = 0;
+
+        mem = (cl_mem)output->data[plane];
+        CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
+        kernel_arg++;
+
+        mem = (cl_mem)input_main->data[plane];
+        CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
+        kernel_arg++;
+
+        mem = (cl_mem)input_overlay->data[plane];
+        CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
+        kernel_arg++;
+
+        if (ctx->alpha_separate) {
+            mem = (cl_mem)input_overlay->data[ctx->nb_planes];
+            CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
+            kernel_arg++;
+        }
+
+        x = ctx->x_position / (plane == 0 ? 1 : ctx->x_subsample);
+        y = ctx->y_position / (plane == 0 ? 1 : ctx->y_subsample);
+
+        CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &x);
+        kernel_arg++;
+        CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &y);
+        kernel_arg++;
+
+        if (ctx->alpha_separate) {
+            cl_int alpha_adj_x = plane == 0 ? 1 : ctx->x_subsample;
+            cl_int alpha_adj_y = plane == 0 ? 1 : ctx->y_subsample;
+
+            CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_x);
+            kernel_arg++;
+            CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_y);
+            kernel_arg++;
+        }
+
+        err = ff_opencl_filter_work_size_from_image(avctx, global_work,
+                                                    output, plane, 0);
+        if (err < 0)
+            goto fail;
+
+        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                     global_work, NULL, 0, NULL, NULL);
+        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue overlay kernel "
+                         "for plane %d: %d.\n", plane, cle);
+    }
+
+    cle = clFinish(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+
+    err = av_frame_copy_props(output, input_main);
+
+    av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    av_frame_free(&output);
+    return err;
+}
+
+static int overlay_opencl_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    OverlayOpenCLContext *ctx = avctx->priv;
+    int err;
+
+    err = ff_opencl_filter_config_output(outlink);
+    if (err < 0)
+        return err;
+
+    err = ff_framesync_init_dualinput(&ctx->fs, avctx);
+    if (err < 0)
+        return err;
+
+    return ff_framesync_configure(&ctx->fs);
+}
+
+static av_cold int overlay_opencl_init(AVFilterContext *avctx)
+{
+    OverlayOpenCLContext *ctx = avctx->priv;
+
+    ctx->fs.on_event = &overlay_opencl_blend;
+
+    return ff_opencl_filter_init(avctx);
+}
+
+static int overlay_opencl_activate(AVFilterContext *avctx)
+{
+    OverlayOpenCLContext *ctx = avctx->priv;
+
+    return ff_framesync_activate(&ctx->fs);
+}
+
+static av_cold void overlay_opencl_uninit(AVFilterContext *avctx)
+{
+    OverlayOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+
+    if (ctx->kernel) {
+        cle = clReleaseKernel(ctx->kernel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+
+    ff_framesync_uninit(&ctx->fs);
+}
+
+#define OFFSET(x) offsetof(OverlayOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption overlay_opencl_options[] = {
+    { "x", "Overlay x position",
+      OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
+    { "y", "Overlay y position",
+      OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(overlay_opencl);
+
+static const AVFilterPad overlay_opencl_inputs[] = {
+    {
+        .name         = "main",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    {
+        .name         = "overlay",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad overlay_opencl_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = &overlay_opencl_config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_overlay_opencl = {
+    .name            = "overlay_opencl",
+    .description     = NULL_IF_CONFIG_SMALL("Overlay one video on top of another"),
+    .priv_size       = sizeof(OverlayOpenCLContext),
+    .priv_class      = &overlay_opencl_class,
+    .init            = &overlay_opencl_init,
+    .uninit          = &overlay_opencl_uninit,
+    .query_formats   = &ff_opencl_filter_query_formats,
+    .activate        = &overlay_opencl_activate,
+    .inputs          = overlay_opencl_inputs,
+    .outputs         = overlay_opencl_outputs,
+    .flags_internal  = FF_FILTER_FLAG_HWFRAME_AWARE,
+};

diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c
new file mode 100644
index 0000000..2087178
--- /dev/null
+++ b/libavfilter/vf_overlay_qsv.c

@@ -0,0 +1,434 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * A hardware accelerated overlay filter based on Intel Quick Sync Video VPP
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/common.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/eval.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mathematics.h"
+
+#include "internal.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "video.h"
+
+#include "framesync.h"
+#include "qsvvpp.h"
+
+#define MAIN    0
+#define OVERLAY 1
+
+#define OFFSET(x) offsetof(QSVOverlayContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+
+enum var_name {
+    VAR_MAIN_iW,     VAR_MW,
+    VAR_MAIN_iH,     VAR_MH,
+    VAR_OVERLAY_iW,
+    VAR_OVERLAY_iH,
+    VAR_OVERLAY_X,  VAR_OX,
+    VAR_OVERLAY_Y,  VAR_OY,
+    VAR_OVERLAY_W,  VAR_OW,
+    VAR_OVERLAY_H,  VAR_OH,
+    VAR_VARS_NB
+};
+
+typedef struct QSVOverlayContext {
+    const AVClass      *class;
+
+    FFFrameSync fs;
+    QSVVPPContext      *qsv;
+    QSVVPPParam        qsv_param;
+    mfxExtVPPComposite comp_conf;
+    double             var_values[VAR_VARS_NB];
+
+    char     *overlay_ox, *overlay_oy, *overlay_ow, *overlay_oh;
+    uint16_t  overlay_alpha, overlay_pixel_alpha;
+
+} QSVOverlayContext;
+
+static const char *const var_names[] = {
+    "main_w",     "W",   /* input width of the main layer */
+    "main_h",     "H",   /* input height of the main layer */
+    "overlay_iw",        /* input width of the overlay layer */
+    "overlay_ih",        /* input height of the overlay layer */
+    "overlay_x",  "x",   /* x position of the overlay layer inside of main */
+    "overlay_y",  "y",   /* y position of the overlay layer inside of main */
+    "overlay_w",  "w",   /* output width of overlay layer */
+    "overlay_h",  "h",   /* output height of overlay layer */
+    NULL
+};
+
+static const AVOption overlay_qsv_options[] = {
+    { "x", "Overlay x position", OFFSET(overlay_ox), AV_OPT_TYPE_STRING, { .str="0"}, 0, 255, .flags = FLAGS},
+    { "y", "Overlay y position", OFFSET(overlay_oy), AV_OPT_TYPE_STRING, { .str="0"}, 0, 255, .flags = FLAGS},
+    { "w", "Overlay width",      OFFSET(overlay_ow), AV_OPT_TYPE_STRING, { .str="overlay_iw"}, 0, 255, .flags = FLAGS},
+    { "h", "Overlay height",     OFFSET(overlay_oh), AV_OPT_TYPE_STRING, { .str="overlay_ih*w/overlay_iw"}, 0, 255, .flags = FLAGS},
+    { "alpha", "Overlay global alpha", OFFSET(overlay_alpha), AV_OPT_TYPE_INT, { .i64 = 255}, 0, 255, .flags = FLAGS},
+    { "eof_action", "Action to take when encountering EOF from secondary input ",
+        OFFSET(fs.opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT },
+        EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, "eof_action" },
+        { "repeat", "Repeat the previous frame.",   0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" },
+        { "endall", "End both streams.",            0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" },
+        { "pass",   "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS },   .flags = FLAGS, "eof_action" },
+    { "shortest", "force termination when the shortest input terminates", OFFSET(fs.opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+    { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(fs.opt_repeatlast), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
+    { NULL }
+};
+
+FRAMESYNC_DEFINE_CLASS(overlay_qsv, QSVOverlayContext, fs);
+
+static int eval_expr(AVFilterContext *ctx)
+{
+    QSVOverlayContext *vpp = ctx->priv;
+    double     *var_values = vpp->var_values;
+    int                ret = 0;
+    AVExpr *ox_expr = NULL, *oy_expr = NULL;
+    AVExpr *ow_expr = NULL, *oh_expr = NULL;
+
+#define PASS_EXPR(e, s) {\
+    ret = av_expr_parse(&e, s, var_names, NULL, NULL, NULL, NULL, 0, ctx); \
+    if (ret < 0) {\
+        av_log(ctx, AV_LOG_ERROR, "Error when passing '%s'.\n", s);\
+        goto release;\
+    }\
+}
+    PASS_EXPR(ox_expr, vpp->overlay_ox);
+    PASS_EXPR(oy_expr, vpp->overlay_oy);
+    PASS_EXPR(ow_expr, vpp->overlay_ow);
+    PASS_EXPR(oh_expr, vpp->overlay_oh);
+#undef PASS_EXPR
+
+    var_values[VAR_OVERLAY_W] =
+    var_values[VAR_OW]        = av_expr_eval(ow_expr, var_values, NULL);
+    var_values[VAR_OVERLAY_H] =
+    var_values[VAR_OH]        = av_expr_eval(oh_expr, var_values, NULL);
+
+    /* calc again in case ow is relative to oh */
+    var_values[VAR_OVERLAY_W] =
+    var_values[VAR_OW]        = av_expr_eval(ow_expr, var_values, NULL);
+
+    var_values[VAR_OVERLAY_X] =
+    var_values[VAR_OX]        = av_expr_eval(ox_expr, var_values, NULL);
+    var_values[VAR_OVERLAY_Y] =
+    var_values[VAR_OY]        = av_expr_eval(oy_expr, var_values, NULL);
+
+    /* calc again in case ox is relative to oy */
+    var_values[VAR_OVERLAY_X] =
+    var_values[VAR_OX]        = av_expr_eval(ox_expr, var_values, NULL);
+
+    /* calc overlay_w and overlay_h again incase relative to ox,oy */
+    var_values[VAR_OVERLAY_W] =
+    var_values[VAR_OW]        = av_expr_eval(ow_expr, var_values, NULL);
+    var_values[VAR_OVERLAY_H] =
+    var_values[VAR_OH]        = av_expr_eval(oh_expr, var_values, NULL);
+    var_values[VAR_OVERLAY_W] =
+    var_values[VAR_OW]        = av_expr_eval(ow_expr, var_values, NULL);
+
+release:
+    av_expr_free(ox_expr);
+    av_expr_free(oy_expr);
+    av_expr_free(ow_expr);
+    av_expr_free(oh_expr);
+
+    return ret;
+}
+
+static int have_alpha_planar(AVFilterLink *link)
+{
+    enum AVPixelFormat pix_fmt;
+    const AVPixFmtDescriptor *desc;
+    AVHWFramesContext *fctx;
+
+    if (link->format == AV_PIX_FMT_QSV) {
+        fctx    = (AVHWFramesContext *)link->hw_frames_ctx->data;
+        pix_fmt = fctx->sw_format;
+    }
+
+    desc = av_pix_fmt_desc_get(pix_fmt);
+    if (!desc)
+        return 0;
+
+    return !!(desc->flags & AV_PIX_FMT_FLAG_ALPHA);
+}
+
+static int config_main_input(AVFilterLink *inlink)
+{
+    AVFilterContext      *ctx = inlink->dst;
+    QSVOverlayContext    *vpp = ctx->priv;
+    mfxVPPCompInputStream *st = &vpp->comp_conf.InputStream[0];
+
+    av_log(ctx, AV_LOG_DEBUG, "Input[%d] is of %s.\n", FF_INLINK_IDX(inlink),
+           av_get_pix_fmt_name(inlink->format));
+
+    vpp->var_values[VAR_MAIN_iW] =
+    vpp->var_values[VAR_MW]      = inlink->w;
+    vpp->var_values[VAR_MAIN_iH] =
+    vpp->var_values[VAR_MH]      = inlink->h;
+
+    st->DstX              = 0;
+    st->DstY              = 0;
+    st->DstW              = inlink->w;
+    st->DstH              = inlink->h;
+    st->GlobalAlphaEnable = 0;
+    st->PixelAlphaEnable  = 0;
+
+    return 0;
+}
+
+static int config_overlay_input(AVFilterLink *inlink)
+{
+    AVFilterContext       *ctx = inlink->dst;
+    QSVOverlayContext     *vpp = ctx->priv;
+    mfxVPPCompInputStream *st  = &vpp->comp_conf.InputStream[1];
+    int                    ret = 0;
+
+    av_log(ctx, AV_LOG_DEBUG, "Input[%d] is of %s.\n", FF_INLINK_IDX(inlink),
+           av_get_pix_fmt_name(inlink->format));
+
+    vpp->var_values[VAR_OVERLAY_iW] = inlink->w;
+    vpp->var_values[VAR_OVERLAY_iH] = inlink->h;
+
+    ret = eval_expr(ctx);
+    if (ret < 0)
+        return ret;
+
+    st->DstX              = vpp->var_values[VAR_OX];
+    st->DstY              = vpp->var_values[VAR_OY];
+    st->DstW              = vpp->var_values[VAR_OW];
+    st->DstH              = vpp->var_values[VAR_OH];
+    st->GlobalAlpha       = vpp->overlay_alpha;
+    st->GlobalAlphaEnable = (st->GlobalAlpha < 255);
+    st->PixelAlphaEnable  = have_alpha_planar(inlink);
+
+    return 0;
+}
+
+static int process_frame(FFFrameSync *fs)
+{
+    AVFilterContext  *ctx = fs->parent;
+    QSVOverlayContext  *s = fs->opaque;
+    AVFrame        *frame = NULL;
+    int               ret = 0, i;
+
+    for (i = 0; i < ctx->nb_inputs; i++) {
+        ret = ff_framesync_get_frame(fs, i, &frame, 0);
+        if (ret == 0)
+            ret = ff_qsvvpp_filter_frame(s->qsv, ctx->inputs[i], frame);
+        if (ret < 0 && ret != AVERROR(EAGAIN))
+            break;
+    }
+
+    return ret;
+}
+
+static int init_framesync(AVFilterContext *ctx)
+{
+    QSVOverlayContext *s = ctx->priv;
+    int ret, i;
+
+    s->fs.on_event = process_frame;
+    s->fs.opaque   = s;
+    ret = ff_framesync_init(&s->fs, ctx, ctx->nb_inputs);
+    if (ret < 0)
+        return ret;
+
+    for (i = 0; i < ctx->nb_inputs; i++) {
+        FFFrameSyncIn *in = &s->fs.in[i];
+        in->before    = EXT_STOP;
+        in->after     = EXT_INFINITY;
+        in->sync      = i ? 1 : 2;
+        in->time_base = ctx->inputs[i]->time_base;
+    }
+
+    return ff_framesync_configure(&s->fs);
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext   *ctx = outlink->src;
+    QSVOverlayContext *vpp = ctx->priv;
+    AVFilterLink      *in0 = ctx->inputs[0];
+    AVFilterLink      *in1 = ctx->inputs[1];
+    int ret;
+
+    av_log(ctx, AV_LOG_DEBUG, "Output is of %s.\n", av_get_pix_fmt_name(outlink->format));
+    if ((in0->format == AV_PIX_FMT_QSV && in1->format != AV_PIX_FMT_QSV) ||
+        (in0->format != AV_PIX_FMT_QSV && in1->format == AV_PIX_FMT_QSV)) {
+        av_log(ctx, AV_LOG_ERROR, "Mixing hardware and software pixel formats is not supported.\n");
+        return AVERROR(EINVAL);
+    } else if (in0->format == AV_PIX_FMT_QSV) {
+        AVHWFramesContext *hw_frame0 = (AVHWFramesContext *)in0->hw_frames_ctx->data;
+        AVHWFramesContext *hw_frame1 = (AVHWFramesContext *)in1->hw_frames_ctx->data;
+
+        if (hw_frame0->device_ctx != hw_frame1->device_ctx) {
+            av_log(ctx, AV_LOG_ERROR, "Inputs with different underlying QSV devices are forbidden.\n");
+            return AVERROR(EINVAL);
+        }
+    }
+
+    outlink->w          = vpp->var_values[VAR_MW];
+    outlink->h          = vpp->var_values[VAR_MH];
+    outlink->frame_rate = in0->frame_rate;
+    outlink->time_base  = av_inv_q(outlink->frame_rate);
+
+    ret = init_framesync(ctx);
+    if (ret < 0)
+        return ret;
+
+    return ff_qsvvpp_create(ctx, &vpp->qsv, &vpp->qsv_param);
+}
+
+/*
+ * Callback for qsvvpp
+ * @Note: qsvvpp composition does not generate PTS for result frame.
+ *        so we assign the PTS from framesync to the output frame.
+ */
+
+static int filter_callback(AVFilterLink *outlink, AVFrame *frame)
+{
+    QSVOverlayContext *s = outlink->src->priv;
+    frame->pts = av_rescale_q(s->fs.pts,
+                              s->fs.time_base, outlink->time_base);
+    return ff_filter_frame(outlink, frame);
+}
+
+
+static int overlay_qsv_init(AVFilterContext *ctx)
+{
+    QSVOverlayContext *vpp = ctx->priv;
+
+    /* fill composite config */
+    vpp->comp_conf.Header.BufferId = MFX_EXTBUFF_VPP_COMPOSITE;
+    vpp->comp_conf.Header.BufferSz = sizeof(vpp->comp_conf);
+    vpp->comp_conf.NumInputStream  = ctx->nb_inputs;
+    vpp->comp_conf.InputStream     = av_mallocz_array(ctx->nb_inputs,
+                                                      sizeof(*vpp->comp_conf.InputStream));
+    if (!vpp->comp_conf.InputStream)
+        return AVERROR(ENOMEM);
+
+    /* initialize QSVVPP params */
+    vpp->qsv_param.filter_frame = filter_callback;
+    vpp->qsv_param.ext_buf      = av_mallocz(sizeof(*vpp->qsv_param.ext_buf));
+    if (!vpp->qsv_param.ext_buf)
+        return AVERROR(ENOMEM);
+
+    vpp->qsv_param.ext_buf[0]    = (mfxExtBuffer *)&vpp->comp_conf;
+    vpp->qsv_param.num_ext_buf   = 1;
+    vpp->qsv_param.out_sw_format = AV_PIX_FMT_NV12;
+    vpp->qsv_param.num_crop      = 0;
+
+    return 0;
+}
+
+static void overlay_qsv_uninit(AVFilterContext *ctx)
+{
+    QSVOverlayContext *vpp = ctx->priv;
+
+    ff_qsvvpp_free(&vpp->qsv);
+    ff_framesync_uninit(&vpp->fs);
+    av_freep(&vpp->comp_conf.InputStream);
+    av_freep(&vpp->qsv_param.ext_buf);
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    QSVOverlayContext *s = ctx->priv;
+    return ff_framesync_activate(&s->fs);
+}
+
+static int overlay_qsv_query_formats(AVFilterContext *ctx)
+{
+    int i;
+    int ret;
+
+    static const enum AVPixelFormat main_in_fmts[] = {
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_NV12,
+        AV_PIX_FMT_YUYV422,
+        AV_PIX_FMT_RGB32,
+        AV_PIX_FMT_QSV,
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat out_pix_fmts[] = {
+        AV_PIX_FMT_NV12,
+        AV_PIX_FMT_QSV,
+        AV_PIX_FMT_NONE
+    };
+
+    for (i = 0; i < ctx->nb_inputs; i++) {
+        ret = ff_formats_ref(ff_make_format_list(main_in_fmts), &ctx->inputs[i]->out_formats);
+        if (ret < 0)
+            return ret;
+    }
+
+    ret = ff_formats_ref(ff_make_format_list(out_pix_fmts), &ctx->outputs[0]->in_formats);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+static const AVFilterPad overlay_qsv_inputs[] = {
+    {
+        .name          = "main",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_main_input,
+        .needs_fifo    = 1,
+    },
+    {
+        .name          = "overlay",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_overlay_input,
+        .needs_fifo    = 1,
+    },
+    { NULL }
+};
+
+static const AVFilterPad overlay_qsv_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_overlay_qsv = {
+    .name           = "overlay_qsv",
+    .description    = NULL_IF_CONFIG_SMALL("Quick Sync Video overlay."),
+    .priv_size      = sizeof(QSVOverlayContext),
+    .query_formats  = overlay_qsv_query_formats,
+    .preinit        = overlay_qsv_framesync_preinit,
+    .init           = overlay_qsv_init,
+    .uninit         = overlay_qsv_uninit,
+    .activate       = activate,
+    .inputs         = overlay_qsv_inputs,
+    .outputs        = overlay_qsv_outputs,
+    .priv_class     = &overlay_qsv_class,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};

diff --git a/libavfilter/vf_palettegen.c b/libavfilter/vf_palettegen.c
index 03de317..5ff73e6 100644
--- a/libavfilter/vf_palettegen.c
+++ b/libavfilter/vf_palettegen.c

@@ -27,6 +27,7 @@
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/qsort.h"
+#include "libavutil/intreadwrite.h"
 #include "avfilter.h"
 #include "internal.h"
 
@@ -74,6 +75,7 @@
     struct range_box boxes[256];            // define the segmentation of the colorspace (the final palette)
     int nb_boxes;                           // number of boxes (increase will segmenting them)
     int palette_pushed;                     // if the palette frame is pushed into the outlink or not
+    uint8_t transparency_color[4];          // background color for transparency
 } PaletteGenContext;
 
 #define OFFSET(x) offsetof(PaletteGenContext, x)
@@ -81,6 +83,7 @@
 static const AVOption palettegen_options[] = {
     { "max_colors", "set the maximum number of colors to use in the palette", OFFSET(max_colors), AV_OPT_TYPE_INT, {.i64=256}, 4, 256, FLAGS },
     { "reserve_transparent", "reserve a palette entry for transparency", OFFSET(reserve_transparent), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
+    { "transparency_color", "set a background color for transparency", OFFSET(transparency_color), AV_OPT_TYPE_COLOR, {.str="lime"}, CHAR_MIN, CHAR_MAX, FLAGS },
     { "stats_mode", "set statistics mode", OFFSET(stats_mode), AV_OPT_TYPE_INT, {.i64=STATS_MODE_ALL_FRAMES}, 0, NB_STATS_MODE-1, FLAGS, "mode" },
         { "full", "compute full frame histograms", 0, AV_OPT_TYPE_CONST, {.i64=STATS_MODE_ALL_FRAMES}, INT_MIN, INT_MAX, FLAGS, "mode" },
         { "diff", "compute histograms only for the part that differs from previous frame", 0, AV_OPT_TYPE_CONST, {.i64=STATS_MODE_DIFF_FRAMES}, INT_MIN, INT_MAX, FLAGS, "mode" },
@@ -250,7 +253,7 @@
 
     if (s->reserve_transparent) {
         av_assert0(s->nb_boxes < 256);
-        pal[out->width - pal_linesize - 1] = 0x0000ff00; // add a green transparent color
+        pal[out->width - pal_linesize - 1] = AV_RB32(&s->transparency_color) >> 8;
     }
 }
 

diff --git a/libavfilter/vf_paletteuse.c b/libavfilter/vf_paletteuse.c
index 79a0672..604a8af 100644
--- a/libavfilter/vf_paletteuse.c
+++ b/libavfilter/vf_paletteuse.c

@@ -56,7 +56,7 @@
 };
 
 struct color_node {
-    uint8_t val[3];
+    uint8_t val[4];
     uint8_t palette_id;
     int split;
     int left_id, right_id;
@@ -86,6 +86,8 @@
     struct cache_node cache[CACHE_SIZE];    /* lookup cache */
     struct color_node map[AVPALETTE_COUNT]; /* 3D-Tree (KD-Tree with K=3) for reverse colormap */
     uint32_t palette[AVPALETTE_COUNT];
+    int transparency_index; /* index in the palette of transparency. -1 if there is no transparency in the palette. */
+    int trans_thresh;
     int palette_loaded;
     int dither;
     int new;
@@ -116,6 +118,8 @@
     { "bayer_scale", "set scale for bayer dithering", OFFSET(bayer_scale), AV_OPT_TYPE_INT, {.i64=2}, 0, 5, FLAGS },
     { "diff_mode",   "set frame difference mode",     OFFSET(diff_mode),   AV_OPT_TYPE_INT, {.i64=DIFF_MODE_NONE}, 0, NB_DIFF_MODE-1, FLAGS, "diff_mode" },
         { "rectangle", "process smallest different rectangle", 0, AV_OPT_TYPE_CONST, {.i64=DIFF_MODE_RECTANGLE}, INT_MIN, INT_MAX, FLAGS, "diff_mode" },
+    { "new", "take new palette for each output frame", OFFSET(new), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
+    { "alpha_threshold", "set the alpha threshold for transparency", OFFSET(trans_thresh), AV_OPT_TYPE_INT, {.i64=128}, 0, 255 },
 
     /* following are the debug options, not part of the official API */
     { "debug_kdtree", "save Graphviz graph of the kdtree in specified file", OFFSET(dot_filename), AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX, FLAGS },
@@ -125,7 +129,6 @@
         { "bruteforce",    "brute-force into the palette", 0, AV_OPT_TYPE_CONST, {.i64=COLOR_SEARCH_BRUTEFORCE},    INT_MIN, INT_MAX, FLAGS, "search" },
     { "mean_err", "compute and print mean error", OFFSET(calc_mean_err), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
     { "debug_accuracy", "test color search accuracy", OFFSET(debug_accuracy), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
-    { "new", "take new palette for each output frame", OFFSET(new), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
     { NULL }
 };
 
@@ -157,34 +160,43 @@
 
 static av_always_inline int dither_color(uint32_t px, int er, int eg, int eb, int scale, int shift)
 {
-    return av_clip_uint8((px >> 16 & 0xff) + ((er * scale) / (1<<shift))) << 16
+    return av_clip_uint8( px >> 24                                      ) << 24
+         | av_clip_uint8((px >> 16 & 0xff) + ((er * scale) / (1<<shift))) << 16
          | av_clip_uint8((px >>  8 & 0xff) + ((eg * scale) / (1<<shift))) <<  8
          | av_clip_uint8((px       & 0xff) + ((eb * scale) / (1<<shift)));
 }
 
-static av_always_inline int diff(const uint8_t *c1, const uint8_t *c2)
+static av_always_inline int diff(const uint8_t *c1, const uint8_t *c2, const int trans_thresh)
 {
     // XXX: try L*a*b with CIE76 (dL*dL + da*da + db*db)
-    const int dr = c1[0] - c2[0];
-    const int dg = c1[1] - c2[1];
-    const int db = c1[2] - c2[2];
-    return dr*dr + dg*dg + db*db;
+    const int dr = c1[1] - c2[1];
+    const int dg = c1[2] - c2[2];
+    const int db = c1[3] - c2[3];
+
+    if (c1[0] < trans_thresh && c2[0] < trans_thresh) {
+        return 0;
+    } else if (c1[0] >= trans_thresh && c2[0] >= trans_thresh) {
+        return dr*dr + dg*dg + db*db;
+    } else {
+        return 255*255 + 255*255 + 255*255;
+    }
 }
 
-static av_always_inline uint8_t colormap_nearest_bruteforce(const uint32_t *palette, const uint8_t *rgb)
+static av_always_inline uint8_t colormap_nearest_bruteforce(const uint32_t *palette, const uint8_t *argb, const int trans_thresh)
 {
     int i, pal_id = -1, min_dist = INT_MAX;
 
     for (i = 0; i < AVPALETTE_COUNT; i++) {
         const uint32_t c = palette[i];
 
-        if ((c & 0xff000000) == 0xff000000) { // ignore transparent entry
-            const uint8_t palrgb[] = {
+        if (c >> 24 >= trans_thresh) { // ignore transparent entry
+            const uint8_t palargb[] = {
+                palette[i]>>24 & 0xff,
                 palette[i]>>16 & 0xff,
                 palette[i]>> 8 & 0xff,
                 palette[i]     & 0xff,
             };
-            const int d = diff(palrgb, rgb);
+            const int d = diff(palargb, argb, trans_thresh);
             if (d < min_dist) {
                 pal_id = i;
                 min_dist = d;
@@ -203,13 +215,14 @@
 static void colormap_nearest_node(const struct color_node *map,
                                   const int node_pos,
                                   const uint8_t *target,
+                                  const int trans_thresh,
                                   struct nearest_color *nearest)
 {
     const struct color_node *kd = map + node_pos;
     const int s = kd->split;
     int dx, nearer_kd_id, further_kd_id;
     const uint8_t *current = kd->val;
-    const int current_to_target = diff(target, current);
+    const int current_to_target = diff(target, current, trans_thresh);
 
     if (current_to_target < nearest->dist_sqd) {
         nearest->node_pos = node_pos;
@@ -223,17 +236,17 @@
         else         nearer_kd_id = kd->right_id, further_kd_id = kd->left_id;
 
         if (nearer_kd_id != -1)
-            colormap_nearest_node(map, nearer_kd_id, target, nearest);
+            colormap_nearest_node(map, nearer_kd_id, target, trans_thresh, nearest);
 
         if (further_kd_id != -1 && dx*dx < nearest->dist_sqd)
-            colormap_nearest_node(map, further_kd_id, target, nearest);
+            colormap_nearest_node(map, further_kd_id, target, trans_thresh, nearest);
     }
 }
 
-static av_always_inline uint8_t colormap_nearest_recursive(const struct color_node *node, const uint8_t *rgb)
+static av_always_inline uint8_t colormap_nearest_recursive(const struct color_node *node, const uint8_t *rgb, const int trans_thresh)
 {
     struct nearest_color res = {.dist_sqd = INT_MAX, .node_pos = -1};
-    colormap_nearest_node(node, 0, rgb, &res);
+    colormap_nearest_node(node, 0, rgb, trans_thresh, &res);
     return node[res.node_pos].palette_id;
 }
 
@@ -242,7 +255,7 @@
     int dx2;
 };
 
-static av_always_inline uint8_t colormap_nearest_iterative(const struct color_node *root, const uint8_t *target)
+static av_always_inline uint8_t colormap_nearest_iterative(const struct color_node *root, const uint8_t *target, const int trans_thresh)
 {
     int pos = 0, best_node_id = -1, best_dist = INT_MAX, cur_color_id = 0;
     struct stack_node nodes[16];
@@ -252,7 +265,7 @@
 
         const struct color_node *kd = &root[cur_color_id];
         const uint8_t *current = kd->val;
-        const int current_to_target = diff(target, current);
+        const int current_to_target = diff(target, current, trans_thresh);
 
         /* Compare current color node to the target and update our best node if
          * it's actually better. */
@@ -314,32 +327,35 @@
     return root[best_node_id].palette_id;
 }
 
-#define COLORMAP_NEAREST(search, palette, root, target)                                    \
-    search == COLOR_SEARCH_NNS_ITERATIVE ? colormap_nearest_iterative(root, target) :      \
-    search == COLOR_SEARCH_NNS_RECURSIVE ? colormap_nearest_recursive(root, target) :      \
-                                           colormap_nearest_bruteforce(palette, target)
+#define COLORMAP_NEAREST(search, palette, root, target, trans_thresh)                                    \
+    search == COLOR_SEARCH_NNS_ITERATIVE ? colormap_nearest_iterative(root, target, trans_thresh) :      \
+    search == COLOR_SEARCH_NNS_RECURSIVE ? colormap_nearest_recursive(root, target, trans_thresh) :      \
+                                           colormap_nearest_bruteforce(palette, target, trans_thresh)
 
 /**
  * Check if the requested color is in the cache already. If not, find it in the
  * color tree and cache it.
- * Note: r, g, and b are the component of c but are passed as well to avoid
+ * Note: a, r, g, and b are the components of color, but are passed as well to avoid
  * recomputing them (they are generally computed by the caller for other uses).
  */
-static av_always_inline int color_get(struct cache_node *cache, uint32_t color,
-                                      uint8_t r, uint8_t g, uint8_t b,
-                                      const struct color_node *map,
-                                      const uint32_t *palette,
+static av_always_inline int color_get(PaletteUseContext *s, uint32_t color,
+                                      uint8_t a, uint8_t r, uint8_t g, uint8_t b,
                                       const enum color_search_method search_method)
 {
     int i;
-    const uint8_t rgb[] = {r, g, b};
+    const uint8_t argb_elts[] = {a, r, g, b};
     const uint8_t rhash = r & ((1<<NBITS)-1);
     const uint8_t ghash = g & ((1<<NBITS)-1);
     const uint8_t bhash = b & ((1<<NBITS)-1);
     const unsigned hash = rhash<<(NBITS*2) | ghash<<NBITS | bhash;
-    struct cache_node *node = &cache[hash];
+    struct cache_node *node = &s->cache[hash];
     struct cached_color *e;
 
+    // first, check for transparency
+    if (a < s->trans_thresh && s->transparency_index >= 0) {
+        return s->transparency_index;
+    }
+
     for (i = 0; i < node->nb_entries; i++) {
         e = &node->entries[i];
         if (e->color == color)
@@ -351,21 +367,24 @@
     if (!e)
         return AVERROR(ENOMEM);
     e->color = color;
-    e->pal_entry = COLORMAP_NEAREST(search_method, palette, map, rgb);
+    e->pal_entry = COLORMAP_NEAREST(search_method, s->palette, s->map, argb_elts, s->trans_thresh);
+
     return e->pal_entry;
 }
 
-static av_always_inline int get_dst_color_err(struct cache_node *cache,
-                                              uint32_t c, const struct color_node *map,
-                                              const uint32_t *palette,
-                                              int *er, int *eg, int *eb,
+static av_always_inline int get_dst_color_err(PaletteUseContext *s,
+                                              uint32_t c, int *er, int *eg, int *eb,
                                               const enum color_search_method search_method)
 {
+    const uint8_t a = c >> 24 & 0xff;
     const uint8_t r = c >> 16 & 0xff;
     const uint8_t g = c >>  8 & 0xff;
     const uint8_t b = c       & 0xff;
-    const int dstx = color_get(cache, c, r, g, b, map, palette, search_method);
-    const uint32_t dstc = palette[dstx];
+    uint32_t dstc;
+    const int dstx = color_get(s, c, a, r, g, b, search_method);
+    if (dstx < 0)
+        return dstx;
+    dstc = s->palette[dstx];
     *er = r - (dstc >> 16 & 0xff);
     *eg = g - (dstc >>  8 & 0xff);
     *eb = b - (dstc       & 0xff);
@@ -378,9 +397,6 @@
                                       const enum color_search_method search_method)
 {
     int x, y;
-    const struct color_node *map = s->map;
-    struct cache_node *cache = s->cache;
-    const uint32_t *palette = s->palette;
     const int src_linesize = in ->linesize[0] >> 2;
     const int dst_linesize = out->linesize[0];
     uint32_t *src = ((uint32_t *)in ->data[0]) + y_start*src_linesize;
@@ -395,14 +411,14 @@
 
             if (dither == DITHERING_BAYER) {
                 const int d = s->ordered_dither[(y & 7)<<3 | (x & 7)];
+                const uint8_t a8 = src[x] >> 24 & 0xff;
                 const uint8_t r8 = src[x] >> 16 & 0xff;
                 const uint8_t g8 = src[x] >>  8 & 0xff;
                 const uint8_t b8 = src[x]       & 0xff;
                 const uint8_t r = av_clip_uint8(r8 + d);
                 const uint8_t g = av_clip_uint8(g8 + d);
                 const uint8_t b = av_clip_uint8(b8 + d);
-                const uint32_t c = r<<16 | g<<8 | b;
-                const int color = color_get(cache, c, r, g, b, map, palette, search_method);
+                const int color = color_get(s, src[x], a8, r, g, b, search_method);
 
                 if (color < 0)
                     return color;
@@ -410,7 +426,7 @@
 
             } else if (dither == DITHERING_HECKBERT) {
                 const int right = x < w - 1, down = y < h - 1;
-                const int color = get_dst_color_err(cache, src[x], map, palette, &er, &eg, &eb, search_method);
+                const int color = get_dst_color_err(s, src[x], &er, &eg, &eb, search_method);
 
                 if (color < 0)
                     return color;
@@ -422,7 +438,7 @@
 
             } else if (dither == DITHERING_FLOYD_STEINBERG) {
                 const int right = x < w - 1, down = y < h - 1, left = x > x_start;
-                const int color = get_dst_color_err(cache, src[x], map, palette, &er, &eg, &eb, search_method);
+                const int color = get_dst_color_err(s, src[x], &er, &eg, &eb, search_method);
 
                 if (color < 0)
                     return color;
@@ -436,7 +452,7 @@
             } else if (dither == DITHERING_SIERRA2) {
                 const int right  = x < w - 1, down  = y < h - 1, left  = x > x_start;
                 const int right2 = x < w - 2,                    left2 = x > x_start + 1;
-                const int color = get_dst_color_err(cache, src[x], map, palette, &er, &eg, &eb, search_method);
+                const int color = get_dst_color_err(s, src[x], &er, &eg, &eb, search_method);
 
                 if (color < 0)
                     return color;
@@ -455,7 +471,7 @@
 
             } else if (dither == DITHERING_SIERRA2_4A) {
                 const int right = x < w - 1, down = y < h - 1, left = x > x_start;
-                const int color = get_dst_color_err(cache, src[x], map, palette, &er, &eg, &eb, search_method);
+                const int color = get_dst_color_err(s, src[x], &er, &eg, &eb, search_method);
 
                 if (color < 0)
                     return color;
@@ -466,10 +482,11 @@
                 if (         down) src[src_linesize + x    ] = dither_color(src[src_linesize + x    ], er, eg, eb, 1, 2);
 
             } else {
+                const uint8_t a = src[x] >> 24 & 0xff;
                 const uint8_t r = src[x] >> 16 & 0xff;
                 const uint8_t g = src[x] >>  8 & 0xff;
                 const uint8_t b = src[x]       & 0xff;
-                const int color = color_get(cache, src[x] & 0xffffff, r, g, b, map, palette, search_method);
+                const int color = color_get(s, src[x], a, r, g, b, search_method);
 
                 if (color < 0)
                     return color;
@@ -489,19 +506,20 @@
                       int depth)
 {
     const struct color_node *node = &map[node_id];
-    const uint32_t fontcolor = node->val[0] > 0x50 &&
-                               node->val[1] > 0x50 &&
-                               node->val[2] > 0x50 ? 0 : 0xffffff;
+    const uint32_t fontcolor = node->val[1] > 0x50 &&
+                               node->val[2] > 0x50 &&
+                               node->val[3] > 0x50 ? 0 : 0xffffff;
+    const int rgb_comp = node->split - 1;
     av_bprintf(buf, "%*cnode%d ["
                "label=\"%c%02X%c%02X%c%02X%c\" "
                "fillcolor=\"#%02x%02x%02x\" "
                "fontcolor=\"#%06"PRIX32"\"]\n",
                depth*INDENT, ' ', node->palette_id,
-               "[  "[node->split], node->val[0],
-               "][ "[node->split], node->val[1],
-               " ]["[node->split], node->val[2],
-               "  ]"[node->split],
-               node->val[0], node->val[1], node->val[2],
+               "[  "[rgb_comp], node->val[1],
+               "][ "[rgb_comp], node->val[2],
+               " ]["[rgb_comp], node->val[3],
+               "  ]"[rgb_comp],
+               node->val[1], node->val[2], node->val[3],
                fontcolor);
     if (parent_id != -1)
         av_bprintf(buf, "%*cnode%d -> node%d\n", depth*INDENT, ' ',
@@ -536,7 +554,7 @@
     return 0;
 }
 
-static int debug_accuracy(const struct color_node *node, const uint32_t *palette,
+static int debug_accuracy(const struct color_node *node, const uint32_t *palette, const int trans_thresh,
                           const enum color_search_method search_method)
 {
     int r, g, b, ret = 0;
@@ -544,16 +562,16 @@
     for (r = 0; r < 256; r++) {
         for (g = 0; g < 256; g++) {
             for (b = 0; b < 256; b++) {
-                const uint8_t rgb[] = {r, g, b};
-                const int r1 = COLORMAP_NEAREST(search_method, palette, node, rgb);
-                const int r2 = colormap_nearest_bruteforce(palette, rgb);
+                const uint8_t argb[] = {0xff, r, g, b};
+                const int r1 = COLORMAP_NEAREST(search_method, palette, node, argb, trans_thresh);
+                const int r2 = colormap_nearest_bruteforce(palette, argb, trans_thresh);
                 if (r1 != r2) {
                     const uint32_t c1 = palette[r1];
                     const uint32_t c2 = palette[r2];
-                    const uint8_t palrgb1[] = { c1>>16 & 0xff, c1>> 8 & 0xff, c1 & 0xff };
-                    const uint8_t palrgb2[] = { c2>>16 & 0xff, c2>> 8 & 0xff, c2 & 0xff };
-                    const int d1 = diff(palrgb1, rgb);
-                    const int d2 = diff(palrgb2, rgb);
+                    const uint8_t palargb1[] = { 0xff, c1>>16 & 0xff, c1>> 8 & 0xff, c1 & 0xff };
+                    const uint8_t palargb2[] = { 0xff, c2>>16 & 0xff, c2>> 8 & 0xff, c2 & 0xff };
+                    const int d1 = diff(palargb1, argb, trans_thresh);
+                    const int d2 = diff(palargb2, argb, trans_thresh);
                     if (d1 != d2) {
                         av_log(NULL, AV_LOG_ERROR,
                                "/!\\ %02X%02X%02X: %d ! %d (%06"PRIX32" ! %06"PRIX32") / dist: %d ! %d\n",
@@ -584,17 +602,19 @@
 {                                                       \
     const struct color *a = pa;                         \
     const struct color *b = pb;                         \
-    return   (a->value >> (8 * (2 - (pos))) & 0xff)     \
-           - (b->value >> (8 * (2 - (pos))) & 0xff);    \
+    return   (a->value >> (8 * (3 - (pos))) & 0xff)     \
+           - (b->value >> (8 * (3 - (pos))) & 0xff);    \
 }
 
-DECLARE_CMP_FUNC(r, 0)
-DECLARE_CMP_FUNC(g, 1)
-DECLARE_CMP_FUNC(b, 2)
+DECLARE_CMP_FUNC(a, 0)
+DECLARE_CMP_FUNC(r, 1)
+DECLARE_CMP_FUNC(g, 2)
+DECLARE_CMP_FUNC(b, 3)
 
-static const cmp_func cmp_funcs[] = {cmp_r, cmp_g, cmp_b};
+static const cmp_func cmp_funcs[] = {cmp_a, cmp_r, cmp_g, cmp_b};
 
 static int get_next_color(const uint8_t *color_used, const uint32_t *palette,
+                          const int trans_thresh,
                           int *component, const struct color_rect *box)
 {
     int wr, wg, wb;
@@ -609,11 +629,16 @@
 
     for (i = 0; i < AVPALETTE_COUNT; i++) {
         const uint32_t c = palette[i];
+        const uint8_t a = c >> 24 & 0xff;
         const uint8_t r = c >> 16 & 0xff;
         const uint8_t g = c >>  8 & 0xff;
         const uint8_t b = c       & 0xff;
 
-        if (color_used[i] ||
+        if (a < trans_thresh) {
+            continue;
+        }
+
+        if (color_used[i] || (a != 0xff) ||
             r < box->min[0] || g < box->min[1] || b < box->min[2] ||
             r > box->max[0] || g > box->max[1] || b > box->max[2])
             continue;
@@ -639,9 +664,9 @@
     wr = ranges.max[0] - ranges.min[0];
     wg = ranges.max[1] - ranges.min[1];
     wb = ranges.max[2] - ranges.min[2];
-    if (wr >= wg && wr >= wb) longest = 0;
-    if (wg >= wr && wg >= wb) longest = 1;
-    if (wb >= wr && wb >= wg) longest = 2;
+    if (wr >= wg && wr >= wb) longest = 1;
+    if (wg >= wr && wg >= wb) longest = 2;
+    if (wb >= wr && wb >= wg) longest = 3;
     cmpf = cmp_funcs[longest];
     *component = longest;
 
@@ -655,6 +680,7 @@
                            uint8_t *color_used,
                            int *nb_used,
                            const uint32_t *palette,
+                           const int trans_thresh,
                            const struct color_rect *box)
 {
     uint32_t c;
@@ -662,7 +688,7 @@
     int node_left_id = -1, node_right_id = -1;
     struct color_node *node;
     struct color_rect box1, box2;
-    const int pal_id = get_next_color(color_used, palette, &component, box);
+    const int pal_id = get_next_color(color_used, palette, trans_thresh, &component, box);
 
     if (pal_id < 0)
         return -1;
@@ -673,21 +699,22 @@
     node = &map[cur_id];
     node->split = component;
     node->palette_id = pal_id;
-    node->val[0] = c>>16 & 0xff;
-    node->val[1] = c>> 8 & 0xff;
-    node->val[2] = c     & 0xff;
+    node->val[0] = c>>24 & 0xff;
+    node->val[1] = c>>16 & 0xff;
+    node->val[2] = c>> 8 & 0xff;
+    node->val[3] = c     & 0xff;
 
     color_used[pal_id] = 1;
 
     /* get the two boxes this node creates */
     box1 = box2 = *box;
-    box1.max[component] = node->val[component];
-    box2.min[component] = node->val[component] + 1;
+    box1.max[component-1] = node->val[component];
+    box2.min[component-1] = node->val[component] + 1;
 
-    node_left_id = colormap_insert(map, color_used, nb_used, palette, &box1);
+    node_left_id = colormap_insert(map, color_used, nb_used, palette, trans_thresh, &box1);
 
-    if (box2.min[component] <= box2.max[component])
-        node_right_id = colormap_insert(map, color_used, nb_used, palette, &box2);
+    if (box2.min[component-1] <= box2.max[component-1])
+        node_right_id = colormap_insert(map, color_used, nb_used, palette, trans_thresh, &box2);
 
     node->left_id  = node_left_id;
     node->right_id = node_right_id;
@@ -711,6 +738,16 @@
 
     /* disable transparent colors and dups */
     qsort(s->palette, AVPALETTE_COUNT, sizeof(*s->palette), cmp_pal_entry);
+    // update transparency index:
+    if (s->transparency_index >= 0) {
+        for (i = 0; i < AVPALETTE_COUNT; i++) {
+            if ((s->palette[i]>>24 & 0xff) == 0) {
+                s->transparency_index = i; // we are assuming at most one transparent color in palette
+                break;
+            }
+        }
+    }
+
     for (i = 0; i < AVPALETTE_COUNT; i++) {
         const uint32_t c = s->palette[i];
         if (i != 0 && c == last_color) {
@@ -718,7 +755,7 @@
             continue;
         }
         last_color = c;
-        if ((c & 0xff000000) != 0xff000000) {
+        if (c >> 24 < s->trans_thresh) {
             color_used[i] = 1; // ignore transparent color(s)
             continue;
         }
@@ -727,13 +764,13 @@
     box.min[0] = box.min[1] = box.min[2] = 0x00;
     box.max[0] = box.max[1] = box.max[2] = 0xff;
 
-    colormap_insert(s->map, color_used, &nb_used, s->palette, &box);
+    colormap_insert(s->map, color_used, &nb_used, s->palette, s->trans_thresh, &box);
 
     if (s->dot_filename)
         disp_tree(s->map, s->dot_filename);
 
     if (s->debug_accuracy) {
-        if (!debug_accuracy(s->map, s->palette, s->color_search_method))
+        if (!debug_accuracy(s->map, s->palette, s->trans_thresh, s->color_search_method))
             av_log(NULL, AV_LOG_INFO, "Accuracy check passed\n");
     }
 }
@@ -754,9 +791,9 @@
         for (x = 0; x < in1->width; x++) {
             const uint32_t c1 = src1[x];
             const uint32_t c2 = palette[src2[x]];
-            const uint8_t rgb1[] = {c1 >> 16 & 0xff, c1 >> 8 & 0xff, c1 & 0xff};
-            const uint8_t rgb2[] = {c2 >> 16 & 0xff, c2 >> 8 & 0xff, c2 & 0xff};
-            mean_err += diff(rgb1, rgb2);
+            const uint8_t argb1[] = {0xff, c1 >> 16 & 0xff, c1 >> 8 & 0xff, c1 & 0xff};
+            const uint8_t argb2[] = {0xff, c2 >> 16 & 0xff, c2 >> 8 & 0xff, c2 & 0xff};
+            mean_err += diff(argb1, argb2, s->trans_thresh);
         }
         src1 += src1_linesize;
         src2 += src2_linesize;
@@ -857,9 +894,9 @@
     *hp = height;
 }
 
-static AVFrame *apply_palette(AVFilterLink *inlink, AVFrame *in)
+static int apply_palette(AVFilterLink *inlink, AVFrame *in, AVFrame **outf)
 {
-    int x, y, w, h;
+    int x, y, w, h, ret;
     AVFilterContext *ctx = inlink->dst;
     PaletteUseContext *s = ctx->priv;
     AVFilterLink *outlink = inlink->dst->outputs[0];
@@ -867,7 +904,8 @@
     AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out) {
         av_frame_free(&in);
-        return NULL;
+        *outf = NULL;
+        return AVERROR(ENOMEM);
     }
     av_frame_copy_props(out, in);
 
@@ -881,21 +919,25 @@
         av_frame_make_writable(s->last_in) < 0) {
         av_frame_free(&in);
         av_frame_free(&out);
-        return NULL;
+        *outf = NULL;
+        return AVERROR(ENOMEM);
     }
 
     ff_dlog(ctx, "%dx%d rect: (%d;%d) -> (%d,%d) [area:%dx%d]\n",
             w, h, x, y, x+w, y+h, in->width, in->height);
 
-    if (s->set_frame(s, out, in, x, y, w, h) < 0) {
+    ret = s->set_frame(s, out, in, x, y, w, h);
+    if (ret < 0) {
         av_frame_free(&out);
-        return NULL;
+        *outf = NULL;
+        return ret;
     }
     memcpy(out->data[1], s->palette, AVPALETTE_SIZE);
     if (s->calc_mean_err)
         debug_mean_error(s, in, out, inlink->frame_count_out);
     av_frame_free(&in);
-    return out;
+    *outf = out;
+    return 0;
 }
 
 static int config_output(AVFilterLink *outlink)
@@ -941,6 +983,8 @@
     const uint32_t *p = (const uint32_t *)palette_frame->data[0];
     const int p_linesize = palette_frame->linesize[0] >> 2;
 
+    s->transparency_index = -1;
+
     if (s->new) {
         memset(s->palette, 0, sizeof(s->palette));
         memset(s->map, 0, sizeof(s->map));
@@ -951,8 +995,13 @@
 
     i = 0;
     for (y = 0; y < palette_frame->height; y++) {
-        for (x = 0; x < palette_frame->width; x++)
-            s->palette[i++] = p[x];
+        for (x = 0; x < palette_frame->width; x++) {
+            s->palette[i] = p[x];
+            if (p[x]>>24 < s->trans_thresh) {
+                s->transparency_index = i; // we are assuming at most one transparent color in palette
+            }
+            i++;
+        }
         p += p_linesize;
     }
 
@@ -967,7 +1016,7 @@
     AVFilterContext *ctx = fs->parent;
     AVFilterLink *inlink = ctx->inputs[0];
     PaletteUseContext *s = ctx->priv;
-    AVFrame *master, *second, *out;
+    AVFrame *master, *second, *out = NULL;
     int ret;
 
     // writable for error diffusal dithering
@@ -981,12 +1030,13 @@
     if (!s->palette_loaded) {
         load_palette(s, second);
     }
-    out = apply_palette(inlink, master);
+    ret = apply_palette(inlink, master, &out);
+    if (ret < 0)
+        goto error;
     return ff_filter_frame(ctx->outputs[0], out);
 
 error:
     av_frame_free(&master);
-    av_frame_free(&second);
     return ret;
 }
 

diff --git a/libavfilter/vf_pixdesctest.c b/libavfilter/vf_pixdesctest.c
index d6423ac..2d0749e 100644
--- a/libavfilter/vf_pixdesctest.c
+++ b/libavfilter/vf_pixdesctest.c

@@ -81,7 +81,7 @@
 
     /* copy palette */
     if (priv->pix_desc->flags & AV_PIX_FMT_FLAG_PAL ||
-        priv->pix_desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
+        ((priv->pix_desc->flags & FF_PSEUDOPAL) && out->data[1] && in->data[1]))
         memcpy(out->data[1], in->data[1], AVPALETTE_SIZE);
 
     for (c = 0; c < priv->pix_desc->nb_components; c++) {

diff --git a/libavfilter/vf_premultiply.c b/libavfilter/vf_premultiply.c
index 5120adc..a9404b4 100644
--- a/libavfilter/vf_premultiply.c
+++ b/libavfilter/vf_premultiply.c

@@ -28,6 +28,10 @@
 #include "internal.h"
 #include "video.h"
 
+typedef struct ThreadData {
+    AVFrame *m, *a, *d;
+} ThreadData;
+
 typedef struct PreMultiplyContext {
     const AVClass *class;
     int width[4], height[4];
@@ -70,7 +74,7 @@
         AV_PIX_FMT_YUV444P16,
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
 
@@ -272,7 +276,7 @@
     for (y = 0; y < h; y++) {
         for (x = 0; x < w; x++) {
             if (asrc[x] > 0 && asrc[x] < 255)
-                dst[x] = FFMIN((msrc[x] - offset) * 255 / asrc[x] + offset, 255);
+                dst[x] = FFMIN(FFMAX(msrc[x] - offset, 0) * 255 / asrc[x] + offset, 255);
             else
                 dst[x] = msrc[x];
         }
@@ -350,7 +354,7 @@
     for (y = 0; y < h; y++) {
         for (x = 0; x < w; x++) {
             if (asrc[x] > 0 && asrc[x] < max)
-                dst[x] = FFMAX(FFMIN((msrc[x] - offset) * (unsigned)max / asrc[x] + offset, max), 0);
+                dst[x] = FFMAX(FFMIN(FFMAX(msrc[x] - offset, 0) * (unsigned)max / asrc[x] + offset, max), 0);
             else
                 dst[x] = msrc[x];
         }
@@ -361,6 +365,41 @@
     }
 }
 
+static int premultiply_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    PreMultiplyContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *out = td->d;
+    AVFrame *alpha = td->a;
+    AVFrame *base = td->m;
+    int p;
+
+    for (p = 0; p < s->nb_planes; p++) {
+        const int slice_start = (s->height[p] * jobnr) / nb_jobs;
+        const int slice_end = (s->height[p] * (jobnr+1)) / nb_jobs;
+
+        if (!((1 << p) & s->planes) || p == 3) {
+            av_image_copy_plane(out->data[p] + slice_start * out->linesize[p],
+                                out->linesize[p],
+                                base->data[p] + slice_start * base->linesize[p],
+                                base->linesize[p],
+                                s->linesize[p], slice_end - slice_start);
+            continue;
+        }
+
+        s->premultiply[p](base->data[p] + slice_start * base->linesize[p],
+                          s->inplace ? alpha->data[3] + slice_start * alpha->linesize[3] :
+                                       alpha->data[0] + slice_start * alpha->linesize[0],
+                          out->data[p] + slice_start * out->linesize[p],
+                          base->linesize[p], s->inplace ? alpha->linesize[3] : alpha->linesize[0],
+                          out->linesize[p],
+                          s->width[p], slice_end - slice_start,
+                          s->half, s->inverse ? s->max : s->depth, s->offset);
+    }
+
+    return 0;
+}
+
 static int filter_frame(AVFilterContext *ctx,
                         AVFrame **out, AVFrame *base, AVFrame *alpha)
 {
@@ -372,7 +411,8 @@
         if (!*out)
             return AVERROR(ENOMEM);
     } else {
-        int p, full, limited;
+        ThreadData td;
+        int full, limited;
 
         *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
         if (!*out)
@@ -424,6 +464,7 @@
             case AV_PIX_FMT_GRAY9:
             case AV_PIX_FMT_GRAY10:
             case AV_PIX_FMT_GRAY12:
+            case AV_PIX_FMT_GRAY14:
             case AV_PIX_FMT_GRAY16:
                 s->premultiply[0] = limited ? unpremultiply16offset : unpremultiply16;
                 break;
@@ -470,26 +511,18 @@
             case AV_PIX_FMT_GRAY9:
             case AV_PIX_FMT_GRAY10:
             case AV_PIX_FMT_GRAY12:
+            case AV_PIX_FMT_GRAY14:
             case AV_PIX_FMT_GRAY16:
                 s->premultiply[0] = limited ? premultiply16offset : premultiply16;
                 break;
             }
         }
 
-        for (p = 0; p < s->nb_planes; p++) {
-            if (!((1 << p) & s->planes) || p == 3) {
-                av_image_copy_plane((*out)->data[p], (*out)->linesize[p], base->data[p], base->linesize[p],
-                                    s->linesize[p], s->height[p]);
-                continue;
-            }
-
-            s->premultiply[p](base->data[p], s->inplace ? alpha->data[3] : alpha->data[0],
-                              (*out)->data[p],
-                              base->linesize[p], s->inplace ? alpha->linesize[3] : alpha->linesize[0],
-                              (*out)->linesize[p],
-                              s->width[p], s->height[p],
-                              s->half, s->inverse ? s->max : s->depth, s->offset);
-        }
+        td.d = *out;
+        td.a = alpha;
+        td.m = base;
+        ctx->internal->execute(ctx, premultiply_slice, &td, NULL, FFMIN(s->height[0],
+                                                                        ff_filter_get_nb_threads(ctx)));
     }
 
     return 0;
@@ -607,9 +640,10 @@
         int64_t pts;
 
         if ((ret = ff_inlink_consume_frame(ctx->inputs[0], &frame)) > 0) {
-            if ((ret = filter_frame(ctx, &out, frame, frame)) < 0)
-                return ret;
+            ret = filter_frame(ctx, &out, frame, frame);
             av_frame_free(&frame);
+            if (ret < 0)
+                return ret;
             ret = ff_filter_frame(ctx->outputs[0], out);
         }
         if (ret < 0) {
@@ -694,7 +728,8 @@
     .outputs       = premultiply_outputs,
     .priv_class    = &premultiply_class,
     .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
-                     AVFILTER_FLAG_DYNAMIC_INPUTS,
+                     AVFILTER_FLAG_DYNAMIC_INPUTS |
+                     AVFILTER_FLAG_SLICE_THREADS,
 };
 
 #endif /* CONFIG_PREMULTIPLY_FILTER */
@@ -716,7 +751,8 @@
     .outputs       = premultiply_outputs,
     .priv_class    = &unpremultiply_class,
     .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
-                     AVFILTER_FLAG_DYNAMIC_INPUTS,
+                     AVFILTER_FLAG_DYNAMIC_INPUTS |
+                     AVFILTER_FLAG_SLICE_THREADS,
 };
 
 #endif /* CONFIG_UNPREMULTIPLY_FILTER */

diff --git a/libavfilter/vf_procamp_vaapi.c b/libavfilter/vf_procamp_vaapi.c
new file mode 100644
index 0000000..10eccbe
--- /dev/null
+++ b/libavfilter/vf_procamp_vaapi.c

@@ -0,0 +1,270 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "vaapi_vpp.h"
+
+// ProcAmp Min/Max/Default Values
+#define BRIGHTNESS_MIN     -100.0F
+#define BRIGHTNESS_MAX      100.0F
+#define BRIGHTNESS_DEFAULT    0.0F
+
+#define CONTRAST_MIN          0.0F
+#define CONTRAST_MAX         10.0F
+#define CONTRAST_DEFAULT      1.0F
+
+#define HUE_MIN            -180.0F
+#define HUE_MAX             180.0F
+#define HUE_DEFAULT           0.0F
+
+#define SATURATION_MIN        0.0F
+#define SATURATION_MAX       10.0F
+#define SATURATION_DEFAULT    1.0F
+
+typedef struct ProcampVAAPIContext {
+    VAAPIVPPContext vpp_ctx; // must be the first field
+
+    float bright;
+    float hue;
+    float saturation;
+    float contrast;
+} ProcampVAAPIContext;
+
+static float map(float x, float in_min, float in_max, float out_min, float out_max)
+{
+    double slope, output;
+
+    slope = 1.0 * (out_max - out_min) / (in_max - in_min);
+    output = out_min + slope * (x - in_min);
+
+    return (float)output;
+}
+
+static int procamp_vaapi_build_filter_params(AVFilterContext *avctx)
+{
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+    ProcampVAAPIContext *ctx = avctx->priv;
+    VAStatus vas;
+    VAProcFilterParameterBufferColorBalance procamp_params[4];
+    VAProcFilterCapColorBalance procamp_caps[VAProcColorBalanceCount];
+    int num_caps;
+    int i = 0;
+
+    memset(&procamp_params, 0, sizeof(procamp_params));
+    memset(&procamp_caps, 0, sizeof(procamp_caps));
+
+    num_caps = VAProcColorBalanceCount;
+    vas = vaQueryVideoProcFilterCaps(vpp_ctx->hwctx->display, vpp_ctx->va_context,
+                                     VAProcFilterColorBalance, &procamp_caps, &num_caps);
+
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query procamp "
+               "filter caps: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR(EIO);
+    }
+
+    /* brightness */
+    procamp_params[i].type   = VAProcFilterColorBalance;
+    procamp_params[i].attrib = VAProcColorBalanceBrightness;
+    procamp_params[i].value  = map(ctx->bright, BRIGHTNESS_MIN, BRIGHTNESS_MAX,
+                                   procamp_caps[VAProcColorBalanceBrightness-1].range.min_value,
+                                   procamp_caps[VAProcColorBalanceBrightness-1].range.max_value);
+    i++;
+
+    /* contrast */
+    procamp_params[i].type   = VAProcFilterColorBalance;
+    procamp_params[i].attrib = VAProcColorBalanceContrast;
+    procamp_params[i].value  = map(ctx->contrast, CONTRAST_MIN, CONTRAST_MAX,
+                                   procamp_caps[VAProcColorBalanceContrast-1].range.min_value,
+                                   procamp_caps[VAProcColorBalanceContrast-1].range.max_value);
+    i++;
+
+    /* hue */
+    procamp_params[i].type   = VAProcFilterColorBalance;
+    procamp_params[i].attrib = VAProcColorBalanceHue;
+    procamp_params[i].value  = map(ctx->hue, HUE_MIN, HUE_MAX,
+                                   procamp_caps[VAProcColorBalanceHue-1].range.min_value,
+                                   procamp_caps[VAProcColorBalanceHue-1].range.max_value);
+    i++;
+
+    /* saturation */
+    procamp_params[i].type   = VAProcFilterColorBalance;
+    procamp_params[i].attrib = VAProcColorBalanceSaturation;
+    procamp_params[i].value  = map(ctx->saturation, SATURATION_MIN, SATURATION_MAX,
+                                   procamp_caps[VAProcColorBalanceSaturation-1].range.min_value,
+                                   procamp_caps[VAProcColorBalanceSaturation-1].range.max_value);
+    i++;
+
+    return ff_vaapi_vpp_make_param_buffers(avctx,
+                                           VAProcFilterParameterBufferType,
+                                           &procamp_params,
+                                           sizeof(procamp_params[0]),
+                                           i);
+}
+
+static int procamp_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame)
+{
+    AVFilterContext *avctx   = inlink->dst;
+    AVFilterLink *outlink    = avctx->outputs[0];
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+    AVFrame *output_frame = NULL;
+    VASurfaceID input_surface, output_surface;
+    VAProcPipelineParameterBuffer params;
+    VARectangle input_region;
+    int err;
+
+    av_log(avctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input_frame->format),
+           input_frame->width, input_frame->height, input_frame->pts);
+
+    if (vpp_ctx->va_context == VA_INVALID_ID)
+        return AVERROR(EINVAL);
+
+    input_surface = (VASurfaceID)(uintptr_t)input_frame->data[3];
+    av_log(avctx, AV_LOG_DEBUG, "Using surface %#x for procamp input.\n",
+           input_surface);
+
+    output_frame = ff_get_video_buffer(outlink, vpp_ctx->output_width,
+                                       vpp_ctx->output_height);
+    if (!output_frame) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    output_surface = (VASurfaceID)(uintptr_t)output_frame->data[3];
+    av_log(avctx, AV_LOG_DEBUG, "Using surface %#x for procamp output.\n",
+           output_surface);
+    memset(&params, 0, sizeof(params));
+    input_region = (VARectangle) {
+        .x      = 0,
+        .y      = 0,
+        .width  = input_frame->width,
+        .height = input_frame->height,
+    };
+
+    params.surface = input_surface;
+    params.surface_region = &input_region;
+    params.surface_color_standard =
+        ff_vaapi_vpp_colour_standard(input_frame->colorspace);
+
+    params.output_region = NULL;
+    params.output_background_color = 0xff000000;
+    params.output_color_standard = params.surface_color_standard;
+
+    params.pipeline_flags = 0;
+    params.filter_flags = VA_FRAME_PICTURE;
+
+    params.filters     = &vpp_ctx->filter_buffers[0];
+    params.num_filters = 1;
+
+    err = ff_vaapi_vpp_render_picture(avctx, &params, output_surface);
+    if (err < 0)
+        goto fail;
+
+    err = av_frame_copy_props(output_frame, input_frame);
+    if (err < 0)
+        goto fail;
+    av_frame_free(&input_frame);
+
+    av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output_frame->format),
+           output_frame->width, output_frame->height, output_frame->pts);
+
+    return ff_filter_frame(outlink, output_frame);
+
+fail:
+    av_frame_free(&input_frame);
+    av_frame_free(&output_frame);
+    return err;
+}
+
+static av_cold int procamp_vaapi_init(AVFilterContext *avctx)
+{
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+
+    ff_vaapi_vpp_ctx_init(avctx);
+    vpp_ctx->pipeline_uninit     = ff_vaapi_vpp_pipeline_uninit;
+    vpp_ctx->build_filter_params = procamp_vaapi_build_filter_params;
+    vpp_ctx->output_format       = AV_PIX_FMT_NONE;
+
+    return 0;
+}
+
+#define OFFSET(x) offsetof(ProcampVAAPIContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+static const AVOption procamp_vaapi_options[] = {
+    { "b", "Output video brightness",
+      OFFSET(bright),  AV_OPT_TYPE_FLOAT, { .dbl = BRIGHTNESS_DEFAULT }, BRIGHTNESS_MIN, BRIGHTNESS_MAX, .flags = FLAGS },
+    { "brightness", "Output video brightness",
+      OFFSET(bright),  AV_OPT_TYPE_FLOAT, { .dbl = BRIGHTNESS_DEFAULT }, BRIGHTNESS_MIN, BRIGHTNESS_MAX, .flags = FLAGS },
+    { "s", "Output video saturation",
+      OFFSET(saturation), AV_OPT_TYPE_FLOAT, { .dbl = SATURATION_DEFAULT }, SATURATION_MIN, SATURATION_MAX, .flags = FLAGS },
+    { "saturatio", "Output video saturation",
+      OFFSET(saturation), AV_OPT_TYPE_FLOAT, { .dbl = SATURATION_DEFAULT }, SATURATION_MIN, SATURATION_MAX, .flags = FLAGS },
+    { "c", "Output video contrast",
+      OFFSET(contrast),  AV_OPT_TYPE_FLOAT, { .dbl = CONTRAST_DEFAULT }, CONTRAST_MIN, CONTRAST_MAX, .flags = FLAGS },
+    { "contrast", "Output video contrast",
+      OFFSET(contrast),  AV_OPT_TYPE_FLOAT, { .dbl = CONTRAST_DEFAULT }, CONTRAST_MIN, CONTRAST_MAX, .flags = FLAGS },
+    { "h", "Output video hue",
+      OFFSET(hue), AV_OPT_TYPE_FLOAT, { .dbl = HUE_DEFAULT }, HUE_MIN, HUE_MAX, .flags = FLAGS },
+    { "hue", "Output video hue",
+      OFFSET(hue), AV_OPT_TYPE_FLOAT, { .dbl = HUE_DEFAULT }, HUE_MIN, HUE_MAX, .flags = FLAGS },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(procamp_vaapi);
+
+static const AVFilterPad procamp_vaapi_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &procamp_vaapi_filter_frame,
+        .config_props = &ff_vaapi_vpp_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad procamp_vaapi_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_vaapi_vpp_config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_procamp_vaapi = {
+    .name          = "procamp_vaapi",
+    .description   = NULL_IF_CONFIG_SMALL("ProcAmp (color balance) adjustments for hue, saturation, brightness, contrast"),
+    .priv_size     = sizeof(ProcampVAAPIContext),
+    .init          = &procamp_vaapi_init,
+    .uninit        = &ff_vaapi_vpp_ctx_uninit,
+    .query_formats = &ff_vaapi_vpp_query_formats,
+    .inputs        = procamp_vaapi_inputs,
+    .outputs       = procamp_vaapi_outputs,
+    .priv_class    = &procamp_vaapi_class,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};

diff --git a/libavfilter/vf_program_opencl.c b/libavfilter/vf_program_opencl.c
new file mode 100644
index 0000000..dfb2565
--- /dev/null
+++ b/libavfilter/vf_program_opencl.c

@@ -0,0 +1,430 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/log.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "framesync.h"
+#include "internal.h"
+#include "opencl.h"
+#include "video.h"
+
+typedef struct ProgramOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    int                 loaded;
+    cl_uint             index;
+    cl_kernel           kernel;
+    cl_command_queue    command_queue;
+
+    FFFrameSync         fs;
+    AVFrame           **frames;
+
+    const char         *source_file;
+    const char         *kernel_name;
+    int                 nb_inputs;
+    int                 width, height;
+    enum AVPixelFormat  source_format;
+    AVRational          source_rate;
+} ProgramOpenCLContext;
+
+static int program_opencl_load(AVFilterContext *avctx)
+{
+    ProgramOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    int err;
+
+    err = ff_opencl_filter_load_program_from_file(avctx, ctx->source_file);
+    if (err < 0)
+        return err;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    if (!ctx->command_queue) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
+               "command queue: %d.\n", cle);
+        return AVERROR(EIO);
+    }
+
+    ctx->kernel = clCreateKernel(ctx->ocf.program, ctx->kernel_name, &cle);
+    if (!ctx->kernel) {
+        if (cle == CL_INVALID_KERNEL_NAME) {
+            av_log(avctx, AV_LOG_ERROR, "Kernel function '%s' not found in "
+                   "program.\n", ctx->kernel_name);
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
+        }
+        return AVERROR(EIO);
+    }
+
+    ctx->loaded = 1;
+    return 0;
+}
+
+static int program_opencl_run(AVFilterContext *avctx)
+{
+    AVFilterLink     *outlink = avctx->outputs[0];
+    ProgramOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    cl_int cle;
+    size_t global_work[2];
+    cl_mem src, dst;
+    int err, input, plane;
+
+    if (!ctx->loaded) {
+        err = program_opencl_load(avctx);
+        if (err < 0)
+            return err;
+    }
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (plane = 0; plane < FF_ARRAY_ELEMS(output->data); plane++) {
+        dst = (cl_mem)output->data[plane];
+        if (!dst)
+            break;
+
+        cle = clSetKernelArg(ctx->kernel, 0, sizeof(cl_mem), &dst);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "destination image argument: %d.\n", cle);
+            err = AVERROR_UNKNOWN;
+            goto fail;
+        }
+        cle = clSetKernelArg(ctx->kernel, 1, sizeof(cl_uint), &ctx->index);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "index argument: %d.\n", cle);
+            err = AVERROR_UNKNOWN;
+            goto fail;
+        }
+
+        for (input = 0; input < ctx->nb_inputs; input++) {
+            av_assert0(ctx->frames[input]);
+
+            src = (cl_mem)ctx->frames[input]->data[plane];
+            av_assert0(src);
+
+            cle = clSetKernelArg(ctx->kernel, 2 + input, sizeof(cl_mem), &src);
+            if (cle != CL_SUCCESS) {
+                av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                       "source image argument %d: %d.\n", input, cle);
+                err = AVERROR_UNKNOWN;
+                goto fail;
+            }
+        }
+
+        err = ff_opencl_filter_work_size_from_image(avctx, global_work,
+                                                    output, plane, 0);
+        if (err < 0)
+            goto fail;
+
+        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+               "(%zux%zu).\n", plane, global_work[0], global_work[1]);
+
+        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                     global_work, NULL, 0, NULL, NULL);
+        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle);
+    }
+
+    cle = clFinish(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+
+    if (ctx->nb_inputs > 0) {
+        err = av_frame_copy_props(output, ctx->frames[0]);
+        if (err < 0)
+            goto fail;
+    } else {
+        output->pts = ctx->index;
+    }
+    ++ctx->index;
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&output);
+    return err;
+}
+
+static int program_opencl_request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+
+    return program_opencl_run(avctx);
+}
+
+static int program_opencl_filter(FFFrameSync *fs)
+{
+    AVFilterContext    *avctx = fs->parent;
+    ProgramOpenCLContext *ctx = avctx->priv;
+    int err, i;
+
+    for (i = 0; i < ctx->nb_inputs; i++) {
+        err = ff_framesync_get_frame(&ctx->fs, i, &ctx->frames[i], 0);
+        if (err < 0)
+            return err;
+    }
+
+    return program_opencl_run(avctx);
+}
+
+static int program_opencl_activate(AVFilterContext *avctx)
+{
+    ProgramOpenCLContext *ctx = avctx->priv;
+
+    av_assert0(ctx->nb_inputs > 0);
+
+    return ff_framesync_activate(&ctx->fs);
+}
+
+static int program_opencl_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext    *avctx = outlink->src;
+    ProgramOpenCLContext *ctx = avctx->priv;
+    int err;
+
+    err = ff_opencl_filter_config_output(outlink);
+    if (err < 0)
+        return err;
+
+    if (ctx->nb_inputs > 0) {
+        FFFrameSyncIn *in;
+        int i;
+
+        err = ff_framesync_init(&ctx->fs, avctx, ctx->nb_inputs);
+        if (err < 0)
+            return err;
+
+        ctx->fs.opaque = ctx;
+        ctx->fs.on_event = &program_opencl_filter;
+
+        in = ctx->fs.in;
+        for (i = 0; i < ctx->nb_inputs; i++) {
+            const AVFilterLink *inlink = avctx->inputs[i];
+
+            in[i].time_base = inlink->time_base;
+            in[i].sync      = 1;
+            in[i].before    = EXT_STOP;
+            in[i].after     = EXT_INFINITY;
+        }
+
+        err = ff_framesync_configure(&ctx->fs);
+        if (err < 0)
+            return err;
+
+    } else {
+        outlink->time_base = av_inv_q(ctx->source_rate);
+    }
+
+    return 0;
+}
+
+static av_cold int program_opencl_init(AVFilterContext *avctx)
+{
+    ProgramOpenCLContext *ctx = avctx->priv;
+    int err;
+
+    ff_opencl_filter_init(avctx);
+
+    ctx->ocf.output_width  = ctx->width;
+    ctx->ocf.output_height = ctx->height;
+
+    if (!strcmp(avctx->filter->name, "openclsrc")) {
+        if (!ctx->ocf.output_width || !ctx->ocf.output_height) {
+            av_log(avctx, AV_LOG_ERROR, "OpenCL source requires output "
+                   "dimensions to be specified.\n");
+            return AVERROR(EINVAL);
+        }
+
+        ctx->nb_inputs = 0;
+        ctx->ocf.output_format = ctx->source_format;
+    } else {
+        int i;
+
+        ctx->frames = av_mallocz_array(ctx->nb_inputs,
+                                       sizeof(*ctx->frames));
+        if (!ctx->frames)
+            return AVERROR(ENOMEM);
+
+        for (i = 0; i < ctx->nb_inputs; i++) {
+            AVFilterPad input;
+            memset(&input, 0, sizeof(input));
+
+            input.type = AVMEDIA_TYPE_VIDEO;
+            input.name = av_asprintf("input%d", i);
+            if (!input.name)
+                return AVERROR(ENOMEM);
+
+            input.config_props = &ff_opencl_filter_config_input;
+
+            err = ff_insert_inpad(avctx, i, &input);
+            if (err < 0) {
+                av_freep(&input.name);
+                return err;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static av_cold void program_opencl_uninit(AVFilterContext *avctx)
+{
+    ProgramOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    int i;
+
+    if (ctx->nb_inputs > 0) {
+        ff_framesync_uninit(&ctx->fs);
+
+        av_freep(&ctx->frames);
+        for (i = 0; i < avctx->nb_inputs; i++)
+            av_freep(&avctx->input_pads[i].name);
+    }
+
+    if (ctx->kernel) {
+        cle = clReleaseKernel(ctx->kernel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(ProgramOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+
+#if CONFIG_PROGRAM_OPENCL_FILTER
+
+static const AVOption program_opencl_options[] = {
+    { "source", "OpenCL program source file", OFFSET(source_file),
+      AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
+    { "kernel", "Kernel name in program",     OFFSET(kernel_name),
+      AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
+
+    { "inputs", "Number of inputs", OFFSET(nb_inputs),
+      AV_OPT_TYPE_INT,              { .i64 = 1 }, 1, INT_MAX, FLAGS },
+
+    { "size",   "Video size",       OFFSET(width),
+      AV_OPT_TYPE_IMAGE_SIZE,       { .str = NULL }, 0, 0, FLAGS },
+    { "s",      "Video size",       OFFSET(width),
+      AV_OPT_TYPE_IMAGE_SIZE,       { .str = NULL }, 0, 0, FLAGS },
+
+    { NULL },
+};
+
+FRAMESYNC_DEFINE_CLASS(program_opencl, ProgramOpenCLContext, fs);
+
+static const AVFilterPad program_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &program_opencl_config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_program_opencl = {
+    .name           = "program_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Filter video using an OpenCL program"),
+    .priv_size      = sizeof(ProgramOpenCLContext),
+    .priv_class     = &program_opencl_class,
+    .preinit        = &program_opencl_framesync_preinit,
+    .init           = &program_opencl_init,
+    .uninit         = &program_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .activate       = &program_opencl_activate,
+    .inputs         = NULL,
+    .outputs        = program_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif
+
+#if CONFIG_OPENCLSRC_FILTER
+
+static const AVOption openclsrc_options[] = {
+    { "source", "OpenCL program source file", OFFSET(source_file),
+      AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
+    { "kernel", "Kernel name in program",     OFFSET(kernel_name),
+      AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
+
+    { "size",   "Video size",       OFFSET(width),
+      AV_OPT_TYPE_IMAGE_SIZE,       { .str = NULL }, 0, 0, FLAGS },
+    { "s",      "Video size",       OFFSET(width),
+      AV_OPT_TYPE_IMAGE_SIZE,       { .str = NULL }, 0, 0, FLAGS },
+
+    { "format", "Video format",     OFFSET(source_format),
+      AV_OPT_TYPE_PIXEL_FMT,        { .i64 = AV_PIX_FMT_NONE }, -1, INT_MAX, FLAGS },
+
+    { "rate",   "Video frame rate", OFFSET(source_rate),
+      AV_OPT_TYPE_VIDEO_RATE,       { .str = "25" }, 0, INT_MAX, FLAGS },
+    { "r",      "Video frame rate", OFFSET(source_rate),
+      AV_OPT_TYPE_VIDEO_RATE,       { .str = "25" }, 0, INT_MAX, FLAGS },
+
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(openclsrc);
+
+static const AVFilterPad openclsrc_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = &program_opencl_config_output,
+        .request_frame = &program_opencl_request_frame,
+    },
+    { NULL }
+};
+
+AVFilter ff_vsrc_openclsrc = {
+    .name           = "openclsrc",
+    .description    = NULL_IF_CONFIG_SMALL("Generate video using an OpenCL program"),
+    .priv_size      = sizeof(ProgramOpenCLContext),
+    .priv_class     = &openclsrc_class,
+    .init           = &program_opencl_init,
+    .uninit         = &program_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = NULL,
+    .outputs        = openclsrc_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif

diff --git a/libavfilter/vf_pseudocolor.c b/libavfilter/vf_pseudocolor.c
index f8f5372..e48b69c 100644
--- a/libavfilter/vf_pseudocolor.c
+++ b/libavfilter/vf_pseudocolor.c

@@ -94,7 +94,7 @@
 };
 
 static const enum AVPixelFormat pix_fmts[] = {
-    AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY16,
+    AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
     AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P,
     AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA422P,
     AV_PIX_FMT_YUV444P, AV_PIX_FMT_GBRP,
@@ -114,7 +114,10 @@
     AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUVA420P16,
     AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUVA422P16,
     AV_PIX_FMT_YUV444P16, AV_PIX_FMT_YUVA444P16,
+    AV_PIX_FMT_GBRP9,
     AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10,
+    AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12,
+    AV_PIX_FMT_GBRP14,
     AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16,
     AV_PIX_FMT_NONE
 };
@@ -531,11 +534,18 @@
     case AV_PIX_FMT_YUV444P14:
     case AV_PIX_FMT_YUV444P16:
     case AV_PIX_FMT_YUVA444P16:
+    case AV_PIX_FMT_GBRP9:
     case AV_PIX_FMT_GBRP10:
-    case AV_PIX_FMT_GBRAP10:
+    case AV_PIX_FMT_GBRP12:
+    case AV_PIX_FMT_GBRP14:
     case AV_PIX_FMT_GBRP16:
+    case AV_PIX_FMT_GBRAP10:
+    case AV_PIX_FMT_GBRAP12:
     case AV_PIX_FMT_GBRAP16:
+    case AV_PIX_FMT_GRAY9:
     case AV_PIX_FMT_GRAY10:
+    case AV_PIX_FMT_GRAY12:
+    case AV_PIX_FMT_GRAY14:
     case AV_PIX_FMT_GRAY16:
         s->filter[0] = s->filter[1] = s->filter[2] = s->filter[3] = pseudocolor_filter_16;
         break;

diff --git a/libavfilter/vf_psnr.c b/libavfilter/vf_psnr.c
index 493a501..0675a17 100644
--- a/libavfilter/vf_psnr.c
+++ b/libavfilter/vf_psnr.c

@@ -260,7 +260,7 @@
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat pix_fmts[] = {
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
 #define PF_NOALPHA(suf) AV_PIX_FMT_YUV420##suf,  AV_PIX_FMT_YUV422##suf,  AV_PIX_FMT_YUV444##suf
 #define PF_ALPHA(suf)   AV_PIX_FMT_YUVA420##suf, AV_PIX_FMT_YUVA422##suf, AV_PIX_FMT_YUVA444##suf
 #define PF(suf)         PF_NOALPHA(suf), PF_ALPHA(suf)
@@ -270,7 +270,7 @@
         AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
-        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16,
+        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
         AV_PIX_FMT_NONE
     };
 

diff --git a/libavfilter/vf_remap.c b/libavfilter/vf_remap.c
index d242847..48ec38a 100644
--- a/libavfilter/vf_remap.c
+++ b/libavfilter/vf_remap.c

@@ -52,9 +52,7 @@
     int step;
     FFFrameSync fs;
 
-    void (*remap)(struct RemapContext *s, const AVFrame *in,
-                  const AVFrame *xin, const AVFrame *yin,
-                  AVFrame *out);
+    int (*remap_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
 } RemapContext;
 
 #define OFFSET(x) offsetof(RemapContext, x)
@@ -66,6 +64,13 @@
 
 AVFILTER_DEFINE_CLASS(remap);
 
+typedef struct ThreadData {
+    AVFrame *in, *xin, *yin, *out;
+    int nb_planes;
+    int nb_components;
+    int step;
+} ThreadData;
+
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat pix_fmts[] = {
@@ -85,7 +90,7 @@
         AV_PIX_FMT_RGBA64, AV_PIX_FMT_BGRA64,
         AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9,
         AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12,
-        AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
     static const enum AVPixelFormat map_fmts[] = {
@@ -121,67 +126,48 @@
  * pixels are copied from source to target using :
  * Target_frame[y][x] = Source_frame[ ymap[y][x] ][ [xmap[y][x] ];
  */
-static void remap_planar(RemapContext *s, const AVFrame *in,
-                         const AVFrame *xin, const AVFrame *yin,
-                         AVFrame *out)
-{
-    const int xlinesize = xin->linesize[0] / 2;
-    const int ylinesize = yin->linesize[0] / 2;
-    int x , y, plane;
-
-    for (plane = 0; plane < s->nb_planes ; plane++) {
-        uint8_t *dst         = out->data[plane];
-        const int dlinesize  = out->linesize[plane];
-        const uint8_t *src   = in->data[plane];
-        const int slinesize  = in->linesize[plane];
-        const uint16_t *xmap = (const uint16_t *)xin->data[0];
-        const uint16_t *ymap = (const uint16_t *)yin->data[0];
-
-        for (y = 0; y < out->height; y++) {
-            for (x = 0; x < out->width; x++) {
-                if (ymap[x] < in->height && xmap[x] < in->width) {
-                    dst[x] = src[ymap[x] * slinesize + xmap[x]];
-                } else {
-                    dst[x] = 0;
-                }
-            }
-            dst  += dlinesize;
-            xmap += xlinesize;
-            ymap += ylinesize;
-        }
-    }
+#define DEFINE_REMAP_PLANAR_FUNC(name, bits, div)                                           \
+static int remap_planar##bits##_##name##_slice(AVFilterContext *ctx, void *arg,             \
+                                               int jobnr, int nb_jobs)                      \
+{                                                                                           \
+    const ThreadData *td = (ThreadData*)arg;                                                \
+    const AVFrame *in  = td->in;                                                            \
+    const AVFrame *xin = td->xin;                                                           \
+    const AVFrame *yin = td->yin;                                                           \
+    const AVFrame *out = td->out;                                                           \
+    const int slice_start = (out->height *  jobnr   ) / nb_jobs;                            \
+    const int slice_end   = (out->height * (jobnr+1)) / nb_jobs;                            \
+    const int xlinesize = xin->linesize[0] / 2;                                             \
+    const int ylinesize = yin->linesize[0] / 2;                                             \
+    int x , y, plane;                                                                       \
+                                                                                            \
+    for (plane = 0; plane < td->nb_planes ; plane++) {                                      \
+        const int dlinesize  = out->linesize[plane] / div;                                  \
+        const uint##bits##_t *src = (const uint##bits##_t *)in->data[plane];                \
+        uint##bits##_t *dst = (uint##bits##_t *)out->data[plane] + slice_start * dlinesize; \
+        const int slinesize  = in->linesize[plane] / div;                                   \
+        const uint16_t *xmap = (const uint16_t *)xin->data[0] + slice_start * xlinesize;    \
+        const uint16_t *ymap = (const uint16_t *)yin->data[0] + slice_start * ylinesize;    \
+                                                                                            \
+        for (y = slice_start; y < slice_end; y++) {                                         \
+            for (x = 0; x < out->width; x++) {                                              \
+                if (ymap[x] < in->height && xmap[x] < in->width) {                          \
+                    dst[x] = src[ymap[x] * slinesize + xmap[x]];                            \
+                } else {                                                                    \
+                    dst[x] = 0;                                                             \
+                }                                                                           \
+            }                                                                               \
+            dst  += dlinesize;                                                              \
+            xmap += xlinesize;                                                              \
+            ymap += ylinesize;                                                              \
+        }                                                                                   \
+    }                                                                                       \
+                                                                                            \
+    return 0;                                                                               \
 }
 
-static void remap_planar16(RemapContext *s, const AVFrame *in,
-                           const AVFrame *xin, const AVFrame *yin,
-                           AVFrame *out)
-{
-    const int xlinesize = xin->linesize[0] / 2;
-    const int ylinesize = yin->linesize[0] / 2;
-    int x , y, plane;
-
-    for (plane = 0; plane < s->nb_planes ; plane++) {
-        uint16_t *dst        = (uint16_t *)out->data[plane];
-        const int dlinesize  = out->linesize[plane] / 2;
-        const uint16_t *src  = (const uint16_t *)in->data[plane];
-        const int slinesize  = in->linesize[plane] / 2;
-        const uint16_t *xmap = (const uint16_t *)xin->data[0];
-        const uint16_t *ymap = (const uint16_t *)yin->data[0];
-
-        for (y = 0; y < out->height; y++) {
-            for (x = 0; x < out->width; x++) {
-                if (ymap[x] < in->height && xmap[x] < in->width) {
-                    dst[x] = src[ymap[x] * slinesize + xmap[x]];
-                } else {
-                    dst[x] = 0;
-                }
-            }
-            dst  += dlinesize;
-            xmap += xlinesize;
-            ymap += ylinesize;
-        }
-    }
-}
+DEFINE_REMAP_PLANAR_FUNC(nearest, 8, 1)
+DEFINE_REMAP_PLANAR_FUNC(nearest, 16, 2)
 
 /**
  * remap_packed algorithm expects pixels with both padded bits (step) and
@@ -189,67 +175,48 @@
  * pixels are copied from source to target using :
  * Target_frame[y][x] = Source_frame[ ymap[y][x] ][ [xmap[y][x] ];
  */
-static void remap_packed(RemapContext *s, const AVFrame *in,
-                         const AVFrame *xin, const AVFrame *yin,
-                         AVFrame *out)
-{
-    uint8_t *dst = out->data[0];
-    const uint8_t *src  = in->data[0];
-    const int dlinesize = out->linesize[0];
-    const int slinesize = in->linesize[0];
-    const int xlinesize = xin->linesize[0] / 2;
-    const int ylinesize = yin->linesize[0] / 2;
-    const uint16_t *xmap = (const uint16_t *)xin->data[0];
-    const uint16_t *ymap = (const uint16_t *)yin->data[0];
-    const int step = s->step;
-    int c, x, y;
-
-    for (y = 0; y < out->height; y++) {
-        for (x = 0; x < out->width; x++) {
-            for (c = 0; c < s->nb_components; c++) {
-                if (ymap[x] < in->height && xmap[x] < in->width) {
-                    dst[x * step + c] = src[ymap[x] * slinesize + xmap[x] * step + c];
-                } else {
-                    dst[x * step + c] = 0;
-                }
-            }
-        }
-        dst  += dlinesize;
-        xmap += xlinesize;
-        ymap += ylinesize;
-    }
+#define DEFINE_REMAP_PACKED_FUNC(name, bits, div)                                           \
+static int remap_packed##bits##_##name##_slice(AVFilterContext *ctx, void *arg,             \
+                                               int jobnr, int nb_jobs)                      \
+{                                                                                           \
+    const ThreadData *td = (ThreadData*)arg;                                                \
+    const AVFrame *in  = td->in;                                                            \
+    const AVFrame *xin = td->xin;                                                           \
+    const AVFrame *yin = td->yin;                                                           \
+    const AVFrame *out = td->out;                                                           \
+    const int slice_start = (out->height *  jobnr   ) / nb_jobs;                            \
+    const int slice_end   = (out->height * (jobnr+1)) / nb_jobs;                            \
+    const int dlinesize  = out->linesize[0] / div;                                          \
+    const int slinesize  = in->linesize[0] / div;                                           \
+    const int xlinesize  = xin->linesize[0] / 2;                                            \
+    const int ylinesize  = yin->linesize[0] / 2;                                            \
+    const uint##bits##_t *src = (const uint##bits##_t *)in->data[0];                        \
+    uint##bits##_t *dst = (uint##bits##_t *)out->data[0] + slice_start * dlinesize;         \
+    const uint16_t *xmap = (const uint16_t *)xin->data[0] + slice_start * xlinesize;        \
+    const uint16_t *ymap = (const uint16_t *)yin->data[0] + slice_start * ylinesize;        \
+    const int step       = td->step / div;                                                  \
+    int c, x, y;                                                                            \
+                                                                                            \
+    for (y = slice_start; y < slice_end; y++) {                                             \
+        for (x = 0; x < out->width; x++) {                                                  \
+            for (c = 0; c < td->nb_components; c++) {                                       \
+                if (ymap[x] < in->height && xmap[x] < in->width) {                          \
+                    dst[x * step + c] = src[ymap[x] * slinesize + xmap[x] * step + c];      \
+                } else {                                                                    \
+                    dst[x * step + c] = 0;                                                  \
+                }                                                                           \
+            }                                                                               \
+        }                                                                                   \
+        dst  += dlinesize;                                                                  \
+        xmap += xlinesize;                                                                  \
+        ymap += ylinesize;                                                                  \
+    }                                                                                       \
+                                                                                            \
+    return 0;                                                                               \
 }
 
-static void remap_packed16(RemapContext *s, const AVFrame *in,
-                           const AVFrame *xin, const AVFrame *yin,
-                           AVFrame *out)
-{
-    uint16_t *dst = (uint16_t *)out->data[0];
-    const uint16_t *src  = (const uint16_t *)in->data[0];
-    const int dlinesize = out->linesize[0] / 2;
-    const int slinesize = in->linesize[0] / 2;
-    const int xlinesize = xin->linesize[0] / 2;
-    const int ylinesize = yin->linesize[0] / 2;
-    const uint16_t *xmap = (const uint16_t *)xin->data[0];
-    const uint16_t *ymap = (const uint16_t *)yin->data[0];
-    const int step = s->step / 2;
-    int c, x, y;
-
-    for (y = 0; y < out->height; y++) {
-        for (x = 0; x < out->width; x++) {
-            for (c = 0; c < s->nb_components; c++) {
-                if (ymap[x] < in->height && xmap[x] < in->width) {
-                    dst[x * step + c] = src[ymap[x] * slinesize + xmap[x] * step + c];
-                } else {
-                    dst[x * step + c] = 0;
-                }
-            }
-        }
-        dst  += dlinesize;
-        xmap += xlinesize;
-        ymap += ylinesize;
-    }
-}
+DEFINE_REMAP_PACKED_FUNC(nearest, 8, 1)
+DEFINE_REMAP_PACKED_FUNC(nearest, 16, 2)
 
 static int config_input(AVFilterLink *inlink)
 {
@@ -262,15 +229,15 @@
 
     if (desc->comp[0].depth == 8) {
         if (s->nb_planes > 1 || s->nb_components == 1) {
-            s->remap = remap_planar;
+            s->remap_slice = remap_planar8_nearest_slice;
         } else {
-            s->remap = remap_packed;
+            s->remap_slice = remap_packed8_nearest_slice;
         }
     } else {
         if (s->nb_planes > 1 || s->nb_components == 1) {
-            s->remap = remap_planar16;
+            s->remap_slice = remap_planar16_nearest_slice;
         } else {
-            s->remap = remap_packed16;
+            s->remap_slice = remap_packed16_nearest_slice;
         }
     }
 
@@ -296,12 +263,21 @@
         if (!out)
             return AVERROR(ENOMEM);
     } else {
+        ThreadData td;
+
         out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
         if (!out)
             return AVERROR(ENOMEM);
         av_frame_copy_props(out, in);
 
-        s->remap(s, in, xpic, ypic, out);
+        td.in  = in;
+        td.xin = xpic;
+        td.yin = ypic;
+        td.out = out;
+        td.nb_planes = s->nb_planes;
+        td.nb_components = s->nb_components;
+        td.step = s->step;
+        ctx->internal->execute(ctx, s->remap_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
     }
     out->pts = av_rescale_q(in->pts, s->fs.time_base, outlink->time_base);
 
@@ -362,7 +338,6 @@
     return ff_framesync_activate(&s->fs);
 }
 
-
 static av_cold void uninit(AVFilterContext *ctx)
 {
     RemapContext *s = ctx->priv;
@@ -406,5 +381,5 @@
     .inputs        = remap_inputs,
     .outputs       = remap_outputs,
     .priv_class    = &remap_class,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };

diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c
index 3329c12..f741419 100644
--- a/libavfilter/vf_scale.c
+++ b/libavfilter/vf_scale.c

@@ -261,11 +261,10 @@
 
     /* TODO: make algorithm configurable */
 
-    scale->input_is_pal = desc->flags & AV_PIX_FMT_FLAG_PAL ||
-                          desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL;
+    scale->input_is_pal = desc->flags & AV_PIX_FMT_FLAG_PAL;
     if (outfmt == AV_PIX_FMT_PAL8) outfmt = AV_PIX_FMT_BGR8;
     scale->output_is_pal = av_pix_fmt_desc_get(outfmt)->flags & AV_PIX_FMT_FLAG_PAL ||
-                           av_pix_fmt_desc_get(outfmt)->flags & AV_PIX_FMT_FLAG_PSEUDOPAL;
+                           av_pix_fmt_desc_get(outfmt)->flags & FF_PSEUDOPAL;
 
     if (scale->sws)
         sws_freeContext(scale->sws);
@@ -362,6 +361,7 @@
     outlink->h = inlink->h;
     outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
     outlink->time_base = inlink->time_base;
+    outlink->frame_rate = inlink->frame_rate;
 
     return 0;
 }
@@ -577,7 +577,9 @@
     {  "in_range", "set input color range",  OFFSET( in_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED }, 0, 2, FLAGS, "range" },
     { "out_range", "set output color range", OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED }, 0, 2, FLAGS, "range" },
     { "auto",   NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_UNSPECIFIED }, 0, 0, FLAGS, "range" },
+    { "unknown", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_UNSPECIFIED }, 0, 0, FLAGS, "range" },
     { "full",   NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" },
+    { "limited",NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" },
     { "jpeg",   NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" },
     { "mpeg",   NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" },
     { "tv",     NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" },

diff --git a/libavfilter/vf_scale_qsv.c b/libavfilter/vf_scale_qsv.c
index a5f5be7..7d593b2 100644
--- a/libavfilter/vf_scale_qsv.c
+++ b/libavfilter/vf_scale_qsv.c

@@ -36,6 +36,7 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/time.h"
+#include "libavfilter/qsvvpp.h"
 
 #include "avfilter.h"
 #include "formats.h"
@@ -71,7 +72,6 @@
 typedef struct QSVScaleContext {
     const AVClass *class;
 
-    AVBufferRef *out_frames_ref;
     /* a clone of the main session, used internally for scaling */
     mfxSession   session;
 
@@ -134,7 +134,6 @@
         MFXClose(s->session);
         s->session = NULL;
     }
-    av_buffer_unref(&s->out_frames_ref);
 
     av_freep(&s->mem_ids_in);
     av_freep(&s->mem_ids_out);
@@ -165,6 +164,7 @@
                          int out_width, int out_height)
 {
     QSVScaleContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
 
     AVHWFramesContext *in_frames_ctx;
     AVHWFramesContext *out_frames_ctx;
@@ -185,21 +185,25 @@
     in_format     = in_frames_ctx->sw_format;
     out_format    = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format;
 
-    s->out_frames_ref = av_hwframe_ctx_alloc(in_frames_ctx->device_ref);
-    if (!s->out_frames_ref)
+    outlink->hw_frames_ctx = av_hwframe_ctx_alloc(in_frames_ctx->device_ref);
+    if (!outlink->hw_frames_ctx)
         return AVERROR(ENOMEM);
-    out_frames_ctx   = (AVHWFramesContext*)s->out_frames_ref->data;
+    out_frames_ctx   = (AVHWFramesContext*)outlink->hw_frames_ctx->data;
     out_frames_hwctx = out_frames_ctx->hwctx;
 
     out_frames_ctx->format            = AV_PIX_FMT_QSV;
     out_frames_ctx->width             = FFALIGN(out_width,  32);
     out_frames_ctx->height            = FFALIGN(out_height, 32);
     out_frames_ctx->sw_format         = out_format;
-    out_frames_ctx->initial_pool_size = 32;
+    out_frames_ctx->initial_pool_size = 4;
 
     out_frames_hwctx->frame_type = in_frames_hwctx->frame_type;
 
-    ret = av_hwframe_ctx_init(s->out_frames_ref);
+    ret = ff_filter_init_hw_frames(ctx, outlink, 32);
+    if (ret < 0)
+        return ret;
+
+    ret = av_hwframe_ctx_init(outlink->hw_frames_ctx);
     if (ret < 0)
         return ret;
 
@@ -266,7 +270,7 @@
 
     QSVScaleContext                   *s = ctx->priv;
     AVHWFramesContext     *in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
-    AVHWFramesContext    *out_frames_ctx = (AVHWFramesContext*)s->out_frames_ref->data;
+    AVHWFramesContext    *out_frames_ctx = (AVHWFramesContext*)ctx->outputs[0]->hw_frames_ctx->data;
     AVQSVFramesContext  *in_frames_hwctx = in_frames_ctx->hwctx;
     AVQSVFramesContext *out_frames_hwctx = out_frames_ctx->hwctx;
     AVQSVDeviceContext     *device_hwctx = in_frames_ctx->device_ctx->hwctx;
@@ -298,6 +302,11 @@
         }
     }
 
+    if (err != MFX_ERR_NONE) {
+        av_log(ctx, AV_LOG_ERROR, "Error getting the session handle\n");
+        return AVERROR_UNKNOWN;
+    }
+
     /* create a "slave" session with those same properties, to be used for
      * actual scaling */
     err = MFXInit(impl, &ver, &s->session);
@@ -312,6 +321,12 @@
             return AVERROR_UNKNOWN;
     }
 
+    if (QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 25)) {
+        err = MFXJoinSession(device_hwctx->session, s->session);
+            if (err != MFX_ERR_NONE)
+                return AVERROR_UNKNOWN;
+    }
+
     memset(&par, 0, sizeof(par));
 
     if (opaque) {
@@ -407,8 +422,6 @@
 static int init_scale_session(AVFilterContext *ctx, int in_width, int in_height,
                               int out_width, int out_height)
 {
-    QSVScaleContext *s = ctx->priv;
-
     int ret;
 
     qsvscale_uninit(ctx);
@@ -421,11 +434,6 @@
     if (ret < 0)
         return ret;
 
-    av_buffer_unref(&ctx->outputs[0]->hw_frames_ctx);
-    ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->out_frames_ref);
-    if (!ctx->outputs[0]->hw_frames_ctx)
-        return AVERROR(ENOMEM);
-
     return 0;
 }
 

diff --git a/libavfilter/vf_scale_vaapi.c b/libavfilter/vf_scale_vaapi.c
index 22e928c..d6529d5 100644
--- a/libavfilter/vf_scale_vaapi.c
+++ b/libavfilter/vf_scale_vaapi.c

@@ -18,12 +18,7 @@
 
 #include <string.h>
 
-#include <va/va.h>
-#include <va/va_vpp.h>
-
 #include "libavutil/avassert.h"
-#include "libavutil/hwcontext.h"
-#include "libavutil/hwcontext_vaapi.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -33,288 +28,91 @@
 #include "internal.h"
 #include "scale.h"
 #include "video.h"
+#include "vaapi_vpp.h"
 
 typedef struct ScaleVAAPIContext {
-    const AVClass *class;
-
-    AVVAAPIDeviceContext *hwctx;
-    AVBufferRef *device_ref;
-
-    int valid_ids;
-    VAConfigID  va_config;
-    VAContextID va_context;
-
-    AVBufferRef       *input_frames_ref;
-    AVHWFramesContext *input_frames;
-
-    AVBufferRef       *output_frames_ref;
-    AVHWFramesContext *output_frames;
+    VAAPIVPPContext vpp_ctx; // must be the first field
 
     char *output_format_string;
-    enum AVPixelFormat output_format;
 
     char *w_expr;      // width expression string
     char *h_expr;      // height expression string
-
-    int output_width;  // computed width
-    int output_height; // computed height
 } ScaleVAAPIContext;
 
-
-static int scale_vaapi_query_formats(AVFilterContext *avctx)
-{
-    enum AVPixelFormat pix_fmts[] = {
-        AV_PIX_FMT_VAAPI, AV_PIX_FMT_NONE,
-    };
-    int err;
-
-    if ((err = ff_formats_ref(ff_make_format_list(pix_fmts),
-                              &avctx->inputs[0]->out_formats)) < 0)
-        return err;
-    if ((err = ff_formats_ref(ff_make_format_list(pix_fmts),
-                              &avctx->outputs[0]->in_formats)) < 0)
-        return err;
-
-    return 0;
-}
-
-static int scale_vaapi_pipeline_uninit(ScaleVAAPIContext *ctx)
-{
-    if (ctx->va_context != VA_INVALID_ID) {
-        vaDestroyContext(ctx->hwctx->display, ctx->va_context);
-        ctx->va_context = VA_INVALID_ID;
-    }
-
-    if (ctx->va_config != VA_INVALID_ID) {
-        vaDestroyConfig(ctx->hwctx->display, ctx->va_config);
-        ctx->va_config = VA_INVALID_ID;
-    }
-
-    av_buffer_unref(&ctx->output_frames_ref);
-    av_buffer_unref(&ctx->device_ref);
-    ctx->hwctx = 0;
-
-    return 0;
-}
-
-static int scale_vaapi_config_input(AVFilterLink *inlink)
-{
-    AVFilterContext *avctx = inlink->dst;
-    ScaleVAAPIContext *ctx = avctx->priv;
-
-    scale_vaapi_pipeline_uninit(ctx);
-
-    if (!inlink->hw_frames_ctx) {
-        av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is "
-               "required to associate the processing device.\n");
-        return AVERROR(EINVAL);
-    }
-
-    ctx->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
-    ctx->input_frames = (AVHWFramesContext*)ctx->input_frames_ref->data;
-
-    return 0;
-}
-
 static int scale_vaapi_config_output(AVFilterLink *outlink)
 {
-    AVFilterLink *inlink = outlink->src->inputs[0];
-    AVFilterContext *avctx = outlink->src;
-    ScaleVAAPIContext *ctx = avctx->priv;
-    AVVAAPIHWConfig *hwconfig = NULL;
-    AVHWFramesConstraints *constraints = NULL;
-    AVVAAPIFramesContext *va_frames;
-    VAStatus vas;
-    int err, i;
-
-    scale_vaapi_pipeline_uninit(ctx);
-
-    ctx->device_ref = av_buffer_ref(ctx->input_frames->device_ref);
-    ctx->hwctx = ((AVHWDeviceContext*)ctx->device_ref->data)->hwctx;
-
-    av_assert0(ctx->va_config == VA_INVALID_ID);
-    vas = vaCreateConfig(ctx->hwctx->display, VAProfileNone,
-                         VAEntrypointVideoProc, 0, 0, &ctx->va_config);
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to create processing pipeline "
-               "config: %d (%s).\n", vas, vaErrorStr(vas));
-        err = AVERROR(EIO);
-        goto fail;
-    }
-
-    hwconfig = av_hwdevice_hwconfig_alloc(ctx->device_ref);
-    if (!hwconfig) {
-        err = AVERROR(ENOMEM);
-        goto fail;
-    }
-    hwconfig->config_id = ctx->va_config;
-
-    constraints = av_hwdevice_get_hwframe_constraints(ctx->device_ref,
-                                                      hwconfig);
-    if (!constraints) {
-        err = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    if (ctx->output_format == AV_PIX_FMT_NONE)
-        ctx->output_format = ctx->input_frames->sw_format;
-    if (constraints->valid_sw_formats) {
-        for (i = 0; constraints->valid_sw_formats[i] != AV_PIX_FMT_NONE; i++) {
-            if (ctx->output_format == constraints->valid_sw_formats[i])
-                break;
-        }
-        if (constraints->valid_sw_formats[i] == AV_PIX_FMT_NONE) {
-            av_log(ctx, AV_LOG_ERROR, "Hardware does not support output "
-                   "format %s.\n", av_get_pix_fmt_name(ctx->output_format));
-            err = AVERROR(EINVAL);
-            goto fail;
-        }
-    }
+    AVFilterLink *inlink     = outlink->src->inputs[0];
+    AVFilterContext *avctx   = outlink->src;
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+    ScaleVAAPIContext *ctx   = avctx->priv;
+    int err;
 
     if ((err = ff_scale_eval_dimensions(ctx,
                                         ctx->w_expr, ctx->h_expr,
                                         inlink, outlink,
-                                        &ctx->output_width, &ctx->output_height)) < 0)
-        goto fail;
+                                        &vpp_ctx->output_width, &vpp_ctx->output_height)) < 0)
+        return err;
 
-    if (ctx->output_width  < constraints->min_width  ||
-        ctx->output_height < constraints->min_height ||
-        ctx->output_width  > constraints->max_width  ||
-        ctx->output_height > constraints->max_height) {
-        av_log(ctx, AV_LOG_ERROR, "Hardware does not support scaling to "
-               "size %dx%d (constraints: width %d-%d height %d-%d).\n",
-               ctx->output_width, ctx->output_height,
-               constraints->min_width,  constraints->max_width,
-               constraints->min_height, constraints->max_height);
-        err = AVERROR(EINVAL);
-        goto fail;
-    }
+    err = ff_vaapi_vpp_config_output(outlink);
+    if (err < 0)
+        return err;
 
-    ctx->output_frames_ref = av_hwframe_ctx_alloc(ctx->device_ref);
-    if (!ctx->output_frames_ref) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to create HW frame context "
-               "for output.\n");
-        err = AVERROR(ENOMEM);
-        goto fail;
-    }
+    if (inlink->sample_aspect_ratio.num)
+        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio);
+    else
+        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
 
-    ctx->output_frames = (AVHWFramesContext*)ctx->output_frames_ref->data;
-
-    ctx->output_frames->format    = AV_PIX_FMT_VAAPI;
-    ctx->output_frames->sw_format = ctx->output_format;
-    ctx->output_frames->width     = ctx->output_width;
-    ctx->output_frames->height    = ctx->output_height;
-
-    // The number of output frames we need is determined by what follows
-    // the filter.  If it's an encoder with complex frame reference
-    // structures then this could be very high.
-    ctx->output_frames->initial_pool_size = 10;
-
-    err = av_hwframe_ctx_init(ctx->output_frames_ref);
-    if (err < 0) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to initialise VAAPI frame "
-               "context for output: %d\n", err);
-        goto fail;
-    }
-
-    va_frames = ctx->output_frames->hwctx;
-
-    av_assert0(ctx->va_context == VA_INVALID_ID);
-    vas = vaCreateContext(ctx->hwctx->display, ctx->va_config,
-                          ctx->output_width, ctx->output_height,
-                          VA_PROGRESSIVE,
-                          va_frames->surface_ids, va_frames->nb_surfaces,
-                          &ctx->va_context);
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to create processing pipeline "
-               "context: %d (%s).\n", vas, vaErrorStr(vas));
-        return AVERROR(EIO);
-    }
-
-    outlink->w = ctx->output_width;
-    outlink->h = ctx->output_height;
-
-    outlink->hw_frames_ctx = av_buffer_ref(ctx->output_frames_ref);
-    if (!outlink->hw_frames_ctx) {
-        err = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    av_freep(&hwconfig);
-    av_hwframe_constraints_free(&constraints);
     return 0;
-
-fail:
-    av_buffer_unref(&ctx->output_frames_ref);
-    av_freep(&hwconfig);
-    av_hwframe_constraints_free(&constraints);
-    return err;
-}
-
-static int vaapi_proc_colour_standard(enum AVColorSpace av_cs)
-{
-    switch(av_cs) {
-#define CS(av, va) case AVCOL_SPC_ ## av: return VAProcColorStandard ## va;
-        CS(BT709,     BT709);
-        CS(BT470BG,   BT601);
-        CS(SMPTE170M, SMPTE170M);
-        CS(SMPTE240M, SMPTE240M);
-#undef CS
-    default:
-        return VAProcColorStandardNone;
-    }
 }
 
 static int scale_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame)
 {
-    AVFilterContext *avctx = inlink->dst;
-    AVFilterLink *outlink = avctx->outputs[0];
-    ScaleVAAPIContext *ctx = avctx->priv;
-    AVFrame *output_frame = NULL;
+    AVFilterContext *avctx   = inlink->dst;
+    AVFilterLink *outlink    = avctx->outputs[0];
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+    AVFrame *output_frame    = NULL;
     VASurfaceID input_surface, output_surface;
     VAProcPipelineParameterBuffer params;
-    VABufferID params_id;
     VARectangle input_region;
-    VAStatus vas;
     int err;
 
-    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+    av_log(avctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
            av_get_pix_fmt_name(input_frame->format),
            input_frame->width, input_frame->height, input_frame->pts);
 
-    if (ctx->va_context == VA_INVALID_ID)
+    if (vpp_ctx->va_context == VA_INVALID_ID)
         return AVERROR(EINVAL);
 
     input_surface = (VASurfaceID)(uintptr_t)input_frame->data[3];
-    av_log(ctx, AV_LOG_DEBUG, "Using surface %#x for scale input.\n",
+    av_log(avctx, AV_LOG_DEBUG, "Using surface %#x for scale input.\n",
            input_surface);
 
-    output_frame = ff_get_video_buffer(outlink, ctx->output_width,
-                                       ctx->output_height);
+    output_frame = ff_get_video_buffer(outlink, vpp_ctx->output_width,
+                                       vpp_ctx->output_height);
     if (!output_frame) {
         err = AVERROR(ENOMEM);
         goto fail;
     }
 
     output_surface = (VASurfaceID)(uintptr_t)output_frame->data[3];
-    av_log(ctx, AV_LOG_DEBUG, "Using surface %#x for scale output.\n",
+    av_log(avctx, AV_LOG_DEBUG, "Using surface %#x for scale output.\n",
            output_surface);
 
     memset(&params, 0, sizeof(params));
 
-    // If there were top/left cropping, it could be taken into
-    // account here.
     input_region = (VARectangle) {
-        .x      = 0,
-        .y      = 0,
-        .width  = input_frame->width,
-        .height = input_frame->height,
+        .x      = input_frame->crop_left,
+        .y      = input_frame->crop_top,
+        .width  = input_frame->width -
+                 (input_frame->crop_left + input_frame->crop_right),
+        .height = input_frame->height -
+                 (input_frame->crop_top + input_frame->crop_bottom),
     };
 
     params.surface = input_surface;
     params.surface_region = &input_region;
     params.surface_color_standard =
-        vaapi_proc_colour_standard(input_frame->colorspace);
+        ff_vaapi_vpp_colour_standard(input_frame->colorspace);
 
     params.output_region = 0;
     params.output_background_color = 0xff000000;
@@ -323,71 +121,22 @@
     params.pipeline_flags = 0;
     params.filter_flags = VA_FILTER_SCALING_HQ;
 
-    vas = vaBeginPicture(ctx->hwctx->display,
-                         ctx->va_context, output_surface);
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to attach new picture: "
-               "%d (%s).\n", vas, vaErrorStr(vas));
-        err = AVERROR(EIO);
+    err = ff_vaapi_vpp_render_picture(avctx, &params, output_surface);
+    if (err < 0)
         goto fail;
-    }
 
-    vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
-                         VAProcPipelineParameterBufferType,
-                         sizeof(params), 1, &params, &params_id);
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to create parameter buffer: "
-               "%d (%s).\n", vas, vaErrorStr(vas));
-        err = AVERROR(EIO);
-        goto fail_after_begin;
-    }
-    av_log(ctx, AV_LOG_DEBUG, "Pipeline parameter buffer is %#x.\n",
-           params_id);
+    err = av_frame_copy_props(output_frame, input_frame);
+    if (err < 0)
+        goto fail;
 
-    vas = vaRenderPicture(ctx->hwctx->display, ctx->va_context,
-                          &params_id, 1);
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to render parameter buffer: "
-               "%d (%s).\n", vas, vaErrorStr(vas));
-        err = AVERROR(EIO);
-        goto fail_after_begin;
-    }
-
-    vas = vaEndPicture(ctx->hwctx->display, ctx->va_context);
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to start picture processing: "
-               "%d (%s).\n", vas, vaErrorStr(vas));
-        err = AVERROR(EIO);
-        goto fail_after_render;
-    }
-
-    if (CONFIG_VAAPI_1 || ctx->hwctx->driver_quirks &
-        AV_VAAPI_DRIVER_QUIRK_RENDER_PARAM_BUFFERS) {
-        vas = vaDestroyBuffer(ctx->hwctx->display, params_id);
-        if (vas != VA_STATUS_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "Failed to free parameter buffer: "
-                   "%d (%s).\n", vas, vaErrorStr(vas));
-            // And ignore.
-        }
-    }
-
-    av_frame_copy_props(output_frame, input_frame);
     av_frame_free(&input_frame);
 
-    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+    av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
            av_get_pix_fmt_name(output_frame->format),
            output_frame->width, output_frame->height, output_frame->pts);
 
     return ff_filter_frame(outlink, output_frame);
 
-    // We want to make sure that if vaBeginPicture has been called, we also
-    // call vaRenderPicture and vaEndPicture.  These calls may well fail or
-    // do something else nasty, but once we're in this failure case there
-    // isn't much else we can do.
-fail_after_begin:
-    vaRenderPicture(ctx->hwctx->display, ctx->va_context, &params_id, 1);
-fail_after_render:
-    vaEndPicture(ctx->hwctx->display, ctx->va_context);
 fail:
     av_frame_free(&input_frame);
     av_frame_free(&output_frame);
@@ -396,39 +145,26 @@
 
 static av_cold int scale_vaapi_init(AVFilterContext *avctx)
 {
-    ScaleVAAPIContext *ctx = avctx->priv;
+    VAAPIVPPContext *vpp_ctx = avctx->priv;
+    ScaleVAAPIContext *ctx   = avctx->priv;
 
-    ctx->va_config  = VA_INVALID_ID;
-    ctx->va_context = VA_INVALID_ID;
-    ctx->valid_ids  = 1;
+    ff_vaapi_vpp_ctx_init(avctx);
+    vpp_ctx->pipeline_uninit = ff_vaapi_vpp_pipeline_uninit;
 
     if (ctx->output_format_string) {
-        ctx->output_format = av_get_pix_fmt(ctx->output_format_string);
-        if (ctx->output_format == AV_PIX_FMT_NONE) {
-            av_log(ctx, AV_LOG_ERROR, "Invalid output format.\n");
+        vpp_ctx->output_format = av_get_pix_fmt(ctx->output_format_string);
+        if (vpp_ctx->output_format == AV_PIX_FMT_NONE) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid output format.\n");
             return AVERROR(EINVAL);
         }
     } else {
         // Use the input format once that is configured.
-        ctx->output_format = AV_PIX_FMT_NONE;
+        vpp_ctx->output_format = AV_PIX_FMT_NONE;
     }
 
     return 0;
 }
 
-static av_cold void scale_vaapi_uninit(AVFilterContext *avctx)
-{
-    ScaleVAAPIContext *ctx = avctx->priv;
-
-    if (ctx->valid_ids)
-        scale_vaapi_pipeline_uninit(ctx);
-
-    av_buffer_unref(&ctx->input_frames_ref);
-    av_buffer_unref(&ctx->output_frames_ref);
-    av_buffer_unref(&ctx->device_ref);
-}
-
-
 #define OFFSET(x) offsetof(ScaleVAAPIContext, x)
 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
 static const AVOption scale_vaapi_options[] = {
@@ -441,19 +177,14 @@
     { NULL },
 };
 
-static const AVClass scale_vaapi_class = {
-    .class_name = "scale_vaapi",
-    .item_name  = av_default_item_name,
-    .option     = scale_vaapi_options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
+AVFILTER_DEFINE_CLASS(scale_vaapi);
 
 static const AVFilterPad scale_vaapi_inputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
         .filter_frame = &scale_vaapi_filter_frame,
-        .config_props = &scale_vaapi_config_input,
+        .config_props = &ff_vaapi_vpp_config_input,
     },
     { NULL }
 };
@@ -472,8 +203,8 @@
     .description   = NULL_IF_CONFIG_SMALL("Scale to/from VAAPI surfaces."),
     .priv_size     = sizeof(ScaleVAAPIContext),
     .init          = &scale_vaapi_init,
-    .uninit        = &scale_vaapi_uninit,
-    .query_formats = &scale_vaapi_query_formats,
+    .uninit        = &ff_vaapi_vpp_ctx_uninit,
+    .query_formats = &ff_vaapi_vpp_query_formats,
     .inputs        = scale_vaapi_inputs,
     .outputs       = scale_vaapi_outputs,
     .priv_class    = &scale_vaapi_class,

diff --git a/libavfilter/vf_selectivecolor.c b/libavfilter/vf_selectivecolor.c
index c4d51bb..b99f31b 100644
--- a/libavfilter/vf_selectivecolor.c
+++ b/libavfilter/vf_selectivecolor.c

@@ -344,7 +344,7 @@
             const int max_color = FFMAX3(r, g, b);                                                      \
             const int is_white   = (r > 1<<(nbits-1) && g > 1<<(nbits-1) && b > 1<<(nbits-1));          \
             const int is_neutral = (r || g || b) &&                                                     \
-                                   r != (1<<nbits)-1 && g != (1<<nbits)-1 && b != (1<<nbits)-1;         \
+                                   (r != (1<<nbits)-1 || g != (1<<nbits)-1 || b != (1<<nbits)-1);       \
             const int is_black   = (r < 1<<(nbits-1) && g < 1<<(nbits-1) && b < 1<<(nbits-1));          \
             const uint32_t range_flag = (r == max_color) << RANGE_REDS                                  \
                                       | (r == min_color) << RANGE_CYANS                                 \

diff --git a/libavfilter/vf_setparams.c b/libavfilter/vf_setparams.c
new file mode 100644
index 0000000..8427f98
--- /dev/null
+++ b/libavfilter/vf_setparams.c

@@ -0,0 +1,83 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/pixfmt.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct SetParamsContext {
+    const AVClass *class;
+    int color_range;
+} SetParamsContext;
+
+#define OFFSET(x) offsetof(SetParamsContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption setrange_options[] = {
+    {"range", "select color range", OFFSET(color_range), AV_OPT_TYPE_INT, {.i64=-1},-1, AVCOL_RANGE_NB-1, FLAGS, "range"},
+    {"auto",  "keep the same color range",   0, AV_OPT_TYPE_CONST, {.i64=-1},                       0, 0, FLAGS, "range"},
+    {"unspecified",                  NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_UNSPECIFIED},  0, 0, FLAGS, "range"},
+    {"unknown",                      NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_UNSPECIFIED},  0, 0, FLAGS, "range"},
+    {"limited",                      NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_MPEG},         0, 0, FLAGS, "range"},
+    {"tv",                           NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_MPEG},         0, 0, FLAGS, "range"},
+    {"mpeg",                         NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_MPEG},         0, 0, FLAGS, "range"},
+    {"full",                         NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_JPEG},         0, 0, FLAGS, "range"},
+    {"pc",                           NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_JPEG},         0, 0, FLAGS, "range"},
+    {"jpeg",                         NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_JPEG},         0, 0, FLAGS, "range"},
+    {NULL}
+};
+
+AVFILTER_DEFINE_CLASS(setrange);
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SetParamsContext *s = ctx->priv;
+
+    if (s->color_range >= 0)
+        frame->color_range = s->color_range;
+    return ff_filter_frame(ctx->outputs[0], frame);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_setrange = {
+    .name        = "setrange",
+    .description = NULL_IF_CONFIG_SMALL("Force color range for the output video frame."),
+    .priv_size   = sizeof(SetParamsContext),
+    .priv_class  = &setrange_class,
+    .inputs      = inputs,
+    .outputs     = outputs,
+};

diff --git a/libavfilter/vf_showinfo.c b/libavfilter/vf_showinfo.c
index 14b8aa4..d1d1415 100644
--- a/libavfilter/vf_showinfo.c
+++ b/libavfilter/vf_showinfo.c

@@ -29,6 +29,7 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/spherical.h"
 #include "libavutil/stereo3d.h"
 #include "libavutil/timestamp.h"
 
@@ -36,6 +37,45 @@
 #include "internal.h"
 #include "video.h"
 
+static void dump_spherical(AVFilterContext *ctx, AVFrame *frame, AVFrameSideData *sd)
+{
+    AVSphericalMapping *spherical = (AVSphericalMapping *)sd->data;
+    double yaw, pitch, roll;
+
+    av_log(ctx, AV_LOG_INFO, "spherical information: ");
+    if (sd->size < sizeof(*spherical)) {
+        av_log(ctx, AV_LOG_INFO, "invalid data");
+        return;
+    }
+
+    if (spherical->projection == AV_SPHERICAL_EQUIRECTANGULAR)
+        av_log(ctx, AV_LOG_INFO, "equirectangular ");
+    else if (spherical->projection == AV_SPHERICAL_CUBEMAP)
+        av_log(ctx, AV_LOG_INFO, "cubemap ");
+    else if (spherical->projection == AV_SPHERICAL_EQUIRECTANGULAR_TILE)
+        av_log(ctx, AV_LOG_INFO, "tiled equirectangular ");
+    else {
+        av_log(ctx, AV_LOG_WARNING, "unknown");
+        return;
+    }
+
+    yaw = ((double)spherical->yaw) / (1 << 16);
+    pitch = ((double)spherical->pitch) / (1 << 16);
+    roll = ((double)spherical->roll) / (1 << 16);
+    av_log(ctx, AV_LOG_INFO, "(%f/%f/%f) ", yaw, pitch, roll);
+
+    if (spherical->projection == AV_SPHERICAL_EQUIRECTANGULAR_TILE) {
+        size_t l, t, r, b;
+        av_spherical_tile_bounds(spherical, frame->width, frame->height,
+                                 &l, &t, &r, &b);
+        av_log(ctx, AV_LOG_INFO,
+               "[%"SIZE_SPECIFIER", %"SIZE_SPECIFIER", %"SIZE_SPECIFIER", %"SIZE_SPECIFIER"] ",
+               l, t, r, b);
+    } else if (spherical->projection == AV_SPHERICAL_CUBEMAP) {
+        av_log(ctx, AV_LOG_INFO, "[pad %"PRIu32"] ", spherical->padding);
+    }
+}
+
 static void dump_stereo3d(AVFilterContext *ctx, AVFrameSideData *sd)
 {
     AVStereo3D *stereo;
@@ -48,19 +88,7 @@
 
     stereo = (AVStereo3D *)sd->data;
 
-    av_log(ctx, AV_LOG_INFO, "type - ");
-    switch (stereo->type) {
-    case AV_STEREO3D_2D:                  av_log(ctx, AV_LOG_INFO, "2D");                     break;
-    case AV_STEREO3D_SIDEBYSIDE:          av_log(ctx, AV_LOG_INFO, "side by side");           break;
-    case AV_STEREO3D_TOPBOTTOM:           av_log(ctx, AV_LOG_INFO, "top and bottom");         break;
-    case AV_STEREO3D_FRAMESEQUENCE:       av_log(ctx, AV_LOG_INFO, "frame alternate");        break;
-    case AV_STEREO3D_CHECKERBOARD:        av_log(ctx, AV_LOG_INFO, "checkerboard");           break;
-    case AV_STEREO3D_LINES:               av_log(ctx, AV_LOG_INFO, "interleaved lines");      break;
-    case AV_STEREO3D_COLUMNS:             av_log(ctx, AV_LOG_INFO, "interleaved columns");    break;
-    case AV_STEREO3D_SIDEBYSIDE_QUINCUNX: av_log(ctx, AV_LOG_INFO, "side by side "
-                                                                   "(quincunx subsampling)"); break;
-    default:                              av_log(ctx, AV_LOG_WARNING, "unknown");             break;
-    }
+    av_log(ctx, AV_LOG_INFO, "type - %s", av_stereo3d_type_name(stereo->type));
 
     if (stereo->flags & AV_STEREO3D_FLAG_INVERT)
         av_log(ctx, AV_LOG_INFO, " (inverted)");
@@ -140,6 +168,9 @@
         case AV_FRAME_DATA_A53_CC:
             av_log(ctx, AV_LOG_INFO, "A/53 closed captions (%d bytes)", sd->size);
             break;
+        case AV_FRAME_DATA_SPHERICAL:
+            dump_spherical(ctx, frame, sd);
+            break;
         case AV_FRAME_DATA_STEREO3D:
             dump_stereo3d(ctx, sd);
             break;

diff --git a/libavfilter/vf_shuffleplanes.c b/libavfilter/vf_shuffleplanes.c
index 4bc7b79..5d1302d 100644
--- a/libavfilter/vf_shuffleplanes.c
+++ b/libavfilter/vf_shuffleplanes.c

@@ -69,7 +69,7 @@
         }
 
         if ((desc->flags & AV_PIX_FMT_FLAG_PAL ||
-             desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) &&
+             desc->flags & FF_PSEUDOPAL) &&
             (i == 1) != (s->map[i] == 1)) {
             av_log(ctx, AV_LOG_ERROR,
                    "Cannot map between a palette plane and a data plane.\n");
@@ -158,10 +158,9 @@
 AVFilter ff_vf_shuffleplanes = {
     .name         = "shuffleplanes",
     .description  = NULL_IF_CONFIG_SMALL("Shuffle video planes."),
-
     .priv_size    = sizeof(ShufflePlanesContext),
     .priv_class   = &shuffleplanes_class,
-
     .inputs       = shuffleplanes_inputs,
     .outputs      = shuffleplanes_outputs,
+    .flags        = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
 };

diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c
new file mode 100644
index 0000000..6423d2e
--- /dev/null
+++ b/libavfilter/vf_sr.c

@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2018 Sergey Lavrushkin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Filter implementing image super-resolution using deep convolutional networks.
+ * https://arxiv.org/abs/1501.00092
+ * https://arxiv.org/abs/1609.05158
+ */
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "libavutil/opt.h"
+#include "libavformat/avio.h"
+#include "libswscale/swscale.h"
+#include "dnn_interface.h"
+
+typedef struct SRContext {
+    const AVClass *class;
+
+    char *model_filename;
+    DNNBackendType backend_type;
+    DNNModule *dnn_module;
+    DNNModel *model;
+    DNNData input, output;
+    int scale_factor;
+    struct SwsContext *sws_contexts[3];
+    int sws_slice_h, sws_input_linesize, sws_output_linesize;
+} SRContext;
+
+#define OFFSET(x) offsetof(SRContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
+static const AVOption sr_options[] = {
+    { "dnn_backend", "DNN backend used for model execution", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" },
+    { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" },
+#if (CONFIG_LIBTENSORFLOW == 1)
+    { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" },
+#endif
+    { "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS },
+    { "model", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(sr);
+
+static av_cold int init(AVFilterContext *context)
+{
+    SRContext *sr_context = context->priv;
+
+    sr_context->dnn_module = ff_get_dnn_module(sr_context->backend_type);
+    if (!sr_context->dnn_module){
+        av_log(context, AV_LOG_ERROR, "could not create DNN module for requested backend\n");
+        return AVERROR(ENOMEM);
+    }
+    if (!sr_context->model_filename){
+        av_log(context, AV_LOG_ERROR, "model file for network was not specified\n");
+        return AVERROR(EIO);
+    }
+    else{
+        if (!sr_context->dnn_module->load_model) {
+            av_log(context, AV_LOG_ERROR, "load_model for network was not specified\n");
+            return AVERROR(EIO);
+        } else {
+            sr_context->model = (sr_context->dnn_module->load_model)(sr_context->model_filename);
+        }
+    }
+    if (!sr_context->model){
+        av_log(context, AV_LOG_ERROR, "could not load DNN model\n");
+        return AVERROR(EIO);
+    }
+
+    sr_context->sws_contexts[0] = NULL;
+    sr_context->sws_contexts[1] = NULL;
+    sr_context->sws_contexts[2] = NULL;
+
+    return 0;
+}
+
+static int query_formats(AVFilterContext *context)
+{
+    const enum AVPixelFormat pixel_formats[] = {AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+                                                AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_GRAY8,
+                                                AV_PIX_FMT_NONE};
+    AVFilterFormats *formats_list;
+
+    formats_list = ff_make_format_list(pixel_formats);
+    if (!formats_list){
+        av_log(context, AV_LOG_ERROR, "could not create formats list\n");
+        return AVERROR(ENOMEM);
+    }
+
+    return ff_set_common_formats(context, formats_list);
+}
+
+static int config_props(AVFilterLink *inlink)
+{
+    AVFilterContext *context = inlink->dst;
+    SRContext *sr_context = context->priv;
+    AVFilterLink *outlink = context->outputs[0];
+    DNNReturnType result;
+    int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w;
+
+    sr_context->input.width = inlink->w * sr_context->scale_factor;
+    sr_context->input.height = inlink->h * sr_context->scale_factor;
+    sr_context->input.channels = 1;
+
+    result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output);
+    if (result != DNN_SUCCESS){
+        av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n");
+        return AVERROR(EIO);
+    }
+    else{
+        if (sr_context->input.height != sr_context->output.height || sr_context->input.width != sr_context->output.width){
+            sr_context->input.width = inlink->w;
+            sr_context->input.height = inlink->h;
+            result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output);
+            if (result != DNN_SUCCESS){
+                av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n");
+                return AVERROR(EIO);
+            }
+            sr_context->scale_factor = 0;
+        }
+        outlink->h = sr_context->output.height;
+        outlink->w = sr_context->output.width;
+        sr_context->sws_contexts[1] = sws_getContext(sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAY8,
+                                                     sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAYF32,
+                                                     0, NULL, NULL, NULL);
+        sr_context->sws_input_linesize = sr_context->input.width << 2;
+        sr_context->sws_contexts[2] = sws_getContext(sr_context->output.width, sr_context->output.height, AV_PIX_FMT_GRAYF32,
+                                                     sr_context->output.width, sr_context->output.height, AV_PIX_FMT_GRAY8,
+                                                     0, NULL, NULL, NULL);
+        sr_context->sws_output_linesize = sr_context->output.width << 2;
+        if (!sr_context->sws_contexts[1] || !sr_context->sws_contexts[2]){
+            av_log(context, AV_LOG_ERROR, "could not create SwsContext for conversions\n");
+            return AVERROR(ENOMEM);
+        }
+        if (sr_context->scale_factor){
+            sr_context->sws_contexts[0] = sws_getContext(inlink->w, inlink->h, inlink->format,
+                                                         outlink->w, outlink->h, outlink->format,
+                                                         SWS_BICUBIC, NULL, NULL, NULL);
+            if (!sr_context->sws_contexts[0]){
+                av_log(context, AV_LOG_ERROR, "could not create SwsContext for scaling\n");
+                return AVERROR(ENOMEM);
+            }
+            sr_context->sws_slice_h = inlink->h;
+        }
+        else{
+            if (inlink->format != AV_PIX_FMT_GRAY8){
+                sws_src_h = sr_context->input.height;
+                sws_src_w = sr_context->input.width;
+                sws_dst_h = sr_context->output.height;
+                sws_dst_w = sr_context->output.width;
+
+                switch (inlink->format){
+                case AV_PIX_FMT_YUV420P:
+                    sws_src_h = AV_CEIL_RSHIFT(sws_src_h, 1);
+                    sws_src_w = AV_CEIL_RSHIFT(sws_src_w, 1);
+                    sws_dst_h = AV_CEIL_RSHIFT(sws_dst_h, 1);
+                    sws_dst_w = AV_CEIL_RSHIFT(sws_dst_w, 1);
+                    break;
+                case AV_PIX_FMT_YUV422P:
+                    sws_src_w = AV_CEIL_RSHIFT(sws_src_w, 1);
+                    sws_dst_w = AV_CEIL_RSHIFT(sws_dst_w, 1);
+                    break;
+                case AV_PIX_FMT_YUV444P:
+                    break;
+                case AV_PIX_FMT_YUV410P:
+                    sws_src_h = AV_CEIL_RSHIFT(sws_src_h, 2);
+                    sws_src_w = AV_CEIL_RSHIFT(sws_src_w, 2);
+                    sws_dst_h = AV_CEIL_RSHIFT(sws_dst_h, 2);
+                    sws_dst_w = AV_CEIL_RSHIFT(sws_dst_w, 2);
+                    break;
+                case AV_PIX_FMT_YUV411P:
+                    sws_src_w = AV_CEIL_RSHIFT(sws_src_w, 2);
+                    sws_dst_w = AV_CEIL_RSHIFT(sws_dst_w, 2);
+                    break;
+                default:
+                    av_log(context, AV_LOG_ERROR, "could not create SwsContext for scaling for given input pixel format");
+                    return AVERROR(EIO);
+                }
+                sr_context->sws_contexts[0] = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
+                                                             sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
+                                                             SWS_BICUBIC, NULL, NULL, NULL);
+                if (!sr_context->sws_contexts[0]){
+                    av_log(context, AV_LOG_ERROR, "could not create SwsContext for scaling\n");
+                    return AVERROR(ENOMEM);
+                }
+                sr_context->sws_slice_h = sws_src_h;
+            }
+        }
+
+        return 0;
+    }
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *context = inlink->dst;
+    SRContext *sr_context = context->priv;
+    AVFilterLink *outlink = context->outputs[0];
+    AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    DNNReturnType dnn_result;
+
+    if (!out){
+        av_log(context, AV_LOG_ERROR, "could not allocate memory for output frame\n");
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    av_frame_copy_props(out, in);
+    out->height = sr_context->output.height;
+    out->width = sr_context->output.width;
+    if (sr_context->scale_factor){
+        sws_scale(sr_context->sws_contexts[0], (const uint8_t **)in->data, in->linesize,
+                  0, sr_context->sws_slice_h, out->data, out->linesize);
+
+        sws_scale(sr_context->sws_contexts[1], (const uint8_t **)out->data, out->linesize,
+                  0, out->height, (uint8_t * const*)(&sr_context->input.data),
+                  (const int [4]){sr_context->sws_input_linesize, 0, 0, 0});
+    }
+    else{
+        if (sr_context->sws_contexts[0]){
+            sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 1), in->linesize + 1,
+                      0, sr_context->sws_slice_h, out->data + 1, out->linesize + 1);
+            sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 2), in->linesize + 2,
+                      0, sr_context->sws_slice_h, out->data + 2, out->linesize + 2);
+        }
+
+        sws_scale(sr_context->sws_contexts[1], (const uint8_t **)in->data, in->linesize,
+                  0, in->height, (uint8_t * const*)(&sr_context->input.data),
+                  (const int [4]){sr_context->sws_input_linesize, 0, 0, 0});
+    }
+    av_frame_free(&in);
+
+    dnn_result = (sr_context->dnn_module->execute_model)(sr_context->model);
+    if (dnn_result != DNN_SUCCESS){
+        av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n");
+        return AVERROR(EIO);
+    }
+
+    sws_scale(sr_context->sws_contexts[2], (const uint8_t *[4]){(const uint8_t *)sr_context->output.data, 0, 0, 0},
+              (const int[4]){sr_context->sws_output_linesize, 0, 0, 0},
+              0, out->height, (uint8_t * const*)out->data, out->linesize);
+
+    return ff_filter_frame(outlink, out);
+}
+
+static av_cold void uninit(AVFilterContext *context)
+{
+    int i;
+    SRContext *sr_context = context->priv;
+
+    if (sr_context->dnn_module){
+        (sr_context->dnn_module->free_model)(&sr_context->model);
+        av_freep(&sr_context->dnn_module);
+    }
+
+    for (i = 0; i < 3; ++i){
+        if (sr_context->sws_contexts[i]){
+            sws_freeContext(sr_context->sws_contexts[i]);
+        }
+    }
+}
+
+static const AVFilterPad sr_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_props,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad sr_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_sr = {
+    .name          = "sr",
+    .description   = NULL_IF_CONFIG_SMALL("Apply DNN-based image super resolution to the input."),
+    .priv_size     = sizeof(SRContext),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = sr_inputs,
+    .outputs       = sr_outputs,
+    .priv_class    = &sr_class,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+};
+

diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c
index 4dcdc05..4c957f4 100644
--- a/libavfilter/vf_ssim.c
+++ b/libavfilter/vf_ssim.c

@@ -359,7 +359,7 @@
 {
     static const enum AVPixelFormat pix_fmts[] = {
         AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10,
-        AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
         AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
         AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,

diff --git a/libavfilter/vf_stack.c b/libavfilter/vf_stack.c
index 2467302..b2b8c68 100644
--- a/libavfilter/vf_stack.c
+++ b/libavfilter/vf_stack.c

@@ -105,6 +105,7 @@
     if (!out)
         return AVERROR(ENOMEM);
     out->pts = av_rescale_q(s->fs.pts, s->fs.time_base, outlink->time_base);
+    out->sample_aspect_ratio = outlink->sample_aspect_ratio;
 
     for (i = 0; i < s->nb_inputs; i++) {
         AVFilterLink *inlink = ctx->inputs[i];
@@ -147,6 +148,7 @@
     StackContext *s = ctx->priv;
     AVRational time_base = ctx->inputs[0]->time_base;
     AVRational frame_rate = ctx->inputs[0]->frame_rate;
+    AVRational sar = ctx->inputs[0]->sample_aspect_ratio;
     int height = ctx->inputs[0]->h;
     int width = ctx->inputs[0]->w;
     FFFrameSyncIn *in;
@@ -179,6 +181,7 @@
     outlink->h          = height;
     outlink->time_base  = time_base;
     outlink->frame_rate = frame_rate;
+    outlink->sample_aspect_ratio = sar;
 
     if ((ret = ff_framesync_init(&s->fs, ctx, s->nb_inputs)) < 0)
         return ret;

diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c
index 66a5646..a7b0246 100644
--- a/libavfilter/vf_subtitles.c
+++ b/libavfilter/vf_subtitles.c

@@ -413,7 +413,7 @@
      *
      * That API is old and needs to be reworked to match behaviour with A/V.
      */
-    av_codec_set_pkt_timebase(dec_ctx, st->time_base);
+    dec_ctx->pkt_timebase = st->time_base;
 
     ret = avcodec_open2(dec_ctx, NULL, &codec_opts);
     if (ret < 0)

diff --git a/libavfilter/vf_swaprect.c b/libavfilter/vf_swaprect.c
index f96f897..f1fab1e 100644
--- a/libavfilter/vf_swaprect.c
+++ b/libavfilter/vf_swaprect.c

@@ -151,32 +151,32 @@
     x2[0] = av_clip(x2[0], 0, inlink->w - 1);
     y2[0] = av_clip(y2[0], 0, inlink->w - 1);
 
-    ah[1] = ah[2] = FF_CEIL_RSHIFT(h, s->desc->log2_chroma_h);
+    ah[1] = ah[2] = AV_CEIL_RSHIFT(h, s->desc->log2_chroma_h);
     ah[0] = ah[3] = h;
-    aw[1] = aw[2] = FF_CEIL_RSHIFT(w, s->desc->log2_chroma_w);
+    aw[1] = aw[2] = AV_CEIL_RSHIFT(w, s->desc->log2_chroma_w);
     aw[0] = aw[3] = w;
 
     w = FFMIN3(w, inlink->w - x1[0], inlink->w - x2[0]);
     h = FFMIN3(h, inlink->h - y1[0], inlink->h - y2[0]);
 
-    ph[1] = ph[2] = FF_CEIL_RSHIFT(h, s->desc->log2_chroma_h);
+    ph[1] = ph[2] = AV_CEIL_RSHIFT(h, s->desc->log2_chroma_h);
     ph[0] = ph[3] = h;
-    pw[1] = pw[2] = FF_CEIL_RSHIFT(w, s->desc->log2_chroma_w);
+    pw[1] = pw[2] = AV_CEIL_RSHIFT(w, s->desc->log2_chroma_w);
     pw[0] = pw[3] = w;
 
-    lh[1] = lh[2] = FF_CEIL_RSHIFT(inlink->h, s->desc->log2_chroma_h);
+    lh[1] = lh[2] = AV_CEIL_RSHIFT(inlink->h, s->desc->log2_chroma_h);
     lh[0] = lh[3] = inlink->h;
-    lw[1] = lw[2] = FF_CEIL_RSHIFT(inlink->w, s->desc->log2_chroma_w);
+    lw[1] = lw[2] = AV_CEIL_RSHIFT(inlink->w, s->desc->log2_chroma_w);
     lw[0] = lw[3] = inlink->w;
 
-    x1[1] = x1[2] = FF_CEIL_RSHIFT(x1[0], s->desc->log2_chroma_w);
+    x1[1] = x1[2] = AV_CEIL_RSHIFT(x1[0], s->desc->log2_chroma_w);
     x1[0] = x1[3] = x1[0];
-    y1[1] = y1[2] = FF_CEIL_RSHIFT(y1[0], s->desc->log2_chroma_h);
+    y1[1] = y1[2] = AV_CEIL_RSHIFT(y1[0], s->desc->log2_chroma_h);
     y1[0] = y1[3] = y1[0];
 
-    x2[1] = x2[2] = FF_CEIL_RSHIFT(x2[0], s->desc->log2_chroma_w);
+    x2[1] = x2[2] = AV_CEIL_RSHIFT(x2[0], s->desc->log2_chroma_w);
     x2[0] = x2[3] = x2[0];
-    y2[1] = y2[2] = FF_CEIL_RSHIFT(y2[0], s->desc->log2_chroma_h);
+    y2[1] = y2[2] = AV_CEIL_RSHIFT(y2[0], s->desc->log2_chroma_h);
     y2[0] = y2[3] = y2[0];
 
     for (p = 0; p < s->nb_planes; p++) {

diff --git a/libavfilter/vf_threshold.c b/libavfilter/vf_threshold.c
index 88f6ef2..cf2535d 100644
--- a/libavfilter/vf_threshold.c
+++ b/libavfilter/vf_threshold.c

@@ -31,27 +31,7 @@
 #include "framesync.h"
 #include "internal.h"
 #include "video.h"
-
-typedef struct ThresholdContext {
-    const AVClass *class;
-
-    int planes;
-    int bpc;
-
-    int nb_planes;
-    int width[4], height[4];
-
-    void (*threshold)(const uint8_t *in, const uint8_t *threshold,
-                      const uint8_t *min, const uint8_t *max,
-                      uint8_t *out,
-                      ptrdiff_t ilinesize, ptrdiff_t tlinesize,
-                      ptrdiff_t flinesize, ptrdiff_t slinesize,
-                      ptrdiff_t olinesize,
-                      int w, int h);
-
-    AVFrame *frames[4];
-    FFFrameSync fs;
-} ThresholdContext;
+#include "threshold.h"
 
 #define OFFSET(x) offsetof(ThresholdContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
@@ -81,19 +61,66 @@
         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
         AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12 , AV_PIX_FMT_GBRAP16,
         AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10,
-        AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_NONE
     };
 
     return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
 }
 
+typedef struct ThreadData {
+    AVFrame *in;
+    AVFrame *threshold;
+    AVFrame *min;
+    AVFrame *max;
+    AVFrame *out;
+} ThreadData;
+
+static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ThresholdContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *min = td->min;
+    AVFrame *max = td->max;
+    AVFrame *threshold = td->threshold;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+
+    for (int p = 0; p < s->nb_planes; p++) {
+        const int h = s->height[p];
+        const int slice_start = (h * jobnr) / nb_jobs;
+        const int slice_end = (h * (jobnr+1)) / nb_jobs;
+
+        if (!(s->planes & (1 << p))) {
+            av_image_copy_plane(out->data[p] + slice_start * out->linesize[p],
+                                out->linesize[p],
+                                in->data[p] + slice_start * in->linesize[p],
+                                in->linesize[p],
+                                s->width[p] * s->bpc,
+                                slice_end - slice_start);
+            continue;
+        }
+        s->threshold(in->data[p] + slice_start * in->linesize[p],
+                     threshold->data[p] + slice_start * threshold->linesize[p],
+                     min->data[p] + slice_start * min->linesize[p],
+                     max->data[p] + slice_start * max->linesize[p],
+                     out->data[p] + slice_start * out->linesize[p],
+                     in->linesize[p], threshold->linesize[p],
+                     min->linesize[p], max->linesize[p],
+                     out->linesize[p],
+                     s->width[p], slice_end - slice_start);
+    }
+
+    return 0;
+}
+
 static int process_frame(FFFrameSync *fs)
 {
     AVFilterContext *ctx = fs->parent;
     ThresholdContext *s = fs->opaque;
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out, *in, *threshold, *min, *max;
+    ThreadData td;
     int ret;
 
     if ((ret = ff_framesync_get_frame(&s->fs, 0, &in,        0)) < 0 ||
@@ -107,29 +134,18 @@
         if (!out)
             return AVERROR(ENOMEM);
     } else {
-        int p;
-
         out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
         if (!out)
             return AVERROR(ENOMEM);
         av_frame_copy_props(out, in);
 
-        for (p = 0; p < s->nb_planes; p++) {
-            if (!(s->planes & (1 << p))) {
-                av_image_copy_plane(out->data[p], out->linesize[p],
-                                    in->data[p], in->linesize[p],
-                                    s->width[p] * s->bpc,
-                                    s->height[p]);
-                continue;
-            }
-            s->threshold(in->data[p], threshold->data[p],
-                         min->data[p], max->data[p],
-                         out->data[p],
-                         in->linesize[p], threshold->linesize[p],
-                         min->linesize[p], max->linesize[p],
-                         out->linesize[p],
-                         s->width[p], s->height[p]);
-        }
+        td.out = out;
+        td.in = in;
+        td.threshold = threshold;
+        td.min = min;
+        td.max = max;
+        ctx->internal->execute(ctx, filter_slice, &td, NULL,
+                               FFMIN(s->height[2], ff_filter_get_nb_threads(ctx)));
     }
 
     out->pts = av_rescale_q(s->fs.pts, s->fs.time_base, outlink->time_base);
@@ -155,7 +171,7 @@
         in        += ilinesize;
         threshold += tlinesize;
         min       += flinesize;
-        max       += flinesize;
+        max       += slinesize;
         out       += olinesize;
     }
 }
@@ -183,7 +199,7 @@
         in        += ilinesize / 2;
         threshold += tlinesize / 2;
         min       += flinesize / 2;
-        max       += flinesize / 2;
+        max       += slinesize / 2;
         out       += olinesize / 2;
     }
 }
@@ -203,8 +219,16 @@
     s->height[0] = s->height[3] = inlink->h;
     s->width[1]  = s->width[2]  = AV_CEIL_RSHIFT(inlink->w, hsub);
     s->width[0]  = s->width[3]  = inlink->w;
+    s->depth = desc->comp[0].depth;
 
-    if (desc->comp[0].depth == 8) {
+    ff_threshold_init(s);
+
+    return 0;
+}
+
+void ff_threshold_init(ThresholdContext *s)
+{
+    if (s->depth == 8) {
         s->threshold = threshold8;
         s->bpc = 1;
     } else {
@@ -212,7 +236,8 @@
         s->bpc = 2;
     }
 
-    return 0;
+    if (ARCH_X86)
+        ff_threshold_init_x86(s);
 }
 
 static int config_output(AVFilterLink *outlink)
@@ -335,5 +360,5 @@
     .activate      = activate,
     .inputs        = inputs,
     .outputs       = outputs,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
 };

diff --git a/libavfilter/vf_tile.c b/libavfilter/vf_tile.c
index 87e0b94..439689a 100644
--- a/libavfilter/vf_tile.c
+++ b/libavfilter/vf_tile.c

@@ -23,6 +23,7 @@
  * tile video filter
  */
 
+#include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
@@ -36,16 +37,17 @@
     unsigned w, h;
     unsigned margin;
     unsigned padding;
+    unsigned overlap;
+    unsigned init_padding;
     unsigned current;
     unsigned nb_frames;
     FFDrawContext draw;
     FFDrawColor blank;
     AVFrame *out_ref;
+    AVFrame *prev_out_ref;
     uint8_t rgba_color[4];
 } TileContext;
 
-#define REASONABLE_SIZE 1024
-
 #define OFFSET(x) offsetof(TileContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
@@ -59,6 +61,10 @@
     { "padding", "set inner border thickness in pixels", OFFSET(padding),
         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1024, FLAGS },
     { "color",   "set the color of the unused area", OFFSET(rgba_color), AV_OPT_TYPE_COLOR, {.str = "black"}, .flags = FLAGS },
+    { "overlap", "set how many frames to overlap for each render", OFFSET(overlap),
+        AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
+    { "init_padding", " set how many frames to initially pad", OFFSET(init_padding),
+        AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
     { NULL }
 };
 
@@ -68,12 +74,21 @@
 {
     TileContext *tile = ctx->priv;
 
-    if (tile->w > REASONABLE_SIZE || tile->h > REASONABLE_SIZE) {
+    if (tile->w > UINT_MAX / tile->h) {
         av_log(ctx, AV_LOG_ERROR, "Tile size %ux%u is insane.\n",
                tile->w, tile->h);
         return AVERROR(EINVAL);
     }
 
+    if (tile->padding) {
+        if ((tile->w - 1 > (UINT32_MAX - 2 * tile->margin) / tile->padding) ||
+            (tile->h - 1 > (UINT32_MAX - 2 * tile->margin) / tile->padding)) {
+            av_log(ctx, AV_LOG_ERROR, "Combination of Tile size %ux%u, padding %d and margin %d overflows.\n",
+                   tile->w, tile->h, tile->padding, tile->margin);
+            return AVERROR(EINVAL);
+        }
+    }
+
     if (tile->nb_frames == 0) {
         tile->nb_frames = tile->w * tile->h;
     } else if (tile->nb_frames > tile->w * tile->h) {
@@ -82,6 +97,17 @@
         return AVERROR(EINVAL);
     }
 
+    if (tile->overlap >= tile->nb_frames) {
+        av_log(ctx, AV_LOG_WARNING, "overlap must be less than %d\n", tile->nb_frames);
+        tile->overlap = tile->nb_frames - 1;
+    }
+
+    if (tile->init_padding >= tile->nb_frames) {
+        av_log(ctx, AV_LOG_WARNING, "init_padding must be less than %d\n", tile->nb_frames);
+    } else {
+        tile->current = tile->init_padding;
+    }
+
     return 0;
 }
 
@@ -112,19 +138,19 @@
     outlink->h = tile->h * inlink->h + total_margin_h;
     outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
     outlink->frame_rate = av_mul_q(inlink->frame_rate,
-                                   av_make_q(1, tile->nb_frames));
+                                   av_make_q(1, tile->nb_frames - tile->overlap));
     ff_draw_init(&tile->draw, inlink->format, 0);
     ff_draw_color(&tile->draw, &tile->blank, tile->rgba_color);
 
     return 0;
 }
 
-static void get_current_tile_pos(AVFilterContext *ctx, unsigned *x, unsigned *y)
+static void get_tile_pos(AVFilterContext *ctx, unsigned *x, unsigned *y, unsigned current)
 {
     TileContext *tile    = ctx->priv;
     AVFilterLink *inlink = ctx->inputs[0];
-    const unsigned tx = tile->current % tile->w;
-    const unsigned ty = tile->current / tile->w;
+    const unsigned tx = current % tile->w;
+    const unsigned ty = current / tile->w;
 
     *x = tile->margin + (inlink->w + tile->padding) * tx;
     *y = tile->margin + (inlink->h + tile->padding) * ty;
@@ -136,12 +162,13 @@
     AVFilterLink *inlink = ctx->inputs[0];
     unsigned x0, y0;
 
-    get_current_tile_pos(ctx, &x0, &y0);
+    get_tile_pos(ctx, &x0, &y0, tile->current);
     ff_fill_rectangle(&tile->draw, &tile->blank,
                       out_buf->data, out_buf->linesize,
                       x0, y0, inlink->w, inlink->h);
     tile->current++;
 }
+
 static int end_last_frame(AVFilterContext *ctx)
 {
     TileContext *tile     = ctx->priv;
@@ -151,8 +178,13 @@
 
     while (tile->current < tile->nb_frames)
         draw_blank_frame(ctx, out_buf);
+    tile->current = tile->overlap;
+    if (tile->current) {
+        av_frame_free(&tile->prev_out_ref);
+        tile->prev_out_ref = av_frame_clone(out_buf);
+    }
     ret = ff_filter_frame(outlink, out_buf);
-    tile->current = 0;
+    tile->out_ref = NULL;
     return ret;
 }
 
@@ -167,7 +199,7 @@
     AVFilterLink *outlink = ctx->outputs[0];
     unsigned x0, y0;
 
-    if (!tile->current) {
+    if (!tile->out_ref) {
         tile->out_ref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
         if (!tile->out_ref) {
             av_frame_free(&picref);
@@ -178,14 +210,29 @@
         tile->out_ref->height = outlink->h;
 
         /* fill surface once for margin/padding */
-        if (tile->margin || tile->padding)
+        if (tile->margin || tile->padding || tile->init_padding)
             ff_fill_rectangle(&tile->draw, &tile->blank,
                               tile->out_ref->data,
                               tile->out_ref->linesize,
                               0, 0, outlink->w, outlink->h);
+        tile->init_padding = 0;
     }
 
-    get_current_tile_pos(ctx, &x0, &y0);
+    if (tile->prev_out_ref) {
+        unsigned x1, y1, i;
+
+        for (i = tile->nb_frames - tile->overlap; i < tile->nb_frames; i++) {
+            get_tile_pos(ctx, &x1, &y1, i);
+            get_tile_pos(ctx, &x0, &y0, i - (tile->nb_frames - tile->overlap));
+            ff_copy_rectangle2(&tile->draw,
+                               tile->out_ref->data, tile->out_ref->linesize,
+                               tile->prev_out_ref->data, tile->prev_out_ref->linesize,
+                               x0, y0, x1, y1, inlink->w, inlink->h);
+
+        }
+    }
+
+    get_tile_pos(ctx, &x0, &y0, tile->current);
     ff_copy_rectangle2(&tile->draw,
                        tile->out_ref->data, tile->out_ref->linesize,
                        picref->data, picref->linesize,
@@ -206,11 +253,18 @@
     int r;
 
     r = ff_request_frame(inlink);
-    if (r == AVERROR_EOF && tile->current)
+    if (r == AVERROR_EOF && tile->current && tile->out_ref)
         r = end_last_frame(ctx);
     return r;
 }
 
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    TileContext *tile = ctx->priv;
+
+    av_frame_free(&tile->prev_out_ref);
+}
+
 static const AVFilterPad tile_inputs[] = {
     {
         .name         = "default",
@@ -234,6 +288,7 @@
     .name          = "tile",
     .description   = NULL_IF_CONFIG_SMALL("Tile several successive frames together."),
     .init          = init,
+    .uninit        = uninit,
     .query_formats = query_formats,
     .priv_size     = sizeof(TileContext),
     .inputs        = tile_inputs,

diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c
index 10308bd..98a2c4b 100644
--- a/libavfilter/vf_tonemap.c
+++ b/libavfilter/vf_tonemap.c

@@ -30,17 +30,15 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
-#include "libavutil/mastering_display_metadata.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 
 #include "avfilter.h"
+#include "colorspace.h"
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
 
-#define REFERENCE_WHITE 100.0f
-
 enum TonemapAlgorithm {
     TONEMAP_NONE,
     TONEMAP_LINEAR,
@@ -52,11 +50,6 @@
     TONEMAP_MAX,
 };
 
-typedef struct LumaCoefficients {
-    double cr, cg, cb;
-} LumaCoefficients;
-
-
 static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = {
     [AVCOL_SPC_FCC]        = { 0.30,   0.59,   0.11   },
     [AVCOL_SPC_BT470BG]    = { 0.299,  0.587,  0.114  },
@@ -75,7 +68,7 @@
     double desat;
     double peak;
 
-    const LumaCoefficients *coeffs;
+    const struct LumaCoefficients *coeffs;
 } TonemapContext;
 
 static const enum AVPixelFormat pix_fmts[] = {
@@ -114,31 +107,6 @@
     return 0;
 }
 
-static double determine_signal_peak(AVFrame *in)
-{
-    AVFrameSideData *sd = av_frame_get_side_data(in, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
-    double peak = 0;
-
-    if (sd) {
-        AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data;
-        peak = clm->MaxCLL / REFERENCE_WHITE;
-    }
-
-    sd = av_frame_get_side_data(in, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
-    if (!peak && sd) {
-        AVMasteringDisplayMetadata *metadata = (AVMasteringDisplayMetadata *)sd->data;
-        if (metadata->has_luminance)
-            peak = av_q2d(metadata->max_luminance) / REFERENCE_WHITE;
-    }
-
-    /* smpte2084 needs the side data above to work correctly
-     * if missing, assume that the original transfer was arib-std-b67 */
-    if (!peak)
-        peak = 12;
-
-    return peak;
-}
-
 static float hable(float in)
 {
     float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f;
@@ -260,7 +228,7 @@
 
     /* read peak from side data if not passed in */
     if (!peak) {
-        peak = determine_signal_peak(in);
+        peak = ff_determine_signal_peak(in);
         av_log(s, AV_LOG_DEBUG, "Computed signal peak: %f\n", peak);
     }
 
@@ -297,6 +265,8 @@
 
     av_frame_free(&in);
 
+    ff_update_hdr_metadata(out, peak);
+
     return ff_filter_frame(outlink, out);
 }
 

diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c
new file mode 100644
index 0000000..cd293c2
--- /dev/null
+++ b/libavfilter/vf_tonemap_opencl.c

@@ -0,0 +1,560 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/bprint.h"
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+#include "colorspace.h"
+
+// TODO:
+// - separate peak-detection from tone-mapping kernel to solve
+//    one-frame-delay issue.
+// - import colorspace matrix generation from vf_colorspace.c
+// - more format support
+
+#define DETECTION_FRAMES 63
+
+enum TonemapAlgorithm {
+    TONEMAP_NONE,
+    TONEMAP_LINEAR,
+    TONEMAP_GAMMA,
+    TONEMAP_CLIP,
+    TONEMAP_REINHARD,
+    TONEMAP_HABLE,
+    TONEMAP_MOBIUS,
+    TONEMAP_MAX,
+};
+
+typedef struct TonemapOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    enum AVColorSpace colorspace, colorspace_in, colorspace_out;
+    enum AVColorTransferCharacteristic trc, trc_in, trc_out;
+    enum AVColorPrimaries primaries, primaries_in, primaries_out;
+    enum AVColorRange range, range_in, range_out;
+    enum AVChromaLocation chroma_loc;
+
+    enum TonemapAlgorithm tonemap;
+    enum AVPixelFormat    format;
+    double                peak;
+    double                param;
+    double                desat_param;
+    double                target_peak;
+    double                scene_threshold;
+    int                   initialised;
+    cl_kernel             kernel;
+    cl_command_queue      command_queue;
+    cl_mem                util_mem;
+} TonemapOpenCLContext;
+
+static const char *yuv_coff[AVCOL_SPC_NB] = {
+    [AVCOL_SPC_BT709] = "rgb2yuv_bt709",
+    [AVCOL_SPC_BT2020_NCL] = "rgb2yuv_bt2020",
+};
+
+static const char *rgb_coff[AVCOL_SPC_NB] = {
+    [AVCOL_SPC_BT709] = "yuv2rgb_bt709",
+    [AVCOL_SPC_BT2020_NCL] = "yuv2rgb_bt2020",
+};
+
+static const char *linearize_funcs[AVCOL_TRC_NB] = {
+    [AVCOL_TRC_SMPTE2084] = "eotf_st2084",
+    [AVCOL_TRC_ARIB_STD_B67] = "inverse_oetf_hlg",
+};
+
+static const char *delinearize_funcs[AVCOL_TRC_NB] = {
+    [AVCOL_TRC_BT709]     = "inverse_eotf_bt1886",
+    [AVCOL_TRC_BT2020_10] = "inverse_eotf_bt1886",
+};
+
+static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = {
+    [AVCOL_SPC_BT709]      = { 0.2126, 0.7152, 0.0722 },
+    [AVCOL_SPC_BT2020_NCL] = { 0.2627, 0.6780, 0.0593 },
+};
+
+static struct PrimaryCoefficients primaries_table[AVCOL_PRI_NB] = {
+    [AVCOL_PRI_BT709]  = { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 },
+    [AVCOL_PRI_BT2020] = { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 },
+};
+
+static struct WhitepointCoefficients whitepoint_table[AVCOL_PRI_NB] = {
+    [AVCOL_PRI_BT709]  = { 0.3127, 0.3290 },
+    [AVCOL_PRI_BT2020] = { 0.3127, 0.3290 },
+};
+
+static const char *tonemap_func[TONEMAP_MAX] = {
+    [TONEMAP_NONE]     = "direct",
+    [TONEMAP_LINEAR]   = "linear",
+    [TONEMAP_GAMMA]    = "gamma",
+    [TONEMAP_CLIP]     = "clip",
+    [TONEMAP_REINHARD] = "reinhard",
+    [TONEMAP_HABLE]    = "hable",
+    [TONEMAP_MOBIUS]   = "mobius",
+};
+
+static void get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out,
+                               double rgb2rgb[3][3]) {
+    double rgb2xyz[3][3], xyz2rgb[3][3];
+
+    ff_fill_rgb2xyz_table(&primaries_table[out], &whitepoint_table[out], rgb2xyz);
+    ff_matrix_invert_3x3(rgb2xyz, xyz2rgb);
+    ff_fill_rgb2xyz_table(&primaries_table[in], &whitepoint_table[in], rgb2xyz);
+    ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb);
+}
+
+#define OPENCL_SOURCE_NB 3
+// Average light level for SDR signals. This is equal to a signal level of 0.5
+// under a typical presentation gamma of about 2.0.
+static const float sdr_avg = 0.25f;
+
+static int tonemap_opencl_init(AVFilterContext *avctx)
+{
+    TonemapOpenCLContext *ctx = avctx->priv;
+    int rgb2rgb_passthrough = 1;
+    double rgb2rgb[3][3];
+    struct LumaCoefficients luma_src, luma_dst;
+    cl_int cle;
+    int err;
+    AVBPrint header;
+    const char *opencl_sources[OPENCL_SOURCE_NB];
+
+    av_bprint_init(&header, 1024, AV_BPRINT_SIZE_AUTOMATIC);
+
+    switch(ctx->tonemap) {
+    case TONEMAP_GAMMA:
+        if (isnan(ctx->param))
+            ctx->param = 1.8f;
+        break;
+    case TONEMAP_REINHARD:
+        if (!isnan(ctx->param))
+            ctx->param = (1.0f - ctx->param) / ctx->param;
+        break;
+    case TONEMAP_MOBIUS:
+        if (isnan(ctx->param))
+            ctx->param = 0.3f;
+        break;
+    }
+
+    if (isnan(ctx->param))
+        ctx->param = 1.0f;
+
+    // SDR peak is 1.0f
+    ctx->target_peak = 1.0f;
+    av_log(ctx, AV_LOG_DEBUG, "tone mapping transfer from %s to %s\n",
+           av_color_transfer_name(ctx->trc_in),
+           av_color_transfer_name(ctx->trc_out));
+    av_log(ctx, AV_LOG_DEBUG, "mapping colorspace from %s to %s\n",
+           av_color_space_name(ctx->colorspace_in),
+           av_color_space_name(ctx->colorspace_out));
+    av_log(ctx, AV_LOG_DEBUG, "mapping primaries from %s to %s\n",
+           av_color_primaries_name(ctx->primaries_in),
+           av_color_primaries_name(ctx->primaries_out));
+    av_log(ctx, AV_LOG_DEBUG, "mapping range from %s to %s\n",
+           av_color_range_name(ctx->range_in),
+           av_color_range_name(ctx->range_out));
+    // checking valid value just because of limited implementaion
+    // please remove when more functionalities are implemented
+    av_assert0(ctx->trc_out == AVCOL_TRC_BT709 ||
+               ctx->trc_out == AVCOL_TRC_BT2020_10);
+    av_assert0(ctx->trc_in == AVCOL_TRC_SMPTE2084||
+               ctx->trc_in == AVCOL_TRC_ARIB_STD_B67);
+    av_assert0(ctx->colorspace_in == AVCOL_SPC_BT2020_NCL ||
+               ctx->colorspace_in == AVCOL_SPC_BT709);
+    av_assert0(ctx->primaries_in == AVCOL_PRI_BT2020 ||
+               ctx->primaries_in == AVCOL_PRI_BT709);
+
+    av_bprintf(&header, "__constant const float tone_param = %.4ff;\n",
+               ctx->param);
+    av_bprintf(&header, "__constant const float desat_param = %.4ff;\n",
+               ctx->desat_param);
+    av_bprintf(&header, "__constant const float target_peak = %.4ff;\n",
+               ctx->target_peak);
+    av_bprintf(&header, "__constant const float sdr_avg = %.4ff;\n", sdr_avg);
+    av_bprintf(&header, "__constant const float scene_threshold = %.4ff;\n",
+               ctx->scene_threshold);
+    av_bprintf(&header, "#define TONE_FUNC %s\n", tonemap_func[ctx->tonemap]);
+    av_bprintf(&header, "#define DETECTION_FRAMES %d\n", DETECTION_FRAMES);
+
+    if (ctx->primaries_out != ctx->primaries_in) {
+        get_rgb2rgb_matrix(ctx->primaries_in, ctx->primaries_out, rgb2rgb);
+        rgb2rgb_passthrough = 0;
+    }
+    if (ctx->range_in == AVCOL_RANGE_JPEG)
+        av_bprintf(&header, "#define FULL_RANGE_IN\n");
+
+    if (ctx->range_out == AVCOL_RANGE_JPEG)
+        av_bprintf(&header, "#define FULL_RANGE_OUT\n");
+
+    av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc);
+
+    if (rgb2rgb_passthrough)
+        av_bprintf(&header, "#define RGB2RGB_PASSTHROUGH\n");
+    else {
+        av_bprintf(&header, "__constant float rgb2rgb[9] = {\n");
+        av_bprintf(&header, "    %.4ff, %.4ff, %.4ff,\n",
+                   rgb2rgb[0][0], rgb2rgb[0][1], rgb2rgb[0][2]);
+        av_bprintf(&header, "    %.4ff, %.4ff, %.4ff,\n",
+                   rgb2rgb[1][0], rgb2rgb[1][1], rgb2rgb[1][2]);
+        av_bprintf(&header, "    %.4ff, %.4ff, %.4ff};\n",
+                   rgb2rgb[2][0], rgb2rgb[2][1], rgb2rgb[2][2]);
+    }
+
+    av_bprintf(&header, "#define rgb_matrix %s\n",
+               rgb_coff[ctx->colorspace_in]);
+    av_bprintf(&header, "#define yuv_matrix %s\n",
+               yuv_coff[ctx->colorspace_out]);
+
+    luma_src = luma_coefficients[ctx->colorspace_in];
+    luma_dst = luma_coefficients[ctx->colorspace_out];
+    av_bprintf(&header, "constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n",
+               luma_src.cr, luma_src.cg, luma_src.cb);
+    av_bprintf(&header, "constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n",
+               luma_dst.cr, luma_dst.cg, luma_dst.cb);
+
+    av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]);
+    av_bprintf(&header, "#define delinearize %s\n",
+               delinearize_funcs[ctx->trc_out]);
+
+    if (ctx->trc_in == AVCOL_TRC_ARIB_STD_B67)
+        av_bprintf(&header, "#define ootf_impl ootf_hlg\n");
+
+    if (ctx->trc_out == AVCOL_TRC_ARIB_STD_B67)
+        av_bprintf(&header, "#define inverse_ootf_impl inverse_ootf_hlg\n");
+
+    av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str);
+    opencl_sources[0] = header.str;
+    opencl_sources[1] = ff_opencl_source_tonemap;
+    opencl_sources[2] = ff_opencl_source_colorspace_common;
+    err = ff_opencl_filter_load_program(avctx, opencl_sources, OPENCL_SOURCE_NB);
+
+    av_bprint_finalize(&header, NULL);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+                     "command queue %d.\n", cle);
+
+    ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
+
+    ctx->util_mem =
+        clCreateBuffer(ctx->ocf.hwctx->context, 0,
+                       (2 * DETECTION_FRAMES + 7) * sizeof(unsigned),
+                       NULL, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create util buffer: %d.\n", cle);
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    if (ctx->util_mem)
+        clReleaseMemObject(ctx->util_mem);
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel)
+        clReleaseKernel(ctx->kernel);
+    return err;
+}
+
+static int tonemap_opencl_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    TonemapOpenCLContext *s = avctx->priv;
+    int ret;
+    if (s->format == AV_PIX_FMT_NONE)
+        av_log(avctx, AV_LOG_WARNING, "format not set, use default format NV12\n");
+    else {
+      if (s->format != AV_PIX_FMT_P010 &&
+          s->format != AV_PIX_FMT_NV12) {
+        av_log(avctx, AV_LOG_ERROR, "unsupported output format,"
+               "only p010/nv12 supported now\n");
+        return AVERROR(EINVAL);
+      }
+    }
+
+    s->ocf.output_format = s->format == AV_PIX_FMT_NONE ? AV_PIX_FMT_NV12 : s->format;
+    ret = ff_opencl_filter_config_output(outlink);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+static int launch_kernel(AVFilterContext *avctx, cl_kernel kernel,
+                         AVFrame *output, AVFrame *input, float peak) {
+    TonemapOpenCLContext *ctx = avctx->priv;
+    int err = AVERROR(ENOSYS);
+    size_t global_work[2];
+    size_t local_work[2];
+    cl_int cle;
+
+    CL_SET_KERNEL_ARG(kernel, 0, cl_mem, &output->data[0]);
+    CL_SET_KERNEL_ARG(kernel, 1, cl_mem, &input->data[0]);
+    CL_SET_KERNEL_ARG(kernel, 2, cl_mem, &output->data[1]);
+    CL_SET_KERNEL_ARG(kernel, 3, cl_mem, &input->data[1]);
+    CL_SET_KERNEL_ARG(kernel, 4, cl_mem, &ctx->util_mem);
+    CL_SET_KERNEL_ARG(kernel, 5, cl_float, &peak);
+
+    local_work[0]  = 16;
+    local_work[1]  = 16;
+    // Note the work size based on uv plane, as we process a 2x2 quad in one workitem
+    err = ff_opencl_filter_work_size_from_image(avctx, global_work, output,
+                                                1, 16);
+    if (err < 0)
+        return err;
+
+    cle = clEnqueueNDRangeKernel(ctx->command_queue, kernel, 2, NULL,
+                                 global_work, local_work,
+                                 0, NULL, NULL);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle);
+    return 0;
+fail:
+    return err;
+}
+
+static int tonemap_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext    *avctx = inlink->dst;
+    AVFilterLink     *outlink = avctx->outputs[0];
+    TonemapOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    cl_int cle;
+    int err;
+    double peak = ctx->peak;
+
+    AVHWFramesContext *input_frames_ctx =
+        (AVHWFramesContext*)input->hw_frames_ctx->data;
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    if (!peak)
+        peak = ff_determine_signal_peak(input);
+
+    if (ctx->trc != -1)
+        output->color_trc = ctx->trc;
+    if (ctx->primaries != -1)
+        output->color_primaries = ctx->primaries;
+    if (ctx->colorspace != -1)
+        output->colorspace = ctx->colorspace;
+    if (ctx->range != -1)
+        output->color_range = ctx->range;
+
+    ctx->trc_in = input->color_trc;
+    ctx->trc_out = output->color_trc;
+    ctx->colorspace_in = input->colorspace;
+    ctx->colorspace_out = output->colorspace;
+    ctx->primaries_in = input->color_primaries;
+    ctx->primaries_out = output->color_primaries;
+    ctx->range_in = input->color_range;
+    ctx->range_out = output->color_range;
+    ctx->chroma_loc = output->chroma_location;
+
+    if (!ctx->initialised) {
+        if (!(input->color_trc == AVCOL_TRC_SMPTE2084 ||
+            input->color_trc == AVCOL_TRC_ARIB_STD_B67)) {
+            av_log(ctx, AV_LOG_ERROR, "unsupported transfer function characteristic.\n");
+            err = AVERROR(ENOSYS);
+            goto fail;
+        }
+
+        if (input_frames_ctx->sw_format != AV_PIX_FMT_P010) {
+            av_log(ctx, AV_LOG_ERROR, "unsupported format in tonemap_opencl.\n");
+            err = AVERROR(ENOSYS);
+            goto fail;
+        }
+
+        err = tonemap_opencl_init(avctx);
+        if (err < 0)
+            goto fail;
+    }
+
+    switch(input_frames_ctx->sw_format) {
+    case AV_PIX_FMT_P010:
+        err = launch_kernel(avctx, ctx->kernel, output, input, peak);
+        if (err < 0) goto fail;
+        break;
+    default:
+        err = AVERROR(ENOSYS);
+        goto fail;
+    }
+
+    cle = clFinish(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+
+    av_frame_free(&input);
+
+    ff_update_hdr_metadata(output, ctx->target_peak);
+
+    av_log(ctx, AV_LOG_DEBUG, "Tone-mapping output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+#ifndef NDEBUG
+    {
+        uint32_t *ptr, *max_total_p, *avg_total_p, *frame_number_p;
+        float peak_detected, avg_detected;
+        unsigned map_size = (2 * DETECTION_FRAMES  + 7) * sizeof(unsigned);
+        ptr = (void *)clEnqueueMapBuffer(ctx->command_queue, ctx->util_mem,
+                                         CL_TRUE, CL_MAP_READ, 0, map_size,
+                                         0, NULL, NULL, &cle);
+        // For the layout of the util buffer, refer tonemap.cl
+        if (ptr) {
+            max_total_p = ptr + 2 * (DETECTION_FRAMES + 1) + 1;
+            avg_total_p = max_total_p + 1;
+            frame_number_p = avg_total_p + 2;
+            peak_detected = (float)*max_total_p / (REFERENCE_WHITE * (*frame_number_p));
+            avg_detected = (float)*avg_total_p / (REFERENCE_WHITE * (*frame_number_p));
+            av_log(ctx, AV_LOG_DEBUG, "peak %f, avg %f will be used for next frame\n",
+                   peak_detected, avg_detected);
+            clEnqueueUnmapMemObject(ctx->command_queue, ctx->util_mem, ptr, 0,
+                                    NULL, NULL);
+        }
+    }
+#endif
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    return err;
+}
+
+static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx)
+{
+    TonemapOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+
+    if (ctx->util_mem)
+        clReleaseMemObject(ctx->util_mem);
+    if (ctx->kernel) {
+        cle = clReleaseKernel(ctx->kernel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(TonemapOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption tonemap_opencl_options[] = {
+    { "tonemap",      "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" },
+    {     "none",     0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE},              0, 0, FLAGS, "tonemap" },
+    {     "linear",   0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR},            0, 0, FLAGS, "tonemap" },
+    {     "gamma",    0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA},             0, 0, FLAGS, "tonemap" },
+    {     "clip",     0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP},              0, 0, FLAGS, "tonemap" },
+    {     "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD},          0, 0, FLAGS, "tonemap" },
+    {     "hable",    0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE},             0, 0, FLAGS, "tonemap" },
+    {     "mobius",   0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS},            0, 0, FLAGS, "tonemap" },
+    { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
+    { "t",        "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
+    {     "bt709",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709},         0, 0, FLAGS, "transfer" },
+    {     "bt2020",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10},     0, 0, FLAGS, "transfer" },
+    { "matrix", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" },
+    { "m",      "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" },
+    {     "bt709",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709},         0, 0, FLAGS, "matrix" },
+    {     "bt2020",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL},    0, 0, FLAGS, "matrix" },
+    { "primaries", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" },
+    { "p",         "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" },
+    {     "bt709",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709},         0, 0, FLAGS, "primaries" },
+    {     "bt2020",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020},        0, 0, FLAGS, "primaries" },
+    { "range",         "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "range" },
+    { "r",             "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "range" },
+    {     "tv",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG},         0, 0, FLAGS, "range" },
+    {     "pc",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG},         0, 0, FLAGS, "range" },
+    {     "limited",       0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG},         0, 0, FLAGS, "range" },
+    {     "full",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG},         0, 0, FLAGS, "range" },
+    { "format",    "output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, INT_MAX, FLAGS, "fmt" },
+    { "peak",      "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS },
+    { "param",     "tonemap parameter",   OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS },
+    { "desat",     "desaturation parameter",   OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS },
+    { "threshold", "scene detection threshold",   OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(tonemap_opencl);
+
+static const AVFilterPad tonemap_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &tonemap_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad tonemap_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &tonemap_opencl_config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_tonemap_opencl = {
+    .name           = "tonemap_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("perform HDR to SDR conversion with tonemapping"),
+    .priv_size      = sizeof(TonemapOpenCLContext),
+    .priv_class     = &tonemap_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &tonemap_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = tonemap_opencl_inputs,
+    .outputs        = tonemap_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};

diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index 3ff4cb4..74a4bbc 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c

@@ -52,6 +52,14 @@
     TRANSPOSE_CLOCK_FLIP,
 };
 
+typedef struct TransVtable {
+    void (*transpose_8x8)(uint8_t *src, ptrdiff_t src_linesize,
+                          uint8_t *dst, ptrdiff_t dst_linesize);
+    void (*transpose_block)(uint8_t *src, ptrdiff_t src_linesize,
+                            uint8_t *dst, ptrdiff_t dst_linesize,
+                            int w, int h);
+} TransVtable;
+
 typedef struct TransContext {
     const AVClass *class;
     int hsub, vsub;
@@ -61,11 +69,7 @@
     int passthrough;    ///< PassthroughType, landscape passthrough mode enabled
     int dir;            ///< TransposeDir
 
-    void (*transpose_8x8)(uint8_t *src, ptrdiff_t src_linesize,
-                          uint8_t *dst, ptrdiff_t dst_linesize);
-    void (*transpose_block)(uint8_t *src, ptrdiff_t src_linesize,
-                            uint8_t *dst, ptrdiff_t dst_linesize,
-                            int w, int h);
+    TransVtable vtables[4];
 } TransContext;
 
 static int query_formats(AVFilterContext *ctx)
@@ -233,19 +237,22 @@
     else
         outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
 
-    switch (s->pixsteps[0]) {
-    case 1: s->transpose_block = transpose_block_8_c;
-            s->transpose_8x8   = transpose_8x8_8_c;  break;
-    case 2: s->transpose_block = transpose_block_16_c;
-            s->transpose_8x8   = transpose_8x8_16_c; break;
-    case 3: s->transpose_block = transpose_block_24_c;
-            s->transpose_8x8   = transpose_8x8_24_c; break;
-    case 4: s->transpose_block = transpose_block_32_c;
-            s->transpose_8x8   = transpose_8x8_32_c; break;
-    case 6: s->transpose_block = transpose_block_48_c;
-            s->transpose_8x8   = transpose_8x8_48_c; break;
-    case 8: s->transpose_block = transpose_block_64_c;
-            s->transpose_8x8   = transpose_8x8_64_c; break;
+    for (int i = 0; i < 4; i++) {
+        TransVtable *v = &s->vtables[i];
+        switch (s->pixsteps[i]) {
+        case 1: v->transpose_block = transpose_block_8_c;
+                v->transpose_8x8   = transpose_8x8_8_c;  break;
+        case 2: v->transpose_block = transpose_block_16_c;
+                v->transpose_8x8   = transpose_8x8_16_c; break;
+        case 3: v->transpose_block = transpose_block_24_c;
+                v->transpose_8x8   = transpose_8x8_24_c; break;
+        case 4: v->transpose_block = transpose_block_32_c;
+                v->transpose_8x8   = transpose_8x8_32_c; break;
+        case 6: v->transpose_block = transpose_block_48_c;
+                v->transpose_8x8   = transpose_8x8_48_c; break;
+        case 8: v->transpose_block = transpose_block_64_c;
+                v->transpose_8x8   = transpose_8x8_64_c; break;
+        }
     }
 
     av_log(ctx, AV_LOG_VERBOSE,
@@ -290,6 +297,7 @@
         uint8_t *dst, *src;
         int dstlinesize, srclinesize;
         int x, y;
+        TransVtable *v = &s->vtables[plane];
 
         dstlinesize = out->linesize[plane];
         dst         = out->data[plane] + start * dstlinesize;
@@ -308,20 +316,20 @@
 
         for (y = start; y < end - 7; y += 8) {
             for (x = 0; x < outw - 7; x += 8) {
-                s->transpose_8x8(src + x * srclinesize + y * pixstep,
+                v->transpose_8x8(src + x * srclinesize + y * pixstep,
                                  srclinesize,
                                  dst + (y - start) * dstlinesize + x * pixstep,
                                  dstlinesize);
             }
             if (outw - x > 0 && end - y > 0)
-                s->transpose_block(src + x * srclinesize + y * pixstep,
+                v->transpose_block(src + x * srclinesize + y * pixstep,
                                    srclinesize,
                                    dst + (y - start) * dstlinesize + x * pixstep,
                                    dstlinesize, outw - x, end - y);
         }
 
         if (end - y > 0)
-            s->transpose_block(src + 0 * srclinesize + y * pixstep,
+            v->transpose_block(src + 0 * srclinesize + y * pixstep,
                                srclinesize,
                                dst + (y - start) * dstlinesize + 0 * pixstep,
                                dstlinesize, outw, end - y);

diff --git a/libavfilter/vf_transpose_npp.c b/libavfilter/vf_transpose_npp.c
new file mode 100644
index 0000000..1b3a5c0
--- /dev/null
+++ b/libavfilter/vf_transpose_npp.c

@@ -0,0 +1,487 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <nppi.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const enum AVPixelFormat supported_formats[] = {
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV444P
+};
+
+enum TransposeStage {
+    STAGE_ROTATE,
+    STAGE_TRANSPOSE,
+    STAGE_NB
+};
+
+enum Transpose {
+    NPP_TRANSPOSE_CCLOCK_FLIP = 0,
+    NPP_TRANSPOSE_CLOCK = 1,
+    NPP_TRANSPOSE_CCLOCK = 2,
+    NPP_TRANSPOSE_CLOCK_FLIP = 3
+};
+
+enum Passthrough {
+    NPP_TRANSPOSE_PT_TYPE_NONE = 0,
+    NPP_TRANSPOSE_PT_TYPE_LANDSCAPE,
+    NPP_TRANSPOSE_PT_TYPE_PORTRAIT
+};
+
+typedef struct NPPTransposeStageContext {
+    int stage_needed;
+    enum AVPixelFormat in_fmt;
+    enum AVPixelFormat out_fmt;
+    struct {
+        int width;
+        int height;
+    } planes_in[3], planes_out[3];
+    AVBufferRef *frames_ctx;
+    AVFrame     *frame;
+} NPPTransposeStageContext;
+
+typedef struct NPPTransposeContext {
+    const AVClass *class;
+    NPPTransposeStageContext stages[STAGE_NB];
+    AVFrame *tmp_frame;
+
+    int passthrough;    ///< PassthroughType, landscape passthrough mode enabled
+    int dir;            ///< TransposeDir
+} NPPTransposeContext;
+
+static int npptranspose_init(AVFilterContext *ctx)
+{
+    NPPTransposeContext *s = ctx->priv;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+        s->stages[i].frame = av_frame_alloc();
+        if (!s->stages[i].frame)
+            return AVERROR(ENOMEM);
+    }
+
+    s->tmp_frame = av_frame_alloc();
+    if (!s->tmp_frame)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static void npptranspose_uninit(AVFilterContext *ctx)
+{
+    NPPTransposeContext *s = ctx->priv;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+        av_frame_free(&s->stages[i].frame);
+        av_buffer_unref(&s->stages[i].frames_ctx);
+    }
+
+    av_frame_free(&s->tmp_frame);
+}
+
+static int npptranspose_query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pixel_formats[] = {
+        AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE,
+    };
+
+    AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
+    return ff_set_common_formats(ctx, pix_fmts);
+}
+
+static int init_stage(NPPTransposeStageContext *stage, AVBufferRef *device_ctx)
+{
+    AVBufferRef *out_ref = NULL;
+    AVHWFramesContext *out_ctx;
+    int in_sw, in_sh, out_sw, out_sh;
+    int ret, i;
+
+    av_pix_fmt_get_chroma_sub_sample(stage->in_fmt,  &in_sw,  &in_sh);
+    av_pix_fmt_get_chroma_sub_sample(stage->out_fmt, &out_sw, &out_sh);
+
+    if (!stage->planes_out[0].width) {
+        stage->planes_out[0].width  = stage->planes_in[0].width;
+        stage->planes_out[0].height = stage->planes_in[0].height;
+    }
+
+    for (i = 1; i < FF_ARRAY_ELEMS(stage->planes_in); i++) {
+        stage->planes_in[i].width   = stage->planes_in[0].width   >> in_sw;
+        stage->planes_in[i].height  = stage->planes_in[0].height  >> in_sh;
+        stage->planes_out[i].width  = stage->planes_out[0].width  >> out_sw;
+        stage->planes_out[i].height = stage->planes_out[0].height >> out_sh;
+    }
+
+    out_ref = av_hwframe_ctx_alloc(device_ctx);
+    if (!out_ref)
+        return AVERROR(ENOMEM);
+    out_ctx = (AVHWFramesContext*)out_ref->data;
+
+    out_ctx->format    = AV_PIX_FMT_CUDA;
+    out_ctx->sw_format = stage->out_fmt;
+    out_ctx->width     = FFALIGN(stage->planes_out[0].width,  32);
+    out_ctx->height    = FFALIGN(stage->planes_out[0].height, 32);
+
+    ret = av_hwframe_ctx_init(out_ref);
+    if (ret < 0)
+        goto fail;
+
+    av_frame_unref(stage->frame);
+    ret = av_hwframe_get_buffer(out_ref, stage->frame, 0);
+    if (ret < 0)
+        goto fail;
+
+    stage->frame->width  = stage->planes_out[0].width;
+    stage->frame->height = stage->planes_out[0].height;
+    av_buffer_unref(&stage->frames_ctx);
+    stage->frames_ctx = out_ref;
+
+    return 0;
+
+fail:
+    av_buffer_unref(&out_ref);
+    return ret;
+}
+
+static int format_is_supported(enum AVPixelFormat fmt)
+{
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+        if (supported_formats[i] == fmt)
+            return 1;
+
+    return 0;
+}
+
+static int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
+                                 int out_width, int out_height)
+{
+    NPPTransposeContext *s = ctx->priv;
+    AVHWFramesContext *in_frames_ctx;
+    enum AVPixelFormat format;
+    int i, ret, last_stage = -1;
+    int rot_width = out_width, rot_height = out_height;
+
+    /* check that we have a hw context */
+    if (!ctx->inputs[0]->hw_frames_ctx) {
+        av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
+        return AVERROR(EINVAL);
+    }
+
+    in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
+    format        = in_frames_ctx->sw_format;
+
+    if (!format_is_supported(format)) {
+        av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
+               av_get_pix_fmt_name(format));
+        return AVERROR(ENOSYS);
+    }
+
+    if (s->dir != NPP_TRANSPOSE_CCLOCK_FLIP) {
+        s->stages[STAGE_ROTATE].stage_needed = 1;
+    }
+
+    if (s->dir == NPP_TRANSPOSE_CCLOCK_FLIP || s->dir == NPP_TRANSPOSE_CLOCK_FLIP) {
+        s->stages[STAGE_TRANSPOSE].stage_needed = 1;
+
+        /* Rotating by 180° in case of clock_flip, or not at all for cclock_flip, so width/height unchanged by rotation */
+        rot_width = in_width;
+        rot_height = in_height;
+    }
+
+    s->stages[STAGE_ROTATE].in_fmt               = format;
+    s->stages[STAGE_ROTATE].out_fmt              = format;
+    s->stages[STAGE_ROTATE].planes_in[0].width   = in_width;
+    s->stages[STAGE_ROTATE].planes_in[0].height  = in_height;
+    s->stages[STAGE_ROTATE].planes_out[0].width  = rot_width;
+    s->stages[STAGE_ROTATE].planes_out[0].height = rot_height;
+    s->stages[STAGE_TRANSPOSE].in_fmt               = format;
+    s->stages[STAGE_TRANSPOSE].out_fmt              = format;
+    s->stages[STAGE_TRANSPOSE].planes_in[0].width   = rot_width;
+    s->stages[STAGE_TRANSPOSE].planes_in[0].height  = rot_height;
+    s->stages[STAGE_TRANSPOSE].planes_out[0].width  = out_width;
+    s->stages[STAGE_TRANSPOSE].planes_out[0].height = out_height;
+
+    /* init the hardware contexts */
+    for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+        if (!s->stages[i].stage_needed)
+            continue;
+        ret = init_stage(&s->stages[i], in_frames_ctx->device_ref);
+        if (ret < 0)
+            return ret;
+        last_stage = i;
+    }
+
+    if (last_stage >= 0) {
+        ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->stages[last_stage].frames_ctx);
+    } else {
+        ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(ctx->inputs[0]->hw_frames_ctx);
+        s->passthrough = 1;
+    }
+
+    if (!ctx->outputs[0]->hw_frames_ctx)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static int npptranspose_config_props(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx   = outlink->src;
+    AVFilterLink *inlink   = ctx->inputs[0];
+    NPPTransposeContext *s = ctx->priv;
+    int ret;
+
+    if ((inlink->w >= inlink->h && s->passthrough == NPP_TRANSPOSE_PT_TYPE_LANDSCAPE) ||
+        (inlink->w <= inlink->h && s->passthrough == NPP_TRANSPOSE_PT_TYPE_PORTRAIT))
+    {
+        if (inlink->hw_frames_ctx) {
+            outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
+            if (!outlink->hw_frames_ctx)
+                return AVERROR(ENOMEM);
+        }
+
+        av_log(ctx, AV_LOG_VERBOSE,
+               "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",
+               inlink->w, inlink->h, inlink->w, inlink->h);
+        return 0;
+    } else {
+        s->passthrough = NPP_TRANSPOSE_PT_TYPE_NONE;
+    }
+
+    outlink->w = inlink->h;
+    outlink->h = inlink->w;
+    outlink->sample_aspect_ratio = (AVRational){inlink->sample_aspect_ratio.den, inlink->sample_aspect_ratio.num};
+
+    ret = init_processing_chain(ctx, inlink->w, inlink->h, outlink->w, outlink->h);
+    if (ret < 0)
+        return ret;
+
+    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -transpose-> w:%d h:%d\n",
+           inlink->w, inlink->h, outlink->w, outlink->h);
+
+    return 0;
+}
+
+static int npptranspose_rotate(AVFilterContext *ctx, NPPTransposeStageContext *stage,
+                               AVFrame *out, AVFrame *in)
+{
+    NPPTransposeContext *s = ctx->priv;
+    NppStatus err;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(stage->planes_in) && i < FF_ARRAY_ELEMS(in->data) && in->data[i]; i++) {
+        int iw = stage->planes_in[i].width;
+        int ih = stage->planes_in[i].height;
+        int ow = stage->planes_out[i].width;
+        int oh = stage->planes_out[i].height;
+
+        // nppRotate uses 0,0 as the rotation point
+        // need to shift the image accordingly after rotation
+        // need to substract 1 to get the correct coordinates
+        double angle = s->dir == NPP_TRANSPOSE_CLOCK ? -90.0 : s->dir == NPP_TRANSPOSE_CCLOCK ? 90.0 : 180.0;
+        int shiftw = (s->dir == NPP_TRANSPOSE_CLOCK  || s->dir == NPP_TRANSPOSE_CLOCK_FLIP) ? ow - 1 : 0;
+        int shifth = (s->dir == NPP_TRANSPOSE_CCLOCK || s->dir == NPP_TRANSPOSE_CLOCK_FLIP) ? oh - 1 : 0;
+
+        err = nppiRotate_8u_C1R(in->data[i], (NppiSize){ iw, ih },
+                                in->linesize[i], (NppiRect){ 0, 0, iw, ih },
+                                out->data[i], out->linesize[i],
+                                (NppiRect){ 0, 0, ow, oh },
+                                angle, shiftw, shifth, NPPI_INTER_NN);
+        if (err != NPP_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "NPP rotate error: %d\n", err);
+            return AVERROR_UNKNOWN;
+        }
+    }
+
+    return 0;
+}
+
+static int npptranspose_transpose(AVFilterContext *ctx, NPPTransposeStageContext *stage,
+                                  AVFrame *out, AVFrame *in)
+{
+    NppStatus err;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(stage->planes_in) && i < FF_ARRAY_ELEMS(in->data) && in->data[i]; i++) {
+        int iw = stage->planes_in[i].width;
+        int ih = stage->planes_in[i].height;
+
+        err = nppiTranspose_8u_C1R(in->data[i], in->linesize[i],
+                                   out->data[i], out->linesize[i],
+                                   (NppiSize){ iw, ih });
+        if (err != NPP_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "NPP transpose error: %d\n", err);
+            return AVERROR_UNKNOWN;
+        }
+    }
+
+    return 0;
+}
+
+static int (*const npptranspose_process[])(AVFilterContext *ctx, NPPTransposeStageContext *stage,
+                                           AVFrame *out, AVFrame *in) = {
+    [STAGE_ROTATE]       = npptranspose_rotate,
+    [STAGE_TRANSPOSE]    = npptranspose_transpose
+};
+
+static int npptranspose_filter(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
+{
+    NPPTransposeContext *s = ctx->priv;
+    AVFrame *src = in;
+    int i, ret, last_stage = -1;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+        if (!s->stages[i].stage_needed)
+            continue;
+
+        ret = npptranspose_process[i](ctx, &s->stages[i], s->stages[i].frame, src);
+        if (ret < 0)
+            return ret;
+
+        src        = s->stages[i].frame;
+        last_stage = i;
+    }
+
+    if (last_stage < 0)
+        return AVERROR_BUG;
+
+    ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
+    if (ret < 0)
+        return ret;
+
+    av_frame_move_ref(out, src);
+    av_frame_move_ref(src, s->tmp_frame);
+
+    ret = av_frame_copy_props(out, in);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+    AVFilterContext              *ctx = link->dst;
+    NPPTransposeContext            *s = ctx->priv;
+    AVFilterLink             *outlink = ctx->outputs[0];
+    AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data;
+    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+    AVFrame *out = NULL;
+    CUresult err;
+    CUcontext dummy;
+    int ret = 0;
+
+    if (s->passthrough)
+        return ff_filter_frame(outlink, in);
+
+    out = av_frame_alloc();
+    if (!out) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS) {
+        ret = AVERROR_UNKNOWN;
+        goto fail;
+    }
+
+    ret = npptranspose_filter(ctx, out, in);
+
+    device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy);
+    if (ret < 0)
+        goto fail;
+
+    av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+
+fail:
+    av_frame_free(&in);
+    av_frame_free(&out);
+    return ret;
+}
+
+#define OFFSET(x) offsetof(NPPTransposeContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
+
+static const AVOption options[] = {
+    { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = NPP_TRANSPOSE_CCLOCK_FLIP }, 0, 3, FLAGS, "dir" },
+        { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_CCLOCK_FLIP }, 0, 0, FLAGS, "dir" },
+        { "clock",       "rotate clockwise",                            0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_CLOCK       }, 0, 0, FLAGS, "dir" },
+        { "cclock",      "rotate counter-clockwise",                    0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_CCLOCK      }, 0, 0, FLAGS, "dir" },
+        { "clock_flip",  "rotate clockwise with vertical flip",         0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_CLOCK_FLIP  }, 0, 0, FLAGS, "dir" },
+    { "passthrough", "do not apply transposition if the input matches the specified geometry", OFFSET(passthrough), AV_OPT_TYPE_INT, { .i64 = NPP_TRANSPOSE_PT_TYPE_NONE },  0, 2, FLAGS, "passthrough" },
+        { "none",      "always apply transposition",  0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_PT_TYPE_NONE },      0, 0, FLAGS, "passthrough" },
+        { "landscape", "preserve landscape geometry", 0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_PT_TYPE_LANDSCAPE }, 0, 0, FLAGS, "passthrough" },
+        { "portrait",  "preserve portrait geometry",  0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_PT_TYPE_PORTRAIT },  0, 0, FLAGS, "passthrough" },
+    { NULL },
+};
+
+static const AVClass npptranspose_class = {
+    .class_name = "npptranspose",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVFilterPad npptranspose_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = npptranspose_filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad npptranspose_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = npptranspose_config_props,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_transpose_npp = {
+    .name           = "transpose_npp",
+    .description    = NULL_IF_CONFIG_SMALL("NVIDIA Performance Primitives video transpose"),
+    .init           = npptranspose_init,
+    .uninit         = npptranspose_uninit,
+    .query_formats  = npptranspose_query_formats,
+    .priv_size      = sizeof(NPPTransposeContext),
+    .priv_class     = &npptranspose_class,
+    .inputs         = npptranspose_inputs,
+    .outputs        = npptranspose_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};

diff --git a/libavfilter/vf_unsharp.c b/libavfilter/vf_unsharp.c
index 438ff6d..41ccc56 100644
--- a/libavfilter/vf_unsharp.c
+++ b/libavfilter/vf_unsharp.c

@@ -46,7 +46,6 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "unsharp.h"
-#include "unsharp_opencl.h"
 
 static void apply_unsharp(      uint8_t *dst, int dst_stride,
                           const uint8_t *src, int src_stride,
@@ -134,10 +133,8 @@
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    int ret = 0;
     UnsharpContext *s = ctx->priv;
 
-
     set_filter_param(&s->luma,   s->lmsize_x, s->lmsize_y, s->lamount);
     set_filter_param(&s->chroma, s->cmsize_x, s->cmsize_y, s->camount);
 
@@ -146,16 +143,6 @@
         return AVERROR(EINVAL);
     }
     s->apply_unsharp = apply_unsharp_c;
-    if (!CONFIG_OPENCL && s->opencl) {
-        av_log(ctx, AV_LOG_ERROR, "OpenCL support was not enabled in this build, cannot be selected\n");
-        return AVERROR(EINVAL);
-    }
-    if (CONFIG_OPENCL && s->opencl) {
-        s->apply_unsharp = ff_opencl_apply_unsharp;
-        ret = ff_opencl_unsharp_init(ctx);
-        if (ret < 0)
-            return ret;
-    }
     return 0;
 }
 
@@ -227,10 +214,6 @@
 {
     UnsharpContext *s = ctx->priv;
 
-    if (CONFIG_OPENCL && s->opencl) {
-        ff_opencl_unsharp_uninit(ctx);
-    }
-
     free_filter_param(&s->luma);
     free_filter_param(&s->chroma);
 }
@@ -248,14 +231,9 @@
         return AVERROR(ENOMEM);
     }
     av_frame_copy_props(out, in);
-    if (CONFIG_OPENCL && s->opencl) {
-        ret = ff_opencl_unsharp_process_inout_buf(link->dst, in, out);
-        if (ret < 0)
-            goto end;
-    }
 
     ret = s->apply_unsharp(link->dst, in, out);
-end:
+
     av_frame_free(&in);
 
     if (ret < 0) {
@@ -282,7 +260,7 @@
     { "cy",             "set chroma matrix vertical size",   OFFSET(cmsize_y), AV_OPT_TYPE_INT,   { .i64 = 5 }, MIN_SIZE, MAX_SIZE, FLAGS },
     { "chroma_amount",  "set chroma effect strength",        OFFSET(camount),  AV_OPT_TYPE_FLOAT, { .dbl = 0 },       -2,        5, FLAGS },
     { "ca",             "set chroma effect strength",        OFFSET(camount),  AV_OPT_TYPE_FLOAT, { .dbl = 0 },       -2,        5, FLAGS },
-    { "opencl",         "use OpenCL filtering capabilities", OFFSET(opencl),   AV_OPT_TYPE_BOOL,  { .i64 = 0 },        0,        1, FLAGS },
+    { "opencl",         "ignored",                           OFFSET(opencl),   AV_OPT_TYPE_BOOL,  { .i64 = 0 },        0,        1, FLAGS },
     { NULL }
 };
 

diff --git a/libavfilter/vf_unsharp_opencl.c b/libavfilter/vf_unsharp_opencl.c
new file mode 100644
index 0000000..d76d1b1
--- /dev/null
+++ b/libavfilter/vf_unsharp_opencl.c

@@ -0,0 +1,412 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+
+#define MAX_DIAMETER 23
+
+typedef struct UnsharpOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    int              initialised;
+    cl_kernel        kernel;
+    cl_command_queue command_queue;
+
+    float luma_size_x;
+    float luma_size_y;
+    float luma_amount;
+    float chroma_size_x;
+    float chroma_size_y;
+    float chroma_amount;
+
+    int global;
+
+    int nb_planes;
+    struct {
+        float blur_x[MAX_DIAMETER];
+        float blur_y[MAX_DIAMETER];
+
+        cl_mem   matrix;
+        cl_mem   coef_x;
+        cl_mem   coef_y;
+
+        cl_int   size_x;
+        cl_int   size_y;
+        cl_float amount;
+        cl_float threshold;
+    } plane[4];
+} UnsharpOpenCLContext;
+
+
+static int unsharp_opencl_init(AVFilterContext *avctx)
+{
+    UnsharpOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    int err;
+
+    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_unsharp, 1);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+                     "command queue %d.\n", cle);
+
+    // Use global kernel if mask size will be too big for the local store..
+    ctx->global = (ctx->luma_size_x   > 17.0f ||
+                   ctx->luma_size_y   > 17.0f ||
+                   ctx->chroma_size_x > 17.0f ||
+                   ctx->chroma_size_y > 17.0f);
+
+    ctx->kernel = clCreateKernel(ctx->ocf.program,
+                                 ctx->global ? "unsharp_global"
+                                             : "unsharp_local", &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel)
+        clReleaseKernel(ctx->kernel);
+    return err;
+}
+
+static int unsharp_opencl_make_filter_params(AVFilterContext *avctx)
+{
+    UnsharpOpenCLContext *ctx = avctx->priv;
+    const AVPixFmtDescriptor *desc;
+    float *matrix;
+    double val, sum;
+    cl_int cle;
+    cl_mem buffer;
+    size_t matrix_bytes;
+    float diam_x, diam_y, amount;
+    int err, p, x, y, size_x, size_y;
+
+    desc = av_pix_fmt_desc_get(ctx->ocf.output_format);
+
+    ctx->nb_planes = 0;
+    for (p = 0; p < desc->nb_components; p++)
+        ctx->nb_planes = FFMAX(ctx->nb_planes, desc->comp[p].plane + 1);
+
+    for (p = 0; p < ctx->nb_planes; p++) {
+        if (p == 0 || (desc->flags & AV_PIX_FMT_FLAG_RGB)) {
+            diam_x = ctx->luma_size_x;
+            diam_y = ctx->luma_size_y;
+            amount = ctx->luma_amount;
+        } else {
+            diam_x = ctx->chroma_size_x;
+            diam_y = ctx->chroma_size_y;
+            amount = ctx->chroma_amount;
+        }
+        size_x = (int)ceil(diam_x) | 1;
+        size_y = (int)ceil(diam_y) | 1;
+        matrix_bytes = size_x * size_y * sizeof(float);
+
+        matrix = av_malloc(matrix_bytes);
+        if (!matrix) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        sum = 0.0;
+        for (x = 0; x < size_x; x++) {
+            double dx = (double)(x - size_x / 2) / diam_x;
+            sum += ctx->plane[p].blur_x[x] = exp(-16.0 * (dx * dx));
+        }
+        for (x = 0; x < size_x; x++)
+            ctx->plane[p].blur_x[x] /= sum;
+
+        sum = 0.0;
+        for (y = 0; y < size_y; y++) {
+            double dy = (double)(y - size_y / 2) / diam_y;
+            sum += ctx->plane[p].blur_y[y] = exp(-16.0 * (dy * dy));
+        }
+        for (y = 0; y < size_y; y++)
+            ctx->plane[p].blur_y[y] /= sum;
+
+        for (y = 0; y < size_y; y++) {
+            for (x = 0; x < size_x; x++) {
+                val = ctx->plane[p].blur_x[x] * ctx->plane[p].blur_y[y];
+                matrix[y * size_x + x] = val;
+            }
+        }
+
+        if (ctx->global) {
+            buffer = clCreateBuffer(ctx->ocf.hwctx->context,
+                                    CL_MEM_READ_ONLY     |
+                                    CL_MEM_COPY_HOST_PTR |
+                                    CL_MEM_HOST_NO_ACCESS,
+                                    matrix_bytes, matrix, &cle);
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create matrix buffer: "
+                             "%d.\n", cle);
+            ctx->plane[p].matrix = buffer;
+        } else {
+            buffer = clCreateBuffer(ctx->ocf.hwctx->context,
+                                    CL_MEM_READ_ONLY     |
+                                    CL_MEM_COPY_HOST_PTR |
+                                    CL_MEM_HOST_NO_ACCESS,
+                                    sizeof(ctx->plane[p].blur_x),
+                                    ctx->plane[p].blur_x, &cle);
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create x-coef buffer: "
+                             "%d.\n", cle);
+            ctx->plane[p].coef_x = buffer;
+
+            buffer = clCreateBuffer(ctx->ocf.hwctx->context,
+                                    CL_MEM_READ_ONLY     |
+                                    CL_MEM_COPY_HOST_PTR |
+                                    CL_MEM_HOST_NO_ACCESS,
+                                    sizeof(ctx->plane[p].blur_y),
+                                    ctx->plane[p].blur_y, &cle);
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create y-coef buffer: "
+                             "%d.\n", cle);
+            ctx->plane[p].coef_y = buffer;
+        }
+
+        av_freep(&matrix);
+
+        ctx->plane[p].size_x = size_x;
+        ctx->plane[p].size_y = size_y;
+        ctx->plane[p].amount = amount;
+    }
+
+    err = 0;
+fail:
+    av_freep(&matrix);
+    return err;
+}
+
+static int unsharp_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext    *avctx = inlink->dst;
+    AVFilterLink     *outlink = avctx->outputs[0];
+    UnsharpOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    cl_int cle;
+    size_t global_work[2];
+    size_t local_work[2];
+    cl_mem src, dst;
+    int err, p;
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+
+    if (!ctx->initialised) {
+        err = unsharp_opencl_init(avctx);
+        if (err < 0)
+            goto fail;
+
+        err = unsharp_opencl_make_filter_params(avctx);
+        if (err < 0)
+            goto fail;
+    }
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+        src = (cl_mem) input->data[p];
+        dst = (cl_mem)output->data[p];
+
+        if (!dst)
+            break;
+
+        CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem,   &dst);
+        CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem,   &src);
+        CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int,   &ctx->plane[p].size_x);
+        CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_int,   &ctx->plane[p].size_y);
+        CL_SET_KERNEL_ARG(ctx->kernel, 4, cl_float, &ctx->plane[p].amount);
+
+        if (ctx->global) {
+            CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_mem, &ctx->plane[p].matrix);
+        } else {
+            CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_mem, &ctx->plane[p].coef_x);
+            CL_SET_KERNEL_ARG(ctx->kernel, 6, cl_mem, &ctx->plane[p].coef_y);
+        }
+
+        err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p,
+                                                    ctx->global ? 0 : 16);
+        if (err < 0)
+            goto fail;
+
+        local_work[0]  = 16;
+        local_work[1]  = 16;
+
+        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+               p, global_work[0], global_work[1]);
+
+        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                     global_work, ctx->global ? NULL : local_work,
+                                     0, NULL, NULL);
+        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle);
+    }
+
+    cle = clFinish(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    av_frame_free(&input);
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    return err;
+}
+
+static av_cold void unsharp_opencl_uninit(AVFilterContext *avctx)
+{
+    UnsharpOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    int i;
+
+    for (i = 0; i < ctx->nb_planes; i++) {
+        if (ctx->plane[i].matrix)
+            clReleaseMemObject(ctx->plane[i].matrix);
+        if (ctx->plane[i].coef_x)
+            clReleaseMemObject(ctx->plane[i].coef_x);
+        if (ctx->plane[i].coef_y)
+            clReleaseMemObject(ctx->plane[i].coef_y);
+    }
+
+    if (ctx->kernel) {
+        cle = clReleaseKernel(ctx->kernel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(UnsharpOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption unsharp_opencl_options[] = {
+    { "luma_msize_x",     "Set luma mask horizontal diameter (pixels)",
+      OFFSET(luma_size_x),     AV_OPT_TYPE_FLOAT,
+      { .dbl = 5.0 },   1, MAX_DIAMETER, FLAGS },
+    { "lx",               "Set luma mask horizontal diameter (pixels)",
+      OFFSET(luma_size_x),     AV_OPT_TYPE_FLOAT,
+      { .dbl = 5.0 },   1, MAX_DIAMETER, FLAGS },
+    { "luma_msize_y",     "Set luma mask vertical diameter (pixels)",
+      OFFSET(luma_size_y),     AV_OPT_TYPE_FLOAT,
+      { .dbl = 5.0 },   1, MAX_DIAMETER, FLAGS },
+    { "ly",               "Set luma mask vertical diameter (pixels)",
+      OFFSET(luma_size_y),     AV_OPT_TYPE_FLOAT,
+      { .dbl = 5.0 },   1, MAX_DIAMETER, FLAGS },
+    { "luma_amount",      "Set luma amount (multiplier)",
+      OFFSET(luma_amount),     AV_OPT_TYPE_FLOAT,
+      { .dbl = 1.0 }, -10, 10, FLAGS },
+    { "la",               "Set luma amount (multiplier)",
+      OFFSET(luma_amount),     AV_OPT_TYPE_FLOAT,
+      { .dbl = 1.0 }, -10, 10, FLAGS },
+
+    { "chroma_msize_x",   "Set chroma mask horizontal diameter (pixels after subsampling)",
+      OFFSET(chroma_size_x),   AV_OPT_TYPE_FLOAT,
+      { .dbl = 5.0 },   1, MAX_DIAMETER, FLAGS },
+    { "cx",               "Set chroma mask horizontal diameter (pixels after subsampling)",
+      OFFSET(chroma_size_x),   AV_OPT_TYPE_FLOAT,
+      { .dbl = 5.0 },   1, MAX_DIAMETER, FLAGS },
+    { "chroma_msize_y",   "Set chroma mask vertical diameter (pixels after subsampling)",
+      OFFSET(chroma_size_y),   AV_OPT_TYPE_FLOAT,
+      { .dbl = 5.0 },   1, MAX_DIAMETER, FLAGS },
+    { "cy",               "Set chroma mask vertical diameter (pixels after subsampling)",
+      OFFSET(chroma_size_y),   AV_OPT_TYPE_FLOAT,
+      { .dbl = 5.0 },   1, MAX_DIAMETER, FLAGS },
+    { "chroma_amount",    "Set chroma amount (multiplier)",
+      OFFSET(chroma_amount),   AV_OPT_TYPE_FLOAT,
+      { .dbl = 0.0 }, -10, 10, FLAGS },
+    { "ca",               "Set chroma amount (multiplier)",
+      OFFSET(chroma_amount),   AV_OPT_TYPE_FLOAT,
+      { .dbl = 0.0 }, -10, 10, FLAGS },
+
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(unsharp_opencl);
+
+static const AVFilterPad unsharp_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &unsharp_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad unsharp_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_unsharp_opencl = {
+    .name           = "unsharp_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply unsharp mask to input video"),
+    .priv_size      = sizeof(UnsharpOpenCLContext),
+    .priv_class     = &unsharp_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &unsharp_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = unsharp_opencl_inputs,
+    .outputs        = unsharp_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};

diff --git a/libavfilter/vf_vfrdet.c b/libavfilter/vf_vfrdet.c
new file mode 100644
index 0000000..cac96e2
--- /dev/null
+++ b/libavfilter/vf_vfrdet.c

@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+#include "internal.h"
+
+typedef struct VFRDETContext {
+    const AVClass *class;
+
+    int64_t prev_pts;
+    int64_t delta;
+    int64_t min_delta;
+    int64_t max_delta;
+
+    uint64_t vfr;
+    uint64_t cfr;
+} VFRDETContext;
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    VFRDETContext *s = ctx->priv;
+
+    if (s->prev_pts != AV_NOPTS_VALUE) {
+        int64_t delta = in->pts - s->prev_pts;
+
+        if (s->delta == AV_NOPTS_VALUE) {
+            s->delta = delta;
+        }
+
+        if (s->delta != delta) {
+            s->vfr++;
+            s->delta = delta;
+            s->min_delta = FFMIN(delta, s->min_delta);
+            s->max_delta = FFMAX(delta, s->max_delta);
+        } else {
+            s->cfr++;
+        }
+    }
+
+    s->prev_pts = in->pts;
+
+    return ff_filter_frame(ctx->outputs[0], in);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    VFRDETContext *s = ctx->priv;
+
+    s->prev_pts = AV_NOPTS_VALUE;
+    s->delta    = AV_NOPTS_VALUE;
+    s->min_delta = INT64_MAX;
+    s->max_delta = INT64_MIN;
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    VFRDETContext *s = ctx->priv;
+
+    av_log(ctx, AV_LOG_INFO, "VFR:%f (%"PRIu64"/%"PRIu64")", s->vfr / (float)(s->vfr + s->cfr), s->vfr, s->cfr);
+    if (s->vfr)
+        av_log(ctx, AV_LOG_INFO, " min: %"PRId64" max: %"PRId64")", s->min_delta, s->max_delta);
+    av_log(ctx, AV_LOG_INFO, "\n");
+}
+
+static const AVFilterPad vfrdet_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad vfrdet_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_vfrdet = {
+    .name        = "vfrdet",
+    .description = NULL_IF_CONFIG_SMALL("Variable frame rate detect filter."),
+    .priv_size   = sizeof(VFRDETContext),
+    .init        = init,
+    .uninit      = uninit,
+    .inputs      = vfrdet_inputs,
+    .outputs     = vfrdet_outputs,
+};

diff --git a/libavfilter/vf_vidstabdetect.c b/libavfilter/vf_vidstabdetect.c
index 63a178a..fd7ff3b 100644
--- a/libavfilter/vf_vidstabdetect.c
+++ b/libavfilter/vf_vidstabdetect.c

@@ -107,10 +107,11 @@
     VSMotionDetect* md = &(s->md);
     VSFrameInfo fi;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    int is_planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR;
 
     vsFrameInfoInit(&fi, inlink->w, inlink->h,
                     ff_av2vs_pixfmt(ctx, inlink->format));
-    if (fi.bytesPerPixel != av_get_bits_per_pixel(desc)/8) {
+    if (!is_planar && fi.bytesPerPixel != av_get_bits_per_pixel(desc)/8) {
         av_log(ctx, AV_LOG_ERROR, "pixel-format error: wrong bits/per/pixel, please report a BUG");
         return AVERROR(EINVAL);
     }

diff --git a/libavfilter/vf_vidstabtransform.c b/libavfilter/vf_vidstabtransform.c
index 11a0e3d..d1ec139 100644
--- a/libavfilter/vf_vidstabtransform.c
+++ b/libavfilter/vf_vidstabtransform.c

@@ -146,6 +146,7 @@
     FILE *f;
 
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    int is_planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR;
 
     VSTransformData *td = &(tc->td);
 
@@ -161,7 +162,7 @@
         return AVERROR(EINVAL);
     }
 
-    if (fi_src.bytesPerPixel != av_get_bits_per_pixel(desc)/8 ||
+    if ((!is_planar && fi_src.bytesPerPixel != av_get_bits_per_pixel(desc)/8) ||
         fi_src.log2ChromaW != desc->log2_chroma_w ||
         fi_src.log2ChromaH != desc->log2_chroma_h) {
         av_log(ctx, AV_LOG_ERROR, "pixel-format error: bpp %i<>%i  ",

diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
new file mode 100644
index 0000000..41a9f38
--- /dev/null
+++ b/libavfilter/vf_vpp_qsv.c

@@ -0,0 +1,429 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ ** @file
+ ** Hardware accelerated common filters based on Intel Quick Sync Video VPP
+ **/
+
+#include <float.h>
+
+#include "libavutil/opt.h"
+#include "libavutil/eval.h"
+#include "libavutil/avassert.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/mathematics.h"
+
+#include "formats.h"
+#include "internal.h"
+#include "avfilter.h"
+#include "libavcodec/avcodec.h"
+#include "libavformat/avformat.h"
+
+#include "qsvvpp.h"
+
+#define OFFSET(x) offsetof(VPPContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+
+/* number of video enhancement filters */
+#define ENH_FILTERS_COUNT (5)
+
+typedef struct VPPContext{
+    const AVClass *class;
+
+    QSVVPPContext *qsv;
+
+    /* Video Enhancement Algorithms */
+    mfxExtVPPDeinterlacing  deinterlace_conf;
+    mfxExtVPPFrameRateConversion frc_conf;
+    mfxExtVPPDenoise denoise_conf;
+    mfxExtVPPDetail detail_conf;
+    mfxExtVPPProcAmp procamp_conf;
+
+    int out_width;
+    int out_height;
+
+    AVRational framerate;       /* target framerate */
+    int use_frc;                /* use framerate conversion */
+    int deinterlace;            /* deinterlace mode : 0=off, 1=bob, 2=advanced */
+    int denoise;                /* Enable Denoise algorithm. Value [0, 100] */
+    int detail;                 /* Enable Detail Enhancement algorithm. */
+                                /* Level is the optional, value [0, 100] */
+    int use_crop;               /* 1 = use crop; 0=none */
+    int crop_w;
+    int crop_h;
+    int crop_x;
+    int crop_y;
+
+    /* param for the procamp */
+    int    procamp;            /* enable procamp */
+    float  hue;
+    float  saturation;
+    float  contrast;
+    float  brightness;
+
+    char *cx, *cy, *cw, *ch;
+    char *ow, *oh;
+} VPPContext;
+
+static const AVOption options[] = {
+    { "deinterlace", "deinterlace mode: 0=off, 1=bob, 2=advanced", OFFSET(deinterlace), AV_OPT_TYPE_INT,      { .i64 = 0 }, 0, MFX_DEINTERLACING_ADVANCED, .flags = FLAGS, "deinterlace" },
+    { "bob",         "Bob deinterlace mode.",                      0,                   AV_OPT_TYPE_CONST,    { .i64 = MFX_DEINTERLACING_BOB },            .flags = FLAGS, "deinterlace" },
+    { "advanced",    "Advanced deinterlace mode. ",                0,                   AV_OPT_TYPE_CONST,    { .i64 = MFX_DEINTERLACING_ADVANCED },       .flags = FLAGS, "deinterlace" },
+
+    { "denoise",     "denoise level [0, 100]",       OFFSET(denoise),     AV_OPT_TYPE_INT,      { .i64 = 0 }, 0, 100, .flags = FLAGS },
+    { "detail",      "enhancement level [0, 100]",   OFFSET(detail),      AV_OPT_TYPE_INT,      { .i64 = 0 }, 0, 100, .flags = FLAGS },
+    { "framerate",   "output framerate",             OFFSET(framerate),   AV_OPT_TYPE_RATIONAL, { .dbl = 0.0 },0, DBL_MAX, .flags = FLAGS },
+    { "procamp",     "Enable ProcAmp",               OFFSET(procamp),     AV_OPT_TYPE_INT,      { .i64 = 0 }, 0, 1, .flags = FLAGS},
+    { "hue",         "ProcAmp hue",                  OFFSET(hue),         AV_OPT_TYPE_FLOAT,    { .dbl = 0.0 }, -180.0, 180.0, .flags = FLAGS},
+    { "saturation",  "ProcAmp saturation",           OFFSET(saturation),  AV_OPT_TYPE_FLOAT,    { .dbl = 1.0 }, 0.0, 10.0, .flags = FLAGS},
+    { "contrast",    "ProcAmp contrast",             OFFSET(contrast),    AV_OPT_TYPE_FLOAT,    { .dbl = 1.0 }, 0.0, 10.0, .flags = FLAGS},
+    { "brightness",  "ProcAmp brightness",           OFFSET(brightness),  AV_OPT_TYPE_FLOAT,    { .dbl = 0.0 }, -100.0, 100.0, .flags = FLAGS},
+
+    { "cw",   "set the width crop area expression",   OFFSET(cw), AV_OPT_TYPE_STRING, { .str = "iw" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "ch",   "set the height crop area expression",  OFFSET(ch), AV_OPT_TYPE_STRING, { .str = "ih" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "cx",   "set the x crop area expression",       OFFSET(cx), AV_OPT_TYPE_STRING, { .str = "(in_w-out_w)/2" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "cy",   "set the y crop area expression",       OFFSET(cy), AV_OPT_TYPE_STRING, { .str = "(in_h-out_h)/2" }, CHAR_MIN, CHAR_MAX, FLAGS },
+
+    { "w",      "Output video width",  OFFSET(ow), AV_OPT_TYPE_STRING, { .str="cw" }, 0, 255, .flags = FLAGS },
+    { "width",  "Output video width",  OFFSET(ow), AV_OPT_TYPE_STRING, { .str="cw" }, 0, 255, .flags = FLAGS },
+    { "h",      "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
+    { "height", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
+    { NULL }
+};
+
+static const char *const var_names[] = {
+    "iw", "in_w",
+    "ih", "in_h",
+    "ow", "out_w", "w",
+    "oh", "out_h", "h",
+    "cw",
+    "ch",
+    "cx",
+    "cy",
+    NULL
+};
+
+enum var_name {
+    VAR_iW, VAR_IN_W,
+    VAR_iH, VAR_IN_H,
+    VAR_oW, VAR_OUT_W, VAR_W,
+    VAR_oH, VAR_OUT_H, VAR_H,
+    CW,
+    CH,
+    CX,
+    CY,
+    VAR_VARS_NB
+};
+
+static int eval_expr(AVFilterContext *ctx)
+{
+#define PASS_EXPR(e, s) {\
+    ret = av_expr_parse(&e, s, var_names, NULL, NULL, NULL, NULL, 0, ctx); \
+    if (ret < 0) {\
+        av_log(ctx, AV_LOG_ERROR, "Error when passing '%s'.\n", s);\
+        goto release;\
+    }\
+}
+#define CALC_EXPR(e, v, i) {\
+    i = v = av_expr_eval(e, var_values, NULL); \
+}
+    VPPContext *vpp = ctx->priv;
+    double  var_values[VAR_VARS_NB] = { NAN };
+    AVExpr *w_expr  = NULL, *h_expr  = NULL;
+    AVExpr *cw_expr = NULL, *ch_expr = NULL;
+    AVExpr *cx_expr = NULL, *cy_expr = NULL;
+    int     ret = 0;
+
+    PASS_EXPR(cw_expr, vpp->cw);
+    PASS_EXPR(ch_expr, vpp->ch);
+
+    PASS_EXPR(w_expr, vpp->ow);
+    PASS_EXPR(h_expr, vpp->oh);
+
+    PASS_EXPR(cx_expr, vpp->cx);
+    PASS_EXPR(cy_expr, vpp->cy);
+
+    var_values[VAR_iW] =
+    var_values[VAR_IN_W] = ctx->inputs[0]->w;
+
+    var_values[VAR_iH] =
+    var_values[VAR_IN_H] = ctx->inputs[0]->h;
+
+    /* crop params */
+    CALC_EXPR(cw_expr, var_values[CW], vpp->crop_w);
+    CALC_EXPR(ch_expr, var_values[CH], vpp->crop_h);
+
+    /* calc again in case cw is relative to ch */
+    CALC_EXPR(cw_expr, var_values[CW], vpp->crop_w);
+
+    CALC_EXPR(w_expr,
+            var_values[VAR_OUT_W] = var_values[VAR_oW] = var_values[VAR_W],
+            vpp->out_width);
+    CALC_EXPR(h_expr,
+            var_values[VAR_OUT_H] = var_values[VAR_oH] = var_values[VAR_H],
+            vpp->out_height);
+
+    /* calc again in case ow is relative to oh */
+    CALC_EXPR(w_expr,
+            var_values[VAR_OUT_W] = var_values[VAR_oW] = var_values[VAR_W],
+            vpp->out_width);
+
+
+    CALC_EXPR(cx_expr, var_values[CX], vpp->crop_x);
+    CALC_EXPR(cy_expr, var_values[CY], vpp->crop_y);
+
+    /* calc again in case cx is relative to cy */
+    CALC_EXPR(cx_expr, var_values[CX], vpp->crop_x);
+
+    if ((vpp->crop_w != var_values[VAR_iW]) || (vpp->crop_h != var_values[VAR_iH]))
+        vpp->use_crop = 1;
+
+release:
+    av_expr_free(w_expr);
+    av_expr_free(h_expr);
+    av_expr_free(cw_expr);
+    av_expr_free(ch_expr);
+    av_expr_free(cx_expr);
+    av_expr_free(cy_expr);
+#undef PASS_EXPR
+#undef CALC_EXPR
+
+    return ret;
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    VPPContext      *vpp = ctx->priv;
+    int              ret;
+
+    if (vpp->framerate.den == 0 || vpp->framerate.num == 0)
+        vpp->framerate = inlink->frame_rate;
+
+    if (av_cmp_q(vpp->framerate, inlink->frame_rate))
+        vpp->use_frc = 1;
+
+    ret = eval_expr(ctx);
+    if (ret != 0) {
+        av_log(ctx, AV_LOG_ERROR, "Fail to eval expr.\n");
+        return ret;
+    }
+
+    if (vpp->out_height == 0 || vpp->out_width == 0) {
+        vpp->out_width  = inlink->w;
+        vpp->out_height = inlink->h;
+    }
+
+    if (vpp->use_crop) {
+        vpp->crop_x = FFMAX(vpp->crop_x, 0);
+        vpp->crop_y = FFMAX(vpp->crop_y, 0);
+
+        if(vpp->crop_w + vpp->crop_x > inlink->w)
+           vpp->crop_x = inlink->w - vpp->crop_w;
+        if(vpp->crop_h + vpp->crop_y > inlink->h)
+           vpp->crop_y = inlink->h - vpp->crop_h;
+    }
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    VPPContext      *vpp = ctx->priv;
+    QSVVPPParam     param = { NULL };
+    QSVVPPCrop      crop  = { 0 };
+    mfxExtBuffer    *ext_buf[ENH_FILTERS_COUNT];
+    AVFilterLink    *inlink = ctx->inputs[0];
+
+    outlink->w          = vpp->out_width;
+    outlink->h          = vpp->out_height;
+    outlink->frame_rate = vpp->framerate;
+    outlink->time_base  = av_inv_q(vpp->framerate);
+
+    param.filter_frame  = NULL;
+    param.out_sw_format = AV_PIX_FMT_NV12;
+    param.num_ext_buf   = 0;
+    param.ext_buf       = ext_buf;
+
+    if (vpp->use_crop) {
+        crop.in_idx = 0;
+        crop.x = vpp->crop_x;
+        crop.y = vpp->crop_y;
+        crop.w = vpp->crop_w;
+        crop.h = vpp->crop_h;
+
+        param.num_crop = 1;
+        param.crop     = &crop;
+    }
+
+    if (vpp->deinterlace) {
+        memset(&vpp->deinterlace_conf, 0, sizeof(mfxExtVPPDeinterlacing));
+        vpp->deinterlace_conf.Header.BufferId = MFX_EXTBUFF_VPP_DEINTERLACING;
+        vpp->deinterlace_conf.Header.BufferSz = sizeof(mfxExtVPPDeinterlacing);
+        vpp->deinterlace_conf.Mode = vpp->deinterlace == 1 ?
+                                     MFX_DEINTERLACING_BOB : MFX_DEINTERLACING_ADVANCED;
+
+        param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->deinterlace_conf;
+    }
+
+    if (vpp->use_frc) {
+        memset(&vpp->frc_conf, 0, sizeof(mfxExtVPPFrameRateConversion));
+        vpp->frc_conf.Header.BufferId = MFX_EXTBUFF_VPP_FRAME_RATE_CONVERSION;
+        vpp->frc_conf.Header.BufferSz = sizeof(mfxExtVPPFrameRateConversion);
+        vpp->frc_conf.Algorithm = MFX_FRCALGM_DISTRIBUTED_TIMESTAMP;
+
+        param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->frc_conf;
+    }
+
+    if (vpp->denoise) {
+        memset(&vpp->denoise_conf, 0, sizeof(mfxExtVPPDenoise));
+        vpp->denoise_conf.Header.BufferId = MFX_EXTBUFF_VPP_DENOISE;
+        vpp->denoise_conf.Header.BufferSz = sizeof(mfxExtVPPDenoise);
+        vpp->denoise_conf.DenoiseFactor   = vpp->denoise;
+
+        param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->denoise_conf;
+    }
+
+    if (vpp->detail) {
+        memset(&vpp->detail_conf, 0, sizeof(mfxExtVPPDetail));
+        vpp->detail_conf.Header.BufferId  = MFX_EXTBUFF_VPP_DETAIL;
+        vpp->detail_conf.Header.BufferSz  = sizeof(mfxExtVPPDetail);
+        vpp->detail_conf.DetailFactor = vpp->detail;
+
+        param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->detail_conf;
+    }
+
+    if (vpp->procamp) {
+        memset(&vpp->procamp_conf, 0, sizeof(mfxExtVPPProcAmp));
+        vpp->procamp_conf.Header.BufferId  = MFX_EXTBUFF_VPP_PROCAMP;
+        vpp->procamp_conf.Header.BufferSz  = sizeof(mfxExtVPPProcAmp);
+        vpp->procamp_conf.Hue              = vpp->hue;
+        vpp->procamp_conf.Saturation       = vpp->saturation;
+        vpp->procamp_conf.Contrast         = vpp->contrast;
+        vpp->procamp_conf.Brightness       = vpp->brightness;
+
+        param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->procamp_conf;
+    }
+
+    if (vpp->use_frc || vpp->use_crop || vpp->deinterlace || vpp->denoise ||
+        vpp->detail || vpp->procamp || inlink->w != outlink->w || inlink->h != outlink->h)
+        return ff_qsvvpp_create(ctx, &vpp->qsv, &param);
+    else {
+        av_log(ctx, AV_LOG_VERBOSE, "qsv vpp pass through mode.\n");
+        if (inlink->hw_frames_ctx)
+            outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
+{
+    int              ret = 0;
+    AVFilterContext  *ctx = inlink->dst;
+    VPPContext       *vpp = inlink->dst->priv;
+    AVFilterLink     *outlink = ctx->outputs[0];
+
+    if (vpp->qsv) {
+        ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
+        av_frame_free(&picref);
+    } else {
+        if (picref->pts != AV_NOPTS_VALUE)
+            picref->pts = av_rescale_q(picref->pts, inlink->time_base, outlink->time_base);
+        ret = ff_filter_frame(outlink, picref);
+    }
+
+    return ret;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    int ret;
+    AVFilterFormats *in_fmts, *out_fmts;
+    static const enum AVPixelFormat in_pix_fmts[] = {
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_NV12,
+        AV_PIX_FMT_YUYV422,
+        AV_PIX_FMT_RGB32,
+        AV_PIX_FMT_QSV,
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat out_pix_fmts[] = {
+        AV_PIX_FMT_NV12,
+        AV_PIX_FMT_QSV,
+        AV_PIX_FMT_NONE
+    };
+
+    in_fmts  = ff_make_format_list(in_pix_fmts);
+    out_fmts = ff_make_format_list(out_pix_fmts);
+    ret = ff_formats_ref(in_fmts, &ctx->inputs[0]->out_formats);
+    if (ret < 0)
+        return ret;
+    ret = ff_formats_ref(out_fmts, &ctx->outputs[0]->in_formats);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+static av_cold void vpp_uninit(AVFilterContext *ctx)
+{
+    VPPContext *vpp = ctx->priv;
+
+    ff_qsvvpp_free(&vpp->qsv);
+}
+
+static const AVClass vpp_class = {
+    .class_name = "vpp_qsv",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVFilterPad vpp_inputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_input,
+        .filter_frame  = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad vpp_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_vpp_qsv = {
+    .name          = "vpp_qsv",
+    .description   = NULL_IF_CONFIG_SMALL("Quick Sync Video VPP."),
+    .priv_size     = sizeof(VPPContext),
+    .query_formats = query_formats,
+    .uninit        = vpp_uninit,
+    .inputs        = vpp_inputs,
+    .outputs       = vpp_outputs,
+    .priv_class    = &vpp_class,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};

diff --git a/libavfilter/vf_waveform.c b/libavfilter/vf_waveform.c
index 0c57474..bcee57c 100644
--- a/libavfilter/vf_waveform.c
+++ b/libavfilter/vf_waveform.c

@@ -29,6 +29,14 @@
 #include "internal.h"
 #include "video.h"
 
+typedef struct ThreadData {
+    AVFrame *in;
+    AVFrame *out;
+    int component;
+    int offset_y;
+    int offset_x;
+} ThreadData;
+
 enum FilterType {
     LOWPASS,
     FLAT,
@@ -36,6 +44,7 @@
     CHROMA,
     COLOR,
     ACOLOR,
+    XFLAT,
     NB_FILTERS
 };
 
@@ -89,14 +98,13 @@
     int            max;
     int            size;
     int            scale;
+    uint8_t        grat_yuva_color[4];
     int            shift_w[4], shift_h[4];
     GraticuleLines *glines;
     int            nb_glines;
-    void (*waveform)(struct WaveformContext *s,
-                     AVFrame *in, AVFrame *out,
-                     int component, int intensity,
-                     int offset_y, int offset_x,
-                     int column, int mirror);
+
+    int (*waveform_slice)(AVFilterContext *ctx, void *arg,
+                          int jobnr, int nb_jobs);
     void (*graticulef)(struct WaveformContext *s, AVFrame *out);
     const AVPixFmtDescriptor *desc;
     const AVPixFmtDescriptor *odesc;
@@ -135,10 +143,12 @@
         { "chroma",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=CHROMA},  0, 0, FLAGS, "filter" },
         { "color",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=COLOR},   0, 0, FLAGS, "filter" },
         { "acolor",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=ACOLOR},  0, 0, FLAGS, "filter" },
-    { "graticule", "set graticule", OFFSET(graticule), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "graticule" },
-    { "g",         "set graticule", OFFSET(graticule), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "graticule" },
-        { "none",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "graticule" },
-        { "green", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "graticule" },
+        { "xflat",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=XFLAT},   0, 0, FLAGS, "filter" },
+    { "graticule", "set graticule", OFFSET(graticule), AV_OPT_TYPE_INT, {.i64=0}, 0, 2, FLAGS, "graticule" },
+    { "g",         "set graticule", OFFSET(graticule), AV_OPT_TYPE_INT, {.i64=0}, 0, 2, FLAGS, "graticule" },
+        { "none",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "graticule" },
+        { "green",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "graticule" },
+        { "orange", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "graticule" },
     { "opacity", "set graticule opacity", OFFSET(opacity), AV_OPT_TYPE_FLOAT, {.dbl=0.75}, 0, 1, FLAGS },
     { "o",       "set graticule opacity", OFFSET(opacity), AV_OPT_TYPE_FLOAT, {.dbl=0.75}, 0, 1, FLAGS },
     { "flags", "set graticule flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64=1}, 0, 3, FLAGS, "flags" },
@@ -291,10 +301,12 @@
     switch (s->filter) {
     case LOWPASS: in_pix_fmts = in_lowpass_pix_fmts; break;
     case CHROMA:
+    case XFLAT:
     case AFLAT:
     case FLAT:    in_pix_fmts = in_flat_pix_fmts;    break;
     case ACOLOR:
     case COLOR:   in_pix_fmts = in_color_pix_fmts;   break;
+    default: return AVERROR_BUG;
     }
 
     if (!ctx->inputs[0]->out_formats) {
@@ -618,11 +630,28 @@
         *target = 255;
 }
 
+static void update_cr(uint8_t *target, int unused, int intensity)
+{
+    if (*target - intensity > 0)
+        *target -= intensity;
+    else
+        *target = 0;
+}
+
+static void update16_cr(uint16_t *target, int unused, int intensity, int limit)
+{
+    if (*target - intensity > 0)
+        *target -= intensity;
+    else
+        *target = 0;
+}
+
 static av_always_inline void lowpass16(WaveformContext *s,
                                        AVFrame *in, AVFrame *out,
                                        int component, int intensity,
                                        int offset_y, int offset_x,
-                                       int column, int mirror)
+                                       int column, int mirror,
+                                       int jobnr, int nb_jobs)
 {
     const int plane = s->desc->comp[component].plane;
     const int shift_w = s->shift_w[component];
@@ -634,22 +663,26 @@
     const int max = limit - intensity;
     const int src_h = AV_CEIL_RSHIFT(in->height, shift_h);
     const int src_w = AV_CEIL_RSHIFT(in->width, shift_w);
-    const uint16_t *src_data = (const uint16_t *)in->data[plane];
-    uint16_t *dst_data = (uint16_t *)out->data[plane] + offset_y * dst_linesize + offset_x;
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;
+    const int step = column ? 1 << shift_w : 1 << shift_h;
+    const uint16_t *src_data = (const uint16_t *)in->data[plane] + sliceh_start * src_linesize;
+    uint16_t *dst_data = (uint16_t *)out->data[plane] + (offset_y + sliceh_start * step) * dst_linesize + offset_x;
     uint16_t * const dst_bottom_line = dst_data + dst_linesize * (s->size - 1);
     uint16_t * const dst_line = (mirror ? dst_bottom_line : dst_data);
-    const int step = column ? 1 << shift_w : 1 << shift_h;
     const uint16_t *p;
     int y;
 
     if (!column && mirror)
         dst_data += s->size;
 
-    for (y = 0; y < src_h; y++) {
-        const uint16_t *src_data_end = src_data + src_w;
-        uint16_t *dst = dst_line;
+    for (y = sliceh_start; y < sliceh_end; y++) {
+        const uint16_t *src_data_end = src_data + slicew_end;
+        uint16_t *dst = dst_line + slicew_start * step;
 
-        for (p = src_data; p < src_data_end; p++) {
+        for (p = src_data + slicew_start; p < src_data_end; p++) {
             uint16_t *target;
             int i = 0, v = FFMIN(*p, limit);
 
@@ -673,19 +706,26 @@
         src_data += src_linesize;
         dst_data += dst_linesize * step;
     }
-
-    envelope16(s, out, plane, plane, column ? offset_x : offset_y);
 }
 
-#define LOWPASS16_FUNC(name, column, mirror)               \
-static void lowpass16_##name(WaveformContext *s,           \
-                             AVFrame *in, AVFrame *out,    \
-                             int component, int intensity, \
-                             int offset_y, int offset_x,   \
-                             int unused1, int unused2)     \
-{                                                          \
-    lowpass16(s, in, out, component, intensity,            \
-              offset_y, offset_x, column, mirror);         \
+#define LOWPASS16_FUNC(name, column, mirror)        \
+static int lowpass16_##name(AVFilterContext *ctx,   \
+                             void *arg, int jobnr,  \
+                             int nb_jobs)           \
+{                                                   \
+    WaveformContext *s = ctx->priv;                 \
+    ThreadData *td = arg;                           \
+    AVFrame *in = td->in;                           \
+    AVFrame *out = td->out;                         \
+    int component = td->component;                  \
+    int offset_y = td->offset_y;                    \
+    int offset_x = td->offset_x;                    \
+                                                    \
+    lowpass16(s, in, out, component, s->intensity,  \
+              offset_y, offset_x, column, mirror,   \
+              jobnr, nb_jobs);                      \
+                                                    \
+    return 0;                                       \
 }
 
 LOWPASS16_FUNC(column_mirror, 1, 1)
@@ -697,7 +737,8 @@
                                      AVFrame *in, AVFrame *out,
                                      int component, int intensity,
                                      int offset_y, int offset_x,
-                                     int column, int mirror)
+                                     int column, int mirror,
+                                     int jobnr, int nb_jobs)
 {
     const int plane = s->desc->comp[component].plane;
     const int shift_w = s->shift_w[component];
@@ -708,22 +749,26 @@
     const int max = 255 - intensity;
     const int src_h = AV_CEIL_RSHIFT(in->height, shift_h);
     const int src_w = AV_CEIL_RSHIFT(in->width, shift_w);
-    const uint8_t *src_data = in->data[plane];
-    uint8_t *dst_data = out->data[plane] + offset_y * dst_linesize + offset_x;
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;
+    const int step = column ? 1 << shift_w : 1 << shift_h;
+    const uint8_t *src_data = in->data[plane] + sliceh_start * src_linesize;
+    uint8_t *dst_data = out->data[plane] + (offset_y + sliceh_start * step) * dst_linesize + offset_x;
     uint8_t * const dst_bottom_line = dst_data + dst_linesize * (s->size - 1);
     uint8_t * const dst_line = (mirror ? dst_bottom_line : dst_data);
-    const int step = column ? 1 << shift_w : 1 << shift_h;
     const uint8_t *p;
     int y;
 
     if (!column && mirror)
         dst_data += s->size;
 
-    for (y = 0; y < src_h; y++) {
-        const uint8_t *src_data_end = src_data + src_w;
-        uint8_t *dst = dst_line;
+    for (y = sliceh_start; y < sliceh_end; y++) {
+        const uint8_t *src_data_end = src_data + slicew_end;
+        uint8_t *dst = dst_line + slicew_start * step;
 
-        for (p = src_data; p < src_data_end; p++) {
+        for (p = src_data + slicew_start; p < src_data_end; p++) {
             uint8_t *target;
             if (column) {
                 target = dst + dst_signed_linesize * *p;
@@ -744,14 +789,13 @@
     }
 
     if (column && step > 1) {
-        const int dst_w = s->display == PARADE ? out->width / s->acomp : out->width;
         const int dst_h = 256;
         uint8_t *dst;
         int x, z;
 
         dst = out->data[plane] + offset_y * dst_linesize + offset_x;
         for (y = 0; y < dst_h; y++) {
-            for (x = 0; x < dst_w; x+=step) {
+            for (x = slicew_start * step; x < slicew_end * step; x+=step) {
                 for (z = 1; z < step; z++) {
                     dst[x + z] = dst[x];
                 }
@@ -759,31 +803,37 @@
             dst += dst_linesize;
         }
     } else if (step > 1) {
-        const int dst_h = s->display == PARADE ? out->height / s->acomp : out->height;
         const int dst_w = 256;
         uint8_t *dst;
         int z;
 
-        dst = out->data[plane] + offset_y * dst_linesize + offset_x;
-        for (y = 0; y < dst_h; y+=step) {
+        dst = out->data[plane] + (offset_y + sliceh_start * step) * dst_linesize + offset_x;
+        for (y = sliceh_start * step; y < sliceh_end * step; y+=step) {
             for (z = 1; z < step; z++)
                 memcpy(dst + dst_linesize * z, dst, dst_w);
             dst += dst_linesize * step;
         }
     }
-
-    envelope(s, out, plane, plane, column ? offset_x : offset_y);
 }
 
-#define LOWPASS_FUNC(name, column, mirror)               \
-static void lowpass_##name(WaveformContext *s,           \
-                           AVFrame *in, AVFrame *out,    \
-                           int component, int intensity, \
-                           int offset_y, int offset_x,   \
-                           int unused1, int unused2)     \
-{                                                        \
-    lowpass(s, in, out, component, intensity,            \
-            offset_y, offset_x, column, mirror);         \
+#define LOWPASS_FUNC(name, column, mirror)        \
+static int lowpass_##name(AVFilterContext *ctx,   \
+                          void *arg, int jobnr,   \
+                          int nb_jobs)            \
+{                                                 \
+    WaveformContext *s = ctx->priv;               \
+    ThreadData *td = arg;                         \
+    AVFrame *in = td->in;                         \
+    AVFrame *out = td->out;                       \
+    int component = td->component;                \
+    int offset_y = td->offset_y;                  \
+    int offset_x = td->offset_x;                  \
+                                                  \
+    lowpass(s, in, out, component, s->intensity,  \
+            offset_y, offset_x, column, mirror,   \
+            jobnr, nb_jobs);                      \
+                                                  \
+    return 0;                                     \
 }
 
 LOWPASS_FUNC(column_mirror, 1, 1)
@@ -795,7 +845,8 @@
                                     AVFrame *in, AVFrame *out,
                                     int component, int intensity,
                                     int offset_y, int offset_x,
-                                    int column, int mirror)
+                                    int column, int mirror,
+                                    int jobnr, int nb_jobs)
 {
     const int plane = s->desc->comp[component].plane;
     const int c0_linesize = in->linesize[ plane + 0 ] / 2;
@@ -814,13 +865,17 @@
     const int mid = s->max / 2;
     const int src_h = in->height;
     const int src_w = in->width;
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;
     int x, y;
 
     if (column) {
         const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1);
         const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);
 
-        for (x = 0; x < src_w; x++) {
+        for (x = slicew_start; x < slicew_end; x++) {
             const uint16_t *c0_data = (uint16_t *)in->data[plane + 0];
             const uint16_t *c1_data = (uint16_t *)in->data[(plane + 1) % s->ncomp];
             const uint16_t *c2_data = (uint16_t *)in->data[(plane + 2) % s->ncomp];
@@ -854,18 +909,18 @@
             }
         }
     } else {
-        const uint16_t *c0_data = (uint16_t *)in->data[plane];
-        const uint16_t *c1_data = (uint16_t *)in->data[(plane + 1) % s->ncomp];
-        const uint16_t *c2_data = (uint16_t *)in->data[(plane + 2) % s->ncomp];
-        uint16_t *d0_data = (uint16_t *)(out->data[plane]) + offset_y * d0_linesize + offset_x;
-        uint16_t *d1_data = (uint16_t *)(out->data[(plane + 1) % s->ncomp]) + offset_y * d1_linesize + offset_x;
+        const uint16_t *c0_data = (uint16_t *)(in->data[plane]) +                  (sliceh_start >> c0_shift_h) * c0_linesize;
+        const uint16_t *c1_data = (uint16_t *)(in->data[(plane + 1) % s->ncomp]) + (sliceh_start >> c1_shift_h) * c1_linesize;
+        const uint16_t *c2_data = (uint16_t *)(in->data[(plane + 2) % s->ncomp]) + (sliceh_start >> c2_shift_h) * c2_linesize;
+        uint16_t *d0_data = (uint16_t *)(out->data[plane]) + (offset_y + sliceh_start) * d0_linesize + offset_x;
+        uint16_t *d1_data = (uint16_t *)(out->data[(plane + 1) % s->ncomp]) + (offset_y + sliceh_start) * d1_linesize + offset_x;
 
         if (mirror) {
             d0_data += s->size - 1;
             d1_data += s->size - 1;
         }
 
-        for (y = 0; y < src_h; y++) {
+        for (y = sliceh_start; y < sliceh_end; y++) {
             for (x = 0; x < src_w; x++) {
                 const int c0 = FFMIN(c0_data[x >> c0_shift_w], limit) + s->max;
                 const int c1 = FFMIN(FFABS(c1_data[x >> c1_shift_w] - mid) + FFABS(c2_data[x >> c2_shift_w] - mid), limit);
@@ -898,16 +953,39 @@
             d1_data += d1_linesize;
         }
     }
-
-    envelope16(s, out, plane, plane, column ? offset_x : offset_y);
-    envelope16(s, out, plane, (plane + 1) % s->ncomp, column ? offset_x : offset_y);
 }
 
+#define FLAT16_FUNC(name, column, mirror)        \
+static int flat16_##name(AVFilterContext *ctx,   \
+                         void *arg, int jobnr,   \
+                         int nb_jobs)            \
+{                                                \
+    WaveformContext *s = ctx->priv;              \
+    ThreadData *td = arg;                        \
+    AVFrame *in = td->in;                        \
+    AVFrame *out = td->out;                      \
+    int component = td->component;               \
+    int offset_y = td->offset_y;                 \
+    int offset_x = td->offset_x;                 \
+                                                 \
+    flat16(s, in, out, component, s->intensity,  \
+           offset_y, offset_x, column, mirror,   \
+           jobnr, nb_jobs);                      \
+                                                 \
+    return 0;                                    \
+}
+
+FLAT16_FUNC(column_mirror, 1, 1)
+FLAT16_FUNC(column,        1, 0)
+FLAT16_FUNC(row_mirror,    0, 1)
+FLAT16_FUNC(row,           0, 0)
+
 static av_always_inline void flat(WaveformContext *s,
                                   AVFrame *in, AVFrame *out,
                                   int component, int intensity,
                                   int offset_y, int offset_x,
-                                  int column, int mirror)
+                                  int column, int mirror,
+                                  int jobnr, int nb_jobs)
 {
     const int plane = s->desc->comp[component].plane;
     const int c0_linesize = in->linesize[ plane + 0 ];
@@ -924,13 +1002,17 @@
     const int max = 255 - intensity;
     const int src_h = in->height;
     const int src_w = in->width;
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;
     int x, y;
 
     if (column) {
         const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1);
         const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);
 
-        for (x = 0; x < src_w; x++) {
+        for (x = slicew_start; x < slicew_end; x++) {
             const uint8_t *c0_data = in->data[plane + 0];
             const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
             const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
@@ -964,20 +1046,20 @@
             }
         }
     } else {
-        const uint8_t *c0_data = in->data[plane];
-        const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
-        const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
-        uint8_t *d0_data = out->data[plane] + offset_y * d0_linesize + offset_x;
-        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset_y * d1_linesize + offset_x;
+        const uint8_t *c0_data = in->data[plane] +                  (sliceh_start >> c0_shift_h) * c0_linesize;
+        const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp] + (sliceh_start >> c1_shift_h) * c1_linesize;
+        const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp] + (sliceh_start >> c2_shift_h) * c2_linesize;
+        uint8_t *d0_data = out->data[plane] + (offset_y + sliceh_start) * d0_linesize + offset_x;
+        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + (offset_y + sliceh_start) * d1_linesize + offset_x;
 
         if (mirror) {
             d0_data += s->size - 1;
             d1_data += s->size - 1;
         }
 
-        for (y = 0; y < src_h; y++) {
+        for (y = sliceh_start; y < sliceh_end; y++) {
             for (x = 0; x < src_w; x++) {
-                int c0 = c0_data[x >> c0_shift_w] + 256;
+                const int c0 = c0_data[x >> c0_shift_w] + 256;
                 const int c1 = FFABS(c1_data[x >> c1_shift_w] - 128) + FFABS(c2_data[x >> c2_shift_w] - 128);
                 uint8_t *target;
 
@@ -1008,266 +1090,323 @@
             d1_data += d1_linesize;
         }
     }
-
-    envelope(s, out, plane, plane, column ? offset_x : offset_y);
-    envelope(s, out, plane, (plane + 1) % s->ncomp, column ? offset_x : offset_y);
 }
 
-static av_always_inline void aflat16(WaveformContext *s,
-                                     AVFrame *in, AVFrame *out,
-                                     int component, int intensity,
-                                     int offset_y, int offset_x,
-                                     int column, int mirror)
-{
-    const int plane = s->desc->comp[component].plane;
-    const int c0_linesize = in->linesize[ plane + 0 ] / 2;
-    const int c1_linesize = in->linesize[(plane + 1) % s->ncomp] / 2;
-    const int c2_linesize = in->linesize[(plane + 2) % s->ncomp] / 2;
-    const int c0_shift_w = s->shift_w[ component + 0 ];
-    const int c1_shift_w = s->shift_w[(component + 1) % s->ncomp];
-    const int c2_shift_w = s->shift_w[(component + 2) % s->ncomp];
-    const int c0_shift_h = s->shift_h[ component + 0 ];
-    const int c1_shift_h = s->shift_h[(component + 1) % s->ncomp];
-    const int c2_shift_h = s->shift_h[(component + 2) % s->ncomp];
-    const int d0_linesize = out->linesize[ plane + 0 ] / 2;
-    const int d1_linesize = out->linesize[(plane + 1) % s->ncomp] / 2;
-    const int d2_linesize = out->linesize[(plane + 2) % s->ncomp] / 2;
-    const int limit = s->max - 1;
-    const int max = limit - intensity;
-    const int mid = s->max / 2;
-    const int src_h = in->height;
-    const int src_w = in->width;
-    int x, y;
-
-    if (column) {
-        const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1);
-        const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);
-        const int d2_signed_linesize = d2_linesize * (mirror == 1 ? -1 : 1);
-
-        for (x = 0; x < src_w; x++) {
-            const uint16_t *c0_data = (uint16_t *)in->data[plane + 0];
-            const uint16_t *c1_data = (uint16_t *)in->data[(plane + 1) % s->ncomp];
-            const uint16_t *c2_data = (uint16_t *)in->data[(plane + 2) % s->ncomp];
-            uint16_t *d0_data = (uint16_t *)out->data[plane] + offset_y * d0_linesize + offset_x;
-            uint16_t *d1_data = (uint16_t *)out->data[(plane + 1) % s->ncomp] + offset_y * d1_linesize + offset_x;
-            uint16_t *d2_data = (uint16_t *)out->data[(plane + 2) % s->ncomp] + offset_y * d2_linesize + offset_x;
-            uint16_t * const d0_bottom_line = d0_data + d0_linesize * (s->size - 1);
-            uint16_t * const d0 = (mirror ? d0_bottom_line : d0_data);
-            uint16_t * const d1_bottom_line = d1_data + d1_linesize * (s->size - 1);
-            uint16_t * const d1 = (mirror ? d1_bottom_line : d1_data);
-            uint16_t * const d2_bottom_line = d2_data + d2_linesize * (s->size - 1);
-            uint16_t * const d2 = (mirror ? d2_bottom_line : d2_data);
-
-            for (y = 0; y < src_h; y++) {
-                const int c0 = FFMIN(c0_data[x >> c0_shift_w], limit) + mid;
-                const int c1 = FFMIN(c1_data[x >> c1_shift_w], limit) - mid;
-                const int c2 = FFMIN(c2_data[x >> c2_shift_w], limit) - mid;
-                uint16_t *target;
-
-                target = d0 + x + d0_signed_linesize * c0;
-                update16(target, max, intensity, limit);
-
-                target = d1 + x + d1_signed_linesize * (c0 + c1);
-                update16(target, max, intensity, limit);
-
-                target = d2 + x + d2_signed_linesize * (c0 + c2);
-                update16(target, max, intensity, limit);
-
-                if (!c0_shift_h || (y & c0_shift_h))
-                    c0_data += c0_linesize;
-                if (!c1_shift_h || (y & c1_shift_h))
-                    c1_data += c1_linesize;
-                if (!c2_shift_h || (y & c2_shift_h))
-                    c2_data += c2_linesize;
-                d0_data += d0_linesize;
-                d1_data += d1_linesize;
-                d2_data += d2_linesize;
-            }
-        }
-    } else {
-        const uint16_t *c0_data = (uint16_t *)in->data[plane];
-        const uint16_t *c1_data = (uint16_t *)in->data[(plane + 1) % s->ncomp];
-        const uint16_t *c2_data = (uint16_t *)in->data[(plane + 2) % s->ncomp];
-        uint16_t *d0_data = (uint16_t *)out->data[plane] + offset_y * d0_linesize + offset_x;
-        uint16_t *d1_data = (uint16_t *)out->data[(plane + 1) % s->ncomp] + offset_y * d1_linesize + offset_x;
-        uint16_t *d2_data = (uint16_t *)out->data[(plane + 2) % s->ncomp] + offset_y * d2_linesize + offset_x;
-
-        if (mirror) {
-            d0_data += s->size - 1;
-            d1_data += s->size - 1;
-            d2_data += s->size - 1;
-        }
-
-        for (y = 0; y < src_h; y++) {
-            for (x = 0; x < src_w; x++) {
-                const int c0 = FFMIN(c0_data[x >> c0_shift_w], limit) + mid;
-                const int c1 = FFMIN(c1_data[x >> c1_shift_w], limit) - mid;
-                const int c2 = FFMIN(c2_data[x >> c2_shift_w], limit) - mid;
-                uint16_t *target;
-
-                if (mirror) {
-                    target = d0_data - c0;
-                    update16(target, max, intensity, limit);
-                    target = d1_data - (c0 + c1);
-                    update16(target, max, intensity, limit);
-                    target = d2_data - (c0 + c2);
-                    update16(target, max, intensity, limit);
-                } else {
-                    target = d0_data + c0;
-                    update16(target, max, intensity, limit);
-                    target = d1_data + (c0 + c1);
-                    update16(target, max, intensity, limit);
-                    target = d2_data + (c0 + c2);
-                    update16(target, max, intensity, limit);
-                }
-            }
-
-            if (!c0_shift_h || (y & c0_shift_h))
-                c0_data += c0_linesize;
-            if (!c1_shift_h || (y & c1_shift_h))
-                c1_data += c1_linesize;
-            if (!c2_shift_h || (y & c2_shift_h))
-                c2_data += c2_linesize;
-            d0_data += d0_linesize;
-            d1_data += d1_linesize;
-            d2_data += d2_linesize;
-        }
-    }
-
-    envelope16(s, out, plane, (plane + 0) % s->ncomp, column ? offset_x : offset_y);
-    envelope16(s, out, plane, (plane + 1) % s->ncomp, column ? offset_x : offset_y);
-    envelope16(s, out, plane, (plane + 2) % s->ncomp, column ? offset_x : offset_y);
+#define FLAT_FUNC(name, column, mirror)        \
+static int flat_##name(AVFilterContext *ctx,   \
+                       void *arg, int jobnr,   \
+                       int nb_jobs)            \
+{                                              \
+    WaveformContext *s = ctx->priv;            \
+    ThreadData *td = arg;                      \
+    AVFrame *in = td->in;                      \
+    AVFrame *out = td->out;                    \
+    int component = td->component;             \
+    int offset_y = td->offset_y;               \
+    int offset_x = td->offset_x;               \
+                                               \
+    flat(s, in, out, component, s->intensity,  \
+         offset_y, offset_x, column, mirror,   \
+         jobnr, nb_jobs);                      \
+                                               \
+    return 0;                                  \
 }
 
-static av_always_inline void aflat(WaveformContext *s,
-                                   AVFrame *in, AVFrame *out,
-                                   int component, int intensity,
-                                   int offset_y, int offset_x,
-                                   int column, int mirror)
-{
-    const int plane = s->desc->comp[component].plane;
-    const int c0_linesize = in->linesize[ plane + 0 ];
-    const int c1_linesize = in->linesize[(plane + 1) % s->ncomp];
-    const int c2_linesize = in->linesize[(plane + 2) % s->ncomp];
-    const int c0_shift_w = s->shift_w[ component + 0 ];
-    const int c1_shift_w = s->shift_w[(component + 1) % s->ncomp];
-    const int c2_shift_w = s->shift_w[(component + 2) % s->ncomp];
-    const int c0_shift_h = s->shift_h[ component + 0 ];
-    const int c1_shift_h = s->shift_h[(component + 1) % s->ncomp];
-    const int c2_shift_h = s->shift_h[(component + 2) % s->ncomp];
-    const int d0_linesize = out->linesize[ plane + 0 ];
-    const int d1_linesize = out->linesize[(plane + 1) % s->ncomp];
-    const int d2_linesize = out->linesize[(plane + 2) % s->ncomp];
-    const int max = 255 - intensity;
-    const int src_h = in->height;
-    const int src_w = in->width;
-    int x, y;
+FLAT_FUNC(column_mirror, 1, 1)
+FLAT_FUNC(column,        1, 0)
+FLAT_FUNC(row_mirror,    0, 1)
+FLAT_FUNC(row,           0, 0)
 
-    if (column) {
-        const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1);
-        const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);
-        const int d2_signed_linesize = d2_linesize * (mirror == 1 ? -1 : 1);
-
-        for (x = 0; x < src_w; x++) {
-            const uint8_t *c0_data = in->data[plane + 0];
-            const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
-            const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
-            uint8_t *d0_data = out->data[plane] + offset_y * d0_linesize + offset_x;
-            uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset_y * d1_linesize + offset_x;
-            uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + offset_y * d2_linesize + offset_x;
-            uint8_t * const d0_bottom_line = d0_data + d0_linesize * (s->size - 1);
-            uint8_t * const d0 = (mirror ? d0_bottom_line : d0_data);
-            uint8_t * const d1_bottom_line = d1_data + d1_linesize * (s->size - 1);
-            uint8_t * const d1 = (mirror ? d1_bottom_line : d1_data);
-            uint8_t * const d2_bottom_line = d2_data + d2_linesize * (s->size - 1);
-            uint8_t * const d2 = (mirror ? d2_bottom_line : d2_data);
-
-            for (y = 0; y < src_h; y++) {
-                const int c0 = c0_data[x >> c0_shift_w] + 128;
-                const int c1 = c1_data[x >> c1_shift_w] - 128;
-                const int c2 = c2_data[x >> c2_shift_w] - 128;
-                uint8_t *target;
-
-                target = d0 + x + d0_signed_linesize * c0;
-                update(target, max, intensity);
-
-                target = d1 + x + d1_signed_linesize * (c0 + c1);
-                update(target, max, intensity);
-
-                target = d2 + x + d2_signed_linesize * (c0 + c2);
-                update(target, max, intensity);
-
-                if (!c0_shift_h || (y & c0_shift_h))
-                    c0_data += c0_linesize;
-                if (!c1_shift_h || (y & c1_shift_h))
-                    c1_data += c1_linesize;
-                if (!c1_shift_h || (y & c1_shift_h))
-                    c2_data += c1_linesize;
-                d0_data += d0_linesize;
-                d1_data += d1_linesize;
-                d2_data += d2_linesize;
-            }
-        }
-    } else {
-        const uint8_t *c0_data = in->data[plane];
-        const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
-        const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
-        uint8_t *d0_data = out->data[plane] + offset_y * d0_linesize + offset_x;
-        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset_y * d1_linesize + offset_x;
-        uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + offset_y * d2_linesize + offset_x;
-
-        if (mirror) {
-            d0_data += s->size - 1;
-            d1_data += s->size - 1;
-            d2_data += s->size - 1;
-        }
-
-        for (y = 0; y < src_h; y++) {
-            for (x = 0; x < src_w; x++) {
-                const int c0 = c0_data[x >> c0_shift_w] + 128;
-                const int c1 = c1_data[x >> c1_shift_w] - 128;
-                const int c2 = c2_data[x >> c2_shift_w] - 128;
-                uint8_t *target;
-
-                if (mirror) {
-                    target = d0_data - c0;
-                    update(target, max, intensity);
-                    target = d1_data - (c0 + c1);
-                    update(target, max, intensity);
-                    target = d2_data - (c0 + c2);
-                    update(target, max, intensity);
-                } else {
-                    target = d0_data + c0;
-                    update(target, max, intensity);
-                    target = d1_data + (c0 + c1);
-                    update(target, max, intensity);
-                    target = d2_data + (c0 + c2);
-                    update(target, max, intensity);
-                }
-            }
-
-            if (!c0_shift_h || (y & c0_shift_h))
-                c0_data += c0_linesize;
-            if (!c1_shift_h || (y & c1_shift_h))
-                c1_data += c1_linesize;
-            if (!c2_shift_h || (y & c2_shift_h))
-                c2_data += c2_linesize;
-            d0_data += d0_linesize;
-            d1_data += d1_linesize;
-            d2_data += d2_linesize;
-        }
-    }
-
-    envelope(s, out, plane, (plane + 0) % s->ncomp, column ? offset_x : offset_y);
-    envelope(s, out, plane, (plane + 1) % s->ncomp, column ? offset_x : offset_y);
-    envelope(s, out, plane, (plane + 2) % s->ncomp, column ? offset_x : offset_y);
+#define AFLAT16(name, update_cr, column, mirror)                                                                   \
+static int name(AVFilterContext *ctx,                                                                              \
+                void *arg, int jobnr,                                                                              \
+                int nb_jobs)                                                                                       \
+{                                                                                                                  \
+    WaveformContext *s = ctx->priv;                                                                                \
+    ThreadData *td = arg;                                                                                          \
+    AVFrame *in = td->in;                                                                                          \
+    AVFrame *out = td->out;                                                                                        \
+    int component = td->component;                                                                                 \
+    int offset_y = td->offset_y;                                                                                   \
+    int offset_x = td->offset_x;                                                                                   \
+    const int intensity = s->intensity;                                                                            \
+    const int plane = s->desc->comp[component].plane;                                                              \
+    const int c0_linesize = in->linesize[ plane + 0 ] / 2;                                                         \
+    const int c1_linesize = in->linesize[(plane + 1) % s->ncomp] / 2;                                              \
+    const int c2_linesize = in->linesize[(plane + 2) % s->ncomp] / 2;                                              \
+    const int c0_shift_w = s->shift_w[ component + 0 ];                                                            \
+    const int c1_shift_w = s->shift_w[(component + 1) % s->ncomp];                                                 \
+    const int c2_shift_w = s->shift_w[(component + 2) % s->ncomp];                                                 \
+    const int c0_shift_h = s->shift_h[ component + 0 ];                                                            \
+    const int c1_shift_h = s->shift_h[(component + 1) % s->ncomp];                                                 \
+    const int c2_shift_h = s->shift_h[(component + 2) % s->ncomp];                                                 \
+    const int d0_linesize = out->linesize[ plane + 0 ] / 2;                                                        \
+    const int d1_linesize = out->linesize[(plane + 1) % s->ncomp] / 2;                                             \
+    const int d2_linesize = out->linesize[(plane + 2) % s->ncomp] / 2;                                             \
+    const int limit = s->max - 1;                                                                                  \
+    const int max = limit - intensity;                                                                             \
+    const int mid = s->max / 2;                                                                                    \
+    const int src_h = in->height;                                                                                  \
+    const int src_w = in->width;                                                                                   \
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;                                              \
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;                                        \
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;                                               \
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;                                         \
+    int x, y;                                                                                                      \
+                                                                                                                   \
+    if (column) {                                                                                                  \
+        const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1);                                       \
+        const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);                                       \
+        const int d2_signed_linesize = d2_linesize * (mirror == 1 ? -1 : 1);                                       \
+                                                                                                                   \
+        for (x = slicew_start; x < slicew_end; x++) {                                                              \
+            const uint16_t *c0_data = (uint16_t *)in->data[plane + 0];                                             \
+            const uint16_t *c1_data = (uint16_t *)in->data[(plane + 1) % s->ncomp];                                \
+            const uint16_t *c2_data = (uint16_t *)in->data[(plane + 2) % s->ncomp];                                \
+            uint16_t *d0_data = (uint16_t *)out->data[plane] + offset_y * d0_linesize + offset_x;                  \
+            uint16_t *d1_data = (uint16_t *)out->data[(plane + 1) % s->ncomp] + offset_y * d1_linesize + offset_x; \
+            uint16_t *d2_data = (uint16_t *)out->data[(plane + 2) % s->ncomp] + offset_y * d2_linesize + offset_x; \
+            uint16_t * const d0_bottom_line = d0_data + d0_linesize * (s->size - 1);                               \
+            uint16_t * const d0 = (mirror ? d0_bottom_line : d0_data);                                             \
+            uint16_t * const d1_bottom_line = d1_data + d1_linesize * (s->size - 1);                               \
+            uint16_t * const d1 = (mirror ? d1_bottom_line : d1_data);                                             \
+            uint16_t * const d2_bottom_line = d2_data + d2_linesize * (s->size - 1);                               \
+            uint16_t * const d2 = (mirror ? d2_bottom_line : d2_data);                                             \
+                                                                                                                   \
+            for (y = 0; y < src_h; y++) {                                                                          \
+                const int c0 = FFMIN(c0_data[x >> c0_shift_w], limit) + mid;                                       \
+                const int c1 = FFMIN(c1_data[x >> c1_shift_w], limit) - mid;                                       \
+                const int c2 = FFMIN(c2_data[x >> c2_shift_w], limit) - mid;                                       \
+                uint16_t *target;                                                                                  \
+                                                                                                                   \
+                target = d0 + x + d0_signed_linesize * c0;                                                         \
+                update16(target, max, intensity, limit);                                                           \
+                                                                                                                   \
+                target = d1 + x + d1_signed_linesize * (c0 + c1);                                                  \
+                update16(target, max, intensity, limit);                                                           \
+                                                                                                                   \
+                target = d2 + x + d2_signed_linesize * (c0 + c2);                                                  \
+                update_cr(target, max, intensity, limit);                                                          \
+                                                                                                                   \
+                if (!c0_shift_h || (y & c0_shift_h))                                                               \
+                    c0_data += c0_linesize;                                                                        \
+                if (!c1_shift_h || (y & c1_shift_h))                                                               \
+                    c1_data += c1_linesize;                                                                        \
+                if (!c2_shift_h || (y & c2_shift_h))                                                               \
+                    c2_data += c2_linesize;                                                                        \
+                d0_data += d0_linesize;                                                                            \
+                d1_data += d1_linesize;                                                                            \
+                d2_data += d2_linesize;                                                                            \
+            }                                                                                                      \
+        }                                                                                                          \
+    } else {                                                                                                       \
+        const uint16_t *c0_data = (uint16_t *)in->data[plane] + (sliceh_start >> c0_shift_h) * c0_linesize;        \
+        const uint16_t *c1_data = (uint16_t *)in->data[(plane + 1) % s->ncomp] + (sliceh_start >> c1_shift_h) * c1_linesize; \
+        const uint16_t *c2_data = (uint16_t *)in->data[(plane + 2) % s->ncomp] + (sliceh_start >> c2_shift_h) * c2_linesize; \
+        uint16_t *d0_data = (uint16_t *)out->data[plane] + (offset_y + sliceh_start) * d0_linesize + offset_x;                      \
+        uint16_t *d1_data = (uint16_t *)out->data[(plane + 1) % s->ncomp] + (offset_y + sliceh_start) * d1_linesize + offset_x;     \
+        uint16_t *d2_data = (uint16_t *)out->data[(plane + 2) % s->ncomp] + (offset_y + sliceh_start) * d2_linesize + offset_x;     \
+                                                                                                                   \
+        if (mirror) {                                                                                              \
+            d0_data += s->size - 1;                                                                                \
+            d1_data += s->size - 1;                                                                                \
+            d2_data += s->size - 1;                                                                                \
+        }                                                                                                          \
+                                                                                                                   \
+        for (y = sliceh_start; y < sliceh_end; y++) {                                                              \
+            for (x = 0; x < src_w; x++) {                                                                          \
+                const int c0 = FFMIN(c0_data[x >> c0_shift_w], limit) + mid;                                       \
+                const int c1 = FFMIN(c1_data[x >> c1_shift_w], limit) - mid;                                       \
+                const int c2 = FFMIN(c2_data[x >> c2_shift_w], limit) - mid;                                       \
+                uint16_t *target;                                                                                  \
+                                                                                                                   \
+                if (mirror) {                                                                                      \
+                    target = d0_data - c0;                                                                         \
+                    update16(target, max, intensity, limit);                                                       \
+                    target = d1_data - (c0 + c1);                                                                  \
+                    update16(target, max, intensity, limit);                                                       \
+                    target = d2_data - (c0 + c2);                                                                  \
+                    update_cr(target, max, intensity, limit);                                                      \
+                } else {                                                                                           \
+                    target = d0_data + c0;                                                                         \
+                    update16(target, max, intensity, limit);                                                       \
+                    target = d1_data + (c0 + c1);                                                                  \
+                    update16(target, max, intensity, limit);                                                       \
+                    target = d2_data + (c0 + c2);                                                                  \
+                    update_cr(target, max, intensity, limit);                                                      \
+                }                                                                                                  \
+            }                                                                                                      \
+                                                                                                                   \
+            if (!c0_shift_h || (y & c0_shift_h))                                                                   \
+                c0_data += c0_linesize;                                                                            \
+            if (!c1_shift_h || (y & c1_shift_h))                                                                   \
+                c1_data += c1_linesize;                                                                            \
+            if (!c2_shift_h || (y & c2_shift_h))                                                                   \
+                c2_data += c2_linesize;                                                                            \
+            d0_data += d0_linesize;                                                                                \
+            d1_data += d1_linesize;                                                                                \
+            d2_data += d2_linesize;                                                                                \
+        }                                                                                                          \
+    }                                                                                                              \
+    return 0;                                                                                                      \
 }
 
+#define AFLAT(name, update_cr, column, mirror)                                                        \
+static int name(AVFilterContext *ctx,                                                                 \
+                void *arg, int jobnr,                                                                 \
+                int nb_jobs)                                                                          \
+{                                                                                                     \
+    WaveformContext *s = ctx->priv;                                                                   \
+    ThreadData *td = arg;                                                                             \
+    AVFrame *in = td->in;                                                                             \
+    AVFrame *out = td->out;                                                                           \
+    int component = td->component;                                                                    \
+    int offset_y = td->offset_y;                                                                      \
+    int offset_x = td->offset_x;                                                                      \
+    const int src_h = in->height;                                                                     \
+    const int src_w = in->width;                                                                      \
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;                                 \
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;                           \
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;                                  \
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;                            \
+    const int intensity = s->intensity;                                                               \
+    const int plane = s->desc->comp[component].plane;                                                 \
+    const int c0_linesize = in->linesize[ plane + 0 ];                                                \
+    const int c1_linesize = in->linesize[(plane + 1) % s->ncomp];                                     \
+    const int c2_linesize = in->linesize[(plane + 2) % s->ncomp];                                     \
+    const int c0_shift_w = s->shift_w[ component + 0 ];                                               \
+    const int c1_shift_w = s->shift_w[(component + 1) % s->ncomp];                                    \
+    const int c2_shift_w = s->shift_w[(component + 2) % s->ncomp];                                    \
+    const int c0_shift_h = s->shift_h[ component + 0 ];                                               \
+    const int c1_shift_h = s->shift_h[(component + 1) % s->ncomp];                                    \
+    const int c2_shift_h = s->shift_h[(component + 2) % s->ncomp];                                    \
+    const int d0_linesize = out->linesize[ plane + 0 ];                                               \
+    const int d1_linesize = out->linesize[(plane + 1) % s->ncomp];                                    \
+    const int d2_linesize = out->linesize[(plane + 2) % s->ncomp];                                    \
+    const int max = 255 - intensity;                                                                  \
+    int x, y;                                                                                         \
+                                                                                                      \
+    if (column) {                                                                                     \
+        const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1);                          \
+        const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);                          \
+        const int d2_signed_linesize = d2_linesize * (mirror == 1 ? -1 : 1);                          \
+                                                                                                      \
+        for (x = slicew_start; x < slicew_end; x++) {                                                 \
+            const uint8_t *c0_data = in->data[plane + 0];                                             \
+            const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];                                \
+            const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];                                \
+            uint8_t *d0_data = out->data[plane] + offset_y * d0_linesize + offset_x;                  \
+            uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset_y * d1_linesize + offset_x; \
+            uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + offset_y * d2_linesize + offset_x; \
+            uint8_t * const d0_bottom_line = d0_data + d0_linesize * (s->size - 1);                   \
+            uint8_t * const d0 = (mirror ? d0_bottom_line : d0_data);                                 \
+            uint8_t * const d1_bottom_line = d1_data + d1_linesize * (s->size - 1);                   \
+            uint8_t * const d1 = (mirror ? d1_bottom_line : d1_data);                                 \
+            uint8_t * const d2_bottom_line = d2_data + d2_linesize * (s->size - 1);                   \
+            uint8_t * const d2 = (mirror ? d2_bottom_line : d2_data);                                 \
+                                                                                                      \
+            for (y = 0; y < src_h; y++) {                                                             \
+                const int c0 = c0_data[x >> c0_shift_w] + 128;                                        \
+                const int c1 = c1_data[x >> c1_shift_w] - 128;                                        \
+                const int c2 = c2_data[x >> c2_shift_w] - 128;                                        \
+                uint8_t *target;                                                                      \
+                                                                                                      \
+                target = d0 + x + d0_signed_linesize * c0;                                            \
+                update(target, max, intensity);                                                       \
+                                                                                                      \
+                target = d1 + x + d1_signed_linesize * (c0 + c1);                                     \
+                update(target, max, intensity);                                                       \
+                                                                                                      \
+                target = d2 + x + d2_signed_linesize * (c0 + c2);                                     \
+                update_cr(target, max, intensity);                                                    \
+                                                                                                      \
+                if (!c0_shift_h || (y & c0_shift_h))                                                  \
+                    c0_data += c0_linesize;                                                           \
+                if (!c1_shift_h || (y & c1_shift_h))                                                  \
+                    c1_data += c1_linesize;                                                           \
+                if (!c1_shift_h || (y & c1_shift_h))                                                  \
+                    c2_data += c1_linesize;                                                           \
+                d0_data += d0_linesize;                                                               \
+                d1_data += d1_linesize;                                                               \
+                d2_data += d2_linesize;                                                               \
+            }                                                                                         \
+        }                                                                                             \
+    } else {                                                                                          \
+        const uint8_t *c0_data = in->data[plane] + (sliceh_start >> c0_shift_h) * c0_linesize;        \
+        const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp] + (sliceh_start >> c1_shift_h) * c1_linesize; \
+        const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp] + (sliceh_start >> c2_shift_h) * c2_linesize; \
+        uint8_t *d0_data = out->data[plane] + (offset_y + sliceh_start) * d0_linesize + offset_x;     \
+        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + (offset_y + sliceh_start) * d1_linesize + offset_x; \
+        uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + (offset_y + sliceh_start) * d2_linesize + offset_x; \
+                                                                                                      \
+        if (mirror) {                                                                                 \
+            d0_data += s->size - 1;                                                                   \
+            d1_data += s->size - 1;                                                                   \
+            d2_data += s->size - 1;                                                                   \
+        }                                                                                             \
+                                                                                                      \
+        for (y = sliceh_start; y < sliceh_end; y++) {                                                 \
+            for (x = 0; x < src_w; x++) {                                                             \
+                const int c0 = c0_data[x >> c0_shift_w] + 128;                                        \
+                const int c1 = c1_data[x >> c1_shift_w] - 128;                                        \
+                const int c2 = c2_data[x >> c2_shift_w] - 128;                                        \
+                uint8_t *target;                                                                      \
+                                                                                                      \
+                if (mirror) {                                                                         \
+                    target = d0_data - c0;                                                            \
+                    update(target, max, intensity);                                                   \
+                    target = d1_data - (c0 + c1);                                                     \
+                    update(target, max, intensity);                                                   \
+                    target = d2_data - (c0 + c2);                                                     \
+                    update_cr(target, max, intensity);                                                \
+                } else {                                                                              \
+                    target = d0_data + c0;                                                            \
+                    update(target, max, intensity);                                                   \
+                    target = d1_data + (c0 + c1);                                                     \
+                    update(target, max, intensity);                                                   \
+                    target = d2_data + (c0 + c2);                                                     \
+                    update_cr(target, max, intensity);                                                \
+                }                                                                                     \
+            }                                                                                         \
+                                                                                                      \
+            if (!c0_shift_h || (y & c0_shift_h))                                                      \
+                c0_data += c0_linesize;                                                               \
+            if (!c1_shift_h || (y & c1_shift_h))                                                      \
+                c1_data += c1_linesize;                                                               \
+            if (!c2_shift_h || (y & c2_shift_h))                                                      \
+                c2_data += c2_linesize;                                                               \
+            d0_data += d0_linesize;                                                                   \
+            d1_data += d1_linesize;                                                                   \
+            d2_data += d2_linesize;                                                                   \
+        }                                                                                             \
+    }                                                                                                 \
+    return 0;                                                                                         \
+}
+
+AFLAT16(aflat16_row,           update16,    0, 0)
+AFLAT16(aflat16_row_mirror,    update16,    0, 1)
+AFLAT16(aflat16_column,        update16,    1, 0)
+AFLAT16(aflat16_column_mirror, update16,    1, 1)
+AFLAT16(xflat16_row,           update16_cr, 0, 0)
+AFLAT16(xflat16_row_mirror,    update16_cr, 0, 1)
+AFLAT16(xflat16_column,        update16_cr, 1, 0)
+AFLAT16(xflat16_column_mirror, update16_cr, 1, 1)
+
+AFLAT(aflat_row,           update,    0, 0)
+AFLAT(aflat_row_mirror,    update,    0, 1)
+AFLAT(aflat_column,        update,    1, 0)
+AFLAT(aflat_column_mirror, update,    1, 1)
+AFLAT(xflat_row,           update_cr, 0, 0)
+AFLAT(xflat_row_mirror,    update_cr, 0, 1)
+AFLAT(xflat_column,        update_cr, 1, 0)
+AFLAT(xflat_column_mirror, update_cr, 1, 1)
+
 static av_always_inline void chroma16(WaveformContext *s,
                                       AVFrame *in, AVFrame *out,
                                       int component, int intensity,
                                       int offset_y, int offset_x,
-                                      int column, int mirror)
+                                      int column, int mirror,
+                                      int jobnr, int nb_jobs)
 {
     const int plane = s->desc->comp[component].plane;
     const int c0_linesize = in->linesize[(plane + 1) % s->ncomp] / 2;
@@ -1282,12 +1421,16 @@
     const int c1_shift_h = s->shift_h[(component + 2) % s->ncomp];
     const int src_h = in->height;
     const int src_w = in->width;
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;
     int x, y;
 
     if (column) {
         const int dst_signed_linesize = dst_linesize * (mirror == 1 ? -1 : 1);
 
-        for (x = 0; x < src_w; x++) {
+        for (x = slicew_start; x < slicew_end; x++) {
             const uint16_t *c0_data = (uint16_t *)in->data[(plane + 1) % s->ncomp];
             const uint16_t *c1_data = (uint16_t *)in->data[(plane + 2) % s->ncomp];
             uint16_t *dst_data = (uint16_t *)out->data[plane] + offset_y * dst_linesize + offset_x;
@@ -1310,13 +1453,13 @@
             }
         }
     } else {
-        const uint16_t *c0_data = (uint16_t *)in->data[(plane + 1) % s->ncomp];
-        const uint16_t *c1_data = (uint16_t *)in->data[(plane + 2) % s->ncomp];
-        uint16_t *dst_data = (uint16_t *)out->data[plane] + offset_y * dst_linesize + offset_x;
+        const uint16_t *c0_data = (uint16_t *)in->data[(plane + 1) % s->ncomp] + (sliceh_start >> c0_shift_h) * c0_linesize;
+        const uint16_t *c1_data = (uint16_t *)in->data[(plane + 2) % s->ncomp] + (sliceh_start >> c1_shift_h) * c1_linesize;
+        uint16_t *dst_data = (uint16_t *)out->data[plane] + (offset_y + sliceh_start) * dst_linesize + offset_x;
 
         if (mirror)
             dst_data += s->size - 1;
-        for (y = 0; y < src_h; y++) {
+        for (y = sliceh_start; y < sliceh_end; y++) {
             for (x = 0; x < src_w; x++) {
                 const int sum = FFMIN(FFABS(c0_data[x >> c0_shift_w] - mid) + FFABS(c1_data[x >> c1_shift_w] - mid - 1), limit);
                 uint16_t *target;
@@ -1337,17 +1480,47 @@
             dst_data += dst_linesize;
         }
     }
-
-    envelope16(s, out, plane, plane, column ? offset_x : offset_y);
 }
 
+#define CHROMA16_FUNC(name, column, mirror)      \
+static int chroma16_##name(AVFilterContext *ctx, \
+                           void *arg, int jobnr, \
+                           int nb_jobs)          \
+{                                                \
+    WaveformContext *s = ctx->priv;              \
+    ThreadData *td = arg;                        \
+    AVFrame *in = td->in;                        \
+    AVFrame *out = td->out;                      \
+    int component = td->component;               \
+    int offset_y = td->offset_y;                 \
+    int offset_x = td->offset_x;                 \
+                                                 \
+    chroma16(s, in, out, component, s->intensity,\
+           offset_y, offset_x, column, mirror,   \
+           jobnr, nb_jobs);                      \
+                                                 \
+    return 0;                                    \
+}
+
+CHROMA16_FUNC(column_mirror, 1, 1)
+CHROMA16_FUNC(column,        1, 0)
+CHROMA16_FUNC(row_mirror,    0, 1)
+CHROMA16_FUNC(row,           0, 0)
+
 static av_always_inline void chroma(WaveformContext *s,
                                     AVFrame *in, AVFrame *out,
                                     int component, int intensity,
                                     int offset_y, int offset_x,
-                                    int column, int mirror)
+                                    int column, int mirror,
+                                    int jobnr, int nb_jobs)
 {
     const int plane = s->desc->comp[component].plane;
+    const int src_h = in->height;
+    const int src_w = in->width;
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;
     const int c0_linesize = in->linesize[(plane + 1) % s->ncomp];
     const int c1_linesize = in->linesize[(plane + 2) % s->ncomp];
     const int dst_linesize = out->linesize[plane];
@@ -1356,14 +1529,12 @@
     const int c1_shift_w = s->shift_w[(component + 2) % s->ncomp];
     const int c0_shift_h = s->shift_h[(component + 1) % s->ncomp];
     const int c1_shift_h = s->shift_h[(component + 2) % s->ncomp];
-    const int src_h = in->height;
-    const int src_w = in->width;
     int x, y;
 
     if (column) {
         const int dst_signed_linesize = dst_linesize * (mirror == 1 ? -1 : 1);
 
-        for (x = 0; x < src_w; x++) {
+        for (x = slicew_start; x < slicew_end; x++) {
             const uint8_t *c0_data = in->data[(plane + 1) % s->ncomp];
             const uint8_t *c1_data = in->data[(plane + 2) % s->ncomp];
             uint8_t *dst_data = out->data[plane] + offset_y * dst_linesize + offset_x;
@@ -1386,13 +1557,13 @@
             }
         }
     } else {
-        const uint8_t *c0_data = in->data[(plane + 1) % s->ncomp];
-        const uint8_t *c1_data = in->data[(plane + 2) % s->ncomp];
-        uint8_t *dst_data = out->data[plane] + offset_y * dst_linesize + offset_x;
+        const uint8_t *c0_data = in->data[(plane + 1) % s->ncomp] + (sliceh_start >> c0_shift_h) * c0_linesize;
+        const uint8_t *c1_data = in->data[(plane + 2) % s->ncomp] + (sliceh_start >> c1_shift_h) * c1_linesize;
+        uint8_t *dst_data = out->data[plane] + (offset_y + sliceh_start) * dst_linesize + offset_x;
 
         if (mirror)
             dst_data += s->size - 1;
-        for (y = 0; y < src_h; y++) {
+        for (y = sliceh_start; y < sliceh_end; y++) {
             for (x = 0; x < src_w; x++) {
                 const int sum = FFABS(c0_data[x >> c0_shift_w] - 128) + FFABS(c1_data[x >> c1_shift_w] - 127);
                 uint8_t *target;
@@ -1413,35 +1584,63 @@
             dst_data += dst_linesize;
         }
     }
-
-    envelope(s, out, plane, plane, column ? offset_x : offset_y);
 }
 
+#define CHROMA_FUNC(name, column, mirror)        \
+static int chroma_##name(AVFilterContext *ctx,   \
+                         void *arg, int jobnr,   \
+                         int nb_jobs)            \
+{                                                \
+    WaveformContext *s = ctx->priv;              \
+    ThreadData *td = arg;                        \
+    AVFrame *in = td->in;                        \
+    AVFrame *out = td->out;                      \
+    int component = td->component;               \
+    int offset_y = td->offset_y;                 \
+    int offset_x = td->offset_x;                 \
+                                                 \
+    chroma(s, in, out, component, s->intensity,  \
+           offset_y, offset_x, column, mirror,   \
+           jobnr, nb_jobs);                      \
+                                                 \
+    return 0;                                    \
+}
+
+CHROMA_FUNC(column_mirror, 1, 1)
+CHROMA_FUNC(column,        1, 0)
+CHROMA_FUNC(row_mirror,    0, 1)
+CHROMA_FUNC(row,           0, 0)
+
 static av_always_inline void color16(WaveformContext *s,
                                      AVFrame *in, AVFrame *out,
                                      int component, int intensity,
                                      int offset_y, int offset_x,
-                                     int column, int mirror)
+                                     int column, int mirror,
+                                     int jobnr, int nb_jobs)
 {
     const int plane = s->desc->comp[component].plane;
     const int limit = s->max - 1;
-    const uint16_t *c0_data = (const uint16_t *)in->data[plane + 0];
-    const uint16_t *c1_data = (const uint16_t *)in->data[(plane + 1) % s->ncomp];
-    const uint16_t *c2_data = (const uint16_t *)in->data[(plane + 2) % s->ncomp];
+    const int src_h = in->height;
+    const int src_w = in->width;
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;
     const int c0_linesize = in->linesize[ plane + 0 ] / 2;
     const int c1_linesize = in->linesize[(plane + 1) % s->ncomp] / 2;
     const int c2_linesize = in->linesize[(plane + 2) % s->ncomp] / 2;
+    const int c0_shift_h = s->shift_h[ component + 0 ];
+    const int c1_shift_h = s->shift_h[(component + 1) % s->ncomp];
+    const int c2_shift_h = s->shift_h[(component + 2) % s->ncomp];
+    const uint16_t *c0_data = (const uint16_t *)in->data[plane + 0] + (sliceh_start >> c0_shift_h) * c0_linesize;
+    const uint16_t *c1_data = (const uint16_t *)in->data[(plane + 1) % s->ncomp] + (sliceh_start >> c1_shift_h) * c1_linesize;
+    const uint16_t *c2_data = (const uint16_t *)in->data[(plane + 2) % s->ncomp] + (sliceh_start >> c2_shift_h) * c2_linesize;
     const int d0_linesize = out->linesize[ plane + 0 ] / 2;
     const int d1_linesize = out->linesize[(plane + 1) % s->ncomp] / 2;
     const int d2_linesize = out->linesize[(plane + 2) % s->ncomp] / 2;
     const int c0_shift_w = s->shift_w[ component + 0 ];
     const int c1_shift_w = s->shift_w[(component + 1) % s->ncomp];
     const int c2_shift_w = s->shift_w[(component + 2) % s->ncomp];
-    const int c0_shift_h = s->shift_h[ component + 0 ];
-    const int c1_shift_h = s->shift_h[(component + 1) % s->ncomp];
-    const int c2_shift_h = s->shift_h[(component + 2) % s->ncomp];
-    const int src_h = in->height;
-    const int src_w = in->width;
     int x, y;
 
     if (column) {
@@ -1459,7 +1658,7 @@
         uint16_t * const d2 = (mirror ? d2_bottom_line : d2_data);
 
         for (y = 0; y < src_h; y++) {
-            for (x = 0; x < src_w; x++) {
+            for (x = slicew_start; x < slicew_end; x++) {
                 const int c0 = FFMIN(c0_data[x >> c0_shift_w], limit);
                 const int c1 = c1_data[x >> c1_shift_w];
                 const int c2 = c2_data[x >> c2_shift_w];
@@ -1480,9 +1679,9 @@
             d2_data += d2_linesize;
         }
     } else {
-        uint16_t *d0_data = (uint16_t *)out->data[plane] + offset_y * d0_linesize + offset_x;
-        uint16_t *d1_data = (uint16_t *)out->data[(plane + 1) % s->ncomp] + offset_y * d1_linesize + offset_x;
-        uint16_t *d2_data = (uint16_t *)out->data[(plane + 2) % s->ncomp] + offset_y * d2_linesize + offset_x;
+        uint16_t *d0_data = (uint16_t *)out->data[plane] + (offset_y + sliceh_start) * d0_linesize + offset_x;
+        uint16_t *d1_data = (uint16_t *)out->data[(plane + 1) % s->ncomp] + (offset_y + sliceh_start) * d1_linesize + offset_x;
+        uint16_t *d2_data = (uint16_t *)out->data[(plane + 2) % s->ncomp] + (offset_y + sliceh_start) * d2_linesize + offset_x;
 
         if (mirror) {
             d0_data += s->size - 1;
@@ -1490,7 +1689,7 @@
             d2_data += s->size - 1;
         }
 
-        for (y = 0; y < src_h; y++) {
+        for (y = sliceh_start; y < sliceh_end; y++) {
             for (x = 0; x < src_w; x++) {
                 const int c0 = FFMIN(c0_data[x >> c0_shift_w], limit);
                 const int c1 = c1_data[x >> c1_shift_w];
@@ -1518,37 +1717,65 @@
             d2_data += d2_linesize;
         }
     }
-
-    envelope16(s, out, plane, plane, column ? offset_x : offset_y);
 }
 
+#define COLOR16_FUNC(name, column, mirror)       \
+static int color16_##name(AVFilterContext *ctx,  \
+                          void *arg, int jobnr,  \
+                          int nb_jobs)           \
+{                                                \
+    WaveformContext *s = ctx->priv;              \
+    ThreadData *td = arg;                        \
+    AVFrame *in = td->in;                        \
+    AVFrame *out = td->out;                      \
+    int component = td->component;               \
+    int offset_y = td->offset_y;                 \
+    int offset_x = td->offset_x;                 \
+                                                 \
+    color16(s, in, out, component, s->intensity, \
+            offset_y, offset_x, column, mirror,  \
+            jobnr, nb_jobs);                     \
+                                                 \
+    return 0;                                    \
+}
+
+COLOR16_FUNC(column_mirror, 1, 1)
+COLOR16_FUNC(column,        1, 0)
+COLOR16_FUNC(row_mirror,    0, 1)
+COLOR16_FUNC(row,           0, 0)
+
 static av_always_inline void color(WaveformContext *s,
                                    AVFrame *in, AVFrame *out,
                                    int component, int intensity,
                                    int offset_y, int offset_x,
-                                   int column, int mirror)
+                                   int column, int mirror,
+                                   int jobnr, int nb_jobs)
 {
     const int plane = s->desc->comp[component].plane;
-    const uint8_t *c0_data = in->data[plane + 0];
-    const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
-    const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
+    const int src_h = in->height;
+    const int src_w = in->width;
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;
     const int c0_linesize = in->linesize[ plane + 0 ];
     const int c1_linesize = in->linesize[(plane + 1) % s->ncomp];
     const int c2_linesize = in->linesize[(plane + 2) % s->ncomp];
+    const int c0_shift_h = s->shift_h[ component + 0 ];
+    const int c1_shift_h = s->shift_h[(component + 1) % s->ncomp];
+    const int c2_shift_h = s->shift_h[(component + 2) % s->ncomp];
+    const uint8_t *c0_data = in->data[plane] +                  (sliceh_start >> c0_shift_h) * c0_linesize;
+    const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp] + (sliceh_start >> c1_shift_h) * c1_linesize;
+    const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp] + (sliceh_start >> c2_shift_h) * c2_linesize;
     const int d0_linesize = out->linesize[ plane + 0 ];
     const int d1_linesize = out->linesize[(plane + 1) % s->ncomp];
     const int d2_linesize = out->linesize[(plane + 2) % s->ncomp];
     const int c0_shift_w = s->shift_w[ component + 0 ];
     const int c1_shift_w = s->shift_w[(component + 1) % s->ncomp];
     const int c2_shift_w = s->shift_w[(component + 2) % s->ncomp];
-    const int c0_shift_h = s->shift_h[ component + 0 ];
-    const int c1_shift_h = s->shift_h[(component + 1) % s->ncomp];
-    const int c2_shift_h = s->shift_h[(component + 2) % s->ncomp];
-    const int src_h = in->height;
-    const int src_w = in->width;
     int x, y;
 
-    if (s->mode) {
+    if (column) {
         const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1);
         const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);
         const int d2_signed_linesize = d2_linesize * (mirror == 1 ? -1 : 1);
@@ -1563,7 +1790,7 @@
         uint8_t * const d2 = (mirror ? d2_bottom_line : d2_data);
 
         for (y = 0; y < src_h; y++) {
-            for (x = 0; x < src_w; x++) {
+            for (x = slicew_start; x < slicew_end; x++) {
                 const int c0 = c0_data[x >> c0_shift_w];
                 const int c1 = c1_data[x >> c1_shift_w];
                 const int c2 = c2_data[x >> c2_shift_w];
@@ -1584,9 +1811,9 @@
             d2_data += d2_linesize;
         }
     } else {
-        uint8_t *d0_data = out->data[plane] + offset_y * d0_linesize + offset_x;
-        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset_y * d1_linesize + offset_x;
-        uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + offset_y * d2_linesize + offset_x;
+        uint8_t *d0_data = out->data[plane] + (offset_y + sliceh_start) * d0_linesize + offset_x;
+        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + (offset_y + sliceh_start) * d1_linesize + offset_x;
+        uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + (offset_y + sliceh_start) * d2_linesize + offset_x;
 
         if (mirror) {
             d0_data += s->size - 1;
@@ -1594,7 +1821,7 @@
             d2_data += s->size - 1;
         }
 
-        for (y = 0; y < src_h; y++) {
+        for (y = sliceh_start; y < sliceh_end; y++) {
             for (x = 0; x < src_w; x++) {
                 const int c0 = c0_data[x >> c0_shift_w];
                 const int c1 = c1_data[x >> c1_shift_w];
@@ -1622,39 +1849,67 @@
             d2_data += d2_linesize;
         }
     }
-
-    envelope(s, out, plane, plane, column ? offset_x : offset_y);
 }
 
+#define COLOR_FUNC(name, column, mirror)       \
+static int color_##name(AVFilterContext *ctx,  \
+                        void *arg, int jobnr,  \
+                        int nb_jobs)           \
+{                                              \
+    WaveformContext *s = ctx->priv;            \
+    ThreadData *td = arg;                      \
+    AVFrame *in = td->in;                      \
+    AVFrame *out = td->out;                    \
+    int component = td->component;             \
+    int offset_y = td->offset_y;               \
+    int offset_x = td->offset_x;               \
+                                               \
+    color(s, in, out, component, s->intensity, \
+          offset_y, offset_x, column, mirror,  \
+          jobnr, nb_jobs);                     \
+                                               \
+    return 0;                                  \
+}
+
+COLOR_FUNC(column_mirror, 1, 1)
+COLOR_FUNC(column,        1, 0)
+COLOR_FUNC(row_mirror,    0, 1)
+COLOR_FUNC(row,           0, 0)
+
 static av_always_inline void acolor16(WaveformContext *s,
                                       AVFrame *in, AVFrame *out,
                                       int component, int intensity,
                                       int offset_y, int offset_x,
-                                      int column, int mirror)
+                                      int column, int mirror,
+                                      int jobnr, int nb_jobs)
 {
     const int plane = s->desc->comp[component].plane;
     const int limit = s->max - 1;
     const int max = limit - intensity;
-    const uint16_t *c0_data = (const uint16_t *)in->data[plane + 0];
-    const uint16_t *c1_data = (const uint16_t *)in->data[(plane + 1) % s->ncomp];
-    const uint16_t *c2_data = (const uint16_t *)in->data[(plane + 2) % s->ncomp];
+    const int src_h = in->height;
+    const int src_w = in->width;
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;
+    const int c0_shift_h = s->shift_h[ component + 0 ];
+    const int c1_shift_h = s->shift_h[(component + 1) % s->ncomp];
+    const int c2_shift_h = s->shift_h[(component + 2) % s->ncomp];
     const int c0_linesize = in->linesize[ plane + 0 ] / 2;
     const int c1_linesize = in->linesize[(plane + 1) % s->ncomp] / 2;
     const int c2_linesize = in->linesize[(plane + 2) % s->ncomp] / 2;
+    const uint16_t *c0_data = (const uint16_t *)in->data[plane + 0] + (sliceh_start >> c0_shift_h) * c0_linesize;
+    const uint16_t *c1_data = (const uint16_t *)in->data[(plane + 1) % s->ncomp] + (sliceh_start >> c1_shift_h) * c1_linesize;
+    const uint16_t *c2_data = (const uint16_t *)in->data[(plane + 2) % s->ncomp] + (sliceh_start >> c2_shift_h) * c2_linesize;
     const int d0_linesize = out->linesize[ plane + 0 ] / 2;
     const int d1_linesize = out->linesize[(plane + 1) % s->ncomp] / 2;
     const int d2_linesize = out->linesize[(plane + 2) % s->ncomp] / 2;
     const int c0_shift_w = s->shift_w[ component + 0 ];
     const int c1_shift_w = s->shift_w[(component + 1) % s->ncomp];
     const int c2_shift_w = s->shift_w[(component + 2) % s->ncomp];
-    const int c0_shift_h = s->shift_h[ component + 0 ];
-    const int c1_shift_h = s->shift_h[(component + 1) % s->ncomp];
-    const int c2_shift_h = s->shift_h[(component + 2) % s->ncomp];
-    const int src_h = in->height;
-    const int src_w = in->width;
     int x, y;
 
-    if (s->mode) {
+    if (column) {
         const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1);
         const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);
         const int d2_signed_linesize = d2_linesize * (mirror == 1 ? -1 : 1);
@@ -1669,7 +1924,7 @@
         uint16_t * const d2 = (mirror ? d2_bottom_line : d2_data);
 
         for (y = 0; y < src_h; y++) {
-            for (x = 0; x < src_w; x++) {
+            for (x = slicew_start; x < slicew_end; x++) {
                 const int c0 = FFMIN(c0_data[x >> c0_shift_w], limit);
                 const int c1 = c1_data[x >> c1_shift_w];
                 const int c2 = c2_data[x >> c2_shift_w];
@@ -1690,9 +1945,9 @@
             d2_data += d2_linesize;
         }
     } else {
-        uint16_t *d0_data = (uint16_t *)out->data[plane] + offset_y * d0_linesize + offset_x;
-        uint16_t *d1_data = (uint16_t *)out->data[(plane + 1) % s->ncomp] + offset_y * d1_linesize + offset_x;
-        uint16_t *d2_data = (uint16_t *)out->data[(plane + 2) % s->ncomp] + offset_y * d2_linesize + offset_x;
+        uint16_t *d0_data = (uint16_t *)out->data[plane] + (offset_y + sliceh_start) * d0_linesize + offset_x;
+        uint16_t *d1_data = (uint16_t *)out->data[(plane + 1) % s->ncomp] + (offset_y + sliceh_start) * d1_linesize + offset_x;
+        uint16_t *d2_data = (uint16_t *)out->data[(plane + 2) % s->ncomp] + (offset_y + sliceh_start) * d2_linesize + offset_x;
 
         if (mirror) {
             d0_data += s->size - 1;
@@ -1700,7 +1955,7 @@
             d2_data += s->size - 1;
         }
 
-        for (y = 0; y < src_h; y++) {
+        for (y = sliceh_start; y < sliceh_end; y++) {
             for (x = 0; x < src_w; x++) {
                 const int c0 = FFMIN(c0_data[x >> c0_shift_w], limit);
                 const int c1 = c1_data[x >> c1_shift_w];
@@ -1728,38 +1983,66 @@
             d2_data += d2_linesize;
         }
     }
-
-    envelope16(s, out, plane, plane, column ? offset_x : offset_y);
 }
 
+#define ACOLOR16_FUNC(name, column, mirror)      \
+static int acolor16_##name(AVFilterContext *ctx, \
+                           void *arg, int jobnr, \
+                           int nb_jobs)          \
+{                                                \
+    WaveformContext *s = ctx->priv;              \
+    ThreadData *td = arg;                        \
+    AVFrame *in = td->in;                        \
+    AVFrame *out = td->out;                      \
+    int component = td->component;               \
+    int offset_y = td->offset_y;                 \
+    int offset_x = td->offset_x;                 \
+                                                 \
+    acolor16(s, in, out, component, s->intensity,\
+             offset_y, offset_x, column, mirror, \
+             jobnr, nb_jobs);                    \
+                                                 \
+    return 0;                                    \
+}
+
+ACOLOR16_FUNC(column_mirror, 1, 1)
+ACOLOR16_FUNC(column,        1, 0)
+ACOLOR16_FUNC(row_mirror,    0, 1)
+ACOLOR16_FUNC(row,           0, 0)
+
 static av_always_inline void acolor(WaveformContext *s,
                                     AVFrame *in, AVFrame *out,
                                     int component, int intensity,
                                     int offset_y, int offset_x,
-                                    int column, int mirror)
+                                    int column, int mirror,
+                                    int jobnr, int nb_jobs)
 {
     const int plane = s->desc->comp[component].plane;
-    const uint8_t *c0_data = in->data[plane + 0];
-    const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
-    const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
-    const int c0_linesize = in->linesize[ plane + 0 ];
-    const int c1_linesize = in->linesize[(plane + 1) % s->ncomp];
-    const int c2_linesize = in->linesize[(plane + 2) % s->ncomp];
-    const int d0_linesize = out->linesize[ plane + 0 ];
-    const int d1_linesize = out->linesize[(plane + 1) % s->ncomp];
-    const int d2_linesize = out->linesize[(plane + 2) % s->ncomp];
+    const int src_h = in->height;
+    const int src_w = in->width;
+    const int sliceh_start = !column ? (src_h * jobnr) / nb_jobs : 0;
+    const int sliceh_end = !column ? (src_h * (jobnr+1)) / nb_jobs : src_h;
+    const int slicew_start = column ? (src_w * jobnr) / nb_jobs : 0;
+    const int slicew_end = column ? (src_w * (jobnr+1)) / nb_jobs : src_w;
     const int c0_shift_w = s->shift_w[ component + 0 ];
     const int c1_shift_w = s->shift_w[(component + 1) % s->ncomp];
     const int c2_shift_w = s->shift_w[(component + 2) % s->ncomp];
     const int c0_shift_h = s->shift_h[ component + 0 ];
     const int c1_shift_h = s->shift_h[(component + 1) % s->ncomp];
     const int c2_shift_h = s->shift_h[(component + 2) % s->ncomp];
+    const int c0_linesize = in->linesize[ plane + 0 ];
+    const int c1_linesize = in->linesize[(plane + 1) % s->ncomp];
+    const int c2_linesize = in->linesize[(plane + 2) % s->ncomp];
+    const uint8_t *c0_data = in->data[plane + 0] + (sliceh_start >> c0_shift_h) * c0_linesize;
+    const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp] + (sliceh_start >> c1_shift_h) * c1_linesize;
+    const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp] + (sliceh_start >> c2_shift_h) * c2_linesize;
+    const int d0_linesize = out->linesize[ plane + 0 ];
+    const int d1_linesize = out->linesize[(plane + 1) % s->ncomp];
+    const int d2_linesize = out->linesize[(plane + 2) % s->ncomp];
     const int max = 255 - intensity;
-    const int src_h = in->height;
-    const int src_w = in->width;
     int x, y;
 
-    if (s->mode) {
+    if (column) {
         const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1);
         const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);
         const int d2_signed_linesize = d2_linesize * (mirror == 1 ? -1 : 1);
@@ -1774,7 +2057,7 @@
         uint8_t * const d2 = (mirror ? d2_bottom_line : d2_data);
 
         for (y = 0; y < src_h; y++) {
-            for (x = 0; x < src_w; x++) {
+            for (x = slicew_start; x < slicew_end; x++) {
                 const int c0 = c0_data[x >> c0_shift_w];
                 const int c1 = c1_data[x >> c1_shift_w];
                 const int c2 = c2_data[x >> c2_shift_w];
@@ -1795,9 +2078,9 @@
             d2_data += d2_linesize;
         }
     } else {
-        uint8_t *d0_data = out->data[plane] + offset_y * d0_linesize + offset_x;
-        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset_y * d1_linesize + offset_x;
-        uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + offset_y * d2_linesize + offset_x;
+        uint8_t *d0_data = out->data[plane] + (offset_y + sliceh_start) * d0_linesize + offset_x;
+        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + (offset_y + sliceh_start) * d1_linesize + offset_x;
+        uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + (offset_y + sliceh_start) * d2_linesize + offset_x;
 
         if (mirror) {
             d0_data += s->size - 1;
@@ -1805,7 +2088,7 @@
             d2_data += s->size - 1;
         }
 
-        for (y = 0; y < src_h; y++) {
+        for (y = sliceh_start; y < sliceh_end; y++) {
             for (x = 0; x < src_w; x++) {
                 const int c0 = c0_data[x >> c0_shift_w];
                 const int c1 = c1_data[x >> c1_shift_w];
@@ -1833,12 +2116,34 @@
             d2_data += d2_linesize;
         }
     }
-
-    envelope(s, out, plane, plane, column ? offset_x : offset_y);
 }
 
+#define ACOLOR_FUNC(name, column, mirror)        \
+static int acolor_##name(AVFilterContext *ctx,   \
+                         void *arg, int jobnr,   \
+                         int nb_jobs)            \
+{                                                \
+    WaveformContext *s = ctx->priv;              \
+    ThreadData *td = arg;                        \
+    AVFrame *in = td->in;                        \
+    AVFrame *out = td->out;                      \
+    int component = td->component;               \
+    int offset_y = td->offset_y;                 \
+    int offset_x = td->offset_x;                 \
+                                                 \
+    acolor(s, in, out, component, s->intensity,  \
+           offset_y, offset_x, column, mirror,   \
+           jobnr, nb_jobs);                      \
+                                                 \
+    return 0;                                    \
+}
+
+ACOLOR_FUNC(column_mirror, 1, 1)
+ACOLOR_FUNC(column,        1, 0)
+ACOLOR_FUNC(row_mirror,    0, 1)
+ACOLOR_FUNC(row,           0, 0)
+
 static const uint8_t black_yuva_color[4] = { 0, 127, 127, 255 };
-static const uint8_t green_yuva_color[4] = { 255, 0, 0, 255 };
 static const uint8_t black_gbrp_color[4] = { 0, 0, 0, 255 };
 
 static const GraticuleLines aflat_digital8[] = {
@@ -2299,7 +2604,7 @@
 {
 }
 
-static void graticule_green_row(WaveformContext *s, AVFrame *out)
+static void graticule_row(WaveformContext *s, AVFrame *out)
 {
     const int step = (s->flags & 2) + 1;
     const float o1 = s->opacity;
@@ -2313,7 +2618,7 @@
 
         k++;
         for (p = 0; p < s->ncomp; p++) {
-            const int v = green_yuva_color[p];
+            const int v = s->grat_yuva_color[p];
             for (l = 0; l < s->nb_glines; l++) {
                 const uint16_t pos = s->glines[l].line[c].pos;
                 int x = offset_x + (s->mirror ? s->size - 1 - pos : pos);
@@ -2331,7 +2636,7 @@
             if (x < 0)
                 x = 4;
 
-            draw_vtext(out, x, offset_y + 2, o1, o2, name, green_yuva_color);
+            draw_vtext(out, x, offset_y + 2, o1, o2, name, s->grat_yuva_color);
         }
 
         offset_x += s->size * (s->display == STACK);
@@ -2339,12 +2644,12 @@
     }
 }
 
-static void graticule16_green_row(WaveformContext *s, AVFrame *out)
+static void graticule16_row(WaveformContext *s, AVFrame *out)
 {
     const int step = (s->flags & 2) + 1;
     const float o1 = s->opacity;
     const float o2 = 1. - o1;
-    const int mult = s->size / 256;
+    const int mult = s->max / 256;
     const int height = s->display == PARADE ? out->height / s->acomp : out->height;
     int k = 0, c, p, l, offset_x = 0, offset_y = 0;
 
@@ -2354,7 +2659,7 @@
 
         k++;
         for (p = 0; p < s->ncomp; p++) {
-            const int v = green_yuva_color[p] * mult;
+            const int v = s->grat_yuva_color[p] * mult;
             for (l = 0; l < s->nb_glines ; l++) {
                 const uint16_t pos = s->glines[l].line[c].pos;
                 int x = offset_x + (s->mirror ? s->size - 1 - pos : pos);
@@ -2372,7 +2677,7 @@
             if (x < 0)
                 x = 4;
 
-            draw_vtext16(out, x, offset_y + 2, mult, o1, o2, name, green_yuva_color);
+            draw_vtext16(out, x, offset_y + 2, mult, o1, o2, name, s->grat_yuva_color);
         }
 
         offset_x += s->size * (s->display == STACK);
@@ -2380,7 +2685,7 @@
     }
 }
 
-static void graticule_green_column(WaveformContext *s, AVFrame *out)
+static void graticule_column(WaveformContext *s, AVFrame *out)
 {
     const int step = (s->flags & 2) + 1;
     const float o1 = s->opacity;
@@ -2394,7 +2699,7 @@
 
         k++;
         for (p = 0; p < s->ncomp; p++) {
-            const int v = green_yuva_color[p];
+            const int v = s->grat_yuva_color[p];
             for (l = 0; l < s->nb_glines ; l++) {
                 const uint16_t pos = s->glines[l].line[c].pos;
                 int y = offset_y + (s->mirror ? s->size - 1 - pos : pos);
@@ -2412,7 +2717,7 @@
             if (y < 0)
                 y = 4;
 
-            draw_htext(out, 2 + offset_x, y, o1, o2, name, green_yuva_color);
+            draw_htext(out, 2 + offset_x, y, o1, o2, name, s->grat_yuva_color);
         }
 
         offset_y += s->size * (s->display == STACK);
@@ -2420,12 +2725,12 @@
     }
 }
 
-static void graticule16_green_column(WaveformContext *s, AVFrame *out)
+static void graticule16_column(WaveformContext *s, AVFrame *out)
 {
     const int step = (s->flags & 2) + 1;
     const float o1 = s->opacity;
     const float o2 = 1. - o1;
-    const int mult = s->size / 256;
+    const int mult = s->max / 256;
     const int width = s->display == PARADE ? out->width / s->acomp : out->width;
     int k = 0, c, p, l, offset_x = 0, offset_y = 0;
 
@@ -2435,7 +2740,7 @@
 
         k++;
         for (p = 0; p < s->ncomp; p++) {
-            const int v = green_yuva_color[p] * mult;
+            const int v = s->grat_yuva_color[p] * mult;
             for (l = 0; l < s->nb_glines ; l++) {
                 const uint16_t pos = s->glines[l].line[c].pos;
                 int y = offset_y + (s->mirror ? s->size - 1 - pos : pos);
@@ -2453,7 +2758,7 @@
             if (y < 0)
                 y = 4;
 
-            draw_htext16(out, 2 + offset_x, y, mult, o1, o2, name, green_yuva_color);
+            draw_htext16(out, 2 + offset_x, y, mult, o1, o2, name, s->grat_yuva_color);
         }
 
         offset_y += s->size * (s->display == STACK);
@@ -2480,6 +2785,7 @@
     s->graticulef = graticule_none;
 
     switch (s->filter) {
+    case XFLAT:
     case AFLAT: s->size = 256 * 2; break;
     case FLAT:  s->size = 256 * 3; break;
     default:    s->size = 256;     break;
@@ -2487,67 +2793,80 @@
 
     switch (s->filter | ((s->bits > 8) << 4) |
             (s->mode << 8) | (s->mirror << 12)) {
-    case 0x1100: s->waveform = lowpass_column_mirror; break;
-    case 0x1000: s->waveform = lowpass_row_mirror;    break;
-    case 0x0100: s->waveform = lowpass_column;        break;
-    case 0x0000: s->waveform = lowpass_row;           break;
-    case 0x1110: s->waveform = lowpass16_column_mirror; break;
-    case 0x1010: s->waveform = lowpass16_row_mirror;    break;
-    case 0x0110: s->waveform = lowpass16_column;        break;
-    case 0x0010: s->waveform = lowpass16_row;           break;
-    case 0x1101:
-    case 0x1001:
-    case 0x0101:
-    case 0x0001: s->waveform = flat;      break;
-    case 0x1111:
-    case 0x1011:
-    case 0x0111:
-    case 0x0011: s->waveform = flat16;    break;
-    case 0x1102:
-    case 0x1002:
-    case 0x0102:
-    case 0x0002: s->waveform = aflat;     break;
-    case 0x1112:
-    case 0x1012:
-    case 0x0112:
-    case 0x0012: s->waveform = aflat16;   break;
-    case 0x1103:
-    case 0x1003:
-    case 0x0103:
-    case 0x0003: s->waveform = chroma;    break;
-    case 0x1113:
-    case 0x1013:
-    case 0x0113:
-    case 0x0013: s->waveform = chroma16;  break;
-    case 0x1104:
-    case 0x1004:
-    case 0x0104:
-    case 0x0004: s->waveform = color;     break;
-    case 0x1114:
-    case 0x1014:
-    case 0x0114:
-    case 0x0014: s->waveform = color16;   break;
-    case 0x1105:
-    case 0x1005:
-    case 0x0105:
-    case 0x0005: s->waveform = acolor;    break;
-    case 0x1115:
-    case 0x1015:
-    case 0x0115:
-    case 0x0015: s->waveform = acolor16;  break;
+    case 0x1100: s->waveform_slice = lowpass_column_mirror; break;
+    case 0x1000: s->waveform_slice = lowpass_row_mirror;    break;
+    case 0x0100: s->waveform_slice = lowpass_column;        break;
+    case 0x0000: s->waveform_slice = lowpass_row;           break;
+    case 0x1110: s->waveform_slice = lowpass16_column_mirror; break;
+    case 0x1010: s->waveform_slice = lowpass16_row_mirror;    break;
+    case 0x0110: s->waveform_slice = lowpass16_column;        break;
+    case 0x0010: s->waveform_slice = lowpass16_row;           break;
+    case 0x1101: s->waveform_slice = flat_column_mirror; break;
+    case 0x1001: s->waveform_slice = flat_row_mirror;    break;
+    case 0x0101: s->waveform_slice = flat_column;        break;
+    case 0x0001: s->waveform_slice = flat_row;           break;
+    case 0x1111: s->waveform_slice = flat16_column_mirror; break;
+    case 0x1011: s->waveform_slice = flat16_row_mirror;    break;
+    case 0x0111: s->waveform_slice = flat16_column;        break;
+    case 0x0011: s->waveform_slice = flat16_row;           break;
+    case 0x1102: s->waveform_slice = aflat_column_mirror; break;
+    case 0x1002: s->waveform_slice = aflat_row_mirror;    break;
+    case 0x0102: s->waveform_slice = aflat_column;        break;
+    case 0x0002: s->waveform_slice = aflat_row;           break;
+    case 0x1112: s->waveform_slice = aflat16_column_mirror; break;
+    case 0x1012: s->waveform_slice = aflat16_row_mirror;    break;
+    case 0x0112: s->waveform_slice = aflat16_column;        break;
+    case 0x0012: s->waveform_slice = aflat16_row;           break;
+    case 0x1103: s->waveform_slice = chroma_column_mirror; break;
+    case 0x1003: s->waveform_slice = chroma_row_mirror;    break;
+    case 0x0103: s->waveform_slice = chroma_column;        break;
+    case 0x0003: s->waveform_slice = chroma_row;           break;
+    case 0x1113: s->waveform_slice = chroma16_column_mirror; break;
+    case 0x1013: s->waveform_slice = chroma16_row_mirror;    break;
+    case 0x0113: s->waveform_slice = chroma16_column;        break;
+    case 0x0013: s->waveform_slice = chroma16_row;           break;
+    case 0x1104: s->waveform_slice = color_column_mirror; break;
+    case 0x1004: s->waveform_slice = color_row_mirror;    break;
+    case 0x0104: s->waveform_slice = color_column;        break;
+    case 0x0004: s->waveform_slice = color_row;           break;
+    case 0x1114: s->waveform_slice = color16_column_mirror; break;
+    case 0x1014: s->waveform_slice = color16_row_mirror;    break;
+    case 0x0114: s->waveform_slice = color16_column;        break;
+    case 0x0014: s->waveform_slice = color16_row;           break;
+    case 0x1105: s->waveform_slice = acolor_column_mirror; break;
+    case 0x1005: s->waveform_slice = acolor_row_mirror;    break;
+    case 0x0105: s->waveform_slice = acolor_column;        break;
+    case 0x0005: s->waveform_slice = acolor_row;           break;
+    case 0x1115: s->waveform_slice = acolor16_column_mirror; break;
+    case 0x1015: s->waveform_slice = acolor16_row_mirror;    break;
+    case 0x0115: s->waveform_slice = acolor16_column;        break;
+    case 0x0015: s->waveform_slice = acolor16_row;           break;
+    case 0x1106: s->waveform_slice = xflat_column_mirror; break;
+    case 0x1006: s->waveform_slice = xflat_row_mirror;    break;
+    case 0x0106: s->waveform_slice = xflat_column;        break;
+    case 0x0006: s->waveform_slice = xflat_row;           break;
+    case 0x1116: s->waveform_slice = xflat16_column_mirror; break;
+    case 0x1016: s->waveform_slice = xflat16_row_mirror;    break;
+    case 0x0116: s->waveform_slice = xflat16_column;        break;
+    case 0x0016: s->waveform_slice = xflat16_row;           break;
     }
 
+    s->grat_yuva_color[0] = 255;
+    s->grat_yuva_color[2] = s->graticule == 2 ? 255 : 0;
+    s->grat_yuva_color[3] = 255;
+
     switch (s->filter) {
     case LOWPASS:
     case COLOR:
     case ACOLOR:
     case CHROMA:
     case AFLAT:
+    case XFLAT:
     case FLAT:
         if (s->graticule && s->mode == 1)
-            s->graticulef = s->bits > 8 ? graticule16_green_column : graticule_green_column;
+            s->graticulef = s->bits > 8 ? graticule16_column : graticule_column;
         else if (s->graticule && s->mode == 0)
-            s->graticulef = s->bits > 8 ? graticule16_green_row : graticule_green_row;
+            s->graticulef = s->bits > 8 ? graticule16_row : graticule_row;
         break;
     }
 
@@ -2610,6 +2929,7 @@
             break;
         }
         break;
+    case XFLAT:
     case AFLAT:
         switch (s->scale) {
         case DIGITAL:
@@ -2783,6 +3103,8 @@
 
     for (k = 0, i = 0; k < s->ncomp; k++) {
         if ((1 << k) & s->pcomp) {
+            const int plane = s->desc->comp[k].plane;
+            ThreadData td;
             int offset_y;
             int offset_x;
 
@@ -2793,7 +3115,45 @@
                 offset_y = s->mode ? i++ * s->size * !!s->display : 0;
                 offset_x = s->mode ? 0 : i++ * s->size * !!s->display;
             }
-            s->waveform(s, in, out, k, s->intensity, offset_y, offset_x, s->mode, s->mirror);
+
+            td.in = in;
+            td.out = out;
+            td.component = k;
+            td.offset_y = offset_y;
+            td.offset_x = offset_x;
+            ctx->internal->execute(ctx, s->waveform_slice, &td, NULL, ff_filter_get_nb_threads(ctx));
+            switch (s->filter) {
+            case ACOLOR:
+            case CHROMA:
+            case COLOR:
+            case LOWPASS:
+                if (s->bits <= 8)
+                    envelope(s, out, plane, plane, s->mode ? offset_x : offset_y);
+                else
+                    envelope16(s, out, plane, plane, s->mode ? offset_x : offset_y);
+                break;
+            case FLAT:
+                if (s->bits <= 8) {
+                    envelope(s, out, plane, plane, s->mode ? offset_x : offset_y);
+                    envelope(s, out, plane, (plane + 1) % s->ncomp, s->mode ? offset_x : offset_y);
+                } else {
+                    envelope16(s, out, plane, plane, s->mode ? offset_x : offset_y);
+                    envelope16(s, out, plane, (plane + 1) % s->ncomp, s->mode ? offset_x : offset_y);
+                }
+                break;
+            case AFLAT:
+            case XFLAT:
+                if (s->bits <= 8) {
+                    envelope(s, out, plane, (plane + 0) % s->ncomp, s->mode ? offset_x : offset_y);
+                    envelope(s, out, plane, (plane + 1) % s->ncomp, s->mode ? offset_x : offset_y);
+                    envelope(s, out, plane, (plane + 2) % s->ncomp, s->mode ? offset_x : offset_y);
+                } else {
+                    envelope16(s, out, plane, (plane + 0) % s->ncomp, s->mode ? offset_x : offset_y);
+                    envelope16(s, out, plane, (plane + 1) % s->ncomp, s->mode ? offset_x : offset_y);
+                    envelope16(s, out, plane, (plane + 2) % s->ncomp, s->mode ? offset_x : offset_y);
+                }
+                break;
+            }
         }
     }
     s->graticulef(s, out);
@@ -2837,4 +3197,5 @@
     .uninit        = uninit,
     .inputs        = inputs,
     .outputs       = outputs,
+    .flags         = AVFILTER_FLAG_SLICE_THREADS,
 };

diff --git a/libavfilter/vf_weave.c b/libavfilter/vf_weave.c
index 037f5d1..663d79f 100644
--- a/libavfilter/vf_weave.c
+++ b/libavfilter/vf_weave.c

@@ -84,6 +84,8 @@
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out;
     int i;
+    int weave;
+    int field1, field2;
 
     if (!s->prev) {
         s->prev = in;
@@ -98,26 +100,18 @@
     }
     av_frame_copy_props(out, in);
 
+    weave = (s->double_weave && !(inlink->frame_count_out & 1));
+    field1 = weave ? s->first_field : (!s->first_field);
+    field2 = weave ? (!s->first_field) : s->first_field;
     for (i = 0; i < s->nb_planes; i++) {
-        if (s->double_weave && !(inlink->frame_count_out & 1)) {
-            av_image_copy_plane(out->data[i] + out->linesize[i] * s->first_field,
-                                out->linesize[i] * 2,
-                                in->data[i], in->linesize[i],
-                                s->linesize[i], s->planeheight[i]);
-            av_image_copy_plane(out->data[i] + out->linesize[i] * !s->first_field,
-                                out->linesize[i] * 2,
-                                s->prev->data[i], s->prev->linesize[i],
-                                s->linesize[i], s->planeheight[i]);
-        } else {
-            av_image_copy_plane(out->data[i] + out->linesize[i] * !s->first_field,
-                                out->linesize[i] * 2,
-                                in->data[i], in->linesize[i],
-                                s->linesize[i], s->planeheight[i]);
-            av_image_copy_plane(out->data[i] + out->linesize[i] * s->first_field,
-                                out->linesize[i] * 2,
-                                s->prev->data[i], s->prev->linesize[i],
-                                s->linesize[i], s->planeheight[i]);
-        }
+        av_image_copy_plane(out->data[i] + out->linesize[i] * field1,
+                            out->linesize[i] * 2,
+                            in->data[i], in->linesize[i],
+                            s->linesize[i], s->planeheight[i]);
+        av_image_copy_plane(out->data[i] + out->linesize[i] * field2,
+                            out->linesize[i] * 2,
+                            s->prev->data[i], s->prev->linesize[i],
+                            s->linesize[i], s->planeheight[i]);
     }
 
     out->pts = s->double_weave ? s->prev->pts : in->pts / 2;

diff --git a/libavfilter/vf_xbr.c b/libavfilter/vf_xbr.c
index 78094e0..2c71871 100644
--- a/libavfilter/vf_xbr.c
+++ b/libavfilter/vf_xbr.c

@@ -24,7 +24,7 @@
  * XBR Filter is used for depixelization of image.
  * This is based on Hyllian's xBR shader.
  *
- * @see http://www.libretro.com/forums/viewtopic.php?f=6&t=134
+ * @see https://forums.libretro.com/t/xbr-algorithm-tutorial/123
  * @see https://github.com/yoyofr/iFBA/blob/master/fba_src/src/intf/video/scalers/xbr.cpp
  */
 

diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c
index 694ac44..f58d8ac 100644
--- a/libavfilter/vf_yadif.c
+++ b/libavfilter/vf_yadif.c

@@ -123,18 +123,20 @@
     uint8_t *prev2 = parity ? prev : cur ;
     uint8_t *next2 = parity ? cur  : next;
 
+    const int edge = MAX_ALIGN - 1;
+
     /* Only edge pixels need to be processed here.  A constant value of false
      * for is_not_edge should let the compiler ignore the whole branch. */
     FILTER(0, 3, 0)
 
-    dst  = (uint8_t*)dst1  + w - (MAX_ALIGN-1);
-    prev = (uint8_t*)prev1 + w - (MAX_ALIGN-1);
-    cur  = (uint8_t*)cur1  + w - (MAX_ALIGN-1);
-    next = (uint8_t*)next1 + w - (MAX_ALIGN-1);
+    dst  = (uint8_t*)dst1  + w - edge;
+    prev = (uint8_t*)prev1 + w - edge;
+    cur  = (uint8_t*)cur1  + w - edge;
+    next = (uint8_t*)next1 + w - edge;
     prev2 = (uint8_t*)(parity ? prev : cur);
     next2 = (uint8_t*)(parity ? cur  : next);
 
-    FILTER(w - (MAX_ALIGN-1), w - 3, 1)
+    FILTER(w - edge, w - 3, 1)
     FILTER(w - 3, w, 0)
 }
 
@@ -167,19 +169,22 @@
     int x;
     uint16_t *prev2 = parity ? prev : cur ;
     uint16_t *next2 = parity ? cur  : next;
+
+    const int edge = MAX_ALIGN / 2 - 1;
+
     mrefs /= 2;
     prefs /= 2;
 
     FILTER(0, 3, 0)
 
-    dst   = (uint16_t*)dst1  + w - (MAX_ALIGN/2-1);
-    prev  = (uint16_t*)prev1 + w - (MAX_ALIGN/2-1);
-    cur   = (uint16_t*)cur1  + w - (MAX_ALIGN/2-1);
-    next  = (uint16_t*)next1 + w - (MAX_ALIGN/2-1);
+    dst   = (uint16_t*)dst1  + w - edge;
+    prev  = (uint16_t*)prev1 + w - edge;
+    cur   = (uint16_t*)cur1  + w - edge;
+    next  = (uint16_t*)next1 + w - edge;
     prev2 = (uint16_t*)(parity ? prev : cur);
     next2 = (uint16_t*)(parity ? cur  : next);
 
-    FILTER(w - (MAX_ALIGN/2-1), w - 3, 1)
+    FILTER(w - edge, w - 3, 1)
     FILTER(w - 3, w, 0)
 }
 
@@ -193,6 +198,7 @@
     int slice_start = (td->h *  jobnr   ) / nb_jobs;
     int slice_end   = (td->h * (jobnr+1)) / nb_jobs;
     int y;
+    int edge = 3 + MAX_ALIGN / df - 1;
 
     /* filtering reads 3 pixels to the left/right; to avoid invalid reads,
      * we need to call the c variant which avoids this for border pixels
@@ -205,7 +211,7 @@
             uint8_t *dst  = &td->frame->data[td->plane][y * td->frame->linesize[td->plane]];
             int     mode  = y == 1 || y + 2 == td->h ? 2 : s->mode;
             s->filter_line(dst + pix_3, prev + pix_3, cur + pix_3,
-                           next + pix_3, td->w - (3 + MAX_ALIGN/df-1),
+                           next + pix_3, td->w - edge,
                            y + 1 < td->h ? refs : -refs,
                            y ? -refs : refs,
                            td->parity ^ td->tff, mode);

diff --git a/libavfilter/vf_zoompan.c b/libavfilter/vf_zoompan.c
index 0635171..dea10d5 100644
--- a/libavfilter/vf_zoompan.c
+++ b/libavfilter/vf_zoompan.c

@@ -167,7 +167,7 @@
     var_values[VAR_PDURATION] = s->prev_nb_frames;
     var_values[VAR_TIME] = pts * av_q2d(outlink->time_base);
     var_values[VAR_FRAME] = i;
-    var_values[VAR_ON] = outlink->frame_count_in + 1;
+    var_values[VAR_ON] = outlink->frame_count_in;
 
     *zoom = av_expr_eval(s->zoom_expr, var_values, NULL);
 
@@ -274,8 +274,8 @@
         s->var_values[VAR_IN_H]  = s->var_values[VAR_IH] = s->in->height;
         s->var_values[VAR_OUT_W] = s->var_values[VAR_OW] = s->w;
         s->var_values[VAR_OUT_H] = s->var_values[VAR_OH] = s->h;
-        s->var_values[VAR_IN]    = inlink->frame_count_out + 1;
-        s->var_values[VAR_ON]    = outlink->frame_count_in + 1;
+        s->var_values[VAR_IN]    = inlink->frame_count_out - 1;
+        s->var_values[VAR_ON]    = outlink->frame_count_in;
         s->var_values[VAR_PX]    = s->x;
         s->var_values[VAR_PY]    = s->y;
         s->var_values[VAR_X]     = 0;

diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
index c303dd4..6e1d36c 100644
--- a/libavfilter/vf_zscale.c
+++ b/libavfilter/vf_zscale.c

@@ -321,7 +321,7 @@
 
     av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
 
-    return err_code;
+    return AVERROR_EXTERNAL;
 }
 
 static int convert_chroma_location(enum AVChromaLocation chroma_location)
@@ -353,16 +353,26 @@
         return ZIMG_MATRIX_709;
     case AVCOL_SPC_UNSPECIFIED:
         return ZIMG_MATRIX_UNSPECIFIED;
+    case AVCOL_SPC_FCC:
+        return ZIMG_MATRIX_FCC;
     case AVCOL_SPC_BT470BG:
         return ZIMG_MATRIX_470BG;
     case AVCOL_SPC_SMPTE170M:
         return ZIMG_MATRIX_170M;
+    case AVCOL_SPC_SMPTE240M:
+        return ZIMG_MATRIX_240M;
     case AVCOL_SPC_YCGCO:
         return ZIMG_MATRIX_YCGCO;
     case AVCOL_SPC_BT2020_NCL:
         return ZIMG_MATRIX_2020_NCL;
     case AVCOL_SPC_BT2020_CL:
         return ZIMG_MATRIX_2020_CL;
+    case AVCOL_SPC_CHROMA_DERIVED_NCL:
+        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
+    case AVCOL_SPC_CHROMA_DERIVED_CL:
+        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
+    case AVCOL_SPC_ICTCP:
+        return ZIMG_MATRIX_ICTCP;
     }
     return ZIMG_MATRIX_UNSPECIFIED;
 }
@@ -374,10 +384,22 @@
         return ZIMG_TRANSFER_UNSPECIFIED;
     case AVCOL_TRC_BT709:
         return ZIMG_TRANSFER_709;
+    case AVCOL_TRC_GAMMA22:
+        return ZIMG_TRANSFER_470_M;
+    case AVCOL_TRC_GAMMA28:
+        return ZIMG_TRANSFER_470_BG;
     case AVCOL_TRC_SMPTE170M:
         return ZIMG_TRANSFER_601;
+    case AVCOL_TRC_SMPTE240M:
+        return ZIMG_TRANSFER_240M;
     case AVCOL_TRC_LINEAR:
         return ZIMG_TRANSFER_LINEAR;
+    case AVCOL_TRC_LOG:
+        return ZIMG_TRANSFER_LOG_100;
+    case AVCOL_TRC_LOG_SQRT:
+        return ZIMG_TRANSFER_LOG_316;
+    case AVCOL_TRC_IEC61966_2_4:
+        return ZIMG_TRANSFER_IEC_61966_2_4;
     case AVCOL_TRC_BT2020_10:
         return ZIMG_TRANSFER_2020_10;
     case AVCOL_TRC_BT2020_12:
@@ -399,14 +421,26 @@
         return ZIMG_PRIMARIES_UNSPECIFIED;
     case AVCOL_PRI_BT709:
         return ZIMG_PRIMARIES_709;
+    case AVCOL_PRI_BT470M:
+        return ZIMG_PRIMARIES_470_M;
+    case AVCOL_PRI_BT470BG:
+        return ZIMG_PRIMARIES_470_BG;
     case AVCOL_PRI_SMPTE170M:
         return ZIMG_PRIMARIES_170M;
     case AVCOL_PRI_SMPTE240M:
         return ZIMG_PRIMARIES_240M;
+    case AVCOL_PRI_FILM:
+        return ZIMG_PRIMARIES_FILM;
     case AVCOL_PRI_BT2020:
         return ZIMG_PRIMARIES_2020;
+    case AVCOL_PRI_SMPTE428:
+        return ZIMG_PRIMARIES_ST428;
+    case AVCOL_PRI_SMPTE431:
+        return ZIMG_PRIMARIES_ST431_2;
     case AVCOL_PRI_SMPTE432:
         return ZIMG_PRIMARIES_ST432_1;
+    case AVCOL_PRI_JEDEC_P22:
+        return ZIMG_PRIMARIES_EBU3213_E;
     }
     return ZIMG_PRIMARIES_UNSPECIFIED;
 }
@@ -581,7 +615,7 @@
             s->alpha_dst_format.width = out->width;
             s->alpha_dst_format.height = out->height;
             s->alpha_dst_format.depth = odesc->comp[0].depth;
-            s->alpha_dst_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+            s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
 
             zimg_filter_graph_free(s->alpha_graph);
@@ -624,7 +658,7 @@
 
     ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
     if (ret) {
-        print_zimg_error(link->dst);
+        ret = print_zimg_error(link->dst);
         goto fail;
     }
 
@@ -639,7 +673,7 @@
 
         ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
         if (ret) {
-            print_zimg_error(link->dst);
+            ret = print_zimg_error(link->dst);
             goto fail;
         }
     } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
@@ -673,6 +707,7 @@
     ZScaleContext *s = ctx->priv;
 
     zimg_filter_graph_free(s->graph);
+    zimg_filter_graph_free(s->alpha_graph);
     av_freep(&s->tmp);
     s->tmp_size = 0;
 }
@@ -734,8 +769,8 @@
     {     "unknown",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = -1},                 0, 0, FLAGS, "range" },
     {     "tv",               0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_RANGE_LIMITED}, 0, 0, FLAGS, "range" },
     {     "pc",               0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_RANGE_FULL},    0, 0, FLAGS, "range" },
-    { "primaries", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_PRIMARIES_ST432_1, FLAGS, "primaries" },
-    { "p",         "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_PRIMARIES_ST432_1, FLAGS, "primaries" },
+    { "primaries", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" },
+    { "p",         "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" },
     {     "input",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = -1},                         0, 0, FLAGS, "primaries" },
     {     "709",              0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_709},         0, 0, FLAGS, "primaries" },
     {     "unspecified",      0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_UNSPECIFIED}, 0, 0, FLAGS, "primaries" },
@@ -744,12 +779,18 @@
     {     "2020",             0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_2020},        0, 0, FLAGS, "primaries" },
     {     "unknown",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_UNSPECIFIED}, 0, 0, FLAGS, "primaries" },
     {     "bt709",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_709},         0, 0, FLAGS, "primaries" },
+    {     "bt470m",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_470_M},       0, 0, FLAGS, "primaries" },
+    {     "bt470bg",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_470_BG},      0, 0, FLAGS, "primaries" },
     {     "smpte170m",        0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_170M},        0, 0, FLAGS, "primaries" },
     {     "smpte240m",        0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_240M},        0, 0, FLAGS, "primaries" },
+    {     "film",             0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_FILM},        0, 0, FLAGS, "primaries" },
     {     "bt2020",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_2020},        0, 0, FLAGS, "primaries" },
+    {     "smpte428",         0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_ST428},       0, 0, FLAGS, "primaries" },
+    {     "smpte431",         0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_ST431_2},     0, 0, FLAGS, "primaries" },
     {     "smpte432",         0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_ST432_1},     0, 0, FLAGS, "primaries" },
-    { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_TRANSFER_ARIB_B67, FLAGS, "transfer" },
-    { "t",        "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_TRANSFER_ARIB_B67, FLAGS, "transfer" },
+    {     "jedec-p22",        0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_EBU3213_E},   0, 0, FLAGS, "primaries" },
+    { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "transfer" },
+    { "t",        "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "transfer" },
     {     "input",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = -1},                         0, 0, FLAGS, "transfer" },
     {     "709",              0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_709},         0, 0, FLAGS, "transfer" },
     {     "unspecified",      0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_UNSPECIFIED}, 0, 0, FLAGS, "transfer" },
@@ -758,16 +799,21 @@
     {     "2020_10",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_2020_10},     0, 0, FLAGS, "transfer" },
     {     "2020_12",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_2020_12},     0, 0, FLAGS, "transfer" },
     {     "unknown",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_UNSPECIFIED}, 0, 0, FLAGS, "transfer" },
+    {     "bt470m",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_470_M},       0, 0, FLAGS, "transfer" },
+    {     "bt470bg",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_470_BG},      0, 0, FLAGS, "transfer" },
     {     "smpte170m",        0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_601},         0, 0, FLAGS, "transfer" },
     {     "bt709",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_709},         0, 0, FLAGS, "transfer" },
     {     "linear",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_LINEAR},      0, 0, FLAGS, "transfer" },
+    {     "log100",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_LOG_100},     0, 0, FLAGS, "transfer" },
+    {     "log316",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_LOG_316},     0, 0, FLAGS, "transfer" },
     {     "bt2020-10",        0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_2020_10},     0, 0, FLAGS, "transfer" },
     {     "bt2020-12",        0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_2020_12},     0, 0, FLAGS, "transfer" },
     {     "smpte2084",        0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_ST2084},      0, 0, FLAGS, "transfer" },
+    {     "iec61966-2-4",     0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_IEC_61966_2_4},0, 0, FLAGS, "transfer" },
     {     "iec61966-2-1",     0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_IEC_61966_2_1},0, 0, FLAGS, "transfer" },
     {     "arib-std-b67",     0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_ARIB_B67},    0, 0, FLAGS, "transfer" },
-    { "matrix", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_MATRIX_2020_CL, FLAGS, "matrix" },
-    { "m",      "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_MATRIX_2020_CL, FLAGS, "matrix" },
+    { "matrix", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" },
+    { "m",      "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" },
     {     "input",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = -1},                      0, 0, FLAGS, "matrix" },
     {     "709",              0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_709},         0, 0, FLAGS, "matrix" },
     {     "unspecified",      0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_UNSPECIFIED}, 0, 0, FLAGS, "matrix" },
@@ -776,21 +822,27 @@
     {     "2020_ncl",         0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_2020_NCL},    0, 0, FLAGS, "matrix" },
     {     "2020_cl",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_2020_CL},     0, 0, FLAGS, "matrix" },
     {     "unknown",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_UNSPECIFIED}, 0, 0, FLAGS, "matrix" },
+    {     "gbr",              0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_RGB},         0, 0, FLAGS, "matrix" },
     {     "bt709",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_709},         0, 0, FLAGS, "matrix" },
+    {     "fcc",              0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_FCC},         0, 0, FLAGS, "matrix" },
     {     "bt470bg",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_470BG},       0, 0, FLAGS, "matrix" },
     {     "smpte170m",        0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_170M},        0, 0, FLAGS, "matrix" },
+    {     "smpte2400m",       0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_240M},        0, 0, FLAGS, "matrix" },
     {     "ycgco",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_YCGCO},       0, 0, FLAGS, "matrix" },
     {     "bt2020nc",         0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_2020_NCL},    0, 0, FLAGS, "matrix" },
     {     "bt2020c",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_2020_CL},     0, 0, FLAGS, "matrix" },
+    {     "chroma-derived-nc",0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL}, 0, 0, FLAGS, "matrix" },
+    {     "chroma-derived-c", 0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_CHROMATICITY_DERIVED_CL}, 0, 0, FLAGS, "matrix" },
+    {     "ictcp",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_ICTCP},       0, 0, FLAGS, "matrix" },
     { "in_range", "set input color range", OFFSET(range_in),    AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_RANGE_FULL, FLAGS, "range" },
     { "rangein", "set input color range", OFFSET(range_in),     AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_RANGE_FULL, FLAGS, "range" },
     { "rin",     "set input color range", OFFSET(range_in),     AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_RANGE_FULL, FLAGS, "range" },
-    { "primariesin", "set input color primaries", OFFSET(primaries_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_PRIMARIES_ST432_1, FLAGS, "primaries" },
-    { "pin",         "set input color primaries", OFFSET(primaries_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_PRIMARIES_ST432_1, FLAGS, "primaries" },
-    { "transferin", "set input transfer characteristic", OFFSET(trc_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_TRANSFER_ARIB_B67, FLAGS, "transfer" },
-    { "tin",        "set input transfer characteristic", OFFSET(trc_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_TRANSFER_ARIB_B67, FLAGS, "transfer" },
-    { "matrixin", "set input colorspace matrix", OFFSET(colorspace_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_MATRIX_2020_CL, FLAGS, "matrix" },
-    { "min",      "set input colorspace matrix", OFFSET(colorspace_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_MATRIX_2020_CL, FLAGS, "matrix" },
+    { "primariesin", "set input color primaries", OFFSET(primaries_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" },
+    { "pin",         "set input color primaries", OFFSET(primaries_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" },
+    { "transferin", "set input transfer characteristic", OFFSET(trc_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "transfer" },
+    { "tin",        "set input transfer characteristic", OFFSET(trc_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "transfer" },
+    { "matrixin", "set input colorspace matrix", OFFSET(colorspace_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" },
+    { "min",      "set input colorspace matrix", OFFSET(colorspace_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" },
     { "chromal",  "set output chroma location", OFFSET(chromal), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_CHROMA_BOTTOM, FLAGS, "chroma" },
     { "c",        "set output chroma location", OFFSET(chromal), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_CHROMA_BOTTOM, FLAGS, "chroma" },
     {     "input",     0, 0, AV_OPT_TYPE_CONST, {.i64 = -1},                       0, 0, FLAGS, "chroma" },

diff --git a/libavfilter/video.c b/libavfilter/video.c
index 6f9020b..7a8e587 100644
--- a/libavfilter/video.c
+++ b/libavfilter/video.c

@@ -43,6 +43,7 @@
 
 AVFrame *ff_default_get_video_buffer(AVFilterLink *link, int w, int h)
 {
+    AVFrame *frame = NULL;
     int pool_width = 0;
     int pool_height = 0;
     int pool_align = 0;
@@ -86,7 +87,13 @@
         }
     }
 
-    return ff_frame_pool_get(link->frame_pool);
+    frame = ff_frame_pool_get(link->frame_pool);
+    if (!frame)
+        return NULL;
+
+    frame->sample_aspect_ratio = link->sample_aspect_ratio;
+
+    return frame;
 }
 
 AVFrame *ff_get_video_buffer(AVFilterLink *link, int w, int h)

diff --git a/libavfilter/vsrc_testsrc.c b/libavfilter/vsrc_testsrc.c
index fe0d50a..f067148 100644
--- a/libavfilter/vsrc_testsrc.c
+++ b/libavfilter/vsrc_testsrc.c

@@ -857,8 +857,8 @@
         uint8_t alpha[256];
 
         r = s->pts;
-        for (y = ymin; y < ymax - 15; y += 16) {
-            for (x = xmin; x < xmax - 15; x += 16) {
+        for (y = ymin; y + 15 < ymax; y += 16) {
+            for (x = xmin; x + 15 < xmax; x += 16) {
                 if ((x ^ y) & 16)
                     continue;
                 for (i = 0; i < 256; i++) {
@@ -1252,7 +1252,7 @@
 
 #endif /* CONFIG_YUVTESTSRC_FILTER */
 
-#if CONFIG_SMPTEBARS_FILTER || CONFIG_SMPTEHDBARS_FILTER
+#if CONFIG_PAL75BARS_FILTER || CONFIG_PAL100BARS_FILTER || CONFIG_SMPTEBARS_FILTER || CONFIG_SMPTEHDBARS_FILTER
 
 static const uint8_t rainbow[7][4] = {
     { 180, 128, 128, 255 },     /* 75% white */
@@ -1264,6 +1264,16 @@
     {  35, 212, 114, 255 },     /* 75% blue */
 };
 
+static const uint8_t rainbow100[7][4] = {
+    { 235, 128, 128, 255 },     /* 100% white */
+    { 210,  16, 146, 255 },     /* 100% yellow */
+    { 170, 166,  16, 255 },     /* 100% cyan */
+    { 145,  54,  34, 255 },     /* 100% green */
+    { 106, 202, 222, 255 },     /* 100% magenta */
+    {  81,  90, 240, 255 },     /* 100% red */
+    {  41, 240, 110, 255 },     /* 100% blue */
+};
+
 static const uint8_t rainbowhd[7][4] = {
     { 180, 128, 128, 255 },     /* 75% white */
     { 168,  44, 136, 255 },     /* 75% yellow */
@@ -1371,6 +1381,100 @@
     { NULL }
 };
 
+#if CONFIG_PAL75BARS_FILTER
+
+#define pal75bars_options options
+AVFILTER_DEFINE_CLASS(pal75bars);
+
+static void pal75bars_fill_picture(AVFilterContext *ctx, AVFrame *picref)
+{
+    TestSourceContext *test = ctx->priv;
+    int r_w, i, x = 0;
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(picref->format);
+
+    picref->color_range = AVCOL_RANGE_MPEG;
+    picref->colorspace = AVCOL_SPC_BT470BG;
+
+    r_w = FFALIGN((test->w + 7) / 8, 1 << pixdesc->log2_chroma_w);
+
+    draw_bar(test, white, x, 0, r_w, test->h, picref);
+    x += r_w;
+    for (i = 1; i < 7; i++) {
+        draw_bar(test, rainbow[i], x, 0, r_w, test->h, picref);
+        x += r_w;
+    }
+    draw_bar(test, black0, x, 0, r_w, test->h, picref);
+}
+
+static av_cold int pal75bars_init(AVFilterContext *ctx)
+{
+    TestSourceContext *test = ctx->priv;
+
+    test->fill_picture_fn = pal75bars_fill_picture;
+    test->draw_once = 1;
+    return init(ctx);
+}
+
+AVFilter ff_vsrc_pal75bars = {
+    .name          = "pal75bars",
+    .description   = NULL_IF_CONFIG_SMALL("Generate PAL 75% color bars."),
+    .priv_size     = sizeof(TestSourceContext),
+    .priv_class    = &pal75bars_class,
+    .init          = pal75bars_init,
+    .uninit        = uninit,
+    .query_formats = smptebars_query_formats,
+    .inputs        = NULL,
+    .outputs       = smptebars_outputs,
+};
+
+#endif  /* CONFIG_PAL75BARS_FILTER */
+
+#if CONFIG_PAL100BARS_FILTER
+
+#define pal100bars_options options
+AVFILTER_DEFINE_CLASS(pal100bars);
+
+static void pal100bars_fill_picture(AVFilterContext *ctx, AVFrame *picref)
+{
+    TestSourceContext *test = ctx->priv;
+    int r_w, i, x = 0;
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(picref->format);
+
+    picref->color_range = AVCOL_RANGE_MPEG;
+    picref->colorspace = AVCOL_SPC_BT470BG;
+
+    r_w = FFALIGN((test->w + 7) / 8, 1 << pixdesc->log2_chroma_w);
+
+    for (i = 0; i < 7; i++) {
+        draw_bar(test, rainbow100[i], x, 0, r_w, test->h, picref);
+        x += r_w;
+    }
+    draw_bar(test, black0, x, 0, r_w, test->h, picref);
+}
+
+static av_cold int pal100bars_init(AVFilterContext *ctx)
+{
+    TestSourceContext *test = ctx->priv;
+
+    test->fill_picture_fn = pal100bars_fill_picture;
+    test->draw_once = 1;
+    return init(ctx);
+}
+
+AVFilter ff_vsrc_pal100bars = {
+    .name          = "pal100bars",
+    .description   = NULL_IF_CONFIG_SMALL("Generate PAL 100% color bars."),
+    .priv_size     = sizeof(TestSourceContext),
+    .priv_class    = &pal100bars_class,
+    .init          = pal100bars_init,
+    .uninit        = uninit,
+    .query_formats = smptebars_query_formats,
+    .inputs        = NULL,
+    .outputs       = smptebars_outputs,
+};
+
+#endif  /* CONFIG_PAL100BARS_FILTER */
+
 #if CONFIG_SMPTEBARS_FILTER
 
 #define smptebars_options options

diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 3431625..b484c8b 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile

@@ -5,12 +5,15 @@
 OBJS-$(CONFIG_EQ_FILTER)                     += x86/vf_eq.o
 OBJS-$(CONFIG_FSPP_FILTER)                   += x86/vf_fspp_init.o
 OBJS-$(CONFIG_GRADFUN_FILTER)                += x86/vf_gradfun_init.o
+OBJS-$(CONFIG_FRAMERATE_FILTER)              += x86/vf_framerate_init.o
+OBJS-$(CONFIG_HFLIP_FILTER)                  += x86/vf_hflip_init.o
 OBJS-$(CONFIG_HQDN3D_FILTER)                 += x86/vf_hqdn3d_init.o
 OBJS-$(CONFIG_IDET_FILTER)                   += x86/vf_idet_init.o
-OBJS-$(CONFIG_INTERLACE_FILTER)              += x86/vf_interlace_init.o
+OBJS-$(CONFIG_INTERLACE_FILTER)              += x86/vf_tinterlace_init.o
 OBJS-$(CONFIG_LIMITER_FILTER)                += x86/vf_limiter_init.o
 OBJS-$(CONFIG_MASKEDMERGE_FILTER)            += x86/vf_maskedmerge_init.o
 OBJS-$(CONFIG_NOISE_FILTER)                  += x86/vf_noise.o
+OBJS-$(CONFIG_OVERLAY_FILTER)                += x86/vf_overlay_init.o
 OBJS-$(CONFIG_PP7_FILTER)                    += x86/vf_pp7_init.o
 OBJS-$(CONFIG_PSNR_FILTER)                   += x86/vf_psnr_init.o
 OBJS-$(CONFIG_PULLUP_FILTER)                 += x86/vf_pullup_init.o
@@ -20,6 +23,7 @@
 OBJS-$(CONFIG_SSIM_FILTER)                   += x86/vf_ssim_init.o
 OBJS-$(CONFIG_STEREO3D_FILTER)               += x86/vf_stereo3d_init.o
 OBJS-$(CONFIG_TBLEND_FILTER)                 += x86/vf_blend_init.o
+OBJS-$(CONFIG_THRESHOLD_FILTER)              += x86/vf_threshold_init.o
 OBJS-$(CONFIG_TINTERLACE_FILTER)             += x86/vf_tinterlace_init.o
 OBJS-$(CONFIG_VOLUME_FILTER)                 += x86/af_volume_init.o
 OBJS-$(CONFIG_W3FDIF_FILTER)                 += x86/vf_w3fdif_init.o
@@ -29,13 +33,16 @@
 X86ASM-OBJS-$(CONFIG_BLEND_FILTER)           += x86/vf_blend.o
 X86ASM-OBJS-$(CONFIG_BWDIF_FILTER)           += x86/vf_bwdif.o
 X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER)      += x86/colorspacedsp.o
+X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER)       += x86/vf_framerate.o
 X86ASM-OBJS-$(CONFIG_FSPP_FILTER)            += x86/vf_fspp.o
 X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER)         += x86/vf_gradfun.o
+X86ASM-OBJS-$(CONFIG_HFLIP_FILTER)           += x86/vf_hflip.o
 X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER)          += x86/vf_hqdn3d.o
 X86ASM-OBJS-$(CONFIG_IDET_FILTER)            += x86/vf_idet.o
 X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER)       += x86/vf_interlace.o
 X86ASM-OBJS-$(CONFIG_LIMITER_FILTER)         += x86/vf_limiter.o
 X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER)     += x86/vf_maskedmerge.o
+X86ASM-OBJS-$(CONFIG_OVERLAY_FILTER)         += x86/vf_overlay.o
 X86ASM-OBJS-$(CONFIG_PP7_FILTER)             += x86/vf_pp7.o
 X86ASM-OBJS-$(CONFIG_PSNR_FILTER)            += x86/vf_psnr.o
 X86ASM-OBJS-$(CONFIG_PULLUP_FILTER)          += x86/vf_pullup.o
@@ -46,6 +53,7 @@
 X86ASM-OBJS-$(CONFIG_SSIM_FILTER)            += x86/vf_ssim.o
 X86ASM-OBJS-$(CONFIG_STEREO3D_FILTER)        += x86/vf_stereo3d.o
 X86ASM-OBJS-$(CONFIG_TBLEND_FILTER)          += x86/vf_blend.o
+X86ASM-OBJS-$(CONFIG_THRESHOLD_FILTER)       += x86/vf_threshold.o
 X86ASM-OBJS-$(CONFIG_TINTERLACE_FILTER)      += x86/vf_interlace.o
 X86ASM-OBJS-$(CONFIG_VOLUME_FILTER)          += x86/af_volume.o
 X86ASM-OBJS-$(CONFIG_W3FDIF_FILTER)          += x86/vf_w3fdif.o

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 4916aaf..251bbb5 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm

@@ -2,6 +2,8 @@
 ;* x86-optimized functions for blend filter
 ;*
 ;* Copyright (C) 2015 Paul B Mahol
+;* Copyright (C) 2018 Henrik Gramner
+;* Copyright (C) 2018 Jokyo Images
 ;*
 ;* This file is part of FFmpeg.
 ;*
@@ -25,6 +27,8 @@
 SECTION_RODATA
 
 ps_255: times 4 dd 255.0
+pd_32768 : times 4 dd 32768
+pd_65535 : times 4 dd 65535
 pw_1:   times 8 dw 1
 pw_128: times 8 dw 128
 pw_255: times 8 dw 255
@@ -34,10 +38,13 @@
 
 SECTION .text
 
-%macro BLEND_INIT 2
+%macro BLEND_INIT 2-3
 %if ARCH_X86_64
 cglobal blend_%1, 6, 9, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, end, x
     mov    widthd, dword widthm
+    %if %0 == 3; is 16 bit
+        add    widthq, widthq ; doesn't compile on x86_32
+    %endif
 %else
 cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end, x
 %define dst_linesizeq r5mp
@@ -59,8 +66,8 @@
 REP_RET
 %endmacro
 
-%macro BLEND_SIMPLE 2
-BLEND_INIT %1, 2
+%macro BLEND_SIMPLE 2-3
+BLEND_INIT %1, 2, %3
 .nextrow:
     mov        xq, widthq
 
@@ -74,39 +81,43 @@
 BLEND_END
 %endmacro
 
-INIT_XMM sse2
-BLEND_SIMPLE xor,      xor
-BLEND_SIMPLE or,       or
-BLEND_SIMPLE and,      and
-BLEND_SIMPLE addition, addusb
-BLEND_SIMPLE subtract, subusb
-BLEND_SIMPLE darken,   minub
-BLEND_SIMPLE lighten,  maxub
-
-BLEND_INIT grainextract, 4
-    pxor       m2, m2
-    mova       m3, [pw_128]
+; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
+%macro GRAINEXTRACT 3-4
+BLEND_INIT %1, 6, %4
+    pxor           m4, m4
+%if %0 == 4 ; 16 bit
+    VBROADCASTI128 m5, [pd_32768]
+%else
+    VBROADCASTI128 m5, [pw_128]
+%endif
 .nextrow:
     mov        xq, widthq
-
     .loop:
-        movh            m0, [topq + xq]
-        movh            m1, [bottomq + xq]
-        punpcklbw       m0, m2
-        punpcklbw       m1, m2
-        paddw           m0, m3
-        psubw           m0, m1
-        packuswb        m0, m0
-        movh   [dstq + xq], m0
-        add             xq, mmsize / 2
+        movu           m1, [topq + xq]
+        movu           m3, [bottomq + xq]
+
+        punpckl%2%3      m0, m1, m4
+        punpckh%2%3      m1, m4
+        punpckl%2%3      m2, m3, m4
+        punpckh%2%3      m3, m4
+
+        padd%3          m0, m5
+        padd%3          m1, m5
+        psub%3          m0, m2
+        psub%3          m1, m3
+
+        packus%3%2       m0, m1
+
+        mova  [dstq + xq], m0
+        add            xq, mmsize
     jl .loop
 BLEND_END
+%endmacro
 
 %macro MULTIPLY 3 ; a, b, pw_1
     pmullw          %1, %2               ; xxxxxxxx  a * b
     paddw           %1, %3
-    mova            %2, %1
-    psrlw           %2, 8
+    psrlw           %2, %1, 8
     paddw           %1, %2
     psrlw           %1, 8                ; 00xx00xx  a * b / 255
 %endmacro
@@ -118,92 +129,118 @@
     pxor            %1, %4               ; 00xx00xx  255 - x / 255
 %endmacro
 
-BLEND_INIT multiply, 4
-    pxor       m2, m2
-    mova       m3, [pw_1]
+%macro BLEND_MULTIPLY 0
+BLEND_INIT multiply, 6
+    pxor       m4, m4
+    VBROADCASTI128       m5, [pw_1]
 .nextrow:
     mov        xq, widthq
 
     .loop:
-                                             ;     word
-                                             ;     |--|
-        movh            m0, [topq + xq]      ; 0000xxxx
-        movh            m1, [bottomq + xq]
-        punpcklbw       m0, m2               ; 00xx00xx
-        punpcklbw       m1, m2
+        movu           m1, [topq + xq]
+        movu           m3, [bottomq + xq]
+        punpcklbw      m0, m1, m4
+        punpckhbw      m1, m4
+        punpcklbw      m2, m3, m4
+        punpckhbw      m3, m4
 
-        MULTIPLY        m0, m1, m3
+        MULTIPLY        m0, m2, m5
+        MULTIPLY        m1, m3, m5
 
-        packuswb        m0, m0               ; 0000xxxx
-        movh   [dstq + xq], m0
-        add             xq, mmsize / 2
-
+        packuswb       m0, m1
+        mova  [dstq + xq], m0
+        add            xq, mmsize
     jl .loop
 BLEND_END
+%endmacro
 
-BLEND_INIT screen, 5
-    pxor       m2, m2
-    mova       m3, [pw_1]
-    mova       m4, [pw_255]
+%macro BLEND_SCREEN 0
+BLEND_INIT screen, 7
+    pxor       m4, m4
+
+    VBROADCASTI128       m5, [pw_1]
+    VBROADCASTI128       m6, [pw_255]
 .nextrow:
     mov        xq, widthq
 
     .loop:
-        movh            m0, [topq + xq]      ; 0000xxxx
-        movh            m1, [bottomq + xq]
-        punpcklbw       m0, m2               ; 00xx00xx
-        punpcklbw       m1, m2
+        movu           m1, [topq + xq]
+        movu           m3, [bottomq + xq]
+        punpcklbw      m0, m1, m4
+        punpckhbw      m1, m4
+        punpcklbw      m2, m3, m4
+        punpckhbw      m3, m4
 
-        SCREEN          m0, m1, m3, m4
+        SCREEN          m0, m2, m5, m6
+        SCREEN          m1, m3, m5, m6
 
-        packuswb        m0, m0               ; 0000xxxx
-        movh   [dstq + xq], m0
-        add             xq, mmsize / 2
-
+        packuswb       m0, m1
+        mova  [dstq + xq], m0
+        add            xq, mmsize
     jl .loop
 BLEND_END
+%endmacro
 
-BLEND_INIT average, 3
-    pxor       m2, m2
+;%1 name, %2 (b or w), %3 (set if 16 bit)
+%macro AVERAGE 2-3
+BLEND_INIT %1, 3, %3
+    pcmpeqb        m2, m2
+
+.nextrow:
+    mov        xq, widthq
+
+.loop:
+    movu           m0, [topq + xq]
+    movu           m1, [bottomq + xq]
+    pxor           m0, m2
+    pxor           m1, m2
+    pavg%2         m0, m1
+    pxor           m0, m2
+    mova  [dstq + xq], m0
+    add            xq, mmsize
+    jl .loop
+BLEND_END
+%endmacro
+
+; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
+%macro GRAINMERGE 3-4
+BLEND_INIT %1, 6, %4
+    pxor       m4, m4
+%if %0 == 4 ; 16 bit
+    VBROADCASTI128       m5, [pd_32768]
+%else
+    VBROADCASTI128       m5, [pw_128]
+%endif
 .nextrow:
     mov        xq, widthq
 
     .loop:
-        movh            m0, [topq + xq]
-        movh            m1, [bottomq + xq]
-        punpcklbw       m0, m2
-        punpcklbw       m1, m2
-        paddw           m0, m1
-        psrlw           m0, 1
-        packuswb        m0, m0
-        movh   [dstq + xq], m0
-        add             xq, mmsize / 2
+        movu           m1, [topq + xq]
+        movu           m3, [bottomq + xq]
+
+        punpckl%2%3    m0, m1, m4
+        punpckh%2%3    m1, m4
+        punpckl%2%3    m2, m3, m4
+        punpckh%2%3    m3, m4
+
+        padd%3         m0, m2
+        padd%3         m1, m3
+        psub%3         m0, m5
+        psub%3         m1, m5
+
+        packus%3%2     m0, m1
+
+        mova  [dstq + xq], m0
+        add            xq, mmsize
     jl .loop
 BLEND_END
+%endmacro
 
-BLEND_INIT grainmerge, 4
-    pxor       m2, m2
-    mova       m3, [pw_128]
-.nextrow:
-    mov        xq, widthq
-
-    .loop:
-        movh            m0, [topq + xq]
-        movh            m1, [bottomq + xq]
-        punpcklbw       m0, m2
-        punpcklbw       m1, m2
-        paddw           m0, m1
-        psubw           m0, m3
-        packuswb        m0, m0
-        movh   [dstq + xq], m0
-        add             xq, mmsize / 2
-    jl .loop
-BLEND_END
-
+%macro HARDMIX 0
 BLEND_INIT hardmix, 5
-    mova       m2, [pb_255]
-    mova       m3, [pb_128]
-    mova       m4, [pb_127]
+    VBROADCASTI128       m2, [pb_255]
+    VBROADCASTI128       m3, [pb_128]
+    VBROADCASTI128       m4, [pb_127]
 .nextrow:
     mov        xq, widthq
 
@@ -218,7 +255,9 @@
         add             xq, mmsize
     jl .loop
 BLEND_END
+%endmacro
 
+%macro DIVIDE 0
 BLEND_INIT divide, 4
     pxor       m2, m2
     mova       m3, [ps_255]
@@ -247,9 +286,12 @@
 
     jl .loop
 BLEND_END
+%endmacro
 
-BLEND_INIT phoenix, 4
-    mova       m3, [pb_255]
+%macro PHOENIX 2-3
+; %1 name, %2 b or w, %3 (opt) 1 if 16 bit
+BLEND_INIT %1, 4, %3
+    VBROADCASTI128       m3, [pb_255]
 .nextrow:
     mov        xq, widthq
 
@@ -257,18 +299,20 @@
         movu            m0, [topq + xq]
         movu            m1, [bottomq + xq]
         mova            m2, m0
-        pminub          m0, m1
-        pmaxub          m1, m2
+        pminu%2         m0, m1
+        pmaxu%2         m1, m2
         mova            m2, m3
-        psubusb         m2, m1
-        paddusb         m2, m0
+        psubus%2        m2, m1
+        paddus%2        m2, m0
         mova   [dstq + xq], m2
         add             xq, mmsize
     jl .loop
 BLEND_END
+%endmacro
 
-%macro BLEND_ABS 0
-BLEND_INIT difference, 5
+; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
+%macro DIFFERENCE 3-4
+BLEND_INIT %1, 5, %4
     pxor       m2, m2
 .nextrow:
     mov        xq, widthq
@@ -276,64 +320,92 @@
     .loop:
         movu            m0, [topq + xq]
         movu            m1, [bottomq + xq]
-        punpckhbw       m3, m0, m2
-        punpcklbw       m0, m2
-        punpckhbw       m4, m1, m2
-        punpcklbw       m1, m2
-        psubw           m0, m1
-        psubw           m3, m4
+        punpckh%2%3     m3, m0, m2
+        punpckl%2%3     m0, m2
+        punpckh%2%3     m4, m1, m2
+        punpckl%2%3     m1, m2
+        psub%3          m0, m1
+        psub%3          m3, m4
+%if %0 == 4; 16 bit
+        pabsd           m0, m0
+        pabsd           m3, m3
+%else
         ABS2            m0, m3, m1, m4
-        packuswb        m0, m3
+%endif
+        packus%3%2      m0, m3
         mova   [dstq + xq], m0
         add             xq, mmsize
     jl .loop
 BLEND_END
+%endmacro
 
-BLEND_INIT extremity, 8
+; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
+%macro EXTREMITY 3-4
+BLEND_INIT %1, 8, %4
     pxor       m2, m2
-    mova       m4, [pw_255]
+%if %0 == 4; 16 bit
+    VBROADCASTI128       m4, [pd_65535]
+%else
+    VBROADCASTI128       m4, [pw_255]
+%endif
 .nextrow:
     mov        xq, widthq
 
     .loop:
         movu            m0, [topq + xq]
         movu            m1, [bottomq + xq]
-        punpckhbw       m5, m0, m2
-        punpcklbw       m0, m2
-        punpckhbw       m6, m1, m2
-        punpcklbw       m1, m2
-        psubw           m3, m4, m0
-        psubw           m7, m4, m5
-        psubw           m3, m1
-        psubw           m7, m6
+        punpckh%2%3     m5, m0, m2
+        punpckl%2%3     m0, m2
+        punpckh%2%3     m6, m1, m2
+        punpckl%2%3     m1, m2
+        psub%3          m3, m4, m0
+        psub%3          m7, m4, m5
+        psub%3          m3, m1
+        psub%3          m7, m6
+%if %0 == 4; 16 bit
+        pabsd           m3, m3
+        pabsd           m7, m7
+%else
         ABS2            m3, m7, m1, m6
-        packuswb        m3, m7
+%endif
+        packus%3%2      m3, m7
         mova   [dstq + xq], m3
         add             xq, mmsize
     jl .loop
 BLEND_END
+%endmacro
 
-BLEND_INIT negation, 8
+%macro NEGATION 3-4
+BLEND_INIT %1, 8, %4
     pxor       m2, m2
-    mova       m4, [pw_255]
+%if %0 == 4; 16 bit
+    VBROADCASTI128       m4, [pd_65535]
+%else
+    VBROADCASTI128       m4, [pw_255]
+%endif
 .nextrow:
     mov        xq, widthq
 
     .loop:
         movu            m0, [topq + xq]
         movu            m1, [bottomq + xq]
-        punpckhbw       m5, m0, m2
-        punpcklbw       m0, m2
-        punpckhbw       m6, m1, m2
-        punpcklbw       m1, m2
-        psubw           m3, m4, m0
-        psubw           m7, m4, m5
-        psubw           m3, m1
-        psubw           m7, m6
+        punpckh%2%3     m5, m0, m2
+        punpckl%2%3     m0, m2
+        punpckh%2%3     m6, m1, m2
+        punpckl%2%3     m1, m2
+        psub%3          m3, m4, m0
+        psub%3          m7, m4, m5
+        psub%3          m3, m1
+        psub%3          m7, m6
+%if %0 == 4; 16 bit
+        pabsd           m3, m3
+        pabsd           m7, m7
+%else
         ABS2            m3, m7, m1, m6
-        psubw           m0, m4, m3
-        psubw           m1, m4, m7
-        packuswb        m0, m1
+%endif
+        psub%3          m0, m4, m3
+        psub%3          m1, m4, m7
+        packus%3%2      m0, m1
         mova   [dstq + xq], m0
         add             xq, mmsize
     jl .loop
@@ -341,6 +413,86 @@
 %endmacro
 
 INIT_XMM sse2
-BLEND_ABS
+BLEND_SIMPLE xor,      xor
+BLEND_SIMPLE or,       or
+BLEND_SIMPLE and,      and
+BLEND_SIMPLE addition, addusb
+BLEND_SIMPLE subtract, subusb
+BLEND_SIMPLE darken,   minub
+BLEND_SIMPLE lighten,  maxub
+GRAINEXTRACT grainextract, b, w
+BLEND_MULTIPLY
+BLEND_SCREEN
+AVERAGE       average,    b
+GRAINMERGE    grainmerge, b, w
+HARDMIX
+PHOENIX phoenix, b
+DIFFERENCE difference, b, w
+DIVIDE
+EXTREMITY extremity, b, w
+NEGATION negation, b, w
+
+%if ARCH_X86_64
+BLEND_SIMPLE addition_16, addusw, 1
+BLEND_SIMPLE and_16,      and,    1
+BLEND_SIMPLE or_16,       or,     1
+BLEND_SIMPLE subtract_16, subusw, 1
+BLEND_SIMPLE xor_16,      xor,    1
+AVERAGE      average_16,  w,      1
+%endif
+
 INIT_XMM ssse3
-BLEND_ABS
+DIFFERENCE difference, b, w
+EXTREMITY extremity, b, w
+NEGATION negation, b, w
+
+INIT_XMM sse4
+%if ARCH_X86_64
+BLEND_SIMPLE darken_16,   minuw, 1
+BLEND_SIMPLE lighten_16,  maxuw, 1
+GRAINEXTRACT grainextract_16, w, d, 1
+GRAINMERGE   grainmerge_16, w, d, 1
+PHOENIX      phoenix_16,      w, 1
+DIFFERENCE   difference_16, w, d, 1
+EXTREMITY    extremity_16, w, d, 1
+NEGATION     negation_16, w, d, 1
+%endif
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+BLEND_SIMPLE xor,      xor
+BLEND_SIMPLE or,       or
+BLEND_SIMPLE and,      and
+BLEND_SIMPLE addition, addusb
+BLEND_SIMPLE subtract, subusb
+BLEND_SIMPLE darken,   minub
+BLEND_SIMPLE lighten,  maxub
+GRAINEXTRACT grainextract, b, w
+BLEND_MULTIPLY
+BLEND_SCREEN
+AVERAGE    average,    b
+GRAINMERGE grainmerge, b, w
+HARDMIX
+PHOENIX phoenix, b
+
+DIFFERENCE difference, b, w
+EXTREMITY extremity, b, w
+NEGATION negation, b, w
+
+%if ARCH_X86_64
+BLEND_SIMPLE addition_16, addusw, 1
+BLEND_SIMPLE and_16,      and,    1
+BLEND_SIMPLE darken_16,   minuw,  1
+BLEND_SIMPLE lighten_16,  maxuw,  1
+BLEND_SIMPLE or_16,       or,     1
+BLEND_SIMPLE subtract_16, subusw, 1
+BLEND_SIMPLE xor_16,      xor,    1
+GRAINEXTRACT grainextract_16, w, d, 1
+AVERAGE      average_16,  w,      1
+GRAINMERGE   grainmerge_16, w, d, 1
+PHOENIX      phoenix_16,       w, 1
+DIFFERENCE   difference_16, w, d, 1
+EXTREMITY    extremity_16, w, d, 1
+NEGATION     negation_16, w, d, 1
+%endif
+%endif

diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index a4fc9af..acf2855 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c

@@ -31,58 +31,173 @@
                              struct FilterParams *param, double *values, int starty);
 
 BLEND_FUNC(addition, sse2)
+BLEND_FUNC(addition, avx2)
 BLEND_FUNC(grainmerge, sse2)
+BLEND_FUNC(grainmerge, avx2)
 BLEND_FUNC(average, sse2)
+BLEND_FUNC(average, avx2)
 BLEND_FUNC(and, sse2)
+BLEND_FUNC(and, avx2)
 BLEND_FUNC(darken, sse2)
+BLEND_FUNC(darken, avx2)
 BLEND_FUNC(grainextract, sse2)
+BLEND_FUNC(grainextract, avx2)
 BLEND_FUNC(multiply, sse2)
+BLEND_FUNC(multiply, avx2)
 BLEND_FUNC(screen, sse2)
+BLEND_FUNC(screen, avx2)
 BLEND_FUNC(hardmix, sse2)
+BLEND_FUNC(hardmix, avx2)
 BLEND_FUNC(divide, sse2)
 BLEND_FUNC(lighten, sse2)
+BLEND_FUNC(lighten, avx2)
 BLEND_FUNC(or, sse2)
+BLEND_FUNC(or, avx2)
 BLEND_FUNC(phoenix, sse2)
+BLEND_FUNC(phoenix, avx2)
 BLEND_FUNC(subtract, sse2)
+BLEND_FUNC(subtract, avx2)
 BLEND_FUNC(xor, sse2)
+BLEND_FUNC(xor, avx2)
 BLEND_FUNC(difference, sse2)
 BLEND_FUNC(difference, ssse3)
+BLEND_FUNC(difference, avx2)
 BLEND_FUNC(extremity, sse2)
 BLEND_FUNC(extremity, ssse3)
+BLEND_FUNC(extremity, avx2)
 BLEND_FUNC(negation, sse2)
 BLEND_FUNC(negation, ssse3)
+BLEND_FUNC(negation, avx2)
+
+#if ARCH_X86_64
+BLEND_FUNC(addition_16, sse2)
+BLEND_FUNC(addition_16, avx2)
+BLEND_FUNC(grainmerge_16, sse4)
+BLEND_FUNC(grainmerge_16, avx2)
+BLEND_FUNC(average_16, sse2)
+BLEND_FUNC(average_16, avx2)
+BLEND_FUNC(and_16, sse2)
+BLEND_FUNC(and_16, avx2)
+BLEND_FUNC(darken_16, sse4)
+BLEND_FUNC(darken_16, avx2)
+BLEND_FUNC(grainextract_16, sse4)
+BLEND_FUNC(grainextract_16, avx2)
+BLEND_FUNC(difference_16, sse4)
+BLEND_FUNC(difference_16, avx2)
+BLEND_FUNC(extremity_16, sse4)
+BLEND_FUNC(extremity_16, avx2)
+BLEND_FUNC(negation_16, sse4)
+BLEND_FUNC(negation_16, avx2)
+BLEND_FUNC(lighten_16, sse4)
+BLEND_FUNC(lighten_16, avx2)
+BLEND_FUNC(or_16, sse2)
+BLEND_FUNC(or_16, avx2)
+BLEND_FUNC(phoenix_16, sse4)
+BLEND_FUNC(phoenix_16, avx2)
+BLEND_FUNC(subtract_16, sse2)
+BLEND_FUNC(subtract_16, avx2)
+BLEND_FUNC(xor_16, sse2)
+BLEND_FUNC(xor_16, avx2)
+#endif /* ARCH_X86_64 */
 
 av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) {
-        switch (param->mode) {
-        case BLEND_ADDITION: param->blend = ff_blend_addition_sse2; break;
-        case BLEND_GRAINMERGE: param->blend = ff_blend_grainmerge_sse2; break;
-        case BLEND_AND:      param->blend = ff_blend_and_sse2;      break;
-        case BLEND_AVERAGE:  param->blend = ff_blend_average_sse2;  break;
-        case BLEND_DARKEN:   param->blend = ff_blend_darken_sse2;   break;
-        case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_sse2; break;
-        case BLEND_DIVIDE:   param->blend = ff_blend_divide_sse2;   break;
-        case BLEND_HARDMIX:  param->blend = ff_blend_hardmix_sse2;  break;
-        case BLEND_LIGHTEN:  param->blend = ff_blend_lighten_sse2;  break;
-        case BLEND_MULTIPLY: param->blend = ff_blend_multiply_sse2; break;
-        case BLEND_OR:       param->blend = ff_blend_or_sse2;       break;
-        case BLEND_PHOENIX:  param->blend = ff_blend_phoenix_sse2;  break;
-        case BLEND_SCREEN:   param->blend = ff_blend_screen_sse2;   break;
-        case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
-        case BLEND_XOR:      param->blend = ff_blend_xor_sse2;      break;
-        case BLEND_DIFFERENCE: param->blend = ff_blend_difference_sse2; break;
-        case BLEND_EXTREMITY:  param->blend = ff_blend_extremity_sse2;  break;
-        case BLEND_NEGATION:   param->blend = ff_blend_negation_sse2;   break;
+    if (!is_16bit) {
+        if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1) {
+            switch (param->mode) {
+            case BLEND_ADDITION:     param->blend = ff_blend_addition_sse2;     break;
+            case BLEND_GRAINMERGE:   param->blend = ff_blend_grainmerge_sse2;   break;
+            case BLEND_AND:          param->blend = ff_blend_and_sse2;          break;
+            case BLEND_AVERAGE:      param->blend = ff_blend_average_sse2;      break;
+            case BLEND_DARKEN:       param->blend = ff_blend_darken_sse2;       break;
+            case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_sse2; break;
+            case BLEND_DIVIDE:       param->blend = ff_blend_divide_sse2;       break;
+            case BLEND_HARDMIX:      param->blend = ff_blend_hardmix_sse2;      break;
+            case BLEND_LIGHTEN:      param->blend = ff_blend_lighten_sse2;      break;
+            case BLEND_MULTIPLY:     param->blend = ff_blend_multiply_sse2;     break;
+            case BLEND_OR:           param->blend = ff_blend_or_sse2;           break;
+            case BLEND_PHOENIX:      param->blend = ff_blend_phoenix_sse2;      break;
+            case BLEND_SCREEN:       param->blend = ff_blend_screen_sse2;       break;
+            case BLEND_SUBTRACT:     param->blend = ff_blend_subtract_sse2;     break;
+            case BLEND_XOR:          param->blend = ff_blend_xor_sse2;          break;
+            case BLEND_DIFFERENCE:   param->blend = ff_blend_difference_sse2;   break;
+            case BLEND_EXTREMITY:    param->blend = ff_blend_extremity_sse2;    break;
+            case BLEND_NEGATION:     param->blend = ff_blend_negation_sse2;     break;
+            }
         }
-    }
-    if (EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {
-        switch (param->mode) {
-        case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
-        case BLEND_EXTREMITY:  param->blend = ff_blend_extremity_ssse3;  break;
-        case BLEND_NEGATION:   param->blend = ff_blend_negation_ssse3;   break;
+        if (EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1) {
+            switch (param->mode) {
+            case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
+            case BLEND_EXTREMITY:  param->blend = ff_blend_extremity_ssse3;  break;
+            case BLEND_NEGATION:   param->blend = ff_blend_negation_ssse3;   break;
+            }
         }
+
+        if (EXTERNAL_AVX2_FAST(cpu_flags) && param->opacity == 1) {
+            switch (param->mode) {
+            case BLEND_ADDITION:     param->blend = ff_blend_addition_avx2;     break;
+            case BLEND_GRAINMERGE:   param->blend = ff_blend_grainmerge_avx2;   break;
+            case BLEND_AND:          param->blend = ff_blend_and_avx2;          break;
+            case BLEND_AVERAGE:      param->blend = ff_blend_average_avx2;      break;
+            case BLEND_DARKEN:       param->blend = ff_blend_darken_avx2;       break;
+            case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_avx2; break;
+            case BLEND_HARDMIX:      param->blend = ff_blend_hardmix_avx2;      break;
+            case BLEND_LIGHTEN:      param->blend = ff_blend_lighten_avx2;      break;
+            case BLEND_MULTIPLY:     param->blend = ff_blend_multiply_avx2;     break;
+            case BLEND_OR:           param->blend = ff_blend_or_avx2;           break;
+            case BLEND_PHOENIX:      param->blend = ff_blend_phoenix_avx2;      break;
+            case BLEND_SCREEN:       param->blend = ff_blend_screen_avx2;       break;
+            case BLEND_SUBTRACT:     param->blend = ff_blend_subtract_avx2;     break;
+            case BLEND_XOR:          param->blend = ff_blend_xor_avx2;          break;
+            case BLEND_DIFFERENCE:   param->blend = ff_blend_difference_avx2;   break;
+            case BLEND_EXTREMITY:    param->blend = ff_blend_extremity_avx2;    break;
+            case BLEND_NEGATION:     param->blend = ff_blend_negation_avx2;     break;
+            }
+        }
+    } else { /* is_16_bit */
+#if ARCH_X86_64
+        if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1) {
+            switch (param->mode) {
+            case BLEND_ADDITION: param->blend = ff_blend_addition_16_sse2; break;
+            case BLEND_AND:      param->blend = ff_blend_and_16_sse2;      break;
+            case BLEND_AVERAGE:  param->blend = ff_blend_average_16_sse2;  break;
+            case BLEND_OR:       param->blend = ff_blend_or_16_sse2;       break;
+            case BLEND_SUBTRACT: param->blend = ff_blend_subtract_16_sse2; break;
+            case BLEND_XOR:      param->blend = ff_blend_xor_16_sse2;      break;
+            }
+        }
+        if (EXTERNAL_SSE4(cpu_flags) && param->opacity == 1) {
+            switch (param->mode) {
+            case BLEND_GRAINMERGE: param->blend = ff_blend_grainmerge_16_sse4; break;
+            case BLEND_DARKEN:   param->blend = ff_blend_darken_16_sse4;     break;
+            case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_16_sse4; break;
+            case BLEND_DIFFERENCE: param->blend = ff_blend_difference_16_sse4; break;
+            case BLEND_EXTREMITY:  param->blend = ff_blend_extremity_16_sse4;    break;
+            case BLEND_NEGATION:  param->blend = ff_blend_negation_16_sse4;     break;
+            case BLEND_LIGHTEN:  param->blend = ff_blend_lighten_16_sse4;    break;
+            case BLEND_PHOENIX:  param->blend = ff_blend_phoenix_16_sse4;    break;
+            }
+        }
+        if (EXTERNAL_AVX2_FAST(cpu_flags) && param->opacity == 1) {
+            switch (param->mode) {
+            case BLEND_ADDITION: param->blend = ff_blend_addition_16_avx2; break;
+            case BLEND_GRAINMERGE: param->blend = ff_blend_grainmerge_16_avx2;   break;
+            case BLEND_AND:      param->blend = ff_blend_and_16_avx2;      break;
+            case BLEND_AVERAGE:  param->blend = ff_blend_average_16_avx2;  break;
+            case BLEND_DARKEN:   param->blend = ff_blend_darken_16_avx2;   break;
+            case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_16_avx2; break;
+            case BLEND_DIFFERENCE: param->blend = ff_blend_difference_16_avx2; break;
+            case BLEND_EXTREMITY:  param->blend = ff_blend_extremity_16_avx2;    break;
+            case BLEND_NEGATION:  param->blend = ff_blend_negation_16_avx2;     break;
+            case BLEND_LIGHTEN:  param->blend = ff_blend_lighten_16_avx2;  break;
+            case BLEND_OR:       param->blend = ff_blend_or_16_avx2;       break;
+            case BLEND_PHOENIX:  param->blend = ff_blend_phoenix_16_avx2;  break;
+            case BLEND_SUBTRACT: param->blend = ff_blend_subtract_16_avx2; break;
+            case BLEND_XOR:      param->blend = ff_blend_xor_16_avx2;      break;
+            }
+        }
+#endif /* ARCH_X86_64 */
     }
 }

diff --git a/libavfilter/x86/vf_framerate.asm b/libavfilter/x86/vf_framerate.asm
new file mode 100644
index 0000000..7a30c87
--- /dev/null
+++ b/libavfilter/x86/vf_framerate.asm

@@ -0,0 +1,134 @@
+;*****************************************************************************
+;* x86-optimized functions for framerate filter
+;*
+;* Copyright (C) 2018 Marton Balint
+;*
+;* Based on vf_blend.asm, Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+
+%macro XSPLAT 3
+%if cpuflag(avx2)
+    vpbroadcast%3  %1, %2
+%else
+    movd           %1, %2
+%ifidn %3, d
+    SPLATD         %1
+%else
+    SPLATW         %1, %1
+%endif
+%endif
+%endmacro
+
+
+%macro BLEND_INIT 0-1
+%if ARCH_X86_64
+cglobal blend_frames%1, 6, 9, 5, src1, src1_linesize, src2, src2_linesize, dst, dst_linesize, width, end, x
+    mov    widthd, dword widthm
+%else
+cglobal blend_frames%1, 5, 7, 5, src1, src1_linesize, src2, src2_linesize, dst, end, x
+%define dst_linesizeq r5mp
+%define widthq r6mp
+%endif
+    mov      endd, dword r7m
+    add     src1q, widthq
+    add     src2q, widthq
+    add      dstq, widthq
+    neg    widthq
+%endmacro
+
+
+%macro BLEND_LOOP 4
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+        movu            m0, [src1q + xq]
+        movu            m1, [src2q + xq]
+        SBUTTERFLY    %1%2, 0, 1, 4         ; aAbBcCdD
+                                            ; eEfFgGhH
+        pmadd%3         m0, m2
+        pmadd%3         m1, m2
+
+        padd%2          m0, m3
+        padd%2          m1, m3
+        psrl%2          m0, %4              ; 0A0B0C0D
+        psrl%2          m1, %4              ; 0E0F0G0H
+
+        packus%2%1      m0, m1              ; ABCDEFGH
+        movu   [dstq + xq], m0
+        add             xq, mmsize
+    jl .loop
+    add     src1q, src1_linesizeq
+    add     src2q, src2_linesizeq
+    add      dstq, dst_linesizeq
+    sub      endd, 1
+    jg .nextrow
+REP_RET
+%endmacro
+
+
+%macro BLEND_FRAMES 0
+    BLEND_INIT
+
+    XSPLAT     m2, r8m, w                   ; factor1
+    XSPLAT     m3, r9m, w                   ; factor2
+
+    psllw      m3, 8
+    por        m2, m3                       ; interleaved factors
+
+    XSPLAT     m3, r10m, w                  ; half
+
+    BLEND_LOOP  b, w, ubsw, 7
+%endmacro
+
+
+%macro BLEND_FRAMES16 0
+    BLEND_INIT 16
+
+    XSPLAT     m2, r8m, d                   ; factor1
+    XSPLAT     m3, r9m, d                   ; factor2
+
+    pslld      m3, 16
+    por        m2, m3                       ; interleaved factors
+
+    XSPLAT     m3, r10m, d                  ; half
+
+    BLEND_LOOP  w, d, wd, 15
+%endmacro
+
+
+INIT_XMM ssse3
+BLEND_FRAMES
+
+INIT_XMM sse4
+BLEND_FRAMES16
+
+
+%if HAVE_AVX2_EXTERNAL
+
+INIT_YMM avx2
+BLEND_FRAMES
+BLEND_FRAMES16
+
+%endif

diff --git a/libavfilter/x86/vf_framerate_init.c b/libavfilter/x86/vf_framerate_init.c
new file mode 100644
index 0000000..9d40faf
--- /dev/null
+++ b/libavfilter/x86/vf_framerate_init.c

@@ -0,0 +1,42 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/framerate.h"
+
+void ff_blend_frames_ssse3(BLEND_FUNC_PARAMS);
+void ff_blend_frames_avx2(BLEND_FUNC_PARAMS);
+void ff_blend_frames16_sse4(BLEND_FUNC_PARAMS);
+void ff_blend_frames16_avx2(BLEND_FUNC_PARAMS);
+
+void ff_framerate_init_x86(FrameRateContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+    if (s->bitdepth == 8) {
+        if (EXTERNAL_AVX2_FAST(cpu_flags))
+            s->blend = ff_blend_frames_avx2;
+        else if (EXTERNAL_SSSE3(cpu_flags))
+            s->blend = ff_blend_frames_ssse3;
+    } else {
+        if (EXTERNAL_AVX2_FAST(cpu_flags))
+            s->blend = ff_blend_frames16_avx2;
+        else if (EXTERNAL_SSE4(cpu_flags))
+            s->blend = ff_blend_frames16_sse4;
+    }
+}

diff --git a/libavfilter/x86/vf_hflip.asm b/libavfilter/x86/vf_hflip.asm
new file mode 100644
index 0000000..2856189
--- /dev/null
+++ b/libavfilter/x86/vf_hflip.asm

@@ -0,0 +1,90 @@
+;*****************************************************************************
+;* x86-optimized functions for hflip filter
+;*
+;* Copyright (C) 2017 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pb_flip_byte:  db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+pb_flip_short: db 14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1
+
+SECTION .text
+
+;%1 byte or short, %2 b or w, %3 size in byte (1 for byte, 2 for short)
+%macro HFLIP 3
+cglobal hflip_%1, 3, 5, 3, src, dst, w, r, x
+    VBROADCASTI128    m0, [pb_flip_%1]
+    xor               xq, xq
+%if %3 == 1
+    movsxdifnidn wq, wd
+%else ; short
+    add     wd, wd
+%endif
+    mov     rq, wq
+    and     rq, 2 * mmsize - 1
+    cmp     wq, 2 * mmsize
+    jl .loop1
+    sub     wq, rq
+
+    .loop0:
+        neg     xq
+%if mmsize == 32
+        vpermq  m1, [srcq + xq -     mmsize + %3], 0x4e; flip each lane at load
+        vpermq  m2, [srcq + xq - 2 * mmsize + %3], 0x4e; flip each lane at load
+%else
+        movu    m1, [srcq + xq -     mmsize + %3]
+        movu    m2, [srcq + xq - 2 * mmsize + %3]
+%endif
+        pshufb  m1, m0
+        pshufb  m2, m0
+        neg     xq
+        movu    [dstq + xq         ], m1
+        movu    [dstq + xq + mmsize], m2
+        add     xq, mmsize * 2
+        cmp     xq, wq
+        jl .loop0
+
+    cmp    rq, 0
+    je .end
+    add    wq, rq
+
+    .loop1:
+        neg    xq
+        mov    r%2, [srcq + xq]
+        neg    xq
+        mov    [dstq + xq], r%2
+        add    xq, %3
+        cmp    xq, wq
+        jl .loop1
+    .end:
+        RET
+%endmacro
+
+INIT_XMM ssse3
+HFLIP byte, b, 1
+HFLIP short, w, 2
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+HFLIP byte, b, 1
+HFLIP short, w, 2
+%endif

diff --git a/libavfilter/x86/vf_hflip_init.c b/libavfilter/x86/vf_hflip_init.c
new file mode 100644
index 0000000..0ac399b
--- /dev/null
+++ b/libavfilter/x86/vf_hflip_init.c

@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/hflip.h"
+
+void ff_hflip_byte_ssse3(const uint8_t *src, uint8_t *dst, int w);
+void ff_hflip_byte_avx2(const uint8_t *src, uint8_t *dst, int w);
+void ff_hflip_short_ssse3(const uint8_t *src, uint8_t *dst, int w);
+void ff_hflip_short_avx2(const uint8_t *src, uint8_t *dst, int w);
+
+av_cold void ff_hflip_init_x86(FlipContext *s, int step[4], int nb_planes)
+{
+    int cpu_flags = av_get_cpu_flags();
+    int i;
+
+    for (i = 0; i < nb_planes; i++) {
+        if (step[i] == 1) {
+            if (EXTERNAL_SSSE3(cpu_flags)) {
+                s->flip_line[i] = ff_hflip_byte_ssse3;
+            }
+            if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+                s->flip_line[i] = ff_hflip_byte_avx2;
+            }
+        } else if (step[i] == 2) {
+            if (EXTERNAL_SSSE3(cpu_flags)) {
+                s->flip_line[i] = ff_hflip_short_ssse3;
+            }
+            if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+                s->flip_line[i] = ff_hflip_short_avx2;
+            }
+        }
+    }
+}

diff --git a/libavfilter/x86/vf_overlay.asm b/libavfilter/x86/vf_overlay.asm
new file mode 100644
index 0000000..14ec60c
--- /dev/null
+++ b/libavfilter/x86/vf_overlay.asm

@@ -0,0 +1,144 @@
+;*****************************************************************************
+;* x86-optimized functions for overlay filter
+;*
+;* Copyright (C) 2018 Paul B Mahol
+;* Copyright (C) 2018 Henrik Gramner
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pb_1:     times 16 db 1
+pw_128:   times  8 dw 128
+pw_255:   times  8 dw 255
+pw_257:   times  8 dw 257
+
+SECTION .text
+
+INIT_XMM sse4
+cglobal overlay_row_44, 5, 7, 6, 0, d, da, s, a, w, r, x
+    xor          xq, xq
+    movsxdifnidn wq, wd
+    mov          rq, wq
+    and          rq, mmsize/2 - 1
+    cmp          wq, mmsize/2
+    jl .end
+    sub          wq, rq
+    mova         m3, [pw_255]
+    mova         m4, [pw_128]
+    mova         m5, [pw_257]
+    .loop:
+        pmovzxbw    m0, [sq+xq]
+        pmovzxbw    m2, [aq+xq]
+        pmovzxbw    m1, [dq+xq]
+        pmullw      m0, m2
+        pxor        m2, m3
+        pmullw      m1, m2
+        paddw       m0, m4
+        paddw       m0, m1
+        pmulhuw     m0, m5
+        packuswb    m0, m0
+        movq   [dq+xq], m0
+        add         xq, mmsize/2
+        cmp         xq, wq
+        jl .loop
+
+    .end:
+    mov    eax, xd
+    RET
+
+INIT_XMM sse4
+cglobal overlay_row_22, 5, 7, 6, 0, d, da, s, a, w, r, x
+    xor          xq, xq
+    movsxdifnidn wq, wd
+    sub          wq, 1
+    mov          rq, wq
+    and          rq, mmsize/2 - 1
+    cmp          wq, mmsize/2
+    jl .end
+    sub          wq, rq
+    mova         m3, [pw_255]
+    mova         m4, [pw_128]
+    mova         m5, [pw_257]
+    .loop:
+        pmovzxbw    m0, [sq+xq]
+        movu        m1, [aq+2*xq]
+        pandn       m2, m3, m1
+        psllw       m1, 8
+        pavgw       m2, m1
+        pavgw       m2, m1
+        psrlw       m2, 8
+        pmovzxbw    m1, [dq+xq]
+        pmullw      m0, m2
+        pxor        m2, m3
+        pmullw      m1, m2
+        paddw       m0, m4
+        paddw       m0, m1
+        pmulhuw     m0, m5
+        packuswb    m0, m0
+        movq   [dq+xq], m0
+        add         xq, mmsize/2
+        cmp         xq, wq
+        jl .loop
+
+    .end:
+    mov    eax, xd
+    RET
+
+INIT_XMM sse4
+cglobal overlay_row_20, 6, 7, 7, 0, d, da, s, a, w, r, x
+    mov         daq, aq
+    add         daq, rmp
+    xor          xq, xq
+    movsxdifnidn wq, wd
+    sub          wq, 1
+    mov          rq, wq
+    and          rq, mmsize/2 - 1
+    cmp          wq, mmsize/2
+    jl .end
+    sub          wq, rq
+    mova         m3, [pw_255]
+    mova         m4, [pw_128]
+    mova         m5, [pw_257]
+    mova         m6, [pb_1]
+    .loop:
+        pmovzxbw    m0, [sq+xq]
+        movu        m2, [aq+2*xq]
+        movu        m1, [daq+2*xq]
+        pmaddubsw   m2, m6
+        pmaddubsw   m1, m6
+        paddw       m2, m1
+        psrlw       m2, 2
+        pmovzxbw    m1, [dq+xq]
+        pmullw      m0, m2
+        pxor        m2, m3
+        pmullw      m1, m2
+        paddw       m0, m4
+        paddw       m0, m1
+        pmulhuw     m0, m5
+        packuswb    m0, m0
+        movq   [dq+xq], m0
+        add         xq, mmsize/2
+        cmp         xq, wq
+        jl .loop
+
+    .end:
+    mov    eax, xd
+    RET

diff --git a/libavfilter/x86/vf_overlay_init.c b/libavfilter/x86/vf_overlay_init.c
new file mode 100644
index 0000000..d4218b1
--- /dev/null
+++ b/libavfilter/x86/vf_overlay_init.c

@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/vf_overlay.h"
+
+int ff_overlay_row_44_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
+                           int w, ptrdiff_t alinesize);
+
+int ff_overlay_row_20_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
+                           int w, ptrdiff_t alinesize);
+
+int ff_overlay_row_22_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
+                           int w, ptrdiff_t alinesize);
+
+av_cold void ff_overlay_init_x86(OverlayContext *s, int format, int pix_format,
+                                 int alpha_format, int main_has_alpha)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE4(cpu_flags) &&
+        (format == OVERLAY_FORMAT_YUV444 ||
+         format == OVERLAY_FORMAT_GBRP) &&
+        alpha_format == 0 && main_has_alpha == 0) {
+        s->blend_row[0] = ff_overlay_row_44_sse4;
+        s->blend_row[1] = ff_overlay_row_44_sse4;
+        s->blend_row[2] = ff_overlay_row_44_sse4;
+    }
+
+    if (EXTERNAL_SSE4(cpu_flags) &&
+        (pix_format == AV_PIX_FMT_YUV420P) &&
+        (format == OVERLAY_FORMAT_YUV420) &&
+        alpha_format == 0 && main_has_alpha == 0) {
+        s->blend_row[0] = ff_overlay_row_44_sse4;
+        s->blend_row[1] = ff_overlay_row_20_sse4;
+        s->blend_row[2] = ff_overlay_row_20_sse4;
+    }
+
+    if (EXTERNAL_SSE4(cpu_flags) &&
+        (format == OVERLAY_FORMAT_YUV422) &&
+        alpha_format == 0 && main_has_alpha == 0) {
+        s->blend_row[0] = ff_overlay_row_44_sse4;
+        s->blend_row[1] = ff_overlay_row_22_sse4;
+        s->blend_row[2] = ff_overlay_row_22_sse4;
+    }
+}

diff --git a/libavfilter/x86/vf_threshold.asm b/libavfilter/x86/vf_threshold.asm
new file mode 100644
index 0000000..098069b
--- /dev/null
+++ b/libavfilter/x86/vf_threshold.asm

@@ -0,0 +1,92 @@
+;*****************************************************************************
+;* x86-optimized functions for threshold filter
+;*
+;* Copyright (C) 2017 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pb_128: times 16 db 128
+pb_128_0 : times 8 db 0, 128
+
+SECTION .text
+
+;%1 depth (8 or 16) ; %2 b or w ; %3 constant
+%macro THRESHOLD 3
+%if ARCH_X86_64
+cglobal threshold%1, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
+    mov             wd, dword wm
+    mov             hd, dword hm
+%else
+cglobal threshold%1, 5, 7, 5, in, threshold, min, max, out, w, x
+    mov             wd, r10m
+%define     ilinesizeq  r5mp
+%define     tlinesizeq  r6mp
+%define     flinesizeq  r7mp
+%define     slinesizeq  r8mp
+%define     olinesizeq  r9mp
+%define             hd  r11mp
+%endif
+    VBROADCASTI128  m4, [%3]
+%if %1 == 16
+    add             wq, wq ; w *= 2 (16 bits instead of 8)
+%endif
+    add            inq, wq
+    add     thresholdq, wq
+    add           minq, wq
+    add           maxq, wq
+    add           outq, wq
+    neg             wq
+.nextrow:
+    mov         xq, wq
+
+    .loop:
+        movu            m1, [inq + xq]
+        movu            m0, [thresholdq + xq]
+        movu            m2, [minq + xq]
+        movu            m3, [maxq + xq]
+        pxor            m0, m4
+        pxor            m1, m4
+        pcmpgt%2        m0, m1
+        PBLENDVB        m3, m2, m0
+        movu   [outq + xq], m3
+        add             xq, mmsize
+    jl .loop
+
+    add          inq, ilinesizeq
+    add   thresholdq, tlinesizeq
+    add         minq, flinesizeq
+    add         maxq, slinesizeq
+    add         outq, olinesizeq
+    sub         hd, 1
+    jg .nextrow
+RET
+%endmacro
+
+INIT_XMM sse4
+THRESHOLD 8, b, pb_128
+THRESHOLD 16, w, pb_128_0
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+THRESHOLD 8, b, pb_128
+THRESHOLD 16, w, pb_128_0
+%endif

diff --git a/libavfilter/x86/vf_threshold_init.c b/libavfilter/x86/vf_threshold_init.c
new file mode 100644
index 0000000..8e42296
--- /dev/null
+++ b/libavfilter/x86/vf_threshold_init.c

@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/threshold.h"
+
+#define THRESHOLD_FUNC(depth, opt) \
+void ff_threshold##depth##_##opt(const uint8_t *in, const uint8_t *threshold,\
+                                const uint8_t *min, const uint8_t *max,     \
+                                uint8_t *out,                               \
+                                ptrdiff_t ilinesize, ptrdiff_t tlinesize,   \
+                                ptrdiff_t flinesize, ptrdiff_t slinesize,   \
+                                ptrdiff_t olinesize,                        \
+                                int w, int h);
+
+THRESHOLD_FUNC(8, sse4)
+THRESHOLD_FUNC(8, avx2)
+THRESHOLD_FUNC(16, sse4)
+THRESHOLD_FUNC(16, avx2)
+
+av_cold void ff_threshold_init_x86(ThresholdContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (s->depth == 8) {
+        if (EXTERNAL_SSE4(cpu_flags)) {
+            s->threshold = ff_threshold8_sse4;
+        }
+        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+            s->threshold = ff_threshold8_avx2;
+        }
+    } else if (s->depth == 16) {
+        if (EXTERNAL_SSE4(cpu_flags)) {
+            s->threshold = ff_threshold16_sse4;
+        }
+        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+            s->threshold = ff_threshold16_avx2;
+        }
+    }
+}

diff --git a/libavfilter/x86/yadif-16.asm b/libavfilter/x86/yadif-16.asm
index 79d127d..9053b37 100644
--- a/libavfilter/x86/yadif-16.asm
+++ b/libavfilter/x86/yadif-16.asm

@@ -54,30 +54,6 @@
 %endif
 %endmacro
 
-%macro PMINSD 3
-%if cpuflag(sse4)
-    pminsd %1, %2
-%else
-    mova    %3, %2
-    pcmpgtd %3, %1
-    pand    %1, %3
-    pandn   %3, %2
-    por     %1, %3
-%endif
-%endmacro
-
-%macro PMAXSD 3
-%if cpuflag(sse4)
-    pmaxsd %1, %2
-%else
-    mova    %3, %1
-    pcmpgtd %3, %2
-    pand    %1, %3
-    pandn   %3, %2
-    por     %1, %3
-%endif
-%endmacro
-
 %macro PMAXUW 2
 %if cpuflag(sse4)
     pmaxuw %1, %2

diff --git a/libavformat/.gitignore b/libavformat/.gitignore
index cdc24b7..fb70c12 100644
--- a/libavformat/.gitignore
+++ b/libavformat/.gitignore

@@ -1 +1,3 @@
 /protocol_list.c
+/muxer_list.c
+/demuxer_list.c

diff --git a/libavformat/Makefile b/libavformat/Makefile
index df709c29..e99e915 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile

@@ -87,11 +87,17 @@
 OBJS-$(CONFIG_AIX_DEMUXER)               += aixdec.o
 OBJS-$(CONFIG_AMR_DEMUXER)               += amr.o
 OBJS-$(CONFIG_AMR_MUXER)                 += amr.o
+OBJS-$(CONFIG_AMRNB_DEMUXER)             += amr.o
+OBJS-$(CONFIG_AMRWB_DEMUXER)             += amr.o
 OBJS-$(CONFIG_ANM_DEMUXER)               += anm.o
 OBJS-$(CONFIG_APC_DEMUXER)               += apc.o
 OBJS-$(CONFIG_APE_DEMUXER)               += ape.o apetag.o img2.o
 OBJS-$(CONFIG_APNG_DEMUXER)              += apngdec.o
 OBJS-$(CONFIG_APNG_MUXER)                += apngenc.o
+OBJS-$(CONFIG_APTX_DEMUXER)              += aptxdec.o rawdec.o
+OBJS-$(CONFIG_APTX_MUXER)                += rawenc.o
+OBJS-$(CONFIG_APTX_HD_DEMUXER)           += aptxdec.o rawdec.o
+OBJS-$(CONFIG_APTX_HD_MUXER)             += rawenc.o
 OBJS-$(CONFIG_AQTITLE_DEMUXER)           += aqtitledec.o subtitles.o
 OBJS-$(CONFIG_ASF_DEMUXER)               += asfdec_f.o asf.o asfcrypt.o \
                                             avlanguage.o
@@ -109,6 +115,8 @@
 OBJS-$(CONFIG_AVM2_MUXER)                += swfenc.o swf.o
 OBJS-$(CONFIG_AVR_DEMUXER)               += avr.o pcm.o
 OBJS-$(CONFIG_AVS_DEMUXER)               += avs.o voc_packet.o vocdec.o voc.o
+OBJS-$(CONFIG_AVS2_DEMUXER)              += davs2.o rawdec.o
+OBJS-$(CONFIG_AVS2_MUXER)                += rawenc.o
 OBJS-$(CONFIG_BETHSOFTVID_DEMUXER)       += bethsoftvid.o
 OBJS-$(CONFIG_BFI_DEMUXER)               += bfi.o
 OBJS-$(CONFIG_BINK_DEMUXER)              += bink.o
@@ -120,20 +128,23 @@
 OBJS-$(CONFIG_BFSTM_DEMUXER)             += brstm.o
 OBJS-$(CONFIG_BRSTM_DEMUXER)             += brstm.o
 OBJS-$(CONFIG_C93_DEMUXER)               += c93.o voc_packet.o vocdec.o voc.o
-OBJS-$(CONFIG_CAF_DEMUXER)               += cafdec.o caf.o mov.o mov_chan.o \
-                                            replaygain.o
+OBJS-$(CONFIG_CAF_DEMUXER)               += cafdec.o caf.o mov_chan.o mov_esds.o
 OBJS-$(CONFIG_CAF_MUXER)                 += cafenc.o caf.o riff.o
 OBJS-$(CONFIG_CAVSVIDEO_DEMUXER)         += cavsvideodec.o rawdec.o
 OBJS-$(CONFIG_CAVSVIDEO_MUXER)           += rawenc.o
 OBJS-$(CONFIG_CDG_DEMUXER)               += cdg.o
 OBJS-$(CONFIG_CDXL_DEMUXER)              += cdxl.o
 OBJS-$(CONFIG_CINE_DEMUXER)              += cinedec.o
+OBJS-$(CONFIG_CODEC2_DEMUXER)            += codec2.o rawdec.o pcm.o
+OBJS-$(CONFIG_CODEC2_MUXER)              += codec2.o rawenc.o
+OBJS-$(CONFIG_CODEC2RAW_DEMUXER)         += codec2.o rawdec.o pcm.o
+OBJS-$(CONFIG_CODEC2RAW_MUXER)           += rawenc.o
 OBJS-$(CONFIG_CONCAT_DEMUXER)            += concatdec.o
 OBJS-$(CONFIG_CRC_MUXER)                 += crcenc.o
 OBJS-$(CONFIG_DATA_DEMUXER)              += rawdec.o
 OBJS-$(CONFIG_DATA_MUXER)                += rawenc.o
-OBJS-$(CONFIG_DASH_MUXER)                += dash.o dashenc.o
-OBJS-$(CONFIG_DASH_DEMUXER)              += dashdec.o
+OBJS-$(CONFIG_DASH_MUXER)                += dash.o dashenc.o hlsplaylist.o
+OBJS-$(CONFIG_DASH_DEMUXER)              += dash.o dashdec.o
 OBJS-$(CONFIG_DAUD_DEMUXER)              += dauddec.o
 OBJS-$(CONFIG_DAUD_MUXER)                += daudenc.o
 OBJS-$(CONFIG_DCSTR_DEMUXER)             += dcstr.o
@@ -158,11 +169,10 @@
 OBJS-$(CONFIG_EAC3_DEMUXER)              += ac3dec.o rawdec.o
 OBJS-$(CONFIG_EAC3_MUXER)                += rawenc.o
 OBJS-$(CONFIG_EPAF_DEMUXER)              += epafdec.o pcm.o
-OBJS-$(CONFIG_FFM_DEMUXER)               += ffmdec.o
-OBJS-$(CONFIG_FFM_MUXER)                 += ffmenc.o
 OBJS-$(CONFIG_FFMETADATA_DEMUXER)        += ffmetadec.o
 OBJS-$(CONFIG_FFMETADATA_MUXER)          += ffmetaenc.o
 OBJS-$(CONFIG_FIFO_MUXER)                += fifo.o
+OBJS-$(CONFIG_FIFO_TEST_MUXER)           += fifo_test.o
 OBJS-$(CONFIG_FILMSTRIP_DEMUXER)         += filmstripdec.o
 OBJS-$(CONFIG_FILMSTRIP_MUXER)           += filmstripenc.o
 OBJS-$(CONFIG_FITS_DEMUXER)              += fitsdec.o
@@ -212,7 +222,7 @@
 OBJS-$(CONFIG_HEVC_DEMUXER)              += hevcdec.o rawdec.o
 OBJS-$(CONFIG_HEVC_MUXER)                += rawenc.o
 OBJS-$(CONFIG_HLS_DEMUXER)               += hls.o
-OBJS-$(CONFIG_HLS_MUXER)                 += hlsenc.o
+OBJS-$(CONFIG_HLS_MUXER)                 += hlsenc.o hlsplaylist.o
 OBJS-$(CONFIG_HNM_DEMUXER)               += hnm.o
 OBJS-$(CONFIG_ICO_DEMUXER)               += icodec.o
 OBJS-$(CONFIG_ICO_MUXER)                 += icoenc.o
@@ -250,6 +260,7 @@
 OBJS-$(CONFIG_IMAGE_TIFF_PIPE_DEMUXER)    += img2dec.o img2.o
 OBJS-$(CONFIG_IMAGE_WEBP_PIPE_DEMUXER)    += img2dec.o img2.o
 OBJS-$(CONFIG_IMAGE_XPM_PIPE_DEMUXER)     += img2dec.o img2.o
+OBJS-$(CONFIG_IMAGE_XWD_PIPE_DEMUXER)     += img2dec.o img2.o
 OBJS-$(CONFIG_INGENIENT_DEMUXER)         += ingenientdec.o rawdec.o
 OBJS-$(CONFIG_IPMOVIE_DEMUXER)           += ipmovie.o
 OBJS-$(CONFIG_IRCAM_DEMUXER)             += ircamdec.o ircam.o pcm.o
@@ -276,7 +287,7 @@
                                             oggparsevorbis.o vorbiscomment.o \
                                             flac_picture.o replaygain.o
 OBJS-$(CONFIG_MATROSKA_MUXER)            += matroskaenc.o matroska.o \
-                                            avc.o hevc.o \
+                                            av1.o avc.o hevc.o \
                                             flacenc_header.o avlanguage.o vorbiscomment.o wv.o \
                                             webmdashenc.o webm_chunk.o
 OBJS-$(CONFIG_MD5_MUXER)                 += hashenc.o
@@ -292,8 +303,8 @@
 OBJS-$(CONFIG_MM_DEMUXER)                += mm.o
 OBJS-$(CONFIG_MMF_DEMUXER)               += mmf.o
 OBJS-$(CONFIG_MMF_MUXER)                 += mmf.o rawenc.o
-OBJS-$(CONFIG_MOV_DEMUXER)               += mov.o mov_chan.o replaygain.o
-OBJS-$(CONFIG_MOV_MUXER)                 += movenc.o avc.o hevc.o vpcc.o \
+OBJS-$(CONFIG_MOV_DEMUXER)               += mov.o mov_chan.o mov_esds.o replaygain.o
+OBJS-$(CONFIG_MOV_MUXER)                 += movenc.o av1.o avc.o hevc.o vpcc.o \
                                             movenchint.o mov_chan.o rtp.o \
                                             movenccenc.o rawutils.o
 OBJS-$(CONFIG_MP2_MUXER)                 += rawenc.o
@@ -328,6 +339,7 @@
 OBJS-$(CONFIG_MXG_DEMUXER)               += mxg.o
 OBJS-$(CONFIG_NC_DEMUXER)                += ncdec.o
 OBJS-$(CONFIG_NISTSPHERE_DEMUXER)        += nistspheredec.o pcm.o
+OBJS-$(CONFIG_NSP_DEMUXER)               += nspdec.o
 OBJS-$(CONFIG_NSV_DEMUXER)               += nsvdec.o
 OBJS-$(CONFIG_NULL_MUXER)                += nullenc.o
 OBJS-$(CONFIG_NUT_DEMUXER)               += nutdec.o nut.o isom.o
@@ -443,6 +455,8 @@
 OBJS-$(CONFIG_SAMI_DEMUXER)              += samidec.o subtitles.o
 OBJS-$(CONFIG_SAP_DEMUXER)               += sapdec.o
 OBJS-$(CONFIG_SAP_MUXER)                 += sapenc.o
+OBJS-$(CONFIG_SBC_DEMUXER)               += sbcdec.o rawdec.o
+OBJS-$(CONFIG_SBC_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_SBG_DEMUXER)               += sbgdec.o
 OBJS-$(CONFIG_SCC_DEMUXER)               += sccdec.o subtitles.o
 OBJS-$(CONFIG_SCC_MUXER)                 += sccenc.o subtitles.o
@@ -451,7 +465,9 @@
 OBJS-$(CONFIG_SDS_DEMUXER)               += sdsdec.o
 OBJS-$(CONFIG_SDX_DEMUXER)               += sdxdec.o
 OBJS-$(CONFIG_SEGAFILM_DEMUXER)          += segafilm.o
+OBJS-$(CONFIG_SEGAFILM_MUXER)            += segafilmenc.o
 OBJS-$(CONFIG_SEGMENT_MUXER)             += segment.o
+OBJS-$(CONFIG_SER_DEMUXER)               += serdec.o
 OBJS-$(CONFIG_SHORTEN_DEMUXER)           += shortendec.o rawdec.o
 OBJS-$(CONFIG_SIFF_DEMUXER)              += siff.o
 OBJS-$(CONFIG_SINGLEJPEG_MUXER)          += rawenc.o
@@ -471,6 +487,7 @@
 OBJS-$(CONFIG_SRT_MUXER)                 += srtenc.o
 OBJS-$(CONFIG_STL_DEMUXER)               += stldec.o subtitles.o
 OBJS-$(CONFIG_STR_DEMUXER)               += psxstr.o
+OBJS-$(CONFIG_STREAM_SEGMENT_MUXER)      += segment.o
 OBJS-$(CONFIG_SUBVIEWER1_DEMUXER)        += subviewer1dec.o subtitles.o
 OBJS-$(CONFIG_SUBVIEWER_DEMUXER)         += subviewerdec.o subtitles.o
 OBJS-$(CONFIG_SUP_DEMUXER)               += supdec.o
@@ -491,6 +508,7 @@
 OBJS-$(CONFIG_TTA_DEMUXER)               += tta.o apetag.o img2.o
 OBJS-$(CONFIG_TTA_MUXER)                 += ttaenc.o apetag.o img2.o
 OBJS-$(CONFIG_TTY_DEMUXER)               += tty.o sauce.o
+OBJS-$(CONFIG_TY_DEMUXER)                += ty.o
 OBJS-$(CONFIG_TXD_DEMUXER)               += txd.o
 OBJS-$(CONFIG_UNCODEDFRAMECRC_MUXER)     += uncodedframecrcenc.o framehash.o
 OBJS-$(CONFIG_V210_DEMUXER)              += v210.o
@@ -514,7 +532,7 @@
 OBJS-$(CONFIG_WAV_MUXER)                 += wavenc.o
 OBJS-$(CONFIG_WC3_DEMUXER)               += wc3movie.o
 OBJS-$(CONFIG_WEBM_MUXER)                += matroskaenc.o matroska.o \
-                                            avc.o hevc.o \
+                                            av1.o avc.o hevc.o \
                                             flacenc_header.o avlanguage.o \
                                             wv.o vorbiscomment.o \
                                             webmdashenc.o webm_chunk.o
@@ -542,15 +560,13 @@
 OBJS-$(CONFIG_YUV4MPEGPIPE_DEMUXER)      += yuv4mpegdec.o
 OBJS-$(CONFIG_YUV4MPEGPIPE_MUXER)        += yuv4mpegenc.o
 
-# external libraries
+# external library muxers/demuxers
 OBJS-$(CONFIG_AVISYNTH_DEMUXER)          += avisynth.o
 OBJS-$(CONFIG_CHROMAPRINT_MUXER)         += chromaprint.o
 OBJS-$(CONFIG_LIBGME_DEMUXER)            += libgme.o
 OBJS-$(CONFIG_LIBMODPLUG_DEMUXER)        += libmodplug.o
 OBJS-$(CONFIG_LIBOPENMPT_DEMUXER)        += libopenmpt.o
-OBJS-$(CONFIG_LIBRTMP)                   += librtmp.o
-OBJS-$(CONFIG_LIBSSH_PROTOCOL)           += libssh.o
-OBJS-$(CONFIG_LIBSMBCLIENT_PROTOCOL)     += libsmbclient.o
+OBJS-$(CONFIG_VAPOURSYNTH_DEMUXER)       += vapoursynth.o
 
 # protocols I/O
 OBJS-$(CONFIG_ASYNC_PROTOCOL)            += async.o
@@ -560,7 +576,7 @@
 OBJS-$(CONFIG_CONCAT_PROTOCOL)           += concat.o
 OBJS-$(CONFIG_CRYPTO_PROTOCOL)           += crypto.o
 OBJS-$(CONFIG_DATA_PROTOCOL)             += data_uri.o
-OBJS-$(CONFIG_FFRTMPCRYPT_PROTOCOL)      += rtmpcrypt.o rtmpdh.o
+OBJS-$(CONFIG_FFRTMPCRYPT_PROTOCOL)      += rtmpcrypt.o rtmpdigest.o rtmpdh.o
 OBJS-$(CONFIG_FFRTMPHTTP_PROTOCOL)       += rtmphttp.o
 OBJS-$(CONFIG_FILE_PROTOCOL)             += file.o
 OBJS-$(CONFIG_FTP_PROTOCOL)              += ftp.o
@@ -575,26 +591,39 @@
 OBJS-$(CONFIG_MMST_PROTOCOL)             += mmst.o mms.o asf.o
 OBJS-$(CONFIG_PIPE_PROTOCOL)             += file.o
 OBJS-$(CONFIG_PROMPEG_PROTOCOL)          += prompeg.o
-OBJS-$(CONFIG_RTMP_PROTOCOL)             += rtmpproto.o rtmppkt.o
-OBJS-$(CONFIG_RTMPE_PROTOCOL)            += rtmpproto.o rtmppkt.o
-OBJS-$(CONFIG_RTMPS_PROTOCOL)            += rtmpproto.o rtmppkt.o
-OBJS-$(CONFIG_RTMPT_PROTOCOL)            += rtmpproto.o rtmppkt.o
-OBJS-$(CONFIG_RTMPTE_PROTOCOL)           += rtmpproto.o rtmppkt.o
-OBJS-$(CONFIG_RTMPTS_PROTOCOL)           += rtmpproto.o rtmppkt.o
-OBJS-$(CONFIG_RTP_PROTOCOL)              += rtpproto.o
+OBJS-$(CONFIG_RTMP_PROTOCOL)             += rtmpproto.o rtmpdigest.o rtmppkt.o
+OBJS-$(CONFIG_RTMPE_PROTOCOL)            += rtmpproto.o rtmpdigest.o rtmppkt.o
+OBJS-$(CONFIG_RTMPS_PROTOCOL)            += rtmpproto.o rtmpdigest.o rtmppkt.o
+OBJS-$(CONFIG_RTMPT_PROTOCOL)            += rtmpproto.o rtmpdigest.o rtmppkt.o
+OBJS-$(CONFIG_RTMPTE_PROTOCOL)           += rtmpproto.o rtmpdigest.o rtmppkt.o
+OBJS-$(CONFIG_RTMPTS_PROTOCOL)           += rtmpproto.o rtmpdigest.o rtmppkt.o
+OBJS-$(CONFIG_RTP_PROTOCOL)              += rtpproto.o ip.o
 OBJS-$(CONFIG_SCTP_PROTOCOL)             += sctp.o
 OBJS-$(CONFIG_SRTP_PROTOCOL)             += srtpproto.o srtp.o
 OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
 OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o tee_common.o
 OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
-OBJS-$(CONFIG_TLS_GNUTLS_PROTOCOL)       += tls_gnutls.o tls.o
-OBJS-$(CONFIG_TLS_OPENSSL_PROTOCOL)      += tls_openssl.o tls.o
-OBJS-$(CONFIG_TLS_SCHANNEL_PROTOCOL)     += tls_schannel.o tls.o
-OBJS-$(CONFIG_TLS_SECURETRANSPORT_PROTOCOL) += tls_securetransport.o tls.o
-OBJS-$(CONFIG_UDP_PROTOCOL)              += udp.o
-OBJS-$(CONFIG_UDPLITE_PROTOCOL)          += udp.o
+TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
+TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
+TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
+TLS-OBJS-$(CONFIG_OPENSSL)               += tls_openssl.o
+TLS-OBJS-$(CONFIG_SECURETRANSPORT)       += tls_securetransport.o
+TLS-OBJS-$(CONFIG_SCHANNEL)              += tls_schannel.o
+OBJS-$(CONFIG_TLS_PROTOCOL)              += tls.o $(TLS-OBJS-yes)
+OBJS-$(CONFIG_UDP_PROTOCOL)              += udp.o ip.o
+OBJS-$(CONFIG_UDPLITE_PROTOCOL)          += udp.o ip.o
 OBJS-$(CONFIG_UNIX_PROTOCOL)             += unix.o
 
+# external library protocols
+OBJS-$(CONFIG_LIBRTMP_PROTOCOL)          += librtmp.o
+OBJS-$(CONFIG_LIBRTMPE_PROTOCOL)         += librtmp.o
+OBJS-$(CONFIG_LIBRTMPS_PROTOCOL)         += librtmp.o
+OBJS-$(CONFIG_LIBRTMPT_PROTOCOL)         += librtmp.o
+OBJS-$(CONFIG_LIBRTMPTE_PROTOCOL)        += librtmp.o
+OBJS-$(CONFIG_LIBSMBCLIENT_PROTOCOL)     += libsmbclient.o
+OBJS-$(CONFIG_LIBSRT_PROTOCOL)           += libsrt.o
+OBJS-$(CONFIG_LIBSSH_PROTOCOL)           += libssh.o
+
 # libavdevice dependencies
 OBJS-$(CONFIG_IEC61883_INDEV)            += dv.o
 

diff --git a/libavformat/aacdec.c b/libavformat/aacdec.c
index 36d558f..685458b 100644
--- a/libavformat/aacdec.c
+++ b/libavformat/aacdec.c

@@ -22,8 +22,10 @@
 
 #include "libavutil/intreadwrite.h"
 #include "avformat.h"
+#include "avio_internal.h"
 #include "internal.h"
 #include "id3v1.h"
+#include "id3v2.h"
 #include "apetag.h"
 
 #define ADTS_HEADER_SIZE 7
@@ -116,13 +118,56 @@
     return 0;
 }
 
+static int handle_id3(AVFormatContext *s, AVPacket *pkt)
+{
+    AVDictionary *metadata = NULL;
+    AVIOContext ioctx;
+    ID3v2ExtraMeta *id3v2_extra_meta = NULL;
+    int ret;
+
+    ret = av_append_packet(s->pb, pkt, ff_id3v2_tag_len(pkt->data) - pkt->size);
+    if (ret < 0) {
+        av_packet_unref(pkt);
+        return ret;
+    }
+
+    ffio_init_context(&ioctx, pkt->data, pkt->size, 0, NULL, NULL, NULL, NULL);
+    ff_id3v2_read_dict(&ioctx, &metadata, ID3v2_DEFAULT_MAGIC, &id3v2_extra_meta);
+    if ((ret = ff_id3v2_parse_priv_dict(&metadata, &id3v2_extra_meta)) < 0)
+        goto error;
+
+    if (metadata) {
+        if ((ret = av_dict_copy(&s->metadata, metadata, 0)) < 0)
+            goto error;
+        s->event_flags |= AVFMT_EVENT_FLAG_METADATA_UPDATED;
+    }
+
+error:
+    av_packet_unref(pkt);
+    ff_id3v2_free_extra_meta(&id3v2_extra_meta);
+    av_dict_free(&metadata);
+
+    return ret;
+}
+
 static int adts_aac_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     int ret, fsize;
 
-    ret = av_get_packet(s->pb, pkt, ADTS_HEADER_SIZE);
+    // Parse all the ID3 headers between frames
+    while (1) {
+        ret = av_get_packet(s->pb, pkt, FFMAX(ID3v2_HEADER_SIZE, ADTS_HEADER_SIZE));
+        if (ret >= ID3v2_HEADER_SIZE && ff_id3v2_match(pkt->data, ID3v2_DEFAULT_MAGIC)) {
+            if ((ret = handle_id3(s, pkt)) >= 0) {
+                continue;
+            }
+        }
+        break;
+    }
+
     if (ret < 0)
         return ret;
+
     if (ret < ADTS_HEADER_SIZE) {
         av_packet_unref(pkt);
         return AVERROR(EIO);
@@ -139,7 +184,7 @@
         return AVERROR_INVALIDDATA;
     }
 
-    ret = av_append_packet(s->pb, pkt, fsize - ADTS_HEADER_SIZE);
+    ret = av_append_packet(s->pb, pkt, fsize - pkt->size);
     if (ret < 0)
         av_packet_unref(pkt);
 

diff --git a/libavformat/aadec.c b/libavformat/aadec.c
index 8d39b1d..d83f283 100644
--- a/libavformat/aadec.c
+++ b/libavformat/aadec.c

@@ -35,6 +35,9 @@
 #define MAX_TOC_ENTRIES 16
 #define MAX_DICTIONARY_ENTRIES 128
 #define TEA_BLOCK_SIZE 8
+#define CHAPTER_HEADER_SIZE 8
+#define TIMEPREC 1000
+#define MP3_FRAME_SIZE 104
 
 typedef struct AADemuxContext {
     AVClass *class;
@@ -46,6 +49,9 @@
     struct AVTEA *tea_ctx;
     uint8_t file_key[16];
     int64_t current_chapter_size;
+    int64_t content_start;
+    int64_t content_end;
+    int seek_offset;
 } AADemuxContext;
 
 static int get_second_size(char *codec_name)
@@ -69,7 +75,7 @@
     uint32_t nkey, nval, toc_size, npairs, header_seed = 0, start;
     char key[128], val[128], codec_name[64] = {0};
     uint8_t output[24], dst[8], src[8];
-    int64_t largest_size = -1, current_size = -1;
+    int64_t largest_size = -1, current_size = -1, chapter_pos;
     struct toc_entry {
         uint32_t offset;
         uint32_t size;
@@ -172,19 +178,24 @@
         st->codecpar->codec_id = AV_CODEC_ID_MP3;
         st->codecpar->sample_rate = 22050;
         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
-        st->start_time = 0;
+        avpriv_set_pts_info(st, 64, 8, 32000 * TIMEPREC);
+        // encoded audio frame is MP3_FRAME_SIZE bytes (+1 with padding, unlikely)
     } else if (!strcmp(codec_name, "acelp85")) {
         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
         st->codecpar->block_align = 19;
         st->codecpar->channels = 1;
         st->codecpar->sample_rate = 8500;
+        st->codecpar->bit_rate = 8500;
         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
+        avpriv_set_pts_info(st, 64, 8, 8500 * TIMEPREC);
     } else if (!strcmp(codec_name, "acelp16")) {
         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
         st->codecpar->block_align = 20;
         st->codecpar->channels = 1;
         st->codecpar->sample_rate = 16000;
+        st->codecpar->bit_rate = 16000;
         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
+        avpriv_set_pts_info(st, 64, 8, 16000 * TIMEPREC);
     }
 
     /* determine, and jump to audio start offset */
@@ -197,7 +208,30 @@
     }
     start = TOC[largest_idx].offset;
     avio_seek(pb, start, SEEK_SET);
+
+    // extract chapter positions. since all formats have constant bit rate, use it
+    // as time base in bytes/s, for easy stream position <-> timestamp conversion
+    st->start_time = 0;
+    c->content_start = start;
+    c->content_end = start + largest_size;
+
+    while ((chapter_pos = avio_tell(pb)) >= 0 && chapter_pos < c->content_end) {
+        int chapter_idx = s->nb_chapters;
+        uint32_t chapter_size = avio_rb32(pb);
+        if (chapter_size == 0) break;
+        chapter_pos -= start + CHAPTER_HEADER_SIZE * chapter_idx;
+        avio_skip(pb, 4 + chapter_size);
+        if (!avpriv_new_chapter(s, chapter_idx, st->time_base,
+            chapter_pos * TIMEPREC, (chapter_pos + chapter_size) * TIMEPREC, NULL))
+                return AVERROR(ENOMEM);
+    }
+
+    st->duration = (largest_size - CHAPTER_HEADER_SIZE * s->nb_chapters) * TIMEPREC;
+
+    ff_update_cur_dts(s, st, 0);
+    avio_seek(pb, start, SEEK_SET);
     c->current_chapter_size = 0;
+    c->seek_offset = 0;
 
     return 0;
 }
@@ -213,6 +247,12 @@
     int written = 0;
     int ret;
     AADemuxContext *c = s->priv_data;
+    uint64_t pos = avio_tell(s->pb);
+
+    // are we at the end of the audio content?
+    if (pos >= c->content_end) {
+        return AVERROR_EOF;
+    }
 
     // are we at the start of a chapter?
     if (c->current_chapter_size == 0) {
@@ -223,6 +263,7 @@
         av_log(s, AV_LOG_DEBUG, "Chapter %d (%" PRId64 " bytes)\n", c->chapter_idx, c->current_chapter_size);
         c->chapter_idx = c->chapter_idx + 1;
         avio_skip(s->pb, 4); // data start offset
+        pos += 8;
         c->current_codec_second_size = c->codec_second_size;
     }
 
@@ -234,7 +275,9 @@
     // decrypt c->current_codec_second_size bytes
     blocks = c->current_codec_second_size / TEA_BLOCK_SIZE;
     for (i = 0; i < blocks; i++) {
-        avio_read(s->pb, src, TEA_BLOCK_SIZE);
+        ret = avio_read(s->pb, src, TEA_BLOCK_SIZE);
+        if (ret != TEA_BLOCK_SIZE)
+            return (ret < 0) ? ret : AVERROR_EOF;
         av_tea_init(c->tea_ctx, c->file_key, 16);
         av_tea_crypt(c->tea_ctx, dst, src, 1, NULL, 1);
         memcpy(buf + written, dst, TEA_BLOCK_SIZE);
@@ -242,7 +285,9 @@
     }
     trailing_bytes = c->current_codec_second_size % TEA_BLOCK_SIZE;
     if (trailing_bytes != 0) { // trailing bytes are left unencrypted!
-        avio_read(s->pb, src, trailing_bytes);
+        ret = avio_read(s->pb, src, trailing_bytes);
+        if (ret != trailing_bytes)
+            return (ret < 0) ? ret : AVERROR_EOF;
         memcpy(buf + written, src, trailing_bytes);
         written = written + trailing_bytes;
     }
@@ -252,14 +297,69 @@
     if (c->current_chapter_size <= 0)
         c->current_chapter_size = 0;
 
-    ret = av_new_packet(pkt, written);
+    if (c->seek_offset > written)
+        c->seek_offset = 0; // ignore wrong estimate
+
+    ret = av_new_packet(pkt, written - c->seek_offset);
     if (ret < 0)
         return ret;
-    memcpy(pkt->data, buf, written);
+    memcpy(pkt->data, buf + c->seek_offset, written - c->seek_offset);
+    pkt->pos = pos;
 
+    c->seek_offset = 0;
     return 0;
 }
 
+static int aa_read_seek(AVFormatContext *s,
+                        int stream_index, int64_t timestamp, int flags)
+{
+    AADemuxContext *c = s->priv_data;
+    AVChapter *ch;
+    int64_t chapter_pos, chapter_start, chapter_size;
+    int chapter_idx = 0;
+
+    // find chapter containing seek timestamp
+    if (timestamp < 0)
+        timestamp = 0;
+
+    while (chapter_idx < s->nb_chapters && timestamp >= s->chapters[chapter_idx]->end) {
+        ++chapter_idx;
+    }
+
+    if (chapter_idx >= s->nb_chapters) {
+        chapter_idx = s->nb_chapters - 1;
+        if (chapter_idx < 0) return -1; // there is no chapter.
+        timestamp = s->chapters[chapter_idx]->end;
+    }
+
+    ch = s->chapters[chapter_idx];
+
+    // sync by clamping timestamp to nearest valid block position in its chapter
+    chapter_size = ch->end / TIMEPREC - ch->start / TIMEPREC;
+    chapter_pos = av_rescale_rnd((timestamp - ch->start) / TIMEPREC,
+        1, c->codec_second_size,
+        (flags & AVSEEK_FLAG_BACKWARD) ? AV_ROUND_DOWN : AV_ROUND_UP)
+        * c->codec_second_size;
+    if (chapter_pos >= chapter_size)
+        chapter_pos = chapter_size;
+    chapter_start = c->content_start + (ch->start / TIMEPREC) + CHAPTER_HEADER_SIZE * (1 + chapter_idx);
+
+    // reinit read state
+    avio_seek(s->pb, chapter_start + chapter_pos, SEEK_SET);
+    c->current_codec_second_size = c->codec_second_size;
+    c->current_chapter_size = chapter_size - chapter_pos;
+    c->chapter_idx = 1 + chapter_idx;
+
+    // for unaligned frames, estimate offset of first frame in block (assume no padding)
+    if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3) {
+        c->seek_offset = (MP3_FRAME_SIZE - chapter_pos % MP3_FRAME_SIZE) % MP3_FRAME_SIZE;
+    }
+
+    ff_update_cur_dts(s, s->streams[0], ch->start + (chapter_pos + c->seek_offset) * TIMEPREC);
+
+    return 1;
+}
+
 static int aa_probe(AVProbeData *p)
 {
     uint8_t *buf = p->buf;
@@ -305,6 +405,7 @@
     .read_probe     = aa_probe,
     .read_header    = aa_read_header,
     .read_packet    = aa_read_packet,
+    .read_seek      = aa_read_seek,
     .read_close     = aa_read_close,
-    .flags          = AVFMT_GENERIC_INDEX,
+    .flags          = AVFMT_NO_BYTE_SEEK | AVFMT_NOGENSEARCH,
 };

diff --git a/libavformat/ac3dec.c b/libavformat/ac3dec.c
index e85b0ac..6f423ff 100644
--- a/libavformat/ac3dec.c
+++ b/libavformat/ac3dec.c

@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/avassert.h"
 #include "libavutil/crc.h"
 #include "libavcodec/ac3_parser.h"
 #include "avformat.h"
@@ -28,8 +29,6 @@
 {
     int max_frames, first_frames = 0, frames;
     const uint8_t *buf, *buf2, *end;
-    AC3HeaderInfo *phdr = NULL;
-    GetBitContext gbc;
     enum AVCodecID codec_id = AV_CODEC_ID_AC3;
 
     max_frames = 0;
@@ -44,39 +43,49 @@
 
         for(frames = 0; buf2 < end; frames++) {
             uint8_t buf3[4096];
-            int i;
-            if(!memcmp(buf2, "\x1\x10\0\0\0\0\0\0", 8))
+            uint8_t bitstream_id;
+            uint16_t frame_size;
+            int i, ret;
+
+            if(!memcmp(buf2, "\x1\x10\0\0\0\0\0\0", 8)) {
+                if (buf2 + 16 > end)
+                    break;
                 buf2+=16;
+            }
             if (buf[0] == 0x77 && buf[1] == 0x0B) {
                 for(i=0; i<8; i+=2) {
                     buf3[i  ] = buf2[i+1];
                     buf3[i+1] = buf2[i  ];
                 }
-                init_get_bits(&gbc, buf3, 54);
+                ret = av_ac3_parse_header(buf3, 8, &bitstream_id,
+                                          &frame_size);
             }else
-                init_get_bits(&gbc, buf2, 54);
-            if(avpriv_ac3_parse_header(&gbc, &phdr) < 0)
+                ret = av_ac3_parse_header(buf2, end - buf2, &bitstream_id,
+                                          &frame_size);
+            if (ret < 0)
                 break;
-            if(buf2 + phdr->frame_size > end)
+            if(buf2 + frame_size > end)
                 break;
             if (buf[0] == 0x77 && buf[1] == 0x0B) {
-                av_assert0(phdr->frame_size <= sizeof(buf3));
-                for(i=8; i<phdr->frame_size; i+=2) {
+                av_assert0(frame_size <= sizeof(buf3));
+                for(i = 8; i < frame_size; i += 2) {
                     buf3[i  ] = buf2[i+1];
                     buf3[i+1] = buf2[i  ];
                 }
+                if (av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, buf3 + 2, frame_size - 2))
+                    break;
+            } else {
+                if (av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, buf2 + 2, frame_size - 2))
+                    break;
             }
-            if(av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, gbc.buffer + 2, phdr->frame_size - 2))
-                break;
-            if (phdr->bitstream_id > 10)
+            if (bitstream_id > 10)
                 codec_id = AV_CODEC_ID_EAC3;
-            buf2 += phdr->frame_size;
+            buf2 += frame_size;
         }
         max_frames = FFMAX(max_frames, frames);
         if(buf == p->buf)
             first_frames = frames;
     }
-    av_freep(&phdr);
     if(codec_id != expected_codec_id) return 0;
     // keep this in sync with mp3 probe, both need to avoid
     // issues with MPEG-files!

diff --git a/libavformat/adtsenc.c b/libavformat/adtsenc.c
index a046c2f..3c2840c 100644
--- a/libavformat/adtsenc.c
+++ b/libavformat/adtsenc.c

@@ -85,7 +85,7 @@
         init_put_bits(&pb, adts->pce_data, MAX_PCE_SIZE);
 
         put_bits(&pb, 3, 5); //ID_PCE
-        adts->pce_size = (avpriv_copy_pce_data(&pb, &gb) + 3) / 8;
+        adts->pce_size = (ff_copy_pce_data(&pb, &gb) + 3) / 8;
         flush_put_bits(&pb);
     }
 
@@ -94,13 +94,15 @@
     return 0;
 }
 
-static int adts_write_header(AVFormatContext *s)
+static int adts_init(AVFormatContext *s)
 {
     ADTSContext *adts = s->priv_data;
     AVCodecParameters *par = s->streams[0]->codecpar;
 
-    if (adts->id3v2tag)
-        ff_id3v2_write_simple(s, 4, ID3v2_DEFAULT_MAGIC);
+    if (par->codec_id != AV_CODEC_ID_AAC) {
+        av_log(s, AV_LOG_ERROR, "Only AAC streams can be muxed by the ADTS muxer\n");
+        return AVERROR(EINVAL);
+    }
     if (par->extradata_size > 0)
         return adts_decode_extradata(s, adts, par->extradata,
                                      par->extradata_size);
@@ -108,6 +110,16 @@
     return 0;
 }
 
+static int adts_write_header(AVFormatContext *s)
+{
+    ADTSContext *adts = s->priv_data;
+
+    if (adts->id3v2tag)
+        ff_id3v2_write_simple(s, 4, ID3v2_DEFAULT_MAGIC);
+
+    return 0;
+}
+
 static int adts_write_frame_header(ADTSContext *ctx,
                                    uint8_t *buf, int size, int pce_size)
 {
@@ -220,6 +232,7 @@
     .priv_data_size    = sizeof(ADTSContext),
     .audio_codec       = AV_CODEC_ID_AAC,
     .video_codec       = AV_CODEC_ID_NONE,
+    .init              = adts_init,
     .write_header      = adts_write_header,
     .write_packet      = adts_write_packet,
     .write_trailer     = adts_write_trailer,

diff --git a/libavformat/aiffdec.c b/libavformat/aiffdec.c
index 99e05c7..7c701e0 100644
--- a/libavformat/aiffdec.c
+++ b/libavformat/aiffdec.c

@@ -81,11 +81,10 @@
             av_free(str);
             return;
         }
-        size += (size&1)-res;
+        size -= res;
         str[res] = 0;
         av_dict_set(&s->metadata, key, str, AV_DICT_DONT_STRDUP_VAL);
-    }else
-        size+= size&1;
+    }
 
     avio_skip(s->pb, size);
 }
@@ -325,6 +324,16 @@
             if(ff_mov_read_chan(s, pb, st, size) < 0)
                 return AVERROR_INVALIDDATA;
             break;
+        case MKTAG('A','P','C','M'): /* XA ADPCM compressed sound chunk */
+            st->codecpar->codec_id = AV_CODEC_ID_ADPCM_XA;
+            aiff->data_end = avio_tell(pb) + size;
+            offset = avio_tell(pb) + 8;
+            /* This field is unknown and its data seems to be irrelevant */
+            avio_rb32(pb);
+            st->codecpar->block_align = avio_rb32(pb);
+
+            goto got_sound;
+            break;
         case 0:
             if (offset > 0 && st->codecpar->block_align) // COMM && SSND
                 goto got_sound;

diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 405ddb5..9e41718 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c

@@ -20,376 +20,606 @@
  */
 
 #include "libavutil/thread.h"
+#include "libavformat/internal.h"
 #include "avformat.h"
 #include "rtp.h"
 #include "rdt.h"
 #include "url.h"
 #include "version.h"
 
-#define REGISTER_MUXER(X, x)                                            \
-    {                                                                   \
-        extern AVOutputFormat ff_##x##_muxer;                           \
-        if (CONFIG_##X##_MUXER)                                         \
-            av_register_output_format(&ff_##x##_muxer);                 \
-    }
+/* (de)muxers */
+extern AVOutputFormat ff_a64_muxer;
+extern AVInputFormat  ff_aa_demuxer;
+extern AVInputFormat  ff_aac_demuxer;
+extern AVInputFormat  ff_ac3_demuxer;
+extern AVOutputFormat ff_ac3_muxer;
+extern AVInputFormat  ff_acm_demuxer;
+extern AVInputFormat  ff_act_demuxer;
+extern AVInputFormat  ff_adf_demuxer;
+extern AVInputFormat  ff_adp_demuxer;
+extern AVInputFormat  ff_ads_demuxer;
+extern AVOutputFormat ff_adts_muxer;
+extern AVInputFormat  ff_adx_demuxer;
+extern AVOutputFormat ff_adx_muxer;
+extern AVInputFormat  ff_aea_demuxer;
+extern AVInputFormat  ff_afc_demuxer;
+extern AVInputFormat  ff_aiff_demuxer;
+extern AVOutputFormat ff_aiff_muxer;
+extern AVInputFormat  ff_aix_demuxer;
+extern AVInputFormat  ff_amr_demuxer;
+extern AVOutputFormat ff_amr_muxer;
+extern AVInputFormat  ff_amrnb_demuxer;
+extern AVInputFormat  ff_amrwb_demuxer;
+extern AVInputFormat  ff_anm_demuxer;
+extern AVInputFormat  ff_apc_demuxer;
+extern AVInputFormat  ff_ape_demuxer;
+extern AVInputFormat  ff_apng_demuxer;
+extern AVOutputFormat ff_apng_muxer;
+extern AVInputFormat  ff_aptx_demuxer;
+extern AVOutputFormat ff_aptx_muxer;
+extern AVInputFormat  ff_aptx_hd_demuxer;
+extern AVOutputFormat ff_aptx_hd_muxer;
+extern AVInputFormat  ff_aqtitle_demuxer;
+extern AVInputFormat  ff_asf_demuxer;
+extern AVOutputFormat ff_asf_muxer;
+extern AVInputFormat  ff_asf_o_demuxer;
+extern AVInputFormat  ff_ass_demuxer;
+extern AVOutputFormat ff_ass_muxer;
+extern AVInputFormat  ff_ast_demuxer;
+extern AVOutputFormat ff_ast_muxer;
+extern AVOutputFormat ff_asf_stream_muxer;
+extern AVInputFormat  ff_au_demuxer;
+extern AVOutputFormat ff_au_muxer;
+extern AVInputFormat  ff_avi_demuxer;
+extern AVOutputFormat ff_avi_muxer;
+extern AVInputFormat  ff_avisynth_demuxer;
+extern AVOutputFormat ff_avm2_muxer;
+extern AVInputFormat  ff_avr_demuxer;
+extern AVInputFormat  ff_avs_demuxer;
+extern AVInputFormat  ff_avs2_demuxer;
+extern AVOutputFormat ff_avs2_muxer;
+extern AVInputFormat  ff_bethsoftvid_demuxer;
+extern AVInputFormat  ff_bfi_demuxer;
+extern AVInputFormat  ff_bintext_demuxer;
+extern AVInputFormat  ff_bink_demuxer;
+extern AVInputFormat  ff_bit_demuxer;
+extern AVOutputFormat ff_bit_muxer;
+extern AVInputFormat  ff_bmv_demuxer;
+extern AVInputFormat  ff_bfstm_demuxer;
+extern AVInputFormat  ff_brstm_demuxer;
+extern AVInputFormat  ff_boa_demuxer;
+extern AVInputFormat  ff_c93_demuxer;
+extern AVInputFormat  ff_caf_demuxer;
+extern AVOutputFormat ff_caf_muxer;
+extern AVInputFormat  ff_cavsvideo_demuxer;
+extern AVOutputFormat ff_cavsvideo_muxer;
+extern AVInputFormat  ff_cdg_demuxer;
+extern AVInputFormat  ff_cdxl_demuxer;
+extern AVInputFormat  ff_cine_demuxer;
+extern AVInputFormat  ff_codec2_demuxer;
+extern AVOutputFormat ff_codec2_muxer;
+extern AVInputFormat  ff_codec2raw_demuxer;
+extern AVOutputFormat ff_codec2raw_muxer;
+extern AVInputFormat  ff_concat_demuxer;
+extern AVOutputFormat ff_crc_muxer;
+extern AVInputFormat  ff_dash_demuxer;
+extern AVOutputFormat ff_dash_muxer;
+extern AVInputFormat  ff_data_demuxer;
+extern AVOutputFormat ff_data_muxer;
+extern AVInputFormat  ff_daud_demuxer;
+extern AVOutputFormat ff_daud_muxer;
+extern AVInputFormat  ff_dcstr_demuxer;
+extern AVInputFormat  ff_dfa_demuxer;
+extern AVInputFormat  ff_dirac_demuxer;
+extern AVOutputFormat ff_dirac_muxer;
+extern AVInputFormat  ff_dnxhd_demuxer;
+extern AVOutputFormat ff_dnxhd_muxer;
+extern AVInputFormat  ff_dsf_demuxer;
+extern AVInputFormat  ff_dsicin_demuxer;
+extern AVInputFormat  ff_dss_demuxer;
+extern AVInputFormat  ff_dts_demuxer;
+extern AVOutputFormat ff_dts_muxer;
+extern AVInputFormat  ff_dtshd_demuxer;
+extern AVInputFormat  ff_dv_demuxer;
+extern AVOutputFormat ff_dv_muxer;
+extern AVInputFormat  ff_dvbsub_demuxer;
+extern AVInputFormat  ff_dvbtxt_demuxer;
+extern AVInputFormat  ff_dxa_demuxer;
+extern AVInputFormat  ff_ea_demuxer;
+extern AVInputFormat  ff_ea_cdata_demuxer;
+extern AVInputFormat  ff_eac3_demuxer;
+extern AVOutputFormat ff_eac3_muxer;
+extern AVInputFormat  ff_epaf_demuxer;
+extern AVOutputFormat ff_f4v_muxer;
+extern AVInputFormat  ff_ffmetadata_demuxer;
+extern AVOutputFormat ff_ffmetadata_muxer;
+extern AVOutputFormat ff_fifo_muxer;
+extern AVOutputFormat ff_fifo_test_muxer;
+extern AVInputFormat  ff_filmstrip_demuxer;
+extern AVOutputFormat ff_filmstrip_muxer;
+extern AVInputFormat  ff_fits_demuxer;
+extern AVOutputFormat ff_fits_muxer;
+extern AVInputFormat  ff_flac_demuxer;
+extern AVOutputFormat ff_flac_muxer;
+extern AVInputFormat  ff_flic_demuxer;
+extern AVInputFormat  ff_flv_demuxer;
+extern AVOutputFormat ff_flv_muxer;
+extern AVInputFormat  ff_live_flv_demuxer;
+extern AVInputFormat  ff_fourxm_demuxer;
+extern AVOutputFormat ff_framecrc_muxer;
+extern AVOutputFormat ff_framehash_muxer;
+extern AVOutputFormat ff_framemd5_muxer;
+extern AVInputFormat  ff_frm_demuxer;
+extern AVInputFormat  ff_fsb_demuxer;
+extern AVInputFormat  ff_g722_demuxer;
+extern AVOutputFormat ff_g722_muxer;
+extern AVInputFormat  ff_g723_1_demuxer;
+extern AVOutputFormat ff_g723_1_muxer;
+extern AVInputFormat  ff_g726_demuxer;
+extern AVOutputFormat ff_g726_muxer;
+extern AVInputFormat  ff_g726le_demuxer;
+extern AVOutputFormat ff_g726le_muxer;
+extern AVInputFormat  ff_g729_demuxer;
+extern AVInputFormat  ff_gdv_demuxer;
+extern AVInputFormat  ff_genh_demuxer;
+extern AVInputFormat  ff_gif_demuxer;
+extern AVOutputFormat ff_gif_muxer;
+extern AVInputFormat  ff_gsm_demuxer;
+extern AVOutputFormat ff_gsm_muxer;
+extern AVInputFormat  ff_gxf_demuxer;
+extern AVOutputFormat ff_gxf_muxer;
+extern AVInputFormat  ff_h261_demuxer;
+extern AVOutputFormat ff_h261_muxer;
+extern AVInputFormat  ff_h263_demuxer;
+extern AVOutputFormat ff_h263_muxer;
+extern AVInputFormat  ff_h264_demuxer;
+extern AVOutputFormat ff_h264_muxer;
+extern AVOutputFormat ff_hash_muxer;
+extern AVOutputFormat ff_hds_muxer;
+extern AVInputFormat  ff_hevc_demuxer;
+extern AVOutputFormat ff_hevc_muxer;
+extern AVInputFormat  ff_hls_demuxer;
+extern AVOutputFormat ff_hls_muxer;
+extern AVInputFormat  ff_hnm_demuxer;
+extern AVInputFormat  ff_ico_demuxer;
+extern AVOutputFormat ff_ico_muxer;
+extern AVInputFormat  ff_idcin_demuxer;
+extern AVInputFormat  ff_idf_demuxer;
+extern AVInputFormat  ff_iff_demuxer;
+extern AVInputFormat  ff_ilbc_demuxer;
+extern AVOutputFormat ff_ilbc_muxer;
+extern AVInputFormat  ff_image2_demuxer;
+extern AVOutputFormat ff_image2_muxer;
+extern AVInputFormat  ff_image2pipe_demuxer;
+extern AVOutputFormat ff_image2pipe_muxer;
+extern AVInputFormat  ff_image2_alias_pix_demuxer;
+extern AVInputFormat  ff_image2_brender_pix_demuxer;
+extern AVInputFormat  ff_ingenient_demuxer;
+extern AVInputFormat  ff_ipmovie_demuxer;
+extern AVOutputFormat ff_ipod_muxer;
+extern AVInputFormat  ff_ircam_demuxer;
+extern AVOutputFormat ff_ircam_muxer;
+extern AVOutputFormat ff_ismv_muxer;
+extern AVInputFormat  ff_iss_demuxer;
+extern AVInputFormat  ff_iv8_demuxer;
+extern AVInputFormat  ff_ivf_demuxer;
+extern AVOutputFormat ff_ivf_muxer;
+extern AVInputFormat  ff_ivr_demuxer;
+extern AVInputFormat  ff_jacosub_demuxer;
+extern AVOutputFormat ff_jacosub_muxer;
+extern AVInputFormat  ff_jv_demuxer;
+extern AVOutputFormat ff_latm_muxer;
+extern AVInputFormat  ff_lmlm4_demuxer;
+extern AVInputFormat  ff_loas_demuxer;
+extern AVInputFormat  ff_lrc_demuxer;
+extern AVOutputFormat ff_lrc_muxer;
+extern AVInputFormat  ff_lvf_demuxer;
+extern AVInputFormat  ff_lxf_demuxer;
+extern AVInputFormat  ff_m4v_demuxer;
+extern AVOutputFormat ff_m4v_muxer;
+extern AVOutputFormat ff_md5_muxer;
+extern AVInputFormat  ff_matroska_demuxer;
+extern AVOutputFormat ff_matroska_muxer;
+extern AVOutputFormat ff_matroska_audio_muxer;
+extern AVInputFormat  ff_mgsts_demuxer;
+extern AVInputFormat  ff_microdvd_demuxer;
+extern AVOutputFormat ff_microdvd_muxer;
+extern AVInputFormat  ff_mjpeg_demuxer;
+extern AVOutputFormat ff_mjpeg_muxer;
+extern AVInputFormat  ff_mjpeg_2000_demuxer;
+extern AVInputFormat  ff_mlp_demuxer;
+extern AVOutputFormat ff_mlp_muxer;
+extern AVInputFormat  ff_mlv_demuxer;
+extern AVInputFormat  ff_mm_demuxer;
+extern AVInputFormat  ff_mmf_demuxer;
+extern AVOutputFormat ff_mmf_muxer;
+extern AVInputFormat  ff_mov_demuxer;
+extern AVOutputFormat ff_mov_muxer;
+extern AVOutputFormat ff_mp2_muxer;
+extern AVInputFormat  ff_mp3_demuxer;
+extern AVOutputFormat ff_mp3_muxer;
+extern AVOutputFormat ff_mp4_muxer;
+extern AVInputFormat  ff_mpc_demuxer;
+extern AVInputFormat  ff_mpc8_demuxer;
+extern AVOutputFormat ff_mpeg1system_muxer;
+extern AVOutputFormat ff_mpeg1vcd_muxer;
+extern AVOutputFormat ff_mpeg1video_muxer;
+extern AVOutputFormat ff_mpeg2dvd_muxer;
+extern AVOutputFormat ff_mpeg2svcd_muxer;
+extern AVOutputFormat ff_mpeg2video_muxer;
+extern AVOutputFormat ff_mpeg2vob_muxer;
+extern AVInputFormat  ff_mpegps_demuxer;
+extern AVInputFormat  ff_mpegts_demuxer;
+extern AVOutputFormat ff_mpegts_muxer;
+extern AVInputFormat  ff_mpegtsraw_demuxer;
+extern AVInputFormat  ff_mpegvideo_demuxer;
+extern AVInputFormat  ff_mpjpeg_demuxer;
+extern AVOutputFormat ff_mpjpeg_muxer;
+extern AVInputFormat  ff_mpl2_demuxer;
+extern AVInputFormat  ff_mpsub_demuxer;
+extern AVInputFormat  ff_msf_demuxer;
+extern AVInputFormat  ff_msnwc_tcp_demuxer;
+extern AVInputFormat  ff_mtaf_demuxer;
+extern AVInputFormat  ff_mtv_demuxer;
+extern AVInputFormat  ff_musx_demuxer;
+extern AVInputFormat  ff_mv_demuxer;
+extern AVInputFormat  ff_mvi_demuxer;
+extern AVInputFormat  ff_mxf_demuxer;
+extern AVOutputFormat ff_mxf_muxer;
+extern AVOutputFormat ff_mxf_d10_muxer;
+extern AVOutputFormat ff_mxf_opatom_muxer;
+extern AVInputFormat  ff_mxg_demuxer;
+extern AVInputFormat  ff_nc_demuxer;
+extern AVInputFormat  ff_nistsphere_demuxer;
+extern AVInputFormat  ff_nsp_demuxer;
+extern AVInputFormat  ff_nsv_demuxer;
+extern AVOutputFormat ff_null_muxer;
+extern AVInputFormat  ff_nut_demuxer;
+extern AVOutputFormat ff_nut_muxer;
+extern AVInputFormat  ff_nuv_demuxer;
+extern AVOutputFormat ff_oga_muxer;
+extern AVInputFormat  ff_ogg_demuxer;
+extern AVOutputFormat ff_ogg_muxer;
+extern AVOutputFormat ff_ogv_muxer;
+extern AVInputFormat  ff_oma_demuxer;
+extern AVOutputFormat ff_oma_muxer;
+extern AVOutputFormat ff_opus_muxer;
+extern AVInputFormat  ff_paf_demuxer;
+extern AVInputFormat  ff_pcm_alaw_demuxer;
+extern AVOutputFormat ff_pcm_alaw_muxer;
+extern AVInputFormat  ff_pcm_mulaw_demuxer;
+extern AVOutputFormat ff_pcm_mulaw_muxer;
+extern AVInputFormat  ff_pcm_f64be_demuxer;
+extern AVOutputFormat ff_pcm_f64be_muxer;
+extern AVInputFormat  ff_pcm_f64le_demuxer;
+extern AVOutputFormat ff_pcm_f64le_muxer;
+extern AVInputFormat  ff_pcm_f32be_demuxer;
+extern AVOutputFormat ff_pcm_f32be_muxer;
+extern AVInputFormat  ff_pcm_f32le_demuxer;
+extern AVOutputFormat ff_pcm_f32le_muxer;
+extern AVInputFormat  ff_pcm_s32be_demuxer;
+extern AVOutputFormat ff_pcm_s32be_muxer;
+extern AVInputFormat  ff_pcm_s32le_demuxer;
+extern AVOutputFormat ff_pcm_s32le_muxer;
+extern AVInputFormat  ff_pcm_s24be_demuxer;
+extern AVOutputFormat ff_pcm_s24be_muxer;
+extern AVInputFormat  ff_pcm_s24le_demuxer;
+extern AVOutputFormat ff_pcm_s24le_muxer;
+extern AVInputFormat  ff_pcm_s16be_demuxer;
+extern AVOutputFormat ff_pcm_s16be_muxer;
+extern AVInputFormat  ff_pcm_s16le_demuxer;
+extern AVOutputFormat ff_pcm_s16le_muxer;
+extern AVInputFormat  ff_pcm_s8_demuxer;
+extern AVOutputFormat ff_pcm_s8_muxer;
+extern AVInputFormat  ff_pcm_u32be_demuxer;
+extern AVOutputFormat ff_pcm_u32be_muxer;
+extern AVInputFormat  ff_pcm_u32le_demuxer;
+extern AVOutputFormat ff_pcm_u32le_muxer;
+extern AVInputFormat  ff_pcm_u24be_demuxer;
+extern AVOutputFormat ff_pcm_u24be_muxer;
+extern AVInputFormat  ff_pcm_u24le_demuxer;
+extern AVOutputFormat ff_pcm_u24le_muxer;
+extern AVInputFormat  ff_pcm_u16be_demuxer;
+extern AVOutputFormat ff_pcm_u16be_muxer;
+extern AVInputFormat  ff_pcm_u16le_demuxer;
+extern AVOutputFormat ff_pcm_u16le_muxer;
+extern AVInputFormat  ff_pcm_u8_demuxer;
+extern AVOutputFormat ff_pcm_u8_muxer;
+extern AVInputFormat  ff_pjs_demuxer;
+extern AVInputFormat  ff_pmp_demuxer;
+extern AVOutputFormat ff_psp_muxer;
+extern AVInputFormat  ff_pva_demuxer;
+extern AVInputFormat  ff_pvf_demuxer;
+extern AVInputFormat  ff_qcp_demuxer;
+extern AVInputFormat  ff_r3d_demuxer;
+extern AVInputFormat  ff_rawvideo_demuxer;
+extern AVOutputFormat ff_rawvideo_muxer;
+extern AVInputFormat  ff_realtext_demuxer;
+extern AVInputFormat  ff_redspark_demuxer;
+extern AVInputFormat  ff_rl2_demuxer;
+extern AVInputFormat  ff_rm_demuxer;
+extern AVOutputFormat ff_rm_muxer;
+extern AVInputFormat  ff_roq_demuxer;
+extern AVOutputFormat ff_roq_muxer;
+extern AVInputFormat  ff_rpl_demuxer;
+extern AVInputFormat  ff_rsd_demuxer;
+extern AVInputFormat  ff_rso_demuxer;
+extern AVOutputFormat ff_rso_muxer;
+extern AVInputFormat  ff_rtp_demuxer;
+extern AVOutputFormat ff_rtp_muxer;
+extern AVOutputFormat ff_rtp_mpegts_muxer;
+extern AVInputFormat  ff_rtsp_demuxer;
+extern AVOutputFormat ff_rtsp_muxer;
+extern AVInputFormat  ff_s337m_demuxer;
+extern AVInputFormat  ff_sami_demuxer;
+extern AVInputFormat  ff_sap_demuxer;
+extern AVOutputFormat ff_sap_muxer;
+extern AVInputFormat  ff_sbc_demuxer;
+extern AVOutputFormat ff_sbc_muxer;
+extern AVInputFormat  ff_sbg_demuxer;
+extern AVInputFormat  ff_scc_demuxer;
+extern AVOutputFormat ff_scc_muxer;
+extern AVInputFormat  ff_sdp_demuxer;
+extern AVInputFormat  ff_sdr2_demuxer;
+extern AVInputFormat  ff_sds_demuxer;
+extern AVInputFormat  ff_sdx_demuxer;
+extern AVInputFormat  ff_segafilm_demuxer;
+extern AVOutputFormat ff_segafilm_muxer;
+extern AVOutputFormat ff_segment_muxer;
+extern AVOutputFormat ff_stream_segment_muxer;
+extern AVInputFormat  ff_ser_demuxer;
+extern AVInputFormat  ff_shorten_demuxer;
+extern AVInputFormat  ff_siff_demuxer;
+extern AVOutputFormat ff_singlejpeg_muxer;
+extern AVInputFormat  ff_sln_demuxer;
+extern AVInputFormat  ff_smacker_demuxer;
+extern AVInputFormat  ff_smjpeg_demuxer;
+extern AVOutputFormat ff_smjpeg_muxer;
+extern AVOutputFormat ff_smoothstreaming_muxer;
+extern AVInputFormat  ff_smush_demuxer;
+extern AVInputFormat  ff_sol_demuxer;
+extern AVInputFormat  ff_sox_demuxer;
+extern AVOutputFormat ff_sox_muxer;
+extern AVOutputFormat ff_spx_muxer;
+extern AVInputFormat  ff_spdif_demuxer;
+extern AVOutputFormat ff_spdif_muxer;
+extern AVInputFormat  ff_srt_demuxer;
+extern AVOutputFormat ff_srt_muxer;
+extern AVInputFormat  ff_str_demuxer;
+extern AVInputFormat  ff_stl_demuxer;
+extern AVInputFormat  ff_subviewer1_demuxer;
+extern AVInputFormat  ff_subviewer_demuxer;
+extern AVInputFormat  ff_sup_demuxer;
+extern AVOutputFormat ff_sup_muxer;
+extern AVInputFormat  ff_svag_demuxer;
+extern AVInputFormat  ff_swf_demuxer;
+extern AVOutputFormat ff_swf_muxer;
+extern AVInputFormat  ff_tak_demuxer;
+extern AVOutputFormat ff_tee_muxer;
+extern AVInputFormat  ff_tedcaptions_demuxer;
+extern AVOutputFormat ff_tg2_muxer;
+extern AVOutputFormat ff_tgp_muxer;
+extern AVInputFormat  ff_thp_demuxer;
+extern AVInputFormat  ff_threedostr_demuxer;
+extern AVInputFormat  ff_tiertexseq_demuxer;
+extern AVOutputFormat ff_mkvtimestamp_v2_muxer;
+extern AVInputFormat  ff_tmv_demuxer;
+extern AVInputFormat  ff_truehd_demuxer;
+extern AVOutputFormat ff_truehd_muxer;
+extern AVInputFormat  ff_tta_demuxer;
+extern AVOutputFormat ff_tta_muxer;
+extern AVInputFormat  ff_txd_demuxer;
+extern AVInputFormat  ff_tty_demuxer;
+extern AVInputFormat  ff_ty_demuxer;
+extern AVOutputFormat ff_uncodedframecrc_muxer;
+extern AVInputFormat  ff_v210_demuxer;
+extern AVInputFormat  ff_v210x_demuxer;
+extern AVInputFormat  ff_vag_demuxer;
+extern AVInputFormat  ff_vc1_demuxer;
+extern AVOutputFormat ff_vc1_muxer;
+extern AVInputFormat  ff_vc1t_demuxer;
+extern AVOutputFormat ff_vc1t_muxer;
+extern AVInputFormat  ff_vivo_demuxer;
+extern AVInputFormat  ff_vmd_demuxer;
+extern AVInputFormat  ff_vobsub_demuxer;
+extern AVInputFormat  ff_voc_demuxer;
+extern AVOutputFormat ff_voc_muxer;
+extern AVInputFormat  ff_vpk_demuxer;
+extern AVInputFormat  ff_vplayer_demuxer;
+extern AVInputFormat  ff_vqf_demuxer;
+extern AVInputFormat  ff_w64_demuxer;
+extern AVOutputFormat ff_w64_muxer;
+extern AVInputFormat  ff_wav_demuxer;
+extern AVOutputFormat ff_wav_muxer;
+extern AVInputFormat  ff_wc3_demuxer;
+extern AVOutputFormat ff_webm_muxer;
+extern AVInputFormat  ff_webm_dash_manifest_demuxer;
+extern AVOutputFormat ff_webm_dash_manifest_muxer;
+extern AVOutputFormat ff_webm_chunk_muxer;
+extern AVOutputFormat ff_webp_muxer;
+extern AVInputFormat  ff_webvtt_demuxer;
+extern AVOutputFormat ff_webvtt_muxer;
+extern AVInputFormat  ff_wsaud_demuxer;
+extern AVInputFormat  ff_wsd_demuxer;
+extern AVInputFormat  ff_wsvqa_demuxer;
+extern AVInputFormat  ff_wtv_demuxer;
+extern AVOutputFormat ff_wtv_muxer;
+extern AVInputFormat  ff_wve_demuxer;
+extern AVInputFormat  ff_wv_demuxer;
+extern AVOutputFormat ff_wv_muxer;
+extern AVInputFormat  ff_xa_demuxer;
+extern AVInputFormat  ff_xbin_demuxer;
+extern AVInputFormat  ff_xmv_demuxer;
+extern AVInputFormat  ff_xvag_demuxer;
+extern AVInputFormat  ff_xwma_demuxer;
+extern AVInputFormat  ff_yop_demuxer;
+extern AVInputFormat  ff_yuv4mpegpipe_demuxer;
+extern AVOutputFormat ff_yuv4mpegpipe_muxer;
+/* image demuxers */
+extern AVInputFormat  ff_image_bmp_pipe_demuxer;
+extern AVInputFormat  ff_image_dds_pipe_demuxer;
+extern AVInputFormat  ff_image_dpx_pipe_demuxer;
+extern AVInputFormat  ff_image_exr_pipe_demuxer;
+extern AVInputFormat  ff_image_j2k_pipe_demuxer;
+extern AVInputFormat  ff_image_jpeg_pipe_demuxer;
+extern AVInputFormat  ff_image_jpegls_pipe_demuxer;
+extern AVInputFormat  ff_image_pam_pipe_demuxer;
+extern AVInputFormat  ff_image_pbm_pipe_demuxer;
+extern AVInputFormat  ff_image_pcx_pipe_demuxer;
+extern AVInputFormat  ff_image_pgmyuv_pipe_demuxer;
+extern AVInputFormat  ff_image_pgm_pipe_demuxer;
+extern AVInputFormat  ff_image_pictor_pipe_demuxer;
+extern AVInputFormat  ff_image_png_pipe_demuxer;
+extern AVInputFormat  ff_image_ppm_pipe_demuxer;
+extern AVInputFormat  ff_image_psd_pipe_demuxer;
+extern AVInputFormat  ff_image_qdraw_pipe_demuxer;
+extern AVInputFormat  ff_image_sgi_pipe_demuxer;
+extern AVInputFormat  ff_image_svg_pipe_demuxer;
+extern AVInputFormat  ff_image_sunrast_pipe_demuxer;
+extern AVInputFormat  ff_image_tiff_pipe_demuxer;
+extern AVInputFormat  ff_image_webp_pipe_demuxer;
+extern AVInputFormat  ff_image_xpm_pipe_demuxer;
+extern AVInputFormat  ff_image_xwd_pipe_demuxer;
 
-#define REGISTER_DEMUXER(X, x)                                          \
-    {                                                                   \
-        extern AVInputFormat ff_##x##_demuxer;                          \
-        if (CONFIG_##X##_DEMUXER)                                       \
-            av_register_input_format(&ff_##x##_demuxer);                \
-    }
+/* external libraries */
+extern AVOutputFormat ff_chromaprint_muxer;
+extern AVInputFormat  ff_libgme_demuxer;
+extern AVInputFormat  ff_libmodplug_demuxer;
+extern AVInputFormat  ff_libopenmpt_demuxer;
+extern AVInputFormat  ff_vapoursynth_demuxer;
 
-#define REGISTER_MUXDEMUX(X, x) REGISTER_MUXER(X, x); REGISTER_DEMUXER(X, x)
+#include "libavformat/muxer_list.c"
+#include "libavformat/demuxer_list.c"
 
-static void register_all(void)
+static const AVInputFormat * const *indev_list = NULL;
+static const AVOutputFormat * const *outdev_list = NULL;
+
+const AVOutputFormat *av_muxer_iterate(void **opaque)
 {
-    avcodec_register_all();
+    static const uintptr_t size = sizeof(muxer_list)/sizeof(muxer_list[0]) - 1;
+    uintptr_t i = (uintptr_t)*opaque;
+    const AVOutputFormat *f = NULL;
 
-    /* (de)muxers */
-    REGISTER_MUXER   (A64,              a64);
-    REGISTER_DEMUXER (AA,               aa);
-    REGISTER_DEMUXER (AAC,              aac);
-    REGISTER_MUXDEMUX(AC3,              ac3);
-    REGISTER_DEMUXER (ACM,              acm);
-    REGISTER_DEMUXER (ACT,              act);
-    REGISTER_DEMUXER (ADF,              adf);
-    REGISTER_DEMUXER (ADP,              adp);
-    REGISTER_DEMUXER (ADS,              ads);
-    REGISTER_MUXER   (ADTS,             adts);
-    REGISTER_MUXDEMUX(ADX,              adx);
-    REGISTER_DEMUXER (AEA,              aea);
-    REGISTER_DEMUXER (AFC,              afc);
-    REGISTER_MUXDEMUX(AIFF,             aiff);
-    REGISTER_DEMUXER (AIX,              aix);
-    REGISTER_MUXDEMUX(AMR,              amr);
-    REGISTER_DEMUXER (ANM,              anm);
-    REGISTER_DEMUXER (APC,              apc);
-    REGISTER_DEMUXER (APE,              ape);
-    REGISTER_MUXDEMUX(APNG,             apng);
-    REGISTER_DEMUXER (AQTITLE,          aqtitle);
-    REGISTER_MUXDEMUX(ASF,              asf);
-    REGISTER_DEMUXER (ASF_O,            asf_o);
-    REGISTER_MUXDEMUX(ASS,              ass);
-    REGISTER_MUXDEMUX(AST,              ast);
-    REGISTER_MUXER   (ASF_STREAM,       asf_stream);
-    REGISTER_MUXDEMUX(AU,               au);
-    REGISTER_MUXDEMUX(AVI,              avi);
-    REGISTER_DEMUXER (AVISYNTH,         avisynth);
-    REGISTER_MUXER   (AVM2,             avm2);
-    REGISTER_DEMUXER (AVR,              avr);
-    REGISTER_DEMUXER (AVS,              avs);
-    REGISTER_DEMUXER (BETHSOFTVID,      bethsoftvid);
-    REGISTER_DEMUXER (BFI,              bfi);
-    REGISTER_DEMUXER (BINTEXT,          bintext);
-    REGISTER_DEMUXER (BINK,             bink);
-    REGISTER_MUXDEMUX(BIT,              bit);
-    REGISTER_DEMUXER (BMV,              bmv);
-    REGISTER_DEMUXER (BFSTM,            bfstm);
-    REGISTER_DEMUXER (BRSTM,            brstm);
-    REGISTER_DEMUXER (BOA,              boa);
-    REGISTER_DEMUXER (C93,              c93);
-    REGISTER_MUXDEMUX(CAF,              caf);
-    REGISTER_MUXDEMUX(CAVSVIDEO,        cavsvideo);
-    REGISTER_DEMUXER (CDG,              cdg);
-    REGISTER_DEMUXER (CDXL,             cdxl);
-    REGISTER_DEMUXER (CINE,             cine);
-    REGISTER_DEMUXER (CONCAT,           concat);
-    REGISTER_MUXER   (CRC,              crc);
-    REGISTER_MUXDEMUX(DASH,             dash);
-    REGISTER_MUXDEMUX(DATA,             data);
-    REGISTER_MUXDEMUX(DAUD,             daud);
-    REGISTER_DEMUXER (DCSTR,            dcstr);
-    REGISTER_DEMUXER (DFA,              dfa);
-    REGISTER_MUXDEMUX(DIRAC,            dirac);
-    REGISTER_MUXDEMUX(DNXHD,            dnxhd);
-    REGISTER_DEMUXER (DSF,              dsf);
-    REGISTER_DEMUXER (DSICIN,           dsicin);
-    REGISTER_DEMUXER (DSS,              dss);
-    REGISTER_MUXDEMUX(DTS,              dts);
-    REGISTER_DEMUXER (DTSHD,            dtshd);
-    REGISTER_MUXDEMUX(DV,               dv);
-    REGISTER_DEMUXER (DVBSUB,           dvbsub);
-    REGISTER_DEMUXER (DVBTXT,           dvbtxt);
-    REGISTER_DEMUXER (DXA,              dxa);
-    REGISTER_DEMUXER (EA,               ea);
-    REGISTER_DEMUXER (EA_CDATA,         ea_cdata);
-    REGISTER_MUXDEMUX(EAC3,             eac3);
-    REGISTER_DEMUXER (EPAF,             epaf);
-    REGISTER_MUXER   (F4V,              f4v);
-    REGISTER_MUXDEMUX(FFM,              ffm);
-    REGISTER_MUXDEMUX(FFMETADATA,       ffmetadata);
-    REGISTER_MUXER   (FIFO,             fifo);
-    REGISTER_MUXDEMUX(FILMSTRIP,        filmstrip);
-    REGISTER_MUXDEMUX(FITS,             fits);
-    REGISTER_MUXDEMUX(FLAC,             flac);
-    REGISTER_DEMUXER (FLIC,             flic);
-    REGISTER_MUXDEMUX(FLV,              flv);
-    REGISTER_DEMUXER (LIVE_FLV,         live_flv);
-    REGISTER_DEMUXER (FOURXM,           fourxm);
-    REGISTER_MUXER   (FRAMECRC,         framecrc);
-    REGISTER_MUXER   (FRAMEHASH,        framehash);
-    REGISTER_MUXER   (FRAMEMD5,         framemd5);
-    REGISTER_DEMUXER (FRM,              frm);
-    REGISTER_DEMUXER (FSB,              fsb);
-    REGISTER_MUXDEMUX(G722,             g722);
-    REGISTER_MUXDEMUX(G723_1,           g723_1);
-    REGISTER_MUXDEMUX(G726,             g726);
-    REGISTER_MUXDEMUX(G726LE,           g726le);
-    REGISTER_DEMUXER (G729,             g729);
-    REGISTER_DEMUXER (GDV,              gdv);
-    REGISTER_DEMUXER (GENH,             genh);
-    REGISTER_MUXDEMUX(GIF,              gif);
-    REGISTER_MUXDEMUX(GSM,              gsm);
-    REGISTER_MUXDEMUX(GXF,              gxf);
-    REGISTER_MUXDEMUX(H261,             h261);
-    REGISTER_MUXDEMUX(H263,             h263);
-    REGISTER_MUXDEMUX(H264,             h264);
-    REGISTER_MUXER   (HASH,             hash);
-    REGISTER_MUXER   (HDS,              hds);
-    REGISTER_MUXDEMUX(HEVC,             hevc);
-    REGISTER_MUXDEMUX(HLS,              hls);
-    REGISTER_DEMUXER (HNM,              hnm);
-    REGISTER_MUXDEMUX(ICO,              ico);
-    REGISTER_DEMUXER (IDCIN,            idcin);
-    REGISTER_DEMUXER (IDF,              idf);
-    REGISTER_DEMUXER (IFF,              iff);
-    REGISTER_MUXDEMUX(ILBC,             ilbc);
-    REGISTER_MUXDEMUX(IMAGE2,           image2);
-    REGISTER_MUXDEMUX(IMAGE2PIPE,       image2pipe);
-    REGISTER_DEMUXER (IMAGE2_ALIAS_PIX, image2_alias_pix);
-    REGISTER_DEMUXER (IMAGE2_BRENDER_PIX, image2_brender_pix);
-    REGISTER_DEMUXER (INGENIENT,        ingenient);
-    REGISTER_DEMUXER (IPMOVIE,          ipmovie);
-    REGISTER_MUXER   (IPOD,             ipod);
-    REGISTER_MUXDEMUX(IRCAM,            ircam);
-    REGISTER_MUXER   (ISMV,             ismv);
-    REGISTER_DEMUXER (ISS,              iss);
-    REGISTER_DEMUXER (IV8,              iv8);
-    REGISTER_MUXDEMUX(IVF,              ivf);
-    REGISTER_DEMUXER (IVR,              ivr);
-    REGISTER_MUXDEMUX(JACOSUB,          jacosub);
-    REGISTER_DEMUXER (JV,               jv);
-    REGISTER_MUXER   (LATM,             latm);
-    REGISTER_DEMUXER (LMLM4,            lmlm4);
-    REGISTER_DEMUXER (LOAS,             loas);
-    REGISTER_MUXDEMUX(LRC,              lrc);
-    REGISTER_DEMUXER (LVF,              lvf);
-    REGISTER_DEMUXER (LXF,              lxf);
-    REGISTER_MUXDEMUX(M4V,              m4v);
-    REGISTER_MUXER   (MD5,              md5);
-    REGISTER_MUXDEMUX(MATROSKA,         matroska);
-    REGISTER_MUXER   (MATROSKA_AUDIO,   matroska_audio);
-    REGISTER_DEMUXER (MGSTS,            mgsts);
-    REGISTER_MUXDEMUX(MICRODVD,         microdvd);
-    REGISTER_MUXDEMUX(MJPEG,            mjpeg);
-    REGISTER_DEMUXER (MJPEG_2000,       mjpeg_2000);
-    REGISTER_MUXDEMUX(MLP,              mlp);
-    REGISTER_DEMUXER (MLV,              mlv);
-    REGISTER_DEMUXER (MM,               mm);
-    REGISTER_MUXDEMUX(MMF,              mmf);
-    REGISTER_MUXDEMUX(MOV,              mov);
-    REGISTER_MUXER   (MP2,              mp2);
-    REGISTER_MUXDEMUX(MP3,              mp3);
-    REGISTER_MUXER   (MP4,              mp4);
-    REGISTER_DEMUXER (MPC,              mpc);
-    REGISTER_DEMUXER (MPC8,             mpc8);
-    REGISTER_MUXER   (MPEG1SYSTEM,      mpeg1system);
-    REGISTER_MUXER   (MPEG1VCD,         mpeg1vcd);
-    REGISTER_MUXER   (MPEG1VIDEO,       mpeg1video);
-    REGISTER_MUXER   (MPEG2DVD,         mpeg2dvd);
-    REGISTER_MUXER   (MPEG2SVCD,        mpeg2svcd);
-    REGISTER_MUXER   (MPEG2VIDEO,       mpeg2video);
-    REGISTER_MUXER   (MPEG2VOB,         mpeg2vob);
-    REGISTER_DEMUXER (MPEGPS,           mpegps);
-    REGISTER_MUXDEMUX(MPEGTS,           mpegts);
-    REGISTER_DEMUXER (MPEGTSRAW,        mpegtsraw);
-    REGISTER_DEMUXER (MPEGVIDEO,        mpegvideo);
-    REGISTER_MUXDEMUX(MPJPEG,           mpjpeg);
-    REGISTER_DEMUXER (MPL2,             mpl2);
-    REGISTER_DEMUXER (MPSUB,            mpsub);
-    REGISTER_DEMUXER (MSF,              msf);
-    REGISTER_DEMUXER (MSNWC_TCP,        msnwc_tcp);
-    REGISTER_DEMUXER (MTAF,             mtaf);
-    REGISTER_DEMUXER (MTV,              mtv);
-    REGISTER_DEMUXER (MUSX,             musx);
-    REGISTER_DEMUXER (MV,               mv);
-    REGISTER_DEMUXER (MVI,              mvi);
-    REGISTER_MUXDEMUX(MXF,              mxf);
-    REGISTER_MUXER   (MXF_D10,          mxf_d10);
-    REGISTER_MUXER   (MXF_OPATOM,       mxf_opatom);
-    REGISTER_DEMUXER (MXG,              mxg);
-    REGISTER_DEMUXER (NC,               nc);
-    REGISTER_DEMUXER (NISTSPHERE,       nistsphere);
-    REGISTER_DEMUXER (NSV,              nsv);
-    REGISTER_MUXER   (NULL,             null);
-    REGISTER_MUXDEMUX(NUT,              nut);
-    REGISTER_DEMUXER (NUV,              nuv);
-    REGISTER_MUXER   (OGA,              oga);
-    REGISTER_MUXDEMUX(OGG,              ogg);
-    REGISTER_MUXER   (OGV,              ogv);
-    REGISTER_MUXDEMUX(OMA,              oma);
-    REGISTER_MUXER   (OPUS,             opus);
-    REGISTER_DEMUXER (PAF,              paf);
-    REGISTER_MUXDEMUX(PCM_ALAW,         pcm_alaw);
-    REGISTER_MUXDEMUX(PCM_MULAW,        pcm_mulaw);
-    REGISTER_MUXDEMUX(PCM_F64BE,        pcm_f64be);
-    REGISTER_MUXDEMUX(PCM_F64LE,        pcm_f64le);
-    REGISTER_MUXDEMUX(PCM_F32BE,        pcm_f32be);
-    REGISTER_MUXDEMUX(PCM_F32LE,        pcm_f32le);
-    REGISTER_MUXDEMUX(PCM_S32BE,        pcm_s32be);
-    REGISTER_MUXDEMUX(PCM_S32LE,        pcm_s32le);
-    REGISTER_MUXDEMUX(PCM_S24BE,        pcm_s24be);
-    REGISTER_MUXDEMUX(PCM_S24LE,        pcm_s24le);
-    REGISTER_MUXDEMUX(PCM_S16BE,        pcm_s16be);
-    REGISTER_MUXDEMUX(PCM_S16LE,        pcm_s16le);
-    REGISTER_MUXDEMUX(PCM_S8,           pcm_s8);
-    REGISTER_MUXDEMUX(PCM_U32BE,        pcm_u32be);
-    REGISTER_MUXDEMUX(PCM_U32LE,        pcm_u32le);
-    REGISTER_MUXDEMUX(PCM_U24BE,        pcm_u24be);
-    REGISTER_MUXDEMUX(PCM_U24LE,        pcm_u24le);
-    REGISTER_MUXDEMUX(PCM_U16BE,        pcm_u16be);
-    REGISTER_MUXDEMUX(PCM_U16LE,        pcm_u16le);
-    REGISTER_MUXDEMUX(PCM_U8,           pcm_u8);
-    REGISTER_DEMUXER (PJS,              pjs);
-    REGISTER_DEMUXER (PMP,              pmp);
-    REGISTER_MUXER   (PSP,              psp);
-    REGISTER_DEMUXER (PVA,              pva);
-    REGISTER_DEMUXER (PVF,              pvf);
-    REGISTER_DEMUXER (QCP,              qcp);
-    REGISTER_DEMUXER (R3D,              r3d);
-    REGISTER_MUXDEMUX(RAWVIDEO,         rawvideo);
-    REGISTER_DEMUXER (REALTEXT,         realtext);
-    REGISTER_DEMUXER (REDSPARK,         redspark);
-    REGISTER_DEMUXER (RL2,              rl2);
-    REGISTER_MUXDEMUX(RM,               rm);
-    REGISTER_MUXDEMUX(ROQ,              roq);
-    REGISTER_DEMUXER (RPL,              rpl);
-    REGISTER_DEMUXER (RSD,              rsd);
-    REGISTER_MUXDEMUX(RSO,              rso);
-    REGISTER_MUXDEMUX(RTP,              rtp);
-    REGISTER_MUXER   (RTP_MPEGTS,       rtp_mpegts);
-    REGISTER_MUXDEMUX(RTSP,             rtsp);
-    REGISTER_DEMUXER (S337M,            s337m);
-    REGISTER_DEMUXER (SAMI,             sami);
-    REGISTER_MUXDEMUX(SAP,              sap);
-    REGISTER_DEMUXER (SBG,              sbg);
-    REGISTER_MUXDEMUX(SCC,              scc);
-    REGISTER_DEMUXER (SDP,              sdp);
-    REGISTER_DEMUXER (SDR2,             sdr2);
-    REGISTER_DEMUXER (SDS,              sds);
-    REGISTER_DEMUXER (SDX,              sdx);
-#if CONFIG_RTPDEC
-    ff_register_rtp_dynamic_payload_handlers();
-    ff_register_rdt_dynamic_payload_handlers();
-#endif
-    REGISTER_DEMUXER (SEGAFILM,         segafilm);
-    REGISTER_MUXER   (SEGMENT,          segment);
-    REGISTER_MUXER   (SEGMENT,          stream_segment);
-    REGISTER_DEMUXER (SHORTEN,          shorten);
-    REGISTER_DEMUXER (SIFF,             siff);
-    REGISTER_MUXER   (SINGLEJPEG,       singlejpeg);
-    REGISTER_DEMUXER (SLN,              sln);
-    REGISTER_DEMUXER (SMACKER,          smacker);
-    REGISTER_MUXDEMUX(SMJPEG,           smjpeg);
-    REGISTER_MUXER   (SMOOTHSTREAMING,  smoothstreaming);
-    REGISTER_DEMUXER (SMUSH,            smush);
-    REGISTER_DEMUXER (SOL,              sol);
-    REGISTER_MUXDEMUX(SOX,              sox);
-    REGISTER_MUXER   (SPX,              spx);
-    REGISTER_MUXDEMUX(SPDIF,            spdif);
-    REGISTER_MUXDEMUX(SRT,              srt);
-    REGISTER_DEMUXER (STR,              str);
-    REGISTER_DEMUXER (STL,              stl);
-    REGISTER_DEMUXER (SUBVIEWER1,       subviewer1);
-    REGISTER_DEMUXER (SUBVIEWER,        subviewer);
-    REGISTER_MUXDEMUX(SUP,              sup);
-    REGISTER_DEMUXER (SVAG,             svag);
-    REGISTER_MUXDEMUX(SWF,              swf);
-    REGISTER_DEMUXER (TAK,              tak);
-    REGISTER_MUXER   (TEE,              tee);
-    REGISTER_DEMUXER (TEDCAPTIONS,      tedcaptions);
-    REGISTER_MUXER   (TG2,              tg2);
-    REGISTER_MUXER   (TGP,              tgp);
-    REGISTER_DEMUXER (THP,              thp);
-    REGISTER_DEMUXER (THREEDOSTR,       threedostr);
-    REGISTER_DEMUXER (TIERTEXSEQ,       tiertexseq);
-    REGISTER_MUXER   (MKVTIMESTAMP_V2,  mkvtimestamp_v2);
-    REGISTER_DEMUXER (TMV,              tmv);
-    REGISTER_MUXDEMUX(TRUEHD,           truehd);
-    REGISTER_MUXDEMUX(TTA,              tta);
-    REGISTER_DEMUXER (TXD,              txd);
-    REGISTER_DEMUXER (TTY,              tty);
-    REGISTER_MUXER   (UNCODEDFRAMECRC,  uncodedframecrc);
-    REGISTER_DEMUXER (V210,             v210);
-    REGISTER_DEMUXER (V210X,            v210x);
-    REGISTER_DEMUXER (VAG,              vag);
-    REGISTER_MUXDEMUX(VC1,              vc1);
-    REGISTER_MUXDEMUX(VC1T,             vc1t);
-    REGISTER_DEMUXER (VIVO,             vivo);
-    REGISTER_DEMUXER (VMD,              vmd);
-    REGISTER_DEMUXER (VOBSUB,           vobsub);
-    REGISTER_MUXDEMUX(VOC,              voc);
-    REGISTER_DEMUXER (VPK,              vpk);
-    REGISTER_DEMUXER (VPLAYER,          vplayer);
-    REGISTER_DEMUXER (VQF,              vqf);
-    REGISTER_MUXDEMUX(W64,              w64);
-    REGISTER_MUXDEMUX(WAV,              wav);
-    REGISTER_DEMUXER (WC3,              wc3);
-    REGISTER_MUXER   (WEBM,             webm);
-    REGISTER_MUXDEMUX(WEBM_DASH_MANIFEST, webm_dash_manifest);
-    REGISTER_MUXER   (WEBM_CHUNK,       webm_chunk);
-    REGISTER_MUXER   (WEBP,             webp);
-    REGISTER_MUXDEMUX(WEBVTT,           webvtt);
-    REGISTER_DEMUXER (WSAUD,            wsaud);
-    REGISTER_DEMUXER (WSD,              wsd);
-    REGISTER_DEMUXER (WSVQA,            wsvqa);
-    REGISTER_MUXDEMUX(WTV,              wtv);
-    REGISTER_DEMUXER (WVE,              wve);
-    REGISTER_MUXDEMUX(WV,               wv);
-    REGISTER_DEMUXER (XA,               xa);
-    REGISTER_DEMUXER (XBIN,             xbin);
-    REGISTER_DEMUXER (XMV,              xmv);
-    REGISTER_DEMUXER (XVAG,             xvag);
-    REGISTER_DEMUXER (XWMA,             xwma);
-    REGISTER_DEMUXER (YOP,              yop);
-    REGISTER_MUXDEMUX(YUV4MPEGPIPE,     yuv4mpegpipe);
+    if (i < size) {
+        f = muxer_list[i];
+    } else if (indev_list) {
+        f = outdev_list[i - size];
+    }
 
-    /* image demuxers */
-    REGISTER_DEMUXER (IMAGE_BMP_PIPE,        image_bmp_pipe);
-    REGISTER_DEMUXER (IMAGE_DDS_PIPE,        image_dds_pipe);
-    REGISTER_DEMUXER (IMAGE_DPX_PIPE,        image_dpx_pipe);
-    REGISTER_DEMUXER (IMAGE_EXR_PIPE,        image_exr_pipe);
-    REGISTER_DEMUXER (IMAGE_J2K_PIPE,        image_j2k_pipe);
-    REGISTER_DEMUXER (IMAGE_JPEG_PIPE,       image_jpeg_pipe);
-    REGISTER_DEMUXER (IMAGE_JPEGLS_PIPE,     image_jpegls_pipe);
-    REGISTER_DEMUXER (IMAGE_PAM_PIPE,        image_pam_pipe);
-    REGISTER_DEMUXER (IMAGE_PBM_PIPE,        image_pbm_pipe);
-    REGISTER_DEMUXER (IMAGE_PCX_PIPE,        image_pcx_pipe);
-    REGISTER_DEMUXER (IMAGE_PGMYUV_PIPE,     image_pgmyuv_pipe);
-    REGISTER_DEMUXER (IMAGE_PGM_PIPE,        image_pgm_pipe);
-    REGISTER_DEMUXER (IMAGE_PICTOR_PIPE,     image_pictor_pipe);
-    REGISTER_DEMUXER (IMAGE_PNG_PIPE,        image_png_pipe);
-    REGISTER_DEMUXER (IMAGE_PPM_PIPE,        image_ppm_pipe);
-    REGISTER_DEMUXER (IMAGE_PSD_PIPE,        image_psd_pipe);
-    REGISTER_DEMUXER (IMAGE_QDRAW_PIPE,      image_qdraw_pipe);
-    REGISTER_DEMUXER (IMAGE_SGI_PIPE,        image_sgi_pipe);
-    REGISTER_DEMUXER (IMAGE_SVG_PIPE,        image_svg_pipe);
-    REGISTER_DEMUXER (IMAGE_SUNRAST_PIPE,    image_sunrast_pipe);
-    REGISTER_DEMUXER (IMAGE_TIFF_PIPE,       image_tiff_pipe);
-    REGISTER_DEMUXER (IMAGE_WEBP_PIPE,       image_webp_pipe);
-    REGISTER_DEMUXER (IMAGE_XPM_PIPE,        image_xpm_pipe);
+    if (f)
+        *opaque = (void*)(i + 1);
+    return f;
+}
 
-    /* external libraries */
-    REGISTER_MUXER   (CHROMAPRINT,      chromaprint);
-    REGISTER_DEMUXER (LIBGME,           libgme);
-    REGISTER_DEMUXER (LIBMODPLUG,       libmodplug);
-    REGISTER_DEMUXER (LIBOPENMPT,       libopenmpt);
+const AVInputFormat *av_demuxer_iterate(void **opaque)
+{
+    static const uintptr_t size = sizeof(demuxer_list)/sizeof(demuxer_list[0]) - 1;
+    uintptr_t i = (uintptr_t)*opaque;
+    const AVInputFormat *f = NULL;
+
+    if (i < size) {
+        f = demuxer_list[i];
+    } else if (outdev_list) {
+        f = indev_list[i - size];
+    }
+
+    if (f)
+        *opaque = (void*)(i + 1);
+    return f;
+}
+
+static AVMutex avpriv_register_devices_mutex = AV_MUTEX_INITIALIZER;
+
+#if FF_API_NEXT
+FF_DISABLE_DEPRECATION_WARNINGS
+static AVOnce av_format_next_init = AV_ONCE_INIT;
+
+static void av_format_init_next(void)
+{
+    AVOutputFormat *prevout = NULL, *out;
+    AVInputFormat *previn = NULL, *in;
+
+    ff_mutex_lock(&avpriv_register_devices_mutex);
+
+    for (int i = 0; (out = (AVOutputFormat*)muxer_list[i]); i++) {
+        if (prevout)
+            prevout->next = out;
+        prevout = out;
+    }
+
+    if (outdev_list) {
+        for (int i = 0; (out = (AVOutputFormat*)outdev_list[i]); i++) {
+            if (prevout)
+                prevout->next = out;
+            prevout = out;
+        }
+    }
+
+    for (int i = 0; (in = (AVInputFormat*)demuxer_list[i]); i++) {
+        if (previn)
+            previn->next = in;
+        previn = in;
+    }
+
+    if (indev_list) {
+        for (int i = 0; (in = (AVInputFormat*)indev_list[i]); i++) {
+            if (previn)
+                previn->next = in;
+            previn = in;
+        }
+    }
+
+    ff_mutex_unlock(&avpriv_register_devices_mutex);
+}
+
+AVInputFormat *av_iformat_next(const AVInputFormat *f)
+{
+    ff_thread_once(&av_format_next_init, av_format_init_next);
+
+    if (f)
+        return f->next;
+    else {
+        void *opaque = NULL;
+        return (AVInputFormat *)av_demuxer_iterate(&opaque);
+    }
+}
+
+AVOutputFormat *av_oformat_next(const AVOutputFormat *f)
+{
+    ff_thread_once(&av_format_next_init, av_format_init_next);
+
+    if (f)
+        return f->next;
+    else {
+        void *opaque = NULL;
+        return (AVOutputFormat *)av_muxer_iterate(&opaque);
+    }
 }
 
 void av_register_all(void)
 {
-    static AVOnce control = AV_ONCE_INIT;
+    ff_thread_once(&av_format_next_init, av_format_init_next);
+}
 
-    ff_thread_once(&control, register_all);
+void av_register_input_format(AVInputFormat *format)
+{
+    ff_thread_once(&av_format_next_init, av_format_init_next);
+}
+
+void av_register_output_format(AVOutputFormat *format)
+{
+    ff_thread_once(&av_format_next_init, av_format_init_next);
+}
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+void avpriv_register_devices(const AVOutputFormat * const o[], const AVInputFormat * const i[])
+{
+    ff_mutex_lock(&avpriv_register_devices_mutex);
+    outdev_list = o;
+    indev_list = i;
+    ff_mutex_unlock(&avpriv_register_devices_mutex);
+#if FF_API_NEXT
+    av_format_init_next();
+#endif
 }

diff --git a/libavformat/amr.c b/libavformat/amr.c
index b5194a2..de34705 100644
--- a/libavformat/amr.c
+++ b/libavformat/amr.c

@@ -38,6 +38,13 @@
 static const char AMR_header[]   = "#!AMR\n";
 static const char AMRWB_header[] = "#!AMR-WB\n";
 
+static const uint8_t amrnb_packed_size[16] = {
+    13, 14, 16, 18, 20, 21, 27, 32, 6, 1, 1, 1, 1, 1, 1, 1
+};
+static const uint8_t amrwb_packed_size[16] = {
+    18, 24, 33, 37, 41, 47, 51, 59, 61, 6, 1, 1, 1, 1, 1, 1
+};
+
 #if CONFIG_AMR_MUXER
 static int amr_write_header(AVFormatContext *s)
 {
@@ -126,17 +133,9 @@
     mode = (toc >> 3) & 0x0F;
 
     if (par->codec_id == AV_CODEC_ID_AMR_NB) {
-        static const uint8_t packed_size[16] = {
-            12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0
-        };
-
-        size = packed_size[mode] + 1;
+        size = amrnb_packed_size[mode];
     } else if (par->codec_id == AV_CODEC_ID_AMR_WB) {
-        static const uint8_t packed_size[16] = {
-            18, 24, 33, 37, 41, 47, 51, 59, 61, 6, 6, 0, 0, 0, 1, 1
-        };
-
-        size = packed_size[mode];
+        size = amrwb_packed_size[mode];
     }
 
     if (!size || av_new_packet(pkt, size))
@@ -176,6 +175,118 @@
 };
 #endif
 
+#if CONFIG_AMRNB_DEMUXER
+static int amrnb_probe(AVProbeData *p)
+{
+    int mode, i = 0, valid = 0, invalid = 0;
+    const uint8_t *b = p->buf;
+
+    while (i < p->buf_size) {
+        mode = b[i] >> 3 & 0x0F;
+        if (mode < 9 && (b[i] & 0x4) == 0x4) {
+            int last = b[i];
+            int size = amrnb_packed_size[mode];
+            while (size--) {
+                if (b[++i] != last)
+                    break;
+            }
+            if (size > 0) {
+                valid++;
+                i += size;
+            }
+        } else {
+            valid = 0;
+            invalid++;
+            i++;
+        }
+    }
+    if (valid > 100 && valid >> 4 > invalid)
+        return AVPROBE_SCORE_EXTENSION / 2 + 1;
+    return 0;
+}
+
+static int amrnb_read_header(AVFormatContext *s)
+{
+    AVStream *st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+    st->codecpar->codec_id       = AV_CODEC_ID_AMR_NB;
+    st->codecpar->sample_rate    = 8000;
+    st->codecpar->channels       = 1;
+    st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
+    st->codecpar->codec_type     = AVMEDIA_TYPE_AUDIO;
+    avpriv_set_pts_info(st, 64, 1, 8000);
+
+    return 0;
+}
+
+AVInputFormat ff_amrnb_demuxer = {
+    .name           = "amrnb",
+    .long_name      = NULL_IF_CONFIG_SMALL("raw AMR-NB"),
+    .priv_data_size = sizeof(AMRContext),
+    .read_probe     = amrnb_probe,
+    .read_header    = amrnb_read_header,
+    .read_packet    = amr_read_packet,
+    .flags          = AVFMT_GENERIC_INDEX,
+};
+#endif
+
+#if CONFIG_AMRWB_DEMUXER
+static int amrwb_probe(AVProbeData *p)
+{
+    int mode, i = 0, valid = 0, invalid = 0;
+    const uint8_t *b = p->buf;
+
+    while (i < p->buf_size) {
+        mode = b[i] >> 3 & 0x0F;
+        if (mode < 10 && (b[i] & 0x4) == 0x4) {
+            int last = b[i];
+            int size = amrwb_packed_size[mode];
+            while (size--) {
+                if (b[++i] != last)
+                    break;
+            }
+            if (size > 0) {
+                valid++;
+                i += size;
+            }
+        } else {
+            valid = 0;
+            invalid++;
+            i++;
+        }
+    }
+    if (valid > 100 && valid >> 4 > invalid)
+        return AVPROBE_SCORE_EXTENSION / 2 + 1;
+    return 0;
+}
+
+static int amrwb_read_header(AVFormatContext *s)
+{
+    AVStream *st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+    st->codecpar->codec_id       = AV_CODEC_ID_AMR_WB;
+    st->codecpar->sample_rate    = 16000;
+    st->codecpar->channels       = 1;
+    st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
+    st->codecpar->codec_type     = AVMEDIA_TYPE_AUDIO;
+    avpriv_set_pts_info(st, 64, 1, 16000);
+
+    return 0;
+}
+
+AVInputFormat ff_amrwb_demuxer = {
+    .name           = "amrwb",
+    .long_name      = NULL_IF_CONFIG_SMALL("raw AMR-WB"),
+    .priv_data_size = sizeof(AMRContext),
+    .read_probe     = amrwb_probe,
+    .read_header    = amrwb_read_header,
+    .read_packet    = amr_read_packet,
+    .flags          = AVFMT_GENERIC_INDEX,
+};
+#endif
+
 #if CONFIG_AMR_MUXER
 AVOutputFormat ff_amr_muxer = {
     .name              = "amr",

diff --git a/libavformat/apngdec.c b/libavformat/apngdec.c
index ffff037..f9a97e5 100644
--- a/libavformat/apngdec.c
+++ b/libavformat/apngdec.c

@@ -44,7 +44,6 @@
     int max_fps;
     int default_fps;
 
-    int64_t pkt_pts;
     int pkt_duration;
 
     int is_key_frame;
@@ -390,9 +389,8 @@
 
         if (ctx->is_key_frame)
             pkt->flags |= AV_PKT_FLAG_KEY;
-        pkt->pts = ctx->pkt_pts;
+        pkt->pts = pkt->dts = AV_NOPTS_VALUE;
         pkt->duration = ctx->pkt_duration;
-        ctx->pkt_pts += ctx->pkt_duration;
         return ret;
     case MKTAG('I', 'E', 'N', 'D'):
         ctx->cur_loop++;

diff --git a/libavformat/aptxdec.c b/libavformat/aptxdec.c
new file mode 100644
index 0000000..a262cd9
--- /dev/null
+++ b/libavformat/aptxdec.c

@@ -0,0 +1,128 @@
+/*
+ * RAW aptX demuxer
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "rawdec.h"
+
+#define APTX_BLOCK_SIZE   4
+#define APTX_PACKET_SIZE  (256*APTX_BLOCK_SIZE)
+
+#define APTX_HD_BLOCK_SIZE   6
+#define APTX_HD_PACKET_SIZE  (256*APTX_HD_BLOCK_SIZE)
+
+typedef struct AptXDemuxerContext {
+    AVClass *class;
+    int sample_rate;
+} AptXDemuxerContext;
+
+static AVStream *aptx_read_header_common(AVFormatContext *s)
+{
+    AptXDemuxerContext *s1 = s->priv_data;
+    AVStream *st = avformat_new_stream(s, NULL);
+    if (!st)
+        return NULL;
+    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+    st->codecpar->format = AV_SAMPLE_FMT_S32P;
+    st->codecpar->channels = 2;
+    st->codecpar->sample_rate = s1->sample_rate;
+    st->start_time = 0;
+    return st;
+}
+
+static int aptx_read_header(AVFormatContext *s)
+{
+    AVStream *st = aptx_read_header_common(s);
+    if (!st)
+        return AVERROR(ENOMEM);
+    st->codecpar->codec_id = AV_CODEC_ID_APTX;
+    st->codecpar->bits_per_coded_sample = 4;
+    st->codecpar->block_align = APTX_BLOCK_SIZE;
+    st->codecpar->frame_size = APTX_PACKET_SIZE;
+    return 0;
+}
+
+static int aptx_hd_read_header(AVFormatContext *s)
+{
+    AVStream *st = aptx_read_header_common(s);
+    if (!st)
+        return AVERROR(ENOMEM);
+    st->codecpar->codec_id = AV_CODEC_ID_APTX_HD;
+    st->codecpar->bits_per_coded_sample = 6;
+    st->codecpar->block_align = APTX_HD_BLOCK_SIZE;
+    st->codecpar->frame_size = APTX_HD_PACKET_SIZE;
+    return 0;
+}
+
+static int aptx_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    return av_get_packet(s->pb, pkt, APTX_PACKET_SIZE);
+}
+
+static int aptx_hd_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    return av_get_packet(s->pb, pkt, APTX_HD_PACKET_SIZE);
+}
+
+static const AVOption aptx_options[] = {
+    { "sample_rate", "", offsetof(AptXDemuxerContext, sample_rate), AV_OPT_TYPE_INT, {.i64 = 48000}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+    { NULL },
+};
+
+#if CONFIG_APTX_DEMUXER
+static const AVClass aptx_demuxer_class = {
+    .class_name = "aptx demuxer",
+    .item_name  = av_default_item_name,
+    .option     = aptx_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVInputFormat ff_aptx_demuxer = {
+    .name           = "aptx",
+    .long_name      = NULL_IF_CONFIG_SMALL("raw aptX"),
+    .extensions     = "aptx",
+    .priv_data_size = sizeof(AptXDemuxerContext),
+    .read_header    = aptx_read_header,
+    .read_packet    = aptx_read_packet,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .priv_class     = &aptx_demuxer_class,
+};
+#endif
+
+#if CONFIG_APTX_HD_DEMUXER
+static const AVClass aptx_hd_demuxer_class = {
+    .class_name = "aptx hd demuxer",
+    .item_name  = av_default_item_name,
+    .option     = aptx_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVInputFormat ff_aptx_hd_demuxer = {
+    .name           = "aptx_hd",
+    .long_name      = NULL_IF_CONFIG_SMALL("raw aptX HD"),
+    .extensions     = "aptxhd",
+    .priv_data_size = sizeof(AptXDemuxerContext),
+    .read_header    = aptx_hd_read_header,
+    .read_packet    = aptx_hd_read_packet,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .priv_class     = &aptx_hd_demuxer_class,
+};
+#endif

diff --git a/libavformat/asfdec_o.c b/libavformat/asfdec_o.c
index 5122e33..b4b2698 100644
--- a/libavformat/asfdec_o.c
+++ b/libavformat/asfdec_o.c

@@ -706,7 +706,8 @@
     st->codecpar->codec_id  = ff_codec_get_id(ff_codec_bmp_tags, tag);
     size_bmp = FFMAX(size_asf, size_bmp);
 
-    if (size_bmp > BMP_HEADER_SIZE) {
+    if (size_bmp > BMP_HEADER_SIZE &&
+        size_bmp < INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
         int ret;
         st->codecpar->extradata_size  = size_bmp - BMP_HEADER_SIZE;
         if (!(st->codecpar->extradata = av_malloc(st->codecpar->extradata_size +

diff --git a/libavformat/audiointerleave.c b/libavformat/audiointerleave.c
index 6d4954b..dea5d99 100644
--- a/libavformat/audiointerleave.c
+++ b/libavformat/audiointerleave.c

@@ -81,15 +81,19 @@
     AVStream *st = s->streams[stream_index];
     AudioInterleaveContext *aic = st->priv_data;
     int ret;
-    int size = FFMIN(av_fifo_size(aic->fifo), *aic->samples * aic->sample_size);
+    int frame_size = *aic->samples * aic->sample_size;
+    int size = FFMIN(av_fifo_size(aic->fifo), frame_size);
     if (!size || (!flush && size == av_fifo_size(aic->fifo)))
         return 0;
 
-    ret = av_new_packet(pkt, size);
+    ret = av_new_packet(pkt, frame_size);
     if (ret < 0)
         return ret;
     av_fifo_generic_read(aic->fifo, pkt->data, size, NULL);
 
+    if (size < pkt->size)
+        memset(pkt->data + size, 0, pkt->size - size);
+
     pkt->dts = pkt->pts = aic->dts;
     pkt->duration = av_rescale_q(*aic->samples, st->time_base, aic->time_base);
     pkt->stream_index = stream_index;
@@ -99,7 +103,7 @@
     if (!*aic->samples)
         aic->samples = aic->samples_per_frame;
 
-    return size;
+    return pkt->size;
 }
 
 int ff_audio_rechunk_interleave(AVFormatContext *s, AVPacket *out, AVPacket *pkt, int flush,

diff --git a/libavformat/av1.c b/libavformat/av1.c
new file mode 100644
index 0000000..a0aad43
--- /dev/null
+++ b/libavformat/av1.c

@@ -0,0 +1,393 @@
+/*
+ * AV1 helper functions for muxers
+ * Copyright (c) 2018 James Almer <jamrial@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+#include "libavcodec/av1.h"
+#include "libavcodec/av1_parse.h"
+#include "libavcodec/profiles.h"
+#include "libavcodec/put_bits.h"
+#include "av1.h"
+#include "avio.h"
+
+int ff_av1_filter_obus(AVIOContext *pb, const uint8_t *buf, int size)
+{
+    const uint8_t *end = buf + size;
+    int64_t obu_size;
+    int start_pos, type, temporal_id, spatial_id;
+
+    size = 0;
+    while (buf < end) {
+        int len = parse_obu_header(buf, end - buf, &obu_size, &start_pos,
+                                   &type, &temporal_id, &spatial_id);
+        if (len < 0)
+            return len;
+
+        switch (type) {
+        case AV1_OBU_TEMPORAL_DELIMITER:
+        case AV1_OBU_REDUNDANT_FRAME_HEADER:
+        case AV1_OBU_TILE_LIST:
+        case AV1_OBU_PADDING:
+            break;
+        default:
+            avio_write(pb, buf, len);
+            size += len;
+            break;
+        }
+        buf += len;
+    }
+
+    return size;
+}
+
+int ff_av1_filter_obus_buf(const uint8_t *buf, uint8_t **out, int *size)
+{
+    AVIOContext *pb;
+    int ret;
+
+    ret = avio_open_dyn_buf(&pb);
+    if (ret < 0)
+        return ret;
+
+    ret = ff_av1_filter_obus(pb, buf, *size);
+    if (ret < 0)
+        return ret;
+
+    av_freep(out);
+    *size = avio_close_dyn_buf(pb, out);
+
+    return ret;
+}
+
+typedef struct AV1SequenceParameters {
+    uint8_t seq_profile;
+    uint8_t seq_level_idx_0;
+    uint8_t seq_tier_0;
+    uint8_t high_bitdepth;
+    uint8_t twelve_bit;
+    uint8_t monochrome;
+    uint8_t chroma_subsampling_x;
+    uint8_t chroma_subsampling_y;
+    uint8_t chroma_sample_position;
+} AV1SequenceParameters;
+
+static inline void uvlc(GetBitContext *gb)
+{
+    int leading_zeros = 0;
+
+    while (get_bits_left(gb)) {
+        if (get_bits1(gb))
+            break;
+        leading_zeros++;
+    }
+
+    if (leading_zeros >= 32)
+        return;
+
+    skip_bits_long(gb, leading_zeros);
+}
+
+static int parse_color_config(AV1SequenceParameters *seq_params, GetBitContext *gb)
+{
+    int color_primaries, transfer_characteristics, matrix_coefficients;
+
+    seq_params->high_bitdepth = get_bits1(gb);
+    if (seq_params->seq_profile == FF_PROFILE_AV1_PROFESSIONAL && seq_params->high_bitdepth)
+        seq_params->twelve_bit = get_bits1(gb);
+
+    if (seq_params->seq_profile == FF_PROFILE_AV1_HIGH)
+        seq_params->monochrome = 0;
+    else
+        seq_params->monochrome = get_bits1(gb);
+
+    if (get_bits1(gb)) { // color_description_present_flag
+        color_primaries          = get_bits(gb, 8);
+        transfer_characteristics = get_bits(gb, 8);
+        matrix_coefficients      = get_bits(gb, 8);
+    } else {
+        color_primaries          = AVCOL_PRI_UNSPECIFIED;
+        transfer_characteristics = AVCOL_TRC_UNSPECIFIED;
+        matrix_coefficients      = AVCOL_SPC_UNSPECIFIED;
+    }
+
+    if (seq_params->monochrome) {
+        skip_bits1(gb); // color_range
+        seq_params->chroma_subsampling_x = 1;
+        seq_params->chroma_subsampling_y = 1;
+        seq_params->chroma_sample_position = 0;
+        return 0;
+    } else if (color_primaries          == AVCOL_PRI_BT709 &&
+               transfer_characteristics == AVCOL_TRC_IEC61966_2_1 &&
+               matrix_coefficients      == AVCOL_SPC_RGB) {
+        seq_params->chroma_subsampling_x = 0;
+        seq_params->chroma_subsampling_y = 0;
+    } else {
+        skip_bits1(gb); // color_range
+
+        if (seq_params->seq_profile == FF_PROFILE_AV1_MAIN) {
+            seq_params->chroma_subsampling_x = 1;
+            seq_params->chroma_subsampling_y = 1;
+        } else if (seq_params->seq_profile == FF_PROFILE_AV1_HIGH) {
+            seq_params->chroma_subsampling_x = 0;
+            seq_params->chroma_subsampling_y = 0;
+        } else {
+            if (seq_params->twelve_bit) {
+                seq_params->chroma_subsampling_x = get_bits1(gb);
+                if (seq_params->chroma_subsampling_x)
+                    seq_params->chroma_subsampling_y = get_bits1(gb);
+                else
+                    seq_params->chroma_subsampling_y = 0;
+            } else {
+                seq_params->chroma_subsampling_x = 1;
+                seq_params->chroma_subsampling_y = 0;
+            }
+        }
+        if (seq_params->chroma_subsampling_x && seq_params->chroma_subsampling_y)
+            seq_params->chroma_sample_position = get_bits(gb, 2);
+    }
+
+    skip_bits1(gb); // separate_uv_delta_q
+
+    return 0;
+}
+
+static int parse_sequence_header(AV1SequenceParameters *seq_params, const uint8_t *buf, int size)
+{
+    GetBitContext gb;
+    int reduced_still_picture_header;
+    int frame_width_bits_minus_1, frame_height_bits_minus_1;
+    int size_bits, ret;
+
+    size_bits = get_obu_bit_length(buf, size, AV1_OBU_SEQUENCE_HEADER);
+    if (size_bits < 0)
+        return size_bits;
+
+    ret = init_get_bits(&gb, buf, size_bits);
+    if (ret < 0)
+        return ret;
+
+    memset(seq_params, 0, sizeof(*seq_params));
+
+    seq_params->seq_profile = get_bits(&gb, 3);
+
+    skip_bits1(&gb); // still_picture
+    reduced_still_picture_header = get_bits1(&gb);
+
+    if (reduced_still_picture_header) {
+        seq_params->seq_level_idx_0 = get_bits(&gb, 5);
+        seq_params->seq_tier_0 = 0;
+    } else {
+        int initial_display_delay_present_flag, operating_points_cnt_minus_1;
+        int decoder_model_info_present_flag, buffer_delay_length_minus_1;
+
+        if (get_bits1(&gb)) { // timing_info_present_flag
+            skip_bits_long(&gb, 32); // num_units_in_display_tick
+            skip_bits_long(&gb, 32); // time_scale
+
+            if (get_bits1(&gb)) // equal_picture_interval
+                uvlc(&gb); // num_ticks_per_picture_minus_1
+
+            decoder_model_info_present_flag = get_bits1(&gb);
+            if (decoder_model_info_present_flag) {
+                buffer_delay_length_minus_1 = get_bits(&gb, 5);
+                skip_bits_long(&gb, 32); // num_units_in_decoding_tick
+                skip_bits(&gb, 10); // buffer_removal_time_length_minus_1 (5)
+                                    // frame_presentation_time_length_minus_1 (5)
+            }
+        } else
+            decoder_model_info_present_flag = 0;
+
+        initial_display_delay_present_flag = get_bits1(&gb);
+
+        operating_points_cnt_minus_1 = get_bits(&gb, 5);
+        for (int i = 0; i <= operating_points_cnt_minus_1; i++) {
+            int seq_level_idx, seq_tier;
+
+            skip_bits(&gb, 12); // operating_point_idc
+            seq_level_idx = get_bits(&gb, 5);
+
+            if (seq_level_idx > 7)
+                seq_tier = get_bits1(&gb);
+            else
+                seq_tier = 0;
+
+            if (decoder_model_info_present_flag) {
+                if (get_bits1(&gb)) { // decoder_model_present_for_this_op
+                    skip_bits_long(&gb, buffer_delay_length_minus_1 + 1); // decoder_buffer_delay
+                    skip_bits_long(&gb, buffer_delay_length_minus_1 + 1); // encoder_buffer_delay
+                    skip_bits1(&gb); // low_delay_mode_flag
+                }
+            }
+
+            if (initial_display_delay_present_flag) {
+                if (get_bits1(&gb)) // initial_display_delay_present_for_this_op
+                    skip_bits(&gb, 4); // initial_display_delay_minus_1
+            }
+
+            if (i == 0) {
+               seq_params->seq_level_idx_0 = seq_level_idx;
+               seq_params->seq_tier_0 = seq_tier;
+            }
+        }
+    }
+
+    frame_width_bits_minus_1  = get_bits(&gb, 4);
+    frame_height_bits_minus_1 = get_bits(&gb, 4);
+
+    skip_bits(&gb, frame_width_bits_minus_1 + 1); // max_frame_width_minus_1
+    skip_bits(&gb, frame_height_bits_minus_1 + 1); // max_frame_height_minus_1
+
+    if (!reduced_still_picture_header) {
+        if (get_bits1(&gb)) // frame_id_numbers_present_flag
+            skip_bits(&gb, 7); // delta_frame_id_length_minus_2 (4), additional_frame_id_length_minus_1 (3)
+    }
+
+    skip_bits(&gb, 3); // use_128x128_superblock (1), enable_filter_intra (1), enable_intra_edge_filter (1)
+
+    if (!reduced_still_picture_header) {
+        int enable_order_hint, seq_force_screen_content_tools;
+
+        skip_bits(&gb, 4); // enable_intraintra_compound (1), enable_masked_compound (1)
+                           // enable_warped_motion (1), enable_dual_filter (1)
+
+        enable_order_hint = get_bits1(&gb);
+        if (enable_order_hint)
+            skip_bits(&gb, 2); // enable_jnt_comp (1), enable_ref_frame_mvs (1)
+
+        if (get_bits1(&gb)) // seq_choose_screen_content_tools
+            seq_force_screen_content_tools = 2;
+        else
+            seq_force_screen_content_tools = get_bits1(&gb);
+
+        if (seq_force_screen_content_tools) {
+            if (!get_bits1(&gb)) // seq_choose_integer_mv
+                skip_bits1(&gb); // seq_force_integer_mv
+        }
+
+        if (enable_order_hint)
+            skip_bits(&gb, 3); // order_hint_bits_minus_1
+    }
+
+    skip_bits(&gb, 3); // enable_superres (1), enable_cdef (1), enable_restoration (1)
+
+    parse_color_config(seq_params, &gb);
+
+    skip_bits1(&gb); // film_grain_params_present
+
+    if (get_bits_left(&gb))
+        return AVERROR_INVALIDDATA;
+
+    return 0;
+}
+
+int ff_isom_write_av1c(AVIOContext *pb, const uint8_t *buf, int size)
+{
+    AVIOContext *seq_pb = NULL, *meta_pb = NULL;
+    AV1SequenceParameters seq_params;
+    PutBitContext pbc;
+    uint8_t header[4];
+    uint8_t *seq = NULL, *meta = NULL;
+    int64_t obu_size;
+    int start_pos, type, temporal_id, spatial_id;
+    int ret, nb_seq = 0, seq_size, meta_size;
+
+    if (size <= 0)
+        return AVERROR_INVALIDDATA;
+
+    ret = avio_open_dyn_buf(&seq_pb);
+    if (ret < 0)
+        return ret;
+    ret = avio_open_dyn_buf(&meta_pb);
+    if (ret < 0)
+        goto fail;
+
+    while (size > 0) {
+        int len = parse_obu_header(buf, size, &obu_size, &start_pos,
+                                   &type, &temporal_id, &spatial_id);
+        if (len < 0) {
+            ret = len;
+            goto fail;
+        }
+
+        switch (type) {
+        case AV1_OBU_SEQUENCE_HEADER:
+            nb_seq++;
+            if (!obu_size || nb_seq > 1) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            ret = parse_sequence_header(&seq_params, buf + start_pos, obu_size);
+            if (ret < 0)
+                goto fail;
+
+            avio_write(seq_pb, buf, len);
+            break;
+        case AV1_OBU_METADATA:
+            if (!obu_size) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            avio_write(meta_pb, buf, len);
+            break;
+        default:
+            break;
+        }
+        size -= len;
+        buf  += len;
+    }
+
+    seq_size  = avio_close_dyn_buf(seq_pb, &seq);
+    if (!seq_size) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    init_put_bits(&pbc, header, sizeof(header));
+
+    put_bits(&pbc, 1, 1); // marker
+    put_bits(&pbc, 7, 1); // version
+    put_bits(&pbc, 3, seq_params.seq_profile);
+    put_bits(&pbc, 5, seq_params.seq_level_idx_0);
+    put_bits(&pbc, 1, seq_params.seq_tier_0);
+    put_bits(&pbc, 1, seq_params.high_bitdepth);
+    put_bits(&pbc, 1, seq_params.twelve_bit);
+    put_bits(&pbc, 1, seq_params.monochrome);
+    put_bits(&pbc, 1, seq_params.chroma_subsampling_x);
+    put_bits(&pbc, 1, seq_params.chroma_subsampling_y);
+    put_bits(&pbc, 2, seq_params.chroma_sample_position);
+    flush_put_bits(&pbc);
+
+    avio_write(pb, header, sizeof(header));
+    avio_write(pb, seq, seq_size);
+
+    meta_size = avio_close_dyn_buf(meta_pb, &meta);
+    if (meta_size)
+        avio_write(pb, meta, meta_size);
+
+fail:
+    if (!seq)
+        avio_close_dyn_buf(seq_pb, &seq);
+    if (!meta)
+        avio_close_dyn_buf(meta_pb, &meta);
+    av_free(seq);
+    av_free(meta);
+
+    return ret;
+}

diff --git a/libavformat/av1.h b/libavformat/av1.h
new file mode 100644
index 0000000..9f2a71f
--- /dev/null
+++ b/libavformat/av1.h

@@ -0,0 +1,70 @@
+/*
+ * AV1 helper functions for muxers
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_AV1_H
+#define AVFORMAT_AV1_H
+
+#include <stdint.h>
+
+#include "avio.h"
+
+/**
+ * Filter out AV1 OBUs not meant to be present in ISOBMFF sample data and write
+ * the resulting bitstream to the provided AVIOContext.
+ *
+ * @param pb pointer to the AVIOContext where the filtered bitstream shall be
+ *           written
+ * @param buf input data buffer
+ * @param size size of the input data buffer
+ *
+ * @return the amount of bytes written in case of success, a negative AVERROR
+ *         code in case of failure
+ */
+int ff_av1_filter_obus(AVIOContext *pb, const uint8_t *buf, int size);
+
+/**
+ * Filter out AV1 OBUs not meant to be present in ISOBMFF sample data and write
+ * the resulting bitstream to a newly allocated data buffer.
+ *
+ * @param pb pointer to the AVIOContext where the filtered bitstream shall be
+ *           written
+ * @param buf input data buffer
+ * @param out pointer to pointer that will hold the allocated data buffer
+ * @param size size of the input data buffer. The size of the resulting output
+               data buffer will be written here
+ *
+ * @return the amount of bytes written in case of success, a negative AVERROR
+ *         code in case of failure. On failure, out and size are unchanged
+ */
+int ff_av1_filter_obus_buf(const uint8_t *buf, uint8_t **out, int *size);
+
+/**
+ * Writes AV1 extradata (Sequence Header and Metadata OBUs) to the provided
+ * AVIOContext.
+ *
+ * @param pb pointer to the AVIOContext where the hvcC shall be written
+ * @param buf input data buffer
+ * @param size size in bytes of the input data buffer
+ *
+ * @return >= 0 in case of success, a negative AVERROR code in case of failure
+ */
+int ff_isom_write_av1c(AVIOContext *pb, const uint8_t *buf, int size);
+
+#endif /* AVFORMAT_AV1_H */

diff --git a/libavformat/avc.c b/libavformat/avc.c
index 094a958..ec50033 100644
--- a/libavformat/avc.c
+++ b/libavformat/avc.c

@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavcodec/h264.h"
 #include "avformat.h"
 #include "avio.h"
 #include "avc.h"
@@ -105,60 +106,92 @@
 
 int ff_isom_write_avcc(AVIOContext *pb, const uint8_t *data, int len)
 {
-    if (len > 6) {
-        /* check for H.264 start code */
-        if (AV_RB32(data) == 0x00000001 ||
-            AV_RB24(data) == 0x000001) {
-            uint8_t *buf=NULL, *end, *start;
-            uint32_t sps_size=0, pps_size=0;
-            uint8_t *sps=0, *pps=0;
+    AVIOContext *sps_pb = NULL, *pps_pb = NULL;
+    uint8_t *buf = NULL, *end, *start = NULL;
+    uint8_t *sps = NULL, *pps = NULL;
+    uint32_t sps_size = 0, pps_size = 0;
+    int ret, nb_sps = 0, nb_pps = 0;
 
-            int ret = ff_avc_parse_nal_units_buf(data, &buf, &len);
-            if (ret < 0)
-                return ret;
-            start = buf;
-            end = buf + len;
+    if (len <= 6)
+        return AVERROR_INVALIDDATA;
 
-            /* look for sps and pps */
-            while (end - buf > 4) {
-                uint32_t size;
-                uint8_t nal_type;
-                size = FFMIN(AV_RB32(buf), end - buf - 4);
-                buf += 4;
-                nal_type = buf[0] & 0x1f;
-
-                if (nal_type == 7) { /* SPS */
-                    sps = buf;
-                    sps_size = size;
-                } else if (nal_type == 8) { /* PPS */
-                    pps = buf;
-                    pps_size = size;
-                }
-
-                buf += size;
-            }
-
-            if (!sps || !pps || sps_size < 4 || sps_size > UINT16_MAX || pps_size > UINT16_MAX)
-                return AVERROR_INVALIDDATA;
-
-            avio_w8(pb, 1); /* version */
-            avio_w8(pb, sps[1]); /* profile */
-            avio_w8(pb, sps[2]); /* profile compat */
-            avio_w8(pb, sps[3]); /* level */
-            avio_w8(pb, 0xff); /* 6 bits reserved (111111) + 2 bits nal size length - 1 (11) */
-            avio_w8(pb, 0xe1); /* 3 bits reserved (111) + 5 bits number of sps (00001) */
-
-            avio_wb16(pb, sps_size);
-            avio_write(pb, sps, sps_size);
-            avio_w8(pb, 1); /* number of pps */
-            avio_wb16(pb, pps_size);
-            avio_write(pb, pps, pps_size);
-            av_free(start);
-        } else {
-            avio_write(pb, data, len);
-        }
+    /* check for H.264 start code */
+    if (AV_RB32(data) != 0x00000001 &&
+        AV_RB24(data) != 0x000001) {
+        avio_write(pb, data, len);
+        return 0;
     }
-    return 0;
+
+    ret = ff_avc_parse_nal_units_buf(data, &buf, &len);
+    if (ret < 0)
+        return ret;
+    start = buf;
+    end = buf + len;
+
+    ret = avio_open_dyn_buf(&sps_pb);
+    if (ret < 0)
+        goto fail;
+    ret = avio_open_dyn_buf(&pps_pb);
+    if (ret < 0)
+        goto fail;
+
+    /* look for sps and pps */
+    while (end - buf > 4) {
+        uint32_t size;
+        uint8_t nal_type;
+        size = FFMIN(AV_RB32(buf), end - buf - 4);
+        buf += 4;
+        nal_type = buf[0] & 0x1f;
+
+        if (nal_type == 7) { /* SPS */
+            nb_sps++;
+            if (size > UINT16_MAX || nb_sps >= H264_MAX_SPS_COUNT) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            avio_wb16(sps_pb, size);
+            avio_write(sps_pb, buf, size);
+        } else if (nal_type == 8) { /* PPS */
+            nb_pps++;
+            if (size > UINT16_MAX || nb_pps >= H264_MAX_PPS_COUNT) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            avio_wb16(pps_pb, size);
+            avio_write(pps_pb, buf, size);
+        }
+
+        buf += size;
+    }
+    sps_size = avio_close_dyn_buf(sps_pb, &sps);
+    pps_size = avio_close_dyn_buf(pps_pb, &pps);
+
+    if (sps_size < 6 || !pps_size) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    avio_w8(pb, 1); /* version */
+    avio_w8(pb, sps[3]); /* profile */
+    avio_w8(pb, sps[4]); /* profile compat */
+    avio_w8(pb, sps[5]); /* level */
+    avio_w8(pb, 0xff); /* 6 bits reserved (111111) + 2 bits nal size length - 1 (11) */
+    avio_w8(pb, 0xe0 | nb_sps); /* 3 bits reserved (111) + 5 bits number of sps */
+
+    avio_write(pb, sps, sps_size);
+    avio_w8(pb, nb_pps); /* number of pps */
+    avio_write(pb, pps, pps_size);
+
+fail:
+    if (!sps)
+        avio_close_dyn_buf(sps_pb, &sps);
+    if (!pps)
+        avio_close_dyn_buf(pps_pb, &pps);
+    av_free(sps);
+    av_free(pps);
+    av_free(start);
+
+    return ret;
 }
 
 int ff_avc_write_annexb_extradata(const uint8_t *in, uint8_t **buf, int *size)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 47d0073..259443e 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h

@@ -442,19 +442,6 @@
  */
 int av_append_packet(AVIOContext *s, AVPacket *pkt, int size);
 
-#if FF_API_LAVF_FRAC
-/*************************************************/
-/* fractional numbers for exact pts handling */
-
-/**
- * The exact value of the fractional number is: 'val + num / den'.
- * num is assumed to be 0 <= num < den.
- */
-typedef struct AVFrac {
-    int64_t val, num, den;
-} AVFrac;
-#endif
-
 /*************************************************/
 /* input/output formats */
 
@@ -483,10 +470,6 @@
 #define AVFMT_NOFILE        0x0001
 #define AVFMT_NEEDNUMBER    0x0002 /**< Needs '%d' in filename. */
 #define AVFMT_SHOW_IDS      0x0008 /**< Show format stream IDs numbers. */
-#if FF_API_LAVF_FMT_RAWPICTURE
-#define AVFMT_RAWPICTURE    0x0020 /**< Format wants AVPicture structure for
-                                      raw picture data. @deprecated Not used anymore */
-#endif
 #define AVFMT_GLOBALHEADER  0x0040 /**< Format wants global header. */
 #define AVFMT_NOTIMESTAMPS  0x0080 /**< Format does not need / have any timestamps. */
 #define AVFMT_GENERIC_INDEX 0x0100 /**< Use generic index building code. */
@@ -807,9 +790,9 @@
     AVSTREAM_PARSE_HEADERS,    /**< Only parse headers, do not repack. */
     AVSTREAM_PARSE_TIMESTAMPS, /**< full parsing and interpolation of timestamps for frames not starting on a packet boundary */
     AVSTREAM_PARSE_FULL_ONCE,  /**< full parsing and repack of the first frame only, only implemented for H.264 currently */
-    AVSTREAM_PARSE_FULL_RAW=MKTAG(0,'R','A','W'),       /**< full parsing and repack with timestamp and position generation by parser for raw
-                                                             this assumes that each packet in the file contains no demuxer level headers and
-                                                             just codec level data, otherwise position generation would fail */
+    AVSTREAM_PARSE_FULL_RAW,   /**< full parsing and repack with timestamp and position generation by parser for raw
+                                    this assumes that each packet in the file contains no demuxer level headers and
+                                    just codec level data, otherwise position generation would fail */
 };
 
 typedef struct AVIndexEntry {
@@ -867,6 +850,8 @@
 #define AV_DISPOSITION_CAPTIONS     0x10000
 #define AV_DISPOSITION_DESCRIPTIONS 0x20000
 #define AV_DISPOSITION_METADATA     0x40000
+#define AV_DISPOSITION_DEPENDENT    0x80000 ///< dependent audio stream (mix_type=0 in mpegts)
+#define AV_DISPOSITION_STILL_IMAGE 0x100000 ///< still images in video stream (still_picture_flag=1 in mpegts)
 
 /**
  * Options for behavior on timestamp wrap detection.
@@ -899,14 +884,6 @@
 #endif
     void *priv_data;
 
-#if FF_API_LAVF_FRAC
-    /**
-     * @deprecated this field is unused
-     */
-    attribute_deprecated
-    struct AVFrac pts;
-#endif
-
     /**
      * This is the fundamental unit of time (in seconds) in terms
      * of which frame timestamps are represented.
@@ -1006,6 +983,39 @@
     int event_flags;
 #define AVSTREAM_EVENT_FLAG_METADATA_UPDATED 0x0001 ///< The call resulted in updated metadata.
 
+    /**
+     * Real base framerate of the stream.
+     * This is the lowest framerate with which all timestamps can be
+     * represented accurately (it is the least common multiple of all
+     * framerates in the stream). Note, this value is just a guess!
+     * For example, if the time base is 1/90000 and all frames have either
+     * approximately 3600 or 1800 timer ticks, then r_frame_rate will be 50/1.
+     */
+    AVRational r_frame_rate;
+
+#if FF_API_LAVF_FFSERVER
+    /**
+     * String containing pairs of key and values describing recommended encoder configuration.
+     * Pairs are separated by ','.
+     * Keys are separated from values by '='.
+     *
+     * @deprecated unused
+     */
+    attribute_deprecated
+    char *recommended_encoder_configuration;
+#endif
+
+    /**
+     * Codec parameters associated with this stream. Allocated and freed by
+     * libavformat in avformat_new_stream() and avformat_free_context()
+     * respectively.
+     *
+     * - demuxing: filled by libavformat on stream creation or in
+     *             avformat_find_stream_info()
+     * - muxing: filled by the caller before avformat_write_header()
+     */
+    AVCodecParameters *codecpar;
+
     /*****************************************************************
      * All fields below this line are not part of the public API. They
      * may not be used outside of libavformat and can be changed and
@@ -1016,10 +1026,10 @@
      *****************************************************************
      */
 
+#define MAX_STD_TIMEBASES (30*12+30+3+6)
     /**
      * Stream information used internally by avformat_find_stream_info()
      */
-#define MAX_STD_TIMEBASES (30*12+30+3+6)
     struct {
         int64_t last_dts;
         int64_t duration_gcd;
@@ -1028,6 +1038,7 @@
         double (*duration_error)[2][MAX_STD_TIMEBASES];
         int64_t codec_info_duration;
         int64_t codec_info_duration_fields;
+        int frame_delay_evidence;
 
         /**
          * 0  -> decoder has not been searched for yet.
@@ -1091,25 +1102,19 @@
     unsigned int index_entries_allocated_size;
 
     /**
-     * Real base framerate of the stream.
-     * This is the lowest framerate with which all timestamps can be
-     * represented accurately (it is the least common multiple of all
-     * framerates in the stream). Note, this value is just a guess!
-     * For example, if the time base is 1/90000 and all frames have either
-     * approximately 3600 or 1800 timer ticks, then r_frame_rate will be 50/1.
-     *
-     * Code outside avformat should access this field using:
-     * av_stream_get/set_r_frame_rate(stream)
-     */
-    AVRational r_frame_rate;
-
-    /**
      * Stream Identifier
      * This is the MPEG-TS stream identifier +1
      * 0 means unknown
      */
     int stream_identifier;
 
+    /**
+     * Details of the MPEG-TS program which created this stream.
+     */
+    int program_num;
+    int pmt_version;
+    int pmt_stream_idx;
+
     int64_t interleaver_chunk_size;
     int64_t interleaver_chunk_duration;
 
@@ -1208,19 +1213,6 @@
      */
     int inject_global_side_data;
 
-    /*****************************************************************
-     * All fields above this line are not part of the public API.
-     * Fields below are part of the public API and ABI again.
-     *****************************************************************
-     */
-
-    /**
-     * String containing paris of key and values describing recommended encoder configuration.
-     * Paris are separated by ','.
-     * Keys are separated from values by '='.
-     */
-    char *recommended_encoder_configuration;
-
     /**
      * display aspect ratio (0 if unknown)
      * - encoding: unused
@@ -1228,31 +1220,31 @@
      */
     AVRational display_aspect_ratio;
 
-    struct FFFrac *priv_pts;
-
     /**
      * An opaque field for libavformat internal usage.
      * Must not be accessed in any way by callers.
      */
     AVStreamInternal *internal;
-
-    /*
-     * Codec parameters associated with this stream. Allocated and freed by
-     * libavformat in avformat_new_stream() and avformat_free_context()
-     * respectively.
-     *
-     * - demuxing: filled by libavformat on stream creation or in
-     *             avformat_find_stream_info()
-     * - muxing: filled by the caller before avformat_write_header()
-     */
-    AVCodecParameters *codecpar;
 } AVStream;
 
+#if FF_API_FORMAT_GET_SET
+/**
+ * Accessors for some AVStream fields. These used to be provided for ABI
+ * compatibility, and do not need to be used anymore.
+ */
+attribute_deprecated
 AVRational av_stream_get_r_frame_rate(const AVStream *s);
+attribute_deprecated
 void       av_stream_set_r_frame_rate(AVStream *s, AVRational r);
-struct AVCodecParserContext *av_stream_get_parser(const AVStream *s);
+#if FF_API_LAVF_FFSERVER
+attribute_deprecated
 char* av_stream_get_recommended_encoder_configuration(const AVStream *s);
+attribute_deprecated
 void  av_stream_set_recommended_encoder_configuration(AVStream *s, char *configuration);
+#endif
+#endif
+
+struct AVCodecParserContext *av_stream_get_parser(const AVStream *s);
 
 /**
  * Returns the pts of the last muxed packet + its duration
@@ -1280,6 +1272,7 @@
     int program_num;
     int pmt_pid;
     int pcr_pid;
+    int pmt_version;
 
     /*****************************************************************
      * All fields below this line are not part of the public API. They
@@ -1297,6 +1290,11 @@
 
 #define AVFMTCTX_NOHEADER      0x0001 /**< signal that no header is present
                                          (streams are added dynamically) */
+#define AVFMTCTX_UNSEEKABLE    0x0002 /**< signal that the stream is definitely
+                                         not seekable, and attempts to call the
+                                         seek function will fail. For some
+                                         network protocols (e.g. HLS), this can
+                                         change dynamically at runtime. */
 
 typedef struct AVChapter {
     int id;                 ///< unique ID to identify the chapter
@@ -1411,13 +1409,33 @@
      */
     AVStream **streams;
 
+#if FF_API_FORMAT_FILENAME
     /**
      * input or output filename
      *
      * - demuxing: set by avformat_open_input()
      * - muxing: may be set by the caller before avformat_write_header()
+     *
+     * @deprecated Use url instead.
      */
+    attribute_deprecated
     char filename[1024];
+#endif
+
+    /**
+     * input or output URL. Unlike the old filename field, this field has no
+     * length restriction.
+     *
+     * - demuxing: set by avformat_open_input(), initialized to an empty
+     *             string if url parameter was NULL in avformat_open_input().
+     * - muxing: may be set by the caller before calling avformat_write_header()
+     *           (or avformat_init_output() if that is called first) to a string
+     *           which is freeable by av_free(). Set to an empty string if it
+     *           was NULL in avformat_init_output().
+     *
+     * Freed by libavformat in avformat_free_context().
+     */
+    char *url;
 
     /**
      * Position of the first frame of the component, in
@@ -1470,15 +1488,17 @@
  * This flag is mainly intended for testing.
  */
 #define AVFMT_FLAG_BITEXACT         0x0400
-#define AVFMT_FLAG_MP4A_LATM    0x8000 ///< Enable RTP MP4A-LATM payload
+#if FF_API_LAVF_MP4A_LATM
+#define AVFMT_FLAG_MP4A_LATM    0x8000 ///< Deprecated, does nothing.
+#endif
 #define AVFMT_FLAG_SORT_DTS    0x10000 ///< try to interleave outputted packets by dts (using this flag can slow demuxing down)
 #define AVFMT_FLAG_PRIV_OPT    0x20000 ///< Enable use of private options by delaying codec open (this could be made default once all code is converted)
 #if FF_API_LAVF_KEEPSIDE_FLAG
-#define AVFMT_FLAG_KEEP_SIDE_DATA 0x40000 ///< Don't merge side data but keep it separate. Deprecated, will be the default.
+#define AVFMT_FLAG_KEEP_SIDE_DATA 0x40000 ///< Deprecated, does nothing.
 #endif
 #define AVFMT_FLAG_FAST_SEEK   0x80000 ///< Enable fast, but inaccurate seeks for some formats
 #define AVFMT_FLAG_SHORTEST   0x100000 ///< Stop muxing when the shortest stream stops.
-#define AVFMT_FLAG_AUTO_BSF   0x200000 ///< Wait for packet data before writing a header, and add bitstream filters as requested by the muxer
+#define AVFMT_FLAG_AUTO_BSF   0x200000 ///< Add bitstream filters as requested by the muxer
 
     /**
      * Maximum size of the data read from input for determining
@@ -1881,7 +1901,7 @@
      */
     char *protocol_whitelist;
 
-    /*
+    /**
      * A callback for opening new IO streams.
      *
      * Whenever a muxer or a demuxer needs to open an IO stream (typically from
@@ -1922,31 +1942,55 @@
      * - decoding: set by user
      */
     int max_streams;
+
+    /**
+     * Skip duration calcuation in estimate_timings_from_pts.
+     * - encoding: unused
+     * - decoding: set by user
+     */
+    int skip_estimate_duration_from_pts;
 } AVFormatContext;
 
+#if FF_API_FORMAT_GET_SET
 /**
  * Accessors for some AVFormatContext fields. These used to be provided for ABI
  * compatibility, and do not need to be used anymore.
  */
+attribute_deprecated
 int av_format_get_probe_score(const AVFormatContext *s);
+attribute_deprecated
 AVCodec * av_format_get_video_codec(const AVFormatContext *s);
+attribute_deprecated
 void      av_format_set_video_codec(AVFormatContext *s, AVCodec *c);
+attribute_deprecated
 AVCodec * av_format_get_audio_codec(const AVFormatContext *s);
+attribute_deprecated
 void      av_format_set_audio_codec(AVFormatContext *s, AVCodec *c);
+attribute_deprecated
 AVCodec * av_format_get_subtitle_codec(const AVFormatContext *s);
+attribute_deprecated
 void      av_format_set_subtitle_codec(AVFormatContext *s, AVCodec *c);
+attribute_deprecated
 AVCodec * av_format_get_data_codec(const AVFormatContext *s);
+attribute_deprecated
 void      av_format_set_data_codec(AVFormatContext *s, AVCodec *c);
+attribute_deprecated
 int       av_format_get_metadata_header_padding(const AVFormatContext *s);
+attribute_deprecated
 void      av_format_set_metadata_header_padding(AVFormatContext *s, int c);
+attribute_deprecated
 void *    av_format_get_opaque(const AVFormatContext *s);
+attribute_deprecated
 void      av_format_set_opaque(AVFormatContext *s, void *opaque);
+attribute_deprecated
 av_format_control_message av_format_get_control_message_cb(const AVFormatContext *s);
+attribute_deprecated
 void      av_format_set_control_message_cb(AVFormatContext *s, av_format_control_message callback);
 #if FF_API_OLD_OPEN_CALLBACKS
 attribute_deprecated AVOpenCallback av_format_get_open_cb(const AVFormatContext *s);
 attribute_deprecated void av_format_set_open_cb(AVFormatContext *s, AVOpenCallback callback);
 #endif
+#endif
 
 /**
  * This function will cause global side data to be injected in the next packet
@@ -1991,6 +2035,7 @@
  */
 const char *avformat_license(void);
 
+#if FF_API_NEXT
 /**
  * Initialize libavformat and register all the muxers, demuxers and
  * protocols. If you do not call this function, then you can select
@@ -1999,31 +2044,44 @@
  * @see av_register_input_format()
  * @see av_register_output_format()
  */
+attribute_deprecated
 void av_register_all(void);
 
+attribute_deprecated
 void av_register_input_format(AVInputFormat *format);
+attribute_deprecated
 void av_register_output_format(AVOutputFormat *format);
+#endif
 
 /**
- * Do global initialization of network components. This is optional,
- * but recommended, since it avoids the overhead of implicitly
- * doing the setup for each session.
+ * Do global initialization of network libraries. This is optional,
+ * and not recommended anymore.
  *
- * Calling this function will become mandatory if using network
- * protocols at some major version bump.
+ * This functions only exists to work around thread-safety issues
+ * with older GnuTLS or OpenSSL libraries. If libavformat is linked
+ * to newer versions of those libraries, or if you do not use them,
+ * calling this function is unnecessary. Otherwise, you need to call
+ * this function before any other threads using them are started.
+ *
+ * This function will be deprecated once support for older GnuTLS and
+ * OpenSSL libraries is removed, and this function has no purpose
+ * anymore.
  */
 int avformat_network_init(void);
 
 /**
- * Undo the initialization done by avformat_network_init.
+ * Undo the initialization done by avformat_network_init. Call it only
+ * once for each time you called avformat_network_init.
  */
 int avformat_network_deinit(void);
 
+#if FF_API_NEXT
 /**
  * If f is NULL, returns the first registered input format,
  * if f is non-NULL, returns the next registered input format after f
  * or NULL if f is the last one.
  */
+attribute_deprecated
 AVInputFormat  *av_iformat_next(const AVInputFormat  *f);
 
 /**
@@ -2031,7 +2089,31 @@
  * if f is non-NULL, returns the next registered output format after f
  * or NULL if f is the last one.
  */
+attribute_deprecated
 AVOutputFormat *av_oformat_next(const AVOutputFormat *f);
+#endif
+
+/**
+ * Iterate over all registered muxers.
+ *
+ * @param opaque a pointer where libavformat will store the iteration state. Must
+ *               point to NULL to start the iteration.
+ *
+ * @return the next registered muxer or NULL when the iteration is
+ *         finished
+ */
+const AVOutputFormat *av_muxer_iterate(void **opaque);
+
+/**
+ * Iterate over all registered demuxers.
+ *
+ * @param opaque a pointer where libavformat will store the iteration state. Must
+ *               point to NULL to start the iteration.
+ *
+ * @return the next registered demuxer or NULL when the iteration is
+ *         finished
+ */
+const AVInputFormat *av_demuxer_iterate(void **opaque);
 
 /**
  * Allocate an AVFormatContext.
@@ -2108,13 +2190,8 @@
  * @param size pointer for side information size to store (optional)
  * @return pointer to data if present or NULL otherwise
  */
-#if FF_API_NOCONST_GET_SIDE_DATA
-uint8_t *av_stream_get_side_data(AVStream *stream,
-                                 enum AVPacketSideDataType type, int *size);
-#else
 uint8_t *av_stream_get_side_data(const AVStream *stream,
                                  enum AVPacketSideDataType type, int *size);
-#endif
 
 AVProgram *av_new_program(AVFormatContext *s, int id);
 

diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index b8a31dc..3f07479 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c

@@ -401,10 +401,10 @@
         // skip 4 byte padding
         bytestream2_skip(&gb, 4);
         offset = bytestream2_tell(&gb);
-        bytestream2_init(&gb, data + offset, data_size - offset);
 
         // decode EXIF tags from IFD, AVI is always little-endian
-        return avpriv_exif_decode_ifd(s, &gb, 1, 0, &st->metadata);
+        return avpriv_exif_decode_ifd(s, data + offset, data_size - offset,
+                                      1, 0, &st->metadata);
         break;
     case MKTAG('C', 'A', 'S', 'I'):
         avpriv_request_sample(s, "RIFF stream data tag type CASI (%u)", tag);
@@ -670,7 +670,7 @@
             st->start_time = 0;
             avio_rl32(pb); /* buffer size */
             avio_rl32(pb); /* quality */
-            if (ast->cum_len*ast->scale/ast->rate > 3600) {
+            if (ast->cum_len > 3600LL * ast->rate / ast->scale) {
                 av_log(s, AV_LOG_ERROR, "crazy start time, iam scared, giving up\n");
                 ast->cum_len = 0;
             }
@@ -1231,6 +1231,11 @@
             goto start_sync;
         }
 
+        if (d[2] == 'w' && d[3] == 'c' && n < s->nb_streams) {
+            avio_skip(pb, 16 * 3 + 8);
+            goto start_sync;
+        }
+
         if (avi->dv_demux && n != 0)
             continue;
 

diff --git a/libavformat/avienc.c b/libavformat/avienc.c
index 483f5b5..ac0f04c 100644
--- a/libavformat/avienc.c
+++ b/libavformat/avienc.c

@@ -501,8 +501,14 @@
             AVRational dar = av_mul_q(st->sample_aspect_ratio,
                                       (AVRational) { par->width,
                                                      par->height });
-            int num, den;
+            int num, den, fields, i;
             av_reduce(&num, &den, dar.num, dar.den, 0xFFFF);
+            if (par->field_order == AV_FIELD_TT || par->field_order == AV_FIELD_BB ||
+                par->field_order == AV_FIELD_TB || par->field_order == AV_FIELD_BT) {
+                fields = 2; // interlaced
+            } else {
+                fields = 1; // progressive
+            }
 
             avio_wl32(pb, 0); // video format   = unknown
             avio_wl32(pb, 0); // video standard = unknown
@@ -514,17 +520,30 @@
             avio_wl16(pb, num);
             avio_wl32(pb, par->width);
             avio_wl32(pb, par->height);
-            avio_wl32(pb, 1); // progressive FIXME
+            avio_wl32(pb, fields); // fields per frame
 
-            avio_wl32(pb, par->height);
-            avio_wl32(pb, par->width);
-            avio_wl32(pb, par->height);
-            avio_wl32(pb, par->width);
-            avio_wl32(pb, 0);
-            avio_wl32(pb, 0);
+            for (i = 0; i < fields; i++) {
+                int start_line;
+                // OpenDML v1.02 is not very specific on what value to use for
+                // start_line when frame data is not coming from a capturing device,
+                // so just use 0/1 depending on the field order for interlaced frames
+                if (par->field_order == AV_FIELD_TT || par->field_order == AV_FIELD_TB) {
+                    start_line = (i == 0) ? 0 : 1;
+                } else if (par->field_order == AV_FIELD_BB || par->field_order == AV_FIELD_BT) {
+                    start_line = (i == 0) ? 1 : 0;
+                } else {
+                    start_line = 0;
+                }
 
-            avio_wl32(pb, 0);
-            avio_wl32(pb, 0);
+                avio_wl32(pb, par->height / fields); // compressed bitmap height
+                avio_wl32(pb, par->width);           // compressed bitmap width
+                avio_wl32(pb, par->height / fields); // valid bitmap height
+                avio_wl32(pb, par->width);           // valid bitmap width
+                avio_wl32(pb, 0);                    // valid bitmap X offset
+                avio_wl32(pb, 0);                    // valid bitmap Y offset
+                avio_wl32(pb, 0);                    // valid X offset in T
+                avio_wl32(pb, start_line);           // valid Y start line
+            }
             ff_end_tag(pb, vprp);
         }
 

diff --git a/libavformat/avio.c b/libavformat/avio.c
index 64248e0..663789e 100644
--- a/libavformat/avio.c
+++ b/libavformat/avio.c

@@ -297,7 +297,7 @@
        return url_alloc_for_protocol(puc, p, filename, flags, int_cb);
 
     *puc = NULL;
-    if (av_strstart(filename, "https:", NULL))
+    if (av_strstart(filename, "https:", NULL) || av_strstart(filename, "tls:", NULL))
         av_log(NULL, AV_LOG_WARNING, "https protocol not found, recompile FFmpeg with "
                                      "openssl, gnutls "
                                      "or securetransport enabled.\n");
@@ -391,8 +391,10 @@
                 }
                 av_usleep(1000);
             }
-        } else if (ret < 1)
-            return (ret < 0 && ret != AVERROR_EOF) ? ret : len;
+        } else if (ret == AVERROR_EOF)
+            return (len > 0) ? len : AVERROR_EOF;
+        else if (ret < 0)
+            return ret;
         if (ret) {
             fast_retries = FFMAX(fast_retries, 2);
             wait_since = 0;
@@ -623,13 +625,15 @@
 
 int ffurl_get_file_handle(URLContext *h)
 {
-    if (!h->prot->url_get_file_handle)
+    if (!h || !h->prot || !h->prot->url_get_file_handle)
         return -1;
     return h->prot->url_get_file_handle(h);
 }
 
 int ffurl_get_multi_file_handle(URLContext *h, int **handles, int *numhandles)
 {
+    if (!h || !h->prot)
+        return AVERROR(ENOSYS);
     if (!h->prot->url_get_multi_file_handle) {
         if (!h->prot->url_get_file_handle)
             return AVERROR(ENOSYS);
@@ -645,22 +649,21 @@
 
 int ffurl_get_short_seek(URLContext *h)
 {
-    if (!h->prot->url_get_short_seek)
+    if (!h || !h->prot || !h->prot->url_get_short_seek)
         return AVERROR(ENOSYS);
     return h->prot->url_get_short_seek(h);
 }
 
 int ffurl_shutdown(URLContext *h, int flags)
 {
-    if (!h->prot->url_shutdown)
-        return AVERROR(EINVAL);
+    if (!h || !h->prot || !h->prot->url_shutdown)
+        return AVERROR(ENOSYS);
     return h->prot->url_shutdown(h, flags);
 }
 
 int ff_check_interrupt(AVIOInterruptCB *cb)
 {
-    int ret;
-    if (cb && cb->callback && (ret = cb->callback(cb->opaque)))
-        return ret;
+    if (cb && cb->callback)
+        return cb->callback(cb->opaque);
     return 0;
 }

diff --git a/libavformat/avio.h b/libavformat/avio.h
index f9c5972..75912ce 100644
--- a/libavformat/avio.h
+++ b/libavformat/avio.h

@@ -236,7 +236,6 @@
     int (*write_packet)(void *opaque, uint8_t *buf, int buf_size);
     int64_t (*seek)(void *opaque, int64_t offset, int whence);
     int64_t pos;            /**< position in the file of the current buffer */
-    int must_flush;         /**< unused */
     int eof_reached;        /**< true if eof reached */
     int write_flag;         /**< true if open for writing */
     int max_packet_size;
@@ -452,6 +451,8 @@
  * @param write_flag Set to 1 if the buffer should be writable, 0 otherwise.
  * @param opaque An opaque pointer to user-specific data.
  * @param read_packet  A function for refilling the buffer, may be NULL.
+ *                     For stream protocols, must never return 0 but rather
+ *                     a proper AVERROR code.
  * @param write_packet A function for writing the buffer contents, may be NULL.
  *        The function may not change the input buffers content.
  * @param seek A function for seeking to specified byte position, may be NULL.
@@ -569,13 +570,6 @@
  * @return non zero if and only if end of file
  */
 int avio_feof(AVIOContext *s);
-#if FF_API_URL_FEOF
-/**
- * @deprecated use avio_feof()
- */
-attribute_deprecated
-int url_feof(AVIOContext *s);
-#endif
 
 /** @warning Writes up to 4 KiB per call */
 int avio_printf(AVIOContext *s, const char *fmt, ...) av_printf_format(2, 3);

diff --git a/libavformat/avio_internal.h b/libavformat/avio_internal.h
index c01835d..04c1ad5 100644
--- a/libavformat/avio_internal.h
+++ b/libavformat/avio_internal.h

@@ -133,6 +133,14 @@
 int ffio_fdopen(AVIOContext **s, URLContext *h);
 
 /**
+ * Return the URLContext associated with the AVIOContext
+ *
+ * @param s IO context
+ * @return pointer to URLContext or NULL.
+ */
+URLContext *ffio_geturlcontext(AVIOContext *s);
+
+/**
  * Open a write-only fake memory stream. The written data is not stored
  * anywhere - this is only used for measuring the amount of data
  * written.

diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c
index 636cb46..5a33f82 100644
--- a/libavformat/aviobuf.c
+++ b/libavformat/aviobuf.c

@@ -87,6 +87,8 @@
                   int (*write_packet)(void *opaque, uint8_t *buf, int buf_size),
                   int64_t (*seek)(void *opaque, int64_t offset, int whence))
 {
+    memset(s, 0, sizeof(AVIOContext));
+
     s->buffer      = buffer;
     s->orig_buffer_size =
     s->buffer_size = buffer_size;
@@ -135,7 +137,7 @@
                   int (*write_packet)(void *opaque, uint8_t *buf, int buf_size),
                   int64_t (*seek)(void *opaque, int64_t offset, int whence))
 {
-    AVIOContext *s = av_mallocz(sizeof(AVIOContext));
+    AVIOContext *s = av_malloc(sizeof(AVIOContext));
     if (!s)
         return NULL;
     ffio_init_context(s, buffer, buffer_size, write_flag, opaque,
@@ -364,13 +366,6 @@
     return s->eof_reached;
 }
 
-#if FF_API_URL_FEOF
-int url_feof(AVIOContext *s)
-{
-    return avio_feof(s);
-}
-#endif
-
 void avio_wl32(AVIOContext *s, unsigned int val)
 {
     avio_w8(s, (uint8_t) val       );
@@ -531,6 +526,24 @@
     s->last_time = time;
 }
 
+static int read_packet_wrapper(AVIOContext *s, uint8_t *buf, int size)
+{
+    int ret;
+
+    if (!s->read_packet)
+        return AVERROR(EINVAL);
+    ret = s->read_packet(s->opaque, buf, size);
+#if FF_API_OLD_AVIO_EOF_0
+    if (!ret && !s->max_packet_size) {
+        av_log(NULL, AV_LOG_WARNING, "Invalid return value 0 for stream protocol\n");
+        ret = AVERROR_EOF;
+    }
+#else
+    av_assert2(ret || s->max_packet_size);
+#endif
+    return ret;
+}
+
 /* Input stream */
 
 static void fill_buffer(AVIOContext *s)
@@ -569,16 +582,14 @@
         len = s->orig_buffer_size;
     }
 
-    if (s->read_packet)
-        len = s->read_packet(s->opaque, dst, len);
-    else
-        len = 0;
-    if (len <= 0) {
+    len = read_packet_wrapper(s, dst, len);
+    if (len == AVERROR_EOF) {
         /* do not modify buffer if EOF reached so that a seek back can
            be done without rereading data */
         s->eof_reached = 1;
-        if (len < 0)
-            s->error = len;
+    } else if (len < 0) {
+        s->eof_reached = 1;
+        s->error= len;
     } else {
         s->pos += len;
         s->buf_ptr = dst;
@@ -644,15 +655,15 @@
         if (len == 0 || s->write_flag) {
             if((s->direct || size > s->buffer_size) && !s->update_checksum) {
                 // bypass the buffer and read data directly into buf
-                if(s->read_packet)
-                    len = s->read_packet(s->opaque, buf, size);
-
-                if (len <= 0) {
+                len = read_packet_wrapper(s, buf, size);
+                if (len == AVERROR_EOF) {
                     /* do not modify buffer if EOF reached so that a seek back can
                     be done without rereading data */
                     s->eof_reached = 1;
-                    if(len<0)
-                        s->error= len;
+                    break;
+                } else if (len < 0) {
+                    s->eof_reached = 1;
+                    s->error= len;
                     break;
                 } else {
                     s->pos += len;
@@ -711,7 +722,7 @@
         return -1;
 
     if (s->read_packet && s->write_flag) {
-        len = s->read_packet(s->opaque, buf, size);
+        len = read_packet_wrapper(s, buf, size);
         if (len > 0)
             s->pos += len;
         return len;
@@ -812,6 +823,60 @@
     return i;
 }
 
+int ff_get_chomp_line(AVIOContext *s, char *buf, int maxlen)
+{
+    int len = ff_get_line(s, buf, maxlen);
+    while (len > 0 && av_isspace(buf[len - 1]))
+        buf[--len] = '\0';
+    return len;
+}
+
+int64_t ff_read_line_to_bprint(AVIOContext *s, AVBPrint *bp)
+{
+    int len, end;
+    int64_t read = 0;
+    char tmp[1024];
+    char c;
+
+    do {
+        len = 0;
+        do {
+            c = avio_r8(s);
+            end = (c == '\r' || c == '\n' || c == '\0');
+            if (!end)
+                tmp[len++] = c;
+        } while (!end && len < sizeof(tmp));
+        av_bprint_append_data(bp, tmp, len);
+        read += len;
+    } while (!end);
+
+    if (c == '\r' && avio_r8(s) != '\n' && !avio_feof(s))
+        avio_skip(s, -1);
+
+    if (!c && s->error)
+        return s->error;
+
+    if (!c && !read && avio_feof(s))
+        return AVERROR_EOF;
+
+    return read;
+}
+
+int64_t ff_read_line_to_bprint_overwrite(AVIOContext *s, AVBPrint *bp)
+{
+    int64_t ret;
+
+    av_bprint_clear(bp);
+    ret = ff_read_line_to_bprint(s, bp);
+    if (ret < 0)
+        return ret;
+
+    if (!av_bprint_is_complete(bp))
+        return AVERROR(ENOMEM);
+
+    return bp->len;
+}
+
 int avio_get_str(AVIOContext *s, int maxlen, char *buf, int buflen)
 {
     int i;
@@ -971,6 +1036,19 @@
     return AVERROR(ENOMEM);
 }
 
+URLContext* ffio_geturlcontext(AVIOContext *s)
+{
+    AVIOInternal *internal;
+    if (!s)
+        return NULL;
+
+    internal = s->opaque;
+    if (internal && s->read_packet == io_read_packet)
+        return internal->h;
+    else
+        return NULL;
+}
+
 int ffio_ensure_seekback(AVIOContext *s, int64_t buf_size)
 {
     uint8_t *buffer;
@@ -1124,9 +1202,9 @@
     av_freep(&s->opaque);
     av_freep(&s->buffer);
     if (s->write_flag)
-        av_log(s, AV_LOG_DEBUG, "Statistics: %d seeks, %d writeouts\n", s->seek_count, s->writeout_count);
+        av_log(s, AV_LOG_VERBOSE, "Statistics: %d seeks, %d writeouts\n", s->seek_count, s->writeout_count);
     else
-        av_log(s, AV_LOG_DEBUG, "Statistics: %"PRId64" bytes read, %d seeks\n", s->bytes_read, s->seek_count);
+        av_log(s, AV_LOG_VERBOSE, "Statistics: %"PRId64" bytes read, %d seeks\n", s->bytes_read, s->seek_count);
     av_opt_free(s);
 
     avio_context_free(&s);

diff --git a/libavformat/avisynth.c b/libavformat/avisynth.c
index 5670028..250a489 100644
--- a/libavformat/avisynth.c
+++ b/libavformat/avisynth.c

@@ -774,15 +774,15 @@
     int ret;
 
     // Calling library must implement a lock for thread-safe opens.
-    if (ret = avpriv_lock_avformat())
+    if (ret = ff_lock_avformat())
         return ret;
 
     if (ret = avisynth_open_file(s)) {
-        avpriv_unlock_avformat();
+        ff_unlock_avformat();
         return ret;
     }
 
-    avpriv_unlock_avformat();
+    ff_unlock_avformat();
     return 0;
 }
 
@@ -818,11 +818,11 @@
 
 static av_cold int avisynth_read_close(AVFormatContext *s)
 {
-    if (avpriv_lock_avformat())
+    if (ff_lock_avformat())
         return AVERROR_UNKNOWN;
 
     avisynth_context_destroy(s->priv_data);
-    avpriv_unlock_avformat();
+    ff_unlock_avformat();
     return 0;
 }
 

diff --git a/libavformat/avs.c b/libavformat/avs.c
index 763ba63..62f5a42 100644
--- a/libavformat/avs.c
+++ b/libavformat/avs.c

@@ -19,6 +19,12 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+/**
+ * @file
+ * Argonaut Games' Creature Shock demuxer
+ * @see http://wiki.multimedia.cx/index.php?title=AVS
+ */
+
 #include "avformat.h"
 #include "voc.h"
 
@@ -225,7 +231,7 @@
 
 AVInputFormat ff_avs_demuxer = {
     .name           = "avs",
-    .long_name      = NULL_IF_CONFIG_SMALL("AVS"),
+    .long_name      = NULL_IF_CONFIG_SMALL("Argonaut Games Creature Shock"),
     .priv_data_size = sizeof(AvsFormat),
     .read_probe     = avs_probe,
     .read_header    = avs_read_header,

diff --git a/libavformat/bink.c b/libavformat/bink.c
index 8a05082..567a38c 100644
--- a/libavformat/bink.c
+++ b/libavformat/bink.c

@@ -92,6 +92,8 @@
     uint16_t flags;
     int keyframe;
     int ret;
+    uint32_t signature;
+    uint8_t revision;
 
     vst = avformat_new_stream(s, NULL);
     if (!vst)
@@ -160,14 +162,14 @@
         return AVERROR(EIO);
     }
 
+    signature = (vst->codecpar->codec_tag & 0xFFFFFF);
+    revision = ((vst->codecpar->codec_tag >> 24) % 0xFF);
+
+    if ((signature == AV_RL32("BIK") && (revision == 'k')) ||
+        (signature == AV_RL32("KB2") && (revision == 'i' || revision == 'j' || revision == 'k')))
+        avio_skip(pb, 4); /* unknown new field */
+
     if (bink->num_audio_tracks) {
-        uint32_t signature = (vst->codecpar->codec_tag & 0xFFFFFF);
-        uint8_t revision = ((vst->codecpar->codec_tag >> 24) % 0xFF);
-
-        if ((signature == AV_RL32("BIK") && (revision == 0x6b)) || /* k */
-            (signature == AV_RL32("KB2") && (revision == 0x69 || revision == 0x6a || revision == 0x6b))) /* i,j,k */
-            avio_skip(pb, 4); /* unknown new field */
-
         avio_skip(pb, 4 * bink->num_audio_tracks); /* max decoded size */
 
         for (i = 0; i < bink->num_audio_tracks; i++) {

diff --git a/libavformat/bintext.c b/libavformat/bintext.c
index 12e3bfd..0b499d9 100644
--- a/libavformat/bintext.c
+++ b/libavformat/bintext.c

@@ -126,6 +126,53 @@
         par->width = fsize > 4000 ? (160<<3) : (80<<3);
 }
 
+static int bin_probe(AVProbeData *p)
+{
+    const uint8_t *d = p->buf;
+    int magic = 0, sauce = 0;
+    int invisible = 0;
+    int i;
+
+    if (p->buf_size > 256)
+        magic = !memcmp(d + p->buf_size - 256, next_magic, sizeof(next_magic));
+    if (p->buf_size > 128)
+        sauce = !memcmp(d + p->buf_size - 128, "SAUCE00", 7);
+
+    if (magic)
+        return AVPROBE_SCORE_EXTENSION + 1;
+
+    if (av_match_ext(p->filename, "bin")) {
+        AVCodecParameters par;
+        int got_width = 0;
+        par.width = par.height = 0;
+        if (sauce)
+            return AVPROBE_SCORE_EXTENSION + 1;
+
+        predict_width(&par, p->buf_size, got_width);
+        if (par.width <= 0)
+            return 0;
+        calculate_height(&par, p->buf_size);
+        if (par.height <= 0)
+            return 0;
+
+        for (i = 0; i < p->buf_size - 256;  i+=2) {
+            if ((d[i+1] & 15) == (d[i+1] >> 4) && d[i] && d[i] != 0xFF && d[i] != ' ') {
+                invisible ++;
+            }
+        }
+
+        if (par.width * par.height * 2 / (8*16) == p->buf_size)
+            return AVPROBE_SCORE_MAX / 2;
+        return 0;
+    }
+
+    if (sauce)
+        return 1;
+
+    return 0;
+}
+
+
 static int bintext_read_header(AVFormatContext *s)
 {
     BinDemuxContext *bin = s->priv_data;
@@ -343,9 +390,9 @@
     .name           = "bin",
     .long_name      = NULL_IF_CONFIG_SMALL("Binary text"),
     .priv_data_size = sizeof(BinDemuxContext),
+    .read_probe     = bin_probe,
     .read_header    = bintext_read_header,
     .read_packet    = read_packet,
-    .extensions     = "bin",
     .priv_class     = CLASS("Binary text demuxer"),
 };
 #endif

diff --git a/libavformat/bluray.c b/libavformat/bluray.c
index 9282bf9..635c4f1 100644
--- a/libavformat/bluray.c
+++ b/libavformat/bluray.c

@@ -198,7 +198,7 @@
 
     len = bd_read(bd->bd, buf, size);
 
-    return len;
+    return len == 0 ? AVERROR_EOF : len;
 }
 
 static int64_t bluray_seek(URLContext *h, int64_t pos, int whence)

diff --git a/libavformat/cache.c b/libavformat/cache.c
index 6aabca2..66bbbf5 100644
--- a/libavformat/cache.c
+++ b/libavformat/cache.c

@@ -201,7 +201,7 @@
     }
 
     r = ffurl_read(c->inner, buf, size);
-    if (r == 0 && size>0) {
+    if (r == AVERROR_EOF && size>0) {
         c->is_true_eof = 1;
         av_assert0(c->end >= c->logical_pos);
     }
@@ -263,7 +263,7 @@
                 if (whence == SEEK_SET)
                     size = FFMIN(sizeof(tmp), pos - c->logical_pos);
                 ret = cache_read(h, tmp, size);
-                if (ret == 0 && whence == SEEK_END) {
+                if (ret == AVERROR_EOF && whence == SEEK_END) {
                     av_assert0(c->is_true_eof);
                     goto resolve_eof;
                 }

diff --git a/libavformat/cafenc.c b/libavformat/cafenc.c
index f550cd9..0f7c4eb 100644
--- a/libavformat/cafenc.c
+++ b/libavformat/cafenc.c

@@ -81,6 +81,8 @@
         return 320;
     case AV_CODEC_ID_MP1:
         return 384;
+    case AV_CODEC_ID_OPUS:
+        return 960;
     case AV_CODEC_ID_MP2:
     case AV_CODEC_ID_MP3:
         return 1152;
@@ -117,11 +119,15 @@
 
     switch (par->codec_id) {
     case AV_CODEC_ID_AAC:
-    case AV_CODEC_ID_OPUS:
         av_log(s, AV_LOG_ERROR, "muxing codec currently unsupported\n");
         return AVERROR_PATCHWELCOME;
     }
 
+    if (par->codec_id == AV_CODEC_ID_OPUS && par->channels > 2) {
+        av_log(s, AV_LOG_ERROR, "Only mono and stereo are supported for Opus\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     if (!codec_tag) {
         av_log(s, AV_LOG_ERROR, "unsupported codec\n");
         return AVERROR_INVALIDDATA;

diff --git a/libavformat/chromaprint.c b/libavformat/chromaprint.c
index 4da02be..f39c09d 100644
--- a/libavformat/chromaprint.c
+++ b/libavformat/chromaprint.c

@@ -20,6 +20,7 @@
  */
 
 #include "avformat.h"
+#include "internal.h"
 #include "libavutil/opt.h"
 #include "libavcodec/internal.h"
 #include <chromaprint.h>
@@ -49,9 +50,9 @@
 static void cleanup(ChromaprintMuxContext *cpr)
 {
     if (cpr->ctx) {
-        avpriv_lock_avformat();
+        ff_lock_avformat();
         chromaprint_free(cpr->ctx);
-        avpriv_unlock_avformat();
+        ff_unlock_avformat();
     }
 }
 
@@ -60,9 +61,9 @@
     ChromaprintMuxContext *cpr = s->priv_data;
     AVStream *st;
 
-    avpriv_lock_avformat();
+    ff_lock_avformat();
     cpr->ctx = chromaprint_new(cpr->algorithm);
-    avpriv_unlock_avformat();
+    ff_unlock_avformat();
 
     if (!cpr->ctx) {
         av_log(s, AV_LOG_ERROR, "Failed to create chromaprint context.\n");

diff --git a/libavformat/codec2.c b/libavformat/codec2.c
new file mode 100644
index 0000000..28dbbd8
--- /dev/null
+++ b/libavformat/codec2.c

@@ -0,0 +1,285 @@
+/*
+ * codec2 muxer and demuxers
+ * Copyright (c) 2017 Tomas Härdin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <memory.h>
+#include "libavcodec/codec2utils.h"
+#include "libavutil/intreadwrite.h"
+#include "avio_internal.h"
+#include "avformat.h"
+#include "internal.h"
+#include "rawdec.h"
+#include "rawenc.h"
+#include "pcm.h"
+
+#define AVPRIV_CODEC2_HEADER_SIZE 7
+#define AVPRIV_CODEC2_MAGIC       0xC0DEC2
+
+//the lowest version we should ever run across is 0.8
+//we may run across later versions as the format evolves
+#define EXPECTED_CODEC2_MAJOR_VERSION 0
+#define EXPECTED_CODEC2_MINOR_VERSION 8
+
+typedef struct {
+    const AVClass *class;
+    int mode;
+    int frames_per_packet;
+} Codec2Context;
+
+static int codec2_probe(AVProbeData *p)
+{
+    //must start wih C0 DE C2
+    if (AV_RB24(p->buf) != AVPRIV_CODEC2_MAGIC) {
+        return 0;
+    }
+
+    //no .c2 files prior to 0.8
+    //be strict about major version while we're at it
+    if (p->buf[3] != EXPECTED_CODEC2_MAJOR_VERSION ||
+        p->buf[4] <  EXPECTED_CODEC2_MINOR_VERSION) {
+        return 0;
+    }
+
+    //32 bits of identification -> low score
+    return AVPROBE_SCORE_EXTENSION + 1;
+}
+
+static int codec2_read_header_common(AVFormatContext *s, AVStream *st)
+{
+    int mode = avpriv_codec2_mode_from_extradata(st->codecpar->extradata);
+
+    st->codecpar->codec_type        = AVMEDIA_TYPE_AUDIO;
+    st->codecpar->codec_id          = AV_CODEC_ID_CODEC2;
+    st->codecpar->sample_rate       = 8000;
+    st->codecpar->channels          = 1;
+    st->codecpar->format            = AV_SAMPLE_FMT_S16;
+    st->codecpar->channel_layout    = AV_CH_LAYOUT_MONO;
+    st->codecpar->bit_rate          = avpriv_codec2_mode_bit_rate(s, mode);
+    st->codecpar->frame_size        = avpriv_codec2_mode_frame_size(s, mode);
+    st->codecpar->block_align       = avpriv_codec2_mode_block_align(s, mode);
+
+    if (st->codecpar->bit_rate <= 0 ||
+        st->codecpar->frame_size <= 0 ||
+        st->codecpar->block_align <= 0) {
+        return AVERROR_INVALIDDATA;
+    }
+
+    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+
+    return 0;
+}
+
+static int codec2_read_header(AVFormatContext *s)
+{
+    AVStream *st = avformat_new_stream(s, NULL);
+    int ret, version;
+
+    if (!st) {
+        return AVERROR(ENOMEM);
+    }
+
+    if (avio_rb24(s->pb) != AVPRIV_CODEC2_MAGIC) {
+        av_log(s, AV_LOG_ERROR, "not a .c2 file\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    ret = ff_alloc_extradata(st->codecpar, AVPRIV_CODEC2_EXTRADATA_SIZE);
+    if (ret) {
+        return ret;
+    }
+
+    ret = ffio_read_size(s->pb, st->codecpar->extradata, AVPRIV_CODEC2_EXTRADATA_SIZE);
+    if (ret < 0) {
+        return ret;
+    }
+
+    version = avpriv_codec2_version_from_extradata(st->codecpar->extradata);
+    if ((version >> 8) != EXPECTED_CODEC2_MAJOR_VERSION) {
+        avpriv_report_missing_feature(s, "Major version %i", version >> 8);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    s->internal->data_offset = AVPRIV_CODEC2_HEADER_SIZE;
+
+    return codec2_read_header_common(s, st);
+}
+
+static int codec2_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    Codec2Context *c2 = s->priv_data;
+    AVStream *st = s->streams[0];
+    int ret, size, n, block_align, frame_size;
+
+    block_align = st->codecpar->block_align;
+    frame_size  = st->codecpar->frame_size;
+
+    if (block_align <= 0 || frame_size <= 0 || c2->frames_per_packet <= 0) {
+        return AVERROR(EINVAL);
+    }
+
+    //try to read desired number of frames, compute n from to actual number of bytes read
+    size = c2->frames_per_packet * block_align;
+    ret = av_get_packet(s->pb, pkt, size);
+    if (ret < 0) {
+        return ret;
+    }
+
+    //only set duration - compute_pkt_fields() and ff_pcm_read_seek() takes care of everything else
+    //tested by spamming the seek functionality in ffplay
+    n = ret / block_align;
+    pkt->duration = n * frame_size;
+
+    return ret;
+}
+
+static int codec2_write_header(AVFormatContext *s)
+{
+    AVStream *st;
+
+    if (s->nb_streams != 1 || s->streams[0]->codecpar->codec_id != AV_CODEC_ID_CODEC2) {
+        av_log(s, AV_LOG_ERROR, ".c2 files must have exactly one codec2 stream\n");
+        return AVERROR(EINVAL);
+    }
+
+    st = s->streams[0];
+
+    if (st->codecpar->extradata_size != AVPRIV_CODEC2_EXTRADATA_SIZE) {
+        av_log(s, AV_LOG_ERROR, ".c2 files require exactly %i bytes of extradata (got %i)\n",
+               AVPRIV_CODEC2_EXTRADATA_SIZE, st->codecpar->extradata_size);
+        return AVERROR(EINVAL);
+    }
+
+    avio_wb24(s->pb, AVPRIV_CODEC2_MAGIC);
+    avio_write(s->pb, st->codecpar->extradata, AVPRIV_CODEC2_EXTRADATA_SIZE);
+
+    return 0;
+}
+
+static int codec2raw_read_header(AVFormatContext *s)
+{
+    Codec2Context *c2 = s->priv_data;
+    AVStream *st;
+    int ret;
+
+    if (c2->mode < 0) {
+        //FIXME: using a default value of -1 for mandatory options is an incredibly ugly hack
+        av_log(s, AV_LOG_ERROR, "-mode must be set in order to make sense of raw codec2 files\n");
+        return AVERROR(EINVAL);
+    }
+
+    st = avformat_new_stream(s, NULL);
+    if (!st) {
+        return AVERROR(ENOMEM);
+    }
+
+    ret = ff_alloc_extradata(st->codecpar, AVPRIV_CODEC2_EXTRADATA_SIZE);
+    if (ret) {
+        return ret;
+    }
+
+    s->internal->data_offset = 0;
+    avpriv_codec2_make_extradata(st->codecpar->extradata, c2->mode);
+
+    return codec2_read_header_common(s, st);
+}
+
+//transcoding report2074.c2 to wav went from 7.391s to 5.322s with -frames_per_packet 1000 compared to default, same sha1sum
+#define FRAMES_PER_PACKET \
+    { "frames_per_packet", "Number of frames to read at a time. Higher = faster decoding, lower granularity", \
+      offsetof(Codec2Context, frames_per_packet), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM}
+
+static const AVOption codec2_options[] = {
+    FRAMES_PER_PACKET,
+    { NULL },
+};
+
+static const AVOption codec2raw_options[] = {
+    AVPRIV_CODEC2_AVOPTIONS("codec2 mode [mandatory]", Codec2Context, -1, -1, AV_OPT_FLAG_DECODING_PARAM),
+    FRAMES_PER_PACKET,
+    { NULL },
+};
+
+static const AVClass codec2_mux_class = {
+    .class_name = "codec2 muxer",
+    .item_name  = av_default_item_name,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_DEMUXER,
+};
+
+static const AVClass codec2_demux_class = {
+    .class_name = "codec2 demuxer",
+    .item_name  = av_default_item_name,
+    .option     = codec2_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_DEMUXER,
+};
+
+static const AVClass codec2raw_demux_class = {
+    .class_name = "codec2raw demuxer",
+    .item_name  = av_default_item_name,
+    .option     = codec2raw_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_DEMUXER,
+};
+
+#if CONFIG_CODEC2_DEMUXER
+AVInputFormat ff_codec2_demuxer = {
+    .name           = "codec2",
+    .long_name      = NULL_IF_CONFIG_SMALL("codec2 .c2 demuxer"),
+    .priv_data_size = sizeof(Codec2Context),
+    .extensions     = "c2",
+    .read_probe     = codec2_probe,
+    .read_header    = codec2_read_header,
+    .read_packet    = codec2_read_packet,
+    .read_seek      = ff_pcm_read_seek,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .raw_codec_id   = AV_CODEC_ID_CODEC2,
+    .priv_class     = &codec2_demux_class,
+};
+#endif
+
+#if CONFIG_CODEC2_MUXER
+AVOutputFormat ff_codec2_muxer = {
+    .name           = "codec2",
+    .long_name      = NULL_IF_CONFIG_SMALL("codec2 .c2 muxer"),
+    .priv_data_size = sizeof(Codec2Context),
+    .extensions     = "c2",
+    .audio_codec    = AV_CODEC_ID_CODEC2,
+    .video_codec    = AV_CODEC_ID_NONE,
+    .write_header   = codec2_write_header,
+    .write_packet   = ff_raw_write_packet,
+    .flags          = AVFMT_NOTIMESTAMPS,
+    .priv_class     = &codec2_mux_class,
+};
+#endif
+
+#if CONFIG_CODEC2RAW_DEMUXER
+AVInputFormat ff_codec2raw_demuxer = {
+    .name           = "codec2raw",
+    .long_name      = NULL_IF_CONFIG_SMALL("raw codec2 demuxer"),
+    .priv_data_size = sizeof(Codec2Context),
+    .read_header    = codec2raw_read_header,
+    .read_packet    = codec2_read_packet,
+    .read_seek      = ff_pcm_read_seek,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .raw_codec_id   = AV_CODEC_ID_CODEC2,
+    .priv_class     = &codec2raw_demux_class,
+};
+#endif

diff --git a/libavformat/concat.c b/libavformat/concat.c
index 46b520f..19c83c3 100644
--- a/libavformat/concat.c
+++ b/libavformat/concat.c

@@ -135,19 +135,20 @@
 
     while (size > 0) {
         result = ffurl_read(nodes[i].uc, buf, size);
-        if (result < 0)
-            return total ? total : result;
-        if (!result) {
+        if (result == AVERROR_EOF) {
             if (i + 1 == data->length ||
                 ffurl_seek(nodes[++i].uc, 0, SEEK_SET) < 0)
                 break;
+            result = 0;
         }
+        if (result < 0)
+            return total ? total : result;
         total += result;
         buf   += result;
         size  -= result;
     }
     data->current = i;
-    return total;
+    return total ? total : result;
 }
 
 static int64_t concat_seek(URLContext *h, int64_t pos, int whence)

diff --git a/libavformat/concatdec.c b/libavformat/concatdec.c
index 0e18901..bbe1313 100644
--- a/libavformat/concatdec.c
+++ b/libavformat/concatdec.c

@@ -20,6 +20,7 @@
 
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
@@ -126,10 +127,10 @@
         url = filename;
         filename = NULL;
     } else {
-        url_len = strlen(avf->filename) + strlen(filename) + 16;
+        url_len = strlen(avf->url) + strlen(filename) + 16;
         if (!(url = av_malloc(url_len)))
             FAIL(AVERROR(ENOMEM));
-        ff_make_absolute_url(url, url_len, avf->filename, filename);
+        ff_make_absolute_url(url, url_len, avf->url, filename);
         av_freep(&filename);
     }
 
@@ -185,8 +186,8 @@
         return ret;
     st->r_frame_rate        = source_st->r_frame_rate;
     st->avg_frame_rate      = source_st->avg_frame_rate;
-    st->time_base           = source_st->time_base;
     st->sample_aspect_ratio = source_st->sample_aspect_ratio;
+    avpriv_set_pts_info(st, 64, source_st->time_base.num, source_st->time_base.den);
 
     av_dict_copy(&st->metadata, source_st->metadata, 0);
     return 0;
@@ -386,18 +387,18 @@
 static int concat_read_header(AVFormatContext *avf)
 {
     ConcatContext *cat = avf->priv_data;
-    uint8_t buf[4096];
+    AVBPrint bp;
     uint8_t *cursor, *keyword;
-    int ret, line = 0, i;
+    int line = 0, i;
     unsigned nb_files_alloc = 0;
     ConcatFile *file = NULL;
-    int64_t time = 0;
+    int64_t ret, time = 0;
 
-    while (1) {
-        if ((ret = ff_get_line(avf->pb, buf, sizeof(buf))) <= 0)
-            break;
+    av_bprint_init(&bp, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    while ((ret = ff_read_line_to_bprint_overwrite(avf->pb, &bp)) >= 0) {
         line++;
-        cursor = buf;
+        cursor = bp.str;
         keyword = get_keyword(&cursor);
         if (!*keyword || *keyword == '#')
             continue;
@@ -473,7 +474,7 @@
             FAIL(AVERROR_INVALIDDATA);
         }
     }
-    if (ret < 0)
+    if (ret != AVERROR_EOF && ret < 0)
         goto fail;
     if (!cat->nb_files)
         FAIL(AVERROR_INVALIDDATA);
@@ -499,9 +500,11 @@
                                                MATCH_ONE_TO_ONE;
     if ((ret = open_file(avf, 0)) < 0)
         goto fail;
+    av_bprint_finalize(&bp, NULL);
     return 0;
 
 fail:
+    av_bprint_finalize(&bp, NULL);
     concat_read_close(avf);
     return ret;
 }
@@ -600,7 +603,6 @@
             av_packet_unref(pkt);
             continue;
         }
-        pkt->stream_index = cs->out_stream_index;
         break;
     }
     if ((ret = filter_packet(avf, cs, pkt)))
@@ -643,6 +645,7 @@
         }
     }
 
+    pkt->stream_index = cs->out_stream_index;
     return ret;
 }
 

diff --git a/libavformat/dashdec.c b/libavformat/dashdec.c
index f63f1ff..497e7e4 100644
--- a/libavformat/dashdec.c
+++ b/libavformat/dashdec.c

@@ -84,6 +84,10 @@
     int stream_index;
 
     enum AVMediaType type;
+    char id[20];
+    int bandwidth;
+    AVRational framerate;
+    AVStream *assoc_stream; /* demuxer stream associated with this representation */
 
     int n_fragments;
     struct fragment **fragments; /* VOD list of fragment for profile */
@@ -118,13 +122,30 @@
 typedef struct DASHContext {
     const AVClass *class;
     char *base_url;
-    struct representation *cur_video;
-    struct representation *cur_audio;
+    char *adaptionset_contenttype_val;
+    char *adaptionset_par_val;
+    char *adaptionset_lang_val;
+    char *adaptionset_minbw_val;
+    char *adaptionset_maxbw_val;
+    char *adaptionset_minwidth_val;
+    char *adaptionset_maxwidth_val;
+    char *adaptionset_minheight_val;
+    char *adaptionset_maxheight_val;
+    char *adaptionset_minframerate_val;
+    char *adaptionset_maxframerate_val;
+    char *adaptionset_segmentalignment_val;
+    char *adaptionset_bitstreamswitching_val;
+
+    int n_videos;
+    struct representation **videos;
+    int n_audios;
+    struct representation **audios;
 
     /* MediaPresentationDescription Attribute */
     uint64_t media_presentation_duration;
     uint64_t suggested_presentation_delay;
     uint64_t availability_start_time;
+    uint64_t availability_end_time;
     uint64_t publish_time;
     uint64_t minimum_update_period;
     uint64_t time_shift_buffer_depth;
@@ -136,13 +157,27 @@
 
     int is_live;
     AVIOInterruptCB *interrupt_callback;
-    char *user_agent;                    ///< holds HTTP user agent set as an AVOption to the HTTP protocol context
-    char *cookies;                       ///< holds HTTP cookie values set in either the initial response or as an AVOption to the HTTP protocol context
-    char *headers;                       ///< holds HTTP headers set as an AVOption to the HTTP protocol context
     char *allowed_extensions;
     AVDictionary *avio_opts;
+    int max_url_size;
+
+    /* Flags for init section*/
+    int is_init_section_common_video;
+    int is_init_section_common_audio;
+
 } DASHContext;
 
+static int ishttp(char *url)
+{
+    const char *proto_name = avio_find_protocol_name(url);
+    return av_strstart(proto_name, "http", NULL);
+}
+
+static int aligned(int val)
+{
+    return ((val + 0x3F) >> 6) << 6;
+}
+
 static uint64_t get_current_time_in_sec(void)
 {
     return  av_gettime() / 1000000;
@@ -238,6 +273,12 @@
                 goto finish;
 
             start_time += pls->timelines[i]->duration;
+
+            if (pls->timelines[i]->repeat == -1) {
+                start_time = pls->timelines[i]->duration * cur_seq_no;
+                goto finish;
+            }
+
             for (j = 0; j < pls->timelines[i]->repeat; j++) {
                 num++;
                 if (num == cur_seq_no)
@@ -328,25 +369,29 @@
     }
 
     av_freep(&pls->url_template);
-    av_freep(pls);
+    av_freep(&pls);
 }
 
-static void set_httpheader_options(DASHContext *c, AVDictionary *opts)
+static void free_video_list(DASHContext *c)
 {
-    // broker prior HTTP options that should be consistent across requests
-    av_dict_set(&opts, "user-agent", c->user_agent, 0);
-    av_dict_set(&opts, "cookies", c->cookies, 0);
-    av_dict_set(&opts, "headers", c->headers, 0);
-    if (c->is_live) {
-        av_dict_set(&opts, "seekable", "0", 0);
+    int i;
+    for (i = 0; i < c->n_videos; i++) {
+        struct representation *pls = c->videos[i];
+        free_representation(pls);
     }
+    av_freep(&c->videos);
+    c->n_videos = 0;
 }
-static void update_options(char **dest, const char *name, void *src)
+
+static void free_audio_list(DASHContext *c)
 {
-    av_freep(dest);
-    av_opt_get(src, name, AV_OPT_SEARCH_CHILDREN, (uint8_t**)dest);
-    if (*dest)
-        av_freep(dest);
+    int i;
+    for (i = 0; i < c->n_audios; i++) {
+        struct representation *pls = c->audios[i];
+        free_representation(pls);
+    }
+    av_freep(&c->audios);
+    c->n_audios = 0;
 }
 
 static int open_url(AVFormatContext *s, AVIOContext **pb, const char *url,
@@ -375,9 +420,9 @@
     if (av_strstart(proto_name, "file", NULL)) {
         if (strcmp(c->allowed_extensions, "ALL") && !av_match_ext(url, c->allowed_extensions)) {
             av_log(s, AV_LOG_ERROR,
-                "Filename extension of \'%s\' is not a common multimedia extension, blocked for security reasons.\n"
-                "If you wish to override this adjust allowed_extensions, you can set it to \'ALL\' to allow all\n",
-                url);
+                   "Filename extension of \'%s\' is not a common multimedia extension, blocked for security reasons.\n"
+                   "If you wish to override this adjust allowed_extensions, you can set it to \'ALL\' to allow all\n",
+                   url);
             return AVERROR_INVALIDDATA;
         }
     } else if (av_strstart(proto_name, "http", NULL)) {
@@ -392,7 +437,8 @@
     else if (strcmp(proto_name, "file") || !strncmp(url, "file,", 5))
         return AVERROR_INVALIDDATA;
 
-    ret = s->io_open(s, pb, url, AVIO_FLAG_READ, &tmp);
+    av_freep(pb);
+    ret = avio_open2(pb, url, AVIO_FLAG_READ, c->interrupt_callback, &tmp);
     if (ret >= 0) {
         // update cookies on http response with setcookies.
         char *new_cookies = NULL;
@@ -401,11 +447,9 @@
             av_opt_get(*pb, "cookies", AV_OPT_SEARCH_CHILDREN, (uint8_t**)&new_cookies);
 
         if (new_cookies) {
-            av_free(c->cookies);
-            c->cookies = new_cookies;
+            av_dict_set(&opts, "cookies", new_cookies, AV_DICT_DONT_STRDUP_VAL);
         }
 
-        av_dict_set(&opts, "cookies", c->cookies, 0);
     }
 
     av_dict_free(&tmp);
@@ -418,6 +462,7 @@
 
 static char *get_content_url(xmlNodePtr *baseurl_nodes,
                              int n_baseurl_nodes,
+                             int max_url_size,
                              char *rep_id_val,
                              char *rep_bandwidth_val,
                              char *val)
@@ -425,10 +470,12 @@
     int i;
     char *text;
     char *url = NULL;
-    char tmp_str[MAX_URL_SIZE];
-    char tmp_str_2[MAX_URL_SIZE];
+    char *tmp_str = av_mallocz(max_url_size);
+    char *tmp_str_2 = av_mallocz(max_url_size);
 
-    memset(tmp_str, 0, sizeof(tmp_str));
+    if (!tmp_str || !tmp_str_2) {
+        return NULL;
+    }
 
     for (i = 0; i < n_baseurl_nodes; ++i) {
         if (baseurl_nodes[i] &&
@@ -436,32 +483,36 @@
             baseurl_nodes[i]->children->type == XML_TEXT_NODE) {
             text = xmlNodeGetContent(baseurl_nodes[i]->children);
             if (text) {
-                memset(tmp_str, 0, sizeof(tmp_str));
-                memset(tmp_str_2, 0, sizeof(tmp_str_2));
-                ff_make_absolute_url(tmp_str_2, MAX_URL_SIZE, tmp_str, text);
-                av_strlcpy(tmp_str, tmp_str_2, sizeof(tmp_str));
+                memset(tmp_str, 0, max_url_size);
+                memset(tmp_str_2, 0, max_url_size);
+                ff_make_absolute_url(tmp_str_2, max_url_size, tmp_str, text);
+                av_strlcpy(tmp_str, tmp_str_2, max_url_size);
                 xmlFree(text);
             }
         }
     }
 
     if (val)
-        av_strlcat(tmp_str, (const char*)val, sizeof(tmp_str));
+        av_strlcat(tmp_str, (const char*)val, max_url_size);
 
     if (rep_id_val) {
         url = av_strireplace(tmp_str, "$RepresentationID$", (const char*)rep_id_val);
         if (!url) {
-            return NULL;
+            goto end;
         }
-        av_strlcpy(tmp_str, url, sizeof(tmp_str));
-        av_free(url);
+        av_strlcpy(tmp_str, url, max_url_size);
     }
     if (rep_bandwidth_val && tmp_str[0] != '\0') {
+        // free any previously assigned url before reassigning
+        av_free(url);
         url = av_strireplace(tmp_str, "$Bandwidth$", (const char*)rep_bandwidth_val);
         if (!url) {
-            return NULL;
+            goto end;
         }
     }
+end:
+    av_free(tmp_str);
+    av_free(tmp_str_2);
     return url;
 }
 
@@ -522,55 +573,85 @@
     return type;
 }
 
+static struct fragment * get_Fragment(char *range)
+{
+    struct fragment * seg =  av_mallocz(sizeof(struct fragment));
+
+    if (!seg)
+        return NULL;
+
+    seg->size = -1;
+    if (range) {
+        char *str_end_offset;
+        char *str_offset = av_strtok(range, "-", &str_end_offset);
+        seg->url_offset = strtoll(str_offset, NULL, 10);
+        seg->size = strtoll(str_end_offset, NULL, 10) - seg->url_offset;
+    }
+
+    return seg;
+}
+
 static int parse_manifest_segmenturlnode(AVFormatContext *s, struct representation *rep,
                                          xmlNodePtr fragmenturl_node,
                                          xmlNodePtr *baseurl_nodes,
                                          char *rep_id_val,
                                          char *rep_bandwidth_val)
 {
+    DASHContext *c = s->priv_data;
     char *initialization_val = NULL;
     char *media_val = NULL;
+    char *range_val = NULL;
+    int max_url_size = c ? c->max_url_size: MAX_URL_SIZE;
 
     if (!av_strcasecmp(fragmenturl_node->name, (const char *)"Initialization")) {
         initialization_val = xmlGetProp(fragmenturl_node, "sourceURL");
-        if (initialization_val) {
-            rep->init_section = av_mallocz(sizeof(struct fragment));
+        range_val = xmlGetProp(fragmenturl_node, "range");
+        if (initialization_val || range_val) {
+            rep->init_section = get_Fragment(range_val);
             if (!rep->init_section) {
                 xmlFree(initialization_val);
+                xmlFree(range_val);
                 return AVERROR(ENOMEM);
             }
             rep->init_section->url = get_content_url(baseurl_nodes, 4,
+                                                     max_url_size,
                                                      rep_id_val,
                                                      rep_bandwidth_val,
                                                      initialization_val);
+
             if (!rep->init_section->url) {
                 av_free(rep->init_section);
                 xmlFree(initialization_val);
+                xmlFree(range_val);
                 return AVERROR(ENOMEM);
             }
-            rep->init_section->size = -1;
             xmlFree(initialization_val);
+            xmlFree(range_val);
         }
     } else if (!av_strcasecmp(fragmenturl_node->name, (const char *)"SegmentURL")) {
         media_val = xmlGetProp(fragmenturl_node, "media");
-        if (media_val) {
-            struct fragment *seg = av_mallocz(sizeof(struct fragment));
+        range_val = xmlGetProp(fragmenturl_node, "mediaRange");
+        if (media_val || range_val) {
+            struct fragment *seg = get_Fragment(range_val);
             if (!seg) {
                 xmlFree(media_val);
+                xmlFree(range_val);
                 return AVERROR(ENOMEM);
             }
             seg->url = get_content_url(baseurl_nodes, 4,
+                                       max_url_size,
                                        rep_id_val,
                                        rep_bandwidth_val,
                                        media_val);
             if (!seg->url) {
                 av_free(seg);
                 xmlFree(media_val);
+                xmlFree(range_val);
                 return AVERROR(ENOMEM);
             }
-            seg->size = -1;
             dynarray_add(&rep->fragments, &rep->n_fragments, seg);
             xmlFree(media_val);
+            xmlFree(range_val);
         }
     }
 
@@ -613,14 +694,128 @@
     return 0;
 }
 
+static int resolve_content_path(AVFormatContext *s, const char *url, int *max_url_size, xmlNodePtr *baseurl_nodes, int n_baseurl_nodes) {
+
+    char *tmp_str = NULL;
+    char *path = NULL;
+    char *mpdName = NULL;
+    xmlNodePtr node = NULL;
+    char *baseurl = NULL;
+    char *root_url = NULL;
+    char *text = NULL;
+    char *tmp = NULL;
+
+    int isRootHttp = 0;
+    char token ='/';
+    int start =  0;
+    int rootId = 0;
+    int updated = 0;
+    int size = 0;
+    int i;
+    int tmp_max_url_size = strlen(url);
+
+    for (i = n_baseurl_nodes-1; i >= 0 ; i--) {
+        text = xmlNodeGetContent(baseurl_nodes[i]);
+        if (!text)
+            continue;
+        tmp_max_url_size += strlen(text);
+        if (ishttp(text)) {
+            xmlFree(text);
+            break;
+        }
+        xmlFree(text);
+    }
+
+    tmp_max_url_size = aligned(tmp_max_url_size);
+    text = av_mallocz(tmp_max_url_size);
+    if (!text) {
+        updated = AVERROR(ENOMEM);
+        goto end;
+    }
+    av_strlcpy(text, url, strlen(url)+1);
+    tmp = text;
+    while (mpdName = av_strtok(tmp, "/", &tmp))  {
+        size = strlen(mpdName);
+    }
+    av_free(text);
+
+    path = av_mallocz(tmp_max_url_size);
+    tmp_str = av_mallocz(tmp_max_url_size);
+    if (!tmp_str || !path) {
+        updated = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    av_strlcpy (path, url, strlen(url) - size + 1);
+    for (rootId = n_baseurl_nodes - 1; rootId > 0; rootId --) {
+        if (!(node = baseurl_nodes[rootId])) {
+            continue;
+        }
+        text = xmlNodeGetContent(node);
+        if (ishttp(text)) {
+            xmlFree(text);
+            break;
+        }
+        xmlFree(text);
+    }
+
+    node = baseurl_nodes[rootId];
+    baseurl = xmlNodeGetContent(node);
+    root_url = (av_strcasecmp(baseurl, "")) ? baseurl : path;
+    if (node) {
+        xmlNodeSetContent(node, root_url);
+        updated = 1;
+    }
+
+    size = strlen(root_url);
+    isRootHttp = ishttp(root_url);
+
+    if (root_url[size - 1] != token) {
+        av_strlcat(root_url, "/", size + 2);
+        size += 2;
+    }
+
+    for (i = 0; i < n_baseurl_nodes; ++i) {
+        if (i == rootId) {
+            continue;
+        }
+        text = xmlNodeGetContent(baseurl_nodes[i]);
+        if (text) {
+            memset(tmp_str, 0, strlen(tmp_str));
+            if (!ishttp(text) && isRootHttp) {
+                av_strlcpy(tmp_str, root_url, size + 1);
+            }
+            start = (text[0] == token);
+            av_strlcat(tmp_str, text + start, tmp_max_url_size);
+            xmlNodeSetContent(baseurl_nodes[i], tmp_str);
+            updated = 1;
+            xmlFree(text);
+        }
+    }
+
+end:
+    if (tmp_max_url_size > *max_url_size) {
+        *max_url_size = tmp_max_url_size;
+    }
+    av_free(path);
+    av_free(tmp_str);
+    xmlFree(baseurl);
+    return updated;
+
+}
+
 static int parse_manifest_representation(AVFormatContext *s, const char *url,
                                          xmlNodePtr node,
                                          xmlNodePtr adaptionset_node,
                                          xmlNodePtr mpd_baseurl_node,
                                          xmlNodePtr period_baseurl_node,
+                                         xmlNodePtr period_segmenttemplate_node,
+                                         xmlNodePtr period_segmentlist_node,
                                          xmlNodePtr fragment_template_node,
                                          xmlNodePtr content_component_node,
-                                         xmlNodePtr adaptionset_baseurl_node)
+                                         xmlNodePtr adaptionset_baseurl_node,
+                                         xmlNodePtr adaptionset_segmentlist_node,
+                                         xmlNodePtr adaptionset_supplementalproperty_node)
 {
     int32_t ret = 0;
     int32_t audio_rep_idx = 0;
@@ -631,18 +826,21 @@
     xmlNodePtr representation_segmenttemplate_node = NULL;
     xmlNodePtr representation_baseurl_node = NULL;
     xmlNodePtr representation_segmentlist_node = NULL;
+    xmlNodePtr segmentlists_tab[2];
     xmlNodePtr fragment_timeline_node = NULL;
-    xmlNodePtr fragment_templates_tab[2];
+    xmlNodePtr fragment_templates_tab[5];
     char *duration_val = NULL;
     char *presentation_timeoffset_val = NULL;
     char *startnumber_val = NULL;
     char *timescale_val = NULL;
     char *initialization_val = NULL;
     char *media_val = NULL;
+    char *val = NULL;
     xmlNodePtr baseurl_nodes[4];
     xmlNodePtr representation_node = node;
     char *rep_id_val = xmlGetProp(representation_node, "id");
     char *rep_bandwidth_val = xmlGetProp(representation_node, "bandwidth");
+    char *rep_framerate_val = xmlGetProp(representation_node, "frameRate");
     enum AVMediaType type = AVMEDIA_TYPE_UNKNOWN;
 
     // try get information from representation
@@ -656,7 +854,7 @@
         type = get_content_type(adaptionset_node);
     if (type == AVMEDIA_TYPE_UNKNOWN) {
         av_log(s, AV_LOG_VERBOSE, "Parsing '%s' - skipp not supported representation type\n", url);
-    } else if ((type == AVMEDIA_TYPE_VIDEO && !c->cur_video) || (type == AVMEDIA_TYPE_AUDIO && !c->cur_audio)) {
+    } else if (type == AVMEDIA_TYPE_VIDEO || type == AVMEDIA_TYPE_AUDIO) {
         // convert selected representation to our internal struct
         rep = av_mallocz(sizeof(struct representation));
         if (!rep) {
@@ -672,17 +870,27 @@
         baseurl_nodes[2] = adaptionset_baseurl_node;
         baseurl_nodes[3] = representation_baseurl_node;
 
-        if (representation_segmenttemplate_node || fragment_template_node) {
+        ret = resolve_content_path(s, url, &c->max_url_size, baseurl_nodes, 4);
+        c->max_url_size = aligned(c->max_url_size
+                                  + (rep_id_val ? strlen(rep_id_val) : 0)
+                                  + (rep_bandwidth_val ? strlen(rep_bandwidth_val) : 0));
+        if (ret == AVERROR(ENOMEM) || ret == 0) {
+            goto end;
+        }
+        if (representation_segmenttemplate_node || fragment_template_node || period_segmenttemplate_node) {
             fragment_timeline_node = NULL;
             fragment_templates_tab[0] = representation_segmenttemplate_node;
-            fragment_templates_tab[1] = fragment_template_node;
+            fragment_templates_tab[1] = adaptionset_segmentlist_node;
+            fragment_templates_tab[2] = fragment_template_node;
+            fragment_templates_tab[3] = period_segmenttemplate_node;
+            fragment_templates_tab[4] = period_segmentlist_node;
 
-            presentation_timeoffset_val = get_val_from_nodes_tab(fragment_templates_tab, 2, "presentationTimeOffset");
-            duration_val = get_val_from_nodes_tab(fragment_templates_tab, 2, "duration");
-            startnumber_val = get_val_from_nodes_tab(fragment_templates_tab, 2, "startNumber");
-            timescale_val = get_val_from_nodes_tab(fragment_templates_tab, 2, "timescale");
-            initialization_val = get_val_from_nodes_tab(fragment_templates_tab, 2, "initialization");
-            media_val = get_val_from_nodes_tab(fragment_templates_tab, 2, "media");
+            presentation_timeoffset_val = get_val_from_nodes_tab(fragment_templates_tab, 4, "presentationTimeOffset");
+            duration_val = get_val_from_nodes_tab(fragment_templates_tab, 4, "duration");
+            startnumber_val = get_val_from_nodes_tab(fragment_templates_tab, 4, "startNumber");
+            timescale_val = get_val_from_nodes_tab(fragment_templates_tab, 4, "timescale");
+            initialization_val = get_val_from_nodes_tab(fragment_templates_tab, 4, "initialization");
+            media_val = get_val_from_nodes_tab(fragment_templates_tab, 4, "media");
 
             if (initialization_val) {
                 rep->init_section = av_mallocz(sizeof(struct fragment));
@@ -691,7 +899,8 @@
                     ret = AVERROR(ENOMEM);
                     goto end;
                 }
-                rep->init_section->url = get_content_url(baseurl_nodes, 4, rep_id_val, rep_bandwidth_val, initialization_val);
+                c->max_url_size = aligned(c->max_url_size  + strlen(initialization_val));
+                rep->init_section->url = get_content_url(baseurl_nodes, 4,  c->max_url_size, rep_id_val, rep_bandwidth_val, initialization_val);
                 if (!rep->init_section->url) {
                     av_free(rep->init_section);
                     av_free(rep);
@@ -703,31 +912,51 @@
             }
 
             if (media_val) {
-                rep->url_template = get_content_url(baseurl_nodes, 4, rep_id_val, rep_bandwidth_val, media_val);
+                c->max_url_size = aligned(c->max_url_size  + strlen(media_val));
+                rep->url_template = get_content_url(baseurl_nodes, 4, c->max_url_size, rep_id_val, rep_bandwidth_val, media_val);
                 xmlFree(media_val);
             }
 
             if (presentation_timeoffset_val) {
                 rep->presentation_timeoffset = (int64_t) strtoll(presentation_timeoffset_val, NULL, 10);
+                av_log(s, AV_LOG_TRACE, "rep->presentation_timeoffset = [%"PRId64"]\n", rep->presentation_timeoffset);
                 xmlFree(presentation_timeoffset_val);
             }
             if (duration_val) {
                 rep->fragment_duration = (int64_t) strtoll(duration_val, NULL, 10);
+                av_log(s, AV_LOG_TRACE, "rep->fragment_duration = [%"PRId64"]\n", rep->fragment_duration);
                 xmlFree(duration_val);
             }
             if (timescale_val) {
                 rep->fragment_timescale = (int64_t) strtoll(timescale_val, NULL, 10);
+                av_log(s, AV_LOG_TRACE, "rep->fragment_timescale = [%"PRId64"]\n", rep->fragment_timescale);
                 xmlFree(timescale_val);
             }
             if (startnumber_val) {
                 rep->first_seq_no = (int64_t) strtoll(startnumber_val, NULL, 10);
+                av_log(s, AV_LOG_TRACE, "rep->first_seq_no = [%"PRId64"]\n", rep->first_seq_no);
                 xmlFree(startnumber_val);
             }
+            if (adaptionset_supplementalproperty_node) {
+                if (!av_strcasecmp(xmlGetProp(adaptionset_supplementalproperty_node,"schemeIdUri"), "http://dashif.org/guidelines/last-segment-number")) {
+                    val = xmlGetProp(adaptionset_supplementalproperty_node,"value");
+                    if (!val) {
+                        av_log(s, AV_LOG_ERROR, "Missing value attribute in adaptionset_supplementalproperty_node\n");
+                    } else {
+                        rep->last_seq_no =(int64_t) strtoll(val, NULL, 10) - 1;
+                        xmlFree(val);
+                    }
+                }
+            }
 
             fragment_timeline_node = find_child_node_by_name(representation_segmenttemplate_node, "SegmentTimeline");
 
             if (!fragment_timeline_node)
                 fragment_timeline_node = find_child_node_by_name(fragment_template_node, "SegmentTimeline");
+            if (!fragment_timeline_node)
+                fragment_timeline_node = find_child_node_by_name(adaptionset_segmentlist_node, "SegmentTimeline");
+            if (!fragment_timeline_node)
+                fragment_timeline_node = find_child_node_by_name(period_segmentlist_node, "SegmentTimeline");
             if (fragment_timeline_node) {
                 fragment_timeline_node = xmlFirstElementChild(fragment_timeline_node);
                 while (fragment_timeline_node) {
@@ -744,7 +973,7 @@
                 ret = AVERROR(ENOMEM);
                 goto end;
             }
-            seg->url = get_content_url(baseurl_nodes, 4, rep_id_val, rep_bandwidth_val, NULL);
+            seg->url = get_content_url(baseurl_nodes, 4, c->max_url_size, rep_id_val, rep_bandwidth_val, NULL);
             if (!seg->url) {
                 av_free(seg);
                 ret = AVERROR(ENOMEM);
@@ -756,14 +985,19 @@
             // TODO: https://www.brendanlong.com/the-structure-of-an-mpeg-dash-mpd.html
             // http://www-itec.uni-klu.ac.at/dash/ddash/mpdGenerator.php?fragmentlength=15&type=full
             xmlNodePtr fragmenturl_node = NULL;
-            duration_val = xmlGetProp(representation_segmentlist_node, "duration");
-            timescale_val = xmlGetProp(representation_segmentlist_node, "timescale");
+            segmentlists_tab[0] = representation_segmentlist_node;
+            segmentlists_tab[1] = adaptionset_segmentlist_node;
+
+            duration_val = get_val_from_nodes_tab(segmentlists_tab, 2, "duration");
+            timescale_val = get_val_from_nodes_tab(segmentlists_tab, 2, "timescale");
             if (duration_val) {
                 rep->fragment_duration = (int64_t) strtoll(duration_val, NULL, 10);
+                av_log(s, AV_LOG_TRACE, "rep->fragment_duration = [%"PRId64"]\n", rep->fragment_duration);
                 xmlFree(duration_val);
             }
             if (timescale_val) {
                 rep->fragment_timescale = (int64_t) strtoll(timescale_val, NULL, 10);
+                av_log(s, AV_LOG_TRACE, "rep->fragment_timescale = [%"PRId64"]\n", rep->fragment_timescale);
                 xmlFree(timescale_val);
             }
             fragmenturl_node = xmlFirstElementChild(representation_segmentlist_node);
@@ -782,6 +1016,10 @@
 
             if (!fragment_timeline_node)
                 fragment_timeline_node = find_child_node_by_name(fragment_template_node, "SegmentTimeline");
+            if (!fragment_timeline_node)
+                fragment_timeline_node = find_child_node_by_name(adaptionset_segmentlist_node, "SegmentTimeline");
+            if (!fragment_timeline_node)
+                fragment_timeline_node = find_child_node_by_name(period_segmentlist_node, "SegmentTimeline");
             if (fragment_timeline_node) {
                 fragment_timeline_node = xmlFirstElementChild(fragment_timeline_node);
                 while (fragment_timeline_node) {
@@ -801,12 +1039,21 @@
         if (rep) {
             if (rep->fragment_duration > 0 && !rep->fragment_timescale)
                 rep->fragment_timescale = 1;
+            rep->bandwidth = rep_bandwidth_val ? atoi(rep_bandwidth_val) : 0;
+            strncpy(rep->id, rep_id_val ? rep_id_val : "", sizeof(rep->id));
+            rep->framerate = av_make_q(0, 0);
+            if (type == AVMEDIA_TYPE_VIDEO && rep_framerate_val) {
+                ret = av_parse_video_rate(&rep->framerate, rep_framerate_val);
+                if (ret < 0)
+                    av_log(s, AV_LOG_VERBOSE, "Ignoring invalid frame rate '%s'\n", rep_framerate_val);
+            }
+
             if (type == AVMEDIA_TYPE_VIDEO) {
                 rep->rep_idx = video_rep_idx;
-                c->cur_video = rep;
+                dynarray_add(&c->videos, &c->n_videos, rep);
             } else {
                 rep->rep_idx = audio_rep_idx;
-                c->cur_audio = rep;
+                dynarray_add(&c->audios, &c->n_audios, rep);
             }
         }
     }
@@ -819,6 +1066,8 @@
         xmlFree(rep_id_val);
     if (rep_bandwidth_val)
         xmlFree(rep_bandwidth_val);
+    if (rep_framerate_val)
+        xmlFree(rep_framerate_val);
 
     return ret;
 }
@@ -826,13 +1075,31 @@
 static int parse_manifest_adaptationset(AVFormatContext *s, const char *url,
                                         xmlNodePtr adaptionset_node,
                                         xmlNodePtr mpd_baseurl_node,
-                                        xmlNodePtr period_baseurl_node)
+                                        xmlNodePtr period_baseurl_node,
+                                        xmlNodePtr period_segmenttemplate_node,
+                                        xmlNodePtr period_segmentlist_node)
 {
     int ret = 0;
+    DASHContext *c = s->priv_data;
     xmlNodePtr fragment_template_node = NULL;
     xmlNodePtr content_component_node = NULL;
     xmlNodePtr adaptionset_baseurl_node = NULL;
+    xmlNodePtr adaptionset_segmentlist_node = NULL;
+    xmlNodePtr adaptionset_supplementalproperty_node = NULL;
     xmlNodePtr node = NULL;
+    c->adaptionset_contenttype_val = xmlGetProp(adaptionset_node, "contentType");
+    c->adaptionset_par_val = xmlGetProp(adaptionset_node, "par");
+    c->adaptionset_lang_val = xmlGetProp(adaptionset_node, "lang");
+    c->adaptionset_minbw_val = xmlGetProp(adaptionset_node, "minBandwidth");
+    c->adaptionset_maxbw_val = xmlGetProp(adaptionset_node, "maxBandwidth");
+    c->adaptionset_minwidth_val = xmlGetProp(adaptionset_node, "minWidth");
+    c->adaptionset_maxwidth_val = xmlGetProp(adaptionset_node, "maxWidth");
+    c->adaptionset_minheight_val = xmlGetProp(adaptionset_node, "minHeight");
+    c->adaptionset_maxheight_val = xmlGetProp(adaptionset_node, "maxHeight");
+    c->adaptionset_minframerate_val = xmlGetProp(adaptionset_node, "minFrameRate");
+    c->adaptionset_maxframerate_val = xmlGetProp(adaptionset_node, "maxFrameRate");
+    c->adaptionset_segmentalignment_val = xmlGetProp(adaptionset_node, "segmentAlignment");
+    c->adaptionset_bitstreamswitching_val = xmlGetProp(adaptionset_node, "bitstreamSwitching");
 
     node = xmlFirstElementChild(adaptionset_node);
     while (node) {
@@ -842,14 +1109,22 @@
             content_component_node = node;
         } else if (!av_strcasecmp(node->name, (const char *)"BaseURL")) {
             adaptionset_baseurl_node = node;
+        } else if (!av_strcasecmp(node->name, (const char *)"SegmentList")) {
+            adaptionset_segmentlist_node = node;
+        } else if (!av_strcasecmp(node->name, (const char *)"SupplementalProperty")) {
+            adaptionset_supplementalproperty_node = node;
         } else if (!av_strcasecmp(node->name, (const char *)"Representation")) {
             ret = parse_manifest_representation(s, url, node,
                                                 adaptionset_node,
                                                 mpd_baseurl_node,
                                                 period_baseurl_node,
+                                                period_segmenttemplate_node,
+                                                period_segmentlist_node,
                                                 fragment_template_node,
                                                 content_component_node,
-                                                adaptionset_baseurl_node);
+                                                adaptionset_baseurl_node,
+                                                adaptionset_segmentlist_node,
+                                                adaptionset_supplementalproperty_node);
             if (ret < 0) {
                 return ret;
             }
@@ -872,20 +1147,21 @@
     xmlNodePtr root_element = NULL;
     xmlNodePtr node = NULL;
     xmlNodePtr period_node = NULL;
+    xmlNodePtr tmp_node = NULL;
     xmlNodePtr mpd_baseurl_node = NULL;
     xmlNodePtr period_baseurl_node = NULL;
+    xmlNodePtr period_segmenttemplate_node = NULL;
+    xmlNodePtr period_segmentlist_node = NULL;
     xmlNodePtr adaptionset_node = NULL;
     xmlAttrPtr attr = NULL;
     char *val  = NULL;
-    uint32_t perdiod_duration_sec = 0;
-    uint32_t perdiod_start_sec = 0;
-    int32_t audio_rep_idx = 0;
-    int32_t video_rep_idx = 0;
+    uint32_t period_duration_sec = 0;
+    uint32_t period_start_sec = 0;
 
     if (!in) {
         close_in = 1;
 
-        set_httpheader_options(c, opts);
+        av_dict_copy(&opts, c->avio_opts, 0);
         ret = avio_open2(&in, url, AVIO_FLAG_READ, c->interrupt_callback, &opts);
         av_dict_free(&opts);
         if (ret < 0)
@@ -916,7 +1192,7 @@
     } else {
         LIBXML_TEST_VERSION
 
-        doc = xmlReadMemory(buffer, filesize, c->base_url, NULL, 0);
+            doc = xmlReadMemory(buffer, filesize, c->base_url, NULL, 0);
         root_element = xmlDocGetRootElement(doc);
         node = root_element;
 
@@ -949,46 +1225,61 @@
 
             if (!av_strcasecmp(attr->name, (const char *)"availabilityStartTime")) {
                 c->availability_start_time = get_utc_date_time_insec(s, (const char *)val);
+                av_log(s, AV_LOG_TRACE, "c->availability_start_time = [%"PRId64"]\n", c->availability_start_time);
+            } else if (!av_strcasecmp(attr->name, (const char *)"availabilityEndTime")) {
+                c->availability_end_time = get_utc_date_time_insec(s, (const char *)val);
+                av_log(s, AV_LOG_TRACE, "c->availability_end_time = [%"PRId64"]\n", c->availability_end_time);
             } else if (!av_strcasecmp(attr->name, (const char *)"publishTime")) {
                 c->publish_time = get_utc_date_time_insec(s, (const char *)val);
+                av_log(s, AV_LOG_TRACE, "c->publish_time = [%"PRId64"]\n", c->publish_time);
             } else if (!av_strcasecmp(attr->name, (const char *)"minimumUpdatePeriod")) {
                 c->minimum_update_period = get_duration_insec(s, (const char *)val);
+                av_log(s, AV_LOG_TRACE, "c->minimum_update_period = [%"PRId64"]\n", c->minimum_update_period);
             } else if (!av_strcasecmp(attr->name, (const char *)"timeShiftBufferDepth")) {
                 c->time_shift_buffer_depth = get_duration_insec(s, (const char *)val);
+                av_log(s, AV_LOG_TRACE, "c->time_shift_buffer_depth = [%"PRId64"]\n", c->time_shift_buffer_depth);
             } else if (!av_strcasecmp(attr->name, (const char *)"minBufferTime")) {
                 c->min_buffer_time = get_duration_insec(s, (const char *)val);
+                av_log(s, AV_LOG_TRACE, "c->min_buffer_time = [%"PRId64"]\n", c->min_buffer_time);
             } else if (!av_strcasecmp(attr->name, (const char *)"suggestedPresentationDelay")) {
                 c->suggested_presentation_delay = get_duration_insec(s, (const char *)val);
+                av_log(s, AV_LOG_TRACE, "c->suggested_presentation_delay = [%"PRId64"]\n", c->suggested_presentation_delay);
             } else if (!av_strcasecmp(attr->name, (const char *)"mediaPresentationDuration")) {
                 c->media_presentation_duration = get_duration_insec(s, (const char *)val);
+                av_log(s, AV_LOG_TRACE, "c->media_presentation_duration = [%"PRId64"]\n", c->media_presentation_duration);
             }
             attr = attr->next;
             xmlFree(val);
         }
 
-        mpd_baseurl_node = find_child_node_by_name(node, "BaseURL");
+        tmp_node = find_child_node_by_name(node, "BaseURL");
+        if (tmp_node) {
+            mpd_baseurl_node = xmlCopyNode(tmp_node,1);
+        } else {
+            mpd_baseurl_node = xmlNewNode(NULL, "BaseURL");
+        }
 
         // at now we can handle only one period, with the longest duration
         node = xmlFirstElementChild(node);
         while (node) {
             if (!av_strcasecmp(node->name, (const char *)"Period")) {
-                perdiod_duration_sec = 0;
-                perdiod_start_sec = 0;
+                period_duration_sec = 0;
+                period_start_sec = 0;
                 attr = node->properties;
                 while (attr) {
                     val = xmlGetProp(node, attr->name);
                     if (!av_strcasecmp(attr->name, (const char *)"duration")) {
-                        perdiod_duration_sec = get_duration_insec(s, (const char *)val);
+                        period_duration_sec = get_duration_insec(s, (const char *)val);
                     } else if (!av_strcasecmp(attr->name, (const char *)"start")) {
-                        perdiod_start_sec = get_duration_insec(s, (const char *)val);
+                        period_start_sec = get_duration_insec(s, (const char *)val);
                     }
                     attr = attr->next;
                     xmlFree(val);
                 }
-                if ((perdiod_duration_sec) >= (c->period_duration)) {
+                if ((period_duration_sec) >= (c->period_duration)) {
                     period_node = node;
-                    c->period_duration = perdiod_duration_sec;
-                    c->period_start = perdiod_start_sec;
+                    c->period_duration = period_duration_sec;
+                    c->period_start = period_start_sec;
                     if (c->period_start > 0)
                         c->media_presentation_duration = c->period_duration;
                 }
@@ -1005,23 +1296,20 @@
         while (adaptionset_node) {
             if (!av_strcasecmp(adaptionset_node->name, (const char *)"BaseURL")) {
                 period_baseurl_node = adaptionset_node;
+            } else if (!av_strcasecmp(adaptionset_node->name, (const char *)"SegmentTemplate")) {
+                period_segmenttemplate_node = adaptionset_node;
+            } else if (!av_strcasecmp(adaptionset_node->name, (const char *)"SegmentList")) {
+                period_segmentlist_node = adaptionset_node;
             } else if (!av_strcasecmp(adaptionset_node->name, (const char *)"AdaptationSet")) {
-                parse_manifest_adaptationset(s, url, adaptionset_node, mpd_baseurl_node, period_baseurl_node);
+                parse_manifest_adaptationset(s, url, adaptionset_node, mpd_baseurl_node, period_baseurl_node, period_segmenttemplate_node, period_segmentlist_node);
             }
             adaptionset_node = xmlNextElementSibling(adaptionset_node);
         }
-        if (c->cur_video) {
-            c->cur_video->rep_count = video_rep_idx;
-            av_log(s, AV_LOG_VERBOSE, "rep_idx[%d]\n", (int)c->cur_video->rep_idx);
-            av_log(s, AV_LOG_VERBOSE, "rep_count[%d]\n", (int)video_rep_idx);
-        }
-        if (c->cur_audio) {
-            c->cur_audio->rep_count = audio_rep_idx;
-        }
 cleanup:
         /*free the document */
         xmlFreeDoc(doc);
         xmlCleanupParser();
+        xmlFreeNode(mpd_baseurl_node);
     }
 
     av_free(new_url);
@@ -1040,22 +1328,26 @@
 
     if (c->is_live) {
         if (pls->n_fragments) {
+            av_log(s, AV_LOG_TRACE, "in n_fragments mode\n");
             num = pls->first_seq_no;
         } else if (pls->n_timelines) {
-            start_time_offset = get_segment_start_time_based_on_timeline(pls, 0xFFFFFFFF) - pls->timelines[pls->first_seq_no]->starttime; // total duration of playlist
-            if (start_time_offset < 60 * pls->fragment_timescale)
-                start_time_offset = 0;
-            else
-                start_time_offset = start_time_offset - 60 * pls->fragment_timescale;
-
-            num = calc_next_seg_no_from_timelines(pls, pls->timelines[pls->first_seq_no]->starttime + start_time_offset);
+            av_log(s, AV_LOG_TRACE, "in n_timelines mode\n");
+            start_time_offset = get_segment_start_time_based_on_timeline(pls, 0xFFFFFFFF) - 60 * pls->fragment_timescale; // 60 seconds before end
+            num = calc_next_seg_no_from_timelines(pls, start_time_offset);
             if (num == -1)
                 num = pls->first_seq_no;
+            else
+                num += pls->first_seq_no;
         } else if (pls->fragment_duration){
+            av_log(s, AV_LOG_TRACE, "in fragment_duration mode fragment_timescale = %"PRId64", presentation_timeoffset = %"PRId64"\n", pls->fragment_timescale, pls->presentation_timeoffset);
             if (pls->presentation_timeoffset) {
-                num = pls->presentation_timeoffset * pls->fragment_timescale / pls->fragment_duration;
+                num = pls->first_seq_no + (((get_current_time_in_sec() - c->availability_start_time) * pls->fragment_timescale)-pls->presentation_timeoffset) / pls->fragment_duration - c->min_buffer_time;
             } else if (c->publish_time > 0 && !c->availability_start_time) {
-                num = pls->first_seq_no + (((c->publish_time - c->availability_start_time) - c->suggested_presentation_delay) * pls->fragment_timescale) / pls->fragment_duration;
+                if (c->min_buffer_time) {
+                    num = pls->first_seq_no + (((c->publish_time + pls->fragment_duration) - c->suggested_presentation_delay) * pls->fragment_timescale) / pls->fragment_duration - c->min_buffer_time;
+                } else {
+                    num = pls->first_seq_no + (((c->publish_time - c->time_shift_buffer_depth + pls->fragment_duration) - c->suggested_presentation_delay) * pls->fragment_timescale) / pls->fragment_duration;
+                }
             } else {
                 num = pls->first_seq_no + (((get_current_time_in_sec() - c->availability_start_time) - c->suggested_presentation_delay) * pls->fragment_timescale) / pls->fragment_duration;
             }
@@ -1072,6 +1364,7 @@
     int64_t num = 0;
 
     if (c->is_live && pls->fragment_duration) {
+        av_log(s, AV_LOG_TRACE, "in live mode\n");
         num = pls->first_seq_no + (((get_current_time_in_sec() - c->availability_start_time) - c->time_shift_buffer_depth) * pls->fragment_timescale) / pls->fragment_duration;
     } else {
         num = pls->first_seq_no;
@@ -1079,9 +1372,8 @@
     return num;
 }
 
-static int64_t calc_max_seg_no(struct representation *pls)
+static int64_t calc_max_seg_no(struct representation *pls, DASHContext *c)
 {
-    DASHContext *c = pls->parent->priv_data;
     int64_t num = 0;
 
     if (pls->n_fragments) {
@@ -1090,7 +1382,12 @@
         int i = 0;
         num = pls->first_seq_no + pls->n_timelines - 1;
         for (i = 0; i < pls->n_timelines; i++) {
-            num += pls->timelines[i]->repeat;
+            if (pls->timelines[i]->repeat == -1) {
+                int length_of_each_segment = pls->timelines[i]->duration / pls->fragment_timescale;
+                num =  c->period_duration / length_of_each_segment;
+            } else {
+                num += pls->timelines[i]->repeat;
+            }
         }
     } else if (c->is_live && pls->fragment_duration) {
         num = pls->first_seq_no + (((get_current_time_in_sec() - c->availability_start_time)) * pls->fragment_timescale)  / pls->fragment_duration;
@@ -1101,21 +1398,21 @@
     return num;
 }
 
-static void move_timelines(struct representation *rep_src, struct representation *rep_dest)
+static void move_timelines(struct representation *rep_src, struct representation *rep_dest, DASHContext *c)
 {
     if (rep_dest && rep_src ) {
         free_timelines_list(rep_dest);
         rep_dest->timelines    = rep_src->timelines;
         rep_dest->n_timelines  = rep_src->n_timelines;
         rep_dest->first_seq_no = rep_src->first_seq_no;
-        rep_dest->last_seq_no = calc_max_seg_no(rep_dest);
+        rep_dest->last_seq_no = calc_max_seg_no(rep_dest, c);
         rep_src->timelines = NULL;
         rep_src->n_timelines = 0;
         rep_dest->cur_seq_no = rep_src->cur_seq_no;
     }
 }
 
-static void move_segments(struct representation *rep_src, struct representation *rep_dest)
+static void move_segments(struct representation *rep_src, struct representation *rep_dest, DASHContext *c)
 {
     if (rep_dest && rep_src ) {
         free_fragment_list(rep_dest);
@@ -1126,7 +1423,7 @@
         rep_dest->fragments    = rep_src->fragments;
         rep_dest->n_fragments  = rep_src->n_fragments;
         rep_dest->parent  = rep_src->parent;
-        rep_dest->last_seq_no = calc_max_seg_no(rep_dest);
+        rep_dest->last_seq_no = calc_max_seg_no(rep_dest, c);
         rep_src->fragments = NULL;
         rep_src->n_fragments = 0;
     }
@@ -1136,48 +1433,69 @@
 static int refresh_manifest(AVFormatContext *s)
 {
 
-    int ret = 0;
+    int ret = 0, i;
     DASHContext *c = s->priv_data;
 
     // save current context
-    struct representation *cur_video =  c->cur_video;
-    struct representation *cur_audio =  c->cur_audio;
+    int n_videos = c->n_videos;
+    struct representation **videos = c->videos;
+    int n_audios = c->n_audios;
+    struct representation **audios = c->audios;
     char *base_url = c->base_url;
 
     c->base_url = NULL;
-    c->cur_video = NULL;
-    c->cur_audio = NULL;
-    ret = parse_manifest(s, s->filename, NULL);
+    c->n_videos = 0;
+    c->videos = NULL;
+    c->n_audios = 0;
+    c->audios = NULL;
+    ret = parse_manifest(s, s->url, NULL);
     if (ret)
         goto finish;
 
-    if (cur_video && cur_video->timelines || cur_audio && cur_audio->timelines) {
-        // calc current time
-        int64_t currentVideoTime = 0;
-        int64_t currentAudioTime = 0;
-        if (cur_video && cur_video->timelines)
-            currentVideoTime = get_segment_start_time_based_on_timeline(cur_video, cur_video->cur_seq_no) / cur_video->fragment_timescale;
-        if (cur_audio && cur_audio->timelines)
-            currentAudioTime = get_segment_start_time_based_on_timeline(cur_audio, cur_audio->cur_seq_no) / cur_audio->fragment_timescale;
-        // update segments
-        if (cur_video && cur_video->timelines) {
-            c->cur_video->cur_seq_no = calc_next_seg_no_from_timelines(c->cur_video, currentVideoTime * cur_video->fragment_timescale - 1);
-            if (c->cur_video->cur_seq_no >= 0) {
-                move_timelines(c->cur_video, cur_video);
+    if (c->n_videos != n_videos) {
+        av_log(c, AV_LOG_ERROR,
+               "new manifest has mismatched no. of video representations, %d -> %d\n",
+               n_videos, c->n_videos);
+        return AVERROR_INVALIDDATA;
+    }
+    if (c->n_audios != n_audios) {
+        av_log(c, AV_LOG_ERROR,
+               "new manifest has mismatched no. of audio representations, %d -> %d\n",
+               n_audios, c->n_audios);
+        return AVERROR_INVALIDDATA;
+    }
+
+    for (i = 0; i < n_videos; i++) {
+        struct representation *cur_video = videos[i];
+        struct representation *ccur_video = c->videos[i];
+        if (cur_video->timelines) {
+            // calc current time
+            int64_t currentTime = get_segment_start_time_based_on_timeline(cur_video, cur_video->cur_seq_no) / cur_video->fragment_timescale;
+            // update segments
+            ccur_video->cur_seq_no = calc_next_seg_no_from_timelines(ccur_video, currentTime * cur_video->fragment_timescale - 1);
+            if (ccur_video->cur_seq_no >= 0) {
+                move_timelines(ccur_video, cur_video, c);
             }
         }
-        if (cur_audio && cur_audio->timelines) {
-            c->cur_audio->cur_seq_no = calc_next_seg_no_from_timelines(c->cur_audio, currentAudioTime * cur_audio->fragment_timescale - 1);
-            if (c->cur_audio->cur_seq_no >= 0) {
-               move_timelines(c->cur_audio, cur_audio);
-            }
+        if (cur_video->fragments) {
+            move_segments(ccur_video, cur_video, c);
         }
     }
-    if (cur_video && cur_video->fragments) {
-        move_segments(c->cur_video, cur_video);
-    }
-    if (cur_audio && cur_audio->fragments) {
-        move_segments(c->cur_audio, cur_audio);
+    for (i = 0; i < n_audios; i++) {
+        struct representation *cur_audio = audios[i];
+        struct representation *ccur_audio = c->audios[i];
+        if (cur_audio->timelines) {
+            // calc current time
+            int64_t currentTime = get_segment_start_time_based_on_timeline(cur_audio, cur_audio->cur_seq_no) / cur_audio->fragment_timescale;
+            // update segments
+            ccur_audio->cur_seq_no = calc_next_seg_no_from_timelines(ccur_audio, currentTime * cur_audio->fragment_timescale - 1);
+            if (ccur_audio->cur_seq_no >= 0) {
+                move_timelines(ccur_audio, cur_audio, c);
+            }
+        }
+        if (cur_audio->fragments) {
+            move_segments(ccur_audio, cur_audio, c);
+        }
     }
 
 finish:
@@ -1186,12 +1504,14 @@
         av_free(base_url);
     else
         c->base_url  = base_url;
-    if (c->cur_audio)
-        free_representation(c->cur_audio);
-    if (c->cur_video)
-        free_representation(c->cur_video);
-    c->cur_audio = cur_audio;
-    c->cur_video = cur_video;
+    if (c->audios)
+        free_audio_list(c);
+    if (c->videos)
+        free_video_list(c);
+    c->n_audios = n_audios;
+    c->audios = audios;
+    c->n_videos = n_videos;
+    c->videos = videos;
     return ret;
 }
 
@@ -1226,7 +1546,7 @@
     }
     if (c->is_live) {
         min_seq_no = calc_min_seg_no(pls->parent, pls);
-        max_seq_no = calc_max_seg_no(pls);
+        max_seq_no = calc_max_seg_no(pls, c);
 
         if (pls->timelines || pls->fragments) {
             refresh_manifest(pls->parent);
@@ -1248,33 +1568,30 @@
         }
     }
     if (seg) {
-        char tmpfilename[MAX_URL_SIZE];
-
-        ff_dash_fill_tmpl_params(tmpfilename, sizeof(tmpfilename), pls->url_template, 0, pls->cur_seq_no, 0, get_segment_start_time_based_on_timeline(pls, pls->cur_seq_no));
+        char *tmpfilename= av_mallocz(c->max_url_size);
+        if (!tmpfilename) {
+            return NULL;
+        }
+        ff_dash_fill_tmpl_params(tmpfilename, c->max_url_size, pls->url_template, 0, pls->cur_seq_no, 0, get_segment_start_time_based_on_timeline(pls, pls->cur_seq_no));
         seg->url = av_strireplace(pls->url_template, pls->url_template, tmpfilename);
         if (!seg->url) {
             av_log(pls->parent, AV_LOG_WARNING, "Unable to resolve template url '%s', try to use origin template\n", pls->url_template);
             seg->url = av_strdup(pls->url_template);
             if (!seg->url) {
                 av_log(pls->parent, AV_LOG_ERROR, "Cannot resolve template url '%s'\n", pls->url_template);
+                av_free(tmpfilename);
                 return NULL;
             }
         }
-
+        av_free(tmpfilename);
         seg->size = -1;
     }
 
     return seg;
 }
 
-enum ReadFromURLMode {
-    READ_NORMAL,
-    READ_COMPLETE,
-};
-
 static int read_from_url(struct representation *pls, struct fragment *seg,
-                         uint8_t *buf, int buf_size,
-                         enum ReadFromURLMode mode)
+                         uint8_t *buf, int buf_size)
 {
     int ret;
 
@@ -1282,14 +1599,7 @@
     if (seg->size >= 0)
         buf_size = FFMIN(buf_size, pls->cur_seg_size - pls->cur_seg_offset);
 
-    if (mode == READ_COMPLETE) {
-        ret = avio_read(pls->input, buf, buf_size);
-        if (ret < buf_size) {
-            av_log(pls->parent, AV_LOG_WARNING, "Could not read complete fragment.\n");
-        }
-    } else {
-        ret = avio_read(pls->input, buf, buf_size);
-    }
+    ret = avio_read(pls->input, buf, buf_size);
     if (ret > 0)
         pls->cur_seg_offset += ret;
 
@@ -1299,10 +1609,14 @@
 static int open_input(DASHContext *c, struct representation *pls, struct fragment *seg)
 {
     AVDictionary *opts = NULL;
-    char url[MAX_URL_SIZE];
-    int ret;
+    char *url = NULL;
+    int ret = 0;
 
-    set_httpheader_options(c, opts);
+    url = av_mallocz(c->max_url_size);
+    if (!url) {
+        goto cleanup;
+    }
+
     if (seg->size >= 0) {
         /* try to restrict the HTTP request to the part we want
          * (if this is in fact a HTTP request) */
@@ -1310,7 +1624,7 @@
         av_dict_set_int(&opts, "end_offset", seg->url_offset + seg->size, 0);
     }
 
-    ff_make_absolute_url(url, MAX_URL_SIZE, c->base_url, seg->url);
+    ff_make_absolute_url(url, c->max_url_size, c->base_url, seg->url);
     av_log(pls->parent, AV_LOG_VERBOSE, "DASH request for url '%s', offset %"PRId64", playlist %d\n",
            url, seg->url_offset, pls->rep_idx);
     ret = open_url(pls->parent, &pls->input, url, c->avio_opts, opts, NULL);
@@ -1318,19 +1632,8 @@
         goto cleanup;
     }
 
-    /* Seek to the requested position. If this was a HTTP request, the offset
-     * should already be where want it to, but this allows e.g. local testing
-     * without a HTTP server. */
-    if (!ret && seg->url_offset) {
-        int64_t seekret = avio_seek(pls->input, seg->url_offset, SEEK_SET);
-        if (seekret < 0) {
-            av_log(pls->parent, AV_LOG_ERROR, "Unable to seek to offset %"PRId64" of DASH fragment '%s'\n", seg->url_offset, seg->url);
-            ret = (int) seekret;
-            ff_format_io_close(pls->parent, &pls->input);
-        }
-    }
-
 cleanup:
+    av_free(url);
     av_dict_free(&opts);
     pls->cur_seg_offset = 0;
     pls->cur_seg_size = seg->size;
@@ -1372,7 +1675,7 @@
     av_fast_malloc(&pls->init_sec_buf, &pls->init_sec_buf_size, sec_size);
 
     ret = read_from_url(pls, pls->init_section, pls->init_sec_buf,
-                        pls->init_sec_buf_size, READ_COMPLETE);
+                        pls->init_sec_buf_size);
     ff_format_io_close(pls->parent, &pls->input);
 
     if (ret < 0)
@@ -1443,13 +1746,15 @@
         ret = AVERROR_EOF;
         goto end;
     }
-    ret = read_from_url(v, v->cur_seg, buf, buf_size, READ_NORMAL);
+    ret = read_from_url(v, v->cur_seg, buf, buf_size);
     if (ret > 0)
         goto end;
 
-    if (!v->is_restart_needed)
-        v->cur_seq_no++;
-    v->is_restart_needed = 1;
+    if (c->is_live || v->cur_seq_no < v->last_seq_no) {
+        if (!v->is_restart_needed)
+            v->cur_seq_no++;
+        v->is_restart_needed = 1;
+    }
 
 end:
     return ret;
@@ -1458,7 +1763,7 @@
 static int save_avio_options(AVFormatContext *s)
 {
     DASHContext *c = s->priv_data;
-    const char *opts[] = { "headers", "user_agent", "user-agent", "cookies", NULL }, **opt = opts;
+    const char *opts[] = { "headers", "user_agent", "cookies", NULL }, **opt = opts;
     uint8_t *buf = NULL;
     int ret = 0;
 
@@ -1466,8 +1771,12 @@
         if (av_opt_get(s->pb, *opt, AV_OPT_SEARCH_CHILDREN, &buf) >= 0) {
             if (buf[0] != '\0') {
                 ret = av_dict_set(&c->avio_opts, *opt, buf, AV_DICT_DONT_STRDUP_VAL);
-                if (ret < 0)
+                if (ret < 0) {
+                    av_freep(&buf);
                     return ret;
+                }
+            } else {
+                av_freep(&buf);
             }
         }
         opt++;
@@ -1482,26 +1791,37 @@
     av_log(s, AV_LOG_ERROR,
            "A DASH playlist item '%s' referred to an external file '%s'. "
            "Opening this file was forbidden for security reasons\n",
-           s->filename, url);
+           s->url, url);
     return AVERROR(EPERM);
 }
 
+static void close_demux_for_component(struct representation *pls)
+{
+    /* note: the internal buffer could have changed */
+    av_freep(&pls->pb.buffer);
+    memset(&pls->pb, 0x00, sizeof(AVIOContext));
+    pls->ctx->pb = NULL;
+    avformat_close_input(&pls->ctx);
+    pls->ctx = NULL;
+}
+
 static int reopen_demux_for_component(AVFormatContext *s, struct representation *pls)
 {
     DASHContext *c = s->priv_data;
     AVInputFormat *in_fmt = NULL;
     AVDictionary  *in_fmt_opts = NULL;
     uint8_t *avio_ctx_buffer  = NULL;
-    int ret = 0;
+    int ret = 0, i;
 
     if (pls->ctx) {
-        /* note: the internal buffer could have changed, and be != avio_ctx_buffer */
-        av_freep(&pls->pb.buffer);
-        memset(&pls->pb, 0x00, sizeof(AVIOContext));
-        pls->ctx->pb = NULL;
-        avformat_close_input(&pls->ctx);
-        pls->ctx = NULL;
+        close_demux_for_component(pls);
     }
+
+    if (ff_check_interrupt(&s->interrupt_callback)) {
+        ret = AVERROR_EXIT;
+        goto fail;
+    }
+
     if (!(pls->ctx = avformat_alloc_context())) {
         ret = AVERROR(ENOMEM);
         goto fail;
@@ -1544,6 +1864,13 @@
     if (ret < 0)
         goto fail;
     if (pls->n_fragments) {
+#if FF_API_R_FRAME_RATE
+        if (pls->framerate.den) {
+            for (i = 0; i < pls->ctx->nb_streams; i++)
+                pls->ctx->streams[i]->r_frame_rate = pls->framerate;
+        }
+#endif
+
         ret = avformat_find_stream_info(pls->ctx, NULL);
         if (ret < 0)
             goto fail;
@@ -1560,7 +1887,10 @@
 
     pls->parent = s;
     pls->cur_seq_no  = calc_cur_seg_no(s, pls);
-    pls->last_seq_no = calc_max_seg_no(pls);
+
+    if (!pls->last_seq_no) {
+        pls->last_seq_no = calc_max_seg_no(pls, s->priv_data);
+    }
 
     ret = reopen_demux_for_component(s, pls);
     if (ret < 0) {
@@ -1583,54 +1913,92 @@
     return ret;
 }
 
+static int is_common_init_section_exist(struct representation **pls, int n_pls)
+{
+    struct fragment *first_init_section = pls[0]->init_section;
+    char *url =NULL;
+    int64_t url_offset = -1;
+    int64_t size = -1;
+    int i = 0;
+
+    if (first_init_section == NULL || n_pls == 0)
+        return 0;
+
+    url = first_init_section->url;
+    url_offset = first_init_section->url_offset;
+    size = pls[0]->init_section->size;
+    for (i=0;i<n_pls;i++) {
+        if (av_strcasecmp(pls[i]->init_section->url,url) || pls[i]->init_section->url_offset != url_offset || pls[i]->init_section->size != size) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+static void copy_init_section(struct representation *rep_dest, struct representation *rep_src)
+{
+    rep_dest->init_sec_buf = av_mallocz(rep_src->init_sec_buf_size);
+    memcpy(rep_dest->init_sec_buf, rep_src->init_sec_buf, rep_src->init_sec_data_len);
+    rep_dest->init_sec_buf_size = rep_src->init_sec_buf_size;
+    rep_dest->init_sec_data_len = rep_src->init_sec_data_len;
+    rep_dest->cur_timestamp = rep_src->cur_timestamp;
+}
+
+
 static int dash_read_header(AVFormatContext *s)
 {
-    void *u = (s->flags & AVFMT_FLAG_CUSTOM_IO) ? NULL : s->pb;
     DASHContext *c = s->priv_data;
     int ret = 0;
     int stream_index = 0;
+    int i;
 
     c->interrupt_callback = &s->interrupt_callback;
-    // if the URL context is good, read important options we must broker later
-    if (u) {
-        update_options(&c->user_agent, "user-agent", u);
-        update_options(&c->cookies, "cookies", u);
-        update_options(&c->headers, "headers", u);
-    }
-
-    if ((ret = parse_manifest(s, s->filename, s->pb)) < 0)
-        goto fail;
 
     if ((ret = save_avio_options(s)) < 0)
         goto fail;
 
+    av_dict_set(&c->avio_opts, "seekable", "0", 0);
+
+    if ((ret = parse_manifest(s, s->url, s->pb)) < 0)
+        goto fail;
+
     /* If this isn't a live stream, fill the total duration of the
      * stream. */
     if (!c->is_live) {
         s->duration = (int64_t) c->media_presentation_duration * AV_TIME_BASE;
     }
 
-    /* Open the demuxer for curent video and current audio components if available */
-    if (!ret && c->cur_video) {
-        ret = open_demux_for_component(s, c->cur_video);
-        if (!ret) {
-            c->cur_video->stream_index = stream_index;
-            ++stream_index;
-        } else {
-            free_representation(c->cur_video);
-            c->cur_video = NULL;
+    if(c->n_videos)
+        c->is_init_section_common_video = is_common_init_section_exist(c->videos, c->n_videos);
+
+    /* Open the demuxer for video and audio components if available */
+    for (i = 0; i < c->n_videos; i++) {
+        struct representation *cur_video = c->videos[i];
+        if (i > 0 && c->is_init_section_common_video) {
+            copy_init_section(cur_video,c->videos[0]);
         }
+        ret = open_demux_for_component(s, cur_video);
+
+        if (ret)
+            goto fail;
+        cur_video->stream_index = stream_index;
+        ++stream_index;
     }
 
-    if (!ret && c->cur_audio) {
-        ret = open_demux_for_component(s, c->cur_audio);
-        if (!ret) {
-            c->cur_audio->stream_index = stream_index;
-            ++stream_index;
-        } else {
-            free_representation(c->cur_audio);
-            c->cur_audio = NULL;
+    if(c->n_audios)
+        c->is_init_section_common_audio = is_common_init_section_exist(c->audios, c->n_audios);
+
+    for (i = 0; i < c->n_audios; i++) {
+        struct representation *cur_audio = c->audios[i];
+        if (i > 0 && c->is_init_section_common_audio) {
+            copy_init_section(cur_audio,c->audios[0]);
         }
+        ret = open_demux_for_component(s, cur_audio);
+
+        if (ret)
+            goto fail;
+        cur_audio->stream_index = stream_index;
+        ++stream_index;
     }
 
     if (!stream_index) {
@@ -1646,11 +2014,25 @@
             goto fail;
         }
 
-        if (c->cur_video) {
-            av_program_add_stream_index(s, 0, c->cur_video->stream_index);
+        for (i = 0; i < c->n_videos; i++) {
+            struct representation *pls = c->videos[i];
+
+            av_program_add_stream_index(s, 0, pls->stream_index);
+            pls->assoc_stream = s->streams[pls->stream_index];
+            if (pls->bandwidth > 0)
+                av_dict_set_int(&pls->assoc_stream->metadata, "variant_bitrate", pls->bandwidth, 0);
+            if (pls->id[0])
+                av_dict_set(&pls->assoc_stream->metadata, "id", pls->id, 0);
         }
-        if (c->cur_audio) {
-            av_program_add_stream_index(s, 0, c->cur_audio->stream_index);
+        for (i = 0; i < c->n_audios; i++) {
+            struct representation *pls = c->audios[i];
+
+            av_program_add_stream_index(s, 0, pls->stream_index);
+            pls->assoc_stream = s->streams[pls->stream_index];
+            if (pls->bandwidth > 0)
+                av_dict_set_int(&pls->assoc_stream->metadata, "variant_bitrate", pls->bandwidth, 0);
+            if (pls->id[0])
+                av_dict_set(&pls->assoc_stream->metadata, "id", pls->id, 0);
         }
     }
 
@@ -1659,43 +2041,79 @@
     return ret;
 }
 
+static void recheck_discard_flags(AVFormatContext *s, struct representation **p, int n)
+{
+    int i, j;
+
+    for (i = 0; i < n; i++) {
+        struct representation *pls = p[i];
+
+        int needed = !pls->assoc_stream || pls->assoc_stream->discard < AVDISCARD_ALL;
+        if (needed && !pls->ctx) {
+            pls->cur_seg_offset = 0;
+            pls->init_sec_buf_read_offset = 0;
+            /* Catch up */
+            for (j = 0; j < n; j++) {
+                pls->cur_seq_no = FFMAX(pls->cur_seq_no, p[j]->cur_seq_no);
+            }
+            reopen_demux_for_component(s, pls);
+            av_log(s, AV_LOG_INFO, "Now receiving stream_index %d\n", pls->stream_index);
+        } else if (!needed && pls->ctx) {
+            close_demux_for_component(pls);
+            if (pls->input)
+                ff_format_io_close(pls->parent, &pls->input);
+            av_log(s, AV_LOG_INFO, "No longer receiving stream_index %d\n", pls->stream_index);
+        }
+    }
+}
+
 static int dash_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     DASHContext *c = s->priv_data;
-    int ret = 0;
+    int ret = 0, i;
+    int64_t mints = 0;
     struct representation *cur = NULL;
 
-    if (!c->cur_audio && !c->cur_video ) {
+    recheck_discard_flags(s, c->videos, c->n_videos);
+    recheck_discard_flags(s, c->audios, c->n_audios);
+
+    for (i = 0; i < c->n_videos; i++) {
+        struct representation *pls = c->videos[i];
+        if (!pls->ctx)
+            continue;
+        if (!cur || pls->cur_timestamp < mints) {
+            cur = pls;
+            mints = pls->cur_timestamp;
+        }
+    }
+    for (i = 0; i < c->n_audios; i++) {
+        struct representation *pls = c->audios[i];
+        if (!pls->ctx)
+            continue;
+        if (!cur || pls->cur_timestamp < mints) {
+            cur = pls;
+            mints = pls->cur_timestamp;
+        }
+    }
+
+    if (!cur) {
         return AVERROR_INVALIDDATA;
     }
-    if (c->cur_audio && !c->cur_video) {
-        cur = c->cur_audio;
-    } else if (!c->cur_audio && c->cur_video) {
-        cur = c->cur_video;
-    } else if (c->cur_video->cur_timestamp < c->cur_audio->cur_timestamp) {
-        cur = c->cur_video;
-    } else {
-        cur = c->cur_audio;
-    }
-
-    if (cur->ctx) {
-        while (!ff_check_interrupt(c->interrupt_callback) && !ret) {
-            ret = av_read_frame(cur->ctx, pkt);
-            if (ret >= 0) {
-                /* If we got a packet, return it */
-                cur->cur_timestamp = av_rescale(pkt->pts, (int64_t)cur->ctx->streams[0]->time_base.num * 90000, cur->ctx->streams[0]->time_base.den);
-                pkt->stream_index = cur->stream_index;
-                return 0;
-            }
-            if (cur->is_restart_needed) {
-                cur->cur_seg_offset = 0;
-                cur->init_sec_buf_read_offset = 0;
-                if (cur->input)
-                    ff_format_io_close(cur->parent, &cur->input);
-                ret = reopen_demux_for_component(s, cur);
-                cur->is_restart_needed = 0;
-            }
-
+    while (!ff_check_interrupt(c->interrupt_callback) && !ret) {
+        ret = av_read_frame(cur->ctx, pkt);
+        if (ret >= 0) {
+            /* If we got a packet, return it */
+            cur->cur_timestamp = av_rescale(pkt->pts, (int64_t)cur->ctx->streams[0]->time_base.num * 90000, cur->ctx->streams[0]->time_base.den);
+            pkt->stream_index = cur->stream_index;
+            return 0;
+        }
+        if (cur->is_restart_needed) {
+            cur->cur_seg_offset = 0;
+            cur->init_sec_buf_read_offset = 0;
+            if (cur->input)
+                ff_format_io_close(cur->parent, &cur->input);
+            ret = reopen_demux_for_component(s, cur);
+            cur->is_restart_needed = 0;
         }
     }
     return AVERROR_EOF;
@@ -1704,33 +2122,30 @@
 static int dash_close(AVFormatContext *s)
 {
     DASHContext *c = s->priv_data;
-    if (c->cur_audio) {
-        free_representation(c->cur_audio);
-    }
-    if (c->cur_video) {
-        free_representation(c->cur_video);
-    }
+    free_audio_list(c);
+    free_video_list(c);
 
-    av_freep(&c->cookies);
-    av_freep(&c->user_agent);
     av_dict_free(&c->avio_opts);
     av_freep(&c->base_url);
     return 0;
 }
 
-static int dash_seek(AVFormatContext *s, struct representation *pls, int64_t seek_pos_msec, int flags)
+static int dash_seek(AVFormatContext *s, struct representation *pls, int64_t seek_pos_msec, int flags, int dry_run)
 {
     int ret = 0;
     int i = 0;
     int j = 0;
     int64_t duration = 0;
 
-    av_log(pls->parent, AV_LOG_VERBOSE, "DASH seek pos[%"PRId64"ms], playlist %d\n", seek_pos_msec, pls->rep_idx);
+    av_log(pls->parent, AV_LOG_VERBOSE, "DASH seek pos[%"PRId64"ms], playlist %d%s\n",
+           seek_pos_msec, pls->rep_idx, dry_run ? " (dry)" : "");
 
     // single fragment mode
     if (pls->n_fragments == 1) {
         pls->cur_timestamp = 0;
         pls->cur_seg_offset = 0;
+        if (dry_run)
+            return 0;
         ff_read_frame_flush(pls->ctx);
         return av_seek_frame(pls->ctx, -1, seek_pos_msec * 1000, flags);
     }
@@ -1769,20 +2184,20 @@
     } else if (pls->fragment_duration > 0) {
         pls->cur_seq_no = pls->first_seq_no + ((seek_pos_msec * pls->fragment_timescale) / pls->fragment_duration) / 1000;
     } else {
-        av_log(pls->parent, AV_LOG_ERROR, "dash_seek missing fragment_duration\n");
+        av_log(pls->parent, AV_LOG_ERROR, "dash_seek missing timeline or fragment_duration\n");
         pls->cur_seq_no = pls->first_seq_no;
     }
     pls->cur_timestamp = 0;
     pls->cur_seg_offset = 0;
     pls->init_sec_buf_read_offset = 0;
-    ret = reopen_demux_for_component(s, pls);
+    ret = dry_run ? 0 : reopen_demux_for_component(s, pls);
 
     return ret;
 }
 
 static int dash_read_seek(AVFormatContext *s, int stream_index, int64_t timestamp, int flags)
 {
-    int ret = 0;
+    int ret = 0, i;
     DASHContext *c = s->priv_data;
     int64_t seek_pos_msec = av_rescale_rnd(timestamp, 1000,
                                            s->streams[stream_index]->time_base.den,
@@ -1790,12 +2205,17 @@
                                            AV_ROUND_DOWN : AV_ROUND_UP);
     if ((flags & AVSEEK_FLAG_BYTE) || c->is_live)
         return AVERROR(ENOSYS);
-    if (c->cur_audio) {
-        ret = dash_seek(s, c->cur_audio, seek_pos_msec, flags);
+
+    /* Seek in discarded streams with dry_run=1 to avoid reopening them */
+    for (i = 0; i < c->n_videos; i++) {
+        if (!ret)
+            ret = dash_seek(s, c->videos[i], seek_pos_msec, flags, !c->videos[i]->ctx);
     }
-    if (!ret && c->cur_video) {
-        ret = dash_seek(s, c->cur_video, seek_pos_msec, flags);
+    for (i = 0; i < c->n_audios; i++) {
+        if (!ret)
+            ret = dash_seek(s, c->audios[i], seek_pos_msec, flags, !c->audios[i]->ctx);
     }
+
     return ret;
 }
 

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 240ff41..26a4dce 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c

@@ -1,6 +1,7 @@
 /*
  * MPEG-DASH ISO BMFF segmenter
  * Copyright (c) 2014 Martin Storsjo
+ * Copyright (c) 2018 Akamai Technologies, Inc.
  *
  * This file is part of FFmpeg.
  *
@@ -36,12 +37,23 @@
 #include "avc.h"
 #include "avformat.h"
 #include "avio_internal.h"
+#include "hlsplaylist.h"
+#if CONFIG_HTTP_PROTOCOL
+#include "http.h"
+#endif
 #include "internal.h"
 #include "isom.h"
 #include "os_support.h"
 #include "url.h"
+#include "vpcc.h"
 #include "dash.h"
 
+typedef enum {
+    SEGMENT_TYPE_MP4 = 0,
+    SEGMENT_TYPE_WEBM,
+    SEGMENT_TYPE_NB
+} SegmentType;
+
 typedef struct Segment {
     char file[1024];
     int64_t start_pos;
@@ -55,13 +67,14 @@
     char id[10];
     enum AVMediaType media_type;
     AVDictionary *metadata;
+    AVRational min_frame_rate, max_frame_rate;
+    int ambiguous_frame_rate;
 } AdaptationSet;
 
 typedef struct OutputStream {
     AVFormatContext *ctx;
     int ctx_inited, as_idx;
     AVIOContext *out;
-    char format_name[8];
     int packets_written;
     char initfile[1024];
     int64_t init_start_pos, pos;
@@ -69,11 +82,17 @@
     int nb_segments, segments_size, segment_index;
     Segment **segments;
     int64_t first_pts, start_pts, max_pts;
-    int64_t last_dts;
+    int64_t last_dts, last_pts;
     int bit_rate;
-    char bandwidth_str[64];
 
     char codec_str[100];
+    int written_len;
+    char filename[1024];
+    char full_path[1024];
+    char temp_path[1024];
+    double availability_time_offset;
+    int total_pkt_size;
+    int muxer_overhead;
 } OutputStream;
 
 typedef struct DASHContext {
@@ -83,7 +102,10 @@
     int nb_as;
     int window_size;
     int extra_window_size;
+#if FF_API_DASH_MIN_SEG_DURATION
     int min_seg_duration;
+#endif
+    int64_t seg_duration;
     int remove_at_exit;
     int use_template;
     int use_timeline;
@@ -97,9 +119,20 @@
     const char *single_file_name;
     const char *init_seg_name;
     const char *media_seg_name;
-    AVRational min_frame_rate, max_frame_rate;
-    int ambiguous_frame_rate;
     const char *utc_timing_url;
+    const char *method;
+    const char *user_agent;
+    int hls_playlist;
+    int http_persistent;
+    int master_playlist_created;
+    AVIOContext *mpd_out;
+    AVIOContext *m3u8_out;
+    int streaming;
+    int64_t timeout;
+    int index_correction;
+    char *format_options_str;
+    SegmentType segment_type;
+    const char *format_name;
 } DASHContext;
 
 static struct codec_string {
@@ -113,8 +146,83 @@
     { 0, NULL }
 };
 
+static struct format_string {
+    SegmentType segment_type;
+    const char *str;
+} formats[] = {
+    { SEGMENT_TYPE_MP4, "mp4" },
+    { SEGMENT_TYPE_WEBM, "webm" },
+    { 0, NULL }
+};
+
+static int dashenc_io_open(AVFormatContext *s, AVIOContext **pb, char *filename,
+                           AVDictionary **options) {
+    DASHContext *c = s->priv_data;
+    int http_base_proto = filename ? ff_is_http_proto(filename) : 0;
+    int err = AVERROR_MUXER_NOT_FOUND;
+    if (!*pb || !http_base_proto || !c->http_persistent) {
+        err = s->io_open(s, pb, filename, AVIO_FLAG_WRITE, options);
+#if CONFIG_HTTP_PROTOCOL
+    } else {
+        URLContext *http_url_context = ffio_geturlcontext(*pb);
+        av_assert0(http_url_context);
+        err = ff_http_do_new_request(http_url_context, filename);
+#endif
+    }
+    return err;
+}
+
+static void dashenc_io_close(AVFormatContext *s, AVIOContext **pb, char *filename) {
+    DASHContext *c = s->priv_data;
+    int http_base_proto = filename ? ff_is_http_proto(filename) : 0;
+
+    if (!http_base_proto || !c->http_persistent) {
+        ff_format_io_close(s, pb);
+#if CONFIG_HTTP_PROTOCOL
+    } else {
+        URLContext *http_url_context = ffio_geturlcontext(*pb);
+        av_assert0(http_url_context);
+        avio_flush(*pb);
+        ffurl_shutdown(http_url_context, AVIO_FLAG_WRITE);
+#endif
+    }
+}
+
+static const char *get_format_str(SegmentType segment_type) {
+    int i;
+    for (i = 0; i < SEGMENT_TYPE_NB; i++)
+        if (formats[i].segment_type == segment_type)
+            return formats[i].str;
+    return NULL;
+}
+
+static int check_file_extension(const char *filename, const char *extension) {
+    char *dot;
+    if (!filename || !extension)
+        return -1;
+    dot = strrchr(filename, '.');
+    if (dot && !strcmp(dot + 1, extension))
+        return 0;
+    return -1;
+}
+
+static void set_vp9_codec_str(AVFormatContext *s, AVCodecParameters *par,
+                              AVRational *frame_rate, char *str, int size) {
+    VPCC vpcc;
+    int ret = ff_isom_get_vpcc_features(s, par, frame_rate, &vpcc);
+    if (ret == 0) {
+        av_strlcatf(str, size, "vp09.%02d.%02d.%02d",
+                    vpcc.profile, vpcc.level, vpcc.bitdepth);
+    } else {
+        // Default to just vp9 in case of error while finding out profile or level
+        av_log(s, AV_LOG_WARNING, "Could not find VP9 profile and/or level\n");
+        av_strlcpy(str, "vp9", size);
+    }
+    return;
+}
+
 static void set_codec_str(AVFormatContext *s, AVCodecParameters *par,
-                          char *str, int size)
+                          AVRational *frame_rate, char *str, int size)
 {
     const AVCodecTag *tags[2] = { NULL, NULL };
     uint32_t tag;
@@ -123,7 +231,11 @@
     // common Webm codecs are not part of RFC 6381
     for (i = 0; codecs[i].id; i++)
         if (codecs[i].id == par->codec_id) {
-            av_strlcpy(str, codecs[i].str, size);
+            if (codecs[i].id == AV_CODEC_ID_VP9) {
+                set_vp9_codec_str(s, par, frame_rate, str, size);
+            } else {
+                av_strlcpy(str, codecs[i].str, size);
+            }
             return;
         }
 
@@ -203,13 +315,34 @@
     // write out to file
     *range_length = avio_close_dyn_buf(os->ctx->pb, &buffer);
     os->ctx->pb = NULL;
-    avio_write(os->out, buffer, *range_length);
+    avio_write(os->out, buffer + os->written_len, *range_length - os->written_len);
+    os->written_len = 0;
     av_free(buffer);
 
     // re-open buffer
     return avio_open_dyn_buf(&os->ctx->pb);
 }
 
+static void set_http_options(AVDictionary **options, DASHContext *c)
+{
+    if (c->method)
+        av_dict_set(options, "method", c->method, 0);
+    if (c->user_agent)
+        av_dict_set(options, "user_agent", c->user_agent, 0);
+    if (c->http_persistent)
+        av_dict_set_int(options, "multiple_requests", 1, 0);
+    if (c->timeout >= 0)
+        av_dict_set_int(options, "timeout", c->timeout, 0);
+}
+
+static void get_hls_playlist_name(char *playlist_name, int string_size,
+                                  const char *base_url, int id) {
+    if (base_url)
+        snprintf(playlist_name, string_size, "%smedia_%d.m3u8", base_url, id);
+    else
+        snprintf(playlist_name, string_size, "media_%d.m3u8", id);
+}
+
 static int flush_init_segment(AVFormatContext *s, OutputStream *os)
 {
     DASHContext *c = s->priv_data;
@@ -253,10 +386,15 @@
         av_free(os->segments);
     }
     av_freep(&c->streams);
+
+    ff_format_io_close(s, &c->mpd_out);
+    ff_format_io_close(s, &c->m3u8_out);
 }
 
-static void output_segment_list(OutputStream *os, AVIOContext *out, DASHContext *c)
+static void output_segment_list(OutputStream *os, AVIOContext *out, AVFormatContext *s,
+                                int representation_id, int final)
 {
+    DASHContext *c = s->priv_data;
     int i, start_index = 0, start_number = 1;
     if (c->window_size) {
         start_index  = FFMAX(os->nb_segments   - c->window_size, 0);
@@ -266,8 +404,12 @@
     if (c->use_template) {
         int timescale = c->use_timeline ? os->ctx->streams[0]->time_base.den : AV_TIME_BASE;
         avio_printf(out, "\t\t\t\t<SegmentTemplate timescale=\"%d\" ", timescale);
-        if (!c->use_timeline)
-            avio_printf(out, "duration=\"%"PRId64"\" ", c->last_duration);
+        if (!c->use_timeline) {
+            avio_printf(out, "duration=\"%"PRId64"\" ", c->seg_duration);
+            if (c->streaming && os->availability_time_offset)
+                avio_printf(out, "availabilityTimeOffset=\"%.3f\" ",
+                            os->availability_time_offset);
+        }
         avio_printf(out, "initialization=\"%s\" media=\"%s\" startNumber=\"%d\">\n", c->init_seg_name, c->media_seg_name, c->use_timeline ? start_number : 1);
         if (c->use_timeline) {
             int64_t cur_time = 0;
@@ -315,6 +457,61 @@
         }
         avio_printf(out, "\t\t\t\t</SegmentList>\n");
     }
+    if (c->hls_playlist && start_index < os->nb_segments)
+    {
+        int timescale = os->ctx->streams[0]->time_base.den;
+        char temp_filename_hls[1024];
+        char filename_hls[1024];
+        AVDictionary *http_opts = NULL;
+        int target_duration = 0;
+        int ret = 0;
+        const char *proto = avio_find_protocol_name(c->dirname);
+        int use_rename = proto && !strcmp(proto, "file");
+
+        get_hls_playlist_name(filename_hls, sizeof(filename_hls),
+                              c->dirname, representation_id);
+
+        snprintf(temp_filename_hls, sizeof(temp_filename_hls), use_rename ? "%s.tmp" : "%s", filename_hls);
+
+        set_http_options(&http_opts, c);
+        dashenc_io_open(s, &c->m3u8_out, temp_filename_hls, &http_opts);
+        av_dict_free(&http_opts);
+        for (i = start_index; i < os->nb_segments; i++) {
+            Segment *seg = os->segments[i];
+            double duration = (double) seg->duration / timescale;
+            if (target_duration <= duration)
+                target_duration = lrint(duration);
+        }
+
+        ff_hls_write_playlist_header(c->m3u8_out, 6, -1, target_duration,
+                                     start_number, PLAYLIST_TYPE_NONE);
+
+        ff_hls_write_init_file(c->m3u8_out, os->initfile, c->single_file,
+                               os->init_range_length, os->init_start_pos);
+
+        for (i = start_index; i < os->nb_segments; i++) {
+            Segment *seg = os->segments[i];
+            ret = ff_hls_write_file_entry(c->m3u8_out, 0, c->single_file,
+                                    (double) seg->duration / timescale, 0,
+                                    seg->range_length, seg->start_pos, NULL,
+                                    c->single_file ? os->initfile : seg->file,
+                                    NULL);
+            if (ret < 0) {
+                av_log(os->ctx, AV_LOG_WARNING, "ff_hls_write_file_entry get error\n");
+            }
+        }
+
+        if (final)
+            ff_hls_write_end_list(c->m3u8_out);
+
+        dashenc_io_close(s, &c->m3u8_out, temp_filename_hls);
+
+        if (use_rename)
+            if (avpriv_io_move(temp_filename_hls, filename_hls) < 0) {
+                av_log(os->ctx, AV_LOG_WARNING, "renaming file %s to %s failed\n\n", temp_filename_hls, filename_hls);
+            }
+    }
+
 }
 
 static char *xmlescape(const char *str) {
@@ -384,7 +581,8 @@
     }
 }
 
-static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_index)
+static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_index,
+                                int final)
 {
     DASHContext *c = s->priv_data;
     AdaptationSet *as = &c->as[as_index];
@@ -393,8 +591,8 @@
 
     avio_printf(out, "\t\t<AdaptationSet id=\"%s\" contentType=\"%s\" segmentAlignment=\"true\" bitstreamSwitching=\"true\"",
                 as->id, as->media_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
-    if (as->media_type == AVMEDIA_TYPE_VIDEO && c->max_frame_rate.num && !c->ambiguous_frame_rate)
-        avio_printf(out, " %s=\"%d/%d\"", (av_cmp_q(c->min_frame_rate, c->max_frame_rate) < 0) ? "maxFrameRate" : "frameRate", c->max_frame_rate.num, c->max_frame_rate.den);
+    if (as->media_type == AVMEDIA_TYPE_VIDEO && as->max_frame_rate.num && !as->ambiguous_frame_rate && av_cmp_q(as->min_frame_rate, as->max_frame_rate) < 0)
+        avio_printf(out, " maxFrameRate=\"%d/%d\"", as->max_frame_rate.num, as->max_frame_rate.den);
     lang = av_dict_get(as->metadata, "language", NULL, 0);
     if (lang)
         avio_printf(out, " lang=\"%s\"", lang->value);
@@ -406,24 +604,29 @@
 
     for (i = 0; i < s->nb_streams; i++) {
         OutputStream *os = &c->streams[i];
+        char bandwidth_str[64] = {'\0'};
 
         if (os->as_idx - 1 != as_index)
             continue;
 
+        if (os->bit_rate > 0)
+            snprintf(bandwidth_str, sizeof(bandwidth_str), " bandwidth=\"%d\"",
+                     os->bit_rate);
+
         if (as->media_type == AVMEDIA_TYPE_VIDEO) {
             AVStream *st = s->streams[i];
             avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/%s\" codecs=\"%s\"%s width=\"%d\" height=\"%d\"",
-                i, os->format_name, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
+                i, c->format_name, os->codec_str, bandwidth_str, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
             if (st->avg_frame_rate.num)
                 avio_printf(out, " frameRate=\"%d/%d\"", st->avg_frame_rate.num, st->avg_frame_rate.den);
             avio_printf(out, ">\n");
         } else {
             avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/%s\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n",
-                i, os->format_name, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->sample_rate);
+                i, c->format_name, os->codec_str, bandwidth_str, s->streams[i]->codecpar->sample_rate);
             avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n",
                 s->streams[i]->codecpar->channels);
         }
-        output_segment_list(os, out, c);
+        output_segment_list(os, out, s, i, final);
         avio_printf(out, "\t\t\t</Representation>\n");
     }
     avio_printf(out, "\t\t</AdaptationSet>\n");
@@ -571,20 +774,24 @@
     AVIOContext *out;
     char temp_filename[1024];
     int ret, i;
-    const char *proto = avio_find_protocol_name(s->filename);
+    const char *proto = avio_find_protocol_name(s->url);
     int use_rename = proto && !strcmp(proto, "file");
     static unsigned int warned_non_file = 0;
     AVDictionaryEntry *title = av_dict_get(s->metadata, "title", NULL, 0);
+    AVDictionary *opts = NULL;
 
     if (!use_rename && !warned_non_file++)
         av_log(s, AV_LOG_ERROR, "Cannot use rename on non file protocol, this may lead to races and temporary partial files\n");
 
-    snprintf(temp_filename, sizeof(temp_filename), use_rename ? "%s.tmp" : "%s", s->filename);
-    ret = s->io_open(s, &out, temp_filename, AVIO_FLAG_WRITE, NULL);
+    snprintf(temp_filename, sizeof(temp_filename), use_rename ? "%s.tmp" : "%s", s->url);
+    set_http_options(&opts, c);
+    ret = dashenc_io_open(s, &c->mpd_out, temp_filename, &opts);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Unable to open %s for writing\n", temp_filename);
         return ret;
     }
+    out = c->mpd_out;
+    av_dict_free(&opts);
     avio_printf(out, "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
     avio_printf(out, "<MPD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
                 "\txmlns=\"urn:mpeg:dash:schema:mpd:2011\"\n"
@@ -603,9 +810,6 @@
             update_period = 500;
         avio_printf(out, "\tminimumUpdatePeriod=\"PT%"PRId64"S\"\n", update_period);
         avio_printf(out, "\tsuggestedPresentationDelay=\"PT%"PRId64"S\"\n", c->last_duration / AV_TIME_BASE);
-        if (!c->availability_start_time[0] && s->nb_streams > 0 && c->streams[0].nb_segments > 0) {
-            format_date_now(c->availability_start_time, sizeof(c->availability_start_time));
-        }
         if (c->availability_start_time[0])
             avio_printf(out, "\tavailabilityStartTime=\"%s\"\n", c->availability_start_time);
         format_date_now(now_str, sizeof(now_str));
@@ -627,8 +831,6 @@
         av_free(escaped);
     }
     avio_printf(out, "\t</ProgramInformation>\n");
-    if (c->utc_timing_url)
-        avio_printf(out, "\t<UTCTiming schemeIdUri=\"urn:mpeg:dash:utc:http-xsdate:2014\" value=\"%s\"/>\n", c->utc_timing_url);
 
     if (c->window_size && s->nb_streams > 0 && c->streams[0].nb_segments > 0 && !c->use_template) {
         OutputStream *os = &c->streams[0];
@@ -642,16 +844,92 @@
     }
 
     for (i = 0; i < c->nb_as; i++) {
-        if ((ret = write_adaptation_set(s, out, i)) < 0)
+        if ((ret = write_adaptation_set(s, out, i, final)) < 0)
             return ret;
     }
     avio_printf(out, "\t</Period>\n");
+
+    if (c->utc_timing_url)
+        avio_printf(out, "\t<UTCTiming schemeIdUri=\"urn:mpeg:dash:utc:http-xsdate:2014\" value=\"%s\"/>\n", c->utc_timing_url);
+
     avio_printf(out, "</MPD>\n");
     avio_flush(out);
-    ff_format_io_close(s, &out);
+    dashenc_io_close(s, &c->mpd_out, temp_filename);
 
-    if (use_rename)
-        return avpriv_io_move(temp_filename, s->filename);
+    if (use_rename) {
+        if ((ret = avpriv_io_move(temp_filename, s->url)) < 0)
+            return ret;
+    }
+
+    if (c->hls_playlist && !c->master_playlist_created) {
+        char filename_hls[1024];
+        const char *audio_group = "A1";
+        char audio_codec_str[128] = "\0";
+        int is_default = 1;
+        int max_audio_bitrate = 0;
+
+        if (*c->dirname)
+            snprintf(filename_hls, sizeof(filename_hls), "%smaster.m3u8", c->dirname);
+        else
+            snprintf(filename_hls, sizeof(filename_hls), "master.m3u8");
+
+        snprintf(temp_filename, sizeof(temp_filename), use_rename ? "%s.tmp" : "%s", filename_hls);
+
+        set_http_options(&opts, c);
+        ret = avio_open2(&out, temp_filename, AVIO_FLAG_WRITE, NULL, &opts);
+        if (ret < 0) {
+            av_log(s, AV_LOG_ERROR, "Unable to open %s for writing\n", temp_filename);
+            return ret;
+        }
+        av_dict_free(&opts);
+
+        ff_hls_write_playlist_version(out, 7);
+
+        for (i = 0; i < s->nb_streams; i++) {
+            char playlist_file[64];
+            AVStream *st = s->streams[i];
+            OutputStream *os = &c->streams[i];
+            if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
+                continue;
+            get_hls_playlist_name(playlist_file, sizeof(playlist_file), NULL, i);
+            ff_hls_write_audio_rendition(out, (char *)audio_group,
+                                         playlist_file, i, is_default);
+            max_audio_bitrate = FFMAX(st->codecpar->bit_rate +
+                                      os->muxer_overhead, max_audio_bitrate);
+            if (!av_strnstr(audio_codec_str, os->codec_str, sizeof(audio_codec_str))) {
+                if (strlen(audio_codec_str))
+                    av_strlcat(audio_codec_str, ",", sizeof(audio_codec_str));
+                av_strlcat(audio_codec_str, os->codec_str, sizeof(audio_codec_str));
+            }
+            is_default = 0;
+        }
+
+        for (i = 0; i < s->nb_streams; i++) {
+            char playlist_file[64];
+            char codec_str[128];
+            AVStream *st = s->streams[i];
+            OutputStream *os = &c->streams[i];
+            char *agroup = NULL;
+            int stream_bitrate = st->codecpar->bit_rate + os->muxer_overhead;
+            if (st->codecpar->codec_type != AVMEDIA_TYPE_VIDEO)
+                continue;
+            av_strlcpy(codec_str, os->codec_str, sizeof(codec_str));
+            if (max_audio_bitrate) {
+                agroup = (char *)audio_group;
+                stream_bitrate += max_audio_bitrate;
+                av_strlcat(codec_str, ",", sizeof(codec_str));
+                av_strlcat(codec_str, audio_codec_str, sizeof(codec_str));
+            }
+            get_hls_playlist_name(playlist_file, sizeof(playlist_file), NULL, i);
+            ff_hls_write_stream_info(st, out, stream_bitrate, playlist_file, agroup,
+                                     codec_str, NULL);
+        }
+        avio_close(out);
+        if (use_rename)
+            if ((ret = avpriv_io_move(temp_filename, filename_hls)) < 0)
+                return ret;
+        c->master_playlist_created = 1;
+    }
 
     return 0;
 }
@@ -664,14 +942,6 @@
     return 0;
 }
 
-static int dict_set_int(AVDictionary **pm, const char *key, int64_t value, int flags)
-{
-    char valuestr[22];
-    snprintf(valuestr, sizeof(valuestr), "%"PRId64, value);
-    flags &= ~AV_DICT_DONT_STRDUP_VAL;
-    return av_dict_set(pm, key, valuestr, flags);
-}
-
 static int dash_init(AVFormatContext *s)
 {
     DASHContext *c = s->priv_data;
@@ -683,16 +953,22 @@
         c->single_file = 1;
     if (c->single_file)
         c->use_template = 0;
-    c->ambiguous_frame_rate = 0;
 
-    av_strlcpy(c->dirname, s->filename, sizeof(c->dirname));
+#if FF_API_DASH_MIN_SEG_DURATION
+    if (c->min_seg_duration != 5000000) {
+        av_log(s, AV_LOG_WARNING, "The min_seg_duration option is deprecated and will be removed. Please use the -seg_duration\n");
+        c->seg_duration = c->min_seg_duration;
+    }
+#endif
+
+    av_strlcpy(c->dirname, s->url, sizeof(c->dirname));
     ptr = strrchr(c->dirname, '/');
     if (ptr) {
         av_strlcpy(basename, &ptr[1], sizeof(basename));
         ptr[1] = '\0';
     } else {
         c->dirname[0] = '\0';
-        av_strlcpy(basename, s->filename, sizeof(basename));
+        av_strlcpy(basename, s->url, sizeof(basename));
     }
 
     ptr = strrchr(basename, '.');
@@ -715,10 +991,7 @@
         char filename[1024];
 
         os->bit_rate = s->streams[i]->codecpar->bit_rate;
-        if (os->bit_rate) {
-            snprintf(os->bandwidth_str, sizeof(os->bandwidth_str),
-                     " bandwidth=\"%d\"", os->bit_rate);
-        } else {
+        if (!os->bit_rate) {
             int level = s->strict_std_compliance >= FF_COMPLIANCE_STRICT ?
                         AV_LOG_ERROR : AV_LOG_WARNING;
             av_log(s, level, "No bit rate set for stream %d\n", i);
@@ -734,31 +1007,36 @@
         if (!ctx)
             return AVERROR(ENOMEM);
 
-        // choose muxer based on codec: webm for VP8/9 and opus, mp4 otherwise
-        // note: os->format_name is also used as part of the mimetype of the
-        //       representation, e.g. video/<format_name>
-        if (s->streams[i]->codecpar->codec_id == AV_CODEC_ID_VP8 ||
-            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_VP9 ||
-            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_OPUS ||
-            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_VORBIS) {
-            snprintf(os->format_name, sizeof(os->format_name), "webm");
-        } else {
-            snprintf(os->format_name, sizeof(os->format_name), "mp4");
+        c->format_name = get_format_str(c->segment_type);
+        if (!c->format_name)
+            return AVERROR_MUXER_NOT_FOUND;
+        if (c->segment_type == SEGMENT_TYPE_WEBM) {
+            if ((!c->single_file && check_file_extension(c->init_seg_name, c->format_name) != 0) ||
+                (!c->single_file && check_file_extension(c->media_seg_name, c->format_name) != 0) ||
+                (c->single_file && check_file_extension(c->single_file_name, c->format_name) != 0)) {
+                av_log(s, AV_LOG_WARNING,
+                       "One or many segment file names doesn't end with .webm. "
+                       "Override -init_seg_name and/or -media_seg_name and/or "
+                       "-single_file_name to end with the extension .webm\n");
+            }
         }
-        ctx->oformat = av_guess_format(os->format_name, NULL, NULL);
+
+        ctx->oformat = av_guess_format(c->format_name, NULL, NULL);
         if (!ctx->oformat)
             return AVERROR_MUXER_NOT_FOUND;
         os->ctx = ctx;
-        ctx->interrupt_callback = s->interrupt_callback;
-        ctx->opaque             = s->opaque;
-        ctx->io_close           = s->io_close;
-        ctx->io_open            = s->io_open;
+        ctx->interrupt_callback    = s->interrupt_callback;
+        ctx->opaque                = s->opaque;
+        ctx->io_close              = s->io_close;
+        ctx->io_open               = s->io_open;
+        ctx->strict_std_compliance = s->strict_std_compliance;
 
         if (!(st = avformat_new_stream(ctx, NULL)))
             return AVERROR(ENOMEM);
         avcodec_parameters_copy(st->codecpar, s->streams[i]->codecpar);
         st->sample_aspect_ratio = s->streams[i]->sample_aspect_ratio;
         st->time_base = s->streams[i]->time_base;
+        st->avg_frame_rate = s->streams[i]->avg_frame_rate;
         ctx->avoid_negative_ts = s->avoid_negative_ts;
         ctx->flags = s->flags;
 
@@ -774,18 +1052,32 @@
             ff_dash_fill_tmpl_params(os->initfile, sizeof(os->initfile), c->init_seg_name, i, 0, os->bit_rate, 0);
         }
         snprintf(filename, sizeof(filename), "%s%s", c->dirname, os->initfile);
-        ret = s->io_open(s, &os->out, filename, AVIO_FLAG_WRITE, NULL);
+        set_http_options(&opts, c);
+        ret = s->io_open(s, &os->out, filename, AVIO_FLAG_WRITE, &opts);
         if (ret < 0)
             return ret;
+        av_dict_free(&opts);
         os->init_start_pos = 0;
 
-        if (!strcmp(os->format_name, "mp4")) {
-            av_dict_set(&opts, "movflags", "frag_custom+dash+delay_moov", 0);
-        } else {
-            dict_set_int(&opts, "cluster_time_limit", c->min_seg_duration / 1000, 0);
-            dict_set_int(&opts, "cluster_size_limit", 5 * 1024 * 1024, 0); // set a large cluster size limit
+        if (c->format_options_str) {
+            ret = av_dict_parse_string(&opts, c->format_options_str, "=", ":", 0);
+            if (ret < 0)
+                return ret;
         }
-        if ((ret = avformat_write_header(ctx, &opts)) < 0)
+
+        if (c->segment_type == SEGMENT_TYPE_MP4) {
+            if (c->streaming)
+                av_dict_set(&opts, "movflags", "frag_every_frame+dash+delay_moov+global_sidx", 0);
+            else
+                av_dict_set(&opts, "movflags", "frag_custom+dash+delay_moov", 0);
+        } else {
+            av_dict_set_int(&opts, "cluster_time_limit", c->seg_duration / 1000, 0);
+            av_dict_set_int(&opts, "cluster_size_limit", 5 * 1024 * 1024, 0); // set a large cluster size limit
+            av_dict_set_int(&opts, "dash", 1, 0);
+            av_dict_set_int(&opts, "dash_track_number", i + 1, 0);
+            av_dict_set_int(&opts, "live", 1, 0);
+        }
+        if ((ret = avformat_init_output(ctx, &opts)) < 0)
             return ret;
         os->ctx_inited = 1;
         avio_flush(ctx->pb);
@@ -793,13 +1085,6 @@
 
         av_log(s, AV_LOG_VERBOSE, "Representation %d init segment will be written to: %s\n", i, filename);
 
-        // Flush init segment
-        // except for mp4, since delay_moov is set and the init segment
-        // is then flushed after the first packets
-        if (strcmp(os->format_name, "mp4")) {
-            flush_init_segment(s, os);
-        }
-
         s->streams[i]->time_base = st->time_base;
         // If the muxer wants to shift timestamps, request to have them shifted
         // already before being handed to this muxer, so we don't have mismatches
@@ -808,25 +1093,26 @@
         if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
             AVRational avg_frame_rate = s->streams[i]->avg_frame_rate;
             if (avg_frame_rate.num > 0) {
-                if (av_cmp_q(avg_frame_rate, c->min_frame_rate) < 0)
-                    c->min_frame_rate = avg_frame_rate;
-                if (av_cmp_q(c->max_frame_rate, avg_frame_rate) < 0)
-                    c->max_frame_rate = avg_frame_rate;
+                if (av_cmp_q(avg_frame_rate, as->min_frame_rate) < 0)
+                    as->min_frame_rate = avg_frame_rate;
+                if (av_cmp_q(as->max_frame_rate, avg_frame_rate) < 0)
+                    as->max_frame_rate = avg_frame_rate;
             } else {
-                c->ambiguous_frame_rate = 1;
+                as->ambiguous_frame_rate = 1;
             }
             c->has_video = 1;
         }
 
-        set_codec_str(s, st->codecpar, os->codec_str, sizeof(os->codec_str));
+        set_codec_str(s, st->codecpar, &st->avg_frame_rate, os->codec_str,
+                      sizeof(os->codec_str));
         os->first_pts = AV_NOPTS_VALUE;
         os->max_pts = AV_NOPTS_VALUE;
         os->last_dts = AV_NOPTS_VALUE;
         os->segment_index = 1;
     }
 
-    if (!c->has_video && c->min_seg_duration <= 0) {
-        av_log(s, AV_LOG_WARNING, "no video stream and no min seg duration set\n");
+    if (!c->has_video && c->seg_duration <= 0) {
+        av_log(s, AV_LOG_WARNING, "no video stream and no seg duration set\n");
         return AVERROR(EINVAL);
     }
     return 0;
@@ -838,21 +1124,23 @@
     int i, ret;
     for (i = 0; i < s->nb_streams; i++) {
         OutputStream *os = &c->streams[i];
-        if ((ret = avformat_write_header(os->ctx, NULL)) < 0) {
-            dash_free(s);
+        if ((ret = avformat_write_header(os->ctx, NULL)) < 0)
             return ret;
-        }
+
+        // Flush init segment
+        // Only for WebM segment, since for mp4 delay_moov is set and
+        // the init segment is thus flushed after the first packets.
+        if (c->segment_type == SEGMENT_TYPE_WEBM &&
+            (ret = flush_init_segment(s, os)) < 0)
+            return ret;
     }
-    ret = write_manifest(s, 0);
-    if (!ret)
-        av_log(s, AV_LOG_VERBOSE, "Manifest written to: %s\n", s->filename);
     return ret;
 }
 
 static int add_segment(OutputStream *os, const char *file,
                        int64_t time, int duration,
                        int64_t start_pos, int64_t range_length,
-                       int64_t index_length)
+                       int64_t index_length, int next_exp_index)
 {
     int err;
     Segment *seg;
@@ -880,6 +1168,12 @@
     seg->index_length = index_length;
     os->segments[os->nb_segments++] = seg;
     os->segment_index++;
+    //correcting the segment index if it has fallen behind the expected value
+    if (os->segment_index < next_exp_index) {
+        av_log(NULL, AV_LOG_WARNING, "Correcting the segment index after file %s: current=%d corrected=%d\n",
+               file, os->segment_index, next_exp_index);
+        os->segment_index = next_exp_index;
+    }
     return 0;
 }
 
@@ -917,7 +1211,8 @@
 }
 
 static int update_stream_extradata(AVFormatContext *s, OutputStream *os,
-                                   AVCodecParameters *par)
+                                   AVCodecParameters *par,
+                                   AVRational *frame_rate)
 {
     uint8_t *extradata;
 
@@ -934,26 +1229,60 @@
     os->ctx->streams[0]->codecpar->extradata = extradata;
     os->ctx->streams[0]->codecpar->extradata_size = par->extradata_size;
 
-    set_codec_str(s, par, os->codec_str, sizeof(os->codec_str));
+    set_codec_str(s, par, frame_rate, os->codec_str, sizeof(os->codec_str));
 
     return 0;
 }
 
+static void dashenc_delete_file(AVFormatContext *s, char *filename) {
+    DASHContext *c = s->priv_data;
+    int http_base_proto = ff_is_http_proto(filename);
+
+    if (http_base_proto) {
+        AVIOContext *out = NULL;
+        AVDictionary *http_opts = NULL;
+
+        set_http_options(&http_opts, c);
+        av_dict_set(&http_opts, "method", "DELETE", 0);
+
+        if (dashenc_io_open(s, &out, filename, &http_opts) < 0) {
+            av_log(s, AV_LOG_ERROR, "failed to delete %s\n", filename);
+        }
+
+        av_dict_free(&http_opts);
+        dashenc_io_close(s, &out, filename);
+    } else if (unlink(filename) < 0) {
+        av_log(s, AV_LOG_ERROR, "failed to delete %s: %s\n", filename, strerror(errno));
+    }
+}
+
 static int dash_flush(AVFormatContext *s, int final, int stream)
 {
     DASHContext *c = s->priv_data;
     int i, ret = 0;
 
-    const char *proto = avio_find_protocol_name(s->filename);
+    const char *proto = avio_find_protocol_name(s->url);
     int use_rename = proto && !strcmp(proto, "file");
 
-    int cur_flush_segment_index = 0;
-    if (stream >= 0)
+    int cur_flush_segment_index = 0, next_exp_index = -1;
+    if (stream >= 0) {
         cur_flush_segment_index = c->streams[stream].segment_index;
 
+        //finding the next segment's expected index, based on the current pts value
+        if (c->use_template && !c->use_timeline && c->index_correction &&
+            c->streams[stream].last_pts != AV_NOPTS_VALUE &&
+            c->streams[stream].first_pts != AV_NOPTS_VALUE) {
+            int64_t pts_diff = av_rescale_q(c->streams[stream].last_pts -
+                                            c->streams[stream].first_pts,
+                                            s->streams[stream]->time_base,
+                                            AV_TIME_BASE_Q);
+            next_exp_index = (pts_diff / c->seg_duration) + 1;
+        }
+    }
+
     for (i = 0; i < s->nb_streams; i++) {
         OutputStream *os = &c->streams[i];
-        char filename[1024] = "", full_path[1024], temp_path[1024];
+        AVStream *st = s->streams[i];
         int range_length, index_length = 0;
 
         if (!os->packets_written)
@@ -971,21 +1300,11 @@
                 continue;
         }
 
-        if (!os->init_range_length) {
-            flush_init_segment(s, os);
-        }
-
         if (!c->single_file) {
-            ff_dash_fill_tmpl_params(filename, sizeof(filename), c->media_seg_name, i, os->segment_index, os->bit_rate, os->start_pts);
-            snprintf(full_path, sizeof(full_path), "%s%s", c->dirname, filename);
-            snprintf(temp_path, sizeof(temp_path), use_rename ? "%s.tmp" : "%s", full_path);
-            ret = s->io_open(s, &os->out, temp_path, AVIO_FLAG_WRITE, NULL);
-            if (ret < 0)
-                break;
-            if (!strcmp(os->format_name, "mp4"))
+            if (c->segment_type == SEGMENT_TYPE_MP4 && !os->written_len)
                 write_styp(os->ctx->pb);
         } else {
-            snprintf(full_path, sizeof(full_path), "%s%s", c->dirname, os->initfile);
+            snprintf(os->full_path, sizeof(os->full_path), "%s%s", c->dirname, os->initfile);
         }
 
         ret = flush_dynbuf(os, &range_length);
@@ -994,28 +1313,34 @@
         os->packets_written = 0;
 
         if (c->single_file) {
-            find_index_range(s, full_path, os->pos, &index_length);
+            find_index_range(s, os->full_path, os->pos, &index_length);
         } else {
-            ff_format_io_close(s, &os->out);
+            dashenc_io_close(s, &os->out, os->temp_path);
 
             if (use_rename) {
-                ret = avpriv_io_move(temp_path, full_path);
+                ret = avpriv_io_move(os->temp_path, os->full_path);
                 if (ret < 0)
                     break;
             }
         }
 
+        if (!os->muxer_overhead)
+            os->muxer_overhead = ((int64_t) (range_length - os->total_pkt_size) *
+                                  8 * AV_TIME_BASE) /
+                                 av_rescale_q(os->max_pts - os->start_pts,
+                                              st->time_base, AV_TIME_BASE_Q);
+        os->total_pkt_size = 0;
+
         if (!os->bit_rate) {
             // calculate average bitrate of first segment
-            int64_t bitrate = (int64_t) range_length * 8 * AV_TIME_BASE / (os->max_pts - os->start_pts);
-            if (bitrate >= 0) {
+            int64_t bitrate = (int64_t) range_length * 8 * AV_TIME_BASE / av_rescale_q(os->max_pts - os->start_pts,
+                                                                                       st->time_base,
+                                                                                       AV_TIME_BASE_Q);
+            if (bitrate >= 0)
                 os->bit_rate = bitrate;
-                snprintf(os->bandwidth_str, sizeof(os->bandwidth_str),
-                     " bandwidth=\"%d\"", os->bit_rate);
-            }
         }
-        add_segment(os, filename, os->start_pts, os->max_pts - os->start_pts, os->pos, range_length, index_length);
-        av_log(s, AV_LOG_VERBOSE, "Representation %d media segment %d written to: %s\n", i, os->segment_index, full_path);
+        add_segment(os, os->filename, os->start_pts, os->max_pts - os->start_pts, os->pos, range_length, index_length, next_exp_index);
+        av_log(s, AV_LOG_VERBOSE, "Representation %d media segment %d written to: %s\n", i, os->segment_index, os->full_path);
 
         os->pos += range_length;
     }
@@ -1031,7 +1356,7 @@
                 for (j = 0; j < remove; j++) {
                     char filename[1024];
                     snprintf(filename, sizeof(filename), "%s%s", c->dirname, os->segments[j]->file);
-                    unlink(filename);
+                    dashenc_delete_file(s, filename);
                     av_free(os->segments[j]);
                 }
                 os->nb_segments -= remove;
@@ -1050,9 +1375,10 @@
     DASHContext *c = s->priv_data;
     AVStream *st = s->streams[pkt->stream_index];
     OutputStream *os = &c->streams[pkt->stream_index];
+    int64_t seg_end_duration, elapsed_duration;
     int ret;
 
-    ret = update_stream_extradata(s, os, st->codecpar);
+    ret = update_stream_extradata(s, os, st->codecpar, &st->avg_frame_rate);
     if (ret < 0)
         return ret;
 
@@ -1076,11 +1402,31 @@
 
     if (os->first_pts == AV_NOPTS_VALUE)
         os->first_pts = pkt->pts;
+    os->last_pts = pkt->pts;
+
+    if (!c->availability_start_time[0])
+        format_date_now(c->availability_start_time,
+                        sizeof(c->availability_start_time));
+
+    if (!os->availability_time_offset && pkt->duration) {
+        int64_t frame_duration = av_rescale_q(pkt->duration, st->time_base,
+                                              AV_TIME_BASE_Q);
+         os->availability_time_offset = ((double) c->seg_duration -
+                                         frame_duration) / AV_TIME_BASE;
+    }
+
+    if (c->use_template && !c->use_timeline) {
+        elapsed_duration = pkt->pts - os->first_pts;
+        seg_end_duration = (int64_t) os->segment_index * c->seg_duration;
+    } else {
+        elapsed_duration = pkt->pts - os->start_pts;
+        seg_end_duration = c->seg_duration;
+    }
 
     if ((!c->has_video || st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) &&
         pkt->flags & AV_PKT_FLAG_KEY && os->packets_written &&
-        av_compare_ts(pkt->pts - os->start_pts, st->time_base,
-                      c->min_seg_duration, AV_TIME_BASE_Q) >= 0) {
+        av_compare_ts(elapsed_duration, st->time_base,
+                      seg_end_duration, AV_TIME_BASE_Q) >= 0) {
         int64_t prev_duration = c->last_duration;
 
         c->last_duration = av_rescale_q(pkt->pts - os->start_pts,
@@ -1117,7 +1463,47 @@
     else
         os->max_pts = FFMAX(os->max_pts, pkt->pts + pkt->duration);
     os->packets_written++;
-    return ff_write_chained(os->ctx, 0, pkt, s, 0);
+    os->total_pkt_size += pkt->size;
+    if ((ret = ff_write_chained(os->ctx, 0, pkt, s, 0)) < 0)
+        return ret;
+
+    if (!os->init_range_length)
+        flush_init_segment(s, os);
+
+    //open the output context when the first frame of a segment is ready
+    if (!c->single_file && os->packets_written == 1) {
+        AVDictionary *opts = NULL;
+        const char *proto = avio_find_protocol_name(s->url);
+        int use_rename = proto && !strcmp(proto, "file");
+        os->filename[0] = os->full_path[0] = os->temp_path[0] = '\0';
+        ff_dash_fill_tmpl_params(os->filename, sizeof(os->filename),
+                                 c->media_seg_name, pkt->stream_index,
+                                 os->segment_index, os->bit_rate, os->start_pts);
+        snprintf(os->full_path, sizeof(os->full_path), "%s%s", c->dirname,
+                 os->filename);
+        snprintf(os->temp_path, sizeof(os->temp_path),
+                 use_rename ? "%s.tmp" : "%s", os->full_path);
+        set_http_options(&opts, c);
+        ret = dashenc_io_open(s, &os->out, os->temp_path, &opts);
+        if (ret < 0)
+            return ret;
+        av_dict_free(&opts);
+    }
+
+    //write out the data immediately in streaming mode
+    if (c->streaming && c->segment_type == SEGMENT_TYPE_MP4) {
+        int len = 0;
+        uint8_t *buf = NULL;
+        if (!os->written_len)
+            write_styp(os->ctx->pb);
+        avio_flush(os->ctx->pb);
+        len = avio_get_dyn_buf (os->ctx->pb, &buf);
+        avio_write(os->out, buf + os->written_len, len - os->written_len);
+        os->written_len = len;
+        avio_flush(os->out);
+    }
+
+    return ret;
 }
 
 static int dash_write_trailer(AVFormatContext *s)
@@ -1144,9 +1530,9 @@
         for (i = 0; i < s->nb_streams; i++) {
             OutputStream *os = &c->streams[i];
             snprintf(filename, sizeof(filename), "%s%s", c->dirname, os->initfile);
-            unlink(filename);
+            dashenc_delete_file(s, filename);
         }
-        unlink(s->filename);
+        dashenc_delete_file(s, s->url);
     }
 
     return 0;
@@ -1181,7 +1567,10 @@
     { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
     { "window_size", "number of segments kept in the manifest", OFFSET(window_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, E },
     { "extra_window_size", "number of segments kept outside of the manifest before removing from disk", OFFSET(extra_window_size), AV_OPT_TYPE_INT, { .i64 = 5 }, 0, INT_MAX, E },
-    { "min_seg_duration", "minimum segment duration (in microseconds)", OFFSET(min_seg_duration), AV_OPT_TYPE_INT64, { .i64 = 5000000 }, 0, INT_MAX, E },
+#if FF_API_DASH_MIN_SEG_DURATION
+    { "min_seg_duration", "minimum segment duration (in microseconds) (will be deprecated)", OFFSET(min_seg_duration), AV_OPT_TYPE_INT, { .i64 = 5000000 }, 0, INT_MAX, E },
+#endif
+    { "seg_duration", "segment duration (in seconds, fractional value can be set)", OFFSET(seg_duration), AV_OPT_TYPE_DURATION, { .i64 = 5000000 }, 0, INT_MAX, E },
     { "remove_at_exit", "remove all segments when finished", OFFSET(remove_at_exit), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
     { "use_template", "Use SegmentTemplate instead of SegmentList", OFFSET(use_template), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, E },
     { "use_timeline", "Use SegmentTimeline in SegmentTemplate", OFFSET(use_timeline), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, E },
@@ -1190,6 +1579,17 @@
     { "init_seg_name", "DASH-templated name to used for the initialization segment", OFFSET(init_seg_name), AV_OPT_TYPE_STRING, {.str = "init-stream$RepresentationID$.m4s"}, 0, 0, E },
     { "media_seg_name", "DASH-templated name to used for the media segments", OFFSET(media_seg_name), AV_OPT_TYPE_STRING, {.str = "chunk-stream$RepresentationID$-$Number%05d$.m4s"}, 0, 0, E },
     { "utc_timing_url", "URL of the page that will return the UTC timestamp in ISO format", OFFSET(utc_timing_url), AV_OPT_TYPE_STRING, { 0 }, 0, 0, E },
+    { "method", "set the HTTP method", OFFSET(method), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, E },
+    { "http_user_agent", "override User-Agent field in HTTP header", OFFSET(user_agent), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, E},
+    { "http_persistent", "Use persistent HTTP connections", OFFSET(http_persistent), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, E },
+    { "hls_playlist", "Generate HLS playlist files(master.m3u8, media_%d.m3u8)", OFFSET(hls_playlist), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
+    { "streaming", "Enable/Disable streaming mode of output. Each frame will be moof fragment", OFFSET(streaming), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
+    { "timeout", "set timeout for socket I/O operations", OFFSET(timeout), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT_MAX, .flags = E },
+    { "index_correction", "Enable/Disable segment index correction logic", OFFSET(index_correction), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
+    { "format_options","set list of options for the container format (mp4/webm) used for dash", OFFSET(format_options_str), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0, E},
+    { "dash_segment_type", "set dash segment files type", OFFSET(segment_type), AV_OPT_TYPE_INT, {.i64 = SEGMENT_TYPE_MP4 }, 0, SEGMENT_TYPE_NB - 1, E, "segment_type"},
+    { "mp4", "make segment file in ISOBMFF format", 0, AV_OPT_TYPE_CONST, {.i64 = SEGMENT_TYPE_MP4 }, 0, UINT_MAX,   E, "segment_type"},
+    { "webm", "make segment file in WebM format", 0, AV_OPT_TYPE_CONST, {.i64 = SEGMENT_TYPE_WEBM }, 0, UINT_MAX,   E, "segment_type"},
     { NULL },
 };
 
@@ -1203,6 +1603,7 @@
 AVOutputFormat ff_dash_muxer = {
     .name           = "dash",
     .long_name      = NULL_IF_CONFIG_SMALL("DASH Muxer"),
+    .extensions     = "mpd",
     .priv_data_size = sizeof(DASHContext),
     .audio_codec    = AV_CODEC_ID_AAC,
     .video_codec    = AV_CODEC_ID_H264,

diff --git a/libavformat/davs2.c b/libavformat/davs2.c
new file mode 100644
index 0000000..df2667f
--- /dev/null
+++ b/libavformat/davs2.c

@@ -0,0 +1,71 @@
+/*
+ * AVS2 video stream probe.
+ *
+ * Copyright (C) 2018 Huiwen Ren, <hwrenx@126.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "rawdec.h"
+#include "libavcodec/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define ISSQH(x)  ((x) == 0xB0 )
+#define ISEND(x)  ((x) == 0xB1 )
+#define ISPIC(x)  ((x) == 0xB3 || (x) == 0xB6)
+#define ISUNIT(x) ( ISSQH(x) || ISEND(x) || (x) == 0xB2 || ISPIC(x) || (x) == 0xB5 || (x) == 0xB7 )
+#define ISAVS2(x) ((x) == 0x20 || (x) == 0x22 || (x) == 0x30 || (x) == 0x32 )
+
+static int avs2_probe(AVProbeData *p)
+{
+    uint32_t code= -1, hds=0, pic=0, seq=0;
+    uint8_t state=0;
+    const uint8_t *ptr = p->buf, *end = p->buf + p->buf_size, *sqb=0;
+    if (AV_RB32(p->buf) != 0x1B0){
+        return 0;
+    }
+
+    while (ptr < end) {
+        ptr = avpriv_find_start_code(ptr, end, &code);
+        state = code & 0xFF;
+        if ((code & 0xffffff00) == 0x100) {
+            if (ISUNIT(state)) {
+                if (sqb && !hds) {
+                    hds = ptr - sqb;
+                }
+                if (ISSQH(state)) {
+                    if (!ISAVS2(*ptr))
+                        return 0;
+                    sqb = ptr;
+                    seq++;
+                } else if (ISPIC(state)) {
+                    pic++;
+                } else if (ISEND(state)) {
+                    break;
+                }
+            }
+        }
+    }
+    if (seq && hds >= 21 && pic){
+        return AVPROBE_SCORE_EXTENSION + 2; // more than cavs
+    }
+
+    return 0;
+}
+
+FF_DEF_RAWVIDEO_DEMUXER(avs2, "raw AVS2-P2/IEEE1857.4", avs2_probe, "avs,avs2", AV_CODEC_ID_AVS2)

diff --git a/libavformat/dsfdec.c b/libavformat/dsfdec.c
index 41538fd..5e06fd6 100644
--- a/libavformat/dsfdec.c
+++ b/libavformat/dsfdec.c

@@ -26,6 +26,8 @@
 
 typedef struct {
     uint64_t data_end;
+    uint64_t audio_size;
+    uint64_t data_size;
 } DSFContext;
 
 static int dsf_probe(AVProbeData *p)
@@ -120,7 +122,7 @@
         return AVERROR_INVALIDDATA;
     }
 
-    avio_skip(pb, 8);
+    dsf->audio_size = avio_rl64(pb) / 8 * st->codecpar->channels;
     st->codecpar->block_align = avio_rl32(pb);
     if (st->codecpar->block_align > INT_MAX / st->codecpar->channels) {
         avpriv_request_sample(s, "block_align overflow");
@@ -135,7 +137,9 @@
     dsf->data_end = avio_tell(pb);
     if (avio_rl32(pb) != MKTAG('d', 'a', 't', 'a'))
         return AVERROR_INVALIDDATA;
-    dsf->data_end += avio_rl64(pb);
+    dsf->data_size = avio_rl64(pb) - 12;
+    dsf->data_end += dsf->data_size + 12;
+    s->internal->data_offset = avio_tell(pb);
 
     return 0;
 }
@@ -151,6 +155,34 @@
         return AVERROR_EOF;
 
     pkt->stream_index = 0;
+    if (dsf->data_size > dsf->audio_size) {
+        int last_packet = pos == (dsf->data_end - st->codecpar->block_align);
+
+        if (last_packet) {
+            int64_t data_pos = pos - s->internal->data_offset;
+            int64_t packet_size = dsf->audio_size - data_pos;
+            int64_t skip_size = dsf->data_size - data_pos - packet_size;
+            uint8_t *dst;
+            int ch, ret;
+
+            if (packet_size <= 0 || skip_size <= 0)
+                return AVERROR_INVALIDDATA;
+
+            if (av_new_packet(pkt, packet_size) < 0)
+                return AVERROR(ENOMEM);
+            dst = pkt->data;
+            for (ch = 0; ch < st->codecpar->channels; ch++) {
+                ret = avio_read(pb, dst,  packet_size / st->codecpar->channels);
+                if (ret < packet_size / st->codecpar->channels)
+                    return AVERROR_EOF;
+
+                dst += ret;
+                avio_skip(pb, skip_size / st->codecpar->channels);
+            }
+
+            return 0;
+        }
+    }
     return av_get_packet(pb, pkt, FFMIN(dsf->data_end - pos, st->codecpar->block_align));
 }
 

diff --git a/libavformat/dump.c b/libavformat/dump.c
index 77043e3..bc0f401 100644
--- a/libavformat/dump.c
+++ b/libavformat/dump.c

@@ -372,7 +372,9 @@
         size_t l, t, r, b;
         av_spherical_tile_bounds(spherical, par->width, par->height,
                                  &l, &t, &r, &b);
-        av_log(ctx, AV_LOG_INFO, "[%zu, %zu, %zu, %zu] ", l, t, r, b);
+        av_log(ctx, AV_LOG_INFO,
+               "[%"SIZE_SPECIFIER", %"SIZE_SPECIFIER", %"SIZE_SPECIFIER", %"SIZE_SPECIFIER"] ",
+               l, t, r, b);
     } else if (spherical->projection == AV_SPHERICAL_CUBEMAP) {
         av_log(ctx, AV_LOG_INFO, "[pad %"PRIu32"] ", spherical->padding);
     }
@@ -545,6 +547,12 @@
         av_log(NULL, AV_LOG_INFO, " (visual impaired)");
     if (st->disposition & AV_DISPOSITION_CLEAN_EFFECTS)
         av_log(NULL, AV_LOG_INFO, " (clean effects)");
+    if (st->disposition & AV_DISPOSITION_DESCRIPTIONS)
+        av_log(NULL, AV_LOG_INFO, " (descriptions)");
+    if (st->disposition & AV_DISPOSITION_DEPENDENT)
+        av_log(NULL, AV_LOG_INFO, " (dependent)");
+    if (st->disposition & AV_DISPOSITION_STILL_IMAGE)
+        av_log(NULL, AV_LOG_INFO, " (still image)");
     av_log(NULL, AV_LOG_INFO, "\n");
 
     dump_metadata(NULL, st->metadata, "    ");

diff --git a/libavformat/ffm.h b/libavformat/ffm.h
deleted file mode 100644
index c445f47..0000000
--- a/libavformat/ffm.h
+++ /dev/null

@@ -1,62 +0,0 @@
-/*
- * FFM (ffserver live feed) common header
- * Copyright (c) 2001 Fabrice Bellard
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVFORMAT_FFM_H
-#define AVFORMAT_FFM_H
-
-#include <stdint.h>
-#include "avformat.h"
-#include "avio.h"
-
-/* The FFM file is made of blocks of fixed size */
-#define FFM_HEADER_SIZE 14
-#define FFM_PACKET_SIZE 4096
-#define PACKET_ID       0x666d
-
-/* each packet contains frames (which can span several packets */
-#define FRAME_HEADER_SIZE    16
-#define FLAG_KEY_FRAME       0x01
-#define FLAG_DTS             0x02
-
-enum {
-    READ_HEADER,
-    READ_DATA,
-};
-
-typedef struct FFMContext {
-    const AVClass *class;
-    /* only reading mode */
-    int64_t write_index, file_size;
-    int read_state;
-    uint8_t header[FRAME_HEADER_SIZE+4];
-
-    /* read and write */
-    int first_packet; /* true if first packet, needed to set the discontinuity tag */
-    int packet_size;
-    int frame_offset;
-    int64_t dts;
-    uint8_t *packet_ptr, *packet_end;
-    uint8_t packet[FFM_PACKET_SIZE];
-    int64_t start_time;
-    int server_attached;
-} FFMContext;
-
-#endif /* AVFORMAT_FFM_H */

diff --git a/libavformat/ffmdec.c b/libavformat/ffmdec.c
deleted file mode 100644
index de6ac27..0000000
--- a/libavformat/ffmdec.c
+++ /dev/null

@@ -1,878 +0,0 @@
-/*
- * FFM (ffserver live feed) demuxer
- * Copyright (c) 2001 Fabrice Bellard
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "libavutil/imgutils.h"
-#include "libavutil/internal.h"
-#include "libavutil/intreadwrite.h"
-#include "libavutil/intfloat.h"
-#include "libavutil/opt.h"
-#include "libavutil/avassert.h"
-#include "libavutil/avstring.h"
-#include "libavutil/pixdesc.h"
-#include "libavcodec/internal.h"
-#include "avformat.h"
-#include "internal.h"
-#include "ffm.h"
-#include "avio_internal.h"
-
-static int ffm_is_avail_data(AVFormatContext *s, int size)
-{
-    FFMContext *ffm = s->priv_data;
-    int64_t pos, avail_size;
-    ptrdiff_t len;
-
-    len = ffm->packet_end - ffm->packet_ptr;
-    if (size <= len)
-        return 1;
-    pos = avio_tell(s->pb);
-    if (!ffm->write_index) {
-        if (pos == ffm->file_size)
-            return AVERROR_EOF;
-        avail_size = ffm->file_size - pos;
-    } else {
-    if (pos == ffm->write_index) {
-        /* exactly at the end of stream */
-        if (ffm->server_attached)
-            return AVERROR(EAGAIN);
-        else
-            return AVERROR_INVALIDDATA;
-    } else if (pos < ffm->write_index) {
-        avail_size = ffm->write_index - pos;
-    } else {
-        avail_size = (ffm->file_size - pos) + (ffm->write_index - FFM_PACKET_SIZE);
-    }
-    }
-    avail_size = (avail_size / ffm->packet_size) * (ffm->packet_size - FFM_HEADER_SIZE) + len;
-    if (size <= avail_size)
-        return 1;
-    else if (ffm->server_attached)
-        return AVERROR(EAGAIN);
-    else
-        return AVERROR_INVALIDDATA;
-}
-
-static int ffm_resync(AVFormatContext *s, uint32_t state)
-{
-    av_log(s, AV_LOG_ERROR, "resyncing\n");
-    while (state != PACKET_ID) {
-        if (avio_feof(s->pb)) {
-            av_log(s, AV_LOG_ERROR, "cannot find FFM syncword\n");
-            return -1;
-        }
-        state = (state << 8) | avio_r8(s->pb);
-    }
-    return 0;
-}
-
-/* first is true if we read the frame header */
-static int ffm_read_data(AVFormatContext *s,
-                         uint8_t *buf, int size, int header)
-{
-    FFMContext *ffm = s->priv_data;
-    AVIOContext *pb = s->pb;
-    int fill_size, size1, frame_offset;
-    uint32_t id;
-    ptrdiff_t len;
-    int64_t last_pos = -1;
-
-    size1 = size;
-    while (size > 0) {
-    redo:
-        len = ffm->packet_end - ffm->packet_ptr;
-        if (len < 0)
-            return -1;
-        if (len > size)
-            len = size;
-        if (len == 0) {
-            if (avio_tell(pb) == ffm->file_size) {
-                if (ffm->server_attached) {
-                    avio_seek(pb, ffm->packet_size, SEEK_SET);
-                } else
-                    return AVERROR_EOF;
-            }
-    retry_read:
-            if (pb->buffer_size != ffm->packet_size) {
-                int64_t tell = avio_tell(pb);
-                int ret = ffio_set_buf_size(pb, ffm->packet_size);
-                if (ret < 0)
-                    return ret;
-                avio_seek(pb, tell, SEEK_SET);
-            }
-            id = avio_rb16(pb); /* PACKET_ID */
-            if (id != PACKET_ID) {
-                if (ffm_resync(s, id) < 0)
-                    return -1;
-                last_pos = avio_tell(pb);
-            }
-            fill_size = avio_rb16(pb);
-            ffm->dts = avio_rb64(pb);
-            frame_offset = avio_rb16(pb);
-            avio_read(pb, ffm->packet, ffm->packet_size - FFM_HEADER_SIZE);
-            if (ffm->packet_size < FFM_HEADER_SIZE + fill_size || frame_offset < 0) {
-                return -1;
-            }
-            ffm->packet_end = ffm->packet + (ffm->packet_size - FFM_HEADER_SIZE - fill_size);
-            /* if first packet or resynchronization packet, we must
-               handle it specifically */
-            if (ffm->first_packet || (frame_offset & 0x8000)) {
-                if (!frame_offset) {
-                    /* This packet has no frame headers in it */
-                    if (avio_tell(pb) >= ffm->packet_size * 3LL) {
-                        int64_t seekback = FFMIN(ffm->packet_size * 2LL, avio_tell(pb) - last_pos);
-                        seekback = FFMAX(seekback, 0);
-                        avio_seek(pb, -seekback, SEEK_CUR);
-                        goto retry_read;
-                    }
-                    /* This is bad, we cannot find a valid frame header */
-                    return 0;
-                }
-                ffm->first_packet = 0;
-                if ((frame_offset & 0x7fff) < FFM_HEADER_SIZE) {
-                    ffm->packet_end = ffm->packet_ptr;
-                    return -1;
-                }
-                ffm->packet_ptr = ffm->packet + (frame_offset & 0x7fff) - FFM_HEADER_SIZE;
-                if (!header)
-                    break;
-            } else {
-                ffm->packet_ptr = ffm->packet;
-            }
-            goto redo;
-        }
-        memcpy(buf, ffm->packet_ptr, len);
-        buf += len;
-        ffm->packet_ptr += len;
-        size -= len;
-        header = 0;
-    }
-    return size1 - size;
-}
-
-/* ensure that actual seeking happens between FFM_PACKET_SIZE
-   and file_size - FFM_PACKET_SIZE */
-static int64_t ffm_seek1(AVFormatContext *s, int64_t pos1)
-{
-    FFMContext *ffm = s->priv_data;
-    AVIOContext *pb = s->pb;
-    int64_t pos;
-
-    pos = FFMIN(pos1, ffm->file_size - FFM_PACKET_SIZE);
-    pos = FFMAX(pos, FFM_PACKET_SIZE);
-    ff_dlog(s, "seek to %"PRIx64" -> %"PRIx64"\n", pos1, pos);
-    return avio_seek(pb, pos, SEEK_SET);
-}
-
-static int64_t get_dts(AVFormatContext *s, int64_t pos)
-{
-    AVIOContext *pb = s->pb;
-    int64_t dts;
-
-    ffm_seek1(s, pos);
-    avio_skip(pb, 4);
-    dts = avio_rb64(pb);
-    ff_dlog(s, "dts=%0.6f\n", dts / 1000000.0);
-    return dts;
-}
-
-static void adjust_write_index(AVFormatContext *s)
-{
-    FFMContext *ffm = s->priv_data;
-    AVIOContext *pb = s->pb;
-    int64_t pts;
-    //int64_t orig_write_index = ffm->write_index;
-    int64_t pos_min, pos_max;
-    int64_t pts_start;
-    int64_t ptr = avio_tell(pb);
-
-
-    pos_min = 0;
-    pos_max = ffm->file_size - 2 * FFM_PACKET_SIZE;
-
-    pts_start = get_dts(s, pos_min);
-
-    pts = get_dts(s, pos_max);
-
-    if (pts - 100000 > pts_start)
-        goto end;
-
-    ffm->write_index = FFM_PACKET_SIZE;
-
-    pts_start = get_dts(s, pos_min);
-
-    pts = get_dts(s, pos_max);
-
-    if (pts - 100000 <= pts_start) {
-        while (1) {
-            int64_t newpos;
-            int64_t newpts;
-
-            newpos = ((pos_max + pos_min) / (2 * FFM_PACKET_SIZE)) * FFM_PACKET_SIZE;
-
-            if (newpos == pos_min)
-                break;
-
-            newpts = get_dts(s, newpos);
-
-            if (newpts - 100000 <= pts) {
-                pos_max = newpos;
-                pts = newpts;
-            } else {
-                pos_min = newpos;
-            }
-        }
-        ffm->write_index += pos_max;
-    }
-
- end:
-    avio_seek(pb, ptr, SEEK_SET);
-}
-
-
-static int ffm_append_recommended_configuration(AVStream *st, char **conf)
-{
-    int ret;
-    size_t newsize;
-    av_assert0(conf && st);
-    if (!*conf)
-        return 0;
-    if (!st->recommended_encoder_configuration) {
-        st->recommended_encoder_configuration = *conf;
-        *conf = 0;
-        return 0;
-    }
-    newsize = strlen(*conf) + strlen(st->recommended_encoder_configuration) + 2;
-    if ((ret = av_reallocp(&st->recommended_encoder_configuration, newsize)) < 0)
-        return ret;
-    av_strlcat(st->recommended_encoder_configuration, ",", newsize);
-    av_strlcat(st->recommended_encoder_configuration, *conf, newsize);
-    av_freep(conf);
-    return 0;
-}
-
-#define VALIDATE_PARAMETER(parameter, name, check) {                              \
-    if (check) {                                                                  \
-        av_log(s, AV_LOG_ERROR, "Invalid " name " %d\n", codecpar->parameter);   \
-        ret = AVERROR_INVALIDDATA;                                                \
-        goto fail;                                                                \
-    }                                                                             \
-}
-
-static int ffm2_read_header(AVFormatContext *s)
-{
-    FFMContext *ffm = s->priv_data;
-    AVStream *st = NULL;
-    AVIOContext *pb = s->pb;
-    AVCodecContext *dummy_codec = NULL;
-    AVCodecParameters *codecpar = NULL;
-    const AVCodecDescriptor *codec_desc;
-    int ret;
-    int f_main = 0, f_cprv = -1, f_stvi = -1, f_stau = -1;
-    AVCodec *enc;
-    char *buffer;
-
-    ffm->packet_size = avio_rb32(pb);
-    if (ffm->packet_size != FFM_PACKET_SIZE) {
-        av_log(s, AV_LOG_ERROR, "Invalid packet size %d, expected size was %d\n",
-               ffm->packet_size, FFM_PACKET_SIZE);
-        ret = AVERROR_INVALIDDATA;
-        goto fail;
-    }
-
-    ffm->write_index = avio_rb64(pb);
-    /* get also filesize */
-    if (pb->seekable & AVIO_SEEKABLE_NORMAL) {
-        ffm->file_size = avio_size(pb);
-        if (ffm->write_index && 0)
-            adjust_write_index(s);
-    } else {
-        ffm->file_size = (UINT64_C(1) << 63) - 1;
-    }
-    dummy_codec = avcodec_alloc_context3(NULL);
-
-    while(!avio_feof(pb)) {
-        unsigned id = avio_rb32(pb);
-        unsigned size = avio_rb32(pb);
-        int64_t next = avio_tell(pb) + size;
-        char rc_eq_buf[128];
-        int flags;
-
-        if(!id)
-            break;
-
-        switch(id) {
-        case MKBETAG('M', 'A', 'I', 'N'):
-            if (f_main++) {
-                ret = AVERROR(EINVAL);
-                goto fail;
-            }
-            avio_rb32(pb); /* nb_streams */
-            avio_rb32(pb); /* total bitrate */
-            break;
-        case MKBETAG('C', 'O', 'M', 'M'):
-            f_cprv = f_stvi = f_stau = 0;
-            st = avformat_new_stream(s, NULL);
-            if (!st) {
-                ret = AVERROR(ENOMEM);
-                goto fail;
-            }
-
-            avpriv_set_pts_info(st, 64, 1, 1000000);
-
-            codecpar = st->codecpar;
-            /* generic info */
-            codecpar->codec_id = avio_rb32(pb);
-            codec_desc = avcodec_descriptor_get(codecpar->codec_id);
-            if (!codec_desc) {
-                av_log(s, AV_LOG_ERROR, "Invalid codec id: %d\n", codecpar->codec_id);
-                codecpar->codec_id = AV_CODEC_ID_NONE;
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-            codecpar->codec_type = avio_r8(pb);
-            if (codecpar->codec_type != codec_desc->type) {
-                av_log(s, AV_LOG_ERROR, "Codec type mismatch: expected %d, found %d\n",
-                       codec_desc->type, codecpar->codec_type);
-                codecpar->codec_id = AV_CODEC_ID_NONE;
-                codecpar->codec_type = AVMEDIA_TYPE_UNKNOWN;
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-            codecpar->bit_rate = avio_rb32(pb);
-            if (codecpar->bit_rate < 0) {
-                av_log(s, AV_LOG_ERROR, "Invalid bit rate %"PRId64"\n", codecpar->bit_rate);
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-            flags = avio_rb32(pb);
-#if FF_API_LAVF_AVCTX
-FF_DISABLE_DEPRECATION_WARNINGS
-            st->codec->flags = flags;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-            avio_rb32(pb); // flags2
-            avio_rb32(pb); // debug
-            if (flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
-                int size = avio_rb32(pb);
-                if (size < 0 || size >= FF_MAX_EXTRADATA_SIZE) {
-                    av_log(s, AV_LOG_ERROR, "Invalid extradata size %d\n", size);
-                    ret = AVERROR_INVALIDDATA;
-                    goto fail;
-                }
-                codecpar->extradata = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
-                if (!codecpar->extradata) {
-                    ret = AVERROR(ENOMEM);
-                    goto fail;
-                }
-                codecpar->extradata_size = size;
-                avio_read(pb, codecpar->extradata, size);
-            }
-            break;
-        case MKBETAG('S', 'T', 'V', 'I'):
-            if (f_stvi++ || codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
-                ret = AVERROR(EINVAL);
-                goto fail;
-            }
-            avio_rb32(pb); // time_base.num
-            avio_rb32(pb); // time_base.den
-            codecpar->width = avio_rb16(pb);
-            codecpar->height = avio_rb16(pb);
-            ret = av_image_check_size(codecpar->width, codecpar->height, 0, s);
-            if (ret < 0)
-                goto fail;
-            avio_rb16(pb); // gop_size
-            codecpar->format = avio_rb32(pb);
-            if (!av_pix_fmt_desc_get(codecpar->format)) {
-                av_log(s, AV_LOG_ERROR, "Invalid pix fmt id: %d\n", codecpar->format);
-                codecpar->format = AV_PIX_FMT_NONE;
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-            avio_r8(pb);   // qmin
-            avio_r8(pb);   // qmax
-            avio_r8(pb);   // max_qdiff
-            avio_rb16(pb); // qcompress / 10000.0
-            avio_rb16(pb); // qblur / 10000.0
-            avio_rb32(pb); // bit_rate_tolerance
-            avio_get_str(pb, INT_MAX, rc_eq_buf, sizeof(rc_eq_buf));
-
-            avio_rb32(pb); // rc_max_rate
-            avio_rb32(pb); // rc_min_rate
-            avio_rb32(pb); // rc_buffer_size
-            avio_rb64(pb); // i_quant_factor
-            avio_rb64(pb); // b_quant_factor
-            avio_rb64(pb); // i_quant_offset
-            avio_rb64(pb); // b_quant_offset
-            avio_rb32(pb); // dct_algo
-            avio_rb32(pb); // strict_std_compliance
-            avio_rb32(pb); // max_b_frames
-            avio_rb32(pb); // mpeg_quant
-            avio_rb32(pb); // intra_dc_precision
-            avio_rb32(pb); // me_method
-            avio_rb32(pb); // mb_decision
-            avio_rb32(pb); // nsse_weight
-            avio_rb32(pb); // frame_skip_cmp
-            avio_rb64(pb); // rc_buffer_aggressivity
-            codecpar->codec_tag = avio_rb32(pb);
-            avio_r8(pb);   // thread_count
-            avio_rb32(pb); // coder_type
-            avio_rb32(pb); // me_cmp
-            avio_rb32(pb); // me_subpel_quality
-            avio_rb32(pb); // me_range
-            avio_rb32(pb); // keyint_min
-            avio_rb32(pb); // scenechange_threshold
-            avio_rb32(pb); // b_frame_strategy
-            avio_rb64(pb); // qcompress
-            avio_rb64(pb); // qblur
-            avio_rb32(pb); // max_qdiff
-            avio_rb32(pb); // refs
-            break;
-        case MKBETAG('S', 'T', 'A', 'U'):
-            if (f_stau++ || codecpar->codec_type != AVMEDIA_TYPE_AUDIO) {
-                ret = AVERROR(EINVAL);
-                goto fail;
-            }
-            codecpar->sample_rate = avio_rb32(pb);
-            VALIDATE_PARAMETER(sample_rate, "sample rate",        codecpar->sample_rate < 0)
-            codecpar->channels = avio_rl16(pb);
-            VALIDATE_PARAMETER(channels,    "number of channels", codecpar->channels < 0)
-            codecpar->frame_size = avio_rl16(pb);
-            VALIDATE_PARAMETER(frame_size,  "frame size",         codecpar->frame_size < 0)
-            break;
-        case MKBETAG('C', 'P', 'R', 'V'):
-            if (f_cprv++) {
-                ret = AVERROR(EINVAL);
-                goto fail;
-            }
-            enc = avcodec_find_encoder(codecpar->codec_id);
-            if (enc && enc->priv_data_size && enc->priv_class) {
-                buffer = av_malloc(size + 1);
-                if (!buffer) {
-                    ret = AVERROR(ENOMEM);
-                    goto fail;
-                }
-                avio_get_str(pb, size, buffer, size + 1);
-                if ((ret = ffm_append_recommended_configuration(st, &buffer)) < 0)
-                    goto fail;
-            }
-            break;
-        case MKBETAG('S', '2', 'V', 'I'):
-            if (f_stvi++ || !size || codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
-                ret = AVERROR(EINVAL);
-                goto fail;
-            }
-            buffer = av_malloc(size);
-            if (!buffer) {
-                ret = AVERROR(ENOMEM);
-                goto fail;
-            }
-            avio_get_str(pb, INT_MAX, buffer, size);
-            // The lack of AVOptions support in AVCodecParameters makes this back and forth copying needed
-            avcodec_parameters_to_context(dummy_codec, codecpar);
-            av_set_options_string(dummy_codec, buffer, "=", ",");
-            avcodec_parameters_from_context(codecpar, dummy_codec);
-            if ((ret = ffm_append_recommended_configuration(st, &buffer)) < 0)
-                goto fail;
-            break;
-        case MKBETAG('S', '2', 'A', 'U'):
-            if (f_stau++ || !size || codecpar->codec_type != AVMEDIA_TYPE_AUDIO) {
-                ret = AVERROR(EINVAL);
-                goto fail;
-            }
-            buffer = av_malloc(size);
-            if (!buffer) {
-                ret = AVERROR(ENOMEM);
-                goto fail;
-            }
-            avio_get_str(pb, INT_MAX, buffer, size);
-            // The lack of AVOptions support in AVCodecParameters makes this back and forth copying needed
-            avcodec_parameters_to_context(dummy_codec, codecpar);
-            av_set_options_string(dummy_codec, buffer, "=", ",");
-            avcodec_parameters_from_context(codecpar, dummy_codec);
-            if ((ret = ffm_append_recommended_configuration(st, &buffer)) < 0)
-                goto fail;
-            break;
-        }
-        avio_seek(pb, next, SEEK_SET);
-    }
-
-    /* get until end of block reached */
-    while ((avio_tell(pb) % ffm->packet_size) != 0 && !pb->eof_reached)
-        avio_r8(pb);
-
-    /* init packet demux */
-    ffm->packet_ptr = ffm->packet;
-    ffm->packet_end = ffm->packet;
-    ffm->frame_offset = 0;
-    ffm->dts = 0;
-    ffm->read_state = READ_HEADER;
-    ffm->first_packet = 1;
-    avcodec_free_context(&dummy_codec);
-    return 0;
- fail:
-    avcodec_free_context(&dummy_codec);
-    return ret;
-}
-
-static int ffm_read_header(AVFormatContext *s)
-{
-    FFMContext *ffm = s->priv_data;
-    AVStream *st;
-    AVIOContext *pb = s->pb;
-    AVCodecContext *dummy_codec = NULL;
-    AVCodecParameters *codecpar;
-    const AVCodecDescriptor *codec_desc;
-    int i, nb_streams, ret;
-    uint32_t tag;
-
-    /* header */
-    tag = avio_rl32(pb);
-    if (tag == MKTAG('F', 'F', 'M', '2'))
-        return ffm2_read_header(s);
-    if (tag != MKTAG('F', 'F', 'M', '1')) {
-        ret = AVERROR_INVALIDDATA;
-        goto fail;
-    }
-    ffm->packet_size = avio_rb32(pb);
-    if (ffm->packet_size != FFM_PACKET_SIZE) {
-        ret = AVERROR_INVALIDDATA;
-        goto fail;
-    }
-    ffm->write_index = avio_rb64(pb);
-    /* get also filesize */
-    if (pb->seekable & AVIO_SEEKABLE_NORMAL) {
-        ffm->file_size = avio_size(pb);
-        if (ffm->write_index && 0)
-            adjust_write_index(s);
-    } else {
-        ffm->file_size = (UINT64_C(1) << 63) - 1;
-    }
-    dummy_codec = avcodec_alloc_context3(NULL);
-
-    nb_streams = avio_rb32(pb);
-    avio_rb32(pb); /* total bitrate */
-    /* read each stream */
-    for(i=0;i<nb_streams;i++) {
-        char rc_eq_buf[128];
-        int flags;
-
-        st = avformat_new_stream(s, NULL);
-        if (!st) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
-
-        avpriv_set_pts_info(st, 64, 1, 1000000);
-
-        codecpar = st->codecpar;
-        /* generic info */
-        codecpar->codec_id = avio_rb32(pb);
-        codec_desc = avcodec_descriptor_get(codecpar->codec_id);
-        if (!codec_desc) {
-            av_log(s, AV_LOG_ERROR, "Invalid codec id: %d\n", codecpar->codec_id);
-            codecpar->codec_id = AV_CODEC_ID_NONE;
-            ret = AVERROR_INVALIDDATA;
-            goto fail;
-        }
-        codecpar->codec_type = avio_r8(pb); /* codec_type */
-        if (codecpar->codec_type != codec_desc->type) {
-            av_log(s, AV_LOG_ERROR, "Codec type mismatch: expected %d, found %d\n",
-                   codec_desc->type, codecpar->codec_type);
-            codecpar->codec_id = AV_CODEC_ID_NONE;
-            codecpar->codec_type = AVMEDIA_TYPE_UNKNOWN;
-            ret = AVERROR_INVALIDDATA;
-            goto fail;
-        }
-        codecpar->bit_rate = avio_rb32(pb);
-        if (codecpar->bit_rate < 0) {
-            av_log(s, AV_LOG_WARNING, "Invalid bit rate %"PRId64"\n", codecpar->bit_rate);
-            ret = AVERROR_INVALIDDATA;
-            goto fail;
-        }
-        flags = avio_rb32(pb);
-#if FF_API_LAVF_AVCTX
-FF_DISABLE_DEPRECATION_WARNINGS
-            st->codec->flags = flags;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-        avio_rb32(pb); // flags2
-        avio_rb32(pb); // debug
-        /* specific info */
-        switch(codecpar->codec_type) {
-        case AVMEDIA_TYPE_VIDEO:
-            avio_rb32(pb); // time_base.num
-            avio_rb32(pb); // time_base.den
-            codecpar->width = avio_rb16(pb);
-            codecpar->height = avio_rb16(pb);
-            if ((ret = av_image_check_size(codecpar->width, codecpar->height, 0, s)) < 0)
-                goto fail;
-            avio_rb16(pb); // gop_size
-            codecpar->format = avio_rb32(pb);
-            if (!av_pix_fmt_desc_get(codecpar->format)) {
-                av_log(s, AV_LOG_ERROR, "Invalid pix fmt id: %d\n", codecpar->format);
-                codecpar->format = AV_PIX_FMT_NONE;
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-            avio_r8(pb);   // qmin
-            avio_r8(pb);   // qmax
-            avio_r8(pb);   // max_qdiff
-            avio_rb16(pb); // qcompress / 10000.0
-            avio_rb16(pb); // qblur / 10000.0
-            avio_rb32(pb); // bit_rate_tolerance
-            avio_get_str(pb, INT_MAX, rc_eq_buf, sizeof(rc_eq_buf));
-
-            avio_rb32(pb); // rc_max_rate
-            avio_rb32(pb); // rc_min_rate
-            avio_rb32(pb); // rc_buffer_size
-            avio_rb64(pb); // i_quant_factor
-            avio_rb64(pb); // b_quant_factor
-            avio_rb64(pb); // i_quant_offset
-            avio_rb64(pb); // b_quant_offset
-            avio_rb32(pb); // dct_algo
-            avio_rb32(pb); // strict_std_compliance
-            avio_rb32(pb); // max_b_frames
-            avio_rb32(pb); // mpeg_quant
-            avio_rb32(pb); // intra_dc_precision
-            avio_rb32(pb); // me_method
-            avio_rb32(pb); // mb_decision
-            avio_rb32(pb); // nsse_weight
-            avio_rb32(pb); // frame_skip_cmp
-            avio_rb64(pb); // rc_buffer_aggressivity
-            codecpar->codec_tag = avio_rb32(pb);
-            avio_r8(pb);   // thread_count
-            avio_rb32(pb); // coder_type
-            avio_rb32(pb); // me_cmp
-            avio_rb32(pb); // me_subpel_quality
-            avio_rb32(pb); // me_range
-            avio_rb32(pb); // keyint_min
-            avio_rb32(pb); // scenechange_threshold
-            avio_rb32(pb); // b_frame_strategy
-            avio_rb64(pb); // qcompress
-            avio_rb64(pb); // qblur
-            avio_rb32(pb); // max_qdiff
-            avio_rb32(pb); // refs
-            break;
-        case AVMEDIA_TYPE_AUDIO:
-            codecpar->sample_rate = avio_rb32(pb);
-            VALIDATE_PARAMETER(sample_rate, "sample rate",        codecpar->sample_rate < 0)
-            codecpar->channels = avio_rl16(pb);
-            VALIDATE_PARAMETER(channels,    "number of channels", codecpar->channels < 0)
-            codecpar->frame_size = avio_rl16(pb);
-            VALIDATE_PARAMETER(frame_size,  "frame size",         codecpar->frame_size < 0)
-            break;
-        default:
-            ret = AVERROR_INVALIDDATA;
-            goto fail;
-        }
-        if (flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
-            int size = avio_rb32(pb);
-            if (size < 0 || size >= FF_MAX_EXTRADATA_SIZE) {
-                av_log(s, AV_LOG_ERROR, "Invalid extradata size %d\n", size);
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-            codecpar->extradata = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
-            if (!codecpar->extradata) {
-                ret = AVERROR(ENOMEM);
-                goto fail;
-            }
-            codecpar->extradata_size = size;
-            avio_read(pb, codecpar->extradata, size);
-        }
-    }
-
-    /* get until end of block reached */
-    while ((avio_tell(pb) % ffm->packet_size) != 0 && !pb->eof_reached)
-        avio_r8(pb);
-
-    /* init packet demux */
-    ffm->packet_ptr = ffm->packet;
-    ffm->packet_end = ffm->packet;
-    ffm->frame_offset = 0;
-    ffm->dts = 0;
-    ffm->read_state = READ_HEADER;
-    ffm->first_packet = 1;
-    avcodec_free_context(&dummy_codec);
-    return 0;
- fail:
-    avcodec_free_context(&dummy_codec);
-    return ret;
-}
-
-/* return < 0 if eof */
-static int ffm_read_packet(AVFormatContext *s, AVPacket *pkt)
-{
-    int size;
-    FFMContext *ffm = s->priv_data;
-    int duration, ret;
-
-    switch(ffm->read_state) {
-    case READ_HEADER:
-        if ((ret = ffm_is_avail_data(s, FRAME_HEADER_SIZE+4)) < 0)
-            return ret;
-
-        ff_dlog(s, "pos=%08"PRIx64" spos=%"PRIx64", write_index=%"PRIx64" size=%"PRIx64"\n",
-               avio_tell(s->pb), s->pb->pos, ffm->write_index, ffm->file_size);
-        if (ffm_read_data(s, ffm->header, FRAME_HEADER_SIZE, 1) !=
-            FRAME_HEADER_SIZE)
-            return -1;
-        if (ffm->header[1] & FLAG_DTS)
-            if (ffm_read_data(s, ffm->header+16, 4, 1) != 4)
-                return -1;
-        ffm->read_state = READ_DATA;
-        /* fall through */
-    case READ_DATA:
-        size = AV_RB24(ffm->header + 2);
-        if ((ret = ffm_is_avail_data(s, size)) < 0)
-            return ret;
-
-        duration = AV_RB24(ffm->header + 5);
-
-        if (av_new_packet(pkt, size) < 0) {
-            return AVERROR(ENOMEM);
-        }
-        pkt->stream_index = ffm->header[0];
-        if ((unsigned)pkt->stream_index >= s->nb_streams) {
-            av_log(s, AV_LOG_ERROR, "invalid stream index %d\n", pkt->stream_index);
-            av_packet_unref(pkt);
-            ffm->read_state = READ_HEADER;
-            return -1;
-        }
-        pkt->pos = avio_tell(s->pb);
-        if (ffm->header[1] & FLAG_KEY_FRAME)
-            pkt->flags |= AV_PKT_FLAG_KEY;
-
-        ffm->read_state = READ_HEADER;
-        if (ffm_read_data(s, pkt->data, size, 0) != size) {
-            /* bad case: desynchronized packet. we cancel all the packet loading */
-            av_packet_unref(pkt);
-            return -1;
-        }
-        pkt->pts = AV_RB64(ffm->header+8);
-        if (ffm->header[1] & FLAG_DTS)
-            pkt->dts = pkt->pts - AV_RB32(ffm->header+16);
-        else
-            pkt->dts = pkt->pts;
-        pkt->duration = duration;
-        break;
-    }
-    return 0;
-}
-
-/* seek to a given time in the file. The file read pointer is
-   positioned at or before pts. XXX: the following code is quite
-   approximative */
-static int ffm_seek(AVFormatContext *s, int stream_index, int64_t wanted_pts, int flags)
-{
-    FFMContext *ffm = s->priv_data;
-    int64_t pos_min, pos_max, pos;
-    int64_t pts_min, pts_max, pts;
-    double pos1;
-
-    ff_dlog(s, "wanted_pts=%0.6f\n", wanted_pts / 1000000.0);
-    /* find the position using linear interpolation (better than
-       dichotomy in typical cases) */
-    if (ffm->write_index && ffm->write_index < ffm->file_size) {
-        if (get_dts(s, FFM_PACKET_SIZE) < wanted_pts) {
-            pos_min = FFM_PACKET_SIZE;
-            pos_max = ffm->write_index - FFM_PACKET_SIZE;
-        } else {
-            pos_min = ffm->write_index;
-            pos_max = ffm->file_size - FFM_PACKET_SIZE;
-        }
-    } else {
-        pos_min = FFM_PACKET_SIZE;
-        pos_max = ffm->file_size - FFM_PACKET_SIZE;
-    }
-    while (pos_min <= pos_max) {
-        pts_min = get_dts(s, pos_min);
-        pts_max = get_dts(s, pos_max);
-        if (pts_min > wanted_pts || pts_max <= wanted_pts) {
-            pos = pts_min > wanted_pts ? pos_min : pos_max;
-            goto found;
-        }
-        /* linear interpolation */
-        pos1 = (double)(pos_max - pos_min) * (double)(wanted_pts - pts_min) /
-            (double)(pts_max - pts_min);
-        pos = (((int64_t)pos1) / FFM_PACKET_SIZE) * FFM_PACKET_SIZE;
-        if (pos <= pos_min)
-            pos = pos_min;
-        else if (pos >= pos_max)
-            pos = pos_max;
-        pts = get_dts(s, pos);
-        /* check if we are lucky */
-        if (pts == wanted_pts) {
-            goto found;
-        } else if (pts > wanted_pts) {
-            pos_max = pos - FFM_PACKET_SIZE;
-        } else {
-            pos_min = pos + FFM_PACKET_SIZE;
-        }
-    }
-    pos = (flags & AVSEEK_FLAG_BACKWARD) ? pos_min : pos_max;
-
- found:
-    if (ffm_seek1(s, pos) < 0)
-        return -1;
-
-    /* reset read state */
-    ffm->read_state = READ_HEADER;
-    ffm->packet_ptr = ffm->packet;
-    ffm->packet_end = ffm->packet;
-    ffm->first_packet = 1;
-
-    return 0;
-}
-
-static int ffm_probe(AVProbeData *p)
-{
-    if (
-        p->buf[0] == 'F' && p->buf[1] == 'F' && p->buf[2] == 'M' &&
-        (p->buf[3] == '1' || p->buf[3] == '2'))
-        return AVPROBE_SCORE_MAX + 1;
-    return 0;
-}
-
-static const AVOption options[] = {
-    {"server_attached", NULL, offsetof(FFMContext, server_attached), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_EXPORT },
-    {"ffm_write_index", NULL, offsetof(FFMContext, write_index), AV_OPT_TYPE_INT64, {.i64 = 0}, 0, INT64_MAX, AV_OPT_FLAG_EXPORT },
-    {"ffm_file_size", NULL, offsetof(FFMContext, file_size), AV_OPT_TYPE_INT64, {.i64 = 0}, 0, INT64_MAX, AV_OPT_FLAG_EXPORT },
-    { NULL },
-};
-
-static const AVClass ffm_class = {
-    .class_name = "ffm demuxer",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-AVInputFormat ff_ffm_demuxer = {
-    .name           = "ffm",
-    .long_name      = NULL_IF_CONFIG_SMALL("FFM (FFserver live feed)"),
-    .priv_data_size = sizeof(FFMContext),
-    .read_probe     = ffm_probe,
-    .read_header    = ffm_read_header,
-    .read_packet    = ffm_read_packet,
-    .read_seek      = ffm_seek,
-    .priv_class     = &ffm_class,
-};

diff --git a/libavformat/ffmenc.c b/libavformat/ffmenc.c
deleted file mode 100644
index ef7dc3a..0000000
--- a/libavformat/ffmenc.c
+++ /dev/null

@@ -1,362 +0,0 @@
-/*
- * FFM (ffserver live feed) muxer
- * Copyright (c) 2001 Fabrice Bellard
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/intreadwrite.h"
-#include "libavutil/intfloat.h"
-#include "libavutil/avassert.h"
-#include "libavutil/parseutils.h"
-#include "libavutil/opt.h"
-#include "avformat.h"
-#include "avio_internal.h"
-#include "internal.h"
-#include "ffm.h"
-
-static void flush_packet(AVFormatContext *s)
-{
-    FFMContext *ffm = s->priv_data;
-    int fill_size, h;
-    AVIOContext *pb = s->pb;
-
-    fill_size = ffm->packet_end - ffm->packet_ptr;
-    memset(ffm->packet_ptr, 0, fill_size);
-
-    av_assert1(avio_tell(pb) % ffm->packet_size == 0);
-
-    /* put header */
-    avio_wb16(pb, PACKET_ID);
-    avio_wb16(pb, fill_size);
-    avio_wb64(pb, ffm->dts);
-    h = ffm->frame_offset;
-    if (ffm->first_packet)
-        h |= 0x8000;
-    avio_wb16(pb, h);
-    avio_write(pb, ffm->packet, ffm->packet_end - ffm->packet);
-    avio_flush(pb);
-
-    /* prepare next packet */
-    ffm->frame_offset = 0; /* no key frame */
-    ffm->packet_ptr = ffm->packet;
-    ffm->first_packet = 0;
-}
-
-/* 'first' is true if first data of a frame */
-static void ffm_write_data(AVFormatContext *s,
-                           const uint8_t *buf, int size,
-                           int64_t dts, int header)
-{
-    FFMContext *ffm = s->priv_data;
-    int len;
-
-    if (header && ffm->frame_offset == 0) {
-        ffm->frame_offset = ffm->packet_ptr - ffm->packet + FFM_HEADER_SIZE;
-        ffm->dts = dts;
-    }
-
-    /* write as many packets as needed */
-    while (size > 0) {
-        len = ffm->packet_end - ffm->packet_ptr;
-        if (len > size)
-            len = size;
-        memcpy(ffm->packet_ptr, buf, len);
-
-        ffm->packet_ptr += len;
-        buf += len;
-        size -= len;
-        if (ffm->packet_ptr >= ffm->packet_end)
-            flush_packet(s);
-    }
-}
-
-static void write_header_chunk(AVIOContext *pb, AVIOContext *dpb, unsigned id)
-{
-    uint8_t *dyn_buf;
-    int dyn_size= avio_close_dyn_buf(dpb, &dyn_buf);
-    avio_wb32(pb, id);
-    avio_wb32(pb, dyn_size);
-    avio_write(pb, dyn_buf, dyn_size);
-    av_free(dyn_buf);
-}
-
-static int ffm_write_header_codec_ctx(AVIOContext *pb, AVCodecParameters *ctxpar, unsigned tag, int type)
-{
-    AVIOContext *tmp;
-    char *buf = NULL;
-    int ret, need_coma = 0;
-    AVCodecContext *ctx = NULL;
-
-#define SKIP_DEFAULTS   AV_OPT_SERIALIZE_SKIP_DEFAULTS
-#define OPT_FLAGS_EXACT AV_OPT_SERIALIZE_OPT_FLAGS_EXACT
-#define ENC             AV_OPT_FLAG_ENCODING_PARAM
-
-    if (avio_open_dyn_buf(&tmp) < 0)
-        return AVERROR(ENOMEM);
-
-    // AVCodecParameters does not suport AVOptions, we thus must copy it over to a context that does
-    // otherwise it could be used directly and this would be much simpler
-    ctx = avcodec_alloc_context3(NULL);
-    if (!ctx) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-    avcodec_parameters_to_context(ctx, ctxpar);
-
-    if ((ret = av_opt_serialize(ctx, ENC | type, SKIP_DEFAULTS, &buf, '=', ',')) < 0)
-        goto fail;
-    if (buf && strlen(buf)) {
-        avio_write(tmp, buf, strlen(buf));
-        av_freep(&buf);
-        need_coma = 1;
-    }
-    if ((ret = av_opt_serialize(ctx, 0, SKIP_DEFAULTS | OPT_FLAGS_EXACT, &buf, '=', ',')) < 0)
-        goto fail;
-    if (buf && strlen(buf)) {
-        if (need_coma)
-            avio_w8(tmp, ',');
-        avio_write(tmp, buf, strlen(buf));
-    }
-    av_freep(&buf);
-    avio_w8(tmp, 0);
-    write_header_chunk(pb, tmp, tag);
-    avcodec_free_context(&ctx);
-    return 0;
-  fail:
-    av_free(buf);
-    ffio_free_dyn_buf(&tmp);
-    avcodec_free_context(&ctx);
-    return ret;
-
-#undef SKIP_DEFAULTS
-#undef OPT_FLAGS_EXACT
-#undef ENC
-}
-
-static int ffm_write_recommended_config(AVIOContext *pb, AVCodecParameters *codecpar, unsigned tag,
-                                        const char *configuration)
-{
-    int ret;
-    const AVCodec *enc = avcodec_find_encoder(codecpar->codec_id);
-    AVIOContext *tmp;
-    AVDictionaryEntry *t = NULL;
-    AVDictionary *all = NULL, *comm = NULL, *prv = NULL;
-    char *buf = NULL;
-
-    if (!enc || !enc->priv_class || !enc->priv_data_size) {
-        /* codec is not known/has no private options, so save everything as common options */
-        if (avio_open_dyn_buf(&tmp) < 0)
-            return AVERROR(ENOMEM);
-        avio_put_str(tmp, configuration);
-        write_header_chunk(pb, tmp, tag);
-        return 0;
-    }
-
-    if ((ret = av_dict_parse_string(&all, configuration, "=", ",", 0)) < 0)
-        return ret;
-
-    while ((t = av_dict_get(all, "", t, AV_DICT_IGNORE_SUFFIX))) {
-        if (av_opt_find((void *)&enc->priv_class, t->key, NULL, 0, AV_OPT_SEARCH_FAKE_OBJ)) {
-            if ((ret = av_dict_set(&prv, t->key, t->value, 0)) < 0)
-                goto fail;
-        } else if ((ret = av_dict_set(&comm, t->key, t->value, 0)) < 0)
-            goto fail;
-    }
-
-    if (comm) {
-        if ((ret = av_dict_get_string(comm, &buf, '=', ',')) < 0 ||
-            (ret = avio_open_dyn_buf(&tmp)) < 0)
-            goto fail;
-        avio_put_str(tmp, buf);
-        av_freep(&buf);
-        write_header_chunk(pb, tmp, tag);
-    }
-    if (prv) {
-        if ((ret = av_dict_get_string(prv, &buf, '=', ',')) < 0 ||
-            (ret = avio_open_dyn_buf(&tmp)) < 0)
-            goto fail;
-        avio_put_str(tmp, buf);
-        write_header_chunk(pb, tmp, MKBETAG('C', 'P', 'R', 'V'));
-    }
-
-  fail:
-    av_free(buf);
-    av_dict_free(&all);
-    av_dict_free(&comm);
-    av_dict_free(&prv);
-    return ret;
-}
-
-static int ffm_write_header(AVFormatContext *s)
-{
-    FFMContext *ffm = s->priv_data;
-    AVStream *st;
-    AVIOContext *pb = s->pb;
-    AVCodecParameters *codecpar;
-    int bit_rate, i, ret;
-
-    if ((ret = ff_parse_creation_time_metadata(s, &ffm->start_time, 0)) < 0)
-        return ret;
-
-    ffm->packet_size = FFM_PACKET_SIZE;
-
-    /* header */
-    avio_wl32(pb, MKTAG('F', 'F', 'M', '2'));
-    avio_wb32(pb, ffm->packet_size);
-    avio_wb64(pb, 0); /* current write position */
-
-    if(avio_open_dyn_buf(&pb) < 0)
-        return AVERROR(ENOMEM);
-
-    avio_wb32(pb, s->nb_streams);
-    bit_rate = 0;
-    for(i=0;i<s->nb_streams;i++) {
-        st = s->streams[i];
-        bit_rate += st->codecpar->bit_rate;
-    }
-    avio_wb32(pb, bit_rate);
-
-    write_header_chunk(s->pb, pb, MKBETAG('M', 'A', 'I', 'N'));
-
-    /* list of streams */
-    for(i=0;i<s->nb_streams;i++) {
-        int flags = 0;
-        st = s->streams[i];
-        avpriv_set_pts_info(st, 64, 1, 1000000);
-        if(avio_open_dyn_buf(&pb) < 0)
-            return AVERROR(ENOMEM);
-
-        codecpar = st->codecpar;
-        /* generic info */
-        avio_wb32(pb, codecpar->codec_id);
-        avio_w8(pb, codecpar->codec_type);
-        avio_wb32(pb, codecpar->bit_rate);
-        if (codecpar->extradata_size)
-            flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
-
-        // If the user is not providing us with a configuration we have to fill it in as we cannot access the encoder
-        if (!st->recommended_encoder_configuration) {
-            if (s->flags & AVFMT_FLAG_BITEXACT)
-                flags |= AV_CODEC_FLAG_BITEXACT;
-        }
-
-        avio_wb32(pb, flags);
-        avio_wb32(pb, 0); // flags2
-        avio_wb32(pb, 0); // debug
-        if (codecpar->extradata_size) {
-            avio_wb32(pb, codecpar->extradata_size);
-            avio_write(pb, codecpar->extradata, codecpar->extradata_size);
-        }
-        write_header_chunk(s->pb, pb, MKBETAG('C', 'O', 'M', 'M'));
-        /* specific info */
-        switch(codecpar->codec_type) {
-        case AVMEDIA_TYPE_VIDEO:
-            if (st->recommended_encoder_configuration) {
-                av_log(NULL, AV_LOG_DEBUG, "writing recommended configuration: %s\n",
-                       st->recommended_encoder_configuration);
-                if ((ret = ffm_write_recommended_config(s->pb, codecpar, MKBETAG('S', '2', 'V', 'I'),
-                                                        st->recommended_encoder_configuration)) < 0)
-                return ret;
-            } else if ((ret = ffm_write_header_codec_ctx(s->pb, codecpar, MKBETAG('S', '2', 'V', 'I'), AV_OPT_FLAG_VIDEO_PARAM)) < 0)
-                return ret;
-            break;
-        case AVMEDIA_TYPE_AUDIO:
-            if (st->recommended_encoder_configuration) {
-                av_log(NULL, AV_LOG_DEBUG, "writing recommended configuration: %s\n",
-                       st->recommended_encoder_configuration);
-                if ((ret = ffm_write_recommended_config(s->pb, codecpar, MKBETAG('S', '2', 'A', 'U'),
-                                                        st->recommended_encoder_configuration)) < 0)
-                return ret;
-            } else if ((ret = ffm_write_header_codec_ctx(s->pb, codecpar, MKBETAG('S', '2', 'A', 'U'), AV_OPT_FLAG_AUDIO_PARAM)) < 0)
-                return ret;
-            break;
-        default:
-            return -1;
-        }
-    }
-    pb = s->pb;
-
-    avio_wb64(pb, 0); // end of header
-
-    /* flush until end of block reached */
-    while ((avio_tell(pb) % ffm->packet_size) != 0)
-        avio_w8(pb, 0);
-
-    avio_flush(pb);
-
-    /* init packet mux */
-    ffm->packet_ptr = ffm->packet;
-    ffm->packet_end = ffm->packet + ffm->packet_size - FFM_HEADER_SIZE;
-    av_assert0(ffm->packet_end >= ffm->packet);
-    ffm->frame_offset = 0;
-    ffm->dts = 0;
-    ffm->first_packet = 1;
-
-    return 0;
-}
-
-static int ffm_write_packet(AVFormatContext *s, AVPacket *pkt)
-{
-    FFMContext *ffm = s->priv_data;
-    int64_t dts;
-    uint8_t header[FRAME_HEADER_SIZE+4];
-    int header_size = FRAME_HEADER_SIZE;
-
-    dts = ffm->start_time + pkt->dts;
-    /* packet size & key_frame */
-    header[0] = pkt->stream_index;
-    header[1] = 0;
-    if (pkt->flags & AV_PKT_FLAG_KEY)
-        header[1] |= FLAG_KEY_FRAME;
-    AV_WB24(header+2, pkt->size);
-    AV_WB24(header+5, pkt->duration);
-    AV_WB64(header+8, ffm->start_time + pkt->pts);
-    if (pkt->pts != pkt->dts) {
-        header[1] |= FLAG_DTS;
-        AV_WB32(header+16, pkt->pts - pkt->dts);
-        header_size += 4;
-    }
-    ffm_write_data(s, header, header_size, dts, 1);
-    ffm_write_data(s, pkt->data, pkt->size, dts, 0);
-
-    return 0;
-}
-
-static int ffm_write_trailer(AVFormatContext *s)
-{
-    FFMContext *ffm = s->priv_data;
-
-    /* flush packets */
-    if (ffm->packet_ptr > ffm->packet)
-        flush_packet(s);
-
-    return 0;
-}
-
-AVOutputFormat ff_ffm_muxer = {
-    .name              = "ffm",
-    .long_name         = NULL_IF_CONFIG_SMALL("FFM (FFserver live feed)"),
-    .extensions        = "ffm",
-    .priv_data_size    = sizeof(FFMContext),
-    .audio_codec       = AV_CODEC_ID_MP2,
-    .video_codec       = AV_CODEC_ID_MPEG1VIDEO,
-    .write_header      = ffm_write_header,
-    .write_packet      = ffm_write_packet,
-    .write_trailer     = ffm_write_trailer,
-    .flags             = AVFMT_TS_NEGATIVE,
-};

diff --git a/libavformat/fifo.c b/libavformat/fifo.c
index c881f31..145e2e2 100644
--- a/libavformat/fifo.c
+++ b/libavformat/fifo.c

@@ -124,9 +124,9 @@
     if (ret < 0)
         return ret;
 
-    ret = ff_format_output_open(avf2, avf->filename, &format_options);
+    ret = ff_format_output_open(avf2, avf->url, &format_options);
     if (ret < 0) {
-        av_log(avf, AV_LOG_ERROR, "Error opening %s: %s\n", avf->filename,
+        av_log(avf, AV_LOG_ERROR, "Error opening %s: %s\n", avf->url,
                av_err2str(ret));
         goto end;
     }
@@ -500,13 +500,13 @@
         }
     }
 
-    oformat = av_guess_format(fifo->format, avf->filename, NULL);
+    oformat = av_guess_format(fifo->format, avf->url, NULL);
     if (!oformat) {
         ret = AVERROR_MUXER_NOT_FOUND;
         return ret;
     }
 
-    ret = fifo_mux_init(avf, oformat, avf->filename);
+    ret = fifo_mux_init(avf, oformat, avf->url);
     if (ret < 0)
         return ret;
 

diff --git a/libavformat/fifo_test.c b/libavformat/fifo_test.c
new file mode 100644
index 0000000..02ec215
--- /dev/null
+++ b/libavformat/fifo_test.c

@@ -0,0 +1,152 @@
+/*
+ * FIFO test pseudo-muxer
+ * Copyright (c) 2016 Jan Sebechlebsky
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FFmpeg; if not, write to the Free Software * Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+
+#include "libavutil/opt.h"
+#include "libavutil/time.h"
+#include "libavutil/avassert.h"
+
+#include "avformat.h"
+#include "url.h"
+
+/* Implementation of mock muxer to simulate real muxer failures */
+
+#define MAX_TST_PACKETS 128
+#define SLEEPTIME_50_MS 50000
+#define SLEEPTIME_10_MS 10000
+
+/* Implementation of mock muxer to simulate real muxer failures */
+
+/* This is structure of data sent in packets to
+ * failing muxer */
+typedef struct FailingMuxerPacketData {
+    int ret;             /* return value of write_packet call*/
+    int recover_after;   /* set ret to zero after this number of recovery attempts */
+    unsigned sleep_time; /* sleep for this long in write_packet to simulate long I/O operation */
+} FailingMuxerPacketData;
+
+
+typedef struct FailingMuxerContext {
+    AVClass *class;
+    int write_header_ret;
+    int write_trailer_ret;
+    /* If non-zero, summary of processed packets will be printed in deinit */
+    int print_deinit_summary;
+
+    int flush_count;
+    int pts_written[MAX_TST_PACKETS];
+    int pts_written_nr;
+} FailingMuxerContext;
+
+static int failing_write_header(AVFormatContext *avf)
+{
+    FailingMuxerContext *ctx = avf->priv_data;
+    return ctx->write_header_ret;
+}
+
+static int failing_write_packet(AVFormatContext *avf, AVPacket *pkt)
+{
+    FailingMuxerContext *ctx = avf->priv_data;
+    int ret = 0;
+    if (!pkt) {
+        ctx->flush_count++;
+    } else {
+        FailingMuxerPacketData *data = (FailingMuxerPacketData*) pkt->data;
+
+        if (!data->recover_after) {
+            data->ret = 0;
+        } else {
+            data->recover_after--;
+        }
+
+        ret = data->ret;
+
+        if (data->sleep_time) {
+            int64_t slept = 0;
+            while (slept < data->sleep_time) {
+                if (ff_check_interrupt(&avf->interrupt_callback))
+                    return AVERROR_EXIT;
+                av_usleep(SLEEPTIME_10_MS);
+                slept += SLEEPTIME_10_MS;
+            }
+        }
+
+        if (!ret) {
+            ctx->pts_written[ctx->pts_written_nr++] = pkt->pts;
+            av_packet_unref(pkt);
+        }
+    }
+    return ret;
+}
+
+static int failing_write_trailer(AVFormatContext *avf)
+{
+    FailingMuxerContext *ctx = avf->priv_data;
+    return ctx->write_trailer_ret;
+}
+
+static void failing_deinit(AVFormatContext *avf)
+{
+    int i;
+    FailingMuxerContext *ctx = avf->priv_data;
+
+    if (!ctx->print_deinit_summary)
+        return;
+
+    printf("flush count: %d\n", ctx->flush_count);
+    printf("pts seen nr: %d\n", ctx->pts_written_nr);
+    printf("pts seen: ");
+    for (i = 0; i < ctx->pts_written_nr; ++i ) {
+        printf(i ? ",%d" : "%d", ctx->pts_written[i]);
+    }
+    printf("\n");
+}
+#define OFFSET(x) offsetof(FailingMuxerContext, x)
+static const AVOption options[] = {
+        {"write_header_ret", "write_header() return value", OFFSET(write_header_ret),
+         AV_OPT_TYPE_INT, {.i64 = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
+        {"write_trailer_ret", "write_trailer() return value", OFFSET(write_trailer_ret),
+         AV_OPT_TYPE_INT, {.i64 = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
+        {"print_deinit_summary", "print summary when deinitializing muxer", OFFSET(print_deinit_summary),
+         AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
+        {NULL}
+    };
+
+static const AVClass failing_muxer_class = {
+    .class_name = "Fifo test muxer",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVOutputFormat ff_fifo_test_muxer = {
+    .name           = "fifo_test",
+    .long_name      = NULL_IF_CONFIG_SMALL("Fifo test muxer"),
+    .priv_data_size = sizeof(FailingMuxerContext),
+    .write_header   = failing_write_header,
+    .write_packet   = failing_write_packet,
+    .write_trailer  = failing_write_trailer,
+    .deinit         = failing_deinit,
+    .priv_class     = &failing_muxer_class,
+    .flags          = AVFMT_NOFILE | AVFMT_ALLOW_FLUSH,
+};
+

diff --git a/libavformat/fitsenc.c b/libavformat/fitsenc.c
index 7cb1715..cc3999a 100644
--- a/libavformat/fitsenc.c
+++ b/libavformat/fitsenc.c

@@ -106,6 +106,8 @@
             }
             bzero = 32768;
             break;
+        default:
+            return AVERROR(EINVAL);
     }
 
     if (fitsctx->first_image) {
@@ -166,7 +168,9 @@
 
 static int fits_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
-    write_image_header(s);
+    int ret = write_image_header(s);
+    if (ret < 0)
+        return ret;
     avio_write(s->pb, pkt->data, pkt->size);
     return 0;
 }

diff --git a/libavformat/flacenc.c b/libavformat/flacenc.c
index b894f9e..617bccd 100644
--- a/libavformat/flacenc.c
+++ b/libavformat/flacenc.c

@@ -21,10 +21,13 @@
 
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
 #include "libavcodec/flac.h"
 #include "avformat.h"
 #include "avio_internal.h"
 #include "flacenc.h"
+#include "id3v2.h"
+#include "internal.h"
 #include "vorbiscomment.h"
 #include "libavcodec/bytestream.h"
 
@@ -33,8 +36,15 @@
     const AVClass *class;
     int write_header;
 
+    int audio_stream_idx;
+    int waiting_pics;
+    /* audio packets are queued here until we get all the attached pictures */
+    AVPacketList *queue, *queue_end;
+
     /* updated streaminfo sent by the encoder at the end */
     uint8_t *streaminfo;
+
+    unsigned attached_types;
 } FlacMuxerContext;
 
 static int flac_write_block_padding(AVIOContext *pb, unsigned int n_padding_bytes,
@@ -74,36 +84,160 @@
     return 0;
 }
 
-static int flac_write_header(struct AVFormatContext *s)
+static int flac_write_picture(struct AVFormatContext *s, AVPacket *pkt)
 {
-    int ret;
-    int padding = s->metadata_header_padding;
-    AVCodecParameters *par = s->streams[0]->codecpar;
-    FlacMuxerContext *c   = s->priv_data;
+    FlacMuxerContext *c = s->priv_data;
+    AVIOContext *pb = s->pb;
+    const AVPixFmtDescriptor *pixdesc;
+    const CodecMime *mime = ff_id3v2_mime_tags;
+    AVDictionaryEntry *e;
+    const char *mimetype = NULL, *desc = "";
+    const AVStream *st = s->streams[pkt->stream_index];
+    int i, mimelen, desclen, type = 0;
 
-    if (!c->write_header)
+    if (!pkt->data)
         return 0;
 
-    if (s->nb_streams > 1) {
-        av_log(s, AV_LOG_ERROR, "only one stream is supported\n");
+    while (mime->id != AV_CODEC_ID_NONE) {
+        if (mime->id == st->codecpar->codec_id) {
+            mimetype = mime->str;
+            break;
+        }
+        mime++;
+    }
+    if (!mimetype) {
+        av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot "
+               "write an attached picture.\n", st->index);
         return AVERROR(EINVAL);
     }
-    if (par->codec_id != AV_CODEC_ID_FLAC) {
-        av_log(s, AV_LOG_ERROR, "unsupported codec\n");
+    mimelen = strlen(mimetype);
+
+    /* get the picture type */
+    e = av_dict_get(st->metadata, "comment", NULL, 0);
+    for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) {
+        if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) {
+            type = i;
+            break;
+        }
+    }
+
+    if ((c->attached_types & (1 << type)) & 0x6) {
+        av_log(s, AV_LOG_ERROR, "Duplicate attachment for type '%s'\n", ff_id3v2_picture_types[type]);
         return AVERROR(EINVAL);
     }
 
+    if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG ||
+                      st->codecpar->width != 32 ||
+                      st->codecpar->height != 32)) {
+        av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG");
+        return AVERROR(EINVAL);
+    }
+
+    c->attached_types |= (1 << type);
+
+    /* get the description */
+    if ((e = av_dict_get(st->metadata, "title", NULL, 0)))
+        desc = e->value;
+    desclen = strlen(desc);
+
+    avio_w8(pb, 0x06);
+    avio_wb24(pb, 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size);
+
+    avio_wb32(pb, type);
+
+    avio_wb32(pb, mimelen);
+    avio_write(pb, mimetype, mimelen);
+
+    avio_wb32(pb, desclen);
+    avio_write(pb, desc, desclen);
+
+    avio_wb32(pb, st->codecpar->width);
+    avio_wb32(pb, st->codecpar->height);
+    if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format)))
+        avio_wb32(pb, av_get_bits_per_pixel(pixdesc));
+    else
+        avio_wb32(pb, 0);
+    avio_wb32(pb, 0);
+
+    avio_wb32(pb, pkt->size);
+    avio_write(pb, pkt->data, pkt->size);
+    return 0;
+}
+
+static int flac_finish_header(struct AVFormatContext *s)
+{
+    int i, ret, padding = s->metadata_header_padding;
     if (padding < 0)
         padding = 8192;
     /* The FLAC specification states that 24 bits are used to represent the
      * size of a metadata block so we must clip this value to 2^24-1. */
     padding = av_clip_uintp2(padding, 24);
 
-    ret = ff_flac_write_header(s->pb, par->extradata,
-                               par->extradata_size, 0);
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+        AVPacket *pkt = st->priv_data;
+        if (!pkt)
+            continue;
+        ret = flac_write_picture(s, pkt);
+        av_packet_unref(pkt);
+        if (ret < 0 && (s->error_recognition & AV_EF_EXPLODE))
+            return ret;
+    }
+
+    ret = flac_write_block_comment(s->pb, &s->metadata, !padding,
+                                   s->flags & AVFMT_FLAG_BITEXACT);
     if (ret)
         return ret;
 
+    /* The command line flac encoder defaults to placing a seekpoint
+     * every 10s.  So one might add padding to allow that later
+     * but there seems to be no simple way to get the duration here.
+     * So just add the amount requested by the user. */
+    if (padding)
+        flac_write_block_padding(s->pb, padding, 1);
+
+    return 0;
+}
+
+static int flac_init(struct AVFormatContext *s)
+{
+    AVCodecParameters *par;
+    FlacMuxerContext *c = s->priv_data;
+    int i;
+
+    c->audio_stream_idx = -1;
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+            if (c->audio_stream_idx >= 0 || st->codecpar->codec_id != AV_CODEC_ID_FLAC) {
+                av_log(s, AV_LOG_ERROR, "Invalid audio stream. Exactly one FLAC "
+                       "audio stream is required.\n");
+                return AVERROR(EINVAL);
+            }
+            par = s->streams[i]->codecpar;
+            c->audio_stream_idx = i;
+        } else if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
+            if (!(st->disposition & AV_DISPOSITION_ATTACHED_PIC)) {
+                av_log(s, AV_LOG_WARNING, "Video stream #%d is not an attached picture. Ignoring\n", i);
+                continue;
+            } else if (st->codecpar->codec_id == AV_CODEC_ID_GIF) {
+                av_log(s, AV_LOG_ERROR, "GIF image support is not implemented.\n");
+                return AVERROR_PATCHWELCOME;
+            } else if (!c->write_header) {
+                av_log(s, AV_LOG_ERROR, "Can't write attached pictures without a header.\n");
+                return AVERROR(EINVAL);
+            }
+            c->waiting_pics++;
+        } else {
+            av_log(s, AV_LOG_ERROR, "Only audio streams and pictures are allowed in FLAC.\n");
+            return AVERROR(EINVAL);
+        }
+    }
+    if (c->audio_stream_idx < 0) {
+        av_log(s, AV_LOG_ERROR, "No audio stream present.\n");
+        return AVERROR(EINVAL);
+    }
+
     /* add the channel layout tag */
     if (par->channel_layout &&
         !(par->channel_layout & ~0x3ffffULL) &&
@@ -121,28 +255,84 @@
         }
     }
 
-    ret = flac_write_block_comment(s->pb, &s->metadata, !padding,
-                                   s->flags & AVFMT_FLAG_BITEXACT);
-    if (ret)
+    return 0;
+}
+
+static int flac_write_header(struct AVFormatContext *s)
+{
+    FlacMuxerContext *c = s->priv_data;
+    AVCodecParameters *par = s->streams[c->audio_stream_idx]->codecpar;
+    int ret;
+
+    if (!c->write_header)
+        return 0;
+
+    ret = ff_flac_write_header(s->pb, par->extradata,
+                               par->extradata_size, 0);
+    if (ret < 0)
         return ret;
 
-    /* The command line flac encoder defaults to placing a seekpoint
-     * every 10s.  So one might add padding to allow that later
-     * but there seems to be no simple way to get the duration here.
-     * So just add the amount requested by the user. */
-    if (padding)
-        flac_write_block_padding(s->pb, padding, 1);
+    if (!c->waiting_pics)
+        ret = flac_finish_header(s);
 
     return ret;
 }
 
+static int flac_write_audio_packet(struct AVFormatContext *s, AVPacket *pkt)
+{
+    FlacMuxerContext *c = s->priv_data;
+    uint8_t *streaminfo;
+    int streaminfo_size;
+
+    /* check for updated streaminfo */
+    streaminfo = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
+                                         &streaminfo_size);
+    if (streaminfo && streaminfo_size == FLAC_STREAMINFO_SIZE) {
+        av_freep(&c->streaminfo);
+
+        c->streaminfo = av_malloc(FLAC_STREAMINFO_SIZE);
+        if (!c->streaminfo)
+            return AVERROR(ENOMEM);
+        memcpy(c->streaminfo, streaminfo, FLAC_STREAMINFO_SIZE);
+    }
+
+    if (pkt->size)
+        avio_write(s->pb, pkt->data, pkt->size);
+    return 0;
+}
+
+static int flac_queue_flush(AVFormatContext *s)
+{
+    FlacMuxerContext *c = s->priv_data;
+    AVPacket pkt;
+    int ret, write = 1;
+
+    ret = flac_finish_header(s);
+    if (ret < 0)
+        write = 0;
+
+    while (c->queue) {
+        ff_packet_list_get(&c->queue, &c->queue_end, &pkt);
+        if (write && (ret = flac_write_audio_packet(s, &pkt)) < 0)
+            write = 0;
+        av_packet_unref(&pkt);
+    }
+    return ret;
+}
+
 static int flac_write_trailer(struct AVFormatContext *s)
 {
     AVIOContext *pb = s->pb;
     int64_t file_size;
     FlacMuxerContext *c = s->priv_data;
     uint8_t *streaminfo = c->streaminfo ? c->streaminfo :
-                                          s->streams[0]->codecpar->extradata;
+                                          s->streams[c->audio_stream_idx]->codecpar->extradata;
+
+    if (c->waiting_pics) {
+        av_log(s, AV_LOG_WARNING, "No packets were sent for some of the "
+               "attached pictures.\n");
+        flac_queue_flush(s);
+    }
 
     if (!c->write_header || !streaminfo)
         return 0;
@@ -166,23 +356,48 @@
 static int flac_write_packet(struct AVFormatContext *s, AVPacket *pkt)
 {
     FlacMuxerContext *c = s->priv_data;
-    uint8_t *streaminfo;
-    int streaminfo_size;
+    int ret;
 
-    /* check for updated streaminfo */
-    streaminfo = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
-                                         &streaminfo_size);
-    if (streaminfo && streaminfo_size == FLAC_STREAMINFO_SIZE) {
-        av_freep(&c->streaminfo);
+    if (pkt->stream_index == c->audio_stream_idx) {
+        if (c->waiting_pics) {
+            /* buffer audio packets until we get all the pictures */
+            ret = ff_packet_list_put(&c->queue, &c->queue_end, pkt, FF_PACKETLIST_FLAG_REF_PACKET);
+            if (ret < 0) {
+                av_log(s, AV_LOG_ERROR, "Out of memory in packet queue; skipping attached pictures\n");
+                c->waiting_pics = 0;
+                ret = flac_queue_flush(s);
+                if (ret < 0)
+                    return ret;
+                return flac_write_audio_packet(s, pkt);
+            }
+        } else
+            return flac_write_audio_packet(s, pkt);
+    } else {
+        AVStream *st = s->streams[pkt->stream_index];
 
-        c->streaminfo = av_malloc(FLAC_STREAMINFO_SIZE);
-        if (!c->streaminfo)
-            return AVERROR(ENOMEM);
-        memcpy(c->streaminfo, streaminfo, FLAC_STREAMINFO_SIZE);
+        if (!c->waiting_pics ||
+            !(st->disposition & AV_DISPOSITION_ATTACHED_PIC))
+            return 0;
+
+        /* warn only once for each stream */
+        if (st->nb_frames == 1) {
+            av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+                   " ignoring.\n", pkt->stream_index);
+        }
+        if (st->nb_frames >= 1)
+            return 0;
+
+        st->priv_data = av_packet_clone(pkt);
+        if (!st->priv_data)
+            av_log(s, AV_LOG_ERROR, "Out of memory queueing an attached picture; skipping\n");
+        c->waiting_pics--;
+
+        /* flush the buffered audio packets */
+        if (!c->waiting_pics &&
+            (ret = flac_queue_flush(s)) < 0)
+            return ret;
     }
 
-    if (pkt->size)
-        avio_write(s->pb, pkt->data, pkt->size);
     return 0;
 }
 
@@ -205,7 +420,8 @@
     .mime_type         = "audio/x-flac",
     .extensions        = "flac",
     .audio_codec       = AV_CODEC_ID_FLAC,
-    .video_codec       = AV_CODEC_ID_NONE,
+    .video_codec       = AV_CODEC_ID_PNG,
+    .init              = flac_init,
     .write_header      = flac_write_header,
     .write_packet      = flac_write_packet,
     .write_trailer     = flac_write_trailer,

diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
index 2d89bef1..a2dea46 100644
--- a/libavformat/flvdec.c
+++ b/libavformat/flvdec.c

@@ -44,6 +44,8 @@
 typedef struct FLVContext {
     const AVClass *class; ///< Class for private options.
     int trust_metadata;   ///< configure streams according onMetaData
+    int trust_datasize;   ///< trust data size of FLVTag
+    int dump_full_metadata;   ///< Dump full metadata of the onMetadata
     int wrong_dts;        ///< wrong dts due to negative cts
     uint8_t *new_extradata[FLV_STREAM_TYPE_NB];
     int new_extradata_size[FLV_STREAM_TYPE_NB];
@@ -598,8 +600,10 @@
                         if (version > 0 && version <= 655)
                             flv->broken_sizes = 1;
                     }
-                } else if (!strcmp(key, "metadatacreator") && !strcmp(str_val, "MEGA")) {
-                    flv->broken_sizes = 1;
+                } else if (!strcmp(key, "metadatacreator")) {
+                    if (   !strcmp (str_val, "MEGA")
+                        || !strncmp(str_val, "FlixEngine", 10))
+                        flv->broken_sizes = 1;
                 }
             }
         }
@@ -609,7 +613,7 @@
             (!vpar && !strcmp(key, "videocodecid"))))
                 s->ctx_flags &= ~AVFMTCTX_NOHEADER; //If there is either audio/video missing, codecid will be an empty object
 
-        if (!strcmp(key, "duration")        ||
+        if ((!strcmp(key, "duration")        ||
             !strcmp(key, "filesize")        ||
             !strcmp(key, "width")           ||
             !strcmp(key, "height")          ||
@@ -621,7 +625,7 @@
             !strcmp(key, "audiosamplesize") ||
             !strcmp(key, "stereo")          ||
             !strcmp(key, "audiocodecid")    ||
-            !strcmp(key, "datastream"))
+            !strcmp(key, "datastream")) && !flv->dump_full_metadata)
             return 0;
 
         s->event_flags |= AVFMT_EVENT_FLAG_METADATA_UPDATED;
@@ -652,8 +656,6 @@
     AVStream av_unused *dstream;
     AVIOContext *ioc;
     int i;
-    // only needs to hold the string "onMetaData".
-    // Anything longer is something we don't want.
     char buffer[32];
 
     astream = NULL;
@@ -751,6 +753,9 @@
 
 static int flv_get_extradata(AVFormatContext *s, AVStream *st, int size)
 {
+    if (!size)
+        return 0;
+
     av_freep(&st->codecpar->extradata);
     if (ff_get_extradata(s, st->codecpar, s->pb, size) < 0)
         return AVERROR(ENOMEM);
@@ -761,6 +766,9 @@
 static int flv_queue_extradata(FLVContext *flv, AVIOContext *pb, int stream,
                                int size)
 {
+    if (!size)
+        return 0;
+
     av_free(flv->new_extradata[stream]);
     flv->new_extradata[stream] = av_mallocz(size +
                                             AV_INPUT_BUFFER_PADDING_SIZE);
@@ -1152,6 +1160,12 @@
         st->codecpar->codec_id == AV_CODEC_ID_MPEG4) {
         int type = avio_r8(s->pb);
         size--;
+
+        if (size < 0) {
+            ret = AVERROR_INVALIDDATA;
+            goto leave;
+        }
+
         if (st->codecpar->codec_id == AV_CODEC_ID_H264 || st->codecpar->codec_id == AV_CODEC_ID_MPEG4) {
             // sign extension
             int32_t cts = (avio_rb24(s->pb) + 0xff800000) ^ 0xff800000;
@@ -1244,16 +1258,18 @@
 
 leave:
     last = avio_rb32(s->pb);
-    if (last != orig_size + 11 && last != orig_size + 10 &&
-        !avio_feof(s->pb) &&
-        (last != orig_size || !last) && last != flv->sum_flv_tag_size &&
-        !flv->broken_sizes) {
-        av_log(s, AV_LOG_ERROR, "Packet mismatch %d %d %d\n", last, orig_size + 11, flv->sum_flv_tag_size);
-        avio_seek(s->pb, pos + 1, SEEK_SET);
-        ret = resync(s);
-        av_packet_unref(pkt);
-        if (ret >= 0) {
-            goto retry;
+    if (!flv->trust_datasize) {
+        if (last != orig_size + 11 && last != orig_size + 10 &&
+            !avio_feof(s->pb) &&
+            (last != orig_size || !last) && last != flv->sum_flv_tag_size &&
+            !flv->broken_sizes) {
+            av_log(s, AV_LOG_ERROR, "Packet mismatch %d %d %d\n", last, orig_size + 11, flv->sum_flv_tag_size);
+            avio_seek(s->pb, pos + 1, SEEK_SET);
+            ret = resync(s);
+            av_packet_unref(pkt);
+            if (ret >= 0) {
+                goto retry;
+            }
         }
     }
     return ret;
@@ -1271,6 +1287,8 @@
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "flv_metadata", "Allocate streams according to the onMetaData array", OFFSET(trust_metadata), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
+    { "flv_full_metadata", "Dump full metadata of the onMetadata", OFFSET(dump_full_metadata), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
+    { "flv_ignore_prevtag", "Ignore the Size of previous tag", OFFSET(trust_datasize), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
     { "missing_streams", "", OFFSET(missing_streams), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 0xFF, VD | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
     { NULL }
 };

diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
index 899b07e..e4863f1 100644
--- a/libavformat/flvenc.c
+++ b/libavformat/flvenc.c

@@ -230,12 +230,17 @@
     avio_write(pb, str, len);
 }
 
+// FLV timestamps are 32 bits signed, RTMP timestamps should be 32-bit unsigned
+static void put_timestamp(AVIOContext *pb, int64_t ts) {
+    avio_wb24(pb, ts & 0xFFFFFF);
+    avio_w8(pb, (ts >> 24) & 0x7F);
+}
+
 static void put_avc_eos_tag(AVIOContext *pb, unsigned ts)
 {
     avio_w8(pb, FLV_TAG_TYPE_VIDEO);
     avio_wb24(pb, 5);               /* Tag Data Size */
-    avio_wb24(pb, ts);              /* lower 24 bits of timestamp in ms */
-    avio_w8(pb, (ts >> 24) & 0x7F); /* MSB of ts in ms */
+    put_timestamp(pb, ts);
     avio_wb24(pb, 0);               /* StreamId = 0 */
     avio_w8(pb, 23);                /* ub[4] FrameType = 1, ub[4] CodecId = 7 */
     avio_w8(pb, 2);                 /* AVC end of sequence */
@@ -480,7 +485,7 @@
     return AVERROR(ENOSYS);
 }
 
-static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
+static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par, int64_t ts) {
     int64_t data_size;
     AVIOContext *pb = s->pb;
     FLVContext *flv = s->priv_data;
@@ -492,8 +497,7 @@
                 par->codec_type == AVMEDIA_TYPE_VIDEO ?
                         FLV_TAG_TYPE_VIDEO : FLV_TAG_TYPE_AUDIO);
         avio_wb24(pb, 0); // size patched later
-        avio_wb24(pb, 0); // ts
-        avio_w8(pb, 0);   // ts ext
+        put_timestamp(pb, ts);
         avio_wb24(pb, 0); // streamid
         pos = avio_tell(pb);
         if (par->codec_id == AV_CODEC_ID_AAC) {
@@ -610,10 +614,10 @@
      * writing, so we re-open the same output, but for reading. It also avoids
      * a read/seek/write/seek back and forth. */
     avio_flush(s->pb);
-    ret = s->io_open(s, &read_pb, s->filename, AVIO_FLAG_READ, NULL);
+    ret = s->io_open(s, &read_pb, s->url, AVIO_FLAG_READ, NULL);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Unable to re-open %s output file for "
-               "the second pass (add_keyframe_index)\n", s->filename);
+               "the second pass (add_keyframe_index)\n", s->url);
         goto end;
     }
 
@@ -756,7 +760,7 @@
     }
 
     for (i = 0; i < s->nb_streams; i++) {
-        flv_write_codec_header(s, s->streams[i]->codecpar);
+        flv_write_codec_header(s, s->streams[i]->codecpar, 0);
     }
 
     flv->datastart_offset = avio_tell(pb);
@@ -879,6 +883,11 @@
     int flags = -1, flags_size, ret;
     int64_t cur_offset = avio_tell(pb);
 
+    if (par->codec_type == AVMEDIA_TYPE_AUDIO && !pkt->size) {
+        av_log(s, AV_LOG_WARNING, "Empty audio Packet\n");
+        return AVERROR(EINVAL);
+    }
+
     if (par->codec_id == AV_CODEC_ID_VP6F || par->codec_id == AV_CODEC_ID_VP6A ||
         par->codec_id == AV_CODEC_ID_VP6  || par->codec_id == AV_CODEC_ID_AAC)
         flags_size = 2;
@@ -900,7 +909,7 @@
             }
             memcpy(par->extradata, side, side_size);
             par->extradata_size = side_size;
-            flv_write_codec_header(s, par);
+            flv_write_codec_header(s, par, pkt->dts);
         }
     }
 
@@ -978,8 +987,7 @@
     }
 
     avio_wb24(pb, size + flags_size);
-    avio_wb24(pb, ts & 0xFFFFFF);
-    avio_w8(pb, (ts >> 24) & 0x7F); // timestamps are 32 bits _signed_
+    put_timestamp(pb, ts);
     avio_wb24(pb, flv->reserved);
 
     if (par->codec_type == AVMEDIA_TYPE_DATA ||

diff --git a/libavformat/format.c b/libavformat/format.c
index 38ca2a3..2c4c895 100644
--- a/libavformat/format.c
+++ b/libavformat/format.c

@@ -19,10 +19,10 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/atomic.h"
 #include "libavutil/avstring.h"
 #include "libavutil/bprint.h"
 #include "libavutil/opt.h"
+#include "libavutil/thread.h"
 
 #include "avio_internal.h"
 #include "avformat.h"
@@ -34,53 +34,6 @@
  * @file
  * Format register and lookup
  */
-/** head of registered input format linked list */
-static AVInputFormat *first_iformat = NULL;
-/** head of registered output format linked list */
-static AVOutputFormat *first_oformat = NULL;
-
-static AVInputFormat **last_iformat = &first_iformat;
-static AVOutputFormat **last_oformat = &first_oformat;
-
-AVInputFormat *av_iformat_next(const AVInputFormat *f)
-{
-    if (f)
-        return f->next;
-    else
-        return first_iformat;
-}
-
-AVOutputFormat *av_oformat_next(const AVOutputFormat *f)
-{
-    if (f)
-        return f->next;
-    else
-        return first_oformat;
-}
-
-void av_register_input_format(AVInputFormat *format)
-{
-    AVInputFormat **p = last_iformat;
-
-    // Note, format could be added after the first 2 checks but that implies that *p is no longer NULL
-    while(p != &format->next && !format->next && avpriv_atomic_ptr_cas((void * volatile *)p, NULL, format))
-        p = &(*p)->next;
-
-    if (!format->next)
-        last_iformat = &format->next;
-}
-
-void av_register_output_format(AVOutputFormat *format)
-{
-    AVOutputFormat **p = last_oformat;
-
-    // Note, format could be added after the first 2 checks but that implies that *p is no longer NULL
-    while(p != &format->next && !format->next && avpriv_atomic_ptr_cas((void * volatile *)p, NULL, format))
-        p = &(*p)->next;
-
-    if (!format->next)
-        last_oformat = &format->next;
-}
 
 int av_match_ext(const char *filename, const char *extensions)
 {
@@ -98,7 +51,9 @@
 AVOutputFormat *av_guess_format(const char *short_name, const char *filename,
                                 const char *mime_type)
 {
-    AVOutputFormat *fmt = NULL, *fmt_found;
+    const AVOutputFormat *fmt = NULL;
+    AVOutputFormat *fmt_found = NULL;
+    void *i = 0;
     int score_max, score;
 
     /* specific test for image sequences */
@@ -110,9 +65,8 @@
     }
 #endif
     /* Find the proper file type. */
-    fmt_found = NULL;
     score_max = 0;
-    while ((fmt = av_oformat_next(fmt))) {
+    while ((fmt = av_muxer_iterate(&i))) {
         score = 0;
         if (fmt->name && short_name && av_match_name(short_name, fmt->name))
             score += 100;
@@ -124,7 +78,7 @@
         }
         if (score > score_max) {
             score_max = score;
-            fmt_found = fmt;
+            fmt_found = (AVOutputFormat*)fmt;
         }
     }
     return fmt_found;
@@ -163,10 +117,11 @@
 
 AVInputFormat *av_find_input_format(const char *short_name)
 {
-    AVInputFormat *fmt = NULL;
-    while ((fmt = av_iformat_next(fmt)))
+    const AVInputFormat *fmt = NULL;
+    void *i = 0;
+    while ((fmt = av_demuxer_iterate(&i)))
         if (av_match_name(short_name, fmt->name))
-            return fmt;
+            return (AVInputFormat*)fmt;
     return NULL;
 }
 
@@ -174,8 +129,10 @@
                                       int *score_ret)
 {
     AVProbeData lpd = *pd;
-    AVInputFormat *fmt1 = NULL, *fmt;
+    const AVInputFormat *fmt1 = NULL;
+    AVInputFormat *fmt = NULL;
     int score, score_max = 0;
+    void *i = 0;
     const static uint8_t zerobuffer[AVPROBE_PADDING_SIZE];
     enum nodat {
         NO_ID3,
@@ -200,8 +157,7 @@
             nodat = ID3_GREATER_PROBE;
     }
 
-    fmt = NULL;
-    while ((fmt1 = av_iformat_next(fmt1))) {
+    while ((fmt1 = av_demuxer_iterate(&i))) {
         if (!is_opened == !(fmt1->flags & AVFMT_NOFILE) && strcmp(fmt1->name, "image2"))
             continue;
         score = 0;
@@ -235,7 +191,7 @@
         }
         if (score > score_max) {
             score_max = score;
-            fmt       = fmt1;
+            fmt       = (AVInputFormat*)fmt1;
         } else if (score == score_max)
             fmt = NULL;
     }
@@ -294,14 +250,6 @@
             *semi = '\0';
         }
     }
-#if 0
-    if (!*fmt && pb->av_class && av_opt_get(pb, "mime_type", AV_OPT_SEARCH_CHILDREN, &mime_type) >= 0 && mime_type) {
-        if (!av_strcasecmp(mime_type, "audio/aacp")) {
-            *fmt = av_find_input_format("aac");
-        }
-        av_freep(&mime_type);
-    }
-#endif
 
     for (probe_size = PROBE_BUF_MIN; probe_size <= max_probe_size && !*fmt;
          probe_size = FFMIN(probe_size << 1,

diff --git a/libavformat/ftp.c b/libavformat/ftp.c
index 9aa7a45..676f1c6 100644
--- a/libavformat/ftp.c
+++ b/libavformat/ftp.c

@@ -489,8 +489,6 @@
     return 0;
 }
 
-static int ftp_has_feature(FTPContext *s, const char *feature_name);
-
 static int ftp_list(FTPContext *s)
 {
     int ret;

diff --git a/libavformat/gxfenc.c b/libavformat/gxfenc.c
index 0e0772b..3507c00 100644
--- a/libavformat/gxfenc.c
+++ b/libavformat/gxfenc.c

@@ -311,7 +311,7 @@
     AVIOContext *pb = s->pb;
     int64_t pos;
     int len;
-    const char *filename = strrchr(s->filename, '/');
+    const char *filename = strrchr(s->url, '/');
 
     pos = avio_tell(pb);
     avio_wb16(pb, 0); /* size */
@@ -320,7 +320,7 @@
     if (filename)
         filename++;
     else
-        filename = s->filename;
+        filename = s->url;
     len = strlen(filename);
 
     avio_w8(pb, MAT_NAME);

diff --git a/libavformat/hdsenc.c b/libavformat/hdsenc.c
index 72829f7..d82aee1 100644
--- a/libavformat/hdsenc.c
+++ b/libavformat/hdsenc.c

@@ -169,8 +169,8 @@
     if (c->nb_streams > 0)
         duration = c->streams[0].last_ts * av_q2d(s->streams[0]->time_base);
 
-    snprintf(filename, sizeof(filename), "%s/index.f4m", s->filename);
-    snprintf(temp_filename, sizeof(temp_filename), "%s/index.f4m.tmp", s->filename);
+    snprintf(filename, sizeof(filename), "%s/index.f4m", s->url);
+    snprintf(temp_filename, sizeof(temp_filename), "%s/index.f4m.tmp", s->url);
     ret = s->io_open(s, &out, temp_filename, AVIO_FLAG_WRITE, NULL);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Unable to open %s for writing\n", temp_filename);
@@ -178,7 +178,7 @@
     }
     avio_printf(out, "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
     avio_printf(out, "<manifest xmlns=\"http://ns.adobe.com/f4m/1.0\">\n");
-    avio_printf(out, "\t<id>%s</id>\n", av_basename(s->filename));
+    avio_printf(out, "\t<id>%s</id>\n", av_basename(s->url));
     avio_printf(out, "\t<streamType>%s</streamType>\n",
                      final ? "recorded" : "live");
     avio_printf(out, "\t<deliveryType>streaming</deliveryType>\n");
@@ -236,9 +236,9 @@
         cur_media_time = os->fragments[os->nb_fragments - 1]->start_time;
 
     snprintf(filename, sizeof(filename),
-             "%s/stream%d.abst", s->filename, index);
+             "%s/stream%d.abst", s->url, index);
     snprintf(temp_filename, sizeof(temp_filename),
-             "%s/stream%d.abst.tmp", s->filename, index);
+             "%s/stream%d.abst.tmp", s->url, index);
     ret = s->io_open(s, &out, temp_filename, AVIO_FLAG_WRITE, NULL);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Unable to open %s for writing\n", temp_filename);
@@ -317,9 +317,9 @@
     int ret = 0, i;
     AVOutputFormat *oformat;
 
-    if (mkdir(s->filename, 0777) == -1 && errno != EEXIST) {
+    if (mkdir(s->url, 0777) == -1 && errno != EEXIST) {
         ret = AVERROR(errno);
-        av_log(s, AV_LOG_ERROR , "Failed to create directory %s\n", s->filename);
+        av_log(s, AV_LOG_ERROR , "Failed to create directory %s\n", s->url);
         goto fail;
     }
 
@@ -412,7 +412,7 @@
             s->streams[os->first_stream + j]->time_base = os->ctx->streams[j]->time_base;
 
         snprintf(os->temp_filename, sizeof(os->temp_filename),
-                 "%s/stream%d_temp", s->filename, i);
+                 "%s/stream%d_temp", s->url, i);
         ret = init_file(s, os, 0);
         if (ret < 0)
             goto fail;
@@ -476,7 +476,7 @@
     close_file(s, os);
 
     snprintf(target_filename, sizeof(target_filename),
-             "%s/stream%dSeg1-Frag%d", s->filename, index, os->fragment_index);
+             "%s/stream%dSeg1-Frag%d", s->url, index, os->fragment_index);
     ret = ff_rename(os->temp_filename, target_filename, s);
     if (ret < 0)
         return ret;
@@ -549,13 +549,13 @@
 
     if (c->remove_at_exit) {
         char filename[1024];
-        snprintf(filename, sizeof(filename), "%s/index.f4m", s->filename);
+        snprintf(filename, sizeof(filename), "%s/index.f4m", s->url);
         unlink(filename);
         for (i = 0; i < c->nb_streams; i++) {
-            snprintf(filename, sizeof(filename), "%s/stream%d.abst", s->filename, i);
+            snprintf(filename, sizeof(filename), "%s/stream%d.abst", s->url, i);
             unlink(filename);
         }
-        rmdir(s->filename);
+        rmdir(s->url);
     }
 
     hds_free(s);

diff --git a/libavformat/hevc.c b/libavformat/hevc.c
index 1a2d6cd..3628d5a 100644
--- a/libavformat/hevc.c
+++ b/libavformat/hevc.c

@@ -417,7 +417,7 @@
 
 static int parse_rps(GetBitContext *gb, unsigned int rps_idx,
                      unsigned int num_rps,
-                     unsigned int num_delta_pocs[HEVC_MAX_SHORT_TERM_RPS_COUNT])
+                     unsigned int num_delta_pocs[HEVC_MAX_SHORT_TERM_REF_PIC_SETS])
 {
     unsigned int i;
 
@@ -486,7 +486,7 @@
                           HEVCDecoderConfigurationRecord *hvcc)
 {
     unsigned int i, sps_max_sub_layers_minus1, log2_max_pic_order_cnt_lsb_minus4;
-    unsigned int num_short_term_ref_pic_sets, num_delta_pocs[HEVC_MAX_SHORT_TERM_RPS_COUNT];
+    unsigned int num_short_term_ref_pic_sets, num_delta_pocs[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
 
     skip_bits(gb, 4); // sps_video_parameter_set_id
 
@@ -556,7 +556,7 @@
     }
 
     num_short_term_ref_pic_sets = get_ue_golomb_long(gb);
-    if (num_short_term_ref_pic_sets > HEVC_MAX_SHORT_TERM_RPS_COUNT)
+    if (num_short_term_ref_pic_sets > HEVC_MAX_SHORT_TERM_REF_PIC_SETS)
         return AVERROR_INVALIDDATA;
 
     for (i = 0; i < num_short_term_ref_pic_sets; i++) {
@@ -669,6 +669,8 @@
     while (i < src_len)
         dst[len++] = src[i++];
 
+    memset(dst + len, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
     *dst_len = len;
     return dst;
 }

diff --git a/libavformat/hls.c b/libavformat/hls.c
index 786934a..8ad08ba 100644
--- a/libavformat/hls.c
+++ b/libavformat/hls.c

@@ -26,6 +26,7 @@
  * http://tools.ietf.org/html/draft-pantos-http-live-streaming
  */
 
+#include "libavformat/http.h"
 #include "libavutil/avstring.h"
 #include "libavutil/avassert.h"
 #include "libavutil/intreadwrite.h"
@@ -94,6 +95,9 @@
     AVIOContext pb;
     uint8_t* read_buffer;
     AVIOContext *input;
+    int input_read_done;
+    AVIOContext *input_next;
+    int input_next_requested;
     AVFormatContext *parent;
     int index;
     AVFormatContext *ctx;
@@ -111,7 +115,7 @@
     int start_seq_no;
     int n_segments;
     struct segment **segments;
-    int needed, cur_needed;
+    int needed;
     int cur_seq_no;
     int64_t cur_seg_offset;
     int64_t last_load_time;
@@ -198,34 +202,30 @@
     int64_t first_timestamp;
     int64_t cur_timestamp;
     AVIOInterruptCB *interrupt_callback;
-    char *user_agent;                    ///< holds HTTP user agent set as an AVOption to the HTTP protocol context
-    char *cookies;                       ///< holds HTTP cookie values set in either the initial response or as an AVOption to the HTTP protocol context
-    char *headers;                       ///< holds HTTP headers set as an AVOption to the HTTP protocol context
-    char *http_proxy;                    ///< holds the address of the HTTP proxy server
     AVDictionary *avio_opts;
     int strict_std_compliance;
     char *allowed_extensions;
     int max_reload;
+    int http_persistent;
+    int http_multiple;
+    AVIOContext *playlist_pb;
 } HLSContext;
 
-static int read_chomp_line(AVIOContext *s, char *buf, int maxlen)
+static void free_segment_dynarray(struct segment **segments, int n_segments)
 {
-    int len = ff_get_line(s, buf, maxlen);
-    while (len > 0 && av_isspace(buf[len - 1]))
-        buf[--len] = '\0';
-    return len;
+    int i;
+    for (i = 0; i < n_segments; i++) {
+        av_freep(&segments[i]->key);
+        av_freep(&segments[i]->url);
+        av_freep(&segments[i]);
+    }
 }
 
 static void free_segment_list(struct playlist *pls)
 {
-    int i;
-    for (i = 0; i < pls->n_segments; i++) {
-        av_freep(&pls->segments[i]->key);
-        av_freep(&pls->segments[i]->url);
-        av_freep(&pls->segments[i]);
-    }
-    av_freep(&pls->segments);
-    pls->n_segments = 0;
+        free_segment_dynarray(pls->segments, pls->n_segments);
+        av_freep(&pls->segments);
+        pls->n_segments = 0;
 }
 
 static void free_init_section_list(struct playlist *pls)
@@ -256,6 +256,10 @@
         av_freep(&pls->pb.buffer);
         if (pls->input)
             ff_format_io_close(c->ctx, &pls->input);
+        pls->input_read_done = 0;
+        if (pls->input_next)
+            ff_format_io_close(c->ctx, &pls->input_next);
+        pls->input_next_requested = 0;
         if (pls->ctx) {
             pls->ctx->pb = NULL;
             avformat_close_input(&pls->ctx);
@@ -263,10 +267,6 @@
         av_free(pls);
     }
     av_freep(&c->playlists);
-    av_freep(&c->cookies);
-    av_freep(&c->user_agent);
-    av_freep(&c->headers);
-    av_freep(&c->http_proxy);
     c->n_playlists = 0;
 }
 
@@ -589,21 +589,32 @@
     return 0;
 }
 
-static void update_options(char **dest, const char *name, void *src)
+static int open_url_keepalive(AVFormatContext *s, AVIOContext **pb,
+                              const char *url)
 {
-    av_freep(dest);
-    av_opt_get(src, name, AV_OPT_SEARCH_CHILDREN, (uint8_t**)dest);
-    if (*dest && !strlen(*dest))
-        av_freep(dest);
+#if !CONFIG_HTTP_PROTOCOL
+    return AVERROR_PROTOCOL_NOT_FOUND;
+#else
+    int ret;
+    URLContext *uc = ffio_geturlcontext(*pb);
+    av_assert0(uc);
+    (*pb)->eof_reached = 0;
+    ret = ff_http_do_new_request(uc, url);
+    if (ret < 0) {
+        ff_format_io_close(s, pb);
+    }
+    return ret;
+#endif
 }
 
 static int open_url(AVFormatContext *s, AVIOContext **pb, const char *url,
-                    AVDictionary *opts, AVDictionary *opts2, int *is_http)
+                    AVDictionary *opts, AVDictionary *opts2, int *is_http_out)
 {
     HLSContext *c = s->priv_data;
     AVDictionary *tmp = NULL;
     const char *proto_name = NULL;
     int ret;
+    int is_http = 0;
 
     av_dict_copy(&tmp, opts, 0);
     av_dict_copy(&tmp, opts2, 0);
@@ -629,7 +640,7 @@
             return AVERROR_INVALIDDATA;
         }
     } else if (av_strstart(proto_name, "http", NULL)) {
-        ;
+        is_http = 1;
     } else
         return AVERROR_INVALIDDATA;
 
@@ -640,7 +651,20 @@
     else if (strcmp(proto_name, "file") || !strncmp(url, "file,", 5))
         return AVERROR_INVALIDDATA;
 
-    ret = s->io_open(s, pb, url, AVIO_FLAG_READ, &tmp);
+    if (is_http && c->http_persistent && *pb) {
+        ret = open_url_keepalive(c->ctx, pb, url);
+        if (ret == AVERROR_EXIT) {
+            return ret;
+        } else if (ret < 0) {
+            if (ret != AVERROR_EOF)
+                av_log(s, AV_LOG_WARNING,
+                    "keepalive request failed for '%s', retrying with new connection: %s\n",
+                    url, av_err2str(ret));
+            ret = s->io_open(s, pb, url, AVIO_FLAG_READ, &tmp);
+        }
+    } else {
+        ret = s->io_open(s, pb, url, AVIO_FLAG_READ, &tmp);
+    }
     if (ret >= 0) {
         // update cookies on http response with setcookies.
         char *new_cookies = NULL;
@@ -648,18 +672,14 @@
         if (!(s->flags & AVFMT_FLAG_CUSTOM_IO))
             av_opt_get(*pb, "cookies", AV_OPT_SEARCH_CHILDREN, (uint8_t**)&new_cookies);
 
-        if (new_cookies) {
-            av_free(c->cookies);
-            c->cookies = new_cookies;
-        }
-
-        av_dict_set(&opts, "cookies", c->cookies, 0);
+        if (new_cookies)
+            av_dict_set(&opts, "cookies", new_cookies, AV_DICT_DONT_STRDUP_VAL);
     }
 
     av_dict_free(&tmp);
 
-    if (is_http)
-        *is_http = av_strstart(proto_name, "http", NULL);
+    if (is_http_out)
+        *is_http_out = is_http;
 
     return ret;
 }
@@ -682,48 +702,64 @@
     struct variant_info variant_info;
     char tmp_str[MAX_URL_SIZE];
     struct segment *cur_init_section = NULL;
+    int is_http = av_strstart(url, "http", NULL);
+    struct segment **prev_segments = NULL;
+    int prev_n_segments = 0;
+    int prev_start_seq_no = -1;
+
+    if (is_http && !in && c->http_persistent && c->playlist_pb) {
+        in = c->playlist_pb;
+        ret = open_url_keepalive(c->ctx, &c->playlist_pb, url);
+        if (ret == AVERROR_EXIT) {
+            return ret;
+        } else if (ret < 0) {
+            if (ret != AVERROR_EOF)
+                av_log(c->ctx, AV_LOG_WARNING,
+                    "keepalive request failed for '%s', retrying with new connection: %s\n",
+                    url, av_err2str(ret));
+            in = NULL;
+        }
+    }
 
     if (!in) {
-#if 1
         AVDictionary *opts = NULL;
-        close_in = 1;
-        /* Some HLS servers don't like being sent the range header */
-        av_dict_set(&opts, "seekable", "0", 0);
+        av_dict_copy(&opts, c->avio_opts, 0);
 
-        // broker prior HTTP options that should be consistent across requests
-        av_dict_set(&opts, "user_agent", c->user_agent, 0);
-        av_dict_set(&opts, "cookies", c->cookies, 0);
-        av_dict_set(&opts, "headers", c->headers, 0);
-        av_dict_set(&opts, "http_proxy", c->http_proxy, 0);
+        if (c->http_persistent)
+            av_dict_set(&opts, "multiple_requests", "1", 0);
 
         ret = c->ctx->io_open(c->ctx, &in, url, AVIO_FLAG_READ, &opts);
         av_dict_free(&opts);
         if (ret < 0)
             return ret;
-#else
-        ret = open_in(c, &in, url);
-        if (ret < 0)
-            return ret;
-        close_in = 1;
-#endif
+
+        if (is_http && c->http_persistent)
+            c->playlist_pb = in;
+        else
+            close_in = 1;
     }
 
     if (av_opt_get(in, "location", AV_OPT_SEARCH_CHILDREN, &new_url) >= 0)
         url = new_url;
 
-    read_chomp_line(in, line, sizeof(line));
+    ff_get_chomp_line(in, line, sizeof(line));
     if (strcmp(line, "#EXTM3U")) {
         ret = AVERROR_INVALIDDATA;
         goto fail;
     }
 
     if (pls) {
-        free_segment_list(pls);
+        prev_start_seq_no = pls->start_seq_no;
+        prev_segments = pls->segments;
+        prev_n_segments = pls->n_segments;
+        pls->segments = NULL;
+        pls->n_segments = 0;
+
         pls->finished = 0;
         pls->type = PLS_TYPE_UNSPECIFIED;
     }
     while (!avio_feof(in)) {
-        read_chomp_line(in, line, sizeof(line));
+        ff_get_chomp_line(in, line, sizeof(line));
         if (av_strstart(line, "#EXT-X-STREAM-INF:", &ptr)) {
             is_variant = 1;
             memset(&variant_info, 0, sizeof(variant_info));
@@ -775,6 +811,27 @@
             ff_parse_key_value(ptr, (ff_parse_key_val_cb) handle_init_section_args,
                                &info);
             cur_init_section = new_init_section(pls, &info, url);
+            cur_init_section->key_type = key_type;
+            if (has_iv) {
+                memcpy(cur_init_section->iv, iv, sizeof(iv));
+            } else {
+                int seq = pls->start_seq_no + pls->n_segments;
+                memset(cur_init_section->iv, 0, sizeof(cur_init_section->iv));
+                AV_WB32(cur_init_section->iv + 12, seq);
+            }
+
+            if (key_type != KEY_NONE) {
+                ff_make_absolute_url(tmp_str, sizeof(tmp_str), url, key);
+                cur_init_section->key = av_strdup(tmp_str);
+                if (!cur_init_section->key) {
+                    av_free(cur_init_section);
+                    ret = AVERROR(ENOMEM);
+                    goto fail;
+                }
+            } else {
+                cur_init_section->key = NULL;
+            }
+
         } else if (av_strstart(line, "#EXT-X-ENDLIST", &ptr)) {
             if (pls)
                 pls->finished = 1;
@@ -858,6 +915,23 @@
             }
         }
     }
+    if (prev_segments) {
+        if (pls->start_seq_no > prev_start_seq_no && c->first_timestamp != AV_NOPTS_VALUE) {
+            int64_t prev_timestamp = c->first_timestamp;
+            int i, diff = pls->start_seq_no - prev_start_seq_no;
+            for (i = 0; i < prev_n_segments && i < diff; i++) {
+                c->first_timestamp += prev_segments[i]->duration;
+            }
+            av_log(c->ctx, AV_LOG_DEBUG, "Media sequence change (%d -> %d)"
+                   " reflected in first_timestamp: %"PRId64" -> %"PRId64"\n",
+                   prev_start_seq_no, pls->start_seq_no,
+                   prev_timestamp, c->first_timestamp);
+        } else if (pls->start_seq_no < prev_start_seq_no) {
+            av_log(c->ctx, AV_LOG_WARNING, "Media sequence changed unexpectedly: %d -> %d\n",
+                   prev_start_seq_no, pls->start_seq_no);
+        }
+        free_segment_dynarray(prev_segments, prev_n_segments);
+    }
     if (pls)
         pls->last_load_time = av_gettime_relative();
 
@@ -865,6 +939,11 @@
     av_free(new_url);
     if (close_in)
         ff_format_io_close(c->ctx, &in);
+    c->ctx->ctx_flags = c->ctx->ctx_flags & ~(unsigned)AVFMTCTX_UNSEEKABLE;
+    if (!c->n_variants || !c->variants[0]->n_playlists ||
+        !(c->variants[0]->playlists[0]->finished ||
+          c->variants[0]->playlists[0]->type == PLS_TYPE_EVENT))
+        c->ctx->ctx_flags |= AVFMTCTX_UNSEEKABLE;
     return ret;
 }
 
@@ -873,14 +952,16 @@
     return pls->segments[pls->cur_seq_no - pls->start_seq_no];
 }
 
-enum ReadFromURLMode {
-    READ_NORMAL,
-    READ_COMPLETE,
-};
+static struct segment *next_segment(struct playlist *pls)
+{
+    int n = pls->cur_seq_no - pls->start_seq_no + 1;
+    if (n >= pls->n_segments)
+        return NULL;
+    return pls->segments[n];
+}
 
 static int read_from_url(struct playlist *pls, struct segment *seg,
-                         uint8_t *buf, int buf_size,
-                         enum ReadFromURLMode mode)
+                         uint8_t *buf, int buf_size)
 {
     int ret;
 
@@ -888,13 +969,7 @@
     if (seg->size >= 0)
         buf_size = FFMIN(buf_size, seg->size - pls->cur_seg_offset);
 
-    if (mode == READ_COMPLETE) {
-        ret = avio_read(pls->input, buf, buf_size);
-        if (ret != buf_size)
-            av_log(NULL, AV_LOG_ERROR, "Could not read complete segment.\n");
-    } else
-        ret = avio_read(pls->input, buf, buf_size);
-
+    ret = avio_read(pls->input, buf, buf_size);
     if (ret > 0)
         pls->cur_seg_offset += ret;
 
@@ -983,6 +1058,7 @@
             /* demuxer not yet opened, defer picture attachment */
             pls->id3_deferred_extra = extra_meta;
 
+        ff_id3v2_parse_priv_dict(&metadata, &extra_meta);
         av_dict_copy(&pls->ctx->metadata, metadata, 0);
         pls->id3_initial = metadata;
 
@@ -1012,7 +1088,7 @@
     while (1) {
         /* see if we can retrieve enough data for ID3 header */
         if (*len < ID3v2_HEADER_SIZE && buf_size >= ID3v2_HEADER_SIZE) {
-            bytes = read_from_url(pls, seg, buf + *len, ID3v2_HEADER_SIZE - *len, READ_COMPLETE);
+            bytes = read_from_url(pls, seg, buf + *len, ID3v2_HEADER_SIZE - *len);
             if (bytes > 0) {
 
                 if (bytes == ID3v2_HEADER_SIZE - *len)
@@ -1064,7 +1140,7 @@
 
             if (remaining > 0) {
                 /* read the rest of the tag in */
-                if (read_from_url(pls, seg, pls->id3_buf + id3_buf_pos, remaining, READ_COMPLETE) != remaining)
+                if (read_from_url(pls, seg, pls->id3_buf + id3_buf_pos, remaining) != remaining)
                     break;
                 id3_buf_pos += remaining;
                 av_log(pls->ctx, AV_LOG_DEBUG, "Stripped additional %d HLS ID3 bytes\n", remaining);
@@ -1078,7 +1154,7 @@
 
     /* re-fill buffer for the caller unless EOF */
     if (*len >= 0 && (fill_buf || *len == 0)) {
-        bytes = read_from_url(pls, seg, buf + *len, buf_size - *len, READ_NORMAL);
+        bytes = read_from_url(pls, seg, buf + *len, buf_size - *len);
 
         /* ignore error if we already had some data */
         if (bytes >= 0)
@@ -1098,18 +1174,14 @@
         pls->is_id3_timestamped = (pls->id3_mpegts_timestamp != AV_NOPTS_VALUE);
 }
 
-static int open_input(HLSContext *c, struct playlist *pls, struct segment *seg)
+static int open_input(HLSContext *c, struct playlist *pls, struct segment *seg, AVIOContext **in)
 {
     AVDictionary *opts = NULL;
     int ret;
     int is_http = 0;
 
-    // broker prior HTTP options that should be consistent across requests
-    av_dict_set(&opts, "user_agent", c->user_agent, 0);
-    av_dict_set(&opts, "cookies", c->cookies, 0);
-    av_dict_set(&opts, "headers", c->headers, 0);
-    av_dict_set(&opts, "http_proxy", c->http_proxy, 0);
-    av_dict_set(&opts, "seekable", "0", 0);
+    if (c->http_persistent)
+        av_dict_set(&opts, "multiple_requests", "1", 0);
 
     if (seg->size >= 0) {
         /* try to restrict the HTTP request to the part we want
@@ -1122,12 +1194,11 @@
            seg->url, seg->url_offset, pls->index);
 
     if (seg->key_type == KEY_NONE) {
-        ret = open_url(pls->parent, &pls->input, seg->url, c->avio_opts, opts, &is_http);
+        ret = open_url(pls->parent, in, seg->url, c->avio_opts, opts, &is_http);
     } else if (seg->key_type == KEY_AES_128) {
-        AVDictionary *opts2 = NULL;
         char iv[33], key[33], url[MAX_URL_SIZE];
         if (strcmp(seg->key, pls->key_url)) {
-            AVIOContext *pb;
+            AVIOContext *pb = NULL;
             if (open_url(pls->parent, &pb, seg->key, c->avio_opts, opts, NULL) == 0) {
                 ret = avio_read(pb, pls->key, sizeof(pls->key));
                 if (ret != sizeof(pls->key)) {
@@ -1149,14 +1220,10 @@
         else
             snprintf(url, sizeof(url), "crypto:%s", seg->url);
 
-        av_dict_copy(&opts2, c->avio_opts, 0);
-        av_dict_set(&opts2, "key", key, 0);
-        av_dict_set(&opts2, "iv", iv, 0);
+        av_dict_set(&opts, "key", key, 0);
+        av_dict_set(&opts, "iv", iv, 0);
 
-        ret = open_url(pls->parent, &pls->input, url, opts2, opts, &is_http);
-
-        av_dict_free(&opts2);
-
+        ret = open_url(pls->parent, in, url, c->avio_opts, opts, &is_http);
         if (ret < 0) {
             goto cleanup;
         }
@@ -1180,11 +1247,11 @@
      * noticed without the call, though.
      */
     if (ret == 0 && !is_http && seg->key_type == KEY_NONE && seg->url_offset) {
-        int64_t seekret = avio_seek(pls->input, seg->url_offset, SEEK_SET);
+        int64_t seekret = avio_seek(*in, seg->url_offset, SEEK_SET);
         if (seekret < 0) {
             av_log(pls->parent, AV_LOG_ERROR, "Unable to seek to offset %"PRId64" of HLS segment '%s'\n", seg->url_offset, seg->url);
             ret = seekret;
-            ff_format_io_close(pls->parent, &pls->input);
+            ff_format_io_close(pls->parent, in);
         }
     }
 
@@ -1210,7 +1277,7 @@
     if (!seg->init_section)
         return 0;
 
-    ret = open_input(c, pls, seg->init_section);
+    ret = open_input(c, pls, seg->init_section, &pls->input);
     if (ret < 0) {
         av_log(pls->parent, AV_LOG_WARNING,
                "Failed to open an initialization section in playlist %d\n",
@@ -1234,7 +1301,7 @@
     av_fast_malloc(&pls->init_sec_buf, &pls->init_sec_buf_size, sec_size);
 
     ret = read_from_url(pls, seg->init_section, pls->init_sec_buf,
-                        pls->init_sec_buf_size, READ_COMPLETE);
+                        pls->init_sec_buf_size);
     ff_format_io_close(pls->parent, &pls->input);
 
     if (ret < 0)
@@ -1258,33 +1325,72 @@
                           pls->target_duration;
 }
 
+static int playlist_needed(struct playlist *pls)
+{
+    AVFormatContext *s = pls->parent;
+    int i, j;
+    int stream_needed = 0;
+    int first_st;
+
+    /* If there is no context or streams yet, the playlist is needed */
+    if (!pls->ctx || !pls->n_main_streams)
+        return 1;
+
+    /* check if any of the streams in the playlist are needed */
+    for (i = 0; i < pls->n_main_streams; i++) {
+        if (pls->main_streams[i]->discard < AVDISCARD_ALL) {
+            stream_needed = 1;
+            break;
+        }
+    }
+
+    /* If all streams in the playlist were discarded, the playlist is not
+     * needed (regardless of whether whole programs are discarded or not). */
+    if (!stream_needed)
+        return 0;
+
+    /* Otherwise, check if all the programs (variants) this playlist is in are
+     * discarded. Since all streams in the playlist are part of the same programs
+     * we can just check the programs of the first stream. */
+
+    first_st = pls->main_streams[0]->index;
+
+    for (i = 0; i < s->nb_programs; i++) {
+        AVProgram *program = s->programs[i];
+        if (program->discard < AVDISCARD_ALL) {
+            for (j = 0; j < program->nb_stream_indexes; j++) {
+                if (program->stream_index[j] == first_st) {
+                    /* playlist is in an undiscarded program */
+                    return 1;
+                }
+            }
+        }
+    }
+
+    /* some streams were not discarded but all the programs were */
+    return 0;
+}
+
 static int read_data(void *opaque, uint8_t *buf, int buf_size)
 {
     struct playlist *v = opaque;
     HLSContext *c = v->parent->priv_data;
-    int ret, i;
+    int ret;
     int just_opened = 0;
     int reload_count = 0;
+    struct segment *seg;
 
 restart:
     if (!v->needed)
         return AVERROR_EOF;
 
-    if (!v->input) {
+    if (!v->input || (c->http_persistent && v->input_read_done)) {
         int64_t reload_interval;
-        struct segment *seg;
 
         /* Check that the playlist is still needed before opening a new
          * segment. */
-        if (v->ctx && v->ctx->nb_streams) {
-            v->needed = 0;
-            for (i = 0; i < v->n_main_streams; i++) {
-                if (v->main_streams[i]->discard < AVDISCARD_ALL) {
-                    v->needed = 1;
-                    break;
-                }
-            }
-        }
+        v->needed = playlist_needed(v);
+
         if (!v->needed) {
             av_log(v->parent, AV_LOG_INFO, "No longer receiving playlist %d\n",
                 v->index);
@@ -1302,8 +1408,9 @@
         if (!v->finished &&
             av_gettime_relative() - v->last_load_time >= reload_interval) {
             if ((ret = parse_playlist(c, v->url, v, NULL)) < 0) {
-                av_log(v->parent, AV_LOG_WARNING, "Failed to reload playlist %d\n",
-                       v->index);
+                if (ret != AVERROR_EXIT)
+                    av_log(v->parent, AV_LOG_WARNING, "Failed to reload playlist %d\n",
+                           v->index);
                 return ret;
             }
             /* If we need to reload the playlist again below (if
@@ -1329,6 +1436,7 @@
             goto reload;
         }
 
+        v->input_read_done = 0;
         seg = current_segment(v);
 
         /* load/update Media Initialization Section, if any */
@@ -1336,11 +1444,18 @@
         if (ret)
             return ret;
 
-        ret = open_input(c, v, seg);
+        if (c->http_multiple == 1 && v->input_next_requested) {
+            FFSWAP(AVIOContext *, v->input, v->input_next);
+            v->input_next_requested = 0;
+            ret = 0;
+        } else {
+            ret = open_input(c, v, seg, &v->input);
+        }
         if (ret < 0) {
             if (ff_check_interrupt(c->interrupt_callback))
                 return AVERROR_EXIT;
-            av_log(v->parent, AV_LOG_WARNING, "Failed to open segment of playlist %d\n",
+            av_log(v->parent, AV_LOG_WARNING, "Failed to open segment %d of playlist %d\n",
+                   v->cur_seq_no,
                    v->index);
             v->cur_seq_no += 1;
             goto reload;
@@ -1348,6 +1463,30 @@
         just_opened = 1;
     }
 
+    if (c->http_multiple == -1) {
+        uint8_t *http_version_opt = NULL;
+        int r = av_opt_get(v->input, "http_version", AV_OPT_SEARCH_CHILDREN, &http_version_opt);
+        if (r >= 0) {
+            c->http_multiple = strncmp((const char *)http_version_opt, "1.1", 3) == 0;
+            av_freep(&http_version_opt);
+        }
+    }
+
+    seg = next_segment(v);
+    if (c->http_multiple == 1 && !v->input_next_requested &&
+        seg && seg->key_type == KEY_NONE && av_strstart(seg->url, "http", NULL)) {
+        ret = open_input(c, v, seg, &v->input_next);
+        if (ret < 0) {
+            if (ff_check_interrupt(c->interrupt_callback))
+                return AVERROR_EXIT;
+            av_log(v->parent, AV_LOG_WARNING, "Failed to open segment %d of playlist %d\n",
+                   v->cur_seq_no + 1,
+                   v->index);
+        } else {
+            v->input_next_requested = 1;
+        }
+    }
+
     if (v->init_sec_buf_read_offset < v->init_sec_data_len) {
         /* Push init section out first before first actual segment */
         int copy_size = FFMIN(v->init_sec_data_len - v->init_sec_buf_read_offset, buf_size);
@@ -1356,7 +1495,8 @@
         return copy_size;
     }
 
-    ret = read_from_url(v, current_segment(v), buf, buf_size, READ_NORMAL);
+    seg = current_segment(v);
+    ret = read_from_url(v, seg, buf, buf_size);
     if (ret > 0) {
         if (just_opened && v->is_id3_timestamped != 0) {
             /* Intercept ID3 tags here, elementary audio streams are required
@@ -1366,7 +1506,12 @@
 
         return ret;
     }
-    ff_format_io_close(v->parent, &v->input);
+    if (c->http_persistent &&
+        seg->key_type == KEY_NONE && av_strstart(seg->url, "http", NULL)) {
+        v->input_read_done = 1;
+    } else {
+        ff_format_io_close(v->parent, &v->input);
+    }
     v->cur_seq_no++;
 
     c->cur_seq_no = v->cur_seq_no;
@@ -1499,7 +1644,7 @@
 {
     HLSContext *c = s->priv_data;
     static const char * const opts[] = {
-        "headers", "http_proxy", "user_agent", "user-agent", "cookies", NULL };
+        "headers", "http_proxy", "user_agent", "cookies", "referer", "rw_timeout", NULL };
     const char * const * opt = opts;
     uint8_t *buf;
     int ret = 0;
@@ -1523,7 +1668,7 @@
     av_log(s, AV_LOG_ERROR,
            "A HLS playlist item '%s' referred to an external file '%s'. "
            "Opening this file was forbidden for security reasons\n",
-           s->filename, url);
+           s->url, url);
     return AVERROR(EPERM);
 }
 
@@ -1627,13 +1772,13 @@
     free_rendition_list(c);
 
     av_dict_free(&c->avio_opts);
+    ff_format_io_close(c->ctx, &c->playlist_pb);
 
     return 0;
 }
 
 static int hls_read_header(AVFormatContext *s)
 {
-    void *u = (s->flags & AVFMT_FLAG_CUSTOM_IO) ? NULL : s->pb;
     HLSContext *c = s->priv_data;
     int ret = 0, i;
     int highest_cur_seq_no = 0;
@@ -1646,29 +1791,15 @@
     c->first_timestamp = AV_NOPTS_VALUE;
     c->cur_timestamp = AV_NOPTS_VALUE;
 
-    if (u) {
-        // get the previous user agent & set back to null if string size is zero
-        update_options(&c->user_agent, "user_agent", u);
-
-        // get the previous cookies & set back to null if string size is zero
-        update_options(&c->cookies, "cookies", u);
-
-        // get the previous headers & set back to null if string size is zero
-        update_options(&c->headers, "headers", u);
-
-        // get the previous http proxt & set back to null if string size is zero
-        update_options(&c->http_proxy, "http_proxy", u);
-    }
-
-    if ((ret = parse_playlist(c, s->filename, NULL, s->pb)) < 0)
-        goto fail;
-
     if ((ret = save_avio_options(s)) < 0)
         goto fail;
 
     /* Some HLS servers don't like being sent the range header */
     av_dict_set(&c->avio_opts, "seekable", "0", 0);
 
+    if ((ret = parse_playlist(c, s->url, NULL, s->pb)) < 0)
+        goto fail;
+
     if (c->n_variants == 0) {
         av_log(NULL, AV_LOG_WARNING, "Empty playlist\n");
         ret = AVERROR_EOF;
@@ -1798,6 +1929,7 @@
         if (pls->id3_deferred_extra && pls->ctx->nb_streams == 1) {
             ff_id3v2_parse_apic(pls->ctx, &pls->id3_deferred_extra);
             avformat_queue_attached_pictures(pls->ctx);
+            ff_id3v2_parse_priv(pls->ctx, &pls->id3_deferred_extra);
             ff_id3v2_free_extra_meta(&pls->id3_deferred_extra);
             pls->id3_deferred_extra = NULL;
         }
@@ -1824,6 +1956,13 @@
         if (ret < 0)
             goto fail;
 
+        /*
+         * Copy any metadata from playlist to main streams, but do not set
+         * event flags.
+         */
+        if (pls->n_main_streams)
+            av_dict_copy(&pls->main_streams[0]->metadata, pls->ctx->metadata, 0);
+
         add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_AUDIO);
         add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_VIDEO);
         add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_SUBTITLE);
@@ -1841,20 +1980,15 @@
 {
     HLSContext *c = s->priv_data;
     int i, changed = 0;
+    int cur_needed;
 
     /* Check if any new streams are needed */
-    for (i = 0; i < c->n_playlists; i++)
-        c->playlists[i]->cur_needed = 0;
-
-    for (i = 0; i < s->nb_streams; i++) {
-        AVStream *st = s->streams[i];
-        struct playlist *pls = c->playlists[s->streams[i]->id];
-        if (st->discard < AVDISCARD_ALL)
-            pls->cur_needed = 1;
-    }
     for (i = 0; i < c->n_playlists; i++) {
         struct playlist *pls = c->playlists[i];
-        if (pls->cur_needed && !pls->needed) {
+
+        cur_needed = playlist_needed(c->playlists[i]);
+
+        if (cur_needed && !pls->needed) {
             pls->needed = 1;
             changed = 1;
             pls->cur_seq_no = select_cur_seq_no(c, pls);
@@ -1866,9 +2000,13 @@
                 pls->seek_stream_index = -1;
             }
             av_log(s, AV_LOG_INFO, "Now receiving playlist %d, segment %d\n", i, pls->cur_seq_no);
-        } else if (first && !pls->cur_needed && pls->needed) {
+        } else if (first && !cur_needed && pls->needed) {
             if (pls->input)
                 ff_format_io_close(pls->parent, &pls->input);
+            pls->input_read_done = 0;
+            if (pls->input_next)
+                ff_format_io_close(pls->parent, &pls->input_next);
+            pls->input_next_requested = 0;
             pls->needed = 0;
             changed = 1;
             av_log(s, AV_LOG_INFO, "No longer receiving playlist %d\n", i);
@@ -2009,6 +2147,17 @@
             return ret;
         }
 
+        // If sub-demuxer reports updated metadata, copy it to the first stream
+        // and set its AVSTREAM_EVENT_FLAG_METADATA_UPDATED flag.
+        if (pls->ctx->event_flags & AVFMT_EVENT_FLAG_METADATA_UPDATED) {
+            if (pls->n_main_streams) {
+                st = pls->main_streams[0];
+                av_dict_copy(&st->metadata, pls->ctx->metadata, 0);
+                st->event_flags |= AVSTREAM_EVENT_FLAG_METADATA_UPDATED;
+            }
+            pls->ctx->event_flags &= ~AVFMT_EVENT_FLAG_METADATA_UPDATED;
+        }
+
         /* check if noheader flag has been cleared by the subdemuxer */
         if (pls->has_noheader_flag && !(pls->ctx->ctx_flags & AVFMTCTX_NOHEADER)) {
             pls->has_noheader_flag = 0;
@@ -2060,8 +2209,7 @@
     int stream_subdemuxer_index;
     int64_t first_timestamp, seek_timestamp, duration;
 
-    if ((flags & AVSEEK_FLAG_BYTE) ||
-        !(c->variants[0]->playlists[0]->finished || c->variants[0]->playlists[0]->type == PLS_TYPE_EVENT))
+    if ((flags & AVSEEK_FLAG_BYTE) || (c->ctx->ctx_flags & AVFMTCTX_UNSEEKABLE))
         return AVERROR(ENOSYS);
 
     first_timestamp = c->first_timestamp == AV_NOPTS_VALUE ?
@@ -2103,6 +2251,10 @@
         struct playlist *pls = c->playlists[i];
         if (pls->input)
             ff_format_io_close(pls->parent, &pls->input);
+        pls->input_read_done = 0;
+        if (pls->input_next)
+            ff_format_io_close(pls->parent, &pls->input_next);
+        pls->input_next_requested = 0;
         av_packet_unref(&pls->pkt);
         reset_packet(&pls->pkt);
         pls->pb.eof_reached = 0;
@@ -2157,6 +2309,10 @@
         INT_MIN, INT_MAX, FLAGS},
     {"max_reload", "Maximum number of times a insufficient list is attempted to be reloaded",
         OFFSET(max_reload), AV_OPT_TYPE_INT, {.i64 = 1000}, 0, INT_MAX, FLAGS},
+    {"http_persistent", "Use persistent HTTP connections",
+        OFFSET(http_persistent), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
+    {"http_multiple", "Use multiple HTTP connections for fetching segments",
+        OFFSET(http_multiple), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, FLAGS},
     {NULL}
 };
 
@@ -2172,6 +2328,7 @@
     .long_name      = NULL_IF_CONFIG_SMALL("Apple HTTP Live Streaming"),
     .priv_class     = &hls_class,
     .priv_data_size = sizeof(HLSContext),
+    .flags          = AVFMT_NOGENSEARCH,
     .read_probe     = hls_probe,
     .read_header    = hls_read_header,
     .read_packet    = hls_read_packet,

diff --git a/libavformat/hlsenc.c b/libavformat/hlsenc.c
index e0cef8b..28c2dd6 100644
--- a/libavformat/hlsenc.c
+++ b/libavformat/hlsenc.c

@@ -1,6 +1,7 @@
 /*
  * Apple HTTP Live Streaming segmenter
  * Copyright (c) 2012, Luca Barbato
+ * Copyright (c) 2017 Akamai Technologies, Inc.
  *
  * This file is part of FFmpeg.
  *
@@ -44,6 +45,10 @@
 
 #include "avformat.h"
 #include "avio_internal.h"
+#if CONFIG_HTTP_PROTOCOL
+#include "http.h"
+#endif
+#include "hlsplaylist.h"
 #include "internal.h"
 #include "os_support.h"
 
@@ -53,9 +58,15 @@
   HLS_START_SEQUENCE_AS_FORMATTED_DATETIME = 2,  // YYYYMMDDhhmmss
 } StartSequenceSourceType;
 
+typedef enum {
+    CODEC_ATTRIBUTE_WRITTEN = 0,
+    CODEC_ATTRIBUTE_WILL_NOT_BE_WRITTEN,
+} CodecAttributeStatus;
+
 #define KEYSIZE 16
 #define LINE_BUFFER_SIZE 1024
 #define HLS_MICROSECOND_UNIT   1000000
+#define POSTFIX_PATTERN "_%d"
 
 typedef struct HLSSegment {
     char filename[1024];
@@ -64,6 +75,7 @@
     int discont;
     int64_t pos;
     int64_t size;
+    unsigned var_stream_idx;
 
     char key_uri[LINE_BUFFER_SIZE + 1];
     char iv_string[KEYSIZE*2 + 1];
@@ -86,6 +98,7 @@
     HLS_SECOND_LEVEL_SEGMENT_SIZE = (1 << 10), // include segment size (bytes) in segment filenames when use_localtime  e.g.: %%014s
     HLS_TEMP_FILE = (1 << 11),
     HLS_PERIODIC_REKEY = (1 << 12),
+    HLS_INDEPENDENT_SEGMENTS = (1 << 13),
 } HLSFlags;
 
 typedef enum {
@@ -93,19 +106,10 @@
     SEGMENT_TYPE_FMP4,
 } SegmentType;
 
-typedef enum {
-    PLAYLIST_TYPE_NONE,
-    PLAYLIST_TYPE_EVENT,
-    PLAYLIST_TYPE_VOD,
-    PLAYLIST_TYPE_NB,
-} PlaylistType;
-
-typedef struct HLSContext {
-    const AVClass *class;  // Class for private options.
+typedef struct VariantStream {
+    unsigned var_stream_idx;
     unsigned number;
     int64_t sequence;
-    int64_t start_sequence;
-    uint32_t start_sequence_source_type;  // enum StartSequenceSourceType
     AVOutputFormat *oformat;
     AVOutputFormat *vtt_oformat;
     AVIOContext *out;
@@ -115,23 +119,6 @@
     AVFormatContext *avf;
     AVFormatContext *vtt_avf;
 
-    float time;            // Set by a private option.
-    float init_time;       // Set by a private option.
-    int max_nb_segments;   // Set by a private option.
-#if FF_API_HLS_WRAP
-    int  wrap;             // Set by a private option.
-#endif
-    uint32_t flags;        // enum HLSFlags
-    uint32_t pl_type;      // enum PlaylistType
-    char *segment_filename;
-    char *fmp4_init_filename;
-    int segment_type;
-    int fmp4_init_mode;
-
-    int use_localtime;      ///< flag to expand filename with localtime
-    int use_localtime_mkdir;///< flag to mkdir dirname in timebased filename
-    int allowcache;
-    int64_t recording_time;
     int has_video;
     int has_subtitle;
     int new_start;
@@ -141,19 +128,67 @@
     double duration;      // last segment duration computed so far, in seconds
     int64_t start_pos;    // last segment starting position
     int64_t size;         // last segment size
-    int64_t max_seg_size; // every segment file max size
     int nb_entries;
     int discontinuity_set;
     int discontinuity;
+    int reference_stream_index;
 
     HLSSegment *segments;
     HLSSegment *last_segment;
     HLSSegment *old_segments;
 
     char *basename;
-    char *base_output_dirname;
     char *vtt_basename;
     char *vtt_m3u8_name;
+    char *m3u8_name;
+
+    double initial_prog_date_time;
+    char current_segment_final_filename_fmt[1024]; // when renaming segments
+
+    char *fmp4_init_filename;
+    char *base_output_dirname;
+    int fmp4_init_mode;
+
+    AVStream **streams;
+    char codec_attr[128];
+    CodecAttributeStatus attr_status;
+    unsigned int nb_streams;
+    int m3u8_created; /* status of media play-list creation */
+    char *agroup; /* audio group name */
+    char *ccgroup; /* closed caption group name */
+    char *baseurl;
+} VariantStream;
+
+typedef struct ClosedCaptionsStream {
+    char *ccgroup; /* closed caption group name */
+    char *instreamid; /* closed captions INSTREAM-ID */
+    char *language; /* closed captions langauge */
+} ClosedCaptionsStream;
+
+typedef struct HLSContext {
+    const AVClass *class;  // Class for private options.
+    int64_t start_sequence;
+    uint32_t start_sequence_source_type;  // enum StartSequenceSourceType
+
+    float time;            // Set by a private option.
+    float init_time;       // Set by a private option.
+    int max_nb_segments;   // Set by a private option.
+    int hls_delete_threshold; // Set by a private option.
+#if FF_API_HLS_WRAP
+    int  wrap;             // Set by a private option.
+#endif
+    uint32_t flags;        // enum HLSFlags
+    uint32_t pl_type;      // enum PlaylistType
+    char *segment_filename;
+    char *fmp4_init_filename;
+    int segment_type;
+
+    int use_localtime;      ///< flag to expand filename with localtime
+    int use_localtime_mkdir;///< flag to mkdir dirname in timebased filename
+    int allowcache;
+    int64_t recording_time;
+    int64_t max_seg_size; // every segment file max size
+
     char *baseurl;
     char *format_options_str;
     char *vtt_format_options_str;
@@ -165,6 +200,7 @@
     char *key_url;
     char *iv;
     char *key_basename;
+    int encrypt_started;
 
     char *key_info_file;
     char key_file[LINE_BUFFER_SIZE + 1];
@@ -174,54 +210,61 @@
     AVDictionary *vtt_format_options;
 
     char *method;
-
-    double initial_prog_date_time;
-    char current_segment_final_filename_fmt[1024]; // when renaming segments
     char *user_agent;
+
+    VariantStream *var_streams;
+    unsigned int nb_varstreams;
+    ClosedCaptionsStream *cc_streams;
+    unsigned int nb_ccstreams;
+
+    int master_m3u8_created; /* status of master play-list creation */
+    char *master_m3u8_url; /* URL of the master m3u8 file */
+    int version; /* HLS version */
+    char *var_stream_map; /* user specified variant stream map string */
+    char *cc_stream_map; /* user specified closed caption streams map string */
+    char *master_pl_name;
+    unsigned int master_publish_rate;
+    int http_persistent;
+    AVIOContext *m3u8_out;
+    AVIOContext *sub_m3u8_out;
+    int64_t timeout;
 } HLSContext;
 
-static int get_int_from_double(double val)
-{
-    return (int)((val - (int)val) >= 0.001) ? (int)(val + 1) : (int)val;
+static int hlsenc_io_open(AVFormatContext *s, AVIOContext **pb, char *filename,
+                          AVDictionary **options) {
+    HLSContext *hls = s->priv_data;
+    int http_base_proto = filename ? ff_is_http_proto(filename) : 0;
+    int err = AVERROR_MUXER_NOT_FOUND;
+    if (!*pb || !http_base_proto || !hls->http_persistent) {
+        err = s->io_open(s, pb, filename, AVIO_FLAG_WRITE, options);
+#if CONFIG_HTTP_PROTOCOL
+    } else {
+        URLContext *http_url_context = ffio_geturlcontext(*pb);
+        av_assert0(http_url_context);
+        err = ff_http_do_new_request(http_url_context, filename);
+#endif
+    }
+    return err;
 }
 
-static int mkdir_p(const char *path) {
-    int ret = 0;
-    char *temp = av_strdup(path);
-    char *pos = temp;
-    char tmp_ch = '\0';
-
-    if (!path || !temp) {
-        return -1;
+static void hlsenc_io_close(AVFormatContext *s, AVIOContext **pb, char *filename) {
+    HLSContext *hls = s->priv_data;
+    int http_base_proto = filename ? ff_is_http_proto(filename) : 0;
+    if (!http_base_proto || !hls->http_persistent || hls->key_info_file || hls->encrypt) {
+        ff_format_io_close(s, pb);
+#if CONFIG_HTTP_PROTOCOL
+    } else {
+        URLContext *http_url_context = ffio_geturlcontext(*pb);
+        av_assert0(http_url_context);
+        avio_flush(*pb);
+        ffurl_shutdown(http_url_context, AVIO_FLAG_WRITE);
+#endif
     }
-
-    if (!strncmp(temp, "/", 1) || !strncmp(temp, "\\", 1)) {
-        pos++;
-    } else if (!strncmp(temp, "./", 2) || !strncmp(temp, ".\\", 2)) {
-        pos += 2;
-    }
-
-    for ( ; *pos != '\0'; ++pos) {
-        if (*pos == '/' || *pos == '\\') {
-            tmp_ch = *pos;
-            *pos = '\0';
-            ret = mkdir(temp, 0755);
-            *pos = tmp_ch;
-        }
-    }
-
-    if ((*(pos - 1) != '/') || (*(pos - 1) != '\\')) {
-        ret = mkdir(temp, 0755);
-    }
-
-    av_free(temp);
-    return ret;
 }
 
 static void set_http_options(AVFormatContext *s, AVDictionary **options, HLSContext *c)
 {
-    const char *proto = avio_find_protocol_name(s->filename);
-    int http_base_proto = proto ? (!av_strcasecmp(proto, "http") || !av_strcasecmp(proto, "https")) : 0;
+    int http_base_proto = ff_is_http_proto(s->url);
 
     if (c->method) {
         av_dict_set(options, "method", c->method, 0);
@@ -231,17 +274,68 @@
     }
     if (c->user_agent)
         av_dict_set(options, "user_agent", c->user_agent, 0);
-
+    if (c->http_persistent)
+        av_dict_set_int(options, "multiple_requests", 1, 0);
+    if (c->timeout >= 0)
+        av_dict_set_int(options, "timeout", c->timeout, 0);
 }
 
-static int replace_int_data_in_filename(char *buf, int buf_size, const char *filename, char placeholder, int64_t number)
+static void write_codec_attr(AVStream *st, VariantStream *vs) {
+    int codec_strlen = strlen(vs->codec_attr);
+    char attr[32];
+
+    if (st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE)
+        return;
+    if (vs->attr_status == CODEC_ATTRIBUTE_WILL_NOT_BE_WRITTEN)
+        return;
+
+    if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
+        uint8_t *data = st->codecpar->extradata;
+        if (data && (data[0] | data[1] | data[2]) == 0 && data[3] == 1 && (data[4] & 0x1F) == 7) {
+            snprintf(attr, sizeof(attr),
+                     "avc1.%02x%02x%02x", data[5], data[6], data[7]);
+        } else {
+            goto fail;
+        }
+    } else if (st->codecpar->codec_id == AV_CODEC_ID_MP2) {
+        snprintf(attr, sizeof(attr), "mp4a.40.33");
+    } else if (st->codecpar->codec_id == AV_CODEC_ID_MP3) {
+        snprintf(attr, sizeof(attr), "mp4a.40.34");
+    } else if (st->codecpar->codec_id == AV_CODEC_ID_AAC) {
+        /* TODO : For HE-AAC, HE-AACv2, the last digit needs to be set to 5 and 29 respectively */
+        snprintf(attr, sizeof(attr), "mp4a.40.2");
+    } else if (st->codecpar->codec_id == AV_CODEC_ID_AC3) {
+        snprintf(attr, sizeof(attr), "ac-3");
+    } else if (st->codecpar->codec_id == AV_CODEC_ID_EAC3) {
+        snprintf(attr, sizeof(attr), "ec-3");
+    } else {
+        goto fail;
+    }
+    // Don't write the same attribute multiple times
+    if (!av_stristr(vs->codec_attr, attr)) {
+        snprintf(vs->codec_attr + codec_strlen,
+                 sizeof(vs->codec_attr) - codec_strlen,
+                 "%s%s", codec_strlen ? "," : "", attr);
+    }
+    return;
+
+fail:
+    vs->codec_attr[0] = '\0';
+    vs->attr_status = CODEC_ATTRIBUTE_WILL_NOT_BE_WRITTEN;
+    return;
+}
+
+static int replace_int_data_in_filename(char **s, const char *filename, char placeholder, int64_t number)
 {
     const char *p;
-    char *q, buf1[20], c;
-    int nd, len, addchar_count;
+    char *new_filename;
+    char c;
+    int nd, addchar_count;
     int found_count = 0;
+    AVBPrint buf;
 
-    q = buf;
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
     p = filename;
     for (;;) {
         c = *p;
@@ -258,13 +352,7 @@
             }
 
             if (*(p + addchar_count) == placeholder) {
-                len = snprintf(buf1, sizeof(buf1), "%0*"PRId64, (number < 0) ? nd : nd++, number);
-                if (len < 1)  // returned error or empty buf1
-                    goto fail;
-                if ((q - buf + len) > buf_size - 1)
-                    goto fail;
-                memcpy(q, buf1, len);
-                q += len;
+                av_bprintf(&buf, "%0*"PRId64, (number < 0) ? nd : nd++, number);
                 p += (addchar_count + 1);
                 addchar_count = 0;
                 found_count++;
@@ -273,17 +361,17 @@
         } else
             addchar_count = 1;
 
-        while (addchar_count--)
-            if ((q - buf) < buf_size - 1)
-                *q++ = *p++;
-            else
-                goto fail;
+        av_bprint_append_data(&buf, p, addchar_count);
+        p += addchar_count;
     }
-    *q = '\0';
+    if (!av_bprint_is_complete(&buf)) {
+        av_bprint_finalize(&buf, NULL);
+        return -1;
+    }
+    if (av_bprint_finalize(&buf, &new_filename) < 0 || !new_filename)
+        return -1;
+    *s = new_filename;
     return found_count;
-fail:
-    *q = '\0';
-    return -1;
 }
 
 static void write_styp(AVIOContext *pb)
@@ -296,39 +384,70 @@
     ffio_wfourcc(pb, "msix");
 }
 
-static int hls_delete_old_segments(AVFormatContext *s, HLSContext *hls) {
+static int flush_dynbuf(VariantStream *vs, int *range_length)
+{
+    AVFormatContext *ctx = vs->avf;
+    uint8_t *buffer;
+
+    if (!ctx->pb) {
+        return AVERROR(EINVAL);
+    }
+
+    // flush
+    av_write_frame(ctx, NULL);
+    avio_flush(ctx->pb);
+
+    // write out to file
+    *range_length = avio_close_dyn_buf(ctx->pb, &buffer);
+    ctx->pb = NULL;
+    avio_write(vs->out, buffer, *range_length);
+    av_free(buffer);
+
+    // re-open buffer
+    return avio_open_dyn_buf(&ctx->pb);
+}
+
+static int hls_delete_old_segments(AVFormatContext *s, HLSContext *hls,
+                                   VariantStream *vs) {
 
     HLSSegment *segment, *previous_segment = NULL;
     float playlist_duration = 0.0f;
     int ret = 0, path_size, sub_path_size;
+    int segment_cnt = 0;
     char *dirname = NULL, *p, *sub_path;
     char *path = NULL;
     AVDictionary *options = NULL;
     AVIOContext *out = NULL;
     const char *proto = NULL;
 
-    segment = hls->segments;
+    segment = vs->segments;
     while (segment) {
         playlist_duration += segment->duration;
         segment = segment->next;
     }
 
-    segment = hls->old_segments;
+    segment = vs->old_segments;
+    segment_cnt = 0;
     while (segment) {
         playlist_duration -= segment->duration;
         previous_segment = segment;
         segment = previous_segment->next;
+        segment_cnt++;
         if (playlist_duration <= -previous_segment->duration) {
             previous_segment->next = NULL;
             break;
         }
+        if (segment_cnt >= hls->hls_delete_threshold) {
+            previous_segment->next = NULL;
+            break;
+        }
     }
 
     if (segment && !hls->use_localtime_mkdir) {
         if (hls->segment_filename) {
             dirname = av_strdup(hls->segment_filename);
         } else {
-            dirname = av_strdup(hls->avf->filename);
+            dirname = av_strdup(vs->avf->url);
         }
         if (!dirname) {
             ret = AVERROR(ENOMEM);
@@ -336,9 +455,23 @@
         }
         p = (char *)av_basename(dirname);
         *p = '\0';
+
     }
 
     while (segment) {
+        char * r_dirname = dirname;
+
+        /* if %v is present in the file's directory */
+        if (av_stristr(dirname, "%v")) {
+
+            if (replace_int_data_in_filename(&r_dirname, dirname, 'v', segment->var_stream_idx) < 1) {
+                ret = AVERROR(EINVAL);
+                goto fail;
+            }
+            av_free(dirname);
+            dirname = r_dirname;
+        }
+
         av_log(hls, AV_LOG_DEBUG, "deleting old segment %s\n",
                                   segment->filename);
         path_size =  (hls->use_localtime_mkdir ? 0 : strlen(dirname)) + strlen(segment->filename) + 1;
@@ -355,12 +488,12 @@
             av_strlcat(path, segment->filename, path_size);
         }
 
-        proto = avio_find_protocol_name(s->filename);
+        proto = avio_find_protocol_name(s->url);
         if (hls->method || (proto && !av_strcasecmp(proto, "http"))) {
             av_dict_set(&options, "method", "DELETE", 0);
-            if ((ret = hls->avf->io_open(hls->avf, &out, path, AVIO_FLAG_WRITE, &options)) < 0)
+            if ((ret = vs->avf->io_open(vs->avf, &out, path, AVIO_FLAG_WRITE, &options)) < 0)
                 goto fail;
-            ff_format_io_close(hls->avf, &out);
+            ff_format_io_close(vs->avf, &out);
         } else if (unlink(path) < 0) {
             av_log(hls, AV_LOG_ERROR, "failed to delete old segment %s: %s\n",
                                      path, strerror(errno));
@@ -379,11 +512,11 @@
 
             if (hls->method || (proto && !av_strcasecmp(proto, "http"))) {
                 av_dict_set(&options, "method", "DELETE", 0);
-                if ((ret = hls->avf->io_open(hls->avf, &out, sub_path, AVIO_FLAG_WRITE, &options)) < 0) {
+                if ((ret = vs->avf->io_open(vs->avf, &out, sub_path, AVIO_FLAG_WRITE, &options)) < 0) {
                     av_free(sub_path);
                     goto fail;
                 }
-                ff_format_io_close(hls->avf, &out);
+                ff_format_io_close(vs->avf, &out);
             } else if (unlink(sub_path) < 0) {
                 av_log(hls, AV_LOG_ERROR, "failed to delete old segment %s: %s\n",
                                          sub_path, strerror(errno));
@@ -417,7 +550,7 @@
     return AVERROR(EINVAL);
 }
 
-static int do_encrypt(AVFormatContext *s)
+static int do_encrypt(AVFormatContext *s, VariantStream *vs)
 {
     HLSContext *hls = s->priv_data;
     int ret;
@@ -425,12 +558,12 @@
     AVIOContext *pb;
     uint8_t key[KEYSIZE];
 
-    len = strlen(hls->basename) + 4 + 1;
+    len = strlen(s->url) + 4 + 1;
     hls->key_basename = av_mallocz(len);
     if (!hls->key_basename)
         return AVERROR(ENOMEM);
 
-    av_strlcpy(hls->key_basename, s->filename, len);
+    av_strlcpy(hls->key_basename, s->url, len);
     av_strlcat(hls->key_basename, ".key", len);
 
     if (hls->key_url) {
@@ -446,7 +579,7 @@
         char buf[33];
 
         if (!hls->iv) {
-            AV_WB64(iv + 8, hls->sequence);
+            AV_WB64(iv + 8, vs->sequence);
         } else {
             memcpy(iv, hls->iv, sizeof(iv));
         }
@@ -538,15 +671,7 @@
     return 0;
 }
 
-static int read_chomp_line(AVIOContext *s, char *buf, int maxlen)
-{
-    int len = ff_get_line(s, buf, maxlen);
-    while (len > 0 && av_isspace(buf[len - 1]))
-        buf[--len] = '\0';
-    return len;
-}
-
-static int hls_mux_init(AVFormatContext *s)
+static int hls_mux_init(AVFormatContext *s, VariantStream *vs)
 {
     AVDictionary *options = NULL;
     HLSContext *hls = s->priv_data;
@@ -555,13 +680,16 @@
     int byterange_mode = (hls->flags & HLS_SINGLE_FILE) || (hls->max_seg_size > 0);
     int i, ret;
 
-    ret = avformat_alloc_output_context2(&hls->avf, hls->oformat, NULL, NULL);
+    ret = avformat_alloc_output_context2(&vs->avf, vs->oformat, NULL, NULL);
     if (ret < 0)
         return ret;
-    oc = hls->avf;
+    oc = vs->avf;
 
-    oc->filename[0]        = '\0';
-    oc->oformat            = hls->oformat;
+    oc->url                = av_strdup("");
+    if (!oc->url)
+        return AVERROR(ENOMEM);
+
+    oc->oformat            = vs->oformat;
     oc->interrupt_callback = s->interrupt_callback;
     oc->max_delay          = s->max_delay;
     oc->opaque             = s->opaque;
@@ -569,57 +697,65 @@
     oc->io_close           = s->io_close;
     av_dict_copy(&oc->metadata, s->metadata, 0);
 
-    if(hls->vtt_oformat) {
-        ret = avformat_alloc_output_context2(&hls->vtt_avf, hls->vtt_oformat, NULL, NULL);
+    if(vs->vtt_oformat) {
+        ret = avformat_alloc_output_context2(&vs->vtt_avf, vs->vtt_oformat, NULL, NULL);
         if (ret < 0)
             return ret;
-        vtt_oc          = hls->vtt_avf;
-        vtt_oc->oformat = hls->vtt_oformat;
+        vtt_oc          = vs->vtt_avf;
+        vtt_oc->oformat = vs->vtt_oformat;
         av_dict_copy(&vtt_oc->metadata, s->metadata, 0);
     }
 
-    for (i = 0; i < s->nb_streams; i++) {
+    for (i = 0; i < vs->nb_streams; i++) {
         AVStream *st;
         AVFormatContext *loc;
-        if (s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE)
+        if (vs->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE)
             loc = vtt_oc;
         else
             loc = oc;
 
         if (!(st = avformat_new_stream(loc, NULL)))
             return AVERROR(ENOMEM);
-        avcodec_parameters_copy(st->codecpar, s->streams[i]->codecpar);
+        avcodec_parameters_copy(st->codecpar, vs->streams[i]->codecpar);
         if (!oc->oformat->codec_tag ||
-            av_codec_get_id (oc->oformat->codec_tag, s->streams[i]->codecpar->codec_tag) == st->codecpar->codec_id ||
-            av_codec_get_tag(oc->oformat->codec_tag, s->streams[i]->codecpar->codec_id) <= 0) {
-            st->codecpar->codec_tag = s->streams[i]->codecpar->codec_tag;
+            av_codec_get_id (oc->oformat->codec_tag, vs->streams[i]->codecpar->codec_tag) == st->codecpar->codec_id ||
+            av_codec_get_tag(oc->oformat->codec_tag, vs->streams[i]->codecpar->codec_id) <= 0) {
+            st->codecpar->codec_tag = vs->streams[i]->codecpar->codec_tag;
         } else {
             st->codecpar->codec_tag = 0;
         }
 
-        st->sample_aspect_ratio = s->streams[i]->sample_aspect_ratio;
-        st->time_base = s->streams[i]->time_base;
-        av_dict_copy(&st->metadata, s->streams[i]->metadata, 0);
+        st->sample_aspect_ratio = vs->streams[i]->sample_aspect_ratio;
+        st->time_base = vs->streams[i]->time_base;
+        av_dict_copy(&st->metadata, vs->streams[i]->metadata, 0);
     }
-    hls->packets_written = 1;
-    hls->start_pos = 0;
-    hls->new_start = 1;
-    hls->fmp4_init_mode = 0;
+
+    vs->packets_written = 1;
+    vs->start_pos = 0;
+    vs->new_start = 1;
+    vs->fmp4_init_mode = 0;
 
     if (hls->segment_type == SEGMENT_TYPE_FMP4) {
         if (hls->max_seg_size > 0) {
             av_log(s, AV_LOG_WARNING, "Multi-file byterange mode is currently unsupported in the HLS muxer.\n");
             return AVERROR_PATCHWELCOME;
         }
-        hls->packets_written = 0;
-        hls->init_range_length = 0;
-        hls->fmp4_init_mode = !byterange_mode;
+
+        vs->packets_written = 0;
+        vs->init_range_length = 0;
+        vs->fmp4_init_mode = !byterange_mode;
         set_http_options(s, &options, hls);
         if ((ret = avio_open_dyn_buf(&oc->pb)) < 0)
             return ret;
 
-        if ((ret = s->io_open(s, &hls->out, hls->base_output_dirname, AVIO_FLAG_WRITE, &options)) < 0) {
-            av_log(s, AV_LOG_ERROR, "Failed to open segment '%s'\n", hls->fmp4_init_filename);
+        if (byterange_mode) {
+            ret = hlsenc_io_open(s, &vs->out, vs->basename, &options);
+        } else {
+            ret = hlsenc_io_open(s, &vs->out, vs->base_output_dirname, &options);
+        }
+        av_dict_free(&options);
+        if (ret < 0) {
+            av_log(s, AV_LOG_ERROR, "Failed to open segment '%s'\n", vs->fmp4_init_filename);
             return ret;
         }
 
@@ -659,47 +795,44 @@
     return (HLSSegment *) NULL;
 }
 
-static int sls_flags_filename_process(struct AVFormatContext *s, HLSContext *hls, HLSSegment *en, double duration,
-                                         int64_t pos, int64_t size)
+static int sls_flags_filename_process(struct AVFormatContext *s, HLSContext *hls,
+                                      VariantStream *vs, HLSSegment *en,
+                                      double duration, int64_t pos, int64_t size)
 {
     if ((hls->flags & (HLS_SECOND_LEVEL_SEGMENT_SIZE | HLS_SECOND_LEVEL_SEGMENT_DURATION)) &&
-        strlen(hls->current_segment_final_filename_fmt)) {
-        av_strlcpy(hls->avf->filename, hls->current_segment_final_filename_fmt, sizeof(hls->avf->filename));
+        strlen(vs->current_segment_final_filename_fmt)) {
+        char * new_url = av_strdup(vs->current_segment_final_filename_fmt);
+        if (!new_url) {
+            av_free(en);
+            return AVERROR(ENOMEM);
+        }
+        ff_format_set_url(vs->avf, new_url);
         if (hls->flags & HLS_SECOND_LEVEL_SEGMENT_SIZE) {
-            char * filename = av_strdup(hls->avf->filename);  // %%s will be %s after strftime
-            if (!filename) {
-                av_free(en);
-                return AVERROR(ENOMEM);
-            }
-            if (replace_int_data_in_filename(hls->avf->filename, sizeof(hls->avf->filename),
-                filename, 's', pos + size) < 1) {
+            char *filename = NULL;
+            if (replace_int_data_in_filename(&filename, vs->avf->url, 's', pos + size) < 1) {
                 av_log(hls, AV_LOG_ERROR,
                        "Invalid second level segment filename template '%s', "
                         "you can try to remove second_level_segment_size flag\n",
-                       filename);
+                       vs->avf->url);
                 av_free(filename);
                 av_free(en);
                 return AVERROR(EINVAL);
             }
-            av_free(filename);
+            ff_format_set_url(vs->avf, filename);
         }
         if (hls->flags & HLS_SECOND_LEVEL_SEGMENT_DURATION) {
-            char * filename = av_strdup(hls->avf->filename);  // %%t will be %t after strftime
-            if (!filename) {
-                av_free(en);
-                return AVERROR(ENOMEM);
-            }
-            if (replace_int_data_in_filename(hls->avf->filename, sizeof(hls->avf->filename),
-                filename, 't',  (int64_t)round(duration * HLS_MICROSECOND_UNIT)) < 1) {
+            char *filename = NULL;
+            if (replace_int_data_in_filename(&filename, vs->avf->url,
+                't',  (int64_t)round(duration * HLS_MICROSECOND_UNIT)) < 1) {
                 av_log(hls, AV_LOG_ERROR,
                        "Invalid second level segment filename template '%s', "
                         "you can try to remove second_level_segment_time flag\n",
-                       filename);
+                       vs->avf->url);
                 av_free(filename);
                 av_free(en);
                 return AVERROR(EINVAL);
             }
-            av_free(filename);
+            ff_format_set_url(vs->avf, filename);
         }
     }
     return 0;
@@ -711,26 +844,26 @@
 
     if (hls->flags & HLS_SECOND_LEVEL_SEGMENT_DURATION) {
          av_log(hls, AV_LOG_ERROR,
-                "second_level_segment_duration hls_flag requires use_localtime to be true\n");
+                "second_level_segment_duration hls_flag requires strftime to be true\n");
          ret = AVERROR(EINVAL);
     }
     if (hls->flags & HLS_SECOND_LEVEL_SEGMENT_SIZE) {
          av_log(hls, AV_LOG_ERROR,
-                "second_level_segment_size hls_flag requires use_localtime to be true\n");
+                "second_level_segment_size hls_flag requires strfime to be true\n");
          ret = AVERROR(EINVAL);
     }
     if (hls->flags & HLS_SECOND_LEVEL_SEGMENT_INDEX) {
         av_log(hls, AV_LOG_ERROR,
-               "second_level_segment_index hls_flag requires use_localtime to be true\n");
+               "second_level_segment_index hls_flag requires strftime to be true\n");
         ret = AVERROR(EINVAL);
     }
 
     return ret;
 }
 
-static int sls_flag_check_duration_size(HLSContext *hls)
+static int sls_flag_check_duration_size(HLSContext *hls, VariantStream *vs)
 {
-    const char *proto = avio_find_protocol_name(hls->basename);
+    const char *proto = avio_find_protocol_name(vs->basename);
     int segment_renaming_ok = proto && !strcmp(proto, "file");
     int ret = 0;
 
@@ -748,69 +881,64 @@
     return ret;
 }
 
-static void sls_flag_file_rename(HLSContext *hls, char *old_filename) {
+static void sls_flag_file_rename(HLSContext *hls, VariantStream *vs, char *old_filename) {
     if ((hls->flags & (HLS_SECOND_LEVEL_SEGMENT_SIZE | HLS_SECOND_LEVEL_SEGMENT_DURATION)) &&
-        strlen(hls->current_segment_final_filename_fmt)) {
-        ff_rename(old_filename, hls->avf->filename, hls);
+        strlen(vs->current_segment_final_filename_fmt)) {
+        ff_rename(old_filename, vs->avf->url, hls);
     }
 }
 
-static int sls_flag_use_localtime_filename(AVFormatContext *oc, HLSContext *c)
+static int sls_flag_use_localtime_filename(AVFormatContext *oc, HLSContext *c, VariantStream *vs)
 {
     if (c->flags & HLS_SECOND_LEVEL_SEGMENT_INDEX) {
-        char * filename = av_strdup(oc->filename);  // %%d will be %d after strftime
-        if (!filename)
-            return AVERROR(ENOMEM);
-        if (replace_int_data_in_filename(oc->filename, sizeof(oc->filename),
+        char *filename = NULL;
+        if (replace_int_data_in_filename(&filename,
 #if FF_API_HLS_WRAP
-            filename, 'd', c->wrap ? c->sequence % c->wrap : c->sequence) < 1) {
+            oc->url, 'd', c->wrap ? vs->sequence % c->wrap : vs->sequence) < 1) {
 #else
-            filename, 'd', c->sequence) < 1) {
+            oc->url, 'd', vs->sequence) < 1) {
 #endif
             av_log(c, AV_LOG_ERROR, "Invalid second level segment filename template '%s', "
                     "you can try to remove second_level_segment_index flag\n",
-                   filename);
+                   oc->url);
             av_free(filename);
             return AVERROR(EINVAL);
         }
-        av_free(filename);
+        ff_format_set_url(oc, filename);
     }
     if (c->flags & (HLS_SECOND_LEVEL_SEGMENT_SIZE | HLS_SECOND_LEVEL_SEGMENT_DURATION)) {
-        av_strlcpy(c->current_segment_final_filename_fmt, oc->filename,
-                   sizeof(c->current_segment_final_filename_fmt));
+        av_strlcpy(vs->current_segment_final_filename_fmt, oc->url,
+                   sizeof(vs->current_segment_final_filename_fmt));
         if (c->flags & HLS_SECOND_LEVEL_SEGMENT_SIZE) {
-            char * filename = av_strdup(oc->filename);  // %%s will be %s after strftime
-            if (!filename)
-                return AVERROR(ENOMEM);
-            if (replace_int_data_in_filename(oc->filename, sizeof(oc->filename), filename, 's', 0) < 1) {
+            char *filename = NULL;
+            if (replace_int_data_in_filename(&filename, oc->url, 's', 0) < 1) {
                 av_log(c, AV_LOG_ERROR, "Invalid second level segment filename template '%s', "
                         "you can try to remove second_level_segment_size flag\n",
-                       filename);
+                       oc->url);
                 av_free(filename);
                 return AVERROR(EINVAL);
             }
-            av_free(filename);
+            ff_format_set_url(oc, filename);
         }
         if (c->flags & HLS_SECOND_LEVEL_SEGMENT_DURATION) {
-            char * filename = av_strdup(oc->filename);  // %%t will be %t after strftime
-            if (!filename)
-                return AVERROR(ENOMEM);
-            if (replace_int_data_in_filename(oc->filename, sizeof(oc->filename), filename, 't', 0) < 1) {
+            char *filename = NULL;
+            if (replace_int_data_in_filename(&filename, oc->url, 't', 0) < 1) {
                 av_log(c, AV_LOG_ERROR, "Invalid second level segment filename template '%s', "
                         "you can try to remove second_level_segment_time flag\n",
-                       filename);
+                       oc->url);
                 av_free(filename);
                 return AVERROR(EINVAL);
             }
-            av_free(filename);
+            ff_format_set_url(oc, filename);
         }
     }
     return 0;
 }
 
 /* Create a new segment and append it to the segment list */
-static int hls_append_segment(struct AVFormatContext *s, HLSContext *hls, double duration,
-                              int64_t pos, int64_t size)
+static int hls_append_segment(struct AVFormatContext *s, HLSContext *hls,
+                              VariantStream *vs, double duration, int64_t pos,
+                              int64_t size)
 {
     HLSSegment *en = av_malloc(sizeof(*en));
     const char  *filename;
@@ -820,24 +948,25 @@
     if (!en)
         return AVERROR(ENOMEM);
 
-    ret = sls_flags_filename_process(s, hls, en, duration, pos, size);
+    en->var_stream_idx = vs->var_stream_idx;
+    ret = sls_flags_filename_process(s, hls, vs, en, duration, pos, size);
     if (ret < 0) {
         return ret;
     }
 
-    filename = av_basename(hls->avf->filename);
+    filename = av_basename(vs->avf->url);
 
     if (hls->use_localtime_mkdir) {
-        filename = hls->avf->filename;
+        filename = vs->avf->url;
     }
-    if ((find_segment_by_filename(hls->segments, filename) || find_segment_by_filename(hls->old_segments, filename))
+    if ((find_segment_by_filename(vs->segments, filename) || find_segment_by_filename(vs->old_segments, filename))
         && !byterange_mode) {
         av_log(hls, AV_LOG_WARNING, "Duplicated segment filename detected: %s\n", filename);
     }
     av_strlcpy(en->filename, filename, sizeof(en->filename));
 
-    if(hls->has_subtitle)
-        av_strlcpy(en->sub_filename, av_basename(hls->vtt_avf->filename), sizeof(en->sub_filename));
+    if(vs->has_subtitle)
+        av_strlcpy(en->sub_filename, av_basename(vs->vtt_avf->url), sizeof(en->sub_filename));
     else
         en->sub_filename[0] = '\0';
 
@@ -847,9 +976,9 @@
     en->next     = NULL;
     en->discont  = 0;
 
-    if (hls->discontinuity) {
+    if (vs->discontinuity) {
         en->discont = 1;
-        hls->discontinuity = 0;
+        vs->discontinuity = 0;
     }
 
     if (hls->key_info_file || hls->encrypt) {
@@ -857,45 +986,45 @@
         av_strlcpy(en->iv_string, hls->iv_string, sizeof(en->iv_string));
     }
 
-    if (!hls->segments)
-        hls->segments = en;
+    if (!vs->segments)
+        vs->segments = en;
     else
-        hls->last_segment->next = en;
+        vs->last_segment->next = en;
 
-    hls->last_segment = en;
+    vs->last_segment = en;
 
     // EVENT or VOD playlists imply sliding window cannot be used
     if (hls->pl_type != PLAYLIST_TYPE_NONE)
         hls->max_nb_segments = 0;
 
-    if (hls->max_nb_segments && hls->nb_entries >= hls->max_nb_segments) {
-        en = hls->segments;
-        hls->initial_prog_date_time += en->duration;
-        hls->segments = en->next;
+    if (hls->max_nb_segments && vs->nb_entries >= hls->max_nb_segments) {
+        en = vs->segments;
+        vs->initial_prog_date_time += en->duration;
+        vs->segments = en->next;
         if (en && hls->flags & HLS_DELETE_SEGMENTS &&
 #if FF_API_HLS_WRAP
                 !(hls->flags & HLS_SINGLE_FILE || hls->wrap)) {
 #else
                 !(hls->flags & HLS_SINGLE_FILE)) {
 #endif
-            en->next = hls->old_segments;
-            hls->old_segments = en;
-            if ((ret = hls_delete_old_segments(s, hls)) < 0)
+            en->next = vs->old_segments;
+            vs->old_segments = en;
+            if ((ret = hls_delete_old_segments(s, hls, vs)) < 0)
                 return ret;
         } else
             av_free(en);
     } else
-        hls->nb_entries++;
+        vs->nb_entries++;
 
     if (hls->max_seg_size > 0) {
         return 0;
     }
-    hls->sequence++;
+    vs->sequence++;
 
     return 0;
 }
 
-static int parse_playlist(AVFormatContext *s, const char *url)
+static int parse_playlist(AVFormatContext *s, const char *url, VariantStream *vs)
 {
     HLSContext *hls = s->priv_data;
     AVIOContext *in;
@@ -910,32 +1039,32 @@
                                    s->protocol_whitelist, s->protocol_blacklist)) < 0)
         return ret;
 
-    read_chomp_line(in, line, sizeof(line));
+    ff_get_chomp_line(in, line, sizeof(line));
     if (strcmp(line, "#EXTM3U")) {
         ret = AVERROR_INVALIDDATA;
         goto fail;
     }
 
-    hls->discontinuity = 0;
+    vs->discontinuity = 0;
     while (!avio_feof(in)) {
-        read_chomp_line(in, line, sizeof(line));
+        ff_get_chomp_line(in, line, sizeof(line));
         if (av_strstart(line, "#EXT-X-MEDIA-SEQUENCE:", &ptr)) {
             int64_t tmp_sequence = strtoll(ptr, NULL, 10);
-            if (tmp_sequence < hls->sequence)
+            if (tmp_sequence < vs->sequence)
               av_log(hls, AV_LOG_VERBOSE,
                      "Found playlist sequence number was smaller """
                      "than specified start sequence number: %"PRId64" < %"PRId64", "
                      "omitting\n", tmp_sequence, hls->start_sequence);
             else {
               av_log(hls, AV_LOG_DEBUG, "Found playlist sequence number: %"PRId64"\n", tmp_sequence);
-              hls->sequence = tmp_sequence;
+              vs->sequence = tmp_sequence;
             }
         } else if (av_strstart(line, "#EXT-X-DISCONTINUITY", &ptr)) {
             is_segment = 1;
-            hls->discontinuity = 1;
+            vs->discontinuity = 1;
         } else if (av_strstart(line, "#EXTINF:", &ptr)) {
             is_segment = 1;
-            hls->duration = atof(ptr);
+            vs->duration = atof(ptr);
         } else if (av_stristart(line, "#EXT-X-KEY:", &ptr)) {
             ptr = av_stristr(line, "URI=\"");
             if (ptr) {
@@ -963,14 +1092,19 @@
             continue;
         } else if (line[0]) {
             if (is_segment) {
+                char *new_file = av_strdup(line);
+                if (!new_file) {
+                    ret = AVERROR(ENOMEM);
+                    goto fail;
+                }
+                ff_format_set_url(vs->avf, new_file);
                 is_segment = 0;
-                new_start_pos = avio_tell(hls->avf->pb);
-                hls->size = new_start_pos - hls->start_pos;
-                av_strlcpy(hls->avf->filename, line, sizeof(line));
-                ret = hls_append_segment(s, hls, hls->duration, hls->start_pos, hls->size);
+                new_start_pos = avio_tell(vs->avf->pb);
+                vs->size = new_start_pos - vs->start_pos;
+                ret = hls_append_segment(s, hls, vs, vs->duration, vs->start_pos, vs->size);
                 if (ret < 0)
                     goto fail;
-                hls->start_pos = new_start_pos;
+                vs->start_pos = new_start_pos;
             }
         }
     }
@@ -991,306 +1125,492 @@
     }
 }
 
-static void write_m3u8_head_block(HLSContext *hls, AVIOContext *out, int version,
-                                  int target_duration, int64_t sequence)
+static int hls_rename_temp_file(AVFormatContext *s, AVFormatContext *oc)
 {
-    avio_printf(out, "#EXTM3U\n");
-    avio_printf(out, "#EXT-X-VERSION:%d\n", version);
-    if (hls->allowcache == 0 || hls->allowcache == 1) {
-        avio_printf(out, "#EXT-X-ALLOW-CACHE:%s\n", hls->allowcache == 0 ? "NO" : "YES");
-    }
-    avio_printf(out, "#EXT-X-TARGETDURATION:%d\n", target_duration);
-    avio_printf(out, "#EXT-X-MEDIA-SEQUENCE:%"PRId64"\n", sequence);
-    av_log(hls, AV_LOG_VERBOSE, "EXT-X-MEDIA-SEQUENCE:%"PRId64"\n", sequence);
-}
+    size_t len = strlen(oc->url);
+    char *final_filename = av_strdup(oc->url);
+    int ret;
 
-static void hls_rename_temp_file(AVFormatContext *s, AVFormatContext *oc)
-{
-    size_t len = strlen(oc->filename);
-    char final_filename[sizeof(oc->filename)];
-
-    av_strlcpy(final_filename, oc->filename, len);
+    if (!final_filename)
+        return AVERROR(ENOMEM);
     final_filename[len-4] = '\0';
-    ff_rename(oc->filename, final_filename, s);
-    oc->filename[len-4] = '\0';
+    ret = ff_rename(oc->url, final_filename, s);
+    oc->url[len-4] = '\0';
+    av_freep(&final_filename);
+    return ret;
 }
 
-static int hls_window(AVFormatContext *s, int last)
+static int get_relative_url(const char *master_url, const char *media_url,
+                            char *rel_url, int rel_url_buf_size)
+{
+    char *p = NULL;
+    int base_len = -1;
+    p = strrchr(master_url, '/') ? strrchr(master_url, '/') :\
+            strrchr(master_url, '\\');
+    if (p) {
+        base_len = FFABS(p - master_url);
+        if (av_strncasecmp(master_url, media_url, base_len)) {
+            av_log(NULL, AV_LOG_WARNING, "Unable to find relative url\n");
+            return AVERROR(EINVAL);
+        }
+    }
+    av_strlcpy(rel_url, &(media_url[base_len + 1]), rel_url_buf_size);
+    return 0;
+}
+
+static int64_t get_stream_bit_rate(AVStream *stream) {
+    AVCPBProperties *props = (AVCPBProperties*)av_stream_get_side_data(
+        stream,
+        AV_PKT_DATA_CPB_PROPERTIES,
+        NULL
+    );
+
+    if (stream->codecpar->bit_rate)
+        return stream->codecpar->bit_rate;
+    else if (props)
+        return props->max_bitrate;
+
+    return 0;
+}
+
+static int create_master_playlist(AVFormatContext *s,
+                                  VariantStream * const input_vs)
+{
+    HLSContext *hls = s->priv_data;
+    VariantStream *vs, *temp_vs;
+    AVStream *vid_st, *aud_st;
+    AVDictionary *options = NULL;
+    unsigned int i, j;
+    int m3u8_name_size, ret, bandwidth;
+    char *m3u8_rel_name, *ccgroup;
+    ClosedCaptionsStream *ccs;
+
+    input_vs->m3u8_created = 1;
+    if (!hls->master_m3u8_created) {
+        /* For the first time, wait until all the media playlists are created */
+        for (i = 0; i < hls->nb_varstreams; i++)
+            if (!hls->var_streams[i].m3u8_created)
+                return 0;
+    } else {
+         /* Keep publishing the master playlist at the configured rate */
+        if (&hls->var_streams[0] != input_vs || !hls->master_publish_rate ||
+            input_vs->number % hls->master_publish_rate)
+            return 0;
+    }
+
+    set_http_options(s, &options, hls);
+
+    ret = hlsenc_io_open(s, &hls->m3u8_out, hls->master_m3u8_url, &options);
+    av_dict_free(&options);
+    if (ret < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Failed to open master play list file '%s'\n",
+                hls->master_m3u8_url);
+        goto fail;
+    }
+
+    ff_hls_write_playlist_version(hls->m3u8_out, hls->version);
+
+    for (i = 0; i < hls->nb_ccstreams; i++) {
+        ccs = &(hls->cc_streams[i]);
+        avio_printf(hls->m3u8_out, "#EXT-X-MEDIA:TYPE=CLOSED-CAPTIONS");
+        avio_printf(hls->m3u8_out, ",GROUP-ID=\"%s\"", ccs->ccgroup);
+        avio_printf(hls->m3u8_out, ",NAME=\"%s\"", ccs->instreamid);
+        if (ccs->language)
+            avio_printf(hls->m3u8_out, ",LANGUAGE=\"%s\"", ccs->language);
+        avio_printf(hls->m3u8_out, ",INSTREAM-ID=\"%s\"\n", ccs->instreamid);
+    }
+
+    /* For audio only variant streams add #EXT-X-MEDIA tag with attributes*/
+    for (i = 0; i < hls->nb_varstreams; i++) {
+        vs = &(hls->var_streams[i]);
+
+        if (vs->has_video || vs->has_subtitle || !vs->agroup)
+            continue;
+
+        m3u8_name_size = strlen(vs->m3u8_name) + 1;
+        m3u8_rel_name = av_malloc(m3u8_name_size);
+        if (!m3u8_rel_name) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        av_strlcpy(m3u8_rel_name, vs->m3u8_name, m3u8_name_size);
+        ret = get_relative_url(hls->master_m3u8_url, vs->m3u8_name,
+                               m3u8_rel_name, m3u8_name_size);
+        if (ret < 0) {
+            av_log(s, AV_LOG_ERROR, "Unable to find relative URL\n");
+            goto fail;
+        }
+
+        ff_hls_write_audio_rendition(hls->m3u8_out, vs->agroup, m3u8_rel_name, 0, 1);
+
+        av_freep(&m3u8_rel_name);
+    }
+
+    /* For variant streams with video add #EXT-X-STREAM-INF tag with attributes*/
+    for (i = 0; i < hls->nb_varstreams; i++) {
+        vs = &(hls->var_streams[i]);
+
+        m3u8_name_size = strlen(vs->m3u8_name) + 1;
+        m3u8_rel_name = av_malloc(m3u8_name_size);
+        if (!m3u8_rel_name) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        av_strlcpy(m3u8_rel_name, vs->m3u8_name, m3u8_name_size);
+        ret = get_relative_url(hls->master_m3u8_url, vs->m3u8_name,
+                               m3u8_rel_name, m3u8_name_size);
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_ERROR, "Unable to find relative URL\n");
+            goto fail;
+        }
+
+        vid_st = NULL;
+        aud_st = NULL;
+        for (j = 0; j < vs->nb_streams; j++) {
+            if (vs->streams[j]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
+                vid_st = vs->streams[j];
+            else if (vs->streams[j]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
+                aud_st = vs->streams[j];
+        }
+
+        if (!vid_st && !aud_st) {
+            av_log(NULL, AV_LOG_WARNING, "Media stream not found\n");
+            continue;
+        }
+
+        /**
+         * Traverse through the list of audio only rendition streams and find
+         * the rendition which has highest bitrate in the same audio group
+         */
+        if (vs->agroup) {
+            for (j = 0; j < hls->nb_varstreams; j++) {
+                temp_vs = &(hls->var_streams[j]);
+                if (!temp_vs->has_video && !temp_vs->has_subtitle &&
+                    temp_vs->agroup &&
+                    !av_strcasecmp(temp_vs->agroup, vs->agroup)) {
+                    if (!aud_st)
+                        aud_st = temp_vs->streams[0];
+                    if (temp_vs->streams[0]->codecpar->bit_rate >
+                            aud_st->codecpar->bit_rate)
+                        aud_st = temp_vs->streams[0];
+                }
+            }
+        }
+
+        bandwidth = 0;
+        if (vid_st)
+            bandwidth += get_stream_bit_rate(vid_st);
+        if (aud_st)
+            bandwidth += get_stream_bit_rate(aud_st);
+        bandwidth += bandwidth / 10;
+
+        ccgroup = NULL;
+        if (vid_st && vs->ccgroup) {
+            /* check if this group name is available in the cc map string */
+            for (j = 0; j < hls->nb_ccstreams; j++) {
+                ccs = &(hls->cc_streams[j]);
+                if (!av_strcasecmp(ccs->ccgroup, vs->ccgroup)) {
+                    ccgroup = vs->ccgroup;
+                    break;
+                }
+            }
+            if (j == hls->nb_ccstreams)
+                av_log(NULL, AV_LOG_WARNING, "mapping ccgroup %s not found\n",
+                        vs->ccgroup);
+        }
+
+        ff_hls_write_stream_info(vid_st, hls->m3u8_out, bandwidth, m3u8_rel_name,
+                aud_st ? vs->agroup : NULL, vs->codec_attr, ccgroup);
+
+        av_freep(&m3u8_rel_name);
+    }
+fail:
+    if(ret >=0)
+        hls->master_m3u8_created = 1;
+    av_freep(&m3u8_rel_name);
+    hlsenc_io_close(s, &hls->m3u8_out, hls->master_m3u8_url);
+    return ret;
+}
+
+static int hls_window(AVFormatContext *s, int last, VariantStream *vs)
 {
     HLSContext *hls = s->priv_data;
     HLSSegment *en;
     int target_duration = 0;
     int ret = 0;
-    AVIOContext *out = NULL;
-    AVIOContext *sub_out = NULL;
     char temp_filename[1024];
-    int64_t sequence = FFMAX(hls->start_sequence, hls->sequence - hls->nb_entries);
-    int version = 3;
-    const char *proto = avio_find_protocol_name(s->filename);
-    int use_rename = proto && !strcmp(proto, "file");
+    int64_t sequence = FFMAX(hls->start_sequence, vs->sequence - vs->nb_entries);
+    const char *proto = avio_find_protocol_name(s->url);
+    int use_temp_file = proto && !strcmp(proto, "file") && (s->flags & HLS_TEMP_FILE);
     static unsigned warned_non_file;
     char *key_uri = NULL;
     char *iv_string = NULL;
     AVDictionary *options = NULL;
-    double prog_date_time = hls->initial_prog_date_time;
+    double prog_date_time = vs->initial_prog_date_time;
+    double *prog_date_time_p = (hls->flags & HLS_PROGRAM_DATE_TIME) ? &prog_date_time : NULL;
     int byterange_mode = (hls->flags & HLS_SINGLE_FILE) || (hls->max_seg_size > 0);
 
+    hls->version = 3;
     if (byterange_mode) {
-        version = 4;
+        hls->version = 4;
         sequence = 0;
     }
 
-    if (hls->segment_type == SEGMENT_TYPE_FMP4) {
-        version = 7;
+    if (hls->flags & HLS_INDEPENDENT_SEGMENTS) {
+        hls->version = 6;
     }
 
-    if (!use_rename && !warned_non_file++)
+    if (hls->segment_type == SEGMENT_TYPE_FMP4) {
+        hls->version = 7;
+    }
+
+    if (!use_temp_file && !warned_non_file++)
         av_log(s, AV_LOG_ERROR, "Cannot use rename on non file protocol, this may lead to races and temporary partial files\n");
 
     set_http_options(s, &options, hls);
-    snprintf(temp_filename, sizeof(temp_filename), use_rename ? "%s.tmp" : "%s", s->filename);
-    if ((ret = s->io_open(s, &out, temp_filename, AVIO_FLAG_WRITE, &options)) < 0)
+    snprintf(temp_filename, sizeof(temp_filename), use_temp_file ? "%s.tmp" : "%s", vs->m3u8_name);
+    if ((ret = hlsenc_io_open(s, &hls->m3u8_out, temp_filename, &options)) < 0)
         goto fail;
 
-    for (en = hls->segments; en; en = en->next) {
+    for (en = vs->segments; en; en = en->next) {
         if (target_duration <= en->duration)
-            target_duration = get_int_from_double(en->duration);
+            target_duration = lrint(en->duration);
     }
 
-    hls->discontinuity_set = 0;
-    write_m3u8_head_block(hls, out, version, target_duration, sequence);
-    if (hls->pl_type == PLAYLIST_TYPE_EVENT) {
-        avio_printf(out, "#EXT-X-PLAYLIST-TYPE:EVENT\n");
-    } else if (hls->pl_type == PLAYLIST_TYPE_VOD) {
-        avio_printf(out, "#EXT-X-PLAYLIST-TYPE:VOD\n");
-    }
+    vs->discontinuity_set = 0;
+    ff_hls_write_playlist_header(hls->m3u8_out, hls->version, hls->allowcache,
+                                 target_duration, sequence, hls->pl_type);
 
-    if((hls->flags & HLS_DISCONT_START) && sequence==hls->start_sequence && hls->discontinuity_set==0 ){
-        avio_printf(out, "#EXT-X-DISCONTINUITY\n");
-        hls->discontinuity_set = 1;
+    if((hls->flags & HLS_DISCONT_START) && sequence==hls->start_sequence && vs->discontinuity_set==0 ){
+        avio_printf(hls->m3u8_out, "#EXT-X-DISCONTINUITY\n");
+        vs->discontinuity_set = 1;
     }
-    for (en = hls->segments; en; en = en->next) {
+    if (vs->has_video && (hls->flags & HLS_INDEPENDENT_SEGMENTS)) {
+        avio_printf(hls->m3u8_out, "#EXT-X-INDEPENDENT-SEGMENTS\n");
+    }
+    for (en = vs->segments; en; en = en->next) {
         if ((hls->encrypt || hls->key_info_file) && (!key_uri || strcmp(en->key_uri, key_uri) ||
                                     av_strcasecmp(en->iv_string, iv_string))) {
-            avio_printf(out, "#EXT-X-KEY:METHOD=AES-128,URI=\"%s\"", en->key_uri);
+            avio_printf(hls->m3u8_out, "#EXT-X-KEY:METHOD=AES-128,URI=\"%s\"", en->key_uri);
             if (*en->iv_string)
-                avio_printf(out, ",IV=0x%s", en->iv_string);
-            avio_printf(out, "\n");
+                avio_printf(hls->m3u8_out, ",IV=0x%s", en->iv_string);
+            avio_printf(hls->m3u8_out, "\n");
             key_uri = en->key_uri;
             iv_string = en->iv_string;
         }
 
-        if (en->discont) {
-            avio_printf(out, "#EXT-X-DISCONTINUITY\n");
+        if ((hls->segment_type == SEGMENT_TYPE_FMP4) && (en == vs->segments)) {
+            ff_hls_write_init_file(hls->m3u8_out, (hls->flags & HLS_SINGLE_FILE) ? en->filename : vs->fmp4_init_filename,
+                                   hls->flags & HLS_SINGLE_FILE, vs->init_range_length, 0);
         }
 
-        if ((hls->segment_type == SEGMENT_TYPE_FMP4) && (en == hls->segments)) {
-            avio_printf(out, "#EXT-X-MAP:URI=\"%s\"", hls->fmp4_init_filename);
-            if (hls->flags & HLS_SINGLE_FILE) {
-                avio_printf(out, ",BYTERANGE=\"%"PRId64"@%"PRId64"\"", en->size, en->pos);
-            }
-            avio_printf(out, "\n");
+        ret = ff_hls_write_file_entry(hls->m3u8_out, en->discont, byterange_mode,
+                                      en->duration, hls->flags & HLS_ROUND_DURATIONS,
+                                      en->size, en->pos, vs->baseurl,
+                                      en->filename, prog_date_time_p);
+        if (ret < 0) {
+            av_log(s, AV_LOG_WARNING, "ff_hls_write_file_entry get error\n");
         }
-        if (hls->flags & HLS_ROUND_DURATIONS)
-            avio_printf(out, "#EXTINF:%ld,\n",  lrint(en->duration));
-        else
-            avio_printf(out, "#EXTINF:%f,\n", en->duration);
-        if (byterange_mode)
-            avio_printf(out, "#EXT-X-BYTERANGE:%"PRId64"@%"PRId64"\n",
-                        en->size, en->pos);
-
-        if (hls->flags & HLS_PROGRAM_DATE_TIME) {
-            time_t tt, wrongsecs;
-            int milli;
-            struct tm *tm, tmpbuf;
-            char buf0[128], buf1[128];
-            tt = (int64_t)prog_date_time;
-            milli = av_clip(lrint(1000*(prog_date_time - tt)), 0, 999);
-            tm = localtime_r(&tt, &tmpbuf);
-            strftime(buf0, sizeof(buf0), "%Y-%m-%dT%H:%M:%S", tm);
-            if (!strftime(buf1, sizeof(buf1), "%z", tm) || buf1[1]<'0' ||buf1[1]>'2') {
-                int tz_min, dst = tm->tm_isdst;
-                tm = gmtime_r(&tt, &tmpbuf);
-                tm->tm_isdst = dst;
-                wrongsecs = mktime(tm);
-                tz_min = (abs(wrongsecs - tt) + 30) / 60;
-                snprintf(buf1, sizeof(buf1),
-                         "%c%02d%02d",
-                         wrongsecs <= tt ? '+' : '-',
-                         tz_min / 60,
-                         tz_min % 60);
-            }
-            avio_printf(out, "#EXT-X-PROGRAM-DATE-TIME:%s.%03d%s\n", buf0, milli, buf1);
-            prog_date_time += en->duration;
-        }
-        if (hls->baseurl)
-            avio_printf(out, "%s", hls->baseurl);
-        avio_printf(out, "%s\n", en->filename);
     }
 
     if (last && (hls->flags & HLS_OMIT_ENDLIST)==0)
-        avio_printf(out, "#EXT-X-ENDLIST\n");
+        ff_hls_write_end_list(hls->m3u8_out);
 
-    if( hls->vtt_m3u8_name ) {
-        if ((ret = s->io_open(s, &sub_out, hls->vtt_m3u8_name, AVIO_FLAG_WRITE, &options)) < 0)
+    if( vs->vtt_m3u8_name ) {
+        if ((ret = hlsenc_io_open(s, &hls->sub_m3u8_out, vs->vtt_m3u8_name, &options)) < 0)
             goto fail;
-        write_m3u8_head_block(hls, sub_out, version, target_duration, sequence);
-
-        for (en = hls->segments; en; en = en->next) {
-            avio_printf(sub_out, "#EXTINF:%f,\n", en->duration);
-            if (byterange_mode)
-                 avio_printf(sub_out, "#EXT-X-BYTERANGE:%"PRIi64"@%"PRIi64"\n",
-                         en->size, en->pos);
-            if (hls->baseurl)
-                avio_printf(sub_out, "%s", hls->baseurl);
-            avio_printf(sub_out, "%s\n", en->sub_filename);
+        ff_hls_write_playlist_header(hls->sub_m3u8_out, hls->version, hls->allowcache,
+                                     target_duration, sequence, PLAYLIST_TYPE_NONE);
+        for (en = vs->segments; en; en = en->next) {
+            ret = ff_hls_write_file_entry(hls->sub_m3u8_out, 0, byterange_mode,
+                                          en->duration, 0, en->size, en->pos,
+                                          vs->baseurl, en->sub_filename, NULL);
+            if (ret < 0) {
+                av_log(s, AV_LOG_WARNING, "ff_hls_write_file_entry get error\n");
+            }
         }
 
         if (last)
-            avio_printf(sub_out, "#EXT-X-ENDLIST\n");
+            ff_hls_write_end_list(hls->sub_m3u8_out);
 
     }
 
 fail:
     av_dict_free(&options);
-    ff_format_io_close(s, &out);
-    ff_format_io_close(s, &sub_out);
-    if (ret >= 0 && use_rename)
-        ff_rename(temp_filename, s->filename, s);
+    hlsenc_io_close(s, &hls->m3u8_out, temp_filename);
+    hlsenc_io_close(s, &hls->sub_m3u8_out, vs->vtt_m3u8_name);
+    if (use_temp_file)
+        ff_rename(temp_filename, vs->m3u8_name, s);
+
+    if (ret >= 0 && hls->master_pl_name)
+        if (create_master_playlist(s, vs) < 0)
+            av_log(s, AV_LOG_WARNING, "Master playlist creation failed\n");
+
     return ret;
 }
 
-static int hls_start(AVFormatContext *s)
+static int hls_start(AVFormatContext *s, VariantStream *vs)
 {
     HLSContext *c = s->priv_data;
-    AVFormatContext *oc = c->avf;
-    AVFormatContext *vtt_oc = c->vtt_avf;
+    AVFormatContext *oc = vs->avf;
+    AVFormatContext *vtt_oc = vs->vtt_avf;
     AVDictionary *options = NULL;
+    const char *proto = avio_find_protocol_name(s->url);
+    int use_temp_file = proto && !strcmp(proto, "file") && (s->flags & HLS_TEMP_FILE);
     char *filename, iv_string[KEYSIZE*2 + 1];
     int err = 0;
 
     if (c->flags & HLS_SINGLE_FILE) {
-        av_strlcpy(oc->filename, c->basename,
-                   sizeof(oc->filename));
-        if (c->vtt_basename)
-            av_strlcpy(vtt_oc->filename, c->vtt_basename,
-                  sizeof(vtt_oc->filename));
+        char *new_name = av_strdup(vs->basename);
+        if (!new_name)
+            return AVERROR(ENOMEM);
+        ff_format_set_url(oc, new_name);
+        if (vs->vtt_basename) {
+            new_name = av_strdup(vs->vtt_basename);
+            if (!new_name)
+                return AVERROR(ENOMEM);
+            ff_format_set_url(vtt_oc, new_name);
+        }
     } else if (c->max_seg_size > 0) {
-        if (replace_int_data_in_filename(oc->filename, sizeof(oc->filename),
+        char *filename = NULL;
+        if (replace_int_data_in_filename(&filename,
 #if FF_API_HLS_WRAP
-            c->basename, 'd', c->wrap ? c->sequence % c->wrap : c->sequence) < 1) {
+            vs->basename, 'd', c->wrap ? vs->sequence % c->wrap : vs->sequence) < 1) {
 #else
-            c->basename, 'd', c->sequence) < 1) {
+            vs->basename, 'd', vs->sequence) < 1) {
 #endif
-                av_log(oc, AV_LOG_ERROR, "Invalid segment filename template '%s', you can try to use -use_localtime 1 with it\n", c->basename);
+                av_free(filename);
+                av_log(oc, AV_LOG_ERROR, "Invalid segment filename template '%s', you can try to use -strftime 1 with it\n", vs->basename);
                 return AVERROR(EINVAL);
         }
+        ff_format_set_url(oc, filename);
     } else {
         if (c->use_localtime) {
             time_t now0;
             struct tm *tm, tmpbuf;
+            int bufsize = strlen(vs->basename) + 1024;
+            char *buf = av_mallocz(bufsize);
+            if (!buf)
+                return AVERROR(ENOMEM);
             time(&now0);
             tm = localtime_r(&now0, &tmpbuf);
-            if (!strftime(oc->filename, sizeof(oc->filename), c->basename, tm)) {
-                av_log(oc, AV_LOG_ERROR, "Could not get segment filename with use_localtime\n");
+            ff_format_set_url(oc, buf);
+            if (!strftime(oc->url, bufsize, vs->basename, tm)) {
+                av_log(oc, AV_LOG_ERROR, "Could not get segment filename with strftime\n");
                 return AVERROR(EINVAL);
             }
 
-            err = sls_flag_use_localtime_filename(oc, c);
+            err = sls_flag_use_localtime_filename(oc, c, vs);
             if (err < 0) {
                 return AVERROR(ENOMEM);
             }
 
             if (c->use_localtime_mkdir) {
                 const char *dir;
-                char *fn_copy = av_strdup(oc->filename);
+                char *fn_copy = av_strdup(oc->url);
                 if (!fn_copy) {
                     return AVERROR(ENOMEM);
                 }
                 dir = av_dirname(fn_copy);
-                if (mkdir_p(dir) == -1 && errno != EEXIST) {
+                if (ff_mkdir_p(dir) == -1 && errno != EEXIST) {
                     av_log(oc, AV_LOG_ERROR, "Could not create directory %s with use_localtime_mkdir\n", dir);
                     av_free(fn_copy);
                     return AVERROR(errno);
                 }
                 av_free(fn_copy);
             }
-        } else if (replace_int_data_in_filename(oc->filename, sizeof(oc->filename),
+        } else {
+            char *filename = NULL;
+            if (replace_int_data_in_filename(&filename,
 #if FF_API_HLS_WRAP
-                   c->basename, 'd', c->wrap ? c->sequence % c->wrap : c->sequence) < 1) {
+                   vs->basename, 'd', c->wrap ? vs->sequence % c->wrap : vs->sequence) < 1) {
 #else
-                   c->basename, 'd', c->sequence) < 1) {
+                   vs->basename, 'd', vs->sequence) < 1) {
 #endif
-            av_log(oc, AV_LOG_ERROR, "Invalid segment filename template '%s' you can try to use -use_localtime 1 with it\n", c->basename);
-            return AVERROR(EINVAL);
-        }
-        if( c->vtt_basename) {
-            if (replace_int_data_in_filename(vtt_oc->filename, sizeof(vtt_oc->filename),
-#if FF_API_HLS_WRAP
-                c->vtt_basename, 'd', c->wrap ? c->sequence % c->wrap : c->sequence) < 1) {
-#else
-                c->vtt_basename, 'd', c->sequence) < 1) {
-#endif
-                av_log(vtt_oc, AV_LOG_ERROR, "Invalid segment filename template '%s'\n", c->vtt_basename);
+                av_free(filename);
+                av_log(oc, AV_LOG_ERROR, "Invalid segment filename template '%s' you can try to use -strftime 1 with it\n", vs->basename);
                 return AVERROR(EINVAL);
             }
+            ff_format_set_url(oc, filename);
+        }
+        if( vs->vtt_basename) {
+            char *filename = NULL;
+            if (replace_int_data_in_filename(&filename,
+#if FF_API_HLS_WRAP
+                vs->vtt_basename, 'd', c->wrap ? vs->sequence % c->wrap : vs->sequence) < 1) {
+#else
+                vs->vtt_basename, 'd', vs->sequence) < 1) {
+#endif
+                av_free(filename);
+                av_log(vtt_oc, AV_LOG_ERROR, "Invalid segment filename template '%s'\n", vs->vtt_basename);
+                return AVERROR(EINVAL);
+            }
+            ff_format_set_url(vtt_oc, filename);
        }
     }
-    c->number++;
+    vs->number++;
 
     set_http_options(s, &options, c);
 
-    if (c->flags & HLS_TEMP_FILE) {
-        av_strlcat(oc->filename, ".tmp", sizeof(oc->filename));
+    if (use_temp_file) {
+        char *new_name = av_asprintf("%s.tmp", oc->url);
+        if (!new_name)
+            return AVERROR(ENOMEM);
+        ff_format_set_url(oc, new_name);
     }
 
     if (c->key_info_file || c->encrypt) {
-        if (c->key_info_file && c->encrypt) {
-            av_log(s, AV_LOG_WARNING, "Cannot use both -hls_key_info_file and -hls_enc,"
-                  " will use -hls_key_info_file priority\n");
+        if (c->segment_type == SEGMENT_TYPE_FMP4) {
+            av_log(s, AV_LOG_ERROR, "Encrypted fmp4 not yet supported\n");
+            return AVERROR_PATCHWELCOME;
         }
 
-        if (c->number <= 1 || (c->flags & HLS_PERIODIC_REKEY)) {
+        if (c->key_info_file && c->encrypt) {
+            av_log(s, AV_LOG_WARNING, "Cannot use both -hls_key_info_file and -hls_enc,"
+                  " ignoring -hls_enc\n");
+        }
+
+        if (!c->encrypt_started || (c->flags & HLS_PERIODIC_REKEY)) {
             if (c->key_info_file) {
                 if ((err = hls_encryption_start(s)) < 0)
                     goto fail;
             } else {
-                if ((err = do_encrypt(s)) < 0)
+                if ((err = do_encrypt(s, vs)) < 0)
                     goto fail;
             }
+            c->encrypt_started = 1;
         }
         if ((err = av_dict_set(&options, "encryption_key", c->key_string, 0))
                 < 0)
             goto fail;
         err = av_strlcpy(iv_string, c->iv_string, sizeof(iv_string));
         if (!err)
-            snprintf(iv_string, sizeof(iv_string), "%032"PRIx64, c->sequence);
+            snprintf(iv_string, sizeof(iv_string), "%032"PRIx64, vs->sequence);
         if ((err = av_dict_set(&options, "encryption_iv", iv_string, 0)) < 0)
            goto fail;
 
-        filename = av_asprintf("crypto:%s", oc->filename);
+        filename = av_asprintf("crypto:%s", oc->url);
         if (!filename) {
             err = AVERROR(ENOMEM);
             goto fail;
         }
-        err = s->io_open(s, &oc->pb, filename, AVIO_FLAG_WRITE, &options);
+        err = hlsenc_io_open(s, &oc->pb, filename, &options);
         av_free(filename);
         av_dict_free(&options);
         if (err < 0)
             return err;
-    } else
-        if ((err = s->io_open(s, &oc->pb, oc->filename, AVIO_FLAG_WRITE, &options)) < 0)
+    } else if (c->segment_type != SEGMENT_TYPE_FMP4) {
+        if ((err = hlsenc_io_open(s, &oc->pb, oc->url, &options)) < 0)
             goto fail;
-    if (c->vtt_basename) {
+    }
+    if (vs->vtt_basename) {
         set_http_options(s, &options, c);
-        if ((err = s->io_open(s, &vtt_oc->pb, vtt_oc->filename, AVIO_FLAG_WRITE, &options)) < 0)
+        if ((err = hlsenc_io_open(s, &vtt_oc->pb, vtt_oc->url, &options)) < 0)
             goto fail;
     }
     av_dict_free(&options);
 
-    if (c->segment_type == SEGMENT_TYPE_FMP4 && !(c->flags & HLS_SINGLE_FILE)) {
-            write_styp(oc->pb);
-    } else {
+    if (c->segment_type != SEGMENT_TYPE_FMP4) {
         /* We only require one PAT/PMT per segment. */
         if (oc->oformat->priv_class && oc->priv_data) {
             char period[21];
@@ -1303,7 +1623,7 @@
         }
     }
 
-    if (c->vtt_basename) {
+    if (vs->vtt_basename) {
         err = avformat_write_header(vtt_oc,NULL);
         if (err < 0)
             return err;
@@ -1332,19 +1652,859 @@
     return (HAVE_LIBC_MSVCRT || !strftime(b, sizeof(b), "%s", p) || !strcmp(b, "%s")) ? "-%Y%m%d%H%M%S.ts" : "-%s.ts";
 }
 
+static int append_postfix(char *name, int name_buf_len, int i)
+{
+    char *p;
+    char extension[10] = {'\0'};
+
+    p = strrchr(name, '.');
+    if (p) {
+        av_strlcpy(extension, p, sizeof(extension));
+        *p = '\0';
+    }
+
+    snprintf(name + strlen(name), name_buf_len - strlen(name), POSTFIX_PATTERN, i);
+
+    if (strlen(extension))
+        av_strlcat(name, extension, name_buf_len);
+
+    return 0;
+}
+
+static int validate_name(int nb_vs, const char *fn)
+{
+    const char *filename, *subdir_name;
+    char *fn_dup = NULL;
+    int ret = 0;
+
+    if (!fn) {
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    fn_dup = av_strdup(fn);
+    if (!fn_dup) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    filename = av_basename(fn);
+    subdir_name = av_dirname(fn_dup);
+
+    if (nb_vs > 1 && !av_stristr(filename, "%v") && !av_stristr(subdir_name, "%v")) {
+        av_log(NULL, AV_LOG_ERROR, "More than 1 variant streams are present, %%v is expected in the filename %s\n",
+                fn);
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    if (av_stristr(filename, "%v") && av_stristr(subdir_name, "%v")) {
+        av_log(NULL, AV_LOG_ERROR, "%%v is expected either in filename or in the sub-directory name of file %s\n",
+                fn);
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+fail:
+    av_freep(&fn_dup);
+    return ret;
+}
+
+static int format_name(char *buf, int buf_len, int index)
+{
+    const char *proto, *dir;
+    char *orig_buf_dup = NULL, *mod_buf = NULL, *mod_buf_dup = NULL;
+    int ret = 0;
+
+    if (!av_stristr(buf, "%v"))
+        return ret;
+
+    orig_buf_dup = av_strdup(buf);
+    if (!orig_buf_dup) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    if (replace_int_data_in_filename(&mod_buf, orig_buf_dup, 'v', index) < 1) {
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+    av_strlcpy(buf, mod_buf, buf_len);
+
+    proto = avio_find_protocol_name(orig_buf_dup);
+    dir = av_dirname(orig_buf_dup);
+
+    /* if %v is present in the file's directory, create sub-directory */
+    if (av_stristr(dir, "%v") && proto && !strcmp(proto, "file")) {
+        mod_buf_dup = av_strdup(buf);
+        if (!mod_buf_dup) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        dir = av_dirname(mod_buf_dup);
+        if (ff_mkdir_p(dir) == -1 && errno != EEXIST) {
+            ret = AVERROR(errno);
+            goto fail;
+        }
+    }
+
+fail:
+    av_freep(&orig_buf_dup);
+    av_freep(&mod_buf_dup);
+    av_freep(&mod_buf);
+    return ret;
+}
+
+static int get_nth_codec_stream_index(AVFormatContext *s,
+                                      enum AVMediaType codec_type,
+                                      int stream_id)
+{
+    unsigned int stream_index, cnt;
+    if (stream_id < 0 || stream_id > s->nb_streams - 1)
+        return -1;
+    cnt = 0;
+    for (stream_index = 0; stream_index < s->nb_streams; stream_index++) {
+        if (s->streams[stream_index]->codecpar->codec_type != codec_type)
+            continue;
+        if (cnt == stream_id)
+            return stream_index;
+        cnt++;
+    }
+    return -1;
+}
+
+static int parse_variant_stream_mapstring(AVFormatContext *s)
+{
+    HLSContext *hls = s->priv_data;
+    VariantStream *vs;
+    int stream_index;
+    enum AVMediaType codec_type;
+    int nb_varstreams, nb_streams;
+    char *p, *q, *saveptr1, *saveptr2, *varstr, *keyval;
+    const char *val;
+
+    /**
+     * Expected format for var_stream_map string is as below:
+     * "a:0,v:0 a:1,v:1"
+     * "a:0,agroup:a0 a:1,agroup:a1 v:0,agroup:a0  v:1,agroup:a1"
+     * This string specifies how to group the audio, video and subtitle streams
+     * into different variant streams. The variant stream groups are separated
+     * by space.
+     *
+     * a:, v:, s: are keys to specify audio, video and subtitle streams
+     * respectively. Allowed values are 0 to 9 digits (limited just based on
+     * practical usage)
+     *
+     * agroup: is key to specify audio group. A string can be given as value.
+     */
+    p = av_strdup(hls->var_stream_map);
+    q = p;
+    while(av_strtok(q, " \t", &saveptr1)) {
+        q = NULL;
+        hls->nb_varstreams++;
+    }
+    av_freep(&p);
+
+    hls->var_streams = av_mallocz(sizeof(*hls->var_streams) * hls->nb_varstreams);
+    if (!hls->var_streams)
+        return AVERROR(ENOMEM);
+
+    p = hls->var_stream_map;
+    nb_varstreams = 0;
+    while (varstr = av_strtok(p, " \t", &saveptr1)) {
+        p = NULL;
+
+        if (nb_varstreams < hls->nb_varstreams) {
+            vs = &(hls->var_streams[nb_varstreams]);
+            vs->var_stream_idx = nb_varstreams;
+            nb_varstreams++;
+        } else
+            return AVERROR(EINVAL);
+
+        q = varstr;
+        while (q < varstr + strlen(varstr)) {
+            if (!av_strncasecmp(q, "a:", 2) || !av_strncasecmp(q, "v:", 2) ||
+                !av_strncasecmp(q, "s:", 2))
+                vs->nb_streams++;
+            q++;
+        }
+        vs->streams = av_mallocz(sizeof(AVStream *) * vs->nb_streams);
+        if (!vs->streams)
+            return AVERROR(ENOMEM);
+
+        nb_streams = 0;
+        while (keyval = av_strtok(varstr, ",", &saveptr2)) {
+            varstr = NULL;
+
+            if (av_strstart(keyval, "agroup:", &val)) {
+                vs->agroup = av_strdup(val);
+                if (!vs->agroup)
+                    return AVERROR(ENOMEM);
+                continue;
+            } else if (av_strstart(keyval, "ccgroup:", &val)) {
+                vs->ccgroup = av_strdup(val);
+                if (!vs->ccgroup)
+                    return AVERROR(ENOMEM);
+                continue;
+            } else if (av_strstart(keyval, "v:", &val)) {
+                codec_type = AVMEDIA_TYPE_VIDEO;
+            } else if (av_strstart(keyval, "a:", &val)) {
+                codec_type = AVMEDIA_TYPE_AUDIO;
+            } else if (av_strstart(keyval, "s:", &val)) {
+                codec_type = AVMEDIA_TYPE_SUBTITLE;
+            } else {
+                av_log(s, AV_LOG_ERROR, "Invalid keyval %s\n", keyval);
+                return AVERROR(EINVAL);
+            }
+
+            stream_index = -1;
+            if (av_isdigit(*val))
+                stream_index = get_nth_codec_stream_index (s, codec_type,
+                                                           atoi(val));
+
+            if (stream_index >= 0 && nb_streams < vs->nb_streams) {
+                vs->streams[nb_streams++] = s->streams[stream_index];
+            } else {
+                av_log(s, AV_LOG_ERROR, "Unable to map stream at %s\n", keyval);
+                return AVERROR(EINVAL);
+            }
+        }
+    }
+    av_log(s, AV_LOG_DEBUG, "Number of variant streams %d\n",
+            hls->nb_varstreams);
+
+    return 0;
+}
+
+static int parse_cc_stream_mapstring(AVFormatContext *s)
+{
+    HLSContext *hls = s->priv_data;
+    int nb_ccstreams;
+    char *p, *q, *ccstr, *keyval;
+    char *saveptr1 = NULL, *saveptr2 = NULL;
+    const char *val;
+    ClosedCaptionsStream *ccs;
+
+    p = av_strdup(hls->cc_stream_map);
+    q = p;
+    while(av_strtok(q, " \t", &saveptr1)) {
+        q = NULL;
+        hls->nb_ccstreams++;
+    }
+    av_freep(&p);
+
+    hls->cc_streams = av_mallocz(sizeof(*hls->cc_streams) * hls->nb_ccstreams);
+    if (!hls->cc_streams)
+        return AVERROR(ENOMEM);
+
+    p = hls->cc_stream_map;
+    nb_ccstreams = 0;
+    while (ccstr = av_strtok(p, " \t", &saveptr1)) {
+        p = NULL;
+
+        if (nb_ccstreams < hls->nb_ccstreams)
+            ccs = &(hls->cc_streams[nb_ccstreams++]);
+        else
+            return AVERROR(EINVAL);
+
+        while (keyval = av_strtok(ccstr, ",", &saveptr2)) {
+            ccstr = NULL;
+
+            if (av_strstart(keyval, "ccgroup:", &val)) {
+                ccs->ccgroup = av_strdup(val);
+                if (!ccs->ccgroup)
+                    return AVERROR(ENOMEM);
+            } else if (av_strstart(keyval, "instreamid:", &val)) {
+                ccs->instreamid = av_strdup(val);
+                if (!ccs->instreamid)
+                    return AVERROR(ENOMEM);
+            } else if (av_strstart(keyval, "language:", &val)) {
+                ccs->language = av_strdup(val);
+                if (!ccs->language)
+                    return AVERROR(ENOMEM);
+            } else {
+                av_log(s, AV_LOG_ERROR, "Invalid keyval %s\n", keyval);
+                return AVERROR(EINVAL);
+            }
+        }
+
+        if (!ccs->ccgroup || !ccs->instreamid) {
+            av_log(s, AV_LOG_ERROR, "Insufficient parameters in cc stream map string\n");
+            return AVERROR(EINVAL);
+        }
+
+        if (av_strstart(ccs->instreamid, "CC", &val)) {
+            if(atoi(val) < 1 || atoi(val) > 4) {
+                av_log(s, AV_LOG_ERROR, "Invalid instream ID CC index %d in %s, range 1-4\n",
+                       atoi(val), ccs->instreamid);
+                return AVERROR(EINVAL);
+            }
+        } else if (av_strstart(ccs->instreamid, "SERVICE", &val)) {
+            if(atoi(val) < 1 || atoi(val) > 63) {
+                av_log(s, AV_LOG_ERROR, "Invalid instream ID SERVICE index %d in %s, range 1-63 \n",
+                       atoi(val), ccs->instreamid);
+                return AVERROR(EINVAL);
+            }
+        } else {
+            av_log(s, AV_LOG_ERROR, "Invalid instream ID %s, supported are CCn or SERIVICEn\n",
+                   ccs->instreamid);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    return 0;
+}
+
+static int update_variant_stream_info(AVFormatContext *s) {
+    HLSContext *hls = s->priv_data;
+    unsigned int i;
+    int ret = 0;
+
+    if (hls->cc_stream_map) {
+        ret = parse_cc_stream_mapstring(s);
+        if (ret < 0)
+            return ret;
+    }
+
+    if (hls->var_stream_map) {
+        return parse_variant_stream_mapstring(s);
+    } else {
+        //By default, a single variant stream with all the codec streams is created
+        hls->nb_varstreams = 1;
+        hls->var_streams = av_mallocz(sizeof(*hls->var_streams) *
+                                             hls->nb_varstreams);
+        if (!hls->var_streams)
+            return AVERROR(ENOMEM);
+
+        hls->var_streams[0].var_stream_idx = 0;
+        hls->var_streams[0].nb_streams = s->nb_streams;
+        hls->var_streams[0].streams = av_mallocz(sizeof(AVStream *) *
+                                            hls->var_streams[0].nb_streams);
+        if (!hls->var_streams[0].streams)
+            return AVERROR(ENOMEM);
+
+        //by default, the first available ccgroup is mapped to the variant stream
+        if (hls->nb_ccstreams) {
+            hls->var_streams[0].ccgroup = av_strdup(hls->cc_streams[0].ccgroup);
+            if (!hls->var_streams[0].ccgroup)
+                return AVERROR(ENOMEM);
+        }
+
+        for (i = 0; i < s->nb_streams; i++)
+            hls->var_streams[0].streams[i] = s->streams[i];
+    }
+    return 0;
+}
+
+static int update_master_pl_info(AVFormatContext *s) {
+    HLSContext *hls = s->priv_data;
+    const char *dir;
+    char *fn1= NULL, *fn2 = NULL;
+    int ret = 0;
+
+    fn1 = av_strdup(s->url);
+    if (!fn1) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    dir = av_dirname(fn1);
+
+    /**
+     * if output file's directory has %v, variants are created in sub-directories
+     * then master is created at the sub-directories level
+     */
+    if (dir && av_stristr(av_basename(dir), "%v")) {
+        fn2 = av_strdup(dir);
+        if (!fn2) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        dir = av_dirname(fn2);
+    }
+
+    if (dir && strcmp(dir, "."))
+        hls->master_m3u8_url = av_append_path_component(dir, hls->master_pl_name);
+    else
+        hls->master_m3u8_url = av_strdup(hls->master_pl_name);
+
+    if (!hls->master_m3u8_url) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+fail:
+    av_freep(&fn1);
+    av_freep(&fn2);
+
+    return ret;
+}
+
 static int hls_write_header(AVFormatContext *s)
 {
     HLSContext *hls = s->priv_data;
-    int ret, i;
-    char *p = NULL;
+    int ret, i, j;
+    AVDictionary *options = NULL;
+    VariantStream *vs = NULL;
+
+    for (i = 0; i < hls->nb_varstreams; i++) {
+        vs = &hls->var_streams[i];
+
+        av_dict_copy(&options, hls->format_options, 0);
+        ret = avformat_write_header(vs->avf, &options);
+        if (av_dict_count(options)) {
+            av_log(s, AV_LOG_ERROR, "Some of provided format options in '%s' are not recognized\n", hls->format_options_str);
+            ret = AVERROR(EINVAL);
+            av_dict_free(&options);
+            goto fail;
+        }
+        av_dict_free(&options);
+        //av_assert0(s->nb_streams == hls->avf->nb_streams);
+        for (j = 0; j < vs->nb_streams; j++) {
+            AVStream *inner_st;
+            AVStream *outer_st = vs->streams[j];
+
+            if (hls->max_seg_size > 0) {
+                if ((outer_st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) &&
+                    (outer_st->codecpar->bit_rate > hls->max_seg_size)) {
+                    av_log(s, AV_LOG_WARNING, "Your video bitrate is bigger than hls_segment_size, "
+                           "(%"PRId64 " > %"PRId64 "), the result maybe not be what you want.",
+                           outer_st->codecpar->bit_rate, hls->max_seg_size);
+                }
+            }
+
+            if (outer_st->codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE)
+                inner_st = vs->avf->streams[j];
+            else if (vs->vtt_avf)
+                inner_st = vs->vtt_avf->streams[0];
+            else {
+                /* We have a subtitle stream, when the user does not want one */
+                inner_st = NULL;
+                continue;
+            }
+            avpriv_set_pts_info(outer_st, inner_st->pts_wrap_bits, inner_st->time_base.num, inner_st->time_base.den);
+            write_codec_attr(outer_st, vs);
+
+        }
+        /* Update the Codec Attr string for the mapped audio groups */
+        if (vs->has_video && vs->agroup) {
+            for (j = 0; j < hls->nb_varstreams; j++) {
+                VariantStream *vs_agroup = &(hls->var_streams[j]);
+                if (!vs_agroup->has_video && !vs_agroup->has_subtitle &&
+                    vs_agroup->agroup &&
+                    !av_strcasecmp(vs_agroup->agroup, vs->agroup)) {
+                    write_codec_attr(vs_agroup->streams[0], vs);
+                }
+            }
+        }
+    }
+fail:
+
+    return ret;
+}
+
+static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    HLSContext *hls = s->priv_data;
+    AVFormatContext *oc = NULL;
+    AVStream *st = s->streams[pkt->stream_index];
+    int64_t end_pts = 0;
+    int is_ref_pkt = 1;
+    int ret = 0, can_split = 1, i, j;
+    int stream_index = 0;
+    int range_length = 0;
+    const char *proto = avio_find_protocol_name(s->url);
+    int use_temp_file = proto && !strcmp(proto, "file") && (s->flags & HLS_TEMP_FILE);
+    uint8_t *buffer = NULL;
+    VariantStream *vs = NULL;
+    AVDictionary *options = NULL;
+    char *old_filename = NULL;
+
+    for (i = 0; i < hls->nb_varstreams; i++) {
+        vs = &hls->var_streams[i];
+        for (j = 0; j < vs->nb_streams; j++) {
+            if (vs->streams[j] == st) {
+                if( st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE ) {
+                    oc = vs->vtt_avf;
+                    stream_index = 0;
+                } else {
+                    oc = vs->avf;
+                    stream_index = j;
+                }
+                break;
+            }
+        }
+
+        if (oc)
+            break;
+    }
+
+    if (!oc) {
+        av_log(s, AV_LOG_ERROR, "Unable to find mapping variant stream\n");
+        return AVERROR(ENOMEM);
+    }
+
+    end_pts = hls->recording_time * vs->number;
+
+    if (vs->sequence - vs->nb_entries > hls->start_sequence && hls->init_time > 0) {
+        /* reset end_pts, hls->recording_time at end of the init hls list */
+        int init_list_dur = hls->init_time * vs->nb_entries * AV_TIME_BASE;
+        int after_init_list_dur = (vs->sequence - hls->start_sequence - vs->nb_entries ) * (hls->time * AV_TIME_BASE);
+        hls->recording_time = hls->time * AV_TIME_BASE;
+        end_pts = init_list_dur + after_init_list_dur ;
+    }
+
+    if (vs->start_pts == AV_NOPTS_VALUE) {
+        vs->start_pts = pkt->pts;
+    }
+
+    if (vs->has_video) {
+        can_split = st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
+                    ((pkt->flags & AV_PKT_FLAG_KEY) || (hls->flags & HLS_SPLIT_BY_TIME));
+        is_ref_pkt = (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) && (pkt->stream_index == vs->reference_stream_index);
+    }
+    if (pkt->pts == AV_NOPTS_VALUE)
+        is_ref_pkt = can_split = 0;
+
+    if (is_ref_pkt) {
+        if (vs->end_pts == AV_NOPTS_VALUE)
+            vs->end_pts = pkt->pts;
+        if (vs->new_start) {
+            vs->new_start = 0;
+            vs->duration = (double)(pkt->pts - vs->end_pts)
+                                       * st->time_base.num / st->time_base.den;
+            vs->dpp = (double)(pkt->duration) * st->time_base.num / st->time_base.den;
+        } else {
+            if (pkt->duration) {
+                vs->duration += (double)(pkt->duration) * st->time_base.num / st->time_base.den;
+            } else {
+                av_log(s, AV_LOG_WARNING, "pkt->duration = 0, maybe the hls segment duration will not precise\n");
+                vs->duration = (double)(pkt->pts - vs->end_pts) * st->time_base.num / st->time_base.den;
+            }
+        }
+
+    }
+
+    if (vs->packets_written && can_split && av_compare_ts(pkt->pts - vs->start_pts, st->time_base,
+                                                          end_pts, AV_TIME_BASE_Q) >= 0) {
+        int64_t new_start_pos;
+        int byterange_mode = (hls->flags & HLS_SINGLE_FILE) || (hls->max_seg_size > 0);
+
+        av_write_frame(vs->avf, NULL); /* Flush any buffered data */
+
+        new_start_pos = avio_tell(vs->avf->pb);
+        if (hls->segment_type != SEGMENT_TYPE_FMP4) {
+            vs->size = new_start_pos - vs->start_pos;
+        } else {
+            vs->size = new_start_pos;
+        }
+
+        if (hls->segment_type == SEGMENT_TYPE_FMP4) {
+            if (!vs->init_range_length) {
+                avio_flush(oc->pb);
+                range_length = avio_close_dyn_buf(oc->pb, &buffer);
+                avio_write(vs->out, buffer, range_length);
+                vs->init_range_length = range_length;
+                avio_open_dyn_buf(&oc->pb);
+                vs->packets_written = 0;
+                vs->start_pos = range_length;
+                if (!byterange_mode) {
+                    ff_format_io_close(s, &vs->out);
+                    hlsenc_io_close(s, &vs->out, vs->base_output_dirname);
+                }
+            }
+        } else {
+            if (!byterange_mode) {
+                hlsenc_io_close(s, &oc->pb, oc->url);
+            }
+        }
+        if (!byterange_mode) {
+            if (vs->vtt_avf) {
+                hlsenc_io_close(s, &vs->vtt_avf->pb, vs->vtt_avf->url);
+            }
+        }
+
+        // look to rename the asset name
+        if (use_temp_file && oc->url[0]) {
+            if (!(hls->flags & HLS_SINGLE_FILE) || (hls->max_seg_size <= 0))
+                if ((vs->avf->oformat->priv_class && vs->avf->priv_data) && hls->segment_type != SEGMENT_TYPE_FMP4) {
+                    av_opt_set(vs->avf->priv_data, "mpegts_flags", "resend_headers", 0);
+                }
+        }
+
+        if (vs->fmp4_init_mode) {
+            vs->number--;
+        }
+
+        if (hls->segment_type == SEGMENT_TYPE_FMP4) {
+            if (hls->flags & HLS_SINGLE_FILE) {
+                ret = flush_dynbuf(vs, &range_length);
+                if (ret < 0) {
+                    av_free(old_filename);
+                    return ret;
+                }
+                vs->size = range_length;
+            } else {
+                set_http_options(s, &options, hls);
+                ret = hlsenc_io_open(s, &vs->out, vs->avf->url, &options);
+                if (ret < 0) {
+                    av_log(s, AV_LOG_ERROR, "Failed to open file '%s'\n",
+                           vs->avf->url);
+                    return ret;
+                }
+                write_styp(vs->out);
+                ret = flush_dynbuf(vs, &range_length);
+                if (ret < 0) {
+                    return ret;
+                }
+                ff_format_io_close(s, &vs->out);
+
+                // rename that segment from .tmp to the real one
+                if (use_temp_file && oc->url[0]) {
+                    hls_rename_temp_file(s, oc);
+                    av_free(old_filename);
+                    old_filename = av_strdup(vs->avf->url);
+
+                    if (!old_filename) {
+                        return AVERROR(ENOMEM);
+                    }
+                }
+            }
+        }
+
+        old_filename = av_strdup(vs->avf->url);
+        if (!old_filename) {
+            return AVERROR(ENOMEM);
+        }
+
+        if (vs->start_pos || hls->segment_type != SEGMENT_TYPE_FMP4) {
+            ret = hls_append_segment(s, hls, vs, vs->duration, vs->start_pos, vs->size);
+            vs->end_pts = pkt->pts;
+            vs->duration = 0;
+            if (ret < 0) {
+                av_free(old_filename);
+                return ret;
+            }
+        }
+
+        if (hls->segment_type != SEGMENT_TYPE_FMP4) {
+            vs->start_pos = new_start_pos;
+        } else {
+            vs->start_pos += vs->size;
+        }
+
+        vs->fmp4_init_mode = 0;
+        if (hls->flags & HLS_SINGLE_FILE) {
+            vs->number++;
+        } else if (hls->max_seg_size > 0) {
+            if (vs->start_pos >= hls->max_seg_size) {
+                vs->sequence++;
+                sls_flag_file_rename(hls, vs, old_filename);
+                ret = hls_start(s, vs);
+                vs->start_pos = 0;
+                /* When split segment by byte, the duration is short than hls_time,
+                 * so it is not enough one segment duration as hls_time, */
+                vs->number--;
+            }
+            vs->number++;
+        } else {
+            sls_flag_file_rename(hls, vs, old_filename);
+            ret = hls_start(s, vs);
+        }
+        av_free(old_filename);
+
+        if (ret < 0) {
+            return ret;
+        }
+
+        // if we're building a VOD playlist, skip writing the manifest multiple times, and just wait until the end
+        if (hls->pl_type != PLAYLIST_TYPE_VOD) {
+            if ((ret = hls_window(s, 0, vs)) < 0) {
+                return ret;
+            }
+        }
+    }
+
+    vs->packets_written++;
+    ret = ff_write_chained(oc, stream_index, pkt, s, 0);
+
+    return ret;
+}
+
+static int hls_write_trailer(struct AVFormatContext *s)
+{
+    HLSContext *hls = s->priv_data;
+    AVFormatContext *oc = NULL;
+    AVFormatContext *vtt_oc = NULL;
+    char *old_filename = NULL;
+    const char *proto = avio_find_protocol_name(s->url);
+    int use_temp_file = proto && !strcmp(proto, "file") && (s->flags & HLS_TEMP_FILE);
+    int i;
+    int ret = 0;
+    VariantStream *vs = NULL;
+
+    for (i = 0; i < hls->nb_varstreams; i++) {
+        vs = &hls->var_streams[i];
+
+        oc = vs->avf;
+        vtt_oc = vs->vtt_avf;
+        old_filename = av_strdup(vs->avf->url);
+
+        if (!old_filename) {
+            return AVERROR(ENOMEM);
+        }
+        if ( hls->segment_type == SEGMENT_TYPE_FMP4) {
+            int range_length = 0;
+            if (!(hls->flags & HLS_SINGLE_FILE)) {
+                ret = hlsenc_io_open(s, &vs->out, vs->avf->url, NULL);
+                if (ret < 0) {
+                    av_log(s, AV_LOG_ERROR, "Failed to open file '%s'\n", vs->avf->url);
+                    goto failed;
+                }
+                write_styp(vs->out);
+            }
+            ret = flush_dynbuf(vs, &range_length);
+            if (ret < 0) {
+                goto failed;
+            }
+            ff_format_io_close(s, &vs->out);
+        }
+
+failed:
+        av_write_trailer(oc);
+        if (oc->pb) {
+            if (hls->segment_type != SEGMENT_TYPE_FMP4) {
+                vs->size = avio_tell(vs->avf->pb) - vs->start_pos;
+            } else {
+                vs->size = avio_tell(vs->avf->pb);
+            }
+            if (hls->segment_type != SEGMENT_TYPE_FMP4)
+                ff_format_io_close(s, &oc->pb);
+
+            // rename that segment from .tmp to the real one
+            if (use_temp_file && oc->url[0] && !(hls->flags & HLS_SINGLE_FILE)) {
+                hls_rename_temp_file(s, oc);
+                av_free(old_filename);
+                old_filename = av_strdup(vs->avf->url);
+
+                if (!old_filename) {
+                    return AVERROR(ENOMEM);
+                }
+            }
+
+            /* after av_write_trailer, then duration + 1 duration per packet */
+            hls_append_segment(s, hls, vs, vs->duration + vs->dpp, vs->start_pos, vs->size);
+        }
+
+        sls_flag_file_rename(hls, vs, old_filename);
+
+        if (vtt_oc) {
+            if (vtt_oc->pb)
+                av_write_trailer(vtt_oc);
+            vs->size = avio_tell(vs->vtt_avf->pb) - vs->start_pos;
+            ff_format_io_close(s, &vtt_oc->pb);
+        }
+        av_freep(&vs->basename);
+        av_freep(&vs->base_output_dirname);
+        avformat_free_context(oc);
+
+        vs->avf = NULL;
+        hls_window(s, 1, vs);
+
+        av_freep(&vs->fmp4_init_filename);
+        if (vtt_oc) {
+            av_freep(&vs->vtt_basename);
+            av_freep(&vs->vtt_m3u8_name);
+            avformat_free_context(vtt_oc);
+        }
+
+        hls_free_segments(vs->segments);
+        hls_free_segments(vs->old_segments);
+        av_free(old_filename);
+        av_freep(&vs->m3u8_name);
+        av_freep(&vs->streams);
+        av_freep(&vs->agroup);
+        av_freep(&vs->ccgroup);
+        av_freep(&vs->baseurl);
+    }
+
+    for (i = 0; i < hls->nb_ccstreams; i++) {
+        ClosedCaptionsStream *ccs = &hls->cc_streams[i];
+        av_freep(&ccs->ccgroup);
+        av_freep(&ccs->instreamid);
+        av_freep(&ccs->language);
+    }
+
+    ff_format_io_close(s, &hls->m3u8_out);
+    ff_format_io_close(s, &hls->sub_m3u8_out);
+    av_freep(&hls->key_basename);
+    av_freep(&hls->var_streams);
+    av_freep(&hls->cc_streams);
+    av_freep(&hls->master_m3u8_url);
+    return 0;
+}
+
+
+static int hls_init(AVFormatContext *s)
+{
+    int ret = 0;
+    int i = 0;
+    int j = 0;
+    HLSContext *hls = s->priv_data;
     const char *pattern = "%d.ts";
+    VariantStream *vs = NULL;
+    int basename_size = 0;
     const char *pattern_localtime_fmt = get_default_pattern_localtime_fmt(s);
     const char *vtt_pattern = "%d.vtt";
-    AVDictionary *options = NULL;
-    int basename_size = 0;
+    char *p = NULL;
     int vtt_basename_size = 0;
     int fmp4_init_filename_len = strlen(hls->fmp4_init_filename) + 1;
 
+    ret = update_variant_stream_info(s);
+    if (ret < 0) {
+        av_log(s, AV_LOG_ERROR, "Variant stream info update failed with status %x\n",
+               ret);
+        goto fail;
+    }
+    //TODO: Updates needed to encryption functionality with periodic re-key when more than one variant streams are present
+    if (hls->nb_varstreams > 1 && hls->flags & HLS_PERIODIC_REKEY) {
+        ret = AVERROR(EINVAL);
+        av_log(s, AV_LOG_ERROR, "Periodic re-key not supported when more than one variant streams are present\n");
+        goto fail;
+    }
+
+    ret = validate_name(hls->nb_varstreams, s->url);
+    if (ret < 0)
+        goto fail;
+
+    if (hls->segment_filename) {
+        ret = validate_name(hls->nb_varstreams, hls->segment_filename);
+        if (ret < 0)
+            goto fail;
+    }
+
+    if (av_strcasecmp(hls->fmp4_init_filename, "init.mp4")) {
+        ret = validate_name(hls->nb_varstreams, hls->fmp4_init_filename);
+        if (ret < 0)
+            goto fail;
+    }
+
+    if (hls->subtitle_filename) {
+        ret = validate_name(hls->nb_varstreams, hls->subtitle_filename);
+        if (ret < 0)
+            goto fail;
+    }
+
+    if (hls->master_pl_name) {
+        ret = update_master_pl_info(s);
+        if (ret < 0) {
+            av_log(s, AV_LOG_ERROR, "Master stream info update failed with status %x\n",
+                   ret);
+            goto fail;
+        }
+    }
+
     if (hls->segment_type == SEGMENT_TYPE_FMP4) {
         pattern = "%d.m4s";
     }
@@ -1365,442 +2525,269 @@
         av_log(hls, AV_LOG_DEBUG, "start_number evaluated to %"PRId64"\n", hls->start_sequence);
     }
 
-    hls->sequence       = hls->start_sequence;
     hls->recording_time = (hls->init_time ? hls->init_time : hls->time) * AV_TIME_BASE;
-    hls->start_pts      = AV_NOPTS_VALUE;
-    hls->current_segment_final_filename_fmt[0] = '\0';
+    for (i = 0; i < hls->nb_varstreams; i++) {
+        vs = &hls->var_streams[i];
 
-    if (hls->flags & HLS_PROGRAM_DATE_TIME) {
-        time_t now0;
-        time(&now0);
-        hls->initial_prog_date_time = now0;
-    }
-
-    if (hls->format_options_str) {
-        ret = av_dict_parse_string(&hls->format_options, hls->format_options_str, "=", ":", 0);
-        if (ret < 0) {
-            av_log(s, AV_LOG_ERROR, "Could not parse format options list '%s'\n", hls->format_options_str);
+        vs->m3u8_name = av_strdup(s->url);
+        if (!vs->m3u8_name ) {
+            ret = AVERROR(ENOMEM);
             goto fail;
         }
-    }
+        ret = format_name(vs->m3u8_name, strlen(s->url) + 1, i);
+        if (ret < 0)
+            goto fail;
 
-    for (i = 0; i < s->nb_streams; i++) {
-        hls->has_video +=
-            s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO;
-        hls->has_subtitle +=
-            s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE;
-    }
+        vs->sequence       = hls->start_sequence;
+        vs->start_pts      = AV_NOPTS_VALUE;
+        vs->end_pts      = AV_NOPTS_VALUE;
+        vs->current_segment_final_filename_fmt[0] = '\0';
 
-    if (hls->has_video > 1)
-        av_log(s, AV_LOG_WARNING,
-               "More than a single video stream present, "
-               "expect issues decoding it.\n");
+        if (hls->flags & HLS_SPLIT_BY_TIME && hls->flags & HLS_INDEPENDENT_SEGMENTS) {
+            // Independent segments cannot be guaranteed when splitting by time
+            hls->flags &= ~HLS_INDEPENDENT_SEGMENTS;
+            av_log(s, AV_LOG_WARNING,
+                   "'split_by_time' and 'independent_segments' cannot be enabled together. "
+                   "Disabling 'independent_segments' flag\n");
+        }
 
-    if (hls->segment_type == SEGMENT_TYPE_FMP4) {
-        hls->oformat = av_guess_format("mp4", NULL, NULL);
-    } else {
-        hls->oformat = av_guess_format("mpegts", NULL, NULL);
-    }
+        if (hls->flags & HLS_PROGRAM_DATE_TIME) {
+            time_t now0;
+            time(&now0);
+            vs->initial_prog_date_time = now0;
+        }
+        if (hls->format_options_str) {
+            ret = av_dict_parse_string(&hls->format_options, hls->format_options_str, "=", ":", 0);
+            if (ret < 0) {
+                av_log(s, AV_LOG_ERROR, "Could not parse format options list '%s'\n", hls->format_options_str);
+                goto fail;
+            }
+        }
 
-    if (!hls->oformat) {
-        ret = AVERROR_MUXER_NOT_FOUND;
-        goto fail;
-    }
+        for (j = 0; j < vs->nb_streams; j++) {
+            vs->has_video += vs->streams[j]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO;
+            /* Get one video stream to reference for split segments
+             * so use the first video stream index. */
+            if ((vs->has_video == 1) && (vs->streams[j]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)) {
+                vs->reference_stream_index = vs->streams[j]->index;
+            }
+            vs->has_subtitle += vs->streams[j]->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE;
+        }
 
-    if(hls->has_subtitle) {
-        hls->vtt_oformat = av_guess_format("webvtt", NULL, NULL);
-        if (!hls->oformat) {
+        if (vs->has_video > 1)
+            av_log(s, AV_LOG_WARNING, "More than a single video stream present, expect issues decoding it.\n");
+        if (hls->segment_type == SEGMENT_TYPE_FMP4) {
+            vs->oformat = av_guess_format("mp4", NULL, NULL);
+        } else {
+            vs->oformat = av_guess_format("mpegts", NULL, NULL);
+        }
+
+        if (!vs->oformat) {
             ret = AVERROR_MUXER_NOT_FOUND;
             goto fail;
         }
-    }
 
-    if (hls->segment_filename) {
-        hls->basename = av_strdup(hls->segment_filename);
-        if (!hls->basename) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
-    } else {
-        if (hls->flags & HLS_SINGLE_FILE) {
-            if (hls->segment_type == SEGMENT_TYPE_FMP4) {
-                pattern = ".m4s";
-            } else {
-                pattern = ".ts";
-            }
-        }
-
-        if (hls->use_localtime) {
-            basename_size = strlen(s->filename) + strlen(pattern_localtime_fmt) + 1;
-        } else {
-            basename_size = strlen(s->filename) + strlen(pattern) + 1;
-        }
-        hls->basename = av_malloc(basename_size);
-        if (!hls->basename) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
-
-        av_strlcpy(hls->basename, s->filename, basename_size);
-
-        p = strrchr(hls->basename, '.');
-        if (p)
-            *p = '\0';
-        if (hls->use_localtime) {
-            av_strlcat(hls->basename, pattern_localtime_fmt, basename_size);
-        } else {
-            av_strlcat(hls->basename, pattern, basename_size);
-        }
-    }
-
-    if (hls->segment_type == SEGMENT_TYPE_FMP4) {
-        if (av_strcasecmp(hls->fmp4_init_filename, "init.mp4")) {
-            hls->base_output_dirname = av_malloc(fmp4_init_filename_len);
-            if (!hls->base_output_dirname) {
-                ret = AVERROR(ENOMEM);
+        if (vs->has_subtitle) {
+            vs->vtt_oformat = av_guess_format("webvtt", NULL, NULL);
+            if (!vs->oformat) {
+                ret = AVERROR_MUXER_NOT_FOUND;
                 goto fail;
             }
-            av_strlcpy(hls->base_output_dirname, hls->fmp4_init_filename, fmp4_init_filename_len);
-        } else {
-            if (basename_size > 0) {
-                hls->base_output_dirname = av_malloc(basename_size);
-            } else {
-                hls->base_output_dirname = av_malloc(strlen(hls->fmp4_init_filename) + 1);
-            }
-            if (!hls->base_output_dirname) {
+        }
+        if (hls->segment_filename) {
+            basename_size = strlen(hls->segment_filename) + 1;
+            vs->basename = av_malloc(basename_size);
+            if (!vs->basename) {
                 ret = AVERROR(ENOMEM);
                 goto fail;
             }
 
-            if (basename_size > 0) {
-                av_strlcpy(hls->base_output_dirname, s->filename, basename_size);
-                p = strrchr(hls->base_output_dirname, '/');
-            }
-            if (p) {
-                *(p + 1) = '\0';
-                av_strlcat(hls->base_output_dirname, hls->fmp4_init_filename, basename_size);
-            } else {
-                av_strlcpy(hls->base_output_dirname, hls->fmp4_init_filename, fmp4_init_filename_len);
-            }
-        }
-    }
-
-    if (!hls->use_localtime) {
-        ret = sls_flag_check_duration_size_index(hls);
-        if (ret < 0) {
-             goto fail;
-        }
-    } else {
-        ret = sls_flag_check_duration_size(hls);
-        if (ret < 0) {
-             goto fail;
-        }
-    }
-    if(hls->has_subtitle) {
-
-        if (hls->flags & HLS_SINGLE_FILE)
-            vtt_pattern = ".vtt";
-        vtt_basename_size = strlen(s->filename) + strlen(vtt_pattern) + 1;
-        hls->vtt_basename = av_malloc(vtt_basename_size);
-        if (!hls->vtt_basename) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
-        hls->vtt_m3u8_name = av_malloc(vtt_basename_size);
-        if (!hls->vtt_m3u8_name ) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
-        av_strlcpy(hls->vtt_basename, s->filename, vtt_basename_size);
-        p = strrchr(hls->vtt_basename, '.');
-        if (p)
-            *p = '\0';
-
-        if( hls->subtitle_filename ) {
-            strcpy(hls->vtt_m3u8_name, hls->subtitle_filename);
+            av_strlcpy(vs->basename, hls->segment_filename, basename_size);
+            ret = format_name(vs->basename, basename_size, i);
+            if (ret < 0)
+                goto fail;
         } else {
-            strcpy(hls->vtt_m3u8_name, hls->vtt_basename);
-            av_strlcat(hls->vtt_m3u8_name, "_vtt.m3u8", vtt_basename_size);
-        }
-        av_strlcat(hls->vtt_basename, vtt_pattern, vtt_basename_size);
-    }
+            if (hls->flags & HLS_SINGLE_FILE) {
+                if (hls->segment_type == SEGMENT_TYPE_FMP4) {
+                    pattern = ".m4s";
+                } else {
+                    pattern = ".ts";
+                }
+            }
 
-    if ((hls->flags & HLS_SINGLE_FILE) && (hls->segment_type == SEGMENT_TYPE_FMP4)) {
-        hls->fmp4_init_filename  = av_strdup(hls->basename);
-        if (!hls->fmp4_init_filename) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
-    }
+            if (hls->use_localtime) {
+                basename_size = strlen(vs->m3u8_name) + strlen(pattern_localtime_fmt) + 1;
+            } else {
+                basename_size = strlen(vs->m3u8_name) + strlen(pattern) + 1;
+            }
 
-    if ((ret = hls_mux_init(s)) < 0)
-        goto fail;
+            vs->basename = av_malloc(basename_size);
+            if (!vs->basename) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
 
-    if (hls->flags & HLS_APPEND_LIST) {
-        parse_playlist(s, s->filename);
-        hls->discontinuity = 1;
-        if (hls->init_time > 0) {
-            av_log(s, AV_LOG_WARNING, "append_list mode does not support hls_init_time,"
-                   " hls_init_time value will have no effect\n");
-            hls->init_time = 0;
-            hls->recording_time = hls->time * AV_TIME_BASE;
-        }
-    }
+            av_strlcpy(vs->basename, vs->m3u8_name, basename_size);
 
-    if (hls->segment_type != SEGMENT_TYPE_FMP4 || hls->flags & HLS_SINGLE_FILE) {
-        if ((ret = hls_start(s)) < 0)
-            goto fail;
-    }
-
-    av_dict_copy(&options, hls->format_options, 0);
-    ret = avformat_write_header(hls->avf, &options);
-    if (av_dict_count(options)) {
-        av_log(s, AV_LOG_ERROR, "Some of provided format options in '%s' are not recognized\n", hls->format_options_str);
-        ret = AVERROR(EINVAL);
-        goto fail;
-    }
-    //av_assert0(s->nb_streams == hls->avf->nb_streams);
-    for (i = 0; i < s->nb_streams; i++) {
-        AVStream *inner_st;
-        AVStream *outer_st = s->streams[i];
-
-        if (hls->max_seg_size > 0) {
-            if ((outer_st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) &&
-                (outer_st->codecpar->bit_rate > hls->max_seg_size)) {
-                av_log(s, AV_LOG_WARNING, "Your video bitrate is bigger than hls_segment_size, "
-                       "(%"PRId64 " > %"PRId64 "), the result maybe not be what you want.",
-                       outer_st->codecpar->bit_rate, hls->max_seg_size);
+            p = strrchr(vs->basename, '.');
+            if (p)
+                *p = '\0';
+            if (hls->use_localtime) {
+                av_strlcat(vs->basename, pattern_localtime_fmt, basename_size);
+            } else {
+                av_strlcat(vs->basename, pattern, basename_size);
             }
         }
 
-        if (outer_st->codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE)
-            inner_st = hls->avf->streams[i];
-        else if (hls->vtt_avf)
-            inner_st = hls->vtt_avf->streams[0];
-        else {
-            /* We have a subtitle stream, when the user does not want one */
-            inner_st = NULL;
-            continue;
+        if (hls->segment_type == SEGMENT_TYPE_FMP4) {
+            if (hls->nb_varstreams > 1)
+                fmp4_init_filename_len += strlen(POSTFIX_PATTERN);
+            if (hls->flags & HLS_SINGLE_FILE) {
+                vs->fmp4_init_filename  = av_strdup(vs->basename);
+                if (!vs->fmp4_init_filename) {
+                    ret = AVERROR(ENOMEM);
+                    goto fail;
+                }
+            } else {
+                vs->fmp4_init_filename = av_malloc(fmp4_init_filename_len);
+                if (!vs->fmp4_init_filename ) {
+                    ret = AVERROR(ENOMEM);
+                    goto fail;
+                }
+                av_strlcpy(vs->fmp4_init_filename, hls->fmp4_init_filename,
+                           fmp4_init_filename_len);
+                if (hls->nb_varstreams > 1) {
+                    ret = append_postfix(vs->fmp4_init_filename, fmp4_init_filename_len, i);
+                    if (ret < 0)
+                        goto fail;
+                }
+
+                fmp4_init_filename_len = strlen(vs->m3u8_name) +
+                    strlen(vs->fmp4_init_filename) + 1;
+
+                vs->base_output_dirname = av_malloc(fmp4_init_filename_len);
+                if (!vs->base_output_dirname) {
+                    ret = AVERROR(ENOMEM);
+                    goto fail;
+                }
+
+                av_strlcpy(vs->base_output_dirname, vs->m3u8_name,
+                           fmp4_init_filename_len);
+                p = strrchr(vs->base_output_dirname, '/');
+                if (p) {
+                    *(p + 1) = '\0';
+                    av_strlcat(vs->base_output_dirname, vs->fmp4_init_filename,
+                               fmp4_init_filename_len);
+                } else {
+                    av_strlcpy(vs->base_output_dirname, vs->fmp4_init_filename,
+                               fmp4_init_filename_len);
+                }
+            }
         }
-        avpriv_set_pts_info(outer_st, inner_st->pts_wrap_bits, inner_st->time_base.num, inner_st->time_base.den);
+
+        if (!hls->use_localtime) {
+            ret = sls_flag_check_duration_size_index(hls);
+            if (ret < 0) {
+                goto fail;
+            }
+        } else {
+            ret = sls_flag_check_duration_size(hls, vs);
+            if (ret < 0) {
+                goto fail;
+            }
+        }
+        if (vs->has_subtitle) {
+
+            if (hls->flags & HLS_SINGLE_FILE)
+                vtt_pattern = ".vtt";
+            vtt_basename_size = strlen(vs->m3u8_name) + strlen(vtt_pattern) + 1;
+
+            vs->vtt_basename = av_malloc(vtt_basename_size);
+            if (!vs->vtt_basename) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+            vs->vtt_m3u8_name = av_malloc(vtt_basename_size);
+            if (!vs->vtt_m3u8_name ) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+            av_strlcpy(vs->vtt_basename, vs->m3u8_name, vtt_basename_size);
+            p = strrchr(vs->vtt_basename, '.');
+            if (p)
+                *p = '\0';
+
+            if ( hls->subtitle_filename ) {
+                strcpy(vs->vtt_m3u8_name, hls->subtitle_filename);
+                ret = format_name(vs->vtt_m3u8_name, vtt_basename_size, i);
+                if (ret < 0)
+                    goto fail;
+            } else {
+                strcpy(vs->vtt_m3u8_name, vs->vtt_basename);
+                av_strlcat(vs->vtt_m3u8_name, "_vtt.m3u8", vtt_basename_size);
+            }
+            av_strlcat(vs->vtt_basename, vtt_pattern, vtt_basename_size);
+        }
+
+        if (hls->baseurl) {
+            vs->baseurl = av_strdup(hls->baseurl);
+            if (!vs->baseurl) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+        }
+
+        if ((ret = hls_mux_init(s, vs)) < 0)
+            goto fail;
+
+        if (hls->flags & HLS_APPEND_LIST) {
+            parse_playlist(s, vs->m3u8_name, vs);
+            vs->discontinuity = 1;
+            if (hls->init_time > 0) {
+                av_log(s, AV_LOG_WARNING, "append_list mode does not support hls_init_time,"
+                       " hls_init_time value will have no effect\n");
+                hls->init_time = 0;
+                hls->recording_time = hls->time * AV_TIME_BASE;
+            }
+        }
+
+        if ((ret = hls_start(s, vs)) < 0)
+            goto fail;
     }
+
 fail:
-
-    av_dict_free(&options);
     if (ret < 0) {
-        av_freep(&hls->fmp4_init_filename);
-        av_freep(&hls->basename);
-        av_freep(&hls->vtt_basename);
         av_freep(&hls->key_basename);
-        if (hls->avf)
-            avformat_free_context(hls->avf);
-        if (hls->vtt_avf)
-            avformat_free_context(hls->vtt_avf);
-
+        for (i = 0; i < hls->nb_varstreams && hls->var_streams; i++) {
+            vs = &hls->var_streams[i];
+            av_freep(&vs->basename);
+            av_freep(&vs->vtt_basename);
+            av_freep(&vs->fmp4_init_filename);
+            av_freep(&vs->m3u8_name);
+            av_freep(&vs->vtt_m3u8_name);
+            av_freep(&vs->streams);
+            av_freep(&vs->agroup);
+            av_freep(&vs->ccgroup);
+            av_freep(&vs->baseurl);
+            if (vs->avf)
+                avformat_free_context(vs->avf);
+            if (vs->vtt_avf)
+                avformat_free_context(vs->vtt_avf);
+        }
+        for (i = 0; i < hls->nb_ccstreams; i++) {
+            ClosedCaptionsStream *ccs = &hls->cc_streams[i];
+            av_freep(&ccs->ccgroup);
+            av_freep(&ccs->instreamid);
+            av_freep(&ccs->language);
+        }
+        av_freep(&hls->var_streams);
+        av_freep(&hls->cc_streams);
+        av_freep(&hls->master_m3u8_url);
     }
-    return ret;
-}
-
-static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
-{
-    HLSContext *hls = s->priv_data;
-    AVFormatContext *oc = NULL;
-    AVStream *st = s->streams[pkt->stream_index];
-    int64_t end_pts = hls->recording_time * hls->number;
-    int is_ref_pkt = 1;
-    int ret = 0, can_split = 1;
-    int stream_index = 0;
-    int range_length = 0;
-    uint8_t *buffer = NULL;
-
-    if (hls->sequence - hls->nb_entries > hls->start_sequence && hls->init_time > 0) {
-        /* reset end_pts, hls->recording_time at end of the init hls list */
-        int init_list_dur = hls->init_time * hls->nb_entries * AV_TIME_BASE;
-        int after_init_list_dur = (hls->sequence - hls->nb_entries ) * hls->time * AV_TIME_BASE;
-        hls->recording_time = hls->time * AV_TIME_BASE;
-        end_pts = init_list_dur + after_init_list_dur ;
-    }
-
-    if( st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE ) {
-        oc = hls->vtt_avf;
-        stream_index = 0;
-    } else {
-        oc = hls->avf;
-        stream_index = pkt->stream_index;
-    }
-    if (hls->start_pts == AV_NOPTS_VALUE) {
-        hls->start_pts = pkt->pts;
-        hls->end_pts   = pkt->pts;
-    }
-
-    if (hls->has_video) {
-        can_split = st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
-                    ((pkt->flags & AV_PKT_FLAG_KEY) || (hls->flags & HLS_SPLIT_BY_TIME));
-        is_ref_pkt = st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO;
-    }
-    if (pkt->pts == AV_NOPTS_VALUE)
-        is_ref_pkt = can_split = 0;
-
-    if (is_ref_pkt) {
-        if (hls->new_start) {
-            hls->new_start = 0;
-            hls->duration = (double)(pkt->pts - hls->end_pts)
-                                       * st->time_base.num / st->time_base.den;
-            hls->dpp = (double)(pkt->duration) * st->time_base.num / st->time_base.den;
-        } else {
-            if (pkt->duration) {
-                hls->duration += (double)(pkt->duration) * st->time_base.num / st->time_base.den;
-            } else {
-                av_log(s, AV_LOG_WARNING, "pkt->duration = 0, maybe the hls segment duration will not precise\n");
-                hls->duration = (double)(pkt->pts - hls->end_pts) * st->time_base.num / st->time_base.den;
-            }
-        }
-
-    }
-    if (hls->packets_written && can_split && av_compare_ts(pkt->pts - hls->start_pts, st->time_base,
-                                   end_pts, AV_TIME_BASE_Q) >= 0) {
-        int64_t new_start_pos;
-        char *old_filename = av_strdup(hls->avf->filename);
-        int byterange_mode = (hls->flags & HLS_SINGLE_FILE) || (hls->max_seg_size > 0);
-
-        if (!old_filename) {
-            return AVERROR(ENOMEM);
-        }
-
-        av_write_frame(hls->avf, NULL); /* Flush any buffered data */
-
-        new_start_pos = avio_tell(hls->avf->pb);
-        hls->size = new_start_pos - hls->start_pos;
-
-        if (!byterange_mode) {
-            if (hls->segment_type == SEGMENT_TYPE_FMP4 && !hls->init_range_length) {
-                avio_flush(oc->pb);
-                range_length = avio_close_dyn_buf(oc->pb, &buffer);
-                avio_write(hls->out, buffer, range_length);
-                hls->init_range_length = range_length;
-                avio_open_dyn_buf(&oc->pb);
-                hls->packets_written = 0;
-                ff_format_io_close(s, &hls->out);
-            } else {
-                ff_format_io_close(s, &oc->pb);
-            }
-            if (hls->vtt_avf) {
-                ff_format_io_close(s, &hls->vtt_avf->pb);
-            }
-        }
-        if ((hls->flags & HLS_TEMP_FILE) && oc->filename[0]) {
-            if (!(hls->flags & HLS_SINGLE_FILE) || (hls->max_seg_size <= 0))
-                if ((hls->avf->oformat->priv_class && hls->avf->priv_data) && hls->segment_type != SEGMENT_TYPE_FMP4)
-                    av_opt_set(hls->avf->priv_data, "mpegts_flags", "resend_headers", 0);
-            hls_rename_temp_file(s, oc);
-        }
-
-        if (hls->fmp4_init_mode) {
-            hls->number--;
-        }
-
-        if (!hls->fmp4_init_mode || byterange_mode)
-            ret = hls_append_segment(s, hls, hls->duration, hls->start_pos, hls->size);
-
-        hls->start_pos = new_start_pos;
-        if (ret < 0) {
-            av_free(old_filename);
-            return ret;
-        }
-
-        hls->end_pts = pkt->pts;
-        hls->duration = 0;
-
-        hls->fmp4_init_mode = 0;
-        if (hls->flags & HLS_SINGLE_FILE) {
-            hls->number++;
-        } else if (hls->max_seg_size > 0) {
-            if (hls->start_pos >= hls->max_seg_size) {
-                hls->sequence++;
-                sls_flag_file_rename(hls, old_filename);
-                ret = hls_start(s);
-                hls->start_pos = 0;
-                /* When split segment by byte, the duration is short than hls_time,
-                 * so it is not enough one segment duration as hls_time, */
-                hls->number--;
-            }
-            hls->number++;
-        } else {
-            sls_flag_file_rename(hls, old_filename);
-            ret = hls_start(s);
-        }
-        av_free(old_filename);
-
-        if (ret < 0) {
-            return ret;
-        }
-
-        if (!hls->fmp4_init_mode || byterange_mode)
-            if ((ret = hls_window(s, 0)) < 0) {
-                return ret;
-            }
-    }
-
-    hls->packets_written++;
-    ret = ff_write_chained(oc, stream_index, pkt, s, 0);
 
     return ret;
 }
 
-static int hls_write_trailer(struct AVFormatContext *s)
-{
-    HLSContext *hls = s->priv_data;
-    AVFormatContext *oc = hls->avf;
-    AVFormatContext *vtt_oc = hls->vtt_avf;
-    char *old_filename = av_strdup(hls->avf->filename);
-
-    if (!old_filename) {
-        return AVERROR(ENOMEM);
-    }
-
-
-    av_write_trailer(oc);
-    if (oc->pb) {
-        hls->size = avio_tell(hls->avf->pb) - hls->start_pos;
-        ff_format_io_close(s, &oc->pb);
-
-        if ((hls->flags & HLS_TEMP_FILE) && oc->filename[0]) {
-            hls_rename_temp_file(s, oc);
-        }
-
-        /* after av_write_trailer, then duration + 1 duration per packet */
-        hls_append_segment(s, hls, hls->duration + hls->dpp, hls->start_pos, hls->size);
-    }
-
-    sls_flag_file_rename(hls, old_filename);
-
-    if (vtt_oc) {
-        if (vtt_oc->pb)
-            av_write_trailer(vtt_oc);
-        hls->size = avio_tell(hls->vtt_avf->pb) - hls->start_pos;
-        ff_format_io_close(s, &vtt_oc->pb);
-    }
-    av_freep(&hls->basename);
-    av_freep(&hls->base_output_dirname);
-    av_freep(&hls->key_basename);
-    avformat_free_context(oc);
-
-    hls->avf = NULL;
-    hls_window(s, 1);
-
-    av_freep(&hls->fmp4_init_filename);
-    if (vtt_oc) {
-        av_freep(&hls->vtt_basename);
-        av_freep(&hls->vtt_m3u8_name);
-        avformat_free_context(vtt_oc);
-    }
-
-    hls_free_segments(hls->segments);
-    hls_free_segments(hls->old_segments);
-    av_free(old_filename);
-    return 0;
-}
-
 #define OFFSET(x) offsetof(HLSContext, x)
 #define E AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
@@ -1808,6 +2795,7 @@
     {"hls_time",      "set segment length in seconds",           OFFSET(time),    AV_OPT_TYPE_FLOAT,  {.dbl = 2},     0, FLT_MAX, E},
     {"hls_init_time", "set segment length in seconds at init list",           OFFSET(init_time),    AV_OPT_TYPE_FLOAT,  {.dbl = 0},     0, FLT_MAX, E},
     {"hls_list_size", "set maximum number of playlist entries",  OFFSET(max_nb_segments),    AV_OPT_TYPE_INT,    {.i64 = 5},     0, INT_MAX, E},
+    {"hls_delete_threshold", "set number of unreferenced segments to keep before deleting",  OFFSET(hls_delete_threshold),    AV_OPT_TYPE_INT,    {.i64 = 1},     1, INT_MAX, E},
     {"hls_ts_options","set hls mpegts list of options for the container format used for hls", OFFSET(format_options_str), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,    E},
     {"hls_vtt_options","set hls vtt list of options for the container format used for hls", OFFSET(vtt_format_options_str), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,    E},
 #if FF_API_HLS_WRAP
@@ -1841,8 +2829,15 @@
     {"second_level_segment_duration", "include segment duration in segment filenames when use_localtime", 0, AV_OPT_TYPE_CONST, {.i64 = HLS_SECOND_LEVEL_SEGMENT_DURATION }, 0, UINT_MAX,   E, "flags"},
     {"second_level_segment_size", "include segment size in segment filenames when use_localtime", 0, AV_OPT_TYPE_CONST, {.i64 = HLS_SECOND_LEVEL_SEGMENT_SIZE }, 0, UINT_MAX,   E, "flags"},
     {"periodic_rekey", "reload keyinfo file periodically for re-keying", 0, AV_OPT_TYPE_CONST, {.i64 = HLS_PERIODIC_REKEY }, 0, UINT_MAX,   E, "flags"},
-    {"use_localtime", "set filename expansion with strftime at segment creation", OFFSET(use_localtime), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, E },
-    {"use_localtime_mkdir", "create last directory component in strftime-generated filename", OFFSET(use_localtime_mkdir), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, E },
+    {"independent_segments", "add EXT-X-INDEPENDENT-SEGMENTS, whenever applicable", 0, AV_OPT_TYPE_CONST, { .i64 = HLS_INDEPENDENT_SEGMENTS }, 0, UINT_MAX, E, "flags"},
+#if FF_API_HLS_USE_LOCALTIME
+    {"use_localtime", "set filename expansion with strftime at segment creation(will be deprecated )", OFFSET(use_localtime), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, E },
+#endif
+    {"strftime", "set filename expansion with strftime at segment creation", OFFSET(use_localtime), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, E },
+#if FF_API_HLS_USE_LOCALTIME
+    {"use_localtime_mkdir", "create last directory component in strftime-generated filename(will be deprecated)", OFFSET(use_localtime_mkdir), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, E },
+#endif
+    {"strftime_mkdir", "create last directory component in strftime-generated filename", OFFSET(use_localtime_mkdir), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, E },
     {"hls_playlist_type", "set the HLS playlist type", OFFSET(pl_type), AV_OPT_TYPE_INT, {.i64 = PLAYLIST_TYPE_NONE }, 0, PLAYLIST_TYPE_NB-1, E, "pl_type" },
     {"event", "EVENT playlist", 0, AV_OPT_TYPE_CONST, {.i64 = PLAYLIST_TYPE_EVENT }, INT_MIN, INT_MAX, E, "pl_type" },
     {"vod", "VOD playlist", 0, AV_OPT_TYPE_CONST, {.i64 = PLAYLIST_TYPE_VOD }, INT_MIN, INT_MAX, E, "pl_type" },
@@ -1852,6 +2847,12 @@
     {"epoch", "seconds since epoch", 0, AV_OPT_TYPE_CONST, {.i64 = HLS_START_SEQUENCE_AS_SECONDS_SINCE_EPOCH }, INT_MIN, INT_MAX, E, "start_sequence_source_type" },
     {"datetime", "current datetime as YYYYMMDDhhmmss", 0, AV_OPT_TYPE_CONST, {.i64 = HLS_START_SEQUENCE_AS_FORMATTED_DATETIME }, INT_MIN, INT_MAX, E, "start_sequence_source_type" },
     {"http_user_agent", "override User-Agent field in HTTP header", OFFSET(user_agent), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,    E},
+    {"var_stream_map", "Variant stream map string", OFFSET(var_stream_map), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,    E},
+    {"cc_stream_map", "Closed captions stream map string", OFFSET(cc_stream_map), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,    E},
+    {"master_pl_name", "Create HLS master playlist with this name", OFFSET(master_pl_name), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,    E},
+    {"master_pl_publish_rate", "Publish master play list every after this many segment intervals", OFFSET(master_publish_rate), AV_OPT_TYPE_INT, {.i64 = 0}, 0, UINT_MAX, E},
+    {"http_persistent", "Use persistent HTTP connections", OFFSET(http_persistent), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, E },
+    {"timeout", "set timeout for socket I/O operations", OFFSET(timeout), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT_MAX, .flags = E },
     { NULL },
 };
 
@@ -1871,7 +2872,8 @@
     .audio_codec    = AV_CODEC_ID_AAC,
     .video_codec    = AV_CODEC_ID_H264,
     .subtitle_codec = AV_CODEC_ID_WEBVTT,
-    .flags          = AVFMT_NOFILE | AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH,
+    .flags          = AVFMT_NOFILE | AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH | AVFMT_NODIMENSIONS,
+    .init           = hls_init,
     .write_header   = hls_write_header,
     .write_packet   = hls_write_packet,
     .write_trailer  = hls_write_trailer,

diff --git a/libavformat/hlsplaylist.c b/libavformat/hlsplaylist.c
new file mode 100644
index 0000000..efcbff0
--- /dev/null
+++ b/libavformat/hlsplaylist.c

@@ -0,0 +1,161 @@
+/*
+ * Apple HTTP Live Streaming segmenter
+ * Copyright (c) 2012, Luca Barbato
+ * Copyright (c) 2017 Akamai Technologies, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include <stdint.h>
+
+#include "libavutil/time_internal.h"
+
+#include "avformat.h"
+#include "hlsplaylist.h"
+
+void ff_hls_write_playlist_version(AVIOContext *out, int version) {
+    if (!out)
+        return;
+    avio_printf(out, "#EXTM3U\n");
+    avio_printf(out, "#EXT-X-VERSION:%d\n", version);
+}
+
+void ff_hls_write_audio_rendition(AVIOContext *out, char *agroup,
+                                  char *filename, int name_id, int is_default) {
+    if (!out || !agroup || !filename)
+        return;
+
+    avio_printf(out, "#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID=\"group_%s\"", agroup);
+    avio_printf(out, ",NAME=\"audio_%d\",DEFAULT=%s,URI=\"%s\"\n", name_id,
+                     is_default ? "YES" : "NO", filename);
+}
+
+void ff_hls_write_stream_info(AVStream *st, AVIOContext *out,
+                              int bandwidth, char *filename, char *agroup,
+                              char *codecs, char *ccgroup) {
+
+    if (!out || !filename)
+        return;
+
+    if (!bandwidth) {
+        av_log(NULL, AV_LOG_WARNING,
+                "Bandwidth info not available, set audio and video bitrates\n");
+        return;
+    }
+
+    avio_printf(out, "#EXT-X-STREAM-INF:BANDWIDTH=%d", bandwidth);
+    if (st && st->codecpar->width > 0 && st->codecpar->height > 0)
+        avio_printf(out, ",RESOLUTION=%dx%d", st->codecpar->width,
+                st->codecpar->height);
+    if (codecs && strlen(codecs) > 0)
+        avio_printf(out, ",CODECS=\"%s\"", codecs);
+    if (agroup && strlen(agroup) > 0)
+        avio_printf(out, ",AUDIO=\"group_%s\"", agroup);
+    if (ccgroup && strlen(ccgroup) > 0)
+        avio_printf(out, ",CLOSED-CAPTIONS=\"%s\"", ccgroup);
+    avio_printf(out, "\n%s\n\n", filename);
+}
+
+void ff_hls_write_playlist_header(AVIOContext *out, int version, int allowcache,
+                                  int target_duration, int64_t sequence,
+                                  uint32_t playlist_type) {
+    if (!out)
+        return;
+    ff_hls_write_playlist_version(out, version);
+    if (allowcache == 0 || allowcache == 1) {
+        avio_printf(out, "#EXT-X-ALLOW-CACHE:%s\n", allowcache == 0 ? "NO" : "YES");
+    }
+    avio_printf(out, "#EXT-X-TARGETDURATION:%d\n", target_duration);
+    avio_printf(out, "#EXT-X-MEDIA-SEQUENCE:%"PRId64"\n", sequence);
+    av_log(NULL, AV_LOG_VERBOSE, "EXT-X-MEDIA-SEQUENCE:%"PRId64"\n", sequence);
+
+    if (playlist_type == PLAYLIST_TYPE_EVENT) {
+        avio_printf(out, "#EXT-X-PLAYLIST-TYPE:EVENT\n");
+    } else if (playlist_type == PLAYLIST_TYPE_VOD) {
+        avio_printf(out, "#EXT-X-PLAYLIST-TYPE:VOD\n");
+    }
+}
+
+void ff_hls_write_init_file(AVIOContext *out, char *filename,
+                            int byterange_mode, int64_t size, int64_t pos) {
+    avio_printf(out, "#EXT-X-MAP:URI=\"%s\"", filename);
+    if (byterange_mode) {
+        avio_printf(out, ",BYTERANGE=\"%"PRId64"@%"PRId64"\"", size, pos);
+    }
+    avio_printf(out, "\n");
+}
+
+int ff_hls_write_file_entry(AVIOContext *out, int insert_discont,
+                             int byterange_mode,
+                             double duration, int round_duration,
+                             int64_t size, int64_t pos, //Used only if HLS_SINGLE_FILE flag is set
+                             char *baseurl, //Ignored if NULL
+                             char *filename, double *prog_date_time) {
+    if (!out || !filename)
+        return AVERROR(EINVAL);
+
+    if (insert_discont) {
+        avio_printf(out, "#EXT-X-DISCONTINUITY\n");
+    }
+    if (round_duration)
+        avio_printf(out, "#EXTINF:%ld,\n",  lrint(duration));
+    else
+        avio_printf(out, "#EXTINF:%f,\n", duration);
+    if (byterange_mode)
+        avio_printf(out, "#EXT-X-BYTERANGE:%"PRId64"@%"PRId64"\n", size, pos);
+
+    if (prog_date_time) {
+        time_t tt, wrongsecs;
+        int milli;
+        struct tm *tm, tmpbuf;
+        char buf0[128], buf1[128];
+        tt = (int64_t)*prog_date_time;
+        milli = av_clip(lrint(1000*(*prog_date_time - tt)), 0, 999);
+        tm = localtime_r(&tt, &tmpbuf);
+        if (!strftime(buf0, sizeof(buf0), "%Y-%m-%dT%H:%M:%S", tm)) {
+            av_log(NULL, AV_LOG_DEBUG, "strftime error in ff_hls_write_file_entry\n");
+            return AVERROR_UNKNOWN;
+        }
+        if (!strftime(buf1, sizeof(buf1), "%z", tm) || buf1[1]<'0' ||buf1[1]>'2') {
+            int tz_min, dst = tm->tm_isdst;
+            tm = gmtime_r(&tt, &tmpbuf);
+            tm->tm_isdst = dst;
+            wrongsecs = mktime(tm);
+            tz_min = (FFABS(wrongsecs - tt) + 30) / 60;
+            snprintf(buf1, sizeof(buf1),
+                     "%c%02d%02d",
+                     wrongsecs <= tt ? '+' : '-',
+                     tz_min / 60,
+                     tz_min % 60);
+        }
+        avio_printf(out, "#EXT-X-PROGRAM-DATE-TIME:%s.%03d%s\n", buf0, milli, buf1);
+        *prog_date_time += duration;
+    }
+    if (baseurl)
+        avio_printf(out, "%s", baseurl);
+    avio_printf(out, "%s\n", filename);
+
+    return 0;
+}
+
+void ff_hls_write_end_list (AVIOContext *out) {
+    if (!out)
+        return;
+    avio_printf(out, "#EXT-X-ENDLIST\n");
+}
+

diff --git a/libavformat/hlsplaylist.h b/libavformat/hlsplaylist.h
new file mode 100644
index 0000000..5054b01
--- /dev/null
+++ b/libavformat/hlsplaylist.h

@@ -0,0 +1,58 @@
+/*
+ * Apple HTTP Live Streaming segmenter
+ * Copyright (c) 2012, Luca Barbato
+ * Copyright (c) 2017 Akamai Technologies, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_HLSPLAYLIST_H
+#define AVFORMAT_HLSPLAYLIST_H
+
+#include <stdint.h>
+
+#include "libavutil/common.h"
+#include "avformat.h"
+#include "avio.h"
+
+typedef enum {
+    PLAYLIST_TYPE_NONE,
+    PLAYLIST_TYPE_EVENT,
+    PLAYLIST_TYPE_VOD,
+    PLAYLIST_TYPE_NB,
+} PlaylistType;
+
+void ff_hls_write_playlist_version(AVIOContext *out, int version);
+void ff_hls_write_audio_rendition(AVIOContext *out, char *agroup,
+                                  char *filename, int name_id, int is_default);
+void ff_hls_write_stream_info(AVStream *st, AVIOContext *out,
+                              int bandwidth, char *filename, char *agroup,
+                              char *codecs, char *ccgroup);
+void ff_hls_write_playlist_header(AVIOContext *out, int version, int allowcache,
+                                  int target_duration, int64_t sequence,
+                                  uint32_t playlist_type);
+void ff_hls_write_init_file(AVIOContext *out, char *filename,
+                            int byterange_mode, int64_t size, int64_t pos);
+int ff_hls_write_file_entry(AVIOContext *out, int insert_discont,
+                             int byterange_mode,
+                             double duration, int round_duration,
+                             int64_t size, int64_t pos, //Used only if HLS_SINGLE_FILE flag is set
+                             char *baseurl, //Ignored if NULL
+                             char *filename, double *prog_date_time);
+void ff_hls_write_end_list (AVIOContext *out);
+
+#endif /* AVFORMAT_HLSPLAYLIST_H_ */

diff --git a/libavformat/hlsproto.c b/libavformat/hlsproto.c
index 2b19ed0..e7ef2d8 100644
--- a/libavformat/hlsproto.c
+++ b/libavformat/hlsproto.c

@@ -69,14 +69,6 @@
     int64_t last_load_time;
 } HLSContext;
 
-static int read_chomp_line(AVIOContext *s, char *buf, int maxlen)
-{
-    int len = ff_get_line(s, buf, maxlen);
-    while (len > 0 && av_isspace(buf[len - 1]))
-        buf[--len] = '\0';
-    return len;
-}
-
 static void free_segment_list(HLSContext *s)
 {
     int i;
@@ -122,7 +114,7 @@
                                    h->protocol_whitelist, h->protocol_blacklist)) < 0)
         return ret;
 
-    read_chomp_line(in, line, sizeof(line));
+    ff_get_chomp_line(in, line, sizeof(line));
     if (strcmp(line, "#EXTM3U")) {
         ret = AVERROR_INVALIDDATA;
         goto fail;
@@ -131,7 +123,7 @@
     free_segment_list(s);
     s->finished = 0;
     while (!avio_feof(in)) {
-        read_chomp_line(in, line, sizeof(line));
+        ff_get_chomp_line(in, line, sizeof(line));
         if (av_strstart(line, "#EXT-X-STREAM-INF:", &ptr)) {
             struct variant_info info = {{0}};
             is_variant = 1;

diff --git a/libavformat/http.c b/libavformat/http.c
index 668cd51..3a35bc7 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c

@@ -66,6 +66,7 @@
     int http_code;
     /* Used if "Transfer-Encoding: chunked" otherwise -1. */
     uint64_t chunksize;
+    int chunkend;
     uint64_t off, end_off, filesize;
     char *location;
     HTTPAuthState auth_state;
@@ -73,7 +74,9 @@
     char *http_proxy;
     char *headers;
     char *mime_type;
+    char *http_version;
     char *user_agent;
+    char *referer;
 #if FF_API_HTTP_USER_AGENT
     char *user_agent_deprecated;
 #endif
@@ -115,7 +118,6 @@
     int reconnect;
     int reconnect_at_eof;
     int reconnect_streamed;
-    int reconnect_delay;
     int reconnect_delay_max;
     int listen;
     char *resource;
@@ -137,12 +139,14 @@
     { "headers", "set custom HTTP headers, can override built in default headers", OFFSET(headers), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D | E },
     { "content_type", "set a specific content type for the POST messages", OFFSET(content_type), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D | E },
     { "user_agent", "override User-Agent header", OFFSET(user_agent), AV_OPT_TYPE_STRING, { .str = DEFAULT_USER_AGENT }, 0, 0, D },
+    { "referer", "override referer header", OFFSET(referer), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D },
 #if FF_API_HTTP_USER_AGENT
-    { "user-agent", "override User-Agent header", OFFSET(user_agent_deprecated), AV_OPT_TYPE_STRING, { .str = DEFAULT_USER_AGENT }, 0, 0, D },
+    { "user-agent", "use the \"user_agent\" option instead", OFFSET(user_agent_deprecated), AV_OPT_TYPE_STRING, { .str = DEFAULT_USER_AGENT }, 0, 0, D|AV_OPT_FLAG_DEPRECATED },
 #endif
     { "multiple_requests", "use persistent connections", OFFSET(multiple_requests), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, D | E },
     { "post_data", "set custom HTTP post data", OFFSET(post_data), AV_OPT_TYPE_BINARY, .flags = D | E },
     { "mime_type", "export the MIME type", OFFSET(mime_type), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
+    { "http_version", "export the http response version", OFFSET(http_version), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
     { "cookies", "set cookies to be sent in applicable future requests, use newline delimited Set-Cookie HTTP field value syntax", OFFSET(cookies), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D },
     { "icy", "request ICY metadata", OFFSET(icy), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, D },
     { "icy_metadata_headers", "return ICY metadata headers", OFFSET(icy_metadata_headers), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, AV_OPT_FLAG_EXPORT },
@@ -170,6 +174,7 @@
                         const char *hoststr, const char *auth,
                         const char *proxyauth, int *new_location);
 static int http_read_header(URLContext *h, int *new_location);
+static int http_shutdown(URLContext *h, int flags);
 
 void ff_http_init_auth_state(URLContext *dest, const URLContext *src)
 {
@@ -304,7 +309,39 @@
     HTTPContext *s = h->priv_data;
     AVDictionary *options = NULL;
     int ret;
+    char hostname1[1024], hostname2[1024], proto1[10], proto2[10];
+    int port1, port2;
 
+    if (!h->prot ||
+        !(!strcmp(h->prot->name, "http") ||
+          !strcmp(h->prot->name, "https")))
+        return AVERROR(EINVAL);
+
+    av_url_split(proto1, sizeof(proto1), NULL, 0,
+                 hostname1, sizeof(hostname1), &port1,
+                 NULL, 0, s->location);
+    av_url_split(proto2, sizeof(proto2), NULL, 0,
+                 hostname2, sizeof(hostname2), &port2,
+                 NULL, 0, uri);
+    if (port1 != port2 || strncmp(hostname1, hostname2, sizeof(hostname2)) != 0) {
+        av_log(h, AV_LOG_ERROR, "Cannot reuse HTTP connection for different host: %s:%d != %s:%d\n",
+            hostname1, port1,
+            hostname2, port2
+        );
+        return AVERROR(EINVAL);
+    }
+
+    if (!s->end_chunked_post) {
+        ret = http_shutdown(h, h->flags);
+        if (ret < 0)
+            return ret;
+    }
+
+    if (s->willclose)
+        return AVERROR_EOF;
+
+    s->end_chunked_post = 0;
+    s->chunkend      = 0;
     s->off           = 0;
     s->icy_data_read = 0;
     av_free(s->location);
@@ -312,6 +349,7 @@
     if (!s->location)
         return AVERROR(ENOMEM);
 
+    av_log(s, AV_LOG_INFO, "Opening \'%s\' for %s\n", uri, h->flags & AVIO_FLAG_WRITE ? "writing" : "reading");
     ret = http_open_cnx(h, &options);
     av_dict_free(&options);
     return ret;
@@ -538,7 +576,11 @@
         goto fail;
     cc->hd = cl;
     cc->is_multi_client = 1;
+    return 0;
 fail:
+    if (c) {
+        ffurl_closep(c);
+    }
     return ret;
 }
 
@@ -712,6 +754,9 @@
 {
     char *param, *next_param, *cstr, *back;
 
+    if (!set_cookie[0])
+        return 0;
+
     if (!(cstr = av_strdup(set_cookie)))
         return AVERROR(EINVAL);
 
@@ -719,6 +764,8 @@
     back = &cstr[strlen(cstr)-1];
     while (strchr(WHITESPACES, *back)) {
         *back='\0';
+        if (back == cstr)
+            break;
         back--;
     }
 
@@ -764,7 +811,7 @@
             // if the cookie has already expired ignore it
             if (av_timegm(&new_tm) < av_gettime() / 1000000) {
                 av_dict_free(&new_params);
-                return -1;
+                return 0;
             }
 
             // only replace an older cookie with the same name
@@ -788,6 +835,7 @@
             }
         }
     }
+    av_dict_free(&new_params);
 
     // duplicate the cookie name (dict will dupe the value)
     if (!(eql = strchr(p, '='))) return AVERROR(EINVAL);
@@ -887,6 +935,14 @@
             }
             av_log(h, AV_LOG_TRACE, "HTTP version string: %s\n", version);
         } else {
+            if (av_strncasecmp(p, "HTTP/1.0", 8) == 0)
+                s->willclose = 1;
+            while (*p != '/' && *p != '\0')
+                p++;
+            while (*p == '/')
+                p++;
+            av_freep(&s->http_version);
+            s->http_version = av_strndup(p, 3);
             while (!av_isspace(*p) && *p != '\0')
                 p++;
             while (av_isspace(*p))
@@ -963,7 +1019,8 @@
 /**
  * Create a string containing cookie values for use as a HTTP cookie header
  * field value for a particular path and domain from the cookie values stored in
- * the HTTP protocol context. The cookie string is stored in *cookies.
+ * the HTTP protocol context. The cookie string is stored in *cookies, and may
+ * be NULL if there are no valid cookies.
  *
  * @return a negative value if an error condition occurred, 0 otherwise
  */
@@ -973,15 +1030,20 @@
     // cookie strings will look like Set-Cookie header field values.  Multiple
     // Set-Cookie fields will result in multiple values delimited by a newline
     int ret = 0;
-    char *cookie, *set_cookies = av_strdup(s->cookies), *next = set_cookies;
-
-    if (!set_cookies) return AVERROR(EINVAL);
+    char *cookie, *set_cookies, *next;
 
     // destroy any cookies in the dictionary.
     av_dict_free(&s->cookie_dict);
 
+    if (!s->cookies)
+        return 0;
+
+    next = set_cookies = av_strdup(s->cookies);
+    if (!next)
+        return AVERROR(ENOMEM);
+
     *cookies = NULL;
-    while ((cookie = av_strtok(next, "\n", &next))) {
+    while ((cookie = av_strtok(next, "\n", &next)) && !ret) {
         AVDictionary *cookie_params = NULL;
         AVDictionaryEntry *cookie_entry, *e;
 
@@ -991,23 +1053,19 @@
 
         // continue on to the next cookie if this one cannot be parsed
         if (parse_set_cookie(cookie, &cookie_params))
-            continue;
+            goto skip_cookie;
 
         // if the cookie has no value, skip it
         cookie_entry = av_dict_get(cookie_params, "", NULL, AV_DICT_IGNORE_SUFFIX);
-        if (!cookie_entry || !cookie_entry->value) {
-            av_dict_free(&cookie_params);
-            continue;
-        }
+        if (!cookie_entry || !cookie_entry->value)
+            goto skip_cookie;
 
         // if the cookie has expired, don't add it
         if ((e = av_dict_get(cookie_params, "expires", NULL, 0)) && e->value) {
             struct tm tm_buf = {0};
             if (!parse_set_cookie_expiry_time(e->value, &tm_buf)) {
-                if (av_timegm(&tm_buf) < av_gettime() / 1000000) {
-                    av_dict_free(&cookie_params);
-                    continue;
-                }
+                if (av_timegm(&tm_buf) < av_gettime() / 1000000)
+                    goto skip_cookie;
             }
         }
 
@@ -1015,42 +1073,32 @@
         if ((e = av_dict_get(cookie_params, "domain", NULL, 0)) && e->value) {
             // find the offset comparison is on the min domain (b.com, not a.b.com)
             int domain_offset = strlen(domain) - strlen(e->value);
-            if (domain_offset < 0) {
-                av_dict_free(&cookie_params);
-                continue;
-            }
+            if (domain_offset < 0)
+                goto skip_cookie;
 
             // match the cookie domain
-            if (av_strcasecmp(&domain[domain_offset], e->value)) {
-                av_dict_free(&cookie_params);
-                continue;
-            }
+            if (av_strcasecmp(&domain[domain_offset], e->value))
+                goto skip_cookie;
         }
 
         // ensure this cookie matches the path
         e = av_dict_get(cookie_params, "path", NULL, 0);
-        if (!e || av_strncasecmp(path, e->value, strlen(e->value))) {
-            av_dict_free(&cookie_params);
-            continue;
-        }
+        if (!e || av_strncasecmp(path, e->value, strlen(e->value)))
+            goto skip_cookie;
 
         // cookie parameters match, so copy the value
         if (!*cookies) {
-            if (!(*cookies = av_asprintf("%s=%s", cookie_entry->key, cookie_entry->value))) {
-                ret = AVERROR(ENOMEM);
-                break;
-            }
+            *cookies = av_asprintf("%s=%s", cookie_entry->key, cookie_entry->value);
         } else {
             char *tmp = *cookies;
-            size_t str_size = strlen(cookie_entry->key) + strlen(cookie_entry->value) + strlen(*cookies) + 4;
-            if (!(*cookies = av_malloc(str_size))) {
-                ret = AVERROR(ENOMEM);
-                av_free(tmp);
-                break;
-            }
-            snprintf(*cookies, str_size, "%s; %s=%s", tmp, cookie_entry->key, cookie_entry->value);
+            *cookies = av_asprintf("%s; %s=%s", tmp, cookie_entry->key, cookie_entry->value);
             av_free(tmp);
         }
+        if (!*cookies)
+            ret = AVERROR(ENOMEM);
+
+    skip_cookie:
+        av_dict_free(&cookie_params);
     }
 
     av_free(set_cookies);
@@ -1145,7 +1193,6 @@
 
 #if FF_API_HTTP_USER_AGENT
     if (strcmp(s->user_agent_deprecated, DEFAULT_USER_AGENT)) {
-        av_log(s, AV_LOG_WARNING, "the user-agent option is deprecated, please use user_agent option\n");
         s->user_agent = av_strdup(s->user_agent_deprecated);
     }
 #endif
@@ -1153,6 +1200,12 @@
     if (!has_header(s->headers, "\r\nUser-Agent: "))
         len += av_strlcatf(headers + len, sizeof(headers) - len,
                            "User-Agent: %s\r\n", s->user_agent);
+    if (s->referer) {
+        /* set default headers if needed */
+        if (!has_header(s->headers, "\r\nReferer: "))
+            len += av_strlcatf(headers + len, sizeof(headers) - len,
+                               "Referer: %s\r\n", s->referer);
+    }
     if (!has_header(s->headers, "\r\nAccept: "))
         len += av_strlcpy(headers + len, "Accept: */*\r\n",
                           sizeof(headers) - len);
@@ -1281,6 +1334,9 @@
     int len;
 
     if (s->chunksize != UINT64_MAX) {
+        if (s->chunkend) {
+            return AVERROR_EOF;
+        }
         if (!s->chunksize) {
             char line[32];
             int err;
@@ -1293,11 +1349,19 @@
             s->chunksize = strtoull(line, NULL, 16);
 
             av_log(h, AV_LOG_TRACE,
-                   "Chunked encoding data size: %"PRIu64"'\n",
+                   "Chunked encoding data size: %"PRIu64"\n",
                     s->chunksize);
 
-            if (!s->chunksize)
+            if (!s->chunksize && s->multiple_requests) {
+                http_get_line(s, line, sizeof(line)); // read empty chunk
+                s->chunkend = 1;
                 return 0;
+            }
+            else if (!s->chunksize) {
+                av_log(h, AV_LOG_DEBUG, "Last chunk received, closing conn\n");
+                ffurl_closep(&s->hd);
+                return 0;
+            }
             else if (s->chunksize == UINT64_MAX) {
                 av_log(h, AV_LOG_ERROR, "Invalid chunk size %"PRIu64"\n",
                        s->chunksize);
@@ -1377,6 +1441,7 @@
     HTTPContext *s = h->priv_data;
     int err, new_location, read_ret;
     int64_t seek_ret;
+    int reconnect_delay = 0;
 
     if (!s->hd)
         return AVERROR_EOF;
@@ -1392,25 +1457,35 @@
         return http_buf_read_compressed(h, buf, size);
 #endif /* CONFIG_ZLIB */
     read_ret = http_buf_read(h, buf, size);
-    if (   (read_ret  < 0 && s->reconnect        && (!h->is_streamed || s->reconnect_streamed) && s->filesize > 0 && s->off < s->filesize)
-        || (read_ret == 0 && s->reconnect_at_eof && (!h->is_streamed || s->reconnect_streamed))) {
+    while (read_ret < 0) {
         uint64_t target = h->is_streamed ? 0 : s->off;
 
-        if (s->reconnect_delay > s->reconnect_delay_max)
+        if (read_ret == AVERROR_EXIT)
+            break;
+
+        if (h->is_streamed && !s->reconnect_streamed)
+            break;
+
+        if (!(s->reconnect && s->filesize > 0 && s->off < s->filesize) &&
+            !(s->reconnect_at_eof && read_ret == AVERROR_EOF))
+            break;
+
+        if (reconnect_delay > s->reconnect_delay_max)
             return AVERROR(EIO);
 
-        av_log(h, AV_LOG_INFO, "Will reconnect at %"PRIu64" error=%s.\n", s->off, av_err2str(read_ret));
-        av_usleep(1000U*1000*s->reconnect_delay);
-        s->reconnect_delay = 1 + 2*s->reconnect_delay;
+        av_log(h, AV_LOG_WARNING, "Will reconnect at %"PRIu64" in %d second(s), error=%s.\n", s->off, reconnect_delay, av_err2str(read_ret));
+        err = ff_network_sleep_interruptible(1000U*1000*reconnect_delay, &h->interrupt_callback);
+        if (err != AVERROR(ETIMEDOUT))
+            return err;
+        reconnect_delay = 1 + 2*reconnect_delay;
         seek_ret = http_seek_internal(h, target, SEEK_SET, 1);
-        if (seek_ret != target) {
+        if (seek_ret >= 0 && seek_ret != target) {
             av_log(h, AV_LOG_ERROR, "Failed to reconnect at %"PRIu64".\n", target);
             return read_ret;
         }
 
         read_ret = http_buf_read(h, buf, size);
-    } else
-        s->reconnect_delay = 0;
+    }
 
     return read_ret;
 }
@@ -1547,6 +1622,18 @@
         ((flags & AVIO_FLAG_READ) && s->chunked_post && s->listen)) {
         ret = ffurl_write(s->hd, footer, sizeof(footer) - 1);
         ret = ret > 0 ? 0 : ret;
+        /* flush the receive buffer when it is write only mode */
+        if (!(flags & AVIO_FLAG_READ)) {
+            char buf[1024];
+            int read_ret;
+            s->hd->flags |= AVIO_FLAG_NONBLOCK;
+            read_ret = ffurl_read(s->hd, buf, sizeof(buf));
+            s->hd->flags &= ~AVIO_FLAG_NONBLOCK;
+            if (read_ret < 0 && read_ret != AVERROR(EAGAIN)) {
+                av_log(h, AV_LOG_ERROR, "URL read error:  %d\n", read_ret);
+                ret = read_ret;
+            }
+        }
         s->end_chunked_post = 1;
     }
 

diff --git a/libavformat/icecast.c b/libavformat/icecast.c
index 02e3e38..c93b06b 100644
--- a/libavformat/icecast.c
+++ b/libavformat/icecast.c

@@ -96,7 +96,7 @@
     if (flags & AVIO_FLAG_READ)
         return AVERROR(ENOSYS);
 
-    av_bprint_init(&bp, 0, 1);
+    av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
 
     // Build header strings
     cat_header(&bp, "Ice-Name", s->name);

diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index 6c216ba..f7de26a 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c

@@ -33,6 +33,7 @@
 #endif
 
 #include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
 #include "libavutil/dict.h"
 #include "libavutil/intreadwrite.h"
 #include "avio_internal.h"
@@ -975,19 +976,21 @@
                 }
             }
             if (unsync || tunsync) {
-                int64_t end = avio_tell(pb) + tlen;
-                uint8_t *b;
+                uint8_t *b = buffer;
+                uint8_t *t = buffer;
+                uint8_t *end = t + tlen;
 
-                b = buffer;
-                while (avio_tell(pb) < end && b - buffer < tlen && !pb->eof_reached) {
-                    *b++ = avio_r8(pb);
-                    if (*(b - 1) == 0xff && avio_tell(pb) < end - 1 &&
-                        b - buffer < tlen &&
-                        !pb->eof_reached ) {
-                        uint8_t val = avio_r8(pb);
-                        *b++ = val ? val : avio_r8(pb);
-                    }
+                if (avio_read(pb, buffer, tlen) != tlen) {
+                    av_log(s, AV_LOG_ERROR, "Failed to read tag data\n");
+                    goto seek;
                 }
+
+                while (t != end) {
+                    *b++ = *t++;
+                    if (t != end && t[-1] == 0xff && !t[0])
+                        t++;
+                }
+
                 ffio_init_context(&pb_local, buffer, b - buffer, 0, NULL, NULL, NULL,
                                   NULL);
                 tlen = b - buffer;
@@ -1224,3 +1227,50 @@
     av_freep(&chapters);
     return ret;
 }
+
+int ff_id3v2_parse_priv_dict(AVDictionary **metadata, ID3v2ExtraMeta **extra_meta)
+{
+    ID3v2ExtraMeta *cur;
+    int dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL;
+
+    for (cur = *extra_meta; cur; cur = cur->next) {
+        if (!strcmp(cur->tag, "PRIV")) {
+            ID3v2ExtraMetaPRIV *priv = cur->data;
+            AVBPrint bprint;
+            char *escaped, *key;
+            int i, ret;
+
+            if ((key = av_asprintf(ID3v2_PRIV_METADATA_PREFIX "%s", priv->owner)) == NULL) {
+                return AVERROR(ENOMEM);
+            }
+
+            av_bprint_init(&bprint, priv->datasize + 1, AV_BPRINT_SIZE_UNLIMITED);
+
+            for (i = 0; i < priv->datasize; i++) {
+                if (priv->data[i] < 32 || priv->data[i] > 126 || priv->data[i] == '\\') {
+                    av_bprintf(&bprint, "\\x%02x", priv->data[i]);
+                } else {
+                    av_bprint_chars(&bprint, priv->data[i], 1);
+                }
+            }
+
+            if ((ret = av_bprint_finalize(&bprint, &escaped)) < 0) {
+                av_free(key);
+                return ret;
+            }
+
+            if ((ret = av_dict_set(metadata, key, escaped, dict_flags)) < 0) {
+                av_free(key);
+                av_free(escaped);
+                return ret;
+            }
+        }
+    }
+
+    return 0;
+}
+
+int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta)
+{
+    return ff_id3v2_parse_priv_dict(&s->metadata, extra_meta);
+}

diff --git a/libavformat/id3v2.h b/libavformat/id3v2.h
index 5e64ead..9de0bee 100644
--- a/libavformat/id3v2.h
+++ b/libavformat/id3v2.h

@@ -39,6 +39,8 @@
 #define ID3v2_FLAG_ENCRYPTION  0x0004
 #define ID3v2_FLAG_COMPRESSION 0x0008
 
+#define ID3v2_PRIV_METADATA_PREFIX "id3v2_priv."
+
 enum ID3v2Encoding {
     ID3v2_ENCODING_ISO8859  = 0,
     ID3v2_ENCODING_UTF16BOM = 1,
@@ -167,6 +169,19 @@
  */
 int ff_id3v2_parse_chapters(AVFormatContext *s, ID3v2ExtraMeta **extra_meta);
 
+/**
+ * Parse PRIV tags into a dictionary. The PRIV owner is the metadata key. The
+ * PRIV data is the value, with non-printable characters escaped.
+ */
+int ff_id3v2_parse_priv_dict(AVDictionary **d, ID3v2ExtraMeta **extra_meta);
+
+/**
+ * Add metadata for all PRIV tags in the ID3v2 header. The PRIV owner is the
+ * metadata key. The PRIV data is the value, with non-printable characters
+ * escaped.
+ */
+int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta);
+
 extern const AVMetadataConv ff_id3v2_34_metadata_conv[];
 extern const AVMetadataConv ff_id3v2_4_metadata_conv[];
 

diff --git a/libavformat/id3v2enc.c b/libavformat/id3v2enc.c
index 14de76a..ffe358f 100644
--- a/libavformat/id3v2enc.c
+++ b/libavformat/id3v2enc.c

@@ -96,6 +96,59 @@
     return len + ID3v2_HEADER_SIZE;
 }
 
+/**
+ * Write a priv frame with owner and data. 'key' is the owner prepended with
+ * ID3v2_PRIV_METADATA_PREFIX. 'data' is provided as a string. Any \xXX
+ * (where 'X' is a valid hex digit) will be unescaped to the byte value.
+ */
+static int id3v2_put_priv(ID3v2EncContext *id3, AVIOContext *avioc, const char *key, const char *data)
+{
+    int len;
+    uint8_t *pb;
+    AVIOContext *dyn_buf;
+
+    if (!av_strstart(key, ID3v2_PRIV_METADATA_PREFIX, &key)) {
+        return 0;
+    }
+
+    if (avio_open_dyn_buf(&dyn_buf) < 0)
+        return AVERROR(ENOMEM);
+
+    // owner + null byte.
+    avio_write(dyn_buf, key, strlen(key) + 1);
+
+    while (*data) {
+        if (av_strstart(data, "\\x", &data)) {
+            if (data[0] && data[1] && av_isxdigit(data[0]) && av_isxdigit(data[1])) {
+                char digits[] = {data[0], data[1], 0};
+                avio_w8(dyn_buf, strtol(digits, NULL, 16));
+                data += 2;
+            } else {
+                ffio_free_dyn_buf(&dyn_buf);
+                av_log(avioc, AV_LOG_ERROR, "Invalid escape '\\x%.2s' in metadata tag '"
+                       ID3v2_PRIV_METADATA_PREFIX "%s'.\n", data, key);
+                return AVERROR(EINVAL);
+            }
+        } else {
+            avio_write(dyn_buf, data++, 1);
+        }
+    }
+
+    len = avio_close_dyn_buf(dyn_buf, &pb);
+
+    avio_wb32(avioc, MKBETAG('P', 'R', 'I', 'V'));
+    if (id3->version == 3)
+        avio_wb32(avioc, len);
+    else
+        id3v2_put_size(avioc, len);
+    avio_wb16(avioc, 0);
+    avio_write(avioc, pb, len);
+
+    av_free(pb);
+
+    return len + ID3v2_HEADER_SIZE;
+}
+
 static int id3v2_check_write_tag(ID3v2EncContext *id3, AVIOContext *pb, AVDictionaryEntry *t,
                                  const char table[][4], enum ID3v2Encoding enc)
 {
@@ -186,6 +239,13 @@
             continue;
         }
 
+        if ((ret = id3v2_put_priv(id3, pb, t->key, t->value)) > 0) {
+            id3->len += ret;
+            continue;
+        } else if (ret < 0) {
+            return ret;
+        }
+
         /* unknown tag, write as TXXX frame */
         if ((ret = id3v2_put_ttag(id3, pb, t->key, t->value, MKBETAG('T', 'X', 'X', 'X'), enc)) < 0)
             return ret;

diff --git a/libavformat/img2dec.c b/libavformat/img2dec.c
index ecf64ea..ff4757e 100644
--- a/libavformat/img2dec.c
+++ b/libavformat/img2dec.c

@@ -34,6 +34,7 @@
 #include "internal.h"
 #include "img2.h"
 #include "libavcodec/mjpeg.h"
+#include "libavcodec/xwd.h"
 #include "subtitles.h"
 
 #if HAVE_GLOB
@@ -198,7 +199,7 @@
         return AVERROR(EINVAL);
     }
 
-    av_strlcpy(s->path, s1->filename, sizeof(s->path));
+    av_strlcpy(s->path, s1->url, sizeof(s->path));
     s->img_number = 0;
     s->img_count  = 0;
 
@@ -323,7 +324,8 @@
         if (s1->pb) {
             int probe_buffer_size = 2048;
             uint8_t *probe_buffer = av_realloc(NULL, probe_buffer_size + AVPROBE_PADDING_SIZE);
-            AVInputFormat *fmt = NULL;
+            const AVInputFormat *fmt = NULL;
+            void *fmt_iter = NULL;
             AVProbeData pd = { 0 };
 
             if (!probe_buffer)
@@ -338,9 +340,9 @@
 
             pd.buf = probe_buffer;
             pd.buf_size = probe_buffer_size;
-            pd.filename = s1->filename;
+            pd.filename = s1->url;
 
-            while ((fmt = av_iformat_next(fmt))) {
+            while ((fmt = av_demuxer_iterate(&fmt_iter))) {
                 if (fmt->read_header != ff_img_read_header ||
                     !fmt->read_probe ||
                     (fmt->flags & AVFMT_NOFILE) ||
@@ -878,10 +880,14 @@
 {
     const uint8_t *b = p->buf;
     const uint8_t *end = p->buf + p->buf_size;
+
     if (memcmp(p->buf, "<?xml", 5))
         return 0;
     while (b < end) {
-        b += ff_subtitles_next_line(b);
+        int inc = ff_subtitles_next_line(b);
+        if (!inc)
+            break;
+        b += inc;
         if (b >= end - 4)
             return 0;
         if (!memcmp(b, "<svg", 4))
@@ -969,6 +975,36 @@
     return 0;
 }
 
+static int xwd_probe(AVProbeData *p)
+{
+    const uint8_t *b = p->buf;
+    unsigned width, bpp, bpad, lsize;
+
+    if (   p->buf_size < XWD_HEADER_SIZE
+        || AV_RB32(b     ) < XWD_HEADER_SIZE                          // header size
+        || AV_RB32(b +  4) != XWD_VERSION                             // version
+        || AV_RB32(b +  8) != XWD_Z_PIXMAP                            // format
+        || AV_RB32(b + 12) > 32 || !AV_RB32(b + 12)                   // depth
+        || AV_RB32(b + 16) == 0                                       // width
+        || AV_RB32(b + 20) == 0                                       // height
+        || AV_RB32(b + 28) > 1                                        // byteorder
+        || AV_RB32(b + 32) & ~56 || av_popcount(AV_RB32(b + 32)) != 1 // bitmap unit
+        || AV_RB32(b + 36) > 1                                        // bitorder
+        || AV_RB32(b + 40) & ~56 || av_popcount(AV_RB32(b + 40)) != 1 // padding
+        || AV_RB32(b + 44) > 32 || !AV_RB32(b + 44)                   // bpp
+        || AV_RB32(b + 68) > 256)                                     // colours
+        return 0;
+
+    width = AV_RB32(b + 16);
+    bpad  = AV_RB32(b + 40);
+    bpp   = AV_RB32(b + 44);
+    lsize = AV_RB32(b + 48);
+    if (lsize < FFALIGN(width * bpp, bpad) >> 3)
+        return 0;
+
+    return AVPROBE_SCORE_MAX / 2 + 1;
+}
+
 #define IMAGEAUTO_DEMUXER(imgname, codecid)\
 static const AVClass imgname ## _class = {\
     .class_name = AV_STRINGIFY(imgname) " demuxer",\
@@ -1011,3 +1047,4 @@
 IMAGEAUTO_DEMUXER(tiff,    AV_CODEC_ID_TIFF)
 IMAGEAUTO_DEMUXER(webp,    AV_CODEC_ID_WEBP)
 IMAGEAUTO_DEMUXER(xpm,     AV_CODEC_ID_XPM)
+IMAGEAUTO_DEMUXER(xwd,     AV_CODEC_ID_XWD)

diff --git a/libavformat/img2enc.c b/libavformat/img2enc.c
index d793807..a09cc8e 100644
--- a/libavformat/img2enc.c
+++ b/libavformat/img2enc.c

@@ -42,6 +42,7 @@
     char target[4][1024];
     int update;
     int use_strftime;
+    int frame_pts;
     const char *muxer;
     int use_rename;
 } VideoMuxData;
@@ -52,7 +53,7 @@
     AVStream *st = s->streams[0];
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(st->codecpar->format);
 
-    av_strlcpy(img->path, s->filename, sizeof(img->path));
+    av_strlcpy(img->path, s->url, sizeof(img->path));
 
     /* find format */
     if (s->oformat->flags & AVFMT_NOFILE)
@@ -99,12 +100,17 @@
                 av_log(s, AV_LOG_ERROR, "Could not get frame filename with strftime\n");
                 return AVERROR(EINVAL);
             }
+        } else if (img->frame_pts) {
+            if (av_get_frame_filename2(filename, sizeof(filename), img->path, pkt->pts, AV_FRAME_FILENAME_FLAGS_MULTIPLE) < 0) {
+                av_log(s, AV_LOG_ERROR, "Cannot write filename by pts of the frames.");
+                return AVERROR(EINVAL);
+            }
         } else if (av_get_frame_filename2(filename, sizeof(filename), img->path,
                                           img->img_number,
                                           AV_FRAME_FILENAME_FLAGS_MULTIPLE) < 0 &&
                    img->img_number > 1) {
             av_log(s, AV_LOG_ERROR,
-                   "Could not get frame filename number %d from pattern '%s' (either set updatefirst or use a pattern like %%03d within the filename pattern)\n",
+                   "Could not get frame filename number %d from pattern '%s' (either set update or use a pattern like %%03d within the filename pattern)\n",
                    img->img_number, img->path);
             return AVERROR(EINVAL);
         }
@@ -150,7 +156,7 @@
 
         av_assert0(!img->split_planes);
 
-        ret = avformat_alloc_output_context2(&fmt, NULL, img->muxer, s->filename);
+        ret = avformat_alloc_output_context2(&fmt, NULL, img->muxer, s->url);
         if (ret < 0)
             return ret;
         st = avformat_new_stream(fmt, NULL);
@@ -203,10 +209,10 @@
 #define OFFSET(x) offsetof(VideoMuxData, x)
 #define ENC AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption muxoptions[] = {
-    { "updatefirst",  "continuously overwrite one file", OFFSET(update),  AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0,       1, ENC },
     { "update",       "continuously overwrite one file", OFFSET(update),  AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0,       1, ENC },
     { "start_number", "set first number in the sequence", OFFSET(img_number), AV_OPT_TYPE_INT,  { .i64 = 1 }, 0, INT_MAX, ENC },
     { "strftime",     "use strftime for filename", OFFSET(use_strftime),  AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, ENC },
+    { "frame_pts",    "use current frame pts for filename", OFFSET(frame_pts),  AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, ENC },
     { "atomic_writing", "write files atomically (using temporary files and renames)", OFFSET(use_rename), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, ENC },
     { NULL },
 };

diff --git a/libavformat/internal.h b/libavformat/internal.h
index d136c79..399d0a6 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h

@@ -121,12 +121,6 @@
     int avoid_negative_ts_use_pts;
 
     /**
-     * Whether or not a header has already been written
-     */
-    int header_written;
-    int write_header_ret;
-
-    /**
      * Timestamp of the end of the shortest stream.
      */
     int64_t shortest_end;
@@ -196,6 +190,8 @@
      * Whether the internal avctx needs to be updated from codecpar (after a late change to codecpar)
      */
     int need_context_update;
+
+    FFFrac *priv_pts;
 };
 
 #ifdef __GNUC__
@@ -215,6 +211,14 @@
 
 struct tm *ff_brktimegm(time_t secs, struct tm *tm);
 
+/**
+ * Automatically create sub-directories
+ *
+ * @param path will create sub-directories by path
+ * @return 0, or < 0 on error
+ */
+int ff_mkdir_p(const char *path);
+
 char *ff_data_to_hex(char *buf, const uint8_t *src, int size, int lowercase);
 
 /**
@@ -244,6 +248,14 @@
 uint64_t ff_ntp_time(void);
 
 /**
+ * Get the NTP time stamp formatted as per the RFC-5905.
+ *
+ * @param ntp_time NTP time in micro seconds (since NTP epoch)
+ * @return the formatted NTP time stamp
+ */
+uint64_t ff_get_formatted_ntp_time(uint64_t ntp_time_us);
+
+/**
  * Append the media-specific SDP fragment for the media stream c
  * to the buffer buff.
  *
@@ -303,6 +315,42 @@
  */
 int ff_get_line(AVIOContext *s, char *buf, int maxlen);
 
+/**
+ * Same as ff_get_line but strip the white-space characters in the text tail
+ *
+ * @param s the read-only AVIOContext
+ * @param buf buffer to store the read line
+ * @param maxlen size of the buffer
+ * @return the length of the string written in the buffer
+ */
+int ff_get_chomp_line(AVIOContext *s, char *buf, int maxlen);
+
+/**
+ * Read a whole line of text from AVIOContext to an AVBPrint buffer. Stop
+ * reading after reaching a \\r, a \\n, a \\r\\n, a \\0 or EOF.  The line
+ * ending characters are NOT included in the buffer, but they are skipped on
+ * the input.
+ *
+ * @param s the read-only AVIOContext
+ * @param bp the AVBPrint buffer
+ * @return the length of the read line, not including the line endings,
+ *         negative on error.
+ */
+int64_t ff_read_line_to_bprint(AVIOContext *s, AVBPrint *bp);
+
+/**
+ * Read a whole line of text from AVIOContext to an AVBPrint buffer overwriting
+ * its contents. Stop reading after reaching a \\r, a \\n, a \\r\\n, a \\0 or
+ * EOF. The line ending characters are NOT included in the buffer, but they
+ * are skipped on the input.
+ *
+ * @param s the read-only AVIOContext
+ * @param bp the AVBPrint buffer
+ * @return the length of the read line not including the line endings,
+ *         negative on error, or if the buffer becomes truncated.
+ */
+int64_t ff_read_line_to_bprint_overwrite(AVIOContext *s, AVBPrint *bp);
+
 #define SPACE_CHARS " \t\r\n"
 
 /**
@@ -545,8 +593,11 @@
     int ret = 0;
     if (rename(oldpath, newpath) == -1) {
         ret = AVERROR(errno);
-        if (logctx)
-            av_log(logctx, AV_LOG_ERROR, "failed to rename file %s to %s\n", oldpath, newpath);
+        if (logctx) {
+            char err[AV_ERROR_MAX_STRING_SIZE] = {0};
+            av_make_error_string(err, AV_ERROR_MAX_STRING_SIZE, ret);
+            av_log(logctx, AV_LOG_ERROR, "failed to rename file %s to %s: %s\n", oldpath, newpath, err);
+        }
     }
     return ret;
 }
@@ -555,6 +606,8 @@
  * Allocate extradata with additional AV_INPUT_BUFFER_PADDING_SIZE at end
  * which is always set to 0.
  *
+ * Previously allocated extradata in par will be freed.
+ *
  * @param size size of extradata
  * @return 0 if OK, AVERROR_xxx on error
  */
@@ -623,6 +676,14 @@
 void ff_format_io_close(AVFormatContext *s, AVIOContext **pb);
 
 /**
+ * Utility function to check if the file uses http or https protocol
+ *
+ * @param s AVFormatContext
+ * @param filename URL or file name to open for writing
+ */
+int ff_is_http_proto(char *filename);
+
+/**
  * Parse creation_time in AVFormatContext metadata if exists and warn if the
  * parsing fails.
  *
@@ -685,4 +746,55 @@
 int ff_interleaved_peek(AVFormatContext *s, int stream,
                         AVPacket *pkt, int add_offset);
 
+
+int ff_lock_avformat(void);
+int ff_unlock_avformat(void);
+
+/**
+ * Set AVFormatContext url field to the provided pointer. The pointer must
+ * point to a valid string. The existing url field is freed if necessary. Also
+ * set the legacy filename field to the same string which was provided in url.
+ */
+void ff_format_set_url(AVFormatContext *s, char *url);
+
+#define FF_PACKETLIST_FLAG_REF_PACKET (1 << 0) /**< Create a new reference for the packet instead of
+                                                    transferring the ownership of the existing one to the
+                                                    list. */
+
+/**
+ * Append an AVPacket to the list.
+ *
+ * @param head  List head element
+ * @param tail  List tail element
+ * @param pkt   The packet being appended
+ * @param flags Any combination of FF_PACKETLIST_FLAG_* flags
+ * @return 0 on success, negative AVERROR value on failure. On failure,
+           the list is unchanged
+ */
+int ff_packet_list_put(AVPacketList **head, AVPacketList **tail,
+                       AVPacket *pkt, int flags);
+
+/**
+ * Remove the oldest AVPacket in the list and return it.
+ *
+ * @note The pkt will be overwritten completely. The caller owns the
+ *       packet and must unref it by itself.
+ *
+ * @param head List head element
+ * @param tail List tail element
+ * @param pkt  Pointer to an initialized AVPacket struct
+ */
+int ff_packet_list_get(AVPacketList **head, AVPacketList **tail,
+                       AVPacket *pkt);
+
+/**
+ * Wipe the list and unref all the packets in it.
+ *
+ * @param head List head element
+ * @param tail List tail element
+ */
+void ff_packet_list_free(AVPacketList **head, AVPacketList **tail);
+
+void avpriv_register_devices(const AVOutputFormat * const o[], const AVInputFormat * const i[]);
+
 #endif /* AVFORMAT_INTERNAL_H */

diff --git a/libavformat/ip.c b/libavformat/ip.c
new file mode 100644
index 0000000..70c5529
--- /dev/null
+++ b/libavformat/ip.c

@@ -0,0 +1,159 @@
+/*
+ * IP common code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FFmpeg; if not, write to the Free Software * Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "ip.h"
+#include "libavutil/avstring.h"
+
+static int compare_addr(const struct sockaddr_storage *a,
+                        const struct sockaddr_storage *b)
+{
+    if (a->ss_family != b->ss_family)
+        return 1;
+    if (a->ss_family == AF_INET) {
+        return (((const struct sockaddr_in *)a)->sin_addr.s_addr !=
+                ((const struct sockaddr_in *)b)->sin_addr.s_addr);
+    }
+
+#if HAVE_STRUCT_SOCKADDR_IN6
+    if (a->ss_family == AF_INET6) {
+        const uint8_t *s6_addr_a = ((const struct sockaddr_in6 *)a)->sin6_addr.s6_addr;
+        const uint8_t *s6_addr_b = ((const struct sockaddr_in6 *)b)->sin6_addr.s6_addr;
+        return memcmp(s6_addr_a, s6_addr_b, 16);
+    }
+#endif
+    return 1;
+}
+
+int ff_ip_check_source_lists(struct sockaddr_storage *source_addr_ptr, IPSourceFilters *s)
+{
+    int i;
+    if (s->nb_exclude_addrs) {
+        for (i = 0; i < s->nb_exclude_addrs; i++) {
+            if (!compare_addr(source_addr_ptr, &s->exclude_addrs[i]))
+                return 1;
+        }
+    }
+    if (s->nb_include_addrs) {
+        for (i = 0; i < s->nb_include_addrs; i++) {
+            if (!compare_addr(source_addr_ptr, &s->include_addrs[i]))
+                return 0;
+        }
+        return 1;
+    }
+    return 0;
+}
+
+struct addrinfo *ff_ip_resolve_host(void *log_ctx,
+                                    const char *hostname, int port,
+                                    int type, int family, int flags)
+{
+    struct addrinfo hints = { 0 }, *res = 0;
+    int error;
+    char sport[16];
+    const char *node = 0, *service = "0";
+
+    if (port > 0) {
+        snprintf(sport, sizeof(sport), "%d", port);
+        service = sport;
+    }
+    if ((hostname) && (hostname[0] != '\0') && (hostname[0] != '?')) {
+        node = hostname;
+    }
+    hints.ai_socktype = type;
+    hints.ai_family   = family;
+    hints.ai_flags    = flags;
+    if ((error = getaddrinfo(node, service, &hints, &res))) {
+        res = NULL;
+        av_log(log_ctx, AV_LOG_ERROR, "getaddrinfo(%s, %s): %s\n",
+               node ? node : "unknown",
+               service,
+               gai_strerror(error));
+    }
+
+    return res;
+}
+
+
+static int ip_parse_addr_list(void *log_ctx, const char *buf,
+                              struct sockaddr_storage **address_list_ptr,
+                              int *address_list_size_ptr)
+{
+    struct addrinfo *ai = NULL;
+
+    /* Resolve all of the IPs */
+
+    while (buf && buf[0]) {
+        char* host = av_get_token(&buf, ",");
+        if (!host)
+            return AVERROR(ENOMEM);
+
+        ai = ff_ip_resolve_host(log_ctx, host, 0, SOCK_DGRAM, AF_UNSPEC, 0);
+        av_freep(&host);
+
+        if (ai) {
+            struct sockaddr_storage source_addr = {0};
+            memcpy(&source_addr, ai->ai_addr, ai->ai_addrlen);
+            freeaddrinfo(ai);
+            av_dynarray2_add((void **)address_list_ptr, address_list_size_ptr, sizeof(source_addr), (uint8_t *)&source_addr);
+            if (!*address_list_ptr)
+                return AVERROR(ENOMEM);
+        } else {
+            return AVERROR(EINVAL);
+        }
+
+        if (*buf)
+            buf++;
+    }
+
+    return 0;
+}
+
+static int ip_parse_sources_and_blocks(void *log_ctx, const char *buf, IPSourceFilters *filters, int parse_include_list)
+{
+    int ret;
+    if (parse_include_list)
+        ret = ip_parse_addr_list(log_ctx, buf, &filters->include_addrs, &filters->nb_include_addrs);
+    else
+        ret = ip_parse_addr_list(log_ctx, buf, &filters->exclude_addrs, &filters->nb_exclude_addrs);
+
+    if (ret >= 0 && filters->nb_include_addrs && filters->nb_exclude_addrs) {
+        av_log(log_ctx, AV_LOG_ERROR, "Simultaneously including and excluding sources is not supported.\n");
+        return AVERROR(EINVAL);
+    }
+    return ret;
+}
+
+int ff_ip_parse_sources(void *log_ctx, const char *buf, IPSourceFilters *filters)
+{
+    return ip_parse_sources_and_blocks(log_ctx, buf, filters, 1);
+}
+
+int ff_ip_parse_blocks(void *log_ctx, const char *buf, IPSourceFilters *filters)
+{
+    return ip_parse_sources_and_blocks(log_ctx, buf, filters, 0);
+}
+
+void ff_ip_reset_filters(IPSourceFilters *filters)
+{
+    av_freep(&filters->exclude_addrs);
+    av_freep(&filters->include_addrs);
+    filters->nb_include_addrs = 0;
+    filters->nb_exclude_addrs = 0;
+}

diff --git a/libavformat/ip.h b/libavformat/ip.h
new file mode 100644
index 0000000..b76cdab
--- /dev/null
+++ b/libavformat/ip.h

@@ -0,0 +1,72 @@
+/*
+ * IP common code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FFmpeg; if not, write to the Free Software * Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_IP_H
+#define AVFORMAT_IP_H
+
+#include "network.h"
+
+/**
+ * Structure for storing IP (UDP) source filters or block lists.
+ */
+typedef struct IPSourceFilters {
+    int nb_include_addrs;
+    int nb_exclude_addrs;
+    struct sockaddr_storage *include_addrs;
+    struct sockaddr_storage *exclude_addrs;
+} IPSourceFilters;
+
+/**
+ * Checks the source address against a given IP source filter.
+ * @return 0 if packet should be processed based on the filter, 1 if the packet
+ *         can be dropped.
+ */
+int ff_ip_check_source_lists(struct sockaddr_storage *source_addr_ptr, IPSourceFilters *s);
+
+/**
+ * Resolves hostname into an addrinfo structure.
+ * @return addrinfo structure which should be freed by the user, NULL in case
+ *         of error.
+ */
+struct addrinfo *ff_ip_resolve_host(void *log_ctx,
+                                    const char *hostname, int port,
+                                    int type, int family, int flags);
+
+/**
+ * Parses the address[,address] source list in buf and adds it to the filters
+ * in the IPSourceFilters structure.
+ * @return 0 on success, < 0 AVERROR code on error.
+ */
+int ff_ip_parse_sources(void *log_ctx, const char *buf, IPSourceFilters *filters);
+
+/**
+ * Parses the address[,address] source block list in buf and adds it to the
+ * filters in the IPSourceFilters structure.
+ * @return 0 on success, < 0 AVERROR code on error.
+ */
+int ff_ip_parse_blocks(void *log_ctx, const char *buf, IPSourceFilters *filters);
+
+/**
+ * Resets the IP filter list and frees the internal fields of an
+ * IPSourceFilters structure.
+ */
+void ff_ip_reset_filters(IPSourceFilters *filters);
+
+#endif /* AVFORMAT_IP_H */

diff --git a/libavformat/isom.c b/libavformat/isom.c
index 77983c5..ca9d22e 100644
--- a/libavformat/isom.c
+++ b/libavformat/isom.c

@@ -186,7 +186,9 @@
     { AV_CODEC_ID_H264, MKTAG('x', 'a', 'l', 'g') }, /* XAVC-L HD422 produced by FCP */
     { AV_CODEC_ID_H264, MKTAG('a', 'v', 'l', 'g') }, /* Panasonic P2 AVC-LongG */
 
+    { AV_CODEC_ID_VP8,  MKTAG('v', 'p', '0', '8') }, /* VP8 */
     { AV_CODEC_ID_VP9,  MKTAG('v', 'p', '0', '9') }, /* VP9 */
+    { AV_CODEC_ID_AV1,  MKTAG('a', 'v', '0', '1') }, /* AV1 */
 
     { AV_CODEC_ID_MPEG1VIDEO, MKTAG('m', '1', 'v', ' ') },
     { AV_CODEC_ID_MPEG1VIDEO, MKTAG('m', '1', 'v', '1') }, /* Apple MPEG-1 Camcorder */
@@ -372,6 +374,11 @@
     { AV_CODEC_ID_NONE, 0 },
 };
 
+const AVCodecTag ff_codec_movdata_tags[] = {
+    { AV_CODEC_ID_BIN_DATA, MKTAG('g', 'p', 'm', 'd') },
+    { AV_CODEC_ID_NONE, 0 },
+};
+
 /* map numeric codes from mdhd atom to ISO 639 */
 /* cf. QTFileFormat.pdf p253, qtff.pdf p205 */
 /* http://developer.apple.com/documentation/mac/Text/Text-368.html */
@@ -524,8 +531,7 @@
     if (tag == MP4DecSpecificDescrTag) {
         av_log(fc, AV_LOG_TRACE, "Specific MPEG-4 header len=%d\n", len);
         if (!len || (uint64_t)len > (1<<30))
-            return -1;
-        av_free(st->codecpar->extradata);
+            return AVERROR_INVALIDDATA;
         if ((ret = ff_get_extradata(fc, st->codecpar, pb, len)) < 0)
             return ret;
         if (st->codecpar->codec_id == AV_CODEC_ID_AAC) {

diff --git a/libavformat/isom.h b/libavformat/isom.h
index b9380e9..e629663 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h

@@ -27,6 +27,7 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include "libavutil/encryption_info.h"
 #include "libavutil/mastering_display_metadata.h"
 #include "libavutil/spherical.h"
 #include "libavutil/stereo3d.h"
@@ -40,6 +41,7 @@
 extern const AVCodecTag ff_codec_movvideo_tags[];
 extern const AVCodecTag ff_codec_movaudio_tags[];
 extern const AVCodecTag ff_codec_movsubtitle_tags[];
+extern const AVCodecTag ff_codec_movdata_tags[];
 
 int ff_mov_iso639_to_lang(const char lang[4], int mp4);
 int ff_mov_lang_to_iso639(unsigned code, char to[4]);
@@ -93,7 +95,6 @@
     unsigned duration;
     unsigned size;
     unsigned flags;
-    int64_t time;
 } MOVFragment;
 
 typedef struct MOVTrackExt {
@@ -109,17 +110,42 @@
     unsigned int index;
 } MOVSbgp;
 
+typedef struct MOVEncryptionIndex {
+    // Individual encrypted samples.  If there are no elements, then the default
+    // settings will be used.
+    unsigned int nb_encrypted_samples;
+    AVEncryptionInfo **encrypted_samples;
+
+    uint8_t* auxiliary_info_sizes;
+    size_t auxiliary_info_sample_count;
+    uint8_t auxiliary_info_default_size;
+    uint64_t* auxiliary_offsets;  ///< Absolute seek position
+    size_t auxiliary_offsets_count;
+} MOVEncryptionIndex;
+
+typedef struct MOVFragmentStreamInfo {
+    int id;
+    int64_t sidx_pts;
+    int64_t first_tfra_pts;
+    int64_t tfdt_dts;
+    int index_entry;
+    MOVEncryptionIndex *encryption_index;
+} MOVFragmentStreamInfo;
+
 typedef struct MOVFragmentIndexItem {
     int64_t moof_offset;
-    int64_t time;
     int headers_read;
+    int current;
+    int nb_stream_info;
+    MOVFragmentStreamInfo * stream_info;
 } MOVFragmentIndexItem;
 
 typedef struct MOVFragmentIndex {
-    unsigned track_id;
-    unsigned item_count;
-    unsigned current_item;
-    MOVFragmentIndexItem *items;
+    int allocated_size;
+    int complete;
+    int current;
+    int nb_items;
+    MOVFragmentIndexItem * item;
 } MOVFragmentIndex;
 
 typedef struct MOVIndexRange {
@@ -158,6 +184,7 @@
     int *keyframes;
     int time_scale;
     int64_t time_offset;  ///< time offset of the edit list entries
+    int64_t min_corrected_pts;  ///< minimum Composition time shown by the edits excluding empty edits.
     int current_sample;
     int64_t current_index;
     MOVIndexRange* index_ranges;
@@ -191,6 +218,7 @@
     int *extradata_size;
     int last_stsd_index;
     int stsd_count;
+    int stsd_version;
 
     int32_t *display_matrix;
     AVStereo3D *stereo3d;
@@ -204,15 +232,10 @@
 
     int has_sidx;  // If there is an sidx entry for this stream.
     struct {
-        int use_subsamples;
-        uint8_t* auxiliary_info;
-        uint8_t* auxiliary_info_end;
-        uint8_t* auxiliary_info_pos;
-        uint8_t auxiliary_info_default_size;
-        uint8_t* auxiliary_info_sizes;
-        size_t auxiliary_info_sizes_count;
-        int64_t auxiliary_info_index;
         struct AVAESCTR* aes_ctr;
+        unsigned int per_sample_iv_size;  // Either 0, 8, or 16.
+        AVEncryptionInfo *default_encrypted_sample;
+        MOVEncryptionIndex *encryption_index;
     } cenc;
 } MOVStreamContext;
 
@@ -250,9 +273,7 @@
     int moov_retry;
     int use_mfra_for;
     int has_looked_for_mfra;
-    MOVFragmentIndex** fragment_index_data;
-    unsigned fragment_index_count;
-    int fragment_index_complete;
+    MOVFragmentIndex frag_index;
     int atom_depth;
     unsigned int aax_mode;  ///< 'aax' file has been detected
     uint8_t file_key[20];
@@ -310,6 +331,11 @@
 #define MOV_TKHD_FLAG_IN_PREVIEW    0x0004
 #define MOV_TKHD_FLAG_IN_POSTER     0x0008
 
+#define MOV_SAMPLE_DEPENDENCY_UNKNOWN 0x0
+#define MOV_SAMPLE_DEPENDENCY_YES     0x1
+#define MOV_SAMPLE_DEPENDENCY_NO      0x2
+
+
 #define TAG_IS_AVCI(tag)                    \
     ((tag) == MKTAG('a', 'i', '5', 'p') ||  \
      (tag) == MKTAG('a', 'i', '5', 'q') ||  \
@@ -328,7 +354,6 @@
 
 
 int ff_mov_read_esds(AVFormatContext *fc, AVIOContext *pb);
-enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags);
 
 int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries);
 void ff_mov_write_chan(AVIOContext *pb, int64_t channel_layout);
@@ -337,4 +362,18 @@
 #define FF_MOV_FLAG_MFRA_DTS 1
 #define FF_MOV_FLAG_MFRA_PTS 2
 
+/**
+ * Compute codec id for 'lpcm' tag.
+ * See CoreAudioTypes and AudioStreamBasicDescription at Apple.
+ */
+static inline enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags)
+{
+    /* lpcm flags:
+     * 0x1 = float
+     * 0x2 = big-endian
+     * 0x4 = signed
+     */
+    return ff_get_pcm_codec_id(bps, flags & 1, flags & 2, flags & 4 ? -1 : 0);
+}
+
 #endif /* AVFORMAT_ISOM_H */

diff --git a/libavformat/ivfenc.c b/libavformat/ivfenc.c
index fdc0ee0..66441a2 100644
--- a/libavformat/ivfenc.c
+++ b/libavformat/ivfenc.c

@@ -37,14 +37,18 @@
     }
     par = s->streams[0]->codecpar;
     if (par->codec_type != AVMEDIA_TYPE_VIDEO ||
-        !(par->codec_id == AV_CODEC_ID_VP8 || par->codec_id == AV_CODEC_ID_VP9)) {
-        av_log(s, AV_LOG_ERROR, "Currently only VP8 and VP9 are supported!\n");
+        !(par->codec_id == AV_CODEC_ID_AV1 ||
+          par->codec_id == AV_CODEC_ID_VP8 ||
+          par->codec_id == AV_CODEC_ID_VP9)) {
+        av_log(s, AV_LOG_ERROR, "Currently only VP8, VP9 and AV1 are supported!\n");
         return AVERROR(EINVAL);
     }
     avio_write(pb, "DKIF", 4);
     avio_wl16(pb, 0); // version
     avio_wl16(pb, 32); // header length
-    avio_wl32(pb, par->codec_tag ? par->codec_tag : par->codec_id == AV_CODEC_ID_VP9 ? AV_RL32("VP90") : AV_RL32("VP80"));
+    avio_wl32(pb,
+              par->codec_id == AV_CODEC_ID_VP9 ? AV_RL32("VP90") :
+              par->codec_id == AV_CODEC_ID_VP8 ? AV_RL32("VP80") : AV_RL32("AV01"));
     avio_wl16(pb, par->width);
     avio_wl16(pb, par->height);
     avio_wl32(pb, s->streams[0]->time_base.den);
@@ -100,6 +104,7 @@
 static const AVCodecTag codec_ivf_tags[] = {
     { AV_CODEC_ID_VP8,  MKTAG('V', 'P', '8', '0') },
     { AV_CODEC_ID_VP9,  MKTAG('V', 'P', '9', '0') },
+    { AV_CODEC_ID_AV1,  MKTAG('A', 'V', '0', '1') },
     { AV_CODEC_ID_NONE, 0 }
 };
 

diff --git a/libavformat/latmenc.c b/libavformat/latmenc.c
index c919976..273197b 100644
--- a/libavformat/latmenc.c
+++ b/libavformat/latmenc.c

@@ -128,7 +128,7 @@
                 int ret = init_get_bits8(&gb, par->extradata, par->extradata_size);
                 av_assert0(ret >= 0); // extradata size has been checked already, so this should not fail
                 skip_bits_long(&gb, ctx->off + 3);
-                avpriv_copy_pce_data(bs, &gb);
+                ff_copy_pce_data(bs, &gb);
             }
         }
 

diff --git a/libavformat/libavformat.v b/libavformat/libavformat.v
index c961cd8..47d5ddc 100644
--- a/libavformat/libavformat.v
+++ b/libavformat/libavformat.v

@@ -1,19 +1,6 @@
 LIBAVFORMAT_MAJOR {
     global:
         av*;
-        #FIXME those are for ffserver
-        ff_inet_aton;
-        ff_socket_nonblock;
-        ff_rtsp_parse_line;
-        ff_rtp_get_local_rtp_port;
-        ff_rtp_get_local_rtcp_port;
-        ffio_open_dyn_packet_buf;
-        ffio_set_buf_size;
-        ffurl_close;
-        ffurl_open;
-        ffurl_write;
-        #those are deprecated, remove on next bump
-        url_feof;
     local:
         *;
 };

diff --git a/libavformat/libopenmpt.c b/libavformat/libopenmpt.c
index af6eb1a..0fff702 100644
--- a/libavformat/libopenmpt.c
+++ b/libavformat/libopenmpt.c

@@ -21,6 +21,14 @@
 
 #include <libopenmpt/libopenmpt.h>
 #include <libopenmpt/libopenmpt_stream_callbacks_file.h>
+#include <libopenmpt/libopenmpt_version.h>
+/* Shims to support libopenmpt < 0.3.0 (as documented by libopenmpt) */
+#if !defined(OPENMPT_API_VERSION_MAKE)
+#define OPENMPT_API_VERSION_MAKE(major, minor, patch) (((major)<<24)|((minor)<<16)|((patch)<<0))
+#endif
+#if !defined(OPENMPT_API_VERSION_AT_LEAST)
+#define OPENMPT_API_VERSION_AT_LEAST(major, minor, patch) (OPENMPT_API_VERSION >= OPENMPT_API_VERSION_MAKE((major), (minor), (patch)))
+#endif
 
 #include "libavutil/avstring.h"
 #include "libavutil/opt.h"
@@ -72,13 +80,17 @@
 {
     AVStream *st;
     OpenMPTContext *openmpt = s->priv_data;
-    int64_t size = avio_size(s->pb);
-    if (size <= 0)
-        return AVERROR_INVALIDDATA;
-    char *buf = av_malloc(size);
+    int64_t size;
+    char *buf;
+#if OPENMPT_API_VERSION_AT_LEAST(0,3,0)
+    int error;
+#endif
     int ret;
 
-
+    size = avio_size(s->pb);
+    if (size <= 0)
+        return AVERROR_INVALIDDATA;
+    buf = av_malloc(size);
     if (!buf)
         return AVERROR(ENOMEM);
     size = avio_read(s->pb, buf, size);
@@ -88,10 +100,24 @@
         return size;
     }
 
+#if OPENMPT_API_VERSION_AT_LEAST(0,3,0)
+    error = OPENMPT_ERROR_OK;
+    openmpt->module = openmpt_module_create_from_memory2(buf, size, openmpt_logfunc, s, NULL, NULL, &error, NULL, NULL);
+    av_freep(&buf);
+    if (!openmpt->module) {
+        if (error == OPENMPT_ERROR_OUT_OF_MEMORY)
+            return AVERROR(ENOMEM);
+        else if (error >= OPENMPT_ERROR_GENERAL)
+            return AVERROR_INVALIDDATA;
+        else
+            return AVERROR_UNKNOWN;
+    }
+#else
     openmpt->module = openmpt_module_create_from_memory(buf, size, openmpt_logfunc, s, NULL);
     av_freep(&buf);
     if (!openmpt->module)
             return AVERROR_INVALIDDATA;
+#endif
 
     openmpt->channels = av_get_channel_layout_nb_channels(openmpt->layout);
 
@@ -192,6 +218,62 @@
     return 0;
 }
 
+static int probe_openmpt_extension(AVProbeData *p)
+{
+    const char *ext;
+    if (p->filename) {
+        ext = strrchr(p->filename, '.');
+        if (ext && strlen(ext + 1) > 0) {
+            ext++;  /* skip '.' */
+            if (openmpt_is_extension_supported(ext) == 1)
+                return AVPROBE_SCORE_EXTENSION;
+        }
+    }
+    return 0;
+}
+
+static int read_probe_openmpt(AVProbeData *p)
+{
+#if OPENMPT_API_VERSION_AT_LEAST(0,3,0)
+    int probe_result;
+    if (p->buf && p->buf_size > 0) {
+        probe_result = openmpt_probe_file_header_without_filesize(
+                           OPENMPT_PROBE_FILE_HEADER_FLAGS_DEFAULT,
+                           p->buf, p->buf_size,
+                           &openmpt_logfunc, NULL, NULL, NULL, NULL, NULL);
+        if (probe_result == OPENMPT_PROBE_FILE_HEADER_RESULT_SUCCESS) {
+            /* As probing here relies on code external to FFmpeg, do not return
+             * AVPROBE_SCORE_MAX in order to reduce the impact in the rare
+             * cases of false positives.
+             */
+            return AVPROBE_SCORE_MIME + 1;
+        } else if (probe_result == OPENMPT_PROBE_FILE_HEADER_RESULT_WANTMOREDATA) {
+            if (probe_openmpt_extension(p) > 0) {
+                return AVPROBE_SCORE_RETRY;
+            } else {
+                if (p->buf_size >= openmpt_probe_file_header_get_recommended_size()) {
+                    /* We have already received the recommended amount of data
+                     * and still cannot decide. Return a rather low score.
+                     */
+                    return AVPROBE_SCORE_RETRY / 2;
+                } else {
+                    /* The file extension is unknown and we have very few data
+                     * bytes available. libopenmpt cannot decide anything here,
+                     * and returning any score > 0 would result in successfull
+                     * probing of random data.
+                     */
+                    return 0;
+                }
+            }
+        } else if (probe_result == OPENMPT_PROBE_FILE_HEADER_RESULT_FAILURE) {
+            return 0;
+        }
+    }
+#endif
+    /* for older libopenmpt, fall back to file extension probing */
+    return probe_openmpt_extension(p);
+}
+
 static const AVClass class_openmpt = {
     .class_name = "libopenmpt",
     .item_name  = av_default_item_name,
@@ -203,10 +285,15 @@
     .name           = "libopenmpt",
     .long_name      = NULL_IF_CONFIG_SMALL("Tracker formats (libopenmpt)"),
     .priv_data_size = sizeof(OpenMPTContext),
+    .read_probe     = read_probe_openmpt,
     .read_header    = read_header_openmpt,
     .read_packet    = read_packet_openmpt,
     .read_close     = read_close_openmpt,
     .read_seek      = read_seek_openmpt,
     .priv_class     = &class_openmpt,
-    .extensions     = "669,amf,ams,dbm,digi,dmf,dsm,far,gdm,imf,it,j2b,m15,mdl,med,mmcmp,mms,mo3,mod,mptm,mt2,mtm,nst,okt,plm,ppm,psm,pt36,ptm,s3m,sfx,sfx2,stk,stm,ult,umx,wow,xm,xpk",
+#if OPENMPT_API_VERSION_AT_LEAST(0,3,0)
+    .extensions     = "669,amf,ams,dbm,digi,dmf,dsm,dtm,far,gdm,ice,imf,it,j2b,m15,mdl,med,mmcmp,mms,mo3,mod,mptm,mt2,mtm,nst,okt,plm,ppm,psm,pt36,ptm,s3m,sfx,sfx2,st26,stk,stm,stp,ult,umx,wow,xm,xpk",
+#else
+    .extensions     = "669,amf,ams,dbm,digi,dmf,dsm,far,gdm,ice,imf,it,j2b,m15,mdl,med,mmcmp,mms,mo3,mod,mptm,mt2,mtm,nst,okt,plm,ppm,psm,pt36,ptm,s3m,sfx,sfx2,st26,stk,stm,ult,umx,wow,xm,xpk",
+#endif
 };

diff --git a/libavformat/librtmp.c b/libavformat/librtmp.c
index f3cfa9a..43013e4 100644
--- a/libavformat/librtmp.c
+++ b/libavformat/librtmp.c

@@ -261,7 +261,10 @@
     LibRTMPContext *ctx = s->priv_data;
     RTMP *r = &ctx->rtmp;
 
-    return RTMP_Write(r, buf, size);
+    int ret = RTMP_Write(r, buf, size);
+    if (!ret)
+        return AVERROR_EOF;
+    return ret;
 }
 
 static int rtmp_read(URLContext *s, uint8_t *buf, int size)
@@ -269,7 +272,10 @@
     LibRTMPContext *ctx = s->priv_data;
     RTMP *r = &ctx->rtmp;
 
-    return RTMP_Read(r, buf, size);
+    int ret = RTMP_Read(r, buf, size);
+    if (!ret)
+        return AVERROR_EOF;
+    return ret;
 }
 
 static int rtmp_read_pause(URLContext *s, int pause)

diff --git a/libavformat/libsmbclient.c b/libavformat/libsmbclient.c
index b68cd8b..3285868 100644
--- a/libavformat/libsmbclient.c
+++ b/libavformat/libsmbclient.c

@@ -166,7 +166,7 @@
         return ret;
     }
 
-    return bytes_read;
+    return bytes_read ? bytes_read : AVERROR_EOF;
 }
 
 static int libsmbc_write(URLContext *h, const unsigned char *buf, int size)

diff --git a/libavformat/libsrt.c b/libavformat/libsrt.c
new file mode 100644
index 0000000..fbfd6ac
--- /dev/null
+++ b/libavformat/libsrt.c

@@ -0,0 +1,603 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Haivision Open SRT (Secure Reliable Transport) protocol
+ */
+
+#include <srt/srt.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/time.h"
+
+#include "avformat.h"
+#include "internal.h"
+#include "network.h"
+#include "os_support.h"
+#include "url.h"
+
+/* This is for MPEG-TS and it's a default SRTO_PAYLOADSIZE for SRTT_LIVE (8 TS packets) */
+#ifndef SRT_LIVE_DEFAULT_PAYLOAD_SIZE
+#define SRT_LIVE_DEFAULT_PAYLOAD_SIZE 1316
+#endif
+
+/* This is the maximum payload size for Live mode, should you have a different payload type than MPEG-TS */
+#ifndef SRT_LIVE_MAX_PAYLOAD_SIZE
+#define SRT_LIVE_MAX_PAYLOAD_SIZE 1456
+#endif
+
+enum SRTMode {
+    SRT_MODE_CALLER = 0,
+    SRT_MODE_LISTENER = 1,
+    SRT_MODE_RENDEZVOUS = 2
+};
+
+typedef struct SRTContext {
+    const AVClass *class;
+    int fd;
+    int eid;
+    int64_t rw_timeout;
+    int64_t listen_timeout;
+    int recv_buffer_size;
+    int send_buffer_size;
+
+    int64_t maxbw;
+    int pbkeylen;
+    char *passphrase;
+    int mss;
+    int ffs;
+    int ipttl;
+    int iptos;
+    int64_t inputbw;
+    int oheadbw;
+    int64_t latency;
+    int tlpktdrop;
+    int nakreport;
+    int64_t connect_timeout;
+    int payload_size;
+    int64_t rcvlatency;
+    int64_t peerlatency;
+    enum SRTMode mode;
+} SRTContext;
+
+#define D AV_OPT_FLAG_DECODING_PARAM
+#define E AV_OPT_FLAG_ENCODING_PARAM
+#define OFFSET(x) offsetof(SRTContext, x)
+static const AVOption libsrt_options[] = {
+    { "rw_timeout",     "Timeout of socket I/O operations",                                     OFFSET(rw_timeout),       AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, .flags = D|E },
+    { "listen_timeout", "Connection awaiting timeout",                                          OFFSET(listen_timeout),   AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, .flags = D|E },
+    { "send_buffer_size", "Socket send buffer size (in bytes)",                                 OFFSET(send_buffer_size), AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, INT_MAX,   .flags = D|E },
+    { "recv_buffer_size", "Socket receive buffer size (in bytes)",                              OFFSET(recv_buffer_size), AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, INT_MAX,   .flags = D|E },
+    { "pkt_size",       "Maximum SRT packet size",                                              OFFSET(payload_size),     AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, SRT_LIVE_MAX_PAYLOAD_SIZE, .flags = D|E, "payload_size" },
+    { "payload_size",   "Maximum SRT packet size",                                              OFFSET(payload_size),     AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, SRT_LIVE_MAX_PAYLOAD_SIZE, .flags = D|E, "payload_size" },
+    { "ts_size",        NULL, 0, AV_OPT_TYPE_CONST,  { .i64 = SRT_LIVE_DEFAULT_PAYLOAD_SIZE }, INT_MIN, INT_MAX, .flags = D|E, "payload_size" },
+    { "max_size",       NULL, 0, AV_OPT_TYPE_CONST,  { .i64 = SRT_LIVE_MAX_PAYLOAD_SIZE },     INT_MIN, INT_MAX, .flags = D|E, "payload_size" },
+    { "maxbw",          "Maximum bandwidth (bytes per second) that the connection can use",     OFFSET(maxbw),            AV_OPT_TYPE_INT64,    { .i64 = -1 }, -1, INT64_MAX, .flags = D|E },
+    { "pbkeylen",       "Crypto key len in bytes {16,24,32} Default: 16 (128-bit)",             OFFSET(pbkeylen),         AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, 32,        .flags = D|E },
+    { "passphrase",     "Crypto PBKDF2 Passphrase size[0,10..64] 0:disable crypto",             OFFSET(passphrase),       AV_OPT_TYPE_STRING,   { .str = NULL },              .flags = D|E },
+    { "mss",            "The Maximum Segment Size",                                             OFFSET(mss),              AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, 1500,      .flags = D|E },
+    { "ffs",            "Flight flag size (window size) (in bytes)",                            OFFSET(ffs),              AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, INT_MAX,   .flags = D|E },
+    { "ipttl",          "IP Time To Live",                                                      OFFSET(ipttl),            AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, 255,       .flags = D|E },
+    { "iptos",          "IP Type of Service",                                                   OFFSET(iptos),            AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, 255,       .flags = D|E },
+    { "inputbw",        "Estimated input stream rate",                                          OFFSET(inputbw),          AV_OPT_TYPE_INT64,    { .i64 = -1 }, -1, INT64_MAX, .flags = D|E },
+    { "oheadbw",        "MaxBW ceiling based on % over input stream rate",                      OFFSET(oheadbw),          AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, 100,       .flags = D|E },
+    { "latency",        "receiver delay to absorb bursts of missed packet retransmissions",     OFFSET(latency),          AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, .flags = D|E },
+    { "tsbpddelay",     "deprecated, same effect as latency option",                            OFFSET(latency),          AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, .flags = D|E },
+    { "rcvlatency",     "receive latency",                                                      OFFSET(rcvlatency),       AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, .flags = D|E },
+    { "peerlatency",    "peer latency",                                                         OFFSET(peerlatency),      AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, .flags = D|E },
+    { "tlpktdrop",      "Enable receiver pkt drop",                                             OFFSET(tlpktdrop),        AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, 1,         .flags = D|E },
+    { "nakreport",      "Enable receiver to send periodic NAK reports",                         OFFSET(nakreport),        AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, 1,         .flags = D|E },
+    { "connect_timeout", "Connect timeout. Caller default: 3000, rendezvous (x 10)",            OFFSET(connect_timeout),  AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, .flags = D|E },
+    { "mode",           "Connection mode (caller, listener, rendezvous)",                       OFFSET(mode),             AV_OPT_TYPE_INT,      { .i64 = SRT_MODE_CALLER }, SRT_MODE_CALLER, SRT_MODE_RENDEZVOUS, .flags = D|E, "mode" },
+    { "caller",         NULL, 0, AV_OPT_TYPE_CONST,  { .i64 = SRT_MODE_CALLER },     INT_MIN, INT_MAX, .flags = D|E, "mode" },
+    { "listener",       NULL, 0, AV_OPT_TYPE_CONST,  { .i64 = SRT_MODE_LISTENER },   INT_MIN, INT_MAX, .flags = D|E, "mode" },
+    { "rendezvous",     NULL, 0, AV_OPT_TYPE_CONST,  { .i64 = SRT_MODE_RENDEZVOUS }, INT_MIN, INT_MAX, .flags = D|E, "mode" },
+    { NULL }
+};
+
+static int libsrt_neterrno(URLContext *h)
+{
+    int err = srt_getlasterror(NULL);
+    av_log(h, AV_LOG_ERROR, "%s\n", srt_getlasterror_str());
+    if (err == SRT_EASYNCRCV)
+        return AVERROR(EAGAIN);
+    return AVERROR_UNKNOWN;
+}
+
+static int libsrt_socket_nonblock(int socket, int enable)
+{
+    int ret = srt_setsockopt(socket, 0, SRTO_SNDSYN, &enable, sizeof(enable));
+    if (ret < 0)
+        return ret;
+    return srt_setsockopt(socket, 0, SRTO_RCVSYN, &enable, sizeof(enable));
+}
+
+static int libsrt_network_wait_fd(URLContext *h, int eid, int fd, int write)
+{
+    int ret, len = 1;
+    int modes = write ? SRT_EPOLL_OUT : SRT_EPOLL_IN;
+    SRTSOCKET ready[1];
+
+    if (srt_epoll_add_usock(eid, fd, &modes) < 0)
+        return libsrt_neterrno(h);
+    if (write) {
+        ret = srt_epoll_wait(eid, 0, 0, ready, &len, POLLING_TIME, 0, 0, 0, 0);
+    } else {
+        ret = srt_epoll_wait(eid, ready, &len, 0, 0, POLLING_TIME, 0, 0, 0, 0);
+    }
+    if (ret < 0) {
+        if (srt_getlasterror(NULL) == SRT_ETIMEOUT)
+            ret = AVERROR(EAGAIN);
+        else
+            ret = libsrt_neterrno(h);
+    } else {
+        ret = 0;
+    }
+    if (srt_epoll_remove_usock(eid, fd) < 0)
+        return libsrt_neterrno(h);
+    return ret;
+}
+
+/* TODO de-duplicate code from ff_network_wait_fd_timeout() */
+
+static int libsrt_network_wait_fd_timeout(URLContext *h, int eid, int fd, int write, int64_t timeout, AVIOInterruptCB *int_cb)
+{
+    int ret;
+    int64_t wait_start = 0;
+
+    while (1) {
+        if (ff_check_interrupt(int_cb))
+            return AVERROR_EXIT;
+        ret = libsrt_network_wait_fd(h, eid, fd, write);
+        if (ret != AVERROR(EAGAIN))
+            return ret;
+        if (timeout > 0) {
+            if (!wait_start)
+                wait_start = av_gettime_relative();
+            else if (av_gettime_relative() - wait_start > timeout)
+                return AVERROR(ETIMEDOUT);
+        }
+    }
+}
+
+static int libsrt_listen(int eid, int fd, const struct sockaddr *addr, socklen_t addrlen, URLContext *h, int timeout)
+{
+    int ret;
+    int reuse = 1;
+    if (srt_setsockopt(fd, SOL_SOCKET, SRTO_REUSEADDR, &reuse, sizeof(reuse))) {
+        av_log(h, AV_LOG_WARNING, "setsockopt(SRTO_REUSEADDR) failed\n");
+    }
+    ret = srt_bind(fd, addr, addrlen);
+    if (ret)
+        return libsrt_neterrno(h);
+
+    ret = srt_listen(fd, 1);
+    if (ret)
+        return libsrt_neterrno(h);
+
+    while ((ret = libsrt_network_wait_fd_timeout(h, eid, fd, 1, timeout, &h->interrupt_callback))) {
+        switch (ret) {
+        case AVERROR(ETIMEDOUT):
+            continue;
+        default:
+            return ret;
+        }
+    }
+
+    ret = srt_accept(fd, NULL, NULL);
+    if (ret < 0)
+        return libsrt_neterrno(h);
+    if (libsrt_socket_nonblock(ret, 1) < 0)
+        av_log(h, AV_LOG_DEBUG, "libsrt_socket_nonblock failed\n");
+
+    return ret;
+}
+
+static int libsrt_listen_connect(int eid, int fd, const struct sockaddr *addr, socklen_t addrlen, int timeout, URLContext *h, int will_try_next)
+{
+    int ret;
+
+    if (libsrt_socket_nonblock(fd, 1) < 0)
+        av_log(h, AV_LOG_DEBUG, "ff_socket_nonblock failed\n");
+
+    while ((ret = srt_connect(fd, addr, addrlen))) {
+        ret = libsrt_neterrno(h);
+        switch (ret) {
+        case AVERROR(EINTR):
+            if (ff_check_interrupt(&h->interrupt_callback))
+                return AVERROR_EXIT;
+            continue;
+        case AVERROR(EINPROGRESS):
+        case AVERROR(EAGAIN):
+            ret = libsrt_network_wait_fd_timeout(h, eid, fd, 1, timeout, &h->interrupt_callback);
+            if (ret < 0)
+                return ret;
+            ret = srt_getlasterror(NULL);
+            srt_clearlasterror();
+            if (ret != 0) {
+                char buf[128];
+                ret = AVERROR(ret);
+                av_strerror(ret, buf, sizeof(buf));
+                if (will_try_next)
+                    av_log(h, AV_LOG_WARNING,
+                           "Connection to %s failed (%s), trying next address\n",
+                           h->filename, buf);
+                else
+                    av_log(h, AV_LOG_ERROR, "Connection to %s failed: %s\n",
+                           h->filename, buf);
+            }
+        default:
+            return ret;
+        }
+    }
+    return ret;
+}
+
+static int libsrt_setsockopt(URLContext *h, int fd, SRT_SOCKOPT optname, const char * optnamestr, const void * optval, int optlen)
+{
+    if (srt_setsockopt(fd, 0, optname, optval, optlen) < 0) {
+        av_log(h, AV_LOG_ERROR, "failed to set option %s on socket: %s\n", optnamestr, srt_getlasterror_str());
+        return AVERROR(EIO);
+    }
+    return 0;
+}
+
+static int libsrt_getsockopt(URLContext *h, int fd, SRT_SOCKOPT optname, const char * optnamestr, void * optval, int * optlen)
+{
+    if (srt_getsockopt(fd, 0, optname, optval, optlen) < 0) {
+        av_log(h, AV_LOG_ERROR, "failed to get option %s on socket: %s\n", optnamestr, srt_getlasterror_str());
+        return AVERROR(EIO);
+    }
+    return 0;
+}
+
+/* - The "POST" options can be altered any time on a connected socket.
+     They MAY have also some meaning when set prior to connecting; such
+     option is SRTO_RCVSYN, which makes connect/accept call asynchronous.
+     Because of that this option is treated special way in this app. */
+static int libsrt_set_options_post(URLContext *h, int fd)
+{
+    SRTContext *s = h->priv_data;
+
+    if ((s->inputbw >= 0 && libsrt_setsockopt(h, fd, SRTO_INPUTBW, "SRTO_INPUTBW", &s->inputbw, sizeof(s->inputbw)) < 0) ||
+        (s->oheadbw >= 0 && libsrt_setsockopt(h, fd, SRTO_OHEADBW, "SRTO_OHEADBW", &s->oheadbw, sizeof(s->oheadbw)) < 0)) {
+        return AVERROR(EIO);
+    }
+    return 0;
+}
+
+/* - The "PRE" options must be set prior to connecting and can't be altered
+     on a connected socket, however if set on a listening socket, they are
+     derived by accept-ed socket. */
+static int libsrt_set_options_pre(URLContext *h, int fd)
+{
+    SRTContext *s = h->priv_data;
+    int yes = 1;
+    int latency = s->latency / 1000;
+    int rcvlatency = s->rcvlatency / 1000;
+    int peerlatency = s->peerlatency / 1000;
+    int connect_timeout = s->connect_timeout;
+
+    if ((s->mode == SRT_MODE_RENDEZVOUS && libsrt_setsockopt(h, fd, SRTO_RENDEZVOUS, "SRTO_RENDEZVOUS", &yes, sizeof(yes)) < 0) ||
+        (s->maxbw >= 0 && libsrt_setsockopt(h, fd, SRTO_MAXBW, "SRTO_MAXBW", &s->maxbw, sizeof(s->maxbw)) < 0) ||
+        (s->pbkeylen >= 0 && libsrt_setsockopt(h, fd, SRTO_PBKEYLEN, "SRTO_PBKEYLEN", &s->pbkeylen, sizeof(s->pbkeylen)) < 0) ||
+        (s->passphrase && libsrt_setsockopt(h, fd, SRTO_PASSPHRASE, "SRTO_PASSPHRASE", s->passphrase, strlen(s->passphrase)) < 0) ||
+        (s->mss >= 0 && libsrt_setsockopt(h, fd, SRTO_MSS, "SRTO_MMS", &s->mss, sizeof(s->mss)) < 0) ||
+        (s->ffs >= 0 && libsrt_setsockopt(h, fd, SRTO_FC, "SRTO_FC", &s->ffs, sizeof(s->ffs)) < 0) ||
+        (s->ipttl >= 0 && libsrt_setsockopt(h, fd, SRTO_IPTTL, "SRTO_UPTTL", &s->ipttl, sizeof(s->ipttl)) < 0) ||
+        (s->iptos >= 0 && libsrt_setsockopt(h, fd, SRTO_IPTOS, "SRTO_IPTOS", &s->iptos, sizeof(s->iptos)) < 0) ||
+        (s->latency >= 0 && libsrt_setsockopt(h, fd, SRTO_LATENCY, "SRTO_LATENCY", &latency, sizeof(latency)) < 0) ||
+        (s->rcvlatency >= 0 && libsrt_setsockopt(h, fd, SRTO_RCVLATENCY, "SRTO_RCVLATENCY", &rcvlatency, sizeof(rcvlatency)) < 0) ||
+        (s->peerlatency >= 0 && libsrt_setsockopt(h, fd, SRTO_PEERLATENCY, "SRTO_PEERLATENCY", &peerlatency, sizeof(peerlatency)) < 0) ||
+        (s->tlpktdrop >= 0 && libsrt_setsockopt(h, fd, SRTO_TLPKTDROP, "SRTO_TLPKDROP", &s->tlpktdrop, sizeof(s->tlpktdrop)) < 0) ||
+        (s->nakreport >= 0 && libsrt_setsockopt(h, fd, SRTO_NAKREPORT, "SRTO_NAKREPORT", &s->nakreport, sizeof(s->nakreport)) < 0) ||
+        (connect_timeout >= 0 && libsrt_setsockopt(h, fd, SRTO_CONNTIMEO, "SRTO_CONNTIMEO", &connect_timeout, sizeof(connect_timeout)) <0 ) ||
+        (s->payload_size >= 0 && libsrt_setsockopt(h, fd, SRTO_PAYLOADSIZE, "SRTO_PAYLOADSIZE", &s->payload_size, sizeof(s->payload_size)) < 0)) {
+        return AVERROR(EIO);
+    }
+    return 0;
+}
+
+
+static int libsrt_setup(URLContext *h, const char *uri, int flags)
+{
+    struct addrinfo hints = { 0 }, *ai, *cur_ai;
+    int port, fd = -1;
+    SRTContext *s = h->priv_data;
+    const char *p;
+    char buf[256];
+    int ret;
+    char hostname[1024],proto[1024],path[1024];
+    char portstr[10];
+    int open_timeout = 5000000;
+    int eid;
+
+    eid = srt_epoll_create();
+    if (eid < 0)
+        return libsrt_neterrno(h);
+    s->eid = eid;
+
+    av_url_split(proto, sizeof(proto), NULL, 0, hostname, sizeof(hostname),
+        &port, path, sizeof(path), uri);
+    if (strcmp(proto, "srt"))
+        return AVERROR(EINVAL);
+    if (port <= 0 || port >= 65536) {
+        av_log(h, AV_LOG_ERROR, "Port missing in uri\n");
+        return AVERROR(EINVAL);
+    }
+    p = strchr(uri, '?');
+    if (p) {
+        if (av_find_info_tag(buf, sizeof(buf), "timeout", p)) {
+            s->rw_timeout = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "listen_timeout", p)) {
+            s->listen_timeout = strtol(buf, NULL, 10);
+        }
+    }
+    if (s->rw_timeout >= 0) {
+        open_timeout = h->rw_timeout = s->rw_timeout;
+    }
+    hints.ai_family = AF_UNSPEC;
+    hints.ai_socktype = SOCK_DGRAM;
+    snprintf(portstr, sizeof(portstr), "%d", port);
+    if (s->mode == SRT_MODE_LISTENER)
+        hints.ai_flags |= AI_PASSIVE;
+    ret = getaddrinfo(hostname[0] ? hostname : NULL, portstr, &hints, &ai);
+    if (ret) {
+        av_log(h, AV_LOG_ERROR,
+               "Failed to resolve hostname %s: %s\n",
+               hostname, gai_strerror(ret));
+        return AVERROR(EIO);
+    }
+
+    cur_ai = ai;
+
+ restart:
+
+    fd = srt_socket(cur_ai->ai_family, cur_ai->ai_socktype, 0);
+    if (fd < 0) {
+        ret = libsrt_neterrno(h);
+        goto fail;
+    }
+
+    if ((ret = libsrt_set_options_pre(h, fd)) < 0) {
+        goto fail;
+    }
+
+    /* Set the socket's send or receive buffer sizes, if specified.
+       If unspecified or setting fails, system default is used. */
+    if (s->recv_buffer_size > 0) {
+        srt_setsockopt(fd, SOL_SOCKET, SRTO_UDP_RCVBUF, &s->recv_buffer_size, sizeof (s->recv_buffer_size));
+    }
+    if (s->send_buffer_size > 0) {
+        srt_setsockopt(fd, SOL_SOCKET, SRTO_UDP_SNDBUF, &s->send_buffer_size, sizeof (s->send_buffer_size));
+    }
+    if (s->mode == SRT_MODE_LISTENER) {
+        // multi-client
+        if ((ret = libsrt_listen(s->eid, fd, cur_ai->ai_addr, cur_ai->ai_addrlen, h, open_timeout / 1000)) < 0)
+            goto fail1;
+        fd = ret;
+    } else {
+        if (s->mode == SRT_MODE_RENDEZVOUS) {
+            ret = srt_bind(fd, cur_ai->ai_addr, cur_ai->ai_addrlen);
+            if (ret)
+                goto fail1;
+        }
+
+        if ((ret = libsrt_listen_connect(s->eid, fd, cur_ai->ai_addr, cur_ai->ai_addrlen,
+                                          open_timeout / 1000, h, !!cur_ai->ai_next)) < 0) {
+            if (ret == AVERROR_EXIT)
+                goto fail1;
+            else
+                goto fail;
+        }
+    }
+    if ((ret = libsrt_set_options_post(h, fd)) < 0) {
+        goto fail;
+    }
+
+    if (flags & AVIO_FLAG_WRITE) {
+        int packet_size = 0;
+        int optlen = sizeof(packet_size);
+        ret = libsrt_getsockopt(h, fd, SRTO_PAYLOADSIZE, "SRTO_PAYLOADSIZE", &packet_size, &optlen);
+        if (ret < 0)
+            goto fail1;
+        if (packet_size > 0)
+            h->max_packet_size = packet_size;
+    }
+
+    h->is_streamed = 1;
+    s->fd = fd;
+
+    freeaddrinfo(ai);
+    return 0;
+
+ fail:
+    if (cur_ai->ai_next) {
+        /* Retry with the next sockaddr */
+        cur_ai = cur_ai->ai_next;
+        if (fd >= 0)
+            srt_close(fd);
+        ret = 0;
+        goto restart;
+    }
+ fail1:
+    if (fd >= 0)
+        srt_close(fd);
+    freeaddrinfo(ai);
+    return ret;
+}
+
+static int libsrt_open(URLContext *h, const char *uri, int flags)
+{
+    SRTContext *s = h->priv_data;
+    const char * p;
+    char buf[256];
+
+    if (srt_startup() < 0) {
+        return AVERROR_UNKNOWN;
+    }
+
+    /* SRT options (srt/srt.h) */
+    p = strchr(uri, '?');
+    if (p) {
+        if (av_find_info_tag(buf, sizeof(buf), "maxbw", p)) {
+            s->maxbw = strtoll(buf, NULL, 0);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "pbkeylen", p)) {
+            s->pbkeylen = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "passphrase", p)) {
+            s->passphrase = av_strndup(buf, strlen(buf));
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "mss", p)) {
+            s->mss = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "ffs", p)) {
+            s->ffs = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "ipttl", p)) {
+            s->ipttl = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "iptos", p)) {
+            s->iptos = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "inputbw", p)) {
+            s->inputbw = strtoll(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "oheadbw", p)) {
+            s->oheadbw = strtoll(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "latency", p)) {
+            s->latency = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "tsbpddelay", p)) {
+            s->latency = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "rcvlatency", p)) {
+            s->rcvlatency = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "peerlatency", p)) {
+            s->peerlatency = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "tlpktdrop", p)) {
+            s->tlpktdrop = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "nakreport", p)) {
+            s->nakreport = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "connect_timeout", p)) {
+            s->connect_timeout = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "payload_size", p) ||
+            av_find_info_tag(buf, sizeof(buf), "pkt_size", p)) {
+            s->payload_size = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "mode", p)) {
+            if (!strcmp(buf, "caller")) {
+                s->mode = SRT_MODE_CALLER;
+            } else if (!strcmp(buf, "listener")) {
+                s->mode = SRT_MODE_LISTENER;
+            } else if (!strcmp(buf, "rendezvous")) {
+                s->mode = SRT_MODE_RENDEZVOUS;
+            } else {
+                return AVERROR(EIO);
+            }
+        }
+    }
+    return libsrt_setup(h, uri, flags);
+}
+
+static int libsrt_read(URLContext *h, uint8_t *buf, int size)
+{
+    SRTContext *s = h->priv_data;
+    int ret;
+
+    if (!(h->flags & AVIO_FLAG_NONBLOCK)) {
+        ret = libsrt_network_wait_fd_timeout(h, s->eid, s->fd, 0, h->rw_timeout, &h->interrupt_callback);
+        if (ret)
+            return ret;
+    }
+
+    ret = srt_recvmsg(s->fd, buf, size);
+    if (ret < 0) {
+        ret = libsrt_neterrno(h);
+    }
+
+    return ret;
+}
+
+static int libsrt_write(URLContext *h, const uint8_t *buf, int size)
+{
+    SRTContext *s = h->priv_data;
+    int ret;
+
+    if (!(h->flags & AVIO_FLAG_NONBLOCK)) {
+        ret = libsrt_network_wait_fd_timeout(h, s->eid, s->fd, 1, h->rw_timeout, &h->interrupt_callback);
+        if (ret)
+            return ret;
+    }
+
+    ret = srt_sendmsg(s->fd, buf, size, -1, 0);
+    if (ret < 0) {
+        ret = libsrt_neterrno(h);
+    }
+
+    return ret;
+}
+
+static int libsrt_close(URLContext *h)
+{
+    SRTContext *s = h->priv_data;
+
+    srt_close(s->fd);
+
+    srt_epoll_release(s->eid);
+
+    srt_cleanup();
+
+    return 0;
+}
+
+static int libsrt_get_file_handle(URLContext *h)
+{
+    SRTContext *s = h->priv_data;
+    return s->fd;
+}
+
+static const AVClass libsrt_class = {
+    .class_name = "libsrt",
+    .item_name  = av_default_item_name,
+    .option     = libsrt_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const URLProtocol ff_libsrt_protocol = {
+    .name                = "srt",
+    .url_open            = libsrt_open,
+    .url_read            = libsrt_read,
+    .url_write           = libsrt_write,
+    .url_close           = libsrt_close,
+    .url_get_file_handle = libsrt_get_file_handle,
+    .priv_data_size      = sizeof(SRTContext),
+    .flags               = URL_PROTOCOL_FLAG_NETWORK,
+    .priv_data_class     = &libsrt_class,
+};

diff --git a/libavformat/libssh.c b/libavformat/libssh.c
index 9e3d4da..21474f0 100644
--- a/libavformat/libssh.c
+++ b/libavformat/libssh.c

@@ -295,7 +295,7 @@
         av_log(libssh, AV_LOG_ERROR, "Read error.\n");
         return AVERROR(EIO);
     }
-    return bytes_read;
+    return bytes_read ? bytes_read : AVERROR_EOF;
 }
 
 static int libssh_write(URLContext *h, const unsigned char *buf, int size)

diff --git a/libavformat/m4vdec.c b/libavformat/m4vdec.c
index 34d434f..220daeb 100644
--- a/libavformat/m4vdec.c
+++ b/libavformat/m4vdec.c

@@ -22,13 +22,19 @@
 #include "avformat.h"
 #include "rawdec.h"
 
-#define VISUAL_OBJECT_START_CODE       0x000001b5
-#define VOP_START_CODE                 0x000001b6
+#define VOS_STARTCODE        0x1B0
+#define USER_DATA_STARTCODE  0x1B2
+#define GOP_STARTCODE        0x1B3
+#define VISUAL_OBJ_STARTCODE 0x1B5
+#define VOP_STARTCODE        0x1B6
+#define SLICE_STARTCODE      0x1B7
+#define EXT_STARTCODE        0x1B8
 
 static int mpeg4video_probe(AVProbeData *probe_packet)
 {
     uint32_t temp_buffer = -1;
     int VO = 0, VOL = 0, VOP = 0, VISO = 0, res = 0;
+    int res_main = 0;
     int i;
 
     for (i = 0; i < probe_packet->buf_size; i++) {
@@ -38,19 +44,27 @@
         if (temp_buffer < 2)
             continue;
 
-        if (temp_buffer == VOP_START_CODE)
+        if (temp_buffer == VOP_STARTCODE)
             VOP++;
-        else if (temp_buffer == VISUAL_OBJECT_START_CODE)
+        else if (temp_buffer == VISUAL_OBJ_STARTCODE)
             VISO++;
         else if (temp_buffer >= 0x100 && temp_buffer < 0x120)
             VO++;
         else if (temp_buffer >= 0x120 && temp_buffer < 0x130)
             VOL++;
+        else if (temp_buffer == SLICE_STARTCODE || temp_buffer == EXT_STARTCODE)
+            res_main++;
         else if (!(0x1AF < temp_buffer && temp_buffer < 0x1B7) &&
                  !(0x1B9 < temp_buffer && temp_buffer < 0x1C4))
             res++;
     }
 
+    // res_main repesents the reserved codes within the "main" profile, they are
+    // added to the reserved ones if it appears that this is a "main" profile
+    // stream
+    if (res_main && 2*res_main < VOP)
+        res += res_main;
+
     if (VOP >= VISO && VOP >= VOL && VO >= VOL && VOL > 0 && res == 0)
         return VOP+VO > 4 ? AVPROBE_SCORE_EXTENSION : AVPROBE_SCORE_EXTENSION/2;
 

diff --git a/libavformat/matroska.c b/libavformat/matroska.c
index 94ccbec..4d18d14 100644
--- a/libavformat/matroska.c
+++ b/libavformat/matroska.c

@@ -103,6 +103,22 @@
     {""                 , AV_CODEC_ID_NONE}
 };
 
+const CodecTags ff_webm_codec_tags[] = {
+    {"V_VP8"            , AV_CODEC_ID_VP8},
+    {"V_VP9"            , AV_CODEC_ID_VP9},
+    {"V_AV1"            , AV_CODEC_ID_AV1},
+
+    {"A_VORBIS"         , AV_CODEC_ID_VORBIS},
+    {"A_OPUS"           , AV_CODEC_ID_OPUS},
+
+    {"D_WEBVTT/SUBTITLES"   , AV_CODEC_ID_WEBVTT},
+    {"D_WEBVTT/CAPTIONS"    , AV_CODEC_ID_WEBVTT},
+    {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT},
+    {"D_WEBVTT/METADATA"    , AV_CODEC_ID_WEBVTT},
+
+    {""                 , AV_CODEC_ID_NONE}
+};
+
 const CodecMime ff_mkv_image_mime_tags[] = {
     {"image/gif"                  , AV_CODEC_ID_GIF},
     {"image/jpeg"                 , AV_CODEC_ID_MJPEG},

diff --git a/libavformat/matroska.h b/libavformat/matroska.h
index 83c8246..86968a8 100644
--- a/libavformat/matroska.h
+++ b/libavformat/matroska.h

@@ -360,6 +360,7 @@
 #define MATROSKA_VIDEO_STEREO_PLANE_COUNT  3
 
 extern const CodecTags ff_mkv_codec_tags[];
+extern const CodecTags ff_webm_codec_tags[];
 extern const CodecMime ff_mkv_mime_tags[];
 extern const CodecMime ff_mkv_image_mime_tags[];
 extern const AVMetadataConv ff_mkv_metadata_conv[];

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index e663109..e679398 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c

@@ -104,6 +104,7 @@
 
 typedef struct EbmlBin {
     int      size;
+    AVBufferRef *buf;
     uint8_t *data;
     int64_t  pos;
 } EbmlBin;
@@ -338,9 +339,8 @@
     int64_t segment_start;
 
     /* the packet queue */
-    AVPacket **packets;
-    int num_packets;
-    AVPacket *prev_pkt;
+    AVPacketList *queue;
+    AVPacketList *queue_end;
 
     int done;
 
@@ -963,14 +963,19 @@
  */
 static int ebml_read_binary(AVIOContext *pb, int length, EbmlBin *bin)
 {
-    av_fast_padded_malloc(&bin->data, &bin->size, length);
-    if (!bin->data)
-        return AVERROR(ENOMEM);
+    int ret;
 
+    ret = av_buffer_realloc(&bin->buf, length + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (ret < 0)
+        return ret;
+    memset(bin->buf->data + length, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+    bin->data = bin->buf->data;
     bin->size = length;
     bin->pos  = avio_tell(pb);
     if (avio_read(pb, bin->data, length) != length) {
-        av_freep(&bin->data);
+        av_buffer_unref(&bin->buf);
+        bin->data = NULL;
         bin->size = 0;
         return AVERROR(EIO);
     }
@@ -1253,7 +1258,7 @@
             av_freep(data_off);
             break;
         case EBML_BIN:
-            av_freep(&((EbmlBin *) data_off)->data);
+            av_buffer_unref(&((EbmlBin *) data_off)->buf);
             break;
         case EBML_LEVEL1:
         case EBML_NEST:
@@ -1362,7 +1367,7 @@
             return 0;
 
         pkt_size = isize + header_size;
-        pkt_data = av_malloc(pkt_size);
+        pkt_data = av_malloc(pkt_size + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!pkt_data)
             return AVERROR(ENOMEM);
 
@@ -1374,7 +1379,8 @@
     case MATROSKA_TRACK_ENCODING_COMP_LZO:
         do {
             olen       = pkt_size *= 3;
-            newpktdata = av_realloc(pkt_data, pkt_size + AV_LZO_OUTPUT_PADDING);
+            newpktdata = av_realloc(pkt_data, pkt_size + AV_LZO_OUTPUT_PADDING
+                                                       + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!newpktdata) {
                 result = AVERROR(ENOMEM);
                 goto failed;
@@ -1399,7 +1405,7 @@
         zstream.avail_in = isize;
         do {
             pkt_size  *= 3;
-            newpktdata = av_realloc(pkt_data, pkt_size);
+            newpktdata = av_realloc(pkt_data, pkt_size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!newpktdata) {
                 inflateEnd(&zstream);
                 result = AVERROR(ENOMEM);
@@ -1432,7 +1438,7 @@
         bzstream.avail_in = isize;
         do {
             pkt_size  *= 3;
-            newpktdata = av_realloc(pkt_data, pkt_size);
+            newpktdata = av_realloc(pkt_data, pkt_size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!newpktdata) {
                 BZ2_bzDecompressEnd(&bzstream);
                 result = AVERROR(ENOMEM);
@@ -1459,6 +1465,8 @@
         return AVERROR_INVALIDDATA;
     }
 
+    memset(pkt_data + pkt_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
     *buf      = pkt_data;
     *buf_size = pkt_size;
     return 0;
@@ -1989,18 +1997,25 @@
             return AVERROR_INVALIDDATA;
         }
         break;
+    case MATROSKA_VIDEO_PROJECTION_TYPE_RECTANGULAR:
+        /* No Spherical metadata */
+        return 0;
     default:
+        av_log(NULL, AV_LOG_WARNING,
+               "Unknown spherical metadata type %"PRIu64"\n",
+               track->video.projection.type);
         return 0;
     }
 
     spherical = av_spherical_alloc(&spherical_size);
     if (!spherical)
         return AVERROR(ENOMEM);
+
     spherical->projection = projection;
 
-    spherical->yaw   = (int32_t)(track->video.projection.yaw   * (1 << 16));
-    spherical->pitch = (int32_t)(track->video.projection.pitch * (1 << 16));
-    spherical->roll  = (int32_t)(track->video.projection.roll  * (1 << 16));
+    spherical->yaw   = (int32_t) (track->video.projection.yaw   * (1 << 16));
+    spherical->pitch = (int32_t) (track->video.projection.pitch * (1 << 16));
+    spherical->roll  = (int32_t) (track->video.projection.roll  * (1 << 16));
 
     spherical->padding = padding;
 
@@ -2030,12 +2045,13 @@
      * by expanding/shifting the data by 4 bytes and storing the data
      * size at the start. */
     if (ff_codec_get_id(codec_tags, AV_RL32(track->codec_priv.data))) {
-        uint8_t *p = av_realloc(track->codec_priv.data,
-                                track->codec_priv.size + 4);
-        if (!p)
-            return AVERROR(ENOMEM);
-        memmove(p + 4, p, track->codec_priv.size);
-        track->codec_priv.data = p;
+        int ret = av_buffer_realloc(&track->codec_priv.buf,
+                                    track->codec_priv.size + 4 + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (ret < 0)
+            return ret;
+
+        track->codec_priv.data = track->codec_priv.buf->data;
+        memmove(track->codec_priv.data + 4, track->codec_priv.data, track->codec_priv.size);
         track->codec_priv.size += 4;
         AV_WB32(track->codec_priv.data, track->codec_priv.size);
     }
@@ -2156,8 +2172,19 @@
                            "Failed to decode codec private data\n");
                 }
 
-                if (codec_priv != track->codec_priv.data)
-                    av_free(codec_priv);
+                if (codec_priv != track->codec_priv.data) {
+                    av_buffer_unref(&track->codec_priv.buf);
+                    if (track->codec_priv.data) {
+                        track->codec_priv.buf = av_buffer_create(track->codec_priv.data,
+                                                                 track->codec_priv.size + AV_INPUT_BUFFER_PADDING_SIZE,
+                                                                 NULL, NULL, 0);
+                        if (!track->codec_priv.buf) {
+                            av_freep(&track->codec_priv.data);
+                            track->codec_priv.size = 0;
+                            return AVERROR(ENOMEM);
+                        }
+                    }
+                }
             }
         }
 
@@ -2390,6 +2417,14 @@
                 return ret;
         } else if (codec_id == AV_CODEC_ID_PRORES && track->codec_priv.size == 4) {
             fourcc = AV_RL32(track->codec_priv.data);
+        } else if (codec_id == AV_CODEC_ID_VP9 && track->codec_priv.size) {
+            /* we don't need any value stored in CodecPrivate.
+               make sure that it's not exported as extradata. */
+            track->codec_priv.size = 0;
+        } else if (codec_id == AV_CODEC_ID_AV1 && track->codec_priv.size) {
+            /* For now, propagate only the OBUs, if any. Once libavcodec is
+               updated to handle isobmff style extradata this can be removed. */
+            extradata_offset = 4;
         }
         track->codec_priv.size -= extradata_offset;
 
@@ -2514,7 +2549,9 @@
             st->codecpar->channels    = track->audio.channels;
             if (!st->codecpar->bits_per_coded_sample)
                 st->codecpar->bits_per_coded_sample = track->audio.bitdepth;
-            if (st->codecpar->codec_id == AV_CODEC_ID_MP3)
+            if (st->codecpar->codec_id == AV_CODEC_ID_MP3 ||
+                st->codecpar->codec_id == AV_CODEC_ID_MLP ||
+                st->codecpar->codec_id == AV_CODEC_ID_TRUEHD)
                 st->need_parsing = AVSTREAM_PARSE_FULL;
             else if (st->codecpar->codec_id != AV_CODEC_ID_AAC)
                 st->need_parsing = AVSTREAM_PARSE_HEADERS;
@@ -2709,11 +2746,11 @@
 static int matroska_deliver_packet(MatroskaDemuxContext *matroska,
                                    AVPacket *pkt)
 {
-    if (matroska->num_packets > 0) {
+    if (matroska->queue) {
         MatroskaTrack *tracks = matroska->tracks.elem;
         MatroskaTrack *track;
-        memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
-        av_freep(&matroska->packets[0]);
+
+        ff_packet_list_get(&matroska->queue, &matroska->queue_end, pkt);
         track = &tracks[pkt->stream_index];
         if (track->has_palette) {
             uint8_t *pal = av_packet_new_side_data(pkt, AV_PKT_DATA_PALETTE, AVPALETTE_SIZE);
@@ -2724,20 +2761,6 @@
             }
             track->has_palette = 0;
         }
-        if (matroska->num_packets > 1) {
-            void *newpackets;
-            memmove(&matroska->packets[0], &matroska->packets[1],
-                    (matroska->num_packets - 1) * sizeof(AVPacket *));
-            newpackets = av_realloc(matroska->packets,
-                                    (matroska->num_packets - 1) *
-                                    sizeof(AVPacket *));
-            if (newpackets)
-                matroska->packets = newpackets;
-        } else {
-            av_freep(&matroska->packets);
-            matroska->prev_pkt = NULL;
-        }
-        matroska->num_packets--;
         return 0;
     }
 
@@ -2749,16 +2772,7 @@
  */
 static void matroska_clear_queue(MatroskaDemuxContext *matroska)
 {
-    matroska->prev_pkt = NULL;
-    if (matroska->packets) {
-        int n;
-        for (n = 0; n < matroska->num_packets; n++) {
-            av_packet_unref(matroska->packets[n]);
-            av_freep(&matroska->packets[n]);
-        }
-        av_freep(&matroska->packets);
-        matroska->num_packets = 0;
-    }
+    ff_packet_list_free(&matroska->queue, &matroska->queue_end);
 }
 
 static int matroska_parse_laces(MatroskaDemuxContext *matroska, uint8_t **buf,
@@ -2924,13 +2938,10 @@
 
     while (track->audio.pkt_cnt) {
         int ret;
-        AVPacket *pkt = av_mallocz(sizeof(AVPacket));
-        if (!pkt)
-            return AVERROR(ENOMEM);
+        AVPacket pktl, *pkt = &pktl;
 
         ret = av_new_packet(pkt, a);
         if (ret < 0) {
-            av_free(pkt);
             return ret;
         }
         memcpy(pkt->data,
@@ -2940,7 +2951,11 @@
         track->audio.buf_timecode = AV_NOPTS_VALUE;
         pkt->pos                  = pos;
         pkt->stream_index         = st->index;
-        dynarray_add(&matroska->packets, &matroska->num_packets, pkt);
+        ret = ff_packet_list_put(&matroska->queue, &matroska->queue_end, pkt, 0);
+        if (ret < 0) {
+            av_packet_unref(pkt);
+            return AVERROR(ENOMEM);
+        }
     }
 
     return 0;
@@ -2993,7 +3008,7 @@
             goto fail;
         }
 
-        tmp = av_realloc(dst, dstlen + blocksize + 32);
+        tmp = av_realloc(dst, dstlen + blocksize + 32 + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!tmp) {
             ret = AVERROR(ENOMEM);
             goto fail;
@@ -3017,6 +3032,8 @@
         offset += blocksize + 32;
     }
 
+    memset(dst + dstlen, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
     *pdst = dst;
     *size = dstlen;
 
@@ -3027,6 +3044,30 @@
     return ret;
 }
 
+static int matroska_parse_prores(MatroskaTrack *track, uint8_t *src,
+                                 uint8_t **pdst, int *size)
+{
+    uint8_t *dst = src;
+    int dstlen = *size;
+
+    if (AV_RB32(&src[4]) != MKBETAG('i', 'c', 'p', 'f')) {
+        dst = av_malloc(dstlen + 8 + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!dst)
+            return AVERROR(ENOMEM);
+
+        AV_WB32(dst, dstlen);
+        AV_WB32(dst + 4, MKBETAG('i', 'c', 'p', 'f'));
+        memcpy(dst + 8, src, dstlen);
+        memset(dst + 8 + dstlen, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+        dstlen += 8;
+    }
+
+    *pdst = dst;
+    *size = dstlen;
+
+    return 0;
+}
+
 static int matroska_parse_webvtt(MatroskaDemuxContext *matroska,
                                  MatroskaTrack *track,
                                  AVStream *st,
@@ -3035,7 +3076,7 @@
                                  uint64_t duration,
                                  int64_t pos)
 {
-    AVPacket *pkt;
+    AVPacket pktl, *pkt = &pktl;
     uint8_t *id, *settings, *text, *buf;
     int id_len, settings_len, text_len;
     uint8_t *p, *q;
@@ -3092,13 +3133,9 @@
     if (text_len <= 0)
         return AVERROR_INVALIDDATA;
 
-    pkt = av_mallocz(sizeof(*pkt));
-    if (!pkt)
-        return AVERROR(ENOMEM);
     err = av_new_packet(pkt, text_len);
     if (err < 0) {
-        av_free(pkt);
-        return AVERROR(err);
+        return err;
     }
 
     memcpy(pkt->data, text, text_len);
@@ -3108,7 +3145,7 @@
                                       AV_PKT_DATA_WEBVTT_IDENTIFIER,
                                       id_len);
         if (!buf) {
-            av_free(pkt);
+            av_packet_unref(pkt);
             return AVERROR(ENOMEM);
         }
         memcpy(buf, id, id_len);
@@ -3119,7 +3156,7 @@
                                       AV_PKT_DATA_WEBVTT_SETTINGS,
                                       settings_len);
         if (!buf) {
-            av_free(pkt);
+            av_packet_unref(pkt);
             return AVERROR(ENOMEM);
         }
         memcpy(buf, settings, settings_len);
@@ -3137,15 +3174,18 @@
     pkt->duration = duration;
     pkt->pos = pos;
 
-    dynarray_add(&matroska->packets, &matroska->num_packets, pkt);
-    matroska->prev_pkt = pkt;
+    err = ff_packet_list_put(&matroska->queue, &matroska->queue_end, pkt, 0);
+    if (err < 0) {
+        av_packet_unref(pkt);
+        return AVERROR(ENOMEM);
+    }
 
     return 0;
 }
 
 static int matroska_parse_frame(MatroskaDemuxContext *matroska,
                                 MatroskaTrack *track, AVStream *st,
-                                uint8_t *data, int pkt_size,
+                                AVBufferRef *buf, uint8_t *data, int pkt_size,
                                 uint64_t timecode, uint64_t lace_duration,
                                 int64_t pos, int is_keyframe,
                                 uint8_t *additional, uint64_t additional_id, int additional_size,
@@ -3153,8 +3193,8 @@
 {
     MatroskaTrackEncoding *encodings = track->encodings.elem;
     uint8_t *pkt_data = data;
-    int offset = 0, res;
-    AVPacket *pkt;
+    int res;
+    AVPacket pktl, *pkt = &pktl;
 
     if (encodings && !encodings->type && encodings->scope & 1) {
         res = matroska_decode_buffer(&pkt_data, &pkt_size, track);
@@ -3175,34 +3215,33 @@
         pkt_data = wv_data;
     }
 
-    if (st->codecpar->codec_id == AV_CODEC_ID_PRORES &&
-        AV_RB32(&data[4]) != MKBETAG('i', 'c', 'p', 'f'))
-        offset = 8;
-
-    pkt = av_mallocz(sizeof(AVPacket));
-    if (!pkt) {
+    if (st->codecpar->codec_id == AV_CODEC_ID_PRORES) {
+        uint8_t *pr_data;
+        res = matroska_parse_prores(track, pkt_data, &pr_data, &pkt_size);
+        if (res < 0) {
+            av_log(matroska->ctx, AV_LOG_ERROR,
+                   "Error parsing a prores block.\n");
+            goto fail;
+        }
         if (pkt_data != data)
             av_freep(&pkt_data);
-        return AVERROR(ENOMEM);
+        pkt_data = pr_data;
     }
-    /* XXX: prevent data copy... */
-    if (av_new_packet(pkt, pkt_size + offset) < 0) {
-        av_free(pkt);
+
+    av_init_packet(pkt);
+    if (pkt_data != data)
+        pkt->buf = av_buffer_create(pkt_data, pkt_size + AV_INPUT_BUFFER_PADDING_SIZE,
+                                    NULL, NULL, 0);
+    else
+        pkt->buf = av_buffer_ref(buf);
+
+    if (!pkt->buf) {
         res = AVERROR(ENOMEM);
         goto fail;
     }
 
-    if (st->codecpar->codec_id == AV_CODEC_ID_PRORES && offset == 8) {
-        uint8_t *buf = pkt->data;
-        bytestream_put_be32(&buf, pkt_size);
-        bytestream_put_be32(&buf, MKBETAG('i', 'c', 'p', 'f'));
-    }
-
-    memcpy(pkt->data + offset, pkt_data, pkt_size);
-
-    if (pkt_data != data)
-        av_freep(&pkt_data);
-
+    pkt->data         = pkt_data;
+    pkt->size         = pkt_size;
     pkt->flags        = is_keyframe;
     pkt->stream_index = st->index;
 
@@ -3212,7 +3251,6 @@
                                                      additional_size + 8);
         if (!side_data) {
             av_packet_unref(pkt);
-            av_free(pkt);
             return AVERROR(ENOMEM);
         }
         AV_WB64(side_data, additional_id);
@@ -3225,7 +3263,6 @@
                                                      10);
         if (!side_data) {
             av_packet_unref(pkt);
-            av_free(pkt);
             return AVERROR(ENOMEM);
         }
         discard_padding = av_rescale_q(discard_padding,
@@ -3253,8 +3290,11 @@
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
-    dynarray_add(&matroska->packets, &matroska->num_packets, pkt);
-    matroska->prev_pkt = pkt;
+    res = ff_packet_list_put(&matroska->queue, &matroska->queue_end, pkt, 0);
+    if (res < 0) {
+        av_packet_unref(pkt);
+        return AVERROR(ENOMEM);
+    }
 
     return 0;
 
@@ -3264,7 +3304,7 @@
     return res;
 }
 
-static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
+static int matroska_parse_block(MatroskaDemuxContext *matroska, AVBufferRef *buf, uint8_t *data,
                                 int size, int64_t pos, uint64_t cluster_time,
                                 uint64_t block_duration, int is_keyframe,
                                 uint8_t *additional, uint64_t additional_id, int additional_size,
@@ -3382,7 +3422,7 @@
             if (res)
                 goto end;
         } else {
-            res = matroska_parse_frame(matroska, track, st, data, lace_size[n],
+            res = matroska_parse_frame(matroska, track, st, buf, data, lace_size[n],
                                        timecode, lace_duration, pos,
                                        !n ? is_keyframe : 0,
                                        additional, additional_id, additional_size,
@@ -3418,7 +3458,6 @@
         memset(&matroska->current_cluster, 0, sizeof(MatroskaCluster));
         matroska->current_cluster_num_blocks = 0;
         matroska->current_cluster_pos        = avio_tell(matroska->ctx->pb);
-        matroska->prev_pkt                   = NULL;
         /* sizeof the ID which was already read */
         if (matroska->current_id)
             matroska->current_cluster_pos -= 4;
@@ -3446,7 +3485,7 @@
                                     blocks[i].additional.data : NULL;
             if (!blocks[i].non_simple)
                 blocks[i].duration = 0;
-            res = matroska_parse_block(matroska, blocks[i].bin.data,
+            res = matroska_parse_block(matroska, blocks[i].bin.buf, blocks[i].bin.data,
                                        blocks[i].bin.size, blocks[i].bin.pos,
                                        matroska->current_cluster.timecode,
                                        blocks[i].duration, is_keyframe,
@@ -3471,7 +3510,6 @@
     if (!matroska->contains_ssa)
         return matroska_parse_cluster_incremental(matroska);
     pos = avio_tell(matroska->ctx->pb);
-    matroska->prev_pkt = NULL;
     if (matroska->current_id)
         pos -= 4;  /* sizeof the ID which was already read */
     res         = ebml_parse(matroska, matroska_clusters, &cluster);
@@ -3480,7 +3518,7 @@
     for (i = 0; i < blocks_list->nb_elem; i++)
         if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
             int is_keyframe = blocks[i].non_simple ? blocks[i].reference == INT64_MIN : -1;
-            res = matroska_parse_block(matroska, blocks[i].bin.data,
+            res = matroska_parse_block(matroska, blocks[i].bin.buf, blocks[i].bin.data,
                                        blocks[i].bin.size, blocks[i].bin.pos,
                                        cluster.timecode, blocks[i].duration,
                                        is_keyframe, NULL, 0, 0, pos,
@@ -3656,10 +3694,10 @@
         matroska->current_id = 0;
         matroska_clear_queue(matroska);
         if (matroska_parse_cluster(matroska) < 0 ||
-            matroska->num_packets <= 0) {
+            !matroska->queue) {
             break;
         }
-        pkt = matroska->packets[0];
+        pkt = &matroska->queue->pkt;
         cluster_pos += cluster_length + 12; // 12 is the offset of the cluster id and length.
         if (!(pkt->flags & AV_PKT_FLAG_KEY)) {
             rv = 0;
@@ -3946,8 +3984,8 @@
     }
 
     // basename of the file
-    buf = strrchr(s->filename, '/');
-    av_dict_set(&s->streams[0]->metadata, FILENAME, buf ? ++buf : s->filename, 0);
+    buf = strrchr(s->url, '/');
+    av_dict_set(&s->streams[0]->metadata, FILENAME, buf ? ++buf : s->url, 0);
 
     // track number
     tracks = matroska->tracks.elem;

diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 3073fd8..22d73d7 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c

@@ -21,6 +21,7 @@
 
 #include <stdint.h>
 
+#include "av1.h"
 #include "avc.h"
 #include "hevc.h"
 #include "avformat.h"
@@ -769,6 +770,13 @@
         ff_isom_write_hvcc(dyn_cp, par->extradata,
                            par->extradata_size, 0);
         return 0;
+    case AV_CODEC_ID_AV1:
+        if (par->extradata_size)
+            return ff_isom_write_av1c(dyn_cp, par->extradata,
+                                      par->extradata_size);
+        else
+            put_ebml_void(pb, 4 + 3);
+        break;
     case AV_CODEC_ID_ALAC:
         if (par->extradata_size < 36) {
             av_log(s, AV_LOG_ERROR,
@@ -953,78 +961,79 @@
     return 0;
 }
 
-static int mkv_write_video_projection(AVFormatContext *s, AVIOContext *pb, AVStream *st)
+static int mkv_write_video_projection(AVFormatContext *s, AVIOContext *pb,
+                                      AVStream *st)
 {
+    AVIOContext b;
+    AVIOContext *dyn_cp;
     int side_data_size = 0;
+    int ret, projection_size;
+    uint8_t *projection_ptr;
+    uint8_t private[20];
+
     const AVSphericalMapping *spherical =
-        (const AVSphericalMapping*) av_stream_get_side_data(st, AV_PKT_DATA_SPHERICAL,
+        (const AVSphericalMapping *)av_stream_get_side_data(st, AV_PKT_DATA_SPHERICAL,
                                                             &side_data_size);
 
-    if (side_data_size) {
-        AVIOContext *dyn_cp;
-        uint8_t *projection_ptr;
-        int ret, projection_size;
+    if (!side_data_size)
+        return 0;
 
-        ret = avio_open_dyn_buf(&dyn_cp);
-        if (ret < 0)
-            return ret;
+    ret = avio_open_dyn_buf(&dyn_cp);
+    if (ret < 0)
+        return ret;
 
-        switch (spherical->projection) {
-        case AV_SPHERICAL_EQUIRECTANGULAR:
-            put_ebml_uint(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONTYPE,
-                          MATROSKA_VIDEO_PROJECTION_TYPE_EQUIRECTANGULAR);
-            break;
-        case AV_SPHERICAL_EQUIRECTANGULAR_TILE:
-        {
-            AVIOContext b;
-            uint8_t private[20];
-            ffio_init_context(&b, private, sizeof(private),
-                              1, NULL, NULL, NULL, NULL);
-            put_ebml_uint(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONTYPE,
-                          MATROSKA_VIDEO_PROJECTION_TYPE_EQUIRECTANGULAR);
-            avio_wb32(&b, 0); // version + flags
-            avio_wb32(&b, spherical->bound_top);
-            avio_wb32(&b, spherical->bound_bottom);
-            avio_wb32(&b, spherical->bound_left);
-            avio_wb32(&b, spherical->bound_right);
-            put_ebml_binary(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONPRIVATE, private, sizeof(private));
-            break;
-        }
-        case AV_SPHERICAL_CUBEMAP:
-        {
-            AVIOContext b;
-            uint8_t private[12];
-            ffio_init_context(&b, private, sizeof(private),
-                              1, NULL, NULL, NULL, NULL);
-            put_ebml_uint(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONTYPE,
-                          MATROSKA_VIDEO_PROJECTION_TYPE_CUBEMAP);
-            avio_wb32(&b, 0); // version + flags
-            avio_wb32(&b, 0); // layout
-            avio_wb32(&b, spherical->padding);
-            put_ebml_binary(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONPRIVATE, private, sizeof(private));
-            break;
-        }
-        default:
-            av_log(s, AV_LOG_WARNING, "Unknown projection type\n");
-            goto end;
-        }
+    switch (spherical->projection) {
+    case AV_SPHERICAL_EQUIRECTANGULAR:
+        put_ebml_uint(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONTYPE,
+                      MATROSKA_VIDEO_PROJECTION_TYPE_EQUIRECTANGULAR);
+        break;
+    case AV_SPHERICAL_EQUIRECTANGULAR_TILE:
+        ffio_init_context(&b, private, 20, 1, NULL, NULL, NULL, NULL);
+        put_ebml_uint(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONTYPE,
+                      MATROSKA_VIDEO_PROJECTION_TYPE_EQUIRECTANGULAR);
+        avio_wb32(&b, 0); // version + flags
+        avio_wb32(&b, spherical->bound_top);
+        avio_wb32(&b, spherical->bound_bottom);
+        avio_wb32(&b, spherical->bound_left);
+        avio_wb32(&b, spherical->bound_right);
+        put_ebml_binary(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONPRIVATE,
+                        private, avio_tell(&b));
+        break;
+    case AV_SPHERICAL_CUBEMAP:
+        ffio_init_context(&b, private, 12, 1, NULL, NULL, NULL, NULL);
+        put_ebml_uint(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONTYPE,
+                      MATROSKA_VIDEO_PROJECTION_TYPE_CUBEMAP);
+        avio_wb32(&b, 0); // version + flags
+        avio_wb32(&b, 0); // layout
+        avio_wb32(&b, spherical->padding);
+        put_ebml_binary(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONPRIVATE,
+                        private, avio_tell(&b));
+        break;
+    default:
+        av_log(s, AV_LOG_WARNING, "Unknown projection type\n");
+        goto end;
+    }
 
-        if (spherical->yaw)
-            put_ebml_float(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONPOSEYAW,   (double)spherical->yaw   / (1 << 16));
-        if (spherical->pitch)
-            put_ebml_float(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONPOSEPITCH, (double)spherical->pitch / (1 << 16));
-        if (spherical->roll)
-            put_ebml_float(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONPOSEROLL,  (double)spherical->roll  / (1 << 16));
+    if (spherical->yaw)
+        put_ebml_float(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONPOSEYAW,
+                       (double) spherical->yaw   / (1 << 16));
+    if (spherical->pitch)
+        put_ebml_float(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONPOSEPITCH,
+                       (double) spherical->pitch / (1 << 16));
+    if (spherical->roll)
+        put_ebml_float(dyn_cp, MATROSKA_ID_VIDEOPROJECTIONPOSEROLL,
+                       (double) spherical->roll  / (1 << 16));
 
 end:
-        projection_size = avio_close_dyn_buf(dyn_cp, &projection_ptr);
-        if (projection_size) {
-            ebml_master projection = start_ebml_master(pb, MATROSKA_ID_VIDEOPROJECTION, projection_size);
-            avio_write(pb, projection_ptr, projection_size);
-            end_ebml_master(pb, projection);
-        }
-        av_freep(&projection_ptr);
+    projection_size = avio_close_dyn_buf(dyn_cp, &projection_ptr);
+    if (projection_size) {
+        ebml_master projection = start_ebml_master(pb,
+                                                   MATROSKA_ID_VIDEOPROJECTION,
+                                                   projection_size);
+        avio_write(pb, projection_ptr, projection_size);
+        end_ebml_master(pb, projection);
     }
+    av_freep(&projection_ptr);
 
     return 0;
 }
@@ -1229,21 +1238,38 @@
     if (st->disposition & AV_DISPOSITION_FORCED)
         put_ebml_uint(pb, MATROSKA_ID_TRACKFLAGFORCED, 1);
 
-    if (mkv->mode == MODE_WEBM && par->codec_id == AV_CODEC_ID_WEBVTT) {
+    if (mkv->mode == MODE_WEBM) {
         const char *codec_id;
-        if (st->disposition & AV_DISPOSITION_CAPTIONS) {
-            codec_id = "D_WEBVTT/CAPTIONS";
-            native_id = MATROSKA_TRACK_TYPE_SUBTITLE;
-        } else if (st->disposition & AV_DISPOSITION_DESCRIPTIONS) {
-            codec_id = "D_WEBVTT/DESCRIPTIONS";
-            native_id = MATROSKA_TRACK_TYPE_METADATA;
-        } else if (st->disposition & AV_DISPOSITION_METADATA) {
-            codec_id = "D_WEBVTT/METADATA";
-            native_id = MATROSKA_TRACK_TYPE_METADATA;
-        } else {
-            codec_id = "D_WEBVTT/SUBTITLES";
-            native_id = MATROSKA_TRACK_TYPE_SUBTITLE;
+        if (par->codec_type != AVMEDIA_TYPE_SUBTITLE) {
+            for (j = 0; ff_webm_codec_tags[j].id != AV_CODEC_ID_NONE; j++) {
+                if (ff_webm_codec_tags[j].id == par->codec_id) {
+                    codec_id = ff_webm_codec_tags[j].str;
+                    native_id = 1;
+                    break;
+                }
+            }
+        } else if (par->codec_id == AV_CODEC_ID_WEBVTT) {
+            if (st->disposition & AV_DISPOSITION_CAPTIONS) {
+                codec_id = "D_WEBVTT/CAPTIONS";
+                native_id = MATROSKA_TRACK_TYPE_SUBTITLE;
+            } else if (st->disposition & AV_DISPOSITION_DESCRIPTIONS) {
+                codec_id = "D_WEBVTT/DESCRIPTIONS";
+                native_id = MATROSKA_TRACK_TYPE_METADATA;
+            } else if (st->disposition & AV_DISPOSITION_METADATA) {
+                codec_id = "D_WEBVTT/METADATA";
+                native_id = MATROSKA_TRACK_TYPE_METADATA;
+            } else {
+                codec_id = "D_WEBVTT/SUBTITLES";
+                native_id = MATROSKA_TRACK_TYPE_SUBTITLE;
+            }
         }
+
+        if (!native_id) {
+            av_log(s, AV_LOG_ERROR,
+                   "Only VP8 or VP9 or AV1 video and Vorbis or Opus audio and WebVTT subtitles are supported for WebM.\n");
+            return AVERROR(EINVAL);
+        }
+
         put_ebml_string(pb, MATROSKA_ID_CODECID, codec_id);
     } else {
         // look for a codec ID string specific to mkv to use,
@@ -1285,16 +1311,6 @@
         put_ebml_uint(pb, MATROSKA_ID_SEEKPREROLL, OPUS_SEEK_PREROLL);
     }
 
-    if (mkv->mode == MODE_WEBM && !(par->codec_id == AV_CODEC_ID_VP8 ||
-                                    par->codec_id == AV_CODEC_ID_VP9 ||
-                                    par->codec_id == AV_CODEC_ID_OPUS ||
-                                    par->codec_id == AV_CODEC_ID_VORBIS ||
-                                    par->codec_id == AV_CODEC_ID_WEBVTT)) {
-        av_log(s, AV_LOG_ERROR,
-               "Only VP8 or VP9 video and Vorbis or Opus audio and WebVTT subtitles are supported for WebM.\n");
-        return AVERROR(EINVAL);
-    }
-
     switch (par->codec_type) {
     case AVMEDIA_TYPE_VIDEO:
         mkv->have_video = 1;
@@ -1303,8 +1319,6 @@
         if(   st->avg_frame_rate.num > 0 && st->avg_frame_rate.den > 0
            && av_cmp_q(av_inv_q(st->avg_frame_rate), st->time_base) > 0)
             put_ebml_uint(pb, MATROSKA_ID_TRACKDEFAULTDURATION, 1000000000LL * st->avg_frame_rate.den / st->avg_frame_rate.num);
-        else
-            put_ebml_uint(pb, MATROSKA_ID_TRACKDEFAULTDURATION, 1000000000LL * st->time_base.num / st->time_base.den);
 
         if (!native_id &&
             ff_codec_get_tag(ff_codec_movvideo_tags, par->codec_id) &&
@@ -1866,17 +1880,6 @@
     }
 
     for (i = 0; i < s->nb_streams; i++) {
-        if (s->streams[i]->codecpar->codec_id == AV_CODEC_ID_ATRAC3 ||
-            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_COOK ||
-            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_RA_288 ||
-            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_SIPR ||
-            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_RV10 ||
-            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_RV20) {
-            av_log(s, AV_LOG_ERROR,
-                   "The Matroska muxer does not yet support muxing %s\n",
-                   avcodec_get_name(s->streams[i]->codecpar->codec_id));
-            return AVERROR_PATCHWELCOME;
-        }
         if (s->streams[i]->codecpar->codec_id == AV_CODEC_ID_OPUS ||
             av_dict_get(s->streams[i]->metadata, "stereo_mode", NULL, 0) ||
             av_dict_get(s->streams[i]->metadata, "alpha_mode", NULL, 0))
@@ -2016,8 +2019,17 @@
         ret = AVERROR(ENOMEM);
         goto fail;
     }
+
+    if (s->metadata_header_padding > 0) {
+        if (s->metadata_header_padding == 1)
+            s->metadata_header_padding++;
+        put_ebml_void(pb, s->metadata_header_padding);
+    }
+
     if ((pb->seekable & AVIO_SEEKABLE_NORMAL) && mkv->reserve_cues_space) {
         mkv->cues_pos = avio_tell(pb);
+        if (mkv->reserve_cues_space == 1)
+            mkv->reserve_cues_space++;
         put_ebml_void(pb, mkv->reserve_cues_space);
     }
 
@@ -2137,6 +2149,8 @@
              (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1))
         /* extradata is Annex B, assume the bitstream is too and convert it */
         ff_hevc_annexb2mp4_buf(pkt->data, &data, &size, 0, NULL);
+    else if (par->codec_id == AV_CODEC_ID_AV1)
+        ff_av1_filter_obus_buf(pkt->data, &data, &size);
     else if (par->codec_id == AV_CODEC_ID_WAVPACK) {
         int ret = mkv_strip_wavpack(pkt->data, &data, &size);
         if (ret < 0) {
@@ -2336,6 +2350,37 @@
             avcodec_parameters_free(&codecpriv_par);
         }
         break;
+    // FIXME: Remove the following once libaom starts propagating extradata during init()
+    //        See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012
+    case AV_CODEC_ID_AV1:
+        if (side_data_size && (s->pb->seekable & AVIO_SEEKABLE_NORMAL) && !mkv->is_live &&
+            !par->extradata_size) {
+            AVIOContext *dyn_cp;
+            uint8_t *codecpriv;
+            int codecpriv_size;
+            int64_t curpos;
+            ret = avio_open_dyn_buf(&dyn_cp);
+            if (ret < 0)
+                return ret;
+            ff_isom_write_av1c(dyn_cp, side_data, side_data_size);
+            codecpriv_size = avio_close_dyn_buf(dyn_cp, &codecpriv);
+            if (!codecpriv_size) {
+                av_free(codecpriv);
+                return AVERROR_INVALIDDATA;
+            }
+            curpos = avio_tell(mkv->tracks_bc);
+            avio_seek(mkv->tracks_bc, track->codecpriv_offset, SEEK_SET);
+            // Do not write the OBUs as we don't have space saved for them
+            put_ebml_binary(mkv->tracks_bc, MATROSKA_ID_CODECPRIVATE, codecpriv, 4);
+            av_free(codecpriv);
+            avio_seek(mkv->tracks_bc, curpos, SEEK_SET);
+            ret = ff_alloc_extradata(par, side_data_size);
+            if (ret < 0)
+                return ret;
+            memcpy(par->extradata, side_data, side_data_size);
+        } else if (!par->extradata_size)
+            return AVERROR_INVALIDDATA;
+        break;
     default:
         if (side_data_size)
             av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream %d.\n", pkt->stream_index);
@@ -2655,10 +2700,41 @@
     return 0;
 }
 
+static int webm_query_codec(enum AVCodecID codec_id, int std_compliance)
+{
+    int i;
+    for (i = 0; ff_webm_codec_tags[i].id != AV_CODEC_ID_NONE; i++)
+        if (ff_webm_codec_tags[i].id == codec_id)
+            return 1;
+
+    return 0;
+}
+
 static int mkv_init(struct AVFormatContext *s)
 {
     int i;
 
+    if (s->nb_streams > MAX_TRACKS) {
+        av_log(s, AV_LOG_ERROR,
+               "At most %d streams are supported for muxing in Matroska\n",
+               MAX_TRACKS);
+        return AVERROR(EINVAL);
+    }
+
+    for (i = 0; i < s->nb_streams; i++) {
+        if (s->streams[i]->codecpar->codec_id == AV_CODEC_ID_ATRAC3 ||
+            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_COOK ||
+            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_RA_288 ||
+            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_SIPR ||
+            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_RV10 ||
+            s->streams[i]->codecpar->codec_id == AV_CODEC_ID_RV20) {
+            av_log(s, AV_LOG_ERROR,
+                   "The Matroska muxer does not yet support muxing %s\n",
+                   avcodec_get_name(s->streams[i]->codecpar->codec_id));
+            return AVERROR_PATCHWELCOME;
+        }
+    }
+
     if (s->avoid_negative_ts < 0) {
         s->avoid_negative_ts = 1;
         s->internal->avoid_negative_ts_use_pts = 1;
@@ -2689,7 +2765,6 @@
 
 static const AVCodecTag additional_audio_tags[] = {
     { AV_CODEC_ID_ALAC,      0XFFFFFFFF },
-    { AV_CODEC_ID_EAC3,      0XFFFFFFFF },
     { AV_CODEC_ID_MLP,       0xFFFFFFFF },
     { AV_CODEC_ID_OPUS,      0xFFFFFFFF },
     { AV_CODEC_ID_PCM_S16BE, 0xFFFFFFFF },
@@ -2708,8 +2783,6 @@
     { AV_CODEC_ID_RV10,      0xFFFFFFFF },
     { AV_CODEC_ID_RV20,      0xFFFFFFFF },
     { AV_CODEC_ID_RV30,      0xFFFFFFFF },
-    { AV_CODEC_ID_RV40,      0xFFFFFFFF },
-    { AV_CODEC_ID_VP9,       0xFFFFFFFF },
     { AV_CODEC_ID_NONE,      0xFFFFFFFF }
 };
 
@@ -2789,6 +2862,7 @@
     .write_header      = mkv_write_header,
     .write_packet      = mkv_write_flush_packet,
     .write_trailer     = mkv_write_trailer,
+    .query_codec       = webm_query_codec,
     .check_bitstream   = mkv_check_bitstream,
     .flags             = AVFMT_GLOBALHEADER | AVFMT_VARIABLE_FPS |
                          AVFMT_TS_NONSTRICT | AVFMT_ALLOW_FLUSH,

diff --git a/libavformat/mlvdec.c b/libavformat/mlvdec.c
index 319cd26..ded8196 100644
--- a/libavformat/mlvdec.c
+++ b/libavformat/mlvdec.c

@@ -77,7 +77,7 @@
     return 0;
 }
 
-static void read_string(AVFormatContext *avctx, AVIOContext *pb, const char *tag, int size)
+static void read_string(AVFormatContext *avctx, AVIOContext *pb, const char *tag, unsigned size)
 {
     char * value = av_malloc(size + 1);
     if (!value) {
@@ -342,9 +342,9 @@
         return ret;
 
     /* scan secondary files */
-    if (strlen(avctx->filename) > 2) {
+    if (strlen(avctx->url) > 2) {
         int i;
-        char *filename = av_strdup(avctx->filename);
+        char *filename = av_strdup(avctx->url);
 
         if (!filename)
             return AVERROR(ENOMEM);

diff --git a/libavformat/mms.c b/libavformat/mms.c
index 17fa76a..768fda6 100644
--- a/libavformat/mms.c
+++ b/libavformat/mms.c

@@ -94,24 +94,26 @@
                 }
             }
         } else if (!memcmp(p, ff_asf_stream_header, sizeof(ff_asf_guid))) {
-            flags     = AV_RL16(p + sizeof(ff_asf_guid)*3 + 24);
-            stream_id = flags & 0x7F;
-            //The second condition is for checking CS_PKT_STREAM_ID_REQUEST packet size,
-            //we can calculate the packet size by stream_num.
-            //Please see function send_stream_selection_request().
-            if (mms->stream_num < MMS_MAX_STREAMS &&
-                    46 + mms->stream_num * 6 < sizeof(mms->out_buffer)) {
-                mms->streams = av_fast_realloc(mms->streams,
-                                   &mms->nb_streams_allocated,
-                                   (mms->stream_num + 1) * sizeof(MMSStream));
-                if (!mms->streams)
-                    return AVERROR(ENOMEM);
-                mms->streams[mms->stream_num].id = stream_id;
-                mms->stream_num++;
-            } else {
-                av_log(NULL, AV_LOG_ERROR,
-                       "Corrupt stream (too many A/V streams)\n");
-                return AVERROR_INVALIDDATA;
+            if (end - p >= (sizeof(ff_asf_guid) * 3 + 26)) {
+                flags     = AV_RL16(p + sizeof(ff_asf_guid)*3 + 24);
+                stream_id = flags & 0x7F;
+                //The second condition is for checking CS_PKT_STREAM_ID_REQUEST packet size,
+                //we can calculate the packet size by stream_num.
+                //Please see function send_stream_selection_request().
+                if (mms->stream_num < MMS_MAX_STREAMS &&
+                        46 + mms->stream_num * 6 < sizeof(mms->out_buffer)) {
+                    mms->streams = av_fast_realloc(mms->streams,
+                                       &mms->nb_streams_allocated,
+                                       (mms->stream_num + 1) * sizeof(MMSStream));
+                    if (!mms->streams)
+                        return AVERROR(ENOMEM);
+                    mms->streams[mms->stream_num].id = stream_id;
+                    mms->stream_num++;
+                } else {
+                    av_log(NULL, AV_LOG_ERROR,
+                           "Corrupt stream (too many A/V streams)\n");
+                    return AVERROR_INVALIDDATA;
+                }
             }
         } else if (!memcmp(p, ff_asf_ext_stream_header, sizeof(ff_asf_guid))) {
             if (end - p >= 88) {
@@ -143,6 +145,12 @@
             }
         } else if (!memcmp(p, ff_asf_head1_guid, sizeof(ff_asf_guid))) {
             chunksize = 46; // see references [2] section 3.4. This should be set 46.
+            if (chunksize > end - p) {
+                av_log(NULL, AV_LOG_ERROR,
+                    "Corrupt stream (header chunksize %"PRId64" is invalid)\n",
+                    chunksize);
+                return AVERROR_INVALIDDATA;
+            }
         }
         p += chunksize;
     }

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 9a6c3b3..ec57a05 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c

@@ -336,6 +336,8 @@
     case MKTAG( 'l','d','e','s'): key = "synopsis";  break;
     case MKTAG( 'l','o','c','i'):
         return mov_metadata_loci(c, pb, atom.size);
+    case MKTAG( 'm','a','n','u'): key = "make"; break;
+    case MKTAG( 'm','o','d','l'): key = "model"; break;
     case MKTAG( 'p','c','s','t'): key = "podcast";
         parse = mov_metadata_int8_no_padding; break;
     case MKTAG( 'p','g','a','p'): key = "gapless_playback";
@@ -410,7 +412,11 @@
                 int ret = mov_read_covr(c, pb, data_type, str_size);
                 if (ret < 0) {
                     av_log(c->fc, AV_LOG_ERROR, "Error parsing cover art.\n");
+                    return ret;
                 }
+                atom.size -= str_size;
+                if (atom.size > 8)
+                    goto retry;
                 return ret;
             } else if (!key && c->found_hdlr_mdta && c->meta_keys) {
                 uint32_t index = AV_RB32(&atom.type);
@@ -759,7 +765,8 @@
         title_str[title_size] = 0;
         if (title_str[0]) {
             int off = (!c->isom && title_str[0] == title_size - 1);
-            av_dict_set(&st->metadata, "handler_name", title_str + off, 0);
+            // flag added so as to not set stream handler name if already set from mdia->hdlr
+            av_dict_set(&st->metadata, "handler_name", title_str + off, AV_DICT_DONT_OVERWRITE);
         }
         av_freep(&title_str);
     }
@@ -767,28 +774,6 @@
     return 0;
 }
 
-int ff_mov_read_esds(AVFormatContext *fc, AVIOContext *pb)
-{
-    AVStream *st;
-    int tag;
-
-    if (fc->nb_streams < 1)
-        return 0;
-    st = fc->streams[fc->nb_streams-1];
-
-    avio_rb32(pb); /* version + flags */
-    ff_mp4_read_descr(fc, pb, &tag);
-    if (tag == MP4ESDescrTag) {
-        ff_mp4_parse_es_descr(pb, NULL);
-    } else
-        avio_rb16(pb); /* ID */
-
-    ff_mp4_read_descr(fc, pb, &tag);
-    if (tag == MP4DecConfigDescrTag)
-        ff_mp4_read_dec_config_descr(fc, st, pb);
-    return 0;
-}
-
 static int mov_read_esds(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     return ff_mov_read_esds(c->fc, pb);
@@ -1168,6 +1153,217 @@
     return 0; /* now go for mdat */
 }
 
+static MOVFragmentStreamInfo * get_frag_stream_info(
+    MOVFragmentIndex *frag_index,
+    int index,
+    int id)
+{
+    int i;
+    MOVFragmentIndexItem * item;
+
+    if (index < 0 || index >= frag_index->nb_items)
+        return NULL;
+    item = &frag_index->item[index];
+    for (i = 0; i < item->nb_stream_info; i++)
+        if (item->stream_info[i].id == id)
+            return &item->stream_info[i];
+
+    // This shouldn't happen
+    return NULL;
+}
+
+static void set_frag_stream(MOVFragmentIndex *frag_index, int id)
+{
+    int i;
+    MOVFragmentIndexItem * item;
+
+    if (frag_index->current < 0 ||
+        frag_index->current >= frag_index->nb_items)
+        return;
+
+    item = &frag_index->item[frag_index->current];
+    for (i = 0; i < item->nb_stream_info; i++)
+        if (item->stream_info[i].id == id) {
+            item->current = i;
+            return;
+        }
+
+    // id not found.  This shouldn't happen.
+    item->current = -1;
+}
+
+static MOVFragmentStreamInfo * get_current_frag_stream_info(
+    MOVFragmentIndex *frag_index)
+{
+    MOVFragmentIndexItem *item;
+    if (frag_index->current < 0 ||
+        frag_index->current >= frag_index->nb_items)
+        return NULL;
+
+    item = &frag_index->item[frag_index->current];
+    if (item->current >= 0 && item->current < item->nb_stream_info)
+        return &item->stream_info[item->current];
+
+    // This shouldn't happen
+    return NULL;
+}
+
+static int search_frag_moof_offset(MOVFragmentIndex *frag_index, int64_t offset)
+{
+    int a, b, m;
+    int64_t moof_offset;
+
+    // Optimize for appending new entries
+    if (!frag_index->nb_items ||
+        frag_index->item[frag_index->nb_items - 1].moof_offset < offset)
+        return frag_index->nb_items;
+
+    a = -1;
+    b = frag_index->nb_items;
+
+    while (b - a > 1) {
+        m = (a + b) >> 1;
+        moof_offset = frag_index->item[m].moof_offset;
+        if (moof_offset >= offset)
+            b = m;
+        if (moof_offset <= offset)
+            a = m;
+    }
+    return b;
+}
+
+static int64_t get_stream_info_time(MOVFragmentStreamInfo * frag_stream_info)
+{
+
+    if (frag_stream_info) {
+        if (frag_stream_info->sidx_pts != AV_NOPTS_VALUE)
+            return frag_stream_info->sidx_pts;
+        if (frag_stream_info->first_tfra_pts != AV_NOPTS_VALUE)
+            return frag_stream_info->first_tfra_pts;
+        if (frag_stream_info->tfdt_dts != AV_NOPTS_VALUE)
+            return frag_stream_info->tfdt_dts;
+    }
+    return AV_NOPTS_VALUE;
+}
+
+static int64_t get_frag_time(MOVFragmentIndex *frag_index,
+                             int index, int track_id)
+{
+    MOVFragmentStreamInfo * frag_stream_info;
+    int64_t timestamp;
+    int i;
+
+    if (track_id >= 0) {
+        frag_stream_info = get_frag_stream_info(frag_index, index, track_id);
+        return frag_stream_info->sidx_pts;
+    }
+
+    for (i = 0; i < frag_index->item[index].nb_stream_info; i++) {
+        frag_stream_info = &frag_index->item[index].stream_info[i];
+        timestamp = get_stream_info_time(frag_stream_info);
+        if (timestamp != AV_NOPTS_VALUE)
+            return timestamp;
+    }
+    return AV_NOPTS_VALUE;
+}
+
+static int search_frag_timestamp(MOVFragmentIndex *frag_index,
+                                 AVStream *st, int64_t timestamp)
+{
+    int a, b, m;
+    int64_t frag_time;
+    int id = -1;
+
+    if (st) {
+        // If the stream is referenced by any sidx, limit the search
+        // to fragments that referenced this stream in the sidx
+        MOVStreamContext *sc = st->priv_data;
+        if (sc->has_sidx)
+            id = st->id;
+    }
+
+    a = -1;
+    b = frag_index->nb_items;
+
+    while (b - a > 1) {
+        m = (a + b) >> 1;
+        frag_time = get_frag_time(frag_index, m, id);
+        if (frag_time != AV_NOPTS_VALUE) {
+            if (frag_time >= timestamp)
+                b = m;
+            if (frag_time <= timestamp)
+                a = m;
+        }
+    }
+    return a;
+}
+
+static int update_frag_index(MOVContext *c, int64_t offset)
+{
+    int index, i;
+    MOVFragmentIndexItem * item;
+    MOVFragmentStreamInfo * frag_stream_info;
+
+    // If moof_offset already exists in frag_index, return index to it
+    index = search_frag_moof_offset(&c->frag_index, offset);
+    if (index < c->frag_index.nb_items &&
+        c->frag_index.item[index].moof_offset == offset)
+        return index;
+
+    // offset is not yet in frag index.
+    // Insert new item at index (sorted by moof offset)
+    item = av_fast_realloc(c->frag_index.item,
+                           &c->frag_index.allocated_size,
+                           (c->frag_index.nb_items + 1) *
+                           sizeof(*c->frag_index.item));
+    if(!item)
+        return -1;
+    c->frag_index.item = item;
+
+    frag_stream_info = av_realloc_array(NULL, c->fc->nb_streams,
+                                        sizeof(*item->stream_info));
+    if (!frag_stream_info)
+        return -1;
+
+    for (i = 0; i < c->fc->nb_streams; i++) {
+        frag_stream_info[i].id = c->fc->streams[i]->id;
+        frag_stream_info[i].sidx_pts = AV_NOPTS_VALUE;
+        frag_stream_info[i].tfdt_dts = AV_NOPTS_VALUE;
+        frag_stream_info[i].first_tfra_pts = AV_NOPTS_VALUE;
+        frag_stream_info[i].index_entry = -1;
+        frag_stream_info[i].encryption_index = NULL;
+    }
+
+    if (index < c->frag_index.nb_items)
+        memmove(c->frag_index.item + index + 1, c->frag_index.item + index,
+                (c->frag_index.nb_items - index) * sizeof(*c->frag_index.item));
+
+    item = &c->frag_index.item[index];
+    item->headers_read = 0;
+    item->current = 0;
+    item->nb_stream_info = c->fc->nb_streams;
+    item->moof_offset = offset;
+    item->stream_info = frag_stream_info;
+    c->frag_index.nb_items++;
+
+    return index;
+}
+
+static void fix_frag_index_entries(MOVFragmentIndex *frag_index, int index,
+                                   int id, int entries)
+{
+    int i;
+    MOVFragmentStreamInfo * frag_stream_info;
+
+    if (index < 0)
+        return;
+    for (i = index; i < frag_index->nb_items; i++) {
+        frag_stream_info = get_frag_stream_info(frag_index, i, id);
+        if (frag_stream_info && frag_stream_info->index_entry >= 0)
+            frag_stream_info->index_entry += entries;
+    }
+}
+
 static int mov_read_moof(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     if (!c->has_looked_for_mfra && c->use_mfra_for > 0) {
@@ -1187,6 +1383,7 @@
     }
     c->fragment.moof_offset = c->fragment.implicit_offset = avio_tell(pb) - 8;
     av_log(c->fc, AV_LOG_TRACE, "moof offset %"PRIx64"\n", c->fragment.moof_offset);
+    c->frag_index.current = update_frag_index(c, c->fragment.moof_offset);
     return mov_read_default(c, pb, atom);
 }
 
@@ -1544,6 +1741,7 @@
                 par->width = 1440;
             return 0;
         } else if ((par->codec_tag == MKTAG('A', 'V', 'd', '1') ||
+                    par->codec_tag == MKTAG('A', 'V', 'j', '2') ||
                     par->codec_tag == MKTAG('A', 'V', 'd', 'n')) &&
                    atom.size >= 24) {
             int num, den;
@@ -1704,6 +1902,8 @@
     ret = ff_get_extradata(c->fc, st->codecpar, pb, atom.size);
     if (ret < 0)
         return ret;
+    if (atom.type == MKTAG('h','v','c','C') && st->codecpar->codec_tag == MKTAG('d','v','h','1'))
+        st->codecpar->codec_id = AV_CODEC_ID_HEVC;
 
     return 0;
 }
@@ -1801,26 +2001,14 @@
 
     sc->chunk_count = i;
 
-    if (pb->eof_reached)
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_WARNING, "reached eof, corrupted STCO atom\n");
         return AVERROR_EOF;
+    }
 
     return 0;
 }
 
-/**
- * Compute codec id for 'lpcm' tag.
- * See CoreAudioTypes and AudioStreamBasicDescription at Apple.
- */
-enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags)
-{
-    /* lpcm flags:
-     * 0x1 = float
-     * 0x2 = big-endian
-     * 0x4 = signed
-     */
-    return ff_get_pcm_codec_id(bps, flags & 1, flags & 2, flags & 4 ? -1 : 0);
-}
-
 static int mov_codec_id(AVStream *st, uint32_t format)
 {
     int id = ff_codec_get_id(ff_codec_movaudio_tags, format);
@@ -1846,6 +2034,8 @@
             id = ff_codec_get_id(ff_codec_movsubtitle_tags, format);
             if (id > 0)
                 st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
+            else
+                id = ff_codec_get_id(ff_codec_movdata_tags, format);
         }
     }
 
@@ -1932,8 +2122,8 @@
     // Read QT version 1 fields. In version 0 these do not exist.
     av_log(c->fc, AV_LOG_TRACE, "version =%d, isom =%d\n", version, c->isom);
     if (!c->isom ||
-        (compatible_brands && strstr(compatible_brands->value, "qt  "))) {
-
+        (compatible_brands && strstr(compatible_brands->value, "qt  ")) ||
+        (sc->stsd_version == 0 && version > 0)) {
         if (version == 1) {
             sc->samples_per_frame = avio_rb32(pb);
             avio_rb32(pb); /* bytes per packet */
@@ -2219,6 +2409,7 @@
     case AV_CODEC_ID_EAC3:
     case AV_CODEC_ID_MPEG1VIDEO:
     case AV_CODEC_ID_VC1:
+    case AV_CODEC_ID_VP8:
     case AV_CODEC_ID_VP9:
         st->need_parsing = AVSTREAM_PARSE_FULL;
         break;
@@ -2262,8 +2453,7 @@
     MOVStreamContext *sc;
     int pseudo_stream_id;
 
-    if (c->fc->nb_streams < 1)
-        return 0;
+    av_assert0 (c->fc->nb_streams >= 1);
     st = c->fc->streams[c->fc->nb_streams-1];
     sc = st->priv_data;
 
@@ -2289,8 +2479,10 @@
         }
 
         if (mov_skip_multiple_stsd(c, pb, st->codecpar->codec_tag, format,
-                                   size - (avio_tell(pb) - start_pos)))
+                                   size - (avio_tell(pb) - start_pos))) {
+            sc->stsd_count++;
             continue;
+        }
 
         sc->pseudo_stream_id = st->codecpar->codec_tag ? -1 : pseudo_stream_id;
         sc->dref_id= dref_id;
@@ -2302,18 +2494,16 @@
                "size=%"PRId64" 4CC=%s codec_type=%d\n", size,
                av_fourcc2str(format), st->codecpar->codec_type);
 
+        st->codecpar->codec_id = id;
         if (st->codecpar->codec_type==AVMEDIA_TYPE_VIDEO) {
-            st->codecpar->codec_id = id;
             mov_parse_stsd_video(c, pb, st, sc);
         } else if (st->codecpar->codec_type==AVMEDIA_TYPE_AUDIO) {
-            st->codecpar->codec_id = id;
             mov_parse_stsd_audio(c, pb, st, sc);
             if (st->codecpar->sample_rate < 0) {
                 av_log(c->fc, AV_LOG_ERROR, "Invalid sample rate %d\n", st->codecpar->sample_rate);
                 return AVERROR_INVALIDDATA;
             }
         } else if (st->codecpar->codec_type==AVMEDIA_TYPE_SUBTITLE){
-            st->codecpar->codec_id = id;
             mov_parse_stsd_subtitle(c, pb, st, sc,
                                     size - (avio_tell(pb) - start_pos));
         } else {
@@ -2342,10 +2532,13 @@
             av_freep(&st->codecpar->extradata);
             st->codecpar->extradata_size = 0;
         }
+        sc->stsd_count++;
     }
 
-    if (pb->eof_reached)
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_WARNING, "reached eof, corrupted STSD atom\n");
         return AVERROR_EOF;
+    }
 
     return 0;
 }
@@ -2361,11 +2554,12 @@
     st = c->fc->streams[c->fc->nb_streams - 1];
     sc = st->priv_data;
 
-    avio_r8(pb); /* version */
+    sc->stsd_version = avio_r8(pb);
     avio_rb24(pb); /* flags */
     entries = avio_rb32(pb);
 
-    if (entries <= 0) {
+    /* Each entry contains a size (4 bytes) and format (4 bytes). */
+    if (entries <= 0 || entries > atom.size / 8) {
         av_log(c->fc, AV_LOG_ERROR, "invalid STSD entries %d\n", entries);
         return AVERROR_INVALIDDATA;
     }
@@ -2378,17 +2572,18 @@
 
     /* Prepare space for hosting multiple extradata. */
     sc->extradata = av_mallocz_array(entries, sizeof(*sc->extradata));
+    if (!sc->extradata)
+        return AVERROR(ENOMEM);
+
     sc->extradata_size = av_mallocz_array(entries, sizeof(*sc->extradata_size));
-    if (!sc->extradata_size || !sc->extradata) {
+    if (!sc->extradata_size) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }
 
     ret = ff_mov_read_stsd_entries(c, pb, entries);
     if (ret < 0)
-        return ret;
-
-    sc->stsd_count = entries;
+        goto fail;
 
     /* Restore back the primary extradata. */
     av_freep(&st->codecpar->extradata);
@@ -2402,6 +2597,12 @@
 
     return mov_finalize_stsd_codec(c, pb, st, sc);
 fail:
+    if (sc->extradata) {
+        int j;
+        for (j = 0; j < sc->stsd_count; j++)
+            av_freep(&sc->extradata[j]);
+    }
+
     av_freep(&sc->extradata);
     av_freep(&sc->extradata_size);
     return ret;
@@ -2444,9 +2645,34 @@
     }
 
     sc->stsc_count = i;
+    for (i = sc->stsc_count - 1; i < UINT_MAX; i--) {
+        int64_t first_min = i + 1;
+        if ((i+1 < sc->stsc_count && sc->stsc_data[i].first >= sc->stsc_data[i+1].first) ||
+            (i > 0 && sc->stsc_data[i].first <= sc->stsc_data[i-1].first) ||
+            sc->stsc_data[i].first < first_min ||
+            sc->stsc_data[i].count < 1 ||
+            sc->stsc_data[i].id < 1) {
+            av_log(c->fc, AV_LOG_WARNING, "STSC entry %d is invalid (first=%d count=%d id=%d)\n", i, sc->stsc_data[i].first, sc->stsc_data[i].count, sc->stsc_data[i].id);
+            if (i+1 >= sc->stsc_count) {
+                sc->stsc_data[i].first = FFMAX(sc->stsc_data[i].first, first_min);
+                if (i > 0 && sc->stsc_data[i].first <= sc->stsc_data[i-1].first)
+                    sc->stsc_data[i].first = FFMIN(sc->stsc_data[i-1].first + 1LL, INT_MAX);
+                sc->stsc_data[i].count = FFMAX(sc->stsc_data[i].count, 1);
+                sc->stsc_data[i].id    = FFMAX(sc->stsc_data[i].id, 1);
+                continue;
+            }
+            av_assert0(sc->stsc_data[i+1].first >= 2);
+            // We replace this entry by the next valid
+            sc->stsc_data[i].first = sc->stsc_data[i+1].first - 1;
+            sc->stsc_data[i].count = sc->stsc_data[i+1].count;
+            sc->stsc_data[i].id    = sc->stsc_data[i+1].id;
+        }
+    }
 
-    if (pb->eof_reached)
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_WARNING, "reached eof, corrupted STSC atom\n");
         return AVERROR_EOF;
+    }
 
     return 0;
 }
@@ -2457,7 +2683,7 @@
 }
 
 /* Compute the samples value for the stsc entry at the given index. */
-static inline int mov_get_stsc_samples(MOVStreamContext *sc, unsigned int index)
+static inline int64_t mov_get_stsc_samples(MOVStreamContext *sc, unsigned int index)
 {
     int chunk_count;
 
@@ -2466,7 +2692,7 @@
     else
         chunk_count = sc->chunk_count - (sc->stsc_data[index].first - 1);
 
-    return sc->stsc_data[index].count * chunk_count;
+    return sc->stsc_data[index].count * (int64_t)chunk_count;
 }
 
 static int mov_read_stps(MOVContext *c, AVIOContext *pb, MOVAtom atom)
@@ -2497,8 +2723,10 @@
 
     sc->stps_count = i;
 
-    if (pb->eof_reached)
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_WARNING, "reached eof, corrupted STPS atom\n");
         return AVERROR_EOF;
+    }
 
     return 0;
 }
@@ -2544,8 +2772,10 @@
 
     sc->keyframe_count = i;
 
-    if (pb->eof_reached)
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_WARNING, "reached eof, corrupted STSS atom\n");
         return AVERROR_EOF;
+    }
 
     return 0;
 }
@@ -2615,7 +2845,8 @@
     if (ret < 0) {
         av_freep(&sc->sample_sizes);
         av_free(buf);
-        return ret;
+        av_log(c->fc, AV_LOG_WARNING, "STSZ atom truncated\n");
+        return 0;
     }
 
     init_get_bits(&gb, buf, 8*num_bytes);
@@ -2629,8 +2860,10 @@
 
     av_free(buf);
 
-    if (pb->eof_reached)
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_WARNING, "reached eof, corrupted STSZ atom\n");
         return AVERROR_EOF;
+    }
 
     return 0;
 }
@@ -2639,7 +2872,7 @@
 {
     AVStream *st;
     MOVStreamContext *sc;
-    unsigned int i, entries;
+    unsigned int i, entries, alloc_size = 0;
     int64_t duration=0;
     int64_t total_sample_count=0;
 
@@ -2657,15 +2890,24 @@
 
     if (sc->stts_data)
         av_log(c->fc, AV_LOG_WARNING, "Duplicated STTS atom\n");
-    av_free(sc->stts_data);
+    av_freep(&sc->stts_data);
     sc->stts_count = 0;
-    sc->stts_data = av_malloc_array(entries, sizeof(*sc->stts_data));
-    if (!sc->stts_data)
+    if (entries >= INT_MAX / sizeof(*sc->stts_data))
         return AVERROR(ENOMEM);
 
     for (i = 0; i < entries && !pb->eof_reached; i++) {
         int sample_duration;
         unsigned int sample_count;
+        unsigned int min_entries = FFMIN(FFMAX(i + 1, 1024 * 1024), entries);
+        MOVStts *stts_data = av_fast_realloc(sc->stts_data, &alloc_size,
+                                             min_entries * sizeof(*sc->stts_data));
+        if (!stts_data) {
+            av_freep(&sc->stts_data);
+            sc->stts_count = 0;
+            return AVERROR(ENOMEM);
+        }
+        sc->stts_count = min_entries;
+        sc->stts_data = stts_data;
 
         sample_count=avio_rb32(pb);
         sample_duration = avio_rb32(pb);
@@ -2682,21 +2924,28 @@
             && total_sample_count > 100
             && sample_duration/10 > duration / total_sample_count)
             sample_duration = duration / total_sample_count;
-        duration+=(int64_t)sample_duration*sample_count;
+        duration+=(int64_t)sample_duration*(uint64_t)sample_count;
         total_sample_count+=sample_count;
     }
 
     sc->stts_count = i;
 
-    sc->duration_for_fps  += duration;
-    sc->nb_frames_for_fps += total_sample_count;
+    if (duration > 0 &&
+        duration <= INT64_MAX - sc->duration_for_fps &&
+        total_sample_count <= INT64_MAX - sc->nb_frames_for_fps
+    ) {
+        sc->duration_for_fps  += duration;
+        sc->nb_frames_for_fps += total_sample_count;
+    }
 
-    if (pb->eof_reached)
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_WARNING, "reached eof, corrupted STTS atom\n");
         return AVERROR_EOF;
+    }
 
     st->nb_frames= total_sample_count;
     if (duration)
-        st->duration= duration;
+        st->duration= FFMIN(st->duration, duration);
     sc->track_end = duration;
     return 0;
 }
@@ -2768,8 +3017,10 @@
 
     sc->ctts_count = ctts_count;
 
-    if (pb->eof_reached)
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_WARNING, "reached eof, corrupted CTTS atom\n");
         return AVERROR_EOF;
+    }
 
     av_log(c->fc, AV_LOG_TRACE, "dts shift %d\n", sc->dts_shift);
 
@@ -2815,7 +3066,12 @@
 
     sc->rap_group_count = i;
 
-    return pb->eof_reached ? AVERROR_EOF : 0;
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_WARNING, "reached eof, corrupted SBGP atom\n");
+        return AVERROR_EOF;
+    }
+
+    return 0;
 }
 
 /**
@@ -2845,34 +3101,99 @@
 }
 
 /**
- * Find the closest previous frame to the timestamp, in e_old index
+ * Find the closest previous frame to the timestamp_pts, in e_old index
  * entries. Searching for just any frame / just key frames can be controlled by
  * last argument 'flag'.
- * Returns the index of the entry in st->index_entries if successful,
- * else returns -1.
+ * Note that if ctts_data is not NULL, we will always search for a key frame
+ * irrespective of the value of 'flag'. If we don't find any keyframe, we will
+ * return the first frame of the video.
+ *
+ * Here the timestamp_pts is considered to be a presentation timestamp and
+ * the timestamp of index entries are considered to be decoding timestamps.
+ *
+ * Returns 0 if successful in finding a frame, else returns -1.
+ * Places the found index corresponding output arg.
+ *
+ * If ctts_old is not NULL, then refines the searched entry by searching
+ * backwards from the found timestamp, to find the frame with correct PTS.
+ *
+ * Places the found ctts_index and ctts_sample in corresponding output args.
  */
-static int64_t find_prev_closest_index(AVStream *st,
-                                       AVIndexEntry *e_old,
-                                       int nb_old,
-                                       int64_t timestamp,
-                                       int flag)
+static int find_prev_closest_index(AVStream *st,
+                                   AVIndexEntry *e_old,
+                                   int nb_old,
+                                   MOVStts* ctts_data,
+                                   int64_t ctts_count,
+                                   int64_t timestamp_pts,
+                                   int flag,
+                                   int64_t* index,
+                                   int64_t* ctts_index,
+                                   int64_t* ctts_sample)
 {
+    MOVStreamContext *msc = st->priv_data;
     AVIndexEntry *e_keep = st->index_entries;
     int nb_keep = st->nb_index_entries;
-    int64_t found = -1;
     int64_t i = 0;
+    int64_t index_ctts_count;
+
+    av_assert0(index);
+
+    // If dts_shift > 0, then all the index timestamps will have to be offset by
+    // at least dts_shift amount to obtain PTS.
+    // Hence we decrement the searched timestamp_pts by dts_shift to find the closest index element.
+    if (msc->dts_shift > 0) {
+        timestamp_pts -= msc->dts_shift;
+    }
 
     st->index_entries = e_old;
     st->nb_index_entries = nb_old;
-    found = av_index_search_timestamp(st, timestamp, flag | AVSEEK_FLAG_BACKWARD);
+    *index = av_index_search_timestamp(st, timestamp_pts, flag | AVSEEK_FLAG_BACKWARD);
 
     // Keep going backwards in the index entries until the timestamp is the same.
-    if (found >= 0) {
-        for (i = found; i > 0 && e_old[i].timestamp == e_old[i - 1].timestamp;
+    if (*index >= 0) {
+        for (i = *index; i > 0 && e_old[i].timestamp == e_old[i - 1].timestamp;
              i--) {
             if ((flag & AVSEEK_FLAG_ANY) ||
                 (e_old[i - 1].flags & AVINDEX_KEYFRAME)) {
-                found = i - 1;
+                *index = i - 1;
+            }
+        }
+    }
+
+    // If we have CTTS then refine the search, by searching backwards over PTS
+    // computed by adding corresponding CTTS durations to index timestamps.
+    if (ctts_data && *index >= 0) {
+        av_assert0(ctts_index);
+        av_assert0(ctts_sample);
+        // Find out the ctts_index for the found frame.
+        *ctts_index = 0;
+        *ctts_sample = 0;
+        for (index_ctts_count = 0; index_ctts_count < *index; index_ctts_count++) {
+            if (*ctts_index < ctts_count) {
+                (*ctts_sample)++;
+                if (ctts_data[*ctts_index].count == *ctts_sample) {
+                    (*ctts_index)++;
+                    *ctts_sample = 0;
+                }
+            }
+        }
+
+        while (*index >= 0 && (*ctts_index) >= 0 && (*ctts_index) < ctts_count) {
+            // Find a "key frame" with PTS <= timestamp_pts (So that we can decode B-frames correctly).
+            // No need to add dts_shift to the timestamp here becase timestamp_pts has already been
+            // compensated by dts_shift above.
+            if ((e_old[*index].timestamp + ctts_data[*ctts_index].duration) <= timestamp_pts &&
+                (e_old[*index].flags & AVINDEX_KEYFRAME)) {
+                break;
+            }
+
+            (*index)--;
+            if (*ctts_sample == 0) {
+                (*ctts_index)--;
+                if (*ctts_index >= 0)
+                  *ctts_sample = ctts_data[*ctts_index].count - 1;
+            } else {
+                (*ctts_sample)--;
             }
         }
     }
@@ -2880,7 +3201,7 @@
     /* restore AVStream state*/
     st->index_entries = e_keep;
     st->nb_index_entries = nb_keep;
-    return found;
+    return *index >= 0 ? 0 : -1;
 }
 
 /**
@@ -2959,7 +3280,7 @@
         FFMAX(min_size_needed, 2 * (*allocated_size)) :
         min_size_needed;
 
-    if((unsigned)(*ctts_count) + 1 >= UINT_MAX / sizeof(MOVStts))
+    if((unsigned)(*ctts_count) >= UINT_MAX / sizeof(MOVStts) - 1)
         return -1;
 
     ctts_buf_new = av_fast_realloc(*ctts_data, allocated_size, requested_size);
@@ -2976,6 +3297,62 @@
     return *ctts_count;
 }
 
+#define MAX_REORDER_DELAY 16
+static void mov_estimate_video_delay(MOVContext *c, AVStream* st) {
+    MOVStreamContext *msc = st->priv_data;
+    int ind;
+    int ctts_ind = 0;
+    int ctts_sample = 0;
+    int64_t pts_buf[MAX_REORDER_DELAY + 1]; // Circular buffer to sort pts.
+    int buf_start = 0;
+    int j, r, num_swaps;
+
+    for (j = 0; j < MAX_REORDER_DELAY + 1; j++)
+        pts_buf[j] = INT64_MIN;
+
+    if (st->codecpar->video_delay <= 0 && msc->ctts_data &&
+        st->codecpar->codec_id == AV_CODEC_ID_H264) {
+        st->codecpar->video_delay = 0;
+        for(ind = 0; ind < st->nb_index_entries && ctts_ind < msc->ctts_count; ++ind) {
+            // Point j to the last elem of the buffer and insert the current pts there.
+            j = buf_start;
+            buf_start = (buf_start + 1);
+            if (buf_start == MAX_REORDER_DELAY + 1)
+                buf_start = 0;
+
+            pts_buf[j] = st->index_entries[ind].timestamp + msc->ctts_data[ctts_ind].duration;
+
+            // The timestamps that are already in the sorted buffer, and are greater than the
+            // current pts, are exactly the timestamps that need to be buffered to output PTS
+            // in correct sorted order.
+            // Hence the video delay (which is the buffer size used to sort DTS and output PTS),
+            // can be computed as the maximum no. of swaps any particular timestamp needs to
+            // go through, to keep this buffer in sorted order.
+            num_swaps = 0;
+            while (j != buf_start) {
+                r = j - 1;
+                if (r < 0) r = MAX_REORDER_DELAY;
+                if (pts_buf[j] < pts_buf[r]) {
+                    FFSWAP(int64_t, pts_buf[j], pts_buf[r]);
+                    ++num_swaps;
+                } else {
+                    break;
+                }
+                j = r;
+            }
+            st->codecpar->video_delay = FFMAX(st->codecpar->video_delay, num_swaps);
+
+            ctts_sample++;
+            if (ctts_sample == msc->ctts_data[ctts_ind].count) {
+                ctts_ind++;
+                ctts_sample = 0;
+            }
+        }
+        av_log(c->fc, AV_LOG_DEBUG, "Setting codecpar->delay to %d for stream st: %d\n",
+               st->codecpar->video_delay, st->index);
+    }
+}
+
 static void mov_current_sample_inc(MOVStreamContext *sc)
 {
     sc->current_sample++;
@@ -3047,14 +3424,11 @@
     int64_t edit_list_start_ctts_sample = 0;
     int64_t curr_cts;
     int64_t curr_ctts = 0;
-    int64_t min_corrected_pts = -1;
     int64_t empty_edits_sum_duration = 0;
     int64_t edit_list_index = 0;
     int64_t index;
-    int64_t index_ctts_count;
     int flags;
     int64_t start_dts = 0;
-    int64_t edit_list_media_time_dts = 0;
     int64_t edit_list_start_encountered = 0;
     int64_t search_timestamp = 0;
     int64_t* frame_duration_buffer = NULL;
@@ -3063,6 +3437,7 @@
     int packet_skip_samples = 0;
     MOVIndexRange *current_index_range;
     int i;
+    int found_keyframe_after_edit = 0;
 
     if (!msc->elst_data || msc->elst_count <= 0 || nb_old <= 0) {
         return;
@@ -3089,6 +3464,9 @@
     msc->ctts_sample = 0;
     msc->ctts_allocated_size = 0;
 
+    // Reinitialize min_corrected_pts so that it can be computed again.
+    msc->min_corrected_pts = -1;
+
     // If the dts_shift is positive (in case of negative ctts values in mov),
     // then negate the DTS by dts_shift
     if (msc->dts_shift > 0) {
@@ -3124,17 +3502,11 @@
                 st->skip_samples = msc->start_pad = 0;
         }
 
-        //find closest previous key frame
-        edit_list_media_time_dts = edit_list_media_time;
-        if (msc->dts_shift > 0) {
-            edit_list_media_time_dts -= msc->dts_shift;
-        }
-
         // While reordering frame index according to edit list we must handle properly
         // the scenario when edit list entry starts from none key frame.
         // We find closest previous key frame and preserve it and consequent frames in index.
         // All frames which are outside edit list entry time boundaries will be dropped after decoding.
-        search_timestamp = edit_list_media_time_dts;
+        search_timestamp = edit_list_media_time;
         if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
             // Audio decoders like AAC need need a decoder delay samples previous to the current sample,
             // to correctly decode this frame. Hence for audio we seek to a frame 1 sec. before the
@@ -3142,42 +3514,27 @@
             search_timestamp = FFMAX(search_timestamp - msc->time_scale, e_old[0].timestamp);
         }
 
-        index = find_prev_closest_index(st, e_old, nb_old, search_timestamp, 0);
-        if (index == -1) {
+        if (find_prev_closest_index(st, e_old, nb_old, ctts_data_old, ctts_count_old, search_timestamp, 0,
+                                    &index, &ctts_index_old, &ctts_sample_old) < 0) {
             av_log(mov->fc, AV_LOG_WARNING,
                    "st: %d edit list: %"PRId64" Missing key frame while searching for timestamp: %"PRId64"\n",
                    st->index, edit_list_index, search_timestamp);
-            index = find_prev_closest_index(st, e_old, nb_old, search_timestamp, AVSEEK_FLAG_ANY);
-
-            if (index == -1) {
+            if (find_prev_closest_index(st, e_old, nb_old, ctts_data_old, ctts_count_old, search_timestamp, AVSEEK_FLAG_ANY,
+                                        &index, &ctts_index_old, &ctts_sample_old) < 0) {
                 av_log(mov->fc, AV_LOG_WARNING,
-                       "st: %d edit list %"PRId64" Cannot find an index entry before timestamp: %"PRId64".\n"
-                       "Rounding edit list media time to zero.\n",
+                       "st: %d edit list %"PRId64" Cannot find an index entry before timestamp: %"PRId64".\n",
                        st->index, edit_list_index, search_timestamp);
                 index = 0;
-                edit_list_media_time = 0;
+                ctts_index_old = 0;
+                ctts_sample_old = 0;
             }
         }
         current = e_old + index;
-
-        ctts_index_old = 0;
-        ctts_sample_old = 0;
-
-        // set ctts_index properly for the found key frame
-        for (index_ctts_count = 0; index_ctts_count < index; index_ctts_count++) {
-            if (ctts_data_old && ctts_index_old < ctts_count_old) {
-                ctts_sample_old++;
-                if (ctts_data_old[ctts_index_old].count == ctts_sample_old) {
-                    ctts_index_old++;
-                    ctts_sample_old = 0;
-                }
-            }
-        }
-
         edit_list_start_ctts_sample = ctts_sample_old;
 
         // Iterate over index and arrange it according to edit list
         edit_list_start_encountered = 0;
+        found_keyframe_after_edit = 0;
         for (; current < e_old_end; current++, index++) {
             // check  if frame outside edit list mark it for discard
             frame_duration = (current + 1 <  e_old_end) ?
@@ -3237,7 +3594,7 @@
                     flags |= AVINDEX_DISCARD_FRAME;
                     av_log(mov->fc, AV_LOG_DEBUG, "drop a frame at curr_cts: %"PRId64" @ %"PRId64"\n", curr_cts, index);
 
-                    if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && edit_list_start_encountered == 0) {
+                    if (edit_list_start_encountered == 0) {
                         num_discarded_begin++;
                         frame_duration_buffer = av_realloc(frame_duration_buffer,
                                                            num_discarded_begin * sizeof(int64_t));
@@ -3248,23 +3605,23 @@
                         frame_duration_buffer[num_discarded_begin - 1] = frame_duration;
 
                         // Increment skip_samples for the first non-zero audio edit list
-                        if (first_non_zero_audio_edit > 0 && st->codecpar->codec_id != AV_CODEC_ID_VORBIS) {
+                        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
+                            first_non_zero_audio_edit > 0 && st->codecpar->codec_id != AV_CODEC_ID_VORBIS) {
                             st->skip_samples += frame_duration;
-                            msc->start_pad = st->skip_samples;
                         }
                     }
                 }
             } else {
-                if (min_corrected_pts < 0) {
-                    min_corrected_pts = edit_list_dts_counter + curr_ctts + msc->dts_shift;
+                if (msc->min_corrected_pts < 0) {
+                    msc->min_corrected_pts = edit_list_dts_counter + curr_ctts + msc->dts_shift;
                 } else {
-                    min_corrected_pts = FFMIN(min_corrected_pts, edit_list_dts_counter + curr_ctts + msc->dts_shift);
+                    msc->min_corrected_pts = FFMIN(msc->min_corrected_pts, edit_list_dts_counter + curr_ctts + msc->dts_shift);
                 }
                 if (edit_list_start_encountered == 0) {
                     edit_list_start_encountered = 1;
-                    // Make timestamps strictly monotonically increasing for audio, by rewriting timestamps for
+                    // Make timestamps strictly monotonically increasing by rewriting timestamps for
                     // discarded packets.
-                    if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && frame_duration_buffer) {
+                    if (frame_duration_buffer) {
                         fix_index_entry_timestamps(st, st->nb_index_entries, edit_list_dts_counter,
                                                    frame_duration_buffer, num_discarded_begin);
                         av_freep(&frame_duration_buffer);
@@ -3291,39 +3648,52 @@
             }
 
             // Break when found first key frame after edit entry completion
-            if (((curr_cts + frame_duration) >= (edit_list_duration + edit_list_media_time)) &&
+            if ((curr_cts + frame_duration >= (edit_list_duration + edit_list_media_time)) &&
                 ((flags & AVINDEX_KEYFRAME) || ((st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)))) {
-
-                if (ctts_data_old && ctts_sample_old != 0) {
-                    if (add_ctts_entry(&msc->ctts_data, &msc->ctts_count,
-                                       &msc->ctts_allocated_size,
-                                       ctts_sample_old - edit_list_start_ctts_sample,
-                                       ctts_data_old[ctts_index_old].duration) == -1) {
-                        av_log(mov->fc, AV_LOG_ERROR, "Cannot add CTTS entry %"PRId64" - {%"PRId64", %d}\n",
-                               ctts_index_old, ctts_sample_old - edit_list_start_ctts_sample,
-                               ctts_data_old[ctts_index_old].duration);
-                        break;
+                if (ctts_data_old) {
+                    // If we have CTTS and this is the first keyframe after edit elist,
+                    // wait for one more, because there might be trailing B-frames after this I-frame
+                    // that do belong to the edit.
+                    if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO && found_keyframe_after_edit == 0) {
+                        found_keyframe_after_edit = 1;
+                        continue;
+                    }
+                    if (ctts_sample_old != 0) {
+                        if (add_ctts_entry(&msc->ctts_data, &msc->ctts_count,
+                                           &msc->ctts_allocated_size,
+                                           ctts_sample_old - edit_list_start_ctts_sample,
+                                           ctts_data_old[ctts_index_old].duration) == -1) {
+                            av_log(mov->fc, AV_LOG_ERROR, "Cannot add CTTS entry %"PRId64" - {%"PRId64", %d}\n",
+                                   ctts_index_old, ctts_sample_old - edit_list_start_ctts_sample,
+                                   ctts_data_old[ctts_index_old].duration);
+                            break;
+                        }
                     }
                 }
                 break;
             }
         }
     }
-    // If there are empty edits, then min_corrected_pts might be positive intentionally. So we subtract the
-    // sum duration of emtpy edits here.
-    min_corrected_pts -= empty_edits_sum_duration;
+    // If there are empty edits, then msc->min_corrected_pts might be positive
+    // intentionally. So we subtract the sum duration of emtpy edits here.
+    msc->min_corrected_pts -= empty_edits_sum_duration;
 
     // If the minimum pts turns out to be greater than zero after fixing the index, then we subtract the
     // dts by that amount to make the first pts zero.
-    if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && min_corrected_pts > 0) {
-        av_log(mov->fc, AV_LOG_DEBUG, "Offset DTS by %"PRId64" to make first pts zero.\n", min_corrected_pts);
-        for (i = 0; i < st->nb_index_entries; ++i) {
-            st->index_entries[i].timestamp -= min_corrected_pts;
+    if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
+        if (msc->min_corrected_pts > 0) {
+            av_log(mov->fc, AV_LOG_DEBUG, "Offset DTS by %"PRId64" to make first pts zero.\n", msc->min_corrected_pts);
+            for (i = 0; i < st->nb_index_entries; ++i) {
+                st->index_entries[i].timestamp -= msc->min_corrected_pts;
+            }
         }
     }
+    // Start time should be equal to zero or the duration of any empty edits.
+    st->start_time = empty_edits_sum_duration;
 
-    // Update av stream length
-    st->duration = edit_list_dts_entry_end - start_dts;
+    // Update av stream length, if it ends up shorter than the track's media duration
+    st->duration = FFMIN(st->duration, edit_list_dts_entry_end - start_dts);
+    msc->start_pad = st->skip_samples;
 
     // Free the old index and the old CTTS structures
     av_free(e_old);
@@ -3380,6 +3750,7 @@
             if (empty_duration)
                 empty_duration = av_rescale(empty_duration, sc->time_scale, mov->time_scale);
             sc->time_offset = start_time - empty_duration;
+            sc->min_corrected_pts = start_time;
             if (!mov->advanced_editlist)
                 current_dts = -sc->time_offset;
         }
@@ -3430,6 +3801,9 @@
                 av_free(ctts_data_old);
                 return;
             }
+
+            memset((uint8_t*)(sc->ctts_data), 0, sc->ctts_allocated_size);
+
             for (i = 0; i < ctts_count_old &&
                         sc->ctts_count < sc->sample_count; i++)
                 for (j = 0; j < ctts_data_old[i].count &&
@@ -3546,6 +3920,9 @@
     } else {
         unsigned chunk_samples, total = 0;
 
+        if (!sc->chunk_count)
+            return;
+
         // compute total chunk count
         for (i = 0; i < sc->stsc_count; i++) {
             unsigned count, chunk_count;
@@ -3646,6 +4023,16 @@
         // Fix index according to edit lists.
         mov_fix_index(mov, st);
     }
+
+    // Update start time of the stream.
+    if (st->start_time == AV_NOPTS_VALUE && st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && st->nb_index_entries > 0) {
+        st->start_time = st->index_entries[0].timestamp + sc->dts_shift;
+        if (sc->ctts_data) {
+            st->start_time += sc->ctts_data[0].duration;
+        }
+    }
+
+    mov_estimate_video_delay(mov, st);
 }
 
 static int test_same_origin(const char *src, const char *ref) {
@@ -3789,6 +4176,11 @@
                st->index);
         return 0;
     }
+    if (sc->chunk_count && sc->stsc_count && sc->stsc_data[ sc->stsc_count - 1 ].first > sc->chunk_count) {
+        av_log(c->fc, AV_LOG_ERROR, "stream %d, contradictionary STSC and STCO\n",
+               st->index);
+        return AVERROR_INVALIDDATA;
+    }
 
     fix_timescale(c, sc);
 
@@ -3799,7 +4191,7 @@
     if (sc->dref_id-1 < sc->drefs_count && sc->drefs[sc->dref_id-1].path) {
         MOVDref *dref = &sc->drefs[sc->dref_id - 1];
         if (c->enable_drefs) {
-            if (mov_open_dref(c, &sc->pb, c->fc->filename, dref) < 0)
+            if (mov_open_dref(c, &sc->pb, c->fc->url, dref) < 0)
                 av_log(c->fc, AV_LOG_ERROR,
                        "stream %d, error opening alias: path='%s', dir='%s', "
                        "filename='%s', volume='%s', nlvl_from=%d, nlvl_to=%d\n",
@@ -3969,8 +4361,10 @@
             break;
 
         *p = av_malloc(len + 1);
-        if (!*p)
+        if (!*p) {
+            ret = AVERROR(ENOMEM);
             break;
+        }
         ret = ffio_read_size(pb, *p, len);
         if (ret < 0) {
             av_freep(p);
@@ -4144,8 +4538,7 @@
 {
     MOVFragment *frag = &c->fragment;
     MOVTrackExt *trex = NULL;
-    MOVFragmentIndex* index = NULL;
-    int flags, track_id, i, found = 0;
+    int flags, track_id, i;
 
     avio_r8(pb); /* version */
     flags = avio_rb24(pb);
@@ -4154,6 +4547,7 @@
     if (!track_id)
         return AVERROR_INVALIDDATA;
     frag->track_id = track_id;
+    set_frag_stream(&c->frag_index, track_id);
     for (i = 0; i < c->trex_count; i++)
         if (c->trex_data[i].track_id == frag->track_id) {
             trex = &c->trex_data[i];
@@ -4175,35 +4569,8 @@
                      avio_rb32(pb) : trex->size;
     frag->flags    = flags & MOV_TFHD_DEFAULT_FLAGS ?
                      avio_rb32(pb) : trex->flags;
-    frag->time     = AV_NOPTS_VALUE;
-    for (i = 0; i < c->fragment_index_count; i++) {
-        int j;
-        MOVFragmentIndex* candidate = c->fragment_index_data[i];
-        if (candidate->track_id == frag->track_id) {
-            av_log(c->fc, AV_LOG_DEBUG,
-                   "found fragment index for track %u\n", frag->track_id);
-            index = candidate;
-            for (j = index->current_item; j < index->item_count; j++) {
-                if (frag->implicit_offset == index->items[j].moof_offset) {
-                    av_log(c->fc, AV_LOG_DEBUG, "found fragment index entry "
-                            "for track %u and moof_offset %"PRId64"\n",
-                            frag->track_id, index->items[j].moof_offset);
-                    frag->time = index->items[j].time;
-                    index->current_item = j + 1;
-                    found = 1;
-                    break;
-                }
-            }
-            if (found)
-                break;
-        }
-    }
-    if (index && !found) {
-        av_log(c->fc, AV_LOG_DEBUG, "track %u has a fragment index but "
-               "it doesn't have an (in-order) entry for moof_offset "
-               "%"PRId64"\n", frag->track_id, frag->implicit_offset);
-    }
     av_log(c->fc, AV_LOG_TRACE, "frag flags 0x%x\n", frag->flags);
+
     return 0;
 }
 
@@ -4258,6 +4625,8 @@
     AVStream *st = NULL;
     MOVStreamContext *sc;
     int version, i;
+    MOVFragmentStreamInfo * frag_stream_info;
+    int64_t base_media_decode_time;
 
     for (i = 0; i < c->fc->nb_streams; i++) {
         if (c->fc->streams[i]->id == frag->track_id) {
@@ -4270,15 +4639,21 @@
         return AVERROR_INVALIDDATA;
     }
     sc = st->priv_data;
-    if (sc->pseudo_stream_id + 1 != frag->stsd_id)
+    if (sc->pseudo_stream_id + 1 != frag->stsd_id && sc->pseudo_stream_id != -1)
         return 0;
     version = avio_r8(pb);
     avio_rb24(pb); /* flags */
     if (version) {
-        sc->track_end = avio_rb64(pb);
+        base_media_decode_time = avio_rb64(pb);
     } else {
-        sc->track_end = avio_rb32(pb);
+        base_media_decode_time = avio_rb32(pb);
     }
+
+    frag_stream_info = get_current_frag_stream_info(&c->frag_index);
+    if (frag_stream_info)
+        frag_stream_info->tfdt_dts = base_media_decode_time;
+    sc->track_end = base_media_decode_time;
+
     return 0;
 }
 
@@ -4289,10 +4664,16 @@
     MOVStreamContext *sc;
     MOVStts *ctts_data;
     uint64_t offset;
-    int64_t dts;
+    int64_t dts, pts = AV_NOPTS_VALUE;
     int data_offset = 0;
     unsigned entries, first_sample_flags = frag->flags;
     int flags, distance, i;
+    int64_t prev_dts = AV_NOPTS_VALUE;
+    int next_frag_index = -1, index_entry_pos;
+    size_t requested_size;
+    size_t old_ctts_allocated_size;
+    AVIndexEntry *new_entries;
+    MOVFragmentStreamInfo * frag_stream_info;
 
     for (i = 0; i < c->fc->nb_streams; i++) {
         if (c->fc->streams[i]->id == frag->track_id) {
@@ -4307,6 +4688,23 @@
     sc = st->priv_data;
     if (sc->pseudo_stream_id+1 != frag->stsd_id && sc->pseudo_stream_id != -1)
         return 0;
+
+    // Find the next frag_index index that has a valid index_entry for
+    // the current track_id.
+    //
+    // A valid index_entry means the trun for the fragment was read
+    // and it's samples are in index_entries at the given position.
+    // New index entries will be inserted before the index_entry found.
+    index_entry_pos = st->nb_index_entries;
+    for (i = c->frag_index.current + 1; i < c->frag_index.nb_items; i++) {
+        frag_stream_info = get_frag_stream_info(&c->frag_index, i, frag->track_id);
+        if (frag_stream_info && frag_stream_info->index_entry >= 0) {
+            next_frag_index = i;
+            index_entry_pos = frag_stream_info->index_entry;
+            break;
+        }
+    }
+
     avio_r8(pb); /* version */
     flags = avio_rb24(pb);
     entries = avio_rb32(pb);
@@ -4316,18 +4714,100 @@
         return AVERROR_INVALIDDATA;
     if (flags & MOV_TRUN_DATA_OFFSET)        data_offset        = avio_rb32(pb);
     if (flags & MOV_TRUN_FIRST_SAMPLE_FLAGS) first_sample_flags = avio_rb32(pb);
-    dts    = sc->track_end - sc->time_offset;
-    offset = frag->base_data_offset + data_offset;
+
+    frag_stream_info = get_current_frag_stream_info(&c->frag_index);
+    if (frag_stream_info)
+    {
+        if (frag_stream_info->first_tfra_pts != AV_NOPTS_VALUE &&
+            c->use_mfra_for == FF_MOV_FLAG_MFRA_PTS) {
+            pts = frag_stream_info->first_tfra_pts;
+            av_log(c->fc, AV_LOG_DEBUG, "found mfra time %"PRId64
+                    ", using it for pts\n", pts);
+        } else if (frag_stream_info->sidx_pts != AV_NOPTS_VALUE) {
+            // FIXME: sidx earliest_presentation_time is *PTS*, s.b.
+            // pts = frag_stream_info->sidx_pts;
+            dts = frag_stream_info->sidx_pts - sc->time_offset;
+            av_log(c->fc, AV_LOG_DEBUG, "found sidx time %"PRId64
+                    ", using it for pts\n", pts);
+        } else if (frag_stream_info->tfdt_dts != AV_NOPTS_VALUE) {
+            dts = frag_stream_info->tfdt_dts - sc->time_offset;
+            av_log(c->fc, AV_LOG_DEBUG, "found tfdt time %"PRId64
+                    ", using it for dts\n", dts);
+        } else {
+            dts = sc->track_end - sc->time_offset;
+            av_log(c->fc, AV_LOG_DEBUG, "found track end time %"PRId64
+                    ", using it for dts\n", dts);
+        }
+    } else {
+        dts = sc->track_end - sc->time_offset;
+        av_log(c->fc, AV_LOG_DEBUG, "found track end time %"PRId64
+                ", using it for dts\n", dts);
+    }
+    offset   = frag->base_data_offset + data_offset;
     distance = 0;
     av_log(c->fc, AV_LOG_TRACE, "first sample flags 0x%x\n", first_sample_flags);
+
+    // realloc space for new index entries
+    if((unsigned)st->nb_index_entries + entries >= UINT_MAX / sizeof(AVIndexEntry)) {
+        entries = UINT_MAX / sizeof(AVIndexEntry) - st->nb_index_entries;
+        av_log(c->fc, AV_LOG_ERROR, "Failed to add index entry\n");
+    }
+    if (entries <= 0)
+        return -1;
+
+    requested_size = (st->nb_index_entries + entries) * sizeof(AVIndexEntry);
+    new_entries = av_fast_realloc(st->index_entries,
+                                  &st->index_entries_allocated_size,
+                                  requested_size);
+    if(!new_entries)
+        return AVERROR(ENOMEM);
+    st->index_entries= new_entries;
+
+    requested_size = (st->nb_index_entries + entries) * sizeof(*sc->ctts_data);
+    old_ctts_allocated_size = sc->ctts_allocated_size;
+    ctts_data = av_fast_realloc(sc->ctts_data, &sc->ctts_allocated_size,
+                                requested_size);
+    if (!ctts_data)
+        return AVERROR(ENOMEM);
+    sc->ctts_data = ctts_data;
+
+    // In case there were samples without ctts entries, ensure they get
+    // zero valued entries. This ensures clips which mix boxes with and
+    // without ctts entries don't pickup uninitialized data.
+    memset((uint8_t*)(sc->ctts_data) + old_ctts_allocated_size, 0,
+           sc->ctts_allocated_size - old_ctts_allocated_size);
+
+    if (index_entry_pos < st->nb_index_entries) {
+        // Make hole in index_entries and ctts_data for new samples
+        memmove(st->index_entries + index_entry_pos + entries,
+                st->index_entries + index_entry_pos,
+                sizeof(*st->index_entries) *
+                (st->nb_index_entries - index_entry_pos));
+        memmove(sc->ctts_data + index_entry_pos + entries,
+                sc->ctts_data + index_entry_pos,
+                sizeof(*sc->ctts_data) * (sc->ctts_count - index_entry_pos));
+        if (index_entry_pos < sc->current_sample) {
+            sc->current_sample += entries;
+        }
+    }
+
+    st->nb_index_entries += entries;
+    sc->ctts_count = st->nb_index_entries;
+
+    // Record the index_entry position in frag_index of this fragment
+    if (frag_stream_info)
+        frag_stream_info->index_entry = index_entry_pos;
+
+    if (index_entry_pos > 0)
+        prev_dts = st->index_entries[index_entry_pos-1].timestamp;
+
     for (i = 0; i < entries && !pb->eof_reached; i++) {
         unsigned sample_size = frag->size;
         int sample_flags = i ? frag->flags : first_sample_flags;
         unsigned sample_duration = frag->duration;
         unsigned ctts_duration = 0;
         int keyframe = 0;
-        int ctts_index = 0;
-        int old_nb_index_entries = st->nb_index_entries;
+        int index_entry_flags = 0;
 
         if (flags & MOV_TRUN_SAMPLE_DURATION) sample_duration = avio_rb32(pb);
         if (flags & MOV_TRUN_SAMPLE_SIZE)     sample_size     = avio_rb32(pb);
@@ -4335,27 +4815,22 @@
         if (flags & MOV_TRUN_SAMPLE_CTS)      ctts_duration   = avio_rb32(pb);
 
         mov_update_dts_shift(sc, ctts_duration);
-        if (frag->time != AV_NOPTS_VALUE) {
-            if (c->use_mfra_for == FF_MOV_FLAG_MFRA_PTS) {
-                int64_t pts = frag->time;
-                av_log(c->fc, AV_LOG_DEBUG, "found frag time %"PRId64
-                        " sc->dts_shift %d ctts.duration %d"
-                        " sc->time_offset %"PRId64" flags & MOV_TRUN_SAMPLE_CTS %d\n", pts,
-                        sc->dts_shift, ctts_duration,
-                        sc->time_offset, flags & MOV_TRUN_SAMPLE_CTS);
-                dts = pts - sc->dts_shift;
-                if (flags & MOV_TRUN_SAMPLE_CTS) {
-                    dts -= ctts_duration;
-                } else {
-                    dts -= sc->time_offset;
-                }
-                av_log(c->fc, AV_LOG_DEBUG, "calculated into dts %"PRId64"\n", dts);
+        if (pts != AV_NOPTS_VALUE) {
+            dts = pts - sc->dts_shift;
+            if (flags & MOV_TRUN_SAMPLE_CTS) {
+                dts -= ctts_duration;
             } else {
-                dts = frag->time - sc->time_offset;
-                av_log(c->fc, AV_LOG_DEBUG, "found frag time %"PRId64
-                        ", using it for dts\n", dts);
+                dts -= sc->time_offset;
             }
-            frag->time = AV_NOPTS_VALUE;
+            av_log(c->fc, AV_LOG_DEBUG,
+                   "pts %"PRId64" calculated dts %"PRId64
+                   " sc->dts_shift %d ctts.duration %d"
+                   " sc->time_offset %"PRId64
+                   " flags & MOV_TRUN_SAMPLE_CTS %d\n",
+                   pts, dts,
+                   sc->dts_shift, ctts_duration,
+                   sc->time_offset, flags & MOV_TRUN_SAMPLE_CTS);
+            pts = AV_NOPTS_VALUE;
         }
 
         if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
@@ -4364,57 +4839,83 @@
             keyframe =
                 !(sample_flags & (MOV_FRAG_SAMPLE_FLAG_IS_NON_SYNC |
                                   MOV_FRAG_SAMPLE_FLAG_DEPENDS_YES));
-        if (keyframe)
+        if (keyframe) {
             distance = 0;
-        ctts_index = av_add_index_entry(st, offset, dts, sample_size, distance,
-                                        keyframe ? AVINDEX_KEYFRAME : 0);
-        if (ctts_index >= 0 && old_nb_index_entries < st->nb_index_entries) {
-            unsigned int size_needed = st->nb_index_entries * sizeof(*sc->ctts_data);
-            unsigned int request_size = size_needed > sc->ctts_allocated_size ?
-                FFMAX(size_needed, 2 * sc->ctts_allocated_size) : size_needed;
-            unsigned int old_ctts_size = sc->ctts_allocated_size;
-            ctts_data = av_fast_realloc(sc->ctts_data, &sc->ctts_allocated_size, request_size);
-            if (!ctts_data) {
-                av_freep(&sc->ctts_data);
-                return AVERROR(ENOMEM);
-            }
-            sc->ctts_data = ctts_data;
-
-            // In case there were samples without ctts entries, ensure they get
-            // zero valued entries. This ensures clips which mix boxes with and
-            // without ctts entries don't pickup uninitialized data.
-            memset((uint8_t*)(sc->ctts_data) + old_ctts_size, 0, sc->ctts_allocated_size - old_ctts_size);
-
-            if (ctts_index != old_nb_index_entries) {
-                memmove(sc->ctts_data + ctts_index + 1, sc->ctts_data + ctts_index,
-                        sizeof(*sc->ctts_data) * (sc->ctts_count - ctts_index));
-                if (ctts_index <= sc->current_sample) {
-                    // if we inserted a new item before the current sample, move the
-                    // counter ahead so it is still pointing to the same sample.
-                    sc->current_sample++;
-                }
-            }
-
-            sc->ctts_data[ctts_index].count = 1;
-            sc->ctts_data[ctts_index].duration = ctts_duration;
-            sc->ctts_count++;
-        } else {
-            av_log(c->fc, AV_LOG_ERROR, "Failed to add index entry\n");
+            index_entry_flags |= AVINDEX_KEYFRAME;
         }
+        // Fragments can overlap in time.  Discard overlapping frames after
+        // decoding.
+        if (prev_dts >= dts)
+            index_entry_flags |= AVINDEX_DISCARD_FRAME;
+
+        st->index_entries[index_entry_pos].pos = offset;
+        st->index_entries[index_entry_pos].timestamp = dts;
+        st->index_entries[index_entry_pos].size= sample_size;
+        st->index_entries[index_entry_pos].min_distance= distance;
+        st->index_entries[index_entry_pos].flags = index_entry_flags;
+
+        sc->ctts_data[index_entry_pos].count = 1;
+        sc->ctts_data[index_entry_pos].duration = ctts_duration;
+        index_entry_pos++;
 
         av_log(c->fc, AV_LOG_TRACE, "AVIndex stream %d, sample %d, offset %"PRIx64", dts %"PRId64", "
-                "size %u, distance %d, keyframe %d\n", st->index, ctts_index,
-                offset, dts, sample_size, distance, keyframe);
+                "size %u, distance %d, keyframe %d\n", st->index,
+                index_entry_pos, offset, dts, sample_size, distance, keyframe);
         distance++;
         dts += sample_duration;
         offset += sample_size;
         sc->data_size += sample_size;
-        sc->duration_for_fps += sample_duration;
-        sc->nb_frames_for_fps ++;
+
+        if (sample_duration <= INT64_MAX - sc->duration_for_fps &&
+            1 <= INT64_MAX - sc->nb_frames_for_fps
+        ) {
+            sc->duration_for_fps += sample_duration;
+            sc->nb_frames_for_fps ++;
+        }
+    }
+    if (i < entries) {
+        // EOF found before reading all entries.  Fix the hole this would
+        // leave in index_entries and ctts_data
+        int gap = entries - i;
+        memmove(st->index_entries + index_entry_pos,
+                st->index_entries + index_entry_pos + gap,
+                sizeof(*st->index_entries) *
+                (st->nb_index_entries - (index_entry_pos + gap)));
+        memmove(sc->ctts_data + index_entry_pos,
+                sc->ctts_data + index_entry_pos + gap,
+                sizeof(*sc->ctts_data) *
+                (sc->ctts_count - (index_entry_pos + gap)));
+
+        st->nb_index_entries -= gap;
+        sc->ctts_count -= gap;
+        if (index_entry_pos < sc->current_sample) {
+            sc->current_sample -= gap;
+        }
+        entries = i;
     }
 
-    if (pb->eof_reached)
+    // The end of this new fragment may overlap in time with the start
+    // of the next fragment in index_entries. Mark the samples in the next
+    // fragment that overlap with AVINDEX_DISCARD_FRAME
+    prev_dts = AV_NOPTS_VALUE;
+    if (index_entry_pos > 0)
+        prev_dts = st->index_entries[index_entry_pos-1].timestamp;
+    for (i = index_entry_pos; i < st->nb_index_entries; i++) {
+        if (prev_dts < st->index_entries[i].timestamp)
+            break;
+        st->index_entries[i].flags |= AVINDEX_DISCARD_FRAME;
+    }
+
+    // If a hole was created to insert the new index_entries into,
+    // the index_entry recorded for all subsequent moof must
+    // be incremented by the number of entries inserted.
+    fix_frag_index_entries(&c->frag_index, next_frag_index,
+                           frag->track_id, entries);
+
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_WARNING, "reached eof, corrupted TRUN atom\n");
         return AVERROR_EOF;
+    }
 
     frag->implicit_offset = offset;
 
@@ -4427,14 +4928,12 @@
 
 static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
-    int64_t offset = avio_tell(pb) + atom.size, pts;
+    int64_t offset = avio_tell(pb) + atom.size, pts, timestamp;
     uint8_t version;
-    unsigned i, track_id;
+    unsigned i, j, track_id, item_count;
     AVStream *st = NULL;
     AVStream *ref_st = NULL;
     MOVStreamContext *sc, *ref_sc = NULL;
-    MOVFragmentIndex *index = NULL;
-    MOVFragmentIndex **tmp;
     AVRational timescale;
 
     version = avio_r8(pb);
@@ -4476,57 +4975,46 @@
 
     avio_rb16(pb); // reserved
 
-    index = av_mallocz(sizeof(MOVFragmentIndex));
-    if (!index)
-        return AVERROR(ENOMEM);
+    item_count = avio_rb16(pb);
 
-    index->track_id = track_id;
-
-    index->item_count = avio_rb16(pb);
-    index->items = av_mallocz_array(index->item_count, sizeof(MOVFragmentIndexItem));
-
-    if (!index->items) {
-        av_freep(&index);
-        return AVERROR(ENOMEM);
-    }
-
-    for (i = 0; i < index->item_count; i++) {
+    for (i = 0; i < item_count; i++) {
+        int index;
+        MOVFragmentStreamInfo * frag_stream_info;
         uint32_t size = avio_rb32(pb);
         uint32_t duration = avio_rb32(pb);
         if (size & 0x80000000) {
             avpriv_request_sample(c->fc, "sidx reference_type 1");
-            av_freep(&index->items);
-            av_freep(&index);
             return AVERROR_PATCHWELCOME;
         }
         avio_rb32(pb); // sap_flags
-        index->items[i].moof_offset = offset;
-        index->items[i].time = av_rescale_q(pts, st->time_base, timescale);
+        timestamp = av_rescale_q(pts, st->time_base, timescale);
+
+        index = update_frag_index(c, offset);
+        frag_stream_info = get_frag_stream_info(&c->frag_index, index, track_id);
+        if (frag_stream_info)
+            frag_stream_info->sidx_pts = timestamp;
+
         offset += size;
         pts += duration;
     }
 
     st->duration = sc->track_end = pts;
 
-    tmp = av_realloc_array(c->fragment_index_data,
-                           c->fragment_index_count + 1,
-                           sizeof(MOVFragmentIndex*));
-    if (!tmp) {
-        av_freep(&index->items);
-        av_freep(&index);
-        return AVERROR(ENOMEM);
-    }
-
-    c->fragment_index_data = tmp;
-    c->fragment_index_data[c->fragment_index_count++] = index;
     sc->has_sidx = 1;
 
     if (offset == avio_size(pb)) {
-        for (i = 0; i < c->fc->nb_streams; i++) {
-            if (c->fc->streams[i]->id == c->fragment_index_data[0]->track_id) {
-                ref_st = c->fc->streams[i];
-                ref_sc = ref_st->priv_data;
-                break;
+        // Find first entry in fragment index that came from an sidx.
+        // This will pretty much always be the first entry.
+        for (i = 0; i < c->frag_index.nb_items; i++) {
+            MOVFragmentIndexItem * item = &c->frag_index.item[i];
+            for (j = 0; ref_st == NULL && j < item->nb_stream_info; j++) {
+                MOVFragmentStreamInfo * si;
+                si = &item->stream_info[j];
+                if (si->sidx_pts != AV_NOPTS_VALUE) {
+                    ref_st = c->fc->streams[j];
+                    ref_sc = ref_st->priv_data;
+                    break;
+                }
             }
         }
         for (i = 0; i < c->fc->nb_streams; i++) {
@@ -4537,7 +5025,7 @@
             }
         }
 
-        c->fragment_index_complete = 1;
+        c->frag_index.complete = 1;
     }
 
     return 0;
@@ -4600,6 +5088,7 @@
     if (ret < 0)
         goto free_and_return;
 
+    ret = AVERROR_INVALIDDATA;
     if (uncompress (moov_data, (uLongf *) &moov_len, (const Bytef *)cmov_data, cmov_len) != Z_OK)
         goto free_and_return;
     if (ffio_init_context(&ctx, moov_data, moov_len, 0, NULL, NULL, NULL, NULL) != 0)
@@ -4623,6 +5112,7 @@
 {
     MOVStreamContext *sc;
     int i, edit_count, version;
+    int64_t elst_entry_size;
 
     if (c->fc->nb_streams < 1 || c->ignore_editlist)
         return 0;
@@ -4631,6 +5121,21 @@
     version = avio_r8(pb); /* version */
     avio_rb24(pb); /* flags */
     edit_count = avio_rb32(pb); /* entries */
+    atom.size -= 8;
+
+    elst_entry_size = version == 1 ? 20 : 12;
+    if (atom.size != edit_count * elst_entry_size) {
+        if (c->fc->strict_std_compliance >= FF_COMPLIANCE_STRICT) {
+            av_log(c->fc, AV_LOG_ERROR, "Invalid edit list entry_count: %d for elst atom of size: %"PRId64" bytes.\n",
+                   edit_count, atom.size + 8);
+            return AVERROR_INVALIDDATA;
+        } else {
+            edit_count = atom.size / elst_entry_size;
+            if (edit_count * elst_entry_size != atom.size) {
+                av_log(c->fc, AV_LOG_WARNING, "ELST atom of %"PRId64" bytes, bigger than %d entries.", atom.size, edit_count);
+            }
+        }
+    }
 
     if (!edit_count)
         return 0;
@@ -4643,17 +5148,20 @@
         return AVERROR(ENOMEM);
 
     av_log(c->fc, AV_LOG_TRACE, "track[%u].edit_count = %i\n", c->fc->nb_streams - 1, edit_count);
-    for (i = 0; i < edit_count && !pb->eof_reached; i++) {
+    for (i = 0; i < edit_count && atom.size > 0 && !pb->eof_reached; i++) {
         MOVElst *e = &sc->elst_data[i];
 
         if (version == 1) {
             e->duration = avio_rb64(pb);
             e->time     = avio_rb64(pb);
+            atom.size -= 16;
         } else {
             e->duration = avio_rb32(pb); /* segment duration */
             e->time     = (int32_t)avio_rb32(pb); /* media time */
+            atom.size -= 8;
         }
         e->rate = avio_rb32(pb) / 65536.0;
+        atom.size -= 4;
         av_log(c->fc, AV_LOG_TRACE, "duration=%"PRId64" time=%"PRId64" rate=%f\n",
                e->duration, e->time, e->rate);
 
@@ -4680,6 +5188,34 @@
     return 0;
 }
 
+static int mov_read_av1c(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVStream *st;
+    int ret;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams - 1];
+
+    if (atom.size < 4) {
+        av_log(c->fc, AV_LOG_ERROR, "Empty AV1 Codec Configuration Box\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* For now, propagate only the OBUs, if any. Once libavcodec is
+       updated to handle isobmff style extradata this can be removed. */
+    avio_skip(pb, 4);
+
+    if (atom.size == 4)
+        return 0;
+
+    ret = ff_get_extradata(c->fc, st->codecpar, pb, atom.size - 4);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
 static int mov_read_vpcc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     AVStream *st;
@@ -4770,6 +5306,45 @@
     return 0;
 }
 
+static int mov_read_mdcv(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    MOVStreamContext *sc;
+    const int mapping[3] = {1, 2, 0};
+    const int chroma_den = 50000;
+    const int luma_den = 10000;
+    int i;
+
+    if (c->fc->nb_streams < 1)
+        return AVERROR_INVALIDDATA;
+
+    sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data;
+
+    if (atom.size < 24) {
+        av_log(c->fc, AV_LOG_ERROR, "Invalid Mastering Display Color Volume box\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    sc->mastering = av_mastering_display_metadata_alloc();
+    if (!sc->mastering)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < 3; i++) {
+        const int j = mapping[i];
+        sc->mastering->display_primaries[j][0] = av_make_q(avio_rb16(pb), chroma_den);
+        sc->mastering->display_primaries[j][1] = av_make_q(avio_rb16(pb), chroma_den);
+    }
+    sc->mastering->white_point[0] = av_make_q(avio_rb16(pb), chroma_den);
+    sc->mastering->white_point[1] = av_make_q(avio_rb16(pb), chroma_den);
+
+    sc->mastering->max_luminance = av_make_q(avio_rb32(pb), luma_den);
+    sc->mastering->min_luminance = av_make_q(avio_rb32(pb), luma_den);
+
+    sc->mastering->has_luminance = 1;
+    sc->mastering->has_primaries = 1;
+
+    return 0;
+}
+
 static int mov_read_coll(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     MOVStreamContext *sc;
@@ -4802,6 +5377,30 @@
     return 0;
 }
 
+static int mov_read_clli(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    MOVStreamContext *sc;
+
+    if (c->fc->nb_streams < 1)
+        return AVERROR_INVALIDDATA;
+
+    sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data;
+
+    if (atom.size < 4) {
+        av_log(c->fc, AV_LOG_ERROR, "Empty Content Light Level Info box\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    sc->coll = av_content_light_metadata_alloc(&sc->coll_size);
+    if (!sc->coll)
+        return AVERROR(ENOMEM);
+
+    sc->coll->MaxCLL  = avio_rb16(pb);
+    sc->coll->MaxFALL = avio_rb16(pb);
+
+    return 0;
+}
+
 static int mov_read_st3d(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     AVStream *st;
@@ -4849,7 +5448,7 @@
 {
     AVStream *st;
     MOVStreamContext *sc;
-    int size, layout;
+    int size, version, layout;
     int32_t yaw, pitch, roll;
     uint32_t l = 0, t = 0, r = 0, b = 0;
     uint32_t tag, padding = 0;
@@ -4875,7 +5474,13 @@
         av_log(c->fc, AV_LOG_ERROR, "Missing spherical video header\n");
         return 0;
     }
-    avio_skip(pb, 4); /*  version + flags */
+    version = avio_r8(pb);
+    if (version != 0) {
+        av_log(c->fc, AV_LOG_WARNING, "Unknown spherical version %d\n",
+               version);
+        return 0;
+    }
+    avio_skip(pb, 3); /* flags */
     avio_skip(pb, size - 12); /* metadata_source */
 
     size = avio_rb32(pb);
@@ -4897,7 +5502,13 @@
         av_log(c->fc, AV_LOG_ERROR, "Missing projection header box\n");
         return 0;
     }
-    avio_skip(pb, 4); /*  version + flags */
+    version = avio_r8(pb);
+    if (version != 0) {
+        av_log(c->fc, AV_LOG_WARNING, "Unknown spherical version %d\n",
+               version);
+        return 0;
+    }
+    avio_skip(pb, 3); /* flags */
 
     /* 16.16 fixed point */
     yaw   = avio_rb32(pb);
@@ -4909,7 +5520,13 @@
         return AVERROR_INVALIDDATA;
 
     tag = avio_rl32(pb);
-    avio_skip(pb, 4); /*  version + flags */
+    version = avio_r8(pb);
+    if (version != 0) {
+        av_log(c->fc, AV_LOG_WARNING, "Unknown spherical version %d\n",
+               version);
+        return 0;
+    }
+    avio_skip(pb, 3); /* flags */
     switch (tag) {
     case MKTAG('c','b','m','p'):
         layout = avio_rb32(pb);
@@ -4940,7 +5557,7 @@
             projection = AV_SPHERICAL_EQUIRECTANGULAR;
         break;
     default:
-        av_log(c->fc, AV_LOG_ERROR, "Unknown projection type\n");
+        av_log(c->fc, AV_LOG_ERROR, "Unknown projection type: %s\n", av_fourcc2str(tag));
         return 0;
     }
 
@@ -4979,7 +5596,8 @@
         goto out;
 
     /* Check for mandatory keys and values, try to support XML as best-effort */
-    if (av_stristr(buffer, "<GSpherical:StitchingSoftware>") &&
+    if (!sc->spherical &&
+        av_stristr(buffer, "<GSpherical:StitchingSoftware>") &&
         (val = av_stristr(buffer, "<GSpherical:Spherical>")) &&
         av_stristr(val, "true") &&
         (val = av_stristr(buffer, "<GSpherical:Stitched>")) &&
@@ -4992,7 +5610,7 @@
 
         sc->spherical->projection = AV_SPHERICAL_EQUIRECTANGULAR;
 
-        if (av_stristr(buffer, "<GSpherical:StereoMode>")) {
+        if (av_stristr(buffer, "<GSpherical:StereoMode>") && !sc->stereo3d) {
             enum AVStereo3DType mode;
 
             if (av_stristr(buffer, "left-right"))
@@ -5134,7 +5752,8 @@
         if (ret < 0)
             return ret;
         if (!sc->spherical)
-            av_log(c->fc, AV_LOG_WARNING, "Invalid spherical metadata found\n");    }
+            av_log(c->fc, AV_LOG_WARNING, "Invalid spherical metadata found\n");
+    }
 
     return 0;
 }
@@ -5202,117 +5821,635 @@
     return 0;
 }
 
+/**
+ * Gets the current encryption info and associated current stream context.  If
+ * we are parsing a track fragment, this will return the specific encryption
+ * info for this fragment; otherwise this will return the global encryption
+ * info for the current stream.
+ */
+static int get_current_encryption_info(MOVContext *c, MOVEncryptionIndex **encryption_index, MOVStreamContext **sc)
+{
+    MOVFragmentStreamInfo *frag_stream_info;
+    AVStream *st;
+    int i;
+
+    frag_stream_info = get_current_frag_stream_info(&c->frag_index);
+    if (frag_stream_info) {
+        for (i = 0; i < c->fc->nb_streams; i++) {
+            if (c->fc->streams[i]->id == frag_stream_info->id) {
+              st = c->fc->streams[i];
+              break;
+            }
+        }
+        if (i == c->fc->nb_streams)
+            return 0;
+        *sc = st->priv_data;
+
+        if (!frag_stream_info->encryption_index) {
+            // If this stream isn't encrypted, don't create the index.
+            if (!(*sc)->cenc.default_encrypted_sample)
+                return 0;
+            frag_stream_info->encryption_index = av_mallocz(sizeof(*frag_stream_info->encryption_index));
+            if (!frag_stream_info->encryption_index)
+                return AVERROR(ENOMEM);
+        }
+        *encryption_index = frag_stream_info->encryption_index;
+        return 1;
+    } else {
+        // No current track fragment, using stream level encryption info.
+
+        if (c->fc->nb_streams < 1)
+            return 0;
+        st = c->fc->streams[c->fc->nb_streams - 1];
+        *sc = st->priv_data;
+
+        if (!(*sc)->cenc.encryption_index) {
+            // If this stream isn't encrypted, don't create the index.
+            if (!(*sc)->cenc.default_encrypted_sample)
+                return 0;
+            (*sc)->cenc.encryption_index = av_mallocz(sizeof(*frag_stream_info->encryption_index));
+            if (!(*sc)->cenc.encryption_index)
+                return AVERROR(ENOMEM);
+        }
+
+        *encryption_index = (*sc)->cenc.encryption_index;
+        return 1;
+    }
+}
+
+static int mov_read_sample_encryption_info(MOVContext *c, AVIOContext *pb, MOVStreamContext *sc, AVEncryptionInfo **sample, int use_subsamples)
+{
+    int i;
+    unsigned int subsample_count;
+    AVSubsampleEncryptionInfo *subsamples;
+
+    if (!sc->cenc.default_encrypted_sample) {
+        av_log(c->fc, AV_LOG_ERROR, "Missing schm or tenc\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    *sample = av_encryption_info_clone(sc->cenc.default_encrypted_sample);
+    if (!*sample)
+        return AVERROR(ENOMEM);
+
+    if (sc->cenc.per_sample_iv_size != 0) {
+        if (avio_read(pb, (*sample)->iv, sc->cenc.per_sample_iv_size) != sc->cenc.per_sample_iv_size) {
+            av_log(c->fc, AV_LOG_ERROR, "failed to read the initialization vector\n");
+            av_encryption_info_free(*sample);
+            *sample = NULL;
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    if (use_subsamples) {
+        subsample_count = avio_rb16(pb);
+        av_free((*sample)->subsamples);
+        (*sample)->subsamples = av_mallocz_array(subsample_count, sizeof(*subsamples));
+        if (!(*sample)->subsamples) {
+            av_encryption_info_free(*sample);
+            *sample = NULL;
+            return AVERROR(ENOMEM);
+        }
+
+        for (i = 0; i < subsample_count && !pb->eof_reached; i++) {
+            (*sample)->subsamples[i].bytes_of_clear_data = avio_rb16(pb);
+            (*sample)->subsamples[i].bytes_of_protected_data = avio_rb32(pb);
+        }
+
+        if (pb->eof_reached) {
+            av_log(c->fc, AV_LOG_ERROR, "hit EOF while reading sub-sample encryption info\n");
+            av_encryption_info_free(*sample);
+            *sample = NULL;
+            return AVERROR_INVALIDDATA;
+        }
+        (*sample)->subsample_count = subsample_count;
+    }
+
+    return 0;
+}
+
 static int mov_read_senc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
-    AVStream *st;
+    AVEncryptionInfo **encrypted_samples;
+    MOVEncryptionIndex *encryption_index;
     MOVStreamContext *sc;
-    size_t auxiliary_info_size;
+    int use_subsamples, ret;
+    unsigned int sample_count, i, alloc_size = 0;
 
-    if (c->decryption_key_len == 0 || c->fc->nb_streams < 1)
+    ret = get_current_encryption_info(c, &encryption_index, &sc);
+    if (ret != 1)
+        return ret;
+
+    if (encryption_index->nb_encrypted_samples) {
+        // This can happen if we have both saio/saiz and senc atoms.
+        av_log(c->fc, AV_LOG_DEBUG, "Ignoring duplicate encryption info in senc\n");
         return 0;
-
-    st = c->fc->streams[c->fc->nb_streams - 1];
-    sc = st->priv_data;
-
-    if (sc->cenc.aes_ctr) {
-        av_log(c->fc, AV_LOG_ERROR, "duplicate senc atom\n");
-        return AVERROR_INVALIDDATA;
     }
 
     avio_r8(pb); /* version */
-    sc->cenc.use_subsamples = avio_rb24(pb) & 0x02; /* flags */
+    use_subsamples = avio_rb24(pb) & 0x02; /* flags */
 
-    avio_rb32(pb);        /* entries */
-
-    if (atom.size < 8 || atom.size > FFMIN(INT_MAX, SIZE_MAX)) {
-        av_log(c->fc, AV_LOG_ERROR, "senc atom size %"PRId64" invalid\n", atom.size);
-        return AVERROR_INVALIDDATA;
-    }
-
-    /* save the auxiliary info as is */
-    auxiliary_info_size = atom.size - 8;
-
-    sc->cenc.auxiliary_info = av_malloc(auxiliary_info_size);
-    if (!sc->cenc.auxiliary_info) {
+    sample_count = avio_rb32(pb);
+    if (sample_count >= INT_MAX / sizeof(*encrypted_samples))
         return AVERROR(ENOMEM);
+
+    for (i = 0; i < sample_count; i++) {
+        unsigned int min_samples = FFMIN(FFMAX(i + 1, 1024 * 1024), sample_count);
+        encrypted_samples = av_fast_realloc(encryption_index->encrypted_samples, &alloc_size,
+                                            min_samples * sizeof(*encrypted_samples));
+        if (encrypted_samples) {
+            encryption_index->encrypted_samples = encrypted_samples;
+
+            ret = mov_read_sample_encryption_info(
+                c, pb, sc, &encryption_index->encrypted_samples[i], use_subsamples);
+        } else {
+            ret = AVERROR(ENOMEM);
+        }
+        if (pb->eof_reached) {
+            av_log(c->fc, AV_LOG_ERROR, "Hit EOF while reading senc\n");
+            ret = AVERROR_INVALIDDATA;
+        }
+
+        if (ret < 0) {
+            for (; i > 0; i--)
+                av_encryption_info_free(encryption_index->encrypted_samples[i - 1]);
+            av_freep(&encryption_index->encrypted_samples);
+            return ret;
+        }
     }
+    encryption_index->nb_encrypted_samples = sample_count;
 
-    sc->cenc.auxiliary_info_end = sc->cenc.auxiliary_info + auxiliary_info_size;
-    sc->cenc.auxiliary_info_pos = sc->cenc.auxiliary_info;
-    sc->cenc.auxiliary_info_index = 0;
+    return 0;
+}
 
-    if (avio_read(pb, sc->cenc.auxiliary_info, auxiliary_info_size) != auxiliary_info_size) {
-        av_log(c->fc, AV_LOG_ERROR, "failed to read the auxiliary info");
-        return AVERROR_INVALIDDATA;
+static int mov_parse_auxiliary_info(MOVContext *c, MOVStreamContext *sc, AVIOContext *pb, MOVEncryptionIndex *encryption_index)
+{
+    AVEncryptionInfo **sample, **encrypted_samples;
+    int64_t prev_pos;
+    size_t sample_count, sample_info_size, i;
+    int ret = 0;
+    unsigned int alloc_size = 0;
+
+    if (encryption_index->nb_encrypted_samples)
+        return 0;
+    sample_count = encryption_index->auxiliary_info_sample_count;
+    if (encryption_index->auxiliary_offsets_count != 1) {
+        av_log(c->fc, AV_LOG_ERROR, "Multiple auxiliary info chunks are not supported\n");
+        return AVERROR_PATCHWELCOME;
     }
-
-    /* initialize the cipher */
-    sc->cenc.aes_ctr = av_aes_ctr_alloc();
-    if (!sc->cenc.aes_ctr) {
+    if (sample_count >= INT_MAX / sizeof(*encrypted_samples))
         return AVERROR(ENOMEM);
+
+    prev_pos = avio_tell(pb);
+    if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) ||
+        avio_seek(pb, encryption_index->auxiliary_offsets[0], SEEK_SET) != encryption_index->auxiliary_offsets[0]) {
+        av_log(c->fc, AV_LOG_INFO, "Failed to seek for auxiliary info, will only parse senc atoms for encryption info\n");
+        goto finish;
     }
 
-    return av_aes_ctr_init(sc->cenc.aes_ctr, c->decryption_key);
+    for (i = 0; i < sample_count && !pb->eof_reached; i++) {
+        unsigned int min_samples = FFMIN(FFMAX(i + 1, 1024 * 1024), sample_count);
+        encrypted_samples = av_fast_realloc(encryption_index->encrypted_samples, &alloc_size,
+                                            min_samples * sizeof(*encrypted_samples));
+        if (!encrypted_samples) {
+            ret = AVERROR(ENOMEM);
+            goto finish;
+        }
+        encryption_index->encrypted_samples = encrypted_samples;
+
+        sample = &encryption_index->encrypted_samples[i];
+        sample_info_size = encryption_index->auxiliary_info_default_size
+                               ? encryption_index->auxiliary_info_default_size
+                               : encryption_index->auxiliary_info_sizes[i];
+
+        ret = mov_read_sample_encryption_info(c, pb, sc, sample, sample_info_size > sc->cenc.per_sample_iv_size);
+        if (ret < 0)
+            goto finish;
+    }
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_ERROR, "Hit EOF while reading auxiliary info\n");
+        ret = AVERROR_INVALIDDATA;
+    } else {
+        encryption_index->nb_encrypted_samples = sample_count;
+    }
+
+finish:
+    avio_seek(pb, prev_pos, SEEK_SET);
+    if (ret < 0) {
+        for (; i > 0; i--) {
+            av_encryption_info_free(encryption_index->encrypted_samples[i - 1]);
+        }
+        av_freep(&encryption_index->encrypted_samples);
+    }
+    return ret;
+}
+
+/**
+ * Tries to read the given number of bytes from the stream and puts it in a
+ * newly allocated buffer.  This reads in small chunks to avoid allocating large
+ * memory if the file contains an invalid/malicious size value.
+ */
+static int mov_try_read_block(AVIOContext *pb, size_t size, uint8_t **data)
+{
+    const unsigned int block_size = 1024 * 1024;
+    uint8_t *buffer = NULL;
+    unsigned int alloc_size = 0, offset = 0;
+    while (offset < size) {
+        unsigned int new_size =
+            alloc_size >= INT_MAX - block_size ? INT_MAX : alloc_size + block_size;
+        uint8_t *new_buffer = av_fast_realloc(buffer, &alloc_size, new_size);
+        unsigned int to_read = FFMIN(size, alloc_size) - offset;
+        if (!new_buffer) {
+            av_free(buffer);
+            return AVERROR(ENOMEM);
+        }
+        buffer = new_buffer;
+
+        if (avio_read(pb, buffer + offset, to_read) != to_read) {
+            av_free(buffer);
+            return AVERROR_INVALIDDATA;
+        }
+        offset += to_read;
+    }
+
+    *data = buffer;
+    return 0;
 }
 
 static int mov_read_saiz(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
-    AVStream *st;
+    MOVEncryptionIndex *encryption_index;
     MOVStreamContext *sc;
-    size_t data_size;
-    int atom_header_size;
-    int flags;
+    int ret;
+    unsigned int sample_count, aux_info_type, aux_info_param;
 
-    if (c->decryption_key_len == 0 || c->fc->nb_streams < 1)
+    ret = get_current_encryption_info(c, &encryption_index, &sc);
+    if (ret != 1)
+        return ret;
+
+    if (encryption_index->nb_encrypted_samples) {
+        // This can happen if we have both saio/saiz and senc atoms.
+        av_log(c->fc, AV_LOG_DEBUG, "Ignoring duplicate encryption info in saiz\n");
         return 0;
-
-    st = c->fc->streams[c->fc->nb_streams - 1];
-    sc = st->priv_data;
-
-    if (sc->cenc.auxiliary_info_sizes || sc->cenc.auxiliary_info_default_size) {
-        av_log(c->fc, AV_LOG_ERROR, "duplicate saiz atom\n");
-        return AVERROR_INVALIDDATA;
     }
 
-    atom_header_size = 9;
+    if (encryption_index->auxiliary_info_sample_count) {
+        av_log(c->fc, AV_LOG_ERROR, "Duplicate saiz atom\n");
+        return AVERROR_INVALIDDATA;
+    }
 
     avio_r8(pb); /* version */
-    flags = avio_rb24(pb);
-
-    if ((flags & 0x01) != 0) {
-        atom_header_size += 8;
-
-        avio_rb32(pb);    /* info type */
-        avio_rb32(pb);    /* info type param */
-    }
-
-    sc->cenc.auxiliary_info_default_size = avio_r8(pb);
-    avio_rb32(pb);    /* entries */
-
-    if (atom.size <= atom_header_size) {
+    if (avio_rb24(pb) & 0x01) {  /* flags */
+        aux_info_type = avio_rb32(pb);
+        aux_info_param = avio_rb32(pb);
+        if (sc->cenc.default_encrypted_sample) {
+            if (aux_info_type != sc->cenc.default_encrypted_sample->scheme) {
+                av_log(c->fc, AV_LOG_DEBUG, "Ignoring saiz box with non-zero aux_info_type\n");
+                return 0;
+            }
+            if (aux_info_param != 0) {
+                av_log(c->fc, AV_LOG_DEBUG, "Ignoring saiz box with non-zero aux_info_type_parameter\n");
+                return 0;
+            }
+        } else {
+            // Didn't see 'schm' or 'tenc', so this isn't encrypted.
+            if ((aux_info_type == MKBETAG('c','e','n','c') ||
+                 aux_info_type == MKBETAG('c','e','n','s') ||
+                 aux_info_type == MKBETAG('c','b','c','1') ||
+                 aux_info_type == MKBETAG('c','b','c','s')) &&
+                aux_info_param == 0) {
+                av_log(c->fc, AV_LOG_ERROR, "Saw encrypted saiz without schm/tenc\n");
+                return AVERROR_INVALIDDATA;
+            } else {
+                return 0;
+            }
+        }
+    } else if (!sc->cenc.default_encrypted_sample) {
+        // Didn't see 'schm' or 'tenc', so this isn't encrypted.
         return 0;
     }
 
-    if (atom.size > FFMIN(INT_MAX, SIZE_MAX)) {
-        av_log(c->fc, AV_LOG_ERROR, "saiz atom auxiliary_info_sizes size %"PRId64" invalid\n", atom.size);
+    encryption_index->auxiliary_info_default_size = avio_r8(pb);
+    sample_count = avio_rb32(pb);
+    encryption_index->auxiliary_info_sample_count = sample_count;
+
+    if (encryption_index->auxiliary_info_default_size == 0) {
+        ret = mov_try_read_block(pb, sample_count, &encryption_index->auxiliary_info_sizes);
+        if (ret < 0) {
+            av_log(c->fc, AV_LOG_ERROR, "Failed to read the auxiliary info\n");
+            return ret;
+        }
+    }
+
+    if (encryption_index->auxiliary_offsets_count) {
+        return mov_parse_auxiliary_info(c, sc, pb, encryption_index);
+    }
+
+    return 0;
+}
+
+static int mov_read_saio(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    uint64_t *auxiliary_offsets;
+    MOVEncryptionIndex *encryption_index;
+    MOVStreamContext *sc;
+    int i, ret;
+    unsigned int version, entry_count, aux_info_type, aux_info_param;
+    unsigned int alloc_size = 0;
+
+    ret = get_current_encryption_info(c, &encryption_index, &sc);
+    if (ret != 1)
+        return ret;
+
+    if (encryption_index->nb_encrypted_samples) {
+        // This can happen if we have both saio/saiz and senc atoms.
+        av_log(c->fc, AV_LOG_DEBUG, "Ignoring duplicate encryption info in saio\n");
+        return 0;
+    }
+
+    if (encryption_index->auxiliary_offsets_count) {
+        av_log(c->fc, AV_LOG_ERROR, "Duplicate saio atom\n");
         return AVERROR_INVALIDDATA;
     }
 
-    /* save the auxiliary info sizes as is */
-    data_size = atom.size - atom_header_size;
+    version = avio_r8(pb); /* version */
+    if (avio_rb24(pb) & 0x01) {  /* flags */
+        aux_info_type = avio_rb32(pb);
+        aux_info_param = avio_rb32(pb);
+        if (sc->cenc.default_encrypted_sample) {
+            if (aux_info_type != sc->cenc.default_encrypted_sample->scheme) {
+                av_log(c->fc, AV_LOG_DEBUG, "Ignoring saio box with non-zero aux_info_type\n");
+                return 0;
+            }
+            if (aux_info_param != 0) {
+                av_log(c->fc, AV_LOG_DEBUG, "Ignoring saio box with non-zero aux_info_type_parameter\n");
+                return 0;
+            }
+        } else {
+            // Didn't see 'schm' or 'tenc', so this isn't encrypted.
+            if ((aux_info_type == MKBETAG('c','e','n','c') ||
+                 aux_info_type == MKBETAG('c','e','n','s') ||
+                 aux_info_type == MKBETAG('c','b','c','1') ||
+                 aux_info_type == MKBETAG('c','b','c','s')) &&
+                aux_info_param == 0) {
+                av_log(c->fc, AV_LOG_ERROR, "Saw encrypted saio without schm/tenc\n");
+                return AVERROR_INVALIDDATA;
+            } else {
+                return 0;
+            }
+        }
+    } else if (!sc->cenc.default_encrypted_sample) {
+        // Didn't see 'schm' or 'tenc', so this isn't encrypted.
+        return 0;
+    }
 
-    sc->cenc.auxiliary_info_sizes = av_malloc(data_size);
-    if (!sc->cenc.auxiliary_info_sizes) {
+    entry_count = avio_rb32(pb);
+    if (entry_count >= INT_MAX / sizeof(*auxiliary_offsets))
         return AVERROR(ENOMEM);
+
+    for (i = 0; i < entry_count && !pb->eof_reached; i++) {
+        unsigned int min_offsets = FFMIN(FFMAX(i + 1, 1024), entry_count);
+        auxiliary_offsets = av_fast_realloc(
+            encryption_index->auxiliary_offsets, &alloc_size,
+            min_offsets * sizeof(*auxiliary_offsets));
+        if (!auxiliary_offsets) {
+            av_freep(&encryption_index->auxiliary_offsets);
+            return AVERROR(ENOMEM);
+        }
+        encryption_index->auxiliary_offsets = auxiliary_offsets;
+
+        if (version == 0) {
+            encryption_index->auxiliary_offsets[i] = avio_rb32(pb);
+        } else {
+            encryption_index->auxiliary_offsets[i] = avio_rb64(pb);
+        }
+        if (c->frag_index.current >= 0) {
+            encryption_index->auxiliary_offsets[i] += c->fragment.base_data_offset;
+        }
     }
 
-    sc->cenc.auxiliary_info_sizes_count = data_size;
-
-    if (avio_read(pb, sc->cenc.auxiliary_info_sizes, data_size) != data_size) {
-        av_log(c->fc, AV_LOG_ERROR, "failed to read the auxiliary info sizes");
+    if (pb->eof_reached) {
+        av_log(c->fc, AV_LOG_ERROR, "Hit EOF while reading saio\n");
+        av_freep(&encryption_index->auxiliary_offsets);
         return AVERROR_INVALIDDATA;
     }
 
+    encryption_index->auxiliary_offsets_count = entry_count;
+
+    if (encryption_index->auxiliary_info_sample_count) {
+        return mov_parse_auxiliary_info(c, sc, pb, encryption_index);
+    }
+
+    return 0;
+}
+
+static int mov_read_pssh(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVEncryptionInitInfo *info, *old_init_info;
+    uint8_t **key_ids;
+    AVStream *st;
+    uint8_t *side_data, *extra_data, *old_side_data;
+    size_t side_data_size;
+    int ret = 0, old_side_data_size;
+    unsigned int version, kid_count, extra_data_size, alloc_size = 0;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+
+    version = avio_r8(pb); /* version */
+    avio_rb24(pb);  /* flags */
+
+    info = av_encryption_init_info_alloc(/* system_id_size */ 16, /* num_key_ids */ 0,
+                                         /* key_id_size */ 16, /* data_size */ 0);
+    if (!info)
+        return AVERROR(ENOMEM);
+
+    if (avio_read(pb, info->system_id, 16) != 16) {
+        av_log(c->fc, AV_LOG_ERROR, "Failed to read the system id\n");
+        ret = AVERROR_INVALIDDATA;
+        goto finish;
+    }
+
+    if (version > 0) {
+        kid_count = avio_rb32(pb);
+        if (kid_count >= INT_MAX / sizeof(*key_ids))
+            return AVERROR(ENOMEM);
+
+        for (unsigned int i = 0; i < kid_count && !pb->eof_reached; i++) {
+            unsigned int min_kid_count = FFMIN(FFMAX(i + 1, 1024), kid_count);
+            key_ids = av_fast_realloc(info->key_ids, &alloc_size,
+                                      min_kid_count * sizeof(*key_ids));
+            if (!key_ids) {
+                ret = AVERROR(ENOMEM);
+                goto finish;
+            }
+            info->key_ids = key_ids;
+
+            info->key_ids[i] = av_mallocz(16);
+            if (!info->key_ids[i]) {
+                ret = AVERROR(ENOMEM);
+                goto finish;
+            }
+            info->num_key_ids = i + 1;
+
+            if (avio_read(pb, info->key_ids[i], 16) != 16) {
+                av_log(c->fc, AV_LOG_ERROR, "Failed to read the key id\n");
+                ret = AVERROR_INVALIDDATA;
+                goto finish;
+            }
+        }
+
+        if (pb->eof_reached) {
+            av_log(c->fc, AV_LOG_ERROR, "Hit EOF while reading pssh\n");
+            ret = AVERROR_INVALIDDATA;
+            goto finish;
+        }
+    }
+
+    extra_data_size = avio_rb32(pb);
+    ret = mov_try_read_block(pb, extra_data_size, &extra_data);
+    if (ret < 0)
+        goto finish;
+
+    av_freep(&info->data);  // malloc(0) may still allocate something.
+    info->data = extra_data;
+    info->data_size = extra_data_size;
+
+    // If there is existing initialization data, append to the list.
+    old_side_data = av_stream_get_side_data(st, AV_PKT_DATA_ENCRYPTION_INIT_INFO, &old_side_data_size);
+    if (old_side_data) {
+        old_init_info = av_encryption_init_info_get_side_data(old_side_data, old_side_data_size);
+        if (old_init_info) {
+            // Append to the end of the list.
+            for (AVEncryptionInitInfo *cur = old_init_info;; cur = cur->next) {
+                if (!cur->next) {
+                    cur->next = info;
+                    break;
+                }
+            }
+            info = old_init_info;
+        } else {
+            // Assume existing side-data will be valid, so the only error we could get is OOM.
+            ret = AVERROR(ENOMEM);
+            goto finish;
+        }
+    }
+
+    side_data = av_encryption_init_info_add_side_data(info, &side_data_size);
+    if (!side_data) {
+        ret = AVERROR(ENOMEM);
+        goto finish;
+    }
+    ret = av_stream_add_side_data(st, AV_PKT_DATA_ENCRYPTION_INIT_INFO,
+                                  side_data, side_data_size);
+    if (ret < 0)
+        av_free(side_data);
+
+finish:
+    av_encryption_init_info_free(info);
+    return ret;
+}
+
+static int mov_read_schm(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVStream *st;
+    MOVStreamContext *sc;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
+    if (sc->pseudo_stream_id != 0) {
+        av_log(c->fc, AV_LOG_ERROR, "schm boxes are only supported in first sample descriptor\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    if (atom.size < 8)
+        return AVERROR_INVALIDDATA;
+
+    avio_rb32(pb); /* version and flags */
+
+    if (!sc->cenc.default_encrypted_sample) {
+        sc->cenc.default_encrypted_sample = av_encryption_info_alloc(0, 16, 16);
+        if (!sc->cenc.default_encrypted_sample) {
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    sc->cenc.default_encrypted_sample->scheme = avio_rb32(pb);
+    return 0;
+}
+
+static int mov_read_tenc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVStream *st;
+    MOVStreamContext *sc;
+    unsigned int version, pattern, is_protected, iv_size;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
+    if (sc->pseudo_stream_id != 0) {
+        av_log(c->fc, AV_LOG_ERROR, "tenc atom are only supported in first sample descriptor\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    if (!sc->cenc.default_encrypted_sample) {
+        sc->cenc.default_encrypted_sample = av_encryption_info_alloc(0, 16, 16);
+        if (!sc->cenc.default_encrypted_sample) {
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    if (atom.size < 20)
+        return AVERROR_INVALIDDATA;
+
+    version = avio_r8(pb); /* version */
+    avio_rb24(pb); /* flags */
+
+    avio_r8(pb); /* reserved */
+    pattern = avio_r8(pb);
+
+    if (version > 0) {
+        sc->cenc.default_encrypted_sample->crypt_byte_block = pattern >> 4;
+        sc->cenc.default_encrypted_sample->skip_byte_block = pattern & 0xf;
+    }
+
+    is_protected = avio_r8(pb);
+    if (is_protected && !sc->cenc.encryption_index) {
+        // The whole stream should be by-default encrypted.
+        sc->cenc.encryption_index = av_mallocz(sizeof(MOVEncryptionIndex));
+        if (!sc->cenc.encryption_index)
+            return AVERROR(ENOMEM);
+    }
+    sc->cenc.per_sample_iv_size = avio_r8(pb);
+    if (sc->cenc.per_sample_iv_size != 0 && sc->cenc.per_sample_iv_size != 8 &&
+        sc->cenc.per_sample_iv_size != 16) {
+        av_log(c->fc, AV_LOG_ERROR, "invalid per-sample IV size value\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (avio_read(pb, sc->cenc.default_encrypted_sample->key_id, 16) != 16) {
+        av_log(c->fc, AV_LOG_ERROR, "failed to read the default key ID\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (is_protected && !sc->cenc.per_sample_iv_size) {
+        iv_size = avio_r8(pb);
+        if (iv_size != 8 && iv_size != 16) {
+            av_log(c->fc, AV_LOG_ERROR, "invalid default_constant_IV_size in tenc atom\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (avio_read(pb, sc->cenc.default_encrypted_sample->iv, iv_size) != iv_size) {
+            av_log(c->fc, AV_LOG_ERROR, "failed to read the default IV\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
     return 0;
 }
 
@@ -5353,108 +6490,123 @@
     return 0;
 }
 
-static int mov_seek_auxiliary_info(MOVContext *c, MOVStreamContext *sc, int64_t index)
+static int cenc_decrypt(MOVContext *c, MOVStreamContext *sc, AVEncryptionInfo *sample, uint8_t *input, int size)
 {
-    size_t auxiliary_info_seek_offset = 0;
-    int i;
+    int i, ret;
 
-    if (sc->cenc.auxiliary_info_default_size) {
-        auxiliary_info_seek_offset = (size_t)sc->cenc.auxiliary_info_default_size * index;
-    } else if (sc->cenc.auxiliary_info_sizes) {
-        if (index > sc->cenc.auxiliary_info_sizes_count) {
-            av_log(c, AV_LOG_ERROR, "current sample %"PRId64" greater than the number of auxiliary info sample sizes %"SIZE_SPECIFIER"\n",
-                index, sc->cenc.auxiliary_info_sizes_count);
-            return AVERROR_INVALIDDATA;
-        }
-
-        for (i = 0; i < index; i++) {
-            auxiliary_info_seek_offset += sc->cenc.auxiliary_info_sizes[i];
-        }
+    if (sample->scheme != MKBETAG('c','e','n','c') || sample->crypt_byte_block != 0 || sample->skip_byte_block != 0) {
+        av_log(c->fc, AV_LOG_ERROR, "Only the 'cenc' encryption scheme is supported\n");
+        return AVERROR_PATCHWELCOME;
     }
 
-    if (auxiliary_info_seek_offset > sc->cenc.auxiliary_info_end - sc->cenc.auxiliary_info) {
-        av_log(c, AV_LOG_ERROR, "auxiliary info offset %"SIZE_SPECIFIER" greater than auxiliary info size %"SIZE_SPECIFIER"\n",
-            auxiliary_info_seek_offset, (size_t)(sc->cenc.auxiliary_info_end - sc->cenc.auxiliary_info));
-        return AVERROR_INVALIDDATA;
-    }
+    if (!sc->cenc.aes_ctr) {
+        /* initialize the cipher */
+        sc->cenc.aes_ctr = av_aes_ctr_alloc();
+        if (!sc->cenc.aes_ctr) {
+            return AVERROR(ENOMEM);
+        }
 
-    sc->cenc.auxiliary_info_pos = sc->cenc.auxiliary_info + auxiliary_info_seek_offset;
-    sc->cenc.auxiliary_info_index = index;
-    return 0;
-}
-
-static int cenc_filter(MOVContext *c, MOVStreamContext *sc, int64_t index, uint8_t *input, int size)
-{
-    uint32_t encrypted_bytes;
-    uint16_t subsample_count;
-    uint16_t clear_bytes;
-    uint8_t* input_end = input + size;
-    int ret;
-
-    if (index != sc->cenc.auxiliary_info_index) {
-        ret = mov_seek_auxiliary_info(c, sc, index);
+        ret = av_aes_ctr_init(sc->cenc.aes_ctr, c->decryption_key);
         if (ret < 0) {
             return ret;
         }
     }
 
-    /* read the iv */
-    if (AES_CTR_IV_SIZE > sc->cenc.auxiliary_info_end - sc->cenc.auxiliary_info_pos) {
-        av_log(c->fc, AV_LOG_ERROR, "failed to read iv from the auxiliary info\n");
-        return AVERROR_INVALIDDATA;
-    }
+    av_aes_ctr_set_full_iv(sc->cenc.aes_ctr, sample->iv);
 
-    av_aes_ctr_set_iv(sc->cenc.aes_ctr, sc->cenc.auxiliary_info_pos);
-    sc->cenc.auxiliary_info_pos += AES_CTR_IV_SIZE;
-
-    if (!sc->cenc.use_subsamples)
+    if (!sample->subsample_count)
     {
         /* decrypt the whole packet */
         av_aes_ctr_crypt(sc->cenc.aes_ctr, input, input, size);
         return 0;
     }
 
-    /* read the subsample count */
-    if (sizeof(uint16_t) > sc->cenc.auxiliary_info_end - sc->cenc.auxiliary_info_pos) {
-        av_log(c->fc, AV_LOG_ERROR, "failed to read subsample count from the auxiliary info\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    subsample_count = AV_RB16(sc->cenc.auxiliary_info_pos);
-    sc->cenc.auxiliary_info_pos += sizeof(uint16_t);
-
-    for (; subsample_count > 0; subsample_count--)
+    for (i = 0; i < sample->subsample_count; i++)
     {
-        if (6 > sc->cenc.auxiliary_info_end - sc->cenc.auxiliary_info_pos) {
-            av_log(c->fc, AV_LOG_ERROR, "failed to read subsample from the auxiliary info\n");
-            return AVERROR_INVALIDDATA;
-        }
-
-        /* read the number of clear / encrypted bytes */
-        clear_bytes = AV_RB16(sc->cenc.auxiliary_info_pos);
-        sc->cenc.auxiliary_info_pos += sizeof(uint16_t);
-        encrypted_bytes = AV_RB32(sc->cenc.auxiliary_info_pos);
-        sc->cenc.auxiliary_info_pos += sizeof(uint32_t);
-
-        if ((uint64_t)clear_bytes + encrypted_bytes > input_end - input) {
+        if (sample->subsamples[i].bytes_of_clear_data + sample->subsamples[i].bytes_of_protected_data > size) {
             av_log(c->fc, AV_LOG_ERROR, "subsample size exceeds the packet size left\n");
             return AVERROR_INVALIDDATA;
         }
 
         /* skip the clear bytes */
-        input += clear_bytes;
+        input += sample->subsamples[i].bytes_of_clear_data;
+        size -= sample->subsamples[i].bytes_of_clear_data;
 
         /* decrypt the encrypted bytes */
-        av_aes_ctr_crypt(sc->cenc.aes_ctr, input, input, encrypted_bytes);
-        input += encrypted_bytes;
+        av_aes_ctr_crypt(sc->cenc.aes_ctr, input, input, sample->subsamples[i].bytes_of_protected_data);
+        input += sample->subsamples[i].bytes_of_protected_data;
+        size -= sample->subsamples[i].bytes_of_protected_data;
     }
 
-    if (input < input_end) {
+    if (size > 0) {
         av_log(c->fc, AV_LOG_ERROR, "leftover packet bytes after subsample processing\n");
         return AVERROR_INVALIDDATA;
     }
 
-    sc->cenc.auxiliary_info_index++;
+    return 0;
+}
+
+static int cenc_filter(MOVContext *mov, MOVStreamContext *sc, AVPacket *pkt, int current_index)
+{
+    MOVFragmentStreamInfo *frag_stream_info;
+    MOVEncryptionIndex *encryption_index;
+    AVEncryptionInfo *encrypted_sample;
+    int encrypted_index, ret;
+
+    frag_stream_info = get_current_frag_stream_info(&mov->frag_index);
+    encrypted_index = current_index;
+    encryption_index = NULL;
+    if (frag_stream_info) {
+        // Note this only supports encryption info in the first sample descriptor.
+        if (mov->fragment.stsd_id == 1) {
+            if (frag_stream_info->encryption_index) {
+                encrypted_index = current_index - frag_stream_info->index_entry;
+                encryption_index = frag_stream_info->encryption_index;
+            } else {
+                encryption_index = sc->cenc.encryption_index;
+            }
+        }
+    } else {
+        encryption_index = sc->cenc.encryption_index;
+    }
+
+    if (encryption_index) {
+        if (encryption_index->auxiliary_info_sample_count &&
+            !encryption_index->nb_encrypted_samples) {
+            av_log(mov->fc, AV_LOG_ERROR, "saiz atom found without saio\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (encryption_index->auxiliary_offsets_count &&
+            !encryption_index->nb_encrypted_samples) {
+            av_log(mov->fc, AV_LOG_ERROR, "saio atom found without saiz\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (!encryption_index->nb_encrypted_samples) {
+            // Full-sample encryption with default settings.
+            encrypted_sample = sc->cenc.default_encrypted_sample;
+        } else if (encrypted_index >= 0 && encrypted_index < encryption_index->nb_encrypted_samples) {
+            // Per-sample setting override.
+            encrypted_sample = encryption_index->encrypted_samples[encrypted_index];
+        } else {
+            av_log(mov->fc, AV_LOG_ERROR, "Incorrect number of samples in encryption info\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (mov->decryption_key) {
+            return cenc_decrypt(mov, sc, encrypted_sample, pkt->data, pkt->size);
+        } else {
+            size_t size;
+            uint8_t *side_data = av_encryption_info_add_side_data(encrypted_sample, &size);
+            if (!side_data)
+                return AVERROR(ENOMEM);
+            ret = av_packet_add_side_data(pkt, AV_PKT_DATA_ENCRYPTION_INFO, side_data, size);
+            if (ret < 0)
+                av_free(side_data);
+            return ret;
+        }
+    }
+
     return 0;
 }
 
@@ -5463,7 +6615,7 @@
     const int OPUS_SEEK_PREROLL_MS = 80;
     AVStream *st;
     size_t size;
-    int16_t pre_skip;
+    uint16_t pre_skip;
 
     if (c->fc->nb_streams < 1)
         return 0;
@@ -5512,6 +6664,7 @@
 { MKTAG('A','A','L','P'), mov_read_avid },
 { MKTAG('A','R','E','S'), mov_read_ares },
 { MKTAG('a','v','s','s'), mov_read_avss },
+{ MKTAG('a','v','1','C'), mov_read_av1c },
 { MKTAG('c','h','p','l'), mov_read_chpl },
 { MKTAG('c','o','6','4'), mov_read_stco },
 { MKTAG('c','o','l','r'), mov_read_colr },
@@ -5584,6 +6737,11 @@
 { MKTAG('f','r','m','a'), mov_read_frma },
 { MKTAG('s','e','n','c'), mov_read_senc },
 { MKTAG('s','a','i','z'), mov_read_saiz },
+{ MKTAG('s','a','i','o'), mov_read_saio },
+{ MKTAG('p','s','s','h'), mov_read_pssh },
+{ MKTAG('s','c','h','m'), mov_read_schm },
+{ MKTAG('s','c','h','i'), mov_read_default },
+{ MKTAG('t','e','n','c'), mov_read_tenc },
 { MKTAG('d','f','L','a'), mov_read_dfla },
 { MKTAG('s','t','3','d'), mov_read_st3d }, /* stereoscopic 3D video box */
 { MKTAG('s','v','3','d'), mov_read_sv3d }, /* spherical video box */
@@ -5591,6 +6749,8 @@
 { MKTAG('S','m','D','m'), mov_read_smdm },
 { MKTAG('C','o','L','L'), mov_read_coll },
 { MKTAG('v','p','c','C'), mov_read_vpcc },
+{ MKTAG('m','d','c','v'), mov_read_mdcv },
+{ MKTAG('c','l','l','i'), mov_read_clli },
 { 0, NULL }
 };
 
@@ -5687,9 +6847,9 @@
                 return err;
             }
             if (c->found_moov && c->found_mdat &&
-                ((!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->fragment_index_complete) ||
+                ((!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->frag_index.complete) ||
                  start_pos + a.size == avio_size(pb))) {
-                if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->fragment_index_complete)
+                if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->frag_index.complete)
                     c->next_root_atom = start_pos + a.size;
                 c->atom_depth --;
                 return 0;
@@ -5967,6 +7127,18 @@
     return 0;
 }
 
+static void mov_free_encryption_index(MOVEncryptionIndex **index) {
+    int i;
+    if (!index || !*index) return;
+    for (i = 0; i < (*index)->nb_encrypted_samples; i++) {
+        av_encryption_info_free((*index)->encrypted_samples[i]);
+    }
+    av_freep(&(*index)->encrypted_samples);
+    av_freep(&(*index)->auxiliary_info_sizes);
+    av_freep(&(*index)->auxiliary_offsets);
+    av_freep(index);
+}
+
 static int mov_read_close(AVFormatContext *s)
 {
     MOVContext *mov = s->priv_data;
@@ -6009,8 +7181,8 @@
         av_freep(&sc->extradata);
         av_freep(&sc->extradata_size);
 
-        av_freep(&sc->cenc.auxiliary_info);
-        av_freep(&sc->cenc.auxiliary_info_sizes);
+        mov_free_encryption_index(&sc->cenc.encryption_index);
+        av_encryption_info_free(sc->cenc.default_encrypted_sample);
         av_aes_ctr_free(sc->cenc.aes_ctr);
 
         av_freep(&sc->stereo3d);
@@ -6034,12 +7206,14 @@
     av_freep(&mov->trex_data);
     av_freep(&mov->bitrates);
 
-    for (i = 0; i < mov->fragment_index_count; i++) {
-        MOVFragmentIndex* index = mov->fragment_index_data[i];
-        av_freep(&index->items);
-        av_freep(&mov->fragment_index_data[i]);
+    for (i = 0; i < mov->frag_index.nb_items; i++) {
+        MOVFragmentStreamInfo *frag = mov->frag_index.item[i].stream_info;
+        for (j = 0; j < mov->frag_index.item[i].nb_stream_info; j++) {
+            mov_free_encryption_index(&frag[j].encryption_index);
+        }
+        av_freep(&mov->frag_index.item[i].stream_info);
     }
-    av_freep(&mov->fragment_index_data);
+    av_freep(&mov->frag_index.item);
 
     av_freep(&mov->aes_decrypt);
     av_freep(&mov->chapter_tracks);
@@ -6083,48 +7257,27 @@
 
 static int read_tfra(MOVContext *mov, AVIOContext *f)
 {
-    MOVFragmentIndex* index = NULL;
     int version, fieldlength, i, j;
     int64_t pos = avio_tell(f);
     uint32_t size = avio_rb32(f);
-    void *tmp;
+    unsigned track_id, item_count;
 
     if (avio_rb32(f) != MKBETAG('t', 'f', 'r', 'a')) {
         return 1;
     }
     av_log(mov->fc, AV_LOG_VERBOSE, "found tfra\n");
-    index = av_mallocz(sizeof(MOVFragmentIndex));
-    if (!index) {
-        return AVERROR(ENOMEM);
-    }
-
-    tmp = av_realloc_array(mov->fragment_index_data,
-                           mov->fragment_index_count + 1,
-                           sizeof(MOVFragmentIndex*));
-    if (!tmp) {
-        av_freep(&index);
-        return AVERROR(ENOMEM);
-    }
-    mov->fragment_index_data = tmp;
-    mov->fragment_index_data[mov->fragment_index_count++] = index;
 
     version = avio_r8(f);
     avio_rb24(f);
-    index->track_id = avio_rb32(f);
+    track_id = avio_rb32(f);
     fieldlength = avio_rb32(f);
-    index->item_count = avio_rb32(f);
-    index->items = av_mallocz_array(
-            index->item_count, sizeof(MOVFragmentIndexItem));
-    if (!index->items) {
-        index->item_count = 0;
-        return AVERROR(ENOMEM);
-    }
-    for (i = 0; i < index->item_count; i++) {
+    item_count = avio_rb32(f);
+    for (i = 0; i < item_count; i++) {
         int64_t time, offset;
+        int index;
+        MOVFragmentStreamInfo * frag_stream_info;
 
         if (avio_feof(f)) {
-            index->item_count = 0;
-            av_freep(&index->items);
             return AVERROR_INVALIDDATA;
         }
 
@@ -6135,8 +7288,16 @@
             time   = avio_rb32(f);
             offset = avio_rb32(f);
         }
-        index->items[i].time = time;
-        index->items[i].moof_offset = offset;
+
+        // The first sample of each stream in a fragment is always a random
+        // access sample.  So it's entry in the tfra can be used as the
+        // initial PTS of the fragment.
+        index = update_frag_index(mov, offset);
+        frag_stream_info = get_frag_stream_info(&mov->frag_index, index, track_id);
+        if (frag_stream_info &&
+            frag_stream_info->first_tfra_pts == AV_NOPTS_VALUE)
+            frag_stream_info->first_tfra_pts = time;
+
         for (j = 0; j < ((fieldlength >> 4) & 3) + 1; j++)
             avio_r8(f);
         for (j = 0; j < ((fieldlength >> 2) & 3) + 1; j++)
@@ -6218,13 +7379,13 @@
 
     /* check MOV header */
     do {
-    if (mov->moov_retry)
-        avio_seek(pb, 0, SEEK_SET);
-    if ((err = mov_read_default(mov, pb, atom)) < 0) {
-        av_log(s, AV_LOG_ERROR, "error reading header\n");
-        mov_read_close(s);
-        return err;
-    }
+        if (mov->moov_retry)
+            avio_seek(pb, 0, SEEK_SET);
+        if ((err = mov_read_default(mov, pb, atom)) < 0) {
+            av_log(s, AV_LOG_ERROR, "error reading header\n");
+            mov_read_close(s);
+            return err;
+        }
     } while ((pb->seekable & AVIO_SEEKABLE_NORMAL) && !mov->found_moov && !mov->moov_retry++);
     if (!mov->found_moov) {
         av_log(s, AV_LOG_ERROR, "moov atom not found\n");
@@ -6397,12 +7558,9 @@
     }
     ff_configure_buffers_for_index(s, AV_TIME_BASE);
 
-    for (i = 0; i < mov->fragment_index_count; i++) {
-        MOVFragmentIndex *idx = mov->fragment_index_data[i];
-        for (j = 0; j < idx->item_count; j++)
-            if (idx->items[j].moof_offset <= mov->fragment.moof_offset)
-                idx->items[j].headers_read = 1;
-    }
+    for (i = 0; i < mov->frag_index.nb_items; i++)
+        if (mov->frag_index.item[i].moof_offset <= mov->fragment.moof_offset)
+            mov->frag_index.item[i].headers_read = 1;
 
     return 0;
 }
@@ -6440,43 +7598,29 @@
     return 1;
 }
 
-static int mov_switch_root(AVFormatContext *s, int64_t target)
+static int mov_switch_root(AVFormatContext *s, int64_t target, int index)
 {
     int ret;
     MOVContext *mov = s->priv_data;
-    int i, j;
-    int already_read = 0;
 
+    if (index >= 0 && index < mov->frag_index.nb_items)
+        target = mov->frag_index.item[index].moof_offset;
     if (avio_seek(s->pb, target, SEEK_SET) != target) {
         av_log(mov->fc, AV_LOG_ERROR, "root atom offset 0x%"PRIx64": partial file\n", target);
         return AVERROR_INVALIDDATA;
     }
 
     mov->next_root_atom = 0;
-
-    for (i = 0; i < mov->fragment_index_count; i++) {
-        MOVFragmentIndex *index = mov->fragment_index_data[i];
-        int found = 0;
-        for (j = 0; j < index->item_count; j++) {
-            MOVFragmentIndexItem *item = &index->items[j];
-            if (found) {
-                mov->next_root_atom = item->moof_offset;
-                break; // Advance to next index in outer loop
-            } else if (item->moof_offset == target) {
-                index->current_item = FFMIN(j, index->current_item);
-                if (item->headers_read)
-                    already_read = 1;
-                item->headers_read = 1;
-                found = 1;
-            }
-        }
-        if (!found)
-            index->current_item = 0;
+    if (index < 0 || index >= mov->frag_index.nb_items)
+        index = search_frag_moof_offset(&mov->frag_index, target);
+    if (index < mov->frag_index.nb_items) {
+        if (index + 1 < mov->frag_index.nb_items)
+            mov->next_root_atom = mov->frag_index.item[index + 1].moof_offset;
+        if (mov->frag_index.item[index].headers_read)
+            return 0;
+        mov->frag_index.item[index].headers_read = 1;
     }
 
-    if (already_read)
-        return 0;
-
     mov->found_mdat = 0;
 
     ret = mov_read_default(mov, s->pb, (MOVAtom){ AV_RL32("root"), INT64_MAX });
@@ -6526,7 +7670,7 @@
     if (!sample || (mov->next_root_atom && sample->pos > mov->next_root_atom)) {
         if (!mov->next_root_atom)
             return AVERROR_EOF;
-        if ((ret = mov_switch_root(s, mov->next_root_atom)) < 0)
+        if ((ret = mov_switch_root(s, mov->next_root_atom, -1)) < 0)
             return ret;
         goto retry;
     }
@@ -6607,7 +7751,9 @@
     } else {
         int64_t next_dts = (sc->current_sample < st->nb_index_entries) ?
             st->index_entries[sc->current_sample].timestamp : st->duration;
-        pkt->duration = next_dts - pkt->dts;
+
+        if (next_dts >= pkt->dts)
+            pkt->duration = next_dts - pkt->dts;
         pkt->pts = pkt->dts;
     }
     if (st->discard == AVDISCARD_ALL)
@@ -6637,12 +7783,9 @@
     if (mov->aax_mode)
         aax_filter(pkt->data, pkt->size, mov);
 
-    if (sc->cenc.aes_ctr) {
-        ret = cenc_filter(mov, sc, current_index, pkt->data, pkt->size);
-        if (ret) {
-            return ret;
-        }
-    }
+    ret = cenc_filter(mov, sc, pkt, current_index);
+    if (ret < 0)
+        return ret;
 
     return 0;
 }
@@ -6650,25 +7793,18 @@
 static int mov_seek_fragment(AVFormatContext *s, AVStream *st, int64_t timestamp)
 {
     MOVContext *mov = s->priv_data;
-    MOVStreamContext *sc = st->priv_data;
-    int i, j;
+    int index;
 
-    if (!mov->fragment_index_complete)
+    if (!mov->frag_index.complete)
         return 0;
 
-    for (i = 0; i < mov->fragment_index_count; i++) {
-        if (mov->fragment_index_data[i]->track_id == st->id || !sc->has_sidx) {
-            MOVFragmentIndex *index = mov->fragment_index_data[i];
-            for (j = index->item_count - 1; j >= 0; j--) {
-                if (index->items[j].time <= timestamp) {
-                    if (index->items[j].headers_read)
-                        return 0;
-
-                    return mov_switch_root(s, index->items[j].moof_offset);
-                }
-            }
-        }
-    }
+    index = search_frag_timestamp(&mov->frag_index, st, timestamp);
+    if (index < 0)
+        index = 0;
+    if (!mov->frag_index.item[index].headers_read)
+        return mov_switch_root(s, -1, index);
+    if (index + 1 < mov->frag_index.nb_items)
+        mov->next_root_atom = mov->frag_index.item[index + 1].moof_offset;
 
     return 0;
 }
@@ -6676,10 +7812,14 @@
 static int mov_seek_stream(AVFormatContext *s, AVStream *st, int64_t timestamp, int flags)
 {
     MOVStreamContext *sc = st->priv_data;
-    int sample, time_sample;
+    int sample, time_sample, ret;
     unsigned int i;
 
-    int ret = mov_seek_fragment(s, st, timestamp);
+    // Here we consider timestamp to be PTS, hence try to offset it so that we
+    // can search over the DTS timeline.
+    timestamp -= (sc->min_corrected_pts + sc->dts_shift);
+
+    ret = mov_seek_fragment(s, st, timestamp);
     if (ret < 0)
         return ret;
 
@@ -6708,12 +7848,13 @@
     /* adjust stsd index */
     time_sample = 0;
     for (i = 0; i < sc->stsc_count; i++) {
-        int next = time_sample + mov_get_stsc_samples(sc, i);
+        int64_t next = time_sample + mov_get_stsc_samples(sc, i);
         if (next > sc->current_sample) {
             sc->stsc_index = i;
             sc->stsc_sample = sc->current_sample - time_sample;
             break;
         }
+        av_assert0(next == (int)next);
         time_sample = next;
     }
 

diff --git a/libavformat/mov_esds.c b/libavformat/mov_esds.c
new file mode 100644
index 0000000..a444d96
--- /dev/null
+++ b/libavformat/mov_esds.c

@@ -0,0 +1,44 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "avio.h"
+#include "isom.h"
+
+int ff_mov_read_esds(AVFormatContext *fc, AVIOContext *pb)
+{
+    AVStream *st;
+    int tag, ret = 0;
+
+    if (fc->nb_streams < 1)
+        return 0;
+    st = fc->streams[fc->nb_streams-1];
+
+    avio_rb32(pb); /* version + flags */
+    ff_mp4_read_descr(fc, pb, &tag);
+    if (tag == MP4ESDescrTag) {
+        ff_mp4_parse_es_descr(pb, NULL);
+    } else
+        avio_rb16(pb); /* ID */
+
+    ff_mp4_read_descr(fc, pb, &tag);
+    if (tag == MP4DecConfigDescrTag)
+        ret = ff_mp4_read_dec_config_descr(fc, st, pb);
+
+    return ret;
+}

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 2838286..33978ee 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c

@@ -30,8 +30,9 @@
 #include "riff.h"
 #include "avio.h"
 #include "isom.h"
+#include "av1.h"
 #include "avc.h"
-#include "libavcodec/ac3_parser.h"
+#include "libavcodec/ac3_parser_internal.h"
 #include "libavcodec/dnxhddata.h"
 #include "libavcodec/flac.h"
 #include "libavcodec/get_bits.h"
@@ -62,6 +63,7 @@
     { "moov_size", "maximum moov size so it can be placed at the begin", offsetof(MOVMuxContext, reserved_moov_size), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, 0 },
     { "empty_moov", "Make the initial moov atom empty", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_EMPTY_MOOV}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "frag_keyframe", "Fragment at video keyframes", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_FRAG_KEYFRAME}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
+    { "frag_every_frame", "Fragment at every frame", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_FRAG_EVERY_FRAME}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "separate_moof", "Write separate moof/mdat atoms for each track", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_SEPARATE_MOOF}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "frag_custom", "Flush fragments on caller requests", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_FRAG_CUSTOM}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "isml", "Create a live smooth streaming feed (for pushing to a publishing point)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_ISML}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
@@ -97,6 +99,10 @@
     { "encryption_kid", "The media encryption key identifier (hex)", offsetof(MOVMuxContext, encryption_kid), AV_OPT_TYPE_BINARY, .flags = AV_OPT_FLAG_ENCODING_PARAM },
     { "use_stream_ids_as_track_ids", "use stream ids as track ids", offsetof(MOVMuxContext, use_stream_ids_as_track_ids), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
     { "write_tmcd", "force or disable writing tmcd", offsetof(MOVMuxContext, write_tmcd), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AV_OPT_FLAG_ENCODING_PARAM},
+    { "write_prft", "Write producer reference time box with specified time source", offsetof(MOVMuxContext, write_prft), AV_OPT_TYPE_INT, {.i64 = MOV_PRFT_NONE}, 0, MOV_PRFT_NB-1, AV_OPT_FLAG_ENCODING_PARAM, "prft"},
+    { "wallclock", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MOV_PRFT_SRC_WALLCLOCK}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM, "prft"},
+    { "pts", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MOV_PRFT_SRC_PTS}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM, "prft"},
+    { "empty_hdlr_name", "write zero-length name string in hdlr atoms within mdia and minf atoms", offsetof(MOVMuxContext, empty_hdlr_name), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
     { NULL },
 };
 
@@ -139,6 +145,22 @@
     return 0;
 }
 
+static int is_cover_image(const AVStream *st)
+{
+    /* Eg. AV_DISPOSITION_ATTACHED_PIC | AV_DISPOSITION_TIMED_THUMBNAILS
+     * is encoded as sparse video track */
+    return st && st->disposition == AV_DISPOSITION_ATTACHED_PIC;
+}
+
+static int rtp_hinting_needed(const AVStream *st)
+{
+    /* Add hint tracks for each real audio and video stream */
+    if (is_cover_image(st))
+        return 0;
+    return st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
+           st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO;
+}
+
 /* Chunk offset atom */
 static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
 {
@@ -252,6 +274,30 @@
     return update_size(pb, pos);
 }
 
+/* Sample dependency atom */
+static int mov_write_sdtp_tag(AVIOContext *pb, MOVTrack *track)
+{
+    int i;
+    uint8_t leading, dependent, reference, redundancy;
+    int64_t pos = avio_tell(pb);
+    avio_wb32(pb, 0); // size
+    ffio_wfourcc(pb, "sdtp");
+    avio_wb32(pb, 0); // version & flags
+    for (i = 0; i < track->entry; i++) {
+        dependent = MOV_SAMPLE_DEPENDENCY_YES;
+        leading = reference = redundancy = MOV_SAMPLE_DEPENDENCY_UNKNOWN;
+        if (track->cluster[i].flags & MOV_DISPOSABLE_SAMPLE) {
+            reference = MOV_SAMPLE_DEPENDENCY_NO;
+        }
+        if (track->cluster[i].flags & MOV_SYNC_SAMPLE) {
+            dependent = MOV_SAMPLE_DEPENDENCY_NO;
+        }
+        avio_w8(pb, (leading << 6)   | (dependent << 4) |
+                    (reference << 2) | redundancy);
+    }
+    return update_size(pb, pos);
+}
+
 static int mov_write_amr_tag(AVIOContext *pb, MOVTrack *track)
 {
     avio_wb32(pb, 0x11); /* size */
@@ -345,23 +391,22 @@
 #if CONFIG_AC3_PARSER
 static int handle_eac3(MOVMuxContext *mov, AVPacket *pkt, MOVTrack *track)
 {
-    GetBitContext gbc;
-    AC3HeaderInfo tmp, *hdr = &tmp;
+    AC3HeaderInfo *hdr = NULL;
     struct eac3_info *info;
-    int num_blocks;
+    int num_blocks, ret;
 
     if (!track->eac3_priv && !(track->eac3_priv = av_mallocz(sizeof(*info))))
         return AVERROR(ENOMEM);
     info = track->eac3_priv;
 
-    init_get_bits(&gbc, pkt->data, pkt->size * 8);
-    if (avpriv_ac3_parse_header(&gbc, &hdr) < 0) {
+    if (avpriv_ac3_parse_header(&hdr, pkt->data, pkt->size) < 0) {
         /* drop the packets until we see a good one */
         if (!track->entry) {
-            av_log(mov, AV_LOG_WARNING, "Dropping invalid packet from start of the stream\n");
-            return 0;
-        }
-        return AVERROR_INVALIDDATA;
+            av_log(mov->fc, AV_LOG_WARNING, "Dropping invalid packet from start of the stream\n");
+            ret = 0;
+        } else
+            ret = AVERROR_INVALIDDATA;
+        goto end;
     }
 
     info->data_rate = FFMAX(info->data_rate, hdr->bit_rate / 1000);
@@ -369,25 +414,36 @@
 
     if (!info->ec3_done) {
         /* AC-3 substream must be the first one */
-        if (hdr->bitstream_id <= 10 && hdr->substreamid != 0)
-            return AVERROR(EINVAL);
+        if (hdr->bitstream_id <= 10 && hdr->substreamid != 0) {
+            ret = AVERROR(EINVAL);
+            goto end;
+        }
 
         /* this should always be the case, given that our AC-3 parser
          * concatenates dependent frames to their independent parent */
         if (hdr->frame_type == EAC3_FRAME_TYPE_INDEPENDENT) {
             /* substream ids must be incremental */
-            if (hdr->substreamid > info->num_ind_sub + 1)
-                return AVERROR(EINVAL);
+            if (hdr->substreamid > info->num_ind_sub + 1) {
+                ret = AVERROR(EINVAL);
+                goto end;
+            }
 
             if (hdr->substreamid == info->num_ind_sub + 1) {
                 //info->num_ind_sub++;
-                avpriv_request_sample(track->par, "Multiple independent substreams");
-                return AVERROR_PATCHWELCOME;
+                avpriv_request_sample(mov->fc, "Multiple independent substreams");
+                ret = AVERROR_PATCHWELCOME;
+                goto end;
             } else if (hdr->substreamid < info->num_ind_sub ||
                        hdr->substreamid == 0 && info->substream[0].bsid) {
                 info->ec3_done = 1;
                 goto concatenate;
             }
+        } else {
+            if (hdr->substreamid != 0) {
+                avpriv_request_sample(mov->fc, "Multiple non EAC3 independent substreams");
+                ret = AVERROR_PATCHWELCOME;
+                goto end;
+            }
         }
 
         /* fill the info needed for the "dec3" atom */
@@ -403,16 +459,20 @@
             int parent = hdr->substreamid;
 
             while (cumul_size != pkt->size) {
+                GetBitContext gbc;
                 int i;
-                init_get_bits(&gbc, pkt->data + cumul_size, (pkt->size - cumul_size) * 8);
-                if (avpriv_ac3_parse_header(&gbc, &hdr) < 0)
-                    return AVERROR_INVALIDDATA;
-                if (hdr->frame_type != EAC3_FRAME_TYPE_DEPENDENT)
-                    return AVERROR(EINVAL);
-                cumul_size += hdr->frame_size;
+                ret = avpriv_ac3_parse_header(&hdr, pkt->data + cumul_size, pkt->size - cumul_size);
+                if (ret < 0)
+                    goto end;
+                if (hdr->frame_type != EAC3_FRAME_TYPE_DEPENDENT) {
+                    ret = AVERROR(EINVAL);
+                    goto end;
+                }
                 info->substream[parent].num_dep_sub++;
+                ret /= 8;
 
                 /* header is parsed up to lfeon, but custom channel map may be needed */
+                init_get_bits8(&gbc, pkt->data + cumul_size + ret, pkt->size - cumul_size - ret);
                 /* skip bsid */
                 skip_bits(&gbc, 5);
                 /* skip volume control params */
@@ -427,42 +487,46 @@
                     info->substream[parent].chan_loc |= (get_bits(&gbc, 16) >> 5) & 0x1f;
                 else
                     info->substream[parent].chan_loc |= hdr->channel_mode;
+                cumul_size += hdr->frame_size;
             }
         }
     }
 
 concatenate:
-    if (!info->num_blocks && num_blocks == 6)
-        return pkt->size;
-    else if (info->num_blocks + num_blocks > 6)
-        return AVERROR_INVALIDDATA;
+    if (!info->num_blocks && num_blocks == 6) {
+        ret = pkt->size;
+        goto end;
+    }
+    else if (info->num_blocks + num_blocks > 6) {
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
 
     if (!info->num_blocks) {
-        int ret = av_packet_ref(&info->pkt, pkt);
-        if (ret < 0)
-            return ret;
-        info->num_blocks = num_blocks;
-        return 0;
+        ret = av_packet_ref(&info->pkt, pkt);
+        if (!ret)
+            info->num_blocks = num_blocks;
+        goto end;
     } else {
-        int ret;
         if ((ret = av_grow_packet(&info->pkt, pkt->size)) < 0)
-            return ret;
+            goto end;
         memcpy(info->pkt.data + info->pkt.size - pkt->size, pkt->data, pkt->size);
         info->num_blocks += num_blocks;
         info->pkt.duration += pkt->duration;
         if ((ret = av_copy_packet_side_data(&info->pkt, pkt)) < 0)
-            return ret;
+            goto end;
         if (info->num_blocks != 6)
-            return 0;
+            goto end;
         av_packet_unref(pkt);
-        ret = av_packet_ref(pkt, &info->pkt);
-        if (ret < 0)
-            return ret;
-        av_packet_unref(&info->pkt);
+        av_packet_move_ref(pkt, &info->pkt);
         info->num_blocks = 0;
     }
+    ret = pkt->size;
 
-    return pkt->size;
+end:
+    av_free(hdr);
+
+    return ret;
 }
 #endif
 
@@ -959,7 +1023,7 @@
     uint32_t tag = track->tag;
 
     if (track->mode == MODE_MOV) {
-        if (track->timescale > UINT16_MAX) {
+        if (track->timescale > UINT16_MAX || !track->par->channels) {
             if (mov_get_lpcm_flags(track->par->codec_id))
                 tag = AV_RL32("lpcm");
             version = 2;
@@ -1100,6 +1164,16 @@
     return 0xf;
 }
 
+static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
+{
+    int64_t pos = avio_tell(pb);
+
+    avio_wb32(pb, 0);
+    ffio_wfourcc(pb, "av1C");
+    ff_isom_write_av1c(pb, track->vos_data, track->vos_len);
+    return update_size(pb, pos);
+}
+
 static int mov_write_avcc_tag(AVIOContext *pb, MOVTrack *track)
 {
     int64_t pos = avio_tell(pb);
@@ -1461,9 +1535,9 @@
     return tag;
 }
 
-static int mov_get_codec_tag(AVFormatContext *s, MOVTrack *track)
+static unsigned int mov_get_codec_tag(AVFormatContext *s, MOVTrack *track)
 {
-    int tag = track->par->codec_tag;
+    unsigned int tag = track->par->codec_tag;
 
     if (!tag || (s->strict_std_compliance >= FF_COMPLIANCE_NORMAL &&
                  (track->par->codec_id == AV_CODEC_ID_DVVIDEO ||
@@ -1508,29 +1582,50 @@
     return tag;
 }
 
-static int mov_find_codec_tag(AVFormatContext *s, MOVTrack *track)
-{
-    int tag;
+static const AVCodecTag codec_cover_image_tags[] = {
+    { AV_CODEC_ID_MJPEG,  0xD },
+    { AV_CODEC_ID_PNG,    0xE },
+    { AV_CODEC_ID_BMP,    0x1B },
+    { AV_CODEC_ID_NONE, 0 },
+};
 
-    if (track->mode == MODE_MP4 || track->mode == MODE_PSP)
-        tag = track->par->codec_tag;
-    else if (track->mode == MODE_ISM)
-        tag = track->par->codec_tag;
-    else if (track->mode == MODE_IPOD) {
-        if (!av_match_ext(s->filename, "m4a") &&
-            !av_match_ext(s->filename, "m4v") &&
-            !av_match_ext(s->filename, "m4b"))
+static unsigned int validate_codec_tag(const AVCodecTag *const *tags,
+                                       unsigned int tag, int codec_id)
+{
+    int i;
+
+    /**
+     * Check that tag + id is in the table
+     */
+    for (i = 0; tags && tags[i]; i++) {
+        const AVCodecTag *codec_tags = tags[i];
+        while (codec_tags->id != AV_CODEC_ID_NONE) {
+            if (avpriv_toupper4(codec_tags->tag) == avpriv_toupper4(tag) &&
+                codec_tags->id == codec_id)
+                return codec_tags->tag;
+            codec_tags++;
+        }
+    }
+    return 0;
+}
+
+static unsigned int mov_find_codec_tag(AVFormatContext *s, MOVTrack *track)
+{
+    if (is_cover_image(track->st))
+        return ff_codec_get_tag(codec_cover_image_tags, track->par->codec_id);
+
+    if (track->mode == MODE_IPOD)
+        if (!av_match_ext(s->url, "m4a") &&
+            !av_match_ext(s->url, "m4v") &&
+            !av_match_ext(s->url, "m4b"))
             av_log(s, AV_LOG_WARNING, "Warning, extension is not .m4a nor .m4v "
                    "Quicktime/Ipod might not play the file\n");
-        tag = track->par->codec_tag;
-    } else if (track->mode & MODE_3GP)
-        tag = track->par->codec_tag;
-    else if (track->mode == MODE_F4V)
-        tag = track->par->codec_tag;
-    else
-        tag = mov_get_codec_tag(s, track);
 
-    return tag;
+    if (track->mode == MODE_MOV) {
+        return mov_get_codec_tag(s, track);
+    } else
+        return validate_codec_tag(s->oformat->codec_tag, track->par->codec_tag,
+                                  track->par->codec_id);
 }
 
 /** Write uuid atom.
@@ -1671,6 +1766,21 @@
     return update_size(pb, sv3d_pos);
 }
 
+static int mov_write_clap_tag(AVIOContext *pb, MOVTrack *track)
+{
+    avio_wb32(pb, 40);
+    ffio_wfourcc(pb, "clap");
+    avio_wb32(pb, track->par->width); /* apertureWidth_N */
+    avio_wb32(pb, 1); /* apertureWidth_D (= 1) */
+    avio_wb32(pb, track->height); /* apertureHeight_N */
+    avio_wb32(pb, 1); /* apertureHeight_D (= 1) */
+    avio_wb32(pb, 0); /* horizOff_N (= 0) */
+    avio_wb32(pb, 1); /* horizOff_D (= 1) */
+    avio_wb32(pb, 0); /* vertOff_N (= 0) */
+    avio_wb32(pb, 1); /* vertOff_D (= 1) */
+    return 40;
+}
+
 static int mov_write_pasp_tag(AVIOContext *pb, MOVTrack *track)
 {
     AVRational sar;
@@ -1755,23 +1865,30 @@
         ffio_wfourcc(pb, "nclc");
     switch (track->par->color_primaries) {
     case AVCOL_PRI_BT709:     avio_wb16(pb, 1); break;
+    case AVCOL_PRI_BT470BG:   avio_wb16(pb, 5); break;
     case AVCOL_PRI_SMPTE170M:
     case AVCOL_PRI_SMPTE240M: avio_wb16(pb, 6); break;
-    case AVCOL_PRI_BT470BG:   avio_wb16(pb, 5); break;
+    case AVCOL_PRI_BT2020:    avio_wb16(pb, 9); break;
+    case AVCOL_PRI_SMPTE431:  avio_wb16(pb, 11); break;
+    case AVCOL_PRI_SMPTE432:  avio_wb16(pb, 12); break;
     default:                  avio_wb16(pb, 2);
     }
     switch (track->par->color_trc) {
-    case AVCOL_TRC_BT709:     avio_wb16(pb, 1); break;
-    case AVCOL_TRC_SMPTE170M: avio_wb16(pb, 1); break; // remapped
-    case AVCOL_TRC_SMPTE240M: avio_wb16(pb, 7); break;
-    default:                  avio_wb16(pb, 2);
+    case AVCOL_TRC_BT709:        avio_wb16(pb, 1); break;
+    case AVCOL_TRC_SMPTE170M:    avio_wb16(pb, 1); break; // remapped
+    case AVCOL_TRC_SMPTE240M:    avio_wb16(pb, 7); break;
+    case AVCOL_TRC_SMPTEST2084:  avio_wb16(pb, 16); break;
+    case AVCOL_TRC_SMPTE428:     avio_wb16(pb, 17); break;
+    case AVCOL_TRC_ARIB_STD_B67: avio_wb16(pb, 18); break;
+    default:                     avio_wb16(pb, 2);
     }
     switch (track->par->color_space) {
-    case AVCOL_SPC_BT709:     avio_wb16(pb, 1); break;
+    case AVCOL_SPC_BT709:      avio_wb16(pb, 1); break;
     case AVCOL_SPC_BT470BG:
-    case AVCOL_SPC_SMPTE170M: avio_wb16(pb, 6); break;
-    case AVCOL_SPC_SMPTE240M: avio_wb16(pb, 7); break;
-    default:                  avio_wb16(pb, 2);
+    case AVCOL_SPC_SMPTE170M:  avio_wb16(pb, 6); break;
+    case AVCOL_SPC_SMPTE240M:  avio_wb16(pb, 7); break;
+    case AVCOL_SPC_BT2020_NCL: avio_wb16(pb, 9); break;
+    default:                   avio_wb16(pb, 2);
     }
 
     if (track->mode == MODE_MP4) {
@@ -1817,6 +1934,13 @@
     char compressor_name[32] = { 0 };
     int avid = 0;
 
+    int uncompressed_ycbcr = ((track->par->codec_id == AV_CODEC_ID_RAWVIDEO && track->par->format == AV_PIX_FMT_UYVY422)
+                           || (track->par->codec_id == AV_CODEC_ID_RAWVIDEO && track->par->format == AV_PIX_FMT_YUYV422)
+                           ||  track->par->codec_id == AV_CODEC_ID_V308
+                           ||  track->par->codec_id == AV_CODEC_ID_V408
+                           ||  track->par->codec_id == AV_CODEC_ID_V410
+                           ||  track->par->codec_id == AV_CODEC_ID_V210);
+
     avio_wb32(pb, 0); /* size */
     if (mov->encryption_scheme != MOV_ENC_NONE) {
         ffio_wfourcc(pb, "encv");
@@ -1827,11 +1951,15 @@
     avio_wb16(pb, 0); /* Reserved */
     avio_wb16(pb, 1); /* Data-reference index */
 
-    avio_wb16(pb, 0); /* Codec stream version */
+    if (uncompressed_ycbcr) {
+        avio_wb16(pb, 2); /* Codec stream version */
+    } else {
+        avio_wb16(pb, 0); /* Codec stream version */
+    }
     avio_wb16(pb, 0); /* Codec stream revision (=0) */
     if (track->mode == MODE_MOV) {
         ffio_wfourcc(pb, "FFMP"); /* Vendor */
-        if (track->par->codec_id == AV_CODEC_ID_RAWVIDEO) {
+        if (track->par->codec_id == AV_CODEC_ID_RAWVIDEO || uncompressed_ycbcr) {
             avio_wb32(pb, 0); /* Temporal Quality */
             avio_wb32(pb, 0x400); /* Spatial Quality = lossless*/
         } else {
@@ -1855,7 +1983,10 @@
     avio_w8(pb, strlen(compressor_name));
     avio_write(pb, compressor_name, 31);
 
-    if (track->mode == MODE_MOV && track->par->bits_per_coded_sample)
+    if (track->mode == MODE_MOV &&
+       (track->par->codec_id == AV_CODEC_ID_V410 || track->par->codec_id == AV_CODEC_ID_V210))
+        avio_wb16(pb, 0x18);
+    else if (track->mode == MODE_MOV && track->par->bits_per_coded_sample)
         avio_wb16(pb, track->par->bits_per_coded_sample |
                   (track->par->format == AV_PIX_FMT_GRAY8 ? 0x20 : 0));
     else
@@ -1900,6 +2031,8 @@
             mov_write_uuid_tag_ipod(pb);
     } else if (track->par->codec_id == AV_CODEC_ID_VP9) {
         mov_write_vpcc_tag(mov->fc, pb, track);
+    } else if (track->par->codec_id == AV_CODEC_ID_AV1) {
+        mov_write_av1c_tag(pb, track);
     } else if (track->par->codec_id == AV_CODEC_ID_VC1 && track->vos_len > 0)
         mov_write_dvc1_tag(pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_VP6F ||
@@ -1955,6 +2088,10 @@
         mov_write_pasp_tag(pb, track);
     }
 
+    if (uncompressed_ycbcr){
+        mov_write_clap_tag(pb, track);
+    }
+
     if (mov->encryption_scheme != MOV_ENC_NONE) {
         ff_mov_cenc_write_sinf_tag(track, pb, mov->encryption_kid);
     }
@@ -2239,9 +2376,9 @@
                decoded. */
             if (roll_samples_remaining > 0)
                 distance = 0;
-            /* Verify distance is a minimum of 2 (60ms) packets and a maximum of
-               32 (2.5ms) packets. */
-            av_assert0(distance == 0 || (distance >= 2 && distance <= 32));
+            /* Verify distance is a maximum of 32 (2.5ms) packets. */
+            if (distance > 32)
+                return AVERROR_INVALIDDATA;
             if (i && distance == sgpd_entries[entries].roll_distance) {
                 sgpd_entries[entries].count++;
             } else {
@@ -2305,6 +2442,8 @@
          track->par->codec_tag == MKTAG('r','t','p',' ')) &&
         track->has_keyframes && track->has_keyframes < track->entry)
         mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
+    if (track->par->codec_type == AVMEDIA_TYPE_VIDEO && track->has_disposable)
+        mov_write_sdtp_tag(pb, track);
     if (track->mode == MODE_MOV && track->flags & MOV_TRACK_STPS)
         mov_write_stss_tag(pb, track, MOV_PARTIAL_SYNC_SAMPLE);
     if (track->par->codec_type == AVMEDIA_TYPE_VIDEO &&
@@ -2443,6 +2582,7 @@
 
 static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
 {
+    MOVMuxContext *mov = s->priv_data;
     const char *hdlr, *descr = NULL, *hdlr_type = NULL;
     int64_t pos = avio_tell(pb);
 
@@ -2491,12 +2631,15 @@
             // of the track. So if an alternate handler description is
             // specified, use it.
             AVDictionaryEntry *t;
-            t = av_dict_get(track->st->metadata, "handler", NULL, 0);
+            t = av_dict_get(track->st->metadata, "handler_name", NULL, 0);
             if (t && utf8len(t->value))
                 descr = t->value;
         }
     }
 
+    if (mov->empty_hdlr_name) /* expressly allowed by QTFF and not prohibited in ISO 14496-12 8.4.3.3 */
+        descr = "";
+
     avio_wb32(pb, 0); /* size */
     ffio_wfourcc(pb, "hdlr");
     avio_wb32(pb, 0); /* Version & flags */
@@ -2555,7 +2698,7 @@
     } else if (track->tag == MKTAG('g','p','m','d')) {
         mov_write_gmhd_tag(pb, track);
     }
-    if (track->mode == MODE_MOV) /* FIXME: Why do it for MODE_MOV only ? */
+    if (track->mode == MODE_MOV) /* ISO 14496-12 8.4.3.1 specifies hdlr only within mdia or meta boxes */
         mov_write_hdlr_tag(s, pb, NULL);
     mov_write_dinf_tag(pb);
     if ((ret = mov_write_stbl_tag(s, pb, mov, track)) < 0)
@@ -2945,7 +3088,7 @@
     if (ret < 0)
         return ret;
 
-    if (mov->mode & MODE_MP4)
+    if (mov->mode & (MODE_MP4|MODE_MOV))
         mov_write_track_metadata(pb_buf, st, "name", "title");
 
     if ((size = avio_close_dyn_buf(pb_buf, &buf)) > 0) {
@@ -3334,6 +3477,33 @@
     return size;
 }
 
+static int mov_write_covr(AVIOContext *pb, AVFormatContext *s)
+{
+    MOVMuxContext *mov = s->priv_data;
+    int64_t pos = 0;
+    int i;
+
+    for (i = 0; i < s->nb_streams; i++) {
+        MOVTrack *trk = &mov->tracks[i];
+
+        if (!is_cover_image(trk->st) || trk->cover_image.size <= 0)
+            continue;
+
+        if (!pos) {
+            pos = avio_tell(pb);
+            avio_wb32(pb, 0);
+            ffio_wfourcc(pb, "covr");
+        }
+        avio_wb32(pb, 16 + trk->cover_image.size);
+        ffio_wfourcc(pb, "data");
+        avio_wb32(pb, trk->tag);
+        avio_wb32(pb , 0);
+        avio_write(pb, trk->cover_image.data, trk->cover_image.size);
+    }
+
+    return pos ? update_size(pb, pos) : 0;
+}
+
 /* iTunes meta data list */
 static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov,
                               AVFormatContext *s)
@@ -3353,7 +3523,7 @@
     }
     mov_write_string_metadata(s, pb, "\251cmt", "comment"  , 1);
     mov_write_string_metadata(s, pb, "\251gen", "genre"    , 1);
-    mov_write_string_metadata(s, pb, "\251cpy", "copyright", 1);
+    mov_write_string_metadata(s, pb, "cprt",    "copyright", 1);
     mov_write_string_metadata(s, pb, "\251grp", "grouping" , 1);
     mov_write_string_metadata(s, pb, "\251lyr", "lyrics"   , 1);
     mov_write_string_metadata(s, pb, "desc",    "description",1);
@@ -3368,6 +3538,7 @@
     mov_write_int8_metadata  (s, pb, "hdvd",    "hd_video",  1);
     mov_write_int8_metadata  (s, pb, "pgap",    "gapless_playback",1);
     mov_write_int8_metadata  (s, pb, "cpil",    "compilation", 1);
+    mov_write_covr(pb, s);
     mov_write_trkn_tag(pb, mov, s, 0); // track number
     mov_write_trkn_tag(pb, mov, s, 1); // disc number
     mov_write_tmpo_tag(pb, s);
@@ -3858,7 +4029,7 @@
         AVStream *st = track->st;
         AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0);
 
-        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
+        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO && !is_cover_image(st)) {
             type = "video";
         } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
             type = "audio";
@@ -4378,6 +4549,49 @@
     return 0;
 }
 
+static int mov_write_prft_tag(AVIOContext *pb, MOVMuxContext *mov, int tracks)
+{
+    int64_t pos = avio_tell(pb), pts_us, ntp_ts;
+    MOVTrack *first_track;
+
+    /* PRFT should be associated with at most one track. So, choosing only the
+     * first track. */
+    if (tracks > 0)
+        return 0;
+    first_track = &(mov->tracks[0]);
+
+    if (!first_track->entry) {
+        av_log(mov->fc, AV_LOG_WARNING, "Unable to write PRFT, no entries in the track\n");
+        return 0;
+    }
+
+    if (first_track->cluster[0].pts == AV_NOPTS_VALUE) {
+        av_log(mov->fc, AV_LOG_WARNING, "Unable to write PRFT, first PTS is invalid\n");
+        return 0;
+    }
+
+    if (mov->write_prft == MOV_PRFT_SRC_WALLCLOCK) {
+        ntp_ts = ff_get_formatted_ntp_time(ff_ntp_time());
+    } else if (mov->write_prft == MOV_PRFT_SRC_PTS) {
+        pts_us = av_rescale_q(first_track->cluster[0].pts,
+                              first_track->st->time_base, AV_TIME_BASE_Q);
+        ntp_ts = ff_get_formatted_ntp_time(pts_us + NTP_OFFSET_US);
+    } else {
+        av_log(mov->fc, AV_LOG_WARNING, "Unsupported PRFT box configuration: %d\n",
+               mov->write_prft);
+        return 0;
+    }
+
+    avio_wb32(pb, 0);                           // Size place holder
+    ffio_wfourcc(pb, "prft");                   // Type
+    avio_w8(pb, 1);                             // Version
+    avio_wb24(pb, 0);                           // Flags
+    avio_wb32(pb, first_track->track_id);       // reference track ID
+    avio_wb64(pb, ntp_ts);                      // NTP time stamp
+    avio_wb64(pb, first_track->cluster[0].pts); //media time
+    return update_size(pb, pos);
+}
+
 static int mov_write_moof_tag(AVIOContext *pb, MOVMuxContext *mov, int tracks,
                               int64_t mdat_size)
 {
@@ -4392,6 +4606,9 @@
     if (mov->flags & FF_MOV_FLAG_DASH && !(mov->flags & FF_MOV_FLAG_GLOBAL_SIDX))
         mov_write_sidx_tags(pb, mov, tracks, moof_size + 8 + mdat_size);
 
+    if (mov->write_prft > MOV_PRFT_NONE && mov->write_prft < MOV_PRFT_NB)
+        mov_write_prft_tag(pb, mov, tracks);
+
     if (mov->flags & FF_MOV_FLAG_GLOBAL_SIDX ||
         !(mov->flags & FF_MOV_FLAG_SKIP_TRAILER) ||
         mov->ism_lookahead) {
@@ -4478,6 +4695,8 @@
 
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *st = s->streams[i];
+        if (is_cover_image(st))
+            continue;
         if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
             has_video = 1;
         if (st->codecpar->codec_id == AV_CODEC_ID_H264)
@@ -4626,6 +4845,8 @@
         int video_streams_nb = 0, audio_streams_nb = 0, other_streams_nb = 0;
         for (i = 0; i < s->nb_streams; i++) {
             AVStream *st = s->streams[i];
+            if (is_cover_image(st))
+                continue;
             if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
                 video_streams_nb++;
             else if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
@@ -4815,7 +5036,7 @@
         int buf_size, moov_size;
 
         for (i = 0; i < mov->nb_streams; i++)
-            if (!mov->tracks[i].entry)
+            if (!mov->tracks[i].entry && !is_cover_image(mov->tracks[i].st))
                 break;
         /* Don't write the initial moov unless all tracks have data */
         if (i < mov->nb_streams && !force)
@@ -5072,6 +5293,11 @@
     else
         samples_in_chunk = 1;
 
+    if (samples_in_chunk < 1) {
+        av_log(s, AV_LOG_ERROR, "fatal error, input packet contains no samples\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
     /* copy extradata if it exists */
     if (trk->vos_len == 0 && par->extradata_size > 0 &&
         !TAG_IS_AVCI(trk->tag) &&
@@ -5122,6 +5348,13 @@
         } else {
             size = ff_hevc_annexb2mp4(pb, pkt->data, pkt->size, 0, NULL);
         }
+    } else if (par->codec_id == AV_CODEC_ID_AV1) {
+        if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
+            ff_av1_filter_obus_buf(pkt->data, &reformatted_data, &size);
+            avio_write(pb, reformatted_data, size);
+        } else {
+            size = ff_av1_filter_obus(pb, pkt->data, pkt->size);
+        }
 #if CONFIG_AC3_PARSER
     } else if (par->codec_id == AV_CODEC_ID_EAC3) {
         size = handle_eac3(mov, pkt, trk);
@@ -5176,6 +5409,7 @@
     trk->cluster[trk->entry].size             = size;
     trk->cluster[trk->entry].entries          = samples_in_chunk;
     trk->cluster[trk->entry].dts              = pkt->dts;
+    trk->cluster[trk->entry].pts              = pkt->pts;
     if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
         if (!trk->frag_discont) {
             /* First packet of a new fragment. We already wrote the duration
@@ -5268,6 +5502,10 @@
         if (trk->cluster[trk->entry].flags & MOV_SYNC_SAMPLE)
             trk->has_keyframes++;
     }
+    if (pkt->flags & AV_PKT_FLAG_DISPOSABLE) {
+        trk->cluster[trk->entry].flags |= MOV_DISPOSABLE_SAMPLE;
+        trk->has_disposable++;
+    }
     trk->entry++;
     trk->sample_count += samples_in_chunk;
     mov->mdat_size    += size;
@@ -5310,6 +5548,7 @@
 
         if (trk->par->codec_id == AV_CODEC_ID_MP4ALS ||
             trk->par->codec_id == AV_CODEC_ID_AAC ||
+            trk->par->codec_id == AV_CODEC_ID_AV1 ||
             trk->par->codec_id == AV_CODEC_ID_FLAC) {
             int side_size = 0;
             uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
@@ -5347,7 +5586,8 @@
              (mov->max_fragment_size && mov->mdat_size + size >= mov->max_fragment_size) ||
              (mov->flags & FF_MOV_FLAG_FRAG_KEYFRAME &&
               par->codec_type == AVMEDIA_TYPE_VIDEO &&
-              trk->entry && pkt->flags & AV_PKT_FLAG_KEY)) {
+              trk->entry && pkt->flags & AV_PKT_FLAG_KEY) ||
+              (mov->flags & FF_MOV_FLAG_FRAG_EVERY_FRAME)) {
             if (frag_duration >= mov->min_fragment_duration) {
                 // Set the duration of this track to line up with the next
                 // sample in this track. This avoids relying on AVPacket
@@ -5389,13 +5629,32 @@
 
 static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
+    MOVMuxContext *mov = s->priv_data;
+    MOVTrack *trk;
+
     if (!pkt) {
         mov_flush_fragment(s, 1);
         return 1;
+    }
+
+    trk = &mov->tracks[pkt->stream_index];
+
+    if (is_cover_image(trk->st)) {
+        int ret;
+
+        if (trk->st->nb_frames >= 1) {
+            if (trk->st->nb_frames == 1)
+                av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+                       " ignoring.\n", pkt->stream_index);
+            return 0;
+        }
+
+        if ((ret = av_packet_ref(&trk->cover_image, pkt)) < 0)
+            return ret;
+
+        return 0;
     } else {
         int i;
-        MOVMuxContext *mov = s->priv_data;
-        MOVTrack *trk = &mov->tracks[pkt->stream_index];
 
         if (!pkt->size)
             return mov_write_single_packet(s, pkt); /* Passthrough. */
@@ -5642,7 +5901,8 @@
         AVStream *st = s->streams[i];
 
         if (st->codecpar->codec_type <= AVMEDIA_TYPE_UNKNOWN ||
-            st->codecpar->codec_type >= AVMEDIA_TYPE_NB)
+            st->codecpar->codec_type >= AVMEDIA_TYPE_NB ||
+            is_cover_image(st))
             continue;
 
         if (first[st->codecpar->codec_type] < 0)
@@ -5685,6 +5945,7 @@
             av_freep(&mov->tracks[i].par);
         av_freep(&mov->tracks[i].cluster);
         av_freep(&mov->tracks[i].frag_info);
+        av_packet_unref(&mov->tracks[i].cover_image);
 
         if (mov->tracks[i].vos_len)
             av_freep(&mov->tracks[i].vos_data);
@@ -5789,13 +6050,14 @@
     if (mov->max_fragment_duration || mov->max_fragment_size ||
         mov->flags & (FF_MOV_FLAG_EMPTY_MOOV |
                       FF_MOV_FLAG_FRAG_KEYFRAME |
-                      FF_MOV_FLAG_FRAG_CUSTOM))
+                      FF_MOV_FLAG_FRAG_CUSTOM |
+                      FF_MOV_FLAG_FRAG_EVERY_FRAME))
         mov->flags |= FF_MOV_FLAG_FRAGMENT;
 
     /* Set other implicit flags immediately */
     if (mov->mode == MODE_ISM)
         mov->flags |= FF_MOV_FLAG_EMPTY_MOOV | FF_MOV_FLAG_SEPARATE_MOOF |
-                      FF_MOV_FLAG_FRAGMENT;
+                      FF_MOV_FLAG_FRAGMENT | FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS;
     if (mov->flags & FF_MOV_FLAG_DASH)
         mov->flags |= FF_MOV_FLAG_FRAGMENT | FF_MOV_FLAG_EMPTY_MOOV |
                       FF_MOV_FLAG_DEFAULT_BASE_MOOF;
@@ -5856,14 +6118,9 @@
         mov->chapter_track = mov->nb_streams++;
 
     if (mov->flags & FF_MOV_FLAG_RTP_HINT) {
-        /* Add hint tracks for each audio and video stream */
-        for (i = 0; i < s->nb_streams; i++) {
-            AVStream *st = s->streams[i];
-            if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
-                st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+        for (i = 0; i < s->nb_streams; i++)
+            if (rtp_hinting_needed(s->streams[i]))
                 mov->nb_streams++;
-            }
-        }
     }
 
     if (   mov->write_tmcd == -1 && (mov->mode == MODE_MOV || mov->mode == MODE_MP4)
@@ -5989,11 +6246,17 @@
                         pix_fmt == AV_PIX_FMT_MONOWHITE ||
                         pix_fmt == AV_PIX_FMT_MONOBLACK;
             }
-            if (track->par->codec_id == AV_CODEC_ID_VP9) {
+            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
+                track->par->codec_id == AV_CODEC_ID_AV1) {
                 if (track->mode != MODE_MP4) {
-                    av_log(s, AV_LOG_ERROR, "VP9 only supported in MP4.\n");
+                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
                     return AVERROR(EINVAL);
                 }
+            } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
+                /* altref frames handling is not defined in the spec as of version v1.0,
+                 * so just forbid muxing VP8 streams altogether until a new version does */
+                av_log(s, AV_LOG_ERROR, "VP8 muxing is currently not supported.\n");
+                return AVERROR_PATCHWELCOME;
             }
         } else if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
             track->timescale = st->codecpar->sample_rate;
@@ -6081,15 +6344,10 @@
         nb_tracks++;
 
     if (mov->flags & FF_MOV_FLAG_RTP_HINT) {
-        /* Add hint tracks for each audio and video stream */
         hint_track = nb_tracks;
-        for (i = 0; i < s->nb_streams; i++) {
-            AVStream *st = s->streams[i];
-            if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
-                st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+        for (i = 0; i < s->nb_streams; i++)
+            if (rtp_hinting_needed(s->streams[i]))
                 nb_tracks++;
-            }
-        }
     }
 
     if (mov->mode == MODE_MOV || mov->mode == MODE_MP4)
@@ -6148,7 +6406,8 @@
     if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
         /* If no fragmentation options have been set, set a default. */
         if (!(mov->flags & (FF_MOV_FLAG_FRAG_KEYFRAME |
-                            FF_MOV_FLAG_FRAG_CUSTOM)) &&
+                            FF_MOV_FLAG_FRAG_CUSTOM |
+                            FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
             !mov->max_fragment_duration && !mov->max_fragment_size)
             mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
     } else {
@@ -6166,11 +6425,8 @@
             return ret;
 
     if (mov->flags & FF_MOV_FLAG_RTP_HINT) {
-        /* Initialize the hint tracks for each audio and video stream */
         for (i = 0; i < s->nb_streams; i++) {
-            AVStream *st = s->streams[i];
-            if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
-                st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+            if (rtp_hinting_needed(s->streams[i])) {
                 if ((ret = ff_mov_init_hinting(s, hint_track, i)) < 0)
                     return ret;
                 hint_track++;
@@ -6315,10 +6571,10 @@
      * writing, so we re-open the same output, but for reading. It also avoids
      * a read/seek/write/seek back and forth. */
     avio_flush(s->pb);
-    ret = s->io_open(s, &read_pb, s->filename, AVIO_FLAG_READ, NULL);
+    ret = s->io_open(s, &read_pb, s->url, AVIO_FLAG_READ, NULL);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Unable to re-open %s output file for "
-               "the second pass (faststart)\n", s->filename);
+               "the second pass (faststart)\n", s->url);
         goto end;
     }
 
@@ -6498,6 +6754,7 @@
 const AVCodecTag codec_mp4_tags[] = {
     { AV_CODEC_ID_MPEG4       , MKTAG('m', 'p', '4', 'v') },
     { AV_CODEC_ID_H264        , MKTAG('a', 'v', 'c', '1') },
+    { AV_CODEC_ID_H264        , MKTAG('a', 'v', 'c', '3') },
     { AV_CODEC_ID_HEVC        , MKTAG('h', 'e', 'v', '1') },
     { AV_CODEC_ID_HEVC        , MKTAG('h', 'v', 'c', '1') },
     { AV_CODEC_ID_MPEG2VIDEO  , MKTAG('m', 'p', '4', 'v') },
@@ -6509,6 +6766,7 @@
     { AV_CODEC_ID_DIRAC       , MKTAG('d', 'r', 'a', 'c') },
     { AV_CODEC_ID_TSCC2       , MKTAG('m', 'p', '4', 'v') },
     { AV_CODEC_ID_VP9         , MKTAG('v', 'p', '0', '9') },
+    { AV_CODEC_ID_AV1         , MKTAG('a', 'v', '0', '1') },
     { AV_CODEC_ID_AAC         , MKTAG('m', 'p', '4', 'a') },
     { AV_CODEC_ID_MP4ALS      , MKTAG('m', 'p', '4', 'a') },
     { AV_CODEC_ID_MP3         , MKTAG('m', 'p', '4', 'a') },
@@ -6523,6 +6781,7 @@
     { AV_CODEC_ID_EVRC        , MKTAG('m', 'p', '4', 'a') },
     { AV_CODEC_ID_DVD_SUBTITLE, MKTAG('m', 'p', '4', 's') },
     { AV_CODEC_ID_MOV_TEXT    , MKTAG('t', 'x', '3', 'g') },
+    { AV_CODEC_ID_BIN_DATA    , MKTAG('g', 'p', 'm', 'd') },
     { AV_CODEC_ID_NONE        ,    0 },
 };
 
@@ -6663,7 +6922,7 @@
     .name              = "ipod",
     .long_name         = NULL_IF_CONFIG_SMALL("iPod H.264 MP4 (MPEG-4 Part 14)"),
     .mime_type         = "video/mp4",
-    .extensions        = "m4v,m4a",
+    .extensions        = "m4v,m4a,m4b",
     .priv_data_size    = sizeof(MOVMuxContext),
     .audio_codec       = AV_CODEC_ID_AAC,
     .video_codec       = AV_CODEC_ID_H264,

diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index cc2a155..fe605d1 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h

@@ -46,6 +46,7 @@
 typedef struct MOVIentry {
     uint64_t     pos;
     int64_t      dts;
+    int64_t      pts;
     unsigned int size;
     unsigned int samples_in_chunk;
     unsigned int chunkNum;              ///< Chunk number if the current entry is a chunk start otherwise 0
@@ -53,6 +54,7 @@
     int          cts;
 #define MOV_SYNC_SAMPLE         0x0001
 #define MOV_PARTIAL_SYNC_SAMPLE 0x0002
+#define MOV_DISPOSABLE_SAMPLE   0x0004
     uint32_t     flags;
 } MOVIentry;
 
@@ -89,6 +91,7 @@
     long        sample_size;
     long        chunkCount;
     int         has_keyframes;
+    int         has_disposable;
 #define MOV_TRACK_CTTS         0x0001
 #define MOV_TRACK_STPS         0x0002
 #define MOV_TRACK_ENABLED      0x0004
@@ -130,6 +133,7 @@
     uint32_t    default_size;
 
     HintSampleQueue sample_queue;
+    AVPacket cover_image;
 
     AVIOContext *mdat_buf;
     int64_t     data_offset;
@@ -166,6 +170,13 @@
     MOV_ENC_CENC_AES_CTR,
 } MOVEncryptionScheme;
 
+typedef enum {
+    MOV_PRFT_NONE = 0,
+    MOV_PRFT_SRC_WALLCLOCK,
+    MOV_PRFT_SRC_PTS,
+    MOV_PRFT_NB
+} MOVPrftBox;
+
 typedef struct MOVMuxContext {
     const AVClass *av_class;
     int     mode;
@@ -221,6 +232,8 @@
     int use_stream_ids_as_track_ids;
     int track_ids_ok;
     int write_tmcd;
+    MOVPrftBox write_prft;
+    int empty_hdlr_name;
 } MOVMuxContext;
 
 #define FF_MOV_FLAG_RTP_HINT              (1 <<  0)
@@ -243,6 +256,7 @@
 #define FF_MOV_FLAG_USE_MDTA              (1 << 17)
 #define FF_MOV_FLAG_SKIP_TRAILER          (1 << 18)
 #define FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS  (1 << 19)
+#define FF_MOV_FLAG_FRAG_EVERY_FRAME      (1 << 20)
 
 int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt);
 

diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index a5c4f2e..ef88493 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c

@@ -71,6 +71,7 @@
     int max_frames, first_frames = 0;
     int whole_used = 0;
     int frames, ret;
+    int framesizes, max_framesizes;
     uint32_t header;
     const uint8_t *buf, *buf0, *buf2, *end;
 
@@ -80,11 +81,12 @@
         buf0++;
 
     max_frames = 0;
+    max_framesizes = 0;
     buf = buf0;
 
     for(; buf < end; buf= buf2+1) {
         buf2 = buf;
-        for(frames = 0; buf2 < end; frames++) {
+        for(framesizes = frames = 0; buf2 < end; frames++) {
             MPADecodeHeader h;
 
             header = AV_RB32(buf2);
@@ -92,8 +94,10 @@
             if (ret != 0)
                 break;
             buf2 += h.frame_size;
+            framesizes += h.frame_size;
         }
         max_frames = FFMAX(max_frames, frames);
+        max_framesizes = FFMAX(max_framesizes, framesizes);
         if(buf == buf0) {
             first_frames= frames;
             if (buf2 == end + sizeof(uint32_t))
@@ -103,12 +107,12 @@
     // keep this in sync with ac3 probe, both need to avoid
     // issues with MPEG-files!
     if   (first_frames>=7) return AVPROBE_SCORE_EXTENSION + 1;
-    else if(max_frames>200)return AVPROBE_SCORE_EXTENSION;
-    else if(max_frames>=4 && max_frames >= p->buf_size/10000) return AVPROBE_SCORE_EXTENSION / 2;
+    else if(max_frames>200 && p->buf_size < 2*max_framesizes)return AVPROBE_SCORE_EXTENSION;
+    else if(max_frames>=4 && p->buf_size < 2*max_framesizes) return AVPROBE_SCORE_EXTENSION / 2;
     else if(ff_id3v2_match(buf0, ID3v2_DEFAULT_MAGIC) && 2*ff_id3v2_tag_len(buf0) >= p->buf_size)
                            return p->buf_size < PROBE_BUF_MAX ? AVPROBE_SCORE_EXTENSION / 4 : AVPROBE_SCORE_EXTENSION - 2;
     else if(first_frames > 1 && whole_used) return 5;
-    else if(max_frames>=1 && max_frames >= p->buf_size/10000) return 1;
+    else if(max_frames>=1 && p->buf_size < 10*max_framesizes) return 1;
     else                   return 0;
 //mpegps_mp3_unrecognized_format.mpg has max_frames=3
 }
@@ -508,9 +512,9 @@
                     return AVERROR(EINVAL);
                 }
             }
-            if ((target_pos - pos)*dir <= 0 && abs(MIN_VALID/2-j) < score) {
+            if ((target_pos - pos)*dir <= 0 && FFABS(MIN_VALID/2-j) < score) {
                 candidate = pos;
-                score = abs(MIN_VALID/2-j);
+                score = FFABS(MIN_VALID/2-j);
             }
             pos += ret;
         }

diff --git a/libavformat/mp3enc.c b/libavformat/mp3enc.c
index 8479e24..dd662f5 100644
--- a/libavformat/mp3enc.c
+++ b/libavformat/mp3enc.c

@@ -369,20 +369,18 @@
 static int mp3_queue_flush(AVFormatContext *s)
 {
     MP3Context *mp3 = s->priv_data;
-    AVPacketList *pktl;
+    AVPacket pkt;
     int ret = 0, write = 1;
 
     ff_id3v2_finish(&mp3->id3, s->pb, s->metadata_header_padding);
     mp3_write_xing(s);
 
-    while ((pktl = mp3->queue)) {
-        if (write && (ret = mp3_write_audio_packet(s, &pktl->pkt)) < 0)
+    while (mp3->queue) {
+        ff_packet_list_get(&mp3->queue, &mp3->queue_end, &pkt);
+        if (write && (ret = mp3_write_audio_packet(s, &pkt)) < 0)
             write = 0;
-        av_packet_unref(&pktl->pkt);
-        mp3->queue = pktl->next;
-        av_freep(&pktl);
+        av_packet_unref(&pkt);
     }
-    mp3->queue_end = NULL;
     return ret;
 }
 
@@ -514,21 +512,14 @@
     if (pkt->stream_index == mp3->audio_stream_idx) {
         if (mp3->pics_to_write) {
             /* buffer audio packets until we get all the pictures */
-            AVPacketList *pktl = av_mallocz(sizeof(*pktl));
+            int ret = ff_packet_list_put(&mp3->queue, &mp3->queue_end, pkt, FF_PACKETLIST_FLAG_REF_PACKET);
 
-            if (!pktl || av_packet_ref(&pktl->pkt, pkt) < 0) {
-                av_freep(&pktl);
+            if (ret < 0) {
                 av_log(s, AV_LOG_WARNING, "Not enough memory to buffer audio. Skipping picture streams\n");
                 mp3->pics_to_write = 0;
                 mp3_queue_flush(s);
                 return mp3_write_audio_packet(s, pkt);
             }
-
-            if (mp3->queue_end)
-                mp3->queue_end->next = pktl;
-            else
-                mp3->queue = pktl;
-            mp3->queue_end = pktl;
         } else
             return mp3_write_audio_packet(s, pkt);
     } else {

diff --git a/libavformat/mpc8.c b/libavformat/mpc8.c
index f280faa..79e5f6a 100644
--- a/libavformat/mpc8.c
+++ b/libavformat/mpc8.c

@@ -297,7 +297,7 @@
             return 0;
         }
         if(tag == TAG_STREAMEND)
-            return AVERROR(EIO);
+            return AVERROR_EOF;
         mpc8_handle_chunk(s, tag, pos, size);
     }
     return AVERROR_EOF;

diff --git a/libavformat/mpeg.c b/libavformat/mpeg.c
index 50fe7a1..d4369b4 100644
--- a/libavformat/mpeg.c
+++ b/libavformat/mpeg.c

@@ -20,6 +20,7 @@
  */
 
 #include "avformat.h"
+#include "avio_internal.h"
 #include "internal.h"
 #include "mpeg.h"
 
@@ -128,6 +129,7 @@
     int sofdec;
     int dvd;
     int imkh_cctv;
+    int raw_ac3;
 #if CONFIG_VOBSUB_DEMUXER
     AVFormatContext *sub_ctx;
     FFDemuxSubtitlesQueue q[32];
@@ -442,8 +444,24 @@
     }
 
     if (startcode == PRIVATE_STREAM_1) {
+        int ret = ffio_ensure_seekback(s->pb, 2);
+
+        if (ret < 0)
+            return ret;
+
         startcode = avio_r8(s->pb);
-        len--;
+        m->raw_ac3 = 0;
+        if (startcode == 0x0b) {
+            if (avio_r8(s->pb) == 0x77) {
+                startcode = 0x80;
+                m->raw_ac3 = 1;
+                avio_skip(s->pb, -2);
+            } else {
+                avio_skip(s->pb, -1);
+            }
+        } else {
+            len--;
+        }
     }
     if (len < 0)
         goto error_redo;
@@ -486,14 +504,16 @@
         if (len < 4)
             goto skip;
 
-        /* audio: skip header */
-        avio_r8(s->pb);
-        lpcm_header_len = avio_rb16(s->pb);
-        len -= 3;
-        if (startcode >= 0xb0 && startcode <= 0xbf) {
-            /* MLP/TrueHD audio has a 4-byte header */
+        if (!m->raw_ac3) {
+            /* audio: skip header */
             avio_r8(s->pb);
-            len--;
+            lpcm_header_len = avio_rb16(s->pb);
+            len -= 3;
+            if (startcode >= 0xb0 && startcode <= 0xbf) {
+                /* MLP/TrueHD audio has a 4-byte header */
+                avio_r8(s->pb);
+                len--;
+            }
         }
     }
 
@@ -524,6 +544,9 @@
         } else if (es_type == STREAM_TYPE_VIDEO_H264) {
             codec_id = AV_CODEC_ID_H264;
             type     = AVMEDIA_TYPE_VIDEO;
+        } else if (es_type == STREAM_TYPE_VIDEO_HEVC) {
+            codec_id = AV_CODEC_ID_HEVC;
+            type     = AVMEDIA_TYPE_VIDEO;
         } else if (es_type == STREAM_TYPE_AUDIO_AC3) {
             codec_id = AV_CODEC_ID_AC3;
             type     = AVMEDIA_TYPE_AUDIO;
@@ -568,7 +591,7 @@
         codec_id = AV_CODEC_ID_DTS;
     } else if (startcode >= 0xa0 && startcode <= 0xaf) {
         type     = AVMEDIA_TYPE_AUDIO;
-        if (lpcm_header_len == 6 || startcode == 0xa1) {
+        if (lpcm_header_len >= 6 && startcode == 0xa1) {
             codec_id = AV_CODEC_ID_MLP;
         } else {
             codec_id = AV_CODEC_ID_PCM_DVD;
@@ -627,7 +650,7 @@
     pkt->stream_index = st->index;
 
     if (s->debug & FF_FDEBUG_TS)
-        av_log(s, AV_LOG_TRACE, "%d: pts=%0.3f dts=%0.3f size=%d\n",
+        av_log(s, AV_LOG_DEBUG, "%d: pts=%0.3f dts=%0.3f size=%d\n",
             pkt->stream_index, pkt->pts / 90000.0, pkt->dts / 90000.0,
             pkt->size);
 
@@ -648,7 +671,7 @@
         len = mpegps_read_pes_header(s, &pos, &startcode, &pts, &dts);
         if (len < 0) {
             if (s->debug & FF_FDEBUG_TS)
-                av_log(s, AV_LOG_TRACE, "none (ret=%d)\n", len);
+                av_log(s, AV_LOG_DEBUG, "none (ret=%d)\n", len);
             return AV_NOPTS_VALUE;
         }
         if (startcode == s->streams[stream_index]->id &&
@@ -658,7 +681,7 @@
         avio_skip(s->pb, len);
     }
     if (s->debug & FF_FDEBUG_TS)
-        av_log(s, AV_LOG_TRACE, "pos=0x%"PRIx64" dts=0x%"PRIx64" %0.3f\n",
+        av_log(s, AV_LOG_DEBUG, "pos=0x%"PRIx64" dts=0x%"PRIx64" %0.3f\n",
             pos, dts, dts / 90000.0);
     *ppos = pos;
     return dts;
@@ -703,7 +726,7 @@
 
     if (!vobsub->sub_name) {
         char *ext;
-        vobsub->sub_name = av_strdup(s->filename);
+        vobsub->sub_name = av_strdup(s->url);
         if (!vobsub->sub_name) {
             ret = AVERROR(ENOMEM);
             goto end;
@@ -718,7 +741,7 @@
             goto end;
         }
         memcpy(ext, !strncmp(ext, "IDX", 3) ? "SUB" : "sub", 3);
-        av_log(s, AV_LOG_VERBOSE, "IDX/SUB: %s -> %s\n", s->filename, vobsub->sub_name);
+        av_log(s, AV_LOG_VERBOSE, "IDX/SUB: %s -> %s\n", s->url, vobsub->sub_name);
     }
 
     if (!(iformat = av_find_input_format("mpeg"))) {

diff --git a/libavformat/mpeg.h b/libavformat/mpeg.h
index 617e36c..b635295 100644
--- a/libavformat/mpeg.h
+++ b/libavformat/mpeg.h

@@ -55,6 +55,7 @@
 #define STREAM_TYPE_AUDIO_AAC       0x0f
 #define STREAM_TYPE_VIDEO_MPEG4     0x10
 #define STREAM_TYPE_VIDEO_H264      0x1b
+#define STREAM_TYPE_VIDEO_HEVC      0x24
 #define STREAM_TYPE_VIDEO_CAVS      0x42
 
 #define STREAM_TYPE_AUDIO_AC3       0x81

diff --git a/libavformat/mpegenc.c b/libavformat/mpegenc.c
index c77c3df..4c6fa67 100644
--- a/libavformat/mpegenc.c
+++ b/libavformat/mpegenc.c

@@ -353,7 +353,8 @@
             if (!s->is_mpeg2 &&
                 (st->codecpar->codec_id == AV_CODEC_ID_AC3 ||
                  st->codecpar->codec_id == AV_CODEC_ID_DTS ||
-                 st->codecpar->codec_id == AV_CODEC_ID_PCM_S16BE))
+                 st->codecpar->codec_id == AV_CODEC_ID_PCM_S16BE ||
+                 st->codecpar->codec_id == AV_CODEC_ID_PCM_DVD))
                  av_log(ctx, AV_LOG_WARNING,
                         "%s in MPEG-1 system streams is not widely supported, "
                         "consider using the vob or the dvd muxer "
@@ -363,16 +364,30 @@
                 stream->id = ac3_id++;
             } else if (st->codecpar->codec_id == AV_CODEC_ID_DTS) {
                 stream->id = dts_id++;
-            } else if (st->codecpar->codec_id == AV_CODEC_ID_PCM_S16BE) {
+            } else if (st->codecpar->codec_id == AV_CODEC_ID_PCM_S16BE ||
+                       st->codecpar->codec_id == AV_CODEC_ID_PCM_DVD) {
+                if (st->codecpar->bits_per_coded_sample != 16) {
+                    av_log(ctx, AV_LOG_ERROR, "Only 16 bit LPCM streams can be muxed.\n");
+                    goto fail;
+                }
                 stream->id = lpcm_id++;
                 for (j = 0; j < 4; j++) {
                     if (lpcm_freq_tab[j] == st->codecpar->sample_rate)
                         break;
                 }
-                if (j == 4)
+                if (j == 4) {
+                    int sr;
+                    av_log(ctx, AV_LOG_ERROR, "Invalid sampling rate for PCM stream.\n");
+                    av_log(ctx, AV_LOG_INFO, "Allowed sampling rates:");
+                    for (sr = 0; sr < 4; sr++)
+                         av_log(ctx, AV_LOG_INFO, " %d", lpcm_freq_tab[sr]);
+                    av_log(ctx, AV_LOG_INFO, "\n");
                     goto fail;
-                if (st->codecpar->channels > 8)
-                    return -1;
+                }
+                if (st->codecpar->channels > 8) {
+                    av_log(ctx, AV_LOG_ERROR, "At most 8 channels allowed for LPCM streams.\n");
+                    goto fail;
+                }
                 stream->lpcm_header[0] = 0x0c;
                 stream->lpcm_header[1] = (st->codecpar->channels - 1) | (j << 4);
                 stream->lpcm_header[2] = 0x80;
@@ -397,7 +412,7 @@
                 stream->max_buffer_size = 6 * 1024 + props->buffer_size / 8;
             else {
                 av_log(ctx, AV_LOG_WARNING,
-                       "VBV buffer size not set, using default size of 130KB\n"
+                       "VBV buffer size not set, using default size of 230KB\n"
                        "If you want the mpeg file to be compliant to some specification\n"
                        "Like DVD, VCD or others, make sure you set the correct buffer size\n");
                 // FIXME: this is probably too small as default
@@ -1150,6 +1165,19 @@
         return AVERROR(ENOMEM);
     pkt_desc->pts            = pts;
     pkt_desc->dts            = dts;
+
+    if (st->codecpar->codec_id == AV_CODEC_ID_PCM_DVD) {
+        if (size < 3) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid packet size %d\n", size);
+            return AVERROR(EINVAL);
+        }
+
+        /* Skip first 3 bytes of packet data, which comprise PCM header
+           and will be written fresh by this muxer. */
+        buf += 3;
+        size -= 3;
+    }
+
     pkt_desc->unwritten_size =
     pkt_desc->size           = size;
     if (!stream->predecode_packet)

diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 53cbcfb..edf6b57 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c

@@ -143,10 +143,12 @@
 
     int skip_changes;
     int skip_clear;
+    int skip_unknown_pmt;
 
     int scan_all_pmts;
 
     int resync_size;
+    int merge_pmt_versions;
 
     /******************************************/
     /* private mpegts data */
@@ -170,8 +172,12 @@
      {.i64 = 1}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
     {"ts_packetsize", "output option carrying the raw packet size", offsetof(MpegTSContext, raw_packet_size), AV_OPT_TYPE_INT,
      {.i64 = 0}, 0, 0, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
-    {"scan_all_pmts",   "scan and combine all PMTs", offsetof(MpegTSContext, scan_all_pmts), AV_OPT_TYPE_BOOL,
-     { .i64 =  -1}, -1, 1,  AV_OPT_FLAG_DECODING_PARAM },
+    {"scan_all_pmts", "scan and combine all PMTs", offsetof(MpegTSContext, scan_all_pmts), AV_OPT_TYPE_BOOL,
+     {.i64 = -1}, -1, 1, AV_OPT_FLAG_DECODING_PARAM },
+    {"skip_unknown_pmt", "skip PMTs for programs not advertised in the PAT", offsetof(MpegTSContext, skip_unknown_pmt), AV_OPT_TYPE_BOOL,
+     {.i64 = 0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
+    {"merge_pmt_versions", "re-use streams when PMT's version/pids change", offsetof(MpegTSContext, merge_pmt_versions), AV_OPT_TYPE_BOOL,
+     {.i64 = 0}, 0, 1,  AV_OPT_FLAG_DECODING_PARAM },
     {"skip_changes", "skip changing / adding streams / programs", offsetof(MpegTSContext, skip_changes), AV_OPT_TYPE_BOOL,
      {.i64 = 0}, 0, 1, 0 },
     {"skip_clear", "skip clearing programs", offsetof(MpegTSContext, skip_clear), AV_OPT_TYPE_BOOL,
@@ -241,6 +247,7 @@
     uint8_t header[MAX_PES_HEADER_SIZE];
     AVBufferRef *buffer;
     SLConfigDescr sl;
+    int merged_st;
 } PESContext;
 
 extern AVInputFormat ff_mpegts_demuxer;
@@ -330,12 +337,23 @@
     p->pmt_found = 1;
 }
 
-static void set_pcr_pid(AVFormatContext *s, unsigned int programid, unsigned int pid)
+static void update_av_program_info(AVFormatContext *s, unsigned int programid,
+                                   unsigned int pid, int version)
 {
     int i;
     for (i = 0; i < s->nb_programs; i++) {
-        if (s->programs[i]->id == programid) {
-            s->programs[i]->pcr_pid = pid;
+        AVProgram *program = s->programs[i];
+        if (program->id == programid) {
+            int old_pcr_pid = program->pcr_pid,
+                old_version = program->pmt_version;
+            program->pcr_pid = pid;
+            program->pmt_version = version;
+
+            if (old_version != -1 && old_version != version) {
+                av_log(s, AV_LOG_VERBOSE,
+                       "detected PMT change (program=%d, version=%d/%d, pcr_pid=0x%x/0x%x)\n",
+                       programid, old_version, version, old_pcr_pid, pid);
+            }
             break;
         }
     }
@@ -391,7 +409,8 @@
                                const uint8_t *buf, int buf_size, int is_start)
 {
     MpegTSSectionFilter *tss = &tss1->u.section_filter;
-    int len;
+    uint8_t *cur_section_buf = NULL;
+    int len, offset;
 
     if (is_start) {
         memcpy(tss->section_buf, buf, buf_size);
@@ -401,42 +420,54 @@
     } else {
         if (tss->end_of_section_reached)
             return;
-        len = 4096 - tss->section_index;
+        len = MAX_SECTION_SIZE - tss->section_index;
         if (buf_size < len)
             len = buf_size;
         memcpy(tss->section_buf + tss->section_index, buf, len);
         tss->section_index += len;
     }
 
-    /* compute section length if possible */
-    if (tss->section_h_size == -1 && tss->section_index >= 3) {
-        len = (AV_RB16(tss->section_buf + 1) & 0xfff) + 3;
-        if (len > 4096)
-            return;
-        tss->section_h_size = len;
-    }
-
-    if (tss->section_h_size != -1 &&
-        tss->section_index >= tss->section_h_size) {
-        int crc_valid = 1;
-        tss->end_of_section_reached = 1;
-
-        if (tss->check_crc) {
-            crc_valid = !av_crc(av_crc_get_table(AV_CRC_32_IEEE), -1, tss->section_buf, tss->section_h_size);
-            if (tss->section_h_size >= 4)
-                tss->crc = AV_RB32(tss->section_buf + tss->section_h_size - 4);
-
-            if (crc_valid) {
-                ts->crc_validity[ tss1->pid ] = 100;
-            }else if (ts->crc_validity[ tss1->pid ] > -10) {
-                ts->crc_validity[ tss1->pid ]--;
-            }else
-                crc_valid = 2;
+    offset = 0;
+    cur_section_buf = tss->section_buf;
+    while (cur_section_buf - tss->section_buf < MAX_SECTION_SIZE && cur_section_buf[0] != 0xff) {
+        /* compute section length if possible */
+        if (tss->section_h_size == -1 && tss->section_index - offset >= 3) {
+            len = (AV_RB16(cur_section_buf + 1) & 0xfff) + 3;
+            if (len > MAX_SECTION_SIZE)
+                return;
+            tss->section_h_size = len;
         }
-        if (crc_valid) {
-            tss->section_cb(tss1, tss->section_buf, tss->section_h_size);
-            if (crc_valid != 1)
-                tss->last_ver = -1;
+
+        if (tss->section_h_size != -1 &&
+            tss->section_index >= offset + tss->section_h_size) {
+            int crc_valid = 1;
+            tss->end_of_section_reached = 1;
+
+            if (tss->check_crc) {
+                crc_valid = !av_crc(av_crc_get_table(AV_CRC_32_IEEE), -1, cur_section_buf, tss->section_h_size);
+                if (tss->section_h_size >= 4)
+                    tss->crc = AV_RB32(cur_section_buf + tss->section_h_size - 4);
+
+                if (crc_valid) {
+                    ts->crc_validity[ tss1->pid ] = 100;
+                }else if (ts->crc_validity[ tss1->pid ] > -10) {
+                    ts->crc_validity[ tss1->pid ]--;
+                }else
+                    crc_valid = 2;
+            }
+            if (crc_valid) {
+                tss->section_cb(tss1, cur_section_buf, tss->section_h_size);
+                if (crc_valid != 1)
+                    tss->last_ver = -1;
+            }
+
+            cur_section_buf += tss->section_h_size;
+            offset += tss->section_h_size;
+            tss->section_h_size = -1;
+        } else {
+            tss->section_h_size = -1;
+            tss->end_of_section_reached = 0;
+            break;
         }
     }
 }
@@ -478,7 +509,7 @@
     sec = &filter->u.section_filter;
     sec->section_cb  = section_cb;
     sec->opaque      = opaque;
-    sec->section_buf = av_malloc(MAX_SECTION_SIZE);
+    sec->section_buf = av_mallocz(MAX_SECTION_SIZE);
     sec->check_crc   = check_crc;
     sec->last_ver    = -1;
 
@@ -521,8 +552,8 @@
         PESContext *pes = filter->u.pes_filter.opaque;
         av_buffer_unref(&pes->buffer);
         /* referenced private data will be freed later in
-         * avformat_close_input */
-        if (!((PESContext *)filter->u.pes_filter.opaque)->st) {
+         * avformat_close_input (pes->st->priv_data == pes) */
+        if (!pes->st || pes->merged_st) {
             av_freep(&filter->u.pes_filter.opaque);
         }
     }
@@ -1060,6 +1091,8 @@
                     if (!pes->st) {
                         if (ts->skip_changes)
                             goto skip;
+                        if (ts->merge_pmt_versions)
+                            goto skip; /* wait for PMT to merge new stream */
 
                         pes->st = avformat_new_stream(ts->stream, NULL);
                         if (!pes->st)
@@ -1671,6 +1704,11 @@
         mpegts_find_stream_type(st, desc_tag, DESC_types);
 
     switch (desc_tag) {
+    case 0x02: /* video stream descriptor */
+        if (get8(pp, desc_end) & 0x1) {
+            st->disposition |= AV_DISPOSITION_STILL_IMAGE;
+        }
+        break;
     case 0x1E: /* SL descriptor */
         desc_es_id = get16(pp, desc_end);
         if (desc_es_id < 0)
@@ -1733,10 +1771,10 @@
                     }
                 }
 
-               if (st->codecpar->extradata_size < language_count * 2)
-                   return AVERROR_INVALIDDATA;
+                if (st->codecpar->extradata_size < language_count * 2)
+                    return AVERROR_INVALIDDATA;
 
-               extradata = st->codecpar->extradata;
+                extradata = st->codecpar->extradata;
 
                 for (i = 0; i < language_count; i++) {
                     language[i * 4 + 0] = get8(pp, desc_end);
@@ -1835,12 +1873,15 @@
                 break;
             case 0x03:
                 st->disposition |= AV_DISPOSITION_VISUAL_IMPAIRED;
+                st->disposition |= AV_DISPOSITION_DESCRIPTIONS;
                 break;
             }
         }
         if (i && language[0]) {
             language[i - 1] = 0;
-            av_dict_set(&st->metadata, "language", language, 0);
+            /* don't overwrite language, as it may already have been set by
+             * another, more specific descriptor (e.g. supplementary audio) */
+            av_dict_set(&st->metadata, "language", language, AV_DICT_DONT_OVERWRITE);
         }
         break;
     case 0x05: /* registration descriptor */
@@ -1895,6 +1936,71 @@
                 st->internal->need_context_update = 1;
             }
         }
+        if (ext_desc_tag == 0x06) { /* supplementary audio descriptor */
+            int flags;
+
+            if (desc_len < 1)
+                return AVERROR_INVALIDDATA;
+            flags = get8(pp, desc_end);
+
+            if ((flags & 0x80) == 0) /* mix_type */
+                st->disposition |= AV_DISPOSITION_DEPENDENT;
+
+            switch ((flags >> 2) & 0x1F) { /* editorial_classification */
+            case 0x01:
+                st->disposition |= AV_DISPOSITION_VISUAL_IMPAIRED;
+                st->disposition |= AV_DISPOSITION_DESCRIPTIONS;
+                break;
+            case 0x02:
+                st->disposition |= AV_DISPOSITION_HEARING_IMPAIRED;
+                break;
+            case 0x03:
+                st->disposition |= AV_DISPOSITION_VISUAL_IMPAIRED;
+                break;
+            }
+
+            if (flags & 0x01) { /* language_code_present */
+                if (desc_len < 4)
+                    return AVERROR_INVALIDDATA;
+                language[0] = get8(pp, desc_end);
+                language[1] = get8(pp, desc_end);
+                language[2] = get8(pp, desc_end);
+                language[3] = 0;
+
+                /* This language always has to override a possible
+                 * ISO 639 language descriptor language */
+                if (language[0])
+                    av_dict_set(&st->metadata, "language", language, 0);
+            }
+        }
+        break;
+    case 0x6a: /* ac-3_descriptor */
+        {
+            int component_type_flag = get8(pp, desc_end) & (1 << 7);
+            if (component_type_flag) {
+                int component_type = get8(pp, desc_end);
+                int service_type_mask = 0x38;  // 0b00111000
+                int service_type = ((component_type & service_type_mask) >> 3);
+                if (service_type == 0x02 /* 0b010 */) {
+                    st->disposition |= AV_DISPOSITION_DESCRIPTIONS;
+                    av_log(ts ? ts->stream : fc, AV_LOG_DEBUG, "New track disposition for id %u: %u\n", st->id, st->disposition);
+                }
+            }
+        }
+        break;
+    case 0x7a: /* enhanced_ac-3_descriptor */
+        {
+            int component_type_flag = get8(pp, desc_end) & (1 << 7);
+            if (component_type_flag) {
+                int component_type = get8(pp, desc_end);
+                int service_type_mask = 0x38;  // 0b00111000
+                int service_type = ((component_type & service_type_mask) >> 3);
+                if (service_type == 0x02 /* 0b010 */) {
+                    st->disposition |= AV_DISPOSITION_DESCRIPTIONS;
+                    av_log(ts ? ts->stream : fc, AV_LOG_DEBUG, "New track disposition for id %u: %u\n", st->id, st->disposition);
+                }
+            }
+        }
         break;
     default:
         break;
@@ -1903,6 +2009,72 @@
     return 0;
 }
 
+static AVStream *find_matching_stream(MpegTSContext *ts, int pid,
+                                      int stream_identifier, int pmt_stream_idx)
+{
+    AVFormatContext *s = ts->stream;
+    int i;
+    AVStream *found = NULL;
+
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+        if (stream_identifier != -1) { /* match based on "stream identifier descriptor" if present */
+            if (st->stream_identifier == stream_identifier+1) {
+                found = st;
+                break;
+            }
+        } else if (st->pmt_stream_idx == pmt_stream_idx) { /* match based on position within the PMT */
+            found = st;
+            break;
+        }
+    }
+
+    if (found) {
+        av_log(ts->stream, AV_LOG_VERBOSE,
+               "re-using existing %s stream %d (pid=0x%x) for new pid=0x%x\n",
+               av_get_media_type_string(found->codecpar->codec_type),
+               i, found->id, pid);
+    }
+
+    return found;
+}
+
+static int parse_stream_identifier_desc(const uint8_t *p, const uint8_t *p_end)
+{
+    const uint8_t **pp = &p;
+    const uint8_t *desc_list_end;
+    const uint8_t *desc_end;
+    int desc_list_len;
+    int desc_len, desc_tag;
+
+    desc_list_len = get16(pp, p_end);
+    if (desc_list_len < 0)
+        return -1;
+    desc_list_len &= 0xfff;
+    desc_list_end  = p + desc_list_len;
+    if (desc_list_end > p_end)
+        return -1;
+
+    while (1) {
+        desc_tag = get8(pp, desc_list_end);
+        if (desc_tag < 0)
+            return -1;
+        desc_len = get8(pp, desc_list_end);
+        if (desc_len < 0)
+            return -1;
+        desc_end = *pp + desc_len;
+        if (desc_end > desc_list_end)
+            return -1;
+
+        if (desc_tag == 0x52) {
+            return get8(pp, desc_end);
+        }
+        *pp = desc_end;
+    }
+
+    return -1;
+}
+
 static int is_pes_stream(int stream_type, uint32_t prog_reg_desc)
 {
     return !(stream_type == 0x13 ||
@@ -1920,6 +2092,7 @@
     int program_info_length, pcr_pid, pid, stream_type;
     int desc_list_len;
     uint32_t prog_reg_desc = 0; /* registration descriptor */
+    int stream_identifier = -1;
 
     int mp4_descr_count = 0;
     Mp4Descr mp4_descr[MAX_MP4_DESCR_COUNT] = { { 0 } };
@@ -1932,17 +2105,19 @@
     p = section;
     if (parse_section_header(h, &p, p_end) < 0)
         return;
+    if (h->tid != PMT_TID)
+        return;
     if (skip_identical(h, tssf))
         return;
 
     av_log(ts->stream, AV_LOG_TRACE, "sid=0x%x sec_num=%d/%d version=%d tid=%d\n",
             h->id, h->sec_num, h->last_sec_num, h->version, h->tid);
 
-    if (h->tid != PMT_TID)
-        return;
     if (!ts->scan_all_pmts && ts->skip_changes)
         return;
 
+    if (ts->skip_unknown_pmt && !get_program(ts, h->id))
+        return;
     if (!ts->skip_clear)
         clear_program(ts, h->id);
 
@@ -1951,7 +2126,7 @@
         return;
     pcr_pid &= 0x1fff;
     add_pid_to_pmt(ts, h->id, pcr_pid);
-    set_pcr_pid(ts->stream, h->id, pcr_pid);
+    update_av_program_info(ts->stream, h->id, pcr_pid, h->version);
 
     av_log(ts->stream, AV_LOG_TRACE, "pcr_pid=0x%x\n", pcr_pid);
 
@@ -1993,7 +2168,7 @@
     set_pmt_found(ts, h->id);
 
 
-    for (;;) {
+    for (i = 0; ; i++) {
         st = 0;
         pes = NULL;
         stream_type = get8(&p, p_end);
@@ -2006,35 +2181,67 @@
         if (pid == ts->current_pid)
             goto out;
 
+        if (ts->merge_pmt_versions)
+            stream_identifier = parse_stream_identifier_desc(p, p_end);
+
         /* now create stream */
         if (ts->pids[pid] && ts->pids[pid]->type == MPEGTS_PES) {
             pes = ts->pids[pid]->u.pes_filter.opaque;
+            if (ts->merge_pmt_versions && !pes->st) {
+                st = find_matching_stream(ts, pid, stream_identifier, i);
+                if (st) {
+                    pes->st = st;
+                    pes->stream_type = stream_type;
+                    pes->merged_st = 1;
+                }
+            }
             if (!pes->st) {
-                pes->st     = avformat_new_stream(pes->stream, NULL);
+                pes->st = avformat_new_stream(pes->stream, NULL);
                 if (!pes->st)
                     goto out;
                 pes->st->id = pes->pid;
+                pes->st->program_num = h->id;
+                pes->st->pmt_version = h->version;
+                pes->st->pmt_stream_idx = i;
             }
             st = pes->st;
         } else if (is_pes_stream(stream_type, prog_reg_desc)) {
             if (ts->pids[pid])
                 mpegts_close_filter(ts, ts->pids[pid]); // wrongly added sdt filter probably
             pes = add_pes_stream(ts, pid, pcr_pid);
-            if (pes) {
+            if (ts->merge_pmt_versions && pes && !pes->st) {
+                st = find_matching_stream(ts, pid, stream_identifier, i);
+                if (st) {
+                    pes->st = st;
+                    pes->stream_type = stream_type;
+                    pes->merged_st = 1;
+                }
+            }
+            if (pes && !pes->st) {
                 st = avformat_new_stream(pes->stream, NULL);
                 if (!st)
                     goto out;
                 st->id = pes->pid;
+                st->program_num = h->id;
+                st->pmt_version = h->version;
+                st->pmt_stream_idx = i;
             }
         } else {
             int idx = ff_find_stream_index(ts->stream, pid);
             if (idx >= 0) {
                 st = ts->stream->streams[idx];
-            } else {
+            }
+            if (ts->merge_pmt_versions && !st) {
+                st = find_matching_stream(ts, pid, stream_identifier, i);
+            }
+            if (!st) {
                 st = avformat_new_stream(ts->stream, NULL);
                 if (!st)
                     goto out;
                 st->id = pid;
+                st->program_num = h->id;
+                st->pmt_version = h->version;
+                st->pmt_stream_idx = i;
                 st->codecpar->codec_type = AVMEDIA_TYPE_DATA;
                 if (stream_type == 0x86 && prog_reg_desc == AV_RL32("CUEI")) {
                     mpegts_find_stream_type(st, stream_type, SCTE_types);
@@ -2296,6 +2503,14 @@
         }
     }
 
+    if (packet[1] & 0x80) {
+        av_log(ts->stream, AV_LOG_DEBUG, "Packet had TEI flag set; marking as corrupt\n");
+        if (tss->type == MPEGTS_PES) {
+            PESContext *pc = tss->u.pes_filter.opaque;
+            pc->flags |= AV_PKT_FLAG_CORRUPT;
+        }
+    }
+
     p = packet + 4;
     if (has_adaptation) {
         int64_t pcr_h;

diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
index fdfa544..3339e26 100644
--- a/libavformat/mpegtsenc.c
+++ b/libavformat/mpegtsenc.c

@@ -1012,7 +1012,7 @@
            ts->sdt_packet_period, ts->pat_packet_period);
 
     if (ts->m2ts_mode == -1) {
-        if (av_match_ext(s->filename, "m2ts")) {
+        if (av_match_ext(s->url, "m2ts")) {
             ts->m2ts_mode = 1;
         } else {
             ts->m2ts_mode = 0;
@@ -1983,6 +1983,6 @@
     .write_trailer  = mpegts_write_end,
     .deinit         = mpegts_deinit,
     .check_bitstream = mpegts_check_bitstream,
-    .flags          = AVFMT_ALLOW_FLUSH | AVFMT_VARIABLE_FPS,
+    .flags          = AVFMT_ALLOW_FLUSH | AVFMT_VARIABLE_FPS | AVFMT_NODIMENSIONS,
     .priv_class     = &mpegts_muxer_class,
 };

diff --git a/libavformat/mpjpeg.c b/libavformat/mpjpeg.c
index 3904ccb..80f83c5 100644
--- a/libavformat/mpjpeg.c
+++ b/libavformat/mpjpeg.c

@@ -23,7 +23,7 @@
 
 /* Multipart JPEG */
 
-#define BOUNDARY_TAG "ffserver"
+#define BOUNDARY_TAG "ffmpeg"
 
 typedef struct MPJPEGContext {
     AVClass *class;

diff --git a/libavformat/mpjpegdec.c b/libavformat/mpjpegdec.c
index 83aa70d..64d880a 100644
--- a/libavformat/mpjpegdec.c
+++ b/libavformat/mpjpegdec.c

@@ -375,7 +375,7 @@
 #define OFFSET(x) offsetof(MPJPEGDemuxContext, x)
 
 #define DEC AV_OPT_FLAG_DECODING_PARAM
-const AVOption mpjpeg_options[] = {
+static const AVOption mpjpeg_options[] = {
     { "strict_mime_boundary",  "require MIME boundaries match", OFFSET(strict_mime_boundary), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DEC },
     { NULL }
 };

diff --git a/libavformat/mux.c b/libavformat/mux.c
index 53ad46d..2847a02 100644
--- a/libavformat/mux.c
+++ b/libavformat/mux.c

@@ -186,8 +186,16 @@
     } else
         s->priv_data = NULL;
 
-    if (filename)
+    if (filename) {
+#if FF_API_FORMAT_FILENAME
+FF_DISABLE_DEPRECATION_WARNINGS
         av_strlcpy(s->filename, filename, sizeof(s->filename));
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        if (!(s->url = av_strdup(filename)))
+            goto nomem;
+
+    }
     *avctx = s;
     return 0;
 nomem:
@@ -251,23 +259,25 @@
         (ret = av_opt_set_dict2(s->priv_data, &tmp, AV_OPT_SEARCH_CHILDREN)) < 0)
         goto fail;
 
+#if FF_API_FORMAT_FILENAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (!s->url && !(s->url = av_strdup(s->filename))) {
+FF_ENABLE_DEPRECATION_WARNINGS
+#else
+    if (!s->url && !(s->url = av_strdup(""))) {
+#endif
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
 #if FF_API_LAVF_AVCTX
 FF_DISABLE_DEPRECATION_WARNINGS
     if (s->nb_streams && s->streams[0]->codec->flags & AV_CODEC_FLAG_BITEXACT) {
         if (!(s->flags & AVFMT_FLAG_BITEXACT)) {
-#if FF_API_LAVF_BITEXACT
-            av_log(s, AV_LOG_WARNING,
-                   "Setting the AVFormatContext to bitexact mode, because "
-                   "the AVCodecContext is in that mode. This behavior will "
-                   "change in the future. To keep the current behavior, set "
-                   "AVFormatContext.flags |= AVFMT_FLAG_BITEXACT.\n");
-            s->flags |= AVFMT_FLAG_BITEXACT;
-#else
             av_log(s, AV_LOG_WARNING,
                    "The AVFormatContext is not in set to bitexact mode, only "
                    "the AVCodecContext. If this is not intended, set "
                    "AVFormatContext.flags |= AVFMT_FLAG_BITEXACT.\n");
-#endif
         }
     }
 FF_ENABLE_DEPRECATION_WARNINGS
@@ -284,17 +294,6 @@
         st  = s->streams[i];
         par = st->codecpar;
 
-#if FF_API_LAVF_CODEC_TB && FF_API_LAVF_AVCTX
-FF_DISABLE_DEPRECATION_WARNINGS
-        if (!st->time_base.num && st->codec->time_base.num) {
-            av_log(s, AV_LOG_WARNING, "Using AVStream.codec.time_base as a "
-                   "timebase hint to the muxer is deprecated. Set "
-                   "AVStream.time_base instead.\n");
-            avpriv_set_pts_info(st, 64, st->codec->time_base.num, st->codec->time_base.den);
-        }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
 #if FF_API_LAVF_AVCTX
 FF_DISABLE_DEPRECATION_WARNINGS
         if (st->codecpar->codec_type == AVMEDIA_TYPE_UNKNOWN &&
@@ -452,19 +451,27 @@
             break;
         }
 
-        if (!st->priv_pts)
-            st->priv_pts = av_mallocz(sizeof(*st->priv_pts));
-        if (!st->priv_pts)
+        if (!st->internal->priv_pts)
+            st->internal->priv_pts = av_mallocz(sizeof(*st->internal->priv_pts));
+        if (!st->internal->priv_pts)
             return AVERROR(ENOMEM);
 
         if (den != AV_NOPTS_VALUE) {
             if (den <= 0)
                 return AVERROR_INVALIDDATA;
 
-            frac_init(st->priv_pts, 0, 0, den);
+            frac_init(st->internal->priv_pts, 0, 0, den);
         }
     }
 
+    if (s->avoid_negative_ts < 0) {
+        av_assert2(s->avoid_negative_ts == AVFMT_AVOID_NEG_TS_AUTO);
+        if (s->oformat->flags & (AVFMT_TS_NEGATIVE | AVFMT_NOTIMESTAMPS)) {
+            s->avoid_negative_ts = 0;
+        } else
+            s->avoid_negative_ts = AVFMT_AVOID_NEG_TS_MAKE_NON_NEGATIVE;
+    }
+
     return 0;
 }
 
@@ -478,25 +485,6 @@
     }
 }
 
-static int write_header_internal(AVFormatContext *s)
-{
-    if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
-        avio_write_marker(s->pb, AV_NOPTS_VALUE, AVIO_DATA_MARKER_HEADER);
-    if (s->oformat->write_header) {
-        int ret = s->oformat->write_header(s);
-        if (ret >= 0 && s->pb && s->pb->error < 0)
-            ret = s->pb->error;
-        s->internal->write_header_ret = ret;
-        if (ret < 0)
-            return ret;
-        flush_if_needed(s);
-    }
-    s->internal->header_written = 1;
-    if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
-        avio_write_marker(s->pb, AV_NOPTS_VALUE, AVIO_DATA_MARKER_UNKNOWN);
-    return 0;
-}
-
 int avformat_init_output(AVFormatContext *s, AVDictionary **options)
 {
     int ret = 0;
@@ -511,14 +499,6 @@
         if ((ret = init_pts(s)) < 0)
             return ret;
 
-        if (s->avoid_negative_ts < 0) {
-            av_assert2(s->avoid_negative_ts == AVFMT_AVOID_NEG_TS_AUTO);
-            if (s->oformat->flags & (AVFMT_TS_NEGATIVE | AVFMT_NOTIMESTAMPS)) {
-                s->avoid_negative_ts = 0;
-            } else
-                s->avoid_negative_ts = AVFMT_AVOID_NEG_TS_MAKE_NON_NEGATIVE;
-        }
-
         return AVSTREAM_INIT_IN_INIT_OUTPUT;
     }
 
@@ -535,23 +515,22 @@
         if ((ret = avformat_init_output(s, options)) < 0)
             return ret;
 
-    if (!(s->oformat->check_bitstream && s->flags & AVFMT_FLAG_AUTO_BSF)) {
-        ret = write_header_internal(s);
+    if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
+        avio_write_marker(s->pb, AV_NOPTS_VALUE, AVIO_DATA_MARKER_HEADER);
+    if (s->oformat->write_header) {
+        ret = s->oformat->write_header(s);
+        if (ret >= 0 && s->pb && s->pb->error < 0)
+            ret = s->pb->error;
         if (ret < 0)
             goto fail;
+        flush_if_needed(s);
     }
+    if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
+        avio_write_marker(s->pb, AV_NOPTS_VALUE, AVIO_DATA_MARKER_UNKNOWN);
 
     if (!s->internal->streams_initialized) {
         if ((ret = init_pts(s)) < 0)
             goto fail;
-
-        if (s->avoid_negative_ts < 0) {
-            av_assert2(s->avoid_negative_ts == AVFMT_AVOID_NEG_TS_AUTO);
-            if (s->oformat->flags & (AVFMT_TS_NEGATIVE | AVFMT_NOTIMESTAMPS)) {
-                s->avoid_negative_ts = 0;
-            } else
-                s->avoid_negative_ts = AVFMT_AVOID_NEG_TS_MAKE_NON_NEGATIVE;
-        }
     }
 
     return streams_already_initialized;
@@ -592,7 +571,7 @@
     }
 
     if (s->debug & FF_FDEBUG_TS)
-        av_log(s, AV_LOG_TRACE, "compute_muxer_pkt_fields: pts:%s dts:%s cur_dts:%s b:%d size:%d st:%d\n",
+        av_log(s, AV_LOG_DEBUG, "compute_muxer_pkt_fields: pts:%s dts:%s cur_dts:%s b:%d size:%d st:%d\n",
             av_ts2str(pkt->pts), av_ts2str(pkt->dts), av_ts2str(st->cur_dts), delay, pkt->size, pkt->stream_index);
 
     if (pkt->duration < 0 && st->codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
@@ -621,7 +600,7 @@
         }
         pkt->dts =
 //        pkt->pts= st->cur_dts;
-            pkt->pts = st->priv_pts->val;
+            pkt->pts = st->internal->priv_pts->val;
     }
 
     //calculate dts from pts
@@ -654,11 +633,11 @@
     }
 
     if (s->debug & FF_FDEBUG_TS)
-        av_log(s, AV_LOG_TRACE, "av_write_frame: pts2:%s dts2:%s\n",
+        av_log(s, AV_LOG_DEBUG, "av_write_frame: pts2:%s dts2:%s\n",
             av_ts2str(pkt->pts), av_ts2str(pkt->dts));
 
     st->cur_dts = pkt->dts;
-    st->priv_pts->val = pkt->dts;
+    st->internal->priv_pts->val = pkt->dts;
 
     /* update pts */
     switch (st->codecpar->codec_type) {
@@ -670,12 +649,12 @@
         /* HACK/FIXME, we skip the initial 0 size packets as they are most
          * likely equal to the encoder delay, but it would be better if we
          * had the real timestamps from the encoder */
-        if (frame_size >= 0 && (pkt->size || st->priv_pts->num != st->priv_pts->den >> 1 || st->priv_pts->val)) {
-            frac_add(st->priv_pts, (int64_t)st->time_base.den * frame_size);
+        if (frame_size >= 0 && (pkt->size || st->internal->priv_pts->num != st->internal->priv_pts->den >> 1 || st->internal->priv_pts->val)) {
+            frac_add(st->internal->priv_pts, (int64_t)st->time_base.den * frame_size);
         }
         break;
     case AVMEDIA_TYPE_VIDEO:
-        frac_add(st->priv_pts, (int64_t)st->time_base.den * st->time_base.num);
+        frac_add(st->internal->priv_pts, (int64_t)st->time_base.den * st->time_base.num);
         break;
     }
     return 0;
@@ -694,7 +673,7 @@
  */
 static int write_packet(AVFormatContext *s, AVPacket *pkt)
 {
-    int ret, did_split;
+    int ret;
     int64_t pts_backup, dts_backup;
 
     pts_backup = pkt->pts;
@@ -759,18 +738,6 @@
         }
     }
 
-#if FF_API_LAVF_MERGE_SD
-FF_DISABLE_DEPRECATION_WARNINGS
-    did_split = av_packet_split_side_data(pkt);
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
-    if (!s->internal->header_written) {
-        ret = s->internal->write_header_ret ? s->internal->write_header_ret : write_header_internal(s);
-        if (ret < 0)
-            goto fail;
-    }
-
     if ((pkt->flags & AV_PKT_FLAG_UNCODED_FRAME)) {
         AVFrame *frame = (AVFrame *)pkt->data;
         av_assert0(pkt->size == UNCODED_FRAME_PACKET_SIZE);
@@ -786,14 +753,6 @@
             ret = s->pb->error;
     }
 
-fail:
-#if FF_API_LAVF_MERGE_SD
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (did_split)
-        av_packet_merge_side_data(pkt);
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     if (ret < 0) {
         pkt->pts = pts_backup;
         pkt->dts = dts_backup;
@@ -889,16 +848,6 @@
         }
     }
 
-#if FF_API_LAVF_MERGE_SD
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (st->internal->nb_bsfcs) {
-        ret = av_packet_split_side_data(pkt);
-        if (ret < 0)
-            av_log(s, AV_LOG_WARNING, "Failed to split side data before bitstream filter\n");
-    }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     for (i = 0; i < st->internal->nb_bsfcs; i++) {
         AVBSFContext *ctx = st->internal->bsfcs[i];
         // TODO: when any bitstream filter requires flushing at EOF, we'll need to
@@ -918,7 +867,9 @@
             av_log(ctx, AV_LOG_ERROR,
                     "Failed to send packet to filter %s for stream %d\n",
                     ctx->filter->name, pkt->stream_index);
-            return ret;
+            if (s->error_recognition & AV_EF_EXPLODE)
+                return ret;
+            return 0;
         }
     }
     return 1;
@@ -934,11 +885,6 @@
 
     if (!pkt) {
         if (s->oformat->flags & AVFMT_ALLOW_FLUSH) {
-            if (!s->internal->header_written) {
-                ret = s->internal->write_header_ret ? s->internal->write_header_ret : write_header_internal(s);
-                if (ret < 0)
-                    return ret;
-            }
             ret = s->oformat->write_packet(s, NULL);
             flush_if_needed(s);
             if (ret >= 0 && s->pb && s->pb->error < 0)
@@ -1254,7 +1200,7 @@
             goto fail;
 
         if (s->debug & FF_FDEBUG_TS)
-            av_log(s, AV_LOG_TRACE, "av_interleaved_write_frame size:%d dts:%s pts:%s\n",
+            av_log(s, AV_LOG_DEBUG, "av_interleaved_write_frame size:%d dts:%s pts:%s\n",
                 pkt->size, av_ts2str(pkt->dts), av_ts2str(pkt->pts));
 
 #if FF_API_COMPUTE_PKT_FIELDS2 && FF_API_LAVF_AVCTX
@@ -1322,14 +1268,8 @@
             goto fail;
     }
 
-    if (!s->internal->header_written) {
-        ret = s->internal->write_header_ret ? s->internal->write_header_ret : write_header_internal(s);
-        if (ret < 0)
-            goto fail;
-    }
-
 fail:
-    if (s->internal->header_written && s->oformat->write_trailer) {
+    if (s->oformat->write_trailer) {
         if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
             avio_write_marker(s->pb, AV_NOPTS_VALUE, AVIO_DATA_MARKER_TRAILER);
         if (ret >= 0) {
@@ -1342,7 +1282,6 @@
     if (s->oformat->deinit)
         s->oformat->deinit(s);
 
-    s->internal->header_written =
     s->internal->initialized =
     s->internal->streams_initialized = 0;
 

diff --git a/libavformat/mvdec.c b/libavformat/mvdec.c
index f7aa4cb..fa59617 100644
--- a/libavformat/mvdec.c
+++ b/libavformat/mvdec.c

@@ -227,7 +227,9 @@
                        int (*parse)(AVFormatContext *avctx, AVStream *st,
                                     const char *name, int size))
 {
-    int count, i;
+    unsigned count;
+    int i;
+
     AVIOContext *pb = avctx->pb;
     avio_skip(pb, 4);
     count = avio_rb32(pb);
@@ -235,6 +237,10 @@
     for (i = 0; i < count; i++) {
         char name[17];
         int size;
+
+        if (avio_feof(pb))
+            return AVERROR_EOF;
+
         avio_read(pb, name, 16);
         name[sizeof(name) - 1] = 0;
         size = avio_rb32(pb);

diff --git a/libavformat/mxf.c b/libavformat/mxf.c
index bfc3218..451cbcf 100644
--- a/libavformat/mxf.c
+++ b/libavformat/mxf.c

@@ -28,6 +28,7 @@
 const MXFCodecUL ff_mxf_data_definition_uls[] = {
     { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x01,0x03,0x02,0x02,0x01,0x00,0x00,0x00 }, 13, AVMEDIA_TYPE_VIDEO },
     { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x01,0x03,0x02,0x02,0x02,0x00,0x00,0x00 }, 13, AVMEDIA_TYPE_AUDIO },
+    { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x01,0x03,0x02,0x02,0x03,0x00,0x00,0x00 }, 13, AVMEDIA_TYPE_DATA },
     { { 0x80,0x7D,0x00,0x60,0x08,0x14,0x3E,0x6F,0x6F,0x3C,0x8C,0xE1,0x6C,0xEF,0x11,0xD2 }, 16, AVMEDIA_TYPE_VIDEO }, /* LegacyPicture Avid Media Composer MXF */
     { { 0x80,0x7D,0x00,0x60,0x08,0x14,0x3E,0x6F,0x78,0xE1,0xEB,0xE1,0x6C,0xEF,0x11,0xD2 }, 16, AVMEDIA_TYPE_AUDIO }, /* LegacySound Avid Media Composer MXF */
     { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0,  AVMEDIA_TYPE_DATA },
@@ -134,9 +135,10 @@
     { { 1001, 24000 }, { 2002, 0,    0,    0,    0,    0 } }, // FILM 23.976
     { { 1, 24},        { 2000, 0,    0,    0,    0,    0 } }, // FILM 24
     { { 1001, 30000 }, { 1602, 1601, 1602, 1601, 1602, 0 } }, // NTSC 29.97
-    { { 1001, 60000 }, { 801,  801,  801,  801,  800,  0 } }, // NTSC 59.94
+    { { 1001, 60000 }, { 801,  801,  800,  801,  801,  0 } }, // NTSC 59.94
     { { 1, 25 },       { 1920, 0,    0,    0,    0,    0 } }, // PAL 25
     { { 1, 50 },       { 960,  0,    0,    0,    0,    0 } }, // PAL 50
+    { { 1, 60 },       { 800,  0,    0,    0,    0,    0 } },
 };
 
 static const AVRational mxf_time_base[] = {
@@ -146,6 +148,7 @@
     { 1001, 60000 },
     { 1, 25 },
     { 1, 50 },
+    { 1, 60 },
     { 0, 0}
 };
 
@@ -155,7 +158,7 @@
     int idx = av_find_nearest_q_idx(time_base, mxf_time_base);
     AVRational diff = av_sub_q(time_base, mxf_time_base[idx]);
 
-    diff.num = abs(diff.num);
+    diff.num = FFABS(diff.num);
 
     if (av_cmp_q(diff, (AVRational){1, 1000}) >= 0)
         return NULL;
@@ -169,3 +172,20 @@
 
     return &mxf_spf[idx];
 }
+
+static const int mxf_content_package_rates[] = {
+    3, 2, 7, 13, 4, 10, 12,
+};
+
+int ff_mxf_get_content_package_rate(AVRational time_base)
+{
+    int idx = av_find_nearest_q_idx(time_base, mxf_time_base);
+    AVRational diff = av_sub_q(time_base, mxf_time_base[idx]);
+
+    diff.num = FFABS(diff.num);
+
+    if (av_cmp_q(diff, (AVRational){1, 1000}) >= 0)
+        return -1;
+
+    return mxf_content_package_rates[idx];
+}

diff --git a/libavformat/mxf.h b/libavformat/mxf.h
index f3db1f9..4394450 100644
--- a/libavformat/mxf.h
+++ b/libavformat/mxf.h

@@ -45,9 +45,9 @@
     SubDescriptor,
     IndexTableSegment,
     EssenceContainerData,
-    TypeBottom,// add metadata type before this
     EssenceGroup,
     TaggedValue,
+    TapeDescriptor,
 };
 
 enum MXFFrameLayout {
@@ -62,12 +62,23 @@
     UID key;
     int64_t offset;
     uint64_t length;
+    int64_t next_klv;
 } KLVPacket;
 
+typedef enum {
+    NormalWrap = 0,
+    D10D11Wrap,
+    RawAWrap,
+    RawVWrap
+} MXFWrappingIndicatorType;
+
 typedef struct MXFCodecUL {
     UID uid;
     unsigned matching_len;
     int id;
+    const char *desc;
+    unsigned wrapping_indicator_pos;
+    MXFWrappingIndicatorType wrapping_indicator_type;
 } MXFCodecUL;
 
 typedef struct {
@@ -82,6 +93,8 @@
 
 int ff_mxf_decode_pixel_layout(const char pixel_layout[16], enum AVPixelFormat *pix_fmt);
 const MXFSamplesPerFrame *ff_mxf_get_samples_per_frame(AVFormatContext *s, AVRational time_base);
+int ff_mxf_get_content_package_rate(AVRational time_base);
+
 
 #define PRIxUID                             \
     "%02x.%02x.%02x.%02x."                  \

diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
index 118e3e4..f49890e 100644
--- a/libavformat/mxfdec.c
+++ b/libavformat/mxfdec.c

@@ -52,10 +52,13 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/timecode.h"
+#include "libavutil/opt.h"
 #include "avformat.h"
 #include "internal.h"
 #include "mxf.h"
 
+#define MXF_MAX_CHUNK_SIZE (32 << 20)
+
 typedef enum {
     Header,
     BodyPartition,
@@ -76,6 +79,12 @@
     OPSONYOpt,  /* FATE sample, violates the spec in places */
 } MXFOP;
 
+typedef enum {
+    UnknownWrapped = 0,
+    FrameWrapped,
+    ClipWrapped,
+} MXFWrappingScheme;
+
 typedef struct MXFPartition {
     int closed;
     int complete;
@@ -91,6 +100,8 @@
     int64_t index_byte_count;
     int pack_length;
     int64_t pack_ofs;               ///< absolute offset of pack in file, including run-in
+    int64_t body_offset;
+    KLVPacket first_essence_klv;
 } MXFPartition;
 
 typedef struct MXFCryptoContext {
@@ -162,6 +173,10 @@
     int intra_only;
     uint64_t sample_count;
     int64_t original_duration; /* st->duration in SampleRate/EditRate units */
+    int index_sid;
+    int body_sid;
+    MXFWrappingScheme wrapping;
+    int edit_units_per_packet; /* how many edit units to read at a time (PCM, ClipWrapped) */
 } MXFTrack;
 
 typedef struct MXFDescriptor {
@@ -223,6 +238,15 @@
     int comment_count;
 } MXFPackage;
 
+typedef struct MXFEssenceContainerData {
+    UID uid;
+    enum MXFMetadataSetType type;
+    UID package_uid;
+    UID package_ul;
+    int index_sid;
+    int body_sid;
+} MXFEssenceContainerData;
+
 typedef struct MXFMetadataSet {
     UID uid;
     enum MXFMetadataSetType type;
@@ -242,11 +266,14 @@
 } MXFIndexTable;
 
 typedef struct MXFContext {
+    const AVClass *class;     /**< Class for private options. */
     MXFPartition *partitions;
     unsigned partitions_count;
     MXFOP op;
     UID *packages_refs;
     int packages_count;
+    UID *essence_container_data_refs;
+    int essence_container_data_count;
     MXFMetadataSet **metadata_sets;
     int metadata_sets_count;
     AVFormatContext *fc;
@@ -255,23 +282,16 @@
     int local_tags_count;
     uint64_t footer_partition;
     KLVPacket current_klv_data;
-    int current_klv_index;
     int run_in;
     MXFPartition *current_partition;
     int parsing_backward;
     int64_t last_forward_tell;
     int last_forward_partition;
-    int current_edit_unit;
     int nb_index_tables;
     MXFIndexTable *index_tables;
-    int edit_units_per_packet;      ///< how many edit units to read at a time (PCM, OPAtom)
+    int eia608_extract;
 } MXFContext;
 
-enum MXFWrappingScheme {
-    Frame,
-    Clip,
-};
-
 /* NOTE: klv_offset is not set (-1) for local keys */
 typedef int MXFMetadataReadFunc(void *arg, AVIOContext *pb, int tag, int size, UID uid, int64_t klv_offset);
 
@@ -289,7 +309,8 @@
 static const uint8_t mxf_essence_element_key[]             = { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01 };
 static const uint8_t mxf_avid_essence_element_key[]        = { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0e,0x04,0x03,0x01 };
 static const uint8_t mxf_canopus_essence_element_key[]     = { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x0a,0x0e,0x0f,0x03,0x01 };
-static const uint8_t mxf_system_item_key[]                 = { 0x06,0x0e,0x2b,0x34,0x02,0x05,0x01,0x01,0x0d,0x01,0x03,0x01,0x04 };
+static const uint8_t mxf_system_item_key_cp[]              = { 0x06,0x0e,0x2b,0x34,0x02,0x05,0x01,0x01,0x0d,0x01,0x03,0x01,0x04 };
+static const uint8_t mxf_system_item_key_gc[]              = { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x03,0x01,0x14 };
 static const uint8_t mxf_klv_key[]                         = { 0x06,0x0e,0x2b,0x34 };
 /* complete keys to match */
 static const uint8_t mxf_crypto_source_container_ul[]      = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x09,0x06,0x01,0x01,0x02,0x02,0x00,0x00,0x00 };
@@ -357,6 +378,8 @@
         while (bytes_num--)
             size = size << 8 | avio_r8(pb);
     }
+    if (size > INT64_MAX)
+        return AVERROR_INVALIDDATA;
     return size;
 }
 
@@ -375,27 +398,133 @@
 
 static int klv_read_packet(KLVPacket *klv, AVIOContext *pb)
 {
+    int64_t length, pos;
     if (!mxf_read_sync(pb, mxf_klv_key, 4))
         return AVERROR_INVALIDDATA;
     klv->offset = avio_tell(pb) - 4;
     memcpy(klv->key, mxf_klv_key, 4);
     avio_read(pb, klv->key + 4, 12);
-    klv->length = klv_decode_ber_length(pb);
-    return klv->length == -1 ? -1 : 0;
+    length = klv_decode_ber_length(pb);
+    if (length < 0)
+        return length;
+    klv->length = length;
+    pos = avio_tell(pb);
+    if (pos > INT64_MAX - length)
+        return AVERROR_INVALIDDATA;
+    klv->next_klv = pos + length;
+    return 0;
 }
 
-static int mxf_get_stream_index(AVFormatContext *s, KLVPacket *klv)
+static int mxf_get_stream_index(AVFormatContext *s, KLVPacket *klv, int body_sid)
 {
     int i;
 
     for (i = 0; i < s->nb_streams; i++) {
         MXFTrack *track = s->streams[i]->priv_data;
         /* SMPTE 379M 7.3 */
-        if (track && !memcmp(klv->key + sizeof(mxf_essence_element_key), track->track_number, sizeof(track->track_number)))
+        if (track && (!body_sid || !track->body_sid || track->body_sid == body_sid) && !memcmp(klv->key + sizeof(mxf_essence_element_key), track->track_number, sizeof(track->track_number)))
             return i;
     }
     /* return 0 if only one stream, for OP Atom files with 0 as track number */
-    return s->nb_streams == 1 ? 0 : -1;
+    return s->nb_streams == 1 && s->streams[0]->priv_data ? 0 : -1;
+}
+
+static int find_body_sid_by_offset(MXFContext *mxf, int64_t offset)
+{
+    // we look for partition where the offset is placed
+    int a, b, m;
+    int64_t this_partition;
+
+    a = -1;
+    b = mxf->partitions_count;
+
+    while (b - a > 1) {
+        m         = (a + b) >> 1;
+        this_partition = mxf->partitions[m].this_partition;
+        if (this_partition <= offset)
+            a = m;
+        else
+            b = m;
+    }
+
+    if (a == -1)
+        return 0;
+    return mxf->partitions[a].body_sid;
+}
+
+static int mxf_get_eia608_packet(AVFormatContext *s, AVStream *st, AVPacket *pkt, int64_t length)
+{
+    int count = avio_rb16(s->pb);
+    int cdp_identifier, cdp_length, cdp_footer_id, ccdata_id, cc_count;
+    int line_num, sample_coding, sample_count;
+    int did, sdid, data_length;
+    int i, ret;
+
+    if (count != 1)
+        av_log(s, AV_LOG_WARNING, "unsupported multiple ANC packets (%d) per KLV packet\n", count);
+
+    for (i = 0; i < count; i++) {
+        if (length < 6) {
+            av_log(s, AV_LOG_ERROR, "error reading s436m packet %"PRId64"\n", length);
+            return AVERROR_INVALIDDATA;
+        }
+        line_num = avio_rb16(s->pb);
+        avio_r8(s->pb); // wrapping type
+        sample_coding = avio_r8(s->pb);
+        sample_count = avio_rb16(s->pb);
+        length -= 6 + 8 + sample_count;
+        if (line_num != 9 && line_num != 11)
+            continue;
+        if (sample_coding == 7 || sample_coding == 8 || sample_coding == 9) {
+            av_log(s, AV_LOG_WARNING, "unsupported s436m 10 bit sample coding\n");
+            continue;
+        }
+        if (length < 0)
+            return AVERROR_INVALIDDATA;
+
+        avio_rb32(s->pb); // array count
+        avio_rb32(s->pb); // array elem size
+        did = avio_r8(s->pb);
+        sdid = avio_r8(s->pb);
+        data_length = avio_r8(s->pb);
+        if (did != 0x61 || sdid != 1) {
+            av_log(s, AV_LOG_WARNING, "unsupported did or sdid: %x %x\n", did, sdid);
+            continue;
+        }
+        cdp_identifier = avio_rb16(s->pb); // cdp id
+        if (cdp_identifier != 0x9669) {
+            av_log(s, AV_LOG_ERROR, "wrong cdp identifier %x\n", cdp_identifier);
+            return AVERROR_INVALIDDATA;
+        }
+        cdp_length = avio_r8(s->pb);
+        avio_r8(s->pb); // cdp_frame_rate
+        avio_r8(s->pb); // cdp_flags
+        avio_rb16(s->pb); // cdp_hdr_sequence_cntr
+        ccdata_id = avio_r8(s->pb); // ccdata_id
+        if (ccdata_id != 0x72) {
+            av_log(s, AV_LOG_ERROR, "wrong cdp data section %x\n", ccdata_id);
+            return AVERROR_INVALIDDATA;
+        }
+        cc_count = avio_r8(s->pb) & 0x1f;
+        ret = av_get_packet(s->pb, pkt, cc_count * 3);
+        if (ret < 0)
+            return ret;
+        if (cdp_length - 9 - 4 <  cc_count * 3) {
+            av_log(s, AV_LOG_ERROR, "wrong cdp size %d cc count %d\n", cdp_length, cc_count);
+            return AVERROR_INVALIDDATA;
+        }
+        avio_skip(s->pb, data_length - 9 - 4 - cc_count * 3);
+        cdp_footer_id = avio_r8(s->pb);
+        if (cdp_footer_id != 0x74) {
+            av_log(s, AV_LOG_ERROR, "wrong cdp footer section %x\n", cdp_footer_id);
+            return AVERROR_INVALIDDATA;
+        }
+        avio_rb16(s->pb); // cdp_ftr_sequence_cntr
+        avio_r8(s->pb); // packet_checksum
+        break;
+    }
+
+    return 0;
 }
 
 /* XXX: use AVBitStreamFilter */
@@ -439,6 +568,7 @@
     uint8_t ivec[16];
     uint8_t tmpbuf[16];
     int index;
+    int body_sid;
 
     if (!mxf->aesc && s->key && s->keylen == 16) {
         mxf->aesc = av_aes_alloc();
@@ -447,7 +577,10 @@
         av_aes_init(mxf->aesc, s->key, 128, 1);
     }
     // crypto context
-    avio_skip(pb, klv_decode_ber_length(pb));
+    size = klv_decode_ber_length(pb);
+    if (size < 0)
+        return size;
+    avio_skip(pb, size);
     // plaintext offset
     klv_decode_ber_length(pb);
     plaintext_size = avio_rb64(pb);
@@ -456,7 +589,9 @@
     avio_read(pb, klv->key, 16);
     if (!IS_KLV_KEY(klv, mxf_essence_element_key))
         return AVERROR_INVALIDDATA;
-    index = mxf_get_stream_index(s, klv);
+
+    body_sid = find_body_sid_by_offset(mxf, klv->offset);
+    index = mxf_get_stream_index(s, klv, body_sid);
     if (index < 0)
         return AVERROR_INVALIDDATA;
     // source size
@@ -524,6 +659,9 @@
     uint64_t footer_partition;
     uint32_t nb_essence_containers;
 
+    if (mxf->partitions_count >= INT_MAX / 2)
+        return AVERROR_INVALIDDATA;
+
     tmp_part = av_realloc_array(mxf->partitions, mxf->partitions_count + 1, sizeof(*mxf->partitions));
     if (!tmp_part)
         return AVERROR(ENOMEM);
@@ -572,7 +710,7 @@
     partition->header_byte_count = avio_rb64(pb);
     partition->index_byte_count = avio_rb64(pb);
     partition->index_sid = avio_rb32(pb);
-    avio_skip(pb, 8);
+    partition->body_offset = avio_rb64(pb);
     partition->body_sid = avio_rb32(pb);
     if (avio_read(pb, op, sizeof(UID)) != sizeof(UID)) {
         av_log(mxf->fc, AV_LOG_ERROR, "Failed reading UID\n");
@@ -756,6 +894,9 @@
             av_log(mxf->fc, AV_LOG_VERBOSE, "Multiple packages_refs\n");
         av_free(mxf->packages_refs);
         return mxf_read_strong_ref_array(pb, &mxf->packages_refs, &mxf->packages_count);
+    case 0x1902:
+        av_free(mxf->essence_container_data_refs);
+        return mxf_read_strong_ref_array(pb, &mxf->essence_container_data_refs, &mxf->essence_container_data_count);
     }
     return 0;
 }
@@ -892,6 +1033,25 @@
     return 0;
 }
 
+static int mxf_read_essence_container_data(void *arg, AVIOContext *pb, int tag, int size, UID uid, int64_t klv_offset)
+{
+    MXFEssenceContainerData *essence_data = arg;
+    switch(tag) {
+        case 0x2701:
+            /* linked package umid UMID */
+            avio_read(pb, essence_data->package_ul, 16);
+            avio_read(pb, essence_data->package_uid, 16);
+            break;
+        case 0x3f06:
+            essence_data->index_sid = avio_rb32(pb);
+            break;
+        case 0x3f07:
+            essence_data->body_sid = avio_rb32(pb);
+            break;
+    }
+    return 0;
+}
+
 static int mxf_read_index_entry_array(AVIOContext *pb, MXFIndexTableSegment *segment)
 {
     int i, length;
@@ -1154,13 +1314,18 @@
 
 static const MXFCodecUL mxf_picture_essence_container_uls[] = {
     // video essence container uls
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x07,0x0d,0x01,0x03,0x01,0x02,0x0c,0x01,0x00 }, 14,   AV_CODEC_ID_JPEG2000 },
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x10,0x60,0x01 }, 14,       AV_CODEC_ID_H264 }, /* H.264 frame wrapped */
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x12,0x01,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC-1 frame wrapped */
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x04,0x60,0x01 }, 14, AV_CODEC_ID_MPEG2VIDEO }, /* MPEG-ES frame wrapped */
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x01,0x04,0x01 }, 14, AV_CODEC_ID_MPEG2VIDEO }, /* Type D-10 mapping of 40Mbps 525/60-I */
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x02,0x41,0x01 }, 14,    AV_CODEC_ID_DVVIDEO }, /* DV 625 25mbps */
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x05,0x00,0x00 }, 14,   AV_CODEC_ID_RAWVIDEO }, /* uncompressed picture */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x07,0x0d,0x01,0x03,0x01,0x02,0x0c,0x01,0x00 }, 14,   AV_CODEC_ID_JPEG2000, NULL, 14 },
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x10,0x60,0x01 }, 14,       AV_CODEC_ID_H264, NULL, 15 }, /* H.264 */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 }, 14,      AV_CODEC_ID_DNXHD, NULL, 14 }, /* VC-3 */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x12,0x01,0x00 }, 14,        AV_CODEC_ID_VC1, NULL, 14 }, /* VC-1 */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x14,0x01,0x00 }, 14,       AV_CODEC_ID_TIFF, NULL, 14 }, /* TIFF */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x15,0x01,0x00 }, 14,      AV_CODEC_ID_DIRAC, NULL, 14 }, /* VC-2 */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x1b,0x01,0x00 }, 14,       AV_CODEC_ID_CFHD, NULL, 14 }, /* VC-5 */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x1c,0x01,0x00 }, 14,     AV_CODEC_ID_PRORES, NULL, 14 }, /* ProRes */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x04,0x60,0x01 }, 14, AV_CODEC_ID_MPEG2VIDEO, NULL, 15 }, /* MPEG-ES */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x01,0x04,0x01 }, 14, AV_CODEC_ID_MPEG2VIDEO, NULL, 15, D10D11Wrap }, /* SMPTE D-10 mapping */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x02,0x41,0x01 }, 14,    AV_CODEC_ID_DVVIDEO, NULL, 15 }, /* DV 625 25mbps */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x05,0x00,0x00 }, 14,   AV_CODEC_ID_RAWVIDEO, NULL, 15, RawVWrap }, /* uncompressed picture */
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0a,0x0e,0x0f,0x03,0x01,0x02,0x20,0x01,0x01 }, 15,     AV_CODEC_ID_HQ_HQA },
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0a,0x0e,0x0f,0x03,0x01,0x02,0x20,0x02,0x01 }, 15,        AV_CODEC_ID_HQX },
     { { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0xff,0x4b,0x46,0x41,0x41,0x00,0x0d,0x4d,0x4f }, 14,   AV_CODEC_ID_RAWVIDEO }, /* Legacy ?? Uncompressed Picture */
@@ -1191,22 +1356,54 @@
 
 static const MXFCodecUL mxf_sound_essence_container_uls[] = {
     // sound essence container uls
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x06,0x01,0x00 }, 14, AV_CODEC_ID_PCM_S16LE }, /* BWF Frame wrapped */
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x04,0x40,0x01 }, 14,       AV_CODEC_ID_MP2 }, /* MPEG-ES Frame wrapped, 0x40 ??? stream id */
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x01,0x01,0x01 }, 14, AV_CODEC_ID_PCM_S16LE }, /* D-10 Mapping 50Mbps PAL Extended Template */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x06,0x01,0x00 }, 14, AV_CODEC_ID_PCM_S16LE, NULL, 14, RawAWrap }, /* BWF */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x04,0x40,0x01 }, 14,       AV_CODEC_ID_MP2, NULL, 15 }, /* MPEG-ES */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x01,0x01,0x01 }, 14, AV_CODEC_ID_PCM_S16LE, NULL, 13 }, /* D-10 Mapping 50Mbps PAL Extended Template */
     { { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0xff,0x4b,0x46,0x41,0x41,0x00,0x0d,0x4d,0x4F }, 14, AV_CODEC_ID_PCM_S16LE }, /* 0001GL00.MXF.A1.mxf_opatom.mxf */
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x03,0x04,0x02,0x02,0x02,0x03,0x03,0x01,0x00 }, 14,       AV_CODEC_ID_AAC }, /* MPEG-2 AAC ADTS (legacy) */
     { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0,      AV_CODEC_ID_NONE },
 };
 
 static const MXFCodecUL mxf_data_essence_container_uls[] = {
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x09,0x0d,0x01,0x03,0x01,0x02,0x0e,0x00,0x00 }, 16, 0 },
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x09,0x0d,0x01,0x03,0x01,0x02,0x0d,0x00,0x00 }, 16, AV_CODEC_ID_NONE,      "vbi_smpte_436M", 11 },
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x09,0x0d,0x01,0x03,0x01,0x02,0x0e,0x00,0x00 }, 16, AV_CODEC_ID_NONE, "vbi_vanc_smpte_436M", 11 },
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x09,0x0d,0x01,0x03,0x01,0x02,0x13,0x01,0x01 }, 16, AV_CODEC_ID_TTML },
     { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0, AV_CODEC_ID_NONE },
 };
 
-static const char * const mxf_data_essence_descriptor[] = {
-    "vbi_vanc_smpte_436M",
-};
+static MXFWrappingScheme mxf_get_wrapping_kind(UID *essence_container_ul)
+{
+    int val;
+    const MXFCodecUL *codec_ul;
+
+    codec_ul = mxf_get_codec_ul(mxf_picture_essence_container_uls, essence_container_ul);
+    if (!codec_ul->uid[0])
+        codec_ul = mxf_get_codec_ul(mxf_sound_essence_container_uls, essence_container_ul);
+    if (!codec_ul->uid[0])
+        codec_ul = mxf_get_codec_ul(mxf_data_essence_container_uls, essence_container_ul);
+    if (!codec_ul->uid[0] || !codec_ul->wrapping_indicator_pos)
+        return UnknownWrapped;
+
+    val = (*essence_container_ul)[codec_ul->wrapping_indicator_pos];
+    switch (codec_ul->wrapping_indicator_type) {
+        case RawVWrap:
+            val = val % 4;
+            break;
+        case RawAWrap:
+            if (val == 0x03 || val == 0x04)
+                val -= 0x02;
+            break;
+        case D10D11Wrap:
+            if (val == 0x02)
+                val = 0x01;
+            break;
+    }
+    if (val == 0x01)
+        return FrameWrapped;
+    if (val == 0x02)
+        return ClipWrapped;
+    return UnknownWrapped;
+}
 
 static int mxf_get_sorted_table_segments(MXFContext *mxf, int *nb_sorted_segments, MXFIndexTableSegment ***sorted_segments)
 {
@@ -1229,9 +1426,22 @@
         return AVERROR(ENOMEM);
     }
 
-    for (i = j = 0; i < mxf->metadata_sets_count; i++)
-        if (mxf->metadata_sets[i]->type == IndexTableSegment)
-            unsorted_segments[j++] = (MXFIndexTableSegment*)mxf->metadata_sets[i];
+    for (i = nb_segments = 0; i < mxf->metadata_sets_count; i++) {
+        if (mxf->metadata_sets[i]->type == IndexTableSegment) {
+            MXFIndexTableSegment *s = (MXFIndexTableSegment*)mxf->metadata_sets[i];
+            if (s->edit_unit_byte_count || s->nb_index_entries)
+                unsorted_segments[nb_segments++] = s;
+            else
+                av_log(mxf->fc, AV_LOG_WARNING, "IndexSID %i segment at %"PRId64" missing EditUnitByteCount and IndexEntryArray\n",
+                       s->index_sid, s->index_start_position);
+        }
+    }
+
+    if (!nb_segments) {
+        av_freep(sorted_segments);
+        av_free(unsorted_segments);
+        return AVERROR_INVALIDDATA;
+    }
 
     *nb_sorted_segments = 0;
 
@@ -1247,9 +1457,15 @@
              * We want the smallest values for the keys than what we currently have, unless this is the first such entry this time around.
              * If we come across an entry with the same IndexStartPosition but larger IndexDuration, then we'll prefer it over the one we currently have.
              */
-            if ((i == 0     || s->body_sid > last_body_sid || s->index_sid > last_index_sid || s->index_start_position > last_index_start) &&
-                (best == -1 || s->body_sid < best_body_sid || s->index_sid < best_index_sid || s->index_start_position < best_index_start ||
-                (s->index_start_position == best_index_start && s->index_duration > best_index_duration))) {
+            if ((i == 0 ||
+                 s->body_sid >  last_body_sid ||
+                 s->body_sid == last_body_sid && s->index_sid >  last_index_sid ||
+                 s->body_sid == last_body_sid && s->index_sid == last_index_sid && s->index_start_position > last_index_start) &&
+                (best == -1 ||
+                 s->body_sid <  best_body_sid ||
+                 s->body_sid == best_body_sid && s->index_sid <  best_index_sid ||
+                 s->body_sid == best_body_sid && s->index_sid == best_index_sid && s->index_start_position <  best_index_start ||
+                 s->body_sid == best_body_sid && s->index_sid == best_index_sid && s->index_start_position == best_index_start && s->index_duration > best_index_duration)) {
                 best             = j;
                 best_body_sid    = s->body_sid;
                 best_index_sid   = s->index_sid;
@@ -1276,28 +1492,42 @@
 /**
  * Computes the absolute file offset of the given essence container offset
  */
-static int mxf_absolute_bodysid_offset(MXFContext *mxf, int body_sid, int64_t offset, int64_t *offset_out)
+static int mxf_absolute_bodysid_offset(MXFContext *mxf, int body_sid, int64_t offset, int64_t *offset_out, MXFPartition **partition_out)
 {
-    int x;
-    int64_t offset_in = offset;     /* for logging */
+    MXFPartition *last_p = NULL;
+    int a, b, m, m0;
 
-    for (x = 0; x < mxf->partitions_count; x++) {
-        MXFPartition *p = &mxf->partitions[x];
+    if (offset < 0)
+        return AVERROR(EINVAL);
 
-        if (p->body_sid != body_sid)
-            continue;
+    a = -1;
+    b = mxf->partitions_count;
 
-        if (offset < p->essence_length || !p->essence_length) {
-            *offset_out = p->essence_offset + offset;
-            return 0;
-        }
+    while (b - a > 1) {
+        m0 = m = (a + b) >> 1;
 
-        offset -= p->essence_length;
+        while (m < b && mxf->partitions[m].body_sid != body_sid)
+            m++;
+
+        if (m < b && mxf->partitions[m].body_offset <= offset)
+            a = m;
+        else
+            b = m0;
+    }
+
+    if (a >= 0)
+        last_p = &mxf->partitions[a];
+
+    if (last_p && (!last_p->essence_length || last_p->essence_length > (offset - last_p->body_offset))) {
+        *offset_out = last_p->essence_offset + (offset - last_p->body_offset);
+        if (partition_out)
+            *partition_out = last_p;
+        return 0;
     }
 
     av_log(mxf->fc, AV_LOG_ERROR,
            "failed to find absolute offset of %"PRIX64" in BodySID %i - partial file?\n",
-           offset_in, body_sid);
+           offset, body_sid);
 
     return AVERROR_INVALIDDATA;
 }
@@ -1326,11 +1556,13 @@
 }
 
 /* EditUnit -> absolute offset */
-static int mxf_edit_unit_absolute_offset(MXFContext *mxf, MXFIndexTable *index_table, int64_t edit_unit, int64_t *edit_unit_out, int64_t *offset_out, int nag)
+static int mxf_edit_unit_absolute_offset(MXFContext *mxf, MXFIndexTable *index_table, int64_t edit_unit, AVRational edit_rate, int64_t *edit_unit_out, int64_t *offset_out, MXFPartition **partition_out, int nag)
 {
     int i;
     int64_t offset_temp = 0;
 
+    edit_unit = av_rescale_q(edit_unit, index_table->segments[0]->index_edit_rate, edit_rate);
+
     for (i = 0; i < index_table->nb_segments; i++) {
         MXFIndexTableSegment *s = index_table->segments[i];
 
@@ -1341,7 +1573,7 @@
 
             if (s->edit_unit_byte_count)
                 offset_temp += s->edit_unit_byte_count * index;
-            else if (s->nb_index_entries) {
+            else {
                 if (s->nb_index_entries == 2 * s->index_duration + 1)
                     index *= 2;     /* Avid index */
 
@@ -1352,16 +1584,12 @@
                 }
 
                 offset_temp = s->stream_offset_entries[index];
-            } else {
-                av_log(mxf->fc, AV_LOG_ERROR, "IndexSID %i segment at %"PRId64" missing EditUnitByteCount and IndexEntryArray\n",
-                       index_table->index_sid, s->index_start_position);
-                return AVERROR_INVALIDDATA;
             }
 
             if (edit_unit_out)
-                *edit_unit_out = edit_unit;
+                *edit_unit_out = av_rescale_q(edit_unit, edit_rate, s->index_edit_rate);
 
-            return mxf_absolute_bodysid_offset(mxf, index_table->body_sid, offset_temp, offset_out);
+            return mxf_absolute_bodysid_offset(mxf, index_table->body_sid, offset_temp, offset_out, partition_out);
         } else {
             /* EditUnitByteCount == 0 for VBR indexes, which is fine since they use explicit StreamOffsets */
             offset_temp += s->edit_unit_byte_count * s->index_duration;
@@ -1389,6 +1617,12 @@
             return 0;                               /* no TemporalOffsets */
         }
 
+        if (s->index_duration > INT_MAX - index_table->nb_ptses) {
+            index_table->nb_ptses = 0;
+            av_log(mxf->fc, AV_LOG_ERROR, "ignoring IndexSID %d, duration is too large\n", s->index_sid);
+            return 0;
+        }
+
         index_table->nb_ptses += s->index_duration;
     }
 
@@ -1496,14 +1730,6 @@
 {
     int i, j, k, ret, nb_sorted_segments;
     MXFIndexTableSegment **sorted_segments = NULL;
-    AVStream *st = NULL;
-
-    for (i = 0; i < mxf->fc->nb_streams; i++) {
-        if (mxf->fc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_DATA)
-            continue;
-        st = mxf->fc->streams[i];
-        break;
-    }
 
     if ((ret = mxf_get_sorted_table_segments(mxf, &nb_sorted_segments, &sorted_segments)) ||
         nb_sorted_segments <= 0) {
@@ -1542,6 +1768,7 @@
 
     for (i = j = 0; j < mxf->nb_index_tables; i += mxf->index_tables[j++].nb_segments) {
         MXFIndexTable *t = &mxf->index_tables[j];
+        MXFTrack *mxf_track = NULL;
 
         t->segments = av_mallocz_array(t->nb_segments,
                                        sizeof(*t->segments));
@@ -1564,8 +1791,23 @@
         if ((ret = mxf_compute_ptses_fake_index(mxf, t)) < 0)
             goto finish_decoding_index;
 
+        for (k = 0; k < mxf->fc->nb_streams; k++) {
+            MXFTrack *track = mxf->fc->streams[k]->priv_data;
+            if (track && track->index_sid == t->index_sid) {
+                mxf_track = track;
+                break;
+            }
+        }
+
         /* fix zero IndexDurations */
         for (k = 0; k < t->nb_segments; k++) {
+            if (!t->segments[k]->index_edit_rate.num || !t->segments[k]->index_edit_rate.den) {
+                av_log(mxf->fc, AV_LOG_WARNING, "IndexSID %i segment %i has invalid IndexEditRate\n",
+                       t->index_sid, k);
+                if (mxf_track)
+                    t->segments[k]->index_edit_rate = mxf_track->edit_rate;
+            }
+
             if (t->segments[k]->index_duration)
                 continue;
 
@@ -1573,7 +1815,7 @@
                 av_log(mxf->fc, AV_LOG_WARNING, "IndexSID %i segment %i has zero IndexDuration and there's more than one segment\n",
                        t->index_sid, k);
 
-            if (!st) {
+            if (!mxf_track) {
                 av_log(mxf->fc, AV_LOG_WARNING, "no streams?\n");
                 break;
             }
@@ -1581,7 +1823,7 @@
             /* assume the first stream's duration is reasonable
              * leave index_duration = 0 on further segments in case we have any (unlikely)
              */
-            t->segments[k]->index_duration = st->duration;
+            t->segments[k]->index_duration = mxf_track->original_duration;
             break;
         }
     }
@@ -1680,7 +1922,7 @@
     return NULL;
 }
 
-static MXFPackage* mxf_resolve_source_package(MXFContext *mxf, UID package_uid)
+static MXFPackage* mxf_resolve_source_package(MXFContext *mxf, UID package_ul, UID package_uid)
 {
     MXFPackage *package = NULL;
     int i;
@@ -1690,7 +1932,7 @@
         if (!package)
             continue;
 
-        if (!memcmp(package->package_uid, package_uid, 16))
+        if (!memcmp(package->package_ul, package_ul, 16) && !memcmp(package->package_uid, package_uid, 16))
             return package;
     }
     return NULL;
@@ -1739,7 +1981,7 @@
         if (!component)
             continue;
 
-        if (!(package = mxf_resolve_source_package(mxf, component->source_package_uid)))
+        if (!(package = mxf_resolve_source_package(mxf, component->source_package_ul, component->source_package_uid)))
             continue;
 
         descriptor = mxf_resolve_strong_ref(mxf, &package->descriptor_ref, Descriptor);
@@ -1805,7 +2047,7 @@
         if (!sourceclip)
             continue;
 
-        if (!(physical_package = mxf_resolve_source_package(mxf, sourceclip->source_package_uid)))
+        if (!(physical_package = mxf_resolve_source_package(mxf, sourceclip->source_package_ul, sourceclip->source_package_uid)))
             break;
 
         mxf_add_umid_metadata(&st->metadata, "reel_umid", physical_package);
@@ -1975,7 +2217,7 @@
             if (!component)
                 continue;
 
-            source_package = mxf_resolve_source_package(mxf, component->source_package_uid);
+            source_package = mxf_resolve_source_package(mxf, component->source_package_ul, component->source_package_uid);
             if (!source_package) {
                 av_log(mxf->fc, AV_LOG_TRACE, "material track %d: no corresponding source package found\n", material_track->track_id);
                 continue;
@@ -1995,6 +2237,21 @@
                 av_log(mxf->fc, AV_LOG_ERROR, "material track %d: no corresponding source track found\n", material_track->track_id);
                 break;
             }
+
+            for (k = 0; k < mxf->essence_container_data_count; k++) {
+                MXFEssenceContainerData *essence_data;
+
+                if (!(essence_data = mxf_resolve_strong_ref(mxf, &mxf->essence_container_data_refs[k], EssenceContainerData))) {
+                    av_log(mxf->fc, AV_LOG_TRACE, "could not resolve essence container data strong ref\n");
+                    continue;
+                }
+                if (!memcmp(component->source_package_ul, essence_data->package_ul, sizeof(UID)) && !memcmp(component->source_package_uid, essence_data->package_uid, sizeof(UID))) {
+                    source_track->body_sid = essence_data->body_sid;
+                    source_track->index_sid = essence_data->index_sid;
+                    break;
+                }
+            }
+
             if(source_track && component)
                 break;
         }
@@ -2065,6 +2322,9 @@
         PRINT_KEY(mxf->fc, "essence codec     ul", descriptor->essence_codec_ul);
         PRINT_KEY(mxf->fc, "essence container ul", descriptor->essence_container_ul);
         essence_container_ul = &descriptor->essence_container_ul;
+        source_track->wrapping = (mxf->op == OPAtom) ? ClipWrapped : mxf_get_wrapping_kind(essence_container_ul);
+        if (source_track->wrapping == UnknownWrapped)
+            av_log(mxf->fc, AV_LOG_INFO, "wrapping of stream %d is unknown\n", st->index);
         /* HACK: replacing the original key with mxf_encrypted_essence_container
          * is not allowed according to s429-6, try to find correct information anyway */
         if (IS_KLV_KEY(essence_container_ul, mxf_encrypted_essence_container)) {
@@ -2208,7 +2468,6 @@
             if (st->codecpar->codec_id == AV_CODEC_ID_NONE || (st->codecpar->codec_id == AV_CODEC_ID_PCM_ALAW && (enum AVCodecID)container_ul->id != AV_CODEC_ID_NONE))
                 st->codecpar->codec_id = (enum AVCodecID)container_ul->id;
             st->codecpar->channels = descriptor->channels;
-            st->codecpar->bits_per_coded_sample = descriptor->bits_per_sample;
 
             if (descriptor->sample_rate.den > 0) {
                 st->codecpar->sample_rate = descriptor->sample_rate.num / descriptor->sample_rate.den;
@@ -2241,13 +2500,21 @@
             } else if (st->codecpar->codec_id == AV_CODEC_ID_MP2) {
                 st->need_parsing = AVSTREAM_PARSE_FULL;
             }
+            st->codecpar->bits_per_coded_sample = av_get_bits_per_sample(st->codecpar->codec_id);
         } else if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
-            int codec_id = mxf_get_codec_ul(mxf_data_essence_container_uls,
-                                            essence_container_ul)->id;
-            if (codec_id >= 0 &&
-                codec_id < FF_ARRAY_ELEMS(mxf_data_essence_descriptor)) {
-                av_dict_set(&st->metadata, "data_type",
-                            mxf_data_essence_descriptor[codec_id], 0);
+            enum AVMediaType type;
+            container_ul = mxf_get_codec_ul(mxf_data_essence_container_uls, essence_container_ul);
+            if (st->codecpar->codec_id == AV_CODEC_ID_NONE)
+                st->codecpar->codec_id = container_ul->id;
+            type = avcodec_get_type(st->codecpar->codec_id);
+            if (type == AVMEDIA_TYPE_SUBTITLE)
+                st->codecpar->codec_type = type;
+            if (container_ul->desc)
+                av_dict_set(&st->metadata, "data_type", container_ul->desc, 0);
+            if (mxf->eia608_extract &&
+                !strcmp(container_ul->desc, "vbi_vanc_smpte_436M")) {
+                st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
+                st->codecpar->codec_id = AV_CODEC_ID_EIA_608;
             }
         }
         if (descriptor->extradata) {
@@ -2263,7 +2530,7 @@
             if (ret < 0)
                 return ret;
         }
-        if (st->codecpar->codec_type != AVMEDIA_TYPE_DATA && (*essence_container_ul)[15] > 0x01) {
+        if (st->codecpar->codec_type != AVMEDIA_TYPE_DATA && source_track->wrapping != FrameWrapped) {
             /* TODO: decode timestamps */
             st->need_parsing = AVSTREAM_PARSE_TIMESTAMPS;
         }
@@ -2393,14 +2660,17 @@
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x48,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* Wave */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x47,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* AES3 */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x51,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* MPEG2VideoDescriptor */
+    { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x5b,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* VBI - SMPTE 436M */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x5c,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* VANC/VBI - SMPTE 436M */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x5e,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* MPEG2AudioDescriptor */
+    { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x64,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* DC Timed Text Descriptor */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x3A,0x00 }, mxf_read_track, sizeof(MXFTrack), Track }, /* Static Track */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x3B,0x00 }, mxf_read_track, sizeof(MXFTrack), Track }, /* Generic Track */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x14,0x00 }, mxf_read_timecode_component, sizeof(MXFTimecodeComponent), TimecodeComponent },
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x0c,0x00 }, mxf_read_pulldown_component, sizeof(MXFPulldownComponent), PulldownComponent },
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x04,0x01,0x02,0x02,0x00,0x00 }, mxf_read_cryptographic_context, sizeof(MXFCryptoContext), CryptoContext },
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x02,0x01,0x01,0x10,0x01,0x00 }, mxf_read_index_table_segment, sizeof(MXFIndexTableSegment), IndexTableSegment },
+    { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x23,0x00 }, mxf_read_essence_container_data, sizeof(MXFEssenceContainerData), EssenceContainerData },
     { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 }, NULL, 0, AnyType },
 };
 
@@ -2453,7 +2723,8 @@
         if (ctx_size && tag == 0x3C0A) {
             avio_read(pb, ctx->uid, 16);
         } else if ((ret = read_child(ctx, pb, tag, size, uid, -1)) < 0) {
-            mxf_free_metadataset(&ctx, !!ctx_size);
+            if (ctx_size)
+                mxf_free_metadataset(&ctx, 1);
             return ret;
         }
 
@@ -2462,7 +2733,7 @@
         if (avio_tell(pb) > klv_end) {
             if (ctx_size) {
                 ctx->type = type;
-                mxf_free_metadataset(&ctx, !!ctx_size);
+                mxf_free_metadataset(&ctx, 1);
             }
 
             av_log(mxf->fc, AV_LOG_ERROR,
@@ -2624,65 +2895,70 @@
     return mxf->parsing_backward ? mxf_seek_to_previous_partition(mxf) : 1;
 }
 
+static MXFWrappingScheme mxf_get_wrapping_by_body_sid(AVFormatContext *s, int body_sid)
+{
+    for (int i = 0; i < s->nb_streams; i++) {
+        MXFTrack *track = s->streams[i]->priv_data;
+        if (track && track->body_sid == body_sid && track->wrapping != UnknownWrapped)
+            return track->wrapping;
+    }
+    return UnknownWrapped;
+}
+
 /**
  * Figures out the proper offset and length of the essence container in each partition
  */
-static void mxf_compute_essence_containers(MXFContext *mxf)
+static void mxf_compute_essence_containers(AVFormatContext *s)
 {
+    MXFContext *mxf = s->priv_data;
     int x;
 
-    /* everything is already correct */
-    if (mxf->op == OPAtom)
-        return;
-
     for (x = 0; x < mxf->partitions_count; x++) {
         MXFPartition *p = &mxf->partitions[x];
+        MXFWrappingScheme wrapping;
 
         if (!p->body_sid)
             continue;       /* BodySID == 0 -> no essence */
 
-        if (x >= mxf->partitions_count - 1)
-            break;          /* FooterPartition - can't compute length (and we don't need to) */
+        /* for clip wrapped essences we point essence_offset after the KL (usually klv.offset + 20 or 25)
+         * otherwise we point essence_offset at the key of the first essence KLV.
+         */
 
-        /* essence container spans to the next partition */
-        p->essence_length = mxf->partitions[x+1].this_partition - p->essence_offset;
+        wrapping = (mxf->op == OPAtom) ? ClipWrapped : mxf_get_wrapping_by_body_sid(s, p->body_sid);
 
-        if (p->essence_length < 0) {
-            /* next ThisPartition < essence_offset */
-            p->essence_length = 0;
-            av_log(mxf->fc, AV_LOG_ERROR,
-                   "partition %i: bad ThisPartition = %"PRIX64"\n",
-                   x+1, mxf->partitions[x+1].this_partition);
+        if (wrapping == ClipWrapped) {
+            p->essence_offset = p->first_essence_klv.next_klv - p->first_essence_klv.length;
+            p->essence_length = p->first_essence_klv.length;
+        } else {
+            p->essence_offset = p->first_essence_klv.offset;
+
+            /* essence container spans to the next partition */
+            if (x < mxf->partitions_count - 1)
+                p->essence_length = mxf->partitions[x+1].this_partition - p->essence_offset;
+
+            if (p->essence_length < 0) {
+                /* next ThisPartition < essence_offset */
+                p->essence_length = 0;
+                av_log(mxf->fc, AV_LOG_ERROR,
+                       "partition %i: bad ThisPartition = %"PRIX64"\n",
+                       x+1, mxf->partitions[x+1].this_partition);
+            }
         }
     }
 }
 
-static int64_t round_to_kag(int64_t position, int kag_size)
-{
-    /* TODO: account for run-in? the spec isn't clear whether KAG should account for it */
-    /* NOTE: kag_size may be any integer between 1 - 2^10 */
-    int64_t ret = (position / kag_size) * kag_size;
-    return ret == position ? ret : ret + kag_size;
-}
-
 static int is_pcm(enum AVCodecID codec_id)
 {
     /* we only care about "normal" PCM codecs until we get samples */
     return codec_id >= AV_CODEC_ID_PCM_S16LE && codec_id < AV_CODEC_ID_PCM_S24DAUD;
 }
 
-static AVStream* mxf_get_opatom_stream(MXFContext *mxf)
+static MXFIndexTable *mxf_find_index_table(MXFContext *mxf, int index_sid)
 {
     int i;
-
-    if (mxf->op != OPAtom)
-        return NULL;
-
-    for (i = 0; i < mxf->fc->nb_streams; i++) {
-        if (mxf->fc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_DATA)
-            continue;
-        return mxf->fc->streams[i];
-    }
+    for (i = 0; i < mxf->nb_index_tables; i++)
+        if (mxf->index_tables[i].index_sid == index_sid)
+            return &mxf->index_tables[i];
     return NULL;
 }
 
@@ -2691,61 +2967,62 @@
  * very small (2, 4..). In those cases we should read more than one
  * sample per call to mxf_read_packet().
  */
-static void mxf_handle_small_eubc(AVFormatContext *s)
+static void mxf_compute_edit_units_per_packet(MXFContext *mxf, AVStream *st)
 {
-    MXFContext *mxf = s->priv_data;
+    MXFTrack *track = st->priv_data;
+    MXFIndexTable *t;
 
-    /* assuming non-OPAtom == frame wrapped
-     * no sane writer would wrap 2 byte PCM packets with 20 byte headers.. */
-    AVStream *st = mxf_get_opatom_stream(mxf);
-    if (!st)
+    if (!track)
         return;
+    track->edit_units_per_packet = 1;
+    if (track->wrapping != ClipWrapped)
+        return;
+
+    t = mxf_find_index_table(mxf, track->index_sid);
 
     /* expect PCM with exactly one index table segment and a small (< 32) EUBC */
     if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO         ||
         !is_pcm(st->codecpar->codec_id)                        ||
-        mxf->nb_index_tables != 1                              ||
-        mxf->index_tables[0].nb_segments != 1                  ||
-        mxf->index_tables[0].segments[0]->edit_unit_byte_count >= 32)
+        !t                                                     ||
+        t->nb_segments != 1                                    ||
+        t->segments[0]->edit_unit_byte_count >= 32)
         return;
 
     /* arbitrarily default to 48 kHz PAL audio frame size */
     /* TODO: We could compute this from the ratio between the audio
      *       and video edit rates for 48 kHz NTSC we could use the
      *       1802-1802-1802-1802-1801 pattern. */
-    mxf->edit_units_per_packet = 1920;
+    track->edit_units_per_packet = FFMAX(1, track->edit_rate.num / track->edit_rate.den / 25);
 }
 
 /**
- * Deal with the case where OPAtom files does not have any IndexTableSegments.
+ * Deal with the case where ClipWrapped essences does not have any IndexTableSegments.
  */
-static int mxf_handle_missing_index_segment(MXFContext *mxf)
+static int mxf_handle_missing_index_segment(MXFContext *mxf, AVStream *st)
 {
-    AVFormatContext *s = mxf->fc;
-    AVStream *st = NULL;
+    MXFTrack *track = st->priv_data;
     MXFIndexTableSegment *segment = NULL;
     MXFPartition *p = NULL;
     int essence_partition_count = 0;
+    int edit_unit_byte_count = 0;
     int i, ret;
 
-    st = mxf_get_opatom_stream(mxf);
-    if (!st)
+    if (!track || track->wrapping != ClipWrapped)
         return 0;
 
-    /* TODO: support raw video without an index if they exist */
-    if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO || !is_pcm(st->codecpar->codec_id))
-        return 0;
-
-    /* check if file already has a IndexTableSegment */
+    /* check if track already has an IndexTableSegment */
     for (i = 0; i < mxf->metadata_sets_count; i++) {
-        if (mxf->metadata_sets[i]->type == IndexTableSegment)
-            return 0;
+        if (mxf->metadata_sets[i]->type == IndexTableSegment) {
+            MXFIndexTableSegment *s = (MXFIndexTableSegment*)mxf->metadata_sets[i];
+            if (s->body_sid == track->body_sid)
+                return 0;
+        }
     }
 
     /* find the essence partition */
     for (i = 0; i < mxf->partitions_count; i++) {
         /* BodySID == 0 -> no essence */
-        if (!mxf->partitions[i].body_sid)
+        if (mxf->partitions[i].body_sid != track->body_sid)
             continue;
 
         p = &mxf->partitions[i];
@@ -2756,6 +3033,17 @@
     if (essence_partition_count != 1)
         return 0;
 
+    if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && is_pcm(st->codecpar->codec_id)) {
+        edit_unit_byte_count = (av_get_bits_per_sample(st->codecpar->codec_id) * st->codecpar->channels) >> 3;
+    } else if (st->duration > 0 && p->first_essence_klv.length > 0 && p->first_essence_klv.length % st->duration == 0) {
+        edit_unit_byte_count = p->first_essence_klv.length / st->duration;
+    }
+
+    if (edit_unit_byte_count <= 0)
+        return 0;
+
+    av_log(mxf->fc, AV_LOG_WARNING, "guessing index for stream %d using edit unit byte count %d\n", st->index, edit_unit_byte_count);
+
     if (!(segment = av_mallocz(sizeof(*segment))))
         return AVERROR(ENOMEM);
 
@@ -2764,12 +3052,18 @@
         return ret;
     }
 
+    /* Make sure we have nonzero unique index_sid, body_sid will be ok, because
+     * using the same SID for index is forbidden in MXF. */
+    if (!track->index_sid)
+        track->index_sid = track->body_sid;
+
     segment->type = IndexTableSegment;
     /* stream will be treated as small EditUnitByteCount */
-    segment->edit_unit_byte_count = (av_get_bits_per_sample(st->codecpar->codec_id) * st->codecpar->channels) >> 3;
+    segment->edit_unit_byte_count = edit_unit_byte_count;
     segment->index_start_position = 0;
-    segment->index_duration = s->streams[0]->duration;
-    segment->index_sid = p->index_sid;
+    segment->index_duration = st->duration;
+    segment->index_edit_rate = av_inv_q(st->time_base);
+    segment->index_sid = track->index_sid;
     segment->body_sid = p->body_sid;
     return 0;
 }
@@ -2833,7 +3127,6 @@
     int ret;
 
     mxf->last_forward_tell = INT64_MAX;
-    mxf->edit_units_per_packet = 1;
 
     if (!mxf_read_sync(s->pb, mxf_header_partition_pack_key, 14)) {
         av_log(s, AV_LOG_ERROR, "could not find header partition pack key\n");
@@ -2860,37 +3153,18 @@
         av_log(s, AV_LOG_TRACE, "size %"PRIu64" offset %#"PRIx64"\n", klv.length, klv.offset);
         if (IS_KLV_KEY(klv.key, mxf_encrypted_triplet_key) ||
             IS_KLV_KEY(klv.key, mxf_essence_element_key) ||
+            IS_KLV_KEY(klv.key, mxf_canopus_essence_element_key) ||
             IS_KLV_KEY(klv.key, mxf_avid_essence_element_key) ||
-            IS_KLV_KEY(klv.key, mxf_system_item_key)) {
+            IS_KLV_KEY(klv.key, mxf_system_item_key_cp) ||
+            IS_KLV_KEY(klv.key, mxf_system_item_key_gc)) {
 
             if (!mxf->current_partition) {
                 av_log(mxf->fc, AV_LOG_ERROR, "found essence prior to first PartitionPack\n");
                 return AVERROR_INVALIDDATA;
             }
 
-            if (!mxf->current_partition->essence_offset) {
-                /* for OP1a we compute essence_offset
-                 * for OPAtom we point essence_offset after the KL (usually op1a_essence_offset + 20 or 25)
-                 * TODO: for OP1a we could eliminate this entire if statement, always stopping parsing at op1a_essence_offset
-                 *       for OPAtom we still need the actual essence_offset though (the KL's length can vary)
-                 */
-                int64_t op1a_essence_offset =
-                    round_to_kag(mxf->current_partition->this_partition +
-                                 mxf->current_partition->pack_length,       mxf->current_partition->kag_size) +
-                    round_to_kag(mxf->current_partition->header_byte_count, mxf->current_partition->kag_size) +
-                    round_to_kag(mxf->current_partition->index_byte_count,  mxf->current_partition->kag_size);
-
-                if (mxf->op == OPAtom) {
-                    /* point essence_offset to the actual data
-                    * OPAtom has all the essence in one big KLV
-                    */
-                    mxf->current_partition->essence_offset = avio_tell(s->pb);
-                    mxf->current_partition->essence_length = klv.length;
-                } else {
-                    /* NOTE: op1a_essence_offset may be less than to klv.offset (C0023S01.mxf)  */
-                    mxf->current_partition->essence_offset = op1a_essence_offset;
-                }
-            }
+            if (!mxf->current_partition->first_essence_klv.offset)
+                mxf->current_partition->first_essence_klv = klv;
 
             if (!essence_offset)
                 essence_offset = klv.offset;
@@ -2929,14 +3203,14 @@
     }
     avio_seek(s->pb, essence_offset, SEEK_SET);
 
-    mxf_compute_essence_containers(mxf);
-
     /* we need to do this before computing the index tables
      * to be able to fill in zero IndexDurations with st->duration */
     if ((ret = mxf_parse_structural_metadata(mxf)) < 0)
         goto fail;
 
-    mxf_handle_missing_index_segment(mxf);
+    for (int i = 0; i < s->nb_streams; i++)
+        mxf_handle_missing_index_segment(mxf, s->streams[i]);
+
     if ((ret = mxf_compute_index_tables(mxf)) < 0)
         goto fail;
 
@@ -2944,13 +3218,16 @@
         /* TODO: look up which IndexSID to use via EssenceContainerData */
         av_log(mxf->fc, AV_LOG_INFO, "got %i index tables - only the first one (IndexSID %i) will be used\n",
                mxf->nb_index_tables, mxf->index_tables[0].index_sid);
-    } else if (mxf->nb_index_tables == 0 && mxf->op == OPAtom) {
+    } else if (mxf->nb_index_tables == 0 && mxf->op == OPAtom && (s->error_recognition & AV_EF_EXPLODE)) {
         av_log(mxf->fc, AV_LOG_ERROR, "cannot demux OPAtom without an index\n");
         ret = AVERROR_INVALIDDATA;
         goto fail;
     }
 
-    mxf_handle_small_eubc(s);
+    mxf_compute_essence_containers(s);
+
+    for (int i = 0; i < s->nb_streams; i++)
+        mxf_compute_edit_units_per_packet(mxf, s->streams[i]);
 
     return 0;
 fail:
@@ -2959,71 +3236,59 @@
     return ret;
 }
 
-/**
- * Sets mxf->current_edit_unit based on what offset we're currently at.
- * @return next_ofs if OK, <0 on error
- */
-static int64_t mxf_set_current_edit_unit(MXFContext *mxf, int64_t current_offset)
+/* Get the edit unit of the next packet from current_offset in a track. The returned edit unit can be original_duration as well! */
+static int mxf_get_next_track_edit_unit(MXFContext *mxf, MXFTrack *track, int64_t current_offset, int64_t *edit_unit_out)
 {
-    int64_t last_ofs = -1, next_ofs = -1;
-    MXFIndexTable *t = &mxf->index_tables[0];
+    int64_t a, b, m, offset;
+    MXFIndexTable *t = mxf_find_index_table(mxf, track->index_sid);
 
-    /* this is called from the OP1a demuxing logic, which means there
-     * may be no index tables */
-    if (mxf->nb_index_tables <= 0)
+    if (!t || track->original_duration <= 0)
         return -1;
 
-    /* find mxf->current_edit_unit so that the next edit unit starts ahead of current_offset */
-    while (mxf->current_edit_unit >= 0) {
-        if (mxf_edit_unit_absolute_offset(mxf, t, mxf->current_edit_unit + 1, NULL, &next_ofs, 0) < 0)
+    a = -1;
+    b = track->original_duration;
+
+    while (b - a > 1) {
+        m = (a + b) >> 1;
+        if (mxf_edit_unit_absolute_offset(mxf, t, m, track->edit_rate, NULL, &offset, NULL, 0) < 0)
             return -1;
-
-        if (next_ofs <= last_ofs) {
-            /* large next_ofs didn't change or current_edit_unit wrapped
-             * around this fixes the infinite loop on zzuf3.mxf */
-            av_log(mxf->fc, AV_LOG_ERROR,
-                   "next_ofs didn't change. not deriving packet timestamps\n");
-            return -1;
-        }
-
-        if (next_ofs > current_offset)
-            break;
-
-        last_ofs = next_ofs;
-        mxf->current_edit_unit++;
+        if (offset < current_offset)
+            a = m;
+        else
+            b = m;
     }
 
-    /* not checking mxf->current_edit_unit >= t->nb_ptses here since CBR files may lack IndexEntryArrays */
-    if (mxf->current_edit_unit < 0)
-        return -1;
+    *edit_unit_out = b;
 
-    return next_ofs;
+    return 0;
 }
 
-static int mxf_compute_sample_count(MXFContext *mxf, int stream_index,
-                                    uint64_t *sample_count)
+static int64_t mxf_compute_sample_count(MXFContext *mxf, AVStream *st,
+                                        int64_t edit_unit)
 {
     int i, total = 0, size = 0;
-    AVStream *st = mxf->fc->streams[stream_index];
     MXFTrack *track = st->priv_data;
     AVRational time_base = av_inv_q(track->edit_rate);
     AVRational sample_rate = av_inv_q(st->time_base);
     const MXFSamplesPerFrame *spf = NULL;
+    int64_t sample_count;
+
+    // For non-audio sample_count equals current edit unit
+    if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
+        return edit_unit;
 
     if ((sample_rate.num / sample_rate.den) == 48000)
         spf = ff_mxf_get_samples_per_frame(mxf->fc, time_base);
     if (!spf) {
         int remainder = (sample_rate.num * time_base.num) %
                         (time_base.den * sample_rate.den);
-        *sample_count = av_q2d(av_mul_q((AVRational){mxf->current_edit_unit, 1},
-                                        av_mul_q(sample_rate, time_base)));
         if (remainder)
             av_log(mxf->fc, AV_LOG_WARNING,
                    "seeking detected on stream #%d with time base (%d/%d) and "
                    "sample rate (%d/%d), audio pts won't be accurate.\n",
-                   stream_index, time_base.num, time_base.den,
+                   st->index, time_base.num, time_base.den,
                    sample_rate.num, sample_rate.den);
-        return 0;
+        return av_rescale_q(edit_unit, sample_rate, track->edit_rate);
     }
 
     while (spf->samples_per_frame[size]) {
@@ -3033,18 +3298,62 @@
 
     av_assert2(size);
 
-    *sample_count = (mxf->current_edit_unit / size) * (uint64_t)total;
-    for (i = 0; i < mxf->current_edit_unit % size; i++) {
-        *sample_count += spf->samples_per_frame[i];
+    sample_count = (edit_unit / size) * (uint64_t)total;
+    for (i = 0; i < edit_unit % size; i++) {
+        sample_count += spf->samples_per_frame[i];
     }
 
-    return 0;
+    return sample_count;
+}
+
+/**
+ * Make sure track->sample_count is correct based on what offset we're currently at.
+ * Also determine the next edit unit (or packet) offset.
+ * @return next_ofs if OK, <0 on error
+ */
+static int64_t mxf_set_current_edit_unit(MXFContext *mxf, AVStream *st, int64_t current_offset, int resync)
+{
+    int64_t next_ofs = -1;
+    MXFTrack *track = st->priv_data;
+    int64_t edit_unit = av_rescale_q(track->sample_count, st->time_base, av_inv_q(track->edit_rate));
+    int64_t new_edit_unit;
+    MXFIndexTable *t = mxf_find_index_table(mxf, track->index_sid);
+
+    if (!t || track->wrapping == UnknownWrapped)
+        return -1;
+
+    if (mxf_edit_unit_absolute_offset(mxf, t, edit_unit + track->edit_units_per_packet, track->edit_rate, NULL, &next_ofs, NULL, 0) < 0 &&
+        (next_ofs = mxf_essence_container_end(mxf, t->body_sid)) <= 0) {
+        av_log(mxf->fc, AV_LOG_ERROR, "unable to compute the size of the last packet\n");
+        return -1;
+    }
+
+    /* check if the next edit unit offset (next_ofs) starts ahead of current_offset */
+    if (next_ofs > current_offset)
+        return next_ofs;
+
+    if (!resync) {
+        av_log(mxf->fc, AV_LOG_ERROR, "cannot find current edit unit for stream %d, invalid index?\n", st->index);
+        return -1;
+    }
+
+    if (mxf_get_next_track_edit_unit(mxf, track, current_offset + 1, &new_edit_unit) < 0 || new_edit_unit <= 0) {
+        av_log(mxf->fc, AV_LOG_ERROR, "failed to find next track edit unit in stream %d\n", st->index);
+        return -1;
+    }
+
+    new_edit_unit--;
+    track->sample_count = mxf_compute_sample_count(mxf, st, new_edit_unit);
+    av_log(mxf->fc, AV_LOG_WARNING, "edit unit sync lost on stream %d, jumping from %"PRId64" to %"PRId64"\n", st->index, edit_unit, new_edit_unit);
+
+    return mxf_set_current_edit_unit(mxf, st, current_offset, 0);
 }
 
 static int mxf_set_audio_pts(MXFContext *mxf, AVCodecParameters *par,
                              AVPacket *pkt)
 {
-    MXFTrack *track = mxf->fc->streams[pkt->stream_index]->priv_data;
+    AVStream *st = mxf->fc->streams[pkt->stream_index];
+    MXFTrack *track = st->priv_data;
     int64_t bits_per_sample = par->bits_per_coded_sample;
 
     if (!bits_per_sample)
@@ -3055,60 +3364,82 @@
     if (   par->channels <= 0
         || bits_per_sample <= 0
         || par->channels * (int64_t)bits_per_sample < 8)
-        return AVERROR(EINVAL);
-    track->sample_count += pkt->size / (par->channels * (int64_t)bits_per_sample / 8);
+        track->sample_count = mxf_compute_sample_count(mxf, st, av_rescale_q(track->sample_count, st->time_base, av_inv_q(track->edit_rate)) + 1);
+    else
+        track->sample_count += pkt->size / (par->channels * (int64_t)bits_per_sample / 8);
+
     return 0;
 }
 
-static int mxf_set_pts(MXFContext *mxf, AVStream *st, AVPacket *pkt, int64_t next_ofs)
+static int mxf_set_pts(MXFContext *mxf, AVStream *st, AVPacket *pkt)
 {
     AVCodecParameters *par = st->codecpar;
     MXFTrack *track = st->priv_data;
 
-    if (par->codec_type == AVMEDIA_TYPE_VIDEO && next_ofs >= 0) {
-        /* mxf->current_edit_unit good - see if we have an
-         * index table to derive timestamps from */
-        MXFIndexTable *t = &mxf->index_tables[0];
+    if (par->codec_type == AVMEDIA_TYPE_VIDEO) {
+        /* see if we have an index table to derive timestamps from */
+        MXFIndexTable *t = mxf_find_index_table(mxf, track->index_sid);
 
-        if (mxf->nb_index_tables >= 1 && mxf->current_edit_unit < t->nb_ptses) {
-            pkt->dts = mxf->current_edit_unit + t->first_dts;
-            pkt->pts = t->ptses[mxf->current_edit_unit];
-        } else if (track && track->intra_only) {
+        if (t && track->sample_count < t->nb_ptses) {
+            pkt->dts = track->sample_count + t->first_dts;
+            pkt->pts = t->ptses[track->sample_count];
+        } else if (track->intra_only) {
             /* intra-only -> PTS = EditUnit.
              * let utils.c figure out DTS since it can be < PTS if low_delay = 0 (Sony IMX30) */
-            pkt->pts = mxf->current_edit_unit;
+            pkt->pts = track->sample_count;
         }
+        track->sample_count++;
     } else if (par->codec_type == AVMEDIA_TYPE_AUDIO) {
         int ret = mxf_set_audio_pts(mxf, par, pkt);
         if (ret < 0)
             return ret;
+    } else if (track) {
+        pkt->dts = pkt->pts = track->sample_count;
+        pkt->duration = 1;
+        track->sample_count++;
     }
     return 0;
 }
 
-static int mxf_read_packet_old(AVFormatContext *s, AVPacket *pkt)
+static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     KLVPacket klv;
     MXFContext *mxf = s->priv_data;
     int ret;
 
-    while ((ret = klv_read_packet(&klv, s->pb)) == 0) {
-        PRINT_KEY(s, "read packet", klv.key);
-        av_log(s, AV_LOG_TRACE, "size %"PRIu64" offset %#"PRIx64"\n", klv.length, klv.offset);
-        if (IS_KLV_KEY(klv.key, mxf_encrypted_triplet_key)) {
-            ret = mxf_decrypt_triplet(s, pkt, &klv);
-            if (ret < 0) {
-                av_log(s, AV_LOG_ERROR, "invalid encoded triplet\n");
-                return ret;
+    while (1) {
+        int64_t max_data_size;
+        int64_t pos = avio_tell(s->pb);
+
+        if (pos < mxf->current_klv_data.next_klv - mxf->current_klv_data.length || pos >= mxf->current_klv_data.next_klv) {
+            mxf->current_klv_data = (KLVPacket){{0}};
+            ret = klv_read_packet(&klv, s->pb);
+            if (ret < 0)
+                break;
+            max_data_size = klv.length;
+            pos = klv.next_klv - klv.length;
+            PRINT_KEY(s, "read packet", klv.key);
+            av_log(s, AV_LOG_TRACE, "size %"PRIu64" offset %#"PRIx64"\n", klv.length, klv.offset);
+            if (IS_KLV_KEY(klv.key, mxf_encrypted_triplet_key)) {
+                ret = mxf_decrypt_triplet(s, pkt, &klv);
+                if (ret < 0) {
+                    av_log(s, AV_LOG_ERROR, "invalid encoded triplet\n");
+                    return ret;
+                }
+                return 0;
             }
-            return 0;
+        } else {
+            klv = mxf->current_klv_data;
+            max_data_size = klv.next_klv - pos;
         }
         if (IS_KLV_KEY(klv.key, mxf_essence_element_key) ||
             IS_KLV_KEY(klv.key, mxf_canopus_essence_element_key) ||
             IS_KLV_KEY(klv.key, mxf_avid_essence_element_key)) {
-            int index = mxf_get_stream_index(s, &klv);
-            int64_t next_ofs, next_klv;
+            int body_sid = find_body_sid_by_offset(mxf, klv.offset);
+            int index = mxf_get_stream_index(s, &klv, body_sid);
+            int64_t next_ofs;
             AVStream *st;
+            MXFTrack *track;
 
             if (index < 0) {
                 av_log(s, AV_LOG_ERROR,
@@ -3118,22 +3449,38 @@
             }
 
             st = s->streams[index];
+            track = st->priv_data;
 
             if (s->streams[index]->discard == AVDISCARD_ALL)
                 goto skip;
 
-            next_klv = avio_tell(s->pb) + klv.length;
-            next_ofs = mxf_set_current_edit_unit(mxf, klv.offset);
+            next_ofs = mxf_set_current_edit_unit(mxf, st, pos, 1);
 
-            if (next_ofs >= 0 && next_klv > next_ofs) {
-                /* if this check is hit then it's possible OPAtom was treated as OP1a
-                 * truncate the packet since it's probably very large (>2 GiB is common) */
-                avpriv_request_sample(s,
-                                      "OPAtom misinterpreted as OP1a? "
-                                      "KLV for edit unit %i extending into "
-                                      "next edit unit",
-                                      mxf->current_edit_unit);
-                klv.length = next_ofs - avio_tell(s->pb);
+            if (track->wrapping != FrameWrapped) {
+                int64_t size;
+
+                if (next_ofs <= 0) {
+                    // If we have no way to packetize the data, then return it in chunks...
+                    if (klv.next_klv - klv.length == pos && max_data_size > MXF_MAX_CHUNK_SIZE) {
+                        st->need_parsing = AVSTREAM_PARSE_FULL;
+                        avpriv_request_sample(s, "Huge KLV without proper index in non-frame wrapped essence");
+                    }
+                    size = FFMIN(max_data_size, MXF_MAX_CHUNK_SIZE);
+                } else {
+                    if ((size = next_ofs - pos) <= 0) {
+                        av_log(s, AV_LOG_ERROR, "bad size: %"PRId64"\n", size);
+                        ret = AVERROR_INVALIDDATA;
+                        goto skip;
+                    }
+                    // We must not overread, because the next edit unit might be in another KLV
+                    if (size > max_data_size)
+                        size = max_data_size;
+                }
+
+                mxf->current_klv_data = klv;
+                klv.offset = pos;
+                klv.length = size;
+                klv.next_klv = klv.offset + klv.length;
             }
 
             /* check for 8 channels AES3 element */
@@ -3142,96 +3489,52 @@
                                               pkt, klv.length);
                 if (ret < 0) {
                     av_log(s, AV_LOG_ERROR, "error reading D-10 aes3 frame\n");
+                    mxf->current_klv_data = (KLVPacket){{0}};
+                    return ret;
+                }
+            } else if (mxf->eia608_extract &&
+                       s->streams[index]->codecpar->codec_id == AV_CODEC_ID_EIA_608) {
+                ret = mxf_get_eia608_packet(s, s->streams[index], pkt, klv.length);
+                if (ret < 0) {
+                    mxf->current_klv_data = (KLVPacket){{0}};
                     return ret;
                 }
             } else {
                 ret = av_get_packet(s->pb, pkt, klv.length);
-                if (ret < 0)
+                if (ret < 0) {
+                    mxf->current_klv_data = (KLVPacket){{0}};
                     return ret;
+                }
             }
             pkt->stream_index = index;
             pkt->pos = klv.offset;
 
-            ret = mxf_set_pts(mxf, st, pkt, next_ofs);
-            if (ret < 0)
+            ret = mxf_set_pts(mxf, st, pkt);
+            if (ret < 0) {
+                mxf->current_klv_data = (KLVPacket){{0}};
                 return ret;
+            }
 
             /* seek for truncated packets */
-            avio_seek(s->pb, next_klv, SEEK_SET);
+            avio_seek(s->pb, klv.next_klv, SEEK_SET);
 
             return 0;
-        } else
+        } else {
         skip:
-            avio_skip(s->pb, klv.length);
+            avio_skip(s->pb, max_data_size);
+            mxf->current_klv_data = (KLVPacket){{0}};
+        }
     }
     return avio_feof(s->pb) ? AVERROR_EOF : ret;
 }
 
-static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
-{
-    MXFContext *mxf = s->priv_data;
-    int ret, size;
-    int64_t ret64, pos, next_pos;
-    AVStream *st;
-    MXFIndexTable *t;
-    int edit_units;
-
-    if (mxf->op != OPAtom)
-        return mxf_read_packet_old(s, pkt);
-
-    // If we have no streams then we basically are at EOF
-    st = mxf_get_opatom_stream(mxf);
-    if (!st)
-        return AVERROR_EOF;
-
-    /* OPAtom - clip wrapped demuxing */
-    /* NOTE: mxf_read_header() makes sure nb_index_tables > 0 for OPAtom */
-    t = &mxf->index_tables[0];
-
-    if (mxf->current_edit_unit >= st->duration)
-        return AVERROR_EOF;
-
-    edit_units = FFMIN(mxf->edit_units_per_packet, st->duration - mxf->current_edit_unit);
-
-    if ((ret = mxf_edit_unit_absolute_offset(mxf, t, mxf->current_edit_unit, NULL, &pos, 1)) < 0)
-        return ret;
-
-    /* compute size by finding the next edit unit or the end of the essence container
-     * not pretty, but it works */
-    if ((ret = mxf_edit_unit_absolute_offset(mxf, t, mxf->current_edit_unit + edit_units, NULL, &next_pos, 0)) < 0 &&
-        (next_pos = mxf_essence_container_end(mxf, t->body_sid)) <= 0) {
-        av_log(s, AV_LOG_ERROR, "unable to compute the size of the last packet\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    if ((size = next_pos - pos) <= 0) {
-        av_log(s, AV_LOG_ERROR, "bad size: %i\n", size);
-        return AVERROR_INVALIDDATA;
-    }
-
-    if ((ret64 = avio_seek(s->pb, pos, SEEK_SET)) < 0)
-        return ret64;
-
-    if ((size = av_get_packet(s->pb, pkt, size)) < 0)
-        return size;
-
-    pkt->stream_index = st->index;
-
-    ret = mxf_set_pts(mxf, st, pkt, next_pos);
-    if (ret < 0)
-        return ret;
-
-    mxf->current_edit_unit += edit_units;
-
-    return 0;
-}
-
 static int mxf_read_close(AVFormatContext *s)
 {
     MXFContext *mxf = s->priv_data;
     int i;
 
     av_freep(&mxf->packages_refs);
+    av_freep(&mxf->essence_container_data_refs);
 
     for (i = 0; i < s->nb_streams; i++)
         s->streams[i]->priv_data = NULL;
@@ -3294,7 +3597,7 @@
     MXFIndexTable *t;
     MXFTrack *source_track = st->priv_data;
 
-    if(st->codecpar->codec_type == AVMEDIA_TYPE_DATA)
+    if (!source_track)
         return 0;
 
     /* if audio then truncate sample_time to EditRate */
@@ -3303,20 +3606,36 @@
                                    av_inv_q(source_track->edit_rate));
 
     if (mxf->nb_index_tables <= 0) {
-    if (!s->bit_rate)
-        return AVERROR_INVALIDDATA;
-    if (sample_time < 0)
-        sample_time = 0;
-    seconds = av_rescale(sample_time, st->time_base.num, st->time_base.den);
+        if (!s->bit_rate)
+            return AVERROR_INVALIDDATA;
+        if (sample_time < 0)
+            sample_time = 0;
+        seconds = av_rescale(sample_time, st->time_base.num, st->time_base.den);
 
-    seekpos = avio_seek(s->pb, (s->bit_rate * seconds) >> 3, SEEK_SET);
-    if (seekpos < 0)
-        return seekpos;
+        seekpos = avio_seek(s->pb, (s->bit_rate * seconds) >> 3, SEEK_SET);
+        if (seekpos < 0)
+            return seekpos;
 
-    ff_update_cur_dts(s, st, sample_time);
-    mxf->current_edit_unit = sample_time;
+        ff_update_cur_dts(s, st, sample_time);
+        mxf->current_klv_data = (KLVPacket){{0}};
     } else {
+        MXFPartition *partition;
+
         t = &mxf->index_tables[0];
+        if (t->index_sid != source_track->index_sid) {
+            /* If the first index table does not belong to the stream, then find a stream which does belong to the index table */
+            for (i = 0; i < s->nb_streams; i++) {
+                MXFTrack *new_source_track = s->streams[i]->priv_data;
+                if (new_source_track && new_source_track->index_sid == t->index_sid) {
+                    sample_time = av_rescale_q(sample_time, new_source_track->edit_rate, source_track->edit_rate);
+                    source_track = new_source_track;
+                    st = s->streams[i];
+                    break;
+                }
+            }
+            if (i == s->nb_streams)
+                return AVERROR_INVALIDDATA;
+        }
 
         /* clamp above zero, else ff_index_search_timestamp() returns negative
          * this also means we allow seeking before the start */
@@ -3344,11 +3663,23 @@
             sample_time = FFMIN(sample_time, source_track->original_duration - 1);
         }
 
-        if ((ret = mxf_edit_unit_absolute_offset(mxf, t, sample_time, &sample_time, &seekpos, 1)) < 0)
+        if (source_track->wrapping == UnknownWrapped)
+            av_log(mxf->fc, AV_LOG_WARNING, "attempted seek in an UnknownWrapped essence\n");
+
+        if ((ret = mxf_edit_unit_absolute_offset(mxf, t, sample_time, source_track->edit_rate, &sample_time, &seekpos, &partition, 1)) < 0)
             return ret;
 
         ff_update_cur_dts(s, st, sample_time);
-        mxf->current_edit_unit = sample_time;
+        if (source_track->wrapping == ClipWrapped) {
+            KLVPacket klv = partition->first_essence_klv;
+            if (seekpos < klv.next_klv - klv.length || seekpos >= klv.next_klv) {
+                av_log(mxf->fc, AV_LOG_ERROR, "attempted seek out of clip wrapped KLV\n");
+                return AVERROR_INVALIDDATA;
+            }
+            mxf->current_klv_data = klv;
+        } else {
+            mxf->current_klv_data = (KLVPacket){{0}};
+        }
         avio_seek(s->pb, seekpos, SEEK_SET);
     }
 
@@ -3356,18 +3687,31 @@
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *cur_st = s->streams[i];
         MXFTrack *cur_track = cur_st->priv_data;
-        uint64_t current_sample_count = 0;
-        if (cur_st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            ret = mxf_compute_sample_count(mxf, i, &current_sample_count);
-            if (ret < 0)
-                return ret;
-
-            cur_track->sample_count = current_sample_count;
+        if (cur_track) {
+            int64_t track_edit_unit = sample_time;
+            if (st != cur_st)
+                mxf_get_next_track_edit_unit(mxf, cur_track, seekpos, &track_edit_unit);
+            cur_track->sample_count = mxf_compute_sample_count(mxf, cur_st, track_edit_unit);
         }
     }
     return 0;
 }
 
+static const AVOption options[] = {
+    { "eia608_extract", "extract eia 608 captions from s436m track",
+      offsetof(MXFContext, eia608_extract), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1,
+      AV_OPT_FLAG_DECODING_PARAM },
+    { NULL },
+};
+
+static const AVClass demuxer_class = {
+    .class_name = "mxf",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_DEMUXER,
+};
+
 AVInputFormat ff_mxf_demuxer = {
     .name           = "mxf",
     .long_name      = NULL_IF_CONFIG_SMALL("MXF (Material eXchange Format)"),
@@ -3378,4 +3722,5 @@
     .read_packet    = mxf_read_packet,
     .read_close     = mxf_read_close,
     .read_seek      = mxf_read_seek,
+    .priv_class     = &demuxer_class,
 };

diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index 035e65e..f6acf80 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c

@@ -88,10 +88,17 @@
     int color_siting;
     int signal_standard;
     int h_chroma_sub_sample;
+    int v_chroma_sub_sample;
     int temporal_reordering;
     AVRational aspect_ratio; ///< display aspect ratio
     int closed_gop;          ///< gop is closed, used in mpeg-2 frame parsing
     int video_bit_rate;
+    int slice_offset;
+    int frame_size;          ///< frame size in bytes
+    int seq_closed_gop;      ///< all gops in sequence are closed, used in mpeg-2 descriptor
+    int max_gop;             ///< maximum gop size, used by mpeg-2 descriptor
+    int b_picture_count;     ///< maximum number of consecutive b pictures, used in mpeg-2 descriptor
+    int low_delay;           ///< low delay, used in mpeg-2 descriptor
 } MXFStreamContext;
 
 typedef struct MXFContainerEssenceEntry {
@@ -101,6 +108,13 @@
     void (*write_desc)(AVFormatContext *, AVStream *);
 } MXFContainerEssenceEntry;
 
+typedef struct MXFPackage {
+    char *name;
+    enum MXFMetadataSetType type;
+    int instance;
+    struct MXFPackage *ref;
+} MXFPackage;
+
 enum ULIndex {
     INDEX_MPEG2 = 0,
     INDEX_AES3,
@@ -138,6 +152,11 @@
     INDEX_DNXHD_720p_8bit_HIGH,
     INDEX_DNXHD_720p_8bit_MEDIUM,
     INDEX_DNXHD_720p_8bit_LOW,
+    INDEX_DNXHR_LB,
+    INDEX_DNXHR_SQ,
+    INDEX_DNXHR_HQ,
+    INDEX_DNXHR_HQX,
+    INDEX_DNXHR_444,
     INDEX_JPEG2000,
     INDEX_H264,
 };
@@ -161,6 +180,7 @@
 static void mxf_write_mpegvideo_desc(AVFormatContext *s, AVStream *st);
 static void mxf_write_cdci_desc(AVFormatContext *s, AVStream *st);
 static void mxf_write_generic_sound_desc(AVFormatContext *s, AVStream *st);
+static void mxf_write_s436m_anc_desc(AVFormatContext *s, AVStream *st);
 
 static const MXFContainerEssenceEntry mxf_essence_container_uls[] = {
     { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x02,0x0D,0x01,0x03,0x01,0x02,0x04,0x60,0x01 },
@@ -337,6 +357,31 @@
       { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
       { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x71,0x13,0x00,0x00 },
       mxf_write_cdci_desc },
+    // DNxHR LB - CID 1274
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
+      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
+      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0D,0x04,0x01,0x02,0x02,0x71,0x28,0x00,0x00 },
+      mxf_write_cdci_desc },
+    // DNxHR SQ - CID 1273
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
+      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
+      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0D,0x04,0x01,0x02,0x02,0x71,0x27,0x00,0x00 },
+      mxf_write_cdci_desc },
+    // DNxHR HQ - CID 1272
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
+      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
+      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0D,0x04,0x01,0x02,0x02,0x71,0x26,0x00,0x00 },
+      mxf_write_cdci_desc },
+    // DNxHR HQX - CID 1271
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
+      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
+      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0D,0x04,0x01,0x02,0x02,0x71,0x25,0x00,0x00 },
+      mxf_write_cdci_desc },
+    // DNxHR 444 - CID 1270
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
+      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
+      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0D,0x04,0x01,0x02,0x02,0x71,0x24,0x00,0x00 },
+      mxf_write_cdci_desc },
     // JPEG2000
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x07,0x0d,0x01,0x03,0x01,0x02,0x0c,0x01,0x00 },
       { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x08,0x00 },
@@ -347,6 +392,11 @@
       { 0x06,0x0E,0x2B,0x34,0x01,0x02,0x01,0x01,0x0D,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
       { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x00,0x00,0x00 },
       mxf_write_mpegvideo_desc },
+    // S436M ANC
+    { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x0D,0x01,0x03,0x01,0x02,0x0e,0x00,0x00 },
+      { 0x06,0x0E,0x2B,0x34,0x01,0x02,0x01,0x01,0x0D,0x01,0x03,0x01,0x17,0x01,0x02,0x00 },
+      { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x01,0x5C,0x00 },
+      mxf_write_s436m_anc_desc },
     { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
       { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
       { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
@@ -372,6 +422,7 @@
     AVStream *timecode_track;
     int timecode_base;       ///< rounded time code base (25 or 30)
     int edit_unit_byte_count; ///< fixed edit unit byte count
+    int content_package_rate; ///< content package rate in system element, see SMPTE 326M
     uint64_t body_offset;
     uint32_t instance_number;
     uint8_t umid[16];        ///< unique material identifier
@@ -380,6 +431,8 @@
     uint32_t tagged_value_count;
     AVRational audio_edit_rate;
     int store_user_comments;
+    int track_instance_count; // used to generate MXFTrack uuids
+    int cbr_index;           ///< use a constant bitrate index
 } MXFContext;
 
 static const uint8_t uuid_base[]            = { 0xAD,0xAB,0x44,0x24,0x2f,0x25,0x4d,0xc7,0x92,0xff,0x29,0xbd };
@@ -414,6 +467,7 @@
     { 0x3C0A, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x01,0x01,0x15,0x02,0x00,0x00,0x00,0x00}}, /* Instance UID */
     { 0x3B02, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x07,0x02,0x01,0x10,0x02,0x04,0x00,0x00}}, /* Last Modified Date */
     { 0x3B05, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x03,0x01,0x02,0x01,0x05,0x00,0x00,0x00}}, /* Version */
+    { 0x3B07, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x03,0x01,0x02,0x01,0x04,0x00,0x00,0x00}}, /* Object Model Version */
     { 0x3B06, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x06,0x01,0x01,0x04,0x06,0x04,0x00,0x00}}, /* Identifications reference */
     { 0x3B03, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x06,0x01,0x01,0x04,0x02,0x01,0x00,0x00}}, /* Content Storage reference */
     { 0x3B09, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x01,0x02,0x02,0x03,0x00,0x00,0x00,0x00}}, /* Operational Pattern UL */
@@ -423,9 +477,12 @@
     { 0x3C09, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x05,0x20,0x07,0x01,0x01,0x00,0x00,0x00}}, /* This Generation UID */
     { 0x3C01, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x05,0x20,0x07,0x01,0x02,0x01,0x00,0x00}}, /* Company Name */
     { 0x3C02, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x05,0x20,0x07,0x01,0x03,0x01,0x00,0x00}}, /* Product Name */
+    { 0x3C03, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x05,0x20,0x07,0x01,0x04,0x00,0x00,0x00}}, /* Product Version */
     { 0x3C04, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x05,0x20,0x07,0x01,0x05,0x01,0x00,0x00}}, /* Version String */
     { 0x3C05, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x05,0x20,0x07,0x01,0x07,0x00,0x00,0x00}}, /* Product ID */
     { 0x3C06, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x07,0x02,0x01,0x10,0x02,0x03,0x00,0x00}}, /* Modification Date */
+    { 0x3C07, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x05,0x20,0x07,0x01,0x0A,0x00,0x00,0x00}}, /* Toolkit Version */
+    { 0x3C08, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x05,0x20,0x07,0x01,0x06,0x01,0x00,0x00}}, /* Platform */
     // Content Storage
     { 0x1901, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x06,0x01,0x01,0x04,0x05,0x01,0x00,0x00}}, /* Package strong reference batch */
     { 0x1902, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x06,0x01,0x01,0x04,0x05,0x02,0x00,0x00}}, /* Package strong reference batch */
@@ -468,20 +525,36 @@
     { 0x320D, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x01,0x03,0x02,0x05,0x00,0x00,0x00}}, /* Video Line Map */
     { 0x3203, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x02,0x02,0x00,0x00,0x00}}, /* Stored Width */
     { 0x3202, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x02,0x01,0x00,0x00,0x00}}, /* Stored Height */
+    { 0x3216, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x01,0x03,0x02,0x08,0x00,0x00,0x00}}, /* Stored F2 Offset */
+    { 0x3205, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x01,0x08,0x00,0x00,0x00}}, /* Sampled Width */
+    { 0x3204, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x01,0x07,0x00,0x00,0x00}}, /* Sampled Height */
+    { 0x3206, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x01,0x09,0x00,0x00,0x00}}, /* Sampled X Offset */
+    { 0x3207, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x01,0x0A,0x00,0x00,0x00}}, /* Sampled Y Offset */
     { 0x3209, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x01,0x0C,0x00,0x00,0x00}}, /* Display Width */
     { 0x3208, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x01,0x0B,0x00,0x00,0x00}}, /* Display Height */
+    { 0x320A, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x01,0x0D,0x00,0x00,0x00}}, /* Display X offset */
     { 0x320B, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x01,0x0E,0x00,0x00,0x00}}, /* Presentation Y offset */
+    { 0x3217, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x01,0x03,0x02,0x07,0x00,0x00,0x00}}, /* Display F2 offset */
     { 0x320E, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x01,0x01,0x01,0x00,0x00,0x00}}, /* Aspect Ratio */
+    { 0x3210, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x01,0x02,0x01,0x01,0x01,0x02,0x00}}, /* Transfer characteristic */
+    { 0x3213, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x18,0x01,0x02,0x00,0x00,0x00,0x00}}, /* Image Start Offset */
+    { 0x3214, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x18,0x01,0x03,0x00,0x00,0x00,0x00}}, /* Image End Offset */
     { 0x3201, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x01,0x06,0x01,0x00,0x00,0x00,0x00}}, /* Picture Essence Coding */
     { 0x3212, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x01,0x03,0x01,0x06,0x00,0x00,0x00}}, /* Field Dominance (Opt) */
     { 0x3215, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x05,0x01,0x13,0x00,0x00,0x00,0x00}}, /* Signal Standard */
     // CDCI Picture Essence Descriptor
     { 0x3301, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x01,0x05,0x03,0x0A,0x00,0x00,0x00}}, /* Component Depth */
     { 0x3302, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x01,0x05,0x00,0x00,0x00}}, /* Horizontal Subsampling */
+    { 0x3308, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x01,0x05,0x01,0x10,0x00,0x00,0x00}}, /* Vertical Subsampling */
     { 0x3303, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x01,0x06,0x00,0x00,0x00}}, /* Color Siting */
+    { 0x3307, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x18,0x01,0x04,0x00,0x00,0x00,0x00}}, /* Padding Bits */
+    { 0x3304, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x03,0x03,0x00,0x00,0x00}}, /* Black Ref level */
+    { 0x3305, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x01,0x05,0x03,0x04,0x00,0x00,0x00}}, /* White Ref level */
+    { 0x3306, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x01,0x05,0x03,0x05,0x00,0x00,0x00}}, /* Color Range */
     // Generic Sound Essence Descriptor
     { 0x3D02, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x04,0x04,0x02,0x03,0x01,0x04,0x00,0x00,0x00}}, /* Locked/Unlocked */
     { 0x3D03, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x02,0x03,0x01,0x01,0x01,0x00,0x00}}, /* Audio sampling rate */
+    { 0x3D04, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x02,0x01,0x01,0x03,0x00,0x00,0x00}}, /* Audio Ref Level */
     { 0x3D07, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x02,0x01,0x01,0x04,0x00,0x00,0x00}}, /* ChannelCount */
     { 0x3D01, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x04,0x04,0x02,0x03,0x03,0x04,0x00,0x00,0x00}}, /* Quantization bits */
     { 0x3D06, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x02,0x04,0x02,0x00,0x00,0x00,0x00}}, /* Sound Essence Compression */
@@ -496,7 +569,11 @@
     { 0x3F0A, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x04,0x04,0x02,0x05,0x00,0x00,0x00}}, /* Index Entry Array */
     // MPEG video Descriptor
     { 0x8000, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x01,0x06,0x02,0x01,0x0B,0x00,0x00}}, /* BitRate */
+    { 0x8003, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x01,0x06,0x02,0x01,0x05,0x00,0x00}}, /* LowDelay */
+    { 0x8004, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x01,0x06,0x02,0x01,0x06,0x00,0x00}}, /* ClosedGOP */
+    { 0x8006, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x01,0x06,0x02,0x01,0x08,0x00,0x00}}, /* MaxGOP */
     { 0x8007, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x01,0x06,0x02,0x01,0x0A,0x00,0x00}}, /* ProfileAndLevel */
+    { 0x8008, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x01,0x06,0x02,0x01,0x09,0x00,0x00}}, /* BPictureCount */
     // Wave Audio Essence Descriptor
     { 0x3D09, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x02,0x03,0x03,0x05,0x00,0x00,0x00}}, /* Average Bytes Per Second */
     { 0x3D0A, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x02,0x03,0x02,0x01,0x00,0x00,0x00}}, /* Block Align */
@@ -654,9 +731,14 @@
 
     mxf_write_refs_count(pb, DESCRIPTOR_COUNT(c->essence_container_count));
     av_log(s,AV_LOG_DEBUG, "essence container count:%d\n", c->essence_container_count);
-    for (i = 0; i < c->essence_container_count; i++) {
+    for (i = 0; i < s->nb_streams; i++) {
         MXFStreamContext *sc = s->streams[i]->priv_data;
+        // check first track of essence container type and only write it once
+        if (sc->track_essence_element_key[15] != 0)
+            continue;
         avio_write(pb, mxf_essence_container_uls[sc->index].container_ul, 16);
+        if (c->essence_container_count == 1)
+            break;
     }
 
     if (c->essence_container_count > 1)
@@ -670,7 +752,7 @@
 
     mxf_write_metadata_key(pb, 0x012f00);
     PRINT_KEY(s, "preface key", pb->buf_ptr - 16);
-    klv_encode_ber_length(pb, 130 + 16LL * DESCRIPTOR_COUNT(mxf->essence_container_count));
+    klv_encode_ber_length(pb, 138 + 16LL * DESCRIPTOR_COUNT(mxf->essence_container_count));
 
     // write preface set uid
     mxf_write_local_tag(pb, 16, 0x3C0A);
@@ -683,7 +765,11 @@
 
     // write version
     mxf_write_local_tag(pb, 2, 0x3B05);
-    avio_wb16(pb, 258); // v1.2
+    avio_wb16(pb, 259); // v1.3
+
+    // Object Model Version
+    mxf_write_local_tag(pb, 4, 0x3B07);
+    avio_wb32(pb, 1);
 
     // write identification_refs
     mxf_write_local_tag(pb, 16 + 8, 0x3B06);
@@ -768,6 +854,22 @@
     avio_put_str16be(pb, value);
 }
 
+static void store_version(AVFormatContext *s){
+    AVIOContext *pb = s->pb;
+
+    if (s->flags & AVFMT_FLAG_BITEXACT) {
+        avio_wb16(pb, 0); // major
+        avio_wb16(pb, 0); // minor
+        avio_wb16(pb, 0); // tertiary
+    } else {
+        avio_wb16(pb, LIBAVFORMAT_VERSION_MAJOR); // major
+        avio_wb16(pb, LIBAVFORMAT_VERSION_MINOR); // minor
+        avio_wb16(pb, LIBAVFORMAT_VERSION_MICRO); // tertiary
+    }
+    avio_wb16(pb, 0); // patch
+    avio_wb16(pb, 0); // release
+}
+
 static void mxf_write_identification(AVFormatContext *s)
 {
     MXFContext *mxf = s->priv_data;
@@ -782,7 +884,7 @@
 
     version = s->flags & AVFMT_FLAG_BITEXACT ?
         "0.0.0" : AV_STRINGIFY(LIBAVFORMAT_VERSION);
-    length = 72 + mxf_utf16_local_tag_length(company) +
+    length = 100 +mxf_utf16_local_tag_length(company) +
                   mxf_utf16_local_tag_length(product) +
                   mxf_utf16_local_tag_length(version);
     klv_encode_ber_length(pb, length);
@@ -797,6 +899,10 @@
     mxf_write_uuid(pb, Identification, 1);
     mxf_write_local_tag_utf16(pb, 0x3C01, company); // Company Name
     mxf_write_local_tag_utf16(pb, 0x3C02, product); // Product Name
+
+    mxf_write_local_tag(pb, 10, 0x3C03); // Product Version
+    store_version(s);
+
     mxf_write_local_tag_utf16(pb, 0x3C04, version); // Version String
 
     // write product uid
@@ -806,15 +912,19 @@
     // modification date
     mxf_write_local_tag(pb, 8, 0x3C06);
     avio_wb64(pb, mxf->timestamp);
+
+    mxf_write_local_tag(pb, 10, 0x3C07); // Toolkit Version
+    store_version(s);
 }
 
-static void mxf_write_content_storage(AVFormatContext *s)
+static void mxf_write_content_storage(AVFormatContext *s, MXFPackage *packages, int package_count)
 {
     AVIOContext *pb = s->pb;
+    int i;
 
     mxf_write_metadata_key(pb, 0x011800);
     PRINT_KEY(s, "content storage key", pb->buf_ptr - 16);
-    klv_encode_ber_length(pb, 92);
+    klv_encode_ber_length(pb, 60 + (16 * package_count));
 
     // write uid
     mxf_write_local_tag(pb, 16, 0x3C0A);
@@ -822,10 +932,11 @@
     PRINT_KEY(s, "content storage uid", pb->buf_ptr - 16);
 
     // write package reference
-    mxf_write_local_tag(pb, 16 * 2 + 8, 0x1901);
-    mxf_write_refs_count(pb, 2);
-    mxf_write_uuid(pb, MaterialPackage, 0);
-    mxf_write_uuid(pb, SourcePackage, 0);
+    mxf_write_local_tag(pb, 16 * package_count + 8, 0x1901);
+    mxf_write_refs_count(pb, package_count);
+    for (i = 0; i < package_count; i++) {
+        mxf_write_uuid(pb, packages[i].type, packages[i].instance);
+    }
 
     // write essence container data
     mxf_write_local_tag(pb, 8 + 16, 0x1902);
@@ -833,7 +944,7 @@
     mxf_write_uuid(pb, EssenceContainerData, 0);
 }
 
-static void mxf_write_track(AVFormatContext *s, AVStream *st, enum MXFMetadataSetType type)
+static void mxf_write_track(AVFormatContext *s, AVStream *st, MXFPackage *package)
 {
     MXFContext *mxf = s->priv_data;
     AVIOContext *pb = s->pb;
@@ -845,7 +956,7 @@
 
     // write track uid
     mxf_write_local_tag(pb, 16, 0x3C0A);
-    mxf_write_uuid(pb, type == MaterialPackage ? Track : Track + TypeBottom, st->index);
+    mxf_write_uuid(pb, Track, mxf->track_instance_count);
     PRINT_KEY(s, "track uid", pb->buf_ptr - 16);
 
     // write track id
@@ -854,7 +965,7 @@
 
     // write track number
     mxf_write_local_tag(pb, 4, 0x4804);
-    if (type == MaterialPackage)
+    if (package->type == MaterialPackage)
         avio_wb32(pb, 0); // track number of material package is 0
     else
         avio_write(pb, sc->track_essence_element_key + 12, 4);
@@ -876,7 +987,7 @@
 
     // write sequence refs
     mxf_write_local_tag(pb, 16, 0x4803);
-    mxf_write_uuid(pb, type == MaterialPackage ? Sequence: Sequence + TypeBottom, st->index);
+    mxf_write_uuid(pb, Sequence, mxf->track_instance_count);
 }
 
 static const uint8_t smpte_12m_timecode_track_data_ul[] = { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x01,0x03,0x02,0x01,0x01,0x00,0x00,0x00 };
@@ -905,7 +1016,7 @@
     }
 }
 
-static void mxf_write_sequence(AVFormatContext *s, AVStream *st, enum MXFMetadataSetType type)
+static void mxf_write_sequence(AVFormatContext *s, AVStream *st, MXFPackage *package)
 {
     MXFContext *mxf = s->priv_data;
     AVIOContext *pb = s->pb;
@@ -916,7 +1027,7 @@
     klv_encode_ber_length(pb, 80);
 
     mxf_write_local_tag(pb, 16, 0x3C0A);
-    mxf_write_uuid(pb, type == MaterialPackage ? Sequence: Sequence + TypeBottom, st->index);
+    mxf_write_uuid(pb, Sequence, mxf->track_instance_count);
 
     PRINT_KEY(s, "sequence uid", pb->buf_ptr - 16);
     mxf_write_common_fields(s, st);
@@ -928,12 +1039,11 @@
         component = TimecodeComponent;
     else
         component = SourceClip;
-    if (type == SourcePackage)
-        component += TypeBottom;
-    mxf_write_uuid(pb, component, st->index);
+
+    mxf_write_uuid(pb, component, mxf->track_instance_count);
 }
 
-static void mxf_write_timecode_component(AVFormatContext *s, AVStream *st, enum MXFMetadataSetType type)
+static void mxf_write_timecode_component(AVFormatContext *s, AVStream *st, MXFPackage *package)
 {
     MXFContext *mxf = s->priv_data;
     AVIOContext *pb = s->pb;
@@ -943,8 +1053,7 @@
 
     // UID
     mxf_write_local_tag(pb, 16, 0x3C0A);
-    mxf_write_uuid(pb, type == MaterialPackage ? TimecodeComponent :
-                   TimecodeComponent + TypeBottom, st->index);
+    mxf_write_uuid(pb, TimecodeComponent, mxf->track_instance_count);
 
     mxf_write_common_fields(s, st);
 
@@ -961,8 +1070,9 @@
     avio_w8(pb, !!(mxf->tc.flags & AV_TIMECODE_FLAG_DROPFRAME));
 }
 
-static void mxf_write_structural_component(AVFormatContext *s, AVStream *st, enum MXFMetadataSetType type)
+static void mxf_write_structural_component(AVFormatContext *s, AVStream *st, MXFPackage *package)
 {
+    MXFContext *mxf = s->priv_data;
     AVIOContext *pb = s->pb;
     int i;
 
@@ -972,7 +1082,7 @@
 
     // write uid
     mxf_write_local_tag(pb, 16, 0x3C0A);
-    mxf_write_uuid(pb, type == MaterialPackage ? SourceClip: SourceClip + TypeBottom, st->index);
+    mxf_write_uuid(pb, SourceClip, mxf->track_instance_count);
 
     PRINT_KEY(s, "structural component uid", pb->buf_ptr - 16);
     mxf_write_common_fields(s, st);
@@ -983,20 +1093,33 @@
 
     // write source package uid, end of the reference
     mxf_write_local_tag(pb, 32, 0x1101);
-    if (type == SourcePackage) {
+    if (!package->ref) {
         for (i = 0; i < 4; i++)
             avio_wb64(pb, 0);
     } else
-        mxf_write_umid(s, 1);
+        mxf_write_umid(s, package->ref->instance);
 
     // write source track id
     mxf_write_local_tag(pb, 4, 0x1102);
-    if (type == SourcePackage)
+    if (package->type == SourcePackage && !package->ref)
         avio_wb32(pb, 0);
     else
         avio_wb32(pb, st->index+2);
 }
 
+static void mxf_write_tape_descriptor(AVFormatContext *s)
+{
+    AVIOContext *pb = s->pb;
+
+    mxf_write_metadata_key(pb, 0x012e00);
+    PRINT_KEY(s, "tape descriptor key", pb->buf_ptr - 16);
+    klv_encode_ber_length(pb, 20);
+    mxf_write_local_tag(pb, 16, 0x3C0A);
+    mxf_write_uuid(pb, TapeDescriptor, 0);
+    PRINT_KEY(s, "tape_desc uid", pb->buf_ptr - 16);
+}
+
+
 static void mxf_write_multi_descriptor(AVFormatContext *s)
 {
     MXFContext *mxf = s->priv_data;
@@ -1034,14 +1157,16 @@
         mxf_write_uuid(pb, SubDescriptor, i);
 }
 
-static void mxf_write_generic_desc(AVFormatContext *s, AVStream *st, const UID key, unsigned size)
+static int64_t mxf_write_generic_desc(AVFormatContext *s, AVStream *st, const UID key)
 {
     MXFContext *mxf = s->priv_data;
     MXFStreamContext *sc = st->priv_data;
     AVIOContext *pb = s->pb;
+    int64_t pos;
 
     avio_write(pb, key, 16);
-    klv_encode_ber4_length(pb, size+20+8+12+20);
+    klv_encode_ber4_length(pb, 0);
+    pos = avio_tell(pb);
 
     mxf_write_local_tag(pb, 16, 0x3C0A);
     mxf_write_uuid(pb, SubDescriptor, st->index);
@@ -1066,35 +1191,94 @@
 
     mxf_write_local_tag(pb, 16, 0x3004);
     avio_write(pb, mxf_essence_container_uls[sc->index].container_ul, 16);
+
+    return pos;
 }
 
+static const UID mxf_s436m_anc_descriptor_key = { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x5c,0x00 };
 static const UID mxf_mpegvideo_descriptor_key = { 0x06,0x0E,0x2B,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x51,0x00 };
 static const UID mxf_wav_descriptor_key       = { 0x06,0x0E,0x2B,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x48,0x00 };
 static const UID mxf_aes3_descriptor_key      = { 0x06,0x0E,0x2B,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x47,0x00 };
 static const UID mxf_cdci_descriptor_key      = { 0x06,0x0E,0x2B,0x34,0x02,0x53,0x01,0x01,0x0D,0x01,0x01,0x01,0x01,0x01,0x28,0x00 };
 static const UID mxf_generic_sound_descriptor_key = { 0x06,0x0E,0x2B,0x34,0x02,0x53,0x01,0x01,0x0D,0x01,0x01,0x01,0x01,0x01,0x42,0x00 };
 
-static void mxf_write_cdci_common(AVFormatContext *s, AVStream *st, const UID key, unsigned size)
+static int get_trc(UID ul, enum AVColorTransferCharacteristic trc)
+{
+    switch (trc){
+    case AVCOL_TRC_GAMMA28   :
+    case AVCOL_TRC_GAMMA22   :
+        memcpy(ul, ((UID){0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x04,0x01,0x01,0x01,0x01,0x01,0x00,0x00}), 16);
+        return 0;
+    case AVCOL_TRC_BT709     :
+    case AVCOL_TRC_SMPTE170M :
+        memcpy(ul, ((UID){0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x04,0x01,0x01,0x01,0x01,0x02,0x00,0x00}), 16);
+        return 0;
+    case AVCOL_TRC_SMPTE240M :
+        memcpy(ul, ((UID){0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x04,0x01,0x01,0x01,0x01,0x03,0x00,0x00}), 16);
+        return 0;
+    case AVCOL_TRC_BT1361_ECG:
+        memcpy(ul, ((UID){0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x06,0x04,0x01,0x01,0x01,0x01,0x05,0x00,0x00}), 16);
+        return 0;
+    case AVCOL_TRC_LINEAR    :
+        memcpy(ul, ((UID){0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x06,0x04,0x01,0x01,0x01,0x01,0x06,0x00,0x00}), 16);
+        return 0;
+    case AVCOL_TRC_SMPTE428  :
+        memcpy(ul, ((UID){0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x08,0x04,0x01,0x01,0x01,0x01,0x07,0x00,0x00}), 16);
+        return 0;
+    default:
+        return -1;
+    }
+}
+
+static int64_t mxf_write_cdci_common(AVFormatContext *s, AVStream *st, const UID key)
 {
     MXFStreamContext *sc = st->priv_data;
     AVIOContext *pb = s->pb;
+    int stored_width  = (st->codecpar->width +15)/16*16;
     int stored_height = (st->codecpar->height+15)/16*16;
     int display_height;
     int f1, f2;
-    unsigned desc_size = size+8+8+8+8+8+8+8+5+16+4+12+20+5;
-    if (sc->interlaced && sc->field_dominance)
-        desc_size += 5;
-    if (sc->signal_standard)
-        desc_size += 5;
+    UID transfer_ul = {0};
+    int64_t pos = mxf_write_generic_desc(s, st, key);
 
-    mxf_write_generic_desc(s, st, key, desc_size);
+    get_trc(transfer_ul, st->codecpar->color_trc);
 
     mxf_write_local_tag(pb, 4, 0x3203);
-    avio_wb32(pb, st->codecpar->width);
+    avio_wb32(pb, stored_width);
 
     mxf_write_local_tag(pb, 4, 0x3202);
     avio_wb32(pb, stored_height>>sc->interlaced);
 
+    if (s->oformat == &ff_mxf_d10_muxer) {
+        //Stored F2 Offset
+        mxf_write_local_tag(pb, 4, 0x3216);
+        avio_wb32(pb, 0);
+
+        //Image Start Offset
+        mxf_write_local_tag(pb, 4, 0x3213);
+        avio_wb32(pb, 0);
+
+        //Image End Offset
+        mxf_write_local_tag(pb, 4, 0x3214);
+        avio_wb32(pb, 0);
+    }
+
+    //Sampled width
+    mxf_write_local_tag(pb, 4, 0x3205);
+    avio_wb32(pb, st->codecpar->width);
+
+    //Samples height
+    mxf_write_local_tag(pb, 4, 0x3204);
+    avio_wb32(pb, st->codecpar->height>>sc->interlaced);
+
+    //Sampled X Offset
+    mxf_write_local_tag(pb, 4, 0x3206);
+    avio_wb32(pb, 0);
+
+    //Sampled Y Offset
+    mxf_write_local_tag(pb, 4, 0x3207);
+    avio_wb32(pb, 0);
+
     mxf_write_local_tag(pb, 4, 0x3209);
     avio_wb32(pb, st->codecpar->width);
 
@@ -1108,10 +1292,20 @@
     mxf_write_local_tag(pb, 4, 0x3208);
     avio_wb32(pb, display_height>>sc->interlaced);
 
-    // presentation Y offset
+    // display X offset
+    mxf_write_local_tag(pb, 4, 0x320A);
+    avio_wb32(pb, 0);
+
+    // display Y offset
     mxf_write_local_tag(pb, 4, 0x320B);
     avio_wb32(pb, (st->codecpar->height - display_height)>>sc->interlaced);
 
+    if (sc->interlaced) {
+        //Display F2 Offset
+        mxf_write_local_tag(pb, 4, 0x3217);
+        avio_wb32(pb, -((st->codecpar->height - display_height)&1));
+    }
+
     // component depth
     mxf_write_local_tag(pb, 4, 0x3301);
     avio_wb32(pb, sc->component_depth);
@@ -1120,10 +1314,35 @@
     mxf_write_local_tag(pb, 4, 0x3302);
     avio_wb32(pb, sc->h_chroma_sub_sample);
 
+    // vertical subsampling
+    mxf_write_local_tag(pb, 4, 0x3308);
+    avio_wb32(pb, sc->v_chroma_sub_sample);
+
     // color siting
     mxf_write_local_tag(pb, 1, 0x3303);
     avio_w8(pb, sc->color_siting);
 
+    // Padding Bits
+    mxf_write_local_tag(pb, 2, 0x3307);
+    avio_wb16(pb, 0);
+
+    if (st->codecpar->color_range != AVCOL_RANGE_UNSPECIFIED) {
+        int black = 0,
+            white = (1<<sc->component_depth) - 1,
+            color = (1<<sc->component_depth) - 1;
+        if (st->codecpar->color_range == AVCOL_RANGE_MPEG) {
+            black = 1   << (sc->component_depth - 4);
+            white = 235 << (sc->component_depth - 8);
+            color = (14 << (sc->component_depth - 4)) + 1;
+        }
+        mxf_write_local_tag(pb, 4, 0x3304);
+        avio_wb32(pb, black);
+        mxf_write_local_tag(pb, 4, 0x3305);
+        avio_wb32(pb, white);
+        mxf_write_local_tag(pb, 4, 0x3306);
+        avio_wb32(pb, color);
+    }
+
     if (sc->signal_standard) {
         mxf_write_local_tag(pb, 1, 0x3215);
         avio_w8(pb, sc->signal_standard);
@@ -1160,6 +1379,12 @@
     avio_wb32(pb, sc->aspect_ratio.num);
     avio_wb32(pb, sc->aspect_ratio.den);
 
+    //Transfer characteristic
+    if (transfer_ul[0]) {
+        mxf_write_local_tag(pb, 16, 0x3210);
+        avio_write(pb, transfer_ul, 16);
+    };
+
     mxf_write_local_tag(pb, 16, 0x3201);
     avio_write(pb, *sc->codec_ul, 16);
 
@@ -1168,11 +1393,28 @@
         avio_w8(pb, sc->field_dominance);
     }
 
+    return pos;
+}
+
+static void mxf_update_klv_size(AVIOContext *pb, int64_t pos)
+{
+    int64_t cur_pos = avio_tell(pb);
+    int size = cur_pos - pos;
+    avio_seek(pb, pos - 4, SEEK_SET);
+    klv_encode_ber4_length(pb, size);
+    avio_seek(pb, cur_pos, SEEK_SET);
 }
 
 static void mxf_write_cdci_desc(AVFormatContext *s, AVStream *st)
 {
-    mxf_write_cdci_common(s, st, mxf_cdci_descriptor_key, 0);
+    int64_t pos = mxf_write_cdci_common(s, st, mxf_cdci_descriptor_key);
+    mxf_update_klv_size(s->pb, pos);
+}
+
+static void mxf_write_s436m_anc_desc(AVFormatContext *s, AVStream *st)
+{
+    int64_t pos = mxf_write_generic_desc(s, st, mxf_s436m_anc_descriptor_key);
+    mxf_update_klv_size(s->pb, pos);
 }
 
 static void mxf_write_mpegvideo_desc(AVFormatContext *s, AVStream *st)
@@ -1180,10 +1422,9 @@
     AVIOContext *pb = s->pb;
     MXFStreamContext *sc = st->priv_data;
     int profile_and_level = (st->codecpar->profile<<4) | st->codecpar->level;
+    int64_t pos = mxf_write_cdci_common(s, st, mxf_mpegvideo_descriptor_key);
 
     if (st->codecpar->codec_id != AV_CODEC_ID_H264) {
-        mxf_write_cdci_common(s, st, mxf_mpegvideo_descriptor_key, 8+5);
-
         // bit rate
         mxf_write_local_tag(pb, 4, 0x8000);
         avio_wb32(pb, sc->video_bit_rate);
@@ -1193,24 +1434,35 @@
         if (!st->codecpar->profile)
             profile_and_level |= 0x80; // escape bit
         avio_w8(pb, profile_and_level);
-    } else {
-        mxf_write_cdci_common(s, st, mxf_mpegvideo_descriptor_key, 0);
+
+        // low delay
+        mxf_write_local_tag(pb, 1, 0x8003);
+        avio_w8(pb, sc->low_delay);
+
+        // closed gop
+        mxf_write_local_tag(pb, 1, 0x8004);
+        avio_w8(pb, sc->seq_closed_gop);
+
+        // max gop
+        mxf_write_local_tag(pb, 2, 0x8006);
+        avio_wb16(pb, sc->max_gop);
+
+        // b picture count
+        mxf_write_local_tag(pb, 2, 0x8008);
+        avio_wb16(pb, sc->b_picture_count);
     }
+
+    mxf_update_klv_size(pb, pos);
 }
 
-static void mxf_write_generic_sound_common(AVFormatContext *s, AVStream *st, const UID key, unsigned size)
+static int64_t mxf_write_generic_sound_common(AVFormatContext *s, AVStream *st, const UID key)
 {
     AVIOContext *pb = s->pb;
     MXFContext *mxf = s->priv_data;
     int show_warnings = !mxf->footer_partition_offset;
-    int duration_size = 0;
+    int64_t pos = mxf_write_generic_desc(s, st, key);
 
-    if (s->oformat == &ff_mxf_opatom_muxer)
-        duration_size = 12;
-
-    mxf_write_generic_desc(s, st, key, size+duration_size+5+12+8+8);
-
-    if (duration_size > 0) {
+    if (s->oformat == &ff_mxf_opatom_muxer) {
         mxf_write_local_tag(pb, 8, 0x3002);
         avio_wb64(pb, mxf->body_offset / mxf->edit_unit_byte_count);
     }
@@ -1224,6 +1476,11 @@
     avio_wb32(pb, st->codecpar->sample_rate);
     avio_wb32(pb, 1);
 
+    if (s->oformat == &ff_mxf_d10_muxer) {
+        mxf_write_local_tag(pb, 1, 0x3D04);
+        avio_w8(pb, 0);
+    }
+
     mxf_write_local_tag(pb, 4, 0x3D07);
     if (mxf->channel_count == -1) {
         if (show_warnings && (s->oformat == &ff_mxf_d10_muxer) && (st->codecpar->channels != 4) && (st->codecpar->channels != 8))
@@ -1241,13 +1498,14 @@
 
     mxf_write_local_tag(pb, 4, 0x3D01);
     avio_wb32(pb, av_get_bits_per_sample(st->codecpar->codec_id));
+
+    return pos;
 }
 
-static void mxf_write_wav_common(AVFormatContext *s, AVStream *st, const UID key, unsigned size)
+static int64_t mxf_write_wav_common(AVFormatContext *s, AVStream *st, const UID key)
 {
     AVIOContext *pb = s->pb;
-
-    mxf_write_generic_sound_common(s, st, key, size+6+8);
+    int64_t pos = mxf_write_generic_sound_common(s, st, key);
 
     mxf_write_local_tag(pb, 2, 0x3D0A);
     avio_wb16(pb, st->codecpar->block_align);
@@ -1255,21 +1513,26 @@
     // avg bytes per sec
     mxf_write_local_tag(pb, 4, 0x3D09);
     avio_wb32(pb, st->codecpar->block_align*st->codecpar->sample_rate);
+
+    return pos;
 }
 
 static void mxf_write_wav_desc(AVFormatContext *s, AVStream *st)
 {
-    mxf_write_wav_common(s, st, mxf_wav_descriptor_key, 0);
+    int64_t pos = mxf_write_wav_common(s, st, mxf_wav_descriptor_key);
+    mxf_update_klv_size(s->pb, pos);
 }
 
 static void mxf_write_aes3_desc(AVFormatContext *s, AVStream *st)
 {
-    mxf_write_wav_common(s, st, mxf_aes3_descriptor_key, 0);
+    int64_t pos = mxf_write_wav_common(s, st, mxf_aes3_descriptor_key);
+    mxf_update_klv_size(s->pb, pos);
 }
 
 static void mxf_write_generic_sound_desc(AVFormatContext *s, AVStream *st)
 {
-    mxf_write_generic_sound_common(s, st, mxf_generic_sound_descriptor_key, 0);
+    int64_t pos = mxf_write_generic_sound_common(s, st, mxf_generic_sound_descriptor_key);
+    mxf_update_klv_size(s->pb, pos);
 }
 
 static const uint8_t mxf_indirect_value_utf16le[] = { 0x4c,0x00,0x02,0x10,0x01,0x00,0x00,0x00,0x00,0x06,0x0e,0x2b,0x34,0x01,0x04,0x01,0x01 };
@@ -1321,15 +1584,15 @@
     return count;
 }
 
-static void mxf_write_package(AVFormatContext *s, enum MXFMetadataSetType type, const char *package_name)
+static void mxf_write_package(AVFormatContext *s, MXFPackage *package)
 {
     MXFContext *mxf = s->priv_data;
     AVIOContext *pb = s->pb;
     int i, track_count = s->nb_streams+1;
-    int name_size = mxf_utf16_local_tag_length(package_name);
+    int name_size = mxf_utf16_local_tag_length(package->name);
     int user_comment_count = 0;
 
-    if (type == MaterialPackage) {
+    if (package->type == MaterialPackage) {
         if (mxf->store_user_comments)
             user_comment_count = mxf_write_user_comments(s, s->metadata);
         mxf_write_metadata_key(pb, 0x013600);
@@ -1343,18 +1606,18 @@
 
     // write uid
     mxf_write_local_tag(pb, 16, 0x3C0A);
-    mxf_write_uuid(pb, type, 0);
-    av_log(s, AV_LOG_DEBUG, "package type:%d\n", type);
+    mxf_write_uuid(pb, package->type, package->instance);
+    av_log(s, AV_LOG_DEBUG, "package type:%d\n", package->type);
     PRINT_KEY(s, "package uid", pb->buf_ptr - 16);
 
     // write package umid
     mxf_write_local_tag(pb, 32, 0x4401);
-    mxf_write_umid(s, type == SourcePackage);
+    mxf_write_umid(s, package->instance);
     PRINT_KEY(s, "package umid second part", pb->buf_ptr - 16);
 
     // package name
     if (name_size)
-        mxf_write_local_tag_utf16(pb, 0x4402, package_name);
+        mxf_write_local_tag_utf16(pb, 0x4402, package->name);
 
     // package creation date
     mxf_write_local_tag(pb, 8, 0x4405);
@@ -1367,10 +1630,9 @@
     // write track refs
     mxf_write_local_tag(pb, track_count*16 + 8, 0x4403);
     mxf_write_refs_count(pb, track_count);
-    mxf_write_uuid(pb, type == MaterialPackage ? Track :
-                   Track + TypeBottom, -1); // timecode track
-    for (i = 0; i < s->nb_streams; i++)
-        mxf_write_uuid(pb, type == MaterialPackage ? Track : Track + TypeBottom, i);
+    // these are the uuids of the tracks the will be written in mxf_write_track
+    for (i = 0; i < track_count; i++)
+        mxf_write_uuid(pb, Track,  mxf->track_instance_count + i);
 
     // write user comment refs
     if (mxf->store_user_comments) {
@@ -1381,27 +1643,41 @@
     }
 
     // write multiple descriptor reference
-    if (type == SourcePackage) {
+    if (package->type == SourcePackage && package->instance == 1) {
         mxf_write_local_tag(pb, 16, 0x4701);
         if (s->nb_streams > 1) {
             mxf_write_uuid(pb, MultipleDescriptor, 0);
             mxf_write_multi_descriptor(s);
         } else
             mxf_write_uuid(pb, SubDescriptor, 0);
+    } else if (package->type == SourcePackage && package->instance == 2) {
+        mxf_write_local_tag(pb, 16, 0x4701);
+        mxf_write_uuid(pb, TapeDescriptor, 0);
+        mxf_write_tape_descriptor(s);
     }
 
+    /*
+     * for every 1 track in a package there is 1 sequence and 1 component.
+     * all 3 of these elements share the same instance number for generating
+     * there instance uuids. mxf->track_instance_count stores this value.
+     * mxf->track_instance_count is incremented after a group of all 3 of
+     * these elements are written.
+     */
+
     // write timecode track
-    mxf_write_track(s, mxf->timecode_track, type);
-    mxf_write_sequence(s, mxf->timecode_track, type);
-    mxf_write_timecode_component(s, mxf->timecode_track, type);
+    mxf_write_track(s, mxf->timecode_track, package);
+    mxf_write_sequence(s, mxf->timecode_track, package);
+    mxf_write_timecode_component(s, mxf->timecode_track, package);
+    mxf->track_instance_count++;
 
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *st = s->streams[i];
-        mxf_write_track(s, st, type);
-        mxf_write_sequence(s, st, type);
-        mxf_write_structural_component(s, st, type);
+        mxf_write_track(s, st, package);
+        mxf_write_sequence(s, st, package);
+        mxf_write_structural_component(s, st, package);
+        mxf->track_instance_count++;
 
-        if (type == SourcePackage) {
+        if (package->type == SourcePackage && package->instance == 1) {
             MXFStreamContext *sc = st->priv_data;
             mxf_essence_container_uls[sc->index].write_desc(s, st);
         }
@@ -1432,33 +1708,49 @@
 
 static int mxf_write_header_metadata_sets(AVFormatContext *s)
 {
-    const char *material_package_name = NULL;
-    const char *file_package_name = NULL;
+    MXFContext *mxf = s->priv_data;
     AVDictionaryEntry *entry = NULL;
     AVStream *st = NULL;
     int i;
+    MXFPackage packages[3] = {{0}};
+    int package_count = 2;
+    packages[0].type = MaterialPackage;
+    packages[1].type = SourcePackage;
+    packages[1].instance = 1;
+    packages[0].ref = &packages[1];
+
 
     if (entry = av_dict_get(s->metadata, "material_package_name", NULL, 0))
-       material_package_name = entry->value;
+       packages[0].name = entry->value;
 
     if (entry = av_dict_get(s->metadata, "file_package_name", NULL, 0)) {
-        file_package_name = entry->value;
+        packages[1].name = entry->value;
     } else {
         /* check if any of the streams contain a file_package_name */
         for (i = 0; i < s->nb_streams; i++) {
             st = s->streams[i];
             if (entry = av_dict_get(st->metadata, "file_package_name", NULL, 0)) {
-                file_package_name = entry->value;
+                packages[1].name = entry->value;
                 break;
             }
         }
     }
 
+    entry = av_dict_get(s->metadata, "reel_name", NULL, 0);
+    if (entry) {
+        packages[2].name = entry->value;
+        packages[2].type = SourcePackage;
+        packages[2].instance = 2;
+        packages[1].ref = &packages[2];
+        package_count = 3;
+    }
+
     mxf_write_preface(s);
     mxf_write_identification(s);
-    mxf_write_content_storage(s);
-    mxf_write_package(s, MaterialPackage, material_package_name);
-    mxf_write_package(s, SourcePackage, file_package_name);
+    mxf_write_content_storage(s, packages, package_count);
+    mxf->track_instance_count = 0;
+    for (i = 0; i < package_count; i++)
+        mxf_write_package(s, &packages[i]);
     mxf_write_essence_container_data(s);
     return 0;
 }
@@ -1478,6 +1770,9 @@
     AVIOContext *pb = s->pb;
     int i, j, temporal_reordering = 0;
     int key_index = mxf->last_key_index;
+    int prev_non_b_picture = 0;
+    int audio_frame_size = 0;
+    int64_t pos;
 
     av_log(s, AV_LOG_DEBUG, "edit units count %d\n", mxf->edit_units_count);
 
@@ -1486,12 +1781,8 @@
 
     avio_write(pb, index_table_segment_key, 16);
 
-    if (mxf->edit_unit_byte_count) {
-        klv_encode_ber_length(pb, 80);
-    } else {
-        klv_encode_ber_length(pb, 85 + 12+(s->nb_streams+1LL)*6 +
-                              12+mxf->edit_units_count*(11+mxf->slice_count*4LL));
-    }
+    klv_encode_ber4_length(pb, 0);
+    pos = avio_tell(pb);
 
     // instance id
     mxf_write_local_tag(pb, 16, 0x3C0A);
@@ -1525,44 +1816,52 @@
     mxf_write_local_tag(pb, 4, 0x3F07);
     avio_wb32(pb, 1);
 
-    if (!mxf->edit_unit_byte_count) {
-        // real slice count - 1
-        mxf_write_local_tag(pb, 1, 0x3F08);
-        avio_w8(pb, mxf->slice_count);
+    // real slice count - 1
+    mxf_write_local_tag(pb, 1, 0x3F08);
+    avio_w8(pb, !mxf->edit_unit_byte_count); // only one slice for CBR
 
-        // delta entry array
-        mxf_write_local_tag(pb, 8 + (s->nb_streams+1)*6, 0x3F09);
-        avio_wb32(pb, s->nb_streams+1); // num of entries
-        avio_wb32(pb, 6);               // size of one entry
-        // write system item delta entry
-        avio_w8(pb, 0);
-        avio_w8(pb, 0); // slice entry
-        avio_wb32(pb, 0); // element delta
-        for (i = 0; i < s->nb_streams; i++) {
-            AVStream *st = s->streams[i];
-            MXFStreamContext *sc = st->priv_data;
-            avio_w8(pb, sc->temporal_reordering);
-            if (sc->temporal_reordering)
-                temporal_reordering = 1;
-            if (i == 0) { // video track
-                avio_w8(pb, 0); // slice number
-                avio_wb32(pb, KAG_SIZE); // system item size including klv fill
-            } else { // audio track
-                unsigned audio_frame_size = sc->aic.samples[0]*sc->aic.sample_size;
+    // delta entry array
+    mxf_write_local_tag(pb, 8 + (s->nb_streams+1)*6, 0x3F09);
+    avio_wb32(pb, s->nb_streams+1); // num of entries
+    avio_wb32(pb, 6);               // size of one entry
+    // write system item delta entry
+    avio_w8(pb, 0);
+    avio_w8(pb, 0); // slice entry
+    avio_wb32(pb, 0); // element delta
+    // write each stream delta entry
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+        MXFStreamContext *sc = st->priv_data;
+        avio_w8(pb, sc->temporal_reordering);
+        if (sc->temporal_reordering)
+            temporal_reordering = 1;
+        if (mxf->edit_unit_byte_count) {
+            avio_w8(pb, 0); // slice number
+            avio_wb32(pb, sc->slice_offset);
+        } else if (i == 0) { // video track
+            avio_w8(pb, 0); // slice number
+            avio_wb32(pb, KAG_SIZE); // system item size including klv fill
+        } else { // audio or data track
+            if (!audio_frame_size) {
+                audio_frame_size = sc->aic.samples[0]*sc->aic.sample_size;
                 audio_frame_size += klv_fill_size(audio_frame_size);
-                avio_w8(pb, 1);
-                avio_wb32(pb, (i-1)*audio_frame_size); // element delta
             }
+            avio_w8(pb, 1);
+            avio_wb32(pb, (i-1)*audio_frame_size); // element delta
         }
+    }
 
-        mxf_write_local_tag(pb, 8 + mxf->edit_units_count*(11+mxf->slice_count*4), 0x3F0A);
+    if (!mxf->edit_unit_byte_count) {
+        MXFStreamContext *sc = s->streams[0]->priv_data;
+        mxf_write_local_tag(pb, 8 + mxf->edit_units_count*15, 0x3F0A);
         avio_wb32(pb, mxf->edit_units_count);  // num of entries
-        avio_wb32(pb, 11+mxf->slice_count*4);  // size of one entry
+        avio_wb32(pb, 15);  // size of one entry
 
         for (i = 0; i < mxf->edit_units_count; i++) {
             int temporal_offset = 0;
 
             if (!(mxf->index_entries[i].flags & 0x33)) { // I-frame
+                sc->max_gop = FFMAX(sc->max_gop, i - mxf->last_key_index);
                 mxf->last_key_index = key_index;
                 key_index = i;
             }
@@ -1582,11 +1881,13 @@
             avio_w8(pb, temporal_offset);
 
             if ((mxf->index_entries[i].flags & 0x30) == 0x30) { // back and forward prediction
+                sc->b_picture_count = FFMAX(sc->b_picture_count, i - prev_non_b_picture);
                 avio_w8(pb, mxf->last_key_index - i);
             } else {
                 avio_w8(pb, key_index - i); // key frame offset
                 if ((mxf->index_entries[i].flags & 0x20) == 0x20) // only forward
                     mxf->last_key_index = key_index;
+                prev_non_b_picture = i;
             }
 
             if (!(mxf->index_entries[i].flags & 0x33) && // I-frame
@@ -1597,12 +1898,16 @@
             avio_wb64(pb, mxf->index_entries[i].offset);
             if (s->nb_streams > 1)
                 avio_wb32(pb, mxf->index_entries[i].slice_offset);
+            else
+                avio_wb32(pb, 0);
         }
 
         mxf->last_key_index = key_index - mxf->edit_units_count;
         mxf->last_indexed_edit_unit += mxf->edit_units_count;
         mxf->edit_units_count = 0;
     }
+
+    mxf_update_klv_size(pb, pos);
 }
 
 static void mxf_write_klv_fill(AVFormatContext *s)
@@ -1630,7 +1935,7 @@
 
     if (!mxf->edit_unit_byte_count && mxf->edit_units_count)
         index_byte_count = 85 + 12+(s->nb_streams+1)*6 +
-            12+mxf->edit_units_count*(11+mxf->slice_count*4);
+            12+mxf->edit_units_count*15;
     else if (mxf->edit_unit_byte_count && indexsid)
         index_byte_count = 80;
 
@@ -1655,11 +1960,11 @@
     else
         avio_write(pb, body_partition_key, 16);
 
-    klv_encode_ber_length(pb, 88 + 16LL * DESCRIPTOR_COUNT(mxf->essence_container_count));
+    klv_encode_ber4_length(pb, 88 + 16LL * DESCRIPTOR_COUNT(mxf->essence_container_count));
 
     // write partition value
     avio_wb16(pb, 1); // majorVersion
-    avio_wb16(pb, 2); // minorVersion
+    avio_wb16(pb, 3); // minorVersion
     avio_wb32(pb, KAG_SIZE); // KAGSize
 
     avio_wb64(pb, partition_offset); // ThisPartition
@@ -1706,6 +2011,7 @@
         mxf_write_klv_fill(s);
         start = avio_tell(s->pb);
         mxf_write_primer_pack(s);
+        mxf_write_klv_fill(s);
         mxf_write_header_metadata_sets(s);
         pos = avio_tell(s->pb);
         header_byte_count = pos - start + klv_fill_size(pos);
@@ -1727,7 +2033,7 @@
 {
     MXFContext *mxf = s->priv_data;
     MXFStreamContext *sc = st->priv_data;
-    int i, cid;
+    int cid;
     uint8_t* header_cid;
     int frame_size = 0;
 
@@ -1740,7 +2046,11 @@
     header_cid = pkt->data + 0x28;
     cid = header_cid[0] << 24 | header_cid[1] << 16 | header_cid[2] << 8 | header_cid[3];
 
-    if ((frame_size = avpriv_dnxhd_get_frame_size(cid)) < 0)
+    if ((frame_size = avpriv_dnxhd_get_frame_size(cid)) == DNXHD_VARIABLE) {
+        frame_size = avpriv_dnxhd_get_hr_frame_size(cid, st->codecpar->width, st->codecpar->height);
+    }
+
+    if (frame_size < 0)
         return -1;
     if ((sc->interlaced = avpriv_dnxhd_get_interlaced(cid)) < 0)
         return AVERROR_INVALIDDATA;
@@ -1779,30 +2089,30 @@
     case 1253:
         sc->index = INDEX_DNXHD_720p_8bit_LOW;
         break;
+    case 1274:
+        sc->index = INDEX_DNXHR_LB;
+        break;
+    case 1273:
+        sc->index = INDEX_DNXHR_SQ;
+        break;
+    case 1272:
+        sc->index = INDEX_DNXHR_HQ;
+        break;
+    case 1271:
+        sc->index = INDEX_DNXHR_HQX;
+        sc->component_depth = st->codecpar->bits_per_raw_sample;
+        break;
+    case 1270:
+        sc->index = INDEX_DNXHR_444;
+        sc->component_depth = st->codecpar->bits_per_raw_sample;
+        break;
     default:
         return -1;
     }
 
     sc->codec_ul = &mxf_essence_container_uls[sc->index].codec_ul;
     sc->aspect_ratio = (AVRational){ 16, 9 };
-
-    if (s->oformat == &ff_mxf_opatom_muxer) {
-        mxf->edit_unit_byte_count = frame_size;
-        return 1;
-    }
-
-    mxf->edit_unit_byte_count = KAG_SIZE;
-    for (i = 0; i < s->nb_streams; i++) {
-        AVStream *st = s->streams[i];
-        MXFStreamContext *sc = st->priv_data;
-        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            mxf->edit_unit_byte_count += 16 + 4 + sc->aic.samples[0]*sc->aic.sample_size;
-            mxf->edit_unit_byte_count += klv_fill_size(mxf->edit_unit_byte_count);
-        } else if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
-            mxf->edit_unit_byte_count += 16 + 4 + frame_size;
-            mxf->edit_unit_byte_count += klv_fill_size(mxf->edit_unit_byte_count);
-        }
-    }
+    sc->frame_size = pkt->size;
 
     return 1;
 }
@@ -1812,7 +2122,7 @@
     MXFContext *mxf = s->priv_data;
     MXFStreamContext *sc = st->priv_data;
     uint8_t *vs_pack, *vsc_pack;
-    int i, ul_index, frame_size, stype, pal;
+    int ul_index, frame_size, stype, pal;
     const AVDVProfile *profile;
 
     if (mxf->header_written)
@@ -1869,24 +2179,7 @@
 
     sc->index = ul_index;
     sc->codec_ul =  &mxf_essence_container_uls[sc->index].codec_ul;
-
-    if(s->oformat == &ff_mxf_opatom_muxer) {
-        mxf->edit_unit_byte_count = frame_size;
-        return 1;
-    }
-
-    mxf->edit_unit_byte_count = KAG_SIZE;
-    for (i = 0; i < s->nb_streams; i++) {
-        AVStream *st = s->streams[i];
-        MXFStreamContext *sc = st->priv_data;
-        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            mxf->edit_unit_byte_count += 16 + 4 + sc->aic.samples[0]*sc->aic.sample_size;
-            mxf->edit_unit_byte_count += klv_fill_size(mxf->edit_unit_byte_count);
-        } else if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
-            mxf->edit_unit_byte_count += 16 + 4 + frame_size;
-            mxf->edit_unit_byte_count += klv_fill_size(mxf->edit_unit_byte_count);
-        }
-    }
+    sc->frame_size = pkt->size;
 
     return 1;
 }
@@ -1896,22 +2189,31 @@
     int frame_size;
     int profile;
     uint8_t interlaced;
+    int long_gop; // 1 or 0 when there are separate UIDs for Long GOP and Intra, -1 when Intra/LGOP detection can be ignored
 } mxf_h264_codec_uls[] = {
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x31,0x11,0x01 },      0,  66, 0 }, // AVC Baseline, Unconstrained Coding
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x20,0x01 },      0, 110, 0 }, // AVC High 10 Intra
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x01 }, 232960,   0, 1 }, // AVC Intra 50 1080i60
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x02 }, 281088,   0, 1 }, // AVC Intra 50 1080i50
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x03 }, 232960,   0, 0 }, // AVC Intra 50 1080p30
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x04 }, 281088,   0, 0 }, // AVC Intra 50 1080p25
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x08 }, 116736,   0, 0 }, // AVC Intra 50 720p60
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x09 }, 140800,   0, 0 }, // AVC Intra 50 720p50
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x30,0x01 },      0, 122, 0 }, // AVC High 422 Intra
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x01 }, 472576,   0, 1 }, // AVC Intra 100 1080i60
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x02 }, 568832,   0, 1 }, // AVC Intra 100 1080i50
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x03 }, 472576,   0, 0 }, // AVC Intra 100 1080p30
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x04 }, 568832,   0, 0 }, // AVC Intra 100 1080p25
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x08 }, 236544,   0, 0 }, // AVC Intra 100 720p60
-    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x09 }, 284672,   0, 0 }, // AVC Intra 100 720p50
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x31,0x11,0x01 },      0,  66, 0, -1 }, // AVC Baseline, Unconstrained Coding
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x31,0x20,0x01 },      0,  77, 0, -1 }, // AVC Main
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x31,0x30,0x01 },      0,  88, 0, -1 }, // AVC Extended
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x31,0x40,0x01 },      0, 100, 0, -1 }, // AVC High
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x31,0x50,0x01 },      0, 110, 0,  1 }, // AVC High 10
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x31,0x60,0x01 },      0, 122, 0,  1 }, // AVC High 422
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x31,0x70,0x01 },      0, 244, 0,  1 }, // AVC High 444
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x20,0x01 },      0, 110, 0,  0 }, // AVC High 10 Intra
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x01 }, 232960,   0, 1,  0 }, // AVC Intra 50 1080i60
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x02 }, 281088,   0, 1,  0 }, // AVC Intra 50 1080i50
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x03 }, 232960,   0, 0,  0 }, // AVC Intra 50 1080p30
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x04 }, 281088,   0, 0,  0 }, // AVC Intra 50 1080p25
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x08 }, 116736,   0, 0,  0 }, // AVC Intra 50 720p60
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x09 }, 140800,   0, 0,  0 }, // AVC Intra 50 720p50
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x30,0x01 },      0, 122, 0,  0 }, // AVC High 422 Intra
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x01 }, 472576,   0, 1,  0 }, // AVC Intra 100 1080i60
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x02 }, 568832,   0, 1,  0 }, // AVC Intra 100 1080i50
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x03 }, 472576,   0, 0,  0 }, // AVC Intra 100 1080p30
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x04 }, 568832,   0, 0,  0 }, // AVC Intra 100 1080p25
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x08 }, 236544,   0, 0,  0 }, // AVC Intra 100 720p60
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x32,0x31,0x09 }, 284672,   0, 0,  0 }, // AVC Intra 100 720p50
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0d,0x04,0x01,0x02,0x02,0x01,0x32,0x40,0x01 },      0, 244, 0,  0 }, // AVC High 444 Intra
+    {{ 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0d,0x04,0x01,0x02,0x02,0x01,0x32,0x50,0x01 },      0,  44, 0, -1 }, // AVC CAVLC 444
 };
 
 static int mxf_parse_h264_frame(AVFormatContext *s, AVStream *st,
@@ -1919,10 +2221,12 @@
 {
     MXFContext *mxf = s->priv_data;
     MXFStreamContext *sc = st->priv_data;
+    AVCodecParameters *par = st->codecpar;
     static const int mxf_h264_num_codec_uls = sizeof(mxf_h264_codec_uls) / sizeof(mxf_h264_codec_uls[0]);
     const uint8_t *buf = pkt->data;
     const uint8_t *buf_end = pkt->data + pkt->size;
     uint32_t state = -1;
+    int long_gop = 0; // assume intra when there is no SPS header
     int extra_size = 512; // support AVC Intra files without SPS/PPS header
     int i, frame_size;
     uint8_t uid_found;
@@ -1937,7 +2241,8 @@
         --buf;
         switch (state & 0x1f) {
         case H264_NAL_SPS:
-            st->codecpar->profile = buf[1];
+            par->profile = buf[1];
+            long_gop = buf[2] & 0x10 ? 0 : 1; // constraint_set3_flag signals intra
             e->flags |= 0x40;
             break;
         case H264_NAL_PPS:
@@ -1956,25 +2261,27 @@
         return 1;
 
     sc->aspect_ratio = (AVRational){ 16, 9 }; // 16:9 is mandatory for broadcast HD
-    sc->component_depth = 10; // AVC Intra is always 10 Bit
-    sc->interlaced = st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 1 : 0;
-    if (sc->interlaced)
-        sc->field_dominance = 1; // top field first is mandatory for AVC Intra
+    sc->interlaced = par->field_order != AV_FIELD_PROGRESSIVE ? 1 : 0;
 
     uid_found = 0;
     frame_size = pkt->size + extra_size;
     for (i = 0; i < mxf_h264_num_codec_uls; i++) {
         if (frame_size == mxf_h264_codec_uls[i].frame_size && sc->interlaced == mxf_h264_codec_uls[i].interlaced) {
             sc->codec_ul = &mxf_h264_codec_uls[i].uid;
+            sc->component_depth = 10; // AVC Intra is always 10 Bit
+            if (sc->interlaced)
+                sc->field_dominance = 1; // top field first is mandatory for AVC Intra
             return 1;
-        } else if (st->codecpar->profile == mxf_h264_codec_uls[i].profile) {
+        } else if ((mxf_h264_codec_uls[i].profile == par->profile) &&
+                   ((mxf_h264_codec_uls[i].long_gop < 0) ||
+                   (mxf_h264_codec_uls[i].long_gop == long_gop))) {
             sc->codec_ul = &mxf_h264_codec_uls[i].uid;
             uid_found = 1;
         }
     }
 
     if (!uid_found) {
-        av_log(s, AV_LOG_ERROR, "AVC Intra 50/100 supported only\n");
+        av_log(s, AV_LOG_ERROR, "h264 profile not supported\n");
         return 0;
     }
 
@@ -2027,6 +2334,7 @@
             if ((pkt->data[i+1] & 0xf0) == 0x10) { // seq ext
                 st->codecpar->profile = pkt->data[i+1] & 0x07;
                 st->codecpar->level   = pkt->data[i+2] >> 4;
+                sc->low_delay = pkt->data[i+6] >> 7;
             } else if (i + 5 < pkt->size && (pkt->data[i+1] & 0xf0) == 0x80) { // pict coding ext
                 sc->interlaced = !(pkt->data[i+5] & 0x80); // progressive frame
                 if (sc->interlaced)
@@ -2035,9 +2343,14 @@
             }
         } else if (c == 0x1b8) { // gop
             if (pkt->data[i+4]>>6 & 0x01) { // closed
+                if (sc->seq_closed_gop == -1)
+                    sc->seq_closed_gop = 1;
                 sc->closed_gop = 1;
                 if (e->flags & 0x40) // sequence header present
                     e->flags |= 0x80; // random access
+            } else {
+                sc->seq_closed_gop = 0;
+                sc->closed_gop = 0;
             }
         } else if (c == 0x1b3) { // seq
             e->flags |= 0x40;
@@ -2149,11 +2462,13 @@
             // Default component depth to 8
             sc->component_depth = 8;
             sc->h_chroma_sub_sample = 2;
+            sc->v_chroma_sub_sample = 2;
             sc->color_siting = 0xFF;
 
             if (pix_desc) {
                 sc->component_depth     = pix_desc->comp[0].depth;
                 sc->h_chroma_sub_sample = 1 << pix_desc->log2_chroma_w;
+                sc->v_chroma_sub_sample = 1 << pix_desc->log2_chroma_h;
             }
             switch (ff_choose_chroma_location(s, st)) {
             case AVCHROMA_LOC_TOPLEFT: sc->color_siting = 0; break;
@@ -2169,13 +2484,24 @@
                        tbc.den, tbc.num);
                 return AVERROR(EINVAL);
             }
+            mxf->content_package_rate = ff_mxf_get_content_package_rate(tbc);
             mxf->time_base = spf->time_base;
             rate = av_inv_q(mxf->time_base);
             avpriv_set_pts_info(st, 64, mxf->time_base.num, mxf->time_base.den);
             if((ret = mxf_init_timecode(s, st, rate)) < 0)
                 return ret;
 
+            if (st->codecpar->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
+                sc->seq_closed_gop = -1; // unknown yet
+            }
+
             sc->video_bit_rate = st->codecpar->bit_rate;
+
+            if (s->oformat == &ff_mxf_d10_muxer ||
+                st->codecpar->codec_id == AV_CODEC_ID_DNXHD ||
+                st->codecpar->codec_id == AV_CODEC_ID_DVVIDEO)
+                mxf->cbr_index = 1;
+
             if (s->oformat == &ff_mxf_d10_muxer) {
                 if (st->codecpar->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
                     av_log(s, AV_LOG_ERROR, "error MXF D-10 only support MPEG-2 Video\n");
@@ -2196,14 +2522,10 @@
                     return -1;
                 }
 
-                mxf->edit_unit_byte_count = KAG_SIZE; // system element
-                mxf->edit_unit_byte_count += 16 + 4 + (uint64_t)sc->video_bit_rate *
-                    mxf->time_base.num / (8*mxf->time_base.den);
-                mxf->edit_unit_byte_count += klv_fill_size(mxf->edit_unit_byte_count);
-                mxf->edit_unit_byte_count += 16 + 4 + 4 + spf->samples_per_frame[0]*8*4;
-                mxf->edit_unit_byte_count += klv_fill_size(mxf->edit_unit_byte_count);
-
                 sc->signal_standard = 1;
+                sc->color_siting = 0;
+                sc->frame_size = (int64_t)sc->video_bit_rate *
+                    mxf->time_base.num / (8*mxf->time_base.den);
             }
             if (mxf->signal_standard >= 0)
                 sc->signal_standard = mxf->signal_standard;
@@ -2223,6 +2545,7 @@
                     av_log(s, AV_LOG_ERROR, "MXF D-10 only support 16 or 24 bits le audio\n");
                 }
                 sc->index = ((MXFStreamContext*)s->streams[0]->priv_data)->index + 1;
+                sc->frame_size = 4 + 8 * spf[0].samples_per_frame[0] * 4;
             } else if (s->oformat == &ff_mxf_opatom_muxer) {
                 AVRational tbc = av_inv_q(mxf->audio_edit_rate);
 
@@ -2251,6 +2574,20 @@
                 sc->index = INDEX_WAV;
             } else {
                 mxf->slice_count = 1;
+                sc->frame_size = (st->codecpar->channels * spf[0].samples_per_frame[0] *
+                                  av_get_bits_per_sample(st->codecpar->codec_id)) / 8;
+            }
+        } else if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
+            AVDictionaryEntry *e = av_dict_get(st->metadata, "data_type", NULL, 0);
+            if (e && !strcmp(e->value, "vbi_vanc_smpte_436M")) {
+                sc->index = 38;
+            } else {
+                av_log(s, AV_LOG_ERROR, "track %d: unsupported data type\n", i);
+                return -1;
+            }
+            if (st->index != s->nb_streams - 1) {
+                av_log(s, AV_LOG_ERROR, "data track must be placed last\n");
+                return -1;
             }
         }
 
@@ -2321,14 +2658,22 @@
     AVIOContext *pb = s->pb;
     unsigned frame;
     uint32_t time_code;
+    int i, system_item_bitmap = 0x58; // UL, user date/time stamp, picture present
 
     frame = mxf->last_indexed_edit_unit + mxf->edit_units_count;
 
     // write system metadata pack
     avio_write(pb, system_metadata_pack_key, 16);
     klv_encode_ber4_length(pb, 57);
-    avio_w8(pb, 0x5c); // UL, user date/time stamp, picture and sound item present
-    avio_w8(pb, 0x04); // content package rate
+
+    for (i = 0; i < s->nb_streams; i++) {
+        if (s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
+            system_item_bitmap |= 0x4;
+        else if (s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_DATA)
+            system_item_bitmap |= 0x2;
+    }
+    avio_w8(pb, system_item_bitmap);
+    avio_w8(pb, mxf->content_package_rate); // content package rate
     avio_w8(pb, 0x00); // content package type
     avio_wb16(pb, 0x00); // channel handle
     avio_wb16(pb, (mxf->tc.start + frame) & 0xFFFF); // continuity count, supposed to overflow
@@ -2356,35 +2701,6 @@
     mxf_write_umid(s, 1);
 }
 
-static void mxf_write_d10_video_packet(AVFormatContext *s, AVStream *st, AVPacket *pkt)
-{
-    MXFContext *mxf = s->priv_data;
-    AVIOContext *pb = s->pb;
-    MXFStreamContext *sc = st->priv_data;
-    int packet_size = (uint64_t)sc->video_bit_rate*mxf->time_base.num /
-        (8*mxf->time_base.den); // frame size
-    int pad;
-
-    packet_size += 16 + 4;
-    packet_size += klv_fill_size(packet_size);
-
-    klv_encode_ber4_length(pb, pkt->size);
-    avio_write(pb, pkt->data, pkt->size);
-
-    // ensure CBR muxing by padding to correct video frame size
-    pad = packet_size - pkt->size - 16 - 4;
-    if (pad > 20) {
-        avio_write(s->pb, klv_fill_key, 16);
-        pad -= 16 + 4;
-        klv_encode_ber4_length(s->pb, pad);
-        ffio_fill(s->pb, 0, pad);
-        av_assert1(!(avio_tell(s->pb) & (KAG_SIZE-1)));
-    } else {
-        av_log(s, AV_LOG_WARNING, "cannot fill d-10 video packet\n");
-        ffio_fill(s->pb, 0, pad);
-    }
-}
-
 static void mxf_write_d10_audio_packet(AVFormatContext *s, AVStream *st, AVPacket *pkt)
 {
     MXFContext *mxf = s->priv_data;
@@ -2468,6 +2784,27 @@
     return 0;
 }
 
+static void mxf_compute_edit_unit_byte_count(AVFormatContext *s)
+{
+    MXFContext *mxf = s->priv_data;
+    int i;
+
+    if (s->oformat == &ff_mxf_opatom_muxer) {
+        MXFStreamContext *sc = s->streams[0]->priv_data;
+        mxf->edit_unit_byte_count = sc->frame_size;
+        return;
+    }
+
+    mxf->edit_unit_byte_count = KAG_SIZE; // system element
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+        MXFStreamContext *sc = st->priv_data;
+        sc->slice_offset = mxf->edit_unit_byte_count;
+        mxf->edit_unit_byte_count += 16 + 4 + sc->frame_size;
+        mxf->edit_unit_byte_count += klv_fill_size(mxf->edit_unit_byte_count);
+    }
+}
+
 static int mxf_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
     MXFContext *mxf = s->priv_data;
@@ -2477,7 +2814,7 @@
     MXFIndexEntry ie = {0};
     int err;
 
-    if (!mxf->edit_unit_byte_count && !(mxf->edit_units_count % EDIT_UNITS_PER_BODY)) {
+    if (!mxf->cbr_index && !mxf->edit_unit_byte_count && !(mxf->edit_units_count % EDIT_UNITS_PER_BODY)) {
         if ((err = av_reallocp_array(&mxf->index_entries, mxf->edit_units_count
                                      + EDIT_UNITS_PER_BODY, sizeof(*mxf->index_entries))) < 0) {
             mxf->edit_units_count = 0;
@@ -2508,6 +2845,16 @@
         }
     }
 
+    if (mxf->cbr_index) {
+        if (pkt->size != sc->frame_size && st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
+            av_log(s, AV_LOG_ERROR, "track %d: frame size does not match index unit size, %d != %d\n",
+                   st->index, pkt->size, sc->frame_size);
+            return -1;
+        }
+        if (!mxf->header_written)
+            mxf_compute_edit_unit_byte_count(s);
+    }
+
     if (s->oformat == &ff_mxf_opatom_muxer)
         return mxf_write_opatom_packet(s, pkt, &ie);
 
@@ -2556,11 +2903,9 @@
 
     mxf_write_klv_fill(s);
     avio_write(pb, sc->track_essence_element_key, 16); // write key
-    if (s->oformat == &ff_mxf_d10_muxer) {
-        if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
-            mxf_write_d10_video_packet(s, st, pkt);
-        else
-            mxf_write_d10_audio_packet(s, st, pkt);
+    if (s->oformat == &ff_mxf_d10_muxer &&
+        st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+        mxf_write_d10_audio_packet(s, st, pkt);
     } else {
         klv_encode_ber4_length(pb, pkt->size); // write length
         avio_write(pb, pkt->data, pkt->size);
@@ -2603,7 +2948,7 @@
 {
     MXFContext *mxf = s->priv_data;
     AVIOContext *pb = s->pb;
-    int err = 0;
+    int i, err = 0;
 
     if (!mxf->header_written ||
         (s->oformat == &ff_mxf_opatom_muxer && !mxf->body_partition_offset)) {
@@ -2647,6 +2992,11 @@
             if ((err = mxf_write_partition(s, 0, 0, header_closed_partition_key, 1)) < 0)
                 goto end;
         }
+        // update footer partition offset
+        for (i = 0; i < mxf->body_partitions_count; i++) {
+            avio_seek(pb, mxf->body_partition_offset[i]+44, SEEK_SET);
+            avio_wb64(pb, mxf->footer_partition_offset);
+        }
     }
 
 end:
@@ -2677,6 +3027,9 @@
             while (pktl) {
                 if (!stream_count || pktl->pkt.stream_index == 0)
                     break;
+                // update last packet in packet buffer
+                if (s->streams[pktl->pkt.stream_index]->last_in_packet_buffer != pktl)
+                    s->streams[pktl->pkt.stream_index]->last_in_packet_buffer = pktl;
                 last = pktl;
                 pktl = pktl->next;
                 stream_count--;
@@ -2684,9 +3037,6 @@
             // purge packet queue
             while (pktl) {
                 AVPacketList *next = pktl->next;
-
-                if(s->streams[pktl->pkt.stream_index]->last_in_packet_buffer == pktl)
-                    s->streams[pktl->pkt.stream_index]->last_in_packet_buffer= NULL;
                 av_packet_unref(&pktl->pkt);
                 av_freep(&pktl);
                 pktl = next;

diff --git a/libavformat/mxg.c b/libavformat/mxg.c
index 6fbf99c..fe5879e 100644
--- a/libavformat/mxg.c
+++ b/libavformat/mxg.c

@@ -169,11 +169,14 @@
                     continue;
                 }
 
+                size = mxg->buffer_ptr - mxg->soi_ptr;
+                ret = av_new_packet(pkt, size);
+                if (ret < 0)
+                    return ret;
+                memcpy(pkt->data, mxg->soi_ptr, size);
+
                 pkt->pts = pkt->dts = mxg->dts;
                 pkt->stream_index = 0;
-                pkt->buf  = NULL;
-                pkt->size = mxg->buffer_ptr - mxg->soi_ptr;
-                pkt->data = mxg->soi_ptr;
 
                 if (mxg->soi_ptr - mxg->buffer > mxg->cache_size) {
                     if (mxg->cache_size > 0) {
@@ -206,12 +209,14 @@
                 mxg->buffer_ptr += size;
 
                 if (marker == APP13 && size >= 16) { /* audio data */
+                    ret = av_new_packet(pkt, size - 14);
+                    if (ret < 0)
+                        return ret;
+                    memcpy(pkt->data, startmarker_ptr + 16, size - 14);
+
                     /* time (GMT) of first sample in usec since 1970, little-endian */
                     pkt->pts = pkt->dts = AV_RL64(startmarker_ptr + 8);
                     pkt->stream_index = 1;
-                    pkt->buf  = NULL;
-                    pkt->size = size - 14;
-                    pkt->data = startmarker_ptr + 16;
 
                     if (startmarker_ptr - mxg->buffer > mxg->cache_size) {
                         if (mxg->cache_size > 0) {

diff --git a/libavformat/network.c b/libavformat/network.c
index b3987a4..5664455 100644
--- a/libavformat/network.c
+++ b/libavformat/network.c

@@ -24,46 +24,42 @@
 #include "url.h"
 #include "libavcodec/internal.h"
 #include "libavutil/avutil.h"
+#include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/time.h"
 
 int ff_tls_init(void)
 {
-#if CONFIG_TLS_OPENSSL_PROTOCOL
+#if CONFIG_TLS_PROTOCOL
+#if CONFIG_OPENSSL
     int ret;
     if ((ret = ff_openssl_init()) < 0)
         return ret;
 #endif
-#if CONFIG_TLS_GNUTLS_PROTOCOL
+#if CONFIG_GNUTLS
     ff_gnutls_init();
 #endif
+#endif
     return 0;
 }
 
 void ff_tls_deinit(void)
 {
-#if CONFIG_TLS_OPENSSL_PROTOCOL
+#if CONFIG_TLS_PROTOCOL
+#if CONFIG_OPENSSL
     ff_openssl_deinit();
 #endif
-#if CONFIG_TLS_GNUTLS_PROTOCOL
+#if CONFIG_GNUTLS
     ff_gnutls_deinit();
 #endif
+#endif
 }
 
-int ff_network_inited_globally;
-
 int ff_network_init(void)
 {
 #if HAVE_WINSOCK2_H
     WSADATA wsaData;
-#endif
 
-    if (!ff_network_inited_globally)
-        av_log(NULL, AV_LOG_WARNING, "Using network protocols without global "
-                                     "network initialization. Please use "
-                                     "avformat_network_init(), this will "
-                                     "become mandatory later.\n");
-#if HAVE_WINSOCK2_H
     if (WSAStartup(MAKEWORD(1,1), &wsaData))
         return 0;
 #endif
@@ -99,6 +95,24 @@
     }
 }
 
+int ff_network_sleep_interruptible(int64_t timeout, AVIOInterruptCB *int_cb)
+{
+    int64_t wait_start = av_gettime_relative();
+
+    while (1) {
+        int64_t time_left;
+
+        if (ff_check_interrupt(int_cb))
+            return AVERROR_EXIT;
+
+        time_left = timeout - (av_gettime_relative() - wait_start);
+        if (time_left <= 0)
+            return AVERROR(ETIMEDOUT);
+
+        av_usleep(FFMIN(time_left, POLLING_TIME * 1000));
+    }
+}
+
 void ff_network_close(void)
 {
 #if HAVE_WINSOCK2_H
@@ -152,14 +166,17 @@
         if (ff_check_interrupt(cb))
             return AVERROR_EXIT;
         ret = poll(p, nfds, POLLING_TIME);
-        if (ret != 0)
+        if (ret != 0) {
+            if (ret < 0)
+                ret = ff_neterrno();
+            if (ret == AVERROR(EINTR))
+                continue;
             break;
+        }
     } while (timeout <= 0 || runs-- > 0);
 
     if (!ret)
         return AVERROR(ETIMEDOUT);
-    if (ret < 0)
-        return ff_neterrno();
     return ret;
 }
 
@@ -181,8 +198,11 @@
 #endif
     }
 #ifdef SO_NOSIGPIPE
-    if (fd != -1)
-        setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &(int){1}, sizeof(int));
+    if (fd != -1) {
+        if (setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &(int){1}, sizeof(int))) {
+             av_log(NULL, AV_LOG_WARNING, "setsockopt(SO_NOSIGPIPE) failed\n");
+        }
+    }
 #endif
     return fd;
 }
@@ -280,6 +300,230 @@
     return ret;
 }
 
+static void interleave_addrinfo(struct addrinfo *base)
+{
+    struct addrinfo **next = &base->ai_next;
+    while (*next) {
+        struct addrinfo *cur = *next;
+        // Iterate forward until we find an entry of a different family.
+        if (cur->ai_family == base->ai_family) {
+            next = &cur->ai_next;
+            continue;
+        }
+        if (cur == base->ai_next) {
+            // If the first one following base is of a different family, just
+            // move base forward one step and continue.
+            base = cur;
+            next = &base->ai_next;
+            continue;
+        }
+        // Unchain cur from the rest of the list from its current spot.
+        *next = cur->ai_next;
+        // Hook in cur directly after base.
+        cur->ai_next = base->ai_next;
+        base->ai_next = cur;
+        // Restart with a new base. We know that before moving the cur element,
+        // everything between the previous base and cur had the same family,
+        // different from cur->ai_family. Therefore, we can keep next pointing
+        // where it was, and continue from there with base at the one after
+        // cur.
+        base = cur->ai_next;
+    }
+}
+
+static void print_address_list(void *ctx, const struct addrinfo *addr,
+                               const char *title)
+{
+    char hostbuf[100], portbuf[20];
+    av_log(ctx, AV_LOG_DEBUG, "%s:\n", title);
+    while (addr) {
+        getnameinfo(addr->ai_addr, addr->ai_addrlen,
+                    hostbuf, sizeof(hostbuf), portbuf, sizeof(portbuf),
+                    NI_NUMERICHOST | NI_NUMERICSERV);
+        av_log(ctx, AV_LOG_DEBUG, "Address %s port %s\n", hostbuf, portbuf);
+        addr = addr->ai_next;
+    }
+}
+
+struct ConnectionAttempt {
+    int fd;
+    int64_t deadline_us;
+    struct addrinfo *addr;
+};
+
+// Returns < 0 on error, 0 on successfully started connection attempt,
+// > 0 for a connection that succeeded already.
+static int start_connect_attempt(struct ConnectionAttempt *attempt,
+                                 struct addrinfo **ptr, int timeout_ms,
+                                 URLContext *h,
+                                 void (*customize_fd)(void *, int), void *customize_ctx)
+{
+    struct addrinfo *ai = *ptr;
+    int ret;
+
+    *ptr = ai->ai_next;
+
+    attempt->fd = ff_socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+    if (attempt->fd < 0)
+        return ff_neterrno();
+    attempt->deadline_us = av_gettime_relative() + timeout_ms * 1000;
+    attempt->addr = ai;
+
+    ff_socket_nonblock(attempt->fd, 1);
+
+    if (customize_fd)
+        customize_fd(customize_ctx, attempt->fd);
+
+    while ((ret = connect(attempt->fd, ai->ai_addr, ai->ai_addrlen))) {
+        ret = ff_neterrno();
+        switch (ret) {
+        case AVERROR(EINTR):
+            if (ff_check_interrupt(&h->interrupt_callback)) {
+                closesocket(attempt->fd);
+                attempt->fd = -1;
+                return AVERROR_EXIT;
+            }
+            continue;
+        case AVERROR(EINPROGRESS):
+        case AVERROR(EAGAIN):
+            return 0;
+        default:
+            closesocket(attempt->fd);
+            attempt->fd = -1;
+            return ret;
+        }
+    }
+    return 1;
+}
+
+// Try a new connection to another address after 200 ms, as suggested in
+// RFC 8305 (or sooner if an earlier attempt fails).
+#define NEXT_ATTEMPT_DELAY_MS 200
+
+int ff_connect_parallel(struct addrinfo *addrs, int timeout_ms_per_address,
+                        int parallel, URLContext *h, int *fd,
+                        void (*customize_fd)(void *, int), void *customize_ctx)
+{
+    struct ConnectionAttempt attempts[3];
+    struct pollfd pfd[3];
+    int nb_attempts = 0, i, j;
+    int64_t next_attempt_us = av_gettime_relative(), next_deadline_us;
+    int last_err = AVERROR(EIO);
+    socklen_t optlen;
+    char errbuf[100], hostbuf[100], portbuf[20];
+
+    if (parallel > FF_ARRAY_ELEMS(attempts))
+        parallel = FF_ARRAY_ELEMS(attempts);
+
+    print_address_list(h, addrs, "Original list of addresses");
+    // This mutates the list, but the head of the list is still the same
+    // element, so the caller, who owns the list, doesn't need to get
+    // an updated pointer.
+    interleave_addrinfo(addrs);
+    print_address_list(h, addrs, "Interleaved list of addresses");
+
+    while (nb_attempts > 0 || addrs) {
+        // Start a new connection attempt, if possible.
+        if (nb_attempts < parallel && addrs) {
+            getnameinfo(addrs->ai_addr, addrs->ai_addrlen,
+                        hostbuf, sizeof(hostbuf), portbuf, sizeof(portbuf),
+                        NI_NUMERICHOST | NI_NUMERICSERV);
+            av_log(h, AV_LOG_VERBOSE, "Starting connection attempt to %s port %s\n",
+                                      hostbuf, portbuf);
+            last_err = start_connect_attempt(&attempts[nb_attempts], &addrs,
+                                             timeout_ms_per_address, h,
+                                             customize_fd, customize_ctx);
+            if (last_err < 0) {
+                av_strerror(last_err, errbuf, sizeof(errbuf));
+                av_log(h, AV_LOG_VERBOSE, "Connected attempt failed: %s\n",
+                                          errbuf);
+                continue;
+            }
+            if (last_err > 0) {
+                for (i = 0; i < nb_attempts; i++)
+                    closesocket(attempts[i].fd);
+                *fd = attempts[nb_attempts].fd;
+                return 0;
+            }
+            pfd[nb_attempts].fd = attempts[nb_attempts].fd;
+            pfd[nb_attempts].events = POLLOUT;
+            next_attempt_us = av_gettime_relative() + NEXT_ATTEMPT_DELAY_MS * 1000;
+            nb_attempts++;
+        }
+
+        av_assert0(nb_attempts > 0);
+        // The connection attempts are sorted from oldest to newest, so the
+        // first one will have the earliest deadline.
+        next_deadline_us = attempts[0].deadline_us;
+        // If we can start another attempt in parallel, wait until that time.
+        if (nb_attempts < parallel && addrs)
+            next_deadline_us = FFMIN(next_deadline_us, next_attempt_us);
+        last_err = ff_poll_interrupt(pfd, nb_attempts,
+                                     (next_deadline_us - av_gettime_relative())/1000,
+                                     &h->interrupt_callback);
+        if (last_err < 0 && last_err != AVERROR(ETIMEDOUT))
+            break;
+
+        // Check the status from the poll output.
+        for (i = 0; i < nb_attempts; i++) {
+            last_err = 0;
+            if (pfd[i].revents) {
+                // Some sort of action for this socket, check its status (either
+                // a successful connection or an error).
+                optlen = sizeof(last_err);
+                if (getsockopt(attempts[i].fd, SOL_SOCKET, SO_ERROR, &last_err, &optlen))
+                    last_err = ff_neterrno();
+                else if (last_err != 0)
+                    last_err = AVERROR(last_err);
+                if (last_err == 0) {
+                    // Everything is ok, we seem to have a successful
+                    // connection. Close other sockets and return this one.
+                    for (j = 0; j < nb_attempts; j++)
+                        if (j != i)
+                            closesocket(attempts[j].fd);
+                    *fd = attempts[i].fd;
+                    getnameinfo(attempts[i].addr->ai_addr, attempts[i].addr->ai_addrlen,
+                                hostbuf, sizeof(hostbuf), portbuf, sizeof(portbuf),
+                                NI_NUMERICHOST | NI_NUMERICSERV);
+                    av_log(h, AV_LOG_VERBOSE, "Successfully connected to %s port %s\n",
+                                              hostbuf, portbuf);
+                    return 0;
+                }
+            }
+            if (attempts[i].deadline_us < av_gettime_relative() && !last_err)
+                last_err = AVERROR(ETIMEDOUT);
+            if (!last_err)
+                continue;
+            // Error (or timeout) for this socket; close the socket and remove
+            // it from the attempts/pfd arrays, to let a new attempt start
+            // directly.
+            getnameinfo(attempts[i].addr->ai_addr, attempts[i].addr->ai_addrlen,
+                        hostbuf, sizeof(hostbuf), portbuf, sizeof(portbuf),
+                        NI_NUMERICHOST | NI_NUMERICSERV);
+            av_strerror(last_err, errbuf, sizeof(errbuf));
+            av_log(h, AV_LOG_VERBOSE, "Connection attempt to %s port %s "
+                                      "failed: %s\n", hostbuf, portbuf, errbuf);
+            closesocket(attempts[i].fd);
+            memmove(&attempts[i], &attempts[i + 1],
+                    (nb_attempts - i - 1) * sizeof(*attempts));
+            memmove(&pfd[i], &pfd[i + 1],
+                    (nb_attempts - i - 1) * sizeof(*pfd));
+            i--;
+            nb_attempts--;
+        }
+    }
+    for (i = 0; i < nb_attempts; i++)
+        closesocket(attempts[i].fd);
+    if (last_err >= 0)
+        last_err = AVERROR(ECONNREFUSED);
+    if (last_err != AVERROR_EXIT) {
+        av_strerror(last_err, errbuf, sizeof(errbuf));
+        av_log(h, AV_LOG_ERROR, "Connection to %s failed: %s\n",
+               h->filename, errbuf);
+    }
+    return last_err;
+}
+
 static int match_host_pattern(const char *pattern, const char *hostname)
 {
     int len_p, len_h;
@@ -333,3 +577,10 @@
     av_free(buf);
     return ret;
 }
+
+void ff_log_net_error(void *ctx, int level, const char* prefix)
+{
+    char errbuf[100];
+    av_strerror(ff_neterrno(), errbuf, sizeof(errbuf));
+    av_log(ctx, level, "%s: %s\n", prefix, errbuf);
+}

diff --git a/libavformat/network.h b/libavformat/network.h
index f83c796..7f46730 100644
--- a/libavformat/network.h
+++ b/libavformat/network.h

@@ -59,6 +59,7 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
+#include <netinet/tcp.h>
 #include <netdb.h>
 
 #define ff_neterrno() AVERROR(errno)
@@ -74,7 +75,6 @@
 
 int ff_socket_nonblock(int socket, int enable);
 
-extern int ff_network_inited_globally;
 int ff_network_init(void);
 void ff_network_close(void);
 
@@ -87,15 +87,22 @@
  * This works similarly to ff_network_wait_fd, but waits up to 'timeout' microseconds
  * Uses ff_network_wait_fd in a loop
  *
- * @fd Socket descriptor
- * @write Set 1 to wait for socket able to be read, 0 to be written
- * @timeout Timeout interval, in microseconds. Actual precision is 100000 mcs, due to ff_network_wait_fd usage
+ * @param fd Socket descriptor
+ * @param write Set 1 to wait for socket able to be read, 0 to be written
+ * @param timeout Timeout interval, in microseconds. Actual precision is 100000 mcs, due to ff_network_wait_fd usage
  * @param int_cb Interrupt callback, is checked before each ff_network_wait_fd call
  * @return 0 if data can be read/written, AVERROR(ETIMEDOUT) if timeout expired, or negative error code
  */
 int ff_network_wait_fd_timeout(int fd, int write, int64_t timeout, AVIOInterruptCB *int_cb);
 
-int ff_inet_aton (const char * str, struct in_addr * add);
+/**
+ * Waits for up to 'timeout' microseconds. If the usert's int_cb is set and
+ * triggered, return before that.
+ * @param timeout Timeout in microseconds. Maybe have lower actual precision.
+ * @param int_cb Interrupt callback, is checked regularly.
+ * @return AVERROR(ETIMEDOUT) if timeout expirted, AVERROR_EXIT if interrupted by int_cb
+ */
+int ff_network_sleep_interruptible(int64_t timeout, AVIOInterruptCB *int_cb);
 
 #if !HAVE_STRUCT_SOCKADDR_STORAGE
 struct sockaddr_storage {
@@ -297,4 +304,34 @@
 
 int ff_socket(int domain, int type, int protocol);
 
+void ff_log_net_error(void *ctx, int level, const char* prefix);
+
+/**
+ * Connect to any of the given addrinfo addresses, with multiple attempts
+ * running in parallel.
+ *
+ * @param addrs    The list of addresses to try to connect to.
+ *                 This list will be mutated internally, but the list head
+ *                 will remain as such, so this doesn't affect the caller
+ *                 freeing the list afterwards.
+ * @param timeout_ms_per_address The number of milliseconds to wait for each
+ *                 connection attempt. Since multiple addresses are tried,
+ *                 some of them in parallel, the total run time will at most
+ *                 be timeout_ms_per_address*ceil(nb_addrs/parallel) +
+ *                 (parallel - 1) * NEXT_ATTEMPT_DELAY_MS.
+ * @param parallel The maximum number of connections to attempt in parallel.
+ *                 This is limited to an internal maximum capacity.
+ * @param h        URLContext providing interrupt check
+ *                 callback and logging context.
+ * @param fd       If successful, the connected socket is returned here.
+ * @param customize_fd Function that will be called for each socket created,
+ *                 to allow the caller to set socket options before calling
+ *                 connect() on it, may be NULL.
+ * @param customize_ctx Context parameter passed to customize_fd.
+ * @return         0 on success, AVERROR on failure.
+ */
+int ff_connect_parallel(struct addrinfo *addrs, int timeout_ms_per_address,
+                        int parallel, URLContext *h, int *fd,
+                        void (*customize_fd)(void *, int), void *customize_ctx);
+
 #endif /* AVFORMAT_NETWORK_H */

diff --git a/libavformat/nspdec.c b/libavformat/nspdec.c
new file mode 100644
index 0000000..34c747b
--- /dev/null
+++ b/libavformat/nspdec.c

@@ -0,0 +1,108 @@
+/*
+ * NSP demuxer
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/intreadwrite.h"
+#include "avformat.h"
+#include "internal.h"
+#include "pcm.h"
+
+static int nsp_probe(AVProbeData *p)
+{
+    if (AV_RB32(p->buf) == AV_RB32("FORM") &&
+        AV_RB32(p->buf + 4) == AV_RB32("DS16"))
+        return AVPROBE_SCORE_MAX;
+    return 0;
+}
+
+static int nsp_read_header(AVFormatContext *s)
+{
+    int channels = 0, rate = 0;
+    uint32_t chunk, size;
+    AVStream *st;
+    int64_t pos;
+
+    avio_skip(s->pb, 12);
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+
+    while (!avio_feof(s->pb)) {
+        char value[1024];
+
+        chunk = avio_rb32(s->pb);
+        size  = avio_rl32(s->pb);
+        pos   = avio_tell(s->pb);
+
+        switch (chunk) {
+        case MKBETAG('H', 'E', 'D', 'R'):
+        case MKBETAG('H', 'D', 'R', '8'):
+            if (size < 32)
+                return AVERROR_INVALIDDATA;
+            avio_skip(s->pb, 20);
+            rate = avio_rl32(s->pb);
+            avio_skip(s->pb, size - (avio_tell(s->pb) - pos));
+            break;
+        case MKBETAG('N', 'O', 'T', 'E'):
+            avio_get_str(s->pb, size, value, sizeof(value));
+            av_dict_set(&s->metadata, "Comment", value, 0);
+            avio_skip(s->pb, size & 1);
+            break;
+        case MKBETAG('S', 'D', 'A', 'B'):
+            channels = 2;
+            break;
+        case MKBETAG('S', 'D', '_', '2'):
+        case MKBETAG('S', 'D', '_', '3'):
+        case MKBETAG('S', 'D', '_', '4'):
+        case MKBETAG('S', 'D', '_', '5'):
+        case MKBETAG('S', 'D', '_', '6'):
+        case MKBETAG('S', 'D', '_', '7'):
+        case MKBETAG('S', 'D', '_', '8'):
+            av_log(s, AV_LOG_WARNING, "Unsupported chunk!\n");
+        case MKBETAG('S', 'D', 'A', '_'):
+        case MKBETAG('S', 'D', '_', 'A'):
+            channels = 1;
+            break;
+        }
+
+        if (channels)
+            break;
+    }
+
+    st->codecpar->codec_type  = AVMEDIA_TYPE_AUDIO;
+    st->codecpar->channels    = channels;
+    st->codecpar->sample_rate = rate;
+    st->codecpar->codec_id    = AV_CODEC_ID_PCM_S16LE;
+    st->codecpar->block_align = 2 * channels;
+
+    return 0;
+}
+
+AVInputFormat ff_nsp_demuxer = {
+    .name           = "nsp",
+    .long_name      = NULL_IF_CONFIG_SMALL("Computerized Speech Lab NSP"),
+    .read_probe     = nsp_probe,
+    .read_header    = nsp_read_header,
+    .read_packet    = ff_pcm_read_packet,
+    .read_seek      = ff_pcm_read_seek,
+    .extensions     = "nsp",
+    .flags          = AVFMT_GENERIC_INDEX,
+};

diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c
index d8ce656..92f7d17 100644
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c

@@ -176,6 +176,7 @@
     int16_t avsync;
     AVRational framerate;
     uint32_t *nsvs_timestamps;
+    int nsvf;
 } NSVContext;
 
 static const AVCodecTag nsv_codec_video_tags[] = {
@@ -266,6 +267,12 @@
 
     nsv->state = NSV_UNSYNC; /* in case we fail */
 
+    if (nsv->nsvf) {
+        av_log(s, AV_LOG_TRACE, "Multiple NSVf\n");
+        return 0;
+    }
+    nsv->nsvf = 1;
+
     size = avio_rl32(pb);
     if (size < 28)
         return -1;

diff --git a/libavformat/nut.c b/libavformat/nut.c
index 592fe4d..e65f424 100644
--- a/libavformat/nut.c
+++ b/libavformat/nut.c

@@ -154,6 +154,9 @@
     { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '4',   0,  16) },
     { AV_CODEC_ID_RAWVIDEO,         MKTAG(16,    0, '4', 'Y') },
 
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '1',   0,  14) },
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG(14,    0, '1', 'Y') },
+
     { AV_CODEC_ID_RAWVIDEO,         MKTAG('G', '3',   0,   8) },
 
     { AV_CODEC_ID_RAWVIDEO,         MKTAG('G', '3',   0,   9) },

diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index 193a286..27d16a3 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c

@@ -128,7 +128,10 @@
     ogg->state = ost->next;
 
         for (i = 0; i < ogg->nstreams; i++) {
-            av_freep(&ogg->streams[i].buf);
+            struct ogg_stream *stream = &ogg->streams[i];
+            av_freep(&stream->buf);
+            av_freep(&stream->new_metadata);
+
             if (i >= ost->nstreams || !ost->streams[i].private) {
                 free_stream(s, i);
             }
@@ -543,7 +546,11 @@
     os->incomplete = 0;
 
     if (os->header) {
-        os->header = os->codec->header(s, idx);
+        if ((ret = os->codec->header(s, idx)) < 0) {
+            av_log(s, AV_LOG_ERROR, "Header processing failed: %s\n", av_err2str(ret));
+            return ret;
+        }
+        os->header = ret;
         if (!os->header) {
             os->segp  = segp;
             os->psize = psize;
@@ -574,8 +581,12 @@
     } else {
         os->pflags    = 0;
         os->pduration = 0;
-        if (os->codec && os->codec->packet)
-            os->codec->packet(s, idx);
+        if (os->codec && os->codec->packet) {
+            if ((ret = os->codec->packet(s, idx)) < 0) {
+                av_log(s, AV_LOG_ERROR, "Packet processing failed: %s\n", av_err2str(ret));
+                return ret;
+            }
+        }
         if (sid)
             *sid = idx;
         if (dstart)

diff --git a/libavformat/oggparsedaala.c b/libavformat/oggparsedaala.c
index a373b41..e944470 100644
--- a/libavformat/oggparsedaala.c
+++ b/libavformat/oggparsedaala.c

@@ -218,6 +218,7 @@
     int seg, duration = 1;
     struct ogg *ogg = s->priv_data;
     struct ogg_stream *os = ogg->streams + idx;
+    int64_t pts;
 
     /*
      * first packet handling: here we parse the duration of each packet in the
@@ -230,7 +231,10 @@
             if (os->segments[seg] < 255)
                 duration++;
 
-        os->lastpts = os->lastdts = daala_gptopts(s, idx, os->granule, NULL) - duration;
+        pts = daala_gptopts(s, idx, os->granule, NULL);
+        if (pts != AV_NOPTS_VALUE)
+            pts -= duration;
+        os->lastpts = os->lastdts = pts;
         if(s->streams[idx]->start_time == AV_NOPTS_VALUE) {
             s->streams[idx]->start_time = os->lastpts;
             if (s->streams[idx]->duration != AV_NOPTS_VALUE)

diff --git a/libavformat/oggparseogm.c b/libavformat/oggparseogm.c
index e7a501b..a074537 100644
--- a/libavformat/oggparseogm.c
+++ b/libavformat/oggparseogm.c

@@ -24,7 +24,6 @@
 
 #include <stdlib.h>
 
-#include "libavutil/avassert.h"
 #include "libavutil/intreadwrite.h"
 
 #include "libavcodec/bytestream.h"
@@ -106,10 +105,10 @@
                 size -= 4;
             }
             if (size > 52) {
-                av_assert0(AV_INPUT_BUFFER_PADDING_SIZE <= 52);
                 size -= 52;
                 if (bytestream2_get_bytes_left(&p) < size)
                     return AVERROR_INVALIDDATA;
+                av_freep(&st->codecpar->extradata);
                 if (ff_alloc_extradata(st->codecpar, size) < 0)
                     return AVERROR(ENOMEM);
                 bytestream2_get_buffer(&p, st->codecpar->extradata, st->codecpar->extradata_size);
@@ -177,11 +176,14 @@
         os->pflags |= AV_PKT_FLAG_KEY;
 
     lb = ((*p & 2) << 1) | ((*p >> 6) & 3);
+    if (os->psize < lb + 1)
+        return AVERROR_INVALIDDATA;
+
     os->pstart += lb + 1;
     os->psize -= lb + 1;
 
     while (lb--)
-        os->pduration += p[lb+1] << (lb*8);
+        os->pduration += (uint64_t)p[lb+1] << (lb*8);
 
     return 0;
 }

diff --git a/libavformat/oggparseopus.c b/libavformat/oggparseopus.c
index f45ad84..cd34cf2 100644
--- a/libavformat/oggparseopus.c
+++ b/libavformat/oggparseopus.c

@@ -62,6 +62,7 @@
         /*gain                = AV_RL16(packet + 16);*/
         /*channel_map         = AV_RL8 (packet + 18);*/
 
+        av_freep(&st->codecpar->extradata);
         if (ff_alloc_extradata(st->codecpar, os->psize))
             return AVERROR(ENOMEM);
 

diff --git a/libavformat/oggparsetheora.c b/libavformat/oggparsetheora.c
index b14f9f0..b0c0edc 100644
--- a/libavformat/oggparsetheora.c
+++ b/libavformat/oggparsetheora.c

@@ -181,6 +181,7 @@
 
     if ((!os->lastpts || os->lastpts == AV_NOPTS_VALUE) && !(os->flags & OGG_FLAG_EOS)) {
         int seg;
+        int64_t pts;
 
         duration = 1;
         for (seg = os->segp; seg < os->nsegs; seg++) {
@@ -188,7 +189,10 @@
                 duration ++;
         }
 
-        os->lastpts = os->lastdts   = theora_gptopts(s, idx, os->granule, NULL) - duration;
+        pts = theora_gptopts(s, idx, os->granule, NULL);
+        if (pts != AV_NOPTS_VALUE)
+            pts -= duration;
+        os->lastpts = os->lastdts = pts;
         if(s->streams[idx]->start_time == AV_NOPTS_VALUE) {
             s->streams[idx]->start_time = os->lastpts;
             if (s->streams[idx]->duration > 0)

diff --git a/libavformat/oggparsevorbis.c b/libavformat/oggparsevorbis.c
index 65b1998..bcfd246 100644
--- a/libavformat/oggparsevorbis.c
+++ b/libavformat/oggparsevorbis.c

@@ -230,6 +230,10 @@
 
     len = priv->len[0] + priv->len[1] + priv->len[2];
     buf_len = len + len / 255 + 64;
+
+    if (*buf)
+        return AVERROR_INVALIDDATA;
+
     ptr = *buf = av_realloc(NULL, buf_len);
     if (!ptr)
         return AVERROR(ENOMEM);
@@ -317,7 +321,7 @@
     if (priv->packet[pkt_type >> 1])
         return AVERROR_INVALIDDATA;
     if (pkt_type > 1 && !priv->packet[0] || pkt_type > 3 && !priv->packet[1])
-        return AVERROR_INVALIDDATA;
+        return priv->vp ? 0 : AVERROR_INVALIDDATA;
 
     priv->len[pkt_type >> 1]    = os->psize;
     priv->packet[pkt_type >> 1] = av_mallocz(os->psize);

diff --git a/libavformat/options.c b/libavformat/options.c
index 9371c72..c188c23 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c

@@ -104,7 +104,7 @@
 {
     int loglevel;
 
-    if (!strcmp(url, s->filename) ||
+    if (!strcmp(url, s->url) ||
         s->iformat && !strcmp(s->iformat->name, "image2") ||
         s->oformat && !strcmp(s->oformat->name, "image2")
     ) {

diff --git a/libavformat/options_table.h b/libavformat/options_table.h
index 118086d..f2f077b 100644
--- a/libavformat/options_table.h
+++ b/libavformat/options_table.h

@@ -49,15 +49,17 @@
 {"discardcorrupt", "discard corrupted frames", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_DISCARD_CORRUPT }, INT_MIN, INT_MAX, D, "fflags"},
 {"sortdts", "try to interleave outputted packets by dts", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_SORT_DTS }, INT_MIN, INT_MAX, D, "fflags"},
 #if FF_API_LAVF_KEEPSIDE_FLAG
-{"keepside", "don't merge side data", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_KEEP_SIDE_DATA }, INT_MIN, INT_MAX, D, "fflags"},
+{"keepside", "deprecated, does nothing", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_KEEP_SIDE_DATA }, INT_MIN, INT_MAX, D, "fflags"},
 #endif
 {"fastseek", "fast but inaccurate seeks", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_FAST_SEEK }, INT_MIN, INT_MAX, D, "fflags"},
-{"latm", "enable RTP MP4A-LATM payload", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_MP4A_LATM }, INT_MIN, INT_MAX, E, "fflags"},
+#if FF_API_LAVF_MP4A_LATM
+{"latm", "deprecated, does nothing", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_MP4A_LATM }, INT_MIN, INT_MAX, E, "fflags"},
+#endif
 {"nobuffer", "reduce the latency introduced by optional buffering", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_NOBUFFER }, 0, INT_MAX, D, "fflags"},
-{"seek2any", "allow seeking to non-keyframes on demuxer level when supported", OFFSET(seek2any), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, D},
 {"bitexact", "do not write random/volatile data", 0, AV_OPT_TYPE_CONST, { .i64 = AVFMT_FLAG_BITEXACT }, 0, 0, E, "fflags" },
 {"shortest", "stop muxing with the shortest stream", 0, AV_OPT_TYPE_CONST, { .i64 = AVFMT_FLAG_SHORTEST }, 0, 0, E, "fflags" },
-{"autobsf", "add needed bsfs automatically (delays header until each stream's first packet is written)", 0, AV_OPT_TYPE_CONST, { .i64 = AVFMT_FLAG_AUTO_BSF }, 0, 0, E, "fflags" },
+{"autobsf", "add needed bsfs automatically", 0, AV_OPT_TYPE_CONST, { .i64 = AVFMT_FLAG_AUTO_BSF }, 0, 0, E, "fflags" },
+{"seek2any", "allow seeking to non-keyframes on demuxer level when supported", OFFSET(seek2any), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, D},
 {"analyzeduration", "specify how many microseconds are analyzed to probe the input", OFFSET(max_analyze_duration), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, INT64_MAX, D},
 {"cryptokey", "decryption key", OFFSET(key), AV_OPT_TYPE_BINARY, {.dbl = 0}, 0, 0, D},
 {"indexmem", "max memory used for timestamp index (per stream)", OFFSET(max_index_size), AV_OPT_TYPE_INT, {.i64 = 1<<20 }, 0, INT_MAX, D},
@@ -108,6 +110,7 @@
 {"protocol_whitelist", "List of protocols that are allowed to be used", OFFSET(protocol_whitelist), AV_OPT_TYPE_STRING, { .str = NULL },  CHAR_MIN, CHAR_MAX, D },
 {"protocol_blacklist", "List of protocols that are not allowed to be used", OFFSET(protocol_blacklist), AV_OPT_TYPE_STRING, { .str = NULL },  CHAR_MIN, CHAR_MAX, D },
 {"max_streams", "maximum number of streams", OFFSET(max_streams), AV_OPT_TYPE_INT, { .i64 = 1000 }, 0, INT_MAX, D },
+{"skip_estimate_duration_from_pts", "skip duration calculation in estimate_timings_from_pts", OFFSET(skip_estimate_duration_from_pts), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, D},
 {NULL},
 };
 

diff --git a/libavformat/os_support.c b/libavformat/os_support.c
index 86d0b8f..15cea7f 100644
--- a/libavformat/os_support.c
+++ b/libavformat/os_support.c

@@ -43,10 +43,11 @@
 
 #include "network.h"
 
+#if !HAVE_GETADDRINFO
 #if !HAVE_INET_ATON
 #include <stdlib.h>
 
-int ff_inet_aton(const char *str, struct in_addr *add)
+static int inet_aton(const char *str, struct in_addr *add)
 {
     unsigned int add1 = 0, add2 = 0, add3 = 0, add4 = 0;
 
@@ -60,14 +61,8 @@
 
     return 1;
 }
-#else
-int ff_inet_aton(const char *str, struct in_addr *add)
-{
-    return inet_aton(str, add);
-}
 #endif /* !HAVE_INET_ATON */
 
-#if !HAVE_GETADDRINFO
 int ff_getaddrinfo(const char *node, const char *service,
                    const struct addrinfo *hints, struct addrinfo **res)
 {
@@ -75,16 +70,6 @@
     struct addrinfo *ai;
     struct sockaddr_in *sin;
 
-#if HAVE_WINSOCK2_H
-    int (WSAAPI *win_getaddrinfo)(const char *node, const char *service,
-                                  const struct addrinfo *hints,
-                                  struct addrinfo **res);
-    HMODULE ws2mod = GetModuleHandle("ws2_32.dll");
-    win_getaddrinfo = GetProcAddress(ws2mod, "getaddrinfo");
-    if (win_getaddrinfo)
-        return win_getaddrinfo(node, service, hints, res);
-#endif /* HAVE_WINSOCK2_H */
-
     *res = NULL;
     sin  = av_mallocz(sizeof(struct sockaddr_in));
     if (!sin)
@@ -92,7 +77,7 @@
     sin->sin_family = AF_INET;
 
     if (node) {
-        if (!ff_inet_aton(node, &sin->sin_addr)) {
+        if (!inet_aton(node, &sin->sin_addr)) {
             if (hints && (hints->ai_flags & AI_NUMERICHOST)) {
                 av_free(sin);
                 return EAI_FAIL;
@@ -148,17 +133,6 @@
 
 void ff_freeaddrinfo(struct addrinfo *res)
 {
-#if HAVE_WINSOCK2_H
-    void (WSAAPI *win_freeaddrinfo)(struct addrinfo *res);
-    HMODULE ws2mod = GetModuleHandle("ws2_32.dll");
-    win_freeaddrinfo = (void (WSAAPI *)(struct addrinfo *res))
-                       GetProcAddress(ws2mod, "freeaddrinfo");
-    if (win_freeaddrinfo) {
-        win_freeaddrinfo(res);
-        return;
-    }
-#endif /* HAVE_WINSOCK2_H */
-
     av_freep(&res->ai_canonname);
     av_freep(&res->ai_addr);
     av_freep(&res);
@@ -170,16 +144,6 @@
 {
     const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;
 
-#if HAVE_WINSOCK2_H
-    int (WSAAPI *win_getnameinfo)(const struct sockaddr *sa, socklen_t salen,
-                                  char *host, DWORD hostlen,
-                                  char *serv, DWORD servlen, int flags);
-    HMODULE ws2mod = GetModuleHandle("ws2_32.dll");
-    win_getnameinfo = GetProcAddress(ws2mod, "getnameinfo");
-    if (win_getnameinfo)
-        return win_getnameinfo(sa, salen, host, hostlen, serv, servlen, flags);
-#endif /* HAVE_WINSOCK2_H */
-
     if (sa->sa_family != AF_INET)
         return EAI_FAMILY;
     if (!host && !serv)

diff --git a/libavformat/os_support.h b/libavformat/os_support.h
index 91220e9..7a56dc9 100644
--- a/libavformat/os_support.h
+++ b/libavformat/os_support.h

@@ -40,7 +40,7 @@
 #endif
 #endif
 
-#if defined(_WIN32) && !defined(__MINGW32CE__)
+#ifdef _WIN32
 #  include <fcntl.h>
 #  ifdef lseek
 #   undef lseek
@@ -54,7 +54,7 @@
 #   undef fstat
 #  endif
 #  define fstat(f,s) _fstati64((f), (s))
-#endif /* defined(_WIN32) && !defined(__MINGW32CE__) */
+#endif /* defined(_WIN32) */
 
 
 #ifdef __ANDROID__
@@ -139,9 +139,7 @@
 #endif /* HAVE_POLL_H */
 #endif /* CONFIG_NETWORK */
 
-#if defined(__MINGW32CE__)
-#define mkdir(a, b) _mkdir(a)
-#elif defined(_WIN32)
+#ifdef _WIN32
 #include <stdio.h>
 #include <windows.h>
 #include "libavutil/wchar_filename.h"
@@ -220,7 +218,7 @@
 
 fallback:
     /* filename may be be in CP_ACP */
-#if !HAVE_WINRT
+#if !HAVE_UWP
     ret = MoveFileExA(src_utf8, dest_utf8, MOVEFILE_REPLACE_EXISTING);
     if (ret)
         errno = EPERM;

diff --git a/libavformat/pcm.c b/libavformat/pcm.c
index 806f91b..767bbd0 100644
--- a/libavformat/pcm.c
+++ b/libavformat/pcm.c

@@ -28,13 +28,20 @@
 
 int ff_pcm_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
+    AVCodecParameters *par = s->streams[0]->codecpar;
     int ret, size;
 
-    size= RAW_SAMPLES*s->streams[0]->codecpar->block_align;
-    if (size <= 0)
+    if (par->block_align <= 0)
         return AVERROR(EINVAL);
 
-    ret= av_get_packet(s->pb, pkt, size);
+    /*
+     * Compute read size to complete a read every 62ms.
+     * Clamp to RAW_SAMPLES if larger.
+     */
+    size = FFMAX(par->sample_rate/25, 1);
+    size = FFMIN(size, RAW_SAMPLES) * par->block_align;
+
+    ret = av_get_packet(s->pb, pkt, size);
 
     pkt->flags &= ~AV_PKT_FLAG_CORRUPT;
     pkt->stream_index = 0;

diff --git a/libavformat/protocols.c b/libavformat/protocols.c
index 8d3555e..ad95659 100644
--- a/libavformat/protocols.c
+++ b/libavformat/protocols.c

@@ -56,10 +56,7 @@
 extern const URLProtocol ff_subfile_protocol;
 extern const URLProtocol ff_tee_protocol;
 extern const URLProtocol ff_tcp_protocol;
-extern const URLProtocol ff_tls_gnutls_protocol;
-extern const URLProtocol ff_tls_schannel_protocol;
-extern const URLProtocol ff_tls_securetransport_protocol;
-extern const URLProtocol ff_tls_openssl_protocol;
+extern const URLProtocol ff_tls_protocol;
 extern const URLProtocol ff_udp_protocol;
 extern const URLProtocol ff_udplite_protocol;
 extern const URLProtocol ff_unix_protocol;
@@ -68,6 +65,7 @@
 extern const URLProtocol ff_librtmps_protocol;
 extern const URLProtocol ff_librtmpt_protocol;
 extern const URLProtocol ff_librtmpte_protocol;
+extern const URLProtocol ff_libsrt_protocol;
 extern const URLProtocol ff_libssh_protocol;
 extern const URLProtocol ff_libsmbclient_protocol;
 

diff --git a/libavformat/pva.c b/libavformat/pva.c
index 16381db..04ae8e2 100644
--- a/libavformat/pva.c
+++ b/libavformat/pva.c

@@ -134,6 +134,10 @@
             pes_flags              = avio_rb16(pb);
             pes_header_data_length = avio_r8(pb);
 
+            if (avio_feof(pb)) {
+                return AVERROR_EOF;
+            }
+
             if (pes_signal != 1 || pes_header_data_length == 0) {
                 pva_log(s, AV_LOG_WARNING, "expected non empty signaled PES packet, "
                                           "trying to recover\n");

diff --git a/libavformat/qtpalette.c b/libavformat/qtpalette.c
index 666c6b7..6833f0c 100644
--- a/libavformat/qtpalette.c
+++ b/libavformat/qtpalette.c

@@ -49,7 +49,7 @@
     /* If the depth is 1, 2, 4, or 8 bpp, file is palettized. */
     if ((bit_depth == 1 || bit_depth == 2 || bit_depth == 4 || bit_depth == 8)) {
         uint32_t color_count, color_start, color_end;
-        uint32_t a, r, g, b;
+        uint32_t r, g, b;
 
         /* Ignore the greyscale bit for 1-bit video and sample
          * descriptions containing a color table. */
@@ -94,17 +94,17 @@
             color_end = avio_rb16(pb);
             if ((color_start <= 255) && (color_end <= 255)) {
                 for (i = color_start; i <= color_end; i++) {
-                    /* each A, R, G, or B component is 16 bits;
-                     * only use the top 8 bits */
-                    a = avio_r8(pb);
-                    avio_r8(pb);
+                    /* Each color is made of four unsigned 16 bit integers. The
+                     * first integer is 0, the remaining integers are the red,
+                     * the green and the blue values. We only use the top 8 bit. */
+                    avio_skip(pb, 2);
                     r = avio_r8(pb);
                     avio_r8(pb);
                     g = avio_r8(pb);
                     avio_r8(pb);
                     b = avio_r8(pb);
                     avio_r8(pb);
-                    palette[i] = (a << 24 ) | (r << 16) | (g << 8) | (b);
+                    palette[i] = (0xFFU << 24) | (r << 16) | (g << 8) | (b);
                 }
             }
         }

diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index e926549..b38a4b5 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c

@@ -130,7 +130,7 @@
     int nb_invalid = 0;
     int nb_frames = 0;
 
-    for (i=0; i<p->buf_size-2; i++) {
+    for (i = 0; i < p->buf_size - 1; i++) {
         int c;
         if (p->buf[i] != 0xFF)
             continue;

diff --git a/libavformat/rawenc.c b/libavformat/rawenc.c
index f640121..993d232 100644
--- a/libavformat/rawenc.c
+++ b/libavformat/rawenc.c

@@ -91,6 +91,45 @@
 };
 #endif
 
+#if CONFIG_APTX_MUXER
+AVOutputFormat ff_aptx_muxer = {
+    .name              = "aptx",
+    .long_name         = NULL_IF_CONFIG_SMALL("raw aptX (Audio Processing Technology for Bluetooth)"),
+    .extensions        = "aptx",
+    .audio_codec       = AV_CODEC_ID_APTX,
+    .video_codec       = AV_CODEC_ID_NONE,
+    .write_header      = force_one_stream,
+    .write_packet      = ff_raw_write_packet,
+    .flags             = AVFMT_NOTIMESTAMPS,
+};
+#endif
+
+#if CONFIG_APTX_HD_MUXER
+AVOutputFormat ff_aptx_hd_muxer = {
+    .name              = "aptx_hd",
+    .long_name         = NULL_IF_CONFIG_SMALL("raw aptX HD (Audio Processing Technology for Bluetooth)"),
+    .extensions        = "aptxhd",
+    .audio_codec       = AV_CODEC_ID_APTX_HD,
+    .video_codec       = AV_CODEC_ID_NONE,
+    .write_header      = force_one_stream,
+    .write_packet      = ff_raw_write_packet,
+    .flags             = AVFMT_NOTIMESTAMPS,
+};
+#endif
+
+#if CONFIG_AVS2_MUXER
+AVOutputFormat ff_avs2_muxer = {
+    .name              = "avs2",
+    .long_name         = NULL_IF_CONFIG_SMALL("raw AVS2-P2/IEEE1857.4 video"),
+    .extensions        = "avs,avs2",
+    .audio_codec       = AV_CODEC_ID_NONE,
+    .video_codec       = AV_CODEC_ID_AVS2,
+    .write_header      = force_one_stream,
+    .write_packet      = ff_raw_write_packet,
+    .flags             = AVFMT_NOTIMESTAMPS,
+};
+#endif
+
 #if CONFIG_CAVSVIDEO_MUXER
 AVOutputFormat ff_cavsvideo_muxer = {
     .name              = "cavsvideo",
@@ -104,6 +143,19 @@
 };
 #endif
 
+#if CONFIG_CODEC2RAW_MUXER
+AVOutputFormat ff_codec2raw_muxer = {
+    .name              = "codec2raw",
+    .long_name         = NULL_IF_CONFIG_SMALL("raw codec2 muxer"),
+    .audio_codec       = AV_CODEC_ID_CODEC2,
+    .video_codec       = AV_CODEC_ID_NONE,
+    .write_header      = force_one_stream,
+    .write_packet      = ff_raw_write_packet,
+    .flags             = AVFMT_NOTIMESTAMPS,
+};
+#endif
+
+
 #if CONFIG_DATA_MUXER
 AVOutputFormat ff_data_muxer = {
     .name              = "data",
@@ -413,6 +465,19 @@
 };
 #endif
 
+#if CONFIG_SBC_MUXER
+AVOutputFormat ff_sbc_muxer = {
+    .name              = "sbc",
+    .long_name         = NULL_IF_CONFIG_SMALL("raw SBC"),
+    .mime_type         = "audio/x-sbc",
+    .extensions        = "sbc,msbc",
+    .audio_codec       = AV_CODEC_ID_SBC,
+    .write_header      = force_one_stream,
+    .write_packet      = ff_raw_write_packet,
+    .flags             = AVFMT_NOTIMESTAMPS,
+};
+#endif
+
 #if CONFIG_TRUEHD_MUXER
 AVOutputFormat ff_truehd_muxer = {
     .name              = "truehd",

diff --git a/libavformat/rdt.c b/libavformat/rdt.c
index b69827f..60c36f0 100644
--- a/libavformat/rdt.c
+++ b/libavformat/rdt.c

@@ -53,7 +53,7 @@
 
 RDTDemuxContext *
 ff_rdt_parse_open(AVFormatContext *ic, int first_stream_of_set_idx,
-                  void *priv_data, RTPDynamicProtocolHandler *handler)
+                  void *priv_data, const RTPDynamicProtocolHandler *handler)
 {
     RDTDemuxContext *s = av_mallocz(sizeof(RDTDemuxContext));
     if (!s)
@@ -554,7 +554,7 @@
 }
 
 #define RDT_HANDLER(n, s, t) \
-static RTPDynamicProtocolHandler rdt_ ## n ## _handler = { \
+RTPDynamicProtocolHandler ff_rdt_ ## n ## _handler = { \
     .enc_name         = s, \
     .codec_type       = t, \
     .codec_id         = AV_CODEC_ID_NONE, \
@@ -570,10 +570,3 @@
 RDT_HANDLER(video,      "x-pn-realvideo",                AVMEDIA_TYPE_VIDEO);
 RDT_HANDLER(audio,      "x-pn-realaudio",                AVMEDIA_TYPE_AUDIO);
 
-void ff_register_rdt_dynamic_payload_handlers(void)
-{
-    ff_register_dynamic_payload_handler(&rdt_video_handler);
-    ff_register_dynamic_payload_handler(&rdt_audio_handler);
-    ff_register_dynamic_payload_handler(&rdt_live_video_handler);
-    ff_register_dynamic_payload_handler(&rdt_live_audio_handler);
-}

diff --git a/libavformat/rdt.h b/libavformat/rdt.h
index ce6026f..67fb308 100644
--- a/libavformat/rdt.h
+++ b/libavformat/rdt.h

@@ -41,7 +41,7 @@
 RDTDemuxContext *ff_rdt_parse_open(AVFormatContext *ic,
                                    int first_stream_of_set_idx,
                                    void *priv_data,
-                                   RTPDynamicProtocolHandler *handler);
+                                   const RTPDynamicProtocolHandler *handler);
 void ff_rdt_parse_close(RDTDemuxContext *s);
 
 /**
@@ -60,11 +60,6 @@
                                        const char *challenge);
 
 /**
- * Register RDT-related dynamic payload handlers with our cache.
- */
-void ff_register_rdt_dynamic_payload_handlers(void);
-
-/**
  * Add subscription information to Subscribe parameter string.
  *
  * @param cmd string to write the subscription information into.

diff --git a/libavformat/riff.c b/libavformat/riff.c
index 3f0b390..3907e1a 100644
--- a/libavformat/riff.c
+++ b/libavformat/riff.c

@@ -369,6 +369,7 @@
     { AV_CODEC_ID_ZMBV,         MKTAG('Z', 'M', 'B', 'V') },
     { AV_CODEC_ID_KMVC,         MKTAG('K', 'M', 'V', 'C') },
     { AV_CODEC_ID_CAVS,         MKTAG('C', 'A', 'V', 'S') },
+    { AV_CODEC_ID_AVS2,         MKTAG('A', 'V', 'S', '2') },
     { AV_CODEC_ID_JPEG2000,     MKTAG('m', 'j', 'p', '2') },
     { AV_CODEC_ID_JPEG2000,     MKTAG('M', 'J', '2', 'C') },
     { AV_CODEC_ID_JPEG2000,     MKTAG('L', 'J', '2', 'C') },
@@ -404,6 +405,12 @@
     { AV_CODEC_ID_UTVIDEO,      MKTAG('U', 'Q', 'Y', '2') },
     { AV_CODEC_ID_UTVIDEO,      MKTAG('U', 'Q', 'R', 'A') },
     { AV_CODEC_ID_UTVIDEO,      MKTAG('U', 'Q', 'R', 'G') },
+    { AV_CODEC_ID_UTVIDEO,      MKTAG('U', 'M', 'Y', '2') },
+    { AV_CODEC_ID_UTVIDEO,      MKTAG('U', 'M', 'H', '2') },
+    { AV_CODEC_ID_UTVIDEO,      MKTAG('U', 'M', 'Y', '4') },
+    { AV_CODEC_ID_UTVIDEO,      MKTAG('U', 'M', 'H', '4') },
+    { AV_CODEC_ID_UTVIDEO,      MKTAG('U', 'M', 'R', 'A') },
+    { AV_CODEC_ID_UTVIDEO,      MKTAG('U', 'M', 'R', 'G') },
     { AV_CODEC_ID_VBLE,         MKTAG('V', 'B', 'L', 'E') },
     { AV_CODEC_ID_ESCAPE130,    MKTAG('E', '1', '3', '0') },
     { AV_CODEC_ID_DXTORY,       MKTAG('x', 't', 'o', 'r') },
@@ -463,6 +470,11 @@
     { AV_CODEC_ID_AV1,          MKTAG('A', 'V', '0', '1') },
     { AV_CODEC_ID_MSCC,         MKTAG('M', 'S', 'C', 'C') },
     { AV_CODEC_ID_SRGC,         MKTAG('S', 'R', 'G', 'C') },
+    { AV_CODEC_ID_IMM4,         MKTAG('I', 'M', 'M', '4') },
+    { AV_CODEC_ID_PROSUMER,     MKTAG('B', 'T', '2', '0') },
+    { AV_CODEC_ID_MWSC,         MKTAG('M', 'W', 'S', 'C') },
+    { AV_CODEC_ID_WCMV,         MKTAG('W', 'C', 'M', 'V') },
+    { AV_CODEC_ID_RASC,         MKTAG('R', 'A', 'S', 'C') },
     { AV_CODEC_ID_NONE,         0 }
 };
 
@@ -580,6 +592,7 @@
 const AVCodecGuid ff_codec_wav_guids[] = {
     { AV_CODEC_ID_AC3,      { 0x2C, 0x80, 0x6D, 0xE0, 0x46, 0xDB, 0xCF, 0x11, 0xB4, 0xD1, 0x00, 0x80, 0x5F, 0x6C, 0xBB, 0xEA } },
     { AV_CODEC_ID_ATRAC3P,  { 0xBF, 0xAA, 0x23, 0xE9, 0x58, 0xCB, 0x71, 0x44, 0xA1, 0x19, 0xFF, 0xFA, 0x01, 0xE4, 0xCE, 0x62 } },
+    { AV_CODEC_ID_ATRAC9,   { 0xD2, 0x42, 0xE1, 0x47, 0xBA, 0x36, 0x8D, 0x4D, 0x88, 0xFC, 0x61, 0x65, 0x4F, 0x8C, 0x83, 0x6C } },
     { AV_CODEC_ID_EAC3,     { 0xAF, 0x87, 0xFB, 0xA7, 0x02, 0x2D, 0xFB, 0x42, 0xA4, 0xD4, 0x05, 0xCD, 0x93, 0x84, 0x3B, 0xDD } },
     { AV_CODEC_ID_MP2,      { 0x2B, 0x80, 0x6D, 0xE0, 0x46, 0xDB, 0xCF, 0x11, 0xB4, 0xD1, 0x00, 0x80, 0x5F, 0x6C, 0xBB, 0xEA } },
     { AV_CODEC_ID_NONE }

diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
index d6d7d9c..f26c5b4 100644
--- a/libavformat/rmdec.c
+++ b/libavformat/rmdec.c

@@ -70,16 +70,10 @@
 
 static inline void get_strl(AVIOContext *pb, char *buf, int buf_size, int len)
 {
-    int i;
-    char *q, r;
+    int read = avio_get_str(pb, len, buf, buf_size);
 
-    q = buf;
-    for(i=0;i<len;i++) {
-        r = avio_r8(pb);
-        if (i < buf_size - 1)
-            *q++ = r;
-    }
-    if (buf_size > 0) *q = '\0';
+    if (read > 0)
+        avio_skip(pb, len - read);
 }
 
 static void get_str8(AVIOContext *pb, char *buf, int buf_size)
@@ -105,8 +99,10 @@
 
     for (i=0; i<FF_ARRAY_ELEMS(ff_rm_metadata); i++) {
         int len = wide ? avio_rb16(pb) : avio_r8(pb);
-        get_strl(pb, buf, sizeof(buf), len);
-        av_dict_set(&s->metadata, ff_rm_metadata[i], buf, 0);
+        if (len > 0) {
+            get_strl(pb, buf, sizeof(buf), len);
+            av_dict_set(&s->metadata, ff_rm_metadata[i], buf, 0);
+        }
     }
 }
 
@@ -526,7 +522,7 @@
 
         size2 = avio_rb32(pb);
         ret = ff_rm_read_mdpr_codecdata(s, s->pb, st2, st2->priv_data,
-                                        size2, mime);
+                                        size2, NULL);
         if (ret < 0)
             return ret;
     }
@@ -950,12 +946,14 @@
             } else
                 return -1;
         } else {
-            if ((ret = av_get_packet(pb, pkt, len)) < 0)
+            ret = av_get_packet(pb, pkt, len);
+            if (ret < 0)
                 return ret;
             rm_ac3_swap_bytes(st, pkt);
         }
     } else {
-        if ((ret = av_get_packet(pb, pkt, len)) < 0)
+        ret = av_get_packet(pb, pkt, len);
+        if (ret < 0)
             return ret;
     }
 
@@ -973,16 +971,17 @@
                       AVStream *st, RMStream *ast, AVPacket *pkt)
 {
     RMDemuxContext *rm = s->priv_data;
+    int ret;
 
     av_assert0 (rm->audio_pkt_cnt > 0);
 
     if (ast->deint_id == DEINT_ID_VBRF ||
         ast->deint_id == DEINT_ID_VBRS) {
-        int ret = av_get_packet(pb, pkt, ast->sub_packet_lengths[ast->sub_packet_cnt - rm->audio_pkt_cnt]);
+        ret = av_get_packet(pb, pkt, ast->sub_packet_lengths[ast->sub_packet_cnt - rm->audio_pkt_cnt]);
         if (ret < 0)
             return ret;
     } else {
-        int ret = av_new_packet(pkt, st->codecpar->block_align);
+        ret = av_new_packet(pkt, st->codecpar->block_align);
         if (ret < 0)
             return ret;
         memcpy(pkt->data, ast->pkt.data + st->codecpar->block_align * //FIXME avoid this
@@ -1270,6 +1269,8 @@
                 if (avio_rb32(pb) == MKBETAG('M', 'L', 'T', 'I')) {
                     ret = rm_read_multi(s, pb, st, NULL);
                 } else {
+                    if (avio_feof(pb))
+                        return AVERROR_INVALIDDATA;
                     avio_seek(pb, -4, SEEK_CUR);
                     ret = ff_rm_read_mdpr_codecdata(s, pb, st, st->priv_data, len, NULL);
                 }

diff --git a/libavformat/rtmpcrypt.c b/libavformat/rtmpcrypt.c
index c41ae43..253b8ca 100644
--- a/libavformat/rtmpcrypt.c
+++ b/libavformat/rtmpcrypt.c

@@ -301,7 +301,7 @@
 
     if (rt->handshaked) {
         /* encrypt data to send to the server */
-        av_rc4_crypt(&rt->key_out, buf, buf, size, NULL, 1);
+        av_rc4_crypt(&rt->key_out, (uint8_t *)buf, buf, size, NULL, 1);
     }
 
     if ((ret = ffurl_write(rt->stream, buf, size)) < 0)

diff --git a/libavformat/rtmpdh.c b/libavformat/rtmpdh.c
index 8eb0882..5ddae53 100644
--- a/libavformat/rtmpdh.c
+++ b/libavformat/rtmpdh.c

@@ -38,6 +38,11 @@
 
 #include "rtmpdh.h"
 
+#if CONFIG_MBEDTLS
+#include <mbedtls/ctr_drbg.h>
+#include <mbedtls/entropy.h>
+#endif
+
 #define P1024                                          \
     "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" \
     "29024E088A67CC74020BBEA63B139B22514A08798E3404DD" \
@@ -159,6 +164,56 @@
     BN_CTX_free(ctx);
     return 0;
 }
+#elif CONFIG_MBEDTLS
+#define bn_new(bn)                      \
+    do {                                \
+        bn = av_malloc(sizeof(*bn));    \
+        if (bn)                         \
+            mbedtls_mpi_init(bn);       \
+    } while (0)
+#define bn_free(bn)                     \
+    do {                                \
+        mbedtls_mpi_free(bn);           \
+        av_free(bn);                    \
+    } while (0)
+#define bn_set_word(bn, w)          mbedtls_mpi_lset(bn, w)
+#define bn_cmp(a, b)                mbedtls_mpi_cmp_mpi(a, b)
+#define bn_copy(to, from)           mbedtls_mpi_copy(to, from)
+#define bn_sub_word(bn, w)          mbedtls_mpi_sub_int(bn, bn, w)
+#define bn_cmp_1(bn)                mbedtls_mpi_cmp_int(bn, 1)
+#define bn_num_bytes(bn)            (mbedtls_mpi_bitlen(bn) + 7) / 8
+#define bn_bn2bin(bn, buf, len)     mbedtls_mpi_write_binary(bn, buf, len)
+#define bn_bin2bn(bn, buf, len)                     \
+    do {                                            \
+        bn_new(bn);                                 \
+        if (bn)                                     \
+            mbedtls_mpi_read_binary(bn, buf, len);  \
+    } while (0)
+#define bn_hex2bn(bn, buf, ret)                     \
+    do {                                            \
+        bn_new(bn);                                 \
+        if (bn)                                     \
+            ret = (mbedtls_mpi_read_string(bn, 16, buf) == 0);  \
+        else                                        \
+            ret = 1;                                \
+    } while (0)
+#define bn_random(bn, num_bits)                     \
+    do {                                            \
+        mbedtls_entropy_context entropy_ctx;        \
+        mbedtls_ctr_drbg_context ctr_drbg_ctx;      \
+                                                    \
+        mbedtls_entropy_init(&entropy_ctx);         \
+        mbedtls_ctr_drbg_init(&ctr_drbg_ctx);       \
+        mbedtls_ctr_drbg_seed(&ctr_drbg_ctx,        \
+                              mbedtls_entropy_func, \
+                              &entropy_ctx,         \
+                              NULL, 0);             \
+        mbedtls_mpi_fill_random(bn, (num_bits + 7) / 8, mbedtls_ctr_drbg_random, &ctr_drbg_ctx); \
+        mbedtls_ctr_drbg_free(&ctr_drbg_ctx);       \
+        mbedtls_entropy_free(&entropy_ctx);         \
+    } while (0)
+#define bn_modexp(bn, y, q, p)      mbedtls_mpi_exp_mod(bn, y, q, p, 0)
+
 #endif
 
 #define MAX_BYTES 18000

diff --git a/libavformat/rtmpdh.h b/libavformat/rtmpdh.h
index 188aad7..8cc1a42 100644
--- a/libavformat/rtmpdh.h
+++ b/libavformat/rtmpdh.h

@@ -40,6 +40,11 @@
 #include <openssl/dh.h>
 
 typedef BIGNUM *FFBigNum;
+#elif CONFIG_MBEDTLS
+#include <mbedtls/bignum.h>
+
+typedef mbedtls_mpi *FFBigNum;
+
 #endif
 
 typedef struct FF_DH {

diff --git a/libavformat/rtmpdigest.c b/libavformat/rtmpdigest.c
new file mode 100644
index 0000000..a9b1177
--- /dev/null
+++ b/libavformat/rtmpdigest.c

@@ -0,0 +1,67 @@
+/*
+ * RTMP network protocol
+ * Copyright (c) 2009 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * RTMP protocol digest
+ */
+
+#include <stdint.h>
+
+#include "libavutil/error.h"
+#include "libavutil/hmac.h"
+
+#include "rtmp.h"
+
+int ff_rtmp_calc_digest(const uint8_t *src, int len, int gap,
+                        const uint8_t *key, int keylen, uint8_t *dst)
+{
+    AVHMAC *hmac;
+
+    hmac = av_hmac_alloc(AV_HMAC_SHA256);
+    if (!hmac)
+        return AVERROR(ENOMEM);
+
+    av_hmac_init(hmac, key, keylen);
+    if (gap <= 0) {
+        av_hmac_update(hmac, src, len);
+    } else { //skip 32 bytes used for storing digest
+        av_hmac_update(hmac, src, gap);
+        av_hmac_update(hmac, src + gap + 32, len - gap - 32);
+    }
+    av_hmac_final(hmac, dst, 32);
+
+    av_hmac_free(hmac);
+
+    return 0;
+}
+
+int ff_rtmp_calc_digest_pos(const uint8_t *buf, int off, int mod_val,
+                            int add_val)
+{
+    int i, digest_pos = 0;
+
+    for (i = 0; i < 4; i++)
+        digest_pos += buf[i + off];
+    digest_pos = digest_pos % mod_val + add_val;
+
+    return digest_pos;
+}

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 7320b4f..b741e42 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c

@@ -27,7 +27,6 @@
 #include "libavcodec/bytestream.h"
 #include "libavutil/avstring.h"
 #include "libavutil/base64.h"
-#include "libavutil/hmac.h"
 #include "libavutil/intfloat.h"
 #include "libavutil/lfg.h"
 #include "libavutil/md5.h"
@@ -989,41 +988,6 @@
     return rtmp_send_packet(rt, &pkt, 1);
 }
 
-int ff_rtmp_calc_digest(const uint8_t *src, int len, int gap,
-                        const uint8_t *key, int keylen, uint8_t *dst)
-{
-    AVHMAC *hmac;
-
-    hmac = av_hmac_alloc(AV_HMAC_SHA256);
-    if (!hmac)
-        return AVERROR(ENOMEM);
-
-    av_hmac_init(hmac, key, keylen);
-    if (gap <= 0) {
-        av_hmac_update(hmac, src, len);
-    } else { //skip 32 bytes used for storing digest
-        av_hmac_update(hmac, src, gap);
-        av_hmac_update(hmac, src + gap + 32, len - gap - 32);
-    }
-    av_hmac_final(hmac, dst, 32);
-
-    av_hmac_free(hmac);
-
-    return 0;
-}
-
-int ff_rtmp_calc_digest_pos(const uint8_t *buf, int off, int mod_val,
-                            int add_val)
-{
-    int i, digest_pos = 0;
-
-    for (i = 0; i < 4; i++)
-        digest_pos += buf[i + off];
-    digest_pos = digest_pos % mod_val + add_val;
-
-    return digest_pos;
-}
-
 /**
  * Put HMAC-SHA2 digest of packet data (except for the bytes where this digest
  * will be stored) into that packet.
@@ -2467,8 +2431,10 @@
         rt->bytes_read += ret;
         if (rt->bytes_read - rt->last_bytes_read > rt->receive_report_size) {
             av_log(s, AV_LOG_DEBUG, "Sending bytes read report\n");
-            if ((ret = gen_bytes_read(s, rt, rpkt.timestamp + 1)) < 0)
+            if ((ret = gen_bytes_read(s, rt, rpkt.timestamp + 1)) < 0) {
+                ff_rtmp_packet_destroy(&rpkt);
                 return ret;
+            }
             rt->last_bytes_read = rt->bytes_read;
         }
 

diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c
index 4acb1ca..e75a34c 100644
--- a/libavformat/rtpdec.c
+++ b/libavformat/rtpdec.c

@@ -69,88 +69,104 @@
     .codec_id   = AV_CODEC_ID_TEXT,
 };
 
-static RTPDynamicProtocolHandler *rtp_first_dynamic_payload_handler = NULL;
+extern RTPDynamicProtocolHandler ff_rdt_video_handler;
+extern RTPDynamicProtocolHandler ff_rdt_audio_handler;
+extern RTPDynamicProtocolHandler ff_rdt_live_video_handler;
+extern RTPDynamicProtocolHandler ff_rdt_live_audio_handler;
 
-void ff_register_dynamic_payload_handler(RTPDynamicProtocolHandler *handler)
+static const RTPDynamicProtocolHandler *rtp_dynamic_protocol_handler_list[] = {
+    /* rtp */
+    &ff_ac3_dynamic_handler,
+    &ff_amr_nb_dynamic_handler,
+    &ff_amr_wb_dynamic_handler,
+    &ff_dv_dynamic_handler,
+    &ff_g726_16_dynamic_handler,
+    &ff_g726_24_dynamic_handler,
+    &ff_g726_32_dynamic_handler,
+    &ff_g726_40_dynamic_handler,
+    &ff_g726le_16_dynamic_handler,
+    &ff_g726le_24_dynamic_handler,
+    &ff_g726le_32_dynamic_handler,
+    &ff_g726le_40_dynamic_handler,
+    &ff_h261_dynamic_handler,
+    &ff_h263_1998_dynamic_handler,
+    &ff_h263_2000_dynamic_handler,
+    &ff_h263_rfc2190_dynamic_handler,
+    &ff_h264_dynamic_handler,
+    &ff_hevc_dynamic_handler,
+    &ff_ilbc_dynamic_handler,
+    &ff_jpeg_dynamic_handler,
+    &ff_mp4a_latm_dynamic_handler,
+    &ff_mp4v_es_dynamic_handler,
+    &ff_mpeg_audio_dynamic_handler,
+    &ff_mpeg_audio_robust_dynamic_handler,
+    &ff_mpeg_video_dynamic_handler,
+    &ff_mpeg4_generic_dynamic_handler,
+    &ff_mpegts_dynamic_handler,
+    &ff_ms_rtp_asf_pfa_handler,
+    &ff_ms_rtp_asf_pfv_handler,
+    &ff_qcelp_dynamic_handler,
+    &ff_qdm2_dynamic_handler,
+    &ff_qt_rtp_aud_handler,
+    &ff_qt_rtp_vid_handler,
+    &ff_quicktime_rtp_aud_handler,
+    &ff_quicktime_rtp_vid_handler,
+    &ff_rfc4175_rtp_handler,
+    &ff_svq3_dynamic_handler,
+    &ff_theora_dynamic_handler,
+    &ff_vc2hq_dynamic_handler,
+    &ff_vorbis_dynamic_handler,
+    &ff_vp8_dynamic_handler,
+    &ff_vp9_dynamic_handler,
+    &gsm_dynamic_handler,
+    &l24_dynamic_handler,
+    &opus_dynamic_handler,
+    &realmedia_mp3_dynamic_handler,
+    &speex_dynamic_handler,
+    &t140_dynamic_handler,
+    /* rdt */
+    &ff_rdt_video_handler,
+    &ff_rdt_audio_handler,
+    &ff_rdt_live_video_handler,
+    &ff_rdt_live_audio_handler,
+    NULL,
+};
+
+const RTPDynamicProtocolHandler *ff_rtp_handler_iterate(void **opaque)
 {
-    handler->next = rtp_first_dynamic_payload_handler;
-    rtp_first_dynamic_payload_handler = handler;
+    uintptr_t i = (uintptr_t)*opaque;
+    const RTPDynamicProtocolHandler *r = rtp_dynamic_protocol_handler_list[i];
+
+    if (r)
+        *opaque = (void*)(i + 1);
+
+    return r;
 }
 
-void ff_register_rtp_dynamic_payload_handlers(void)
-{
-    ff_register_dynamic_payload_handler(&ff_ac3_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_amr_nb_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_amr_wb_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_dv_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_g726_16_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_g726_24_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_g726_32_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_g726_40_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_g726le_16_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_g726le_24_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_g726le_32_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_g726le_40_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_h261_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_h263_1998_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_h263_2000_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_h263_rfc2190_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_h264_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_hevc_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_ilbc_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_jpeg_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_mp4a_latm_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_mp4v_es_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_mpeg_audio_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_mpeg_audio_robust_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_mpeg_video_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_mpeg4_generic_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_mpegts_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_ms_rtp_asf_pfa_handler);
-    ff_register_dynamic_payload_handler(&ff_ms_rtp_asf_pfv_handler);
-    ff_register_dynamic_payload_handler(&ff_qcelp_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_qdm2_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_qt_rtp_aud_handler);
-    ff_register_dynamic_payload_handler(&ff_qt_rtp_vid_handler);
-    ff_register_dynamic_payload_handler(&ff_quicktime_rtp_aud_handler);
-    ff_register_dynamic_payload_handler(&ff_quicktime_rtp_vid_handler);
-    ff_register_dynamic_payload_handler(&ff_rfc4175_rtp_handler);
-    ff_register_dynamic_payload_handler(&ff_svq3_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_theora_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_vc2hq_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_vorbis_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_vp8_dynamic_handler);
-    ff_register_dynamic_payload_handler(&ff_vp9_dynamic_handler);
-    ff_register_dynamic_payload_handler(&gsm_dynamic_handler);
-    ff_register_dynamic_payload_handler(&l24_dynamic_handler);
-    ff_register_dynamic_payload_handler(&opus_dynamic_handler);
-    ff_register_dynamic_payload_handler(&realmedia_mp3_dynamic_handler);
-    ff_register_dynamic_payload_handler(&speex_dynamic_handler);
-    ff_register_dynamic_payload_handler(&t140_dynamic_handler);
-}
-
-RTPDynamicProtocolHandler *ff_rtp_handler_find_by_name(const char *name,
+const RTPDynamicProtocolHandler *ff_rtp_handler_find_by_name(const char *name,
                                                        enum AVMediaType codec_type)
 {
-    RTPDynamicProtocolHandler *handler;
-    for (handler = rtp_first_dynamic_payload_handler;
-         handler; handler = handler->next)
+    void *i = 0;
+    const RTPDynamicProtocolHandler *handler;
+    while (handler = ff_rtp_handler_iterate(&i)) {
         if (handler->enc_name &&
             !av_strcasecmp(name, handler->enc_name) &&
             codec_type == handler->codec_type)
             return handler;
+    }
     return NULL;
 }
 
-RTPDynamicProtocolHandler *ff_rtp_handler_find_by_id(int id,
+const RTPDynamicProtocolHandler *ff_rtp_handler_find_by_id(int id,
                                                      enum AVMediaType codec_type)
 {
-    RTPDynamicProtocolHandler *handler;
-    for (handler = rtp_first_dynamic_payload_handler;
-         handler; handler = handler->next)
+    void *i = 0;
+    const RTPDynamicProtocolHandler *handler;
+    while (handler = ff_rtp_handler_iterate(&i)) {
         if (handler->static_payload_id && handler->static_payload_id == id &&
             codec_type == handler->codec_type)
             return handler;
+    }
     return NULL;
 }
 
@@ -556,7 +572,7 @@
 }
 
 void ff_rtp_parse_set_dynamic_protocol(RTPDemuxContext *s, PayloadContext *ctx,
-                                       RTPDynamicProtocolHandler *handler)
+                                       const RTPDynamicProtocolHandler *handler)
 {
     s->dynamic_protocol_context = ctx;
     s->handler                  = handler;

diff --git a/libavformat/rtpdec.h b/libavformat/rtpdec.h
index 77596b6..5a47d6f 100644
--- a/libavformat/rtpdec.h
+++ b/libavformat/rtpdec.h

@@ -43,7 +43,7 @@
 RTPDemuxContext *ff_rtp_parse_open(AVFormatContext *s1, AVStream *st,
                                    int payload_type, int queue_size);
 void ff_rtp_parse_set_dynamic_protocol(RTPDemuxContext *s, PayloadContext *ctx,
-                                       RTPDynamicProtocolHandler *handler);
+                                       const RTPDynamicProtocolHandler *handler);
 void ff_rtp_parse_set_crypto(RTPDemuxContext *s, const char *suite,
                              const char *params);
 int ff_rtp_parse_packet(RTPDemuxContext *s, AVPacket *pkt,
@@ -192,10 +192,31 @@
     PayloadContext *dynamic_protocol_context;
 };
 
-void ff_register_dynamic_payload_handler(RTPDynamicProtocolHandler *handler);
-RTPDynamicProtocolHandler *ff_rtp_handler_find_by_name(const char *name,
+/**
+ * Iterate over all registered rtp dynamic protocol handlers.
+ *
+ * @param opaque a pointer where libavformat will store the iteration state. Must
+ *               point to NULL to start the iteration.
+ *
+ * @return the next registered rtp dynamic protocol handler or NULL when the iteration is
+ *         finished
+ */
+const RTPDynamicProtocolHandler *ff_rtp_handler_iterate(void **opaque);
+/**
+ * Find a registered rtp dynamic protocol handler with the specified name.
+ *
+ * @param name name of the requested rtp dynamic protocol handler
+ * @return A rtp dynamic protocol handler if one was found, NULL otherwise.
+ */
+const RTPDynamicProtocolHandler *ff_rtp_handler_find_by_name(const char *name,
                                                   enum AVMediaType codec_type);
-RTPDynamicProtocolHandler *ff_rtp_handler_find_by_id(int id,
+/**
+ * Find a registered rtp dynamic protocol handler with a matching codec ID.
+ *
+ * @param id AVCodecID of the requested rtp dynamic protocol handler.
+ * @return A rtp dynamic protocol handler if one was found, NULL otherwise.
+ */
+const RTPDynamicProtocolHandler *ff_rtp_handler_find_by_id(int id,
                                                 enum AVMediaType codec_type);
 
 /* from rtsp.c, but used by rtp dynamic protocol handlers. */
@@ -209,8 +230,6 @@
                                     PayloadContext *data,
                                     const char *attr, const char *value));
 
-void ff_register_rtp_dynamic_payload_handlers(void);
-
 /**
  * Close the dynamic buffer and make a packet from it.
  */

diff --git a/libavformat/rtpdec_ac3.c b/libavformat/rtpdec_ac3.c
index 48b2d9c..56a379f 100644
--- a/libavformat/rtpdec_ac3.c
+++ b/libavformat/rtpdec_ac3.c

@@ -122,7 +122,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_ac3_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_ac3_dynamic_handler = {
     .enc_name           = "ac3",
     .codec_type         = AVMEDIA_TYPE_AUDIO,
     .codec_id           = AV_CODEC_ID_AC3,

diff --git a/libavformat/rtpdec_amr.c b/libavformat/rtpdec_amr.c
index 8687e65..35d3222 100644
--- a/libavformat/rtpdec_amr.c
+++ b/libavformat/rtpdec_amr.c

@@ -182,7 +182,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_amr_nb_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_amr_nb_dynamic_handler = {
     .enc_name         = "AMR",
     .codec_type       = AVMEDIA_TYPE_AUDIO,
     .codec_id         = AV_CODEC_ID_AMR_NB,
@@ -192,7 +192,7 @@
     .parse_packet     = amr_handle_packet,
 };
 
-RTPDynamicProtocolHandler ff_amr_wb_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_amr_wb_dynamic_handler = {
     .enc_name         = "AMR-WB",
     .codec_type       = AVMEDIA_TYPE_AUDIO,
     .codec_id         = AV_CODEC_ID_AMR_WB,

diff --git a/libavformat/rtpdec_asf.c b/libavformat/rtpdec_asf.c
index 2c09fda..54ffef6 100644
--- a/libavformat/rtpdec_asf.c
+++ b/libavformat/rtpdec_asf.c

@@ -139,12 +139,12 @@
         ret = avformat_open_input(&rt->asf_ctx, "", iformat, &opts);
         av_dict_free(&opts);
         if (ret < 0) {
-            av_free(buf);
+            av_free(pb.buffer);
             return ret;
         }
         av_dict_copy(&s->metadata, rt->asf_ctx->metadata, 0);
         rt->asf_pb_pos = avio_tell(&pb);
-        av_free(buf);
+        av_free(pb.buffer);
         rt->asf_ctx->pb = NULL;
     }
     return ret;
@@ -300,7 +300,7 @@
 }
 
 #define RTP_ASF_HANDLER(n, s, t) \
-RTPDynamicProtocolHandler ff_ms_rtp_ ## n ## _handler = { \
+const RTPDynamicProtocolHandler ff_ms_rtp_ ## n ## _handler = { \
     .enc_name         = s, \
     .codec_type       = t, \
     .codec_id         = AV_CODEC_ID_NONE, \

diff --git a/libavformat/rtpdec_dv.c b/libavformat/rtpdec_dv.c
index de99d27..53a5855 100644
--- a/libavformat/rtpdec_dv.c
+++ b/libavformat/rtpdec_dv.c

@@ -131,7 +131,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_dv_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_dv_dynamic_handler = {
     .enc_name         = "DV",
     .codec_type       = AVMEDIA_TYPE_VIDEO,
     .codec_id         = AV_CODEC_ID_DVVIDEO,

diff --git a/libavformat/rtpdec_formats.h b/libavformat/rtpdec_formats.h
index a436c9d..dad2b8a 100644
--- a/libavformat/rtpdec_formats.h
+++ b/libavformat/rtpdec_formats.h

@@ -47,47 +47,47 @@
                                int nal_header_len);
 void ff_h264_parse_framesize(AVCodecParameters *par, const char *p);
 
-extern RTPDynamicProtocolHandler ff_ac3_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_amr_nb_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_amr_wb_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_dv_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_g726_16_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_g726_24_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_g726_32_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_g726_40_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_g726le_16_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_g726le_24_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_g726le_32_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_g726le_40_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_h261_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_h263_1998_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_h263_2000_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_h263_rfc2190_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_h264_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_hevc_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_ilbc_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_jpeg_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_mp4a_latm_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_mp4v_es_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_mpeg_audio_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_mpeg_audio_robust_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_mpeg_video_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_mpeg4_generic_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_mpegts_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_ms_rtp_asf_pfa_handler;
-extern RTPDynamicProtocolHandler ff_ms_rtp_asf_pfv_handler;
-extern RTPDynamicProtocolHandler ff_qcelp_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_qdm2_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_qt_rtp_aud_handler;
-extern RTPDynamicProtocolHandler ff_qt_rtp_vid_handler;
-extern RTPDynamicProtocolHandler ff_quicktime_rtp_aud_handler;
-extern RTPDynamicProtocolHandler ff_quicktime_rtp_vid_handler;
-extern RTPDynamicProtocolHandler ff_rfc4175_rtp_handler;
-extern RTPDynamicProtocolHandler ff_svq3_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_theora_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_vc2hq_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_vorbis_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_vp8_dynamic_handler;
-extern RTPDynamicProtocolHandler ff_vp9_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_ac3_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_amr_nb_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_amr_wb_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_dv_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_g726_16_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_g726_24_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_g726_32_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_g726_40_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_g726le_16_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_g726le_24_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_g726le_32_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_g726le_40_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_h261_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_h263_1998_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_h263_2000_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_h263_rfc2190_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_h264_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_hevc_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_ilbc_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_jpeg_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_mp4a_latm_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_mp4v_es_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_mpeg_audio_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_mpeg_audio_robust_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_mpeg_video_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_mpeg4_generic_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_mpegts_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_ms_rtp_asf_pfa_handler;
+extern const RTPDynamicProtocolHandler ff_ms_rtp_asf_pfv_handler;
+extern const RTPDynamicProtocolHandler ff_qcelp_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_qdm2_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_qt_rtp_aud_handler;
+extern const RTPDynamicProtocolHandler ff_qt_rtp_vid_handler;
+extern const RTPDynamicProtocolHandler ff_quicktime_rtp_aud_handler;
+extern const RTPDynamicProtocolHandler ff_quicktime_rtp_vid_handler;
+extern const RTPDynamicProtocolHandler ff_rfc4175_rtp_handler;
+extern const RTPDynamicProtocolHandler ff_svq3_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_theora_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_vc2hq_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_vorbis_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_vp8_dynamic_handler;
+extern const RTPDynamicProtocolHandler ff_vp9_dynamic_handler;
 
 #endif /* AVFORMAT_RTPDEC_FORMATS_H */

diff --git a/libavformat/rtpdec_g726.c b/libavformat/rtpdec_g726.c
index 2de09ac..89afd58 100644
--- a/libavformat/rtpdec_g726.c
+++ b/libavformat/rtpdec_g726.c

@@ -35,13 +35,13 @@
     return 0; \
 } \
 \
-RTPDynamicProtocolHandler ff_g726_ ## bitrate ## _dynamic_handler = { \
+const RTPDynamicProtocolHandler ff_g726_ ## bitrate ## _dynamic_handler = { \
     .enc_name   = "AAL2-G726-" #bitrate, \
     .codec_type = AVMEDIA_TYPE_AUDIO, \
     .codec_id   = AV_CODEC_ID_ADPCM_G726, \
     .init       = g726_ ## bitrate ## _init, \
 }; \
-RTPDynamicProtocolHandler ff_g726le_ ## bitrate ## _dynamic_handler = { \
+const RTPDynamicProtocolHandler ff_g726le_ ## bitrate ## _dynamic_handler = { \
     .enc_name   = "G726-" #bitrate, \
     .codec_type = AVMEDIA_TYPE_AUDIO, \
     .codec_id   = AV_CODEC_ID_ADPCM_G726LE, \

diff --git a/libavformat/rtpdec_h261.c b/libavformat/rtpdec_h261.c
index 9729f21..a102909 100644
--- a/libavformat/rtpdec_h261.c
+++ b/libavformat/rtpdec_h261.c

@@ -162,7 +162,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_h261_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_h261_dynamic_handler = {
     .enc_name          = "H261",
     .codec_type        = AVMEDIA_TYPE_VIDEO,
     .codec_id          = AV_CODEC_ID_H261,

diff --git a/libavformat/rtpdec_h263.c b/libavformat/rtpdec_h263.c
index 97aa4ad..9b71ed7 100644
--- a/libavformat/rtpdec_h263.c
+++ b/libavformat/rtpdec_h263.c

@@ -89,7 +89,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_h263_1998_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_h263_1998_dynamic_handler = {
     .enc_name         = "H263-1998",
     .codec_type       = AVMEDIA_TYPE_VIDEO,
     .codec_id         = AV_CODEC_ID_H263,
@@ -97,7 +97,7 @@
     .parse_packet     = ff_h263_handle_packet,
 };
 
-RTPDynamicProtocolHandler ff_h263_2000_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_h263_2000_dynamic_handler = {
     .enc_name         = "H263-2000",
     .codec_type       = AVMEDIA_TYPE_VIDEO,
     .codec_id         = AV_CODEC_ID_H263,

diff --git a/libavformat/rtpdec_h263_rfc2190.c b/libavformat/rtpdec_h263_rfc2190.c
index 6ba2814..a0f587f 100644
--- a/libavformat/rtpdec_h263_rfc2190.c
+++ b/libavformat/rtpdec_h263_rfc2190.c

@@ -183,7 +183,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_h263_rfc2190_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_h263_rfc2190_dynamic_handler = {
     .codec_type        = AVMEDIA_TYPE_VIDEO,
     .codec_id          = AV_CODEC_ID_H263,
     .need_parsing      = AVSTREAM_PARSE_FULL,

diff --git a/libavformat/rtpdec_h264.c b/libavformat/rtpdec_h264.c
index 6f8148a..a785120 100644
--- a/libavformat/rtpdec_h264.c
+++ b/libavformat/rtpdec_h264.c

@@ -408,7 +408,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_h264_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_h264_dynamic_handler = {
     .enc_name         = "H264",
     .codec_type       = AVMEDIA_TYPE_VIDEO,
     .codec_id         = AV_CODEC_ID_H264,

diff --git a/libavformat/rtpdec_hevc.c b/libavformat/rtpdec_hevc.c
index a0e3a7c..5a06b23 100644
--- a/libavformat/rtpdec_hevc.c
+++ b/libavformat/rtpdec_hevc.c

@@ -347,7 +347,7 @@
     return res;
 }
 
-RTPDynamicProtocolHandler ff_hevc_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_hevc_dynamic_handler = {
     .enc_name         = "H265",
     .codec_type       = AVMEDIA_TYPE_VIDEO,
     .codec_id         = AV_CODEC_ID_HEVC,

diff --git a/libavformat/rtpdec_ilbc.c b/libavformat/rtpdec_ilbc.c
index cb48f76..9094f2c 100644
--- a/libavformat/rtpdec_ilbc.c
+++ b/libavformat/rtpdec_ilbc.c

@@ -66,7 +66,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_ilbc_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_ilbc_dynamic_handler = {
     .enc_name         = "iLBC",
     .codec_type       = AVMEDIA_TYPE_AUDIO,
     .codec_id         = AV_CODEC_ID_ILBC,

diff --git a/libavformat/rtpdec_jpeg.c b/libavformat/rtpdec_jpeg.c
index 465d9bc..931463c 100644
--- a/libavformat/rtpdec_jpeg.c
+++ b/libavformat/rtpdec_jpeg.c

@@ -379,7 +379,7 @@
     return AVERROR(EAGAIN);
 }
 
-RTPDynamicProtocolHandler ff_jpeg_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_jpeg_dynamic_handler = {
     .enc_name          = "JPEG",
     .codec_type        = AVMEDIA_TYPE_VIDEO,
     .codec_id          = AV_CODEC_ID_MJPEG,

diff --git a/libavformat/rtpdec_latm.c b/libavformat/rtpdec_latm.c
index a25c07f..9087d6b 100644
--- a/libavformat/rtpdec_latm.c
+++ b/libavformat/rtpdec_latm.c

@@ -162,7 +162,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_mp4a_latm_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_mp4a_latm_dynamic_handler = {
     .enc_name           = "MP4A-LATM",
     .codec_type         = AVMEDIA_TYPE_AUDIO,
     .codec_id           = AV_CODEC_ID_AAC,

diff --git a/libavformat/rtpdec_mpa_robust.c b/libavformat/rtpdec_mpa_robust.c
index 86c8958..f4716edf 100644
--- a/libavformat/rtpdec_mpa_robust.c
+++ b/libavformat/rtpdec_mpa_robust.c

@@ -189,7 +189,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_mpeg_audio_robust_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_mpeg_audio_robust_dynamic_handler = {
     .enc_name          = "mpa-robust",
     .codec_type        = AVMEDIA_TYPE_AUDIO,
     .codec_id          = AV_CODEC_ID_MP3ADU,

diff --git a/libavformat/rtpdec_mpeg12.c b/libavformat/rtpdec_mpeg12.c
index b93de3d..43d9d58 100644
--- a/libavformat/rtpdec_mpeg12.c
+++ b/libavformat/rtpdec_mpeg12.c

@@ -48,7 +48,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_mpeg_audio_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_mpeg_audio_dynamic_handler = {
     .codec_type        = AVMEDIA_TYPE_AUDIO,
     .codec_id          = AV_CODEC_ID_MP3,
     .need_parsing      = AVSTREAM_PARSE_FULL,
@@ -56,7 +56,7 @@
     .static_payload_id = 14,
 };
 
-RTPDynamicProtocolHandler ff_mpeg_video_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_mpeg_video_dynamic_handler = {
     .codec_type        = AVMEDIA_TYPE_VIDEO,
     .codec_id          = AV_CODEC_ID_MPEG2VIDEO,
     .need_parsing      = AVSTREAM_PARSE_FULL,

diff --git a/libavformat/rtpdec_mpeg4.c b/libavformat/rtpdec_mpeg4.c
index 994ab49..4f70599 100644
--- a/libavformat/rtpdec_mpeg4.c
+++ b/libavformat/rtpdec_mpeg4.c

@@ -325,7 +325,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_mp4v_es_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_mp4v_es_dynamic_handler = {
     .enc_name           = "MP4V-ES",
     .codec_type         = AVMEDIA_TYPE_VIDEO,
     .codec_id           = AV_CODEC_ID_MPEG4,
@@ -334,7 +334,7 @@
     .parse_sdp_a_line   = parse_sdp_line,
 };
 
-RTPDynamicProtocolHandler ff_mpeg4_generic_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_mpeg4_generic_dynamic_handler = {
     .enc_name           = "mpeg4-generic",
     .codec_type         = AVMEDIA_TYPE_AUDIO,
     .codec_id           = AV_CODEC_ID_AAC,

diff --git a/libavformat/rtpdec_mpegts.c b/libavformat/rtpdec_mpegts.c
index 5bf0f18..405271f 100644
--- a/libavformat/rtpdec_mpegts.c
+++ b/libavformat/rtpdec_mpegts.c

@@ -89,7 +89,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_mpegts_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_mpegts_dynamic_handler = {
     .codec_type        = AVMEDIA_TYPE_DATA,
     .priv_data_size    = sizeof(PayloadContext),
     .parse_packet      = mpegts_handle_packet,

diff --git a/libavformat/rtpdec_qcelp.c b/libavformat/rtpdec_qcelp.c
index 41cc826..3485c27 100644
--- a/libavformat/rtpdec_qcelp.c
+++ b/libavformat/rtpdec_qcelp.c

@@ -209,7 +209,7 @@
         return return_stored_frame(ctx, data, st, pkt, timestamp, buf, len);
 }
 
-RTPDynamicProtocolHandler ff_qcelp_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_qcelp_dynamic_handler = {
     .enc_name           = "x-Purevoice",
     .codec_type         = AVMEDIA_TYPE_AUDIO,
     .codec_id           = AV_CODEC_ID_QCELP,

diff --git a/libavformat/rtpdec_qdm2.c b/libavformat/rtpdec_qdm2.c
index 1f4fd5a..fa2b1b9 100644
--- a/libavformat/rtpdec_qdm2.c
+++ b/libavformat/rtpdec_qdm2.c

@@ -298,7 +298,7 @@
     return (qdm->cache > 0) ? 1 : 0;
 }
 
-RTPDynamicProtocolHandler ff_qdm2_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_qdm2_dynamic_handler = {
     .enc_name         = "X-QDM",
     .codec_type       = AVMEDIA_TYPE_AUDIO,
     .codec_id         = AV_CODEC_ID_NONE,

diff --git a/libavformat/rtpdec_rfc4175.c b/libavformat/rtpdec_rfc4175.c
index 498381d..e9c62c1 100644
--- a/libavformat/rtpdec_rfc4175.c
+++ b/libavformat/rtpdec_rfc4175.c

@@ -226,7 +226,7 @@
     return AVERROR(EAGAIN);
 }
 
-RTPDynamicProtocolHandler ff_rfc4175_rtp_handler = {
+const RTPDynamicProtocolHandler ff_rfc4175_rtp_handler = {
     .enc_name           = "raw",
     .codec_type         = AVMEDIA_TYPE_VIDEO,
     .codec_id           = AV_CODEC_ID_BITPACKED,

diff --git a/libavformat/rtpdec_svq3.c b/libavformat/rtpdec_svq3.c
index 18d79d2..77164dd 100644
--- a/libavformat/rtpdec_svq3.c
+++ b/libavformat/rtpdec_svq3.c

@@ -110,7 +110,7 @@
     ffio_free_dyn_buf(&sv->pktbuf);
 }
 
-RTPDynamicProtocolHandler ff_svq3_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_svq3_dynamic_handler = {
     .enc_name         = "X-SV3V-ES",
     .codec_type       = AVMEDIA_TYPE_VIDEO,
     .codec_id         = AV_CODEC_ID_NONE,      // see if (config_packet) above

diff --git a/libavformat/rtpdec_vc2hq.c b/libavformat/rtpdec_vc2hq.c
index 8a3996a..1a11ace 100644
--- a/libavformat/rtpdec_vc2hq.c
+++ b/libavformat/rtpdec_vc2hq.c

@@ -216,7 +216,7 @@
     return res;
 }
 
-RTPDynamicProtocolHandler ff_vc2hq_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_vc2hq_dynamic_handler = {
     .enc_name         = "VC2",
     .codec_type       = AVMEDIA_TYPE_VIDEO,
     .codec_id         = AV_CODEC_ID_DIRAC,

diff --git a/libavformat/rtpdec_vp8.c b/libavformat/rtpdec_vp8.c
index f0e457b..360dd5c 100644
--- a/libavformat/rtpdec_vp8.c
+++ b/libavformat/rtpdec_vp8.c

@@ -276,7 +276,7 @@
     return vp8->sequence_dirty || !vp8->sequence_ok;
 }
 
-RTPDynamicProtocolHandler ff_vp8_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_vp8_dynamic_handler = {
     .enc_name       = "VP8",
     .codec_type     = AVMEDIA_TYPE_VIDEO,
     .codec_id       = AV_CODEC_ID_VP8,

diff --git a/libavformat/rtpdec_vp9.c b/libavformat/rtpdec_vp9.c
index 4a7f934..6bbdf48 100644
--- a/libavformat/rtpdec_vp9.c
+++ b/libavformat/rtpdec_vp9.c

@@ -330,7 +330,7 @@
     ffio_free_dyn_buf(&vp9->buf);
 }
 
-RTPDynamicProtocolHandler ff_vp9_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_vp9_dynamic_handler = {
     .enc_name         = "VP9",
     .codec_type       = AVMEDIA_TYPE_VIDEO,
     .codec_id         = AV_CODEC_ID_VP9,

diff --git a/libavformat/rtpdec_xiph.c b/libavformat/rtpdec_xiph.c
index 43de6ce..574508a 100644
--- a/libavformat/rtpdec_xiph.c
+++ b/libavformat/rtpdec_xiph.c

@@ -365,7 +365,7 @@
     return 0;
 }
 
-RTPDynamicProtocolHandler ff_theora_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_theora_dynamic_handler = {
     .enc_name         = "theora",
     .codec_type       = AVMEDIA_TYPE_VIDEO,
     .codec_id         = AV_CODEC_ID_THEORA,
@@ -375,7 +375,7 @@
     .parse_packet     = xiph_handle_packet,
 };
 
-RTPDynamicProtocolHandler ff_vorbis_dynamic_handler = {
+const RTPDynamicProtocolHandler ff_vorbis_dynamic_handler = {
     .enc_name         = "vorbis",
     .codec_type       = AVMEDIA_TYPE_AUDIO,
     .codec_id         = AV_CODEC_ID_VORBIS,

diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c
index 573593f..63047be 100644
--- a/libavformat/rtpenc.c
+++ b/libavformat/rtpenc.c

@@ -66,6 +66,7 @@
     case AV_CODEC_ID_PCM_S8:
     case AV_CODEC_ID_PCM_S16BE:
     case AV_CODEC_ID_PCM_S16LE:
+    case AV_CODEC_ID_PCM_S24BE:
     case AV_CODEC_ID_PCM_U16BE:
     case AV_CODEC_ID_PCM_U16LE:
     case AV_CODEC_ID_PCM_U8:
@@ -544,6 +545,8 @@
     case AV_CODEC_ID_PCM_S16BE:
     case AV_CODEC_ID_PCM_S16LE:
         return rtp_send_samples(s1, pkt->data, size, 16 * st->codecpar->channels);
+    case AV_CODEC_ID_PCM_S24BE:
+        return rtp_send_samples(s1, pkt->data, size, 24 * st->codecpar->channels);
     case AV_CODEC_ID_ADPCM_G722:
         /* The actual sample size is half a byte per sample, but since the
          * stream clock rate is 8000 Hz while the sample rate is 16000 Hz,

diff --git a/libavformat/rtpenc_chain.c b/libavformat/rtpenc_chain.c
index f768fb0..d3c1bc9 100644
--- a/libavformat/rtpenc_chain.c
+++ b/libavformat/rtpenc_chain.c

@@ -58,7 +58,7 @@
     rtpctx->max_delay = s->max_delay;
     /* Copy other stream parameters. */
     rtpctx->streams[0]->sample_aspect_ratio = st->sample_aspect_ratio;
-    rtpctx->flags |= s->flags & (AVFMT_FLAG_MP4A_LATM | AVFMT_FLAG_BITEXACT);
+    rtpctx->flags |= s->flags & AVFMT_FLAG_BITEXACT;
 
     /* Get the payload type from the codec */
     if (st->id < RTP_PT_PRIVATE)
@@ -101,7 +101,7 @@
     return 0;
 
 fail:
-    av_free(rtpctx);
+    avformat_free_context(rtpctx);
     if (handle)
         ffurl_close(handle);
     return ret;

diff --git a/libavformat/rtpenc_mpegts.c b/libavformat/rtpenc_mpegts.c
index 7af02e0..5f81e1a 100644
--- a/libavformat/rtpenc_mpegts.c
+++ b/libavformat/rtpenc_mpegts.c

@@ -85,6 +85,10 @@
     }
     rtp_ctx->oformat = rtp_format;
     st = avformat_new_stream(rtp_ctx, NULL);
+    if (!st) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
     st->time_base.num   = 1;
     st->time_base.den   = 90000;
     st->codecpar->codec_id = AV_CODEC_ID_MPEG2TS;

diff --git a/libavformat/rtpproto.c b/libavformat/rtpproto.c
index 0706cae..e706300 100644
--- a/libavformat/rtpproto.c
+++ b/libavformat/rtpproto.c

@@ -32,6 +32,7 @@
 #include "rtp.h"
 #include "rtpproto.h"
 #include "url.h"
+#include "ip.h"
 
 #include <stdarg.h>
 #include "internal.h"
@@ -45,8 +46,8 @@
 typedef struct RTPContext {
     const AVClass *class;
     URLContext *rtp_hd, *rtcp_hd, *fec_hd;
-    int rtp_fd, rtcp_fd, nb_ssm_include_addrs, nb_ssm_exclude_addrs;
-    struct sockaddr_storage **ssm_include_addrs, **ssm_exclude_addrs;
+    int rtp_fd, rtcp_fd;
+    IPSourceFilters filters;
     int write_to_source;
     struct sockaddr_storage last_rtp_source, last_rtcp_source;
     socklen_t last_rtp_source_len, last_rtcp_source_len;
@@ -126,45 +127,6 @@
     return 0;
 }
 
-static struct addrinfo* rtp_resolve_host(const char *hostname, int port,
-                                         int type, int family, int flags)
-{
-    struct addrinfo hints = { 0 }, *res = 0;
-    int error;
-    char service[16];
-
-    snprintf(service, sizeof(service), "%d", port);
-    hints.ai_socktype = type;
-    hints.ai_family   = family;
-    hints.ai_flags    = flags;
-    if ((error = getaddrinfo(hostname, service, &hints, &res))) {
-        res = NULL;
-        av_log(NULL, AV_LOG_ERROR, "rtp_resolve_host: %s\n", gai_strerror(error));
-    }
-
-    return res;
-}
-
-static int compare_addr(const struct sockaddr_storage *a,
-                        const struct sockaddr_storage *b)
-{
-    if (a->ss_family != b->ss_family)
-        return 1;
-    if (a->ss_family == AF_INET) {
-        return (((const struct sockaddr_in *)a)->sin_addr.s_addr !=
-                ((const struct sockaddr_in *)b)->sin_addr.s_addr);
-    }
-
-#if HAVE_STRUCT_SOCKADDR_IN6
-    if (a->ss_family == AF_INET6) {
-        const uint8_t *s6_addr_a = ((const struct sockaddr_in6 *)a)->sin6_addr.s6_addr;
-        const uint8_t *s6_addr_b = ((const struct sockaddr_in6 *)b)->sin6_addr.s6_addr;
-        return memcmp(s6_addr_a, s6_addr_b, 16);
-    }
-#endif
-    return 1;
-}
-
 static int get_port(const struct sockaddr_storage *ss)
 {
     if (ss->ss_family == AF_INET)
@@ -186,25 +148,6 @@
 #endif
 }
 
-static int rtp_check_source_lists(RTPContext *s, struct sockaddr_storage *source_addr_ptr)
-{
-    int i;
-    if (s->nb_ssm_exclude_addrs) {
-        for (i = 0; i < s->nb_ssm_exclude_addrs; i++) {
-            if (!compare_addr(source_addr_ptr, s->ssm_exclude_addrs[i]))
-                return 1;
-        }
-    }
-    if (s->nb_ssm_include_addrs) {
-        for (i = 0; i < s->nb_ssm_include_addrs; i++) {
-            if (!compare_addr(source_addr_ptr, s->ssm_include_addrs[i]))
-                return 0;
-        }
-        return 1;
-    }
-    return 0;
-}
-
 /**
  * add option to url of the form:
  * "http://host:port/path?option1=val1&option2=val2...
@@ -252,48 +195,6 @@
         url_add_option(buf, buf_size, "block=%s", exclude_sources);
 }
 
-static void rtp_parse_addr_list(URLContext *h, char *buf,
-                                struct sockaddr_storage ***address_list_ptr,
-                                int *address_list_size_ptr)
-{
-    struct addrinfo *ai = NULL;
-    struct sockaddr_storage *source_addr;
-    char tmp = '\0', *p = buf, *next;
-
-    /* Resolve all of the IPs */
-
-    while (p && p[0]) {
-        next = strchr(p, ',');
-
-        if (next) {
-            tmp = *next;
-            *next = '\0';
-        }
-
-        ai = rtp_resolve_host(p, 0, SOCK_DGRAM, AF_UNSPEC, 0);
-        if (ai) {
-            source_addr = av_mallocz(sizeof(struct sockaddr_storage));
-            if (!source_addr) {
-                freeaddrinfo(ai);
-                break;
-            }
-
-            memcpy(source_addr, ai->ai_addr, ai->ai_addrlen);
-            freeaddrinfo(ai);
-            dynarray_add(address_list_ptr, address_list_size_ptr, source_addr);
-        } else {
-            av_log(h, AV_LOG_WARNING, "Unable to resolve %s\n", p);
-        }
-
-        if (next) {
-            *next = tmp;
-            p = next + 1;
-        } else {
-            p = NULL;
-        }
-    }
-}
-
 /**
  * url syntax: rtp://host:port[?option=val...]
  * option: 'ttl=n'            : set the ttl value (for multicast only)
@@ -366,17 +267,16 @@
         }
         if (av_find_info_tag(buf, sizeof(buf), "sources", p)) {
             av_strlcpy(include_sources, buf, sizeof(include_sources));
-
-            rtp_parse_addr_list(h, buf, &s->ssm_include_addrs, &s->nb_ssm_include_addrs);
+            ff_ip_parse_sources(h, buf, &s->filters);
         } else {
-            rtp_parse_addr_list(h, s->sources, &s->ssm_include_addrs, &s->nb_ssm_include_addrs);
+            ff_ip_parse_sources(h, s->sources, &s->filters);
             sources = s->sources;
         }
         if (av_find_info_tag(buf, sizeof(buf), "block", p)) {
             av_strlcpy(exclude_sources, buf, sizeof(exclude_sources));
-            rtp_parse_addr_list(h, buf, &s->ssm_exclude_addrs, &s->nb_ssm_exclude_addrs);
+            ff_ip_parse_blocks(h, buf, &s->filters);
         } else {
-            rtp_parse_addr_list(h, s->block, &s->ssm_exclude_addrs, &s->nb_ssm_exclude_addrs);
+            ff_ip_parse_blocks(h, s->block, &s->filters);
             block = s->block;
         }
     }
@@ -500,7 +400,7 @@
                         continue;
                     return AVERROR(EIO);
                 }
-                if (rtp_check_source_lists(s, addrs[i]))
+                if (ff_ip_check_source_lists(addrs[i], &s->filters))
                     continue;
                 return len;
             }
@@ -603,14 +503,8 @@
 static int rtp_close(URLContext *h)
 {
     RTPContext *s = h->priv_data;
-    int i;
 
-    for (i = 0; i < s->nb_ssm_include_addrs; i++)
-        av_freep(&s->ssm_include_addrs[i]);
-    av_freep(&s->ssm_include_addrs);
-    for (i = 0; i < s->nb_ssm_exclude_addrs; i++)
-        av_freep(&s->ssm_exclude_addrs[i]);
-    av_freep(&s->ssm_exclude_addrs);
+    ff_ip_reset_filters(&s->filters);
 
     ffurl_close(s->rtp_hd);
     ffurl_close(s->rtcp_hd);
@@ -636,12 +530,6 @@
  * @return the local port number
  */
 
-int ff_rtp_get_local_rtcp_port(URLContext *h)
-{
-    RTPContext *s = h->priv_data;
-    return ff_udp_get_local_port(s->rtcp_hd);
-}
-
 static int rtp_get_file_handle(URLContext *h)
 {
     RTPContext *s = h->priv_data;

diff --git a/libavformat/rtpproto.h b/libavformat/rtpproto.h
index 5b243fb..131aac5 100644
--- a/libavformat/rtpproto.h
+++ b/libavformat/rtpproto.h

@@ -26,6 +26,5 @@
 int ff_rtp_set_remote_url(URLContext *h, const char *uri);
 
 int ff_rtp_get_local_rtp_port(URLContext *h);
-int ff_rtp_get_local_rtcp_port(URLContext *h);
 
 #endif /* AVFORMAT_RTPPROTO_H */

diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index b6da61b..ceb770a 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c

@@ -93,10 +93,18 @@
     RTSP_MEDIATYPE_OPTS("allowed_media_types", "set media types to accept from the server"),
     { "min_port", "set minimum local UDP port", OFFSET(rtp_port_min), AV_OPT_TYPE_INT, {.i64 = RTSP_RTP_PORT_MIN}, 0, 65535, DEC|ENC },
     { "max_port", "set maximum local UDP port", OFFSET(rtp_port_max), AV_OPT_TYPE_INT, {.i64 = RTSP_RTP_PORT_MAX}, 0, 65535, DEC|ENC },
-    { "timeout", "set maximum timeout (in seconds) to wait for incoming connections (-1 is infinite, imply flag listen)", OFFSET(initial_timeout), AV_OPT_TYPE_INT, {.i64 = -1}, INT_MIN, INT_MAX, DEC },
+    { "listen_timeout", "set maximum timeout (in seconds) to wait for incoming connections (-1 is infinite, imply flag listen)", OFFSET(initial_timeout), AV_OPT_TYPE_INT, {.i64 = -1}, INT_MIN, INT_MAX, DEC },
+#if FF_API_OLD_RTSP_OPTIONS
+    { "timeout", "set maximum timeout (in seconds) to wait for incoming connections (-1 is infinite, imply flag listen) (deprecated, use listen_timeout)", OFFSET(initial_timeout), AV_OPT_TYPE_INT, {.i64 = -1}, INT_MIN, INT_MAX, DEC },
     { "stimeout", "set timeout (in microseconds) of socket TCP I/O operations", OFFSET(stimeout), AV_OPT_TYPE_INT, {.i64 = 0}, INT_MIN, INT_MAX, DEC },
+#else
+    { "timeout", "set timeout (in microseconds) of socket TCP I/O operations", OFFSET(stimeout), AV_OPT_TYPE_INT, {.i64 = 0}, INT_MIN, INT_MAX, DEC },
+#endif
     COMMON_OPTS(),
-    { "user-agent", "override User-Agent header", OFFSET(user_agent), AV_OPT_TYPE_STRING, {.str = LIBAVFORMAT_IDENT}, 0, 0, DEC },
+    { "user_agent", "override User-Agent header", OFFSET(user_agent), AV_OPT_TYPE_STRING, {.str = LIBAVFORMAT_IDENT}, 0, 0, DEC },
+#if FF_API_OLD_RTSP_OPTIONS
+    { "user-agent", "override User-Agent header (deprecated, use user_agent)", OFFSET(user_agent), AV_OPT_TYPE_STRING, {.str = LIBAVFORMAT_IDENT}, 0, 0, DEC },
+#endif
     { NULL },
 };
 
@@ -203,7 +211,7 @@
 }
 
 #if CONFIG_RTPDEC
-static void init_rtp_handler(RTPDynamicProtocolHandler *handler,
+static void init_rtp_handler(const RTPDynamicProtocolHandler *handler,
                              RTSPStream *rtsp_st, AVStream *st)
 {
     AVCodecParameters *par = st ? st->codecpar : NULL;
@@ -263,7 +271,7 @@
     }
 
     if (par->codec_id == AV_CODEC_ID_NONE) {
-        RTPDynamicProtocolHandler *handler =
+        const RTPDynamicProtocolHandler *handler =
             ff_rtp_handler_find_by_name(buf, par->codec_type);
         init_rtp_handler(handler, rtsp_st, st);
         /* If no dynamic handler was found, check with the list of standard
@@ -487,7 +495,7 @@
                 if (CONFIG_RTPDEC && !rt->ts)
                     rt->ts = avpriv_mpegts_parse_open(s);
             } else {
-                RTPDynamicProtocolHandler *handler;
+                const RTPDynamicProtocolHandler *handler;
                 handler = ff_rtp_handler_find_by_id(
                               rtsp_st->sdp_payload_type, AVMEDIA_TYPE_DATA);
                 init_rtp_handler(handler, rtsp_st, NULL);
@@ -505,7 +513,7 @@
             rtsp_st->stream_index = st->index;
             st->codecpar->codec_type = codec_type;
             if (rtsp_st->sdp_payload_type < RTP_PT_PRIVATE) {
-                RTPDynamicProtocolHandler *handler;
+                const RTPDynamicProtocolHandler *handler;
                 /* if standard payload type, we can find the codec right now */
                 ff_rtp_get_codec_info(st->codecpar, rtsp_st->sdp_payload_type);
                 if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
@@ -1686,7 +1694,7 @@
 redirect:
     /* extract hostname and port */
     av_url_split(proto, sizeof(proto), auth, sizeof(auth),
-                 host, sizeof(host), &port, path, sizeof(path), s->filename);
+                 host, sizeof(host), &port, path, sizeof(path), s->url);
 
     if (!strcmp(proto, "rtsps")) {
         lower_rtsp_proto         = "tls";
@@ -1717,7 +1725,7 @@
         }
     }
 
-    /* Construct the URI used in request; this is similar to s->filename,
+    /* Construct the URI used in request; this is similar to s->url,
      * but with authentication credentials removed and RTSP specific options
      * stripped out. */
     ff_url_join(rt->control_uri, sizeof(rt->control_uri), proto, NULL,
@@ -1905,13 +1913,19 @@
     ff_rtsp_close_streams(s);
     ff_rtsp_close_connections(s);
     if (reply->status_code >=300 && reply->status_code < 400 && s->iformat) {
-        av_strlcpy(s->filename, reply->location, sizeof(s->filename));
+        char *new_url = av_strdup(reply->location);
+        if (!new_url) {
+            err = AVERROR(ENOMEM);
+            goto fail2;
+        }
+        ff_format_set_url(s, new_url);
         rt->session_id[0] = '\0';
         av_log(s, AV_LOG_INFO, "Status %d: Redirecting to %s\n",
                reply->status_code,
-               s->filename);
+               s->url);
         goto redirect;
     }
+ fail2:
     ff_network_close();
     return err;
 }
@@ -2009,7 +2023,9 @@
             }
 #if CONFIG_RTSP_DEMUXER
             if (rt->rtsp_hd && p[0].revents & POLLIN) {
-                return parse_rtsp_message(s);
+                if ((ret = parse_rtsp_message(s)) < 0) {
+                    return ret;
+                }
             }
 #endif
         } else if (n == 0 && ++timeout_cnt >= MAX_TIMEOUTS) {
@@ -2423,7 +2439,7 @@
     if (!ff_network_init())
         return AVERROR(EIO);
 
-    ret = ffurl_open_whitelist(&in, s->filename, AVIO_FLAG_READ,
+    ret = ffurl_open_whitelist(&in, s->url, AVIO_FLAG_READ,
                      &s->interrupt_callback, NULL, s->protocol_whitelist, s->protocol_blacklist, NULL);
     if (ret)
         goto fail;
@@ -2474,7 +2490,7 @@
     }
 
     av_url_split(NULL, 0, NULL, 0, host, sizeof(host), &port,
-                 NULL, 0, s->filename);
+                 NULL, 0, s->url);
 
     snprintf(sdp, sizeof(sdp),
              "v=0\r\nc=IN IP%d %s\r\nm=%s %d RTP/AVP %d\r\n",

diff --git a/libavformat/rtsp.h b/libavformat/rtsp.h
index 36fdae4..9a7f366 100644
--- a/libavformat/rtsp.h
+++ b/libavformat/rtsp.h

@@ -458,7 +458,7 @@
     /** The following are used for dynamic protocols (rtpdec_*.c/rdt.c) */
     //@{
     /** handler structure */
-    RTPDynamicProtocolHandler *dynamic_handler;
+    const RTPDynamicProtocolHandler *dynamic_handler;
 
     /** private data associated with the dynamic protocol */
     PayloadContext *dynamic_protocol_context;

diff --git a/libavformat/rtspdec.c b/libavformat/rtspdec.c
index fdf75a0..32dff23 100644
--- a/libavformat/rtspdec.c
+++ b/libavformat/rtspdec.c

@@ -644,7 +644,7 @@
 
     /* extract hostname and port */
     av_url_split(proto, sizeof(proto), auth, sizeof(auth), host, sizeof(host),
-                 &port, path, sizeof(path), s->filename);
+                 &port, path, sizeof(path), s->url);
 
     /* ff_url_join. No authorization by now (NULL) */
     ff_url_join(rt->control_uri, sizeof(rt->control_uri), proto, NULL, host,
@@ -804,7 +804,7 @@
     int port;
 
     av_url_split(NULL, 0, NULL, 0, host, sizeof(host), &port, NULL, 0,
-                 s->filename);
+                 s->url);
     ff_rtsp_undo_setup(s, 0);
     return ff_rtsp_make_setup_request(s, host, port, RTSP_LOWER_TRANSPORT_TCP,
                                       rt->real_challenge);

diff --git a/libavformat/rtspenc.c b/libavformat/rtspenc.c
index e7707bb..97e3ef6 100644
--- a/libavformat/rtspenc.c
+++ b/libavformat/rtspenc.c

@@ -50,6 +50,7 @@
     int i;
     char *sdp;
     AVFormatContext sdp_ctx, *ctx_array[1];
+    char url[1024];
 
     if (s->start_time_realtime == 0  ||  s->start_time_realtime == AV_NOPTS_VALUE)
         s->start_time_realtime = av_gettime();
@@ -71,7 +72,8 @@
      * flexible SDP creation interface.
      */
     sdp_ctx = *s;
-    ff_url_join(sdp_ctx.filename, sizeof(sdp_ctx.filename),
+    sdp_ctx.url = url;
+    ff_url_join(url, sizeof(url),
                 "rtsp", NULL, addr, -1, NULL);
     ctx_array[0] = &sdp_ctx;
     if (av_sdp_create(ctx_array, 1, sdp, SDP_MAX_SIZE)) {

diff --git a/libavformat/sapdec.c b/libavformat/sapdec.c
index 522b38d..7a6c8bf 100644
--- a/libavformat/sapdec.c
+++ b/libavformat/sapdec.c

@@ -74,7 +74,7 @@
         return AVERROR(EIO);
 
     av_url_split(NULL, 0, NULL, 0, host, sizeof(host), &port,
-                 path, sizeof(path), s->filename);
+                 path, sizeof(path), s->url);
     if (port < 0)
         port = 9875;
 

diff --git a/libavformat/sapenc.c b/libavformat/sapenc.c
index 3098e34..f9afab0 100644
--- a/libavformat/sapenc.c
+++ b/libavformat/sapenc.c

@@ -84,7 +84,7 @@
 
     /* extract hostname and port */
     av_url_split(NULL, 0, NULL, 0, host, sizeof(host), &base_port,
-                 path, sizeof(path), s->filename);
+                 path, sizeof(path), s->url);
     if (base_port < 0)
         base_port = 5004;
 
@@ -144,6 +144,7 @@
         s->start_time_realtime = av_gettime();
     for (i = 0; i < s->nb_streams; i++) {
         URLContext *fd;
+        char *new_url;
 
         ff_url_join(url, sizeof(url), "rtp", NULL, host, base_port,
                     "?ttl=%d", ttl);
@@ -161,7 +162,12 @@
             goto fail;
         s->streams[i]->priv_data = contexts[i];
         s->streams[i]->time_base = contexts[i]->streams[0]->time_base;
-        av_strlcpy(contexts[i]->filename, url, sizeof(contexts[i]->filename));
+        new_url = av_strdup(url);
+        if (!new_url) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        ff_format_set_url(contexts[i], new_url);
     }
 
     if (s->nb_streams > 0 && title)

diff --git a/libavfilter/avfiltergraph.h b/libavformat/sbcdec.c
similarity index 60%
copy from libavfilter/avfiltergraph.h
copy to libavformat/sbcdec.c
index b31d581..ae74a22 100644
--- a/libavfilter/avfiltergraph.h
+++ b/libavformat/sbcdec.c

@@ -1,6 +1,6 @@
 /*
- * Filter graphs
- * copyright (c) 2007 Bobby Bingham
+ * RAW SBC demuxer
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
  *
  * This file is part of FFmpeg.
  *
@@ -19,10 +19,15 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVFILTER_AVFILTERGRAPH_H
-#define AVFILTER_AVFILTERGRAPH_H
+#include "avformat.h"
+#include "rawdec.h"
 
-#include "avfilter.h"
-#include "libavutil/log.h"
-
-#endif /* AVFILTER_AVFILTERGRAPH_H */
+AVInputFormat ff_sbc_demuxer = {
+    .name           = "sbc",
+    .long_name      = NULL_IF_CONFIG_SMALL("raw SBC (low-complexity subband codec)"),
+    .extensions     = "sbc,msbc",
+    .raw_codec_id   = AV_CODEC_ID_SBC,
+    .read_header    = ff_raw_audio_read_header,
+    .read_packet    = ff_raw_read_partial_packet,
+    .flags          = AVFMT_GENERIC_INDEX,
+};

diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index 0242ca3..a5d202e 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c

@@ -584,6 +584,12 @@
                                          payload_type,
                                          p->sample_rate, p->channels);
             break;
+        case AV_CODEC_ID_PCM_S24BE:
+            if (payload_type >= RTP_PT_PRIVATE)
+                av_strlcatf(buff, size, "a=rtpmap:%d L24/%d/%d\r\n",
+                                         payload_type,
+                                         p->sample_rate, p->channels);
+            break;
         case AV_CODEC_ID_PCM_MULAW:
             if (payload_type >= RTP_PT_PRIVATE)
                 av_strlcatf(buff, size, "a=rtpmap:%d PCMU/%d/%d\r\n",
@@ -778,7 +784,7 @@
     port = 0;
     ttl = 0;
     if (n_files == 1) {
-        port = sdp_get_address(dst, sizeof(dst), &ttl, ac[0]->filename);
+        port = sdp_get_address(dst, sizeof(dst), &ttl, ac[0]->url ? ac[0]->url : "");
         is_multicast = resolve_destination(dst, sizeof(dst), dst_type,
                                            sizeof(dst_type));
         if (!is_multicast)
@@ -798,7 +804,7 @@
     dst[0] = 0;
     for (i = 0; i < n_files; i++) {
         if (n_files != 1) {
-            port = sdp_get_address(dst, sizeof(dst), &ttl, ac[i]->filename);
+            port = sdp_get_address(dst, sizeof(dst), &ttl, ac[i]->url ? ac[i]->url : "");
             is_multicast = resolve_destination(dst, sizeof(dst), dst_type,
                                                sizeof(dst_type));
             if (!is_multicast)

diff --git a/libavformat/segafilm.c b/libavformat/segafilm.c
index 1fdef50..b0c6c41 100644
--- a/libavformat/segafilm.c
+++ b/libavformat/segafilm.c

@@ -239,7 +239,7 @@
         } else {
             film->sample_table[i].stream = film->video_stream_index;
             film->sample_table[i].pts = AV_RB32(&scratch[8]) & 0x7FFFFFFF;
-            film->sample_table[i].keyframe = (scratch[8] & 0x80) ? 0 : 1;
+            film->sample_table[i].keyframe = (scratch[8] & 0x80) ? 0 : AVINDEX_KEYFRAME;
             video_frame_counter++;
             if (film->video_type)
                 av_add_index_entry(s->streams[film->video_stream_index],
@@ -270,6 +270,8 @@
     FilmDemuxContext *film = s->priv_data;
     AVIOContext *pb = s->pb;
     film_sample *sample;
+    film_sample *next_sample = NULL;
+    int next_sample_id;
     int ret = 0;
 
     if (film->current_sample >= film->sample_count)
@@ -277,6 +279,20 @@
 
     sample = &film->sample_table[film->current_sample];
 
+    /* Find the next sample from the same stream, assuming there is one;
+     * this is used to calculate the duration below */
+    next_sample_id = film->current_sample + 1;
+    while (next_sample == NULL) {
+        if (next_sample_id >= film->sample_count)
+            break;
+
+        next_sample = &film->sample_table[next_sample_id];
+        if (next_sample->stream != sample->stream) {
+            next_sample = NULL;
+            next_sample_id++;
+        }
+    }
+
     /* position the stream (will probably be there anyway) */
     avio_seek(pb, sample->sample_offset, SEEK_SET);
 
@@ -285,7 +301,11 @@
         ret = AVERROR(EIO);
 
     pkt->stream_index = sample->stream;
+    pkt->dts = sample->pts;
     pkt->pts = sample->pts;
+    pkt->flags |= sample->keyframe ? AV_PKT_FLAG_KEY : 0;
+    if (next_sample != NULL)
+        pkt->duration = next_sample->pts - sample->pts;
 
     film->current_sample++;
 

diff --git a/libavformat/segafilmenc.c b/libavformat/segafilmenc.c
new file mode 100644
index 0000000..524230e
--- /dev/null
+++ b/libavformat/segafilmenc.c

@@ -0,0 +1,398 @@
+/*
+ * Sega FILM Format (CPK) Muxer
+ * Copyright (C) 2003 The FFmpeg project
+ * Copyright (C) 2018 Misty De Meo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Sega FILM (.cpk) file muxer
+ * @author Misty De Meo <misty@brew.sh>
+ *
+ * @see For more information regarding the Sega FILM file format, visit:
+ *   http://wiki.multimedia.cx/index.php?title=Sega_FILM
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avformat.h"
+#include "internal.h"
+#include "avio_internal.h"
+
+typedef struct FILMPacket {
+    int audio;
+    int keyframe;
+    int32_t pts;
+    int32_t duration;
+    int32_t size;
+    int32_t index;
+    struct FILMPacket *next;
+} FILMPacket;
+
+typedef struct FILMOutputContext {
+    const AVClass *class;
+    int audio_index;
+    int video_index;
+    int64_t stab_pos;
+    FILMPacket *start;
+    FILMPacket *last;
+    int64_t packet_count;
+} FILMOutputContext;
+
+static int film_write_packet_to_header(AVFormatContext *format_context, FILMPacket *pkt)
+{
+    AVIOContext *pb = format_context->pb;
+    /* The bits in these two 32-bit integers contain info about the contents of this sample */
+    int32_t info1 = 0;
+    int32_t info2 = 0;
+
+    if (pkt->audio) {
+        /* Always the same, carries no more information than "this is audio" */
+        info1 = 0xFFFFFFFF;
+        info2 = 1;
+    } else {
+        info1 = pkt->pts;
+        info2 = pkt->duration;
+        /* The top bit being set indicates a key frame */
+        if (!pkt->keyframe)
+            info1 |= (1 << 31);
+    }
+
+    /* Write the 16-byte sample info packet to the STAB chunk in the header */
+    avio_wb32(pb, pkt->index);
+    avio_wb32(pb, pkt->size);
+    avio_wb32(pb, info1);
+    avio_wb32(pb, info2);
+
+    return 0;
+}
+
+static int film_write_packet(AVFormatContext *format_context, AVPacket *pkt)
+{
+    FILMPacket *metadata;
+    AVIOContext *pb = format_context->pb;
+    FILMOutputContext *film = format_context->priv_data;
+    int encoded_buf_size = 0;
+    enum AVCodecID codec_id;
+
+    /* Track the metadata used to write the header and add it to the linked list */
+    metadata = av_mallocz(sizeof(FILMPacket));
+    if (!metadata)
+        return AVERROR(ENOMEM);
+    metadata->audio = pkt->stream_index == film->audio_index;
+    metadata->keyframe = pkt->flags & AV_PKT_FLAG_KEY;
+    metadata->pts = pkt->pts;
+    metadata->duration = pkt->duration;
+    metadata->size = pkt->size;
+    if (film->last == NULL) {
+        metadata->index = 0;
+    } else {
+        metadata->index = film->last->index + film->last->size;
+        film->last->next = metadata;
+    }
+    metadata->next = NULL;
+    if (film->start == NULL)
+        film->start = metadata;
+    film->packet_count++;
+    film->last = metadata;
+
+    codec_id = format_context->streams[pkt->stream_index]->codecpar->codec_id;
+
+    /* Sega Cinepak has an extra two-byte header; write dummy data there,
+     * then adjust the cvid header to accommodate for the extra size */
+    if (codec_id == AV_CODEC_ID_CINEPAK) {
+        encoded_buf_size = AV_RB24(&pkt->data[1]);
+        /* Already Sega Cinepak, so no need to reformat the packets */
+        if (encoded_buf_size != pkt->size && (pkt->size % encoded_buf_size) != 0) {
+            avio_write(pb, pkt->data, pkt->size);
+        } else {
+            uint8_t padding[2] = {0, 0};
+            /* In Sega Cinepak, the reported size in the Cinepak header is
+             * 8 bytes too short. However, the size in the STAB section of the header
+             * is correct, taking into account the extra two bytes. */
+            AV_WB24(&pkt->data[1], pkt->size - 8 + 2);
+            metadata->size += 2;
+
+            avio_write(pb, pkt->data, 10);
+            avio_write(pb, padding, 2);
+            avio_write(pb, &pkt->data[10], pkt->size - 10);
+        }
+    } else {
+        /* Other formats can just be written as-is */
+        avio_write(pb, pkt->data, pkt->size);
+    }
+
+    return 0;
+}
+
+static int get_audio_codec_id(enum AVCodecID codec_id)
+{
+    /* 0 (PCM) and 2 (ADX) are the only known values */
+    switch (codec_id) {
+    case AV_CODEC_ID_PCM_S8_PLANAR:
+    case AV_CODEC_ID_PCM_S16BE_PLANAR:
+        return 0;
+        break;
+    case AV_CODEC_ID_ADPCM_ADX:
+        return 2;
+        break;
+    default:
+        return -1;
+    }
+}
+
+static int film_init(AVFormatContext *format_context)
+{
+    AVStream *audio = NULL;
+    FILMOutputContext *film = format_context->priv_data;
+    film->audio_index = -1;
+    film->video_index = -1;
+    film->stab_pos = 0;
+    film->packet_count = 0;
+    film->start = NULL;
+    film->last = NULL;
+
+    for (int i = 0; i < format_context->nb_streams; i++) {
+        AVStream *st = format_context->streams[i];
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+            if (film->audio_index > -1) {
+                av_log(format_context, AV_LOG_ERROR, "Sega FILM allows a maximum of one audio stream.\n");
+                return AVERROR(EINVAL);
+            }
+            film->audio_index = i;
+            audio = st;
+        }
+
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
+            if (film->video_index > -1) {
+                av_log(format_context, AV_LOG_ERROR, "Sega FILM allows a maximum of one video stream.\n");
+                return AVERROR(EINVAL);
+            }
+            film->video_index = i;
+        }
+
+        if (film->video_index == -1) {
+            av_log(format_context, AV_LOG_ERROR, "No video stream present.\n");
+            return AVERROR(EINVAL);
+        }
+    }
+
+    if (audio != NULL && get_audio_codec_id(audio->codecpar->codec_id) < 0) {
+        av_log(format_context, AV_LOG_ERROR, "Incompatible audio stream format.\n");
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+static int shift_data(AVFormatContext *format_context, int64_t shift_size)
+{
+    int ret = 0;
+    int64_t pos, pos_end = avio_tell(format_context->pb);
+    uint8_t *buf, *read_buf[2];
+    int read_buf_id = 0;
+    int read_size[2];
+    AVIOContext *read_pb;
+
+    buf = av_malloc(shift_size * 2);
+    if (!buf)
+        return AVERROR(ENOMEM);
+    read_buf[0] = buf;
+    read_buf[1] = buf + shift_size;
+
+    /* Write the header at the beginning of the file, shifting all content as necessary;
+     * based on the approach used by MOV faststart. */
+    avio_flush(format_context->pb);
+    ret = format_context->io_open(format_context, &read_pb, format_context->url, AVIO_FLAG_READ, NULL);
+    if (ret < 0) {
+        av_log(format_context, AV_LOG_ERROR, "Unable to re-open %s output file to "
+               "write the header\n", format_context->url);
+        av_free(buf);
+        return ret;
+    }
+
+    /* mark the end of the shift to up to the last data we wrote, and get ready
+     * for writing */
+    pos_end = avio_tell(format_context->pb);
+    avio_seek(format_context->pb, shift_size, SEEK_SET);
+
+    /* start reading at where the new header will be placed */
+    avio_seek(read_pb, 0, SEEK_SET);
+    pos = avio_tell(read_pb);
+
+#define READ_BLOCK do {                                                             \
+    read_size[read_buf_id] = avio_read(read_pb, read_buf[read_buf_id], shift_size);  \
+    read_buf_id ^= 1;                                                               \
+} while (0)
+
+    /* shift data by chunk of at most shift_size */
+    READ_BLOCK;
+    do {
+        int n;
+        READ_BLOCK;
+        n = read_size[read_buf_id];
+        if (n <= 0)
+            break;
+        avio_write(format_context->pb, read_buf[read_buf_id], n);
+        pos += n;
+    } while (pos < pos_end);
+    ff_format_io_close(format_context, &read_pb);
+
+    av_free(buf);
+    return 0;
+}
+
+static int film_write_header(AVFormatContext *format_context)
+{
+    int ret = 0;
+    int64_t sample_table_size, stabsize, headersize;
+    int8_t audio_codec;
+    AVIOContext *pb = format_context->pb;
+    FILMOutputContext *film = format_context->priv_data;
+    FILMPacket *prev, *packet;
+    AVStream *audio = NULL;
+    AVStream *video = NULL;
+
+    /* Calculate how much we need to reserve for the header;
+     * this is the amount the rest of the data will be shifted up by. */
+    sample_table_size = film->packet_count * 16;
+    stabsize = 16 + sample_table_size;
+    headersize = 16 + /* FILM header base */
+                 32 + /* FDSC chunk */
+                 stabsize;
+
+    ret = shift_data(format_context, headersize);
+    if (ret < 0)
+        return ret;
+    /* Seek back to the beginning to start writing the header now */
+    avio_seek(pb, 0, SEEK_SET);
+
+    if (film->audio_index > -1)
+        audio = format_context->streams[film->audio_index];
+    if (film->video_index > -1)
+        video = format_context->streams[film->video_index];
+
+    if (audio != NULL) {
+        audio_codec = get_audio_codec_id(audio->codecpar->codec_id);
+        if (audio_codec < 0) {
+            av_log(format_context, AV_LOG_ERROR, "Incompatible audio stream format.\n");
+            return AVERROR(EINVAL);
+        }
+    }
+
+    if (video->codecpar->format != AV_PIX_FMT_RGB24) {
+        av_log(format_context, AV_LOG_ERROR, "Pixel format must be rgb24.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* First, write the FILM header; this is very simple */
+
+    ffio_wfourcc(pb, "FILM");
+    avio_wb32(pb, 48 + stabsize);
+    /* This seems to be okay to hardcode, since this muxer targets 1.09 features;
+     * videos produced by this muxer are readable by 1.08 and lower players. */
+    ffio_wfourcc(pb, "1.09");
+    /* I have no idea what this field does, might be reserved */
+    avio_wb32(pb, 0);
+
+    /* Next write the FDSC (file description) chunk */
+    ffio_wfourcc(pb, "FDSC");
+    avio_wb32(pb, 0x20); /* Size of FDSC chunk */
+
+    /* The only two supported codecs; raw video is rare */
+    switch (video->codecpar->codec_id) {
+    case AV_CODEC_ID_CINEPAK:
+        ffio_wfourcc(pb, "cvid");
+        break;
+    case AV_CODEC_ID_RAWVIDEO:
+        ffio_wfourcc(pb, "raw ");
+        break;
+    default:
+        av_log(format_context, AV_LOG_ERROR, "Incompatible video stream format.\n");
+        return AVERROR(EINVAL);
+    }
+
+    avio_wb32(pb, video->codecpar->height);
+    avio_wb32(pb, video->codecpar->width);
+    avio_w8(pb, 24); /* Bits per pixel - observed to always be 24 */
+
+    if (audio != NULL) {
+        avio_w8(pb, audio->codecpar->channels); /* Audio channels */
+        avio_w8(pb, audio->codecpar->bits_per_coded_sample); /* Audio bit depth */
+        avio_w8(pb, audio_codec); /* Compression - 0 is PCM, 2 is ADX */
+        avio_wb16(pb, audio->codecpar->sample_rate); /* Audio sampling rate */
+    } else {
+        /* Set all these fields to 0 if there's no audio */
+        avio_w8(pb, 0);
+        avio_w8(pb, 0);
+        avio_w8(pb, 0);
+        avio_wb16(pb, 0);
+    }
+
+    /* I have no idea what this pair of fields does either, might be reserved */
+    avio_wb32(pb, 0);
+    avio_wb16(pb, 0);
+
+    /* Finally, write the STAB (sample table) chunk */
+    ffio_wfourcc(pb, "STAB");
+    avio_wb32(pb, 16 + (film->packet_count * 16));
+    /* Framerate base frequency. Here we're assuming that the frame rate is even.
+     * In real world Sega FILM files, there are usually a couple of approaches:
+     * a) framerate base frequency is the same as the framerate, and ticks
+     *    increment by 1 every frame, or
+     * b) framerate base frequency is a much larger number, and ticks
+     *    increment by larger steps every frame.
+     * The latter occurs even in cases where the frame rate is even; for example, in
+     * Lunar: Silver Star Story, the base frequency is 600 and each frame, the ticks
+     * are incremented by 25 for an evenly spaced framerate of 24fps. */
+    avio_wb32(pb, av_q2d(av_inv_q(video->time_base)));
+
+    avio_wb32(pb, film->packet_count);
+
+    avio_flush(pb);
+
+    /* Finally, write out each packet's data to the header */
+    packet = film->start;
+    while (packet != NULL) {
+        film_write_packet_to_header(format_context, packet);
+        prev = packet;
+        packet = packet->next;
+        av_freep(&prev);
+    }
+
+    return 0;
+}
+
+static const AVClass film_muxer_class = {
+    .class_name     = "Sega FILM muxer",
+    .item_name      = av_default_item_name,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
+AVOutputFormat ff_segafilm_muxer = {
+    .name           = "film_cpk",
+    .long_name      = NULL_IF_CONFIG_SMALL("Sega FILM / CPK"),
+    .extensions     = "cpk",
+    .priv_data_size = sizeof(FILMOutputContext),
+    .audio_codec    = AV_CODEC_ID_PCM_S16BE_PLANAR,
+    .video_codec    = AV_CODEC_ID_CINEPAK,
+    .init           = film_init,
+    .write_trailer  = film_write_header,
+    .write_packet   = film_write_packet,
+    .priv_class     = &film_muxer_class,
+};

diff --git a/libavformat/segment.c b/libavformat/segment.c
index 81d3f1d..e2ac2c3 100644
--- a/libavformat/segment.c
+++ b/libavformat/segment.c

@@ -192,6 +192,8 @@
     AVFormatContext *oc = seg->avf;
     size_t size;
     int ret;
+    char buf[1024];
+    char *new_name;
 
     if (seg->segment_idx_wrap)
         seg->segment_idx %= seg->segment_idx_wrap;
@@ -200,18 +202,22 @@
         struct tm *tm, tmpbuf;
         time(&now0);
         tm = localtime_r(&now0, &tmpbuf);
-        if (!strftime(oc->filename, sizeof(oc->filename), s->filename, tm)) {
+        if (!strftime(buf, sizeof(buf), s->url, tm)) {
             av_log(oc, AV_LOG_ERROR, "Could not get segment filename with strftime\n");
             return AVERROR(EINVAL);
         }
-    } else if (av_get_frame_filename(oc->filename, sizeof(oc->filename),
-                                     s->filename, seg->segment_idx) < 0) {
-        av_log(oc, AV_LOG_ERROR, "Invalid segment filename template '%s'\n", s->filename);
+    } else if (av_get_frame_filename(buf, sizeof(buf),
+                                     s->url, seg->segment_idx) < 0) {
+        av_log(oc, AV_LOG_ERROR, "Invalid segment filename template '%s'\n", s->url);
         return AVERROR(EINVAL);
     }
+    new_name = av_strdup(buf);
+    if (!new_name)
+        return AVERROR(ENOMEM);
+    ff_format_set_url(oc, new_name);
 
     /* copy modified name in list entry */
-    size = strlen(av_basename(oc->filename)) + 1;
+    size = strlen(av_basename(oc->url)) + 1;
     if (seg->entry_prefix)
         size += strlen(seg->entry_prefix);
 
@@ -219,7 +225,7 @@
         return ret;
     snprintf(seg->cur_entry.filename, size, "%s%s",
              seg->entry_prefix ? seg->entry_prefix : "",
-             av_basename(oc->filename));
+             av_basename(oc->url));
 
     return 0;
 }
@@ -245,8 +251,8 @@
     if ((err = set_segment_filename(s)) < 0)
         return err;
 
-    if ((err = s->io_open(s, &oc->pb, oc->filename, AVIO_FLAG_WRITE, NULL)) < 0) {
-        av_log(s, AV_LOG_ERROR, "Failed to open segment '%s'\n", oc->filename);
+    if ((err = s->io_open(s, &oc->pb, oc->url, AVIO_FLAG_WRITE, NULL)) < 0) {
+        av_log(s, AV_LOG_ERROR, "Failed to open segment '%s'\n", oc->url);
         return err;
     }
     if (!seg->individual_header_trailer)
@@ -360,7 +366,7 @@
 
     if (ret < 0)
         av_log(s, AV_LOG_ERROR, "Failure occurred when ending segment '%s'\n",
-               oc->filename);
+               oc->url);
 
     if (seg->list) {
         if (seg->list_size || seg->list_type == LIST_TYPE_M3U8) {
@@ -403,7 +409,7 @@
     }
 
     av_log(s, AV_LOG_VERBOSE, "segment:'%s' count:%d ended\n",
-           seg->avf->filename, seg->segment_count);
+           seg->avf->url, seg->segment_count);
     seg->segment_count++;
 
     if (seg->increment_tc) {
@@ -726,7 +732,7 @@
            seg->reference_stream_index,
            av_get_media_type_string(s->streams[seg->reference_stream_index]->codecpar->codec_type));
 
-    seg->oformat = av_guess_format(seg->format, s->filename, NULL);
+    seg->oformat = av_guess_format(seg->format, s->url, NULL);
 
     if (!seg->oformat)
         return AVERROR_MUXER_NOT_FOUND;
@@ -745,9 +751,9 @@
 
     if (seg->write_header_trailer) {
         if ((ret = s->io_open(s, &oc->pb,
-                              seg->header_filename ? seg->header_filename : oc->filename,
+                              seg->header_filename ? seg->header_filename : oc->url,
                               AVIO_FLAG_WRITE, NULL)) < 0) {
-            av_log(s, AV_LOG_ERROR, "Failed to open segment '%s'\n", oc->filename);
+            av_log(s, AV_LOG_ERROR, "Failed to open segment '%s'\n", oc->url);
             return ret;
         }
         if (!seg->individual_header_trailer)
@@ -830,7 +836,7 @@
         } else {
             close_null_ctxp(&oc->pb);
         }
-        if ((ret = oc->io_open(oc, &oc->pb, oc->filename, AVIO_FLAG_WRITE, NULL)) < 0)
+        if ((ret = oc->io_open(oc, &oc->pb, oc->url, AVIO_FLAG_WRITE, NULL)) < 0)
             return ret;
         if (!seg->individual_header_trailer)
             oc->pb->seekable = 0;
@@ -917,7 +923,7 @@
 
     if (seg->segment_frame_count == 0) {
         av_log(s, AV_LOG_VERBOSE, "segment:'%s' starts with packet stream:%d pts:%s pts_time:%s frame:%d\n",
-               seg->avf->filename, pkt->stream_index,
+               seg->avf->url, pkt->stream_index,
                av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, &st->time_base), seg->frame_count);
     }
 
@@ -1039,7 +1045,7 @@
     { "segment_clocktime_offset", "set segment clocktime offset",        OFFSET(clocktime_offset), AV_OPT_TYPE_DURATION, {.i64 = 0}, 0, 86400000000LL, E},
     { "segment_clocktime_wrap_duration", "set segment clocktime wrapping duration", OFFSET(clocktime_wrap_duration), AV_OPT_TYPE_DURATION, {.i64 = INT64_MAX}, 0, INT64_MAX, E},
     { "segment_time",      "set segment duration",                       OFFSET(time_str),AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,       E },
-    { "segment_time_delta","set approximation value used for the segment times", OFFSET(time_delta), AV_OPT_TYPE_DURATION, {.i64 = 0}, 0, 0, E },
+    { "segment_time_delta","set approximation value used for the segment times", OFFSET(time_delta), AV_OPT_TYPE_DURATION, {.i64 = 0}, 0, INT64_MAX, E },
     { "segment_times",     "set segment split time points",              OFFSET(times_str),AV_OPT_TYPE_STRING,{.str = NULL},  0, 0,       E },
     { "segment_frames",    "set segment split frame numbers",            OFFSET(frames_str),AV_OPT_TYPE_STRING,{.str = NULL},  0, 0,       E },
     { "segment_wrap",      "set number after which the index wraps",     OFFSET(segment_idx_wrap), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, E },
@@ -1058,6 +1064,7 @@
     { NULL },
 };
 
+#if CONFIG_SEGMENT_MUXER
 static const AVClass seg_class = {
     .class_name = "segment muxer",
     .item_name  = av_default_item_name,
@@ -1078,7 +1085,9 @@
     .check_bitstream = seg_check_bitstream,
     .priv_class     = &seg_class,
 };
+#endif
 
+#if CONFIG_STREAM_SEGMENT_MUXER
 static const AVClass sseg_class = {
     .class_name = "stream_segment muxer",
     .item_name  = av_default_item_name,
@@ -1099,3 +1108,4 @@
     .check_bitstream = seg_check_bitstream,
     .priv_class     = &sseg_class,
 };
+#endif

diff --git a/libavformat/serdec.c b/libavformat/serdec.c
new file mode 100644
index 0000000..25e6f63
--- /dev/null
+++ b/libavformat/serdec.c

@@ -0,0 +1,146 @@
+/*
+ * SER demuxer
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+#include "internal.h"
+#include "avformat.h"
+
+#define SER_MAGIC "LUCAM-RECORDER"
+
+typedef struct SERDemuxerContext {
+    const AVClass *class;
+    int width, height;
+    AVRational framerate;
+    int64_t end;
+} SERDemuxerContext;
+
+static int ser_probe(AVProbeData *pd)
+{
+    if (memcmp(pd->buf, SER_MAGIC, 14) == 0)
+        return AVPROBE_SCORE_MAX;
+    else
+        return 0;
+}
+
+static int ser_read_header(AVFormatContext *s)
+{
+    SERDemuxerContext *ser = s->priv_data;
+    enum AVPixelFormat pix_fmt;
+    int depth, color_id, endian;
+    int packet_size;
+    AVStream *st;
+
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+
+    avio_skip(s->pb, 14);
+    avio_skip(s->pb, 4);
+    color_id = avio_rl32(s->pb);
+    endian = avio_rl32(s->pb);
+    ser->width = avio_rl32(s->pb);
+    ser->height = avio_rl32(s->pb);
+    depth = avio_rl32(s->pb);
+    st->nb_frames = st->duration = avio_rl32(s->pb);
+    avio_skip(s->pb, 120);
+    avio_skip(s->pb, 8);
+    avio_skip(s->pb, 8);
+
+    switch (color_id) {
+    case   0: pix_fmt = depth <= 8 ? AV_PIX_FMT_GRAY8       : endian ? AV_PIX_FMT_GRAY16LE       : AV_PIX_FMT_GRAY16BE;       break;
+    case   8: pix_fmt = depth <= 8 ? AV_PIX_FMT_BAYER_RGGB8 : endian ? AV_PIX_FMT_BAYER_RGGB16LE : AV_PIX_FMT_BAYER_RGGB16BE; break;
+    case   9: pix_fmt = depth <= 8 ? AV_PIX_FMT_BAYER_GRBG8 : endian ? AV_PIX_FMT_BAYER_GRBG16LE : AV_PIX_FMT_BAYER_GRBG16BE; break;
+    case  10: pix_fmt = depth <= 8 ? AV_PIX_FMT_BAYER_GBRG8 : endian ? AV_PIX_FMT_BAYER_GBRG16LE : AV_PIX_FMT_BAYER_GBRG16BE; break;
+    case  11: pix_fmt = depth <= 8 ? AV_PIX_FMT_BAYER_BGGR8 : endian ? AV_PIX_FMT_BAYER_BGGR16LE : AV_PIX_FMT_BAYER_BGGR16BE; break;
+    case 100: pix_fmt = depth <= 8 ? AV_PIX_FMT_RGB24       : endian ? AV_PIX_FMT_RGB48LE        : AV_PIX_FMT_RGB48BE;        break;
+    case 101: pix_fmt = depth <= 8 ? AV_PIX_FMT_BGR24       : endian ? AV_PIX_FMT_BGR48LE        : AV_PIX_FMT_BGR48BE;        break;
+    default:
+        return AVERROR_PATCHWELCOME;
+    }
+
+    st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+    st->codecpar->codec_id = s->iformat->raw_codec_id;
+
+    avpriv_set_pts_info(st, 64, ser->framerate.den, ser->framerate.num);
+
+    st->codecpar->width  = ser->width;
+    st->codecpar->height = ser->height;
+    st->codecpar->format = pix_fmt;
+    packet_size = av_image_get_buffer_size(st->codecpar->format, ser->width, ser->height, 1);
+    if (packet_size < 0)
+        return packet_size;
+    ser->end = 178 + st->nb_frames * packet_size;
+    s->packet_size = packet_size;
+    st->codecpar->bit_rate = av_rescale_q(s->packet_size,
+                                       (AVRational){8,1}, st->time_base);
+
+    return 0;
+}
+
+
+static int ser_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    SERDemuxerContext *ser = s->priv_data;
+    int64_t pos;
+    int ret;
+
+    pos = avio_tell(s->pb);
+    if (pos >= ser->end)
+        return AVERROR_EOF;
+
+    ret = av_get_packet(s->pb, pkt, s->packet_size);
+    pkt->pts = pkt->dts = (pkt->pos - s->internal->data_offset) / s->packet_size;
+
+    pkt->stream_index = 0;
+    if (ret < 0)
+        return ret;
+    return 0;
+}
+
+#define OFFSET(x) offsetof(SERDemuxerContext, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
+static const AVOption ser_options[] = {
+    { "framerate", "set frame rate", OFFSET(framerate), AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, INT_MAX, DEC },
+    { NULL },
+};
+
+static const AVClass ser_demuxer_class = {
+    .class_name = "ser demuxer",
+    .item_name  = av_default_item_name,
+    .option     = ser_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVInputFormat ff_ser_demuxer = {
+    .name           = "ser",
+    .long_name      = NULL_IF_CONFIG_SMALL("SER (Simple uncompressed video format for astronomical capturing)"),
+    .priv_data_size = sizeof(SERDemuxerContext),
+    .read_probe     = ser_probe,
+    .read_header    = ser_read_header,
+    .read_packet    = ser_read_packet,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .extensions     = "ser",
+    .raw_codec_id   = AV_CODEC_ID_RAWVIDEO,
+    .priv_class     = &ser_demuxer_class,
+};

diff --git a/libavformat/smoothstreamingenc.c b/libavformat/smoothstreamingenc.c
index 54a1c49..094712a 100644
--- a/libavformat/smoothstreamingenc.c
+++ b/libavformat/smoothstreamingenc.c

@@ -221,8 +221,8 @@
     int ret, i, video_chunks = 0, audio_chunks = 0, video_streams = 0, audio_streams = 0;
     int64_t duration = 0;
 
-    snprintf(filename, sizeof(filename), "%s/Manifest", s->filename);
-    snprintf(temp_filename, sizeof(temp_filename), "%s/Manifest.tmp", s->filename);
+    snprintf(filename, sizeof(filename), "%s/Manifest", s->url);
+    snprintf(temp_filename, sizeof(temp_filename), "%s/Manifest.tmp", s->url);
     ret = s->io_open(s, &out, temp_filename, AVIO_FLAG_WRITE, NULL);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Unable to open %s for writing\n", temp_filename);
@@ -295,7 +295,7 @@
     int ret = 0, i;
     AVOutputFormat *oformat;
 
-    if (mkdir(s->filename, 0777) == -1 && errno != EEXIST) {
+    if (mkdir(s->url, 0777) == -1 && errno != EEXIST) {
         ret = AVERROR(errno);
         av_log(s, AV_LOG_ERROR, "mkdir failed\n");
         goto fail;
@@ -324,7 +324,7 @@
             ret = AVERROR(EINVAL);
             goto fail;
         }
-        snprintf(os->dirname, sizeof(os->dirname), "%s/QualityLevels(%"PRId64")", s->filename, s->streams[i]->codecpar->bit_rate);
+        snprintf(os->dirname, sizeof(os->dirname), "%s/QualityLevels(%"PRId64")", s->url, s->streams[i]->codecpar->bit_rate);
         if (mkdir(os->dirname, 0777) == -1 && errno != EEXIST) {
             ret = AVERROR(errno);
             av_log(s, AV_LOG_ERROR, "mkdir failed\n");
@@ -609,9 +609,9 @@
 
     if (c->remove_at_exit) {
         char filename[1024];
-        snprintf(filename, sizeof(filename), "%s/Manifest", s->filename);
+        snprintf(filename, sizeof(filename), "%s/Manifest", s->url);
         unlink(filename);
-        rmdir(s->filename);
+        rmdir(s->url);
     }
 
     ism_free(s);

diff --git a/libavformat/spdifdec.c b/libavformat/spdifdec.c
index f728837..21bfce4 100644
--- a/libavformat/spdifdec.c
+++ b/libavformat/spdifdec.c

@@ -25,18 +25,22 @@
  * @author Anssi Hannula
  */
 
+#include "libavutil/bswap.h"
+
+#include "libavcodec/ac3.h"
+#include "libavcodec/adts_parser.h"
+
 #include "avformat.h"
 #include "spdif.h"
-#include "libavcodec/ac3.h"
-#include "libavcodec/aacadtsdec.h"
 
 static int spdif_get_offset_and_codec(AVFormatContext *s,
                                       enum IEC61937DataType data_type,
                                       const char *buf, int *offset,
                                       enum AVCodecID *codec)
 {
-    AACADTSHeaderInfo aac_hdr;
-    GetBitContext gbc;
+    uint32_t samples;
+    uint8_t frames;
+    int ret;
 
     switch (data_type & 0xff) {
     case IEC61937_AC3:
@@ -56,13 +60,13 @@
         *codec = AV_CODEC_ID_MP3;
         break;
     case IEC61937_MPEG2_AAC:
-        init_get_bits(&gbc, buf, AAC_ADTS_HEADER_SIZE * 8);
-        if (avpriv_aac_parse_header(&gbc, &aac_hdr) < 0) {
+        ret = av_adts_header_parse(buf, &samples, &frames);
+        if (ret < 0) {
             if (s) /* be silent during a probe */
                 av_log(s, AV_LOG_ERROR, "Invalid AAC packet in IEC 61937\n");
-            return AVERROR_INVALIDDATA;
+            return ret;
         }
-        *offset = aac_hdr.samples << 2;
+        *offset = samples << 2;
         *codec = AV_CODEC_ID_AAC;
         break;
     case IEC61937_MPEG2_LAYER1_LSF:
@@ -100,7 +104,7 @@
 }
 
 /* Largest offset between bursts we currently handle, i.e. AAC with
-   aac_hdr.samples = 4096 */
+   samples = 4096 */
 #define SPDIF_MAX_OFFSET 16384
 
 static int spdif_probe(AVProbeData *p)
@@ -132,7 +136,7 @@
             } else
                 consecutive_codes = 0;
 
-            if (buf + 4 + AAC_ADTS_HEADER_SIZE > p_buf + buf_size)
+            if (buf + 4 + AV_AAC_ADTS_HEADER_SIZE > p_buf + buf_size)
                 break;
 
             /* continue probing to find more sync codes */

diff --git a/libavformat/spdifenc.c b/libavformat/spdifenc.c
index b47ec12..9514ff8 100644
--- a/libavformat/spdifenc.c
+++ b/libavformat/spdifenc.c

@@ -50,9 +50,9 @@
 #include "avio_internal.h"
 #include "spdif.h"
 #include "libavcodec/ac3.h"
+#include "libavcodec/adts_parser.h"
 #include "libavcodec/dca.h"
 #include "libavcodec/dca_syncwords.h"
-#include "libavcodec/aacadtsdec.h"
 #include "libavutil/opt.h"
 
 typedef struct IEC61937Context {
@@ -118,7 +118,8 @@
     static const uint8_t eac3_repeat[4] = {6, 3, 2, 1};
     int repeat = 1;
 
-    if ((pkt->data[4] & 0xc0) != 0xc0) /* fscod */
+    int bsid = pkt->data[5] >> 3;
+    if (bsid > 10 && (pkt->data[4] & 0xc0) != 0xc0) /* fscod */
         repeat = eac3_repeat[(pkt->data[4] & 0x30) >> 4]; /* numblkscod */
 
     ctx->hd_buf = av_fast_realloc(ctx->hd_buf, &ctx->hd_buf_size, ctx->hd_buf_filled + pkt->size);
@@ -349,19 +350,18 @@
 static int spdif_header_aac(AVFormatContext *s, AVPacket *pkt)
 {
     IEC61937Context *ctx = s->priv_data;
-    AACADTSHeaderInfo hdr;
-    GetBitContext gbc;
+    uint32_t samples;
+    uint8_t frames;
     int ret;
 
-    init_get_bits(&gbc, pkt->data, AAC_ADTS_HEADER_SIZE * 8);
-    ret = avpriv_aac_parse_header(&gbc, &hdr);
+    ret = av_adts_header_parse(pkt->data, &samples, &frames);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Wrong AAC file format\n");
-        return AVERROR_INVALIDDATA;
+        return ret;
     }
 
-    ctx->pkt_offset = hdr.samples << 2;
-    switch (hdr.num_aac_frames) {
+    ctx->pkt_offset = samples << 2;
+    switch (frames) {
     case 1:
         ctx->data_type = IEC61937_MPEG2_AAC;
         break;
@@ -373,7 +373,7 @@
         break;
     default:
         av_log(s, AV_LOG_ERROR,
-               "%"PRIu32" samples in AAC frame not supported\n", hdr.samples);
+               "%"PRIu32" samples in AAC frame not supported\n", samples);
         return AVERROR(EINVAL);
     }
     //TODO Data type dependent info (LC profile/SBR)

diff --git a/libavformat/subfile.c b/libavformat/subfile.c
index 497cf85..b527f2b 100644
--- a/libavformat/subfile.c
+++ b/libavformat/subfile.c

@@ -72,6 +72,9 @@
     SubfileContext *c = h->priv_data;
     int ret;
 
+    if (!c->end)
+        c->end = INT64_MAX;
+
     if (c->end <= c->start) {
         av_log(h, AV_LOG_ERROR, "end before start\n");
         return AVERROR(EINVAL);

diff --git a/libavformat/swfdec.c b/libavformat/swfdec.c
index 57b619f..212157f 100644
--- a/libavformat/swfdec.c
+++ b/libavformat/swfdec.c

@@ -95,7 +95,7 @@
     if (p->buf[3] >= 20 || xmax < 16 || ymax < 16)
         return AVPROBE_SCORE_MAX / 4;
 
-    return AVPROBE_SCORE_MAX;
+    return AVPROBE_SCORE_EXTENSION + 1;
 }
 
 #if CONFIG_ZLIB

diff --git a/libavformat/swfenc.c b/libavformat/swfenc.c
index cada45e..f53db0f 100644
--- a/libavformat/swfenc.c
+++ b/libavformat/swfenc.c

@@ -69,7 +69,7 @@
 
     if (val == 0)
         return;
-    val = abs(val);
+    val = FFABS(val);
     n = 1;
     while (val != 0) {
         n++;

diff --git a/libavformat/takdec.c b/libavformat/takdec.c
index 1535bec..6fda35c 100644
--- a/libavformat/takdec.c
+++ b/libavformat/takdec.c

@@ -103,7 +103,6 @@
                 }
             }
 
-            init_get_bits8(&gb, buffer, size - 3);
             break;
         case TAK_METADATA_MD5: {
             uint8_t md5[16];
@@ -145,7 +144,9 @@
         if (type == TAK_METADATA_STREAMINFO) {
             TAKStreamInfo ti;
 
-            avpriv_tak_parse_streaminfo(&gb, &ti);
+            ret = avpriv_tak_parse_streaminfo(&ti, buffer, size -3);
+            if (ret < 0)
+                return AVERROR_INVALIDDATA;
             if (ti.samples > 0)
                 st->duration = ti.samples;
             st->codecpar->bits_per_coded_sample = ti.bps;
@@ -161,11 +162,13 @@
         } else if (type == TAK_METADATA_LAST_FRAME) {
             if (size != 11)
                 return AVERROR_INVALIDDATA;
+            init_get_bits8(&gb, buffer, size - 3);
             tc->mlast_frame = 1;
             tc->data_end    = get_bits64(&gb, TAK_LAST_FRAME_POS_BITS) +
                               get_bits(&gb, TAK_LAST_FRAME_SIZE_BITS);
             av_freep(&buffer);
         } else if (type == TAK_METADATA_ENCODER) {
+            init_get_bits8(&gb, buffer, size - 3);
             av_log(s, AV_LOG_VERBOSE, "encoder version: %0X\n",
                    get_bits_long(&gb, TAK_ENCODER_VERSION_BITS));
             av_freep(&buffer);

diff --git a/libavformat/tcp.c b/libavformat/tcp.c
index 07b4ed9..2198e0f 100644
--- a/libavformat/tcp.c
+++ b/libavformat/tcp.c

@@ -41,6 +41,10 @@
     int listen_timeout;
     int recv_buffer_size;
     int send_buffer_size;
+    int tcp_nodelay;
+#if !HAVE_WINSOCK2_H
+    int tcp_mss;
+#endif /* !HAVE_WINSOCK2_H */
 } TCPContext;
 
 #define OFFSET(x) offsetof(TCPContext, x)
@@ -52,6 +56,10 @@
     { "listen_timeout",  "Connection awaiting timeout (in milliseconds)",      OFFSET(listen_timeout), AV_OPT_TYPE_INT, { .i64 = -1 },         -1, INT_MAX, .flags = D|E },
     { "send_buffer_size", "Socket send buffer size (in bytes)",                OFFSET(send_buffer_size), AV_OPT_TYPE_INT, { .i64 = -1 },         -1, INT_MAX, .flags = D|E },
     { "recv_buffer_size", "Socket receive buffer size (in bytes)",             OFFSET(recv_buffer_size), AV_OPT_TYPE_INT, { .i64 = -1 },         -1, INT_MAX, .flags = D|E },
+    { "tcp_nodelay", "Use TCP_NODELAY to disable nagle's algorithm",           OFFSET(tcp_nodelay), AV_OPT_TYPE_BOOL, { .i64 = 0 },             0, 1, .flags = D|E },
+#if !HAVE_WINSOCK2_H
+    { "tcp_mss",     "Maximum segment size for outgoing TCP packets",          OFFSET(tcp_mss),     AV_OPT_TYPE_INT, { .i64 = -1 },         -1, INT_MAX, .flags = D|E },
+#endif /* !HAVE_WINSOCK2_H */
     { NULL }
 };
 
@@ -62,6 +70,35 @@
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
+static void customize_fd(void *ctx, int fd)
+{
+    TCPContext *s = ctx;
+    /* Set the socket's send or receive buffer sizes, if specified.
+       If unspecified or setting fails, system default is used. */
+    if (s->recv_buffer_size > 0) {
+        if (setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &s->recv_buffer_size, sizeof (s->recv_buffer_size))) {
+            ff_log_net_error(ctx, AV_LOG_WARNING, "setsockopt(SO_RCVBUF)");
+        }
+    }
+    if (s->send_buffer_size > 0) {
+        if (setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &s->send_buffer_size, sizeof (s->send_buffer_size))) {
+            ff_log_net_error(ctx, AV_LOG_WARNING, "setsockopt(SO_SNDBUF)");
+        }
+    }
+    if (s->tcp_nodelay > 0) {
+        if (setsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &s->tcp_nodelay, sizeof (s->tcp_nodelay))) {
+            ff_log_net_error(ctx, AV_LOG_WARNING, "setsockopt(TCP_NODELAY)");
+        }
+    }
+#if !HAVE_WINSOCK2_H
+    if (s->tcp_mss > 0) {
+        if (setsockopt (fd, IPPROTO_TCP, TCP_MAXSEG, &s->tcp_mss, sizeof (s->tcp_mss))) {
+            ff_log_net_error(ctx, AV_LOG_WARNING, "setsockopt(TCP_MAXSEG)");
+        }
+    }
+#endif /* !HAVE_WINSOCK2_H */
+}
+
 /* return non zero if error */
 static int tcp_open(URLContext *h, const char *uri, int flags)
 {
@@ -121,7 +158,6 @@
 
     cur_ai = ai;
 
- restart:
 #if HAVE_STRUCT_SOCKADDR_IN6
     // workaround for IOS9 getaddrinfo in IPv6 only network use hardcode IPv4 address can not resolve port number.
     if (cur_ai->ai_family == AF_INET6){
@@ -132,21 +168,19 @@
     }
 #endif
 
-    fd = ff_socket(cur_ai->ai_family,
-                   cur_ai->ai_socktype,
-                   cur_ai->ai_protocol);
-    if (fd < 0) {
-        ret = ff_neterrno();
-        goto fail;
-    }
-
-    /* Set the socket's send or receive buffer sizes, if specified.
-       If unspecified or setting fails, system default is used. */
-    if (s->recv_buffer_size > 0) {
-        setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &s->recv_buffer_size, sizeof (s->recv_buffer_size));
-    }
-    if (s->send_buffer_size > 0) {
-        setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &s->send_buffer_size, sizeof (s->send_buffer_size));
+    if (s->listen > 0) {
+        while (cur_ai && fd < 0) {
+            fd = ff_socket(cur_ai->ai_family,
+                           cur_ai->ai_socktype,
+                           cur_ai->ai_protocol);
+            if (fd < 0) {
+                ret = ff_neterrno();
+                cur_ai = cur_ai->ai_next;
+            }
+        }
+        if (fd < 0)
+            goto fail1;
+        customize_fd(s, fd);
     }
 
     if (s->listen == 2) {
@@ -161,14 +195,9 @@
         // Socket descriptor already closed here. Safe to overwrite to client one.
         fd = ret;
     } else {
-        if ((ret = ff_listen_connect(fd, cur_ai->ai_addr, cur_ai->ai_addrlen,
-                                     s->open_timeout / 1000, h, !!cur_ai->ai_next)) < 0) {
-
-            if (ret == AVERROR_EXIT)
-                goto fail1;
-            else
-                goto fail;
-        }
+        ret = ff_connect_parallel(ai, s->open_timeout / 1000, 3, h, &fd, customize_fd, s);
+        if (ret < 0)
+            goto fail1;
     }
 
     h->is_streamed = 1;
@@ -177,15 +206,6 @@
     freeaddrinfo(ai);
     return 0;
 
- fail:
-    if (cur_ai->ai_next) {
-        /* Retry with the next sockaddr */
-        cur_ai = cur_ai->ai_next;
-        if (fd >= 0)
-            closesocket(fd);
-        ret = 0;
-        goto restart;
-    }
  fail1:
     if (fd >= 0)
         closesocket(fd);
@@ -203,8 +223,10 @@
         return ret;
     cc = (*c)->priv_data;
     ret = ff_accept(sc->fd, sc->listen_timeout, s);
-    if (ret < 0)
+    if (ret < 0) {
+        ffurl_closep(c);
         return ret;
+    }
     cc->fd = ret;
     return 0;
 }
@@ -220,6 +242,8 @@
             return ret;
     }
     ret = recv(s->fd, buf, size, 0);
+    if (ret == 0)
+        return AVERROR_EOF;
     return ret < 0 ? ff_neterrno() : ret;
 }
 
@@ -270,7 +294,7 @@
 {
     TCPContext *s = h->priv_data;
     int avail;
-    int avail_len = sizeof(avail);
+    socklen_t avail_len = sizeof(avail);
 
 #if HAVE_WINSOCK2_H
     /* SO_RCVBUF with winsock only reports the actual TCP window size when

diff --git a/libavformat/tedcaptionsdec.c b/libavformat/tedcaptionsdec.c
index 774d499..cd6ab0c 100644
--- a/libavformat/tedcaptionsdec.c
+++ b/libavformat/tedcaptionsdec.c

@@ -94,7 +94,7 @@
 {
     int ret;
 
-    av_bprint_init(bp, 0, full ? -1 : 1);
+    av_bprint_init(bp, 0, full ? AV_BPRINT_SIZE_UNLIMITED : AV_BPRINT_SIZE_AUTOMATIC);
     ret = expect_byte(pb, cur_byte, '"');
     if (ret < 0)
         goto fail;

diff --git a/libavformat/tee.c b/libavformat/tee.c
index dd1844ac..ef3b113 100644
--- a/libavformat/tee.c
+++ b/libavformat/tee.c

@@ -406,7 +406,7 @@
 {
     int i;
     av_log(log_ctx, log_level, "filename:'%s' format:%s\n",
-           slave->avf->filename, slave->avf->oformat->name);
+           slave->avf->url, slave->avf->oformat->name);
     for (i = 0; i < slave->avf->nb_streams; i++) {
         AVStream *st = slave->avf->streams[i];
         AVBSFContext *bsf = slave->bsfs[i];
@@ -448,7 +448,7 @@
 {
     TeeContext *tee = avf->priv_data;
     unsigned nb_slaves = 0, i;
-    const char *filename = avf->filename;
+    const char *filename = avf->url;
     char **slaves = NULL;
     int ret;
 

diff --git a/libavformat/tests/fifo_muxer.c b/libavformat/tests/fifo_muxer.c
index e20bd6e..5127a8a 100644
--- a/libavformat/tests/fifo_muxer.c
+++ b/libavformat/tests/fifo_muxer.c

@@ -31,8 +31,6 @@
 #define SLEEPTIME_50_MS 50000
 #define SLEEPTIME_10_MS 10000
 
-/* Implementation of mock muxer to simulate real muxer failures */
-
 /* This is structure of data sent in packets to
  * failing muxer */
 typedef struct FailingMuxerPacketData {
@@ -41,113 +39,7 @@
     unsigned sleep_time; /* sleep for this long in write_packet to simulate long I/O operation */
 } FailingMuxerPacketData;
 
-
-typedef struct FailingMuxerContext {
-    AVClass *class;
-    int write_header_ret;
-    int write_trailer_ret;
-    /* If non-zero, summary of processed packets will be printed in deinit */
-    int print_deinit_summary;
-
-    int flush_count;
-    int pts_written[MAX_TST_PACKETS];
-    int pts_written_nr;
-} FailingMuxerContext;
-
-static int failing_write_header(AVFormatContext *avf)
-{
-    FailingMuxerContext *ctx = avf->priv_data;
-    return ctx->write_header_ret;
-}
-
-static int failing_write_packet(AVFormatContext *avf, AVPacket *pkt)
-{
-    FailingMuxerContext *ctx = avf->priv_data;
-    int ret = 0;
-    if (!pkt) {
-        ctx->flush_count++;
-    } else {
-        FailingMuxerPacketData *data = (FailingMuxerPacketData*) pkt->data;
-
-        if (!data->recover_after) {
-            data->ret = 0;
-        } else {
-            data->recover_after--;
-        }
-
-        ret = data->ret;
-
-        if (data->sleep_time) {
-            int64_t slept = 0;
-            while (slept < data->sleep_time) {
-                if (ff_check_interrupt(&avf->interrupt_callback))
-                    return AVERROR_EXIT;
-                av_usleep(SLEEPTIME_10_MS);
-                slept += SLEEPTIME_10_MS;
-            }
-        }
-
-        if (!ret) {
-            ctx->pts_written[ctx->pts_written_nr++] = pkt->pts;
-            av_packet_unref(pkt);
-        }
-    }
-    return ret;
-}
-
-static int failing_write_trailer(AVFormatContext *avf)
-{
-    FailingMuxerContext *ctx = avf->priv_data;
-    return ctx->write_trailer_ret;
-}
-
-static void failing_deinit(AVFormatContext *avf)
-{
-    int i;
-    FailingMuxerContext *ctx = avf->priv_data;
-
-    if (!ctx->print_deinit_summary)
-        return;
-
-    printf("flush count: %d\n", ctx->flush_count);
-    printf("pts seen nr: %d\n", ctx->pts_written_nr);
-    printf("pts seen: ");
-    for (i = 0; i < ctx->pts_written_nr; ++i ) {
-        printf(i ? ",%d" : "%d", ctx->pts_written[i]);
-    }
-    printf("\n");
-}
-#define OFFSET(x) offsetof(FailingMuxerContext, x)
-static const AVOption options[] = {
-        {"write_header_ret", "write_header() return value", OFFSET(write_header_ret),
-         AV_OPT_TYPE_INT, {.i64 = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
-        {"write_trailer_ret", "write_trailer() return value", OFFSET(write_trailer_ret),
-         AV_OPT_TYPE_INT, {.i64 = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
-        {"print_deinit_summary", "print summary when deinitializing muxer", OFFSET(print_deinit_summary),
-         AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
-        {NULL}
-    };
-
-static const AVClass failing_muxer_class = {
-    .class_name = "Failing test muxer",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-
-AVOutputFormat tst_failing_muxer = {
-    .name           = "fail",
-    .long_name      = NULL_IF_CONFIG_SMALL("Failing test muxer"),
-    .priv_data_size = sizeof(FailingMuxerContext),
-    .write_header   = failing_write_header,
-    .write_packet   = failing_write_packet,
-    .write_trailer  = failing_write_trailer,
-    .deinit         = failing_deinit,
-    .priv_class     = &failing_muxer_class,
-    .flags          = AVFMT_NOFILE | AVFMT_ALLOW_FLUSH,
-};
-
-static int prepare_packet(AVPacket *pkt,const FailingMuxerPacketData *pkt_data, int64_t pts)
+static int prepare_packet(AVPacket *pkt, const FailingMuxerPacketData *pkt_data, int64_t pts)
 {
     int ret;
     FailingMuxerPacketData *data = av_malloc(sizeof(*data));
@@ -238,54 +130,6 @@
     return ret;
 }
 
-static int fifo_write_header_err_tst(AVFormatContext *oc, AVDictionary **opts,
-                                     const FailingMuxerPacketData *pkt_data)
-{
-    int ret = 0, i;
-    AVPacket pkt;
-
-    av_init_packet(&pkt);
-
-    ret = avformat_write_header(oc, opts);
-    if (ret) {
-        fprintf(stderr, "Unexpected write_header failure: %s\n",
-                av_err2str(ret));
-        goto fail;
-    }
-
-    for (i = 0; i < MAX_TST_PACKETS; i++ ) {
-        ret = prepare_packet(&pkt, pkt_data, i);
-        if (ret < 0) {
-            fprintf(stderr, "Failed to prepare test packet: %s\n",
-                    av_err2str(ret));
-            goto write_trailer_and_fail;
-        }
-        ret = av_write_frame(oc, &pkt);
-        av_packet_unref(&pkt);
-        if (ret < 0) {
-            break;
-        }
-    }
-
-    if (!ret) {
-        fprintf(stderr, "write_packet not failed when supposed to.\n");
-        goto fail;
-    } else if (ret != -1) {
-        fprintf(stderr, "Unexpected write_packet error: %s\n", av_err2str(ret));
-        goto fail;
-    }
-
-    ret = av_write_trailer(oc);
-    if (ret < 0)
-        fprintf(stderr, "Unexpected write_trailer error: %s\n", av_err2str(ret));
-
-    return ret;
-write_trailer_and_fail:
-    av_write_trailer(oc);
-fail:
-    return ret;
-}
-
 static int fifo_overflow_drop_test(AVFormatContext *oc, AVDictionary **opts,
                                    const FailingMuxerPacketData *data)
 {
@@ -381,7 +225,7 @@
              (int)test->print_summary_on_deinit, test->write_header_ret,
              test->write_trailer_ret);
     ret = av_dict_set(&opts, "format_opts", buffer, 0);
-    ret1 = av_dict_set(&opts, "fifo_format", "fail", 0);
+    ret1 = av_dict_set(&opts, "fifo_format", "fifo_test", 0);
     if (ret < 0 || ret1 < 0) {
         fprintf(stderr, "Failed to set options for test muxer: %s\n",
                 av_err2str(ret));
@@ -403,10 +247,6 @@
          * exactly what was on input */
         {fifo_basic_test, "nonfail test", NULL,1, 0, 0, {0, 0, 0}},
 
-        /* Test that we receive delayed write_header error from one of the write_packet
-         * calls. */
-        {fifo_write_header_err_tst, "write header error test", NULL, 0, -1, 0, {0, 0, 0}},
-
         /* Each write_packet will fail 3 times before operation is successful. If recovery
          * Since recovery is on, fifo muxer should not return any errors. */
         {fifo_basic_test, "recovery test", "attempt_recovery=1:recovery_wait_time=0",
@@ -434,9 +274,6 @@
 {
     int i, ret, ret_all = 0;
 
-    av_register_all();
-    av_register_output_format(&tst_failing_muxer);
-
     for (i = 0; tests[i].test_func; i++) {
         ret = run_test(&tests[i]);
         if (!ret_all && ret < 0)

diff --git a/libavformat/tests/movenc.c b/libavformat/tests/movenc.c
index 8e59b74..1d15d97 100644
--- a/libavformat/tests/movenc.c
+++ b/libavformat/tests/movenc.c

@@ -115,6 +115,7 @@
     case AVIO_DATA_MARKER_BOUNDARY_POINT: str = "boundary"; break;
     case AVIO_DATA_MARKER_UNKNOWN:        str = "unknown";  break;
     case AVIO_DATA_MARKER_TRAILER:        str = "trailer";  break;
+    default:                              str = "unknown";  break;
     }
     if (time == AV_NOPTS_VALUE)
         snprintf(timebuf, sizeof(timebuf), "nopts");
@@ -378,8 +379,6 @@
         }
     }
 
-    av_register_all();
-
     md5 = av_md5_alloc();
     if (!md5)
         return 1;

diff --git a/libavformat/tests/seek.c b/libavformat/tests/seek.c
index 5cf3a12..e0067a6 100644
--- a/libavformat/tests/seek.c
+++ b/libavformat/tests/seek.c

@@ -67,8 +67,6 @@
     int frame_count = 1;
     int duration = 4;
 
-    ic->flags |= AVFMT_FLAG_KEEP_SIDE_DATA;
-
     for(i=2; i<argc; i+=2){
         if       (!strcmp(argv[i], "-seekforw")){
             seekfirst = atoi(argv[i+1]);
@@ -93,9 +91,6 @@
     av_dict_set(&format_opts, "channels", "1", 0);
     av_dict_set(&format_opts, "sample_rate", "22050", 0);
 
-    /* initialize libavcodec, and register all codecs and formats */
-    av_register_all();
-
     if (argc < 2) {
         printf("usage: %s input_file\n"
                "\n", argv[0]);

diff --git a/libavformat/tls.h b/libavformat/tls.h
index 0326ef7..beb19d6 100644
--- a/libavformat/tls.h
+++ b/libavformat/tls.h

@@ -22,11 +22,9 @@
 #ifndef AVFORMAT_TLS_H
 #define AVFORMAT_TLS_H
 
-#include "config.h"
-#include "url.h"
 #include "libavutil/opt.h"
 
-#define CONFIG_TLS_PROTOCOL (CONFIG_TLS_GNUTLS_PROTOCOL | CONFIG_TLS_OPENSSL_PROTOCOL | CONFIG_TLS_SECURETRANSPORT_PROTOCOL | CONFIG_TLS_SCHANNEL_PROTOCOL)
+#include "url.h"
 
 typedef struct TLSShared {
     char *ca_file;

diff --git a/libavformat/tls_gnutls.c b/libavformat/tls_gnutls.c
index 5ce6c3d..e3c4368 100644
--- a/libavformat/tls_gnutls.c
+++ b/libavformat/tls_gnutls.c

@@ -55,26 +55,27 @@
 
 void ff_gnutls_init(void)
 {
-    avpriv_lock_avformat();
+    ff_lock_avformat();
 #if HAVE_THREADS && GNUTLS_VERSION_NUMBER < 0x020b00
     if (gcry_control(GCRYCTL_ANY_INITIALIZATION_P) == 0)
         gcry_control(GCRYCTL_SET_THREAD_CBS, &gcry_threads_pthread);
 #endif
     gnutls_global_init();
-    avpriv_unlock_avformat();
+    ff_unlock_avformat();
 }
 
 void ff_gnutls_deinit(void)
 {
-    avpriv_lock_avformat();
+    ff_lock_avformat();
     gnutls_global_deinit();
-    avpriv_unlock_avformat();
+    ff_unlock_avformat();
 }
 
 static int print_tls_error(URLContext *h, int ret)
 {
     switch (ret) {
     case GNUTLS_E_AGAIN:
+        return AVERROR(EAGAIN);
     case GNUTLS_E_INTERRUPTED:
 #ifdef GNUTLS_E_PREMATURE_TERMINATION
     case GNUTLS_E_PREMATURE_TERMINATION:
@@ -114,7 +115,10 @@
         return ret;
     if (ret == AVERROR_EXIT)
         return 0;
-    errno = EIO;
+    if (ret == AVERROR(EAGAIN))
+        errno = EAGAIN;
+    else
+        errno = EIO;
     return -1;
 }
 
@@ -127,7 +131,10 @@
         return ret;
     if (ret == AVERROR_EXIT)
         return 0;
-    errno = EIO;
+    if (ret == AVERROR(EAGAIN))
+        errno = EAGAIN;
+    else
+        errno = EIO;
     return -1;
 }
 
@@ -223,7 +230,11 @@
 static int tls_read(URLContext *h, uint8_t *buf, int size)
 {
     TLSContext *c = h->priv_data;
-    int ret = gnutls_record_recv(c->session, buf, size);
+    int ret;
+    // Set or clear the AVIO_FLAG_NONBLOCK on c->tls_shared.tcp
+    c->tls_shared.tcp->flags &= ~AVIO_FLAG_NONBLOCK;
+    c->tls_shared.tcp->flags |= h->flags & AVIO_FLAG_NONBLOCK;
+    ret = gnutls_record_recv(c->session, buf, size);
     if (ret > 0)
         return ret;
     if (ret == 0)
@@ -234,7 +245,11 @@
 static int tls_write(URLContext *h, const uint8_t *buf, int size)
 {
     TLSContext *c = h->priv_data;
-    int ret = gnutls_record_send(c->session, buf, size);
+    int ret;
+    // Set or clear the AVIO_FLAG_NONBLOCK on c->tls_shared.tcp
+    c->tls_shared.tcp->flags &= ~AVIO_FLAG_NONBLOCK;
+    c->tls_shared.tcp->flags |= h->flags & AVIO_FLAG_NONBLOCK;
+    ret = gnutls_record_send(c->session, buf, size);
     if (ret > 0)
         return ret;
     if (ret == 0)
@@ -260,7 +275,7 @@
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
-const URLProtocol ff_tls_gnutls_protocol = {
+const URLProtocol ff_tls_protocol = {
     .name           = "tls",
     .url_open2      = tls_open,
     .url_read       = tls_read,

diff --git a/libavformat/tls_libtls.c b/libavformat/tls_libtls.c
new file mode 100644
index 0000000..ba83b56
--- /dev/null
+++ b/libavformat/tls_libtls.c

@@ -0,0 +1,207 @@
+/*
+ * TLS/SSL Protocol
+ * Copyright (c) 2011 Martin Storsjo
+ * Copyright (c) 2017 sfan5 <sfan5@live.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "internal.h"
+#include "network.h"
+#include "url.h"
+#include "tls.h"
+#include "libavcodec/internal.h"
+#include "libavutil/avutil.h"
+#include "libavutil/opt.h"
+
+#include <tls.h>
+
+typedef struct TLSContext {
+    const AVClass *class;
+    TLSShared tls_shared;
+    struct tls *ctx;
+} TLSContext;
+
+static int ff_tls_close(URLContext *h)
+{
+    TLSContext *p = h->priv_data;
+    if (p->ctx) {
+        tls_close(p->ctx);
+        tls_free(p->ctx);
+    }
+    if (p->tls_shared.tcp)
+        ffurl_close(p->tls_shared.tcp);
+    return 0;
+}
+
+static ssize_t tls_read_callback(struct tls *ctx, void *buf, size_t buflen, void *cb_arg)
+{
+    URLContext *h = (URLContext*) cb_arg;
+    int ret = ffurl_read(h, buf, buflen);
+    if (ret == AVERROR(EAGAIN))
+        return TLS_WANT_POLLIN;
+    else if (ret == AVERROR_EXIT)
+        return 0;
+    return ret >= 0 ? ret : -1;
+}
+
+static ssize_t tls_write_callback(struct tls *ctx, const void *buf, size_t buflen, void *cb_arg)
+{
+    URLContext *h = (URLContext*) cb_arg;
+    int ret = ffurl_write(h, buf, buflen);
+    if (ret == AVERROR(EAGAIN))
+        return TLS_WANT_POLLOUT;
+    else if (ret == AVERROR_EXIT)
+        return 0;
+    return ret >= 0 ? ret : -1;
+}
+
+static int ff_tls_open(URLContext *h, const char *uri, int flags, AVDictionary **options)
+{
+    TLSContext *p = h->priv_data;
+    TLSShared *c = &p->tls_shared;
+    struct tls_config *cfg = NULL;
+    int ret;
+
+    if (tls_init() == -1) {
+        ret = AVERROR(EIO);
+        goto fail;
+    }
+
+    if ((ret = ff_tls_open_underlying(c, h, uri, options)) < 0)
+        goto fail;
+
+    p->ctx = !c->listen ? tls_client() : tls_server();
+    if (!p->ctx) {
+        ret = AVERROR(EIO);
+        goto fail;
+    }
+
+    cfg = tls_config_new();
+    if (!p->ctx) {
+        ret = AVERROR(EIO);
+        goto fail;
+    }
+    if (tls_config_set_protocols(cfg, TLS_PROTOCOLS_ALL) == -1)
+        goto err_config;
+    // While TLSv1.0 and TLSv1.1 are already enabled by the above,
+    // we need to be less strict with ciphers so it works in practice.
+    if (tls_config_set_ciphers(cfg, "compat") == -1)
+        goto err_config;
+    if (c->ca_file && tls_config_set_ca_file(cfg, c->ca_file) == -1)
+        goto err_config;
+    if (c->cert_file && tls_config_set_cert_file(cfg, c->cert_file) == -1)
+        goto err_config;
+    if (c->key_file && tls_config_set_key_file(cfg, c->key_file) == -1)
+        goto err_config;
+    if (!c->verify) {
+        tls_config_insecure_noverifycert(cfg);
+        tls_config_insecure_noverifyname(cfg);
+        tls_config_insecure_noverifytime(cfg);
+    }
+    if (tls_configure(p->ctx, cfg) == -1)
+        goto err_ctx;
+
+    if (!c->listen) {
+        ret = tls_connect_cbs(p->ctx, tls_read_callback, tls_write_callback,
+            c->tcp, c->host);
+    } else {
+        struct tls *ctx_new;
+        ret = tls_accept_cbs(p->ctx, &ctx_new, tls_read_callback,
+            tls_write_callback, c->tcp);
+        if (ret == 0) {
+            // free "server" context and replace by "connection" context
+            tls_free(p->ctx);
+            p->ctx = ctx_new;
+        }
+    }
+    if (ret == -1)
+        goto err_ctx;
+
+    tls_config_free(cfg);
+    return 0;
+err_config:
+    av_log(h, AV_LOG_ERROR, "%s\n", tls_config_error(cfg));
+    ret = AVERROR(EIO);
+    goto fail;
+err_ctx:
+    av_log(h, AV_LOG_ERROR, "%s\n", tls_error(p->ctx));
+    ret = AVERROR(EIO);
+    /* fallthrough */
+fail:
+    if (cfg)
+        tls_config_free(cfg);
+    ff_tls_close(h);
+    return ret;
+}
+
+static int ff_tls_read(URLContext *h, uint8_t *buf, int size)
+{
+    TLSContext *p = h->priv_data;
+    ssize_t ret;
+    ret = tls_read(p->ctx, buf, size);
+    if (ret > 0)
+        return ret;
+    else if (ret == 0)
+        return AVERROR_EOF;
+    av_log(h, AV_LOG_ERROR, "%s\n", tls_error(p->ctx));
+    return AVERROR(EIO);
+}
+
+static int ff_tls_write(URLContext *h, const uint8_t *buf, int size)
+{
+    TLSContext *p = h->priv_data;
+    ssize_t ret;
+    ret = tls_write(p->ctx, buf, size);
+    if (ret > 0)
+        return ret;
+    else if (ret == 0)
+        return AVERROR_EOF;
+    av_log(h, AV_LOG_ERROR, "%s\n", tls_error(p->ctx));
+    return AVERROR(EIO);
+}
+
+static int tls_get_file_handle(URLContext *h)
+{
+    TLSContext *c = h->priv_data;
+    return ffurl_get_file_handle(c->tls_shared.tcp);
+}
+
+static const AVOption options[] = {
+    TLS_COMMON_OPTIONS(TLSContext, tls_shared),
+    { NULL }
+};
+
+static const AVClass tls_class = {
+    .class_name = "tls",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const URLProtocol ff_tls_protocol = {
+    .name           = "tls",
+    .url_open2      = ff_tls_open,
+    .url_read       = ff_tls_read,
+    .url_write      = ff_tls_write,
+    .url_close      = ff_tls_close,
+    .url_get_file_handle = tls_get_file_handle,
+    .priv_data_size = sizeof(TLSContext),
+    .flags          = URL_PROTOCOL_FLAG_NETWORK,
+    .priv_data_class = &tls_class,
+};

diff --git a/libavformat/tls_mbedtls.c b/libavformat/tls_mbedtls.c
new file mode 100644
index 0000000..9b80a1e
--- /dev/null
+++ b/libavformat/tls_mbedtls.c

@@ -0,0 +1,351 @@
+/*
+ * TLS/SSL Protocol
+ * Copyright (c) 2018 Thomas Volkert
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <mbedtls/certs.h>
+#include <mbedtls/config.h>
+#include <mbedtls/ctr_drbg.h>
+#include <mbedtls/entropy.h>
+#include <mbedtls/net_sockets.h>
+#include <mbedtls/platform.h>
+#include <mbedtls/ssl.h>
+#include <mbedtls/x509_crt.h>
+
+#include "avformat.h"
+#include "internal.h"
+#include "url.h"
+#include "tls.h"
+#include "libavutil/parseutils.h"
+
+typedef struct TLSContext {
+    const AVClass *class;
+    TLSShared tls_shared;
+    mbedtls_ssl_context ssl_context;
+    mbedtls_ssl_config ssl_config;
+    mbedtls_entropy_context entropy_context;
+    mbedtls_ctr_drbg_context ctr_drbg_context;
+    mbedtls_x509_crt ca_cert;
+    mbedtls_x509_crt own_cert;
+    mbedtls_pk_context priv_key;
+    char *priv_key_pw;
+} TLSContext;
+
+#define OFFSET(x) offsetof(TLSContext, x)
+
+static int tls_close(URLContext *h)
+{
+    TLSContext *tls_ctx = h->priv_data;
+
+    mbedtls_ssl_close_notify(&tls_ctx->ssl_context);
+    mbedtls_pk_free(&tls_ctx->priv_key);
+    mbedtls_x509_crt_free(&tls_ctx->ca_cert);
+    mbedtls_x509_crt_free(&tls_ctx->own_cert);
+    mbedtls_ssl_free(&tls_ctx->ssl_context);
+    mbedtls_ssl_config_free(&tls_ctx->ssl_config);
+    mbedtls_ctr_drbg_free(&tls_ctx->ctr_drbg_context);
+    mbedtls_entropy_free(&tls_ctx->entropy_context);
+
+    return 0;
+}
+
+static int handle_transport_error(URLContext *h, const char* func_name, int react_on_eagain, int ret)
+{
+    switch (ret) {
+    case AVERROR(EAGAIN):
+        return react_on_eagain;
+    case AVERROR_EXIT:
+        return 0;
+    case AVERROR(EPIPE):
+    case AVERROR(ECONNRESET):
+        return MBEDTLS_ERR_NET_CONN_RESET;
+    default:
+        av_log(h, AV_LOG_ERROR, "%s returned 0x%x\n", func_name, ret);
+        errno = EIO;
+        return MBEDTLS_ERR_NET_SEND_FAILED;
+    }
+}
+
+static int mbedtls_send(void *ctx, const unsigned char *buf, size_t len)
+{
+    URLContext *h = (URLContext*) ctx;
+    int ret = ffurl_write(h, buf, len);
+    if (ret >= 0)
+        return ret;
+
+    if (h->max_packet_size && len > h->max_packet_size)
+        return MBEDTLS_ERR_SSL_BUFFER_TOO_SMALL;
+
+    return handle_transport_error(h, "ffurl_write", MBEDTLS_ERR_SSL_WANT_WRITE, ret);
+}
+
+static int mbedtls_recv(void *ctx, unsigned char *buf, size_t len)
+{
+    URLContext *h = (URLContext*) ctx;
+    int ret = ffurl_read(h, buf, len);
+    if (ret >= 0)
+        return ret;
+
+    if (h->max_packet_size && len > h->max_packet_size)
+        return MBEDTLS_ERR_SSL_BUFFER_TOO_SMALL;
+
+    return handle_transport_error(h, "ffurl_read", MBEDTLS_ERR_SSL_WANT_READ, ret);
+}
+
+static void handle_pk_parse_error(URLContext *h, int ret)
+{
+    switch (ret) {
+    case MBEDTLS_ERR_PK_FILE_IO_ERROR:
+        av_log(h, AV_LOG_ERROR, "Read of key file failed. Is it actually there, are the access permissions correct?\n");
+        break;
+    case MBEDTLS_ERR_PK_PASSWORD_REQUIRED:
+        av_log(h, AV_LOG_ERROR, "A password for the private key is missing.\n");
+        break;
+    case MBEDTLS_ERR_PK_PASSWORD_MISMATCH:
+        av_log(h, AV_LOG_ERROR, "The given password for the private key is wrong.\n");
+        break;
+    default:
+        av_log(h, AV_LOG_ERROR, "mbedtls_pk_parse_key returned -0x%x\n", -ret);
+        break;
+    }
+}
+
+static void handle_handshake_error(URLContext *h, int ret)
+{
+    switch (ret) {
+    case MBEDTLS_ERR_SSL_NO_USABLE_CIPHERSUITE:
+        av_log(h, AV_LOG_ERROR, "None of the common ciphersuites is usable. Was the local certificate correctly set?\n");
+        break;
+    case MBEDTLS_ERR_SSL_FATAL_ALERT_MESSAGE:
+        av_log(h, AV_LOG_ERROR, "A fatal alert message was received from the peer, has the peer a correct certificate?\n");
+        break;
+    case MBEDTLS_ERR_SSL_CA_CHAIN_REQUIRED:
+        av_log(h, AV_LOG_ERROR, "No CA chain is set, but required to operate. Was the CA correctly set?\n");
+        break;
+    case MBEDTLS_ERR_NET_CONN_RESET:
+        av_log(h, AV_LOG_ERROR, "TLS handshake was aborted by peer.\n");
+        break;
+    default:
+        av_log(h, AV_LOG_ERROR, "mbedtls_ssl_handshake returned -0x%x\n", -ret);
+        break;
+    }
+}
+
+static void parse_options(TLSContext *tls_ctxc, const char *uri)
+{
+    char buf[1024];
+    const char *p = strchr(uri, '?');
+    if (!p)
+        return;
+
+    if (!tls_ctxc->priv_key_pw && av_find_info_tag(buf, sizeof(buf), "key_password", p))
+        tls_ctxc->priv_key_pw = av_strdup(buf);
+}
+
+static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **options)
+{
+    TLSContext *tls_ctx = h->priv_data;
+    TLSShared *shr = &tls_ctx->tls_shared;
+    uint32_t verify_res_flags;
+    int ret;
+
+    // parse additional options
+    parse_options(tls_ctx, uri);
+
+    if ((ret = ff_tls_open_underlying(shr, h, uri, options)) < 0)
+        goto fail;
+
+    mbedtls_ssl_init(&tls_ctx->ssl_context);
+    mbedtls_ssl_config_init(&tls_ctx->ssl_config);
+    mbedtls_entropy_init(&tls_ctx->entropy_context);
+    mbedtls_ctr_drbg_init(&tls_ctx->ctr_drbg_context);
+    mbedtls_x509_crt_init(&tls_ctx->ca_cert);
+    mbedtls_pk_init(&tls_ctx->priv_key);
+
+    // load trusted CA
+    if (shr->ca_file) {
+        if ((ret = mbedtls_x509_crt_parse_file(&tls_ctx->ca_cert, shr->ca_file)) != 0) {
+            av_log(h, AV_LOG_ERROR, "mbedtls_x509_crt_parse_file for CA cert returned %d\n", ret);
+            goto fail;
+        }
+    }
+
+    // load own certificate
+    if (shr->cert_file) {
+        if ((ret = mbedtls_x509_crt_parse_file(&tls_ctx->own_cert, shr->cert_file)) != 0) {
+            av_log(h, AV_LOG_ERROR, "mbedtls_x509_crt_parse_file for own cert returned %d\n", ret);
+            goto fail;
+        }
+    }
+
+    // load key file
+    if (shr->key_file) {
+        if ((ret = mbedtls_pk_parse_keyfile(&tls_ctx->priv_key,
+                                            shr->key_file,
+                                            tls_ctx->priv_key_pw)) != 0) {
+            handle_pk_parse_error(h, ret);
+            goto fail;
+        }
+    }
+
+    // seed the random number generator
+    if ((ret = mbedtls_ctr_drbg_seed(&tls_ctx->ctr_drbg_context,
+                                     mbedtls_entropy_func,
+                                     &tls_ctx->entropy_context,
+                                     NULL, 0)) != 0) {
+        av_log(h, AV_LOG_ERROR, "mbedtls_ctr_drbg_seed returned %d\n", ret);
+        goto fail;
+    }
+
+    if ((ret = mbedtls_ssl_config_defaults(&tls_ctx->ssl_config,
+                                           shr->listen ? MBEDTLS_SSL_IS_SERVER : MBEDTLS_SSL_IS_CLIENT,
+                                           MBEDTLS_SSL_TRANSPORT_STREAM,
+                                           MBEDTLS_SSL_PRESET_DEFAULT)) != 0) {
+        av_log(h, AV_LOG_ERROR, "mbedtls_ssl_config_defaults returned %d\n", ret);
+        goto fail;
+    }
+
+    mbedtls_ssl_conf_authmode(&tls_ctx->ssl_config,
+                              shr->ca_file ? MBEDTLS_SSL_VERIFY_REQUIRED : MBEDTLS_SSL_VERIFY_NONE);
+    mbedtls_ssl_conf_rng(&tls_ctx->ssl_config, mbedtls_ctr_drbg_random, &tls_ctx->ctr_drbg_context);
+    mbedtls_ssl_conf_ca_chain(&tls_ctx->ssl_config, &tls_ctx->ca_cert, NULL);
+
+    // set own certificate and private key
+    if ((ret = mbedtls_ssl_conf_own_cert(&tls_ctx->ssl_config, &tls_ctx->own_cert, &tls_ctx->priv_key)) != 0) {
+        av_log(h, AV_LOG_ERROR, "mbedtls_ssl_conf_own_cert returned %d\n", ret);
+        goto fail;
+    }
+
+    if ((ret = mbedtls_ssl_setup(&tls_ctx->ssl_context, &tls_ctx->ssl_config)) != 0) {
+        av_log(h, AV_LOG_ERROR, "mbedtls_ssl_setup returned %d\n", ret);
+        goto fail;
+    }
+
+    if (!shr->listen && !shr->numerichost) {
+        if ((ret = mbedtls_ssl_set_hostname(&tls_ctx->ssl_context, shr->host)) != 0) {
+            av_log(h, AV_LOG_ERROR, "mbedtls_ssl_set_hostname returned %d\n", ret);
+            goto fail;
+        }
+    }
+
+    // set I/O functions to use FFmpeg internal code for transport layer
+    mbedtls_ssl_set_bio(&tls_ctx->ssl_context, shr->tcp, mbedtls_send, mbedtls_recv, NULL);
+
+    // ssl handshake
+    while ((ret = mbedtls_ssl_handshake(&tls_ctx->ssl_context)) != 0) {
+        if (ret != MBEDTLS_ERR_SSL_WANT_READ && ret != MBEDTLS_ERR_SSL_WANT_WRITE) {
+            handle_handshake_error(h, ret);
+            goto fail;
+        }
+    }
+
+    if (shr->verify) {
+        // check the result of the certificate verification
+        if ((verify_res_flags = mbedtls_ssl_get_verify_result(&tls_ctx->ssl_context)) != 0) {
+            av_log(h, AV_LOG_ERROR, "mbedtls_ssl_get_verify_result reported problems "\
+                                    "with the certificate verification, returned flags: %u\n",
+                                    verify_res_flags);
+            if (verify_res_flags & MBEDTLS_X509_BADCERT_NOT_TRUSTED)
+                av_log(h, AV_LOG_ERROR, "The certificate is not correctly signed by the trusted CA.\n");
+            goto fail;
+        }
+    }
+
+    return 0;
+
+fail:
+    tls_close(h);
+    return AVERROR(EIO);
+}
+
+static int handle_tls_error(URLContext *h, const char* func_name, int ret)
+{
+    switch (ret) {
+    case MBEDTLS_ERR_SSL_WANT_READ:
+    case MBEDTLS_ERR_SSL_WANT_WRITE:
+        return AVERROR(EAGAIN);
+    case MBEDTLS_ERR_NET_SEND_FAILED:
+    case MBEDTLS_ERR_NET_RECV_FAILED:
+        return AVERROR(EIO);
+    case MBEDTLS_ERR_NET_CONN_RESET:
+    case MBEDTLS_ERR_SSL_PEER_CLOSE_NOTIFY:
+        av_log(h, AV_LOG_WARNING, "%s reported connection reset by peer\n", func_name);
+        return AVERROR_EOF;
+    default:
+        av_log(h, AV_LOG_ERROR, "%s returned -0x%x\n", func_name, -ret);
+        return AVERROR(EIO);
+    }
+}
+
+static int tls_read(URLContext *h, uint8_t *buf, int size)
+{
+    TLSContext *tls_ctx = h->priv_data;
+    int ret;
+
+    if ((ret = mbedtls_ssl_read(&tls_ctx->ssl_context, buf, size)) > 0) {
+        // return read length
+        return ret;
+    }
+
+    return handle_tls_error(h, "mbedtls_ssl_read", ret);
+}
+
+static int tls_write(URLContext *h, const uint8_t *buf, int size)
+{
+    TLSContext *tls_ctx = h->priv_data;
+    int ret;
+
+    if ((ret = mbedtls_ssl_write(&tls_ctx->ssl_context, buf, size)) > 0) {
+        // return written length
+        return ret;
+    }
+
+    return handle_tls_error(h, "mbedtls_ssl_write", ret);
+}
+
+static int tls_get_file_handle(URLContext *h)
+{
+    TLSContext *c = h->priv_data;
+    return ffurl_get_file_handle(c->tls_shared.tcp);
+}
+
+static const AVOption options[] = {
+    TLS_COMMON_OPTIONS(TLSContext, tls_shared), \
+    {"key_password", "Password for the private key file", OFFSET(priv_key_pw),  AV_OPT_TYPE_STRING, .flags = TLS_OPTFL }, \
+    { NULL }
+};
+
+static const AVClass tls_class = {
+    .class_name = "tls",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const URLProtocol ff_tls_protocol = {
+    .name           = "tls",
+    .url_open2      = tls_open,
+    .url_read       = tls_read,
+    .url_write      = tls_write,
+    .url_close      = tls_close,
+    .url_get_file_handle = tls_get_file_handle,
+    .priv_data_size = sizeof(TLSContext),
+    .flags          = URL_PROTOCOL_FLAG_NETWORK,
+    .priv_data_class = &tls_class,
+};

diff --git a/libavformat/tls_openssl.c b/libavformat/tls_openssl.c
index 38af8a2..7ae71bd 100644
--- a/libavformat/tls_openssl.c
+++ b/libavformat/tls_openssl.c

@@ -66,6 +66,85 @@
 #endif
 #endif
 
+int ff_openssl_init(void)
+{
+    ff_lock_avformat();
+    if (!openssl_init) {
+        SSL_library_init();
+        SSL_load_error_strings();
+#if HAVE_THREADS
+        if (!CRYPTO_get_locking_callback()) {
+            int i;
+            openssl_mutexes = av_malloc_array(sizeof(pthread_mutex_t), CRYPTO_num_locks());
+            if (!openssl_mutexes) {
+                ff_unlock_avformat();
+                return AVERROR(ENOMEM);
+            }
+
+            for (i = 0; i < CRYPTO_num_locks(); i++)
+                pthread_mutex_init(&openssl_mutexes[i], NULL);
+            CRYPTO_set_locking_callback(openssl_lock);
+#if !defined(WIN32) && OPENSSL_VERSION_NUMBER < 0x10000000
+            CRYPTO_set_id_callback(openssl_thread_id);
+#endif
+        }
+#endif
+    }
+    openssl_init++;
+    ff_unlock_avformat();
+
+    return 0;
+}
+
+void ff_openssl_deinit(void)
+{
+    ff_lock_avformat();
+    openssl_init--;
+    if (!openssl_init) {
+#if HAVE_THREADS
+        if (CRYPTO_get_locking_callback() == openssl_lock) {
+            int i;
+            CRYPTO_set_locking_callback(NULL);
+            for (i = 0; i < CRYPTO_num_locks(); i++)
+                pthread_mutex_destroy(&openssl_mutexes[i]);
+            av_free(openssl_mutexes);
+        }
+#endif
+    }
+    ff_unlock_avformat();
+}
+
+static int print_tls_error(URLContext *h, int ret)
+{
+    TLSContext *c = h->priv_data;
+    if (h->flags & AVIO_FLAG_NONBLOCK) {
+        int err = SSL_get_error(c->ssl, ret);
+        if (err == SSL_ERROR_WANT_READ || err == SSL_ERROR_WANT_WRITE)
+            return AVERROR(EAGAIN);
+    }
+    av_log(h, AV_LOG_ERROR, "%s\n", ERR_error_string(ERR_get_error(), NULL));
+    return AVERROR(EIO);
+}
+
+static int tls_close(URLContext *h)
+{
+    TLSContext *c = h->priv_data;
+    if (c->ssl) {
+        SSL_shutdown(c->ssl);
+        SSL_free(c->ssl);
+    }
+    if (c->ctx)
+        SSL_CTX_free(c->ctx);
+    if (c->tls_shared.tcp)
+        ffurl_close(c->tls_shared.tcp);
+#if OPENSSL_VERSION_NUMBER >= 0x1010000fL
+    if (c->url_bio_method)
+        BIO_meth_free(c->url_bio_method);
+#endif
+    ff_openssl_deinit();
+    return 0;
+}
+
 static int url_bio_create(BIO *b)
 {
 #if OPENSSL_VERSION_NUMBER >= 0x1010000fL
@@ -98,6 +177,8 @@
     if (ret >= 0)
         return ret;
     BIO_clear_retry_flags(b);
+    if (ret == AVERROR(EAGAIN))
+        BIO_set_retry_read(b);
     if (ret == AVERROR_EXIT)
         return 0;
     return -1;
@@ -110,6 +191,8 @@
     if (ret >= 0)
         return ret;
     BIO_clear_retry_flags(b);
+    if (ret == AVERROR(EAGAIN))
+        BIO_set_retry_write(b);
     if (ret == AVERROR_EXIT)
         return 0;
     return -1;
@@ -143,79 +226,6 @@
 };
 #endif
 
-int ff_openssl_init(void)
-{
-    avpriv_lock_avformat();
-    if (!openssl_init) {
-        SSL_library_init();
-        SSL_load_error_strings();
-#if HAVE_THREADS
-        if (!CRYPTO_get_locking_callback()) {
-            int i;
-            openssl_mutexes = av_malloc_array(sizeof(pthread_mutex_t), CRYPTO_num_locks());
-            if (!openssl_mutexes) {
-                avpriv_unlock_avformat();
-                return AVERROR(ENOMEM);
-            }
-
-            for (i = 0; i < CRYPTO_num_locks(); i++)
-                pthread_mutex_init(&openssl_mutexes[i], NULL);
-            CRYPTO_set_locking_callback(openssl_lock);
-#if !defined(WIN32) && OPENSSL_VERSION_NUMBER < 0x10000000
-            CRYPTO_set_id_callback(openssl_thread_id);
-#endif
-        }
-#endif
-    }
-    openssl_init++;
-    avpriv_unlock_avformat();
-
-    return 0;
-}
-
-void ff_openssl_deinit(void)
-{
-    avpriv_lock_avformat();
-    openssl_init--;
-    if (!openssl_init) {
-#if HAVE_THREADS
-        if (CRYPTO_get_locking_callback() == openssl_lock) {
-            int i;
-            CRYPTO_set_locking_callback(NULL);
-            for (i = 0; i < CRYPTO_num_locks(); i++)
-                pthread_mutex_destroy(&openssl_mutexes[i]);
-            av_free(openssl_mutexes);
-        }
-#endif
-    }
-    avpriv_unlock_avformat();
-}
-
-static int print_tls_error(URLContext *h, int ret)
-{
-    av_log(h, AV_LOG_ERROR, "%s\n", ERR_error_string(ERR_get_error(), NULL));
-    return AVERROR(EIO);
-}
-
-static int tls_close(URLContext *h)
-{
-    TLSContext *c = h->priv_data;
-    if (c->ssl) {
-        SSL_shutdown(c->ssl);
-        SSL_free(c->ssl);
-    }
-    if (c->ctx)
-        SSL_CTX_free(c->ctx);
-    if (c->tls_shared.tcp)
-        ffurl_close(c->tls_shared.tcp);
-#if OPENSSL_VERSION_NUMBER >= 0x1010000fL
-    if (c->url_bio_method)
-        BIO_meth_free(c->url_bio_method);
-#endif
-    ff_openssl_deinit();
-    return 0;
-}
-
 static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **options)
 {
     TLSContext *p = h->priv_data;
@@ -302,7 +312,11 @@
 static int tls_read(URLContext *h, uint8_t *buf, int size)
 {
     TLSContext *c = h->priv_data;
-    int ret = SSL_read(c->ssl, buf, size);
+    int ret;
+    // Set or clear the AVIO_FLAG_NONBLOCK on c->tls_shared.tcp
+    c->tls_shared.tcp->flags &= ~AVIO_FLAG_NONBLOCK;
+    c->tls_shared.tcp->flags |= h->flags & AVIO_FLAG_NONBLOCK;
+    ret = SSL_read(c->ssl, buf, size);
     if (ret > 0)
         return ret;
     if (ret == 0)
@@ -313,7 +327,11 @@
 static int tls_write(URLContext *h, const uint8_t *buf, int size)
 {
     TLSContext *c = h->priv_data;
-    int ret = SSL_write(c->ssl, buf, size);
+    int ret;
+    // Set or clear the AVIO_FLAG_NONBLOCK on c->tls_shared.tcp
+    c->tls_shared.tcp->flags &= ~AVIO_FLAG_NONBLOCK;
+    c->tls_shared.tcp->flags |= h->flags & AVIO_FLAG_NONBLOCK;
+    ret = SSL_write(c->ssl, buf, size);
     if (ret > 0)
         return ret;
     if (ret == 0)
@@ -339,7 +357,7 @@
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
-const URLProtocol ff_tls_openssl_protocol = {
+const URLProtocol ff_tls_protocol = {
     .name           = "tls",
     .url_open2      = tls_open,
     .url_read       = tls_read,

diff --git a/libavformat/tls_schannel.c b/libavformat/tls_schannel.c
index 9f1c088..4f0badc 100644
--- a/libavformat/tls_schannel.c
+++ b/libavformat/tls_schannel.c

@@ -148,7 +148,7 @@
     TLSContext *c = h->priv_data;
     TLSShared *s = &c->tls_shared;
     SECURITY_STATUS sspi_ret;
-    SecBuffer outbuf[3];
+    SecBuffer outbuf[3] = { 0 };
     SecBufferDesc outbuf_desc;
     SecBuffer inbuf[2];
     SecBufferDesc inbuf_desc;
@@ -413,11 +413,13 @@
 
         ret = ffurl_read(s->tcp, c->enc_buf + c->enc_buf_offset,
                          c->enc_buf_size - c->enc_buf_offset);
-        if (ret < 0) {
+        if (ret == AVERROR_EOF) {
+            c->connection_closed = 1;
+            ret = 0;
+        } else if (ret < 0) {
             av_log(h, AV_LOG_ERROR, "Unable to read from socket\n");
             return ret;
-        } else if (ret == 0)
-            c->connection_closed = 1;
+        }
 
         c->enc_buf_offset += ret;
     }
@@ -515,7 +517,7 @@
     if (ret == 0 && !c->connection_closed)
         ret = AVERROR(EAGAIN);
 
-    return ret < 0 ? ret : 0;
+    return ret < 0 ? ret : AVERROR_EOF;
 }
 
 static int tls_write(URLContext *h, const uint8_t *buf, int len)
@@ -595,7 +597,7 @@
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
-const URLProtocol ff_tls_schannel_protocol = {
+const URLProtocol ff_tls_protocol = {
     .name           = "tls",
     .url_open2      = tls_open,
     .url_read       = tls_read,

diff --git a/libavformat/tls_securetransport.c b/libavformat/tls_securetransport.c
index bc8a320..3738054 100644
--- a/libavformat/tls_securetransport.c
+++ b/libavformat/tls_securetransport.c

@@ -54,7 +54,7 @@
     TLSContext *c = h->priv_data;
     switch (ret) {
     case errSSLWouldBlock:
-        break;
+        return AVERROR(EAGAIN);
     case errSSLXCertChainInvalid:
         av_log(h, AV_LOG_ERROR, "Invalid certificate chain\n");
         return AVERROR(EIO);
@@ -69,6 +69,9 @@
 
 static int import_pem(URLContext *h, char *path, CFArrayRef *array)
 {
+#if !HAVE_SECITEMIMPORT
+    return AVERROR_PATCHWELCOME;
+#else
     AVIOContext *s = NULL;
     CFDataRef data = NULL;
     int64_t ret = 0;
@@ -124,6 +127,7 @@
     if (s)
         avio_close(s);
     return ret;
+#endif
 }
 
 static int load_ca(URLContext *h)
@@ -193,7 +197,8 @@
 {
     URLContext *h = (URLContext*)connection;
     TLSContext *c = h->priv_data;
-    int read = ffurl_read_complete(c->tls_shared.tcp, data, *dataLength);
+    size_t requested = *dataLength;
+    int read = ffurl_read(c->tls_shared.tcp, data, requested);
     if (read <= 0) {
         *dataLength = 0;
         switch(AVUNERROR(read)) {
@@ -210,7 +215,10 @@
         }
     } else {
         *dataLength = read;
-        return noErr;
+        if (read < requested)
+            return errSSLWouldBlock;
+        else
+            return noErr;
     }
 }
 
@@ -322,12 +330,13 @@
             if (peerTrust)
                 CFRelease(peerTrust);
         }
-        if (status == noErr)
+        if (status == noErr) {
             break;
-
-        av_log(h, AV_LOG_ERROR, "Unable to negotiate TLS/SSL session: %i\n", (int)status);
-        ret = AVERROR(EIO);
-        goto fail;
+        } else if (status != errSSLWouldBlock) {
+            av_log(h, AV_LOG_ERROR, "Unable to negotiate TLS/SSL session: %i\n", (int)status);
+            ret = AVERROR(EIO);
+            goto fail;
+        }
     }
 
     return 0;
@@ -344,6 +353,9 @@
     case errSSLClosedGraceful:
     case errSSLClosedNoNotify:
         return 0;
+    case errSSLWouldBlock:
+        if (processed > 0)
+            return processed;
     default:
         return (int)status;
     }
@@ -352,8 +364,12 @@
 static int tls_read(URLContext *h, uint8_t *buf, int size)
 {
     TLSContext *c = h->priv_data;
-    size_t processed = 0;
-    int ret = SSLRead(c->ssl_context, buf, size, &processed);
+    size_t available = 0, processed = 0;
+    int ret;
+    SSLGetBufferedReadSize(c->ssl_context, &available);
+    if (available)
+        size = FFMIN(available, size);
+    ret = SSLRead(c->ssl_context, buf, size, &processed);
     ret = map_ssl_error(ret, processed);
     if (ret > 0)
         return ret;
@@ -393,7 +409,7 @@
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
-const URLProtocol ff_tls_securetransport_protocol = {
+const URLProtocol ff_tls_protocol = {
     .name           = "tls",
     .url_open2      = tls_open,
     .url_read       = tls_read,

diff --git a/libavformat/ttaenc.c b/libavformat/ttaenc.c
index fdce1e3..d8e1136 100644
--- a/libavformat/ttaenc.c
+++ b/libavformat/ttaenc.c

@@ -29,22 +29,23 @@
 
 typedef struct TTAMuxContext {
     AVIOContext *seek_table;
-    AVIOContext *data;
+    AVPacketList *queue, *queue_end;
     uint32_t nb_samples;
     int frame_size;
     int last_frame;
 } TTAMuxContext;
 
-static int tta_write_header(AVFormatContext *s)
+static int tta_init(AVFormatContext *s)
 {
     TTAMuxContext *tta = s->priv_data;
-    AVCodecParameters *par = s->streams[0]->codecpar;
-    int ret;
+    AVCodecParameters *par;
 
     if (s->nb_streams != 1) {
         av_log(s, AV_LOG_ERROR, "Only one stream is supported\n");
         return AVERROR(EINVAL);
     }
+    par = s->streams[0]->codecpar;
+
     if (par->codec_id != AV_CODEC_ID_TTA) {
         av_log(s, AV_LOG_ERROR, "Unsupported codec\n");
         return AVERROR(EINVAL);
@@ -54,22 +55,6 @@
         return AVERROR_INVALIDDATA;
     }
 
-    if ((ret = avio_open_dyn_buf(&tta->seek_table)) < 0)
-        return ret;
-    if ((ret = avio_open_dyn_buf(&tta->data)) < 0) {
-        ffio_free_dyn_buf(&tta->seek_table);
-        return ret;
-    }
-
-    /* Ignore most extradata information if present. It can be innacurate
-       if for example remuxing from Matroska */
-    ffio_init_checksum(s->pb, ff_crcEDB88320_update, UINT32_MAX);
-    ffio_init_checksum(tta->seek_table, ff_crcEDB88320_update, UINT32_MAX);
-    avio_write(s->pb, "TTA1", 4);
-    avio_wl16(s->pb, par->extradata ? AV_RL16(par->extradata + 4) : 1);
-    avio_wl16(s->pb, par->channels);
-    avio_wl16(s->pb, par->bits_per_raw_sample);
-    avio_wl32(s->pb, par->sample_rate);
     /* Prevent overflow */
     if (par->sample_rate > 0x7FFFFFu) {
         av_log(s, AV_LOG_ERROR, "Sample rate too large\n");
@@ -81,11 +66,39 @@
     return 0;
 }
 
+static int tta_write_header(AVFormatContext *s)
+{
+    TTAMuxContext *tta = s->priv_data;
+    AVCodecParameters *par = s->streams[0]->codecpar;
+    int ret;
+
+    if ((ret = avio_open_dyn_buf(&tta->seek_table)) < 0)
+        return ret;
+
+    /* Ignore most extradata information if present. It can be innacurate
+       if for example remuxing from Matroska */
+    ffio_init_checksum(s->pb, ff_crcEDB88320_update, UINT32_MAX);
+    ffio_init_checksum(tta->seek_table, ff_crcEDB88320_update, UINT32_MAX);
+    avio_write(s->pb, "TTA1", 4);
+    avio_wl16(s->pb, par->extradata ? AV_RL16(par->extradata + 4) : 1);
+    avio_wl16(s->pb, par->channels);
+    avio_wl16(s->pb, par->bits_per_raw_sample);
+    avio_wl32(s->pb, par->sample_rate);
+
+    return 0;
+}
+
 static int tta_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
     TTAMuxContext *tta = s->priv_data;
+    int ret;
 
-    avio_write(tta->data, pkt->data, pkt->size);
+    ret = ff_packet_list_put(&tta->queue, &tta->queue_end, pkt,
+                             FF_PACKETLIST_FLAG_REF_PACKET);
+    if (ret < 0) {
+        return ret;
+    }
+
     avio_wl32(tta->seek_table, pkt->size);
     tta->nb_samples += pkt->duration;
 
@@ -106,6 +119,18 @@
     return 0;
 }
 
+static void tta_queue_flush(AVFormatContext *s)
+{
+    TTAMuxContext *tta = s->priv_data;
+    AVPacket pkt;
+
+    while (tta->queue) {
+        ff_packet_list_get(&tta->queue, &tta->queue_end, &pkt);
+        avio_write(s->pb, pkt.data, pkt.size);
+        av_packet_unref(&pkt);
+    }
+}
+
 static int tta_write_trailer(AVFormatContext *s)
 {
     TTAMuxContext *tta = s->priv_data;
@@ -125,9 +150,7 @@
     av_free(ptr);
 
     /* Write audio data */
-    size = avio_close_dyn_buf(tta->data, &ptr);
-    avio_write(s->pb, ptr, size);
-    av_free(ptr);
+    tta_queue_flush(s);
 
     ff_ape_write_tag(s);
     avio_flush(s->pb);
@@ -143,6 +166,7 @@
     .priv_data_size    = sizeof(TTAMuxContext),
     .audio_codec       = AV_CODEC_ID_TTA,
     .video_codec       = AV_CODEC_ID_NONE,
+    .init              = tta_init,
     .write_header      = tta_write_header,
     .write_packet      = tta_write_packet,
     .write_trailer     = tta_write_trailer,

diff --git a/libavformat/ty.c b/libavformat/ty.c
new file mode 100644
index 0000000..8230d3a
--- /dev/null
+++ b/libavformat/ty.c

@@ -0,0 +1,789 @@
+/*
+ * TiVo ty stream demuxer
+ * Copyright (c) 2005 VLC authors and VideoLAN
+ * Copyright (c) 2005 by Neal Symms (tivo@freakinzoo.com) - February 2005
+ * based on code by Christopher Wingert for tivo-mplayer
+ * tivo(at)wingert.org, February 2003
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avformat.h"
+#include "internal.h"
+#include "mpeg.h"
+
+#define SERIES1_PES_LENGTH  11    /* length of audio PES hdr on S1 */
+#define SERIES2_PES_LENGTH  16    /* length of audio PES hdr on S2 */
+#define AC3_PES_LENGTH      14    /* length of audio PES hdr for AC3 */
+#define VIDEO_PES_LENGTH    16    /* length of video PES header */
+#define DTIVO_PTS_OFFSET    6     /* offs into PES for MPEG PTS on DTivo */
+#define SA_PTS_OFFSET       9     /* offset into PES for MPEG PTS on SA */
+#define AC3_PTS_OFFSET      9     /* offset into PES for AC3 PTS on DTivo */
+#define VIDEO_PTS_OFFSET    9     /* offset into PES for video PTS on all */
+#define AC3_PKT_LENGTH      1536  /* size of TiVo AC3 pkts (w/o PES hdr) */
+
+static const uint8_t ty_VideoPacket[]     = { 0x00, 0x00, 0x01, 0xe0 };
+static const uint8_t ty_MPEGAudioPacket[] = { 0x00, 0x00, 0x01, 0xc0 };
+static const uint8_t ty_AC3AudioPacket[]  = { 0x00, 0x00, 0x01, 0xbd };
+
+#define TIVO_PES_FILEID   0xf5467abd
+#define CHUNK_SIZE        (128 * 1024)
+#define CHUNK_PEEK_COUNT  3      /* number of chunks to probe */
+
+typedef struct TyRecHdr {
+    int64_t   rec_size;
+    uint8_t   ex[2];
+    uint8_t   rec_type;
+    uint8_t   subrec_type;
+    uint64_t  ty_pts;            /* TY PTS in the record header */
+} TyRecHdr;
+
+typedef enum {
+    TIVO_TYPE_UNKNOWN,
+    TIVO_TYPE_SA,
+    TIVO_TYPE_DTIVO
+} TiVo_type;
+
+typedef enum {
+    TIVO_SERIES_UNKNOWN,
+    TIVO_SERIES1,
+    TIVO_SERIES2
+} TiVo_series;
+
+typedef enum {
+    TIVO_AUDIO_UNKNOWN,
+    TIVO_AUDIO_AC3,
+    TIVO_AUDIO_MPEG
+} TiVo_audio;
+
+typedef struct TySeqTable {
+    uint64_t    timestamp;
+    uint8_t     chunk_bitmask[8];
+} TySeqTable;
+
+typedef struct TYDemuxContext {
+    unsigned        cur_chunk;
+    unsigned        cur_chunk_pos;
+    int64_t         cur_pos;
+    TiVo_type       tivo_type;        /* TiVo type (SA / DTiVo) */
+    TiVo_series     tivo_series;      /* Series1 or Series2 */
+    TiVo_audio      audio_type;       /* AC3 or MPEG */
+    int             pes_length;       /* Length of Audio PES header */
+    int             pts_offset;       /* offset into audio PES of PTS */
+    uint8_t         pes_buffer[20];   /* holds incomplete pes headers */
+    int             pes_buf_cnt;      /* how many bytes in our buffer */
+    size_t          ac3_pkt_size;     /* length of ac3 pkt we've seen so far */
+    uint64_t        last_ty_pts;      /* last TY timestamp we've seen */
+    unsigned        seq_table_size;   /* number of entries in SEQ table */
+
+    int64_t         first_audio_pts;
+    int64_t         last_audio_pts;
+    int64_t         last_video_pts;
+
+    TyRecHdr       *rec_hdrs;         /* record headers array */
+    int             cur_rec;          /* current record in this chunk */
+    int             num_recs;         /* number of recs in this chunk */
+    int             seq_rec;          /* record number where seq start is */
+    TySeqTable     *seq_table;        /* table of SEQ entries from mstr chk */
+    int             first_chunk;
+
+    uint8_t         chunk[CHUNK_SIZE];
+} TYDemuxContext;
+
+static int ty_probe(AVProbeData *p)
+{
+    int i;
+
+    for (i = 0; i + 12 < p->buf_size; i += CHUNK_SIZE) {
+        if (AV_RB32(p->buf + i) == TIVO_PES_FILEID &&
+            AV_RB32(p->buf + i + 4) == 0x02 &&
+            AV_RB32(p->buf + i + 8) == CHUNK_SIZE) {
+            return AVPROBE_SCORE_MAX;
+        }
+    }
+
+    return 0;
+}
+
+static TyRecHdr *parse_chunk_headers(const uint8_t *buf,
+                                     int num_recs)
+{
+    TyRecHdr *hdrs, *rec_hdr;
+    int i;
+
+    hdrs = av_calloc(num_recs, sizeof(TyRecHdr));
+    if (!hdrs)
+        return NULL;
+
+    for (i = 0; i < num_recs; i++) {
+        const uint8_t *record_header = buf + (i * 16);
+
+        rec_hdr = &hdrs[i];     /* for brevity */
+        rec_hdr->rec_type = record_header[3];
+        rec_hdr->subrec_type = record_header[2] & 0x0f;
+        if ((record_header[0] & 0x80) == 0x80) {
+            uint8_t b1, b2;
+
+            /* marker bit 2 set, so read extended data */
+            b1 = (((record_header[0] & 0x0f) << 4) |
+                  ((record_header[1] & 0xf0) >> 4));
+            b2 = (((record_header[1] & 0x0f) << 4) |
+                  ((record_header[2] & 0xf0) >> 4));
+
+            rec_hdr->ex[0] = b1;
+            rec_hdr->ex[1] = b2;
+            rec_hdr->rec_size = 0;
+            rec_hdr->ty_pts = 0;
+        } else {
+            rec_hdr->rec_size = (record_header[0] << 8 |
+                                 record_header[1]) << 4 |
+                                (record_header[2] >> 4);
+            rec_hdr->ty_pts = AV_RB64(&record_header[8]);
+        }
+    }
+    return hdrs;
+}
+
+static int find_es_header(const uint8_t *header,
+                          const uint8_t *buffer, int search_len)
+{
+    int count;
+
+    for (count = 0; count < search_len; count++) {
+        if (!memcmp(&buffer[count], header, 4))
+            return count;
+    }
+    return -1;
+}
+
+static int analyze_chunk(AVFormatContext *s, const uint8_t *chunk)
+{
+    TYDemuxContext *ty = s->priv_data;
+    int num_recs, i;
+    TyRecHdr *hdrs;
+    int num_6e0, num_be0, num_9c0, num_3c0;
+
+    /* skip if it's a Part header */
+    if (AV_RB32(&chunk[0]) == TIVO_PES_FILEID)
+        return 0;
+
+    /* number of records in chunk (we ignore high order byte;
+     * rarely are there > 256 chunks & we don't need that many anyway) */
+    num_recs = chunk[0];
+    if (num_recs < 5) {
+        /* try again with the next chunk.  Sometimes there are dead ones */
+        return 0;
+    }
+
+    chunk += 4;       /* skip past rec count & SEQ bytes */
+    ff_dlog(s, "probe: chunk has %d recs\n", num_recs);
+    hdrs = parse_chunk_headers(chunk, num_recs);
+    if (!hdrs)
+        return AVERROR(ENOMEM);
+
+    /* scan headers.
+     * 1. check video packets.  Presence of 0x6e0 means S1.
+     *    No 6e0 but have be0 means S2.
+     * 2. probe for audio 0x9c0 vs 0x3c0 (AC3 vs Mpeg)
+     *    If AC-3, then we have DTivo.
+     *    If MPEG, search for PTS offset.  This will determine SA vs. DTivo.
+     */
+    num_6e0 = num_be0 = num_9c0 = num_3c0 = 0;
+    for (i = 0; i < num_recs; i++) {
+        switch (hdrs[i].subrec_type << 8 | hdrs[i].rec_type) {
+        case 0x6e0:
+            num_6e0++;
+            break;
+        case 0xbe0:
+            num_be0++;
+            break;
+        case 0x3c0:
+            num_3c0++;
+            break;
+        case 0x9c0:
+            num_9c0++;
+            break;
+        }
+    }
+    ff_dlog(s, "probe: chunk has %d 0x6e0 recs, %d 0xbe0 recs.\n",
+            num_6e0, num_be0);
+
+    /* set up our variables */
+    if (num_6e0 > 0) {
+        ff_dlog(s, "detected Series 1 Tivo\n");
+        ty->tivo_series = TIVO_SERIES1;
+        ty->pes_length = SERIES1_PES_LENGTH;
+    } else if (num_be0 > 0) {
+        ff_dlog(s, "detected Series 2 Tivo\n");
+        ty->tivo_series = TIVO_SERIES2;
+        ty->pes_length = SERIES2_PES_LENGTH;
+    }
+    if (num_9c0 > 0) {
+        ff_dlog(s, "detected AC-3 Audio (DTivo)\n");
+        ty->audio_type = TIVO_AUDIO_AC3;
+        ty->tivo_type = TIVO_TYPE_DTIVO;
+        ty->pts_offset = AC3_PTS_OFFSET;
+        ty->pes_length = AC3_PES_LENGTH;
+    } else if (num_3c0 > 0) {
+        ty->audio_type = TIVO_AUDIO_MPEG;
+        ff_dlog(s, "detected MPEG Audio\n");
+    }
+
+    /* if tivo_type still unknown, we can check PTS location
+     * in MPEG packets to determine tivo_type */
+    if (ty->tivo_type == TIVO_TYPE_UNKNOWN) {
+        uint32_t data_offset = 16 * num_recs;
+
+        for (i = 0; i < num_recs; i++) {
+            if (data_offset + hdrs[i].rec_size > CHUNK_SIZE)
+                break;
+
+            if ((hdrs[i].subrec_type << 8 | hdrs[i].rec_type) == 0x3c0 && hdrs[i].rec_size > 15) {
+                /* first make sure we're aligned */
+                int pes_offset = find_es_header(ty_MPEGAudioPacket,
+                        &chunk[data_offset], 5);
+                if (pes_offset >= 0) {
+                    /* pes found. on SA, PES has hdr data at offset 6, not PTS. */
+                    if ((chunk[data_offset + 6 + pes_offset] & 0x80) == 0x80) {
+                        /* S1SA or S2(any) Mpeg Audio (PES hdr, not a PTS start) */
+                        if (ty->tivo_series == TIVO_SERIES1)
+                            ff_dlog(s, "detected Stand-Alone Tivo\n");
+                        ty->tivo_type = TIVO_TYPE_SA;
+                        ty->pts_offset = SA_PTS_OFFSET;
+                    } else {
+                        if (ty->tivo_series == TIVO_SERIES1)
+                            ff_dlog(s, "detected DirecTV Tivo\n");
+                        ty->tivo_type = TIVO_TYPE_DTIVO;
+                        ty->pts_offset = DTIVO_PTS_OFFSET;
+                    }
+                    break;
+                }
+            }
+            data_offset += hdrs[i].rec_size;
+        }
+    }
+    av_free(hdrs);
+
+    return 0;
+}
+
+static int ty_read_header(AVFormatContext *s)
+{
+    TYDemuxContext *ty = s->priv_data;
+    AVIOContext *pb = s->pb;
+    AVStream *st, *ast;
+    int i, ret = 0;
+
+    ty->first_audio_pts = AV_NOPTS_VALUE;
+    ty->last_audio_pts = AV_NOPTS_VALUE;
+    ty->last_video_pts = AV_NOPTS_VALUE;
+
+    for (i = 0; i < CHUNK_PEEK_COUNT; i++) {
+        avio_read(pb, ty->chunk, CHUNK_SIZE);
+
+        ret = analyze_chunk(s, ty->chunk);
+        if (ret < 0)
+            return ret;
+        if (ty->tivo_series != TIVO_SERIES_UNKNOWN &&
+            ty->audio_type  != TIVO_AUDIO_UNKNOWN &&
+            ty->tivo_type   != TIVO_TYPE_UNKNOWN)
+            break;
+    }
+
+    if (ty->tivo_series == TIVO_SERIES_UNKNOWN ||
+        ty->audio_type == TIVO_AUDIO_UNKNOWN ||
+        ty->tivo_type == TIVO_TYPE_UNKNOWN)
+        return AVERROR(EIO);
+
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+    st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+    st->codecpar->codec_id   = AV_CODEC_ID_MPEG2VIDEO;
+    st->need_parsing         = AVSTREAM_PARSE_FULL_RAW;
+    avpriv_set_pts_info(st, 64, 1, 90000);
+
+    ast = avformat_new_stream(s, NULL);
+    if (!ast)
+        return AVERROR(ENOMEM);
+    ast->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+
+    if (ty->audio_type == TIVO_AUDIO_MPEG) {
+        ast->codecpar->codec_id = AV_CODEC_ID_MP2;
+        ast->need_parsing       = AVSTREAM_PARSE_FULL_RAW;
+    } else {
+        ast->codecpar->codec_id = AV_CODEC_ID_AC3;
+    }
+    avpriv_set_pts_info(ast, 64, 1, 90000);
+
+    ty->first_chunk = 1;
+
+    avio_seek(pb, 0, SEEK_SET);
+
+    return 0;
+}
+
+/* parse a master chunk, filling the SEQ table and other variables.
+ * We assume the stream is currently pointing to it.
+ */
+static void parse_master(AVFormatContext *s)
+{
+    TYDemuxContext *ty = s->priv_data;
+    unsigned map_size;  /* size of bitmask, in bytes */
+    unsigned i, j;
+
+    /* Note that the entries in the SEQ table in the stream may have
+       different sizes depending on the bits per entry.  We store them
+       all in the same size structure, so we have to parse them out one
+       by one.  If we had a dynamic structure, we could simply read the
+       entire table directly from the stream into memory in place. */
+
+    /* clear the SEQ table */
+    av_freep(&ty->seq_table);
+
+    /* parse header info */
+
+    map_size = AV_RB32(ty->chunk + 20);  /* size of bitmask, in bytes */
+    i = AV_RB32(ty->chunk + 28);   /* size of SEQ table, in bytes */
+
+    ty->seq_table_size = i / (8LL + map_size);
+
+    if (ty->seq_table_size == 0) {
+        ty->seq_table = NULL;
+        return;
+    }
+
+    /* parse all the entries */
+    ty->seq_table = av_calloc(ty->seq_table_size, sizeof(TySeqTable));
+    if (ty->seq_table == NULL) {
+        ty->seq_table_size = 0;
+        return;
+    }
+
+    ty->cur_chunk_pos = 32;
+    for (j = 0; j < ty->seq_table_size; j++) {
+        if (ty->cur_chunk_pos >= CHUNK_SIZE - 8)
+            return;
+        ty->seq_table[j].timestamp = AV_RB64(ty->chunk + ty->cur_chunk_pos);
+        ty->cur_chunk_pos += 8;
+        if (map_size > 8) {
+            av_log(s, AV_LOG_ERROR, "Unsupported SEQ bitmap size in master chunk.\n");
+            ty->cur_chunk_pos += map_size;
+        } else {
+            memcpy(ty->seq_table[j].chunk_bitmask, ty->chunk + ty->cur_chunk_pos, map_size);
+        }
+    }
+}
+
+static int get_chunk(AVFormatContext *s)
+{
+    TYDemuxContext *ty = s->priv_data;
+    AVIOContext *pb = s->pb;
+    int read_size, num_recs;
+
+    ff_dlog(s, "parsing ty chunk #%d\n", ty->cur_chunk);
+
+    /* if we have left-over filler space from the last chunk, get that */
+    if (avio_feof(pb))
+        return AVERROR_EOF;
+
+    /* read the TY packet header */
+    read_size = avio_read(pb, ty->chunk, CHUNK_SIZE);
+    ty->cur_chunk++;
+
+    if ((read_size < 4) || (AV_RB32(ty->chunk) == 0)) {
+        return AVERROR_EOF;
+    }
+
+    /* check if it's a PART Header */
+    if (AV_RB32(ty->chunk) == TIVO_PES_FILEID) {
+        parse_master(s); /* parse master chunk */
+        return get_chunk(s);
+    }
+
+    /* number of records in chunk (8- or 16-bit number) */
+    if (ty->chunk[3] & 0x80) {
+        /* 16 bit rec cnt */
+        ty->num_recs = num_recs = (ty->chunk[1] << 8) + ty->chunk[0];
+        ty->seq_rec = (ty->chunk[3] << 8) + ty->chunk[2];
+        if (ty->seq_rec != 0xffff) {
+            ty->seq_rec &= ~0x8000;
+        }
+    } else {
+        /* 8 bit reclen - TiVo 1.3 format */
+        ty->num_recs = num_recs = ty->chunk[0];
+        ty->seq_rec = ty->chunk[1];
+    }
+    ty->cur_rec = 0;
+    ty->first_chunk = 0;
+
+    ff_dlog(s, "chunk has %d records\n", num_recs);
+    ty->cur_chunk_pos = 4;
+
+    av_freep(&ty->rec_hdrs);
+
+    if (num_recs * 16 >= CHUNK_SIZE - 4)
+        return AVERROR_INVALIDDATA;
+
+    ty->rec_hdrs = parse_chunk_headers(ty->chunk + 4, num_recs);
+    if (!ty->rec_hdrs)
+        return AVERROR(ENOMEM);
+    ty->cur_chunk_pos += 16 * num_recs;
+
+    return 0;
+}
+
+static int demux_video(AVFormatContext *s, TyRecHdr *rec_hdr, AVPacket *pkt)
+{
+    TYDemuxContext *ty = s->priv_data;
+    const int subrec_type = rec_hdr->subrec_type;
+    const int64_t rec_size = rec_hdr->rec_size;
+    int es_offset1;
+    int got_packet = 0;
+
+    if (subrec_type != 0x02 && subrec_type != 0x0c &&
+        subrec_type != 0x08 && rec_size > 4) {
+        /* get the PTS from this packet if it has one.
+         * on S1, only 0x06 has PES.  On S2, however, most all do.
+         * Do NOT Pass the PES Header to the MPEG2 codec */
+        es_offset1 = find_es_header(ty_VideoPacket, ty->chunk + ty->cur_chunk_pos, 5);
+        if (es_offset1 != -1) {
+            ty->last_video_pts = ff_parse_pes_pts(
+                    ty->chunk + ty->cur_chunk_pos + es_offset1 + VIDEO_PTS_OFFSET);
+            if (subrec_type != 0x06) {
+                /* if we found a PES, and it's not type 6, then we're S2 */
+                /* The packet will have video data (& other headers) so we
+                 * chop out the PES header and send the rest */
+                if (rec_size >= VIDEO_PES_LENGTH + es_offset1) {
+                    int size = rec_hdr->rec_size - VIDEO_PES_LENGTH - es_offset1;
+
+                    ty->cur_chunk_pos += VIDEO_PES_LENGTH + es_offset1;
+                    if (av_new_packet(pkt, size) < 0)
+                        return AVERROR(ENOMEM);
+                    memcpy(pkt->data, ty->chunk + ty->cur_chunk_pos, size);
+                    ty->cur_chunk_pos += size;
+                    pkt->stream_index = 0;
+                    got_packet = 1;
+                } else {
+                    ff_dlog(s, "video rec type 0x%02x has short PES"
+                        " (%"PRId64" bytes)\n", subrec_type, rec_size);
+                    /* nuke this block; it's too short, but has PES marker */
+                    ty->cur_chunk_pos += rec_size;
+                    return 0;
+                }
+            }
+        }
+    }
+
+    if (subrec_type == 0x06) {
+        /* type 6 (S1 DTivo) has no data, so we're done */
+        ty->cur_chunk_pos += rec_size;
+        return 0;
+    }
+
+    if (!got_packet) {
+        if (av_new_packet(pkt, rec_size) < 0)
+            return AVERROR(ENOMEM);
+        memcpy(pkt->data, ty->chunk + ty->cur_chunk_pos, rec_size);
+        ty->cur_chunk_pos += rec_size;
+        pkt->stream_index = 0;
+        got_packet = 1;
+    }
+
+    /* if it's not a continue blk, then set PTS */
+    if (subrec_type != 0x02) {
+        if (subrec_type == 0x0c && pkt->size >= 6)
+            pkt->data[5] |= 0x08;
+        if (subrec_type == 0x07) {
+            ty->last_ty_pts = rec_hdr->ty_pts;
+        } else {
+            /* yes I know this is a cheap hack.  It's the timestamp
+               used for display and skipping fwd/back, so it
+               doesn't have to be accurate to the millisecond.
+               I adjust it here by roughly one 1/30 sec.  Yes it
+               will be slightly off for UK streams, but it's OK.
+             */
+            ty->last_ty_pts += 35000000;
+            //ty->last_ty_pts += 33366667;
+        }
+        /* set PTS for this block before we send */
+        if (ty->last_video_pts > AV_NOPTS_VALUE) {
+            pkt->pts = ty->last_video_pts;
+            /* PTS gets used ONCE.
+             * Any subsequent frames we get BEFORE next PES
+             * header will have their PTS computed in the codec */
+            ty->last_video_pts = AV_NOPTS_VALUE;
+        }
+    }
+
+    return got_packet;
+}
+
+static int check_sync_pes(AVFormatContext *s, AVPacket *pkt,
+                          int32_t offset, int32_t rec_len)
+{
+    TYDemuxContext *ty = s->priv_data;
+
+    if (offset < 0 || offset + ty->pes_length > rec_len) {
+        /* entire PES header not present */
+        ff_dlog(s, "PES header at %"PRId32" not complete in record. storing.\n", offset);
+        /* save the partial pes header */
+        if (offset < 0) {
+            /* no header found, fake some 00's (this works, believe me) */
+            memset(ty->pes_buffer, 0, 4);
+            ty->pes_buf_cnt = 4;
+            if (rec_len > 4)
+                ff_dlog(s, "PES header not found in record of %"PRId32" bytes!\n", rec_len);
+            return -1;
+        }
+        /* copy the partial pes header we found */
+        memcpy(ty->pes_buffer, pkt->data + offset, rec_len - offset);
+        ty->pes_buf_cnt = rec_len - offset;
+
+        if (offset > 0) {
+            /* PES Header was found, but not complete, so trim the end of this record */
+            pkt->size -= rec_len - offset;
+            return 1;
+        }
+        return -1;    /* partial PES, no audio data */
+    }
+    /* full PES header present, extract PTS */
+    ty->last_audio_pts = ff_parse_pes_pts(&pkt->data[ offset + ty->pts_offset]);
+    if (ty->first_audio_pts == AV_NOPTS_VALUE)
+        ty->first_audio_pts = ty->last_audio_pts;
+    pkt->pts = ty->last_audio_pts;
+    memmove(pkt->data + offset, pkt->data + offset + ty->pes_length, rec_len - ty->pes_length);
+    pkt->size -= ty->pes_length;
+    return 0;
+}
+
+static int demux_audio(AVFormatContext *s, TyRecHdr *rec_hdr, AVPacket *pkt)
+{
+    TYDemuxContext *ty = s->priv_data;
+    const int subrec_type = rec_hdr->subrec_type;
+    const int64_t rec_size = rec_hdr->rec_size;
+    int es_offset1;
+
+    if (subrec_type == 2) {
+        int need = 0;
+        /* SA or DTiVo Audio Data, no PES (continued block)
+         * ================================================
+         */
+
+        /* continue PES if previous was incomplete */
+        if (ty->pes_buf_cnt > 0) {
+            need = ty->pes_length - ty->pes_buf_cnt;
+
+            ff_dlog(s, "continuing PES header\n");
+            /* do we have enough data to complete? */
+            if (need >= rec_size) {
+                /* don't have complete PES hdr; save what we have and return */
+                memcpy(ty->pes_buffer + ty->pes_buf_cnt, ty->chunk + ty->cur_chunk_pos, rec_size);
+                ty->cur_chunk_pos += rec_size;
+                ty->pes_buf_cnt += rec_size;
+                return 0;
+            }
+
+            /* we have enough; reconstruct this frame with the new hdr */
+            memcpy(ty->pes_buffer + ty->pes_buf_cnt, ty->chunk + ty->cur_chunk_pos, need);
+            ty->cur_chunk_pos += need;
+            /* get the PTS out of this PES header (MPEG or AC3) */
+            if (ty->audio_type == TIVO_AUDIO_MPEG) {
+                es_offset1 = find_es_header(ty_MPEGAudioPacket,
+                        ty->pes_buffer, 5);
+            } else {
+                es_offset1 = find_es_header(ty_AC3AudioPacket,
+                        ty->pes_buffer, 5);
+            }
+            if (es_offset1 < 0) {
+                ff_dlog(s, "Can't find audio PES header in packet.\n");
+            } else {
+                ty->last_audio_pts = ff_parse_pes_pts(
+                    &ty->pes_buffer[es_offset1 + ty->pts_offset]);
+                pkt->pts = ty->last_audio_pts;
+            }
+            ty->pes_buf_cnt = 0;
+
+        }
+        if (av_new_packet(pkt, rec_size - need) < 0)
+            return AVERROR(ENOMEM);
+        memcpy(pkt->data, ty->chunk + ty->cur_chunk_pos, rec_size - need);
+        ty->cur_chunk_pos += rec_size - need;
+        pkt->stream_index = 1;
+
+        /* S2 DTivo has AC3 packets with 2 padding bytes at end.  This is
+         * not allowed in the AC3 spec and will cause problems.  So here
+         * we try to trim things. */
+        /* Also, S1 DTivo has alternating short / long AC3 packets.  That
+         * is, one packet is short (incomplete) and the next packet has
+         * the first one's missing data, plus all of its own.  Strange. */
+        if (ty->audio_type == TIVO_AUDIO_AC3 &&
+                ty->tivo_series == TIVO_SERIES2) {
+            if (ty->ac3_pkt_size + pkt->size > AC3_PKT_LENGTH) {
+                pkt->size -= 2;
+                ty->ac3_pkt_size = 0;
+            } else {
+                ty->ac3_pkt_size += pkt->size;
+            }
+        }
+    } else if (subrec_type == 0x03) {
+        if (av_new_packet(pkt, rec_size) < 0)
+            return AVERROR(ENOMEM);
+        memcpy(pkt->data, ty->chunk + ty->cur_chunk_pos, rec_size);
+        ty->cur_chunk_pos += rec_size;
+        pkt->stream_index = 1;
+        /* MPEG Audio with PES Header, either SA or DTiVo   */
+        /* ================================================ */
+        es_offset1 = find_es_header(ty_MPEGAudioPacket, pkt->data, 5);
+
+        /* SA PES Header, No Audio Data                     */
+        /* ================================================ */
+        if ((es_offset1 == 0) && (rec_size == 16)) {
+            ty->last_audio_pts = ff_parse_pes_pts(&pkt->data[SA_PTS_OFFSET]);
+            if (ty->first_audio_pts == AV_NOPTS_VALUE)
+                ty->first_audio_pts = ty->last_audio_pts;
+            av_packet_unref(pkt);
+            return 0;
+        }
+        /* DTiVo Audio with PES Header                      */
+        /* ================================================ */
+
+        /* Check for complete PES */
+        if (check_sync_pes(s, pkt, es_offset1, rec_size) == -1) {
+            /* partial PES header found, nothing else.
+             * we're done. */
+            av_packet_unref(pkt);
+            return 0;
+        }
+    } else if (subrec_type == 0x04) {
+        /* SA Audio with no PES Header                      */
+        /* ================================================ */
+        if (av_new_packet(pkt, rec_size) < 0)
+            return AVERROR(ENOMEM);
+        memcpy(pkt->data, ty->chunk + ty->cur_chunk_pos, rec_size);
+        ty->cur_chunk_pos += rec_size;
+        pkt->stream_index = 1;
+        pkt->pts = ty->last_audio_pts;
+    } else if (subrec_type == 0x09) {
+        if (av_new_packet(pkt, rec_size) < 0)
+            return AVERROR(ENOMEM);
+        memcpy(pkt->data, ty->chunk + ty->cur_chunk_pos, rec_size);
+        ty->cur_chunk_pos += rec_size ;
+        pkt->stream_index = 1;
+
+        /* DTiVo AC3 Audio Data with PES Header             */
+        /* ================================================ */
+        es_offset1 = find_es_header(ty_AC3AudioPacket, pkt->data, 5);
+
+        /* Check for complete PES */
+        if (check_sync_pes(s, pkt, es_offset1, rec_size) == -1) {
+            /* partial PES header found, nothing else.  we're done. */
+            av_packet_unref(pkt);
+            return 0;
+        }
+        /* S2 DTivo has invalid long AC3 packets */
+        if (ty->tivo_series == TIVO_SERIES2) {
+            if (pkt->size > AC3_PKT_LENGTH) {
+                pkt->size -= 2;
+                ty->ac3_pkt_size = 0;
+            } else {
+                ty->ac3_pkt_size = pkt->size;
+            }
+        }
+    } else {
+        /* Unsupported/Unknown */
+        ty->cur_chunk_pos += rec_size;
+        return 0;
+    }
+
+    return 1;
+}
+
+static int ty_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    TYDemuxContext *ty = s->priv_data;
+    AVIOContext *pb = s->pb;
+    TyRecHdr *rec;
+    int64_t rec_size = 0;
+    int ret = 0;
+
+    if (avio_feof(pb))
+        return AVERROR_EOF;
+
+    while (ret <= 0) {
+        if (!ty->rec_hdrs || ty->first_chunk || ty->cur_rec >= ty->num_recs) {
+            if (get_chunk(s) < 0 || ty->num_recs <= 0)
+                return AVERROR_EOF;
+        }
+
+        rec = &ty->rec_hdrs[ty->cur_rec];
+        rec_size = rec->rec_size;
+        ty->cur_rec++;
+
+        if (rec_size <= 0)
+            continue;
+
+        if (ty->cur_chunk_pos + rec->rec_size > CHUNK_SIZE)
+            return AVERROR_INVALIDDATA;
+
+        if (avio_feof(pb))
+            return AVERROR_EOF;
+
+        switch (rec->rec_type) {
+        case VIDEO_ID:
+            ret = demux_video(s, rec, pkt);
+            break;
+        case AUDIO_ID:
+            ret = demux_audio(s, rec, pkt);
+            break;
+        default:
+            ff_dlog(s, "Invalid record type 0x%02x\n", rec->rec_type);
+        case 0x01:
+        case 0x02:
+        case 0x03: /* TiVo data services */
+        case 0x05: /* unknown, but seen regularly */
+            ty->cur_chunk_pos += rec->rec_size;
+            break;
+        }
+    }
+
+    return 0;
+}
+
+static int ty_read_close(AVFormatContext *s)
+{
+    TYDemuxContext *ty = s->priv_data;
+
+    av_freep(&ty->seq_table);
+    av_freep(&ty->rec_hdrs);
+
+    return 0;
+}
+
+AVInputFormat ff_ty_demuxer = {
+    .name           = "ty",
+    .long_name      = NULL_IF_CONFIG_SMALL("TiVo TY Stream"),
+    .priv_data_size = sizeof(TYDemuxContext),
+    .read_probe     = ty_probe,
+    .read_header    = ty_read_header,
+    .read_packet    = ty_read_packet,
+    .read_close     = ty_read_close,
+    .extensions     = "ty,ty+",
+    .flags          = AVFMT_TS_DISCONT,
+};

diff --git a/libavformat/udp.c b/libavformat/udp.c
index 3835f98..cf73d33 100644
--- a/libavformat/udp.c
+++ b/libavformat/udp.c

@@ -41,6 +41,7 @@
 #include "network.h"
 #include "os_support.h"
 #include "url.h"
+#include "ip.h"
 
 #ifdef __APPLE__
 #include "TargetConditionals.h"
@@ -64,10 +65,6 @@
 #include <pthread.h>
 #endif
 
-#ifndef HAVE_PTHREAD_CANCEL
-#define HAVE_PTHREAD_CANCEL 0
-#endif
-
 #ifndef IPV6_ADD_MEMBERSHIP
 #define IPV6_ADD_MEMBERSHIP IPV6_JOIN_GROUP
 #define IPV6_DROP_MEMBERSHIP IPV6_LEAVE_GROUP
@@ -113,6 +110,7 @@
     struct sockaddr_storage local_addr_storage;
     char *sources;
     char *block;
+    IPSourceFilters filters;
 } UDPContext;
 
 #define OFFSET(x) offsetof(UDPContext, x)
@@ -154,20 +152,13 @@
     .version        = LIBAVUTIL_VERSION_INT,
 };
 
-static void log_net_error(void *ctx, int level, const char* prefix)
-{
-    char errbuf[100];
-    av_strerror(ff_neterrno(), errbuf, sizeof(errbuf));
-    av_log(ctx, level, "%s: %s\n", prefix, errbuf);
-}
-
 static int udp_set_multicast_ttl(int sockfd, int mcastTTL,
                                  struct sockaddr *addr)
 {
 #ifdef IP_MULTICAST_TTL
     if (addr->sa_family == AF_INET) {
         if (setsockopt(sockfd, IPPROTO_IP, IP_MULTICAST_TTL, &mcastTTL, sizeof(mcastTTL)) < 0) {
-            log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IP_MULTICAST_TTL)");
+            ff_log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IP_MULTICAST_TTL)");
             return -1;
         }
     }
@@ -175,7 +166,7 @@
 #if defined(IPPROTO_IPV6) && defined(IPV6_MULTICAST_HOPS)
     if (addr->sa_family == AF_INET6) {
         if (setsockopt(sockfd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, &mcastTTL, sizeof(mcastTTL)) < 0) {
-            log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IPV6_MULTICAST_HOPS)");
+            ff_log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IPV6_MULTICAST_HOPS)");
             return -1;
         }
     }
@@ -195,7 +186,7 @@
         else
             mreq.imr_interface.s_addr= INADDR_ANY;
         if (setsockopt(sockfd, IPPROTO_IP, IP_ADD_MEMBERSHIP, (const void *)&mreq, sizeof(mreq)) < 0) {
-            log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IP_ADD_MEMBERSHIP)");
+            ff_log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IP_ADD_MEMBERSHIP)");
             return -1;
         }
     }
@@ -205,9 +196,10 @@
         struct ipv6_mreq mreq6;
 
         memcpy(&mreq6.ipv6mr_multiaddr, &(((struct sockaddr_in6 *)addr)->sin6_addr), sizeof(struct in6_addr));
+        //TODO: Interface index should be looked up from local_addr
         mreq6.ipv6mr_interface= 0;
         if (setsockopt(sockfd, IPPROTO_IPV6, IPV6_ADD_MEMBERSHIP, &mreq6, sizeof(mreq6)) < 0) {
-            log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IPV6_ADD_MEMBERSHIP)");
+            ff_log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IPV6_ADD_MEMBERSHIP)");
             return -1;
         }
     }
@@ -227,7 +219,7 @@
         else
             mreq.imr_interface.s_addr= INADDR_ANY;
         if (setsockopt(sockfd, IPPROTO_IP, IP_DROP_MEMBERSHIP, (const void *)&mreq, sizeof(mreq)) < 0) {
-            log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IP_DROP_MEMBERSHIP)");
+            ff_log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IP_DROP_MEMBERSHIP)");
             return -1;
         }
     }
@@ -237,9 +229,10 @@
         struct ipv6_mreq mreq6;
 
         memcpy(&mreq6.ipv6mr_multiaddr, &(((struct sockaddr_in6 *)addr)->sin6_addr), sizeof(struct in6_addr));
+        //TODO: Interface index should be looked up from local_addr
         mreq6.ipv6mr_interface= 0;
         if (setsockopt(sockfd, IPPROTO_IPV6, IPV6_DROP_MEMBERSHIP, &mreq6, sizeof(mreq6)) < 0) {
-            log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IPV6_DROP_MEMBERSHIP)");
+            ff_log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IPV6_DROP_MEMBERSHIP)");
             return -1;
         }
     }
@@ -247,102 +240,67 @@
     return 0;
 }
 
-static struct addrinfo *udp_resolve_host(URLContext *h,
-                                         const char *hostname, int port,
-                                         int type, int family, int flags)
-{
-    struct addrinfo hints = { 0 }, *res = 0;
-    int error;
-    char sport[16];
-    const char *node = 0, *service = "0";
-
-    if (port > 0) {
-        snprintf(sport, sizeof(sport), "%d", port);
-        service = sport;
-    }
-    if ((hostname) && (hostname[0] != '\0') && (hostname[0] != '?')) {
-        node = hostname;
-    }
-    hints.ai_socktype = type;
-    hints.ai_family   = family;
-    hints.ai_flags = flags;
-    if ((error = getaddrinfo(node, service, &hints, &res))) {
-        res = NULL;
-        av_log(h, AV_LOG_ERROR, "getaddrinfo(%s, %s): %s\n",
-               node ? node : "unknown",
-               service,
-               gai_strerror(error));
-    }
-
-    return res;
-}
-
 static int udp_set_multicast_sources(URLContext *h,
                                      int sockfd, struct sockaddr *addr,
-                                     int addr_len, char **sources,
+                                     int addr_len, struct sockaddr_storage *local_addr,
+                                     struct sockaddr_storage *sources,
                                      int nb_sources, int include)
 {
-#if HAVE_STRUCT_GROUP_SOURCE_REQ && defined(MCAST_BLOCK_SOURCE) && !defined(_WIN32) && (!defined(TARGET_OS_TV) || !TARGET_OS_TV)
-    /* These ones are available in the microsoft SDK, but don't seem to work
-     * as on linux, so just prefer the v4-only approach there for now. */
-    int i;
-    for (i = 0; i < nb_sources; i++) {
-        struct group_source_req mreqs;
-        int level = addr->sa_family == AF_INET ? IPPROTO_IP : IPPROTO_IPV6;
-        struct addrinfo *sourceaddr = udp_resolve_host(h, sources[i], 0,
-                                                       SOCK_DGRAM, AF_UNSPEC,
-                                                       0);
-        if (!sourceaddr)
-            return AVERROR(ENOENT);
-
-        mreqs.gsr_interface = 0;
-        memcpy(&mreqs.gsr_group, addr, addr_len);
-        memcpy(&mreqs.gsr_source, sourceaddr->ai_addr, sourceaddr->ai_addrlen);
-        freeaddrinfo(sourceaddr);
-
-        if (setsockopt(sockfd, level,
-                       include ? MCAST_JOIN_SOURCE_GROUP : MCAST_BLOCK_SOURCE,
-                       (const void *)&mreqs, sizeof(mreqs)) < 0) {
-            if (include)
-                log_net_error(NULL, AV_LOG_ERROR, "setsockopt(MCAST_JOIN_SOURCE_GROUP)");
-            else
-                log_net_error(NULL, AV_LOG_ERROR, "setsockopt(MCAST_BLOCK_SOURCE)");
-            return ff_neterrno();
-        }
-    }
-#elif HAVE_STRUCT_IP_MREQ_SOURCE && defined(IP_BLOCK_SOURCE)
     int i;
     if (addr->sa_family != AF_INET) {
+#if HAVE_STRUCT_GROUP_SOURCE_REQ && defined(MCAST_BLOCK_SOURCE)
+        /* For IPv4 prefer the old approach, as that alone works reliably on
+         * Windows and it also supports supplying the interface based on its
+         * address. */
+        int i;
+        for (i = 0; i < nb_sources; i++) {
+            struct group_source_req mreqs;
+            int level = addr->sa_family == AF_INET ? IPPROTO_IP : IPPROTO_IPV6;
+
+            //TODO: Interface index should be looked up from local_addr
+            mreqs.gsr_interface = 0;
+            memcpy(&mreqs.gsr_group, addr, addr_len);
+            memcpy(&mreqs.gsr_source, &sources[i], sizeof(*sources));
+
+            if (setsockopt(sockfd, level,
+                           include ? MCAST_JOIN_SOURCE_GROUP : MCAST_BLOCK_SOURCE,
+                           (const void *)&mreqs, sizeof(mreqs)) < 0) {
+                if (include)
+                    ff_log_net_error(NULL, AV_LOG_ERROR, "setsockopt(MCAST_JOIN_SOURCE_GROUP)");
+                else
+                    ff_log_net_error(NULL, AV_LOG_ERROR, "setsockopt(MCAST_BLOCK_SOURCE)");
+                return ff_neterrno();
+            }
+        }
+        return 0;
+#else
         av_log(NULL, AV_LOG_ERROR,
                "Setting multicast sources only supported for IPv4\n");
         return AVERROR(EINVAL);
+#endif
     }
+#if HAVE_STRUCT_IP_MREQ_SOURCE && defined(IP_BLOCK_SOURCE)
     for (i = 0; i < nb_sources; i++) {
         struct ip_mreq_source mreqs;
-        struct addrinfo *sourceaddr = udp_resolve_host(h, sources[i], 0,
-                                                       SOCK_DGRAM, AF_UNSPEC,
-                                                       0);
-        if (!sourceaddr)
-            return AVERROR(ENOENT);
-        if (sourceaddr->ai_addr->sa_family != AF_INET) {
-            freeaddrinfo(sourceaddr);
-            av_log(NULL, AV_LOG_ERROR, "%s is of incorrect protocol family\n",
-                   sources[i]);
+        if (sources[i].ss_family != AF_INET) {
+            av_log(NULL, AV_LOG_ERROR, "Source/block address %d is of incorrect protocol family\n", i + 1);
             return AVERROR(EINVAL);
         }
 
         mreqs.imr_multiaddr.s_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
-        mreqs.imr_interface.s_addr = INADDR_ANY;
-        mreqs.imr_sourceaddr.s_addr = ((struct sockaddr_in *)sourceaddr->ai_addr)->sin_addr.s_addr;
-        freeaddrinfo(sourceaddr);
+        if (local_addr)
+            mreqs.imr_interface= ((struct sockaddr_in *)local_addr)->sin_addr;
+        else
+            mreqs.imr_interface.s_addr= INADDR_ANY;
+        mreqs.imr_sourceaddr.s_addr = ((struct sockaddr_in *)&sources[i])->sin_addr.s_addr;
 
         if (setsockopt(sockfd, IPPROTO_IP,
                        include ? IP_ADD_SOURCE_MEMBERSHIP : IP_BLOCK_SOURCE,
                        (const void *)&mreqs, sizeof(mreqs)) < 0) {
             if (include)
-                log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IP_ADD_SOURCE_MEMBERSHIP)");
+                ff_log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IP_ADD_SOURCE_MEMBERSHIP)");
             else
-                log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IP_BLOCK_SOURCE)");
+                ff_log_net_error(NULL, AV_LOG_ERROR, "setsockopt(IP_BLOCK_SOURCE)");
             return ff_neterrno();
         }
     }
@@ -358,7 +316,7 @@
     struct addrinfo *res0;
     int addr_len;
 
-    res0 = udp_resolve_host(h, hostname, port, SOCK_DGRAM, AF_UNSPEC, 0);
+    res0 = ff_ip_resolve_host(h, hostname, port, SOCK_DGRAM, AF_UNSPEC, 0);
     if (!res0) return AVERROR(EIO);
     memcpy(addr, res0->ai_addr, res0->ai_addrlen);
     addr_len = res0->ai_addrlen;
@@ -377,7 +335,7 @@
 
     if (((struct sockaddr *) &s->dest_addr)->sa_family)
         family = ((struct sockaddr *) &s->dest_addr)->sa_family;
-    res0 = udp_resolve_host(h, (localaddr && localaddr[0]) ? localaddr : NULL,
+    res0 = ff_ip_resolve_host(h, (localaddr && localaddr[0]) ? localaddr : NULL,
                             s->local_port,
                             SOCK_DGRAM, family, AI_PASSIVE);
     if (!res0)
@@ -388,7 +346,7 @@
         else
             udp_fd = ff_socket(res->ai_family, SOCK_DGRAM, 0);
         if (udp_fd != -1) break;
-        log_net_error(NULL, AV_LOG_ERROR, "socket");
+        ff_log_net_error(NULL, AV_LOG_ERROR, "socket");
     }
 
     if (udp_fd < 0)
@@ -463,7 +421,7 @@
                 if (connect(s->udp_fd, (struct sockaddr *) &s->dest_addr,
                             s->dest_addr_len)) {
                     s->is_connected = 0;
-                    log_net_error(h, AV_LOG_ERROR, "connect");
+                    ff_log_net_error(h, AV_LOG_ERROR, "connect");
                     return AVERROR(EIO);
                 }
             }
@@ -511,13 +469,15 @@
     }
     while(1) {
         int len;
+        struct sockaddr_storage addr;
+        socklen_t addr_len = sizeof(addr);
 
         pthread_mutex_unlock(&s->mutex);
         /* Blocking operations are always cancellation points;
            see "General Information" / "Thread Cancelation Overview"
            in Single Unix. */
         pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_cancelstate);
-        len = recv(s->udp_fd, s->tmp+4, sizeof(s->tmp)-4, 0);
+        len = recvfrom(s->udp_fd, s->tmp+4, sizeof(s->tmp)-4, 0, (struct sockaddr *)&addr, &addr_len);
         pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old_cancelstate);
         pthread_mutex_lock(&s->mutex);
         if (len < 0) {
@@ -527,6 +487,8 @@
             }
             continue;
         }
+        if (ff_ip_check_source_lists(&addr, &s->filters))
+            continue;
         AV_WL32(s->tmp, len);
 
         if(av_fifo_space(s->fifo) < len + 4) {
@@ -657,27 +619,6 @@
 
 #endif
 
-static int parse_source_list(char *buf, char **sources, int *num_sources,
-                             int max_sources)
-{
-    char *source_start;
-
-    source_start = buf;
-    while (1) {
-        char *next = strchr(source_start, ',');
-        if (next)
-            *next = '\0';
-        sources[*num_sources] = av_strdup(source_start);
-        if (!sources[*num_sources])
-            return AVERROR(ENOMEM);
-        source_start = next + 1;
-        (*num_sources)++;
-        if (*num_sources >= max_sources || !next)
-            break;
-    }
-    return 0;
-}
-
 /* put it in UDP context */
 /* return non zero if error */
 static int udp_open(URLContext *h, const char *uri, int flags)
@@ -690,8 +631,6 @@
     char buf[256];
     struct sockaddr_storage my_addr;
     socklen_t len;
-    int i, num_include_sources = 0, num_exclude_sources = 0;
-    char *include_sources[32], *exclude_sources[32];
 
     h->is_streamed = 1;
 
@@ -700,15 +639,12 @@
         s->buffer_size = is_output ? UDP_TX_BUF_SIZE : UDP_MAX_PKT_SIZE;
 
     if (s->sources) {
-        if (parse_source_list(s->sources, include_sources,
-                              &num_include_sources,
-                              FF_ARRAY_ELEMS(include_sources)))
+        if (ff_ip_parse_sources(h, s->sources, &s->filters) < 0)
             goto fail;
     }
 
     if (s->block) {
-        if (parse_source_list(s->block, exclude_sources, &num_exclude_sources,
-                              FF_ARRAY_ELEMS(exclude_sources)))
+        if (ff_ip_parse_blocks(h, s->block, &s->filters) < 0)
             goto fail;
     }
 
@@ -777,13 +713,11 @@
             av_strlcpy(localaddr, buf, sizeof(localaddr));
         }
         if (av_find_info_tag(buf, sizeof(buf), "sources", p)) {
-            if (parse_source_list(buf, include_sources, &num_include_sources,
-                                  FF_ARRAY_ELEMS(include_sources)))
+            if (ff_ip_parse_sources(h, buf, &s->filters) < 0)
                 goto fail;
         }
         if (av_find_info_tag(buf, sizeof(buf), "block", p)) {
-            if (parse_source_list(buf, exclude_sources, &num_exclude_sources,
-                                  FF_ARRAY_ELEMS(exclude_sources)))
+            if (ff_ip_parse_blocks(h, buf, &s->filters) < 0)
                 goto fail;
         }
         if (!is_output && av_find_info_tag(buf, sizeof(buf), "timeout", p))
@@ -870,7 +804,7 @@
      * bind failed */
     /* the bind is needed to give a port to the socket now */
     if (bind_ret < 0 && bind(udp_fd,(struct sockaddr *)&my_addr, len) < 0) {
-        log_net_error(h, AV_LOG_ERROR, "bind failed");
+        ff_log_net_error(h, AV_LOG_ERROR, "bind failed");
         goto fail;
     }
 
@@ -886,27 +820,23 @@
         }
         if (h->flags & AVIO_FLAG_READ) {
             /* input */
-            if (num_include_sources && num_exclude_sources) {
-                av_log(h, AV_LOG_ERROR, "Simultaneously including and excluding multicast sources is not supported\n");
-                goto fail;
-            }
-            if (num_include_sources) {
+            if (s->filters.nb_include_addrs) {
                 if (udp_set_multicast_sources(h, udp_fd,
                                               (struct sockaddr *)&s->dest_addr,
-                                              s->dest_addr_len,
-                                              include_sources,
-                                              num_include_sources, 1) < 0)
+                                              s->dest_addr_len, &s->local_addr_storage,
+                                              s->filters.include_addrs,
+                                              s->filters.nb_include_addrs, 1) < 0)
                     goto fail;
             } else {
                 if (udp_join_multicast_group(udp_fd, (struct sockaddr *)&s->dest_addr,(struct sockaddr *)&s->local_addr_storage) < 0)
                     goto fail;
             }
-            if (num_exclude_sources) {
+            if (s->filters.nb_exclude_addrs) {
                 if (udp_set_multicast_sources(h, udp_fd,
                                               (struct sockaddr *)&s->dest_addr,
-                                              s->dest_addr_len,
-                                              exclude_sources,
-                                              num_exclude_sources, 0) < 0)
+                                              s->dest_addr_len, &s->local_addr_storage,
+                                              s->filters.exclude_addrs,
+                                              s->filters.nb_exclude_addrs, 0) < 0)
                     goto fail;
             }
         }
@@ -916,18 +846,18 @@
         /* limit the tx buf size to limit latency */
         tmp = s->buffer_size;
         if (setsockopt(udp_fd, SOL_SOCKET, SO_SNDBUF, &tmp, sizeof(tmp)) < 0) {
-            log_net_error(h, AV_LOG_ERROR, "setsockopt(SO_SNDBUF)");
+            ff_log_net_error(h, AV_LOG_ERROR, "setsockopt(SO_SNDBUF)");
             goto fail;
         }
     } else {
         /* set udp recv buffer size to the requested value (default 64K) */
         tmp = s->buffer_size;
         if (setsockopt(udp_fd, SOL_SOCKET, SO_RCVBUF, &tmp, sizeof(tmp)) < 0) {
-            log_net_error(h, AV_LOG_WARNING, "setsockopt(SO_RECVBUF)");
+            ff_log_net_error(h, AV_LOG_WARNING, "setsockopt(SO_RECVBUF)");
         }
         len = sizeof(tmp);
         if (getsockopt(udp_fd, SOL_SOCKET, SO_RCVBUF, &tmp, &len) < 0) {
-            log_net_error(h, AV_LOG_WARNING, "getsockopt(SO_RCVBUF)");
+            ff_log_net_error(h, AV_LOG_WARNING, "getsockopt(SO_RCVBUF)");
         } else {
             av_log(h, AV_LOG_DEBUG, "end receive buffer size reported is %d\n", tmp);
             if(tmp < s->buffer_size)
@@ -939,16 +869,11 @@
     }
     if (s->is_connected) {
         if (connect(udp_fd, (struct sockaddr *) &s->dest_addr, s->dest_addr_len)) {
-            log_net_error(h, AV_LOG_ERROR, "connect");
+            ff_log_net_error(h, AV_LOG_ERROR, "connect");
             goto fail;
         }
     }
 
-    for (i = 0; i < num_include_sources; i++)
-        av_freep(&include_sources[i]);
-    for (i = 0; i < num_exclude_sources; i++)
-        av_freep(&exclude_sources[i]);
-
     s->udp_fd = udp_fd;
 
 #if HAVE_PTHREAD_CANCEL
@@ -998,10 +923,7 @@
     if (udp_fd >= 0)
         closesocket(udp_fd);
     av_fifo_freep(&s->fifo);
-    for (i = 0; i < num_include_sources; i++)
-        av_freep(&include_sources[i]);
-    for (i = 0; i < num_exclude_sources; i++)
-        av_freep(&exclude_sources[i]);
+    ff_ip_reset_filters(&s->filters);
     return AVERROR(EIO);
 }
 
@@ -1019,6 +941,8 @@
 {
     UDPContext *s = h->priv_data;
     int ret;
+    struct sockaddr_storage addr;
+    socklen_t addr_len = sizeof(addr);
 #if HAVE_PTHREAD_CANCEL
     int avail, nonblock = h->flags & AVIO_FLAG_NONBLOCK;
 
@@ -1069,9 +993,12 @@
         if (ret < 0)
             return ret;
     }
-    ret = recv(s->udp_fd, buf, size, 0);
-
-    return ret < 0 ? ff_neterrno() : ret;
+    ret = recvfrom(s->udp_fd, buf, size, 0, (struct sockaddr *)&addr, &addr_len);
+    if (ret < 0)
+        return ff_neterrno();
+    if (ff_ip_check_source_lists(&addr, &s->filters))
+        return AVERROR(EINTR);
+    return ret;
 }
 
 static int udp_write(URLContext *h, const uint8_t *buf, int size)
@@ -1155,6 +1082,7 @@
 #endif
     closesocket(s->udp_fd);
     av_fifo_freep(&s->fifo);
+    ff_ip_reset_filters(&s->filters);
     return 0;
 }
 

diff --git a/libavformat/unix.c b/libavformat/unix.c
index 4f01d14..38016db 100644
--- a/libavformat/unix.c
+++ b/libavformat/unix.c

@@ -111,6 +111,8 @@
             return ret;
     }
     ret = recv(s->fd, buf, size, 0);
+    if (!ret && s->type == SOCK_STREAM)
+        return AVERROR_EOF;
     return ret < 0 ? ff_neterrno() : ret;
 }
 

diff --git a/libavformat/utils.c b/libavformat/utils.c
index 93722e3..a8ac902 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c

@@ -32,6 +32,7 @@
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/thread.h"
 #include "libavutil/time.h"
 #include "libavutil/time_internal.h"
 #include "libavutil/timestamp.h"
@@ -55,6 +56,8 @@
 #include "libavutil/ffversion.h"
 const char av_format_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
 
+static AVMutex avformat_mutex = AV_MUTEX_INITIALIZER;
+
 /**
  * @file
  * various utility functions for use within FFmpeg
@@ -77,6 +80,16 @@
     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
 }
 
+int ff_lock_avformat(void)
+{
+    return ff_mutex_lock(&avformat_mutex) ? -1 : 0;
+}
+
+int ff_unlock_avformat(void)
+{
+    return ff_mutex_unlock(&avformat_mutex) ? -1 : 0;
+}
+
 #define RELATIVE_TS_BASE (INT64_MAX - (1LL<<48))
 
 static int is_relative(int64_t ts) {
@@ -104,8 +117,13 @@
     return timestamp;
 }
 
+#if FF_API_FORMAT_GET_SET
 MAKE_ACCESSORS(AVStream, stream, AVRational, r_frame_rate)
+#if FF_API_LAVF_FFSERVER
+FF_DISABLE_DEPRECATION_WARNINGS
 MAKE_ACCESSORS(AVStream, stream, char *, recommended_encoder_configuration)
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 MAKE_ACCESSORS(AVFormatContext, format, AVCodec *, video_codec)
 MAKE_ACCESSORS(AVFormatContext, format, AVCodec *, audio_codec)
 MAKE_ACCESSORS(AVFormatContext, format, AVCodec *, subtitle_codec)
@@ -118,11 +136,12 @@
 MAKE_ACCESSORS(AVFormatContext, format, AVOpenCallback, open_cb)
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
+#endif
 
 int64_t av_stream_get_end_pts(const AVStream *st)
 {
-    if (st->priv_pts) {
-        return st->priv_pts->val;
+    if (st->internal->priv_pts) {
+        return st->internal->priv_pts->val;
     } else
         return AV_NOPTS_VALUE;
 }
@@ -215,10 +234,12 @@
     return codec;
 }
 
+#if FF_API_FORMAT_GET_SET
 int av_format_get_probe_score(const AVFormatContext *s)
 {
     return s->probe_score;
 }
+#endif
 
 /* an arbitrarily chosen "sane" max packet size -- 50M */
 #define SANE_CHUNK_SIZE (50000000)
@@ -320,6 +341,7 @@
     } fmt_id_type[] = {
         { "aac",       AV_CODEC_ID_AAC,        AVMEDIA_TYPE_AUDIO },
         { "ac3",       AV_CODEC_ID_AC3,        AVMEDIA_TYPE_AUDIO },
+        { "aptx",      AV_CODEC_ID_APTX,       AVMEDIA_TYPE_AUDIO },
         { "dts",       AV_CODEC_ID_DTS,        AVMEDIA_TYPE_AUDIO },
         { "dvbsub",    AV_CODEC_ID_DVB_SUBTITLE,AVMEDIA_TYPE_SUBTITLE },
         { "dvbtxt",    AV_CODEC_ID_DVB_TELETEXT,AVMEDIA_TYPE_SUBTITLE },
@@ -422,8 +444,9 @@
                                  s, 0, s->format_probesize);
 }
 
-static int add_to_pktbuf(AVPacketList **packet_buffer, AVPacket *pkt,
-                         AVPacketList **plast_pktl, int ref)
+int ff_packet_list_put(AVPacketList **packet_buffer,
+                       AVPacketList **plast_pktl,
+                       AVPacket      *pkt, int flags)
 {
     AVPacketList *pktl = av_mallocz(sizeof(AVPacketList));
     int ret;
@@ -431,12 +454,15 @@
     if (!pktl)
         return AVERROR(ENOMEM);
 
-    if (ref) {
+    if (flags & FF_PACKETLIST_FLAG_REF_PACKET) {
         if ((ret = av_packet_ref(&pktl->pkt, pkt)) < 0) {
             av_free(pktl);
             return ret;
         }
     } else {
+        // TODO: Adapt callers in this file so the line below can use
+        //       av_packet_move_ref() to effectively move the reference
+        //       to the list.
         pktl->pkt = *pkt;
     }
 
@@ -463,9 +489,10 @@
                 continue;
             }
 
-            ret = add_to_pktbuf(&s->internal->raw_packet_buffer,
-                                &s->streams[i]->attached_pic,
-                                &s->internal->raw_packet_buffer_end, 1);
+            ret = ff_packet_list_put(&s->internal->raw_packet_buffer,
+                                     &s->internal->raw_packet_buffer_end,
+                                     &s->streams[i]->attached_pic,
+                                     FF_PACKETLIST_FLAG_REF_PACKET);
             if (ret < 0)
                 return ret;
         }
@@ -533,7 +560,16 @@
     if ((ret = av_opt_set_dict(s, &tmp)) < 0)
         goto fail;
 
+    if (!(s->url = av_strdup(filename ? filename : ""))) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+#if FF_API_FORMAT_FILENAME
+FF_DISABLE_DEPRECATION_WARNINGS
     av_strlcpy(s->filename, filename ? filename : "", sizeof(s->filename));
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     if ((ret = init_input(s, filename, &tmp)) < 0)
         goto fail;
     s->probe_score = ret;
@@ -615,6 +651,8 @@
                 goto fail;
             if ((ret = ff_id3v2_parse_chapters(s, &id3v2_extra_meta)) < 0)
                 goto fail;
+            if ((ret = ff_id3v2_parse_priv(s, &id3v2_extra_meta)) < 0)
+                goto fail;
         } else
             av_log(s, AV_LOG_DEBUG, "demuxer does not support additional id3 data, skipping\n");
     }
@@ -834,13 +872,9 @@
             continue;
         }
 
-        if (!pkt->buf) {
-            AVPacket tmp = { 0 };
-            ret = av_packet_ref(&tmp, pkt);
-            if (ret < 0)
-                return ret;
-            *pkt = tmp;
-        }
+        err = av_packet_make_refcounted(pkt);
+        if (err < 0)
+            return err;
 
         if ((s->flags & AVFMT_FLAG_DISCARD_CORRUPT) &&
             (pkt->flags & AV_PKT_FLAG_CORRUPT)) {
@@ -880,8 +914,9 @@
         if (!pktl && st->request_probe <= 0)
             return ret;
 
-        err = add_to_pktbuf(&s->internal->raw_packet_buffer, pkt,
-                            &s->internal->raw_packet_buffer_end, 0);
+        err = ff_packet_list_put(&s->internal->raw_packet_buffer,
+                                 &s->internal->raw_packet_buffer_end,
+                                 pkt, 0);
         if (err)
             return err;
         s->internal->raw_packet_buffer_remaining_size -= pkt->size;
@@ -900,6 +935,7 @@
     case AV_CODEC_ID_MP1:
     case AV_CODEC_ID_MP2:
     case AV_CODEC_ID_MP3:
+    case AV_CODEC_ID_CODEC2:
         return 1;
     }
 
@@ -1093,6 +1129,7 @@
     if (st->first_dts != AV_NOPTS_VALUE ||
         dts           == AV_NOPTS_VALUE ||
         st->cur_dts   == AV_NOPTS_VALUE ||
+        st->cur_dts < INT_MIN + RELATIVE_TS_BASE ||
         is_relative(dts))
         return;
 
@@ -1124,7 +1161,9 @@
     }
 
     if (st->start_time == AV_NOPTS_VALUE) {
-        st->start_time = pts;
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO || !(pkt->flags & AV_PKT_FLAG_DISCARD)) {
+            st->start_time = pts;
+        }
         if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && st->codecpar->sample_rate)
             st->start_time += av_rescale_q(st->skip_samples, (AVRational){1, st->codecpar->sample_rate}, st->time_base);
     }
@@ -1293,7 +1332,7 @@
         presentation_delayed = 1;
 
     if (s->debug & FF_FDEBUG_TS)
-        av_log(s, AV_LOG_TRACE,
+        av_log(s, AV_LOG_DEBUG,
             "IN delayed:%d pts:%s, dts:%s cur_dts:%s st:%d pc:%p duration:%"PRId64" delay:%d onein_oneout:%d\n",
             presentation_delayed, av_ts2str(pkt->pts), av_ts2str(pkt->dts), av_ts2str(st->cur_dts),
             pkt->stream_index, pc, pkt->duration, delay, onein_oneout);
@@ -1362,11 +1401,11 @@
         st->cur_dts = pkt->dts;
 
     if (s->debug & FF_FDEBUG_TS)
-        av_log(s, AV_LOG_TRACE, "OUTdelayed:%d/%d pts:%s, dts:%s cur_dts:%s\n",
+        av_log(s, AV_LOG_DEBUG, "OUTdelayed:%d/%d pts:%s, dts:%s cur_dts:%s\n",
             presentation_delayed, delay, av_ts2str(pkt->pts), av_ts2str(pkt->dts), av_ts2str(st->cur_dts));
 
     /* update flags */
-    if (is_intra_only(st->codecpar->codec_id))
+    if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA || is_intra_only(st->codecpar->codec_id))
         pkt->flags |= AV_PKT_FLAG_KEY;
 #if FF_API_CONVERGENCE_DURATION
 FF_DISABLE_DEPRECATION_WARNINGS
@@ -1376,14 +1415,17 @@
 #endif
 }
 
-static void free_packet_buffer(AVPacketList **pkt_buf, AVPacketList **pkt_buf_end)
+void ff_packet_list_free(AVPacketList **pkt_buf, AVPacketList **pkt_buf_end)
 {
-    while (*pkt_buf) {
-        AVPacketList *pktl = *pkt_buf;
-        *pkt_buf = pktl->next;
+    AVPacketList *tmp = *pkt_buf;
+
+    while (tmp) {
+        AVPacketList *pktl = tmp;
+        tmp = pktl->next;
         av_packet_unref(&pktl->pkt);
         av_freep(&pktl);
     }
+    *pkt_buf     = NULL;
     *pkt_buf_end = NULL;
 }
 
@@ -1430,6 +1472,22 @@
         if (!out_pkt.size)
             continue;
 
+        if (pkt->buf && out_pkt.data == pkt->data) {
+            /* reference pkt->buf only when out_pkt.data is guaranteed to point
+             * to data in it and not in the parser's internal buffer. */
+            /* XXX: Ensure this is the case with all parsers when st->parser->flags
+             * is PARSER_FLAG_COMPLETE_FRAMES and check for that instead? */
+            out_pkt.buf = av_buffer_ref(pkt->buf);
+            if (!out_pkt.buf) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+        } else {
+            ret = av_packet_make_refcounted(&out_pkt);
+            if (ret < 0)
+                goto fail;
+        }
+
         if (pkt->side_data) {
             out_pkt.side_data       = pkt->side_data;
             out_pkt.side_data_elems = pkt->side_data_elems;
@@ -1453,6 +1511,7 @@
         out_pkt.pts          = st->parser->pts;
         out_pkt.dts          = st->parser->dts;
         out_pkt.pos          = st->parser->pos;
+        out_pkt.flags       |= pkt->flags & AV_PKT_FLAG_DISCARD;
 
         if (st->need_parsing == AVSTREAM_PARSE_FULL_RAW)
             out_pkt.pos = st->parser->frame_offset;
@@ -1467,11 +1526,13 @@
 
         compute_pkt_fields(s, st, st->parser, &out_pkt, next_dts, next_pts);
 
-        ret = add_to_pktbuf(&s->internal->parse_queue, &out_pkt,
-                            &s->internal->parse_queue_end, 1);
-        av_packet_unref(&out_pkt);
-        if (ret < 0)
+        ret = ff_packet_list_put(&s->internal->parse_queue,
+                                 &s->internal->parse_queue_end,
+                                 &out_pkt, 0);
+        if (ret < 0) {
+            av_packet_unref(&out_pkt);
             goto fail;
+        }
     }
 
     /* end of the stream => close and free the parser */
@@ -1485,9 +1546,9 @@
     return ret;
 }
 
-static int read_from_packet_buffer(AVPacketList **pkt_buffer,
-                                   AVPacketList **pkt_buffer_end,
-                                   AVPacket      *pkt)
+int ff_packet_list_get(AVPacketList **pkt_buffer,
+                       AVPacketList **pkt_buffer_end,
+                       AVPacket      *pkt)
 {
     AVPacketList *pktl;
     av_assert0(*pkt_buffer);
@@ -1633,7 +1694,7 @@
     }
 
     if (!got_packet && s->internal->parse_queue)
-        ret = read_from_packet_buffer(&s->internal->parse_queue, &s->internal->parse_queue_end, pkt);
+        ret = ff_packet_list_get(&s->internal->parse_queue, &s->internal->parse_queue_end, pkt);
 
     if (ret >= 0) {
         AVStream *st = s->streams[pkt->stream_index];
@@ -1677,13 +1738,6 @@
             }
             st->inject_global_side_data = 0;
         }
-
-#if FF_API_LAVF_MERGE_SD
-FF_DISABLE_DEPRECATION_WARNINGS
-        if (!(s->flags & AVFMT_FLAG_KEEP_SIDE_DATA))
-            av_packet_merge_side_data(pkt);
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
     }
 
     av_opt_get_dict_val(s, "metadata", AV_OPT_SEARCH_CHILDREN, &metadata);
@@ -1719,7 +1773,7 @@
 
     if (!genpts) {
         ret = s->internal->packet_buffer
-              ? read_from_packet_buffer(&s->internal->packet_buffer,
+              ? ff_packet_list_get(&s->internal->packet_buffer,
                                         &s->internal->packet_buffer_end, pkt)
               : read_frame_internal(s, pkt);
         if (ret < 0)
@@ -1768,7 +1822,7 @@
             st = s->streams[next_pkt->stream_index];
             if (!(next_pkt->pts == AV_NOPTS_VALUE && st->discard < AVDISCARD_ALL &&
                   next_pkt->dts != AV_NOPTS_VALUE && !eof)) {
-                ret = read_from_packet_buffer(&s->internal->packet_buffer,
+                ret = ff_packet_list_get(&s->internal->packet_buffer,
                                                &s->internal->packet_buffer_end, pkt);
                 goto return_packet;
             }
@@ -1783,8 +1837,9 @@
                 return ret;
         }
 
-        ret = add_to_pktbuf(&s->internal->packet_buffer, pkt,
-                            &s->internal->packet_buffer_end, 1);
+        ret = ff_packet_list_put(&s->internal->packet_buffer,
+                                 &s->internal->packet_buffer_end,
+                                 pkt, FF_PACKETLIST_FLAG_REF_PACKET);
         av_packet_unref(pkt);
         if (ret < 0)
             return ret;
@@ -1811,9 +1866,9 @@
 {
     if (!s->internal)
         return;
-    free_packet_buffer(&s->internal->parse_queue,       &s->internal->parse_queue_end);
-    free_packet_buffer(&s->internal->packet_buffer,     &s->internal->packet_buffer_end);
-    free_packet_buffer(&s->internal->raw_packet_buffer, &s->internal->raw_packet_buffer_end);
+    ff_packet_list_free(&s->internal->parse_queue,       &s->internal->parse_queue_end);
+    ff_packet_list_free(&s->internal->packet_buffer,     &s->internal->packet_buffer_end);
+    ff_packet_list_free(&s->internal->raw_packet_buffer, &s->internal->raw_packet_buffer_end);
 
     s->internal->raw_packet_buffer_remaining_size = RAW_PACKET_BUFFER_SIZE;
 }
@@ -2038,7 +2093,7 @@
     int64_t pos_delta = 0;
     int64_t skip = 0;
     //We could use URLProtocol flags here but as many user applications do not use URLProtocols this would be unreliable
-    const char *proto = avio_find_protocol_name(s->filename);
+    const char *proto = avio_find_protocol_name(s->url);
 
     if (!proto) {
         av_log(s, AV_LOG_INFO,
@@ -2563,9 +2618,8 @@
 static void update_stream_timings(AVFormatContext *ic)
 {
     int64_t start_time, start_time1, start_time_text, end_time, end_time1, end_time_text;
-    int64_t duration, duration1, filesize;
+    int64_t duration, duration1, duration_text, filesize;
     int i;
-    AVStream *st;
     AVProgram *p;
 
     start_time = INT64_MAX;
@@ -2573,22 +2627,25 @@
     end_time   = INT64_MIN;
     end_time_text   = INT64_MIN;
     duration   = INT64_MIN;
+    duration_text = INT64_MIN;
+
     for (i = 0; i < ic->nb_streams; i++) {
-        st = ic->streams[i];
+        AVStream *st = ic->streams[i];
+        int is_text = st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE ||
+                      st->codecpar->codec_type == AVMEDIA_TYPE_DATA;
         if (st->start_time != AV_NOPTS_VALUE && st->time_base.den) {
             start_time1 = av_rescale_q(st->start_time, st->time_base,
                                        AV_TIME_BASE_Q);
-            if (st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE || st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
-                if (start_time1 < start_time_text)
-                    start_time_text = start_time1;
-            } else
+            if (is_text)
+                start_time_text = FFMIN(start_time_text, start_time1);
+            else
                 start_time = FFMIN(start_time, start_time1);
             end_time1 = av_rescale_q_rnd(st->duration, st->time_base,
                                          AV_TIME_BASE_Q,
                                          AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
             if (end_time1 != AV_NOPTS_VALUE && (end_time1 > 0 ? start_time1 <= INT64_MAX - end_time1 : start_time1 >= INT64_MIN - end_time1)) {
                 end_time1 += start_time1;
-                if (st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE || st->codecpar->codec_type == AVMEDIA_TYPE_DATA)
+                if (is_text)
                     end_time_text = FFMAX(end_time_text, end_time1);
                 else
                     end_time = FFMAX(end_time, end_time1);
@@ -2603,19 +2660,26 @@
         if (st->duration != AV_NOPTS_VALUE) {
             duration1 = av_rescale_q(st->duration, st->time_base,
                                      AV_TIME_BASE_Q);
-            duration  = FFMAX(duration, duration1);
+            if (is_text)
+                duration_text = FFMAX(duration_text, duration1);
+            else
+                duration = FFMAX(duration, duration1);
         }
     }
-    if (start_time == INT64_MAX || (start_time > start_time_text && start_time - start_time_text < AV_TIME_BASE))
+    if (start_time == INT64_MAX || (start_time > start_time_text && start_time - (uint64_t)start_time_text < AV_TIME_BASE))
         start_time = start_time_text;
     else if (start_time > start_time_text)
         av_log(ic, AV_LOG_VERBOSE, "Ignoring outlier non primary stream starttime %f\n", start_time_text / (float)AV_TIME_BASE);
 
-    if (end_time == INT64_MIN || (end_time < end_time_text && end_time_text - end_time < AV_TIME_BASE)) {
+    if (end_time == INT64_MIN || (end_time < end_time_text && end_time_text - (uint64_t)end_time < AV_TIME_BASE))
         end_time = end_time_text;
-    } else if (end_time < end_time_text) {
+    else if (end_time < end_time_text)
         av_log(ic, AV_LOG_VERBOSE, "Ignoring outlier non primary stream endtime %f\n", end_time_text / (float)AV_TIME_BASE);
-    }
+
+     if (duration == INT64_MIN || (duration < duration_text && duration_text - duration < AV_TIME_BASE))
+         duration = duration_text;
+     else if (duration < duration_text)
+         av_log(ic, AV_LOG_VERBOSE, "Ignoring outlier non primary stream duration %f\n", duration_text / (float)AV_TIME_BASE);
 
     if (start_time != INT64_MAX) {
         ic->start_time = start_time;
@@ -2748,6 +2812,11 @@
         }
     }
 
+    if (ic->skip_estimate_duration_from_pts) {
+        av_log(ic, AV_LOG_INFO, "Skipping duration calculation in estimate_timings_from_pts\n");
+        goto skip_duration_calc;
+    }
+
     av_opt_set(ic, "skip_changes", "1", AV_OPT_SEARCH_CHILDREN);
     /* estimate the end time (duration) */
     /* XXX: may need to support wrapping */
@@ -2832,6 +2901,7 @@
             }
         }
     }
+skip_duration_calc:
     fill_all_stream_timings(ic);
 
     avio_seek(ic->pb, old_offset, SEEK_SET);
@@ -3215,23 +3285,20 @@
 
 int ff_alloc_extradata(AVCodecParameters *par, int size)
 {
-    int ret;
+    av_freep(&par->extradata);
+    par->extradata_size = 0;
 
-    if (size < 0 || size >= INT32_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
-        par->extradata = NULL;
-        par->extradata_size = 0;
+    if (size < 0 || size >= INT32_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
         return AVERROR(EINVAL);
-    }
+
     par->extradata = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
-    if (par->extradata) {
-        memset(par->extradata + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
-        par->extradata_size = size;
-        ret = 0;
-    } else {
-        par->extradata_size = 0;
-        ret = AVERROR(ENOMEM);
-    }
-    return ret;
+    if (!par->extradata)
+        return AVERROR(ENOMEM);
+
+    memset(par->extradata + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+    par->extradata_size = size;
+
+    return 0;
 }
 
 int ff_get_extradata(AVFormatContext *s, AVCodecParameters *par, AVIOContext *pb, int size)
@@ -3394,7 +3461,7 @@
 
 static int extract_extradata_init(AVStream *st)
 {
-    AVStreamInternal *i = st->internal;
+    AVStreamInternal *sti = st->internal;
     const AVBitStreamFilter *f;
     int ret;
 
@@ -3407,70 +3474,66 @@
     if (!ret)
         goto finish;
 
-    i->extract_extradata.pkt = av_packet_alloc();
-    if (!i->extract_extradata.pkt)
+    sti->extract_extradata.pkt = av_packet_alloc();
+    if (!sti->extract_extradata.pkt)
         return AVERROR(ENOMEM);
 
-    ret = av_bsf_alloc(f, &i->extract_extradata.bsf);
+    ret = av_bsf_alloc(f, &sti->extract_extradata.bsf);
     if (ret < 0)
         goto fail;
 
-    ret = avcodec_parameters_copy(i->extract_extradata.bsf->par_in,
+    ret = avcodec_parameters_copy(sti->extract_extradata.bsf->par_in,
                                   st->codecpar);
     if (ret < 0)
         goto fail;
 
-    i->extract_extradata.bsf->time_base_in = st->time_base;
+    sti->extract_extradata.bsf->time_base_in = st->time_base;
 
-    /* if init fails here, we assume extracting extradata is just not
-     * supported for this codec, so we return success */
-    ret = av_bsf_init(i->extract_extradata.bsf);
-    if (ret < 0) {
-        av_bsf_free(&i->extract_extradata.bsf);
-        ret = 0;
-    }
+    ret = av_bsf_init(sti->extract_extradata.bsf);
+    if (ret < 0)
+        goto fail;
 
 finish:
-    i->extract_extradata.inited = 1;
+    sti->extract_extradata.inited = 1;
 
     return 0;
 fail:
-    av_bsf_free(&i->extract_extradata.bsf);
-    av_packet_free(&i->extract_extradata.pkt);
+    av_bsf_free(&sti->extract_extradata.bsf);
+    av_packet_free(&sti->extract_extradata.pkt);
     return ret;
 }
 
 static int extract_extradata(AVStream *st, AVPacket *pkt)
 {
-    AVStreamInternal *i = st->internal;
+    AVStreamInternal *sti = st->internal;
     AVPacket *pkt_ref;
     int ret;
 
-    if (!i->extract_extradata.inited) {
+    if (!sti->extract_extradata.inited) {
         ret = extract_extradata_init(st);
         if (ret < 0)
             return ret;
     }
 
-    if (i->extract_extradata.inited && !i->extract_extradata.bsf)
+    if (sti->extract_extradata.inited && !sti->extract_extradata.bsf)
         return 0;
 
-    pkt_ref = i->extract_extradata.pkt;
+    pkt_ref = sti->extract_extradata.pkt;
     ret = av_packet_ref(pkt_ref, pkt);
     if (ret < 0)
         return ret;
 
-    ret = av_bsf_send_packet(i->extract_extradata.bsf, pkt_ref);
+    ret = av_bsf_send_packet(sti->extract_extradata.bsf, pkt_ref);
     if (ret < 0) {
         av_packet_unref(pkt_ref);
         return ret;
     }
 
-    while (ret >= 0 && !i->avctx->extradata) {
+    while (ret >= 0 && !sti->avctx->extradata) {
         int extradata_size;
         uint8_t *extradata;
 
-        ret = av_bsf_receive_packet(i->extract_extradata.bsf, pkt_ref);
+        ret = av_bsf_receive_packet(sti->extract_extradata.bsf, pkt_ref);
         if (ret < 0) {
             if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
                 return ret;
@@ -3481,13 +3544,15 @@
                                             &extradata_size);
 
         if (extradata) {
-            i->avctx->extradata = av_mallocz(extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
-            if (!i->avctx->extradata) {
+            av_assert0(!sti->avctx->extradata);
+            if ((unsigned)extradata_size < FF_MAX_EXTRADATA_SIZE)
+                sti->avctx->extradata = av_mallocz(extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+            if (!sti->avctx->extradata) {
                 av_packet_unref(pkt_ref);
                 return AVERROR(ENOMEM);
             }
-            memcpy(i->avctx->extradata, extradata, extradata_size);
-            i->avctx->extradata_size = extradata_size;
+            memcpy(sti->avctx->extradata, extradata, extradata_size);
+            sti->avctx->extradata_size = extradata_size;
         }
         av_packet_unref(pkt_ref);
     }
@@ -3630,6 +3695,7 @@
         /* check if one codec still needs to be handled */
         for (i = 0; i < ic->nb_streams; i++) {
             int fps_analyze_framecount = 20;
+            int count;
 
             st = ic->streams[i];
             if (!has_codec_parameters(st, NULL))
@@ -3646,14 +3712,18 @@
             if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
                 fps_analyze_framecount = 0;
             /* variable fps and no guess at the real fps */
+            count = (ic->iformat->flags & AVFMT_NOTIMESTAMPS) ?
+                       st->info->codec_info_duration_fields/2 :
+                       st->info->duration_count;
             if (!(st->r_frame_rate.num && st->avg_frame_rate.num) &&
                 st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
-                int count = (ic->iformat->flags & AVFMT_NOTIMESTAMPS) ?
-                    st->info->codec_info_duration_fields/2 :
-                    st->info->duration_count;
                 if (count < fps_analyze_framecount)
                     break;
             }
+            // Look at the first 3 frames if there is evidence of frame delay
+            // but the decoder delay is not set.
+            if (st->info->frame_delay_evidence && count < 2 && st->internal->avctx->has_b_frames == 0)
+                break;
             if (!st->internal->avctx->extradata &&
                 (!st->internal->extract_extradata.inited ||
                  st->internal->extract_extradata.bsf) &&
@@ -3711,8 +3781,9 @@
         pkt = &pkt1;
 
         if (!(ic->flags & AVFMT_FLAG_NOBUFFER)) {
-            ret = add_to_pktbuf(&ic->internal->packet_buffer, pkt,
-                                &ic->internal->packet_buffer_end, 0);
+            ret = ff_packet_list_put(&ic->internal->packet_buffer,
+                                     &ic->internal->packet_buffer_end,
+                                     pkt, 0);
             if (ret < 0)
                 goto find_stream_info_err;
         }
@@ -3748,7 +3819,7 @@
             if (st->info->fps_last_dts != AV_NOPTS_VALUE &&
                 st->info->fps_last_dts_idx > st->info->fps_first_dts_idx &&
                 (pkt->dts - st->info->fps_last_dts) / 1000 >
-                (st->info->fps_last_dts     - st->info->fps_first_dts) /
+                (st->info->fps_last_dts     - (uint64_t)st->info->fps_first_dts) /
                 (st->info->fps_last_dts_idx - st->info->fps_first_dts_idx)) {
                 av_log(ic, AV_LOG_WARNING,
                        "DTS discontinuity in stream %d: packet %d with DTS "
@@ -3796,17 +3867,20 @@
                 break;
             }
             if (pkt->duration) {
-                if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && pkt->pts >= st->start_time) {
+                if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && st->start_time != AV_NOPTS_VALUE && pkt->pts >= st->start_time) {
                     st->info->codec_info_duration = FFMIN(pkt->pts - st->start_time, st->info->codec_info_duration + pkt->duration);
                 } else
                     st->info->codec_info_duration += pkt->duration;
                 st->info->codec_info_duration_fields += st->parser && st->need_parsing && avctx->ticks_per_frame ==2 ? st->parser->repeat_pict + 1 : 2;
             }
         }
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
 #if FF_API_R_FRAME_RATE
-        if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
             ff_rfps_add_frame(ic, st, pkt->dts);
 #endif
+            if (pkt->dts != pkt->pts && pkt->dts != AV_NOPTS_VALUE && pkt->pts != AV_NOPTS_VALUE)
+                st->info->frame_delay_evidence = 1;
+        }
         if (!st->internal->avctx->extradata) {
             ret = extract_extradata(st, pkt);
             if (ret < 0)
@@ -4022,11 +4096,13 @@
             ret = avcodec_parameters_from_context(st->codecpar, st->internal->avctx);
             if (ret < 0)
                 goto find_stream_info_err;
+#if FF_API_LOWRES
             // The decoder might reduce the video size by the lowres factor.
-            if (av_codec_get_lowres(st->internal->avctx) && orig_w) {
+            if (st->internal->avctx->lowres && orig_w) {
                 st->codecpar->width = orig_w;
                 st->codecpar->height = orig_h;
             }
+#endif
         }
 
 #if FF_API_LAVF_AVCTX
@@ -4035,13 +4111,15 @@
         if (ret < 0)
             goto find_stream_info_err;
 
+#if FF_API_LOWRES
         // The old API (AVStream.codec) "requires" the resolution to be adjusted
         // by the lowres factor.
-        if (av_codec_get_lowres(st->internal->avctx) && st->internal->avctx->width) {
-            av_codec_set_lowres(st->codec, av_codec_get_lowres(st->internal->avctx));
+        if (st->internal->avctx->lowres && st->internal->avctx->width) {
+            st->codec->lowres = st->internal->avctx->lowres;
             st->codec->width = st->internal->avctx->width;
             st->codec->height = st->internal->avctx->height;
         }
+#endif
 
         if (st->codec->codec_tag != MKTAG('t','m','c','d')) {
             st->codec->time_base = st->internal->avctx->time_base;
@@ -4232,6 +4310,8 @@
         }
     }
 
+#if FF_API_LAVF_FFSERVER
+FF_DISABLE_DEPRECATION_WARNINGS
     av_freep(&dst->recommended_encoder_configuration);
     if (src->recommended_encoder_configuration) {
         const char *conf_str = src->recommended_encoder_configuration;
@@ -4239,6 +4319,8 @@
         if (!dst->recommended_encoder_configuration)
             return AVERROR(ENOMEM);
     }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     return 0;
 }
@@ -4267,6 +4349,7 @@
             av_bsf_free(&st->internal->bsfcs[i]);
             av_freep(&st->internal->bsfcs);
         }
+        av_freep(&st->internal->priv_pts);
         av_bsf_free(&st->internal->extract_extradata.bsf);
         av_packet_free(&st->internal->extract_extradata.pkt);
     }
@@ -4285,8 +4368,11 @@
     if (st->info)
         av_freep(&st->info->duration_error);
     av_freep(&st->info);
+#if FF_API_LAVF_FFSERVER
+FF_DISABLE_DEPRECATION_WARNINGS
     av_freep(&st->recommended_encoder_configuration);
-    av_freep(&st->priv_pts);
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     av_freep(pst);
 }
@@ -4333,6 +4419,7 @@
     av_freep(&s->streams);
     flush_packet_queue(s);
     av_freep(&s->internal);
+    av_freep(&s->url);
     av_free(s);
 }
 
@@ -4480,6 +4567,7 @@
             return NULL;
         dynarray_add(&ac->programs, &ac->nb_programs, program);
         program->discard = AVDISCARD_NONE;
+        program->pmt_version = -1;
     }
     program->id = id;
     program->pts_wrap_reference = AV_NOPTS_VALUE;
@@ -4554,6 +4642,28 @@
     return (av_gettime() / 1000) * 1000 + NTP_OFFSET_US;
 }
 
+uint64_t ff_get_formatted_ntp_time(uint64_t ntp_time_us)
+{
+    uint64_t ntp_ts, frac_part, sec;
+    uint32_t usec;
+
+    //current ntp time in seconds and micro seconds
+    sec = ntp_time_us / 1000000;
+    usec = ntp_time_us % 1000000;
+
+    //encoding in ntp timestamp format
+    frac_part = usec * 0xFFFFFFFFULL;
+    frac_part /= 1000000;
+
+    if (sec > 0xFFFFFFFFULL)
+        av_log(NULL, AV_LOG_WARNING, "NTP time format roll over detected\n");
+
+    ntp_ts = sec << 32;
+    ntp_ts |= frac_part;
+
+    return ntp_ts;
+}
+
 int av_get_frame_filename2(char *buf, int buf_size, const char *path, int number, int flags)
 {
     const char *p;
@@ -4685,6 +4795,40 @@
     }
 }
 
+int ff_mkdir_p(const char *path)
+{
+    int ret = 0;
+    char *temp = av_strdup(path);
+    char *pos = temp;
+    char tmp_ch = '\0';
+
+    if (!path || !temp) {
+        return -1;
+    }
+
+    if (!av_strncasecmp(temp, "/", 1) || !av_strncasecmp(temp, "\\", 1)) {
+        pos++;
+    } else if (!av_strncasecmp(temp, "./", 2) || !av_strncasecmp(temp, ".\\", 2)) {
+        pos += 2;
+    }
+
+    for ( ; *pos != '\0'; ++pos) {
+        if (*pos == '/' || *pos == '\\') {
+            tmp_ch = *pos;
+            *pos = '\0';
+            ret = mkdir(temp, 0755);
+            *pos = tmp_ch;
+        }
+    }
+
+    if ((*(pos - 1) != '/') || (*(pos - 1) != '\\')) {
+        ret = mkdir(temp, 0755);
+    }
+
+    av_free(temp);
+    return ret;
+}
+
 char *ff_data_to_hex(char *buff, const uint8_t *src, int s, int lowercase)
 {
     int i;
@@ -4757,10 +4901,10 @@
     s->time_base     = new_tb;
 #if FF_API_LAVF_AVCTX
 FF_DISABLE_DEPRECATION_WARNINGS
-    av_codec_set_pkt_timebase(s->codec, new_tb);
+    s->codec->pkt_timebase = new_tb;
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
-    av_codec_set_pkt_timebase(s->internal->avctx, new_tb);
+    s->internal->avctx->pkt_timebase = new_tb;
     s->pts_wrap_bits = pts_wrap_bits;
 }
 
@@ -4849,7 +4993,6 @@
 {
 #if CONFIG_NETWORK
     int ret;
-    ff_network_inited_globally = 1;
     if ((ret = ff_network_init()) < 0)
         return ret;
     if ((ret = ff_tls_init()) < 0)
@@ -4863,7 +5006,6 @@
 #if CONFIG_NETWORK
     ff_network_close();
     ff_tls_deinit();
-    ff_network_inited_globally = 0;
 #endif
     return 0;
 }
@@ -5016,11 +5158,94 @@
             if (s->programs[i]->id != prog_id)
                 continue;
 
-            if (*endptr++ == ':') {
-                int stream_idx = strtol(endptr, NULL, 0);
-                return stream_idx >= 0 &&
-                    stream_idx < s->programs[i]->nb_stream_indexes &&
-                    st->index == s->programs[i]->stream_index[stream_idx];
+            if (*endptr++ == ':') {  // p:<id>:....
+                if ( *endptr == 'a' || *endptr == 'v' ||
+                     *endptr == 's' || *endptr == 'd') {  // p:<id>:<st_type>[:<index>]
+                    enum AVMediaType type;
+
+                    switch (*endptr++) {
+                    case 'v': type = AVMEDIA_TYPE_VIDEO;      break;
+                    case 'a': type = AVMEDIA_TYPE_AUDIO;      break;
+                    case 's': type = AVMEDIA_TYPE_SUBTITLE;   break;
+                    case 'd': type = AVMEDIA_TYPE_DATA;       break;
+                    default:  av_assert0(0);
+                    }
+                    if (*endptr++ == ':') {  // p:<id>:<st_type>:<index>
+                        int stream_idx = strtol(endptr, NULL, 0), type_counter = 0;
+                        for (j = 0; j < s->programs[i]->nb_stream_indexes; j++) {
+                            int stream_index = s->programs[i]->stream_index[j];
+                            if (st->index == s->programs[i]->stream_index[j]) {
+#if FF_API_LAVF_AVCTX
+FF_DISABLE_DEPRECATION_WARNINGS
+                                return type_counter == stream_idx &&
+                                       (type == st->codecpar->codec_type ||
+                                        type == st->codec->codec_type);
+FF_ENABLE_DEPRECATION_WARNINGS
+#else
+                                return type_counter == stream_idx &&
+                                       type == st->codecpar->codec_type;
+#endif
+                             }
+#if FF_API_LAVF_AVCTX
+FF_DISABLE_DEPRECATION_WARNINGS
+                            if (type == s->streams[stream_index]->codecpar->codec_type ||
+                                type == s->streams[stream_index]->codec->codec_type)
+                                type_counter++;
+FF_ENABLE_DEPRECATION_WARNINGS
+#else
+                            if (type == s->streams[stream_index]->codecpar->codec_type)
+                                type_counter++;
+#endif
+                        }
+                        return 0;
+                    } else {  // p:<id>:<st_type>
+                        for (j = 0; j < s->programs[i]->nb_stream_indexes; j++)
+                            if (st->index == s->programs[i]->stream_index[j]) {
+#if FF_API_LAVF_AVCTX
+FF_DISABLE_DEPRECATION_WARNINGS
+                                 return type == st->codecpar->codec_type ||
+                                        type == st->codec->codec_type;
+FF_ENABLE_DEPRECATION_WARNINGS
+#else
+                                 return type == st->codecpar->codec_type;
+#endif
+                            }
+                        return 0;
+                    }
+
+                } else if ( *endptr == 'm') { // p:<id>:m:<metadata_spec>
+                    AVDictionaryEntry *tag;
+                    char *key, *val;
+                    int ret = 0;
+
+                    if (*(++endptr) != ':') {
+                        av_log(s, AV_LOG_ERROR, "Invalid stream specifier syntax, missing ':' sign after :m.\n");
+                        return AVERROR(EINVAL);
+                    }
+
+                    val = strchr(++endptr, ':');
+                    key = val ? av_strndup(endptr, val - endptr) : av_strdup(endptr);
+                    if (!key)
+                        return AVERROR(ENOMEM);
+
+                    for (j = 0; j < s->programs[i]->nb_stream_indexes; j++)
+                        if (st->index == s->programs[i]->stream_index[j]) {
+                            tag = av_dict_get(st->metadata, key, NULL, 0);
+                            if (tag && (!val || !strcmp(tag->value, val + 1)))
+                                ret = 1;
+
+                            break;
+                        }
+
+                    av_freep(&key);
+                    return ret;
+
+                } else {  // p:<id>:<index>
+                    int stream_idx = strtol(endptr, NULL, 0);
+                    return stream_idx >= 0 &&
+                           stream_idx < s->programs[i]->nb_stream_indexes &&
+                           st->index == s->programs[i]->stream_index[stream_idx];
+                }
             }
 
             for (j = 0; j < s->programs[i]->nb_stream_indexes; j++)
@@ -5247,13 +5472,8 @@
     return 0;
 }
 
-#if FF_API_NOCONST_GET_SIDE_DATA
-uint8_t *av_stream_get_side_data(AVStream *st,
-                                 enum AVPacketSideDataType type, int *size)
-#else
 uint8_t *av_stream_get_side_data(const AVStream *st,
                                  enum AVPacketSideDataType type, int *size)
-#endif
 {
     int i;
 
@@ -5453,6 +5673,11 @@
     *pb = NULL;
 }
 
+int ff_is_http_proto(char *filename) {
+    const char *proto = avio_find_protocol_name(filename);
+    return proto ? (!av_strcasecmp(proto, "http") || !av_strcasecmp(proto, "https")) : 0;
+}
+
 int ff_parse_creation_time_metadata(AVFormatContext *s, int64_t *timestamp, int return_seconds)
 {
     AVDictionaryEntry *entry;
@@ -5600,3 +5825,15 @@
     return st->internal->avctx->time_base;
 #endif
 }
+
+void ff_format_set_url(AVFormatContext *s, char *url)
+{
+    av_assert0(url);
+    av_freep(&s->url);
+    s->url = url;
+#if FF_API_FORMAT_FILENAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    av_strlcpy(s->filename, url, sizeof(s->filename));
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+}

diff --git a/libavformat/vapoursynth.c b/libavformat/vapoursynth.c
new file mode 100644
index 0000000..f3ad691
--- /dev/null
+++ b/libavformat/vapoursynth.c

@@ -0,0 +1,496 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+* @file
+* VapourSynth demuxer
+*
+* Synthesizes vapour (?)
+*/
+
+#include <limits.h>
+
+#include <VapourSynth.h>
+#include <VSScript.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/eval.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avformat.h"
+#include "internal.h"
+
+struct VSState {
+    VSScript *vss;
+};
+
+typedef struct VSContext {
+    const AVClass *class;
+
+    AVBufferRef *vss_state;
+
+    const VSAPI *vsapi;
+    VSCore *vscore;
+
+    VSNodeRef *outnode;
+    int is_cfr;
+    int current_frame;
+
+    int c_order[4];
+
+    /* options */
+    int64_t max_script_size;
+} VSContext;
+
+#define OFFSET(x) offsetof(VSContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM
+#define D AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+    {"max_script_size",    "set max file size supported (in bytes)", OFFSET(max_script_size),    AV_OPT_TYPE_INT64, {.i64 = 1 * 1024 * 1024}, 0,    SIZE_MAX - 1, A|D},
+    {NULL}
+};
+
+static void free_vss_state(void *opaque, uint8_t *data)
+{
+    struct VSState *vss = opaque;
+
+    if (vss->vss) {
+        vsscript_freeScript(vss->vss);
+        vsscript_finalize();
+    }
+}
+
+static av_cold int read_close_vs(AVFormatContext *s)
+{
+    VSContext *vs = s->priv_data;
+
+    if (vs->outnode)
+        vs->vsapi->freeNode(vs->outnode);
+
+    av_buffer_unref(&vs->vss_state);
+
+    vs->vsapi = NULL;
+    vs->vscore = NULL;
+    vs->outnode = NULL;
+
+    return 0;
+}
+
+static av_cold int is_native_endian(enum AVPixelFormat pixfmt)
+{
+    enum AVPixelFormat other = av_pix_fmt_swap_endianness(pixfmt);
+    const AVPixFmtDescriptor *pd;
+    if (other == AV_PIX_FMT_NONE || other == pixfmt)
+        return 1; // not affected by byte order
+    pd = av_pix_fmt_desc_get(pixfmt);
+    return pd && (!!HAVE_BIGENDIAN == !!(pd->flags & AV_PIX_FMT_FLAG_BE));
+}
+
+static av_cold enum AVPixelFormat match_pixfmt(const VSFormat *vsf, int c_order[4])
+{
+    static const int yuv_order[4] = {0, 1, 2, 0};
+    static const int rgb_order[4] = {1, 2, 0, 0};
+    const AVPixFmtDescriptor *pd;
+
+    for (pd = av_pix_fmt_desc_next(NULL); pd; pd = av_pix_fmt_desc_next(pd)) {
+        int is_rgb, is_yuv, i;
+        const int *order;
+        enum AVPixelFormat pixfmt;
+
+        pixfmt = av_pix_fmt_desc_get_id(pd);
+
+        if (pd->flags & (AV_PIX_FMT_FLAG_BAYER | AV_PIX_FMT_FLAG_ALPHA |
+                         AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM))
+            continue;
+
+        if (pd->log2_chroma_w != vsf->subSamplingW ||
+            pd->log2_chroma_h != vsf->subSamplingH)
+            continue;
+
+        is_rgb = vsf->colorFamily == cmRGB;
+        if (is_rgb != !!(pd->flags & AV_PIX_FMT_FLAG_RGB))
+            continue;
+
+        is_yuv = vsf->colorFamily == cmYUV ||
+                 vsf->colorFamily == cmYCoCg ||
+                 vsf->colorFamily == cmGray;
+        if (!is_rgb && !is_yuv)
+            continue;
+
+        if (vsf->sampleType != ((pd->flags & AV_PIX_FMT_FLAG_FLOAT) ? stFloat : stInteger))
+            continue;
+
+        if (av_pix_fmt_count_planes(pixfmt) != vsf->numPlanes)
+            continue;
+
+        if (strncmp(pd->name, "xyz", 3) == 0)
+            continue;
+
+        if (!is_native_endian(pixfmt))
+            continue;
+
+        order = is_yuv ? yuv_order : rgb_order;
+
+        for (i = 0; i < pd->nb_components; i++) {
+            const AVComponentDescriptor *c = &pd->comp[i];
+            if (order[c->plane] != i ||
+                c->offset != 0 || c->shift != 0 ||
+                c->step != vsf->bytesPerSample ||
+                c->depth != vsf->bitsPerSample)
+                goto cont;
+        }
+
+        // Use it.
+        memcpy(c_order, order, sizeof(int[4]));
+        return pixfmt;
+
+    cont: ;
+    }
+
+    return AV_PIX_FMT_NONE;
+}
+
+static av_cold int read_header_vs(AVFormatContext *s)
+{
+    AVStream *st;
+    AVIOContext *pb = s->pb;
+    VSContext *vs = s->priv_data;
+    int64_t sz = avio_size(pb);
+    char *buf = NULL;
+    char dummy;
+    const VSVideoInfo *info;
+    struct VSState *vss_state;
+    int err;
+
+    vss_state = av_mallocz(sizeof(*vss_state));
+    if (!vss_state) {
+        err = AVERROR(ENOMEM);
+        goto done;
+    }
+
+    vs->vss_state = av_buffer_create(NULL, 0, free_vss_state, vss_state, 0);
+    if (!vs->vss_state) {
+        err = AVERROR(ENOMEM);
+        av_free(vss_state);
+        goto done;
+    }
+
+    if (!vsscript_init()) {
+        av_log(s, AV_LOG_ERROR, "Failed to initialize VSScript (possibly PYTHONPATH not set).\n");
+        err = AVERROR_EXTERNAL;
+        goto done;
+    }
+
+    if (vsscript_createScript(&vss_state->vss)) {
+        av_log(s, AV_LOG_ERROR, "Failed to create script instance.\n");
+        err = AVERROR_EXTERNAL;
+        vsscript_finalize();
+        goto done;
+    }
+
+    if (sz < 0 || sz > vs->max_script_size) {
+        if (sz < 0)
+            av_log(s, AV_LOG_WARNING, "Could not determine file size\n");
+        sz = vs->max_script_size;
+    }
+
+    buf = av_malloc(sz + 1);
+    if (!buf) {
+        err = AVERROR(ENOMEM);
+        goto done;
+    }
+    sz = avio_read(pb, buf, sz);
+
+    if (sz < 0) {
+        av_log(s, AV_LOG_ERROR, "Could not read script.\n");
+        err = sz;
+        goto done;
+    }
+
+    // Data left means our buffer (the max_script_size option) is too small
+    if (avio_read(pb, &dummy, 1) == 1) {
+        av_log(s, AV_LOG_ERROR, "File size is larger than max_script_size option "
+               "value %"PRIi64", consider increasing the max_script_size option\n",
+               vs->max_script_size);
+        err = AVERROR_BUFFER_TOO_SMALL;
+        goto done;
+    }
+
+    buf[sz] = '\0';
+    if (vsscript_evaluateScript(&vss_state->vss, buf, s->url, 0)) {
+        const char *msg = vsscript_getError(vss_state->vss);
+        av_log(s, AV_LOG_ERROR, "Failed to parse script: %s\n", msg ? msg : "(unknown)");
+        err = AVERROR_EXTERNAL;
+        goto done;
+    }
+
+    vs->vsapi = vsscript_getVSApi();
+    vs->vscore = vsscript_getCore(vss_state->vss);
+
+    vs->outnode = vsscript_getOutput(vss_state->vss, 0);
+    if (!vs->outnode) {
+        av_log(s, AV_LOG_ERROR, "Could not get script output node.\n");
+        err = AVERROR_EXTERNAL;
+        goto done;
+    }
+
+    st = avformat_new_stream(s, NULL);
+    if (!st) {
+        err = AVERROR(ENOMEM);
+        goto done;
+    }
+
+    info = vs->vsapi->getVideoInfo(vs->outnode);
+
+    if (!info->format || !info->width || !info->height) {
+        av_log(s, AV_LOG_ERROR, "Non-constant input format not supported.\n");
+        err = AVERROR_PATCHWELCOME;
+        goto done;
+    }
+
+    if (info->fpsDen) {
+        vs->is_cfr = 1;
+        avpriv_set_pts_info(st, 64, info->fpsDen, info->fpsNum);
+        st->duration = info->numFrames;
+    } else {
+        // VFR. Just set "something".
+        avpriv_set_pts_info(st, 64, 1, AV_TIME_BASE);
+        s->ctx_flags |= AVFMTCTX_UNSEEKABLE;
+    }
+
+    st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+    st->codecpar->codec_id = AV_CODEC_ID_WRAPPED_AVFRAME;
+    st->codecpar->width = info->width;
+    st->codecpar->height = info->height;
+    st->codecpar->format = match_pixfmt(info->format, vs->c_order);
+
+    if (st->codecpar->format == AV_PIX_FMT_NONE) {
+        av_log(s, AV_LOG_ERROR, "Unsupported VS pixel format %s\n", info->format->name);
+        err = AVERROR_EXTERNAL;
+        goto done;
+    }
+    av_log(s, AV_LOG_VERBOSE, "VS format %s -> pixfmt %s\n", info->format->name,
+           av_get_pix_fmt_name(st->codecpar->format));
+
+    if (info->format->colorFamily == cmYCoCg)
+        st->codecpar->color_space = AVCOL_SPC_YCGCO;
+
+done:
+    av_free(buf);
+    if (err < 0)
+        read_close_vs(s);
+    return err;
+}
+
+static void free_frame(void *opaque, uint8_t *data)
+{
+    AVFrame *frame = (AVFrame *)data;
+
+    av_frame_free(&frame);
+}
+
+static int get_vs_prop_int(AVFormatContext *s, const VSMap *map, const char *name, int def)
+{
+    VSContext *vs = s->priv_data;
+    int64_t res;
+    int err = 1;
+
+    res = vs->vsapi->propGetInt(map, name, 0, &err);
+    return err || res < INT_MIN || res > INT_MAX ? def : res;
+}
+
+struct vsframe_ref_data {
+    const VSAPI *vsapi;
+    const VSFrameRef *frame;
+    AVBufferRef *vss_state;
+};
+
+static void free_vsframe_ref(void *opaque, uint8_t *data)
+{
+    struct vsframe_ref_data *d = opaque;
+
+    if (d->frame)
+        d->vsapi->freeFrame(d->frame);
+
+    av_buffer_unref(&d->vss_state);
+
+    av_free(d);
+}
+
+static int read_packet_vs(AVFormatContext *s, AVPacket *pkt)
+{
+    VSContext *vs = s->priv_data;
+    AVStream *st = s->streams[0];
+    AVFrame *frame = NULL;
+    char vserr[80];
+    const VSFrameRef *vsframe;
+    const VSVideoInfo *info = vs->vsapi->getVideoInfo(vs->outnode);
+    const VSMap *props;
+    const AVPixFmtDescriptor *desc;
+    AVBufferRef *vsframe_ref = NULL;
+    struct vsframe_ref_data *ref_data;
+    int err = 0;
+    int i;
+
+    if (vs->current_frame >= info->numFrames)
+        return AVERROR_EOF;
+
+    ref_data = av_mallocz(sizeof(*ref_data));
+    if (!ref_data) {
+        err = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    // (the READONLY flag is important because the ref is reused for plane data)
+    vsframe_ref = av_buffer_create(NULL, 0, free_vsframe_ref, ref_data, AV_BUFFER_FLAG_READONLY);
+    if (!vsframe_ref) {
+        err = AVERROR(ENOMEM);
+        av_free(ref_data);
+        goto end;
+    }
+
+    vsframe = vs->vsapi->getFrame(vs->current_frame, vs->outnode, vserr, sizeof(vserr));
+    if (!vsframe) {
+        av_log(s, AV_LOG_ERROR, "Error getting frame: %s\n", vserr);
+        err = AVERROR_EXTERNAL;
+        goto end;
+    }
+
+    ref_data->vsapi = vs->vsapi;
+    ref_data->frame = vsframe;
+
+    ref_data->vss_state = av_buffer_ref(vs->vss_state);
+    if (!ref_data->vss_state) {
+        err = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    props = vs->vsapi->getFramePropsRO(vsframe);
+
+    frame = av_frame_alloc();
+    if (!frame) {
+        err = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    frame->format       = st->codecpar->format;
+    frame->width        = st->codecpar->width;
+    frame->height       = st->codecpar->height;
+    frame->colorspace   = st->codecpar->color_space;
+
+    // Values according to ISO/IEC 14496-10.
+    frame->colorspace       = get_vs_prop_int(s, props, "_Matrix",      frame->colorspace);
+    frame->color_primaries  = get_vs_prop_int(s, props, "_Primaries",   frame->color_primaries);
+    frame->color_trc        = get_vs_prop_int(s, props, "_Transfer",    frame->color_trc);
+
+    if (get_vs_prop_int(s, props, "_ColorRange", 1) == 0)
+        frame->color_range = AVCOL_RANGE_JPEG;
+
+    frame->sample_aspect_ratio.num = get_vs_prop_int(s, props, "_SARNum", 0);
+    frame->sample_aspect_ratio.den = get_vs_prop_int(s, props, "_SARDen", 1);
+
+    av_assert0(vs->vsapi->getFrameWidth(vsframe, 0) == frame->width);
+    av_assert0(vs->vsapi->getFrameHeight(vsframe, 0) == frame->height);
+
+    desc = av_pix_fmt_desc_get(frame->format);
+
+    for (i = 0; i < info->format->numPlanes; i++) {
+        int p = vs->c_order[i];
+        ptrdiff_t plane_h = frame->height;
+
+        frame->data[i] = (void *)vs->vsapi->getReadPtr(vsframe, p);
+        frame->linesize[i] = vs->vsapi->getStride(vsframe, p);
+
+        frame->buf[i] = av_buffer_ref(vsframe_ref);
+        if (!frame->buf[i]) {
+            err = AVERROR(ENOMEM);
+            goto end;
+        }
+
+        // Each plane needs an AVBufferRef that indicates the correct plane
+        // memory range. VapourSynth doesn't even give us the memory range,
+        // so make up a bad guess to make FFmpeg happy (even if almost nothing
+        // checks the memory range).
+        if (i == 1 || i == 2)
+            plane_h = AV_CEIL_RSHIFT(plane_h, desc->log2_chroma_h);
+        frame->buf[i]->data = frame->data[i];
+        frame->buf[i]->size = frame->linesize[i] * plane_h;
+    }
+
+    pkt->buf = av_buffer_create((uint8_t*)frame, sizeof(*frame),
+                                free_frame, NULL, 0);
+    if (!pkt->buf) {
+        err = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    frame = NULL; // pkt owns it now
+
+    pkt->data   = pkt->buf->data;
+    pkt->size   = pkt->buf->size;
+    pkt->flags |= AV_PKT_FLAG_TRUSTED;
+
+    if (vs->is_cfr)
+        pkt->pts = vs->current_frame;
+
+    vs->current_frame++;
+
+end:
+    av_frame_free(&frame);
+    av_buffer_unref(&vsframe_ref);
+    return err;
+}
+
+static int read_seek_vs(AVFormatContext *s, int stream_idx, int64_t ts, int flags)
+{
+    VSContext *vs = s->priv_data;
+
+    if (!vs->is_cfr)
+        return AVERROR(ENOSYS);
+
+    vs->current_frame = FFMIN(FFMAX(0, ts), s->streams[0]->duration);
+    return 0;
+}
+
+static av_cold int probe_vs(AVProbeData *p)
+{
+    // Explicitly do not support this. VS scripts are written in Python, and
+    // can run arbitrary code on the user's system.
+    return 0;
+}
+
+static const AVClass class_vs = {
+    .class_name = "VapourSynth demuxer",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVInputFormat ff_vapoursynth_demuxer = {
+    .name           = "vapoursynth",
+    .long_name      = NULL_IF_CONFIG_SMALL("VapourSynth demuxer"),
+    .priv_data_size = sizeof(VSContext),
+    .read_probe     = probe_vs,
+    .read_header    = read_header_vs,
+    .read_packet    = read_packet_vs,
+    .read_close     = read_close_vs,
+    .read_seek      = read_seek_vs,
+    .priv_class     = &class_vs,
+};

diff --git a/libavformat/version.h b/libavformat/version.h
index 878917d..e4a6907 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h

@@ -31,8 +31,8 @@
 
 // Major bumping may affect Ticket5467, 5421, 5451(compatibility with Chromium)
 // Also please add any ticket numbers that you believe might be affected here
-#define LIBAVFORMAT_VERSION_MAJOR  57
-#define LIBAVFORMAT_VERSION_MINOR  83
+#define LIBAVFORMAT_VERSION_MAJOR  58
+#define LIBAVFORMAT_VERSION_MINOR  19
 #define LIBAVFORMAT_VERSION_MICRO 100
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
@@ -55,47 +55,53 @@
  * at once through the bump. This improves the git bisect-ability of the change.
  *
  */
-#ifndef FF_API_LAVF_BITEXACT
-#define FF_API_LAVF_BITEXACT            (LIBAVFORMAT_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_LAVF_FRAC
-#define FF_API_LAVF_FRAC                (LIBAVFORMAT_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_LAVF_CODEC_TB
-#define FF_API_LAVF_CODEC_TB            (LIBAVFORMAT_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_URL_FEOF
-#define FF_API_URL_FEOF                 (LIBAVFORMAT_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_LAVF_FMT_RAWPICTURE
-#define FF_API_LAVF_FMT_RAWPICTURE      (LIBAVFORMAT_VERSION_MAJOR < 58)
-#endif
 #ifndef FF_API_COMPUTE_PKT_FIELDS2
-#define FF_API_COMPUTE_PKT_FIELDS2      (LIBAVFORMAT_VERSION_MAJOR < 58)
+#define FF_API_COMPUTE_PKT_FIELDS2      (LIBAVFORMAT_VERSION_MAJOR < 59)
 #endif
 #ifndef FF_API_OLD_OPEN_CALLBACKS
-#define FF_API_OLD_OPEN_CALLBACKS       (LIBAVFORMAT_VERSION_MAJOR < 58)
+#define FF_API_OLD_OPEN_CALLBACKS       (LIBAVFORMAT_VERSION_MAJOR < 59)
 #endif
 #ifndef FF_API_LAVF_AVCTX
-#define FF_API_LAVF_AVCTX               (LIBAVFORMAT_VERSION_MAJOR < 58)
-#endif
-#ifndef FF_API_NOCONST_GET_SIDE_DATA
-#define FF_API_NOCONST_GET_SIDE_DATA    (LIBAVFORMAT_VERSION_MAJOR < 58)
+#define FF_API_LAVF_AVCTX               (LIBAVFORMAT_VERSION_MAJOR < 59)
 #endif
 #ifndef FF_API_HTTP_USER_AGENT
-#define FF_API_HTTP_USER_AGENT          (LIBAVFORMAT_VERSION_MAJOR < 58)
+#define FF_API_HTTP_USER_AGENT          (LIBAVFORMAT_VERSION_MAJOR < 59)
 #endif
 #ifndef FF_API_HLS_WRAP
-#define FF_API_HLS_WRAP                 (LIBAVFORMAT_VERSION_MAJOR < 58)
+#define FF_API_HLS_WRAP                 (LIBAVFORMAT_VERSION_MAJOR < 59)
 #endif
-#ifndef FF_API_LAVF_MERGE_SD
-#define FF_API_LAVF_MERGE_SD            (LIBAVFORMAT_VERSION_MAJOR < 58)
+#ifndef FF_API_HLS_USE_LOCALTIME
+#define FF_API_HLS_USE_LOCALTIME        (LIBAVFORMAT_VERSION_MAJOR < 59)
 #endif
 #ifndef FF_API_LAVF_KEEPSIDE_FLAG
-#define FF_API_LAVF_KEEPSIDE_FLAG       (LIBAVFORMAT_VERSION_MAJOR < 58)
+#define FF_API_LAVF_KEEPSIDE_FLAG       (LIBAVFORMAT_VERSION_MAJOR < 59)
 #endif
 #ifndef FF_API_OLD_ROTATE_API
-#define FF_API_OLD_ROTATE_API           (LIBAVFORMAT_VERSION_MAJOR < 58)
+#define FF_API_OLD_ROTATE_API           (LIBAVFORMAT_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_FORMAT_GET_SET
+#define FF_API_FORMAT_GET_SET           (LIBAVFORMAT_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_OLD_AVIO_EOF_0
+#define FF_API_OLD_AVIO_EOF_0           (LIBAVFORMAT_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_LAVF_FFSERVER
+#define FF_API_LAVF_FFSERVER            (LIBAVFORMAT_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_FORMAT_FILENAME
+#define FF_API_FORMAT_FILENAME          (LIBAVFORMAT_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_OLD_RTSP_OPTIONS
+#define FF_API_OLD_RTSP_OPTIONS         (LIBAVFORMAT_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_NEXT
+#define FF_API_NEXT                     (LIBAVFORMAT_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_DASH_MIN_SEG_DURATION
+#define FF_API_DASH_MIN_SEG_DURATION    (LIBAVFORMAT_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_LAVF_MP4A_LATM
+#define FF_API_LAVF_MP4A_LATM           (LIBAVFORMAT_VERSION_MAJOR < 59)
 #endif
 
 

diff --git a/libavformat/vpcc.c b/libavformat/vpcc.c
index df08de5..e0b7f28 100644
--- a/libavformat/vpcc.c
+++ b/libavformat/vpcc.c

@@ -67,11 +67,58 @@
     return color_range == AVCOL_RANGE_JPEG;
 }
 
-int ff_isom_write_vpcc(AVFormatContext *s, AVIOContext *pb,
-                       AVCodecParameters *par)
+// Find approximate VP9 level based on the Luma's Sample rate and Picture size.
+static int get_vp9_level(AVCodecParameters *par, AVRational *frame_rate) {
+    int picture_size = par->width * par->height;
+    int64_t sample_rate;
+
+    // All decisions will be based on picture_size, if frame rate is missing/invalid
+    if (!frame_rate || !frame_rate->den)
+        sample_rate = 0;
+    else
+        sample_rate = ((int64_t)picture_size * frame_rate->num) / frame_rate->den;
+
+    if (picture_size <= 0) {
+        return 0;
+    } else if (sample_rate <= 829440     && picture_size <= 36864) {
+        return 10;
+    } else if (sample_rate <= 2764800    && picture_size <= 73728) {
+        return 11;
+    } else if (sample_rate <= 4608000    && picture_size <= 122880) {
+        return 20;
+    } else if (sample_rate <= 9216000    && picture_size <= 245760) {
+        return 21;
+    } else if (sample_rate <= 20736000   && picture_size <= 552960) {
+        return 30;
+    } else if (sample_rate <= 36864000   && picture_size <= 983040) {
+        return 31;
+    } else if (sample_rate <= 83558400   && picture_size <= 2228224) {
+        return 40;
+    } else if (sample_rate <= 160432128  && picture_size <= 2228224) {
+        return 41;
+    } else if (sample_rate <= 311951360  && picture_size <= 8912896) {
+        return 50;
+    } else if (sample_rate <= 588251136  && picture_size <= 8912896) {
+        return 51;
+    } else if (sample_rate <= 1176502272 && picture_size <= 8912896) {
+        return 52;
+    } else if (sample_rate <= 1176502272 && picture_size <= 35651584) {
+        return 60;
+    } else if (sample_rate <= 2353004544 && picture_size <= 35651584) {
+        return 61;
+    } else if (sample_rate <= 4706009088 && picture_size <= 35651584) {
+        return 62;
+    } else {
+        return 0;
+    }
+}
+
+int ff_isom_get_vpcc_features(AVFormatContext *s, AVCodecParameters *par,
+                              AVRational *frame_rate, VPCC *vpcc)
 {
     int profile = par->profile;
-    int level = par->level == FF_LEVEL_UNKNOWN ? 0 : par->level;
+    int level = par->level == FF_LEVEL_UNKNOWN ?
+        get_vp9_level(par, frame_rate) : par->level;
     int bit_depth = get_bit_depth(s, par->format);
     int vpx_chroma_subsampling =
         get_vpx_chroma_subsampling(s, par->format, par->chroma_location);
@@ -90,9 +137,28 @@
         }
     }
 
-    avio_w8(pb, profile);
-    avio_w8(pb, level);
-    avio_w8(pb, (bit_depth << 4) | (vpx_chroma_subsampling << 1) | vpx_video_full_range_flag);
+    vpcc->profile            = profile;
+    vpcc->level              = level;
+    vpcc->bitdepth           = bit_depth;
+    vpcc->chroma_subsampling = vpx_chroma_subsampling;
+    vpcc->full_range_flag    = vpx_video_full_range_flag;
+
+    return 0;
+}
+
+int ff_isom_write_vpcc(AVFormatContext *s, AVIOContext *pb,
+                       AVCodecParameters *par)
+{
+    VPCC vpcc;
+    int ret;
+
+    ret = ff_isom_get_vpcc_features(s, par, NULL, &vpcc);
+    if (ret < 0)
+        return ret;
+
+    avio_w8(pb, vpcc.profile);
+    avio_w8(pb, vpcc.level);
+    avio_w8(pb, (vpcc.bitdepth << 4) | (vpcc.chroma_subsampling << 1) | vpcc.full_range_flag);
     avio_w8(pb, par->color_primaries);
     avio_w8(pb, par->color_trc);
     avio_w8(pb, par->color_space);

diff --git a/libavformat/vpcc.h b/libavformat/vpcc.h
index 184e857..e87bec5 100644
--- a/libavformat/vpcc.h
+++ b/libavformat/vpcc.h

@@ -32,6 +32,14 @@
 #include "avformat.h"
 #include "libavcodec/avcodec.h"
 
+typedef struct VPCC {
+    int profile;
+    int level;
+    int bitdepth;
+    int chroma_subsampling;
+    int full_range_flag;
+} VPCC;
+
 /**
  * Writes VP codec configuration to the provided AVIOContext.
  *
@@ -44,4 +52,7 @@
 int ff_isom_write_vpcc(AVFormatContext *s, AVIOContext *pb,
                        AVCodecParameters *par);
 
+int ff_isom_get_vpcc_features(AVFormatContext *s, AVCodecParameters *par,
+                              AVRational *frame_rate, VPCC *vpcc);
+
 #endif /* AVFORMAT_VPCC_H */

diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c
index b016185..e280be4 100644
--- a/libavformat/wavdec.c
+++ b/libavformat/wavdec.c

@@ -822,6 +822,7 @@
             samples = avio_rl64(pb);
             if (samples > 0)
                 st->duration = samples;
+            avio_skip(pb, FFALIGN(size, INT64_C(8)) - 32);
         } else if (!memcmp(guid, ff_w64_guid_data, 16)) {
             wav->data_end = avio_tell(pb) + size - 24;
 

diff --git a/libavformat/wavenc.c b/libavformat/wavenc.c
index adb20cb..159119d 100644
--- a/libavformat/wavenc.c
+++ b/libavformat/wavenc.c

@@ -74,8 +74,6 @@
     uint32_t peak_num_frames;
     uint32_t peak_outbuf_size;
     uint32_t peak_outbuf_bytes;
-    uint32_t peak_pos_pop;
-    uint16_t peak_pop;
     uint8_t *peak_output;
     int last_duration;
     int write_bext;
@@ -195,7 +193,6 @@
 {
     WAVMuxContext *wav = s->priv_data;
     AVCodecParameters *par = s->streams[0]->codecpar;
-    int peak_of_peaks;
     int c;
 
     if (!wav->peak_output)
@@ -213,12 +210,6 @@
             wav->peak_maxpos[c] =
                 FFMAX(wav->peak_maxpos[c], wav->peak_maxneg[c]);
 
-        peak_of_peaks = FFMAX3(wav->peak_maxpos[c], wav->peak_maxneg[c],
-                               wav->peak_pop);
-        if (peak_of_peaks > wav->peak_pop)
-            wav->peak_pos_pop = wav->peak_num_frames;
-        wav->peak_pop = peak_of_peaks;
-
         if (wav->peak_outbuf_size - wav->peak_outbuf_bytes <
             wav->peak_format * wav->peak_ppv) {
             wav->peak_outbuf_size += PEAK_BUFFER_SIZE;
@@ -287,7 +278,7 @@
     avio_wl32(pb, wav->peak_block_size);        /* frames per value */
     avio_wl32(pb, par->channels);               /* number of channels */
     avio_wl32(pb, wav->peak_num_frames);        /* number of peak frames */
-    avio_wl32(pb, wav->peak_pos_pop);           /* audio sample frame index */
+    avio_wl32(pb, -1);                          /* audio sample frame position (not implemented) */
     avio_wl32(pb, 128);                         /* equal to size of header */
     avio_write(pb, timestamp, 28);              /* ASCII time stamp */
     ffio_fill(pb, 0, 60);

diff --git a/libavformat/webm_chunk.c b/libavformat/webm_chunk.c
index f8dbaa3..7ceb276 100644
--- a/libavformat/webm_chunk.c
+++ b/libavformat/webm_chunk.c

@@ -99,8 +99,8 @@
         av_strlcpy(filename, wc->header_filename, strlen(wc->header_filename) + 1);
     } else {
         if (av_get_frame_filename(filename, MAX_FILENAME_SIZE,
-                                  s->filename, wc->chunk_index - 1) < 0) {
-            av_log(oc, AV_LOG_ERROR, "Invalid chunk filename template '%s'\n", s->filename);
+                                  s->url, wc->chunk_index - 1) < 0) {
+            av_log(oc, AV_LOG_ERROR, "Invalid chunk filename template '%s'\n", s->url);
             return AVERROR(EINVAL);
         }
     }
@@ -114,12 +114,14 @@
     int ret;
     int i;
     AVDictionary *options = NULL;
+    char oc_filename[MAX_FILENAME_SIZE];
+    char *oc_url;
 
     // DASH Streams can only have either one track per file.
     if (s->nb_streams != 1) { return AVERROR_INVALIDDATA; }
 
     wc->chunk_index = wc->chunk_start_index;
-    wc->oformat = av_guess_format("webm", s->filename, "video/webm");
+    wc->oformat = av_guess_format("webm", s->url, "video/webm");
     if (!wc->oformat)
         return AVERROR_MUXER_NOT_FOUND;
 
@@ -127,12 +129,16 @@
     if (ret < 0)
         return ret;
     oc = wc->avf;
-    ret = get_chunk_filename(s, 1, oc->filename);
+    ret = get_chunk_filename(s, 1, oc_filename);
     if (ret < 0)
         return ret;
+    oc_url = av_strdup(oc_filename);
+    if (!oc_url)
+        return AVERROR(ENOMEM);
+    ff_format_set_url(oc, oc_url);
     if (wc->http_method)
         av_dict_set(&options, "method", wc->http_method, 0);
-    ret = s->io_open(s, &oc->pb, oc->filename, AVIO_FLAG_WRITE, &options);
+    ret = s->io_open(s, &oc->pb, oc->url, AVIO_FLAG_WRITE, &options);
     av_dict_free(&options);
     if (ret < 0)
         return ret;

diff --git a/libavformat/webvttenc.c b/libavformat/webvttenc.c
index 4827de0..61b7f54 100644
--- a/libavformat/webvttenc.c
+++ b/libavformat/webvttenc.c

@@ -38,7 +38,7 @@
     min -= 60 * hour;
 
     if (hour > 0)
-        avio_printf(pb, "%"PRId64":", hour);
+        avio_printf(pb, "%02"PRId64":", hour);
 
     avio_printf(pb, "%02"PRId64":%02"PRId64".%03"PRId64"", min, sec, millisec);
 }

diff --git a/libavformat/wtvdec.c b/libavformat/wtvdec.c
index 27be5c9..301163b 100644
--- a/libavformat/wtvdec.c
+++ b/libavformat/wtvdec.c

@@ -65,7 +65,7 @@
 }
 
 /**
- * @return bytes read, 0 on end of file, or <0 on error
+ * @return bytes read, AVERROR_EOF on end of file, or <0 on error
  */
 static int wtvfile_read_packet(void *opaque, uint8_t *buf, int buf_size)
 {
@@ -76,7 +76,7 @@
     if (wf->error || pb->error)
         return -1;
     if (wf->position >= wf->length || avio_feof(pb))
-        return 0;
+        return AVERROR_EOF;
 
     buf_size = FFMIN(buf_size, wf->length - wf->position);
     while(nread < buf_size) {

diff --git a/libavformat/yuv4mpegdec.c b/libavformat/yuv4mpegdec.c
index 462b823..855fadb 100644
--- a/libavformat/yuv4mpegdec.c
+++ b/libavformat/yuv4mpegdec.c

@@ -41,6 +41,7 @@
     enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE, alt_pix_fmt = AV_PIX_FMT_NONE;
     enum AVChromaLocation chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
     enum AVFieldOrder field_order = AV_FIELD_UNKNOWN;
+    enum AVColorRange color_range = AVCOL_RANGE_UNSPECIFIED;
     AVStream *st;
 
     for (i = 0; i < MAX_YUV4_HEADER; i++) {
@@ -126,6 +127,12 @@
                 pix_fmt = AV_PIX_FMT_YUV444P;
             } else if (strncmp("mono16", tokstart, 6) == 0) {
                 pix_fmt = AV_PIX_FMT_GRAY16;
+            } else if (strncmp("mono12", tokstart, 6) == 0) {
+                pix_fmt = AV_PIX_FMT_GRAY12;
+            } else if (strncmp("mono10", tokstart, 6) == 0) {
+                pix_fmt = AV_PIX_FMT_GRAY10;
+            } else if (strncmp("mono9", tokstart, 5) == 0) {
+                pix_fmt = AV_PIX_FMT_GRAY9;
             } else if (strncmp("mono", tokstart, 4) == 0) {
                 pix_fmt = AV_PIX_FMT_GRAY8;
             } else {
@@ -214,6 +221,12 @@
                     alt_pix_fmt = AV_PIX_FMT_YUV422P;
                 else if (strncmp("444", tokstart, 3) == 0)
                     alt_pix_fmt = AV_PIX_FMT_YUV444P;
+            } else if (strncmp("COLORRANGE=", tokstart, 11) == 0) {
+              tokstart += 11;
+              if (strncmp("FULL",tokstart, 4) == 0)
+                  color_range = AVCOL_RANGE_JPEG;
+              else if (strncmp("LIMITED", tokstart, 7) == 0)
+                  color_range = AVCOL_RANGE_MPEG;
             }
             while (tokstart < header_end && *tokstart != 0x20)
                 tokstart++;
@@ -257,6 +270,7 @@
     st->codecpar->codec_id            = AV_CODEC_ID_RAWVIDEO;
     st->sample_aspect_ratio           = (AVRational){ aspectn, aspectd };
     st->codecpar->chroma_location     = chroma_sample_location;
+    st->codecpar->color_range         = color_range;
     st->codecpar->field_order         = field_order;
     s->packet_size = av_image_get_buffer_size(st->codecpar->format, width, height, 1) + Y4M_FRAME_MAGIC_LEN;
     if ((int) s->packet_size < 0)
@@ -308,7 +322,13 @@
 static int yuv4_read_seek(AVFormatContext *s, int stream_index,
                           int64_t pts, int flags)
 {
-    if (avio_seek(s->pb, pts * s->packet_size + s->internal->data_offset, SEEK_SET) < 0)
+    int64_t pos;
+
+    if (flags & AVSEEK_FLAG_BACKWARD)
+        pts = FFMAX(0, pts - 1);
+    pos = pts * s->packet_size;
+
+    if (avio_seek(s->pb, pos + s->internal->data_offset, SEEK_SET) < 0)
         return -1;
     return 0;
 }

diff --git a/libavformat/yuv4mpegenc.c b/libavformat/yuv4mpegenc.c
index b4dc6e9..e84dbf9 100644
--- a/libavformat/yuv4mpegenc.c
+++ b/libavformat/yuv4mpegenc.c

@@ -33,6 +33,7 @@
     int raten, rated, aspectn, aspectd, n;
     char inter;
     const char *colorspace = "";
+    const char *colorrange = "";
     int field_order;
 
     st     = s->streams[0];
@@ -57,6 +58,17 @@
     FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
+    switch(st->codecpar->color_range) {
+    case AVCOL_RANGE_MPEG:
+        colorrange = " XCOLORRANGE=LIMITED";
+        break;
+    case AVCOL_RANGE_JPEG:
+        colorrange = " XCOLORRANGE=FULL";
+        break;
+    default:
+        break;
+    }
+
     switch (field_order) {
     case AV_FIELD_TB:
     case AV_FIELD_TT: inter = 't'; break;
@@ -69,12 +81,33 @@
     case AV_PIX_FMT_GRAY8:
         colorspace = " Cmono";
         break;
+    case AV_PIX_FMT_GRAY9:
+        colorspace = " Cmono9";
+        break;
+    case AV_PIX_FMT_GRAY10:
+        colorspace = " Cmono10";
+        break;
+    case AV_PIX_FMT_GRAY12:
+        colorspace = " Cmono12";
+        break;
     case AV_PIX_FMT_GRAY16:
         colorspace = " Cmono16";
         break;
     case AV_PIX_FMT_YUV411P:
         colorspace = " C411 XYSCSS=411";
         break;
+    case AV_PIX_FMT_YUVJ420P:
+        colorspace = " C420jpeg XYSCSS=420JPEG";
+        colorrange = " XCOLORRANGE=FULL";
+        break;
+    case AV_PIX_FMT_YUVJ422P:
+        colorspace = " C422 XYSCSS=422";
+        colorrange = " XCOLORRANGE=FULL";
+        break;
+    case AV_PIX_FMT_YUVJ444P:
+        colorspace = " C444 XYSCSS=444";
+        colorrange = " XCOLORRANGE=FULL";
+        break;
     case AV_PIX_FMT_YUV420P:
         switch (st->codecpar->chroma_location) {
         case AVCHROMA_LOC_TOPLEFT: colorspace = " C420paldv XYSCSS=420PALDV"; break;
@@ -136,13 +169,14 @@
     }
 
     /* construct stream header, if this is the first frame */
-    n = snprintf(buf, Y4M_LINE_MAX, "%s W%d H%d F%d:%d I%c A%d:%d%s\n",
+    n = snprintf(buf, Y4M_LINE_MAX, "%s W%d H%d F%d:%d I%c A%d:%d%s%s\n",
                  Y4M_MAGIC, width, height, raten, rated, inter,
-                 aspectn, aspectd, colorspace);
+                 aspectn, aspectd, colorspace, colorrange);
 
     return n;
 }
 
+
 static int yuv4_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
     AVStream *st = s->streams[pkt->stream_index];
@@ -183,7 +217,14 @@
     case AV_PIX_FMT_YUV420P:
     case AV_PIX_FMT_YUV422P:
     case AV_PIX_FMT_YUV444P:
+    // TODO: remove YUVJ pixel formats when they are completely removed from the codebase.
+    case AV_PIX_FMT_YUVJ420P:
+    case AV_PIX_FMT_YUVJ422P:
+    case AV_PIX_FMT_YUVJ444P:
         break;
+    case AV_PIX_FMT_GRAY9:
+    case AV_PIX_FMT_GRAY10:
+    case AV_PIX_FMT_GRAY12:
     case AV_PIX_FMT_GRAY16:
     case AV_PIX_FMT_YUV420P9:
     case AV_PIX_FMT_YUV422P9:
@@ -213,7 +254,8 @@
         ptr += frame->linesize[0];
     }
 
-    if (st->codecpar->format != AV_PIX_FMT_GRAY8 &&
+    if (st->codecpar->format != AV_PIX_FMT_GRAY8 && st->codecpar->format != AV_PIX_FMT_GRAY9 &&
+        st->codecpar->format != AV_PIX_FMT_GRAY10 && st->codecpar->format != AV_PIX_FMT_GRAY12 &&
         st->codecpar->format != AV_PIX_FMT_GRAY16) {
         // Adjust for smaller Cb and Cr planes
         av_pix_fmt_get_chroma_sub_sample(st->codecpar->format, &h_chroma_shift,
@@ -255,11 +297,18 @@
                "stream, some mjpegtools might not work.\n");
         break;
     case AV_PIX_FMT_GRAY8:
-    case AV_PIX_FMT_GRAY16:
     case AV_PIX_FMT_YUV420P:
     case AV_PIX_FMT_YUV422P:
     case AV_PIX_FMT_YUV444P:
+    // TODO: remove YUVJ pixel formats when they are completely removed from the codebase.
+    case AV_PIX_FMT_YUVJ420P:
+    case AV_PIX_FMT_YUVJ422P:
+    case AV_PIX_FMT_YUVJ444P:
         break;
+    case AV_PIX_FMT_GRAY9:
+    case AV_PIX_FMT_GRAY10:
+    case AV_PIX_FMT_GRAY12:
+    case AV_PIX_FMT_GRAY16:
     case AV_PIX_FMT_YUV420P9:
     case AV_PIX_FMT_YUV422P9:
     case AV_PIX_FMT_YUV444P9:
@@ -291,7 +340,8 @@
                "yuv444p10, yuv422p10, yuv420p10, "
                "yuv444p12, yuv422p12, yuv420p12, "
                "yuv444p14, yuv422p14, yuv420p14, "
-               "yuv444p16, yuv422p16, yuv420p16 "
+               "yuv444p16, yuv422p16, yuv420p16, "
+               "gray9, gray10, gray12 "
                "and gray16 pixel formats. "
                "Use -pix_fmt to select one.\n");
         return AVERROR(EIO);

diff --git a/libavresample/avresample.h b/libavresample/avresample.h
index 193443e..5ac9adb 100644
--- a/libavresample/avresample.h
+++ b/libavresample/avresample.h

@@ -105,22 +105,31 @@
 
 typedef struct AVAudioResampleContext AVAudioResampleContext;
 
-/** Mixing Coefficient Types */
-enum AVMixCoeffType {
+/**
+ * @deprecated use libswresample
+ *
+ * Mixing Coefficient Types */
+enum attribute_deprecated AVMixCoeffType {
     AV_MIX_COEFF_TYPE_Q8,   /** 16-bit 8.8 fixed-point                      */
     AV_MIX_COEFF_TYPE_Q15,  /** 32-bit 17.15 fixed-point                    */
     AV_MIX_COEFF_TYPE_FLT,  /** floating-point                              */
     AV_MIX_COEFF_TYPE_NB,   /** Number of coeff types. Not part of ABI      */
 };
 
-/** Resampling Filter Types */
-enum AVResampleFilterType {
+/**
+ * @deprecated use libswresample
+ *
+ * Resampling Filter Types */
+enum attribute_deprecated AVResampleFilterType {
     AV_RESAMPLE_FILTER_TYPE_CUBIC,              /**< Cubic */
     AV_RESAMPLE_FILTER_TYPE_BLACKMAN_NUTTALL,   /**< Blackman Nuttall Windowed Sinc */
     AV_RESAMPLE_FILTER_TYPE_KAISER,             /**< Kaiser Windowed Sinc */
 };
 
-enum AVResampleDitherMethod {
+/**
+ * @deprecated use libswresample
+ */
+enum attribute_deprecated AVResampleDitherMethod {
     AV_RESAMPLE_DITHER_NONE,            /**< Do not use dithering */
     AV_RESAMPLE_DITHER_RECTANGULAR,     /**< Rectangular Dither */
     AV_RESAMPLE_DITHER_TRIANGULAR,      /**< Triangular Dither*/
@@ -130,22 +139,37 @@
 };
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Return the LIBAVRESAMPLE_VERSION_INT constant.
  */
+attribute_deprecated
 unsigned avresample_version(void);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Return the libavresample build-time configuration.
  * @return  configure string
  */
+attribute_deprecated
 const char *avresample_configuration(void);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Return the libavresample license.
  */
+attribute_deprecated
 const char *avresample_license(void);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Get the AVClass for AVAudioResampleContext.
  *
  * Can be used in combination with AV_OPT_SEARCH_FAKE_OBJ for examining options
@@ -155,16 +179,24 @@
  *
  * @return AVClass for AVAudioResampleContext
  */
+attribute_deprecated
 const AVClass *avresample_get_class(void);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Allocate AVAudioResampleContext and set options.
  *
  * @return  allocated audio resample context, or NULL on failure
  */
+attribute_deprecated
 AVAudioResampleContext *avresample_alloc_context(void);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Initialize AVAudioResampleContext.
  * @note The context must be configured using the AVOption API.
  * @note The fields "in_channel_layout", "out_channel_layout",
@@ -178,17 +210,25 @@
  * @param avr  audio resample context
  * @return     0 on success, negative AVERROR code on failure
  */
+attribute_deprecated
 int avresample_open(AVAudioResampleContext *avr);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Check whether an AVAudioResampleContext is open or closed.
  *
  * @param avr AVAudioResampleContext to check
  * @return 1 if avr is open, 0 if avr is closed.
  */
+attribute_deprecated
 int avresample_is_open(AVAudioResampleContext *avr);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Close AVAudioResampleContext.
  *
  * This closes the context, but it does not change the parameters. The context
@@ -201,18 +241,26 @@
  *
  * @param avr  audio resample context
  */
+attribute_deprecated
 void avresample_close(AVAudioResampleContext *avr);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Free AVAudioResampleContext and associated AVOption values.
  *
  * This also calls avresample_close() before freeing.
  *
  * @param avr  audio resample context
  */
+attribute_deprecated
 void avresample_free(AVAudioResampleContext **avr);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Generate a channel mixing matrix.
  *
  * This function is the one used internally by libavresample for building the
@@ -234,12 +282,16 @@
  * @param matrix_encoding     matrixed stereo downmix mode (e.g. dplii)
  * @return                    0 on success, negative AVERROR code on failure
  */
+attribute_deprecated
 int avresample_build_matrix(uint64_t in_layout, uint64_t out_layout,
                             double center_mix_level, double surround_mix_level,
                             double lfe_mix_level, int normalize, double *matrix,
                             int stride, enum AVMatrixEncoding matrix_encoding);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Get the current channel mixing matrix.
  *
  * If no custom matrix has been previously set or the AVAudioResampleContext is
@@ -251,10 +303,14 @@
  * @param stride  distance between adjacent input channels in the matrix array
  * @return        0 on success, negative AVERROR code on failure
  */
+attribute_deprecated
 int avresample_get_matrix(AVAudioResampleContext *avr, double *matrix,
                           int stride);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Set channel mixing matrix.
  *
  * Allows for setting a custom mixing matrix, overriding the default matrix
@@ -272,10 +328,14 @@
  * @param stride  distance between adjacent input channels in the matrix array
  * @return        0 on success, negative AVERROR code on failure
  */
+attribute_deprecated
 int avresample_set_matrix(AVAudioResampleContext *avr, const double *matrix,
                           int stride);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Set a customized input channel mapping.
  *
  * This function can only be called when the allocated context is not open.
@@ -302,10 +362,14 @@
  * @param channel_map customized input channel mapping
  * @return            0 on success, negative AVERROR code on failure
  */
+attribute_deprecated
 int avresample_set_channel_mapping(AVAudioResampleContext *avr,
                                    const int *channel_map);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Set compensation for resampling.
  *
  * This can be called anytime after avresample_open(). If resampling is not
@@ -318,10 +382,14 @@
  * @param compensation_distance  compensation distance, in samples
  * @return                       0 on success, negative AVERROR code on failure
  */
+attribute_deprecated
 int avresample_set_compensation(AVAudioResampleContext *avr, int sample_delta,
                                 int compensation_distance);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Provide the upper bound on the number of samples the configured
  * conversion would output.
  *
@@ -331,10 +399,13 @@
  * @return              number of samples or AVERROR(EINVAL) if the value
  *                      would exceed INT_MAX
  */
-
+attribute_deprecated
 int avresample_get_out_samples(AVAudioResampleContext *avr, int in_nb_samples);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Convert input samples and write them to the output FIFO.
  *
  * The upper bound on the number of output samples can be obtained through
@@ -376,12 +447,16 @@
  *                        not including converted samples added to the internal
  *                        output FIFO
  */
+attribute_deprecated
 int avresample_convert(AVAudioResampleContext *avr, uint8_t **output,
                        int out_plane_size, int out_samples,
                        uint8_t * const *input, int in_plane_size,
                        int in_samples);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Return the number of samples currently in the resampling delay buffer.
  *
  * When resampling, there may be a delay between the input and output. Any
@@ -394,9 +469,13 @@
  * @param avr  audio resample context
  * @return     number of samples currently in the resampling delay buffer
  */
+attribute_deprecated
 int avresample_get_delay(AVAudioResampleContext *avr);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Return the number of available samples in the output FIFO.
  *
  * During conversion, if the user does not specify an output buffer or
@@ -411,9 +490,13 @@
  * @param avr  audio resample context
  * @return     number of samples available for reading
  */
+attribute_deprecated
 int avresample_available(AVAudioResampleContext *avr);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Read samples from the output FIFO.
  *
  * During conversion, if the user does not specify an output buffer or
@@ -430,9 +513,13 @@
  * @param nb_samples  number of samples to read from the FIFO
  * @return            the number of samples written to output
  */
+attribute_deprecated
 int avresample_read(AVAudioResampleContext *avr, uint8_t **output, int nb_samples);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Convert the samples in the input AVFrame and write them to the output AVFrame.
  *
  * Input and output AVFrames must have channel_layout, sample_rate and format set.
@@ -476,10 +563,14 @@
  * @return                0 on success, AVERROR on failure or nonmatching
  *                        configuration.
  */
+attribute_deprecated
 int avresample_convert_frame(AVAudioResampleContext *avr,
                              AVFrame *output, AVFrame *input);
 
 /**
+ *
+ * @deprecated use libswresample
+ *
  * Configure or reconfigure the AVAudioResampleContext using the information
  * provided by the AVFrames.
  *
@@ -494,6 +585,7 @@
  * @param in              input AVFrame
  * @return                0 on success, AVERROR on failure.
  */
+attribute_deprecated
 int avresample_config(AVAudioResampleContext *avr, AVFrame *out, AVFrame *in);
 
 /**

diff --git a/libavresample/version.h b/libavresample/version.h
index 20c78c7..d5d3ea8 100644
--- a/libavresample/version.h
+++ b/libavresample/version.h

@@ -27,8 +27,8 @@
 
 #include "libavutil/version.h"
 
-#define LIBAVRESAMPLE_VERSION_MAJOR  3
-#define LIBAVRESAMPLE_VERSION_MINOR  7
+#define LIBAVRESAMPLE_VERSION_MAJOR  4
+#define LIBAVRESAMPLE_VERSION_MINOR  0
 #define LIBAVRESAMPLE_VERSION_MICRO  0
 
 #define LIBAVRESAMPLE_VERSION_INT  AV_VERSION_INT(LIBAVRESAMPLE_VERSION_MAJOR, \

diff --git a/libavutil/Makefile b/libavutil/Makefile
index 65e285a..9ed24cf 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile

@@ -24,6 +24,7 @@
           dict.h                                                        \
           display.h                                                     \
           downmix_info.h                                                \
+          encryption_info.h                                             \
           error.h                                                       \
           eval.h                                                        \
           fifo.h                                                        \
@@ -37,6 +38,7 @@
           hwcontext_drm.h                                               \
           hwcontext_dxva2.h                                             \
           hwcontext_qsv.h                                               \
+          hwcontext_mediacodec.h                                        \
           hwcontext_vaapi.h                                             \
           hwcontext_videotoolbox.h                                      \
           hwcontext_vdpau.h                                             \
@@ -79,8 +81,6 @@
 
 HEADERS-$(CONFIG_LZO)                   += lzo.h
 
-HEADERS-$(CONFIG_OPENCL)                += opencl.h
-
 ARCH_HEADERS = bswap.h                                                  \
                intmath.h                                                \
                intreadwrite.h                                           \
@@ -108,6 +108,7 @@
        dict.o                                                           \
        display.o                                                        \
        downmix_info.o                                                   \
+       encryption_info.o                                                \
        error.o                                                          \
        eval.o                                                           \
        fifo.o                                                           \
@@ -156,15 +157,14 @@
        xtea.o                                                           \
        tea.o                                                            \
 
-OBJS-$(!HAVE_ATOMICS_NATIVE)            += atomic.o                     \
-
 OBJS-$(CONFIG_CUDA)                     += hwcontext_cuda.o
 OBJS-$(CONFIG_D3D11VA)                  += hwcontext_d3d11va.o
 OBJS-$(CONFIG_DXVA2)                    += hwcontext_dxva2.o
-OBJS-$(CONFIG_QSV)                   += hwcontext_qsv.o
 OBJS-$(CONFIG_LIBDRM)                   += hwcontext_drm.o
 OBJS-$(CONFIG_LZO)                      += lzo.o
-OBJS-$(CONFIG_OPENCL)                   += opencl.o opencl_internal.o
+OBJS-$(CONFIG_MEDIACODEC)               += hwcontext_mediacodec.o
+OBJS-$(CONFIG_OPENCL)                   += hwcontext_opencl.o
+OBJS-$(CONFIG_QSV)                      += hwcontext_qsv.o
 OBJS-$(CONFIG_VAAPI)                    += hwcontext_vaapi.o
 OBJS-$(CONFIG_VIDEOTOOLBOX)             += hwcontext_videotoolbox.o
 OBJS-$(CONFIG_VDPAU)                    += hwcontext_vdpau.o
@@ -178,19 +178,15 @@
 SKIPHEADERS-$(CONFIG_CUDA)             += hwcontext_cuda_internal.h
 SKIPHEADERS-$(CONFIG_D3D11VA)          += hwcontext_d3d11va.h
 SKIPHEADERS-$(CONFIG_DXVA2)            += hwcontext_dxva2.h
-SKIPHEADERS-$(CONFIG_QSV)           += hwcontext_qsv.h
+SKIPHEADERS-$(CONFIG_QSV)              += hwcontext_qsv.h
+SKIPHEADERS-$(CONFIG_OPENCL)           += hwcontext_opencl.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += hwcontext_vaapi.h
 SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += hwcontext_videotoolbox.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += hwcontext_vdpau.h
-SKIPHEADERS-$(HAVE_ATOMICS_GCC)        += atomic_gcc.h
-SKIPHEADERS-$(HAVE_ATOMICS_SUNCC)      += atomic_suncc.h
-SKIPHEADERS-$(HAVE_ATOMICS_WIN32)      += atomic_win32.h
-SKIPHEADERS-$(CONFIG_OPENCL)           += opencl.h
 
 TESTPROGS = adler32                                                     \
             aes                                                         \
             aes_ctr                                                     \
-            atomic                                                      \
             audio_fifo                                                  \
             avstring                                                    \
             base64                                                      \
@@ -204,12 +200,15 @@
             des                                                         \
             dict                                                        \
             display                                                     \
+            encryption_info                                             \
             error                                                       \
             eval                                                        \
             file                                                        \
             fifo                                                        \
             hash                                                        \
             hmac                                                        \
+            hwdevice                                                    \
+            integer                                                     \
             imgutils                                                    \
             lfg                                                         \
             lls                                                         \

diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index 4289729..fd32bf7 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S

@@ -23,7 +23,7 @@
 #ifdef __ELF__
 #   define ELF
 #else
-#   define ELF  #
+#   define ELF #
 #endif
 
 #if HAVE_AS_FUNC
@@ -82,6 +82,15 @@
         adrp            \rd, \val+(\offset)@PAGE
         add             \rd, \rd, \val+(\offset)@PAGEOFF
     .endif
+#elif CONFIG_PIC && defined(_WIN32)
+    .if \offset < 0
+        adrp            \rd, \val
+        add             \rd, \rd, :lo12:\val
+        sub             \rd, \rd, -(\offset)
+    .else
+        adrp            \rd, \val+(\offset)
+        add             \rd, \rd, :lo12:\val+(\offset)
+    .endif
 #elif CONFIG_PIC
         adrp            \rd, \val+(\offset)
         add             \rd, \rd, :lo12:\val+(\offset)

diff --git a/libavutil/aarch64/cpu.h b/libavutil/aarch64/cpu.h
index cf1b9cc..2ee3f93 100644
--- a/libavutil/aarch64/cpu.h
+++ b/libavutil/aarch64/cpu.h

@@ -19,7 +19,6 @@
 #ifndef AVUTIL_AARCH64_CPU_H
 #define AVUTIL_AARCH64_CPU_H
 
-#include "config.h"
 #include "libavutil/cpu.h"
 #include "libavutil/cpu_internal.h"
 

diff --git a/libavutil/aes_ctr.c b/libavutil/aes_ctr.c
index e9c568f..0c2e867 100644
--- a/libavutil/aes_ctr.c
+++ b/libavutil/aes_ctr.c

@@ -45,6 +45,12 @@
     a->block_offset = 0;
 }
 
+void av_aes_ctr_set_full_iv(struct AVAESCTR *a, const uint8_t* iv)
+{
+    memcpy(a->counter, iv, sizeof(a->counter));
+    a->block_offset = 0;
+}
+
 const uint8_t* av_aes_ctr_get_iv(struct AVAESCTR *a)
 {
     return a->counter;

diff --git a/libavutil/aes_ctr.h b/libavutil/aes_ctr.h
index f596fa6..e4aae12 100644
--- a/libavutil/aes_ctr.h
+++ b/libavutil/aes_ctr.h

@@ -67,11 +67,16 @@
 void av_aes_ctr_set_random_iv(struct AVAESCTR *a);
 
 /**
- * Forcefully change the iv
+ * Forcefully change the 8-byte iv
  */
 void av_aes_ctr_set_iv(struct AVAESCTR *a, const uint8_t* iv);
 
 /**
+ * Forcefully change the "full" 16-byte iv, including the counter
+ */
+void av_aes_ctr_set_full_iv(struct AVAESCTR *a, const uint8_t* iv);
+
+/**
  * Increment the top 64 bit of the iv (performed after each frame)
  */
 void av_aes_ctr_increment_iv(struct AVAESCTR *a);

diff --git a/libavutil/arm/asm.S b/libavutil/arm/asm.S
index 7d33a64..6744f2a 100644
--- a/libavutil/arm/asm.S
+++ b/libavutil/arm/asm.S

@@ -46,6 +46,7 @@
 #   define FPU @
 #endif
 
+#if HAVE_AS_ARCH_DIRECTIVE
 #if   HAVE_NEON
         .arch           armv7-a
 #elif HAVE_ARMV6T2
@@ -55,6 +56,7 @@
 #elif HAVE_ARMV5TE
         .arch           armv5te
 #endif
+#endif
 #if   HAVE_AS_OBJECT_ARCH
 ELF     .object_arch    armv4
 #endif
@@ -109,11 +111,17 @@
 ELF     .size   \name, . - \name
         .purgem endconst
     .endm
-.if HAVE_SECTION_DATA_REL_RO && \relocate
+#if HAVE_SECTION_DATA_REL_RO
+.if \relocate
         .section        .data.rel.ro
 .else
         .section        .rodata
 .endif
+#elif !defined(__MACH__)
+        .section        .rodata
+#else
+        .const_data
+#endif
         .align          \align
 \name:
 .endm

diff --git a/libavutil/arm/cpu.h b/libavutil/arm/cpu.h
index eb64ed5..1d6cc65 100644
--- a/libavutil/arm/cpu.h
+++ b/libavutil/arm/cpu.h

@@ -19,7 +19,6 @@
 #ifndef AVUTIL_ARM_CPU_H
 #define AVUTIL_ARM_CPU_H
 
-#include "config.h"
 #include "libavutil/cpu.h"
 #include "libavutil/cpu_internal.h"
 

diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h
index 65e42c5..5311a7d 100644
--- a/libavutil/arm/intmath.h
+++ b/libavutil/arm/intmath.h

@@ -94,6 +94,22 @@
     return r;
 }
 
+#define av_sat_sub32 av_sat_sub32_arm
+static av_always_inline int av_sat_sub32_arm(int a, int b)
+{
+    int r;
+    __asm__ ("qsub %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
+    return r;
+}
+
+#define av_sat_dsub32 av_sat_dsub32_arm
+static av_always_inline int av_sat_dsub32_arm(int a, int b)
+{
+    int r;
+    __asm__ ("qdsub %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
+    return r;
+}
+
 #endif /* HAVE_ARMV6_INLINE */
 
 #if HAVE_ASM_MOD_Q

diff --git a/libavutil/atomic.c b/libavutil/atomic.c
deleted file mode 100644
index 64cff25..0000000
--- a/libavutil/atomic.c
+++ /dev/null

@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "atomic.h"
-
-#if !HAVE_ATOMICS_NATIVE
-
-#if HAVE_PTHREADS
-
-#include <pthread.h>
-
-static pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER;
-
-int avpriv_atomic_int_get(volatile int *ptr)
-{
-    int res;
-
-    pthread_mutex_lock(&atomic_lock);
-    res = *ptr;
-    pthread_mutex_unlock(&atomic_lock);
-
-    return res;
-}
-
-void avpriv_atomic_int_set(volatile int *ptr, int val)
-{
-    pthread_mutex_lock(&atomic_lock);
-    *ptr = val;
-    pthread_mutex_unlock(&atomic_lock);
-}
-
-int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
-{
-    int res;
-
-    pthread_mutex_lock(&atomic_lock);
-    *ptr += inc;
-    res = *ptr;
-    pthread_mutex_unlock(&atomic_lock);
-
-    return res;
-}
-
-void *avpriv_atomic_ptr_cas(void * volatile *ptr, void *oldval, void *newval)
-{
-    void *ret;
-    pthread_mutex_lock(&atomic_lock);
-    ret = *ptr;
-    if (ret == oldval)
-        *ptr = newval;
-    pthread_mutex_unlock(&atomic_lock);
-    return ret;
-}
-
-#elif !HAVE_THREADS
-
-int avpriv_atomic_int_get(volatile int *ptr)
-{
-    return *ptr;
-}
-
-void avpriv_atomic_int_set(volatile int *ptr, int val)
-{
-    *ptr = val;
-}
-
-int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
-{
-    *ptr += inc;
-    return *ptr;
-}
-
-void *avpriv_atomic_ptr_cas(void * volatile *ptr, void *oldval, void *newval)
-{
-    if (*ptr == oldval) {
-        *ptr = newval;
-        return oldval;
-    }
-    return *ptr;
-}
-
-#else /* HAVE_THREADS */
-
-/* This should never trigger, unless a new threading implementation
- * without correct atomics dependencies in configure or a corresponding
- * atomics implementation is added. */
-#error "Threading is enabled, but there is no implementation of atomic operations available"
-
-#endif /* HAVE_PTHREADS */
-
-#endif /* !HAVE_ATOMICS_NATIVE */

diff --git a/libavutil/atomic.h b/libavutil/atomic.h
deleted file mode 100644
index 15906d2..0000000
--- a/libavutil/atomic.h
+++ /dev/null

@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVUTIL_ATOMIC_H
-#define AVUTIL_ATOMIC_H
-
-#include "config.h"
-
-#if HAVE_ATOMICS_NATIVE
-
-#if HAVE_ATOMICS_GCC
-#include "atomic_gcc.h"
-#elif HAVE_ATOMICS_WIN32
-#include "atomic_win32.h"
-#elif HAVE_ATOMICS_SUNCC
-#include "atomic_suncc.h"
-#endif
-
-#else
-
-/**
- * Load the current value stored in an atomic integer.
- *
- * @param ptr atomic integer
- * @return the current value of the atomic integer
- * @note This acts as a memory barrier.
- */
-int avpriv_atomic_int_get(volatile int *ptr);
-
-/**
- * Store a new value in an atomic integer.
- *
- * @param ptr atomic integer
- * @param val the value to store in the atomic integer
- * @note This acts as a memory barrier.
- */
-void avpriv_atomic_int_set(volatile int *ptr, int val);
-
-/**
- * Add a value to an atomic integer.
- *
- * @param ptr atomic integer
- * @param inc the value to add to the atomic integer (may be negative)
- * @return the new value of the atomic integer.
- * @note This does NOT act as a memory barrier. This is primarily
- *       intended for reference counting.
- */
-int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc);
-
-/**
- * Atomic pointer compare and swap.
- *
- * @param ptr pointer to the pointer to operate on
- * @param oldval do the swap if the current value of *ptr equals to oldval
- * @param newval value to replace *ptr with
- * @return the value of *ptr before comparison
- */
-void *avpriv_atomic_ptr_cas(void * volatile *ptr, void *oldval, void *newval);
-
-#endif /* HAVE_ATOMICS_NATIVE */
-
-#endif /* AVUTIL_ATOMIC_H */

diff --git a/libavutil/atomic_gcc.h b/libavutil/atomic_gcc.h
deleted file mode 100644
index 2bb43c3..0000000
--- a/libavutil/atomic_gcc.h
+++ /dev/null

@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVUTIL_ATOMIC_GCC_H
-#define AVUTIL_ATOMIC_GCC_H
-
-#include <stdint.h>
-
-#include "atomic.h"
-
-#define avpriv_atomic_int_get atomic_int_get_gcc
-static inline int atomic_int_get_gcc(volatile int *ptr)
-{
-    __sync_synchronize();
-    return *ptr;
-}
-
-#define avpriv_atomic_int_set atomic_int_set_gcc
-static inline void atomic_int_set_gcc(volatile int *ptr, int val)
-{
-    *ptr = val;
-    __sync_synchronize();
-}
-
-#define avpriv_atomic_int_add_and_fetch atomic_int_add_and_fetch_gcc
-static inline int atomic_int_add_and_fetch_gcc(volatile int *ptr, int inc)
-{
-    return __sync_add_and_fetch(ptr, inc);
-}
-
-#define avpriv_atomic_ptr_cas atomic_ptr_cas_gcc
-static inline void *atomic_ptr_cas_gcc(void * volatile *ptr,
-                                       void *oldval, void *newval)
-{
-#ifdef __ARMCC_VERSION
-    // armcc will throw an error if ptr is not an integer type
-    volatile uintptr_t *tmp = (volatile uintptr_t*)ptr;
-    return (void*)__sync_val_compare_and_swap(tmp, oldval, newval);
-#else
-    return __sync_val_compare_and_swap(ptr, oldval, newval);
-#endif
-}
-
-#endif /* AVUTIL_ATOMIC_GCC_H */

diff --git a/libavutil/atomic_suncc.h b/libavutil/atomic_suncc.h
deleted file mode 100644
index a75a37b..0000000
--- a/libavutil/atomic_suncc.h
+++ /dev/null

@@ -1,54 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVUTIL_ATOMIC_SUNCC_H
-#define AVUTIL_ATOMIC_SUNCC_H
-
-#include <atomic.h>
-#include <mbarrier.h>
-
-#include "atomic.h"
-
-#define avpriv_atomic_int_get atomic_int_get_suncc
-static inline int atomic_int_get_suncc(volatile int *ptr)
-{
-    __machine_rw_barrier();
-    return *ptr;
-}
-
-#define avpriv_atomic_int_set atomic_int_set_suncc
-static inline void atomic_int_set_suncc(volatile int *ptr, int val)
-{
-    *ptr = val;
-    __machine_rw_barrier();
-}
-
-#define avpriv_atomic_int_add_and_fetch atomic_int_add_and_fetch_suncc
-static inline int atomic_int_add_and_fetch_suncc(volatile int *ptr, int inc)
-{
-    return atomic_add_int_nv(ptr, inc);
-}
-
-#define avpriv_atomic_ptr_cas atomic_ptr_cas_suncc
-static inline void *atomic_ptr_cas_suncc(void * volatile *ptr,
-                                         void *oldval, void *newval)
-{
-    return atomic_cas_ptr(ptr, oldval, newval);
-}
-
-#endif /* AVUTIL_ATOMIC_SUNCC_H */

diff --git a/libavutil/atomic_win32.h b/libavutil/atomic_win32.h
deleted file mode 100644
index f729933..0000000
--- a/libavutil/atomic_win32.h
+++ /dev/null

@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVUTIL_ATOMIC_WIN32_H
-#define AVUTIL_ATOMIC_WIN32_H
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-#define avpriv_atomic_int_get atomic_int_get_win32
-static inline int atomic_int_get_win32(volatile int *ptr)
-{
-    MemoryBarrier();
-    return *ptr;
-}
-
-#define avpriv_atomic_int_set atomic_int_set_win32
-static inline void atomic_int_set_win32(volatile int *ptr, int val)
-{
-    *ptr = val;
-    MemoryBarrier();
-}
-
-#define avpriv_atomic_int_add_and_fetch atomic_int_add_and_fetch_win32
-static inline int atomic_int_add_and_fetch_win32(volatile int *ptr, int inc)
-{
-    return inc + InterlockedExchangeAdd(ptr, inc);
-}
-
-#define avpriv_atomic_ptr_cas atomic_ptr_cas_win32
-static inline void *atomic_ptr_cas_win32(void * volatile *ptr,
-                                         void *oldval, void *newval)
-{
-    return InterlockedCompareExchangePointer(ptr, newval, oldval);
-}
-
-#endif /* AVUTIL_ATOMIC_WIN32_H */

diff --git a/libavutil/attributes.h b/libavutil/attributes.h
index 54d1901..ced108a 100644
--- a/libavutil/attributes.h
+++ b/libavutil/attributes.h

@@ -66,19 +66,19 @@
 #    define av_noinline
 #endif
 
-#if AV_GCC_VERSION_AT_LEAST(3,1)
+#if AV_GCC_VERSION_AT_LEAST(3,1) || defined(__clang__)
 #    define av_pure __attribute__((pure))
 #else
 #    define av_pure
 #endif
 
-#if AV_GCC_VERSION_AT_LEAST(2,6)
+#if AV_GCC_VERSION_AT_LEAST(2,6) || defined(__clang__)
 #    define av_const __attribute__((const))
 #else
 #    define av_const
 #endif
 
-#if AV_GCC_VERSION_AT_LEAST(4,3)
+#if AV_GCC_VERSION_AT_LEAST(4,3) || defined(__clang__)
 #    define av_cold __attribute__((cold))
 #else
 #    define av_cold
@@ -138,19 +138,19 @@
 #    define av_used
 #endif
 
-#if AV_GCC_VERSION_AT_LEAST(3,3)
+#if AV_GCC_VERSION_AT_LEAST(3,3) || defined(__clang__)
 #   define av_alias __attribute__((may_alias))
 #else
 #   define av_alias
 #endif
 
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__)
+#if (defined(__GNUC__) || defined(__clang__)) && !defined(__INTEL_COMPILER)
 #    define av_uninit(x) x=x
 #else
 #    define av_uninit(x) x
 #endif
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
 #    define av_builtin_constant_p __builtin_constant_p
 #    define av_printf_format(fmtpos, attrpos) __attribute__((__format__(__printf__, fmtpos, attrpos)))
 #else
@@ -158,7 +158,7 @@
 #    define av_printf_format(fmtpos, attrpos)
 #endif
 
-#if AV_GCC_VERSION_AT_LEAST(2,5)
+#if AV_GCC_VERSION_AT_LEAST(2,5) || defined(__clang__)
 #    define av_noreturn __attribute__((noreturn))
 #else
 #    define av_noreturn

diff --git a/libavutil/avassert.h b/libavutil/avassert.h
index 46f3fea..9abeade 100644
--- a/libavutil/avassert.h
+++ b/libavutil/avassert.h

@@ -66,7 +66,7 @@
 #endif
 
 /**
- * Assert that floating point opperations can be executed.
+ * Assert that floating point operations can be executed.
  *
  * This will av_assert0() that the cpu is not in MMX state on X86
  */

diff --git a/libavutil/colorspace.h b/libavutil/colorspace.h
index b6dba2c..d0be8cb 100644
--- a/libavutil/colorspace.h
+++ b/libavutil/colorspace.h

@@ -107,4 +107,16 @@
 (((FIX(0.50000*224.0/255.0) * r1 - FIX(0.41869*224.0/255.0) * g1 -           \
    FIX(0.08131*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128)
 
+#define RGB_TO_Y_JPEG(r, g, b) \
+(FFMIN((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \
+  FIX(0.11400) * (b) + (ONE_HALF)) >> SCALEBITS, 255))
+
+#define RGB_TO_U_JPEG(r1, g1, b1)\
+(((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + \
+     FIX(0.50000) * b1 + (ONE_HALF) - 1) >> (SCALEBITS)) + 128)
+
+#define RGB_TO_V_JPEG(r1, g1, b1)\
+(((FIX(0.50000) * r1 - FIX(0.41869) * g1 - \
+   FIX(0.08131) * b1 + (ONE_HALF) - 1) >> (SCALEBITS)) + 128)
+
 #endif /* AVUTIL_COLORSPACE_H */

diff --git a/libavutil/common.h b/libavutil/common.h
index 8142b31..8db0291 100644
--- a/libavutil/common.h
+++ b/libavutil/common.h

@@ -158,7 +158,7 @@
  */
 static av_always_inline av_const uint8_t av_clip_uint8_c(int a)
 {
-    if (a&(~0xFF)) return (-a)>>31;
+    if (a&(~0xFF)) return (~a)>>31;
     else           return a;
 }
 
@@ -180,7 +180,7 @@
  */
 static av_always_inline av_const uint16_t av_clip_uint16_c(int a)
 {
-    if (a&(~0xFFFF)) return (-a)>>31;
+    if (a&(~0xFFFF)) return (~a)>>31;
     else             return a;
 }
 
@@ -228,7 +228,7 @@
  */
 static av_always_inline av_const unsigned av_clip_uintp2_c(int a, int p)
 {
-    if (a & ~((1<<p) - 1)) return -a >> 31 & ((1<<p) - 1);
+    if (a & ~((1<<p) - 1)) return (~a) >> 31 & ((1<<p) - 1);
     else                   return  a;
 }
 
@@ -260,7 +260,7 @@
  *
  * @param  a first value
  * @param  b value doubled and added to a
- * @return sum with signed saturation
+ * @return sum sat(a + sat(2*b)) with signed saturation
  */
 static av_always_inline int av_sat_dadd32_c(int a, int b)
 {
@@ -268,6 +268,30 @@
 }
 
 /**
+ * Subtract two signed 32-bit values with saturation.
+ *
+ * @param  a one value
+ * @param  b another value
+ * @return difference with signed saturation
+ */
+static av_always_inline int av_sat_sub32_c(int a, int b)
+{
+    return av_clipl_int32((int64_t)a - b);
+}
+
+/**
+ * Subtract a doubled value from another value with saturation at both stages.
+ *
+ * @param  a first value
+ * @param  b value doubled and subtracted from a
+ * @return difference sat(a - sat(2*b)) with signed saturation
+ */
+static av_always_inline int av_sat_dsub32_c(int a, int b)
+{
+    return av_sat_sub32(a, av_sat_add32(b, b));
+}
+
+/**
  * Clip a float value into the amin-amax range.
  * @param a value to clip
  * @param amin minimum value of the clip range
@@ -513,6 +537,12 @@
 #ifndef av_sat_dadd32
 #   define av_sat_dadd32    av_sat_dadd32_c
 #endif
+#ifndef av_sat_sub32
+#   define av_sat_sub32     av_sat_sub32_c
+#endif
+#ifndef av_sat_dsub32
+#   define av_sat_dsub32    av_sat_dsub32_c
+#endif
 #ifndef av_clipf
 #   define av_clipf         av_clipf_c
 #endif

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index c8401b8..6548cc3 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c

@@ -80,7 +80,8 @@
                     AV_CPU_FLAG_XOP      |
                     AV_CPU_FLAG_FMA3     |
                     AV_CPU_FLAG_FMA4     |
-                    AV_CPU_FLAG_AVX2     ))
+                    AV_CPU_FLAG_AVX2     |
+                    AV_CPU_FLAG_AVX512   ))
         && !(arg & AV_CPU_FLAG_MMX)) {
         av_log(NULL, AV_LOG_WARNING, "MMX implied by specified flags\n");
         arg |= AV_CPU_FLAG_MMX;
@@ -126,6 +127,7 @@
 #define CPUFLAG_AVX2     (AV_CPU_FLAG_AVX2     | CPUFLAG_AVX)
 #define CPUFLAG_BMI2     (AV_CPU_FLAG_BMI2     | AV_CPU_FLAG_BMI1)
 #define CPUFLAG_AESNI    (AV_CPU_FLAG_AESNI    | CPUFLAG_SSE42)
+#define CPUFLAG_AVX512   (AV_CPU_FLAG_AVX512   | CPUFLAG_AVX2)
     static const AVOption cpuflags_opts[] = {
         { "flags"   , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = "flags" },
 #if   ARCH_PPC
@@ -154,6 +156,7 @@
         { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT     },    .unit = "flags" },
         { "cmov",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV     },    .unit = "flags" },
         { "aesni"   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AESNI        },    .unit = "flags" },
+        { "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX512       },    .unit = "flags" },
 #elif ARCH_ARM
         { "armv5te",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV5TE  },    .unit = "flags" },
         { "armv6",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6    },    .unit = "flags" },
@@ -216,6 +219,7 @@
         { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOWEXT },    .unit = "flags" },
         { "cmov",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV     },    .unit = "flags" },
         { "aesni",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI    },    .unit = "flags" },
+        { "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512   },    .unit = "flags" },
 
 #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
 #define CPU_FLAG_P3 CPU_FLAG_P2 | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE

diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 9e5d40a..8bb9eb6 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h

@@ -55,6 +55,7 @@
 #define AV_CPU_FLAG_FMA3        0x10000 ///< Haswell FMA3 functions
 #define AV_CPU_FLAG_BMI1        0x20000 ///< Bit Manipulation Instruction Set 1
 #define AV_CPU_FLAG_BMI2        0x40000 ///< Bit Manipulation Instruction Set 2
+#define AV_CPU_FLAG_AVX512     0x100000 ///< AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used
 
 #define AV_CPU_FLAG_ALTIVEC      0x0001 ///< standard
 #define AV_CPU_FLAG_VSX          0x0002 ///< ISA 2.06

diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index b8bf1e5..37122d1 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h

@@ -19,6 +19,8 @@
 #ifndef AVUTIL_CPU_INTERNAL_H
 #define AVUTIL_CPU_INTERNAL_H
 
+#include "config.h"
+
 #include "cpu.h"
 
 #define CPUEXT_SUFFIX(flags, suffix, cpuext)                            \

diff --git a/libavutil/crc.c b/libavutil/crc.c
index 495732b..c45ea63 100644
--- a/libavutil/crc.c
+++ b/libavutil/crc.c

@@ -20,6 +20,8 @@
 
 #include "config.h"
 
+#include "thread.h"
+#include "avassert.h"
 #include "bswap.h"
 #include "common.h"
 #include "crc.h"
@@ -50,6 +52,30 @@
         0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF,
         0xFA, 0xFD, 0xF4, 0xF3, 0x01
     },
+    [AV_CRC_8_EBU] = {
+        0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53, 0xE8, 0xF5, 0xD2, 0xCF,
+        0x9C, 0x81, 0xA6, 0xBB, 0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E,
+        0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76, 0x87, 0x9A, 0xBD, 0xA0,
+        0xF3, 0xEE, 0xC9, 0xD4, 0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C,
+        0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19, 0xA2, 0xBF, 0x98, 0x85,
+        0xD6, 0xCB, 0xEC, 0xF1, 0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40,
+        0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8, 0xDE, 0xC3, 0xE4, 0xF9,
+        0xAA, 0xB7, 0x90, 0x8D, 0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65,
+        0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7, 0x7C, 0x61, 0x46, 0x5B,
+        0x08, 0x15, 0x32, 0x2F, 0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A,
+        0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2, 0x26, 0x3B, 0x1C, 0x01,
+        0x52, 0x4F, 0x68, 0x75, 0xCE, 0xD3, 0xF4, 0xE9, 0xBA, 0xA7, 0x80, 0x9D,
+        0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8, 0x03, 0x1E, 0x39, 0x24,
+        0x77, 0x6A, 0x4D, 0x50, 0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2,
+        0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A, 0x6C, 0x71, 0x56, 0x4B,
+        0x18, 0x05, 0x22, 0x3F, 0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7,
+        0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66, 0xDD, 0xC0, 0xE7, 0xFA,
+        0xA9, 0xB4, 0x93, 0x8E, 0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB,
+        0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43, 0xB2, 0xAF, 0x88, 0x95,
+        0xC6, 0xDB, 0xFC, 0xE1, 0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09,
+        0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C, 0x97, 0x8A, 0xAD, 0xB0,
+        0xE3, 0xFE, 0xD9, 0xC4, 0x01
+    },
     [AV_CRC_16_ANSI] = {
         0x0000, 0x0580, 0x0F80, 0x0A00, 0x1B80, 0x1E00, 0x1400, 0x1180,
         0x3380, 0x3600, 0x3C00, 0x3980, 0x2800, 0x2D80, 0x2780, 0x2200,
@@ -291,20 +317,25 @@
 #else
 #define CRC_TABLE_SIZE 1024
 #endif
-static struct {
-    uint8_t  le;
-    uint8_t  bits;
-    uint32_t poly;
-} av_crc_table_params[AV_CRC_MAX] = {
-    [AV_CRC_8_ATM]      = { 0,  8,       0x07 },
-    [AV_CRC_16_ANSI]    = { 0, 16,     0x8005 },
-    [AV_CRC_16_CCITT]   = { 0, 16,     0x1021 },
-    [AV_CRC_24_IEEE]    = { 0, 24,   0x864CFB },
-    [AV_CRC_32_IEEE]    = { 0, 32, 0x04C11DB7 },
-    [AV_CRC_32_IEEE_LE] = { 1, 32, 0xEDB88320 },
-    [AV_CRC_16_ANSI_LE] = { 1, 16,     0xA001 },
-};
 static AVCRC av_crc_table[AV_CRC_MAX][CRC_TABLE_SIZE];
+
+#define DECLARE_CRC_INIT_TABLE_ONCE(id, le, bits, poly)                                       \
+static AVOnce id ## _once_control = AV_ONCE_INIT;                                             \
+static void id ## _init_table_once(void)                                                      \
+{                                                                                             \
+    av_assert0(av_crc_init(av_crc_table[id], le, bits, poly, sizeof(av_crc_table[id])) >= 0); \
+}
+
+#define CRC_INIT_TABLE_ONCE(id) ff_thread_once(&id ## _once_control, id ## _init_table_once)
+
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_8_ATM,      0,  8,       0x07)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_8_EBU,      0,  8,       0x1D)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_16_ANSI,    0, 16,     0x8005)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_16_CCITT,   0, 16,     0x1021)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_24_IEEE,    0, 24,   0x864CFB)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_32_IEEE,    0, 32, 0x04C11DB7)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_32_IEEE_LE, 1, 32, 0xEDB88320)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_16_ANSI_LE, 1, 16,     0xA001)
 #endif
 
 int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size)
@@ -343,13 +374,17 @@
 const AVCRC *av_crc_get_table(AVCRCId crc_id)
 {
 #if !CONFIG_HARDCODED_TABLES
-    if (!av_crc_table[crc_id][FF_ARRAY_ELEMS(av_crc_table[crc_id]) - 1])
-        if (av_crc_init(av_crc_table[crc_id],
-                        av_crc_table_params[crc_id].le,
-                        av_crc_table_params[crc_id].bits,
-                        av_crc_table_params[crc_id].poly,
-                        sizeof(av_crc_table[crc_id])) < 0)
-            return NULL;
+    switch (crc_id) {
+    case AV_CRC_8_ATM:      CRC_INIT_TABLE_ONCE(AV_CRC_8_ATM); break;
+    case AV_CRC_8_EBU:      CRC_INIT_TABLE_ONCE(AV_CRC_8_EBU); break;
+    case AV_CRC_16_ANSI:    CRC_INIT_TABLE_ONCE(AV_CRC_16_ANSI); break;
+    case AV_CRC_16_CCITT:   CRC_INIT_TABLE_ONCE(AV_CRC_16_CCITT); break;
+    case AV_CRC_24_IEEE:    CRC_INIT_TABLE_ONCE(AV_CRC_24_IEEE); break;
+    case AV_CRC_32_IEEE:    CRC_INIT_TABLE_ONCE(AV_CRC_32_IEEE); break;
+    case AV_CRC_32_IEEE_LE: CRC_INIT_TABLE_ONCE(AV_CRC_32_IEEE_LE); break;
+    case AV_CRC_16_ANSI_LE: CRC_INIT_TABLE_ONCE(AV_CRC_16_ANSI_LE); break;
+    default: av_assert0(0);
+    }
 #endif
     return av_crc_table[crc_id];
 }

diff --git a/libavutil/crc.h b/libavutil/crc.h
index 2a1b0d7..47e22b4 100644
--- a/libavutil/crc.h
+++ b/libavutil/crc.h

@@ -53,11 +53,8 @@
     AV_CRC_32_IEEE,
     AV_CRC_32_IEEE_LE,  /*< reversed bitorder version of AV_CRC_32_IEEE */
     AV_CRC_16_ANSI_LE,  /*< reversed bitorder version of AV_CRC_16_ANSI */
-#if FF_API_CRC_BIG_TABLE
-    AV_CRC_24_IEEE = 12,
-#else
     AV_CRC_24_IEEE,
-#endif /* FF_API_CRC_BIG_TABLE */
+    AV_CRC_8_EBU,
     AV_CRC_MAX,         /*< Not part of public API! Do not use outside libavutil. */
 }AVCRCId;
 

diff --git a/libavutil/encryption_info.c b/libavutil/encryption_info.c
new file mode 100644
index 0000000..812c704
--- /dev/null
+++ b/libavutil/encryption_info.c

@@ -0,0 +1,339 @@
+/**
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "encryption_info.h"
+#include "mem.h"
+#include "intreadwrite.h"
+
+#define FF_ENCRYPTION_INFO_EXTRA 24
+
+// The format of the AVEncryptionInfo side data:
+// u32be scheme
+// u32be crypt_byte_block
+// u32be skip_byte_block
+// u32be key_id_size
+// u32be iv_size
+// u32be subsample_count
+// u8[key_id_size] key_id
+// u8[iv_size] iv
+// {
+//   u32be bytes_of_clear_data
+//   u32be bytes_of_protected_data
+// }[subsample_count]
+
+AVEncryptionInfo *av_encryption_info_alloc(uint32_t subsample_count, uint32_t key_id_size, uint32_t iv_size)
+{
+    AVEncryptionInfo *info;
+
+    info = av_mallocz(sizeof(*info));
+    if (!info)
+        return NULL;
+
+    info->key_id = av_mallocz(key_id_size);
+    info->key_id_size = key_id_size;
+    info->iv = av_mallocz(iv_size);
+    info->iv_size = iv_size;
+    info->subsamples = av_mallocz_array(subsample_count, sizeof(*info->subsamples));
+    info->subsample_count = subsample_count;
+
+    // Allow info->subsamples to be NULL if there are no subsamples.
+    if (!info->key_id || !info->iv || (!info->subsamples && subsample_count)) {
+        av_encryption_info_free(info);
+        return NULL;
+    }
+
+    return info;
+}
+
+AVEncryptionInfo *av_encryption_info_clone(const AVEncryptionInfo *info)
+{
+    AVEncryptionInfo *ret;
+
+    ret = av_encryption_info_alloc(info->subsample_count, info->key_id_size, info->iv_size);
+    if (!ret)
+        return NULL;
+
+    ret->scheme = info->scheme;
+    ret->crypt_byte_block = info->crypt_byte_block;
+    ret->skip_byte_block = info->skip_byte_block;
+    memcpy(ret->iv, info->iv, info->iv_size);
+    memcpy(ret->key_id, info->key_id, info->key_id_size);
+    memcpy(ret->subsamples, info->subsamples, sizeof(*info->subsamples) * info->subsample_count);
+    return ret;
+}
+
+void av_encryption_info_free(AVEncryptionInfo *info)
+{
+    if (info) {
+        av_free(info->key_id);
+        av_free(info->iv);
+        av_free(info->subsamples);
+        av_free(info);
+    }
+}
+
+AVEncryptionInfo *av_encryption_info_get_side_data(const uint8_t* buffer, size_t size)
+{
+    AVEncryptionInfo *info;
+    uint64_t key_id_size, iv_size, subsample_count, i;
+
+    if (!buffer || size < FF_ENCRYPTION_INFO_EXTRA)
+        return NULL;
+
+    key_id_size = AV_RB32(buffer + 12);
+    iv_size = AV_RB32(buffer + 16);
+    subsample_count = AV_RB32(buffer + 20);
+
+    if (size < FF_ENCRYPTION_INFO_EXTRA + key_id_size + iv_size + subsample_count * 8)
+        return NULL;
+
+    info = av_encryption_info_alloc(subsample_count, key_id_size, iv_size);
+    if (!info)
+        return NULL;
+
+    info->scheme = AV_RB32(buffer);
+    info->crypt_byte_block = AV_RB32(buffer + 4);
+    info->skip_byte_block = AV_RB32(buffer + 8);
+    memcpy(info->key_id, buffer + 24, key_id_size);
+    memcpy(info->iv, buffer + key_id_size + 24, iv_size);
+
+    buffer += key_id_size + iv_size + 24;
+    for (i = 0; i < subsample_count; i++) {
+        info->subsamples[i].bytes_of_clear_data = AV_RB32(buffer);
+        info->subsamples[i].bytes_of_protected_data = AV_RB32(buffer + 4);
+        buffer += 8;
+    }
+
+    return info;
+}
+
+uint8_t *av_encryption_info_add_side_data(const AVEncryptionInfo *info, size_t *size)
+{
+    uint8_t *buffer, *cur_buffer;
+    uint32_t i;
+
+    if (UINT32_MAX - FF_ENCRYPTION_INFO_EXTRA < info->key_id_size ||
+        UINT32_MAX - FF_ENCRYPTION_INFO_EXTRA - info->key_id_size < info->iv_size ||
+        (UINT32_MAX - FF_ENCRYPTION_INFO_EXTRA - info->key_id_size - info->iv_size) / 8 < info->subsample_count) {
+        return NULL;
+    }
+
+    *size = FF_ENCRYPTION_INFO_EXTRA + info->key_id_size + info->iv_size +
+            (info->subsample_count * 8);
+    cur_buffer = buffer = av_malloc(*size);
+    if (!buffer)
+        return NULL;
+
+    AV_WB32(cur_buffer,      info->scheme);
+    AV_WB32(cur_buffer +  4, info->crypt_byte_block);
+    AV_WB32(cur_buffer +  8, info->skip_byte_block);
+    AV_WB32(cur_buffer + 12, info->key_id_size);
+    AV_WB32(cur_buffer + 16, info->iv_size);
+    AV_WB32(cur_buffer + 20, info->subsample_count);
+    cur_buffer += 24;
+    memcpy(cur_buffer, info->key_id, info->key_id_size);
+    cur_buffer += info->key_id_size;
+    memcpy(cur_buffer, info->iv, info->iv_size);
+    cur_buffer += info->iv_size;
+    for (i = 0; i < info->subsample_count; i++) {
+        AV_WB32(cur_buffer, info->subsamples[i].bytes_of_clear_data);
+        AV_WB32(cur_buffer + 4, info->subsamples[i].bytes_of_protected_data);
+        cur_buffer += 8;
+    }
+
+    return buffer;
+}
+
+// The format of the AVEncryptionInitInfo side data:
+// u32be init_info_count
+// {
+//   u32be system_id_size
+//   u32be num_key_ids
+//   u32be key_id_size
+//   u32be data_size
+//   u8[system_id_size] system_id
+//   u8[key_id_size][num_key_id] key_ids
+//   u8[data_size] data
+// }[init_info_count]
+
+#define FF_ENCRYPTION_INIT_INFO_EXTRA 16
+
+AVEncryptionInitInfo *av_encryption_init_info_alloc(
+    uint32_t system_id_size, uint32_t num_key_ids, uint32_t key_id_size, uint32_t data_size)
+{
+    AVEncryptionInitInfo *info;
+    uint32_t i;
+
+    info = av_mallocz(sizeof(*info));
+    if (!info)
+        return NULL;
+
+    info->system_id = av_mallocz(system_id_size);
+    info->system_id_size = system_id_size;
+    info->key_ids = key_id_size ? av_mallocz_array(num_key_ids, sizeof(*info->key_ids)) : NULL;
+    info->num_key_ids = num_key_ids;
+    info->key_id_size = key_id_size;
+    info->data = av_mallocz(data_size);
+    info->data_size = data_size;
+
+    // Allow pointers to be NULL if the size is 0.
+    if ((!info->system_id && system_id_size) || (!info->data && data_size) ||
+        (!info->key_ids && num_key_ids && key_id_size)) {
+        av_encryption_init_info_free(info);
+        return NULL;
+    }
+
+    if (key_id_size) {
+        for (i = 0; i < num_key_ids; i++) {
+            info->key_ids[i] = av_mallocz(key_id_size);
+            if (!info->key_ids[i]) {
+                av_encryption_init_info_free(info);
+                return NULL;
+            }
+        }
+    }
+
+    return info;
+}
+
+void av_encryption_init_info_free(AVEncryptionInitInfo *info)
+{
+    uint32_t i;
+    if (info) {
+        for (i = 0; i < info->num_key_ids; i++) {
+            av_free(info->key_ids[i]);
+        }
+        av_encryption_init_info_free(info->next);
+        av_free(info->system_id);
+        av_free(info->key_ids);
+        av_free(info->data);
+        av_free(info);
+    }
+}
+
+AVEncryptionInitInfo *av_encryption_init_info_get_side_data(
+    const uint8_t *side_data, size_t side_data_size)
+{
+    // |ret| tracks the front of the list, |info| tracks the back.
+    AVEncryptionInitInfo *ret = NULL, *info, *temp_info;
+    uint64_t system_id_size, num_key_ids, key_id_size, data_size, i, j;
+    uint64_t init_info_count;
+
+    if (!side_data || side_data_size < 4)
+        return NULL;
+
+    init_info_count = AV_RB32(side_data);
+    side_data += 4;
+    side_data_size -= 4;
+    for (i = 0; i < init_info_count; i++) {
+        if (side_data_size < FF_ENCRYPTION_INIT_INFO_EXTRA) {
+            av_encryption_init_info_free(ret);
+            return NULL;
+        }
+
+        system_id_size = AV_RB32(side_data);
+        num_key_ids = AV_RB32(side_data + 4);
+        key_id_size = AV_RB32(side_data + 8);
+        data_size = AV_RB32(side_data + 12);
+
+        // UINT32_MAX + UINT32_MAX + UINT32_MAX * UINT32_MAX == UINT64_MAX
+        if (side_data_size - FF_ENCRYPTION_INIT_INFO_EXTRA < system_id_size + data_size + num_key_ids * key_id_size) {
+            av_encryption_init_info_free(ret);
+            return NULL;
+        }
+        side_data += FF_ENCRYPTION_INIT_INFO_EXTRA;
+        side_data_size -= FF_ENCRYPTION_INIT_INFO_EXTRA;
+
+        temp_info = av_encryption_init_info_alloc(system_id_size, num_key_ids, key_id_size, data_size);
+        if (!temp_info) {
+            av_encryption_init_info_free(ret);
+            return NULL;
+        }
+        if (i == 0) {
+            info = ret = temp_info;
+        } else {
+            info->next = temp_info;
+            info = temp_info;
+        }
+
+        memcpy(info->system_id, side_data, system_id_size);
+        side_data += system_id_size;
+        side_data_size -= system_id_size;
+        for (j = 0; j < num_key_ids; j++) {
+            memcpy(info->key_ids[j], side_data, key_id_size);
+            side_data += key_id_size;
+            side_data_size -= key_id_size;
+        }
+        memcpy(info->data, side_data, data_size);
+        side_data += data_size;
+        side_data_size -= data_size;
+    }
+
+    return ret;
+}
+
+uint8_t *av_encryption_init_info_add_side_data(const AVEncryptionInitInfo *info, size_t *side_data_size)
+{
+    const AVEncryptionInitInfo *cur_info;
+    uint8_t *buffer, *cur_buffer;
+    uint32_t i, init_info_count;
+    uint64_t temp_side_data_size;
+
+    temp_side_data_size = 4;
+    init_info_count = 0;
+    for (cur_info = info; cur_info; cur_info = cur_info->next) {
+        temp_side_data_size += (uint64_t)FF_ENCRYPTION_INIT_INFO_EXTRA + cur_info->system_id_size + cur_info->data_size;
+        if (init_info_count == UINT32_MAX || temp_side_data_size > UINT32_MAX) {
+            return NULL;
+        }
+        init_info_count++;
+
+        if (cur_info->num_key_ids) {
+            temp_side_data_size += (uint64_t)cur_info->num_key_ids * cur_info->key_id_size;
+            if (temp_side_data_size > UINT32_MAX) {
+                return NULL;
+            }
+        }
+    }
+    *side_data_size = temp_side_data_size;
+
+    cur_buffer = buffer = av_malloc(*side_data_size);
+    if (!buffer)
+        return NULL;
+
+    AV_WB32(cur_buffer, init_info_count);
+    cur_buffer += 4;
+    for (cur_info = info; cur_info; cur_info = cur_info->next) {
+        AV_WB32(cur_buffer,      cur_info->system_id_size);
+        AV_WB32(cur_buffer +  4, cur_info->num_key_ids);
+        AV_WB32(cur_buffer +  8, cur_info->key_id_size);
+        AV_WB32(cur_buffer + 12, cur_info->data_size);
+        cur_buffer += 16;
+
+        memcpy(cur_buffer, cur_info->system_id, cur_info->system_id_size);
+        cur_buffer += cur_info->system_id_size;
+        for (i = 0; i < cur_info->num_key_ids; i++) {
+            memcpy(cur_buffer, cur_info->key_ids[i], cur_info->key_id_size);
+            cur_buffer += cur_info->key_id_size;
+        }
+        memcpy(cur_buffer, cur_info->data, cur_info->data_size);
+        cur_buffer += cur_info->data_size;
+    }
+
+    return buffer;
+}

diff --git a/libavutil/encryption_info.h b/libavutil/encryption_info.h
new file mode 100644
index 0000000..8fe7ebf
--- /dev/null
+++ b/libavutil/encryption_info.h

@@ -0,0 +1,205 @@
+/**
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_ENCRYPTION_INFO_H
+#define AVUTIL_ENCRYPTION_INFO_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct AVSubsampleEncryptionInfo {
+    /** The number of bytes that are clear. */
+    unsigned int bytes_of_clear_data;
+
+    /**
+     * The number of bytes that are protected.  If using pattern encryption,
+     * the pattern applies to only the protected bytes; if not using pattern
+     * encryption, all these bytes are encrypted.
+     */
+    unsigned int bytes_of_protected_data;
+} AVSubsampleEncryptionInfo;
+
+/**
+ * This describes encryption info for a packet.  This contains frame-specific
+ * info for how to decrypt the packet before passing it to the decoder.
+ *
+ * The size of this struct is not part of the public ABI.
+ */
+typedef struct AVEncryptionInfo {
+    /** The fourcc encryption scheme, in big-endian byte order. */
+    uint32_t scheme;
+
+    /**
+     * Only used for pattern encryption.  This is the number of 16-byte blocks
+     * that are encrypted.
+     */
+    uint32_t crypt_byte_block;
+
+    /**
+     * Only used for pattern encryption.  This is the number of 16-byte blocks
+     * that are clear.
+     */
+    uint32_t skip_byte_block;
+
+    /**
+     * The ID of the key used to encrypt the packet.  This should always be
+     * 16 bytes long, but may be changed in the future.
+     */
+    uint8_t *key_id;
+    uint32_t key_id_size;
+
+    /**
+     * The initialization vector.  This may have been zero-filled to be the
+     * correct block size.  This should always be 16 bytes long, but may be
+     * changed in the future.
+     */
+    uint8_t *iv;
+    uint32_t iv_size;
+
+    /**
+     * An array of subsample encryption info specifying how parts of the sample
+     * are encrypted.  If there are no subsamples, then the whole sample is
+     * encrypted.
+     */
+    AVSubsampleEncryptionInfo *subsamples;
+    uint32_t subsample_count;
+} AVEncryptionInfo;
+
+/**
+ * This describes info used to initialize an encryption key system.
+ *
+ * The size of this struct is not part of the public ABI.
+ */
+typedef struct AVEncryptionInitInfo {
+    /**
+     * A unique identifier for the key system this is for, can be NULL if it
+     * is not known.  This should always be 16 bytes, but may change in the
+     * future.
+     */
+    uint8_t* system_id;
+    uint32_t system_id_size;
+
+    /**
+     * An array of key IDs this initialization data is for.  All IDs are the
+     * same length.  Can be NULL if there are no known key IDs.
+     */
+    uint8_t** key_ids;
+    /** The number of key IDs. */
+    uint32_t num_key_ids;
+    /**
+     * The number of bytes in each key ID.  This should always be 16, but may
+     * change in the future.
+     */
+    uint32_t key_id_size;
+
+    /**
+     * Key-system specific initialization data.  This data is copied directly
+     * from the file and the format depends on the specific key system.  This
+     * can be NULL if there is no initialization data; in that case, there
+     * will be at least one key ID.
+     */
+    uint8_t* data;
+    uint32_t data_size;
+
+    /**
+     * An optional pointer to the next initialization info in the list.
+     */
+    struct AVEncryptionInitInfo *next;
+} AVEncryptionInitInfo;
+
+/**
+ * Allocates an AVEncryptionInfo structure and sub-pointers to hold the given
+ * number of subsamples.  This will allocate pointers for the key ID, IV,
+ * and subsample entries, set the size members, and zero-initialize the rest.
+ *
+ * @param subsample_count The number of subsamples.
+ * @param key_id_size The number of bytes in the key ID, should be 16.
+ * @param iv_size The number of bytes in the IV, should be 16.
+ *
+ * @return The new AVEncryptionInfo structure, or NULL on error.
+ */
+AVEncryptionInfo *av_encryption_info_alloc(uint32_t subsample_count, uint32_t key_id_size, uint32_t iv_size);
+
+/**
+ * Allocates an AVEncryptionInfo structure with a copy of the given data.
+ * @return The new AVEncryptionInfo structure, or NULL on error.
+ */
+AVEncryptionInfo *av_encryption_info_clone(const AVEncryptionInfo *info);
+
+/**
+ * Frees the given encryption info object.  This MUST NOT be used to free the
+ * side-data data pointer, that should use normal side-data methods.
+ */
+void av_encryption_info_free(AVEncryptionInfo *info);
+
+/**
+ * Creates a copy of the AVEncryptionInfo that is contained in the given side
+ * data.  The resulting object should be passed to av_encryption_info_free()
+ * when done.
+ *
+ * @return The new AVEncryptionInfo structure, or NULL on error.
+ */
+AVEncryptionInfo *av_encryption_info_get_side_data(const uint8_t *side_data, size_t side_data_size);
+
+/**
+ * Allocates and initializes side data that holds a copy of the given encryption
+ * info.  The resulting pointer should be either freed using av_free or given
+ * to av_packet_add_side_data().
+ *
+ * @return The new side-data pointer, or NULL.
+ */
+uint8_t *av_encryption_info_add_side_data(
+      const AVEncryptionInfo *info, size_t *side_data_size);
+
+
+/**
+ * Allocates an AVEncryptionInitInfo structure and sub-pointers to hold the
+ * given sizes.  This will allocate pointers and set all the fields.
+ *
+ * @return The new AVEncryptionInitInfo structure, or NULL on error.
+ */
+AVEncryptionInitInfo *av_encryption_init_info_alloc(
+    uint32_t system_id_size, uint32_t num_key_ids, uint32_t key_id_size, uint32_t data_size);
+
+/**
+ * Frees the given encryption init info object.  This MUST NOT be used to free
+ * the side-data data pointer, that should use normal side-data methods.
+ */
+void av_encryption_init_info_free(AVEncryptionInitInfo* info);
+
+/**
+ * Creates a copy of the AVEncryptionInitInfo that is contained in the given
+ * side data.  The resulting object should be passed to
+ * av_encryption_init_info_free() when done.
+ *
+ * @return The new AVEncryptionInitInfo structure, or NULL on error.
+ */
+AVEncryptionInitInfo *av_encryption_init_info_get_side_data(
+    const uint8_t* side_data, size_t side_data_size);
+
+/**
+ * Allocates and initializes side data that holds a copy of the given encryption
+ * init info.  The resulting pointer should be either freed using av_free or
+ * given to av_packet_add_side_data().
+ *
+ * @return The new side-data pointer, or NULL.
+ */
+uint8_t *av_encryption_init_info_add_side_data(
+    const AVEncryptionInitInfo *info, size_t *side_data_size);
+
+#endif /* AVUTIL_ENCRYPTION_INFO_H */

diff --git a/libavutil/eval.c b/libavutil/eval.c
index b5f4ea2..5da9a6d 100644
--- a/libavutil/eval.c
+++ b/libavutil/eval.c

@@ -57,7 +57,14 @@
     double *var;
 } Parser;
 
-static const AVClass eval_class = { "Eval", av_default_item_name, NULL, LIBAVUTIL_VERSION_INT, offsetof(Parser,log_offset), offsetof(Parser,log_ctx) };
+static const AVClass eval_class = {
+    .class_name                = "Eval",
+    .item_name                 = av_default_item_name,
+    .option                    = NULL,
+    .version                   = LIBAVUTIL_VERSION_INT,
+    .log_level_offset_offset   = offsetof(Parser, log_offset),
+    .parent_log_context_offset = offsetof(Parser, log_ctx),
+};
 
 static const struct {
     double bin_val;

diff --git a/libavutil/file.c b/libavutil/file.c
index 7bdf6cd..d946085 100644
--- a/libavutil/file.c
+++ b/libavutil/file.c

@@ -42,8 +42,12 @@
 } FileLogContext;
 
 static const AVClass file_log_ctx_class = {
-    "FILE", av_default_item_name, NULL, LIBAVUTIL_VERSION_INT,
-    offsetof(FileLogContext, log_offset), offsetof(FileLogContext, log_ctx)
+    .class_name                = "FILE",
+    .item_name                 = av_default_item_name,
+    .option                    = NULL,
+    .version                   = LIBAVUTIL_VERSION_INT,
+    .log_level_offset_offset   = offsetof(FileLogContext, log_offset),
+    .parent_log_context_offset = offsetof(FileLogContext, log_ctx),
 };
 
 int av_file_map(const char *filename, uint8_t **bufptr, size_t *size,
@@ -81,6 +85,11 @@
     }
     *size = off_size;
 
+    if (!*size) {
+        *bufptr = NULL;
+        goto out;
+    }
+
 #if HAVE_MMAP
     ptr = mmap(NULL, *size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
     if (ptr == MAP_FAILED) {
@@ -122,12 +131,15 @@
     read(fd, *bufptr, *size);
 #endif
 
+out:
     close(fd);
     return 0;
 }
 
 void av_file_unmap(uint8_t *bufptr, size_t size)
 {
+    if (!size)
+        return;
 #if HAVE_MMAP
     munmap(bufptr, size);
 #elif HAVE_MAPVIEWOFFILE

diff --git a/libavutil/file.h b/libavutil/file.h
index 8666c7b..3ef4a60 100644
--- a/libavutil/file.h
+++ b/libavutil/file.h

@@ -33,6 +33,8 @@
  * allocated buffer or map it with mmap() when available.
  * In case of success set *bufptr to the read or mmapped buffer, and
  * *size to the size in bytes of the buffer in *bufptr.
+ * Unlike mmap this function succeeds with zero sized files, in this
+ * case *bufptr will be set to NULL and *size will be set to 0.
  * The returned buffer must be released with av_file_unmap().
  *
  * @param log_offset loglevel offset used for logging

diff --git a/libavutil/file_open.c b/libavutil/file_open.c
index 34070d9..a8da283 100644
--- a/libavutil/file_open.c
+++ b/libavutil/file_open.c

@@ -29,7 +29,7 @@
 #include <io.h>
 #endif
 
-#if defined(_WIN32) && !defined(__MINGW32CE__)
+#ifdef _WIN32
 #undef open
 #undef lseek
 #undef stat
@@ -99,8 +99,12 @@
 } FileLogContext;
 
 static const AVClass file_log_ctx_class = {
-    "TEMPFILE", av_default_item_name, NULL, LIBAVUTIL_VERSION_INT,
-    offsetof(FileLogContext, log_offset), offsetof(FileLogContext, log_ctx)
+    .class_name                = "TEMPFILE",
+    .item_name                 = av_default_item_name,
+    .option                    = NULL,
+    .version                   = LIBAVUTIL_VERSION_INT,
+    .log_level_offset_offset   = offsetof(FileLogContext, log_offset),
+    .parent_log_context_offset = offsetof(FileLogContext, log_ctx),
 };
 
 int avpriv_tempfile(const char *prefix, char **filename, int log_offset, void *log_ctx)

diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 1d4911d..6e28d71 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c

@@ -32,6 +32,14 @@
         dst[i] = src0[i] * src1[i];
 }
 
+static void vector_dmul_c(double *dst, const double *src0, const double *src1,
+                          int len)
+{
+    int i;
+    for (i = 0; i < len; i++)
+        dst[i] = src0[i] * src1[i];
+}
+
 static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                  int len)
 {
@@ -131,6 +139,7 @@
         return NULL;
 
     fdsp->vector_fmul = vector_fmul_c;
+    fdsp->vector_dmul = vector_dmul_c;
     fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
     fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
     fdsp->vector_dmac_scalar = vector_dmac_scalar_c;

diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 2c24d93..9c66459 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h

@@ -173,6 +173,22 @@
      * @return sum of elementwise products
      */
     float (*scalarproduct_float)(const float *v1, const float *v2, int len);
+
+    /**
+     * Calculate the entry wise product of two vectors of doubles and store the result in
+     * a vector of doubles.
+     *
+     * @param dst  output vector
+     *             constraints: 32-byte aligned
+     * @param src0 first input vector
+     *             constraints: 32-byte aligned
+     * @param src1 second input vector
+     *             constraints: 32-byte aligned
+     * @param len  number of elements in the input
+     *             constraints: multiple of 16
+     */
+    void (*vector_dmul)(double *dst, const double *src0, const double *src1,
+                        int len);
 } AVFloatDSPContext;
 
 /**

diff --git a/libavutil/frame.c b/libavutil/frame.c
index d5fd293..4460325 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c

@@ -26,11 +26,7 @@
 #include "mem.h"
 #include "samplefmt.h"
 
-
-static AVFrameSideData *frame_new_side_data(AVFrame *frame,
-                                            enum AVFrameSideDataType type,
-                                            AVBufferRef *buf);
-
+#if FF_API_FRAME_GET_SET
 MAKE_ACCESSORS(AVFrame, frame, int64_t, best_effort_timestamp)
 MAKE_ACCESSORS(AVFrame, frame, int64_t, pkt_duration)
 MAKE_ACCESSORS(AVFrame, frame, int64_t, pkt_pos)
@@ -42,41 +38,84 @@
 MAKE_ACCESSORS(AVFrame, frame, int,     pkt_size)
 MAKE_ACCESSORS(AVFrame, frame, enum AVColorSpace, colorspace)
 MAKE_ACCESSORS(AVFrame, frame, enum AVColorRange, color_range)
+#endif
 
 #define CHECK_CHANNELS_CONSISTENCY(frame) \
     av_assert2(!(frame)->channel_layout || \
                (frame)->channels == \
                av_get_channel_layout_nb_channels((frame)->channel_layout))
 
-AVDictionary **avpriv_frame_get_metadatap(AVFrame *frame) {return &frame->metadata;};
-
 #if FF_API_FRAME_QP
+struct qp_properties {
+    int stride;
+    int type;
+};
+
 int av_frame_set_qp_table(AVFrame *f, AVBufferRef *buf, int stride, int qp_type)
 {
+    struct qp_properties *p;
+    AVFrameSideData *sd;
+    AVBufferRef *ref;
+
+FF_DISABLE_DEPRECATION_WARNINGS
     av_buffer_unref(&f->qp_table_buf);
 
     f->qp_table_buf = buf;
-
-FF_DISABLE_DEPRECATION_WARNINGS
     f->qscale_table = buf->data;
     f->qstride      = stride;
     f->qscale_type  = qp_type;
 FF_ENABLE_DEPRECATION_WARNINGS
 
+    av_frame_remove_side_data(f, AV_FRAME_DATA_QP_TABLE_PROPERTIES);
+    av_frame_remove_side_data(f, AV_FRAME_DATA_QP_TABLE_DATA);
+
+    ref = av_buffer_ref(buf);
+    if (!av_frame_new_side_data_from_buf(f, AV_FRAME_DATA_QP_TABLE_DATA, ref)) {
+        av_buffer_unref(&ref);
+        return AVERROR(ENOMEM);
+    }
+
+    sd = av_frame_new_side_data(f, AV_FRAME_DATA_QP_TABLE_PROPERTIES,
+                                sizeof(struct qp_properties));
+    if (!sd)
+        return AVERROR(ENOMEM);
+
+    p = (struct qp_properties *)sd->data;
+    p->stride = stride;
+    p->type = qp_type;
+
     return 0;
 }
 
 int8_t *av_frame_get_qp_table(AVFrame *f, int *stride, int *type)
 {
+    AVBufferRef *buf = NULL;
+
+    *stride = 0;
+    *type   = 0;
+
 FF_DISABLE_DEPRECATION_WARNINGS
-    *stride = f->qstride;
-    *type   = f->qscale_type;
+    if (f->qp_table_buf) {
+        *stride = f->qstride;
+        *type   = f->qscale_type;
+        buf     = f->qp_table_buf;
 FF_ENABLE_DEPRECATION_WARNINGS
+    } else {
+        AVFrameSideData *sd;
+        struct qp_properties *p;
+        sd = av_frame_get_side_data(f, AV_FRAME_DATA_QP_TABLE_PROPERTIES);
+        if (!sd)
+            return NULL;
+        p = (struct qp_properties *)sd->data;
+        sd = av_frame_get_side_data(f, AV_FRAME_DATA_QP_TABLE_DATA);
+        if (!sd)
+            return NULL;
+        *stride = p->stride;
+        *type   = p->type;
+        buf     = sd->buf;
+    }
 
-    if (!f->qp_table_buf)
-        return NULL;
-
-    return f->qp_table_buf->data;
+    return buf ? buf->data : NULL;
 }
 #endif
 
@@ -172,7 +211,8 @@
 static int get_video_buffer(AVFrame *frame, int align)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
-    int ret, i;
+    int ret, i, padded_height;
+    int plane_padding = FFMAX(16 + 16/*STRIDE_ALIGN*/, align);
 
     if (!desc)
         return AVERROR(EINVAL);
@@ -197,23 +237,22 @@
             frame->linesize[i] = FFALIGN(frame->linesize[i], align);
     }
 
-    for (i = 0; i < 4 && frame->linesize[i]; i++) {
-        int h = FFALIGN(frame->height, 32);
-        if (i == 1 || i == 2)
-            h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
+    padded_height = FFALIGN(frame->height, 32);
+    if ((ret = av_image_fill_pointers(frame->data, frame->format, padded_height,
+                                      NULL, frame->linesize)) < 0)
+        return ret;
 
-        frame->buf[i] = av_buffer_alloc(frame->linesize[i] * h + 16 + 16/*STRIDE_ALIGN*/ - 1);
-        if (!frame->buf[i])
-            goto fail;
+    frame->buf[0] = av_buffer_alloc(ret + 4*plane_padding);
+    if (!frame->buf[0])
+        goto fail;
 
-        frame->data[i] = frame->buf[i]->data;
-    }
-    if (desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) {
-        av_buffer_unref(&frame->buf[1]);
-        frame->buf[1] = av_buffer_alloc(AVPALETTE_SIZE);
-        if (!frame->buf[1])
-            goto fail;
-        frame->data[1] = frame->buf[1]->data;
+    if (av_image_fill_pointers(frame->data, frame->format, padded_height,
+                               frame->buf[0]->data, frame->linesize) < 0)
+        goto fail;
+
+    for (i = 1; i < 4; i++) {
+        if (frame->data[i])
+            frame->data[i] += i * plane_padding;
     }
 
     frame->extended_data = frame->data;
@@ -356,8 +395,10 @@
             }
             memcpy(sd_dst->data, sd_src->data, sd_src->size);
         } else {
-            sd_dst = frame_new_side_data(dst, sd_src->type, av_buffer_ref(sd_src->buf));
+            AVBufferRef *ref = av_buffer_ref(sd_src->buf);
+            sd_dst = av_frame_new_side_data_from_buf(dst, sd_src->type, ref);
             if (!sd_dst) {
+                av_buffer_unref(&ref);
                 wipe_side_data(dst);
                 return AVERROR(ENOMEM);
             }
@@ -383,12 +424,17 @@
 #endif
 
     av_buffer_unref(&dst->opaque_ref);
+    av_buffer_unref(&dst->private_ref);
     if (src->opaque_ref) {
         dst->opaque_ref = av_buffer_ref(src->opaque_ref);
         if (!dst->opaque_ref)
             return AVERROR(ENOMEM);
     }
-
+    if (src->private_ref) {
+        dst->private_ref = av_buffer_ref(src->private_ref);
+        if (!dst->private_ref)
+            return AVERROR(ENOMEM);
+    }
     return 0;
 }
 
@@ -518,12 +564,15 @@
     av_freep(&frame->extended_buf);
     av_dict_free(&frame->metadata);
 #if FF_API_FRAME_QP
+FF_DISABLE_DEPRECATION_WARNINGS
     av_buffer_unref(&frame->qp_table_buf);
+FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
     av_buffer_unref(&frame->hw_frames_ctx);
 
     av_buffer_unref(&frame->opaque_ref);
+    av_buffer_unref(&frame->private_ref);
 
     get_frame_defaults(frame);
 }
@@ -636,9 +685,9 @@
     return NULL;
 }
 
-static AVFrameSideData *frame_new_side_data(AVFrame *frame,
-                                            enum AVFrameSideDataType type,
-                                            AVBufferRef *buf)
+AVFrameSideData *av_frame_new_side_data_from_buf(AVFrame *frame,
+                                                 enum AVFrameSideDataType type,
+                                                 AVBufferRef *buf)
 {
     AVFrameSideData *ret, **tmp;
 
@@ -646,17 +695,17 @@
         return NULL;
 
     if (frame->nb_side_data > INT_MAX / sizeof(*frame->side_data) - 1)
-        goto fail;
+        return NULL;
 
     tmp = av_realloc(frame->side_data,
                      (frame->nb_side_data + 1) * sizeof(*frame->side_data));
     if (!tmp)
-        goto fail;
+        return NULL;
     frame->side_data = tmp;
 
     ret = av_mallocz(sizeof(*ret));
     if (!ret)
-        goto fail;
+        return NULL;
 
     ret->buf = buf;
     ret->data = ret->buf->data;
@@ -666,17 +715,18 @@
     frame->side_data[frame->nb_side_data++] = ret;
 
     return ret;
-fail:
-    av_buffer_unref(&buf);
-    return NULL;
 }
 
 AVFrameSideData *av_frame_new_side_data(AVFrame *frame,
                                         enum AVFrameSideDataType type,
                                         int size)
 {
-
-    return frame_new_side_data(frame, type, av_buffer_alloc(size));
+    AVFrameSideData *ret;
+    AVBufferRef *buf = av_buffer_alloc(size);
+    ret = av_frame_new_side_data_from_buf(frame, type, buf);
+    if (!ret)
+        av_buffer_unref(&buf);
+    return ret;
 }
 
 AVFrameSideData *av_frame_get_side_data(const AVFrame *frame,
@@ -769,7 +819,7 @@
     switch(type) {
     case AV_FRAME_DATA_PANSCAN:         return "AVPanScan";
     case AV_FRAME_DATA_A53_CC:          return "ATSC A53 Part 4 Closed Captions";
-    case AV_FRAME_DATA_STEREO3D:        return "Stereoscopic 3d metadata";
+    case AV_FRAME_DATA_STEREO3D:        return "Stereo 3D";
     case AV_FRAME_DATA_MATRIXENCODING:  return "AVMatrixEncoding";
     case AV_FRAME_DATA_DOWNMIX_INFO:    return "Metadata relevant to a downmix procedure";
     case AV_FRAME_DATA_REPLAYGAIN:      return "AVReplayGain";
@@ -781,7 +831,12 @@
     case AV_FRAME_DATA_MASTERING_DISPLAY_METADATA:  return "Mastering display metadata";
     case AV_FRAME_DATA_CONTENT_LIGHT_LEVEL:         return "Content light level metadata";
     case AV_FRAME_DATA_GOP_TIMECODE:                return "GOP timecode";
+    case AV_FRAME_DATA_SPHERICAL:                   return "Spherical Mapping";
     case AV_FRAME_DATA_ICC_PROFILE:                 return "ICC profile";
+#if FF_API_FRAME_QP
+    case AV_FRAME_DATA_QP_TABLE_PROPERTIES:         return "QP table properties";
+    case AV_FRAME_DATA_QP_TABLE_DATA:               return "QP table data";
+#endif
     }
     return NULL;
 }
@@ -796,7 +851,7 @@
         int shift_x = (i == 1 || i == 2) ? desc->log2_chroma_w : 0;
         int shift_y = (i == 1 || i == 2) ? desc->log2_chroma_h : 0;
 
-        if (desc->flags & (AV_PIX_FMT_FLAG_PAL | AV_PIX_FMT_FLAG_PSEUDOPAL) && i == 1) {
+        if (desc->flags & (AV_PIX_FMT_FLAG_PAL | FF_PSEUDOPAL) && i == 1) {
             offsets[i] = 0;
             break;
         }

diff --git a/libavutil/frame.h b/libavutil/frame.h
index abe4f4f..9d57d6c 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h

@@ -141,6 +141,23 @@
      * metadata key entry "name".
      */
     AV_FRAME_DATA_ICC_PROFILE,
+
+#if FF_API_FRAME_QP
+    /**
+     * Implementation-specific description of the format of AV_FRAME_QP_TABLE_DATA.
+     * The contents of this side data are undocumented and internal; use
+     * av_frame_set_qp_table() and av_frame_get_qp_table() to access this in a
+     * meaningful way instead.
+     */
+    AV_FRAME_DATA_QP_TABLE_PROPERTIES,
+
+    /**
+     * Raw QP table data. Its format is described by
+     * AV_FRAME_DATA_QP_TABLE_PROPERTIES. Use av_frame_set_qp_table() and
+     * av_frame_get_qp_table() to access this instead.
+     */
+    AV_FRAME_DATA_QP_TABLE_DATA,
+#endif
 };
 
 enum AVActiveFormatDescription {
@@ -529,6 +546,7 @@
     attribute_deprecated
     int qscale_type;
 
+    attribute_deprecated
     AVBufferRef *qp_table_buf;
 #endif
     /**
@@ -563,39 +581,77 @@
     /**
      * @}
      */
+
+    /**
+     * AVBufferRef for internal use by a single libav* library.
+     * Must not be used to transfer data between libraries.
+     * Has to be NULL when ownership of the frame leaves the respective library.
+     *
+     * Code outside the FFmpeg libs should never check or change the contents of the buffer ref.
+     *
+     * FFmpeg calls av_buffer_unref() on it when the frame is unreferenced.
+     * av_frame_copy_props() calls create a new reference with av_buffer_ref()
+     * for the target frame's private_ref field.
+     */
+    AVBufferRef *private_ref;
 } AVFrame;
 
+#if FF_API_FRAME_GET_SET
 /**
  * Accessors for some AVFrame fields. These used to be provided for ABI
  * compatibility, and do not need to be used anymore.
  */
+attribute_deprecated
 int64_t av_frame_get_best_effort_timestamp(const AVFrame *frame);
+attribute_deprecated
 void    av_frame_set_best_effort_timestamp(AVFrame *frame, int64_t val);
+attribute_deprecated
 int64_t av_frame_get_pkt_duration         (const AVFrame *frame);
+attribute_deprecated
 void    av_frame_set_pkt_duration         (AVFrame *frame, int64_t val);
+attribute_deprecated
 int64_t av_frame_get_pkt_pos              (const AVFrame *frame);
+attribute_deprecated
 void    av_frame_set_pkt_pos              (AVFrame *frame, int64_t val);
+attribute_deprecated
 int64_t av_frame_get_channel_layout       (const AVFrame *frame);
+attribute_deprecated
 void    av_frame_set_channel_layout       (AVFrame *frame, int64_t val);
+attribute_deprecated
 int     av_frame_get_channels             (const AVFrame *frame);
+attribute_deprecated
 void    av_frame_set_channels             (AVFrame *frame, int     val);
+attribute_deprecated
 int     av_frame_get_sample_rate          (const AVFrame *frame);
+attribute_deprecated
 void    av_frame_set_sample_rate          (AVFrame *frame, int     val);
+attribute_deprecated
 AVDictionary *av_frame_get_metadata       (const AVFrame *frame);
+attribute_deprecated
 void          av_frame_set_metadata       (AVFrame *frame, AVDictionary *val);
+attribute_deprecated
 int     av_frame_get_decode_error_flags   (const AVFrame *frame);
+attribute_deprecated
 void    av_frame_set_decode_error_flags   (AVFrame *frame, int     val);
+attribute_deprecated
 int     av_frame_get_pkt_size(const AVFrame *frame);
+attribute_deprecated
 void    av_frame_set_pkt_size(AVFrame *frame, int val);
-AVDictionary **avpriv_frame_get_metadatap(AVFrame *frame);
 #if FF_API_FRAME_QP
+attribute_deprecated
 int8_t *av_frame_get_qp_table(AVFrame *f, int *stride, int *type);
+attribute_deprecated
 int av_frame_set_qp_table(AVFrame *f, AVBufferRef *buf, int stride, int type);
 #endif
+attribute_deprecated
 enum AVColorSpace av_frame_get_colorspace(const AVFrame *frame);
+attribute_deprecated
 void    av_frame_set_colorspace(AVFrame *frame, enum AVColorSpace val);
+attribute_deprecated
 enum AVColorRange av_frame_get_color_range(const AVFrame *frame);
+attribute_deprecated
 void    av_frame_set_color_range(AVFrame *frame, enum AVColorRange val);
+#endif
 
 /**
  * Get the name of a colorspace.
@@ -763,6 +819,22 @@
                                         int size);
 
 /**
+ * Add a new side data to a frame from an existing AVBufferRef
+ *
+ * @param frame a frame to which the side data should be added
+ * @param type  the type of the added side data
+ * @param buf   an AVBufferRef to add as side data. The ownership of
+ *              the reference is transferred to the frame.
+ *
+ * @return newly added side data on success, NULL on error. On failure
+ *         the frame is unchanged and the AVBufferRef remains owned by
+ *         the caller.
+ */
+AVFrameSideData *av_frame_new_side_data_from_buf(AVFrame *frame,
+                                                 enum AVFrameSideDataType type,
+                                                 AVBufferRef *buf);
+
+/**
  * @return a pointer to the side data of a given type on success, NULL if there
  * is no side data with such type in this frame.
  */

diff --git a/libavutil/hash.c b/libavutil/hash.c
index 7037b0d..75edb6d 100644
--- a/libavutil/hash.c
+++ b/libavutil/hash.c

@@ -155,7 +155,11 @@
     }
 }
 
+#if FF_API_CRYPTO_SIZE_T
 void av_hash_update(AVHashContext *ctx, const uint8_t *src, int len)
+#else
+void av_hash_update(AVHashContext *ctx, const uint8_t *src, size_t len)
+#endif
 {
     switch (ctx->type) {
     case MD5:     av_md5_update(ctx->ctx, src, len); break;

diff --git a/libavutil/hash.h b/libavutil/hash.h
index a20b893..7693e6b 100644
--- a/libavutil/hash.h
+++ b/libavutil/hash.h

@@ -29,6 +29,8 @@
 
 #include <stdint.h>
 
+#include "version.h"
+
 /**
  * @defgroup lavu_hash Hash Functions
  * @ingroup lavu_crypto
@@ -179,7 +181,11 @@
  * @param[in]     src Data to be added to the hash context
  * @param[in]     len Size of the additional data
  */
+#if FF_API_CRYPTO_SIZE_T
 void av_hash_update(struct AVHashContext *ctx, const uint8_t *src, int len);
+#else
+void av_hash_update(struct AVHashContext *ctx, const uint8_t *src, size_t len);
+#endif
 
 /**
  * Finalize a hash context and compute the actual hash value.

diff --git a/libavutil/hmac.h b/libavutil/hmac.h
index 576a0a4..412e950 100644
--- a/libavutil/hmac.h
+++ b/libavutil/hmac.h

@@ -35,7 +35,7 @@
     AV_HMAC_SHA1,
     AV_HMAC_SHA224,
     AV_HMAC_SHA256,
-    AV_HMAC_SHA384 = 12,
+    AV_HMAC_SHA384,
     AV_HMAC_SHA512,
 };
 

diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 048e821..f1e404a 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c

@@ -41,6 +41,9 @@
 #if CONFIG_DXVA2
     &ff_hwcontext_type_dxva2,
 #endif
+#if CONFIG_OPENCL
+    &ff_hwcontext_type_opencl,
+#endif
 #if CONFIG_QSV
     &ff_hwcontext_type_qsv,
 #endif
@@ -53,6 +56,9 @@
 #if CONFIG_VIDEOTOOLBOX
     &ff_hwcontext_type_videotoolbox,
 #endif
+#if CONFIG_MEDIACODEC
+    &ff_hwcontext_type_mediacodec,
+#endif
     NULL,
 };
 
@@ -61,10 +67,12 @@
     [AV_HWDEVICE_TYPE_DRM]    = "drm",
     [AV_HWDEVICE_TYPE_DXVA2]  = "dxva2",
     [AV_HWDEVICE_TYPE_D3D11VA] = "d3d11va",
+    [AV_HWDEVICE_TYPE_OPENCL] = "opencl",
     [AV_HWDEVICE_TYPE_QSV]    = "qsv",
     [AV_HWDEVICE_TYPE_VAAPI]  = "vaapi",
     [AV_HWDEVICE_TYPE_VDPAU]  = "vdpau",
     [AV_HWDEVICE_TYPE_VIDEOTOOLBOX] = "videotoolbox",
+    [AV_HWDEVICE_TYPE_MEDIACODEC] = "mediacodec",
 };
 
 enum AVHWDeviceType av_hwdevice_find_type_by_name(const char *name)
@@ -79,7 +87,8 @@
 
 const char *av_hwdevice_get_type_name(enum AVHWDeviceType type)
 {
-    if (type >= 0 && type < FF_ARRAY_ELEMS(hw_type_names))
+    if (type > AV_HWDEVICE_TYPE_NONE &&
+        type < FF_ARRAY_ELEMS(hw_type_names))
         return hw_type_names[type];
     else
         return NULL;
@@ -212,19 +221,16 @@
 {
     AVHWFramesContext *ctx = (AVHWFramesContext*)data;
 
-    if (ctx->internal->source_frames) {
-        av_buffer_unref(&ctx->internal->source_frames);
+    if (ctx->internal->pool_internal)
+        av_buffer_pool_uninit(&ctx->internal->pool_internal);
 
-    } else {
-        if (ctx->internal->pool_internal)
-            av_buffer_pool_uninit(&ctx->internal->pool_internal);
+    if (ctx->internal->hw_type->frames_uninit)
+        ctx->internal->hw_type->frames_uninit(ctx);
 
-        if (ctx->internal->hw_type->frames_uninit)
-            ctx->internal->hw_type->frames_uninit(ctx);
+    if (ctx->free)
+        ctx->free(ctx);
 
-        if (ctx->free)
-            ctx->free(ctx);
-    }
+    av_buffer_unref(&ctx->internal->source_frames);
 
     av_buffer_unref(&ctx->device_ref);
 
@@ -477,8 +483,10 @@
 
         ret = av_hwframe_get_buffer(ctx->internal->source_frames,
                                     src_frame, 0);
-        if (ret < 0)
+        if (ret < 0) {
+            av_frame_free(&src_frame);
             return ret;
+        }
 
         ret = av_hwframe_map(frame, src_frame,
                              ctx->internal->source_allocation_map_flags);
@@ -638,6 +646,9 @@
                     ret = AVERROR(ENOMEM);
                     goto fail;
                 }
+                ret = av_hwdevice_ctx_init(dst_ref);
+                if (ret < 0)
+                    goto fail;
                 goto done;
             }
             if (ret != AVERROR(ENOSYS))
@@ -650,10 +661,6 @@
     goto fail;
 
 done:
-    ret = av_hwdevice_ctx_init(dst_ref);
-    if (ret < 0)
-        goto fail;
-
     *dst_ref_ptr = dst_ref;
     return 0;
 
@@ -863,3 +870,10 @@
     av_buffer_unref(&dst_ref);
     return ret;
 }
+
+int ff_hwframe_map_replace(AVFrame *dst, const AVFrame *src)
+{
+    HWMapDescriptor *hwmap = (HWMapDescriptor*)dst->buf[0]->data;
+    av_frame_unref(hwmap->source);
+    return av_frame_ref(hwmap->source, src);
+}

diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
index 03334e2..f5a4b62 100644
--- a/libavutil/hwcontext.h
+++ b/libavutil/hwcontext.h

@@ -25,15 +25,17 @@
 #include "pixfmt.h"
 
 enum AVHWDeviceType {
+    AV_HWDEVICE_TYPE_NONE,
     AV_HWDEVICE_TYPE_VDPAU,
     AV_HWDEVICE_TYPE_CUDA,
     AV_HWDEVICE_TYPE_VAAPI,
     AV_HWDEVICE_TYPE_DXVA2,
     AV_HWDEVICE_TYPE_QSV,
     AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
-    AV_HWDEVICE_TYPE_NONE,
     AV_HWDEVICE_TYPE_D3D11VA,
     AV_HWDEVICE_TYPE_DRM,
+    AV_HWDEVICE_TYPE_OPENCL,
+    AV_HWDEVICE_TYPE_MEDIACODEC,
 };
 
 typedef struct AVHWDeviceInternal AVHWDeviceInternal;

diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index dfb67bc..3b1d53e 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c

@@ -24,6 +24,7 @@
 #include "mem.h"
 #include "pixdesc.h"
 #include "pixfmt.h"
+#include "imgutils.h"
 
 #define CUDA_FRAME_ALIGNMENT 256
 
@@ -38,6 +39,8 @@
     AV_PIX_FMT_P010,
     AV_PIX_FMT_P016,
     AV_PIX_FMT_YUV444P16,
+    AV_PIX_FMT_0RGB32,
+    AV_PIX_FMT_0BGR32,
 };
 
 static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
@@ -115,7 +118,6 @@
 static int cuda_frames_init(AVHWFramesContext *ctx)
 {
     CUDAFramesContext *priv = ctx->internal->priv;
-    int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
     int i;
 
     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
@@ -131,25 +133,9 @@
     av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
 
     if (!ctx->pool) {
-        int size;
-
-        switch (ctx->sw_format) {
-        case AV_PIX_FMT_NV12:
-        case AV_PIX_FMT_YUV420P:
-            size = aligned_width * ctx->height * 3 / 2;
-            break;
-        case AV_PIX_FMT_YUV444P:
-        case AV_PIX_FMT_P010:
-        case AV_PIX_FMT_P016:
-            size = aligned_width * ctx->height * 3;
-            break;
-        case AV_PIX_FMT_YUV444P16:
-            size = aligned_width * ctx->height * 6;
-            break;
-        default:
-            av_log(ctx, AV_LOG_ERROR, "BUG: Pixel format missing from size calculation.");
-            return AVERROR_BUG;
-        }
+        int size = av_image_get_buffer_size(ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT);
+        if (size < 0)
+            return size;
 
         ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
         if (!ctx->internal->pool_internal)
@@ -161,49 +147,23 @@
 
 static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
 {
-    int aligned_width;
-    int width_in_bytes = ctx->width;
-
-    if (ctx->sw_format == AV_PIX_FMT_P010 ||
-        ctx->sw_format == AV_PIX_FMT_P016 ||
-        ctx->sw_format == AV_PIX_FMT_YUV444P16) {
-       width_in_bytes *= 2;
-    }
-    aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);
+    int res;
 
     frame->buf[0] = av_buffer_pool_get(ctx->pool);
     if (!frame->buf[0])
         return AVERROR(ENOMEM);
 
-    switch (ctx->sw_format) {
-    case AV_PIX_FMT_NV12:
-    case AV_PIX_FMT_P010:
-    case AV_PIX_FMT_P016:
-        frame->data[0]     = frame->buf[0]->data;
-        frame->data[1]     = frame->data[0] + aligned_width * ctx->height;
-        frame->linesize[0] = aligned_width;
-        frame->linesize[1] = aligned_width;
-        break;
-    case AV_PIX_FMT_YUV420P:
-        frame->data[0]     = frame->buf[0]->data;
-        frame->data[2]     = frame->data[0] + aligned_width * ctx->height;
-        frame->data[1]     = frame->data[2] + aligned_width * ctx->height / 4;
-        frame->linesize[0] = aligned_width;
-        frame->linesize[1] = aligned_width / 2;
-        frame->linesize[2] = aligned_width / 2;
-        break;
-    case AV_PIX_FMT_YUV444P:
-    case AV_PIX_FMT_YUV444P16:
-        frame->data[0]     = frame->buf[0]->data;
-        frame->data[1]     = frame->data[0] + aligned_width * ctx->height;
-        frame->data[2]     = frame->data[1] + aligned_width * ctx->height;
-        frame->linesize[0] = aligned_width;
-        frame->linesize[1] = aligned_width;
-        frame->linesize[2] = aligned_width;
-        break;
-    default:
-        av_frame_unref(frame);
-        return AVERROR_BUG;
+    res = av_image_fill_arrays(frame->data, frame->linesize, frame->buf[0]->data,
+                               ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT);
+    if (res < 0)
+        return res;
+
+    // YUV420P is a special case.
+    // Nvenc expects the U/V planes in swapped order from how ffmpeg expects them, also chroma is half-aligned
+    if (ctx->sw_format == AV_PIX_FMT_YUV420P) {
+        frame->linesize[1] = frame->linesize[2] = frame->linesize[0] / 2;
+        frame->data[2]     = frame->data[1];
+        frame->data[1]     = frame->data[2] + frame->linesize[2] * ctx->height / 2;
     }
 
     frame->format = AV_PIX_FMT_CUDA;
@@ -258,13 +218,19 @@
             .Height        = src->height >> (i ? priv->shift_height : 0),
         };
 
-        err = cu->cuMemcpy2D(&cpy);
+        err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream);
         if (err != CUDA_SUCCESS) {
             av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
             return AVERROR_UNKNOWN;
         }
     }
 
+    err = cu->cuStreamSynchronize(device_hwctx->stream);
+    if (err != CUDA_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n");
+        return AVERROR_UNKNOWN;
+    }
+
     cu->cuCtxPopCurrent(&dummy);
 
     return 0;
@@ -297,13 +263,19 @@
             .Height        = src->height >> (i ? priv->shift_height : 0),
         };
 
-        err = cu->cuMemcpy2D(&cpy);
+        err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream);
         if (err != CUDA_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
+            av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
             return AVERROR_UNKNOWN;
         }
     }
 
+    err = cu->cuStreamSynchronize(device_hwctx->stream);
+    if (err != CUDA_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n");
+        return AVERROR_UNKNOWN;
+    }
+
     cu->cuCtxPopCurrent(&dummy);
 
     return 0;
@@ -336,7 +308,7 @@
     }
 
     if (!hwctx->internal->cuda_dl) {
-        ret = cuda_load_functions(&hwctx->internal->cuda_dl);
+        ret = cuda_load_functions(&hwctx->internal->cuda_dl, ctx);
         if (ret < 0) {
             av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
             goto error;
@@ -386,6 +358,9 @@
         goto error;
     }
 
+    // Setting stream to NULL will make functions automatically use the default CUstream
+    hwctx->stream = NULL;
+
     cu->cuCtxPopCurrent(&dummy);
 
     hwctx->internal->is_allocated = 1;

diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
index 12dae84..81a0552 100644
--- a/libavutil/hwcontext_cuda.h
+++ b/libavutil/hwcontext_cuda.h

@@ -41,6 +41,7 @@
  */
 typedef struct AVCUDADeviceContext {
     CUcontext cuda_ctx;
+    CUstream stream;
     AVCUDADeviceContextInternal *internal;
 } AVCUDADeviceContext;
 

diff --git a/libavutil/hwcontext_d3d11va.c b/libavutil/hwcontext_d3d11va.c
index 52683b9..41330f0 100644
--- a/libavutil/hwcontext_d3d11va.c
+++ b/libavutil/hwcontext_d3d11va.c

@@ -20,14 +20,6 @@
 
 #include <windows.h>
 
-// Include thread.h before redefining _WIN32_WINNT, to get
-// the right implementation for AVOnce
-#include "thread.h"
-
-#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0600
-#undef _WIN32_WINNT
-#define _WIN32_WINNT 0x0600
-#endif
 #define COBJMACROS
 
 #include <initguid.h>
@@ -46,6 +38,7 @@
 #include "imgutils.h"
 #include "pixdesc.h"
 #include "pixfmt.h"
+#include "thread.h"
 
 typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID riid, void **ppFactory);
 
@@ -120,9 +113,42 @@
     s->staging_texture = NULL;
 }
 
+static int d3d11va_frames_get_constraints(AVHWDeviceContext *ctx,
+                                          const void *hwconfig,
+                                          AVHWFramesConstraints *constraints)
+{
+    AVD3D11VADeviceContext *device_hwctx = ctx->hwctx;
+    int nb_sw_formats = 0;
+    HRESULT hr;
+    int i;
+
+    constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
+                                                    sizeof(*constraints->valid_sw_formats));
+    if (!constraints->valid_sw_formats)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
+        UINT format_support = 0;
+        hr = ID3D11Device_CheckFormatSupport(device_hwctx->device, supported_formats[i].d3d_format, &format_support);
+        if (SUCCEEDED(hr) && (format_support & D3D11_FORMAT_SUPPORT_TEXTURE2D))
+            constraints->valid_sw_formats[nb_sw_formats++] = supported_formats[i].pix_fmt;
+    }
+    constraints->valid_sw_formats[nb_sw_formats] = AV_PIX_FMT_NONE;
+
+    constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
+    if (!constraints->valid_hw_formats)
+        return AVERROR(ENOMEM);
+
+    constraints->valid_hw_formats[0] = AV_PIX_FMT_D3D11;
+    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
+
+    return 0;
+}
+
 static void free_texture(void *opaque, uint8_t *data)
 {
     ID3D11Texture2D_Release((ID3D11Texture2D *)opaque);
+    av_free(data);
 }
 
 static AVBufferRef *wrap_texture_buf(ID3D11Texture2D *tex, int index)
@@ -457,20 +483,31 @@
 {
     AVD3D11VADeviceContext *device_hwctx = hwdev->hwctx;
 
-    if (device_hwctx->device)
+    if (device_hwctx->device) {
         ID3D11Device_Release(device_hwctx->device);
+        device_hwctx->device = NULL;
+    }
 
-    if (device_hwctx->device_context)
+    if (device_hwctx->device_context) {
         ID3D11DeviceContext_Release(device_hwctx->device_context);
+        device_hwctx->device_context = NULL;
+    }
 
-    if (device_hwctx->video_device)
+    if (device_hwctx->video_device) {
         ID3D11VideoDevice_Release(device_hwctx->video_device);
+        device_hwctx->video_device = NULL;
+    }
 
-    if (device_hwctx->video_context)
+    if (device_hwctx->video_context) {
         ID3D11VideoContext_Release(device_hwctx->video_context);
+        device_hwctx->video_context = NULL;
+    }
 
-    if (device_hwctx->lock == d3d11va_default_lock)
+    if (device_hwctx->lock == d3d11va_default_lock) {
         CloseHandle(device_hwctx->lock_ctx);
+        device_hwctx->lock_ctx = INVALID_HANDLE_VALUE;
+        device_hwctx->lock = NULL;
+    }
 }
 
 static int d3d11va_device_create(AVHWDeviceContext *ctx, const char *device,
@@ -512,6 +549,15 @@
         }
     }
 
+    if (pAdapter) {
+        DXGI_ADAPTER_DESC desc;
+        hr = IDXGIAdapter2_GetDesc(pAdapter, &desc);
+        if (!FAILED(hr)) {
+            av_log(ctx, AV_LOG_INFO, "Using device %04x:%04x (%ls).\n",
+                   desc.VendorId, desc.DeviceId, desc.Description);
+        }
+    }
+
     hr = mD3D11CreateDevice(pAdapter, pAdapter ? D3D_DRIVER_TYPE_UNKNOWN : D3D_DRIVER_TYPE_HARDWARE, NULL, creationFlags, NULL, 0,
                    D3D11_SDK_VERSION, &device_hwctx->device, NULL, NULL);
     if (pAdapter)
@@ -557,6 +603,7 @@
     .device_create        = d3d11va_device_create,
     .device_init          = d3d11va_device_init,
     .device_uninit        = d3d11va_device_uninit,
+    .frames_get_constraints = d3d11va_frames_get_constraints,
     .frames_init          = d3d11va_frames_init,
     .frames_uninit        = d3d11va_frames_uninit,
     .frames_get_buffer    = d3d11va_get_buffer,

diff --git a/libavutil/hwcontext_d3d11va.h b/libavutil/hwcontext_d3d11va.h
index 98db7ce..9f91e9b 100644
--- a/libavutil/hwcontext_d3d11va.h
+++ b/libavutil/hwcontext_d3d11va.h

@@ -37,6 +37,7 @@
  */
 
 #include <d3d11.h>
+#include <stdint.h>
 
 /**
  * This struct is allocated as AVHWDeviceContext.hwctx

diff --git a/libavutil/hwcontext_drm.h b/libavutil/hwcontext_drm.h
index 2e22545..42709f2 100644
--- a/libavutil/hwcontext_drm.h
+++ b/libavutil/hwcontext_drm.h

@@ -58,6 +58,9 @@
     size_t size;
     /**
      * Format modifier applied to the object (DRM_FORMAT_MOD_*).
+     *
+     * If the format modifier is unknown then this should be set to
+     * DRM_FORMAT_MOD_INVALID.
      */
     uint64_t format_modifier;
 } AVDRMObjectDescriptor;

diff --git a/libavutil/hwcontext_dxva2.c b/libavutil/hwcontext_dxva2.c
index 665c2d6..4585f32 100644
--- a/libavutil/hwcontext_dxva2.c
+++ b/libavutil/hwcontext_dxva2.c

@@ -18,10 +18,6 @@
 
 #include <windows.h>
 
-#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0600
-#undef _WIN32_WINNT
-#define _WIN32_WINNT 0x0600
-#endif
 #define DXVA2API_USE_BITFIELDS
 #define COBJMACROS
 

diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
index 2d75d3d..77dc47d 100644
--- a/libavutil/hwcontext_internal.h
+++ b/libavutil/hwcontext_internal.h

@@ -156,14 +156,21 @@
                                         HWMapDescriptor *hwmap),
                           void *priv);
 
+/**
+ * Replace the current hwmap of dst with the one from src, used for indirect
+ * mappings like VAAPI->(DRM)->OpenCL/Vulkan where a direct interop is missing
+ */
+int ff_hwframe_map_replace(AVFrame *dst, const AVFrame *src);
 
 extern const HWContextType ff_hwcontext_type_cuda;
 extern const HWContextType ff_hwcontext_type_d3d11va;
 extern const HWContextType ff_hwcontext_type_drm;
 extern const HWContextType ff_hwcontext_type_dxva2;
+extern const HWContextType ff_hwcontext_type_opencl;
 extern const HWContextType ff_hwcontext_type_qsv;
 extern const HWContextType ff_hwcontext_type_vaapi;
 extern const HWContextType ff_hwcontext_type_vdpau;
 extern const HWContextType ff_hwcontext_type_videotoolbox;
+extern const HWContextType ff_hwcontext_type_mediacodec;
 
 #endif /* AVUTIL_HWCONTEXT_INTERNAL_H */

diff --git a/libavutil/hwcontext_mediacodec.c b/libavutil/hwcontext_mediacodec.c
new file mode 100644
index 0000000..b0d8993
--- /dev/null
+++ b/libavutil/hwcontext_mediacodec.c

@@ -0,0 +1,50 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "buffer.h"
+#include "common.h"
+#include "hwcontext.h"
+#include "hwcontext_internal.h"
+#include "hwcontext_mediacodec.h"
+
+static int mc_device_create(AVHWDeviceContext *ctx, const char *device,
+                            AVDictionary *opts, int flags)
+{
+    if (device && device[0]) {
+        av_log(ctx, AV_LOG_ERROR, "Device selection unsupported.\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    return 0;
+}
+
+const HWContextType ff_hwcontext_type_mediacodec = {
+    .type                 = AV_HWDEVICE_TYPE_MEDIACODEC,
+    .name                 = "mediacodec",
+
+    .device_hwctx_size    = sizeof(AVMediaCodecDeviceContext),
+
+    .device_create        = mc_device_create,
+
+    .pix_fmts = (const enum AVPixelFormat[]){
+        AV_PIX_FMT_MEDIACODEC,
+        AV_PIX_FMT_NONE
+    },
+};

diff --git a/libavfilter/avfiltergraph.h b/libavutil/hwcontext_mediacodec.h
similarity index 63%
copy from libavfilter/avfiltergraph.h
copy to libavutil/hwcontext_mediacodec.h
index b31d581..101a980 100644
--- a/libavfilter/avfiltergraph.h
+++ b/libavutil/hwcontext_mediacodec.h

@@ -1,7 +1,4 @@
 /*
- * Filter graphs
- * copyright (c) 2007 Bobby Bingham
- *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -19,10 +16,21 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVFILTER_AVFILTERGRAPH_H
-#define AVFILTER_AVFILTERGRAPH_H
+#ifndef AVUTIL_HWCONTEXT_MEDIACODEC_H
+#define AVUTIL_HWCONTEXT_MEDIACODEC_H
 
-#include "avfilter.h"
-#include "libavutil/log.h"
+/**
+ * MediaCodec details.
+ *
+ * Allocated as AVHWDeviceContext.hwctx
+ */
+typedef struct AVMediaCodecDeviceContext {
+    /**
+     * android/view/Surface handle, to be filled by the user.
+     *
+     * This is the default surface used by decoders on this device.
+     */
+    void *surface;
+} AVMediaCodecDeviceContext;
 
-#endif /* AVFILTER_AVFILTERGRAPH_H */
+#endif /* AVUTIL_HWCONTEXT_MEDIACODEC_H */

diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c
new file mode 100644
index 0000000..7288775
--- /dev/null
+++ b/libavutil/hwcontext_opencl.c

@@ -0,0 +1,2939 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
+
+#include <string.h>
+
+#include "config.h"
+
+#include "avassert.h"
+#include "avstring.h"
+#include "common.h"
+#include "hwcontext.h"
+#include "hwcontext_internal.h"
+#include "hwcontext_opencl.h"
+#include "mem.h"
+#include "pixdesc.h"
+
+#if HAVE_OPENCL_VAAPI_BEIGNET
+#include <unistd.h>
+#include <va/va.h>
+#include <va/va_drmcommon.h>
+#include <CL/cl_intel.h>
+#include "hwcontext_vaapi.h"
+#endif
+
+#if HAVE_OPENCL_DRM_BEIGNET
+#include <unistd.h>
+#include <CL/cl_intel.h>
+#include "hwcontext_drm.h"
+#endif
+
+#if HAVE_OPENCL_VAAPI_INTEL_MEDIA
+#if CONFIG_LIBMFX
+#include <mfx/mfxstructures.h>
+#endif
+#include <va/va.h>
+#include <CL/va_ext.h>
+#include "hwcontext_vaapi.h"
+#endif
+
+#if HAVE_OPENCL_DXVA2
+#define COBJMACROS
+#include <CL/cl_dx9_media_sharing.h>
+#include <dxva2api.h>
+#include "hwcontext_dxva2.h"
+#endif
+
+#if HAVE_OPENCL_D3D11
+#include <CL/cl_d3d11.h>
+#include "hwcontext_d3d11va.h"
+#endif
+
+#if HAVE_OPENCL_DRM_ARM
+#include <CL/cl_ext.h>
+#include <drm_fourcc.h>
+#include "hwcontext_drm.h"
+#endif
+
+
+typedef struct OpenCLDeviceContext {
+    // Default command queue to use for transfer/mapping operations on
+    // the device.  If the user supplies one, this is a reference to it.
+    // Otherwise, it is newly-created.
+    cl_command_queue command_queue;
+
+    // The platform the context exists on.  This is needed to query and
+    // retrieve extension functions.
+    cl_platform_id platform_id;
+
+    // Platform/device-specific functions.
+#if HAVE_OPENCL_DRM_BEIGNET
+    int beignet_drm_mapping_usable;
+    clCreateImageFromFdINTEL_fn clCreateImageFromFdINTEL;
+#endif
+
+#if HAVE_OPENCL_VAAPI_INTEL_MEDIA
+    int qsv_mapping_usable;
+    clCreateFromVA_APIMediaSurfaceINTEL_fn
+        clCreateFromVA_APIMediaSurfaceINTEL;
+    clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn
+        clEnqueueAcquireVA_APIMediaSurfacesINTEL;
+    clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn
+        clEnqueueReleaseVA_APIMediaSurfacesINTEL;
+#endif
+
+#if HAVE_OPENCL_DXVA2
+    int dxva2_mapping_usable;
+    cl_dx9_media_adapter_type_khr dx9_media_adapter_type;
+
+    clCreateFromDX9MediaSurfaceKHR_fn
+        clCreateFromDX9MediaSurfaceKHR;
+    clEnqueueAcquireDX9MediaSurfacesKHR_fn
+        clEnqueueAcquireDX9MediaSurfacesKHR;
+    clEnqueueReleaseDX9MediaSurfacesKHR_fn
+        clEnqueueReleaseDX9MediaSurfacesKHR;
+#endif
+
+#if HAVE_OPENCL_D3D11
+    int d3d11_mapping_usable;
+    clCreateFromD3D11Texture2DKHR_fn
+        clCreateFromD3D11Texture2DKHR;
+    clEnqueueAcquireD3D11ObjectsKHR_fn
+        clEnqueueAcquireD3D11ObjectsKHR;
+    clEnqueueReleaseD3D11ObjectsKHR_fn
+        clEnqueueReleaseD3D11ObjectsKHR;
+#endif
+
+#if HAVE_OPENCL_DRM_ARM
+    int drm_arm_mapping_usable;
+#endif
+} OpenCLDeviceContext;
+
+typedef struct OpenCLFramesContext {
+    // Command queue used for transfer/mapping operations on this frames
+    // context.  If the user supplies one, this is a reference to it.
+    // Otherwise, it is a reference to the default command queue for the
+    // device.
+    cl_command_queue command_queue;
+
+#if HAVE_OPENCL_DXVA2 || HAVE_OPENCL_D3D11
+    // For mapping APIs which have separate creation and acquire/release
+    // steps, this stores the OpenCL memory objects corresponding to each
+    // frame.
+    int                   nb_mapped_frames;
+    AVOpenCLFrameDescriptor *mapped_frames;
+#endif
+} OpenCLFramesContext;
+
+
+static void CL_CALLBACK opencl_error_callback(const char *errinfo,
+                                              const void *private_info,
+                                              size_t cb,
+                                              void *user_data)
+{
+    AVHWDeviceContext *ctx = user_data;
+    av_log(ctx, AV_LOG_ERROR, "OpenCL error: %s\n", errinfo);
+}
+
+static void opencl_device_free(AVHWDeviceContext *hwdev)
+{
+    AVOpenCLDeviceContext *hwctx = hwdev->hwctx;
+    cl_int cle;
+
+    cle = clReleaseContext(hwctx->context);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to release OpenCL "
+               "context: %d.\n", cle);
+    }
+}
+
+static struct {
+    const char *key;
+    cl_platform_info name;
+} opencl_platform_params[] = {
+    { "platform_profile",    CL_PLATFORM_PROFILE    },
+    { "platform_version",    CL_PLATFORM_VERSION    },
+    { "platform_name",       CL_PLATFORM_NAME       },
+    { "platform_vendor",     CL_PLATFORM_VENDOR     },
+    { "platform_extensions", CL_PLATFORM_EXTENSIONS },
+};
+
+static struct {
+    const char *key;
+    cl_device_info name;
+} opencl_device_params[] = {
+    { "device_name",         CL_DEVICE_NAME         },
+    { "device_vendor",       CL_DEVICE_VENDOR       },
+    { "driver_version",      CL_DRIVER_VERSION      },
+    { "device_version",      CL_DEVICE_VERSION      },
+    { "device_profile",      CL_DEVICE_PROFILE      },
+    { "device_extensions",   CL_DEVICE_EXTENSIONS   },
+};
+
+static struct {
+    const char *key;
+    cl_device_type type;
+} opencl_device_types[] = {
+    { "cpu",         CL_DEVICE_TYPE_CPU         },
+    { "gpu",         CL_DEVICE_TYPE_GPU         },
+    { "accelerator", CL_DEVICE_TYPE_ACCELERATOR },
+    { "custom",      CL_DEVICE_TYPE_CUSTOM      },
+    { "default",     CL_DEVICE_TYPE_DEFAULT     },
+    { "all",         CL_DEVICE_TYPE_ALL         },
+};
+
+static char *opencl_get_platform_string(cl_platform_id platform_id,
+                                        cl_platform_info key)
+{
+    char *str;
+    size_t size;
+    cl_int cle;
+    cle = clGetPlatformInfo(platform_id, key, 0, NULL, &size);
+    if (cle != CL_SUCCESS)
+        return NULL;
+    str = av_malloc(size);
+    if (!str)
+        return NULL;
+    cle = clGetPlatformInfo(platform_id, key, size, str, &size);
+    if (cle != CL_SUCCESS) {
+        av_free(str);
+        return NULL;
+    }
+    av_assert0(strlen(str) + 1 == size);
+    return str;
+}
+
+static char *opencl_get_device_string(cl_device_id device_id,
+                                      cl_device_info key)
+{
+    char *str;
+    size_t size;
+    cl_int cle;
+    cle = clGetDeviceInfo(device_id, key, 0, NULL, &size);
+    if (cle != CL_SUCCESS)
+        return NULL;
+    str = av_malloc(size);
+    if (!str)
+        return NULL;
+    cle = clGetDeviceInfo(device_id, key, size, str, &size);
+    if (cle != CL_SUCCESS) {
+        av_free(str);
+        return NULL;
+    }
+    av_assert0(strlen(str) + 1== size);
+    return str;
+}
+
+static int opencl_check_platform_extension(cl_platform_id platform_id,
+                                           const char *name)
+{
+    char *str;
+    int found = 0;
+    str = opencl_get_platform_string(platform_id,
+                                     CL_PLATFORM_EXTENSIONS);
+    if (str && strstr(str, name))
+        found = 1;
+    av_free(str);
+    return found;
+}
+
+static int opencl_check_device_extension(cl_device_id device_id,
+                                         const char *name)
+{
+    char *str;
+    int found = 0;
+    str = opencl_get_device_string(device_id,
+                                   CL_DEVICE_EXTENSIONS);
+    if (str && strstr(str, name))
+        found = 1;
+    av_free(str);
+    return found;
+}
+
+static av_unused int opencl_check_extension(AVHWDeviceContext *hwdev,
+                                            const char *name)
+{
+    AVOpenCLDeviceContext *hwctx = hwdev->hwctx;
+    OpenCLDeviceContext    *priv = hwdev->internal->priv;
+
+    if (opencl_check_platform_extension(priv->platform_id, name)) {
+        av_log(hwdev, AV_LOG_DEBUG,
+               "%s found as platform extension.\n", name);
+        return 1;
+    }
+
+    if (opencl_check_device_extension(hwctx->device_id, name)) {
+        av_log(hwdev, AV_LOG_DEBUG,
+               "%s found as device extension.\n", name);
+        return 1;
+    }
+
+    return 0;
+}
+
+static int opencl_enumerate_platforms(AVHWDeviceContext *hwdev,
+                                      cl_uint *nb_platforms,
+                                      cl_platform_id **platforms,
+                                      void *context)
+{
+    cl_int cle;
+
+    cle = clGetPlatformIDs(0, NULL, nb_platforms);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get number of "
+               "OpenCL platforms: %d.\n", cle);
+        return AVERROR(ENODEV);
+    }
+    av_log(hwdev, AV_LOG_DEBUG, "%u OpenCL platforms found.\n",
+           *nb_platforms);
+
+    *platforms = av_malloc_array(*nb_platforms, sizeof(**platforms));
+    if (!*platforms)
+        return AVERROR(ENOMEM);
+
+    cle = clGetPlatformIDs(*nb_platforms, *platforms, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get list of OpenCL "
+               "platforms: %d.\n", cle);
+        av_freep(platforms);
+        return AVERROR(ENODEV);
+    }
+
+    return 0;
+}
+
+static int opencl_filter_platform(AVHWDeviceContext *hwdev,
+                                  cl_platform_id platform_id,
+                                  const char *platform_name,
+                                  void *context)
+{
+    AVDictionary *opts = context;
+    const AVDictionaryEntry *param;
+    char *str;
+    int i, ret = 0;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(opencl_platform_params); i++) {
+        param = av_dict_get(opts, opencl_platform_params[i].key,
+                            NULL, 0);
+        if (!param)
+            continue;
+
+        str = opencl_get_platform_string(platform_id,
+                                         opencl_platform_params[i].name);
+        if (!str) {
+            av_log(hwdev, AV_LOG_ERROR, "Failed to query %s "
+                   "of platform \"%s\".\n",
+                   opencl_platform_params[i].key, platform_name);
+            return AVERROR_UNKNOWN;
+        }
+        if (!av_stristr(str, param->value)) {
+            av_log(hwdev, AV_LOG_DEBUG, "%s does not match (\"%s\").\n",
+                   param->key, str);
+            ret = 1;
+        }
+        av_free(str);
+    }
+
+    return ret;
+}
+
+static int opencl_enumerate_devices(AVHWDeviceContext *hwdev,
+                                    cl_platform_id platform_id,
+                                    const char *platform_name,
+                                    cl_uint *nb_devices,
+                                    cl_device_id **devices,
+                                    void *context)
+{
+    cl_int cle;
+
+    cle = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL,
+                         0, NULL, nb_devices);
+    if (cle == CL_DEVICE_NOT_FOUND) {
+        av_log(hwdev, AV_LOG_DEBUG, "No devices found "
+               "on platform \"%s\".\n", platform_name);
+        *nb_devices = 0;
+        return 0;
+    } else if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get number of devices "
+               "on platform \"%s\": %d.\n", platform_name, cle);
+        return AVERROR(ENODEV);
+    }
+    av_log(hwdev, AV_LOG_DEBUG, "%u OpenCL devices found on "
+           "platform \"%s\".\n", *nb_devices, platform_name);
+
+    *devices = av_malloc_array(*nb_devices, sizeof(**devices));
+    if (!*devices)
+        return AVERROR(ENOMEM);
+
+    cle = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL,
+                         *nb_devices, *devices, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get list of devices "
+               "on platform \"%s\": %d.\n", platform_name, cle);
+        av_freep(devices);
+        return AVERROR(ENODEV);
+    }
+
+    return 0;
+}
+
+static int opencl_filter_device(AVHWDeviceContext *hwdev,
+                                cl_device_id device_id,
+                                const char *device_name,
+                                void *context)
+{
+    AVDictionary *opts = context;
+    const AVDictionaryEntry *param;
+    char *str;
+    int i, ret = 0;
+
+    param = av_dict_get(opts, "device_type", NULL, 0);
+    if (param) {
+        cl_device_type match_type = 0, device_type;
+        cl_int cle;
+
+        for (i = 0; i < FF_ARRAY_ELEMS(opencl_device_types); i++) {
+            if (!strcmp(opencl_device_types[i].key, param->value)) {
+                match_type = opencl_device_types[i].type;
+                break;
+            }
+        }
+        if (!match_type) {
+            av_log(hwdev, AV_LOG_ERROR, "Unknown device type %s.\n",
+                   param->value);
+            return AVERROR(EINVAL);
+        }
+
+        cle = clGetDeviceInfo(device_id, CL_DEVICE_TYPE,
+                              sizeof(device_type), &device_type, NULL);
+        if (cle != CL_SUCCESS) {
+            av_log(hwdev, AV_LOG_ERROR, "Failed to query device type "
+                   "of device \"%s\".\n", device_name);
+            return AVERROR_UNKNOWN;
+        }
+
+        if (!(device_type & match_type)) {
+            av_log(hwdev, AV_LOG_DEBUG, "device_type does not match.\n");
+            return 1;
+        }
+    }
+
+    for (i = 0; i < FF_ARRAY_ELEMS(opencl_device_params); i++) {
+        param = av_dict_get(opts, opencl_device_params[i].key,
+                            NULL, 0);
+        if (!param)
+            continue;
+
+        str = opencl_get_device_string(device_id,
+                                       opencl_device_params[i].name);
+        if (!str) {
+            av_log(hwdev, AV_LOG_ERROR, "Failed to query %s "
+                   "of device \"%s\".\n",
+                   opencl_device_params[i].key, device_name);
+            return AVERROR_UNKNOWN;
+        }
+        if (!av_stristr(str, param->value)) {
+            av_log(hwdev, AV_LOG_DEBUG, "%s does not match (\"%s\").\n",
+                   param->key, str);
+            ret = 1;
+        }
+        av_free(str);
+    }
+
+    return ret;
+}
+
+typedef struct OpenCLDeviceSelector {
+    int platform_index;
+    int device_index;
+    void *context;
+    int (*enumerate_platforms)(AVHWDeviceContext *hwdev,
+                               cl_uint *nb_platforms,
+                               cl_platform_id **platforms,
+                               void *context);
+    int (*filter_platform)    (AVHWDeviceContext *hwdev,
+                               cl_platform_id platform_id,
+                               const char *platform_name,
+                               void *context);
+    int (*enumerate_devices)  (AVHWDeviceContext *hwdev,
+                               cl_platform_id platform_id,
+                               const char *platform_name,
+                               cl_uint *nb_devices,
+                               cl_device_id **devices,
+                               void *context);
+    int (*filter_device)      (AVHWDeviceContext *hwdev,
+                               cl_device_id device_id,
+                               const char *device_name,
+                               void *context);
+} OpenCLDeviceSelector;
+
+static int opencl_device_create_internal(AVHWDeviceContext *hwdev,
+                                         const OpenCLDeviceSelector *selector,
+                                         cl_context_properties *props)
+{
+    cl_uint      nb_platforms;
+    cl_platform_id *platforms = NULL;
+    cl_platform_id  platform_id;
+    cl_uint      nb_devices;
+    cl_device_id   *devices = NULL;
+    AVOpenCLDeviceContext *hwctx = hwdev->hwctx;
+    cl_int cle;
+    cl_context_properties default_props[3];
+    char *platform_name_src = NULL,
+         *device_name_src   = NULL;
+    int err, found, p, d;
+
+    err = selector->enumerate_platforms(hwdev, &nb_platforms, &platforms,
+                                        selector->context);
+    if (err)
+        return err;
+
+    found = 0;
+    for (p = 0; p < nb_platforms; p++) {
+        const char *platform_name;
+
+        if (selector->platform_index >= 0 &&
+            selector->platform_index != p)
+            continue;
+
+        av_freep(&platform_name_src);
+        platform_name_src = opencl_get_platform_string(platforms[p],
+                                                           CL_PLATFORM_NAME);
+        if (platform_name_src)
+            platform_name = platform_name_src;
+        else
+            platform_name = "Unknown Platform";
+
+        if (selector->filter_platform) {
+            err = selector->filter_platform(hwdev, platforms[p],
+                                            platform_name,
+                                            selector->context);
+            if (err < 0)
+                goto fail;
+            if (err > 0)
+                continue;
+        }
+
+        err = opencl_enumerate_devices(hwdev, platforms[p], platform_name,
+                                       &nb_devices, &devices,
+                                       selector->context);
+        if (err < 0)
+            continue;
+
+        for (d = 0; d < nb_devices; d++) {
+            const char *device_name;
+
+            if (selector->device_index >= 0 &&
+                selector->device_index != d)
+                continue;
+
+            av_freep(&device_name_src);
+            device_name_src = opencl_get_device_string(devices[d],
+                                                           CL_DEVICE_NAME);
+            if (device_name_src)
+                device_name = device_name_src;
+            else
+                device_name = "Unknown Device";
+
+            if (selector->filter_device) {
+                err = selector->filter_device(hwdev, devices[d],
+                                              device_name,
+                                              selector->context);
+                if (err < 0)
+                    goto fail;
+                if (err > 0)
+                    continue;
+            }
+
+            av_log(hwdev, AV_LOG_VERBOSE, "%d.%d: %s / %s\n", p, d,
+                   platform_name, device_name);
+
+            ++found;
+            platform_id      = platforms[p];
+            hwctx->device_id = devices[d];
+        }
+
+        av_freep(&devices);
+    }
+
+    if (found == 0) {
+        av_log(hwdev, AV_LOG_ERROR, "No matching devices found.\n");
+        err = AVERROR(ENODEV);
+        goto fail;
+    }
+    if (found > 1) {
+        av_log(hwdev, AV_LOG_ERROR, "More than one matching device found.\n");
+        err = AVERROR(ENODEV);
+        goto fail;
+    }
+
+    if (!props) {
+        props = default_props;
+        default_props[0] = CL_CONTEXT_PLATFORM;
+        default_props[1] = (intptr_t)platform_id;
+        default_props[2] = 0;
+    } else {
+        if (props[0] == CL_CONTEXT_PLATFORM && props[1] == 0)
+            props[1] = (intptr_t)platform_id;
+    }
+
+    hwctx->context = clCreateContext(props, 1, &hwctx->device_id,
+                                     &opencl_error_callback, hwdev, &cle);
+    if (!hwctx->context) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to create OpenCL context: "
+               "%d.\n", cle);
+        err = AVERROR(ENODEV);
+        goto fail;
+    }
+
+    hwdev->free = &opencl_device_free;
+
+    err = 0;
+fail:
+    av_freep(&platform_name_src);
+    av_freep(&device_name_src);
+    av_freep(&platforms);
+    av_freep(&devices);
+    return err;
+}
+
+static int opencl_device_create(AVHWDeviceContext *hwdev, const char *device,
+                                AVDictionary *opts, int flags)
+{
+    OpenCLDeviceSelector selector = {
+        .context = opts,
+        .enumerate_platforms = &opencl_enumerate_platforms,
+        .filter_platform     = &opencl_filter_platform,
+        .enumerate_devices   = &opencl_enumerate_devices,
+        .filter_device       = &opencl_filter_device,
+    };
+
+    if (device && device[0]) {
+        // Match one or both indices for platform and device.
+        int d = -1, p = -1, ret;
+        if (device[0] == '.')
+            ret = sscanf(device, ".%d", &d);
+        else
+            ret = sscanf(device, "%d.%d", &p, &d);
+        if (ret < 1) {
+            av_log(hwdev, AV_LOG_ERROR, "Invalid OpenCL platform/device "
+                   "index specification \"%s\".\n", device);
+            return AVERROR(EINVAL);
+        }
+        selector.platform_index = p;
+        selector.device_index   = d;
+    } else {
+        selector.platform_index = -1;
+        selector.device_index   = -1;
+    }
+
+    return opencl_device_create_internal(hwdev, &selector, NULL);
+}
+
+static int opencl_device_init(AVHWDeviceContext *hwdev)
+{
+    AVOpenCLDeviceContext *hwctx = hwdev->hwctx;
+    OpenCLDeviceContext    *priv = hwdev->internal->priv;
+    cl_int cle;
+
+    if (hwctx->command_queue) {
+        cle = clRetainCommandQueue(hwctx->command_queue);
+        if (cle != CL_SUCCESS) {
+            av_log(hwdev, AV_LOG_ERROR, "Failed to retain external "
+                   "command queue: %d.\n", cle);
+            return AVERROR(EIO);
+        }
+        priv->command_queue = hwctx->command_queue;
+    } else {
+        priv->command_queue = clCreateCommandQueue(hwctx->context,
+                                                   hwctx->device_id,
+                                                   0, &cle);
+        if (!priv->command_queue) {
+            av_log(hwdev, AV_LOG_ERROR, "Failed to create internal "
+                   "command queue: %d.\n", cle);
+            return AVERROR(EIO);
+        }
+    }
+
+    cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_PLATFORM,
+                          sizeof(priv->platform_id), &priv->platform_id,
+                          NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to determine the OpenCL "
+               "platform containing the device.\n");
+        return AVERROR(EIO);
+    }
+
+#define CL_FUNC(name, desc) do {                                \
+        if (fail)                                               \
+            break;                                              \
+        priv->name = clGetExtensionFunctionAddressForPlatform(  \
+            priv->platform_id, #name);                          \
+        if (!priv->name) {                                      \
+            av_log(hwdev, AV_LOG_VERBOSE,                       \
+                   desc " function not found (%s).\n", #name);  \
+            fail = 1;                                           \
+        } else {                                                \
+            av_log(hwdev, AV_LOG_VERBOSE,                       \
+                   desc " function found (%s).\n", #name);      \
+        }                                                       \
+    } while (0)
+
+#if HAVE_OPENCL_DRM_BEIGNET
+    {
+        int fail = 0;
+
+        CL_FUNC(clCreateImageFromFdINTEL,
+                "Beignet DRM to OpenCL image mapping");
+
+        if (fail) {
+            av_log(hwdev, AV_LOG_WARNING, "Beignet DRM to OpenCL "
+                   "mapping not usable.\n");
+            priv->beignet_drm_mapping_usable = 0;
+        } else {
+            priv->beignet_drm_mapping_usable = 1;
+        }
+    }
+#endif
+
+#if HAVE_OPENCL_VAAPI_INTEL_MEDIA
+    {
+        size_t props_size;
+        cl_context_properties *props = NULL;
+        VADisplay va_display;
+        const char *va_ext = "cl_intel_va_api_media_sharing";
+        int i, fail = 0;
+
+        if (!opencl_check_extension(hwdev, va_ext)) {
+            av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is "
+                   "required for QSV to OpenCL mapping.\n", va_ext);
+            goto no_qsv;
+        }
+
+        cle = clGetContextInfo(hwctx->context, CL_CONTEXT_PROPERTIES,
+                               0, NULL, &props_size);
+        if (cle != CL_SUCCESS) {
+            av_log(hwdev, AV_LOG_VERBOSE, "Failed to get context "
+                   "properties: %d.\n", cle);
+            goto no_qsv;
+        }
+        if (props_size == 0) {
+            av_log(hwdev, AV_LOG_VERBOSE, "Media sharing must be "
+                   "enabled on context creation to use QSV to "
+                   "OpenCL mapping.\n");
+            goto no_qsv;
+        }
+
+        props = av_malloc(props_size);
+        if (!props)
+            return AVERROR(ENOMEM);
+
+        cle = clGetContextInfo(hwctx->context, CL_CONTEXT_PROPERTIES,
+                               props_size, props, NULL);
+        if (cle != CL_SUCCESS) {
+            av_log(hwdev, AV_LOG_VERBOSE, "Failed to get context "
+                   "properties: %d.\n", cle);
+            goto no_qsv;
+        }
+
+        va_display = NULL;
+        for (i = 0; i < (props_size / sizeof(*props) - 1); i++) {
+            if (props[i] == CL_CONTEXT_VA_API_DISPLAY_INTEL) {
+                va_display = (VADisplay)(intptr_t)props[i+1];
+                break;
+            }
+        }
+        if (!va_display) {
+            av_log(hwdev, AV_LOG_VERBOSE, "Media sharing must be "
+                   "enabled on context creation to use QSV to "
+                   "OpenCL mapping.\n");
+            goto no_qsv;
+        }
+        if (!vaDisplayIsValid(va_display)) {
+            av_log(hwdev, AV_LOG_VERBOSE, "A valid VADisplay is "
+                   "required on context creation to use QSV to "
+                   "OpenCL mapping.\n");
+            goto no_qsv;
+        }
+
+        CL_FUNC(clCreateFromVA_APIMediaSurfaceINTEL,
+                "Intel QSV to OpenCL mapping");
+        CL_FUNC(clEnqueueAcquireVA_APIMediaSurfacesINTEL,
+                "Intel QSV in OpenCL acquire");
+        CL_FUNC(clEnqueueReleaseVA_APIMediaSurfacesINTEL,
+                "Intel QSV in OpenCL release");
+
+        if (fail) {
+        no_qsv:
+            av_log(hwdev, AV_LOG_WARNING, "QSV to OpenCL mapping "
+                   "not usable.\n");
+            priv->qsv_mapping_usable = 0;
+        } else {
+            priv->qsv_mapping_usable = 1;
+        }
+        av_free(props);
+    }
+#endif
+
+#if HAVE_OPENCL_DXVA2
+    {
+        int fail = 0;
+
+        CL_FUNC(clCreateFromDX9MediaSurfaceKHR,
+                "DXVA2 to OpenCL mapping");
+        CL_FUNC(clEnqueueAcquireDX9MediaSurfacesKHR,
+                "DXVA2 in OpenCL acquire");
+        CL_FUNC(clEnqueueReleaseDX9MediaSurfacesKHR,
+                "DXVA2 in OpenCL release");
+
+        if (fail) {
+            av_log(hwdev, AV_LOG_WARNING, "DXVA2 to OpenCL mapping "
+                   "not usable.\n");
+            priv->dxva2_mapping_usable = 0;
+        } else {
+            priv->dx9_media_adapter_type = CL_ADAPTER_D3D9EX_KHR;
+            priv->dxva2_mapping_usable = 1;
+        }
+    }
+#endif
+
+#if HAVE_OPENCL_D3D11
+    {
+        const char *d3d11_ext = "cl_khr_d3d11_sharing";
+        const char *nv12_ext  = "cl_intel_d3d11_nv12_media_sharing";
+        int fail = 0;
+
+        if (!opencl_check_extension(hwdev, d3d11_ext)) {
+            av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is "
+                   "required for D3D11 to OpenCL mapping.\n", d3d11_ext);
+            fail = 1;
+        } else if (!opencl_check_extension(hwdev, nv12_ext)) {
+            av_log(hwdev, AV_LOG_VERBOSE, "The %s extension may be "
+                   "required for D3D11 to OpenCL mapping.\n", nv12_ext);
+            // Not fatal.
+        }
+
+        CL_FUNC(clCreateFromD3D11Texture2DKHR,
+                "D3D11 to OpenCL mapping");
+        CL_FUNC(clEnqueueAcquireD3D11ObjectsKHR,
+                "D3D11 in OpenCL acquire");
+        CL_FUNC(clEnqueueReleaseD3D11ObjectsKHR,
+                "D3D11 in OpenCL release");
+
+        if (fail) {
+            av_log(hwdev, AV_LOG_WARNING, "D3D11 to OpenCL mapping "
+                   "not usable.\n");
+            priv->d3d11_mapping_usable = 0;
+        } else {
+            priv->d3d11_mapping_usable = 1;
+        }
+    }
+#endif
+
+#if HAVE_OPENCL_DRM_ARM
+    {
+        const char *drm_arm_ext = "cl_arm_import_memory";
+        const char *image_ext   = "cl_khr_image2d_from_buffer";
+        int fail = 0;
+
+        if (!opencl_check_extension(hwdev, drm_arm_ext)) {
+            av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is "
+                   "required for DRM to OpenCL mapping on ARM.\n",
+                   drm_arm_ext);
+            fail = 1;
+        }
+        if (!opencl_check_extension(hwdev, image_ext)) {
+            av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is "
+                   "required for DRM to OpenCL mapping on ARM.\n",
+                   image_ext);
+            fail = 1;
+        }
+
+        // clImportMemoryARM() is linked statically.
+
+        if (fail) {
+            av_log(hwdev, AV_LOG_WARNING, "DRM to OpenCL mapping on ARM "
+                   "not usable.\n");
+            priv->drm_arm_mapping_usable = 0;
+        } else {
+            priv->drm_arm_mapping_usable = 1;
+        }
+    }
+#endif
+
+#undef CL_FUNC
+
+    return 0;
+}
+
+static void opencl_device_uninit(AVHWDeviceContext *hwdev)
+{
+    OpenCLDeviceContext *priv = hwdev->internal->priv;
+    cl_int cle;
+
+    if (priv->command_queue) {
+        cle = clReleaseCommandQueue(priv->command_queue);
+        if (cle != CL_SUCCESS) {
+            av_log(hwdev, AV_LOG_ERROR, "Failed to release internal "
+                   "command queue reference: %d.\n", cle);
+        }
+        priv->command_queue = NULL;
+    }
+}
+
+#if HAVE_OPENCL_VAAPI_INTEL_MEDIA
+static int opencl_filter_intel_media_vaapi_platform(AVHWDeviceContext *hwdev,
+                                                    cl_platform_id platform_id,
+                                                    const char *platform_name,
+                                                    void *context)
+{
+    // This doesn't exist as a platform extension, so just test whether
+    // the function we will use for device enumeration exists.
+
+    if (!clGetExtensionFunctionAddressForPlatform(platform_id,
+            "clGetDeviceIDsFromVA_APIMediaAdapterINTEL")) {
+        av_log(hwdev, AV_LOG_DEBUG, "Platform %s does not export the "
+               "VAAPI device enumeration function.\n", platform_name);
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+static int opencl_enumerate_intel_media_vaapi_devices(AVHWDeviceContext *hwdev,
+                                                      cl_platform_id platform_id,
+                                                      const char *platform_name,
+                                                      cl_uint *nb_devices,
+                                                      cl_device_id **devices,
+                                                      void *context)
+{
+    VADisplay va_display = context;
+    clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn
+        clGetDeviceIDsFromVA_APIMediaAdapterINTEL;
+    cl_int cle;
+
+    clGetDeviceIDsFromVA_APIMediaAdapterINTEL =
+        clGetExtensionFunctionAddressForPlatform(platform_id,
+            "clGetDeviceIDsFromVA_APIMediaAdapterINTEL");
+    if (!clGetDeviceIDsFromVA_APIMediaAdapterINTEL) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get address of "
+               "clGetDeviceIDsFromVA_APIMediaAdapterINTEL().\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    cle = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
+        platform_id, CL_VA_API_DISPLAY_INTEL, va_display,
+        CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, 0, NULL, nb_devices);
+    if (cle == CL_DEVICE_NOT_FOUND) {
+        av_log(hwdev, AV_LOG_DEBUG, "No VAAPI-supporting devices found "
+               "on platform \"%s\".\n", platform_name);
+        *nb_devices = 0;
+        return 0;
+    } else if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get number of devices "
+               "on platform \"%s\": %d.\n", platform_name, cle);
+        return AVERROR_UNKNOWN;
+    }
+
+    *devices = av_malloc_array(*nb_devices, sizeof(**devices));
+    if (!*devices)
+        return AVERROR(ENOMEM);
+
+    cle = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
+        platform_id, CL_VA_API_DISPLAY_INTEL, va_display,
+        CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, *nb_devices, *devices, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get list of VAAPI-supporting "
+               "devices on platform \"%s\": %d.\n", platform_name, cle);
+        av_freep(devices);
+        return AVERROR_UNKNOWN;
+    }
+
+    return 0;
+}
+
+static int opencl_filter_intel_media_vaapi_device(AVHWDeviceContext *hwdev,
+                                                  cl_device_id device_id,
+                                                  const char *device_name,
+                                                  void *context)
+{
+    const char *va_ext = "cl_intel_va_api_media_sharing";
+
+    if (opencl_check_device_extension(device_id, va_ext)) {
+        return 0;
+    } else {
+        av_log(hwdev, AV_LOG_DEBUG, "Device %s does not support the "
+               "%s extension.\n", device_name, va_ext);
+        return 1;
+    }
+}
+#endif
+
+#if HAVE_OPENCL_DXVA2
+static int opencl_filter_dxva2_platform(AVHWDeviceContext *hwdev,
+                                        cl_platform_id platform_id,
+                                        const char *platform_name,
+                                        void *context)
+{
+    const char *dx9_ext = "cl_khr_dx9_media_sharing";
+
+    if (opencl_check_platform_extension(platform_id, dx9_ext)) {
+        return 0;
+    } else {
+        av_log(hwdev, AV_LOG_DEBUG, "Platform %s does not support the "
+               "%s extension.\n", platform_name, dx9_ext);
+        return 1;
+    }
+}
+
+static int opencl_enumerate_dxva2_devices(AVHWDeviceContext *hwdev,
+                                          cl_platform_id platform_id,
+                                          const char *platform_name,
+                                          cl_uint *nb_devices,
+                                          cl_device_id **devices,
+                                          void *context)
+{
+    IDirect3DDevice9 *device = context;
+    clGetDeviceIDsFromDX9MediaAdapterKHR_fn
+        clGetDeviceIDsFromDX9MediaAdapterKHR;
+    cl_dx9_media_adapter_type_khr media_adapter_type = CL_ADAPTER_D3D9EX_KHR;
+    cl_int cle;
+
+    clGetDeviceIDsFromDX9MediaAdapterKHR =
+        clGetExtensionFunctionAddressForPlatform(platform_id,
+            "clGetDeviceIDsFromDX9MediaAdapterKHR");
+    if (!clGetDeviceIDsFromDX9MediaAdapterKHR) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get address of "
+               "clGetDeviceIDsFromDX9MediaAdapterKHR().\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    cle = clGetDeviceIDsFromDX9MediaAdapterKHR(
+        platform_id, 1, &media_adapter_type, (void**)&device,
+        CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR,
+        0, NULL, nb_devices);
+    if (cle == CL_DEVICE_NOT_FOUND) {
+        av_log(hwdev, AV_LOG_DEBUG, "No DXVA2-supporting devices found "
+               "on platform \"%s\".\n", platform_name);
+        *nb_devices = 0;
+        return 0;
+    } else if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get number of devices "
+               "on platform \"%s\": %d.\n", platform_name, cle);
+        return AVERROR_UNKNOWN;
+    }
+
+    *devices = av_malloc_array(*nb_devices, sizeof(**devices));
+    if (!*devices)
+        return AVERROR(ENOMEM);
+
+    cle = clGetDeviceIDsFromDX9MediaAdapterKHR(
+        platform_id, 1, &media_adapter_type, (void**)&device,
+        CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR,
+        *nb_devices, *devices, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get list of DXVA2-supporting "
+               "devices on platform \"%s\": %d.\n", platform_name, cle);
+        av_freep(devices);
+        return AVERROR_UNKNOWN;
+    }
+
+    return 0;
+}
+#endif
+
+#if HAVE_OPENCL_D3D11
+static int opencl_filter_d3d11_platform(AVHWDeviceContext *hwdev,
+                                        cl_platform_id platform_id,
+                                        const char *platform_name,
+                                        void *context)
+{
+    const char *d3d11_ext = "cl_khr_d3d11_sharing";
+
+    if (opencl_check_platform_extension(platform_id, d3d11_ext)) {
+        return 0;
+    } else {
+        av_log(hwdev, AV_LOG_DEBUG, "Platform %s does not support the "
+               "%s extension.\n", platform_name, d3d11_ext);
+        return 1;
+    }
+}
+
+static int opencl_enumerate_d3d11_devices(AVHWDeviceContext *hwdev,
+                                          cl_platform_id platform_id,
+                                          const char *platform_name,
+                                          cl_uint *nb_devices,
+                                          cl_device_id **devices,
+                                          void *context)
+{
+    ID3D11Device *device = context;
+    clGetDeviceIDsFromD3D11KHR_fn clGetDeviceIDsFromD3D11KHR;
+    cl_int cle;
+
+    clGetDeviceIDsFromD3D11KHR =
+        clGetExtensionFunctionAddressForPlatform(platform_id,
+            "clGetDeviceIDsFromD3D11KHR");
+    if (!clGetDeviceIDsFromD3D11KHR) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get address of "
+               "clGetDeviceIDsFromD3D11KHR().\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    cle = clGetDeviceIDsFromD3D11KHR(platform_id,
+                                     CL_D3D11_DEVICE_KHR, device,
+                                     CL_PREFERRED_DEVICES_FOR_D3D11_KHR,
+                                     0, NULL, nb_devices);
+    if (cle == CL_DEVICE_NOT_FOUND) {
+        av_log(hwdev, AV_LOG_DEBUG, "No D3D11-supporting devices found "
+               "on platform \"%s\".\n", platform_name);
+        *nb_devices = 0;
+        return 0;
+    } else if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get number of devices "
+               "on platform \"%s\": %d.\n", platform_name, cle);
+        return AVERROR_UNKNOWN;
+    }
+
+    *devices = av_malloc_array(*nb_devices, sizeof(**devices));
+    if (!*devices)
+        return AVERROR(ENOMEM);
+
+    cle = clGetDeviceIDsFromD3D11KHR(platform_id,
+                                     CL_D3D11_DEVICE_KHR, device,
+                                     CL_PREFERRED_DEVICES_FOR_D3D11_KHR,
+                                     *nb_devices, *devices, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to get list of D3D11-supporting "
+               "devices on platform \"%s\": %d.\n", platform_name, cle);
+        av_freep(devices);
+        return AVERROR_UNKNOWN;
+    }
+
+    return 0;
+}
+#endif
+
+#if HAVE_OPENCL_DXVA2 || HAVE_OPENCL_D3D11
+static int opencl_filter_gpu_device(AVHWDeviceContext *hwdev,
+                                    cl_device_id device_id,
+                                    const char *device_name,
+                                    void *context)
+{
+    cl_device_type device_type;
+    cl_int cle;
+
+    cle = clGetDeviceInfo(device_id, CL_DEVICE_TYPE,
+                          sizeof(device_type), &device_type, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to query device type "
+               "of device \"%s\".\n", device_name);
+        return AVERROR_UNKNOWN;
+    }
+    if (!(device_type & CL_DEVICE_TYPE_GPU)) {
+        av_log(hwdev, AV_LOG_DEBUG, "Device %s skipped (not GPU).\n",
+               device_name);
+        return 1;
+    }
+
+    return 0;
+}
+#endif
+
+#if HAVE_OPENCL_DRM_ARM
+static int opencl_filter_drm_arm_platform(AVHWDeviceContext *hwdev,
+                                          cl_platform_id platform_id,
+                                          const char *platform_name,
+                                          void *context)
+{
+    const char *drm_arm_ext = "cl_arm_import_memory";
+
+    if (opencl_check_platform_extension(platform_id, drm_arm_ext)) {
+        return 0;
+    } else {
+        av_log(hwdev, AV_LOG_DEBUG, "Platform %s does not support the "
+               "%s extension.\n", platform_name, drm_arm_ext);
+        return 1;
+    }
+}
+
+static int opencl_filter_drm_arm_device(AVHWDeviceContext *hwdev,
+                                        cl_device_id device_id,
+                                        const char *device_name,
+                                        void *context)
+{
+    const char *drm_arm_ext = "cl_arm_import_memory";
+
+    if (opencl_check_device_extension(device_id, drm_arm_ext)) {
+        return 0;
+    } else {
+        av_log(hwdev, AV_LOG_DEBUG, "Device %s does not support the "
+               "%s extension.\n", device_name, drm_arm_ext);
+        return 1;
+    }
+}
+#endif
+
+static int opencl_device_derive(AVHWDeviceContext *hwdev,
+                                AVHWDeviceContext *src_ctx,
+                                int flags)
+{
+    int err;
+    switch (src_ctx->type) {
+
+#if HAVE_OPENCL_DRM_BEIGNET
+    case AV_HWDEVICE_TYPE_DRM:
+    case AV_HWDEVICE_TYPE_VAAPI:
+        {
+            // Surface mapping works via DRM PRIME fds with no special
+            // initialisation required in advance.  This just finds the
+            // Beignet ICD by name.
+            AVDictionary *opts = NULL;
+
+            err = av_dict_set(&opts, "platform_vendor", "Intel", 0);
+            if (err >= 0)
+                err = av_dict_set(&opts, "platform_version", "beignet", 0);
+            if (err >= 0) {
+                OpenCLDeviceSelector selector = {
+                    .platform_index      = -1,
+                    .device_index        = 0,
+                    .context             = opts,
+                    .enumerate_platforms = &opencl_enumerate_platforms,
+                    .filter_platform     = &opencl_filter_platform,
+                    .enumerate_devices   = &opencl_enumerate_devices,
+                    .filter_device       = NULL,
+                };
+                err = opencl_device_create_internal(hwdev, &selector, NULL);
+            }
+            av_dict_free(&opts);
+        }
+        break;
+#endif
+
+#if HAVE_OPENCL_VAAPI_INTEL_MEDIA
+        // The generic code automatically attempts to derive from all
+        // ancestors of the given device, so we can ignore QSV devices here
+        // and just consider the inner VAAPI device it was derived from.
+    case AV_HWDEVICE_TYPE_VAAPI:
+        {
+            AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
+            cl_context_properties props[7] = {
+                CL_CONTEXT_PLATFORM,
+                0,
+                CL_CONTEXT_VA_API_DISPLAY_INTEL,
+                (intptr_t)src_hwctx->display,
+                CL_CONTEXT_INTEROP_USER_SYNC,
+                CL_FALSE,
+                0,
+            };
+            OpenCLDeviceSelector selector = {
+                .platform_index      = -1,
+                .device_index        = -1,
+                .context             = src_hwctx->display,
+                .enumerate_platforms = &opencl_enumerate_platforms,
+                .filter_platform     = &opencl_filter_intel_media_vaapi_platform,
+                .enumerate_devices   = &opencl_enumerate_intel_media_vaapi_devices,
+                .filter_device       = &opencl_filter_intel_media_vaapi_device,
+            };
+
+            err = opencl_device_create_internal(hwdev, &selector, props);
+        }
+        break;
+#endif
+
+#if HAVE_OPENCL_DXVA2
+    case AV_HWDEVICE_TYPE_DXVA2:
+        {
+            AVDXVA2DeviceContext *src_hwctx = src_ctx->hwctx;
+            IDirect3DDevice9 *device;
+            HANDLE device_handle;
+            HRESULT hr;
+
+            hr = IDirect3DDeviceManager9_OpenDeviceHandle(src_hwctx->devmgr,
+                                                          &device_handle);
+            if (FAILED(hr)) {
+                av_log(hwdev, AV_LOG_ERROR, "Failed to open device handle "
+                       "for Direct3D9 device: %lx.\n", (unsigned long)hr);
+                err = AVERROR_UNKNOWN;
+                break;
+            }
+
+            hr = IDirect3DDeviceManager9_LockDevice(src_hwctx->devmgr,
+                                                    device_handle,
+                                                    &device, FALSE);
+            if (SUCCEEDED(hr)) {
+                cl_context_properties props[5] = {
+                    CL_CONTEXT_PLATFORM,
+                    0,
+                    CL_CONTEXT_ADAPTER_D3D9EX_KHR,
+                    (intptr_t)device,
+                    0,
+                };
+                OpenCLDeviceSelector selector = {
+                    .platform_index      = -1,
+                    .device_index        = -1,
+                    .context             = device,
+                    .enumerate_platforms = &opencl_enumerate_platforms,
+                    .filter_platform     = &opencl_filter_dxva2_platform,
+                    .enumerate_devices   = &opencl_enumerate_dxva2_devices,
+                    .filter_device       = &opencl_filter_gpu_device,
+                };
+
+                err = opencl_device_create_internal(hwdev, &selector, props);
+
+                IDirect3DDeviceManager9_UnlockDevice(src_hwctx->devmgr,
+                                                     device_handle, FALSE);
+            } else {
+                av_log(hwdev, AV_LOG_ERROR, "Failed to lock device handle "
+                       "for Direct3D9 device: %lx.\n", (unsigned long)hr);
+                err = AVERROR_UNKNOWN;
+            }
+
+            IDirect3DDeviceManager9_CloseDeviceHandle(src_hwctx->devmgr,
+                                                      device_handle);
+        }
+        break;
+#endif
+
+#if HAVE_OPENCL_D3D11
+    case AV_HWDEVICE_TYPE_D3D11VA:
+        {
+            AVD3D11VADeviceContext *src_hwctx = src_ctx->hwctx;
+            cl_context_properties props[5] = {
+                CL_CONTEXT_PLATFORM,
+                0,
+                CL_CONTEXT_D3D11_DEVICE_KHR,
+                (intptr_t)src_hwctx->device,
+                0,
+            };
+            OpenCLDeviceSelector selector = {
+                .platform_index      = -1,
+                .device_index        = -1,
+                .context             = src_hwctx->device,
+                .enumerate_platforms = &opencl_enumerate_platforms,
+                .filter_platform     = &opencl_filter_d3d11_platform,
+                .enumerate_devices   = &opencl_enumerate_d3d11_devices,
+                .filter_device       = &opencl_filter_gpu_device,
+            };
+
+            err = opencl_device_create_internal(hwdev, &selector, props);
+        }
+        break;
+#endif
+
+#if HAVE_OPENCL_DRM_ARM
+    case AV_HWDEVICE_TYPE_DRM:
+        {
+            OpenCLDeviceSelector selector = {
+                .platform_index      = -1,
+                .device_index        = -1,
+                .context             = NULL,
+                .enumerate_platforms = &opencl_enumerate_platforms,
+                .filter_platform     = &opencl_filter_drm_arm_platform,
+                .enumerate_devices   = &opencl_enumerate_devices,
+                .filter_device       = &opencl_filter_drm_arm_device,
+            };
+
+            err = opencl_device_create_internal(hwdev, &selector, NULL);
+        }
+        break;
+#endif
+
+    default:
+        err = AVERROR(ENOSYS);
+        break;
+    }
+
+    return err;
+}
+
+static int opencl_get_plane_format(enum AVPixelFormat pixfmt,
+                                   int plane, int width, int height,
+                                   cl_image_format *image_format,
+                                   cl_image_desc *image_desc)
+{
+    const AVPixFmtDescriptor *desc;
+    const AVComponentDescriptor *comp;
+    int channels = 0, order = 0, depth = 0, step = 0;
+    int wsub, hsub, alpha;
+    int c;
+
+    if (plane >= AV_NUM_DATA_POINTERS)
+        return AVERROR(ENOENT);
+
+    desc = av_pix_fmt_desc_get(pixfmt);
+
+    // Only normal images are allowed.
+    if (desc->flags & (AV_PIX_FMT_FLAG_BITSTREAM |
+                       AV_PIX_FMT_FLAG_HWACCEL   |
+                       AV_PIX_FMT_FLAG_PAL))
+        return AVERROR(EINVAL);
+
+    wsub = 1 << desc->log2_chroma_w;
+    hsub = 1 << desc->log2_chroma_h;
+    // Subsampled components must be exact.
+    if (width & wsub - 1 || height & hsub - 1)
+        return AVERROR(EINVAL);
+
+    for (c = 0; c < desc->nb_components; c++) {
+        comp = &desc->comp[c];
+        if (comp->plane != plane)
+            continue;
+        // The step size must be a power of two.
+        if (comp->step != 1 && comp->step != 2 &&
+            comp->step != 4 && comp->step != 8)
+            return AVERROR(EINVAL);
+        // The bits in each component must be packed in the
+        // most-significant-bits of the relevant bytes.
+        if (comp->shift + comp->depth != 8 &&
+            comp->shift + comp->depth != 16)
+            return AVERROR(EINVAL);
+        // The depth must not vary between components.
+        if (depth && comp->depth != depth)
+            return AVERROR(EINVAL);
+        // If a single data element crosses multiple bytes then
+        // it must match the native endianness.
+        if (comp->depth > 8 &&
+            HAVE_BIGENDIAN == !(desc->flags & AV_PIX_FMT_FLAG_BE))
+            return AVERROR(EINVAL);
+        // A single data element must not contain multiple samples
+        // from the same component.
+        if (step && comp->step != step)
+            return AVERROR(EINVAL);
+        order = order * 10 + c + 1;
+        depth = comp->depth;
+        step  = comp->step;
+        alpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA &&
+                 c == desc->nb_components - 1);
+        ++channels;
+    }
+    if (channels == 0)
+        return AVERROR(ENOENT);
+
+    memset(image_format, 0, sizeof(*image_format));
+    memset(image_desc,   0, sizeof(*image_desc));
+    image_desc->image_type = CL_MEM_OBJECT_IMAGE2D;
+
+    if (plane == 0 || alpha) {
+        image_desc->image_width     = width;
+        image_desc->image_height    = height;
+        image_desc->image_row_pitch = step * width;
+    } else {
+        image_desc->image_width     = width  / wsub;
+        image_desc->image_height    = height / hsub;
+        image_desc->image_row_pitch = step * width / wsub;
+    }
+
+    if (depth <= 8) {
+        image_format->image_channel_data_type = CL_UNORM_INT8;
+    } else {
+        if (depth <= 16)
+            image_format->image_channel_data_type = CL_UNORM_INT16;
+        else
+            return AVERROR(EINVAL);
+    }
+
+#define CHANNEL_ORDER(order, type) \
+    case order: image_format->image_channel_order = type; break;
+    switch (order) {
+        CHANNEL_ORDER(1,    CL_R);
+        CHANNEL_ORDER(2,    CL_R);
+        CHANNEL_ORDER(3,    CL_R);
+        CHANNEL_ORDER(4,    CL_R);
+        CHANNEL_ORDER(12,   CL_RG);
+        CHANNEL_ORDER(23,   CL_RG);
+        CHANNEL_ORDER(1234, CL_RGBA);
+        CHANNEL_ORDER(3214, CL_BGRA);
+        CHANNEL_ORDER(4123, CL_ARGB);
+#ifdef CL_ABGR
+        CHANNEL_ORDER(4321, CL_ABGR);
+#endif
+    default:
+        return AVERROR(EINVAL);
+    }
+#undef CHANNEL_ORDER
+
+    return 0;
+}
+
+static int opencl_frames_get_constraints(AVHWDeviceContext *hwdev,
+                                         const void *hwconfig,
+                                         AVHWFramesConstraints *constraints)
+{
+    AVOpenCLDeviceContext *hwctx = hwdev->hwctx;
+    cl_uint nb_image_formats;
+    cl_image_format *image_formats = NULL;
+    cl_int cle;
+    enum AVPixelFormat pix_fmt;
+    int err, pix_fmts_found;
+    size_t max_width, max_height;
+
+    cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH,
+                          sizeof(max_width), &max_width, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to query maximum "
+               "supported image width: %d.\n", cle);
+    } else {
+        constraints->max_width = max_width;
+    }
+    cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
+                          sizeof(max_height), &max_height, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to query maximum "
+               "supported image height: %d.\n", cle);
+    } else {
+        constraints->max_height = max_height;
+    }
+    av_log(hwdev, AV_LOG_DEBUG, "Maximum supported image size %dx%d.\n",
+           constraints->max_width, constraints->max_height);
+
+    cle = clGetSupportedImageFormats(hwctx->context,
+                                     CL_MEM_READ_WRITE,
+                                     CL_MEM_OBJECT_IMAGE2D,
+                                     0, NULL, &nb_image_formats);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to query supported "
+               "image formats: %d.\n", cle);
+        err = AVERROR(ENOSYS);
+        goto fail;
+    }
+    if (nb_image_formats == 0) {
+        av_log(hwdev, AV_LOG_ERROR, "No image support in OpenCL "
+               "driver (zero supported image formats).\n");
+        err = AVERROR(ENOSYS);
+        goto fail;
+    }
+
+    image_formats =
+        av_malloc_array(nb_image_formats, sizeof(*image_formats));
+    if (!image_formats) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    cle = clGetSupportedImageFormats(hwctx->context,
+                                     CL_MEM_READ_WRITE,
+                                     CL_MEM_OBJECT_IMAGE2D,
+                                     nb_image_formats,
+                                     image_formats, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(hwdev, AV_LOG_ERROR, "Failed to query supported "
+               "image formats: %d.\n", cle);
+        err = AVERROR(ENOSYS);
+        goto fail;
+    }
+
+    pix_fmts_found = 0;
+    for (pix_fmt = 0; pix_fmt < AV_PIX_FMT_NB; pix_fmt++) {
+        cl_image_format image_format;
+        cl_image_desc   image_desc;
+        int plane, i;
+
+        for (plane = 0;; plane++) {
+            err = opencl_get_plane_format(pix_fmt, plane, 0, 0,
+                                          &image_format,
+                                          &image_desc);
+            if (err < 0)
+                break;
+
+            for (i = 0; i < nb_image_formats; i++) {
+                if (image_formats[i].image_channel_order ==
+                    image_format.image_channel_order &&
+                    image_formats[i].image_channel_data_type ==
+                    image_format.image_channel_data_type)
+                    break;
+            }
+            if (i == nb_image_formats) {
+                err = AVERROR(EINVAL);
+                break;
+            }
+        }
+        if (err != AVERROR(ENOENT))
+            continue;
+
+        av_log(hwdev, AV_LOG_DEBUG, "Format %s supported.\n",
+               av_get_pix_fmt_name(pix_fmt));
+
+        err = av_reallocp_array(&constraints->valid_sw_formats,
+                                pix_fmts_found + 2,
+                                sizeof(*constraints->valid_sw_formats));
+        if (err < 0)
+            goto fail;
+        constraints->valid_sw_formats[pix_fmts_found] = pix_fmt;
+        constraints->valid_sw_formats[pix_fmts_found + 1] =
+            AV_PIX_FMT_NONE;
+        ++pix_fmts_found;
+    }
+
+    av_freep(&image_formats);
+
+    constraints->valid_hw_formats =
+        av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
+    if (!constraints->valid_hw_formats) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    constraints->valid_hw_formats[0] = AV_PIX_FMT_OPENCL;
+    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
+
+    return 0;
+
+fail:
+    av_freep(&image_formats);
+    return err;
+}
+
+static void opencl_pool_free(void *opaque, uint8_t *data)
+{
+    AVHWFramesContext       *hwfc = opaque;
+    AVOpenCLFrameDescriptor *desc = (AVOpenCLFrameDescriptor*)data;
+    cl_int cle;
+    int p;
+
+    for (p = 0; p < desc->nb_planes; p++) {
+        cle = clReleaseMemObject(desc->planes[p]);
+        if (cle != CL_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to release plane %d: "
+                   "%d.\n", p, cle);
+        }
+    }
+
+    av_free(desc);
+}
+
+static AVBufferRef *opencl_pool_alloc(void *opaque, int size)
+{
+    AVHWFramesContext      *hwfc = opaque;
+    AVOpenCLDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+    AVOpenCLFrameDescriptor *desc;
+    cl_int cle;
+    cl_mem image;
+    cl_image_format image_format;
+    cl_image_desc   image_desc;
+    int err, p;
+    AVBufferRef *ref;
+
+    desc = av_mallocz(sizeof(*desc));
+    if (!desc)
+        return NULL;
+
+    for (p = 0;; p++) {
+        err = opencl_get_plane_format(hwfc->sw_format, p,
+                                      hwfc->width, hwfc->height,
+                                      &image_format, &image_desc);
+        if (err == AVERROR(ENOENT))
+            break;
+        if (err < 0)
+            goto fail;
+
+        // For generic image objects, the pitch is determined by the
+        // implementation.
+        image_desc.image_row_pitch = 0;
+
+        image = clCreateImage(hwctx->context, CL_MEM_READ_WRITE,
+                              &image_format, &image_desc, NULL, &cle);
+        if (!image) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to create image for "
+                   "plane %d: %d.\n", p, cle);
+            goto fail;
+        }
+
+        desc->planes[p] = image;
+    }
+
+    desc->nb_planes = p;
+
+    ref = av_buffer_create((uint8_t*)desc, sizeof(*desc),
+                           &opencl_pool_free, hwfc, 0);
+    if (!ref)
+        goto fail;
+
+    return ref;
+
+fail:
+    for (p = 0; desc->planes[p]; p++)
+        clReleaseMemObject(desc->planes[p]);
+    av_free(desc);
+    return NULL;
+}
+
+static int opencl_frames_init_command_queue(AVHWFramesContext *hwfc)
+{
+    AVOpenCLFramesContext *hwctx = hwfc->hwctx;
+    OpenCLDeviceContext *devpriv = hwfc->device_ctx->internal->priv;
+    OpenCLFramesContext    *priv = hwfc->internal->priv;
+    cl_int cle;
+
+    priv->command_queue = hwctx->command_queue ? hwctx->command_queue
+                                               : devpriv->command_queue;
+    cle = clRetainCommandQueue(priv->command_queue);
+    if (cle != CL_SUCCESS) {
+        av_log(hwfc, AV_LOG_ERROR, "Failed to retain frame "
+               "command queue: %d.\n", cle);
+        return AVERROR(EIO);
+    }
+
+    return 0;
+}
+
+static int opencl_frames_init(AVHWFramesContext *hwfc)
+{
+    if (!hwfc->pool) {
+        hwfc->internal->pool_internal =
+            av_buffer_pool_init2(sizeof(cl_mem), hwfc,
+                                 &opencl_pool_alloc, NULL);
+        if (!hwfc->internal->pool_internal)
+            return AVERROR(ENOMEM);
+    }
+
+    return opencl_frames_init_command_queue(hwfc);
+}
+
+static void opencl_frames_uninit(AVHWFramesContext *hwfc)
+{
+    OpenCLFramesContext *priv = hwfc->internal->priv;
+    cl_int cle;
+
+#if HAVE_OPENCL_DXVA2 || HAVE_OPENCL_D3D11
+    int i, p;
+    for (i = 0; i < priv->nb_mapped_frames; i++) {
+        AVOpenCLFrameDescriptor *desc = &priv->mapped_frames[i];
+        for (p = 0; p < desc->nb_planes; p++) {
+            cle = clReleaseMemObject(desc->planes[p]);
+            if (cle != CL_SUCCESS) {
+                av_log(hwfc, AV_LOG_ERROR, "Failed to release mapped "
+                       "frame object (frame %d plane %d): %d.\n",
+                       i, p, cle);
+            }
+        }
+    }
+    av_freep(&priv->mapped_frames);
+#endif
+
+    cle = clReleaseCommandQueue(priv->command_queue);
+    if (cle != CL_SUCCESS) {
+        av_log(hwfc, AV_LOG_ERROR, "Failed to release frame "
+               "command queue: %d.\n", cle);
+    }
+}
+
+static int opencl_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
+{
+    AVOpenCLFrameDescriptor *desc;
+    int p;
+
+    frame->buf[0] = av_buffer_pool_get(hwfc->pool);
+    if (!frame->buf[0])
+        return AVERROR(ENOMEM);
+
+    desc = (AVOpenCLFrameDescriptor*)frame->buf[0]->data;
+
+    for (p = 0; p < desc->nb_planes; p++)
+        frame->data[p] = (uint8_t*)desc->planes[p];
+
+    frame->format  = AV_PIX_FMT_OPENCL;
+    frame->width   = hwfc->width;
+    frame->height  = hwfc->height;
+
+    return 0;
+}
+
+static int opencl_transfer_get_formats(AVHWFramesContext *hwfc,
+                                       enum AVHWFrameTransferDirection dir,
+                                       enum AVPixelFormat **formats)
+{
+    enum AVPixelFormat *fmts;
+
+    fmts = av_malloc_array(2, sizeof(*fmts));
+    if (!fmts)
+        return AVERROR(ENOMEM);
+
+    fmts[0] = hwfc->sw_format;
+    fmts[1] = AV_PIX_FMT_NONE;
+
+    *formats = fmts;
+    return 0;
+}
+
+static int opencl_wait_events(AVHWFramesContext *hwfc,
+                              cl_event *events, int nb_events)
+{
+    cl_int cle;
+    int i;
+
+    cle = clWaitForEvents(nb_events, events);
+    if (cle != CL_SUCCESS) {
+        av_log(hwfc, AV_LOG_ERROR, "Failed to wait for event "
+               "completion: %d.\n", cle);
+        return AVERROR(EIO);
+    }
+
+    for (i = 0; i < nb_events; i++) {
+        cle = clReleaseEvent(events[i]);
+        if (cle != CL_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to release "
+                   "event: %d.\n", cle);
+        }
+    }
+
+    return 0;
+}
+
+static int opencl_transfer_data_from(AVHWFramesContext *hwfc,
+                                     AVFrame *dst, const AVFrame *src)
+{
+    OpenCLFramesContext *priv = hwfc->internal->priv;
+    cl_image_format image_format;
+    cl_image_desc image_desc;
+    cl_int cle;
+    size_t origin[3] = { 0, 0, 0 };
+    size_t region[3];
+    cl_event events[AV_NUM_DATA_POINTERS];
+    int err, p;
+
+    if (dst->format != hwfc->sw_format)
+        return AVERROR(EINVAL);
+
+    for (p = 0;; p++) {
+        err = opencl_get_plane_format(hwfc->sw_format, p,
+                                      src->width, src->height,
+                                      &image_format, &image_desc);
+        if (err < 0) {
+            if (err == AVERROR(ENOENT))
+                err = 0;
+            break;
+        }
+
+        if (!dst->data[p]) {
+            av_log(hwfc, AV_LOG_ERROR, "Plane %d missing on "
+                   "destination frame for transfer.\n", p);
+            err = AVERROR(EINVAL);
+            break;
+        }
+
+        region[0] = image_desc.image_width;
+        region[1] = image_desc.image_height;
+        region[2] = 1;
+
+        cle = clEnqueueReadImage(priv->command_queue,
+                                 (cl_mem)src->data[p],
+                                 CL_FALSE, origin, region,
+                                 dst->linesize[p], 0,
+                                 dst->data[p],
+                                 0, NULL, &events[p]);
+        if (cle != CL_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to enqueue read of "
+                   "OpenCL image plane %d: %d.\n", p, cle);
+            err = AVERROR(EIO);
+            break;
+        }
+    }
+
+    opencl_wait_events(hwfc, events, p);
+
+    return err;
+}
+
+static int opencl_transfer_data_to(AVHWFramesContext *hwfc,
+                                   AVFrame *dst, const AVFrame *src)
+{
+    OpenCLFramesContext *priv = hwfc->internal->priv;
+    cl_image_format image_format;
+    cl_image_desc image_desc;
+    cl_int cle;
+    size_t origin[3] = { 0, 0, 0 };
+    size_t region[3];
+    cl_event events[AV_NUM_DATA_POINTERS];
+    int err, p;
+
+    if (src->format != hwfc->sw_format)
+        return AVERROR(EINVAL);
+
+    for (p = 0;; p++) {
+        err = opencl_get_plane_format(hwfc->sw_format, p,
+                                      src->width, src->height,
+                                      &image_format, &image_desc);
+        if (err < 0) {
+            if (err == AVERROR(ENOENT))
+                err = 0;
+            break;
+        }
+
+        if (!src->data[p]) {
+            av_log(hwfc, AV_LOG_ERROR, "Plane %d missing on "
+                   "source frame for transfer.\n", p);
+            err = AVERROR(EINVAL);
+            break;
+        }
+
+        region[0] = image_desc.image_width;
+        region[1] = image_desc.image_height;
+        region[2] = 1;
+
+        cle = clEnqueueWriteImage(priv->command_queue,
+                                  (cl_mem)dst->data[p],
+                                  CL_FALSE, origin, region,
+                                  src->linesize[p], 0,
+                                  src->data[p],
+                                  0, NULL, &events[p]);
+        if (cle != CL_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to enqueue write of "
+                   "OpenCL image plane %d: %d.\n", p, cle);
+            err = AVERROR(EIO);
+            break;
+        }
+    }
+
+    opencl_wait_events(hwfc, events, p);
+
+    return err;
+}
+
+typedef struct OpenCLMapping {
+    // The mapped addresses for each plane.
+    // The destination frame is not available when we unmap, so these
+    // need to be stored separately.
+    void *address[AV_NUM_DATA_POINTERS];
+} OpenCLMapping;
+
+static void opencl_unmap_frame(AVHWFramesContext *hwfc,
+                               HWMapDescriptor *hwmap)
+{
+    OpenCLFramesContext *priv = hwfc->internal->priv;
+    OpenCLMapping *map = hwmap->priv;
+    cl_event events[AV_NUM_DATA_POINTERS];
+    int p, e;
+    cl_int cle;
+
+    for (p = e = 0; p < FF_ARRAY_ELEMS(map->address); p++) {
+        if (!map->address[p])
+            break;
+
+        cle = clEnqueueUnmapMemObject(priv->command_queue,
+                                      (cl_mem)hwmap->source->data[p],
+                                      map->address[p],
+                                      0, NULL, &events[e]);
+        if (cle != CL_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to unmap OpenCL "
+                   "image plane %d: %d.\n", p, cle);
+        }
+        ++e;
+    }
+
+    opencl_wait_events(hwfc, events, e);
+
+    av_free(map);
+}
+
+static int opencl_map_frame(AVHWFramesContext *hwfc, AVFrame *dst,
+                            const AVFrame *src, int flags)
+{
+    OpenCLFramesContext *priv = hwfc->internal->priv;
+    cl_map_flags map_flags;
+    cl_image_format image_format;
+    cl_image_desc image_desc;
+    cl_int cle;
+    OpenCLMapping *map;
+    size_t origin[3] = { 0, 0, 0 };
+    size_t region[3];
+    size_t row_pitch;
+    cl_event events[AV_NUM_DATA_POINTERS];
+    int err, p;
+
+    av_assert0(hwfc->sw_format == dst->format);
+
+    if (flags & AV_HWFRAME_MAP_OVERWRITE &&
+        !(flags & AV_HWFRAME_MAP_READ)) {
+        // This is mutually exclusive with the read/write flags, so
+        // there is no way to map with read here.
+        map_flags = CL_MAP_WRITE_INVALIDATE_REGION;
+    } else {
+        map_flags = 0;
+        if (flags & AV_HWFRAME_MAP_READ)
+            map_flags |= CL_MAP_READ;
+        if (flags & AV_HWFRAME_MAP_WRITE)
+            map_flags |= CL_MAP_WRITE;
+    }
+
+    map = av_mallocz(sizeof(*map));
+    if (!map)
+        return AVERROR(ENOMEM);
+
+    for (p = 0;; p++) {
+        err = opencl_get_plane_format(hwfc->sw_format, p,
+                                      src->width, src->height,
+                                      &image_format, &image_desc);
+        if (err == AVERROR(ENOENT))
+            break;
+        if (err < 0)
+            goto fail;
+
+        region[0] = image_desc.image_width;
+        region[1] = image_desc.image_height;
+        region[2] = 1;
+
+        map->address[p] =
+            clEnqueueMapImage(priv->command_queue,
+                              (cl_mem)src->data[p],
+                              CL_FALSE, map_flags, origin, region,
+                              &row_pitch, NULL, 0, NULL,
+                              &events[p], &cle);
+        if (!map->address[p]) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to map OpenCL "
+                   "image plane %d: %d.\n", p, cle);
+            err = AVERROR(EIO);
+            goto fail;
+        }
+
+        dst->data[p] = map->address[p];
+
+        av_log(hwfc, AV_LOG_DEBUG, "Map plane %d (%p -> %p).\n",
+               p, src->data[p], dst->data[p]);
+    }
+
+    err = opencl_wait_events(hwfc, events, p);
+    if (err < 0)
+        goto fail;
+
+    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
+                                &opencl_unmap_frame, map);
+    if (err < 0)
+        goto fail;
+
+    dst->width  = src->width;
+    dst->height = src->height;
+
+    return 0;
+
+fail:
+    for (p = 0; p < AV_NUM_DATA_POINTERS; p++) {
+        if (!map->address[p])
+            break;
+        clEnqueueUnmapMemObject(priv->command_queue,
+                                (cl_mem)src->data[p],
+                                map->address[p],
+                                0, NULL, &events[p]);
+    }
+    if (p > 0)
+        opencl_wait_events(hwfc, events, p);
+    av_freep(&map);
+    return err;
+}
+
+#if HAVE_OPENCL_DRM_BEIGNET
+
+typedef struct DRMBeignetToOpenCLMapping {
+    AVFrame              *drm_frame;
+    AVDRMFrameDescriptor *drm_desc;
+
+    AVOpenCLFrameDescriptor frame;
+} DRMBeignetToOpenCLMapping;
+
+static void opencl_unmap_from_drm_beignet(AVHWFramesContext *dst_fc,
+                                          HWMapDescriptor *hwmap)
+{
+    DRMBeignetToOpenCLMapping *mapping = hwmap->priv;
+    cl_int cle;
+    int i;
+
+    for (i = 0; i < mapping->frame.nb_planes; i++) {
+        cle = clReleaseMemObject(mapping->frame.planes[i]);
+        if (cle != CL_SUCCESS) {
+            av_log(dst_fc, AV_LOG_ERROR, "Failed to release CL image "
+                   "of plane %d of DRM frame: %d.\n", i, cle);
+        }
+    }
+
+    av_free(mapping);
+}
+
+static int opencl_map_from_drm_beignet(AVHWFramesContext *dst_fc,
+                                       AVFrame *dst, const AVFrame *src,
+                                       int flags)
+{
+    AVOpenCLDeviceContext *hwctx = dst_fc->device_ctx->hwctx;
+    OpenCLDeviceContext    *priv = dst_fc->device_ctx->internal->priv;
+    DRMBeignetToOpenCLMapping *mapping;
+    const AVDRMFrameDescriptor *desc;
+    cl_int cle;
+    int err, i, j, p;
+
+    desc = (const AVDRMFrameDescriptor*)src->data[0];
+
+    mapping = av_mallocz(sizeof(*mapping));
+    if (!mapping)
+        return AVERROR(ENOMEM);
+
+    p = 0;
+    for (i = 0; i < desc->nb_layers; i++) {
+        const AVDRMLayerDescriptor *layer = &desc->layers[i];
+        for (j = 0; j < layer->nb_planes; j++) {
+            const AVDRMPlaneDescriptor *plane = &layer->planes[j];
+            const AVDRMObjectDescriptor *object =
+                &desc->objects[plane->object_index];
+
+            cl_import_image_info_intel image_info = {
+                .fd        = object->fd,
+                .size      = object->size,
+                .type      = CL_MEM_OBJECT_IMAGE2D,
+                .offset    = plane->offset,
+                .row_pitch = plane->pitch,
+            };
+            cl_image_desc image_desc;
+
+            err = opencl_get_plane_format(dst_fc->sw_format, p,
+                                          src->width, src->height,
+                                          &image_info.fmt,
+                                          &image_desc);
+            if (err < 0) {
+                av_log(dst_fc, AV_LOG_ERROR, "DRM frame layer %d "
+                       "plane %d is not representable in OpenCL: %d.\n",
+                       i, j, err);
+                goto fail;
+            }
+            image_info.width  = image_desc.image_width;
+            image_info.height = image_desc.image_height;
+
+            mapping->frame.planes[p] =
+                priv->clCreateImageFromFdINTEL(hwctx->context,
+                                               &image_info, &cle);
+            if (!mapping->frame.planes[p]) {
+                av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL image "
+                       "from layer %d plane %d of DRM frame: %d.\n",
+                       i, j, cle);
+                err = AVERROR(EIO);
+                goto fail;
+            }
+
+            dst->data[p] = (uint8_t*)mapping->frame.planes[p];
+            mapping->frame.nb_planes = ++p;
+        }
+    }
+
+    err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
+                                &opencl_unmap_from_drm_beignet,
+                                mapping);
+    if (err < 0)
+        goto fail;
+
+    dst->width  = src->width;
+    dst->height = src->height;
+
+    return 0;
+
+fail:
+    for (p = 0; p < mapping->frame.nb_planes; p++) {
+        if (mapping->frame.planes[p])
+            clReleaseMemObject(mapping->frame.planes[p]);
+    }
+    av_free(mapping);
+    return err;
+}
+
+#if HAVE_OPENCL_VAAPI_BEIGNET
+
+static int opencl_map_from_vaapi(AVHWFramesContext *dst_fc,
+                                 AVFrame *dst, const AVFrame *src,
+                                 int flags)
+{
+    AVFrame *tmp;
+    int err;
+
+    tmp = av_frame_alloc();
+    if (!tmp)
+        return AVERROR(ENOMEM);
+
+    tmp->format = AV_PIX_FMT_DRM_PRIME;
+
+    err = av_hwframe_map(tmp, src, flags);
+    if (err < 0)
+        goto fail;
+
+    err = opencl_map_from_drm_beignet(dst_fc, dst, tmp, flags);
+    if (err < 0)
+        goto fail;
+
+    err = ff_hwframe_map_replace(dst, src);
+
+fail:
+    av_frame_free(&tmp);
+    return err;
+}
+
+#endif /* HAVE_OPENCL_VAAPI_BEIGNET */
+#endif /* HAVE_OPENCL_DRM_BEIGNET */
+
+static inline cl_mem_flags opencl_mem_flags_for_mapping(int map_flags)
+{
+    if ((map_flags & AV_HWFRAME_MAP_READ) &&
+        (map_flags & AV_HWFRAME_MAP_WRITE))
+        return CL_MEM_READ_WRITE;
+    else if (map_flags & AV_HWFRAME_MAP_READ)
+        return CL_MEM_READ_ONLY;
+    else if (map_flags & AV_HWFRAME_MAP_WRITE)
+        return CL_MEM_WRITE_ONLY;
+    else
+        return 0;
+}
+
+#if HAVE_OPENCL_VAAPI_INTEL_MEDIA
+
+static void opencl_unmap_from_qsv(AVHWFramesContext *dst_fc,
+                                  HWMapDescriptor *hwmap)
+{
+    AVOpenCLFrameDescriptor    *desc = hwmap->priv;
+    OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv;
+    OpenCLFramesContext *frames_priv = dst_fc->internal->priv;
+    cl_event event;
+    cl_int cle;
+    int p;
+
+    av_log(dst_fc, AV_LOG_DEBUG, "Unmap QSV/VAAPI surface from OpenCL.\n");
+
+    cle = device_priv->clEnqueueReleaseVA_APIMediaSurfacesINTEL(
+        frames_priv->command_queue, desc->nb_planes, desc->planes,
+        0, NULL, &event);
+    if (cle != CL_SUCCESS) {
+        av_log(dst_fc, AV_LOG_ERROR, "Failed to release surface "
+               "handles: %d.\n", cle);
+    }
+
+    opencl_wait_events(dst_fc, &event, 1);
+
+    for (p = 0; p < desc->nb_planes; p++) {
+        cle = clReleaseMemObject(desc->planes[p]);
+        if (cle != CL_SUCCESS) {
+            av_log(dst_fc, AV_LOG_ERROR, "Failed to release CL "
+                   "image of plane %d of QSV/VAAPI surface: %d\n",
+                   p, cle);
+        }
+    }
+
+    av_free(desc);
+}
+
+static int opencl_map_from_qsv(AVHWFramesContext *dst_fc, AVFrame *dst,
+                               const AVFrame *src, int flags)
+{
+    AVHWFramesContext *src_fc =
+        (AVHWFramesContext*)src->hw_frames_ctx->data;
+    AVOpenCLDeviceContext   *dst_dev = dst_fc->device_ctx->hwctx;
+    OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv;
+    OpenCLFramesContext *frames_priv = dst_fc->internal->priv;
+    AVOpenCLFrameDescriptor *desc;
+    VASurfaceID va_surface;
+    cl_mem_flags cl_flags;
+    cl_event event;
+    cl_int cle;
+    int err, p;
+
+#if CONFIG_LIBMFX
+    if (src->format == AV_PIX_FMT_QSV) {
+        mfxFrameSurface1 *mfx_surface = (mfxFrameSurface1*)src->data[3];
+        va_surface = *(VASurfaceID*)mfx_surface->Data.MemId;
+    } else
+#endif
+        if (src->format == AV_PIX_FMT_VAAPI) {
+        va_surface = (VASurfaceID)(uintptr_t)src->data[3];
+    } else {
+        return AVERROR(ENOSYS);
+    }
+
+    cl_flags = opencl_mem_flags_for_mapping(flags);
+    if (!cl_flags)
+        return AVERROR(EINVAL);
+
+    av_log(src_fc, AV_LOG_DEBUG, "Map QSV/VAAPI surface %#x to "
+           "OpenCL.\n", va_surface);
+
+    desc = av_mallocz(sizeof(*desc));
+    if (!desc)
+        return AVERROR(ENOMEM);
+
+    // The cl_intel_va_api_media_sharing extension only supports NV12
+    // surfaces, so for now there are always exactly two planes.
+    desc->nb_planes = 2;
+
+    for (p = 0; p < desc->nb_planes; p++) {
+        desc->planes[p] =
+            device_priv->clCreateFromVA_APIMediaSurfaceINTEL(
+                dst_dev->context, cl_flags, &va_surface, p, &cle);
+        if (!desc->planes[p]) {
+            av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL "
+                   "image from plane %d of QSV/VAAPI surface "
+                   "%#x: %d.\n", p, va_surface, cle);
+            err = AVERROR(EIO);
+            goto fail;
+        }
+
+        dst->data[p] = (uint8_t*)desc->planes[p];
+    }
+
+    cle = device_priv->clEnqueueAcquireVA_APIMediaSurfacesINTEL(
+        frames_priv->command_queue, desc->nb_planes, desc->planes,
+        0, NULL, &event);
+    if (cle != CL_SUCCESS) {
+        av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire surface "
+               "handles: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    err = opencl_wait_events(dst_fc, &event, 1);
+    if (err < 0)
+        goto fail;
+
+    err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
+                                &opencl_unmap_from_qsv, desc);
+    if (err < 0)
+        goto fail;
+
+    dst->width  = src->width;
+    dst->height = src->height;
+
+    return 0;
+
+fail:
+    for (p = 0; p < desc->nb_planes; p++)
+        if (desc->planes[p])
+            clReleaseMemObject(desc->planes[p]);
+    av_freep(&desc);
+    return err;
+}
+
+#endif
+
+#if HAVE_OPENCL_DXVA2
+
+static void opencl_unmap_from_dxva2(AVHWFramesContext *dst_fc,
+                                    HWMapDescriptor *hwmap)
+{
+    AVOpenCLFrameDescriptor    *desc = hwmap->priv;
+    OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv;
+    OpenCLFramesContext *frames_priv = dst_fc->device_ctx->internal->priv;
+    cl_event event;
+    cl_int cle;
+
+    av_log(dst_fc, AV_LOG_DEBUG, "Unmap DXVA2 surface from OpenCL.\n");
+
+    cle = device_priv->clEnqueueReleaseDX9MediaSurfacesKHR(
+        frames_priv->command_queue, desc->nb_planes, desc->planes,
+        0, NULL, &event);
+    if (cle != CL_SUCCESS) {
+        av_log(dst_fc, AV_LOG_ERROR, "Failed to release surface "
+               "handle: %d.\n", cle);
+        return;
+    }
+
+    opencl_wait_events(dst_fc, &event, 1);
+}
+
+static int opencl_map_from_dxva2(AVHWFramesContext *dst_fc, AVFrame *dst,
+                                 const AVFrame *src, int flags)
+{
+    AVHWFramesContext *src_fc =
+        (AVHWFramesContext*)src->hw_frames_ctx->data;
+    AVDXVA2FramesContext  *src_hwctx = src_fc->hwctx;
+    OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv;
+    OpenCLFramesContext *frames_priv = dst_fc->internal->priv;
+    AVOpenCLFrameDescriptor *desc;
+    cl_event event;
+    cl_int cle;
+    int err, i;
+
+    av_log(dst_fc, AV_LOG_DEBUG, "Map DXVA2 surface %p to "
+           "OpenCL.\n", src->data[3]);
+
+    for (i = 0; i < src_hwctx->nb_surfaces; i++) {
+        if (src_hwctx->surfaces[i] == (IDirect3DSurface9*)src->data[3])
+            break;
+    }
+    if (i >= src_hwctx->nb_surfaces) {
+        av_log(dst_fc, AV_LOG_ERROR, "Trying to map from a surface which "
+               "is not in the mapped frames context.\n");
+        return AVERROR(EINVAL);
+    }
+
+    desc = &frames_priv->mapped_frames[i];
+
+    cle = device_priv->clEnqueueAcquireDX9MediaSurfacesKHR(
+        frames_priv->command_queue, desc->nb_planes, desc->planes,
+        0, NULL, &event);
+    if (cle != CL_SUCCESS) {
+        av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire surface "
+               "handle: %d.\n", cle);
+        return AVERROR(EIO);
+    }
+
+    err = opencl_wait_events(dst_fc, &event, 1);
+    if (err < 0)
+        goto fail;
+
+    for (i = 0; i < desc->nb_planes; i++)
+        dst->data[i] = (uint8_t*)desc->planes[i];
+
+    err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
+                                &opencl_unmap_from_dxva2, desc);
+    if (err < 0)
+        goto fail;
+
+    dst->width  = src->width;
+    dst->height = src->height;
+
+    return 0;
+
+fail:
+    cle = device_priv->clEnqueueReleaseDX9MediaSurfacesKHR(
+        frames_priv->command_queue, desc->nb_planes, desc->planes,
+        0, NULL, &event);
+    if (cle == CL_SUCCESS)
+        opencl_wait_events(dst_fc, &event, 1);
+    return err;
+}
+
+static int opencl_frames_derive_from_dxva2(AVHWFramesContext *dst_fc,
+                                           AVHWFramesContext *src_fc, int flags)
+{
+    AVOpenCLDeviceContext   *dst_dev = dst_fc->device_ctx->hwctx;
+    AVDXVA2FramesContext  *src_hwctx = src_fc->hwctx;
+    OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv;
+    OpenCLFramesContext *frames_priv = dst_fc->internal->priv;
+    cl_mem_flags cl_flags;
+    cl_int cle;
+    int err, i, p, nb_planes;
+
+    if (src_fc->sw_format != AV_PIX_FMT_NV12) {
+        av_log(dst_fc, AV_LOG_ERROR, "Only NV12 textures are supported "
+               "for DXVA2 to OpenCL mapping.\n");
+        return AVERROR(EINVAL);
+    }
+    nb_planes = 2;
+
+    if (src_fc->initial_pool_size == 0) {
+        av_log(dst_fc, AV_LOG_ERROR, "Only fixed-size pools are supported "
+               "for DXVA2 to OpenCL mapping.\n");
+        return AVERROR(EINVAL);
+    }
+
+    cl_flags = opencl_mem_flags_for_mapping(flags);
+    if (!cl_flags)
+        return AVERROR(EINVAL);
+
+    frames_priv->nb_mapped_frames = src_hwctx->nb_surfaces;
+
+    frames_priv->mapped_frames =
+        av_mallocz_array(frames_priv->nb_mapped_frames,
+                         sizeof(*frames_priv->mapped_frames));
+    if (!frames_priv->mapped_frames)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < frames_priv->nb_mapped_frames; i++) {
+        AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i];
+        cl_dx9_surface_info_khr surface_info = {
+            .resource      = src_hwctx->surfaces[i],
+            .shared_handle = NULL,
+        };
+        desc->nb_planes = nb_planes;
+        for (p = 0; p < nb_planes; p++) {
+            desc->planes[p] =
+                device_priv->clCreateFromDX9MediaSurfaceKHR(
+                    dst_dev->context, cl_flags,
+                    device_priv->dx9_media_adapter_type,
+                    &surface_info, p, &cle);
+            if (!desc->planes[p]) {
+                av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL "
+                       "image from plane %d of DXVA2 surface %d: %d.\n",
+                       p, i, cle);
+                err = AVERROR(EIO);
+                goto fail;
+            }
+        }
+    }
+
+    return 0;
+
+fail:
+    for (i = 0; i < frames_priv->nb_mapped_frames; i++) {
+        AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i];
+        for (p = 0; p < desc->nb_planes; p++) {
+            if (desc->planes[p])
+                clReleaseMemObject(desc->planes[p]);
+        }
+    }
+    av_freep(&frames_priv->mapped_frames);
+    frames_priv->nb_mapped_frames = 0;
+    return err;
+}
+
+#endif
+
+#if HAVE_OPENCL_D3D11
+
+static void opencl_unmap_from_d3d11(AVHWFramesContext *dst_fc,
+                                    HWMapDescriptor *hwmap)
+{
+    AVOpenCLFrameDescriptor    *desc = hwmap->priv;
+    OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv;
+    OpenCLFramesContext *frames_priv = dst_fc->device_ctx->internal->priv;
+    cl_event event;
+    cl_int cle;
+
+    cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR(
+        frames_priv->command_queue, desc->nb_planes, desc->planes,
+        0, NULL, &event);
+    if (cle != CL_SUCCESS) {
+        av_log(dst_fc, AV_LOG_ERROR, "Failed to release surface "
+               "handle: %d.\n", cle);
+    }
+
+    opencl_wait_events(dst_fc, &event, 1);
+}
+
+static int opencl_map_from_d3d11(AVHWFramesContext *dst_fc, AVFrame *dst,
+                                 const AVFrame *src, int flags)
+{
+    OpenCLDeviceContext  *device_priv = dst_fc->device_ctx->internal->priv;
+    OpenCLFramesContext  *frames_priv = dst_fc->internal->priv;
+    AVOpenCLFrameDescriptor *desc;
+    cl_event event;
+    cl_int cle;
+    int err, index, i;
+
+    index = (intptr_t)src->data[1];
+    if (index >= frames_priv->nb_mapped_frames) {
+        av_log(dst_fc, AV_LOG_ERROR, "Texture array index out of range for "
+               "mapping: %d >= %d.\n", index, frames_priv->nb_mapped_frames);
+        return AVERROR(EINVAL);
+    }
+
+    av_log(dst_fc, AV_LOG_DEBUG, "Map D3D11 texture %d to OpenCL.\n",
+           index);
+
+    desc = &frames_priv->mapped_frames[index];
+
+    cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR(
+        frames_priv->command_queue, desc->nb_planes, desc->planes,
+        0, NULL, &event);
+    if (cle != CL_SUCCESS) {
+        av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire surface "
+               "handle: %d.\n", cle);
+        return AVERROR(EIO);
+    }
+
+    err = opencl_wait_events(dst_fc, &event, 1);
+    if (err < 0)
+        goto fail;
+
+    for (i = 0; i < desc->nb_planes; i++)
+        dst->data[i] = (uint8_t*)desc->planes[i];
+
+    err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
+                                &opencl_unmap_from_d3d11, desc);
+    if (err < 0)
+        goto fail;
+
+    dst->width  = src->width;
+    dst->height = src->height;
+
+    return 0;
+
+fail:
+    cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR(
+        frames_priv->command_queue, desc->nb_planes, desc->planes,
+        0, NULL, &event);
+    if (cle == CL_SUCCESS)
+        opencl_wait_events(dst_fc, &event, 1);
+    return err;
+}
+
+static int opencl_frames_derive_from_d3d11(AVHWFramesContext *dst_fc,
+                                           AVHWFramesContext *src_fc, int flags)
+{
+    AVOpenCLDeviceContext    *dst_dev = dst_fc->device_ctx->hwctx;
+    AVD3D11VAFramesContext *src_hwctx = src_fc->hwctx;
+    OpenCLDeviceContext  *device_priv = dst_fc->device_ctx->internal->priv;
+    OpenCLFramesContext  *frames_priv = dst_fc->internal->priv;
+    cl_mem_flags cl_flags;
+    cl_int cle;
+    int err, i, p, nb_planes;
+
+    if (src_fc->sw_format != AV_PIX_FMT_NV12) {
+        av_log(dst_fc, AV_LOG_ERROR, "Only NV12 textures are supported "
+               "for D3D11 to OpenCL mapping.\n");
+        return AVERROR(EINVAL);
+    }
+    nb_planes = 2;
+
+    if (src_fc->initial_pool_size == 0) {
+        av_log(dst_fc, AV_LOG_ERROR, "Only fixed-size pools are supported "
+               "for D3D11 to OpenCL mapping.\n");
+        return AVERROR(EINVAL);
+    }
+
+    cl_flags = opencl_mem_flags_for_mapping(flags);
+    if (!cl_flags)
+        return AVERROR(EINVAL);
+
+    frames_priv->nb_mapped_frames = src_fc->initial_pool_size;
+
+    frames_priv->mapped_frames =
+        av_mallocz_array(frames_priv->nb_mapped_frames,
+                         sizeof(*frames_priv->mapped_frames));
+    if (!frames_priv->mapped_frames)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < frames_priv->nb_mapped_frames; i++) {
+        AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i];
+        desc->nb_planes = nb_planes;
+        for (p = 0; p < nb_planes; p++) {
+            UINT subresource = 2 * i + p;
+
+            desc->planes[p] =
+                device_priv->clCreateFromD3D11Texture2DKHR(
+                    dst_dev->context, cl_flags, src_hwctx->texture,
+                    subresource, &cle);
+            if (!desc->planes[p]) {
+                av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL "
+                       "image from plane %d of D3D texture "
+                       "index %d (subresource %u): %d.\n",
+                       p, i, (unsigned int)subresource, cle);
+                err = AVERROR(EIO);
+                goto fail;
+            }
+        }
+    }
+
+    return 0;
+
+fail:
+    for (i = 0; i < frames_priv->nb_mapped_frames; i++) {
+        AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i];
+        for (p = 0; p < desc->nb_planes; p++) {
+            if (desc->planes[p])
+                clReleaseMemObject(desc->planes[p]);
+        }
+    }
+    av_freep(&frames_priv->mapped_frames);
+    frames_priv->nb_mapped_frames = 0;
+    return err;
+}
+
+#endif
+
+#if HAVE_OPENCL_DRM_ARM
+
+typedef struct DRMARMtoOpenCLMapping {
+    int nb_objects;
+    cl_mem object_buffers[AV_DRM_MAX_PLANES];
+    int nb_planes;
+    cl_mem plane_images[AV_DRM_MAX_PLANES];
+} DRMARMtoOpenCLMapping;
+
+static void opencl_unmap_from_drm_arm(AVHWFramesContext *dst_fc,
+                                      HWMapDescriptor *hwmap)
+{
+    DRMARMtoOpenCLMapping *mapping = hwmap->priv;
+    int i;
+
+    for (i = 0; i < mapping->nb_planes; i++)
+        clReleaseMemObject(mapping->plane_images[i]);
+
+    for (i = 0; i < mapping->nb_objects; i++)
+        clReleaseMemObject(mapping->object_buffers[i]);
+
+    av_free(mapping);
+}
+
+static int opencl_map_from_drm_arm(AVHWFramesContext *dst_fc, AVFrame *dst,
+                                   const AVFrame *src, int flags)
+{
+    AVHWFramesContext *src_fc =
+        (AVHWFramesContext*)src->hw_frames_ctx->data;
+    AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx;
+    const AVDRMFrameDescriptor *desc;
+    DRMARMtoOpenCLMapping *mapping = NULL;
+    cl_mem_flags cl_flags;
+    const cl_import_properties_arm props[3] = {
+        CL_IMPORT_TYPE_ARM, CL_IMPORT_TYPE_DMA_BUF_ARM, 0,
+    };
+    cl_int cle;
+    int err, i, j;
+
+    desc = (const AVDRMFrameDescriptor*)src->data[0];
+
+    cl_flags = opencl_mem_flags_for_mapping(flags);
+    if (!cl_flags)
+        return AVERROR(EINVAL);
+
+    mapping = av_mallocz(sizeof(*mapping));
+    if (!mapping)
+        return AVERROR(ENOMEM);
+
+    mapping->nb_objects = desc->nb_objects;
+    for (i = 0; i < desc->nb_objects; i++) {
+        int fd = desc->objects[i].fd;
+
+        av_log(dst_fc, AV_LOG_DEBUG, "Map DRM PRIME fd %d to OpenCL.\n", fd);
+
+        if (desc->objects[i].format_modifier) {
+            av_log(dst_fc, AV_LOG_DEBUG, "Warning: object %d fd %d has "
+                   "nonzero format modifier %"PRId64", result may not "
+                   "be as expected.\n", i, fd,
+                   desc->objects[i].format_modifier);
+        }
+
+        mapping->object_buffers[i] =
+            clImportMemoryARM(dst_dev->context, cl_flags, props,
+                              &fd, desc->objects[i].size, &cle);
+        if (!mapping->object_buffers[i]) {
+            av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL buffer "
+                   "from object %d (fd %d, size %"SIZE_SPECIFIER") of DRM frame: %d.\n",
+                   i, fd, desc->objects[i].size, cle);
+            err = AVERROR(EIO);
+            goto fail;
+        }
+    }
+
+    mapping->nb_planes = 0;
+    for (i = 0; i < desc->nb_layers; i++) {
+        const AVDRMLayerDescriptor *layer = &desc->layers[i];
+
+        for (j = 0; j < layer->nb_planes; j++) {
+            const AVDRMPlaneDescriptor *plane = &layer->planes[j];
+            cl_mem plane_buffer;
+            cl_image_format image_format;
+            cl_image_desc   image_desc;
+            cl_buffer_region region;
+            int p = mapping->nb_planes;
+
+            err = opencl_get_plane_format(src_fc->sw_format, p,
+                                          src_fc->width, src_fc->height,
+                                          &image_format, &image_desc);
+            if (err < 0) {
+                av_log(dst_fc, AV_LOG_ERROR, "Invalid plane %d (DRM "
+                       "layer %d plane %d): %d.\n", p, i, j, err);
+                goto fail;
+            }
+
+            region.origin = plane->offset;
+            region.size   = image_desc.image_row_pitch *
+                            image_desc.image_height;
+
+            plane_buffer =
+                clCreateSubBuffer(mapping->object_buffers[plane->object_index],
+                                  cl_flags,
+                                  CL_BUFFER_CREATE_TYPE_REGION,
+                                  &region, &cle);
+            if (!plane_buffer) {
+                av_log(dst_fc, AV_LOG_ERROR, "Failed to create sub-buffer "
+                       "for plane %d: %d.\n", p, cle);
+                err = AVERROR(EIO);
+                goto fail;
+            }
+
+            image_desc.buffer = plane_buffer;
+
+            mapping->plane_images[p] =
+                clCreateImage(dst_dev->context, cl_flags,
+                              &image_format, &image_desc, NULL, &cle);
+
+            // Unreference the sub-buffer immediately - we don't need it
+            // directly and a reference is held by the image.
+            clReleaseMemObject(plane_buffer);
+
+            if (!mapping->plane_images[p]) {
+                av_log(dst_fc, AV_LOG_ERROR, "Failed to create image "
+                       "for plane %d: %d.\n", p, cle);
+                err = AVERROR(EIO);
+                goto fail;
+            }
+
+            ++mapping->nb_planes;
+        }
+    }
+
+    for (i = 0; i < mapping->nb_planes; i++)
+        dst->data[i] = (uint8_t*)mapping->plane_images[i];
+
+    err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
+                                &opencl_unmap_from_drm_arm, mapping);
+    if (err < 0)
+        goto fail;
+
+    dst->width  = src->width;
+    dst->height = src->height;
+
+    return 0;
+
+fail:
+    for (i = 0; i < mapping->nb_planes; i++) {
+        clReleaseMemObject(mapping->plane_images[i]);
+    }
+    for (i = 0; i < mapping->nb_objects; i++) {
+        if (mapping->object_buffers[i])
+            clReleaseMemObject(mapping->object_buffers[i]);
+    }
+    av_free(mapping);
+    return err;
+}
+
+#endif
+
+static int opencl_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
+                           const AVFrame *src, int flags)
+{
+    av_assert0(src->format == AV_PIX_FMT_OPENCL);
+    if (hwfc->sw_format != dst->format)
+        return AVERROR(ENOSYS);
+    return opencl_map_frame(hwfc, dst, src, flags);
+}
+
+static int opencl_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
+                         const AVFrame *src, int flags)
+{
+    av_unused OpenCLDeviceContext *priv = hwfc->device_ctx->internal->priv;
+    av_assert0(dst->format == AV_PIX_FMT_OPENCL);
+    switch (src->format) {
+#if HAVE_OPENCL_DRM_BEIGNET
+    case AV_PIX_FMT_DRM_PRIME:
+        if (priv->beignet_drm_mapping_usable)
+            return opencl_map_from_drm_beignet(hwfc, dst, src, flags);
+#endif
+#if HAVE_OPENCL_VAAPI_BEIGNET
+    case AV_PIX_FMT_VAAPI:
+        if (priv->beignet_drm_mapping_usable)
+            return opencl_map_from_vaapi(hwfc, dst, src, flags);
+#endif
+#if HAVE_OPENCL_VAAPI_INTEL_MEDIA
+    case AV_PIX_FMT_QSV:
+    case AV_PIX_FMT_VAAPI:
+        if (priv->qsv_mapping_usable)
+            return opencl_map_from_qsv(hwfc, dst, src, flags);
+#endif
+#if HAVE_OPENCL_DXVA2
+    case AV_PIX_FMT_DXVA2_VLD:
+        if (priv->dxva2_mapping_usable)
+            return opencl_map_from_dxva2(hwfc, dst, src, flags);
+#endif
+#if HAVE_OPENCL_D3D11
+    case AV_PIX_FMT_D3D11:
+        if (priv->d3d11_mapping_usable)
+            return opencl_map_from_d3d11(hwfc, dst, src, flags);
+#endif
+#if HAVE_OPENCL_DRM_ARM
+    case AV_PIX_FMT_DRM_PRIME:
+        if (priv->drm_arm_mapping_usable)
+            return opencl_map_from_drm_arm(hwfc, dst, src, flags);
+#endif
+    }
+    return AVERROR(ENOSYS);
+}
+
+static int opencl_frames_derive_to(AVHWFramesContext *dst_fc,
+                                   AVHWFramesContext *src_fc, int flags)
+{
+    av_unused OpenCLDeviceContext *priv = dst_fc->device_ctx->internal->priv;
+    switch (src_fc->device_ctx->type) {
+#if HAVE_OPENCL_DRM_BEIGNET
+    case AV_HWDEVICE_TYPE_DRM:
+        if (!priv->beignet_drm_mapping_usable)
+            return AVERROR(ENOSYS);
+        break;
+#endif
+#if HAVE_OPENCL_VAAPI_BEIGNET
+    case AV_HWDEVICE_TYPE_VAAPI:
+        if (!priv->beignet_drm_mapping_usable)
+            return AVERROR(ENOSYS);
+        break;
+#endif
+#if HAVE_OPENCL_VAAPI_INTEL_MEDIA
+    case AV_HWDEVICE_TYPE_QSV:
+    case AV_HWDEVICE_TYPE_VAAPI:
+        if (!priv->qsv_mapping_usable)
+            return AVERROR(ENOSYS);
+        break;
+#endif
+#if HAVE_OPENCL_DXVA2
+    case AV_HWDEVICE_TYPE_DXVA2:
+        if (!priv->dxva2_mapping_usable)
+            return AVERROR(ENOSYS);
+        {
+            int err;
+            err = opencl_frames_derive_from_dxva2(dst_fc, src_fc, flags);
+            if (err < 0)
+                return err;
+        }
+        break;
+#endif
+#if HAVE_OPENCL_D3D11
+    case AV_HWDEVICE_TYPE_D3D11VA:
+        if (!priv->d3d11_mapping_usable)
+            return AVERROR(ENOSYS);
+        {
+            int err;
+            err = opencl_frames_derive_from_d3d11(dst_fc, src_fc, flags);
+            if (err < 0)
+                return err;
+        }
+        break;
+#endif
+#if HAVE_OPENCL_DRM_ARM
+    case AV_HWDEVICE_TYPE_DRM:
+        if (!priv->drm_arm_mapping_usable)
+            return AVERROR(ENOSYS);
+        break;
+#endif
+    default:
+        return AVERROR(ENOSYS);
+    }
+    return opencl_frames_init_command_queue(dst_fc);
+}
+
+const HWContextType ff_hwcontext_type_opencl = {
+    .type                   = AV_HWDEVICE_TYPE_OPENCL,
+    .name                   = "OpenCL",
+
+    .device_hwctx_size      = sizeof(AVOpenCLDeviceContext),
+    .device_priv_size       = sizeof(OpenCLDeviceContext),
+    .frames_hwctx_size      = sizeof(AVOpenCLFramesContext),
+    .frames_priv_size       = sizeof(OpenCLFramesContext),
+
+    .device_create          = &opencl_device_create,
+    .device_derive          = &opencl_device_derive,
+    .device_init            = &opencl_device_init,
+    .device_uninit          = &opencl_device_uninit,
+
+    .frames_get_constraints = &opencl_frames_get_constraints,
+    .frames_init            = &opencl_frames_init,
+    .frames_uninit          = &opencl_frames_uninit,
+    .frames_get_buffer      = &opencl_get_buffer,
+
+    .transfer_get_formats   = &opencl_transfer_get_formats,
+    .transfer_data_to       = &opencl_transfer_data_to,
+    .transfer_data_from     = &opencl_transfer_data_from,
+
+    .map_from               = &opencl_map_from,
+    .map_to                 = &opencl_map_to,
+    .frames_derive_to       = &opencl_frames_derive_to,
+
+    .pix_fmts = (const enum AVPixelFormat[]) {
+        AV_PIX_FMT_OPENCL,
+        AV_PIX_FMT_NONE
+    },
+};

diff --git a/libavutil/hwcontext_opencl.h b/libavutil/hwcontext_opencl.h
new file mode 100644
index 0000000..ef54486
--- /dev/null
+++ b/libavutil/hwcontext_opencl.h

@@ -0,0 +1,100 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_HWCONTEXT_OPENCL_H
+#define AVUTIL_HWCONTEXT_OPENCL_H
+
+#ifdef __APPLE__
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#include "frame.h"
+
+/**
+ * @file
+ * API-specific header for AV_HWDEVICE_TYPE_OPENCL.
+ *
+ * Pools allocated internally are always dynamic, and are primarily intended
+ * to be used in OpenCL-only cases.  If interoperation is required, it is
+ * typically required to allocate frames in the other API and then map the
+ * frames context to OpenCL with av_hwframe_ctx_create_derived().
+ */
+
+/**
+ * OpenCL frame descriptor for pool allocation.
+ *
+ * In user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
+ * with the data pointer pointing at an object of this type describing the
+ * planes of the frame.
+ */
+typedef struct AVOpenCLFrameDescriptor {
+    /**
+     * Number of planes in the frame.
+     */
+    int nb_planes;
+    /**
+     * OpenCL image2d objects for each plane of the frame.
+     */
+    cl_mem planes[AV_NUM_DATA_POINTERS];
+} AVOpenCLFrameDescriptor;
+
+/**
+ * OpenCL device details.
+ *
+ * Allocated as AVHWDeviceContext.hwctx
+ */
+typedef struct AVOpenCLDeviceContext {
+    /**
+     * The primary device ID of the device.  If multiple OpenCL devices
+     * are associated with the context then this is the one which will
+     * be used for all operations internal to FFmpeg.
+     */
+    cl_device_id device_id;
+    /**
+     * The OpenCL context which will contain all operations and frames on
+     * this device.
+     */
+    cl_context context;
+    /**
+     * The default command queue for this device, which will be used by all
+     * frames contexts which do not have their own command queue.  If not
+     * intialised by the user, a default queue will be created on the
+     * primary device.
+     */
+    cl_command_queue command_queue;
+} AVOpenCLDeviceContext;
+
+/**
+ * OpenCL-specific data associated with a frame pool.
+ *
+ * Allocated as AVHWFramesContext.hwctx.
+ */
+typedef struct AVOpenCLFramesContext {
+    /**
+     * The command queue used for internal asynchronous operations on this
+     * device (av_hwframe_transfer_data(), av_hwframe_map()).
+     *
+     * If this is not set, the command queue from the associated device is
+     * used instead.
+     */
+    cl_command_queue command_queue;
+} AVOpenCLFramesContext;
+
+#endif /* AVUTIL_HWCONTEXT_OPENCL_H */

diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
index f1d16d8..814ce21 100644
--- a/libavutil/hwcontext_qsv.c
+++ b/libavutil/hwcontext_qsv.c

@@ -23,6 +23,10 @@
 
 #include "config.h"
 
+#if HAVE_PTHREADS
+#include <pthread.h>
+#endif
+
 #if CONFIG_VAAPI
 #include "hwcontext_vaapi.h"
 #endif
@@ -56,7 +60,13 @@
 
 typedef struct QSVFramesContext {
     mfxSession session_download;
+    int session_download_init;
     mfxSession session_upload;
+    int session_upload_init;
+#if HAVE_PTHREADS
+    pthread_mutex_t session_lock;
+    pthread_cond_t session_cond;
+#endif
 
     AVBufferRef *child_frames_ref;
     mfxFrameSurface1 *surfaces_internal;
@@ -90,6 +100,7 @@
     uint32_t           fourcc;
 } supported_pixel_formats[] = {
     { AV_PIX_FMT_NV12, MFX_FOURCC_NV12 },
+    { AV_PIX_FMT_BGRA, MFX_FOURCC_RGB4 },
     { AV_PIX_FMT_P010, MFX_FOURCC_P010 },
     { AV_PIX_FMT_PAL8, MFX_FOURCC_P8   },
 };
@@ -147,12 +158,19 @@
         MFXClose(s->session_download);
     }
     s->session_download = NULL;
+    s->session_download_init = 0;
 
     if (s->session_upload) {
         MFXVideoVPP_Close(s->session_upload);
         MFXClose(s->session_upload);
     }
     s->session_upload = NULL;
+    s->session_upload_init = 0;
+
+#if HAVE_PTHREADS
+    pthread_mutex_destroy(&s->session_lock);
+    pthread_cond_destroy(&s->session_cond);
+#endif
 
     av_freep(&s->mem_ids);
     av_freep(&s->surface_ptrs);
@@ -234,8 +252,8 @@
     child_frames_ctx->format            = device_priv->child_pix_fmt;
     child_frames_ctx->sw_format         = ctx->sw_format;
     child_frames_ctx->initial_pool_size = ctx->initial_pool_size;
-    child_frames_ctx->width             = ctx->width;
-    child_frames_ctx->height            = ctx->height;
+    child_frames_ctx->width             = FFALIGN(ctx->width, 16);
+    child_frames_ctx->height            = FFALIGN(ctx->height, 16);
 
 #if CONFIG_DXVA2
     if (child_device_ctx->type == AV_HWDEVICE_TYPE_DXVA2) {
@@ -307,12 +325,13 @@
         surf->Info.ChromaFormat   = MFX_CHROMAFORMAT_YUV444;
 
     surf->Info.FourCC         = fourcc;
-    surf->Info.Width          = ctx->width;
+    surf->Info.Width          = FFALIGN(ctx->width, 16);
     surf->Info.CropW          = ctx->width;
-    surf->Info.Height         = ctx->height;
+    surf->Info.Height         = FFALIGN(ctx->height, 16);
     surf->Info.CropH          = ctx->height;
     surf->Info.FrameRateExtN  = 25;
     surf->Info.FrameRateExtD  = 1;
+    surf->Info.PicStruct      = MFX_PICSTRUCT_PROGRESSIVE;
 
     return 0;
 }
@@ -534,13 +553,16 @@
             s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
     }
 
-    ret = qsv_init_internal_session(ctx, &s->session_download, 0);
-    if (ret < 0)
-        return ret;
+    s->session_download = NULL;
+    s->session_upload   = NULL;
 
-    ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
-    if (ret < 0)
-        return ret;
+    s->session_download_init = 0;
+    s->session_upload_init   = 0;
+
+#if HAVE_PTHREADS
+    pthread_mutex_init(&s->session_lock, NULL);
+    pthread_cond_init(&s->session_cond, NULL);
+#endif
 
     return 0;
 }
@@ -730,6 +752,37 @@
     return ret;
 }
 
+static int map_frame_to_surface(const AVFrame *frame, mfxFrameSurface1 *surface)
+{
+    switch (frame->format) {
+    case AV_PIX_FMT_NV12:
+    case AV_PIX_FMT_P010:
+        surface->Data.Y  = frame->data[0];
+        surface->Data.UV = frame->data[1];
+        break;
+
+    case AV_PIX_FMT_YUV420P:
+        surface->Data.Y = frame->data[0];
+        surface->Data.U = frame->data[1];
+        surface->Data.V = frame->data[2];
+        break;
+
+    case AV_PIX_FMT_BGRA:
+        surface->Data.B = frame->data[0];
+        surface->Data.G = frame->data[0] + 1;
+        surface->Data.R = frame->data[0] + 2;
+        surface->Data.A = frame->data[0] + 3;
+        break;
+
+    default:
+        return MFX_ERR_UNSUPPORTED;
+    }
+    surface->Data.Pitch     = frame->linesize[0];
+    surface->Data.TimeStamp = frame->pts;
+
+    return 0;
+}
+
 static int qsv_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
                                   const AVFrame *src)
 {
@@ -739,6 +792,32 @@
 
     mfxSyncPoint sync = NULL;
     mfxStatus err;
+    int ret = 0;
+
+    while (!s->session_download_init && !s->session_download && !ret) {
+#if HAVE_PTHREADS
+        if (pthread_mutex_trylock(&s->session_lock) == 0) {
+#endif
+            if (!s->session_download_init) {
+                ret = qsv_init_internal_session(ctx, &s->session_download, 0);
+                if (s->session_download)
+                    s->session_download_init = 1;
+            }
+#if HAVE_PTHREADS
+            pthread_mutex_unlock(&s->session_lock);
+            pthread_cond_signal(&s->session_cond);
+        } else {
+            pthread_mutex_lock(&s->session_lock);
+            while (!s->session_download_init && !s->session_download) {
+                pthread_cond_wait(&s->session_cond, &s->session_lock);
+            }
+            pthread_mutex_unlock(&s->session_lock);
+        }
+#endif
+    }
+
+    if (ret < 0)
+        return ret;
 
     if (!s->session_download) {
         if (s->child_frames_ref)
@@ -749,11 +828,7 @@
     }
 
     out.Info = in->Info;
-    out.Data.PitchLow = dst->linesize[0];
-    out.Data.Y        = dst->data[0];
-    out.Data.U        = dst->data[1];
-    out.Data.V        = dst->data[2];
-    out.Data.A        = dst->data[3];
+    map_frame_to_surface(dst, &out);
 
     do {
         err = MFXVideoVPP_RunFrameVPPAsync(s->session_download, in, &out, NULL, &sync);
@@ -786,21 +861,66 @@
 
     mfxSyncPoint sync = NULL;
     mfxStatus err;
+    int ret = 0;
+    /* make a copy if the input is not padded as libmfx requires */
+    AVFrame tmp_frame, *src_frame;
+    int realigned = 0;
+
+
+    while (!s->session_upload_init && !s->session_upload && !ret) {
+#if HAVE_PTHREADS
+        if (pthread_mutex_trylock(&s->session_lock) == 0) {
+#endif
+            if (!s->session_upload_init) {
+                ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
+                if (s->session_upload)
+                    s->session_upload_init = 1;
+            }
+#if HAVE_PTHREADS
+            pthread_mutex_unlock(&s->session_lock);
+            pthread_cond_signal(&s->session_cond);
+        } else {
+            pthread_mutex_lock(&s->session_lock);
+            while (!s->session_upload_init && !s->session_upload) {
+                pthread_cond_wait(&s->session_cond, &s->session_lock);
+            }
+            pthread_mutex_unlock(&s->session_lock);
+        }
+#endif
+    }
+    if (ret < 0)
+        return ret;
+
+
+    if (src->height & 16 || src->linesize[0] & 16) {
+        realigned = 1;
+        memset(&tmp_frame, 0, sizeof(tmp_frame));
+        tmp_frame.format         = src->format;
+        tmp_frame.width          = FFALIGN(src->width, 16);
+        tmp_frame.height         = FFALIGN(src->height, 16);
+        ret = av_frame_get_buffer(&tmp_frame, 32);
+        if (ret < 0)
+            return ret;
+
+        ret = av_frame_copy(&tmp_frame, src);
+        if (ret < 0) {
+            av_frame_unref(&tmp_frame);
+            return ret;
+        }
+    }
+
+    src_frame = realigned ? &tmp_frame : src;
 
     if (!s->session_upload) {
         if (s->child_frames_ref)
-            return qsv_transfer_data_child(ctx, dst, src);
+            return qsv_transfer_data_child(ctx, dst, src_frame);
 
         av_log(ctx, AV_LOG_ERROR, "Surface upload not possible\n");
         return AVERROR(ENOSYS);
     }
 
     in.Info = out->Info;
-    in.Data.PitchLow = src->linesize[0];
-    in.Data.Y        = src->data[0];
-    in.Data.U        = src->data[1];
-    in.Data.V        = src->data[2];
-    in.Data.A        = src->data[3];
+    map_frame_to_surface(src_frame, &in);
 
     do {
         err = MFXVideoVPP_RunFrameVPPAsync(s->session_upload, &in, out, NULL, &sync);
@@ -821,6 +941,9 @@
         return AVERROR_UNKNOWN;
     }
 
+    if (realigned)
+        av_frame_unref(&tmp_frame);
+
     return 0;
 }
 
@@ -989,7 +1112,6 @@
                                         int flags)
 {
     AVQSVDeviceContext *hwctx = ctx->hwctx;
-    QSVDeviceContext       *s = ctx->internal->priv;
 
     mfxVersion    ver = { { 3, 1 } };
     mfxHDL        handle;
@@ -1029,6 +1151,27 @@
         goto fail;
     }
 
+    err = MFXQueryVersion(hwctx->session, &ver);
+    if (err != MFX_ERR_NONE) {
+        av_log(ctx, AV_LOG_ERROR, "Error querying an MFX session: %d.\n", err);
+        ret = AVERROR_UNKNOWN;
+        goto fail;
+    }
+
+    av_log(ctx, AV_LOG_VERBOSE,
+           "Initialize MFX session: API version is %d.%d, implementation version is %d.%d\n",
+           MFX_VERSION_MAJOR, MFX_VERSION_MINOR, ver.Major, ver.Minor);
+
+    MFXClose(hwctx->session);
+
+    err = MFXInit(implementation, &ver, &hwctx->session);
+    if (err != MFX_ERR_NONE) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Error initializing an MFX session: %d.\n", err);
+        ret = AVERROR_UNKNOWN;
+        goto fail;
+    }
+
     err = MFXVideoCORE_SetHandle(hwctx->session, handle_type, handle);
     if (err != MFX_ERR_NONE) {
         av_log(ctx, AV_LOG_ERROR, "Error setting child device handle: "
@@ -1037,6 +1180,11 @@
         goto fail;
     }
 
+    ret = MFXQueryVersion(hwctx->session,&ver);
+    if (ret == MFX_ERR_NONE) {
+        av_log(ctx, AV_LOG_VERBOSE, "MFX compile/runtime API: %d.%d/%d.%d\n",
+               MFX_VERSION_MAJOR, MFX_VERSION_MINOR, ver.Major, ver.Minor);
+    }
     return 0;
 
 fail:

diff --git a/libavutil/hwcontext_vaapi.c b/libavutil/hwcontext_vaapi.c
index cc961f1..8624369 100644
--- a/libavutil/hwcontext_vaapi.c
+++ b/libavutil/hwcontext_vaapi.c

@@ -28,6 +28,9 @@
 #if CONFIG_LIBDRM
 #   include <va/va_drmcommon.h>
 #   include <drm_fourcc.h>
+#   ifndef DRM_FORMAT_MOD_INVALID
+#       define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1)
+#   endif
 #endif
 
 #include <fcntl.h>
@@ -40,15 +43,13 @@
 #include "buffer.h"
 #include "common.h"
 #include "hwcontext.h"
+#include "hwcontext_drm.h"
 #include "hwcontext_internal.h"
 #include "hwcontext_vaapi.h"
 #include "mem.h"
 #include "pixdesc.h"
 #include "pixfmt.h"
 
-#if CONFIG_LIBDRM
-#   include "hwcontext_drm.h"
-#endif
 
 typedef struct VAAPIDevicePriv {
 #if HAVE_VAAPI_X11
@@ -86,52 +87,82 @@
     int flags;
 } VAAPIMapping;
 
-#define MAP(va, rt, av) { \
-        VA_FOURCC_ ## va, \
-        VA_RT_FORMAT_ ## rt, \
-        AV_PIX_FMT_ ## av \
-    }
-// The map fourcc <-> pix_fmt isn't bijective because of the annoying U/V
-// plane swap cases.  The frame handling below tries to hide these.
-static const struct {
+typedef struct VAAPIFormat {
     unsigned int fourcc;
     unsigned int rt_format;
     enum AVPixelFormat pix_fmt;
-} vaapi_format_map[] = {
-    MAP(NV12, YUV420,  NV12),
-    MAP(YV12, YUV420,  YUV420P), // With U/V planes swapped.
-    MAP(IYUV, YUV420,  YUV420P),
-  //MAP(I420, YUV420,  YUV420P), // Not in libva but used by Intel driver.
+    int chroma_planes_swapped;
+} VAAPIFormatDescriptor;
+
+#define MAP(va, rt, av, swap_uv) { \
+        VA_FOURCC_ ## va, \
+        VA_RT_FORMAT_ ## rt, \
+        AV_PIX_FMT_ ## av, \
+        swap_uv, \
+    }
+// The map fourcc <-> pix_fmt isn't bijective because of the annoying U/V
+// plane swap cases.  The frame handling below tries to hide these.
+static const VAAPIFormatDescriptor vaapi_format_map[] = {
+    MAP(NV12, YUV420,  NV12,    0),
+#ifdef VA_FOURCC_I420
+    MAP(I420, YUV420,  YUV420P, 0),
+#endif
+    MAP(YV12, YUV420,  YUV420P, 1),
+    MAP(IYUV, YUV420,  YUV420P, 0),
+    MAP(422H, YUV422,  YUV422P, 0),
 #ifdef VA_FOURCC_YV16
-    MAP(YV16, YUV422,  YUV422P), // With U/V planes swapped.
+    MAP(YV16, YUV422,  YUV422P, 1),
 #endif
-    MAP(422H, YUV422,  YUV422P),
-    MAP(UYVY, YUV422,  UYVY422),
-    MAP(YUY2, YUV422,  YUYV422),
-    MAP(Y800, YUV400,  GRAY8),
+    MAP(UYVY, YUV422,  UYVY422, 0),
+    MAP(YUY2, YUV422,  YUYV422, 0),
+    MAP(411P, YUV411,  YUV411P, 0),
+    MAP(422V, YUV422,  YUV440P, 0),
+    MAP(444P, YUV444,  YUV444P, 0),
+    MAP(Y800, YUV400,  GRAY8,   0),
 #ifdef VA_FOURCC_P010
-    MAP(P010, YUV420_10BPP, P010),
+    MAP(P010, YUV420_10BPP, P010, 0),
 #endif
-    MAP(BGRA, RGB32,   BGRA),
-    MAP(BGRX, RGB32,   BGR0),
-    MAP(RGBA, RGB32,   RGBA),
-    MAP(RGBX, RGB32,   RGB0),
+    MAP(BGRA, RGB32,   BGRA, 0),
+    MAP(BGRX, RGB32,   BGR0, 0),
+    MAP(RGBA, RGB32,   RGBA, 0),
+    MAP(RGBX, RGB32,   RGB0, 0),
 #ifdef VA_FOURCC_ABGR
-    MAP(ABGR, RGB32,   ABGR),
-    MAP(XBGR, RGB32,   0BGR),
+    MAP(ABGR, RGB32,   ABGR, 0),
+    MAP(XBGR, RGB32,   0BGR, 0),
 #endif
-    MAP(ARGB, RGB32,   ARGB),
-    MAP(XRGB, RGB32,   0RGB),
+    MAP(ARGB, RGB32,   ARGB, 0),
+    MAP(XRGB, RGB32,   0RGB, 0),
 };
 #undef MAP
 
-static enum AVPixelFormat vaapi_pix_fmt_from_fourcc(unsigned int fourcc)
+static const VAAPIFormatDescriptor *
+    vaapi_format_from_fourcc(unsigned int fourcc)
 {
     int i;
     for (i = 0; i < FF_ARRAY_ELEMS(vaapi_format_map); i++)
         if (vaapi_format_map[i].fourcc == fourcc)
-            return vaapi_format_map[i].pix_fmt;
-    return AV_PIX_FMT_NONE;
+            return &vaapi_format_map[i];
+    return NULL;
+}
+
+static const VAAPIFormatDescriptor *
+    vaapi_format_from_pix_fmt(enum AVPixelFormat pix_fmt)
+{
+    int i;
+    for (i = 0; i < FF_ARRAY_ELEMS(vaapi_format_map); i++)
+        if (vaapi_format_map[i].pix_fmt == pix_fmt)
+            return &vaapi_format_map[i];
+    return NULL;
+}
+
+static enum AVPixelFormat vaapi_pix_fmt_from_fourcc(unsigned int fourcc)
+{
+    const VAAPIFormatDescriptor *desc;
+    desc = vaapi_format_from_fourcc(fourcc);
+    if (desc)
+        return desc->pix_fmt;
+    else
+        return AV_PIX_FMT_NONE;
 }
 
 static int vaapi_get_image_format(AVHWDeviceContext *hwdev,
@@ -273,11 +304,14 @@
     const char *match_string;
     unsigned int quirks;
 } vaapi_driver_quirks_table[] = {
+#if !VA_CHECK_VERSION(1, 0, 0)
+    // The i965 driver did not conform before version 2.0.
     {
         "Intel i965 (Quick Sync)",
         "i965",
         AV_VAAPI_DRIVER_QUIRK_RENDER_PARAM_BUFFERS,
     },
+#endif
     {
         "Intel iHD",
         "ubit",
@@ -338,29 +372,37 @@
         }
     }
 
+    vendor_string = vaQueryVendorString(hwctx->display);
+    if (vendor_string)
+        av_log(hwdev, AV_LOG_VERBOSE, "VAAPI driver: %s.\n", vendor_string);
+
     if (hwctx->driver_quirks & AV_VAAPI_DRIVER_QUIRK_USER_SET) {
-        av_log(hwdev, AV_LOG_VERBOSE, "Not detecting driver: "
-               "quirks set by user.\n");
+        av_log(hwdev, AV_LOG_VERBOSE, "Using quirks set by user (%#x).\n",
+               hwctx->driver_quirks);
     } else {
         // Detect the driver in use and set quirk flags if necessary.
-        vendor_string = vaQueryVendorString(hwctx->display);
         hwctx->driver_quirks = 0;
         if (vendor_string) {
             for (i = 0; i < FF_ARRAY_ELEMS(vaapi_driver_quirks_table); i++) {
                 if (strstr(vendor_string,
                            vaapi_driver_quirks_table[i].match_string)) {
-                    av_log(hwdev, AV_LOG_VERBOSE, "Matched \"%s\" as known "
-                           "driver \"%s\".\n", vendor_string,
-                           vaapi_driver_quirks_table[i].friendly_name);
+                    av_log(hwdev, AV_LOG_VERBOSE, "Matched driver string "
+                           "as known nonstandard driver \"%s\", setting "
+                           "quirks (%#x).\n",
+                           vaapi_driver_quirks_table[i].friendly_name,
+                           vaapi_driver_quirks_table[i].quirks);
                     hwctx->driver_quirks |=
                         vaapi_driver_quirks_table[i].quirks;
                     break;
                 }
             }
             if (!(i < FF_ARRAY_ELEMS(vaapi_driver_quirks_table))) {
-                av_log(hwdev, AV_LOG_VERBOSE, "Unknown driver \"%s\", "
-                       "assuming standard behaviour.\n", vendor_string);
+                av_log(hwdev, AV_LOG_VERBOSE, "Driver not found in known "
+                       "nonstandard list, using standard behaviour.\n");
             }
+        } else {
+            av_log(hwdev, AV_LOG_VERBOSE, "Driver has no vendor string, "
+                   "assuming standard behaviour.\n");
         }
     }
 
@@ -444,22 +486,16 @@
     AVVAAPIFramesContext  *avfc = hwfc->hwctx;
     VAAPIFramesContext     *ctx = hwfc->internal->priv;
     AVVAAPIDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+    const VAAPIFormatDescriptor *desc;
     VAImageFormat *expected_format;
     AVBufferRef *test_surface = NULL;
     VASurfaceID test_surface_id;
     VAImage test_image;
     VAStatus vas;
     int err, i;
-    unsigned int fourcc, rt_format;
 
-    for (i = 0; i < FF_ARRAY_ELEMS(vaapi_format_map); i++) {
-        if (vaapi_format_map[i].pix_fmt == hwfc->sw_format) {
-            fourcc    = vaapi_format_map[i].fourcc;
-            rt_format = vaapi_format_map[i].rt_format;
-            break;
-        }
-    }
-    if (i >= FF_ARRAY_ELEMS(vaapi_format_map)) {
+    desc = vaapi_format_from_pix_fmt(hwfc->sw_format);
+    if (!desc) {
         av_log(hwfc, AV_LOG_ERROR, "Unsupported format: %s.\n",
                av_get_pix_fmt_name(hwfc->sw_format));
         return AVERROR(EINVAL);
@@ -470,9 +506,9 @@
             int need_memory_type = !(hwctx->driver_quirks & AV_VAAPI_DRIVER_QUIRK_ATTRIB_MEMTYPE);
             int need_pixel_format = 1;
             for (i = 0; i < avfc->nb_attributes; i++) {
-                if (ctx->attributes[i].type == VASurfaceAttribMemoryType)
+                if (avfc->attributes[i].type == VASurfaceAttribMemoryType)
                     need_memory_type  = 0;
-                if (ctx->attributes[i].type == VASurfaceAttribPixelFormat)
+                if (avfc->attributes[i].type == VASurfaceAttribPixelFormat)
                     need_pixel_format = 0;
             }
             ctx->nb_attributes =
@@ -500,7 +536,7 @@
                     .type          = VASurfaceAttribPixelFormat,
                     .flags         = VA_SURFACE_ATTRIB_SETTABLE,
                     .value.type    = VAGenericValueTypeInteger,
-                    .value.value.i = fourcc,
+                    .value.value.i = desc->fourcc,
                 };
             }
             av_assert0(i == ctx->nb_attributes);
@@ -509,7 +545,7 @@
             ctx->nb_attributes = 0;
         }
 
-        ctx->rt_format = rt_format;
+        ctx->rt_format = desc->rt_format;
 
         if (hwfc->initial_pool_size > 0) {
             // This pool will be usable as a render target, so we need to store
@@ -625,24 +661,31 @@
                                       enum AVPixelFormat **formats)
 {
     VAAPIDeviceContext *ctx = hwfc->device_ctx->internal->priv;
-    enum AVPixelFormat *pix_fmts, preferred_format;
-    int i, k;
+    enum AVPixelFormat *pix_fmts;
+    int i, k, sw_format_available;
 
-    preferred_format = hwfc->sw_format;
+    sw_format_available = 0;
+    for (i = 0; i < ctx->nb_formats; i++) {
+        if (ctx->formats[i].pix_fmt == hwfc->sw_format)
+            sw_format_available = 1;
+    }
 
     pix_fmts = av_malloc((ctx->nb_formats + 1) * sizeof(*pix_fmts));
     if (!pix_fmts)
         return AVERROR(ENOMEM);
 
-    pix_fmts[0] = preferred_format;
-    k = 1;
+    if (sw_format_available) {
+        pix_fmts[0] = hwfc->sw_format;
+        k = 1;
+    } else {
+        k = 0;
+    }
     for (i = 0; i < ctx->nb_formats; i++) {
-        if (ctx->formats[i].pix_fmt == preferred_format)
+        if (ctx->formats[i].pix_fmt == hwfc->sw_format)
             continue;
         av_assert0(k < ctx->nb_formats);
         pix_fmts[k++] = ctx->formats[i].pix_fmt;
     }
-    av_assert0(k == ctx->nb_formats);
     pix_fmts[k] = AV_PIX_FMT_NONE;
 
     *formats = pix_fmts;
@@ -692,6 +735,7 @@
     AVVAAPIDeviceContext *hwctx = hwfc->device_ctx->hwctx;
     VAAPIFramesContext *ctx = hwfc->internal->priv;
     VASurfaceID surface_id;
+    const VAAPIFormatDescriptor *desc;
     VAImageFormat *image_format;
     VAAPIMapping *map;
     VAStatus vas;
@@ -800,11 +844,9 @@
         dst->data[i] = (uint8_t*)address + map->image.offsets[i];
         dst->linesize[i] = map->image.pitches[i];
     }
-    if (
-#ifdef VA_FOURCC_YV16
-        map->image.format.fourcc == VA_FOURCC_YV16 ||
-#endif
-        map->image.format.fourcc == VA_FOURCC_YV12) {
+
+    desc = vaapi_format_from_fourcc(map->image.format.fourcc);
+    if (desc && desc->chroma_planes_swapped) {
         // Chroma planes are YVU rather than YUV, so swap them.
         FFSWAP(uint8_t*, dst->data[1], dst->data[2]);
     }
@@ -957,9 +999,10 @@
         (AVHWFramesContext*)dst->hw_frames_ctx->data;
     AVVAAPIDeviceContext  *dst_dev = dst_fc->device_ctx->hwctx;
     const AVDRMFrameDescriptor *desc;
+    const VAAPIFormatDescriptor *format_desc;
     VASurfaceID surface_id;
     VAStatus vas;
-    uint32_t va_fourcc, va_rt_format;
+    uint32_t va_fourcc;
     int err, i, j, k;
 
     unsigned long buffer_handle;
@@ -1010,10 +1053,8 @@
     av_log(dst_fc, AV_LOG_DEBUG, "Map DRM object %d to VAAPI as "
            "%08x.\n", desc->objects[0].fd, va_fourcc);
 
-    for (i = 0; i < FF_ARRAY_ELEMS(vaapi_format_map); i++) {
-        if (vaapi_format_map[i].fourcc == va_fourcc)
-            va_rt_format = vaapi_format_map[i].rt_format;
-    }
+    format_desc = vaapi_format_from_fourcc(va_fourcc);
+    av_assert0(format_desc);
 
     buffer_handle = desc->objects[0].fd;
     buffer_desc.pixel_format = va_fourcc;
@@ -1034,7 +1075,13 @@
     }
     buffer_desc.num_planes = k;
 
-    vas = vaCreateSurfaces(dst_dev->display, va_rt_format,
+    if (format_desc->chroma_planes_swapped &&
+        buffer_desc.num_planes == 3) {
+        FFSWAP(uint32_t, buffer_desc.pitches[1], buffer_desc.pitches[2]);
+        FFSWAP(uint32_t, buffer_desc.offsets[1], buffer_desc.offsets[2]);
+    }
+
+    vas = vaCreateSurfaces(dst_dev->display, format_desc->rt_format,
                            src->width, src->height,
                            &surface_id, 1,
                            attrs, FF_ARRAY_ELEMS(attrs));
@@ -1061,8 +1108,9 @@
     return 0;
 }
 
-static void vaapi_unmap_to_drm(AVHWFramesContext *dst_fc,
-                               HWMapDescriptor *hwmap)
+#if VA_CHECK_VERSION(1, 1, 0)
+static void vaapi_unmap_to_drm_esh(AVHWFramesContext *hwfc,
+                                   HWMapDescriptor *hwmap)
 {
     AVDRMFrameDescriptor *drm_desc = hwmap->priv;
     int i;
@@ -1073,16 +1121,261 @@
     av_freep(&drm_desc);
 }
 
+static int vaapi_map_to_drm_esh(AVHWFramesContext *hwfc, AVFrame *dst,
+                                const AVFrame *src, int flags)
+{
+    AVVAAPIDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+    VASurfaceID surface_id;
+    VAStatus vas;
+    VADRMPRIMESurfaceDescriptor va_desc;
+    AVDRMFrameDescriptor *drm_desc = NULL;
+    uint32_t export_flags;
+    int err, i, j;
+
+    surface_id = (VASurfaceID)(uintptr_t)src->data[3];
+
+    export_flags = VA_EXPORT_SURFACE_SEPARATE_LAYERS;
+    if (flags & AV_HWFRAME_MAP_READ)
+        export_flags |= VA_EXPORT_SURFACE_READ_ONLY;
+    if (flags & AV_HWFRAME_MAP_WRITE)
+        export_flags |= VA_EXPORT_SURFACE_WRITE_ONLY;
+
+    vas = vaExportSurfaceHandle(hwctx->display, surface_id,
+                                VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
+                                export_flags, &va_desc);
+    if (vas != VA_STATUS_SUCCESS) {
+        if (vas == VA_STATUS_ERROR_UNIMPLEMENTED)
+            return AVERROR(ENOSYS);
+        av_log(hwfc, AV_LOG_ERROR, "Failed to export surface %#x: "
+               "%d (%s).\n", surface_id, vas, vaErrorStr(vas));
+        return AVERROR(EIO);
+    }
+
+    drm_desc = av_mallocz(sizeof(*drm_desc));
+    if (!drm_desc) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    // By some bizarre coincidence, these structures are very similar...
+    drm_desc->nb_objects = va_desc.num_objects;
+    for (i = 0; i < va_desc.num_objects; i++) {
+        drm_desc->objects[i].fd   = va_desc.objects[i].fd;
+        drm_desc->objects[i].size = va_desc.objects[i].size;
+        drm_desc->objects[i].format_modifier =
+            va_desc.objects[i].drm_format_modifier;
+    }
+    drm_desc->nb_layers = va_desc.num_layers;
+    for (i = 0; i < va_desc.num_layers; i++) {
+        drm_desc->layers[i].format    = va_desc.layers[i].drm_format;
+        drm_desc->layers[i].nb_planes = va_desc.layers[i].num_planes;
+        for (j = 0; j < va_desc.layers[i].num_planes; j++) {
+            drm_desc->layers[i].planes[j].object_index =
+                va_desc.layers[i].object_index[j];
+            drm_desc->layers[i].planes[j].offset =
+                va_desc.layers[i].offset[j];
+            drm_desc->layers[i].planes[j].pitch =
+                va_desc.layers[i].pitch[j];
+        }
+    }
+
+    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
+                                &vaapi_unmap_to_drm_esh, drm_desc);
+    if (err < 0)
+        goto fail;
+
+    dst->width   = src->width;
+    dst->height  = src->height;
+    dst->data[0] = (uint8_t*)drm_desc;
+
+    return 0;
+
+fail:
+    for (i = 0; i < va_desc.num_objects; i++)
+        close(va_desc.objects[i].fd);
+    av_freep(&drm_desc);
+    return err;
+}
+#endif
+
+#if VA_CHECK_VERSION(0, 36, 0)
+typedef struct VAAPIDRMImageBufferMapping {
+    VAImage      image;
+    VABufferInfo buffer_info;
+
+    AVDRMFrameDescriptor drm_desc;
+} VAAPIDRMImageBufferMapping;
+
+static void vaapi_unmap_to_drm_abh(AVHWFramesContext *hwfc,
+                                  HWMapDescriptor *hwmap)
+{
+    AVVAAPIDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+    VAAPIDRMImageBufferMapping *mapping = hwmap->priv;
+    VASurfaceID surface_id;
+    VAStatus vas;
+
+    surface_id = (VASurfaceID)(uintptr_t)hwmap->source->data[3];
+    av_log(hwfc, AV_LOG_DEBUG, "Unmap VAAPI surface %#x from DRM.\n",
+           surface_id);
+
+    // DRM PRIME file descriptors are closed by vaReleaseBufferHandle(),
+    // so we shouldn't close them separately.
+
+    vas = vaReleaseBufferHandle(hwctx->display, mapping->image.buf);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(hwfc, AV_LOG_ERROR, "Failed to release buffer "
+               "handle of image %#x (derived from surface %#x): "
+               "%d (%s).\n", mapping->image.buf, surface_id,
+               vas, vaErrorStr(vas));
+    }
+
+    vas = vaDestroyImage(hwctx->display, mapping->image.image_id);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(hwfc, AV_LOG_ERROR, "Failed to destroy image "
+               "derived from surface %#x: %d (%s).\n",
+               surface_id, vas, vaErrorStr(vas));
+    }
+
+    av_free(mapping);
+}
+
+static int vaapi_map_to_drm_abh(AVHWFramesContext *hwfc, AVFrame *dst,
+                                const AVFrame *src, int flags)
+{
+    AVVAAPIDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+    VAAPIDRMImageBufferMapping *mapping = NULL;
+    VASurfaceID surface_id;
+    VAStatus vas;
+    int err, i, p;
+
+    surface_id = (VASurfaceID)(uintptr_t)src->data[3];
+    av_log(hwfc, AV_LOG_DEBUG, "Map VAAPI surface %#x to DRM.\n",
+           surface_id);
+
+    mapping = av_mallocz(sizeof(*mapping));
+    if (!mapping)
+        return AVERROR(ENOMEM);
+
+    vas = vaDeriveImage(hwctx->display, surface_id,
+                        &mapping->image);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(hwfc, AV_LOG_ERROR, "Failed to derive image from "
+               "surface %#x: %d (%s).\n",
+               surface_id, vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    for (i = 0; i < FF_ARRAY_ELEMS(vaapi_drm_format_map); i++) {
+        if (vaapi_drm_format_map[i].va_fourcc ==
+            mapping->image.format.fourcc)
+            break;
+    }
+    if (i >= FF_ARRAY_ELEMS(vaapi_drm_format_map)) {
+        av_log(hwfc, AV_LOG_ERROR, "No matching DRM format for "
+               "VAAPI format %#x.\n", mapping->image.format.fourcc);
+        err = AVERROR(EINVAL);
+        goto fail_derived;
+    }
+
+    mapping->buffer_info.mem_type =
+        VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;
+
+    mapping->drm_desc.nb_layers =
+        vaapi_drm_format_map[i].nb_layer_formats;
+    if (mapping->drm_desc.nb_layers > 1) {
+        if (mapping->drm_desc.nb_layers != mapping->image.num_planes) {
+            av_log(hwfc, AV_LOG_ERROR, "Image properties do not match "
+                   "expected format: got %d planes, but expected %d.\n",
+                   mapping->image.num_planes, mapping->drm_desc.nb_layers);
+            err = AVERROR(EINVAL);
+            goto fail_derived;
+        }
+
+        for(p = 0; p < mapping->drm_desc.nb_layers; p++) {
+            mapping->drm_desc.layers[p] = (AVDRMLayerDescriptor) {
+                .format    = vaapi_drm_format_map[i].layer_formats[p],
+                .nb_planes = 1,
+                .planes[0] = {
+                    .object_index = 0,
+                    .offset       = mapping->image.offsets[p],
+                    .pitch        = mapping->image.pitches[p],
+                },
+            };
+        }
+    } else {
+        mapping->drm_desc.layers[0].format =
+            vaapi_drm_format_map[i].layer_formats[0];
+        mapping->drm_desc.layers[0].nb_planes = mapping->image.num_planes;
+        for (p = 0; p < mapping->image.num_planes; p++) {
+            mapping->drm_desc.layers[0].planes[p] = (AVDRMPlaneDescriptor) {
+                .object_index = 0,
+                .offset       = mapping->image.offsets[p],
+                .pitch        = mapping->image.pitches[p],
+            };
+        }
+    }
+
+    vas = vaAcquireBufferHandle(hwctx->display, mapping->image.buf,
+                                &mapping->buffer_info);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(hwfc, AV_LOG_ERROR, "Failed to get buffer "
+               "handle from image %#x (derived from surface %#x): "
+               "%d (%s).\n", mapping->image.buf, surface_id,
+               vas, vaErrorStr(vas));
+        err = AVERROR(EIO);
+        goto fail_derived;
+    }
+
+    av_log(hwfc, AV_LOG_DEBUG, "DRM PRIME fd is %ld.\n",
+           mapping->buffer_info.handle);
+
+    mapping->drm_desc.nb_objects = 1;
+    mapping->drm_desc.objects[0] = (AVDRMObjectDescriptor) {
+        .fd   = mapping->buffer_info.handle,
+        .size = mapping->image.data_size,
+        // There is no way to get the format modifier with this API.
+        .format_modifier = DRM_FORMAT_MOD_INVALID,
+    };
+
+    err = ff_hwframe_map_create(src->hw_frames_ctx,
+                                dst, src, &vaapi_unmap_to_drm_abh,
+                                mapping);
+    if (err < 0)
+        goto fail_mapped;
+
+    dst->data[0] = (uint8_t*)&mapping->drm_desc;
+    dst->width   = src->width;
+    dst->height  = src->height;
+
+    return 0;
+
+fail_mapped:
+    vaReleaseBufferHandle(hwctx->display, mapping->image.buf);
+fail_derived:
+    vaDestroyImage(hwctx->display, mapping->image.image_id);
+fail:
+    av_freep(&mapping);
+    return err;
+}
+#endif
+
 static int vaapi_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
                             const AVFrame *src, int flags)
 {
-    // Older versions without vaExportSurfaceHandle() are not supported -
-    // in theory this is possible with a combination of vaDeriveImage()
-    // and vaAcquireBufferHandle(), but it doesn't carry enough metadata
-    // to actually use the result in a generic way.
+#if VA_CHECK_VERSION(1, 1, 0)
+    int err;
+    err = vaapi_map_to_drm_esh(hwfc, dst, src, flags);
+    if (err != AVERROR(ENOSYS))
+        return err;
+#endif
+#if VA_CHECK_VERSION(0, 36, 0)
+    return vaapi_map_to_drm_abh(hwfc, dst, src, flags);
+#endif
     return AVERROR(ENOSYS);
 }
-#endif
+
+#endif /* CONFIG_LIBDRM */
 
 static int vaapi_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
                         const AVFrame *src, int flags)
@@ -1243,7 +1536,7 @@
 static int vaapi_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx, int flags)
 {
-#if CONFIG_LIBDRM
+#if HAVE_VAAPI_DRM
     if (src_ctx->type == AV_HWDEVICE_TYPE_DRM) {
         AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
         VADisplay *display;

diff --git a/libavutil/hwcontext_vdpau.c b/libavutil/hwcontext_vdpau.c
index 9b8f839..c11c3cf 100644
--- a/libavutil/hwcontext_vdpau.c
+++ b/libavutil/hwcontext_vdpau.c

@@ -79,11 +79,12 @@
 
 static const struct {
     VdpChromaType chroma_type;
+    enum AVPixelFormat frames_sw_format;
     const VDPAUPixFmtMap *map;
 } vdpau_pix_fmts[] = {
-    { VDP_CHROMA_TYPE_420, pix_fmts_420 },
-    { VDP_CHROMA_TYPE_422, pix_fmts_422 },
-    { VDP_CHROMA_TYPE_444, pix_fmts_444 },
+    { VDP_CHROMA_TYPE_420, AV_PIX_FMT_YUV420P, pix_fmts_420 },
+    { VDP_CHROMA_TYPE_422, AV_PIX_FMT_YUV422P, pix_fmts_422 },
+    { VDP_CHROMA_TYPE_444, AV_PIX_FMT_YUV444P, pix_fmts_444 },
 };
 
 static int count_pixfmts(const VDPAUPixFmtMap *map)
@@ -170,6 +171,35 @@
         av_freep(&priv->pix_fmts[i]);
 }
 
+static int vdpau_frames_get_constraints(AVHWDeviceContext *ctx,
+                                        const void *hwconfig,
+                                        AVHWFramesConstraints *constraints)
+{
+    VDPAUDeviceContext   *priv  = ctx->internal->priv;
+    int nb_sw_formats = 0;
+    int i;
+
+    constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(vdpau_pix_fmts) + 1,
+                                                    sizeof(*constraints->valid_sw_formats));
+    if (!constraints->valid_sw_formats)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < FF_ARRAY_ELEMS(vdpau_pix_fmts); i++) {
+        if (priv->nb_pix_fmts[i] > 1)
+            constraints->valid_sw_formats[nb_sw_formats++] = vdpau_pix_fmts[i].frames_sw_format;
+    }
+    constraints->valid_sw_formats[nb_sw_formats] = AV_PIX_FMT_NONE;
+
+    constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
+    if (!constraints->valid_hw_formats)
+        return AVERROR(ENOMEM);
+
+    constraints->valid_hw_formats[0] = AV_PIX_FMT_VDPAU;
+    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
+
+    return 0;
+}
+
 static void vdpau_buffer_free(void *opaque, uint8_t *data)
 {
     AVHWFramesContext          *ctx = opaque;
@@ -214,26 +244,18 @@
 
     int i;
 
-    switch (ctx->sw_format) {
-    case AV_PIX_FMT_YUV420P: priv->chroma_type = VDP_CHROMA_TYPE_420; break;
-    case AV_PIX_FMT_YUV422P: priv->chroma_type = VDP_CHROMA_TYPE_422; break;
-    case AV_PIX_FMT_YUV444P: priv->chroma_type = VDP_CHROMA_TYPE_444; break;
-    default:
-        av_log(ctx, AV_LOG_ERROR, "Unsupported data layout: %s\n",
-               av_get_pix_fmt_name(ctx->sw_format));
-        return AVERROR(ENOSYS);
-    }
-
     for (i = 0; i < FF_ARRAY_ELEMS(vdpau_pix_fmts); i++) {
-        if (vdpau_pix_fmts[i].chroma_type == priv->chroma_type) {
+        if (vdpau_pix_fmts[i].frames_sw_format == ctx->sw_format) {
+            priv->chroma_type = vdpau_pix_fmts[i].chroma_type;
             priv->chroma_idx  = i;
             priv->pix_fmts    = device_priv->pix_fmts[i];
             priv->nb_pix_fmts = device_priv->nb_pix_fmts[i];
             break;
         }
     }
-    if (!priv->pix_fmts) {
-        av_log(ctx, AV_LOG_ERROR, "Unsupported chroma type: %d\n", priv->chroma_type);
+    if (priv->nb_pix_fmts < 2) {
+        av_log(ctx, AV_LOG_ERROR, "Unsupported sw format: %s\n",
+               av_get_pix_fmt_name(ctx->sw_format));
         return AVERROR(ENOSYS);
     }
 
@@ -468,6 +490,7 @@
 #endif
     .device_init          = vdpau_device_init,
     .device_uninit        = vdpau_device_uninit,
+    .frames_get_constraints = vdpau_frames_get_constraints,
     .frames_init          = vdpau_frames_init,
     .frames_get_buffer    = vdpau_get_buffer,
     .transfer_get_formats = vdpau_transfer_get_formats,

diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c
index 5005178..4938a7e 100644
--- a/libavutil/imgutils.c
+++ b/libavutil/imgutils.c

@@ -125,7 +125,7 @@
     size[0] = linesizes[0] * height;
 
     if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
-        desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) {
+        desc->flags & FF_PSEUDOPAL) {
         data[1] = ptr + size[0]; /* palette is stored here as 256 32 bits words */
         return size[0] + 256 * 4;
     }
@@ -216,7 +216,7 @@
         av_free(buf);
         return ret;
     }
-    if (desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) {
+    if (desc->flags & AV_PIX_FMT_FLAG_PAL || (desc->flags & FF_PSEUDOPAL && pointers[1])) {
         avpriv_set_systematic_pal2((uint32_t*)pointers[1], pix_fmt);
         if (align < 4) {
             av_log(NULL, AV_LOG_ERROR, "Formats with a palette require a minimum alignment of 4\n");
@@ -225,7 +225,7 @@
     }
 
     if ((desc->flags & AV_PIX_FMT_FLAG_PAL ||
-         desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) &&
+         desc->flags & FF_PSEUDOPAL) && pointers[1] &&
         pointers[1] - pointers[0] > linesizes[0] * h) {
         /* zero-initialize the padding before the palette */
         memset(pointers[0] + linesizes[0] * h, 0,
@@ -242,9 +242,10 @@
 } ImgUtils;
 
 static const AVClass imgutils_class = {
-    .class_name = "IMGUTILS",
-    .item_name  = av_default_item_name,
-    .version    = LIBAVUTIL_VERSION_INT,
+    .class_name                = "IMGUTILS",
+    .item_name                 = av_default_item_name,
+    .option                    = NULL,
+    .version                   = LIBAVUTIL_VERSION_INT,
     .log_level_offset_offset   = offsetof(ImgUtils, log_offset),
     .parent_log_context_offset = offsetof(ImgUtils, log_ctx),
 };
@@ -353,12 +354,13 @@
         return;
 
     if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
-        desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) {
+        desc->flags & FF_PSEUDOPAL) {
         copy_plane(dst_data[0], dst_linesizes[0],
                    src_data[0], src_linesizes[0],
                    width, height);
         /* copy the palette */
-        memcpy(dst_data[1], src_data[1], 4*256);
+        if ((desc->flags & AV_PIX_FMT_FLAG_PAL) || (dst_data[1] && src_data[1]))
+            memcpy(dst_data[1], src_data[1], 4*256);
     } else {
         int i, planes_nb = 0;
 
@@ -441,7 +443,7 @@
         return ret;
 
     // do not include palette for these pseudo-paletted formats
-    if (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
+    if (desc->flags & FF_PSEUDOPAL)
         return FFALIGN(width, align) * height;
 
     return av_image_fill_arrays(data, linesize, NULL, pix_fmt,

diff --git a/libavutil/integer.c b/libavutil/integer.c
index 6d6855f..890e314 100644
--- a/libavutil/integer.c
+++ b/libavutil/integer.c

@@ -164,41 +164,3 @@
     }
     return out;
 }
-
-#ifdef TEST
-
-const uint8_t ff_log2_tab[256]={
-        0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-        5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-        6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-        6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
-};
-
-int main(void){
-    int64_t a,b;
-
-    for(a=7; a<256*256*256; a+=13215){
-        for(b=3; b<256*256*256; b+=27118){
-            AVInteger ai= av_int2i(a);
-            AVInteger bi= av_int2i(b);
-
-            av_assert0(av_i2int(ai) == a);
-            av_assert0(av_i2int(bi) == b);
-            av_assert0(av_i2int(av_add_i(ai,bi)) == a+b);
-            av_assert0(av_i2int(av_sub_i(ai,bi)) == a-b);
-            av_assert0(av_i2int(av_mul_i(ai,bi)) == a*b);
-            av_assert0(av_i2int(av_shr_i(ai, 9)) == a>>9);
-            av_assert0(av_i2int(av_shr_i(ai,-9)) == a<<9);
-            av_assert0(av_i2int(av_shr_i(ai, 17)) == a>>17);
-            av_assert0(av_i2int(av_shr_i(ai,-17)) == a<<17);
-            av_assert0(av_log2_i(ai) == av_log2(a));
-            av_assert0(av_i2int(av_div_i(ai,bi)) == a/b);
-        }
-    }
-    return 0;
-}
-#endif

diff --git a/libavutil/internal.h b/libavutil/internal.h
index a2d73e3..06bd561 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h

@@ -43,6 +43,7 @@
 #include "cpu.h"
 #include "dict.h"
 #include "macros.h"
+#include "mem.h"
 #include "pixfmt.h"
 #include "version.h"
 
@@ -62,10 +63,10 @@
 #endif
 #endif
 
-#if defined(_MSC_VER) && CONFIG_SHARED
-#    define av_export __declspec(dllimport)
+#if defined(_WIN32) && CONFIG_SHARED && !defined(BUILDING_avutil)
+#    define av_export_avutil __declspec(dllimport)
 #else
-#    define av_export
+#    define av_export_avutil
 #endif
 
 #if HAVE_PRAGMA_DEPRECATED
@@ -76,8 +77,8 @@
 #        define FF_DISABLE_DEPRECATION_WARNINGS __pragma(warning(push)) __pragma(warning(disable:4996))
 #        define FF_ENABLE_DEPRECATION_WARNINGS  __pragma(warning(pop))
 #    else
-#        define FF_DISABLE_DEPRECATION_WARNINGS _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
-#        define FF_ENABLE_DEPRECATION_WARNINGS  _Pragma("GCC diagnostic warning \"-Wdeprecated-declarations\"")
+#        define FF_DISABLE_DEPRECATION_WARNINGS _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
+#        define FF_ENABLE_DEPRECATION_WARNINGS  _Pragma("GCC diagnostic pop")
 #    endif
 #else
 #    define FF_DISABLE_DEPRECATION_WARNINGS
@@ -110,24 +111,30 @@
     DECLARE_ALIGNED(a, t, la_##v) s o;                  \
     t (*v) o = la_##v
 
-#define LOCAL_ALIGNED(a, t, v, ...) E1(LOCAL_ALIGNED_A(a, t, v, __VA_ARGS__,,))
+#define LOCAL_ALIGNED(a, t, v, ...) LOCAL_ALIGNED_##a(t, v, __VA_ARGS__)
 
-#if HAVE_LOCAL_ALIGNED_8
+#if HAVE_LOCAL_ALIGNED
+#   define LOCAL_ALIGNED_4(t, v, ...) E1(LOCAL_ALIGNED_D(4, t, v, __VA_ARGS__,,))
+#else
+#   define LOCAL_ALIGNED_4(t, v, ...) E1(LOCAL_ALIGNED_A(4, t, v, __VA_ARGS__,,))
+#endif
+
+#if HAVE_LOCAL_ALIGNED
 #   define LOCAL_ALIGNED_8(t, v, ...) E1(LOCAL_ALIGNED_D(8, t, v, __VA_ARGS__,,))
 #else
-#   define LOCAL_ALIGNED_8(t, v, ...) LOCAL_ALIGNED(8, t, v, __VA_ARGS__)
+#   define LOCAL_ALIGNED_8(t, v, ...) E1(LOCAL_ALIGNED_A(8, t, v, __VA_ARGS__,,))
 #endif
 
-#if HAVE_LOCAL_ALIGNED_16
+#if HAVE_LOCAL_ALIGNED
 #   define LOCAL_ALIGNED_16(t, v, ...) E1(LOCAL_ALIGNED_D(16, t, v, __VA_ARGS__,,))
 #else
-#   define LOCAL_ALIGNED_16(t, v, ...) LOCAL_ALIGNED(16, t, v, __VA_ARGS__)
+#   define LOCAL_ALIGNED_16(t, v, ...) E1(LOCAL_ALIGNED_A(16, t, v, __VA_ARGS__,,))
 #endif
 
-#if HAVE_LOCAL_ALIGNED_32
+#if HAVE_LOCAL_ALIGNED
 #   define LOCAL_ALIGNED_32(t, v, ...) E1(LOCAL_ALIGNED_D(32, t, v, __VA_ARGS__,,))
 #else
-#   define LOCAL_ALIGNED_32(t, v, ...) LOCAL_ALIGNED(32, t, v, __VA_ARGS__)
+#   define LOCAL_ALIGNED_32(t, v, ...) E1(LOCAL_ALIGNED_A(32, t, v, __VA_ARGS__,,))
 #endif
 
 #define FF_ALLOC_OR_GOTO(ctx, p, size, label)\
@@ -353,4 +360,13 @@
  */
 int avpriv_dict_set_timestamp(AVDictionary **dict, const char *key, int64_t timestamp);
 
+// Helper macro for AV_PIX_FMT_FLAG_PSEUDOPAL deprecation. Code inside FFmpeg
+// should always use FF_PSEUDOPAL. Once the public API flag gets removed, all
+// code using it is dead code.
+#if FF_API_PSEUDOPAL
+#define FF_PSEUDOPAL AV_PIX_FMT_FLAG_PSEUDOPAL
+#else
+#define FF_PSEUDOPAL 0
+#endif
+
 #endif /* AVUTIL_INTERNAL_H */

diff --git a/libavutil/intreadwrite.h b/libavutil/intreadwrite.h
index d54d4b9..67c763b 100644
--- a/libavutil/intreadwrite.h
+++ b/libavutil/intreadwrite.h

@@ -215,7 +215,7 @@
  * by per-arch headers.
  */
 
-#if defined(__GNUC__) && !defined(__TI_COMPILER_VERSION__)
+#if defined(__GNUC__)
 
 union unaligned_64 { uint64_t l; } __attribute__((packed)) av_alias;
 union unaligned_32 { uint32_t l; } __attribute__((packed)) av_alias;
@@ -224,12 +224,7 @@
 #   define AV_RN(s, p) (((const union unaligned_##s *) (p))->l)
 #   define AV_WN(s, p, v) ((((union unaligned_##s *) (p))->l) = (v))
 
-#elif defined(__DECC)
-
-#   define AV_RN(s, p) (*((const __unaligned uint##s##_t*)(p)))
-#   define AV_WN(s, p, v) (*((__unaligned uint##s##_t*)(p)) = (v))
-
-#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_X64)) && AV_HAVE_FAST_UNALIGNED
+#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_X64) || defined(_M_ARM64)) && AV_HAVE_FAST_UNALIGNED
 
 #   define AV_RN(s, p) (*((const __unaligned uint##s##_t*)(p)))
 #   define AV_WN(s, p, v) (*((__unaligned uint##s##_t*)(p)) = (v))

diff --git a/libavutil/log.c b/libavutil/log.c
index be80620..93a156b 100644
--- a/libavutil/log.c
+++ b/libavutil/log.c

@@ -39,11 +39,9 @@
 #include "common.h"
 #include "internal.h"
 #include "log.h"
+#include "thread.h"
 
-#if HAVE_PTHREADS
-#include <pthread.h>
-static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-#endif
+static AVMutex mutex = AV_MUTEX_INITIALIZER;
 
 #define LINE_SZ 1024
 
@@ -57,7 +55,7 @@
 static int flags;
 
 #define NB_LEVELS 8
-#if defined(_WIN32) && !defined(__MINGW32CE__) && HAVE_SETCONSOLETEXTATTRIBUTE
+#if defined(_WIN32) && HAVE_SETCONSOLETEXTATTRIBUTE
 #include <windows.h>
 static const uint8_t color[16 + AV_CLASS_CATEGORY_NB] = {
     [AV_LOG_PANIC  /8] = 12,
@@ -124,7 +122,7 @@
 
 static void check_color_terminal(void)
 {
-#if defined(_WIN32) && !defined(__MINGW32CE__) && HAVE_SETCONSOLETEXTATTRIBUTE
+#if defined(_WIN32) && HAVE_SETCONSOLETEXTATTRIBUTE
     CONSOLE_SCREEN_BUFFER_INFO con_info;
     con = GetStdHandle(STD_ERROR_HANDLE);
     use_color = (con != INVALID_HANDLE_VALUE) && !getenv("NO_COLOR") &&
@@ -159,7 +157,7 @@
     if (level == AV_LOG_INFO/8) local_use_color = 0;
     else                        local_use_color = use_color;
 
-#if defined(_WIN32) && !defined(__MINGW32CE__) && HAVE_SETCONSOLETEXTATTRIBUTE
+#if defined(_WIN32) && HAVE_SETCONSOLETEXTATTRIBUTE
     if (local_use_color)
         SetConsoleTextAttribute(con, background | color[level]);
     fputs(str, stderr);
@@ -249,9 +247,9 @@
                         AVBPrint part[4], int *print_prefix, int type[2])
 {
     AVClass* avc = avcl ? *(AVClass **) avcl : NULL;
-    av_bprint_init(part+0, 0, 1);
-    av_bprint_init(part+1, 0, 1);
-    av_bprint_init(part+2, 0, 1);
+    av_bprint_init(part+0, 0, AV_BPRINT_SIZE_AUTOMATIC);
+    av_bprint_init(part+1, 0, AV_BPRINT_SIZE_AUTOMATIC);
+    av_bprint_init(part+2, 0, AV_BPRINT_SIZE_AUTOMATIC);
     av_bprint_init(part+3, 0, 65536);
 
     if(type) type[0] = type[1] = AV_CLASS_CATEGORY_NA + 16;
@@ -268,11 +266,11 @@
         av_bprintf(part+1, "[%s @ %p] ",
                  avc->item_name(avcl), avcl);
         if(type) type[1] = get_category(avcl);
-
-        if (flags & AV_LOG_PRINT_LEVEL)
-            av_bprintf(part+2, "[%s] ", get_level_str(level));
     }
 
+    if (*print_prefix && (level > AV_LOG_QUIET) && (flags & AV_LOG_PRINT_LEVEL))
+        av_bprintf(part+2, "[%s] ", get_level_str(level));
+
     av_vbprintf(part+3, fmt, vl);
 
     if(*part[0].str || *part[1].str || *part[2].str || *part[3].str) {
@@ -317,9 +315,7 @@
 
     if (level > av_log_level)
         return;
-#if HAVE_PTHREADS
-    pthread_mutex_lock(&mutex);
-#endif
+    ff_mutex_lock(&mutex);
 
     format_line(ptr, level, fmt, vl, part, &print_prefix, type);
     snprintf(line, sizeof(line), "%s%s%s%s", part[0].str, part[1].str, part[2].str, part[3].str);
@@ -356,9 +352,7 @@
 #endif
 end:
     av_bprint_finalize(part+3, NULL);
-#if HAVE_PTHREADS
-    pthread_mutex_unlock(&mutex);
-#endif
+    ff_mutex_unlock(&mutex);
 }
 
 static void (*av_log_callback)(void*, int, const char*, va_list) =

diff --git a/libavutil/log.h b/libavutil/log.h
index f0a5738..d9554e6 100644
--- a/libavutil/log.h
+++ b/libavutil/log.h

@@ -334,20 +334,6 @@
 int av_log_format_line2(void *ptr, int level, const char *fmt, va_list vl,
                         char *line, int line_size, int *print_prefix);
 
-#if FF_API_DLOG
-/**
- * av_dlog macros
- * @deprecated unused
- * Useful to print debug messages that shouldn't get compiled in normally.
- */
-
-#ifdef DEBUG
-#    define av_dlog(pctx, ...) av_log(pctx, AV_LOG_DEBUG, __VA_ARGS__)
-#else
-#    define av_dlog(pctx, ...) do { if (0) av_log(pctx, AV_LOG_DEBUG, __VA_ARGS__); } while (0)
-#endif
-#endif /* FF_API_DLOG */
-
 /**
  * Skip repeated messages, this requires the user app to use av_log() instead of
  * (f)printf as the 2 would otherwise interfere and lead to

diff --git a/libavutil/mastering_display_metadata.h b/libavutil/mastering_display_metadata.h
index 847b0b6..c23b07c 100644
--- a/libavutil/mastering_display_metadata.h
+++ b/libavutil/mastering_display_metadata.h

@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2016 Neil Birkbeck <neil.birkbeck@gmail.com>
  *
  * This file is part of FFmpeg.

diff --git a/libavutil/mem.c b/libavutil/mem.c
index 36740f1..6149755 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c

@@ -61,7 +61,7 @@
 
 #include "mem_internal.h"
 
-#define ALIGN (HAVE_AVX ? 32 : 16)
+#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16))
 
 /* NOTE: if you want to override these functions with your own
  * implementations (not recommended) you have to link libav* as
@@ -181,6 +181,20 @@
     return 0;
 }
 
+void *av_malloc_array(size_t nmemb, size_t size)
+{
+    if (!size || nmemb >= INT_MAX / size)
+        return NULL;
+    return av_malloc(nmemb * size);
+}
+
+void *av_mallocz_array(size_t nmemb, size_t size)
+{
+    if (!size || nmemb >= INT_MAX / size)
+        return NULL;
+    return av_mallocz(nmemb * size);
+}
+
 void *av_realloc_array(void *ptr, size_t nmemb, size_t size)
 {
     if (!size || nmemb >= INT_MAX / size)
@@ -449,10 +463,15 @@
 
 void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
 {
-    if (min_size < *size)
+    if (min_size <= *size)
         return ptr;
 
-    min_size = FFMAX(min_size + min_size / 16 + 32, min_size);
+    if (min_size > max_alloc_size - 32) {
+        *size = 0;
+        return NULL;
+    }
+
+    min_size = FFMIN(max_alloc_size - 32, FFMAX(min_size + min_size / 16 + 32, min_size));
 
     ptr = av_realloc(ptr, min_size);
     /* we could set this to the unmodified min_size but this is safer

diff --git a/libavutil/mem.h b/libavutil/mem.h
index 527cd03..7e0b12a 100644
--- a/libavutil/mem.h
+++ b/libavutil/mem.h

@@ -74,6 +74,19 @@
  */
 
 /**
+ * @def DECLARE_ASM_ALIGNED(n,t,v)
+ * Declare an aligned variable appropriate for use in inline assembly code.
+ *
+ * @code{.c}
+ * DECLARE_ASM_ALIGNED(16, uint64_t, pw_08) = UINT64_C(0x0008000800080008);
+ * @endcode
+ *
+ * @param n Minimum alignment in bytes
+ * @param t Type of the variable (or array element)
+ * @param v Name of the variable
+ */
+
+/**
  * @def DECLARE_ASM_CONST(n,t,v)
  * Declare a static constant aligned variable appropriate for use in inline
  * assembly code.
@@ -89,25 +102,23 @@
 
 #if defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1110 || defined(__SUNPRO_C)
     #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
+    #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (n))) v
     #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
-#elif defined(__TI_COMPILER_VERSION__)
-    #define DECLARE_ALIGNED(n,t,v)                      \
-        AV_PRAGMA(DATA_ALIGN(v,n))                      \
-        t __attribute__((aligned(n))) v
-    #define DECLARE_ASM_CONST(n,t,v)                    \
-        AV_PRAGMA(DATA_ALIGN(v,n))                      \
-        static const t __attribute__((aligned(n))) v
 #elif defined(__DJGPP__)
     #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, 16)))) v
+    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
     #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
 #elif defined(__GNUC__) || defined(__clang__)
     #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
+    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (n))) v
     #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (n))) v
 #elif defined(_MSC_VER)
     #define DECLARE_ALIGNED(n,t,v)      __declspec(align(n)) t v
+    #define DECLARE_ASM_ALIGNED(n,t,v)  __declspec(align(n)) t v
     #define DECLARE_ASM_CONST(n,t,v)    __declspec(align(n)) static const t v
 #else
     #define DECLARE_ALIGNED(n,t,v)      t v
+    #define DECLARE_ASM_ALIGNED(n,t,v)  t v
     #define DECLARE_ASM_CONST(n,t,v)    static const t v
 #endif
 
@@ -206,12 +217,7 @@
  *         be allocated
  * @see av_malloc()
  */
-av_alloc_size(1, 2) static inline void *av_malloc_array(size_t nmemb, size_t size)
-{
-    if (!size || nmemb >= INT_MAX / size)
-        return NULL;
-    return av_malloc(nmemb * size);
-}
+av_alloc_size(1, 2) void *av_malloc_array(size_t nmemb, size_t size);
 
 /**
  * Allocate a memory block for an array with av_mallocz().
@@ -226,12 +232,7 @@
  * @see av_mallocz()
  * @see av_malloc_array()
  */
-av_alloc_size(1, 2) static inline void *av_mallocz_array(size_t nmemb, size_t size)
-{
-    if (!size || nmemb >= INT_MAX / size)
-        return NULL;
-    return av_mallocz(nmemb * size);
-}
+av_alloc_size(1, 2) void *av_mallocz_array(size_t nmemb, size_t size);
 
 /**
  * Non-inlined equivalent of av_mallocz_array().

diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index c892529..6a46704 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h

@@ -1088,6 +1088,25 @@
     out_m;                                                \
 } )
 
+#define CLIP_SW_0_255_MAX_SATU(in)                    \
+( {                                                   \
+    v4i32 out_m;                                      \
+                                                      \
+    out_m = __msa_maxi_s_w((v4i32) in, 0);            \
+    out_m = (v4i32) __msa_sat_u_w((v4u32) out_m, 7);  \
+    out_m;                                            \
+} )
+#define CLIP_SW2_0_255_MAX_SATU(in0, in1)  \
+{                                          \
+    in0 = CLIP_SW_0_255_MAX_SATU(in0);     \
+    in1 = CLIP_SW_0_255_MAX_SATU(in1);     \
+}
+#define CLIP_SW4_0_255_MAX_SATU(in0, in1, in2, in3)  \
+{                                                    \
+    CLIP_SW2_0_255_MAX_SATU(in0, in1);               \
+    CLIP_SW2_0_255_MAX_SATU(in2, in3);               \
+}
+
 /* Description : Addition of 4 signed word elements
                  4 signed word elements of input vector are added together and
                  resulted integer sum is returned
@@ -2244,6 +2263,22 @@
     out3 = in6 - in7;                                                         \
 }
 
+/* Description : Sign extend byte elements from right half of the vector
+   Arguments   : Input  - in    (byte vector)
+                 Output - out   (sign extended halfword vector)
+                 Return Type - signed halfword
+   Details     : Sign bit of byte elements from input vector 'in' is
+                 extracted and interleaved with same vector 'in' to generate
+                 8 halfword elements keeping sign intact
+*/
+#define UNPCK_R_SB_SH(in, out)                       \
+{                                                    \
+    v16i8 sign_m;                                    \
+                                                     \
+    sign_m = __msa_clti_s_b((v16i8) in, 0);          \
+    out = (v8i16) __msa_ilvr_b(sign_m, (v16i8) in);  \
+}
+
 /* Description : Sign extend halfword elements from right half of the vector
    Arguments   : Inputs  - in    (input halfword vector)
                  Outputs - out   (sign extended word vectors)

diff --git a/libavutil/mips/mmiutils.h b/libavutil/mips/mmiutils.h
index 491579e..76b1199 100644
--- a/libavutil/mips/mmiutils.h
+++ b/libavutil/mips/mmiutils.h

@@ -201,40 +201,139 @@
 
 #endif /* HAVE_LOONGSON2 */
 
-#define TRANSPOSE_4H(m1, m2, m3, m4, t1, t2, t3, t4, t5, r1, zero, shift) \
-        "li         "#r1",  0x93                                    \n\t" \
-        "xor        "#zero","#zero","#zero"                         \n\t" \
-        "mtc1       "#r1",  "#shift"                                \n\t" \
-        "punpcklhw  "#t1",  "#m1",  "#zero"                         \n\t" \
-        "punpcklhw  "#t5",  "#m2",  "#zero"                         \n\t" \
-        "pshufh     "#t5",  "#t5",  "#shift"                        \n\t" \
-        "or         "#t1",  "#t1",  "#t5"                           \n\t" \
-        "punpckhhw  "#t2",  "#m1",  "#zero"                         \n\t" \
-        "punpckhhw  "#t5",  "#m2",  "#zero"                         \n\t" \
-        "pshufh     "#t5",  "#t5",  "#shift"                        \n\t" \
-        "or         "#t2",  "#t2",  "#t5"                           \n\t" \
-        "punpcklhw  "#t3",  "#m3",  "#zero"                         \n\t" \
-        "punpcklhw  "#t5",  "#m4",  "#zero"                         \n\t" \
-        "pshufh     "#t5",  "#t5",  "#shift"                        \n\t" \
-        "or         "#t3",  "#t3",  "#t5"                           \n\t" \
-        "punpckhhw  "#t4",  "#m3",  "#zero"                         \n\t" \
-        "punpckhhw  "#t5",  "#m4",  "#zero"                         \n\t" \
-        "pshufh     "#t5",  "#t5",  "#shift"                        \n\t" \
-        "or         "#t4",  "#t4",  "#t5"                           \n\t" \
-        "punpcklwd  "#m1",  "#t1",  "#t3"                           \n\t" \
-        "punpckhwd  "#m2",  "#t1",  "#t3"                           \n\t" \
-        "punpcklwd  "#m3",  "#t2",  "#t4"                           \n\t" \
-        "punpckhwd  "#m4",  "#t2",  "#t4"                           \n\t"
+/**
+ * backup register
+ */
+#define BACKUP_REG \
+  double temp_backup_reg[8];                                    \
+  if (_MIPS_SIM == _ABI64)                                      \
+    __asm__ volatile (                                          \
+      "gssqc1       $f25,      $f24,       0x00(%[temp])  \n\t" \
+      "gssqc1       $f27,      $f26,       0x10(%[temp])  \n\t" \
+      "gssqc1       $f29,      $f28,       0x20(%[temp])  \n\t" \
+      "gssqc1       $f31,      $f30,       0x30(%[temp])  \n\t" \
+      :                                                         \
+      : [temp]"r"(temp_backup_reg)                              \
+      : "memory"                                                \
+    );                                                          \
+  else                                                          \
+    __asm__ volatile (                                          \
+      "gssqc1       $f22,      $f20,       0x00(%[temp])  \n\t" \
+      "gssqc1       $f26,      $f24,       0x10(%[temp])  \n\t" \
+      "gssqc1       $f30,      $f28,       0x20(%[temp])  \n\t" \
+      :                                                         \
+      : [temp]"r"(temp_backup_reg)                              \
+      : "memory"                                                \
+    );
 
+/**
+ * recover register
+ */
+#define RECOVER_REG \
+  if (_MIPS_SIM == _ABI64)                                      \
+    __asm__ volatile (                                          \
+      "gslqc1       $f25,      $f24,       0x00(%[temp])  \n\t" \
+      "gslqc1       $f27,      $f26,       0x10(%[temp])  \n\t" \
+      "gslqc1       $f29,      $f28,       0x20(%[temp])  \n\t" \
+      "gslqc1       $f31,      $f30,       0x30(%[temp])  \n\t" \
+      :                                                         \
+      : [temp]"r"(temp_backup_reg)                              \
+      : "memory"                                                \
+    );                                                          \
+  else                                                          \
+    __asm__ volatile (                                          \
+      "gslqc1       $f22,      $f20,       0x00(%[temp])  \n\t" \
+      "gslqc1       $f26,      $f24,       0x10(%[temp])  \n\t" \
+      "gslqc1       $f30,      $f28,       0x20(%[temp])  \n\t" \
+      :                                                         \
+      : [temp]"r"(temp_backup_reg)                              \
+      : "memory"                                                \
+    );
 
-#define PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift)                              \
-        "psrah      "#fp1",     "#fp1",     "#shift"                \n\t"   \
-        "psrah      "#fp2",     "#fp2",     "#shift"                \n\t"   \
-        "psrah      "#fp3",     "#fp3",     "#shift"                \n\t"   \
+/**
+ * brief: Transpose 4X4 half word packaged data.
+ * fr_i0, fr_i1, fr_i2, fr_i3: src & dst
+ * fr_t0, fr_t1, fr_t2, fr_t3: temporary register
+ */
+#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3,                          \
+                     fr_t0, fr_t1, fr_t2, fr_t3)                          \
+        "punpcklhw  "#fr_t0",   "#fr_i0",   "#fr_i1"                \n\t" \
+        "punpckhhw  "#fr_t1",   "#fr_i0",   "#fr_i1"                \n\t" \
+        "punpcklhw  "#fr_t2",   "#fr_i2",   "#fr_i3"                \n\t" \
+        "punpckhhw  "#fr_t3",   "#fr_i2",   "#fr_i3"                \n\t" \
+        "punpcklwd  "#fr_i0",   "#fr_t0",   "#fr_t2"                \n\t" \
+        "punpckhwd  "#fr_i1",   "#fr_t0",   "#fr_t2"                \n\t" \
+        "punpcklwd  "#fr_i2",   "#fr_t1",   "#fr_t3"                \n\t" \
+        "punpckhwd  "#fr_i3",   "#fr_t1",   "#fr_t3"                \n\t"
+
+/**
+ * brief: Transpose 8x8 byte packaged data.
+ * fr_i0~i7: src & dst
+ * fr_t0~t3: temporary register
+ */
+#define TRANSPOSE_8B(fr_i0, fr_i1, fr_i2, fr_i3, fr_i4, fr_i5,            \
+                     fr_i6, fr_i7, fr_t0, fr_t1, fr_t2, fr_t3)            \
+        "punpcklbh  "#fr_t0",   "#fr_i0",   "#fr_i1"                \n\t" \
+        "punpckhbh  "#fr_t1",   "#fr_i0",   "#fr_i1"                \n\t" \
+        "punpcklbh  "#fr_t2",   "#fr_i2",   "#fr_i3"                \n\t" \
+        "punpckhbh  "#fr_t3",   "#fr_i2",   "#fr_i3"                \n\t" \
+        "punpcklbh  "#fr_i0",   "#fr_i4",   "#fr_i5"                \n\t" \
+        "punpckhbh  "#fr_i1",   "#fr_i4",   "#fr_i5"                \n\t" \
+        "punpcklbh  "#fr_i2",   "#fr_i6",   "#fr_i7"                \n\t" \
+        "punpckhbh  "#fr_i3",   "#fr_i6",   "#fr_i7"                \n\t" \
+        "punpcklhw  "#fr_i4",   "#fr_t0",   "#fr_t2"                \n\t" \
+        "punpckhhw  "#fr_i5",   "#fr_t0",   "#fr_t2"                \n\t" \
+        "punpcklhw  "#fr_i6",   "#fr_t1",   "#fr_t3"                \n\t" \
+        "punpckhhw  "#fr_i7",   "#fr_t1",   "#fr_t3"                \n\t" \
+        "punpcklhw  "#fr_t0",   "#fr_i0",   "#fr_i2"                \n\t" \
+        "punpckhhw  "#fr_t1",   "#fr_i0",   "#fr_i2"                \n\t" \
+        "punpcklhw  "#fr_t2",   "#fr_i1",   "#fr_i3"                \n\t" \
+        "punpckhhw  "#fr_t3",   "#fr_i1",   "#fr_i3"                \n\t" \
+        "punpcklwd  "#fr_i0",   "#fr_i4",   "#fr_t0"                \n\t" \
+        "punpckhwd  "#fr_i1",   "#fr_i4",   "#fr_t0"                \n\t" \
+        "punpcklwd  "#fr_i2",   "#fr_i5",   "#fr_t1"                \n\t" \
+        "punpckhwd  "#fr_i3",   "#fr_i5",   "#fr_t1"                \n\t" \
+        "punpcklwd  "#fr_i4",   "#fr_i6",   "#fr_t2"                \n\t" \
+        "punpckhwd  "#fr_i5",   "#fr_i6",   "#fr_t2"                \n\t" \
+        "punpcklwd  "#fr_i6",   "#fr_i7",   "#fr_t3"                \n\t" \
+        "punpckhwd  "#fr_i7",   "#fr_i7",   "#fr_t3"                \n\t"
+
+/**
+ * brief: Parallel SRA for 8 byte packaged data.
+ * fr_i0: src
+ * fr_i1: SRA number(SRAB number + 8)
+ * fr_t0, fr_t1: temporary register
+ * fr_d0: dst
+ */
+#define PSRAB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0)                      \
+        "punpcklbh    "#fr_t0",   "#fr_t0",   "#fr_i0"              \n\t" \
+        "punpckhbh    "#fr_t1",   "#fr_t1",   "#fr_i0"              \n\t" \
+        "psrah        "#fr_t0",   "#fr_t0",   "#fr_i1"              \n\t" \
+        "psrah        "#fr_t1",   "#fr_t1",   "#fr_i1"              \n\t" \
+        "packsshb     "#fr_d0",   "#fr_t0",   "#fr_t1"              \n\t"
+
+/**
+ * brief: Parallel SRL for 8 byte packaged data.
+ * fr_i0: src
+ * fr_i1: SRL number(SRLB number + 8)
+ * fr_t0, fr_t1: temporary register
+ * fr_d0: dst
+ */
+#define PSRLB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0)                      \
+        "punpcklbh    "#fr_t0",   "#fr_t0",   "#fr_i0"              \n\t" \
+        "punpckhbh    "#fr_t1",   "#fr_t1",   "#fr_i0"              \n\t" \
+        "psrlh        "#fr_t0",   "#fr_t0",   "#fr_i1"              \n\t" \
+        "psrlh        "#fr_t1",   "#fr_t1",   "#fr_i1"              \n\t" \
+        "packsshb     "#fr_d0",   "#fr_t0",   "#fr_t1"              \n\t"
+
+#define PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift)                            \
+        "psrah      "#fp1",     "#fp1",     "#shift"                \n\t" \
+        "psrah      "#fp2",     "#fp2",     "#shift"                \n\t" \
+        "psrah      "#fp3",     "#fp3",     "#shift"                \n\t" \
         "psrah      "#fp4",     "#fp4",     "#shift"                \n\t"
 
-#define PSRAH_8_MMI(fp1, fp2, fp3, fp4, fp5, fp6, fp7, fp8, shift)          \
-        PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift)                              \
+#define PSRAH_8_MMI(fp1, fp2, fp3, fp4, fp5, fp6, fp7, fp8, shift)        \
+        PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift)                            \
         PSRAH_4_MMI(fp5, fp6, fp7, fp8, shift)
 
 

diff --git a/libavutil/murmur3.c b/libavutil/murmur3.c
index 4271e01..7961752 100644
--- a/libavutil/murmur3.c
+++ b/libavutil/murmur3.c

@@ -60,7 +60,7 @@
     return k;
 }
 
-static uint64_t inline get_k2(const uint8_t *src)
+static inline uint64_t get_k2(const uint8_t *src)
 {
     uint64_t k = AV_RL64(src + 8);
     k *= c2;
@@ -69,7 +69,7 @@
     return k;
 }
 
-static uint64_t inline update_h1(uint64_t k, uint64_t h1, uint64_t h2)
+static inline uint64_t update_h1(uint64_t k, uint64_t h1, uint64_t h2)
 {
     k ^= h1;
     k = ROT(k, 27);
@@ -79,7 +79,7 @@
     return k;
 }
 
-static uint64_t inline update_h2(uint64_t k, uint64_t h1, uint64_t h2)
+static inline uint64_t update_h2(uint64_t k, uint64_t h1, uint64_t h2)
 {
     k ^= h2;
     k = ROT(k, 31);
@@ -89,7 +89,11 @@
     return k;
 }
 
+#if FF_API_CRYPTO_SIZE_T
 void av_murmur3_update(AVMurMur3 *c, const uint8_t *src, int len)
+#else
+void av_murmur3_update(AVMurMur3 *c, const uint8_t *src, size_t len)
+#endif
 {
     const uint8_t *end;
     uint64_t h1 = c->h1, h2 = c->h2;

diff --git a/libavutil/murmur3.h b/libavutil/murmur3.h
index 6a1694c..1b09175 100644
--- a/libavutil/murmur3.h
+++ b/libavutil/murmur3.h

@@ -29,6 +29,8 @@
 
 #include <stdint.h>
 
+#include "version.h"
+
 /**
  * @defgroup lavu_murmur3 Murmur3
  * @ingroup lavu_hash
@@ -97,7 +99,11 @@
  * @param[in]  src  Input data to update hash with
  * @param[in]  len  Number of bytes to read from `src`
  */
+#if FF_API_CRYPTO_SIZE_T
 void av_murmur3_update(struct AVMurMur3 *c, const uint8_t *src, int len);
+#else
+void av_murmur3_update(struct AVMurMur3 *c, const uint8_t *src, size_t len);
+#endif
 
 /**
  * Finish hashing and output digest value.

diff --git a/libavutil/opencl.c b/libavutil/opencl.c
deleted file mode 100644
index 2027565..0000000
--- a/libavutil/opencl.c
+++ /dev/null

@@ -1,875 +0,0 @@
-/*
- * Copyright (C) 2012 Peng  Gao     <peng@multicorewareinc.com>
- * Copyright (C) 2012 Li    Cao     <li@multicorewareinc.com>
- * Copyright (C) 2012 Wei   Gao     <weigao@multicorewareinc.com>
- * Copyright (C) 2013 Lenny Wang    <lwanghpc@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "opencl.h"
-#include "avstring.h"
-#include "log.h"
-#include "avassert.h"
-#include "opt.h"
-
-#if HAVE_THREADS
-#include "thread.h"
-#include "atomic.h"
-
-static pthread_mutex_t * volatile atomic_opencl_lock = NULL;
-#define LOCK_OPENCL pthread_mutex_lock(atomic_opencl_lock)
-#define UNLOCK_OPENCL pthread_mutex_unlock(atomic_opencl_lock)
-#else
-#define LOCK_OPENCL
-#define UNLOCK_OPENCL
-#endif
-
-#define MAX_KERNEL_CODE_NUM 200
-
-typedef struct {
-    int is_compiled;
-    const char *kernel_string;
-} KernelCode;
-
-typedef struct {
-    const AVClass *class;
-    int log_offset;
-    void *log_ctx;
-    int init_count;
-    int opt_init_flag;
-     /**
-     * if set to 1, the OpenCL environment was created by the user and
-     * passed as AVOpenCLExternalEnv when initing ,0:created by opencl wrapper.
-     */
-    int is_user_created;
-    int platform_idx;
-    int device_idx;
-    cl_platform_id platform_id;
-    cl_device_type device_type;
-    cl_context context;
-    cl_device_id device_id;
-    cl_command_queue command_queue;
-    int kernel_code_count;
-    KernelCode kernel_code[MAX_KERNEL_CODE_NUM];
-    AVOpenCLDeviceList device_list;
-} OpenclContext;
-
-#define OFFSET(x) offsetof(OpenclContext, x)
-
-static const AVOption opencl_options[] = {
-     { "platform_idx",        "set platform index value",  OFFSET(platform_idx),  AV_OPT_TYPE_INT,    {.i64=-1}, -1, INT_MAX},
-     { "device_idx",          "set device index value",    OFFSET(device_idx),    AV_OPT_TYPE_INT,    {.i64=-1}, -1, INT_MAX},
-     { NULL }
-};
-
-static const AVClass openclutils_class = {
-    .class_name                = "opencl",
-    .option                    = opencl_options,
-    .item_name                 = av_default_item_name,
-    .version                   = LIBAVUTIL_VERSION_INT,
-    .log_level_offset_offset   = offsetof(OpenclContext, log_offset),
-    .parent_log_context_offset = offsetof(OpenclContext, log_ctx),
-};
-
-static OpenclContext opencl_ctx = {&openclutils_class};
-
-static const cl_device_type device_type[] = {CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU};
-
-typedef struct {
-    int err_code;
-    const char *err_str;
-} OpenclErrorMsg;
-
-static const OpenclErrorMsg opencl_err_msg[] = {
-    {CL_DEVICE_NOT_FOUND,                               "DEVICE NOT FOUND"},
-    {CL_DEVICE_NOT_AVAILABLE,                           "DEVICE NOT AVAILABLE"},
-    {CL_COMPILER_NOT_AVAILABLE,                         "COMPILER NOT AVAILABLE"},
-    {CL_MEM_OBJECT_ALLOCATION_FAILURE,                  "MEM OBJECT ALLOCATION FAILURE"},
-    {CL_OUT_OF_RESOURCES,                               "OUT OF RESOURCES"},
-    {CL_OUT_OF_HOST_MEMORY,                             "OUT OF HOST MEMORY"},
-    {CL_PROFILING_INFO_NOT_AVAILABLE,                   "PROFILING INFO NOT AVAILABLE"},
-    {CL_MEM_COPY_OVERLAP,                               "MEM COPY OVERLAP"},
-    {CL_IMAGE_FORMAT_MISMATCH,                          "IMAGE FORMAT MISMATCH"},
-    {CL_IMAGE_FORMAT_NOT_SUPPORTED,                     "IMAGE FORMAT NOT_SUPPORTED"},
-    {CL_BUILD_PROGRAM_FAILURE,                          "BUILD PROGRAM FAILURE"},
-    {CL_MAP_FAILURE,                                    "MAP FAILURE"},
-    {CL_MISALIGNED_SUB_BUFFER_OFFSET,                   "MISALIGNED SUB BUFFER OFFSET"},
-    {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST,      "EXEC STATUS ERROR FOR EVENTS IN WAIT LIST"},
-    {CL_COMPILE_PROGRAM_FAILURE,                        "COMPILE PROGRAM FAILURE"},
-    {CL_LINKER_NOT_AVAILABLE,                           "LINKER NOT AVAILABLE"},
-    {CL_LINK_PROGRAM_FAILURE,                           "LINK PROGRAM FAILURE"},
-    {CL_DEVICE_PARTITION_FAILED,                        "DEVICE PARTITION FAILED"},
-    {CL_KERNEL_ARG_INFO_NOT_AVAILABLE,                  "KERNEL ARG INFO NOT AVAILABLE"},
-    {CL_INVALID_VALUE,                                  "INVALID VALUE"},
-    {CL_INVALID_DEVICE_TYPE,                            "INVALID DEVICE TYPE"},
-    {CL_INVALID_PLATFORM,                               "INVALID PLATFORM"},
-    {CL_INVALID_DEVICE,                                 "INVALID DEVICE"},
-    {CL_INVALID_CONTEXT,                                "INVALID CONTEXT"},
-    {CL_INVALID_QUEUE_PROPERTIES,                       "INVALID QUEUE PROPERTIES"},
-    {CL_INVALID_COMMAND_QUEUE,                          "INVALID COMMAND QUEUE"},
-    {CL_INVALID_HOST_PTR,                               "INVALID HOST PTR"},
-    {CL_INVALID_MEM_OBJECT,                             "INVALID MEM OBJECT"},
-    {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,                "INVALID IMAGE FORMAT DESCRIPTOR"},
-    {CL_INVALID_IMAGE_SIZE,                             "INVALID IMAGE SIZE"},
-    {CL_INVALID_SAMPLER,                                "INVALID SAMPLER"},
-    {CL_INVALID_BINARY,                                 "INVALID BINARY"},
-    {CL_INVALID_BUILD_OPTIONS,                          "INVALID BUILD OPTIONS"},
-    {CL_INVALID_PROGRAM,                                "INVALID PROGRAM"},
-    {CL_INVALID_PROGRAM_EXECUTABLE,                     "INVALID PROGRAM EXECUTABLE"},
-    {CL_INVALID_KERNEL_NAME,                            "INVALID KERNEL NAME"},
-    {CL_INVALID_KERNEL_DEFINITION,                      "INVALID KERNEL DEFINITION"},
-    {CL_INVALID_KERNEL,                                 "INVALID KERNEL"},
-    {CL_INVALID_ARG_INDEX,                              "INVALID ARG INDEX"},
-    {CL_INVALID_ARG_VALUE,                              "INVALID ARG VALUE"},
-    {CL_INVALID_ARG_SIZE,                               "INVALID ARG_SIZE"},
-    {CL_INVALID_KERNEL_ARGS,                            "INVALID KERNEL ARGS"},
-    {CL_INVALID_WORK_DIMENSION,                         "INVALID WORK DIMENSION"},
-    {CL_INVALID_WORK_GROUP_SIZE,                        "INVALID WORK GROUP SIZE"},
-    {CL_INVALID_WORK_ITEM_SIZE,                         "INVALID WORK ITEM SIZE"},
-    {CL_INVALID_GLOBAL_OFFSET,                          "INVALID GLOBAL OFFSET"},
-    {CL_INVALID_EVENT_WAIT_LIST,                        "INVALID EVENT WAIT LIST"},
-    {CL_INVALID_EVENT,                                  "INVALID EVENT"},
-    {CL_INVALID_OPERATION,                              "INVALID OPERATION"},
-    {CL_INVALID_GL_OBJECT,                              "INVALID GL OBJECT"},
-    {CL_INVALID_BUFFER_SIZE,                            "INVALID BUFFER SIZE"},
-    {CL_INVALID_MIP_LEVEL,                              "INVALID MIP LEVEL"},
-    {CL_INVALID_GLOBAL_WORK_SIZE,                       "INVALID GLOBAL WORK SIZE"},
-    {CL_INVALID_PROPERTY,                               "INVALID PROPERTY"},
-    {CL_INVALID_IMAGE_DESCRIPTOR,                       "INVALID IMAGE DESCRIPTOR"},
-    {CL_INVALID_COMPILER_OPTIONS,                       "INVALID COMPILER OPTIONS"},
-    {CL_INVALID_LINKER_OPTIONS,                         "INVALID LINKER OPTIONS"},
-    {CL_INVALID_DEVICE_PARTITION_COUNT,                 "INVALID DEVICE PARTITION COUNT"},
-};
-
-const char *av_opencl_errstr(cl_int status)
-{
-    int i;
-    for (i = 0; i < FF_ARRAY_ELEMS(opencl_err_msg); i++) {
-        if (opencl_err_msg[i].err_code == status)
-            return opencl_err_msg[i].err_str;
-    }
-    return "unknown error";
-}
-
-static void free_device_list(AVOpenCLDeviceList *device_list)
-{
-    int i, j;
-    if (!device_list || !device_list->platform_node)
-        return;
-    for (i = 0; i < device_list->platform_num; i++) {
-        if (!device_list->platform_node[i])
-            continue;
-        for (j = 0; j < device_list->platform_node[i]->device_num; j++) {
-            av_freep(&(device_list->platform_node[i]->device_node[j]->device_name));
-            av_freep(&(device_list->platform_node[i]->device_node[j]));
-        }
-        av_freep(&device_list->platform_node[i]->device_node);
-        av_freep(&(device_list->platform_node[i]->platform_name));
-        av_freep(&device_list->platform_node[i]);
-    }
-    av_freep(&device_list->platform_node);
-    device_list->platform_num = 0;
-}
-
-static int get_device_list(AVOpenCLDeviceList *device_list)
-{
-    cl_int status;
-    int i, j, k, device_num, total_devices_num, ret = 0;
-    int *devices_num;
-    cl_platform_id *platform_ids = NULL;
-    cl_device_id *device_ids = NULL;
-    AVOpenCLDeviceNode *device_node = NULL;
-    size_t platform_name_size = 0;
-    size_t device_name_size = 0;
-    status = clGetPlatformIDs(0, NULL, &device_list->platform_num);
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status));
-        return AVERROR_EXTERNAL;
-    }
-    platform_ids = av_mallocz_array(device_list->platform_num, sizeof(cl_platform_id));
-    if (!platform_ids)
-        return AVERROR(ENOMEM);
-    status = clGetPlatformIDs(device_list->platform_num, platform_ids, NULL);
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-                "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status));
-        ret = AVERROR_EXTERNAL;
-        goto end;
-    }
-    device_list->platform_node = av_mallocz_array(device_list->platform_num, sizeof(AVOpenCLPlatformNode *));
-    if (!device_list->platform_node) {
-        ret = AVERROR(ENOMEM);
-        goto end;
-    }
-    devices_num = av_mallocz(sizeof(int) * FF_ARRAY_ELEMS(device_type));
-    if (!devices_num) {
-        ret = AVERROR(ENOMEM);
-        goto end;
-    }
-    for (i = 0; i < device_list->platform_num; i++) {
-        device_list->platform_node[i] = av_mallocz(sizeof(AVOpenCLPlatformNode));
-        if (!device_list->platform_node[i]) {
-            ret = AVERROR(ENOMEM);
-            goto end;
-        }
-        device_list->platform_node[i]->platform_id = platform_ids[i];
-        status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
-                                   0, NULL, &platform_name_size);
-        if (status != CL_SUCCESS) {
-            av_log(&opencl_ctx, AV_LOG_WARNING,
-                    "Could not get size of platform name: %s\n", av_opencl_errstr(status));
-        } else {
-            device_list->platform_node[i]->platform_name = av_malloc(platform_name_size * sizeof(char));
-            if (!device_list->platform_node[i]->platform_name) {
-                av_log(&opencl_ctx, AV_LOG_WARNING,
-                        "Could not allocate memory for device name: %s\n", av_opencl_errstr(status));
-            } else {
-                status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
-                                           platform_name_size * sizeof(char),
-                                           device_list->platform_node[i]->platform_name, NULL);
-                if (status != CL_SUCCESS) {
-                    av_log(&opencl_ctx, AV_LOG_WARNING,
-                            "Could not get platform name: %s\n", av_opencl_errstr(status));
-                }
-            }
-        }
-        total_devices_num = 0;
-        for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) {
-            status = clGetDeviceIDs(device_list->platform_node[i]->platform_id,
-                                    device_type[j], 0, NULL, &devices_num[j]);
-            total_devices_num += devices_num[j];
-        }
-        device_list->platform_node[i]->device_node = av_mallocz_array(total_devices_num, sizeof(AVOpenCLDeviceNode *));
-        if (!device_list->platform_node[i]->device_node) {
-            ret = AVERROR(ENOMEM);
-            goto end;
-        }
-        for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) {
-            if (devices_num[j]) {
-                device_ids = av_mallocz_array(devices_num[j], sizeof(cl_device_id));
-                if (!device_ids) {
-                    ret = AVERROR(ENOMEM);
-                    goto end;
-                }
-                status = clGetDeviceIDs(device_list->platform_node[i]->platform_id, device_type[j],
-                                        devices_num[j], device_ids, NULL);
-                if (status != CL_SUCCESS) {
-                    av_log(&opencl_ctx, AV_LOG_WARNING,
-                            "Could not get device ID: %s:\n", av_opencl_errstr(status));
-                    av_freep(&device_ids);
-                    continue;
-                }
-                for (k = 0; k < devices_num[j]; k++) {
-                    device_num = device_list->platform_node[i]->device_num;
-                    device_list->platform_node[i]->device_node[device_num] = av_mallocz(sizeof(AVOpenCLDeviceNode));
-                    if (!device_list->platform_node[i]->device_node[device_num]) {
-                        ret = AVERROR(ENOMEM);
-                        goto end;
-                    }
-                    device_node = device_list->platform_node[i]->device_node[device_num];
-                    device_node->device_id = device_ids[k];
-                    device_node->device_type = device_type[j];
-                    status = clGetDeviceInfo(device_node->device_id, CL_DEVICE_NAME,
-                                             0, NULL, &device_name_size);
-                    if (status != CL_SUCCESS) {
-                        av_log(&opencl_ctx, AV_LOG_WARNING,
-                                "Could not get size of device name: %s\n", av_opencl_errstr(status));
-                        continue;
-                    }
-                    device_node->device_name = av_malloc(device_name_size * sizeof(char));
-                    if (!device_node->device_name) {
-                        av_log(&opencl_ctx, AV_LOG_WARNING,
-                                "Could not allocate memory for device name: %s\n", av_opencl_errstr(status));
-                        continue;
-                    }
-                    status = clGetDeviceInfo(device_node->device_id, CL_DEVICE_NAME,
-                                             device_name_size * sizeof(char),
-                                             device_node->device_name, NULL);
-                    if (status != CL_SUCCESS) {
-                        av_log(&opencl_ctx, AV_LOG_WARNING,
-                                "Could not get device name: %s\n", av_opencl_errstr(status));
-                        continue;
-                    }
-                    device_list->platform_node[i]->device_num++;
-                }
-                av_freep(&device_ids);
-            }
-        }
-    }
-end:
-    av_freep(&platform_ids);
-    av_freep(&devices_num);
-    av_freep(&device_ids);
-    if (ret < 0)
-        free_device_list(device_list);
-    return ret;
-}
-
-int av_opencl_get_device_list(AVOpenCLDeviceList **device_list)
-{
-    int ret = 0;
-    *device_list = av_mallocz(sizeof(AVOpenCLDeviceList));
-    if (!(*device_list)) {
-        av_log(&opencl_ctx, AV_LOG_ERROR, "Could not allocate opencl device list\n");
-        return AVERROR(ENOMEM);
-    }
-    ret = get_device_list(*device_list);
-    if (ret < 0) {
-        av_log(&opencl_ctx, AV_LOG_ERROR, "Could not get device list from environment\n");
-        free_device_list(*device_list);
-        av_freep(device_list);
-        return ret;
-    }
-    return ret;
-}
-
-void av_opencl_free_device_list(AVOpenCLDeviceList **device_list)
-{
-    free_device_list(*device_list);
-    av_freep(device_list);
-}
-
-static inline int init_opencl_mtx(void)
-{
-#if HAVE_THREADS
-    if (!atomic_opencl_lock) {
-        int err;
-        pthread_mutex_t *tmp = av_malloc(sizeof(pthread_mutex_t));
-        if (!tmp)
-            return AVERROR(ENOMEM);
-        if ((err = pthread_mutex_init(tmp, NULL))) {
-            av_free(tmp);
-            return AVERROR(err);
-        }
-        if (avpriv_atomic_ptr_cas((void * volatile *)&atomic_opencl_lock, NULL, tmp)) {
-            pthread_mutex_destroy(tmp);
-            av_free(tmp);
-        }
-    }
-#endif
-    return 0;
-}
-
-int av_opencl_set_option(const char *key, const char *val)
-{
-    int ret = init_opencl_mtx( );
-    if (ret < 0)
-        return ret;
-    LOCK_OPENCL;
-    if (!opencl_ctx.opt_init_flag) {
-        av_opt_set_defaults(&opencl_ctx);
-        opencl_ctx.opt_init_flag = 1;
-    }
-    ret = av_opt_set(&opencl_ctx, key, val, 0);
-    UNLOCK_OPENCL;
-    return ret;
-}
-
-int av_opencl_get_option(const char *key, uint8_t **out_val)
-{
-    int ret = 0;
-    LOCK_OPENCL;
-    ret = av_opt_get(&opencl_ctx, key, 0, out_val);
-    UNLOCK_OPENCL;
-    return ret;
-}
-
-void av_opencl_free_option(void)
-{
-    /*FIXME: free openclutils context*/
-    LOCK_OPENCL;
-    av_opt_free(&opencl_ctx);
-    UNLOCK_OPENCL;
-}
-
-AVOpenCLExternalEnv *av_opencl_alloc_external_env(void)
-{
-    AVOpenCLExternalEnv *ext = av_mallocz(sizeof(AVOpenCLExternalEnv));
-    if (!ext) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not malloc external opencl environment data space\n");
-    }
-    return ext;
-}
-
-void av_opencl_free_external_env(AVOpenCLExternalEnv **ext_opencl_env)
-{
-    av_freep(ext_opencl_env);
-}
-
-int av_opencl_register_kernel_code(const char *kernel_code)
-{
-    int i, ret = init_opencl_mtx( );
-    if (ret < 0)
-        return ret;
-    LOCK_OPENCL;
-    if (opencl_ctx.kernel_code_count >= MAX_KERNEL_CODE_NUM) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not register kernel code, maximum number of registered kernel code %d already reached\n",
-               MAX_KERNEL_CODE_NUM);
-        ret = AVERROR(EINVAL);
-        goto end;
-    }
-    for (i = 0; i < opencl_ctx.kernel_code_count; i++) {
-        if (opencl_ctx.kernel_code[i].kernel_string == kernel_code) {
-            av_log(&opencl_ctx, AV_LOG_WARNING, "Same kernel code has been registered\n");
-            goto end;
-        }
-    }
-    opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].kernel_string = kernel_code;
-    opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].is_compiled = 0;
-    opencl_ctx.kernel_code_count++;
-end:
-    UNLOCK_OPENCL;
-    return ret;
-}
-
-cl_program av_opencl_compile(const char *program_name, const char *build_opts)
-{
-    int i;
-    cl_int status, build_status;
-    int kernel_code_idx = 0;
-    const char *kernel_source = NULL;
-    size_t kernel_code_len;
-    char* ptr = NULL;
-    cl_program program = NULL;
-    size_t log_size;
-    char *log = NULL;
-
-    LOCK_OPENCL;
-    for (i = 0; i < opencl_ctx.kernel_code_count; i++) {
-        // identify a program using a unique name within the kernel source
-        ptr = av_stristr(opencl_ctx.kernel_code[i].kernel_string, program_name);
-        if (ptr && !opencl_ctx.kernel_code[i].is_compiled) {
-            kernel_source = opencl_ctx.kernel_code[i].kernel_string;
-            kernel_code_len = strlen(opencl_ctx.kernel_code[i].kernel_string);
-            kernel_code_idx = i;
-            break;
-        }
-    }
-    if (!kernel_source) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Unable to find OpenCL kernel source '%s'\n", program_name);
-        goto end;
-    }
-
-    /* create a CL program from kernel source */
-    program = clCreateProgramWithSource(opencl_ctx.context, 1, &kernel_source, &kernel_code_len, &status);
-    if(status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Unable to create OpenCL program '%s': %s\n", program_name, av_opencl_errstr(status));
-        program = NULL;
-        goto end;
-    }
-
-    build_status = clBuildProgram(program, 1, &(opencl_ctx.device_id), build_opts, NULL, NULL);
-    status = clGetProgramBuildInfo(program, opencl_ctx.device_id,
-                                   CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_WARNING,
-               "Failed to get compilation log: %s\n",
-               av_opencl_errstr(status));
-    } else {
-        log = av_malloc(log_size);
-        if (log) {
-            status = clGetProgramBuildInfo(program, opencl_ctx.device_id,
-                                           CL_PROGRAM_BUILD_LOG, log_size,
-                                           log, NULL);
-            if (status != CL_SUCCESS) {
-                av_log(&opencl_ctx, AV_LOG_WARNING,
-                       "Failed to get compilation log: %s\n",
-                       av_opencl_errstr(status));
-            } else {
-                int level = build_status == CL_SUCCESS ? AV_LOG_DEBUG :
-                                                         AV_LOG_ERROR;
-                av_log(&opencl_ctx, level, "Compilation log:\n%s\n", log);
-            }
-        }
-        av_freep(&log);
-    }
-    if (build_status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Compilation failed with OpenCL program '%s': %s\n",
-               program_name, av_opencl_errstr(build_status));
-        program = NULL;
-        goto end;
-    }
-
-    opencl_ctx.kernel_code[kernel_code_idx].is_compiled = 1;
-end:
-    UNLOCK_OPENCL;
-    return program;
-}
-
-cl_command_queue av_opencl_get_command_queue(void)
-{
-    return opencl_ctx.command_queue;
-}
-
-static int init_opencl_env(OpenclContext *opencl_ctx, AVOpenCLExternalEnv *ext_opencl_env)
-{
-    cl_int status;
-    cl_context_properties cps[3];
-    int i, ret = 0;
-    AVOpenCLDeviceNode *device_node = NULL;
-
-    if (ext_opencl_env) {
-        if (opencl_ctx->is_user_created)
-            return 0;
-        opencl_ctx->platform_id     = ext_opencl_env->platform_id;
-        opencl_ctx->is_user_created = 1;
-        opencl_ctx->command_queue   = ext_opencl_env->command_queue;
-        opencl_ctx->context         = ext_opencl_env->context;
-        opencl_ctx->device_id       = ext_opencl_env->device_id;
-        opencl_ctx->device_type     = ext_opencl_env->device_type;
-    } else {
-        if (!opencl_ctx->is_user_created) {
-            if (!opencl_ctx->device_list.platform_num) {
-                ret = get_device_list(&opencl_ctx->device_list);
-                if (ret < 0) {
-                    return ret;
-                }
-            }
-            if (opencl_ctx->platform_idx >= 0) {
-                if (opencl_ctx->device_list.platform_num < opencl_ctx->platform_idx + 1) {
-                    av_log(opencl_ctx, AV_LOG_ERROR, "User set platform index not exist\n");
-                    return AVERROR(EINVAL);
-                }
-                if (!opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num) {
-                    av_log(opencl_ctx, AV_LOG_ERROR, "No devices in user specific platform with index %d\n",
-                           opencl_ctx->platform_idx);
-                    return AVERROR(EINVAL);
-                }
-                opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_id;
-            } else {
-                /* get a usable platform by default*/
-                for (i = 0; i < opencl_ctx->device_list.platform_num; i++) {
-                    if (opencl_ctx->device_list.platform_node[i]->device_num) {
-                        opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[i]->platform_id;
-                        opencl_ctx->platform_idx = i;
-                        break;
-                    }
-                }
-            }
-            if (!opencl_ctx->platform_id) {
-                av_log(opencl_ctx, AV_LOG_ERROR, "Could not get OpenCL platforms\n");
-                return AVERROR_EXTERNAL;
-            }
-            /* get a usable device*/
-            if (opencl_ctx->device_idx >= 0) {
-                if (opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num < opencl_ctx->device_idx + 1) {
-                    av_log(opencl_ctx, AV_LOG_ERROR,
-                           "Could not get OpenCL device idx %d in the user set platform\n", opencl_ctx->platform_idx);
-                    return AVERROR(EINVAL);
-                }
-            } else {
-                opencl_ctx->device_idx = 0;
-            }
-
-            device_node = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_node[opencl_ctx->device_idx];
-            opencl_ctx->device_id = device_node->device_id;
-            opencl_ctx->device_type = device_node->device_type;
-
-            /*
-             * Use available platform.
-             */
-            av_log(opencl_ctx, AV_LOG_VERBOSE, "Platform Name: %s, Device Name: %s\n",
-                   opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_name,
-                   device_node->device_name);
-            cps[0] = CL_CONTEXT_PLATFORM;
-            cps[1] = (cl_context_properties)opencl_ctx->platform_id;
-            cps[2] = 0;
-
-            opencl_ctx->context = clCreateContextFromType(cps, opencl_ctx->device_type,
-                                                       NULL, NULL, &status);
-            if (status != CL_SUCCESS) {
-                av_log(opencl_ctx, AV_LOG_ERROR,
-                       "Could not get OpenCL context from device type: %s\n", av_opencl_errstr(status));
-                return AVERROR_EXTERNAL;
-            }
-            opencl_ctx->command_queue = clCreateCommandQueue(opencl_ctx->context, opencl_ctx->device_id,
-                                                          0, &status);
-            if (status != CL_SUCCESS) {
-                av_log(opencl_ctx, AV_LOG_ERROR,
-                       "Could not create OpenCL command queue: %s\n", av_opencl_errstr(status));
-                return AVERROR_EXTERNAL;
-            }
-        }
-    }
-    return ret;
-}
-
-int av_opencl_init(AVOpenCLExternalEnv *ext_opencl_env)
-{
-    int ret = init_opencl_mtx( );
-    if (ret < 0)
-        return ret;
-    LOCK_OPENCL;
-    if (!opencl_ctx.init_count) {
-        if (!opencl_ctx.opt_init_flag) {
-            av_opt_set_defaults(&opencl_ctx);
-            opencl_ctx.opt_init_flag = 1;
-        }
-        ret = init_opencl_env(&opencl_ctx, ext_opencl_env);
-        if (ret < 0)
-            goto end;
-        if (opencl_ctx.kernel_code_count <= 0) {
-            av_log(&opencl_ctx, AV_LOG_ERROR,
-                   "No kernel code is registered, compile kernel file failed\n");
-            ret = AVERROR(EINVAL);
-            goto end;
-        }
-    }
-    opencl_ctx.init_count++;
-end:
-    UNLOCK_OPENCL;
-    return ret;
-}
-
-void av_opencl_uninit(void)
-{
-    int i;
-    cl_int status;
-    LOCK_OPENCL;
-    opencl_ctx.init_count--;
-    if (opencl_ctx.is_user_created)
-        goto end;
-    if (opencl_ctx.init_count > 0)
-        goto end;
-    if (opencl_ctx.command_queue) {
-        status = clReleaseCommandQueue(opencl_ctx.command_queue);
-        if (status != CL_SUCCESS) {
-            av_log(&opencl_ctx, AV_LOG_ERROR,
-                   "Could not release OpenCL command queue: %s\n", av_opencl_errstr(status));
-        }
-        opencl_ctx.command_queue = NULL;
-    }
-    if (opencl_ctx.context) {
-        status = clReleaseContext(opencl_ctx.context);
-        if (status != CL_SUCCESS) {
-            av_log(&opencl_ctx, AV_LOG_ERROR,
-                   "Could not release OpenCL context: %s\n", av_opencl_errstr(status));
-        }
-        opencl_ctx.context = NULL;
-    }
-    for (i = 0; i < opencl_ctx.kernel_code_count; i++) {
-        opencl_ctx.kernel_code[i].is_compiled = 0;
-    }
-    free_device_list(&opencl_ctx.device_list);
-end:
-    if (opencl_ctx.init_count <= 0)
-        av_opt_free(&opencl_ctx); //FIXME: free openclutils context
-    UNLOCK_OPENCL;
-}
-
-int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr)
-{
-    cl_int status;
-    *cl_buf = clCreateBuffer(opencl_ctx.context, flags, cl_buf_size, host_ptr, &status);
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR, "Could not create OpenCL buffer: %s\n", av_opencl_errstr(status));
-        return AVERROR_EXTERNAL;
-    }
-    return 0;
-}
-
-void av_opencl_buffer_release(cl_mem *cl_buf)
-{
-    cl_int status = 0;
-    if (!cl_buf)
-        return;
-    status = clReleaseMemObject(*cl_buf);
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not release OpenCL buffer: %s\n", av_opencl_errstr(status));
-    }
-    memset(cl_buf, 0, sizeof(*cl_buf));
-}
-
-int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size)
-{
-    cl_int status;
-    void *mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf,
-                                      CL_TRUE, CL_MAP_WRITE, 0, sizeof(uint8_t) * buf_size,
-                                      0, NULL, NULL, &status);
-
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status));
-        return AVERROR_EXTERNAL;
-    }
-    memcpy(mapped, src_buf, buf_size);
-
-    status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, mapped, 0, NULL, NULL);
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status));
-        return AVERROR_EXTERNAL;
-    }
-    return 0;
-}
-
-int av_opencl_buffer_read(uint8_t *dst_buf, cl_mem src_cl_buf, size_t buf_size)
-{
-    cl_int status;
-    void *mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf,
-                                      CL_TRUE, CL_MAP_READ, 0, buf_size,
-                                      0, NULL, NULL, &status);
-
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status));
-        return AVERROR_EXTERNAL;
-    }
-    memcpy(dst_buf, mapped, buf_size);
-
-    status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, mapped, 0, NULL, NULL);
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status));
-        return AVERROR_EXTERNAL;
-    }
-    return 0;
-}
-
-int av_opencl_buffer_write_image(cl_mem dst_cl_buf, size_t cl_buffer_size, int dst_cl_offset,
-                                 uint8_t **src_data, int *plane_size, int plane_num)
-{
-    int i, buffer_size = 0;
-    uint8_t *temp;
-    cl_int status;
-    void *mapped;
-    if ((unsigned int)plane_num > 8) {
-        return AVERROR(EINVAL);
-    }
-    for (i = 0;i < plane_num;i++) {
-        buffer_size += plane_size[i];
-    }
-    if (buffer_size > cl_buffer_size) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Cannot write image to OpenCL buffer: buffer too small\n");
-        return AVERROR(EINVAL);
-    }
-    mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf,
-                                CL_TRUE, CL_MAP_WRITE, 0, buffer_size + dst_cl_offset,
-                                0, NULL, NULL, &status);
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status));
-        return AVERROR_EXTERNAL;
-    }
-    temp = mapped;
-    temp += dst_cl_offset;
-    for (i = 0; i < plane_num; i++) {
-        memcpy(temp, src_data[i], plane_size[i]);
-        temp += plane_size[i];
-    }
-    status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, mapped, 0, NULL, NULL);
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status));
-        return AVERROR_EXTERNAL;
-    }
-    return 0;
-}
-
-int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_num,
-                                cl_mem src_cl_buf, size_t cl_buffer_size)
-{
-    int i,buffer_size = 0,ret = 0;
-    uint8_t *temp;
-    void *mapped;
-    cl_int status;
-    if ((unsigned int)plane_num > 8) {
-        return AVERROR(EINVAL);
-    }
-    for (i = 0; i < plane_num; i++) {
-        buffer_size += plane_size[i];
-    }
-    if (buffer_size > cl_buffer_size) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Cannot write image to CPU buffer: OpenCL buffer too small\n");
-        return AVERROR(EINVAL);
-    }
-    mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf,
-                                CL_TRUE, CL_MAP_READ, 0, buffer_size,
-                                0, NULL, NULL, &status);
-
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status));
-        return AVERROR_EXTERNAL;
-    }
-    temp = mapped;
-    if (ret >= 0) {
-        for (i = 0; i < plane_num; i++) {
-            memcpy(dst_data[i], temp, plane_size[i]);
-            temp += plane_size[i];
-        }
-    }
-    status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, mapped, 0, NULL, NULL);
-    if (status != CL_SUCCESS) {
-        av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status));
-        return AVERROR_EXTERNAL;
-    }
-    return 0;
-}
-
-int64_t av_opencl_benchmark(AVOpenCLDeviceNode *device_node, cl_platform_id platform,
-                            int64_t (*benchmark)(AVOpenCLExternalEnv *ext_opencl_env))
-{
-    int64_t ret = 0;
-    cl_int status;
-    cl_context_properties cps[3];
-    AVOpenCLExternalEnv *ext_opencl_env = NULL;
-
-    ext_opencl_env = av_opencl_alloc_external_env();
-    ext_opencl_env->device_id = device_node->device_id;
-    ext_opencl_env->device_type = device_node->device_type;
-    av_log(&opencl_ctx, AV_LOG_VERBOSE, "Performing test on OpenCL device %s\n",
-           device_node->device_name);
-
-    cps[0] = CL_CONTEXT_PLATFORM;
-    cps[1] = (cl_context_properties)platform;
-    cps[2] = 0;
-    ext_opencl_env->context = clCreateContextFromType(cps, ext_opencl_env->device_type,
-                                                      NULL, NULL, &status);
-    if (status != CL_SUCCESS || !ext_opencl_env->context) {
-        ret = AVERROR_EXTERNAL;
-        goto end;
-    }
-    ext_opencl_env->command_queue = clCreateCommandQueue(ext_opencl_env->context,
-                                                         ext_opencl_env->device_id, 0, &status);
-    if (status != CL_SUCCESS || !ext_opencl_env->command_queue) {
-        ret = AVERROR_EXTERNAL;
-        goto end;
-    }
-    ret = benchmark(ext_opencl_env);
-    if (ret < 0)
-        av_log(&opencl_ctx, AV_LOG_ERROR, "Benchmark failed with OpenCL device %s\n",
-               device_node->device_name);
-end:
-    if (ext_opencl_env->command_queue)
-        clReleaseCommandQueue(ext_opencl_env->command_queue);
-    if (ext_opencl_env->context)
-        clReleaseContext(ext_opencl_env->context);
-    av_opencl_free_external_env(&ext_opencl_env);
-    return ret;
-}

diff --git a/libavutil/opencl.h b/libavutil/opencl.h
deleted file mode 100644
index b709927..0000000
--- a/libavutil/opencl.h
+++ /dev/null

@@ -1,292 +0,0 @@
-/*
- * Copyright (C) 2012 Peng  Gao     <peng@multicorewareinc.com>
- * Copyright (C) 2012 Li    Cao     <li@multicorewareinc.com>
- * Copyright (C) 2012 Wei   Gao     <weigao@multicorewareinc.com>
- * Copyright (C) 2013 Lenny Wang    <lwanghpc@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * OpenCL wrapper
- *
- * This interface is considered still experimental and its API and ABI may
- * change without prior notice.
- */
-
-#ifndef AVUTIL_OPENCL_H
-#define AVUTIL_OPENCL_H
-
-#define CL_USE_DEPRECATED_OPENCL_1_2_APIS 1
-#ifdef __APPLE__
-#include <OpenCL/cl.h>
-#else
-#include <CL/cl.h>
-#endif
-#include <stdint.h>
-#include "dict.h"
-
-#include "libavutil/version.h"
-
-#define AV_OPENCL_KERNEL( ... )# __VA_ARGS__
-
-typedef struct {
-    int device_type;
-    char *device_name;
-    cl_device_id device_id;
-} AVOpenCLDeviceNode;
-
-typedef struct {
-    cl_platform_id platform_id;
-    char *platform_name;
-    int device_num;
-    AVOpenCLDeviceNode **device_node;
-} AVOpenCLPlatformNode;
-
-typedef struct {
-    int platform_num;
-    AVOpenCLPlatformNode **platform_node;
-} AVOpenCLDeviceList;
-
-typedef struct {
-    cl_platform_id platform_id;
-    cl_device_type device_type;
-    cl_context context;
-    cl_device_id  device_id;
-    cl_command_queue command_queue;
-    char *platform_name;
-} AVOpenCLExternalEnv;
-
-/**
- * Get OpenCL device list.
- *
- * It must be freed with av_opencl_free_device_list().
- *
- * @param device_list pointer to OpenCL environment device list,
- *                    should be released by av_opencl_free_device_list()
- *
- * @return  >=0 on success, a negative error code in case of failure
- */
-int av_opencl_get_device_list(AVOpenCLDeviceList **device_list);
-
-/**
-  * Free OpenCL device list.
-  *
-  * @param device_list pointer to OpenCL environment device list
-  *                       created by av_opencl_get_device_list()
-  */
-void av_opencl_free_device_list(AVOpenCLDeviceList **device_list);
-
-/**
- * Set option in the global OpenCL context.
- *
- * This options affect the operation performed by the next
- * av_opencl_init() operation.
- *
- * The currently accepted options are:
- * - platform: set index of platform in device list
- * - device: set index of device in device list
- *
- * See reference "OpenCL Specification Version: 1.2 chapter 5.6.4".
- *
- * @param key                 option key
- * @param val                 option value
- * @return >=0 on success, a negative error code in case of failure
- * @see av_opencl_get_option()
- */
-int av_opencl_set_option(const char *key, const char *val);
-
-/**
- * Get option value from the global OpenCL context.
- *
- * @param key        option key
- * @param out_val  pointer to location where option value will be
- *                         written, must be freed with av_freep()
- * @return  >=0 on success, a negative error code in case of failure
- * @see av_opencl_set_option()
- */
-int av_opencl_get_option(const char *key, uint8_t **out_val);
-
-/**
- * Free option values of the global OpenCL context.
- *
- */
-void av_opencl_free_option(void);
-
-/**
- * Allocate OpenCL external environment.
- *
- * It must be freed with av_opencl_free_external_env().
- *
- * @return pointer to allocated OpenCL external environment
- */
-AVOpenCLExternalEnv *av_opencl_alloc_external_env(void);
-
-/**
- * Free OpenCL external environment.
- *
- * @param ext_opencl_env pointer to OpenCL external environment
- *                       created by av_opencl_alloc_external_env()
- */
-void av_opencl_free_external_env(AVOpenCLExternalEnv **ext_opencl_env);
-
-/**
- * Get OpenCL error string.
- *
- * @param status    OpenCL error code
- * @return OpenCL error string
- */
-const char *av_opencl_errstr(cl_int status);
-
-/**
- * Register kernel code.
- *
- *  The registered kernel code is stored in a global context, and compiled
- *  in the runtime environment when av_opencl_init() is called.
- *
- * @param kernel_code    kernel code to be compiled in the OpenCL runtime environment
- * @return  >=0 on success, a negative error code in case of failure
- */
-int av_opencl_register_kernel_code(const char *kernel_code);
-
-/**
- * Initialize the run time OpenCL environment
- *
- * @param ext_opencl_env external OpenCL environment, created by an
- *                       application program, ignored if set to NULL
- * @return >=0 on success, a negative error code in case of failure
- */
-int av_opencl_init(AVOpenCLExternalEnv *ext_opencl_env);
-
-/**
- * compile specific OpenCL kernel source
- *
- * @param program_name  pointer to a program name used for identification
- * @param build_opts    pointer to a string that describes the preprocessor
- *                      build options to be used for building the program
- * @return a cl_program object
- */
-cl_program av_opencl_compile(const char *program_name, const char* build_opts);
-
-/**
- * get OpenCL command queue
- *
- * @return a cl_command_queue object
- */
-cl_command_queue av_opencl_get_command_queue(void);
-
-/**
- * Create OpenCL buffer.
- *
- * The buffer is used to save the data used or created by an OpenCL
- * kernel.
- * The created buffer must be released with av_opencl_buffer_release().
- *
- * See clCreateBuffer() function reference for more information about
- * the parameters.
- *
- * @param cl_buf       pointer to OpenCL buffer
- * @param cl_buf_size  size in bytes of the OpenCL buffer to create
- * @param flags        flags used to control buffer attributes
- * @param host_ptr     host pointer of the OpenCL buffer
- * @return >=0 on success, a negative error code in case of failure
- */
-int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr);
-
-/**
- * Write OpenCL buffer with data from src_buf.
- *
- * @param dst_cl_buf        pointer to OpenCL destination buffer
- * @param src_buf           pointer to source buffer
- * @param buf_size          size in bytes of the source and destination buffers
- * @return >=0 on success, a negative error code in case of failure
- */
-int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size);
-
-/**
- * Read data from OpenCL buffer to memory buffer.
- *
- * @param dst_buf           pointer to destination buffer (CPU memory)
- * @param src_cl_buf        pointer to source OpenCL buffer
- * @param buf_size          size in bytes of the source and destination buffers
- * @return >=0 on success, a negative error code in case of failure
- */
-int av_opencl_buffer_read(uint8_t *dst_buf, cl_mem src_cl_buf, size_t buf_size);
-
-/**
- * Write image data from memory to OpenCL buffer.
- *
- * The source must be an array of pointers to image plane buffers.
- *
- * @param dst_cl_buf         pointer to destination OpenCL buffer
- * @param dst_cl_buf_size    size in bytes of OpenCL buffer
- * @param dst_cl_buf_offset  the offset of the OpenCL buffer start position
- * @param src_data           array of pointers to source plane buffers
- * @param src_plane_sizes    array of sizes in bytes of the source plane buffers
- * @param src_plane_num      number of source image planes
- * @return >=0 on success, a negative error code in case of failure
- */
-int av_opencl_buffer_write_image(cl_mem dst_cl_buf, size_t cl_buffer_size, int dst_cl_offset,
-                                 uint8_t **src_data, int *plane_size, int plane_num);
-
-/**
- * Read image data from OpenCL buffer.
- *
- * @param dst_data           array of pointers to destination plane buffers
- * @param dst_plane_sizes    array of pointers to destination plane buffers
- * @param dst_plane_num      number of destination image planes
- * @param src_cl_buf         pointer to source OpenCL buffer
- * @param src_cl_buf_size    size in bytes of OpenCL buffer
- * @return >=0 on success, a negative error code in case of failure
- */
-int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_num,
-                                cl_mem src_cl_buf, size_t cl_buffer_size);
-
-/**
- * Release OpenCL buffer.
- *
- * @param cl_buf pointer to OpenCL buffer to release, which was
- *               previously filled with av_opencl_buffer_create()
- */
-void av_opencl_buffer_release(cl_mem *cl_buf);
-
-/**
- * Release OpenCL environment.
- *
- * The OpenCL environment is effectively released only if all the created
- * kernels had been released with av_opencl_release_kernel().
- */
-void av_opencl_uninit(void);
-
-/**
- * Benchmark an OpenCL device with a user defined callback function.  This function
- * sets up an external OpenCL environment including context and command queue on
- * the device then tears it down in the end.  The callback function should perform
- * the rest of the work.
- *
- * @param device            pointer to the OpenCL device to be used
- * @param platform          cl_platform_id handle to which the device belongs to
- * @param benchmark         callback function to perform the benchmark, return a
- *                          negative value in case of failure
- * @return the score passed from the callback function, a negative error code in case
- * of failure
- */
-int64_t av_opencl_benchmark(AVOpenCLDeviceNode *device, cl_platform_id platform,
-                            int64_t (*benchmark)(AVOpenCLExternalEnv *ext_opencl_env));
-
-#endif /* AVUTIL_OPENCL_H */

diff --git a/libavutil/opencl_internal.c b/libavutil/opencl_internal.c
deleted file mode 100644
index bdb4193..0000000
--- a/libavutil/opencl_internal.c
+++ /dev/null

@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2012 Peng Gao <peng@multicorewareinc.com>
- * Copyright (C) 2012 Li   Cao <li@multicorewareinc.com>
- * Copyright (C) 2012 Wei  Gao <weigao@multicorewareinc.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "opencl_internal.h"
-#include "libavutil/log.h"
-
-int avpriv_opencl_set_parameter(FFOpenclParam *opencl_param, ...)
-{
-    int ret = 0;
-    va_list arg_ptr;
-    void *param;
-    size_t param_size;
-    cl_int status;
-    if (!opencl_param->kernel) {
-        av_log(opencl_param->ctx, AV_LOG_ERROR, "OpenCL kernel must be set\n");
-        return AVERROR(EINVAL);
-    }
-    va_start(arg_ptr, opencl_param);
-    do {
-        param = va_arg(arg_ptr, void *);
-        if (!param)
-            break;
-        param_size = va_arg(arg_ptr, size_t);
-        if (!param_size) {
-            av_log(opencl_param->ctx, AV_LOG_ERROR, "Parameter size must not be 0\n");
-            ret = AVERROR(EINVAL);
-            goto end;
-        }
-        status = clSetKernelArg(opencl_param->kernel, opencl_param->param_num, param_size, param);
-        if (status != CL_SUCCESS) {
-            av_log(opencl_param->ctx, AV_LOG_ERROR, "Cannot set kernel argument: %s\n", av_opencl_errstr(status));
-            ret = AVERROR_EXTERNAL;
-            goto end;
-        }
-        opencl_param->param_num++;
-    } while (param && param_size);
-end:
-    va_end(arg_ptr);
-    return ret;
-}

diff --git a/libavutil/opencl_internal.h b/libavutil/opencl_internal.h
deleted file mode 100644
index 5cabb7b..0000000
--- a/libavutil/opencl_internal.h
+++ /dev/null

@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2012 Peng Gao <peng@multicorewareinc.com>
- * Copyright (C) 2012 Li   Cao <li@multicorewareinc.com>
- * Copyright (C) 2012 Wei  Gao <weigao@multicorewareinc.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVUTIL_OPENCL_INTERNAL_H
-#define AVUTIL_OPENCL_INTERNAL_H
-
-#include "attributes.h"
-#include "opencl.h"
-
-#define FF_OPENCL_PARAM_INFO(a) ((void*)(&(a))), (sizeof(a))
-
-typedef struct {
-    cl_kernel kernel;
-    int param_num;
-    void *ctx;
-} FFOpenclParam;
-
-av_warn_unused_result
-int avpriv_opencl_set_parameter(FFOpenclParam *opencl_param, ...);
-
-#endif /* AVUTIL_OPENCL_INTERNAL_H */

diff --git a/libavutil/opt.c b/libavutil/opt.c
index df88663..93d6c26 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c

@@ -463,6 +463,9 @@
     if (o->flags & AV_OPT_FLAG_READONLY)
         return AVERROR(EINVAL);
 
+    if (o->flags & AV_OPT_FLAG_DEPRECATED)
+        av_log(obj, AV_LOG_WARNING, "The \"%s\" option is deprecated: %s\n", name, o->help);
+
     dst = ((uint8_t *)target_obj) + o->offset;
     switch (o->type) {
     case AV_OPT_TYPE_BOOL:
@@ -493,15 +496,22 @@
     case AV_OPT_TYPE_SAMPLE_FMT:
         return set_string_sample_fmt(obj, o, val, dst);
     case AV_OPT_TYPE_DURATION:
-        if (!val) {
-            *(int64_t *)dst = 0;
+        {
+            int64_t usecs = 0;
+            if (val) {
+                if ((ret = av_parse_time(&usecs, val, 1)) < 0) {
+                    av_log(obj, AV_LOG_ERROR, "Unable to parse option value \"%s\" as duration\n", val);
+                    return ret;
+                }
+            }
+            if (usecs < o->min || usecs > o->max) {
+                av_log(obj, AV_LOG_ERROR, "Value %f for parameter '%s' out of range [%g - %g]\n",
+                       usecs / 1000000.0, o->name, o->min / 1000000.0, o->max / 1000000.0);
+                return AVERROR(ERANGE);
+            }
+            *(int64_t *)dst = usecs;
             return 0;
-        } else {
-            if ((ret = av_parse_time(dst, val, 1)) < 0)
-                av_log(obj, AV_LOG_ERROR, "Unable to parse option value \"%s\" as duration\n", val);
-            return ret;
         }
-        break;
     case AV_OPT_TYPE_COLOR:
         return set_string_color(obj, o, val, dst);
     case AV_OPT_TYPE_CHANNEL_LAYOUT:
@@ -759,6 +769,9 @@
     if (!o || !target_obj || (o->offset<=0 && o->type != AV_OPT_TYPE_CONST))
         return AVERROR_OPTION_NOT_FOUND;
 
+    if (o->flags & AV_OPT_FLAG_DEPRECATED)
+        av_log(obj, AV_LOG_WARNING, "The \"%s\" option is deprecated: %s\n", name, o->help);
+
     dst = (uint8_t *)target_obj + o->offset;
 
     buf[0] = 0;
@@ -1181,6 +1194,7 @@
         av_log(av_log_obj, AV_LOG_INFO, "%c", (opt->flags & AV_OPT_FLAG_SUBTITLE_PARAM) ? 'S' : '.');
         av_log(av_log_obj, AV_LOG_INFO, "%c", (opt->flags & AV_OPT_FLAG_EXPORT)         ? 'X' : '.');
         av_log(av_log_obj, AV_LOG_INFO, "%c", (opt->flags & AV_OPT_FLAG_READONLY)       ? 'R' : '.');
+        av_log(av_log_obj, AV_LOG_INFO, "%c", (opt->flags & AV_OPT_FLAG_BSF_PARAM)      ? 'B' : '.');
 
         if (opt->help)
             av_log(av_log_obj, AV_LOG_INFO, " %s", opt->help);

diff --git a/libavutil/opt.h b/libavutil/opt.h
index 0d89379..39f4a8d 100644
--- a/libavutil/opt.h
+++ b/libavutil/opt.h

@@ -229,15 +229,15 @@
     AV_OPT_TYPE_BINARY,  ///< offset must point to a pointer immediately followed by an int for the length
     AV_OPT_TYPE_DICT,
     AV_OPT_TYPE_UINT64,
-    AV_OPT_TYPE_CONST = 128,
-    AV_OPT_TYPE_IMAGE_SIZE = MKBETAG('S','I','Z','E'), ///< offset must point to two consecutive integers
-    AV_OPT_TYPE_PIXEL_FMT  = MKBETAG('P','F','M','T'),
-    AV_OPT_TYPE_SAMPLE_FMT = MKBETAG('S','F','M','T'),
-    AV_OPT_TYPE_VIDEO_RATE = MKBETAG('V','R','A','T'), ///< offset must point to AVRational
-    AV_OPT_TYPE_DURATION   = MKBETAG('D','U','R',' '),
-    AV_OPT_TYPE_COLOR      = MKBETAG('C','O','L','R'),
-    AV_OPT_TYPE_CHANNEL_LAYOUT = MKBETAG('C','H','L','A'),
-    AV_OPT_TYPE_BOOL           = MKBETAG('B','O','O','L'),
+    AV_OPT_TYPE_CONST,
+    AV_OPT_TYPE_IMAGE_SIZE, ///< offset must point to two consecutive integers
+    AV_OPT_TYPE_PIXEL_FMT,
+    AV_OPT_TYPE_SAMPLE_FMT,
+    AV_OPT_TYPE_VIDEO_RATE, ///< offset must point to AVRational
+    AV_OPT_TYPE_DURATION,
+    AV_OPT_TYPE_COLOR,
+    AV_OPT_TYPE_CHANNEL_LAYOUT,
+    AV_OPT_TYPE_BOOL,
 };
 
 /**
@@ -275,9 +275,6 @@
     int flags;
 #define AV_OPT_FLAG_ENCODING_PARAM  1   ///< a generic parameter which can be set by the user for muxing or encoding
 #define AV_OPT_FLAG_DECODING_PARAM  2   ///< a generic parameter which can be set by the user for demuxing or decoding
-#if FF_API_OPT_TYPE_METADATA
-#define AV_OPT_FLAG_METADATA        4   ///< some data extracted or inserted into the file like title, comment, ...
-#endif
 #define AV_OPT_FLAG_AUDIO_PARAM     8
 #define AV_OPT_FLAG_VIDEO_PARAM     16
 #define AV_OPT_FLAG_SUBTITLE_PARAM  32
@@ -290,7 +287,9 @@
  * This flag only makes sense when AV_OPT_FLAG_EXPORT is also set.
  */
 #define AV_OPT_FLAG_READONLY        128
+#define AV_OPT_FLAG_BSF_PARAM       (1<<8) ///< a generic parameter which can be set by the user for bit stream filtering
 #define AV_OPT_FLAG_FILTERING_PARAM (1<<16) ///< a generic parameter which can be set by the user for filtering
+#define AV_OPT_FLAG_DEPRECATED      (1<<17) ///< set if option is deprecated, users should refer to AVOption.help text for more information
 //FIXME think about enc-audio, ... style flags
 
     /**

diff --git a/libavutil/parseutils.c b/libavutil/parseutils.c
index 7ca07b3..59bec6c 100644
--- a/libavutil/parseutils.c
+++ b/libavutil/parseutils.c

@@ -590,7 +590,7 @@
     int64_t t, now64;
     time_t now;
     struct tm dt = { 0 }, tmbuf;
-    int today = 0, negative = 0, microseconds = 0;
+    int today = 0, negative = 0, microseconds = 0, suffix = 1000000;
     int i;
     static const char * const date_fmt[] = {
         "%Y - %m - %d",
@@ -661,12 +661,15 @@
         if (!q) {
             char *o;
             /* parse timestr as S+ */
-            dt.tm_sec = strtol(p, &o, 10);
+            errno = 0;
+            t = strtoll(p, &o, 10);
             if (o == p) /* the parsing didn't succeed */
                 return AVERROR(EINVAL);
-            dt.tm_min = 0;
-            dt.tm_hour = 0;
+            if (errno == ERANGE)
+                return AVERROR(ERANGE);
             q = o;
+        } else {
+            t = dt.tm_hour * 3600 + dt.tm_min * 60 + dt.tm_sec;
         }
     }
 
@@ -688,7 +691,16 @@
     }
 
     if (duration) {
-        t = dt.tm_hour * 3600 + dt.tm_min * 60 + dt.tm_sec;
+        if (q[0] == 'm' && q[1] == 's') {
+            suffix = 1000;
+            microseconds /= 1000;
+            q += 2;
+        } else if (q[0] == 'u' && q[1] == 's') {
+            suffix = 1;
+            microseconds = 0;
+            q += 2;
+        } else if (*q == 's')
+            q++;
     } else {
         int is_utc = *q == 'Z' || *q == 'z';
         int tzoffset = 0;
@@ -724,7 +736,11 @@
     if (*q)
         return AVERROR(EINVAL);
 
-    t *= 1000000;
+    if (INT64_MAX / suffix < t)
+        return AVERROR(ERANGE);
+    t *= suffix;
+    if (INT64_MAX - microseconds < t)
+        return AVERROR(ERANGE);
     t += microseconds;
     *timeval = negative ? -t : t;
     return 0;

diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index 2cfab89..970a832 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c

@@ -257,7 +257,7 @@
         .comp = {
             { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
         },
-        .flags = AV_PIX_FMT_FLAG_PSEUDOPAL,
+        .flags = FF_PSEUDOPAL,
         .alias = "gray8,y8",
     },
     [AV_PIX_FMT_MONOWHITE] = {
@@ -288,7 +288,7 @@
         .comp = {
             { 0, 1, 0, 0, 8, 0, 7, 1 },
         },
-        .flags = AV_PIX_FMT_FLAG_PAL,
+        .flags = AV_PIX_FMT_FLAG_PAL | AV_PIX_FMT_FLAG_ALPHA,
     },
     [AV_PIX_FMT_YUVJ420P] = {
         .name = "yuvj420p",
@@ -326,22 +326,10 @@
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
-#if FF_API_XVMC
-    [AV_PIX_FMT_XVMC_MPEG2_MC] = {
-        .name = "xvmcmc",
-        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-    },
-    [AV_PIX_FMT_XVMC_MPEG2_IDCT] = {
-        .name = "xvmcidct",
-        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-    },
-#endif /* FF_API_XVMC */
-#if !FF_API_XVMC
     [AV_PIX_FMT_XVMC] = {
         .name = "xvmc",
         .flags = AV_PIX_FMT_FLAG_HWACCEL,
     },
-#endif /* !FF_API_XVMC */
     [AV_PIX_FMT_UYVY422] = {
         .name = "uyvy422",
         .nb_components = 3,
@@ -374,7 +362,7 @@
             { 0, 1, 0, 3, 3, 0, 2, 1 },        /* G */
             { 0, 1, 0, 6, 2, 0, 1, 1 },        /* B */
         },
-        .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PSEUDOPAL,
+        .flags = AV_PIX_FMT_FLAG_RGB | FF_PSEUDOPAL,
     },
     [AV_PIX_FMT_BGR4] = {
         .name = "bgr4",
@@ -398,7 +386,7 @@
             { 0, 1, 0, 1, 2, 0, 1, 1 },        /* G */
             { 0, 1, 0, 3, 1, 0, 0, 1 },        /* B */
         },
-        .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PSEUDOPAL,
+        .flags = AV_PIX_FMT_FLAG_RGB | FF_PSEUDOPAL,
     },
     [AV_PIX_FMT_RGB8] = {
         .name = "rgb8",
@@ -410,7 +398,7 @@
             { 0, 1, 0, 3, 3, 0, 2, 1 },        /* G */
             { 0, 1, 0, 0, 3, 0, 2, 1 },        /* B */
         },
-        .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PSEUDOPAL,
+        .flags = AV_PIX_FMT_FLAG_RGB | FF_PSEUDOPAL,
     },
     [AV_PIX_FMT_RGB4] = {
         .name = "rgb4",
@@ -434,7 +422,7 @@
             { 0, 1, 0, 1, 2, 0, 1, 1 },        /* G */
             { 0, 1, 0, 0, 1, 0, 0, 1 },        /* B */
         },
-        .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PSEUDOPAL,
+        .flags = AV_PIX_FMT_FLAG_RGB | FF_PSEUDOPAL,
     },
     [AV_PIX_FMT_NV12] = {
         .name = "nv12",
@@ -623,6 +611,27 @@
         },
         .alias = "y12le",
     },
+    [AV_PIX_FMT_GRAY14BE] = {
+        .name = "gray14be",
+        .nb_components = 1,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 2, 0, 0, 14, 1, 13, 1 },       /* Y */
+        },
+        .flags = AV_PIX_FMT_FLAG_BE,
+        .alias = "y14be",
+    },
+    [AV_PIX_FMT_GRAY14LE] = {
+        .name = "gray14le",
+        .nb_components = 1,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 2, 0, 0, 14, 1, 13, 1 },       /* Y */
+        },
+        .alias = "y14le",
+    },
     [AV_PIX_FMT_GRAY16BE] = {
         .name = "gray16be",
         .nb_components = 1,
@@ -989,44 +998,6 @@
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
-#if FF_API_VDPAU
-    [AV_PIX_FMT_VDPAU_H264] = {
-        .name = "vdpau_h264",
-        .log2_chroma_w = 1,
-        .log2_chroma_h = 1,
-        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-    },
-    [AV_PIX_FMT_VDPAU_MPEG1] = {
-        .name = "vdpau_mpeg1",
-        .log2_chroma_w = 1,
-        .log2_chroma_h = 1,
-        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-    },
-    [AV_PIX_FMT_VDPAU_MPEG2] = {
-        .name = "vdpau_mpeg2",
-        .log2_chroma_w = 1,
-        .log2_chroma_h = 1,
-        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-    },
-    [AV_PIX_FMT_VDPAU_WMV3] = {
-        .name = "vdpau_wmv3",
-        .log2_chroma_w = 1,
-        .log2_chroma_h = 1,
-        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-    },
-    [AV_PIX_FMT_VDPAU_VC1] = {
-        .name = "vdpau_vc1",
-        .log2_chroma_w = 1,
-        .log2_chroma_h = 1,
-        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-    },
-    [AV_PIX_FMT_VDPAU_MPEG4] = {
-        .name = "vdpau_mpeg4",
-        .log2_chroma_w = 1,
-        .log2_chroma_h = 1,
-        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-    },
-#endif
     [AV_PIX_FMT_RGB48BE] = {
         .name = "rgb48be",
         .nb_components = 3,
@@ -1670,12 +1641,6 @@
         .log2_chroma_h = 1,
         .flags = AV_PIX_FMT_FLAG_HWACCEL,
     },
-    [AV_PIX_FMT_VDA_VLD] = {
-        .name = "vda_vld",
-        .log2_chroma_w = 1,
-        .log2_chroma_h = 1,
-        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-    },
     [AV_PIX_FMT_YA8] = {
         .name = "ya8",
         .nb_components = 2,
@@ -2029,10 +1994,6 @@
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE,
     },
-    [AV_PIX_FMT_VDA] = {
-        .name = "vda",
-        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-    },
     [AV_PIX_FMT_QSV] = {
         .name = "qsv",
         .flags = AV_PIX_FMT_FLAG_HWACCEL,
@@ -2241,6 +2202,32 @@
         .name = "drm_prime",
         .flags = AV_PIX_FMT_FLAG_HWACCEL,
     },
+    [AV_PIX_FMT_OPENCL] = {
+        .name  = "opencl",
+        .flags = AV_PIX_FMT_FLAG_HWACCEL,
+    },
+    [AV_PIX_FMT_GRAYF32BE] = {
+        .name = "grayf32be",
+        .nb_components = 1,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 4, 0, 0, 32, 3, 31, 1 },       /* Y */
+        },
+        .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_FLOAT,
+        .alias = "yf32be",
+    },
+    [AV_PIX_FMT_GRAYF32LE] = {
+        .name = "grayf32le",
+        .nb_components = 1,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 4, 0, 0, 32, 3, 31, 1 },       /* Y */
+        },
+        .flags = AV_PIX_FMT_FLAG_FLOAT,
+        .alias = "yf32le",
+    },
 };
 #if FF_API_PLUS1_MINUS1
 FF_ENABLE_DEPRECATION_WARNINGS
@@ -2488,7 +2475,6 @@
         av_assert0(d->log2_chroma_h <= 3);
         av_assert0(d->nb_components <= 4);
         av_assert0(d->name && d->name[0]);
-        av_assert0((d->nb_components==4 || d->nb_components==2) == !!(d->flags & AV_PIX_FMT_FLAG_ALPHA));
         av_assert2(av_get_pix_fmt(d->name) == i);
 
         for (j=0; j<FF_ARRAY_ELEMS(d->comp); j++) {
@@ -2539,7 +2525,7 @@
 #define FF_COLOR_XYZ      4
 
 #define pixdesc_has_alpha(pixdesc) \
-    ((pixdesc)->nb_components == 2 || (pixdesc)->nb_components == 4 || (pixdesc)->flags & AV_PIX_FMT_FLAG_PAL)
+    ((pixdesc)->flags & AV_PIX_FMT_FLAG_ALPHA)
 
 
 static int get_color_type(const AVPixFmtDescriptor *desc) {
@@ -2773,7 +2759,12 @@
     int i;
 
     for (i = 0; i < FF_ARRAY_ELEMS(color_primaries_names); i++) {
-        size_t len = strlen(color_primaries_names[i]);
+        size_t len;
+
+        if (!color_primaries_names[i])
+            continue;
+
+        len = strlen(color_primaries_names[i]);
         if (!strncmp(color_primaries_names[i], name, len))
             return i;
     }
@@ -2792,7 +2783,12 @@
     int i;
 
     for (i = 0; i < FF_ARRAY_ELEMS(color_transfer_names); i++) {
-        size_t len = strlen(color_transfer_names[i]);
+        size_t len;
+
+        if (!color_transfer_names[i])
+            continue;
+
+        len = strlen(color_transfer_names[i]);
         if (!strncmp(color_transfer_names[i], name, len))
             return i;
     }
@@ -2811,7 +2807,12 @@
     int i;
 
     for (i = 0; i < FF_ARRAY_ELEMS(color_space_names); i++) {
-        size_t len = strlen(color_space_names[i]);
+        size_t len;
+
+        if (!color_space_names[i])
+            continue;
+
+        len = strlen(color_space_names[i]);
         if (!strncmp(color_space_names[i], name, len))
             return i;
     }
@@ -2830,7 +2831,12 @@
     int i;
 
     for (i = 0; i < FF_ARRAY_ELEMS(chroma_location_names); i++) {
-        size_t len = strlen(chroma_location_names[i]);
+        size_t len;
+
+        if (!chroma_location_names[i])
+            continue;
+
+        len = strlen(chroma_location_names[i]);
         if (!strncmp(chroma_location_names[i], name, len))
             return i;
     }

diff --git a/libavutil/pixdesc.h b/libavutil/pixdesc.h
index fc3737c..4f9c5a2 100644
--- a/libavutil/pixdesc.h
+++ b/libavutil/pixdesc.h

@@ -154,17 +154,21 @@
  * in some cases be simpler. Or the data can be interpreted purely based on
  * the pixel format without using the palette.
  * An example of a pseudo-paletted format is AV_PIX_FMT_GRAY8
+ *
+ * @deprecated This flag is deprecated, and will be removed. When it is removed,
+ * the extra palette allocation in AVFrame.data[1] is removed as well. Only
+ * actual paletted formats (as indicated by AV_PIX_FMT_FLAG_PAL) will have a
+ * palette. Starting with FFmpeg versions which have this flag deprecated, the
+ * extra "pseudo" palette is already ignored, and API users are not required to
+ * allocate a palette for AV_PIX_FMT_FLAG_PSEUDOPAL formats (it was required
+ * before the deprecation, though).
  */
 #define AV_PIX_FMT_FLAG_PSEUDOPAL    (1 << 6)
 
 /**
  * The pixel format has an alpha channel. This is set on all formats that
- * support alpha in some way. The exception is AV_PIX_FMT_PAL8, which can
- * carry alpha as part of the palette. Details are explained in the
- * AVPixelFormat enum, and are also encoded in the corresponding
- * AVPixFmtDescriptor.
- *
- * The alpha is always straight, never pre-multiplied.
+ * support alpha in some way, including AV_PIX_FMT_PAL8. The alpha is always
+ * straight, never pre-multiplied.
  *
  * If a codec or a filter does not support alpha, it should set all alpha to
  * opaque, or use the equivalent pixel formats without alpha component, e.g.
@@ -225,11 +229,6 @@
  * Utility function to access log2_chroma_w log2_chroma_h from
  * the pixel format AVPixFmtDescriptor.
  *
- * See av_get_chroma_sub_sample() for a function that asserts a
- * valid pixel format instead of returning an error code.
- * Its recommended that you use avcodec_get_chroma_sub_sample unless
- * you do check the return code!
- *
  * @param[in]  pix_fmt the pixel format
  * @param[out] h_shift store log2_chroma_w (horizontal/width shift)
  * @param[out] v_shift store log2_chroma_h (vertical/height shift)

diff --git a/libavutil/pixelutils.c b/libavutil/pixelutils.c
index b663027..ebee3d6 100644
--- a/libavutil/pixelutils.c
+++ b/libavutil/pixelutils.c

@@ -51,12 +51,14 @@
 DECLARE_BLOCK_FUNCTIONS(4)
 DECLARE_BLOCK_FUNCTIONS(8)
 DECLARE_BLOCK_FUNCTIONS(16)
+DECLARE_BLOCK_FUNCTIONS(32)
 
 static const av_pixelutils_sad_fn sad_c[] = {
     block_sad_2x2_c,
     block_sad_4x4_c,
     block_sad_8x8_c,
     block_sad_16x16_c,
+    block_sad_32x32_c,
 };
 
 #endif /* CONFIG_PIXELUTILS */

diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index 24889c8..6815f8d 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h

@@ -42,6 +42,10 @@
  * This is stored as BGRA on little-endian CPU architectures and ARGB on
  * big-endian CPUs.
  *
+ * @note
+ * If the resolution is not a multiple of the chroma subsampling factor
+ * then the chroma plane resolution must be rounded up.
+ *
  * @par
  * When the pixel format is palettized RGB32 (AV_PIX_FMT_PAL8), the palettized
  * image data is stored in AVFrame.data[0]. The palette is transported in
@@ -74,11 +78,6 @@
     AV_PIX_FMT_YUVJ420P,  ///< planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting color_range
     AV_PIX_FMT_YUVJ422P,  ///< planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting color_range
     AV_PIX_FMT_YUVJ444P,  ///< planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting color_range
-#if FF_API_XVMC
-    AV_PIX_FMT_XVMC_MPEG2_MC,///< XVideo Motion Acceleration via common packet passing
-    AV_PIX_FMT_XVMC_MPEG2_IDCT,
-    AV_PIX_FMT_XVMC = AV_PIX_FMT_XVMC_MPEG2_IDCT,
-#endif /* FF_API_XVMC */
     AV_PIX_FMT_UYVY422,   ///< packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
     AV_PIX_FMT_UYYVYY411, ///< packed YUV 4:1:1, 12bpp, Cb Y0 Y1 Cr Y2 Y3
     AV_PIX_FMT_BGR8,      ///< packed RGB 3:3:2,  8bpp, (msb)2B 3G 3R(lsb)
@@ -100,13 +99,6 @@
     AV_PIX_FMT_YUV440P,   ///< planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
     AV_PIX_FMT_YUVJ440P,  ///< planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range
     AV_PIX_FMT_YUVA420P,  ///< planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
-#if FF_API_VDPAU
-    AV_PIX_FMT_VDPAU_H264,///< H.264 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-    AV_PIX_FMT_VDPAU_MPEG1,///< MPEG-1 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-    AV_PIX_FMT_VDPAU_MPEG2,///< MPEG-2 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-    AV_PIX_FMT_VDPAU_WMV3,///< WMV3 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-    AV_PIX_FMT_VDPAU_VC1, ///< VC-1 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-#endif
     AV_PIX_FMT_RGB48BE,   ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as big-endian
     AV_PIX_FMT_RGB48LE,   ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as little-endian
 
@@ -142,9 +134,6 @@
     AV_PIX_FMT_YUV422P16BE,  ///< planar YUV 4:2:2, 32bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
     AV_PIX_FMT_YUV444P16LE,  ///< planar YUV 4:4:4, 48bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
     AV_PIX_FMT_YUV444P16BE,  ///< planar YUV 4:4:4, 48bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
-#if FF_API_VDPAU
-    AV_PIX_FMT_VDPAU_MPEG4,  ///< MPEG-4 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-#endif
     AV_PIX_FMT_DXVA2_VLD,    ///< HW decoding through DXVA2, Picture.data[3] contains a LPDIRECT3DSURFACE9 pointer
 
     AV_PIX_FMT_RGB444LE,  ///< packed RGB 4:4:4, 16bpp, (msb)4X 4R 4G 4B(lsb), little-endian, X=unused/undefined
@@ -176,7 +165,6 @@
     AV_PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
     AV_PIX_FMT_YUV422P9BE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
     AV_PIX_FMT_YUV422P9LE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
-    AV_PIX_FMT_VDA_VLD,    ///< hardware decoding through VDA
     AV_PIX_FMT_GBRP,      ///< planar GBR 4:4:4 24bpp
     AV_PIX_FMT_GBR24P = AV_PIX_FMT_GBRP, // alias for #AV_PIX_FMT_GBRP
     AV_PIX_FMT_GBRP9BE,   ///< planar GBR 4:4:4 27bpp, big-endian
@@ -221,8 +209,6 @@
 
     AV_PIX_FMT_YVYU422,   ///< packed YUV 4:2:2, 16bpp, Y0 Cr Y1 Cb
 
-    AV_PIX_FMT_VDA,          ///< HW acceleration through VDA, data[3] contains a CVPixelBufferRef
-
     AV_PIX_FMT_YA16BE,       ///< 16 bits gray, 16 bits alpha (big-endian)
     AV_PIX_FMT_YA16LE,       ///< 16 bits gray, 16 bits alpha (little-endian)
 
@@ -248,7 +234,7 @@
      */
     AV_PIX_FMT_CUDA,
 
-    AV_PIX_FMT_0RGB=0x123+4,///< packed RGB 8:8:8, 32bpp, XRGBXRGB...   X=unused/undefined
+    AV_PIX_FMT_0RGB,        ///< packed RGB 8:8:8, 32bpp, XRGBXRGB...   X=unused/undefined
     AV_PIX_FMT_RGB0,        ///< packed RGB 8:8:8, 32bpp, RGBXRGBX...   X=unused/undefined
     AV_PIX_FMT_0BGR,        ///< packed BGR 8:8:8, 32bpp, XBGRXBGR...   X=unused/undefined
     AV_PIX_FMT_BGR0,        ///< packed BGR 8:8:8, 32bpp, BGRXBGRX...   X=unused/undefined
@@ -283,9 +269,9 @@
     AV_PIX_FMT_BAYER_GBRG16BE, ///< bayer, GBGB..(odd line), RGRG..(even line), 16-bit samples, big-endian */
     AV_PIX_FMT_BAYER_GRBG16LE, ///< bayer, GRGR..(odd line), BGBG..(even line), 16-bit samples, little-endian */
     AV_PIX_FMT_BAYER_GRBG16BE, ///< bayer, GRGR..(odd line), BGBG..(even line), 16-bit samples, big-endian */
-#if !FF_API_XVMC
+
     AV_PIX_FMT_XVMC,///< XVideo Motion Acceleration via common packet passing
-#endif /* !FF_API_XVMC */
+
     AV_PIX_FMT_YUV440P10LE, ///< planar YUV 4:4:0,20bpp, (1 Cr & Cb sample per 1x2 Y samples), little-endian
     AV_PIX_FMT_YUV440P10BE, ///< planar YUV 4:4:0,20bpp, (1 Cr & Cb sample per 1x2 Y samples), big-endian
     AV_PIX_FMT_YUV440P12LE, ///< planar YUV 4:4:0,24bpp, (1 Cr & Cb sample per 1x2 Y samples), little-endian
@@ -340,6 +326,19 @@
      * data[0] points to an AVDRMFrameDescriptor.
      */
     AV_PIX_FMT_DRM_PRIME,
+    /**
+     * Hardware surfaces for OpenCL.
+     *
+     * data[i] contain 2D image objects (typed in C as cl_mem, used
+     * in OpenCL as image2d_t) for each plane of the surface.
+     */
+    AV_PIX_FMT_OPENCL,
+
+    AV_PIX_FMT_GRAY14BE,   ///<        Y        , 14bpp, big-endian
+    AV_PIX_FMT_GRAY14LE,   ///<        Y        , 14bpp, little-endian
+
+    AV_PIX_FMT_GRAYF32BE,  ///< IEEE-754 single precision Y, 32bpp, big-endian
+    AV_PIX_FMT_GRAYF32LE,  ///< IEEE-754 single precision Y, 32bpp, little-endian
 
     AV_PIX_FMT_NB         ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
 };
@@ -360,6 +359,7 @@
 #define AV_PIX_FMT_GRAY9  AV_PIX_FMT_NE(GRAY9BE,  GRAY9LE)
 #define AV_PIX_FMT_GRAY10 AV_PIX_FMT_NE(GRAY10BE, GRAY10LE)
 #define AV_PIX_FMT_GRAY12 AV_PIX_FMT_NE(GRAY12BE, GRAY12LE)
+#define AV_PIX_FMT_GRAY14 AV_PIX_FMT_NE(GRAY14BE, GRAY14LE)
 #define AV_PIX_FMT_GRAY16 AV_PIX_FMT_NE(GRAY16BE, GRAY16LE)
 #define AV_PIX_FMT_YA16   AV_PIX_FMT_NE(YA16BE,   YA16LE)
 #define AV_PIX_FMT_RGB48  AV_PIX_FMT_NE(RGB48BE,  RGB48LE)
@@ -408,6 +408,8 @@
 #define AV_PIX_FMT_GBRPF32    AV_PIX_FMT_NE(GBRPF32BE,  GBRPF32LE)
 #define AV_PIX_FMT_GBRAPF32   AV_PIX_FMT_NE(GBRAPF32BE, GBRAPF32LE)
 
+#define AV_PIX_FMT_GRAYF32    AV_PIX_FMT_NE(GRAYF32BE, GRAYF32LE)
+
 #define AV_PIX_FMT_YUVA420P9  AV_PIX_FMT_NE(YUVA420P9BE , YUVA420P9LE)
 #define AV_PIX_FMT_YUVA422P9  AV_PIX_FMT_NE(YUVA422P9BE , YUVA422P9LE)
 #define AV_PIX_FMT_YUVA444P9  AV_PIX_FMT_NE(YUVA444P9BE , YUVA444P9LE)

diff --git a/libavutil/ppc/cpu.h b/libavutil/ppc/cpu.h
index 0744157..36973a5 100644
--- a/libavutil/ppc/cpu.h
+++ b/libavutil/ppc/cpu.h

@@ -19,7 +19,6 @@
 #ifndef AVUTIL_PPC_CPU_H
 #define AVUTIL_PPC_CPU_H
 
-#include "config.h"
 #include "libavutil/cpu.h"
 #include "libavutil/cpu_internal.h"
 

diff --git a/libavutil/ppc/util_altivec.h b/libavutil/ppc/util_altivec.h
index 6ab7e50..2548011 100644
--- a/libavutil/ppc/util_altivec.h
+++ b/libavutil/ppc/util_altivec.h

@@ -28,10 +28,6 @@
 
 #include "config.h"
 
-#if HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-
 /***********************************************************************
  * Vector types
  **********************************************************************/
@@ -56,6 +52,7 @@
 #define zero_s32v (vec_s32) zerov
 
 #if HAVE_ALTIVEC
+#include <altivec.h>
 
 // used to build registers permutation vectors (vcprm)
 // the 's' are for words in the _s_econd vector

diff --git a/libavutil/random_seed.c b/libavutil/random_seed.c
index d1c9a3f..70dc509 100644
--- a/libavutil/random_seed.c
+++ b/libavutil/random_seed.c

@@ -26,9 +26,9 @@
 #if HAVE_IO_H
 #include <io.h>
 #endif
-#if HAVE_CRYPTGENRANDOM
+#if HAVE_BCRYPT
 #include <windows.h>
-#include <wincrypt.h>
+#include <bcrypt.h>
 #endif
 #include <fcntl.h>
 #include <math.h>
@@ -121,13 +121,14 @@
 {
     uint32_t seed;
 
-#if HAVE_CRYPTGENRANDOM
-    HCRYPTPROV provider;
-    if (CryptAcquireContext(&provider, NULL, NULL, PROV_RSA_FULL,
-                            CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) {
-        BOOL ret = CryptGenRandom(provider, sizeof(seed), (PBYTE) &seed);
-        CryptReleaseContext(provider, 0);
-        if (ret)
+#if HAVE_BCRYPT
+    BCRYPT_ALG_HANDLE algo_handle;
+    NTSTATUS ret = BCryptOpenAlgorithmProvider(&algo_handle, BCRYPT_RNG_ALGORITHM,
+                                               MS_PRIMITIVE_PROVIDER, 0);
+    if (BCRYPT_SUCCESS(ret)) {
+        NTSTATUS ret = BCryptGenRandom(algo_handle, (UCHAR*)&seed, sizeof(seed), 0);
+        BCryptCloseAlgorithmProvider(algo_handle, 0);
+        if (BCRYPT_SUCCESS(ret))
             return seed;
     }
 #endif

diff --git a/libavutil/ripemd.c b/libavutil/ripemd.c
index b0db297..4f1c4ea 100644
--- a/libavutil/ripemd.c
+++ b/libavutil/ripemd.c

@@ -510,7 +510,11 @@
     return 0;
 }
 
+#if FF_API_CRYPTO_SIZE_T
 void av_ripemd_update(AVRIPEMD* ctx, const uint8_t* data, unsigned int len)
+#else
+void av_ripemd_update(AVRIPEMD* ctx, const uint8_t* data, size_t len)
+#endif
 {
     unsigned int i, j;
 

diff --git a/libavutil/ripemd.h b/libavutil/ripemd.h
index 6d6bb32..0db6858 100644
--- a/libavutil/ripemd.h
+++ b/libavutil/ripemd.h

@@ -66,7 +66,11 @@
  * @param data    input data to update hash with
  * @param len     input data length
  */
+#if FF_API_CRYPTO_SIZE_T
 void av_ripemd_update(struct AVRIPEMD* context, const uint8_t* data, unsigned int len);
+#else
+void av_ripemd_update(struct AVRIPEMD* context, const uint8_t* data, size_t len);
+#endif
 
 /**
  * Finish hashing and output digest value.

diff --git a/libavutil/slicethread.c b/libavutil/slicethread.c
index c43f87a..dfbe551 100644
--- a/libavutil/slicethread.c
+++ b/libavutil/slicethread.c

@@ -99,10 +99,6 @@
     AVSliceThread *ctx;
     int nb_workers, i;
 
-#if HAVE_W32THREADS
-    w32thread_init();
-#endif
-
     av_assert0(nb_threads >= 0);
     if (!nb_threads) {
         int nb_cpus = av_cpu_count();

diff --git a/libavutil/stereo3d.h b/libavutil/stereo3d.h
index 54f4c4c..d421aac 100644
--- a/libavutil/stereo3d.h
+++ b/libavutil/stereo3d.h

@@ -141,6 +141,25 @@
     AV_STEREO3D_COLUMNS,
 };
 
+/**
+ * List of possible view types.
+ */
+enum AVStereo3DView {
+    /**
+     * Frame contains two packed views.
+     */
+    AV_STEREO3D_VIEW_PACKED,
+
+    /**
+     * Frame contains only the left view.
+     */
+    AV_STEREO3D_VIEW_LEFT,
+
+    /**
+     * Frame contains only the right view.
+     */
+    AV_STEREO3D_VIEW_RIGHT,
+};
 
 /**
  * Inverted views, Right/Bottom represents the left view.
@@ -164,6 +183,11 @@
      * Additional information about the frame packing.
      */
     int flags;
+
+    /**
+     * Determines which views are packed.
+     */
+    enum AVStereo3DView view;
 } AVStereo3D;
 
 /**

diff --git a/libavutil/tests/.gitignore b/libavutil/tests/.gitignore
index 8ede070..9d90827 100644
--- a/libavutil/tests/.gitignore
+++ b/libavutil/tests/.gitignore

@@ -17,12 +17,15 @@
 /dict
 /display
 /error
+/encryption_info
 /eval
 /fifo
 /file
 /hash
 /hmac
+/hwdevice
 /imgutils
+/integer
 /lfg
 /lls
 /log

diff --git a/libavutil/tests/aes_ctr.c b/libavutil/tests/aes_ctr.c
index c5ebeda..00fdb05 100644
--- a/libavutil/tests/aes_ctr.c
+++ b/libavutil/tests/aes_ctr.c

@@ -45,7 +45,7 @@
 
     av_aes_ctr_set_random_iv(ae);
     iv =   av_aes_ctr_get_iv(ae);
-    av_aes_ctr_set_iv(ad, iv);
+    av_aes_ctr_set_full_iv(ad, iv);
 
     av_aes_ctr_crypt(ae, tmp, plain, sizeof(tmp));
     av_aes_ctr_crypt(ad, tmp, tmp,   sizeof(tmp));

diff --git a/libavutil/tests/atomic.c b/libavutil/tests/atomic.c
deleted file mode 100644
index e41bf5a..0000000
--- a/libavutil/tests/atomic.c
+++ /dev/null

@@ -1,38 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/atomic.h"
-#include "libavutil/avassert.h"
-
-int main(void)
-{
-    volatile int val      = 1;
-    void *tmp1            = (int *)&val;
-    void * volatile *tmp2 = &tmp1;
-    int res;
-
-    res = avpriv_atomic_int_add_and_fetch(&val, 1);
-    av_assert0(res == 2);
-    avpriv_atomic_int_set(&val, 3);
-    res = avpriv_atomic_int_get(&val);
-    av_assert0(res == 3);
-    avpriv_atomic_ptr_cas(tmp2, tmp1, &res);
-    av_assert0(*tmp2 == &res);
-
-    return 0;
-}

diff --git a/libavutil/tests/bprint.c b/libavutil/tests/bprint.c
index d7f381d..5a46f55 100644
--- a/libavutil/tests/bprint.c
+++ b/libavutil/tests/bprint.c

@@ -47,13 +47,13 @@
     char buf[256];
     struct tm testtime = { .tm_year = 100, .tm_mon = 11, .tm_mday = 20 };
 
-    av_bprint_init(&b, 0, -1);
+    av_bprint_init(&b, 0, AV_BPRINT_SIZE_UNLIMITED);
     bprint_pascal(&b, 5);
     printf("Short text in unlimited buffer: %u/%u\n", (unsigned)strlen(b.str), b.len);
     printf("%s\n", b.str);
     av_bprint_finalize(&b, NULL);
 
-    av_bprint_init(&b, 0, -1);
+    av_bprint_init(&b, 0, AV_BPRINT_SIZE_UNLIMITED);
     bprint_pascal(&b, 25);
     printf("Long text in unlimited buffer: %u/%u\n", (unsigned)strlen(b.str), b.len);
     av_bprint_finalize(&b, NULL);
@@ -63,16 +63,16 @@
     printf("Long text in limited buffer: %u/%u\n", (unsigned)strlen(b.str), b.len);
     av_bprint_finalize(&b, NULL);
 
-    av_bprint_init(&b, 0, 1);
+    av_bprint_init(&b, 0, AV_BPRINT_SIZE_AUTOMATIC);
     bprint_pascal(&b, 5);
     printf("Short text in automatic buffer: %u/%u\n", (unsigned)strlen(b.str), b.len);
 
-    av_bprint_init(&b, 0, 1);
+    av_bprint_init(&b, 0, AV_BPRINT_SIZE_AUTOMATIC);
     bprint_pascal(&b, 25);
     printf("Long text in automatic buffer: %u/%u\n", (unsigned)strlen(b.str)/8*8, b.len);
     /* Note that the size of the automatic buffer is arch-dependent. */
 
-    av_bprint_init(&b, 0, 0);
+    av_bprint_init(&b, 0, AV_BPRINT_SIZE_COUNT_ONLY);
     bprint_pascal(&b, 25);
     printf("Long text count only buffer: %u/%u\n", (unsigned)strlen(b.str), b.len);
 
@@ -80,7 +80,7 @@
     bprint_pascal(&b, 25);
     printf("Long text count only buffer: %u/%u\n", (unsigned)strlen(buf), b.len);
 
-    av_bprint_init(&b, 0, -1);
+    av_bprint_init(&b, 0, AV_BPRINT_SIZE_UNLIMITED);
     av_bprint_strftime(&b, "%Y-%m-%d", &testtime);
     printf("strftime full: %u/%u \"%s\"\n", (unsigned)strlen(buf), b.len, b.str);
     av_bprint_finalize(&b, NULL);

diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c
index f02a54c..ce45b71 100644
--- a/libavutil/tests/cpu.c
+++ b/libavutil/tests/cpu.c

@@ -73,6 +73,7 @@
     { AV_CPU_FLAG_BMI1,      "bmi1"       },
     { AV_CPU_FLAG_BMI2,      "bmi2"       },
     { AV_CPU_FLAG_AESNI,     "aesni"      },
+    { AV_CPU_FLAG_AVX512,    "avx512"     },
 #endif
     { 0 }
 };

diff --git a/libavutil/tests/crc.c b/libavutil/tests/crc.c
index 9825d6b..413aada 100644
--- a/libavutil/tests/crc.c
+++ b/libavutil/tests/crc.c

@@ -25,20 +25,21 @@
 {
     uint8_t buf[1999];
     int i;
-    static const unsigned p[6][3] = {
+    static const unsigned p[7][3] = {
         { AV_CRC_32_IEEE_LE, 0xEDB88320, 0x3D5CDD04 },
         { AV_CRC_32_IEEE   , 0x04C11DB7, 0xC0F5BAE0 },
         { AV_CRC_24_IEEE   , 0x864CFB  , 0xB704CE   },
         { AV_CRC_16_ANSI_LE, 0xA001    , 0xBFD8     },
         { AV_CRC_16_ANSI   , 0x8005    , 0x1FBB     },
-        { AV_CRC_8_ATM     , 0x07      , 0xE3       }
+        { AV_CRC_8_ATM     , 0x07      , 0xE3       },
+        { AV_CRC_8_EBU     , 0x1D      , 0xD6       },
     };
     const AVCRC *ctx;
 
     for (i = 0; i < sizeof(buf); i++)
         buf[i] = i + i * i;
 
-    for (i = 0; i < 6; i++) {
+    for (i = 0; i < 7; i++) {
         ctx = av_crc_get_table(p[i][0]);
         printf("crc %08X = %X\n", p[i][1], av_crc(ctx, 0, buf, sizeof(buf)));
     }

diff --git a/libavutil/tests/encryption_info.c b/libavutil/tests/encryption_info.c
new file mode 100644
index 0000000..d489612
--- /dev/null
+++ b/libavutil/tests/encryption_info.c

@@ -0,0 +1,176 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/encryption_info.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "libavutil/avassert.h"
+
+static const AVSubsampleEncryptionInfo test_subsamples[] = {{1, 2}, {3, 4}, {5, 6}, {7, 8}};
+static const size_t test_subsample_count = sizeof(test_subsamples) / sizeof(test_subsamples[0]);
+static const uint8_t test_iv[] = {0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18};
+static const uint8_t test_key_id[] = {0x21, 0x22, 0x23, 0x24};
+static const uint8_t test_key_id_2[] = {0x31, 0x32, 0x33, 0x34};
+static const uint8_t test_system_id[] = {0x41, 0x42, 0x43};
+static const uint8_t test_data[] = {0x51, 0x52};
+
+static int compare_encryption_info(const AVEncryptionInfo *a, const AVEncryptionInfo *b) {
+    if (!a || !b || a->scheme != b->scheme || a->crypt_byte_block != b->crypt_byte_block ||
+        a->skip_byte_block != b->skip_byte_block || a->key_id_size != b->key_id_size ||
+        a->iv_size != b->iv_size || a->subsample_count != b->subsample_count)
+        return 1;
+
+    if (memcmp(a->key_id, b->key_id, a->key_id_size) != 0 ||
+        memcmp(a->iv, b->iv, a->iv_size) != 0 ||
+        memcmp(a->subsamples, b->subsamples, a->subsample_count * sizeof(a->subsamples[0])))
+        return 1;
+
+    return 0;
+}
+
+static int compare_encryption_init_info(const AVEncryptionInitInfo *a, const AVEncryptionInitInfo *b) {
+    if (!a || !b || a->system_id_size != b->system_id_size ||
+        a->num_key_ids != b->num_key_ids || a->key_id_size != b->key_id_size ||
+        a->data_size != b->data_size)
+        return 1;
+
+    if (memcmp(a->system_id, b->system_id, a->system_id_size) != 0 ||
+        memcmp(a->data, b->data, a->data_size) != 0)
+        return 1;
+
+    for (uint32_t i = 0; i < a->num_key_ids; i++) {
+        if (memcmp(a->key_ids[i], b->key_ids[i], a->key_id_size) != 0)
+            return 1;
+    }
+
+    if (a->next || b->next) {
+        if (!a->next || !b->next)
+            return 1;
+        if (compare_encryption_init_info(a->next, b->next) != 0)
+            return 1;
+    }
+
+    return 0;
+}
+
+static void run_encryption_info_test(void)
+{
+    AVEncryptionInfo *info, *copy;
+    uint8_t *side_data;
+    size_t side_data_size;
+
+    info = av_encryption_info_alloc(test_subsample_count, sizeof(test_key_id), sizeof(test_iv));
+    av_assert0(info);
+    av_assert0(info->key_id);
+    av_assert0(info->key_id_size == sizeof(test_key_id));
+    av_assert0(info->iv);
+    av_assert0(info->iv_size == sizeof(test_iv));
+    av_assert0(info->subsamples);
+    av_assert0(info->subsample_count == test_subsample_count);
+
+    info->scheme = 1234;
+    info->crypt_byte_block = 333;
+    info->skip_byte_block = 444;
+    memcpy(info->key_id, test_key_id, sizeof(test_key_id));
+    memcpy(info->iv, test_iv, sizeof(test_iv));
+    memcpy(info->subsamples, test_subsamples, sizeof(test_subsamples));
+
+    copy = av_encryption_info_clone(info);
+    av_assert0(copy);
+    av_assert0(copy != info);
+    av_assert0(compare_encryption_info(info, copy) == 0);
+    av_encryption_info_free(copy);
+
+    side_data = av_encryption_info_add_side_data(info, &side_data_size);
+    av_assert0(side_data);
+    av_assert0(side_data_size > 0);
+
+    copy = av_encryption_info_get_side_data(side_data, side_data_size);
+    av_assert0(copy);
+    av_assert0(copy != info);
+    av_assert0(compare_encryption_info(info, copy) == 0);
+    av_encryption_info_free(copy);
+    av_free(side_data);
+
+    av_encryption_info_free(info);
+}
+
+static AVEncryptionInitInfo *create_init_info(void)
+{
+    AVEncryptionInitInfo *info;
+
+    info = av_encryption_init_info_alloc(sizeof(test_system_id), 2, sizeof(test_key_id), sizeof(test_data));
+    av_assert0(info);
+    av_assert0(info->system_id);
+    av_assert0(info->system_id_size == sizeof(test_system_id));
+    av_assert0(info->key_ids);
+    av_assert0(info->num_key_ids == 2);
+    av_assert0(info->key_id_size == sizeof(test_key_id));
+    av_assert0(info->key_ids[0]);
+    av_assert0(info->key_ids[1]);
+    av_assert0(info->data);
+    av_assert0(info->data_size == sizeof(test_data));
+    av_assert0(!info->next);
+
+    memcpy(info->system_id, test_system_id, sizeof(test_system_id));
+    memcpy(info->key_ids[0], test_key_id, sizeof(test_key_id));
+    memcpy(info->key_ids[1], test_key_id_2, sizeof(test_key_id_2));
+    memcpy(info->data, test_data, sizeof(test_data));
+
+    return info;
+}
+
+static void run_encryption_init_info_test(void)
+{
+    AVEncryptionInitInfo *info, *copy;
+    uint8_t *side_data;
+    size_t side_data_size;
+
+    info = create_init_info();
+
+    side_data = av_encryption_init_info_add_side_data(info, &side_data_size);
+    av_assert0(side_data);
+    av_assert0(side_data_size > 0);
+    copy = av_encryption_init_info_get_side_data(side_data, side_data_size);
+    av_assert0(copy);
+    av_assert0(compare_encryption_init_info(info, copy) == 0);
+    av_encryption_init_info_free(copy);
+    av_free(side_data);
+
+    // Make the first init info different from the second to test the correct order.
+    memset(info->system_id, 0, info->system_id_size);
+    info->next = create_init_info();
+    side_data = av_encryption_init_info_add_side_data(info, &side_data_size);
+    av_assert0(side_data);
+    copy = av_encryption_init_info_get_side_data(side_data, side_data_size);
+    av_assert0(copy);
+    av_assert0(compare_encryption_init_info(info, copy) == 0);
+    av_encryption_init_info_free(copy);
+    av_free(side_data);
+
+    av_encryption_init_info_free(info);
+}
+
+int main(int argc, char **argv)
+{
+    run_encryption_info_test();
+    run_encryption_init_info_test();
+    return 0;
+}

diff --git a/libavutil/tests/hmac.c b/libavutil/tests/hmac.c
index 5eeb63c..0fa50e4 100644
--- a/libavutil/tests/hmac.c
+++ b/libavutil/tests/hmac.c

@@ -70,7 +70,7 @@
     }
 
     /* SHA-2 */
-    for (i = AV_HMAC_SHA224; i <= AV_HMAC_SHA256; i++) {
+    for (i = AV_HMAC_SHA224; i <= AV_HMAC_SHA512; i++) {
         hmac = av_hmac_alloc(i);
         if (!hmac)
             return 1;
@@ -83,17 +83,5 @@
         av_hmac_free(hmac);
     }
 
-    for (i = AV_HMAC_SHA384; i <= AV_HMAC_SHA512; i++) {
-        hmac = av_hmac_alloc(i);
-        if (!hmac)
-            return 1;
-        // RFC 4231 test vectors
-        test(hmac, key1, sizeof(key1), data1, sizeof(data1));
-        test(hmac, key2, sizeof(key2), data2, sizeof(data2));
-        test(hmac, key3, 20, data3, sizeof(data3));
-        test(hmac, key3, sizeof(key3), data4, sizeof(data4));
-        test(hmac, key3, sizeof(key3), data6, sizeof(data6));
-        av_hmac_free(hmac);
-    }
     return 0;
 }

diff --git a/libavutil/tests/hwdevice.c b/libavutil/tests/hwdevice.c
new file mode 100644
index 0000000..7eb355c
--- /dev/null
+++ b/libavutil/tests/hwdevice.c

@@ -0,0 +1,226 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+
+#include "libavutil/hwcontext.h"
+
+static int test_derivation(AVBufferRef *src_ref, const char *src_name)
+{
+    enum AVHWDeviceType derived_type;
+    const char *derived_name;
+    AVBufferRef *derived_ref = NULL, *back_ref = NULL;
+    AVHWDeviceContext *src_dev, *derived_dev;
+    int err;
+
+    src_dev = (AVHWDeviceContext*)src_ref->data;
+
+    derived_type = AV_HWDEVICE_TYPE_NONE;
+    while (1) {
+        derived_type = av_hwdevice_iterate_types(derived_type);
+        if (derived_type == AV_HWDEVICE_TYPE_NONE)
+            break;
+
+        derived_name = av_hwdevice_get_type_name(derived_type);
+
+        err = av_hwdevice_ctx_create_derived(&derived_ref, derived_type,
+                                             src_ref, 0);
+        if (err < 0) {
+            fprintf(stderr, "Unable to derive %s -> %s: %d.\n",
+                    src_name, derived_name, err);
+            continue;
+        }
+
+        derived_dev = (AVHWDeviceContext*)derived_ref->data;
+        if (derived_dev->type != derived_type) {
+            fprintf(stderr, "Device derived as type %d has type %d.\n",
+                    derived_type, derived_dev->type);
+            goto fail;
+        }
+
+        if (derived_type == src_dev->type) {
+            if (derived_dev != src_dev) {
+                fprintf(stderr, "Derivation of %s from itself succeeded "
+                        "but did not return the same device.\n", src_name);
+                goto fail;
+            }
+            av_buffer_unref(&derived_ref);
+            continue;
+        }
+
+        err = av_hwdevice_ctx_create_derived(&back_ref, src_dev->type,
+                                             derived_ref, 0);
+        if (err < 0) {
+            fprintf(stderr, "Derivation %s to %s succeeded, but derivation "
+                    "back again failed: %d.\n",
+                    src_name, derived_name, err);
+            goto fail;
+        }
+
+        if (back_ref->data != src_ref->data) {
+            fprintf(stderr, "Derivation %s to %s succeeded, but derivation "
+                    "back again did not return the original device.\n",
+                   src_name, derived_name);
+            goto fail;
+        }
+
+        fprintf(stderr, "Successfully tested derivation %s -> %s.\n",
+                src_name, derived_name);
+
+        av_buffer_unref(&derived_ref);
+        av_buffer_unref(&back_ref);
+    }
+
+    return 0;
+
+fail:
+    av_buffer_unref(&derived_ref);
+    av_buffer_unref(&back_ref);
+    return -1;
+}
+
+static int test_device(enum AVHWDeviceType type, const char *name,
+                       const char *device, AVDictionary *opts, int flags)
+{
+    AVBufferRef *ref;
+    AVHWDeviceContext *dev;
+    int err;
+
+    err = av_hwdevice_ctx_create(&ref, type, device, opts, flags);
+    if (err < 0) {
+        fprintf(stderr, "Failed to create %s device: %d.\n", name, err);
+        return 1;
+    }
+
+    dev = (AVHWDeviceContext*)ref->data;
+    if (dev->type != type) {
+        fprintf(stderr, "Device created as type %d has type %d.\n",
+                type, dev->type);
+        av_buffer_unref(&ref);
+        return -1;
+    }
+
+    fprintf(stderr, "Device type %s successfully created.\n", name);
+
+    err = test_derivation(ref, name);
+
+    av_buffer_unref(&ref);
+
+    return err;
+}
+
+static const struct {
+    enum AVHWDeviceType type;
+    const char *possible_devices[5];
+} test_devices[] = {
+    { AV_HWDEVICE_TYPE_CUDA,
+      { "0", "1", "2" } },
+    { AV_HWDEVICE_TYPE_DRM,
+      { "/dev/dri/card0", "/dev/dri/card1",
+        "/dev/dri/renderD128", "/dev/dri/renderD129" } },
+    { AV_HWDEVICE_TYPE_DXVA2,
+      { "0", "1", "2" } },
+    { AV_HWDEVICE_TYPE_D3D11VA,
+      { "0", "1", "2" } },
+    { AV_HWDEVICE_TYPE_OPENCL,
+      { "0.0", "0.1", "1.0", "1.1" } },
+    { AV_HWDEVICE_TYPE_VAAPI,
+      { "/dev/dri/renderD128", "/dev/dri/renderD129", ":0" } },
+};
+
+static int test_device_type(enum AVHWDeviceType type)
+{
+    enum AVHWDeviceType check;
+    const char *name;
+    int i, j, found, err;
+
+    name = av_hwdevice_get_type_name(type);
+    if (!name) {
+        fprintf(stderr, "No name available for device type %d.\n", type);
+        return -1;
+    }
+
+    check = av_hwdevice_find_type_by_name(name);
+    if (check != type) {
+        fprintf(stderr, "Type %d maps to name %s maps to type %d.\n",
+               type, name, check);
+        return -1;
+    }
+
+    found = 0;
+
+    err = test_device(type, name, NULL, NULL, 0);
+    if (err < 0) {
+        fprintf(stderr, "Test failed for %s with default options.\n", name);
+        return -1;
+    }
+    if (err == 0) {
+        fprintf(stderr, "Test passed for %s with default options.\n", name);
+        ++found;
+    }
+
+    for (i = 0; i < FF_ARRAY_ELEMS(test_devices); i++) {
+        if (test_devices[i].type != type)
+            continue;
+
+        for (j = 0; test_devices[i].possible_devices[j]; j++) {
+            err = test_device(type, name,
+                              test_devices[i].possible_devices[j],
+                              NULL, 0);
+            if (err < 0) {
+                fprintf(stderr, "Test failed for %s with device %s.\n",
+                       name, test_devices[i].possible_devices[j]);
+                return -1;
+            }
+            if (err == 0) {
+                fprintf(stderr, "Test passed for %s with device %s.\n",
+                        name, test_devices[i].possible_devices[j]);
+                ++found;
+            }
+        }
+    }
+
+    return !found;
+}
+
+int main(void)
+{
+    enum AVHWDeviceType type = AV_HWDEVICE_TYPE_NONE;
+    int pass, fail, skip, err;
+
+    pass = fail = skip = 0;
+    while (1) {
+        type = av_hwdevice_iterate_types(type);
+        if (type == AV_HWDEVICE_TYPE_NONE)
+            break;
+
+        err = test_device_type(type);
+        if (err == 0)
+            ++pass;
+        else if (err < 0)
+            ++fail;
+        else
+            ++skip;
+    }
+
+    fprintf(stderr, "Attempted to test %d device types: "
+            "%d passed, %d failed, %d skipped.\n",
+            pass + fail + skip, pass, fail, skip);
+
+    return fail > 0;
+}

diff --git a/libavutil/tests/integer.c b/libavutil/tests/integer.c
new file mode 100644
index 0000000..d2c8f2a
--- /dev/null
+++ b/libavutil/tests/integer.c

@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/integer.h"
+#include "libavutil/intmath.h"
+
+int main(void){
+    int64_t a,b;
+
+    for(a=7; a<256*256*256; a+=13215){
+        for(b=3; b<256*256*256; b+=27118){
+            AVInteger ai= av_int2i(a);
+            AVInteger bi= av_int2i(b);
+
+            av_assert0(av_i2int(ai) == a);
+            av_assert0(av_i2int(bi) == b);
+            av_assert0(av_i2int(av_add_i(ai,bi)) == a+b);
+            av_assert0(av_i2int(av_sub_i(ai,bi)) == a-b);
+            av_assert0(av_i2int(av_mul_i(ai,bi)) == a*b);
+            av_assert0(av_i2int(av_shr_i(ai, 9)) == a>>9);
+            av_assert0(av_i2int(av_shr_i(ai,-9)) == a<<9);
+            av_assert0(av_i2int(av_shr_i(ai, 17)) == a>>17);
+            av_assert0(av_i2int(av_shr_i(ai,-17)) == a<<17);
+            av_assert0(av_log2_i(ai) == av_log2(a));
+            av_assert0(av_i2int(av_div_i(ai,bi)) == a/b);
+        }
+    }
+    return 0;
+}

diff --git a/libavutil/tests/opt.c b/libavutil/tests/opt.c
index 568eb45..f4cfa59 100644
--- a/libavutil/tests/opt.c
+++ b/libavutil/tests/opt.c

@@ -98,9 +98,9 @@
 }
 
 static const AVClass test_class = {
-    "TestContext",
-    test_get_name,
-    test_options
+    .class_name = "TestContext",
+    .item_name  = test_get_name,
+    .option     = test_options,
 };
 
 static void log_callback_help(void *ptr, int level, const char *fmt, va_list vl)

diff --git a/libavutil/tests/parseutils.c b/libavutil/tests/parseutils.c
index 180f624..a1ac8d4 100644
--- a/libavutil/tests/parseutils.c
+++ b/libavutil/tests/parseutils.c

@@ -185,6 +185,10 @@
         "42.1729",
         "-1729.42",
         "12:34",
+        "2147483648s",
+        "4294967296ms",
+        "8589934592us",
+        "9223372036854775808us",
     };
 
     av_log_set_level(AV_LOG_DEBUG);

diff --git a/libavutil/tests/pixdesc.c b/libavutil/tests/pixdesc.c
index 7fbfeea..34e2bea 100644
--- a/libavutil/tests/pixdesc.c
+++ b/libavutil/tests/pixdesc.c

@@ -37,10 +37,6 @@
             skip = 0;
         }
         av_log(NULL, AV_LOG_INFO, "pix fmt %s avg_bpp:%d colortype:%d\n", desc->name, av_get_padded_bits_per_pixel(desc), get_color_type(desc));
-        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) != (desc->nb_components != 2 && desc->nb_components != 4)) {
-            av_log(NULL, AV_LOG_ERROR, "Alpha flag mismatch\n");
-            err = 1;
-        }
     }
     return err;
 }

diff --git a/libavutil/tests/pixelutils.c b/libavutil/tests/pixelutils.c
index ec4dc8f..2f80758 100644
--- a/libavutil/tests/pixelutils.c
+++ b/libavutil/tests/pixelutils.c

@@ -115,7 +115,7 @@
         goto end;
 
     /* Exact buffer sizes, to check for overreads */
-    for (i = 1; i <= 4; i++) {
+    for (i = 1; i <= 5; i++) {
         for (align = 0; align < 3; align++) {
             int size1, size2;
 

diff --git a/libavutil/thread.h b/libavutil/thread.h
index f108e20..cc5272d 100644
--- a/libavutil/thread.h
+++ b/libavutil/thread.h

@@ -134,6 +134,7 @@
 #endif
 
 #define AVMutex pthread_mutex_t
+#define AV_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
 
 #define ff_mutex_init    pthread_mutex_init
 #define ff_mutex_lock    pthread_mutex_lock
@@ -148,11 +149,12 @@
 #else
 
 #define AVMutex char
+#define AV_MUTEX_INITIALIZER 0
 
-#define ff_mutex_init(mutex, attr) (0)
-#define ff_mutex_lock(mutex) (0)
-#define ff_mutex_unlock(mutex) (0)
-#define ff_mutex_destroy(mutex) (0)
+static inline int ff_mutex_init(AVMutex *mutex, const void *attr){ return 0; }
+static inline int ff_mutex_lock(AVMutex *mutex){ return 0; }
+static inline int ff_mutex_unlock(AVMutex *mutex){ return 0; }
+static inline int ff_mutex_destroy(AVMutex *mutex){ return 0; }
 
 #define AVOnce char
 #define AV_ONCE_INIT 0

diff --git a/libavutil/threadmessage.c b/libavutil/threadmessage.c
index 872e939..764b7fb 100644
--- a/libavutil/threadmessage.c
+++ b/libavutil/threadmessage.c

@@ -102,6 +102,19 @@
 #endif
 }
 
+int av_thread_message_queue_nb_elems(AVThreadMessageQueue *mq)
+{
+#if HAVE_THREADS
+    int ret;
+    pthread_mutex_lock(&mq->lock);
+    ret = av_fifo_size(mq->fifo);
+    pthread_mutex_unlock(&mq->lock);
+    return ret / mq->elsize;
+#else
+    return AVERROR(ENOSYS);
+#endif
+}
+
 #if HAVE_THREADS
 
 static int av_thread_message_queue_send_locked(AVThreadMessageQueue *mq,

diff --git a/libavutil/threadmessage.h b/libavutil/threadmessage.h
index 8480a0a..42ce655 100644
--- a/libavutil/threadmessage.h
+++ b/libavutil/threadmessage.h

@@ -96,6 +96,14 @@
                                            void (*free_func)(void *msg));
 
 /**
+ * Return the current number of messages in the queue.
+ *
+ * @return the current number of messages or AVERROR(ENOSYS) if lavu was built
+ *         without thread support
+ */
+int av_thread_message_queue_nb_elems(AVThreadMessageQueue *mq);
+
+/**
  * Flush the message queue
  *
  * This function is mostly equivalent to reading and free-ing every message

diff --git a/libavutil/timecode.c b/libavutil/timecode.c
index c0c67c8..60077ba 100644
--- a/libavutil/timecode.c
+++ b/libavutil/timecode.c

@@ -155,7 +155,7 @@
 static int check_timecode(void *log_ctx, AVTimecode *tc)
 {
     if ((int)tc->fps <= 0) {
-        av_log(log_ctx, AV_LOG_ERROR, "Timecode frame rate must be specified\n");
+        av_log(log_ctx, AV_LOG_ERROR, "Valid timecode frame rate must be specified. Minimum value is 1\n");
         return AVERROR(EINVAL);
     }
     if ((tc->flags & AV_TIMECODE_FLAG_DROPFRAME) && tc->fps != 30 && tc->fps != 60) {
@@ -214,7 +214,7 @@
     tc->start = (hh*3600 + mm*60 + ss) * tc->fps + ff;
     if (tc->flags & AV_TIMECODE_FLAG_DROPFRAME) { /* adjust frame number */
         int tmins = 60*hh + mm;
-        tc->start -= 2 * (tmins - tmins/10);
+        tc->start -= (tc->fps == 30 ? 2 : 4) * (tmins - tmins/10);
     }
     return 0;
 }

diff --git a/libavutil/timer.h b/libavutil/timer.h
index f7ab455..0bb353c 100644
--- a/libavutil/timer.h
+++ b/libavutil/timer.h

@@ -42,7 +42,7 @@
 #include <stdint.h>
 #include <inttypes.h>
 
-#if HAVE_MACH_MACH_TIME_H
+#if HAVE_MACH_ABSOLUTE_TIME
 #include <mach/mach_time.h>
 #endif
 

diff --git a/libavutil/utils.c b/libavutil/utils.c
index 2c170db..230081e 100644
--- a/libavutil/utils.c
+++ b/libavutil/utils.c

@@ -41,9 +41,6 @@
     if (checks_done)
         return LIBAVUTIL_VERSION_INT;
 
-#if FF_API_VDPAU
-    av_assert0(AV_PIX_FMT_VDA_VLD == 81); //check if the pix fmt enum has not had anything inserted or removed by mistake
-#endif
     av_assert0(AV_SAMPLE_FMT_DBLP == 9);
     av_assert0(AVMEDIA_TYPE_ATTACHMENT == 4);
     av_assert0(AV_PICTURE_TYPE_BI == 7);

diff --git a/libavutil/version.h b/libavutil/version.h
index f594dc0..f84ec89 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h

@@ -78,10 +78,9 @@
  * @{
  */
 
-
-#define LIBAVUTIL_VERSION_MAJOR  55
-#define LIBAVUTIL_VERSION_MINOR  78
-#define LIBAVUTIL_VERSION_MICRO 100
+#define LIBAVUTIL_VERSION_MAJOR  56
+#define LIBAVUTIL_VERSION_MINOR  19
+#define LIBAVUTIL_VERSION_MICRO 101
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \
@@ -106,38 +105,29 @@
  * @{
  */
 
-#ifndef FF_API_VDPAU
-#define FF_API_VDPAU                    (LIBAVUTIL_VERSION_MAJOR < 56)
-#endif
-#ifndef FF_API_XVMC
-#define FF_API_XVMC                     (LIBAVUTIL_VERSION_MAJOR < 56)
-#endif
-#ifndef FF_API_OPT_TYPE_METADATA
-#define FF_API_OPT_TYPE_METADATA        (LIBAVUTIL_VERSION_MAJOR < 56)
-#endif
-#ifndef FF_API_DLOG
-#define FF_API_DLOG                     (LIBAVUTIL_VERSION_MAJOR < 56)
-#endif
 #ifndef FF_API_VAAPI
-#define FF_API_VAAPI                    (LIBAVUTIL_VERSION_MAJOR < 56)
+#define FF_API_VAAPI                    (LIBAVUTIL_VERSION_MAJOR < 57)
 #endif
 #ifndef FF_API_FRAME_QP
-#define FF_API_FRAME_QP                 (LIBAVUTIL_VERSION_MAJOR < 56)
+#define FF_API_FRAME_QP                 (LIBAVUTIL_VERSION_MAJOR < 57)
 #endif
 #ifndef FF_API_PLUS1_MINUS1
-#define FF_API_PLUS1_MINUS1             (LIBAVUTIL_VERSION_MAJOR < 56)
+#define FF_API_PLUS1_MINUS1             (LIBAVUTIL_VERSION_MAJOR < 57)
 #endif
 #ifndef FF_API_ERROR_FRAME
-#define FF_API_ERROR_FRAME              (LIBAVUTIL_VERSION_MAJOR < 56)
-#endif
-#ifndef FF_API_CRC_BIG_TABLE
-#define FF_API_CRC_BIG_TABLE            (LIBAVUTIL_VERSION_MAJOR < 56)
+#define FF_API_ERROR_FRAME              (LIBAVUTIL_VERSION_MAJOR < 57)
 #endif
 #ifndef FF_API_PKT_PTS
-#define FF_API_PKT_PTS                  (LIBAVUTIL_VERSION_MAJOR < 56)
+#define FF_API_PKT_PTS                  (LIBAVUTIL_VERSION_MAJOR < 57)
 #endif
 #ifndef FF_API_CRYPTO_SIZE_T
-#define FF_API_CRYPTO_SIZE_T            (LIBAVUTIL_VERSION_MAJOR < 56)
+#define FF_API_CRYPTO_SIZE_T            (LIBAVUTIL_VERSION_MAJOR < 57)
+#endif
+#ifndef FF_API_FRAME_GET_SET
+#define FF_API_FRAME_GET_SET            (LIBAVUTIL_VERSION_MAJOR < 57)
+#endif
+#ifndef FF_API_PSEUDOPAL
+#define FF_API_PSEUDOPAL                (LIBAVUTIL_VERSION_MAJOR < 57)
 #endif
 
 

diff --git a/libavutil/wchar_filename.h b/libavutil/wchar_filename.h
index 2ade321..142d50e 100644
--- a/libavutil/wchar_filename.h
+++ b/libavutil/wchar_filename.h

@@ -19,7 +19,7 @@
 #ifndef AVUTIL_WCHAR_FILENAME_H
 #define AVUTIL_WCHAR_FILENAME_H
 
-#if defined(_WIN32) && !defined(__MINGW32CE__)
+#ifdef _WIN32
 #include <windows.h>
 #include "mem.h"
 

diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index f33088c..bcd41a5 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c

@@ -97,6 +97,7 @@
     int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0;
     int family = 0, model = 0;
     union { int i[3]; char c[12]; } vendor;
+    int xcr0_lo = 0, xcr0_hi = 0;
 
     if (!cpuid_test())
         return 0; /* CPUID not supported */
@@ -126,14 +127,14 @@
             rval |= AV_CPU_FLAG_SSE4;
         if (ecx & 0x00100000 )
             rval |= AV_CPU_FLAG_SSE42;
-        if (ecx & 0x01000000 )
+        if (ecx & 0x02000000 )
             rval |= AV_CPU_FLAG_AESNI;
 #if HAVE_AVX
         /* Check OXSAVE and AVX bits */
         if ((ecx & 0x18000000) == 0x18000000) {
             /* Check for OS support */
-            xgetbv(0, eax, edx);
-            if ((eax & 0x6) == 0x6) {
+            xgetbv(0, xcr0_lo, xcr0_hi);
+            if ((xcr0_lo & 0x6) == 0x6) {
                 rval |= AV_CPU_FLAG_AVX;
                 if (ecx & 0x00001000)
                     rval |= AV_CPU_FLAG_FMA3;
@@ -147,6 +148,13 @@
 #if HAVE_AVX2
         if ((rval & AV_CPU_FLAG_AVX) && (ebx & 0x00000020))
             rval |= AV_CPU_FLAG_AVX2;
+#if HAVE_AVX512 /* F, CD, BW, DQ, VL */
+        if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
+            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000)
+                rval |= AV_CPU_FLAG_AVX512;
+
+        }
+#endif /* HAVE_AVX512 */
 #endif /* HAVE_AVX2 */
         /* BMI1/2 don't need OS support */
         if (ebx & 0x00000008) {
@@ -238,6 +246,8 @@
 {
     int flags = av_get_cpu_flags();
 
+    if (flags & AV_CPU_FLAG_AVX512)
+        return 64;
     if (flags & (AV_CPU_FLAG_AVX2      |
                  AV_CPU_FLAG_AVX       |
                  AV_CPU_FLAG_XOP       |

diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
index 309b8e7..937c697 100644
--- a/libavutil/x86/cpu.h
+++ b/libavutil/x86/cpu.h

@@ -19,7 +19,6 @@
 #ifndef AVUTIL_X86_CPU_H
 #define AVUTIL_X86_CPU_H
 
-#include "config.h"
 #include "libavutil/cpu.h"
 #include "libavutil/cpu_internal.h"
 
@@ -50,6 +49,7 @@
 #define X86_FMA4(flags)             CPUEXT(flags, FMA4)
 #define X86_AVX2(flags)             CPUEXT(flags, AVX2)
 #define X86_AESNI(flags)            CPUEXT(flags, AESNI)
+#define X86_AVX512(flags)           CPUEXT(flags, AVX512)
 
 #define EXTERNAL_AMD3DNOW(flags)    CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW)
 #define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOWEXT)
@@ -79,6 +79,7 @@
 #define EXTERNAL_AVX2_FAST(flags)   CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, AVX2, AVX)
 #define EXTERNAL_AVX2_SLOW(flags)   CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, AVX2, AVX)
 #define EXTERNAL_AESNI(flags)       CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI)
+#define EXTERNAL_AVX512(flags)      CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512)
 
 #define INLINE_AMD3DNOW(flags)      CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW)
 #define INLINE_AMD3DNOWEXT(flags)   CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT)

diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 06d2d2c..517fd63 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm

@@ -58,6 +58,39 @@
 VECTOR_FMUL
 %endif
 
+;-----------------------------------------------------------------------------
+; void vector_dmul(double *dst, const double *src0, const double *src1, int len)
+;-----------------------------------------------------------------------------
+%macro VECTOR_DMUL 0
+cglobal vector_dmul, 4,4,4, dst, src0, src1, len
+    lea       lend, [lenq*8 - mmsize*4]
+ALIGN 16
+.loop:
+    movaps    m0,     [src0q + lenq + 0*mmsize]
+    movaps    m1,     [src0q + lenq + 1*mmsize]
+    movaps    m2,     [src0q + lenq + 2*mmsize]
+    movaps    m3,     [src0q + lenq + 3*mmsize]
+    mulpd     m0, m0, [src1q + lenq + 0*mmsize]
+    mulpd     m1, m1, [src1q + lenq + 1*mmsize]
+    mulpd     m2, m2, [src1q + lenq + 2*mmsize]
+    mulpd     m3, m3, [src1q + lenq + 3*mmsize]
+    movaps    [dstq + lenq + 0*mmsize], m0
+    movaps    [dstq + lenq + 1*mmsize], m1
+    movaps    [dstq + lenq + 2*mmsize], m2
+    movaps    [dstq + lenq + 3*mmsize], m3
+
+    sub       lenq, mmsize*4
+    jge       .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+VECTOR_DMUL
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+VECTOR_DMUL
+%endif
+
 ;------------------------------------------------------------------------------
 ; void ff_vector_fmac_scalar(float *dst, const float *src, float mul, int len)
 ;------------------------------------------------------------------------------

diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 122087a..8826e4e 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c

@@ -29,6 +29,11 @@
 void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
                         int len);
 
+void ff_vector_dmul_sse2(double *dst, const double *src0, const double *src1,
+                         int len);
+void ff_vector_dmul_avx(double *dst, const double *src0, const double *src1,
+                        int len);
+
 void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
                                int len);
 void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
@@ -92,11 +97,13 @@
         fdsp->butterflies_float   = ff_butterflies_float_sse;
     }
     if (EXTERNAL_SSE2(cpu_flags)) {
+        fdsp->vector_dmul = ff_vector_dmul_sse2;
         fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_sse2;
         fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
     }
     if (EXTERNAL_AVX_FAST(cpu_flags)) {
         fdsp->vector_fmul = ff_vector_fmul_avx;
+        fdsp->vector_dmul = ff_vector_dmul_avx;
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
         fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
         fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_avx;

diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
index e83971c..40743fd 100644
--- a/libavutil/x86/intmath.h
+++ b/libavutil/x86/intmath.h

@@ -47,7 +47,8 @@
 #   endif
 #   define ff_log2_16bit av_log2
 
-#if defined(__INTEL_COMPILER) || (defined(_MSC_VER) && (_MSC_VER >= 1700))
+#if defined(__INTEL_COMPILER) || (defined(_MSC_VER) && (_MSC_VER >= 1700) && \
+                                  (defined(__BMI__) || !defined(__clang__)))
 #   define ff_ctz(v) _tzcnt_u32(v)
 
 #   if ARCH_X86_64

diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm
index 7af3007..36c57c5 100644
--- a/libavutil/x86/pixelutils.asm
+++ b/libavutil/x86/pixelutils.asm

@@ -104,8 +104,8 @@
     RET
 
 ;-------------------------------------------------------------------------------
-; int ff_pixelutils_sad_16x16_sse(const uint8_t *src1, ptrdiff_t stride1,
-;                                 const uint8_t *src2, ptrdiff_t stride2);
+; int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
+;                                  const uint8_t *src2, ptrdiff_t stride2);
 ;-------------------------------------------------------------------------------
 INIT_XMM sse2
 cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2
@@ -134,8 +134,8 @@
     RET
 
 ;-------------------------------------------------------------------------------
-; int ff_pixelutils_sad_[au]_16x16_sse(const uint8_t *src1, ptrdiff_t stride1,
-;                                      const uint8_t *src2, ptrdiff_t stride2);
+; int ff_pixelutils_sad_[au]_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
+;                                       const uint8_t *src2, ptrdiff_t stride2);
 ;-------------------------------------------------------------------------------
 %macro SAD_XMM_16x16 1
 INIT_XMM sse2
@@ -163,3 +163,224 @@
 
 SAD_XMM_16x16 a
 SAD_XMM_16x16 u
+
+
+%macro PROCESS_SAD_32x4_U 0
+    movu    m1,  [r2]
+    movu    m2,  [r2 + 16]
+    movu    m3,  [r0]
+    movu    m4,  [r0 + 16]
+    psadbw  m1,  m3
+    psadbw  m2,  m4
+    paddd   m1,  m2
+    paddd   m0,  m1
+    lea     r2,  [r2 + r3]
+    lea     r0,  [r0 + r1]
+
+    movu    m1,  [r2]
+    movu    m2,  [r2 + 16]
+    movu    m3,  [r0]
+    movu    m4,  [r0 + 16]
+    psadbw  m1,  m3
+    psadbw  m2,  m4
+    paddd   m1,  m2
+    paddd   m0,  m1
+    lea     r2,  [r2 + r3]
+    lea     r0,  [r0 + r1]
+
+    movu    m1,  [r2]
+    movu    m2,  [r2 + 16]
+    movu    m3,  [r0]
+    movu    m4,  [r0 + 16]
+    psadbw  m1,  m3
+    psadbw  m2,  m4
+    paddd   m1,  m2
+    paddd   m0,  m1
+    lea     r2,  [r2 + r3]
+    lea     r0,  [r0 + r1]
+
+    movu    m1,  [r2]
+    movu    m2,  [r2 + 16]
+    movu    m3,  [r0]
+    movu    m4,  [r0 + 16]
+    psadbw  m1,  m3
+    psadbw  m2,  m4
+    paddd   m1,  m2
+    paddd   m0,  m1
+    lea     r2,  [r2 + r3]
+    lea     r0,  [r0 + r1]
+%endmacro
+
+%macro PROCESS_SAD_32x4 1
+    mov%1   m1,  [r2]
+    mov%1   m2,  [r2 + 16]
+    psadbw  m1,  [r0]
+    psadbw  m2,  [r0 + 16]
+    paddd   m1,  m2
+    paddd   m0,  m1
+    lea     r2,  [r2 + r3]
+    lea     r0,  [r0 + r1]
+
+    mov%1   m1,  [r2]
+    mov%1   m2,  [r2 + 16]
+    psadbw  m1,  [r0]
+    psadbw  m2,  [r0 + 16]
+    paddd   m1,  m2
+    paddd   m0,  m1
+    lea     r2,  [r2 + r3]
+    lea     r0,  [r0 + r1]
+
+    mov%1   m1,  [r2]
+    mov%1   m2,  [r2 + 16]
+    psadbw  m1,  [r0]
+    psadbw  m2,  [r0 + 16]
+    paddd   m1,  m2
+    paddd   m0,  m1
+    lea     r2,  [r2 + r3]
+    lea     r0,  [r0 + r1]
+
+    mov%1   m1,  [r2]
+    mov%1   m2,  [r2 + 16]
+    psadbw  m1,  [r0]
+    psadbw  m2,  [r0 + 16]
+    paddd   m1,  m2
+    paddd   m0,  m1
+    lea     r2,  [r2 + r3]
+    lea     r0,  [r0 + r1]
+%endmacro
+
+;-----------------------------------------------------------------------------
+; int ff_pixelutils_sad_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
+;                                  const uint8_t *src2, ptrdiff_t stride2);
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal pixelutils_sad_32x32, 4,5,5, src1, stride1, src2, stride2
+    pxor  m0,  m0
+    mov   r4d, 4
+.loop:
+    PROCESS_SAD_32x4_U
+    PROCESS_SAD_32x4_U
+    dec r4d
+    jnz .loop
+
+    movhlps m1,  m0
+    paddd   m0,  m1
+    movd    eax, m0
+    RET
+
+;-------------------------------------------------------------------------------
+; int ff_pixelutils_sad_[au]_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
+;                                       const uint8_t *src2, ptrdiff_t stride2);
+;-------------------------------------------------------------------------------
+%macro SAD_XMM_32x32 1
+INIT_XMM sse2
+cglobal pixelutils_sad_%1_32x32, 4,5,3, src1, stride1, src2, stride2
+    pxor  m0,  m0
+    mov   r4d, 4
+.loop:
+    PROCESS_SAD_32x4 %1
+    PROCESS_SAD_32x4 %1
+    dec r4d
+    jnz .loop
+
+    movhlps m1,  m0
+    paddd   m0,  m1
+    movd    eax, m0
+    RET
+%endmacro
+
+SAD_XMM_32x32 a
+SAD_XMM_32x32 u
+
+%if HAVE_AVX2_EXTERNAL
+;-------------------------------------------------------------------------------
+; int ff_pixelutils_sad_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
+;                                  const uint8_t *src2, ptrdiff_t stride2);
+;-------------------------------------------------------------------------------
+INIT_YMM avx2
+cglobal pixelutils_sad_32x32, 4,7,5, src1, stride1, src2, stride2
+    pxor            m0, m0
+    mov             r4d, 32/4
+    lea             r5, [stride1q * 3]
+    lea             r6, [stride2q * 3]
+
+.loop:
+    movu           m1, [src1q]               ; row 0 of pix0
+    movu           m2, [src2q]               ; row 0 of pix1
+    movu           m3, [src1q + stride1q]    ; row 1 of pix0
+    movu           m4, [src2q + stride2q]    ; row 1 of pix1
+
+    psadbw         m1, m2
+    psadbw         m3, m4
+    paddd          m0, m1
+    paddd          m0, m3
+
+    movu           m1, [src1q + 2 * stride1q] ; row 2 of pix0
+    movu           m2, [src2q + 2 * stride2q] ; row 2 of pix1
+    movu           m3, [src1q + r5]           ; row 3 of pix0
+    movu           m4, [src2q + r6]           ; row 3 of pix1
+
+    psadbw         m1, m2
+    psadbw         m3, m4
+    paddd          m0, m1
+    paddd          m0, m3
+
+    lea            src2q,     [src2q + 4 * stride2q]
+    lea            src1q,     [src1q + 4 * stride1q]
+
+    dec            r4d
+    jnz           .loop
+
+    vextracti128   xm1, m0, 1
+    paddd          xm0, xm1
+    pshufd         xm1, xm0, 2
+    paddd          xm0, xm1
+    movd           eax, xm0
+    RET
+
+;-------------------------------------------------------------------------------
+; int ff_pixelutils_sad_[au]_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
+;                                       const uint8_t *src2, ptrdiff_t stride2);
+;-------------------------------------------------------------------------------
+%macro SAD_AVX2_32x32 1
+INIT_YMM avx2
+cglobal pixelutils_sad_%1_32x32, 4,7,3, src1, stride1, src2, stride2
+    pxor           m0, m0
+    mov            r4d, 32/4
+    lea            r5, [stride1q * 3]
+    lea            r6, [stride2q * 3]
+
+.loop:
+    mov%1          m1, [src2q]                ; row 0 of pix1
+    psadbw         m1, [src1q]
+    mov%1          m2, [src2q + stride2q]     ; row 1 of pix1
+    psadbw         m2, [src1q + stride1q]
+
+    paddd          m0, m1
+    paddd          m0, m2
+
+    mov%1          m1, [src2q + 2 * stride2q] ; row 2 of pix1
+    psadbw         m1, [src1q + 2 * stride1q]
+    mov%1          m2, [src2q + r6]           ; row 3 of pix1
+    psadbw         m2, [src1q + r5]
+
+    paddd          m0, m1
+    paddd          m0, m2
+
+    lea            src2q,     [src2q + 4 * stride2q]
+    lea            src1q,     [src1q + 4 * stride1q]
+
+    dec            r4d
+    jnz           .loop
+
+    vextracti128   xm1, m0, 1
+    paddd          xm0, xm1
+    pshufd         xm1, xm0, 2
+    paddd          xm0, xm1
+    movd           eax, xm0
+    RET
+%endmacro
+
+SAD_AVX2_32x32 a
+SAD_AVX2_32x32 u
+%endif

diff --git a/libavutil/x86/pixelutils_init.c b/libavutil/x86/pixelutils_init.c
index c24a533..184a3a4 100644
--- a/libavutil/x86/pixelutils_init.c
+++ b/libavutil/x86/pixelutils_init.c

@@ -35,6 +35,20 @@
 int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
                                    const uint8_t *src2, ptrdiff_t stride2);
 
+int ff_pixelutils_sad_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
+                                 const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_a_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
+                                   const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_u_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
+                                   const uint8_t *src2, ptrdiff_t stride2);
+
+int ff_pixelutils_sad_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
+                                 const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_a_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
+                                   const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_u_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
+                                   const uint8_t *src2, ptrdiff_t stride2);
+
 void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -61,4 +75,20 @@
         case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1   aligned, src2   aligned
         }
     }
+
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        switch (aligned) {
+        case 0: sad[4] = ff_pixelutils_sad_32x32_sse2;   break; // src1 unaligned, src2 unaligned
+        case 1: sad[4] = ff_pixelutils_sad_u_32x32_sse2; break; // src1   aligned, src2 unaligned
+        case 2: sad[4] = ff_pixelutils_sad_a_32x32_sse2; break; // src1   aligned, src2   aligned
+        }
+    }
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        switch (aligned) {
+        case 0: sad[4] = ff_pixelutils_sad_32x32_avx2;   break; // src1 unaligned, src2 unaligned
+        case 1: sad[4] = ff_pixelutils_sad_u_32x32_avx2; break; // src1   aligned, src2 unaligned
+        case 2: sad[4] = ff_pixelutils_sad_a_32x32_avx2; break; // src1   aligned, src2   aligned
+        }
+    }
 }

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 6a054a3..5044ee8 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm

@@ -1,12 +1,12 @@
 ;*****************************************************************************
 ;* x86inc.asm: x264asm abstraction layer
 ;*****************************************************************************
-;* Copyright (C) 2005-2017 x264 project
+;* Copyright (C) 2005-2018 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
+;*          Henrik Gramner <henrik@gramner.com>
 ;*          Anton Mitrofanov <BugMaster@narod.ru>
 ;*          Fiona Glaser <fiona@x264.com>
-;*          Henrik Gramner <henrik@gramner.com>
 ;*
 ;* Permission to use, copy, modify, and/or distribute this software for any
 ;* purpose with or without fee is hereby granted, provided that the above
@@ -90,6 +90,10 @@
         SECTION .text
     %elifidn __OUTPUT_FORMAT__,coff
         SECTION .text
+    %elifidn __OUTPUT_FORMAT__,win32
+        SECTION .rdata align=%1
+    %elif WIN64
+        SECTION .rdata align=%1
     %else
         SECTION .rodata align=%1
     %endif
@@ -337,6 +341,8 @@
 %endmacro
 
 %define required_stack_alignment ((mmsize + 15) & ~15)
+%define vzeroupper_required (mmsize > 16 && (ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512)))
+%define high_mm_regs (16*cpuflag(avx512))
 
 %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
     %ifnum %1
@@ -450,15 +456,16 @@
 
 %macro WIN64_PUSH_XMM 0
     ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
-    %if xmm_regs_used > 6
+    %if xmm_regs_used > 6 + high_mm_regs
         movaps [rstk + stack_offset +  8], xmm6
     %endif
-    %if xmm_regs_used > 7
+    %if xmm_regs_used > 7 + high_mm_regs
         movaps [rstk + stack_offset + 24], xmm7
     %endif
-    %if xmm_regs_used > 8
+    %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
+    %if %%xmm_regs_on_stack > 0
         %assign %%i 8
-        %rep xmm_regs_used-8
+        %rep %%xmm_regs_on_stack
             movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
             %assign %%i %%i+1
         %endrep
@@ -467,10 +474,11 @@
 
 %macro WIN64_SPILL_XMM 1
     %assign xmm_regs_used %1
-    ASSERT xmm_regs_used <= 16
-    %if xmm_regs_used > 8
+    ASSERT xmm_regs_used <= 16 + high_mm_regs
+    %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
+    %if %%xmm_regs_on_stack > 0
         ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
-        %assign %%pad (xmm_regs_used-8)*16 + 32
+        %assign %%pad %%xmm_regs_on_stack*16 + 32
         %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
         SUB rsp, stack_size_padded
     %endif
@@ -479,9 +487,10 @@
 
 %macro WIN64_RESTORE_XMM_INTERNAL 0
     %assign %%pad_size 0
-    %if xmm_regs_used > 8
-        %assign %%i xmm_regs_used
-        %rep xmm_regs_used-8
+    %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
+    %if %%xmm_regs_on_stack > 0
+        %assign %%i xmm_regs_used - high_mm_regs
+        %rep %%xmm_regs_on_stack
             %assign %%i %%i-1
             movaps xmm %+ %%i, [rsp + (%%i-8)*16 + stack_size + 32]
         %endrep
@@ -494,10 +503,10 @@
             %assign %%pad_size stack_size_padded
         %endif
     %endif
-    %if xmm_regs_used > 7
+    %if xmm_regs_used > 7 + high_mm_regs
         movaps xmm7, [rsp + stack_offset - %%pad_size + 24]
     %endif
-    %if xmm_regs_used > 6
+    %if xmm_regs_used > 6 + high_mm_regs
         movaps xmm6, [rsp + stack_offset - %%pad_size +  8]
     %endif
 %endmacro
@@ -509,12 +518,12 @@
     %assign xmm_regs_used 0
 %endmacro
 
-%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 || stack_size > 0
+%define has_epilogue regs_used > 7 || stack_size > 0 || vzeroupper_required || xmm_regs_used > 6+high_mm_regs
 
 %macro RET 0
     WIN64_RESTORE_XMM_INTERNAL
     POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
-    %if mmsize == 32
+    %if vzeroupper_required
         vzeroupper
     %endif
     AUTO_REP_RET
@@ -538,9 +547,10 @@
 DECLARE_REG 13, R12, 64
 DECLARE_REG 14, R13, 72
 
-%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
+%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
     %assign num_args %1
     %assign regs_used %2
+    %assign xmm_regs_used %3
     ASSERT regs_used >= num_args
     SETUP_STACK_POINTER %4
     ASSERT regs_used <= 15
@@ -550,7 +560,7 @@
     DEFINE_ARGS_INTERNAL %0, %4, %5
 %endmacro
 
-%define has_epilogue regs_used > 9 || mmsize == 32 || stack_size > 0
+%define has_epilogue regs_used > 9 || stack_size > 0 || vzeroupper_required
 
 %macro RET 0
     %if stack_size_padded > 0
@@ -561,7 +571,7 @@
         %endif
     %endif
     POP_IF_USED 14, 13, 12, 11, 10, 9
-    %if mmsize == 32
+    %if vzeroupper_required
         vzeroupper
     %endif
     AUTO_REP_RET
@@ -606,7 +616,7 @@
     DEFINE_ARGS_INTERNAL %0, %4, %5
 %endmacro
 
-%define has_epilogue regs_used > 3 || mmsize == 32 || stack_size > 0
+%define has_epilogue regs_used > 3 || stack_size > 0 || vzeroupper_required
 
 %macro RET 0
     %if stack_size_padded > 0
@@ -617,7 +627,7 @@
         %endif
     %endif
     POP_IF_USED 6, 5, 4, 3
-    %if mmsize == 32
+    %if vzeroupper_required
         vzeroupper
     %endif
     AUTO_REP_RET
@@ -727,12 +737,22 @@
     %assign stack_offset 0      ; stack pointer offset relative to the return address
     %assign stack_size 0        ; amount of stack space that can be freely used inside a function
     %assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding
-    %assign xmm_regs_used 0     ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64
+    %assign xmm_regs_used 0     ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64 and vzeroupper
     %ifnidn %3, ""
         PROLOGUE %3
     %endif
 %endmacro
 
+; Create a global symbol from a local label with the correct name mangling and type
+%macro cglobal_label 1
+    %if FORMAT_ELF
+        global current_function %+ %1:function hidden
+    %else
+        global current_function %+ %1
+    %endif
+    %1:
+%endmacro
+
 %macro cextern 1
     %xdefine %1 mangle(private_prefix %+ _ %+ %1)
     CAT_XDEFINE cglobaled_, %1, 1
@@ -803,10 +823,10 @@
 %assign cpuflags_bmi1     (1<<17)| cpuflags_avx|cpuflags_lzcnt
 %assign cpuflags_bmi2     (1<<18)| cpuflags_bmi1
 %assign cpuflags_avx2     (1<<19)| cpuflags_fma3|cpuflags_bmi2
+%assign cpuflags_avx512   (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
 
-%assign cpuflags_cache32  (1<<20)
-%assign cpuflags_cache64  (1<<21)
-%assign cpuflags_slowctz  (1<<22)
+%assign cpuflags_cache32  (1<<21)
+%assign cpuflags_cache64  (1<<22)
 %assign cpuflags_aligned  (1<<23) ; not a cpu feature, but a function variant
 %assign cpuflags_atom     (1<<24)
 
@@ -856,11 +876,12 @@
     %endif
 %endmacro
 
-; Merge mmx and sse*
+; Merge mmx, sse*, and avx*
 ; m# is a simd register of the currently selected size
 ; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m#
 ; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m#
-; (All 3 remain in sync through SWAP.)
+; zm# is the corresponding zmm register if mmsize >= 64, otherwise the same as m#
+; (All 4 remain in sync through SWAP.)
 
 %macro CAT_XDEFINE 3
     %xdefine %1%2 %3
@@ -870,69 +891,99 @@
     %undef %1%2
 %endmacro
 
+%macro DEFINE_MMREGS 1 ; mmtype
+    %assign %%prev_mmregs 0
+    %ifdef num_mmregs
+        %assign %%prev_mmregs num_mmregs
+    %endif
+
+    %assign num_mmregs 8
+    %if ARCH_X86_64 && mmsize >= 16
+        %assign num_mmregs 16
+        %if cpuflag(avx512) || mmsize == 64
+            %assign num_mmregs 32
+        %endif
+    %endif
+
+    %assign %%i 0
+    %rep num_mmregs
+        CAT_XDEFINE m, %%i, %1 %+ %%i
+        CAT_XDEFINE nn%1, %%i, %%i
+        %assign %%i %%i+1
+    %endrep
+    %if %%prev_mmregs > num_mmregs
+        %rep %%prev_mmregs - num_mmregs
+            CAT_UNDEF m, %%i
+            CAT_UNDEF nn %+ mmtype, %%i
+            %assign %%i %%i+1
+        %endrep
+    %endif
+    %xdefine mmtype %1
+%endmacro
+
+; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper
+%macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg
+    %if ARCH_X86_64 && cpuflag(avx512)
+        %assign %%i %1
+        %rep 16-%1
+            %assign %%i_high %%i+16
+            SWAP %%i, %%i_high
+            %assign %%i %%i+1
+        %endrep
+    %endif
+%endmacro
+
 %macro INIT_MMX 0-1+
     %assign avx_enabled 0
     %define RESET_MM_PERMUTATION INIT_MMX %1
     %define mmsize 8
-    %define num_mmregs 8
     %define mova movq
     %define movu movq
     %define movh movd
     %define movnta movntq
-    %assign %%i 0
-    %rep 8
-        CAT_XDEFINE m, %%i, mm %+ %%i
-        CAT_XDEFINE nnmm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
-    %rep 8
-        CAT_UNDEF m, %%i
-        CAT_UNDEF nnmm, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS mm
 %endmacro
 
 %macro INIT_XMM 0-1+
     %assign avx_enabled 0
     %define RESET_MM_PERMUTATION INIT_XMM %1
     %define mmsize 16
-    %define num_mmregs 8
-    %if ARCH_X86_64
-        %define num_mmregs 16
-    %endif
     %define mova movdqa
     %define movu movdqu
     %define movh movq
     %define movnta movntdq
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE m, %%i, xmm %+ %%i
-        CAT_XDEFINE nnxmm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS xmm
+    %if WIN64
+        AVX512_MM_PERMUTATION 6 ; Swap callee-saved registers with volatile registers
+    %endif
 %endmacro
 
 %macro INIT_YMM 0-1+
     %assign avx_enabled 1
     %define RESET_MM_PERMUTATION INIT_YMM %1
     %define mmsize 32
-    %define num_mmregs 8
-    %if ARCH_X86_64
-        %define num_mmregs 16
-    %endif
     %define mova movdqa
     %define movu movdqu
     %undef movh
     %define movnta movntdq
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE m, %%i, ymm %+ %%i
-        CAT_XDEFINE nnymm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS ymm
+    AVX512_MM_PERMUTATION
+%endmacro
+
+%macro INIT_ZMM 0-1+
+    %assign avx_enabled 1
+    %define RESET_MM_PERMUTATION INIT_ZMM %1
+    %define mmsize 64
+    %define mova movdqa
+    %define movu movdqu
+    %undef movh
+    %define movnta movntdq
+    INIT_CPUFLAGS %1
+    DEFINE_MMREGS zmm
+    AVX512_MM_PERMUTATION
 %endmacro
 
 INIT_XMM
@@ -941,18 +992,26 @@
     %define  mmmm%1   mm%1
     %define  mmxmm%1  mm%1
     %define  mmymm%1  mm%1
+    %define  mmzmm%1  mm%1
     %define xmmmm%1   mm%1
     %define xmmxmm%1 xmm%1
     %define xmmymm%1 xmm%1
+    %define xmmzmm%1 xmm%1
     %define ymmmm%1   mm%1
     %define ymmxmm%1 xmm%1
     %define ymmymm%1 ymm%1
+    %define ymmzmm%1 ymm%1
+    %define zmmmm%1   mm%1
+    %define zmmxmm%1 xmm%1
+    %define zmmymm%1 ymm%1
+    %define zmmzmm%1 zmm%1
     %define xm%1 xmm %+ m%1
     %define ym%1 ymm %+ m%1
+    %define zm%1 zmm %+ m%1
 %endmacro
 
 %assign i 0
-%rep 16
+%rep 32
     DECLARE_MMCAST i
     %assign i i+1
 %endrep
@@ -1087,12 +1146,17 @@
 ;=============================================================================
 
 %assign i 0
-%rep 16
+%rep 32
     %if i < 8
         CAT_XDEFINE sizeofmm, i, 8
+        CAT_XDEFINE regnumofmm, i, i
     %endif
     CAT_XDEFINE sizeofxmm, i, 16
     CAT_XDEFINE sizeofymm, i, 32
+    CAT_XDEFINE sizeofzmm, i, 64
+    CAT_XDEFINE regnumofxmm, i, i
+    CAT_XDEFINE regnumofymm, i, i
+    CAT_XDEFINE regnumofzmm, i, i
     %assign i i+1
 %endrep
 %undef i
@@ -1209,7 +1273,7 @@
     %endmacro
 %endmacro
 
-; Instructions with both VEX and non-VEX encodings
+; Instructions with both VEX/EVEX and legacy encodings
 ; Non-destructive instructions are written without parameters
 AVX_INSTR addpd, sse2, 1, 0, 1
 AVX_INSTR addps, sse, 1, 0, 1
@@ -1231,10 +1295,42 @@
 AVX_INSTR blendps, sse4, 1, 1, 0
 AVX_INSTR blendvpd, sse4 ; can't be emulated
 AVX_INSTR blendvps, sse4 ; can't be emulated
+AVX_INSTR cmpeqpd, sse2, 1, 0, 1
+AVX_INSTR cmpeqps, sse, 1, 0, 1
+AVX_INSTR cmpeqsd, sse2, 1, 0, 0
+AVX_INSTR cmpeqss, sse, 1, 0, 0
+AVX_INSTR cmplepd, sse2, 1, 0, 0
+AVX_INSTR cmpleps, sse, 1, 0, 0
+AVX_INSTR cmplesd, sse2, 1, 0, 0
+AVX_INSTR cmpless, sse, 1, 0, 0
+AVX_INSTR cmpltpd, sse2, 1, 0, 0
+AVX_INSTR cmpltps, sse, 1, 0, 0
+AVX_INSTR cmpltsd, sse2, 1, 0, 0
+AVX_INSTR cmpltss, sse, 1, 0, 0
+AVX_INSTR cmpneqpd, sse2, 1, 0, 1
+AVX_INSTR cmpneqps, sse, 1, 0, 1
+AVX_INSTR cmpneqsd, sse2, 1, 0, 0
+AVX_INSTR cmpneqss, sse, 1, 0, 0
+AVX_INSTR cmpnlepd, sse2, 1, 0, 0
+AVX_INSTR cmpnleps, sse, 1, 0, 0
+AVX_INSTR cmpnlesd, sse2, 1, 0, 0
+AVX_INSTR cmpnless, sse, 1, 0, 0
+AVX_INSTR cmpnltpd, sse2, 1, 0, 0
+AVX_INSTR cmpnltps, sse, 1, 0, 0
+AVX_INSTR cmpnltsd, sse2, 1, 0, 0
+AVX_INSTR cmpnltss, sse, 1, 0, 0
+AVX_INSTR cmpordpd, sse2 1, 0, 1
+AVX_INSTR cmpordps, sse 1, 0, 1
+AVX_INSTR cmpordsd, sse2 1, 0, 0
+AVX_INSTR cmpordss, sse 1, 0, 0
 AVX_INSTR cmppd, sse2, 1, 1, 0
 AVX_INSTR cmpps, sse, 1, 1, 0
 AVX_INSTR cmpsd, sse2, 1, 1, 0
 AVX_INSTR cmpss, sse, 1, 1, 0
+AVX_INSTR cmpunordpd, sse2, 1, 0, 1
+AVX_INSTR cmpunordps, sse, 1, 0, 1
+AVX_INSTR cmpunordsd, sse2, 1, 0, 0
+AVX_INSTR cmpunordss, sse, 1, 0, 0
 AVX_INSTR comisd, sse2
 AVX_INSTR comiss, sse
 AVX_INSTR cvtdq2pd, sse2
@@ -1545,6 +1641,52 @@
 FMA4_INSTR fnmadd,   pd, ps, sd, ss
 FMA4_INSTR fnmsub,   pd, ps, sd, ss
 
+; Macros for converting VEX instructions to equivalent EVEX ones.
+%macro EVEX_INSTR 2-3 0 ; vex, evex, prefer_evex
+    %macro %1 2-7 fnord, fnord, %1, %2, %3
+        %ifidn %3, fnord
+            %define %%args %1, %2
+        %elifidn %4, fnord
+            %define %%args %1, %2, %3
+        %else
+            %define %%args %1, %2, %3, %4
+        %endif
+        %assign %%evex_required cpuflag(avx512) & %7
+        %ifnum regnumof%1
+            %if regnumof%1 >= 16 || sizeof%1 > 32
+                %assign %%evex_required 1
+            %endif
+        %endif
+        %ifnum regnumof%2
+            %if regnumof%2 >= 16 || sizeof%2 > 32
+                %assign %%evex_required 1
+            %endif
+        %endif
+        %if %%evex_required
+            %6 %%args
+        %else
+            %5 %%args ; Prefer VEX over EVEX due to shorter instruction length
+        %endif
+    %endmacro
+%endmacro
+
+EVEX_INSTR vbroadcastf128, vbroadcastf32x4
+EVEX_INSTR vbroadcasti128, vbroadcasti32x4
+EVEX_INSTR vextractf128,   vextractf32x4
+EVEX_INSTR vextracti128,   vextracti32x4
+EVEX_INSTR vinsertf128,    vinsertf32x4
+EVEX_INSTR vinserti128,    vinserti32x4
+EVEX_INSTR vmovdqa,        vmovdqa32
+EVEX_INSTR vmovdqu,        vmovdqu32
+EVEX_INSTR vpand,          vpandd
+EVEX_INSTR vpandn,         vpandnd
+EVEX_INSTR vpor,           vpord
+EVEX_INSTR vpxor,          vpxord
+EVEX_INSTR vrcpps,         vrcp14ps,   1 ; EVEX versions have higher precision
+EVEX_INSTR vrcpss,         vrcp14ss,   1
+EVEX_INSTR vrsqrtps,       vrsqrt14ps, 1
+EVEX_INSTR vrsqrtss,       vrsqrt14ss, 1
+
 ; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
 %ifdef __YASM_VER__
     %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0

diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index e1220df..d7cd996 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm

@@ -357,7 +357,7 @@
 %endif
 %endmacro
 
-%macro ABSB 2 ; source mmreg, temp mmreg (unused for ssse3)
+%macro ABSB 2 ; source mmreg, temp mmreg (unused for SSSE3)
 %if cpuflag(ssse3)
     pabsb   %1, %1
 %else
@@ -381,7 +381,7 @@
 %endif
 %endmacro
 
-%macro ABSD2_MMX 4
+%macro ABSD2 4
     pxor    %3, %3
     pxor    %4, %4
     pcmpgtd %3, %1
@@ -475,7 +475,7 @@
 %else
     palignr %1, %2, %3
 %endif
-%elif cpuflag(mmx) ; [dst,] src1, src2, imm, tmp
+%else ; [dst,] src1, src2, imm, tmp
     %define %%dst %1
 %if %0==5
 %ifnidn %1, %2
@@ -799,37 +799,47 @@
     pminsw %1, %3
 %endmacro
 
-%macro PMINSD_MMX 3 ; dst, src, tmp
+%macro PMINSD 3 ; dst, src, tmp/unused
+%if cpuflag(sse4)
+    pminsd    %1, %2
+%elif cpuflag(sse2)
+    cvtdq2ps  %1, %1
+    minps     %1, %2
+    cvtps2dq  %1, %1
+%else
     mova      %3, %2
     pcmpgtd   %3, %1
     pxor      %1, %2
     pand      %1, %3
     pxor      %1, %2
+%endif
 %endmacro
 
-%macro PMAXSD_MMX 3 ; dst, src, tmp
+%macro PMAXSD 3 ; dst, src, tmp/unused
+%if cpuflag(sse4)
+    pmaxsd    %1, %2
+%else
     mova      %3, %1
     pcmpgtd   %3, %2
     pand      %1, %3
     pandn     %3, %2
     por       %1, %3
+%endif
 %endmacro
 
-%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp
-    PMINSD_MMX %1, %3, %4
-    PMAXSD_MMX %1, %2, %4
-%endmacro
-
-%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused
+%macro CLIPD 3-4
+%if cpuflag(sse4);  src/dst, min, max, unused
+    pminsd  %1, %3
+    pmaxsd  %1, %2
+%elif cpuflag(sse2) ; src/dst, min (float), max (float), unused
     cvtdq2ps  %1, %1
     minps     %1, %3
     maxps     %1, %2
     cvtps2dq  %1, %1
-%endmacro
-
-%macro CLIPD_SSE41 3-4 ;  src/dst, min, max, unused
-    pminsd  %1, %3
-    pmaxsd  %1, %2
+%else               ; src/dst, min, max, tmp
+    PMINSD    %1, %3, %4
+    PMAXSD    %1, %2, %4
+%endif
 %endmacro
 
 %macro VBROADCASTSS 2 ; dst xmm/ymm, src m32/xmm
@@ -880,6 +890,14 @@
 %endif
 %endmacro
 
+%macro VBROADCASTI128 2 ; dst xmm/ymm, src : 128bits val
+%if mmsize > 16
+    vbroadcasti128 %1, %2
+%else
+    mova           %1, %2
+%endif
+%endmacro
+
 %macro SHUFFLE_MASK_W 8
     %rep 8
         %if %1>=0x80

diff --git a/libavutil/xga_font_data.h b/libavutil/xga_font_data.h
index 5e40f54..69dc337 100644
--- a/libavutil/xga_font_data.h
+++ b/libavutil/xga_font_data.h

@@ -29,7 +29,7 @@
 #include <stdint.h>
 #include "internal.h"
 
-extern av_export const uint8_t avpriv_cga_font[2048];
-extern av_export const uint8_t avpriv_vga16_font[4096];
+extern av_export_avutil const uint8_t avpriv_cga_font[2048];
+extern av_export_avutil const uint8_t avpriv_vga16_font[4096];
 
 #endif /* AVUTIL_XGA_FONT_DATA_H */

diff --git a/libpostproc/version.h b/libpostproc/version.h
index e8f0abe..f372548 100644
--- a/libpostproc/version.h
+++ b/libpostproc/version.h

@@ -28,8 +28,8 @@
 
 #include "libavutil/avutil.h"
 
-#define LIBPOSTPROC_VERSION_MAJOR  54
-#define LIBPOSTPROC_VERSION_MINOR   7
+#define LIBPOSTPROC_VERSION_MAJOR  55
+#define LIBPOSTPROC_VERSION_MINOR   2
 #define LIBPOSTPROC_VERSION_MICRO 100
 
 #define LIBPOSTPROC_VERSION_INT AV_VERSION_INT(LIBPOSTPROC_VERSION_MAJOR, \
@@ -42,8 +42,4 @@
 
 #define LIBPOSTPROC_IDENT       "postproc" AV_STRINGIFY(LIBPOSTPROC_VERSION)
 
-#ifndef FF_API_QP_TYPE
-#define FF_API_QP_TYPE     (LIBPOSTPROC_VERSION_MAJOR < 55)
-#endif
-
 #endif /* POSTPROC_VERSION_H */

diff --git a/libswresample/arm/audio_convert_neon.S b/libswresample/arm/audio_convert_neon.S
index 1f88316..085d50a 100644
--- a/libswresample/arm/audio_convert_neon.S
+++ b/libswresample/arm/audio_convert_neon.S

@@ -22,6 +22,7 @@
 #include "libavutil/arm/asm.S"
 
 function swri_oldapi_conv_flt_to_s16_neon, export=1
+.L_swri_oldapi_conv_flt_to_s16_neon:
         subs            r2,  r2,  #8
         vld1.32         {q0},     [r1,:128]!
         vcvt.s32.f32    q8,  q0,  #31
@@ -66,6 +67,7 @@
 endfunc
 
 function swri_oldapi_conv_fltp_to_s16_2ch_neon, export=1
+.L_swri_oldapi_conv_fltp_to_s16_2ch_neon:
         ldm             r1,  {r1, r3}
         subs            r2,  r2,  #8
         vld1.32         {q0},     [r1,:128]!
@@ -133,8 +135,8 @@
         cmp             r3,  #2
         itt             lt
         ldrlt           r1,  [r1]
-        blt             X(swri_oldapi_conv_flt_to_s16_neon)
-        beq             X(swri_oldapi_conv_fltp_to_s16_2ch_neon)
+        blt             .L_swri_oldapi_conv_flt_to_s16_neon
+        beq             .L_swri_oldapi_conv_fltp_to_s16_2ch_neon
 
         push            {r4-r8, lr}
         cmp             r3,  #4

diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
index 66a43c1..6b5feaa 100644
--- a/libswresample/rematrix.c
+++ b/libswresample/rematrix.c

@@ -69,8 +69,10 @@
         return AVERROR(EINVAL);
     memset(s->matrix, 0, sizeof(s->matrix));
     memset(s->matrix_flt, 0, sizeof(s->matrix_flt));
-    nb_in  = av_get_channel_layout_nb_channels(s->user_in_ch_layout);
-    nb_out = av_get_channel_layout_nb_channels(s->user_out_ch_layout);
+    nb_in = (s->user_in_ch_count > 0) ? s->user_in_ch_count :
+        av_get_channel_layout_nb_channels(s->user_in_ch_layout);
+    nb_out = (s->user_out_ch_count > 0) ? s->user_out_ch_count :
+        av_get_channel_layout_nb_channels(s->user_out_ch_layout);
     for (out = 0; out < nb_out; out++) {
         for (in = 0; in < nb_in; in++)
             s->matrix_flt[out][in] = s->matrix[out][in] = matrix[in];
@@ -382,8 +384,8 @@
 
 av_cold int swri_rematrix_init(SwrContext *s){
     int i, j;
-    int nb_in  = av_get_channel_layout_nb_channels(s->in_ch_layout);
-    int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout);
+    int nb_in  = s->used_ch_count;
+    int nb_out = s->out.ch_count;
 
     s->mix_any_f = NULL;
 
@@ -445,14 +447,23 @@
         s->mix_2_1_f = (mix_2_1_func_type*)sum2_double;
         s->mix_any_f = (mix_any_func_type*)get_mix_any_func_double(s);
     }else if(s->midbuf.fmt == AV_SAMPLE_FMT_S32P){
-        // Only for dithering currently
-//         s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double));
         s->native_one    = av_mallocz(sizeof(int));
         if (!s->native_one)
             return AVERROR(ENOMEM);
-//         for (i = 0; i < nb_out; i++)
-//             for (j = 0; j < nb_in; j++)
-//                 ((double*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j];
+        s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int));
+        if (!s->native_matrix) {
+            av_freep(&s->native_one);
+            return AVERROR(ENOMEM);
+        }
+        for (i = 0; i < nb_out; i++) {
+            double rem = 0;
+
+            for (j = 0; j < nb_in; j++) {
+                double target = s->matrix[i][j] * 32768 + rem;
+                ((int*)s->native_matrix)[i * nb_in + j] = lrintf(target);
+                rem += target - ((int*)s->native_matrix)[i * nb_in + j];
+            }
+        }
         *((int*)s->native_one) = 32768;
         s->mix_1_1_f = (mix_1_1_func_type*)copy_s32;
         s->mix_2_1_f = (mix_2_1_func_type*)sum2_s32;

diff --git a/libswresample/swresample.c b/libswresample/swresample.c
index 74c96dc..6d28e6a 100644
--- a/libswresample/swresample.c
+++ b/libswresample/swresample.c

@@ -240,7 +240,7 @@
         &&s->int_sample_fmt != AV_SAMPLE_FMT_S64P
         &&s->int_sample_fmt != AV_SAMPLE_FMT_FLTP
         &&s->int_sample_fmt != AV_SAMPLE_FMT_DBLP){
-        av_log(s, AV_LOG_ERROR, "Requested sample format %s is not supported internally, S16/S32/S64/FLT/DBL is supported\n", av_get_sample_fmt_name(s->int_sample_fmt));
+        av_log(s, AV_LOG_ERROR, "Requested sample format %s is not supported internally, s16p/s32p/s64p/fltp/dblp are supported\n", av_get_sample_fmt_name(s->int_sample_fmt));
         return AVERROR(EINVAL);
     }
 
@@ -276,7 +276,7 @@
         && s->int_sample_fmt != AV_SAMPLE_FMT_FLTP
         && s->int_sample_fmt != AV_SAMPLE_FMT_DBLP
         && s->resample){
-        av_log(s, AV_LOG_ERROR, "Resampling only supported with internal s16/s32/flt/dbl\n");
+        av_log(s, AV_LOG_ERROR, "Resampling only supported with internal s16p/s32p/fltp/dblp\n");
         ret = AVERROR(EINVAL);
         goto fail;
     }
@@ -318,7 +318,7 @@
 
 av_assert0(s->used_ch_count);
 av_assert0(s->out.ch_count);
-    s->resample_first= RSC*s->out.ch_count/s->in.ch_count - RSC < s->out_sample_rate/(float)s-> in_sample_rate - 1.0;
+    s->resample_first= RSC*s->out.ch_count/s->used_ch_count - RSC < s->out_sample_rate/(float)s-> in_sample_rate - 1.0;
 
     s->in_buffer= s->in;
     s->silence  = s->in;
@@ -678,7 +678,7 @@
                             s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_simd_one, 0, 0, len1);
                     if(out_count != len1)
                         for(ch=0; ch<preout->ch_count; ch++)
-                            s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off + len1, s->native_one, 0, 0, out_count - len1);
+                            s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off, s->native_one, 0, 0, out_count - len1);
                 } else {
                     for(ch=0; ch<preout->ch_count; ch++)
                         s->mix_2_1_f(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_one, 0, 0, out_count);

diff --git a/libswresample/version.h b/libswresample/version.h
index 6a66173..b8b5bee 100644
--- a/libswresample/version.h
+++ b/libswresample/version.h

@@ -28,8 +28,8 @@
 
 #include "libavutil/avutil.h"
 
-#define LIBSWRESAMPLE_VERSION_MAJOR   2
-#define LIBSWRESAMPLE_VERSION_MINOR   9
+#define LIBSWRESAMPLE_VERSION_MAJOR   3
+#define LIBSWRESAMPLE_VERSION_MINOR   2
 #define LIBSWRESAMPLE_VERSION_MICRO 100
 
 #define LIBSWRESAMPLE_VERSION_INT  AV_VERSION_INT(LIBSWRESAMPLE_VERSION_MAJOR, \

diff --git a/libswresample/x86/rematrix_init.c b/libswresample/x86/rematrix_init.c
index d71b41a..d6616f8 100644
--- a/libswresample/x86/rematrix_init.c
+++ b/libswresample/x86/rematrix_init.c

@@ -33,8 +33,8 @@
 av_cold int swri_rematrix_init_x86(struct SwrContext *s){
 #if HAVE_X86ASM
     int mm_flags = av_get_cpu_flags();
-    int nb_in  = av_get_channel_layout_nb_channels(s->in_ch_layout);
-    int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout);
+    int nb_in  = s->used_ch_count;
+    int nb_out = s->out.ch_count;
     int num    = nb_in * nb_out;
     int i,j;
 

diff --git a/libswscale/arm/rgb2yuv_neon_16.S b/libswscale/arm/rgb2yuv_neon_16.S
index 601bc9a..ad7e679 100644
--- a/libswscale/arm/rgb2yuv_neon_16.S
+++ b/libswscale/arm/rgb2yuv_neon_16.S

@@ -18,6 +18,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "config.h"
+#if HAVE_AS_DN_DIRECTIVE
 #include "rgb2yuv_neon_common.S"
 
 /* downsampled R16G16B16 x8 */
@@ -78,3 +80,4 @@
 .endm
 
     loop_420sp  rgbx, nv12, init, kernel_420_16x2, 16
+#endif

diff --git a/libswscale/arm/rgb2yuv_neon_32.S b/libswscale/arm/rgb2yuv_neon_32.S
index f51a5f1..4fd0f64 100644
--- a/libswscale/arm/rgb2yuv_neon_32.S
+++ b/libswscale/arm/rgb2yuv_neon_32.S

@@ -18,6 +18,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "config.h"
+#if HAVE_AS_DN_DIRECTIVE
 #include "rgb2yuv_neon_common.S"
 
 /* downsampled R16G16B16 x8 */
@@ -117,3 +119,4 @@
 
 
     loop_420sp  rgbx, nv12, init, kernel_420_16x2, 32
+#endif

diff --git a/libswscale/arm/swscale_unscaled.c b/libswscale/arm/swscale_unscaled.c
index e1597ab..e41f294 100644
--- a/libswscale/arm/swscale_unscaled.c
+++ b/libswscale/arm/swscale_unscaled.c

@@ -23,6 +23,7 @@
 #include "libswscale/swscale_internal.h"
 #include "libavutil/arm/cpu.h"
 
+#if HAVE_AS_DN_DIRECTIVE
 extern void rgbx_to_nv12_neon_32(const uint8_t *src, uint8_t *y, uint8_t *chroma,
                 int width, int height,
                 int y_stride, int c_stride, int src_stride,
@@ -178,3 +179,8 @@
     if (have_neon(cpu_flags))
         get_unscaled_swscale_neon(c);
 }
+#else
+void ff_get_unscaled_swscale_arm(SwsContext *c)
+{
+}
+#endif

diff --git a/libswscale/input.c b/libswscale/input.c
index bb2f493..4099c19 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c

@@ -942,6 +942,30 @@
 }
 #undef rdpx
 
+static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1,
+                                            const uint8_t *unused2, int width, uint32_t *unused)
+{
+    int i;
+    const float *src = (const float *)_src;
+    uint16_t *dst    = (uint16_t *)_dst;
+
+    for (i = 0; i < width; ++i){
+        dst[i] = av_clip_uint16(lrintf(65535.0f * src[i]));
+    }
+}
+
+static av_always_inline void grayf32ToY16_bswap_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1,
+                                                  const uint8_t *unused2, int width, uint32_t *unused)
+{
+    int i;
+    const uint32_t *src = (const uint32_t *)_src;
+    uint16_t *dst    = (uint16_t *)_dst;
+
+    for (i = 0; i < width; ++i){
+        dst[i] = av_clip_uint16(lrintf(65535.0f * av_int2float(av_bswap32(src[i]))));
+    }
+}
+
 #define rgb9plus_planar_funcs_endian(nbits, endian_name, endian)                                    \
 static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4],              \
                                                   int w, int32_t *rgb2yuv)                          \
@@ -1365,6 +1389,7 @@
     case AV_PIX_FMT_GRAY9LE:
     case AV_PIX_FMT_GRAY10LE:
     case AV_PIX_FMT_GRAY12LE:
+    case AV_PIX_FMT_GRAY14LE:
     case AV_PIX_FMT_GRAY16LE:
 
     case AV_PIX_FMT_P016LE:
@@ -1404,6 +1429,7 @@
     case AV_PIX_FMT_GRAY9BE:
     case AV_PIX_FMT_GRAY10BE:
     case AV_PIX_FMT_GRAY12BE:
+    case AV_PIX_FMT_GRAY14BE:
     case AV_PIX_FMT_GRAY16BE:
 
     case AV_PIX_FMT_P016BE:
@@ -1536,6 +1562,20 @@
     case AV_PIX_FMT_P010BE:
         c->lumToYV12 = p010BEToY_c;
         break;
+    case AV_PIX_FMT_GRAYF32LE:
+#if HAVE_BIGENDIAN
+        c->lumToYV12 = grayf32ToY16_bswap_c;
+#else
+        c->lumToYV12 = grayf32ToY16_c;
+#endif
+        break;
+    case AV_PIX_FMT_GRAYF32BE:
+#if HAVE_BIGENDIAN
+        c->lumToYV12 = grayf32ToY16_c;
+#else
+        c->lumToYV12 = grayf32ToY16_bswap_c;
+#endif
+        break;
     }
     if (c->needAlpha) {
         if (is16BPS(srcFormat) || isNBPS(srcFormat)) {

diff --git a/libswscale/output.c b/libswscale/output.c
index f30bce8..de8637a 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c

@@ -180,6 +180,133 @@
     }
 }
 
+static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
+                         const int16_t **chrUSrc, const int16_t **chrVSrc,
+                         uint8_t *dest8, int chrDstW)
+{
+    uint16_t *dest = (uint16_t*)dest8;
+    const int32_t **uSrc = (const int32_t **)chrUSrc;
+    const int32_t **vSrc = (const int32_t **)chrVSrc;
+    int shift = 15;
+    int big_endian = c->dstFormat == AV_PIX_FMT_P016BE;
+    int i, j;
+
+    for (i = 0; i < chrDstW; i++) {
+        int u = 1 << (shift - 1);
+        int v = 1 << (shift - 1);
+
+        /* See yuv2planeX_16_c_template for details. */
+        u -= 0x40000000;
+        v -= 0x40000000;
+        for (j = 0; j < chrFilterSize; j++) {
+            u += uSrc[j][i] * (unsigned)chrFilter[j];
+            v += vSrc[j][i] * (unsigned)chrFilter[j];
+        }
+
+        output_pixel(&dest[2*i]  , u, 0x8000, int);
+        output_pixel(&dest[2*i+1], v, 0x8000, int);
+    }
+}
+
+static av_always_inline void
+yuv2plane1_float_c_template(const int32_t *src, float *dest, int dstW)
+{
+    static const int big_endian = HAVE_BIGENDIAN;
+    static const int shift = 3;
+    static const float float_mult = 1.0f / 65535.0f;
+    int i, val;
+    uint16_t val_uint;
+
+    for (i = 0; i < dstW; ++i){
+        val = src[i] + (1 << (shift - 1));
+        output_pixel(&val_uint, val, 0, uint);
+        dest[i] = float_mult * (float)val_uint;
+    }
+}
+
+static av_always_inline void
+yuv2plane1_float_bswap_c_template(const int32_t *src, uint32_t *dest, int dstW)
+{
+    static const int big_endian = HAVE_BIGENDIAN;
+    static const int shift = 3;
+    static const float float_mult = 1.0f / 65535.0f;
+    int i, val;
+    uint16_t val_uint;
+
+    for (i = 0; i < dstW; ++i){
+        val = src[i] + (1 << (shift - 1));
+        output_pixel(&val_uint, val, 0, uint);
+        dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint));
+    }
+}
+
+static av_always_inline void
+yuv2planeX_float_c_template(const int16_t *filter, int filterSize, const int32_t **src,
+                            float *dest, int dstW)
+{
+    static const int big_endian = HAVE_BIGENDIAN;
+    static const int shift = 15;
+    static const float float_mult = 1.0f / 65535.0f;
+    int i, j, val;
+    uint16_t val_uint;
+
+    for (i = 0; i < dstW; ++i){
+        val = (1 << (shift - 1)) - 0x40000000;
+        for (j = 0; j < filterSize; ++j){
+            val += src[j][i] * (unsigned)filter[j];
+        }
+        output_pixel(&val_uint, val, 0x8000, int);
+        dest[i] = float_mult * (float)val_uint;
+    }
+}
+
+static av_always_inline void
+yuv2planeX_float_bswap_c_template(const int16_t *filter, int filterSize, const int32_t **src,
+                            uint32_t *dest, int dstW)
+{
+    static const int big_endian = HAVE_BIGENDIAN;
+    static const int shift = 15;
+    static const float float_mult = 1.0f / 65535.0f;
+    int i, j, val;
+    uint16_t val_uint;
+
+    for (i = 0; i < dstW; ++i){
+        val = (1 << (shift - 1)) - 0x40000000;
+        for (j = 0; j < filterSize; ++j){
+            val += src[j][i] * (unsigned)filter[j];
+        }
+        output_pixel(&val_uint, val, 0x8000, int);
+        dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint));
+    }
+}
+
+#define yuv2plane1_float(template, dest_type, BE_LE) \
+static void yuv2plane1_float ## BE_LE ## _c(const int16_t *src, uint8_t *dest, int dstW, \
+                                            const uint8_t *dither, int offset) \
+{ \
+    template((const int32_t *)src, (dest_type *)dest, dstW); \
+}
+
+#define yuv2planeX_float(template, dest_type, BE_LE) \
+static void yuv2planeX_float ## BE_LE ## _c(const int16_t *filter, int filterSize, \
+                                            const int16_t **src, uint8_t *dest, int dstW, \
+                                            const uint8_t *dither, int offset) \
+{ \
+    template(filter, filterSize, (const int32_t **)src, (dest_type *)dest, dstW); \
+}
+
+#if HAVE_BIGENDIAN
+yuv2plane1_float(yuv2plane1_float_c_template,       float,    BE)
+yuv2plane1_float(yuv2plane1_float_bswap_c_template, uint32_t, LE)
+yuv2planeX_float(yuv2planeX_float_c_template,       float,    BE)
+yuv2planeX_float(yuv2planeX_float_bswap_c_template, uint32_t, LE)
+#else
+yuv2plane1_float(yuv2plane1_float_c_template,       float,    LE)
+yuv2plane1_float(yuv2plane1_float_bswap_c_template, uint32_t, BE)
+yuv2planeX_float(yuv2planeX_float_c_template,       float,    LE)
+yuv2planeX_float(yuv2planeX_float_bswap_c_template, uint32_t, BE)
+#endif
+
 #undef output_pixel
 
 #define output_pixel(pos, val) \
@@ -2257,6 +2384,9 @@
     } else if (is16BPS(dstFormat)) {
         *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
         *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
+        if (dstFormat == AV_PIX_FMT_P016LE || dstFormat == AV_PIX_FMT_P016BE) {
+          *yuv2nv12cX = yuv2p016cX_c;
+        }
     } else if (isNBPS(dstFormat)) {
         if (desc->comp[0].depth == 9) {
             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
@@ -2272,6 +2402,12 @@
             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_c  : yuv2plane1_14LE_c;
         } else
             av_assert0(0);
+    } else if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
+        *yuv2planeX = yuv2planeX_floatBE_c;
+        *yuv2plane1 = yuv2plane1_floatBE_c;
+    } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
+        *yuv2planeX = yuv2planeX_floatLE_c;
+        *yuv2plane1 = yuv2plane1_floatLE_c;
     } else {
         *yuv2plane1 = yuv2plane1_8_c;
         *yuv2planeX = yuv2planeX_8_c;

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 9438a63..2fb2337 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c

@@ -339,6 +339,7 @@
     }
     if (!is16BPS(dstFormat) && !isNBPS(dstFormat) &&
         dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
+        dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE &&
         !c->needAlpha) {
         c->yuv2planeX = yuv2planeX_altivec;
     }

diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 04b7908..eab8e6a 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c

@@ -53,6 +53,10 @@
 
 void (*shuffle_bytes_0321)(const uint8_t *src, uint8_t *dst, int src_size);
 void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*shuffle_bytes_1230)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*shuffle_bytes_3012)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*shuffle_bytes_3210)(const uint8_t *src, uint8_t *dst, int src_size);
+
 
 void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc,
                    const uint8_t *vsrc, uint8_t *dst,
@@ -319,25 +323,6 @@
     }
 }
 
-
-#define DEFINE_SHUFFLE_BYTES(a, b, c, d)                                \
-void shuffle_bytes_ ## a ## b ## c ## d(const uint8_t *src,             \
-                                        uint8_t *dst, int src_size)     \
-{                                                                       \
-    int i;                                                              \
-                                                                        \
-    for (i = 0; i < src_size; i += 4) {                                 \
-        dst[i + 0] = src[i + a];                                        \
-        dst[i + 1] = src[i + b];                                        \
-        dst[i + 2] = src[i + c];                                        \
-        dst[i + 3] = src[i + d];                                        \
-    }                                                                   \
-}
-
-DEFINE_SHUFFLE_BYTES(1, 2, 3, 0)
-DEFINE_SHUFFLE_BYTES(3, 0, 1, 2)
-DEFINE_SHUFFLE_BYTES(3, 2, 1, 0)
-
 #define DEFINE_RGB48TOBGR48(need_bswap, swap)                           \
 void rgb48tobgr48_ ## need_bswap(const uint8_t *src,                    \
                                  uint8_t *dst, int src_size)            \

diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 6994839..3569254 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h

@@ -52,6 +52,9 @@
 
 extern void (*shuffle_bytes_0321)(const uint8_t *src, uint8_t *dst, int src_size);
 extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*shuffle_bytes_1230)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*shuffle_bytes_3012)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*shuffle_bytes_3210)(const uint8_t *src, uint8_t *dst, int src_size);
 
 void rgb64tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
 void   rgb64tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
@@ -76,10 +79,6 @@
 void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size);
 void    rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
 
-void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size);
-void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, int src_size);
-void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, int src_size);
-
 void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
                       uint8_t *vdst, int width, int height, int lumStride,
                       int chromStride, int srcStride, int32_t *rgb2yuv);

diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 499d25b..f7385e3 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c

@@ -342,6 +342,26 @@
     }
 }
 
+#if !HAVE_BIGENDIAN
+#define DEFINE_SHUFFLE_BYTES(name, a, b, c, d)                          \
+static void shuffle_bytes_##name (const uint8_t *src,                   \
+                                        uint8_t *dst, int src_size)     \
+{                                                                       \
+    int i;                                                              \
+                                                                        \
+    for (i = 0; i < src_size; i += 4) {                                 \
+        dst[i + 0] = src[i + a];                                        \
+        dst[i + 1] = src[i + b];                                        \
+        dst[i + 2] = src[i + c];                                        \
+        dst[i + 3] = src[i + d];                                        \
+    }                                                                   \
+}
+
+DEFINE_SHUFFLE_BYTES(1230_c, 1, 2, 3, 0)
+DEFINE_SHUFFLE_BYTES(3012_c, 3, 0, 1, 2)
+DEFINE_SHUFFLE_BYTES(3210_c, 3, 2, 1, 0)
+#endif
+
 static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     unsigned i;
@@ -949,6 +969,9 @@
 #else
     shuffle_bytes_0321 = shuffle_bytes_0321_c;
     shuffle_bytes_2103 = shuffle_bytes_2103_c;
+    shuffle_bytes_1230 = shuffle_bytes_1230_c;
+    shuffle_bytes_3012 = shuffle_bytes_3012_c;
+    shuffle_bytes_3210 = shuffle_bytes_3210_c;
 #endif
     rgb32tobgr16       = rgb32tobgr16_c;
     rgb32tobgr15       = rgb32tobgr15_c;

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 7f3e223..4069550 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c

@@ -74,8 +74,11 @@
     int bits            = desc->comp[0].depth - 1;
     int sh              = bits - 4;
 
-    if((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16)
-        sh= 9;
+    if ((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
+        sh = 9;
+    } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
+        sh = 16 - 1 - 4;
+    }
 
     for (i = 0; i < dstW; i++) {
         int j;
@@ -99,8 +102,11 @@
     const uint16_t *src = (const uint16_t *) _src;
     int sh              = desc->comp[0].depth - 1;
 
-    if(sh<15)
-        sh= isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
+    if (sh<15) {
+        sh = isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
+    } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
+        sh = 16 - 1;
+    }
 
     for (i = 0; i < dstW; i++) {
         int j;

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 0f51df9..4fa5938 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h

@@ -336,6 +336,8 @@
     uint32_t pal_yuv[256];
     uint32_t pal_rgb[256];
 
+    float uint2float_lut[256];
+
     /**
      * @name Scaled horizontal lines ring buffer.
      * The horizontal scaler keeps just enough scaled lines in a ring buffer
@@ -676,6 +678,17 @@
     return ((desc->flags & AV_PIX_FMT_FLAG_PLANAR) && isYUV(pix_fmt));
 }
 
+/*
+ * Identity semi-planar YUV formats. Specifically, those are YUV formats
+ * where the second and third components (U & V) are on the same plane.
+ */
+static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+    av_assert0(desc);
+    return (isPlanarYUV(pix_fmt) && desc->comp[1].plane == desc->comp[2].plane);
+}
+
 static av_always_inline int isRGB(enum AVPixelFormat pix_fmt)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
@@ -753,6 +766,13 @@
             pix_fmt == AV_PIX_FMT_MONOBLACK || pix_fmt == AV_PIX_FMT_MONOWHITE;
 }
 
+static av_always_inline int isFloat(enum AVPixelFormat pix_fmt)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+    av_assert0(desc);
+    return desc->flags & AV_PIX_FMT_FLAG_FLOAT;
+}
+
 static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
@@ -795,9 +815,17 @@
 
 static av_always_inline int usePal(enum AVPixelFormat pix_fmt)
 {
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    av_assert0(desc);
-    return (desc->flags & AV_PIX_FMT_FLAG_PAL) || (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL);
+    switch (pix_fmt) {
+    case AV_PIX_FMT_PAL8:
+    case AV_PIX_FMT_BGR4_BYTE:
+    case AV_PIX_FMT_BGR8:
+    case AV_PIX_FMT_GRAY8:
+    case AV_PIX_FMT_RGB4_BYTE:
+    case AV_PIX_FMT_RGB8:
+        return 1;
+    default:
+        return 0;
+    }
 }
 
 extern const uint64_t ff_dither4[2];

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..4b3cd71 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c

@@ -110,24 +110,6 @@
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
-static const uint16_t dither_scale[15][16]={
-{    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
-{    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
-{    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
-{    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  481,  481,  481,  481,},
-{    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  993,  993,  993, 1985,},
-{    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 4033, 4033, 4033, 4033,},
-{    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 4065, 8129,16257,16257,},
-{    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 4081, 8161,16321,32641,},
-{    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
-{    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 4093, 8185,16369,32737,},
-{    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 4095, 8189,16377,32753,},
-{    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 8191, 8191,16381,32761,},
-{    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   14,16383,16383,32765,},
-{    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   14,   15,32767,32767,},
-{    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
-};
-
 
 static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
                       uint8_t val)
@@ -198,16 +180,28 @@
     return srcSliceH;
 }
 
-static int planarToP010Wrapper(SwsContext *c, const uint8_t *src8[],
+static int planarToP01xWrapper(SwsContext *c, const uint8_t *src8[],
                                int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t *dstParam8[],
                                int dstStride[])
 {
+    const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat);
+    const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat);
     const uint16_t **src = (const uint16_t**)src8;
     uint16_t *dstY = (uint16_t*)(dstParam8[0] + dstStride[0] * srcSliceY);
     uint16_t *dstUV = (uint16_t*)(dstParam8[1] + dstStride[1] * srcSliceY / 2);
     int x, y;
 
+    /* Calculate net shift required for values. */
+    const int shift[3] = {
+        dst_format->comp[0].depth + dst_format->comp[0].shift -
+        src_format->comp[0].depth - src_format->comp[0].shift,
+        dst_format->comp[1].depth + dst_format->comp[1].shift -
+        src_format->comp[1].depth - src_format->comp[1].shift,
+        dst_format->comp[2].depth + dst_format->comp[2].shift -
+        src_format->comp[2].depth - src_format->comp[2].shift,
+    };
+
     av_assert0(!(srcStride[0] % 2 || srcStride[1] % 2 || srcStride[2] % 2 ||
                  dstStride[0] % 2 || dstStride[1] % 2));
 
@@ -215,7 +209,7 @@
         uint16_t *tdstY = dstY;
         const uint16_t *tsrc0 = src[0];
         for (x = c->srcW; x > 0; x--) {
-            *tdstY++ = *tsrc0++ << 6;
+            *tdstY++ = *tsrc0++ << shift[0];
         }
         src[0] += srcStride[0] / 2;
         dstY += dstStride[0] / 2;
@@ -225,8 +219,8 @@
             const uint16_t *tsrc1 = src[1];
             const uint16_t *tsrc2 = src[2];
             for (x = c->srcW / 2; x > 0; x--) {
-                *tdstUV++ = *tsrc1++ << 6;
-                *tdstUV++ = *tsrc2++ << 6;
+                *tdstUV++ = *tsrc1++ << shift[1];
+                *tdstUV++ = *tsrc2++ << shift[2];
             }
             src[1] += srcStride[1] / 2;
             src[2] += srcStride[2] / 2;
@@ -922,6 +916,91 @@
     }
 }
 
+static void gbraptopacked32(const uint8_t *src[], int srcStride[],
+                            uint8_t *dst, int dstStride, int srcSliceH,
+                            int alpha_first, int width)
+{
+    int x, h, i;
+    for (h = 0; h < srcSliceH; h++) {
+        uint8_t *dest = dst + dstStride * h;
+
+        if (alpha_first) {
+            for (x = 0; x < width; x++) {
+                *dest++ = src[3][x];
+                *dest++ = src[0][x];
+                *dest++ = src[1][x];
+                *dest++ = src[2][x];
+            }
+        } else {
+            for (x = 0; x < width; x++) {
+                *dest++ = src[0][x];
+                *dest++ = src[1][x];
+                *dest++ = src[2][x];
+                *dest++ = src[3][x];
+            }
+        }
+
+        for (i = 0; i < 4; i++)
+            src[i] += srcStride[i];
+    }
+}
+
+static int planarRgbaToRgbWrapper(SwsContext *c, const uint8_t *src[],
+                                  int srcStride[], int srcSliceY, int srcSliceH,
+                                  uint8_t *dst[], int dstStride[])
+{
+    int alpha_first = 0;
+    const uint8_t *src102[] = { src[1], src[0], src[2], src[3] };
+    const uint8_t *src201[] = { src[2], src[0], src[1], src[3] };
+    int stride102[] = { srcStride[1], srcStride[0], srcStride[2], srcStride[3] };
+    int stride201[] = { srcStride[2], srcStride[0], srcStride[1], srcStride[3] };
+
+    if (c->srcFormat != AV_PIX_FMT_GBRAP) {
+        av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n",
+               av_get_pix_fmt_name(c->srcFormat),
+               av_get_pix_fmt_name(c->dstFormat));
+        return srcSliceH;
+    }
+
+    switch (c->dstFormat) {
+    case AV_PIX_FMT_BGR24:
+        gbr24ptopacked24(src102, stride102,
+                         dst[0] + srcSliceY * dstStride[0], dstStride[0],
+                         srcSliceH, c->srcW);
+        break;
+
+    case AV_PIX_FMT_RGB24:
+        gbr24ptopacked24(src201, stride201,
+                         dst[0] + srcSliceY * dstStride[0], dstStride[0],
+                         srcSliceH, c->srcW);
+        break;
+
+    case AV_PIX_FMT_ARGB:
+        alpha_first = 1;
+    case AV_PIX_FMT_RGBA:
+        gbraptopacked32(src201, stride201,
+                        dst[0] + srcSliceY * dstStride[0], dstStride[0],
+                        srcSliceH, alpha_first, c->srcW);
+        break;
+
+    case AV_PIX_FMT_ABGR:
+        alpha_first = 1;
+    case AV_PIX_FMT_BGRA:
+        gbraptopacked32(src102, stride102,
+                        dst[0] + srcSliceY * dstStride[0], dstStride[0],
+                        srcSliceH, alpha_first, c->srcW);
+        break;
+
+    default:
+        av_log(c, AV_LOG_ERROR,
+               "unsupported planar RGB conversion %s -> %s\n",
+               av_get_pix_fmt_name(c->srcFormat),
+               av_get_pix_fmt_name(c->dstFormat));
+    }
+
+    return srcSliceH;
+}
+
 static int planarRgbToRgbWrapper(SwsContext *c, const uint8_t *src[],
                                  int srcStride[], int srcSliceY, int srcSliceH,
                                  uint8_t *dst[], int dstStride[])
@@ -1473,6 +1552,46 @@
     return srcSliceH;
 }
 
+static int uint_y_to_float_y_wrapper(SwsContext *c, const uint8_t *src[],
+                                     int srcStride[], int srcSliceY,
+                                     int srcSliceH, uint8_t *dst[], int dstStride[])
+{
+    int y, x;
+    ptrdiff_t dstStrideFloat = dstStride[0] >> 2;
+    const uint8_t *srcPtr = src[0];
+    float *dstPtr = (float *)(dst[0] + dstStride[0] * srcSliceY);
+
+    for (y = 0; y < srcSliceH; ++y){
+        for (x = 0; x < c->srcW; ++x){
+            dstPtr[x] = c->uint2float_lut[srcPtr[x]];
+        }
+        srcPtr += srcStride[0];
+        dstPtr += dstStrideFloat;
+    }
+
+    return srcSliceH;
+}
+
+static int float_y_to_uint_y_wrapper(SwsContext *c, const uint8_t* src[],
+                                     int srcStride[], int srcSliceY,
+                                     int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    int y, x;
+    ptrdiff_t srcStrideFloat = srcStride[0] >> 2;
+    const float *srcPtr = (const float *)src[0];
+    uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY;
+
+    for (y = 0; y < srcSliceH; ++y){
+        for (x = 0; x < c->srcW; ++x){
+            dstPtr[x] = av_clip_uint8(lrintf(255.0f * srcPtr[x]));
+        }
+        srcPtr += srcStrideFloat;
+        dstPtr += dstStride[0];
+    }
+
+    return srcSliceH;
+}
+
 /* unscaled copy like stuff (assumes nearly identical formats) */
 static int packedCopyWrapper(SwsContext *c, const uint8_t *src[],
                              int srcStride[], int srcSliceY, int srcSliceH,
@@ -1502,24 +1621,63 @@
 }
 
 #define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
-    uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
-    int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
-    for (i = 0; i < height; i++) {\
-        const uint8_t *dither= dithers[src_depth-9][i&7];\
-        for (j = 0; j < length-7; j+=8){\
-            dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
-            dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
-            dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
-            dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
-            dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
-            dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
-            dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
-            dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
+    unsigned shift= src_depth-dst_depth, tmp;\
+    if (c->dither == SWS_DITHER_NONE) {\
+        for (i = 0; i < height; i++) {\
+            for (j = 0; j < length-7; j+=8) {\
+                dst[j+0] = dbswap(bswap(src[j+0])>>shift);\
+                dst[j+1] = dbswap(bswap(src[j+1])>>shift);\
+                dst[j+2] = dbswap(bswap(src[j+2])>>shift);\
+                dst[j+3] = dbswap(bswap(src[j+3])>>shift);\
+                dst[j+4] = dbswap(bswap(src[j+4])>>shift);\
+                dst[j+5] = dbswap(bswap(src[j+5])>>shift);\
+                dst[j+6] = dbswap(bswap(src[j+6])>>shift);\
+                dst[j+7] = dbswap(bswap(src[j+7])>>shift);\
+            }\
+            for (; j < length; j++) {\
+                dst[j] = dbswap(bswap(src[j])>>shift);\
+            }\
+            dst += dstStride;\
+            src += srcStride;\
         }\
-        for (; j < length; j++)\
-            dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\
-        dst += dstStride;\
-        src += srcStride;\
+    } else if (shiftonly) {\
+        for (i = 0; i < height; i++) {\
+            const uint8_t *dither= dithers[shift-1][i&7];\
+            for (j = 0; j < length-7; j+=8) {\
+                tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = dbswap(tmp - (tmp>>dst_depth));\
+                tmp = (bswap(src[j+1]) + dither[1])>>shift; dst[j+1] = dbswap(tmp - (tmp>>dst_depth));\
+                tmp = (bswap(src[j+2]) + dither[2])>>shift; dst[j+2] = dbswap(tmp - (tmp>>dst_depth));\
+                tmp = (bswap(src[j+3]) + dither[3])>>shift; dst[j+3] = dbswap(tmp - (tmp>>dst_depth));\
+                tmp = (bswap(src[j+4]) + dither[4])>>shift; dst[j+4] = dbswap(tmp - (tmp>>dst_depth));\
+                tmp = (bswap(src[j+5]) + dither[5])>>shift; dst[j+5] = dbswap(tmp - (tmp>>dst_depth));\
+                tmp = (bswap(src[j+6]) + dither[6])>>shift; dst[j+6] = dbswap(tmp - (tmp>>dst_depth));\
+                tmp = (bswap(src[j+7]) + dither[7])>>shift; dst[j+7] = dbswap(tmp - (tmp>>dst_depth));\
+            }\
+            for (; j < length; j++) {\
+                tmp = (bswap(src[j]) + dither[j&7])>>shift; dst[j] = dbswap(tmp - (tmp>>dst_depth));\
+            }\
+            dst += dstStride;\
+            src += srcStride;\
+        }\
+    } else {\
+        for (i = 0; i < height; i++) {\
+            const uint8_t *dither= dithers[shift-1][i&7];\
+            for (j = 0; j < length-7; j+=8) {\
+                tmp = bswap(src[j+0]); dst[j+0] = dbswap((tmp - (tmp>>dst_depth) + dither[0])>>shift);\
+                tmp = bswap(src[j+1]); dst[j+1] = dbswap((tmp - (tmp>>dst_depth) + dither[1])>>shift);\
+                tmp = bswap(src[j+2]); dst[j+2] = dbswap((tmp - (tmp>>dst_depth) + dither[2])>>shift);\
+                tmp = bswap(src[j+3]); dst[j+3] = dbswap((tmp - (tmp>>dst_depth) + dither[3])>>shift);\
+                tmp = bswap(src[j+4]); dst[j+4] = dbswap((tmp - (tmp>>dst_depth) + dither[4])>>shift);\
+                tmp = bswap(src[j+5]); dst[j+5] = dbswap((tmp - (tmp>>dst_depth) + dither[5])>>shift);\
+                tmp = bswap(src[j+6]); dst[j+6] = dbswap((tmp - (tmp>>dst_depth) + dither[6])>>shift);\
+                tmp = bswap(src[j+7]); dst[j+7] = dbswap((tmp - (tmp>>dst_depth) + dither[7])>>shift);\
+            }\
+            for (; j < length; j++) {\
+                tmp = bswap(src[j]); dst[j] = dbswap((tmp - (tmp>>dst_depth) + dither[j&7])>>shift);\
+            }\
+            dst += dstStride;\
+            src += srcStride;\
+        }\
     }
 
 static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
@@ -1717,14 +1875,17 @@
         !(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) {
         c->swscale = ff_yuv2rgb_get_func_ptr(c);
     }
-    /* yuv420p10_to_p010 */
-    if ((srcFormat == AV_PIX_FMT_YUV420P10 || srcFormat == AV_PIX_FMT_YUVA420P10) &&
-        dstFormat == AV_PIX_FMT_P010) {
-        c->swscale = planarToP010Wrapper;
+    /* yuv420p1x_to_p01x */
+    if ((srcFormat == AV_PIX_FMT_YUV420P10 || srcFormat == AV_PIX_FMT_YUVA420P10 ||
+         srcFormat == AV_PIX_FMT_YUV420P12 ||
+         srcFormat == AV_PIX_FMT_YUV420P14 ||
+         srcFormat == AV_PIX_FMT_YUV420P16 || srcFormat == AV_PIX_FMT_YUVA420P16) &&
+        (dstFormat == AV_PIX_FMT_P010 || dstFormat == AV_PIX_FMT_P016)) {
+        c->swscale = planarToP01xWrapper;
     }
-    /* yuv420p_to_p010le */
+    /* yuv420p_to_p01xle */
     if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUVA420P) &&
-        dstFormat == AV_PIX_FMT_P010LE) {
+        (dstFormat == AV_PIX_FMT_P010LE || dstFormat == AV_PIX_FMT_P016LE)) {
         c->swscale = planar8ToP01xleWrapper;
     }
 
@@ -1761,6 +1922,9 @@
     if (srcFormat == AV_PIX_FMT_GBRP && isPlanar(srcFormat) && isByteRGB(dstFormat))
         c->swscale = planarRgbToRgbWrapper;
 
+    if (srcFormat == AV_PIX_FMT_GBRAP && isByteRGB(dstFormat))
+        c->swscale = planarRgbaToRgbWrapper;
+
     if ((srcFormat == AV_PIX_FMT_RGB48LE  || srcFormat == AV_PIX_FMT_RGB48BE  ||
          srcFormat == AV_PIX_FMT_BGR48LE  || srcFormat == AV_PIX_FMT_BGR48BE  ||
          srcFormat == AV_PIX_FMT_RGBA64LE || srcFormat == AV_PIX_FMT_RGBA64BE ||
@@ -1816,6 +1980,7 @@
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GRAY9)  ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GRAY10) ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GRAY12) ||
+        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GRAY14) ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GRAY16) ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YA16)   ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_AYUV64) ||
@@ -1862,6 +2027,16 @@
             c->swscale = yuv422pToUyvyWrapper;
     }
 
+    /* uint Y to float Y */
+    if (srcFormat == AV_PIX_FMT_GRAY8 && dstFormat == AV_PIX_FMT_GRAYF32){
+        c->swscale = uint_y_to_float_y_wrapper;
+    }
+
+    /* float Y to uint Y */
+    if (srcFormat == AV_PIX_FMT_GRAYF32 && dstFormat == AV_PIX_FMT_GRAY8){
+        c->swscale = float_y_to_uint_y_wrapper;
+    }
+
     /* LQ converters if -sws 0 or -sws 4*/
     if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) {
         /* yv12_to_yuy2 */
@@ -1888,18 +2063,13 @@
     if ( srcFormat == dstFormat ||
         (srcFormat == AV_PIX_FMT_YUVA420P && dstFormat == AV_PIX_FMT_YUV420P) ||
         (srcFormat == AV_PIX_FMT_YUV420P && dstFormat == AV_PIX_FMT_YUVA420P) ||
-        (isPlanarYUV(srcFormat) && isPlanarGray(dstFormat)) ||
+        (isFloat(srcFormat) == isFloat(dstFormat)) && ((isPlanarYUV(srcFormat) && isPlanarGray(dstFormat)) ||
         (isPlanarYUV(dstFormat) && isPlanarGray(srcFormat)) ||
         (isPlanarGray(dstFormat) && isPlanarGray(srcFormat)) ||
         (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) &&
          c->chrDstHSubSample == c->chrSrcHSubSample &&
          c->chrDstVSubSample == c->chrSrcVSubSample &&
-         dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
-         dstFormat != AV_PIX_FMT_P010LE && dstFormat != AV_PIX_FMT_P010BE &&
-         dstFormat != AV_PIX_FMT_P016LE && dstFormat != AV_PIX_FMT_P016BE &&
-         srcFormat != AV_PIX_FMT_NV12 && srcFormat != AV_PIX_FMT_NV21 &&
-         srcFormat != AV_PIX_FMT_P010LE && srcFormat != AV_PIX_FMT_P010BE &&
-         srcFormat != AV_PIX_FMT_P016LE && srcFormat != AV_PIX_FMT_P016BE))
+         !isSemiPlanarYUV(srcFormat) && !isSemiPlanarYUV(dstFormat))))
     {
         if (isPacked(c->srcFormat))
             c->swscale = packedCopyWrapper;

diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c
index b4b8173..e72c4c3 100644
--- a/libswscale/tests/swscale.c
+++ b/libswscale/tests/swscale.c

@@ -55,8 +55,8 @@
      (x) == AV_PIX_FMT_RGB32_1 ||         \
      (x) == AV_PIX_FMT_YUVA420P)
 
-static uint64_t getSSD(const uint8_t *src1, const uint8_t *src2, int stride1,
-                       int stride2, int w, int h)
+static uint64_t getSSD(const uint8_t *src1, const uint8_t *src2,
+                       int stride1, int stride2, int w, int h)
 {
     int x, y;
     uint64_t ssd = 0;
@@ -80,7 +80,7 @@
 
 // test by ref -> src -> dst -> out & compare out against ref
 // ref & out are YV12
-static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
+static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h,
                   enum AVPixelFormat srcFormat, enum AVPixelFormat dstFormat,
                   int srcW, int srcH, int dstW, int dstH, int flags,
                   struct Results *r)
@@ -90,7 +90,7 @@
     const AVPixFmtDescriptor *desc_dst      = av_pix_fmt_desc_get(dstFormat);
     static enum AVPixelFormat cur_srcFormat;
     static int cur_srcW, cur_srcH;
-    static uint8_t *src[4];
+    static const uint8_t *src[4];
     static int srcStride[4];
     uint8_t *dst[4] = { 0 };
     uint8_t *out[4] = { 0 };
@@ -132,7 +132,8 @@
             res = -1;
             goto end;
         }
-        sws_scale(srcContext, (const uint8_t * const*)ref, refStride, 0, h, src, srcStride);
+        sws_scale(srcContext, ref, refStride, 0, h,
+                  (uint8_t * const *) src, srcStride);
         sws_freeContext(srcContext);
 
         cur_srcFormat = srcFormat;
@@ -211,7 +212,8 @@
             res = -1;
             goto end;
         }
-        sws_scale(outContext, (const uint8_t * const*)dst, dstStride, 0, dstH, out, refStride);
+        sws_scale(outContext, (const uint8_t * const *) dst, dstStride, 0, dstH,
+                  out, refStride);
 
         ssdY = getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
         if (hasChroma(srcFormat) && hasChroma(dstFormat)) {
@@ -249,7 +251,8 @@
     return res;
 }
 
-static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h,
+static void selfTest(const uint8_t * const ref[4], int refStride[4],
+                     int w, int h,
                      enum AVPixelFormat srcFormat_in,
                      enum AVPixelFormat dstFormat_in)
 {
@@ -299,7 +302,8 @@
     }
 }
 
-static int fileTest(uint8_t *ref[4], int refStride[4], int w, int h, FILE *fp,
+static int fileTest(const uint8_t * const ref[4], int refStride[4],
+                    int w, int h, FILE *fp,
                     enum AVPixelFormat srcFormat_in,
                     enum AVPixelFormat dstFormat_in)
 {
@@ -362,7 +366,7 @@
     const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL };
     int rgb_stride[4]   = { 4 * W, 0, 0, 0 };
     uint8_t *data       = av_malloc(4 * W * H);
-    uint8_t *src[4]     = { data, data + W * H, data + W * H * 2, data + W * H * 3 };
+    const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 };
     int stride[4]       = { W, W, W, W };
     int x, y;
     struct SwsContext *sws;
@@ -418,7 +422,7 @@
     for (y = 0; y < H; y++)
         for (x = 0; x < W * 4; x++)
             rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
-    sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, src, stride);
+    sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride);
     sws_freeContext(sws);
     av_free(rgb_data);
 

diff --git a/libswscale/utils.c b/libswscale/utils.c
index dcab707..5e56371 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c

@@ -27,7 +27,7 @@
 #include <math.h>
 #include <stdio.h>
 #include <string.h>
-#if HAVE_SYS_MMAN_H
+#if HAVE_MMAP
 #include <sys/mman.h>
 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
 #define MAP_ANONYMOUS MAP_ANON
@@ -137,6 +137,8 @@
     [AV_PIX_FMT_GRAY10LE]    = { 1, 1 },
     [AV_PIX_FMT_GRAY12BE]    = { 1, 1 },
     [AV_PIX_FMT_GRAY12LE]    = { 1, 1 },
+    [AV_PIX_FMT_GRAY14BE]    = { 1, 1 },
+    [AV_PIX_FMT_GRAY14LE]    = { 1, 1 },
     [AV_PIX_FMT_GRAY16BE]    = { 1, 1 },
     [AV_PIX_FMT_GRAY16LE]    = { 1, 1 },
     [AV_PIX_FMT_YUV440P]     = { 1, 1 },
@@ -254,8 +256,10 @@
     [AV_PIX_FMT_AYUV64LE]    = { 1, 1},
     [AV_PIX_FMT_P010LE]      = { 1, 1 },
     [AV_PIX_FMT_P010BE]      = { 1, 1 },
-    [AV_PIX_FMT_P016LE]      = { 1, 0 },
-    [AV_PIX_FMT_P016BE]      = { 1, 0 },
+    [AV_PIX_FMT_P016LE]      = { 1, 1 },
+    [AV_PIX_FMT_P016BE]      = { 1, 1 },
+    [AV_PIX_FMT_GRAYF32LE]   = { 1, 1 },
+    [AV_PIX_FMT_GRAYF32BE]   = { 1, 1 },
 };
 
 int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
@@ -1024,6 +1028,8 @@
     case AV_PIX_FMT_GRAY10BE:
     case AV_PIX_FMT_GRAY12LE:
     case AV_PIX_FMT_GRAY12BE:
+    case AV_PIX_FMT_GRAY14LE:
+    case AV_PIX_FMT_GRAY14BE:
     case AV_PIX_FMT_GRAY16LE:
     case AV_PIX_FMT_GRAY16BE:
     case AV_PIX_FMT_YA16BE:
@@ -1169,6 +1175,7 @@
     const AVPixFmtDescriptor *desc_dst;
     int ret = 0;
     enum AVPixelFormat tmpFmt;
+    static const float float_mult = 1.0f / 255.0f;
 
     cpu_flags = av_get_cpu_flags();
     flags     = c->flags;
@@ -1533,6 +1540,19 @@
         }
     }
 
+    if (unscaled && c->srcBpc == 8 && dstFormat == AV_PIX_FMT_GRAYF32){
+        for (i = 0; i < 256; ++i){
+            c->uint2float_lut[i] = (float)i * float_mult;
+        }
+    }
+
+    // float will be converted to uint16_t
+    if ((srcFormat == AV_PIX_FMT_GRAYF32BE || srcFormat == AV_PIX_FMT_GRAYF32LE) &&
+        (!unscaled || unscaled && dstFormat != srcFormat && (srcFormat != AV_PIX_FMT_GRAYF32 ||
+        dstFormat != AV_PIX_FMT_GRAY8))){
+        c->srcBpc = 16;
+    }
+
     if (CONFIG_SWSCALE_ALPHA && isALPHA(srcFormat) && !isALPHA(dstFormat)) {
         enum AVPixelFormat tmpFormat = alphaless_fmt(srcFormat);
 
@@ -1789,7 +1809,9 @@
 
     /* unscaled special cases */
     if (unscaled && !usesHFilter && !usesVFilter &&
-        (c->srcRange == c->dstRange || isAnyRGB(dstFormat))) {
+        (c->srcRange == c->dstRange || isAnyRGB(dstFormat) ||
+         srcFormat == AV_PIX_FMT_GRAYF32 && dstFormat == AV_PIX_FMT_GRAY8 ||
+         srcFormat == AV_PIX_FMT_GRAY8 && dstFormat == AV_PIX_FMT_GRAYF32)) {
         ff_get_unscaled_swscale(c);
 
         if (c->swscale) {

diff --git a/libswscale/version.h b/libswscale/version.h
index 474e93b..a07bd71 100644
--- a/libswscale/version.h
+++ b/libswscale/version.h

@@ -26,8 +26,8 @@
 
 #include "libavutil/version.h"
 
-#define LIBSWSCALE_VERSION_MAJOR   4
-#define LIBSWSCALE_VERSION_MINOR   8
+#define LIBSWSCALE_VERSION_MAJOR   5
+#define LIBSWSCALE_VERSION_MINOR   2
 #define LIBSWSCALE_VERSION_MICRO 100
 
 #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \

diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index b50c7f2..f317d5d 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile

@@ -11,3 +11,4 @@
 X86ASM-OBJS                     += x86/input.o                          \
                                    x86/output.o                         \
                                    x86/scale.o                          \
+                                   x86/rgb_2_rgb.o                      \

diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index ffd12e1..2d6fc2a 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c

@@ -144,11 +144,27 @@
 
 #endif /* HAVE_INLINE_ASM */
 
+void ff_shuffle_bytes_2103_mmxext(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_2103_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_0321_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_1230_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_3012_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_3210_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
+
+#if ARCH_X86_64
+void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                          const uint8_t *src, int width, int height,
+                          int lumStride, int chromStride, int srcStride);
+void ff_uyvytoyuv422_avx(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                         const uint8_t *src, int width, int height,
+                         int lumStride, int chromStride, int srcStride);
+#endif
+
 av_cold void rgb2rgb_init_x86(void)
 {
-#if HAVE_INLINE_ASM
     int cpu_flags = av_get_cpu_flags();
 
+#if HAVE_INLINE_ASM
     if (INLINE_MMX(cpu_flags))
         rgb2rgb_init_mmx();
     if (INLINE_AMD3DNOW(cpu_flags))
@@ -160,4 +176,25 @@
     if (INLINE_AVX(cpu_flags))
         rgb2rgb_init_avx();
 #endif /* HAVE_INLINE_ASM */
+
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        shuffle_bytes_2103 = ff_shuffle_bytes_2103_mmxext;
+    }
+    if (EXTERNAL_SSE2(cpu_flags)) {
+#if ARCH_X86_64
+        uyvytoyuv422 = ff_uyvytoyuv422_sse2;
+#endif
+    }
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        shuffle_bytes_0321 = ff_shuffle_bytes_0321_ssse3;
+        shuffle_bytes_2103 = ff_shuffle_bytes_2103_ssse3;
+        shuffle_bytes_1230 = ff_shuffle_bytes_1230_ssse3;
+        shuffle_bytes_3012 = ff_shuffle_bytes_3012_ssse3;
+        shuffle_bytes_3210 = ff_shuffle_bytes_3210_ssse3;
+    }
+    if (EXTERNAL_AVX(cpu_flags)) {
+#if ARCH_X86_64
+        uyvytoyuv422 = ff_uyvytoyuv422_avx;
+#endif
+    }
 }

diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index d59bd56..ae2469e 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c

@@ -1034,68 +1034,6 @@
     }
 }
 
-static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size)
-{
-    x86_reg idx = 15 - src_size;
-    const uint8_t *s = src-idx;
-    uint8_t *d = dst-idx;
-    __asm__ volatile(
-        "test          %0, %0           \n\t"
-        "jns           2f               \n\t"
-        PREFETCH"       (%1, %0)        \n\t"
-        "movq          %3, %%mm7        \n\t"
-        "pxor          %4, %%mm7        \n\t"
-        "movq       %%mm7, %%mm6        \n\t"
-        "pxor          %5, %%mm7        \n\t"
-        ".p2align       4               \n\t"
-        "1:                             \n\t"
-        PREFETCH"     32(%1, %0)        \n\t"
-        "movq           (%1, %0), %%mm0 \n\t"
-        "movq          8(%1, %0), %%mm1 \n\t"
-# if COMPILE_TEMPLATE_MMXEXT
-        "pshufw      $177, %%mm0, %%mm3 \n\t"
-        "pshufw      $177, %%mm1, %%mm5 \n\t"
-        "pand       %%mm7, %%mm0        \n\t"
-        "pand       %%mm6, %%mm3        \n\t"
-        "pand       %%mm7, %%mm1        \n\t"
-        "pand       %%mm6, %%mm5        \n\t"
-        "por        %%mm3, %%mm0        \n\t"
-        "por        %%mm5, %%mm1        \n\t"
-# else
-        "movq       %%mm0, %%mm2        \n\t"
-        "movq       %%mm1, %%mm4        \n\t"
-        "pand       %%mm7, %%mm0        \n\t"
-        "pand       %%mm6, %%mm2        \n\t"
-        "pand       %%mm7, %%mm1        \n\t"
-        "pand       %%mm6, %%mm4        \n\t"
-        "movq       %%mm2, %%mm3        \n\t"
-        "movq       %%mm4, %%mm5        \n\t"
-        "pslld        $16, %%mm2        \n\t"
-        "psrld        $16, %%mm3        \n\t"
-        "pslld        $16, %%mm4        \n\t"
-        "psrld        $16, %%mm5        \n\t"
-        "por        %%mm2, %%mm0        \n\t"
-        "por        %%mm4, %%mm1        \n\t"
-        "por        %%mm3, %%mm0        \n\t"
-        "por        %%mm5, %%mm1        \n\t"
-# endif
-        MOVNTQ"     %%mm0,  (%2, %0)    \n\t"
-        MOVNTQ"     %%mm1, 8(%2, %0)    \n\t"
-        "add          $16, %0           \n\t"
-        "js            1b               \n\t"
-        SFENCE"                         \n\t"
-        EMMS"                           \n\t"
-        "2:                             \n\t"
-        : "+&r"(idx)
-        : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
-        : "memory");
-    for (; idx<15; idx+=4) {
-        register unsigned v  = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
-        v &= 0xff00ff;
-        *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
-    }
-}
-
 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     unsigned i;
@@ -2572,7 +2510,6 @@
     rgb24to15          = RENAME(rgb24to15);
     rgb24to16          = RENAME(rgb24to16);
     rgb24tobgr24       = RENAME(rgb24tobgr24);
-    shuffle_bytes_2103 = RENAME(shuffle_bytes_2103);
     rgb32tobgr16       = RENAME(rgb32tobgr16);
     rgb32tobgr15       = RENAME(rgb32tobgr15);
     yv12toyuy2         = RENAME(yv12toyuy2);

diff --git a/libswscale/x86/rgb_2_rgb.asm b/libswscale/x86/rgb_2_rgb.asm
new file mode 100644
index 0000000..5fb5d2e
--- /dev/null
+++ b/libswscale/x86/rgb_2_rgb.asm

@@ -0,0 +1,299 @@
+;******************************************************************************
+;* Copyright Nick Kurshev
+;* Copyright Michael (michaelni@gmx.at)
+;* Copyright 2018 Jokyo Images
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pb_mask_shuffle2103_mmx times 8 dw 255
+pb_shuffle2103: db 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15
+pb_shuffle0321: db 0, 3, 2, 1, 4, 7, 6, 5, 8, 11, 10, 9, 12, 15, 14, 13
+pb_shuffle1230: db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
+pb_shuffle3012: db 3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14
+pb_shuffle3210: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+SECTION .text
+
+%macro RSHIFT_COPY 3
+; %1 dst ; %2 src ; %3 shift
+%if cpuflag(avx)
+    psrldq  %1, %2, %3
+%else
+    mova           %1, %2
+    RSHIFT         %1, %3
+%endif
+%endmacro
+
+;------------------------------------------------------------------------------
+; shuffle_bytes_2103_mmext (const uint8_t *src, uint8_t *dst, int src_size)
+;------------------------------------------------------------------------------
+INIT_MMX mmxext
+cglobal shuffle_bytes_2103, 3, 5, 8, src, dst, w, tmp, x
+    mova   m6, [pb_mask_shuffle2103_mmx]
+    mova   m7, m6
+    psllq  m7, 8
+
+    movsxdifnidn wq, wd
+    mov xq, wq
+
+    add        srcq, wq
+    add        dstq, wq
+    neg          wq
+
+;calc scalar loop
+    and xq, mmsize*2 -4
+    je .loop_simd
+
+.loop_scalar:
+   mov          tmpb, [srcq + wq + 2]
+   mov [dstq+wq + 0], tmpb
+   mov          tmpb, [srcq + wq + 1]
+   mov [dstq+wq + 1], tmpb
+   mov          tmpb, [srcq + wq + 0]
+   mov [dstq+wq + 2], tmpb
+   mov          tmpb, [srcq + wq + 3]
+   mov [dstq+wq + 3], tmpb
+   add            wq, 4
+   sub            xq, 4
+   jg .loop_scalar
+
+;check if src_size < mmsize * 2
+cmp wq, 0
+jge .end
+
+.loop_simd:
+    movu     m0, [srcq+wq]
+    movu     m1, [srcq+wq+8]
+
+    pshufw   m3, m0, 177
+    pshufw   m5, m1, 177
+
+    pand     m0, m7
+    pand     m3, m6
+
+    pand     m1, m7
+    pand     m5, m6
+
+    por      m0, m3
+    por      m1, m5
+
+    movu      [dstq+wq], m0
+    movu  [dstq+wq + 8], m1
+
+    add              wq, mmsize*2
+    jl .loop_simd
+
+.end:
+    RET
+
+;------------------------------------------------------------------------------
+; shuffle_bytes_## (const uint8_t *src, uint8_t *dst, int src_size)
+;------------------------------------------------------------------------------
+; %1-4 index shuffle
+%macro SHUFFLE_BYTES 4
+cglobal shuffle_bytes_%1%2%3%4, 3, 5, 2, src, dst, w, tmp, x
+    VBROADCASTI128    m0, [pb_shuffle%1%2%3%4]
+    movsxdifnidn wq, wd
+    mov xq, wq
+
+    add        srcq, wq
+    add        dstq, wq
+    neg          wq
+
+;calc scalar loop
+    and xq, mmsize-4
+    je .loop_simd
+
+.loop_scalar:
+   mov          tmpb, [srcq + wq + %1]
+   mov [dstq+wq + 0], tmpb
+   mov          tmpb, [srcq + wq + %2]
+   mov [dstq+wq + 1], tmpb
+   mov          tmpb, [srcq + wq + %3]
+   mov [dstq+wq + 2], tmpb
+   mov          tmpb, [srcq + wq + %4]
+   mov [dstq+wq + 3], tmpb
+   add            wq, 4
+   sub            xq, 4
+   jg .loop_scalar
+
+;check if src_size < mmsize
+cmp wq, 0
+jge .end
+
+.loop_simd:
+    movu           m1, [srcq+wq]
+    pshufb         m1, m0
+    movu    [dstq+wq], m1
+    add            wq, mmsize
+    jl .loop_simd
+
+.end:
+    RET
+%endmacro
+
+INIT_XMM ssse3
+SHUFFLE_BYTES 2, 1, 0, 3
+SHUFFLE_BYTES 0, 3, 2, 1
+SHUFFLE_BYTES 1, 2, 3, 0
+SHUFFLE_BYTES 3, 0, 1, 2
+SHUFFLE_BYTES 3, 2, 1, 0
+
+;-----------------------------------------------------------------------------------------------
+; uyvytoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+;              const uint8_t *src, int width, int height,
+;              int lumStride, int chromStride, int srcStride)
+;-----------------------------------------------------------------------------------------------
+%macro UYVY_TO_YUV422 0
+cglobal uyvytoyuv422, 9, 14, 8, ydst, udst, vdst, src, w, h, lum_stride, chrom_stride, src_stride, wtwo, whalf, tmp, x, back_w
+    pxor         m0, m0
+    pcmpeqw      m1, m1
+    psrlw        m1, 8
+
+    movsxdifnidn            wq, wd
+    movsxdifnidn   lum_strideq, lum_strided
+    movsxdifnidn chrom_strideq, chrom_strided
+    movsxdifnidn   src_strideq, src_strided
+
+    mov     back_wq, wq
+    mov      whalfq, wq
+    shr      whalfq, 1     ; whalf = width / 2
+
+    lea srcq, [srcq + wq * 2]
+    add    ydstq, wq
+    add    udstq, whalfq
+    add    vdstq, whalfq
+
+.loop_line:
+    mov          xq, wq
+    mov       wtwoq, wq
+    add       wtwoq, wtwoq ; wtwo = width * 2
+
+    neg       wq
+    neg    wtwoq
+    neg   whalfq
+
+    ;calc scalar loop count
+    and       xq, mmsize * 2 - 1
+    je .loop_simd
+
+    .loop_scalar:
+        mov             tmpb, [srcq + wtwoq + 0]
+        mov [udstq + whalfq], tmpb
+
+        mov             tmpb, [srcq + wtwoq + 1]
+        mov     [ydstq + wq], tmpb
+
+        mov             tmpb, [srcq + wtwoq + 2]
+        mov [vdstq + whalfq], tmpb
+
+        mov             tmpb, [srcq + wtwoq + 3]
+        mov [ydstq + wq + 1], tmpb
+
+        add      wq, 2
+        add   wtwoq, 4
+        add  whalfq, 1
+        sub      xq, 2
+        jg .loop_scalar
+
+    ; check if simd loop is need
+    cmp      wq, 0
+    jge .end_line
+
+    .loop_simd:
+        movu    m2, [srcq + wtwoq             ]
+        movu    m3, [srcq + wtwoq + mmsize    ]
+        movu    m4, [srcq + wtwoq + mmsize * 2]
+        movu    m5, [srcq + wtwoq + mmsize * 3]
+
+        ; extract y part 1
+        RSHIFT_COPY    m6, m2, 1 ; UYVY UYVY -> YVYU YVY...
+        pand           m6, m1; YxYx YxYx...
+
+        RSHIFT_COPY    m7, m3, 1 ; UYVY UYVY -> YVYU YVY...
+        pand           m7, m1 ; YxYx YxYx...
+
+        packuswb       m6, m7 ; YYYY YYYY...
+        movu [ydstq + wq], m6
+
+        ; extract y part 2
+        RSHIFT_COPY    m6, m4, 1 ; UYVY UYVY -> YVYU YVY...
+        pand           m6, m1; YxYx YxYx...
+
+        RSHIFT_COPY    m7, m5, 1 ; UYVY UYVY -> YVYU YVY...
+        pand           m7, m1 ; YxYx YxYx...
+
+        packuswb                m6, m7 ; YYYY YYYY...
+        movu [ydstq + wq + mmsize], m6
+
+        ; extract uv
+        pand       m2, m1   ; UxVx...
+        pand       m3, m1   ; UxVx...
+        pand       m4, m1   ; UxVx...
+        pand       m5, m1   ; UxVx...
+
+        packuswb   m2, m3   ; UVUV...
+        packuswb   m4, m5   ; UVUV...
+
+        ; U
+        pand       m6, m2, m1 ; UxUx...
+        pand       m7, m4, m1 ; UxUx...
+
+        packuswb m6, m7 ; UUUU
+        movu   [udstq + whalfq], m6
+
+
+        ; V
+        psrlw      m2, 8  ; VxVx...
+        psrlw      m4, 8  ; VxVx...
+        packuswb   m2, m4 ; VVVV
+        movu   [vdstq + whalfq], m2
+
+        add   whalfq, mmsize
+        add    wtwoq, mmsize * 4
+        add       wq, mmsize * 2
+        jl .loop_simd
+
+    .end_line:
+        add        srcq, src_strideq
+        add        ydstq, lum_strideq
+        add        udstq, chrom_strideq
+        add        vdstq, chrom_strideq
+
+        ;restore initial state of line variable
+        mov           wq, back_wq
+        mov          xq, wq
+        mov      whalfq, wq
+        shr      whalfq, 1     ; whalf = width / 2
+        sub          hd, 1
+        jg .loop_line
+
+    RET
+%endmacro
+
+%if ARCH_X86_64
+INIT_XMM sse2
+UYVY_TO_YUV422
+
+INIT_XMM avx
+UYVY_TO_YUV422
+%endif

diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index f978170..83cabff 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm

@@ -364,15 +364,7 @@
     movd [dstq+wq*2], m0
 %endif ; %3 ==/!= X
 %else ; %2 == 19
-%if mmsize == 8
-    PMINSD_MMX    m0, m2, m4
-%elif cpuflag(sse4)
-    pminsd        m0, m2
-%else ; sse2/ssse3
-    cvtdq2ps      m0, m0
-    minps         m0, m2
-    cvtps2dq      m0, m0
-%endif ; mmx/sse2/ssse3/sse4
+    PMINSD        m0, m2, m4
 %ifnidn %3, X
     mova [dstq+wq*(4>>wshr)], m0
 %else ; %3 == X

diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 869e7fb..7dc2d70 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c

@@ -53,13 +53,13 @@
 DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
 DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
 
-DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
+DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
+DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
+DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
 
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
+DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
+DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
+DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
 
 
 //MMX versions

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index b8bdcd4..7c30470 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c

@@ -1500,7 +1500,8 @@
 
     c->use_mmx_vfilter= 0;
     if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && dstFormat != AV_PIX_FMT_NV12
-        && dstFormat != AV_PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
+        && dstFormat != AV_PIX_FMT_NV21 && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE
+        && !(c->flags & SWS_BITEXACT)) {
             if (c->flags & SWS_ACCURATE_RND) {
                 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
                     switch (c->dstFormat) {

diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 1fe5abe..737cbb0 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c

@@ -267,7 +267,11 @@
     PUTRGB(dst_2, py_2, 0);
 ENDYUV2RGBFUNC()
 
+#if HAVE_BIGENDIAN
+YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
+#else
 YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
+#endif
     LOADCHROMA(0);
     PUTRGBA(dst_1, py_1, pa_1, 0, 24);
     PUTRGBA(dst_2, py_2, pa_2, 0, 24);
@@ -301,7 +305,11 @@
     PUTRGBA(dst_2, py_2, pa_2, 0, 24);
 ENDYUV2RGBFUNC()
 
+#if HAVE_BIGENDIAN
+YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
+#else
 YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
+#endif
     LOADCHROMA(0);
     PUTRGBA(dst_1, py_1, pa_1, 0, 0);
     PUTRGBA(dst_2, py_2, pa_2, 0, 0);

diff --git a/tests/Makefile b/tests/Makefile
index 278be24..24680b8 100644
--- a/tests/Makefile
+++ b/tests/Makefile

@@ -1,5 +1,3 @@
-FFSERVER_REFFILE = $(SRC_PATH)/tests/ffserver.regression.ref
-
 THREADS = 1
 VREF = tests/vsynth1/00.pgm
 AREF = tests/data/asynth1.sw
@@ -11,14 +9,6 @@
 
 $(AREF): CMP=
 
-ffservertest: export PROGSUF = $(PROGSSUF)
-ffservertest: ffserver$(PROGSSUF)$(EXESUF) ffmpeg$(PROGSSUF)$(EXESUF) tests/vsynth1/00.pgm tests/data/asynth1.sw
-	@echo
-	@echo "Unfortunately ffserver is broken and therefore its regression"
-	@echo "test fails randomly. Treat the results accordingly."
-	@echo
-	$(SRC_PATH)/tests/ffserver-regression.sh $(FFSERVER_REFFILE) $(SRC_PATH)/tests/ffserver.conf "$(TARGET_SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)"
-
 APITESTSDIR := tests/api
 OBJDIRS += tests/data tests/vsynth1 tests/data/filtergraphs $(APITESTSDIR)/
 
@@ -113,7 +103,9 @@
 include $(SRC_PATH)/tests/fate/atrac.mak
 include $(SRC_PATH)/tests/fate/audio.mak
 include $(SRC_PATH)/tests/fate/bmp.mak
+include $(SRC_PATH)/tests/fate/build.mak
 include $(SRC_PATH)/tests/fate/canopus.mak
+include $(SRC_PATH)/tests/fate/cbs.mak
 include $(SRC_PATH)/tests/fate/cdxl.mak
 include $(SRC_PATH)/tests/fate/checkasm.mak
 include $(SRC_PATH)/tests/fate/concatdec.mak
@@ -137,7 +129,10 @@
 include $(SRC_PATH)/tests/fate/gapless.mak
 include $(SRC_PATH)/tests/fate/gif.mak
 include $(SRC_PATH)/tests/fate/h264.mak
+include $(SRC_PATH)/tests/fate/hap.mak
 include $(SRC_PATH)/tests/fate/hevc.mak
+include $(SRC_PATH)/tests/fate/hw.mak
+include $(SRC_PATH)/tests/fate/id3v2.mak
 include $(SRC_PATH)/tests/fate/image.mak
 include $(SRC_PATH)/tests/fate/indeo.mak
 include $(SRC_PATH)/tests/fate/libavcodec.mak
@@ -156,6 +151,7 @@
 include $(SRC_PATH)/tests/fate/mp3.mak
 include $(SRC_PATH)/tests/fate/mpc.mak
 include $(SRC_PATH)/tests/fate/mpeg4.mak
+include $(SRC_PATH)/tests/fate/mpegps.mak
 include $(SRC_PATH)/tests/fate/mpegts.mak
 include $(SRC_PATH)/tests/fate/mxf.mak
 include $(SRC_PATH)/tests/fate/opus.mak
@@ -186,7 +182,7 @@
 
 FATE_SAMPLES_AVCONV           += $(FATE_SAMPLES_AVCONV-yes)
 FATE_SAMPLES_FFMPEG           += $(FATE_SAMPLES_FFMPEG-yes)
-FATE_EXTERN-$(CONFIG_FFMPEG)  += $(FATE_SAMPLES_AVCONV) $(FATE_SAMPLES_FFMPEG) $(FATE_SAMPLES_FFPROBE)
+FATE_EXTERN-$(CONFIG_FFMPEG)  += $(FATE_SAMPLES_AVCONV) $(FATE_SAMPLES_FFMPEG) $(FATE_SAMPLES_FFPROBE) $(FATE_SAMPLES_FASTSTART)
 FATE_EXTERN += $(FATE_EXTERN-yes)
 
 FATE += $(FATE-yes)
@@ -198,6 +194,8 @@
 
 $(FATE_FFPROBE) $(FATE_SAMPLES_FFPROBE): ffprobe$(PROGSSUF)$(EXESUF)
 
+$(FATE_SAMPLES_FASTSTART): tools/qt-faststart$(EXESUF)
+
 ifdef SAMPLES
 FATE += $(FATE_FULL) $(FATE_FULL-yes)
 FATE += $(FATE_EXTERN)
@@ -220,10 +218,14 @@
 
 fate:: $(FATE)
 
+# Tests requiring hardware support are not included in a default fate run.
+fate-hw: $(FATE_HW-yes)
+FATE += $(FATE_HW-yes)
+
 $(FATE) $(FATE_TESTS-no): export PROGSUF = $(PROGSSUF)
 $(FATE) $(FATE_TESTS-no): $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
 	@echo "TEST    $(@:fate-%=%)"
-	$(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(TARGET_SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(CPUFLAGS)' '$(CMP_SHIFT)' '$(CMP_TARGET)' '$(SIZE_TOLERANCE)' '$(CMP_UNIT)' '$(GEN)' '$(HWACCEL)' '$(REPORT)'
+	$(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(TARGET_SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(CPUFLAGS)' '$(CMP_SHIFT)' '$(CMP_TARGET)' '$(SIZE_TOLERANCE)' '$(CMP_UNIT)' '$(GEN)' '$(HWACCEL)' '$(REPORT)' '$(KEEP)'
 
 fate-list:
 	@printf '%s\n' $(sort $(FATE))

diff --git a/tests/api/api-band-test.c b/tests/api/api-band-test.c
index 5ccba4f..a84f6b7 100644
--- a/tests/api/api-band-test.c
+++ b/tests/api/api-band-test.c

@@ -214,8 +214,6 @@
         return 1;
     }
 
-    av_register_all();
-
     if (video_decode(argv[1]) != 0)
         return 1;
 

diff --git a/tests/api/api-codec-param-test.c b/tests/api/api-codec-param-test.c
index 377a5e9..0868322 100644
--- a/tests/api/api-codec-param-test.c
+++ b/tests/api/api-codec-param-test.c

@@ -231,8 +231,6 @@
     AVFormatContext *fmt_ctx = NULL;
     AVFormatContext *fmt_ctx_no_decode = NULL;
 
-    av_register_all();
-
     if (argc < 2) {
         av_log(NULL, AV_LOG_ERROR, "Usage: %s <input>\n", argv[0]);
         return -1;

diff --git a/tests/api/api-flac-test.c b/tests/api/api-flac-test.c
index c5a37f0..2e90812 100644
--- a/tests/api/api-flac-test.c
+++ b/tests/api/api-flac-test.c

@@ -245,8 +245,6 @@
     int sample_rates[] = {8000, 44100, 48000, 192000};
     int cl, sr;
 
-    avcodec_register_all();
-
     enc = avcodec_find_encoder(AV_CODEC_ID_FLAC);
     if (!enc) {
         av_log(NULL, AV_LOG_ERROR, "Can't find encoder\n");

diff --git a/tests/api/api-h264-test.c b/tests/api/api-h264-test.c
index 52282e0..66669fa 100644
--- a/tests/api/api-h264-test.c
+++ b/tests/api/api-h264-test.c

@@ -158,8 +158,6 @@
         return 1;
     }
 
-    av_register_all();
-
     if (video_decode_example(argv[1]) != 0)
         return 1;
 

diff --git a/tests/api/api-seek-test.c b/tests/api/api-seek-test.c
index 2b32cb9..d0531a2 100644
--- a/tests/api/api-seek-test.c
+++ b/tests/api/api-seek-test.c

@@ -279,8 +279,6 @@
         return 1;
     }
 
-    av_register_all();
-
     if (seek_test(argv[1], argv[2], argv[3]) != 0)
         return 1;
 

diff --git a/tests/api/api-threadmessage-test.c b/tests/api/api-threadmessage-test.c
index 05a8062..3c693a7 100644
--- a/tests/api/api-threadmessage-test.c
+++ b/tests/api/api-threadmessage-test.c

@@ -130,7 +130,9 @@
 
     for (i = 0; i < rd->workload; i++) {
         if (rand() % rd->workload < rd->workload / 10) {
-            av_log(NULL, AV_LOG_INFO, "receiver #%d: flushing the queue\n", rd->id);
+            av_log(NULL, AV_LOG_INFO, "receiver #%d: flushing the queue, "
+                   "discarding %d message(s)\n", rd->id,
+                   av_thread_message_queue_nb_elems(rd->queue));
             av_thread_message_flush(rd->queue);
         } else {
             struct message msg;

diff --git a/tests/audiogen.c b/tests/audiogen.c
index 8d596b5..c43bb70 100644
--- a/tests/audiogen.c
+++ b/tests/audiogen.c

@@ -26,7 +26,7 @@
 #include <stdio.h>
 #include <string.h>
 
-#define MAX_CHANNELS 8
+#define MAX_CHANNELS 12
 
 static unsigned int myrnd(unsigned int *seed_ptr, int n)
 {

diff --git a/tests/audiomatch.c b/tests/audiomatch.c
index 9671789..d44c407 100644
--- a/tests/audiomatch.c
+++ b/tests/audiomatch.c

@@ -25,23 +25,23 @@
 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
 #define FFMAX(a,b) ((a) > (b) ? (a) : (b))
 
-static int64_t fsize(FILE *f){
-    int64_t end, pos= ftell(f);
+static int64_t fsize(FILE *f) {
+    int64_t end, pos = ftell(f);
     fseek(f, 0, SEEK_END);
     end = ftell(f);
     fseek(f, pos, SEEK_SET);
     return end;
 }
 
-int main(int argc, char **argv){
+int main(int argc, char **argv) {
     FILE *f[2];
     int i, pos;
     int siglen, datlen;
     int bestpos = 0;
-    double bestc=0;
-    double sigamp= 0;
+    double bestc = 0;
+    double sigamp = 0;
     int16_t *signal, *data;
-    int maxshift= 16384;
+    int maxshift = 16384;
 
     if (argc < 3) {
         printf("audiomatch <testfile> <reffile>\n");
@@ -80,29 +80,31 @@
     data   = malloc(datlen * sizeof(*data));
     signal = malloc(siglen * sizeof(*signal));
 
-    fread(data  , 1, datlen, f[0]);
-    fread(signal, 1, siglen, f[1]);
+    if (fread(data  , 1, datlen, f[0]) != datlen)
+        return 1;
+    if (fread(signal, 1, siglen, f[1]) != siglen)
+        return 1;
     datlen /= 2;
     siglen /= 2;
 
-    for(i=0; i<siglen; i++){
+    for (i = 0; i < siglen; i++) {
         signal[i] = ((uint8_t*)(signal + i))[0] + 256*((uint8_t*)(signal + i))[1];
         sigamp += signal[i] * signal[i];
     }
-    for(i=0; i<datlen; i++)
+    for (i = 0; i < datlen; i++)
         data[i] = ((uint8_t*)(data + i))[0] + 256*((uint8_t*)(data + i))[1];
 
-    for(pos = 0; pos<maxshift; pos = pos < 0 ? -pos: -pos-1){
-        int64_t c= 0;
+    for (pos = 0; pos < maxshift; pos = pos < 0 ? -pos: -pos-1) {
+        int64_t c = 0;
         int testlen = FFMIN(siglen, datlen-pos);
-        for(i=FFMAX(0, -pos); i<testlen; i++){
-            int j= pos+i;
+        for (i = FFMAX(0, -pos); i < testlen; i++) {
+            int j = pos + i;
             c += signal[i] * data[j];
         }
-        if(fabs(c) > sigamp * 0.94)
+        if (fabs(c) > sigamp * 0.94)
             maxshift = FFMIN(maxshift, fabs(pos)+32);
-        if(fabs(c)>fabs(bestc)){
-            bestc= c;
+        if (fabs(c) > fabs(bestc)) {
+            bestc = c;
             bestpos = pos;
         }
     }

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 14916e5..9484acb 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile

@@ -10,6 +10,7 @@
 AVCODECOBJS-$(CONFIG_H264PRED)          += h264pred.o
 AVCODECOBJS-$(CONFIG_H264QPEL)          += h264qpel.o
 AVCODECOBJS-$(CONFIG_LLVIDDSP)          += llviddsp.o
+AVCODECOBJS-$(CONFIG_LLVIDENCDSP)       += llviddspenc.o
 AVCODECOBJS-$(CONFIG_VP8DSP)            += vp8dsp.o
 AVCODECOBJS-$(CONFIG_VIDEODSP)          += videodsp.o
 
@@ -19,9 +20,11 @@
 AVCODECOBJS-$(CONFIG_ALAC_DECODER)      += alacdsp.o
 AVCODECOBJS-$(CONFIG_DCA_DECODER)       += synth_filter.o
 AVCODECOBJS-$(CONFIG_EXR_DECODER)       += exrdsp.o
+AVCODECOBJS-$(CONFIG_HUFFYUV_DECODER)   += huffyuvdsp.o
 AVCODECOBJS-$(CONFIG_JPEG2000_DECODER)  += jpeg2000dsp.o
 AVCODECOBJS-$(CONFIG_PIXBLOCKDSP)       += pixblockdsp.o
-AVCODECOBJS-$(CONFIG_HEVC_DECODER)      += hevc_add_res.o hevc_idct.o
+AVCODECOBJS-$(CONFIG_HEVC_DECODER)      += hevc_add_res.o hevc_idct.o hevc_sao.o
+AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER)   += utvideodsp.o
 AVCODECOBJS-$(CONFIG_V210_ENCODER)      += v210enc.o
 AVCODECOBJS-$(CONFIG_VP9_DECODER)       += vp9dsp.o
 
@@ -30,9 +33,18 @@
 # libavfilter tests
 AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
 AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
+AVFILTEROBJS-$(CONFIG_HFLIP_FILTER)      += vf_hflip.o
+AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER)  += vf_threshold.o
+AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER)    += vf_nlmeans.o
 
 CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
 
+# swscale tests
+SWSCALEOBJS                             += sw_rgb.o
+
+CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)
+
+# libavutil tests
 AVUTILOBJS                              += fixed_dsp.o
 AVUTILOBJS                              += float_dsp.o
 
@@ -56,7 +68,7 @@
 CHECKASM := tests/checkasm/checkasm$(EXESUF)
 
 $(CHECKASM): $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS)
-	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS) $(EXTRALIBS)
+	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS) $(EXTRALIBS-avcodec) $(EXTRALIBS-avfilter) $(EXTRALIBS-avformat) $(EXTRALIBS-avutil) $(EXTRALIBS-swresample) $(EXTRALIBS)
 
 checkasm: $(CHECKASM)
 

diff --git a/tests/checkasm/hevc_sao.c b/tests/checkasm/hevc_sao.c
new file mode 100644
index 0000000..8d0cf80
--- /dev/null
+++ b/tests/checkasm/hevc_sao.c

@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2018 Yingming Fan <yingmingfan@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/intreadwrite.h"
+
+#include "libavcodec/avcodec.h"
+
+#include "libavcodec/hevcdsp.h"
+
+#include "checkasm.h"
+
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
+static const uint32_t sao_size[5] = {8, 16, 32, 48, 64};
+
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
+#define PIXEL_STRIDE (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) //same with sao_edge src_stride
+#define BUF_SIZE (PIXEL_STRIDE * (64+2) * 2) //+2 for top and bottom row, *2 for high bit depth
+#define OFFSET_THRESH (1 << (bit_depth - 5))
+#define OFFSET_LENGTH 5
+
+#define randomize_buffers(buf0, buf1, size)                 \
+    do {                                                    \
+        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];   \
+        int k;                                              \
+        for (k = 0; k < size; k += 4) {                     \
+            uint32_t r = rnd() & mask;                      \
+            AV_WN32A(buf0 + k, r);                          \
+            AV_WN32A(buf1 + k, r);                          \
+        }                                                   \
+    } while (0)
+
+#define randomize_buffers2(buf, size)                       \
+    do {                                                    \
+        uint32_t max_offset = OFFSET_THRESH;                \
+        int k;                                              \
+        if (bit_depth == 8) {                               \
+            for (k = 0; k < size; k++) {                    \
+                uint8_t r = rnd() % max_offset;             \
+                buf[k] = r;                                 \
+            }                                               \
+        } else {                                            \
+            for (k = 0; k < size; k++) {                    \
+                uint16_t r = rnd() % max_offset;            \
+                buf[k] = r;                                 \
+            }                                               \
+        }                                                   \
+    } while (0)
+
+static void check_sao_band(HEVCDSPContext h, int bit_depth)
+{
+    int i;
+    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]);
+    int16_t offset_val[OFFSET_LENGTH];
+    int left_class = rnd()%32;
+
+    for (i = 0; i <= 4; i++) {
+        int block_size = sao_size[i];
+        ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
+        declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride,
+                          int16_t *sao_offset_val, int sao_left_class, int width, int height);
+
+        randomize_buffers(src0, src1, BUF_SIZE);
+        randomize_buffers2(offset_val, OFFSET_LENGTH);
+        memset(dst0, 0, BUF_SIZE);
+        memset(dst1, 0, BUF_SIZE);
+
+        if (check_func(h.sao_band_filter[i], "hevc_sao_band_%dx%d_%d", block_size, block_size, bit_depth)) {
+            call_ref(dst0, src0, stride, stride, offset_val, left_class, block_size, block_size);
+            call_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size);
+            if (memcmp(dst0, dst1, BUF_SIZE))
+                fail();
+            bench_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size);
+        }
+    }
+}
+
+static void check_sao_edge(HEVCDSPContext h, int bit_depth)
+{
+    int i;
+    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]);
+    int16_t offset_val[OFFSET_LENGTH];
+    int eo = rnd()%4;
+
+    for (i = 0; i <= 4; i++) {
+        int block_size = sao_size[i];
+        ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
+        int offset = (AV_INPUT_BUFFER_PADDING_SIZE + PIXEL_STRIDE)*SIZEOF_PIXEL;
+        declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
+                          int16_t *sao_offset_val, int eo, int width, int height);
+
+        randomize_buffers(src0, src1, BUF_SIZE);
+        randomize_buffers2(offset_val, OFFSET_LENGTH);
+        memset(dst0, 0, BUF_SIZE);
+        memset(dst1, 0, BUF_SIZE);
+
+        if (check_func(h.sao_edge_filter[i], "hevc_sao_edge_%dx%d_%d", block_size, block_size, bit_depth)) {
+            call_ref(dst0, src0 + offset, stride, offset_val, eo, block_size, block_size);
+            call_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size);
+            if (memcmp(dst0, dst1, BUF_SIZE))
+                fail();
+            bench_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size);
+        }
+    }
+}
+
+void checkasm_check_hevc_sao(void)
+{
+    int bit_depth;
+
+    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
+        HEVCDSPContext h;
+
+        ff_hevc_dsp_init(&h, bit_depth);
+        check_sao_band(h, bit_depth);
+    }
+    report("sao_band");
+
+    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
+        HEVCDSPContext h;
+
+        ff_hevc_dsp_init(&h, bit_depth);
+        check_sao_edge(h, bit_depth);
+    }
+    report("sao_edge");
+}

diff --git a/tests/checkasm/huffyuvdsp.c b/tests/checkasm/huffyuvdsp.c
new file mode 100644
index 0000000..8392022
--- /dev/null
+++ b/tests/checkasm/huffyuvdsp.c

@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "libavcodec/huffyuvdsp.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size)     \
+    do {                                 \
+        int j;                           \
+        for (j = 0; j < size; j++)       \
+            buf[j] = rnd() & 0xFFFF;       \
+    } while (0)
+
+static void check_add_int16(HuffYUVDSPContext c, unsigned mask, int width, const char * name)
+{
+    uint16_t *src0 = av_mallocz(width * sizeof(uint16_t));
+    uint16_t *src1 = av_mallocz(width * sizeof(uint16_t));
+    uint16_t *dst0 = av_mallocz(width * sizeof(uint16_t));
+    uint16_t *dst1 = av_mallocz(width * sizeof(uint16_t));
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint16_t *dst, uint16_t *src, unsigned mask, int w);
+
+    if (!src0 || !src1 || !dst0 || !dst1)
+        fail();
+
+    randomize_buffers(src0, width);
+    memcpy(src1, src0, width * sizeof(uint16_t));
+
+    if (check_func(c.add_int16, "%s", name)) {
+        call_ref(dst0, src0, mask, width);
+        call_new(dst1, src1, mask, width);
+        if (memcmp(dst0, dst1, width * sizeof(uint16_t)))
+            fail();
+        bench_new(dst1, src1, mask, width);
+    }
+
+    av_free(src0);
+    av_free(src1);
+    av_free(dst0);
+    av_free(dst1);
+}
+
+void checkasm_check_huffyuvdsp(void)
+{
+    HuffYUVDSPContext c;
+    int width = 16 * av_clip(rnd(), 16, 128);
+
+    ff_huffyuvdsp_init(&c, AV_PIX_FMT_YUV422P);
+
+    /*! test width not multiple of mmsize */
+    check_add_int16(c, 65535, width, "add_int16_rnd_width");
+    report("add_int16_rnd_width");
+
+    /*! test always with the same size (for perf test) */
+    check_add_int16(c, 65535, 16*128, "add_int16_128");
+    report("add_int16_128");
+}

diff --git a/tests/checkasm/llviddspenc.c b/tests/checkasm/llviddspenc.c
new file mode 100644
index 0000000..31eafd5
--- /dev/null
+++ b/tests/checkasm/llviddspenc.c

@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "libavcodec/lossless_videoencdsp.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size)      \
+    do {                                  \
+        int j;                            \
+        for (j = 0; j < size; j+=4)       \
+            AV_WN32(buf + j, rnd());      \
+    } while (0)
+
+static const struct {uint8_t w, h, s;} planes[] = {
+    {16,16,16}, {21,23,25}, {32,17,48}, {15,128,16}, {128,127,128}
+};
+
+#define MAX_STRIDE 128
+#define MAX_HEIGHT 127
+
+static void check_diff_bytes(LLVidEncDSPContext *c)
+{
+    int i;
+    LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, src2, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, src3, [MAX_STRIDE]);
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src1,
+                      const uint8_t *src2, intptr_t w);
+
+    memset(dst0, 0, MAX_STRIDE);
+    memset(dst1, 0, MAX_STRIDE);
+    randomize_buffers(src0, MAX_STRIDE);
+    memcpy(src1, src0, MAX_STRIDE);
+    randomize_buffers(src2, MAX_STRIDE);
+    memcpy(src3, src2, MAX_STRIDE);
+
+    if (check_func(c->diff_bytes, "diff_bytes")) {
+        for (i = 0; i < 5; i ++) {
+            call_ref(dst0, src0, src2, planes[i].w);
+            call_new(dst1, src1, src3, planes[i].w);
+            if (memcmp(dst0, dst1, planes[i].w))
+                fail();
+        }
+        bench_new(dst1, src0, src2, planes[4].w);
+    }
+}
+
+static void check_sub_left_pred(LLVidEncDSPContext *c)
+{
+    int i;
+    LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT]);
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src,
+                      ptrdiff_t stride, ptrdiff_t width, int height);
+
+    memset(dst0, 0, MAX_STRIDE * MAX_HEIGHT);
+    memset(dst1, 0, MAX_STRIDE * MAX_HEIGHT);
+    randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT);
+    memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT);
+
+    if (check_func(c->sub_left_predict, "sub_left_predict")) {
+        for (i = 0; i < 5; i ++) {
+            call_ref(dst0, src0, planes[i].s, planes[i].w, planes[i].h);
+            call_new(dst1, src1, planes[i].s, planes[i].w, planes[i].h);
+            if (memcmp(dst0, dst1, planes[i].w * planes[i].h))
+                fail();
+            break;
+        }
+        bench_new(dst1, src0, planes[4].s, planes[4].w, planes[4].h);
+    }
+}
+
+void checkasm_check_llviddspenc(void)
+{
+    LLVidEncDSPContext c;
+    ff_llvidencdsp_init(&c);
+
+    check_diff_bytes(&c);
+    report("diff_bytes");
+
+    check_sub_left_pred(&c);
+    report("sub_left_predict");
+}

diff --git a/tests/checkasm/sw_rgb.c b/tests/checkasm/sw_rgb.c
new file mode 100644
index 0000000..000420d
--- /dev/null
+++ b/tests/checkasm/sw_rgb.c

@@ -0,0 +1,135 @@
+/*
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "libswscale/rgb2rgb.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size)      \
+    do {                                  \
+        int j;                            \
+        for (j = 0; j < size; j+=4)       \
+            AV_WN32(buf + j, rnd());      \
+    } while (0)
+
+static const uint8_t width[] = {12, 16, 20, 32, 36, 128};
+static const struct {uint8_t w, h, s;} planes[] = {
+    {12,16,12}, {16,16,16}, {20,23,25}, {32,18,48}, {8,128,16}, {128,128,128}
+};
+
+#define MAX_STRIDE 128
+#define MAX_HEIGHT 128
+
+static void check_shuffle_bytes(void * func, const char * report)
+{
+    int i;
+    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE]);
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, const uint8_t *src, uint8_t *dst, int src_size);
+
+    memset(dst0, 0, MAX_STRIDE);
+    memset(dst1, 0, MAX_STRIDE);
+    randomize_buffers(src0, MAX_STRIDE);
+    memcpy(src1, src0, MAX_STRIDE);
+
+    if (check_func(func, "%s", report)) {
+        for (i = 0; i < 6; i ++) {
+            call_ref(src0, dst0, width[i]);
+            call_new(src1, dst1, width[i]);
+            if (memcmp(dst0, dst1, MAX_STRIDE))
+                fail();
+        }
+        bench_new(src0, dst0, width[5]);
+    }
+}
+
+static void check_uyvy_to_422p(void)
+{
+    int i;
+
+    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT * 2]);
+    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst_y_0, [MAX_STRIDE * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst_y_1, [MAX_STRIDE * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst_u_0, [(MAX_STRIDE/2) * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst_u_1, [(MAX_STRIDE/2) * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst_v_0, [(MAX_STRIDE/2) * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, dst_v_1, [(MAX_STRIDE/2) * MAX_HEIGHT]);
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                      const uint8_t *src, int width, int height,
+                      int lumStride, int chromStride, int srcStride);
+
+    randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT * 2);
+    memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT * 2);
+
+    if (check_func(uyvytoyuv422, "uyvytoyuv422")) {
+        for (i = 0; i < 6; i ++) {
+            memset(dst_y_0, 0, MAX_STRIDE * MAX_HEIGHT);
+            memset(dst_y_1, 0, MAX_STRIDE * MAX_HEIGHT);
+            memset(dst_u_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
+            memset(dst_u_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
+            memset(dst_v_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
+            memset(dst_v_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
+
+            call_ref(dst_y_0, dst_u_0, dst_v_0, src0, planes[i].w, planes[i].h,
+                     MAX_STRIDE, MAX_STRIDE / 2, planes[i].s);
+            call_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[i].w, planes[i].h,
+                     MAX_STRIDE, MAX_STRIDE / 2, planes[i].s);
+            if (memcmp(dst_y_0, dst_y_1, MAX_STRIDE * MAX_HEIGHT) ||
+                memcmp(dst_u_0, dst_u_1, (MAX_STRIDE/2) * MAX_HEIGHT) ||
+                memcmp(dst_v_0, dst_v_1, (MAX_STRIDE/2) * MAX_HEIGHT))
+                fail();
+        }
+        bench_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[5].w, planes[5].h,
+                  MAX_STRIDE, MAX_STRIDE / 2, planes[5].s);
+    }
+}
+
+void checkasm_check_sw_rgb(void)
+{
+    ff_sws_rgb2rgb_init();
+
+    check_shuffle_bytes(shuffle_bytes_2103, "shuffle_bytes_2103");
+    report("shuffle_bytes_2103");
+
+    check_shuffle_bytes(shuffle_bytes_0321, "shuffle_bytes_0321");
+    report("shuffle_bytes_0321");
+
+    check_shuffle_bytes(shuffle_bytes_1230, "shuffle_bytes_1230");
+    report("shuffle_bytes_1230");
+
+    check_shuffle_bytes(shuffle_bytes_3012, "shuffle_bytes_3012");
+    report("shuffle_bytes_3012");
+
+    check_shuffle_bytes(shuffle_bytes_3210, "shuffle_bytes_3210");
+    report("shuffle_bytes_3210");
+
+    check_uyvy_to_422p();
+    report("uyvytoyuv422");
+}

diff --git a/tests/checkasm/utvideodsp.c b/tests/checkasm/utvideodsp.c
new file mode 100644
index 0000000..080b428
--- /dev/null
+++ b/tests/checkasm/utvideodsp.c

@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 Jokyo Images
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "checkasm.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/utvideodsp.h"
+#include "libavutil/intreadwrite.h"
+
+#define WIDTH 240
+#define HEIGHT 120
+#define WIDTH_PADDED (WIDTH + 16) /* padded to 32 */
+#define BUFFER_SIZE (WIDTH_PADDED * HEIGHT)
+
+
+#define randomize_plane(buf, type)          \
+    do {                                    \
+        int w, h;                           \
+        type * tmp = buf;                   \
+        for (h = 0; h < HEIGHT; h++) {      \
+            for (w = 0; w < WIDTH; w++)     \
+                tmp[w] = rnd() & 0xFF;      \
+            tmp += WIDTH_PADDED;            \
+        }                                   \
+    } while (0)
+
+#define cmp_plane(buf0, buf1, s)                    \
+    do {                                            \
+        int h;                                      \
+        for (h = 0; h < HEIGHT; h++) {              \
+            if (memcmp(buf0 + h*WIDTH_PADDED,       \
+                buf1 + h*WIDTH_PADDED, WIDTH *s))   \
+                fail();\
+        }                                           \
+    } while (0)
+
+
+#define CHECK_RESTORE(type)\
+LOCAL_ALIGNED_32(type, src_r0, [BUFFER_SIZE]);  \
+LOCAL_ALIGNED_32(type, src_g0, [BUFFER_SIZE]);  \
+LOCAL_ALIGNED_32(type, src_b0, [BUFFER_SIZE]);  \
+LOCAL_ALIGNED_32(type, src_r1, [BUFFER_SIZE]);  \
+LOCAL_ALIGNED_32(type, src_g1, [BUFFER_SIZE]);  \
+LOCAL_ALIGNED_32(type, src_b1, [BUFFER_SIZE]);  \
+declare_func(void, type *src_r, type *src_g, type *src_b,   \
+             ptrdiff_t linesize_r, ptrdiff_t linesize_g,    \
+             ptrdiff_t linesize_b, int width, int height);  \
+memset(src_r0, 0, BUFFER_SIZE * sizeof(type));  \
+memset(src_g0, 0, BUFFER_SIZE * sizeof(type));  \
+memset(src_b0, 0, BUFFER_SIZE * sizeof(type));  \
+randomize_plane(src_r0, type);                  \
+randomize_plane(src_g0, type);                  \
+randomize_plane(src_b0, type);                  \
+memcpy(src_r1, src_r0, BUFFER_SIZE * sizeof(type));         \
+memcpy(src_g1, src_g0, BUFFER_SIZE * sizeof(type));         \
+memcpy(src_b1, src_b0, BUFFER_SIZE * sizeof(type));         \
+call_ref(src_r0, src_g0, src_b0, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT);\
+call_new(src_r1, src_g1, src_b1, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT);\
+cmp_plane(src_r0, src_r1, sizeof(type));    \
+cmp_plane(src_g0, src_g1, sizeof(type));    \
+cmp_plane(src_b0, src_b1, sizeof(type));    \
+bench_new(src_r1, src_g1, src_b1, WIDTH_PADDED, WIDTH_PADDED, WIDTH_PADDED, WIDTH, HEIGHT)
+
+static void check_restore_rgb_planes(void) {
+    CHECK_RESTORE(uint8_t);
+}
+
+static void check_restore_rgb_planes10(void) {
+    CHECK_RESTORE(uint16_t);
+}
+
+void checkasm_check_utvideodsp(void)
+{
+    UTVideoDSPContext h;
+
+    ff_utvideodsp_init(&h);
+
+    if (check_func(h.restore_rgb_planes, "restore_rgb_planes"))
+        check_restore_rgb_planes();
+
+    report("restore_rgb_planes");
+
+    if (check_func(h.restore_rgb_planes10, "restore_rgb_planes10"))
+        check_restore_rgb_planes10();
+
+    report("restore_rgb_planes10");
+}

diff --git a/tests/checkasm/vf_hflip.c b/tests/checkasm/vf_hflip.c
new file mode 100644
index 0000000..6bb4d09
--- /dev/null
+++ b/tests/checkasm/vf_hflip.c

@@ -0,0 +1,76 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavfilter/hflip.h"
+#include "libavutil/intreadwrite.h"
+
+#define WIDTH 256
+#define WIDTH_PADDED 256 + 32
+
+#define randomize_buffers(buf, size)      \
+    do {                                  \
+        int j;                            \
+        uint8_t *tmp_buf = (uint8_t *)buf;\
+        for (j = 0; j < size; j++)        \
+            tmp_buf[j] = rnd() & 0xFF;    \
+    } while (0)
+
+static void check_hflip(int step, const char * report_name){
+    LOCAL_ALIGNED_32(uint8_t, src,     [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, dst_ref, [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, dst_new, [WIDTH_PADDED]);
+    int w = WIDTH;
+    int i;
+    int step_array[4] = {1, 1, 1, 1};
+    FlipContext s;
+
+    declare_func(void, const uint8_t *src, uint8_t *dst, int w);
+
+    memset(src,     0, WIDTH_PADDED);
+    memset(dst_ref, 0, WIDTH_PADDED);
+    memset(dst_new, 0, WIDTH_PADDED);
+    randomize_buffers(src, WIDTH_PADDED);
+
+    if (step == 2) {
+        w /= 2;
+        for (i = 0; i < 4; i++)
+            step_array[i] = step;
+    }
+
+    ff_hflip_init(&s, step_array, 4);
+
+    if (check_func(s.flip_line[0], "hflip_%s", report_name)) {
+        for (i = 1; i < w; i++) {
+            call_ref(src + (w - 1) * step, dst_ref, i);
+            call_new(src + (w - 1) * step, dst_new, i);
+            if (memcmp(dst_ref, dst_new, i * step))
+                fail();
+        }
+        bench_new(src + (w - 1) * step, dst_new, w);
+    }
+}
+void checkasm_check_vf_hflip(void)
+{
+    check_hflip(1, "byte");
+    report("hflip_byte");
+
+    check_hflip(2, "short");
+    report("hflip_short");
+}

diff --git a/tests/checkasm/vf_nlmeans.c b/tests/checkasm/vf_nlmeans.c
new file mode 100644
index 0000000..32c6931
--- /dev/null
+++ b/tests/checkasm/vf_nlmeans.c

@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "checkasm.h"
+#include "libavfilter/vf_nlmeans.h"
+#include "libavutil/avassert.h"
+
+#define randomize_buffer(buf, size) do {    \
+    int i;                                  \
+    for (i = 0; i < size / 4; i++)          \
+        ((uint32_t *)buf)[i] = rnd();       \
+} while (0)
+
+void checkasm_check_nlmeans(void)
+{
+    NLMeansDSPContext dsp = {0};
+
+    const int w = 123;  // source width
+    const int h = 45;   // source height
+    const int p = 3;    // patch half size
+    const int r = 2;    // research window half size
+
+    ff_nlmeans_init(&dsp);
+
+    /* See the filter's code for the explanations on the variables */
+    if (check_func(dsp.compute_safe_ssd_integral_image, "ssd_integral_image")) {
+        int offx, offy;
+        const int e = p + r;
+        const int ii_w = w + e*2;
+        const int ii_h = h + e*2;
+        const int ii_lz_32 = FFALIGN(ii_w + 1, 4);
+        uint32_t *ii_orig_ref = av_mallocz_array(ii_h + 1, ii_lz_32 * sizeof(*ii_orig_ref));
+        uint32_t *ii_ref = ii_orig_ref + ii_lz_32 + 1;
+        uint32_t *ii_orig_new = av_mallocz_array(ii_h + 1, ii_lz_32 * sizeof(*ii_orig_new));
+        uint32_t *ii_new = ii_orig_new + ii_lz_32 + 1;
+        const int src_lz = FFALIGN(w, 16);
+        uint8_t *src = av_mallocz_array(h, src_lz);
+
+        declare_func(void, uint32_t *dst, ptrdiff_t dst_linesize_32,
+                     const uint8_t *s1, ptrdiff_t linesize1,
+                     const uint8_t *s2, ptrdiff_t linesize2,
+                     int w, int h);
+
+        randomize_buffer(src, h * src_lz);
+
+        for (offy = -r; offy <= r; offy++) {
+            for (offx = -r; offx <= r; offx++) {
+                if (offx || offy) {
+                    const int s1x = e;
+                    const int s1y = e;
+                    const int s2x = e + offx;
+                    const int s2y = e + offy;
+                    const int startx_safe = FFMAX(s1x, s2x);
+                    const int starty_safe = FFMAX(s1y, s2y);
+                    const int u_endx_safe = FFMIN(s1x + w, s2x + w);
+                    const int endy_safe   = FFMIN(s1y + h, s2y + h);
+                    const int safe_pw = (u_endx_safe - startx_safe) & ~0xf;
+                    const int safe_ph = endy_safe - starty_safe;
+
+                    av_assert0(safe_pw && safe_ph);
+                    av_assert0(startx_safe - s1x >= 0); av_assert0(startx_safe - s1x < w);
+                    av_assert0(starty_safe - s1y >= 0); av_assert0(starty_safe - s1y < h);
+                    av_assert0(startx_safe - s2x >= 0); av_assert0(startx_safe - s2x < w);
+                    av_assert0(starty_safe - s2y >= 0); av_assert0(starty_safe - s2y < h);
+
+                    memset(ii_ref, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_ref));
+                    memset(ii_new, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_new));
+
+                    call_ref(ii_ref + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
+                             src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
+                             src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
+                             safe_pw, safe_ph);
+                    call_new(ii_new + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
+                             src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
+                             src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
+                             safe_pw, safe_ph);
+
+                    if (memcmp(ii_ref, ii_new, (ii_lz_32 * ii_h - 1) * sizeof(*ii_ref)))
+                        fail();
+
+                    memset(ii_new, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_new));
+                    bench_new(ii_new + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
+                             src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
+                             src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
+                             safe_pw, safe_ph);
+                }
+            }
+        }
+
+        av_freep(&ii_orig_ref);
+        av_freep(&ii_orig_new);
+        av_freep(&src);
+    }
+
+    report("dsp");
+}

diff --git a/tests/checkasm/vf_threshold.c b/tests/checkasm/vf_threshold.c
new file mode 100644
index 0000000..5a2fc0e
--- /dev/null
+++ b/tests/checkasm/vf_threshold.c

@@ -0,0 +1,85 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavfilter/threshold.h"
+#include "libavutil/intreadwrite.h"
+
+#define WIDTH 256
+#define WIDTH_PADDED 256 + 32
+
+#define randomize_buffers(buf, size)     \
+    do {                                 \
+       int j;                            \
+       uint8_t *tmp_buf = (uint8_t *)buf;\
+       for (j = 0; j < size; j++)        \
+           tmp_buf[j] = rnd() & 0xFF;    \
+    } while (0)
+
+static void check_threshold(int depth){
+    LOCAL_ALIGNED_32(uint8_t, in       , [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, threshold, [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, min      , [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, max      , [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, out_ref  , [WIDTH_PADDED]);
+    LOCAL_ALIGNED_32(uint8_t, out_new  , [WIDTH_PADDED]);
+    ptrdiff_t line_size = WIDTH_PADDED;
+    int w = WIDTH;
+
+    declare_func(void, const uint8_t *in, const uint8_t *threshold,
+                 const uint8_t *min, const uint8_t *max, uint8_t *out,
+                 ptrdiff_t ilinesize, ptrdiff_t tlinesize,
+                 ptrdiff_t flinesize, ptrdiff_t slinesize,
+                 ptrdiff_t olinesize, int w, int h);
+
+    ThresholdContext s;
+    s.depth = depth;
+    ff_threshold_init(&s);
+
+    memset(in,     0, WIDTH_PADDED);
+    memset(threshold, 0, WIDTH_PADDED);
+    memset(min, 0, WIDTH_PADDED);
+    memset(max, 0, WIDTH_PADDED);
+    memset(out_ref, 0, WIDTH_PADDED);
+    memset(out_new, 0, WIDTH_PADDED);
+    randomize_buffers(in, WIDTH);
+    randomize_buffers(threshold, WIDTH);
+    randomize_buffers(min, WIDTH);
+    randomize_buffers(max, WIDTH);
+
+    if (depth == 16)
+        w /= 2;
+
+    if (check_func(s.threshold, "threshold%d", depth)) {
+        call_ref(in, threshold, min, max, out_ref, line_size, line_size, line_size, line_size, line_size, w, 1);
+        call_new(in, threshold, min, max, out_new, line_size, line_size, line_size, line_size, line_size, w, 1);
+        if (memcmp(out_ref, out_new, WIDTH))
+            fail();
+        bench_new(in, threshold, min, max, out_new, line_size, line_size, line_size, line_size, line_size, w, 1);
+    }
+}
+
+void checkasm_check_vf_threshold(void)
+{
+    check_threshold(8);
+    report("threshold8");
+
+    check_threshold(16);
+    report("threshold16");
+}

diff --git a/tests/fate-run.sh b/tests/fate-run.sh
index 4641640..aece90a 100755
--- a/tests/fate-run.sh
+++ b/tests/fate-run.sh

@@ -25,6 +25,7 @@
 gen=${16:-no}
 hwaccel=${17:-none}
 report_type=${18:-standard}
+keep=${19:-0}
 
 outdir="tests/data/fate"
 outfile="${outdir}/${test}"
@@ -88,6 +89,10 @@
     run ffprobe${PROGSUF} -show_entries format=format_name -print_format default=nw=1:nk=1 -v 0 "$@"
 }
 
+probetags(){
+    run ffprobe${PROGSUF} -show_entries format_tags -v 0 "$@"
+}
+
 runlocal(){
     test "${V:-0}" -gt 0 && echo ${base}/"$@" ${base} >&3
     ${base}/"$@" ${base}
@@ -127,15 +132,15 @@
 }
 
 framecrc(){
-    ffmpeg "$@" -flags +bitexact -fflags +bitexact -f framecrc -
+    ffmpeg "$@" -bitexact -f framecrc -
 }
 
 ffmetadata(){
-    ffmpeg "$@" -flags +bitexact -fflags +bitexact -f ffmetadata -
+    ffmpeg "$@" -bitexact -f ffmetadata -
 }
 
 framemd5(){
-    ffmpeg "$@" -flags +bitexact -fflags +bitexact -f framemd5 -
+    ffmpeg "$@" -bitexact -f framemd5 -
 }
 
 crc(){
@@ -160,7 +165,7 @@
 fmtstdout(){
     fmt=$1
     shift 1
-    ffmpeg -flags +bitexact -fflags +bitexact "$@" -f $fmt -
+    ffmpeg -bitexact "$@" -f $fmt -
 }
 
 enc_dec_pcm(){
@@ -173,7 +178,7 @@
     cleanfiles=$encfile
     encfile=$(target_path ${encfile})
     ffmpeg -i $src_file "$@" -f $out_fmt -y ${encfile} || return
-    ffmpeg -flags +bitexact -fflags +bitexact -i ${encfile} -c:a pcm_${pcm_fmt} -fflags +bitexact -f ${dec_fmt} -
+    ffmpeg -bitexact -i ${encfile} -c:a pcm_${pcm_fmt} -fflags +bitexact -f ${dec_fmt} -
 }
 
 FLAGS="-flags +bitexact -sws_flags +accurate_rnd+bitexact -fflags +bitexact"
@@ -222,6 +227,22 @@
         -f framecrc - || return
 }
 
+stream_remux(){
+    src_fmt=$1
+    srcfile=$2
+    enc_fmt=$3
+    stream_maps=$4
+    final_decode=$5
+    encfile="${outdir}/${test}.${enc_fmt}"
+    test "$7" = -keep || cleanfiles="$cleanfiles $encfile"
+    tsrcfile=$(target_path $srcfile)
+    tencfile=$(target_path $encfile)
+    ffmpeg -f $src_fmt -i $tsrcfile $stream_maps -codec copy $FLAGS \
+        -f $enc_fmt -y $tencfile || return
+    ffmpeg $DEC_OPTS -i $encfile $ENC_OPTS $FLAGS $final_decode \
+        -f framecrc - || return
+}
+
 lavffatetest(){
     t="${test#lavf-fate-}"
     ref=${base}/ref/lavf-fate/$t
@@ -294,16 +315,16 @@
     cleanfiles="$cleanfiles $decfile1 $decfile2 $decfile3"
 
     # test packet data
-    ffmpeg $extra_args -i "$sample" -flags +bitexact -fflags +bitexact -c:a copy -f framecrc -y $decfile1
+    ffmpeg $extra_args -i "$sample" -bitexact -c:a copy -f framecrc -y $decfile1
     do_md5sum $decfile1
     # test decoded (and cut) data
-    ffmpeg $extra_args -i "$sample" -flags +bitexact -fflags +bitexact -f wav md5:
+    ffmpeg $extra_args -i "$sample" -bitexact -f wav md5:
     # the same as above again, with seeking to the start
-    ffmpeg $extra_args -ss 0 -seek_timestamp 1 -i "$sample" -flags +bitexact -fflags +bitexact -c:a copy -f framecrc -y $decfile2
+    ffmpeg $extra_args -ss 0 -seek_timestamp 1 -i "$sample" -bitexact -c:a copy -f framecrc -y $decfile2
     do_md5sum $decfile2
-    ffmpeg $extra_args -ss 0 -seek_timestamp 1 -i "$sample" -flags +bitexact -fflags +bitexact -f wav md5:
+    ffmpeg $extra_args -ss 0 -seek_timestamp 1 -i "$sample" -bitexact -f wav md5:
     # test packet data, with seeking to a specific position
-    ffmpeg $extra_args -ss 5 -seek_timestamp 1 -i "$sample" -flags +bitexact -fflags +bitexact -c:a copy -f framecrc -y $decfile3
+    ffmpeg $extra_args -ss 5 -seek_timestamp 1 -i "$sample" -bitexact -c:a copy -f framecrc -y $decfile3
     do_md5sum $decfile3
 }
 
@@ -316,7 +337,7 @@
     cleanfiles="$cleanfiles $file1"
 
     # test data after reencoding
-    ffmpeg -i "$sample" -flags +bitexact -fflags +bitexact -map 0:a -c:a $codec -f $format -y "$file1"
+    ffmpeg -i "$sample" -bitexact -map 0:a -c:a $codec -f $format -y "$file1"
     probegaplessinfo "$file1"
 }
 
@@ -328,7 +349,7 @@
     decfile="${outdir}/${test}.wav"
     cleanfiles="$cleanfiles $decfile"
 
-    ffmpeg -i "$sample" -flags +bitexact -fflags +bitexact $extra_args -y $decfile
+    ffmpeg -i "$sample" -bitexact $extra_args -y $decfile
     tests/audiomatch $decfile $trefile
 }
 
@@ -352,6 +373,10 @@
     fi
 }
 
+null(){
+    :
+}
+
 mkdir -p "$outdir"
 
 # Disable globbing: command arguments may contain globbing characters and
@@ -404,7 +429,9 @@
 fi
 
 if test $err = 0; then
-    rm -f $outfile $errfile $cmpfile $cleanfiles
+    if test $keep = 0; then
+        rm -f $outfile $errfile $cmpfile $cleanfiles
+    fi
 elif test $gen = "no"; then
     echo "Test $test failed. Look at $errfile for details."
     test "${V:-0}" -gt 0 && cat $errfile

diff --git a/tests/fate/aac.mak b/tests/fate/aac.mak
index e8cbcef..bd28322 100644
--- a/tests/fate/aac.mak
+++ b/tests/fate/aac.mak

@@ -154,7 +154,7 @@
 fate-aac-aref-encode: CMP = stddev
 fate-aac-aref-encode: REF = ./tests/data/asynth-44100-2.wav
 fate-aac-aref-encode: CMP_SHIFT = -4096
-fate-aac-aref-encode: CMP_TARGET = 669
+fate-aac-aref-encode: CMP_TARGET = 596
 fate-aac-aref-encode: SIZE_TOLERANCE = 2464
 fate-aac-aref-encode: FUZZ = 89
 
@@ -163,7 +163,7 @@
 fate-aac-ln-encode: CMP = stddev
 fate-aac-ln-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
 fate-aac-ln-encode: CMP_SHIFT = -4096
-fate-aac-ln-encode: CMP_TARGET = 61
+fate-aac-ln-encode: CMP_TARGET = 72
 fate-aac-ln-encode: SIZE_TOLERANCE = 3560
 fate-aac-ln-encode: FUZZ = 30
 
@@ -172,7 +172,7 @@
 fate-aac-ln-encode-128k: CMP = stddev
 fate-aac-ln-encode-128k: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
 fate-aac-ln-encode-128k: CMP_SHIFT = -4096
-fate-aac-ln-encode-128k: CMP_TARGET = 800
+fate-aac-ln-encode-128k: CMP_TARGET = 622
 fate-aac-ln-encode-128k: SIZE_TOLERANCE = 3560
 fate-aac-ln-encode-128k: FUZZ = 5
 
@@ -181,7 +181,7 @@
 fate-aac-pns-encode: CMP = stddev
 fate-aac-pns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
 fate-aac-pns-encode: CMP_SHIFT = -4096
-fate-aac-pns-encode: CMP_TARGET = 616
+fate-aac-pns-encode: CMP_TARGET = 655
 fate-aac-pns-encode: SIZE_TOLERANCE = 3560
 fate-aac-pns-encode: FUZZ = 74
 
@@ -190,7 +190,7 @@
 fate-aac-tns-encode: CMP = stddev
 fate-aac-tns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
 fate-aac-tns-encode: CMP_SHIFT = -4096
-fate-aac-tns-encode: CMP_TARGET = 817
+fate-aac-tns-encode: CMP_TARGET = 637
 fate-aac-tns-encode: FUZZ = 7
 fate-aac-tns-encode: SIZE_TOLERANCE = 3560
 
@@ -199,7 +199,7 @@
 fate-aac-is-encode: CMP = stddev
 fate-aac-is-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
 fate-aac-is-encode: CMP_SHIFT = -4096
-fate-aac-is-encode: CMP_TARGET = 615
+fate-aac-is-encode: CMP_TARGET = 514
 fate-aac-is-encode: SIZE_TOLERANCE = 3560
 fate-aac-is-encode: FUZZ = 10
 
@@ -208,26 +208,17 @@
 fate-aac-ms-encode: CMP = stddev
 fate-aac-ms-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
 fate-aac-ms-encode: CMP_SHIFT = -4096
-fate-aac-ms-encode: CMP_TARGET = 675
+fate-aac-ms-encode: CMP_TARGET = 558
 fate-aac-ms-encode: SIZE_TOLERANCE = 3560
 fate-aac-ms-encode: FUZZ = 15
 
-FATE_AAC_ENCODE += fate-aac-ltp-encode
-fate-aac-ltp-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -profile:a aac_ltp -aac_pns 0 -aac_is 0 -aac_ms 0 -aac_tns 0 -b:a 36k -fflags +bitexact -flags +bitexact
-fate-aac-ltp-encode: CMP = stddev
-fate-aac-ltp-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
-fate-aac-ltp-encode: CMP_SHIFT = -4096
-fate-aac-ltp-encode: CMP_TARGET = 1270
-fate-aac-ltp-encode: SIZE_TOLERANCE = 3560
-fate-aac-ltp-encode: FUZZ = 17
-
 #Ticket1784
 FATE_AAC_ENCODE += fate-aac-yoraw-encode
 fate-aac-yoraw-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/yo.raw-short.wav -c:a aac -fflags +bitexact -flags +bitexact
 fate-aac-yoraw-encode: CMP = stddev
 fate-aac-yoraw-encode: REF = $(SAMPLES)/audio-reference/yo.raw-short.wav
 fate-aac-yoraw-encode: CMP_SHIFT = -12288
-fate-aac-yoraw-encode: CMP_TARGET = 259
+fate-aac-yoraw-encode: CMP_TARGET = 226
 fate-aac-yoraw-encode: SIZE_TOLERANCE = 3560
 fate-aac-yoraw-encode: FUZZ = 17
 
@@ -237,7 +228,7 @@
 fate-aac-pred-encode: CMP = stddev
 fate-aac-pred-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
 fate-aac-pred-encode: CMP_SHIFT = -4096
-fate-aac-pred-encode: CMP_TARGET = 841
+fate-aac-pred-encode: CMP_TARGET = 662
 fate-aac-pred-encode: FUZZ = 12
 fate-aac-pred-encode: SIZE_TOLERANCE = 3560
 

diff --git a/tests/fate/ac3.mak b/tests/fate/ac3.mak
index 76be2e8..bb02d38 100644
--- a/tests/fate/ac3.mak
+++ b/tests/fate/ac3.mak

@@ -60,6 +60,10 @@
 fate-eac3-4: CMD = pcm -i $(TARGET_SAMPLES)/eac3/serenity_english_5.1_1536_small.eac3
 fate-eac3-4: REF = $(SAMPLES)/eac3/serenity_english_5.1_1536_small_v2.pcm
 
+FATE_EAC3 += fate-eac3-5
+fate-eac3-5: CMD = pcm -i $(TARGET_SAMPLES)/eac3/the_great_wall_7.1.eac3
+fate-eac3-5: REF = $(SAMPLES)/eac3/the_great_wall_7.1.pcm
+
 $(FATE_AC3) $(FATE_EAC3): CMP = oneoff
 
 FATE_AC3-$(call  DEMDEC, AC3,  AC3)  += $(FATE_AC3)
@@ -88,6 +92,11 @@
 fate-ac3-fixed-encode: CMP = oneline
 fate-ac3-fixed-encode: REF = a1d1fc116463b771abf5aef7ed37d7b1
 
+FATE_EAC3-$(call ALLYES, EAC3_DEMUXER EAC3_MUXER EAC3_CORE_BSF) += fate-eac3-core-bsf
+fate-eac3-core-bsf: CMD = md5pipe -i $(TARGET_SAMPLES)/eac3/the_great_wall_7.1.eac3 -c:a copy -bsf:a eac3_core -fflags +bitexact -f eac3
+fate-eac3-core-bsf: CMP = oneline
+fate-eac3-core-bsf: REF = b704bf851e99b7442e9bed368b60e6ca
+
 FATE_SAMPLES_AVCONV += $(FATE_AC3-yes) $(FATE_EAC3-yes)
 
 fate-ac3: $(FATE_AC3-yes) $(FATE_EAC3-yes)

diff --git a/tests/fate/acodec.mak b/tests/fate/acodec.mak
index 5c3fea9..80d26de 100644
--- a/tests/fate/acodec.mak
+++ b/tests/fate/acodec.mak

@@ -104,14 +104,14 @@
 fate-acodec-dca: SRC = tests/data/asynth-44100-2.wav
 fate-acodec-dca: CMD = md5 -i $(TARGET_PATH)/$(SRC) -c:a dca -strict -2 -f dts -flags +bitexact
 fate-acodec-dca: CMP = oneline
-fate-acodec-dca: REF = 7cd79a3717943a06b217f1130223a86f
+fate-acodec-dca: REF = 2aa580ac67820fce4f581b96ebb34acc
 
 FATE_ACODEC-$(call ENCDEC, DCA, WAV) += fate-acodec-dca2
 fate-acodec-dca2: CMD = enc_dec_pcm dts wav s16le $(SRC) -c:a dca -strict -2 -flags +bitexact
 fate-acodec-dca2: REF = $(SRC)
 fate-acodec-dca2: CMP = stddev
 fate-acodec-dca2: CMP_SHIFT = -2048
-fate-acodec-dca2: CMP_TARGET = 527
+fate-acodec-dca2: CMP_TARGET = 535
 fate-acodec-dca2: SIZE_TOLERANCE = 1632
 
 FATE_ACODEC-$(call ENCDEC, FLAC, FLAC) += fate-acodec-flac fate-acodec-flac-exact-rice

diff --git a/tests/fate/audio.mak b/tests/fate/audio.mak
index 40c4ca7..4fab472 100644
--- a/tests/fate/audio.mak
+++ b/tests/fate/audio.mak

@@ -19,6 +19,11 @@
 FATE_SAMPLES_AUDIO-$(call DEMDEC, DSICIN, DSICINAUDIO) += fate-delphine-cin-audio
 fate-delphine-cin-audio: CMD = framecrc -i $(TARGET_SAMPLES)/delphine-cin/LOGO-partial.CIN -vn
 
+FATE_SAMPLES_AUDIO-$(call DEMDEC, S337M, DOLBY_E) += fate-dolby-e
+fate-dolby-e: CMD = pcm -i $(TARGET_SAMPLES)/dolby_e/16-11
+fate-dolby-e: CMP = oneoff
+fate-dolby-e: REF = $(SAMPLES)/dolby_e/16-11.pcm
+
 FATE_SAMPLES_AUDIO-$(call DEMDEC, DSS, DSS_SP) += fate-dss-lp fate-dss-sp
 fate-dss-lp: CMD = framecrc -i $(TARGET_SAMPLES)/dss/lp.dss -frames 30
 fate-dss-sp: CMD = framecrc -i $(TARGET_SAMPLES)/dss/sp.dss -frames 30

diff --git a/tests/fate/avformat.mak b/tests/fate/avformat.mak
index c9ea99a..a12f9cc 100644
--- a/tests/fate/avformat.mak
+++ b/tests/fate/avformat.mak

@@ -9,7 +9,6 @@
 FATE_LAVF-$(call ENCDEC,  PCM_S16BE,             CAF)                += caf
 FATE_LAVF-$(call ENCDEC,  DPX,                   IMAGE2)             += dpx
 FATE_LAVF-$(call ENCDEC2, DVVIDEO,    PCM_S16LE, AVI)                += dv_fmt
-FATE_LAVF-$(call ENCDEC2, MPEG1VIDEO, MP2,       FFM)                += ffm
 FATE_LAVF-$(call ENCDEC,  FITS,                  FITS)               += fits
 FATE_LAVF-$(call ENCDEC,  RAWVIDEO,              FILMSTRIP)          += flm
 FATE_LAVF-$(call ENCDEC,  FLV,                   FLV)                += flv_fmt

diff --git a/tests/fate/build.mak b/tests/fate/build.mak
new file mode 100644
index 0000000..f97f9eb
--- /dev/null
+++ b/tests/fate/build.mak

@@ -0,0 +1,17 @@
+FATE_BUILD += fate-build-alltools
+fate-build-alltools: alltools
+
+FATE_BUILD += fate-build-checkheaders
+fate-build-checkheaders: checkheaders
+
+FATE_BUILD += fate-build-examples
+fate-build-examples: examples
+
+FATE_BUILD += fate-build-testprogs
+fate-build-testprogs: testprogs
+
+$(FATE_BUILD): CMD = null
+$(FATE_BUILD): CMP = null
+
+# FATE += $(FATE_BUILD)
+fate-build: $(FATE_BUILD)

diff --git a/tests/fate/cbs.mak b/tests/fate/cbs.mak
new file mode 100644
index 0000000..911e770
--- /dev/null
+++ b/tests/fate/cbs.mak

@@ -0,0 +1,107 @@
+# Read/write tests: this uses the codec metadata filter - with no
+# arguments, it decomposes the stream fully and then recomposes it
+# without making any changes.
+
+fate-cbs: fate-cbs-h264 fate-cbs-hevc fate-cbs-mpeg2 fate-cbs-vp9
+
+FATE_CBS_DEPS = $(call ALLYES, $(1)_DEMUXER $(2)_PARSER $(3)_METADATA_BSF $(4)_DECODER $(5)_MUXER)
+
+define FATE_CBS_TEST
+# (codec, test_name, sample_file, output_format)
+FATE_CBS_$(1) += fate-cbs-$(1)-$(2)
+fate-cbs-$(1)-$(2): CMD = md5 -i $(TARGET_SAMPLES)/$(3) -c:v copy -y -bsf:v $(1)_metadata -f $(4)
+endef
+
+# H.264 read/write
+
+FATE_CBS_H264_CONFORMANCE_SAMPLES = \
+    SVA_Base_B.264        \
+    BASQP1_Sony_C.jsv     \
+    FM1_BT_B.h264         \
+    CVFC1_Sony_C.jsv      \
+    AUD_MW_E.264          \
+    CVBS3_Sony_C.jsv      \
+    MR1_BT_A.h264         \
+    CVWP1_TOSHIBA_E.264   \
+    CVNLFI1_Sony_C.jsv    \
+    Sharp_MP_PAFF_1r2.jvt \
+    CVMANL1_TOSHIBA_B.264 \
+    sp1_bt_a.h264         \
+    CVSE2_Sony_B.jsv      \
+    CABACI3_Sony_B.jsv
+
+FATE_CBS_H264_SAMPLES = \
+    sei-1.h264
+
+$(foreach N,$(FATE_CBS_H264_CONFORMANCE_SAMPLES),$(eval $(call FATE_CBS_TEST,h264,$(basename $(N)),h264-conformance/$(N),h264)))
+$(foreach N,$(FATE_CBS_H264_SAMPLES),$(eval $(call FATE_CBS_TEST,h264,$(basename $(N)),h264/$(N),h264)))
+
+FATE_CBS_H264-$(call FATE_CBS_DEPS, H264, H264, H264, H264, H264) = $(FATE_CBS_h264)
+FATE_SAMPLES_AVCONV += $(FATE_CBS_H264-yes)
+fate-cbs-h264: $(FATE_CBS_H264-yes)
+
+# H.265 read/write
+
+FATE_CBS_HEVC_SAMPLES =       \
+    STRUCT_A_Samsung_5.bit    \
+    WP_A_Toshiba_3.bit        \
+    SLIST_A_Sony_4.bit        \
+    SLIST_D_Sony_9.bit        \
+    CAINIT_E_SHARP_3.bit      \
+    CAINIT_H_SHARP_3.bit      \
+    TILES_B_Cisco_1.bit       \
+    WPP_A_ericsson_MAIN_2.bit \
+    WPP_F_ericsson_MAIN_2.bit \
+    ipcm_E_NEC_2.bit          \
+    NUT_A_ericsson_5.bit      \
+    PICSIZE_A_Bossen_1.bit    \
+    PICSIZE_B_Bossen_1.bit    \
+    RPS_A_docomo_4.bit        \
+    RPS_E_qualcomm_5.bit      \
+    LTRPSPS_A_Qualcomm_1.bit  \
+    RPLM_A_qualcomm_4.bit     \
+    CONFWIN_A_Sony_1.bit      \
+    HRD_A_Fujitsu_2.bit
+
+$(foreach N,$(FATE_CBS_HEVC_SAMPLES),$(eval $(call FATE_CBS_TEST,hevc,$(basename $(N)),hevc-conformance/$(N),hevc)))
+
+FATE_CBS_HEVC-$(call FATE_CBS_DEPS, HEVC, HEVC, HEVC, HEVC, HEVC) = $(FATE_CBS_hevc)
+FATE_SAMPLES_AVCONV += $(FATE_CBS_HEVC-yes)
+fate-cbs-hevc: $(FATE_CBS_HEVC-yes)
+
+# MPEG-2 read/write
+
+FATE_CBS_MPEG2_SAMPLES =     \
+    hhi_burst_422_short.bits \
+    sony-ct3.bs              \
+    tcela-6.bits
+
+$(foreach N,$(FATE_CBS_MPEG2_SAMPLES),$(eval $(call FATE_CBS_TEST,mpeg2,$(basename $(N)),mpeg2/$(N),mpeg2video)))
+
+FATE_CBS_MPEG2-$(call FATE_CBS_DEPS, MPEGVIDEO, MPEGVIDEO, MPEG2, MPEG2VIDEO, MPEG2VIDEO) = $(FATE_CBS_mpeg2)
+FATE_SAMPLES_AVCONV += $(FATE_CBS_MPEG2-yes)
+fate-cbs-mpeg2: $(FATE_CBS_MPEG2-yes)
+
+# VP9 read/write
+
+FATE_CBS_VP9_SAMPLES =                  \
+    vp90-2-03-deltaq.webm               \
+    vp90-2-05-resize.ivf                \
+    vp90-2-06-bilinear.webm             \
+    vp90-2-09-lf_deltas.webm            \
+    vp90-2-10-show-existing-frame.webm  \
+    vp90-2-10-show-existing-frame2.webm \
+    vp90-2-segmentation-aq-akiyo.webm   \
+    vp90-2-segmentation-sf-akiyo.webm   \
+    vp90-2-tiling-pedestrian.webm       \
+    vp91-2-04-yuv440.webm               \
+    vp91-2-04-yuv444.webm               \
+    vp92-2-20-10bit-yuv420.webm         \
+    vp93-2-20-10bit-yuv422.webm         \
+    vp93-2-20-12bit-yuv444.webm
+
+$(foreach N,$(FATE_CBS_VP9_SAMPLES),$(eval $(call FATE_CBS_TEST,vp9,$(basename $(N)),vp9-test-vectors/$(N),ivf)))
+
+FATE_CBS_VP9-$(call FATE_CBS_DEPS, IVF, VP9, VP9, VP9, IVF) = $(FATE_CBS_vp9)
+FATE_SAMPLES_AVCONV += $(FATE_CBS_VP9-yes)
+fate-cbs-vp9: $(FATE_CBS_VP9-yes)

diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index fbf60e9..a722b4a 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak

@@ -14,14 +14,19 @@
                 fate-checkasm-h264qpel                                  \
                 fate-checkasm-hevc_add_res                              \
                 fate-checkasm-hevc_idct                                 \
+                fate-checkasm-hevc_sao                                  \
                 fate-checkasm-jpeg2000dsp                               \
                 fate-checkasm-llviddsp                                  \
+                fate-checkasm-llviddspenc                               \
                 fate-checkasm-pixblockdsp                               \
                 fate-checkasm-sbrdsp                                    \
                 fate-checkasm-synth_filter                              \
+                fate-checkasm-sw_rgb                                    \
                 fate-checkasm-v210enc                                   \
                 fate-checkasm-vf_blend                                  \
                 fate-checkasm-vf_colorspace                             \
+                fate-checkasm-vf_hflip                                  \
+                fate-checkasm-vf_threshold                              \
                 fate-checkasm-videodsp                                  \
                 fate-checkasm-vp8dsp                                    \
                 fate-checkasm-vp9dsp                                    \
@@ -30,5 +35,5 @@
 $(FATE_CHECKASM): CMD = run tests/checkasm/checkasm --test=$(@:fate-checkasm-%=%)
 $(FATE_CHECKASM): CMP = null
 
-FATE-$(CONFIG_STATIC) += $(FATE_CHECKASM)
+FATE += $(FATE_CHECKASM)
 fate-checkasm: $(FATE_CHECKASM)

diff --git a/tests/fate/dca.mak b/tests/fate/dca.mak
index b1681c6..fad3a75 100644
--- a/tests/fate/dca.mak
+++ b/tests/fate/dca.mak

@@ -75,5 +75,10 @@
 fate-dts_es: CMP = oneoff
 fate-dts_es: REF = $(SAMPLES)/dts/dts_es_2.pcm
 
+FATE_DCA-$(call ALLYES, DTS_DEMUXER DTS_MUXER DCA_CORE_BSF) += fate-dca-core-bsf
+fate-dca-core-bsf: CMD = md5pipe -i $(TARGET_SAMPLES)/dts/master_audio_7.1_24bit.dts -c:a copy -bsf:a dca_core -fflags +bitexact -f dts
+fate-dca-core-bsf: CMP = oneline
+fate-dca-core-bsf: REF = ca22b00d8c641cd168e2f7ca8d2f340e
+
 FATE_SAMPLES_AUDIO += $(FATE_DCA-yes)
 fate-dca: $(FATE_DCA-yes)

diff --git a/tests/fate/demux.mak b/tests/fate/demux.mak
index 9427ac3..eb8d8c3 100644
--- a/tests/fate/demux.mak
+++ b/tests/fate/demux.mak

@@ -1,9 +1,11 @@
 FATE_SAMPLES_DEMUX-$(call DEMDEC, AVI, FRAPS) += fate-avio-direct
 fate-avio-direct: CMD = framecrc -avioflags direct -i $(TARGET_SAMPLES)/fraps/fraps-v5-bouncing-balls-partial.avi -avioflags direct
 
-FATE_SAMPLES_DEMUX-$(call DEMDEC, AAC, AAC) += fate-adts-demux fate-adts-id3v1-demux
+FATE_SAMPLES_DEMUX-$(call DEMDEC, AAC, AAC) += fate-adts-demux fate-adts-id3v1-demux fate-adts-id3v2-demux fate-adts-id3v2-two-tags-demux
 fate-adts-demux: CMD = crc -i $(TARGET_SAMPLES)/aac/ct_faac-adts.aac -c:a copy
 fate-adts-id3v1-demux: CMD = framecrc -f aac -i $(TARGET_SAMPLES)/aac/id3v1.aac -c:a copy
+fate-adts-id3v2-demux: CMD = framecrc -f aac -i $(TARGET_SAMPLES)/aac/id3v2.aac -c:a copy
+fate-adts-id3v2-two-tags-demux: CMD = framecrc -i $(TARGET_SAMPLES)/aac/id3v2_two_tags.aac -c:a copy
 
 FATE_SAMPLES_DEMUX-$(CONFIG_AEA_DEMUXER) += fate-aea-demux
 fate-aea-demux: CMD = crc -i $(TARGET_SAMPLES)/aea/chirp.aea -c:a copy
@@ -74,6 +76,9 @@
 FATE_SAMPLES_DEMUX-$(CONFIG_MPEGTS_DEMUXER) += fate-ts-opus-demux
 fate-ts-opus-demux: CMD = framecrc -i $(TARGET_SAMPLES)/opus/test-8-7.1.opus-small.ts -c copy
 
+FATE_SAMPLES_DEMUX-$(CONFIG_MPEGTS_DEMUXER) += fate-ts-small-demux
+fate-ts-small-demux: CMD = framecrc -i $(TARGET_SAMPLES)/mpegts/h264small.ts -c copy
+
 FATE_SAMPLES_DEMUX-$(CONFIG_MTV_DEMUXER) += fate-mtv
 fate-mtv: CMD = framecrc -i $(TARGET_SAMPLES)/mtv/comedian_auto-partial.mtv -c copy
 

diff --git a/tests/fate/ffmpeg.mak b/tests/fate/ffmpeg.mak
index d8f2e71..0975af2 100644
--- a/tests/fate/ffmpeg.mak
+++ b/tests/fate/ffmpeg.mak

@@ -26,6 +26,14 @@
 FATE_FFMPEG-$(CONFIG_COLOR_FILTER) += fate-ffmpeg-filter_complex
 fate-ffmpeg-filter_complex: CMD = framecrc -filter_complex color=d=1:r=5 -fflags +bitexact
 
+# Ticket 6603
+FATE_FFMPEG-$(call ALLYES, AEVALSRC_FILTER ASETNSAMPLES_FILTER AC3_FIXED_ENCODER) += fate-ffmpeg-filter_complex_audio
+fate-ffmpeg-filter_complex_audio: CMD = framecrc -filter_complex "aevalsrc=0:d=0.1,asetnsamples=1537" -c ac3_fixed
+
+# Ticket 6375, use case of NoX
+FATE_SAMPLES_FFMPEG-$(call ALLYES, MOV_DEMUXER PNG_DECODER ALAC_DECODER PCM_S16LE_ENCODER RAWVIDEO_ENCODER) += fate-ffmpeg-attached_pics
+fate-ffmpeg-attached_pics: CMD = threads=2 framecrc -i $(TARGET_SAMPLES)/lossless-audio/inside.m4a -c:a pcm_s16le -max_muxing_queue_size 16
+
 FATE_SAMPLES_FFMPEG-$(CONFIG_COLORKEY_FILTER) += fate-ffmpeg-filter_colorkey
 fate-ffmpeg-filter_colorkey: tests/data/filtergraphs/colorkey
 fate-ffmpeg-filter_colorkey: CMD = framecrc -idct simple -fflags +bitexact -flags +bitexact  -sws_flags +accurate_rnd+bitexact -i $(TARGET_SAMPLES)/cavs/cavs.mpg -fflags +bitexact -flags +bitexact -sws_flags +accurate_rnd+bitexact -i $(TARGET_SAMPLES)/lena.pnm -an -filter_complex_script $(TARGET_PATH)/tests/data/filtergraphs/colorkey -sws_flags +accurate_rnd+bitexact -fflags +bitexact -flags +bitexact -qscale 2 -frames:v 10

diff --git a/tests/fate/filter-audio.mak b/tests/fate/filter-audio.mak
index bd8b3d3..dcc8d56 100644
--- a/tests/fate/filter-audio.mak
+++ b/tests/fate/filter-audio.mak

@@ -128,9 +128,59 @@
 fate-filter-firequalizer: CMP_UNIT = s16
 fate-filter-firequalizer: SIZE_TOLERANCE = 1058400 - 1097208
 
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, PAN, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-pan-mono1
+fate-filter-pan-mono1: tests/data/asynth-44100-2.wav
+fate-filter-pan-mono1: SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav
+fate-filter-pan-mono1: CMD = framecrc -ss 3.14 -i $(SRC) -frames:a 20 -filter:a "pan=mono|FC=FL"
+
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, PAN, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-pan-mono2
+fate-filter-pan-mono2: tests/data/asynth-44100-2.wav
+fate-filter-pan-mono2: SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav
+fate-filter-pan-mono2: CMD = framecrc -ss 3.14 -i $(SRC) -frames:a 20 -filter:a "pan=1C|c0=c0+c1"
+
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, PAN, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-pan-stereo1
+fate-filter-pan-stereo1: tests/data/asynth-44100-3.wav
+fate-filter-pan-stereo1: SRC = $(TARGET_PATH)/tests/data/asynth-44100-3.wav
+fate-filter-pan-stereo1: CMD = framecrc -ss 3.14 -i $(SRC) -frames:a 20 -filter:a "pan=2c|FL=FR|FR=FL"
+
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, PAN, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-pan-stereo2
+fate-filter-pan-stereo2: tests/data/asynth-44100-3.wav
+fate-filter-pan-stereo2: SRC = $(TARGET_PATH)/tests/data/asynth-44100-3.wav
+fate-filter-pan-stereo2: CMD = framecrc -ss 3.14 -i $(SRC) -frames:a 20 -filter:a "pan=stereo|c0=c0-c2|c1=c1-c2"
+
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, PAN, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-pan-stereo3
+fate-filter-pan-stereo3: tests/data/asynth-44100-2.wav
+fate-filter-pan-stereo3: SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav
+fate-filter-pan-stereo3: CMD = framecrc -ss 3.14 -i $(SRC) -frames:a 20 -filter:a "pan=FL+FR|FL<3*c0+2*c1|FR<2*c0+3*c1"
+
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, PAN, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-pan-stereo4
+fate-filter-pan-stereo4: tests/data/asynth-44100-2.wav
+fate-filter-pan-stereo4: SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav
+fate-filter-pan-stereo4: CMD = framecrc -ss 3.14 -guess_layout_max 0 -i $(SRC) -frames:a 20 -filter:a "pan=2C|c0=c0-0.5*c1|c1=c1+0.5*c0"
+
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, PAN, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-pan-upmix1
+fate-filter-pan-upmix1: tests/data/asynth-44100-2.wav
+fate-filter-pan-upmix1: SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav
+fate-filter-pan-upmix1: CMD = framecrc -ss 3.14 -guess_layout_max 0 -i $(SRC) -frames:a 20 -filter:a "pan=4C|c0=c0-0.5*c1|c1=c1+0.5*c0|c2=0*c0|c3=0*c0"
+
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, PAN, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-pan-upmix2
+fate-filter-pan-upmix2: tests/data/asynth-44100-4.wav
+fate-filter-pan-upmix2: SRC = $(TARGET_PATH)/tests/data/asynth-44100-4.wav
+fate-filter-pan-upmix2: CMD = framecrc -ss 3.14 -i $(SRC) -frames:a 20 -filter:a "pan=9C|c0=c0-c1|c1=c2+c3|c2=c0+c1|c3=c2-c3|c4=c1-c0|c5=c3+c2|c6=c1+c0|c7=c3-c2|c8=c0-c3"
+
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, PAN, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-pan-downmix1
+fate-filter-pan-downmix1: tests/data/asynth-44100-4.wav
+fate-filter-pan-downmix1: SRC = $(TARGET_PATH)/tests/data/asynth-44100-4.wav
+fate-filter-pan-downmix1: CMD = framecrc -ss 3.14 -i $(SRC) -frames:a 20 -filter:a "pan=2c|FL<FL+0.5*FC+0.6*BL+0.6*SL|FR<FR+0.5*FC+0.6*BR+0.6*SR"
+
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, PAN, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-pan-downmix2
+fate-filter-pan-downmix2: tests/data/asynth-44100-11.wav
+fate-filter-pan-downmix2: SRC = $(TARGET_PATH)/tests/data/asynth-44100-11.wav
+fate-filter-pan-downmix2: CMD = framecrc -ss 3.14 -i $(SRC) -frames:a 20 -filter:a "pan=5C|c0=0.7*c0+0.7*c10|c1=c9|c2=c8|c3=c7|c4=c6"
+
 FATE_AFILTER_SAMPLES-$(call FILTERDEMDECENCMUX, SILENCEREMOVE, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-silenceremove
 fate-filter-silenceremove: SRC = $(TARGET_SAMPLES)/audio-reference/divertimenti_2ch_96kHz_s24.wav
-fate-filter-silenceremove: CMD = framecrc -i $(SRC) -frames:a 30 -af silenceremove=0:0:0:-1:0:-90dB
+fate-filter-silenceremove: CMD = framecrc -i $(SRC) -frames:a 30 -af silenceremove=start_periods=0:start_duration=0:start_threshold=0:stop_periods=-1:stop_duration=0:stop_threshold=-90dB
 
 FATE_AFILTER_SAMPLES-$(call FILTERDEMDECENCMUX, STEREOTOOLS, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-stereotools
 fate-filter-stereotools: SRC = $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
@@ -169,7 +219,7 @@
 
 FATE_AFILTER-$(call ALLYES, HLS_DEMUXER MPEGTS_MUXER MPEGTS_DEMUXER AEVALSRC_FILTER LAVFI_INDEV MP2FIXED_ENCODER) += fate-filter-hls-append
 fate-filter-hls-append: tests/data/hls-list-append.m3u8
-fate-filter-hls-append: CMD = framecrc -flags +bitexact -i $(TARGET_PATH)/tests/data/hls-list-append.m3u8 -af asetpts=RTCTIME
+fate-filter-hls-append: CMD = framecrc -flags +bitexact -i $(TARGET_PATH)/tests/data/hls-list-append.m3u8 -af asetpts=N*23
 
 FATE_AMIX += fate-filter-amix-simple
 fate-filter-amix-simple: CMD = ffmpeg -filter_complex amix -i $(SRC) -ss 3 -i $(SRC1) -f f32le -

diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index c19f301..8bbdc04 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak

@@ -89,6 +89,12 @@
 FATE_FILTER-$(call ALLYES, LAVFI_INDEV ALLYUV_FILTER) += fate-filter-allyuv
 fate-filter-allyuv: CMD = framecrc -lavfi allyuv=rate=5:duration=1 -pix_fmt yuv444p
 
+FATE_FILTER-$(call ALLYES, LAVFI_INDEV PAL75BARS_FILTER) += fate-filter-pal75bars
+fate-filter-pal75bars: CMD = framecrc -lavfi pal75bars=rate=5:duration=1 -pix_fmt yuv420p
+
+FATE_FILTER-$(call ALLYES, LAVFI_INDEV PAL100BARS_FILTER) += fate-filter-pal100bars
+fate-filter-pal100bars: CMD = framecrc -lavfi pal100bars=rate=5:duration=1 -pix_fmt yuv420p
+
 FATE_FILTER-$(call ALLYES, LAVFI_INDEV RGBTESTSRC_FILTER) += fate-filter-rgbtestsrc
 fate-filter-rgbtestsrc: CMD = framecrc -lavfi rgbtestsrc=rate=5:duration=1 -pix_fmt rgb24
 
@@ -108,11 +114,14 @@
 fate-filter-lavd-scalenorm: tests/data/filtergraphs/scalenorm
 fate-filter-lavd-scalenorm: CMD = framecrc -f lavfi -graph_file $(TARGET_PATH)/tests/data/filtergraphs/scalenorm -i dummy
 
-
 FATE_FILTER-$(call ALLYES, FRAMERATE_FILTER TESTSRC2_FILTER) += fate-filter-framerate-up fate-filter-framerate-down
 fate-filter-framerate-up: CMD = framecrc -lavfi testsrc2=r=2:d=10,framerate=fps=10 -t 1
 fate-filter-framerate-down: CMD = framecrc -lavfi testsrc2=r=2:d=10,framerate=fps=1 -t 1
 
+FATE_FILTER-$(call ALLYES, FRAMERATE_FILTER TESTSRC2_FILTER FORMAT_FILTER) += fate-filter-framerate-12bit-up fate-filter-framerate-12bit-down
+fate-filter-framerate-12bit-up: CMD = framecrc -lavfi testsrc2=r=50:d=1,format=pix_fmts=yuv422p12le,framerate=fps=60 -t 1 -pix_fmt yuv422p12le
+fate-filter-framerate-12bit-down: CMD = framecrc -lavfi testsrc2=r=60:d=1,format=pix_fmts=yuv422p12le,framerate=fps=50 -t 1 -pix_fmt yuv422p12le
+
 FATE_FILTER_VSYNTH-$(CONFIG_BOXBLUR_FILTER) += fate-filter-boxblur
 fate-filter-boxblur: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf boxblur=2:1
 
@@ -422,6 +431,17 @@
 FATE_FILTER-$(call ALLYES, TESTSRC2_FILTER FPS_FILTER MPDECIMATE_FILTER) += fate-filter-mpdecimate
 fate-filter-mpdecimate: CMD = framecrc -lavfi testsrc2=r=2:d=10,fps=3,mpdecimate -r 3 -pix_fmt yuv420p
 
+FATE_FILTER-$(call ALLYES, FPS_FILTER TESTSRC2_FILTER) += fate-filter-fps-up fate-filter-fps-up-round-down fate-filter-fps-up-round-up fate-filter-fps-down fate-filter-fps-down-round-down fate-filter-fps-down-round-up fate-filter-fps-down-eof-pass fate-filter-fps-start-drop fate-filter-fps-start-fill
+fate-filter-fps-up: CMD = framecrc -lavfi testsrc2=r=3:d=2,fps=7
+fate-filter-fps-up-round-down: CMD = framecrc -lavfi testsrc2=r=3:d=2,fps=7:round=down
+fate-filter-fps-up-round-up: CMD = framecrc -lavfi testsrc2=r=3:d=2,fps=7:round=up
+fate-filter-fps-down: CMD = framecrc -lavfi testsrc2=r=7:d=3.5,fps=3
+fate-filter-fps-down-round-down: CMD = framecrc -lavfi testsrc2=r=7:d=3.5,fps=3:round=down
+fate-filter-fps-down-round-up: CMD = framecrc -lavfi testsrc2=r=7:d=3.5,fps=3:round=up
+fate-filter-fps-down-eof-pass: CMD = framecrc -lavfi testsrc2=r=7:d=3.5,fps=3:eof_action=pass
+fate-filter-fps-start-drop: CMD = framecrc -lavfi testsrc2=r=7:d=3.5,fps=3:start_time=1.5
+fate-filter-fps-start-fill: CMD = framecrc -lavfi testsrc2=r=7:d=1.5,setpts=PTS+14,fps=3:start_time=1.5
+
 FATE_FILTER_SAMPLES-$(call ALLYES, MOV_DEMUXER FPS_FILTER QTRLE_DECODER) += fate-filter-fps-cfr fate-filter-fps fate-filter-fps-r
 fate-filter-fps-cfr: CMD = framecrc -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -r 30 -vsync cfr -pix_fmt yuv420p
 fate-filter-fps-r:   CMD = framecrc -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -r 30 -vf fps -pix_fmt yuv420p
@@ -485,8 +505,17 @@
 FATE_FILTER_VSYNTH-$(call ALLYES, FORMAT_FILTER PERMS_FILTER EDGEDETECT_FILTER) += fate-filter-edgedetect-colormix
 fate-filter-edgedetect-colormix: CMD = video_filter "format=gbrp,perms=random,edgedetect=mode=colormix" -frames:v 20
 
-FATE_FILTER_VSYNTH-$(call ALLYES, PERMS_FILTER HUE_FILTER) += fate-filter-hue
-fate-filter-hue: CMD = video_filter "perms=random,hue=s=sin(2*PI*t)+1" -frames:v 20
+FATE_FILTER_VSYNTH-$(call ALLYES, PERMS_FILTER HUE_FILTER) += fate-filter-hue1
+fate-filter-hue1: CMD = video_filter "perms=random,hue=s=sin(2*PI*t)+1" -frames:v 20
+
+FATE_FILTER_VSYNTH-$(call ALLYES, PERMS_FILTER HUE_FILTER) += fate-filter-hue2
+fate-filter-hue2: CMD = video_filter "perms=random,hue=h=18*n" -frames:v 20
+
+FATE_FILTER_VSYNTH-$(call ALLYES, PERMS_FILTER HUE_FILTER) += fate-filter-hue3
+fate-filter-hue3: CMD = video_filter "perms=random,hue=b=n-10" -frames:v 20
+
+FATE_FILTER_VSYNTH-$(call ALLYES, FORMAT_FILTER PERMS_FILTER HUE_FILTER) += fate-filter-hue4
+fate-filter-hue4: CMD = video_filter "format=yuv422p10,perms=random,hue=h=18*n:s=n/10" -frames:v 20 -pix_fmt yuv422p10le
 
 FATE_FILTER_VSYNTH-$(CONFIG_IDET_FILTER) += fate-filter-idet
 fate-filter-idet: CMD = framecrc -flags bitexact -idct simple -i $(SRC) -vf idet -frames:v 25 -flags +bitexact
@@ -680,6 +709,9 @@
 FATE_FILTER_PIXFMTS-$(CONFIG_TINTERLACE_FILTER) += fate-filter-pixfmts-tinterlace_vlpf
 fate-filter-pixfmts-tinterlace_vlpf: CMD = pixfmts "interleave_top:vlpf"
 
+FATE_FILTER_PIXFMTS-$(CONFIG_TRANSPOSE_FILTER) += fate-filter-pixfmts-transpose
+fate-filter-pixfmts-transpose: CMD = pixfmts "dir=cclock_flip"
+
 FATE_FILTER_PIXFMTS-$(CONFIG_VFLIP_FILTER) += fate-filter-pixfmts-vflip
 fate-filter-pixfmts-vflip: CMD = pixfmts
 
@@ -710,10 +742,10 @@
 fate-filter-metadata-cropdetect: SRC = $(TARGET_SAMPLES)/filter/cropdetect.mp4
 fate-filter-metadata-cropdetect: CMD = run $(FILTER_METADATA_COMMAND) "sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',cropdetect=max_outliers=3"
 
-SILENCEDETECT_DEPS = FFPROBE AVDEVICE LAVFI_INDEV AMOVIE_FILTER AMR_DEMUXER AMRWB_DECODER SILENCEDETECT_FILTER
+SILENCEDETECT_DEPS = FFPROBE AVDEVICE LAVFI_INDEV AMOVIE_FILTER TTA_DEMUXER TTA_DECODER SILENCEDETECT_FILTER
 FATE_METADATA_FILTER-$(call ALLYES, $(SILENCEDETECT_DEPS)) += fate-filter-metadata-silencedetect
-fate-filter-metadata-silencedetect: SRC = $(TARGET_SAMPLES)/amrwb/seed-12k65.awb
-fate-filter-metadata-silencedetect: CMD = run $(FILTER_METADATA_COMMAND) "amovie='$(SRC)',silencedetect=d=-20dB"
+fate-filter-metadata-silencedetect: SRC = $(TARGET_SAMPLES)/lossless-audio/inside.tta
+fate-filter-metadata-silencedetect: CMD = run $(FILTER_METADATA_COMMAND) "amovie='$(SRC)',silencedetect=n=-33.5dB:d=.2"
 
 EBUR128_METADATA_DEPS = FFPROBE AVDEVICE LAVFI_INDEV AMOVIE_FILTER FLAC_DEMUXER FLAC_DECODER EBUR128_FILTER
 FATE_METADATA_FILTER-$(call ALLYES, $(EBUR128_METADATA_DEPS)) += fate-filter-metadata-ebur128

diff --git a/tests/fate/fits.mak b/tests/fate/fits.mak
index 3d58f98..113498c 100644
--- a/tests/fate/fits.mak
+++ b/tests/fate/fits.mak

@@ -8,7 +8,7 @@
 map.tests/data/lena-gray.fits    := gray8
 map.tests/data/lena-gbrp.fits    := rgb24
 map.tests/data/lena-gbrp16.fits  := rgb48
-map.tests/data/lena-gbrap16.fits := rgba64
+map.tests/data/lena-gbrap16le.fits := rgba64
 
 tests/data/lena%.fits: TAG = GEN
 tests/data/lena%.fits: NAME = $(map.$(@))
@@ -18,16 +18,16 @@
         -y $(TARGET_PATH)/$(@) 2>/dev/null
 
 FATE_FITS_DEC-$(call DEMDEC, FITS, FITS) += fate-fitsdec-ext_data_min_max
-fate-fitsdec-ext_data_min_max: CMD = framecrc -i $(TARGET_SAMPLES)/fits/x0cj010ct_d0h.fit -pix_fmt gray16
+fate-fitsdec-ext_data_min_max: CMD = framecrc -i $(TARGET_SAMPLES)/fits/x0cj010ct_d0h.fit -pix_fmt gray16le
 
 FATE_FITS_DEC-$(call DEMDEC, FITS, FITS) += fate-fitsdec-blank_bitpix32
-fate-fitsdec-blank_bitpix32: CMD = framecrc -blank_value 65535 -i $(TARGET_SAMPLES)/fits/file008.fits -pix_fmt gray16
+fate-fitsdec-blank_bitpix32: CMD = framecrc -blank_value 65535 -i $(TARGET_SAMPLES)/fits/file008.fits -pix_fmt gray16le
 
 FATE_FITS_DEC-$(call DEMDEC, FITS, FITS) += fate-fitsdec-bitpix-32
-fate-fitsdec-bitpix-32: CMD = framecrc -i $(TARGET_SAMPLES)/fits/tst0005.fits -pix_fmt gray16
+fate-fitsdec-bitpix-32: CMD = framecrc -i $(TARGET_SAMPLES)/fits/tst0005.fits -pix_fmt gray16le
 
 FATE_FITS_DEC-$(call DEMDEC, FITS, FITS) += fate-fitsdec-bitpix-64
-fate-fitsdec-bitpix-64: CMD = framecrc -i $(TARGET_SAMPLES)/fits/tst0006.fits -pix_fmt gray16
+fate-fitsdec-bitpix-64: CMD = framecrc -i $(TARGET_SAMPLES)/fits/tst0006.fits -pix_fmt gray16le
 
 FATE_FITS_DEC-$(call ALLYES, GIF_DEMUXER FITS_DEMUXER GIF_DECODER FITS_DECODER FITS_ENCODER FITS_MUXER) += fate-fitsdec-multi
 fate-fitsdec-multi: tests/data/fits-multi.fits
@@ -37,7 +37,7 @@
 fate-fitsdec%: SRC = $(TARGET_PATH)/tests/data/lena-$(PIXFMT).fits
 fate-fitsdec%: CMD = framecrc -i $(SRC) -pix_fmt $(PIXFMT)
 
-FATE_FITS_DEC_PIXFMT = gray gbrp gbrp16 gbrap16
+FATE_FITS_DEC_PIXFMT = gray gbrp gbrp16 gbrap16le
 $(FATE_FITS_DEC_PIXFMT:%=fate-fitsdec-%): fate-fitsdec-%: tests/data/lena-%.fits
 FATE_FITS_DEC-$(call ALLYES, FITS_DEMUXER IMAGE2_DEMUXER FITS_DECODER PNG_DECODER FITS_ENCODER FITS_MUXER) += $(FATE_FITS_DEC_PIXFMT:%=fate-fitsdec-%)
 

diff --git a/tests/fate/gapless.mak b/tests/fate/gapless.mak
index 0253b9e..91fddb4 100644
--- a/tests/fate/gapless.mak
+++ b/tests/fate/gapless.mak

@@ -1,5 +1,5 @@
 FATE_GAPLESS-$(CONFIG_MP3_DEMUXER) += fate-gapless-mp3
-fate-gapless-mp3: CMD = gapless $(TARGET_SAMPLES)/gapless/gapless.mp3
+fate-gapless-mp3: CMD = gapless $(TARGET_SAMPLES)/gapless/gapless.mp3 "-c:a mp3"
 
 FATE_GAPLESS-$(CONFIG_MP3_DEMUXER) += fate-audiomatch-square-mp3
 fate-audiomatch-square-mp3: CMD = audio_match $(TARGET_SAMPLES)/audiomatch/square3.mp3 $(TARGET_SAMPLES)/audiomatch/square3.wav

diff --git a/tests/fate/h264.mak b/tests/fate/h264.mak
index 043e1f2..1839b9b 100644
--- a/tests/fate/h264.mak
+++ b/tests/fate/h264.mak

@@ -195,6 +195,7 @@
               fate-h264-lossless                                        \
               fate-h264-3386                                            \
               fate-h264-missing-frame                                   \
+              fate-h264-ref-pic-mod-overflow                            \
 
 FATE_H264-$(call DEMDEC, H264, H264) += $(FATE_H264)
 FATE_H264-$(call DEMDEC,  MOV, H264) += fate-h264-crop-to-container
@@ -434,6 +435,7 @@
 fate-h264-invalid-ref-mod:                        CMD = framecrc -i $(TARGET_SAMPLES)/h264/h264refframeregression.mp4 -an -frames 10 -pix_fmt yuv420p10le
 fate-h264-lossless:                               CMD = framecrc -i $(TARGET_SAMPLES)/h264/lossless.h264
 fate-h264-mixed-nal-coding:                       CMD = framecrc -i $(TARGET_SAMPLES)/h264/mixed-nal-coding.mp4
+fate-h264-ref-pic-mod-overflow:                   CMD = framecrc -i $(TARGET_SAMPLES)/h264/ref-pic-mod-overflow.h264
 fate-h264-twofields-packet:                       CMD = framecrc -i $(TARGET_SAMPLES)/h264/twofields_packet.mp4 -an -frames 30
 fate-h264-unescaped-extradata:                    CMD = framecrc -i $(TARGET_SAMPLES)/h264/unescaped_extradata.mp4 -an -frames 10
 fate-h264-3386:                                   CMD = framecrc -i $(TARGET_SAMPLES)/h264/bbc2.sample.h264

diff --git a/tests/fate/hap.mak b/tests/fate/hap.mak
new file mode 100644
index 0000000..1582971
--- /dev/null
+++ b/tests/fate/hap.mak

@@ -0,0 +1,72 @@
+FATE_HAP += fate-hap1
+fate-hap1: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hap1.mov
+
+FATE_HAP += fate-hap5
+fate-hap5: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hap5.mov
+
+FATE_HAP += fate-hapy
+fate-hapy: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hapy.mov
+
+FATE_HAP += fate-hap-chunk
+fate-hap-chunk: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hapy-12-chunks.mov
+
+FATE_HAP += fate-hapqa-nosnappy-127x71
+fate-hapqa-nosnappy-127x71: CMD = framecrc -i $(TARGET_SAMPLES)/hap/HAPQA_NoSnappy_127x1.mov
+
+FATE_HAP += fate-hapqa-snappy1-127x71
+fate-hapqa-snappy1-127x71: CMD = framecrc -i $(TARGET_SAMPLES)/hap/HAPQA_Snappy_1chunk_127x1.mov
+
+FATE_HAP += fate-hapqa-snappy16-127x71
+fate-hapqa-snappy16-127x71: CMD = framecrc -i $(TARGET_SAMPLES)/hap/HAPQA_Snappy_16chunk_127x1.mov
+
+FATE_HAP += fate-hap-alpha-only-nosnappy-128x72
+fate-hap-alpha-only-nosnappy-128x72: CMD = framecrc -i $(TARGET_SAMPLES)/hap/HapAlphaOnly_NoSnappy_128x72.mov -pix_fmt gray8
+
+FATE_HAP += fate-hap-alpha-only-snappy-127x71
+fate-hap-alpha-only-snappy-127x71: CMD = framecrc -i $(TARGET_SAMPLES)/hap/HapAlphaOnly_snappy1chunk_127x71.mov -pix_fmt gray8
+
+FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, HAP) += $(FATE_HAP)
+fate-hap: $(FATE_HAP)
+
+
+#Test bsf conversion
+FATE_HAPQA_EXTRACT_BSF += fate-hapqa-extract-snappy1-to-hapq
+fate-hapqa-extract-snappy1-to-hapq: CMD = framecrc -i $(TARGET_SAMPLES)/hap/HAPQA_Snappy_1chunk_127x1.mov -c:v copy -bsf:v hapqa_extract=texture=color -tag:v HapY -metadata:s:v:0 encoder="HAPQ"
+
+FATE_HAPQA_EXTRACT_BSF += fate-hapqa-extract-snappy16-to-hapq
+fate-hapqa-extract-snappy16-to-hapq: CMD = framecrc -i $(TARGET_SAMPLES)/hap/HAPQA_Snappy_16chunk_127x1.mov -c:v copy -bsf:v hapqa_extract=texture=color -tag:v HapY -metadata:s:v:0 encoder="HAPQ"
+
+FATE_HAPQA_EXTRACT_BSF += fate-hapqa-extract-snappy1-to-hapalphaonly
+fate-hapqa-extract-snappy1-to-hapalphaonly: CMD = framecrc -i $(TARGET_SAMPLES)/hap/HAPQA_Snappy_1chunk_127x1.mov -c:v copy -bsf:v hapqa_extract=texture=alpha -tag:v HapA -metadata:s:v:0 encoder="HAPAlphaOnly"
+
+FATE_HAPQA_EXTRACT_BSF += fate-hapqa-extract-snappy16-to-hapalphaonly
+fate-hapqa-extract-snappy16-to-hapalphaonly: CMD = framecrc -i $(TARGET_SAMPLES)/hap/HAPQA_Snappy_16chunk_127x1.mov -c:v copy -bsf:v hapqa_extract=texture=alpha -tag:v HapA -metadata:s:v:0 encoder="HAPAlphaOnly"
+
+
+#Test bsf conversion and mov
+tests/data/hapq_nosnappy.mov: TAG = GEN
+tests/data/hapq_nosnappy.mov: ffmpeg$(PROGSSUF)$(EXESUF) | tests/data
+	$(M)$(TARGET_EXEC) $(TARGET_PATH)/$< \
+	-i $(TARGET_SAMPLES)/hap/HAPQA_NoSnappy_127x1.mov -nostdin -c:v copy -bsf:v hapqa_extract=texture=color \
+        -tag:v HapY -metadata:s:v:0 encoder="HAPQ" $(TARGET_PATH)/$@ -y 2>/dev/null
+
+tests/data/hapalphaonly_nosnappy.mov: TAG = GEN
+tests/data/hapalphaonly_nosnappy.mov: ffmpeg$(PROGSSUF)$(EXESUF) | tests/data
+	$(M)$(TARGET_EXEC) $(TARGET_PATH)/$< \
+	-i $(TARGET_SAMPLES)/hap/HAPQA_NoSnappy_127x1.mov -nostdin -c:v copy -bsf:v hapqa_extract=texture=alpha \
+        -tag:v HapA -metadata:s:v:0 encoder="HAPAlpha Only" $(TARGET_PATH)/$@ -y 2>/dev/null
+
+
+FATE_HAPQA_EXTRACT_BSF_FFPROBE += fate-hapqa-extract-nosnappy-to-hapq-mov
+fate-hapqa-extract-nosnappy-to-hapq-mov: tests/data/hapq_nosnappy.mov
+fate-hapqa-extract-nosnappy-to-hapq-mov: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_packets -show_data_hash adler32 -bitexact -show_streams -select_streams v -v 0 $(TARGET_PATH)/tests/data/hapq_nosnappy.mov
+
+FATE_HAPQA_EXTRACT_BSF_FFPROBE += fate-hapqa-extract-nosnappy-to-hapalphaonly-mov
+fate-hapqa-extract-nosnappy-to-hapalphaonly-mov: tests/data/hapalphaonly_nosnappy.mov
+fate-hapqa-extract-nosnappy-to-hapalphaonly-mov: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_packets -show_data_hash adler32 -bitexact -show_streams -select_streams v -v 0 $(TARGET_PATH)/tests/data/hapalphaonly_nosnappy.mov
+
+
+FATE_SAMPLES_FFMPEG-$(call ALLYES, MOV_DEMUXER HAPQA_EXTRACT_BSF MOV_MUXER) += $(FATE_HAPQA_EXTRACT_BSF)
+FATE_SAMPLES_FFPROBE += $(FATE_HAPQA_EXTRACT_BSF_FFPROBE)
+
+fate-hapqa-extract-bsf: $(FATE_HAPQA_EXTRACT_BSF) $(FATE_HAPQA_EXTRACT_BSF_FFPROBE)

diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak
index 2e798ec..db3ea19 100644
--- a/tests/fate/hevc.mak
+++ b/tests/fate/hevc.mak

@@ -168,6 +168,7 @@
 HEVC_SAMPLES_444_12BIT =        \
     IPCM_B_RExt_NEC             \
     PERSIST_RPARAM_A_RExt_Sony_1\
+    PERSIST_RPARAM_A_RExt_Sony_3\
     SAO_A_RExt_MediaTek_1       \
 
 
@@ -187,7 +188,7 @@
 
 define FATE_HEVC_TEST
 FATE_HEVC += fate-hevc-conformance-$(1)
-fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit
+fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv420p
 endef
 
 define FATE_HEVC_TEST_10BIT
@@ -207,7 +208,7 @@
 
 define FATE_HEVC_TEST_444_8BIT
 FATE_HEVC += fate-hevc-conformance-$(1)
-fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit
+fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv444p
 endef
 
 define FATE_HEVC_TEST_444_12BIT
@@ -239,6 +240,9 @@
 fate-hevc-bsf-mp4toannexb: CMP = oneline
 fate-hevc-bsf-mp4toannexb: REF = 1873662a3af1848c37e4eb25722c8df9
 
+fate-hevc-skiploopfilter: CMD = framemd5 -skip_loop_filter nokey -i $(TARGET_SAMPLES)/hevc-conformance/SAO_D_Samsung_5.bit -sws_flags bitexact
+FATE_HEVC += fate-hevc-skiploopfilter
+
 FATE_HEVC-$(call DEMDEC, HEVC, HEVC) += $(FATE_HEVC)
 
 # this sample has two stsd entries and needs to reload extradata
@@ -246,6 +250,9 @@
 
 fate-hevc-extradata-reload: CMD = framemd5 -i $(TARGET_SAMPLES)/hevc/extradata-reload-multi-stsd.mov -sws_flags bitexact
 
+fate-hevc-monochrome-crop: CMD = probeframes -show_entries frame=width,height:stream=width,height $(TARGET_SAMPLES)/hevc/hevc-monochrome.hevc
+FATE_HEVC_FFPROBE-$(call DEMDEC, HEVC, HEVC) += fate-hevc-monochrome-crop
+
 FATE_SAMPLES_AVCONV += $(FATE_HEVC-yes)
 FATE_SAMPLES_FFPROBE += $(FATE_HEVC_FFPROBE-yes)
 

diff --git a/tests/fate/hw.mak b/tests/fate/hw.mak
new file mode 100644
index 0000000..d606cde
--- /dev/null
+++ b/tests/fate/hw.mak

@@ -0,0 +1,6 @@
+FATE_HWCONTEXT += fate-hwdevice
+fate-hwdevice: libavutil/tests/hwdevice$(EXESUF)
+fate-hwdevice: CMD = run libavutil/tests/hwdevice
+fate-hwdevice: CMP = null
+
+FATE_HW-$(CONFIG_AVUTIL) += $(FATE_HWCONTEXT)

diff --git a/tests/fate/id3v2.mak b/tests/fate/id3v2.mak
new file mode 100644
index 0000000..873f593
--- /dev/null
+++ b/tests/fate/id3v2.mak

@@ -0,0 +1,5 @@
+FATE_SAMPLES_ID3V2-$(CONFIG_MP3_DEMUXER) += fate-id3v2-priv
+fate-id3v2-priv: CMD = probetags $(TARGET_SAMPLES)/id3v2/id3v2_priv.mp3
+
+FATE_SAMPLES_FFPROBE += $(FATE_SAMPLES_ID3V2-yes)
+fate-id3v2: $(FATE_SAMPLES_ID3V2-yes)

diff --git a/tests/fate/image.mak b/tests/fate/image.mak
index 7e7be3f..121405a 100644
--- a/tests/fate/image.mak
+++ b/tests/fate/image.mak

@@ -284,6 +284,9 @@
 FATE_EXR += fate-exr-rgb-scanline-half-piz-dw-t08
 fate-exr-rgb-scanline-half-piz-dw-t08: CMD = framecrc -i $(TARGET_SAMPLES)/exr/rgb_scanline_half_piz_dw_t08.exr -pix_fmt rgb48le
 
+FATE_EXR += fate-exr-rgba-zip16-16x32-flag4
+fate-exr-rgba-zip16-16x32-flag4: CMD = framecrc -i $(TARGET_SAMPLES)/exr/rgba_zip16_16x32_flag4.exr -pix_fmt rgba64le
+
 FATE_EXR-$(call DEMDEC, IMAGE2, EXR) += $(FATE_EXR)
 
 FATE_IMAGE += $(FATE_EXR-yes)

diff --git a/tests/fate/libavcodec.mak b/tests/fate/libavcodec.mak
index 27d631d..aa4c36b 100644
--- a/tests/fate/libavcodec.mak
+++ b/tests/fate/libavcodec.mak

@@ -13,6 +13,11 @@
 fate-celp_math: CMD = run libavcodec/tests/celp_math
 fate-celp_math: CMP = null
 
+FATE_LIBAVCODEC-yes += fate-codec_desc
+fate-codec_desc: libavcodec/tests/codec_desc$(EXESUF)
+fate-codec_desc: CMD = run libavcodec/tests/codec_desc
+fate-codec_desc: CMP = null
+
 FATE_LIBAVCODEC-$(CONFIG_GOLOMB) += fate-golomb
 fate-golomb: libavcodec/tests/golomb$(EXESUF)
 fate-golomb: CMD = run libavcodec/tests/golomb
@@ -41,10 +46,20 @@
 fate-dct8x8: CMD = run libavcodec/tests/dct
 fate-dct8x8: CMP = null
 
+FATE_LIBAVCODEC-$(CONFIG_H264_VAAPI_ENCODER) += fate-h264-levels
+fate-h264-levels: libavcodec/tests/h264_levels$(EXESUF)
+fate-h264-levels: CMD = run libavcodec/tests/h264_levels
+fate-h264-levels: REF = /dev/null
+
 FATE_LIBAVCODEC-$(CONFIG_IIRFILTER) += fate-iirfilter
 fate-iirfilter: libavcodec/tests/iirfilter$(EXESUF)
 fate-iirfilter: CMD = run libavcodec/tests/iirfilter
 
+FATE_LIBAVCODEC-$(CONFIG_MPEGVIDEO) += fate-mpeg12framerate
+fate-mpeg12framerate: libavcodec/tests/mpeg12framerate$(EXESUF)
+fate-mpeg12framerate: CMD = run libavcodec/tests/mpeg12framerate
+fate-mpeg12framerate: REF = /dev/null
+
 FATE_LIBAVCODEC-yes += fate-libavcodec-options
 fate-libavcodec-options: libavcodec/tests/options$(EXESUF)
 fate-libavcodec-options: CMD = run libavcodec/tests/options

diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak
index 591cab9..b12d157 100644
--- a/tests/fate/libavutil.mak
+++ b/tests/fate/libavutil.mak

@@ -23,11 +23,6 @@
 fate-cast5: CMD = run libavutil/tests/cast5
 fate-cast5: CMP = null
 
-FATE_LIBAVUTIL += fate-atomic
-fate-atomic: libavutil/tests/atomic$(EXESUF)
-fate-atomic: CMD = run libavutil/tests/atomic
-fate-atomic: CMP = null
-
 FATE_LIBAVUTIL += fate-audio_fifo
 fate-audio_fifo: libavutil/tests/audio_fifo$(EXESUF)
 fate-audio_fifo: CMD = run libavutil/tests/audio_fifo
@@ -75,6 +70,11 @@
 fate-dict: libavutil/tests/dict$(EXESUF)
 fate-dict: CMD = run libavutil/tests/dict
 
+FATE_LIBAVUTIL += fate-encryption-info
+fate-encryption-info: libavutil/tests/encryption_info$(EXESUF)
+fate-encryption-info: CMD = run libavutil/tests/encryption_info
+fate-encryption-info: CMP = null
+
 FATE_LIBAVUTIL += fate-eval
 fate-eval: libavutil/tests/eval$(EXESUF)
 fate-eval: CMD = run libavutil/tests/eval
@@ -95,6 +95,11 @@
 fate-imgutils: libavutil/tests/imgutils$(EXESUF)
 fate-imgutils: CMD = run libavutil/tests/imgutils
 
+FATE_LIBAVUTIL += fate-integer
+fate-integer: libavutil/tests/integer$(EXESUF)
+fate-integer: CMD = run libavutil/tests/integer
+fate-integer: CMP = null
+
 FATE_LIBAVUTIL += fate-lfg
 fate-lfg: libavutil/tests/lfg$(EXESUF)
 fate-lfg: CMD = run libavutil/tests/lfg

diff --git a/tests/fate/mov.mak b/tests/fate/mov.mak
index cfdada7..4df0fe6 100644
--- a/tests/fate/mov.mak
+++ b/tests/fate/mov.mak

@@ -6,18 +6,37 @@
            fate-mov-1elist-ends-last-bframe \
            fate-mov-2elist-elist1-ends-bframe \
            fate-mov-3elist-encrypted \
+           fate-mov-frag-encrypted \
+           fate-mov-tenc-only-encrypted \
+           fate-mov-invalid-elst-entry-count \
            fate-mov-gpmf-remux \
+           fate-mov-440hz-10ms \
+           fate-mov-ibi-elst-starts-b \
+           fate-mov-elst-ends-betn-b-and-i \
+           fate-mov-frag-overlap \
+           fate-mov-bbi-elst-starts-b \
+           fate-mov-neg-firstpts-discard-frames \
+           fate-mov-stream-shorter-than-movie \
 
-FATE_MOV_FFPROBE = fate-mov-aac-2048-priming \
+FATE_MOV_FFPROBE = fate-mov-neg-firstpts-discard \
+                   fate-mov-neg-firstpts-discard-vorbis \
+                   fate-mov-aac-2048-priming \
                    fate-mov-zombie \
                    fate-mov-init-nonkeyframe \
                    fate-mov-displaymatrix \
                    fate-mov-spherical-mono \
+                   fate-mov-guess-delay-1 \
+                   fate-mov-guess-delay-2 \
+                   fate-mov-guess-delay-3 \
+                   fate-mov-mp4-with-mov-in24-ver \
+
+FATE_MOV_FASTSTART = fate-mov-faststart-4gb-overflow \
 
 FATE_SAMPLES_AVCONV += $(FATE_MOV)
 FATE_SAMPLES_FFPROBE += $(FATE_MOV_FFPROBE)
+FATE_SAMPLES_FASTSTART += $(FATE_MOV_FASTSTART)
 
-fate-mov: $(FATE_MOV) $(FATE_MOV_FFPROBE)
+fate-mov: $(FATE_MOV) $(FATE_MOV_FFPROBE) $(FATE_MOV_FASTSTART)
 
 # Make sure we handle edit lists correctly in normal cases.
 fate-mov-1elist-noctts: CMD = framemd5 -i $(TARGET_SAMPLES)/mov/mov-1elist-noctts.mov
@@ -28,6 +47,12 @@
 # Edit list with encryption
 fate-mov-3elist-encrypted: CMD = framemd5 -decryption_key 12345678901234567890123456789012 -i $(TARGET_SAMPLES)/mov/mov-3elist-encrypted.mov
 
+# Fragmented encryption with senc boxes in movie fragments.
+fate-mov-frag-encrypted: CMD = framemd5 -decryption_key 12345678901234567890123456789012 -i $(TARGET_SAMPLES)/mov/mov-frag-encrypted.mp4
+
+# Full-sample encryption and constant IV using only tenc atom (no senc/saio/saiz).
+fate-mov-tenc-only-encrypted: CMD = framemd5 -decryption_key 12345678901234567890123456789012 -i $(TARGET_SAMPLES)/mov/mov-tenc-only-encrypted.mp4
+
 # Makes sure that the CTTS is also modified when we fix avindex in mov.c while parsing edit lists.
 fate-mov-elist-starts-ctts-2ndsample: CMD = framemd5 -i $(TARGET_SAMPLES)/mov/mov-elist-starts-ctts-2ndsample.mov
 
@@ -39,6 +64,43 @@
 # Makes sure that we handle timestamps of packets in case of multiple edit lists with one of them ending on a B-frame correctly.
 fate-mov-2elist-elist1-ends-bframe: CMD = framemd5 -i $(TARGET_SAMPLES)/mov/mov-2elist-elist1-ends-bframe.mov
 
+# Makes sure that if edit list ends on a B-frame but before the I-frame, then we output the B-frame but discard the I-frame.
+fate-mov-elst-ends-betn-b-and-i: CMD = framemd5 -i $(TARGET_SAMPLES)/mov/elst_ends_betn_b_and_i.mp4
+
+# Makes sure that we handle edit lists and start padding correctly.
+fate-mov-440hz-10ms: CMD = framemd5 -i $(TARGET_SAMPLES)/mov/440hz-10ms.m4a
+
+# Makes sure that we handle invalid edit list entry count correctly.
+fate-mov-invalid-elst-entry-count: CMD = framemd5 -idct simple -flags +bitexact -i $(TARGET_SAMPLES)/mov/invalid_elst_entry_count.mov
+
+# Makes sure that 1st key-frame is picked when,
+#    i) One B-frame between 2 key-frames
+#   ii) Edit list starts on B-frame.
+#  iii) Both key-frames have their DTS < edit list start
+# i.e.  Pts Order: I-B-I
+fate-mov-ibi-elst-starts-b: CMD = framemd5 -flags +bitexact -i $(TARGET_SAMPLES)/mov/mov_ibi_elst_starts_b.mov
+
+# Makes sure that we handle overlapping framgments
+fate-mov-frag-overlap: CMD = framemd5 -i $(TARGET_SAMPLES)/mov/frag_overlap.mp4
+
+# Makes sure that we pick the right frames according to edit list when there is no keyframe with PTS < edit list start.
+# For example, when video starts on a B-frame, and edit list starts on that B-frame too.
+# GOP structure : B B I in presentation order.
+fate-mov-bbi-elst-starts-b: CMD = framemd5 -flags +bitexact -acodec aac_fixed -i $(TARGET_SAMPLES)/h264/twofields_packet.mp4
+
+# Makes sure that the stream start_time is not negative when the first packet is a DISCARD packet with negative timestamp.
+fate-mov-neg-firstpts-discard: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_entries stream=start_time -bitexact $(TARGET_SAMPLES)/mov/mov_neg_first_pts_discard.mov
+
+# Makes sure that the VORBIS audio stream start_time is not negative when the first few packets are DISCARD packets
+# with negative timestamps (skip_samples is not set for Vorbis, so ffmpeg computes start_time as negative if not specified by demuxer).
+fate-mov-neg-firstpts-discard-vorbis: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_entries stream=start_time -bitexact $(TARGET_SAMPLES)/mov/mov_neg_first_pts_discard_vorbis.mp4
+
+# Makes sure that expected frames are generated for mov_neg_first_pts_discard.mov with -vsync 1
+fate-mov-neg-firstpts-discard-frames: CMD = framemd5 -flags +bitexact -i $(TARGET_SAMPLES)/mov/mov_neg_first_pts_discard.mov -vsync 1
+
+# Makes sure that no frame is dropped/duplicated with fps filter due to start_time / duration miscalculations.
+fate-mov-stream-shorter-than-movie: CMD = framemd5 -flags +bitexact -i $(TARGET_SAMPLES)/mov/mov_stream_shorter_than_movie.mov -vf fps=fps=24 -an
+
 fate-mov-aac-2048-priming: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_packets -print_format compact $(TARGET_SAMPLES)/mov/aac-2048-priming.mov
 
 fate-mov-zombie: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_streams -show_packets -show_frames -bitexact -print_format compact $(TARGET_SAMPLES)/mov/white_zombie_scrunch-part.mov
@@ -52,3 +114,13 @@
 fate-mov-gpmf-remux: CMD = md5 -i $(TARGET_SAMPLES)/mov/fake-gp-media-with-real-gpmf.mp4 -map 0 -c copy -fflags +bitexact -f mp4
 fate-mov-gpmf-remux: CMP = oneline
 fate-mov-gpmf-remux: REF = 8f48e435ee1f6b7e173ea756141eabf3
+
+fate-mov-guess-delay-1: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_entries stream=has_b_frames -select_streams v $(TARGET_SAMPLES)/h264/h264_3bf_nopyramid_nobsrestriction.mp4
+fate-mov-guess-delay-2: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_entries stream=has_b_frames -select_streams v $(TARGET_SAMPLES)/h264/h264_3bf_pyramid_nobsrestriction.mp4
+fate-mov-guess-delay-3: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_entries stream=has_b_frames -select_streams v $(TARGET_SAMPLES)/h264/h264_4bf_pyramid_nobsrestriction.mp4
+
+fate-mov-faststart-4gb-overflow: CMD = run tools/qt-faststart$(EXESUF) $(TARGET_SAMPLES)/mov/faststart-4gb-overflow.mov faststart-4gb-overflow-output.mov > /dev/null ; do_md5sum faststart-4gb-overflow-output.mov | cut -d " " -f1 ; rm faststart-4gb-overflow-output.mov
+fate-mov-faststart-4gb-overflow: CMP = oneline
+fate-mov-faststart-4gb-overflow: REF = bc875921f151871e787c4b4023269b29
+
+fate-mov-mp4-with-mov-in24-ver: CMD = run ffprobe -show_entries stream=codec_name -select_streams 1 $(TARGET_SAMPLES)/mov/mp4-with-mov-in24-ver.mp4

diff --git a/tests/fate/mpegps.mak b/tests/fate/mpegps.mak
new file mode 100644
index 0000000..cec1ea7
--- /dev/null
+++ b/tests/fate/mpegps.mak

@@ -0,0 +1,7 @@
+# This tests that a 16-bit pcm_dvd stream is correctly remuxed in mpegps
+FATE_MPEGPS-$(call DEMMUX, MPEGPS, MPEG1SYSTEM) += fate-mpegps-remuxed-pcm-demux
+fate-mpegps-remuxed-pcm-demux: $(TARGET_SAMPLES)/mpegps/pcm_aud.mpg
+fate-mpegps-remuxed-pcm-demux: CMD = stream_remux "mpeg" "$(TARGET_SAMPLES)/mpegps/pcm_aud.mpg" "mpeg" "-map 0:a:0" "-codec copy"
+
+FATE_SAMPLES_FFMPEG += $(FATE_MPEGPS-yes)
+fate-mpegps: $(FATE_MPEGPS-yes)

diff --git a/tests/fate/mpegts.mak b/tests/fate/mpegts.mak
index bb0d9d9..bbcbfc4 100644
--- a/tests/fate/mpegts.mak
+++ b/tests/fate/mpegts.mak

@@ -9,6 +9,17 @@
 fate-mpegts-probe-latm: SRC = $(TARGET_SAMPLES)/mpegts/loewe.ts
 fate-mpegts-probe-latm: CMD = run $(PROBE_CODEC_NAME_COMMAND) -i "$(SRC)"
 
+
+FATE_MPEGTS_PROBE-$(call DEMDEC, MPEGTS, HEVC, AAC_LATM) += fate-mpegts-probe-program
+fate-mpegts-probe-program: SRC = $(TARGET_SAMPLES)/mpegts/loewe.ts
+fate-mpegts-probe-program: CMD = run $(PROBE_CODEC_NAME_COMMAND) -select_streams p:769:v:0 -i "$(SRC)"
+
+
+FATE_MPEGTS_PROBE-$(call DEMDEC, MPEGTS) += fate-mpegts-probe-pmt-merge
+fate-mpegts-probe-pmt-merge: SRC = $(TARGET_SAMPLES)/mpegts/pmtchange.ts
+fate-mpegts-probe-pmt-merge: CMD = run $(PROBE_CODEC_NAME_COMMAND) -merge_pmt_versions 1 -i "$(SRC)"
+
+
 FATE_SAMPLES_FFPROBE += $(FATE_MPEGTS_PROBE-yes)
 
 fate-mpegts: $(FATE_MPEGTS_PROBE-yes)

diff --git a/tests/fate/mxf.mak b/tests/fate/mxf.mak
index 7714b61..dce23d5 100644
--- a/tests/fate/mxf.mak
+++ b/tests/fate/mxf.mak

@@ -33,9 +33,13 @@
 fate-mxf-probe-dv25: SRC = $(TARGET_SAMPLES)/mxf/Avid-00005.mxf
 fate-mxf-probe-dv25: CMD = run $(PROBE_FORMAT_STREAMS_COMMAND) -i "$(SRC)"
 
+FATE_MXF_REEL_NAME-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += fate-mxf-reel_name
+fate-mxf-reel_name: $(TARGET_SAMPLES)/mxf/Sony-00001.mxf
+fate-mxf-reel_name: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-00001.mxf  -c copy -timecode 00:00:00:00 -metadata "reel_name=test_reel" -fflags +bitexact -f mxf
+
 FATE_MXF-$(CONFIG_MXF_DEMUXER) += $(FATE_MXF)
 
-FATE_SAMPLES_AVCONV += $(FATE_MXF-yes)
+FATE_SAMPLES_AVCONV += $(FATE_MXF-yes) $(FATE_MXF_REEL_NAME-yes)
 FATE_SAMPLES_FFPROBE += $(FATE_MXF_PROBE-yes)
 
-fate-mxf: $(FATE_MXF-yes) $(FATE_MXF_PROBE-yes)
+fate-mxf: $(FATE_MXF-yes) $(FATE_MXF_PROBE-yes) $(FATE_MXF_REEL_NAME-yes)

diff --git a/tests/fate/opus.mak b/tests/fate/opus.mak
index b13d86c..7f28945 100644
--- a/tests/fate/opus.mak
+++ b/tests/fate/opus.mak

@@ -1,26 +1,22 @@
 # The samples were produced by simply rewrapping the official test vectors from
-# their custom format into Matroska.
-# The reference files were created with our decoder and tested against the
-# libopus output with the official opus_compare tool. We cannot use libopus
-# output as reference directly, because the use of different resamplers would
-# require too high fuzz values, which can hide bugs.
-# Before adding new tests here, always make sure they pass opus_compare.
+# their custom format into Matroska. The reference decoded outputs are from the
+# newest testvectors file from RFC8251
 
-OPUS_CELT_SAMPLES   = $(addprefix testvector, 01 07 11) tron.6ch.tinypkts
+OPUS_CELT_SAMPLES   = $(addprefix testvector, 01 11) tron.6ch.tinypkts
 OPUS_HYBRID_SAMPLES = $(addprefix testvector, 05 06)
 OPUS_SILK_SAMPLES   = $(addprefix testvector, 02 03 04)
-OPUS_SAMPLES        = $(addprefix testvector, 08 09 10 12)
+OPUS_SAMPLES        = $(addprefix testvector, 07 08 09 10 12)
 
 define FATE_OPUS_TEST
 FATE_OPUS     += fate-opus-$(1)
 FATE_OPUS$(2) += fate-opus-$(1)
 fate-opus-$(1): CMD = ffmpeg -i $(TARGET_SAMPLES)/opus/$(1).mka -f s16le -
-fate-opus-$(1): REF = $(SAMPLES)/opus/$(1).dec
+fate-opus-$(1): REF = $(SAMPLES)/opus/$(1)$(2).dec
 endef
 
-$(foreach N,$(OPUS_CELT_SAMPLES),  $(eval $(call FATE_OPUS_TEST,$(N),_CELT)))
-$(foreach N,$(OPUS_HYBRID_SAMPLES),$(eval $(call FATE_OPUS_TEST,$(N),_HYBRID)))
-$(foreach N,$(OPUS_SILK_SAMPLES),  $(eval $(call FATE_OPUS_TEST,$(N),_SILK)))
+$(foreach N,$(OPUS_CELT_SAMPLES),  $(eval $(call FATE_OPUS_TEST,$(N))))
+$(foreach N,$(OPUS_HYBRID_SAMPLES),$(eval $(call FATE_OPUS_TEST,$(N),_v2)))
+$(foreach N,$(OPUS_SILK_SAMPLES),  $(eval $(call FATE_OPUS_TEST,$(N))))
 $(foreach N,$(OPUS_SAMPLES),       $(eval $(call FATE_OPUS_TEST,$(N),)))
 
 FATE_OPUS := $(sort $(FATE_OPUS))
@@ -28,14 +24,19 @@
 $(FATE_OPUS): CMP = stddev
 $(FATE_OPUS): CMP_UNIT = s16
 $(FATE_OPUS): FUZZ = 3
-fate-opus-testvector02: CMP_TARGET = 191
-fate-opus-testvector03: CMP_TARGET = 139
-fate-opus-testvector04: CMP_TARGET = 119
-fate-opus-testvector05: CMP_TARGET = 108
-fate-opus-testvector06: CMP_TARGET = 106
-fate-opus-testvector08: CMP_TARGET = 6
-fate-opus-testvector10: CMP_TARGET = 38
-fate-opus-testvector12: CMP_TARGET = 160
+fate-opus-testvector01:      CMP_TARGET = 0
+fate-opus-testvector02:      CMP_TARGET = 191
+fate-opus-testvector03:      CMP_TARGET = 139
+fate-opus-testvector04:      CMP_TARGET = 119
+fate-opus-testvector05:      CMP_TARGET = 108
+fate-opus-testvector06:      CMP_TARGET = 106
+fate-opus-testvector07:      CMP_TARGET = 0
+fate-opus-testvector08:      CMP_TARGET = 6
+fate-opus-testvector09:      CMP_TARGET = 0
+fate-opus-testvector10:      CMP_TARGET = 38
+fate-opus-testvector11:      CMP_TARGET = 0
+fate-opus-testvector12:      CMP_TARGET = 160
+fate-opus-tron.6ch.tinypkts: CMP_TARGET = 0
 
 $(FATE_OPUS_CELT): CMP = oneoff
 $(FATE_OPUS_CELT): FUZZ = 6

diff --git a/tests/fate/screen.mak b/tests/fate/screen.mak
index 66dfa6b..68b4f6f 100644
--- a/tests/fate/screen.mak
+++ b/tests/fate/screen.mak

@@ -8,6 +8,15 @@
 FATE_SAMPLES_AVCONV-$(call DEMDEC, AVI, FIC) += fate-fic-avi
 fate-fic-avi: CMD = framecrc -i $(TARGET_SAMPLES)/fic/fic-partial-2MB.avi -an
 
+FATE_FMVC += fate-fmvc-type1
+fate-fmvc-type1: CMD = framecrc -i $(TARGET_SAMPLES)/fmvc/6-methyl-5-hepten-2-one-CC-db_small.avi
+
+FATE_FMVC += fate-fmvc-type2
+fate-fmvc-type2: CMD = framecrc -i $(TARGET_SAMPLES)/fmvc/fmvcVirtualDub_small.avi
+
+FATE_SAMPLES_AVCONV-$(call DEMDEC, AVI, FMVC) += $(FATE_FMVC)
+fate-fmvc: $(FATE_FMVC)
+
 FATE_FRAPS += fate-fraps-v0
 fate-fraps-v0: CMD = framecrc -i $(TARGET_SAMPLES)/fraps/Griffin_Ragdoll01-partial.avi
 

diff --git a/tests/fate/seek.mak b/tests/fate/seek.mak
index c863b2a..6a9f843 100644
--- a/tests/fate/seek.mak
+++ b/tests/fate/seek.mak

@@ -168,7 +168,6 @@
 FATE_SEEK_LAVF-$(call ENCDEC2, MPEG4,      MP2,       AVI)         += avi
 FATE_SEEK_LAVF-$(call ENCDEC,  BMP,                   IMAGE2)      += bmp
 FATE_SEEK_LAVF-$(call ENCDEC2, DVVIDEO,    PCM_S16LE, AVI)         += dv_fmt
-FATE_SEEK_LAVF-$(call ENCDEC2, MPEG1VIDEO, MP2,       FFM)         += ffm
 FATE_SEEK_LAVF-$(call ENCDEC,  FLV,                   FLV)         += flv_fmt
 FATE_SEEK_LAVF-$(call ENCDEC,  GIF,                   IMAGE2)      += gif
 FATE_SEEK_LAVF-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, GXF)         += gxf
@@ -210,7 +209,6 @@
 fate-seek-lavf-avi:      SRC = lavf/lavf.avi
 fate-seek-lavf-bmp:      SRC = images/bmp/%02d.bmp
 fate-seek-lavf-dv_fmt:   SRC = lavf/lavf.dv
-fate-seek-lavf-ffm:      SRC = lavf/lavf.ffm
 fate-seek-lavf-flv_fmt:  SRC = lavf/lavf.flv
 fate-seek-lavf-gif:      SRC = lavf/lavf.gif
 fate-seek-lavf-gxf:      SRC = lavf/lavf.gxf
@@ -253,9 +251,15 @@
 FATE_SEEK_EXTRA-$(call ALLYES, CACHE_PROTOCOL PIPE_PROTOCOL MP3_DEMUXER) += fate-seek-cache-pipe
 FATE_SEEK_EXTRA-$(CONFIG_MATROSKA_DEMUXER) += fate-seek-mkv-codec-delay
 FATE_SEEK_EXTRA-$(CONFIG_MOV_DEMUXER) += fate-seek-extra-mp4
+FATE_SEEK_EXTRA-$(CONFIG_MOV_DEMUXER) += fate-seek-empty-edit-mp4
+FATE_SEEK_EXTRA-$(CONFIG_MOV_DEMUXER) += fate-seek-test-iibbibb-mp4
+FATE_SEEK_EXTRA-$(CONFIG_MOV_DEMUXER) += fate-seek-test-iibbibb-neg-ctts-mp4
 
 fate-seek-extra-mp3:  CMD = run libavformat/tests/seek$(EXESUF) $(TARGET_SAMPLES)/gapless/gapless.mp3 -fastseek 1
 fate-seek-extra-mp4:  CMD = run libavformat/tests/seek$(EXESUF) $(TARGET_SAMPLES)/mov/buck480p30_na.mp4 -duration 180 -frames 4
+fate-seek-empty-edit-mp4:  CMD = run libavformat/tests/seek$(EXESUF) $(TARGET_SAMPLES)/mov/empty_edit_5s.mp4 -duration 15 -frames 4
+fate-seek-test-iibbibb-mp4:  CMD = run libavformat/tests/seek$(EXESUF) $(TARGET_SAMPLES)/mov/test_iibbibb.mp4 -duration 13 -frames 4
+fate-seek-test-iibbibb-neg-ctts-mp4:  CMD = run libavformat/tests/seek$(EXESUF) $(TARGET_SAMPLES)/mov/test_iibbibb_neg_ctts.mp4 -duration 13 -frames 4
 fate-seek-cache-pipe: CMD = cat $(TARGET_SAMPLES)/gapless/gapless.mp3 | run libavformat/tests/seek$(EXESUF) cache:pipe:0 -read_ahead_limit -1
 fate-seek-mkv-codec-delay:   CMD = run libavformat/tests/seek$(EXESUF) $(TARGET_SAMPLES)/mkv/codec_delay_opus.mkv
 

diff --git a/tests/fate/subtitles.mak b/tests/fate/subtitles.mak
index 8c310ad..0042902 100644
--- a/tests/fate/subtitles.mak
+++ b/tests/fate/subtitles.mak

@@ -7,6 +7,9 @@
 FATE_SUBTITLES_ASS-$(call ALLYES, AVDEVICE LAVFI_INDEV CCAPTION_DECODER MOVIE_FILTER MPEGTS_DEMUXER) += fate-sub-cc-realtime
 fate-sub-cc-realtime: CMD = fmtstdout ass -real_time 1 -f lavfi -i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]"
 
+FATE_SUBTITLES_ASS-$(call ALLYES, AVDEVICE LAVFI_INDEV CCAPTION_DECODER MOVIE_FILTER MPEGTS_DEMUXER) += fate-sub-cc-scte20
+fate-sub-cc-scte20: CMD = fmtstdout ass -f lavfi -i "movie=$(TARGET_SAMPLES)/sub/scte20.ts[out0+subcc]"
+
 FATE_SUBTITLES_ASS-$(call DEMDEC, ASS, ASS) += fate-sub-ass-to-ass-transcode
 fate-sub-ass-to-ass-transcode: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/1ededcbd7b.ass
 

diff --git a/tests/fate/utvideo.mak b/tests/fate/utvideo.mak
index 2cf06b3..2322471 100644
--- a/tests/fate/utvideo.mak
+++ b/tests/fate/utvideo.mak

@@ -10,6 +10,15 @@
 FATE_UTVIDEO += fate-utvideo_rgba_median
 fate-utvideo_rgba_median: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_rgba_median.avi
 
+FATE_UTVIDEO += fate-utvideo_rgb_int_median
+fate-utvideo_rgb_int_median: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_rgb_64x48_int_median.avi
+
+FATE_UTVIDEO += fate-utvideo_rgba_gradient
+fate-utvideo_rgba_gradient: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_rgba_gradient.avi
+
+FATE_UTVIDEO += fate-utvideo_rgb_int_gradient
+fate-utvideo_rgb_int_gradient: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_rgb_64x48_int_gradient.avi
+
 FATE_UTVIDEO += fate-utvideo_rgba_single_symbol
 fate-utvideo_rgba_single_symbol: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_rgba_single_symbol.avi
 
@@ -19,34 +28,64 @@
 FATE_UTVIDEO += fate-utvideo_yuv420_median
 fate-utvideo_yuv420_median: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv420_median.avi
 
+FATE_UTVIDEO += fate-utvideo_yuv420_int_median
+fate-utvideo_yuv420_int_median: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv420_709_64x48_int_median.avi
+
+FATE_UTVIDEO += fate-utvideo_yuv420_gradient
+fate-utvideo_yuv420_gradient: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv420_709_64x48_gradient.avi
+
+FATE_UTVIDEO += fate-utvideo_yuv420_int_gradient
+fate-utvideo_yuv420_int_gradient: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv420_709_64x48_int_gradient.avi
+
 FATE_UTVIDEO += fate-utvideo_yuv422_left
 fate-utvideo_yuv422_left: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv422_left.avi
 
 FATE_UTVIDEO += fate-utvideo_yuv422_median
 fate-utvideo_yuv422_median: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv422_median.avi
 
+FATE_UTVIDEO += fate-utvideo_yuv422_int_median
+fate-utvideo_yuv422_int_median: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv422_709_64x48_int_median.avi
+
+FATE_UTVIDEO += fate-utvideo_yuv422_gradient
+fate-utvideo_yuv422_gradient: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv422_709_64x48_gradient.avi
+
+FATE_UTVIDEO += fate-utvideo_yuv422_int_gradient
+fate-utvideo_yuv422_int_gradient: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv422_709_64x48_int_gradient.avi
+
+FATE_UTVIDEO += fate-utvideo_yuv444_709_median
+fate-utvideo_yuv444_709_median: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv444_709_64x48_median.avi
+
+FATE_UTVIDEO += fate-utvideo_yuv444_709_int_median
+fate-utvideo_yuv444_709_int_median: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv444_709_64x48_int_median.avi
+
+FATE_UTVIDEO += fate-utvideo_yuv444_709_gradient
+fate-utvideo_yuv444_709_gradient: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv444_709_gradient.avi
+
+FATE_UTVIDEO += fate-utvideo_yuv444_709_int_gradient
+fate-utvideo_yuv444_709_int_gradient: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_yuv444_709_64x48_int_gradient.avi
+
 FATE_SAMPLES_AVCONV-$(call DEMDEC, AVI, UTVIDEO) += $(FATE_UTVIDEO)
 fate-utvideo: $(FATE_UTVIDEO)
 
 fate-utvideoenc%: CMD = framemd5 -f image2 -c:v pgmyuv -i $(TARGET_PATH)/tests/vsynth1/%02d.pgm -c:v utvideo -slices 1 -sws_flags +accurate_rnd+bitexact ${OPTS}
 
 FATE_UTVIDEOENC += fate-utvideoenc_rgba_left
-fate-utvideoenc_rgba_left: OPTS = -pix_fmt rgba -pred left
+fate-utvideoenc_rgba_left: OPTS = -pix_fmt gbrap -pred left
 
 FATE_UTVIDEOENC += fate-utvideoenc_rgba_median
-fate-utvideoenc_rgba_median: OPTS = -pix_fmt rgba -pred median
+fate-utvideoenc_rgba_median: OPTS = -pix_fmt gbrap -pred median
 
 FATE_UTVIDEOENC += fate-utvideoenc_rgba_none
-fate-utvideoenc_rgba_none: OPTS = -pix_fmt rgba -pred none
+fate-utvideoenc_rgba_none: OPTS = -pix_fmt gbrap -pred none
 
 FATE_UTVIDEOENC += fate-utvideoenc_rgb_left
-fate-utvideoenc_rgb_left: OPTS = -pix_fmt rgb24 -pred left
+fate-utvideoenc_rgb_left: OPTS = -pix_fmt gbrp -pred left
 
 FATE_UTVIDEOENC += fate-utvideoenc_rgb_median
-fate-utvideoenc_rgb_median: OPTS = -pix_fmt rgb24 -pred median
+fate-utvideoenc_rgb_median: OPTS = -pix_fmt gbrp -pred median
 
 FATE_UTVIDEOENC += fate-utvideoenc_rgb_none
-fate-utvideoenc_rgb_none: OPTS = -pix_fmt rgb24 -pred none
+fate-utvideoenc_rgb_none: OPTS = -pix_fmt gbrp -pred none
 
 FATE_UTVIDEOENC += fate-utvideoenc_yuv420_left
 fate-utvideoenc_yuv420_left: OPTS = -pix_fmt yuv420p -pred left
@@ -66,6 +105,15 @@
 FATE_UTVIDEOENC += fate-utvideoenc_yuv422_none
 fate-utvideoenc_yuv422_none: OPTS = -pix_fmt yuv422p -pred none
 
+FATE_UTVIDEOENC += fate-utvideoenc_yuv444_left
+fate-utvideoenc_yuv444_left: OPTS = -pix_fmt yuv444p -pred left
+
+FATE_UTVIDEOENC += fate-utvideoenc_yuv444_median
+fate-utvideoenc_yuv444_median: OPTS = -pix_fmt yuv444p -pred median
+
+FATE_UTVIDEOENC += fate-utvideoenc_yuv444_none
+fate-utvideoenc_yuv444_none: OPTS = -pix_fmt yuv444p -pred none
+
 $(FATE_UTVIDEOENC): $(VREF)
 
 FATE_AVCONV-$(call ENCMUX, UTVIDEO, AVI) += $(FATE_UTVIDEOENC)

diff --git a/tests/fate/vcodec.mak b/tests/fate/vcodec.mak
index c034455..da6da9e 100644
--- a/tests/fate/vcodec.mak
+++ b/tests/fate/vcodec.mak

@@ -18,9 +18,9 @@
 FATE_VCODEC-$(call ENCDEC, ASV2, AVI)   += asv2
 fate-vsynth%-asv2:               ENCOPTS = -qscale 10
 
-FATE_VCODEC-$(call ENCDEC, CINEPAK, MOV) += cinepak
-fate-vsynth%-cinepak:            ENCOPTS  = -c:v cinepak -frames 3
-fate-vsynth%-cinepak:            FMT      = mov
+FATE_VCODEC-$(call ENCDEC, CINEPAK, AVI) += cinepak
+fate-vsynth%-cinepak:            ENCOPTS = -s sqcif -strip_number_adaptivity 1
+fate-vsynth%-cinepak:            DECOPTS = -s sqcif
 
 FATE_VCODEC-$(call ENCDEC, CLJR, AVI)   += cljr
 fate-vsynth%-cljr:               ENCOPTS = -strict -1
@@ -38,17 +38,25 @@
 
 FATE_VCODEC-$(call ENCDEC, VC2 DIRAC, MOV) += vc2-420p vc2-420p10 vc2-420p12 \
                                               vc2-422p vc2-422p10 vc2-422p12 \
-                                              vc2-444p vc2-444p10 vc2-444p12
-fate-vsynth1-vc2-%:              FMT      = mov
-fate-vsynth1-vc2-%:              ENCOPTS = -pix_fmt yuv$(@:fate-vsynth1-vc2-%=%) \
+                                              vc2-444p vc2-444p10 vc2-444p12 \
+                                              vc2-thaar vc2-t5_3
+fate-vsynth1-vc2-4%:             FMT      = mov
+fate-vsynth1-vc2-4%:             ENCOPTS = -pix_fmt yuv$(@:fate-vsynth1-vc2-%=%) \
                                            -c:v vc2 -frames 5 -strict -1
-fate-vsynth2-vc2-%:              FMT      = mov
-fate-vsynth2-vc2-%:              ENCOPTS = -pix_fmt yuv$(@:fate-vsynth2-vc2-%=%) \
+fate-vsynth2-vc2-4%:             FMT      = mov
+fate-vsynth2-vc2-4%:             ENCOPTS = -pix_fmt yuv$(@:fate-vsynth2-vc2-%=%) \
                                            -c:v vc2 -frames 5 -strict -1
-fate-vsynth_lena-vc2-%:          FMT      = mov
-fate-vsynth_lena-vc2-%:          ENCOPTS = -pix_fmt yuv$(@:fate-vsynth_lena-vc2-%=%) \
+fate-vsynth_lena-vc2-4%:         FMT      = mov
+fate-vsynth_lena-vc2-4%:         ENCOPTS = -pix_fmt yuv$(@:fate-vsynth_lena-vc2-%=%) \
                                            -c:v vc2 -frames 5 -strict -1
 
+fate-vsynth1-vc2-t%:             FMT     = mov
+fate-vsynth1-vc2-t%:             ENCOPTS = -pix_fmt yuv422p10 -c:v vc2 -frames 5 -strict -1 -wavelet_type $(@:fate-vsynth1-vc2-t%=%)
+fate-vsynth2-vc2-t%:             FMT     = mov
+fate-vsynth2-vc2-t%:             ENCOPTS = -pix_fmt yuv422p10 -c:v vc2 -frames 5 -strict -1 -wavelet_type $(@:fate-vsynth2-vc2-t%=%)
+fate-vsynth_lena-vc2-t%:         FMT     = mov
+fate-vsynth_lena-vc2-t%:         ENCOPTS = -pix_fmt yuv422p10 -c:v vc2 -frames 5 -strict -1 -wavelet_type $(@:fate-vsynth_lena-vc2-t%=%)
+
 fate-vsynth%-dnxhd-720p:         ENCOPTS = -s hd720 -b 90M              \
                                            -pix_fmt yuv422p -frames 5 -qmax 8
 fate-vsynth%-dnxhd-720p:         FMT     = dnxhd
@@ -434,7 +442,7 @@
 RESIZE_OFF   = dnxhd-720p dnxhd-720p-rd dnxhd-720p-10bit dnxhd-1080i \
                dv dv-411 dv-50 avui snow snow-hpel snow-ll vc2-420p \
                vc2-420p10 vc2-420p12 vc2-422p vc2-422p10 vc2-422p12 \
-               vc2-444p vc2-444p10 vc2-444p12
+               vc2-444p vc2-444p10 vc2-444p12 vc2-thaar vc2-t5_3
 # Incorrect parameters - usually size or color format restrictions
 INC_PAR_OFF  = cinepak h261 h261-trellis h263 h263p h263-obmc msvideo1 \
                roqvideo rv10 rv20 y41p qtrlegray

diff --git a/tests/fate/video.mak b/tests/fate/video.mak
index 930eece..43c3432 100644
--- a/tests/fate/video.mak
+++ b/tests/fate/video.mak

@@ -69,6 +69,18 @@
 FATE_VIDEO-$(call DEMDEC, CDG, CDGRAPHICS) += fate-cdgraphics
 fate-cdgraphics: CMD = framecrc -i $(TARGET_SAMPLES)/cdgraphics/BrotherJohn.cdg -pix_fmt rgba -t 1
 
+FATE_CFHD-$(CONFIG_AVI_DEMUXER) += fate-cfhd-1
+fate-cfhd-1: CMD = framecrc -i $(TARGET_SAMPLES)/cfhd/cfhd_422.avi -pix_fmt yuv422p10le
+
+FATE_CFHD-$(CONFIG_AVI_DEMUXER) += fate-cfhd-2
+fate-cfhd-2: CMD = framecrc -i $(TARGET_SAMPLES)/cfhd/cfhd_444.avi -pix_fmt gbrp12le
+
+FATE_CFHD-$(CONFIG_MOV_DEMUXER) += fate-cfhd-3
+fate-cfhd-3: CMD = framecrc -i $(TARGET_SAMPLES)/cfhd/cfhd_odd.mov -pix_fmt yuv422p10le
+
+FATE_VIDEO-$(CONFIG_CFHD_DECODER) += $(FATE_CFHD-yes)
+fate-cfhd: $(FATE_CFHD-yes)
+
 FATE_VIDEO-$(call DEMDEC, AVI, CLJR) += fate-cljr
 fate-cljr: CMD = framecrc -i $(TARGET_SAMPLES)/cljr/testcljr-partial.avi
 
@@ -159,21 +171,6 @@
 FATE_VIDEO-$(call ENCDEC, ROQ PGMYUV, ROQ IMAGE2) += fate-idroq-video-encode
 fate-idroq-video-encode: CMD = md5 -f image2 -c:v pgmyuv -i $(TARGET_SAMPLES)/ffmpeg-synthetic/vsynth1/%02d.pgm -r 30 -sws_flags +bitexact -vf pad=512:512:80:112 -f roq -t 0.2
 
-FATE_HAP += fate-hap1
-fate-hap1: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hap1.mov
-
-FATE_HAP += fate-hap5
-fate-hap5: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hap5.mov
-
-FATE_HAP += fate-hapy
-fate-hapy: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hapy.mov
-
-FATE_HAP += fate-hap-chunk
-fate-hap-chunk: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hapy-12-chunks.mov
-
-FATE_VIDEO-$(call DEMDEC, MOV, HAP) += $(FATE_HAP)
-fate-hap: $(FATE_HAP)
-
 FATE_IFF-$(CONFIG_IFF_ILBM_DECODER) += fate-iff-byterun1
 fate-iff-byterun1: CMD = framecrc -i $(TARGET_SAMPLES)/iff/ASH.LBM -pix_fmt rgb24
 
@@ -245,6 +242,9 @@
 FATE_VIDEO-$(call DEMDEC, MPEGPS, MPEG2VIDEO) += fate-mpeg2-ticket6024
 fate-mpeg2-ticket6024: CMD = framecrc -flags +bitexact -idct simple -flags +truncated -i $(TARGET_SAMPLES)/mpeg2/matrixbench_mpeg2.lq1.mpg -an
 
+FATE_VIDEO-$(call DEMDEC, MPEGVIDEO, MPEG2VIDEO) += fate-mpeg2-ticket6677
+fate-mpeg2-ticket6677: CMD = framecrc -flags +bitexact -idct simple -vsync drop -i $(TARGET_SAMPLES)/mpeg2/sony-ct3.bs
+
 FATE_VIDEO-$(call DEMDEC, MV, MVC1) += fate-mv-mvc1
 fate-mv-mvc1: CMD = framecrc -i $(TARGET_SAMPLES)/mv/posture.mv -an -frames 25 -pix_fmt rgb555le
 

diff --git a/tests/ffserver-regression.sh b/tests/ffserver-regression.sh
deleted file mode 100755
index 9007fe3..0000000
--- a/tests/ffserver-regression.sh
+++ /dev/null

@@ -1,45 +0,0 @@
-#!/bin/sh
-
-target_samples=$3
-target_exec=$4
-target_path=$5
-
-#perl -e 'chomp($wd = `pwd`); print map { s!tests/data/!!; "<Stream $_>\nFile $wd/tests/data/$_\n</Stream>\n\n" } @ARGV' tests/data/a* >> tests/data/ffserver.conf
-#perl -e 'chomp($wd = `pwd`); print map { s!tests/data/!!; "<Stream $_.asf>\nFile $wd/tests/data/$_\n</Stream>\n\n" } @ARGV' tests/data/a* >> tests/data/ffserver.conf
-
-. $(dirname $0)/md5.sh
-
-FILES=$(sed -n 's/^[^#]*<Stream \(.*\)>.*/\1/p' $2 | grep -v html)
-
-rm -f tests/feed1.ffm
-$target_exec ${target_path}/ffserver${PROGSUF} -f "$2" &
-FFSERVER_PID=$!
-echo "Waiting for feeds to startup..."
-sleep 2
-(
-    cd tests/data || exit $?
-    rm -f ff-* ffserver.regression
-    WGET_OPTIONS="--user-agent=NSPlayer -q --proxy=off -e verbose=off -e server_response=off -T3 --tries=1"
-    for file in $FILES; do
-        if [ $(expr $file : "a-*") != 0 ]; then
-            wget $WGET_OPTIONS -O - http://localhost:9999/$file > ff-$file
-        else
-            wget $WGET_OPTIONS -O - http://localhost:9999/$file?date=19700101T000000Z | dd bs=1 count=100000 > ff-$file 2>/dev/null
-        fi
-        do_md5sum ff-$file >>ffserver.regression
-    done
-    wget  $WGET_OPTIONS -O - 'http://localhost:9999/teststat.html?abc' > ff-stat 2>/dev/null
-    do_md5sum ff-stat >>ffserver.regression
-)
-kill $FFSERVER_PID
-wait > /dev/null 2>&1
-rm -f tests/feed1.ffm
-if diff -u "$1" tests/data/ffserver.regression; then
-    echo
-    echo Server regression test succeeded.
-    exit 0
-else
-    echo
-    echo Server regression test: Error.
-    exit 1
-fi

diff --git a/tests/ffserver.conf b/tests/ffserver.conf
deleted file mode 100644
index 3495d95..0000000
--- a/tests/ffserver.conf
+++ /dev/null

@@ -1,311 +0,0 @@
-#
-# This is a test configuration file. You can invoke it with
-# ../ffserver -f ffserver.conf
-# when in the tests directory and once the vsynth1 subdirectory
-# has been populated. Then point your browser at http://whatever:9999/teststat.html
-# and you can look at the streams
-#
-
-#
-# Port on which the server is listening. You must select a different
-# port from your standard http web server if it is running on the same
-# computer.
-
-HTTPPort 9999
-RTSPPort 9990
-
-# Address on which the server is bound. Only useful if you have
-# several network interfaces.
-
-HTTPBindAddress 0.0.0.0
-
-# Number of simultaneous requests that can be handled. Since FFServer
-# is very fast, this limit is determined mainly by your Internet
-# connection speed.
-
-MaxClients 1000
-
-MaxBandwidth 100000
-
-# Access Log file (uses standard Apache log file format)
-# '-' is the standard output
-
-CustomLog -
-
-##################################################################
-# Definition of the live feeds. Each live feed contains one video
-# and/or audio sequence coming from an ffmpeg encoder or another
-# ffserver. This sequence may be encoded simultaneously with several
-# codecs at several resolutions.
-
-<Feed feed1.ffm>
-
-# You must use 'ffmpeg' to send a live feed to ffserver. In this
-# example, you can type:
-#
-# ffmpeg http://localhost:8090/feed1.ffm
-
-# ffserver can also do time shifting. It means that it can stream any
-# previously recorded live stream. The request should contain:
-# "http://xxxx?date=[YYYY-MM-DDT][[HH:]MM:]SS[.m...]".You must specify
-# a path where the feed is stored on disk. You also specify the
-# maximum size of the feed (100M bytes here). Default:
-# File=/tmp/feed_name.ffm FileMaxSize=5M
-
-File tests/feed1.ffm
-FileMaxSize 100M
-
-# Fire up ffmpeg pointing at this stream
-
-Launch ./ffmpeg -v 0 -y -f image2 -flags +bitexact -fflags +bitexact -i tests/vsynth1/%02d.pgm -flags +bitexact -fflags +bitexact
-
-ACL allow localhost
-</Feed>
-
-##################################################################
-# Now you can define each stream which will be generated from the
-# original audio and video stream. Each format has a filename (here
-# 'test128.mpg'). FFServer will send this stream when answering a
-# request containing this filename.
-
-<Stream test_h.avi>
-Feed feed1.ffm
-Format avi
-#
-BitExact
-DctFastint
-IdctSimple
-VideoFrameRate 10
-VideoSize 352x288
-VideoBitRate 100
-VideoGopSize 30
-NoAudio
-
-PreRoll 10
-StartSendOnKey
-MaxTime 100
-
-</Stream>
-
-<Stream test_l.avi>
-Feed feed1.ffm
-Format avi
-#
-BitExact
-DctFastint
-IdctSimple
-VideoFrameRate 2
-VideoSize 320x240
-VideoBitRate 40
-VideoGopSize 20
-NoAudio
-
-PreRoll 20
-StartSendOnKey
-MaxTime 100
-
-</Stream>
-
-#<Stream test_h.mpg>
-#Feed feed1.ffm
-#
-#VideoFrameRate 10
-#VideoSize 352x288
-#VideoBitRate 100
-#VideoGopSize 30
-#NoAudio
-
-#PreRoll 10
-#StartSendOnKey
-#MaxTime 100
-#
-#</Stream>
-#
-#<Stream test_l.mpg>
-#Feed feed1.ffm
-##
-#VideoFrameRate 2
-#VideoSize 320x240
-#VideoBitRate 40
-#VideoGopSize 20
-#NoAudio
-#
-#PreRoll 20
-#StartSendOnKey
-#MaxTime 100
-#
-#</Stream>
-#
-<Stream test.swf>
-Feed feed1.ffm
-#
-BitExact
-DctFastint
-IdctSimple
-Qscale 10
-VideoFrameRate 10
-VideoSize 352x288
-VideoBitRate 100
-VideoGopSize 30
-NoAudio
-
-PreRoll 10
-StartSendOnKey
-MaxTime 100
-
-</Stream>
-
-<Stream test_h.asf>
-Feed feed1.ffm
-Format asf
-#
-BitExact
-DctFastint
-IdctSimple
-Qscale 10
-VideoFrameRate 10
-VideoSize 320x240
-VideoBitRate 100
-VideoGopSize 30
-NoAudio
-
-PreRoll 10
-StartSendOnKey
-MaxTime 100
-
-AVOptionVideo flags +global_header
-
-Metadata title "Test data stream"
-
-</Stream>
-
-<Stream test_l.asf>
-Feed feed1.ffm
-Format asf
-#
-BitExact
-DctFastint
-IdctSimple
-Qscale 10
-VideoFrameRate 2
-VideoSize 320x240
-VideoBitRate 40
-VideoGopSize 20
-NoAudio
-
-PreRoll 20
-StartSendOnKey
-MaxTime 100
-
-AVOptionVideo flags +global_header
-
-Metadata title "Test data stream"
-
-</Stream>
-
-<Stream test_h.rm>
-
-Feed feed1.ffm
-Format rm
-
-BitExact
-DctFastint
-IdctSimple
-Qscale 10
-VideoBitRate 100
-VideoFrameRate 10
-VideoGopSize 30
-VideoSize    320x240
-NoAudio
-
-PreRoll 10
-StartSendOnKey
-MaxTime 100
-
-</Stream>
-
-<Stream test_l.rm>
-
-Feed feed1.ffm
-Format rm
-
-BitExact
-DctFastint
-IdctSimple
-Qscale 10
-VideoBitRate 40
-VideoFrameRate 2
-VideoGopSize 20
-VideoSize    320x240
-NoAudio
-
-PreRoll 20
-StartSendOnKey
-MaxTime 100
-
-</Stream>
-
-
-<Stream test.jpg>
-
-Feed feed1.ffm
-Format jpeg
-Strict -1
-
-BitExact
-DctFastint
-IdctSimple
-VideoFrameRate 1
-VideoSize 352x288
-NoAudio
-
-PreRoll 2
-
-</Stream>
-
-<Stream test_small.jpg>
-
-Feed feed1.ffm
-Format jpeg
-Strict -1
-
-BitExact
-DctFastint
-IdctSimple
-VideoFrameRate 1
-VideoSize 160x128
-NoAudio
-
-PreRoll 2
-
-</Stream>
-
-<Stream test.mjpg>
-
-Feed feed1.ffm
-Format mpjpeg
-Strict -1
-
-BitExact
-DctFastint
-IdctSimple
-VideoFrameRate 1
-VideoSize    320x240
-NoAudio
-StartSendOnKey
-
-PreRoll 1
-MaxTime 100
-
-</Stream>
-
-
-##################################################################
-# Special stream : server status
-
-<Stream teststat.html>
-
-Format status
-
-</Stream>
-

diff --git a/tests/ffserver.regression.ref b/tests/ffserver.regression.ref
deleted file mode 100644
index fd8536c..0000000
--- a/tests/ffserver.regression.ref
+++ /dev/null

@@ -1,11 +0,0 @@
-0c9639f09decbc54c9f091dcf1ca0e8f *ff-test_h.avi
-e28ba75853caf975e06d92955c9f7f73 *ff-test_l.avi
-a767dbdf5d1bded3450279f812f97b37 *ff-test.swf
-ca209a0c67afbd3bc3bcde0840f313fc *ff-test_h.asf
-f97a91609bfc8a1857455f17c5ec101c *ff-test_l.asf
-06f5a6a4c5d1c6735f4d0068e825c91f *ff-test_h.rm
-1f57580f02f0317407b3b82a3d5e093f *ff-test_l.rm
-e04e6ebf9584654df131f5eec881ac38 *ff-test.jpg
-f15d43e9d3630601b61a024023249bb8 *ff-test_small.jpg
-4735c72cde67000f12e9d1dbfbd975a7 *ff-test.mjpg
-fd038af80560e15271ce42651093ee43 *ff-stat

diff --git a/tests/ref/fate/adts-id3v2-demux b/tests/ref/fate/adts-id3v2-demux
new file mode 100644
index 0000000..db00e3b
--- /dev/null
+++ b/tests/ref/fate/adts-id3v2-demux

@@ -0,0 +1,240 @@
+#tb 0: 1/28224000
+#media_type 0: audio
+#codec_id 0: aac
+#sample_rate 0: 48000
+#channel_layout 0: 4
+#channel_layout_name 0: mono
+0,          0,          0,   602112,      126, 0x639a3a5b
+0,     602112,     602112,   602112,      135, 0x5b1f3ced
+0,    1204224,    1204224,   602112,      123, 0xfcb73863
+0,    1806336,    1806336,   602112,      126, 0x639a3a5b
+0,    2408448,    2408448,   602112,      135, 0x5b1f3ced
+0,    3010560,    3010560,   602112,      123, 0xfcb73863
+0,    3612672,    3612672,   602112,      144, 0xa0434540
+0,    4214784,    4214784,   602112,      119, 0x45053cc1
+0,    4816896,    4816896,   602112,      111, 0x23043aaf
+0,    5419008,    5419008,   602112,      126, 0x693a3a67
+0,    6021120,    6021120,   602112,      149, 0x31304a34
+0,    6623232,    6623232,   602112,      111, 0x21603aab
+0,    7225344,    7225344,   602112,      132, 0xe42d43b3
+0,    7827456,    7827456,   602112,      135, 0x5b1f3ced
+0,    8429568,    8429568,   602112,      123, 0xfe8b3867
+0,    9031680,    9031680,   602112,      144, 0xa26b4544
+0,    9633792,    9633792,   602112,      129, 0xf7de3bc7
+0,   10235904,   10235904,   602112,      111, 0x1fbc3aa7
+0,   10838016,   10838016,   602112,      126, 0x657a3a5f
+0,   11440128,   11440128,   602112,      140, 0xdb6542ec
+0,   12042240,   12042240,   602112,      123, 0xfcb73863
+0,   12644352,   12644352,   602112,      138, 0xad7e44b6
+0,   13246464,   13246464,   602112,      119, 0x46c93cc5
+0,   13848576,   13848576,   602112,      123, 0xfe8b3867
+0,   14450688,   14450688,   602112,      144, 0xa26b4544
+0,   15052800,   15052800,   602112,      129, 0xf7de3bc7
+0,   15654912,   15654912,   602112,      111, 0x1fbc3aa7
+0,   16257024,   16257024,   602112,      126, 0x657a3a5f
+0,   16859136,   16859136,   602112,      140, 0xdb6542ec
+0,   17461248,   17461248,   602112,      123, 0xfcb73863
+0,   18063360,   18063360,   602112,      138, 0xad7e44b6
+0,   18665472,   18665472,   602112,      119, 0x46c93cc5
+0,   19267584,   19267584,   602112,      123, 0xfe8b3867
+0,   19869696,   19869696,   602112,      144, 0xa26b4544
+0,   20471808,   20471808,   602112,      129, 0xf7de3bc7
+0,   21073920,   21073920,   602112,      111, 0x1fbc3aa7
+0,   21676032,   21676032,   602112,      126, 0x657a3a5f
+0,   22278144,   22278144,   602112,      140, 0xdb6542ec
+0,   22880256,   22880256,   602112,      123, 0xfcb73863
+0,   23482368,   23482368,   602112,      138, 0xad7e44b6
+0,   24084480,   24084480,   602112,      119, 0x488d3cc9
+0,   24686592,   24686592,   602112,      123, 0xfe8b3867
+0,   25288704,   25288704,   602112,      144, 0xa26b4544
+0,   25890816,   25890816,   602112,      129, 0xf7de3bc7
+0,   26492928,   26492928,   602112,      111, 0x1fbc3aa7
+0,   27095040,   27095040,   602112,      126, 0x657a3a5f
+0,   27697152,   27697152,   602112,      140, 0xdb6542ec
+0,   28299264,   28299264,   602112,      123, 0xfcb73863
+0,   28901376,   28901376,   602112,      126, 0x639a3a5b
+0,   29503488,   29503488,   602112,      135, 0x5b1f3ced
+0,   30105600,   30105600,   602112,      123, 0xfcb73863
+0,   30707712,   30707712,   602112,      126, 0x639a3a5b
+0,   31309824,   31309824,   602112,      135, 0x5b1f3ced
+0,   31911936,   31911936,   602112,      123, 0xfcb73863
+0,   32514048,   32514048,   602112,      144, 0xa0434540
+0,   33116160,   33116160,   602112,      119, 0x45053cc1
+0,   33718272,   33718272,   602112,      111, 0x23043aaf
+0,   34320384,   34320384,   602112,      126, 0x693a3a67
+0,   34922496,   34922496,   602112,      149, 0x31304a34
+0,   35524608,   35524608,   602112,      111, 0x21603aab
+0,   36126720,   36126720,   602112,      132, 0xe42d43b3
+0,   36728832,   36728832,   602112,      135, 0x5b1f3ced
+0,   37330944,   37330944,   602112,      123, 0xfe8b3867
+0,   37933056,   37933056,   602112,      144, 0xa26b4544
+0,   38535168,   38535168,   602112,      129, 0xf7de3bc7
+0,   39137280,   39137280,   602112,      111, 0x1fbc3aa7
+0,   39739392,   39739392,   602112,      126, 0x657a3a5f
+0,   40341504,   40341504,   602112,      140, 0xdb6542ec
+0,   40943616,   40943616,   602112,      123, 0xfcb73863
+0,   41545728,   41545728,   602112,      138, 0xad7e44b6
+0,   42147840,   42147840,   602112,      119, 0x46c93cc5
+0,   42749952,   42749952,   602112,      123, 0xfe8b3867
+0,   43352064,   43352064,   602112,      144, 0xa26b4544
+0,   43954176,   43954176,   602112,      129, 0xf7de3bc7
+0,   44556288,   44556288,   602112,      111, 0x1fbc3aa7
+0,   45158400,   45158400,   602112,      126, 0x657a3a5f
+0,   45760512,   45760512,   602112,      140, 0xdb6542ec
+0,   46362624,   46362624,   602112,      123, 0xfcb73863
+0,   46964736,   46964736,   602112,      138, 0xad7e44b6
+0,   47566848,   47566848,   602112,      119, 0x46c93cc5
+0,   48168960,   48168960,   602112,      123, 0xfe8b3867
+0,   48771072,   48771072,   602112,      144, 0xa26b4544
+0,   49373184,   49373184,   602112,      129, 0xf7de3bc7
+0,   49975296,   49975296,   602112,      111, 0x1fbc3aa7
+0,   50577408,   50577408,   602112,      126, 0x657a3a5f
+0,   51179520,   51179520,   602112,      140, 0xdb6542ec
+0,   51781632,   51781632,   602112,      123, 0xfcb73863
+0,   52383744,   52383744,   602112,      138, 0xad7e44b6
+0,   52985856,   52985856,   602112,      119, 0x488d3cc9
+0,   53587968,   53587968,   602112,      123, 0xfe8b3867
+0,   54190080,   54190080,   602112,      144, 0xa26b4544
+0,   54792192,   54792192,   602112,      129, 0xf7de3bc7
+0,   55394304,   55394304,   602112,      111, 0x1fbc3aa7
+0,   55996416,   55996416,   602112,      126, 0x657a3a5f
+0,   56598528,   56598528,   602112,      140, 0xdb6542ec
+0,   57200640,   57200640,   602112,      123, 0xfcb73863
+0,   57802752,   57802752,   602112,      126, 0x639a3a5b
+0,   58404864,   58404864,   602112,      135, 0x5b1f3ced
+0,   59006976,   59006976,   602112,      123, 0xfcb73863
+0,   59609088,   59609088,   602112,      126, 0x639a3a5b
+0,   60211200,   60211200,   602112,      135, 0x5b1f3ced
+0,   60813312,   60813312,   602112,      123, 0xfcb73863
+0,   61415424,   61415424,   602112,      144, 0xa0434540
+0,   62017536,   62017536,   602112,      119, 0x45053cc1
+0,   62619648,   62619648,   602112,      111, 0x23043aaf
+0,   63221760,   63221760,   602112,      126, 0x693a3a67
+0,   63823872,   63823872,   602112,      149, 0x31304a34
+0,   64425984,   64425984,   602112,      111, 0x21603aab
+0,   65028096,   65028096,   602112,      132, 0xe42d43b3
+0,   65630208,   65630208,   602112,      135, 0x5b1f3ced
+0,   66232320,   66232320,   602112,      123, 0xfe8b3867
+0,   66834432,   66834432,   602112,      144, 0xa26b4544
+0,   67436544,   67436544,   602112,      129, 0xf7de3bc7
+0,   68038656,   68038656,   602112,      111, 0x1fbc3aa7
+0,   68640768,   68640768,   602112,      126, 0x657a3a5f
+0,   69242880,   69242880,   602112,      140, 0xdb6542ec
+0,   69844992,   69844992,   602112,      123, 0xfcb73863
+0,   70447104,   70447104,   602112,      138, 0xad7e44b6
+0,   71049216,   71049216,   602112,      119, 0x46c93cc5
+0,   71651328,   71651328,   602112,      123, 0xfe8b3867
+0,   72253440,   72253440,   602112,      144, 0xa26b4544
+0,   72855552,   72855552,   602112,      129, 0xf7de3bc7
+0,   73457664,   73457664,   602112,      111, 0x1fbc3aa7
+0,   74059776,   74059776,   602112,      126, 0x657a3a5f
+0,   74661888,   74661888,   602112,      140, 0xdb6542ec
+0,   75264000,   75264000,   602112,      123, 0xfcb73863
+0,   75866112,   75866112,   602112,      138, 0xad7e44b6
+0,   76468224,   76468224,   602112,      119, 0x46c93cc5
+0,   77070336,   77070336,   602112,      123, 0xfe8b3867
+0,   77672448,   77672448,   602112,      144, 0xa26b4544
+0,   78274560,   78274560,   602112,      129, 0xf7de3bc7
+0,   78876672,   78876672,   602112,      111, 0x1fbc3aa7
+0,   79478784,   79478784,   602112,      126, 0x657a3a5f
+0,   80080896,   80080896,   602112,      140, 0xdb6542ec
+0,   80683008,   80683008,   602112,      123, 0xfcb73863
+0,   81285120,   81285120,   602112,      138, 0xad7e44b6
+0,   81887232,   81887232,   602112,      119, 0x488d3cc9
+0,   82489344,   82489344,   602112,      123, 0xfe8b3867
+0,   83091456,   83091456,   602112,      144, 0xa26b4544
+0,   83693568,   83693568,   602112,      129, 0xf7de3bc7
+0,   84295680,   84295680,   602112,      111, 0x1fbc3aa7
+0,   84897792,   84897792,   602112,      126, 0x657a3a5f
+0,   85499904,   85499904,   602112,      140, 0xdb6542ec
+0,   86102016,   86102016,   602112,      123, 0xfcb73863
+0,   86704128,   86704128,   602112,      126, 0x639a3a5b
+0,   87306240,   87306240,   602112,      135, 0x5b1f3ced
+0,   87908352,   87908352,   602112,      123, 0xfcb73863
+0,   88510464,   88510464,   602112,      126, 0x639a3a5b
+0,   89112576,   89112576,   602112,      135, 0x5b1f3ced
+0,   89714688,   89714688,   602112,      123, 0xfcb73863
+0,   90316800,   90316800,   602112,      144, 0xa0434540
+0,   90918912,   90918912,   602112,      119, 0x45053cc1
+0,   91521024,   91521024,   602112,      111, 0x23043aaf
+0,   92123136,   92123136,   602112,      126, 0x693a3a67
+0,   92725248,   92725248,   602112,      149, 0x31304a34
+0,   93327360,   93327360,   602112,      111, 0x21603aab
+0,   93929472,   93929472,   602112,      132, 0xe42d43b3
+0,   94531584,   94531584,   602112,      135, 0x5b1f3ced
+0,   95133696,   95133696,   602112,      123, 0xfe8b3867
+0,   95735808,   95735808,   602112,      144, 0xa26b4544
+0,   96337920,   96337920,   602112,      129, 0xf7de3bc7
+0,   96940032,   96940032,   602112,      111, 0x1fbc3aa7
+0,   97542144,   97542144,   602112,      126, 0x657a3a5f
+0,   98144256,   98144256,   602112,      140, 0xdb6542ec
+0,   98746368,   98746368,   602112,      123, 0xfcb73863
+0,   99348480,   99348480,   602112,      138, 0xad7e44b6
+0,   99950592,   99950592,   602112,      119, 0x46c93cc5
+0,  100552704,  100552704,   602112,      123, 0xfe8b3867
+0,  101154816,  101154816,   602112,      144, 0xa26b4544
+0,  101756928,  101756928,   602112,      129, 0xf7de3bc7
+0,  102359040,  102359040,   602112,      111, 0x1fbc3aa7
+0,  102961152,  102961152,   602112,      126, 0x657a3a5f
+0,  103563264,  103563264,   602112,      140, 0xdb6542ec
+0,  104165376,  104165376,   602112,      123, 0xfcb73863
+0,  104767488,  104767488,   602112,      138, 0xad7e44b6
+0,  105369600,  105369600,   602112,      119, 0x46c93cc5
+0,  105971712,  105971712,   602112,      123, 0xfe8b3867
+0,  106573824,  106573824,   602112,      144, 0xa26b4544
+0,  107175936,  107175936,   602112,      129, 0xf7de3bc7
+0,  107778048,  107778048,   602112,      111, 0x1fbc3aa7
+0,  108380160,  108380160,   602112,      126, 0x657a3a5f
+0,  108982272,  108982272,   602112,      140, 0xdb6542ec
+0,  109584384,  109584384,   602112,      123, 0xfcb73863
+0,  110186496,  110186496,   602112,      138, 0xad7e44b6
+0,  110788608,  110788608,   602112,      119, 0x488d3cc9
+0,  111390720,  111390720,   602112,      123, 0xfe8b3867
+0,  111992832,  111992832,   602112,      144, 0xa26b4544
+0,  112594944,  112594944,   602112,      129, 0xf7de3bc7
+0,  113197056,  113197056,   602112,      111, 0x1fbc3aa7
+0,  113799168,  113799168,   602112,      126, 0x657a3a5f
+0,  114401280,  114401280,   602112,      140, 0xdb6542ec
+0,  115003392,  115003392,   602112,      123, 0xfcb73863
+0,  115605504,  115605504,   602112,      126, 0x639a3a5b
+0,  116207616,  116207616,   602112,      135, 0x5b1f3ced
+0,  116809728,  116809728,   602112,      123, 0xfcb73863
+0,  117411840,  117411840,   602112,      126, 0x639a3a5b
+0,  118013952,  118013952,   602112,      135, 0x5b1f3ced
+0,  118616064,  118616064,   602112,      123, 0xfcb73863
+0,  119218176,  119218176,   602112,      144, 0xa0434540
+0,  119820288,  119820288,   602112,      119, 0x45053cc1
+0,  120422400,  120422400,   602112,      111, 0x23043aaf
+0,  121024512,  121024512,   602112,      126, 0x693a3a67
+0,  121626624,  121626624,   602112,      149, 0x31304a34
+0,  122228736,  122228736,   602112,      111, 0x21603aab
+0,  122830848,  122830848,   602112,      132, 0xe42d43b3
+0,  123432960,  123432960,   602112,      135, 0x5b1f3ced
+0,  124035072,  124035072,   602112,      123, 0xfe8b3867
+0,  124637184,  124637184,   602112,      144, 0xa26b4544
+0,  125239296,  125239296,   602112,      129, 0xf7de3bc7
+0,  125841408,  125841408,   602112,      111, 0x1fbc3aa7
+0,  126443520,  126443520,   602112,      126, 0x657a3a5f
+0,  127045632,  127045632,   602112,      140, 0xdb6542ec
+0,  127647744,  127647744,   602112,      123, 0xfcb73863
+0,  128249856,  128249856,   602112,      138, 0xad7e44b6
+0,  128851968,  128851968,   602112,      119, 0x46c93cc5
+0,  129454080,  129454080,   602112,      123, 0xfe8b3867
+0,  130056192,  130056192,   602112,      144, 0xa26b4544
+0,  130658304,  130658304,   602112,      129, 0xf7de3bc7
+0,  131260416,  131260416,   602112,      111, 0x1fbc3aa7
+0,  131862528,  131862528,   602112,      126, 0x657a3a5f
+0,  132464640,  132464640,   602112,      140, 0xdb6542ec
+0,  133066752,  133066752,   602112,      123, 0xfcb73863
+0,  133668864,  133668864,   602112,      138, 0xad7e44b6
+0,  134270976,  134270976,   602112,      119, 0x46c93cc5
+0,  134873088,  134873088,   602112,      123, 0xfe8b3867
+0,  135475200,  135475200,   602112,      144, 0xa26b4544
+0,  136077312,  136077312,   602112,      129, 0xf7de3bc7
+0,  136679424,  136679424,   602112,      111, 0x1fbc3aa7
+0,  137281536,  137281536,   602112,      126, 0x657a3a5f
+0,  137883648,  137883648,   602112,      140, 0xdb6542ec
+0,  138485760,  138485760,   602112,      123, 0xfcb73863
+0,  139087872,  139087872,   602112,      138, 0xad7e44b6
+0,  139689984,  139689984,   602112,      119, 0x488d3cc9
+0,  140292096,  140292096,   602112,      123, 0xfe8b3867

diff --git a/tests/ref/fate/adts-id3v2-two-tags-demux b/tests/ref/fate/adts-id3v2-two-tags-demux
new file mode 100644
index 0000000..4fffd2e
--- /dev/null
+++ b/tests/ref/fate/adts-id3v2-two-tags-demux

@@ -0,0 +1,475 @@
+#tb 0: 1/28224000
+#media_type 0: audio
+#codec_id 0: aac
+#sample_rate 0: 48000
+#channel_layout 0: 4
+#channel_layout_name 0: mono
+0,          0,          0,   602112,      128, 0x23291993
+0,     602112,     602112,   602112,      128, 0x23291993
+0,    1204224,    1204224,   602112,      128, 0x23291993
+0,    1806336,    1806336,   602112,      128, 0x23291993
+0,    2408448,    2408448,   602112,      128, 0x23291993
+0,    3010560,    3010560,   602112,      128, 0x23291993
+0,    3612672,    3612672,   602112,      128, 0x23291993
+0,    4214784,    4214784,   602112,      128, 0x23291993
+0,    4816896,    4816896,   602112,      128, 0x23291993
+0,    5419008,    5419008,   602112,      128, 0x23291993
+0,    6021120,    6021120,   602112,      128, 0x23291993
+0,    6623232,    6623232,   602112,      128, 0x23291993
+0,    7225344,    7225344,   602112,      128, 0x23291993
+0,    7827456,    7827456,   602112,      128, 0x23291993
+0,    8429568,    8429568,   602112,      128, 0x23291993
+0,    9031680,    9031680,   602112,      128, 0x23291993
+0,    9633792,    9633792,   602112,      128, 0x23291993
+0,   10235904,   10235904,   602112,      128, 0x23291993
+0,   10838016,   10838016,   602112,      128, 0x23291993
+0,   11440128,   11440128,   602112,      128, 0x23291993
+0,   12042240,   12042240,   602112,      128, 0x23291993
+0,   12644352,   12644352,   602112,      128, 0x23291993
+0,   13246464,   13246464,   602112,      128, 0x23291993
+0,   13848576,   13848576,   602112,      128, 0x23291993
+0,   14450688,   14450688,   602112,      128, 0x23291993
+0,   15052800,   15052800,   602112,      128, 0x23291993
+0,   15654912,   15654912,   602112,      128, 0x23291993
+0,   16257024,   16257024,   602112,      128, 0x23291993
+0,   16859136,   16859136,   602112,      128, 0x23291993
+0,   17461248,   17461248,   602112,      128, 0x23291993
+0,   18063360,   18063360,   602112,      128, 0x23291993
+0,   18665472,   18665472,   602112,      128, 0x23291993
+0,   19267584,   19267584,   602112,      128, 0x23291993
+0,   19869696,   19869696,   602112,      128, 0x23291993
+0,   20471808,   20471808,   602112,      128, 0x23291993
+0,   21073920,   21073920,   602112,      128, 0x23291993
+0,   21676032,   21676032,   602112,      128, 0x23291993
+0,   22278144,   22278144,   602112,      128, 0x23291993
+0,   22880256,   22880256,   602112,      128, 0x23291993
+0,   23482368,   23482368,   602112,      128, 0x23291993
+0,   24084480,   24084480,   602112,      128, 0x23291993
+0,   24686592,   24686592,   602112,      128, 0x23291993
+0,   25288704,   25288704,   602112,      128, 0x23291993
+0,   25890816,   25890816,   602112,      128, 0x23291993
+0,   26492928,   26492928,   602112,      128, 0x23291993
+0,   27095040,   27095040,   602112,      128, 0x23291993
+0,   27697152,   27697152,   602112,      128, 0x23291993
+0,   28299264,   28299264,   602112,      128, 0x23291993
+0,   28901376,   28901376,   602112,      128, 0x23291993
+0,   29503488,   29503488,   602112,      128, 0x23291993
+0,   30105600,   30105600,   602112,      128, 0x23291993
+0,   30707712,   30707712,   602112,      128, 0x23291993
+0,   31309824,   31309824,   602112,      128, 0x23291993
+0,   31911936,   31911936,   602112,      128, 0x23291993
+0,   32514048,   32514048,   602112,      128, 0x23291993
+0,   33116160,   33116160,   602112,      128, 0x23291993
+0,   33718272,   33718272,   602112,      128, 0x23291993
+0,   34320384,   34320384,   602112,      128, 0x23291993
+0,   34922496,   34922496,   602112,      128, 0x23291993
+0,   35524608,   35524608,   602112,      128, 0x23291993
+0,   36126720,   36126720,   602112,      128, 0x23291993
+0,   36728832,   36728832,   602112,      128, 0x23291993
+0,   37330944,   37330944,   602112,      128, 0x23291993
+0,   37933056,   37933056,   602112,      128, 0x23291993
+0,   38535168,   38535168,   602112,      128, 0x23291993
+0,   39137280,   39137280,   602112,      128, 0x23291993
+0,   39739392,   39739392,   602112,      128, 0x23291993
+0,   40341504,   40341504,   602112,      128, 0x23291993
+0,   40943616,   40943616,   602112,      128, 0x23291993
+0,   41545728,   41545728,   602112,      128, 0x23291993
+0,   42147840,   42147840,   602112,      128, 0x23291993
+0,   42749952,   42749952,   602112,      128, 0x23291993
+0,   43352064,   43352064,   602112,      128, 0x23291993
+0,   43954176,   43954176,   602112,      128, 0x23291993
+0,   44556288,   44556288,   602112,      128, 0x23291993
+0,   45158400,   45158400,   602112,      128, 0x23291993
+0,   45760512,   45760512,   602112,      128, 0x23291993
+0,   46362624,   46362624,   602112,      128, 0x23291993
+0,   46964736,   46964736,   602112,      128, 0x23291993
+0,   47566848,   47566848,   602112,      128, 0x23291993
+0,   48168960,   48168960,   602112,      128, 0x23291993
+0,   48771072,   48771072,   602112,      128, 0x23291993
+0,   49373184,   49373184,   602112,      128, 0x23291993
+0,   49975296,   49975296,   602112,      128, 0x23291993
+0,   50577408,   50577408,   602112,      128, 0x23291993
+0,   51179520,   51179520,   602112,      128, 0x23291993
+0,   51781632,   51781632,   602112,      128, 0x23291993
+0,   52383744,   52383744,   602112,      128, 0x23291993
+0,   52985856,   52985856,   602112,      128, 0x23291993
+0,   53587968,   53587968,   602112,      128, 0x23291993
+0,   54190080,   54190080,   602112,      128, 0x23291993
+0,   54792192,   54792192,   602112,      128, 0x23291993
+0,   55394304,   55394304,   602112,      128, 0x23291993
+0,   55996416,   55996416,   602112,      128, 0x23291993
+0,   56598528,   56598528,   602112,      128, 0x23291993
+0,   57200640,   57200640,   602112,      128, 0x23291993
+0,   57802752,   57802752,   602112,      128, 0x23291993
+0,   58404864,   58404864,   602112,      128, 0x23291993
+0,   59006976,   59006976,   602112,      128, 0x23291993
+0,   59609088,   59609088,   602112,      128, 0x23291993
+0,   60211200,   60211200,   602112,      128, 0x23291993
+0,   60813312,   60813312,   602112,      128, 0x23291993
+0,   61415424,   61415424,   602112,      128, 0x23291993
+0,   62017536,   62017536,   602112,      128, 0x23291993
+0,   62619648,   62619648,   602112,      128, 0x23291993
+0,   63221760,   63221760,   602112,      128, 0x23291993
+0,   63823872,   63823872,   602112,      128, 0x23291993
+0,   64425984,   64425984,   602112,      128, 0x23291993
+0,   65028096,   65028096,   602112,      128, 0x23291993
+0,   65630208,   65630208,   602112,      128, 0x23291993
+0,   66232320,   66232320,   602112,      128, 0x23291993
+0,   66834432,   66834432,   602112,      128, 0x23291993
+0,   67436544,   67436544,   602112,      128, 0x23291993
+0,   68038656,   68038656,   602112,      128, 0x23291993
+0,   68640768,   68640768,   602112,      128, 0x23291993
+0,   69242880,   69242880,   602112,      128, 0x23291993
+0,   69844992,   69844992,   602112,      128, 0x23291993
+0,   70447104,   70447104,   602112,      128, 0x23291993
+0,   71049216,   71049216,   602112,      128, 0x23291993
+0,   71651328,   71651328,   602112,      128, 0x23291993
+0,   72253440,   72253440,   602112,      128, 0x23291993
+0,   72855552,   72855552,   602112,      128, 0x23291993
+0,   73457664,   73457664,   602112,      128, 0x23291993
+0,   74059776,   74059776,   602112,      128, 0x23291993
+0,   74661888,   74661888,   602112,      128, 0x23291993
+0,   75264000,   75264000,   602112,      128, 0x23291993
+0,   75866112,   75866112,   602112,      128, 0x23291993
+0,   76468224,   76468224,   602112,      128, 0x23291993
+0,   77070336,   77070336,   602112,      128, 0x23291993
+0,   77672448,   77672448,   602112,      128, 0x23291993
+0,   78274560,   78274560,   602112,      128, 0x23291993
+0,   78876672,   78876672,   602112,      128, 0x23291993
+0,   79478784,   79478784,   602112,      128, 0x23291993
+0,   80080896,   80080896,   602112,      128, 0x23291993
+0,   80683008,   80683008,   602112,      128, 0x23291993
+0,   81285120,   81285120,   602112,      128, 0x23291993
+0,   81887232,   81887232,   602112,      128, 0x23291993
+0,   82489344,   82489344,   602112,      128, 0x23291993
+0,   83091456,   83091456,   602112,      128, 0x23291993
+0,   83693568,   83693568,   602112,      128, 0x23291993
+0,   84295680,   84295680,   602112,      128, 0x23291993
+0,   84897792,   84897792,   602112,      128, 0x23291993
+0,   85499904,   85499904,   602112,      128, 0x23291993
+0,   86102016,   86102016,   602112,      128, 0x23291993
+0,   86704128,   86704128,   602112,      128, 0x23291993
+0,   87306240,   87306240,   602112,      128, 0x23291993
+0,   87908352,   87908352,   602112,      128, 0x23291993
+0,   88510464,   88510464,   602112,      128, 0x23291993
+0,   89112576,   89112576,   602112,      128, 0x23291993
+0,   89714688,   89714688,   602112,      128, 0x23291993
+0,   90316800,   90316800,   602112,      128, 0x23291993
+0,   90918912,   90918912,   602112,      128, 0x23291993
+0,   91521024,   91521024,   602112,      128, 0x23291993
+0,   92123136,   92123136,   602112,      128, 0x23291993
+0,   92725248,   92725248,   602112,      128, 0x23291993
+0,   93327360,   93327360,   602112,      128, 0x23291993
+0,   93929472,   93929472,   602112,      128, 0x23291993
+0,   94531584,   94531584,   602112,      128, 0x23291993
+0,   95133696,   95133696,   602112,      128, 0x23291993
+0,   95735808,   95735808,   602112,      128, 0x23291993
+0,   96337920,   96337920,   602112,      128, 0x23291993
+0,   96940032,   96940032,   602112,      128, 0x23291993
+0,   97542144,   97542144,   602112,      128, 0x23291993
+0,   98144256,   98144256,   602112,      128, 0x23291993
+0,   98746368,   98746368,   602112,      128, 0x23291993
+0,   99348480,   99348480,   602112,      128, 0x23291993
+0,   99950592,   99950592,   602112,      128, 0x23291993
+0,  100552704,  100552704,   602112,      128, 0x23291993
+0,  101154816,  101154816,   602112,      128, 0x23291993
+0,  101756928,  101756928,   602112,      128, 0x23291993
+0,  102359040,  102359040,   602112,      128, 0x23291993
+0,  102961152,  102961152,   602112,      128, 0x23291993
+0,  103563264,  103563264,   602112,      128, 0x23291993
+0,  104165376,  104165376,   602112,      128, 0x23291993
+0,  104767488,  104767488,   602112,      128, 0x23291993
+0,  105369600,  105369600,   602112,      128, 0x23291993
+0,  105971712,  105971712,   602112,      128, 0x23291993
+0,  106573824,  106573824,   602112,      128, 0x23291993
+0,  107175936,  107175936,   602112,      128, 0x23291993
+0,  107778048,  107778048,   602112,      128, 0x23291993
+0,  108380160,  108380160,   602112,      128, 0x23291993
+0,  108982272,  108982272,   602112,      128, 0x23291993
+0,  109584384,  109584384,   602112,      128, 0x23291993
+0,  110186496,  110186496,   602112,      128, 0x23291993
+0,  110788608,  110788608,   602112,      128, 0x23291993
+0,  111390720,  111390720,   602112,      128, 0x23291993
+0,  111992832,  111992832,   602112,      128, 0x23291993
+0,  112594944,  112594944,   602112,      128, 0x23291993
+0,  113197056,  113197056,   602112,      128, 0x23291993
+0,  113799168,  113799168,   602112,      128, 0x23291993
+0,  114401280,  114401280,   602112,      128, 0x23291993
+0,  115003392,  115003392,   602112,      128, 0x23291993
+0,  115605504,  115605504,   602112,      128, 0x23291993
+0,  116207616,  116207616,   602112,      128, 0x23291993
+0,  116809728,  116809728,   602112,      128, 0x23291993
+0,  117411840,  117411840,   602112,      128, 0x23291993
+0,  118013952,  118013952,   602112,      128, 0x23291993
+0,  118616064,  118616064,   602112,      128, 0x23291993
+0,  119218176,  119218176,   602112,      128, 0x23291993
+0,  119820288,  119820288,   602112,      128, 0x23291993
+0,  120422400,  120422400,   602112,      128, 0x23291993
+0,  121024512,  121024512,   602112,      128, 0x23291993
+0,  121626624,  121626624,   602112,      128, 0x23291993
+0,  122228736,  122228736,   602112,      128, 0x23291993
+0,  122830848,  122830848,   602112,      128, 0x23291993
+0,  123432960,  123432960,   602112,      128, 0x23291993
+0,  124035072,  124035072,   602112,      128, 0x23291993
+0,  124637184,  124637184,   602112,      128, 0x23291993
+0,  125239296,  125239296,   602112,      128, 0x23291993
+0,  125841408,  125841408,   602112,      128, 0x23291993
+0,  126443520,  126443520,   602112,      128, 0x23291993
+0,  127045632,  127045632,   602112,      128, 0x23291993
+0,  127647744,  127647744,   602112,      128, 0x23291993
+0,  128249856,  128249856,   602112,      128, 0x23291993
+0,  128851968,  128851968,   602112,      128, 0x23291993
+0,  129454080,  129454080,   602112,      128, 0x23291993
+0,  130056192,  130056192,   602112,      128, 0x23291993
+0,  130658304,  130658304,   602112,      128, 0x23291993
+0,  131260416,  131260416,   602112,      128, 0x23291993
+0,  131862528,  131862528,   602112,      128, 0x23291993
+0,  132464640,  132464640,   602112,      128, 0x23291993
+0,  133066752,  133066752,   602112,      128, 0x23291993
+0,  133668864,  133668864,   602112,      128, 0x23291993
+0,  134270976,  134270976,   602112,      128, 0x23291993
+0,  134873088,  134873088,   602112,      128, 0x23291993
+0,  135475200,  135475200,   602112,      128, 0x23291993
+0,  136077312,  136077312,   602112,      128, 0x23291993
+0,  136679424,  136679424,   602112,      128, 0x23291993
+0,  137281536,  137281536,   602112,      128, 0x23291993
+0,  137883648,  137883648,   602112,      128, 0x23291993
+0,  138485760,  138485760,   602112,      128, 0x23291993
+0,  139087872,  139087872,   602112,      128, 0x23291993
+0,  139689984,  139689984,   602112,      128, 0x23291993
+0,  140292096,  140292096,   602112,      128, 0x23291993
+0,  140894208,  140894208,   602112,      128, 0x23291993
+0,  141496320,  141496320,   602112,      128, 0x23291993
+0,  142098432,  142098432,   602112,      128, 0x23291993
+0,  142700544,  142700544,   602112,      128, 0x23291993
+0,  143302656,  143302656,   602112,      128, 0x23291993
+0,  143904768,  143904768,   602112,      128, 0x23291993
+0,  144506880,  144506880,   602112,      128, 0x23291993
+0,  145108992,  145108992,   602112,      128, 0x23291993
+0,  145711104,  145711104,   602112,      128, 0x23291993
+0,  146313216,  146313216,   602112,      128, 0x23291993
+0,  146915328,  146915328,   602112,      128, 0x23291993
+0,  147517440,  147517440,   602112,      128, 0x23291993
+0,  148119552,  148119552,   602112,      128, 0x23291993
+0,  148721664,  148721664,   602112,      128, 0x23291993
+0,  149323776,  149323776,   602112,      128, 0x23291993
+0,  149925888,  149925888,   602112,      128, 0x23291993
+0,  150528000,  150528000,   602112,      128, 0x23291993
+0,  151130112,  151130112,   602112,      128, 0x23291993
+0,  151732224,  151732224,   602112,      128, 0x23291993
+0,  152334336,  152334336,   602112,      128, 0x23291993
+0,  152936448,  152936448,   602112,      128, 0x23291993
+0,  153538560,  153538560,   602112,      128, 0x23291993
+0,  154140672,  154140672,   602112,      128, 0x23291993
+0,  154742784,  154742784,   602112,      128, 0x23291993
+0,  155344896,  155344896,   602112,      128, 0x23291993
+0,  155947008,  155947008,   602112,      128, 0x23291993
+0,  156549120,  156549120,   602112,      128, 0x23291993
+0,  157151232,  157151232,   602112,      128, 0x23291993
+0,  157753344,  157753344,   602112,      128, 0x23291993
+0,  158355456,  158355456,   602112,      128, 0x23291993
+0,  158957568,  158957568,   602112,      128, 0x23291993
+0,  159559680,  159559680,   602112,      128, 0x23291993
+0,  160161792,  160161792,   602112,      128, 0x23291993
+0,  160763904,  160763904,   602112,      128, 0x23291993
+0,  161366016,  161366016,   602112,      128, 0x23291993
+0,  161968128,  161968128,   602112,      128, 0x23291993
+0,  162570240,  162570240,   602112,      128, 0x23291993
+0,  163172352,  163172352,   602112,      128, 0x23291993
+0,  163774464,  163774464,   602112,      128, 0x23291993
+0,  164376576,  164376576,   602112,      128, 0x23291993
+0,  164978688,  164978688,   602112,      128, 0x23291993
+0,  165580800,  165580800,   602112,      128, 0x23291993
+0,  166182912,  166182912,   602112,      128, 0x23291993
+0,  166785024,  166785024,   602112,      128, 0x23291993
+0,  167387136,  167387136,   602112,      128, 0x23291993
+0,  167989248,  167989248,   602112,      128, 0x23291993
+0,  168591360,  168591360,   602112,      128, 0x23291993
+0,  169193472,  169193472,   602112,      128, 0x23291993
+0,  169795584,  169795584,   602112,      128, 0x23291993
+0,  170397696,  170397696,   602112,      128, 0x23291993
+0,  170999808,  170999808,   602112,      128, 0x23291993
+0,  171601920,  171601920,   602112,      128, 0x23291993
+0,  172204032,  172204032,   602112,      128, 0x23291993
+0,  172806144,  172806144,   602112,      128, 0x23291993
+0,  173408256,  173408256,   602112,      128, 0x23291993
+0,  174010368,  174010368,   602112,      128, 0x23291993
+0,  174612480,  174612480,   602112,      128, 0x23291993
+0,  175214592,  175214592,   602112,      128, 0x23291993
+0,  175816704,  175816704,   602112,      128, 0x23291993
+0,  176418816,  176418816,   602112,      128, 0x23291993
+0,  177020928,  177020928,   602112,      128, 0x23291993
+0,  177623040,  177623040,   602112,      128, 0x23291993
+0,  178225152,  178225152,   602112,      128, 0x23291993
+0,  178827264,  178827264,   602112,      128, 0x23291993
+0,  179429376,  179429376,   602112,      128, 0x23291993
+0,  180031488,  180031488,   602112,      128, 0x23291993
+0,  180633600,  180633600,   602112,      128, 0x23291993
+0,  181235712,  181235712,   602112,      128, 0x23291993
+0,  181837824,  181837824,   602112,      128, 0x23291993
+0,  182439936,  182439936,   602112,      128, 0x23291993
+0,  183042048,  183042048,   602112,      128, 0x23291993
+0,  183644160,  183644160,   602112,      128, 0x23291993
+0,  184246272,  184246272,   602112,      128, 0x23291993
+0,  184848384,  184848384,   602112,      128, 0x23291993
+0,  185450496,  185450496,   602112,      128, 0x23291993
+0,  186052608,  186052608,   602112,      128, 0x23291993
+0,  186654720,  186654720,   602112,      128, 0x23291993
+0,  187256832,  187256832,   602112,      128, 0x23291993
+0,  187858944,  187858944,   602112,      128, 0x23291993
+0,  188461056,  188461056,   602112,      128, 0x23291993
+0,  189063168,  189063168,   602112,      128, 0x23291993
+0,  189665280,  189665280,   602112,      128, 0x23291993
+0,  190267392,  190267392,   602112,      128, 0x23291993
+0,  190869504,  190869504,   602112,      128, 0x23291993
+0,  191471616,  191471616,   602112,      128, 0x23291993
+0,  192073728,  192073728,   602112,      128, 0x23291993
+0,  192675840,  192675840,   602112,      128, 0x23291993
+0,  193277952,  193277952,   602112,      128, 0x23291993
+0,  193880064,  193880064,   602112,      128, 0x23291993
+0,  194482176,  194482176,   602112,      128, 0x23291993
+0,  195084288,  195084288,   602112,      128, 0x23291993
+0,  195686400,  195686400,   602112,      128, 0x23291993
+0,  196288512,  196288512,   602112,      128, 0x23291993
+0,  196890624,  196890624,   602112,      128, 0x23291993
+0,  197492736,  197492736,   602112,      128, 0x23291993
+0,  198094848,  198094848,   602112,      128, 0x23291993
+0,  198696960,  198696960,   602112,      128, 0x23291993
+0,  199299072,  199299072,   602112,      128, 0x23291993
+0,  199901184,  199901184,   602112,      128, 0x23291993
+0,  200503296,  200503296,   602112,      128, 0x23291993
+0,  201105408,  201105408,   602112,      128, 0x23291993
+0,  201707520,  201707520,   602112,      128, 0x23291993
+0,  202309632,  202309632,   602112,      128, 0x23291993
+0,  202911744,  202911744,   602112,      128, 0x23291993
+0,  203513856,  203513856,   602112,      128, 0x23291993
+0,  204115968,  204115968,   602112,      128, 0x23291993
+0,  204718080,  204718080,   602112,      128, 0x23291993
+0,  205320192,  205320192,   602112,      128, 0x23291993
+0,  205922304,  205922304,   602112,      128, 0x23291993
+0,  206524416,  206524416,   602112,      128, 0x23291993
+0,  207126528,  207126528,   602112,      128, 0x23291993
+0,  207728640,  207728640,   602112,      128, 0x23291993
+0,  208330752,  208330752,   602112,      128, 0x23291993
+0,  208932864,  208932864,   602112,      128, 0x23291993
+0,  209534976,  209534976,   602112,      128, 0x23291993
+0,  210137088,  210137088,   602112,      128, 0x23291993
+0,  210739200,  210739200,   602112,      128, 0x23291993
+0,  211341312,  211341312,   602112,      128, 0x23291993
+0,  211943424,  211943424,   602112,      128, 0x23291993
+0,  212545536,  212545536,   602112,      128, 0x23291993
+0,  213147648,  213147648,   602112,      128, 0x23291993
+0,  213749760,  213749760,   602112,      128, 0x23291993
+0,  214351872,  214351872,   602112,      128, 0x23291993
+0,  214953984,  214953984,   602112,      128, 0x23291993
+0,  215556096,  215556096,   602112,      128, 0x23291993
+0,  216158208,  216158208,   602112,      128, 0x23291993
+0,  216760320,  216760320,   602112,      128, 0x23291993
+0,  217362432,  217362432,   602112,      128, 0x23291993
+0,  217964544,  217964544,   602112,      128, 0x23291993
+0,  218566656,  218566656,   602112,      128, 0x23291993
+0,  219168768,  219168768,   602112,      128, 0x23291993
+0,  219770880,  219770880,   602112,      128, 0x23291993
+0,  220372992,  220372992,   602112,      128, 0x23291993
+0,  220975104,  220975104,   602112,      128, 0x23291993
+0,  221577216,  221577216,   602112,      128, 0x23291993
+0,  222179328,  222179328,   602112,      128, 0x23291993
+0,  222781440,  222781440,   602112,      128, 0x23291993
+0,  223383552,  223383552,   602112,      128, 0x23291993
+0,  223985664,  223985664,   602112,      128, 0x23291993
+0,  224587776,  224587776,   602112,      128, 0x23291993
+0,  225189888,  225189888,   602112,      128, 0x23291993
+0,  225792000,  225792000,   602112,      128, 0x23291993
+0,  226394112,  226394112,   602112,      128, 0x23291993
+0,  226996224,  226996224,   602112,      128, 0x23291993
+0,  227598336,  227598336,   602112,      128, 0x23291993
+0,  228200448,  228200448,   602112,      128, 0x23291993
+0,  228802560,  228802560,   602112,      128, 0x23291993
+0,  229404672,  229404672,   602112,      128, 0x23291993
+0,  230006784,  230006784,   602112,      128, 0x23291993
+0,  230608896,  230608896,   602112,      128, 0x23291993
+0,  231211008,  231211008,   602112,      128, 0x23291993
+0,  231813120,  231813120,   602112,      128, 0x23291993
+0,  232415232,  232415232,   602112,      128, 0x23291993
+0,  233017344,  233017344,   602112,      128, 0x23291993
+0,  233619456,  233619456,   602112,      128, 0x23291993
+0,  234221568,  234221568,   602112,      128, 0x23291993
+0,  234823680,  234823680,   602112,      128, 0x23291993
+0,  235425792,  235425792,   602112,      128, 0x23291993
+0,  236027904,  236027904,   602112,      128, 0x23291993
+0,  236630016,  236630016,   602112,      128, 0x23291993
+0,  237232128,  237232128,   602112,      128, 0x23291993
+0,  237834240,  237834240,   602112,      128, 0x23291993
+0,  238436352,  238436352,   602112,      128, 0x23291993
+0,  239038464,  239038464,   602112,      128, 0x23291993
+0,  239640576,  239640576,   602112,      128, 0x23291993
+0,  240242688,  240242688,   602112,      128, 0x23291993
+0,  240844800,  240844800,   602112,      128, 0x23291993
+0,  241446912,  241446912,   602112,      128, 0x23291993
+0,  242049024,  242049024,   602112,      128, 0x23291993
+0,  242651136,  242651136,   602112,      128, 0x23291993
+0,  243253248,  243253248,   602112,      128, 0x23291993
+0,  243855360,  243855360,   602112,      128, 0x23291993
+0,  244457472,  244457472,   602112,      128, 0x23291993
+0,  245059584,  245059584,   602112,      128, 0x23291993
+0,  245661696,  245661696,   602112,      128, 0x23291993
+0,  246263808,  246263808,   602112,      128, 0x23291993
+0,  246865920,  246865920,   602112,      128, 0x23291993
+0,  247468032,  247468032,   602112,      128, 0x23291993
+0,  248070144,  248070144,   602112,      128, 0x23291993
+0,  248672256,  248672256,   602112,      128, 0x23291993
+0,  249274368,  249274368,   602112,      128, 0x23291993
+0,  249876480,  249876480,   602112,      128, 0x23291993
+0,  250478592,  250478592,   602112,      128, 0x23291993
+0,  251080704,  251080704,   602112,      128, 0x23291993
+0,  251682816,  251682816,   602112,      128, 0x23291993
+0,  252284928,  252284928,   602112,      128, 0x23291993
+0,  252887040,  252887040,   602112,      128, 0x23291993
+0,  253489152,  253489152,   602112,      128, 0x23291993
+0,  254091264,  254091264,   602112,      128, 0x23291993
+0,  254693376,  254693376,   602112,      128, 0x23291993
+0,  255295488,  255295488,   602112,      128, 0x23291993
+0,  255897600,  255897600,   602112,      128, 0x23291993
+0,  256499712,  256499712,   602112,      128, 0x23291993
+0,  257101824,  257101824,   602112,      128, 0x23291993
+0,  257703936,  257703936,   602112,      128, 0x23291993
+0,  258306048,  258306048,   602112,      128, 0x23291993
+0,  258908160,  258908160,   602112,      128, 0x23291993
+0,  259510272,  259510272,   602112,      128, 0x23291993
+0,  260112384,  260112384,   602112,      128, 0x23291993
+0,  260714496,  260714496,   602112,      128, 0x23291993
+0,  261316608,  261316608,   602112,      128, 0x23291993
+0,  261918720,  261918720,   602112,      128, 0x23291993
+0,  262520832,  262520832,   602112,      128, 0x23291993
+0,  263122944,  263122944,   602112,      128, 0x23291993
+0,  263725056,  263725056,   602112,      128, 0x23291993
+0,  264327168,  264327168,   602112,      128, 0x23291993
+0,  264929280,  264929280,   602112,      128, 0x23291993
+0,  265531392,  265531392,   602112,      128, 0x23291993
+0,  266133504,  266133504,   602112,      128, 0x23291993
+0,  266735616,  266735616,   602112,      128, 0x23291993
+0,  267337728,  267337728,   602112,      128, 0x23291993
+0,  267939840,  267939840,   602112,      128, 0x23291993
+0,  268541952,  268541952,   602112,      128, 0x23291993
+0,  269144064,  269144064,   602112,      128, 0x23291993
+0,  269746176,  269746176,   602112,      128, 0x23291993
+0,  270348288,  270348288,   602112,      128, 0x23291993
+0,  270950400,  270950400,   602112,      128, 0x23291993
+0,  271552512,  271552512,   602112,      128, 0x23291993
+0,  272154624,  272154624,   602112,      128, 0x23291993
+0,  272756736,  272756736,   602112,      128, 0x23291993
+0,  273358848,  273358848,   602112,      128, 0x23291993
+0,  273960960,  273960960,   602112,      128, 0x23291993
+0,  274563072,  274563072,   602112,      128, 0x23291993
+0,  275165184,  275165184,   602112,      128, 0x23291993
+0,  275767296,  275767296,   602112,      128, 0x23291993
+0,  276369408,  276369408,   602112,      128, 0x23291993
+0,  276971520,  276971520,   602112,      128, 0x23291993
+0,  277573632,  277573632,   602112,      128, 0x23291993
+0,  278175744,  278175744,   602112,      128, 0x23291993
+0,  278777856,  278777856,   602112,      128, 0x23291993
+0,  279379968,  279379968,   602112,      128, 0x23291993
+0,  279982080,  279982080,   602112,      128, 0x23291993
+0,  280584192,  280584192,   602112,      128, 0x23291993
+0,  281186304,  281186304,   602112,      128, 0x23291993
+0,  281788416,  281788416,   602112,      128, 0x23291993

diff --git a/tests/ref/fate/adtstoasc_ticket3715 b/tests/ref/fate/adtstoasc_ticket3715
index 96795a2..c5f03e4 100644
--- a/tests/ref/fate/adtstoasc_ticket3715
+++ b/tests/ref/fate/adtstoasc_ticket3715

@@ -92,4 +92,4 @@
 0,      83968,      83968,     1024,      465, 0xeb3ce0af
 0,      84992,      84992,     1024,      326, 0x7be4a667
 0,      86016,      86016,     1024,      339, 0x2cf4a71f
-0,      87040,      87040,     1028,      258, 0xd4007ad4
+0,      87040,      87040,     1024,      258, 0xd4007ad4

diff --git a/tests/ref/fate/aic b/tests/ref/fate/aic
index 1f50350..244ea25 100644
--- a/tests/ref/fate/aic
+++ b/tests/ref/fate/aic

@@ -3,18 +3,18 @@
 #codec_id 0: rawvideo
 #dimensions 0: 1440x1080
 #sar 0: 4/3
-0,          0,          0,        1,  2332800, 0xd941b42f
-0,          1,          1,        1,  2332800, 0xd941b42f
-0,          2,          2,        1,  2332800, 0xae0f5983
-0,          3,          3,        1,  2332800, 0x51cfc127
-0,          4,          4,        1,  2332800, 0x24d40447
-0,          5,          5,        1,  2332800, 0x858a9f51
-0,          6,          6,        1,  2332800, 0x533b48e8
-0,          7,          7,        1,  2332800, 0x2fd73267
-0,          8,          8,        1,  2332800, 0x153566c7
-0,          9,          9,        1,  2332800, 0xa1c49c45
-0,         10,         10,        1,  2332800, 0xb966e25a
-0,         11,         11,        1,  2332800, 0xd0ce5985
-0,         12,         12,        1,  2332800, 0x0029a52e
-0,         13,         13,        1,  2332800, 0x893116c5
-0,         14,         14,        1,  2332800, 0x073d2491
+0,          0,          0,        1,  2332800, 0xc22b8485
+0,          1,          1,        1,  2332800, 0xc22b8485
+0,          2,          2,        1,  2332800, 0xe0c21bd8
+0,          3,          3,        1,  2332800, 0x3e1a8fa0
+0,          4,          4,        1,  2332800, 0xbcb3f235
+0,          5,          5,        1,  2332800, 0x1a7cabd6
+0,          6,          6,        1,  2332800, 0xc0136ba8
+0,          7,          7,        1,  2332800, 0x295e59a6
+0,          8,          8,        1,  2332800, 0xf9c09288
+0,          9,          9,        1,  2332800, 0x0518cc8f
+0,         10,         10,        1,  2332800, 0x9ad3068e
+0,         11,         11,        1,  2332800, 0x5a8b7af1
+0,         12,         12,        1,  2332800, 0x7b35a8fa
+0,         13,         13,        1,  2332800, 0xbe5801eb
+0,         14,         14,        1,  2332800, 0x31ca019f

diff --git a/tests/ref/fate/aic-oddsize b/tests/ref/fate/aic-oddsize
index 3763e32..be4346a 100644
--- a/tests/ref/fate/aic-oddsize
+++ b/tests/ref/fate/aic-oddsize

@@ -3,60 +3,60 @@
 #codec_id 0: rawvideo
 #dimensions 0: 481x241
 #sar 0: 0/1
-0,          0,          0,        1,   174243, 0xa40491e1
-0,          1,          1,        1,   174243, 0xa12cbb56
-0,          2,          2,        1,   174243, 0xa12cbb56
-0,          3,          3,        1,   174243, 0xa12cbb56
-0,          4,          4,        1,   174243, 0xa12cbb56
-0,          5,          5,        1,   174243, 0xa12cbb56
-0,          6,          6,        1,   174243, 0xa12cbb56
-0,          7,          7,        1,   174243, 0xa12cbb56
-0,          8,          8,        1,   174243, 0xa12cbb56
-0,          9,          9,        1,   174243, 0x4e7b7299
-0,         10,         10,        1,   174243, 0x31573b99
-0,         11,         11,        1,   174243, 0x013397b6
-0,         12,         12,        1,   174243, 0xdd988ab8
-0,         13,         13,        1,   174243, 0xd6d96b1e
-0,         14,         14,        1,   174243, 0xd6d96b1e
-0,         15,         15,        1,   174243, 0xd6d96b1e
-0,         16,         16,        1,   174243, 0x111627d3
-0,         17,         17,        1,   174243, 0x284d9ab7
-0,         18,         18,        1,   174243, 0xa348c492
-0,         19,         19,        1,   174243, 0xa348c492
-0,         20,         20,        1,   174243, 0xa348c492
-0,         21,         21,        1,   174243, 0xa348c492
-0,         22,         22,        1,   174243, 0x2d22c3b8
-0,         23,         23,        1,   174243, 0x2d22c3b8
-0,         24,         24,        1,   174243, 0x2d22c3b8
-0,         25,         25,        1,   174243, 0x2d22c3b8
-0,         26,         26,        1,   174243, 0xa6d7c890
-0,         27,         27,        1,   174243, 0x8068bfbb
-0,         28,         28,        1,   174243, 0x420ae647
-0,         29,         29,        1,   174243, 0xc5467756
-0,         30,         30,        1,   174243, 0x238a13dd
-0,         31,         31,        1,   174243, 0x5bab75dc
-0,         32,         32,        1,   174243, 0x14d7f61f
-0,         33,         33,        1,   174243, 0x2e1d334f
-0,         34,         34,        1,   174243, 0xeade7dc0
-0,         35,         35,        1,   174243, 0xeade7dc0
-0,         36,         36,        1,   174243, 0xeade7dc0
-0,         37,         37,        1,   174243, 0xeade7dc0
-0,         38,         38,        1,   174243, 0x088c7ef9
-0,         39,         39,        1,   174243, 0x70a3554e
-0,         40,         40,        1,   174243, 0x0753d1d4
-0,         41,         41,        1,   174243, 0x8266bd6d
-0,         42,         42,        1,   174243, 0x4ce3cda9
-0,         43,         43,        1,   174243, 0x4ce3cda9
-0,         44,         44,        1,   174243, 0x4ce3cda9
-0,         45,         45,        1,   174243, 0x4ce3cda9
-0,         46,         46,        1,   174243, 0xe5f7cd98
-0,         47,         47,        1,   174243, 0xe5f7cd98
-0,         48,         48,        1,   174243, 0xe5f7cd98
-0,         49,         49,        1,   174243, 0xe5f7cd98
-0,         50,         50,        1,   174243, 0x78c5cdb7
-0,         51,         51,        1,   174243, 0x78c5cdb7
-0,         52,         52,        1,   174243, 0x78c5cdb7
-0,         53,         53,        1,   174243, 0xce7ccd92
-0,         54,         54,        1,   174243, 0xce7ccd92
-0,         55,         55,        1,   174243, 0xce7ccd92
-0,         56,         56,        1,   174243, 0xce7ccd92
+0,          0,          0,        1,   174243, 0x15ab835b
+0,          1,          1,        1,   174243, 0x12d3acd0
+0,          2,          2,        1,   174243, 0x12d3acd0
+0,          3,          3,        1,   174243, 0x12d3acd0
+0,          4,          4,        1,   174243, 0x12d3acd0
+0,          5,          5,        1,   174243, 0x12d3acd0
+0,          6,          6,        1,   174243, 0x12d3acd0
+0,          7,          7,        1,   174243, 0x12d3acd0
+0,          8,          8,        1,   174243, 0x12d3acd0
+0,          9,          9,        1,   174243, 0xb21561b4
+0,         10,         10,        1,   174243, 0x49343b5b
+0,         11,         11,        1,   174243, 0x9bd65f49
+0,         12,         12,        1,   174243, 0xed3b9960
+0,         13,         13,        1,   174243, 0x14f70294
+0,         14,         14,        1,   174243, 0x14f70294
+0,         15,         15,        1,   174243, 0x14f70294
+0,         16,         16,        1,   174243, 0x92ac2316
+0,         17,         17,        1,   174243, 0x12729ac0
+0,         18,         18,        1,   174243, 0x6db0bfbd
+0,         19,         19,        1,   174243, 0x6db0bfbd
+0,         20,         20,        1,   174243, 0x6db0bfbd
+0,         21,         21,        1,   174243, 0x6db0bfbd
+0,         22,         22,        1,   174243, 0xd66cbef7
+0,         23,         23,        1,   174243, 0xd66cbef7
+0,         24,         24,        1,   174243, 0xd66cbef7
+0,         25,         25,        1,   174243, 0xd66cbef7
+0,         26,         26,        1,   174243, 0x3314c3d3
+0,         27,         27,        1,   174243, 0x34efb358
+0,         28,         28,        1,   174243, 0x7307d8c8
+0,         29,         29,        1,   174243, 0x80b57ea1
+0,         30,         30,        1,   174243, 0xb0b51bf9
+0,         31,         31,        1,   174243, 0x60c07516
+0,         32,         32,        1,   174243, 0xcc35033f
+0,         33,         33,        1,   174243, 0x2bf12dc4
+0,         34,         34,        1,   174243, 0xf3025eb7
+0,         35,         35,        1,   174243, 0xf3025eb7
+0,         36,         36,        1,   174243, 0xf3025eb7
+0,         37,         37,        1,   174243, 0xf3025eb7
+0,         38,         38,        1,   174243, 0x06e761b3
+0,         39,         39,        1,   174243, 0xa21152fb
+0,         40,         40,        1,   174243, 0xd6dcc575
+0,         41,         41,        1,   174243, 0xd6ceb82a
+0,         42,         42,        1,   174243, 0x20efc206
+0,         43,         43,        1,   174243, 0x20efc206
+0,         44,         44,        1,   174243, 0x20efc206
+0,         45,         45,        1,   174243, 0x20efc206
+0,         46,         46,        1,   174243, 0xba03c1f5
+0,         47,         47,        1,   174243, 0xba03c1f5
+0,         48,         48,        1,   174243, 0xba03c1f5
+0,         49,         49,        1,   174243, 0xba03c1f5
+0,         50,         50,        1,   174243, 0x4cd1c214
+0,         51,         51,        1,   174243, 0x4cd1c214
+0,         52,         52,        1,   174243, 0x4cd1c214
+0,         53,         53,        1,   174243, 0xa288c1ef
+0,         54,         54,        1,   174243, 0xa288c1ef
+0,         55,         55,        1,   174243, 0xa288c1ef
+0,         56,         56,        1,   174243, 0xa288c1ef

diff --git a/tests/ref/fate/api-mjpeg-codec-param b/tests/ref/fate/api-mjpeg-codec-param
index 02f656d..290f941 100644
--- a/tests/ref/fate/api-mjpeg-codec-param
+++ b/tests/ref/fate/api-mjpeg-codec-param

@@ -3,7 +3,6 @@
     ab=0
     bt=4000000
     flags=0x00000000
-    me_method=5
     time_base=0/1
     g=12
     ar=0
@@ -18,7 +17,6 @@
     qdiff=3
     bf=0
     b_qfactor=1.250000
-    rc_strategy=0
     b_strategy=0
     ps=0
     mv_bits=0
@@ -38,18 +36,12 @@
     has_b_frames=0
     block_align=0
     mpeg_quant=0
-    qsquish=0.000000
-    rc_qmod_amp=0.000000
-    rc_qmod_freq=0
     rc_override_count=0
-    rc_eq=
     maxrate=0
     minrate=0
     bufsize=0
-    rc_buf_aggressivity=1.000000
     i_qfactor=-0.800000
     i_qoffset=0.000000
-    rc_init_cplx=0.000000
     dct=0
     lumi_mask=0.000000
     tcplx_mask=0.000000
@@ -64,7 +56,6 @@
     aspect=180/180
     sar=180/180
     debug=0x00000000
-    vismv=0x00000000
     cmp=0
     subcmp=0
     mbcmp=0
@@ -75,39 +66,28 @@
     precmp=0
     pre_dia_size=0
     subq=8
-    dtg_active_format=0
     me_range=0
-    ibias=999999
-    pbias=999999
     global_quality=0
     coder=0
     context=0
     slice_flags=0
-    xvmc_acceleration=0
     mbd=0
-    stream_codec_tag=0
     sc_threshold=0
-    lmin=0
-    lmax=0
     nr=0
     rc_init_occupancy=0
     flags2=0x00000000
-    error=0
     threads=1
-    me_threshold=0
-    mb_threshold=0
     dc=0
     nssew=8
     skip_top=0
     skip_bottom=0
-    profile=-99
+    profile=192
     level=-99
     lowres=0
     skip_threshold=0
     skip_factor=0
     skip_exp=0
     skipcmp=13
-    border_mask=0.000000
     mblmin=236
     mblmax=3658
     mepc=256
@@ -120,7 +100,6 @@
     refs=1
     chromaoffset=0
     trellis=0
-    sc_factor=6
     mv0_threshold=256
     b_sensitivity=40
     compression_level=-1
@@ -158,12 +137,12 @@
     video_size=400x225
     max_pixels=2147483647
     hwaccel_flags=0x00000001
+    extra_hw_frames=-1
 stream=0, decode=1
     b=0
     ab=0
     bt=4000000
     flags=0x00000000
-    me_method=5
     time_base=0/1
     g=12
     ar=0
@@ -178,7 +157,6 @@
     qdiff=3
     bf=0
     b_qfactor=1.250000
-    rc_strategy=0
     b_strategy=0
     ps=0
     mv_bits=0
@@ -198,18 +176,12 @@
     has_b_frames=0
     block_align=0
     mpeg_quant=0
-    qsquish=0.000000
-    rc_qmod_amp=0.000000
-    rc_qmod_freq=0
     rc_override_count=0
-    rc_eq=
     maxrate=0
     minrate=0
     bufsize=0
-    rc_buf_aggressivity=1.000000
     i_qfactor=-0.800000
     i_qoffset=0.000000
-    rc_init_cplx=0.000000
     dct=0
     lumi_mask=0.000000
     tcplx_mask=0.000000
@@ -224,7 +196,6 @@
     aspect=180/180
     sar=180/180
     debug=0x00000000
-    vismv=0x00000000
     cmp=0
     subcmp=0
     mbcmp=0
@@ -235,39 +206,28 @@
     precmp=0
     pre_dia_size=0
     subq=8
-    dtg_active_format=0
     me_range=0
-    ibias=999999
-    pbias=999999
     global_quality=0
     coder=0
     context=0
     slice_flags=0
-    xvmc_acceleration=0
     mbd=0
-    stream_codec_tag=0
     sc_threshold=0
-    lmin=0
-    lmax=0
     nr=0
     rc_init_occupancy=0
     flags2=0x00000000
-    error=0
     threads=1
-    me_threshold=0
-    mb_threshold=0
     dc=0
     nssew=8
     skip_top=0
     skip_bottom=0
-    profile=-99
+    profile=192
     level=-99
     lowres=0
     skip_threshold=0
     skip_factor=0
     skip_exp=0
     skipcmp=13
-    border_mask=0.000000
     mblmin=236
     mblmax=3658
     mepc=256
@@ -280,7 +240,6 @@
     refs=1
     chromaoffset=0
     trellis=0
-    sc_factor=6
     mv0_threshold=256
     b_sensitivity=40
     compression_level=-1
@@ -318,3 +277,4 @@
     video_size=400x225
     max_pixels=2147483647
     hwaccel_flags=0x00000001
+    extra_hw_frames=-1

diff --git a/tests/ref/fate/api-png-codec-param b/tests/ref/fate/api-png-codec-param
index de86f1b..f04ffa7 100644
--- a/tests/ref/fate/api-png-codec-param
+++ b/tests/ref/fate/api-png-codec-param

@@ -3,7 +3,6 @@
     ab=0
     bt=4000000
     flags=0x00000000
-    me_method=5
     time_base=0/1
     g=12
     ar=0
@@ -18,7 +17,6 @@
     qdiff=3
     bf=0
     b_qfactor=1.250000
-    rc_strategy=0
     b_strategy=0
     ps=0
     mv_bits=0
@@ -38,18 +36,12 @@
     has_b_frames=0
     block_align=0
     mpeg_quant=0
-    qsquish=0.000000
-    rc_qmod_amp=0.000000
-    rc_qmod_freq=0
     rc_override_count=0
-    rc_eq=
     maxrate=0
     minrate=0
     bufsize=0
-    rc_buf_aggressivity=1.000000
     i_qfactor=-0.800000
     i_qoffset=0.000000
-    rc_init_cplx=0.000000
     dct=0
     lumi_mask=0.000000
     tcplx_mask=0.000000
@@ -64,7 +56,6 @@
     aspect=2835/2835
     sar=2835/2835
     debug=0x00000000
-    vismv=0x00000000
     cmp=0
     subcmp=0
     mbcmp=0
@@ -75,27 +66,17 @@
     precmp=0
     pre_dia_size=0
     subq=8
-    dtg_active_format=0
     me_range=0
-    ibias=999999
-    pbias=999999
     global_quality=0
     coder=0
     context=0
     slice_flags=0
-    xvmc_acceleration=0
     mbd=0
-    stream_codec_tag=0
     sc_threshold=0
-    lmin=0
-    lmax=0
     nr=0
     rc_init_occupancy=0
     flags2=0x00000000
-    error=0
     threads=1
-    me_threshold=0
-    mb_threshold=0
     dc=0
     nssew=8
     skip_top=0
@@ -107,7 +88,6 @@
     skip_factor=0
     skip_exp=0
     skipcmp=13
-    border_mask=0.000000
     mblmin=236
     mblmax=3658
     mepc=256
@@ -120,7 +100,6 @@
     refs=1
     chromaoffset=0
     trellis=0
-    sc_factor=6
     mv0_threshold=256
     b_sensitivity=40
     compression_level=-1
@@ -158,12 +137,12 @@
     video_size=128x128
     max_pixels=2147483647
     hwaccel_flags=0x00000001
+    extra_hw_frames=-1
 stream=0, decode=1
     b=0
     ab=0
     bt=4000000
     flags=0x00000000
-    me_method=5
     time_base=0/1
     g=12
     ar=0
@@ -178,7 +157,6 @@
     qdiff=3
     bf=0
     b_qfactor=1.250000
-    rc_strategy=0
     b_strategy=0
     ps=0
     mv_bits=0
@@ -198,18 +176,12 @@
     has_b_frames=0
     block_align=0
     mpeg_quant=0
-    qsquish=0.000000
-    rc_qmod_amp=0.000000
-    rc_qmod_freq=0
     rc_override_count=0
-    rc_eq=
     maxrate=0
     minrate=0
     bufsize=0
-    rc_buf_aggressivity=1.000000
     i_qfactor=-0.800000
     i_qoffset=0.000000
-    rc_init_cplx=0.000000
     dct=0
     lumi_mask=0.000000
     tcplx_mask=0.000000
@@ -224,7 +196,6 @@
     aspect=2835/2835
     sar=2835/2835
     debug=0x00000000
-    vismv=0x00000000
     cmp=0
     subcmp=0
     mbcmp=0
@@ -235,27 +206,17 @@
     precmp=0
     pre_dia_size=0
     subq=8
-    dtg_active_format=0
     me_range=0
-    ibias=999999
-    pbias=999999
     global_quality=0
     coder=0
     context=0
     slice_flags=0
-    xvmc_acceleration=0
     mbd=0
-    stream_codec_tag=0
     sc_threshold=0
-    lmin=0
-    lmax=0
     nr=0
     rc_init_occupancy=0
     flags2=0x00000000
-    error=0
     threads=1
-    me_threshold=0
-    mb_threshold=0
     dc=0
     nssew=8
     skip_top=0
@@ -267,7 +228,6 @@
     skip_factor=0
     skip_exp=0
     skipcmp=13
-    border_mask=0.000000
     mblmin=236
     mblmax=3658
     mepc=256
@@ -280,7 +240,6 @@
     refs=1
     chromaoffset=0
     trellis=0
-    sc_factor=6
     mv0_threshold=256
     b_sensitivity=40
     compression_level=-1
@@ -318,3 +277,4 @@
     video_size=128x128
     max_pixels=2147483647
     hwaccel_flags=0x00000001
+    extra_hw_frames=-1

diff --git a/tests/ref/fate/binsub-movtextenc b/tests/ref/fate/binsub-movtextenc
index 22ee85a..dacee09 100644
--- a/tests/ref/fate/binsub-movtextenc
+++ b/tests/ref/fate/binsub-movtextenc

@@ -1 +1 @@
-af6a8f38d7c11d9af7823cc44554d2ad
+66b25412f7ca699ee525ba162246edb6

diff --git a/tests/ref/fate/cbs-h264-AUD_MW_E b/tests/ref/fate/cbs-h264-AUD_MW_E
new file mode 100644
index 0000000..f204792
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-AUD_MW_E

@@ -0,0 +1 @@
+9b8884667eda0b9853bec631458686ce

diff --git a/tests/ref/fate/cbs-h264-BASQP1_Sony_C b/tests/ref/fate/cbs-h264-BASQP1_Sony_C
new file mode 100644
index 0000000..c2185c7
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-BASQP1_Sony_C

@@ -0,0 +1 @@
+00c52ae60bf9a41ae1145fbf5fea9838

diff --git a/tests/ref/fate/cbs-h264-CABACI3_Sony_B b/tests/ref/fate/cbs-h264-CABACI3_Sony_B
new file mode 100644
index 0000000..59aeb72
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-CABACI3_Sony_B

@@ -0,0 +1 @@
+2d94c80b858aec880530bad47afe3668

diff --git a/tests/ref/fate/cbs-h264-CVBS3_Sony_C b/tests/ref/fate/cbs-h264-CVBS3_Sony_C
new file mode 100644
index 0000000..55f5e0b
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-CVBS3_Sony_C

@@ -0,0 +1 @@
+59ff1df9b25e80277cad4ad99e634df6

diff --git a/tests/ref/fate/cbs-h264-CVFC1_Sony_C b/tests/ref/fate/cbs-h264-CVFC1_Sony_C
new file mode 100644
index 0000000..98004cf
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-CVFC1_Sony_C

@@ -0,0 +1 @@
+669f4f3d3ae35fa5a6f5c94e48776dcf

diff --git a/tests/ref/fate/cbs-h264-CVMANL1_TOSHIBA_B b/tests/ref/fate/cbs-h264-CVMANL1_TOSHIBA_B
new file mode 100644
index 0000000..14aa453
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-CVMANL1_TOSHIBA_B

@@ -0,0 +1 @@
+0c1d9694df747cc4697caf866bd3051a

diff --git a/tests/ref/fate/cbs-h264-CVNLFI1_Sony_C b/tests/ref/fate/cbs-h264-CVNLFI1_Sony_C
new file mode 100644
index 0000000..d5f5ad1
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-CVNLFI1_Sony_C

@@ -0,0 +1 @@
+7817d89bd749bc617a225978958a3af0

diff --git a/tests/ref/fate/cbs-h264-CVSE2_Sony_B b/tests/ref/fate/cbs-h264-CVSE2_Sony_B
new file mode 100644
index 0000000..7845723
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-CVSE2_Sony_B

@@ -0,0 +1 @@
+ca8bdba497bd2f3b97c50d59692eb537

diff --git a/tests/ref/fate/cbs-h264-CVWP1_TOSHIBA_E b/tests/ref/fate/cbs-h264-CVWP1_TOSHIBA_E
new file mode 100644
index 0000000..4cb9c47
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-CVWP1_TOSHIBA_E

@@ -0,0 +1 @@
+01290611165b8d8ccba8468f3dae4c4d

diff --git a/tests/ref/fate/cbs-h264-FM1_BT_B b/tests/ref/fate/cbs-h264-FM1_BT_B
new file mode 100644
index 0000000..862de95
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-FM1_BT_B

@@ -0,0 +1 @@
+f7d5474ec576eea3f70d83a26a641a60

diff --git a/tests/ref/fate/cbs-h264-MR1_BT_A b/tests/ref/fate/cbs-h264-MR1_BT_A
new file mode 100644
index 0000000..0532652
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-MR1_BT_A

@@ -0,0 +1 @@
+699d37e66764ddb3b4265c299ca77dcd

diff --git a/tests/ref/fate/cbs-h264-SVA_Base_B b/tests/ref/fate/cbs-h264-SVA_Base_B
new file mode 100644
index 0000000..a591b81
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-SVA_Base_B

@@ -0,0 +1 @@
+443e55dd5f63dccf9a62acbb48451b08

diff --git a/tests/ref/fate/cbs-h264-Sharp_MP_PAFF_1r2 b/tests/ref/fate/cbs-h264-Sharp_MP_PAFF_1r2
new file mode 100644
index 0000000..cc7d639
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-Sharp_MP_PAFF_1r2

@@ -0,0 +1 @@
+fd01840ed6b086c3118b7c53c86d01f5

diff --git a/tests/ref/fate/cbs-h264-sei-1 b/tests/ref/fate/cbs-h264-sei-1
new file mode 100644
index 0000000..2d1cb1f
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-sei-1

@@ -0,0 +1 @@
+5f537551b7dfab76a172f1aebb028986

diff --git a/tests/ref/fate/cbs-h264-sp1_bt_a b/tests/ref/fate/cbs-h264-sp1_bt_a
new file mode 100644
index 0000000..388c53a
--- /dev/null
+++ b/tests/ref/fate/cbs-h264-sp1_bt_a

@@ -0,0 +1 @@
+8405c5583d31d7015ed401b34b4ec93c

diff --git a/tests/ref/fate/cbs-hevc-CAINIT_E_SHARP_3 b/tests/ref/fate/cbs-hevc-CAINIT_E_SHARP_3
new file mode 100644
index 0000000..53af86a
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-CAINIT_E_SHARP_3

@@ -0,0 +1 @@
+44d1c0b80828af779d942cc20dde4ea4

diff --git a/tests/ref/fate/cbs-hevc-CAINIT_H_SHARP_3 b/tests/ref/fate/cbs-hevc-CAINIT_H_SHARP_3
new file mode 100644
index 0000000..5e127d7
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-CAINIT_H_SHARP_3

@@ -0,0 +1 @@
+ae9311dfcaf65bb8de9c4fcf23ce0871

diff --git a/tests/ref/fate/cbs-hevc-CONFWIN_A_Sony_1 b/tests/ref/fate/cbs-hevc-CONFWIN_A_Sony_1
new file mode 100644
index 0000000..0044515
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-CONFWIN_A_Sony_1

@@ -0,0 +1 @@
+dce8104b2addbdd601eb280a88e18583

diff --git a/tests/ref/fate/cbs-hevc-HRD_A_Fujitsu_2 b/tests/ref/fate/cbs-hevc-HRD_A_Fujitsu_2
new file mode 100644
index 0000000..b9c90ae
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-HRD_A_Fujitsu_2

@@ -0,0 +1 @@
+716a90051f028c90daeb86b3825af36d

diff --git a/tests/ref/fate/cbs-hevc-LTRPSPS_A_Qualcomm_1 b/tests/ref/fate/cbs-hevc-LTRPSPS_A_Qualcomm_1
new file mode 100644
index 0000000..21dfa57
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-LTRPSPS_A_Qualcomm_1

@@ -0,0 +1 @@
+11b599202a4d25693123bea8bb003e54

diff --git a/tests/ref/fate/cbs-hevc-NUT_A_ericsson_5 b/tests/ref/fate/cbs-hevc-NUT_A_ericsson_5
new file mode 100644
index 0000000..32e58af
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-NUT_A_ericsson_5

@@ -0,0 +1 @@
+25cf94dfa2e0334eeedbfa9a8ed1c4b2

diff --git a/tests/ref/fate/cbs-hevc-PICSIZE_A_Bossen_1 b/tests/ref/fate/cbs-hevc-PICSIZE_A_Bossen_1
new file mode 100644
index 0000000..cc18004
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-PICSIZE_A_Bossen_1

@@ -0,0 +1 @@
+e87fbd90c297d401738db928e3e04dd4

diff --git a/tests/ref/fate/cbs-hevc-PICSIZE_B_Bossen_1 b/tests/ref/fate/cbs-hevc-PICSIZE_B_Bossen_1
new file mode 100644
index 0000000..5495cec
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-PICSIZE_B_Bossen_1

@@ -0,0 +1 @@
+4993d49d6f2f532dfc683a9d26c1e313

diff --git a/tests/ref/fate/cbs-hevc-RPLM_A_qualcomm_4 b/tests/ref/fate/cbs-hevc-RPLM_A_qualcomm_4
new file mode 100644
index 0000000..c68d615
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-RPLM_A_qualcomm_4

@@ -0,0 +1 @@
+f5d2633eefcd95e189faf4302d270457

diff --git a/tests/ref/fate/cbs-hevc-RPS_A_docomo_4 b/tests/ref/fate/cbs-hevc-RPS_A_docomo_4
new file mode 100644
index 0000000..772d16a
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-RPS_A_docomo_4

@@ -0,0 +1 @@
+97bd4fefd8cd95584f586027e244f283

diff --git a/tests/ref/fate/cbs-hevc-RPS_E_qualcomm_5 b/tests/ref/fate/cbs-hevc-RPS_E_qualcomm_5
new file mode 100644
index 0000000..faef7da
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-RPS_E_qualcomm_5

@@ -0,0 +1 @@
+b2528ef681729176ccb38a77be93a0de

diff --git a/tests/ref/fate/cbs-hevc-SLIST_A_Sony_4 b/tests/ref/fate/cbs-hevc-SLIST_A_Sony_4
new file mode 100644
index 0000000..23b2e32
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-SLIST_A_Sony_4

@@ -0,0 +1 @@
+72cf53bbc967c9679e21a6d3203edb07

diff --git a/tests/ref/fate/cbs-hevc-SLIST_D_Sony_9 b/tests/ref/fate/cbs-hevc-SLIST_D_Sony_9
new file mode 100644
index 0000000..4cea86d
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-SLIST_D_Sony_9

@@ -0,0 +1 @@
+59a22d715e30748492da5e0b9d421909

diff --git a/tests/ref/fate/cbs-hevc-STRUCT_A_Samsung_5 b/tests/ref/fate/cbs-hevc-STRUCT_A_Samsung_5
new file mode 100644
index 0000000..6a4508b
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-STRUCT_A_Samsung_5

@@ -0,0 +1 @@
+647eb851b935fd3bc6a98ce5ce45dbc7

diff --git a/tests/ref/fate/cbs-hevc-TILES_B_Cisco_1 b/tests/ref/fate/cbs-hevc-TILES_B_Cisco_1
new file mode 100644
index 0000000..d767e26
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-TILES_B_Cisco_1

@@ -0,0 +1 @@
+85a114def19cefbd0fb0daf8370d711c

diff --git a/tests/ref/fate/cbs-hevc-WPP_A_ericsson_MAIN_2 b/tests/ref/fate/cbs-hevc-WPP_A_ericsson_MAIN_2
new file mode 100644
index 0000000..fe6928f
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-WPP_A_ericsson_MAIN_2

@@ -0,0 +1 @@
+de7d440b556eea827953e6d12aeb4023

diff --git a/tests/ref/fate/cbs-hevc-WPP_F_ericsson_MAIN_2 b/tests/ref/fate/cbs-hevc-WPP_F_ericsson_MAIN_2
new file mode 100644
index 0000000..08aee56
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-WPP_F_ericsson_MAIN_2

@@ -0,0 +1 @@
+f197136f1fb3242c3422a48470dd7d35

diff --git a/tests/ref/fate/cbs-hevc-WP_A_Toshiba_3 b/tests/ref/fate/cbs-hevc-WP_A_Toshiba_3
new file mode 100644
index 0000000..b868c43
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-WP_A_Toshiba_3

@@ -0,0 +1 @@
+158312a1a35ef4b20cb4aeee48549c03

diff --git a/tests/ref/fate/cbs-hevc-ipcm_E_NEC_2 b/tests/ref/fate/cbs-hevc-ipcm_E_NEC_2
new file mode 100644
index 0000000..cc68b6f
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-ipcm_E_NEC_2

@@ -0,0 +1 @@
+2e1f9c95364cfac2aa6e6ee3a52c43c4

diff --git a/tests/ref/fate/cbs-mpeg2-hhi_burst_422_short b/tests/ref/fate/cbs-mpeg2-hhi_burst_422_short
new file mode 100644
index 0000000..c319fba
--- /dev/null
+++ b/tests/ref/fate/cbs-mpeg2-hhi_burst_422_short

@@ -0,0 +1 @@
+e0c2fdd9baeba0c5ba5839a8cd7a72d3

diff --git a/tests/ref/fate/cbs-mpeg2-sony-ct3 b/tests/ref/fate/cbs-mpeg2-sony-ct3
new file mode 100644
index 0000000..b5b4b12
--- /dev/null
+++ b/tests/ref/fate/cbs-mpeg2-sony-ct3

@@ -0,0 +1 @@
+b1e15a09cfffbad801810af0928736ab

diff --git a/tests/ref/fate/cbs-mpeg2-tcela-6 b/tests/ref/fate/cbs-mpeg2-tcela-6
new file mode 100644
index 0000000..530369d
--- /dev/null
+++ b/tests/ref/fate/cbs-mpeg2-tcela-6

@@ -0,0 +1 @@
+771b6756a63793e05b74e645794908a2

diff --git a/tests/ref/fate/cbs-vp9-vp90-2-03-deltaq b/tests/ref/fate/cbs-vp9-vp90-2-03-deltaq
new file mode 100644
index 0000000..db09cfd
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp90-2-03-deltaq

@@ -0,0 +1 @@
+bb630ef560f83951fa6547a664fdb636

diff --git a/tests/ref/fate/cbs-vp9-vp90-2-05-resize b/tests/ref/fate/cbs-vp9-vp90-2-05-resize
new file mode 100644
index 0000000..8f036bb
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp90-2-05-resize

@@ -0,0 +1 @@
+6838422ebb45df353a2bad62b9aff8e9

diff --git a/tests/ref/fate/cbs-vp9-vp90-2-06-bilinear b/tests/ref/fate/cbs-vp9-vp90-2-06-bilinear
new file mode 100644
index 0000000..f579459
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp90-2-06-bilinear

@@ -0,0 +1 @@
+2ca9d012c7212e38f5e2727ac66ec6c5

diff --git a/tests/ref/fate/cbs-vp9-vp90-2-09-lf_deltas b/tests/ref/fate/cbs-vp9-vp90-2-09-lf_deltas
new file mode 100644
index 0000000..e0b5686
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp90-2-09-lf_deltas

@@ -0,0 +1 @@
+78f5e46bfaecbcd62b9126697a0d97b7

diff --git a/tests/ref/fate/cbs-vp9-vp90-2-10-show-existing-frame b/tests/ref/fate/cbs-vp9-vp90-2-10-show-existing-frame
new file mode 100644
index 0000000..4a4d752
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp90-2-10-show-existing-frame

@@ -0,0 +1 @@
+eea9d10a696c6ed971e4fae9fb619b10

diff --git a/tests/ref/fate/cbs-vp9-vp90-2-10-show-existing-frame2 b/tests/ref/fate/cbs-vp9-vp90-2-10-show-existing-frame2
new file mode 100644
index 0000000..6da8999
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp90-2-10-show-existing-frame2

@@ -0,0 +1 @@
+abf4c7d4be7d3576d96b6f92166b5894

diff --git a/tests/ref/fate/cbs-vp9-vp90-2-segmentation-aq-akiyo b/tests/ref/fate/cbs-vp9-vp90-2-segmentation-aq-akiyo
new file mode 100644
index 0000000..12dfb10
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp90-2-segmentation-aq-akiyo

@@ -0,0 +1 @@
+86cd3750cc9a0672717643c9b9f87fd5

diff --git a/tests/ref/fate/cbs-vp9-vp90-2-segmentation-sf-akiyo b/tests/ref/fate/cbs-vp9-vp90-2-segmentation-sf-akiyo
new file mode 100644
index 0000000..c2b1b87
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp90-2-segmentation-sf-akiyo

@@ -0,0 +1 @@
+5d12fbe6220aae9e62b1d79785a83387

diff --git a/tests/ref/fate/cbs-vp9-vp90-2-tiling-pedestrian b/tests/ref/fate/cbs-vp9-vp90-2-tiling-pedestrian
new file mode 100644
index 0000000..f9cab39
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp90-2-tiling-pedestrian

@@ -0,0 +1 @@
+4c51f3c796baa7c2baa4b7ec0d011406

diff --git a/tests/ref/fate/cbs-vp9-vp91-2-04-yuv440 b/tests/ref/fate/cbs-vp9-vp91-2-04-yuv440
new file mode 100644
index 0000000..6289930
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp91-2-04-yuv440

@@ -0,0 +1 @@
+293bdc92851ca1105e27f04737d8c5f3

diff --git a/tests/ref/fate/cbs-vp9-vp91-2-04-yuv444 b/tests/ref/fate/cbs-vp9-vp91-2-04-yuv444
new file mode 100644
index 0000000..628ea9a
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp91-2-04-yuv444

@@ -0,0 +1 @@
+911eafd8e442e646c5ce97d781757ca8

diff --git a/tests/ref/fate/cbs-vp9-vp92-2-20-10bit-yuv420 b/tests/ref/fate/cbs-vp9-vp92-2-20-10bit-yuv420
new file mode 100644
index 0000000..eeb7580
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp92-2-20-10bit-yuv420

@@ -0,0 +1 @@
+16198c32c29228e0513004ed1bf6fcee

diff --git a/tests/ref/fate/cbs-vp9-vp93-2-20-10bit-yuv422 b/tests/ref/fate/cbs-vp9-vp93-2-20-10bit-yuv422
new file mode 100644
index 0000000..b25bc11
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp93-2-20-10bit-yuv422

@@ -0,0 +1 @@
+4bceedef4aa6a663a09761971e43b5a8

diff --git a/tests/ref/fate/cbs-vp9-vp93-2-20-12bit-yuv444 b/tests/ref/fate/cbs-vp9-vp93-2-20-12bit-yuv444
new file mode 100644
index 0000000..8d122d1
--- /dev/null
+++ b/tests/ref/fate/cbs-vp9-vp93-2-20-12bit-yuv444

@@ -0,0 +1 @@
+0f413b840633bfcfcc78b4c9fab933bf

diff --git a/tests/ref/fate/cfhd-1 b/tests/ref/fate/cfhd-1
new file mode 100644
index 0000000..0280d66
--- /dev/null
+++ b/tests/ref/fate/cfhd-1

@@ -0,0 +1,15 @@
+#tb 0: 1001/30000
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 720x480
+#sar 0: 0/1
+0,          0,          0,        1,  1382400, 0xa3e49817
+0,          1,          1,        1,  1382400, 0x544fdfac
+0,          2,          2,        1,  1382400, 0x84964e11
+0,          3,          3,        1,  1382400, 0xc608c8d1
+0,          4,          4,        1,  1382400, 0xf2f1404f
+0,          5,          5,        1,  1382400, 0x5a3100ba
+0,          6,          6,        1,  1382400, 0x3727baa9
+0,          7,          7,        1,  1382400, 0x894f07db
+0,          8,          8,        1,  1382400, 0x3ef27d46
+0,          9,          9,        1,  1382400, 0x1f90880d

diff --git a/tests/ref/fate/cfhd-2 b/tests/ref/fate/cfhd-2
new file mode 100644
index 0000000..77db54c
--- /dev/null
+++ b/tests/ref/fate/cfhd-2

@@ -0,0 +1,15 @@
+#tb 0: 1001/30000
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 720x480
+#sar 0: 0/1
+0,          0,          0,        1,  2073600, 0x53fab433
+0,          1,          1,        1,  2073600, 0x0d2b3f64
+0,          2,          2,        1,  2073600, 0x857d1d48
+0,          3,          3,        1,  2073600, 0xe1a7df32
+0,          4,          4,        1,  2073600, 0x615861c1
+0,          5,          5,        1,  2073600, 0xf6cdb0a9
+0,          6,          6,        1,  2073600, 0x55dd9f16
+0,          7,          7,        1,  2073600, 0x7c126a32
+0,          8,          8,        1,  2073600, 0x53fdd4c5
+0,          9,          9,        1,  2073600, 0x6062a4b3

diff --git a/tests/ref/fate/cfhd-3 b/tests/ref/fate/cfhd-3
new file mode 100644
index 0000000..59fdc92
--- /dev/null
+++ b/tests/ref/fate/cfhd-3

@@ -0,0 +1,15 @@
+#tb 0: 1000/14587
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 496x241
+#sar 0: 0/1
+0,          0,          0,        1,   478144, 0x48a01dbb
+0,          1,          1,        1,   478144, 0x48a01dbb
+0,          2,          2,        1,   478144, 0x48a01dbb
+0,          3,          3,        1,   478144, 0xb978a72f
+0,          4,          4,        1,   478144, 0x7bbb4679
+0,          5,          5,        1,   478144, 0xc3fd3f59
+0,          6,          6,        1,   478144, 0xfd2a4816
+0,          7,          7,        1,   478144, 0x207f65d3
+0,          8,          8,        1,   478144, 0x207f65d3
+0,          9,          9,        1,   478144, 0x207f65d3

diff --git a/tests/ref/fate/concat-demuxer-extended-lavf-mxf b/tests/ref/fate/concat-demuxer-extended-lavf-mxf
index c47f14f..2fb5fce 100644
--- a/tests/ref/fate/concat-demuxer-extended-lavf-mxf
+++ b/tests/ref/fate/concat-demuxer-extended-lavf-mxf

@@ -1 +1 @@
-7e53f4c5cb0c9afda2771c9f0c697d9c *tests/data/fate/concat-demuxer-extended-lavf-mxf.ffprobe
+a6fb9c37dc71cb43eb9664a8ae9f1c66 *tests/data/fate/concat-demuxer-extended-lavf-mxf.ffprobe

diff --git a/tests/ref/fate/concat-demuxer-extended-lavf-mxf_d10 b/tests/ref/fate/concat-demuxer-extended-lavf-mxf_d10
index 75d386e..60d729b 100644
--- a/tests/ref/fate/concat-demuxer-extended-lavf-mxf_d10
+++ b/tests/ref/fate/concat-demuxer-extended-lavf-mxf_d10

@@ -1 +1 @@
-44810fc2eeee0072d9d7011b0d2afe59 *tests/data/fate/concat-demuxer-extended-lavf-mxf_d10.ffprobe
+cb7c8eac6f8917e39658e1fa4a250da8 *tests/data/fate/concat-demuxer-extended-lavf-mxf_d10.ffprobe

diff --git a/tests/ref/fate/concat-demuxer-simple1-lavf-mxf b/tests/ref/fate/concat-demuxer-simple1-lavf-mxf
index 1174a1e..d18e35b 100644
--- a/tests/ref/fate/concat-demuxer-simple1-lavf-mxf
+++ b/tests/ref/fate/concat-demuxer-simple1-lavf-mxf

@@ -1,124 +1,124 @@
-video|0|0|0.000000|-1|-0.040000|1|0.040000|N/A|N/A|24801|6144|K_
-audio|1|0|0.000000|0|0.000000|1920|0.040000|N/A|N/A|3840|31232|K_
-video|0|3|0.120000|0|0.000000|1|0.040000|N/A|N/A|16743|35840|__
-audio|1|1920|0.040000|1920|0.040000|1920|0.040000|N/A|N/A|3840|52736|K_
-video|0|1|0.040000|1|0.040000|1|0.040000|N/A|N/A|13812|57344|__
-audio|1|3840|0.080000|3840|0.080000|1920|0.040000|N/A|N/A|3840|71680|K_
-video|0|2|0.080000|2|0.080000|1|0.040000|N/A|N/A|13607|76288|__
-audio|1|5760|0.120000|5760|0.120000|1920|0.040000|N/A|N/A|3840|90112|K_
-video|0|6|0.240000|3|0.120000|1|0.040000|N/A|N/A|16158|94720|__
-audio|1|7680|0.160000|7680|0.160000|1920|0.040000|N/A|N/A|3840|111104|K_
-video|0|4|0.160000|4|0.160000|1|0.040000|N/A|N/A|13943|115712|__
-audio|1|9600|0.200000|9600|0.200000|1920|0.040000|N/A|N/A|3840|130048|K_
-video|0|5|0.200000|5|0.200000|1|0.040000|N/A|N/A|11223|134656|__
-audio|1|11520|0.240000|11520|0.240000|1920|0.040000|N/A|N/A|3840|145920|K_
-video|0|9|0.360000|6|0.240000|1|0.040000|N/A|N/A|20298|150528|__
-audio|1|13440|0.280000|13440|0.280000|1920|0.040000|N/A|N/A|3840|171008|K_
-video|0|7|0.280000|7|0.280000|1|0.040000|N/A|N/A|13341|175616|__
-audio|1|15360|0.320000|15360|0.320000|1920|0.040000|N/A|N/A|3840|189440|K_
-video|0|8|0.320000|8|0.320000|1|0.040000|N/A|N/A|12362|194048|__
-audio|1|17280|0.360000|17280|0.360000|1920|0.040000|N/A|N/A|3840|206848|K_
-video|0|12|0.480000|9|0.360000|1|0.040000|N/A|N/A|24786|211456|K_
-audio|1|19200|0.400000|19200|0.400000|1920|0.040000|N/A|N/A|3840|236544|K_
-video|0|10|0.400000|10|0.400000|1|0.040000|N/A|N/A|13377|241152|__
-audio|1|21120|0.440000|21120|0.440000|1920|0.040000|N/A|N/A|3840|254976|K_
-video|0|11|0.440000|11|0.440000|1|0.040000|N/A|N/A|15624|259584|__
-audio|1|23040|0.480000|23040|0.480000|1920|0.040000|N/A|N/A|3840|275456|K_
-video|0|15|0.600000|12|0.480000|1|0.040000|N/A|N/A|22597|280064|__
-audio|1|24960|0.520000|24960|0.520000|1920|0.040000|N/A|N/A|3840|303104|K_
-video|0|13|0.520000|13|0.520000|1|0.040000|N/A|N/A|15028|307712|__
-audio|1|26880|0.560000|26880|0.560000|1920|0.040000|N/A|N/A|3840|323072|K_
-video|0|14|0.560000|14|0.560000|1|0.040000|N/A|N/A|14014|327680|__
-audio|1|28800|0.600000|28800|0.600000|1920|0.040000|N/A|N/A|3840|342016|K_
-video|0|18|0.720000|15|0.600000|1|0.040000|N/A|N/A|20731|346624|__
-audio|1|30720|0.640000|30720|0.640000|1920|0.040000|N/A|N/A|3840|367616|K_
-video|0|16|0.640000|16|0.640000|1|0.040000|N/A|N/A|11946|372224|__
-audio|1|32640|0.680000|32640|0.680000|1920|0.040000|N/A|N/A|3840|384512|K_
-video|0|17|0.680000|17|0.680000|1|0.040000|N/A|N/A|14464|389120|__
-audio|1|34560|0.720000|34560|0.720000|1920|0.040000|N/A|N/A|3840|403968|K_
-video|0|21|0.840000|18|0.720000|1|0.040000|N/A|N/A|16189|408576|__
-audio|1|36480|0.760000|36480|0.760000|1920|0.040000|N/A|N/A|3840|424960|K_
-video|0|19|0.760000|19|0.760000|1|0.040000|N/A|N/A|10524|429568|__
-audio|1|38400|0.800000|38400|0.800000|1920|0.040000|N/A|N/A|3840|440320|K_
-video|0|20|0.800000|20|0.800000|1|0.040000|N/A|N/A|10599|444928|__
-audio|1|40320|0.840000|40320|0.840000|1920|0.040000|N/A|N/A|3840|455680|K_
-video|0|24|0.960000|21|0.840000|1|0.040000|N/A|N/A|24711|460288|K_
-audio|1|42240|0.880000|42240|0.880000|1920|0.040000|N/A|N/A|3840|485376|K_
-video|0|22|0.880000|22|0.880000|1|0.040000|N/A|N/A|10840|489984|__
-audio|1|44160|0.920000|44160|0.920000|1920|0.040000|N/A|N/A|3840|501248|K_
-video|0|23|0.920000|23|0.920000|1|0.040000|N/A|N/A|13350|505856|__
-audio|1|46080|0.960000|46080|0.960000|1920|0.040000|N/A|N/A|3840|519680|K_
-video|0|17|0.680000|14|0.560000|1|0.040000|N/A|N/A|24786|211456|K_
-audio|1|28800|0.600000|28800|0.600000|1920|0.040000|N/A|N/A|3840|236544|K_
-video|0|15|0.600000|15|0.600000|1|0.040000|N/A|N/A|13377|241152|__
-audio|1|30720|0.640000|30720|0.640000|1920|0.040000|N/A|N/A|3840|254976|K_
-video|0|16|0.640000|16|0.640000|1|0.040000|N/A|N/A|15624|259584|__
-audio|1|32640|0.680000|32640|0.680000|1920|0.040000|N/A|N/A|3840|275456|K_
-video|0|20|0.800000|17|0.680000|1|0.040000|N/A|N/A|22597|280064|__
-audio|1|34560|0.720000|34560|0.720000|1920|0.040000|N/A|N/A|3840|303104|K_
-video|0|18|0.720000|18|0.720000|1|0.040000|N/A|N/A|15028|307712|__
-audio|1|36480|0.760000|36480|0.760000|1920|0.040000|N/A|N/A|3840|323072|K_
-video|0|19|0.760000|19|0.760000|1|0.040000|N/A|N/A|14014|327680|__
-audio|1|38400|0.800000|38400|0.800000|1920|0.040000|N/A|N/A|3840|342016|K_
-video|0|23|0.920000|20|0.800000|1|0.040000|N/A|N/A|20731|346624|__
-audio|1|40320|0.840000|40320|0.840000|1920|0.040000|N/A|N/A|3840|367616|K_
-video|0|21|0.840000|21|0.840000|1|0.040000|N/A|N/A|11946|372224|__
-audio|1|42240|0.880000|42240|0.880000|1920|0.040000|N/A|N/A|3840|384512|K_
-video|0|22|0.880000|22|0.880000|1|0.040000|N/A|N/A|14464|389120|__
-audio|1|44160|0.920000|44160|0.920000|1920|0.040000|N/A|N/A|3840|403968|K_
-video|0|26|1.040000|23|0.920000|1|0.040000|N/A|N/A|16189|408576|__
-audio|1|46080|0.960000|46080|0.960000|1920|0.040000|N/A|N/A|3840|424960|K_
-video|0|24|0.960000|24|0.960000|1|0.040000|N/A|N/A|10524|429568|__
-audio|1|48000|1.000000|48000|1.000000|1920|0.040000|N/A|N/A|3840|440320|K_
-video|0|25|1.000000|25|1.000000|1|0.040000|N/A|N/A|10599|444928|__
-audio|1|49920|1.040000|49920|1.040000|1920|0.040000|N/A|N/A|3840|455680|K_
-video|0|29|1.160000|26|1.040000|1|0.040000|N/A|N/A|24711|460288|K_
-audio|1|51840|1.080000|51840|1.080000|1920|0.040000|N/A|N/A|3840|485376|K_
-video|0|27|1.080000|27|1.080000|1|0.040000|N/A|N/A|10840|489984|__
-audio|1|53760|1.120000|53760|1.120000|1920|0.040000|N/A|N/A|3840|501248|K_
-video|0|28|1.120000|28|1.120000|1|0.040000|N/A|N/A|13350|505856|__
-audio|1|55680|1.160000|55680|1.160000|1920|0.040000|N/A|N/A|3840|519680|K_
-video|0|25|1.000000|24|0.960000|1|0.040000|N/A|N/A|24801|6144|K_|1
+video|0|0|0.000000|-1|-0.040000|1|0.040000|N/A|N/A|24801|7168|K_
+audio|1|0|0.000000|0|0.000000|1920|0.040000|N/A|N/A|3840|32256|K_
+video|0|3|0.120000|0|0.000000|1|0.040000|N/A|N/A|16743|36864|__
+audio|1|1920|0.040000|1920|0.040000|1920|0.040000|N/A|N/A|3840|53760|K_
+video|0|1|0.040000|1|0.040000|1|0.040000|N/A|N/A|13812|58368|__
+audio|1|3840|0.080000|3840|0.080000|1920|0.040000|N/A|N/A|3840|72704|K_
+video|0|2|0.080000|2|0.080000|1|0.040000|N/A|N/A|13607|77312|__
+audio|1|5760|0.120000|5760|0.120000|1920|0.040000|N/A|N/A|3840|91136|K_
+video|0|6|0.240000|3|0.120000|1|0.040000|N/A|N/A|16158|95744|__
+audio|1|7680|0.160000|7680|0.160000|1920|0.040000|N/A|N/A|3840|112128|K_
+video|0|4|0.160000|4|0.160000|1|0.040000|N/A|N/A|13943|116736|__
+audio|1|9600|0.200000|9600|0.200000|1920|0.040000|N/A|N/A|3840|131072|K_
+video|0|5|0.200000|5|0.200000|1|0.040000|N/A|N/A|11223|135680|__
+audio|1|11520|0.240000|11520|0.240000|1920|0.040000|N/A|N/A|3840|146944|K_
+video|0|9|0.360000|6|0.240000|1|0.040000|N/A|N/A|20298|151552|__
+audio|1|13440|0.280000|13440|0.280000|1920|0.040000|N/A|N/A|3840|172032|K_
+video|0|7|0.280000|7|0.280000|1|0.040000|N/A|N/A|13341|176640|__
+audio|1|15360|0.320000|15360|0.320000|1920|0.040000|N/A|N/A|3840|190464|K_
+video|0|8|0.320000|8|0.320000|1|0.040000|N/A|N/A|12362|195072|__
+audio|1|17280|0.360000|17280|0.360000|1920|0.040000|N/A|N/A|3840|207872|K_
+video|0|12|0.480000|9|0.360000|1|0.040000|N/A|N/A|24786|212480|K_
+audio|1|19200|0.400000|19200|0.400000|1920|0.040000|N/A|N/A|3840|237568|K_
+video|0|10|0.400000|10|0.400000|1|0.040000|N/A|N/A|13377|242176|__
+audio|1|21120|0.440000|21120|0.440000|1920|0.040000|N/A|N/A|3840|256000|K_
+video|0|11|0.440000|11|0.440000|1|0.040000|N/A|N/A|15624|260608|__
+audio|1|23040|0.480000|23040|0.480000|1920|0.040000|N/A|N/A|3840|276480|K_
+video|0|15|0.600000|12|0.480000|1|0.040000|N/A|N/A|22597|281088|__
+audio|1|24960|0.520000|24960|0.520000|1920|0.040000|N/A|N/A|3840|304128|K_
+video|0|13|0.520000|13|0.520000|1|0.040000|N/A|N/A|15028|308736|__
+audio|1|26880|0.560000|26880|0.560000|1920|0.040000|N/A|N/A|3840|324096|K_
+video|0|14|0.560000|14|0.560000|1|0.040000|N/A|N/A|14014|328704|__
+audio|1|28800|0.600000|28800|0.600000|1920|0.040000|N/A|N/A|3840|343040|K_
+video|0|18|0.720000|15|0.600000|1|0.040000|N/A|N/A|20731|347648|__
+audio|1|30720|0.640000|30720|0.640000|1920|0.040000|N/A|N/A|3840|368640|K_
+video|0|16|0.640000|16|0.640000|1|0.040000|N/A|N/A|11946|373248|__
+audio|1|32640|0.680000|32640|0.680000|1920|0.040000|N/A|N/A|3840|385536|K_
+video|0|17|0.680000|17|0.680000|1|0.040000|N/A|N/A|14464|390144|__
+audio|1|34560|0.720000|34560|0.720000|1920|0.040000|N/A|N/A|3840|404992|K_
+video|0|21|0.840000|18|0.720000|1|0.040000|N/A|N/A|16189|409600|__
+audio|1|36480|0.760000|36480|0.760000|1920|0.040000|N/A|N/A|3840|425984|K_
+video|0|19|0.760000|19|0.760000|1|0.040000|N/A|N/A|10524|430592|__
+audio|1|38400|0.800000|38400|0.800000|1920|0.040000|N/A|N/A|3840|441344|K_
+video|0|20|0.800000|20|0.800000|1|0.040000|N/A|N/A|10599|445952|__
+audio|1|40320|0.840000|40320|0.840000|1920|0.040000|N/A|N/A|3840|456704|K_
+video|0|24|0.960000|21|0.840000|1|0.040000|N/A|N/A|24711|461312|K_
+audio|1|42240|0.880000|42240|0.880000|1920|0.040000|N/A|N/A|3840|486400|K_
+video|0|22|0.880000|22|0.880000|1|0.040000|N/A|N/A|10840|491008|__
+audio|1|44160|0.920000|44160|0.920000|1920|0.040000|N/A|N/A|3840|502272|K_
+video|0|23|0.920000|23|0.920000|1|0.040000|N/A|N/A|13350|506880|__
+audio|1|46080|0.960000|46080|0.960000|1920|0.040000|N/A|N/A|3840|520704|K_
+video|0|17|0.680000|14|0.560000|1|0.040000|N/A|N/A|24786|212480|K_
+audio|1|28800|0.600000|28800|0.600000|1920|0.040000|N/A|N/A|3840|237568|K_
+video|0|15|0.600000|15|0.600000|1|0.040000|N/A|N/A|13377|242176|__
+audio|1|30720|0.640000|30720|0.640000|1920|0.040000|N/A|N/A|3840|256000|K_
+video|0|16|0.640000|16|0.640000|1|0.040000|N/A|N/A|15624|260608|__
+audio|1|32640|0.680000|32640|0.680000|1920|0.040000|N/A|N/A|3840|276480|K_
+video|0|20|0.800000|17|0.680000|1|0.040000|N/A|N/A|22597|281088|__
+audio|1|34560|0.720000|34560|0.720000|1920|0.040000|N/A|N/A|3840|304128|K_
+video|0|18|0.720000|18|0.720000|1|0.040000|N/A|N/A|15028|308736|__
+audio|1|36480|0.760000|36480|0.760000|1920|0.040000|N/A|N/A|3840|324096|K_
+video|0|19|0.760000|19|0.760000|1|0.040000|N/A|N/A|14014|328704|__
+audio|1|38400|0.800000|38400|0.800000|1920|0.040000|N/A|N/A|3840|343040|K_
+video|0|23|0.920000|20|0.800000|1|0.040000|N/A|N/A|20731|347648|__
+audio|1|40320|0.840000|40320|0.840000|1920|0.040000|N/A|N/A|3840|368640|K_
+video|0|21|0.840000|21|0.840000|1|0.040000|N/A|N/A|11946|373248|__
+audio|1|42240|0.880000|42240|0.880000|1920|0.040000|N/A|N/A|3840|385536|K_
+video|0|22|0.880000|22|0.880000|1|0.040000|N/A|N/A|14464|390144|__
+audio|1|44160|0.920000|44160|0.920000|1920|0.040000|N/A|N/A|3840|404992|K_
+video|0|26|1.040000|23|0.920000|1|0.040000|N/A|N/A|16189|409600|__
+audio|1|46080|0.960000|46080|0.960000|1920|0.040000|N/A|N/A|3840|425984|K_
+video|0|24|0.960000|24|0.960000|1|0.040000|N/A|N/A|10524|430592|__
+audio|1|48000|1.000000|48000|1.000000|1920|0.040000|N/A|N/A|3840|441344|K_
+video|0|25|1.000000|25|1.000000|1|0.040000|N/A|N/A|10599|445952|__
+audio|1|49920|1.040000|49920|1.040000|1920|0.040000|N/A|N/A|3840|456704|K_
+video|0|29|1.160000|26|1.040000|1|0.040000|N/A|N/A|24711|461312|K_
+audio|1|51840|1.080000|51840|1.080000|1920|0.040000|N/A|N/A|3840|486400|K_
+video|0|27|1.080000|27|1.080000|1|0.040000|N/A|N/A|10840|491008|__
+audio|1|53760|1.120000|53760|1.120000|1920|0.040000|N/A|N/A|3840|502272|K_
+video|0|28|1.120000|28|1.120000|1|0.040000|N/A|N/A|13350|506880|__
+audio|1|55680|1.160000|55680|1.160000|1920|0.040000|N/A|N/A|3840|520704|K_
+video|0|25|1.000000|24|0.960000|1|0.040000|N/A|N/A|24801|7168|K_|1
 Strings Metadata
-audio|1|48000|1.000000|48000|1.000000|1920|0.040000|N/A|N/A|3840|31232|K_|1
+audio|1|48000|1.000000|48000|1.000000|1920|0.040000|N/A|N/A|3840|32256|K_|1
 Strings Metadata
-video|0|28|1.120000|25|1.000000|1|0.040000|N/A|N/A|16743|35840|__|1
+video|0|28|1.120000|25|1.000000|1|0.040000|N/A|N/A|16743|36864|__|1
 Strings Metadata
-audio|1|49920|1.040000|49920|1.040000|1920|0.040000|N/A|N/A|3840|52736|K_|1
+audio|1|49920|1.040000|49920|1.040000|1920|0.040000|N/A|N/A|3840|53760|K_|1
 Strings Metadata
-video|0|26|1.040000|26|1.040000|1|0.040000|N/A|N/A|13812|57344|__|1
+video|0|26|1.040000|26|1.040000|1|0.040000|N/A|N/A|13812|58368|__|1
 Strings Metadata
-audio|1|51840|1.080000|51840|1.080000|1920|0.040000|N/A|N/A|3840|71680|K_|1
+audio|1|51840|1.080000|51840|1.080000|1920|0.040000|N/A|N/A|3840|72704|K_|1
 Strings Metadata
-video|0|27|1.080000|27|1.080000|1|0.040000|N/A|N/A|13607|76288|__|1
+video|0|27|1.080000|27|1.080000|1|0.040000|N/A|N/A|13607|77312|__|1
 Strings Metadata
-audio|1|53760|1.120000|53760|1.120000|1920|0.040000|N/A|N/A|3840|90112|K_|1
+audio|1|53760|1.120000|53760|1.120000|1920|0.040000|N/A|N/A|3840|91136|K_|1
 Strings Metadata
-video|0|31|1.240000|28|1.120000|1|0.040000|N/A|N/A|16158|94720|__|1
+video|0|31|1.240000|28|1.120000|1|0.040000|N/A|N/A|16158|95744|__|1
 Strings Metadata
-audio|1|55680|1.160000|55680|1.160000|1920|0.040000|N/A|N/A|3840|111104|K_|1
+audio|1|55680|1.160000|55680|1.160000|1920|0.040000|N/A|N/A|3840|112128|K_|1
 Strings Metadata
-video|0|29|1.160000|29|1.160000|1|0.040000|N/A|N/A|13943|115712|__|1
+video|0|29|1.160000|29|1.160000|1|0.040000|N/A|N/A|13943|116736|__|1
 Strings Metadata
-audio|1|57600|1.200000|57600|1.200000|1920|0.040000|N/A|N/A|3840|130048|K_|1
+audio|1|57600|1.200000|57600|1.200000|1920|0.040000|N/A|N/A|3840|131072|K_|1
 Strings Metadata
-video|0|30|1.200000|30|1.200000|1|0.040000|N/A|N/A|11223|134656|__|1
+video|0|30|1.200000|30|1.200000|1|0.040000|N/A|N/A|11223|135680|__|1
 Strings Metadata
-audio|1|59520|1.240000|59520|1.240000|1920|0.040000|N/A|N/A|3840|145920|K_|1
+audio|1|59520|1.240000|59520|1.240000|1920|0.040000|N/A|N/A|3840|146944|K_|1
 Strings Metadata
-video|0|34|1.360000|31|1.240000|1|0.040000|N/A|N/A|20298|150528|__|1
+video|0|34|1.360000|31|1.240000|1|0.040000|N/A|N/A|20298|151552|__|1
 Strings Metadata
-audio|1|61440|1.280000|61440|1.280000|1920|0.040000|N/A|N/A|3840|171008|K_|1
+audio|1|61440|1.280000|61440|1.280000|1920|0.040000|N/A|N/A|3840|172032|K_|1
 Strings Metadata
-video|0|32|1.280000|32|1.280000|1|0.040000|N/A|N/A|13341|175616|__|1
+video|0|32|1.280000|32|1.280000|1|0.040000|N/A|N/A|13341|176640|__|1
 Strings Metadata
-audio|1|63360|1.320000|63360|1.320000|1920|0.040000|N/A|N/A|3840|189440|K_|1
+audio|1|63360|1.320000|63360|1.320000|1920|0.040000|N/A|N/A|3840|190464|K_|1
 Strings Metadata
-video|0|33|1.320000|33|1.320000|1|0.040000|N/A|N/A|12362|194048|__|1
+video|0|33|1.320000|33|1.320000|1|0.040000|N/A|N/A|12362|195072|__|1
 Strings Metadata
-audio|1|65280|1.360000|65280|1.360000|1920|0.040000|N/A|N/A|3840|206848|K_|1
+audio|1|65280|1.360000|65280|1.360000|1920|0.040000|N/A|N/A|3840|207872|K_|1
 Strings Metadata
-video|0|37|1.480000|34|1.360000|1|0.040000|N/A|N/A|24786|211456|K_|1
+video|0|37|1.480000|34|1.360000|1|0.040000|N/A|N/A|24786|212480|K_|1
 Strings Metadata
 0|mpeg2video|4|video|1/25|[0][0][0][0]|0x0000|352|288|0|0|1|1:1|11:9|yuv420p|8|tv|unknown|unknown|unknown|left|progressive|N/A|1|N/A|25/1|25/1|1/25|N/A|N/A|N/A|N/A|N/A|N/A|N/A|N/A|N/A|51|0|0|0|0|0|0|0|0|0|0|0|0|0x060A2B340101010501010D001300000000000000000000000000000000000001
 1|pcm_s16le|unknown|audio|1/48000|[0][0][0][0]|0x0000|s16|48000|1|unknown|16|N/A|0/0|0/0|1/48000|0|0.000000|N/A|N/A|768000|N/A|N/A|N/A|N/A|50|0|0|0|0|0|0|0|0|0|0|0|0|0x060A2B340101010501010D001300000000000000000000000000000000000001

diff --git a/tests/ref/fate/concat-demuxer-simple1-lavf-mxf_d10 b/tests/ref/fate/concat-demuxer-simple1-lavf-mxf_d10
index bd0e5e2..e83d1bf 100644
--- a/tests/ref/fate/concat-demuxer-simple1-lavf-mxf_d10
+++ b/tests/ref/fate/concat-demuxer-simple1-lavf-mxf_d10

@@ -1,82 +1,82 @@
-video|0|0|0.000000|0|0.000000|1|0.040000|N/A|N/A|150000|6144|K_
-audio|1|0|0.000000|0|0.000000|1920|0.040000|N/A|N/A|7680|156672|K_
-video|0|1|0.040000|1|0.040000|1|0.040000|N/A|N/A|150000|219136|K_
-audio|1|1920|0.040000|1920|0.040000|1920|0.040000|N/A|N/A|7680|369664|K_
-video|0|2|0.080000|2|0.080000|1|0.040000|N/A|N/A|150000|432128|K_
-audio|1|3840|0.080000|3840|0.080000|1920|0.040000|N/A|N/A|7680|582656|K_
-video|0|3|0.120000|3|0.120000|1|0.040000|N/A|N/A|150000|645120|K_
-audio|1|5760|0.120000|5760|0.120000|1920|0.040000|N/A|N/A|7680|795648|K_
-video|0|4|0.160000|4|0.160000|1|0.040000|N/A|N/A|150000|858112|K_
-audio|1|7680|0.160000|7680|0.160000|1920|0.040000|N/A|N/A|7680|1008640|K_
-video|0|5|0.200000|5|0.200000|1|0.040000|N/A|N/A|150000|1071104|K_
-audio|1|9600|0.200000|9600|0.200000|1920|0.040000|N/A|N/A|7680|1221632|K_
-video|0|6|0.240000|6|0.240000|1|0.040000|N/A|N/A|150000|1284096|K_
-audio|1|11520|0.240000|11520|0.240000|1920|0.040000|N/A|N/A|7680|1434624|K_
-video|0|7|0.280000|7|0.280000|1|0.040000|N/A|N/A|150000|1497088|K_
-audio|1|13440|0.280000|13440|0.280000|1920|0.040000|N/A|N/A|7680|1647616|K_
-video|0|8|0.320000|8|0.320000|1|0.040000|N/A|N/A|150000|1710080|K_
-audio|1|15360|0.320000|15360|0.320000|1920|0.040000|N/A|N/A|7680|1860608|K_
-video|0|9|0.360000|9|0.360000|1|0.040000|N/A|N/A|150000|1923072|K_
-audio|1|17280|0.360000|17280|0.360000|1920|0.040000|N/A|N/A|7680|2073600|K_
-video|0|10|0.400000|10|0.400000|1|0.040000|N/A|N/A|150000|2136064|K_
-audio|1|19200|0.400000|19200|0.400000|1920|0.040000|N/A|N/A|7680|2286592|K_
-video|0|11|0.440000|11|0.440000|1|0.040000|N/A|N/A|150000|2349056|K_
-audio|1|21120|0.440000|21120|0.440000|1920|0.040000|N/A|N/A|7680|2499584|K_
-video|0|12|0.480000|12|0.480000|1|0.040000|N/A|N/A|150000|2562048|K_
-audio|1|23040|0.480000|23040|0.480000|1920|0.040000|N/A|N/A|7680|2712576|K_
-video|0|13|0.520000|13|0.520000|1|0.040000|N/A|N/A|150000|2775040|K_
-audio|1|24960|0.520000|24960|0.520000|1920|0.040000|N/A|N/A|7680|2925568|K_
-video|0|14|0.560000|14|0.560000|1|0.040000|N/A|N/A|150000|2988032|K_
-audio|1|26880|0.560000|26880|0.560000|1920|0.040000|N/A|N/A|7680|3138560|K_
-video|0|15|0.600000|15|0.600000|1|0.040000|N/A|N/A|150000|3201024|K_
-audio|1|28800|0.600000|28800|0.600000|1920|0.040000|N/A|N/A|7680|3351552|K_
-video|0|16|0.640000|16|0.640000|1|0.040000|N/A|N/A|150000|3414016|K_
-audio|1|30720|0.640000|30720|0.640000|1920|0.040000|N/A|N/A|7680|3564544|K_
-video|0|17|0.680000|17|0.680000|1|0.040000|N/A|N/A|150000|3627008|K_
-audio|1|32640|0.680000|32640|0.680000|1920|0.040000|N/A|N/A|7680|3777536|K_
-video|0|18|0.720000|18|0.720000|1|0.040000|N/A|N/A|150000|3840000|K_
-audio|1|34560|0.720000|34560|0.720000|1920|0.040000|N/A|N/A|7680|3990528|K_
-video|0|19|0.760000|19|0.760000|1|0.040000|N/A|N/A|150000|4052992|K_
-audio|1|36480|0.760000|36480|0.760000|1920|0.040000|N/A|N/A|7680|4203520|K_
-video|0|20|0.800000|20|0.800000|1|0.040000|N/A|N/A|150000|4265984|K_
-audio|1|38400|0.800000|38400|0.800000|1920|0.040000|N/A|N/A|7680|4416512|K_
-video|0|21|0.840000|21|0.840000|1|0.040000|N/A|N/A|150000|4478976|K_
-audio|1|40320|0.840000|40320|0.840000|1920|0.040000|N/A|N/A|7680|4629504|K_
-video|0|22|0.880000|22|0.880000|1|0.040000|N/A|N/A|150000|4691968|K_
-audio|1|42240|0.880000|42240|0.880000|1920|0.040000|N/A|N/A|7680|4842496|K_
-video|0|23|0.920000|23|0.920000|1|0.040000|N/A|N/A|150000|4904960|K_
-audio|1|44160|0.920000|44160|0.920000|1920|0.040000|N/A|N/A|7680|5055488|K_
-video|0|24|0.960000|24|0.960000|1|0.040000|N/A|N/A|150000|5117952|K_
-audio|1|46080|0.960000|46080|0.960000|1920|0.040000|N/A|N/A|7680|5268480|K_
-video|0|25|1.000000|25|1.000000|1|0.040000|N/A|N/A|150000|4265984|K_
-audio|1|48000|1.000000|48000|1.000000|1920|0.040000|N/A|N/A|7680|4416512|K_
-video|0|26|1.040000|26|1.040000|1|0.040000|N/A|N/A|150000|4478976|K_
-audio|1|49920|1.040000|49920|1.040000|1920|0.040000|N/A|N/A|7680|4629504|K_
-video|0|27|1.080000|27|1.080000|1|0.040000|N/A|N/A|150000|4691968|K_
-audio|1|51840|1.080000|51840|1.080000|1920|0.040000|N/A|N/A|7680|4842496|K_
-video|0|28|1.120000|28|1.120000|1|0.040000|N/A|N/A|150000|4904960|K_
-audio|1|53760|1.120000|53760|1.120000|1920|0.040000|N/A|N/A|7680|5055488|K_
-video|0|29|1.160000|29|1.160000|1|0.040000|N/A|N/A|150000|5117952|K_
-audio|1|55680|1.160000|55680|1.160000|1920|0.040000|N/A|N/A|7680|5268480|K_
-video|0|30|1.200000|30|1.200000|1|0.040000|N/A|N/A|150000|1071104|K_|1
+video|0|0|0.000000|0|0.000000|1|0.040000|N/A|N/A|150000|7168|K_
+audio|1|0|0.000000|0|0.000000|1920|0.040000|N/A|N/A|7680|157696|K_
+video|0|1|0.040000|1|0.040000|1|0.040000|N/A|N/A|150000|220160|K_
+audio|1|1920|0.040000|1920|0.040000|1920|0.040000|N/A|N/A|7680|370688|K_
+video|0|2|0.080000|2|0.080000|1|0.040000|N/A|N/A|150000|433152|K_
+audio|1|3840|0.080000|3840|0.080000|1920|0.040000|N/A|N/A|7680|583680|K_
+video|0|3|0.120000|3|0.120000|1|0.040000|N/A|N/A|150000|646144|K_
+audio|1|5760|0.120000|5760|0.120000|1920|0.040000|N/A|N/A|7680|796672|K_
+video|0|4|0.160000|4|0.160000|1|0.040000|N/A|N/A|150000|859136|K_
+audio|1|7680|0.160000|7680|0.160000|1920|0.040000|N/A|N/A|7680|1009664|K_
+video|0|5|0.200000|5|0.200000|1|0.040000|N/A|N/A|150000|1072128|K_
+audio|1|9600|0.200000|9600|0.200000|1920|0.040000|N/A|N/A|7680|1222656|K_
+video|0|6|0.240000|6|0.240000|1|0.040000|N/A|N/A|150000|1285120|K_
+audio|1|11520|0.240000|11520|0.240000|1920|0.040000|N/A|N/A|7680|1435648|K_
+video|0|7|0.280000|7|0.280000|1|0.040000|N/A|N/A|150000|1498112|K_
+audio|1|13440|0.280000|13440|0.280000|1920|0.040000|N/A|N/A|7680|1648640|K_
+video|0|8|0.320000|8|0.320000|1|0.040000|N/A|N/A|150000|1711104|K_
+audio|1|15360|0.320000|15360|0.320000|1920|0.040000|N/A|N/A|7680|1861632|K_
+video|0|9|0.360000|9|0.360000|1|0.040000|N/A|N/A|150000|1924096|K_
+audio|1|17280|0.360000|17280|0.360000|1920|0.040000|N/A|N/A|7680|2074624|K_
+video|0|10|0.400000|10|0.400000|1|0.040000|N/A|N/A|150000|2137088|K_
+audio|1|19200|0.400000|19200|0.400000|1920|0.040000|N/A|N/A|7680|2287616|K_
+video|0|11|0.440000|11|0.440000|1|0.040000|N/A|N/A|150000|2350080|K_
+audio|1|21120|0.440000|21120|0.440000|1920|0.040000|N/A|N/A|7680|2500608|K_
+video|0|12|0.480000|12|0.480000|1|0.040000|N/A|N/A|150000|2563072|K_
+audio|1|23040|0.480000|23040|0.480000|1920|0.040000|N/A|N/A|7680|2713600|K_
+video|0|13|0.520000|13|0.520000|1|0.040000|N/A|N/A|150000|2776064|K_
+audio|1|24960|0.520000|24960|0.520000|1920|0.040000|N/A|N/A|7680|2926592|K_
+video|0|14|0.560000|14|0.560000|1|0.040000|N/A|N/A|150000|2989056|K_
+audio|1|26880|0.560000|26880|0.560000|1920|0.040000|N/A|N/A|7680|3139584|K_
+video|0|15|0.600000|15|0.600000|1|0.040000|N/A|N/A|150000|3202048|K_
+audio|1|28800|0.600000|28800|0.600000|1920|0.040000|N/A|N/A|7680|3352576|K_
+video|0|16|0.640000|16|0.640000|1|0.040000|N/A|N/A|150000|3415040|K_
+audio|1|30720|0.640000|30720|0.640000|1920|0.040000|N/A|N/A|7680|3565568|K_
+video|0|17|0.680000|17|0.680000|1|0.040000|N/A|N/A|150000|3628032|K_
+audio|1|32640|0.680000|32640|0.680000|1920|0.040000|N/A|N/A|7680|3778560|K_
+video|0|18|0.720000|18|0.720000|1|0.040000|N/A|N/A|150000|3841024|K_
+audio|1|34560|0.720000|34560|0.720000|1920|0.040000|N/A|N/A|7680|3991552|K_
+video|0|19|0.760000|19|0.760000|1|0.040000|N/A|N/A|150000|4054016|K_
+audio|1|36480|0.760000|36480|0.760000|1920|0.040000|N/A|N/A|7680|4204544|K_
+video|0|20|0.800000|20|0.800000|1|0.040000|N/A|N/A|150000|4267008|K_
+audio|1|38400|0.800000|38400|0.800000|1920|0.040000|N/A|N/A|7680|4417536|K_
+video|0|21|0.840000|21|0.840000|1|0.040000|N/A|N/A|150000|4480000|K_
+audio|1|40320|0.840000|40320|0.840000|1920|0.040000|N/A|N/A|7680|4630528|K_
+video|0|22|0.880000|22|0.880000|1|0.040000|N/A|N/A|150000|4692992|K_
+audio|1|42240|0.880000|42240|0.880000|1920|0.040000|N/A|N/A|7680|4843520|K_
+video|0|23|0.920000|23|0.920000|1|0.040000|N/A|N/A|150000|4905984|K_
+audio|1|44160|0.920000|44160|0.920000|1920|0.040000|N/A|N/A|7680|5056512|K_
+video|0|24|0.960000|24|0.960000|1|0.040000|N/A|N/A|150000|5118976|K_
+audio|1|46080|0.960000|46080|0.960000|1920|0.040000|N/A|N/A|7680|5269504|K_
+video|0|25|1.000000|25|1.000000|1|0.040000|N/A|N/A|150000|4267008|K_
+audio|1|48000|1.000000|48000|1.000000|1920|0.040000|N/A|N/A|7680|4417536|K_
+video|0|26|1.040000|26|1.040000|1|0.040000|N/A|N/A|150000|4480000|K_
+audio|1|49920|1.040000|49920|1.040000|1920|0.040000|N/A|N/A|7680|4630528|K_
+video|0|27|1.080000|27|1.080000|1|0.040000|N/A|N/A|150000|4692992|K_
+audio|1|51840|1.080000|51840|1.080000|1920|0.040000|N/A|N/A|7680|4843520|K_
+video|0|28|1.120000|28|1.120000|1|0.040000|N/A|N/A|150000|4905984|K_
+audio|1|53760|1.120000|53760|1.120000|1920|0.040000|N/A|N/A|7680|5056512|K_
+video|0|29|1.160000|29|1.160000|1|0.040000|N/A|N/A|150000|5118976|K_
+audio|1|55680|1.160000|55680|1.160000|1920|0.040000|N/A|N/A|7680|5269504|K_
+video|0|30|1.200000|30|1.200000|1|0.040000|N/A|N/A|150000|1072128|K_|1
 Strings Metadata
-audio|1|57600|1.200000|57600|1.200000|1920|0.040000|N/A|N/A|7680|1221632|K_|1
+audio|1|57600|1.200000|57600|1.200000|1920|0.040000|N/A|N/A|7680|1222656|K_|1
 Strings Metadata
-video|0|31|1.240000|31|1.240000|1|0.040000|N/A|N/A|150000|1284096|K_|1
+video|0|31|1.240000|31|1.240000|1|0.040000|N/A|N/A|150000|1285120|K_|1
 Strings Metadata
-audio|1|59520|1.240000|59520|1.240000|1920|0.040000|N/A|N/A|7680|1434624|K_|1
+audio|1|59520|1.240000|59520|1.240000|1920|0.040000|N/A|N/A|7680|1435648|K_|1
 Strings Metadata
-video|0|32|1.280000|32|1.280000|1|0.040000|N/A|N/A|150000|1497088|K_|1
+video|0|32|1.280000|32|1.280000|1|0.040000|N/A|N/A|150000|1498112|K_|1
 Strings Metadata
-audio|1|61440|1.280000|61440|1.280000|1920|0.040000|N/A|N/A|7680|1647616|K_|1
+audio|1|61440|1.280000|61440|1.280000|1920|0.040000|N/A|N/A|7680|1648640|K_|1
 Strings Metadata
-video|0|33|1.320000|33|1.320000|1|0.040000|N/A|N/A|150000|1710080|K_|1
+video|0|33|1.320000|33|1.320000|1|0.040000|N/A|N/A|150000|1711104|K_|1
 Strings Metadata
-audio|1|63360|1.320000|63360|1.320000|1920|0.040000|N/A|N/A|7680|1860608|K_|1
+audio|1|63360|1.320000|63360|1.320000|1920|0.040000|N/A|N/A|7680|1861632|K_|1
 Strings Metadata
-video|0|34|1.360000|34|1.360000|1|0.040000|N/A|N/A|150000|1923072|K_|1
+video|0|34|1.360000|34|1.360000|1|0.040000|N/A|N/A|150000|1924096|K_|1
 Strings Metadata
-audio|1|65280|1.360000|65280|1.360000|1920|0.040000|N/A|N/A|7680|2073600|K_|1
+audio|1|65280|1.360000|65280|1.360000|1920|0.040000|N/A|N/A|7680|2074624|K_|1
 Strings Metadata
 0|mpeg2video|0|video|1/25|[0][0][0][0]|0x0000|720|608|0|0|0|1:1|45:38|yuv422p|5|tv|unknown|unknown|unknown|topleft|tt|N/A|1|N/A|25/1|25/1|1/25|0|0.000000|N/A|N/A|30000000|N/A|N/A|N/A|N/A|35|0|0|0|0|0|0|0|0|0|0|0|0|0x060A2B340101010501010D001300000000000000000000000000000000000001
 1|pcm_s16le|unknown|audio|1/48000|[0][0][0][0]|0x0000|s16|48000|2|unknown|16|N/A|0/0|0/0|1/48000|0|0.000000|N/A|N/A|1536000|N/A|N/A|N/A|N/A|35|0|0|0|0|0|0|0|0|0|0|0|0|0x060A2B340101010501010D001300000000000000000000000000000000000001

diff --git a/tests/ref/fate/copy-psp b/tests/ref/fate/copy-psp
index 81eb172..44ec461 100644
--- a/tests/ref/fate/copy-psp
+++ b/tests/ref/fate/copy-psp

@@ -1,5 +1,5 @@
-cada61453a2483ef8ba1fb82c8bbff25 *tests/data/fate/copy-psp.psp
-2041433 tests/data/fate/copy-psp.psp
+65a177552e03123c9a62ddb942970d05 *tests/data/fate/copy-psp.psp
+2041445 tests/data/fate/copy-psp.psp
 #extradata 0:       51, 0xaf6d1012
 #extradata 1:        2, 0x00b200a1
 #tb 0: 1/90000

diff --git a/tests/ref/fate/copy-trac2211-avi b/tests/ref/fate/copy-trac2211-avi
index 007349e..06d81e5 100644
--- a/tests/ref/fate/copy-trac2211-avi
+++ b/tests/ref/fate/copy-trac2211-avi

@@ -1,5 +1,5 @@
-6f6b211cbc8de9871e8e09e64048e2f9 *tests/data/fate/copy-trac2211-avi.avi
-1777924 tests/data/fate/copy-trac2211-avi.avi
+0920978f3f8196413c43f0033b55a5b6 *tests/data/fate/copy-trac2211-avi.avi
+1777956 tests/data/fate/copy-trac2211-avi.avi
 #tb 0: 1/14
 #media_type 0: video
 #codec_id 0: rawvideo

diff --git a/tests/ref/fate/copy-trac236 b/tests/ref/fate/copy-trac236
index c5240ca..2ac05e6 100644
--- a/tests/ref/fate/copy-trac236
+++ b/tests/ref/fate/copy-trac236

@@ -1,5 +1,5 @@
-d6e3d97b522ce881ed29c5da74cc7e63 *tests/data/fate/copy-trac236.mov
-630810 tests/data/fate/copy-trac236.mov
+959a4d78c6c11936e361fc3101a013eb *tests/data/fate/copy-trac236.mov
+630860 tests/data/fate/copy-trac236.mov
 #tb 0: 100/2997
 #media_type 0: video
 #codec_id 0: rawvideo

diff --git a/tests/ref/fate/copy-trac4914 b/tests/ref/fate/copy-trac4914
index e0864a0..7c8d0e9 100644
--- a/tests/ref/fate/copy-trac4914
+++ b/tests/ref/fate/copy-trac4914

@@ -1,5 +1,5 @@
-d51f6bcc96885a2ce8517ae8c774f610 *tests/data/fate/copy-trac4914.mxf
-560697 tests/data/fate/copy-trac4914.mxf
+b37c4d5693cdb5b9ed9b33501ffb682a *tests/data/fate/copy-trac4914.mxf
+561721 tests/data/fate/copy-trac4914.mxf
 #tb 0: 1001/30000
 #media_type 0: video
 #codec_id 0: rawvideo

diff --git a/tests/ref/fate/crc b/tests/ref/fate/crc
index 8aa0dd2..7b59a3c 100644
--- a/tests/ref/fate/crc
+++ b/tests/ref/fate/crc

@@ -4,3 +4,4 @@
 crc 0000A001 = BFD8
 crc 00008005 = BB1F
 crc 00000007 = E3
+crc 0000001D = D6

diff --git a/tests/ref/fate/exif-image-embedded b/tests/ref/fate/exif-image-embedded
index 306ae08..392c145 100644
--- a/tests/ref/fate/exif-image-embedded
+++ b/tests/ref/fate/exif-image-embedded

@@ -29,6 +29,12 @@
 chroma_location=center
 TAG:UserComment=AppleMark
 
+[SIDE_DATA]
+side_data_type=QP table data
+[/SIDE_DATA]
+[SIDE_DATA]
+side_data_type=QP table properties
+[/SIDE_DATA]
 [/FRAME]
 [FRAME]
 media_type=audio
@@ -44,7 +50,7 @@
 pkt_duration_time=0.001066
 pkt_pos=16292
 pkt_size=417
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=47
 channels=2
 channel_layout=stereo
@@ -63,7 +69,7 @@
 pkt_duration_time=0.026122
 pkt_pos=16709
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -82,7 +88,7 @@
 pkt_duration_time=0.026122
 pkt_pos=17127
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -101,7 +107,7 @@
 pkt_duration_time=0.026122
 pkt_pos=17545
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -120,7 +126,7 @@
 pkt_duration_time=0.026122
 pkt_pos=17963
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -139,7 +145,7 @@
 pkt_duration_time=0.026122
 pkt_pos=18381
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -158,7 +164,7 @@
 pkt_duration_time=0.026122
 pkt_pos=18799
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -177,7 +183,7 @@
 pkt_duration_time=0.026122
 pkt_pos=19217
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -196,7 +202,7 @@
 pkt_duration_time=0.026122
 pkt_pos=19635
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -215,7 +221,7 @@
 pkt_duration_time=0.026122
 pkt_pos=20053
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -234,7 +240,7 @@
 pkt_duration_time=0.026122
 pkt_pos=20471
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -253,7 +259,7 @@
 pkt_duration_time=0.026122
 pkt_pos=20889
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -272,7 +278,7 @@
 pkt_duration_time=0.026122
 pkt_pos=21307
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -291,7 +297,7 @@
 pkt_duration_time=0.026122
 pkt_pos=21725
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -310,7 +316,7 @@
 pkt_duration_time=0.026122
 pkt_pos=22143
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -329,7 +335,7 @@
 pkt_duration_time=0.026122
 pkt_pos=22561
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -348,7 +354,7 @@
 pkt_duration_time=0.026122
 pkt_pos=22979
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -367,7 +373,7 @@
 pkt_duration_time=0.026122
 pkt_pos=23397
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -386,7 +392,7 @@
 pkt_duration_time=0.026122
 pkt_pos=23815
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -405,7 +411,7 @@
 pkt_duration_time=0.026122
 pkt_pos=24233
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo
@@ -424,7 +430,7 @@
 pkt_duration_time=0.026122
 pkt_pos=24651
 pkt_size=418
-sample_fmt=s16p
+sample_fmt=fltp
 nb_samples=1152
 channels=2
 channel_layout=stereo

diff --git a/tests/ref/fate/exif-image-jpg b/tests/ref/fate/exif-image-jpg
index b266501..eb18ded 100644
--- a/tests/ref/fate/exif-image-jpg
+++ b/tests/ref/fate/exif-image-jpg

@@ -229,4 +229,10 @@
 TAG:WhiteBalance=    0
 TAG:DigitalZoomRatio=   4000:4000
 TAG:SceneCaptureType=    0
+[SIDE_DATA]
+side_data_type=QP table data
+[/SIDE_DATA]
+[SIDE_DATA]
+side_data_type=QP table properties
+[/SIDE_DATA]
 [/FRAME]

diff --git a/tests/ref/fate/exr-rgba-zip16-16x32-flag4 b/tests/ref/fate/exr-rgba-zip16-16x32-flag4
new file mode 100644
index 0000000..e34aa71
--- /dev/null
+++ b/tests/ref/fate/exr-rgba-zip16-16x32-flag4

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 16x32
+#sar 0: 1/1
+0,          0,          0,        1,     4096, 0xf90ab1e9

diff --git a/tests/ref/fate/ffmpeg-attached_pics b/tests/ref/fate/ffmpeg-attached_pics
new file mode 100644
index 0000000..ee2f206
--- /dev/null
+++ b/tests/ref/fate/ffmpeg-attached_pics

@@ -0,0 +1,140 @@
+#tb 0: 1/90000
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 200x200
+#sar 0: 2834/2834
+#tb 1: 1/44100
+#media_type 1: audio
+#codec_id 1: pcm_s16le
+#sample_rate 1: 44100
+#channel_layout 1: 3
+#channel_layout_name 1: stereo
+0,          0,          0,        0,   120000, 0x748cc771
+1,          0,          0,     4096,    16384, 0x00000000
+1,       4096,       4096,     4096,    16384, 0x29cd639d
+1,       8192,       8192,     4096,    16384, 0xd52066e5
+1,      12288,      12288,     4096,    16384, 0x0c933408
+1,      16384,      16384,     4096,    16384, 0xb3b97675
+1,      20480,      20480,     4096,    16384, 0x1cb4a26b
+1,      24576,      24576,     4096,    16384, 0x6bf693b9
+1,      28672,      28672,     4096,    16384, 0x12896c95
+1,      32768,      32768,     4096,    16384, 0x48b7167a
+1,      36864,      36864,     4096,    16384, 0x5a9c06ad
+1,      40960,      40960,     4096,    16384, 0x42fa8e65
+1,      45056,      45056,     4096,    16384, 0x175e6a61
+1,      49152,      49152,     4096,    16384, 0x3cd0e606
+1,      53248,      53248,     4096,    16384, 0x5ceff67d
+1,      57344,      57344,     4096,    16384, 0xa24ba733
+1,      61440,      61440,     4096,    16384, 0xafe61a2d
+1,      65536,      65536,     4096,    16384, 0xef8355f1
+1,      69632,      69632,     4096,    16384, 0x8e53cb7c
+1,      73728,      73728,     4096,    16384, 0x461bb940
+1,      77824,      77824,     4096,    16384, 0x96bb6ebd
+1,      81920,      81920,     4096,    16384, 0xb99977dc
+1,      86016,      86016,     4096,    16384, 0xa6b6a178
+1,      90112,      90112,     4096,    16384, 0x7bdc3e50
+1,      94208,      94208,     4096,    16384, 0x44cda519
+1,      98304,      98304,     4096,    16384, 0xbdfd2e72
+1,     102400,     102400,     4096,    16384, 0x5c09dc3c
+1,     106496,     106496,     4096,    16384, 0xe5aabb75
+1,     110592,     110592,     4096,    16384, 0x5db83fb2
+1,     114688,     114688,     4096,    16384, 0x69a6e3c0
+1,     118784,     118784,     4096,    16384, 0x0c09a90d
+1,     122880,     122880,     4096,    16384, 0x978d2b50
+1,     126976,     126976,     4096,    16384, 0x9707fbaa
+1,     131072,     131072,     4096,    16384, 0x8003f93b
+1,     135168,     135168,     4096,    16384, 0xaa12ab0c
+1,     139264,     139264,     4096,    16384, 0x49980501
+1,     143360,     143360,     4096,    16384, 0xdcb891db
+1,     147456,     147456,     4096,    16384, 0x52b76938
+1,     151552,     151552,     4096,    16384, 0x7cb55457
+1,     155648,     155648,     4096,    16384, 0x6b08b7d2
+1,     159744,     159744,     4096,    16384, 0xb56bb312
+1,     163840,     163840,     4096,    16384, 0xcbf9d3e0
+1,     167936,     167936,     4096,    16384, 0xe8958c07
+1,     172032,     172032,     4096,    16384, 0x054ac021
+1,     176128,     176128,     4096,    16384, 0x36811603
+1,     180224,     180224,     4096,    16384, 0x3354f6e1
+1,     184320,     184320,     4096,    16384, 0xa6c11686
+1,     188416,     188416,     4096,    16384, 0xed353877
+1,     192512,     192512,     4096,    16384, 0xef21373e
+1,     196608,     196608,     4096,    16384, 0x31c806d9
+1,     200704,     200704,     4096,    16384, 0x3c1c79d4
+1,     204800,     204800,     4096,    16384, 0x1b7b3d9a
+1,     208896,     208896,     4096,    16384, 0x08977239
+1,     212992,     212992,     4096,    16384, 0x07f9d169
+1,     217088,     217088,     4096,    16384, 0xa66ae19a
+1,     221184,     221184,     4096,    16384, 0x42f51169
+1,     225280,     225280,     4096,    16384, 0x98ff59b6
+1,     229376,     229376,     4096,    16384, 0x855216b9
+1,     233472,     233472,     4096,    16384, 0x0986573d
+1,     237568,     237568,     4096,    16384, 0x060aeffe
+1,     241664,     241664,     4096,    16384, 0x391c19bc
+1,     245760,     245760,     4096,    16384, 0x9939c472
+1,     249856,     249856,     4096,    16384, 0x4e0d31c5
+1,     253952,     253952,     4096,    16384, 0xed2678a6
+1,     258048,     258048,     4096,    16384, 0xfd899fc3
+1,     262144,     262144,     4096,    16384, 0x35cf5263
+1,     266240,     266240,     4096,    16384, 0xa2e35dad
+1,     270336,     270336,     4096,    16384, 0xf9ed08a0
+1,     274432,     274432,     4096,    16384, 0x022d9356
+1,     278528,     278528,     4096,    16384, 0x508042f7
+1,     282624,     282624,     4096,    16384, 0xe2e7e70b
+1,     286720,     286720,     4096,    16384, 0x30812bfd
+1,     290816,     290816,     4096,    16384, 0x5590ea7d
+1,     294912,     294912,     4096,    16384, 0xebaa4fc4
+1,     299008,     299008,     4096,    16384, 0x731cee53
+1,     303104,     303104,     4096,    16384, 0x1127b480
+1,     307200,     307200,     4096,    16384, 0x0809f7c8
+1,     311296,     311296,     4096,    16384, 0xc0d4256f
+1,     315392,     315392,     4096,    16384, 0xe868795c
+1,     319488,     319488,     4096,    16384, 0x801a77d1
+1,     323584,     323584,     4096,    16384, 0x1d44bed5
+1,     327680,     327680,     4096,    16384, 0x7619f16b
+1,     331776,     331776,     4096,    16384, 0x301064b6
+1,     335872,     335872,     4096,    16384, 0x42f3e0fb
+1,     339968,     339968,     4096,    16384, 0xfe186dc6
+1,     344064,     344064,     4096,    16384, 0x1a9bbbab
+1,     348160,     348160,     4096,    16384, 0x3c4e00a8
+1,     352256,     352256,     4096,    16384, 0x3101c84e
+1,     356352,     356352,     4096,    16384, 0x11a6c764
+1,     360448,     360448,     4096,    16384, 0xb75e82a1
+1,     364544,     364544,     4096,    16384, 0x81e3b3dd
+1,     368640,     368640,     4096,    16384, 0x2656fc8e
+1,     372736,     372736,     4096,    16384, 0x6c655f40
+1,     376832,     376832,     4096,    16384, 0xba0432f9
+1,     380928,     380928,     4096,    16384, 0x7028ee57
+1,     385024,     385024,     4096,    16384, 0x16baf6ed
+1,     389120,     389120,     4096,    16384, 0x863bcff5
+1,     393216,     393216,     4096,    16384, 0x4dbce87e
+1,     397312,     397312,     4096,    16384, 0x825e268c
+1,     401408,     401408,     4096,    16384, 0xfe269f0a
+1,     405504,     405504,     4096,    16384, 0x47b9c0ef
+1,     409600,     409600,     4096,    16384, 0xbbe55aac
+1,     413696,     413696,     4096,    16384, 0xeb0674a7
+1,     417792,     417792,     4096,    16384, 0x01afba1b
+1,     421888,     421888,     4096,    16384, 0x5ec18306
+1,     425984,     425984,     4096,    16384, 0x6d0b844f
+1,     430080,     430080,     4096,    16384, 0x6cd1bea2
+1,     434176,     434176,     4096,    16384, 0x97e47cbb
+1,     438272,     438272,     4096,    16384, 0xbb6bf554
+1,     442368,     442368,     4096,    16384, 0x33ea7961
+1,     446464,     446464,     4096,    16384, 0x83ce2f2a
+1,     450560,     450560,     4096,    16384, 0x3bed9e0d
+1,     454656,     454656,     4096,    16384, 0xd3a9570a
+1,     458752,     458752,     4096,    16384, 0x4d5e1aca
+1,     462848,     462848,     4096,    16384, 0x874a9b11
+1,     466944,     466944,     4096,    16384, 0xe51061d8
+1,     471040,     471040,     4096,    16384, 0x3582fac4
+1,     475136,     475136,     4096,    16384, 0x35df558e
+1,     479232,     479232,     4096,    16384, 0xe2485fed
+1,     483328,     483328,     4096,    16384, 0x31f9c6a7
+1,     487424,     487424,     4096,    16384, 0x0a82b244
+1,     491520,     491520,     4096,    16384, 0xfbb428f4
+1,     495616,     495616,     4096,    16384, 0x57b90bb6
+1,     499712,     499712,     4096,    16384, 0x5c6daa1a
+1,     503808,     503808,     4096,    16384, 0xe02ac113
+1,     507904,     507904,     4096,    16384, 0x47ed59b6
+1,     512000,     512000,     4096,    16384, 0x220e4bd3
+1,     516096,     516096,     4096,    16384, 0x65de48b1
+1,     520192,     520192,     4085,    16340, 0x326fa751

diff --git a/tests/ref/fate/ffmpeg-filter_complex_audio b/tests/ref/fate/ffmpeg-filter_complex_audio
new file mode 100644
index 0000000..c424675
--- /dev/null
+++ b/tests/ref/fate/ffmpeg-filter_complex_audio

@@ -0,0 +1,10 @@
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: ac3
+#sample_rate 0: 44100
+#channel_layout 0: 4
+#channel_layout_name 0: mono
+0,       -256,       -256,     1536,      416, 0x3001fb2d
+0,       1280,       1280,     1536,      418, 0xba72fc16
+0,       2816,       2816,     1536,      418, 0xba72fc16
+0,       4352,       4352,     1536,      418, 0xba72fc16

diff --git a/tests/ref/fate/fifo-muxer-tst b/tests/ref/fate/fifo-muxer-tst
index ca7e294..e1139ee 100644
--- a/tests/ref/fate/fifo-muxer-tst
+++ b/tests/ref/fate/fifo-muxer-tst

@@ -2,7 +2,6 @@
 pts seen nr: 15
 pts seen: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
 nonfail test: ok
-write header error test: ok
 recovery test: ok
 flush count: 1
 pts seen nr: 15

diff --git a/tests/ref/fate/filter-acrossfade b/tests/ref/fate/filter-acrossfade
index 0567b02..8d524c4 100644
--- a/tests/ref/fate/filter-acrossfade
+++ b/tests/ref/fate/filter-acrossfade

@@ -178,7 +178,8 @@
 0,     174968,     174968,     1024,     4096, 0x74ffeeae
 0,     175992,     175992,      408,     1632, 0x28353dae
 0,     176400,     176400,    88200,   352800, 0x4fb492af
-0,     264600,     264600,     1912,     7648, 0xf0c93a5a
+0,     264600,     264600,      888,     3552, 0xa5a41b07
+0,     265488,     265488,     1024,     4096, 0xc1c61f53
 0,     266512,     266512,     1024,     4096, 0x35b6d595
 0,     267536,     267536,     1024,     4096, 0xc4481118
 0,     268560,     268560,     1024,     4096, 0x5dc0b58d

diff --git a/tests/ref/fate/filter-curves b/tests/ref/fate/filter-curves
index 401bd2e..fd427e2 100644
--- a/tests/ref/fate/filter-curves
+++ b/tests/ref/fate/filter-curves

@@ -3,8 +3,8 @@
 #codec_id 0: rawvideo
 #dimensions 0: 640x480
 #sar 0: 0/1
-0,          0,          0,        1,   921600, 0xcf426780
-0,          1,          1,        1,   921600, 0x7642892d
-0,          2,          2,        1,   921600, 0x13c1ab7e
-0,          3,          3,        1,   921600, 0x3eca04bf
-0,          4,          4,        1,   921600, 0x61539162
+0,          0,          0,        1,   921600, 0x3ed36780
+0,          1,          1,        1,   921600, 0x7dbd892d
+0,          2,          2,        1,   921600, 0x0894ab7e
+0,          3,          3,        1,   921600, 0x471004bf
+0,          4,          4,        1,   921600, 0x79c79162

diff --git a/tests/ref/fate/filter-formats b/tests/ref/fate/filter-formats
index ea85eed..17ff5b2 100644
--- a/tests/ref/fate/filter-formats
+++ b/tests/ref/fate/filter-formats

@@ -75,7 +75,7 @@
 0 = ff_parse_channel_layout(0000000000000004,  1, 1c);
 0 = ff_parse_channel_layout(0000000000000003,  2, 2c);
 -1 = ff_parse_channel_layout(FFFFFFFFFFFFFFFF, -1, -1c);
-0 = ff_parse_channel_layout(0000000000000000, 60, 60c);
+-1 = ff_parse_channel_layout(FFFFFFFFFFFFFFFF, -1, 60c);
 -1 = ff_parse_channel_layout(FFFFFFFFFFFFFFFF, -1, 65c);
 0 = ff_parse_channel_layout(0000000000000000,  2, 2C);
 0 = ff_parse_channel_layout(0000000000000000, 60, 60C);

diff --git a/tests/ref/fate/filter-fps-down b/tests/ref/fate/filter-fps-down
new file mode 100644
index 0000000..eb8b368
--- /dev/null
+++ b/tests/ref/fate/filter-fps-down

@@ -0,0 +1,15 @@
+#tb 0: 1/3
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          0,          0,        1,   115200, 0x0c1062d6
+0,          1,          1,        1,   115200, 0x278d887e
+0,          2,          2,        1,   115200, 0x75e1a17b
+0,          3,          3,        1,   115200, 0x686b77e7
+0,          4,          4,        1,   115200, 0x1fc2d693
+0,          5,          5,        1,   115200, 0x2d0ba5a4
+0,          6,          6,        1,   115200, 0x40426f99
+0,          7,          7,        1,   115200, 0xc705ccd9
+0,          8,          8,        1,   115200, 0x5635daa5
+0,          9,          9,        1,   115200, 0x7161ef8f

diff --git a/tests/ref/fate/filter-fps-down-eof-pass b/tests/ref/fate/filter-fps-down-eof-pass
new file mode 100644
index 0000000..0b6725f
--- /dev/null
+++ b/tests/ref/fate/filter-fps-down-eof-pass

@@ -0,0 +1,16 @@
+#tb 0: 1/3
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          0,          0,        1,   115200, 0x0c1062d6
+0,          1,          1,        1,   115200, 0x278d887e
+0,          2,          2,        1,   115200, 0x75e1a17b
+0,          3,          3,        1,   115200, 0x686b77e7
+0,          4,          4,        1,   115200, 0x1fc2d693
+0,          5,          5,        1,   115200, 0x2d0ba5a4
+0,          6,          6,        1,   115200, 0x40426f99
+0,          7,          7,        1,   115200, 0xc705ccd9
+0,          8,          8,        1,   115200, 0x5635daa5
+0,          9,          9,        1,   115200, 0x7161ef8f
+0,         10,         10,        1,   115200, 0xccf02fed

diff --git a/tests/ref/fate/filter-fps-down-round-down b/tests/ref/fate/filter-fps-down-round-down
new file mode 100644
index 0000000..4440539
--- /dev/null
+++ b/tests/ref/fate/filter-fps-down-round-down

@@ -0,0 +1,15 @@
+#tb 0: 1/3
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          0,          0,        1,   115200, 0x201b9db1
+0,          1,          1,        1,   115200, 0x309b9c06
+0,          2,          2,        1,   115200, 0xa14e9aca
+0,          3,          3,        1,   115200, 0x02b6ab21
+0,          4,          4,        1,   115200, 0x296dd4a5
+0,          5,          5,        1,   115200, 0x59e85f83
+0,          6,          6,        1,   115200, 0xf040bf35
+0,          7,          7,        1,   115200, 0xa76dcd9d
+0,          8,          8,        1,   115200, 0x3af5d306
+0,          9,          9,        1,   115200, 0xc8ce7fb1

diff --git a/tests/ref/fate/filter-fps-down-round-up b/tests/ref/fate/filter-fps-down-round-up
new file mode 100644
index 0000000..c3cf02f
--- /dev/null
+++ b/tests/ref/fate/filter-fps-down-round-up

@@ -0,0 +1,16 @@
+#tb 0: 1/3
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          0,          0,        1,   115200, 0x3744b3ed
+0,          1,          1,        1,   115200, 0x201b9db1
+0,          2,          2,        1,   115200, 0x309b9c06
+0,          3,          3,        1,   115200, 0xb73857e2
+0,          4,          4,        1,   115200, 0x02b6ab21
+0,          5,          5,        1,   115200, 0x296dd4a5
+0,          6,          6,        1,   115200, 0xc95a675e
+0,          7,          7,        1,   115200, 0xf040bf35
+0,          8,          8,        1,   115200, 0xa76dcd9d
+0,          9,          9,        1,   115200, 0x0caf7172
+0,         10,         10,        1,   115200, 0xc8ce7fb1

diff --git a/tests/ref/fate/filter-fps-start-drop b/tests/ref/fate/filter-fps-start-drop
new file mode 100644
index 0000000..cfa1c40
--- /dev/null
+++ b/tests/ref/fate/filter-fps-start-drop

@@ -0,0 +1,11 @@
+#tb 0: 1/3
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          5,          5,        1,   115200, 0x2d0ba5a4
+0,          6,          6,        1,   115200, 0xc95a675e
+0,          7,          7,        1,   115200, 0xf040bf35
+0,          8,          8,        1,   115200, 0x5635daa5
+0,          9,          9,        1,   115200, 0x0caf7172
+0,         10,         10,        1,   115200, 0xc8ce7fb1

diff --git a/tests/ref/fate/filter-fps-start-fill b/tests/ref/fate/filter-fps-start-fill
new file mode 100644
index 0000000..c5efb42
--- /dev/null
+++ b/tests/ref/fate/filter-fps-start-fill

@@ -0,0 +1,11 @@
+#tb 0: 1/3
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          5,          5,        1,   115200, 0x3744b3ed
+0,          6,          6,        1,   115200, 0x3744b3ed
+0,          7,          7,        1,   115200, 0x201b9db1
+0,          8,          8,        1,   115200, 0x75e1a17b
+0,          9,          9,        1,   115200, 0xb73857e2
+0,         10,         10,        1,   115200, 0x02b6ab21

diff --git a/tests/ref/fate/filter-fps-up b/tests/ref/fate/filter-fps-up
new file mode 100644
index 0000000..f1a8478
--- /dev/null
+++ b/tests/ref/fate/filter-fps-up

@@ -0,0 +1,17 @@
+#tb 0: 1/7
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          0,          0,        1,   115200, 0x3744b3ed
+0,          1,          1,        1,   115200, 0x3744b3ed
+0,          2,          2,        1,   115200, 0x60a58f35
+0,          3,          3,        1,   115200, 0x60a58f35
+0,          4,          4,        1,   115200, 0x60a58f35
+0,          5,          5,        1,   115200, 0x09ffa4e1
+0,          6,          6,        1,   115200, 0x09ffa4e1
+0,          7,          7,        1,   115200, 0x33f15918
+0,          8,          8,        1,   115200, 0x33f15918
+0,          9,          9,        1,   115200, 0xb0dfacf8
+0,         10,         10,        1,   115200, 0xb0dfacf8
+0,         11,         11,        1,   115200, 0xb0dfacf8

diff --git a/tests/ref/fate/filter-fps-up-round-down b/tests/ref/fate/filter-fps-up-round-down
new file mode 100644
index 0000000..daecb12
--- /dev/null
+++ b/tests/ref/fate/filter-fps-up-round-down

@@ -0,0 +1,16 @@
+#tb 0: 1/7
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          0,          0,        1,   115200, 0x3744b3ed
+0,          1,          1,        1,   115200, 0x3744b3ed
+0,          2,          2,        1,   115200, 0x60a58f35
+0,          3,          3,        1,   115200, 0x60a58f35
+0,          4,          4,        1,   115200, 0x09ffa4e1
+0,          5,          5,        1,   115200, 0x09ffa4e1
+0,          6,          6,        1,   115200, 0x09ffa4e1
+0,          7,          7,        1,   115200, 0x33f15918
+0,          8,          8,        1,   115200, 0x33f15918
+0,          9,          9,        1,   115200, 0xb0dfacf8
+0,         10,         10,        1,   115200, 0xb0dfacf8

diff --git a/tests/ref/fate/filter-fps-up-round-up b/tests/ref/fate/filter-fps-up-round-up
new file mode 100644
index 0000000..d69dbf6
--- /dev/null
+++ b/tests/ref/fate/filter-fps-up-round-up

@@ -0,0 +1,17 @@
+#tb 0: 1/7
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          0,          0,        1,   115200, 0x3744b3ed
+0,          1,          1,        1,   115200, 0x3744b3ed
+0,          2,          2,        1,   115200, 0x3744b3ed
+0,          3,          3,        1,   115200, 0x60a58f35
+0,          4,          4,        1,   115200, 0x60a58f35
+0,          5,          5,        1,   115200, 0x09ffa4e1
+0,          6,          6,        1,   115200, 0x09ffa4e1
+0,          7,          7,        1,   115200, 0x33f15918
+0,          8,          8,        1,   115200, 0x33f15918
+0,          9,          9,        1,   115200, 0x33f15918
+0,         10,         10,        1,   115200, 0xb0dfacf8
+0,         11,         11,        1,   115200, 0xb0dfacf8

diff --git a/tests/ref/fate/filter-framerate-12bit-down b/tests/ref/fate/filter-framerate-12bit-down
new file mode 100644
index 0000000..25a3c0a
--- /dev/null
+++ b/tests/ref/fate/filter-framerate-12bit-down

@@ -0,0 +1,55 @@
+#tb 0: 1/50
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          0,          0,        1,   307200, 0xb49cf016
+0,          1,          1,        1,   307200, 0xbccf696d
+0,          2,          2,        1,   307200, 0x709c8dd1
+0,          3,          3,        1,   307200, 0xb948d907
+0,          4,          4,        1,   307200, 0x850f849a
+0,          5,          5,        1,   307200, 0xaf71e7fc
+0,          6,          6,        1,   307200, 0x0a1ba486
+0,          7,          7,        1,   307200, 0x0601f62a
+0,          8,          8,        1,   307200, 0x2c6d3a59
+0,          9,          9,        1,   307200, 0xed03597b
+0,         10,         10,        1,   307200, 0x9f84df5d
+0,         11,         11,        1,   307200, 0x9fbe8c84
+0,         12,         12,        1,   307200, 0x8f7bc394
+0,         13,         13,        1,   307200, 0x5113c787
+0,         14,         14,        1,   307200, 0x41e8002f
+0,         15,         15,        1,   307200, 0xab2162fc
+0,         16,         16,        1,   307200, 0xbf8f847c
+0,         17,         17,        1,   307200, 0x832d9ef7
+0,         18,         18,        1,   307200, 0xd2f5e043
+0,         19,         19,        1,   307200, 0xceeaddb8
+0,         20,         20,        1,   307200, 0xf2b12fe5
+0,         21,         21,        1,   307200, 0xf3477e11
+0,         22,         22,        1,   307200, 0xdf387773
+0,         23,         23,        1,   307200, 0x273be7e2
+0,         24,         24,        1,   307200, 0x68cd0360
+0,         25,         25,        1,   307200, 0x4693ab03
+0,         26,         26,        1,   307200, 0xe2baef73
+0,         27,         27,        1,   307200, 0x0c9fa60a
+0,         28,         28,        1,   307200, 0x6e4ddbc5
+0,         29,         29,        1,   307200, 0xd1b2353c
+0,         30,         30,        1,   307200, 0x8a512668
+0,         31,         31,        1,   307200, 0x7224b439
+0,         32,         32,        1,   307200, 0x7a9243e2
+0,         33,         33,        1,   307200, 0x9a7e4553
+0,         34,         34,        1,   307200, 0x4d795626
+0,         35,         35,        1,   307200, 0x4e24d659
+0,         36,         36,        1,   307200, 0xa230b54b
+0,         37,         37,        1,   307200, 0x14598ea5
+0,         38,         38,        1,   307200, 0x21619cf3
+0,         39,         39,        1,   307200, 0x5220a167
+0,         40,         40,        1,   307200, 0xb6505ff0
+0,         41,         41,        1,   307200, 0x0a482a3d
+0,         42,         42,        1,   307200, 0x6bdce40c
+0,         43,         43,        1,   307200, 0x3c6074f3
+0,         44,         44,        1,   307200, 0x369c71c8
+0,         45,         45,        1,   307200, 0x4fda2634
+0,         46,         46,        1,   307200, 0x4df2d619
+0,         47,         47,        1,   307200, 0x21205aab
+0,         48,         48,        1,   307200, 0xe00f48c2
+0,         49,         49,        1,   307200, 0xe3b11798

diff --git a/tests/ref/fate/filter-framerate-12bit-up b/tests/ref/fate/filter-framerate-12bit-up
new file mode 100644
index 0000000..15bf9be
--- /dev/null
+++ b/tests/ref/fate/filter-framerate-12bit-up

@@ -0,0 +1,65 @@
+#tb 0: 1/60
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          0,          0,        1,   307200, 0xb49cf016
+0,          1,          1,        1,   307200, 0x95d191b2
+0,          2,          2,        1,   307200, 0x20e5173b
+0,          3,          3,        1,   307200, 0x5378b13c
+0,          4,          4,        1,   307200, 0x3e304543
+0,          5,          5,        1,   307200, 0x2f131cdb
+0,          6,          6,        1,   307200, 0x83dbe321
+0,          7,          7,        1,   307200, 0xb81fc682
+0,          8,          8,        1,   307200, 0xb32a8644
+0,          9,          9,        1,   307200, 0xc5c8b0f8
+0,         10,         10,        1,   307200, 0x27945d0a
+0,         11,         11,        1,   307200, 0x444c7640
+0,         12,         12,        1,   307200, 0xcba4c6bb
+0,         13,         13,        1,   307200, 0xf923c7a3
+0,         14,         14,        1,   307200, 0x88149757
+0,         15,         15,        1,   307200, 0x4bdfd3ca
+0,         16,         16,        1,   307200, 0x1c279695
+0,         17,         17,        1,   307200, 0x634cc809
+0,         18,         18,        1,   307200, 0xda8c6c41
+0,         19,         19,        1,   307200, 0x2136c986
+0,         20,         20,        1,   307200, 0x457576a8
+0,         21,         21,        1,   307200, 0xe2337c57
+0,         22,         22,        1,   307200, 0x8ed517c9
+0,         23,         23,        1,   307200, 0x4fd35f99
+0,         24,         24,        1,   307200, 0x4237e892
+0,         25,         25,        1,   307200, 0x1383a9d8
+0,         26,         26,        1,   307200, 0xc7195735
+0,         27,         27,        1,   307200, 0x0e058165
+0,         28,         28,        1,   307200, 0x0b81f345
+0,         29,         29,        1,   307200, 0x2ddf2f0a
+0,         30,         30,        1,   307200, 0x0b58bcf7
+0,         31,         31,        1,   307200, 0x1b684a1d
+0,         32,         32,        1,   307200, 0x1e44f1cf
+0,         33,         33,        1,   307200, 0x3ed6b5d8
+0,         34,         34,        1,   307200, 0x00881a40
+0,         35,         35,        1,   307200, 0x2c3d1406
+0,         36,         36,        1,   307200, 0xbd67248a
+0,         37,         37,        1,   307200, 0x46261913
+0,         38,         38,        1,   307200, 0xe5b2bbaa
+0,         39,         39,        1,   307200, 0x0e4455cd
+0,         40,         40,        1,   307200, 0xb4943212
+0,         41,         41,        1,   307200, 0xf96b6808
+0,         42,         42,        1,   307200, 0x58adad3f
+0,         43,         43,        1,   307200, 0x978413f0
+0,         44,         44,        1,   307200, 0x0037320a
+0,         45,         45,        1,   307200, 0xaac8e1ca
+0,         46,         46,        1,   307200, 0xc3578407
+0,         47,         47,        1,   307200, 0xfc29c675
+0,         48,         48,        1,   307200, 0x22ed5b5a
+0,         49,         49,        1,   307200, 0x6d58e21e
+0,         50,         50,        1,   307200, 0xbf62b4c3
+0,         51,         51,        1,   307200, 0xbd5edb2d
+0,         52,         52,        1,   307200, 0x55528432
+0,         53,         53,        1,   307200, 0xa3f1e514
+0,         54,         54,        1,   307200, 0xba073e6f
+0,         55,         55,        1,   307200, 0x29b8df00
+0,         56,         56,        1,   307200, 0x1517512b
+0,         57,         57,        1,   307200, 0x4e740b42
+0,         58,         58,        1,   307200, 0xbd6b7053
+0,         59,         59,        1,   307200, 0xe73f29ef

diff --git a/tests/ref/fate/filter-framerate-up b/tests/ref/fate/filter-framerate-up
index b2af9cb..a276bf6 100644
--- a/tests/ref/fate/filter-framerate-up
+++ b/tests/ref/fate/filter-framerate-up

@@ -4,12 +4,12 @@
 #dimensions 0: 320x240
 #sar 0: 1/1
 0,          0,          0,        1,   115200, 0x3744b3ed
-0,          1,          1,        1,   115200, 0xc44bdc65
+0,          1,          1,        1,   115200, 0xec1fdfa0
 0,          2,          2,        1,   115200, 0xa17f0d74
-0,          3,          3,        1,   115200, 0xb0c83274
-0,          4,          4,        1,   115200, 0x232d6368
+0,          3,          3,        1,   115200, 0xd72532a9
+0,          4,          4,        1,   115200, 0x032e60f8
 0,          5,          5,        1,   115200, 0x6e318ba0
-0,          6,          6,        1,   115200, 0x247e846e
+0,          6,          6,        1,   115200, 0x76018292
 0,          7,          7,        1,   115200, 0x89e27599
-0,          8,          8,        1,   115200, 0x31c5704e
-0,          9,          9,        1,   115200, 0x97e45fec
+0,          8,          8,        1,   115200, 0x68536eac
+0,          9,          9,        1,   115200, 0xc3ac62a8

diff --git a/tests/ref/fate/filter-hls-append b/tests/ref/fate/filter-hls-append
index be65c82..52b7dff 100644
--- a/tests/ref/fate/filter-hls-append
+++ b/tests/ref/fate/filter-hls-append

@@ -5,1534 +5,1534 @@
 #channel_layout 0: 4
 #channel_layout_name 0: mono
 0,          0,          0,     1152,     2304, 0x907cb7fa
-0,       1152,       1152,     1152,     2304, 0xb8dc7525
-0,       2304,       2304,     1152,     2304, 0x3e7d6905
-0,       3456,       3456,     1152,     2304, 0xef47877b
-0,       4608,       4608,     1152,     2304, 0xfe916b7e
-0,       5760,       5760,     1152,     2304, 0xe3d08cde
-0,       6912,       6912,     1152,     2304, 0xff7f86cf
-0,       8064,       8064,     1152,     2304, 0x843e6f95
-0,       9216,       9216,     1152,     2304, 0x81577c26
-0,      10368,      10368,     1152,     2304, 0x04a085d5
-0,      11520,      11520,     1152,     2304, 0x1c5a76f5
-0,      12672,      12672,     1152,     2304, 0x4ee78623
-0,      13824,      13824,     1152,     2304, 0x8ec861dc
-0,      14976,      14976,     1152,     2304, 0x0ca179d8
-0,      16128,      16128,     1152,     2304, 0xc6da750f
-0,      17280,      17280,     1152,     2304, 0xf6bf79b5
-0,      18432,      18432,     1152,     2304, 0x97b88a43
-0,      19584,      19584,     1152,     2304, 0xf13c7b9c
-0,      20736,      20736,     1152,     2304, 0xdfba83af
-0,      21888,      21888,     1152,     2304, 0xc9467d4b
-0,      23040,      23040,     1152,     2304, 0xbbb58e2b
-0,      24192,      24192,     1152,     2304, 0x3a1078ea
-0,      25344,      25344,     1152,     2304, 0xe9587a5c
-0,      26496,      26496,     1152,     2304, 0xef5a8039
-0,      27648,      27648,     1152,     2304, 0x9d5f782f
-0,      28800,      28800,     1152,     2304, 0x1a548291
-0,      29952,      29952,     1152,     2304, 0x07517701
-0,      31104,      31104,     1152,     2304, 0x78127d6e
-0,      32256,      32256,     1152,     2304, 0x62e2788a
-0,      33408,      33408,     1152,     2304, 0x29397ad9
-0,      34560,      34560,     1152,     2304, 0x45da82d6
-0,      35712,      35712,     1152,     2304, 0x8ed66e51
-0,      36864,      36864,     1152,     2304, 0x660775cd
-0,      38016,      38016,     1152,     2304, 0x802c767a
-0,      39168,      39168,     1152,     2304, 0xcc055840
-0,      40320,      40320,     1152,     2304, 0x701b7eaf
-0,      41472,      41472,     1152,     2304, 0x8290749f
-0,      42624,      42624,     1152,     2304, 0x2c7b7d30
-0,      43776,      43776,     1152,     2304, 0xe4f17743
-0,      44928,      44928,     1152,     2304, 0x0e747d6e
-0,      46080,      46080,     1152,     2304, 0xbe7775a0
-0,      47232,      47232,     1152,     2304, 0xcf797673
-0,      48384,      48384,     1152,     2304, 0x29cb7800
-0,      49536,      49536,     1152,     2304, 0xfc947890
-0,      50688,      50688,     1152,     2304, 0x62757fc6
-0,      51840,      51840,     1152,     2304, 0x098876d0
-0,      52992,      52992,     1152,     2304, 0xa9567ee2
-0,      54144,      54144,     1152,     2304, 0xe3bb9173
-0,      55296,      55296,     1152,     2304, 0xcc2d6dee
-0,      56448,      56448,     1152,     2304, 0xe94591ab
-0,      57600,      57600,     1152,     2304, 0x5c7588de
-0,      58752,      58752,     1152,     2304, 0xfd83643c
-0,      59904,      59904,     1152,     2304, 0x528177f1
-0,      61056,      61056,     1152,     2304, 0x65d08474
-0,      62208,      62208,     1152,     2304, 0x738d765b
-0,      63360,      63360,     1152,     2304, 0xdd3d810e
-0,      64512,      64512,     1152,     2304, 0xef4f90d3
-0,      65664,      65664,     1152,     2304, 0x61e28d43
-0,      66816,      66816,     1152,     2304, 0x9a11796b
-0,      67968,      67968,     1152,     2304, 0x96c97dcd
-0,      69120,      69120,     1152,     2304, 0xa8fe8621
-0,      70272,      70272,     1152,     2304, 0x499b7d38
-0,      71424,      71424,     1152,     2304, 0xfcb078a9
-0,      72576,      72576,     1152,     2304, 0x40d78651
-0,      73728,      73728,     1152,     2304, 0xa4af7234
-0,      74880,      74880,     1152,     2304, 0x6831870a
-0,      76032,      76032,     1152,     2304, 0x030e7b9d
-0,      77184,      77184,     1152,     2304, 0x445a75b6
-0,      78336,      78336,     1152,     2304, 0x09857389
-0,      79488,      79488,     1152,     2304, 0x0d018866
-0,      80640,      80640,     1152,     2304, 0x2afe810a
-0,      81792,      81792,     1152,     2304, 0x0bcf7c43
-0,      82944,      82944,     1152,     2304, 0x13737c12
-0,      84096,      84096,     1152,     2304, 0x716c7bba
-0,      85248,      85248,     1152,     2304, 0xb801823b
-0,      86400,      86400,     1152,     2304, 0x0fd573ee
-0,      87552,      87552,     1152,     2304, 0xe1ab879c
-0,      88704,      88704,     1152,     2304, 0x49e6764f
-0,      89856,      89856,     1152,     2304, 0xd5f26ddc
-0,      91008,      91008,     1152,     2304, 0x076775ff
-0,      92160,      92160,     1152,     2304, 0xfbb86fce
-0,      93312,      93312,     1152,     2304, 0x20c56858
-0,      94464,      94464,     1152,     2304, 0x043e6891
-0,      95616,      95616,     1152,     2304, 0x59648729
-0,      96768,      96768,     1152,     2304, 0xd4907a63
-0,      97920,      97920,     1152,     2304, 0xd0208a4c
-0,      99072,      99072,     1152,     2304, 0xce968383
-0,     100224,     100224,     1152,     2304, 0x3cfc7cd1
-0,     101376,     101376,     1152,     2304, 0x628a7bf5
-0,     102528,     102528,     1152,     2304, 0x9cfe8a4f
-0,     103680,     103680,     1152,     2304, 0xdf6f7c6d
-0,     104832,     104832,     1152,     2304, 0x6cf6882a
-0,     105984,     105984,     1152,     2304, 0x099773a3
-0,     107136,     107136,     1152,     2304, 0x4a1c7649
-0,     108288,     108288,     1152,     2304, 0x31ea71cb
-0,     109440,     109440,     1152,     2304, 0xed127ed9
-0,     110592,     110592,     1152,     2304, 0x5b156954
-0,     111744,     111744,     1152,     2304, 0xdd638532
-0,     112896,     112896,     1152,     2304, 0xf1a271f2
-0,     114048,     114048,     1152,     2304, 0x779184d7
-0,     115200,     115200,     1152,     2304, 0x49a88aa8
-0,     116352,     116352,     1152,     2304, 0xa11b7c90
-0,     117504,     117504,     1152,     2304, 0xbf488274
-0,     118656,     118656,     1152,     2304, 0x002f79a8
-0,     119808,     119808,     1152,     2304, 0x0ed97e2f
-0,     120960,     120960,     1152,     2304, 0x7845878f
-0,     122112,     122112,     1152,     2304, 0x46d777dc
-0,     123264,     123264,     1152,     2304, 0x8d0179e3
-0,     124416,     124416,     1152,     2304, 0x38917f9f
-0,     125568,     125568,     1152,     2304, 0x449876e7
-0,     126720,     126720,     1152,     2304, 0x001a8769
-0,     127872,     127872,     1152,     2304, 0x06c1826b
-0,     129024,     129024,     1152,     2304, 0x41b68047
-0,     130176,     130176,     1152,     2304, 0xeb9782c6
-0,     131328,     131328,     1152,     2304, 0x7cd9719c
-0,     132480,     132480,     1152,     2304, 0x3a4a767c
-0,     133632,     133632,     1152,     2304, 0x7f887e81
-0,     134784,     134784,     1152,     2304, 0xf75d714b
-0,     135936,     135936,     1152,     2304, 0x33b57e9f
-0,     137088,     137088,     1152,     2304, 0xc732749e
-0,     138240,     138240,     1152,     2304, 0x386f7e1a
-0,     139392,     139392,     1152,     2304, 0x6b9c767d
-0,     140544,     140544,     1152,     2304, 0x701c83e5
-0,     141696,     141696,     1152,     2304, 0xb92571e1
-0,     142848,     142848,     1152,     2304, 0x833a84bc
-0,     144000,     144000,     1152,     2304, 0x1b6984e0
-0,     145152,     145152,     1152,     2304, 0x1b2474ba
-0,     146304,     146304,     1152,     2304, 0xc22775a6
-0,     147456,     147456,     1152,     2304, 0x3e8f7972
-0,     148608,     148608,     1152,     2304, 0x17a28a65
-0,     149760,     149760,     1152,     2304, 0x9b6178a4
-0,     150912,     150912,     1152,     2304, 0x5d707873
-0,     152064,     152064,     1152,     2304, 0x68e2645a
-0,     153216,     153216,     1152,     2304, 0x1e377d28
-0,     154368,     154368,     1152,     2304, 0x54b384be
-0,     155520,     155520,     1152,     2304, 0x0617808c
-0,     156672,     156672,     1152,     2304, 0xbc2b8a6c
-0,     157824,     157824,     1152,     2304, 0x7ced7180
-0,     158976,     158976,     1152,     2304, 0xf22180ab
-0,     160128,     160128,     1152,     2304, 0xf13682c9
-0,     161280,     161280,     1152,     2304, 0x7eff87fd
-0,     162432,     162432,     1152,     2304, 0x5a0b5cec
-0,     163584,     163584,     1152,     2304, 0x57c18906
-0,     164736,     164736,     1152,     2304, 0xb55a6a16
-0,     165888,     165888,     1152,     2304, 0xf2608371
-0,     167040,     167040,     1152,     2304, 0x36df7576
-0,     168192,     168192,     1152,     2304, 0xdb106fb4
-0,     169344,     169344,     1152,     2304, 0x7e4f85d0
-0,     170496,     170496,     1152,     2304, 0xe3ee78ab
-0,     171648,     171648,     1152,     2304, 0xd36b7dc7
-0,     172800,     172800,     1152,     2304, 0xadab7c5c
-0,     173952,     173952,     1152,     2304, 0x70786f26
-0,     175104,     175104,     1152,     2304, 0xcd5d717e
-0,     176256,     176256,     1152,     2304, 0xc1a96f9a
-0,     177408,     177408,     1152,     2304, 0xad777887
-0,     178560,     178560,     1152,     2304, 0x98277c16
-0,     179712,     179712,     1152,     2304, 0x868882c5
-0,     180864,     180864,     1152,     2304, 0xc48092b9
-0,     182016,     182016,     1152,     2304, 0x230069da
-0,     183168,     183168,     1152,     2304, 0x14147ad6
-0,     184320,     184320,     1152,     2304, 0xc9007172
-0,     185472,     185472,     1152,     2304, 0x85d67bcc
-0,     186624,     186624,     1152,     2304, 0x22418bab
-0,     187776,     187776,     1152,     2304, 0xe53c8b71
-0,     188928,     188928,     1152,     2304, 0x5a1a9053
-0,     190080,     190080,     1152,     2304, 0x9cd179af
-0,     191232,     191232,     1152,     2304, 0xbb3c7d72
-0,     192384,     192384,     1152,     2304, 0x477a8677
-0,     193536,     193536,     1152,     2304, 0xe3337834
-0,     194688,     194688,     1152,     2304, 0x1cb56d77
-0,     195840,     195840,     1152,     2304, 0xe89d6dac
-0,     196992,     196992,     1152,     2304, 0xd468827e
-0,     198144,     198144,     1152,     2304, 0xebc46b87
-0,     199296,     199296,     1152,     2304, 0x5fbb78d2
-0,     200448,     200448,     1152,     2304, 0xa1b483d6
-0,     201600,     201600,     1152,     2304, 0x6fec7cab
-0,     202752,     202752,     1152,     2304, 0xd86d6f6c
-0,     203904,     203904,     1152,     2304, 0x8c2c7d51
-0,     205056,     205056,     1152,     2304, 0xe8377cd7
-0,     206208,     206208,     1152,     2304, 0xb57071b4
-0,     207360,     207360,     1152,     2304, 0xc35c71fd
-0,     208512,     208512,     1152,     2304, 0x789079e9
-0,     209664,     209664,     1152,     2304, 0x413b710e
-0,     210816,     210816,     1152,     2304, 0x82678332
-0,     211968,     211968,     1152,     2304, 0xe1576e75
-0,     213120,     213120,     1152,     2304, 0x7c0b7ad6
-0,     214272,     214272,     1152,     2304, 0xc6b6786d
-0,     215424,     215424,     1152,     2304, 0x736f7b89
-0,     216576,     216576,     1152,     2304, 0x0ded72f1
-0,     217728,     217728,     1152,     2304, 0xcb877a3c
-0,     218880,     218880,     1152,     2304, 0x7c497d40
-0,     220032,     220032,     1152,     2304, 0xaefc798c
-0,     221184,     221184,     1152,     2304, 0x4cce748c
-0,     222336,     222336,     1152,     2304, 0xaa187fbe
-0,     223488,     223488,     1152,     2304, 0x1aa77db9
-0,     224640,     224640,     1152,     2304, 0x9e0074b8
-0,     225792,     225792,     1152,     2304, 0x74ee822b
-0,     226944,     226944,     1152,     2304, 0x975c6ff6
-0,     228096,     228096,     1152,     2304, 0xe1847bb4
-0,     229248,     229248,     1152,     2304, 0xe0828777
-0,     230400,     230400,     1152,     2304, 0xf4027205
-0,     231552,     231552,     1152,     2304, 0x535e7a20
-0,     232704,     232704,     1152,     2304, 0x5bd88404
-0,     233856,     233856,     1152,     2304, 0xf29478b1
-0,     235008,     235008,     1152,     2304, 0x9b7c7d88
-0,     236160,     236160,     1152,     2304, 0xaeb07335
-0,     237312,     237312,     1152,     2304, 0xbef06e08
-0,     238464,     238464,     1152,     2304, 0x795f7b8c
-0,     239616,     239616,     1152,     2304, 0x435a674d
-0,     240768,     240768,     1152,     2304, 0xd8ee7a09
-0,     241920,     241920,     1152,     2304, 0x9059812e
-0,     243072,     243072,     1152,     2304, 0x7481744a
-0,     244224,     244224,     1152,     2304, 0xdff27475
-0,     245376,     245376,     1152,     2304, 0xb17783ab
-0,     246528,     246528,     1152,     2304, 0x42e9706b
-0,     247680,     247680,     1152,     2304, 0x9f0d86b4
-0,     248832,     248832,     1152,     2304, 0x2963955f
-0,     249984,     249984,     1152,     2304, 0x059a6957
-0,     251136,     251136,     1152,     2304, 0x85948206
-0,     252288,     252288,     1152,     2304, 0x185e8400
-0,     253440,     253440,     1152,     2304, 0xe98e70df
-0,     254592,     254592,     1152,     2304, 0x69057b27
-0,     255744,     255744,     1152,     2304, 0x49e26f21
-0,     256896,     256896,     1152,     2304, 0xb0867da5
-0,     258048,     258048,     1152,     2304, 0x785980ff
-0,     259200,     259200,     1152,     2304, 0xf4b774be
-0,     260352,     260352,     1152,     2304, 0x63897e8c
-0,     261504,     261504,     1152,     2304, 0x248b89af
-0,     262656,     262656,     1152,     2304, 0xd3627c4a
-0,     263808,     263808,     1152,     2304, 0x5a4d9349
-0,     264960,     264960,     1152,     2304, 0xe2ce7c4c
-0,     266112,     266112,     1152,     2304, 0x321f6c0b
-0,     267264,     267264,     1152,     2304, 0x51ac74e0
-0,     268416,     268416,     1152,     2304, 0x8efa91ba
-0,     269568,     269568,     1152,     2304, 0x8b4b784c
-0,     270720,     270720,     1152,     2304, 0xe9e4879e
-0,     271872,     271872,     1152,     2304, 0x8dc28081
-0,     273024,     273024,     1152,     2304, 0x44b477b0
-0,     274176,     274176,     1152,     2304, 0xf7b67084
-0,     275328,     275328,     1152,     2304, 0x4b198c17
-0,     276480,     276480,     1152,     2304, 0x9c947194
-0,     277632,     277632,     1152,     2304, 0x6eaa7f15
-0,     278784,     278784,     1152,     2304, 0x119f7c1d
-0,     279936,     279936,     1152,     2304, 0x157b7f43
-0,     281088,     281088,     1152,     2304, 0xcd2e7acc
-0,     282240,     282240,     1152,     2304, 0x97597247
-0,     283392,     283392,     1152,     2304, 0x7ba06acb
-0,     284544,     284544,     1152,     2304, 0x233c7995
-0,     285696,     285696,     1152,     2304, 0x08e28587
-0,     286848,     286848,     1152,     2304, 0x92be84b5
-0,     288000,     288000,     1152,     2304, 0xbb857d43
-0,     289152,     289152,     1152,     2304, 0x168e7c74
-0,     290304,     290304,     1152,     2304, 0xac5465d9
-0,     291456,     291456,     1152,     2304, 0x18f58831
-0,     292608,     292608,     1152,     2304, 0x19b48196
-0,     293760,     293760,     1152,     2304, 0x20297653
-0,     294912,     294912,     1152,     2304, 0x93397a82
-0,     296064,     296064,     1152,     2304, 0x65ea7deb
-0,     297216,     297216,     1152,     2304, 0xd7316e20
-0,     298368,     298368,     1152,     2304, 0x94107f2b
-0,     299520,     299520,     1152,     2304, 0xec3b7dc6
-0,     300672,     300672,     1152,     2304, 0x2d3783aa
-0,     301824,     301824,     1152,     2304, 0x07e47340
-0,     302976,     302976,     1152,     2304, 0xbc117893
-0,     304128,     304128,     1152,     2304, 0x8bd97851
-0,     305280,     305280,     1152,     2304, 0xc27376a9
-0,     306432,     306432,     1152,     2304, 0x30d88c83
-0,     307584,     307584,     1152,     2304, 0x19c2704c
-0,     308736,     308736,     1152,     2304, 0x093b7b6e
-0,     309888,     309888,     1152,     2304, 0x221a7349
-0,     311040,     311040,     1152,     2304, 0xa4fd82cd
-0,     312192,     312192,     1152,     2304, 0x762e6bc9
-0,     313344,     313344,     1152,     2304, 0x270075d4
-0,     314496,     314496,     1152,     2304, 0xa5f27b90
-0,     315648,     315648,     1152,     2304, 0xf72e7edc
-0,     316800,     316800,     1152,     2304, 0x42178486
-0,     317952,     317952,     1152,     2304, 0x5f7978e8
-0,     319104,     319104,     1152,     2304, 0x5d7c6703
-0,     320256,     320256,     1152,     2304, 0x2c4483d5
-0,     321408,     321408,     1152,     2304, 0x31bd951d
-0,     322560,     322560,     1152,     2304, 0x99487af0
-0,     323712,     323712,     1152,     2304, 0x0bd27ee7
-0,     324864,     324864,     1152,     2304, 0xc3e07ac4
-0,     326016,     326016,     1152,     2304, 0x98a16ba7
-0,     327168,     327168,     1152,     2304, 0xd7a5747b
-0,     328320,     328320,     1152,     2304, 0x96fb811c
-0,     329472,     329472,     1152,     2304, 0x7cee8109
-0,     330624,     330624,     1152,     2304, 0x52b18ba2
-0,     331776,     331776,     1152,     2304, 0x33be8861
-0,     332928,     332928,     1152,     2304, 0xf41282a0
-0,     334080,     334080,     1152,     2304, 0xb4268993
-0,     335232,     335232,     1152,     2304, 0x52126a1c
-0,     336384,     336384,     1152,     2304, 0x050b6f7a
-0,     337536,     337536,     1152,     2304, 0x67a26fc3
-0,     338688,     338688,     1152,     2304, 0x966c7cf2
-0,     339840,     339840,     1152,     2304, 0x22097750
-0,     340992,     340992,     1152,     2304, 0xfbb0796c
-0,     342144,     342144,     1152,     2304, 0xbd508964
-0,     343296,     343296,     1152,     2304, 0xc24478d8
-0,     344448,     344448,     1152,     2304, 0x3913769d
-0,     345600,     345600,     1152,     2304, 0x8aab872f
-0,     346752,     346752,     1152,     2304, 0x7cb4822f
-0,     347904,     347904,     1152,     2304, 0xea318144
-0,     349056,     349056,     1152,     2304, 0xaf0f86d2
-0,     350208,     350208,     1152,     2304, 0x24f27598
-0,     351360,     351360,     1152,     2304, 0xd76f6d40
-0,     352512,     352512,     1152,     2304, 0x085071a7
-0,     353664,     353664,     1152,     2304, 0x1d11704c
-0,     354816,     354816,     1152,     2304, 0x21517cbd
-0,     355968,     355968,     1152,     2304, 0xcdca8d32
-0,     357120,     357120,     1152,     2304, 0x71c18433
-0,     358272,     358272,     1152,     2304, 0xd39d7d81
-0,     359424,     359424,     1152,     2304, 0x7a0d7a43
-0,     360576,     360576,     1152,     2304, 0x007c8884
-0,     361728,     361728,     1152,     2304, 0x403282d0
-0,     362880,     362880,     1152,     2304, 0xe3737214
-0,     364032,     364032,     1152,     2304, 0xaf906f47
-0,     365184,     365184,     1152,     2304, 0x54f57b3b
-0,     366336,     366336,     1152,     2304, 0x29be7791
-0,     367488,     367488,     1152,     2304, 0xe3c663d5
-0,     368640,     368640,     1152,     2304, 0xd7258238
-0,     369792,     369792,     1152,     2304, 0x3719820d
-0,     370944,     370944,     1152,     2304, 0xbe04814f
-0,     372096,     372096,     1152,     2304, 0x556c815e
-0,     373248,     373248,     1152,     2304, 0xb2447e10
-0,     374400,     374400,     1152,     2304, 0x7c16867c
-0,     375552,     375552,     1152,     2304, 0x6a7b78ed
-0,     376704,     376704,     1152,     2304, 0x5d307b81
-0,     377856,     377856,     1152,     2304, 0xaab680d3
-0,     379008,     379008,     1152,     2304, 0xb5d37a23
-0,     380160,     380160,     1152,     2304, 0x7f7d6f76
-0,     381312,     381312,     1152,     2304, 0x317a8296
-0,     382464,     382464,     1152,     2304, 0x8a987b3d
-0,     383616,     383616,     1152,     2304, 0x4f317a27
-0,     384768,     384768,     1152,     2304, 0xfc65852f
-0,     385920,     385920,     1152,     2304, 0x40527719
-0,     387072,     387072,     1152,     2304, 0x84988e13
-0,     388224,     388224,     1152,     2304, 0x318b6ddc
-0,     389376,     389376,     1152,     2304, 0x94cf7939
-0,     390528,     390528,     1152,     2304, 0x6f22819d
-0,     391680,     391680,     1152,     2304, 0xa7dd80a9
-0,     392832,     392832,     1152,     2304, 0x1c7968fa
-0,     393984,     393984,     1152,     2304, 0xd9937bae
-0,     395136,     395136,     1152,     2304, 0xf7137cf9
-0,     396288,     396288,     1152,     2304, 0xeadb84b5
-0,     397440,     397440,     1152,     2304, 0x9a2390ac
-0,     398592,     398592,     1152,     2304, 0xdb6a73f6
-0,     399744,     399744,     1152,     2304, 0x69e07507
-0,     400896,     400896,     1152,     2304, 0xbc8478b2
-0,     402048,     402048,     1152,     2304, 0x32cf8638
-0,     403200,     403200,     1152,     2304, 0x2b8d755a
-0,     404352,     404352,     1152,     2304, 0x52e05bd2
-0,     405504,     405504,     1152,     2304, 0x2aed8c49
-0,     406656,     406656,     1152,     2304, 0x587a896e
-0,     407808,     407808,     1152,     2304, 0x6dd87dee
-0,     408960,     408960,     1152,     2304, 0xd2858338
-0,     410112,     410112,     1152,     2304, 0xd90f7842
-0,     411264,     411264,     1152,     2304, 0xd6fb6d4a
-0,     412416,     412416,     1152,     2304, 0x85498aea
-0,     413568,     413568,     1152,     2304, 0x18597790
-0,     414720,     414720,     1152,     2304, 0x3cd78fea
-0,     415872,     415872,     1152,     2304, 0x94377fbc
-0,     417024,     417024,     1152,     2304, 0xf9db73f5
-0,     418176,     418176,     1152,     2304, 0x14fb6fca
-0,     419328,     419328,     1152,     2304, 0xe9d17d69
-0,     420480,     420480,     1152,     2304, 0xdeb57286
-0,     421632,     421632,     1152,     2304, 0xa5d37e17
-0,     422784,     422784,     1152,     2304, 0xcf6882fb
-0,     423936,     423936,     1152,     2304, 0x31758066
-0,     425088,     425088,     1152,     2304, 0x6b4d8175
-0,     426240,     426240,     1152,     2304, 0x2a3d7f8e
-0,     427392,     427392,     1152,     2304, 0xc066743b
-0,     428544,     428544,     1152,     2304, 0xcab88146
-0,     429696,     429696,     1152,     2304, 0x2b4c6e13
-0,     430848,     430848,     1152,     2304, 0x00b36b6f
-0,     432000,     432000,     1152,     2304, 0x664a88d3
-0,     433152,     433152,     1152,     2304, 0x18a66f76
-0,     434304,     434304,     1152,     2304, 0x4f828a8b
-0,     435456,     435456,     1152,     2304, 0x9cc7728e
-0,     436608,     436608,     1152,     2304, 0xbe357936
-0,     437760,     437760,     1152,     2304, 0x19878f8d
-0,     438912,     438912,     1152,     2304, 0x227b7c71
-0,     440064,     440064,     1152,     2304, 0xf7c879ec
-0,     441216,     441216,     1152,     2304, 0x0bca7b50
-0,     442368,     442368,     1152,     2304, 0xe4398304
-0,     443520,     443520,     1152,     2304, 0xf5da75c7
-0,     444672,     444672,     1152,     2304, 0x9f9070a3
-0,     445824,     445824,     1152,     2304, 0x789076f6
-0,     446976,     446976,     1152,     2304, 0x362977cc
-0,     448128,     448128,     1152,     2304, 0x4d0a8928
-0,     449280,     449280,     1152,     2304, 0x1bb767ec
-0,     450432,     450432,     1152,     2304, 0xbe727fa5
-0,     451584,     451584,     1152,     2304, 0x27f38347
-0,     452736,     452736,     1152,     2304, 0x0a3c8783
-0,     453888,     453888,     1152,     2304, 0x8249639c
-0,     455040,     455040,     1152,     2304, 0x3b076f69
-0,     456192,     456192,     1152,     2304, 0xd9597ee3
-0,     457344,     457344,     1152,     2304, 0x026e7fad
-0,     458496,     458496,     1152,     2304, 0xbd7a6de4
-0,     459648,     459648,     1152,     2304, 0x7d718a4d
-0,     460800,     460800,     1152,     2304, 0x1f5e83b4
-0,     461952,     461952,     1152,     2304, 0x597d7755
-0,     463104,     463104,     1152,     2304, 0x3fb080bd
-0,     464256,     464256,     1152,     2304, 0xdcbd7b16
-0,     465408,     465408,     1152,     2304, 0x5c48865f
-0,     466560,     466560,     1152,     2304, 0xcda37ae8
-0,     467712,     467712,     1152,     2304, 0x4810796d
-0,     468864,     468864,     1152,     2304, 0x34317fd0
-0,     470016,     470016,     1152,     2304, 0x5c0e7456
-0,     471168,     471168,     1152,     2304, 0x44d78040
-0,     472320,     472320,     1152,     2304, 0x88587882
-0,     473472,     473472,     1152,     2304, 0x77687d5e
-0,     474624,     474624,     1152,     2304, 0x116d68bb
-0,     475776,     475776,     1152,     2304, 0x3e5b6f60
-0,     476928,     476928,     1152,     2304, 0x64ea783c
-0,     478080,     478080,     1152,     2304, 0x23547f7d
-0,     479232,     479232,     1152,     2304, 0x2eee7892
-0,     480384,     480384,     1152,     2304, 0xfb837cba
-0,     481536,     481536,     1152,     2304, 0x86518209
-0,     482688,     482688,     1152,     2304, 0x672f7bba
-0,     483840,     483840,     1152,     2304, 0x6ab583fb
-0,     484992,     484992,     1152,     2304, 0xc1297428
-0,     486144,     486144,     1152,     2304, 0x164e7717
-0,     487296,     487296,     1152,     2304, 0xb754976d
-0,     488448,     488448,     1152,     2304, 0xb99d81b2
-0,     489600,     489600,     1152,     2304, 0x79046fd6
-0,     490752,     490752,     1152,     2304, 0x9a3f8426
-0,     491904,     491904,     1152,     2304, 0x896371f5
-0,     493056,     493056,     1152,     2304, 0x63d1799f
-0,     494208,     494208,     1152,     2304, 0x4842844e
-0,     495360,     495360,     1152,     2304, 0x850e8372
-0,     496512,     496512,     1152,     2304, 0x85d07022
-0,     497664,     497664,     1152,     2304, 0x9e6683d1
-0,     498816,     498816,     1152,     2304, 0x301b8981
-0,     499968,     499968,     1152,     2304, 0x1f1e98c5
-0,     501120,     501120,     1152,     2304, 0xc8797b03
-0,     502272,     502272,     1152,     2304, 0xf9d189f5
-0,     503424,     503424,     1152,     2304, 0x0e0d88be
-0,     504576,     504576,     1152,     2304, 0x6c1d7843
-0,     505728,     505728,     1152,     2304, 0xd13b8a38
-0,     506880,     506880,     1152,     2304, 0x9b8f773c
-0,     508032,     508032,     1152,     2304, 0x9acd7309
-0,     509184,     509184,     1152,     2304, 0x5e7d7d15
-0,     510336,     510336,     1152,     2304, 0xf09d7640
-0,     511488,     511488,     1152,     2304, 0xaebb718f
-0,     512640,     512640,     1152,     2304, 0x0c8570f4
-0,     513792,     513792,     1152,     2304, 0x3c93862c
-0,     514944,     514944,     1152,     2304, 0xcee46696
-0,     516096,     516096,     1152,     2304, 0x01ba7e20
-0,     517248,     517248,     1152,     2304, 0x7fcb7a09
-0,     518400,     518400,     1152,     2304, 0xddf18c14
-0,     519552,     519552,     1152,     2304, 0xd2e97eeb
-0,     520704,     520704,     1152,     2304, 0x514d8719
-0,     521856,     521856,     1152,     2304, 0xe89279c9
-0,     523008,     523008,     1152,     2304, 0x806d6a95
-0,     524160,     524160,     1152,     2304, 0xbc7a8a60
-0,     525312,     525312,     1152,     2304, 0x302f8fa8
-0,     526464,     526464,     1152,     2304, 0xb136784c
-0,     527616,     527616,     1152,     2304, 0x9b0f6aab
-0,     528768,     528768,     1152,     2304, 0xd8e27582
-0,     529920,     529920,     1152,     2304, 0xdaaf78b1
-0,     531072,     531072,     1152,     2304, 0x65967f5f
-0,     532224,     532224,     1152,     2304, 0x6f917aa4
-0,     533376,     533376,     1152,     2304, 0x7f607444
-0,     534528,     534528,     1152,     2304, 0xfd316f2c
-0,     535680,     535680,     1152,     2304, 0x776e83c7
-0,     536832,     536832,     1152,     2304, 0xb9c17e16
-0,     537984,     537984,     1152,     2304, 0xdf287de8
-0,     539136,     539136,     1152,     2304, 0xf33d96a3
-0,     540288,     540288,     1152,     2304, 0xad216e5b
-0,     541440,     541440,     1152,     2304, 0x4a328342
-0,     542592,     542592,     1152,     2304, 0xcf3f8079
-0,     543744,     543744,     1152,     2304, 0xb46f77b0
-0,     544896,     544896,     1152,     2304, 0x3199713d
-0,     546048,     546048,     1152,     2304, 0x5e667a0d
-0,     547200,     547200,     1152,     2304, 0xa3047ae3
-0,     548352,     548352,     1152,     2304, 0x9edf8594
-0,     549504,     549504,     1152,     2304, 0xd16382d5
-0,     550656,     550656,     1152,     2304, 0x6e838328
-0,     551808,     551808,     1152,     2304, 0xa1f697c1
-0,     552960,     552960,     1152,     2304, 0xefcc749c
-0,     554112,     554112,     1152,     2304, 0x1f94839e
-0,     555264,     555264,     1152,     2304, 0x429e7c5b
-0,     556416,     556416,     1152,     2304, 0x9b59711c
-0,     557568,     557568,     1152,     2304, 0xdac27323
-0,     558720,     558720,     1152,     2304, 0xa4856d2b
-0,     559872,     559872,     1152,     2304, 0x07a37498
-0,     561024,     561024,     1152,     2304, 0xe1ce7512
-0,     562176,     562176,     1152,     2304, 0x15e182c3
-0,     563328,     563328,     1152,     2304, 0x0fa46b8c
-0,     564480,     564480,     1152,     2304, 0xbdf07bfd
-0,     565632,     565632,     1152,     2304, 0xe0238b2a
-0,     566784,     566784,     1152,     2304, 0xab537267
-0,     567936,     567936,     1152,     2304, 0xd46b75f3
-0,     569088,     569088,     1152,     2304, 0xec73794b
-0,     570240,     570240,     1152,     2304, 0x680580c1
-0,     571392,     571392,     1152,     2304, 0x1ace8f6c
-0,     572544,     572544,     1152,     2304, 0x19d583ac
-0,     573696,     573696,     1152,     2304, 0x4b6b8105
-0,     574848,     574848,     1152,     2304, 0x392a78b2
-0,     576000,     576000,     1152,     2304, 0xd3916dad
-0,     577152,     577152,     1152,     2304, 0x569c7a75
-0,     578304,     578304,     1152,     2304, 0xf5ac814b
-0,     579456,     579456,     1152,     2304, 0x18d77e98
-0,     580608,     580608,     1152,     2304, 0x007074ce
-0,     581760,     581760,     1152,     2304, 0x0fe38373
-0,     582912,     582912,     1152,     2304, 0x5a967920
-0,     584064,     584064,     1152,     2304, 0x22167501
-0,     585216,     585216,     1152,     2304, 0xf0828cab
-0,     586368,     586368,     1152,     2304, 0xaeec71b7
-0,     587520,     587520,     1152,     2304, 0xc47b62ff
-0,     588672,     588672,     1152,     2304, 0xab688478
-0,     589824,     589824,     1152,     2304, 0xf35e7bd2
-0,     590976,     590976,     1152,     2304, 0x9cff763e
-0,     592128,     592128,     1152,     2304, 0x59568dc8
-0,     593280,     593280,     1152,     2304, 0x51a278ac
-0,     594432,     594432,     1152,     2304, 0xc08a6e68
-0,     595584,     595584,     1152,     2304, 0xd3067ef4
-0,     596736,     596736,     1152,     2304, 0x54767c49
-0,     597888,     597888,     1152,     2304, 0xf8ff8386
-0,     599040,     599040,     1152,     2304, 0xef267f63
-0,     600192,     600192,     1152,     2304, 0xe2537cd9
-0,     601344,     601344,     1152,     2304, 0x77a57680
-0,     602496,     602496,     1152,     2304, 0x325c74ad
-0,     603648,     603648,     1152,     2304, 0xd7fe87c4
-0,     604800,     604800,     1152,     2304, 0x2e756310
-0,     605952,     605952,     1152,     2304, 0x6a81796b
-0,     607104,     607104,     1152,     2304, 0x2f057daf
-0,     608256,     608256,     1152,     2304, 0xcd9f7c9d
-0,     609408,     609408,     1152,     2304, 0xc91560a0
-0,     610560,     610560,     1152,     2304, 0x962a91eb
-0,     611712,     611712,     1152,     2304, 0xa0ff7416
-0,     612864,     612864,     1152,     2304, 0xcb5c7dff
-0,     614016,     614016,     1152,     2304, 0xd3527041
-0,     615168,     615168,     1152,     2304, 0xc89d77c2
-0,     616320,     616320,     1152,     2304, 0xe1ce7ccf
-0,     617472,     617472,     1152,     2304, 0xe3417c4c
-0,     618624,     618624,     1152,     2304, 0x3f1a7166
-0,     619776,     619776,     1152,     2304, 0xcdcc7e23
-0,     620928,     620928,     1152,     2304, 0x4e727e97
-0,     622080,     622080,     1152,     2304, 0x53427ff1
-0,     623232,     623232,     1152,     2304, 0x173f6ca9
-0,     624384,     624384,     1152,     2304, 0x962887ec
-0,     625536,     625536,     1152,     2304, 0xcbec67f4
-0,     626688,     626688,     1152,     2304, 0x7a2c943d
-0,     627840,     627840,     1152,     2304, 0x8b877570
-0,     628992,     628992,     1152,     2304, 0xcf337323
-0,     630144,     630144,     1152,     2304, 0x8c8682a4
-0,     631296,     631296,     1152,     2304, 0x94c3753c
-0,     632448,     632448,     1152,     2304, 0x86898d79
-0,     633600,     633600,     1152,     2304, 0xdf667312
-0,     634752,     634752,     1152,     2304, 0x062f8ba8
-0,     635904,     635904,     1152,     2304, 0xa2c36f08
-0,     637056,     637056,     1152,     2304, 0x5bca7358
-0,     638208,     638208,     1152,     2304, 0x5648804d
-0,     639360,     639360,     1152,     2304, 0xefac87c8
-0,     640512,     640512,     1152,     2304, 0x66bf7dcf
-0,     641664,     641664,     1152,     2304, 0x62ad73bc
-0,     642816,     642816,     1152,     2304, 0x72fe630c
-0,     643968,     643968,     1152,     2304, 0xeebe87da
-0,     645120,     645120,     1152,     2304, 0x11c870cf
-0,     646272,     646272,     1152,     2304, 0x18fb7c27
-0,     647424,     647424,     1152,     2304, 0x39047145
-0,     648576,     648576,     1152,     2304, 0xdcf07032
-0,     649728,     649728,     1152,     2304, 0x61027c50
-0,     650880,     650880,     1152,     2304, 0x6e2e89de
-0,     652032,     652032,     1152,     2304, 0xc50c6d1d
-0,     653184,     653184,     1152,     2304, 0xeed587ee
-0,     654336,     654336,     1152,     2304, 0xe38269c7
-0,     655488,     655488,     1152,     2304, 0xcf66806e
-0,     656640,     656640,     1152,     2304, 0x2d3b7c1b
-0,     657792,     657792,     1152,     2304, 0xa4127d48
-0,     658944,     658944,     1152,     2304, 0x480b8325
-0,     660096,     660096,     1152,     2304, 0xc1527221
-0,     661248,     661248,     1152,     2304, 0x94c1769a
-0,     662400,     662400,     1152,     2304, 0xcfb37271
-0,     663552,     663552,     1152,     2304, 0x946d7a96
-0,     664704,     664704,     1152,     2304, 0xdfc18e50
-0,     665856,     665856,     1152,     2304, 0x10c48393
-0,     667008,     667008,     1152,     2304, 0x58556b10
-0,     668160,     668160,     1152,     2304, 0x997b7993
-0,     669312,     669312,     1152,     2304, 0x4a787992
-0,     670464,     670464,     1152,     2304, 0x11406c20
-0,     671616,     671616,     1152,     2304, 0x04a4874a
-0,     672768,     672768,     1152,     2304, 0xf3077164
-0,     673920,     673920,     1152,     2304, 0x08ac80e3
-0,     675072,     675072,     1152,     2304, 0x268370d0
-0,     676224,     676224,     1152,     2304, 0x1d137778
-0,     677376,     677376,     1152,     2304, 0xfa148e97
-0,     678528,     678528,     1152,     2304, 0xec50717c
-0,     679680,     679680,     1152,     2304, 0xcbf46b75
-0,     680832,     680832,     1152,     2304, 0xd4168038
-0,     681984,     681984,     1152,     2304, 0xdd9577f0
-0,     683136,     683136,     1152,     2304, 0xc7077685
-0,     684288,     684288,     1152,     2304, 0x34d25e91
-0,     685440,     685440,     1152,     2304, 0x96537e6d
-0,     686592,     686592,     1152,     2304, 0xb12e7940
-0,     687744,     687744,     1152,     2304, 0x861d64c0
-0,     688896,     688896,     1152,     2304, 0xa2bc64ed
-0,     690048,     690048,     1152,     2304, 0x0c5f8261
-0,     691200,     691200,     1152,     2304, 0x540584ff
-0,     692352,     692352,     1152,     2304, 0xe8328b09
-0,     693504,     693504,     1152,     2304, 0x1e777079
-0,     694656,     694656,     1152,     2304, 0x453483b4
-0,     695808,     695808,     1152,     2304, 0x1cab7a1e
-0,     696960,     696960,     1152,     2304, 0xcb37856d
-0,     698112,     698112,     1152,     2304, 0x5a4883ed
-0,     699264,     699264,     1152,     2304, 0xd1f27cbf
-0,     700416,     700416,     1152,     2304, 0x0d377a4d
-0,     701568,     701568,     1152,     2304, 0x264e76df
-0,     702720,     702720,     1152,     2304, 0x2a68771e
-0,     703872,     703872,     1152,     2304, 0xcb317a31
-0,     705024,     705024,     1152,     2304, 0xfc5d7a27
-0,     706176,     706176,     1152,     2304, 0x6e067d96
-0,     707328,     707328,     1152,     2304, 0x0c538560
-0,     708480,     708480,     1152,     2304, 0xfbad717a
-0,     709632,     709632,     1152,     2304, 0xf9fc7608
-0,     710784,     710784,     1152,     2304, 0xb1817c8f
-0,     711936,     711936,     1152,     2304, 0x57c37f82
-0,     713088,     713088,     1152,     2304, 0x8cac8356
-0,     714240,     714240,     1152,     2304, 0x97108186
-0,     715392,     715392,     1152,     2304, 0x095d81bb
-0,     716544,     716544,     1152,     2304, 0x475f6b2b
-0,     717696,     717696,     1152,     2304, 0xdf7c8cc5
-0,     718848,     718848,     1152,     2304, 0x979c77be
-0,     720000,     720000,     1152,     2304, 0x56a7844b
-0,     721152,     721152,     1152,     2304, 0x7ee46b21
-0,     722304,     722304,     1152,     2304, 0x05b67220
-0,     723456,     723456,     1152,     2304, 0x25787252
-0,     724608,     724608,     1152,     2304, 0x8ad278ad
-0,     725760,     725760,     1152,     2304, 0x67bd722e
-0,     726912,     726912,     1152,     2304, 0x204f77be
-0,     728064,     728064,     1152,     2304, 0x82d27ae2
-0,     729216,     729216,     1152,     2304, 0x23fa82e4
-0,     730368,     730368,     1152,     2304, 0xa9cf8159
-0,     731520,     731520,     1152,     2304, 0x13f08749
-0,     732672,     732672,     1152,     2304, 0xf84f71b5
-0,     733824,     733824,     1152,     2304, 0x1cb777c8
-0,     734976,     734976,     1152,     2304, 0x11236722
-0,     736128,     736128,     1152,     2304, 0x10197cac
-0,     737280,     737280,     1152,     2304, 0xbd417e65
-0,     738432,     738432,     1152,     2304, 0x9a1c7d05
-0,     739584,     739584,     1152,     2304, 0x4c3a85de
-0,     740736,     740736,     1152,     2304, 0x03816eb7
-0,     741888,     741888,     1152,     2304, 0x80186e6c
-0,     743040,     743040,     1152,     2304, 0x5c097928
-0,     744192,     744192,     1152,     2304, 0x94aa823d
-0,     745344,     745344,     1152,     2304, 0xa1c27f04
-0,     746496,     746496,     1152,     2304, 0x6ddb74a9
-0,     747648,     747648,     1152,     2304, 0x5ea67901
-0,     748800,     748800,     1152,     2304, 0xd710742d
-0,     749952,     749952,     1152,     2304, 0xf8c27add
-0,     751104,     751104,     1152,     2304, 0xf1717011
-0,     752256,     752256,     1152,     2304, 0xb59072d0
-0,     753408,     753408,     1152,     2304, 0xc8dc84a7
-0,     754560,     754560,     1152,     2304, 0x33116737
-0,     755712,     755712,     1152,     2304, 0x86216bdd
-0,     756864,     756864,     1152,     2304, 0xa2f87866
-0,     758016,     758016,     1152,     2304, 0x5d77771e
-0,     759168,     759168,     1152,     2304, 0x5d8c77fd
-0,     760320,     760320,     1152,     2304, 0x23cc89cb
-0,     761472,     761472,     1152,     2304, 0x334e7407
-0,     762624,     762624,     1152,     2304, 0x01c976ff
-0,     763776,     763776,     1152,     2304, 0x3a3b7b15
-0,     764928,     764928,     1152,     2304, 0xfa427de9
-0,     766080,     766080,     1152,     2304, 0xbeaa7c91
-0,     767232,     767232,     1152,     2304, 0xd6988b31
-0,     768384,     768384,     1152,     2304, 0x4db47f80
-0,     769536,     769536,     1152,     2304, 0xea687d9e
-0,     770688,     770688,     1152,     2304, 0x1a6281ce
-0,     771840,     771840,     1152,     2304, 0xe1958003
-0,     772992,     772992,     1152,     2304, 0xb4ae7c5e
-0,     774144,     774144,     1152,     2304, 0x28827c8a
-0,     775296,     775296,     1152,     2304, 0x1fb88b25
-0,     776448,     776448,     1152,     2304, 0x588d71e8
-0,     777600,     777600,     1152,     2304, 0x68227c34
-0,     778752,     778752,     1152,     2304, 0xee4d73e8
-0,     779904,     779904,     1152,     2304, 0x69287c6d
-0,     781056,     781056,     1152,     2304, 0xbb04926a
-0,     782208,     782208,     1152,     2304, 0x89456cec
-0,     783360,     783360,     1152,     2304, 0xabe18992
-0,     784512,     784512,     1152,     2304, 0x50cc7f6c
-0,     785664,     785664,     1152,     2304, 0x6d7270be
-0,     786816,     786816,     1152,     2304, 0x664c6fef
-0,     787968,     787968,     1152,     2304, 0x7f7982f3
-0,     789120,     789120,     1152,     2304, 0x6ca170e9
-0,     790272,     790272,     1152,     2304, 0x36437d5b
-0,     791424,     791424,     1152,     2304, 0xfd2380e8
-0,     792576,     792576,     1152,     2304, 0x2e3c6e9f
-0,     793728,     793728,     1152,     2304, 0xc8427f3f
-0,     794880,     794880,     1152,     2304, 0x962a79ad
-0,     796032,     796032,     1152,     2304, 0xc9597c8b
-0,     797184,     797184,     1152,     2304, 0x899580bb
-0,     798336,     798336,     1152,     2304, 0x2d179dff
-0,     799488,     799488,     1152,     2304, 0x4ac1707c
-0,     800640,     800640,     1152,     2304, 0x32ea7e95
-0,     801792,     801792,     1152,     2304, 0x265e9a2d
-0,     802944,     802944,     1152,     2304, 0x1c6484d0
-0,     804096,     804096,     1152,     2304, 0x39ae6884
-0,     805248,     805248,     1152,     2304, 0x82ed7bc5
-0,     806400,     806400,     1152,     2304, 0x556b7b3c
-0,     807552,     807552,     1152,     2304, 0xb7f778dd
-0,     808704,     808704,     1152,     2304, 0x74447d55
-0,     809856,     809856,     1152,     2304, 0x0c66861e
-0,     811008,     811008,     1152,     2304, 0x15ba7932
-0,     812160,     812160,     1152,     2304, 0xb19170fc
-0,     813312,     813312,     1152,     2304, 0x19d37551
-0,     814464,     814464,     1152,     2304, 0xdc529142
-0,     815616,     815616,     1152,     2304, 0xf2637e77
-0,     816768,     816768,     1152,     2304, 0xd065944b
-0,     817920,     817920,     1152,     2304, 0x22878123
-0,     819072,     819072,     1152,     2304, 0xc21a8bf7
-0,     820224,     820224,     1152,     2304, 0x2e3582dc
-0,     821376,     821376,     1152,     2304, 0xd42f7987
-0,     822528,     822528,     1152,     2304, 0x69b88236
-0,     823680,     823680,     1152,     2304, 0x7c988f90
-0,     824832,     824832,     1152,     2304, 0x2cd66ded
-0,     825984,     825984,     1152,     2304, 0x3e65828b
-0,     827136,     827136,     1152,     2304, 0x7e9871c9
-0,     828288,     828288,     1152,     2304, 0xf1f2806b
-0,     829440,     829440,     1152,     2304, 0xf5087c7b
-0,     830592,     830592,     1152,     2304, 0x62b98097
-0,     831744,     831744,     1152,     2304, 0xec457c43
-0,     832896,     832896,     1152,     2304, 0x87af87a6
-0,     834048,     834048,     1152,     2304, 0x97cc757d
-0,     835200,     835200,     1152,     2304, 0x08ca76bd
-0,     836352,     836352,     1152,     2304, 0x14ae7cbd
-0,     837504,     837504,     1152,     2304, 0x1f79709a
-0,     838656,     838656,     1152,     2304, 0x17948207
-0,     839808,     839808,     1152,     2304, 0x16ee7228
-0,     840960,     840960,     1152,     2304, 0x76cc82d7
-0,     842112,     842112,     1152,     2304, 0x8f327a8e
-0,     843264,     843264,     1152,     2304, 0x14ee7756
-0,     844416,     844416,     1152,     2304, 0x15996d2f
-0,     845568,     845568,     1152,     2304, 0x4c707d5c
-0,     846720,     846720,     1152,     2304, 0x268c6fee
-0,     847872,     847872,     1152,     2304, 0x6d838c76
-0,     849024,     849024,     1152,     2304, 0xafa17e64
-0,     850176,     850176,     1152,     2304, 0xb6546e66
-0,     851328,     851328,     1152,     2304, 0x945d8b9f
-0,     852480,     852480,     1152,     2304, 0x5bfb7446
-0,     853632,     853632,     1152,     2304, 0xae6086f9
-0,     854784,     854784,     1152,     2304, 0xa01380cd
-0,     855936,     855936,     1152,     2304, 0x06f0828f
-0,     857088,     857088,     1152,     2304, 0x0ae07176
-0,     858240,     858240,     1152,     2304, 0x66f07522
-0,     859392,     859392,     1152,     2304, 0x44018106
-0,     860544,     860544,     1152,     2304, 0x8cd283da
-0,     861696,     861696,     1152,     2304, 0x14257f45
-0,     862848,     862848,     1152,     2304, 0x04979537
-0,     864000,     864000,     1152,     2304, 0x8b5f797c
-0,     865152,     865152,     1152,     2304, 0x12d67493
-0,     866304,     866304,     1152,     2304, 0xc8886a25
-0,     867456,     867456,     1152,     2304, 0x614b803a
-0,     868608,     868608,     1152,     2304, 0x75667d35
-0,     869760,     869760,     1152,     2304, 0xe42c7b00
-0,     870912,     870912,     1152,     2304, 0x37787927
-0,     872064,     872064,     1152,     2304, 0x85db8409
-0,     873216,     873216,     1152,     2304, 0x823b822c
-0,     874368,     874368,     1152,     2304, 0xa1658479
-0,     875520,     875520,     1152,     2304, 0xdbe58ff7
-0,     876672,     876672,     1152,     2304, 0x725175e2
-0,     877824,     877824,     1152,     2304, 0xb2ae7741
-0,     878976,     878976,     1152,     2304, 0x4de169e4
-0,     880128,     880128,     1152,     2304, 0x3cb18530
-0,     881280,     881280,     1152,     2304, 0x5a0c5e7b
-0,     882432,     882432,     1152,     2304, 0x4e1f68b6
-0,     883584,     883584,     1152,     2304, 0x9a0c7525
-0,     884736,     884736,     1152,     2304, 0x86ae6a04
-0,     885888,     885888,     1152,     2304, 0xf508877c
-0,     887040,     887040,     1152,     2304, 0x29746d7c
-0,     888192,     888192,     1152,     2304, 0x24078edb
-0,     889344,     889344,     1152,     2304, 0x4f9784d2
-0,     890496,     890496,     1152,     2304, 0x24186e95
-0,     891648,     891648,     1152,     2304, 0xd1927b28
-0,     892800,     892800,     1152,     2304, 0xc77283d5
-0,     893952,     893952,     1152,     2304, 0xc0ad73f8
-0,     895104,     895104,     1152,     2304, 0x0de78524
-0,     896256,     896256,     1152,     2304, 0x8b0061dc
-0,     897408,     897408,     1152,     2304, 0x37c77ad6
-0,     898560,     898560,     1152,     2304, 0x5eae7410
-0,     899712,     899712,     1152,     2304, 0x72d97ab5
-0,     900864,     900864,     1152,     2304, 0x1afd8745
-0,     902016,     902016,     1152,     2304, 0xe30b7c9b
-0,     903168,     903168,     1152,     2304, 0xe86480b3
-0,     904320,     904320,     1152,     2304, 0xb9c77a4e
-0,     905472,     905472,     1152,     2304, 0x64508f2a
-0,     906624,     906624,     1152,     2304, 0x839a74ed
-0,     907776,     907776,     1152,     2304, 0x12147a5d
-0,     908928,     908928,     1152,     2304, 0x33b07d3b
-0,     910080,     910080,     1152,     2304, 0x65e67731
-0,     911232,     911232,     1152,     2304, 0x8d1c8191
-0,     912384,     912384,     1152,     2304, 0xada17701
-0,     913536,     913536,     1152,     2304, 0x49d47f6c
-0,     914688,     914688,     1152,     2304, 0x5df97989
-0,     915840,     915840,     1152,     2304, 0x0db17ad9
-0,     916992,     916992,     1152,     2304, 0x0d3281d7
-0,     918144,     918144,     1152,     2304, 0x2503704f
-0,     919296,     919296,     1152,     2304, 0xdf7c73d0
-0,     920448,     920448,     1152,     2304, 0x93dd737c
-0,     921600,     921600,     1152,     2304, 0x48ed5740
-0,     922752,     922752,     1152,     2304, 0x6d477eaf
-0,     923904,     923904,     1152,     2304, 0xe72373a1
-0,     925056,     925056,     1152,     2304, 0x7d3e7e2e
-0,     926208,     926208,     1152,     2304, 0x0d167743
-0,     927360,     927360,     1152,     2304, 0xc2d97f6c
-0,     928512,     928512,     1152,     2304, 0xc66175a0
-0,     929664,     929664,     1152,     2304, 0xd0127475
-0,     930816,     930816,     1152,     2304, 0xdbf57801
-0,     931968,     931968,     1152,     2304, 0x52b47a8e
-0,     933120,     933120,     1152,     2304, 0xc96b80c5
-0,     934272,     934272,     1152,     2304, 0x070c76cf
-0,     935424,     935424,     1152,     2304, 0x2aa07ee2
-0,     936576,     936576,     1152,     2304, 0x04059074
-0,     937728,     937728,     1152,     2304, 0x228f6def
-0,     938880,     938880,     1152,     2304, 0x927893a8
-0,     940032,     940032,     1152,     2304, 0x581a87df
-0,     941184,     941184,     1152,     2304, 0x0d3f653c
-0,     942336,     942336,     1152,     2304, 0xc0e775f3
-0,     943488,     943488,     1152,     2304, 0x41d98473
-0,     944640,     944640,     1152,     2304, 0x68fb775a
-0,     945792,     945792,     1152,     2304, 0x81e0810e
-0,     946944,     946944,     1152,     2304, 0xf35790d3
-0,     948096,     948096,     1152,     2304, 0xc1468c45
-0,     949248,     949248,     1152,     2304, 0x6949796a
-0,     950400,     950400,     1152,     2304, 0xa0bb7dcd
-0,     951552,     951552,     1152,     2304, 0x50af8424
-0,     952704,     952704,     1152,     2304, 0x449b7d37
-0,     953856,     953856,     1152,     2304, 0xcf2779a8
-0,     955008,     955008,     1152,     2304, 0x069d894e
-0,     956160,     956160,     1152,     2304, 0x41bd7334
-0,     957312,     957312,     1152,     2304, 0x0dfd8c05
-0,     958464,     958464,     1152,     2304, 0x2ce27b9d
-0,     959616,     959616,     1152,     2304, 0x422875b5
-0,     960768,     960768,     1152,     2304, 0xd5297389
-0,     961920,     961920,     1152,     2304, 0x49ed8668
-0,     963072,     963072,     1152,     2304, 0x2a9a810b
-0,     964224,     964224,     1152,     2304, 0x5a3f7b43
-0,     965376,     965376,     1152,     2304, 0x32087c12
-0,     966528,     966528,     1152,     2304, 0x759c7bba
-0,     967680,     967680,     1152,     2304, 0x5b26823b
-0,     968832,     968832,     1152,     2304, 0xeeb872ef
-0,     969984,     969984,     1152,     2304, 0x6eb0869d
-0,     971136,     971136,     1152,     2304, 0x4b3e764f
-0,     972288,     972288,     1152,     2304, 0x30a66fdb
-0,     973440,     973440,     1152,     2304, 0x958374ff
-0,     974592,     974592,     1152,     2304, 0x411b71cc
-0,     975744,     975744,     1152,     2304, 0x80136759
-0,     976896,     976896,     1152,     2304, 0x05ed6793
-0,     978048,     978048,     1152,     2304, 0x48f18827
-0,     979200,     979200,     1152,     2304, 0xb40d7866
-0,     980352,     980352,     1152,     2304, 0xcad18c4a
-0,     981504,     981504,     1152,     2304, 0x9ad68582
-0,     982656,     982656,     1152,     2304, 0x3ad17fce
-0,     983808,     983808,     1152,     2304, 0x824b7af6
-0,     984960,     984960,     1152,     2304, 0xcc9d894f
-0,     986112,     986112,     1152,     2304, 0x46167a70
-0,     987264,     987264,     1152,     2304, 0x2479872a
-0,     988416,     988416,     1152,     2304, 0x58cb72a5
-0,     989568,     989568,     1152,     2304, 0x01957747
-0,     990720,     990720,     1152,     2304, 0x6df172cb
-0,     991872,     991872,     1152,     2304, 0x9ebd7ed9
-0,     993024,     993024,     1152,     2304, 0x89436a53
-0,     994176,     994176,     1152,     2304, 0xcce78433
-0,     995328,     995328,     1152,     2304, 0x1b7a70f2
-0,     996480,     996480,     1152,     2304, 0x530584d7
-0,     997632,     997632,     1152,     2304, 0xda158aa9
-0,     998784,     998784,     1152,     2304, 0xe85e7b90
-0,     999936,     999936,     1152,     2304, 0xa8ac8176
-0,    1001088,    1001088,     1152,     2304, 0xe43578a9
-0,    1002240,    1002240,     1152,     2304, 0x9ecc7c30
-0,    1003392,    1003392,     1152,     2304, 0x1cbd8591
-0,    1004544,    1004544,     1152,     2304, 0x84917bd9
-0,    1005696,    1005696,     1152,     2304, 0xea8477e4
-0,    1006848,    1006848,     1152,     2304, 0xf50c7ea1
-0,    1008000,    1008000,     1152,     2304, 0xc85e76e7
-0,    1009152,    1009152,     1152,     2304, 0xa5148768
-0,    1010304,    1010304,     1152,     2304, 0x2bd5846a
-0,    1011456,    1011456,     1152,     2304, 0x1cd88641
-0,    1012608,    1012608,     1152,     2304, 0x04b883c5
-0,    1013760,    1013760,     1152,     2304, 0x5bf1719b
-0,    1014912,    1014912,     1152,     2304, 0x0abe777c
-0,    1016064,    1016064,     1152,     2304, 0xc74a7f7f
-0,    1017216,    1017216,     1152,     2304, 0xd36a724b
-0,    1018368,    1018368,     1152,     2304, 0x4ae67e9e
-0,    1019520,    1019520,     1152,     2304, 0xfb6173a0
-0,    1020672,    1020672,     1152,     2304, 0x31d77e19
-0,    1021824,    1021824,     1152,     2304, 0x6153767d
-0,    1022976,    1022976,     1152,     2304, 0xe40d84e4
-0,    1024128,    1024128,     1152,     2304, 0xc8ef71e2
-0,    1025280,    1025280,     1152,     2304, 0xdeca81be
-0,    1026432,    1026432,     1152,     2304, 0x75da85df
-0,    1027584,    1027584,     1152,     2304, 0x6c8e71bd
-0,    1028736,    1028736,     1152,     2304, 0x6f8576a4
-0,    1029888,    1029888,     1152,     2304, 0xe6637b71
-0,    1031040,    1031040,     1152,     2304, 0xddfe8866
-0,    1032192,    1032192,     1152,     2304, 0xa08578a5
-0,    1033344,    1033344,     1152,     2304, 0x72af7873
-0,    1034496,    1034496,     1152,     2304, 0xb550615c
-0,    1035648,    1035648,     1152,     2304, 0x9ef87d28
-0,    1036800,    1036800,     1152,     2304, 0xe30d83bf
-0,    1037952,    1037952,     1152,     2304, 0xf030818c
-0,    1039104,    1039104,     1152,     2304, 0x1e23896c
-0,    1040256,    1040256,     1152,     2304, 0xf4c87180
-0,    1041408,    1041408,     1152,     2304, 0xe34181aa
-0,    1042560,    1042560,     1152,     2304, 0x660681cb
-0,    1043712,    1043712,     1152,     2304, 0x7ebb87fc
-0,    1044864,    1044864,     1152,     2304, 0xa8305eeb
-0,    1046016,    1046016,     1152,     2304, 0xea898906
-0,    1047168,    1047168,     1152,     2304, 0x342b6c13
-0,    1048320,    1048320,     1152,     2304, 0x7fd18372
-0,    1049472,    1049472,     1152,     2304, 0x965c7377
-0,    1050624,    1050624,     1152,     2304, 0xb53b70b3
-0,    1051776,    1051776,     1152,     2304, 0xd54884d1
-0,    1052928,    1052928,     1152,     2304, 0x546d77ac
-0,    1054080,    1054080,     1152,     2304, 0xd7a77dc7
-0,    1055232,    1055232,     1152,     2304, 0x4b0e7c5c
-0,    1056384,    1056384,     1152,     2304, 0x346d7026
-0,    1057536,    1057536,     1152,     2304, 0x3f346e81
-0,    1058688,    1058688,     1152,     2304, 0x3f167098
-0,    1059840,    1059840,     1152,     2304, 0x1c857987
-0,    1060992,    1060992,     1152,     2304, 0xf9317c16
-0,    1062144,    1062144,     1152,     2304, 0x25ea82c4
-0,    1063296,    1063296,     1152,     2304, 0x2b0f90bb
-0,    1064448,    1064448,     1152,     2304, 0x999668db
-0,    1065600,    1065600,     1152,     2304, 0x9a0676da
-0,    1066752,    1066752,     1152,     2304, 0x97a87172
-0,    1067904,    1067904,     1152,     2304, 0x323d7ace
-0,    1069056,    1069056,     1152,     2304, 0x1b538baa
-0,    1070208,    1070208,     1152,     2304, 0xeb208b72
-0,    1071360,    1071360,     1152,     2304, 0x36c69152
-0,    1072512,    1072512,     1152,     2304, 0x570c7bac
-0,    1073664,    1073664,     1152,     2304, 0xd7287c72
-0,    1074816,    1074816,     1152,     2304, 0xc4d78776
-0,    1075968,    1075968,     1152,     2304, 0xdcbb7735
-0,    1077120,    1077120,     1152,     2304, 0x08066d78
-0,    1078272,    1078272,     1152,     2304, 0x5cb96bad
-0,    1079424,    1079424,     1152,     2304, 0x30d58180
-0,    1080576,    1080576,     1152,     2304, 0x3ac96b87
-0,    1081728,    1081728,     1152,     2304, 0x1fc878d2
-0,    1082880,    1082880,     1152,     2304, 0x2c7c85d4
-0,    1084032,    1084032,     1152,     2304, 0x6c307caa
-0,    1085184,    1085184,     1152,     2304, 0x647b6d6e
-0,    1086336,    1086336,     1152,     2304, 0xfbef7e51
-0,    1087488,    1087488,     1152,     2304, 0x27867bd7
-0,    1088640,    1088640,     1152,     2304, 0xa2b875b1
-0,    1089792,    1089792,     1152,     2304, 0xc7dd73fb
-0,    1090944,    1090944,     1152,     2304, 0x717e79e9
-0,    1092096,    1092096,     1152,     2304, 0xfecb710d
-0,    1093248,    1093248,     1152,     2304, 0xa5308333
-0,    1094400,    1094400,     1152,     2304, 0x92fe6b77
-0,    1095552,    1095552,     1152,     2304, 0x7db17ad6
-0,    1096704,    1096704,     1152,     2304, 0x1248786d
-0,    1097856,    1097856,     1152,     2304, 0xf3647a8a
-0,    1099008,    1099008,     1152,     2304, 0x7b2173f0
-0,    1100160,    1100160,     1152,     2304, 0xfd1f7a3c
-0,    1101312,    1101312,     1152,     2304, 0x09e57f3e
-0,    1102464,    1102464,     1152,     2304, 0x13d7798d
-0,    1103616,    1103616,     1152,     2304, 0x7d4d758a
-0,    1104768,    1104768,     1152,     2304, 0xdf1a80be
-0,    1105920,    1105920,     1152,     2304, 0xb91b7cb9
-0,    1107072,    1107072,     1152,     2304, 0x102e73ba
-0,    1108224,    1108224,     1152,     2304, 0x8dce8428
-0,    1109376,    1109376,     1152,     2304, 0xdf9070f6
-0,    1110528,    1110528,     1152,     2304, 0x27d77bb4
-0,    1111680,    1111680,     1152,     2304, 0x2f8a8579
-0,    1112832,    1112832,     1152,     2304, 0xd1e27204
-0,    1113984,    1113984,     1152,     2304, 0x4f087920
-0,    1115136,    1115136,     1152,     2304, 0x69f58306
-0,    1116288,    1116288,     1152,     2304, 0xfc7378b0
-0,    1117440,    1117440,     1152,     2304, 0x8b167d89
-0,    1118592,    1118592,     1152,     2304, 0x0c867235
-0,    1119744,    1119744,     1152,     2304, 0xc3126e09
-0,    1120896,    1120896,     1152,     2304, 0x7d097d89
-0,    1122048,    1122048,     1152,     2304, 0x73dc664e
-0,    1123200,    1123200,     1152,     2304, 0xdc247a0a
-0,    1124352,    1124352,     1152,     2304, 0xaa8d7f2f
-0,    1125504,    1125504,     1152,     2304, 0x9a1d744b
-0,    1126656,    1126656,     1152,     2304, 0xd9e87475
-0,    1127808,    1127808,     1152,     2304, 0xf6f182ab
-0,    1128960,    1128960,     1152,     2304, 0x4937706b
-0,    1130112,    1130112,     1152,     2304, 0x837189b1
-0,    1131264,    1131264,     1152,     2304, 0xd565975d
-0,    1132416,    1132416,     1152,     2304, 0xc6156a56
-0,    1133568,    1133568,     1152,     2304, 0xcd9c8107
-0,    1134720,    1134720,     1152,     2304, 0x1f868400
-0,    1135872,    1135872,     1152,     2304, 0xeddb71e0
-0,    1137024,    1137024,     1152,     2304, 0x5fa2782a
-0,    1138176,    1138176,     1152,     2304, 0x9b076e22
-0,    1139328,    1139328,     1152,     2304, 0x90027ba7
-0,    1140480,    1140480,     1152,     2304, 0x40a481fd
-0,    1141632,    1141632,     1152,     2304, 0x240873bf
-0,    1142784,    1142784,     1152,     2304, 0x28cd7d8e
-0,    1143936,    1143936,     1152,     2304, 0x5d4287b0
-0,    1145088,    1145088,     1152,     2304, 0xf9567c4b
-0,    1146240,    1146240,     1152,     2304, 0x67dd924a
-0,    1147392,    1147392,     1152,     2304, 0x33d37b4c
-0,    1148544,    1148544,     1152,     2304, 0x37316d0b
-0,    1149696,    1149696,     1152,     2304, 0x43a877dc
-0,    1150848,    1150848,     1152,     2304, 0x44fc90bc
-0,    1152000,    1152000,     1152,     2304, 0x90f5774c
-0,    1153152,    1153152,     1152,     2304, 0x106e85a0
-0,    1154304,    1154304,     1152,     2304, 0xb78b7f83
-0,    1155456,    1155456,     1152,     2304, 0x3c3a77af
-0,    1156608,    1156608,     1152,     2304, 0x7a5f7282
-0,    1157760,    1157760,     1152,     2304, 0x164d891a
-0,    1158912,    1158912,     1152,     2304, 0x930a7194
-0,    1160064,    1160064,     1152,     2304, 0xb2628013
-0,    1161216,    1161216,     1152,     2304, 0x3adf7e1c
-0,    1162368,    1162368,     1152,     2304, 0x48627c45
-0,    1163520,    1163520,     1152,     2304, 0xa3367cca
-0,    1164672,    1164672,     1152,     2304, 0x9cf77248
-0,    1165824,    1165824,     1152,     2304, 0x75a86acb
-0,    1166976,    1166976,     1152,     2304, 0x75487698
-0,    1168128,    1168128,     1152,     2304, 0xd65b8389
-0,    1169280,    1169280,     1152,     2304, 0xb71d84b5
-0,    1170432,    1170432,     1152,     2304, 0xdca87e42
-0,    1171584,    1171584,     1152,     2304, 0x44dd7d72
-0,    1172736,    1172736,     1152,     2304, 0xb29465da
-0,    1173888,    1173888,     1152,     2304, 0x88ed8930
-0,    1175040,    1175040,     1152,     2304, 0xabdd8096
-0,    1176192,    1176192,     1152,     2304, 0xe5f57653
-0,    1177344,    1177344,     1152,     2304, 0x7c777984
-0,    1178496,    1178496,     1152,     2304, 0x631d7deb
-0,    1179648,    1179648,     1152,     2304, 0x70d36e1f
-0,    1180800,    1180800,     1152,     2304, 0x412c802b
-0,    1181952,    1181952,     1152,     2304, 0xacc281c1
-0,    1183104,    1183104,     1152,     2304, 0x569d86a8
-0,    1184256,    1184256,     1152,     2304, 0x5c69743f
-0,    1185408,    1185408,     1152,     2304, 0x29637794
-0,    1186560,    1186560,     1152,     2304, 0x0d1b7a4f
-0,    1187712,    1187712,     1152,     2304, 0xfc6978a6
-0,    1188864,    1188864,     1152,     2304, 0x9b178987
-0,    1190016,    1190016,     1152,     2304, 0xe436714a
-0,    1191168,    1191168,     1152,     2304, 0x783a7b6e
-0,    1192320,    1192320,     1152,     2304, 0x6ff16f4e
-0,    1193472,    1193472,     1152,     2304, 0x9fd582cc
-0,    1194624,    1194624,     1152,     2304, 0xff0d6bc9
-0,    1195776,    1195776,     1152,     2304, 0x6f4975d5
-0,    1196928,    1196928,     1152,     2304, 0x94a77a90
-0,    1198080,    1198080,     1152,     2304, 0x764f80db
-0,    1199232,    1199232,     1152,     2304, 0x68e88782
-0,    1200384,    1200384,     1152,     2304, 0x91a378e8
-0,    1201536,    1201536,     1152,     2304, 0x29f26903
-0,    1202688,    1202688,     1152,     2304, 0xc3b684d4
-0,    1203840,    1203840,     1152,     2304, 0x7d95921f
-0,    1204992,    1204992,     1152,     2304, 0x876a7ded
-0,    1206144,    1206144,     1152,     2304, 0x0fbe7ee8
-0,    1207296,    1207296,     1152,     2304, 0x7d0f78c5
-0,    1208448,    1208448,     1152,     2304, 0xce906ba8
-0,    1209600,    1209600,     1152,     2304, 0xd281747b
-0,    1210752,    1210752,     1152,     2304, 0xec38811b
-0,    1211904,    1211904,     1152,     2304, 0x8170810a
-0,    1213056,    1213056,     1152,     2304, 0x4d858ba2
-0,    1214208,    1214208,     1152,     2304, 0x8c798662
-0,    1215360,    1215360,     1152,     2304, 0x23a8879b
-0,    1216512,    1216512,     1152,     2304, 0x9d0c8894
-0,    1217664,    1217664,     1152,     2304, 0xf07c681f
-0,    1218816,    1218816,     1152,     2304, 0x03ab6f7a
-0,    1219968,    1219968,     1152,     2304, 0x88c170c2
-0,    1221120,    1221120,     1152,     2304, 0xeb977bf3
-0,    1222272,    1222272,     1152,     2304, 0x33b6774f
-0,    1223424,    1223424,     1152,     2304, 0x534c776e
-0,    1224576,    1224576,     1152,     2304, 0x33c4826a
-0,    1225728,    1225728,     1152,     2304, 0xdb9878d8
-0,    1226880,    1226880,     1152,     2304, 0xeffd789b
-0,    1228032,    1228032,     1152,     2304, 0x2cc9862f
-0,    1229184,    1229184,     1152,     2304, 0x9bf18131
-0,    1230336,    1230336,     1152,     2304, 0xfa078144
-0,    1231488,    1231488,     1152,     2304, 0x922885d2
-0,    1232640,    1232640,     1152,     2304, 0x29ad7796
-0,    1233792,    1233792,     1152,     2304, 0x62766f3f
-0,    1234944,    1234944,     1152,     2304, 0x72a873a5
-0,    1236096,    1236096,     1152,     2304, 0x65176e4d
-0,    1237248,    1237248,     1152,     2304, 0x75447cbd
-0,    1238400,    1238400,     1152,     2304, 0x83528e32
-0,    1239552,    1239552,     1152,     2304, 0x0c758531
-0,    1240704,    1240704,     1152,     2304, 0x4a5c7a84
-0,    1241856,    1241856,     1152,     2304, 0xd5937648
-0,    1243008,    1243008,     1152,     2304, 0xef5c8983
-0,    1244160,    1244160,     1152,     2304, 0x24b281d0
-0,    1245312,    1245312,     1152,     2304, 0xc0af7017
-0,    1246464,    1246464,     1152,     2304, 0xf1cc7145
-0,    1247616,    1247616,     1152,     2304, 0x7ddb7a3b
-0,    1248768,    1248768,     1152,     2304, 0xd2e57692
-0,    1249920,    1249920,     1152,     2304, 0x7abc61d6
-0,    1251072,    1251072,     1152,     2304, 0x44a68338
-0,    1252224,    1252224,     1152,     2304, 0x9cad810e
-0,    1253376,    1253376,     1152,     2304, 0xbf3a7f51
-0,    1254528,    1254528,     1152,     2304, 0x52ac815e
-0,    1255680,    1255680,     1152,     2304, 0xdf327e10
-0,    1256832,    1256832,     1152,     2304, 0x797c867b
-0,    1257984,    1257984,     1152,     2304, 0x685b79ec
-0,    1259136,    1259136,     1152,     2304, 0x60267b82
-0,    1260288,    1260288,     1152,     2304, 0xe70e7fd4
-0,    1261440,    1261440,     1152,     2304, 0xaf197a22
-0,    1262592,    1262592,     1152,     2304, 0xf5e57075
-0,    1263744,    1263744,     1152,     2304, 0xa86f8197
-0,    1264896,    1264896,     1152,     2304, 0x922c7b3d
-0,    1266048,    1266048,     1152,     2304, 0x8e3b7929
-0,    1267200,    1267200,     1152,     2304, 0x816f862d
-0,    1268352,    1268352,     1152,     2304, 0xc09c761a
-0,    1269504,    1269504,     1152,     2304, 0xf1cd9010
-0,    1270656,    1270656,     1152,     2304, 0xcf5c72d8
-0,    1271808,    1271808,     1152,     2304, 0x0e607a38
-0,    1272960,    1272960,     1152,     2304, 0x6cb0819c
-0,    1274112,    1274112,     1152,     2304, 0x9e2880aa
-0,    1275264,    1275264,     1152,     2304, 0xb46f69f9
-0,    1276416,    1276416,     1152,     2304, 0xbc807eab
-0,    1277568,    1277568,     1152,     2304, 0x91237cf9
-0,    1278720,    1278720,     1152,     2304, 0xb6bf82b6
-0,    1279872,    1279872,     1152,     2304, 0xfcd58daf
-0,    1281024,    1281024,     1152,     2304, 0x53b176f4
-0,    1282176,    1282176,     1152,     2304, 0xc79d7507
-0,    1283328,    1283328,     1152,     2304, 0x810677b2
-0,    1284480,    1284480,     1152,     2304, 0x13148737
-0,    1285632,    1285632,     1152,     2304, 0x2eb5755a
-0,    1286784,    1286784,     1152,     2304, 0x5bec5cd2
-0,    1287936,    1287936,     1152,     2304, 0x66c68d47
-0,    1289088,    1289088,     1152,     2304, 0x55d18770
-0,    1290240,    1290240,     1152,     2304, 0x13997def
-0,    1291392,    1291392,     1152,     2304, 0x23cd8437
-0,    1292544,    1292544,     1152,     2304, 0x5cd07942
-0,    1293696,    1293696,     1152,     2304, 0x43ab6b4c
-0,    1294848,    1294848,     1152,     2304, 0xc55788ec
-0,    1296000,    1296000,     1152,     2304, 0x6b69788e
-0,    1297152,    1297152,     1152,     2304, 0xcacb8fea
-0,    1298304,    1298304,     1152,     2304, 0x83407ebd
-0,    1299456,    1299456,     1152,     2304, 0x998275f3
-0,    1300608,    1300608,     1152,     2304, 0x0ca76dcc
-0,    1301760,    1301760,     1152,     2304, 0xa1ff7c6a
-0,    1302912,    1302912,     1152,     2304, 0x4a597683
-0,    1304064,    1304064,     1152,     2304, 0xc8e57f16
-0,    1305216,    1305216,     1152,     2304, 0x376d82fb
-0,    1306368,    1306368,     1152,     2304, 0x489b8164
-0,    1307520,    1307520,     1152,     2304, 0xece27f77
-0,    1308672,    1308672,     1152,     2304, 0x4c197e8f
-0,    1309824,    1309824,     1152,     2304, 0x6eb07739
-0,    1310976,    1310976,     1152,     2304, 0xef628145
-0,    1312128,    1312128,     1152,     2304, 0x549d6f12
-0,    1313280,    1313280,     1152,     2304, 0xbe566b70
-0,    1314432,    1314432,     1152,     2304, 0x2bde87d3
-0,    1315584,    1315584,     1152,     2304, 0x0ffd7175
-0,    1316736,    1316736,     1152,     2304, 0x47d28a8b
-0,    1317888,    1317888,     1152,     2304, 0x6f6c718e
-0,    1319040,    1319040,     1152,     2304, 0x665e7937
-0,    1320192,    1320192,     1152,     2304, 0x73538e8d
-0,    1321344,    1321344,     1152,     2304, 0x56167d71
-0,    1322496,    1322496,     1152,     2304, 0x3d0278ed
-0,    1323648,    1323648,     1152,     2304, 0x9baf7a51
-0,    1324800,    1324800,     1152,     2304, 0x8c058105
-0,    1325952,    1325952,     1152,     2304, 0x2b9573ca
-0,    1327104,    1327104,     1152,     2304, 0x4a2271a1
-0,    1328256,    1328256,     1152,     2304, 0x3d5276f6
-0,    1329408,    1329408,     1152,     2304, 0x43c076cd
-0,    1330560,    1330560,     1152,     2304, 0x4fec8929
-0,    1331712,    1331712,     1152,     2304, 0xaa7167ec
-0,    1332864,    1332864,     1152,     2304, 0x39ff7ea5
-0,    1334016,    1334016,     1152,     2304, 0x2dd58348
-0,    1335168,    1335168,     1152,     2304, 0x03fc8782
-0,    1336320,    1336320,     1152,     2304, 0x55a4649c
-0,    1337472,    1337472,     1152,     2304, 0x71407067
-0,    1338624,    1338624,     1152,     2304, 0x3b667ce6
-0,    1339776,    1339776,     1152,     2304, 0xb12e81aa
-0,    1340928,    1340928,     1152,     2304, 0xc1646ee3
-0,    1342080,    1342080,     1152,     2304, 0x6b55884f
-0,    1343232,    1343232,     1152,     2304, 0x292382b5
-0,    1344384,    1344384,     1152,     2304, 0x18f97854
-0,    1345536,    1345536,     1152,     2304, 0x47a480be
-0,    1346688,    1346688,     1152,     2304, 0xe0857b16
-0,    1347840,    1347840,     1152,     2304, 0xe3368462
-0,    1348992,    1348992,     1152,     2304, 0xced17ae8
-0,    1350144,    1350144,     1152,     2304, 0x4020796d
-0,    1351296,    1351296,     1152,     2304, 0x26797fd0
-0,    1352448,    1352448,     1152,     2304, 0x0e087554
-0,    1353600,    1353600,     1152,     2304, 0xf8868040
-0,    1354752,    1354752,     1152,     2304, 0x3f0a7784
-0,    1355904,    1355904,     1152,     2304, 0xbe517d5e
-0,    1357056,    1357056,     1152,     2304, 0xed0b66bc
-0,    1358208,    1358208,     1152,     2304, 0x79b96d63
-0,    1359360,    1359360,     1152,     2304, 0x1ab4793a
-0,    1360512,    1360512,     1152,     2304, 0xd2db7f7d
-0,    1361664,    1361664,     1152,     2304, 0x91017992
-0,    1362816,    1362816,     1152,     2304, 0xf88f7cb9
-0,    1363968,    1363968,     1152,     2304, 0x0cc98309
-0,    1365120,    1365120,     1152,     2304, 0x38b77cb8
-0,    1366272,    1366272,     1152,     2304, 0x0cb185f9
-0,    1367424,    1367424,     1152,     2304, 0xc4a27824
-0,    1368576,    1368576,     1152,     2304, 0x1ce97618
-0,    1369728,    1369728,     1152,     2304, 0x1423976e
-0,    1370880,    1370880,     1152,     2304, 0xdbb781b2
-0,    1372032,    1372032,     1152,     2304, 0x156d71d3
-0,    1373184,    1373184,     1152,     2304, 0xce228625
-0,    1374336,    1374336,     1152,     2304, 0x318072f3
-0,    1375488,    1375488,     1152,     2304, 0x464b77a1
-0,    1376640,    1376640,     1152,     2304, 0x0204854d
-0,    1377792,    1377792,     1152,     2304, 0x498c8373
-0,    1378944,    1378944,     1152,     2304, 0xfe1a6e23
-0,    1380096,    1380096,     1152,     2304, 0x705284d0
-0,    1381248,    1381248,     1152,     2304, 0x91848980
-0,    1382400,    1382400,     1152,     2304, 0x259098c5
-0,    1383552,    1383552,     1152,     2304, 0x52b07c02
-0,    1384704,    1384704,     1152,     2304, 0x017289f6
-0,    1385856,    1385856,     1152,     2304, 0xb87687bf
-0,    1387008,    1387008,     1152,     2304, 0x72de7843
-0,    1388160,    1388160,     1152,     2304, 0x3daf8939
-0,    1389312,    1389312,     1152,     2304, 0x0dfb763c
-0,    1390464,    1390464,     1152,     2304, 0xc3017408
-0,    1391616,    1391616,     1152,     2304, 0x73f27f14
-0,    1392768,    1392768,     1152,     2304, 0x1c517541
-0,    1393920,    1393920,     1152,     2304, 0xf390748b
-0,    1395072,    1395072,     1152,     2304, 0xe6ac71f3
-0,    1396224,    1396224,     1152,     2304, 0x6277862d
-0,    1397376,    1397376,     1152,     2304, 0x923d6794
-0,    1398528,    1398528,     1152,     2304, 0x320a7f20
-0,    1399680,    1399680,     1152,     2304, 0x12f07a09
-0,    1400832,    1400832,     1152,     2304, 0xf9568b14
-0,    1401984,    1401984,     1152,     2304, 0x4d787fea
-0,    1403136,    1403136,     1152,     2304, 0x57c5861a
-0,    1404288,    1404288,     1152,     2304, 0x826b7cc7
-0,    1405440,    1405440,     1152,     2304, 0x7d056a94
-0,    1406592,    1406592,     1152,     2304, 0xbf3e8a60
-0,    1407744,    1407744,     1152,     2304, 0x5e518cab
-0,    1408896,    1408896,     1152,     2304, 0x64e7794b
-0,    1410048,    1410048,     1152,     2304, 0x9c9569ac
-0,    1411200,    1411200,     1152,     2304, 0xde167582
-0,    1412352,    1412352,     1152,     2304, 0xdd8f78b1
-0,    1413504,    1413504,     1152,     2304, 0xec577e60
-0,    1414656,    1414656,     1152,     2304, 0x06877aa4
-0,    1415808,    1415808,     1152,     2304, 0xb5157742
-0,    1416960,    1416960,     1152,     2304, 0xf9be6f2b
-0,    1418112,    1418112,     1152,     2304, 0xf95784c7
-0,    1419264,    1419264,     1152,     2304, 0x5aec7e15
-0,    1420416,    1420416,     1152,     2304, 0x666e7ce9
-0,    1421568,    1421568,     1152,     2304, 0xa76393a7
-0,    1422720,    1422720,     1152,     2304, 0xd1bb6d5c
-0,    1423872,    1423872,     1152,     2304, 0x48d88144
-0,    1425024,    1425024,     1152,     2304, 0xa7df8177
-0,    1426176,    1426176,     1152,     2304, 0x906476b1
-0,    1427328,    1427328,     1152,     2304, 0x9f46723d
-0,    1428480,    1428480,     1152,     2304, 0xa6d37a0c
-0,    1429632,    1429632,     1152,     2304, 0x73e57be2
-0,    1430784,    1430784,     1152,     2304, 0x666e8594
-0,    1431936,    1431936,     1152,     2304, 0xd4b481d7
-0,    1433088,    1433088,     1152,     2304, 0xfced8427
-0,    1434240,    1434240,     1152,     2304, 0x743999bd
-0,    1435392,    1435392,     1152,     2304, 0x71dd759b
-0,    1436544,    1436544,     1152,     2304, 0x54a2849e
-0,    1437696,    1437696,     1152,     2304, 0x07377f58
-0,    1438848,    1438848,     1152,     2304, 0xfd8f721a
-0,    1440000,    1440000,     1152,     2304, 0x4d917422
-0,    1441152,    1441152,     1152,     2304, 0xaaa46e2b
-0,    1442304,    1442304,     1152,     2304, 0x49e57597
-0,    1443456,    1443456,     1152,     2304, 0x29337511
-0,    1444608,    1444608,     1152,     2304, 0xdfee84c2
-0,    1445760,    1445760,     1152,     2304, 0x57266b8c
-0,    1446912,    1446912,     1152,     2304, 0x5bf27cfc
-0,    1448064,    1448064,     1152,     2304, 0xdc4b8b29
-0,    1449216,    1449216,     1152,     2304, 0xf49e7466
-0,    1450368,    1450368,     1152,     2304, 0xf1d975f3
-0,    1451520,    1451520,     1152,     2304, 0xc863774d
-0,    1452672,    1452672,     1152,     2304, 0x5fab80c0
-0,    1453824,    1453824,     1152,     2304, 0x19548d6f
-0,    1454976,    1454976,     1152,     2304, 0x165983ac
-0,    1456128,    1456128,     1152,     2304, 0x2d658203
-0,    1457280,    1457280,     1152,     2304, 0x717678b2
-0,    1458432,    1458432,     1152,     2304, 0x1ed26dae
-0,    1459584,    1459584,     1152,     2304, 0xecef7975
-0,    1460736,    1460736,     1152,     2304, 0xcd2b804c
-0,    1461888,    1461888,     1152,     2304, 0xf4597f97
-0,    1463040,    1463040,     1152,     2304, 0x904b75cd
-0,    1464192,    1464192,     1152,     2304, 0x6b508374
-0,    1465344,    1465344,     1152,     2304, 0xb6b4791f
-0,    1466496,    1466496,     1152,     2304, 0xce6d7501
-0,    1467648,    1467648,     1152,     2304, 0x99ec8bac
-0,    1468800,    1468800,     1152,     2304, 0x65d772b7
-0,    1469952,    1469952,     1152,     2304, 0xbc2762ff
-0,    1471104,    1471104,     1152,     2304, 0xf8a48378
-0,    1472256,    1472256,     1152,     2304, 0x18057ad4
-0,    1473408,    1473408,     1152,     2304, 0x89af753f
-0,    1474560,    1474560,     1152,     2304, 0x95a48fc5
-0,    1475712,    1475712,     1152,     2304, 0x8b7576af
-0,    1476864,    1476864,     1152,     2304, 0xcdcb6e67
-0,    1478016,    1478016,     1152,     2304, 0xe02a7ff3
-0,    1479168,    1479168,     1152,     2304, 0x4f0a7d48
-0,    1480320,    1480320,     1152,     2304, 0x9bab8287
-0,    1481472,    1481472,     1152,     2304, 0xc1fa8063
-0,    1482624,    1482624,     1152,     2304, 0x410e7dd7
-0,    1483776,    1483776,     1152,     2304, 0xd4de777f
-0,    1484928,    1484928,     1152,     2304, 0x403675ac
-0,    1486080,    1486080,     1152,     2304, 0x4d7687c5
-0,    1487232,    1487232,     1152,     2304, 0x10c6650d
-0,    1488384,    1488384,     1152,     2304, 0xd7e67b69
-0,    1489536,    1489536,     1152,     2304, 0x42b97fae
-0,    1490688,    1490688,     1152,     2304, 0xa8b57a9f
-0,    1491840,    1491840,     1152,     2304, 0xc24960a0
-0,    1492992,    1492992,     1152,     2304, 0x16198fec
-0,    1494144,    1494144,     1152,     2304, 0x29297416
-0,    1495296,    1495296,     1152,     2304, 0x129b7d00
-0,    1496448,    1496448,     1152,     2304, 0xa9677141
-0,    1497600,    1497600,     1152,     2304, 0x86a779bf
-0,    1498752,    1498752,     1152,     2304, 0xa3857dce
-0,    1499904,    1499904,     1152,     2304, 0xee6d7c4d
-0,    1501056,    1501056,     1152,     2304, 0x97127166
-0,    1502208,    1502208,     1152,     2304, 0x6e407c25
-0,    1503360,    1503360,     1152,     2304, 0x82a07c9a
-0,    1504512,    1504512,     1152,     2304, 0xf62381ef
-0,    1505664,    1505664,     1152,     2304, 0x2ab86ca8
-0,    1506816,    1506816,     1152,     2304, 0x97ca87ed
-0,    1507968,    1507968,     1152,     2304, 0x6b4868f2
-0,    1509120,    1509120,     1152,     2304, 0xdb2a953c
-0,    1510272,    1510272,     1152,     2304, 0x061e7770
-0,    1511424,    1511424,     1152,     2304, 0xae067224
-0,    1512576,    1512576,     1152,     2304, 0x88a282a3
-0,    1513728,    1513728,     1152,     2304, 0x4e14743e
-0,    1514880,    1514880,     1152,     2304, 0x3dbd8b7b
-0,    1516032,    1516032,     1152,     2304, 0xcae57312
-0,    1517184,    1517184,     1152,     2304, 0xf9038ca7
-0,    1518336,    1518336,     1152,     2304, 0x03ec6f07
-0,    1519488,    1519488,     1152,     2304, 0x8c297359
-0,    1520640,    1520640,     1152,     2304, 0x8af1804c
-0,    1521792,    1521792,     1152,     2304, 0xf30087c8
-0,    1522944,    1522944,     1152,     2304, 0x21127cd1
-0,    1524096,    1524096,     1152,     2304, 0xc66772bd
-0,    1525248,    1525248,     1152,     2304, 0x05e6630b
-0,    1526400,    1526400,     1152,     2304, 0x020688d9
-0,    1527552,    1527552,     1152,     2304, 0xee3873cd
-0,    1528704,    1528704,     1152,     2304, 0xf263782b
-0,    1529856,    1529856,     1152,     2304, 0xf8877144
-0,    1531008,    1531008,     1152,     2304, 0xce1e6f33
-0,    1532160,    1532160,     1152,     2304, 0xf0457e4f
-0,    1533312,    1533312,     1152,     2304, 0xde9387df
-0,    1534464,    1534464,     1152,     2304, 0x5df86d1d
-0,    1535616,    1535616,     1152,     2304, 0x949087ef
-0,    1536768,    1536768,     1152,     2304, 0xdbd269c6
-0,    1537920,    1537920,     1152,     2304, 0xd530806e
-0,    1539072,    1539072,     1152,     2304, 0x80027b1d
-0,    1540224,    1540224,     1152,     2304, 0xccb47c48
-0,    1541376,    1541376,     1152,     2304, 0x4f2f8326
-0,    1542528,    1542528,     1152,     2304, 0xfe046f23
-0,    1543680,    1543680,     1152,     2304, 0x5939759b
-0,    1544832,    1544832,     1152,     2304, 0x83147073
-0,    1545984,    1545984,     1152,     2304, 0xc9e77b95
-0,    1547136,    1547136,     1152,     2304, 0x702d8e51
-0,    1548288,    1548288,     1152,     2304, 0x5f5f8492
-0,    1549440,    1549440,     1152,     2304, 0x10116d0e
-0,    1550592,    1550592,     1152,     2304, 0xb8587894
-0,    1551744,    1551744,     1152,     2304, 0x2b357892
-0,    1552896,    1552896,     1152,     2304, 0x160e6c20
-0,    1554048,    1554048,     1152,     2304, 0x0534874b
-0,    1555200,    1555200,     1152,     2304, 0xde687064
-0,    1556352,    1556352,     1152,     2304, 0x91ef7fe4
-0,    1557504,    1557504,     1152,     2304, 0x077473ce
-0,    1558656,    1558656,     1152,     2304, 0x1be77777
-0,    1559808,    1559808,     1152,     2304, 0xaada9096
-0,    1560960,    1560960,     1152,     2304, 0xeac96f7e
-0,    1562112,    1562112,     1152,     2304, 0xe9ca6e72
-0,    1563264,    1563264,     1152,     2304, 0xd0868038
-0,    1564416,    1564416,     1152,     2304, 0x689176f1
-0,    1565568,    1565568,     1152,     2304, 0xdfdb7684
-0,    1566720,    1566720,     1152,     2304, 0x83c55d92
-0,    1567872,    1567872,     1152,     2304, 0x77587b70
-0,    1569024,    1569024,     1152,     2304, 0x60937941
-0,    1570176,    1570176,     1152,     2304, 0xb00267bd
-0,    1571328,    1571328,     1152,     2304, 0xd0a166eb
-0,    1572480,    1572480,     1152,     2304, 0x88718162
-0,    1573632,    1573632,     1152,     2304, 0xf9e084ff
-0,    1574784,    1574784,     1152,     2304, 0x2ddc8c08
-0,    1575936,    1575936,     1152,     2304, 0x843a7178
-0,    1577088,    1577088,     1152,     2304, 0x1d9784b4
-0,    1578240,    1578240,     1152,     2304, 0x98aa7b1d
-0,    1579392,    1579392,     1152,     2304, 0x583e846e
-0,    1580544,    1580544,     1152,     2304, 0xa6d284ec
-0,    1581696,    1581696,     1152,     2304, 0xa0147cbf
-0,    1582848,    1582848,     1152,     2304, 0xb3bf7b4b
-0,    1584000,    1584000,     1152,     2304, 0x7f8b78dd
-0,    1585152,    1585152,     1152,     2304, 0xd3fb761f
-0,    1586304,    1586304,     1152,     2304, 0x78147c2f
-0,    1587456,    1587456,     1152,     2304, 0x5e747b26
-0,    1588608,    1588608,     1152,     2304, 0x0d8e7d96
-0,    1589760,    1589760,     1152,     2304, 0x73728362
-0,    1590912,    1590912,     1152,     2304, 0x7c54707b
-0,    1592064,    1592064,     1152,     2304, 0xb89c7806
-0,    1593216,    1593216,     1152,     2304, 0x5f0f7a91
-0,    1594368,    1594368,     1152,     2304, 0x35217e84
-0,    1595520,    1595520,     1152,     2304, 0xb3798553
-0,    1596672,    1596672,     1152,     2304, 0xd8377e8a
-0,    1597824,    1597824,     1152,     2304, 0x339181ba
-0,    1598976,    1598976,     1152,     2304, 0xbe196b2c
-0,    1600128,    1600128,     1152,     2304, 0xdc708cc4
-0,    1601280,    1601280,     1152,     2304, 0x649c74c1
-0,    1602432,    1602432,     1152,     2304, 0xac128649
-0,    1603584,    1603584,     1152,     2304, 0xcd266a22
-0,    1604736,    1604736,     1152,     2304, 0xdfde7023
-0,    1605888,    1605888,     1152,     2304, 0xf92d7450
-0,    1607040,    1607040,     1152,     2304, 0x9ce379ac
-0,    1608192,    1608192,     1152,     2304, 0xee5e712e
-0,    1609344,    1609344,     1152,     2304, 0xa12676c0
-0,    1610496,    1610496,     1152,     2304, 0x4cee7be1
-0,    1611648,    1611648,     1152,     2304, 0x90c881e5
-0,    1612800,    1612800,     1152,     2304, 0xa6a78158
-0,    1613952,    1613952,     1152,     2304, 0xd248864a
-0,    1615104,    1615104,     1152,     2304, 0x7a8972b4
-0,    1616256,    1616256,     1152,     2304, 0x5c8a75cb
-0,    1617408,    1617408,     1152,     2304, 0x07606722
-0,    1618560,    1618560,     1152,     2304, 0xbf497aae
-0,    1619712,    1619712,     1152,     2304, 0xffab7d66
-0,    1620864,    1620864,     1152,     2304, 0xefdc7d05
-0,    1622016,    1622016,     1152,     2304, 0x02d686dd
-0,    1623168,    1623168,     1152,     2304, 0x852e6eb6
-0,    1624320,    1624320,     1152,     2304, 0x3445706b
-0,    1625472,    1625472,     1152,     2304, 0x04677828
-0,    1626624,    1626624,     1152,     2304, 0x5d5c833c
-0,    1627776,    1627776,     1152,     2304, 0xf4277e06
-0,    1628928,    1628928,     1152,     2304, 0xcece72ab
-0,    1630080,    1630080,     1152,     2304, 0x10837aff
-0,    1631232,    1631232,     1152,     2304, 0x0265762b
-0,    1632384,    1632384,     1152,     2304, 0x0fc979de
-0,    1633536,    1633536,     1152,     2304, 0xeecf7010
-0,    1634688,    1634688,     1152,     2304, 0x313975cd
-0,    1635840,    1635840,     1152,     2304, 0xccce84a7
-0,    1636992,    1636992,     1152,     2304, 0x823e6935
-0,    1638144,    1638144,     1152,     2304, 0xa0606ade
-0,    1639296,    1639296,     1152,     2304, 0x2c2d7866
-0,    1640448,    1640448,     1152,     2304, 0x3a5a781e
-0,    1641600,    1641600,     1152,     2304, 0xdaf677fd
-0,    1642752,    1642752,     1152,     2304, 0x79d88aca
-0,    1643904,    1643904,     1152,     2304, 0x3db27308
-0,    1645056,    1645056,     1152,     2304, 0x627175ff
-0,    1646208,    1646208,     1152,     2304, 0x40697b16
-0,    1647360,    1647360,     1152,     2304, 0x40e47ee7
-0,    1648512,    1648512,     1152,     2304, 0x6a6a7d91
-0,    1649664,    1649664,     1152,     2304, 0x8d3e8a32
-0,    1650816,    1650816,     1152,     2304, 0x576a817e
-0,    1651968,    1651968,     1152,     2304, 0x86287f9b
-0,    1653120,    1653120,     1152,     2304, 0x43e081ce
-0,    1654272,    1654272,     1152,     2304, 0xe44b8003
-0,    1655424,    1655424,     1152,     2304, 0xfb237f5b
-0,    1656576,    1656576,     1152,     2304, 0x28967c8a
-0,    1657728,    1657728,     1152,     2304, 0x97478829
-0,    1658880,    1658880,     1152,     2304, 0x615570e8
-0,    1660032,    1660032,     1152,     2304, 0xa52c7d34
-0,    1661184,    1661184,     1152,     2304, 0xb55473e8
-0,    1662336,    1662336,     1152,     2304, 0x08a07d6c
-0,    1663488,    1663488,     1152,     2304, 0x2eed916a
-0,    1664640,    1664640,     1152,     2304, 0x8ce36cec
-0,    1665792,    1665792,     1152,     2304, 0x775e8992
-0,    1666944,    1666944,     1152,     2304, 0x99ad816b
-0,    1668096,    1668096,     1152,     2304, 0x697a70be
-0,    1669248,    1669248,     1152,     2304, 0x38c46cf2
-0,    1670400,    1670400,     1152,     2304, 0xf9d681f4
-0,    1671552,    1671552,     1152,     2304, 0xa7336fea
-0,    1672704,    1672704,     1152,     2304, 0xd2a97e59
-0,    1673856,    1673856,     1152,     2304, 0x02b880e9
-0,    1675008,    1675008,     1152,     2304, 0x2a486e9e
-0,    1676160,    1676160,     1152,     2304, 0x5f197c43
-0,    1677312,    1677312,     1152,     2304, 0x266679ad
-0,    1678464,    1678464,     1152,     2304, 0x7cfd7c8a
-0,    1679616,    1679616,     1152,     2304, 0x43b681bb
-0,    1680768,    1680768,     1152,     2304, 0x63309cff
-0,    1681920,    1681920,     1152,     2304, 0x80f36f7d
-0,    1683072,    1683072,     1152,     2304, 0xcead7b98
-0,    1684224,    1684224,     1152,     2304, 0x8475992f
-0,    1685376,    1685376,     1152,     2304, 0x746782d2
-0,    1686528,    1686528,     1152,     2304, 0x2b486982
-0,    1687680,    1687680,     1152,     2304, 0xc80c7fc2
-0,    1688832,    1688832,     1152,     2304, 0x148c7c3b
-0,    1689984,    1689984,     1152,     2304, 0x49c477df
-0,    1691136,    1691136,     1152,     2304, 0x1bf97c56
-0,    1692288,    1692288,     1152,     2304, 0x89a38221
-0,    1693440,    1693440,     1152,     2304, 0xcb757a31
-0,    1694592,    1694592,     1152,     2304, 0xd3bb72fa
-0,    1695744,    1695744,     1152,     2304, 0xe5727650
-0,    1696896,    1696896,     1152,     2304, 0x5d4e9242
-0,    1698048,    1698048,     1152,     2304, 0x29207f75
-0,    1699200,    1699200,     1152,     2304, 0x5b1d954a
-0,    1700352,    1700352,     1152,     2304, 0xa2888223
-0,    1701504,    1701504,     1152,     2304, 0xe31d8af7
-0,    1702656,    1702656,     1152,     2304, 0x29ee82dd
-0,    1703808,    1703808,     1152,     2304, 0x4b1d7887
-0,    1704960,    1704960,     1152,     2304, 0xc0308236
-0,    1706112,    1706112,     1152,     2304, 0xaed68e92
-0,    1707264,    1707264,     1152,     2304, 0xbc1171e9
-0,    1708416,    1708416,     1152,     2304, 0x379f828b
-0,    1709568,    1709568,     1152,     2304, 0x83de72c8
-0,    1710720,    1710720,     1152,     2304, 0xb11a806a
-0,    1711872,    1711872,     1152,     2304, 0x7ac17e7a
-0,    1713024,    1713024,     1152,     2304, 0x031b7d9a
-0,    1714176,    1714176,     1152,     2304, 0xebc97c43
-0,    1715328,    1715328,     1152,     2304, 0x914e86a7
-0,    1716480,    1716480,     1152,     2304, 0x88e2747e
-0,    1717632,    1717632,     1152,     2304, 0x153076bc
-0,    1718784,    1718784,     1152,     2304, 0xdacc7ebb
-0,    1719936,    1719936,     1152,     2304, 0x0905709a
-0,    1721088,    1721088,     1152,     2304, 0x13358109
-0,    1722240,    1722240,     1152,     2304, 0x18227228
-0,    1723392,    1723392,     1152,     2304, 0x318082d7
-0,    1724544,    1724544,     1152,     2304, 0x8aec7a8e
-0,    1725696,    1725696,     1152,     2304, 0x8c9c7755
-0,    1726848,    1726848,     1152,     2304, 0xdba46e2f
-0,    1728000,    1728000,     1152,     2304, 0xd47d7f5a
-0,    1729152,    1729152,     1152,     2304, 0xeb1370ec
-0,    1730304,    1730304,     1152,     2304, 0xf6d68e74
-0,    1731456,    1731456,     1152,     2304, 0x3d5a7c66
-0,    1732608,    1732608,     1152,     2304, 0x264b6d68
-0,    1733760,    1733760,     1152,     2304, 0x7cde8b9f
-0,    1734912,    1734912,     1152,     2304, 0xba467347
-0,    1736064,    1736064,     1152,     2304, 0x921987f7
-0,    1737216,    1737216,     1152,     2304, 0x9c6c7fce
-0,    1738368,    1738368,     1152,     2304, 0x02cb828f
-0,    1739520,    1739520,     1152,     2304, 0x3b427077
-0,    1740672,    1740672,     1152,     2304, 0x6f9c7523
-0,    1741824,    1741824,     1152,     2304, 0xef688204
-0,    1742976,    1742976,     1152,     2304, 0x263782dc
-0,    1744128,    1744128,     1152,     2304, 0xb6ca7e46
-0,    1745280,    1745280,     1152,     2304, 0xb0929636
-0,    1746432,    1746432,     1152,     2304, 0x60a97b7a
-0,    1747584,    1747584,     1152,     2304, 0xcf037690
-0,    1748736,    1748736,     1152,     2304, 0xc9046a25
-0,    1749888,    1749888,     1152,     2304, 0xbf247f3c
-0,    1751040,    1751040,     1152,     2304, 0x71e77d34
-0,    1752192,    1752192,     1152,     2304, 0x96ce7a01
-0,    1753344,    1753344,     1152,     2304, 0xf9b07a27
-0,    1754496,    1754496,     1152,     2304, 0x984b830b
-0,    1755648,    1755648,     1152,     2304, 0xff11822b
-0,    1756800,    1756800,     1152,     2304, 0xb0a18578
-0,    1757952,    1757952,     1152,     2304, 0xa9d98ef8
-0,    1759104,    1759104,     1152,     2304, 0x4f9076e1
-0,    1760256,    1760256,     1152,     2304, 0xb63c7742
-0,    1761408,    1761408,     1152,     2304, 0xb4c06ae2
-0,    1762560,    1762560,     1152,     2304, 0x41738531
-0,    1763712,    1763712,     1152,     2304, 0x55125e7b
+0,      26496,      26496,     1152,     2304, 0xb8dc7525
+0,      52992,      52992,     1152,     2304, 0x3e7d6905
+0,      79488,      79488,     1152,     2304, 0xef47877b
+0,     105984,     105984,     1152,     2304, 0xfe916b7e
+0,     132480,     132480,     1152,     2304, 0xe3d08cde
+0,     158976,     158976,     1152,     2304, 0xff7f86cf
+0,     185472,     185472,     1152,     2304, 0x843e6f95
+0,     211968,     211968,     1152,     2304, 0x81577c26
+0,     238464,     238464,     1152,     2304, 0x04a085d5
+0,     264960,     264960,     1152,     2304, 0x1c5a76f5
+0,     291456,     291456,     1152,     2304, 0x4ee78623
+0,     317952,     317952,     1152,     2304, 0x8ec861dc
+0,     344448,     344448,     1152,     2304, 0x0ca179d8
+0,     370944,     370944,     1152,     2304, 0xc6da750f
+0,     397440,     397440,     1152,     2304, 0xf6bf79b5
+0,     423936,     423936,     1152,     2304, 0x97b88a43
+0,     450432,     450432,     1152,     2304, 0xf13c7b9c
+0,     476928,     476928,     1152,     2304, 0xdfba83af
+0,     503424,     503424,     1152,     2304, 0xc9467d4b
+0,     529920,     529920,     1152,     2304, 0xbbb58e2b
+0,     556416,     556416,     1152,     2304, 0x3a1078ea
+0,     582912,     582912,     1152,     2304, 0xe9587a5c
+0,     609408,     609408,     1152,     2304, 0xef5a8039
+0,     635904,     635904,     1152,     2304, 0x9d5f782f
+0,     662400,     662400,     1152,     2304, 0x1a548291
+0,     688896,     688896,     1152,     2304, 0x07517701
+0,     715392,     715392,     1152,     2304, 0x78127d6e
+0,     741888,     741888,     1152,     2304, 0x62e2788a
+0,     768384,     768384,     1152,     2304, 0x29397ad9
+0,     794880,     794880,     1152,     2304, 0x45da82d6
+0,     821376,     821376,     1152,     2304, 0x8ed66e51
+0,     847872,     847872,     1152,     2304, 0x660775cd
+0,     874368,     874368,     1152,     2304, 0x802c767a
+0,     900864,     900864,     1152,     2304, 0xcc055840
+0,     927360,     927360,     1152,     2304, 0x701b7eaf
+0,     953856,     953856,     1152,     2304, 0x8290749f
+0,     980352,     980352,     1152,     2304, 0x2c7b7d30
+0,    1006848,    1006848,     1152,     2304, 0xe4f17743
+0,    1033344,    1033344,     1152,     2304, 0x0e747d6e
+0,    1059840,    1059840,     1152,     2304, 0xbe7775a0
+0,    1086336,    1086336,     1152,     2304, 0xcf797673
+0,    1112832,    1112832,     1152,     2304, 0x29cb7800
+0,    1139328,    1139328,     1152,     2304, 0xfc947890
+0,    1165824,    1165824,     1152,     2304, 0x62757fc6
+0,    1192320,    1192320,     1152,     2304, 0x098876d0
+0,    1218816,    1218816,     1152,     2304, 0xa9567ee2
+0,    1245312,    1245312,     1152,     2304, 0xe3bb9173
+0,    1271808,    1271808,     1152,     2304, 0xcc2d6dee
+0,    1298304,    1298304,     1152,     2304, 0xe94591ab
+0,    1324800,    1324800,     1152,     2304, 0x5c7588de
+0,    1351296,    1351296,     1152,     2304, 0xfd83643c
+0,    1377792,    1377792,     1152,     2304, 0x528177f1
+0,    1404288,    1404288,     1152,     2304, 0x65d08474
+0,    1430784,    1430784,     1152,     2304, 0x738d765b
+0,    1457280,    1457280,     1152,     2304, 0xdd3d810e
+0,    1483776,    1483776,     1152,     2304, 0xef4f90d3
+0,    1510272,    1510272,     1152,     2304, 0x61e28d43
+0,    1536768,    1536768,     1152,     2304, 0x9a11796b
+0,    1563264,    1563264,     1152,     2304, 0x96c97dcd
+0,    1589760,    1589760,     1152,     2304, 0xa8fe8621
+0,    1616256,    1616256,     1152,     2304, 0x499b7d38
+0,    1642752,    1642752,     1152,     2304, 0xfcb078a9
+0,    1669248,    1669248,     1152,     2304, 0x40d78651
+0,    1695744,    1695744,     1152,     2304, 0xa4af7234
+0,    1722240,    1722240,     1152,     2304, 0x6831870a
+0,    1748736,    1748736,     1152,     2304, 0x030e7b9d
+0,    1775232,    1775232,     1152,     2304, 0x445a75b6
+0,    1801728,    1801728,     1152,     2304, 0x09857389
+0,    1828224,    1828224,     1152,     2304, 0x0d018866
+0,    1854720,    1854720,     1152,     2304, 0x2afe810a
+0,    1881216,    1881216,     1152,     2304, 0x0bcf7c43
+0,    1907712,    1907712,     1152,     2304, 0x13737c12
+0,    1934208,    1934208,     1152,     2304, 0x716c7bba
+0,    1960704,    1960704,     1152,     2304, 0xb801823b
+0,    1987200,    1987200,     1152,     2304, 0x0fd573ee
+0,    2013696,    2013696,     1152,     2304, 0xe1ab879c
+0,    2040192,    2040192,     1152,     2304, 0x49e6764f
+0,    2066688,    2066688,     1152,     2304, 0xd5f26ddc
+0,    2093184,    2093184,     1152,     2304, 0x076775ff
+0,    2119680,    2119680,     1152,     2304, 0xfbb86fce
+0,    2146176,    2146176,     1152,     2304, 0x20c56858
+0,    2172672,    2172672,     1152,     2304, 0x043e6891
+0,    2199168,    2199168,     1152,     2304, 0x59648729
+0,    2225664,    2225664,     1152,     2304, 0xd4907a63
+0,    2252160,    2252160,     1152,     2304, 0xd0208a4c
+0,    2278656,    2278656,     1152,     2304, 0xce968383
+0,    2305152,    2305152,     1152,     2304, 0x3cfc7cd1
+0,    2331648,    2331648,     1152,     2304, 0x628a7bf5
+0,    2358144,    2358144,     1152,     2304, 0x9cfe8a4f
+0,    2384640,    2384640,     1152,     2304, 0xdf6f7c6d
+0,    2411136,    2411136,     1152,     2304, 0x6cf6882a
+0,    2437632,    2437632,     1152,     2304, 0x099773a3
+0,    2464128,    2464128,     1152,     2304, 0x4a1c7649
+0,    2490624,    2490624,     1152,     2304, 0x31ea71cb
+0,    2517120,    2517120,     1152,     2304, 0xed127ed9
+0,    2543616,    2543616,     1152,     2304, 0x5b156954
+0,    2570112,    2570112,     1152,     2304, 0xdd638532
+0,    2596608,    2596608,     1152,     2304, 0xf1a271f2
+0,    2623104,    2623104,     1152,     2304, 0x779184d7
+0,    2649600,    2649600,     1152,     2304, 0x49a88aa8
+0,    2676096,    2676096,     1152,     2304, 0xa11b7c90
+0,    2702592,    2702592,     1152,     2304, 0xbf488274
+0,    2729088,    2729088,     1152,     2304, 0x002f79a8
+0,    2755584,    2755584,     1152,     2304, 0x0ed97e2f
+0,    2782080,    2782080,     1152,     2304, 0x7845878f
+0,    2808576,    2808576,     1152,     2304, 0x46d777dc
+0,    2835072,    2835072,     1152,     2304, 0x8d0179e3
+0,    2861568,    2861568,     1152,     2304, 0x38917f9f
+0,    2888064,    2888064,     1152,     2304, 0x449876e7
+0,    2914560,    2914560,     1152,     2304, 0x001a8769
+0,    2941056,    2941056,     1152,     2304, 0x06c1826b
+0,    2967552,    2967552,     1152,     2304, 0x41b68047
+0,    2994048,    2994048,     1152,     2304, 0xeb9782c6
+0,    3020544,    3020544,     1152,     2304, 0x7cd9719c
+0,    3047040,    3047040,     1152,     2304, 0x3a4a767c
+0,    3073536,    3073536,     1152,     2304, 0x7f887e81
+0,    3100032,    3100032,     1152,     2304, 0xf75d714b
+0,    3126528,    3126528,     1152,     2304, 0x33b57e9f
+0,    3153024,    3153024,     1152,     2304, 0xc732749e
+0,    3179520,    3179520,     1152,     2304, 0x386f7e1a
+0,    3206016,    3206016,     1152,     2304, 0x6b9c767d
+0,    3232512,    3232512,     1152,     2304, 0x701c83e5
+0,    3259008,    3259008,     1152,     2304, 0xb92571e1
+0,    3285504,    3285504,     1152,     2304, 0x833a84bc
+0,    3312000,    3312000,     1152,     2304, 0x1b6984e0
+0,    3338496,    3338496,     1152,     2304, 0x1b2474ba
+0,    3364992,    3364992,     1152,     2304, 0xc22775a6
+0,    3391488,    3391488,     1152,     2304, 0x3e8f7972
+0,    3417984,    3417984,     1152,     2304, 0x17a28a65
+0,    3444480,    3444480,     1152,     2304, 0x9b6178a4
+0,    3470976,    3470976,     1152,     2304, 0x5d707873
+0,    3497472,    3497472,     1152,     2304, 0x68e2645a
+0,    3523968,    3523968,     1152,     2304, 0x1e377d28
+0,    3550464,    3550464,     1152,     2304, 0x54b384be
+0,    3576960,    3576960,     1152,     2304, 0x0617808c
+0,    3603456,    3603456,     1152,     2304, 0xbc2b8a6c
+0,    3629952,    3629952,     1152,     2304, 0x7ced7180
+0,    3656448,    3656448,     1152,     2304, 0xf22180ab
+0,    3682944,    3682944,     1152,     2304, 0xf13682c9
+0,    3709440,    3709440,     1152,     2304, 0x7eff87fd
+0,    3735936,    3735936,     1152,     2304, 0x5a0b5cec
+0,    3762432,    3762432,     1152,     2304, 0x57c18906
+0,    3788928,    3788928,     1152,     2304, 0xb55a6a16
+0,    3815424,    3815424,     1152,     2304, 0xf2608371
+0,    3841920,    3841920,     1152,     2304, 0x36df7576
+0,    3868416,    3868416,     1152,     2304, 0xdb106fb4
+0,    3894912,    3894912,     1152,     2304, 0x7e4f85d0
+0,    3921408,    3921408,     1152,     2304, 0xe3ee78ab
+0,    3947904,    3947904,     1152,     2304, 0xd36b7dc7
+0,    3974400,    3974400,     1152,     2304, 0xadab7c5c
+0,    4000896,    4000896,     1152,     2304, 0x70786f26
+0,    4027392,    4027392,     1152,     2304, 0xcd5d717e
+0,    4053888,    4053888,     1152,     2304, 0xc1a96f9a
+0,    4080384,    4080384,     1152,     2304, 0xad777887
+0,    4106880,    4106880,     1152,     2304, 0x98277c16
+0,    4133376,    4133376,     1152,     2304, 0x868882c5
+0,    4159872,    4159872,     1152,     2304, 0xc48092b9
+0,    4186368,    4186368,     1152,     2304, 0x230069da
+0,    4212864,    4212864,     1152,     2304, 0x14147ad6
+0,    4239360,    4239360,     1152,     2304, 0xc9007172
+0,    4265856,    4265856,     1152,     2304, 0x85d67bcc
+0,    4292352,    4292352,     1152,     2304, 0x22418bab
+0,    4318848,    4318848,     1152,     2304, 0xe53c8b71
+0,    4345344,    4345344,     1152,     2304, 0x5a1a9053
+0,    4371840,    4371840,     1152,     2304, 0x9cd179af
+0,    4398336,    4398336,     1152,     2304, 0xbb3c7d72
+0,    4424832,    4424832,     1152,     2304, 0x477a8677
+0,    4451328,    4451328,     1152,     2304, 0xe3337834
+0,    4477824,    4477824,     1152,     2304, 0x1cb56d77
+0,    4504320,    4504320,     1152,     2304, 0xe89d6dac
+0,    4530816,    4530816,     1152,     2304, 0xd468827e
+0,    4557312,    4557312,     1152,     2304, 0xebc46b87
+0,    4583808,    4583808,     1152,     2304, 0x5fbb78d2
+0,    4610304,    4610304,     1152,     2304, 0xa1b483d6
+0,    4636800,    4636800,     1152,     2304, 0x6fec7cab
+0,    4663296,    4663296,     1152,     2304, 0xd86d6f6c
+0,    4689792,    4689792,     1152,     2304, 0x8c2c7d51
+0,    4716288,    4716288,     1152,     2304, 0xe8377cd7
+0,    4742784,    4742784,     1152,     2304, 0xb57071b4
+0,    4769280,    4769280,     1152,     2304, 0xc35c71fd
+0,    4795776,    4795776,     1152,     2304, 0x789079e9
+0,    4822272,    4822272,     1152,     2304, 0x413b710e
+0,    4848768,    4848768,     1152,     2304, 0x82678332
+0,    4875264,    4875264,     1152,     2304, 0xe1576e75
+0,    4901760,    4901760,     1152,     2304, 0x7c0b7ad6
+0,    4928256,    4928256,     1152,     2304, 0xc6b6786d
+0,    4954752,    4954752,     1152,     2304, 0x736f7b89
+0,    4981248,    4981248,     1152,     2304, 0x0ded72f1
+0,    5007744,    5007744,     1152,     2304, 0xcb877a3c
+0,    5034240,    5034240,     1152,     2304, 0x7c497d40
+0,    5060736,    5060736,     1152,     2304, 0xaefc798c
+0,    5087232,    5087232,     1152,     2304, 0x4cce748c
+0,    5113728,    5113728,     1152,     2304, 0xaa187fbe
+0,    5140224,    5140224,     1152,     2304, 0x1aa77db9
+0,    5166720,    5166720,     1152,     2304, 0x9e0074b8
+0,    5193216,    5193216,     1152,     2304, 0x74ee822b
+0,    5219712,    5219712,     1152,     2304, 0x975c6ff6
+0,    5246208,    5246208,     1152,     2304, 0xe1847bb4
+0,    5272704,    5272704,     1152,     2304, 0xe0828777
+0,    5299200,    5299200,     1152,     2304, 0xf4027205
+0,    5325696,    5325696,     1152,     2304, 0x535e7a20
+0,    5352192,    5352192,     1152,     2304, 0x5bd88404
+0,    5378688,    5378688,     1152,     2304, 0xf29478b1
+0,    5405184,    5405184,     1152,     2304, 0x9b7c7d88
+0,    5431680,    5431680,     1152,     2304, 0xaeb07335
+0,    5458176,    5458176,     1152,     2304, 0xbef06e08
+0,    5484672,    5484672,     1152,     2304, 0x795f7b8c
+0,    5511168,    5511168,     1152,     2304, 0x435a674d
+0,    5537664,    5537664,     1152,     2304, 0xd8ee7a09
+0,    5564160,    5564160,     1152,     2304, 0x9059812e
+0,    5590656,    5590656,     1152,     2304, 0x7481744a
+0,    5617152,    5617152,     1152,     2304, 0xdff27475
+0,    5643648,    5643648,     1152,     2304, 0xb17783ab
+0,    5670144,    5670144,     1152,     2304, 0x42e9706b
+0,    5696640,    5696640,     1152,     2304, 0x9f0d86b4
+0,    5723136,    5723136,     1152,     2304, 0x2963955f
+0,    5749632,    5749632,     1152,     2304, 0x059a6957
+0,    5776128,    5776128,     1152,     2304, 0x85948206
+0,    5802624,    5802624,     1152,     2304, 0x185e8400
+0,    5829120,    5829120,     1152,     2304, 0xe98e70df
+0,    5855616,    5855616,     1152,     2304, 0x69057b27
+0,    5882112,    5882112,     1152,     2304, 0x49e26f21
+0,    5908608,    5908608,     1152,     2304, 0xb0867da5
+0,    5935104,    5935104,     1152,     2304, 0x785980ff
+0,    5961600,    5961600,     1152,     2304, 0xf4b774be
+0,    5988096,    5988096,     1152,     2304, 0x63897e8c
+0,    6014592,    6014592,     1152,     2304, 0x248b89af
+0,    6041088,    6041088,     1152,     2304, 0xd3627c4a
+0,    6067584,    6067584,     1152,     2304, 0x5a4d9349
+0,    6094080,    6094080,     1152,     2304, 0xe2ce7c4c
+0,    6120576,    6120576,     1152,     2304, 0x321f6c0b
+0,    6147072,    6147072,     1152,     2304, 0x51ac74e0
+0,    6173568,    6173568,     1152,     2304, 0x8efa91ba
+0,    6200064,    6200064,     1152,     2304, 0x8b4b784c
+0,    6226560,    6226560,     1152,     2304, 0xe9e4879e
+0,    6253056,    6253056,     1152,     2304, 0x8dc28081
+0,    6279552,    6279552,     1152,     2304, 0x44b477b0
+0,    6306048,    6306048,     1152,     2304, 0xf7b67084
+0,    6332544,    6332544,     1152,     2304, 0x4b198c17
+0,    6359040,    6359040,     1152,     2304, 0x9c947194
+0,    6385536,    6385536,     1152,     2304, 0x6eaa7f15
+0,    6412032,    6412032,     1152,     2304, 0x119f7c1d
+0,    6438528,    6438528,     1152,     2304, 0x157b7f43
+0,    6465024,    6465024,     1152,     2304, 0xcd2e7acc
+0,    6491520,    6491520,     1152,     2304, 0x97597247
+0,    6518016,    6518016,     1152,     2304, 0x7ba06acb
+0,    6544512,    6544512,     1152,     2304, 0x233c7995
+0,    6571008,    6571008,     1152,     2304, 0x08e28587
+0,    6597504,    6597504,     1152,     2304, 0x92be84b5
+0,    6624000,    6624000,     1152,     2304, 0xbb857d43
+0,    6650496,    6650496,     1152,     2304, 0x168e7c74
+0,    6676992,    6676992,     1152,     2304, 0xac5465d9
+0,    6703488,    6703488,     1152,     2304, 0x18f58831
+0,    6729984,    6729984,     1152,     2304, 0x19b48196
+0,    6756480,    6756480,     1152,     2304, 0x20297653
+0,    6782976,    6782976,     1152,     2304, 0x93397a82
+0,    6809472,    6809472,     1152,     2304, 0x65ea7deb
+0,    6835968,    6835968,     1152,     2304, 0xd7316e20
+0,    6862464,    6862464,     1152,     2304, 0x94107f2b
+0,    6888960,    6888960,     1152,     2304, 0xec3b7dc6
+0,    6915456,    6915456,     1152,     2304, 0x2d3783aa
+0,    6941952,    6941952,     1152,     2304, 0x07e47340
+0,    6968448,    6968448,     1152,     2304, 0xbc117893
+0,    6994944,    6994944,     1152,     2304, 0x8bd97851
+0,    7021440,    7021440,     1152,     2304, 0xc27376a9
+0,    7047936,    7047936,     1152,     2304, 0x30d88c83
+0,    7074432,    7074432,     1152,     2304, 0x19c2704c
+0,    7100928,    7100928,     1152,     2304, 0x093b7b6e
+0,    7127424,    7127424,     1152,     2304, 0x221a7349
+0,    7153920,    7153920,     1152,     2304, 0xa4fd82cd
+0,    7180416,    7180416,     1152,     2304, 0x762e6bc9
+0,    7206912,    7206912,     1152,     2304, 0x270075d4
+0,    7233408,    7233408,     1152,     2304, 0xa5f27b90
+0,    7259904,    7259904,     1152,     2304, 0xf72e7edc
+0,    7286400,    7286400,     1152,     2304, 0x42178486
+0,    7312896,    7312896,     1152,     2304, 0x5f7978e8
+0,    7339392,    7339392,     1152,     2304, 0x5d7c6703
+0,    7365888,    7365888,     1152,     2304, 0x2c4483d5
+0,    7392384,    7392384,     1152,     2304, 0x31bd951d
+0,    7418880,    7418880,     1152,     2304, 0x99487af0
+0,    7445376,    7445376,     1152,     2304, 0x0bd27ee7
+0,    7471872,    7471872,     1152,     2304, 0xc3e07ac4
+0,    7498368,    7498368,     1152,     2304, 0x98a16ba7
+0,    7524864,    7524864,     1152,     2304, 0xd7a5747b
+0,    7551360,    7551360,     1152,     2304, 0x96fb811c
+0,    7577856,    7577856,     1152,     2304, 0x7cee8109
+0,    7604352,    7604352,     1152,     2304, 0x52b18ba2
+0,    7630848,    7630848,     1152,     2304, 0x33be8861
+0,    7657344,    7657344,     1152,     2304, 0xf41282a0
+0,    7683840,    7683840,     1152,     2304, 0xb4268993
+0,    7710336,    7710336,     1152,     2304, 0x52126a1c
+0,    7736832,    7736832,     1152,     2304, 0x050b6f7a
+0,    7763328,    7763328,     1152,     2304, 0x67a26fc3
+0,    7789824,    7789824,     1152,     2304, 0x966c7cf2
+0,    7816320,    7816320,     1152,     2304, 0x22097750
+0,    7842816,    7842816,     1152,     2304, 0xfbb0796c
+0,    7869312,    7869312,     1152,     2304, 0xbd508964
+0,    7895808,    7895808,     1152,     2304, 0xc24478d8
+0,    7922304,    7922304,     1152,     2304, 0x3913769d
+0,    7948800,    7948800,     1152,     2304, 0x8aab872f
+0,    7975296,    7975296,     1152,     2304, 0x7cb4822f
+0,    8001792,    8001792,     1152,     2304, 0xea318144
+0,    8028288,    8028288,     1152,     2304, 0xaf0f86d2
+0,    8054784,    8054784,     1152,     2304, 0x24f27598
+0,    8081280,    8081280,     1152,     2304, 0xd76f6d40
+0,    8107776,    8107776,     1152,     2304, 0x085071a7
+0,    8134272,    8134272,     1152,     2304, 0x1d11704c
+0,    8160768,    8160768,     1152,     2304, 0x21517cbd
+0,    8187264,    8187264,     1152,     2304, 0xcdca8d32
+0,    8213760,    8213760,     1152,     2304, 0x71c18433
+0,    8240256,    8240256,     1152,     2304, 0xd39d7d81
+0,    8266752,    8266752,     1152,     2304, 0x7a0d7a43
+0,    8293248,    8293248,     1152,     2304, 0x007c8884
+0,    8319744,    8319744,     1152,     2304, 0x403282d0
+0,    8346240,    8346240,     1152,     2304, 0xe3737214
+0,    8372736,    8372736,     1152,     2304, 0xaf906f47
+0,    8399232,    8399232,     1152,     2304, 0x54f57b3b
+0,    8425728,    8425728,     1152,     2304, 0x29be7791
+0,    8452224,    8452224,     1152,     2304, 0xe3c663d5
+0,    8478720,    8478720,     1152,     2304, 0xd7258238
+0,    8505216,    8505216,     1152,     2304, 0x3719820d
+0,    8531712,    8531712,     1152,     2304, 0xbe04814f
+0,    8558208,    8558208,     1152,     2304, 0x556c815e
+0,    8584704,    8584704,     1152,     2304, 0xb2447e10
+0,    8611200,    8611200,     1152,     2304, 0x7c16867c
+0,    8637696,    8637696,     1152,     2304, 0x6a7b78ed
+0,    8664192,    8664192,     1152,     2304, 0x5d307b81
+0,    8690688,    8690688,     1152,     2304, 0xaab680d3
+0,    8717184,    8717184,     1152,     2304, 0xb5d37a23
+0,    8743680,    8743680,     1152,     2304, 0x7f7d6f76
+0,    8770176,    8770176,     1152,     2304, 0x317a8296
+0,    8796672,    8796672,     1152,     2304, 0x8a987b3d
+0,    8823168,    8823168,     1152,     2304, 0x4f317a27
+0,    8849664,    8849664,     1152,     2304, 0xfc65852f
+0,    8876160,    8876160,     1152,     2304, 0x40527719
+0,    8902656,    8902656,     1152,     2304, 0x84988e13
+0,    8929152,    8929152,     1152,     2304, 0x318b6ddc
+0,    8955648,    8955648,     1152,     2304, 0x94cf7939
+0,    8982144,    8982144,     1152,     2304, 0x6f22819d
+0,    9008640,    9008640,     1152,     2304, 0xa7dd80a9
+0,    9035136,    9035136,     1152,     2304, 0x1c7968fa
+0,    9061632,    9061632,     1152,     2304, 0xd9937bae
+0,    9088128,    9088128,     1152,     2304, 0xf7137cf9
+0,    9114624,    9114624,     1152,     2304, 0xeadb84b5
+0,    9141120,    9141120,     1152,     2304, 0x9a2390ac
+0,    9167616,    9167616,     1152,     2304, 0xdb6a73f6
+0,    9194112,    9194112,     1152,     2304, 0x69e07507
+0,    9220608,    9220608,     1152,     2304, 0xbc8478b2
+0,    9247104,    9247104,     1152,     2304, 0x32cf8638
+0,    9273600,    9273600,     1152,     2304, 0x2b8d755a
+0,    9300096,    9300096,     1152,     2304, 0x52e05bd2
+0,    9326592,    9326592,     1152,     2304, 0x2aed8c49
+0,    9353088,    9353088,     1152,     2304, 0x587a896e
+0,    9379584,    9379584,     1152,     2304, 0x6dd87dee
+0,    9406080,    9406080,     1152,     2304, 0xd2858338
+0,    9432576,    9432576,     1152,     2304, 0xd90f7842
+0,    9459072,    9459072,     1152,     2304, 0xd6fb6d4a
+0,    9485568,    9485568,     1152,     2304, 0x85498aea
+0,    9512064,    9512064,     1152,     2304, 0x18597790
+0,    9538560,    9538560,     1152,     2304, 0x3cd78fea
+0,    9565056,    9565056,     1152,     2304, 0x94377fbc
+0,    9591552,    9591552,     1152,     2304, 0xf9db73f5
+0,    9618048,    9618048,     1152,     2304, 0x14fb6fca
+0,    9644544,    9644544,     1152,     2304, 0xe9d17d69
+0,    9671040,    9671040,     1152,     2304, 0xdeb57286
+0,    9697536,    9697536,     1152,     2304, 0xa5d37e17
+0,    9724032,    9724032,     1152,     2304, 0xcf6882fb
+0,    9750528,    9750528,     1152,     2304, 0x31758066
+0,    9777024,    9777024,     1152,     2304, 0x6b4d8175
+0,    9803520,    9803520,     1152,     2304, 0x2a3d7f8e
+0,    9830016,    9830016,     1152,     2304, 0xc066743b
+0,    9856512,    9856512,     1152,     2304, 0xcab88146
+0,    9883008,    9883008,     1152,     2304, 0x2b4c6e13
+0,    9909504,    9909504,     1152,     2304, 0x00b36b6f
+0,    9936000,    9936000,     1152,     2304, 0x664a88d3
+0,    9962496,    9962496,     1152,     2304, 0x18a66f76
+0,    9988992,    9988992,     1152,     2304, 0x4f828a8b
+0,   10015488,   10015488,     1152,     2304, 0x9cc7728e
+0,   10041984,   10041984,     1152,     2304, 0xbe357936
+0,   10068480,   10068480,     1152,     2304, 0x19878f8d
+0,   10094976,   10094976,     1152,     2304, 0x227b7c71
+0,   10121472,   10121472,     1152,     2304, 0xf7c879ec
+0,   10147968,   10147968,     1152,     2304, 0x0bca7b50
+0,   10174464,   10174464,     1152,     2304, 0xe4398304
+0,   10200960,   10200960,     1152,     2304, 0xf5da75c7
+0,   10227456,   10227456,     1152,     2304, 0x9f9070a3
+0,   10253952,   10253952,     1152,     2304, 0x789076f6
+0,   10280448,   10280448,     1152,     2304, 0x362977cc
+0,   10306944,   10306944,     1152,     2304, 0x4d0a8928
+0,   10333440,   10333440,     1152,     2304, 0x1bb767ec
+0,   10359936,   10359936,     1152,     2304, 0xbe727fa5
+0,   10386432,   10386432,     1152,     2304, 0x27f38347
+0,   10412928,   10412928,     1152,     2304, 0x0a3c8783
+0,   10439424,   10439424,     1152,     2304, 0x8249639c
+0,   10465920,   10465920,     1152,     2304, 0x3b076f69
+0,   10492416,   10492416,     1152,     2304, 0xd9597ee3
+0,   10518912,   10518912,     1152,     2304, 0x026e7fad
+0,   10545408,   10545408,     1152,     2304, 0xbd7a6de4
+0,   10571904,   10571904,     1152,     2304, 0x7d718a4d
+0,   10598400,   10598400,     1152,     2304, 0x1f5e83b4
+0,   10624896,   10624896,     1152,     2304, 0x597d7755
+0,   10651392,   10651392,     1152,     2304, 0x3fb080bd
+0,   10677888,   10677888,     1152,     2304, 0xdcbd7b16
+0,   10704384,   10704384,     1152,     2304, 0x5c48865f
+0,   10730880,   10730880,     1152,     2304, 0xcda37ae8
+0,   10757376,   10757376,     1152,     2304, 0x4810796d
+0,   10783872,   10783872,     1152,     2304, 0x34317fd0
+0,   10810368,   10810368,     1152,     2304, 0x5c0e7456
+0,   10836864,   10836864,     1152,     2304, 0x44d78040
+0,   10863360,   10863360,     1152,     2304, 0x88587882
+0,   10889856,   10889856,     1152,     2304, 0x77687d5e
+0,   10916352,   10916352,     1152,     2304, 0x116d68bb
+0,   10942848,   10942848,     1152,     2304, 0x3e5b6f60
+0,   10969344,   10969344,     1152,     2304, 0x64ea783c
+0,   10995840,   10995840,     1152,     2304, 0x23547f7d
+0,   11022336,   11022336,     1152,     2304, 0x2eee7892
+0,   11048832,   11048832,     1152,     2304, 0xfb837cba
+0,   11075328,   11075328,     1152,     2304, 0x86518209
+0,   11101824,   11101824,     1152,     2304, 0x672f7bba
+0,   11128320,   11128320,     1152,     2304, 0x6ab583fb
+0,   11154816,   11154816,     1152,     2304, 0xc1297428
+0,   11181312,   11181312,     1152,     2304, 0x164e7717
+0,   11207808,   11207808,     1152,     2304, 0xb754976d
+0,   11234304,   11234304,     1152,     2304, 0xb99d81b2
+0,   11260800,   11260800,     1152,     2304, 0x79046fd6
+0,   11287296,   11287296,     1152,     2304, 0x9a3f8426
+0,   11313792,   11313792,     1152,     2304, 0x896371f5
+0,   11340288,   11340288,     1152,     2304, 0x63d1799f
+0,   11366784,   11366784,     1152,     2304, 0x4842844e
+0,   11393280,   11393280,     1152,     2304, 0x850e8372
+0,   11419776,   11419776,     1152,     2304, 0x85d07022
+0,   11446272,   11446272,     1152,     2304, 0x9e6683d1
+0,   11472768,   11472768,     1152,     2304, 0x301b8981
+0,   11499264,   11499264,     1152,     2304, 0x1f1e98c5
+0,   11525760,   11525760,     1152,     2304, 0xc8797b03
+0,   11552256,   11552256,     1152,     2304, 0xf9d189f5
+0,   11578752,   11578752,     1152,     2304, 0x0e0d88be
+0,   11605248,   11605248,     1152,     2304, 0x6c1d7843
+0,   11631744,   11631744,     1152,     2304, 0xd13b8a38
+0,   11658240,   11658240,     1152,     2304, 0x9b8f773c
+0,   11684736,   11684736,     1152,     2304, 0x9acd7309
+0,   11711232,   11711232,     1152,     2304, 0x5e7d7d15
+0,   11737728,   11737728,     1152,     2304, 0xf09d7640
+0,   11764224,   11764224,     1152,     2304, 0xaebb718f
+0,   11790720,   11790720,     1152,     2304, 0x0c8570f4
+0,   11817216,   11817216,     1152,     2304, 0x3c93862c
+0,   11843712,   11843712,     1152,     2304, 0xcee46696
+0,   11870208,   11870208,     1152,     2304, 0x01ba7e20
+0,   11896704,   11896704,     1152,     2304, 0x7fcb7a09
+0,   11923200,   11923200,     1152,     2304, 0xddf18c14
+0,   11949696,   11949696,     1152,     2304, 0xd2e97eeb
+0,   11976192,   11976192,     1152,     2304, 0x514d8719
+0,   12002688,   12002688,     1152,     2304, 0xe89279c9
+0,   12029184,   12029184,     1152,     2304, 0x806d6a95
+0,   12055680,   12055680,     1152,     2304, 0xbc7a8a60
+0,   12082176,   12082176,     1152,     2304, 0x302f8fa8
+0,   12108672,   12108672,     1152,     2304, 0xb136784c
+0,   12135168,   12135168,     1152,     2304, 0x9b0f6aab
+0,   12161664,   12161664,     1152,     2304, 0xd8e27582
+0,   12188160,   12188160,     1152,     2304, 0xdaaf78b1
+0,   12214656,   12214656,     1152,     2304, 0x65967f5f
+0,   12241152,   12241152,     1152,     2304, 0x6f917aa4
+0,   12267648,   12267648,     1152,     2304, 0x7f607444
+0,   12294144,   12294144,     1152,     2304, 0xfd316f2c
+0,   12320640,   12320640,     1152,     2304, 0x776e83c7
+0,   12347136,   12347136,     1152,     2304, 0xb9c17e16
+0,   12373632,   12373632,     1152,     2304, 0xdf287de8
+0,   12400128,   12400128,     1152,     2304, 0xf33d96a3
+0,   12426624,   12426624,     1152,     2304, 0xad216e5b
+0,   12453120,   12453120,     1152,     2304, 0x4a328342
+0,   12479616,   12479616,     1152,     2304, 0xcf3f8079
+0,   12506112,   12506112,     1152,     2304, 0xb46f77b0
+0,   12532608,   12532608,     1152,     2304, 0x3199713d
+0,   12559104,   12559104,     1152,     2304, 0x5e667a0d
+0,   12585600,   12585600,     1152,     2304, 0xa3047ae3
+0,   12612096,   12612096,     1152,     2304, 0x9edf8594
+0,   12638592,   12638592,     1152,     2304, 0xd16382d5
+0,   12665088,   12665088,     1152,     2304, 0x6e838328
+0,   12691584,   12691584,     1152,     2304, 0xa1f697c1
+0,   12718080,   12718080,     1152,     2304, 0xefcc749c
+0,   12744576,   12744576,     1152,     2304, 0x1f94839e
+0,   12771072,   12771072,     1152,     2304, 0x429e7c5b
+0,   12797568,   12797568,     1152,     2304, 0x9b59711c
+0,   12824064,   12824064,     1152,     2304, 0xdac27323
+0,   12850560,   12850560,     1152,     2304, 0xa4856d2b
+0,   12877056,   12877056,     1152,     2304, 0x07a37498
+0,   12903552,   12903552,     1152,     2304, 0xe1ce7512
+0,   12930048,   12930048,     1152,     2304, 0x15e182c3
+0,   12956544,   12956544,     1152,     2304, 0x0fa46b8c
+0,   12983040,   12983040,     1152,     2304, 0xbdf07bfd
+0,   13009536,   13009536,     1152,     2304, 0xe0238b2a
+0,   13036032,   13036032,     1152,     2304, 0xab537267
+0,   13062528,   13062528,     1152,     2304, 0xd46b75f3
+0,   13089024,   13089024,     1152,     2304, 0xec73794b
+0,   13115520,   13115520,     1152,     2304, 0x680580c1
+0,   13142016,   13142016,     1152,     2304, 0x1ace8f6c
+0,   13168512,   13168512,     1152,     2304, 0x19d583ac
+0,   13195008,   13195008,     1152,     2304, 0x4b6b8105
+0,   13221504,   13221504,     1152,     2304, 0x392a78b2
+0,   13248000,   13248000,     1152,     2304, 0xd3916dad
+0,   13274496,   13274496,     1152,     2304, 0x569c7a75
+0,   13300992,   13300992,     1152,     2304, 0xf5ac814b
+0,   13327488,   13327488,     1152,     2304, 0x18d77e98
+0,   13353984,   13353984,     1152,     2304, 0x007074ce
+0,   13380480,   13380480,     1152,     2304, 0x0fe38373
+0,   13406976,   13406976,     1152,     2304, 0x5a967920
+0,   13433472,   13433472,     1152,     2304, 0x22167501
+0,   13459968,   13459968,     1152,     2304, 0xf0828cab
+0,   13486464,   13486464,     1152,     2304, 0xaeec71b7
+0,   13512960,   13512960,     1152,     2304, 0xc47b62ff
+0,   13539456,   13539456,     1152,     2304, 0xab688478
+0,   13565952,   13565952,     1152,     2304, 0xf35e7bd2
+0,   13592448,   13592448,     1152,     2304, 0x9cff763e
+0,   13618944,   13618944,     1152,     2304, 0x59568dc8
+0,   13645440,   13645440,     1152,     2304, 0x51a278ac
+0,   13671936,   13671936,     1152,     2304, 0xc08a6e68
+0,   13698432,   13698432,     1152,     2304, 0xd3067ef4
+0,   13724928,   13724928,     1152,     2304, 0x54767c49
+0,   13751424,   13751424,     1152,     2304, 0xf8ff8386
+0,   13777920,   13777920,     1152,     2304, 0xef267f63
+0,   13804416,   13804416,     1152,     2304, 0xe2537cd9
+0,   13830912,   13830912,     1152,     2304, 0x77a57680
+0,   13857408,   13857408,     1152,     2304, 0x325c74ad
+0,   13883904,   13883904,     1152,     2304, 0xd7fe87c4
+0,   13910400,   13910400,     1152,     2304, 0x2e756310
+0,   13936896,   13936896,     1152,     2304, 0x6a81796b
+0,   13963392,   13963392,     1152,     2304, 0x2f057daf
+0,   13989888,   13989888,     1152,     2304, 0xcd9f7c9d
+0,   14016384,   14016384,     1152,     2304, 0xc91560a0
+0,   14042880,   14042880,     1152,     2304, 0x962a91eb
+0,   14069376,   14069376,     1152,     2304, 0xa0ff7416
+0,   14095872,   14095872,     1152,     2304, 0xcb5c7dff
+0,   14122368,   14122368,     1152,     2304, 0xd3527041
+0,   14148864,   14148864,     1152,     2304, 0xc89d77c2
+0,   14175360,   14175360,     1152,     2304, 0xe1ce7ccf
+0,   14201856,   14201856,     1152,     2304, 0xe3417c4c
+0,   14228352,   14228352,     1152,     2304, 0x3f1a7166
+0,   14254848,   14254848,     1152,     2304, 0xcdcc7e23
+0,   14281344,   14281344,     1152,     2304, 0x4e727e97
+0,   14307840,   14307840,     1152,     2304, 0x53427ff1
+0,   14334336,   14334336,     1152,     2304, 0x173f6ca9
+0,   14360832,   14360832,     1152,     2304, 0x962887ec
+0,   14387328,   14387328,     1152,     2304, 0xcbec67f4
+0,   14413824,   14413824,     1152,     2304, 0x7a2c943d
+0,   14440320,   14440320,     1152,     2304, 0x8b877570
+0,   14466816,   14466816,     1152,     2304, 0xcf337323
+0,   14493312,   14493312,     1152,     2304, 0x8c8682a4
+0,   14519808,   14519808,     1152,     2304, 0x94c3753c
+0,   14546304,   14546304,     1152,     2304, 0x86898d79
+0,   14572800,   14572800,     1152,     2304, 0xdf667312
+0,   14599296,   14599296,     1152,     2304, 0x062f8ba8
+0,   14625792,   14625792,     1152,     2304, 0xa2c36f08
+0,   14652288,   14652288,     1152,     2304, 0x5bca7358
+0,   14678784,   14678784,     1152,     2304, 0x5648804d
+0,   14705280,   14705280,     1152,     2304, 0xefac87c8
+0,   14731776,   14731776,     1152,     2304, 0x66bf7dcf
+0,   14758272,   14758272,     1152,     2304, 0x62ad73bc
+0,   14784768,   14784768,     1152,     2304, 0x72fe630c
+0,   14811264,   14811264,     1152,     2304, 0xeebe87da
+0,   14837760,   14837760,     1152,     2304, 0x11c870cf
+0,   14864256,   14864256,     1152,     2304, 0x18fb7c27
+0,   14890752,   14890752,     1152,     2304, 0x39047145
+0,   14917248,   14917248,     1152,     2304, 0xdcf07032
+0,   14943744,   14943744,     1152,     2304, 0x61027c50
+0,   14970240,   14970240,     1152,     2304, 0x6e2e89de
+0,   14996736,   14996736,     1152,     2304, 0xc50c6d1d
+0,   15023232,   15023232,     1152,     2304, 0xeed587ee
+0,   15049728,   15049728,     1152,     2304, 0xe38269c7
+0,   15076224,   15076224,     1152,     2304, 0xcf66806e
+0,   15102720,   15102720,     1152,     2304, 0x2d3b7c1b
+0,   15129216,   15129216,     1152,     2304, 0xa4127d48
+0,   15155712,   15155712,     1152,     2304, 0x480b8325
+0,   15182208,   15182208,     1152,     2304, 0xc1527221
+0,   15208704,   15208704,     1152,     2304, 0x94c1769a
+0,   15235200,   15235200,     1152,     2304, 0xcfb37271
+0,   15261696,   15261696,     1152,     2304, 0x946d7a96
+0,   15288192,   15288192,     1152,     2304, 0xdfc18e50
+0,   15314688,   15314688,     1152,     2304, 0x10c48393
+0,   15341184,   15341184,     1152,     2304, 0x58556b10
+0,   15367680,   15367680,     1152,     2304, 0x997b7993
+0,   15394176,   15394176,     1152,     2304, 0x4a787992
+0,   15420672,   15420672,     1152,     2304, 0x11406c20
+0,   15447168,   15447168,     1152,     2304, 0x04a4874a
+0,   15473664,   15473664,     1152,     2304, 0xf3077164
+0,   15500160,   15500160,     1152,     2304, 0x08ac80e3
+0,   15526656,   15526656,     1152,     2304, 0x268370d0
+0,   15553152,   15553152,     1152,     2304, 0x1d137778
+0,   15579648,   15579648,     1152,     2304, 0xfa148e97
+0,   15606144,   15606144,     1152,     2304, 0xec50717c
+0,   15632640,   15632640,     1152,     2304, 0xcbf46b75
+0,   15659136,   15659136,     1152,     2304, 0xd4168038
+0,   15685632,   15685632,     1152,     2304, 0xdd9577f0
+0,   15712128,   15712128,     1152,     2304, 0xc7077685
+0,   15738624,   15738624,     1152,     2304, 0x34d25e91
+0,   15765120,   15765120,     1152,     2304, 0x96537e6d
+0,   15791616,   15791616,     1152,     2304, 0xb12e7940
+0,   15818112,   15818112,     1152,     2304, 0x861d64c0
+0,   15844608,   15844608,     1152,     2304, 0xa2bc64ed
+0,   15871104,   15871104,     1152,     2304, 0x0c5f8261
+0,   15897600,   15897600,     1152,     2304, 0x540584ff
+0,   15924096,   15924096,     1152,     2304, 0xe8328b09
+0,   15950592,   15950592,     1152,     2304, 0x1e777079
+0,   15977088,   15977088,     1152,     2304, 0x453483b4
+0,   16003584,   16003584,     1152,     2304, 0x1cab7a1e
+0,   16030080,   16030080,     1152,     2304, 0xcb37856d
+0,   16056576,   16056576,     1152,     2304, 0x5a4883ed
+0,   16083072,   16083072,     1152,     2304, 0xd1f27cbf
+0,   16109568,   16109568,     1152,     2304, 0x0d377a4d
+0,   16136064,   16136064,     1152,     2304, 0x264e76df
+0,   16162560,   16162560,     1152,     2304, 0x2a68771e
+0,   16189056,   16189056,     1152,     2304, 0xcb317a31
+0,   16215552,   16215552,     1152,     2304, 0xfc5d7a27
+0,   16242048,   16242048,     1152,     2304, 0x6e067d96
+0,   16268544,   16268544,     1152,     2304, 0x0c538560
+0,   16295040,   16295040,     1152,     2304, 0xfbad717a
+0,   16321536,   16321536,     1152,     2304, 0xf9fc7608
+0,   16348032,   16348032,     1152,     2304, 0xb1817c8f
+0,   16374528,   16374528,     1152,     2304, 0x57c37f82
+0,   16401024,   16401024,     1152,     2304, 0x8cac8356
+0,   16427520,   16427520,     1152,     2304, 0x97108186
+0,   16454016,   16454016,     1152,     2304, 0x095d81bb
+0,   16480512,   16480512,     1152,     2304, 0x475f6b2b
+0,   16507008,   16507008,     1152,     2304, 0xdf7c8cc5
+0,   16533504,   16533504,     1152,     2304, 0x979c77be
+0,   16560000,   16560000,     1152,     2304, 0x56a7844b
+0,   16586496,   16586496,     1152,     2304, 0x7ee46b21
+0,   16612992,   16612992,     1152,     2304, 0x05b67220
+0,   16639488,   16639488,     1152,     2304, 0x25787252
+0,   16665984,   16665984,     1152,     2304, 0x8ad278ad
+0,   16692480,   16692480,     1152,     2304, 0x67bd722e
+0,   16718976,   16718976,     1152,     2304, 0x204f77be
+0,   16745472,   16745472,     1152,     2304, 0x82d27ae2
+0,   16771968,   16771968,     1152,     2304, 0x23fa82e4
+0,   16798464,   16798464,     1152,     2304, 0xa9cf8159
+0,   16824960,   16824960,     1152,     2304, 0x13f08749
+0,   16851456,   16851456,     1152,     2304, 0xf84f71b5
+0,   16877952,   16877952,     1152,     2304, 0x1cb777c8
+0,   16904448,   16904448,     1152,     2304, 0x11236722
+0,   16930944,   16930944,     1152,     2304, 0x10197cac
+0,   16957440,   16957440,     1152,     2304, 0xbd417e65
+0,   16983936,   16983936,     1152,     2304, 0x9a1c7d05
+0,   17010432,   17010432,     1152,     2304, 0x4c3a85de
+0,   17036928,   17036928,     1152,     2304, 0x03816eb7
+0,   17063424,   17063424,     1152,     2304, 0x80186e6c
+0,   17089920,   17089920,     1152,     2304, 0x5c097928
+0,   17116416,   17116416,     1152,     2304, 0x94aa823d
+0,   17142912,   17142912,     1152,     2304, 0xa1c27f04
+0,   17169408,   17169408,     1152,     2304, 0x6ddb74a9
+0,   17195904,   17195904,     1152,     2304, 0x5ea67901
+0,   17222400,   17222400,     1152,     2304, 0xd710742d
+0,   17248896,   17248896,     1152,     2304, 0xf8c27add
+0,   17275392,   17275392,     1152,     2304, 0xf1717011
+0,   17301888,   17301888,     1152,     2304, 0xb59072d0
+0,   17328384,   17328384,     1152,     2304, 0xc8dc84a7
+0,   17354880,   17354880,     1152,     2304, 0x33116737
+0,   17381376,   17381376,     1152,     2304, 0x86216bdd
+0,   17407872,   17407872,     1152,     2304, 0xa2f87866
+0,   17434368,   17434368,     1152,     2304, 0x5d77771e
+0,   17460864,   17460864,     1152,     2304, 0x5d8c77fd
+0,   17487360,   17487360,     1152,     2304, 0x23cc89cb
+0,   17513856,   17513856,     1152,     2304, 0x334e7407
+0,   17540352,   17540352,     1152,     2304, 0x01c976ff
+0,   17566848,   17566848,     1152,     2304, 0x3a3b7b15
+0,   17593344,   17593344,     1152,     2304, 0xfa427de9
+0,   17619840,   17619840,     1152,     2304, 0xbeaa7c91
+0,   17646336,   17646336,     1152,     2304, 0xd6988b31
+0,   17672832,   17672832,     1152,     2304, 0x4db47f80
+0,   17699328,   17699328,     1152,     2304, 0xea687d9e
+0,   17725824,   17725824,     1152,     2304, 0x1a6281ce
+0,   17752320,   17752320,     1152,     2304, 0xe1958003
+0,   17778816,   17778816,     1152,     2304, 0xb4ae7c5e
+0,   17805312,   17805312,     1152,     2304, 0x28827c8a
+0,   17831808,   17831808,     1152,     2304, 0x1fb88b25
+0,   17858304,   17858304,     1152,     2304, 0x588d71e8
+0,   17884800,   17884800,     1152,     2304, 0x68227c34
+0,   17911296,   17911296,     1152,     2304, 0xee4d73e8
+0,   17937792,   17937792,     1152,     2304, 0x69287c6d
+0,   17964288,   17964288,     1152,     2304, 0xbb04926a
+0,   17990784,   17990784,     1152,     2304, 0x89456cec
+0,   18017280,   18017280,     1152,     2304, 0xabe18992
+0,   18043776,   18043776,     1152,     2304, 0x50cc7f6c
+0,   18070272,   18070272,     1152,     2304, 0x6d7270be
+0,   18096768,   18096768,     1152,     2304, 0x664c6fef
+0,   18123264,   18123264,     1152,     2304, 0x7f7982f3
+0,   18149760,   18149760,     1152,     2304, 0x6ca170e9
+0,   18176256,   18176256,     1152,     2304, 0x36437d5b
+0,   18202752,   18202752,     1152,     2304, 0xfd2380e8
+0,   18229248,   18229248,     1152,     2304, 0x2e3c6e9f
+0,   18255744,   18255744,     1152,     2304, 0xc8427f3f
+0,   18282240,   18282240,     1152,     2304, 0x962a79ad
+0,   18308736,   18308736,     1152,     2304, 0xc9597c8b
+0,   18335232,   18335232,     1152,     2304, 0x899580bb
+0,   18361728,   18361728,     1152,     2304, 0x2d179dff
+0,   18388224,   18388224,     1152,     2304, 0x4ac1707c
+0,   18414720,   18414720,     1152,     2304, 0x32ea7e95
+0,   18441216,   18441216,     1152,     2304, 0x265e9a2d
+0,   18467712,   18467712,     1152,     2304, 0x1c6484d0
+0,   18494208,   18494208,     1152,     2304, 0x39ae6884
+0,   18520704,   18520704,     1152,     2304, 0x82ed7bc5
+0,   18547200,   18547200,     1152,     2304, 0x556b7b3c
+0,   18573696,   18573696,     1152,     2304, 0xb7f778dd
+0,   18600192,   18600192,     1152,     2304, 0x74447d55
+0,   18626688,   18626688,     1152,     2304, 0x0c66861e
+0,   18653184,   18653184,     1152,     2304, 0x15ba7932
+0,   18679680,   18679680,     1152,     2304, 0xb19170fc
+0,   18706176,   18706176,     1152,     2304, 0x19d37551
+0,   18732672,   18732672,     1152,     2304, 0xdc529142
+0,   18759168,   18759168,     1152,     2304, 0xf2637e77
+0,   18785664,   18785664,     1152,     2304, 0xd065944b
+0,   18812160,   18812160,     1152,     2304, 0x22878123
+0,   18838656,   18838656,     1152,     2304, 0xc21a8bf7
+0,   18865152,   18865152,     1152,     2304, 0x2e3582dc
+0,   18891648,   18891648,     1152,     2304, 0xd42f7987
+0,   18918144,   18918144,     1152,     2304, 0x69b88236
+0,   18944640,   18944640,     1152,     2304, 0x7c988f90
+0,   18971136,   18971136,     1152,     2304, 0x2cd66ded
+0,   18997632,   18997632,     1152,     2304, 0x3e65828b
+0,   19024128,   19024128,     1152,     2304, 0x7e9871c9
+0,   19050624,   19050624,     1152,     2304, 0xf1f2806b
+0,   19077120,   19077120,     1152,     2304, 0xf5087c7b
+0,   19103616,   19103616,     1152,     2304, 0x62b98097
+0,   19130112,   19130112,     1152,     2304, 0xec457c43
+0,   19156608,   19156608,     1152,     2304, 0x87af87a6
+0,   19183104,   19183104,     1152,     2304, 0x97cc757d
+0,   19209600,   19209600,     1152,     2304, 0x08ca76bd
+0,   19236096,   19236096,     1152,     2304, 0x14ae7cbd
+0,   19262592,   19262592,     1152,     2304, 0x1f79709a
+0,   19289088,   19289088,     1152,     2304, 0x17948207
+0,   19315584,   19315584,     1152,     2304, 0x16ee7228
+0,   19342080,   19342080,     1152,     2304, 0x76cc82d7
+0,   19368576,   19368576,     1152,     2304, 0x8f327a8e
+0,   19395072,   19395072,     1152,     2304, 0x14ee7756
+0,   19421568,   19421568,     1152,     2304, 0x15996d2f
+0,   19448064,   19448064,     1152,     2304, 0x4c707d5c
+0,   19474560,   19474560,     1152,     2304, 0x268c6fee
+0,   19501056,   19501056,     1152,     2304, 0x6d838c76
+0,   19527552,   19527552,     1152,     2304, 0xafa17e64
+0,   19554048,   19554048,     1152,     2304, 0xb6546e66
+0,   19580544,   19580544,     1152,     2304, 0x945d8b9f
+0,   19607040,   19607040,     1152,     2304, 0x5bfb7446
+0,   19633536,   19633536,     1152,     2304, 0xae6086f9
+0,   19660032,   19660032,     1152,     2304, 0xa01380cd
+0,   19686528,   19686528,     1152,     2304, 0x06f0828f
+0,   19713024,   19713024,     1152,     2304, 0x0ae07176
+0,   19739520,   19739520,     1152,     2304, 0x66f07522
+0,   19766016,   19766016,     1152,     2304, 0x44018106
+0,   19792512,   19792512,     1152,     2304, 0x8cd283da
+0,   19819008,   19819008,     1152,     2304, 0x14257f45
+0,   19845504,   19845504,     1152,     2304, 0x04979537
+0,   19872000,   19872000,     1152,     2304, 0x8b5f797c
+0,   19898496,   19898496,     1152,     2304, 0x12d67493
+0,   19924992,   19924992,     1152,     2304, 0xc8886a25
+0,   19951488,   19951488,     1152,     2304, 0x614b803a
+0,   19977984,   19977984,     1152,     2304, 0x75667d35
+0,   20004480,   20004480,     1152,     2304, 0xe42c7b00
+0,   20030976,   20030976,     1152,     2304, 0x37787927
+0,   20057472,   20057472,     1152,     2304, 0x85db8409
+0,   20083968,   20083968,     1152,     2304, 0x823b822c
+0,   20110464,   20110464,     1152,     2304, 0xa1658479
+0,   20136960,   20136960,     1152,     2304, 0xdbe58ff7
+0,   20163456,   20163456,     1152,     2304, 0x725175e2
+0,   20189952,   20189952,     1152,     2304, 0xb2ae7741
+0,   20216448,   20216448,     1152,     2304, 0x4de169e4
+0,   20242944,   20242944,     1152,     2304, 0x3cb18530
+0,   20269440,   20269440,     1152,     2304, 0x5a0c5e7b
+0,   20295936,   20295936,     1152,     2304, 0x4e1f68b6
+0,   20322432,   20322432,     1152,     2304, 0x9a0c7525
+0,   20348928,   20348928,     1152,     2304, 0x86ae6a04
+0,   20375424,   20375424,     1152,     2304, 0xf508877c
+0,   20401920,   20401920,     1152,     2304, 0x29746d7c
+0,   20428416,   20428416,     1152,     2304, 0x24078edb
+0,   20454912,   20454912,     1152,     2304, 0x4f9784d2
+0,   20481408,   20481408,     1152,     2304, 0x24186e95
+0,   20507904,   20507904,     1152,     2304, 0xd1927b28
+0,   20534400,   20534400,     1152,     2304, 0xc77283d5
+0,   20560896,   20560896,     1152,     2304, 0xc0ad73f8
+0,   20587392,   20587392,     1152,     2304, 0x0de78524
+0,   20613888,   20613888,     1152,     2304, 0x8b0061dc
+0,   20640384,   20640384,     1152,     2304, 0x37c77ad6
+0,   20666880,   20666880,     1152,     2304, 0x5eae7410
+0,   20693376,   20693376,     1152,     2304, 0x72d97ab5
+0,   20719872,   20719872,     1152,     2304, 0x1afd8745
+0,   20746368,   20746368,     1152,     2304, 0xe30b7c9b
+0,   20772864,   20772864,     1152,     2304, 0xe86480b3
+0,   20799360,   20799360,     1152,     2304, 0xb9c77a4e
+0,   20825856,   20825856,     1152,     2304, 0x64508f2a
+0,   20852352,   20852352,     1152,     2304, 0x839a74ed
+0,   20878848,   20878848,     1152,     2304, 0x12147a5d
+0,   20905344,   20905344,     1152,     2304, 0x33b07d3b
+0,   20931840,   20931840,     1152,     2304, 0x65e67731
+0,   20958336,   20958336,     1152,     2304, 0x8d1c8191
+0,   20984832,   20984832,     1152,     2304, 0xada17701
+0,   21011328,   21011328,     1152,     2304, 0x49d47f6c
+0,   21037824,   21037824,     1152,     2304, 0x5df97989
+0,   21064320,   21064320,     1152,     2304, 0x0db17ad9
+0,   21090816,   21090816,     1152,     2304, 0x0d3281d7
+0,   21117312,   21117312,     1152,     2304, 0x2503704f
+0,   21143808,   21143808,     1152,     2304, 0xdf7c73d0
+0,   21170304,   21170304,     1152,     2304, 0x93dd737c
+0,   21196800,   21196800,     1152,     2304, 0x48ed5740
+0,   21223296,   21223296,     1152,     2304, 0x6d477eaf
+0,   21249792,   21249792,     1152,     2304, 0xe72373a1
+0,   21276288,   21276288,     1152,     2304, 0x7d3e7e2e
+0,   21302784,   21302784,     1152,     2304, 0x0d167743
+0,   21329280,   21329280,     1152,     2304, 0xc2d97f6c
+0,   21355776,   21355776,     1152,     2304, 0xc66175a0
+0,   21382272,   21382272,     1152,     2304, 0xd0127475
+0,   21408768,   21408768,     1152,     2304, 0xdbf57801
+0,   21435264,   21435264,     1152,     2304, 0x52b47a8e
+0,   21461760,   21461760,     1152,     2304, 0xc96b80c5
+0,   21488256,   21488256,     1152,     2304, 0x070c76cf
+0,   21514752,   21514752,     1152,     2304, 0x2aa07ee2
+0,   21541248,   21541248,     1152,     2304, 0x04059074
+0,   21567744,   21567744,     1152,     2304, 0x228f6def
+0,   21594240,   21594240,     1152,     2304, 0x927893a8
+0,   21620736,   21620736,     1152,     2304, 0x581a87df
+0,   21647232,   21647232,     1152,     2304, 0x0d3f653c
+0,   21673728,   21673728,     1152,     2304, 0xc0e775f3
+0,   21700224,   21700224,     1152,     2304, 0x41d98473
+0,   21726720,   21726720,     1152,     2304, 0x68fb775a
+0,   21753216,   21753216,     1152,     2304, 0x81e0810e
+0,   21779712,   21779712,     1152,     2304, 0xf35790d3
+0,   21806208,   21806208,     1152,     2304, 0xc1468c45
+0,   21832704,   21832704,     1152,     2304, 0x6949796a
+0,   21859200,   21859200,     1152,     2304, 0xa0bb7dcd
+0,   21885696,   21885696,     1152,     2304, 0x50af8424
+0,   21912192,   21912192,     1152,     2304, 0x449b7d37
+0,   21938688,   21938688,     1152,     2304, 0xcf2779a8
+0,   21965184,   21965184,     1152,     2304, 0x069d894e
+0,   21991680,   21991680,     1152,     2304, 0x41bd7334
+0,   22018176,   22018176,     1152,     2304, 0x0dfd8c05
+0,   22044672,   22044672,     1152,     2304, 0x2ce27b9d
+0,   22071168,   22071168,     1152,     2304, 0x422875b5
+0,   22097664,   22097664,     1152,     2304, 0xd5297389
+0,   22124160,   22124160,     1152,     2304, 0x49ed8668
+0,   22150656,   22150656,     1152,     2304, 0x2a9a810b
+0,   22177152,   22177152,     1152,     2304, 0x5a3f7b43
+0,   22203648,   22203648,     1152,     2304, 0x32087c12
+0,   22230144,   22230144,     1152,     2304, 0x759c7bba
+0,   22256640,   22256640,     1152,     2304, 0x5b26823b
+0,   22283136,   22283136,     1152,     2304, 0xeeb872ef
+0,   22309632,   22309632,     1152,     2304, 0x6eb0869d
+0,   22336128,   22336128,     1152,     2304, 0x4b3e764f
+0,   22362624,   22362624,     1152,     2304, 0x30a66fdb
+0,   22389120,   22389120,     1152,     2304, 0x958374ff
+0,   22415616,   22415616,     1152,     2304, 0x411b71cc
+0,   22442112,   22442112,     1152,     2304, 0x80136759
+0,   22468608,   22468608,     1152,     2304, 0x05ed6793
+0,   22495104,   22495104,     1152,     2304, 0x48f18827
+0,   22521600,   22521600,     1152,     2304, 0xb40d7866
+0,   22548096,   22548096,     1152,     2304, 0xcad18c4a
+0,   22574592,   22574592,     1152,     2304, 0x9ad68582
+0,   22601088,   22601088,     1152,     2304, 0x3ad17fce
+0,   22627584,   22627584,     1152,     2304, 0x824b7af6
+0,   22654080,   22654080,     1152,     2304, 0xcc9d894f
+0,   22680576,   22680576,     1152,     2304, 0x46167a70
+0,   22707072,   22707072,     1152,     2304, 0x2479872a
+0,   22733568,   22733568,     1152,     2304, 0x58cb72a5
+0,   22760064,   22760064,     1152,     2304, 0x01957747
+0,   22786560,   22786560,     1152,     2304, 0x6df172cb
+0,   22813056,   22813056,     1152,     2304, 0x9ebd7ed9
+0,   22839552,   22839552,     1152,     2304, 0x89436a53
+0,   22866048,   22866048,     1152,     2304, 0xcce78433
+0,   22892544,   22892544,     1152,     2304, 0x1b7a70f2
+0,   22919040,   22919040,     1152,     2304, 0x530584d7
+0,   22945536,   22945536,     1152,     2304, 0xda158aa9
+0,   22972032,   22972032,     1152,     2304, 0xe85e7b90
+0,   22998528,   22998528,     1152,     2304, 0xa8ac8176
+0,   23025024,   23025024,     1152,     2304, 0xe43578a9
+0,   23051520,   23051520,     1152,     2304, 0x9ecc7c30
+0,   23078016,   23078016,     1152,     2304, 0x1cbd8591
+0,   23104512,   23104512,     1152,     2304, 0x84917bd9
+0,   23131008,   23131008,     1152,     2304, 0xea8477e4
+0,   23157504,   23157504,     1152,     2304, 0xf50c7ea1
+0,   23184000,   23184000,     1152,     2304, 0xc85e76e7
+0,   23210496,   23210496,     1152,     2304, 0xa5148768
+0,   23236992,   23236992,     1152,     2304, 0x2bd5846a
+0,   23263488,   23263488,     1152,     2304, 0x1cd88641
+0,   23289984,   23289984,     1152,     2304, 0x04b883c5
+0,   23316480,   23316480,     1152,     2304, 0x5bf1719b
+0,   23342976,   23342976,     1152,     2304, 0x0abe777c
+0,   23369472,   23369472,     1152,     2304, 0xc74a7f7f
+0,   23395968,   23395968,     1152,     2304, 0xd36a724b
+0,   23422464,   23422464,     1152,     2304, 0x4ae67e9e
+0,   23448960,   23448960,     1152,     2304, 0xfb6173a0
+0,   23475456,   23475456,     1152,     2304, 0x31d77e19
+0,   23501952,   23501952,     1152,     2304, 0x6153767d
+0,   23528448,   23528448,     1152,     2304, 0xe40d84e4
+0,   23554944,   23554944,     1152,     2304, 0xc8ef71e2
+0,   23581440,   23581440,     1152,     2304, 0xdeca81be
+0,   23607936,   23607936,     1152,     2304, 0x75da85df
+0,   23634432,   23634432,     1152,     2304, 0x6c8e71bd
+0,   23660928,   23660928,     1152,     2304, 0x6f8576a4
+0,   23687424,   23687424,     1152,     2304, 0xe6637b71
+0,   23713920,   23713920,     1152,     2304, 0xddfe8866
+0,   23740416,   23740416,     1152,     2304, 0xa08578a5
+0,   23766912,   23766912,     1152,     2304, 0x72af7873
+0,   23793408,   23793408,     1152,     2304, 0xb550615c
+0,   23819904,   23819904,     1152,     2304, 0x9ef87d28
+0,   23846400,   23846400,     1152,     2304, 0xe30d83bf
+0,   23872896,   23872896,     1152,     2304, 0xf030818c
+0,   23899392,   23899392,     1152,     2304, 0x1e23896c
+0,   23925888,   23925888,     1152,     2304, 0xf4c87180
+0,   23952384,   23952384,     1152,     2304, 0xe34181aa
+0,   23978880,   23978880,     1152,     2304, 0x660681cb
+0,   24005376,   24005376,     1152,     2304, 0x7ebb87fc
+0,   24031872,   24031872,     1152,     2304, 0xa8305eeb
+0,   24058368,   24058368,     1152,     2304, 0xea898906
+0,   24084864,   24084864,     1152,     2304, 0x342b6c13
+0,   24111360,   24111360,     1152,     2304, 0x7fd18372
+0,   24137856,   24137856,     1152,     2304, 0x965c7377
+0,   24164352,   24164352,     1152,     2304, 0xb53b70b3
+0,   24190848,   24190848,     1152,     2304, 0xd54884d1
+0,   24217344,   24217344,     1152,     2304, 0x546d77ac
+0,   24243840,   24243840,     1152,     2304, 0xd7a77dc7
+0,   24270336,   24270336,     1152,     2304, 0x4b0e7c5c
+0,   24296832,   24296832,     1152,     2304, 0x346d7026
+0,   24323328,   24323328,     1152,     2304, 0x3f346e81
+0,   24349824,   24349824,     1152,     2304, 0x3f167098
+0,   24376320,   24376320,     1152,     2304, 0x1c857987
+0,   24402816,   24402816,     1152,     2304, 0xf9317c16
+0,   24429312,   24429312,     1152,     2304, 0x25ea82c4
+0,   24455808,   24455808,     1152,     2304, 0x2b0f90bb
+0,   24482304,   24482304,     1152,     2304, 0x999668db
+0,   24508800,   24508800,     1152,     2304, 0x9a0676da
+0,   24535296,   24535296,     1152,     2304, 0x97a87172
+0,   24561792,   24561792,     1152,     2304, 0x323d7ace
+0,   24588288,   24588288,     1152,     2304, 0x1b538baa
+0,   24614784,   24614784,     1152,     2304, 0xeb208b72
+0,   24641280,   24641280,     1152,     2304, 0x36c69152
+0,   24667776,   24667776,     1152,     2304, 0x570c7bac
+0,   24694272,   24694272,     1152,     2304, 0xd7287c72
+0,   24720768,   24720768,     1152,     2304, 0xc4d78776
+0,   24747264,   24747264,     1152,     2304, 0xdcbb7735
+0,   24773760,   24773760,     1152,     2304, 0x08066d78
+0,   24800256,   24800256,     1152,     2304, 0x5cb96bad
+0,   24826752,   24826752,     1152,     2304, 0x30d58180
+0,   24853248,   24853248,     1152,     2304, 0x3ac96b87
+0,   24879744,   24879744,     1152,     2304, 0x1fc878d2
+0,   24906240,   24906240,     1152,     2304, 0x2c7c85d4
+0,   24932736,   24932736,     1152,     2304, 0x6c307caa
+0,   24959232,   24959232,     1152,     2304, 0x647b6d6e
+0,   24985728,   24985728,     1152,     2304, 0xfbef7e51
+0,   25012224,   25012224,     1152,     2304, 0x27867bd7
+0,   25038720,   25038720,     1152,     2304, 0xa2b875b1
+0,   25065216,   25065216,     1152,     2304, 0xc7dd73fb
+0,   25091712,   25091712,     1152,     2304, 0x717e79e9
+0,   25118208,   25118208,     1152,     2304, 0xfecb710d
+0,   25144704,   25144704,     1152,     2304, 0xa5308333
+0,   25171200,   25171200,     1152,     2304, 0x92fe6b77
+0,   25197696,   25197696,     1152,     2304, 0x7db17ad6
+0,   25224192,   25224192,     1152,     2304, 0x1248786d
+0,   25250688,   25250688,     1152,     2304, 0xf3647a8a
+0,   25277184,   25277184,     1152,     2304, 0x7b2173f0
+0,   25303680,   25303680,     1152,     2304, 0xfd1f7a3c
+0,   25330176,   25330176,     1152,     2304, 0x09e57f3e
+0,   25356672,   25356672,     1152,     2304, 0x13d7798d
+0,   25383168,   25383168,     1152,     2304, 0x7d4d758a
+0,   25409664,   25409664,     1152,     2304, 0xdf1a80be
+0,   25436160,   25436160,     1152,     2304, 0xb91b7cb9
+0,   25462656,   25462656,     1152,     2304, 0x102e73ba
+0,   25489152,   25489152,     1152,     2304, 0x8dce8428
+0,   25515648,   25515648,     1152,     2304, 0xdf9070f6
+0,   25542144,   25542144,     1152,     2304, 0x27d77bb4
+0,   25568640,   25568640,     1152,     2304, 0x2f8a8579
+0,   25595136,   25595136,     1152,     2304, 0xd1e27204
+0,   25621632,   25621632,     1152,     2304, 0x4f087920
+0,   25648128,   25648128,     1152,     2304, 0x69f58306
+0,   25674624,   25674624,     1152,     2304, 0xfc7378b0
+0,   25701120,   25701120,     1152,     2304, 0x8b167d89
+0,   25727616,   25727616,     1152,     2304, 0x0c867235
+0,   25754112,   25754112,     1152,     2304, 0xc3126e09
+0,   25780608,   25780608,     1152,     2304, 0x7d097d89
+0,   25807104,   25807104,     1152,     2304, 0x73dc664e
+0,   25833600,   25833600,     1152,     2304, 0xdc247a0a
+0,   25860096,   25860096,     1152,     2304, 0xaa8d7f2f
+0,   25886592,   25886592,     1152,     2304, 0x9a1d744b
+0,   25913088,   25913088,     1152,     2304, 0xd9e87475
+0,   25939584,   25939584,     1152,     2304, 0xf6f182ab
+0,   25966080,   25966080,     1152,     2304, 0x4937706b
+0,   25992576,   25992576,     1152,     2304, 0x837189b1
+0,   26019072,   26019072,     1152,     2304, 0xd565975d
+0,   26045568,   26045568,     1152,     2304, 0xc6156a56
+0,   26072064,   26072064,     1152,     2304, 0xcd9c8107
+0,   26098560,   26098560,     1152,     2304, 0x1f868400
+0,   26125056,   26125056,     1152,     2304, 0xeddb71e0
+0,   26151552,   26151552,     1152,     2304, 0x5fa2782a
+0,   26178048,   26178048,     1152,     2304, 0x9b076e22
+0,   26204544,   26204544,     1152,     2304, 0x90027ba7
+0,   26231040,   26231040,     1152,     2304, 0x40a481fd
+0,   26257536,   26257536,     1152,     2304, 0x240873bf
+0,   26284032,   26284032,     1152,     2304, 0x28cd7d8e
+0,   26310528,   26310528,     1152,     2304, 0x5d4287b0
+0,   26337024,   26337024,     1152,     2304, 0xf9567c4b
+0,   26363520,   26363520,     1152,     2304, 0x67dd924a
+0,   26390016,   26390016,     1152,     2304, 0x33d37b4c
+0,   26416512,   26416512,     1152,     2304, 0x37316d0b
+0,   26443008,   26443008,     1152,     2304, 0x43a877dc
+0,   26469504,   26469504,     1152,     2304, 0x44fc90bc
+0,   26496000,   26496000,     1152,     2304, 0x90f5774c
+0,   26522496,   26522496,     1152,     2304, 0x106e85a0
+0,   26548992,   26548992,     1152,     2304, 0xb78b7f83
+0,   26575488,   26575488,     1152,     2304, 0x3c3a77af
+0,   26601984,   26601984,     1152,     2304, 0x7a5f7282
+0,   26628480,   26628480,     1152,     2304, 0x164d891a
+0,   26654976,   26654976,     1152,     2304, 0x930a7194
+0,   26681472,   26681472,     1152,     2304, 0xb2628013
+0,   26707968,   26707968,     1152,     2304, 0x3adf7e1c
+0,   26734464,   26734464,     1152,     2304, 0x48627c45
+0,   26760960,   26760960,     1152,     2304, 0xa3367cca
+0,   26787456,   26787456,     1152,     2304, 0x9cf77248
+0,   26813952,   26813952,     1152,     2304, 0x75a86acb
+0,   26840448,   26840448,     1152,     2304, 0x75487698
+0,   26866944,   26866944,     1152,     2304, 0xd65b8389
+0,   26893440,   26893440,     1152,     2304, 0xb71d84b5
+0,   26919936,   26919936,     1152,     2304, 0xdca87e42
+0,   26946432,   26946432,     1152,     2304, 0x44dd7d72
+0,   26972928,   26972928,     1152,     2304, 0xb29465da
+0,   26999424,   26999424,     1152,     2304, 0x88ed8930
+0,   27025920,   27025920,     1152,     2304, 0xabdd8096
+0,   27052416,   27052416,     1152,     2304, 0xe5f57653
+0,   27078912,   27078912,     1152,     2304, 0x7c777984
+0,   27105408,   27105408,     1152,     2304, 0x631d7deb
+0,   27131904,   27131904,     1152,     2304, 0x70d36e1f
+0,   27158400,   27158400,     1152,     2304, 0x412c802b
+0,   27184896,   27184896,     1152,     2304, 0xacc281c1
+0,   27211392,   27211392,     1152,     2304, 0x569d86a8
+0,   27237888,   27237888,     1152,     2304, 0x5c69743f
+0,   27264384,   27264384,     1152,     2304, 0x29637794
+0,   27290880,   27290880,     1152,     2304, 0x0d1b7a4f
+0,   27317376,   27317376,     1152,     2304, 0xfc6978a6
+0,   27343872,   27343872,     1152,     2304, 0x9b178987
+0,   27370368,   27370368,     1152,     2304, 0xe436714a
+0,   27396864,   27396864,     1152,     2304, 0x783a7b6e
+0,   27423360,   27423360,     1152,     2304, 0x6ff16f4e
+0,   27449856,   27449856,     1152,     2304, 0x9fd582cc
+0,   27476352,   27476352,     1152,     2304, 0xff0d6bc9
+0,   27502848,   27502848,     1152,     2304, 0x6f4975d5
+0,   27529344,   27529344,     1152,     2304, 0x94a77a90
+0,   27555840,   27555840,     1152,     2304, 0x764f80db
+0,   27582336,   27582336,     1152,     2304, 0x68e88782
+0,   27608832,   27608832,     1152,     2304, 0x91a378e8
+0,   27635328,   27635328,     1152,     2304, 0x29f26903
+0,   27661824,   27661824,     1152,     2304, 0xc3b684d4
+0,   27688320,   27688320,     1152,     2304, 0x7d95921f
+0,   27714816,   27714816,     1152,     2304, 0x876a7ded
+0,   27741312,   27741312,     1152,     2304, 0x0fbe7ee8
+0,   27767808,   27767808,     1152,     2304, 0x7d0f78c5
+0,   27794304,   27794304,     1152,     2304, 0xce906ba8
+0,   27820800,   27820800,     1152,     2304, 0xd281747b
+0,   27847296,   27847296,     1152,     2304, 0xec38811b
+0,   27873792,   27873792,     1152,     2304, 0x8170810a
+0,   27900288,   27900288,     1152,     2304, 0x4d858ba2
+0,   27926784,   27926784,     1152,     2304, 0x8c798662
+0,   27953280,   27953280,     1152,     2304, 0x23a8879b
+0,   27979776,   27979776,     1152,     2304, 0x9d0c8894
+0,   28006272,   28006272,     1152,     2304, 0xf07c681f
+0,   28032768,   28032768,     1152,     2304, 0x03ab6f7a
+0,   28059264,   28059264,     1152,     2304, 0x88c170c2
+0,   28085760,   28085760,     1152,     2304, 0xeb977bf3
+0,   28112256,   28112256,     1152,     2304, 0x33b6774f
+0,   28138752,   28138752,     1152,     2304, 0x534c776e
+0,   28165248,   28165248,     1152,     2304, 0x33c4826a
+0,   28191744,   28191744,     1152,     2304, 0xdb9878d8
+0,   28218240,   28218240,     1152,     2304, 0xeffd789b
+0,   28244736,   28244736,     1152,     2304, 0x2cc9862f
+0,   28271232,   28271232,     1152,     2304, 0x9bf18131
+0,   28297728,   28297728,     1152,     2304, 0xfa078144
+0,   28324224,   28324224,     1152,     2304, 0x922885d2
+0,   28350720,   28350720,     1152,     2304, 0x29ad7796
+0,   28377216,   28377216,     1152,     2304, 0x62766f3f
+0,   28403712,   28403712,     1152,     2304, 0x72a873a5
+0,   28430208,   28430208,     1152,     2304, 0x65176e4d
+0,   28456704,   28456704,     1152,     2304, 0x75447cbd
+0,   28483200,   28483200,     1152,     2304, 0x83528e32
+0,   28509696,   28509696,     1152,     2304, 0x0c758531
+0,   28536192,   28536192,     1152,     2304, 0x4a5c7a84
+0,   28562688,   28562688,     1152,     2304, 0xd5937648
+0,   28589184,   28589184,     1152,     2304, 0xef5c8983
+0,   28615680,   28615680,     1152,     2304, 0x24b281d0
+0,   28642176,   28642176,     1152,     2304, 0xc0af7017
+0,   28668672,   28668672,     1152,     2304, 0xf1cc7145
+0,   28695168,   28695168,     1152,     2304, 0x7ddb7a3b
+0,   28721664,   28721664,     1152,     2304, 0xd2e57692
+0,   28748160,   28748160,     1152,     2304, 0x7abc61d6
+0,   28774656,   28774656,     1152,     2304, 0x44a68338
+0,   28801152,   28801152,     1152,     2304, 0x9cad810e
+0,   28827648,   28827648,     1152,     2304, 0xbf3a7f51
+0,   28854144,   28854144,     1152,     2304, 0x52ac815e
+0,   28880640,   28880640,     1152,     2304, 0xdf327e10
+0,   28907136,   28907136,     1152,     2304, 0x797c867b
+0,   28933632,   28933632,     1152,     2304, 0x685b79ec
+0,   28960128,   28960128,     1152,     2304, 0x60267b82
+0,   28986624,   28986624,     1152,     2304, 0xe70e7fd4
+0,   29013120,   29013120,     1152,     2304, 0xaf197a22
+0,   29039616,   29039616,     1152,     2304, 0xf5e57075
+0,   29066112,   29066112,     1152,     2304, 0xa86f8197
+0,   29092608,   29092608,     1152,     2304, 0x922c7b3d
+0,   29119104,   29119104,     1152,     2304, 0x8e3b7929
+0,   29145600,   29145600,     1152,     2304, 0x816f862d
+0,   29172096,   29172096,     1152,     2304, 0xc09c761a
+0,   29198592,   29198592,     1152,     2304, 0xf1cd9010
+0,   29225088,   29225088,     1152,     2304, 0xcf5c72d8
+0,   29251584,   29251584,     1152,     2304, 0x0e607a38
+0,   29278080,   29278080,     1152,     2304, 0x6cb0819c
+0,   29304576,   29304576,     1152,     2304, 0x9e2880aa
+0,   29331072,   29331072,     1152,     2304, 0xb46f69f9
+0,   29357568,   29357568,     1152,     2304, 0xbc807eab
+0,   29384064,   29384064,     1152,     2304, 0x91237cf9
+0,   29410560,   29410560,     1152,     2304, 0xb6bf82b6
+0,   29437056,   29437056,     1152,     2304, 0xfcd58daf
+0,   29463552,   29463552,     1152,     2304, 0x53b176f4
+0,   29490048,   29490048,     1152,     2304, 0xc79d7507
+0,   29516544,   29516544,     1152,     2304, 0x810677b2
+0,   29543040,   29543040,     1152,     2304, 0x13148737
+0,   29569536,   29569536,     1152,     2304, 0x2eb5755a
+0,   29596032,   29596032,     1152,     2304, 0x5bec5cd2
+0,   29622528,   29622528,     1152,     2304, 0x66c68d47
+0,   29649024,   29649024,     1152,     2304, 0x55d18770
+0,   29675520,   29675520,     1152,     2304, 0x13997def
+0,   29702016,   29702016,     1152,     2304, 0x23cd8437
+0,   29728512,   29728512,     1152,     2304, 0x5cd07942
+0,   29755008,   29755008,     1152,     2304, 0x43ab6b4c
+0,   29781504,   29781504,     1152,     2304, 0xc55788ec
+0,   29808000,   29808000,     1152,     2304, 0x6b69788e
+0,   29834496,   29834496,     1152,     2304, 0xcacb8fea
+0,   29860992,   29860992,     1152,     2304, 0x83407ebd
+0,   29887488,   29887488,     1152,     2304, 0x998275f3
+0,   29913984,   29913984,     1152,     2304, 0x0ca76dcc
+0,   29940480,   29940480,     1152,     2304, 0xa1ff7c6a
+0,   29966976,   29966976,     1152,     2304, 0x4a597683
+0,   29993472,   29993472,     1152,     2304, 0xc8e57f16
+0,   30019968,   30019968,     1152,     2304, 0x376d82fb
+0,   30046464,   30046464,     1152,     2304, 0x489b8164
+0,   30072960,   30072960,     1152,     2304, 0xece27f77
+0,   30099456,   30099456,     1152,     2304, 0x4c197e8f
+0,   30125952,   30125952,     1152,     2304, 0x6eb07739
+0,   30152448,   30152448,     1152,     2304, 0xef628145
+0,   30178944,   30178944,     1152,     2304, 0x549d6f12
+0,   30205440,   30205440,     1152,     2304, 0xbe566b70
+0,   30231936,   30231936,     1152,     2304, 0x2bde87d3
+0,   30258432,   30258432,     1152,     2304, 0x0ffd7175
+0,   30284928,   30284928,     1152,     2304, 0x47d28a8b
+0,   30311424,   30311424,     1152,     2304, 0x6f6c718e
+0,   30337920,   30337920,     1152,     2304, 0x665e7937
+0,   30364416,   30364416,     1152,     2304, 0x73538e8d
+0,   30390912,   30390912,     1152,     2304, 0x56167d71
+0,   30417408,   30417408,     1152,     2304, 0x3d0278ed
+0,   30443904,   30443904,     1152,     2304, 0x9baf7a51
+0,   30470400,   30470400,     1152,     2304, 0x8c058105
+0,   30496896,   30496896,     1152,     2304, 0x2b9573ca
+0,   30523392,   30523392,     1152,     2304, 0x4a2271a1
+0,   30549888,   30549888,     1152,     2304, 0x3d5276f6
+0,   30576384,   30576384,     1152,     2304, 0x43c076cd
+0,   30602880,   30602880,     1152,     2304, 0x4fec8929
+0,   30629376,   30629376,     1152,     2304, 0xaa7167ec
+0,   30655872,   30655872,     1152,     2304, 0x39ff7ea5
+0,   30682368,   30682368,     1152,     2304, 0x2dd58348
+0,   30708864,   30708864,     1152,     2304, 0x03fc8782
+0,   30735360,   30735360,     1152,     2304, 0x55a4649c
+0,   30761856,   30761856,     1152,     2304, 0x71407067
+0,   30788352,   30788352,     1152,     2304, 0x3b667ce6
+0,   30814848,   30814848,     1152,     2304, 0xb12e81aa
+0,   30841344,   30841344,     1152,     2304, 0xc1646ee3
+0,   30867840,   30867840,     1152,     2304, 0x6b55884f
+0,   30894336,   30894336,     1152,     2304, 0x292382b5
+0,   30920832,   30920832,     1152,     2304, 0x18f97854
+0,   30947328,   30947328,     1152,     2304, 0x47a480be
+0,   30973824,   30973824,     1152,     2304, 0xe0857b16
+0,   31000320,   31000320,     1152,     2304, 0xe3368462
+0,   31026816,   31026816,     1152,     2304, 0xced17ae8
+0,   31053312,   31053312,     1152,     2304, 0x4020796d
+0,   31079808,   31079808,     1152,     2304, 0x26797fd0
+0,   31106304,   31106304,     1152,     2304, 0x0e087554
+0,   31132800,   31132800,     1152,     2304, 0xf8868040
+0,   31159296,   31159296,     1152,     2304, 0x3f0a7784
+0,   31185792,   31185792,     1152,     2304, 0xbe517d5e
+0,   31212288,   31212288,     1152,     2304, 0xed0b66bc
+0,   31238784,   31238784,     1152,     2304, 0x79b96d63
+0,   31265280,   31265280,     1152,     2304, 0x1ab4793a
+0,   31291776,   31291776,     1152,     2304, 0xd2db7f7d
+0,   31318272,   31318272,     1152,     2304, 0x91017992
+0,   31344768,   31344768,     1152,     2304, 0xf88f7cb9
+0,   31371264,   31371264,     1152,     2304, 0x0cc98309
+0,   31397760,   31397760,     1152,     2304, 0x38b77cb8
+0,   31424256,   31424256,     1152,     2304, 0x0cb185f9
+0,   31450752,   31450752,     1152,     2304, 0xc4a27824
+0,   31477248,   31477248,     1152,     2304, 0x1ce97618
+0,   31503744,   31503744,     1152,     2304, 0x1423976e
+0,   31530240,   31530240,     1152,     2304, 0xdbb781b2
+0,   31556736,   31556736,     1152,     2304, 0x156d71d3
+0,   31583232,   31583232,     1152,     2304, 0xce228625
+0,   31609728,   31609728,     1152,     2304, 0x318072f3
+0,   31636224,   31636224,     1152,     2304, 0x464b77a1
+0,   31662720,   31662720,     1152,     2304, 0x0204854d
+0,   31689216,   31689216,     1152,     2304, 0x498c8373
+0,   31715712,   31715712,     1152,     2304, 0xfe1a6e23
+0,   31742208,   31742208,     1152,     2304, 0x705284d0
+0,   31768704,   31768704,     1152,     2304, 0x91848980
+0,   31795200,   31795200,     1152,     2304, 0x259098c5
+0,   31821696,   31821696,     1152,     2304, 0x52b07c02
+0,   31848192,   31848192,     1152,     2304, 0x017289f6
+0,   31874688,   31874688,     1152,     2304, 0xb87687bf
+0,   31901184,   31901184,     1152,     2304, 0x72de7843
+0,   31927680,   31927680,     1152,     2304, 0x3daf8939
+0,   31954176,   31954176,     1152,     2304, 0x0dfb763c
+0,   31980672,   31980672,     1152,     2304, 0xc3017408
+0,   32007168,   32007168,     1152,     2304, 0x73f27f14
+0,   32033664,   32033664,     1152,     2304, 0x1c517541
+0,   32060160,   32060160,     1152,     2304, 0xf390748b
+0,   32086656,   32086656,     1152,     2304, 0xe6ac71f3
+0,   32113152,   32113152,     1152,     2304, 0x6277862d
+0,   32139648,   32139648,     1152,     2304, 0x923d6794
+0,   32166144,   32166144,     1152,     2304, 0x320a7f20
+0,   32192640,   32192640,     1152,     2304, 0x12f07a09
+0,   32219136,   32219136,     1152,     2304, 0xf9568b14
+0,   32245632,   32245632,     1152,     2304, 0x4d787fea
+0,   32272128,   32272128,     1152,     2304, 0x57c5861a
+0,   32298624,   32298624,     1152,     2304, 0x826b7cc7
+0,   32325120,   32325120,     1152,     2304, 0x7d056a94
+0,   32351616,   32351616,     1152,     2304, 0xbf3e8a60
+0,   32378112,   32378112,     1152,     2304, 0x5e518cab
+0,   32404608,   32404608,     1152,     2304, 0x64e7794b
+0,   32431104,   32431104,     1152,     2304, 0x9c9569ac
+0,   32457600,   32457600,     1152,     2304, 0xde167582
+0,   32484096,   32484096,     1152,     2304, 0xdd8f78b1
+0,   32510592,   32510592,     1152,     2304, 0xec577e60
+0,   32537088,   32537088,     1152,     2304, 0x06877aa4
+0,   32563584,   32563584,     1152,     2304, 0xb5157742
+0,   32590080,   32590080,     1152,     2304, 0xf9be6f2b
+0,   32616576,   32616576,     1152,     2304, 0xf95784c7
+0,   32643072,   32643072,     1152,     2304, 0x5aec7e15
+0,   32669568,   32669568,     1152,     2304, 0x666e7ce9
+0,   32696064,   32696064,     1152,     2304, 0xa76393a7
+0,   32722560,   32722560,     1152,     2304, 0xd1bb6d5c
+0,   32749056,   32749056,     1152,     2304, 0x48d88144
+0,   32775552,   32775552,     1152,     2304, 0xa7df8177
+0,   32802048,   32802048,     1152,     2304, 0x906476b1
+0,   32828544,   32828544,     1152,     2304, 0x9f46723d
+0,   32855040,   32855040,     1152,     2304, 0xa6d37a0c
+0,   32881536,   32881536,     1152,     2304, 0x73e57be2
+0,   32908032,   32908032,     1152,     2304, 0x666e8594
+0,   32934528,   32934528,     1152,     2304, 0xd4b481d7
+0,   32961024,   32961024,     1152,     2304, 0xfced8427
+0,   32987520,   32987520,     1152,     2304, 0x743999bd
+0,   33014016,   33014016,     1152,     2304, 0x71dd759b
+0,   33040512,   33040512,     1152,     2304, 0x54a2849e
+0,   33067008,   33067008,     1152,     2304, 0x07377f58
+0,   33093504,   33093504,     1152,     2304, 0xfd8f721a
+0,   33120000,   33120000,     1152,     2304, 0x4d917422
+0,   33146496,   33146496,     1152,     2304, 0xaaa46e2b
+0,   33172992,   33172992,     1152,     2304, 0x49e57597
+0,   33199488,   33199488,     1152,     2304, 0x29337511
+0,   33225984,   33225984,     1152,     2304, 0xdfee84c2
+0,   33252480,   33252480,     1152,     2304, 0x57266b8c
+0,   33278976,   33278976,     1152,     2304, 0x5bf27cfc
+0,   33305472,   33305472,     1152,     2304, 0xdc4b8b29
+0,   33331968,   33331968,     1152,     2304, 0xf49e7466
+0,   33358464,   33358464,     1152,     2304, 0xf1d975f3
+0,   33384960,   33384960,     1152,     2304, 0xc863774d
+0,   33411456,   33411456,     1152,     2304, 0x5fab80c0
+0,   33437952,   33437952,     1152,     2304, 0x19548d6f
+0,   33464448,   33464448,     1152,     2304, 0x165983ac
+0,   33490944,   33490944,     1152,     2304, 0x2d658203
+0,   33517440,   33517440,     1152,     2304, 0x717678b2
+0,   33543936,   33543936,     1152,     2304, 0x1ed26dae
+0,   33570432,   33570432,     1152,     2304, 0xecef7975
+0,   33596928,   33596928,     1152,     2304, 0xcd2b804c
+0,   33623424,   33623424,     1152,     2304, 0xf4597f97
+0,   33649920,   33649920,     1152,     2304, 0x904b75cd
+0,   33676416,   33676416,     1152,     2304, 0x6b508374
+0,   33702912,   33702912,     1152,     2304, 0xb6b4791f
+0,   33729408,   33729408,     1152,     2304, 0xce6d7501
+0,   33755904,   33755904,     1152,     2304, 0x99ec8bac
+0,   33782400,   33782400,     1152,     2304, 0x65d772b7
+0,   33808896,   33808896,     1152,     2304, 0xbc2762ff
+0,   33835392,   33835392,     1152,     2304, 0xf8a48378
+0,   33861888,   33861888,     1152,     2304, 0x18057ad4
+0,   33888384,   33888384,     1152,     2304, 0x89af753f
+0,   33914880,   33914880,     1152,     2304, 0x95a48fc5
+0,   33941376,   33941376,     1152,     2304, 0x8b7576af
+0,   33967872,   33967872,     1152,     2304, 0xcdcb6e67
+0,   33994368,   33994368,     1152,     2304, 0xe02a7ff3
+0,   34020864,   34020864,     1152,     2304, 0x4f0a7d48
+0,   34047360,   34047360,     1152,     2304, 0x9bab8287
+0,   34073856,   34073856,     1152,     2304, 0xc1fa8063
+0,   34100352,   34100352,     1152,     2304, 0x410e7dd7
+0,   34126848,   34126848,     1152,     2304, 0xd4de777f
+0,   34153344,   34153344,     1152,     2304, 0x403675ac
+0,   34179840,   34179840,     1152,     2304, 0x4d7687c5
+0,   34206336,   34206336,     1152,     2304, 0x10c6650d
+0,   34232832,   34232832,     1152,     2304, 0xd7e67b69
+0,   34259328,   34259328,     1152,     2304, 0x42b97fae
+0,   34285824,   34285824,     1152,     2304, 0xa8b57a9f
+0,   34312320,   34312320,     1152,     2304, 0xc24960a0
+0,   34338816,   34338816,     1152,     2304, 0x16198fec
+0,   34365312,   34365312,     1152,     2304, 0x29297416
+0,   34391808,   34391808,     1152,     2304, 0x129b7d00
+0,   34418304,   34418304,     1152,     2304, 0xa9677141
+0,   34444800,   34444800,     1152,     2304, 0x86a779bf
+0,   34471296,   34471296,     1152,     2304, 0xa3857dce
+0,   34497792,   34497792,     1152,     2304, 0xee6d7c4d
+0,   34524288,   34524288,     1152,     2304, 0x97127166
+0,   34550784,   34550784,     1152,     2304, 0x6e407c25
+0,   34577280,   34577280,     1152,     2304, 0x82a07c9a
+0,   34603776,   34603776,     1152,     2304, 0xf62381ef
+0,   34630272,   34630272,     1152,     2304, 0x2ab86ca8
+0,   34656768,   34656768,     1152,     2304, 0x97ca87ed
+0,   34683264,   34683264,     1152,     2304, 0x6b4868f2
+0,   34709760,   34709760,     1152,     2304, 0xdb2a953c
+0,   34736256,   34736256,     1152,     2304, 0x061e7770
+0,   34762752,   34762752,     1152,     2304, 0xae067224
+0,   34789248,   34789248,     1152,     2304, 0x88a282a3
+0,   34815744,   34815744,     1152,     2304, 0x4e14743e
+0,   34842240,   34842240,     1152,     2304, 0x3dbd8b7b
+0,   34868736,   34868736,     1152,     2304, 0xcae57312
+0,   34895232,   34895232,     1152,     2304, 0xf9038ca7
+0,   34921728,   34921728,     1152,     2304, 0x03ec6f07
+0,   34948224,   34948224,     1152,     2304, 0x8c297359
+0,   34974720,   34974720,     1152,     2304, 0x8af1804c
+0,   35001216,   35001216,     1152,     2304, 0xf30087c8
+0,   35027712,   35027712,     1152,     2304, 0x21127cd1
+0,   35054208,   35054208,     1152,     2304, 0xc66772bd
+0,   35080704,   35080704,     1152,     2304, 0x05e6630b
+0,   35107200,   35107200,     1152,     2304, 0x020688d9
+0,   35133696,   35133696,     1152,     2304, 0xee3873cd
+0,   35160192,   35160192,     1152,     2304, 0xf263782b
+0,   35186688,   35186688,     1152,     2304, 0xf8877144
+0,   35213184,   35213184,     1152,     2304, 0xce1e6f33
+0,   35239680,   35239680,     1152,     2304, 0xf0457e4f
+0,   35266176,   35266176,     1152,     2304, 0xde9387df
+0,   35292672,   35292672,     1152,     2304, 0x5df86d1d
+0,   35319168,   35319168,     1152,     2304, 0x949087ef
+0,   35345664,   35345664,     1152,     2304, 0xdbd269c6
+0,   35372160,   35372160,     1152,     2304, 0xd530806e
+0,   35398656,   35398656,     1152,     2304, 0x80027b1d
+0,   35425152,   35425152,     1152,     2304, 0xccb47c48
+0,   35451648,   35451648,     1152,     2304, 0x4f2f8326
+0,   35478144,   35478144,     1152,     2304, 0xfe046f23
+0,   35504640,   35504640,     1152,     2304, 0x5939759b
+0,   35531136,   35531136,     1152,     2304, 0x83147073
+0,   35557632,   35557632,     1152,     2304, 0xc9e77b95
+0,   35584128,   35584128,     1152,     2304, 0x702d8e51
+0,   35610624,   35610624,     1152,     2304, 0x5f5f8492
+0,   35637120,   35637120,     1152,     2304, 0x10116d0e
+0,   35663616,   35663616,     1152,     2304, 0xb8587894
+0,   35690112,   35690112,     1152,     2304, 0x2b357892
+0,   35716608,   35716608,     1152,     2304, 0x160e6c20
+0,   35743104,   35743104,     1152,     2304, 0x0534874b
+0,   35769600,   35769600,     1152,     2304, 0xde687064
+0,   35796096,   35796096,     1152,     2304, 0x91ef7fe4
+0,   35822592,   35822592,     1152,     2304, 0x077473ce
+0,   35849088,   35849088,     1152,     2304, 0x1be77777
+0,   35875584,   35875584,     1152,     2304, 0xaada9096
+0,   35902080,   35902080,     1152,     2304, 0xeac96f7e
+0,   35928576,   35928576,     1152,     2304, 0xe9ca6e72
+0,   35955072,   35955072,     1152,     2304, 0xd0868038
+0,   35981568,   35981568,     1152,     2304, 0x689176f1
+0,   36008064,   36008064,     1152,     2304, 0xdfdb7684
+0,   36034560,   36034560,     1152,     2304, 0x83c55d92
+0,   36061056,   36061056,     1152,     2304, 0x77587b70
+0,   36087552,   36087552,     1152,     2304, 0x60937941
+0,   36114048,   36114048,     1152,     2304, 0xb00267bd
+0,   36140544,   36140544,     1152,     2304, 0xd0a166eb
+0,   36167040,   36167040,     1152,     2304, 0x88718162
+0,   36193536,   36193536,     1152,     2304, 0xf9e084ff
+0,   36220032,   36220032,     1152,     2304, 0x2ddc8c08
+0,   36246528,   36246528,     1152,     2304, 0x843a7178
+0,   36273024,   36273024,     1152,     2304, 0x1d9784b4
+0,   36299520,   36299520,     1152,     2304, 0x98aa7b1d
+0,   36326016,   36326016,     1152,     2304, 0x583e846e
+0,   36352512,   36352512,     1152,     2304, 0xa6d284ec
+0,   36379008,   36379008,     1152,     2304, 0xa0147cbf
+0,   36405504,   36405504,     1152,     2304, 0xb3bf7b4b
+0,   36432000,   36432000,     1152,     2304, 0x7f8b78dd
+0,   36458496,   36458496,     1152,     2304, 0xd3fb761f
+0,   36484992,   36484992,     1152,     2304, 0x78147c2f
+0,   36511488,   36511488,     1152,     2304, 0x5e747b26
+0,   36537984,   36537984,     1152,     2304, 0x0d8e7d96
+0,   36564480,   36564480,     1152,     2304, 0x73728362
+0,   36590976,   36590976,     1152,     2304, 0x7c54707b
+0,   36617472,   36617472,     1152,     2304, 0xb89c7806
+0,   36643968,   36643968,     1152,     2304, 0x5f0f7a91
+0,   36670464,   36670464,     1152,     2304, 0x35217e84
+0,   36696960,   36696960,     1152,     2304, 0xb3798553
+0,   36723456,   36723456,     1152,     2304, 0xd8377e8a
+0,   36749952,   36749952,     1152,     2304, 0x339181ba
+0,   36776448,   36776448,     1152,     2304, 0xbe196b2c
+0,   36802944,   36802944,     1152,     2304, 0xdc708cc4
+0,   36829440,   36829440,     1152,     2304, 0x649c74c1
+0,   36855936,   36855936,     1152,     2304, 0xac128649
+0,   36882432,   36882432,     1152,     2304, 0xcd266a22
+0,   36908928,   36908928,     1152,     2304, 0xdfde7023
+0,   36935424,   36935424,     1152,     2304, 0xf92d7450
+0,   36961920,   36961920,     1152,     2304, 0x9ce379ac
+0,   36988416,   36988416,     1152,     2304, 0xee5e712e
+0,   37014912,   37014912,     1152,     2304, 0xa12676c0
+0,   37041408,   37041408,     1152,     2304, 0x4cee7be1
+0,   37067904,   37067904,     1152,     2304, 0x90c881e5
+0,   37094400,   37094400,     1152,     2304, 0xa6a78158
+0,   37120896,   37120896,     1152,     2304, 0xd248864a
+0,   37147392,   37147392,     1152,     2304, 0x7a8972b4
+0,   37173888,   37173888,     1152,     2304, 0x5c8a75cb
+0,   37200384,   37200384,     1152,     2304, 0x07606722
+0,   37226880,   37226880,     1152,     2304, 0xbf497aae
+0,   37253376,   37253376,     1152,     2304, 0xffab7d66
+0,   37279872,   37279872,     1152,     2304, 0xefdc7d05
+0,   37306368,   37306368,     1152,     2304, 0x02d686dd
+0,   37332864,   37332864,     1152,     2304, 0x852e6eb6
+0,   37359360,   37359360,     1152,     2304, 0x3445706b
+0,   37385856,   37385856,     1152,     2304, 0x04677828
+0,   37412352,   37412352,     1152,     2304, 0x5d5c833c
+0,   37438848,   37438848,     1152,     2304, 0xf4277e06
+0,   37465344,   37465344,     1152,     2304, 0xcece72ab
+0,   37491840,   37491840,     1152,     2304, 0x10837aff
+0,   37518336,   37518336,     1152,     2304, 0x0265762b
+0,   37544832,   37544832,     1152,     2304, 0x0fc979de
+0,   37571328,   37571328,     1152,     2304, 0xeecf7010
+0,   37597824,   37597824,     1152,     2304, 0x313975cd
+0,   37624320,   37624320,     1152,     2304, 0xccce84a7
+0,   37650816,   37650816,     1152,     2304, 0x823e6935
+0,   37677312,   37677312,     1152,     2304, 0xa0606ade
+0,   37703808,   37703808,     1152,     2304, 0x2c2d7866
+0,   37730304,   37730304,     1152,     2304, 0x3a5a781e
+0,   37756800,   37756800,     1152,     2304, 0xdaf677fd
+0,   37783296,   37783296,     1152,     2304, 0x79d88aca
+0,   37809792,   37809792,     1152,     2304, 0x3db27308
+0,   37836288,   37836288,     1152,     2304, 0x627175ff
+0,   37862784,   37862784,     1152,     2304, 0x40697b16
+0,   37889280,   37889280,     1152,     2304, 0x40e47ee7
+0,   37915776,   37915776,     1152,     2304, 0x6a6a7d91
+0,   37942272,   37942272,     1152,     2304, 0x8d3e8a32
+0,   37968768,   37968768,     1152,     2304, 0x576a817e
+0,   37995264,   37995264,     1152,     2304, 0x86287f9b
+0,   38021760,   38021760,     1152,     2304, 0x43e081ce
+0,   38048256,   38048256,     1152,     2304, 0xe44b8003
+0,   38074752,   38074752,     1152,     2304, 0xfb237f5b
+0,   38101248,   38101248,     1152,     2304, 0x28967c8a
+0,   38127744,   38127744,     1152,     2304, 0x97478829
+0,   38154240,   38154240,     1152,     2304, 0x615570e8
+0,   38180736,   38180736,     1152,     2304, 0xa52c7d34
+0,   38207232,   38207232,     1152,     2304, 0xb55473e8
+0,   38233728,   38233728,     1152,     2304, 0x08a07d6c
+0,   38260224,   38260224,     1152,     2304, 0x2eed916a
+0,   38286720,   38286720,     1152,     2304, 0x8ce36cec
+0,   38313216,   38313216,     1152,     2304, 0x775e8992
+0,   38339712,   38339712,     1152,     2304, 0x99ad816b
+0,   38366208,   38366208,     1152,     2304, 0x697a70be
+0,   38392704,   38392704,     1152,     2304, 0x38c46cf2
+0,   38419200,   38419200,     1152,     2304, 0xf9d681f4
+0,   38445696,   38445696,     1152,     2304, 0xa7336fea
+0,   38472192,   38472192,     1152,     2304, 0xd2a97e59
+0,   38498688,   38498688,     1152,     2304, 0x02b880e9
+0,   38525184,   38525184,     1152,     2304, 0x2a486e9e
+0,   38551680,   38551680,     1152,     2304, 0x5f197c43
+0,   38578176,   38578176,     1152,     2304, 0x266679ad
+0,   38604672,   38604672,     1152,     2304, 0x7cfd7c8a
+0,   38631168,   38631168,     1152,     2304, 0x43b681bb
+0,   38657664,   38657664,     1152,     2304, 0x63309cff
+0,   38684160,   38684160,     1152,     2304, 0x80f36f7d
+0,   38710656,   38710656,     1152,     2304, 0xcead7b98
+0,   38737152,   38737152,     1152,     2304, 0x8475992f
+0,   38763648,   38763648,     1152,     2304, 0x746782d2
+0,   38790144,   38790144,     1152,     2304, 0x2b486982
+0,   38816640,   38816640,     1152,     2304, 0xc80c7fc2
+0,   38843136,   38843136,     1152,     2304, 0x148c7c3b
+0,   38869632,   38869632,     1152,     2304, 0x49c477df
+0,   38896128,   38896128,     1152,     2304, 0x1bf97c56
+0,   38922624,   38922624,     1152,     2304, 0x89a38221
+0,   38949120,   38949120,     1152,     2304, 0xcb757a31
+0,   38975616,   38975616,     1152,     2304, 0xd3bb72fa
+0,   39002112,   39002112,     1152,     2304, 0xe5727650
+0,   39028608,   39028608,     1152,     2304, 0x5d4e9242
+0,   39055104,   39055104,     1152,     2304, 0x29207f75
+0,   39081600,   39081600,     1152,     2304, 0x5b1d954a
+0,   39108096,   39108096,     1152,     2304, 0xa2888223
+0,   39134592,   39134592,     1152,     2304, 0xe31d8af7
+0,   39161088,   39161088,     1152,     2304, 0x29ee82dd
+0,   39187584,   39187584,     1152,     2304, 0x4b1d7887
+0,   39214080,   39214080,     1152,     2304, 0xc0308236
+0,   39240576,   39240576,     1152,     2304, 0xaed68e92
+0,   39267072,   39267072,     1152,     2304, 0xbc1171e9
+0,   39293568,   39293568,     1152,     2304, 0x379f828b
+0,   39320064,   39320064,     1152,     2304, 0x83de72c8
+0,   39346560,   39346560,     1152,     2304, 0xb11a806a
+0,   39373056,   39373056,     1152,     2304, 0x7ac17e7a
+0,   39399552,   39399552,     1152,     2304, 0x031b7d9a
+0,   39426048,   39426048,     1152,     2304, 0xebc97c43
+0,   39452544,   39452544,     1152,     2304, 0x914e86a7
+0,   39479040,   39479040,     1152,     2304, 0x88e2747e
+0,   39505536,   39505536,     1152,     2304, 0x153076bc
+0,   39532032,   39532032,     1152,     2304, 0xdacc7ebb
+0,   39558528,   39558528,     1152,     2304, 0x0905709a
+0,   39585024,   39585024,     1152,     2304, 0x13358109
+0,   39611520,   39611520,     1152,     2304, 0x18227228
+0,   39638016,   39638016,     1152,     2304, 0x318082d7
+0,   39664512,   39664512,     1152,     2304, 0x8aec7a8e
+0,   39691008,   39691008,     1152,     2304, 0x8c9c7755
+0,   39717504,   39717504,     1152,     2304, 0xdba46e2f
+0,   39744000,   39744000,     1152,     2304, 0xd47d7f5a
+0,   39770496,   39770496,     1152,     2304, 0xeb1370ec
+0,   39796992,   39796992,     1152,     2304, 0xf6d68e74
+0,   39823488,   39823488,     1152,     2304, 0x3d5a7c66
+0,   39849984,   39849984,     1152,     2304, 0x264b6d68
+0,   39876480,   39876480,     1152,     2304, 0x7cde8b9f
+0,   39902976,   39902976,     1152,     2304, 0xba467347
+0,   39929472,   39929472,     1152,     2304, 0x921987f7
+0,   39955968,   39955968,     1152,     2304, 0x9c6c7fce
+0,   39982464,   39982464,     1152,     2304, 0x02cb828f
+0,   40008960,   40008960,     1152,     2304, 0x3b427077
+0,   40035456,   40035456,     1152,     2304, 0x6f9c7523
+0,   40061952,   40061952,     1152,     2304, 0xef688204
+0,   40088448,   40088448,     1152,     2304, 0x263782dc
+0,   40114944,   40114944,     1152,     2304, 0xb6ca7e46
+0,   40141440,   40141440,     1152,     2304, 0xb0929636
+0,   40167936,   40167936,     1152,     2304, 0x60a97b7a
+0,   40194432,   40194432,     1152,     2304, 0xcf037690
+0,   40220928,   40220928,     1152,     2304, 0xc9046a25
+0,   40247424,   40247424,     1152,     2304, 0xbf247f3c
+0,   40273920,   40273920,     1152,     2304, 0x71e77d34
+0,   40300416,   40300416,     1152,     2304, 0x96ce7a01
+0,   40326912,   40326912,     1152,     2304, 0xf9b07a27
+0,   40353408,   40353408,     1152,     2304, 0x984b830b
+0,   40379904,   40379904,     1152,     2304, 0xff11822b
+0,   40406400,   40406400,     1152,     2304, 0xb0a18578
+0,   40432896,   40432896,     1152,     2304, 0xa9d98ef8
+0,   40459392,   40459392,     1152,     2304, 0x4f9076e1
+0,   40485888,   40485888,     1152,     2304, 0xb63c7742
+0,   40512384,   40512384,     1152,     2304, 0xb4c06ae2
+0,   40538880,   40538880,     1152,     2304, 0x41738531
+0,   40565376,   40565376,     1152,     2304, 0x55125e7b

diff --git a/tests/ref/fate/filter-hue b/tests/ref/fate/filter-hue
deleted file mode 100644
index 2f1ae61..0000000
--- a/tests/ref/fate/filter-hue
+++ /dev/null

@@ -1 +0,0 @@
-hue                 57463dd9bc17156a51b704dd7271c863

diff --git a/tests/ref/fate/filter-hue1 b/tests/ref/fate/filter-hue1
new file mode 100644
index 0000000..7059ba7
--- /dev/null
+++ b/tests/ref/fate/filter-hue1

@@ -0,0 +1 @@
+hue1                57463dd9bc17156a51b704dd7271c863

diff --git a/tests/ref/fate/filter-hue2 b/tests/ref/fate/filter-hue2
new file mode 100644
index 0000000..2e060e1
--- /dev/null
+++ b/tests/ref/fate/filter-hue2

@@ -0,0 +1 @@
+hue2                e951ae8ffb279552abc4354c4de7cb5b

diff --git a/tests/ref/fate/filter-hue3 b/tests/ref/fate/filter-hue3
new file mode 100644
index 0000000..14a8379
--- /dev/null
+++ b/tests/ref/fate/filter-hue3

@@ -0,0 +1 @@
+hue3                149dc22f5dc9f578f7d281dc3120c09b

diff --git a/tests/ref/fate/filter-hue4 b/tests/ref/fate/filter-hue4
new file mode 100644
index 0000000..2a08c33
--- /dev/null
+++ b/tests/ref/fate/filter-hue4

@@ -0,0 +1 @@
+hue4                6279ed43527e7b5be645819e08880107

diff --git a/tests/ref/fate/filter-mcdeint-fast b/tests/ref/fate/filter-mcdeint-fast
index 228be5e..e4c2f8a 100644
--- a/tests/ref/fate/filter-mcdeint-fast
+++ b/tests/ref/fate/filter-mcdeint-fast

@@ -3,33 +3,33 @@
 #codec_id 0: rawvideo
 #dimensions 0: 720x576
 #sar 0: 16/15
-0,          9,          9,        1,   622080, 0xb3b66c5c
-0,         10,         10,        1,   622080, 0xc6568bd7
-0,         11,         11,        1,   622080, 0xa5b543c3
-0,         12,         12,        1,   622080, 0x4095ac51
-0,         13,         13,        1,   622080, 0xccd8c1d9
-0,         14,         14,        1,   622080, 0x84a88f22
-0,         15,         15,        1,   622080, 0x7273c26b
-0,         16,         16,        1,   622080, 0xac188c41
-0,         17,         17,        1,   622080, 0xf32f6fb4
-0,         18,         18,        1,   622080, 0xd696ccce
-0,         19,         19,        1,   622080, 0x9778a418
-0,         20,         20,        1,   622080, 0xf2b5be2e
-0,         21,         21,        1,   622080, 0x653ee12a
-0,         22,         22,        1,   622080, 0xe7fce188
-0,         23,         23,        1,   622080, 0x6e9f1deb
-0,         24,         24,        1,   622080, 0x33090aac
-0,         25,         25,        1,   622080, 0x840a57f1
-0,         26,         26,        1,   622080, 0x635e430a
-0,         27,         27,        1,   622080, 0x52f98809
-0,         28,         28,        1,   622080, 0xc567b6a5
-0,         29,         29,        1,   622080, 0x4134f583
-0,         30,         30,        1,   622080, 0xd02a73bc
-0,         31,         31,        1,   622080, 0x763085d6
-0,         32,         32,        1,   622080, 0x77fdc7a6
-0,         33,         33,        1,   622080, 0x77f71b9f
-0,         34,         34,        1,   622080, 0x71c91244
-0,         35,         35,        1,   622080, 0xc7b86da5
-0,         36,         36,        1,   622080, 0x1edf8890
-0,         37,         37,        1,   622080, 0x03c82bec
-0,         38,         38,        1,   622080, 0x148b6a04
+0,          9,          9,        1,   622080, 0xff496bf5
+0,         10,         10,        1,   622080, 0x513c8bd9
+0,         11,         11,        1,   622080, 0x4e474368
+0,         12,         12,        1,   622080, 0x1248abe9
+0,         13,         13,        1,   622080, 0xa705c158
+0,         14,         14,        1,   622080, 0xf9048e95
+0,         15,         15,        1,   622080, 0x78b5c1a2
+0,         16,         16,        1,   622080, 0x0efa8be8
+0,         17,         17,        1,   622080, 0xd3396eac
+0,         18,         18,        1,   622080, 0x5870cbdd
+0,         19,         19,        1,   622080, 0x086fa311
+0,         20,         20,        1,   622080, 0x7ce9bced
+0,         21,         21,        1,   622080, 0xe7e0e0e1
+0,         22,         22,        1,   622080, 0x5af3e14b
+0,         23,         23,        1,   622080, 0xbf221d96
+0,         24,         24,        1,   622080, 0x43d90a62
+0,         25,         25,        1,   622080, 0x267a57b6
+0,         26,         26,        1,   622080, 0x88d942eb
+0,         27,         27,        1,   622080, 0x34ff87bf
+0,         28,         28,        1,   622080, 0xa849b5ec
+0,         29,         29,        1,   622080, 0x8302f51f
+0,         30,         30,        1,   622080, 0xac9e7315
+0,         31,         31,        1,   622080, 0x38b284fc
+0,         32,         32,        1,   622080, 0x1ff0c6c4
+0,         33,         33,        1,   622080, 0x50bf1ba5
+0,         34,         34,        1,   622080, 0xe9bd1240
+0,         35,         35,        1,   622080, 0x22116da3
+0,         36,         36,        1,   622080, 0x6f3e887a
+0,         37,         37,        1,   622080, 0x46b82bc5
+0,         38,         38,        1,   622080, 0xeaaf69ee

diff --git a/tests/ref/fate/filter-mcdeint-medium b/tests/ref/fate/filter-mcdeint-medium
index 05d1d72..1b02619 100644
--- a/tests/ref/fate/filter-mcdeint-medium
+++ b/tests/ref/fate/filter-mcdeint-medium

@@ -3,33 +3,33 @@
 #codec_id 0: rawvideo
 #dimensions 0: 720x576
 #sar 0: 16/15
-0,          9,          9,        1,   622080, 0xb3b66c5c
-0,         10,         10,        1,   622080, 0x26a29152
-0,         11,         11,        1,   622080, 0x787adddc
-0,         12,         12,        1,   622080, 0xcc52df08
-0,         13,         13,        1,   622080, 0x53dad126
-0,         14,         14,        1,   622080, 0xe1448652
-0,         15,         15,        1,   622080, 0x159fd353
-0,         16,         16,        1,   622080, 0xcbe893a0
-0,         17,         17,        1,   622080, 0x43a67c6b
-0,         18,         18,        1,   622080, 0xef30caf9
-0,         19,         19,        1,   622080, 0xa9cea62b
-0,         20,         20,        1,   622080, 0x4c4cada1
-0,         21,         21,        1,   622080, 0x8e91f6de
-0,         22,         22,        1,   622080, 0xb03ef044
-0,         23,         23,        1,   622080, 0x6b54262b
-0,         24,         24,        1,   622080, 0x911e0cea
-0,         25,         25,        1,   622080, 0x8320632c
-0,         26,         26,        1,   622080, 0x2bde42b2
-0,         27,         27,        1,   622080, 0xe9d988c3
-0,         28,         28,        1,   622080, 0xa9f0b1db
-0,         29,         29,        1,   622080, 0xb5bcf186
-0,         30,         30,        1,   622080, 0x469c6717
-0,         31,         31,        1,   622080, 0x2ca883e6
-0,         32,         32,        1,   622080, 0x4f5fba72
-0,         33,         33,        1,   622080, 0xa2e423ca
-0,         34,         34,        1,   622080, 0xc1fb0aaf
-0,         35,         35,        1,   622080, 0x96a879b8
-0,         36,         36,        1,   622080, 0x212e92e6
-0,         37,         37,        1,   622080, 0x9f26378a
-0,         38,         38,        1,   622080, 0xdeaf77ab
+0,          9,          9,        1,   622080, 0xff496bf5
+0,         10,         10,        1,   622080, 0xc0e4912c
+0,         11,         11,        1,   622080, 0xa8aedd7e
+0,         12,         12,        1,   622080, 0x2054deb9
+0,         13,         13,        1,   622080, 0x1005d0ca
+0,         14,         14,        1,   622080, 0x60f085dc
+0,         15,         15,        1,   622080, 0x4da0d261
+0,         16,         16,        1,   622080, 0x968e940e
+0,         17,         17,        1,   622080, 0x86687b04
+0,         18,         18,        1,   622080, 0xd63bc93d
+0,         19,         19,        1,   622080, 0x7ab0a6e6
+0,         20,         20,        1,   622080, 0x883dab85
+0,         21,         21,        1,   622080, 0x9f6ef6b5
+0,         22,         22,        1,   622080, 0xceccee25
+0,         23,         23,        1,   622080, 0x2aa823a5
+0,         24,         24,        1,   622080, 0xb20d0f48
+0,         25,         25,        1,   622080, 0x571560b9
+0,         26,         26,        1,   622080, 0xc0904764
+0,         27,         27,        1,   622080, 0xdb5b89c3
+0,         28,         28,        1,   622080, 0x707aadc5
+0,         29,         29,        1,   622080, 0x6383ef1b
+0,         30,         30,        1,   622080, 0xf9e56040
+0,         31,         31,        1,   622080, 0x86ce7ff0
+0,         32,         32,        1,   622080, 0x0c76bd84
+0,         33,         33,        1,   622080, 0xd7192781
+0,         34,         34,        1,   622080, 0x83b70cdc
+0,         35,         35,        1,   622080, 0xaae87453
+0,         36,         36,        1,   622080, 0xfafa92e2
+0,         37,         37,        1,   622080, 0x28323354
+0,         38,         38,        1,   622080, 0x34d47484

diff --git a/tests/ref/fate/filter-metadata-silencedetect b/tests/ref/fate/filter-metadata-silencedetect
index 4161287..16a9d07 100644
--- a/tests/ref/fate/filter-metadata-silencedetect
+++ b/tests/ref/fate/filter-metadata-silencedetect

@@ -1,512 +1,12 @@
-pkt_pts=0
-pkt_pts=320
-pkt_pts=640
-pkt_pts=960
-pkt_pts=1280
-pkt_pts=1600
-pkt_pts=1920|tag:lavfi.silence_start=0.02
-pkt_pts=2240
-pkt_pts=2560|tag:lavfi.silence_end=0.16|tag:lavfi.silence_duration=0.14
-pkt_pts=2880
-pkt_pts=3200
-pkt_pts=3520
-pkt_pts=3840
-pkt_pts=4160
-pkt_pts=4480
-pkt_pts=4800
-pkt_pts=5120
-pkt_pts=5440
-pkt_pts=5760|tag:lavfi.silence_start=0.26|tag:lavfi.silence_end=0.36|tag:lavfi.silence_duration=0.1
-pkt_pts=6080
-pkt_pts=6400
-pkt_pts=6720
-pkt_pts=7040
-pkt_pts=7360
-pkt_pts=7680
-pkt_pts=8000
-pkt_pts=8320
-pkt_pts=8640
-pkt_pts=8960
-pkt_pts=9280
-pkt_pts=9600
-pkt_pts=9920
-pkt_pts=10240
-pkt_pts=10560
-pkt_pts=10880
-pkt_pts=11200
-pkt_pts=11520
-pkt_pts=11840
-pkt_pts=12160
-pkt_pts=12480
-pkt_pts=12800
-pkt_pts=13120
-pkt_pts=13440
-pkt_pts=13760
-pkt_pts=14080
-pkt_pts=14400
-pkt_pts=14720
-pkt_pts=15040
-pkt_pts=15360
-pkt_pts=15680
-pkt_pts=16000
-pkt_pts=16320
-pkt_pts=16640
-pkt_pts=16960
-pkt_pts=17280
-pkt_pts=17600
-pkt_pts=17920
-pkt_pts=18240
-pkt_pts=18560
-pkt_pts=18880
-pkt_pts=19200
-pkt_pts=19520
-pkt_pts=19840
-pkt_pts=20160
-pkt_pts=20480
-pkt_pts=20800
-pkt_pts=21120
-pkt_pts=21440
-pkt_pts=21760
-pkt_pts=22080|tag:lavfi.silence_start=1.28
-pkt_pts=22400
-pkt_pts=22720
-pkt_pts=23040
-pkt_pts=23360
-pkt_pts=23680
-pkt_pts=24000
-pkt_pts=24320
-pkt_pts=24640
-pkt_pts=24960
-pkt_pts=25280
-pkt_pts=25600
-pkt_pts=25920
-pkt_pts=26240
-pkt_pts=26560
-pkt_pts=26880
-pkt_pts=27200
-pkt_pts=27520
-pkt_pts=27840
-pkt_pts=28160
-pkt_pts=28480
-pkt_pts=28800
-pkt_pts=29120
-pkt_pts=29440
-pkt_pts=29760
-pkt_pts=30080
-pkt_pts=30400
-pkt_pts=30720
-pkt_pts=31040
-pkt_pts=31360
-pkt_pts=31680|tag:lavfi.silence_end=1.98|tag:lavfi.silence_duration=0.7
-pkt_pts=32000
-pkt_pts=32320
-pkt_pts=32640
-pkt_pts=32960
-pkt_pts=33280
-pkt_pts=33600
-pkt_pts=33920
-pkt_pts=34240
-pkt_pts=34560
-pkt_pts=34880
-pkt_pts=35200
-pkt_pts=35520
-pkt_pts=35840
-pkt_pts=36160
-pkt_pts=36480
-pkt_pts=36800
-pkt_pts=37120
-pkt_pts=37440
-pkt_pts=37760
-pkt_pts=38080
-pkt_pts=38400
-pkt_pts=38720
-pkt_pts=39040
-pkt_pts=39360
-pkt_pts=39680
-pkt_pts=40000
-pkt_pts=40320
-pkt_pts=40640
-pkt_pts=40960
-pkt_pts=41280
-pkt_pts=41600
-pkt_pts=41920
-pkt_pts=42240
-pkt_pts=42560
-pkt_pts=42880
-pkt_pts=43200
-pkt_pts=43520
-pkt_pts=43840
-pkt_pts=44160
-pkt_pts=44480
-pkt_pts=44800
-pkt_pts=45120
-pkt_pts=45440
-pkt_pts=45760
-pkt_pts=46080
-pkt_pts=46400
-pkt_pts=46720
-pkt_pts=47040
-pkt_pts=47360
-pkt_pts=47680
-pkt_pts=48000
-pkt_pts=48320
-pkt_pts=48640
-pkt_pts=48960
-pkt_pts=49280
-pkt_pts=49600
-pkt_pts=49920
-pkt_pts=50240
-pkt_pts=50560
-pkt_pts=50880
-pkt_pts=51200
-pkt_pts=51520
-pkt_pts=51840
-pkt_pts=52160
-pkt_pts=52480
-pkt_pts=52800|tag:lavfi.silence_start=3.2
-pkt_pts=53120
-pkt_pts=53440
-pkt_pts=53760
-pkt_pts=54080
-pkt_pts=54400
-pkt_pts=54720
-pkt_pts=55040
-pkt_pts=55360
-pkt_pts=55680
-pkt_pts=56000
-pkt_pts=56320
-pkt_pts=56640
-pkt_pts=56960
-pkt_pts=57280
-pkt_pts=57600
-pkt_pts=57920
-pkt_pts=58240
-pkt_pts=58560
-pkt_pts=58880
-pkt_pts=59200
-pkt_pts=59520
-pkt_pts=59840
-pkt_pts=60160
-pkt_pts=60480
-pkt_pts=60800
-pkt_pts=61120
-pkt_pts=61440
-pkt_pts=61760
-pkt_pts=62080
-pkt_pts=62400|tag:lavfi.silence_end=3.9|tag:lavfi.silence_duration=0.7
-pkt_pts=62720
-pkt_pts=63040
-pkt_pts=63360
-pkt_pts=63680
-pkt_pts=64000
-pkt_pts=64320
-pkt_pts=64640
-pkt_pts=64960
-pkt_pts=65280
-pkt_pts=65600
-pkt_pts=65920
-pkt_pts=66240
-pkt_pts=66560
-pkt_pts=66880
-pkt_pts=67200
-pkt_pts=67520
-pkt_pts=67840
-pkt_pts=68160
-pkt_pts=68480
-pkt_pts=68800
-pkt_pts=69120
-pkt_pts=69440
-pkt_pts=69760
-pkt_pts=70080
-pkt_pts=70400
-pkt_pts=70720
-pkt_pts=71040
-pkt_pts=71360
-pkt_pts=71680
-pkt_pts=72000
-pkt_pts=72320
-pkt_pts=72640
-pkt_pts=72960
-pkt_pts=73280
-pkt_pts=73600
-pkt_pts=73920
-pkt_pts=74240
-pkt_pts=74560
-pkt_pts=74880
-pkt_pts=75200
-pkt_pts=75520
-pkt_pts=75840
-pkt_pts=76160
-pkt_pts=76480
-pkt_pts=76800
-pkt_pts=77120
-pkt_pts=77440
-pkt_pts=77760
-pkt_pts=78080
-pkt_pts=78400
-pkt_pts=78720
-pkt_pts=79040
-pkt_pts=79360
-pkt_pts=79680
-pkt_pts=80000|tag:lavfi.silence_start=4.9
-pkt_pts=80320
-pkt_pts=80640
-pkt_pts=80960
-pkt_pts=81280
-pkt_pts=81600
-pkt_pts=81920
-pkt_pts=82240
-pkt_pts=82560
-pkt_pts=82880
-pkt_pts=83200
-pkt_pts=83520
-pkt_pts=83840
-pkt_pts=84160
-pkt_pts=84480
-pkt_pts=84800
-pkt_pts=85120
-pkt_pts=85440
-pkt_pts=85760
-pkt_pts=86080
-pkt_pts=86400
-pkt_pts=86720
-pkt_pts=87040|tag:lavfi.silence_end=5.44|tag:lavfi.silence_duration=0.54
-pkt_pts=87360
-pkt_pts=87680
-pkt_pts=88000
-pkt_pts=88320
-pkt_pts=88640
-pkt_pts=88960
-pkt_pts=89280
-pkt_pts=89600
-pkt_pts=89920
-pkt_pts=90240
-pkt_pts=90560
-pkt_pts=90880
-pkt_pts=91200
-pkt_pts=91520
-pkt_pts=91840
+pkt_pts=0|tag:lavfi.silence_duration=0.523107|tag:lavfi.silence_end=0.690023|tag:lavfi.silence_start=0.736417
+pkt_pts=46080|tag:lavfi.silence_start=1.27626|tag:lavfi.silence_end=1.80751|tag:lavfi.silence_duration=0.531247
 pkt_pts=92160
-pkt_pts=92480
-pkt_pts=92800
-pkt_pts=93120
-pkt_pts=93440
-pkt_pts=93760
-pkt_pts=94080
-pkt_pts=94400
-pkt_pts=94720
-pkt_pts=95040
-pkt_pts=95360
-pkt_pts=95680
-pkt_pts=96000
-pkt_pts=96320
-pkt_pts=96640
-pkt_pts=96960
-pkt_pts=97280
-pkt_pts=97600
-pkt_pts=97920
-pkt_pts=98240
-pkt_pts=98560
-pkt_pts=98880
-pkt_pts=99200
-pkt_pts=99520
-pkt_pts=99840
-pkt_pts=100160
-pkt_pts=100480
-pkt_pts=100800
-pkt_pts=101120
-pkt_pts=101440
-pkt_pts=101760
-pkt_pts=102080
-pkt_pts=102400
-pkt_pts=102720
-pkt_pts=103040
-pkt_pts=103360
-pkt_pts=103680
-pkt_pts=104000
-pkt_pts=104320
-pkt_pts=104640|tag:lavfi.silence_start=6.44
-pkt_pts=104960
-pkt_pts=105280
-pkt_pts=105600
-pkt_pts=105920
-pkt_pts=106240
-pkt_pts=106560
-pkt_pts=106880
-pkt_pts=107200
-pkt_pts=107520
-pkt_pts=107840
-pkt_pts=108160
-pkt_pts=108480
-pkt_pts=108800
-pkt_pts=109120
-pkt_pts=109440
-pkt_pts=109760
-pkt_pts=110080
-pkt_pts=110400
-pkt_pts=110720
-pkt_pts=111040
-pkt_pts=111360
-pkt_pts=111680
-pkt_pts=112000
-pkt_pts=112320
-pkt_pts=112640
-pkt_pts=112960
-pkt_pts=113280
-pkt_pts=113600
-pkt_pts=113920
-pkt_pts=114240
-pkt_pts=114560
-pkt_pts=114880
-pkt_pts=115200
-pkt_pts=115520
-pkt_pts=115840
-pkt_pts=116160|tag:lavfi.silence_end=7.26|tag:lavfi.silence_duration=0.82
-pkt_pts=116480
-pkt_pts=116800
-pkt_pts=117120
-pkt_pts=117440
-pkt_pts=117760
-pkt_pts=118080
-pkt_pts=118400
-pkt_pts=118720
-pkt_pts=119040
-pkt_pts=119360
-pkt_pts=119680
-pkt_pts=120000
-pkt_pts=120320
-pkt_pts=120640
-pkt_pts=120960
-pkt_pts=121280
-pkt_pts=121600
-pkt_pts=121920
-pkt_pts=122240
-pkt_pts=122560
-pkt_pts=122880
-pkt_pts=123200
-pkt_pts=123520
-pkt_pts=123840
-pkt_pts=124160
-pkt_pts=124480
-pkt_pts=124800
-pkt_pts=125120
-pkt_pts=125440
-pkt_pts=125760
-pkt_pts=126080
-pkt_pts=126400
-pkt_pts=126720
-pkt_pts=127040
-pkt_pts=127360
-pkt_pts=127680
-pkt_pts=128000
-pkt_pts=128320
-pkt_pts=128640
-pkt_pts=128960
-pkt_pts=129280
-pkt_pts=129600
-pkt_pts=129920
-pkt_pts=130240
-pkt_pts=130560
-pkt_pts=130880
-pkt_pts=131200
-pkt_pts=131520
-pkt_pts=131840
-pkt_pts=132160
-pkt_pts=132480
-pkt_pts=132800
-pkt_pts=133120
-pkt_pts=133440
-pkt_pts=133760
-pkt_pts=134080
-pkt_pts=134400
-pkt_pts=134720
-pkt_pts=135040
-pkt_pts=135360
-pkt_pts=135680
-pkt_pts=136000
-pkt_pts=136320
-pkt_pts=136640
-pkt_pts=136960
-pkt_pts=137280
-pkt_pts=137600|tag:lavfi.silence_start=8.5
-pkt_pts=137920
 pkt_pts=138240
-pkt_pts=138560
-pkt_pts=138880|tag:lavfi.silence_end=8.68|tag:lavfi.silence_duration=0.18
-pkt_pts=139200
-pkt_pts=139520
-pkt_pts=139840
-pkt_pts=140160
-pkt_pts=140480|tag:lavfi.silence_start=8.68
-pkt_pts=140800
-pkt_pts=141120
-pkt_pts=141440
-pkt_pts=141760
-pkt_pts=142080
-pkt_pts=142400
-pkt_pts=142720
-pkt_pts=143040
-pkt_pts=143360
-pkt_pts=143680|tag:lavfi.silence_end=8.98|tag:lavfi.silence_duration=0.3
-pkt_pts=144000
-pkt_pts=144320
-pkt_pts=144640
-pkt_pts=144960
-pkt_pts=145280
-pkt_pts=145600
-pkt_pts=145920
-pkt_pts=146240
-pkt_pts=146560
-pkt_pts=146880
-pkt_pts=147200
-pkt_pts=147520
-pkt_pts=147840
-pkt_pts=148160
-pkt_pts=148480
-pkt_pts=148800
-pkt_pts=149120
-pkt_pts=149440
-pkt_pts=149760
-pkt_pts=150080
-pkt_pts=150400
-pkt_pts=150720
-pkt_pts=151040
-pkt_pts=151360
-pkt_pts=151680
-pkt_pts=152000
-pkt_pts=152320
-pkt_pts=152640
-pkt_pts=152960
-pkt_pts=153280
-pkt_pts=153600
-pkt_pts=153920
-pkt_pts=154240
-pkt_pts=154560
-pkt_pts=154880
-pkt_pts=155200
-pkt_pts=155520
-pkt_pts=155840
-pkt_pts=156160
-pkt_pts=156480
-pkt_pts=156800
-pkt_pts=157120
-pkt_pts=157440
-pkt_pts=157760
-pkt_pts=158080
-pkt_pts=158400
-pkt_pts=158720
-pkt_pts=159040
-pkt_pts=159360
-pkt_pts=159680
-pkt_pts=160000
-pkt_pts=160320
-pkt_pts=160640
-pkt_pts=160960
-pkt_pts=161280
-pkt_pts=161600|tag:lavfi.silence_start=10
-pkt_pts=161920
-pkt_pts=162240
-pkt_pts=162560
-pkt_pts=162880
-pkt_pts=163200
-pkt_pts=163520
+pkt_pts=184320
+pkt_pts=230400
+pkt_pts=276480
+pkt_pts=322560
+pkt_pts=368640
+pkt_pts=414720
+pkt_pts=460800
+pkt_pts=506880

diff --git a/tests/ref/fate/filter-pal100bars b/tests/ref/fate/filter-pal100bars
new file mode 100644
index 0000000..0ee8d22
--- /dev/null
+++ b/tests/ref/fate/filter-pal100bars

@@ -0,0 +1,10 @@
+#tb 0: 1/5
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          0,          0,        1,   115200, 0x97a31f02
+0,          1,          1,        1,   115200, 0x97a31f02
+0,          2,          2,        1,   115200, 0x97a31f02
+0,          3,          3,        1,   115200, 0x97a31f02
+0,          4,          4,        1,   115200, 0x97a31f02

diff --git a/tests/ref/fate/filter-pal75bars b/tests/ref/fate/filter-pal75bars
new file mode 100644
index 0000000..a2d2e17
--- /dev/null
+++ b/tests/ref/fate/filter-pal75bars

@@ -0,0 +1,10 @@
+#tb 0: 1/5
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+0,          0,          0,        1,   115200, 0xa131179a
+0,          1,          1,        1,   115200, 0xa131179a
+0,          2,          2,        1,   115200, 0xa131179a
+0,          3,          3,        1,   115200, 0xa131179a
+0,          4,          4,        1,   115200, 0xa131179a

diff --git a/tests/ref/fate/filter-pan-downmix1 b/tests/ref/fate/filter-pan-downmix1
new file mode 100644
index 0000000..47d5044
--- /dev/null
+++ b/tests/ref/fate/filter-pan-downmix1

@@ -0,0 +1,26 @@
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: pcm_s16le
+#sample_rate 0: 44100
+#channel_layout 0: 3
+#channel_layout_name 0: stereo
+0,          0,          0,      512,     2048, 0xccb10170
+0,        512,        512,      512,     2048, 0xc51cf8e7
+0,       1024,       1024,      512,     2048, 0x959eef2d
+0,       1536,       1536,      512,     2048, 0x559f0751
+0,       2048,       2048,      512,     2048, 0x4c4c08e1
+0,       2560,       2560,      512,     2048, 0x41c3fd6f
+0,       3072,       3072,      512,     2048, 0x4c92f8d2
+0,       3584,       3584,      512,     2048, 0x9defee05
+0,       4096,       4096,      512,     2048, 0x2d33f4dc
+0,       4608,       4608,      512,     2048, 0xd854eeca
+0,       5120,       5120,      512,     2048, 0xdb55eaf7
+0,       5632,       5632,      512,     2048, 0x2bac060d
+0,       6144,       6144,      512,     2048, 0x91beec0f
+0,       6656,       6656,      512,     2048, 0x3262f0da
+0,       7168,       7168,      512,     2048, 0xf00708a6
+0,       7680,       7680,      512,     2048, 0xc90cfa02
+0,       8192,       8192,      512,     2048, 0x09e9f413
+0,       8704,       8704,      512,     2048, 0xcd0d0c36
+0,       9216,       9216,      512,     2048, 0x6f6bff82
+0,       9728,       9728,      512,     2048, 0x18a40912

diff --git a/tests/ref/fate/filter-pan-downmix2 b/tests/ref/fate/filter-pan-downmix2
new file mode 100644
index 0000000..8d3ee10
--- /dev/null
+++ b/tests/ref/fate/filter-pan-downmix2

@@ -0,0 +1,26 @@
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: pcm_s16le
+#sample_rate 0: 44100
+#channel_layout 0: 0
+#channel_layout_name 0: 5 channels
+0,          0,          0,      186,     1860, 0x2480a20e
+0,        186,        186,      186,     1860, 0xde9e9c40
+0,        372,        372,      186,     1860, 0xc497b2a0
+0,        558,        558,      186,     1860, 0xe5a8a2d7
+0,        744,        744,      186,     1860, 0x01c09b6a
+0,        930,        930,      186,     1860, 0xb26fae2f
+0,       1116,       1116,      186,     1860, 0x1292a9ab
+0,       1302,       1302,      186,     1860, 0xe7a79136
+0,       1488,       1488,      186,     1860, 0x57dc81a3
+0,       1674,       1674,      186,     1860, 0xc13b9a1b
+0,       1860,       1860,      186,     1860, 0xdf7c9fd6
+0,       2046,       2046,      186,     1860, 0x4d769cff
+0,       2232,       2232,      186,     1860, 0xb8b79977
+0,       2418,       2418,      186,     1860, 0xd287989f
+0,       2604,       2604,      186,     1860, 0xc2a38f03
+0,       2790,       2790,      186,     1860, 0xd7149438
+0,       2976,       2976,      186,     1860, 0xc3719081
+0,       3162,       3162,      186,     1860, 0xb9fba1a3
+0,       3348,       3348,      186,     1860, 0xe92cb2de
+0,       3534,       3534,      186,     1860, 0x9dd9b272

diff --git a/tests/ref/fate/filter-pan-mono1 b/tests/ref/fate/filter-pan-mono1
new file mode 100644
index 0000000..3bd7c25
--- /dev/null
+++ b/tests/ref/fate/filter-pan-mono1

@@ -0,0 +1,26 @@
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: pcm_s16le
+#sample_rate 0: 44100
+#channel_layout 0: 4
+#channel_layout_name 0: mono
+0,          0,          0,     1024,     2048, 0x750f0a66
+0,       1024,       1024,     1024,     2048, 0x155cf063
+0,       2048,       2048,     1024,     2048, 0x1e43fc32
+0,       3072,       3072,     1024,     2048, 0x282ffc28
+0,       4096,       4096,     1024,     2048, 0x6d7bf000
+0,       5120,       5120,     1024,     2048, 0xc0b2f411
+0,       6144,       6144,     1024,     2048, 0xd711fb03
+0,       7168,       7168,     1024,     2048, 0x3164189c
+0,       8192,       8192,     1024,     2048, 0x8c69e827
+0,       9216,       9216,     1024,     2048, 0x562d0518
+0,      10240,      10240,     1024,     2048, 0x380aee27
+0,      11264,      11264,     1024,     2048, 0x990a03e4
+0,      12288,      12288,     1024,     2048, 0x68d7ef60
+0,      13312,      13312,     1024,     2048, 0xd13fef9e
+0,      14336,      14336,     1024,     2048, 0x009306e4
+0,      15360,      15360,     1024,     2048, 0x51850390
+0,      16384,      16384,     1024,     2048, 0xcd3ceeae
+0,      17408,      17408,     1024,     2048, 0x189ff277
+0,      18432,      18432,     1024,     2048, 0x4b98f68c
+0,      19456,      19456,     1024,     2048, 0x34eaf544

diff --git a/tests/ref/fate/filter-pan-mono2 b/tests/ref/fate/filter-pan-mono2
new file mode 100644
index 0000000..0867ca9
--- /dev/null
+++ b/tests/ref/fate/filter-pan-mono2

@@ -0,0 +1,26 @@
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: pcm_s16le
+#sample_rate 0: 44100
+#channel_layout 0: 0
+#channel_layout_name 0: 1 channels
+0,          0,          0,     1024,     2048, 0x6130fb80
+0,       1024,       1024,     1024,     2048, 0xd5ef0930
+0,       2048,       2048,     1024,     2048, 0x40bce3f6
+0,       3072,       3072,     1024,     2048, 0x72e5d193
+0,       4096,       4096,     1024,     2048, 0xb005073f
+0,       5120,       5120,     1024,     2048, 0xa323fdbe
+0,       6144,       6144,     1024,     2048, 0xe5cbfe1e
+0,       7168,       7168,     1024,     2048, 0x4b42fe79
+0,       8192,       8192,     1024,     2048, 0x384eedea
+0,       9216,       9216,     1024,     2048, 0xe5cdf825
+0,      10240,      10240,     1024,     2048, 0xc2970ec0
+0,      11264,      11264,     1024,     2048, 0xa85fe5e0
+0,      12288,      12288,     1024,     2048, 0xfd51f2de
+0,      13312,      13312,     1024,     2048, 0xa1aafe30
+0,      14336,      14336,     1024,     2048, 0x8770fea2
+0,      15360,      15360,     1024,     2048, 0x67c50d76
+0,      16384,      16384,     1024,     2048, 0x7772fbc8
+0,      17408,      17408,     1024,     2048, 0xc48eff54
+0,      18432,      18432,     1024,     2048, 0x2e14f359
+0,      19456,      19456,     1024,     2048, 0x2df70a60

diff --git a/tests/ref/fate/filter-pan-stereo1 b/tests/ref/fate/filter-pan-stereo1
new file mode 100644
index 0000000..3125d4e
--- /dev/null
+++ b/tests/ref/fate/filter-pan-stereo1

@@ -0,0 +1,26 @@
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: pcm_s16le
+#sample_rate 0: 44100
+#channel_layout 0: 3
+#channel_layout_name 0: stereo
+0,          0,          0,      682,     2728, 0xaf365458
+0,        682,        682,      682,     2728, 0xcd684898
+0,       1364,       1364,      682,     2728, 0x5d514ae5
+0,       2046,       2046,      682,     2728, 0x48cb4605
+0,       2728,       2728,      682,     2728, 0x76ac43ee
+0,       3410,       3410,      682,     2728, 0x088355fa
+0,       4092,       4092,      682,     2728, 0xf66f4efa
+0,       4774,       4774,      682,     2728, 0x7efc3b1a
+0,       5456,       5456,      682,     2728, 0x1c1745f3
+0,       6138,       6138,      682,     2728, 0x824d50fe
+0,       6820,       6820,      682,     2728, 0xb26c5b94
+0,       7502,       7502,      682,     2728, 0x02d5636d
+0,       8184,       8184,      682,     2728, 0x65e647de
+0,       8866,       8866,      682,     2728, 0x93374812
+0,       9548,       9548,      682,     2728, 0xa0d55153
+0,      10230,      10230,      682,     2728, 0x56cf392c
+0,      10912,      10912,      682,     2728, 0x554051c3
+0,      11594,      11594,      682,     2728, 0xbc3655ce
+0,      12276,      12276,      682,     2728, 0xb432529f
+0,      12958,      12958,      682,     2728, 0x64df52a7

diff --git a/tests/ref/fate/filter-pan-stereo2 b/tests/ref/fate/filter-pan-stereo2
new file mode 100644
index 0000000..7f96799
--- /dev/null
+++ b/tests/ref/fate/filter-pan-stereo2

@@ -0,0 +1,26 @@
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: pcm_s16le
+#sample_rate 0: 44100
+#channel_layout 0: 3
+#channel_layout_name 0: stereo
+0,          0,          0,      682,     2728, 0x35c15e81
+0,        682,        682,      682,     2728, 0x770f2e2d
+0,       1364,       1364,      682,     2728, 0x6f8d6d9f
+0,       2046,       2046,      682,     2728, 0x5b9e46f3
+0,       2728,       2728,      682,     2728, 0xda7e5fdc
+0,       3410,       3410,      682,     2728, 0x55e446f8
+0,       4092,       4092,      682,     2728, 0x4fcf4f8e
+0,       4774,       4774,      682,     2728, 0x86e757b2
+0,       5456,       5456,      682,     2728, 0x8d4256e9
+0,       6138,       6138,      682,     2728, 0xa28d4e58
+0,       6820,       6820,      682,     2728, 0xacaa5738
+0,       7502,       7502,      682,     2728, 0xd1fe580f
+0,       8184,       8184,      682,     2728, 0x09a05c0a
+0,       8866,       8866,      682,     2728, 0xcaf2555d
+0,       9548,       9548,      682,     2728, 0xcee159fc
+0,      10230,      10230,      682,     2728, 0xaa3444a5
+0,      10912,      10912,      682,     2728, 0x23b76512
+0,      11594,      11594,      682,     2728, 0xef2243dd
+0,      12276,      12276,      682,     2728, 0x36486118
+0,      12958,      12958,      682,     2728, 0xfd59626c

diff --git a/tests/ref/fate/filter-pan-stereo3 b/tests/ref/fate/filter-pan-stereo3
new file mode 100644
index 0000000..effe11c
--- /dev/null
+++ b/tests/ref/fate/filter-pan-stereo3

@@ -0,0 +1,26 @@
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: pcm_s16le
+#sample_rate 0: 44100
+#channel_layout 0: 3
+#channel_layout_name 0: stereo
+0,          0,          0,     1024,     4096, 0xa0d1fbb3
+0,       1024,       1024,     1024,     4096, 0x598a056c
+0,       2048,       2048,     1024,     4096, 0x741ded28
+0,       3072,       3072,     1024,     4096, 0xc651b0ec
+0,       4096,       4096,     1024,     4096, 0x1082057c
+0,       5120,       5120,     1024,     4096, 0x70f7f00d
+0,       6144,       6144,     1024,     4096, 0xaed7fc53
+0,       7168,       7168,     1024,     4096, 0x4250faae
+0,       8192,       8192,     1024,     4096, 0xf7fcf61a
+0,       9216,       9216,     1024,     4096, 0xb1350562
+0,      10240,      10240,     1024,     4096, 0x16adea0b
+0,      11264,      11264,     1024,     4096, 0x706fd834
+0,      12288,      12288,     1024,     4096, 0x5431dd24
+0,      13312,      13312,     1024,     4096, 0xfaedfb73
+0,      14336,      14336,     1024,     4096, 0xee3d07e2
+0,      15360,      15360,     1024,     4096, 0x2561eeb8
+0,      16384,      16384,     1024,     4096, 0x8f76fc05
+0,      17408,      17408,     1024,     4096, 0xef05f0a1
+0,      18432,      18432,     1024,     4096, 0x4e92f19a
+0,      19456,      19456,     1024,     4096, 0x81b6e0bc

diff --git a/tests/ref/fate/filter-pan-stereo4 b/tests/ref/fate/filter-pan-stereo4
new file mode 100644
index 0000000..6d10ed6
--- /dev/null
+++ b/tests/ref/fate/filter-pan-stereo4

@@ -0,0 +1,26 @@
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: pcm_s16le
+#sample_rate 0: 44100
+#channel_layout 0: 0
+#channel_layout_name 0: 2 channels
+0,          0,          0,     1024,     4096, 0x7bd6fc44
+0,       1024,       1024,     1024,     4096, 0xed38ec37
+0,       2048,       2048,     1024,     4096, 0x4696ea64
+0,       3072,       3072,     1024,     4096, 0xf2c8e38c
+0,       4096,       4096,     1024,     4096, 0x9aa8f5c9
+0,       5120,       5120,     1024,     4096, 0xfb46fe06
+0,       6144,       6144,     1024,     4096, 0x6700fa3e
+0,       7168,       7168,     1024,     4096, 0x235e0687
+0,       8192,       8192,     1024,     4096, 0x8b9aecab
+0,       9216,       9216,     1024,     4096, 0x62f3e0db
+0,      10240,      10240,     1024,     4096, 0xe59ef89c
+0,      11264,      11264,     1024,     4096, 0x1a48f2fe
+0,      12288,      12288,     1024,     4096, 0x9026e182
+0,      13312,      13312,     1024,     4096, 0xec4d05d1
+0,      14336,      14336,     1024,     4096, 0x7ee70559
+0,      15360,      15360,     1024,     4096, 0x807ae3e6
+0,      16384,      16384,     1024,     4096, 0xf0d2fc29
+0,      17408,      17408,     1024,     4096, 0xd9e7f52b
+0,      18432,      18432,     1024,     4096, 0x0f1ce5f7
+0,      19456,      19456,     1024,     4096, 0x6b3cf294

diff --git a/tests/ref/fate/filter-pan-upmix1 b/tests/ref/fate/filter-pan-upmix1
new file mode 100644
index 0000000..87c70a7
--- /dev/null
+++ b/tests/ref/fate/filter-pan-upmix1

@@ -0,0 +1,26 @@
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: pcm_s16le
+#sample_rate 0: 44100
+#channel_layout 0: 0
+#channel_layout_name 0: 4 channels
+0,          0,          0,     1024,     8192, 0xdaadfc44
+0,       1024,       1024,     1024,     8192, 0xe6d9ec37
+0,       2048,       2048,     1024,     8192, 0x5edfea64
+0,       3072,       3072,     1024,     8192, 0x7f7be38c
+0,       4096,       4096,     1024,     8192, 0x3f60f5c9
+0,       5120,       5120,     1024,     8192, 0xcee4fe06
+0,       6144,       6144,     1024,     8192, 0xdb98fa3e
+0,       7168,       7168,     1024,     8192, 0x4cb30687
+0,       8192,       8192,     1024,     8192, 0x00f6ecab
+0,       9216,       9216,     1024,     8192, 0xa7b0e0db
+0,      10240,      10240,     1024,     8192, 0xc2dcf89c
+0,      11264,      11264,     1024,     8192, 0x30d6f2fe
+0,      12288,      12288,     1024,     8192, 0xf83ae182
+0,      13312,      13312,     1024,     8192, 0xeecd05d1
+0,      14336,      14336,     1024,     8192, 0xfd3b0559
+0,      15360,      15360,     1024,     8192, 0xcd69e3e6
+0,      16384,      16384,     1024,     8192, 0xdf80fc29
+0,      17408,      17408,     1024,     8192, 0x7e8bf52b
+0,      18432,      18432,     1024,     8192, 0xee07e5f7
+0,      19456,      19456,     1024,     8192, 0xc874f294

diff --git a/tests/ref/fate/filter-pan-upmix2 b/tests/ref/fate/filter-pan-upmix2
new file mode 100644
index 0000000..56db1c1
--- /dev/null
+++ b/tests/ref/fate/filter-pan-upmix2

@@ -0,0 +1,26 @@
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: pcm_s16le
+#sample_rate 0: 44100
+#channel_layout 0: 0
+#channel_layout_name 0: 9 channels
+0,          0,          0,      512,     9216, 0xd561bb2b
+0,        512,        512,      512,     9216, 0x591805f9
+0,       1024,       1024,      512,     9216, 0xd46fecd6
+0,       1536,       1536,      512,     9216, 0x1e1f176b
+0,       2048,       2048,      512,     9216, 0xe383f1a6
+0,       2560,       2560,      512,     9216, 0xc628c661
+0,       3072,       3072,      512,     9216, 0x1ad09914
+0,       3584,       3584,      512,     9216, 0x798533cc
+0,       4096,       4096,      512,     9216, 0x4913d581
+0,       4608,       4608,      512,     9216, 0x59f506b3
+0,       5120,       5120,      512,     9216, 0x8885fa28
+0,       5632,       5632,      512,     9216, 0x769ffa95
+0,       6144,       6144,      512,     9216, 0xc636cc46
+0,       6656,       6656,      512,     9216, 0x4740f34b
+0,       7168,       7168,      512,     9216, 0x00d2de28
+0,       7680,       7680,      512,     9216, 0x704a07fc
+0,       8192,       8192,      512,     9216, 0x54a4f682
+0,       8704,       8704,      512,     9216, 0xd6dac181
+0,       9216,       9216,      512,     9216, 0xfcd6fe28
+0,       9728,       9728,      512,     9216, 0x5c41cbdc

diff --git a/tests/ref/fate/filter-pixdesc-gray14be b/tests/ref/fate/filter-pixdesc-gray14be
new file mode 100644
index 0000000..dc7836a
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-gray14be

@@ -0,0 +1 @@
+pixdesc-gray14be    6c9faae02a63f17d78ae6bff2866c0c1

diff --git a/tests/ref/fate/filter-pixdesc-gray14le b/tests/ref/fate/filter-pixdesc-gray14le
new file mode 100644
index 0000000..9057875
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-gray14le

@@ -0,0 +1 @@
+pixdesc-gray14le    713c6b98b8f22a0716bf3541fb311936

diff --git a/tests/ref/fate/filter-pixdesc-grayf32be b/tests/ref/fate/filter-pixdesc-grayf32be
new file mode 100644
index 0000000..423bbfb
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-grayf32be

@@ -0,0 +1 @@
+pixdesc-grayf32be   381c8d0f19d286809b91cd6e6c0048ab

diff --git a/tests/ref/fate/filter-pixdesc-grayf32le b/tests/ref/fate/filter-pixdesc-grayf32le
new file mode 100644
index 0000000..a76e0a9
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-grayf32le

@@ -0,0 +1 @@
+pixdesc-grayf32le   381c8d0f19d286809b91cd6e6c0048ab

diff --git a/tests/ref/fate/filter-pixdesc-p016be b/tests/ref/fate/filter-pixdesc-p016be
new file mode 100644
index 0000000..7c93407
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-p016be

@@ -0,0 +1 @@
+pixdesc-p016be      784a49bf554861da9d0809a615bcf813

diff --git a/tests/ref/fate/filter-pixdesc-p016le b/tests/ref/fate/filter-pixdesc-p016le
new file mode 100644
index 0000000..c723a0f
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-p016le

@@ -0,0 +1 @@
+pixdesc-p016le      ed04897de0a6788bb3458e7365f10d36

diff --git a/tests/ref/fate/filter-pixfmts-copy b/tests/ref/fate/filter-pixfmts-copy
index 124ddde..5385036 100644
--- a/tests/ref/fate/filter-pixfmts-copy
+++ b/tests/ref/fate/filter-pixfmts-copy

@@ -41,16 +41,22 @@
 gray10le            917d687103b2adcca7132bfc070ca54a
 gray12be            9685614450f1282be433d2b07234ca1f
 gray12le            2700bd7fb3fea56e54eb03e31d6d4e57
+gray14be            19ed2bf25878980d6f81f6ae699024ec
+gray14le            4b148b26b30040c05dc248a8852f31ac
 gray16be            08d997a3faa25a3db9d6be272d282eef
 gray16le            df65eb804360795e3e38a2701fa9641a
 gray9be             6382a14594a8b68f0ec7de25531f9334
 gray9le             4eb1dda58706436e3b69aef29b0089db
+grayf32be           f3bf178835f8146aa09d1da94bba4d8a
+grayf32le           fb6ea85bfbc8cd21c51fc0e110197294
 monob               8b04f859fee6a0be856be184acd7a0b5
 monow               54d16d2c01abfd72ecdb5e51e283937c
 nv12                8e24feb2c544dc26a20047a71e4c27aa
 nv21                335d85c9af6110f26ae9e187a82ed2cf
 p010be              7f9842d6015026136bad60d03c035cc3
 p010le              c453421b9f726bdaf2bacf59a492c43b
+p016be              7f9842d6015026136bad60d03c035cc3
+p016le              c453421b9f726bdaf2bacf59a492c43b
 pal8                ff5929f5b42075793b2c34cb441bede5
 rgb0                0de71e5a1f97f81fb51397a0435bfa72
 rgb24               f4438057d046e6d98ade4e45294b21be

diff --git a/tests/ref/fate/filter-pixfmts-crop b/tests/ref/fate/filter-pixfmts-crop
index e21479c..ae48c2b 100644
--- a/tests/ref/fate/filter-pixfmts-crop
+++ b/tests/ref/fate/filter-pixfmts-crop

@@ -41,14 +41,20 @@
 gray10le            fd83f7489880160783ddb125615b4638
 gray12be            472700c26cc49b8d5f74af141f6a0d38
 gray12le            4f6537fe1f32b3963350f8c435009433
+gray14be            302b5b534f64ee15fffe2d3818e8c29c
+gray14le            9c205ae791cbb9e479beb0ece236c05f
 gray16be            38f599da990224de86e3dc7a543121a9
 gray16le            9ff7c866bd98def4e6c91542c1c45f80
 gray9be             8ffcb18d699480f55414bfc21ab33321
 gray9le             4d1932d4968a248584f5e39c25f1dd43
+grayf32be           cf40ec06a8abe54852b7f85a00549eec
+grayf32le           b672526c9da9c8959ab881f242f6890a
 nv12                92cda427f794374731ec0321ee00caac
 nv21                1bcfc197f4fb95de85ba58182d8d2f69
 p010be              8b2de2eb6b099bbf355bfc55a0694ddc
 p010le              373b50c766dfd0a8e79c9a73246d803a
+p016be              8b2de2eb6b099bbf355bfc55a0694ddc
+p016le              373b50c766dfd0a8e79c9a73246d803a
 pal8                1f2cdc8e718f95c875dbc1034a688bfb
 rgb0                736646b70dd9a0be22b8da8041e35035
 rgb24               c5fbbf816bb2000f4d2914e335698ef5

diff --git a/tests/ref/fate/filter-pixfmts-field b/tests/ref/fate/filter-pixfmts-field
index ba2af6e..857ded1 100644
--- a/tests/ref/fate/filter-pixfmts-field
+++ b/tests/ref/fate/filter-pixfmts-field

@@ -41,16 +41,22 @@
 gray10le            c749b80049b152f4ba3e66a72c0c5acc
 gray12be            d34c50810b37e6f97dffdf6a8ab958de
 gray12le            cf71b8fee47ce7821f3ae9f9b62ae39a
+gray14be            2644f330259d70793d789b8dc3c01226
+gray14le            7776a471945d303088012cbc2ff2a2d0
 gray16be            e1700e056de9917744a7ff4ab2ca63fd
 gray16le            338de7ac5f7d36d5ad5ac2c8d5bbea68
 gray9be             25e50940fa300a8f09edfb6eba4fd250
 gray9le             1146cfc1b92bfd07ed238e65ffcd134f
+grayf32be           72fbfa47b2863658a8a80d588f23b3e7
+grayf32le           6b856bdbf2a2bfcd2bc7d50f109daaf0
 monob               2129cc72a484d7e10a44de9117aa9f80
 monow               03d783611d265cae78293f88ea126ea1
 nv12                16f7a46708ef25ebd0b72e47920cc11e
 nv21                7294574037cc7f9373ef5695d8ebe809
 p010be              a0311a09bba7383553267d2b3b9c075e
 p010le              ee09a18aefa3ebe97715b3a7312cb8ff
+p016be              a0311a09bba7383553267d2b3b9c075e
+p016le              ee09a18aefa3ebe97715b3a7312cb8ff
 pal8                0658c18dcd8d052d59dfbe23f5b368d9
 rgb0                ca3fa6e865b91b3511c7f2bf62830059
 rgb24               25ab271e26a5785be169578d99da5dd0

diff --git a/tests/ref/fate/filter-pixfmts-fieldmatch b/tests/ref/fate/filter-pixfmts-fieldmatch
index 99946c8..c3165b8 100644
--- a/tests/ref/fate/filter-pixfmts-fieldmatch
+++ b/tests/ref/fate/filter-pixfmts-fieldmatch

@@ -2,4 +2,4 @@
 yuv411p             b913e634ad37ce046240252bed8681fb
 yuv420p             a9286560141eb14595e427dbe5829b00
 yuv422p             11ad22ce00c5e8a30d0472f29fb15434
-yuv444p             9350a3f23cd7d95ec441a49f63f55953
+yuv444p             6c5b0c1343d625d0656b6755906fd874

diff --git a/tests/ref/fate/filter-pixfmts-fieldorder b/tests/ref/fate/filter-pixfmts-fieldorder
index 84a9e00..fc00345 100644
--- a/tests/ref/fate/filter-pixfmts-fieldorder
+++ b/tests/ref/fate/filter-pixfmts-fieldorder

@@ -41,10 +41,14 @@
 gray10le            16e4db1d611ec3fa5c9fd8fbdbf1ffcc
 gray12be            1c3285c150e1dddcf0fbee405cfb068e
 gray12le            a57b6199f5690add0ac0150fa95c4988
+gray14be            1e3d0d0421cf84eac93d7ab1964207ff
+gray14le            04899f53627203bd1fe3f17fb0de199c
 gray16be            293a36548ce16543494790f8f7f76a05
 gray16le            84f83f5fcbb5d458efb8395a50a3797e
 gray9be             ec877f5bcf0ea275a6f36c12cc9adf11
 gray9le             fba944fde7923d5089f4f52d12988b9e
+grayf32be           1aa7960131f880c54fe3c77f13448674
+grayf32le           4029ac9d197f255794c1b9e416520fc7
 rgb0                2e3d8c91c7a83d451593dfd06607ff39
 rgb24               b82577f8215d3dc2681be60f1da247af
 rgb444be            1c3afc3a0c53c51139c76504f59bb1f4

diff --git a/tests/ref/fate/filter-pixfmts-hflip b/tests/ref/fate/filter-pixfmts-hflip
index 875980d..e97c185 100644
--- a/tests/ref/fate/filter-pixfmts-hflip
+++ b/tests/ref/fate/filter-pixfmts-hflip

@@ -41,14 +41,20 @@
 gray10le            6baac1da6be3789409b67cd506afe7da
 gray12be            de7b5ef4b513e7e8270c617249d1cbdf
 gray12le            e8d0739ff61649bd82722b3134cbe776
+gray14be            22560aaac37f5bb2982819b752bf4608
+gray14le            d4b2f5e7c4bbd39130655b8f2c55f010
 gray16be            cf7294d9aa23e1b838692ec01ade587b
 gray16le            d91ce41e304419bcf32ac792f01bd64f
 gray9be             ac8d260669479ae720a5b6d4d8639e34
 gray9le             424fc581947bc8c357c9ec5e3c1c04d1
+grayf32be           a69add7bbf892a71fe81b3b75982dbe2
+grayf32le           4563e176a35dc8a8a07e0829fad5eb88
 nv12                801e58f1be5fd0b5bc4bf007c604b0b4
 nv21                9f10dfff8963dc327d3395af21f0554f
 p010be              744b13e44d39e1ff7588983fa03e0101
 p010le              a50b160346ab94f55a425065b57006f0
+p016be              744b13e44d39e1ff7588983fa03e0101
+p016le              a50b160346ab94f55a425065b57006f0
 pal8                5b7c77d99817b4f52339742a47de7797
 rgb0                0092452f37d73da20193265ace0b7d57
 rgb24               21571104e6091a689feabb7867e513dd

diff --git a/tests/ref/fate/filter-pixfmts-il b/tests/ref/fate/filter-pixfmts-il
index c6885b9..a006fc1 100644
--- a/tests/ref/fate/filter-pixfmts-il
+++ b/tests/ref/fate/filter-pixfmts-il

@@ -41,16 +41,22 @@
 gray10le            b7d6e49e8d1291f2b0a57d55e9478ef1
 gray12be            c62bc3def5ea217dfb68433905cb9d64
 gray12le            5bd0fef836928e1e19a315782a8c1302
+gray14be            5b3a15c182e2daed65dc39c33fd62735
+gray14le            3573d6870b14256f01800066d36ad862
 gray16be            92c3b09f371b610cc1b6a9776034f4d0
 gray16le            1db278d23a554e01910cedacc6c02521
 gray9be             ed7db5bb2ddc09bc26068c8b858db204
 gray9le             2ec9188f0dcfefef76a09f371d7beb8e
+grayf32be           f36197c9e2ef5c50a995e980c1a37203
+grayf32le           8bf3d295c3ffd53da0e06d0702e7c1ca
 monob               faba75df28033ba7ce3d82ff2a99ee68
 monow               6e9cfb8d3a344c5f0c3e1d5e1297e580
 nv12                3c3ba9b1b4c4dfff09c26f71b51dd146
 nv21                ab586d8781246b5a32d8760a61db9797
 p010be              3df51286ef66b53e3e283dbbab582263
 p010le              eadcd8241e97e35b2b47d5eb2eaea6cd
+p016be              3df51286ef66b53e3e283dbbab582263
+p016le              eadcd8241e97e35b2b47d5eb2eaea6cd
 rgb0                cfaf68671e43248267d8cd50cae8c13f
 rgb24               88894f608cf33ba310f21996748d77a7
 rgb444be            99d36d814988fb388aacdef575dacfcf

diff --git a/tests/ref/fate/filter-pixfmts-lut b/tests/ref/fate/filter-pixfmts-lut
index db3fd41..41c66e2 100644
--- a/tests/ref/fate/filter-pixfmts-lut
+++ b/tests/ref/fate/filter-pixfmts-lut

@@ -12,6 +12,12 @@
 gbrp14le            bdfdfd6f36c60497d1cdae791f3cc117
 gbrp16le            df095ef3a20995935cfcaf144afc68b6
 gbrp9le             a8c4e29f4cb627db81ba053e0853e702
+gray                20b14b5e26cd11300ed1249e04082170
+gray10le            8f4140b55e847cc423002b89666db5ea
+gray12le            ea89c02f6b3af49ddaf13364ed33d86d
+gray14le            12bebea325a7822e890675bfc5111f0c
+gray16le            aa10599924fb2440fa12b76e90f57dcb
+gray9le             7d9cc9ad6118674c547a54281d10cf05
 rgb24               a356171207723a580e7d277078072005
 rgb48le             5c7dd8575836d18c91e09f1915cf9aa9
 rgba                7bc854c2698b78af3e9159a19c2d9d21

diff --git a/tests/ref/fate/filter-pixfmts-null b/tests/ref/fate/filter-pixfmts-null
index 124ddde..5385036 100644
--- a/tests/ref/fate/filter-pixfmts-null
+++ b/tests/ref/fate/filter-pixfmts-null

@@ -41,16 +41,22 @@
 gray10le            917d687103b2adcca7132bfc070ca54a
 gray12be            9685614450f1282be433d2b07234ca1f
 gray12le            2700bd7fb3fea56e54eb03e31d6d4e57
+gray14be            19ed2bf25878980d6f81f6ae699024ec
+gray14le            4b148b26b30040c05dc248a8852f31ac
 gray16be            08d997a3faa25a3db9d6be272d282eef
 gray16le            df65eb804360795e3e38a2701fa9641a
 gray9be             6382a14594a8b68f0ec7de25531f9334
 gray9le             4eb1dda58706436e3b69aef29b0089db
+grayf32be           f3bf178835f8146aa09d1da94bba4d8a
+grayf32le           fb6ea85bfbc8cd21c51fc0e110197294
 monob               8b04f859fee6a0be856be184acd7a0b5
 monow               54d16d2c01abfd72ecdb5e51e283937c
 nv12                8e24feb2c544dc26a20047a71e4c27aa
 nv21                335d85c9af6110f26ae9e187a82ed2cf
 p010be              7f9842d6015026136bad60d03c035cc3
 p010le              c453421b9f726bdaf2bacf59a492c43b
+p016be              7f9842d6015026136bad60d03c035cc3
+p016le              c453421b9f726bdaf2bacf59a492c43b
 pal8                ff5929f5b42075793b2c34cb441bede5
 rgb0                0de71e5a1f97f81fb51397a0435bfa72
 rgb24               f4438057d046e6d98ade4e45294b21be

diff --git a/tests/ref/fate/filter-pixfmts-pad b/tests/ref/fate/filter-pixfmts-pad
index e777211..71f5ddf 100644
--- a/tests/ref/fate/filter-pixfmts-pad
+++ b/tests/ref/fate/filter-pixfmts-pad

@@ -18,6 +18,7 @@
 gray                ddc663a0491df3959d9c5795dceaa72e
 gray10le            e6559c1c8c05ce89f44b465573db44e7
 gray12le            1e6c6757658c7ae8a1f830432c5b7722
+gray14le            af3f2f911c71cb34a8179a3291b5c90f
 gray16le            468bda6155bdc7a7a20c34d6e599fd16
 gray9le             f8f3dfe31ca5fcba828285bceefdab9a
 nv12                381574979cb04be10c9168540310afad
@@ -62,8 +63,8 @@
 yuva444p10le        251ea4ead8300d752eb355a08cbb0352
 yuva444p16le        5b65287e1862d2d9f1ad2cfdcde94661
 yuva444p9le         e6946c10b94c271e7ea24b3bcff314e1
-yuvj411p            ca967e68759a4956729dd366adc7e7fa
-yuvj420p            c00611cd5f1558047d579d8a7d30e381
-yuvj422p            b3acdf07147a7598836065836ad8420b
-yuvj440p            3446ba4b1d7fdf536c926cee643c2b35
-yuvj444p            3b0f1a185af048b9e0b202d003fc7e62
+yuvj411p            87dbac57b211ab4823c1abbd702f1516
+yuvj420p            1abef62bce65131ca4913eb2006fd860
+yuvj422p            198c57b519e2be14b150889bd7f94898
+yuvj440p            e6533260d197ad15e39319117c57473e
+yuvj444p            26a44748960513783ea676eff409d89a

diff --git a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale
index f43c519..05879ee 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale

@@ -41,16 +41,22 @@
 gray10le            37fd2e1ec6b66410212d39a342e864df
 gray12be            950de5d1b6b943a26c51f6a157e19a14
 gray12le            9c3b154a8bb0a73a3b465892dbc23b36
+gray14be            db9094229f32fb22c5cf06471b9a1cfa
+gray14le            c33308eb8b40142dfd9273249c1cd73a
 gray16be            32891cb0928b1119d8d43a6e1bef0e2b
 gray16le            f96cfb5652b090dad52615930f0ce65f
 gray9be             779dec0c6c2df008128b91622a20daf8
 gray9le             fa87a96ca275f82260358635f838b514
+grayf32be           5e4c715519f53c15f1345df90481e5f5
+grayf32le           2ff1b84023e820307b1ba7a9550115bc
 monob               f01cb0b623357387827902d9d0963435
 monow               35c68b86c226d6990b2dcb573a05ff6b
 nv12                b118d24a3653fe66e5d9e079033aef79
 nv21                c74bb1c10dbbdee8a1f682b194486c4d
 p010be              1d6726d94bf1385996a9a9840dd0e878
 p010le              4b316f2b9e18972299beb73511278fa8
+p016be              31e204018cbb53f8988c4e1174ea8ce9
+p016le              d5afe557f492a09317e525d7cb782f5b
 pal8                29e10892009b2cfe431815ec3052ed3b
 rgb0                fbd27e98154efb7535826afed41e9bb0
 rgb24               e022e741451e81f2ecce1c7240b93e87

diff --git a/tests/ref/fate/filter-pixfmts-transpose b/tests/ref/fate/filter-pixfmts-transpose
new file mode 100644
index 0000000..4464409
--- /dev/null
+++ b/tests/ref/fate/filter-pixfmts-transpose

@@ -0,0 +1,114 @@
+0bgr                6929c1e308d2f4f941d002627047d262
+0rgb                cf1bedd0784a3efd3ab00c4e44005c37
+abgr                6d6f896f853a6c6f93ee70dba9af3d17
+argb                87bbd23debb94d486ac3a6b6c0b005f9
+ayuv64le            e4c07e0d5b333b3bc9eb4f3ce6af3a2c
+bgr0                df3a6eedd4939ce09a357b655ac2962a
+bgr24               f9a08135e5d58c0b2a5509c369a88414
+bgr444be            dd9e990a327649ec0b2b81a8ee4d8f49
+bgr444le            bee1d9fae8733d0c0669bca2ac4dfaf6
+bgr48be             39f48f6353dfc772af36cbb41e6126a4
+bgr48le             9a61d9531b1f6de44b27f6bb9b4dfc79
+bgr4_byte           ddff9da461afce90e3122a41d79b287d
+bgr555be            24e5c6502a6d927f8ba88f3320ebf619
+bgr555le            5201d098979ea86a66d8df1ef41c79ad
+bgr565be            59afe17b455e921daf428ba05a40bab9
+bgr565le            b2709790684abbd2133906b637f2b4b8
+bgr8                b6ee15f70989d2f52f184e32b3af2c18
+bgra                f2fe61e08446900ad209f2c586997e15
+bgra64be            8d01994c8c32e628fcf9749851f1ffe8
+bgra64le            faaef6d280f92e7e8abdd9fa4a61f7b5
+gbrap               0899b3af50d35a63bfecb419a5b29968
+gbrap10be           3e3be2d8f9aa5f449a1df404e27d0054
+gbrap10le           db4e4861010cbbf726492fad282d5813
+gbrap12be           1518c9a565d1ba1a45dd369acc1aa75e
+gbrap12le           714fe318af81a46f83655c6e7e13351e
+gbrap16be           39d488528aacff466aac7539c9b948a8
+gbrap16le           5426ac9457289927bfe2ec03038a8780
+gbrp                7b4b6a2f1cdc51455b25515c3ecea944
+gbrp10be            d7401725699b2ddf954caa16a0878a1e
+gbrp10le            6036711969eae1979be6358f688bd9c8
+gbrp12be            ec7d6e69fc579619b53d57a76c20480d
+gbrp12le            bf7478185274486c3f7dd4db1da8f7d0
+gbrp14be            9b66f22e4315aaa878a430ae3f44ab57
+gbrp14le            16f30349b42dca007b37b8522d3018df
+gbrp16be            0d003b88d4f446ae9ba12cab1cbb359a
+gbrp16le            a1c09038fa4636c9843ab8dd2b7601ea
+gbrp9be             df381b4b27be25d172fa556434478807
+gbrp9le             a5301e978f68b29bfc613b2462ec4888
+gray                c5f8bc6636fd15dbc57deb4bba1e7379
+gray10be            48b421da79c195fd91dffb8fca79a8a2
+gray10le            7774e3296916b896afa46f626334a280
+gray12be            89f1c4b7821b771f6d967f9db871f8ef
+gray12le            43d392c3dcbd79b47cce31f2006c5050
+gray14be            5e2d1eb84d6d375502b3210d572d7433
+gray14le            6114774e9d07b08ec52fabaf6d0ee85a
+gray16be            4aef307021a91b1de67f1d4381a39132
+gray16le            76f2afe156edca7ae05cfa4e5867126e
+gray9be             2c425fa532c940d226822da8b3592310
+gray9le             bcc575942910b3c72eaa72e8794f3acd
+grayf32be           823288e1ec497bb1f22c070e502e5272
+grayf32le           6e9ec0e1cac3617f3041e681afd2c575
+nv12                1965e3826144686748f2f6b516fca5ba
+nv21                292adaf5271c5c8516b71640458c01f4
+p010be              ad0de2cc9bff81688b182a870fcf7000
+p010le              e7ff5143595021246733ce6bd0a769e8
+p016be              ad0de2cc9bff81688b182a870fcf7000
+p016le              e7ff5143595021246733ce6bd0a769e8
+rgb0                31ea5da7fe779c6ea0a33f1d28aad918
+rgb24               47654cabaaad79170b90afd5a02161dd
+rgb444be            3cac1f0c43a74d2a95eb02e187070845
+rgb444le            46d602468bd9e5a430622e3d4b7c8f40
+rgb48be             400932419bbb780614254253ef5591c3
+rgb48le             6a99c40f21629cb0655e8772d7190374
+rgb4_byte           d3990da196266305a3f2e5b1d72401a5
+rgb555be            79e4503ff0d5cf52d3a7901397499a28
+rgb555le            c65f2594c0b3107a322f7aeb81aa8a16
+rgb565be            0c746b5063d02d6cb98e9e9a59ad3b99
+rgb565le            63b02db11c3d20be54d218c7c44f8ddb
+rgb8                c90feb30c3c9391ef5f470209d7b7a15
+rgba                4d76a9542143752a4ac30f82f88f68f1
+rgba64be            a60041217f4c0cd796d19d3940a12a41
+rgba64le            ad47197774858858ae7b0c177dffa459
+xyz12be             68e5cba640f6e4ef72dff950e88b5342
+xyz12le             8b6b6a6db4d7561e80db88ccaecce7a9
+ya8                 d4b7a62f80681fa44c977ff3a64f4ce4
+yuv410p             4c0143429edd30aa01493447c90132ea
+yuv420p             2fa5b2201c75034206cc20e2c6134aed
+yuv420p10be         0931660f930d9be8aea9d0c76b406055
+yuv420p10le         9ce12b168c49db871836c979b526c1f1
+yuv420p12be         73d6be4230b6f4e4e269977afab56323
+yuv420p12le         6938815c8acd690138506cbb5f005fb8
+yuv420p14be         bf76a805b9c2f9808c73492d3b8da268
+yuv420p14le         5df47483b89ffe6ef4bbf14058d7d3b3
+yuv420p16be         3a64132681656be6db635f4e6a282dc9
+yuv420p16le         c77a81e47d1690a338693ec6f323ef1e
+yuv420p9be          2307cb7f324df299c4829b11cb0e6bc7
+yuv420p9le          c735c3c8424c70d822ab4a1fe1f504e2
+yuv444p             eb755977ca464baac5f03771858080ae
+yuv444p10be         866b59a23dff3dc1cb6bf7bd7da26da4
+yuv444p10le         417d62f15abf4777c4ec5e0d00796a9e
+yuv444p12be         c1da110f0ee898fbcd4b45afb5aed58b
+yuv444p12le         dc18bddd7b6bb9fdb2e0c7e7476375fa
+yuv444p14be         2f181fa3403e7911b233d3d976abea73
+yuv444p14le         ac718343878786a25b9a50924f9aabca
+yuv444p16be         128214efef6fffe3293db513ae700d4a
+yuv444p16le         a8b6613094b8d2b275e2e4bc4512c9e4
+yuv444p9be          eae529dd1cdb7f512ae2674334c1ef08
+yuv444p9le          06ffcacdd03f6457614c352a4ccb7642
+yuva420p            058d00d9564be827e5db6ce2b8b2dbb5
+yuva420p10be        333209d11916161a65c6453d2bf435c2
+yuva420p10le        4b7ea5b59a712f1f59cd394b3b40ff69
+yuva420p16be        5984c7f4d14e4cf0e511cb0aa6c53089
+yuva420p16le        34e29fc4a22a0ab1ea01641d0df2ac86
+yuva420p9be         45ea80889575b31cccc83a4d16555497
+yuva420p9le         6e5cb3e761a9c45e26370307c49f8831
+yuva444p            4f9e649fbc2c0c91178d1576e462bb31
+yuva444p10be        9450fbac30b5f9da7414c895695591a9
+yuva444p10le        84a93637bf2c7e498380beff9b1fc503
+yuva444p16be        9fd2f00ea9bef8e488228bc0b47b28cb
+yuva444p16le        ae9fd8d1baea0f8626b963816d667d2d
+yuva444p9be         4ce11ae57780f74c78cdd5c06be4bded
+yuva444p9le         1b9cc85fd6ab0c7e240915a99e98d1c1
+yuvj420p            9603b8dd64daec41f0514197989c2b19
+yuvj444p            66ec9b3219df9eb2c1315d293602ab42

diff --git a/tests/ref/fate/filter-pixfmts-vflip b/tests/ref/fate/filter-pixfmts-vflip
index 84b9d56..51628f1 100644
--- a/tests/ref/fate/filter-pixfmts-vflip
+++ b/tests/ref/fate/filter-pixfmts-vflip

@@ -41,16 +41,22 @@
 gray10le            9c432a163f0cfe9ee2a4b72ae8a7c307
 gray12be            7423ce8a77fbc40c5d4776eb28fec60a
 gray12le            808158633559d7deebc7dac2d79e88f8
+gray14be            68b14e31a089d6bd1fa2082d66d004da
+gray14le            3842b874a9b05ce2ae3cad9ef7131013
 gray16be            29f24ba7cb0fc4fd2ae78963d008f6e6
 gray16le            a37e9c4ea76e8eeddc2af8f600ba2c10
 gray9be             dda11d4ffd62b414012ffc4667fb4971
 gray9le             159bf6482d217b2b8276eb2216cd7a09
+grayf32be           c1ba5943a0d24d70e6a280f37e4f4593
+grayf32le           8e6c048a5b3b8b26d3a5ddfce255f3f6
 monob               7810c4857822ccfc844d78f5e803269a
 monow               90a947bfcd5f2261e83b577f48ec57b1
 nv12                261ebe585ae2aa4e70d39a10c1679294
 nv21                2909feacd27bebb080c8e0fa41795269
 p010be              06e9354b6e0e38ba41736352cedc0bd5
 p010le              fd18d322bffbf5816902c13102872e22
+p016be              06e9354b6e0e38ba41736352cedc0bd5
+p016le              fd18d322bffbf5816902c13102872e22
 pal8                450b0155d0f2d5628bf95a442db5f817
 rgb0                56a7ea69541bcd27bef6a5615784722b
 rgb24               195e6dae1c3a488b9d3ceb7560d25d85

diff --git a/tests/ref/fate/filter-w3fdif-complex b/tests/ref/fate/filter-w3fdif-complex
index 4b334da..cbd8f06 100644
--- a/tests/ref/fate/filter-w3fdif-complex
+++ b/tests/ref/fate/filter-w3fdif-complex

@@ -3,33 +3,33 @@
 #codec_id 0: rawvideo
 #dimensions 0: 720x576
 #sar 0: 16/15
-0,         18,         18,        1,   622080, 0x21d21485
-0,         19,         19,        1,   622080, 0x600a5468
-0,         20,         20,        1,   622080, 0x9526f7b8
-0,         21,         21,        1,   622080, 0x8b3e661f
-0,         22,         22,        1,   622080, 0xff5cb5a9
-0,         23,         23,        1,   622080, 0x7e5e730c
-0,         24,         24,        1,   622080, 0x85219ac6
-0,         25,         25,        1,   622080, 0x2f3465a0
-0,         26,         26,        1,   622080, 0xddbf4da0
-0,         27,         27,        1,   622080, 0xc115d4ee
-0,         28,         28,        1,   622080, 0x7a8a8d72
-0,         29,         29,        1,   622080, 0xbafcd973
-0,         30,         30,        1,   622080, 0xd2c15603
-0,         31,         31,        1,   622080, 0xd7217855
-0,         32,         32,        1,   622080, 0x9a584eca
-0,         33,         33,        1,   622080, 0x9f3e1c40
-0,         34,         34,        1,   622080, 0x6d01efb7
-0,         35,         35,        1,   622080, 0x9ecfcce0
-0,         36,         36,        1,   622080, 0xb355fd7e
-0,         37,         37,        1,   622080, 0xc7784021
-0,         38,         38,        1,   622080, 0x13fe4187
-0,         39,         39,        1,   622080, 0xfa03b613
-0,         40,         40,        1,   622080, 0x2c9ccfcd
-0,         41,         41,        1,   622080, 0xcae6e6c6
-0,         42,         42,        1,   622080, 0x177968f9
-0,         43,         43,        1,   622080, 0xf708de36
-0,         44,         44,        1,   622080, 0x4491870a
-0,         45,         45,        1,   622080, 0x37709f98
-0,         46,         46,        1,   622080, 0x23e8d22f
-0,         47,         47,        1,   622080, 0x25cba876
+0,         18,         18,        1,   622080, 0xe1b21462
+0,         19,         19,        1,   622080, 0x1362538d
+0,         20,         20,        1,   622080, 0x0f55f79b
+0,         21,         21,        1,   622080, 0xfdb265f6
+0,         22,         22,        1,   622080, 0x2f8eb534
+0,         23,         23,        1,   622080, 0x0de472b1
+0,         24,         24,        1,   622080, 0x3e699a78
+0,         25,         25,        1,   622080, 0x66396524
+0,         26,         26,        1,   622080, 0x17244d40
+0,         27,         27,        1,   622080, 0x04a5d554
+0,         28,         28,        1,   622080, 0x0e278cd9
+0,         29,         29,        1,   622080, 0x7b53d8de
+0,         30,         30,        1,   622080, 0xe51a558e
+0,         31,         31,        1,   622080, 0xd58177e4
+0,         32,         32,        1,   622080, 0x270b4e1f
+0,         33,         33,        1,   622080, 0x3fbf1bdf
+0,         34,         34,        1,   622080, 0xbfebee9d
+0,         35,         35,        1,   622080, 0x2c6fcccf
+0,         36,         36,        1,   622080, 0x4a57fcc2
+0,         37,         37,        1,   622080, 0x33b53f5d
+0,         38,         38,        1,   622080, 0x93ba405d
+0,         39,         39,        1,   622080, 0xf4dbb54b
+0,         40,         40,        1,   622080, 0xf205ce68
+0,         41,         41,        1,   622080, 0x383fe5bc
+0,         42,         42,        1,   622080, 0x4dd06905
+0,         43,         43,        1,   622080, 0xc925de57
+0,         44,         44,        1,   622080, 0x7c8786e8
+0,         45,         45,        1,   622080, 0x550b9f60
+0,         46,         46,        1,   622080, 0x4162d1e9
+0,         47,         47,        1,   622080, 0xf609a847

diff --git a/tests/ref/fate/filter-w3fdif-simple b/tests/ref/fate/filter-w3fdif-simple
index 62efaae..09c0d7c 100644
--- a/tests/ref/fate/filter-w3fdif-simple
+++ b/tests/ref/fate/filter-w3fdif-simple

@@ -3,33 +3,33 @@
 #codec_id 0: rawvideo
 #dimensions 0: 720x576
 #sar 0: 16/15
-0,         18,         18,        1,   622080, 0xc73774f5
-0,         19,         19,        1,   622080, 0x4ea3a400
-0,         20,         20,        1,   622080, 0x95153cda
-0,         21,         21,        1,   622080, 0xec39bf0b
-0,         22,         22,        1,   622080, 0x94b6f836
-0,         23,         23,        1,   622080, 0xc145c3ee
-0,         24,         24,        1,   622080, 0x4d4cdee2
-0,         25,         25,        1,   622080, 0x193ebc7c
-0,         26,         26,        1,   622080, 0xbd728fd8
-0,         27,         27,        1,   622080, 0xf0f3252f
-0,         28,         28,        1,   622080, 0xc012d20a
-0,         29,         29,        1,   622080, 0x7b5831b2
-0,         30,         30,        1,   622080, 0x464e9622
-0,         31,         31,        1,   622080, 0x46e3c6c0
-0,         32,         32,        1,   622080, 0xa6ec908b
-0,         33,         33,        1,   622080, 0x6a257595
-0,         34,         34,        1,   622080, 0xa6552ecc
-0,         35,         35,        1,   622080, 0xdecd1a91
-0,         36,         36,        1,   622080, 0xfaa53e71
-0,         37,         37,        1,   622080, 0xc94a9707
-0,         38,         38,        1,   622080, 0xb5727fd4
-0,         39,         39,        1,   622080, 0x143c018c
-0,         40,         40,        1,   622080, 0x92d110c9
-0,         41,         41,        1,   622080, 0x4f762fc0
-0,         42,         42,        1,   622080, 0x3dd2a7d2
-0,         43,         43,        1,   622080, 0xa5d02dc0
-0,         44,         44,        1,   622080, 0x2223ce3d
-0,         45,         45,        1,   622080, 0xe4a5fc36
-0,         46,         46,        1,   622080, 0x8384159e
-0,         47,         47,        1,   622080, 0x995efa57
+0,         18,         18,        1,   622080, 0x338874e8
+0,         19,         19,        1,   622080, 0x1a9da32b
+0,         20,         20,        1,   622080, 0x2ecc3cd9
+0,         21,         21,        1,   622080, 0x0441beec
+0,         22,         22,        1,   622080, 0x4de3f7ba
+0,         23,         23,        1,   622080, 0x59a4c388
+0,         24,         24,        1,   622080, 0x833ade92
+0,         25,         25,        1,   622080, 0x4c79bbf3
+0,         26,         26,        1,   622080, 0xe1998f77
+0,         27,         27,        1,   622080, 0xd00e2586
+0,         28,         28,        1,   622080, 0xe716d185
+0,         29,         29,        1,   622080, 0x24763136
+0,         30,         30,        1,   622080, 0xaeaa95a2
+0,         31,         31,        1,   622080, 0x92eec65a
+0,         32,         32,        1,   622080, 0x7cde9000
+0,         33,         33,        1,   622080, 0x98e2752c
+0,         34,         34,        1,   622080, 0x5ffe2db6
+0,         35,         35,        1,   622080, 0x1e911a65
+0,         36,         36,        1,   622080, 0x302d3dc2
+0,         37,         37,        1,   622080, 0xc1399647
+0,         38,         38,        1,   622080, 0xc4477ebf
+0,         39,         39,        1,   622080, 0x50e900ca
+0,         40,         40,        1,   622080, 0x867e0f7a
+0,         41,         41,        1,   622080, 0xa2412ebe
+0,         42,         42,        1,   622080, 0xc7a5a7e6
+0,         43,         43,        1,   622080, 0xaa5d2de7
+0,         44,         44,        1,   622080, 0x9bf0ce31
+0,         45,         45,        1,   622080, 0xfb88fbf9
+0,         46,         46,        1,   622080, 0xe6321572
+0,         47,         47,        1,   622080, 0x5541fa37

diff --git a/tests/ref/fate/filter-yadif-mode0 b/tests/ref/fate/filter-yadif-mode0
index 2c6346b..be807f9 100644
--- a/tests/ref/fate/filter-yadif-mode0
+++ b/tests/ref/fate/filter-yadif-mode0

@@ -3,33 +3,33 @@
 #codec_id 0: rawvideo
 #dimensions 0: 720x576
 #sar 0: 16/15
-0,          9,          9,        1,   622080, 0x6331caee
-0,         10,         10,        1,   622080, 0xa459e690
-0,         11,         11,        1,   622080, 0x6429c648
-0,         12,         12,        1,   622080, 0xa49891ca
-0,         13,         13,        1,   622080, 0x2a887404
-0,         14,         14,        1,   622080, 0xe8d49705
-0,         15,         15,        1,   622080, 0x1b627835
-0,         16,         16,        1,   622080, 0x686858fd
-0,         17,         17,        1,   622080, 0x2675174f
-0,         18,         18,        1,   622080, 0x78470e7f
-0,         19,         19,        1,   622080, 0xffb366ec
-0,         20,         20,        1,   622080, 0xd575da72
-0,         21,         21,        1,   622080, 0x5fb297f7
-0,         22,         22,        1,   622080, 0xbac77ca0
-0,         23,         23,        1,   622080, 0x3276ed72
-0,         24,         24,        1,   622080, 0x264092b2
-0,         25,         25,        1,   622080, 0x20ba1094
-0,         26,         26,        1,   622080, 0x76cc3139
-0,         27,         27,        1,   622080, 0x469a4902
-0,         28,         28,        1,   622080, 0x0ed7b8f5
-0,         29,         29,        1,   622080, 0xdc51aeac
-0,         30,         30,        1,   622080, 0xee06aa36
-0,         31,         31,        1,   622080, 0x7372405f
-0,         32,         32,        1,   622080, 0x9e0ee776
-0,         33,         33,        1,   622080, 0x39e6d8c9
-0,         34,         34,        1,   622080, 0x51d9ac9a
-0,         35,         35,        1,   622080, 0x2b63441d
-0,         36,         36,        1,   622080, 0x58afbd5e
-0,         37,         37,        1,   622080, 0xb972f716
-0,         38,         38,        1,   622080, 0x6a6df129
+0,          9,          9,        1,   622080, 0x77c0ca92
+0,         10,         10,        1,   622080, 0xbe7fe646
+0,         11,         11,        1,   622080, 0x4384c5da
+0,         12,         12,        1,   622080, 0x296b9168
+0,         13,         13,        1,   622080, 0x96d5738e
+0,         14,         14,        1,   622080, 0x769b9681
+0,         15,         15,        1,   622080, 0x461d778d
+0,         16,         16,        1,   622080, 0xb88c584b
+0,         17,         17,        1,   622080, 0x7d7b1635
+0,         18,         18,        1,   622080, 0x49c60dc0
+0,         19,         19,        1,   622080, 0x498765a4
+0,         20,         20,        1,   622080, 0x0caed8f9
+0,         21,         21,        1,   622080, 0x41d897d3
+0,         22,         22,        1,   622080, 0x7aeb7c93
+0,         23,         23,        1,   622080, 0xa8bced40
+0,         24,         24,        1,   622080, 0x11de928c
+0,         25,         25,        1,   622080, 0x64741075
+0,         26,         26,        1,   622080, 0x160f310e
+0,         27,         27,        1,   622080, 0x702d489c
+0,         28,         28,        1,   622080, 0xaf2fb8aa
+0,         29,         29,        1,   622080, 0x575bae0f
+0,         30,         30,        1,   622080, 0xfd68a990
+0,         31,         31,        1,   622080, 0x8b513f66
+0,         32,         32,        1,   622080, 0x0e6ae6c3
+0,         33,         33,        1,   622080, 0x3d12d8ab
+0,         34,         34,        1,   622080, 0x45d0ac80
+0,         35,         35,        1,   622080, 0xb18d4421
+0,         36,         36,        1,   622080, 0x2e81bd32
+0,         37,         37,        1,   622080, 0x852cf6cf
+0,         38,         38,        1,   622080, 0xb055f0e5

diff --git a/tests/ref/fate/filter-yadif-mode1 b/tests/ref/fate/filter-yadif-mode1
index e2d14d5..53741b0 100644
--- a/tests/ref/fate/filter-yadif-mode1
+++ b/tests/ref/fate/filter-yadif-mode1

@@ -3,62 +3,62 @@
 #codec_id 0: rawvideo
 #dimensions 0: 720x576
 #sar 0: 16/15
-0,         18,         18,        1,   622080, 0x6331caee
-0,         19,         19,        1,   622080, 0x625da883
-0,         20,         20,        1,   622080, 0xa459e690
-0,         21,         21,        1,   622080, 0xce5d891e
-0,         22,         22,        1,   622080, 0x6429c648
-0,         23,         23,        1,   622080, 0x608cc0ba
-0,         24,         24,        1,   622080, 0xa49891ca
-0,         25,         25,        1,   622080, 0x9721987f
-0,         26,         26,        1,   622080, 0x2a887404
-0,         27,         27,        1,   622080, 0x60d71d47
-0,         28,         28,        1,   622080, 0xe8d49705
-0,         29,         29,        1,   622080, 0x821e13cb
-0,         30,         30,        1,   622080, 0x1b627835
-0,         31,         31,        1,   622080, 0x1806c5f4
-0,         32,         32,        1,   622080, 0x686858fd
-0,         33,         33,        1,   622080, 0xab865773
-0,         34,         34,        1,   622080, 0x2675174f
-0,         35,         35,        1,   622080, 0x43a61a14
-0,         36,         36,        1,   622080, 0x78470e7f
-0,         37,         37,        1,   622080, 0xeb877bc6
-0,         38,         38,        1,   622080, 0xffb366ec
-0,         39,         39,        1,   622080, 0xda0906e7
-0,         40,         40,        1,   622080, 0xd575da72
-0,         41,         41,        1,   622080, 0x23ae25a4
-0,         42,         42,        1,   622080, 0x5fb297f7
-0,         43,         43,        1,   622080, 0x99b32978
-0,         44,         44,        1,   622080, 0xbac77ca0
-0,         45,         45,        1,   622080, 0xc1cdcbf9
-0,         46,         46,        1,   622080, 0x3276ed72
-0,         47,         47,        1,   622080, 0x4061f5ab
-0,         48,         48,        1,   622080, 0x264092b2
-0,         49,         49,        1,   622080, 0xa4e2039e
-0,         50,         50,        1,   622080, 0x20ba1094
-0,         51,         51,        1,   622080, 0x984e906e
-0,         52,         52,        1,   622080, 0x76cc3139
-0,         53,         53,        1,   622080, 0xf70e2cf6
-0,         54,         54,        1,   622080, 0x469a4902
-0,         55,         55,        1,   622080, 0x235312e6
-0,         56,         56,        1,   622080, 0x0ed7b8f5
-0,         57,         57,        1,   622080, 0xd0269cc3
-0,         58,         58,        1,   622080, 0xdc51aeac
-0,         59,         59,        1,   622080, 0x1aa5f76e
-0,         60,         60,        1,   622080, 0xee06aa36
-0,         61,         61,        1,   622080, 0xa7103230
-0,         62,         62,        1,   622080, 0x7372405f
-0,         63,         63,        1,   622080, 0x8d7a44b5
-0,         64,         64,        1,   622080, 0x9e0ee776
-0,         65,         65,        1,   622080, 0xd41e8560
-0,         66,         66,        1,   622080, 0x39e6d8c9
-0,         67,         67,        1,   622080, 0x7a23d70c
-0,         68,         68,        1,   622080, 0x51d9ac9a
-0,         69,         69,        1,   622080, 0x8eacf7f2
-0,         70,         70,        1,   622080, 0x2b63441d
-0,         71,         71,        1,   622080, 0x9f71b742
-0,         72,         72,        1,   622080, 0x58afbd5e
-0,         73,         73,        1,   622080, 0x4d645292
-0,         74,         74,        1,   622080, 0xb972f716
-0,         75,         75,        1,   622080, 0xbb5d01a2
-0,         76,         76,        1,   622080, 0x6a6df129
+0,         18,         18,        1,   622080, 0x77c0ca92
+0,         19,         19,        1,   622080, 0x06d3a822
+0,         20,         20,        1,   622080, 0xbe7fe646
+0,         21,         21,        1,   622080, 0x542e891a
+0,         22,         22,        1,   622080, 0x4384c5da
+0,         23,         23,        1,   622080, 0xddd4c056
+0,         24,         24,        1,   622080, 0x296b9168
+0,         25,         25,        1,   622080, 0xf8e09812
+0,         26,         26,        1,   622080, 0x96d5738e
+0,         27,         27,        1,   622080, 0xac341d9b
+0,         28,         28,        1,   622080, 0x769b9681
+0,         29,         29,        1,   622080, 0xb5da1354
+0,         30,         30,        1,   622080, 0x461d778d
+0,         31,         31,        1,   622080, 0xd9dcc5a8
+0,         32,         32,        1,   622080, 0xb88c584b
+0,         33,         33,        1,   622080, 0x581b5727
+0,         34,         34,        1,   622080, 0x7d7b1635
+0,         35,         35,        1,   622080, 0xfc1b1a12
+0,         36,         36,        1,   622080, 0x49c60dc0
+0,         37,         37,        1,   622080, 0x1d537b22
+0,         38,         38,        1,   622080, 0x498765a4
+0,         39,         39,        1,   622080, 0xdd2e063a
+0,         40,         40,        1,   622080, 0x0caed8f9
+0,         41,         41,        1,   622080, 0xfb4a24aa
+0,         42,         42,        1,   622080, 0x41d897d3
+0,         43,         43,        1,   622080, 0xdd3f29ae
+0,         44,         44,        1,   622080, 0x7aeb7c93
+0,         45,         45,        1,   622080, 0x2410cbe2
+0,         46,         46,        1,   622080, 0xa8bced40
+0,         47,         47,        1,   622080, 0x5534f5ca
+0,         48,         48,        1,   622080, 0x11de928c
+0,         49,         49,        1,   622080, 0x82180322
+0,         50,         50,        1,   622080, 0x64741075
+0,         51,         51,        1,   622080, 0x5e048fc8
+0,         52,         52,        1,   622080, 0x160f310e
+0,         53,         53,        1,   622080, 0x4f6d2ce7
+0,         54,         54,        1,   622080, 0x702d489c
+0,         55,         55,        1,   622080, 0xa4b41315
+0,         56,         56,        1,   622080, 0xaf2fb8aa
+0,         57,         57,        1,   622080, 0x5ec09c25
+0,         58,         58,        1,   622080, 0x575bae0f
+0,         59,         59,        1,   622080, 0x94ecf775
+0,         60,         60,        1,   622080, 0xfd68a990
+0,         61,         61,        1,   622080, 0x15a7315c
+0,         62,         62,        1,   622080, 0x8b513f66
+0,         63,         63,        1,   622080, 0xeba9440a
+0,         64,         64,        1,   622080, 0x0e6ae6c3
+0,         65,         65,        1,   622080, 0x751484a6
+0,         66,         66,        1,   622080, 0x3d12d8ab
+0,         67,         67,        1,   622080, 0xdff3d681
+0,         68,         68,        1,   622080, 0x45d0ac80
+0,         69,         69,        1,   622080, 0xbe2df7f7
+0,         70,         70,        1,   622080, 0xb18d4421
+0,         71,         71,        1,   622080, 0xa49cb6de
+0,         72,         72,        1,   622080, 0x2e81bd32
+0,         73,         73,        1,   622080, 0xa47a5272
+0,         74,         74,        1,   622080, 0x852cf6cf
+0,         75,         75,        1,   622080, 0x892a014e
+0,         76,         76,        1,   622080, 0xb055f0e5

diff --git a/tests/ref/fate/filter-yadif10 b/tests/ref/fate/filter-yadif10
index 09ab745..28e799f 100644
--- a/tests/ref/fate/filter-yadif10
+++ b/tests/ref/fate/filter-yadif10

@@ -3,33 +3,33 @@
 #codec_id 0: rawvideo
 #dimensions 0: 720x576
 #sar 0: 16/15
-0,          9,          9,        1,  1244160, 0x5b49e0c0
-0,         10,         10,        1,  1244160, 0x76ba6bab
-0,         11,         11,        1,  1244160, 0x0298cb8d
-0,         12,         12,        1,  1244160, 0x9c81759a
-0,         13,         13,        1,  1244160, 0xa239d1ae
-0,         14,         14,        1,  1244160, 0x3e95ada9
-0,         15,         15,        1,  1244160, 0x8b87e8f8
-0,         16,         16,        1,  1244160, 0x64f89653
-0,         17,         17,        1,  1244160, 0x58e5d12e
-0,         18,         18,        1,  1244160, 0x38b4003a
-0,         19,         19,        1,  1244160, 0xc005c29c
-0,         20,         20,        1,  1244160, 0x10c0c60d
-0,         21,         21,        1,  1244160, 0x1b550998
-0,         22,         22,        1,  1244160, 0x7aacf6ab
-0,         23,         23,        1,  1244160, 0xeb205d98
-0,         24,         24,        1,  1244160, 0x6ad2134c
-0,         25,         25,        1,  1244160, 0x8aea4e56
-0,         26,         26,        1,  1244160, 0x0d910a6b
-0,         27,         27,        1,  1244160, 0x749ae307
-0,         28,         28,        1,  1244160, 0x8ff7af3c
-0,         29,         29,        1,  1244160, 0x9ba51b91
-0,         30,         30,        1,  1244160, 0xad476514
-0,         31,         31,        1,  1244160, 0x674481d6
-0,         32,         32,        1,  1244160, 0x0937e677
-0,         33,         33,        1,  1244160, 0x6c2c53ee
-0,         34,         34,        1,  1244160, 0x524a164e
-0,         35,         35,        1,  1244160, 0x77a405ab
-0,         36,         36,        1,  1244160, 0xaa6b47c4
-0,         37,         37,        1,  1244160, 0x0b5ab556
-0,         38,         38,        1,  1244160, 0xbe1edab9
+0,          9,          9,        1,  1244160, 0xe0c2231b
+0,         10,         10,        1,  1244160, 0xdc7caa43
+0,         11,         11,        1,  1244160, 0x52c4dfbf
+0,         12,         12,        1,  1244160, 0x7c577f07
+0,         13,         13,        1,  1244160, 0x5b6ad7ce
+0,         14,         14,        1,  1244160, 0x6f15ce76
+0,         15,         15,        1,  1244160, 0xf120034a
+0,         16,         16,        1,  1244160, 0x9c65ba64
+0,         17,         17,        1,  1244160, 0x883b237e
+0,         18,         18,        1,  1244160, 0xb8292e0d
+0,         19,         19,        1,  1244160, 0xbc392721
+0,         20,         20,        1,  1244160, 0x7cd82ec9
+0,         21,         21,        1,  1244160, 0x167325eb
+0,         22,         22,        1,  1244160, 0x49bafa73
+0,         23,         23,        1,  1244160, 0xe1ff6dbf
+0,         24,         24,        1,  1244160, 0x85f710b6
+0,         25,         25,        1,  1244160, 0xd1fd4cdb
+0,         26,         26,        1,  1244160, 0xafee03c5
+0,         27,         27,        1,  1244160, 0x566be070
+0,         28,         28,        1,  1244160, 0xb6abbd01
+0,         29,         29,        1,  1244160, 0xa98f38fd
+0,         30,         30,        1,  1244160, 0x00f4736b
+0,         31,         31,        1,  1244160, 0x6b0f9dd2
+0,         32,         32,        1,  1244160, 0x15810b92
+0,         33,         33,        1,  1244160, 0x0b516465
+0,         34,         34,        1,  1244160, 0x927d15e6
+0,         35,         35,        1,  1244160, 0xd102f2bf
+0,         36,         36,        1,  1244160, 0xdd8b3b20
+0,         37,         37,        1,  1244160, 0x229ac529
+0,         38,         38,        1,  1244160, 0xf844e0a2

diff --git a/tests/ref/fate/filter-yadif16 b/tests/ref/fate/filter-yadif16
index 3386b02..0c856ab 100644
--- a/tests/ref/fate/filter-yadif16
+++ b/tests/ref/fate/filter-yadif16

@@ -3,33 +3,33 @@
 #codec_id 0: rawvideo
 #dimensions 0: 720x576
 #sar 0: 16/15
-0,          9,          9,        1,  1244160, 0xfb65caee
-0,         10,         10,        1,  1244160, 0x6222e690
-0,         11,         11,        1,  1244160, 0x020ac648
-0,         12,         12,        1,  1244160, 0xb76691ca
-0,         13,         13,        1,  1244160, 0xe0fd7404
-0,         14,         14,        1,  1244160, 0x3ab29705
-0,         15,         15,        1,  1244160, 0xbe807835
-0,         16,         16,        1,  1244160, 0x77d358fd
-0,         17,         17,        1,  1244160, 0x359b174f
-0,         18,         18,        1,  1244160, 0xe20f0e7f
-0,         19,         19,        1,  1244160, 0x988966ec
-0,         20,         20,        1,  1244160, 0xd078da72
-0,         21,         21,        1,  1244160, 0x276d97f7
-0,         22,         22,        1,  1244160, 0xf8ee7ca0
-0,         23,         23,        1,  1244160, 0x776bed72
-0,         24,         24,        1,  1244160, 0xb9bf92b2
-0,         25,         25,        1,  1244160, 0x30e01094
-0,         26,         26,        1,  1244160, 0xbc5f3139
-0,         27,         27,        1,  1244160, 0x44324902
-0,         28,         28,        1,  1244160, 0x64aab8f5
-0,         29,         29,        1,  1244160, 0x0a05aeac
-0,         30,         30,        1,  1244160, 0x31e5aa36
-0,         31,         31,        1,  1244160, 0xa685405f
-0,         32,         32,        1,  1244160, 0x54a6e776
-0,         33,         33,        1,  1244160, 0x9af4d8c9
-0,         34,         34,        1,  1244160, 0xf709ac9a
-0,         35,         35,        1,  1244160, 0x12a9441d
-0,         36,         36,        1,  1244160, 0xf3f1bd5e
-0,         37,         37,        1,  1244160, 0x7bcef716
-0,         38,         38,        1,  1244160, 0xe3a2f129
+0,          9,          9,        1,  1244160, 0x24eeca92
+0,         10,         10,        1,  1244160, 0x96b8e646
+0,         11,         11,        1,  1244160, 0xc11fc5da
+0,         12,         12,        1,  1244160, 0xc15f9168
+0,         13,         13,        1,  1244160, 0xba1c738e
+0,         14,         14,        1,  1244160, 0x56b59681
+0,         15,         15,        1,  1244160, 0x14ad778d
+0,         16,         16,        1,  1244160, 0x18dc584b
+0,         17,         17,        1,  1244160, 0xe4c11635
+0,         18,         18,        1,  1244160, 0x85cc0dc0
+0,         19,         19,        1,  1244160, 0x2d6a65a4
+0,         20,         20,        1,  1244160, 0x4054d8f9
+0,         21,         21,        1,  1244160, 0xebce97d3
+0,         22,         22,        1,  1244160, 0x79437c93
+0,         23,         23,        1,  1244160, 0x6438ed40
+0,         24,         24,        1,  1244160, 0x9121928c
+0,         25,         25,        1,  1244160, 0xb8731075
+0,         26,         26,        1,  1244160, 0xfb01310e
+0,         27,         27,        1,  1244160, 0x97be489c
+0,         28,         28,        1,  1244160, 0xa5b4b8aa
+0,         29,         29,        1,  1244160, 0x00a7ae0f
+0,         30,         30,        1,  1244160, 0x514fa990
+0,         31,         31,        1,  1244160, 0xd73c3f66
+0,         32,         32,        1,  1244160, 0x3602e6c3
+0,         33,         33,        1,  1244160, 0xa16ad8ab
+0,         34,         34,        1,  1244160, 0xdf11ac80
+0,         35,         35,        1,  1244160, 0x1f084421
+0,         36,         36,        1,  1244160, 0x9fc1bd32
+0,         37,         37,        1,  1244160, 0x1389f6cf
+0,         38,         38,        1,  1244160, 0x6fc5f0e5

diff --git a/tests/ref/fate/fitsdec-gbrap16 b/tests/ref/fate/fitsdec-gbrap16le
similarity index 100%
rename from tests/ref/fate/fitsdec-gbrap16
rename to tests/ref/fate/fitsdec-gbrap16le


diff --git a/tests/ref/fate/fmvc-type1 b/tests/ref/fate/fmvc-type1
new file mode 100644
index 0000000..d59de04
--- /dev/null
+++ b/tests/ref/fate/fmvc-type1

@@ -0,0 +1,11 @@
+#tb 0: 1/15
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 664x382
+#sar 0: 0/1
+0,          0,          0,        1,   760944, 0x2feb1453
+0,          1,          1,        1,   760944, 0x9677ebf8
+0,          2,          2,        1,   760944, 0x83d2ed49
+0,          3,          3,        1,   760944, 0x9ab6c63d
+0,          4,          4,        1,   760944, 0x1820189c
+0,          5,          5,        1,   760944, 0x4b94f521

diff --git a/tests/ref/fate/fmvc-type2 b/tests/ref/fate/fmvc-type2
new file mode 100644
index 0000000..60c6308
--- /dev/null
+++ b/tests/ref/fate/fmvc-type2

@@ -0,0 +1,33 @@
+#tb 0: 1/10
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 768x576
+#sar 0: 0/1
+0,          0,          0,        1,  1327104, 0xa73c232d
+0,          1,          1,        1,  1327104, 0xa73c232d
+0,          2,          2,        1,  1327104, 0xa73c232d
+0,          3,          3,        1,  1327104, 0xf14e05be
+0,          4,          4,        1,  1327104, 0xd8c03726
+0,          5,          5,        1,  1327104, 0x16e8447a
+0,          6,          6,        1,  1327104, 0x8dfd94d2
+0,          7,          7,        1,  1327104, 0x3550833b
+0,          8,          8,        1,  1327104, 0x74bd2959
+0,          9,          9,        1,  1327104, 0xc2b4505c
+0,         10,         10,        1,  1327104, 0x7c6999cd
+0,         11,         11,        1,  1327104, 0xb6562711
+0,         12,         12,        1,  1327104, 0x50993565
+0,         13,         13,        1,  1327104, 0x579549d2
+0,         14,         14,        1,  1327104, 0xb17170f6
+0,         15,         15,        1,  1327104, 0xc7d87708
+0,         16,         16,        1,  1327104, 0xdb3879da
+0,         17,         17,        1,  1327104, 0xdb3879da
+0,         18,         18,        1,  1327104, 0xdb3879da
+0,         19,         19,        1,  1327104, 0x6f1c5c01
+0,         20,         20,        1,  1327104, 0xd9d15c01
+0,         21,         21,        1,  1327104, 0x826f0bab
+0,         22,         22,        1,  1327104, 0xf25b041d
+0,         23,         23,        1,  1327104, 0xc85a041d
+0,         24,         24,        1,  1327104, 0xc85a041d
+0,         25,         25,        1,  1327104, 0xc85a041d
+0,         26,         26,        1,  1327104, 0xc85a041d
+0,         27,         27,        1,  1327104, 0xc54cc6ae

diff --git a/tests/ref/fate/h264-bsf-mp4toannexb b/tests/ref/fate/h264-bsf-mp4toannexb
index 2049f39..7cd086a 100644
--- a/tests/ref/fate/h264-bsf-mp4toannexb
+++ b/tests/ref/fate/h264-bsf-mp4toannexb

@@ -1 +1 @@
-5f04c27cc6ee8625fe2405fb0f7da9a3
+f340e7ca9a46d437af4e96f6c8de221c

diff --git a/tests/ref/fate/h264-ref-pic-mod-overflow b/tests/ref/fate/h264-ref-pic-mod-overflow
new file mode 100644
index 0000000..d4386c0
--- /dev/null
+++ b/tests/ref/fate/h264-ref-pic-mod-overflow

@@ -0,0 +1,25 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 1920x1080
+#sar 0: 1/1
+0,          0,          0,        1,  3110400, 0x5cd0bee9
+0,          1,          1,        1,  3110400, 0x19d0155c
+0,          2,          2,        1,  3110400, 0x3519be4b
+0,          3,          3,        1,  3110400, 0x87dc6708
+0,          4,          4,        1,  3110400, 0xef3c1056
+0,          5,          5,        1,  3110400, 0x5064aad2
+0,          6,          6,        1,  3110400, 0xbfb35057
+0,          7,          7,        1,  3110400, 0x6089eb95
+0,          8,          8,        1,  3110400, 0x85de8dea
+0,          9,          9,        1,  3110400, 0x216b1e5a
+0,         10,         10,        1,  3110400, 0xba73bdc4
+0,         11,         11,        1,  3110400, 0x536e437a
+0,         12,         12,        1,  3110400, 0x4be0cdbd
+0,         13,         13,        1,  3110400, 0x7316462d
+0,         14,         14,        1,  3110400, 0x80eb8622
+0,         15,         15,        1,  3110400, 0xa612a70b
+0,         16,         16,        1,  3110400, 0x9ff65345
+0,         17,         17,        1,  3110400, 0x57003dfa
+0,         18,         18,        1,  3110400, 0xf29e036e
+0,         19,         19,        1,  3110400, 0x70285363

diff --git a/tests/ref/fate/h264_mp4toannexb_ticket2991 b/tests/ref/fate/h264_mp4toannexb_ticket2991
index 76bdf3c..3245ef4 100644
--- a/tests/ref/fate/h264_mp4toannexb_ticket2991
+++ b/tests/ref/fate/h264_mp4toannexb_ticket2991

@@ -1,12 +1,12 @@
-05d66e60ab22ee004720e0051af0fe74 *tests/data/fate/h264_mp4toannexb_ticket2991.h264
-1985815 tests/data/fate/h264_mp4toannexb_ticket2991.h264
-#extradata 0:       47, 0x3a590d55
+dba672c154b41414cf26aae967c27eef *tests/data/fate/h264_mp4toannexb_ticket2991.h264
+1985823 tests/data/fate/h264_mp4toannexb_ticket2991.h264
+#extradata 0:       48, 0x47ae0d55
 #tb 0: 1/1200000
 #media_type 0: video
 #codec_id 0: h264
 #dimensions 0: 1280x720
 #sar 0: 3/4
-0,          0,          0,    48000,    37126, 0xb020184c
+0,          0,          0,    48000,    37127, 0xc125184c
 0,      48000,      48000,    40040,     6920, 0x8512361a, F=0x0
 0,      88040,      88040,    40040,     7550, 0x1bc56ed4, F=0x0
 0,     128081,     128081,    40040,     8752, 0xb8c6f0a1, F=0x0
@@ -21,7 +21,7 @@
 0,     488444,     488444,    40040,    11234, 0x83cbd9fd, F=0x0
 0,     528485,     528485,    40040,    17616, 0xfdf95104, F=0x0
 0,     568525,     568525,    40040,    10689, 0x9633d32b, F=0x0
-0,     608566,     608566,    40040,    45291, 0x543c2cf6
+0,     608566,     608566,    40040,    45292, 0x66dd2cf6
 0,     648606,     648606,    40040,    20837, 0x051abfab, F=0x0
 0,     688646,     688646,    40040,    21418, 0xe2a59d70, F=0x0
 0,     728687,     728687,    40040,    15643, 0x15cf2cec, F=0x0
@@ -36,7 +36,7 @@
 0,    1089050,    1089050,    40040,    13130, 0xcbb6bb8e, F=0x0
 0,    1129091,    1129091,    40040,    16180, 0x5d188a7a, F=0x0
 0,    1169131,    1169131,    40040,    14961, 0x9ff2f463, F=0x0
-0,    1209172,    1209172,    40040,    54296, 0xe6ec30ed
+0,    1209172,    1209172,    40040,    54297, 0xf98d30ed
 0,    1249212,    1249212,    40040,    11500, 0x8c4852c9, F=0x0
 0,    1289252,    1289252,    40040,    12065, 0xfb7954c3, F=0x0
 0,    1329293,    1329293,    40040,    12532, 0xf0a935d3, F=0x0
@@ -51,7 +51,7 @@
 0,    1689656,    1689656,    40040,    13250, 0xfed0deb8, F=0x0
 0,    1729697,    1729697,    40040,    13360, 0xbf92d476, F=0x0
 0,    1769737,    1769737,    40040,    11749, 0x3041eaf1, F=0x0
-0,    1809778,    1809778,    40040,    23997, 0xdbe6d5c4
+0,    1809778,    1809778,    40040,    23998, 0xee87d5c4
 0,    1849818,    1849818,    40040,    16065, 0xe8f715b7, F=0x0
 0,    1889858,    1889858,    40040,    16441, 0x0a4e060f, F=0x0
 0,    1929899,    1929899,    40040,    17395, 0xa8edecc2, F=0x0
@@ -66,7 +66,7 @@
 0,    2290262,    2290262,    40040,    13748, 0xed26aeb4, F=0x0
 0,    2330303,    2330303,    40040,    15092, 0x3c983538, F=0x0
 0,    2370343,    2370343,    40040,    14636, 0x9b278a6c, F=0x0
-0,    2410384,    2410384,    40040,    29134, 0xf784be18
+0,    2410384,    2410384,    40040,    29135, 0x0a34be18
 0,    2450424,    2450424,    40040,    10232, 0x5408e15b, F=0x0
 0,    2490464,    2490464,    40040,     9769, 0xc93cb7f9, F=0x0
 0,    2530505,    2530505,    40040,    14454, 0x45230dbe, F=0x0
@@ -81,7 +81,7 @@
 0,    2890868,    2890868,    40040,    14801, 0x40bae016, F=0x0
 0,    2930909,    2930909,    40040,    17303, 0x9ce1fd31, F=0x0
 0,    2970949,    2970949,    40040,    17678, 0x9bd66141, F=0x0
-0,    3010990,    3010990,    40040,    48672, 0x3215ce46
+0,    3010990,    3010990,    40040,    48673, 0x44b6ce46
 0,    3051030,    3051030,    40040,    11894, 0x12e1fece, F=0x0
 0,    3091070,    3091070,    40040,    16514, 0xc57aed05, F=0x0
 0,    3131111,    3131111,    40040,    13044, 0x61914fa0, F=0x0
@@ -96,7 +96,7 @@
 0,    3491474,    3491474,    40040,    12208, 0x81a587c0, F=0x0
 0,    3531515,    3531515,    40040,    14709, 0x5dffbe04, F=0x0
 0,    3571555,    3571555,    40040,    14390, 0xbfd1e041, F=0x0
-0,    3611596,    3611596,    40040,    37236, 0xe7f924b1
+0,    3611596,    3611596,    40040,    37237, 0xfa9a24b1
 0,    3651636,    3651636,    40040,    14056, 0x24714c7c, F=0x0
 0,    3691676,    3691676,    40040,    19438, 0x0c50dcd5, F=0x0
 0,    3731717,    3731717,    40040,    21728, 0x7eea4a11, F=0x0
@@ -111,7 +111,7 @@
 0,    4092080,    4092080,    40040,    16878, 0x98efbae2, F=0x0
 0,    4132121,    4132121,    40040,    14685, 0x1bf78d65, F=0x0
 0,    4172161,    4172161,    40040,    13127, 0x0b91881d, F=0x0
-0,    4212202,    4212202,    40040,    29390, 0xf6a5ed6b
+0,    4212202,    4212202,    40040,    29391, 0x0955ed6b
 0,    4252242,    4252242,    40040,    12576, 0xe9845ded, F=0x0
 0,    4292282,    4292282,    40040,    12599, 0x96a79ab8, F=0x0
 0,    4332323,    4332323,    40040,    16134, 0xb4c36d3f, F=0x0

diff --git a/tests/ref/fate/h264_mp4toannexb_ticket5927 b/tests/ref/fate/h264_mp4toannexb_ticket5927
index 95e35c4..006ea39 100644
--- a/tests/ref/fate/h264_mp4toannexb_ticket5927
+++ b/tests/ref/fate/h264_mp4toannexb_ticket5927

@@ -1,12 +1,12 @@
-a3b02fd09392e01619cebc959d4d9ff2 *tests/data/fate/h264_mp4toannexb_ticket5927.h264
-595583 tests/data/fate/h264_mp4toannexb_ticket5927.h264
-#extradata 0:       33, 0x84fe08f8
+562487bfea635cdadbc23d390322b589 *tests/data/fate/h264_mp4toannexb_ticket5927.h264
+595585 tests/data/fate/h264_mp4toannexb_ticket5927.h264
+#extradata 0:       34, 0x8df608f8
 #tb 0: 1/1200000
 #media_type 0: video
 #codec_id 0: h264
 #dimensions 0: 1920x1080
 #sar 0: 0/1
-0,     -48000, -9223372036854775808,    48000,   247993, 0x1ce821ea
+0,     -48000, -9223372036854775808,    48000,   247994, 0x2e1e21ea
 0,          0, -9223372036854775808,    48000,    43354, 0xa05dca6f, F=0x0
 0,      48000, -9223372036854775808,    48000,    11423, 0x5e8086dd, F=0x0
 0,      96000, -9223372036854775808,    48000,    50798, 0x145fbe4f, F=0x0
@@ -18,4 +18,4 @@
 0,     384000, -9223372036854775808,    48000,    54483, 0xefead99f, F=0x0
 0,     432000, -9223372036854775808,    48000,    13705, 0x23cd27e8, F=0x0
 0,     480000, -9223372036854775808,    48000,    22308, 0x4093b5af, F=0x0
-0,     528000, -9223372036854775808,    48000,     6369, 0x858b2aa1
+0,     528000, -9223372036854775808,    48000,     6370, 0x96c12aa1

diff --git a/tests/ref/fate/h264_mp4toannexb_ticket5927_2 b/tests/ref/fate/h264_mp4toannexb_ticket5927_2
index 8db6a7e..51432b1 100644
--- a/tests/ref/fate/h264_mp4toannexb_ticket5927_2
+++ b/tests/ref/fate/h264_mp4toannexb_ticket5927_2

@@ -1,12 +1,12 @@
-a3b02fd09392e01619cebc959d4d9ff2 *tests/data/fate/h264_mp4toannexb_ticket5927_2.h264
-595583 tests/data/fate/h264_mp4toannexb_ticket5927_2.h264
-#extradata 0:       33, 0x84fe08f8
+562487bfea635cdadbc23d390322b589 *tests/data/fate/h264_mp4toannexb_ticket5927_2.h264
+595585 tests/data/fate/h264_mp4toannexb_ticket5927_2.h264
+#extradata 0:       34, 0x8df608f8
 #tb 0: 1/1200000
 #media_type 0: video
 #codec_id 0: h264
 #dimensions 0: 1920x1080
 #sar 0: 0/1
-0,     -48000, -9223372036854775808,    48000,   247993, 0x1ce821ea
+0,     -48000, -9223372036854775808,    48000,   247994, 0x2e1e21ea
 0,          0, -9223372036854775808,    48000,    43354, 0xa05dca6f, F=0x0
 0,      48000, -9223372036854775808,    48000,    11423, 0x5e8086dd, F=0x0
 0,      96000, -9223372036854775808,    48000,    50798, 0x145fbe4f, F=0x0
@@ -18,4 +18,4 @@
 0,     384000, -9223372036854775808,    48000,    54483, 0xefead99f, F=0x0
 0,     432000, -9223372036854775808,    48000,    13705, 0x23cd27e8, F=0x0
 0,     480000, -9223372036854775808,    48000,    22308, 0x4093b5af, F=0x0
-0,     528000, -9223372036854775808,    48000,     6369, 0x858b2aa1
+0,     528000, -9223372036854775808,    48000,     6370, 0x96c12aa1

diff --git a/tests/ref/fate/hap-alpha-only-nosnappy-128x72 b/tests/ref/fate/hap-alpha-only-nosnappy-128x72
new file mode 100644
index 0000000..d9b1b56
--- /dev/null
+++ b/tests/ref/fate/hap-alpha-only-nosnappy-128x72

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 128x72
+#sar 0: 1/1
+0,          0,          0,        1,     9216, 0x5967036f

diff --git a/tests/ref/fate/hap-alpha-only-snappy-127x71 b/tests/ref/fate/hap-alpha-only-snappy-127x71
new file mode 100644
index 0000000..1559204
--- /dev/null
+++ b/tests/ref/fate/hap-alpha-only-snappy-127x71

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 127x71
+#sar 0: 1/1
+0,          0,          0,        1,     9017, 0xc390c38c

diff --git a/tests/ref/fate/hapenc-hap-none b/tests/ref/fate/hapenc-hap-none
new file mode 100644
index 0000000..22c8b2c
--- /dev/null
+++ b/tests/ref/fate/hapenc-hap-none

@@ -0,0 +1,14 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: hap
+#dimensions 0: 352x288
+#sar 0: 0/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,    50696, 7fd5bc08f7b96326953ba6926cb06109
+0,          1,          1,        1,    50696, 24d172f2ea03994add2596a48151fca8
+0,          2,          2,        1,    50696, e5f2bac02ad850eb769007694a2f7acc
+0,          3,          3,        1,    50696, 26042fbce3868ad012b0b4557fd95d8a
+0,          4,          4,        1,    50696, 80902dd47d1211f1cd388652d5e0a797

diff --git a/tests/ref/fate/hapenc-hapa-none b/tests/ref/fate/hapenc-hapa-none
new file mode 100644
index 0000000..9aa80ae
--- /dev/null
+++ b/tests/ref/fate/hapenc-hapa-none

@@ -0,0 +1,14 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: hap
+#dimensions 0: 352x288
+#sar 0: 0/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   101384, 7bd1e603a54fb468242c9e00b0a90359
+0,          1,          1,        1,   101384, 09bf0e48db9bf465f3d89a192ddfda2d
+0,          2,          2,        1,   101384, 4993a96ed5f5d2bc0f0ff2080622eed2
+0,          3,          3,        1,   101384, 5cdbd0f763ba40366816588160ec73c2
+0,          4,          4,        1,   101384, 9202fe8358efde2c92c75210ed93c118

diff --git a/tests/ref/fate/hapenc-hapq-none b/tests/ref/fate/hapenc-hapq-none
new file mode 100644
index 0000000..ee467b6
--- /dev/null
+++ b/tests/ref/fate/hapenc-hapq-none

@@ -0,0 +1,14 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: hap
+#dimensions 0: 352x288
+#sar 0: 0/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   101384, cb6ef787ae7adff965ba4904a57c2188
+0,          1,          1,        1,   101384, 7cd50676c256354c06f728a265556efd
+0,          2,          2,        1,   101384, 454b14f7770a1f57830a678ea6e24b86
+0,          3,          3,        1,   101384, 6e6c088ed068d05870976fe78a75bb30
+0,          4,          4,        1,   101384, ba79bd8e7fc5d8919b174f0c8ea82753

diff --git a/tests/ref/fate/hapqa-extract-nosnappy-to-hapalphaonly-mov b/tests/ref/fate/hapqa-extract-nosnappy-to-hapalphaonly-mov
new file mode 100644
index 0000000..eacb1f1
--- /dev/null
+++ b/tests/ref/fate/hapqa-extract-nosnappy-to-hapalphaonly-mov

@@ -0,0 +1,72 @@
+[PACKET]
+codec_type=video
+stream_index=0
+pts=0
+pts_time=0.000000
+dts=0
+dts_time=0.000000
+duration=512
+duration_time=0.040000
+convergence_duration=N/A
+convergence_duration_time=N/A
+size=4612
+pos=36
+flags=K_
+data_hash=adler32:ed83c166
+[/PACKET]
+[STREAM]
+index=0
+codec_name=hap
+profile=unknown
+codec_type=video
+codec_time_base=1/25
+codec_tag_string=HapA
+codec_tag=0x41706148
+width=127
+height=71
+coded_width=128
+coded_height=72
+has_b_frames=0
+sample_aspect_ratio=1:1
+display_aspect_ratio=127:71
+pix_fmt=gray
+level=-99
+color_range=unknown
+color_space=unknown
+color_transfer=unknown
+color_primaries=unknown
+chroma_location=unspecified
+field_order=unknown
+timecode=N/A
+refs=1
+id=N/A
+r_frame_rate=25/1
+avg_frame_rate=25/1
+time_base=1/12800
+start_pts=0
+start_time=0.000000
+duration_ts=512
+duration=0.040000
+bit_rate=922400
+max_bit_rate=N/A
+bits_per_raw_sample=N/A
+nb_frames=1
+nb_read_frames=N/A
+nb_read_packets=1
+extradata_hash=adler32:00000001
+DISPOSITION:default=1
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+TAG:language=eng
+TAG:handler_name=Module de gestion video
+TAG:encoder=HAPAlpha Only
+[/STREAM]

diff --git a/tests/ref/fate/hapqa-extract-nosnappy-to-hapq-mov b/tests/ref/fate/hapqa-extract-nosnappy-to-hapq-mov
new file mode 100644
index 0000000..ff96888
--- /dev/null
+++ b/tests/ref/fate/hapqa-extract-nosnappy-to-hapq-mov

@@ -0,0 +1,72 @@
+[PACKET]
+codec_type=video
+stream_index=0
+pts=0
+pts_time=0.000000
+dts=0
+dts_time=0.000000
+duration=512
+duration_time=0.040000
+convergence_duration=N/A
+convergence_duration_time=N/A
+size=9220
+pos=36
+flags=K_
+data_hash=adler32:b3ccc147
+[/PACKET]
+[STREAM]
+index=0
+codec_name=hap
+profile=unknown
+codec_type=video
+codec_time_base=1/25
+codec_tag_string=HapY
+codec_tag=0x59706148
+width=127
+height=71
+coded_width=128
+coded_height=72
+has_b_frames=0
+sample_aspect_ratio=1:1
+display_aspect_ratio=127:71
+pix_fmt=rgb0
+level=-99
+color_range=unknown
+color_space=unknown
+color_transfer=unknown
+color_primaries=unknown
+chroma_location=unspecified
+field_order=unknown
+timecode=N/A
+refs=1
+id=N/A
+r_frame_rate=25/1
+avg_frame_rate=25/1
+time_base=1/12800
+start_pts=0
+start_time=0.000000
+duration_ts=512
+duration=0.040000
+bit_rate=1844000
+max_bit_rate=N/A
+bits_per_raw_sample=N/A
+nb_frames=1
+nb_read_frames=N/A
+nb_read_packets=1
+extradata_hash=adler32:00000001
+DISPOSITION:default=1
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+TAG:language=eng
+TAG:handler_name=Module de gestion video
+TAG:encoder=HAPQ
+[/STREAM]

diff --git a/tests/ref/fate/hapqa-extract-snappy1-to-hapalphaonly b/tests/ref/fate/hapqa-extract-snappy1-to-hapalphaonly
new file mode 100644
index 0000000..9ab123f
--- /dev/null
+++ b/tests/ref/fate/hapqa-extract-snappy1-to-hapalphaonly

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: hap
+#dimensions 0: 127x71
+#sar 0: 1/1
+0,          0,          0,        1,     3044, 0xcaf6ddd0

diff --git a/tests/ref/fate/hapqa-extract-snappy1-to-hapq b/tests/ref/fate/hapqa-extract-snappy1-to-hapq
new file mode 100644
index 0000000..f658b1c
--- /dev/null
+++ b/tests/ref/fate/hapqa-extract-snappy1-to-hapq

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: hap
+#dimensions 0: 127x71
+#sar 0: 1/1
+0,          0,          0,        1,     8217, 0x04271f0f

diff --git a/tests/ref/fate/hapqa-extract-snappy16-to-hapalphaonly b/tests/ref/fate/hapqa-extract-snappy16-to-hapalphaonly
new file mode 100644
index 0000000..1bd9206
--- /dev/null
+++ b/tests/ref/fate/hapqa-extract-snappy16-to-hapalphaonly

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: hap
+#dimensions 0: 127x71
+#sar 0: 1/1
+0,          0,          0,        1,     3513, 0x69c7014f

diff --git a/tests/ref/fate/hapqa-extract-snappy16-to-hapq b/tests/ref/fate/hapqa-extract-snappy16-to-hapq
new file mode 100644
index 0000000..8334d53
--- /dev/null
+++ b/tests/ref/fate/hapqa-extract-snappy16-to-hapq

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: hap
+#dimensions 0: 127x71
+#sar 0: 1/1
+0,          0,          0,        1,     8726, 0xf889691c

diff --git a/tests/ref/fate/hapqa-nosnappy-127x71 b/tests/ref/fate/hapqa-nosnappy-127x71
new file mode 100644
index 0000000..36069b4
--- /dev/null
+++ b/tests/ref/fate/hapqa-nosnappy-127x71

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 127x71
+#sar 0: 1/1
+0,          0,          0,        1,    36068, 0x16c0a011

diff --git a/tests/ref/fate/hapqa-snappy1-127x71 b/tests/ref/fate/hapqa-snappy1-127x71
new file mode 100644
index 0000000..36069b4
--- /dev/null
+++ b/tests/ref/fate/hapqa-snappy1-127x71

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 127x71
+#sar 0: 1/1
+0,          0,          0,        1,    36068, 0x16c0a011

diff --git a/tests/ref/fate/hapqa-snappy16-127x71 b/tests/ref/fate/hapqa-snappy16-127x71
new file mode 100644
index 0000000..36069b4
--- /dev/null
+++ b/tests/ref/fate/hapqa-snappy16-127x71

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 127x71
+#sar 0: 1/1
+0,          0,          0,        1,    36068, 0x16c0a011

diff --git a/tests/ref/fate/hevc-conformance-PERSIST_RPARAM_A_RExt_Sony_3 b/tests/ref/fate/hevc-conformance-PERSIST_RPARAM_A_RExt_Sony_3
new file mode 100644
index 0000000..54f4db8
--- /dev/null
+++ b/tests/ref/fate/hevc-conformance-PERSIST_RPARAM_A_RExt_Sony_3

@@ -0,0 +1,7 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 400x384
+#sar 0: 0/1
+0,          0,          0,        1,   921600, 0x702f0d67
+0,          1,          1,        1,   921600, 0x99822b52

diff --git a/tests/ref/fate/hevc-monochrome-crop b/tests/ref/fate/hevc-monochrome-crop
new file mode 100644
index 0000000..4e45412
--- /dev/null
+++ b/tests/ref/fate/hevc-monochrome-crop

@@ -0,0 +1,8 @@
+[FRAME]
+width=384
+height=240
+[/FRAME]
+[STREAM]
+width=384
+height=240
+[/STREAM]

diff --git a/tests/ref/fate/hevc-skiploopfilter b/tests/ref/fate/hevc-skiploopfilter
new file mode 100644
index 0000000..9c29909
--- /dev/null
+++ b/tests/ref/fate/hevc-skiploopfilter

@@ -0,0 +1,14 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 1920x1080
+#sar 0: 0/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,  3110400, 076c9288843ef1197a8cbef7f9a13fee
+0,          1,          1,        1,  3110400, 6190eeea952805ebde69d22961aaeb45
+0,          2,          2,        1,  3110400, 9aaa5111d5e6b25dcf5ddd19c58f17f7
+0,          3,          3,        1,  3110400, 52a487e5f71b314e33e6632b4496f0a6
+0,          4,          4,        1,  3110400, 13abb1c78313705b57a8298dc1e6c0e2

diff --git a/tests/ref/fate/id3v2-priv b/tests/ref/fate/id3v2-priv
new file mode 100644
index 0000000..965c869
--- /dev/null
+++ b/tests/ref/fate/id3v2-priv

@@ -0,0 +1,5 @@
+[FORMAT]
+TAG:title=id3v2-test
+TAG:id3v2_priv.testowner=testdata
+TAG:id3v2_priv.testowner2=\x00\x01\x02
+[/FORMAT]

diff --git a/tests/ref/fate/lagarith-red b/tests/ref/fate/lagarith-red
index 0e065d6..7cfb792 100644
--- a/tests/ref/fate/lagarith-red
+++ b/tests/ref/fate/lagarith-red

@@ -3,28 +3,28 @@
 #codec_id 0: rawvideo
 #dimensions 0: 320x240
 #sar 0: 0/1
-0,          0,          0,        1,   230400, 0x67dfe576
-0,          1,          1,        1,   230400, 0x67dfe576
-0,          2,          2,        1,   230400, 0x67dfe576
-0,          3,          3,        1,   230400, 0x67dfe576
-0,          4,          4,        1,   230400, 0x67dfe576
-0,          5,          5,        1,   230400, 0x67dfe576
-0,          6,          6,        1,   230400, 0x67dfe576
-0,          7,          7,        1,   230400, 0x67dfe576
-0,          8,          8,        1,   230400, 0x67dfe576
-0,          9,          9,        1,   230400, 0x67dfe576
-0,         10,         10,        1,   230400, 0x67dfe576
-0,         11,         11,        1,   230400, 0x67dfe576
-0,         12,         12,        1,   230400, 0x67dfe576
-0,         13,         13,        1,   230400, 0x67dfe576
-0,         14,         14,        1,   230400, 0x67dfe576
-0,         15,         15,        1,   230400, 0x67dfe576
-0,         16,         16,        1,   230400, 0x67dfe576
-0,         17,         17,        1,   230400, 0x67dfe576
-0,         18,         18,        1,   230400, 0x67dfe576
-0,         19,         19,        1,   230400, 0x67dfe576
-0,         20,         20,        1,   230400, 0x67dfe576
-0,         21,         21,        1,   230400, 0x67dfe576
-0,         22,         22,        1,   230400, 0x67dfe576
-0,         23,         23,        1,   230400, 0x67dfe576
-0,         24,         24,        1,   230400, 0x67dfe576
+0,          0,          0,        1,   230400, 0x77f0e576
+0,          1,          1,        1,   230400, 0x77f0e576
+0,          2,          2,        1,   230400, 0x77f0e576
+0,          3,          3,        1,   230400, 0x77f0e576
+0,          4,          4,        1,   230400, 0x77f0e576
+0,          5,          5,        1,   230400, 0x77f0e576
+0,          6,          6,        1,   230400, 0x77f0e576
+0,          7,          7,        1,   230400, 0x77f0e576
+0,          8,          8,        1,   230400, 0x77f0e576
+0,          9,          9,        1,   230400, 0x77f0e576
+0,         10,         10,        1,   230400, 0x77f0e576
+0,         11,         11,        1,   230400, 0x77f0e576
+0,         12,         12,        1,   230400, 0x77f0e576
+0,         13,         13,        1,   230400, 0x77f0e576
+0,         14,         14,        1,   230400, 0x77f0e576
+0,         15,         15,        1,   230400, 0x77f0e576
+0,         16,         16,        1,   230400, 0x77f0e576
+0,         17,         17,        1,   230400, 0x77f0e576
+0,         18,         18,        1,   230400, 0x77f0e576
+0,         19,         19,        1,   230400, 0x77f0e576
+0,         20,         20,        1,   230400, 0x77f0e576
+0,         21,         21,        1,   230400, 0x77f0e576
+0,         22,         22,        1,   230400, 0x77f0e576
+0,         23,         23,        1,   230400, 0x77f0e576
+0,         24,         24,        1,   230400, 0x77f0e576

diff --git a/tests/ref/fate/lagarith-rgb24 b/tests/ref/fate/lagarith-rgb24
index 63250c6..dea49e9 100644
--- a/tests/ref/fate/lagarith-rgb24
+++ b/tests/ref/fate/lagarith-rgb24

@@ -3,7 +3,7 @@
 #codec_id 0: rawvideo
 #dimensions 0: 480x256
 #sar 0: 0/1
-0,          0,          0,        1,   368640, 0x26f74db2
-0,          1,          1,        1,   368640, 0x63b29ea4
-0,          2,          2,        1,   368640, 0x19467f03
-0,          3,          3,        1,   368640, 0x5fdc3575
+0,          0,          0,        1,   368640, 0x18364db2
+0,          1,          1,        1,   368640, 0x60e79ea4
+0,          2,          2,        1,   368640, 0xb28a7f03
+0,          3,          3,        1,   368640, 0x2ed83575

diff --git a/tests/ref/fate/lagarith-ticket4119 b/tests/ref/fate/lagarith-ticket4119
index c46ef04..c1de9dc 100644
--- a/tests/ref/fate/lagarith-ticket4119
+++ b/tests/ref/fate/lagarith-ticket4119

@@ -4,5 +4,5 @@
 #dimensions 0: 640x360
 #sar 0: 0/1
 0,          0,          0,        1,   691200, 0x00000000
-0,         25,         25,        1,   691200, 0xc88a6f24
-0,         50,         50,        1,   691200, 0x906d474c
+0,         25,         25,        1,   691200, 0x1c4a6f24
+0,         50,         50,        1,   691200, 0x1fa0474c

diff --git a/tests/ref/fate/lagarith-ticket4119-cfr b/tests/ref/fate/lagarith-ticket4119-cfr
index 324fe44..1b68901 100644
--- a/tests/ref/fate/lagarith-ticket4119-cfr
+++ b/tests/ref/fate/lagarith-ticket4119-cfr

@@ -27,53 +27,53 @@
 0,         21,         21,        1,   691200, 0x00000000
 0,         22,         22,        1,   691200, 0x00000000
 0,         23,         23,        1,   691200, 0x00000000
-0,         24,         24,        1,   691200, 0xc88a6f24
-0,         25,         25,        1,   691200, 0xc88a6f24
-0,         26,         26,        1,   691200, 0xc88a6f24
-0,         27,         27,        1,   691200, 0xc88a6f24
-0,         28,         28,        1,   691200, 0xc88a6f24
-0,         29,         29,        1,   691200, 0xc88a6f24
-0,         30,         30,        1,   691200, 0xc88a6f24
-0,         31,         31,        1,   691200, 0xc88a6f24
-0,         32,         32,        1,   691200, 0xc88a6f24
-0,         33,         33,        1,   691200, 0xc88a6f24
-0,         34,         34,        1,   691200, 0xc88a6f24
-0,         35,         35,        1,   691200, 0xc88a6f24
-0,         36,         36,        1,   691200, 0xc88a6f24
-0,         37,         37,        1,   691200, 0xc88a6f24
-0,         38,         38,        1,   691200, 0xc88a6f24
-0,         39,         39,        1,   691200, 0xc88a6f24
-0,         40,         40,        1,   691200, 0xc88a6f24
-0,         41,         41,        1,   691200, 0xc88a6f24
-0,         42,         42,        1,   691200, 0xc88a6f24
-0,         43,         43,        1,   691200, 0xc88a6f24
-0,         44,         44,        1,   691200, 0xc88a6f24
-0,         45,         45,        1,   691200, 0xc88a6f24
-0,         46,         46,        1,   691200, 0xc88a6f24
-0,         47,         47,        1,   691200, 0xc88a6f24
-0,         48,         48,        1,   691200, 0xc88a6f24
-0,         49,         49,        1,   691200, 0x906d474c
-0,         50,         50,        1,   691200, 0x906d474c
-0,         51,         51,        1,   691200, 0x906d474c
-0,         52,         52,        1,   691200, 0x906d474c
-0,         53,         53,        1,   691200, 0x906d474c
-0,         54,         54,        1,   691200, 0x906d474c
-0,         55,         55,        1,   691200, 0x906d474c
-0,         56,         56,        1,   691200, 0x906d474c
-0,         57,         57,        1,   691200, 0x906d474c
-0,         58,         58,        1,   691200, 0x906d474c
-0,         59,         59,        1,   691200, 0x906d474c
-0,         60,         60,        1,   691200, 0x906d474c
-0,         61,         61,        1,   691200, 0x906d474c
-0,         62,         62,        1,   691200, 0x906d474c
-0,         63,         63,        1,   691200, 0x906d474c
-0,         64,         64,        1,   691200, 0x906d474c
-0,         65,         65,        1,   691200, 0x906d474c
-0,         66,         66,        1,   691200, 0x906d474c
-0,         67,         67,        1,   691200, 0x906d474c
-0,         68,         68,        1,   691200, 0x906d474c
-0,         69,         69,        1,   691200, 0x906d474c
-0,         70,         70,        1,   691200, 0x906d474c
-0,         71,         71,        1,   691200, 0x906d474c
-0,         72,         72,        1,   691200, 0x906d474c
-0,         73,         73,        1,   691200, 0x906d474c
+0,         24,         24,        1,   691200, 0x1c4a6f24
+0,         25,         25,        1,   691200, 0x1c4a6f24
+0,         26,         26,        1,   691200, 0x1c4a6f24
+0,         27,         27,        1,   691200, 0x1c4a6f24
+0,         28,         28,        1,   691200, 0x1c4a6f24
+0,         29,         29,        1,   691200, 0x1c4a6f24
+0,         30,         30,        1,   691200, 0x1c4a6f24
+0,         31,         31,        1,   691200, 0x1c4a6f24
+0,         32,         32,        1,   691200, 0x1c4a6f24
+0,         33,         33,        1,   691200, 0x1c4a6f24
+0,         34,         34,        1,   691200, 0x1c4a6f24
+0,         35,         35,        1,   691200, 0x1c4a6f24
+0,         36,         36,        1,   691200, 0x1c4a6f24
+0,         37,         37,        1,   691200, 0x1c4a6f24
+0,         38,         38,        1,   691200, 0x1c4a6f24
+0,         39,         39,        1,   691200, 0x1c4a6f24
+0,         40,         40,        1,   691200, 0x1c4a6f24
+0,         41,         41,        1,   691200, 0x1c4a6f24
+0,         42,         42,        1,   691200, 0x1c4a6f24
+0,         43,         43,        1,   691200, 0x1c4a6f24
+0,         44,         44,        1,   691200, 0x1c4a6f24
+0,         45,         45,        1,   691200, 0x1c4a6f24
+0,         46,         46,        1,   691200, 0x1c4a6f24
+0,         47,         47,        1,   691200, 0x1c4a6f24
+0,         48,         48,        1,   691200, 0x1c4a6f24
+0,         49,         49,        1,   691200, 0x1fa0474c
+0,         50,         50,        1,   691200, 0x1fa0474c
+0,         51,         51,        1,   691200, 0x1fa0474c
+0,         52,         52,        1,   691200, 0x1fa0474c
+0,         53,         53,        1,   691200, 0x1fa0474c
+0,         54,         54,        1,   691200, 0x1fa0474c
+0,         55,         55,        1,   691200, 0x1fa0474c
+0,         56,         56,        1,   691200, 0x1fa0474c
+0,         57,         57,        1,   691200, 0x1fa0474c
+0,         58,         58,        1,   691200, 0x1fa0474c
+0,         59,         59,        1,   691200, 0x1fa0474c
+0,         60,         60,        1,   691200, 0x1fa0474c
+0,         61,         61,        1,   691200, 0x1fa0474c
+0,         62,         62,        1,   691200, 0x1fa0474c
+0,         63,         63,        1,   691200, 0x1fa0474c
+0,         64,         64,        1,   691200, 0x1fa0474c
+0,         65,         65,        1,   691200, 0x1fa0474c
+0,         66,         66,        1,   691200, 0x1fa0474c
+0,         67,         67,        1,   691200, 0x1fa0474c
+0,         68,         68,        1,   691200, 0x1fa0474c
+0,         69,         69,        1,   691200, 0x1fa0474c
+0,         70,         70,        1,   691200, 0x1fa0474c
+0,         71,         71,        1,   691200, 0x1fa0474c
+0,         72,         72,        1,   691200, 0x1fa0474c
+0,         73,         73,        1,   691200, 0x1fa0474c

diff --git a/tests/ref/fate/lagarith-ticket4119-drop b/tests/ref/fate/lagarith-ticket4119-drop
index abc58d5..c7738ae 100644
--- a/tests/ref/fate/lagarith-ticket4119-drop
+++ b/tests/ref/fate/lagarith-ticket4119-drop

@@ -4,5 +4,5 @@
 #dimensions 0: 640x360
 #sar 0: 0/1
 0,          0,          0,        1,   691200, 0x00000000
-0,          1,          1,        1,   691200, 0xc88a6f24
-0,          2,          2,        1,   691200, 0x906d474c
+0,          1,          1,        1,   691200, 0x1c4a6f24
+0,          2,          2,        1,   691200, 0x1fa0474c

diff --git a/tests/ref/fate/lagarith-ticket4119-pass b/tests/ref/fate/lagarith-ticket4119-pass
index c46ef04..c1de9dc 100644
--- a/tests/ref/fate/lagarith-ticket4119-pass
+++ b/tests/ref/fate/lagarith-ticket4119-pass

@@ -4,5 +4,5 @@
 #dimensions 0: 640x360
 #sar 0: 0/1
 0,          0,          0,        1,   691200, 0x00000000
-0,         25,         25,        1,   691200, 0xc88a6f24
-0,         50,         50,        1,   691200, 0x906d474c
+0,         25,         25,        1,   691200, 0x1c4a6f24
+0,         50,         50,        1,   691200, 0x1fa0474c

diff --git a/tests/ref/fate/lagarith-ticket4119-vfr b/tests/ref/fate/lagarith-ticket4119-vfr
index c46ef04..c1de9dc 100644
--- a/tests/ref/fate/lagarith-ticket4119-vfr
+++ b/tests/ref/fate/lagarith-ticket4119-vfr

@@ -4,5 +4,5 @@
 #dimensions 0: 640x360
 #sar 0: 0/1
 0,          0,          0,        1,   691200, 0x00000000
-0,         25,         25,        1,   691200, 0xc88a6f24
-0,         50,         50,        1,   691200, 0x906d474c
+0,         25,         25,        1,   691200, 0x1c4a6f24
+0,         50,         50,        1,   691200, 0x1fa0474c

diff --git a/tests/ref/fate/loco-rgb b/tests/ref/fate/loco-rgb
index 289e73f..446f5d8 100644
--- a/tests/ref/fate/loco-rgb
+++ b/tests/ref/fate/loco-rgb

@@ -3,8 +3,8 @@
 #codec_id 0: rawvideo
 #dimensions 0: 188x128
 #sar 0: 0/1
-0,          0,          0,        1,    72192, 0x1847500c
-0,          1,          1,        1,    72192, 0x1b0e2e87
-0,          2,          2,        1,    72192, 0x293276c8
-0,          3,          3,        1,    72192, 0x743b9705
-0,          4,          4,        1,    72192, 0xcc1b2530
+0,          0,          0,        1,    72192, 0xbe9f500c
+0,          1,          1,        1,    72192, 0xafc82e87
+0,          2,          2,        1,    72192, 0x3a2476c8
+0,          3,          3,        1,    72192, 0x1fda9705
+0,          4,          4,        1,    72192, 0x46b82530

diff --git a/tests/ref/fate/mov-440hz-10ms b/tests/ref/fate/mov-440hz-10ms
new file mode 100644
index 0000000..498879e
--- /dev/null
+++ b/tests/ref/fate/mov-440hz-10ms

@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/44100
+#media_type 0: audio
+#codec_id 0: pcm_s16le
+#sample_rate 0: 44100
+#channel_layout 0: 4
+#channel_layout_name 0: mono
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      960,     1920, 44e7e48ff08835ce30e93c7971dae7df

diff --git a/tests/ref/fate/mov-bbi-elst-starts-b b/tests/ref/fate/mov-bbi-elst-starts-b
new file mode 100644
index 0000000..3ba28b5
--- /dev/null
+++ b/tests/ref/fate/mov-bbi-elst-starts-b

@@ -0,0 +1,391 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1001/30000
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 1920x1080
+#sar 0: 1/1
+#tb 1: 1/48000
+#media_type 1: audio
+#codec_id 1: pcm_s16le
+#sample_rate 1: 48000
+#channel_layout 1: 3
+#channel_layout_name 1: stereo
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,  3110400, e9454409af76038dbe56e834921d2fa8
+1,          0,          0,     1024,     4096, 620f0b67a91f7f74151bc5be745b7110
+1,       1024,       1024,     1024,     4096, 4963ab641585142fbac06502873fb6d3
+0,          1,          1,        1,  3110400, 37aa620e494d22ba248feea9a5a869a0
+1,       2048,       2048,     1024,     4096, 80649ae10d2587264fcb43c7dc25e61b
+1,       3072,       3072,     1024,     4096, 01ec94c18060be08988e8b9b1c9de822
+0,          2,          2,        1,  3110400, 7b8be9619e4e1d618ab1ed85aff3957b
+1,       4096,       4096,     1024,     4096, 3f8342da3124e22b89211c2f233eb5b2
+0,          3,          3,        1,  3110400, 756a4551420853bc3ae444e6b86169f3
+1,       5120,       5120,     1024,     4096, 4832d1a3f7d8aaceb0489c1ca0706d31
+1,       6144,       6144,     1024,     4096, 7e89f1e8318ed54d81e4efe3c679515f
+0,          4,          4,        1,  3110400, 94a93c1669a96ebf41a7258177849396
+1,       7168,       7168,     1024,     4096, d73468c68e92c9e2939eb30d66a45bc2
+0,          5,          5,        1,  3110400, 5d1c49d3f43cd472683e4a039e376af5
+1,       8192,       8192,     1024,     4096, 72e1020c1bb961cb1c177f97e427edb9
+1,       9216,       9216,     1024,     4096, 1a4773af7080bd904a3b14c12a8028fa
+0,          6,          6,        1,  3110400, e0e88cc34b0a9d7963cddf554a3909f0
+1,      10240,      10240,     1024,     4096, d39ae1809032c4807efe41520302bd37
+0,          7,          7,        1,  3110400, 1f464f2427aa3842224803b6d67154f8
+1,      11264,      11264,     1024,     4096, e26af86998d1a9579127a97dbe45890b
+1,      12288,      12288,     1024,     4096, 148e1ab102c1c62d9018095c7e934eea
+0,          8,          8,        1,  3110400, e5d610b8e53203b5ccb01877acb3ac56
+1,      13312,      13312,     1024,     4096, 96582c941184ad967e9e91198775e61f
+1,      14336,      14336,     1024,     4096, 8d2371bb1524d9d007ae44519b55a055
+0,          9,          9,        1,  3110400, d1363fbdffbb85a0affe660bfd09b98c
+1,      15360,      15360,     1024,     4096, 6034c9e409d8dcdfd032b44f631e2362
+0,         10,         10,        1,  3110400, 130bb4c17e963b092a717325498c06c3
+1,      16384,      16384,     1024,     4096, 3649e9edc65a042c4c913c06c3f3bff7
+1,      17408,      17408,     1024,     4096, a4cdb0c4bed48d1f7c4918e5590a542d
+0,         11,         11,        1,  3110400, a3cb7a37fef5d8a49815dd15970c1f3d
+1,      18432,      18432,     1024,     4096, a130e8ba9a47df0c76edf5c3ece5762f
+0,         12,         12,        1,  3110400, 7dc4a4334fa9bb61f2ad56b3d059b3f7
+1,      19456,      19456,     1024,     4096, cf76a132c205c29a0b7042599973fda4
+1,      20480,      20480,     1024,     4096, b9b7d819599bb6d7cd53cc1bc0810ca7
+0,         13,         13,        1,  3110400, ccda106ec5899787c3bc8679ee052854
+1,      21504,      21504,     1024,     4096, 3d0d6ef8318a94416a56108248e6cad3
+0,         14,         14,        1,  3110400, 4254c7b01a67a0810bf234ba7b1f59d0
+1,      22528,      22528,     1024,     4096, a8a082b4c521d2e7fc990f54e3580a9e
+1,      23552,      23552,     1024,     4096, 60b36c268bc98093f35a7c90c64b507c
+0,         15,         15,        1,  3110400, 6c0acd9781cfecaf9a10fe91f8d6ec41
+1,      24576,      24576,     1024,     4096, c5211b49eb51cea4c16079eb54df3d12
+1,      25600,      25600,     1024,     4096, dddd55a31e8a65110a89de1af67861b0
+0,         16,         16,        1,  3110400, 2a9d5db0bc2f54e2c66834db64386bb7
+1,      26624,      26624,     1024,     4096, 0c220f1d405bb463469b54fa126a2c41
+0,         17,         17,        1,  3110400, 0140f515be87da399061764050d15b16
+1,      27648,      27648,     1024,     4096, 11d740234a9a4dd28e3191e258fc8e3b
+1,      28672,      28672,     1024,     4096, e456943c722a9807669f57776dcb8c5d
+0,         18,         18,        1,  3110400, cad554fe221fbd8ee848facebd7b24d2
+1,      29696,      29696,     1024,     4096, 3c13165d648f46c3e3e699b874da372a
+0,         19,         19,        1,  3110400, 1e8474911cb1bd831808dc2beba540c0
+1,      30720,      30720,     1024,     4096, 7e7a46e3360f884507c95f640cd6f532
+1,      31744,      31744,     1024,     4096, 420d047ee4d55b7af2d97c5116e25d37
+0,         20,         20,        1,  3110400, f39c483fbf3c598c38cf543abad2c03c
+1,      32768,      32768,     1024,     4096, b3334bcd2ed3b2ab32ac13c24a8d1322
+0,         21,         21,        1,  3110400, 3b21171a003827a203c0c5853c36893e
+1,      33792,      33792,     1024,     4096, 1472898e0d4fafd52fc0adb789e06987
+1,      34816,      34816,     1024,     4096, f3e7c33d92578fa5eb90933491106c37
+0,         22,         22,        1,  3110400, 40e8733f93c468cb58c3921c80f81c68
+1,      35840,      35840,     1024,     4096, 7d7f8e8f542be9291b7895472f8cf0a7
+0,         23,         23,        1,  3110400, a2d716f215af7d778bd605decae201dd
+1,      36864,      36864,     1024,     4096, 66e2e35d40beb6e1df30d4d180661d4b
+1,      37888,      37888,     1024,     4096, 8fc0c16fc3ec5db47e437263b429406b
+0,         24,         24,        1,  3110400, e6fb8d3f1c9747f185b50eded67ff0ab
+1,      38912,      38912,     1024,     4096, dd6331f430abf41dade4e4a85784745f
+1,      39936,      39936,     1024,     4096, 5c693b37419fe43fb83b715d4f7083ec
+0,         25,         25,        1,  3110400, fe4e0cf566674672ad043f24eae02e63
+1,      40960,      40960,     1024,     4096, 9e13b054c07feac0fad601ed4823996e
+0,         26,         26,        1,  3110400, f57d4d32012dfcb14a270bdc90812e5e
+1,      41984,      41984,     1024,     4096, dbffea22781b35088859868189e270aa
+1,      43008,      43008,     1024,     4096, 59f1a985d7a96273bf48be9dc8a7d4d2
+0,         27,         27,        1,  3110400, fe7aa8c1754b9590fe75b783d4e126ed
+1,      44032,      44032,     1024,     4096, b07e02d0bfecbde9b48e6e83952aa3c7
+0,         28,         28,        1,  3110400, 29ae6e541021bf481c8937c37ac34570
+1,      45056,      45056,     1024,     4096, d0dfca31885ac1d21fe42bb26e3b11ec
+1,      46080,      46080,     1024,     4096, 5479670aea3a4d07a61b9edcc2113c7b
+0,         29,         29,        1,  3110400, 211e0803b5a704c7ef1c36fc5c182710
+1,      47104,      47104,     1024,     4096, 0118d7fc0d0f8462647f37aa6d82c8fc
+0,         30,         30,        1,  3110400, a4ea3a4686d214ad54fcdce63f8c2dc0
+1,      48128,      48128,     1024,     4096, cc2b22d4c7497accb6b194968b407e10
+1,      49152,      49152,     1024,     4096, fe8d30c082a3608b8ccf5e3d6f4b0ddb
+0,         31,         31,        1,  3110400, 8af971cf637609534ec59b6f29a3666e
+1,      50176,      50176,     1024,     4096, ed15a6aa2d87395b47888f744f41df30
+1,      51200,      51200,     1024,     4096, 8d4971ee25d274822cceaa00cc8df1a1
+0,         32,         32,        1,  3110400, 8cfe93dfd76e4f97cc04a145bb559af5
+1,      52224,      52224,     1024,     4096, 799f1c9cac6c3cedaac80cc6b26df827
+0,         33,         33,        1,  3110400, 691fe61398016f767ea7ba781ef72953
+1,      53248,      53248,     1024,     4096, 40cbefbb455231c0f21af129e2504395
+1,      54272,      54272,     1024,     4096, 05eb1731cb93ff68815e92f0a528d4d7
+0,         34,         34,        1,  3110400, 7eaf5bc5e942f8218f46c9c505d7b58c
+1,      55296,      55296,     1024,     4096, de96fa062c59b6b3b88731e60e02d040
+0,         35,         35,        1,  3110400, feacc8282186935e310ec2dccbb79239
+1,      56320,      56320,     1024,     4096, 14d38d89079c1b7801987c5315477d11
+1,      57344,      57344,     1024,     4096, 9a0029ae7fc7e029402ec29536288cff
+0,         36,         36,        1,  3110400, a5dfb5c4c23e647e2c77dd117e4ed714
+1,      58368,      58368,     1024,     4096, 83925a6ebf953e6b711d3042c95ffe51
+0,         37,         37,        1,  3110400, da9306c2872788ba2a587eab7f597f13
+1,      59392,      59392,     1024,     4096, 5b332b22a2a8ae72cbf8154d2cfd9b5d
+1,      60416,      60416,     1024,     4096, 8dcd5041b49036e43f74819439a6db63
+0,         38,         38,        1,  3110400, 19d14a655aa02cc2767afd9ed9bc2ff4
+1,      61440,      61440,     1024,     4096, 90ea3ac56003b4f1c0242a611746b76a
+0,         39,         39,        1,  3110400, 4d9e6455fb71d5cd511dddf894b4f7d8
+1,      62464,      62464,     1024,     4096, 6ed9f815739e6e3b8ed125688f693d5f
+1,      63488,      63488,     1024,     4096, e056b91f4875029af3c29a26e37380a1
+0,         40,         40,        1,  3110400, 052c32c00bc1b40d5270f4b5c4d34d6b
+1,      64512,      64512,     1024,     4096, 36ff82cb34ffc9ea60b1fbc8d3edd9f1
+1,      65536,      65536,     1024,     4096, 975c2d6080532645be510241d52b788c
+0,         41,         41,        1,  3110400, 645e0c3eb8599c2cc712a97cbae9d1d3
+1,      66560,      66560,     1024,     4096, 9a53c620f19d5bb733c8bd068137ba3a
+0,         42,         42,        1,  3110400, d0b3fe092511fed0e384ffad85f6ae3c
+1,      67584,      67584,     1024,     4096, 482365611c86663b70a13511d8194282
+1,      68608,      68608,     1024,     4096, 4f9002718a7103a4b15b96e9e08a2cfd
+0,         43,         43,        1,  3110400, 131c090f806658ffbb4949b3310a6348
+1,      69632,      69632,     1024,     4096, 2301e3336164fe298b85006b0e7dc0b8
+0,         44,         44,        1,  3110400, 71e8042ed01a1bbe971417d87685dda1
+1,      70656,      70656,     1024,     4096, d8b4c47bb3f58a70109d70edaea9916b
+1,      71680,      71680,     1024,     4096, ff87ca54ad112c083b1cc25043429410
+0,         45,         45,        1,  3110400, 948cc8d7eceb2e512c8660063143557b
+1,      72704,      72704,     1024,     4096, eee9c7fd3c78681fcfcdf40ed7ee2c6b
+0,         46,         46,        1,  3110400, 8b580d4f620af52ac0a5702197f5e9d6
+1,      73728,      73728,     1024,     4096, d05a2d810e2c201cff9e115c48cff977
+1,      74752,      74752,     1024,     4096, 5ef14640b86062c99427e0177b33657c
+0,         47,         47,        1,  3110400, a30bfc0727c6abb477751969c1b1450c
+1,      75776,      75776,     1024,     4096, 92398dd17af6362aa81fe8a9aad0af01
+1,      76800,      76800,     1024,     4096, 131b910e0af30e5250726b3d4a5f4f10
+0,         48,         48,        1,  3110400, 28381142e60e9011d0cd75e7881c1956
+1,      77824,      77824,     1024,     4096, 50157f3541acd42d2c460e125a9f2393
+0,         49,         49,        1,  3110400, 1b4bdae40d76ed7625f85da7c5289360
+1,      78848,      78848,     1024,     4096, 266fd18529d95b7347e9a019271056fc
+1,      79872,      79872,     1024,     4096, 2f9a347ce750501f2e4d0a36c95bc8b0
+0,         50,         50,        1,  3110400, 354fb545adf6082dc57b8077ba1466ad
+1,      80896,      80896,     1024,     4096, 0f8b0626ef01d0e941914d7810945aa1
+0,         51,         51,        1,  3110400, c969f21be0dcbef6b232d3f0a9e3f189
+1,      81920,      81920,     1024,     4096, 51da6b0c3d5daddb8f4635d692b0e8ba
+1,      82944,      82944,     1024,     4096, aac8a7152103243dabd94e2a37caf53f
+0,         52,         52,        1,  3110400, 4ff19ab89ebe466e8ce3df0ba25b6b0a
+1,      83968,      83968,     1024,     4096, 50c8d5b0a373c07ecb66ca33e087a299
+0,         53,         53,        1,  3110400, 49f601932c19ae8d405771e9cfd4f9e8
+1,      84992,      84992,     1024,     4096, 2abd975868c9805fa456a08f1ab16c91
+1,      86016,      86016,     1024,     4096, 56309326bc80a6216b048934713db907
+0,         54,         54,        1,  3110400, c75d2221327d5953b013fdc9d36dbcd3
+1,      87040,      87040,     1024,     4096, 470fd4997793f53ad2b21b47c6b98bb1
+1,      88064,      88064,     1024,     4096, 1aed57e8ba33350711e1ff668c906953
+0,         55,         55,        1,  3110400, 6c8b8c6012b6ac36c4c2a3ab5629ec61
+1,      89088,      89088,     1024,     4096, 9e910446a3b8d30f270c05aca011704d
+0,         56,         56,        1,  3110400, 44a05705ad5cb730e7244c36b4f94771
+1,      90112,      90112,     1024,     4096, af6c036d66dd5bb5c250f16207173180
+1,      91136,      91136,     1024,     4096, de76d587f349fe597e65cb1f51bdcc34
+0,         57,         57,        1,  3110400, 0ee53224814c84224525181aacbd9a39
+1,      92160,      92160,     1024,     4096, 7abab5bffebfcc801a11f6b7905a13c9
+0,         58,         58,        1,  3110400, cd7797aa75c96a2a71e939ea6b677310
+1,      93184,      93184,     1024,     4096, 3567caaaea75b1cda5920fd6dbf2a016
+1,      94208,      94208,     1024,     4096, 2a6848418d829a273b85620e5b59cabe
+0,         59,         59,        1,  3110400, ac2207156da29fd33ca1c66224445bbc
+1,      95232,      95232,     1024,     4096, 79d0c213acefa2bc00de4fd455b34df9
+0,         60,         60,        1,  3110400, 1669c959bd2b6f245cc83ea00d0de83f
+1,      96256,      96256,     1024,     4096, 267414473e0e7c75c7c1dde7be001762
+1,      97280,      97280,     1024,     4096, 893107ddb969cbc25a5aa8c564559836
+0,         61,         61,        1,  3110400, a74e6586842a4c877fa9a123e7b8ef94
+1,      98304,      98304,     1024,     4096, 1555a7117d5254afef3e36b1d0a9edd2
+0,         62,         62,        1,  3110400, 5af4edbf8a77ead17f4891cbac6d72be
+1,      99328,      99328,     1024,     4096, dd92c3241e45d54cc072fc0307d1850c
+1,     100352,     100352,     1024,     4096, 95c55b1767ddea9af80edbb32e2b4dbc
+0,         63,         63,        1,  3110400, 52598cb5f3cb75bc401370118bcb2c49
+1,     101376,     101376,     1024,     4096, 1d133e7a83f0cf96a6e9cee90d17975a
+1,     102400,     102400,     1024,     4096, b71b82a4abcde640a46fcc3bd6930d51
+0,         64,         64,        1,  3110400, d44aa05de89e71b3af23de9afbb2a3b1
+1,     103424,     103424,     1024,     4096, 57697c6f8e32dda304b77b2881b5011d
+0,         65,         65,        1,  3110400, 161fd1ff8b477a137e113ce7e6ac26f7
+1,     104448,     104448,     1024,     4096, 592725ee5ee2adfc02e8c189f402a3d9
+1,     105472,     105472,     1024,     4096, 2b1073734ea08c0cfeeaab15ad04aa60
+0,         66,         66,        1,  3110400, b9b5efaf5f74954897882b0f09a5c088
+1,     106496,     106496,     1024,     4096, 18f2a5daef2293e690fe0915127cb795
+0,         67,         67,        1,  3110400, 1405f3d5e75c2ffd390b46f7def70478
+1,     107520,     107520,     1024,     4096, 4fa75ff788019d99f6ab0ad2ee5cc3d4
+1,     108544,     108544,     1024,     4096, d40f1172149821d84130990d5acd9395
+0,         68,         68,        1,  3110400, 3bf19eda056d5fc609662bf5ca8e3b33
+1,     109568,     109568,     1024,     4096, edae14a39d1034cfce5655337df2206e
+0,         69,         69,        1,  3110400, 8ba1925dc6b38a340e51aa234b5ff4df
+1,     110592,     110592,     1024,     4096, 574a6a3b427314c2eb48226d2d7ce67d
+1,     111616,     111616,     1024,     4096, ebf13ab2795c091f15b9086d5bf2e11e
+0,         70,         70,        1,  3110400, 301aecbe3f8bf3831d6de540a568abbc
+1,     112640,     112640,     1024,     4096, 074c63c81345804c7437de33cb5e355f
+1,     113664,     113664,     1024,     4096, 58da9edb5e278e292cebebf6b26435b0
+0,         71,         71,        1,  3110400, 388565f088710aeb0f50ecdf8ef0ee86
+1,     114688,     114688,     1024,     4096, ce75fd079a21e0637900ac491c4fbc36
+0,         72,         72,        1,  3110400, df27196ff8da5085c58979deeae9c4a1
+1,     115712,     115712,     1024,     4096, 92153c14aeabfb4b4546c340201202b0
+1,     116736,     116736,     1024,     4096, f02e8d23ee0ca80713b4458873ce724f
+0,         73,         73,        1,  3110400, 750226c84aae270449e0a20751218217
+1,     117760,     117760,     1024,     4096, 0adec04349d269f7dde27c59c7077954
+0,         74,         74,        1,  3110400, 076d4246e5494e6d5cf8ffb09be3a751
+1,     118784,     118784,     1024,     4096, 4408d6824ca6c2080fd76c1ab840382c
+1,     119808,     119808,     1024,     4096, c3603730a05cff479148d9b69b7ed2e8
+0,         75,         75,        1,  3110400, 9f626289f18a3482628b511840deaa10
+1,     120832,     120832,     1024,     4096, 25a6af5ef98aae3c40610e58ca65b8a9
+0,         76,         76,        1,  3110400, f925e3ff4bda9962a313b6ac21a201f1
+1,     121856,     121856,     1024,     4096, 5ad875e4437059157f0f3a5ec911e741
+1,     122880,     122880,     1024,     4096, cfc06fdc755579be7440fca797d81191
+0,         77,         77,        1,  3110400, 1d893861545e804944c519e580204988
+1,     123904,     123904,     1024,     4096, 7c783d7b973bb8048eeaa568297cfa35
+0,         78,         78,        1,  3110400, 4d42fca73082fde38020a8fec0b27db2
+1,     124928,     124928,     1024,     4096, cae22b99b2c6fea527fb5c038d3765f8
+1,     125952,     125952,     1024,     4096, fc4b5ef5a5a28102bf782b8732fc2993
+0,         79,         79,        1,  3110400, ed88335480ef00aedcf73c3a8712705c
+1,     126976,     126976,     1024,     4096, e278968277ccc64992ba6bb73ad9909d
+1,     128000,     128000,     1024,     4096, e2eb81d30fe7fa901eaf295fca8bc8b8
+0,         80,         80,        1,  3110400, 968ba4c93f80cb8c6fbfaccb765c3d70
+1,     129024,     129024,     1024,     4096, 5a9b99c0639bd80ef7f20971590e23c7
+0,         81,         81,        1,  3110400, 01ef52b0a18f46556ba9e00d1b0c77af
+1,     130048,     130048,     1024,     4096, 2765d109f09efa2f2661dfd2e666f25c
+1,     131072,     131072,     1024,     4096, 1ff52d5870cd667388fe69f48e228fc6
+0,         82,         82,        1,  3110400, 54582f2ae6878b37cbd1b5c576fd09c7
+1,     132096,     132096,     1024,     4096, 3612470a58bb5eab26239fbd3d097866
+0,         83,         83,        1,  3110400, fec6585d244257b52c90cfa19b22fd7c
+1,     133120,     133120,     1024,     4096, be170a1e3eba4c53b0e2fc8b795c7767
+1,     134144,     134144,     1024,     4096, abb85ab4987c50e05ecab0e831d53fd6
+0,         84,         84,        1,  3110400, be1e0237c5d8bab98aa75a91e0d4bd9b
+1,     135168,     135168,     1024,     4096, 2e9b5c17dcaf77f171544c0eee6cf191
+0,         85,         85,        1,  3110400, 1138a9f316d1a2a9ae3c42d777561595
+1,     136192,     136192,     1024,     4096, 54c1c5d9c706e1999b2243828736c98d
+1,     137216,     137216,     1024,     4096, 7b482f85e123981e8eb37c364b2f4bbe
+0,         86,         86,        1,  3110400, 62da42e9e8643ec9b1299bbeaffa4be5
+1,     138240,     138240,     1024,     4096, 0be223967283ac3a0029fad458176155
+1,     139264,     139264,     1024,     4096, 8ec0c4054d484ebf8d68e32c3ffb5924
+0,         87,         87,        1,  3110400, 55fc4dcfb7f8bab37518884fc6747416
+1,     140288,     140288,     1024,     4096, 0308762a4362acbf1c9e1e62e5055880
+0,         88,         88,        1,  3110400, 51be69793bfae3fe078d04e8dee3a48b
+1,     141312,     141312,     1024,     4096, 56a18f795b25ef93d27a53a9f6a87ec9
+1,     142336,     142336,     1024,     4096, ad46ed8c3ac23b79bbd066e43b77f4e6
+0,         89,         89,        1,  3110400, 363b794880a2cc06e5be4626c6847c19
+1,     143360,     143360,     1024,     4096, 62719a7d4521ac82957ce3f52ba21854
+0,         90,         90,        1,  3110400, cd1d585cf9cb68f72f7d6a0ec9ab96ab
+1,     144384,     144384,     1024,     4096, 8854128aa8377ad5e52af990d4642ef5
+1,     145408,     145408,     1024,     4096, 38827bf7a129f92062b90fcf03ba80c1
+0,         91,         91,        1,  3110400, 138f8686ee7294bf4af072cee7043f52
+1,     146432,     146432,     1024,     4096, 24d4e112b275116dcb512e6b283487e3
+0,         92,         92,        1,  3110400, 137c1bdc2bb6fa1e181f84b09c14e0b6
+1,     147456,     147456,     1024,     4096, 03c374d9d72df87f99427fe1045be8da
+1,     148480,     148480,     1024,     4096, c0c5a8fd24d5a9f535118ec266468976
+0,         93,         93,        1,  3110400, e7bc46739c46ec886fc5059af72f772a
+1,     149504,     149504,     1024,     4096, 3381472e14c42a06cdcf35774a0b3a9a
+1,     150528,     150528,     1024,     4096, 4c07a7a02831675a686c6f31e2809e4a
+0,         94,         94,        1,  3110400, db4c2aa67c76573e4880b029bef59c8d
+1,     151552,     151552,     1024,     4096, adde928f734ffe6c07446c9d70214f74
+0,         95,         95,        1,  3110400, 3a287113ad2196420a0474243bd1813d
+1,     152576,     152576,     1024,     4096, f1ff82fec1ef096090acbf60212b2b69
+1,     153600,     153600,     1024,     4096, e3b271991cc2cd4fdb56616443c9f3e8
+0,         96,         96,        1,  3110400, d3ed803ca1984f021802e6c7364cb57f
+1,     154624,     154624,     1024,     4096, 025c3272bfb9f24ab687bc818d57d853
+0,         97,         97,        1,  3110400, 776d99c2973261c12e978fe9fe5c5794
+1,     155648,     155648,     1024,     4096, 8bb2de059194408962be4dfdd88775fc
+1,     156672,     156672,     1024,     4096, eb61bcbcc9897d01f5645a0662dfbd6d
+0,         98,         98,        1,  3110400, 62ee2ec1261db8544ded9db8c4442f15
+1,     157696,     157696,     1024,     4096, 48a3c897183538de9920e94417f54733
+0,         99,         99,        1,  3110400, 1bfabc7ab3701e112e76955a449431d7
+1,     158720,     158720,     1024,     4096, ffe159d137e51d8fd4d5b3639df563a3
+1,     159744,     159744,     1024,     4096, 7702e58e4ae075b6e12e863f8c5d908e
+0,        100,        100,        1,  3110400, e03dfc70c25b77fc47ea42ddb8d5da5c
+1,     160768,     160768,     1024,     4096, 2e2b497643e1b6bf13dd5e585ecd9092
+0,        101,        101,        1,  3110400, b0f0c308b49cb82fad3b4995cb60b6a1
+1,     161792,     161792,     1024,     4096, fcce76ebba5166966db792dfc77fda3d
+1,     162816,     162816,     1024,     4096, ccf5a819e00ce8c5197ae5b2b827f3e5
+0,        102,        102,        1,  3110400, 627c888ea94dabff2aaf96c800a85f7a
+1,     163840,     163840,     1024,     4096, 2385c9e205cfdd5e0b2ac54e9ef0f525
+1,     164864,     164864,     1024,     4096, 40214ba3f9bdc1bbb7d59e1fdb9def2f
+0,        103,        103,        1,  3110400, f2231eac6e5885f61f470a5c6b65ceea
+1,     165888,     165888,     1024,     4096, 52516e860beca0af0d80f79266c761cc
+0,        104,        104,        1,  3110400, ec873def59cde7c114fe27ba1b2c25a6
+1,     166912,     166912,     1024,     4096, a25848945235fd70d1021b9a92dafd8d
+1,     167936,     167936,     1024,     4096, 38c7a26c7a545df5fa1721ee3e820309
+0,        105,        105,        1,  3110400, 7a5e98b10b1f984b624bc6399a45dde2
+1,     168960,     168960,     1024,     4096, 506d3c3b34f060a0516011ca237cb6a5
+0,        106,        106,        1,  3110400, 180d7b9ef69a8ee32e5fcc3b9579c11f
+1,     169984,     169984,     1024,     4096, 2ef3cd56dfc870a251bb97f4d474be84
+1,     171008,     171008,     1024,     4096, 383c82e600ebadddd0467cc3a9575f3e
+0,        107,        107,        1,  3110400, 8c0107e3bda4c993131b361d598b4b4d
+1,     172032,     172032,     1024,     4096, b99763cdc1d5412f89e7d1fcb3bd1c35
+0,        108,        108,        1,  3110400, 31e1ea32c3200a41814b5890b81c5211
+1,     173056,     173056,     1024,     4096, 7b3ecb91c63f7f86d6e96f305b3bd507
+1,     174080,     174080,     1024,     4096, 6945621a1ee0fbc68e7db786bc837f53
+0,        109,        109,        1,  3110400, f6223221be5136d03c87333c7f2113c5
+1,     175104,     175104,     1024,     4096, fb1de71ab0d840699834d583fbd3468e
+1,     176128,     176128,     1024,     4096, 2a3b2d299d87f5b982156ae740fb7b46
+0,        110,        110,        1,  3110400, 325d0fb65e35826aebb0f80fc34f426f
+1,     177152,     177152,     1024,     4096, 6462952e8555172110c37c579d45f63c
+0,        111,        111,        1,  3110400, 1328e8c69419fe1c44fc6cfa1f648839
+1,     178176,     178176,     1024,     4096, 279537a0668d254a6710c935f2a7b8d7
+1,     179200,     179200,     1024,     4096, 286a27fee95bf8773ee8ea03621e9502
+0,        112,        112,        1,  3110400, b234f9aab6965e136bb283a2ad0fbb68
+1,     180224,     180224,     1024,     4096, 343413b110fe48674dddeba071440707
+0,        113,        113,        1,  3110400, 3961ef43173c69a28d2196f4fa11d37a
+1,     181248,     181248,     1024,     4096, 66265d80629369fc9fffddbbe6ef104d
+1,     182272,     182272,     1024,     4096, 3ff193da4d2168f97f17841f9669d33e
+0,        114,        114,        1,  3110400, 943c6dd42254d346d81fd55bfad60573
+1,     183296,     183296,     1024,     4096, dd4074398b101502c29009687686e78d
+0,        115,        115,        1,  3110400, 0028c32eacb6f4d77cb98cf856d266e4
+1,     184320,     184320,     1024,     4096, 562be444896543f259527081c7795597
+1,     185344,     185344,     1024,     4096, 145b4b260f2393ec1e3464d79c87fe8e
+0,        116,        116,        1,  3110400, f478fe49fed80b92eaa182aa8e794077
+1,     186368,     186368,     1024,     4096, 6884ebc820bb41e61dc54a87bd55011e
+0,        117,        117,        1,  3110400, 9a52c061c135bd75ee317fe48a2cddf8
+1,     187392,     187392,     1024,     4096, d2d8b2fd7a140a3f32d5649f8e507637
+1,     188416,     188416,     1024,     4096, 767071a409e0b07bd2cdb250c3c89607
+0,        118,        118,        1,  3110400, c327b95c8afa716f7719603d2b65679b
+1,     189440,     189440,     1024,     4096, 64c4eed5e1b85e0728b1b0e3de30ffa1
+1,     190464,     190464,     1024,     4096, 11ffef7e85d9925a4032b3bcd9740e20
+0,        119,        119,        1,  3110400, edd3b4c4fa157940b9c252a7d94e6f55
+1,     191488,     191488,     1024,     4096, 99893421a493664be17085567e160280
+0,        120,        120,        1,  3110400, 5ccdffe7356a73d2d11eb7767ddfa396
+1,     192512,     192512,     1024,     4096, ea422077bc582aef4af5bf3ca3aa062a
+1,     193536,     193536,     1024,     4096, a9e7ab9c45faf39f3cd1eee69c95d647
+0,        121,        121,        1,  3110400, 7c4d8083656581ccc666597da5559a46
+1,     194560,     194560,     1024,     4096, fc19b6642df3b85c7ebe684e705bf29d
+0,        122,        122,        1,  3110400, 28cae533418f9a5d62869cbed0be384c
+1,     195584,     195584,     1024,     4096, 39420620aea9d61b240691dada0f3063
+1,     196608,     196608,     1024,     4096, 5ad516a55655cdc2a3348badc2c7e5a1
+0,        123,        123,        1,  3110400, e9cf527a059c80461fd507bab4817069
+1,     197632,     197632,     1024,     4096, fa4f3d45c1cfc88088b4cd6f2924184d
+0,        124,        124,        1,  3110400, a2f2518b8a25932afbb78523e8da289e
+1,     198656,     198656,     1024,     4096, 689be1d6f84b136537df11b8ed22b540
+1,     199680,     199680,     1024,     4096, 9612dad6c93c75b6911f21997f2f7aee
+0,        125,        125,        1,  3110400, 8fcbddce7f2a6c3bc380ad4cec17e51c
+1,     200704,     200704,     1024,     4096, defe4b66e5dd9c6a527aeb9ebec04cdf
+1,     201728,     201728,     1024,     4096, 0c39b71be01611d6be02b796aec2c6de
+0,        126,        126,        1,  3110400, 32ff7b550d09821b2b1eb64c288559ba
+1,     202752,     202752,     1024,     4096, 2150a3d6fdec61cd587b83217f9bbb2c
+0,        127,        127,        1,  3110400, 1eb4e9050a6657ab9290ac58ed3aa93f
+1,     203776,     203776,     1024,     4096, 4be35b36d46ca56b778ae0afdd1e4200
+1,     204800,     204800,     1024,     4096, fb79d3d9d0e4d590882cbd023bbcd00d
+0,        128,        128,        1,  3110400, e45fd52e9ec3ad2d42740dd7cb5e971b
+1,     205824,     205824,     1024,     4096, d4295a4a32d647a04ec0eab639c0d9fc
+0,        129,        129,        1,  3110400, d625b970a797e37e465d9319471d40c3
+1,     206848,     206848,     1024,     4096, f03478b7689af9bdf6e67d271a4b3114
+1,     207872,     207872,     1024,     4096, 9fd97010278dc9147d8b73be9aacfb35
+0,        130,        130,        1,  3110400, b98e4a8fd994915a3dde3def26a9b171
+1,     208896,     208896,     1024,     4096, 2b9670707ad661b2125620d2156f11f6
+0,        131,        131,        1,  3110400, a99b759bdfcf7f7cee8f1ee84ca2d66d
+1,     209920,     209920,     1024,     4096, e1b2afbfcb5edb8e4b88ae9c08848c30
+1,     210944,     210944,     1024,     4096, 58c8914fd4ec12bfe13f388050b251e8
+0,        132,        132,        1,  3110400, 5439cd9643a76d21e5f1deb0aa4000a2
+1,     211968,     211968,     1024,     4096, 4d8e0b0d4cd31f98eb4ca9922bba1497
+1,     212992,     212992,     1024,     4096, 02b1c362c2f1939fa41a4cc8d4b25d6a
+0,        133,        133,        1,  3110400, 383640e375fa6b46bba21acd556f0efd
+1,     214016,     214016,     1024,     4096, 10d3b076171713e9978c67515fdd4304
+0,        134,        134,        1,  3110400, e91ba9ef2595fbee20fb0f1d5e0b92ba
+1,     215040,     215040,     1024,     4096, 726450c85dbd838bc00c6866d4c10786
+1,     216064,     216064,     1024,     4096, 3479045f30a97faaed2f241478ea0447
+0,        135,        135,        1,  3110400, 9c8ce07f22215b8450b01e82fc085b3d
+1,     217088,     217088,     1024,     4096, 63d5ca0f8d7e086dfae54e86a75f8971
+0,        136,        136,        1,  3110400, 4f11b553eaaa1feb682842a1123e1b43
+1,     218112,     218112,     1024,     4096, 41953b97d8f2d3e5c05827c670e3c562
+1,     219136,     219136,     1024,     4096, 50709f01bf8dbe8e40f5b414211e7dbe
+0,        137,        137,        1,  3110400, 32e8823643ce4cb0154482611f4de51d
+1,     220160,     220160,     1024,     4096, 03d813116d70fed14987e17ebcaeed3e
+0,        138,        138,        1,  3110400, 207ee505f0cfb54a3fd1fd5256ba906a
+1,     221184,     221184,     1024,     4096, d695e2905aac1dee39a8f35e3a4b7fed
+1,     222208,     222208,     1024,     4096, 35c5c7a9cfe5cfb08d3dd269aec9b901
+0,        139,        139,        1,  3110400, f95de0e1f3cd2559b6a81a6dbe53ed5c
+1,     223232,     223232,     1024,     4096, 2a4f3699775e0d67651c4c40913c9059
+0,        140,        140,        1,  3110400, 3c47b1ea481bdae7b7730084407fd22a
+1,     224256,     224256,     1024,     4096, bb561814ffb5b76a1faf7df4375c233e
+1,     225280,     225280,     1024,     4096, a3c632a0d54e07e3928aec8ef169fd11
+0,        141,        141,        1,  3110400, fbec533f0cb4d79185e09c9917329473
+1,     226304,     226304,     1024,     4096, a805c35e91615f17669130ec58191b75
+1,     227328,     227328,     1024,     4096, 0858806663f6430219b68df7ff83a932
+0,        142,        142,        1,  3110400, 5b9f1cf510bc72d42772a78d400d9831
+1,     228352,     228352,     1024,     4096, 59339810dea47a705ab9e0913f7a88d5
+0,        143,        143,        1,  3110400, 6b031f180439e753b6b0e36de46dc1a5
+1,     229376,     229376,     1024,     4096, 6d9cdd81e0ba85ad4e00c34e2a096611
+1,     230400,     230400,     1024,     4096, cd6dd8dd0dc5198b7bf0b45d4ecc2608
+0,        144,        144,        1,  3110400, c09870b1e13efa8b685d89ebd781e835
+1,     231424,     231424,     1024,     4096, c2dd972eb34af27a210bcfa3e5f8da93
+0,        145,        145,        1,  3110400, 439c6cc6f7157eaf046e06d9e55ddf69
+1,     232448,     232448,     1024,     4096, 0993bc45c9582048d6982beeaec6b72e
+1,     233472,     233472,     1024,     4096, ae93c593fd775ae997c43eef40fe8d0e
+0,        146,        146,        1,  3110400, 7d5520c184c7bf3f175eea3526deb7a8

diff --git a/tests/ref/fate/mov-elst-ends-betn-b-and-i b/tests/ref/fate/mov-elst-ends-betn-b-and-i
new file mode 100644
index 0000000..d6f325b
--- /dev/null
+++ b/tests/ref/fate/mov-elst-ends-betn-b-and-i

@@ -0,0 +1,33 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   115200, e10741e5457e9326d5e992e6c05c3e32
+0,          1,          1,        1,   115200, 7e20f8729b6b53dc11791927bf4a5aec
+0,          2,          2,        1,   115200, 4e5dc2b806e394cd666c968f736fecd0
+0,          3,          3,        1,   115200, 7a3c7473d44c5f60c07655f6fc0c2ac3
+0,          4,          4,        1,   115200, 038254422a603a3270c09cdcd149707b
+0,          5,          5,        1,   115200, 7553b6b4547cb23ef8f0392ed5a5d4b0
+0,          6,          6,        1,   115200, 6d017ede7f446124af7308667cb0dc41
+0,          7,          7,        1,   115200, 77752f0288ae64f857732b8e62e47457
+0,          8,          8,        1,   115200, d656833951af99330625f7c6de7685c4
+0,          9,          9,        1,   115200, 14338b833e431e566ac98da841600bfe
+0,         10,         10,        1,   115200, 07ea95d1659f3c4424a470a546d0df6e
+0,         11,         11,        1,   115200, fd05b8cc83072f813e89d394d1f6efc6
+0,         12,         12,        1,   115200, 750b82ca5c7e901545e7b1aa69692426
+0,         13,         13,        1,   115200, 7347679ab09bc936047368b8caebcaff
+0,         14,         14,        1,   115200, 63a23fdd57ac8462b9ffbcb12ab717b3
+0,         15,         15,        1,   115200, 705257a1c99693db233e2a3ee027adcf
+0,         16,         16,        1,   115200, df861a2ec7a4ef70e82b1c28025e5a48
+0,         17,         17,        1,   115200, 2a8b403c077b6b43aa71eaf7d1537713
+0,         18,         18,        1,   115200, 973b5cd3ce473e3970dfa96045553172
+0,         19,         19,        1,   115200, fc612c0afeae3b6576b5ee2f3f119832
+0,         20,         20,        1,   115200, 97074fe5a0b6e7e8470729654092e56c
+0,         21,         21,        1,   115200, 8cf9337201065335b3aa4da21dc9b37a
+0,         22,         22,        1,   115200, 93ff3589294cc0673af3daee1e7fe42a
+0,         23,         23,        1,   115200, c0b6fd870a022f374f9d6c697e8e293d

diff --git a/tests/ref/fate/mov-frag-encrypted b/tests/ref/fate/mov-frag-encrypted
new file mode 100644
index 0000000..e6c109b
--- /dev/null
+++ b/tests/ref/fate/mov-frag-encrypted

@@ -0,0 +1,57 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/24
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 120x52
+#sar 0: 544/545
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,     9360, 920bdc277a6a31c1daed9aca44b10caf
+0,          1,          1,        1,     9360, f1c0b61fef593de57cb97be7fa846569
+0,          2,          2,        1,     9360, 6ef32d9d4398355aebf6d3fb11d51d3f
+0,          3,          3,        1,     9360, d38fd3ef1e5a92fc109b8dd9eb6dadeb
+0,          4,          4,        1,     9360, 54cc0c8a25d2f14f32663837d5e646f1
+0,          5,          5,        1,     9360, b4b6829726dc3decb8b80ba0c35bcf30
+0,          6,          6,        1,     9360, fca3f941e60a2f0a4ce30d5e0efbec3c
+0,          7,          7,        1,     9360, cda6e26b6c1039ff3d229b262c9210c3
+0,          8,          8,        1,     9360, f0d69255e3a27a8b4ae8a4b7b210929d
+0,          9,          9,        1,     9360, 12cb23dd4e32af9c3b35f943714e3fdd
+0,         10,         10,        1,     9360, 082aaf3216124ddcecb422fe5c832e82
+0,         11,         11,        1,     9360, ff37bb8cd6bd0412a3b3cb45db54afc9
+0,         12,         12,        1,     9360, dfb9085441575732844b6c2f05d5f542
+0,         13,         13,        1,     9360, 0017100feaaa9fc7eacd2447d50d7542
+0,         14,         14,        1,     9360, 4e2f1b8c4e04c59934c2f58541e62613
+0,         15,         15,        1,     9360, 27a44dfea7cd2d30e488194c34ab473c
+0,         16,         16,        1,     9360, fc7b56bd95e990a33cf575d1ef820902
+0,         17,         17,        1,     9360, fa2d1609e69714dffc410e65f3c8b755
+0,         18,         18,        1,     9360, 705d7429f447cb13febe202d567795f2
+0,         19,         19,        1,     9360, 234802ce86e868faaf2cd40a286846ea
+0,         20,         20,        1,     9360, 2f0354b40d211d0a4ade4568bea4f85e
+0,         21,         21,        1,     9360, e96af3b6c0cc931463ca77d6be0f1148
+0,         22,         22,        1,     9360, 04a904d798361959971361401879c7e4
+0,         23,         23,        1,     9360, 2f119642340df6d25362b5590ded46b7
+0,         24,         24,        1,     9360, 5993fca2e60050706f857ac76e48f386
+0,         25,         25,        1,     9360, 2ff3b5775fed3d527bfbbeea786787fe
+0,         26,         26,        1,     9360, 42024dbe23d3fb5b0d8987ae1ce390a8
+0,         27,         27,        1,     9360, d804204f0bd9db5f6a758e2c934d9e38
+0,         28,         28,        1,     9360, e322712e6e34c58ec1a2ab5e2c1e3bfe
+0,         29,         29,        1,     9360, 3975bd1a5f6a6b6260276777f9de611e
+0,         30,         30,        1,     9360, 4388f0412efc6310706a7cdedc859ea9
+0,         31,         31,        1,     9360, b4b9a11b0b86635267345a569640e8d4
+0,         32,         32,        1,     9360, 31879c7b8d6b67a4209ffde786bb8cb4
+0,         33,         33,        1,     9360, 4b6dc02d7c889fe4abd4e013b25f585a
+0,         34,         34,        1,     9360, dc73aae82bd39a1220d1106c8d3e8252
+0,         35,         35,        1,     9360, 54c7dfbd49f312806f6c1a89f7c2c36f
+0,         36,         36,        1,     9360, 150abc64f8994d444a521ea90570443c
+0,         37,         37,        1,     9360, d277cdc7dcadbe0016f2e950459e7ebf
+0,         38,         38,        1,     9360, 2196bf338ead90ea54687b85c73c8229
+0,         39,         39,        1,     9360, 53ce5da5365abc0bd3217dd98e7c465d
+0,         40,         40,        1,     9360, 34ee9832aea55c0c4e6f4381c413c10e
+0,         41,         41,        1,     9360, 1769c7b5849e4681119067a06ac29a4f
+0,         42,         42,        1,     9360, 71f53df739ef283a5184c91ef4b158e8
+0,         43,         43,        1,     9360, d2d394739e9a59c06f0354c16843cb63
+0,         44,         44,        1,     9360, d8e458e92ae29344505a24a3059fc584
+0,         45,         45,        1,     9360, 0f1b11a09911851b798df2ef76253a7f
+0,         46,         46,        1,     9360, 5c4a9f22baecf4e749c0d5c65a4f1007
+0,         47,         47,        1,     9360, 3e2b7e7262fdca08d9d1ef6070125c4b

diff --git a/tests/ref/fate/mov-frag-overlap b/tests/ref/fate/mov-frag-overlap
new file mode 100644
index 0000000..265a93d
--- /dev/null
+++ b/tests/ref/fate/mov-frag-overlap

@@ -0,0 +1,105 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1001/24000
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 640x360
+#sar 0: 1/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   345600, bb5e1ec37e1cb5a4ef2399c7b1c0ed08
+0,          1,          1,        1,   345600, 40455fa94b1f3b1cd199f127401fe187
+0,          2,          2,        1,   345600, 9147b59915250de3508f062b45dda499
+0,          3,          3,        1,   345600, bb9b9109ea61622f491c1e2aa21c503e
+0,          4,          4,        1,   345600, eaceff4d694212b2eb6abf5e090d7b4c
+0,          5,          5,        1,   345600, eaceff4d694212b2eb6abf5e090d7b4c
+0,          6,          6,        1,   345600, f04ca4f64e7e990874c7ac9264ebbe65
+0,          7,          7,        1,   345600, f195d115e0ad6d5c240b08226d312cef
+0,          8,          8,        1,   345600, 7585f3f325cc82028701fcd2360610e0
+0,          9,          9,        1,   345600, 7585f3f325cc82028701fcd2360610e0
+0,         10,         10,        1,   345600, 4e4a2d2d47881b238c4f423917c29182
+0,         11,         11,        1,   345600, 72609b459bde5644fe5815bb88fa537e
+0,         12,         12,        1,   345600, 67569df185a64ec08498d71e94ab0197
+0,         13,         13,        1,   345600, 352e1d41fd8d5fb0a92b722782591066
+0,         14,         14,        1,   345600, f50f96f0ff0ee58ef8577cb9ce218be4
+0,         15,         15,        1,   345600, 9681c998992d20055388ae6c0d4f40e6
+0,         16,         16,        1,   345600, 4cd9c3056c0d1b2d6ed666d2fbabef45
+0,         17,         17,        1,   345600, 88d0c13cff5acb1746e7bb227e7ec81a
+0,         18,         18,        1,   345600, 37a15a2d1d055a9c157a82d77cf48328
+0,         19,         19,        1,   345600, f30e43818a41242ca75caec4be7f366d
+0,         20,         20,        1,   345600, e0441ca5e4137910ed94ea2d67f506ad
+0,         21,         21,        1,   345600, 61cae30cdb5512610ba1ffb7b372be51
+0,         22,         22,        1,   345600, 28832e6ec917fece50ab94f8d6883d4b
+0,         23,         23,        1,   345600, 07cfd60c53a4ede0de4f5eba749abae0
+0,         24,         24,        1,   345600, 254b072e2fe7017933738b8d43cd3f2e
+0,         25,         25,        1,   345600, aa2ceef866576011653fc9e4add8c852
+0,         26,         26,        1,   345600, d5507e992763f69ef7da06bca2e5e0e4
+0,         27,         27,        1,   345600, 3d7fd83fab12dc9ca1bb5c0d09104174
+0,         28,         28,        1,   345600, 1fdc34d30f94bfa2acd5ab4791b3b6be
+0,         29,         29,        1,   345600, a993a348279f71db20e48a879c718096
+0,         30,         30,        1,   345600, b999b6a51b3bdeb99df80aa88f2c43ff
+0,         31,         31,        1,   345600, 2c851f444aaf1dd6f149ee8e6e1bdfa4
+0,         32,         32,        1,   345600, a50301d48d283e9881d142cd87128d86
+0,         33,         33,        1,   345600, 0ddffa560c6310a2ad1cb534336472ac
+0,         34,         34,        1,   345600, 5f5565daeb737f3543a5d07ff9115d4a
+0,         35,         35,        1,   345600, 91640f9887d074221e4e08f9a97a938b
+0,         36,         36,        1,   345600, bc01f13d5c73a92895ca2ad13209628a
+0,         37,         37,        1,   345600, 9750f282817c6df143565e2f57f9f85f
+0,         38,         38,        1,   345600, a1803a3ce342d25b45f913cc8b9fe961
+0,         39,         39,        1,   345600, 4a0ae00d4e9df52c7df5bfb08236ae96
+0,         40,         40,        1,   345600, 819b12f2f7356b296bffd39e85c9b6be
+0,         41,         41,        1,   345600, 40a5af89bff4b10a10d0a51ab5eeb4e0
+0,         42,         42,        1,   345600, 454a9b71be88d3c026cb7239988f3e37
+0,         43,         43,        1,   345600, c653e9c9d64c9495b764b0a6eb3bfbc9
+0,         44,         44,        1,   345600, 6a4facbf8e14ff6177fd4b0f6f659180
+0,         45,         45,        1,   345600, 843d5b77042e347c3d21d113ba2993fb
+0,         46,         46,        1,   345600, 284785e924efba8ab17e6e1f75329287
+0,         47,         47,        1,   345600, 4f2b2d0f3dc4ad165e0bc56560930d49
+0,         48,         48,        1,   345600, 71154e17c2f5c316272e8904ef61c0bc
+0,         49,         49,        1,   345600, 6b10588be4540b5dbbd765db406b8723
+0,         50,         50,        1,   345600, aaf62a95b621c8de2dca22f78594b8c7
+0,         51,         51,        1,   345600, bb508b634416287baf4406f70ca3693d
+0,         52,         52,        1,   345600, 0551149846fca1ae1d164a7a7c64439f
+0,         53,         53,        1,   345600, c3fad13ac1643744c4ea194763ae6cf7
+0,         54,         54,        1,   345600, 4c487adf3ec204c041a50cbf82abd8e8
+0,         55,         55,        1,   345600, dc48539225f4dc05588995d4288c71d0
+0,         56,         56,        1,   345600, be005c4f81422a775fc909dcee84094d
+0,         57,         57,        1,   345600, a374c2ed0e644f03cbb8cd5456877cbb
+0,         58,         58,        1,   345600, 9b7d5fac577cf0d82e11551cd9b280db
+0,         59,         59,        1,   345600, c1d0551bc5cd1f53133e30ed811d8032
+0,         60,         60,        1,   345600, 6ab563cf2df92a2e9e1b8b90c0e0ec07
+0,         61,         61,        1,   345600, 19dadbe3c3638578fe8a3ed3f730858e
+0,         62,         62,        1,   345600, e9810a12f7c14c4b8305a418e4bc3055
+0,         63,         63,        1,   345600, d45d74535cd471949238716bdc12c16f
+0,         64,         64,        1,   345600, d1c2c19d0aba4170fe1a03f9c10b6863
+0,         65,         65,        1,   345600, 047c7fbfcce41fcaec6f1e1686db0429
+0,         66,         66,        1,   345600, 4adc1aa1017880f6baf4786ecd2c8ddb
+0,         67,         67,        1,   345600, 90bad80d9e6c75784423f74984e40dd2
+0,         68,         68,        1,   345600, feb378d740e1e5f9312bf68c047040c3
+0,         69,         69,        1,   345600, 28b1de125fd6f6a9d42eb42300be7a3f
+0,         70,         70,        1,   345600, 495043e65515c790c1fe026c9d2c0a49
+0,         71,         71,        1,   345600, fa6d5062932e7fc11fbfe31349ecbbfd
+0,         72,         72,        1,   345600, 897952d332218f2dab7d760b2d2076eb
+0,         73,         73,        1,   345600, 79dfce7ac29b96ef23c7f648e87a3e0b
+0,         74,         74,        1,   345600, 94d11a85dc32b4693048a39a6227919b
+0,         75,         75,        1,   345600, f9c149dbdc71c60f146cf68a673102d5
+0,         76,         76,        1,   345600, febddbb90b3f37cb29b07102575d844f
+0,         77,         77,        1,   345600, b9e5cdfa63c2637f2204dcb885608bb9
+0,         78,         78,        1,   345600, 051773f40a3749b54c5a802f8a97128f
+0,         79,         79,        1,   345600, 5fed52a298b8eef94ebe21c03313a5d7
+0,         80,         80,        1,   345600, f8a447cd738ec587fbacde061db51be5
+0,         81,         81,        1,   345600, 474cfd0529c0a5daffdcaed5183ee583
+0,         82,         82,        1,   345600, b0dd2ddd7037e70e774a7b3baad0b094
+0,         83,         83,        1,   345600, 86622696017283d3ce98e71d10f89f75
+0,         84,         84,        1,   345600, 20d6ae8b4dcf75ab31cc1f00002298ca
+0,         85,         85,        1,   345600, a882528cd387fb3a55d6e184a33fe2c9
+0,         86,         86,        1,   345600, 6bfcb539ce16f3db0d7f24dab6166913
+0,         87,         87,        1,   345600, 6bfcb539ce16f3db0d7f24dab6166913
+0,         88,         88,        1,   345600, 3bde52ee85883e4f353d4736a47b3874
+0,         89,         89,        1,   345600, 6f20835eb29f824d6a3e1be0ce772f08
+0,         90,         90,        1,   345600, 7b99b15c1b8fbe0e643b75fda7b17b04
+0,         91,         91,        1,   345600, f0139cd28a0030d611d60e9d28837af0
+0,         92,         92,        1,   345600, f0139cd28a0030d611d60e9d28837af0
+0,         93,         93,        1,   345600, adf0a37ad23c38b54c7394871c876468
+0,         94,         94,        1,   345600, f0bd37bcb8299e90efd05a0e01c84029
+0,         95,         95,        1,   345600, 8a9d76bc611731eabc29bbf231e21528

diff --git a/tests/ref/fate/mov-guess-delay-1 b/tests/ref/fate/mov-guess-delay-1
new file mode 100644
index 0000000..96cb67b
--- /dev/null
+++ b/tests/ref/fate/mov-guess-delay-1

@@ -0,0 +1,3 @@
+[STREAM]
+has_b_frames=1
+[/STREAM]

diff --git a/tests/ref/fate/mov-guess-delay-2 b/tests/ref/fate/mov-guess-delay-2
new file mode 100644
index 0000000..248de1c
--- /dev/null
+++ b/tests/ref/fate/mov-guess-delay-2

@@ -0,0 +1,3 @@
+[STREAM]
+has_b_frames=2
+[/STREAM]

diff --git a/tests/ref/fate/mov-guess-delay-3 b/tests/ref/fate/mov-guess-delay-3
new file mode 100644
index 0000000..248de1c
--- /dev/null
+++ b/tests/ref/fate/mov-guess-delay-3

@@ -0,0 +1,3 @@
+[STREAM]
+has_b_frames=2
+[/STREAM]

diff --git a/tests/ref/fate/mov-ibi-elst-starts-b b/tests/ref/fate/mov-ibi-elst-starts-b
new file mode 100644
index 0000000..1ab9c2a
--- /dev/null
+++ b/tests/ref/fate/mov-ibi-elst-starts-b

@@ -0,0 +1,33 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   115200, 7e20f8729b6b53dc11791927bf4a5aec
+0,          1,          1,        1,   115200, 4e5dc2b806e394cd666c968f736fecd0
+0,          2,          2,        1,   115200, 7a3c7473d44c5f60c07655f6fc0c2ac3
+0,          3,          3,        1,   115200, 038254422a603a3270c09cdcd149707b
+0,          4,          4,        1,   115200, 7553b6b4547cb23ef8f0392ed5a5d4b0
+0,          5,          5,        1,   115200, 6d017ede7f446124af7308667cb0dc41
+0,          6,          6,        1,   115200, 77752f0288ae64f857732b8e62e47457
+0,          7,          7,        1,   115200, d656833951af99330625f7c6de7685c4
+0,          8,          8,        1,   115200, 14338b833e431e566ac98da841600bfe
+0,          9,          9,        1,   115200, 07ea95d1659f3c4424a470a546d0df6e
+0,         10,         10,        1,   115200, fd05b8cc83072f813e89d394d1f6efc6
+0,         11,         11,        1,   115200, 750b82ca5c7e901545e7b1aa69692426
+0,         12,         12,        1,   115200, 7347679ab09bc936047368b8caebcaff
+0,         13,         13,        1,   115200, 63a23fdd57ac8462b9ffbcb12ab717b3
+0,         14,         14,        1,   115200, 705257a1c99693db233e2a3ee027adcf
+0,         15,         15,        1,   115200, df861a2ec7a4ef70e82b1c28025e5a48
+0,         16,         16,        1,   115200, 2a8b403c077b6b43aa71eaf7d1537713
+0,         17,         17,        1,   115200, 973b5cd3ce473e3970dfa96045553172
+0,         18,         18,        1,   115200, fc612c0afeae3b6576b5ee2f3f119832
+0,         19,         19,        1,   115200, 97074fe5a0b6e7e8470729654092e56c
+0,         20,         20,        1,   115200, 8cf9337201065335b3aa4da21dc9b37a
+0,         21,         21,        1,   115200, 93ff3589294cc0673af3daee1e7fe42a
+0,         22,         22,        1,   115200, c0b6fd870a022f374f9d6c697e8e293d
+0,         23,         23,        1,   115200, bc4638ff7036b323c39a948a6407695d

diff --git a/tests/ref/fate/mov-invalid-elst-entry-count b/tests/ref/fate/mov-invalid-elst-entry-count
new file mode 100644
index 0000000..ac1e02e
--- /dev/null
+++ b/tests/ref/fate/mov-invalid-elst-entry-count

@@ -0,0 +1,57 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/24
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 640x480
+#sar 0: 1/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   460800, 549730883a0b56e6accaf021903daecf
+0,          1,          1,        1,   460800, d5fc844d512a1decb0814b8692914d60
+0,          2,          2,        1,   460800, 69753185911f99aa38bd9d9ee0ff24f7
+0,          3,          3,        1,   460800, 2a985178c8b24e97708c6d33142f18a9
+0,          4,          4,        1,   460800, a23818e43bdf4ee77291e0f74051d0bf
+0,          5,          5,        1,   460800, f2c927b11350b9822065bec5f06b65d9
+0,          6,          6,        1,   460800, 0c553fb530cf042eb84f5b13817a96a6
+0,          7,          7,        1,   460800, a2be429b570ce30a7471b901f2ea5bcc
+0,          8,          8,        1,   460800, b4f4a3b1988404b35c57688755beb8b9
+0,          9,          9,        1,   460800, 2778b931d5e1570e1b809a87b1c45491
+0,         10,         10,        1,   460800, 4eb341ebf32006d8bce13f1d132d84b2
+0,         11,         11,        1,   460800, c39a34287300ee52547752198a2222fc
+0,         12,         12,        1,   460800, f3ec59e6fc4e3c2e75f42bef34ca73b5
+0,         13,         13,        1,   460800, 3f0e6116a94fefe890b79d1e94c156c6
+0,         14,         14,        1,   460800, 0a8a597c0806818112bf8351f53d350a
+0,         15,         15,        1,   460800, ab5c10023712ea21d595f5f856f53c66
+0,         16,         16,        1,   460800, d4e218051ef32e68e3df58c8c3d6a1ab
+0,         17,         17,        1,   460800, 24082fa9a277123a65398a36cb1b06af
+0,         18,         18,        1,   460800, 4c6da38c984496fb514d5e4d8ebb13f8
+0,         19,         19,        1,   460800, 5354435f662e80a47e1460d3ba020133
+0,         20,         20,        1,   460800, 6854beaaf90ad9f055af80fc75e802ae
+0,         21,         21,        1,   460800, cb40c57f9b61788fb4abe674b585142f
+0,         22,         22,        1,   460800, 8f42c6553c33dc049f6fb5e71f631125
+0,         23,         23,        1,   460800, d393b3efa7921caab3504044551bf6c3
+0,         24,         24,        1,   460800, 60a099be31244b2f69ca6107cdbd7e06
+0,         25,         25,        1,   460800, 05e542c105dc3f6f810d815fbf022afe
+0,         26,         26,        1,   460800, bf478f55694eda43942a343e0b70eb19
+0,         27,         27,        1,   460800, b1012ea5ff7232da20d2c7f242540bda
+0,         28,         28,        1,   460800, 936cdd0fd147f9c3ad4e82bed77a6d2c
+0,         29,         29,        1,   460800, 20baafde1ab281444d8cbf58995344c2
+0,         30,         30,        1,   460800, b0bb3647ec88d1a862e2e92c32d95b65
+0,         31,         31,        1,   460800, c86d78b06c281a8c3b63b4d7beeb3ddd
+0,         32,         32,        1,   460800, 4d699157c55ec3439390c6ff400655b4
+0,         33,         33,        1,   460800, 3327b0e09944dc13f0f2124cdfad86e9
+0,         34,         34,        1,   460800, 5db56bb14cd478f70bb5244781f1382d
+0,         35,         35,        1,   460800, cb8564d7a763444cc85512df3f7f10db
+0,         36,         36,        1,   460800, 1256eac030985c04c4501ad5a72e9d66
+0,         37,         37,        1,   460800, 20efdff9919faac5528cb576a21d6c2a
+0,         38,         38,        1,   460800, 28420714ad1008772ce8100d917e2226
+0,         39,         39,        1,   460800, 6c18dad0a470e8c0d62413b549e0ae2c
+0,         40,         40,        1,   460800, 3d8b9352e5d21697733e5fb5fdd55d58
+0,         41,         41,        1,   460800, 025e32ced915530eb814d399a8b28a20
+0,         42,         42,        1,   460800, cc880a64b66f158595521182a1db8a36
+0,         43,         43,        1,   460800, 322a2349a08e3b3cd18562050287c133
+0,         44,         44,        1,   460800, c19c16a382449594f1dec7a77a4e264e
+0,         45,         45,        1,   460800, 0d424b26031fa1db52fbd80c100e27a4
+0,         46,         46,        1,   460800, cef6158cd43beee0aea777982613e493
+0,         47,         47,        1,   460800, eb64a091a54ebb1077c667e1b33add2a

diff --git a/tests/ref/fate/mov-mp4-with-mov-in24-ver b/tests/ref/fate/mov-mp4-with-mov-in24-ver
new file mode 100644
index 0000000..b552245
--- /dev/null
+++ b/tests/ref/fate/mov-mp4-with-mov-in24-ver

@@ -0,0 +1,3 @@
+[STREAM]
+codec_name=pcm_s24le
+[/STREAM]

diff --git a/tests/ref/fate/mov-neg-firstpts-discard b/tests/ref/fate/mov-neg-firstpts-discard
new file mode 100644
index 0000000..2e295e3
--- /dev/null
+++ b/tests/ref/fate/mov-neg-firstpts-discard

@@ -0,0 +1,3 @@
+[STREAM]
+start_time=0.000000
+[/STREAM]

diff --git a/tests/ref/fate/mov-neg-firstpts-discard-frames b/tests/ref/fate/mov-neg-firstpts-discard-frames
new file mode 100644
index 0000000..81b59b3
--- /dev/null
+++ b/tests/ref/fate/mov-neg-firstpts-discard-frames

@@ -0,0 +1,24 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/30
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   115200, 1e55263b359b4c99c3463655a1120f11
+0,          1,          1,        1,   115200, ce33efea81064e7c23deb57dc4c21995
+0,          2,          2,        1,   115200, 42234f25d6191ab13c3676a7937c921b
+0,          3,          3,        1,   115200, eab2ccb227c66cba4c9feb8cdbf28ef8
+0,          4,          4,        1,   115200, c8816e0b151b2c892163e35086918520
+0,          5,          5,        1,   115200, c633f5604c8651165d551ee88fb4cd92
+0,          6,          6,        1,   115200, 5f3f8530d720fef3ac4c937e7f488ea7
+0,          7,          7,        1,   115200, be24a583909ca92008b642f39be02685
+0,          8,          8,        1,   115200, 83872a6e5c3369fe76f684de31bd9a36
+0,          9,          9,        1,   115200, 4629d6eb656883b337e8e0b381f2db8d
+0,         10,         10,        1,   115200, f6bec55bc026440d23a44b948900e785
+0,         11,         11,        1,   115200, 7e12e8113916305c79e5d08354acc9ae
+0,         12,         12,        1,   115200, d315c0093536642d340ea50de3b2bfbb
+0,         13,         13,        1,   115200, 3d12b24aaed72bfada4a1a3e5e02945a
+0,         14,         14,        1,   115200, 070d6b8935c11304d8f9520c4401a130

diff --git a/tests/ref/fate/mov-neg-firstpts-discard-vorbis b/tests/ref/fate/mov-neg-firstpts-discard-vorbis
new file mode 100644
index 0000000..2e295e3
--- /dev/null
+++ b/tests/ref/fate/mov-neg-firstpts-discard-vorbis

@@ -0,0 +1,3 @@
+[STREAM]
+start_time=0.000000
+[/STREAM]

diff --git a/tests/ref/fate/mov-stream-shorter-than-movie b/tests/ref/fate/mov-stream-shorter-than-movie
new file mode 100644
index 0000000..28f3ef3
--- /dev/null
+++ b/tests/ref/fate/mov-stream-shorter-than-movie

@@ -0,0 +1,33 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/24
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 640x480
+#sar 0: 0/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   460800, 3a26ddfa53f09d535c701138027e49dc
+0,          1,          1,        1,   460800, f09fe0d079ee81eb7db617b48ab5eecf
+0,          2,          2,        1,   460800, 40a165b074c7f4d34a41f320400737fc
+0,          3,          3,        1,   460800, 8ba73359c89ebc51e29847ef0e27f7c3
+0,          4,          4,        1,   460800, 0d783fcf3d37b99e7b41c0450e28f905
+0,          5,          5,        1,   460800, 7251de6f3e2ebccc2183aa7090dd59fb
+0,          6,          6,        1,   460800, 5d2ab26df00d2ef9adf763480f9c0493
+0,          7,          7,        1,   460800, b545f2623d4f4b3bc5ba28ca842cb00e
+0,          8,          8,        1,   460800, b3132e721028d0ccd0a734ef44fec5ff
+0,          9,          9,        1,   460800, 354af9e99c0d5b9cb910b468e8afc89e
+0,         10,         10,        1,   460800, 55452405c3cf6c44365d2dbe6cf37be3
+0,         11,         11,        1,   460800, 8ad58d19a00acc37536054706e235868
+0,         12,         12,        1,   460800, 833295be0a52fc2cf8dc7ae0375fd4f1
+0,         13,         13,        1,   460800, 3a943976671337231b962c659bb70f5c
+0,         14,         14,        1,   460800, 1dddc5aae4adb42b582565bc19f3f464
+0,         15,         15,        1,   460800, 0562a855137f84269cbfaa94e7d0f623
+0,         16,         16,        1,   460800, e4a89894c47c7142fbc0b9122a7c0561
+0,         17,         17,        1,   460800, 31f4354cc10c6a9d3fc35c0123710528
+0,         18,         18,        1,   460800, c4cfbb24b55d09be1a9b6f1f3fe3ae33
+0,         19,         19,        1,   460800, 0643e1e9cf7e3a4494ad4b425e7dcce7
+0,         20,         20,        1,   460800, 1caf065c1fcb1541e5625d9d5e9d0944
+0,         21,         21,        1,   460800, f519db6dcb739a494e4b57bff2c3b021
+0,         22,         22,        1,   460800, 5bcd1c762ff8edf2b9a4e489be97f18d
+0,         23,         23,        1,   460800, a818d6d3a94270294d6cf9432b19daa5

diff --git a/tests/ref/fate/mov-tenc-only-encrypted b/tests/ref/fate/mov-tenc-only-encrypted
new file mode 100644
index 0000000..1d57aa6
--- /dev/null
+++ b/tests/ref/fate/mov-tenc-only-encrypted

@@ -0,0 +1,57 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/24
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 1024x436
+#sar 0: 1/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   669696, f48f296a85eda5ba069dc851a3228bef
+0,          1,          1,        1,   669696, a50c5f69bfa3387d49b5bdf738e6529c
+0,          2,          2,        1,   669696, 05061299003760f6a4795b408f72aa31
+0,          3,          3,        1,   669696, 2572119f0b0cdd83f8a7e06252cecd3b
+0,          4,          4,        1,   669696, 29fe6a6bdb4a69018e318886a297f07e
+0,          5,          5,        1,   669696, e8233c7fbaecfbff965c7dfdd3982b1b
+0,          6,          6,        1,   669696, d9259df9880ff5d4a4b38282e67f407b
+0,          7,          7,        1,   669696, 3e8d795195038993503ea9ab6984c915
+0,          8,          8,        1,   669696, bc4e2d253b715a34f85aae1b080e3460
+0,          9,          9,        1,   669696, 09aba8b3a96f53f9268e7420a10bfab6
+0,         10,         10,        1,   669696, 179447977dd580da8b35fb5310a809ca
+0,         11,         11,        1,   669696, 7a0eea9d54577990345f5705ab9882be
+0,         12,         12,        1,   669696, 5bb96eb76f461825740e5938456df759
+0,         13,         13,        1,   669696, bd4ac4a760ead774b9422a27dc071964
+0,         14,         14,        1,   669696, 1cc05f760a9b751fc89e77f2bcc97259
+0,         15,         15,        1,   669696, 825d0dee6f0174ba7102892c7de30b4d
+0,         16,         16,        1,   669696, d26a2ef5267f6bb03c4e1d8514eee0df
+0,         17,         17,        1,   669696, c916ffdeadca76596a8f7fd47914b5ef
+0,         18,         18,        1,   669696, 6e085acfa7fee0658ea0ae6188274c17
+0,         19,         19,        1,   669696, 1e95fa5b3561283f05bf0bd44cb91721
+0,         20,         20,        1,   669696, 37e3d135aba9dfb8b87e441753115374
+0,         21,         21,        1,   669696, 9c398310e8564491de624393c16265ce
+0,         22,         22,        1,   669696, c87209e4d2617bc2ab40a75f455f09da
+0,         23,         23,        1,   669696, 2679c2f8d1d1af21982e245945c1ee60
+0,         24,         24,        1,   669696, 6151ab4781f31c5beb66b356ad547122
+0,         25,         25,        1,   669696, f7ef6293bfb3a6a329061cb6a5ed5a38
+0,         26,         26,        1,   669696, 2f6e666d14dfc407ca0c0f347b13eb08
+0,         27,         27,        1,   669696, 3454fa1730d79b1aa8dbbc865dc150f4
+0,         28,         28,        1,   669696, e93dc683e2453419a0419ab9af0f8f95
+0,         29,         29,        1,   669696, 031eb3154f7f83cf86d42bee66be9cf7
+0,         30,         30,        1,   669696, 1205c36723e88811206c68892d3aaed6
+0,         31,         31,        1,   669696, 7dd7a8a19dcd73b31ddc6a6d0c597a42
+0,         32,         32,        1,   669696, 7c91115368ea2531262a1197468bc3f4
+0,         33,         33,        1,   669696, 3cf6d9ba385e0fff76da33299ed5380c
+0,         34,         34,        1,   669696, 859fc8c3ef049e3c1175a85fb0a90a3d
+0,         35,         35,        1,   669696, 1d09ce6c7027103d99a4d5799f6e72ab
+0,         36,         36,        1,   669696, 3dcb8357408ac88abd734128d8f5dd6f
+0,         37,         37,        1,   669696, 4dafce137a0a5178f6efaec878e64d36
+0,         38,         38,        1,   669696, 44c478f29a1399ed03275a7357f57d48
+0,         39,         39,        1,   669696, 6e9edaac7414c0e14591ac3d4d0b1ac4
+0,         40,         40,        1,   669696, 522e4aaeea0825da27f631a9e690d654
+0,         41,         41,        1,   669696, 85f2502a718440834c40051d30f8a65e
+0,         42,         42,        1,   669696, ae8816f7bd4645ef1a17ee6d09b4c8d2
+0,         43,         43,        1,   669696, 914b006fa92f1eb3e590245749f6810d
+0,         44,         44,        1,   669696, 9406901542e94c429dff46108782ed69
+0,         45,         45,        1,   669696, 324c13641c39eef5c476023e358c0391
+0,         46,         46,        1,   669696, 4058e886e17c22e4eb9da1dd0d6ad891
+0,         47,         47,        1,   669696, 9edf9cd15eea985b42fd1f5035b1d693

diff --git a/tests/ref/fate/mov-zombie b/tests/ref/fate/mov-zombie
index e89abb1..f45fa59 100644
--- a/tests/ref/fate/mov-zombie
+++ b/tests/ref/fate/mov-zombie

@@ -1,6 +1,6 @@
 packet|codec_type=video|stream_index=0|pts=0|pts_time=0.000000|dts=-3004|dts_time=-0.033378|duration=3003|duration_time=0.033367|convergence_duration=N/A|convergence_duration_time=N/A|size=4133|pos=11309|flags=K_
-frame|media_type=video|stream_index=0|key_frame=1|pkt_pts=0|pkt_pts_time=0.000000|pkt_dts=-3004|pkt_dts_time=-0.033378|best_effort_timestamp=0|best_effort_timestamp_time=0.000000|pkt_duration=3003|pkt_duration_time=0.033367|pkt_pos=11309|pkt_size=4133|width=160|height=240|pix_fmt=yuv420p|sample_aspect_ratio=2:1|pict_type=I|coded_picture_number=0|display_picture_number=0|interlaced_frame=0|top_field_first=0|repeat_pict=0|color_range=tv|color_space=smpte170m|color_primaries=smpte170m|color_transfer=bt709|chroma_location=topleft
 packet|codec_type=video|stream_index=0|pts=5440|pts_time=0.060444|dts=-567|dts_time=-0.006300|duration=3003|duration_time=0.033367|convergence_duration=N/A|convergence_duration_time=N/A|size=1077|pos=15442|flags=__
+frame|media_type=video|stream_index=0|key_frame=1|pkt_pts=0|pkt_pts_time=0.000000|pkt_dts=-567|pkt_dts_time=-0.006300|best_effort_timestamp=0|best_effort_timestamp_time=0.000000|pkt_duration=3003|pkt_duration_time=0.033367|pkt_pos=11309|pkt_size=4133|width=160|height=240|pix_fmt=yuv420p|sample_aspect_ratio=2:1|pict_type=I|coded_picture_number=0|display_picture_number=0|interlaced_frame=0|top_field_first=0|repeat_pict=0|color_range=tv|color_space=smpte170m|color_primaries=smpte170m|color_transfer=bt709|chroma_location=topleft
 packet|codec_type=video|stream_index=0|pts=2437|pts_time=0.027078|dts=2436|dts_time=0.027067|duration=3003|duration_time=0.033367|convergence_duration=N/A|convergence_duration_time=N/A|size=355|pos=16519|flags=__
 frame|media_type=video|stream_index=0|key_frame=0|pkt_pts=2437|pkt_pts_time=0.027078|pkt_dts=2436|pkt_dts_time=0.027067|best_effort_timestamp=2437|best_effort_timestamp_time=0.027078|pkt_duration=3003|pkt_duration_time=0.033367|pkt_pos=16519|pkt_size=355|width=160|height=240|pix_fmt=yuv420p|sample_aspect_ratio=2:1|pict_type=B|coded_picture_number=2|display_picture_number=0|interlaced_frame=0|top_field_first=0|repeat_pict=0|color_range=tv|color_space=smpte170m|color_primaries=smpte170m|color_transfer=bt709|chroma_location=topleft
 packet|codec_type=video|stream_index=0|pts=11446|pts_time=0.127178|dts=5439|dts_time=0.060433|duration=3003|duration_time=0.033367|convergence_duration=N/A|convergence_duration_time=N/A|size=1110|pos=16874|flags=__
@@ -129,5 +129,5 @@
 frame|media_type=video|stream_index=0|key_frame=0|pkt_pts=188623|pkt_pts_time=2.095811|pkt_dts=188622|pkt_dts_time=2.095800|best_effort_timestamp=188623|best_effort_timestamp_time=2.095811|pkt_duration=3003|pkt_duration_time=0.033367|pkt_pos=100846|pkt_size=974|width=160|height=240|pix_fmt=yuv420p|sample_aspect_ratio=2:1|pict_type=B|coded_picture_number=64|display_picture_number=0|interlaced_frame=0|top_field_first=0|repeat_pict=0|color_range=tv|color_space=smpte170m|color_primaries=smpte170m|color_transfer=bt709|chroma_location=topleft
 packet|codec_type=video|stream_index=0|pts=197632|pts_time=2.195911|dts=191625|dts_time=2.129167|duration=3003|duration_time=0.033367|convergence_duration=N/A|convergence_duration_time=N/A|size=580|pos=101820|flags=__
 frame|media_type=video|stream_index=0|key_frame=0|pkt_pts=191626|pkt_pts_time=2.129178|pkt_dts=N/A|pkt_dts_time=N/A|best_effort_timestamp=191626|best_effort_timestamp_time=2.129178|pkt_duration=3003|pkt_duration_time=0.033367|pkt_pos=99180|pkt_size=1666|width=160|height=240|pix_fmt=yuv420p|sample_aspect_ratio=2:1|pict_type=P|coded_picture_number=63|display_picture_number=0|interlaced_frame=0|top_field_first=0|repeat_pict=0|color_range=tv|color_space=smpte170m|color_primaries=smpte170m|color_transfer=bt709|chroma_location=topleft
-stream|index=0|codec_name=h264|profile=77|codec_type=video|codec_time_base=212521/12744000|codec_tag_string=avc1|codec_tag=0x31637661|width=160|height=240|coded_width=160|coded_height=240|has_b_frames=0|sample_aspect_ratio=2:1|display_aspect_ratio=4:3|pix_fmt=yuv420p|level=12|color_range=tv|color_space=smpte170m|color_transfer=bt709|color_primaries=smpte170m|chroma_location=topleft|field_order=unknown|timecode=N/A|refs=2|is_avc=true|nal_length_size=4|id=N/A|r_frame_rate=30000/1001|avg_frame_rate=6372000/212521|time_base=1/90000|start_pts=0|start_time=0.000000|duration_ts=2125200|duration=23.613333|bit_rate=333874|max_bit_rate=N/A|bits_per_raw_sample=8|nb_frames=708|nb_read_frames=65|nb_read_packets=66|disposition:default=1|disposition:dub=0|disposition:original=0|disposition:comment=0|disposition:lyrics=0|disposition:karaoke=0|disposition:forced=0|disposition:hearing_impaired=0|disposition:visual_impaired=0|disposition:clean_effects=0|disposition:attached_pic=0|disposition:timed_thumbnails=0|tag:rotate=0|tag:creation_time=2008-05-12T20:59:27.000000Z|tag:language=eng|tag:handler_name=Apple Alias Data Handler|tag:encoder=H.264
+stream|index=0|codec_name=h264|profile=77|codec_type=video|codec_time_base=212521/12744000|codec_tag_string=avc1|codec_tag=0x31637661|width=160|height=240|coded_width=160|coded_height=240|has_b_frames=1|sample_aspect_ratio=2:1|display_aspect_ratio=4:3|pix_fmt=yuv420p|level=12|color_range=tv|color_space=smpte170m|color_transfer=bt709|color_primaries=smpte170m|chroma_location=topleft|field_order=unknown|timecode=N/A|refs=2|is_avc=true|nal_length_size=4|id=N/A|r_frame_rate=30000/1001|avg_frame_rate=6372000/212521|time_base=1/90000|start_pts=0|start_time=0.000000|duration_ts=2125200|duration=23.613333|bit_rate=333874|max_bit_rate=N/A|bits_per_raw_sample=8|nb_frames=708|nb_read_frames=65|nb_read_packets=66|disposition:default=1|disposition:dub=0|disposition:original=0|disposition:comment=0|disposition:lyrics=0|disposition:karaoke=0|disposition:forced=0|disposition:hearing_impaired=0|disposition:visual_impaired=0|disposition:clean_effects=0|disposition:attached_pic=0|disposition:timed_thumbnails=0|tag:rotate=0|tag:creation_time=2008-05-12T20:59:27.000000Z|tag:language=eng|tag:handler_name=Apple Video Media Handler|tag:encoder=H.264
 side_data|side_data_type=Display Matrix|displaymatrix=\n00000000:       131072           0           0\n00000001:            0       65536           0\n00000002:            0           0  1073741824\n|rotation=0

diff --git a/tests/ref/fate/movenc b/tests/ref/fate/movenc
index 872796e..5e8f324 100644
--- a/tests/ref/fate/movenc
+++ b/tests/ref/fate/movenc

@@ -17,10 +17,10 @@
 write_data len 1171, time nopts, type header atom -
 write_data len 728, time 0, type sync atom moof
 write_data len 828, time nopts, type unknown atom -
-write_data len 728, time 1013106, type sync atom moof
+write_data len 728, time 1046439, type sync atom moof
 write_data len 812, time nopts, type unknown atom -
 write_data len 148, time nopts, type trailer atom -
-1f37c1a8e01651e8bebcd66f00b6a226 4435 ismv
+49bf122c4c732a344ef68b58acd19be5 4435 ismv
 write_data len 36, time nopts, type header atom ftyp
 write_data len 1123, time nopts, type header atom -
 write_data len 796, time 0, type sync atom moof

diff --git a/tests/ref/fate/mpeg2-field-enc b/tests/ref/fate/mpeg2-field-enc
index 4c288a8..8062b82 100644
--- a/tests/ref/fate/mpeg2-field-enc
+++ b/tests/ref/fate/mpeg2-field-enc

@@ -3,33 +3,33 @@
 #codec_id 0: rawvideo
 #dimensions 0: 720x576
 #sar 0: 16/15
-0,          9,          9,        1,   622080, 0xb3b66c5c
-0,         10,         10,        1,   622080, 0x088ec02b
-0,         11,         11,        1,   622080, 0x7a36db21
-0,         12,         12,        1,   622080, 0x541b286f
-0,         13,         13,        1,   622080, 0xb6c3e590
-0,         14,         14,        1,   622080, 0x39dbed51
-0,         15,         15,        1,   622080, 0x973dc728
-0,         16,         16,        1,   622080, 0xd7a4f804
-0,         17,         17,        1,   622080, 0xa2484762
-0,         18,         18,        1,   622080, 0x0cd268d1
-0,         19,         19,        1,   622080, 0x72eb663d
-0,         20,         20,        1,   622080, 0x8fdbac59
-0,         21,         21,        1,   622080, 0xa6f4feb9
-0,         22,         22,        1,   622080, 0xadb828c6
-0,         23,         23,        1,   622080, 0xea630a63
-0,         24,         24,        1,   622080, 0xa901d925
-0,         25,         25,        1,   622080, 0xac5e7087
-0,         26,         26,        1,   622080, 0x10274a2b
-0,         27,         27,        1,   622080, 0x143d541c
-0,         28,         28,        1,   622080, 0xee94c93a
-0,         29,         29,        1,   622080, 0xca030208
-0,         30,         30,        1,   622080, 0x26f30ead
-0,         31,         31,        1,   622080, 0xfc22f32c
-0,         32,         32,        1,   622080, 0x940a5ff8
-0,         33,         33,        1,   622080, 0x2164f805
-0,         34,         34,        1,   622080, 0xa76f5aba
-0,         35,         35,        1,   622080, 0x8c311471
-0,         36,         36,        1,   622080, 0xa45e1d95
-0,         37,         37,        1,   622080, 0x6cc61d6c
-0,         38,         38,        1,   622080, 0x6983b417
+0,          9,          9,        1,   622080, 0xff496bf5
+0,         10,         10,        1,   622080, 0x9bf6c014
+0,         11,         11,        1,   622080, 0x870edac7
+0,         12,         12,        1,   622080, 0x9dec280c
+0,         13,         13,        1,   622080, 0x0f02e57a
+0,         14,         14,        1,   622080, 0x161beccb
+0,         15,         15,        1,   622080, 0x2234c6b0
+0,         16,         16,        1,   622080, 0x143ef78a
+0,         17,         17,        1,   622080, 0x0d6e46cf
+0,         18,         18,        1,   622080, 0xb41667fd
+0,         19,         19,        1,   622080, 0xcc476539
+0,         20,         20,        1,   622080, 0x85d8ab16
+0,         21,         21,        1,   622080, 0xcd6afec1
+0,         22,         22,        1,   622080, 0x187a28ac
+0,         23,         23,        1,   622080, 0x06100a4b
+0,         24,         24,        1,   622080, 0x1b4ed8e9
+0,         25,         25,        1,   622080, 0xde33702c
+0,         26,         26,        1,   622080, 0x11974a0c
+0,         27,         27,        1,   622080, 0x1a0553e8
+0,         28,         28,        1,   622080, 0x98e1c8da
+0,         29,         29,        1,   622080, 0x003801ce
+0,         30,         30,        1,   622080, 0x6f300e00
+0,         31,         31,        1,   622080, 0xb232f27d
+0,         32,         32,        1,   622080, 0x07c65f57
+0,         33,         33,        1,   622080, 0x6363f7ce
+0,         34,         34,        1,   622080, 0x69ba5ac3
+0,         35,         35,        1,   622080, 0x8561143e
+0,         36,         36,        1,   622080, 0xf45e1d76
+0,         37,         37,        1,   622080, 0x69f81d2f
+0,         38,         38,        1,   622080, 0x8653b3ed

diff --git a/tests/ref/fate/mpeg2-ticket6677 b/tests/ref/fate/mpeg2-ticket6677
new file mode 100644
index 0000000..e963e32
--- /dev/null
+++ b/tests/ref/fate/mpeg2-ticket6677

@@ -0,0 +1,12 @@
+#tb 0: 1/30
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 720x480
+#sar 0: 8/9
+0,          0,          0,        1,   518400, 0xc1866f5f
+0,          1,          1,        1,   518400, 0x9ba32764
+0,          2,          2,        1,   518400, 0xa9031bb8
+0,          3,          3,        1,   518400, 0x5e2c3502
+0,          4,          4,        1,   518400, 0xe860027a
+0,          5,          5,        1,   518400, 0xa9152430
+0,          6,          6,        1,   518400, 0xb98dd9f7

diff --git a/tests/ref/fate/mpegps-remuxed-pcm-demux b/tests/ref/fate/mpegps-remuxed-pcm-demux
new file mode 100644
index 0000000..b5b2778
--- /dev/null
+++ b/tests/ref/fate/mpegps-remuxed-pcm-demux

@@ -0,0 +1,50 @@
+#tb 0: 1/90000
+#media_type 0: audio
+#codec_id 0: pcm_dvd
+#sample_rate 0: 44100
+#channel_layout 0: 4
+#channel_layout_name 0: mono
+0,          0,          0,     2040,     2005, 0x8e98e563
+0,       2090,       2090,     2069,     2033, 0xd6f2f455
+0,       4180,       4180,     2057,     2021, 0x77a6e6c9
+0,       6269,       6269,     2057,     2021, 0x8dbcf6be
+0,       8359,       8359,     2057,     2021, 0xc629eaa0
+0,      10449,      10449,     2057,     2021, 0xa951ebe5
+0,      12539,      12539,     2057,     2021, 0x1324f29b
+0,      14629,      14629,     2057,     2021, 0x55f1e968
+0,      16718,      16718,     2057,     2021, 0x98a7f994
+0,      18808,      18808,     2057,     2021, 0xba42f42f
+0,      20898,      20898,     2057,     2021, 0xcf5cefe6
+0,      22988,      22988,     2057,     2021, 0xeef8f2b5
+0,      25078,      25078,     2057,     2021, 0x71faf42f
+0,      27167,      27167,     2057,     2021, 0x0346f019
+0,      29257,      29257,     2057,     2021, 0xbca0f4a3
+0,      31347,      31347,     2057,     2021, 0x3b6ced4f
+0,      33437,      33437,     2057,     2021, 0xc8b9ea0a
+0,      35527,      35527,     2057,     2021, 0x18e5f385
+0,      37616,      37616,     2057,     2021, 0x74f6e9d6
+0,      39706,      39706,     2057,     2021, 0x35d8f2b5
+0,      41796,      41796,     2057,     2021, 0x9d2aec53
+0,      43886,      43886,     2057,     2021, 0xf0acf1a0
+0,      45976,      45976,     2057,     2021, 0xa724ebe9
+0,      48065,      48065,     2057,     2021, 0xd4f8f534
+0,      50155,      50155,     2057,     2021, 0xdf62efc6
+0,      52245,      52245,     2057,     2021, 0x9865f504
+0,      54335,      54335,     2057,     2021, 0x8670efb7
+0,      56424,      56424,     2057,     2021, 0xe51af219
+0,      58514,      58514,     2057,     2021, 0x0210f27f
+0,      60604,      60604,     2057,     2021, 0x4b08f406
+0,      62694,      62694,     2057,     2021, 0x2b02eed4
+0,      64784,      64784,     2057,     2021, 0x0445ed00
+0,      66873,      66873,     2057,     2021, 0xfae9f21f
+0,      68963,      68963,     2057,     2021, 0x3d6beabc
+0,      71053,      71053,     2057,     2021, 0xc50af39c
+0,      73143,      73143,     2057,     2021, 0xf9eceb82
+0,      75233,      75233,     2057,     2021, 0x7b89eb9b
+0,      77322,      77322,     2057,     2021, 0x7c07ef4b
+0,      79412,      79412,     2057,     2021, 0xbfacf1eb
+0,      81502,      81502,     2057,     2021, 0xccb2f27b
+0,      83592,      83592,     2057,     2021, 0xc035f557
+0,      85682,      85682,     2057,     2021, 0xbdf1edea
+0,      87771,      87771,     2057,     2021, 0x3644f424
+0,      89861,      89861,     1457,     1433, 0xdd17d51f

diff --git a/tests/ref/fate/mpegts-probe-pmt-merge b/tests/ref/fate/mpegts-probe-pmt-merge
new file mode 100644
index 0000000..6e424af
--- /dev/null
+++ b/tests/ref/fate/mpegts-probe-pmt-merge

@@ -0,0 +1,32 @@
+[PROGRAM]
+[STREAM]
+codec_name=ac3
+[/STREAM]
+[STREAM]
+codec_name=ac3
+[/STREAM]
+[STREAM]
+codec_name=ac3
+[/STREAM]
+[STREAM]
+codec_name=mpeg2video
+[/STREAM]
+[STREAM]
+codec_name=scte_35
+[/STREAM]
+[/PROGRAM]
+[STREAM]
+codec_name=ac3
+[/STREAM]
+[STREAM]
+codec_name=ac3
+[/STREAM]
+[STREAM]
+codec_name=ac3
+[/STREAM]
+[STREAM]
+codec_name=mpeg2video
+[/STREAM]
+[STREAM]
+codec_name=scte_35
+[/STREAM]

diff --git a/tests/ref/fate/mpegts-probe-program b/tests/ref/fate/mpegts-probe-program
new file mode 100644
index 0000000..bb1012c
--- /dev/null
+++ b/tests/ref/fate/mpegts-probe-program

@@ -0,0 +1,8 @@
+[PROGRAM]
+[STREAM]
+codec_name=hevc
+[/STREAM]
+[/PROGRAM]
+[STREAM]
+codec_name=hevc
+[/STREAM]

diff --git a/tests/ref/fate/mxf-reel_name b/tests/ref/fate/mxf-reel_name
new file mode 100644
index 0000000..cfe62df
--- /dev/null
+++ b/tests/ref/fate/mxf-reel_name

@@ -0,0 +1 @@
+a788589c14f343dcc6d75aaaec0f0266

diff --git a/tests/ref/fate/opt b/tests/ref/fate/opt
index 7b47d42..6a7dbfa 100644
--- a/tests/ref/fate/opt
+++ b/tests/ref/fate/opt

@@ -18,31 +18,31 @@
 flt=0.333333
 dbl=0.333333
 TestContext AVOptions:
-  -num               <int>        E....... set num (from 0 to 100) (default 0)
-  -toggle            <int>        E....... set toggle (from 0 to 1) (default 1)
-  -rational          <rational>   E....... set rational (from 0 to 10) (default 1/1)
-  -string            <string>     E....... set string (default "default")
-  -escape            <string>     E....... set escape str (default "\=,")
-  -flags             <flags>      E....... set flags (default cool)
-     cool                         E....... set cool flag
-     lame                         E....... set lame flag
-     mu                           E....... set mu flag
-  -size              <image_size> E....... set size (default "200x300")
-  -pix_fmt           <pix_fmt>    E....... set pixfmt (default 0bgr)
-  -sample_fmt        <sample_fmt> E....... set samplefmt (default s16)
-  -video_rate        <video_rate> E....... set videorate (default "25")
-  -duration          <duration>   E....... set duration (default 0.001)
-  -color             <color>      E....... set color (default "pink")
-  -cl                <channel_layout> E....... set channel layout (default 0x137)
-  -bin               <binary>     E....... set binary value
-  -bin1              <binary>     E....... set binary value
-  -bin2              <binary>     E....... set binary value
-  -num64             <int64>      E....... set num 64bit (from 0 to 100) (default 1)
-  -flt               <float>      E....... set float (from 0 to 100) (default 0.333333)
-  -dbl               <double>     E....... set double (from 0 to 100) (default 0.333333)
-  -bool1             <boolean>    E....... set boolean value (default auto)
-  -bool2             <boolean>    E....... set boolean value (default true)
-  -bool3             <boolean>    E....... set boolean value (default false)
+  -num               <int>        E........ set num (from 0 to 100) (default 0)
+  -toggle            <int>        E........ set toggle (from 0 to 1) (default 1)
+  -rational          <rational>   E........ set rational (from 0 to 10) (default 1/1)
+  -string            <string>     E........ set string (default "default")
+  -escape            <string>     E........ set escape str (default "\=,")
+  -flags             <flags>      E........ set flags (default cool)
+     cool                         E........ set cool flag
+     lame                         E........ set lame flag
+     mu                           E........ set mu flag
+  -size              <image_size> E........ set size (default "200x300")
+  -pix_fmt           <pix_fmt>    E........ set pixfmt (default 0bgr)
+  -sample_fmt        <sample_fmt> E........ set samplefmt (default s16)
+  -video_rate        <video_rate> E........ set videorate (default "25")
+  -duration          <duration>   E........ set duration (default 0.001)
+  -color             <color>      E........ set color (default "pink")
+  -cl                <channel_layout> E........ set channel layout (default 0x137)
+  -bin               <binary>     E........ set binary value
+  -bin1              <binary>     E........ set binary value
+  -bin2              <binary>     E........ set binary value
+  -num64             <int64>      E........ set num 64bit (from 0 to 100) (default 1)
+  -flt               <float>      E........ set float (from 0 to 100) (default 0.333333)
+  -dbl               <double>     E........ set double (from 0 to 100) (default 0.333333)
+  -bool1             <boolean>    E........ set boolean value (default auto)
+  -bool2             <boolean>    E........ set boolean value (default true)
+  -bool3             <boolean>    E........ set boolean value (default false)
 
 Testing av_opt_is_set_to_default()
 name:       num default:1 error:

diff --git a/tests/ref/fate/parseutils b/tests/ref/fate/parseutils
index 568b6d2..bd36c9b 100644
--- a/tests/ref/fate/parseutils
+++ b/tests/ref/fate/parseutils

@@ -90,6 +90,10 @@
 42.1729                  ->             +42172900
 -1729.42                 ->           -1729420000
 12:34                    ->            +754000000
+2147483648s              ->     +2147483648000000
+4294967296ms             ->        +4294967296000
+8589934592us             ->           +8589934592
+9223372036854775808us    -> error
 
 Testing av_get_known_color_name()
 AliceBlue -> R(240) G(248) B(255) A(0)

diff --git a/tests/ref/fate/pixelutils b/tests/ref/fate/pixelutils
index 493497f..df72d52 100644
--- a/tests/ref/fate/pixelutils
+++ b/tests/ref/fate/pixelutils

@@ -2,38 +2,47 @@
 [OK] [UU] SAD [random] 4x4=1370 ref=1370
 [OK] [UU] SAD [random] 8x8=5178 ref=5178
 [OK] [UU] SAD [random] 16x16=20946 ref=20946
+[OK] [UU] SAD [random] 32x32=83150 ref=83150
 [OK] [AU] SAD [random] 2x2=320 ref=320
 [OK] [AU] SAD [random] 4x4=1522 ref=1522
 [OK] [AU] SAD [random] 8x8=5821 ref=5821
 [OK] [AU] SAD [random] 16x16=21951 ref=21951
+[OK] [AU] SAD [random] 32x32=86983 ref=86983
 [OK] [AA] SAD [random] 2x2=276 ref=276
 [OK] [AA] SAD [random] 4x4=1521 ref=1521
 [OK] [AA] SAD [random] 8x8=5130 ref=5130
 [OK] [AA] SAD [random] 16x16=20775 ref=20775
+[OK] [AA] SAD [random] 32x32=83402 ref=83402
 [OK] [UU] SAD [max] 2x2=1020 ref=1020
 [OK] [UU] SAD [max] 4x4=4080 ref=4080
 [OK] [UU] SAD [max] 8x8=16320 ref=16320
 [OK] [UU] SAD [max] 16x16=65280 ref=65280
+[OK] [UU] SAD [max] 32x32=261120 ref=261120
 [OK] [AU] SAD [max] 2x2=1020 ref=1020
 [OK] [AU] SAD [max] 4x4=4080 ref=4080
 [OK] [AU] SAD [max] 8x8=16320 ref=16320
 [OK] [AU] SAD [max] 16x16=65280 ref=65280
+[OK] [AU] SAD [max] 32x32=261120 ref=261120
 [OK] [AA] SAD [max] 2x2=1020 ref=1020
 [OK] [AA] SAD [max] 4x4=4080 ref=4080
 [OK] [AA] SAD [max] 8x8=16320 ref=16320
 [OK] [AA] SAD [max] 16x16=65280 ref=65280
+[OK] [AA] SAD [max] 32x32=261120 ref=261120
 [OK] [UU] SAD [min] 2x2=0 ref=0
 [OK] [UU] SAD [min] 4x4=0 ref=0
 [OK] [UU] SAD [min] 8x8=0 ref=0
 [OK] [UU] SAD [min] 16x16=0 ref=0
+[OK] [UU] SAD [min] 32x32=0 ref=0
 [OK] [AU] SAD [min] 2x2=0 ref=0
 [OK] [AU] SAD [min] 4x4=0 ref=0
 [OK] [AU] SAD [min] 8x8=0 ref=0
 [OK] [AU] SAD [min] 16x16=0 ref=0
+[OK] [AU] SAD [min] 32x32=0 ref=0
 [OK] [AA] SAD [min] 2x2=0 ref=0
 [OK] [AA] SAD [min] 4x4=0 ref=0
 [OK] [AA] SAD [min] 8x8=0 ref=0
 [OK] [AA] SAD [min] 16x16=0 ref=0
+[OK] [AA] SAD [min] 32x32=0 ref=0
 [OK] [UU] SAD [small] 2x2=400 ref=400
 [OK] [AU] SAD [small] 2x2=384 ref=384
 [OK] [AA] SAD [small] 2x2=409 ref=409
@@ -46,3 +55,6 @@
 [OK] [UU] SAD [small] 16x16=19490 ref=19490
 [OK] [AU] SAD [small] 16x16=21037 ref=21037
 [OK] [AA] SAD [small] 16x16=22986 ref=22986
+[OK] [UU] SAD [small] 32x32=86550 ref=86550
+[OK] [AU] SAD [small] 32x32=83656 ref=83656
+[OK] [AA] SAD [small] 32x32=85164 ref=85164

diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
index 8da113d..27bb8aa 100644
--- a/tests/ref/fate/qtrle-8bit
+++ b/tests/ref/fate/qtrle-8bit

@@ -4,169 +4,60 @@
 #dimensions 0: 640x480
 #sar 0: 0/1
 0,          0,          0,        1,   921600, 0x1492e3ed
-0,          1,          1,        1,   921600, 0x1492e3ed
-0,          2,          2,        1,   921600, 0x1492e3ed
 0,          3,          3,        1,   921600, 0x23ef4fc7
-0,          4,          4,        1,   921600, 0x23ef4fc7
 0,          5,          5,        1,   921600, 0xe406d4be
-0,          6,          6,        1,   921600, 0xe406d4be
-0,          7,          7,        1,   921600, 0xe406d4be
 0,          8,          8,        1,   921600, 0x62b8b5a1
-0,          9,          9,        1,   921600, 0x62b8b5a1
 0,         10,         10,        1,   921600, 0x7d8ba674
-0,         11,         11,        1,   921600, 0x7d8ba674
-0,         12,         12,        1,   921600, 0x7d8ba674
 0,         13,         13,        1,   921600, 0xfe666be7
-0,         14,         14,        1,   921600, 0xfe666be7
 0,         15,         15,        1,   921600, 0x721baec0
-0,         16,         16,        1,   921600, 0x721baec0
-0,         17,         17,        1,   921600, 0x721baec0
 0,         18,         18,        1,   921600, 0xc237180a
-0,         19,         19,        1,   921600, 0xc237180a
 0,         20,         20,        1,   921600, 0xf03a7482
-0,         21,         21,        1,   921600, 0xf03a7482
-0,         22,         22,        1,   921600, 0xf03a7482
 0,         23,         23,        1,   921600, 0x5612a391
-0,         24,         24,        1,   921600, 0x5612a391
 0,         25,         25,        1,   921600, 0x9dbcc46a
-0,         26,         26,        1,   921600, 0x9dbcc46a
-0,         27,         27,        1,   921600, 0x9dbcc46a
 0,         28,         28,        1,   921600, 0xa128a5d5
-0,         29,         29,        1,   921600, 0xa128a5d5
 0,         30,         30,        1,   921600, 0x63e0025c
-0,         31,         31,        1,   921600, 0x63e0025c
-0,         32,         32,        1,   921600, 0x63e0025c
 0,         33,         33,        1,   921600, 0x262359ed
-0,         34,         34,        1,   921600, 0x262359ed
 0,         35,         35,        1,   921600, 0x343688e8
-0,         36,         36,        1,   921600, 0x343688e8
-0,         37,         37,        1,   921600, 0x343688e8
-0,         38,         38,        1,   921600, 0x343688e8
-0,         39,         39,        1,   921600, 0x343688e8
-0,         40,         40,        1,   921600, 0x343688e8
-0,         41,         41,        1,   921600, 0x343688e8
-0,         42,         42,        1,   921600, 0x343688e8
-0,         43,         43,        1,   921600, 0x343688e8
-0,         44,         44,        1,   921600, 0x343688e8
 0,         45,         45,        1,   921600, 0xe4b29d57
-0,         46,         46,        1,   921600, 0xe4b29d57
-0,         47,         47,        1,   921600, 0xe4b29d57
 0,         48,         48,        1,   921600, 0x198e8a4a
-0,         49,         49,        1,   921600, 0x198e8a4a
 0,         50,         50,        1,   921600, 0x0cad8dc9
-0,         51,         51,        1,   921600, 0x0cad8dc9
-0,         52,         52,        1,   921600, 0x0cad8dc9
 0,         53,         53,        1,   921600, 0x1f74cf3d
-0,         54,         54,        1,   921600, 0x1f74cf3d
 0,         55,         55,        1,   921600, 0xec5b5449
-0,         56,         56,        1,   921600, 0xec5b5449
-0,         57,         57,        1,   921600, 0xec5b5449
 0,         58,         58,        1,   921600, 0x39829711
-0,         59,         59,        1,   921600, 0x39829711
 0,         60,         60,        1,   921600, 0x6de5b9c6
-0,         61,         61,        1,   921600, 0x6de5b9c6
-0,         62,         62,        1,   921600, 0x6de5b9c6
 0,         63,         63,        1,   921600, 0x47b0e9d4
-0,         64,         64,        1,   921600, 0x47b0e9d4
 0,         65,         65,        1,   921600, 0x756452b8
-0,         66,         66,        1,   921600, 0x756452b8
-0,         67,         67,        1,   921600, 0x756452b8
 0,         68,         68,        1,   921600, 0x6fce3478
-0,         69,         69,        1,   921600, 0x6fce3478
 0,         70,         70,        1,   921600, 0x372397cd
-0,         71,         71,        1,   921600, 0x372397cd
-0,         72,         72,        1,   921600, 0x372397cd
 0,         73,         73,        1,   921600, 0xe3999ba1
-0,         74,         74,        1,   921600, 0xe3999ba1
 0,         75,         75,        1,   921600, 0x6ba26b43
-0,         76,         76,        1,   921600, 0x6ba26b43
-0,         77,         77,        1,   921600, 0x6ba26b43
 0,         78,         78,        1,   921600, 0x4e9ee49e
-0,         79,         79,        1,   921600, 0x4e9ee49e
 0,         80,         80,        1,   921600, 0xdb5fd6e7
-0,         81,         81,        1,   921600, 0xdb5fd6e7
-0,         82,         82,        1,   921600, 0xdb5fd6e7
 0,         83,         83,        1,   921600, 0x8f2254a5
-0,         84,         84,        1,   921600, 0x8f2254a5
-0,         85,         85,        1,   921600, 0x8f2254a5
-0,         86,         86,        1,   921600, 0x8f2254a5
-0,         87,         87,        1,   921600, 0x8f2254a5
-0,         88,         88,        1,   921600, 0x8f2254a5
-0,         89,         89,        1,   921600, 0x8f2254a5
-0,         90,         90,        1,   921600, 0x8f2254a5
-0,         91,         91,        1,   921600, 0x8f2254a5
-0,         92,         92,        1,   921600, 0x8f2254a5
 0,         93,         93,        1,   921600, 0x57e95c32
-0,         94,         94,        1,   921600, 0x57e95c32
 0,         95,         95,        1,   921600, 0x41627a9b
-0,         96,         96,        1,   921600, 0x41627a9b
-0,         97,         97,        1,   921600, 0x41627a9b
 0,         98,         98,        1,   921600, 0x7412dcee
-0,         99,         99,        1,   921600, 0x7412dcee
 0,        100,        100,        1,   921600, 0xaebe10ed
-0,        101,        101,        1,   921600, 0xaebe10ed
-0,        102,        102,        1,   921600, 0xaebe10ed
 0,        103,        103,        1,   921600, 0x411a91f6
-0,        104,        104,        1,   921600, 0x411a91f6
 0,        105,        105,        1,   921600, 0xb059df3f
-0,        106,        106,        1,   921600, 0xb059df3f
-0,        107,        107,        1,   921600, 0xb059df3f
 0,        108,        108,        1,   921600, 0x4d6f5a77
-0,        109,        109,        1,   921600, 0x4d6f5a77
 0,        110,        110,        1,   921600, 0xbbf06df4
-0,        111,        111,        1,   921600, 0xbbf06df4
-0,        112,        112,        1,   921600, 0xbbf06df4
 0,        113,        113,        1,   921600, 0xe27f7bf6
-0,        114,        114,        1,   921600, 0xe27f7bf6
 0,        115,        115,        1,   921600, 0xd7e8360e
-0,        116,        116,        1,   921600, 0xd7e8360e
-0,        117,        117,        1,   921600, 0xd7e8360e
 0,        118,        118,        1,   921600, 0x1dd4c344
-0,        119,        119,        1,   921600, 0x1dd4c344
 0,        120,        120,        1,   921600, 0x7995a7ce
-0,        121,        121,        1,   921600, 0x7995a7ce
-0,        122,        122,        1,   921600, 0x7995a7ce
 0,        123,        123,        1,   921600, 0x2ef3c566
-0,        124,        124,        1,   921600, 0x2ef3c566
 0,        125,        125,        1,   921600, 0xf296736e
-0,        126,        126,        1,   921600, 0xf296736e
-0,        127,        127,        1,   921600, 0xf296736e
-0,        128,        128,        1,   921600, 0xf296736e
-0,        129,        129,        1,   921600, 0xf296736e
-0,        130,        130,        1,   921600, 0xf296736e
-0,        131,        131,        1,   921600, 0xf296736e
-0,        132,        132,        1,   921600, 0xf296736e
-0,        133,        133,        1,   921600, 0xf296736e
-0,        134,        134,        1,   921600, 0xf296736e
 0,        135,        135,        1,   921600, 0x1a488311
-0,        136,        136,        1,   921600, 0x1a488311
-0,        137,        137,        1,   921600, 0x1a488311
 0,        138,        138,        1,   921600, 0x9e28011b
-0,        139,        139,        1,   921600, 0x9e28011b
 0,        140,        140,        1,   921600, 0x84d1ea80
-0,        141,        141,        1,   921600, 0x84d1ea80
-0,        142,        142,        1,   921600, 0x84d1ea80
 0,        143,        143,        1,   921600, 0x9ed41052
-0,        144,        144,        1,   921600, 0x9ed41052
 0,        145,        145,        1,   921600, 0xd4db7206
-0,        146,        146,        1,   921600, 0xd4db7206
-0,        147,        147,        1,   921600, 0xd4db7206
 0,        148,        148,        1,   921600, 0x55f695a9
-0,        149,        149,        1,   921600, 0x55f695a9
 0,        150,        150,        1,   921600, 0x9d8c667f
-0,        151,        151,        1,   921600, 0x9d8c667f
-0,        152,        152,        1,   921600, 0x9d8c667f
 0,        153,        153,        1,   921600, 0x9b6037ec
-0,        154,        154,        1,   921600, 0x9b6037ec
 0,        155,        155,        1,   921600, 0x57c5e835
-0,        156,        156,        1,   921600, 0x57c5e835
-0,        157,        157,        1,   921600, 0x57c5e835
 0,        158,        158,        1,   921600, 0x476dad89
-0,        159,        159,        1,   921600, 0x476dad89
 0,        160,        160,        1,   921600, 0xcfd6ad2b
-0,        161,        161,        1,   921600, 0xcfd6ad2b
-0,        162,        162,        1,   921600, 0xcfd6ad2b
 0,        163,        163,        1,   921600, 0x3b372379
-0,        164,        164,        1,   921600, 0x3b372379
 0,        165,        165,        1,   921600, 0x36f245f5
-0,        166,        166,        1,   921600, 0x36f245f5

diff --git a/tests/ref/fate/rgb24-mkv b/tests/ref/fate/rgb24-mkv
index 4c357ac..9f0064b 100644
--- a/tests/ref/fate/rgb24-mkv
+++ b/tests/ref/fate/rgb24-mkv

@@ -1,5 +1,5 @@
-55270be3b5d393d770a1dfcb19b68271 *tests/data/fate/rgb24-mkv.matroska
-58345 tests/data/fate/rgb24-mkv.matroska
+d84d5a83971be9c2caa2f4c37bbbfefd *tests/data/fate/rgb24-mkv.matroska
+58343 tests/data/fate/rgb24-mkv.matroska
 #tb 0: 1/10
 #media_type 0: video
 #codec_id 0: rawvideo

diff --git a/tests/ref/fate/segment-mp4-to-ts b/tests/ref/fate/segment-mp4-to-ts
index 847c1a2..b5accb6 100644
--- a/tests/ref/fate/segment-mp4-to-ts
+++ b/tests/ref/fate/segment-mp4-to-ts

@@ -1,10 +1,10 @@
-#extradata 0:       50, 0x4f1b0df9
+#extradata 0:       51, 0x5d140df9
 #tb 0: 1/90000
 #media_type 0: video
 #codec_id 0: h264
 #dimensions 0: 640x360
 #sar 0: 1/1
-0,      -7200,          0,        0,    22630, 0x9b109541, S=1,        1, 0x00e000e0
+0,      -7200,          0,        0,    22631, 0x9cec9541, S=1,        1, 0x00e000e0
 0,      -3600,      14400,        0,     4021, 0xbf7cdb02, F=0x0, S=1,        1, 0x00e000e0
 0,          0,       7200,        0,     1096, 0x4f162690, F=0x0, S=1,        1, 0x00e000e0
 0,       3600,       3600,        0,      687, 0x00394b95, F=0x0, S=1,        1, 0x00e000e0

diff --git a/tests/ref/fate/source b/tests/ref/fate/source
index 2def034..4b9467a 100644
--- a/tests/ref/fate/source
+++ b/tests/ref/fate/source

@@ -2,6 +2,8 @@
 compat/avisynth/windowsPorts/basicDataTypeConversions.h
 compat/avisynth/windowsPorts/windows2linux.h
 libavcodec/file_open.c
+libavcodec/ilbcdata.h
+libavcodec/ilbcdec.c
 libavcodec/interplayacm.c
 libavcodec/log2_tab.c
 libavcodec/reverse.c
@@ -23,12 +25,8 @@
 compat/avisynth/avxsynth_c.h
 compat/avisynth/windowsPorts/basicDataTypeConversions.h
 compat/avisynth/windowsPorts/windows2linux.h
-compat/cuda/dynlink_cuda.h
-compat/cuda/dynlink_cuviddec.h
 compat/cuda/dynlink_loader.h
-compat/cuda/dynlink_nvcuvid.h
 compat/float/float.h
 compat/float/limits.h
-compat/nvenc/nvEncodeAPI.h
 Use of av_clip() where av_clip_uintp2() could be used:
 Use of av_clip() where av_clip_intp2() could be used:

diff --git a/tests/ref/fate/sub-cc-scte20 b/tests/ref/fate/sub-cc-scte20
new file mode 100644
index 0000000..71fc92b
--- /dev/null
+++ b/tests/ref/fate/sub-cc-scte20

@@ -0,0 +1,15 @@
+[Script Info]

+; Script generated by FFmpeg/Lavc

+ScriptType: v4.00+

+PlayResX: 384

+PlayResY: 288

+

+[V4+ Styles]

+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding

+Style: Default,Monospace,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,3,1,0,2,10,10,10,0

+

+[Events]

+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text

+Dialogue: 0,0:00:00.00,0:00:01.44,Default,,0,0,0,,{\an7}{\pos(48,182)}BESIDES THE 

+Dialogue: 0,0:00:01.43,0:00:03.93,Default,,0,0,0,,{\an7}{\pos(38,166)}\hBESIDES THE \N{\an7}{\pos(38,197)}SPENDING AND THIS, IS THAT CAR 

+Dialogue: 0,0:00:03.94,0:00:06.31,Default,,0,0,0,,{\an7}{\pos(38,182)}SPENDING AND THIS, IS THAT CAR \N{\an7}{\pos(38,197)}MANUFACTURERS ARE ABOUT AS 


diff --git a/tests/ref/fate/sws-pixdesc-query b/tests/ref/fate/sws-pixdesc-query
index 0adfdca..451c7d8 100644
--- a/tests/ref/fate/sws-pixdesc-query
+++ b/tests/ref/fate/sws-pixdesc-query

@@ -49,6 +49,8 @@
   gray10le
   gray12be
   gray12le
+  gray14be
+  gray14le
   gray9be
   gray9le
   nv20be
@@ -121,8 +123,10 @@
   gbrpf32be
   gray10be
   gray12be
+  gray14be
   gray16be
   gray9be
+  grayf32be
   nv20be
   p010be
   p016be
@@ -403,10 +407,14 @@
   gray10le
   gray12be
   gray12le
+  gray14be
+  gray14le
   gray16be
   gray16le
   gray9be
   gray9le
+  grayf32be
+  grayf32le
   ya16be
   ya16le
   ya8

diff --git a/tests/ref/fate/time_base b/tests/ref/fate/time_base
index 7923556..710fde1 100644
--- a/tests/ref/fate/time_base
+++ b/tests/ref/fate/time_base

@@ -1 +1 @@
-d26a35b141551b36c5b8bd716451cfcb
+42863a53f6c63efbc8c5a2eb76f13f5f

diff --git a/tests/ref/fate/ts-demux b/tests/ref/fate/ts-demux
index e2931af..eb13ecc 100644
--- a/tests/ref/fate/ts-demux
+++ b/tests/ref/fate/ts-demux

@@ -13,7 +13,7 @@
 1,          0,          0,     2880,     1536, 0x773ffeea, S=1,        1, 0x00bd00bd
 1,       2880,       2880,     2880,     1536, 0x6dc10748
 1,       5760,       5760,     2880,     1536, 0xbab5129c
-1,       8640,       8640,     2880,     1536, 0x602f034b
+1,       8640,       8640,     2880,     1536, 0x602f034b, S=1,        1, 0x00bd00bd
 1,      11520,      11520,     2880,      906, 0x69cdcbcd
 0,      32037,      36541,     1501,   114336, 0x37a215a8, S=1,        1, 0x00e000e0
 0,      33538,      33538,     1501,    12560, 0xb559a3d4, F=0x0, S=1,        1, 0x00e000e0

diff --git a/tests/ref/fate/ts-small-demux b/tests/ref/fate/ts-small-demux
new file mode 100644
index 0000000..cdd7c09
--- /dev/null
+++ b/tests/ref/fate/ts-small-demux

@@ -0,0 +1,80 @@
+#extradata 0:       35, 0x83f4073d
+#tb 0: 1/90000
+#media_type 0: video
+#codec_id 0: h264
+#dimensions 0: 82x144
+#sar 0: 1/1
+0,          0,          0,     6000,     1290, 0x4684e0ab, S=1,        1, 0x00e000e0
+0,       6000,       6000,     6000,       21, 0x267504dc, F=0x0, S=1,        1, 0x00e000e0
+0,      12000,      12000,     6000,       15, 0x0f9402f6, F=0x0, S=1,        1, 0x00e000e0
+0,      18000,      18000,     6000,       15, 0x0ff40316, F=0x0, S=1,        1, 0x00e000e0
+0,      24000,      24000,     6000,       15, 0x10540336, F=0x0, S=1,        1, 0x00e000e0
+0,      30000,      30000,     6000,       15, 0x10b40356, F=0x0, S=1,        1, 0x00e000e0
+0,      36000,      36000,     6000,       15, 0x11140376, F=0x0, S=1,        1, 0x00e000e0
+0,      42000,      42000,     6000,       15, 0x11740396, F=0x0, S=1,        1, 0x00e000e0
+0,      48000,      48000,     6000,       15, 0x0ed802b7, F=0x0, S=1,        1, 0x00e000e0
+0,      54000,      54000,     6000,       15, 0x0f3802d7, F=0x0, S=1,        1, 0x00e000e0
+0,      60000,      60000,     6000,       15, 0x0f9802f7, F=0x0, S=1,        1, 0x00e000e0
+0,      66000,      66000,     6000,       15, 0x0ff80317, F=0x0, S=1,        1, 0x00e000e0
+0,      72000,      72000,     6000,       15, 0x10580337, F=0x0, S=1,        1, 0x00e000e0
+0,      78000,      78000,     6000,       15, 0x10b80357, F=0x0, S=1,        1, 0x00e000e0
+0,      84000,      84000,     6000,       15, 0x11180377, F=0x0, S=1,        1, 0x00e000e0
+0,      90000,      90000,     6000,       15, 0x11780397, F=0x0, S=1,        1, 0x00e000e0
+0,      96000,      96000,     6000,       15, 0x0ed402b6, F=0x0, S=1,        1, 0x00e000e0
+0,     102000,     102000,     6000,       15, 0x0f3402d6, F=0x0, S=1,        1, 0x00e000e0
+0,     108000,     108000,     6000,       15, 0x0f9402f6, F=0x0, S=1,        1, 0x00e000e0
+0,     114000,     114000,     6000,       15, 0x0ff40316, F=0x0, S=1,        1, 0x00e000e0
+0,     120000,     120000,     6000,       15, 0x10540336, F=0x0, S=1,        1, 0x00e000e0
+0,     126000,     126000,     6000,       15, 0x10b40356, F=0x0, S=1,        1, 0x00e000e0
+0,     132000,     132000,     6000,       15, 0x11140376, F=0x0, S=1,        1, 0x00e000e0
+0,     138000,     138000,     6000,       15, 0x11740396, F=0x0, S=1,        1, 0x00e000e0
+0,     144000,     144000,     6000,       15, 0x0ed802b7, F=0x0, S=1,        1, 0x00e000e0
+0,     150000,     150000,     6000,       15, 0x0f3802d7, F=0x0, S=1,        1, 0x00e000e0
+0,     156000,     156000,     6000,       15, 0x0f9802f7, F=0x0, S=1,        1, 0x00e000e0
+0,     162000,     162000,     6000,       15, 0x0ff80317, F=0x0, S=1,        1, 0x00e000e0
+0,     168000,     168000,     6000,       15, 0x10580337, F=0x0, S=1,        1, 0x00e000e0
+0,     174000,     174000,     6000,       15, 0x10b80357, F=0x0, S=1,        1, 0x00e000e0
+0,     180000,     180000,     6000,       15, 0x11180377, F=0x0, S=1,        1, 0x00e000e0
+0,     186000,     186000,     6000,       15, 0x11780397, F=0x0, S=1,        1, 0x00e000e0
+0,     192000,     192000,     6000,       15, 0x0ed402b6, F=0x0, S=1,        1, 0x00e000e0
+0,     198000,     198000,     6000,       15, 0x0f3402d6, F=0x0, S=1,        1, 0x00e000e0
+0,     204000,     204000,     6000,       15, 0x0f9402f6, F=0x0, S=1,        1, 0x00e000e0
+0,     210000,     210000,     6000,       15, 0x0ff40316, F=0x0, S=1,        1, 0x00e000e0
+0,     216000,     216000,     6000,       15, 0x10540336, F=0x0, S=1,        1, 0x00e000e0
+0,     222000,     222000,     6000,       15, 0x10b40356, F=0x0, S=1,        1, 0x00e000e0
+0,     228000,     228000,     6000,       15, 0x11140376, F=0x0, S=1,        1, 0x00e000e0
+0,     234000,     234000,     6000,       15, 0x11740396, F=0x0, S=1,        1, 0x00e000e0
+0,     240000,     240000,     6000,       15, 0x0ed802b7, F=0x0, S=1,        1, 0x00e000e0
+0,     246000,     246000,     6000,       15, 0x0f3802d7, F=0x0, S=1,        1, 0x00e000e0
+0,     252000,     252000,     6000,       15, 0x0f9802f7, F=0x0, S=1,        1, 0x00e000e0
+0,     258000,     258000,     6000,       15, 0x0ff80317, F=0x0, S=1,        1, 0x00e000e0
+0,     264000,     264000,     6000,       15, 0x10580337, F=0x0, S=1,        1, 0x00e000e0
+0,     270000,     270000,     6000,       15, 0x10b80357, F=0x0, S=1,        1, 0x00e000e0
+0,     276000,     276000,     6000,       15, 0x11180377, F=0x0, S=1,        1, 0x00e000e0
+0,     282000,     282000,     6000,       15, 0x11780397, F=0x0, S=1,        1, 0x00e000e0
+0,     288000,     288000,     6000,       15, 0x0ed402b6, F=0x0, S=1,        1, 0x00e000e0
+0,     294000,     294000,     6000,       15, 0x0f3402d6, F=0x0, S=1,        1, 0x00e000e0
+0,     300000,     300000,     6000,       15, 0x0f9402f6, F=0x0, S=1,        1, 0x00e000e0
+0,     306000,     306000,     6000,       15, 0x0ff40316, F=0x0, S=1,        1, 0x00e000e0
+0,     312000,     312000,     6000,       15, 0x10540336, F=0x0, S=1,        1, 0x00e000e0
+0,     318000,     318000,     6000,       15, 0x10b40356, F=0x0, S=1,        1, 0x00e000e0
+0,     324000,     324000,     6000,       15, 0x11140376, F=0x0, S=1,        1, 0x00e000e0
+0,     330000,     330000,     6000,       15, 0x11740396, F=0x0, S=1,        1, 0x00e000e0
+0,     336000,     336000,     6000,       15, 0x0ed802b7, F=0x0, S=1,        1, 0x00e000e0
+0,     342000,     342000,     6000,       15, 0x0f3802d7, F=0x0, S=1,        1, 0x00e000e0
+0,     348000,     348000,     6000,       15, 0x0f9802f7, F=0x0, S=1,        1, 0x00e000e0
+0,     354000,     354000,     6000,       15, 0x0ff80317, F=0x0, S=1,        1, 0x00e000e0
+0,     360000,     360000,     6000,       15, 0x10580337, F=0x0, S=1,        1, 0x00e000e0
+0,     366000,     366000,     6000,       15, 0x10b80357, F=0x0, S=1,        1, 0x00e000e0
+0,     372000,     372000,     6000,       15, 0x11180377, F=0x0, S=1,        1, 0x00e000e0
+0,     378000,     378000,     6000,       15, 0x11780397, F=0x0, S=1,        1, 0x00e000e0
+0,     384000,     384000,     6000,       15, 0x0ed402b6, F=0x0, S=1,        1, 0x00e000e0
+0,     390000,     390000,     6000,       15, 0x0f3402d6, F=0x0, S=1,        1, 0x00e000e0
+0,     396000,     396000,     6000,       15, 0x0f9402f6, F=0x0, S=1,        1, 0x00e000e0
+0,     402000,     402000,     6000,       15, 0x0ff40316, F=0x0, S=1,        1, 0x00e000e0
+0,     408000,     408000,     6000,       15, 0x10540336, F=0x0, S=1,        1, 0x00e000e0
+0,     414000,     414000,     6000,       15, 0x10b40356, F=0x0, S=1,        1, 0x00e000e0
+0,     420000,     420000,     6000,       15, 0x11140376, F=0x0, S=1,        1, 0x00e000e0
+0,     426000,     426000,     6000,       16, 0x15a2042d, F=0x0, S=1,        1, 0x00e000e0
+0,     432000,     432000,     6000,       16, 0x1227034e, F=0x0, S=1,        1, 0x00e000e0
+0,     438000,     438000,     6000,       16, 0x136703ae, F=0x0

diff --git a/tests/ref/fate/tscc-15bit b/tests/ref/fate/tscc-15bit
index abfe6a6..ded8ad4 100644
--- a/tests/ref/fate/tscc-15bit
+++ b/tests/ref/fate/tscc-15bit

@@ -11,93 +11,34 @@
 #channel_layout_name 1: mono
 0,          0,          0,        1,   657600, 0x50b3a0c2
 1,          0,          0,    11025,    22050, 0x1740aaec
-0,          1,          1,        1,   657600, 0x50b3a0c2
-0,          2,          2,        1,   657600, 0x50b3a0c2
 0,          3,          3,        1,   657600, 0x661aa145
-0,          4,          4,        1,   657600, 0x661aa145
-0,          5,          5,        1,   657600, 0x661aa145
-0,          6,          6,        1,   657600, 0x661aa145
-0,          7,          7,        1,   657600, 0x661aa145
-0,          8,          8,        1,   657600, 0x661aa145
-0,          9,          9,        1,   657600, 0x661aa145
-0,         10,         10,        1,   657600, 0x661aa145
-0,         11,         11,        1,   657600, 0x661aa145
-0,         12,         12,        1,   657600, 0x661aa145
-0,         13,         13,        1,   657600, 0x661aa145
-0,         14,         14,        1,   657600, 0x661aa145
-0,         15,         15,        1,   657600, 0x661aa145
 1,      11025,      11025,    11025,    22050, 0x75ed6086
-0,         16,         16,        1,   657600, 0x661aa145
-0,         17,         17,        1,   657600, 0x661aa145
-0,         18,         18,        1,   657600, 0x661aa145
-0,         19,         19,        1,   657600, 0x661aa145
-0,         20,         20,        1,   657600, 0x661aa145
 0,         21,         21,        1,   657600, 0x3c29a73f
-0,         22,         22,        1,   657600, 0x3c29a73f
-0,         23,         23,        1,   657600, 0x3c29a73f
 0,         24,         24,        1,   657600, 0xee2ca145
-0,         25,         25,        1,   657600, 0xee2ca145
-0,         26,         26,        1,   657600, 0xee2ca145
-0,         27,         27,        1,   657600, 0xee2ca145
-0,         28,         28,        1,   657600, 0xee2ca145
-0,         29,         29,        1,   657600, 0xee2ca145
 0,         30,         30,        1,   657600, 0xeb6fa442
 1,      22050,      22050,    11025,    22050, 0xca52a4e9
-0,         31,         31,        1,   657600, 0xeb6fa442
-0,         32,         32,        1,   657600, 0xeb6fa442
 0,         33,         33,        1,   657600, 0xb235a145
-0,         34,         34,        1,   657600, 0xb235a145
-0,         35,         35,        1,   657600, 0xb235a145
 0,         36,         36,        1,   657600, 0x39f7ad39
-0,         37,         37,        1,   657600, 0x39f7ad39
-0,         38,         38,        1,   657600, 0x39f7ad39
 0,         39,         39,        1,   657600, 0xb851abda
-0,         40,         40,        1,   657600, 0xb851abda
-0,         41,         41,        1,   657600, 0xb851abda
 0,         42,         42,        1,   657600, 0xf6574b22
 0,         43,         43,        1,   657600, 0x1a154a9f
 0,         44,         44,        1,   657600, 0x3de64916
 0,         45,         45,        1,   657600, 0xca3d9cd5
 1,      33075,      33075,    11025,    22050, 0xb306d419
-0,         46,         46,        1,   657600, 0xca3d9cd5
 0,         47,         47,        1,   657600, 0x4779a2cf
-0,         48,         48,        1,   657600, 0x4779a2cf
-0,         49,         49,        1,   657600, 0x4779a2cf
-0,         50,         50,        1,   657600, 0x4779a2cf
-0,         51,         51,        1,   657600, 0x4779a2cf
-0,         52,         52,        1,   657600, 0x4779a2cf
-0,         53,         53,        1,   657600, 0x4779a2cf
-0,         54,         54,        1,   657600, 0x4779a2cf
-0,         55,         55,        1,   657600, 0x4779a2cf
-0,         56,         56,        1,   657600, 0x4779a2cf
 0,         57,         57,        1,   657600, 0x29af1818
-0,         58,         58,        1,   657600, 0x29af1818
-0,         59,         59,        1,   657600, 0x29af1818
 0,         60,         60,        1,   657600, 0x77ace9c5
 1,      44100,      44100,    11025,    22050, 0x8cbb9625
-0,         61,         61,        1,   657600, 0x77ace9c5
 0,         62,         62,        1,   657600, 0x61b8e74b
 0,         63,         63,        1,   657600, 0x8c6deace
-0,         64,         64,        1,   657600, 0x8c6deace
-0,         65,         65,        1,   657600, 0x8c6deace
 0,         66,         66,        1,   657600, 0xbe1fe8d7
-0,         67,         67,        1,   657600, 0xbe1fe8d7
-0,         68,         68,        1,   657600, 0xbe1fe8d7
 0,         69,         69,        1,   657600, 0x633209db
-0,         70,         70,        1,   657600, 0x633209db
-0,         71,         71,        1,   657600, 0x633209db
 0,         72,         72,        1,   657600, 0x3148adb5
-0,         73,         73,        1,   657600, 0x3148adb5
 0,         74,         74,        1,   657600, 0x3b5f5216
-0,         75,         75,        1,   657600, 0x3b5f5216
 1,      55125,      55125,    11025,    22050, 0x34a11f66
-0,         76,         76,        1,   657600, 0x3b5f5216
-0,         77,         77,        1,   657600, 0x3b5f5216
 0,         78,         78,        1,   657600, 0x5e51fb89
-0,         79,         79,        1,   657600, 0x5e51fb89
 0,         80,         80,        1,   657600, 0x48eafb06
 0,         81,         81,        1,   657600, 0x9f7a8653
-0,         82,         82,        1,   657600, 0x9f7a8653
 0,         83,         83,        1,   657600, 0x29fc83d9
 0,         84,         84,        1,   657600, 0xe7689f10
 0,         85,         85,        1,   657600, 0x9f788dba
@@ -107,145 +48,66 @@
 0,         89,         89,        1,   657600, 0xf7583802
 0,         90,         90,        1,   657600, 0x239e2fc6
 1,      66150,      66150,    11025,    22050, 0x1ae81230
-0,         91,         91,        1,   657600, 0x239e2fc6
-0,         92,         92,        1,   657600, 0x239e2fc6
 0,         93,         93,        1,   657600, 0x001c134c
 0,         94,         94,        1,   657600, 0x5c85134c
-0,         95,         95,        1,   657600, 0x5c85134c
-0,         96,         96,        1,   657600, 0x5c85134c
-0,         97,         97,        1,   657600, 0x5c85134c
-0,         98,         98,        1,   657600, 0x5c85134c
 0,         99,         99,        1,   657600, 0x5fef8bea
-0,        100,        100,        1,   657600, 0x5fef8bea
 0,        101,        101,        1,   657600, 0x23135efa
-0,        102,        102,        1,   657600, 0x23135efa
-0,        103,        103,        1,   657600, 0x23135efa
-0,        104,        104,        1,   657600, 0x23135efa
-0,        105,        105,        1,   657600, 0x23135efa
 1,      77175,      77175,    11025,    22050, 0x1217eeba
-0,        106,        106,        1,   657600, 0x23135efa
-0,        107,        107,        1,   657600, 0x23135efa
 0,        108,        108,        1,   657600, 0x50cf63ee
-0,        109,        109,        1,   657600, 0x50cf63ee
-0,        110,        110,        1,   657600, 0x50cf63ee
 0,        111,        111,        1,   657600, 0x2f5c5efa
-0,        112,        112,        1,   657600, 0x2f5c5efa
-0,        113,        113,        1,   657600, 0x2f5c5efa
 0,        114,        114,        1,   657600, 0x9980d3c1
-0,        115,        115,        1,   657600, 0x9980d3c1
-0,        116,        116,        1,   657600, 0x9980d3c1
 0,        117,        117,        1,   657600, 0x23f02141
-0,        118,        118,        1,   657600, 0x23f02141
-0,        119,        119,        1,   657600, 0x23f02141
 0,        120,        120,        1,   657600, 0x3d31ea57
 1,      88200,      88200,    11025,    22050, 0x50e70baa
 0,        121,        121,        1,   657600, 0x1e9be92f
-0,        122,        122,        1,   657600, 0x1e9be92f
 0,        123,        123,        1,   657600, 0x05091a2e
-0,        124,        124,        1,   657600, 0x05091a2e
-0,        125,        125,        1,   657600, 0x05091a2e
 0,        126,        126,        1,   657600, 0xd214c71a
-0,        127,        127,        1,   657600, 0xd214c71a
-0,        128,        128,        1,   657600, 0xd214c71a
 0,        129,        129,        1,   657600, 0x3b07f720
-0,        130,        130,        1,   657600, 0x3b07f720
-0,        131,        131,        1,   657600, 0x3b07f720
 0,        132,        132,        1,   657600, 0x02becc42
 0,        133,        133,        1,   657600, 0x3d8fcf2e
-0,        134,        134,        1,   657600, 0x3d8fcf2e
 0,        135,        135,        1,   657600, 0xec51ddd7
 1,      99225,      99225,    11025,    22050, 0xb19e89c0
-0,        136,        136,        1,   657600, 0xec51ddd7
-0,        137,        137,        1,   657600, 0xec51ddd7
-0,        138,        138,        1,   657600, 0xec51ddd7
-0,        139,        139,        1,   657600, 0xec51ddd7
-0,        140,        140,        1,   657600, 0xec51ddd7
 0,        141,        141,        1,   657600, 0x40a3b905
-0,        142,        142,        1,   657600, 0x40a3b905
 0,        143,        143,        1,   657600, 0xbfc5baa9
-0,        144,        144,        1,   657600, 0xbfc5baa9
-0,        145,        145,        1,   657600, 0xbfc5baa9
-0,        146,        146,        1,   657600, 0xbfc5baa9
-0,        147,        147,        1,   657600, 0xbfc5baa9
-0,        148,        148,        1,   657600, 0xbfc5baa9
-0,        149,        149,        1,   657600, 0xbfc5baa9
 0,        150,        150,        1,   657600, 0x54a2f8dd
 1,     110250,     110250,    11025,    22050, 0x78526696
 0,        151,        151,        1,   657600, 0x0b96f90d
-0,        152,        152,        1,   657600, 0x0b96f90d
 0,        153,        153,        1,   657600, 0xa18119e9
 0,        154,        154,        1,   657600, 0x70a11ce6
 0,        155,        155,        1,   657600, 0xb36f19e9
 0,        156,        156,        1,   657600, 0xeb2219e9
-0,        157,        157,        1,   657600, 0xeb2219e9
-0,        158,        158,        1,   657600, 0xeb2219e9
 0,        159,        159,        1,   657600, 0xb98f19e9
 0,        160,        160,        1,   657600, 0xa4281966
 0,        161,        161,        1,   657600, 0xf0e61966
 0,        162,        162,        1,   657600, 0x065c19e9
-0,        163,        163,        1,   657600, 0x065c19e9
-0,        164,        164,        1,   657600, 0x065c19e9
-0,        165,        165,        1,   657600, 0x065c19e9
 1,     121275,     121275,    11025,    22050, 0x48e3bb21
-0,        166,        166,        1,   657600, 0x065c19e9
-0,        167,        167,        1,   657600, 0x065c19e9
-0,        168,        168,        1,   657600, 0x065c19e9
-0,        169,        169,        1,   657600, 0x065c19e9
-0,        170,        170,        1,   657600, 0x065c19e9
 0,        171,        171,        1,   657600, 0x2f1d1ce6
-0,        172,        172,        1,   657600, 0x2f1d1ce6
 0,        173,        173,        1,   657600, 0x181719e9
 0,        174,        174,        1,   657600, 0x938d1ce6
-0,        175,        175,        1,   657600, 0x938d1ce6
-0,        176,        176,        1,   657600, 0x938d1ce6
 0,        177,        177,        1,   657600, 0xf0acbabf
-0,        178,        178,        1,   657600, 0xf0acbabf
-0,        179,        179,        1,   657600, 0xf0acbabf
 0,        180,        180,        1,   657600, 0x0f47804f
 1,     132300,     132300,    11025,    22050, 0xbc32204a
-0,        181,        181,        1,   657600, 0x0f47804f
 0,        182,        182,        1,   657600, 0x5e0c7a55
 0,        183,        183,        1,   657600, 0x0c8f4374
 0,        184,        184,        1,   657600, 0x709a3b00
-0,        185,        185,        1,   657600, 0x709a3b00
 0,        186,        186,        1,   657600, 0xf57b7a0f
 0,        187,        187,        1,   657600, 0x99427f1b
-0,        188,        188,        1,   657600, 0x99427f1b
 0,        189,        189,        1,   657600, 0xcb3608e7
 0,        190,        190,        1,   657600, 0x0992fd64
-0,        191,        191,        1,   657600, 0x0992fd64
 0,        192,        192,        1,   657600, 0x7a95fa02
-0,        193,        193,        1,   657600, 0x7a95fa02
-0,        194,        194,        1,   657600, 0x7a95fa02
 0,        195,        195,        1,   657600, 0xb97dd910
 1,     143325,     143325,    11025,    22050, 0xdf6f1e46
-0,        196,        196,        1,   657600, 0xb97dd910
-0,        197,        197,        1,   657600, 0xb97dd910
 0,        198,        198,        1,   657600, 0x3be07a66
-0,        199,        199,        1,   657600, 0x3be07a66
 0,        200,        200,        1,   657600, 0x1ae77960
 0,        201,        201,        1,   657600, 0x62177f5a
 0,        202,        202,        1,   657600, 0xf57c7c5d
 0,        203,        203,        1,   657600, 0x600e7960
 0,        204,        204,        1,   657600, 0xe15d7960
 0,        205,        205,        1,   657600, 0x79427663
-0,        206,        206,        1,   657600, 0x79427663
 0,        207,        207,        1,   657600, 0xa7c77960
-0,        208,        208,        1,   657600, 0xa7c77960
 0,        209,        209,        1,   657600, 0x75f67663
 0,        210,        210,        1,   657600, 0x3a157960
 1,     154350,     154350,    11025,    22050, 0x4c91da9d
 0,        211,        211,        1,   657600, 0x72aa7663
-0,        212,        212,        1,   657600, 0x72aa7663
 0,        213,        213,        1,   657600, 0x1b277663
-0,        214,        214,        1,   657600, 0x1b277663
-0,        215,        215,        1,   657600, 0x1b277663
 0,        216,        216,        1,   657600, 0x6f5e7663
-0,        217,        217,        1,   657600, 0x6f5e7663
-0,        218,        218,        1,   657600, 0x6f5e7663
-0,        219,        219,        1,   657600, 0x6f5e7663
-0,        220,        220,        1,   657600, 0x6f5e7663
-0,        221,        221,        1,   657600, 0x6f5e7663
-0,        222,        222,        1,   657600, 0x6f5e7663
-0,        223,        223,        1,   657600, 0x6f5e7663
-0,        224,        224,        1,   657600, 0x6f5e7663

diff --git a/tests/ref/fate/tscc-32bit b/tests/ref/fate/tscc-32bit
index dfb37a9..7e8e6b9 100644
--- a/tests/ref/fate/tscc-32bit
+++ b/tests/ref/fate/tscc-32bit

@@ -11,151 +11,30 @@
 0,          5,          5,        1,  2359296, 0xbb0e0026
 0,          6,          6,        1,  2359296, 0x66a905ab
 0,          7,          7,        1,  2359296, 0xe990f855
-0,          8,          8,        1,  2359296, 0xe990f855
 0,          9,          9,        1,  2359296, 0x3ec2c64e
 0,         13,         13,        1,  2359296, 0xda3ba3cf
 0,         14,         14,        1,  2359296, 0x60a070fd
 0,         15,         15,        1,  2359296, 0x42e5fedc
-0,         16,         16,        1,  2359296, 0x42e5fedc
 0,         17,         17,        1,  2359296, 0x699cf990
-0,         18,         18,        1,  2359296, 0x699cf990
-0,         19,         19,        1,  2359296, 0x699cf990
-0,         20,         20,        1,  2359296, 0x699cf990
-0,         21,         21,        1,  2359296, 0x699cf990
-0,         22,         22,        1,  2359296, 0x699cf990
-0,         23,         23,        1,  2359296, 0x699cf990
 0,         24,         24,        1,  2359296, 0x1524160c
-0,         25,         25,        1,  2359296, 0x1524160c
-0,         26,         26,        1,  2359296, 0x1524160c
-0,         27,         27,        1,  2359296, 0x1524160c
-0,         28,         28,        1,  2359296, 0x1524160c
-0,         29,         29,        1,  2359296, 0x1524160c
-0,         30,         30,        1,  2359296, 0x1524160c
 0,         31,         31,        1,  2359296, 0x33df0c8c
-0,         32,         32,        1,  2359296, 0x33df0c8c
-0,         33,         33,        1,  2359296, 0x33df0c8c
-0,         34,         34,        1,  2359296, 0x33df0c8c
-0,         35,         35,        1,  2359296, 0x33df0c8c
-0,         36,         36,        1,  2359296, 0x33df0c8c
-0,         37,         37,        1,  2359296, 0x33df0c8c
 0,         38,         38,        1,  2359296, 0xfe3d29f8
-0,         39,         39,        1,  2359296, 0xfe3d29f8
-0,         40,         40,        1,  2359296, 0xfe3d29f8
-0,         41,         41,        1,  2359296, 0xfe3d29f8
-0,         42,         42,        1,  2359296, 0xfe3d29f8
-0,         43,         43,        1,  2359296, 0xfe3d29f8
-0,         44,         44,        1,  2359296, 0xfe3d29f8
 0,         45,         45,        1,  2359296, 0x1b9d197f
-0,         46,         46,        1,  2359296, 0x1b9d197f
-0,         47,         47,        1,  2359296, 0x1b9d197f
-0,         48,         48,        1,  2359296, 0x1b9d197f
-0,         49,         49,        1,  2359296, 0x1b9d197f
-0,         50,         50,        1,  2359296, 0x1b9d197f
-0,         51,         51,        1,  2359296, 0x1b9d197f
 0,         52,         52,        1,  2359296, 0x48c126fb
-0,         53,         53,        1,  2359296, 0x48c126fb
-0,         54,         54,        1,  2359296, 0x48c126fb
-0,         55,         55,        1,  2359296, 0x48c126fb
-0,         56,         56,        1,  2359296, 0x48c126fb
-0,         57,         57,        1,  2359296, 0x48c126fb
-0,         58,         58,        1,  2359296, 0x48c126fb
 0,         59,         59,        1,  2359296, 0xcaa31c7c
-0,         60,         60,        1,  2359296, 0xcaa31c7c
-0,         61,         61,        1,  2359296, 0xcaa31c7c
-0,         62,         62,        1,  2359296, 0xcaa31c7c
-0,         63,         63,        1,  2359296, 0xcaa31c7c
-0,         64,         64,        1,  2359296, 0xcaa31c7c
-0,         65,         65,        1,  2359296, 0xcaa31c7c
 0,         66,         66,        1,  2359296, 0xc6a333ee
-0,         67,         67,        1,  2359296, 0xc6a333ee
-0,         68,         68,        1,  2359296, 0xc6a333ee
-0,         69,         69,        1,  2359296, 0xc6a333ee
-0,         70,         70,        1,  2359296, 0xc6a333ee
-0,         71,         71,        1,  2359296, 0xc6a333ee
-0,         72,         72,        1,  2359296, 0xc6a333ee
 0,         73,         73,        1,  2359296, 0xb96d1583
-0,         74,         74,        1,  2359296, 0xb96d1583
-0,         75,         75,        1,  2359296, 0xb96d1583
-0,         76,         76,        1,  2359296, 0xb96d1583
-0,         77,         77,        1,  2359296, 0xb96d1583
-0,         78,         78,        1,  2359296, 0xb96d1583
-0,         79,         79,        1,  2359296, 0xb96d1583
 0,         80,         80,        1,  2359296, 0x878135ec
-0,         82,         82,        1,  2359296, 0x878135ec
-0,         83,         83,        1,  2359296, 0x878135ec
-0,         84,         84,        1,  2359296, 0x878135ec
-0,         85,         85,        1,  2359296, 0x878135ec
-0,         86,         86,        1,  2359296, 0x878135ec
-0,         87,         87,        1,  2359296, 0x878135ec
 0,         88,         88,        1,  2359296, 0x76922870
-0,         89,         89,        1,  2359296, 0x76922870
-0,         90,         90,        1,  2359296, 0x76922870
-0,         91,         91,        1,  2359296, 0x76922870
-0,         92,         92,        1,  2359296, 0x76922870
-0,         93,         93,        1,  2359296, 0x76922870
-0,         94,         94,        1,  2359296, 0x76922870
 0,         95,         95,        1,  2359296, 0xb0e031f0
-0,         96,         96,        1,  2359296, 0xb0e031f0
-0,         97,         97,        1,  2359296, 0xb0e031f0
-0,         98,         98,        1,  2359296, 0xb0e031f0
-0,         99,         99,        1,  2359296, 0xb0e031f0
-0,        100,        100,        1,  2359296, 0xb0e031f0
-0,        101,        101,        1,  2359296, 0xb0e031f0
 0,        102,        102,        1,  2359296, 0xb2ef2a6e
-0,        103,        103,        1,  2359296, 0xb2ef2a6e
-0,        104,        104,        1,  2359296, 0xb2ef2a6e
 0,        105,        105,        1,  2359296, 0x083c2474
-0,        106,        106,        1,  2359296, 0x083c2474
-0,        107,        107,        1,  2359296, 0x083c2474
-0,        108,        108,        1,  2359296, 0x083c2474
 0,        109,        109,        1,  2359296, 0xbdfe2ef3
-0,        110,        110,        1,  2359296, 0xbdfe2ef3
-0,        111,        111,        1,  2359296, 0xbdfe2ef3
-0,        112,        112,        1,  2359296, 0xbdfe2ef3
-0,        113,        113,        1,  2359296, 0xbdfe2ef3
-0,        114,        114,        1,  2359296, 0xbdfe2ef3
-0,        115,        115,        1,  2359296, 0xbdfe2ef3
 0,        116,        116,        1,  2359296, 0x934b1484
-0,        117,        117,        1,  2359296, 0x934b1484
-0,        118,        118,        1,  2359296, 0x934b1484
-0,        119,        119,        1,  2359296, 0x934b1484
 0,        120,        120,        1,  2359296, 0x3e0d1a7e
-0,        121,        121,        1,  2359296, 0x3e0d1a7e
-0,        122,        122,        1,  2359296, 0x3e0d1a7e
 0,        123,        123,        1,  2359296, 0x3ce539e8
-0,        124,        124,        1,  2359296, 0x3ce539e8
-0,        125,        125,        1,  2359296, 0x3ce539e8
-0,        126,        126,        1,  2359296, 0x3ce539e8
-0,        127,        127,        1,  2359296, 0x3ce539e8
-0,        128,        128,        1,  2359296, 0x3ce539e8
-0,        129,        129,        1,  2359296, 0x3ce539e8
 0,        130,        130,        1,  2359296, 0xd46c2f69
-0,        131,        131,        1,  2359296, 0xd46c2f69
-0,        132,        132,        1,  2359296, 0xd46c2f69
-0,        133,        133,        1,  2359296, 0xd46c2f69
-0,        134,        134,        1,  2359296, 0xd46c2f69
-0,        135,        135,        1,  2359296, 0xd46c2f69
-0,        136,        136,        1,  2359296, 0xd46c2f69
 0,        137,        137,        1,  2359296, 0x8d2933ee
-0,        138,        138,        1,  2359296, 0x8d2933ee
-0,        139,        139,        1,  2359296, 0x8d2933ee
-0,        140,        140,        1,  2359296, 0x8d2933ee
-0,        141,        141,        1,  2359296, 0x8d2933ee
-0,        142,        142,        1,  2359296, 0x8d2933ee
-0,        143,        143,        1,  2359296, 0x8d2933ee
 0,        144,        144,        1,  2359296, 0xb6092b6d
-0,        145,        145,        1,  2359296, 0xb6092b6d
-0,        146,        146,        1,  2359296, 0xb6092b6d
-0,        147,        147,        1,  2359296, 0xb6092b6d
-0,        148,        148,        1,  2359296, 0xb6092b6d
-0,        149,        149,        1,  2359296, 0xb6092b6d
-0,        150,        150,        1,  2359296, 0xb6092b6d
 0,        151,        151,        1,  2359296, 0xe4ef27fa
-0,        152,        152,        1,  2359296, 0xe4ef27fa
-0,        153,        153,        1,  2359296, 0xe4ef27fa
-0,        154,        154,        1,  2359296, 0xe4ef27fa
-0,        155,        155,        1,  2359296, 0xe4ef27fa
-0,        156,        156,        1,  2359296, 0xe4ef27fa
-0,        157,        157,        1,  2359296, 0xe4ef27fa
 0,        158,        158,        1,  2359296, 0x5e5b2672
-0,        159,        159,        1,  2359296, 0x5e5b2672

diff --git a/tests/ref/fate/utvideo_rgb_int_gradient b/tests/ref/fate/utvideo_rgb_int_gradient
new file mode 100644
index 0000000..a4e6986
--- /dev/null
+++ b/tests/ref/fate/utvideo_rgb_int_gradient

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 64x48
+#sar 0: 0/1
+0,          0,          0,        1,     9216, 0xd00fdd8c

diff --git a/tests/ref/fate/utvideo_rgb_int_median b/tests/ref/fate/utvideo_rgb_int_median
new file mode 100644
index 0000000..a4e6986
--- /dev/null
+++ b/tests/ref/fate/utvideo_rgb_int_median

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 64x48
+#sar 0: 0/1
+0,          0,          0,        1,     9216, 0xd00fdd8c

diff --git a/tests/ref/fate/utvideo_rgba_gradient b/tests/ref/fate/utvideo_rgba_gradient
new file mode 100644
index 0000000..f52f46b
--- /dev/null
+++ b/tests/ref/fate/utvideo_rgba_gradient

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 37x37
+#sar 0: 0/1
+0,          0,          0,        1,     5476, 0x20f860ad

diff --git a/tests/ref/fate/utvideo_yuv420_gradient b/tests/ref/fate/utvideo_yuv420_gradient
new file mode 100644
index 0000000..5e2b7ee
--- /dev/null
+++ b/tests/ref/fate/utvideo_yuv420_gradient

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 64x48
+#sar 0: 0/1
+0,          0,          0,        1,     4608, 0xc441dd90

diff --git a/tests/ref/fate/utvideo_yuv420_int_gradient b/tests/ref/fate/utvideo_yuv420_int_gradient
new file mode 100644
index 0000000..08e45d7
--- /dev/null
+++ b/tests/ref/fate/utvideo_yuv420_int_gradient

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 64x48
+#sar 0: 0/1
+0,          0,          0,        1,     4608, 0x8cecddae

diff --git a/tests/ref/fate/utvideo_yuv420_int_median b/tests/ref/fate/utvideo_yuv420_int_median
new file mode 100644
index 0000000..08e45d7
--- /dev/null
+++ b/tests/ref/fate/utvideo_yuv420_int_median

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 64x48
+#sar 0: 0/1
+0,          0,          0,        1,     4608, 0x8cecddae

diff --git a/tests/ref/fate/utvideo_yuv422_gradient b/tests/ref/fate/utvideo_yuv422_gradient
new file mode 100644
index 0000000..f4949b2
--- /dev/null
+++ b/tests/ref/fate/utvideo_yuv422_gradient

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 64x48
+#sar 0: 0/1
+0,          0,          0,        1,     6144, 0xd33add91

diff --git a/tests/ref/fate/utvideo_yuv422_int_gradient b/tests/ref/fate/utvideo_yuv422_int_gradient
new file mode 100644
index 0000000..f4949b2
--- /dev/null
+++ b/tests/ref/fate/utvideo_yuv422_int_gradient

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 64x48
+#sar 0: 0/1
+0,          0,          0,        1,     6144, 0xd33add91

diff --git a/tests/ref/fate/utvideo_yuv422_int_median b/tests/ref/fate/utvideo_yuv422_int_median
new file mode 100644
index 0000000..f4949b2
--- /dev/null
+++ b/tests/ref/fate/utvideo_yuv422_int_median

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 64x48
+#sar 0: 0/1
+0,          0,          0,        1,     6144, 0xd33add91

diff --git a/tests/ref/fate/utvideo_yuv444_709_gradient b/tests/ref/fate/utvideo_yuv444_709_gradient
new file mode 100644
index 0000000..0870c87
--- /dev/null
+++ b/tests/ref/fate/utvideo_yuv444_709_gradient

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 38x38
+#sar 0: 0/1
+0,          0,          0,        1,     4332, 0xa9de65ba

diff --git a/tests/ref/fate/utvideo_yuv444_709_int_gradient b/tests/ref/fate/utvideo_yuv444_709_int_gradient
new file mode 100644
index 0000000..af9337c
--- /dev/null
+++ b/tests/ref/fate/utvideo_yuv444_709_int_gradient

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 64x48
+#sar 0: 0/1
+0,          0,          0,        1,     9216, 0xbcb2dd78

diff --git a/tests/ref/fate/utvideo_yuv444_709_int_median b/tests/ref/fate/utvideo_yuv444_709_int_median
new file mode 100644
index 0000000..af9337c
--- /dev/null
+++ b/tests/ref/fate/utvideo_yuv444_709_int_median

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 64x48
+#sar 0: 0/1
+0,          0,          0,        1,     9216, 0xbcb2dd78

diff --git a/tests/ref/fate/utvideo_yuv444_709_median b/tests/ref/fate/utvideo_yuv444_709_median
new file mode 100644
index 0000000..af9337c
--- /dev/null
+++ b/tests/ref/fate/utvideo_yuv444_709_median

@@ -0,0 +1,6 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 64x48
+#sar 0: 0/1
+0,          0,          0,        1,     9216, 0xbcb2dd78

diff --git a/tests/ref/fate/utvideoenc_rgb_left b/tests/ref/fate/utvideoenc_rgb_left
index a1d2000..1ee7c585 100644
--- a/tests/ref/fate/utvideoenc_rgb_left
+++ b/tests/ref/fate/utvideoenc_rgb_left

@@ -8,53 +8,53 @@
 #dimensions 0: 352x288
 #sar 0: 0/1
 #stream#, dts,        pts, duration,     size, hash
-0,          0,          0,        1,   182328, cd084b244939d7e0008d8e5ab3429dc1
-0,          1,          1,        1,   182336, c9c40672750f372134185901147fb776
-0,          2,          2,        1,   182956, c728911ca73225f2dc7453533c9be95e
-0,          3,          3,        1,   182384, 54521f709b461a25198db755bce582fa
-0,          4,          4,        1,   181704, 5e03ab58b4480a6613f54857f10c39e5
-0,          5,          5,        1,   182136, c623fb06b90fdd7a5ba0b4f217b6a388
-0,          6,          6,        1,   181552, 5d03be9dfc01ad99364fc3cc8378af72
-0,          7,          7,        1,   182292, fc90878278c82b2f835151dc6d43dd47
-0,          8,          8,        1,   181424, 9b6339a0d3af2d3034162183cd4d79e4
-0,          9,          9,        1,   182316, 7e45bb5ffe57f98a433420abaffe78cc
-0,         10,         10,        1,   182064, d9525605a7d7d75a8e33502f61733af1
-0,         11,         11,        1,   182596, 62e87fa5c33a8d208deaa8719682b9a5
-0,         12,         12,        1,   180900, 149059d3d56c55358c7044c7d569730f
-0,         13,         13,        1,   181920, 0d20f588c27471a038e159a131e9c8ea
-0,         14,         14,        1,   182824, a301a411ff11042ecb583e1e3b12dbda
-0,         15,         15,        1,   182452, 0ee2a9ed39fb8569a8d6c2b3afb8f80a
-0,         16,         16,        1,   182312, 68dd3b820adf2cbc6686a7d48fa22c6e
-0,         17,         17,        1,   181856, 1897926cfe9b7acaf9c21714c449ce41
-0,         18,         18,        1,   181108, 15d2af460733fdd896078632cdfef9fd
-0,         19,         19,        1,   181388, 8b8e7a4b7d355f41f7e836120c4792ac
-0,         20,         20,        1,   180936, e18e27aa027f2470bfa95c536a0a89af
-0,         21,         21,        1,   180900, eb663ae3c5ffa8e751280e0dbb260e02
-0,         22,         22,        1,   181936, 7514bbe06cee027f54710dc900297863
-0,         23,         23,        1,   182304, 8cb2dcdbd4c919b4c977f45bee46c54c
-0,         24,         24,        1,   182580, 9185ed53b7e8339b61d3abe230bbab71
-0,         25,         25,        1,   182572, 81f8bdd3255b91d6621e9ebd3c9d7679
-0,         26,         26,        1,   182356, 1f9ff40700881054c62e33acde06910d
-0,         27,         27,        1,   181532, 10d2477aa1e319a61e517d25fd6c95d0
-0,         28,         28,        1,   179580, 3012480c43d15866ccc4a81d56650ee2
-0,         29,         29,        1,   179528, 5e0fbd62a164dc72cf511023da792175
-0,         30,         30,        1,   180760, 679f430c86dca203456f532e978dffc2
-0,         31,         31,        1,   181564, 64d31faf01cb7b52d7d7e20280e6652b
-0,         32,         32,        1,   181428, 04961d71aa3c81b33d28b39ead20ee1d
-0,         33,         33,        1,   182980, 51361c802005721002f5f4924f081708
-0,         34,         34,        1,   182624, 67c5582c45e3ee7e6aca49fdc0a980b8
-0,         35,         35,        1,   182352, 4fade9db12f2d6ce633556fdb8914971
-0,         36,         36,        1,   181336, ac8fbab67b36d58c4e8374bfb07458e7
-0,         37,         37,        1,   181528, f798157b6d4d04c767ecb76346922ddc
-0,         38,         38,        1,   179776, 01d407ed0b86eeb2c3ee3c24dd452d8d
-0,         39,         39,        1,   180100, 062e4af150100d7accf86a907a4b99b5
-0,         40,         40,        1,   180228, 23c617b76ef8f274bd089016fb8516c7
-0,         41,         41,        1,   180592, 5cd3d93597325196079dc019556f6933
-0,         42,         42,        1,   181188, d39d52f5b690661434b1abd8717b3e30
-0,         43,         43,        1,   181300, 9e202444287234bafd103fab83b1a974
-0,         44,         44,        1,   180812, 602165271de71594132cce98af56a7b2
-0,         45,         45,        1,   178816, c427d67196f43ece6bf3855e1256d7bb
-0,         46,         46,        1,   178196, 0d05902e2870a85333a216c723077e98
-0,         47,         47,        1,   178772, 57f528eb984b5b7181c89b48b58271f3
-0,         48,         48,        1,   178652, 5cd1031b0ada3ba9c2d4c2f2b7c8e277
-0,         49,         49,        1,   178512, d3c0c84fc63f1e32a4a026e2cd39b161
+0,          0,          0,        1,   197832, 11da778d3d904fcd2ad6daf84b227e6b
+0,          1,          1,        1,   197840, e89c967c27d7cd9963c9f12f9df2c268
+0,          2,          2,        1,   198632, b2c59f9834024d1e4b829e540a91814e
+0,          3,          3,        1,   197100, b21b344d42da10ef6730fed553fb3f2f
+0,          4,          4,        1,   196236, e61de0e4ff879b6630ae8eb63d063b7f
+0,          5,          5,        1,   197352, efd21105f5f98cbd5953e85a10091531
+0,          6,          6,        1,   197252, 0e2444a9185f68294c16f034dd522666
+0,          7,          7,        1,   197204, 0254750d85c474c13fe3c0e4e3c272cf
+0,          8,          8,        1,   197084, 4c89a99df233575978d9bcd8936e4588
+0,          9,          9,        1,   197692, 681a76f67892f9d05be72abaa403bc32
+0,         10,         10,        1,   197456, bba96b8520eaa5b6612b5c48d6409938
+0,         11,         11,        1,   197024, b9ea3dcc0cb8cf552e536765c5bb31f6
+0,         12,         12,        1,   195132, 2664b79397af76081b7ddd5c3507d579
+0,         13,         13,        1,   196796, d6128308c1fc3493ebc2cece7ed785cb
+0,         14,         14,        1,   198708, 6b3e50d5d47f29dfb38e178ffb9036c5
+0,         15,         15,        1,   198000, a3d6be893edb02e44f65f7a06aa8eeed
+0,         16,         16,        1,   197568, 1ed7a01f810770d62aed3d12d10cd12c
+0,         17,         17,        1,   197396, 515e13befb9eebf92f77574d6cd9fec4
+0,         18,         18,        1,   196680, ac1835b9a426927855e3e5dbab28c177
+0,         19,         19,        1,   196784, 56fa26a339c69df17d003fb00b335baa
+0,         20,         20,        1,   195980, 853744fb2a2de76858d32f35f8b3d836
+0,         21,         21,        1,   196120, e9c50124d92e261f7a98599cb9a20e36
+0,         22,         22,        1,   196620, e750eaed752d09874d823ee40a8904c4
+0,         23,         23,        1,   197528, 6671598014db402b70e2753efda028d1
+0,         24,         24,        1,   198204, 6e471eb9c354d6be4e0d8a691313e23e
+0,         25,         25,        1,   197512, 7e415738fb7889dee478f63900f47e7c
+0,         26,         26,        1,   196832, 0e30afab6de602d099d88edd6276f1de
+0,         27,         27,        1,   196396, 13880ad888c9cf3504e2eb087767602e
+0,         28,         28,        1,   194220, aa9007687cade8c9a872de063bed0755
+0,         29,         29,        1,   194428, 78ab57958ef8914095477e58ee4461c2
+0,         30,         30,        1,   195676, a7d0779dcda7f93da5601482bb68625b
+0,         31,         31,        1,   196352, 668c525c910000aab7d9285be6ac7ed1
+0,         32,         32,        1,   196468, 9c124995d63f2a548e640811e9391951
+0,         33,         33,        1,   198188, 1cb600005e68d1ef1418b1b42e463f0a
+0,         34,         34,        1,   197556, 80f6223aba2d6cfab3cb596f372243f2
+0,         35,         35,        1,   197020, b1890bbb734c016103190a9c043e9fe2
+0,         36,         36,        1,   196208, 7adf4b08c90231eddec16aff65cc138f
+0,         37,         37,        1,   196448, de69c81a605c9e173a7ac65077d0396d
+0,         38,         38,        1,   194072, 8b3afb6897210f1660b4427df0d9cc51
+0,         39,         39,        1,   194036, 1827f1deef659426939ffd94b73575bc
+0,         40,         40,        1,   194436, 46f3ad01e18a2a24d720e78f9bdb532d
+0,         41,         41,        1,   195696, a160ff1e281959147fe11606754ecc7d
+0,         42,         42,        1,   196044, 37f15771f5c04c13b02c0127bd977fed
+0,         43,         43,        1,   196384, fc43c356e577555757e220bee7e8a232
+0,         44,         44,        1,   195424, 6820346735e390b86d9564298c934101
+0,         45,         45,        1,   193688, 5f346f622f8e0d33d7f662e9dfbcccb5
+0,         46,         46,        1,   193000, 15eb6569d9fd39a343d110b965a97277
+0,         47,         47,        1,   193592, d41d561eb927b8e0d608177af90d0e1d
+0,         48,         48,        1,   193512, 35112e23e238beff03c769674db40399
+0,         49,         49,        1,   192528, 3a3ac1b24fb8fc72174970d668c30292

diff --git a/tests/ref/fate/utvideoenc_rgb_median b/tests/ref/fate/utvideoenc_rgb_median
index b1558d7..6c4d2f8 100644
--- a/tests/ref/fate/utvideoenc_rgb_median
+++ b/tests/ref/fate/utvideoenc_rgb_median

@@ -8,53 +8,53 @@
 #dimensions 0: 352x288
 #sar 0: 0/1
 #stream#, dts,        pts, duration,     size, hash
-0,          0,          0,        1,   182160, abcf4f477f74b696faca2fcff1f62aa9
-0,          1,          1,        1,   182104, 7cbcf339fa40c24522067295b39d637f
-0,          2,          2,        1,   183108, dfc2c418f4379a89654c16b34ff19446
-0,          3,          3,        1,   182320, 62a4647b05709d86c51a18be16877e98
-0,          4,          4,        1,   181920, 61d63520703503f6e17fad67cbc08794
-0,          5,          5,        1,   182424, f467638396feabe613b3c851289452d8
-0,          6,          6,        1,   182248, 8a0cba950d6c5d63ba9603212ca95b0e
-0,          7,          7,        1,   181876, 91432f472cf373d5d4036bd100950f3e
-0,          8,          8,        1,   182104, 1c8852d82a48c1b01911ffbedf0ac1f4
-0,          9,          9,        1,   182540, f36b9d48123b55f2141ae10dd26e1ca0
-0,         10,         10,        1,   182120, e6ecdb9af6591916153ca9aeba76b9d0
-0,         11,         11,        1,   182136, 7dc7b828a5b7c652df612474fad66f6b
-0,         12,         12,        1,   181296, 347eac6563435a62f75298cefe13d3a6
-0,         13,         13,        1,   182136, 3bbcd8afacdf9549da9ebd736df548a7
-0,         14,         14,        1,   182412, 17f8c6ef692b4085624ce1ef7efbc963
-0,         15,         15,        1,   182732, 9212760fa11fe4fa193ba1aa259e9765
-0,         16,         16,        1,   181944, 7dd6d6a7084f97a77ec09ec6c62f0ab8
-0,         17,         17,        1,   182232, 518552687d47ae93726679f0ed962ef4
-0,         18,         18,        1,   181512, 29a66924742add13a0cae65d93d38ea9
-0,         19,         19,        1,   181424, 67c965637248333f92da9d493bf7546e
-0,         20,         20,        1,   180764, 298457c6c2b3f4ebcda87a12579f094d
-0,         21,         21,        1,   181072, 493ea592b7d59eebf01c002e7e22fc43
-0,         22,         22,        1,   181160, e30195fcc16ecfbb9348943cff01623f
-0,         23,         23,        1,   182156, d26cfac33e19b4ca11210c9e6cb91955
-0,         24,         24,        1,   182260, 963c157d3f0023b49d23099d53d60c8b
-0,         25,         25,        1,   181976, 2494d481bf2be97692eaeda95f279b0d
-0,         26,         26,        1,   181832, f1be95c840d4fcb0c8d4b7aed5b197c5
-0,         27,         27,        1,   181424, 03d92e89358a8b9b9e7cf302edde307e
-0,         28,         28,        1,   180632, 09f9e162fdaf28342c442172179a75c9
-0,         29,         29,        1,   180624, 481e7f7730ab3ba67c06faa620a8bd5e
-0,         30,         30,        1,   181024, 7a1d1b06b73d2bf41563eb749805780c
-0,         31,         31,        1,   181844, 8a6ce6dd6f79e423a3bb6c2b163adc55
-0,         32,         32,        1,   181712, a68007bbdf0169c9ed2dffae3dc63221
-0,         33,         33,        1,   182008, f37dd0635de369761e2de979ee799c3a
-0,         34,         34,        1,   181800, 14029ba1c364eca476559ce553919e99
-0,         35,         35,        1,   181840, ee227d15f15c3cd564dcad2160453fb7
-0,         36,         36,        1,   181848, 13b5d0892cc76a25b4914f2d706a0ad5
-0,         37,         37,        1,   181976, 1a0be9f2cefe0d867c5c03d6b3987ad8
-0,         38,         38,        1,   181216, 79795d735f9e0f92091203bf8b9eb9ed
-0,         39,         39,        1,   181236, 2d006c8c4ba448ca7841df76e44ffa88
-0,         40,         40,        1,   180672, ed5210abdae49042fcae9bde2f65a057
-0,         41,         41,        1,   181324, fbbc7839c595cd0f0efc0917edfed2c3
-0,         42,         42,        1,   180980, c6120b5a9440f4a0d83731627eb96d98
-0,         43,         43,        1,   181204, ac4371912d16f657c90e8a00cfafdfd2
-0,         44,         44,        1,   180720, d392d95c67349296d922dbf53ec3f832
-0,         45,         45,        1,   180028, 37a2717fbd5aaeb128812298484f8267
-0,         46,         46,        1,   179704, e8716f4856e4ccdc541632a218894f62
-0,         47,         47,        1,   179648, e99cbe5d1bbd7bce241ae500b4de06c2
-0,         48,         48,        1,   179424, 6f8a5e356fb77b61d9dfcabdf97340b9
-0,         49,         49,        1,   178980, 75a7700b822236b0ecb169fd692910f1
+0,          0,          0,        1,   194232, 96431f57c15dfce7894563df186457c7
+0,          1,          1,        1,   194256, f7617d88a5bb862047dd62a3618eca2c
+0,          2,          2,        1,   195048, 77aa5446e64733aa29bf5ba4ad92073d
+0,          3,          3,        1,   194012, 4a73e8b156eebc15de299c57087639a5
+0,          4,          4,        1,   193268, a0afb35be4f9f91366d848b0600c949d
+0,          5,          5,        1,   194168, b0580fbf05dc9a2abc053f2deb48b8c9
+0,          6,          6,        1,   194240, 5f5adb862a13843123e27f01d1870799
+0,          7,          7,        1,   193512, 71dca88d3fdf753858887006ac3cc13b
+0,          8,          8,        1,   193952, 7efa7d138fa412343741ccbeb18acd18
+0,          9,          9,        1,   194700, ffddf410e5b8e49cf0462baf7bc9e179
+0,         10,         10,        1,   193984, c71b59a9699f2832e8c3d76e5d5e4f0c
+0,         11,         11,        1,   193640, 598bdf3a814db44775ad703d674e9ab9
+0,         12,         12,        1,   192900, de83dd60e69a305ab0a79c4a859d444d
+0,         13,         13,        1,   193780, 7858349baa970794b0353e011f751259
+0,         14,         14,        1,   194564, 9470359eb6f09d5a1db5dc199e910bcf
+0,         15,         15,        1,   194856, c15823765a9d4d1583f2dd7b63534c95
+0,         16,         16,        1,   194056, 5f60ee32c9e06f080dcd0b4025576d5b
+0,         17,         17,        1,   194124, a10815c6603e375ab0b21331e03651de
+0,         18,         18,        1,   193508, 54a5762f2717ce1be3b9193b930d1b1f
+0,         19,         19,        1,   193412, 782b07b8ac3bd1a86ad562ef7d7ebca5
+0,         20,         20,        1,   192472, 96f7f1983d6ac34c591e33045c5ba3dc
+0,         21,         21,        1,   192844, b56bec01d69b3e073ac9823b2bf4c2f5
+0,         22,         22,        1,   192668, 3071c2d282672a597bee4682cef304ff
+0,         23,         23,        1,   194108, 3484d2ca748c20d87280f5333054552d
+0,         24,         24,        1,   194552, b05a90da02c18ee880234d620259361d
+0,         25,         25,        1,   193860, 08617a493d2abe75b438a35315aef7a0
+0,         26,         26,        1,   193452, 958eaccd55d6a55832ec9cb6ce201fc4
+0,         27,         27,        1,   193364, 8cab6850866ce9c0ecf818cc4630bdb3
+0,         28,         28,        1,   192064, 03ce30fadf6b80d0b901c0f53f09e23b
+0,         29,         29,        1,   192464, 7bf3364a095cbea2a032f6762d3433bf
+0,         30,         30,        1,   192880, 6176bcbdf42f787aa4c21791fd3d73e2
+0,         31,         31,        1,   193672, 0760b04edf969c5b0914fcae82ee7ee3
+0,         32,         32,        1,   193636, a08c905e46e35d575001e570389a9b70
+0,         33,         33,        1,   193976, dff151dc2dfa5a86103bddc11484a38f
+0,         34,         34,        1,   193456, 86f44e4a438ad150baee14facdebb701
+0,         35,         35,        1,   193768, 5645f55ed8154a0c913890a11b6f1261
+0,         36,         36,        1,   193584, 023ca364d9a88ccb2bae08008c6c3098
+0,         37,         37,        1,   193844, be83062bffff72ba0c442f502c41a187
+0,         38,         38,        1,   193008, 6e1dd8f2d18145ee199395d34c0e55ff
+0,         39,         39,        1,   192680, 27256f3f1111554a75d7b7366b77457f
+0,         40,         40,        1,   192188, d86ea5fafc41d57e320479f54fd1286a
+0,         41,         41,        1,   193200, a37553416c0187b364ce2b64ece22975
+0,         42,         42,        1,   193052, 8e3c81d246b744c2aed9b735015ee93f
+0,         43,         43,        1,   193032, dd8e2b350d8181c6a73832c868ea99f4
+0,         44,         44,        1,   192444, 6844e2dc435f417028644d3314b94c4f
+0,         45,         45,        1,   191824, fc01e22ba45bca6eac69ba76043e1484
+0,         46,         46,        1,   191772, 5e2bedda3828d590d25caa14f96ba7b0
+0,         47,         47,        1,   191720, edaae522f8a975bbef0e909ef09e7fc7
+0,         48,         48,        1,   191528, f2d98179f5cf0752ecbd6d39c4457283
+0,         49,         49,        1,   190692, 74cd0f762371781712e2ff2e3f60ac57

diff --git a/tests/ref/fate/utvideoenc_rgb_none b/tests/ref/fate/utvideoenc_rgb_none
index 403c807..27df8ba 100644
--- a/tests/ref/fate/utvideoenc_rgb_none
+++ b/tests/ref/fate/utvideoenc_rgb_none

@@ -8,53 +8,53 @@
 #dimensions 0: 352x288
 #sar 0: 0/1
 #stream#, dts,        pts, duration,     size, hash
-0,          0,          0,        1,   301024, 44de62472f485410819707c44b53f276
-0,          1,          1,        1,   301036, ff3c28c23b15834a84c57b304610924f
-0,          2,          2,        1,   300812, 72f02a697464f5fdd54ae2e054c131d1
-0,          3,          3,        1,   300876, 8879becf8b3d5001b196f45b7817ef6b
-0,          4,          4,        1,   300880, 2edeed55c4d84dea1fc9386553d7503f
-0,          5,          5,        1,   300904, f799f26eae30e1796bd62f9cdbcb2b17
-0,          6,          6,        1,   300908, bc606ee3ab284d3567a3fbd476d674f0
-0,          7,          7,        1,   301012, 404f55be9ec860a1ab3d15711965c9ba
-0,          8,          8,        1,   301048, 112394db28656101b4e8ba3621b437ae
-0,          9,          9,        1,   301008, 8945bb7668b4a529844e68e1f6b6522b
-0,         10,         10,        1,   300908, a9097c5f0bd7ddea711a25aa74696f70
-0,         11,         11,        1,   300876, 579de317d166295088530c78f403611d
-0,         12,         12,        1,   301012, d97e3627c494012d6167a30ec8192360
-0,         13,         13,        1,   300928, fd20066b7f31363751328aefedfae04c
-0,         14,         14,        1,   300836, d9bdd5606f4426b503f19a674e8058a3
-0,         15,         15,        1,   300848, 6c8f2cbd75646592876f8138a017c1ce
-0,         16,         16,        1,   300988, 54e19940011b3bfed809a0edc12c3dd7
-0,         17,         17,        1,   301040, b1d5f39215f305953a846fb01dbc2f24
-0,         18,         18,        1,   301164, 9dde74f0ee3626eeea41c538fd80e1fb
-0,         19,         19,        1,   301196, fdf22d2c35c7ab72416a268bf6612650
-0,         20,         20,        1,   301148, 275261bc1c1dec0bf712dcf05213def2
-0,         21,         21,        1,   301144, 66ccea6fce9d6d7016dafb3b349fa163
-0,         22,         22,        1,   301080, d6dc6ce0708dfdf74e936271a98c19e9
-0,         23,         23,        1,   301028, 48e610b9d798e0642825919fab233524
-0,         24,         24,        1,   301100, aed8f8f0a9d96ab9906ea8175e18c9ff
-0,         25,         25,        1,   301116, fe9e1eccb9ccc92b4041228ea2c56c8e
-0,         26,         26,        1,   301052, 3ec2f76b41fd8a6eafaa6bb14b94c153
-0,         27,         27,        1,   301200, 60d608bbe0ca285a7d8a1a4822a84c4d
-0,         28,         28,        1,   301120, 6d2cec50ee32e76eb6dff76a4976d221
-0,         29,         29,        1,   301188, 8faa69fd62e0646e4eb85c1601827364
-0,         30,         30,        1,   301192, eb1fa109c5e9b89f29be7cf363649acd
-0,         31,         31,        1,   301128, 32ba1797f5dee6643712688621984326
-0,         32,         32,        1,   301088, a1bfa70314c40f60a0823beef74e233e
-0,         33,         33,        1,   301064, b18d84efa0091199dd9167bbdb36b873
-0,         34,         34,        1,   300964, a2ae8d3dd655403bcfdace40aaa1d78b
-0,         35,         35,        1,   301124, 8ece60df0f0ef4f3d887eac16c23cad6
-0,         36,         36,        1,   301200, 13eb4b5ec7471837aadce38848e48cb2
-0,         37,         37,        1,   301196, b5fdef211755134f8e7998793a0ab0c0
-0,         38,         38,        1,   301260, 2f7e2046bae9e664e74bc56a3596743c
-0,         39,         39,        1,   301264, 2cfc013c9e66a5dd0229c6551febd658
-0,         40,         40,        1,   301272, 47e8ff02a8f054c66687e2b613e46cf5
-0,         41,         41,        1,   301236, f4d766155eeeb7b03687a3141840bf32
-0,         42,         42,        1,   301312, 8fe134aefc02b6910dc2054447fd9c37
-0,         43,         43,        1,   301272, 9ec57db275fca2b596734c48a50c28bc
-0,         44,         44,        1,   301256, 17bae207d8d6f5b2b500885e3058185f
-0,         45,         45,        1,   301308, 9f8e91f3fbbdd0ca17b2ad0ffe888d5a
-0,         46,         46,        1,   301400, 8184e55eb5432516547df512175c15fc
-0,         47,         47,        1,   301408, dd0c0aa1426427549e9cbb22ef82f930
-0,         48,         48,        1,   301424, c65025dc3fa21fad98118ab0386b910e
-0,         49,         49,        1,   301408, 2982b49e94aa25b8ef30f81769650f15
+0,          0,          0,        1,   303768, c37d23b5bc1dcceacafcebd0a5054590
+0,          1,          1,        1,   303764, a6a8b445323c00ab56ad1077a90b34bf
+0,          2,          2,        1,   303672, 029048769e006f2a2357f373cfc0b6bf
+0,          3,          3,        1,   303672, e13433d7e8f75a26eeca86523d47d0cf
+0,          4,          4,        1,   303724, 4c3d437c158255e398071bdd2e699d9e
+0,          5,          5,        1,   303672, 11e335df4fdfcfb7563305a33740058e
+0,          6,          6,        1,   303676, 594a7a6101fbf884bb3bc309479bdf18
+0,          7,          7,        1,   303764, 09f91a05f8f507f6338d15591df34002
+0,          8,          8,        1,   303772, 1e177889c188fdd783815103c1987e5e
+0,          9,          9,        1,   303700, f6da8873c69fb6365764327305a36840
+0,         10,         10,        1,   303684, 6810c80719058078b7d40a787fabee70
+0,         11,         11,        1,   303696, f8dcad694c276074f5022276e4067694
+0,         12,         12,        1,   303720, d1f764aaf14e78b64b182503342df764
+0,         13,         13,        1,   303676, 43066ddba20726f57d76230891f76730
+0,         14,         14,        1,   303548, 6512bff050521cd6343e5b2f4b18be6c
+0,         15,         15,        1,   303604, 76c91368e8db8c18591c7cb569774e12
+0,         16,         16,        1,   303748, 365805842ee3cefee8d5feb169075ecf
+0,         17,         17,        1,   303740, e6bbc34d7057d812cd994473b93ccadb
+0,         18,         18,        1,   303788, 48bcbbc3bf805586c34ff23ebeaa28ea
+0,         19,         19,        1,   303808, 29f268867c4522551e6f73dd4afb36e1
+0,         20,         20,        1,   303808, effd1cf0a61501f427e47c76c469ff32
+0,         21,         21,        1,   303832, 0ea84576612a4b0b8f9e14b3ecf10861
+0,         22,         22,        1,   303800, 4b4d24ce4f176de4c34ec85656ea1e49
+0,         23,         23,        1,   303716, 58845ad1c9a07d62f83414662eaf1181
+0,         24,         24,        1,   303780, 8db647c5324b3c061aa2cdae7527ec8d
+0,         25,         25,        1,   303824, 7a2ddbe9d1af3d34d7538ba10a0aae3c
+0,         26,         26,        1,   303796, a8444e8ec7b64c3481766ebcafd2e7b7
+0,         27,         27,        1,   303816, bf78e04e04429aab3527baacd51da1b0
+0,         28,         28,        1,   303872, 02d9693cc78af1b9d4a17b5361daa325
+0,         29,         29,        1,   303900, 065cde35c0ac49675a9ee8e047bb1471
+0,         30,         30,        1,   303876, 0888dd5201864a9f34f784b7959563ba
+0,         31,         31,        1,   303828, 6837a1a3f4ae04c9601fa9f4c27098b2
+0,         32,         32,        1,   303720, 8c90fc8481d745b020c325134bf0047d
+0,         33,         33,        1,   303704, d37bd88a42e4b84a98f0e51208a3877d
+0,         34,         34,        1,   303640, 2444c069427bf6a847113fd16e7a863f
+0,         35,         35,        1,   303780, 7a9aa1d7d43bcb22a754fa5e7ee911a0
+0,         36,         36,        1,   303828, 4a0ab1bbea88697950bbafe566a03b52
+0,         37,         37,        1,   303860, 6d2073d28e226286e1e78df92bc27154
+0,         38,         38,        1,   303884, 74c43f24d636de3fd6afd1d0ab098e57
+0,         39,         39,        1,   303876, 329a1d1558ffc42b7cdb1f141e042431
+0,         40,         40,        1,   303912, 6b1f3c35f28ab1182354b33ea9e37331
+0,         41,         41,        1,   303856, ec00db5b019a3f72c3fc934d3811cad7
+0,         42,         42,        1,   303880, 6d291f829145c07f0002631b0e22a1f9
+0,         43,         43,        1,   303856, c438ae4db7f897c9e2a1bc6526bb43e1
+0,         44,         44,        1,   303828, d9e5870cf4598577e4ba0ac468ef25db
+0,         45,         45,        1,   303936, 6c90dbca317295010024cc4ec70d3f87
+0,         46,         46,        1,   303940, 025d9173ff307f89c3edf43074a261e9
+0,         47,         47,        1,   303948, 48e7ab65fb9c7a35e4933a7658935726
+0,         48,         48,        1,   303944, 925eb3c42b865d0861ae0348136fc18c
+0,         49,         49,        1,   303924, 83ae105d9d301ea900f004d274adf894

diff --git a/tests/ref/fate/utvideoenc_rgba_left b/tests/ref/fate/utvideoenc_rgba_left
index eb3305d..11bde1e 100644
--- a/tests/ref/fate/utvideoenc_rgba_left
+++ b/tests/ref/fate/utvideoenc_rgba_left

@@ -8,53 +8,53 @@
 #dimensions 0: 352x288
 #sar 0: 0/1
 #stream#, dts,        pts, duration,     size, hash
-0,          0,          0,        1,   195260, a8fdb226460f210542e7aca6c12b0874
-0,          1,          1,        1,   195268, 45f098764ccba85dc641b7e401461c0a
-0,          2,          2,        1,   195888, e922261672c7de46a302abad3a3fe450
-0,          3,          3,        1,   195316, f8febd5af0fed000fab2943cc649975f
-0,          4,          4,        1,   194636, d90985ad8afd2f969afa842510085852
-0,          5,          5,        1,   195068, 909adb44bd049186a959f2803e641520
-0,          6,          6,        1,   194484, 547772233e653daccc6610fcb6369da7
-0,          7,          7,        1,   195224, aa82b75f6230b2e948abdfe36bce1150
-0,          8,          8,        1,   194356, d67fb1208532137252701ddcbf7bfc2e
-0,          9,          9,        1,   195248, 7b08698d2a911fba5231c0fef0ded4c2
-0,         10,         10,        1,   194996, 570e7d9caec52975dec1c2a5dbf7cdef
-0,         11,         11,        1,   195528, 7309c0531b942902c691781f2a6da1a1
-0,         12,         12,        1,   193832, 40954ceb87370cac3db5c8c2d7c001d1
-0,         13,         13,        1,   194852, 538382c377f0c6e9070ec0b8c5fb3e39
-0,         14,         14,        1,   195756, d002a80346f3460380abb794f2d56a62
-0,         15,         15,        1,   195384, 1a4f58b3c710f2fedaf746c281556b4c
-0,         16,         16,        1,   195244, 218832dab1251e1dc852e6839a48b3fc
-0,         17,         17,        1,   194788, a035acf428bd9565a4c3fec25dfc6f4f
-0,         18,         18,        1,   194040, 6dbfeb949bdeeb055a4f7ebf78af9a85
-0,         19,         19,        1,   194320, 8b4aa3f6f05aa684d9e4adfa4cdce814
-0,         20,         20,        1,   193868, 98cd8b150784f6695f8dea163cecf286
-0,         21,         21,        1,   193832, 27a8db33014c4bc6a8da356c4b1a3e4f
-0,         22,         22,        1,   194868, f1e1460dca9127197c0a40e659924616
-0,         23,         23,        1,   195236, a012f8548e92243334edcd7c8f57aa52
-0,         24,         24,        1,   195512, bf3d4e968d126231baa6618d3344ef81
-0,         25,         25,        1,   195504, adf4b49d8721f1b323fa518f2f0d4750
-0,         26,         26,        1,   195288, aa16dd087a73a9cadc5abfc0a46ccdd4
-0,         27,         27,        1,   194464, 2e3f07244999cbe949495b57adcabe69
-0,         28,         28,        1,   192512, 2d6ce0d29a929e0208e1578e95c9d388
-0,         29,         29,        1,   192460, 081d18cd138eead0cd4b25f54a7c7540
-0,         30,         30,        1,   193692, 546b6ad28b612c2f601c7d87b265ba95
-0,         31,         31,        1,   194496, b2b2ae3b1d67e332295456e6c7bdd381
-0,         32,         32,        1,   194360, 0837b122d85abc4d704d40629266c58d
-0,         33,         33,        1,   195912, b7a4bbe436d63394cee70d40e8a8a4cf
-0,         34,         34,        1,   195556, 1b9a3eb6cda9bbd44bbdd0dc26a74252
-0,         35,         35,        1,   195284, f28d780d43fa6979379dd21fcb12e906
-0,         36,         36,        1,   194268, 5508989dad06fa05ee4054e759cdfd4b
-0,         37,         37,        1,   194460, cb65a0e97c03b2fbe69caa6ac1660070
-0,         38,         38,        1,   192708, 4125ee86cf4b8ca6d891c176662e584f
-0,         39,         39,        1,   193032, 096ee24b3e35049480e28171693fbd85
-0,         40,         40,        1,   193160, c01aef63b8bc792d08355ce6d68affee
-0,         41,         41,        1,   193524, 57af47b7bfe132cf58eab2807996b3fe
-0,         42,         42,        1,   194120, 9a4e2d72f4526815b253b3acce2dd49c
-0,         43,         43,        1,   194232, e575508ffbd2a53871e817cd8947e2d2
-0,         44,         44,        1,   193744, 95d8eefbfbf5a7354ecbf35835243e44
-0,         45,         45,        1,   191748, de2eedebb28491e59700300635af1f90
-0,         46,         46,        1,   191128, 652670f6881419be5068f2de6ac2d91e
-0,         47,         47,        1,   191704, 070f46dc278230528aa6b40256a6d891
-0,         48,         48,        1,   191584, e4b76fcf344e125729c339e360a14b15
-0,         49,         49,        1,   191444, 6a33b374a8b48549094a24543d81d999
+0,          0,          0,        1,   210764, 7c17cd8382580fcfd6dac1ddaa644c8d
+0,          1,          1,        1,   210772, 4c6bcde52e58d800927620df9e0e03b8
+0,          2,          2,        1,   211564, c0754e5c821ae0c0ca0bae78ad8fe1aa
+0,          3,          3,        1,   210032, 6ea988397d00f553c6e5ddbca7a08af9
+0,          4,          4,        1,   209168, c2d1fcee61bfdb5e3925aca342550ae5
+0,          5,          5,        1,   210284, dc92cc07ac1211e664a24c701c410e0e
+0,          6,          6,        1,   210184, 0977e253355bff82f0efd330e94cec6c
+0,          7,          7,        1,   210136, d81c28cfef7dd47abf2ac951b6a948e7
+0,          8,          8,        1,   210016, d378efd0efe103407afa8b8f45d17e24
+0,          9,          9,        1,   210624, 1a7e4a4c7b4ccbd10b9238b22dbc953b
+0,         10,         10,        1,   210388, 607fdb29b0bf5846cbbb776459840401
+0,         11,         11,        1,   209956, c4c4c18b83db1983b1d9268024027025
+0,         12,         12,        1,   208064, f1a73d7fe1dfdeb33758c9d7c61fa81b
+0,         13,         13,        1,   209728, ce7b81e19d940ebddeef93e281fed00d
+0,         14,         14,        1,   211640, 0d6929d8d034f8accd00f58b887073f5
+0,         15,         15,        1,   210932, b895f81c00f959868e24944ee816e0df
+0,         16,         16,        1,   210500, c43494420a45bb8fb0a907d3169d30b7
+0,         17,         17,        1,   210328, 5e4d72389e2733d8ce274f0c9a7cf7aa
+0,         18,         18,        1,   209612, 59d368acc1ea2c0f28ed0787fe5bad4a
+0,         19,         19,        1,   209716, 77a331effc0bb544ef1445fd5c42cfe2
+0,         20,         20,        1,   208912, 1de694210d8e781ce8ea29add190ad06
+0,         21,         21,        1,   209052, b48927d6e1658b5df335c34bf0fc0dc9
+0,         22,         22,        1,   209552, 1c00ad79bcf1efdb633baa040f5e1400
+0,         23,         23,        1,   210460, 755c6daf2681ba0aed4e4c962faa288c
+0,         24,         24,        1,   211136, e6a5687e05a70fc7c741f2ef7afb93b9
+0,         25,         25,        1,   210444, 6fe3ec5657f092b009a7bcc4815fc5b3
+0,         26,         26,        1,   209764, 19bc9792b30711fb7276a9f74d423c3e
+0,         27,         27,        1,   209328, 0144894411a4c5f14c66a393a46bb3b4
+0,         28,         28,        1,   207152, 61bbd2676caf8fbe6582274fe8add09d
+0,         29,         29,        1,   207360, 9663a6d131b029c9ad7469d67a381823
+0,         30,         30,        1,   208608, a0908484dfeead74fb351bb1ba3b514a
+0,         31,         31,        1,   209284, 00dda733e216eb968cfdf97f6138294d
+0,         32,         32,        1,   209400, 7f7e4d3546ab89ba5eaed835a2fabf64
+0,         33,         33,        1,   211120, 3b281fadbc1eed3b4d0e6b14deb4a18f
+0,         34,         34,        1,   210488, f3fb93f6504ffee9984574a7b6854d89
+0,         35,         35,        1,   209952, 1ccf5aa118c8cd51e099749289fdc374
+0,         36,         36,        1,   209140, 4e4c0f0a38608f1ff760a7ada9d44a6f
+0,         37,         37,        1,   209380, dac76683c0ce9e1008fe1490dc1479bf
+0,         38,         38,        1,   207004, e914b5454210fc6b4c39d22b7daf16da
+0,         39,         39,        1,   206968, 0a9566c879a6897536ec094100f32fbf
+0,         40,         40,        1,   207368, a644c2b3b8c89ec01b2b51576a5b09b5
+0,         41,         41,        1,   208628, 2ce6ca69d5f6e29641f5a907ffac9bb0
+0,         42,         42,        1,   208976, d3be7fe389e80df517eafd0c861947a2
+0,         43,         43,        1,   209316, c5148e0073cebd50d8f619d4875874dd
+0,         44,         44,        1,   208356, 54e0082cdc1aa7fcbb8c655e94fbd410
+0,         45,         45,        1,   206620, 77ec6d3714a0dcbf0cdd5349df0951fc
+0,         46,         46,        1,   205932, 7857acd2afd3169126a5428ddef97abd
+0,         47,         47,        1,   206524, c903ec677106ec1f7f583bb813a1120a
+0,         48,         48,        1,   206444, ea549da62a460b50811cb3f182fe5c19
+0,         49,         49,        1,   205460, 81d823633491b13a31dfdbc5f33d79f4

diff --git a/tests/ref/fate/utvideoenc_rgba_median b/tests/ref/fate/utvideoenc_rgba_median
index 0cdab8d..a2118c2 100644
--- a/tests/ref/fate/utvideoenc_rgba_median
+++ b/tests/ref/fate/utvideoenc_rgba_median

@@ -8,53 +8,53 @@
 #dimensions 0: 352x288
 #sar 0: 0/1
 #stream#, dts,        pts, duration,     size, hash
-0,          0,          0,        1,   195092, d32d5a3dc88b9aef0826b565ee5dfbc6
-0,          1,          1,        1,   195036, ea13e3522d1f3aeddd47117c91eccc55
-0,          2,          2,        1,   196040, 21c2c9abe791bed2a9bf02e539caa787
-0,          3,          3,        1,   195252, 58d5e081127f246f711f5b8ee1c760ff
-0,          4,          4,        1,   194852, 8ceb3824ec628a73e1c08e498f369484
-0,          5,          5,        1,   195356, 30ea64094f29d670e2ff8f43b50578d6
-0,          6,          6,        1,   195180, 08b406b9f3063a54681d7195fb53e953
-0,          7,          7,        1,   194808, e28c43ef3aef174f0f9b9d7a702ca747
-0,          8,          8,        1,   195036, 66247b40b0def9373bf6fdda9ef832f7
-0,          9,          9,        1,   195472, efa8a624d6b0fa69e0c1c746baed0b33
-0,         10,         10,        1,   195052, fdfc784aed661cb76bc5b3ef1863bd89
-0,         11,         11,        1,   195068, 040ad503d18a36d4f1cdaec64998138a
-0,         12,         12,        1,   194228, 1cd168427d022825a801b232cb23ca12
-0,         13,         13,        1,   195068, e4082f833d3bf75af24e1bb5f06d94fe
-0,         14,         14,        1,   195344, 19638340e93d4f1f7099deda34d28e3f
-0,         15,         15,        1,   195664, 001a801c5c5ceb197576c0f7b793850d
-0,         16,         16,        1,   194876, 9550b3cf6133997bf7557483f346b036
-0,         17,         17,        1,   195164, eb7220caf48ab2605ec971ca1297a7ae
-0,         18,         18,        1,   194444, a2ac812e6307a92ecd09d4282367a9d4
-0,         19,         19,        1,   194356, ee4d250226ab2a34cef0e3ed8920f7b2
-0,         20,         20,        1,   193696, 4bff0fc871969d17ad1f7391bbd543b2
-0,         21,         21,        1,   194004, 080e2a91fe768fd1725a8400bc6a1331
-0,         22,         22,        1,   194092, 5b9d65275695372e1f6b9c0a23f1ffa0
-0,         23,         23,        1,   195088, a4060bf595c877476a5952b335526d57
-0,         24,         24,        1,   195192, 17b55735834f291ecae399c317007d2c
-0,         25,         25,        1,   194908, fc78d797bd5740f4fec8f3d34bc2ff1b
-0,         26,         26,        1,   194764, d229a9e4d1c782504cad617d2b00802b
-0,         27,         27,        1,   194356, f987a587cb9fdcd04e36d9382c4d9139
-0,         28,         28,        1,   193564, bdfa512e3a0a46aabf289c22dbaee0b9
-0,         29,         29,        1,   193556, 341708f3181ba4b37114d6dbffc65e63
-0,         30,         30,        1,   193956, f97693469ae6f49c3995794d00430c4a
-0,         31,         31,        1,   194776, 970c96fe0f733683ce4f2b478af21b88
-0,         32,         32,        1,   194644, 8a96248e2821040e4d0d6d32d0a4f1a9
-0,         33,         33,        1,   194940, 70a462d8de0da8b5718bf8fb8034fa38
-0,         34,         34,        1,   194732, 2fb3437abbc0a85b6c46e1b1edd922c9
-0,         35,         35,        1,   194772, 0a0f54e266438e1a840247e1af2fb1f1
-0,         36,         36,        1,   194780, c4d516a459523b1c150d8aad1d5e3a6d
-0,         37,         37,        1,   194908, e09db67196513400dd55397a525b73c2
-0,         38,         38,        1,   194148, f3cc9fc8597f8806fbc2a0c13af5b9ba
-0,         39,         39,        1,   194168, 2bb82f80c239984fe7c1091ab6afb332
-0,         40,         40,        1,   193604, 6fd128240c540a0655e8f27ff6a50ee9
-0,         41,         41,        1,   194256, 9b49275154b4538abdebbddffe010105
-0,         42,         42,        1,   193912, 6acff798f9cca91347e36ee1ea86183c
-0,         43,         43,        1,   194136, f24ca78d9b813ab8bfc720bad2682e7b
-0,         44,         44,        1,   193652, d7d927faf59a3b82bd1cce418c13c430
-0,         45,         45,        1,   192960, 8a813fce1d21dd4ad474d06d890a3de5
-0,         46,         46,        1,   192636, 6bbe157ec4f799cfd47a69c5a5cbb0fc
-0,         47,         47,        1,   192580, 1c17bf08a3928533a0036bda1fb08ecb
-0,         48,         48,        1,   192356, a5b34ac48c82e79ff827f72dddbfc6b0
-0,         49,         49,        1,   191912, afceb467ddffd9697c9566c43f2576f9
+0,          0,          0,        1,   207164, 91bd0a04c522e4a449a2b2729cceb273
+0,          1,          1,        1,   207188, 8a9a23c32503fe30df47a598d319591e
+0,          2,          2,        1,   207980, fac02289d860c485b09053119a259772
+0,          3,          3,        1,   206944, bc0251f0a3ad5881cb402d1d071cbfe8
+0,          4,          4,        1,   206200, e3b811ea58b790b171b75c9af46a3637
+0,          5,          5,        1,   207100, 076ca542190996d4ff4380bfff1b8707
+0,          6,          6,        1,   207172, 87d8540012df5c62ffcec97ac4255e44
+0,          7,          7,        1,   206444, 8a4a787dd73b93653ab69622b3fcaa6d
+0,          8,          8,        1,   206884, e4ce5916ecee62387a44257f9f7e309f
+0,          9,          9,        1,   207632, 67f343e24f47b1eddb47e59831752aab
+0,         10,         10,        1,   206916, 6a1d003a2b3c10bd87d5b71a0ecb6fae
+0,         11,         11,        1,   206572, 8939adde46e92bf02be301590de578ae
+0,         12,         12,        1,   205832, ac36ce0cc201b89aa85aea9f8f85b654
+0,         13,         13,        1,   206712, 1d85faa22ef18e060354d2e2931f78f3
+0,         14,         14,        1,   207496, 2459b868050730ba3c3152443f119e59
+0,         15,         15,        1,   207788, 27616d7494e570814378a35fd72b3d4a
+0,         16,         16,        1,   206988, 4f0daddba744ae0ebeea8b55ebc6778b
+0,         17,         17,        1,   207056, e2340667064a25a75f67e261fe96baf7
+0,         18,         18,        1,   206440, d487795c5f4a3f338cebfd36b264a760
+0,         19,         19,        1,   206344, 0cd1a4c459402e82d7c89a131d6f75fe
+0,         20,         20,        1,   205404, 52ea62992efacc4bf4234507752cd4ed
+0,         21,         21,        1,   205776, 7712e82b94fd155c1e0dfec1db19a298
+0,         22,         22,        1,   205600, ec081ebb4e12f35aa47e673c5cd8c858
+0,         23,         23,        1,   207040, 637d7b20ab169db4671d76abc986c20d
+0,         24,         24,        1,   207484, a776de1269ad564e1dc21ead94dcc71e
+0,         25,         25,        1,   206792, 6aaa68388fb12b2e70eb542819902a97
+0,         26,         26,        1,   206384, b4c4d079d7c75b03e2d3915a8676a64d
+0,         27,         27,        1,   206296, 521647a00535f63c241b54699b5d803a
+0,         28,         28,        1,   204996, 892f0c216d04dfa120576c0cbbbdb0a2
+0,         29,         29,        1,   205396, fa1b2301c765bb1ca1c85828918462b9
+0,         30,         30,        1,   205812, e40d8da60a07f19b43ab27a13928b87d
+0,         31,         31,        1,   206604, 2a06b115a3a400a73ad06b69c90a0768
+0,         32,         32,        1,   206568, e6d367cc3d7bd17a6c470fe86591e56d
+0,         33,         33,        1,   206908, 8d53fe57b1043562c50a2fd63fa2ebfa
+0,         34,         34,        1,   206388, 3a5544a0a814dbba171d0384c91ea566
+0,         35,         35,        1,   206700, bfe43124bcbbaef9884893c32396f316
+0,         36,         36,        1,   206516, a93e303f30a9269c6d6b59882b668630
+0,         37,         37,        1,   206776, 3044d9c98555d3415fc75b085f8aab0a
+0,         38,         38,        1,   205940, 870403f6326de73750a57692a288bc2d
+0,         39,         39,        1,   205612, f97b94da7b565f67cb476c73c2196bb1
+0,         40,         40,        1,   205120, 78341e169158f97791ec9a88ee936048
+0,         41,         41,        1,   206132, 83d31e0b40f2850b0767fbd162b4aabd
+0,         42,         42,        1,   205984, ba029595b56087c55f324a54cb5e20a6
+0,         43,         43,        1,   205964, d067932b507acda9020837ed271eb09e
+0,         44,         44,        1,   205376, 62b45f4949405875f947cd6d5133b76d
+0,         45,         45,        1,   204756, ec45bcef0601faff97d89f89bfede4f3
+0,         46,         46,        1,   204704, 59c9d309aa381e39f3b236d332ae516a
+0,         47,         47,        1,   204652, c44f8fba96dfb305cdc797ddfa033899
+0,         48,         48,        1,   204460, d0116d06128babcf8ec55205ce89d46a
+0,         49,         49,        1,   203624, bbd3523ac263c622541aa67825601fd2

diff --git a/tests/ref/fate/utvideoenc_rgba_none b/tests/ref/fate/utvideoenc_rgba_none
index 65f0728..f388c91 100644
--- a/tests/ref/fate/utvideoenc_rgba_none
+++ b/tests/ref/fate/utvideoenc_rgba_none

@@ -8,53 +8,53 @@
 #dimensions 0: 352x288
 #sar 0: 0/1
 #stream#, dts,        pts, duration,     size, hash
-0,          0,          0,        1,   301284, 55e84c6e1f41e48f47dcefb63e3c1efd
-0,          1,          1,        1,   301296, 12dab23dfd2c2d5b48bed2292b876688
-0,          2,          2,        1,   301072, cc2e2889403dcc5d8e36868f07918b9d
-0,          3,          3,        1,   301136, 9813d60e613a3a14e639f9af0a5b1fe7
-0,          4,          4,        1,   301140, 58554bb6749e8bbd9476335ac1cb0076
-0,          5,          5,        1,   301164, 37a249286019761a4a3e498e977f9da1
-0,          6,          6,        1,   301168, f3aa11b419ec6f683ad906e7f7a36342
-0,          7,          7,        1,   301272, f489654640f0e42225815ea9c9681201
-0,          8,          8,        1,   301308, f10eb3c1d0324b59bd25c8bc6556aca0
-0,          9,          9,        1,   301268, 2163aa992afef5210d677953d81adb17
-0,         10,         10,        1,   301168, 684d19d14212615ebffa1748a9c552ed
-0,         11,         11,        1,   301136, a5c47c30d12dbc679ce932a5988b32e5
-0,         12,         12,        1,   301272, 3a51dc37d7e5ecb4a8db948eade2e0fa
-0,         13,         13,        1,   301188, f9f9ed41c233e791e6cd75a34e52edf8
-0,         14,         14,        1,   301096, 63bbfbee6f0fa6745e143dfae40ce7ff
-0,         15,         15,        1,   301108, 1fb3340dd1804d27fb40aea6b073e9ce
-0,         16,         16,        1,   301248, 476d27f29da8e74db696ff38e81743fc
-0,         17,         17,        1,   301300, 84a7f5804a856b7ef640838320634568
-0,         18,         18,        1,   301424, f34006fb55745aac29e265e0362434bf
-0,         19,         19,        1,   301456, d9207e54e261d184ddd02f3706e63103
-0,         20,         20,        1,   301408, 89b5a6804bad85025a6a3d23dc539426
-0,         21,         21,        1,   301404, 5987d22d6e3bf18cd2ebea98a1915f57
-0,         22,         22,        1,   301340, 43ff13bb237a7899ecb04fa7d27e94ab
-0,         23,         23,        1,   301288, 01b3e148ed6b8a0d05ee628fb21fa4a5
-0,         24,         24,        1,   301360, aa7704007e3c437cfcad4fb83a69594a
-0,         25,         25,        1,   301376, 4dea983f0b4a012ba6875aa857d02e91
-0,         26,         26,        1,   301312, e263f1cb0fb19b50751e9a214a4c9d81
-0,         27,         27,        1,   301460, 85ad441664c99c591d6dc427910faa19
-0,         28,         28,        1,   301380, 86ffe8273011763d800dbf6c89942a70
-0,         29,         29,        1,   301448, 500862ea62e1982325d653d4853dcbcd
-0,         30,         30,        1,   301452, 91c4390a805e02d1924bd75946bc0b63
-0,         31,         31,        1,   301388, 4eb3040d65948355c4506ee8e8e041ca
-0,         32,         32,        1,   301348, ece814a764470f1d80973743a7adaa4b
-0,         33,         33,        1,   301324, 2ae0b9af5380c8f98087b90c646af813
-0,         34,         34,        1,   301224, d6a3ba0b543534bd7de9dd82107c468c
-0,         35,         35,        1,   301384, 4f4919c1b2502c6e03ddaa83f4c03f15
-0,         36,         36,        1,   301460, b45189bc89e6583f4426c390622fa1fc
-0,         37,         37,        1,   301456, db0c5e2bc705c825e554c2da54314746
-0,         38,         38,        1,   301520, 8d01037b2dcbba39d4746758fd53323c
-0,         39,         39,        1,   301524, 0a78af44bf49520ae8830060e6011898
-0,         40,         40,        1,   301532, da9032ac97b76ec10f94d74ee878cf41
-0,         41,         41,        1,   301496, 9a22b2a9a3ad897406fc7c3137d41a3b
-0,         42,         42,        1,   301572, a14a80ab416cf4a9a1ec24bfc72602a1
-0,         43,         43,        1,   301532, 71ea5a240540a2e08ced8ad78c1a0676
-0,         44,         44,        1,   301516, fffe101d036ed5afee9b6f86267c2a0c
-0,         45,         45,        1,   301568, 58ea3a6edaee760d98eadb072fb30796
-0,         46,         46,        1,   301660, 9886e77f5df35d8bd164d598d0f87514
-0,         47,         47,        1,   301668, 1f326eb789974fc853e1db57115ef58b
-0,         48,         48,        1,   301684, 08b2eb620b9a7be1bded4744cd4c88db
-0,         49,         49,        1,   301668, bfcce1ce5f7c30230aae9a2d67fc8a70
+0,          0,          0,        1,   304028, e1687f63bd131ace3866b61881e8e9a1
+0,          1,          1,        1,   304024, 93067fdad1470b1c2f2fd4ae603a3afe
+0,          2,          2,        1,   303932, 2fef08ce570f581373bd48b3a4773acb
+0,          3,          3,        1,   303932, 8623d4635239deac16d56085edb8aec1
+0,          4,          4,        1,   303984, ef0a1be76f7c854e5f2724d7a6707f54
+0,          5,          5,        1,   303932, 0e57009f6e93c2e5ce80d6109d9a66f3
+0,          6,          6,        1,   303936, db988d8d6e9ee0383cfc69a201de62e2
+0,          7,          7,        1,   304024, 963e86c081fafed2f757c4aafc9186df
+0,          8,          8,        1,   304032, e867c068706bff34a561a4b2c4622fd3
+0,          9,          9,        1,   303960, 0dd6d1cbe6a9fb07e3f3cb4a12c2821a
+0,         10,         10,        1,   303944, 424612ce8850b378171ee8bfca5d91ce
+0,         11,         11,        1,   303956, f26d5065818f98bbf97aae56fe07b057
+0,         12,         12,        1,   303980, dfd1a61b5a717805a06f19ee899c3917
+0,         13,         13,        1,   303936, 2223f98ac58078c49489811171c713e1
+0,         14,         14,        1,   303808, 6a63bd7cd4cafe5e64dc8dc46f9f3760
+0,         15,         15,        1,   303864, a355699c989795379d1ef2089c98e619
+0,         16,         16,        1,   304008, 07a0c41088846ed77fedc565cfeaeba7
+0,         17,         17,        1,   304000, dbb37a1af0bb534112aac9269b7d7c2c
+0,         18,         18,        1,   304048, 3129997c9ba84a0c91ff9d1c48d12d7e
+0,         19,         19,        1,   304068, 620b4c964daca105cd1b6bf8d55e86ae
+0,         20,         20,        1,   304068, bafaf53530f76d525eafb75a3866b5e7
+0,         21,         21,        1,   304092, 678138b93f07ef8f30ad373499a9caa3
+0,         22,         22,        1,   304060, 4bf8174ff9ee62c13bde29ad5bba3fc9
+0,         23,         23,        1,   303976, e6e926057f49e69e2ea8e679871e449a
+0,         24,         24,        1,   304040, f81ba4d36df7e8967a16e7de5c7a023b
+0,         25,         25,        1,   304084, a5a63a22e6bc3a54bfbd60852ca9da60
+0,         26,         26,        1,   304056, df9bd50b9dd7437afb3671c99e2e1746
+0,         27,         27,        1,   304076, 785a2a7de44e83f1111e66165b3d0765
+0,         28,         28,        1,   304132, ab01bacd7aee91ab80b30a48f54ec4d7
+0,         29,         29,        1,   304160, 4893296140e7ee6f2d194bc279f928f3
+0,         30,         30,        1,   304136, 6046c7516a5e8db2f4dd3b1286ad46c0
+0,         31,         31,        1,   304088, e6dbc1ec86cb678bab6a1ca0e93888d0
+0,         32,         32,        1,   303980, 66db98674c107af297a3de01c047ebce
+0,         33,         33,        1,   303964, e2ce0a2ee5ba8de50960873a8405c001
+0,         34,         34,        1,   303900, 7ae96a39e66e0abe63e298f45477c0ec
+0,         35,         35,        1,   304040, d5fc8f7809470bc40935285d4ed69a80
+0,         36,         36,        1,   304088, efae0966a7e421c3107016df862b0429
+0,         37,         37,        1,   304120, c6a3bf8991bf2b559f27d5ea9ae76d08
+0,         38,         38,        1,   304144, 588ac89c403c5d3ca9289fbd339fdb74
+0,         39,         39,        1,   304136, a3824537484dd2fc25dde9ec2974068c
+0,         40,         40,        1,   304172, e1a024aa9faba0ee0cb98e41cef254c9
+0,         41,         41,        1,   304116, c26c4c053d53b70c97cbf989cb51a15d
+0,         42,         42,        1,   304140, 4d13ee2a3b32ba3f080fffa7ca2db6e6
+0,         43,         43,        1,   304116, fb02b57f9a9fafa7bc6d2a35a68548fc
+0,         44,         44,        1,   304088, 6daa494c1ed11c9ea8da6c83eb473ce7
+0,         45,         45,        1,   304196, 8886953ff026efef28f4af12ce5239b6
+0,         46,         46,        1,   304200, 51ff2c8552b8b044a1a183330a188d62
+0,         47,         47,        1,   304208, 72fcd3dbf45284bb434ad3660f8a1ddf
+0,         48,         48,        1,   304204, 1d5e2170963d70acea2ad01e1486d490
+0,         49,         49,        1,   304184, 6eec23c3d4402f7a0ab4de16fde86462

diff --git a/tests/ref/fate/utvideoenc_yuv444_left b/tests/ref/fate/utvideoenc_yuv444_left
new file mode 100644
index 0000000..1b8d925
--- /dev/null
+++ b/tests/ref/fate/utvideoenc_yuv444_left

@@ -0,0 +1,60 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#extradata 0,                              16, e46c7123194c0ebf19a23e5cefebaa63
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: utvideo
+#dimensions 0: 352x288
+#sar 0: 0/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   144080, 678b4a3ece35971c3e0f199fcab2b4a1
+0,          1,          1,        1,   144120, d0c326ed062b569a04ea84359e43cfde
+0,          2,          2,        1,   144956, 32e2d8a1dc22b1d837e8f70509767d7d
+0,          3,          3,        1,   143324, 6fff1691c498ccf1cf848147dcc1b62c
+0,          4,          4,        1,   142464, df4caa8316ec987fe3cba4a76c0976f0
+0,          5,          5,        1,   144068, 2cc92f54850a45d76b8a959d62c28408
+0,          6,          6,        1,   143840, c80e0e9945a92869ca639b59118f24f3
+0,          7,          7,        1,   143536, 682a3148911f94daa333e12a505d689f
+0,          8,          8,        1,   143736, af692d8d5fe2e23f9e16c7164ceaaeb5
+0,          9,          9,        1,   144076, 13412473483a727e8cced00d0d0fbb78
+0,         10,         10,        1,   144012, 4f1402c7d25ee222329cc007aa4df345
+0,         11,         11,        1,   143520, cc4b83f407d031cc4a166c3210583599
+0,         12,         12,        1,   141640, e4a4d1b88c7aabfcc40c51ff80305f70
+0,         13,         13,        1,   143096, b3de93652c190bc88dd6e17a27db4ebf
+0,         14,         14,        1,   144704, 48ce5d6a84b965f38f5e4a46e8bfd0c3
+0,         15,         15,        1,   144520, 936970ae76c23e6a7880f2c875c1fb66
+0,         16,         16,        1,   143704, 32d9a3e4118e1134f3c0ead846805aaf
+0,         17,         17,        1,   143648, 4214224b843fab53ae45921426de1892
+0,         18,         18,        1,   142920, 22b1c3a09f23fa262d7ffa2fb4347f3a
+0,         19,         19,        1,   142604, d24c0ba247bb38bf7bdfc348d7c6fdf1
+0,         20,         20,        1,   141928, 8115cdd0fb51fa90821f93900c8cc980
+0,         21,         21,        1,   142072, f6d87afe383aa9a85b89832e422859e0
+0,         22,         22,        1,   142204, ced75258ae7001a8a4029423d61abf06
+0,         23,         23,        1,   143420, 686a5ded45cd70af9e699e9919da9cc7
+0,         24,         24,        1,   144552, 5a1e0f522e74a1284b86c5d0e185f16d
+0,         25,         25,        1,   143328, f54e6787ac2f3ac0e925388b66c95b2a
+0,         26,         26,        1,   142584, 652dda504ace9c4e1569319ef6cd333d
+0,         27,         27,        1,   142084, 3eafb87f47844ce6d0572af051d2e25b
+0,         28,         28,        1,   140196, 92b60cf33e5c430f3e92a39bb4389096
+0,         29,         29,        1,   140152, 658a331d3f5ee1bb50392f3835aed56d
+0,         30,         30,        1,   141484, b54db7d5978d99f7192dd2008438ab41
+0,         31,         31,        1,   142360, ed84a9d2241ade4c36f95eaa1ae00b0e
+0,         32,         32,        1,   142744, 7dbb99de3a4a81de5564bb577471128d
+0,         33,         33,        1,   144360, bd26a71b0824a04badfffa0653f0ac00
+0,         34,         34,        1,   143464, 11fe56173423180d581bc52fe044657a
+0,         35,         35,        1,   142856, 9f690fa8727669ba8640032933eb8235
+0,         36,         36,        1,   142108, b5c182f3f0e6d307f598dc55f51f9d51
+0,         37,         37,        1,   142492, ab7b6b446b4f5f5ec2b2eb52f934523c
+0,         38,         38,        1,   140532, 3fda7e5443dcf59e52f523bcf863d5d2
+0,         39,         39,        1,   140288, 1426cb304a219876586ba5c161809fac
+0,         40,         40,        1,   140540, 3a69f372875e2cd6ec5b49498e1f1a05
+0,         41,         41,        1,   141868, aca8f0bd1b29c4142b80e5e3e59aeb75
+0,         42,         42,        1,   141892, 91bf3847703041b4fbe87602e78fb577
+0,         43,         43,        1,   142080, f1b28c53e1068a6305ebb6d4862aebf1
+0,         44,         44,        1,   141220, 188ffeef2973b490364e8cf01fc2d8e6
+0,         45,         45,        1,   139908, 1eaa1194f719aa6da4c7ab4dddcee4ce
+0,         46,         46,        1,   138868, 2243b5d473d6df2ea283752fe799c95b
+0,         47,         47,        1,   139276, d74f2cbfac00177d848fb1b1e223e654
+0,         48,         48,        1,   139312, 27054be184a1028a86c5e3406418a92c
+0,         49,         49,        1,   138132, feb653f902c92cfd1954b61b9d1149f2

diff --git a/tests/ref/fate/utvideoenc_yuv444_median b/tests/ref/fate/utvideoenc_yuv444_median
new file mode 100644
index 0000000..f690a38
--- /dev/null
+++ b/tests/ref/fate/utvideoenc_yuv444_median

@@ -0,0 +1,60 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#extradata 0,                              16, e46c7123194c0ebf19a23e5cefebaa63
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: utvideo
+#dimensions 0: 352x288
+#sar 0: 0/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   136492, 5e3cc78fe9e404b0eca7c2df98841eab
+0,          1,          1,        1,   136336, cec7bad80feac6e16a3d84734ae473d8
+0,          2,          2,        1,   137296, cb43d766c513da815c5964f1f48c2e49
+0,          3,          3,        1,   136088, 4c8900602c580526057b52bb14161274
+0,          4,          4,        1,   135276, a9b134a0e49812ff929cdd57d8a00907
+0,          5,          5,        1,   136668, 0b9dc3cd51ce5c674315c06bd677fcea
+0,          6,          6,        1,   136480, 16020ff181e477f1b01b86aa5070bf7c
+0,          7,          7,        1,   135876, 557f1f43cdfc06603f3bd97bb9d44394
+0,          8,          8,        1,   136204, c03b7cd310961ef9fe8734f45a9276e2
+0,          9,          9,        1,   136796, b668c63bed0b2de4d182cf0281e71ecf
+0,         10,         10,        1,   136252, 7758777682f6e3ea84010cbaf2645a6e
+0,         11,         11,        1,   136284, 699e6ab66911cdd0fa357c069537e3ec
+0,         12,         12,        1,   135096, 65d4d3b6e1bbf6d67baca1f346d06735
+0,         13,         13,        1,   136088, 7d69fb86f67b3fd3df4008cb03d9f1ef
+0,         14,         14,        1,   136876, abca4a026bf93da58283d5ddd51cead4
+0,         15,         15,        1,   137096, f5bb254ede8a9ed4c07fb80fb1a301d7
+0,         16,         16,        1,   136232, cdbe70b4b7051b9c1092ea626e825658
+0,         17,         17,        1,   136308, acb31052249c48b729842938977172e8
+0,         18,         18,        1,   135444, bf38e56e2a152154b0401507550b5605
+0,         19,         19,        1,   135300, 1737341c361f656feffc86903da34647
+0,         20,         20,        1,   134548, c28e9c441544b7c51cf76afd4b5a2799
+0,         21,         21,        1,   134816, 5b8ba0d205863105d723afcc28274fb2
+0,         22,         22,        1,   134776, 90c5ac5ed83ed6a43b4fe14c0ee07dc5
+0,         23,         23,        1,   136332, 3ca065b60b4fbc5b04f68e6bb900d862
+0,         24,         24,        1,   136724, 283b23278ca40dc93a326e96e5c22629
+0,         25,         25,        1,   135948, 6568fb8593edce20f294a936ab11153b
+0,         26,         26,        1,   135400, 22abaf0c3184a79cb90b4d7fe6cee897
+0,         27,         27,        1,   135056, abd6117e31af2d136c0df8ab1db64599
+0,         28,         28,        1,   133676, 67a65017fb2a232a726717628fbe1d95
+0,         29,         29,        1,   133700, 7dff3d958ecbedc691d3b188084d26d9
+0,         30,         30,        1,   134444, 192c98c1a7c2f4a15c7aa59607b7d1ff
+0,         31,         31,        1,   135388, 32d1aba6d499162345fcb0ca858558d5
+0,         32,         32,        1,   135680, 77cd1f8ddf7ce977ba9ff074d595e033
+0,         33,         33,        1,   136404, 9eb0213f0d66d957f668f88d426f014f
+0,         34,         34,        1,   135816, 337b4082181e8627f2fe9c852681688f
+0,         35,         35,        1,   135616, c73c95e75669ce9dafd0ac253239430a
+0,         36,         36,        1,   135136, e4e5515dbb05b855b4ce42fe1638d119
+0,         37,         37,        1,   135572, 987ffdc8d0c142e92dc02190f73afd70
+0,         38,         38,        1,   134748, 8a01538b863e51066f896f18ad8cc68b
+0,         39,         39,        1,   134428, 3096e9435e5813d54013bf5c86e15c4f
+0,         40,         40,        1,   133796, 78d3952c814172173b24881e110c861f
+0,         41,         41,        1,   135052, b0c2bf0b1047dd71268d3c3114b655d8
+0,         42,         42,        1,   134768, 653862b7e519c299278186920217a303
+0,         43,         43,        1,   134692, 26fb91814efadd0120f247d6b5f22d81
+0,         44,         44,        1,   134164, e85390622cb7eb6618512ead58d578c6
+0,         45,         45,        1,   133380, 7c04e5015f89c24713227329aa1820fb
+0,         46,         46,        1,   132872, 877db229c19c9719bd9b19d0dab1c169
+0,         47,         47,        1,   132864, c4a86326303f96d53cb267111ed199e5
+0,         48,         48,        1,   132644, 7dd3fe41c535d168eb0450d96171ec0f
+0,         49,         49,        1,   131772, c430f15c1683207fbec250dc9d7a2b13

diff --git a/tests/ref/fate/utvideoenc_yuv444_none b/tests/ref/fate/utvideoenc_yuv444_none
new file mode 100644
index 0000000..880b813
--- /dev/null
+++ b/tests/ref/fate/utvideoenc_yuv444_none

@@ -0,0 +1,60 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#extradata 0,                              16, e46c7123194c0ebf19a23e5cefebaa63
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: utvideo
+#dimensions 0: 352x288
+#sar 0: 0/1
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   286288, f77b660b13f4ca2d906dbe620019224f
+0,          1,          1,        1,   286388, 61c3e9c05be428c2adf312257580d528
+0,          2,          2,        1,   286148, cf2d8de31997ba6c9c87ca0203295ffd
+0,          3,          3,        1,   286124, 981f9eeeed512af38eac7c5df0693b85
+0,          4,          4,        1,   286644, be9e95106ca59737b1154b865e917110
+0,          5,          5,        1,   286280, e70b66f13e4a08c10e1d014ac1be0cf9
+0,          6,          6,        1,   286176, b20a78e4bcd02d7fedc0f00af8276847
+0,          7,          7,        1,   286132, 80cc66c88ea015b62bbf46ae188eed95
+0,          8,          8,        1,   285968, 446ae00d0e1fc233b66ecb35c720464d
+0,          9,          9,        1,   285888, 9d4b29bfd62e5cac2d77185e19706325
+0,         10,         10,        1,   285792, bc74548c1ed37dc1d6ca523e00b83ed7
+0,         11,         11,        1,   285412, f7234684c90e0dfcfd7ff9a7612b9cb9
+0,         12,         12,        1,   286080, 3873173b7da6113b058c0e50d1fb2ac9
+0,         13,         13,        1,   285524, 90654181491240e1883cfa8e9dcbc53a
+0,         14,         14,        1,   285628, 5ae2de1a60c2586af4a27dca27e0d614
+0,         15,         15,        1,   285620, 9d78ee5f1390c05af2677bedf2b088e4
+0,         16,         16,        1,   285768, 7c47cbbba7af11ace3233c904e628242
+0,         17,         17,        1,   285880, 8292ea2eb80b860edf0747707eb99966
+0,         18,         18,        1,   285956, 17aef410ca3fec63ef83185401e005a3
+0,         19,         19,        1,   285992, 779819d6d090a67261ef3611f7b50ad6
+0,         20,         20,        1,   285920, 52fbe43f864350643cb4e8a57fcf8f63
+0,         21,         21,        1,   286040, c2e7919e6e20c7fcf78c0229e4d456d4
+0,         22,         22,        1,   285844, edc46036a229878d26a8df1f6dfbd661
+0,         23,         23,        1,   285868, 7e5a9af2d70dc57a7ca0a03f88a3a1a9
+0,         24,         24,        1,   286056, 174d2c8aca73cb5481e12a5a22516d02
+0,         25,         25,        1,   285836, cf1d28a88aa4fc2a5e16e03e2081919f
+0,         26,         26,        1,   286004, 2ffd12c7501e4709939712a62945ec29
+0,         27,         27,        1,   286264, 61962127ef2a4612a9a352119b7444dd
+0,         28,         28,        1,   286632, 9355ea7ef38424633b9e0b77ebb23d77
+0,         29,         29,        1,   286508, a741f67693b0154355966a1b07265939
+0,         30,         30,        1,   286124, 0fb2a3a0b6e525907b6d0c50ca8f7be1
+0,         31,         31,        1,   286308, 89fd53c38119426cdd63be714e857dac
+0,         32,         32,        1,   286032, 674d9043981cce19c39e2c6d405d856a
+0,         33,         33,        1,   285672, 7b9678e7772b71abf5a95fa967d319af
+0,         34,         34,        1,   285520, 6e626d4c722ccafea6600a62c093a47a
+0,         35,         35,        1,   285828, 49e9023d9c820798af6b445ec1d87b13
+0,         36,         36,        1,   286344, 6cf611f3ef47442659e3ce3807d3a480
+0,         37,         37,        1,   286360, a0eca88afed37e0747b1fa61c7e36713
+0,         38,         38,        1,   286264, 5b3e605f71b8bab3aa2cc3ed439c6f8a
+0,         39,         39,        1,   286096, f9a1c5beca596e2b5eef2392b0adb01e
+0,         40,         40,        1,   286284, eb2440f6539413efc86e5a0cd545ecd4
+0,         41,         41,        1,   285816, 520d3344f335b580bfb2fad58b1643b4
+0,         42,         42,        1,   285864, 48ed0d9ed707808298800c87f2d61d75
+0,         43,         43,        1,   286108, 9cb952285049f8354ab7e00c0ab2f7d5
+0,         44,         44,        1,   286308, 7ba07e4cc04cd42272ab75e1a65703ca
+0,         45,         45,        1,   286552, 55722a7bc60c9eb68a94fd0e8ba3ba4f
+0,         46,         46,        1,   286456, e3e54e031bcf3067360955817690a755
+0,         47,         47,        1,   286240, 96aa493b2b8d264ffb0d49c952594c11
+0,         48,         48,        1,   286128, 6e7e04eec86f257aa46c59f0e8b6b22c
+0,         49,         49,        1,   286128, ab72c8a01a095040efb1d4cb1fddbd4a

diff --git a/tests/ref/fate/vc1_ilaced_twomv b/tests/ref/fate/vc1_ilaced_twomv
index fa56b29..096e5fc 100644
--- a/tests/ref/fate/vc1_ilaced_twomv
+++ b/tests/ref/fate/vc1_ilaced_twomv

@@ -3,16 +3,16 @@
 #codec_id 0: rawvideo
 #dimensions 0: 1920x1080
 #sar 0: 1/1
-0,          0,          0,        1,  3110400, 0x764f8856
-0,          2,          2,        1,  3110400, 0x3b615b79
-0,          3,          3,        1,  3110400, 0x4fbb6f84
-0,          4,          4,        1,  3110400, 0xc1ca8532
-0,          5,          5,        1,  3110400, 0xb6e7d363
-0,          6,          6,        1,  3110400, 0x1beb5c34
-0,          7,          7,        1,  3110400, 0xcb8cb061
-0,          8,          8,        1,  3110400, 0x13ddbd61
-0,          9,          9,        1,  3110400, 0xde8f052f
-0,         10,         10,        1,  3110400, 0x4d4072db
-0,         11,         11,        1,  3110400, 0x4e5d29e3
-0,         12,         12,        1,  3110400, 0x75300531
-0,         13,         13,        1,  3110400, 0x1114285a
+0,          0,          0,        1,  3110400, 0xc95e8861
+0,          2,          2,        1,  3110400, 0xf58b5cbf
+0,          3,          3,        1,  3110400, 0x2f866f33
+0,          4,          4,        1,  3110400, 0x05c18415
+0,          5,          5,        1,  3110400, 0x4077ca93
+0,          6,          6,        1,  3110400, 0x44d105fc
+0,          7,          7,        1,  3110400, 0xa0608374
+0,          8,          8,        1,  3110400, 0x407689dc
+0,          9,          9,        1,  3110400, 0x4707d00a
+0,         10,         10,        1,  3110400, 0x74986831
+0,         11,         11,        1,  3110400, 0xa5912619
+0,         12,         12,        1,  3110400, 0x44aa5565
+0,         13,         13,        1,  3110400, 0xb9752774

diff --git a/tests/ref/fate/vc1_sa10143 b/tests/ref/fate/vc1_sa10143
index d690481..db89cc9 100644
--- a/tests/ref/fate/vc1_sa10143
+++ b/tests/ref/fate/vc1_sa10143

@@ -3,33 +3,33 @@
 #codec_id 0: rawvideo
 #dimensions 0: 720x480
 #sar 0: 1/1
-0,          0,          0,        1,   518400, 0x89407f55
-0,          2,          2,        1,   518400, 0xaa896afd
-0,          3,          3,        1,   518400, 0x0e69ff59
-0,          4,          4,        1,   518400, 0x0c30bfa0
-0,          5,          5,        1,   518400, 0x1a5b6a69
-0,          6,          6,        1,   518400, 0x23470858
-0,          7,          7,        1,   518400, 0x9a4e3c54
-0,          8,          8,        1,   518400, 0xad63160b
-0,          9,          9,        1,   518400, 0x0fcfeebc
-0,         10,         10,        1,   518400, 0x20b31777
-0,         11,         11,        1,   518400, 0x9d79df09
-0,         12,         12,        1,   518400, 0x3e86766f
-0,         13,         13,        1,   518400, 0x638a8746
-0,         14,         14,        1,   518400, 0x7a6c1a0e
-0,         15,         15,        1,   518400, 0x306f6cef
-0,         16,         16,        1,   518400, 0x81f81281
-0,         17,         17,        1,   518400, 0x49ab5bf5
-0,         18,         18,        1,   518400, 0x8f316e44
-0,         19,         19,        1,   518400, 0x95ae00c9
-0,         20,         20,        1,   518400, 0xf71bb7f5
-0,         21,         21,        1,   518400, 0x5205ea68
-0,         22,         22,        1,   518400, 0x74a1d8b9
-0,         23,         23,        1,   518400, 0xa3217616
-0,         24,         24,        1,   518400, 0x2b28bbf8
-0,         25,         25,        1,   518400, 0xf024872a
-0,         26,         26,        1,   518400, 0x2fdbaaf3
-0,         27,         27,        1,   518400, 0xa3a2418e
-0,         28,         28,        1,   518400, 0x55bfe435
-0,         29,         29,        1,   518400, 0x50fb6c94
-0,         30,         30,        1,   518400, 0x5584bb40
+0,          0,          0,        1,   518400, 0x34fa7f55
+0,          2,          2,        1,   518400, 0x60466bc1
+0,          3,          3,        1,   518400, 0xe68dff1e
+0,          4,          4,        1,   518400, 0x790ac06a
+0,          5,          5,        1,   518400, 0xb3b26b27
+0,          6,          6,        1,   518400, 0x8840096c
+0,          7,          7,        1,   518400, 0xf75c3d61
+0,          8,          8,        1,   518400, 0xca071781
+0,          9,          9,        1,   518400, 0xa8e6edf9
+0,         10,         10,        1,   518400, 0xabb61984
+0,         11,         11,        1,   518400, 0x0b31dedd
+0,         12,         12,        1,   518400, 0xf44378ef
+0,         13,         13,        1,   518400, 0xf7268996
+0,         14,         14,        1,   518400, 0x8c5b1ff4
+0,         15,         15,        1,   518400, 0xda356fd2
+0,         16,         16,        1,   518400, 0x0e091c57
+0,         17,         17,        1,   518400, 0x17645e68
+0,         18,         18,        1,   518400, 0xf47a71ef
+0,         19,         19,        1,   518400, 0x6c440498
+0,         20,         20,        1,   518400, 0xd705bd32
+0,         21,         21,        1,   518400, 0x0800edd0
+0,         22,         22,        1,   518400, 0x902be119
+0,         23,         23,        1,   518400, 0x0f7d7bc4
+0,         24,         24,        1,   518400, 0x9f4dc421
+0,         25,         25,        1,   518400, 0x3b8c8d5a
+0,         26,         26,        1,   518400, 0xbcdfb2b9
+0,         27,         27,        1,   518400, 0xa02a46c3
+0,         28,         28,        1,   518400, 0x8ecde915
+0,         29,         29,        1,   518400, 0x20576bfd
+0,         30,         30,        1,   518400, 0xac40bc36

diff --git a/tests/ref/fate/vp8-alpha b/tests/ref/fate/vp8-alpha
index 4922d52..cc096c6 100644
--- a/tests/ref/fate/vp8-alpha
+++ b/tests/ref/fate/vp8-alpha

@@ -3,7 +3,7 @@
 #codec_id 0: vp8
 #dimensions 0: 320x213
 #sar 0: 1/1
-0,          0,          0,       33,     2108, 0x59b92a34, S=2,     1900, 0x8fb3adc5,        8, 0x00000000
+0,          0,          0,       33,     2108, 0x59b92a34, S=2,     1900, 0x8fb3adc5,       12, 0x00000000
 0,         32,         32,       33,      142, 0x2f2a3fed, F=0x0, S=1,      160, 0xa13346af
 0,         65,         65,       33,      157, 0x17804767, F=0x0, S=1,      209, 0x64115f15
 0,         99,         99,       33,      206, 0x537262ca, F=0x0, S=1,      317, 0x44a09dd0

diff --git a/tests/ref/lavf-fate/mov_qtrle_mace6 b/tests/ref/lavf-fate/mov_qtrle_mace6
index 30c705e..e8fc882 100644
--- a/tests/ref/lavf-fate/mov_qtrle_mace6
+++ b/tests/ref/lavf-fate/mov_qtrle_mace6

@@ -1,3 +1,3 @@
-dcc9c4c182a5809dee9a9366f4533797 *./tests/data/lavf-fate/lavf.mov
-1270387 ./tests/data/lavf-fate/lavf.mov
-./tests/data/lavf-fate/lavf.mov CRC=0x5ec66f68
+f0ae34fb90e11342624e092094273aef *./tests/data/lavf-fate/lavf.mov
+1270415 ./tests/data/lavf-fate/lavf.mov
+./tests/data/lavf-fate/lavf.mov CRC=0x9320cd26

diff --git a/tests/ref/lavf/ffm b/tests/ref/lavf/ffm
deleted file mode 100644
index d9fa8d5..0000000
--- a/tests/ref/lavf/ffm
+++ /dev/null

@@ -1,3 +0,0 @@
-ca2a450cd0d1e299514a345923b4c82a *./tests/data/lavf/lavf.ffm
-376832 ./tests/data/lavf/lavf.ffm
-./tests/data/lavf/lavf.ffm CRC=0x000e23ae

diff --git a/tests/ref/lavf/ismv b/tests/ref/lavf/ismv
index f29b5ff..7b4a466 100644
--- a/tests/ref/lavf/ismv
+++ b/tests/ref/lavf/ismv

@@ -1,9 +1,9 @@
-a9ccbb4cd1436d222ef4425567b4e03d *./tests/data/lavf/lavf.ismv
+96053075a3f60d271131fe2d0765c267 *./tests/data/lavf/lavf.ismv
 312542 ./tests/data/lavf/lavf.ismv
 ./tests/data/lavf/lavf.ismv CRC=0x9d9a638a
-440d85f9fd5b9f63c2676638782b5c15 *./tests/data/lavf/lavf.ismv
+7022701b4c693bc4ffe1e9f96dd82a02 *./tests/data/lavf/lavf.ismv
 321448 ./tests/data/lavf/lavf.ismv
 ./tests/data/lavf/lavf.ismv CRC=0xe8130120
-a9ccbb4cd1436d222ef4425567b4e03d *./tests/data/lavf/lavf.ismv
+96053075a3f60d271131fe2d0765c267 *./tests/data/lavf/lavf.ismv
 312542 ./tests/data/lavf/lavf.ismv
 ./tests/data/lavf/lavf.ismv CRC=0x9d9a638a

diff --git a/tests/ref/lavf/mxf b/tests/ref/lavf/mxf
index b9c3733..4466685 100644
--- a/tests/ref/lavf/mxf
+++ b/tests/ref/lavf/mxf

@@ -1,9 +1,9 @@
-1c06a9d69b6e309579784db5ecb0b69f *./tests/data/lavf/lavf.mxf
-525369 ./tests/data/lavf/lavf.mxf
+649009e3d3d62eb3b6c56334d057cc4d *./tests/data/lavf/lavf.mxf
+526393 ./tests/data/lavf/lavf.mxf
 ./tests/data/lavf/lavf.mxf CRC=0x8dddfaab
-50b4f9ca0493e6d83f4c52dc3aa2b7a5 *./tests/data/lavf/lavf.mxf
-560697 ./tests/data/lavf/lavf.mxf
-./tests/data/lavf/lavf.mxf CRC=0xf21b1b48
-4b71b154ae37364c8028cb50850a54c5 *./tests/data/lavf/lavf.mxf
-525369 ./tests/data/lavf/lavf.mxf
+9076b7015cffe8aa72883e900a2041a5 *./tests/data/lavf/lavf.mxf
+561721 ./tests/data/lavf/lavf.mxf
+./tests/data/lavf/lavf.mxf CRC=0x96ff1b48
+02bf8f0cd8951a49e277306691cb1538 *./tests/data/lavf/lavf.mxf
+526393 ./tests/data/lavf/lavf.mxf
 ./tests/data/lavf/lavf.mxf CRC=0x8dddfaab

diff --git a/tests/ref/lavf/mxf_d10 b/tests/ref/lavf/mxf_d10
index 134db87..856fe9c 100644
--- a/tests/ref/lavf/mxf_d10
+++ b/tests/ref/lavf/mxf_d10

@@ -1,3 +1,3 @@
-73c0cb416548c33d0651c59519a8f7e2 *./tests/data/lavf/lavf.mxf_d10
-5330989 ./tests/data/lavf/lavf.mxf_d10
+e597f73ef9c9819710d2f815813eb91f *./tests/data/lavf/lavf.mxf_d10
+5332013 ./tests/data/lavf/lavf.mxf_d10
 ./tests/data/lavf/lavf.mxf_d10 CRC=0x6c74d488

diff --git a/tests/ref/lavf/mxf_dv25 b/tests/ref/lavf/mxf_dv25
index 8509482..e94b3ca 100644
--- a/tests/ref/lavf/mxf_dv25
+++ b/tests/ref/lavf/mxf_dv25

@@ -1,3 +1,3 @@
-1871bd11947924116776201f24fd0adf *./tests/data/lavf/lavf.mxf_dv25
-3833389 ./tests/data/lavf/lavf.mxf_dv25
+0fc964fa22bc8b3a389b81b9a2efccb3 *./tests/data/lavf/lavf.mxf_dv25
+3834413 ./tests/data/lavf/lavf.mxf_dv25
 ./tests/data/lavf/lavf.mxf_dv25 CRC=0xbdaf7f52

diff --git a/tests/ref/lavf/mxf_dvcpro50 b/tests/ref/lavf/mxf_dvcpro50
index 1d0cf79..514a047 100644
--- a/tests/ref/lavf/mxf_dvcpro50
+++ b/tests/ref/lavf/mxf_dvcpro50

@@ -1,3 +1,3 @@
-6c9cb62911ac16c3b55f0ad0b052c05b *./tests/data/lavf/lavf.mxf_dvcpro50
-7430189 ./tests/data/lavf/lavf.mxf_dvcpro50
+aa81ea83af44a69e73849e327cc4bd12 *./tests/data/lavf/lavf.mxf_dvcpro50
+7431213 ./tests/data/lavf/lavf.mxf_dvcpro50
 ./tests/data/lavf/lavf.mxf_dvcpro50 CRC=0xe3bbe4b4

diff --git a/tests/ref/lavf/mxf_opatom b/tests/ref/lavf/mxf_opatom
index ea1190c..cc4eb51 100644
--- a/tests/ref/lavf/mxf_opatom
+++ b/tests/ref/lavf/mxf_opatom

@@ -1,3 +1,3 @@
-962c2cd582340f8961a8283636093abf *./tests/data/lavf/lavf.mxf_opatom
-4717113 ./tests/data/lavf/lavf.mxf_opatom
+06a1816aa91c733e1ef7e45d82e4f1d3 *./tests/data/lavf/lavf.mxf_opatom
+4717625 ./tests/data/lavf/lavf.mxf_opatom
 ./tests/data/lavf/lavf.mxf_opatom CRC=0xf55aa22a

diff --git a/tests/ref/lavf/mxf_opatom_audio b/tests/ref/lavf/mxf_opatom_audio
index 953df90..4859645 100644
--- a/tests/ref/lavf/mxf_opatom_audio
+++ b/tests/ref/lavf/mxf_opatom_audio

@@ -1,3 +1,3 @@
-d4ad5a0faf410a9d9e99b3328143e89d *./tests/data/lavf/lavf.mxf_opatom_audio
-101945 ./tests/data/lavf/lavf.mxf_opatom_audio
+c45bb140605339556a77e751fda2c449 *./tests/data/lavf/lavf.mxf_opatom_audio
+102969 ./tests/data/lavf/lavf.mxf_opatom_audio
 ./tests/data/lavf/lavf.mxf_opatom_audio CRC=0xd155c6ff

diff --git a/tests/ref/lavf/wav_peak b/tests/ref/lavf/wav_peak
index aa7e5fc..861b246 100644
--- a/tests/ref/lavf/wav_peak
+++ b/tests/ref/lavf/wav_peak

@@ -1,3 +1,3 @@
-35148d1f6e66b0080893851d917ecbf4 *./tests/data/lavf/lavf.peak.wav
+105805963fb767d00da056f42f32d9f3 *./tests/data/lavf/lavf.peak.wav
 89094 ./tests/data/lavf/lavf.peak.wav
 ./tests/data/lavf/lavf.peak.wav CRC=0x3a1da17e

diff --git a/tests/ref/lavf/wav_peak_only b/tests/ref/lavf/wav_peak_only
index dccd0e7..b203d03 100644
--- a/tests/ref/lavf/wav_peak_only
+++ b/tests/ref/lavf/wav_peak_only

@@ -1,2 +1,2 @@
-b609a363e6d490710ed52231a8d09d3c *./tests/data/lavf/lavf.peak_only.wav
+f1a8aeeae8069f3992c4d780436c3d23 *./tests/data/lavf/lavf.peak_only.wav
 832 ./tests/data/lavf/lavf.peak_only.wav

diff --git a/tests/ref/lavf/xwd b/tests/ref/lavf/xwd
index 3d63821..b263ce6 100644
--- a/tests/ref/lavf/xwd
+++ b/tests/ref/lavf/xwd

@@ -11,10 +11,10 @@
 ./tests/data/images/xwd/%02d.xwd CRC=0x14555d6e
 202863 ./tests/data/images/xwd/02.xwd
 c6f3cb7c45f7238474a89d2ad61a1caf *./tests/data/images/xwd/02.xwd
-./tests/data/images/xwd/%02d.xwd CRC=0xfaafb59b
+./tests/data/images/xwd/%02d.xwd CRC=0xf217a95e
 104559 ./tests/data/images/xwd/02.xwd
 fe1af954966a40c2cd35fc27094ff823 *./tests/data/images/xwd/02.xwd
-./tests/data/images/xwd/%02d.xwd CRC=0xd69c3a09
+./tests/data/images/xwd/%02d.xwd CRC=0xce042dcc
 104559 ./tests/data/images/xwd/02.xwd
 85e9b8b814a1dea71d143aac2e487037 *./tests/data/images/xwd/02.xwd
 ./tests/data/images/xwd/%02d.xwd CRC=0x0ff205be

diff --git a/tests/ref/seek/empty-edit-mp4 b/tests/ref/seek/empty-edit-mp4
new file mode 100644
index 0000000..f0a4ad3
--- /dev/null
+++ b/tests/ref/seek/empty-edit-mp4

@@ -0,0 +1,134 @@
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st:-1 flags:0  ts:-1.000000
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st:-1 flags:1  ts: 11.894167
+ret: 0         st: 0 flags:1 dts: 11.000000 pts: 11.000000 pos:  40515 size:  3214
+ret: 0         st: 0 flags:0 dts: 11.100000 pts: 11.100000 pos:  43729 size:   581
+ret: 0         st: 0 flags:0 dts: 11.200000 pts: 11.200000 pos:  44310 size:   432
+ret: 0         st: 0 flags:0 dts: 11.300000 pts: 11.300000 pos:  44742 size:   380
+ret: 0         st: 0 flags:0  ts: 9.788379
+ret: 0         st: 0 flags:1 dts: 10.000000 pts: 10.000000 pos:  33523 size:  3221
+ret: 0         st: 0 flags:0 dts: 10.100000 pts: 10.100000 pos:  36744 size:   575
+ret: 0         st: 0 flags:0 dts: 10.200000 pts: 10.200000 pos:  37319 size:   438
+ret: 0         st: 0 flags:0 dts: 10.300000 pts: 10.300000 pos:  37757 size:   449
+ret: 0         st: 0 flags:1  ts: 7.682520
+ret: 0         st: 0 flags:1 dts: 7.000000 pts: 7.000000 pos:  13643 size:  3234
+ret: 0         st: 0 flags:0 dts: 7.100000 pts: 7.100000 pos:  16877 size:   585
+ret: 0         st: 0 flags:0 dts: 7.200000 pts: 7.200000 pos:  17462 size:   442
+ret: 0         st: 0 flags:0 dts: 7.300000 pts: 7.300000 pos:  17904 size:   371
+ret: 0         st:-1 flags:0  ts: 5.576668
+ret: 0         st: 0 flags:1 dts: 6.000000 pts: 6.000000 pos:   6953 size:  3166
+ret: 0         st: 0 flags:0 dts: 6.100000 pts: 6.100000 pos:  10119 size:   599
+ret: 0         st: 0 flags:0 dts: 6.200000 pts: 6.200000 pos:  10718 size:   418
+ret: 0         st: 0 flags:0 dts: 6.300000 pts: 6.300000 pos:  11136 size:   354
+ret: 0         st:-1 flags:1  ts: 3.470835
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st: 0 flags:0  ts: 1.365039
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st: 0 flags:1  ts:-0.740820
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st:-1 flags:0  ts: 12.153336
+ret: 0         st: 0 flags:1 dts: 13.000000 pts: 13.000000 pos:  54444 size:  3310
+ret: 0         st: 0 flags:0 dts: 13.100000 pts: 13.100000 pos:  57754 size:   540
+ret: 0         st: 0 flags:0 dts: 13.200000 pts: 13.200000 pos:  58294 size:   419
+ret: 0         st: 0 flags:0 dts: 13.300000 pts: 13.300000 pos:  58713 size:   338
+ret: 0         st:-1 flags:1  ts: 10.047503
+ret: 0         st: 0 flags:1 dts: 10.000000 pts: 10.000000 pos:  33523 size:  3221
+ret: 0         st: 0 flags:0 dts: 10.100000 pts: 10.100000 pos:  36744 size:   575
+ret: 0         st: 0 flags:0 dts: 10.200000 pts: 10.200000 pos:  37319 size:   438
+ret: 0         st: 0 flags:0 dts: 10.300000 pts: 10.300000 pos:  37757 size:   449
+ret: 0         st: 0 flags:0  ts: 7.941699
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 8.000000 pos:  20396 size:  3281
+ret: 0         st: 0 flags:0 dts: 8.100000 pts: 8.100000 pos:  23677 size:   631
+ret: 0         st: 0 flags:0 dts: 8.200000 pts: 8.200000 pos:  24308 size:   349
+ret: 0         st: 0 flags:0 dts: 8.300000 pts: 8.300000 pos:  24657 size:   319
+ret: 0         st: 0 flags:1  ts: 5.835840
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st:-1 flags:0  ts: 3.730004
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st:-1 flags:1  ts: 1.624171
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st: 0 flags:0  ts:-0.481641
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st: 0 flags:1  ts: 12.412500
+ret: 0         st: 0 flags:1 dts: 12.000000 pts: 12.000000 pos:  47419 size:  3229
+ret: 0         st: 0 flags:0 dts: 12.100000 pts: 12.100000 pos:  50648 size:   588
+ret: 0         st: 0 flags:0 dts: 12.200000 pts: 12.200000 pos:  51236 size:   404
+ret: 0         st: 0 flags:0 dts: 12.300000 pts: 12.300000 pos:  51640 size:   415
+ret: 0         st:-1 flags:0  ts: 10.306672
+ret: 0         st: 0 flags:1 dts: 11.000000 pts: 11.000000 pos:  40515 size:  3214
+ret: 0         st: 0 flags:0 dts: 11.100000 pts: 11.100000 pos:  43729 size:   581
+ret: 0         st: 0 flags:0 dts: 11.200000 pts: 11.200000 pos:  44310 size:   432
+ret: 0         st: 0 flags:0 dts: 11.300000 pts: 11.300000 pos:  44742 size:   380
+ret: 0         st:-1 flags:1  ts: 8.200839
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 8.000000 pos:  20396 size:  3281
+ret: 0         st: 0 flags:0 dts: 8.100000 pts: 8.100000 pos:  23677 size:   631
+ret: 0         st: 0 flags:0 dts: 8.200000 pts: 8.200000 pos:  24308 size:   349
+ret: 0         st: 0 flags:0 dts: 8.300000 pts: 8.300000 pos:  24657 size:   319
+ret: 0         st: 0 flags:0  ts: 6.095020
+ret: 0         st: 0 flags:1 dts: 7.000000 pts: 7.000000 pos:  13643 size:  3234
+ret: 0         st: 0 flags:0 dts: 7.100000 pts: 7.100000 pos:  16877 size:   585
+ret: 0         st: 0 flags:0 dts: 7.200000 pts: 7.200000 pos:  17462 size:   442
+ret: 0         st: 0 flags:0 dts: 7.300000 pts: 7.300000 pos:  17904 size:   371
+ret: 0         st: 0 flags:1  ts: 3.989160
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st:-1 flags:0  ts: 1.883340
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st:-1 flags:1  ts:-0.222493
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 5.000000 pos:     48 size:  2917
+ret: 0         st: 0 flags:0 dts: 5.100000 pts: 5.100000 pos:   2965 size:   672
+ret: 0         st: 0 flags:0 dts: 5.200000 pts: 5.200000 pos:   3637 size:   464
+ret: 0         st: 0 flags:0 dts: 5.300000 pts: 5.300000 pos:   4101 size:   454
+ret: 0         st: 0 flags:0  ts: 12.671680
+ret: 0         st: 0 flags:1 dts: 13.000000 pts: 13.000000 pos:  54444 size:  3310
+ret: 0         st: 0 flags:0 dts: 13.100000 pts: 13.100000 pos:  57754 size:   540
+ret: 0         st: 0 flags:0 dts: 13.200000 pts: 13.200000 pos:  58294 size:   419
+ret: 0         st: 0 flags:0 dts: 13.300000 pts: 13.300000 pos:  58713 size:   338
+ret: 0         st: 0 flags:1  ts: 10.565820
+ret: 0         st: 0 flags:1 dts: 10.000000 pts: 10.000000 pos:  33523 size:  3221
+ret: 0         st: 0 flags:0 dts: 10.100000 pts: 10.100000 pos:  36744 size:   575
+ret: 0         st: 0 flags:0 dts: 10.200000 pts: 10.200000 pos:  37319 size:   438
+ret: 0         st: 0 flags:0 dts: 10.300000 pts: 10.300000 pos:  37757 size:   449
+ret: 0         st:-1 flags:0  ts: 8.460008
+ret: 0         st: 0 flags:1 dts: 9.000000 pts: 9.000000 pos:  27090 size:  3182
+ret: 0         st: 0 flags:0 dts: 9.100000 pts: 9.100000 pos:  30272 size:   481
+ret: 0         st: 0 flags:0 dts: 9.200000 pts: 9.200000 pos:  30753 size:   334
+ret: 0         st: 0 flags:0 dts: 9.300000 pts: 9.300000 pos:  31087 size:   328
+ret: 0         st:-1 flags:1  ts: 6.354175
+ret: 0         st: 0 flags:1 dts: 6.000000 pts: 6.000000 pos:   6953 size:  3166
+ret: 0         st: 0 flags:0 dts: 6.100000 pts: 6.100000 pos:  10119 size:   599
+ret: 0         st: 0 flags:0 dts: 6.200000 pts: 6.200000 pos:  10718 size:   418
+ret: 0         st: 0 flags:0 dts: 6.300000 pts: 6.300000 pos:  11136 size:   354

diff --git a/tests/ref/seek/extra-mp4 b/tests/ref/seek/extra-mp4
index c25544c..c17ce40 100644
--- a/tests/ref/seek/extra-mp4
+++ b/tests/ref/seek/extra-mp4

@@ -28,10 +28,10 @@
 ret: 0         st: 0 flags:0 dts: 50.666667 pts: 50.666667 pos:5927464 size:   150
 ret: 0         st: 0 flags:0 dts: 50.700000 pts: 50.700000 pos:5927614 size:   176
 ret: 0         st:-1 flags:1  ts: 153.470835
-ret: 0         st: 0 flags:1 dts: 153.466667 pts: 153.500000 pos:15867700 size: 96169
-ret: 0         st: 0 flags:0 dts: 153.500000 pts: 153.533333 pos:15963869 size:   785
-ret: 0         st: 0 flags:0 dts: 153.533333 pts: 153.633333 pos:15964654 size:  3135
-ret: 0         st: 0 flags:0 dts: 153.566667 pts: 153.566667 pos:15967789 size:   859
+ret: 0         st: 0 flags:1 dts: 151.966667 pts: 152.000000 pos:15705355 size:146924
+ret: 0         st: 0 flags:0 dts: 152.000000 pts: 152.100000 pos:15852279 size:  1355
+ret: 0         st: 0 flags:0 dts: 152.033333 pts: 152.033333 pos:15853634 size:   211
+ret: 0         st: 0 flags:0 dts: 152.066667 pts: 152.066667 pos:15853845 size:   217
 ret: 0         st: 0 flags:0  ts: 76.365000
 ret: 0         st: 0 flags:1 dts: 77.833333 pts: 77.866667 pos:8659657 size: 41182
 ret: 0         st: 0 flags:0 dts: 77.866667 pts: 77.966667 pos:8700839 size:  4197
@@ -83,10 +83,10 @@
 ret: 0         st: 0 flags:0 dts: 101.366667 pts: 101.366667 pos:11053072 size:   562
 ret: 0         st: 0 flags:0 dts: 101.400000 pts: 101.400000 pos:11053634 size:   599
 ret: 0         st:-1 flags:0  ts: 25.306672
-ret: 0         st: 0 flags:1 dts: 27.400000 pts: 27.433333 pos:2674605 size:127383
-ret: 0         st: 0 flags:0 dts: 27.433333 pts: 27.466667 pos:2801988 size:    68
-ret: 0         st: 0 flags:0 dts: 27.466667 pts: 27.500000 pos:2802268 size:  1754
-ret: 0         st: 0 flags:0 dts: 27.500000 pts: 27.533333 pos:2804022 size:  4071
+ret: 0         st: 0 flags:1 dts: 25.300000 pts: 25.333333 pos:2607246 size: 40273
+ret: 0         st: 0 flags:0 dts: 25.333333 pts: 25.433333 pos:2647519 size:  2959
+ret: 0         st: 0 flags:0 dts: 25.366667 pts: 25.366667 pos:2650478 size:   197
+ret: 0         st: 0 flags:0 dts: 25.400000 pts: 25.400000 pos:2650675 size:   230
 ret: 0         st:-1 flags:1  ts: 128.200839
 ret: 0         st: 0 flags:1 dts: 127.833333 pts: 127.866667 pos:13514072 size: 67382
 ret: 0         st: 0 flags:0 dts: 127.866667 pts: 127.966667 pos:13581454 size:  2936

diff --git a/tests/ref/seek/lavf-alaw b/tests/ref/seek/lavf-alaw
index 4b1f8fb..8d517fa 100644
--- a/tests/ref/seek/lavf-alaw
+++ b/tests/ref/seek/lavf-alaw

@@ -1,53 +1,53 @@
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 1.894150 pts: 1.894150 pos:  41766 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.894150 pts: 1.894150 pos:  41766 size:   882
 ret: 0         st: 0 flags:0  ts: 0.788345
-ret: 0         st: 0 flags:1 dts: 0.788345 pts: 0.788345 pos:  17383 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.788345 pts: 0.788345 pos:  17383 size:   882
 ret: 0         st: 0 flags:1  ts:-0.317506
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st:-1 flags:0  ts: 2.576668
 ret:-EOF
 ret: 0         st:-1 flags:1  ts: 1.470835
-ret: 0         st: 0 flags:1 dts: 1.470839 pts: 1.470839 pos:  32432 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.470839 pts: 1.470839 pos:  32432 size:   882
 ret: 0         st: 0 flags:0  ts: 0.364989
-ret: 0         st: 0 flags:1 dts: 0.364989 pts: 0.364989 pos:   8048 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.364989 pts: 0.364989 pos:   8048 size:   882
 ret: 0         st: 0 flags:1  ts:-0.740816
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st:-1 flags:0  ts: 2.153336
 ret:-EOF
 ret: 0         st:-1 flags:1  ts: 1.047503
-ret: 0         st: 0 flags:1 dts: 1.047483 pts: 1.047483 pos:  23097 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.047483 pts: 1.047483 pos:  23097 size:   882
 ret: 0         st: 0 flags:0  ts:-0.058322
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st: 0 flags:1  ts: 2.835828
 ret:-EOF
 ret: 0         st:-1 flags:0  ts: 1.730004
-ret: 0         st: 0 flags:1 dts: 1.730023 pts: 1.730023 pos:  38147 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.730023 pts: 1.730023 pos:  38147 size:   882
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.624172 pts: 0.624172 pos:  13763 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.624172 pts: 0.624172 pos:  13763 size:   882
 ret: 0         st: 0 flags:0  ts:-0.481678
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st: 0 flags:1  ts: 2.412517
 ret:-EOF
 ret: 0         st:-1 flags:0  ts: 1.306672
-ret: 0         st: 0 flags:1 dts: 1.306667 pts: 1.306667 pos:  28812 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.306667 pts: 1.306667 pos:  28812 size:   882
 ret: 0         st:-1 flags:1  ts: 0.200839
-ret: 0         st: 0 flags:1 dts: 0.200816 pts: 0.200816 pos:   4428 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.200816 pts: 0.200816 pos:   4428 size:   882
 ret: 0         st: 0 flags:0  ts:-0.904989
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st: 0 flags:1  ts: 1.989161
 ret: 0         st: 0 flags:1 dts: 1.989161 pts: 1.989161 pos:  43861 size:   239
 ret: 0         st:-1 flags:0  ts: 0.883340
-ret: 0         st: 0 flags:1 dts: 0.883356 pts: 0.883356 pos:  19478 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.883356 pts: 0.883356 pos:  19478 size:   882
 ret: 0         st:-1 flags:1  ts:-0.222493
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st: 0 flags:0  ts: 2.671655
 ret:-EOF
 ret: 0         st: 0 flags:1  ts: 1.565850
-ret: 0         st: 0 flags:1 dts: 1.565850 pts: 1.565850 pos:  34527 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.565850 pts: 1.565850 pos:  34527 size:   882
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.460000 pts: 0.460000 pos:  10143 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.460000 pts: 0.460000 pos:  10143 size:   882
 ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882

diff --git a/tests/ref/seek/lavf-ffm b/tests/ref/seek/lavf-ffm
deleted file mode 100644
index eceed1a..0000000
--- a/tests/ref/seek/lavf-ffm
+++ /dev/null

@@ -1,53 +0,0 @@
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   8192 size: 24663
-ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   8192 size: 24663
-ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st: 0 flags:0  ts: 0.788334
-ret: 0         st: 1 flags:1 dts: 0.825011 pts: 0.825011 pos: 327680 size:   209
-ret: 0         st: 0 flags:1  ts:-0.317499
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   8192 size: 24663
-ret: 0         st: 1 flags:0  ts: 2.576668
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st: 1 flags:1  ts: 1.470835
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st:-1 flags:0  ts: 0.365002
-ret: 0         st: 1 flags:1 dts: 0.380930 pts: 0.380930 pos: 163840 size:   209
-ret: 0         st:-1 flags:1  ts:-0.740831
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   8192 size: 24663
-ret: 0         st: 0 flags:0  ts: 2.153336
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st: 0 flags:1  ts: 1.047503
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st: 1 flags:0  ts:-0.058330
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   8192 size: 24663
-ret: 0         st: 1 flags:1  ts: 2.835837
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st:-1 flags:0  ts: 1.730004
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 1 flags:1 dts: 0.642154 pts: 0.642154 pos: 274432 size:   209
-ret: 0         st: 0 flags:0  ts:-0.481662
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   8192 size: 24663
-ret: 0         st: 0 flags:1  ts: 2.412505
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st: 1 flags:0  ts: 1.306672
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st: 1 flags:1  ts: 0.200839
-ret: 0         st: 1 flags:1 dts: 0.224195 pts: 0.224195 pos: 114688 size:   209
-ret: 0         st:-1 flags:0  ts:-0.904994
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   8192 size: 24663
-ret: 0         st:-1 flags:1  ts: 1.989173
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st: 0 flags:0  ts: 0.883340
-ret: 0         st: 0 flags:0 dts: 0.880000 pts: 0.920000 pos: 339968 size: 12307
-ret: 0         st: 0 flags:1  ts:-0.222493
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   8192 size: 24663
-ret: 0         st: 1 flags:0  ts: 2.671674
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st: 1 flags:1  ts: 1.565841
-ret: 0         st: 1 flags:1 dts: 0.929501 pts: 0.929501 pos: 376832 size:   209
-ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 1 flags:1 dts: 0.485420 pts: 0.485420 pos: 221184 size:   209
-ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   8192 size: 24663

diff --git a/tests/ref/seek/lavf-mulaw b/tests/ref/seek/lavf-mulaw
index 4b1f8fb..8d517fa 100644
--- a/tests/ref/seek/lavf-mulaw
+++ b/tests/ref/seek/lavf-mulaw

@@ -1,53 +1,53 @@
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 1.894150 pts: 1.894150 pos:  41766 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.894150 pts: 1.894150 pos:  41766 size:   882
 ret: 0         st: 0 flags:0  ts: 0.788345
-ret: 0         st: 0 flags:1 dts: 0.788345 pts: 0.788345 pos:  17383 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.788345 pts: 0.788345 pos:  17383 size:   882
 ret: 0         st: 0 flags:1  ts:-0.317506
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st:-1 flags:0  ts: 2.576668
 ret:-EOF
 ret: 0         st:-1 flags:1  ts: 1.470835
-ret: 0         st: 0 flags:1 dts: 1.470839 pts: 1.470839 pos:  32432 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.470839 pts: 1.470839 pos:  32432 size:   882
 ret: 0         st: 0 flags:0  ts: 0.364989
-ret: 0         st: 0 flags:1 dts: 0.364989 pts: 0.364989 pos:   8048 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.364989 pts: 0.364989 pos:   8048 size:   882
 ret: 0         st: 0 flags:1  ts:-0.740816
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st:-1 flags:0  ts: 2.153336
 ret:-EOF
 ret: 0         st:-1 flags:1  ts: 1.047503
-ret: 0         st: 0 flags:1 dts: 1.047483 pts: 1.047483 pos:  23097 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.047483 pts: 1.047483 pos:  23097 size:   882
 ret: 0         st: 0 flags:0  ts:-0.058322
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st: 0 flags:1  ts: 2.835828
 ret:-EOF
 ret: 0         st:-1 flags:0  ts: 1.730004
-ret: 0         st: 0 flags:1 dts: 1.730023 pts: 1.730023 pos:  38147 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.730023 pts: 1.730023 pos:  38147 size:   882
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.624172 pts: 0.624172 pos:  13763 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.624172 pts: 0.624172 pos:  13763 size:   882
 ret: 0         st: 0 flags:0  ts:-0.481678
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st: 0 flags:1  ts: 2.412517
 ret:-EOF
 ret: 0         st:-1 flags:0  ts: 1.306672
-ret: 0         st: 0 flags:1 dts: 1.306667 pts: 1.306667 pos:  28812 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.306667 pts: 1.306667 pos:  28812 size:   882
 ret: 0         st:-1 flags:1  ts: 0.200839
-ret: 0         st: 0 flags:1 dts: 0.200816 pts: 0.200816 pos:   4428 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.200816 pts: 0.200816 pos:   4428 size:   882
 ret: 0         st: 0 flags:0  ts:-0.904989
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st: 0 flags:1  ts: 1.989161
 ret: 0         st: 0 flags:1 dts: 1.989161 pts: 1.989161 pos:  43861 size:   239
 ret: 0         st:-1 flags:0  ts: 0.883340
-ret: 0         st: 0 flags:1 dts: 0.883356 pts: 0.883356 pos:  19478 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.883356 pts: 0.883356 pos:  19478 size:   882
 ret: 0         st:-1 flags:1  ts:-0.222493
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882
 ret: 0         st: 0 flags:0  ts: 2.671655
 ret:-EOF
 ret: 0         st: 0 flags:1  ts: 1.565850
-ret: 0         st: 0 flags:1 dts: 1.565850 pts: 1.565850 pos:  34527 size:  1024
+ret: 0         st: 0 flags:1 dts: 1.565850 pts: 1.565850 pos:  34527 size:   882
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.460000 pts: 0.460000 pos:  10143 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.460000 pts: 0.460000 pos:  10143 size:   882
 ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:  1024
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:   882

diff --git a/tests/ref/seek/lavf-mxf b/tests/ref/seek/lavf-mxf
index 93d4197..495badd 100644
--- a/tests/ref/seek/lavf-mxf
+++ b/tests/ref/seek/lavf-mxf

@@ -1,48 +1,48 @@
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   7168 size: 24801
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   7168 size: 24801
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 461312 size: 24711
 ret: 0         st: 0 flags:0  ts: 0.800000
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 461312 size: 24711
 ret: 0         st: 0 flags:1  ts:-0.320000
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   7168 size: 24801
 ret:-1         st: 1 flags:0  ts: 2.576667
 ret: 0         st: 1 flags:1  ts: 1.470833
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 461312 size: 24711
 ret: 0         st:-1 flags:0  ts: 0.365002
-ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.480000 pos: 211456 size: 24786
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.480000 pos: 212480 size: 24786
 ret: 0         st:-1 flags:1  ts:-0.740831
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   7168 size: 24801
 ret:-1         st: 0 flags:0  ts: 2.160000
 ret: 0         st: 0 flags:1  ts: 1.040000
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 461312 size: 24711
 ret: 0         st: 1 flags:0  ts:-0.058333
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   7168 size: 24801
 ret: 0         st: 1 flags:1  ts: 2.835833
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 461312 size: 24711
 ret:-1         st:-1 flags:0  ts: 1.730004
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.480000 pos: 211456 size: 24786
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.480000 pos: 212480 size: 24786
 ret: 0         st: 0 flags:0  ts:-0.480000
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   7168 size: 24801
 ret: 0         st: 0 flags:1  ts: 2.400000
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 461312 size: 24711
 ret:-1         st: 1 flags:0  ts: 1.306667
 ret: 0         st: 1 flags:1  ts: 0.200833
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   7168 size: 24801
 ret: 0         st:-1 flags:0  ts:-0.904994
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   7168 size: 24801
 ret: 0         st:-1 flags:1  ts: 1.989173
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 461312 size: 24711
 ret: 0         st: 0 flags:0  ts: 0.880000
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 461312 size: 24711
 ret: 0         st: 0 flags:1  ts:-0.240000
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   7168 size: 24801
 ret:-1         st: 1 flags:0  ts: 2.671667
 ret: 0         st: 1 flags:1  ts: 1.565833
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 461312 size: 24711
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.480000 pos: 211456 size: 24786
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.480000 pos: 212480 size: 24786
 ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   7168 size: 24801

diff --git a/tests/ref/seek/lavf-mxf_d10 b/tests/ref/seek/lavf-mxf_d10
index 17cca29..ad3e4da 100644
--- a/tests/ref/seek/lavf-mxf_d10
+++ b/tests/ref/seek/lavf-mxf_d10

@@ -1,53 +1,53 @@
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:150000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:150000
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:150000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:150000
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st: 0 flags:0  ts: 0.800000
-ret: 0         st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:4265984 size:150000
+ret: 0         st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:4267008 size:150000
 ret: 0         st: 0 flags:1  ts:-0.320000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:150000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:150000
 ret: 0         st: 1 flags:0  ts: 2.576667
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st: 1 flags:1  ts: 1.470833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st:-1 flags:0  ts: 0.365002
-ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:1923072 size:150000
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:1924096 size:150000
 ret: 0         st:-1 flags:1  ts:-0.740831
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:150000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:150000
 ret: 0         st: 0 flags:0  ts: 2.160000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st: 0 flags:1  ts: 1.040000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st: 1 flags:0  ts:-0.058333
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:150000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:150000
 ret: 0         st: 1 flags:1  ts: 2.835833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st:-1 flags:0  ts: 1.730004
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.640000 pts: 0.640000 pos:3414016 size:150000
+ret: 0         st: 0 flags:1 dts: 0.640000 pts: 0.640000 pos:3415040 size:150000
 ret: 0         st: 0 flags:0  ts:-0.480000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:150000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:150000
 ret: 0         st: 0 flags:1  ts: 2.400000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st: 1 flags:0  ts: 1.306667
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st: 1 flags:1  ts: 0.200833
-ret: 0         st: 0 flags:1 dts: 0.200000 pts: 0.200000 pos:1071104 size:150000
+ret: 0         st: 0 flags:1 dts: 0.200000 pts: 0.200000 pos:1072128 size:150000
 ret: 0         st:-1 flags:0  ts:-0.904994
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:150000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:150000
 ret: 0         st:-1 flags:1  ts: 1.989173
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st: 0 flags:0  ts: 0.880000
-ret: 0         st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:4691968 size:150000
+ret: 0         st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:4692992 size:150000
 ret: 0         st: 0 flags:1  ts:-0.240000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:150000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:150000
 ret: 0         st: 1 flags:0  ts: 2.671667
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st: 1 flags:1  ts: 1.565833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5118976 size:150000
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:2562048 size:150000
+ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:2563072 size:150000
 ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:150000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:150000

diff --git a/tests/ref/seek/lavf-mxf_dv25 b/tests/ref/seek/lavf-mxf_dv25
index 42b3bbd..755b847 100644
--- a/tests/ref/seek/lavf-mxf_dv25
+++ b/tests/ref/seek/lavf-mxf_dv25

@@ -1,53 +1,53 @@
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:144000
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:144000
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3681280 size:144000
 ret: 0         st: 0 flags:0  ts: 0.800000
-ret: 0         st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:3067904 size:144000
+ret: 0         st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:3068928 size:144000
 ret: 0         st: 0 flags:1  ts:-0.320000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:144000
 ret: 0         st: 1 flags:0  ts: 2.576667
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3681280 size:144000
 ret: 0         st: 1 flags:1  ts: 1.470833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3681280 size:144000
 ret: 0         st:-1 flags:0  ts: 0.365002
-ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:1383936 size:144000
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:1384960 size:144000
 ret: 0         st:-1 flags:1  ts:-0.740831
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:144000
 ret: 0         st: 0 flags:0  ts: 2.160000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3681280 size:144000
 ret: 0         st: 0 flags:1  ts: 1.040000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3681280 size:144000
 ret: 0         st: 1 flags:0  ts:-0.058333
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:144000
 ret: 0         st: 1 flags:1  ts: 2.835833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3681280 size:144000
 ret: 0         st:-1 flags:0  ts: 1.730004
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3681280 size:144000
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.640000 pts: 0.640000 pos:2455552 size:144000
+ret: 0         st: 0 flags:1 dts: 0.640000 pts: 0.640000 pos:2456576 size:144000
 ret: 0         st: 0 flags:0  ts:-0.480000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:144000
 ret: 0         st: 0 flags:1  ts: 2.400000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3681280 size:144000
 ret: 0         st: 1 flags:0  ts: 1.306667
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3681280 size:144000
 ret: 0         st: 1 flags:1  ts: 0.200833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 771584 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 772608 size:144000
 ret: 0         st:-1 flags:0  ts:-0.904994
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:144000
 ret: 0         st:-1 flags:1  ts: 1.989173
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3681280 size:144000
 ret: 0         st: 0 flags:0  ts: 0.880000
-ret: 0         st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:3374080 size:144000
+ret: 0         st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:3375104 size:144000
 ret: 0         st: 0 flags:1  ts:-0.240000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:144000
 ret: 0         st: 1 flags:0  ts: 2.671667
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3681280 size:144000
 ret: 0         st: 1 flags:1  ts: 1.565833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3680256 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:3681280 size:144000
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:1843200 size:144000
+ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:1844224 size:144000
 ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:144000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:144000

diff --git a/tests/ref/seek/lavf-mxf_dvcpro50 b/tests/ref/seek/lavf-mxf_dvcpro50
index c3d9d3a..f6c26db 100644
--- a/tests/ref/seek/lavf-mxf_dvcpro50
+++ b/tests/ref/seek/lavf-mxf_dvcpro50

@@ -1,53 +1,53 @@
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:288000
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:288000
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7134208 size:288000
 ret: 0         st: 0 flags:0  ts: 0.800000
-ret: 0         st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:5945344 size:288000
+ret: 0         st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:5946368 size:288000
 ret: 0         st: 0 flags:1  ts:-0.320000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:288000
 ret: 0         st: 1 flags:0  ts: 2.576667
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7134208 size:288000
 ret: 0         st: 1 flags:1  ts: 1.470833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7134208 size:288000
 ret: 0         st:-1 flags:0  ts: 0.365002
-ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:2678784 size:288000
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:2679808 size:288000
 ret: 0         st:-1 flags:1  ts:-0.740831
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:288000
 ret: 0         st: 0 flags:0  ts: 2.160000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7134208 size:288000
 ret: 0         st: 0 flags:1  ts: 1.040000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7134208 size:288000
 ret: 0         st: 1 flags:0  ts:-0.058333
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:288000
 ret: 0         st: 1 flags:1  ts: 2.835833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7134208 size:288000
 ret: 0         st:-1 flags:0  ts: 1.730004
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7134208 size:288000
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.640000 pts: 0.640000 pos:4757504 size:288000
+ret: 0         st: 0 flags:1 dts: 0.640000 pts: 0.640000 pos:4758528 size:288000
 ret: 0         st: 0 flags:0  ts:-0.480000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:288000
 ret: 0         st: 0 flags:1  ts: 2.400000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7134208 size:288000
 ret: 0         st: 1 flags:0  ts: 1.306667
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7134208 size:288000
 ret: 0         st: 1 flags:1  ts: 0.200833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:1490944 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:1491968 size:288000
 ret: 0         st:-1 flags:0  ts:-0.904994
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:288000
 ret: 0         st:-1 flags:1  ts: 1.989173
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:7134208 size:288000
 ret: 0         st: 0 flags:0  ts: 0.880000
-ret: 0         st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:6539264 size:288000
+ret: 0         st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:6540288 size:288000
 ret: 0         st: 0 flags:1  ts:-0.240000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:288000
 ret: 0         st: 1 flags:0  ts: 2.671667
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7134208 size:288000
 ret: 0         st: 1 flags:1  ts: 1.565833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7133184 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:7134208 size:288000
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:3569664 size:288000
+ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:3570688 size:288000
 ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:288000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   7168 size:288000

diff --git a/tests/ref/seek/lavf-mxf_opatom b/tests/ref/seek/lavf-mxf_opatom
index ca17ba7..11367cd 100644
--- a/tests/ref/seek/lavf-mxf_opatom
+++ b/tests/ref/seek/lavf-mxf_opatom

@@ -1,53 +1,53 @@
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:188416
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:188416
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:188416
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:188416
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st: 0 flags:0  ts: 0.800000
-ret: 0         st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:3773465 size:188416
+ret: 0         st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:3773977 size:188416
 ret: 0         st: 0 flags:1  ts:-0.320000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:188416
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:188416
 ret: 0         st:-1 flags:0  ts: 2.576668
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st:-1 flags:1  ts: 1.470835
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st: 0 flags:0  ts: 0.360000
-ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:1700889 size:188416
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:1701401 size:188416
 ret: 0         st: 0 flags:1  ts:-0.760000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:188416
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:188416
 ret: 0         st:-1 flags:0  ts: 2.153336
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st:-1 flags:1  ts: 1.047503
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st: 0 flags:0  ts:-0.040000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:188416
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:188416
 ret: 0         st: 0 flags:1  ts: 2.840000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st:-1 flags:0  ts: 1.730004
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.640000 pts: 0.640000 pos:3019801 size:188416
+ret: 0         st: 0 flags:1 dts: 0.640000 pts: 0.640000 pos:3020313 size:188416
 ret: 0         st: 0 flags:0  ts:-0.480000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:188416
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:188416
 ret: 0         st: 0 flags:1  ts: 2.400000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st:-1 flags:0  ts: 1.306672
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st:-1 flags:1  ts: 0.200839
-ret: 0         st: 0 flags:1 dts: 0.200000 pts: 0.200000 pos: 947225 size:188416
+ret: 0         st: 0 flags:1 dts: 0.200000 pts: 0.200000 pos: 947737 size:188416
 ret: 0         st: 0 flags:0  ts:-0.920000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:188416
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:188416
 ret: 0         st: 0 flags:1  ts: 2.000000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st:-1 flags:0  ts: 0.883340
-ret: 0         st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:4150297 size:188416
+ret: 0         st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:4150809 size:188416
 ret: 0         st:-1 flags:1  ts:-0.222493
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:188416
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:188416
 ret: 0         st: 0 flags:0  ts: 2.680000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st: 0 flags:1  ts: 1.560000
-ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527129 size:188416
+ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:4527641 size:188416
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:2266137 size:188416
+ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:2266649 size:188416
 ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:188416
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:188416

diff --git a/tests/ref/seek/lavf-mxf_opatom_audio b/tests/ref/seek/lavf-mxf_opatom_audio
index 2d1a7cc..9bc0bfc 100644
--- a/tests/ref/seek/lavf-mxf_opatom_audio
+++ b/tests/ref/seek/lavf-mxf_opatom_audio

@@ -1,53 +1,53 @@
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:  3840
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:  3840
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st: 0 flags:0  ts: 0.788333
-ret: 0         st: 0 flags:1 dts: 0.788333 pts: 0.788333 pos:  80313 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.788333 pts: 0.788333 pos:  81337 size:  3840
 ret: 0         st: 0 flags:1  ts:-0.317500
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:  3840
 ret: 0         st:-1 flags:0  ts: 2.576668
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st:-1 flags:1  ts: 1.470835
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st: 0 flags:0  ts: 0.365000
-ret: 0         st: 0 flags:1 dts: 0.365000 pts: 0.365000 pos:  39673 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.365000 pts: 0.365000 pos:  40697 size:  3840
 ret: 0         st: 0 flags:1  ts:-0.740833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:  3840
 ret: 0         st:-1 flags:0  ts: 2.153336
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st:-1 flags:1  ts: 1.047503
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st: 0 flags:0  ts:-0.058333
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:  3840
 ret: 0         st: 0 flags:1  ts: 2.835833
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st:-1 flags:0  ts: 1.730004
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.624167 pts: 0.624167 pos:  64553 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.624167 pts: 0.624167 pos:  65577 size:  3840
 ret: 0         st: 0 flags:0  ts:-0.481667
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:  3840
 ret: 0         st: 0 flags:1  ts: 2.412500
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st:-1 flags:0  ts: 1.306672
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st:-1 flags:1  ts: 0.200839
-ret: 0         st: 0 flags:1 dts: 0.200833 pts: 0.200833 pos:  23913 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.200833 pts: 0.200833 pos:  24937 size:  3840
 ret: 0         st: 0 flags:0  ts:-0.905000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:  3840
 ret: 0         st: 0 flags:1  ts: 1.989167
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st:-1 flags:0  ts: 0.883340
-ret: 0         st: 0 flags:1 dts: 0.883333 pts: 0.883333 pos:  89433 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.883333 pts: 0.883333 pos:  90457 size:  3840
 ret: 0         st:-1 flags:1  ts:-0.222493
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:  3840
 ret: 0         st: 0 flags:0  ts: 2.671667
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st: 0 flags:1  ts: 1.565833
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101655 size:     2
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.460000 pts: 0.460000 pos:  48793 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.460000 pts: 0.460000 pos:  49817 size:  3840
 ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5657 size:  3840

diff --git a/tests/ref/seek/lavf-rm b/tests/ref/seek/lavf-rm
index 4b19173..756e157 100644
--- a/tests/ref/seek/lavf-rm
+++ b/tests/ref/seek/lavf-rm

@@ -1,4 +1,4 @@
-ret: 0         st: 1 flags:1 dts: 0.000000 pts: 0.000000 pos:    395 size:   278
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:    696 size: 31082
 ret: 0         st:-1 flags:0  ts:-1.000000
 ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:    696 size: 31082
 ret: 0         st:-1 flags:1  ts: 1.894167
@@ -20,7 +20,7 @@
 ret: 0         st: 0 flags:1  ts: 1.048000
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos: 314992 size: 31143
 ret: 0         st: 1 flags:0  ts:-0.058000
-ret: 0         st: 1 flags:1 dts: 0.000000 pts: 0.000000 pos:    395 size:   278
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:    696 size: 31082
 ret: 0         st: 1 flags:1  ts: 2.836000
 ret: 0         st: 1 flags:1 dts: 0.975000 pts: 0.975000 pos: 346138 size:   278
 ret: 0         st:-1 flags:0  ts: 1.730004
@@ -34,7 +34,7 @@
 ret: 0         st: 1 flags:0  ts: 1.307000
 ret: 0         st: 1 flags:1 dts: 0.975000 pts: 0.975000 pos: 346138 size:   278
 ret: 0         st: 1 flags:1  ts: 0.201000
-ret: 0         st: 1 flags:1 dts: 0.174000 pts: 0.174000 pos:  78977 size:   278
+ret: 0         st: 0 flags:0 dts: 0.200000 pts: 0.200000 pos:  79274 size: 11400
 ret: 0         st:-1 flags:0  ts:-0.904994
 ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:    696 size: 31082
 ret: 0         st:-1 flags:1  ts: 1.989173

diff --git a/tests/ref/seek/lavf-yuv4mpeg b/tests/ref/seek/lavf-yuv4mpeg
index 6be9ba9..c416b46 100644
--- a/tests/ref/seek/lavf-yuv4mpeg
+++ b/tests/ref/seek/lavf-yuv4mpeg

@@ -4,14 +4,16 @@
 ret:-EOF
 ret: 0         st: 0 flags:0  ts: 0.800000
 ret: 0         st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:3041464 size:152064
-ret:-1         st: 0 flags:1  ts:-0.320000
+ret: 0         st: 0 flags:1  ts:-0.320000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:     64 size:152064
 ret: 0         st:-1 flags:0  ts: 2.576668
 ret:-EOF
 ret: 0         st:-1 flags:1  ts: 1.470835
 ret:-EOF
 ret: 0         st: 0 flags:0  ts: 0.360000
 ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:1368694 size:152064
-ret:-1         st: 0 flags:1  ts:-0.760000
+ret: 0         st: 0 flags:1  ts:-0.760000
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:     64 size:152064
 ret: 0         st:-1 flags:0  ts: 2.153336
 ret:-EOF
 ret: 0         st:-1 flags:1  ts: 1.047503
@@ -22,24 +24,26 @@
 ret: 0         st:-1 flags:0  ts: 1.730004
 ret:-EOF
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.640000 pts: 0.640000 pos:2433184 size:152064
+ret: 0         st: 0 flags:1 dts: 0.600000 pts: 0.600000 pos:2281114 size:152064
 ret:-1         st: 0 flags:0  ts:-0.480000
 ret: 0         st: 0 flags:1  ts: 2.400000
 ret:-EOF
 ret: 0         st:-1 flags:0  ts: 1.306672
 ret:-EOF
 ret: 0         st:-1 flags:1  ts: 0.200839
-ret: 0         st: 0 flags:1 dts: 0.200000 pts: 0.200000 pos: 760414 size:152064
+ret: 0         st: 0 flags:1 dts: 0.160000 pts: 0.160000 pos: 608344 size:152064
 ret:-1         st: 0 flags:0  ts:-0.920000
 ret: 0         st: 0 flags:1  ts: 2.000000
 ret:-EOF
 ret: 0         st:-1 flags:0  ts: 0.883340
 ret: 0         st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:3345604 size:152064
-ret:-1         st:-1 flags:1  ts:-0.222493
+ret: 0         st:-1 flags:1  ts:-0.222493
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:     64 size:152064
 ret: 0         st: 0 flags:0  ts: 2.680000
 ret:-EOF
 ret: 0         st: 0 flags:1  ts: 1.560000
 ret:-EOF
 ret: 0         st:-1 flags:0  ts: 0.460008
 ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:1824904 size:152064
-ret:-1         st:-1 flags:1  ts:-0.645825
+ret: 0         st:-1 flags:1  ts:-0.645825
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:     64 size:152064

diff --git a/tests/ref/seek/test-iibbibb-mp4 b/tests/ref/seek/test-iibbibb-mp4
new file mode 100644
index 0000000..5a89600
--- /dev/null
+++ b/tests/ref/seek/test-iibbibb-mp4

@@ -0,0 +1,122 @@
+ret: 0         st: 0 flags:1 dts:-2.000000 pts: 0.000000 pos:     48 size:  7804
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st:-1 flags:0  ts:-1.000000
+ret: 0         st: 0 flags:1 dts:-2.000000 pts: 0.000000 pos:     48 size:  7804
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st:-1 flags:1  ts: 4.894167
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret:-1         st: 0 flags:0  ts: 10.788330
+ret: 0         st: 0 flags:1  ts: 3.682495
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st:-1 flags:0  ts: 9.576668
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0 dts: 9.000000 pts: 11.000000 pos:  44894 size:  1437
+ret: 0         st: 0 flags:0 dts: 10.000000 pts: 10.000000 pos:  46331 size:  1186
+ret:-EOF
+ret: 0         st:-1 flags:1  ts: 2.470835
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0  ts: 8.364990
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0 dts: 9.000000 pts: 11.000000 pos:  44894 size:  1437
+ret: 0         st: 0 flags:0 dts: 10.000000 pts: 10.000000 pos:  46331 size:  1186
+ret:-EOF
+ret: 0         st: 0 flags:1  ts: 1.259155
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st:-1 flags:0  ts: 7.153336
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0 dts: 9.000000 pts: 11.000000 pos:  44894 size:  1437
+ret: 0         st: 0 flags:0 dts: 10.000000 pts: 10.000000 pos:  46331 size:  1186
+ret:-EOF
+ret: 0         st:-1 flags:1  ts: 0.047503
+ret: 0         st: 0 flags:1 dts:-2.000000 pts: 0.000000 pos:     48 size:  7804
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st: 0 flags:0  ts: 5.941650
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:0 dts: 6.000000 pts: 8.000000 pos:  35757 size:  1273
+ret: 0         st: 0 flags:0 dts: 7.000000 pts: 7.000000 pos:  37030 size:  1130
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:1  ts: 11.835815
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0 dts: 9.000000 pts: 11.000000 pos:  44894 size:  1437
+ret: 0         st: 0 flags:0 dts: 10.000000 pts: 10.000000 pos:  46331 size:  1186
+ret:-EOF
+ret: 0         st:-1 flags:0  ts: 4.730004
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:0 dts: 6.000000 pts: 8.000000 pos:  35757 size:  1273
+ret: 0         st: 0 flags:0 dts: 7.000000 pts: 7.000000 pos:  37030 size:  1130
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st:-1 flags:1  ts: 10.624171
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0 dts: 9.000000 pts: 11.000000 pos:  44894 size:  1437
+ret: 0         st: 0 flags:0 dts: 10.000000 pts: 10.000000 pos:  46331 size:  1186
+ret:-EOF
+ret: 0         st: 0 flags:0  ts: 3.518311
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:1  ts: 9.412476
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:0 dts: 6.000000 pts: 8.000000 pos:  35757 size:  1273
+ret: 0         st: 0 flags:0 dts: 7.000000 pts: 7.000000 pos:  37030 size:  1130
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st:-1 flags:0  ts: 2.306672
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st:-1 flags:1  ts: 8.200839
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:0 dts: 6.000000 pts: 8.000000 pos:  35757 size:  1273
+ret: 0         st: 0 flags:0 dts: 7.000000 pts: 7.000000 pos:  37030 size:  1130
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0  ts: 1.095032
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:1  ts: 6.989197
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st:-1 flags:0  ts:-0.116660
+ret: 0         st: 0 flags:1 dts:-2.000000 pts: 0.000000 pos:     48 size:  7804
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st:-1 flags:1  ts: 5.777507
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret:-1         st: 0 flags:0  ts: 11.671692
+ret: 0         st: 0 flags:1  ts: 4.565857
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret:-1         st:-1 flags:0  ts: 10.460008
+ret: 0         st:-1 flags:1  ts: 3.354175
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730

diff --git a/tests/ref/seek/test-iibbibb-neg-ctts-mp4 b/tests/ref/seek/test-iibbibb-neg-ctts-mp4
new file mode 100644
index 0000000..5a89600
--- /dev/null
+++ b/tests/ref/seek/test-iibbibb-neg-ctts-mp4

@@ -0,0 +1,122 @@
+ret: 0         st: 0 flags:1 dts:-2.000000 pts: 0.000000 pos:     48 size:  7804
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st:-1 flags:0  ts:-1.000000
+ret: 0         st: 0 flags:1 dts:-2.000000 pts: 0.000000 pos:     48 size:  7804
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st:-1 flags:1  ts: 4.894167
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret:-1         st: 0 flags:0  ts: 10.788330
+ret: 0         st: 0 flags:1  ts: 3.682495
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st:-1 flags:0  ts: 9.576668
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0 dts: 9.000000 pts: 11.000000 pos:  44894 size:  1437
+ret: 0         st: 0 flags:0 dts: 10.000000 pts: 10.000000 pos:  46331 size:  1186
+ret:-EOF
+ret: 0         st:-1 flags:1  ts: 2.470835
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0  ts: 8.364990
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0 dts: 9.000000 pts: 11.000000 pos:  44894 size:  1437
+ret: 0         st: 0 flags:0 dts: 10.000000 pts: 10.000000 pos:  46331 size:  1186
+ret:-EOF
+ret: 0         st: 0 flags:1  ts: 1.259155
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st:-1 flags:0  ts: 7.153336
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0 dts: 9.000000 pts: 11.000000 pos:  44894 size:  1437
+ret: 0         st: 0 flags:0 dts: 10.000000 pts: 10.000000 pos:  46331 size:  1186
+ret:-EOF
+ret: 0         st:-1 flags:1  ts: 0.047503
+ret: 0         st: 0 flags:1 dts:-2.000000 pts: 0.000000 pos:     48 size:  7804
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st: 0 flags:0  ts: 5.941650
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:0 dts: 6.000000 pts: 8.000000 pos:  35757 size:  1273
+ret: 0         st: 0 flags:0 dts: 7.000000 pts: 7.000000 pos:  37030 size:  1130
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:1  ts: 11.835815
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0 dts: 9.000000 pts: 11.000000 pos:  44894 size:  1437
+ret: 0         st: 0 flags:0 dts: 10.000000 pts: 10.000000 pos:  46331 size:  1186
+ret:-EOF
+ret: 0         st:-1 flags:0  ts: 4.730004
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:0 dts: 6.000000 pts: 8.000000 pos:  35757 size:  1273
+ret: 0         st: 0 flags:0 dts: 7.000000 pts: 7.000000 pos:  37030 size:  1130
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st:-1 flags:1  ts: 10.624171
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0 dts: 9.000000 pts: 11.000000 pos:  44894 size:  1437
+ret: 0         st: 0 flags:0 dts: 10.000000 pts: 10.000000 pos:  46331 size:  1186
+ret:-EOF
+ret: 0         st: 0 flags:0  ts: 3.518311
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:1  ts: 9.412476
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:0 dts: 6.000000 pts: 8.000000 pos:  35757 size:  1273
+ret: 0         st: 0 flags:0 dts: 7.000000 pts: 7.000000 pos:  37030 size:  1130
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st:-1 flags:0  ts: 2.306672
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st:-1 flags:1  ts: 8.200839
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:0 dts: 6.000000 pts: 8.000000 pos:  35757 size:  1273
+ret: 0         st: 0 flags:0 dts: 7.000000 pts: 7.000000 pos:  37030 size:  1130
+ret: 0         st: 0 flags:1 dts: 8.000000 pts: 12.000000 pos:  38160 size:  6734
+ret: 0         st: 0 flags:0  ts: 1.095032
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st: 0 flags:1  ts: 6.989197
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret: 0         st:-1 flags:0  ts:-0.116660
+ret: 0         st: 0 flags:1 dts:-2.000000 pts: 0.000000 pos:     48 size:  7804
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st:-1 flags:1  ts: 5.777507
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret:-1         st: 0 flags:0  ts: 11.671692
+ret: 0         st: 0 flags:1  ts: 4.565857
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730
+ret: 0         st: 0 flags:0 dts: 3.000000 pts: 5.000000 pos:  25805 size:  1247
+ret: 0         st: 0 flags:0 dts: 4.000000 pts: 4.000000 pos:  27052 size:  1110
+ret: 0         st: 0 flags:1 dts: 5.000000 pts: 9.000000 pos:  28162 size:  7595
+ret:-1         st:-1 flags:0  ts: 10.460008
+ret: 0         st:-1 flags:1  ts: 3.354175
+ret: 0         st: 0 flags:1 dts:-1.000000 pts: 3.000000 pos:   7852 size:  7808
+ret: 0         st: 0 flags:0 dts: 0.000000 pts: 2.000000 pos:  15660 size:  1301
+ret: 0         st: 0 flags:0 dts: 1.000000 pts: 1.000000 pos:  16961 size:  1114
+ret: 0         st: 0 flags:1 dts: 2.000000 pts: 6.000000 pos:  18075 size:  7730

diff --git a/tests/ref/vsynth/vsynth1-cinepak b/tests/ref/vsynth/vsynth1-cinepak
index f1dfcd8..e47ae26 100644
--- a/tests/ref/vsynth/vsynth1-cinepak
+++ b/tests/ref/vsynth/vsynth1-cinepak

@@ -1,4 +1,4 @@
-546c7c1069f9e418aa787f469b693b94 *tests/data/fate/vsynth1-cinepak.mov
-99465 tests/data/fate/vsynth1-cinepak.mov
-bee091c200262be3427a233a2812388c *tests/data/fate/vsynth1-cinepak.out.rawvideo
-stddev:    8.46 PSNR: 29.58 MAXDIFF:  105 bytes:  7603200/   456192
+cd28e47a6ac396240a3fee69f15625d1 *tests/data/fate/vsynth1-cinepak.avi
+408616 tests/data/fate/vsynth1-cinepak.avi
+e74066a028c708f467272884ecd3f7d3 *tests/data/fate/vsynth1-cinepak.out.rawvideo
+stddev:   61.38 PSNR: 12.37 MAXDIFF:  225 bytes:  7603200/   921600

diff --git a/tests/ref/vsynth/vsynth1-ffvhuff420p12 b/tests/ref/vsynth/vsynth1-ffvhuff420p12
index 0d80bd6..d4b22f3 100644
--- a/tests/ref/vsynth/vsynth1-ffvhuff420p12
+++ b/tests/ref/vsynth/vsynth1-ffvhuff420p12

@@ -1,4 +1,4 @@
 866485c954242232878e40f0389790dd *tests/data/fate/vsynth1-ffvhuff420p12.avi
 14205356 tests/data/fate/vsynth1-ffvhuff420p12.avi
-b48f32c140712e8c7bf81cfdd66ae312 *tests/data/fate/vsynth1-ffvhuff420p12.out.rawvideo
-stddev:    0.68 PSNR: 51.47 MAXDIFF:    1 bytes:  7603200/  7603200
+c5ccac874dbf808e9088bc3107860042 *tests/data/fate/vsynth1-ffvhuff420p12.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200

diff --git a/tests/ref/vsynth/vsynth1-vc2-420p10 b/tests/ref/vsynth/vsynth1-vc2-420p10
index 037c77c..025a1cc 100644
--- a/tests/ref/vsynth/vsynth1-vc2-420p10
+++ b/tests/ref/vsynth/vsynth1-vc2-420p10

@@ -1,4 +1,4 @@
 1365742985b6315f6796c765aa17f39e *tests/data/fate/vsynth1-vc2-420p10.mov
 1417047 tests/data/fate/vsynth1-vc2-420p10.mov
-d3deedfa461a2696f82910890412fa2d *tests/data/fate/vsynth1-vc2-420p10.out.rawvideo
-stddev:    0.60 PSNR: 52.47 MAXDIFF:    1 bytes:  7603200/   760320
+387696707c79cf1a6c9aeff4024226b9 *tests/data/fate/vsynth1-vc2-420p10.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/   760320

diff --git a/tests/ref/vsynth/vsynth1-vc2-420p12 b/tests/ref/vsynth/vsynth1-vc2-420p12
index b0c56af..719f0d5 100644
--- a/tests/ref/vsynth/vsynth1-vc2-420p12
+++ b/tests/ref/vsynth/vsynth1-vc2-420p12

@@ -1,4 +1,4 @@
 08a844d17940cd612da269fb08430628 *tests/data/fate/vsynth1-vc2-420p12.mov
 1746007 tests/data/fate/vsynth1-vc2-420p12.mov
-5a78509638a96b0fa17c1b7e9159fd24 *tests/data/fate/vsynth1-vc2-420p12.out.rawvideo
-stddev:    0.67 PSNR: 51.48 MAXDIFF:    1 bytes:  7603200/   760320
+387696707c79cf1a6c9aeff4024226b9 *tests/data/fate/vsynth1-vc2-420p12.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/   760320

diff --git a/tests/ref/vsynth/vsynth1-vc2-t5_3 b/tests/ref/vsynth/vsynth1-vc2-t5_3
new file mode 100644
index 0000000..543f632
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-vc2-t5_3

@@ -0,0 +1,4 @@
+a5c80a091e7c3021ab9d5b854b4c653f *tests/data/fate/vsynth1-vc2-t5_3.mov
+1604316 tests/data/fate/vsynth1-vc2-t5_3.mov
+f35dd1c1df4726bb1d75d95e321b0698 *tests/data/fate/vsynth1-vc2-t5_3.out.rawvideo
+stddev:    1.88 PSNR: 42.61 MAXDIFF:   23 bytes:  7603200/   760320

diff --git a/tests/ref/vsynth/vsynth1-vc2-thaar b/tests/ref/vsynth/vsynth1-vc2-thaar
new file mode 100644
index 0000000..4c1f50f
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-vc2-thaar

@@ -0,0 +1,4 @@
+62bcccb2981c4b79b635a0199a7fafb1 *tests/data/fate/vsynth1-vc2-thaar.mov
+1717724 tests/data/fate/vsynth1-vc2-thaar.mov
+f35dd1c1df4726bb1d75d95e321b0698 *tests/data/fate/vsynth1-vc2-thaar.out.rawvideo
+stddev:    1.88 PSNR: 42.61 MAXDIFF:   23 bytes:  7603200/   760320

diff --git a/tests/ref/vsynth/vsynth2-cinepak b/tests/ref/vsynth/vsynth2-cinepak
index 18eb1d5..835de78 100644
--- a/tests/ref/vsynth/vsynth2-cinepak
+++ b/tests/ref/vsynth/vsynth2-cinepak

@@ -1,4 +1,4 @@
-cc0879f1993cdd6231e2c3b9c2c015a0 *tests/data/fate/vsynth2-cinepak.mov
-88400 tests/data/fate/vsynth2-cinepak.mov
-12c480911ebb89762dc49af003b176c7 *tests/data/fate/vsynth2-cinepak.out.rawvideo
-stddev:    5.07 PSNR: 34.02 MAXDIFF:   59 bytes:  7603200/   456192
+663a2804f421709208c76f6e34e7bea5 *tests/data/fate/vsynth2-cinepak.avi
+400402 tests/data/fate/vsynth2-cinepak.avi
+2c761c3c8cda083eb8f54b2df72b257b *tests/data/fate/vsynth2-cinepak.out.rawvideo
+stddev:   80.96 PSNR:  9.96 MAXDIFF:  227 bytes:  7603200/   921600

diff --git a/tests/ref/vsynth/vsynth2-ffvhuff420p12 b/tests/ref/vsynth/vsynth2-ffvhuff420p12
index 82c467a..f97edfb 100644
--- a/tests/ref/vsynth/vsynth2-ffvhuff420p12
+++ b/tests/ref/vsynth/vsynth2-ffvhuff420p12

@@ -1,4 +1,4 @@
 3ab9567895bf1ec31a82aadf16a5da0e *tests/data/fate/vsynth2-ffvhuff420p12.avi
 10562808 tests/data/fate/vsynth2-ffvhuff420p12.avi
-542327cb5ca7708085513ffc3d7c693c *tests/data/fate/vsynth2-ffvhuff420p12.out.rawvideo
-stddev:    0.72 PSNR: 50.87 MAXDIFF:    1 bytes:  7603200/  7603200
+36d7ca943916e1743cefa609eba0205c *tests/data/fate/vsynth2-ffvhuff420p12.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200

diff --git a/tests/ref/vsynth/vsynth2-vc2-420p10 b/tests/ref/vsynth/vsynth2-vc2-420p10
index 9de40d2..48d97d6 100644
--- a/tests/ref/vsynth/vsynth2-vc2-420p10
+++ b/tests/ref/vsynth/vsynth2-vc2-420p10

@@ -1,4 +1,4 @@
 1197f8108683b9eb6b0777adb2db1aa8 *tests/data/fate/vsynth2-vc2-420p10.mov
 1181271 tests/data/fate/vsynth2-vc2-420p10.mov
-75174cb90e76c433f6d769531d573ac2 *tests/data/fate/vsynth2-vc2-420p10.out.rawvideo
-stddev:    0.63 PSNR: 52.11 MAXDIFF:    1 bytes:  7603200/   760320
+01389f7ae4f2a3dc0d7b8384d435fd83 *tests/data/fate/vsynth2-vc2-420p10.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/   760320

diff --git a/tests/ref/vsynth/vsynth2-vc2-420p12 b/tests/ref/vsynth/vsynth2-vc2-420p12
index 660a1ee..9b9ccb8 100644
--- a/tests/ref/vsynth/vsynth2-vc2-420p12
+++ b/tests/ref/vsynth/vsynth2-vc2-420p12

@@ -1,4 +1,4 @@
 0e6b3aefd70fca45e67dc8cbc99640e8 *tests/data/fate/vsynth2-vc2-420p12.mov
 1525079 tests/data/fate/vsynth2-vc2-420p12.mov
-b4d45651e20faa7a0bb84a0738638c48 *tests/data/fate/vsynth2-vc2-420p12.out.rawvideo
-stddev:    0.73 PSNR: 50.84 MAXDIFF:    1 bytes:  7603200/   760320
+01389f7ae4f2a3dc0d7b8384d435fd83 *tests/data/fate/vsynth2-vc2-420p12.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/   760320

diff --git a/tests/ref/vsynth/vsynth2-vc2-t5_3 b/tests/ref/vsynth/vsynth2-vc2-t5_3
new file mode 100644
index 0000000..c4ac50d
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-vc2-t5_3

@@ -0,0 +1,4 @@
+654f04ae4f5947f0d354025fee1f37e0 *tests/data/fate/vsynth2-vc2-t5_3.mov
+1335772 tests/data/fate/vsynth2-vc2-t5_3.mov
+8f629e5cea24cc804d6aeadceacf0b2a *tests/data/fate/vsynth2-vc2-t5_3.out.rawvideo
+stddev:    0.37 PSNR: 56.66 MAXDIFF:    7 bytes:  7603200/   760320

diff --git a/tests/ref/vsynth/vsynth2-vc2-thaar b/tests/ref/vsynth/vsynth2-vc2-thaar
new file mode 100644
index 0000000..fe69817
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-vc2-thaar

@@ -0,0 +1,4 @@
+51b03663f4187f4eea11c5311669a2cc *tests/data/fate/vsynth2-vc2-thaar.mov
+1470300 tests/data/fate/vsynth2-vc2-thaar.mov
+8f629e5cea24cc804d6aeadceacf0b2a *tests/data/fate/vsynth2-vc2-thaar.out.rawvideo
+stddev:    0.37 PSNR: 56.66 MAXDIFF:    7 bytes:  7603200/   760320

diff --git a/tests/ref/vsynth/vsynth3-ffvhuff420p12 b/tests/ref/vsynth/vsynth3-ffvhuff420p12
index 72d412d..201ec76 100644
--- a/tests/ref/vsynth/vsynth3-ffvhuff420p12
+++ b/tests/ref/vsynth/vsynth3-ffvhuff420p12

@@ -1,4 +1,4 @@
 e5a178d75afeda6df1d4eb6f7cdfa3a0 *tests/data/fate/vsynth3-ffvhuff420p12.avi
 175260 tests/data/fate/vsynth3-ffvhuff420p12.avi
-ee95a44ccd612b5057860b43fe9775d6 *tests/data/fate/vsynth3-ffvhuff420p12.out.rawvideo
-stddev:    0.69 PSNR: 51.35 MAXDIFF:    1 bytes:    86700/    86700
+a038ad7c3c09f776304ef7accdea9c74 *tests/data/fate/vsynth3-ffvhuff420p12.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:    86700/    86700

diff --git a/tests/ref/vsynth/vsynth_lena-cinepak b/tests/ref/vsynth/vsynth_lena-cinepak
index 39b1d68..6b02167 100644
--- a/tests/ref/vsynth/vsynth_lena-cinepak
+++ b/tests/ref/vsynth/vsynth_lena-cinepak

@@ -1,4 +1,4 @@
-e3837018f84929f07019ae2eccd303e2 *tests/data/fate/vsynth_lena-cinepak.mov
-88900 tests/data/fate/vsynth_lena-cinepak.mov
-f54ffa70f335ac7b701d7ae34462e001 *tests/data/fate/vsynth_lena-cinepak.out.rawvideo
-stddev:    4.09 PSNR: 35.88 MAXDIFF:   46 bytes:  7603200/   456192
+a9ea19eb0d239a53af8630d5bc4167d0 *tests/data/fate/vsynth_lena-cinepak.avi
+407574 tests/data/fate/vsynth_lena-cinepak.avi
+e32d4103194665d2ea0f46d5cdd0cdf2 *tests/data/fate/vsynth_lena-cinepak.out.rawvideo
+stddev:   58.10 PSNR: 12.85 MAXDIFF:  185 bytes:  7603200/   921600

diff --git a/tests/ref/vsynth/vsynth_lena-ffvhuff420p12 b/tests/ref/vsynth/vsynth_lena-ffvhuff420p12
index e8ea4bc..e77698b 100644
--- a/tests/ref/vsynth/vsynth_lena-ffvhuff420p12
+++ b/tests/ref/vsynth/vsynth_lena-ffvhuff420p12

@@ -1,4 +1,4 @@
 b2f3d04ca30c113b79877bb5518dd6ea *tests/data/fate/vsynth_lena-ffvhuff420p12.avi
 10925580 tests/data/fate/vsynth_lena-ffvhuff420p12.avi
-08b3c6c70eba608bae926608ff253f2a *tests/data/fate/vsynth_lena-ffvhuff420p12.out.rawvideo
-stddev:    0.68 PSNR: 51.38 MAXDIFF:    1 bytes:  7603200/  7603200
+dde5895817ad9d219f79a52d0bdfb001 *tests/data/fate/vsynth_lena-ffvhuff420p12.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200

diff --git a/tests/ref/vsynth/vsynth_lena-vc2-420p10 b/tests/ref/vsynth/vsynth_lena-vc2-420p10
index 6428dd1..2558ef8 100644
--- a/tests/ref/vsynth/vsynth_lena-vc2-420p10
+++ b/tests/ref/vsynth/vsynth_lena-vc2-420p10

@@ -1,4 +1,4 @@
 5bccec653c330f03b90065a84fad9b4b *tests/data/fate/vsynth_lena-vc2-420p10.mov
 1154775 tests/data/fate/vsynth_lena-vc2-420p10.mov
-32265ec286c54104b3be8f11c519da1b *tests/data/fate/vsynth_lena-vc2-420p10.out.rawvideo
-stddev:    0.61 PSNR: 52.34 MAXDIFF:    1 bytes:  7603200/   760320
+b1c660113acab8eb4075f3d9fbb9cee9 *tests/data/fate/vsynth_lena-vc2-420p10.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/   760320

diff --git a/tests/ref/vsynth/vsynth_lena-vc2-420p12 b/tests/ref/vsynth/vsynth_lena-vc2-420p12
index e62b2b6..bc60116 100644
--- a/tests/ref/vsynth/vsynth_lena-vc2-420p12
+++ b/tests/ref/vsynth/vsynth_lena-vc2-420p12

@@ -1,4 +1,4 @@
 d27a6d3517cc9a6d22e338f4b206545c *tests/data/fate/vsynth_lena-vc2-420p12.mov
 1516759 tests/data/fate/vsynth_lena-vc2-420p12.mov
-bc62f1c3bade7224c55219dba8a3c6af *tests/data/fate/vsynth_lena-vc2-420p12.out.rawvideo
-stddev:    0.68 PSNR: 51.39 MAXDIFF:    1 bytes:  7603200/   760320
+b1c660113acab8eb4075f3d9fbb9cee9 *tests/data/fate/vsynth_lena-vc2-420p12.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/   760320

diff --git a/tests/ref/vsynth/vsynth_lena-vc2-t5_3 b/tests/ref/vsynth/vsynth_lena-vc2-t5_3
new file mode 100644
index 0000000..f5f865e
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-vc2-t5_3

@@ -0,0 +1,4 @@
+c2ce9a70c523903620794af6bc4ea0d4 *tests/data/fate/vsynth_lena-vc2-t5_3.mov
+1305436 tests/data/fate/vsynth_lena-vc2-t5_3.mov
+e5ea17416bda234ae58f27dea27e8135 *tests/data/fate/vsynth_lena-vc2-t5_3.out.rawvideo
+stddev:    0.30 PSNR: 58.58 MAXDIFF:    5 bytes:  7603200/   760320

diff --git a/tests/ref/vsynth/vsynth_lena-vc2-thaar b/tests/ref/vsynth/vsynth_lena-vc2-thaar
new file mode 100644
index 0000000..9c16bdf
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-vc2-thaar

@@ -0,0 +1,4 @@
+5f062ca6c56d14590507c4373ca942ab *tests/data/fate/vsynth_lena-vc2-thaar.mov
+1431772 tests/data/fate/vsynth_lena-vc2-thaar.mov
+e5ea17416bda234ae58f27dea27e8135 *tests/data/fate/vsynth_lena-vc2-thaar.out.rawvideo
+stddev:    0.30 PSNR: 58.58 MAXDIFF:    5 bytes:  7603200/   760320

diff --git a/tools/aviocat.c b/tools/aviocat.c
index 983108a..2aa08b9 100644
--- a/tools/aviocat.c
+++ b/tools/aviocat.c

@@ -42,7 +42,6 @@
     AVDictionary *in_opts = NULL;
     AVDictionary *out_opts = NULL;
 
-    av_register_all();
     avformat_network_init();
 
     for (i = 1; i < argc; i++) {
@@ -107,6 +106,11 @@
         if (n <= 0)
             break;
         avio_write(output, buf, n);
+        if (output->error) {
+            av_strerror(output->error, errbuf, sizeof(errbuf));
+            fprintf(stderr, "Unable to write %s: %s\n", output_url, errbuf);
+            break;
+        }
         stream_pos += n;
         if (bps) {
             avio_flush(output);

diff --git a/tools/bisect-create b/tools/bisect-create
index fc60e86..ee6ec3f 100755
--- a/tools/bisect-create
+++ b/tools/bisect-create

@@ -20,7 +20,7 @@
 case "$1" in
     need)
         case $2 in
-            ffmpeg|ffplay|ffprobe|ffserver)
+            ffmpeg|ffplay|ffprobe)
                 echo $2.c >> tools/bisect.need
             ;;
         esac

diff --git a/tools/cl2c b/tools/cl2c
new file mode 100755
index 0000000..e3f92ba
--- /dev/null
+++ b/tools/cl2c

@@ -0,0 +1,36 @@
+#!/bin/sh
+# Convert an OpenCL source file into a C source file containing the
+# OpenCL source as a C string.  Also adds a #line directive so that
+# compiler messages are useful.
+
+# This file is part of FFmpeg.
+#
+# FFmpeg is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# FFmpeg is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+input="$1"
+output="$2"
+
+name=$(basename "$input" | sed 's/.cl$//')
+
+cat >$output <<EOF
+// Generated from $input
+const char *ff_opencl_source_$name =
+"#line 1 \"$input\"\n"
+EOF
+
+# Convert \ to \\ and " to \", then add " to the start and end of the line.
+cat "$input" | sed 's/\\/\\\\/g;s/\"/\\\"/g;s/^/\"/;s/$/\\n\"/' >>$output
+
+echo ";" >>$output

diff --git a/tools/crypto_bench.c b/tools/crypto_bench.c
index 203bffe..aca8bbb 100644
--- a/tools/crypto_bench.c
+++ b/tools/crypto_bench.c

@@ -23,6 +23,7 @@
 #define USE_crypto           0x01    /* OpenSSL's libcrypto */
 #define USE_gcrypt           0x02    /* GnuTLS's libgcrypt */
 #define USE_tomcrypt         0x04    /* LibTomCrypt */
+#define USE_mbedcrypto       0x08    /* mbed TLS */
 
 #include <stdlib.h>
 #include <math.h>
@@ -311,23 +312,24 @@
 DEFINE_GCRYPT_WRAPPER(sha512,    SHA512)
 DEFINE_GCRYPT_WRAPPER(ripemd160, RMD160)
 
-#define DEFINE_GCRYPT_CYPHER_WRAPPER(suffix, cypher, sz)                            \
+#define DEFINE_GCRYPT_CYPHER_WRAPPER(suffix, cypher, mode, sz)                      \
 static void run_gcrypt_ ## suffix(uint8_t *output,                                  \
                               const uint8_t *input, unsigned size)                  \
 {                                                                                   \
     static gcry_cipher_hd_t suffix;                                                 \
     if (!suffix)                                                                    \
-        gcry_cipher_open(&suffix, GCRY_CIPHER_ ## cypher, GCRY_CIPHER_MODE_ECB, 0); \
+        gcry_cipher_open(&suffix, GCRY_CIPHER_ ## cypher, GCRY_CIPHER_MODE_ ## mode, 0); \
     gcry_cipher_setkey(suffix, hardcoded_key, sz);                                  \
     gcry_cipher_encrypt(suffix, output, size, input, size);                         \
 }
 
-DEFINE_GCRYPT_CYPHER_WRAPPER(aes128,   AES128,      16)
-DEFINE_GCRYPT_CYPHER_WRAPPER(blowfish, BLOWFISH,    16)
-DEFINE_GCRYPT_CYPHER_WRAPPER(camellia, CAMELLIA128, 16)
-DEFINE_GCRYPT_CYPHER_WRAPPER(cast128,  CAST5,       16)
-DEFINE_GCRYPT_CYPHER_WRAPPER(des,      DES,         8)
-DEFINE_GCRYPT_CYPHER_WRAPPER(twofish,  TWOFISH128,  16)
+DEFINE_GCRYPT_CYPHER_WRAPPER(aes128,   AES128,      ECB,    16)
+DEFINE_GCRYPT_CYPHER_WRAPPER(blowfish, BLOWFISH,    ECB,    16)
+DEFINE_GCRYPT_CYPHER_WRAPPER(camellia, CAMELLIA128, ECB,    16)
+DEFINE_GCRYPT_CYPHER_WRAPPER(cast128,  CAST5,       ECB,    16)
+DEFINE_GCRYPT_CYPHER_WRAPPER(des,      DES,         ECB,    8)
+DEFINE_GCRYPT_CYPHER_WRAPPER(twofish,  TWOFISH128,  ECB,    16)
+DEFINE_GCRYPT_CYPHER_WRAPPER(rc4,      ARCFOUR,     STREAM, 16)
 
 #define IMPL_USE_gcrypt(...) IMPL_USE(__VA_ARGS__)
 #else
@@ -335,6 +337,116 @@
 #endif
 
 /***************************************************************************
+ * mbedcrypto: mbed TLS
+ ***************************************************************************/
+
+#if (USE_EXT_LIBS) & USE_mbedcrypto
+
+#include <mbedtls/aes.h>
+#include <mbedtls/arc4.h>
+#include <mbedtls/blowfish.h>
+#include <mbedtls/camellia.h>
+#include <mbedtls/des.h>
+#include <mbedtls/md5.h>
+#include <mbedtls/ripemd160.h>
+#include <mbedtls/sha1.h>
+#include <mbedtls/sha256.h>
+#include <mbedtls/sha512.h>
+#include <mbedtls/xtea.h>
+
+#define DEFINE_MBEDCRYPTO_WRAPPER(suffix)                                  \
+static void run_mbedcrypto_ ## suffix(uint8_t *output,                     \
+                                      const uint8_t *input, unsigned size) \
+{                                                                          \
+    mbedtls_ ## suffix ## _ret(input, size, output);                       \
+}
+
+#define DEFINE_MBEDCRYPTO_WRAPPER_SHA2(suffix)                             \
+static void run_mbedcrypto_ ## suffix(uint8_t *output,                     \
+                                      const uint8_t *input, unsigned size) \
+{                                                                          \
+    mbedtls_ ## suffix ## _ret(input, size, output, 0);                    \
+}
+
+DEFINE_MBEDCRYPTO_WRAPPER(md5)
+DEFINE_MBEDCRYPTO_WRAPPER(ripemd160)
+DEFINE_MBEDCRYPTO_WRAPPER(sha1)
+DEFINE_MBEDCRYPTO_WRAPPER_SHA2(sha256)
+DEFINE_MBEDCRYPTO_WRAPPER_SHA2(sha512)
+
+
+#define DEFINE_MBEDCRYPTO_CYPHER_WRAPPER(suffix, cypher, algo)                  \
+static void run_mbedcrypto_ ## suffix(uint8_t *output,                          \
+                                      const uint8_t *input, unsigned size)      \
+{                                                                               \
+    mbedtls_ ## cypher ## _context cypher;                                      \
+                                                                                \
+    mbedtls_ ## cypher ## _init(&cypher);                                       \
+    mbedtls_ ## cypher ## _setkey_enc(&cypher, hardcoded_key, 128);             \
+    for (int i = 0; i < size; i += 16)                                          \
+        mbedtls_ ## cypher ## _crypt_ecb(&cypher, MBEDTLS_ ## algo ## _ENCRYPT, \
+                                         input + i, output + i);                \
+    mbedtls_ ## cypher ## _free(&cypher);                                       \
+}
+
+DEFINE_MBEDCRYPTO_CYPHER_WRAPPER(aes128, aes, AES)
+DEFINE_MBEDCRYPTO_CYPHER_WRAPPER(camellia, camellia, CAMELLIA)
+
+static void run_mbedcrypto_blowfish(uint8_t *output,
+                                    const uint8_t *input, unsigned size)
+{
+    mbedtls_blowfish_context blowfish;
+
+    mbedtls_blowfish_init(&blowfish);
+    mbedtls_blowfish_setkey(&blowfish, hardcoded_key, 128);
+    for (int i = 0; i < size; i += 8)
+        mbedtls_blowfish_crypt_ecb(&blowfish, MBEDTLS_BLOWFISH_ENCRYPT,
+                                   input + i, output + i);
+    mbedtls_blowfish_free(&blowfish);
+}
+
+static void run_mbedcrypto_des(uint8_t *output,
+                               const uint8_t *input, unsigned size)
+{
+    mbedtls_des_context des;
+
+    mbedtls_des_init(&des);
+    mbedtls_des_setkey_enc(&des, hardcoded_key);
+    for (int i = 0; i < size; i += 8)
+        mbedtls_des_crypt_ecb(&des, input + i, output + i);
+    mbedtls_des_free(&des);
+}
+
+static void run_mbedcrypto_rc4(uint8_t *output,
+                               const uint8_t *input, unsigned size)
+{
+    mbedtls_arc4_context rc4;
+
+    mbedtls_arc4_init(&rc4);
+    mbedtls_arc4_setup(&rc4, hardcoded_key, 16);
+    mbedtls_arc4_crypt(&rc4, size, input, output);
+    mbedtls_arc4_free(&rc4);
+}
+
+static void run_mbedcrypto_xtea(uint8_t *output,
+                                const uint8_t *input, unsigned size)
+{
+    mbedtls_xtea_context xtea;
+
+    mbedtls_xtea_init(&xtea);
+    mbedtls_xtea_setup(&xtea, hardcoded_key);
+    for (int i = 0; i < size; i += 8)
+        mbedtls_xtea_crypt_ecb(&xtea, MBEDTLS_XTEA_ENCRYPT,
+                               input + i, output + i);
+    mbedtls_xtea_free(&xtea);
+}
+
+#define IMPL_USE_mbedcrypto(...) IMPL_USE(__VA_ARGS__)
+#else
+#define IMPL_USE_mbedcrypto(...) /* ignore */
+#endif
+
+/***************************************************************************
  * tomcrypt: LibTomCrypt
  ***************************************************************************/
 
@@ -416,6 +528,16 @@
         des_ecb_encrypt(input + i, output + i, &des);
 }
 
+static void run_tomcrypt_rc4(uint8_t *output,
+                             const uint8_t *input, unsigned size)
+{
+    rc4_state rc4;
+
+    rc4_stream_setup(&rc4, hardcoded_key, 16);
+    rc4_stream_crypt(&rc4, input, size, output);
+    rc4_stream_done(&rc4);
+}
+
 static void run_tomcrypt_twofish(uint8_t *output,
                                 const uint8_t *input, unsigned size)
 {
@@ -512,6 +634,7 @@
     IMPL(lavu,       __VA_ARGS__) \
     IMPL(crypto,     __VA_ARGS__) \
     IMPL(gcrypt,     __VA_ARGS__) \
+    IMPL(mbedcrypto, __VA_ARGS__) \
     IMPL(tomcrypt,   __VA_ARGS__)
 
 struct hash_impl implementations[] = {
@@ -525,15 +648,18 @@
     IMPL_ALL("RIPEMD-160", ripemd160, "62a5321e4fc8784903bb43ab7752c75f8b25af00")
     IMPL_ALL("AES-128",    aes128,    "crc:ff6bc888")
     IMPL_ALL("CAMELLIA",   camellia,  "crc:7abb59a7")
-    IMPL_ALL("CAST-128",   cast128,   "crc:456aa584")
+    IMPL(lavu,     "CAST-128", cast128, "crc:456aa584")
+    IMPL(crypto,   "CAST-128", cast128, "crc:456aa584")
+    IMPL(gcrypt,   "CAST-128", cast128, "crc:456aa584")
+    IMPL(tomcrypt, "CAST-128", cast128, "crc:456aa584")
     IMPL_ALL("BLOWFISH",   blowfish,  "crc:33e8aa74")
     IMPL_ALL("DES",        des,       "crc:31291e0b")
     IMPL(lavu,     "TWOFISH", twofish, "crc:9edbd5c1")
     IMPL(gcrypt,   "TWOFISH", twofish, "crc:9edbd5c1")
     IMPL(tomcrypt, "TWOFISH", twofish, "crc:9edbd5c1")
-    IMPL(lavu,     "RC4",     rc4,     "crc:538d37b2")
-    IMPL(crypto,   "RC4",     rc4,     "crc:538d37b2")
+    IMPL_ALL("RC4",           rc4,     "crc:538d37b2")
     IMPL(lavu,     "XTEA",    xtea,    "crc:931fc270")
+    IMPL(mbedcrypto, "XTEA",  xtea,    "crc:931fc270")
     IMPL(tomcrypt, "XTEA",    xtea,    "crc:931fc270")
 };
 
@@ -561,15 +687,16 @@
                     argv[0]);
             if ((USE_EXT_LIBS)) {
                 char buf[1024];
-                snprintf(buf, sizeof(buf), "%s%s%s",
+                snprintf(buf, sizeof(buf), "%s%s%s%s",
                          ((USE_EXT_LIBS) & USE_crypto)   ? "+crypto"   : "",
                          ((USE_EXT_LIBS) & USE_gcrypt)   ? "+gcrypt"   : "",
+                         ((USE_EXT_LIBS) & USE_mbedcrypto) ? "+mbedcrypto" : "",
                          ((USE_EXT_LIBS) & USE_tomcrypt) ? "+tomcrypt" : "");
                 fprintf(stderr, "Built with the following external libraries:\n"
                         "make VERSUS=%s\n", buf + 1);
             } else {
                 fprintf(stderr, "Built without external libraries; use\n"
-                        "make VERSUS=crypto+gcrypt+tomcrypt tools/crypto_bench\n"
+                        "make VERSUS=crypto+gcrypt+mbedcrypto+tomcrypt tools/crypto_bench\n"
                         "to enable them.\n");
             }
             exit(opt != 'h');

diff --git a/tools/enum_options.c b/tools/enum_options.c
index c2a295c..77e1f9f 100644
--- a/tools/enum_options.c
+++ b/tools/enum_options.c

@@ -130,8 +130,6 @@
     if (argc < 2)
         print_usage();
 
-    av_register_all();
-
     if (!strcmp(argv[1], "format"))
         show_format_opts();
     else if (!strcmp(argv[1], "codec"))

diff --git a/tools/graph2dot.c b/tools/graph2dot.c
index 21d0795..d5c1e4e 100644
--- a/tools/graph2dot.c
+++ b/tools/graph2dot.c

@@ -189,8 +189,6 @@
         *p = '\0';
     }
 
-    avfilter_register_all();
-
     if (avfilter_graph_parse(graph, graph_string, NULL, NULL, NULL) < 0) {
         fprintf(stderr, "Failed to parse the graph description\n");
         return 1;

diff --git a/tools/ismindex.c b/tools/ismindex.c
index 0254a98..7601f62 100644
--- a/tools/ismindex.c
+++ b/tools/ismindex.c

@@ -791,8 +791,6 @@
     int split = 0, ismf = 0, i;
     struct Tracks tracks = { 0, .video_track = -1, .audio_track = -1 };
 
-    av_register_all();
-
     for (i = 1; i < argc; i++) {
         if (!strcmp(argv[i], "-n")) {
             basename = argv[i + 1];

diff --git a/tools/patcheck b/tools/patcheck
index 26137d6..101a542 100755
--- a/tools/patcheck
+++ b/tools/patcheck

@@ -68,7 +68,7 @@
 cat $TMP
 hiegrep '# *ifdef * (HAVE|CONFIG)_' 'ifdefs that should be #if' $*
 
-hiegrep '\b(awnser|cant|dont|wont|doesnt|usefull|successfull|occured|teh|alot|wether|skiped|skiping|heigth|informations|colums|loosy|loosing|ouput|seperate|preceed|upto|paket|posible|unkown|inpossible|dimention|acheive|funtions|overriden|outputing|seperation|initalize|compatibilty|bistream|knwon|unknwon|choosen|additonal|gurantee|availble|wich|begining|milisecond|missmatch)\b' 'common typos' $*
+hiegrep '\b(awnser|cant|dont|wont|doesnt|usefull|successfull|occured|teh|alot|wether|skiped|skiping|heigth|informations|colums|loosy|loosing|ouput|seperate|preceed|upto|paket|posible|unkown|inpossible|dimention|acheive|funtions|overriden|outputing|seperation|initalize|compatibilty|bistream|knwon|unknwon|choosen|additonal|gurantee|availble|wich|begining|milisecond|missmatch|threshhold)\b' 'common typos' $*
 
 hiegrep 'av_log\( *NULL' 'Missing context in av_log' $*
 hiegrep '[^sn]printf' 'Please use av_log' $*

diff --git a/tools/pktdumper.c b/tools/pktdumper.c
index 6516ad3..16a965b 100644
--- a/tools/pktdumper.c
+++ b/tools/pktdumper.c

@@ -89,9 +89,6 @@
     strcat(fntemplate, PKTFILESUFF);
     printf("FNTEMPLATE: '%s'\n", fntemplate);
 
-    // register all file formats
-    av_register_all();
-
     err = avformat_open_input(&fctx, argv[1], NULL, NULL);
     if (err < 0) {
         fprintf(stderr, "cannot open input: error %d\n", err);

diff --git a/tools/probetest.c b/tools/probetest.c
index 74045eb..2c6c1de 100644
--- a/tools/probetest.c
+++ b/tools/probetest.c

@@ -124,9 +124,6 @@
         return 1;
     }
 
-    avcodec_register_all();
-    av_register_all();
-
     av_lfg_init(&state, 0xdeadbeef);
 
     pd.buf = NULL;

diff --git a/tools/qt-faststart.c b/tools/qt-faststart.c
index 728f80c..46950a5 100644
--- a/tools/qt-faststart.c
+++ b/tools/qt-faststart.c

@@ -28,11 +28,9 @@
 #include <stdlib.h>
 #include <inttypes.h>
 #include <string.h>
+#include <limits.h>
 
-#ifdef __MINGW32CE__
-#define fseeko(x, y, z) fseek(x, y, z)
-#define ftello(x)       ftell(x)
-#elif defined(__MINGW32__)
+#ifdef __MINGW32__
 #undef fseeko
 #define fseeko(x, y, z) fseeko64(x, y, z)
 #undef ftello
@@ -46,8 +44,6 @@
 
 #define MIN(a,b) ((a) > (b) ? (b) : (a))
 
-#define BE_16(x) ((((uint8_t*)(x))[0] <<  8) | ((uint8_t*)(x))[1])
-
 #define BE_32(x) (((uint32_t)(((uint8_t*)(x))[0]) << 24) |  \
                              (((uint8_t*)(x))[1]  << 16) |  \
                              (((uint8_t*)(x))[2]  <<  8) |  \
@@ -62,6 +58,18 @@
                   ((uint64_t)(((uint8_t*)(x))[6]) <<  8) |  \
                   ((uint64_t)( (uint8_t*)(x))[7]))
 
+#define AV_WB32(p, val)    {                    \
+    ((uint8_t*)(p))[0] = ((val) >> 24) & 0xff;  \
+    ((uint8_t*)(p))[1] = ((val) >> 16) & 0xff;  \
+    ((uint8_t*)(p))[2] = ((val) >> 8) & 0xff;   \
+    ((uint8_t*)(p))[3] = (val) & 0xff;          \
+    }
+
+#define AV_WB64(p, val)    {                    \
+    AV_WB32(p, (val) >> 32)                     \
+    AV_WB32(p + 4, val)                         \
+    }
+
 #define BE_FOURCC(ch0, ch1, ch2, ch3)           \
     ( (uint32_t)(unsigned char)(ch3)        |   \
      ((uint32_t)(unsigned char)(ch2) <<  8) |   \
@@ -82,11 +90,342 @@
 #define UUID_ATOM QT_ATOM('u', 'u', 'i', 'd')
 
 #define CMOV_ATOM QT_ATOM('c', 'm', 'o', 'v')
+#define TRAK_ATOM QT_ATOM('t', 'r', 'a', 'k')
+#define MDIA_ATOM QT_ATOM('m', 'd', 'i', 'a')
+#define MINF_ATOM QT_ATOM('m', 'i', 'n', 'f')
+#define STBL_ATOM QT_ATOM('s', 't', 'b', 'l')
 #define STCO_ATOM QT_ATOM('s', 't', 'c', 'o')
 #define CO64_ATOM QT_ATOM('c', 'o', '6', '4')
 
 #define ATOM_PREAMBLE_SIZE    8
 #define COPY_BUFFER_SIZE   33554432
+#define MAX_FTYP_ATOM_SIZE 1048576
+
+typedef struct {
+    uint32_t type;
+    uint32_t header_size;
+    uint64_t size;
+    unsigned char *data;
+} atom_t;
+
+typedef struct {
+    uint64_t moov_atom_size;
+    uint64_t stco_offset_count;
+    uint64_t stco_data_size;
+    int stco_overflow;
+    uint32_t depth;
+} update_chunk_offsets_context_t;
+
+typedef struct {
+    unsigned char *dest;
+    uint64_t original_moov_size;
+    uint64_t new_moov_size;
+} upgrade_stco_context_t;
+
+typedef int (*parse_atoms_callback_t)(void *context, atom_t *atom);
+
+static int parse_atoms(
+    unsigned char *buf,
+    uint64_t size,
+    parse_atoms_callback_t callback,
+    void *context)
+{
+    unsigned char *pos = buf;
+    unsigned char *end = pos + size;
+    atom_t atom;
+    int ret;
+
+    while (end - pos >= ATOM_PREAMBLE_SIZE) {
+        atom.size = BE_32(pos);
+        atom.type = BE_32(pos + 4);
+        pos += ATOM_PREAMBLE_SIZE;
+        atom.header_size = ATOM_PREAMBLE_SIZE;
+
+        switch (atom.size) {
+        case 1:
+            if (end - pos < 8) {
+                fprintf(stderr, "not enough room for 64 bit atom size\n");
+                return -1;
+            }
+
+            atom.size = BE_64(pos);
+            pos += 8;
+            atom.header_size = ATOM_PREAMBLE_SIZE + 8;
+            break;
+
+        case 0:
+            atom.size = ATOM_PREAMBLE_SIZE + end - pos;
+            break;
+        }
+
+        if (atom.size < atom.header_size) {
+            fprintf(stderr, "atom size %"PRIu64" too small\n", atom.size);
+            return -1;
+        }
+
+        atom.size -= atom.header_size;
+
+        if (atom.size > end - pos) {
+            fprintf(stderr, "atom size %"PRIu64" too big\n", atom.size);
+            return -1;
+        }
+
+        atom.data = pos;
+        ret = callback(context, &atom);
+        if (ret < 0) {
+            return ret;
+        }
+
+        pos += atom.size;
+    }
+
+    return 0;
+}
+
+static int update_stco_offsets(update_chunk_offsets_context_t *context, atom_t *atom)
+{
+    uint32_t current_offset;
+    uint32_t offset_count;
+    unsigned char *pos;
+    unsigned char *end;
+
+    printf(" patching stco atom...\n");
+    if (atom->size < 8) {
+        fprintf(stderr, "stco atom size %"PRIu64" too small\n", atom->size);
+        return -1;
+    }
+
+    offset_count = BE_32(atom->data + 4);
+    if (offset_count > (atom->size - 8) / 4) {
+        fprintf(stderr, "stco offset count %"PRIu32" too big\n", offset_count);
+        return -1;
+    }
+
+    context->stco_offset_count += offset_count;
+    context->stco_data_size += atom->size - 8;
+
+    for (pos = atom->data + 8, end = pos + offset_count * 4;
+        pos < end;
+        pos += 4) {
+        current_offset = BE_32(pos);
+        if (current_offset > UINT_MAX - context->moov_atom_size) {
+            context->stco_overflow = 1;
+        }
+        current_offset += context->moov_atom_size;
+        AV_WB32(pos, current_offset);
+    }
+
+    return 0;
+}
+
+static int update_co64_offsets(update_chunk_offsets_context_t *context, atom_t *atom)
+{
+    uint64_t current_offset;
+    uint32_t offset_count;
+    unsigned char *pos;
+    unsigned char *end;
+
+    printf(" patching co64 atom...\n");
+    if (atom->size < 8) {
+        fprintf(stderr, "co64 atom size %"PRIu64" too small\n", atom->size);
+        return -1;
+    }
+
+    offset_count = BE_32(atom->data + 4);
+    if (offset_count > (atom->size - 8) / 8) {
+        fprintf(stderr, "co64 offset count %"PRIu32" too big\n", offset_count);
+        return -1;
+    }
+
+    for (pos = atom->data + 8, end = pos + offset_count * 8;
+        pos < end;
+        pos += 8) {
+        current_offset = BE_64(pos);
+        current_offset += context->moov_atom_size;
+        AV_WB64(pos, current_offset);
+    }
+
+    return 0;
+}
+
+static int update_chunk_offsets_callback(void *ctx, atom_t *atom)
+{
+    update_chunk_offsets_context_t *context = ctx;
+    int ret;
+
+    switch (atom->type) {
+    case STCO_ATOM:
+        return update_stco_offsets(context, atom);
+
+    case CO64_ATOM:
+        return update_co64_offsets(context, atom);
+
+    case MOOV_ATOM:
+    case TRAK_ATOM:
+    case MDIA_ATOM:
+    case MINF_ATOM:
+    case STBL_ATOM:
+        context->depth++;
+        if (context->depth > 10) {
+            fprintf(stderr, "atoms too deeply nested\n");
+            return -1;
+        }
+
+        ret = parse_atoms(
+            atom->data,
+            atom->size,
+            update_chunk_offsets_callback,
+            context);
+        context->depth--;
+        return ret;
+    }
+
+    return 0;
+}
+
+static void set_atom_size(unsigned char *header, uint32_t header_size, uint64_t size)
+{
+    switch (header_size) {
+    case 8:
+        AV_WB32(header, size);
+        break;
+
+    case 16:
+        AV_WB64(header + 8, size);
+        break;
+    }
+}
+
+static void upgrade_stco_atom(upgrade_stco_context_t *context, atom_t *atom)
+{
+    unsigned char *pos;
+    unsigned char *end;
+    uint64_t new_offset;
+    uint32_t offset_count;
+    uint32_t original_offset;
+
+    /* Note: not performing validations since they were performed on the first pass */
+
+    offset_count = BE_32(atom->data + 4);
+
+    /* write the header */
+    memcpy(context->dest, atom->data - atom->header_size, atom->header_size + 8);
+    AV_WB32(context->dest + 4, CO64_ATOM);
+    set_atom_size(context->dest, atom->header_size, atom->header_size + 8 + offset_count * 8);
+    context->dest += atom->header_size + 8;
+
+    /* write the data */
+    for (pos = atom->data + 8, end = pos + offset_count * 4;
+        pos < end;
+        pos += 4) {
+        original_offset = BE_32(pos) - context->original_moov_size;
+        new_offset = (uint64_t)original_offset + context->new_moov_size;
+        AV_WB64(context->dest, new_offset);
+        context->dest += 8;
+    }
+}
+
+static int upgrade_stco_callback(void *ctx, atom_t *atom)
+{
+    upgrade_stco_context_t *context = ctx;
+    unsigned char *start_pos;
+    uint64_t copy_size;
+
+    switch (atom->type) {
+    case STCO_ATOM:
+        upgrade_stco_atom(context, atom);
+        break;
+
+    case MOOV_ATOM:
+    case TRAK_ATOM:
+    case MDIA_ATOM:
+    case MINF_ATOM:
+    case STBL_ATOM:
+        /* write the atom header */
+        memcpy(context->dest, atom->data - atom->header_size, atom->header_size);
+        start_pos = context->dest;
+        context->dest += atom->header_size;
+
+        /* parse internal atoms*/
+        if (parse_atoms(
+            atom->data,
+            atom->size,
+            upgrade_stco_callback,
+            context) < 0) {
+            return -1;
+        }
+
+        /* update the atom size */
+        set_atom_size(start_pos, atom->header_size, context->dest - start_pos);
+        break;
+
+    default:
+        copy_size = atom->header_size + atom->size;
+        memcpy(context->dest, atom->data - atom->header_size, copy_size);
+        context->dest += copy_size;
+        break;
+    }
+
+    return 0;
+}
+
+static int update_moov_atom(
+    unsigned char **moov_atom,
+    uint64_t *moov_atom_size)
+{
+    update_chunk_offsets_context_t update_context = { 0 };
+    upgrade_stco_context_t upgrade_context;
+    unsigned char *new_moov_atom;
+
+    update_context.moov_atom_size = *moov_atom_size;
+
+    if (parse_atoms(
+        *moov_atom,
+        *moov_atom_size,
+        update_chunk_offsets_callback,
+        &update_context) < 0) {
+        return -1;
+    }
+
+    if (!update_context.stco_overflow) {
+        return 0;
+    }
+
+    printf(" upgrading stco atoms to co64...\n");
+    upgrade_context.new_moov_size = *moov_atom_size +
+        update_context.stco_offset_count * 8 -
+        update_context.stco_data_size;
+
+    new_moov_atom = malloc(upgrade_context.new_moov_size);
+    if (new_moov_atom == NULL) {
+        fprintf(stderr, "could not allocate %"PRIu64" bytes for updated moov atom\n",
+            upgrade_context.new_moov_size);
+        return -1;
+    }
+
+    upgrade_context.original_moov_size = *moov_atom_size;
+    upgrade_context.dest = new_moov_atom;
+
+    if (parse_atoms(
+        *moov_atom,
+        *moov_atom_size,
+        upgrade_stco_callback,
+        &upgrade_context) < 0) {
+        free(new_moov_atom);
+        return -1;
+    }
+
+    free(*moov_atom);
+    *moov_atom = new_moov_atom;
+    *moov_atom_size = upgrade_context.new_moov_size;
+
+    if (upgrade_context.dest != *moov_atom + *moov_atom_size) {
+        fprintf(stderr, "unexpected - wrong number of moov bytes written\n");
+        return -1;
+    }
+
+    return 0;
+}
 
 int main(int argc, char *argv[])
 {
@@ -101,12 +440,11 @@
     unsigned char *ftyp_atom = NULL;
     uint64_t moov_atom_size;
     uint64_t ftyp_atom_size = 0;
-    uint64_t i, j;
-    uint32_t offset_count;
-    uint64_t current_offset;
     int64_t start_offset = 0;
     unsigned char *copy_buffer = NULL;
     int bytes_to_copy;
+    uint64_t free_size = 0;
+    uint64_t moov_size = 0;
 
     if (argc != 3) {
         printf("Usage: qt-faststart <infile.mov> <outfile.mov>\n"
@@ -136,11 +474,16 @@
 
         /* keep ftyp atom */
         if (atom_type == FTYP_ATOM) {
+            if (atom_size > MAX_FTYP_ATOM_SIZE) {
+                fprintf(stderr, "ftyp atom size %"PRIu64" too big\n",
+                       atom_size);
+                goto error_out;
+            }
             ftyp_atom_size = atom_size;
             free(ftyp_atom);
             ftyp_atom = malloc(ftyp_atom_size);
             if (!ftyp_atom) {
-                printf("could not allocate %"PRIu64" bytes for ftyp atom\n",
+                fprintf(stderr, "could not allocate %"PRIu64" bytes for ftyp atom\n",
                        atom_size);
                 goto error_out;
             }
@@ -184,7 +527,7 @@
             (atom_type != PICT_ATOM) &&
             (atom_type != UUID_ATOM) &&
             (atom_type != FTYP_ATOM)) {
-            printf("encountered non-QT top-level atom (is this a QuickTime file?)\n");
+            fprintf(stderr, "encountered non-QT top-level atom (is this a QuickTime file?)\n");
             break;
         }
         atom_offset += atom_size;
@@ -194,6 +537,15 @@
          * able to continue scanning sensibly after this atom, so break. */
         if (atom_size < 8)
             break;
+
+        if (atom_type == MOOV_ATOM)
+            moov_size = atom_size;
+
+        if (moov_size && atom_type == FREE_ATOM) {
+            free_size += atom_size;
+            atom_type = MOOV_ATOM;
+            atom_size = moov_size;
+        }
     }
 
     if (atom_type != MOOV_ATOM) {
@@ -203,9 +555,14 @@
         return 0;
     }
 
+    if (atom_size < 16) {
+        fprintf(stderr, "bad moov atom size\n");
+        goto error_out;
+    }
+
     /* moov atom was, in fact, the last atom in the chunk; load the whole
      * moov atom */
-    if (fseeko(infile, -atom_size, SEEK_END)) {
+    if (fseeko(infile, -(atom_size + free_size), SEEK_END)) {
         perror(argv[1]);
         goto error_out;
     }
@@ -217,7 +574,7 @@
     moov_atom_size = atom_size;
     moov_atom      = malloc(moov_atom_size);
     if (!moov_atom) {
-        printf("could not allocate %"PRIu64" bytes for moov atom\n", atom_size);
+        fprintf(stderr, "could not allocate %"PRIu64" bytes for moov atom\n", atom_size);
         goto error_out;
     }
     if (fread(moov_atom, atom_size, 1, infile) != 1) {
@@ -228,7 +585,7 @@
     /* this utility does not support compressed atoms yet, so disqualify
      * files with compressed QT atoms */
     if (BE_32(&moov_atom[12]) == CMOV_ATOM) {
-        printf("this utility does not support compressed moov atoms yet\n");
+        fprintf(stderr, "this utility does not support compressed moov atoms yet\n");
         goto error_out;
     }
 
@@ -236,56 +593,8 @@
     fclose(infile);
     infile = NULL;
 
-    /* crawl through the moov chunk in search of stco or co64 atoms */
-    for (i = 4; i < moov_atom_size - 4; i++) {
-        atom_type = BE_32(&moov_atom[i]);
-        if (atom_type == STCO_ATOM) {
-            printf(" patching stco atom...\n");
-            atom_size = BE_32(&moov_atom[i - 4]);
-            if (i + atom_size - 4 > moov_atom_size) {
-                printf(" bad atom size\n");
-                goto error_out;
-            }
-            offset_count = BE_32(&moov_atom[i + 8]);
-            if (i + 12 + offset_count * UINT64_C(4) > moov_atom_size) {
-                printf(" bad atom size/element count\n");
-                goto error_out;
-            }
-            for (j = 0; j < offset_count; j++) {
-                current_offset  = BE_32(&moov_atom[i + 12 + j * 4]);
-                current_offset += moov_atom_size;
-                moov_atom[i + 12 + j * 4 + 0] = (current_offset >> 24) & 0xFF;
-                moov_atom[i + 12 + j * 4 + 1] = (current_offset >> 16) & 0xFF;
-                moov_atom[i + 12 + j * 4 + 2] = (current_offset >>  8) & 0xFF;
-                moov_atom[i + 12 + j * 4 + 3] = (current_offset >>  0) & 0xFF;
-            }
-            i += atom_size - 4;
-        } else if (atom_type == CO64_ATOM) {
-            printf(" patching co64 atom...\n");
-            atom_size = BE_32(&moov_atom[i - 4]);
-            if (i + atom_size - 4 > moov_atom_size) {
-                printf(" bad atom size\n");
-                goto error_out;
-            }
-            offset_count = BE_32(&moov_atom[i + 8]);
-            if (i + 12 + offset_count * UINT64_C(8) > moov_atom_size) {
-                printf(" bad atom size/element count\n");
-                goto error_out;
-            }
-            for (j = 0; j < offset_count; j++) {
-                current_offset  = BE_64(&moov_atom[i + 12 + j * 8]);
-                current_offset += moov_atom_size;
-                moov_atom[i + 12 + j * 8 + 0] = (current_offset >> 56) & 0xFF;
-                moov_atom[i + 12 + j * 8 + 1] = (current_offset >> 48) & 0xFF;
-                moov_atom[i + 12 + j * 8 + 2] = (current_offset >> 40) & 0xFF;
-                moov_atom[i + 12 + j * 8 + 3] = (current_offset >> 32) & 0xFF;
-                moov_atom[i + 12 + j * 8 + 4] = (current_offset >> 24) & 0xFF;
-                moov_atom[i + 12 + j * 8 + 5] = (current_offset >> 16) & 0xFF;
-                moov_atom[i + 12 + j * 8 + 6] = (current_offset >>  8) & 0xFF;
-                moov_atom[i + 12 + j * 8 + 7] = (current_offset >>  0) & 0xFF;
-            }
-            i += atom_size - 4;
-        }
+    if (update_moov_atom(&moov_atom, &moov_atom_size) < 0) {
+        goto error_out;
     }
 
     /* re-open the input file and open the output file */
@@ -330,7 +639,7 @@
     bytes_to_copy = MIN(COPY_BUFFER_SIZE, last_offset);
     copy_buffer = malloc(bytes_to_copy);
     if (!copy_buffer) {
-        printf("could not allocate %d bytes for copy_buffer\n", bytes_to_copy);
+        fprintf(stderr, "could not allocate %d bytes for copy_buffer\n", bytes_to_copy);
         goto error_out;
     }
     printf(" copying rest of file...\n");

diff --git a/tools/seek_print.c b/tools/seek_print.c
index de876b4..9280421 100644
--- a/tools/seek_print.c
+++ b/tools/seek_print.c

@@ -65,7 +65,6 @@
     argv++;
     argc--;
 
-    av_register_all();
     if ((ret = avformat_open_input(&avf, filename, NULL, NULL)) < 0) {
         fprintf(stderr, "%s: %s\n", filename, av_err2str(ret));
         return 1;

diff --git a/tools/sidxindex.c b/tools/sidxindex.c
index be28461..a4f9551 100644
--- a/tools/sidxindex.c
+++ b/tools/sidxindex.c

@@ -363,8 +363,6 @@
     struct Tracks tracks = { 0 };
     int i;
 
-    av_register_all();
-
     for (i = 1; i < argc; i++) {
         if (!strcmp(argv[i], "-out")) {
             out = argv[i + 1];

diff --git a/tools/target_dec_fuzzer.c b/tools/target_dec_fuzzer.c
index a8696f9..a94d5e6 100644
--- a/tools/target_dec_fuzzer.c
+++ b/tools/target_dec_fuzzer.c

@@ -56,6 +56,8 @@
 
 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
 
+extern AVCodec * codec_list[];
+
 static void error(const char *err)
 {
     fprintf(stderr, "%s", err);
@@ -99,7 +101,7 @@
 static void FDBDesroy(FuzzDataBuffer *FDB) { av_free(FDB->data_); }
 
 static void FDBRealloc(FuzzDataBuffer *FDB, size_t size) {
-    size_t needed = size + FF_INPUT_BUFFER_PADDING_SIZE;
+    size_t needed = size + AV_INPUT_BUFFER_PADDING_SIZE;
     av_assert0(needed > size);
     if (needed > FDB->size_) {
         av_free(FDB->data_);
@@ -116,8 +118,8 @@
     FDBRealloc(FDB, size);
     memcpy(FDB->data_, data, size);
     size_t padd = FDB->size_ - size;
-    if (padd > FF_INPUT_BUFFER_PADDING_SIZE)
-        padd = FF_INPUT_BUFFER_PADDING_SIZE;
+    if (padd > AV_INPUT_BUFFER_PADDING_SIZE)
+        padd = AV_INPUT_BUFFER_PADDING_SIZE;
     memset(FDB->data_ + size, 0, padd);
     av_init_packet(dst);
     dst->data = FDB->data_;
@@ -138,12 +140,15 @@
     int (*decode_handler)(AVCodecContext *avctx, AVFrame *picture,
                           int *got_picture_ptr,
                           const AVPacket *avpkt) = NULL;
+    AVCodecParserContext *parser = NULL;
+
 
     if (!c) {
 #ifdef FFMPEG_DECODER
 #define DECODER_SYMBOL0(CODEC) ff_##CODEC##_decoder
 #define DECODER_SYMBOL(CODEC) DECODER_SYMBOL0(CODEC)
         extern AVCodec DECODER_SYMBOL(FFMPEG_DECODER);
+        codec_list[0] = &DECODER_SYMBOL(FFMPEG_DECODER);
         avcodec_register(&DECODER_SYMBOL(FFMPEG_DECODER));
 
         c = &DECODER_SYMBOL(FFMPEG_DECODER);
@@ -154,10 +159,6 @@
         av_log_set_level(AV_LOG_PANIC);
     }
 
-    // Unsupported
-    if (c->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU)
-        return 0;
-
     switch (c->type) {
     case AVMEDIA_TYPE_AUDIO   : decode_handler = avcodec_decode_audio4; break;
     case AVMEDIA_TYPE_VIDEO   : decode_handler = avcodec_decode_video2; break;
@@ -165,7 +166,8 @@
     }
 
     AVCodecContext* ctx = avcodec_alloc_context3(NULL);
-    if (!ctx)
+    AVCodecContext* parser_avctx = avcodec_alloc_context3(NULL);
+    if (!ctx || !parser_avctx)
         error("Failed memory allocation");
 
     ctx->max_pixels = 4096 * 4096; //To reduce false positive OOM and hangs
@@ -177,6 +179,9 @@
         ctx->height                             = bytestream2_get_le32(&gbc);
         ctx->bit_rate                           = bytestream2_get_le64(&gbc);
         ctx->bits_per_coded_sample              = bytestream2_get_le32(&gbc);
+        // Try to initialize a parser for this codec, note, this may fail which just means we test without one
+        if (bytestream2_get_byte(&gbc) & 1)
+            parser = av_parser_init(c->id);
         if (av_image_check_size(ctx->width, ctx->height, 0, ctx))
             ctx->width = ctx->height = 0;
         size -= 1024;
@@ -185,8 +190,10 @@
     int res = avcodec_open2(ctx, c, NULL);
     if (res < 0) {
         av_free(ctx);
+        av_free(parser_avctx);
         return 0; // Failure of avcodec_open2() does not imply that a issue was found
     }
+    parser_avctx->codec_id = ctx->codec_id;
 
     FDBCreate(&buffer);
     int got_frame;
@@ -195,7 +202,7 @@
         error("Failed memory allocation");
 
     // Read very simple container
-    AVPacket avpkt;
+    AVPacket avpkt, parsepkt;
     while (data < end && it < maxiteration) {
         // Search for the TAG
         while (data + sizeof(fuzz_tag) < end) {
@@ -206,12 +213,34 @@
         if (data + sizeof(fuzz_tag) > end)
             data = end;
 
-        FDBPrepare(&buffer, &avpkt, last, data - last);
+        FDBPrepare(&buffer, &parsepkt, last, data - last);
         data += sizeof(fuzz_tag);
         last = data;
 
-        // Iterate through all data
-        while (avpkt.size > 0 && it++ < maxiteration) {
+        while (parsepkt.size > 0) {
+
+            if (parser) {
+                av_init_packet(&avpkt);
+                int ret = av_parser_parse2(parser, parser_avctx, &avpkt.data, &avpkt.size,
+                                           parsepkt.data, parsepkt.size,
+                                           parsepkt.pts, parsepkt.dts, parsepkt.pos);
+                parsepkt.data += ret;
+                parsepkt.size -= ret;
+                parsepkt.pos  += ret;
+                avpkt.pts = parser->pts;
+                avpkt.dts = parser->dts;
+                avpkt.pos = parser->pos;
+                if ( parser->key_frame == 1 ||
+                    (parser->key_frame == -1 && parser->pict_type == AV_PICTURE_TYPE_I))
+                    avpkt.flags |= AV_PKT_FLAG_KEY;
+                avpkt.flags |= parsepkt.flags & AV_PKT_FLAG_DISCARD;
+            } else {
+                avpkt = parsepkt;
+                parsepkt.size = 0;
+            }
+
+          // Iterate through all data
+          while (avpkt.size > 0 && it++ < maxiteration) {
             av_frame_unref(frame);
             int ret = decode_handler(ctx, frame, &got_frame, &avpkt);
 
@@ -224,6 +253,7 @@
                 ret = avpkt.size;
             avpkt.data += ret;
             avpkt.size -= ret;
+          }
         }
     }
 
@@ -239,6 +269,9 @@
     av_frame_free(&frame);
     avcodec_free_context(&ctx);
     av_freep(&ctx);
+    avcodec_free_context(&parser_avctx);
+    av_freep(&parser_avctx);
+    av_parser_close(parser);
     FDBDesroy(&buffer);
     return 0;
 }

diff --git a/tools/uncoded_frame.c b/tools/uncoded_frame.c
index 3ca2ba4..3f850d3 100644
--- a/tools/uncoded_frame.c
+++ b/tools/uncoded_frame.c

@@ -11,7 +11,6 @@
     AVFormatContext *mux;
     AVStream *stream;
     AVFilterContext *sink;
-    AVFilterLink *link;
 } Stream;
 
 static int create_sink(Stream *st, AVFilterGraph *graph,
@@ -36,7 +35,6 @@
     ret = avfilter_link(f, idx, st->sink, 0);
     if (ret < 0)
         return ret;
-    st->link = st->sink->inputs[0];
     return 0;
 }
 
@@ -64,9 +62,7 @@
     out_dev_name = argv + 2;
     nb_out_dev = argc - 2;
 
-    av_register_all();
     avdevice_register_all();
-    avfilter_register_all();
 
     /* Create input graph */
     if (!(in_graph = avfilter_graph_alloc())) {
@@ -143,7 +139,7 @@
             goto fail;
         }
         if (!(st->mux->oformat->flags & AVFMT_NOFILE)) {
-            ret = avio_open2(&st->mux->pb, st->mux->filename, AVIO_FLAG_WRITE,
+            ret = avio_open2(&st->mux->pb, st->mux->url, AVIO_FLAG_WRITE,
                              NULL, NULL);
             if (ret < 0) {
                 av_log(st->mux, AV_LOG_ERROR, "Failed to init output: %s\n",
@@ -163,26 +159,24 @@
             av_log(NULL, AV_LOG_ERROR, "Failed to create output stream\n");
             goto fail;
         }
-        st->stream->codec->codec_type = st->link->type;
-        st->stream->time_base = st->stream->codec->time_base =
-            st->link->time_base;
-        switch (st->link->type) {
+        st->stream->codecpar->codec_type = av_buffersink_get_type(st->sink);
+        st->stream->time_base = av_buffersink_get_time_base(st->sink);
+        switch (av_buffersink_get_type(st->sink)) {
         case AVMEDIA_TYPE_VIDEO:
-            st->stream->codec->codec_id = AV_CODEC_ID_RAWVIDEO;
+            st->stream->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
             st->stream->avg_frame_rate =
             st->stream->  r_frame_rate = av_buffersink_get_frame_rate(st->sink);
-            st->stream->codec->width               = st->link->w;
-            st->stream->codec->height              = st->link->h;
-            st->stream->codec->sample_aspect_ratio = st->link->sample_aspect_ratio;
-            st->stream->codec->pix_fmt             = st->link->format;
+            st->stream->codecpar->width               = av_buffersink_get_w(st->sink);
+            st->stream->codecpar->height              = av_buffersink_get_h(st->sink);
+            st->stream->codecpar->sample_aspect_ratio = av_buffersink_get_sample_aspect_ratio(st->sink);
+            st->stream->codecpar->format              = av_buffersink_get_format(st->sink);
             break;
         case AVMEDIA_TYPE_AUDIO:
-            st->stream->codec->channel_layout = st->link->channel_layout;
-            st->stream->codec->channels = avfilter_link_get_channels(st->link);
-            st->stream->codec->sample_rate = st->link->sample_rate;
-            st->stream->codec->sample_fmt = st->link->format;
-            st->stream->codec->codec_id =
-                av_get_pcm_codec(st->stream->codec->sample_fmt, -1);
+            st->stream->codecpar->channel_layout = av_buffersink_get_channel_layout(st->sink);
+            st->stream->codecpar->channels       = av_buffersink_get_channels(st->sink);
+            st->stream->codecpar->sample_rate    = av_buffersink_get_sample_rate(st->sink);
+            st->stream->codecpar->format         = av_buffersink_get_format(st->sink);
+            st->stream->codecpar->codec_id       = av_get_pcm_codec(st->stream->codecpar->format, -1);
             break;
         default:
             av_assert0(!"reached");
@@ -240,14 +234,14 @@
                 }
                 if (frame->pts != AV_NOPTS_VALUE)
                     frame->pts = av_rescale_q(frame->pts,
-                                              st->link  ->time_base,
+                                              av_buffersink_get_time_base(st->sink),
                                               st->stream->time_base);
                 ret = av_interleaved_write_uncoded_frame(st->mux,
                                                          st->stream->index,
                                                          frame);
                 frame = NULL;
                 if (ret < 0) {
-                    av_log(st->stream->codec, AV_LOG_ERROR,
+                    av_log(st->mux, AV_LOG_ERROR,
                            "Error writing frame: %s\n", av_err2str(ret));
                     goto fail;
                 }
commit	a3fc3e7c47aa1d8163cee7c0bd1a5c95f0c2fc2d	[log] [tgz]
author	Dale Sather <dalesat@google.com>	Mon Nov 05 22:01:10 2018 +0000
committer	Dale Sather <dalesat@google.com>	Mon Nov 05 22:01:10 2018 +0000
tree	136807b8c91ae1f3650036d6f2b1acef28fa0686
parent	635bdae185e8df082e60c9e230f5087f8f89929a [diff]
parent	79bb5d6abc7181bc33e96a3da6089bb41d0bf557 [diff]